Diffstat (limited to 'storage')
-rw-r--r--  storage/archive/azio.c  2
-rw-r--r--  storage/archive/ha_archive.cc  40
-rw-r--r--  storage/cassandra/CMakeLists.txt  1
-rw-r--r--  storage/connect/ha_connect.cc  4
-rw-r--r--  storage/connect/jdbconn.cpp  1
-rw-r--r--  storage/connect/jsonudf.h  5
-rw-r--r--  storage/connect/mysql-test/connect/r/bin.result  4
-rw-r--r--  storage/connect/mysql-test/connect/r/dir.result  2
-rw-r--r--  storage/connect/mysql-test/connect/r/grant.result  59
-rw-r--r--  storage/connect/mysql-test/connect/r/grant2.result  337
-rw-r--r--  storage/connect/mysql-test/connect/r/ini_grant.result  8
-rw-r--r--  storage/connect/mysql-test/connect/r/jdbc.result  4
-rw-r--r--  storage/connect/mysql-test/connect/r/jdbc_new.result  21
-rw-r--r--  storage/connect/mysql-test/connect/r/jdbc_postgresql.result  2
-rw-r--r--  storage/connect/mysql-test/connect/r/mysql_discovery.result  13
-rw-r--r--  storage/connect/mysql-test/connect/r/mysql_exec.result  8
-rw-r--r--  storage/connect/mysql-test/connect/r/mysql_grant.result  8
-rw-r--r--  storage/connect/mysql-test/connect/r/mysql_new.result  23
-rw-r--r--  storage/connect/mysql-test/connect/r/vcol.result  2
-rw-r--r--  storage/connect/mysql-test/connect/r/xml.result  2
-rw-r--r--  storage/connect/mysql-test/connect/r/xml2.result  2
-rw-r--r--  storage/connect/mysql-test/connect/r/xml2_grant.result  10
-rw-r--r--  storage/connect/mysql-test/connect/r/xml2_mdev5261.result  4
-rw-r--r--  storage/connect/mysql-test/connect/r/xml_grant.result  10
-rw-r--r--  storage/connect/mysql-test/connect/r/xml_mdev5261.result  4
-rw-r--r--  storage/connect/mysql-test/connect/t/dir.test  4
-rw-r--r--  storage/connect/mysql-test/connect/t/vcol.test  2
-rw-r--r--  storage/connect/tabrest.cpp  3
-rw-r--r--  storage/connect/tabrest.h  5
-rw-r--r--  storage/federated/ha_federated.cc  83
-rw-r--r--  storage/federatedx/ha_federatedx.cc  23
-rw-r--r--  storage/heap/ha_heap.cc  2
-rw-r--r--  storage/heap/heapdef.h  3
-rw-r--r--  storage/heap/hp_delete.c  2
-rw-r--r--  storage/heap/hp_hash.c  17
-rw-r--r--  storage/heap/hp_scan.c  4
-rw-r--r--  storage/heap/hp_update.c  4
-rw-r--r--  storage/heap/hp_write.c  2
-rw-r--r--  storage/innobase/CMakeLists.txt  438
-rw-r--r--  storage/innobase/Doxyfile  1419
-rw-r--r--  storage/innobase/api/api0api.cc  3886
-rw-r--r--  storage/innobase/api/api0misc.cc  203
-rw-r--r--  storage/innobase/btr/btr0btr.cc  3553
-rw-r--r--  storage/innobase/btr/btr0bulk.cc  1058
-rw-r--r--  storage/innobase/btr/btr0cur.cc  4702
-rw-r--r--  storage/innobase/btr/btr0defragment.cc  123
-rw-r--r--  storage/innobase/btr/btr0pcur.cc  268
-rw-r--r--  storage/innobase/btr/btr0scrub.cc  110
-rw-r--r--  storage/innobase/btr/btr0sea.cc  1453
-rw-r--r--  storage/innobase/buf/buf0buddy.cc  256
-rw-r--r--  storage/innobase/buf/buf0buf.cc  4734
-rw-r--r--  storage/innobase/buf/buf0checksum.cc  148
-rw-r--r--  storage/innobase/buf/buf0dblwr.cc  729
-rw-r--r--  storage/innobase/buf/buf0dump.cc  388
-rw-r--r--  storage/innobase/buf/buf0flu.cc  2531
-rw-r--r--  storage/innobase/buf/buf0lru.cc  1132
-rw-r--r--  storage/innobase/buf/buf0mtflu.cc  175
-rw-r--r--  storage/innobase/buf/buf0rea.cc  703
-rw-r--r--  storage/innobase/bzip2.cmake  33
-rw-r--r--  storage/innobase/data/data0data.cc  299
-rw-r--r--  storage/innobase/data/data0type.cc  68
-rw-r--r--  storage/innobase/dict/dict0boot.cc  197
-rw-r--r--  storage/innobase/dict/dict0crea.cc  1253
-rw-r--r--  storage/innobase/dict/dict0defrag_bg.cc  333
-rw-r--r--  storage/innobase/dict/dict0dict.cc  3345
-rw-r--r--  storage/innobase/dict/dict0load.cc  2537
-rw-r--r--  storage/innobase/dict/dict0mem.cc  668
-rw-r--r--  storage/innobase/dict/dict0stats.cc  1083
-rw-r--r--  storage/innobase/dict/dict0stats_bg.cc  379
-rw-r--r--  storage/innobase/dyn/dyn0dyn.cc  65
-rw-r--r--  storage/innobase/eval/eval0eval.cc  440
-rw-r--r--  storage/innobase/eval/eval0proc.cc  24
-rw-r--r--  storage/innobase/fil/fil0crypt.cc  827
-rw-r--r--  storage/innobase/fil/fil0fil.cc  7516
-rw-r--r--  storage/innobase/fil/fil0pagecompress.cc  29
-rw-r--r--  storage/innobase/fsp/fsp0file.cc  1060
-rw-r--r--  storage/innobase/fsp/fsp0fsp.cc  3363
-rw-r--r--  storage/innobase/fsp/fsp0space.cc  218
-rw-r--r--  storage/innobase/fsp/fsp0sysspace.cc  1000
-rw-r--r--  storage/innobase/fts/Makefile.query  20
-rw-r--r--  storage/innobase/fts/fts0ast.cc  268
-rw-r--r--  storage/innobase/fts/fts0blex.cc  28
-rw-r--r--  storage/innobase/fts/fts0blex.l  1
-rw-r--r--  storage/innobase/fts/fts0config.cc  184
-rw-r--r--  storage/innobase/fts/fts0fts.cc  2491
-rw-r--r--  storage/innobase/fts/fts0opt.cc  335
-rw-r--r--  storage/innobase/fts/fts0pars.cc  13
-rw-r--r--  storage/innobase/fts/fts0pars.y  17
-rw-r--r--  storage/innobase/fts/fts0plugin.cc  297
-rw-r--r--  storage/innobase/fts/fts0que.cc  978
-rw-r--r--  storage/innobase/fts/fts0sql.cc  40
-rw-r--r--  storage/innobase/fts/fts0tlex.cc  28
-rw-r--r--  storage/innobase/fts/fts0tlex.l  1
-rw-r--r--  storage/innobase/fut/fut0fut.cc  31
-rw-r--r--  storage/innobase/fut/fut0lst.cc  214
-rw-r--r--  storage/innobase/gis/gis0geo.cc  782
-rw-r--r--  storage/innobase/gis/gis0rtree.cc  1975
-rw-r--r--  storage/innobase/gis/gis0sea.cc  2017
-rw-r--r--  storage/innobase/ha/ha0ha.cc  276
-rw-r--r--  storage/innobase/ha/ha0storage.cc  15
-rw-r--r--  storage/innobase/ha/hash0hash.cc  287
-rw-r--r--  storage/innobase/ha_innodb.def  4
-rw-r--r--  storage/innobase/handler/ha_innodb.cc  15173
-rw-r--r--  storage/innobase/handler/ha_innodb.h  1044
-rw-r--r--  storage/innobase/handler/ha_xtradb.h  1009
-rw-r--r--  storage/innobase/handler/handler0alter.cc  4861
-rw-r--r--  storage/innobase/handler/i_s.cc  1519
-rw-r--r--  storage/innobase/handler/i_s.h  5
-rw-r--r--  storage/innobase/ibuf/ibuf0ibuf.cc  1790
-rw-r--r--  storage/innobase/include/api0api.h  1312
-rw-r--r--  storage/innobase/include/api0misc.h  78
-rw-r--r--  storage/innobase/include/btr0btr.h  721
-rw-r--r--  storage/innobase/include/btr0btr.ic  155
-rw-r--r--  storage/innobase/include/btr0bulk.h  381
-rw-r--r--  storage/innobase/include/btr0cur.h  459
-rw-r--r--  storage/innobase/include/btr0cur.ic  69
-rw-r--r--  storage/innobase/include/btr0defragment.h  5
-rw-r--r--  storage/innobase/include/btr0pcur.h  227
-rw-r--r--  storage/innobase/include/btr0pcur.ic  195
-rw-r--r--  storage/innobase/include/btr0scrub.h  6
-rw-r--r--  storage/innobase/include/btr0sea.h  356
-rw-r--r--  storage/innobase/include/btr0sea.ic  186
-rw-r--r--  storage/innobase/include/btr0types.h  204
-rw-r--r--  storage/innobase/include/buf0buddy.h  25
-rw-r--r--  storage/innobase/include/buf0buddy.ic  10
-rw-r--r--  storage/innobase/include/buf0buf.h  1325
-rw-r--r--  storage/innobase/include/buf0buf.ic  739
-rw-r--r--  storage/innobase/include/buf0checksum.h  85
-rw-r--r--  storage/innobase/include/buf0dblwr.h  54
-rw-r--r--  storage/innobase/include/buf0dump.h  7
-rw-r--r--  storage/innobase/include/buf0flu.h  308
-rw-r--r--  storage/innobase/include/buf0flu.ic  98
-rw-r--r--  storage/innobase/include/buf0lru.h  89
-rw-r--r--  storage/innobase/include/buf0lru.ic  25
-rw-r--r--  storage/innobase/include/buf0rea.h  123
-rw-r--r--  storage/innobase/include/buf0types.h  118
-rw-r--r--  storage/innobase/include/data0data.h  406
-rw-r--r--  storage/innobase/include/data0data.ic  307
-rw-r--r--  storage/innobase/include/data0type.h  158
-rw-r--r--  storage/innobase/include/data0type.ic  133
-rw-r--r--  storage/innobase/include/db0err.h  61
-rw-r--r--  storage/innobase/include/dict0boot.h  55
-rw-r--r--  storage/innobase/include/dict0boot.ic  10
-rw-r--r--  storage/innobase/include/dict0crea.h  340
-rw-r--r--  storage/innobase/include/dict0crea.ic  68
-rw-r--r--  storage/innobase/include/dict0defrag_bg.h  108
-rw-r--r--  storage/innobase/include/dict0dict.h  1179
-rw-r--r--  storage/innobase/include/dict0dict.ic  931
-rw-r--r--  storage/innobase/include/dict0load.h  284
-rw-r--r--  storage/innobase/include/dict0load.ic  26
-rw-r--r--  storage/innobase/include/dict0mem.h  1476
-rw-r--r--  storage/innobase/include/dict0mem.ic  18
-rw-r--r--  storage/innobase/include/dict0pagecompress.h  22
-rw-r--r--  storage/innobase/include/dict0pagecompress.ic  24
-rw-r--r--  storage/innobase/include/dict0priv.h  10
-rw-r--r--  storage/innobase/include/dict0priv.ic  34
-rw-r--r--  storage/innobase/include/dict0stats.h  127
-rw-r--r--  storage/innobase/include/dict0stats.ic  41
-rw-r--r--  storage/innobase/include/dict0stats_bg.h  86
-rw-r--r--  storage/innobase/include/dict0stats_bg.ic  45
-rw-r--r--  storage/innobase/include/dict0types.h  65
-rw-r--r--  storage/innobase/include/dyn0buf.h  511
-rw-r--r--  storage/innobase/include/dyn0dyn.h  199
-rw-r--r--  storage/innobase/include/dyn0dyn.ic  298
-rw-r--r--  storage/innobase/include/dyn0types.h (renamed from storage/innobase/include/row0quiesce.ic)  23
-rw-r--r--  storage/innobase/include/eval0eval.h  13
-rw-r--r--  storage/innobase/include/eval0eval.ic  12
-rw-r--r--  storage/innobase/include/eval0proc.h  28
-rw-r--r--  storage/innobase/include/eval0proc.ic  6
-rw-r--r--  storage/innobase/include/fil0crypt.h  139
-rw-r--r--  storage/innobase/include/fil0crypt.ic  49
-rw-r--r--  storage/innobase/include/fil0fil.h  1564
-rw-r--r--  storage/innobase/include/fil0fil.ic  52
-rw-r--r--  storage/innobase/include/fil0pagecompress.h  32
-rw-r--r--  storage/innobase/include/fsp0file.h  580
-rw-r--r--  storage/innobase/include/fsp0fsp.h  716
-rw-r--r--  storage/innobase/include/fsp0fsp.ic  145
-rw-r--r--  storage/innobase/include/fsp0pagecompress.h  11
-rw-r--r--  storage/innobase/include/fsp0pagecompress.ic  19
-rw-r--r--  storage/innobase/include/fsp0space.h  242
-rw-r--r--  storage/innobase/include/fsp0sysspace.h  298
-rw-r--r--  storage/innobase/include/fsp0types.h  335
-rw-r--r--  storage/innobase/include/fts0ast.h  88
-rw-r--r--  storage/innobase/include/fts0blex.h  2
-rw-r--r--  storage/innobase/include/fts0fts.h  396
-rw-r--r--  storage/innobase/include/fts0opt.h  3
-rw-r--r--  storage/innobase/include/fts0plugin.h  50
-rw-r--r--  storage/innobase/include/fts0priv.h  144
-rw-r--r--  storage/innobase/include/fts0priv.ic  33
-rw-r--r--  storage/innobase/include/fts0tlex.h  2
-rw-r--r--  storage/innobase/include/fts0tokenize.h  188
-rw-r--r--  storage/innobase/include/fts0types.h  115
-rw-r--r--  storage/innobase/include/fts0types.ic  319
-rw-r--r--  storage/innobase/include/fut0fut.h  36
-rw-r--r--  storage/innobase/include/fut0fut.ic  40
-rw-r--r--  storage/innobase/include/fut0lst.h  66
-rw-r--r--  storage/innobase/include/fut0lst.ic  32
-rw-r--r--  storage/innobase/include/gis0geo.h  150
-rw-r--r--  storage/innobase/include/gis0rtree.h  537
-rw-r--r--  storage/innobase/include/gis0rtree.ic  276
-rw-r--r--  storage/innobase/include/gis0type.h  159
-rw-r--r--  storage/innobase/include/ha0ha.h  128
-rw-r--r--  storage/innobase/include/ha0ha.ic  66
-rw-r--r--  storage/innobase/include/ha0storage.h  33
-rw-r--r--  storage/innobase/include/ha0storage.ic  8
-rw-r--r--  storage/innobase/include/ha_prototypes.h  418
-rw-r--r--  storage/innobase/include/handler0alter.h  17
-rw-r--r--  storage/innobase/include/hash0hash.h  211
-rw-r--r--  storage/innobase/include/hash0hash.ic  78
-rw-r--r--  storage/innobase/include/ib0mutex.h  818
-rw-r--r--  storage/innobase/include/ibuf0ibuf.h  223
-rw-r--r--  storage/innobase/include/ibuf0ibuf.ic  133
-rw-r--r--  storage/innobase/include/lock0iter.h  7
-rw-r--r--  storage/innobase/include/lock0lock.h  509
-rw-r--r--  storage/innobase/include/lock0lock.ic  78
-rw-r--r--  storage/innobase/include/lock0prdt.h  217
-rw-r--r--  storage/innobase/include/lock0priv.h  669
-rw-r--r--  storage/innobase/include/lock0priv.ic  362
-rw-r--r--  storage/innobase/include/lock0types.h  228
-rw-r--r--  storage/innobase/include/log0crypt.h  175
-rw-r--r--  storage/innobase/include/log0log.h  926
-rw-r--r--  storage/innobase/include/log0log.ic  228
-rw-r--r--  storage/innobase/include/log0recv.h  523
-rw-r--r--  storage/innobase/include/log0recv.ic  53
-rw-r--r--  storage/innobase/include/log0types.h (renamed from storage/innobase/include/row0import.ic)  35
-rw-r--r--  storage/innobase/include/mach0data.h  228
-rw-r--r--  storage/innobase/include/mach0data.ic  479
-rw-r--r--  storage/innobase/include/mem0dbg.h  150
-rw-r--r--  storage/innobase/include/mem0dbg.ic  109
-rw-r--r--  storage/innobase/include/mem0mem.h  338
-rw-r--r--  storage/innobase/include/mem0mem.ic  337
-rw-r--r--  storage/innobase/include/mem0pool.h  121
-rw-r--r--  storage/innobase/include/mem0pool.ic  24
-rw-r--r--  storage/innobase/include/mtr0log.h  140
-rw-r--r--  storage/innobase/include/mtr0log.ic  224
-rw-r--r--  storage/innobase/include/mtr0mtr.h  823
-rw-r--r--  storage/innobase/include/mtr0mtr.ic  380
-rw-r--r--  storage/innobase/include/mtr0types.h  249
-rw-r--r--  storage/innobase/include/os0api.h  75
-rw-r--r--  storage/innobase/include/os0event.h  135
-rw-r--r--  storage/innobase/include/os0file.h  2185
-rw-r--r--  storage/innobase/include/os0file.ic  630
-rw-r--r--  storage/innobase/include/os0once.h  17
-rw-r--r--  storage/innobase/include/os0proc.h  50
-rw-r--r--  storage/innobase/include/os0proc.ic  27
-rw-r--r--  storage/innobase/include/os0sync.h  948
-rw-r--r--  storage/innobase/include/os0sync.ic  266
-rw-r--r--  storage/innobase/include/os0thread.h  59
-rw-r--r--  storage/innobase/include/os0thread.ic  25
-rw-r--r--  storage/innobase/include/page0cur.h  192
-rw-r--r--  storage/innobase/include/page0cur.ic  106
-rw-r--r--  storage/innobase/include/page0page.h  703
-rw-r--r--  storage/innobase/include/page0page.ic  551
-rw-r--r--  storage/innobase/include/page0size.h  5
-rw-r--r--  storage/innobase/include/page0types.h  67
-rw-r--r--  storage/innobase/include/page0zip.h  262
-rw-r--r--  storage/innobase/include/page0zip.ic  70
-rw-r--r--  storage/innobase/include/pars0grm.h  334
-rw-r--r--  storage/innobase/include/pars0opt.h  15
-rw-r--r--  storage/innobase/include/pars0opt.ic  24
-rw-r--r--  storage/innobase/include/pars0pars.h  203
-rw-r--r--  storage/innobase/include/pars0pars.ic  24
-rw-r--r--  storage/innobase/include/pars0sym.h  35
-rw-r--r--  storage/innobase/include/pars0sym.ic  24
-rw-r--r--  storage/innobase/include/que0que.h  163
-rw-r--r--  storage/innobase/include/que0que.ic  23
-rw-r--r--  storage/innobase/include/que0types.h  41
-rw-r--r--  storage/innobase/include/read0read.h  255
-rw-r--r--  storage/innobase/include/read0read.ic  148
-rw-r--r--  storage/innobase/include/read0types.h  304
-rw-r--r--  storage/innobase/include/rem0cmp.h  367
-rw-r--r--  storage/innobase/include/rem0cmp.ic  194
-rw-r--r--  storage/innobase/include/rem0rec.h  612
-rw-r--r--  storage/innobase/include/rem0rec.ic  308
-rw-r--r--  storage/innobase/include/rem0types.h  4
-rw-r--r--  storage/innobase/include/row0ext.h  14
-rw-r--r--  storage/innobase/include/row0ftsort.h  70
-rw-r--r--  storage/innobase/include/row0import.h  30
-rw-r--r--  storage/innobase/include/row0ins.h  77
-rw-r--r--  storage/innobase/include/row0ins.ic  26
-rw-r--r--  storage/innobase/include/row0log.h  113
-rw-r--r--  storage/innobase/include/row0log.ic  18
-rw-r--r--  storage/innobase/include/row0merge.h  252
-rw-r--r--  storage/innobase/include/row0mysql.h  491
-rw-r--r--  storage/innobase/include/row0mysql.ic  24
-rw-r--r--  storage/innobase/include/row0purge.h  143
-rw-r--r--  storage/innobase/include/row0purge.ic  25
-rw-r--r--  storage/innobase/include/row0quiesce.h  9
-rw-r--r--  storage/innobase/include/row0row.h  161
-rw-r--r--  storage/innobase/include/row0row.ic  68
-rw-r--r--  storage/innobase/include/row0sel.h  200
-rw-r--r--  storage/innobase/include/row0sel.ic  39
-rw-r--r--  storage/innobase/include/row0trunc.h  428
-rw-r--r--  storage/innobase/include/row0types.h  95
-rw-r--r--  storage/innobase/include/row0uins.h  12
-rw-r--r--  storage/innobase/include/row0uins.ic  25
-rw-r--r--  storage/innobase/include/row0umod.h  12
-rw-r--r--  storage/innobase/include/row0umod.ic  24
-rw-r--r--  storage/innobase/include/row0undo.h  26
-rw-r--r--  storage/innobase/include/row0undo.ic  24
-rw-r--r--  storage/innobase/include/row0upd.h  230
-rw-r--r--  storage/innobase/include/row0upd.ic  76
-rw-r--r--  storage/innobase/include/row0vers.h  112
-rw-r--r--  storage/innobase/include/row0vers.ic  30
-rw-r--r--  storage/innobase/include/srv0conc.h  27
-rw-r--r--  storage/innobase/include/srv0mon.h  192
-rw-r--r--  storage/innobase/include/srv0mon.ic  12
-rw-r--r--  storage/innobase/include/srv0srv.h  595
-rw-r--r--  storage/innobase/include/srv0srv.ic  24
-rw-r--r--  storage/innobase/include/srv0start.h  114
-rw-r--r--  storage/innobase/include/sync0arr.h  82
-rw-r--r--  storage/innobase/include/sync0arr.ic  54
-rw-r--r--  storage/innobase/include/sync0debug.h  105
-rw-r--r--  storage/innobase/include/sync0policy.h  540
-rw-r--r--  storage/innobase/include/sync0policy.ic  101
-rw-r--r--  storage/innobase/include/sync0rw.h  628
-rw-r--r--  storage/innobase/include/sync0rw.ic  696
-rw-r--r--  storage/innobase/include/sync0sync.h  799
-rw-r--r--  storage/innobase/include/sync0sync.ic  415
-rw-r--r--  storage/innobase/include/sync0types.h  1224
-rw-r--r--  storage/innobase/include/trx0i_s.h  42
-rw-r--r--  storage/innobase/include/trx0purge.h  448
-rw-r--r--  storage/innobase/include/trx0purge.ic  27
-rw-r--r--  storage/innobase/include/trx0rec.h  180
-rw-r--r--  storage/innobase/include/trx0rec.ic  33
-rw-r--r--  storage/innobase/include/trx0roll.h  128
-rw-r--r--  storage/innobase/include/trx0roll.ic  46
-rw-r--r--  storage/innobase/include/trx0rseg.h  228
-rw-r--r--  storage/innobase/include/trx0rseg.ic  103
-rw-r--r--  storage/innobase/include/trx0sys.h  433
-rw-r--r--  storage/innobase/include/trx0sys.ic  291
-rw-r--r--  storage/innobase/include/trx0trx.h  867
-rw-r--r--  storage/innobase/include/trx0trx.ic  93
-rw-r--r--  storage/innobase/include/trx0types.h  102
-rw-r--r--  storage/innobase/include/trx0undo.h  345
-rw-r--r--  storage/innobase/include/trx0undo.ic  69
-rw-r--r--  storage/innobase/include/univ.i  365
-rw-r--r--  storage/innobase/include/usr0sess.h  77
-rw-r--r--  storage/innobase/include/usr0sess.ic  24
-rw-r--r--  storage/innobase/include/usr0types.h  31
-rw-r--r--  storage/innobase/include/ut0bh.h  152
-rw-r--r--  storage/innobase/include/ut0bh.ic  125
-rw-r--r--  storage/innobase/include/ut0byte.h  18
-rw-r--r--  storage/innobase/include/ut0byte.ic  18
-rw-r--r--  storage/innobase/include/ut0counter.h  78
-rw-r--r--  storage/innobase/include/ut0crc32.h  24
-rw-r--r--  storage/innobase/include/ut0dbg.h  170
-rw-r--r--  storage/innobase/include/ut0list.h  53
-rw-r--r--  storage/innobase/include/ut0list.ic  6
-rw-r--r--  storage/innobase/include/ut0lst.h  628
-rw-r--r--  storage/innobase/include/ut0mem.h  186
-rw-r--r--  storage/innobase/include/ut0mem.ic  67
-rw-r--r--  storage/innobase/include/ut0mutex.h  200
-rw-r--r--  storage/innobase/include/ut0new.h  898
-rw-r--r--  storage/innobase/include/ut0pool.h  384
-rw-r--r--  storage/innobase/include/ut0rbt.h  104
-rw-r--r--  storage/innobase/include/ut0rnd.h  108
-rw-r--r--  storage/innobase/include/ut0rnd.ic  126
-rw-r--r--  storage/innobase/include/ut0sort.h  2
-rw-r--r--  storage/innobase/include/ut0stage.h  593
-rw-r--r--  storage/innobase/include/ut0ut.h  592
-rw-r--r--  storage/innobase/include/ut0ut.ic  105
-rw-r--r--  storage/innobase/include/ut0vec.h  58
-rw-r--r--  storage/innobase/include/ut0vec.ic  95
-rw-r--r--  storage/innobase/include/ut0wqueue.h  51
-rw-r--r--  storage/innobase/innodb.cmake  288
-rw-r--r--  storage/innobase/lock/lock0iter.cc  10
-rw-r--r--  storage/innobase/lock/lock0lock.cc  6032
-rw-r--r--  storage/innobase/lock/lock0prdt.cc  1060
-rw-r--r--  storage/innobase/lock/lock0wait.cc  103
-rw-r--r--  storage/innobase/log/log0crypt.cc  899
-rw-r--r--  storage/innobase/log/log0log.cc  3389
-rw-r--r--  storage/innobase/log/log0recv.cc  5201
-rw-r--r--  storage/innobase/lz4.cmake  56
-rw-r--r--  storage/innobase/lzma.cmake  33
-rw-r--r--  storage/innobase/lzo.cmake  48
-rw-r--r--  storage/innobase/mach/mach0data.cc  102
-rw-r--r--  storage/innobase/mem/mem0dbg.cc  1050
-rw-r--r--  storage/innobase/mem/mem0mem.cc  272
-rw-r--r--  storage/innobase/mem/mem0pool.cc  727
-rw-r--r--  storage/innobase/mtr/mtr0log.cc  181
-rw-r--r--  storage/innobase/mtr/mtr0mtr.cc  1146
-rw-r--r--  storage/innobase/mysql-test/storage_engine/define_engine.inc  4
-rw-r--r--  storage/innobase/mysql-test/storage_engine/disabled.def  4
-rw-r--r--  storage/innobase/mysql-test/storage_engine/lock_concurrent.rdiff  7
-rw-r--r--  storage/innobase/mysql-test/storage_engine/repair_table.rdiff  2
-rw-r--r--  storage/innobase/mysql-test/storage_engine/suite.opt  2
-rw-r--r--  storage/innobase/mysql-test/storage_engine/tbl_opt_index_dir.rdiff (renamed from storage/innobase/mysql-test/storage_engine/tbl_opt_data_index_dir.rdiff)  0
-rw-r--r--  storage/innobase/mysql-test/storage_engine/tbl_opt_key_block_size.opt  3
-rw-r--r--  storage/innobase/mysql-test/storage_engine/tbl_opt_row_format.opt  3
-rw-r--r--  storage/innobase/mysql-test/storage_engine/tbl_opt_row_format.rdiff  48
-rw-r--r--  storage/innobase/mysql-test/storage_engine/type_spatial_indexes.rdiff  712
-rw-r--r--  storage/innobase/os/os0event.cc  520
-rw-r--r--  storage/innobase/os/os0file.cc  10370
-rw-r--r--  storage/innobase/os/os0proc.cc  173
-rw-r--r--  storage/innobase/os/os0sync.cc  915
-rw-r--r--  storage/innobase/os/os0thread.cc  187
-rw-r--r--  storage/innobase/page/page0cur.cc  1030
-rw-r--r--  storage/innobase/page/page0page.cc  1230
-rw-r--r--  storage/innobase/page/page0zip.cc  1087
-rw-r--r--  storage/innobase/pars/lexyy.cc  1202
-rwxr-xr-x  storage/innobase/pars/make_bison.sh  3
-rwxr-xr-x  storage/innobase/pars/make_flex.sh  3
-rw-r--r--  storage/innobase/pars/pars0grm.cc  3388
-rw-r--r--  storage/innobase/pars/pars0grm.y  138
-rw-r--r--  storage/innobase/pars/pars0lex.l  95
-rw-r--r--  storage/innobase/pars/pars0opt.cc  82
-rw-r--r--  storage/innobase/pars/pars0pars.cc  448
-rw-r--r--  storage/innobase/pars/pars0sym.cc  64
-rw-r--r--  storage/innobase/que/que0que.cc  290
-rw-r--r--  storage/innobase/read/read0read.cc  802
-rw-r--r--  storage/innobase/rem/rem0cmp.cc  1704
-rw-r--r--  storage/innobase/rem/rem0rec.cc  841
-rw-r--r--  storage/innobase/row/row0ext.cc  36
-rw-r--r--  storage/innobase/row/row0ftsort.cc  552
-rw-r--r--  storage/innobase/row/row0import.cc  1505
-rw-r--r--  storage/innobase/row/row0ins.cc  1585
-rw-r--r--  storage/innobase/row/row0log.cc  812
-rw-r--r--  storage/innobase/row/row0merge.cc  2607
-rw-r--r--  storage/innobase/row/row0mysql.cc  3599
-rw-r--r--  storage/innobase/row/row0purge.cc  623
-rw-r--r--  storage/innobase/row/row0quiesce.cc  140
-rw-r--r--  storage/innobase/row/row0row.cc  533
-rw-r--r--  storage/innobase/row/row0sel.cc  2301
-rw-r--r--  storage/innobase/row/row0trunc.cc  3013
-rw-r--r--  storage/innobase/row/row0uins.cc  242
-rw-r--r--  storage/innobase/row/row0umod.cc  435
-rw-r--r--  storage/innobase/row/row0undo.cc  154
-rw-r--r--  storage/innobase/row/row0upd.cc  1511
-rw-r--r--  storage/innobase/row/row0vers.cc  1091
-rw-r--r--  storage/innobase/snappy.cmake  32
-rw-r--r--  storage/innobase/srv/srv0conc.cc  464
-rw-r--r--  storage/innobase/srv/srv0mon.cc  312
-rw-r--r--  storage/innobase/srv/srv0srv.cc  1546
-rw-r--r--  storage/innobase/srv/srv0start.cc  3691
-rw-r--r--  storage/innobase/sync/sync0arr.cc  1267
-rw-r--r--  storage/innobase/sync/sync0debug.cc  1764
-rw-r--r--  storage/innobase/sync/sync0rw.cc  1175
-rw-r--r--  storage/innobase/sync/sync0sync.cc  1664
-rw-r--r--  storage/innobase/trx/trx0i_s.cc  372
-rw-r--r--  storage/innobase/trx/trx0purge.cc  1327
-rw-r--r--  storage/innobase/trx/trx0rec.cc  1649
-rw-r--r--  storage/innobase/trx/trx0roll.cc  715
-rw-r--r--  storage/innobase/trx/trx0rseg.cc  338
-rw-r--r--  storage/innobase/trx/trx0sys.cc  1031
-rw-r--r--  storage/innobase/trx/trx0trx.cc  2199
-rw-r--r--  storage/innobase/trx/trx0undo.cc  980
-rw-r--r--  storage/innobase/usr/usr0sess.cc  67
-rw-r--r--  storage/innobase/ut/crc32_power8/crc32.S  775
-rw-r--r--  storage/innobase/ut/crc32_power8/crc32_constants.h  911
-rw-r--r--  storage/innobase/ut/crc32_power8/crc32_wrapper.c  68
-rw-r--r--  storage/innobase/ut/crc32_power8/ppc-opcode.h  23
-rw-r--r--  storage/innobase/ut/ut0bh.cc  159
-rw-r--r--  storage/innobase/ut/ut0byte.cc  30
-rw-r--r--  storage/innobase/ut/ut0crc32.cc  736
-rw-r--r--  storage/innobase/ut/ut0dbg.cc  96
-rw-r--r--  storage/innobase/ut/ut0list.cc  90
-rw-r--r--  storage/innobase/ut/ut0mem.cc  563
-rw-r--r--  storage/innobase/ut/ut0new.cc  226
-rw-r--r--  storage/innobase/ut/ut0rbt.cc  254
-rw-r--r--  storage/innobase/ut/ut0rnd.cc  14
-rw-r--r--  storage/innobase/ut/ut0ut.cc  534
-rw-r--r--  storage/innobase/ut/ut0vec.cc  7
-rw-r--r--  storage/innobase/ut/ut0wqueue.cc  28
-rw-r--r--  storage/maria/ha_maria.cc  86
-rw-r--r--  storage/maria/ma_bitmap.c  10
-rw-r--r--  storage/maria/ma_blockrec.c  6
-rw-r--r--  storage/maria/ma_blockrec.h  22
-rw-r--r--  storage/maria/ma_check.c  19
-rw-r--r--  storage/maria/ma_check_standalone.h  2
-rw-r--r--  storage/maria/ma_checkpoint.c  9
-rw-r--r--  storage/maria/ma_checksum.c  2
-rw-r--r--  storage/maria/ma_close.c  29
-rw-r--r--  storage/maria/ma_control_file.c  6
-rw-r--r--  storage/maria/ma_create.c  22
-rw-r--r--  storage/maria/ma_delete.c  16
-rw-r--r--  storage/maria/ma_delete_all.c  10
-rw-r--r--  storage/maria/ma_dynrec.c  4
-rw-r--r--  storage/maria/ma_extra.c  4
-rw-r--r--  storage/maria/ma_ft_boolean_search.c  8
-rw-r--r--  storage/maria/ma_ft_nlq_search.c  4
-rw-r--r--  storage/maria/ma_ft_parser.c  2
-rw-r--r--  storage/maria/ma_ft_update.c  6
-rw-r--r--  storage/maria/ma_key.c  6
-rw-r--r--  storage/maria/ma_key_recover.c  2
-rw-r--r--  storage/maria/ma_keycache.c  4
-rw-r--r--  storage/maria/ma_loghandler.c  213
-rw-r--r--  storage/maria/ma_loghandler.h  6
-rw-r--r--  storage/maria/ma_open.c  28
-rw-r--r--  storage/maria/ma_packrec.c  58
-rw-r--r--  storage/maria/ma_page.c  4
-rw-r--r--  storage/maria/ma_pagecache.c  232
-rw-r--r--  storage/maria/ma_range.c  2
-rw-r--r--  storage/maria/ma_recovery.c  17
-rw-r--r--  storage/maria/ma_rkey.c  4
-rw-r--r--  storage/maria/ma_rt_index.c  8
-rw-r--r--  storage/maria/ma_rt_key.c  2
-rw-r--r--  storage/maria/ma_rt_split.c  10
-rw-r--r--  storage/maria/ma_search.c  56
-rw-r--r--  storage/maria/ma_servicethread.c  6
-rw-r--r--  storage/maria/ma_sort.c  41
-rw-r--r--  storage/maria/ma_sp_key.c  2
-rw-r--r--  storage/maria/ma_state.c  45
-rw-r--r--  storage/maria/ma_unique.c  2
-rw-r--r--  storage/maria/ma_write.c  23
-rw-r--r--  storage/maria/maria_chk.c  10
-rw-r--r--  storage/maria/maria_def.h  102
-rw-r--r--  storage/maria/maria_pack.c  16
-rw-r--r--  storage/maria/maria_read_log.c  2
-rw-r--r--  storage/maria/unittest/sequence_storage.c  2
-rw-r--r--  storage/mroonga/CMakeLists.txt  2
-rw-r--r--  storage/mroonga/data/install.sql.in  6
-rw-r--r--  storage/mroonga/ha_mroonga.cpp  35
-rw-r--r--  storage/mroonga/ha_mroonga.hpp  188
-rw-r--r--  storage/mroonga/mrn_table.cpp  5
-rw-r--r--  storage/mroonga/mysql-test/mroonga/include/mroonga/have_mroonga_deinit.inc  6
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/r/alter_table_change_column_rename_multiple.result  2
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/r/column_date_zero_date.result  2
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_32bit_2038.result  2
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_32bit_before_unix_epoch.result  2
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_32bit_max.result  2
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_32bit_out_of_range.result  2
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_64bit_strict_sql_mode_out_of_range.result  2
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_64bit_version_5_6_or_later_out_of_range.result  2
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/r/column_generated_stored_add_column.result  3
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/r/column_time_fractional_seconds_with_index.result  2
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/r/column_time_with_index.result  2
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/r/foreign_key_alter_add.result  3
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/r/foreign_key_alter_drop.result  2
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/r/geometry_bulk_insert_null.result  2
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_date_32bit_equal.result  8
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_date_order_32bit_asc.result  8
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_date_order_32bit_desc.result  8
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_datetime_insert_delete_insert_invalid_value.result  4
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_decimal.result  2
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_year_32bit_equal.result  8
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_year_order_32bit_asc.result  8
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_year_order_32bit_desc.result  8
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/r/partition_insert.result  4
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/r/partition_update.result  4
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/suite.pm  2
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/t/alter_table_change_column_rename_multiple.test  2
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/t/column_date_zero_date.test  2
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/t/column_datetime_32bit_2038.test  2
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/t/column_datetime_32bit_before_unix_epoch.test  2
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/t/column_datetime_32bit_max.test  2
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/t/column_datetime_32bit_out_of_range.test  2
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/t/column_generated_stored_add_column.test  5
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/t/geometry_bulk_insert_null.test  2
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_date_32bit_equal.test  8
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_date_order_32bit_asc.test  8
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_date_order_32bit_desc.test  8
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_datetime_insert_delete_insert_invalid_value.test  4
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_year_32bit_equal.test  8
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_year_order_32bit_asc.test  8
-rw-r--r--  storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_year_order_32bit_desc.test  8
-rw-r--r--  storage/mroonga/mysql-test/mroonga/wrapper/suite.pm  2
-rw-r--r--  storage/mroonga/vendor/groonga/examples/dictionary/html/js/jquery-1.7.2.js  2
-rw-r--r--  storage/mroonga/vendor/groonga/examples/dictionary/html/js/jquery-ui-1.8.18.custom.js  2
-rw-r--r--  storage/mroonga/vendor/groonga/lib/com.c  2
-rw-r--r--  storage/mroonga/vendor/groonga/lib/ctx.c  3
-rw-r--r--  storage/mroonga/vendor/groonga/lib/db.c  2
-rw-r--r--  storage/mroonga/vendor/groonga/lib/hash.c  34
-rw-r--r--  storage/mroonga/vendor/groonga/lib/ii.c  19
-rw-r--r--  storage/mroonga/vendor/groonga/lib/pat.c  14
-rw-r--r--  storage/mroonga/vendor/groonga/lib/ts.c  2
-rw-r--r--  storage/mroonga/vendor/groonga/lib/ts/ts_expr_node.c  51
-rw-r--r--  storage/myisam/NEWS  2
-rw-r--r--  storage/myisam/ft_boolean_search.c  8
-rw-r--r--  storage/myisam/ft_nlq_search.c  4
-rw-r--r--  storage/myisam/ft_parser.c  2
-rw-r--r--  storage/myisam/ft_stopwords.c  2
-rw-r--r--  storage/myisam/ft_update.c  6
-rw-r--r--  storage/myisam/ha_myisam.cc  154
-rw-r--r--  storage/myisam/ha_myisam.h  2
-rw-r--r--  storage/myisam/mi_check.c  54
-rw-r--r--  storage/myisam/mi_close.c  25
-rw-r--r--  storage/myisam/mi_create.c  7
-rw-r--r--  storage/myisam/mi_delete.c  14
-rw-r--r--  storage/myisam/mi_delete_all.c  4
-rw-r--r--  storage/myisam/mi_dynrec.c  9
-rw-r--r--  storage/myisam/mi_extra.c  5
-rw-r--r--  storage/myisam/mi_key.c  2
-rw-r--r--  storage/myisam/mi_open.c  52
-rw-r--r--  storage/myisam/mi_packrec.c  51
-rw-r--r--  storage/myisam/mi_preload.c  6
-rw-r--r--  storage/myisam/mi_rkey.c  4
-rw-r--r--  storage/myisam/mi_search.c  50
-rw-r--r--  storage/myisam/mi_unique.c  8
-rw-r--r--  storage/myisam/mi_write.c  16
-rw-r--r--  storage/myisam/myisamchk.c  29
-rw-r--r--  storage/myisam/myisamdef.h  78
-rw-r--r--  storage/myisam/mysql-test/storage_engine/alter_tablespace.rdiff  2
-rw-r--r--  storage/myisam/rt_split.c  4
-rw-r--r--  storage/myisam/sort.c  35
-rw-r--r--  storage/myisam/sp_key.c  2
-rw-r--r--  storage/myisammrg/ha_myisammrg.cc  24
-rw-r--r--  storage/myisammrg/myrg_open.c  13
-rw-r--r--  storage/myisammrg/mysql-test/storage_engine/alter_tablespace.rdiff  2
-rw-r--r--  storage/myisammrg/mysql-test/storage_engine/create_table.rdiff  2
-rw-r--r--  storage/myisammrg/mysql-test/storage_engine/disabled.def  4
-rw-r--r--  storage/myisammrg/mysql-test/storage_engine/parts/repair_table.rdiff  7
-rw-r--r--  storage/myisammrg/mysql-test/storage_engine/parts/truncate_table.rdiff  16
-rw-r--r--  storage/myisammrg/mysql-test/storage_engine/repair_table.rdiff  8
-rw-r--r--  storage/myisammrg/mysql-test/storage_engine/tbl_opt_data_dir.rdiff (renamed from storage/myisammrg/mysql-test/storage_engine/tbl_opt_data_index_dir.rdiff)  16
-rw-r--r--  storage/myisammrg/mysql-test/storage_engine/tbl_opt_index_dir.rdiff  18
-rw-r--r--  storage/myisammrg/mysql-test/storage_engine/tbl_opt_row_format.rdiff  28
-rw-r--r--  storage/myisammrg/mysql-test/storage_engine/vcol.rdiff  30
-rw-r--r--  storage/oqgraph/ha_oqgraph.cc  23
-rw-r--r--  storage/oqgraph/mysql-test/oqgraph/connections_mdev5748.result  3
-rw-r--r--  storage/oqgraph/mysql-test/oqgraph/regression_mdev6282.result  6
-rw-r--r--  storage/oqgraph/oqgraph_thunk.cc  16
-rw-r--r--  storage/perfschema/ha_perfschema.cc  5
-rw-r--r--  storage/perfschema/pfs.cc  15
-rw-r--r--  storage/perfschema/pfs_account.cc  2
-rw-r--r--  storage/perfschema/pfs_autosize.cc  6
-rw-r--r--  storage/perfschema/pfs_engine_table.cc  2
-rw-r--r--  storage/perfschema/pfs_events_waits.h  9
-rw-r--r--  storage/perfschema/pfs_global.h  2
-rw-r--r--  storage/perfschema/pfs_host.cc  2
-rw-r--r--  storage/perfschema/pfs_instr.cc  4
-rw-r--r--  storage/perfschema/pfs_instr_class.cc  4
-rw-r--r--  storage/perfschema/pfs_setup_actor.cc  2
-rw-r--r--  storage/perfschema/pfs_setup_object.cc  2
-rw-r--r--  storage/perfschema/pfs_user.cc  2
-rw-r--r--  storage/perfschema/table_events_statements.cc  9
-rw-r--r--  storage/perfschema/table_events_waits.cc  2
-rw-r--r--  storage/perfschema/table_threads.cc  4
-rw-r--r--  storage/rocksdb/.clang-format  137
-rw-r--r--  storage/rocksdb/.gitignore  2
-rw-r--r--  storage/rocksdb/CMakeLists.txt  281
-rw-r--r--  storage/rocksdb/README  50
-rw-r--r--  storage/rocksdb/atomic_stat.h  94
-rw-r--r--  storage/rocksdb/build_rocksdb.cmake  495
-rw-r--r--  storage/rocksdb/event_listener.cc  97
-rw-r--r--  storage/rocksdb/event_listener.h  49
-rwxr-xr-x  storage/rocksdb/get_rocksdb_files.sh  27
-rw-r--r--  storage/rocksdb/ha_rocksdb.cc  14580
-rw-r--r--  storage/rocksdb/ha_rocksdb.h  1067
-rw-r--r--  storage/rocksdb/ha_rocksdb_proto.h  103
-rw-r--r--  storage/rocksdb/logger.h  85
-rwxr-xr-x  storage/rocksdb/myrocks_hotbackup.py  698
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/combinations  5
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/autoinc_crash_safe.inc  150
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc  165
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc  143
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/bypass_create_table.inc  298
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority.inc  174
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority_module.inc  141
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/deadlock_stats.inc  52
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/dup_key_update.inc  69
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/group_min_max.inc  1438
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/have_direct_io.inc  23
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/have_rocksdb.inc  10
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/have_rocksdb.opt  12
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/have_rocksdb_default.inc  10
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/have_rocksdb_replication.inc  11
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/have_write_committed.inc  3
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/have_write_prepared.inc  3
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/index_merge1.inc  910
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/index_merge2.inc  520
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/index_merge_2sweeps.inc  80
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/index_merge_ror.inc  463
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/index_merge_ror_cpk.inc  178
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case1_1.inc  51
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case1_2.inc  48
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case2.inc  97
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case3.inc  71
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case4.inc  69
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case5.inc  77
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case6.inc  77
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case7.inc  89
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/prefix_index_only_query_check.inc  21
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/restart_mysqld_with_invalid_option.inc  8
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/restart_mysqld_with_option.inc  31
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/rocksdb_icp.inc  199
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/simple_deadlock.inc  29
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/start_mysqld_with_option.inc  14
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/use_direct_io_option.inc  23
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/my.cnf  19
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/1st.result  22
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result  101
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result  489
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_cardinality.result  24
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_crash.result  93
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result  79
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/add_unique_index_inplace.result  103
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/allow_no_pk_concurrent_insert.result  7
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key.result  295
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key_with_sk.result  797
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/allow_to_start_after_corruption.result  38
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/alter_table.result  183
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/analyze_table.result  55
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/apply_changes_iter.result  64
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe.result  132
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe_partition.result  132
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/autoinc_debug.result  99
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/autoinc_secondary.result  16
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result  199
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars_thread.result  39
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars_thread_2.result  96
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rc.result  87
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rr.result  87
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/bloomfilter.result  2042
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/bloomfilter2.result  71
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/bloomfilter3.result  140
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/bloomfilter4.result  30
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result  85
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/bloomfilter_bulk_load.result  15
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/bloomfilter_skip.result  2042
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result  77
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result  11
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result  99
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result  77
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result  77
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result  77
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/bulk_load_sk.result  229
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result  108
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_errors.result  4
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result  108
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic.result  693
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic_bloom.result  693
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/bytes_written.result  10
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/cardinality.result  100
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/check_flags.result  66
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/check_ignore_unknown_options.result  7
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/check_table.result  68
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/checkpoint.result  59
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/checksum_table.result  92
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/checksum_table_live.result  20
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/col_opt_default.result  20
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/col_opt_not_null.result  2613
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/col_opt_null.result  2268
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/col_opt_unsigned.result  749
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/col_opt_zerofill.result  731
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/collation.result  144
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/collation_exception.result  25
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/com_rpc_tx.result  21
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/commit_in_the_middle_ddl.result  14
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/compact_deletes.result  78
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/compression_zstd.result  2
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/concurrent_alter.result  12
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_read_committed.result  151
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_repeatable_read.result  144
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_serializable.result  24
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/corrupted_data_reads_debug.result  74
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/covered_unpack_info_format.result  73
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/create_no_primary_key_table.result  52
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/create_table.result  165
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result  1058
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/deadlock.result  37
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/deadlock_stats.result  14
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result  488
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/delete.result  166
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/delete_ignore.result  59
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/delete_quick.result  24
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/delete_with_keys.result  38
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/describe.result  19
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/drop_database.result  6
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/drop_index_inplace.result  154
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/drop_table.result  79
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/drop_table2.result  63
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result  24
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/dup_key_update.result  366
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/duplicate_table.result  15
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/explicit_snapshot.result  265
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/fail_system_cf.result  4
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/fast_prefix_index_fetch.result  80
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/force_shutdown.result  38
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/foreign_key.result  25
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/gap_lock_issue254.result  9
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/gap_lock_raise_error.result  504
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/get_error_message.result  8
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/group_min_max.result  3504
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/ha_extra_keyread.result  10
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/handler_basic.result  127
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/hermitage.result  648
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result  22
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/i_s_deadlock.result  216
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/index.result  89
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/index_file_map.result  31
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/index_key_block_size.result  51
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result  48
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb2.result  1416
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/index_primary.result  71
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/index_type_btree.result  69
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/index_type_hash.result  69
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/information_schema.result  83
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/innodb_i_s_tables_disabled.result  344
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/insert.result  202
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/insert_optimized_config.result  16
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/insert_with_keys.result  262
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/issue100.result  23
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/issue100_delete.result  17
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/issue111.result  37
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/issue243_transactionStatus.result  161
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/issue255.result  68
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/issue290.result  28
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/issue314.result  12
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/issue495.result  30
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/issue884.result  79
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/issue896.result  17
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/issue900.result  11
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/iterator_bounds.result  15
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/kill.result  6
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/level_read_committed.result  111
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/level_read_uncommitted.result  116
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/level_repeatable_read.result  100
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/level_serializable.result  56
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/loaddata.result  239
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/lock.result  121
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/lock_info.result  31
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/lock_rows_not_exist.result  40
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result  35
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/locking_issues.result  1
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case1_1_rc.result  30
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case1_1_rr.result  30
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case1_2_rc.result  30
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case1_2_rr.result  30
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case2_rc.result  50
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case2_rc_lsr.result  37
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case2_rr.result  50
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case2_rr_lsr.result  37
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case3_rc.result  25
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case3_rr.result  23
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case4_rc.result  23
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case4_rr.result  23
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case5_rc.result  29
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case5_rr.result  28
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case6_rc.result  29
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case6_rr.result  28
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case7_rc.result  41
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case7_rc_lsr.result  45
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case7_rr.result  41
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case7_rr_lsr.result  45
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/mariadb_ignore_dirs.result  9
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/mariadb_misc_binlog.result  33
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/mariadb_plugin.result  35
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result  119
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_rpl.result  14
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result  21
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/misc.result  95
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/multi_varchar_sk_lookup.result  37
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/mysqlbinlog_blind_replace.result  128
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/mysqlbinlog_gtid_skip_empty_trans_rocksdb.result  143
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/mysqldump.result  246
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/mysqldump2.result  16
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/native_procedure.result  397
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/negative_stats.result  9
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/no_merge_sort.result  63
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/optimize_myrocks_replace_into_base.result  98
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/optimize_myrocks_replace_into_lock.result  46
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/optimize_table.result  77
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/optimizer_loose_index_scans.result  281
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/partition.result  688
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/percona_nonflushing_analyze_debug.result  19
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/perf_context.result  191
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/persistent_cache.result  11
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/prefix_extractor_override.result  82
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/read_only_tx.result  46
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/records_in_range.result  210
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/repair_table.result  37
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/replace.result  32
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result  2635
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb_cf_options.result  61
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb_cf_per_partition.result  423
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb_cf_reverse.result  120
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb_checksums.result  128
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb_concurrent_delete.result  671
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb_datadir.result  2
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb_deadlock_detect_rc.result  89
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb_deadlock_detect_rr.result  89
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb_deadlock_stress_rc.result  8
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb_deadlock_stress_rr.result  8
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb_debug.result  11
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb_icp.result  257
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb_icp_rev.result  223
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb_locks.result  64
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb_parts.result  151
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb_qcache.result  45
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range.result  293
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range2.result  28
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb_read_free_rpl.result  335
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb_read_free_rpl_stress.result  35
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb_row_stats.result  66
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb_table_stats_sampling_pct_change.result  23
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb_timeout_rollback.result  84
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rollback_savepoint.result  29
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rpl_row_not_found.result  56
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rpl_row_not_found_rc.result  56
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rpl_row_rocksdb.result  45
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rpl_row_stats.result  98
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rpl_row_triggers.result  286
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rpl_savepoint.result  110
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rpl_statement.result  57
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rpl_statement_not_found.result  70
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rqg_examples.result  3
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rqg_runtime.result  30
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rqg_transactions.result  11
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/secondary_key_update_lock.result  18
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/select.result  374
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/select_for_update.result  35
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/select_for_update_skip_locked_nowait.result  28
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/select_lock_in_share_mode.result  37
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/show_engine.result  491
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/show_table_status.result  24
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/shutdown.result  9
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/singledelete.result  86
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/skip_core_dump_on_error.result  31
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result  20
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/slow_query_log.result  10
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/statistics.result  69
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/table_stats.result  12
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_ai.result  38
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_avg_row_length.result  18
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_checksum.result  18
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_connection.result  26
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_data_index_dir.result  41
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_delay_key_write.result  18
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_insert_method.result  18
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_key_block_size.result  18
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_max_rows.result  18
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_min_rows.result  18
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_pack_keys.result  18
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_password.result  18
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_row_format.result  18
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_union.result  16
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/tbl_standard_opts.result  46
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/tmpdir.result  32
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/transaction.result  977
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/truncate_partition.result  620
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/truncate_table.result  33
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result  22
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/trx_info.result  13
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/trx_info_rpl.result  16
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/ttl_primary.result  489
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result  283
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_with_partitions.result  256
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/ttl_rows_examined.result  45
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary.result  709
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering.result  511
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering_multiple_index.result  82
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_with_partitions.result  389
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_binary.result  48
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_binary_indexes.result  80
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_bit.result  53
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_bit_indexes.result  58
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_blob.result  57
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_blob_indexes.result  188
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_bool.result  73
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_char.result  76
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_char_indexes.result  73
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_char_indexes_collation.result  91
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_date_time.result  56
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_date_time_indexes.result  119
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_decimal.result  103
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_enum.result  47
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_enum_indexes.result  69
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_fixed.result  131
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_fixed_indexes.result  129
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_float.result  314
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_float_indexes.result  189
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_int.result  212
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_int_indexes.result  99
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_set.result  49
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_set_indexes.result  115
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_text.result  57
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_text_indexes.result  165
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_varbinary.result  93
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result  770
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/unique_check.result  117
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/unique_sec.result  221
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/unique_sec_rev_cf.result  177
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/unsupported_tx_isolations.result  18
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/update.result  121
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/update_ignore.result  57
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/update_multi.result  691
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/update_with_keys.result  38
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/use_direct_io_for_flush_and_compaction.result  18
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads.result  18
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result  18
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/validate_datadic.result  9
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/varbinary_format.result  260
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/write_sync.result  30
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/xa.result  32
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/slow_query_log.awk  25
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/suite.opt  2
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/suite.pm  28
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/1st.test  36
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit-master.opt  1
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test  168
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test  417
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_cardinality-master.opt  1
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_cardinality.test  44
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_crash.test  118
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_sstfilewriter.test  113
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/add_unique_index_inplace.test  101
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/allow_no_pk_concurrent_insert.test  22
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key.test  126
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key_with_sk.test  149
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption-master.opt  1
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption.test  75
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/alter_table.test  94
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/analyze_table.test57
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/apply_changes_iter.test44
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.cnf8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.test9
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.cnf8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.test10
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug.test119
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_secondary.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test171
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars_thread.test65
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars_thread_2.test142
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rc.cnf11
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rc.test3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rr.cnf11
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rr.test3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/blind_delete_without_tx_api.inc132
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter-master.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter.inc78
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter.test1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter2-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter2.test103
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3-master.opt4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3.test136
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter4-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter4.test52
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test86
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_bulk_load-master.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_bulk_load.test35
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_load_select.inc190
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_skip-master.opt3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_skip.test1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_table_def.inc33
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test11
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_drop_table.test19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test168
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test10
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test10
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test10
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_sk.test119
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_errors.test8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_rev.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic.inc213
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic.test3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic_bloom-master.opt3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic_bloom.test3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bytes_written.test22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/cardinality-master.opt3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/cardinality.test117
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/check_flags.test117
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test56
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/check_log_for_xa.py31
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/check_table.inc54
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/check_table.test12
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/checkpoint.test107
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/checksum_table.test84
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/checksum_table_live.test24
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/col_not_null.inc55
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/col_not_null_timestamp.inc70
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/col_null.inc34
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/col_opt_default.test27
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/col_opt_not_null.test229
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/col_opt_null.test220
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/col_opt_unsigned.test74
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/col_opt_zerofill.test67
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/collation-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/collation.test211
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/collation_exception-master.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/collation_exception.test29
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/com_rpc_tx.cnf4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/com_rpc_tx.test90
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/commit_in_the_middle_ddl.test27
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/compact_deletes-master.opt3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/compact_deletes.test88
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/compact_deletes_test.inc72
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/compression_zstd.test14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/concurrent_alter.test39
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_read_committed.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_read_committed.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_repeatable_read.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_repeatable_read.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_serializable.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_serializable.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/consistent_snapshot.inc136
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/corrupted_data_reads_debug.test80
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/covered_unpack_info_format.test79
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/create_no_primary_key_table-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/create_no_primary_key_table.test63
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/create_table.test192
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ddl_high_priority.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/deadlock.test44
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/deadlock_stats.test3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test185
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/delete.test101
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/delete_ignore.test37
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/delete_quick.test32
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/delete_with_keys.test39
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/describe.test24
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/disabled.def95
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/drop_database.test11
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/drop_index_inplace.test116
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/drop_stats_procedure.inc3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/drop_table-master.opt3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/drop_table.test145
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/drop_table2.test131
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/drop_table3-master.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc52
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/drop_table3.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/drop_table3_repopulate_table.inc15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/drop_table_repopulate_table.inc15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/drop_table_sync.inc6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/dup_key_update.test45
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/duplicate_table.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/explicit_snapshot-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/explicit_snapshot.test263
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/fail_system_cf.test17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/fast_prefix_index_fetch.test120
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/force_shutdown.test97
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/foreign_key.test47
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/gap_lock_issue254-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/gap_lock_issue254.test14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/gap_lock_raise_error.test37
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/get_error_message.test27
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/group_min_max-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/group_min_max.test9
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ha_extra_keyread.test15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/handler_basic.test52
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/hermitage.inc257
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/hermitage.test10
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/hermitage_init.inc8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test29
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/i_s_deadlock.test158
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/index.inc155
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/index.test44
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/index_file_map-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/index_file_map.test54
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/index_key_block_size.test70
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test110
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test72
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/index_primary.test96
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/index_type_btree.test12
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/index_type_hash.test12
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/information_schema-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/information_schema.test89
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/init_stats_procedure.inc40
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/innodb_i_s_tables_disabled-master.opt30
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/innodb_i_s_tables_disabled.test43
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/insert.test99
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/insert_optimized_config.test51
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/insert_with_keys.test169
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/issue100.test23
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/issue100_delete-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/issue100_delete.test19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/issue111.test38
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/issue243_transactionStatus.test80
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/issue255.test51
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/issue290.test40
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/issue314.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/issue495.test34
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/issue884.test43
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/issue896.test17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/issue900.test13
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/iterator_bounds-master.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/iterator_bounds.test29
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/kill.test9
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/level_read_committed.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/level_read_uncommitted.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/level_read_uncommitted.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/level_repeatable_read.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/level_serializable.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/loaddata.inc117
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/loaddata.test7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/lock.test224
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/lock_info.test31
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/lock_rows_not_exist.test110
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test38
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/locking_issues.test3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case1_1_rc.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case1_1_rr.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case1_2_rc.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case1_2_rr.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case2_rc.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case2_rc_lsr.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case2_rr.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case2_rr_lsr.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case3_rc.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case3_rr.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case4_rc.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case4_rr.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case5_rc.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case5_rr.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case6_rc.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case6_rr.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case7_rc.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case7_rc_lsr.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case7_rr.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case7_rr_lsr.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/mariadb_ignore_dirs.test17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/mariadb_misc_binlog-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/mariadb_misc_binlog.test40
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/mariadb_plugin-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/mariadb_plugin.test59
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/mariadb_port_fixes.test114
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/mariadb_port_rpl.test14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/max_open_files.test53
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/misc.test45
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/multi_varchar_sk_lookup.test49
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/mysqlbinlog_blind_replace.test62
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/mysqlbinlog_gtid_skip_empty_trans_rocksdb-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/mysqlbinlog_gtid_skip_empty_trans_rocksdb.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/mysqldump-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test67
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/mysqldump2-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/mysqldump2.test43
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/native_procedure-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/native_procedure.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/negative_stats.test26
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/no_merge_sort.test32
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/no_primary_key_basic_ops.inc65
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/optimize_myrocks_replace_into_base.test96
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/optimize_myrocks_replace_into_lock.test88
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/optimize_table-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/optimize_table.inc20
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/optimize_table.test81
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/optimizer_loose_index_scans.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/partition.test762
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/percona_nonflushing_analyze_debug.test11
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/perf_context.test96
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/persistent_cache.test41
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override.test96
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/read_only_tx-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/read_only_tx.test70
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/records_in_range-master.opt3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/records_in_range.test146
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/repair_table.inc38
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/repair_table.test8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/replace.test54
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test1964
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_options-master.opt1
-rwxr-xr-xstorage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_options-master.sh5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_options.test75
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_per_partition.test513
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_reverse-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_reverse.test71
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_checksums-master.opt3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_checksums.test152
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.inc109
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.test38
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_main.inc30
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_range.inc85
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_sk.inc82
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_insert.py95
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_datadir.test33
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_detect.inc123
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_detect_rc-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_detect_rc.test1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_detect_rr.test1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress.inc18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress.py94
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress_rc-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress_rc.test1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress_rr.test1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_debug.test14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_icp-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_icp.test44
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_icp_rev-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_icp_rev.test7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_locks-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_locks.test94
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_parts-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_parts.test160
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_qcache-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_qcache.test43
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range.test196
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range2.test33
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl.cnf16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl.test414
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.cnf17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.inc69
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.test22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_row_stats.test57
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_table_stats_sampling_pct_change.test80
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_timeout_rollback-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_timeout_rollback.test78
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rollback_savepoint.test33
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found.cnf9
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found.inc98
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found_rc.cnf11
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found_rc.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rpl_row_rocksdb.cnf1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rpl_row_rocksdb.test48
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rpl_row_stats-slave.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rpl_row_stats.cnf1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rpl_row_stats.test47
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rpl_row_triggers.cnf19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rpl_row_triggers.test262
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rpl_savepoint.cnf1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rpl_savepoint.test91
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rpl_statement.cnf7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rpl_statement.test59
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rpl_statement_not_found.cnf9
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rpl_statement_not_found.test3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rqg.inc44
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rqg_examples-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rqg_examples.test12
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rqg_runtime-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rqg_runtime.test58
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rqg_transactions-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rqg_transactions.test14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/se-innodb.out1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/secondary_key_update_lock.test26
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/select.test202
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/select_for_update.test55
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/select_for_update_skip_locked_nowait.test48
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/select_lock_in_share_mode.test54
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/set_checkpoint.inc30
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/show_engine.test103
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/show_table_status-master.opt3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/show_table_status.test65
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/shutdown-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/shutdown.test36
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/singledelete-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/singledelete.test105
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/skip_core_dump_on_error-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/skip_core_dump_on_error.test53
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test39
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/slow_query_log-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/slow_query_log.test37
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/statistics-master.opt3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/statistics.test74
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/table_stats-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/table_stats.test29
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_ai.test29
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_avg_row_length.test23
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_checksum.test19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_connection.test32
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_data_index_dir.test60
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_delay_key_write.test23
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_insert_method.test23
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_key_block_size.test23
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_max_rows.test23
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_min_rows.test23
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_pack_keys.test23
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_password.test27
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_row_format.test23
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_union.test28
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/tbl_standard_opts.test42
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/tmpdir.test35
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/transaction.test158
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/transaction_isolation.inc150
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/transaction_select.inc14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/truncate_partition.inc102
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/truncate_partition.test83
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/truncate_table.test74
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/truncate_table3-master.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/truncate_table3.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/trx_info.test17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/trx_info_rpl.cnf11
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/trx_info_rpl.test44
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_primary-master.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_primary.test545
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test388
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_with_partitions-master.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_with_partitions.test254
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_rows_examined.test57
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary-master.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary.test780
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering.test503
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering_multiple_index.test87
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions.test300
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_binary.inc45
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_binary.test8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_binary_indexes-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_binary_indexes.test99
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_bit.inc53
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_bit.test8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_bit_indexes-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_bit_indexes.test113
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_blob.inc49
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_blob.test8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_blob_indexes-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_blob_indexes.test176
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_bool.inc64
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_bool.test8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_char.inc45
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_char.test19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_char_indexes-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_char_indexes.test107
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_char_indexes_collation-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_char_indexes_collation.test126
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_date_time.inc47
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_date_time.test9
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_date_time_indexes-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_date_time_indexes.test157
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_decimal-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_decimal.test88
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_enum.inc50
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_enum.test8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_enum_indexes-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_enum_indexes.test93
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_fixed.inc85
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_fixed.test8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_fixed_indexes-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_fixed_indexes.test107
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_float.inc121
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_float.test8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_float_indexes-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_float_indexes.test175
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_int.inc68
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_int.test8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_int_indexes-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_int_indexes.test75
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_set.inc49
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_set.test8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_set_indexes-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_set_indexes.test100
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_text.inc49
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_text.test8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_text_indexes-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_text_indexes.test171
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_varbinary.inc75
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_varbinary.test8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_varchar-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_varchar.inc77
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_varchar.test82
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_varchar_endspace.inc85
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/unique_check.test173
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/unique_sec.inc198
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/unique_sec.test51
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/unique_sec_rev_cf.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/unsupported_tx_isolations.test25
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/update.test82
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/update_ignore-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/update_ignore.test35
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/update_multi.test15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/update_multi_exec.inc27
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/update_with_keys.test78
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/use_direct_io_for_flush_and_compaction.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test62
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/validate_datadic.test108
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/varbinary_format.test131
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/write_sync.test41
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/xa-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/xa.test38
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/base.cnf25
-rwxr-xr-xstorage/rocksdb/mysql-test/rocksdb_hotbackup/include/clean_tmpfiles.sh8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/include/cleanup.inc3
-rwxr-xr-xstorage/rocksdb/mysql-test/rocksdb_hotbackup/include/create_slocket_socket.sh4
-rwxr-xr-xstorage/rocksdb/mysql-test/rocksdb_hotbackup/include/create_table.sh18
-rwxr-xr-xstorage/rocksdb/mysql-test/rocksdb_hotbackup/include/load_data.sh45
-rwxr-xr-xstorage/rocksdb/mysql-test/rocksdb_hotbackup/include/load_data_and_run.sh11
-rwxr-xr-xstorage/rocksdb/mysql-test/rocksdb_hotbackup/include/load_data_slocket.sh45
-rwxr-xr-xstorage/rocksdb/mysql-test/rocksdb_hotbackup/include/remove_slocket_socket.sh4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/include/setup.inc16
-rwxr-xr-xstorage/rocksdb/mysql-test/rocksdb_hotbackup/include/setup_replication_gtid.sh22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/include/setup_replication_gtid_and_sync.inc4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/include/setup_slocket.inc10
-rwxr-xr-xstorage/rocksdb/mysql-test/rocksdb_hotbackup/include/stream_run.sh81
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/my.cnf2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/r/gtid.result23
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/r/slocket.result41
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/r/stream.result20
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/r/wdt.result20
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream.result21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream_direct.result21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream_socket.result20
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/t/gtid-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/t/gtid-slave.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/t/gtid.test47
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/t/slocket.test46
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/t/stream.test22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/t/wdt.test22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.inc25
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.test7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_direct-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_direct.test7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_socket.test22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/combinations7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/include/have_rocksdb.inc10
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/include/have_rocksdb.opt12
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_gtid_crash_safe.inc37
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_no_unique_check_on_lag.inc72
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/my.cnf17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/consistent_snapshot_mixed_engines.result68
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/multiclient_2pc.result26
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/optimize_myrocks_replace_into.result282
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/rocksdb_slave_check_before_image_consistency.result165
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_binlog_xid_count.result204
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_crash_safe_wal_corrupt.result135
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_ddl_high_priority.result39
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_crash_safe.result361
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_crash_safe_optimized.result361
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_crash_safe_wal_corrupt.result140
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_rocksdb_sys_header.result16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_missing_columns_sk_update.result62
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_mts_dependency_unique_key_conflicts.result44
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_no_unique_check_on_lag.result34
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_no_unique_check_on_lag_mts.result31
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_2pc_crash_recover.result44
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_slave_gtid_info_optimized.result43
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_snapshot.result222
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_snapshot_without_gtid.result15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_stress_crash.result28
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_skip_trx_api_binlog_format.result27
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result25
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_table.result29
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/rpl_1slave_base.cnf51
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/suite.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/suite.pm25
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/consistent_snapshot_mixed_engines-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/consistent_snapshot_mixed_engines.test81
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/disabled.def34
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test77
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/optimize_myrocks_replace_into.test149
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rocksdb_slave_check_before_image_consistency-slave.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rocksdb_slave_check_before_image_consistency.test22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_binlog_xid_count-master.opt3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_binlog_xid_count.test20
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_check_for_binlog_info.pl19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_crash_safe_wal_corrupt.cnf13
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_crash_safe_wal_corrupt.test12
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_ddl_high_priority.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe-slave.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe.test11
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized-slave.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized.test11
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.cnf18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.inc154
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.test12
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_rocksdb_sys_header-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_rocksdb_sys_header-slave.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_rocksdb_sys_header.test40
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_missing_columns_sk_update.cnf13
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_missing_columns_sk_update.test69
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_mts_dependency_unique_key_conflicts.test64
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag-slave.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag.test7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag_mts-slave.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag_mts.test3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_2pc_crash_recover-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_2pc_crash_recover-slave.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_2pc_crash_recover.test57
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized-slave.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized.test51
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot-slave.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot.test374
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot_without_gtid.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_stress_crash-master.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_stress_crash-slave.opt3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_stress_crash.test26
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format-master.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format-slave.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format.test52
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.cnf15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test78
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_table.cnf11
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_table.test44
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_stress/combinations5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_stress/include/have_rocksdb.inc10
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_stress/include/have_rocksdb.opt12
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_stress/include/rocksdb_stress.inc57
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_stress/my.cnf9
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress.result23
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress_crash.result23
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_stress/suite.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_stress/suite.pm28
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_stress/t/disabled.def2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_stress/t/load_generator.py1042
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress.test33
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress_crash.test34
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/include/correctboolvalue.inc25
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/include/have_rocksdb.inc10
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/include/have_rocksdb.opt12
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/include/rocksdb_sys_var.inc124
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/my.cnf10
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/all_vars.result13
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_access_hint_on_compaction_start_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_advise_random_on_open_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_concurrent_memtable_write_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_mmap_reads_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_mmap_writes_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_to_start_after_corruption_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_blind_delete_primary_key_basic.result100
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_block_cache_size_basic.result85
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_block_restart_interval_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_block_size_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_block_size_deviation_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_allow_sk_basic.result100
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_allow_unsorted_basic.result100
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_basic.result100
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_size_basic.result72
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bytes_per_sync_basic.result85
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_dump_basic.result19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_high_pri_pool_ratio_basic.result22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_index_and_filter_blocks_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_index_and_filter_with_high_priority_basic.result19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_checksums_pct_basic.result93
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_collect_sst_properties_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_commit_in_the_middle_basic.result100
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_commit_time_batch_for_recovery_basic.result121
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compact_cf_basic.result40
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_readahead_size_basic.result70
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_sequential_deletes_basic.result64
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_sequential_deletes_count_sd_basic.result64
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_sequential_deletes_file_size_basic.result46
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_sequential_deletes_window_basic.result64
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_create_checkpoint_basic.result15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_create_if_missing_basic.result14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_create_missing_column_families_basic.result14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_datadir_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_db_write_buffer_size_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_deadlock_detect_basic.result121
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_deadlock_detect_depth_basic.result79
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_manual_compaction_delay_basic.result46
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_optimizer_no_zero_cardinality_basic.result64
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_ignore_pk_basic.result64
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_read_filter_ts_basic.result46
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_rec_ts_basic.result46
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_snapshot_ts_basic.result46
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_default_cf_options_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delayed_write_rate_basic.result85
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delete_cf_basic.result6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delete_obsolete_files_period_micros_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_2pc_basic.result75
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_bulk_load_api_basic.result14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_insert_with_update_caching_basic.result75
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_thread_tracking_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_ttl_basic.result64
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_ttl_read_filtering_basic.result64
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_write_thread_adaptive_yield_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_error_if_exists_basic.result14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_error_on_suboptimal_collation_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_log_at_trx_commit_basic.result57
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_basic.result15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_cachetime_basic.result68
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_flush_memtable_and_lzero_now_basic.result50
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_flush_memtable_now_basic.result50
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_index_records_in_range_basic.result106
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_git_hash_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_hash_index_allow_collision_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_ignore_unknown_options_basic.result14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_index_type_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_info_log_level_basic.result93
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_io_write_timeout_basic.result86
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_is_fd_close_on_exec_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_keep_log_file_num_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_large_prefix_basic.result64
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_lock_scanned_rows_basic.result170
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_lock_wait_timeout_basic.result72
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_log_file_time_to_roll_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manifest_preallocation_size_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manual_compaction_threads_basic.result93
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manual_wal_flush_basic.result14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_master_skip_tx_api_basic.result100
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_background_jobs_basic.result46
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_latest_deadlocks_basic.result53
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_log_file_size_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_manifest_file_size_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_manual_compactions_basic.result57
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_open_files_basic.result3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_row_locks_basic.result93
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_subcompactions_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_total_wal_size_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_buf_size_basic.result43
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_combine_read_size_basic.result29
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_tmp_file_removal_delay_ms_basic.result93
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_new_table_reader_for_compaction_inputs_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_no_block_cache_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_override_cf_options_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_paranoid_checks_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_pause_background_work_basic.result75
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_perf_context_level_basic.result114
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_persistent_cache_path_basic.result13
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_persistent_cache_size_mb_basic.result14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_pin_l0_filter_and_index_blocks_in_cache_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_print_snapshot_conflict_queries_basic.result64
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_rate_limiter_bytes_per_sec_basic.result101
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_basic.result58
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_tables_basic.result49
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_records_in_range_basic.result100
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_remove_mariabackup_checkpoint_basic.result4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_reset_stats_basic.result97
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_rollback_on_timeout_basic.result97
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_seconds_between_stat_computes_basic.result64
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_signal_drop_index_thread_basic.result64
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sim_cache_size_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_bloom_filter_on_read_basic.result100
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_fill_cache_basic.result100
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_unique_check_tables_basic.result67
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sst_mgr_rate_bytes_per_sec_basic.result85
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_dump_period_sec_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_level_basic.result85
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_recalc_rate_basic.result53
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_store_row_debug_checksums_basic.result100
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_strict_collation_check_basic.result75
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_strict_collation_exceptions_basic.result36
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_supported_compression_types_basic.result4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_table_cache_numshardbits_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_table_stats_sampling_pct_basic.result85
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_tmpdir_basic.result29
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_trace_sst_api_basic.result100
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_two_write_queues_basic.result14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_unsafe_for_binlog_basic.result100
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options.result38
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result126
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_adaptive_mutex_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_clock_cache_basic.result19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_direct_io_for_flush_and_compaction_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_direct_reads_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_fsync_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_validate_tables_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_verify_row_debug_checksums_basic.result100
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_bytes_per_sync_basic.result85
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_dir_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_recovery_mode_basic.result46
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_size_limit_mb_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_ttl_seconds_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_whole_key_filtering_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_write_batch_max_bytes_basic.result15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_write_disable_wal_basic.result114
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_write_ignore_missing_column_families_basic.result100
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_write_policy_basic.result15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/suite.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/suite.pm21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/all_vars.test40
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/disabled.def5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_access_hint_on_compaction_start_basic.test7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_advise_random_on_open_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_concurrent_memtable_write_basic.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_mmap_reads_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_mmap_writes_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_to_start_after_corruption_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_blind_delete_primary_key_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_block_cache_size_basic.test21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_block_restart_interval_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_block_size_basic.test7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_block_size_deviation_basic.test7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_allow_sk_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_allow_unsorted_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_size_basic.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bytes_per_sync_basic.test22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_dump_basic.test21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_high_pri_pool_ratio_basic.test24
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_index_and_filter_blocks_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_index_and_filter_with_high_priority_basic.test21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_checksums_pct_basic.test17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_collect_sst_properties_basic.test8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_commit_in_the_middle_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_commit_time_batch_for_recovery_basic.test20
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compact_cf_basic.test19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_readahead_size_basic.test23
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_sequential_deletes_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_sequential_deletes_count_sd_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_sequential_deletes_file_size_basic.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_sequential_deletes_window_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_create_checkpoint_basic.test29
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_create_if_missing_basic.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_create_missing_column_families_basic.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_datadir_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_db_write_buffer_size_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_deadlock_detect_basic.test20
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_deadlock_detect_depth_basic.test17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_manual_compaction_delay_basic.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_optimizer_no_zero_cardinality_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_ignore_pk_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_read_filter_ts_basic.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_rec_ts_basic.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_snapshot_ts_basic.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_default_cf_options_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delayed_write_rate_basic.test22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_cf_basic-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_cf_basic.test75
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_obsolete_files_period_micros_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_2pc_basic.test20
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_bulk_load_api_basic.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_insert_with_update_caching_basic.test21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_thread_tracking_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_ttl_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_ttl_read_filtering_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_write_thread_adaptive_yield_basic.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_error_if_exists_basic.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_error_on_suboptimal_collation_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_log_at_trx_commit_basic.test17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_basic.test23
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_cachetime_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_flush_memtable_and_lzero_now_basic.test17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_flush_memtable_now_basic.test17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_index_records_in_range_basic.test23
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_git_hash_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_hash_index_allow_collision_basic.test7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_ignore_unknown_options_basic.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_index_type_basic.test7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_info_log_level_basic.test21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_io_write_timeout_basic.test20
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_is_fd_close_on_exec_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_keep_log_file_num_basic.test7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_large_prefix_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_lock_scanned_rows_basic.test22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_lock_wait_timeout_basic.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_log_file_time_to_roll_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manifest_preallocation_size_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manual_compaction_threads_basic.test17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manual_wal_flush_basic.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_master_skip_tx_api_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_background_jobs_basic.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_latest_deadlocks_basic.test17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_log_file_size_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_manifest_file_size_basic.test7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_manual_compactions_basic.test17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_open_files_basic.test8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_row_locks_basic.test17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_subcompactions_basic.test7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_total_wal_size_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_buf_size_basic.test50
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_combine_read_size_basic.test32
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_tmp_file_removal_delay_ms_basic.test49
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_new_table_reader_for_compaction_inputs_basic.test7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_no_block_cache_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_override_cf_options_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_paranoid_checks_basic.test7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_pause_background_work_basic.test20
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_perf_context_level_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_persistent_cache_path_basic.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_persistent_cache_size_mb_basic.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_pin_l0_filter_and_index_blocks_in_cache_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_print_snapshot_conflict_queries_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_rate_limiter_bytes_per_sec_basic.test63
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_basic.test19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_tables_basic.test20
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_records_in_range_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_remove_mariabackup_checkpoint_basic.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_reset_stats_basic.test21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_rollback_on_timeout_basic.test21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_seconds_between_stat_computes_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_signal_drop_index_thread_basic.test19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sim_cache_size_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_bloom_filter_on_read_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_fill_cache_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_unique_check_tables_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sst_mgr_rate_bytes_per_sec_basic.test22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_stats_dump_period_sec_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_stats_level_basic.test21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_stats_recalc_rate_basic.test17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_store_row_debug_checksums_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_strict_collation_check_basic.test19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_strict_collation_exceptions_basic.test35
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_supported_compression_types_basic.test7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_table_cache_numshardbits_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_table_stats_sampling_pct_basic.test22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_tmpdir_basic.test38
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_trace_sst_api_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_two_write_queues_basic.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_unsafe_for_binlog_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options.test22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test119
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_adaptive_mutex_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_clock_cache_basic.test21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_direct_io_for_flush_and_compaction_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_direct_reads_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_fsync_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_validate_tables_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_verify_row_debug_checksums_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_bytes_per_sync_basic.test22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_dir_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_recovery_mode_basic.test17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_size_limit_mb_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_ttl_seconds_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_whole_key_filtering_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_write_batch_max_bytes_basic.test26
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_write_disable_wal_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_write_ignore_missing_column_families_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_write_policy_basic.test17
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/cache_index.rdiff71
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/checksum_table_live.rdiff13
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/cleanup_engine.inc25
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/define_engine.inc45
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/disabled.def27
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/index.rdiff60
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/index_type_btree.rdiff60
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/index_type_hash.rdiff60
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/mask_engine.inc15
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/misc.rdiff25
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/parts/checksum_table.rdiff13
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/parts/create_table.rdiff20
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/parts/disabled.def3
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/parts/suite.opt1
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/show_engine.rdiff15
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/show_table_status.rdiff20
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/suite.opt1
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/tbl_opt_insert_method.rdiff11
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/tbl_opt_union.rdiff16
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/tbl_temporary.rdiff24
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/truncate_table.rdiff24
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/trx/delete.rdiff10
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/trx/disabled.def4
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/trx/insert.rdiff24
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/trx/level_read_committed.rdiff10
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/trx/level_repeatable_read.rdiff35
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/trx/suite.opt1
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/trx/update.rdiff38
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/type_binary_indexes.rdiff11
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/type_bit_indexes.rdiff20
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/type_enum.rdiff20
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/type_enum_indexes.rdiff11
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/type_set.rdiff11
-rw-r--r--storage/rocksdb/mysql-test/storage_engine/type_set_indexes.rdiff20
-rw-r--r--storage/rocksdb/nosql_access.cc52
-rw-r--r--storage/rocksdb/nosql_access.h36
-rw-r--r--storage/rocksdb/properties_collector.cc544
-rw-r--r--storage/rocksdb/properties_collector.h215
-rw-r--r--storage/rocksdb/rdb_buff.h549
-rw-r--r--storage/rocksdb/rdb_cf_manager.cc273
-rw-r--r--storage/rocksdb/rdb_cf_manager.h108
-rw-r--r--storage/rocksdb/rdb_cf_options.cc341
-rw-r--r--storage/rocksdb/rdb_cf_options.h104
-rw-r--r--storage/rocksdb/rdb_compact_filter.h220
-rw-r--r--storage/rocksdb/rdb_comparator.h85
-rw-r--r--storage/rocksdb/rdb_converter.cc838
-rw-r--r--storage/rocksdb/rdb_converter.h247
-rw-r--r--storage/rocksdb/rdb_datadic.cc5386
-rw-r--r--storage/rocksdb/rdb_datadic.h1620
-rw-r--r--storage/rocksdb/rdb_global.h392
-rw-r--r--storage/rocksdb/rdb_i_s.cc2020
-rw-r--r--storage/rocksdb/rdb_i_s.h37
-rw-r--r--storage/rocksdb/rdb_index_merge.cc630
-rw-r--r--storage/rocksdb/rdb_index_merge.h227
-rw-r--r--storage/rocksdb/rdb_io_watchdog.cc240
-rw-r--r--storage/rocksdb/rdb_io_watchdog.h119
-rw-r--r--storage/rocksdb/rdb_mariadb_port.h55
-rw-r--r--storage/rocksdb/rdb_mariadb_server_port.cc123
-rw-r--r--storage/rocksdb/rdb_mariadb_server_port.h76
-rw-r--r--storage/rocksdb/rdb_mutex_wrapper.cc214
-rw-r--r--storage/rocksdb/rdb_mutex_wrapper.h143
-rw-r--r--storage/rocksdb/rdb_perf_context.cc285
-rw-r--r--storage/rocksdb/rdb_perf_context.h168
-rw-r--r--storage/rocksdb/rdb_psi.cc115
-rw-r--r--storage/rocksdb/rdb_psi.h58
-rw-r--r--storage/rocksdb/rdb_source_revision.h.in1
-rw-r--r--storage/rocksdb/rdb_sst_info.cc559
-rw-r--r--storage/rocksdb/rdb_sst_info.h265
-rw-r--r--storage/rocksdb/rdb_threads.cc83
-rw-r--r--storage/rocksdb/rdb_threads.h195
-rw-r--r--storage/rocksdb/rdb_utils.cc369
-rw-r--r--storage/rocksdb/rdb_utils.h335
m---------storage/rocksdb/rocksdb0
-rw-r--r--storage/rocksdb/rocksdb-range-access.txt292
-rw-r--r--storage/rocksdb/tools/mysql_ldb.cc18
-rw-r--r--storage/rocksdb/unittest/CMakeLists.txt22
-rw-r--r--storage/rocksdb/unittest/test_properties_collector.cc54
-rw-r--r--storage/rocksdb/ut0counter.h203
-rw-r--r--storage/sequence/sequence.cc6
-rw-r--r--storage/sphinx/ha_sphinx.cc9
-rw-r--r--storage/sphinx/mysql-test/sphinx/disabled.def2
-rw-r--r--storage/sphinx/mysql-test/sphinx/sphinx.result20
-rw-r--r--storage/sphinx/mysql-test/sphinx/sphinx.test13
-rw-r--r--storage/sphinx/mysql-test/sphinx/suite.pm49
-rw-r--r--storage/sphinx/mysql-test/sphinx/union-5539.result2
-rw-r--r--storage/sphinx/snippets_udf.cc4
-rw-r--r--storage/spider/CMakeLists.txt3
-rw-r--r--storage/spider/ha_spider.cc85
-rw-r--r--storage/spider/ha_spider.h2
-rw-r--r--storage/spider/hs_client/allocator.hpp2
-rw-r--r--storage/spider/hs_client/config.cpp4
-rw-r--r--storage/spider/hs_client/hstcpcli.cpp2
-rw-r--r--storage/spider/hs_client/socket.cpp6
-rw-r--r--storage/spider/mysql-test/spider/bg/r/basic_sql.result104
-rw-r--r--storage/spider/mysql-test/spider/bg/r/basic_sql_part.result20
-rw-r--r--storage/spider/mysql-test/spider/bg/r/direct_aggregate.result9
-rw-r--r--storage/spider/mysql-test/spider/bg/r/direct_aggregate_part.result8
-rw-r--r--storage/spider/mysql-test/spider/bg/r/direct_update.result9
-rw-r--r--storage/spider/mysql-test/spider/bg/r/direct_update_part.result8
-rw-r--r--storage/spider/mysql-test/spider/bg/r/function.result11
-rw-r--r--storage/spider/mysql-test/spider/bg/r/ha.result26
-rw-r--r--storage/spider/mysql-test/spider/bg/r/ha_part.result24
-rw-r--r--storage/spider/mysql-test/spider/bg/r/spider3_fixes.result14
-rw-r--r--storage/spider/mysql-test/spider/bg/r/spider3_fixes_part.result14
-rw-r--r--storage/spider/mysql-test/spider/bg/r/spider_fixes.result49
-rw-r--r--storage/spider/mysql-test/spider/bg/r/spider_fixes_part.result21
-rw-r--r--storage/spider/mysql-test/spider/bg/r/vp_fixes.result13
-rw-r--r--storage/spider/mysql-test/spider/bg/suite.pm2
-rw-r--r--storage/spider/mysql-test/spider/handler/r/basic_sql.result179
-rw-r--r--storage/spider/mysql-test/spider/handler/r/basic_sql_part.result141
-rw-r--r--storage/spider/mysql-test/spider/handler/r/direct_aggregate.result60
-rw-r--r--storage/spider/mysql-test/spider/handler/r/direct_aggregate_part.result90
-rw-r--r--storage/spider/mysql-test/spider/handler/r/direct_update.result113
-rw-r--r--storage/spider/mysql-test/spider/handler/r/direct_update_part.result137
-rw-r--r--storage/spider/mysql-test/spider/handler/r/function.result11
-rw-r--r--storage/spider/mysql-test/spider/handler/r/ha.result219
-rw-r--r--storage/spider/mysql-test/spider/handler/r/ha_part.result286
-rw-r--r--storage/spider/mysql-test/spider/handler/r/spider3_fixes.result191
-rw-r--r--storage/spider/mysql-test/spider/handler/r/spider3_fixes_part.result238
-rw-r--r--storage/spider/mysql-test/spider/handler/r/spider_fixes.result228
-rw-r--r--storage/spider/mysql-test/spider/handler/r/spider_fixes_part.result241
-rw-r--r--storage/spider/mysql-test/spider/handler/r/vp_fixes.result13
-rw-r--r--storage/spider/mysql-test/spider/handler/suite.opt1
-rw-r--r--storage/spider/mysql-test/spider/handler/suite.pm12
-rw-r--r--storage/spider/mysql-test/spider/r/basic_sql.result104
-rw-r--r--storage/spider/mysql-test/spider/r/basic_sql_part.result20
-rw-r--r--storage/spider/mysql-test/spider/r/direct_aggregate.result9
-rw-r--r--storage/spider/mysql-test/spider/r/direct_aggregate_part.result8
-rw-r--r--storage/spider/mysql-test/spider/r/direct_update.result9
-rw-r--r--storage/spider/mysql-test/spider/r/direct_update_part.result8
-rw-r--r--storage/spider/mysql-test/spider/r/function.result11
-rw-r--r--storage/spider/mysql-test/spider/r/ha.result26
-rw-r--r--storage/spider/mysql-test/spider/r/ha_part.result24
-rw-r--r--storage/spider/mysql-test/spider/r/pushdown_not_like.result63
-rw-r--r--storage/spider/mysql-test/spider/r/spider3_fixes.result14
-rw-r--r--storage/spider/mysql-test/spider/r/spider3_fixes_part.result14
-rw-r--r--storage/spider/mysql-test/spider/r/spider_fixes.result49
-rw-r--r--storage/spider/mysql-test/spider/r/spider_fixes_part.result22
-rw-r--r--storage/spider/mysql-test/spider/r/vp_fixes.result13
-rw-r--r--storage/spider/mysql-test/spider/suite.pm2
-rw-r--r--storage/spider/mysql-test/spider/t/pushdown_not_like.test138
-rw-r--r--storage/spider/scripts/install_spider.sql28
-rw-r--r--storage/spider/spd_conn.cc22
-rw-r--r--storage/spider/spd_copy_tables.cc84
-rw-r--r--storage/spider/spd_db_conn.cc13
-rw-r--r--storage/spider/spd_db_handlersocket.cc9
-rw-r--r--storage/spider/spd_db_handlersocket.h3
-rw-r--r--storage/spider/spd_db_include.h45
-rw-r--r--storage/spider/spd_db_mysql.cc110
-rw-r--r--storage/spider/spd_db_mysql.h3
-rw-r--r--storage/spider/spd_db_oracle.cc59
-rw-r--r--storage/spider/spd_db_oracle.h3
-rw-r--r--storage/spider/spd_direct_sql.cc118
-rw-r--r--storage/spider/spd_include.h3
-rw-r--r--storage/spider/spd_param.cc38
-rw-r--r--storage/spider/spd_param.h2
-rw-r--r--storage/spider/spd_sys_table.cc174
-rw-r--r--storage/spider/spd_table.cc252
-rw-r--r--storage/spider/spd_table.h216
-rw-r--r--storage/spider/spd_trx.cc21
-rw-r--r--storage/test_sql_discovery/mysql-test/sql_discovery/simple.result8
-rw-r--r--storage/tokudb/CMakeLists.txt57
-rw-r--r--storage/tokudb/PerconaFT/CMakeLists.txt2
-rw-r--r--storage/tokudb/PerconaFT/DartConfig.cmake10
-rw-r--r--storage/tokudb/PerconaFT/ft/bndata.cc1
-rw-r--r--storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc1
-rw-r--r--storage/tokudb/PerconaFT/ft/cachetable/checkpoint.cc1
-rw-r--r--storage/tokudb/PerconaFT/ft/cursor.cc1
-rw-r--r--storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.cc1
-rw-r--r--storage/tokudb/PerconaFT/ft/ft-flusher.cc1
-rw-r--r--storage/tokudb/PerconaFT/ft/ft-hot-flusher.cc1
-rw-r--r--storage/tokudb/PerconaFT/ft/ft-ops.cc1
-rw-r--r--storage/tokudb/PerconaFT/ft/ft-status.cc1
-rw-r--r--storage/tokudb/PerconaFT/ft/ft-test-helpers.cc1
-rw-r--r--storage/tokudb/PerconaFT/ft/ft-verify.cc1
-rw-r--r--storage/tokudb/PerconaFT/ft/ft.cc1
-rw-r--r--storage/tokudb/PerconaFT/ft/le-cursor.cc1
-rw-r--r--storage/tokudb/PerconaFT/ft/leafentry.cc1
-rw-r--r--storage/tokudb/PerconaFT/ft/loader/dbufio.cc7
-rw-r--r--storage/tokudb/PerconaFT/ft/loader/loader.cc1
-rw-r--r--storage/tokudb/PerconaFT/ft/loader/pqueue.cc1
-rw-r--r--storage/tokudb/PerconaFT/ft/logger/log_upgrade.cc1
-rw-r--r--storage/tokudb/PerconaFT/ft/logger/logcursor.cc1
-rw-r--r--storage/tokudb/PerconaFT/ft/logger/logfilemgr.cc1
-rw-r--r--storage/tokudb/PerconaFT/ft/logger/logger.cc1
-rw-r--r--storage/tokudb/PerconaFT/ft/msg.cc1
-rw-r--r--storage/tokudb/PerconaFT/ft/node.cc1
-rw-r--r--storage/tokudb/PerconaFT/ft/pivotkeys.cc1
-rw-r--r--storage/tokudb/PerconaFT/ft/serialize/block_table.cc1
-rw-r--r--storage/tokudb/PerconaFT/ft/serialize/compress.cc8
-rw-r--r--storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc4
-rw-r--r--storage/tokudb/PerconaFT/ft/txn/txn_manager.cc4
-rw-r--r--storage/tokudb/PerconaFT/ft/ule.cc1
-rw-r--r--storage/tokudb/PerconaFT/portability/toku_config.h.in4
-rw-r--r--storage/tokudb/PerconaFT/portability/toku_portability.h2
l---------storage/tokudb/PerconaFT/scripts/tokuvalgrind1
-rw-r--r--storage/tokudb/PerconaFT/src/ydb.cc1
-rw-r--r--storage/tokudb/PerconaFT/src/ydb_db.cc1
-rw-r--r--storage/tokudb/PerconaFT/src/ydb_env_func.cc1
-rwxr-xr-xstorage/tokudb/PerconaFT/third_party/xz-4.999.9beta/configure2
-rw-r--r--storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/configure.ac2
-rw-r--r--storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/doc/man/txt/xz.txt2
-rw-r--r--storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/src/xz/main.c2
-rw-r--r--storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/src/xz/xz.12
-rw-r--r--storage/tokudb/PerconaFT/tools/CMakeLists.txt6
-rw-r--r--storage/tokudb/ha_tokudb.cc18
-rw-r--r--storage/tokudb/ha_tokudb_alter_56.cc26
-rw-r--r--storage/tokudb/hatoku_cmp.cc9
-rw-r--r--storage/tokudb/hatoku_defines.h2
-rw-r--r--storage/tokudb/hatoku_hton.cc8
-rw-r--r--storage/tokudb/man/CMakeLists.txt2
-rw-r--r--storage/tokudb/man/tokuft_logprint.116
-rw-r--r--storage/tokudb/man/tokuftdump.1237
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_deadlock_tokudb.result8
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_extra_col_master_tokudb.result88
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_extra_col_slave_tokudb.result154
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_mixed_replace_into.result2
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_not_null_tokudb.result35
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_parallel_tokudb.result9
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_parallel_tokudb_delete_pk.result12
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_parallel_tokudb_update_pk_uc0_lookup0.result13
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_parallel_tokudb_write_pk.result9
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_partition_tokudb.result23
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_relay_space_tokudb.result2
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_rfr_disable_on_expl_pk_absence.result8
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_row_basic_3tokudb.result149
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_row_blob_tokudb.result10
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_row_rec_comp_tokudb.result16
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_row_replace_into.result2
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_row_sp007_tokudb.result16
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_row_tabledefs_3tokudb.result53
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_set_null_tokudb.result12
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_stm_tokudb.result22
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_stmt_replace_into.result2
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_tokudb_bug28430.result33
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_tokudb_bug30888.result2
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_tokudb_commit_after_flush.result3
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_tokudb_insert_id.result50
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_tokudb_insert_id_pk.result10
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_tokudb_mixed_ddl.result20
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_tokudb_mixed_dml.result208
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_tokudb_multi_update.result2
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_tokudb_multi_update2.result8
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_tokudb_multi_update3.result13
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_tokudb_read_only_ft.result9
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_crash_safe.result2183
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_log.result22
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_sp003.result12
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_sp006.result5
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_trig004.result5
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_tokudb_stm_log.result12
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_tokudb_stm_mixed_crash_safe.result1773
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_truncate_3tokudb.result28
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_typeconv_tokudb.result5
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_xa_interleave.result19
-rw-r--r--storage/tokudb/mysql-test/rpl/r/tokudb_innodb_xa_crash.result3
-rw-r--r--storage/tokudb/mysql-test/rpl/t/rpl_tokudb_row_crash_safe-master.opt1
-rw-r--r--storage/tokudb/mysql-test/rpl/t/rpl_tokudb_row_crash_safe-slave.opt1
-rw-r--r--storage/tokudb/mysql-test/rpl/t/rpl_tokudb_row_crash_safe.test19
-rw-r--r--storage/tokudb/mysql-test/rpl/t/rpl_tokudb_stm_mixed_crash_safe-master.opt1
-rw-r--r--storage/tokudb/mysql-test/rpl/t/rpl_tokudb_stm_mixed_crash_safe-slave.opt1
-rw-r--r--storage/tokudb/mysql-test/rpl/t/rpl_tokudb_stm_mixed_crash_safe.test18
-rw-r--r--storage/tokudb/mysql-test/tokudb/disabled.def2
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/background_job_manager.result2
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/bf_delete_trigger.result12
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/bf_insert_select_trigger.result12
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/bf_insert_select_update_trigger.result36
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/bf_replace_select_trigger.result36
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/change_column_int_default.result40
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/cluster_2968-1.result8
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/cluster_2968-2.result4
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/cluster_2968-3.result6
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/cluster_key_part.result8
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/compressions.result10
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/ctype_collate.result2
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/ctype_cp1250_ch.result4
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/dir_per_db_rename_to_nonexisting_schema.result1
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/ext_key_1_innodb.result3
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/ext_key_1_tokudb.result3
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/ext_key_2_innodb.result3
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/ext_key_2_tokudb.result3
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/hotindex-del-0.result5
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/hotindex-del-1.result5
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/hotindex-del-fast.result5
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/hotindex-del-slow.result5
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/hotindex-insert-0.result5
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/hotindex-insert-1.result5
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/hotindex-insert-2.result5
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/hotindex-insert-bigchar.result5
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/hotindex-update-0.result5
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/hotindex-update-1.result5
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_timeout.result8
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_locks.result5
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_trx.result5
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/locking-read-repeatable-read-1.result3
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/locking-read-repeatable-read-2.result3
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/lockretry-insert.writelocktable.result4004
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/lockretry-writelocktable.insert.result4003
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/lockretry-writelocktable.insert2.result4003
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/locks-blocking-row-locks.result399
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/locks-delete-deadlock-1.result7
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/locks-no-read-lock-serializable-autocommit.result5
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/locks-select-update-1.result5
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/locks-select-update-2.result5
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/locks-select-update-3.result5
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/locks-update-deadlock-1.result7
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-10.result27
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-11.result6
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-12.result6
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-13.result7
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-14.result7
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-15.result7
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-16.result7
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-17.result10
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-18.result10
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-2.result9
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-21.result27
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-22.result27
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-23.result27
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-24.result13
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-25.result16
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-28.result8
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-2808-read-committed.result11
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-2808-read-uncommitted.result11
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-29.result9
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-3.result10
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-30.result9
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-31.result9
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-33.result7
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-34.result8
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-35.result8
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-36.result8
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-37.result8
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-38.result6
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-39.result8
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-4.result6
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-40.result8
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-5.result14
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-6.result8
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-7.result6
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-8.result6
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-9.result27
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/mvcc-checksum-locks.result6
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/replace-ignore.result8
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/rows-32m-0.result3
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/rows-32m-1.result3
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/rows-32m-rand-insert.result3
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/rows-32m-seq-insert.result3
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/tokudb_mrr.result8
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/tokudb_support_xa.result5
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/type_bit_innodb.result2
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/type_blob.result32
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/type_date.result6
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/type_datetime.result12
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/type_decimal.result24
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/type_enum.result2
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/type_float.result8
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/type_newdecimal.result158
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/type_ranges.result26
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/type_timestamp.result22
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/type_timestamp_explicit.result16
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/type_varchar.result8
-rw-r--r--storage/tokudb/mysql-test/tokudb/t/change_column_all.py1
-rw-r--r--storage/tokudb/mysql-test/tokudb/t/cluster_2968-0.test1
-rw-r--r--storage/tokudb/mysql-test/tokudb/t/cluster_2968-1.test1
-rw-r--r--storage/tokudb/mysql-test/tokudb/t/cluster_2968-2.test1
-rw-r--r--storage/tokudb/mysql-test/tokudb/t/cluster_2968-3.test2
-rw-r--r--storage/tokudb/mysql-test/tokudb/t/ext_key_1_innodb.test2
-rw-r--r--storage/tokudb/mysql-test/tokudb/t/ext_key_1_tokudb.test2
-rw-r--r--storage/tokudb/mysql-test/tokudb/t/ext_key_2_innodb.test2
-rw-r--r--storage/tokudb/mysql-test/tokudb/t/ext_key_2_tokudb.test2
-rw-r--r--storage/tokudb/mysql-test/tokudb/t/type_blob.test14
-rw-r--r--storage/tokudb/mysql-test/tokudb/t/type_newdecimal.test24
-rw-r--r--storage/tokudb/mysql-test/tokudb/t/type_varchar.opt1
-rw-r--r--storage/tokudb/mysql-test/tokudb/t/type_varchar.test1
-rw-r--r--storage/tokudb/mysql-test/tokudb_alter_table/r/ai_part.result8
-rw-r--r--storage/tokudb/mysql-test/tokudb_alter_table/r/alter_column_default.result6
-rw-r--r--storage/tokudb/mysql-test/tokudb_alter_table/r/frm_discover_partition.result16
-rw-r--r--storage/tokudb/mysql-test/tokudb_alter_table/r/hcad_part.result8
-rw-r--r--storage/tokudb/mysql-test/tokudb_alter_table/r/hcad_tmp_tables_56.result2
-rw-r--r--storage/tokudb/mysql-test/tokudb_alter_table/r/hcad_with_dels.result7
-rw-r--r--storage/tokudb/mysql-test/tokudb_alter_table/r/hcad_with_lock_sps.result2
-rw-r--r--storage/tokudb/mysql-test/tokudb_alter_table/r/hcr.result2
-rw-r--r--storage/tokudb/mysql-test/tokudb_alter_table/r/hcr3.result6
-rw-r--r--storage/tokudb/mysql-test/tokudb_alter_table/r/mod_enum.result1
-rw-r--r--storage/tokudb/mysql-test/tokudb_alter_table/r/null_bytes_add_key.result1
-rw-r--r--storage/tokudb/mysql-test/tokudb_alter_table/r/null_bytes_col_rename.result1
-rw-r--r--storage/tokudb/mysql-test/tokudb_alter_table/r/null_bytes_drop_default.result1
-rw-r--r--storage/tokudb/mysql-test/tokudb_alter_table/r/null_bytes_drop_key.result1
-rw-r--r--storage/tokudb/mysql-test/tokudb_alter_table/t/mod_enum.test1
-rw-r--r--storage/tokudb/mysql-test/tokudb_alter_table/t/null_bytes_add_key.test1
-rw-r--r--storage/tokudb/mysql-test/tokudb_alter_table/t/null_bytes_col_rename.test1
-rw-r--r--storage/tokudb/mysql-test/tokudb_alter_table/t/null_bytes_drop_default.test1
-rw-r--r--storage/tokudb/mysql-test/tokudb_alter_table/t/null_bytes_drop_key.test1
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/r/1853.result46
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/r/2494-read-committed.result11
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/r/2641.result7
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/r/2952.result10
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/r/5974-2.result7
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/r/5974.result7
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/r/PS-3773.result4
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/r/PS-5163.result2
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/r/alter_table_copy_table.result1
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/r/checkpoint_lock.result7
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/r/checkpoint_lock_3.result7
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/r/db397_delete_trigger.result2
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/r/db801.result10
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/r/db938.result5
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/r/db945.result2
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/r/fileops-2.result6
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/r/fileops-4.result7
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/r/ft-index-40.result10
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/r/leak172.result5
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/r/lock_uniq_key_empty.result14
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/r/lock_uniq_key_left.result14
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/r/lock_uniq_key_middle.result14
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/r/lock_uniq_key_right.result14
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/r/xa.result32
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/t/PS-5163.test2
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/t/alter_table_copy_table.test1
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/t/db397_delete_trigger.test2
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/t/db945.test2
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/t/xa.test2
-rw-r--r--storage/tokudb/mysql-test/tokudb_mariadb/r/alter.result10
-rw-r--r--storage/tokudb/mysql-test/tokudb_mariadb/r/autoinc.result7
-rw-r--r--storage/tokudb/mysql-test/tokudb_mariadb/r/clustering.result2
-rw-r--r--storage/tokudb/mysql-test/tokudb_mariadb/r/mdev5426.result3
-rw-r--r--storage/tokudb/mysql-test/tokudb_mariadb/r/optimize.result3
-rw-r--r--storage/tokudb/mysql-test/tokudb_mariadb/r/xa.result5
-rw-r--r--storage/tokudb/mysql-test/tokudb_mariadb/t/xa-recovery-9214.test2
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/disabled.def1
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/part_supported_sql_func_tokudb.result256
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_alter1_1_2_tokudb.result1050
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_alter1_1_tokudb.result600
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_alter1_2_tokudb.result1500
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_alter2_1_1_tokudb.result670
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_alter2_1_2_tokudb.result590
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_alter2_2_1_tokudb.result670
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_alter2_2_2_tokudb.result590
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_alter3_tokudb.result272
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_alter4_tokudb.result3712
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_auto_increment_tokudb.result124
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_basic_tokudb.result1200
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_bit_tokudb.result56
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_char_tokudb.resultbin50278 -> 50218 bytes
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_datetime_tokudb.result146
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_debug_sync_tokudb.result21
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_debug_tokudb.result1612
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_decimal_tokudb.result42
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_engine_tokudb.result170
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_exch_qa_1_tokudb.result16
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_exch_qa_5_tokudb.result6
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_exch_qa_7_tokudb.result2
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_exch_qa_8_tokudb.result4
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_exchange_tokudb.result43
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_float_tokudb.result28
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_int_tokudb.result90
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_mgm_lc0_tokudb.result206
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_mgm_lc10_tokudb.result190
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_mgm_lc1_tokudb.result204
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_special_tokudb.result77
-rw-r--r--storage/tokudb/mysql-test/tokudb_parts/r/partition_syntax_tokudb.result208
-rw-r--r--storage/tokudb/tokudb.cnf.in (renamed from storage/tokudb/tokudb.cnf)7
-rw-r--r--storage/tokudb/tokudb.conf.in2
-rw-r--r--storage/xtradb/CMakeLists.txt18
-rw-r--r--storage/xtradb/api/api0api.cc2
-rw-r--r--storage/xtradb/btr/btr0sea.cc16
-rw-r--r--storage/xtradb/buf/buf0buf.cc11
-rw-r--r--storage/xtradb/buf/buf0dblwr.cc2
-rw-r--r--storage/xtradb/buf/buf0flu.cc8
-rw-r--r--storage/xtradb/dict/dict0load.cc2
-rw-r--r--storage/xtradb/fil/fil0crypt.cc4
-rw-r--r--storage/xtradb/fil/fil0fil.cc12
-rw-r--r--storage/xtradb/fsp/fsp0fsp.cc2
-rw-r--r--storage/xtradb/handler/ha_innodb.cc125
-rw-r--r--storage/xtradb/handler/handler0alter.cc30
-rw-r--r--storage/xtradb/include/btr0sea.h7
-rw-r--r--storage/xtradb/include/dict0dict.ic2
-rw-r--r--storage/xtradb/include/fsp0fsp.h16
-rw-r--r--storage/xtradb/include/ha_prototypes.h1
-rw-r--r--storage/xtradb/include/log0log.h2
-rw-r--r--storage/xtradb/include/os0sync.h9
-rw-r--r--storage/xtradb/include/srv0srv.h3
-rw-r--r--storage/xtradb/include/trx0sys.h14
-rw-r--r--storage/xtradb/include/trx0trx.h9
-rw-r--r--storage/xtradb/include/ut0counter.h12
-rw-r--r--storage/xtradb/include/ut0crc32.h3
-rw-r--r--storage/xtradb/include/ut0wqueue.h15
-rw-r--r--storage/xtradb/mysql-test/storage_engine/lock_concurrent.rdiff9
-rw-r--r--storage/xtradb/mysql-test/storage_engine/parts/suite.pm8
-rw-r--r--storage/xtradb/mysql-test/storage_engine/suite.pm8
-rw-r--r--storage/xtradb/mysql-test/storage_engine/tbl_opt_index_dir.rdiff (renamed from storage/xtradb/mysql-test/storage_engine/tbl_opt_data_index_dir.rdiff)0
-rw-r--r--storage/xtradb/mysql-test/storage_engine/trx/suite.pm8
-rw-r--r--storage/xtradb/os/os0sync.cc167
-rw-r--r--storage/xtradb/row/row0import.cc2
-rw-r--r--storage/xtradb/row/row0merge.cc6
-rw-r--r--storage/xtradb/srv/srv0conc.cc2
-rw-r--r--storage/xtradb/srv/srv0srv.cc5
-rw-r--r--storage/xtradb/srv/srv0start.cc35
-rw-r--r--storage/xtradb/ut/crc32_power8/crc32.S775
-rw-r--r--storage/xtradb/ut/crc32_power8/crc32_constants.h911
-rw-r--r--storage/xtradb/ut/crc32_power8/crc32_wrapper.c68
-rw-r--r--storage/xtradb/ut/crc32_power8/ppc-opcode.h23
-rw-r--r--storage/xtradb/ut/ut0crc32.cc52
-rw-r--r--storage/xtradb/ut/ut0wqueue.cc19
2398 files changed, 293420 insertions, 125806 deletions
diff --git a/storage/archive/azio.c b/storage/archive/azio.c
index c1efe19e91f..cc2140e838e 100644
--- a/storage/archive/azio.c
+++ b/storage/archive/azio.c
@@ -220,7 +220,7 @@ int write_header(azio_stream *s)
AZHEADER_SIZE + AZMETA_BUFFER_SIZE); /* FRM position */
*(ptr + AZ_DIRTY_POS)= (unsigned char)s->dirty; /* Start of Data Block Index Block */
- /* Always begin at the begining, and end there as well */
+ /* Always begin at the beginning, and end there as well */
return my_pwrite(s->file, (uchar*) buffer, AZHEADER_SIZE + AZMETA_BUFFER_SIZE,
0, MYF(MY_NABP)) ? 1 : 0;
}
diff --git a/storage/archive/ha_archive.cc b/storage/archive/ha_archive.cc
index 49b5ef97097..bd04295daa6 100644
--- a/storage/archive/ha_archive.cc
+++ b/storage/archive/ha_archive.cc
@@ -376,6 +376,27 @@ unsigned int ha_archive::pack_row_v1(uchar *record)
uchar *pos;
DBUG_ENTER("pack_row_v1");
memcpy(record_buffer->buffer, record, table->s->reclength);
+
+ /*
+    The ends of VARCHAR fields are filled with garbage, so here
+    we explicitly fill the ends of the VARCHAR fields with zeroes
+ */
+
+ for (Field** field= table->field; (*field) ; field++)
+ {
+ Field *fld= *field;
+ if (fld->type() == MYSQL_TYPE_VARCHAR)
+ {
+ if (!(fld->is_real_null(record - table->record[0])))
+ {
+ ptrdiff_t start= (fld->ptr - table->record[0]);
+ Field_varstring *const field_var= (Field_varstring *)fld;
+ uint offset= field_var->data_length() + field_var->length_size();
+ memset(record_buffer->buffer + start + offset, 0,
+ fld->field_length - offset + 1);
+ }
+ }
+ }
pos= record_buffer->buffer + table->s->reclength;
for (blob= table->s->blob_field, end= blob + table->s->blob_fields;
blob != end; blob++)
@@ -383,13 +404,12 @@ unsigned int ha_archive::pack_row_v1(uchar *record)
uint32 length= ((Field_blob *) table->field[*blob])->get_length();
if (length)
{
- uchar *data_ptr;
- ((Field_blob *) table->field[*blob])->get_ptr(&data_ptr);
+ uchar *data_ptr= ((Field_blob *) table->field[*blob])->get_ptr();
memcpy(pos, data_ptr, length);
pos+= length;
}
}
- DBUG_RETURN(pos - record_buffer->buffer);
+ DBUG_RETURN((int)(pos - record_buffer->buffer));
}
/*
@@ -877,18 +897,19 @@ int ha_archive::real_write_row(uchar *buf, azio_stream *writer)
the bytes required for the length in the header.
*/
-uint32 ha_archive::max_row_length(const uchar *buf)
+uint32 ha_archive::max_row_length(const uchar *record)
{
uint32 length= (uint32)(table->s->reclength + table->s->fields*2);
length+= ARCHIVE_ROW_HEADER_SIZE;
+ my_ptrdiff_t const rec_offset= record - table->record[0];
uint *ptr, *end;
for (ptr= table->s->blob_field, end=ptr + table->s->blob_fields ;
ptr != end ;
ptr++)
{
- if (!table->field[*ptr]->is_null())
- length += 2 + ((Field_blob*)table->field[*ptr])->get_length();
+ if (!table->field[*ptr]->is_null(rec_offset))
+ length += 2 + ((Field_blob*)table->field[*ptr])->get_length(rec_offset);
}
return length;
@@ -898,10 +919,9 @@ uint32 ha_archive::max_row_length(const uchar *buf)
unsigned int ha_archive::pack_row(uchar *record, azio_stream *writer)
{
uchar *ptr;
-
+ my_ptrdiff_t const rec_offset= record - table->record[0];
DBUG_ENTER("ha_archive::pack_row");
-
if (fix_rec_buff(max_row_length(record)))
DBUG_RETURN(HA_ERR_OUT_OF_MEM); /* purecov: inspected */
@@ -915,7 +935,7 @@ unsigned int ha_archive::pack_row(uchar *record, azio_stream *writer)
for (Field **field=table->field ; *field ; field++)
{
- if (!((*field)->is_null()))
+ if (!((*field)->is_null(rec_offset)))
ptr= (*field)->pack(ptr, record + (*field)->offset(record));
}
@@ -1645,7 +1665,7 @@ void ha_archive::update_create_info(HA_CREATE_INFO *create_info)
}
if (!(my_readlink(tmp_real_path, share->data_file_name, MYF(0))))
- create_info->data_file_name= sql_strdup(tmp_real_path);
+ create_info->data_file_name= thd_strdup(ha_thd(), tmp_real_path);
DBUG_VOID_RETURN;
}
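The pack_row_v1() change above zeroes the unused tail of every non-NULL VARCHAR column before the row image is packed: data_length() is the number of bytes actually used and length_size() the width of the length prefix, so everything from their sum up to the column's capacity is indeterminate memory. Zeroing it makes logically equal rows byte-identical, which matters for a format that stores the raw row image. A standalone sketch of the same arithmetic, using an illustrative slot layout rather than the engine's Field_varstring:

    // Model a VARCHAR slot as a length prefix plus a fixed-capacity
    // data area, and zero the unused tail so two equal values always
    // produce identical buffers. Types and sizes are made up.
    #include <cstring>
    #include <cstdio>

    struct VarcharSlot {
      unsigned len_size;          // 1 or 2 length-prefix bytes
      unsigned max_len;           // data-area capacity (field_length)
      unsigned char buf[2 + 64];  // prefix + data area
    };

    static void zero_tail(VarcharSlot *s, unsigned data_len) {
      unsigned offset = s->len_size + data_len;   // first unused byte
      memset(s->buf + offset, 0, s->len_size + s->max_len - offset);
    }

    int main() {
      VarcharSlot s = {2, 64, {0}};
      memset(s.buf, 0xAA, sizeof s.buf);          // simulate stale memory
      const char *v = "hello";
      unsigned n = (unsigned)strlen(v);
      s.buf[0] = (unsigned char)(n & 0xFF);       // little-endian prefix
      s.buf[1] = (unsigned char)(n >> 8);
      memcpy(s.buf + s.len_size, v, n);
      zero_tail(&s, n);
      printf("first tail byte: %u\n", (unsigned)s.buf[s.len_size + n]);  // 0
    }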
diff --git a/storage/cassandra/CMakeLists.txt b/storage/cassandra/CMakeLists.txt
index df097c90a47..a5d58234d97 100644
--- a/storage/cassandra/CMakeLists.txt
+++ b/storage/cassandra/CMakeLists.txt
@@ -50,7 +50,6 @@ SET(cassandra_sources
LINK_DIRECTORIES(${LINK_DIR})
IF(CASSANDRASE_OK)
- SET(CASSANDRA_DEB_FILES "usr/lib/mysql/plugin/ha_cassandra.so" PARENT_SCOPE)
MYSQL_ADD_PLUGIN(cassandra ${cassandra_sources} STORAGE_ENGINE
MODULE_ONLY LINK_LIBRARIES thrift COMPONENT cassandra-engine)
ENDIF(CASSANDRASE_OK)
diff --git a/storage/connect/ha_connect.cc b/storage/connect/ha_connect.cc
index a111082e786..105ebf0545b 100644
--- a/storage/connect/ha_connect.cc
+++ b/storage/connect/ha_connect.cc
@@ -4509,7 +4509,9 @@ bool ha_connect::check_privileges(THD *thd, PTOS options, char *dbn, bool quick)
case TAB_OEM:
if (table && table->pos_in_table_list) // if SELECT
{
- //Switch_to_definer_security_ctx backup_ctx(thd, table->pos_in_table_list);
+#if MYSQL_VERSION_ID > 100200
+ Switch_to_definer_security_ctx backup_ctx(thd, table->pos_in_table_list);
+#endif // VERSION_ID > 100200
return check_global_access(thd, FILE_ACL);
}
else
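The check_privileges() hunk above activates the previously commented-out definer-context switch, guarded so it only compiles on servers newer than 10.2.0 (MYSQL_VERSION_ID conventionally packs major*10000 + minor*100 + patch, so 100200 is 10.2.0). The guard is an RAII object: constructed before check_global_access() runs, it restores the invoker's security context on every exit path. A schematic of that shape, with illustrative types rather than the server's Switch_to_definer_security_ctx definition:

    // Sketch of an RAII security-context switch: the constructor swaps
    // in the definer's context, the destructor restores the saved one,
    // so early returns from the privilege check unwind correctly.
    struct SecurityCtx { const char *user; };

    struct THDLike {
      SecurityCtx *active;
      SecurityCtx *swap(SecurityCtx *c) {
        SecurityCtx *old = active; active = c; return old;
      }
    };

    class ScopedDefinerCtx {
      THDLike *thd_;
      SecurityCtx *saved_;
    public:
      ScopedDefinerCtx(THDLike *thd, SecurityCtx *definer)
        : thd_(thd), saved_(thd->swap(definer)) {}
      ~ScopedDefinerCtx() { thd_->swap(saved_); }  // restore on any path
    };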
diff --git a/storage/connect/jdbconn.cpp b/storage/connect/jdbconn.cpp
index 2cb75e0adc1..2dab385a36f 100644
--- a/storage/connect/jdbconn.cpp
+++ b/storage/connect/jdbconn.cpp
@@ -766,7 +766,6 @@ void JDBConn::AddJars(PSTRG jpop, char sep)
/***********************************************************************/
bool JDBConn::Connect(PJPARM sop)
{
- int irc = RC_FX;
bool err = false;
jint rc;
jboolean jt = (trace(1));
diff --git a/storage/connect/jsonudf.h b/storage/connect/jsonudf.h
index 23e8c0e1aed..ee56869a111 100644
--- a/storage/connect/jsonudf.h
+++ b/storage/connect/jsonudf.h
@@ -238,6 +238,11 @@ extern "C" {
DllExport my_bool envar_init(UDF_INIT*, UDF_ARGS*, char*);
DllExport char *envar(UDF_EXEC_ARGS);
+#if defined(DEVELOPMENT)
+ DllExport my_bool uvar_init(UDF_INIT*, UDF_ARGS*, char*);
+ DllExport char *uvar(UDF_EXEC_ARGS);
+#endif // DEVELOPMENT
+
DllExport my_bool countin_init(UDF_INIT*, UDF_ARGS*, char*);
DllExport long long countin(UDF_EXEC_ARGS);
} // extern "C"
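The uvar_init/uvar pair declared above follows the standard two-function UDF convention, with UDF_EXEC_ARGS serving as CONNECT's shorthand for the exec function's argument list. A minimal sketch of a string UDF of that shape, assuming the conventional expanded signature and using a hypothetical demo() name:

    // Hypothetical UDF pair (demo is not a CONNECT function); assumes
    // UDF_EXEC_ARGS expands to the standard string-UDF argument list.
    #include <mysql.h>
    #include <cstring>

    extern "C" my_bool demo_init(UDF_INIT *initid, UDF_ARGS *args,
                                 char *message) {
      if (args->arg_count != 1 || args->arg_type[0] != STRING_RESULT) {
        strcpy(message, "demo() expects one string argument");
        return 1;                 // non-zero aborts the statement
      }
      initid->maybe_null = 1;
      return 0;
    }

    extern "C" char *demo(UDF_INIT *initid, UDF_ARGS *args, char *result,
                          unsigned long *length, char *is_null,
                          char *error) {
      if (!args->args[0]) { *is_null = 1; return NULL; }
      *length = args->lengths[0] > 255 ? 255 : args->lengths[0];
      memcpy(result, args->args[0], *length);  // echo input, truncated
      return result;                           // fits the 255-byte buffer
    }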
diff --git a/storage/connect/mysql-test/connect/r/bin.result b/storage/connect/mysql-test/connect/r/bin.result
index 4ba353ac705..1baa18a1e4d 100644
--- a/storage/connect/mysql-test/connect/r/bin.result
+++ b/storage/connect/mysql-test/connect/r/bin.result
@@ -57,7 +57,7 @@ t1 CREATE TABLE `t1` (
`name` char(10) NOT NULL,
`birth` date NOT NULL,
`id` char(5) NOT NULL `FIELD_FORMAT`='S',
- `salary` double(9,2) NOT NULL DEFAULT '0.00' `FIELD_FORMAT`='F',
+ `salary` double(9,2) NOT NULL DEFAULT 0.00 `FIELD_FORMAT`='F',
`dept` int(4) NOT NULL `FIELD_FORMAT`='S'
) ENGINE=CONNECT DEFAULT CHARSET=latin1 `TABLE_TYPE`=BIN `FILE_NAME`='Testbal.dat' `OPTION_LIST`='Endian=Little' `READONLY`=NO
INSERT INTO t1 VALUES (7777,'BILL','1973-06-30',4444,5555.555,777);
@@ -76,7 +76,7 @@ t1 CREATE TABLE `t1` (
`name` char(10) NOT NULL,
`birth` date NOT NULL,
`id` char(5) NOT NULL `FIELD_FORMAT`='S',
- `salary` double(9,2) NOT NULL DEFAULT '0.00' `FIELD_FORMAT`='F',
+ `salary` double(9,2) NOT NULL DEFAULT 0.00 `FIELD_FORMAT`='F',
`dept` int(4) NOT NULL `FIELD_FORMAT`='S'
) ENGINE=CONNECT DEFAULT CHARSET=latin1 `TABLE_TYPE`=BIN `FILE_NAME`='Testbal.dat' `OPTION_LIST`='Endian=Little' `READONLY`=YES
INSERT INTO t1 VALUES (7777,'BILL','1973-06-30',4444,5555.555,777);
diff --git a/storage/connect/mysql-test/connect/r/dir.result b/storage/connect/mysql-test/connect/r/dir.result
index 34a591fb26c..139544b99e9 100644
--- a/storage/connect/mysql-test/connect/r/dir.result
+++ b/storage/connect/mysql-test/connect/r/dir.result
@@ -26,7 +26,7 @@ fname ftype size
boys .txt 282
boyswin .txt 288
INSERT INTO t1 VALUES ('','','','');
-ERROR HY000: Got error 174 'COLBLK SetBuffer: undefined Access Method' from CONNECT
+ERROR 22007: Incorrect double value: '' for column `test`.`t1`.`size` at row 1
DROP TABLE t1;
CREATE TABLE t1 ENGINE=CONNECT TABLE_TYPE=DIR FILE_NAME='*.txt';
ERROR HY000: Cannot get column info for table type DIR
diff --git a/storage/connect/mysql-test/connect/r/grant.result b/storage/connect/mysql-test/connect/r/grant.result
index 681442724e5..eeecfde9df4 100644
--- a/storage/connect/mysql-test/connect/r/grant.result
+++ b/storage/connect/mysql-test/connect/r/grant.result
@@ -4,6 +4,8 @@ set sql_mode="";
#
GRANT ALL PRIVILEGES ON *.* TO user@localhost;
REVOKE FILE ON *.* FROM user@localhost;
+connect user,localhost,user,,;
+connection user;
SELECT user();
user()
user@localhost
@@ -14,6 +16,7 @@ ftype CHAR(4) NOT NULL,
size DOUBLE(12,0) NOT NULL flag=5
) ENGINE=CONNECT TABLE_TYPE=DIR FILE_NAME='*.*';
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
SELECT user();
user()
root@localhost
@@ -25,7 +28,9 @@ size DOUBLE(12,0) NOT NULL flag=5
) ENGINE=CONNECT TABLE_TYPE=DIR FILE_NAME='*.*';
SELECT fname, ftype, size FROM t1 WHERE size>0;
fname ftype size
+db .opt 65
t1 .frm 1081
+connection user;
SELECT user();
user()
user@localhost
@@ -44,10 +49,12 @@ ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for
CREATE VIEW v1 AS SELECT * FROM t1;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
# Testing a VIEW created with FILE privileges but accessed with no FILE
+connection default;
SELECT user();
user()
root@localhost
CREATE SQL SECURITY INVOKER VIEW v1 AS SELECT * FROM t1;
+connection user;
SELECT user();
user()
user@localhost
@@ -59,6 +66,8 @@ UPDATE v1 SET path=123;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE FROM v1;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+disconnect user;
+connection default;
SELECT user();
user()
root@localhost
@@ -74,6 +83,8 @@ DROP USER user@localhost;
CREATE USER user@localhost;
GRANT ALL PRIVILEGES ON *.* TO user@localhost;
REVOKE FILE ON *.* FROM user@localhost;
+connect user,localhost,user,,;
+connection user;
SELECT user();
user()
user@localhost
@@ -102,11 +113,13 @@ DROP VIEW v1;
DROP TABLE t1;
CREATE TABLE t1 (a INT NOT NULL) ENGINE=CONNECT TABLE_TYPE=BIN FILE_NAME='t1.EXT';
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
SELECT user();
user()
root@localhost
CREATE TABLE t1 (a INT NOT NULL) ENGINE=CONNECT TABLE_TYPE=BIN FILE_NAME='t1.EXT';
INSERT INTO t1 VALUES (10);
+connection user;
SELECT user();
user()
user@localhost
@@ -129,10 +142,12 @@ ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for
CREATE VIEW v1 AS SELECT * FROM t1;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
# Testing a VIEW created with FILE privileges but accessed with no FILE
+connection default;
SELECT user();
user()
root@localhost
CREATE SQL SECURITY INVOKER VIEW v1 AS SELECT * FROM t1;
+connection user;
SELECT user();
user()
user@localhost
@@ -144,6 +159,7 @@ UPDATE v1 SET a=123;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE FROM v1;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
SELECT user();
user()
root@localhost
@@ -153,12 +169,15 @@ CREATE TABLE t1 (a INT NOT NULL) ENGINE=CONNECT TABLE_TYPE=BIN;
Warnings:
Warning 1105 No file name. Table will use t1.bin
INSERT INTO t1 VALUES (10);
+connection user;
SELECT user();
user()
user@localhost
ALTER TABLE t1 FILE_NAME='t1.EXT';
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
DROP TABLE t1;
+disconnect user;
DROP USER user@localhost;
#
# End of grant.inc
@@ -169,6 +188,8 @@ DROP USER user@localhost;
CREATE USER user@localhost;
GRANT ALL PRIVILEGES ON *.* TO user@localhost;
REVOKE FILE ON *.* FROM user@localhost;
+connect user,localhost,user,,;
+connection user;
SELECT user();
user()
user@localhost
@@ -197,11 +218,13 @@ DROP VIEW v1;
DROP TABLE t1;
CREATE TABLE t1 (a INT NOT NULL) ENGINE=CONNECT TABLE_TYPE=CSV FILE_NAME='t1.EXT';
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
SELECT user();
user()
root@localhost
CREATE TABLE t1 (a INT NOT NULL) ENGINE=CONNECT TABLE_TYPE=CSV FILE_NAME='t1.EXT';
INSERT INTO t1 VALUES (10);
+connection user;
SELECT user();
user()
user@localhost
@@ -224,10 +247,12 @@ ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for
CREATE VIEW v1 AS SELECT * FROM t1;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
# Testing a VIEW created with FILE privileges but accessed with no FILE
+connection default;
SELECT user();
user()
root@localhost
CREATE SQL SECURITY INVOKER VIEW v1 AS SELECT * FROM t1;
+connection user;
SELECT user();
user()
user@localhost
@@ -239,6 +264,7 @@ UPDATE v1 SET a=123;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE FROM v1;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
SELECT user();
user()
root@localhost
@@ -248,12 +274,15 @@ CREATE TABLE t1 (a INT NOT NULL) ENGINE=CONNECT TABLE_TYPE=CSV;
Warnings:
Warning 1105 No file name. Table will use t1.csv
INSERT INTO t1 VALUES (10);
+connection user;
SELECT user();
user()
user@localhost
ALTER TABLE t1 FILE_NAME='t1.EXT';
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
DROP TABLE t1;
+disconnect user;
DROP USER user@localhost;
#
# End of grant.inc
@@ -264,6 +293,8 @@ DROP USER user@localhost;
CREATE USER user@localhost;
GRANT ALL PRIVILEGES ON *.* TO user@localhost;
REVOKE FILE ON *.* FROM user@localhost;
+connect user,localhost,user,,;
+connection user;
SELECT user();
user()
user@localhost
@@ -292,11 +323,13 @@ DROP VIEW v1;
DROP TABLE t1;
CREATE TABLE t1 (a INT NOT NULL) ENGINE=CONNECT TABLE_TYPE=DBF FILE_NAME='t1.EXT';
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
SELECT user();
user()
root@localhost
CREATE TABLE t1 (a INT NOT NULL) ENGINE=CONNECT TABLE_TYPE=DBF FILE_NAME='t1.EXT';
INSERT INTO t1 VALUES (10);
+connection user;
SELECT user();
user()
user@localhost
@@ -319,10 +352,12 @@ ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for
CREATE VIEW v1 AS SELECT * FROM t1;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
# Testing a VIEW created with FILE privileges but accessed with no FILE
+connection default;
SELECT user();
user()
root@localhost
CREATE SQL SECURITY INVOKER VIEW v1 AS SELECT * FROM t1;
+connection user;
SELECT user();
user()
user@localhost
@@ -334,6 +369,7 @@ UPDATE v1 SET a=123;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE FROM v1;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
SELECT user();
user()
root@localhost
@@ -343,12 +379,15 @@ CREATE TABLE t1 (a INT NOT NULL) ENGINE=CONNECT TABLE_TYPE=DBF;
Warnings:
Warning 1105 No file name. Table will use t1.dbf
INSERT INTO t1 VALUES (10);
+connection user;
SELECT user();
user()
user@localhost
ALTER TABLE t1 FILE_NAME='t1.EXT';
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
DROP TABLE t1;
+disconnect user;
DROP USER user@localhost;
#
# End of grant.inc
@@ -359,6 +398,8 @@ DROP USER user@localhost;
CREATE USER user@localhost;
GRANT ALL PRIVILEGES ON *.* TO user@localhost;
REVOKE FILE ON *.* FROM user@localhost;
+connect user,localhost,user,,;
+connection user;
SELECT user();
user()
user@localhost
@@ -387,11 +428,13 @@ DROP VIEW v1;
DROP TABLE t1;
CREATE TABLE t1 (a INT NOT NULL) ENGINE=CONNECT TABLE_TYPE=FIX FILE_NAME='t1.EXT';
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
SELECT user();
user()
root@localhost
CREATE TABLE t1 (a INT NOT NULL) ENGINE=CONNECT TABLE_TYPE=FIX FILE_NAME='t1.EXT';
INSERT INTO t1 VALUES (10);
+connection user;
SELECT user();
user()
user@localhost
@@ -414,10 +457,12 @@ ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for
CREATE VIEW v1 AS SELECT * FROM t1;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
# Testing a VIEW created with FILE privileges but accessed with no FILE
+connection default;
SELECT user();
user()
root@localhost
CREATE SQL SECURITY INVOKER VIEW v1 AS SELECT * FROM t1;
+connection user;
SELECT user();
user()
user@localhost
@@ -429,6 +474,7 @@ UPDATE v1 SET a=123;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE FROM v1;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
SELECT user();
user()
root@localhost
@@ -438,12 +484,15 @@ CREATE TABLE t1 (a INT NOT NULL) ENGINE=CONNECT TABLE_TYPE=FIX;
Warnings:
Warning 1105 No file name. Table will use t1.fix
INSERT INTO t1 VALUES (10);
+connection user;
SELECT user();
user()
user@localhost
ALTER TABLE t1 FILE_NAME='t1.EXT';
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
DROP TABLE t1;
+disconnect user;
DROP USER user@localhost;
#
# End of grant.inc
@@ -454,6 +503,8 @@ DROP USER user@localhost;
CREATE USER user@localhost;
GRANT ALL PRIVILEGES ON *.* TO user@localhost;
REVOKE FILE ON *.* FROM user@localhost;
+connect user,localhost,user,,;
+connection user;
SELECT user();
user()
user@localhost
@@ -482,11 +533,13 @@ DROP VIEW v1;
DROP TABLE t1;
CREATE TABLE t1 (a INT NOT NULL) ENGINE=CONNECT TABLE_TYPE=VEC MAX_ROWS=100 FILE_NAME='t1.EXT';
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
SELECT user();
user()
root@localhost
CREATE TABLE t1 (a INT NOT NULL) ENGINE=CONNECT TABLE_TYPE=VEC MAX_ROWS=100 FILE_NAME='t1.EXT';
INSERT INTO t1 VALUES (10);
+connection user;
SELECT user();
user()
user@localhost
@@ -509,10 +562,12 @@ ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for
CREATE VIEW v1 AS SELECT * FROM t1;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
# Testing a VIEW created with FILE privileges but accessed with no FILE
+connection default;
SELECT user();
user()
root@localhost
CREATE SQL SECURITY INVOKER VIEW v1 AS SELECT * FROM t1;
+connection user;
SELECT user();
user()
user@localhost
@@ -524,6 +579,7 @@ UPDATE v1 SET a=123;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE FROM v1;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
SELECT user();
user()
root@localhost
@@ -533,12 +589,15 @@ CREATE TABLE t1 (a INT NOT NULL) ENGINE=CONNECT TABLE_TYPE=VEC MAX_ROWS=100;
Warnings:
Warning 1105 No file name. Table will use t1.vec
INSERT INTO t1 VALUES (10);
+connection user;
SELECT user();
user()
user@localhost
ALTER TABLE t1 FILE_NAME='t1.EXT';
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
DROP TABLE t1;
+disconnect user;
DROP USER user@localhost;
#
# End of grant.inc
diff --git a/storage/connect/mysql-test/connect/r/grant2.result b/storage/connect/mysql-test/connect/r/grant2.result
index 0259dd74cdc..2e20dc39596 100644
--- a/storage/connect/mysql-test/connect/r/grant2.result
+++ b/storage/connect/mysql-test/connect/r/grant2.result
@@ -5,6 +5,7 @@ CREATE USER user@localhost;
GRANT ALL PRIVILEGES ON *.* TO user@localhost;
REVOKE FILE ON *.* FROM user@localhost;
# Testing SQLCOM_SELECT
+connection default;
CREATE TABLE t1 (a INT) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
INSERT INTO t1 VALUES (10);
CREATE SQL SECURITY INVOKER VIEW v1_invoker AS SELECT * FROM t1;
@@ -20,17 +21,20 @@ SELECT * FROM v1_definer;
a
10
SELECT * FROM v1_baddefiner;
-ERROR 28000: Access denied for user 'root'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connect user,localhost,user,,;
SELECT * FROM t1;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
SELECT * FROM v1_invoker;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
SELECT * FROM v1_definer;
a
10
+connection default;
DROP VIEW v1_invoker, v1_definer, v1_baddefiner;
DROP TABLE t1;
# Testing SQLCOM_UPDATE
+connection default;
CREATE TABLE t1 (a INT) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
INSERT INTO t1 VALUES (10);
CREATE SQL SECURITY INVOKER VIEW v1_invoker AS SELECT * FROM t1;
@@ -38,14 +42,17 @@ CREATE SQL SECURITY DEFINER VIEW v1_definer AS SELECT * FROM t1;
UPDATE t1 SET a=11;
UPDATE v1_invoker SET a=12;
UPDATE v1_definer SET a=13;
+connection user;
UPDATE t1 SET a=21;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE v1_invoker SET a=22;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE v1_definer SET a=23;
+connection default;
DROP VIEW v1_invoker, v1_definer;
DROP TABLE t1;
# Testing SQLCOM_INSERT
+connection default;
CREATE TABLE t1 (a INT) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
INSERT INTO t1 VALUES (10);
CREATE SQL SECURITY INVOKER VIEW v1_invoker AS SELECT * FROM t1;
@@ -53,14 +60,17 @@ CREATE SQL SECURITY DEFINER VIEW v1_definer AS SELECT * FROM t1;
INSERT INTO t1 VALUES (11);
INSERT INTO v1_invoker VALUES (12);
INSERT INTO v1_definer VALUES (13);
+connection user;
INSERT INTO t1 VALUES (21);
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
INSERT INTO v1_invoker VALUES (22);
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
INSERT INTO v1_definer VALUES (23);
+connection default;
DROP VIEW v1_invoker, v1_definer;
DROP TABLE t1;
# Testing SQLCOM_REPLACE
+connection default;
CREATE TABLE t1 (a INT) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
INSERT INTO t1 VALUES (10);
CREATE SQL SECURITY INVOKER VIEW v1_invoker AS SELECT * FROM t1;
@@ -71,15 +81,18 @@ REPLACE INTO v1_invoker VALUES (12);
ERROR 42000: CONNECT Unsupported command
REPLACE INTO v1_definer VALUES (13);
ERROR 42000: CONNECT Unsupported command
+connection user;
REPLACE INTO t1 VALUES (21);
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
REPLACE INTO v1_invoker VALUES (22);
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
REPLACE INTO v1_definer VALUES (23);
ERROR 42000: CONNECT Unsupported command
+connection default;
DROP VIEW v1_invoker, v1_definer;
DROP TABLE t1;
# Testing SQLCOM_DELETE
+connection default;
CREATE TABLE t1 (a INT) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
INSERT INTO t1 VALUES (10),(11),(12),(13),(21),(22),(23);
CREATE SQL SECURITY INVOKER VIEW v1_invoker AS SELECT * FROM t1;
@@ -87,14 +100,17 @@ CREATE SQL SECURITY DEFINER VIEW v1_definer AS SELECT * FROM t1;
DELETE FROM t1 WHERE a=11;
DELETE FROM v1_invoker WHERE a=12;
DELETE FROM v1_definer WHERE a=13;
+connection user;
DELETE FROM t1 WHERE a=21;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE FROM v1_invoker WHERE a=22;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE FROM v1_definer WHERE a=23;
+connection default;
DROP VIEW v1_invoker, v1_definer;
DROP TABLE t1;
# Testing SQLCOM_LOAD
+connection default;
CREATE TABLE t1 (a VARCHAR(128)) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
INSERT INTO t1 VALUES (10);
CREATE SQL SECURITY INVOKER VIEW v1_invoker AS SELECT * FROM t1;
@@ -102,29 +118,38 @@ CREATE SQL SECURITY DEFINER VIEW v1_definer AS SELECT * FROM t1;
LOAD DATA LOCAL INFILE 'MTR_SUITE_DIR/std_data/boys.txt' INTO TABLE t1;
LOAD DATA LOCAL INFILE 'MTR_SUITE_DIR/std_data/boys.txt' INTO TABLE v1_invoker;
LOAD DATA LOCAL INFILE 'MTR_SUITE_DIR/std_data/boys.txt' INTO TABLE v1_definer;
+connection user;
LOAD DATA LOCAL INFILE 'MTR_SUITE_DIR/std_data/boys.txt' INTO TABLE t1;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
LOAD DATA LOCAL INFILE 'MTR_SUITE_DIR/std_data/boys.txt' INTO TABLE v1_invoker;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
LOAD DATA LOCAL INFILE 'MTR_SUITE_DIR/std_data/boys.txt' INTO TABLE v1_definer;
+connection default;
DROP VIEW v1_invoker, v1_definer;
DROP TABLE t1;
# Testing SQLCOM_TRUNCATE
+connection default;
CREATE TABLE t1 (a INT) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
INSERT INTO t1 VALUES (10);
TRUNCATE TABLE t1;
INSERT INTO t1 VALUES (11);
+connection user;
TRUNCATE TABLE t1;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
DROP TABLE t1;
# Testing SQLCOM_DROP_TABLE
+connection default;
CREATE TABLE t1 (a INT) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
INSERT INTO t1 VALUES (10);
+connection user;
DROP TABLE t1;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
DROP TABLE t1;
# Testing SQLCOM_DROP_VIEW
# DROP VIEW does not need FILE_ACL.
+connection default;
CREATE TABLE t1 (a INT) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
INSERT INTO t1 VALUES (10),(11),(12),(13),(21),(22),(23);
CREATE SQL SECURITY INVOKER VIEW v1_invoker AS SELECT * FROM t1;
@@ -132,13 +157,18 @@ CREATE SQL SECURITY DEFINER VIEW v1_definer AS SELECT * FROM t1;
DROP VIEW v1_invoker, v1_definer;
CREATE SQL SECURITY INVOKER VIEW v1_invoker AS SELECT * FROM t1;
CREATE SQL SECURITY DEFINER VIEW v1_definer AS SELECT * FROM t1;
+connection user;
DROP VIEW v1_invoker;
DROP VIEW v1_definer;
+connection default;
DROP TABLE t1;
# Testing SQLCOM_CREATE_TABLE
+connection user;
CREATE TABLE t1 (a INT) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
# Testing SQLCOM_LOCK_TABLES
+connection default;
CREATE TABLE t1 (a INT) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
INSERT INTO t1 VALUES (10);
CREATE SQL SECURITY INVOKER VIEW v1_invoker AS SELECT * FROM t1;
@@ -155,21 +185,24 @@ LOCK TABLE v1_definer READ;
UNLOCK TABLES;
LOCK TABLE v1_definer WRITE;
UNLOCK TABLES;
+connection user;
LOCK TABLE t1 READ;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
LOCK TABLE t1 WRITE;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
LOCK TABLE v1_invoker READ;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
LOCK TABLE v1_invoker WRITE;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
LOCK TABLE v1_definer READ;
UNLOCK TABLES;
LOCK TABLE v1_definer WRITE;
UNLOCK TABLES;
+connection default;
DROP VIEW v1_invoker, v1_definer;
DROP TABLE t1;
# Testing SQLCOM_UPDATE_MULTI
+connection default;
CREATE TABLE t1 (a INT) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
CREATE TABLE t2 (a INT) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t2.fix';
CREATE TABLE t3 (a INT);
@@ -229,98 +262,101 @@ UPDATE v2_definer a1,v1_invoker a2 SET a1.a=50 WHERE a1.a=a2.a;
UPDATE v2_definer a1,v1_definer a2 SET a1.a=50 WHERE a1.a=a2.a;
UPDATE v2_definer a1,v2_invoker a2 SET a1.a=50 WHERE a1.a=a2.a;
UPDATE v2_definer a1,v2_definer a2 SET a1.a=50 WHERE a1.a=a2.a;
+connection user;
UPDATE t1 a1,t1 a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE t1 a1,t2 a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE t1 a1,t3 a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE t1 a1,v1_invoker a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE t1 a1,v1_definer a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE t1 a1,v2_invoker a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE t1 a1,v2_definer a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE t2 a1,t1 a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE t2 a1,t2 a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE t2 a1,t3 a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE t2 a1,v1_invoker a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE t2 a1,v1_definer a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE t2 a1,v2_invoker a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE t2 a1,v2_definer a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE t3 a1,t1 a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE t3 a1,t2 a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE t3 a1,t3 a2 SET a1.a=50 WHERE a1.a=a2.a;
UPDATE t3 a1,v1_invoker a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE t3 a1,v1_definer a2 SET a1.a=50 WHERE a1.a=a2.a;
UPDATE t3 a1,v2_invoker a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE t3 a1,v2_definer a2 SET a1.a=50 WHERE a1.a=a2.a;
UPDATE v1_invoker a1,t1 a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE v1_invoker a1,t2 a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE v1_invoker a1,t3 a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE v1_invoker a1,v1_invoker a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE v1_invoker a1,v1_definer a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE v1_invoker a1,v2_invoker a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE v1_invoker a1,v2_definer a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE v1_definer a1,t1 a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE v1_definer a1,t2 a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE v1_definer a1,t3 a2 SET a1.a=50 WHERE a1.a=a2.a;
UPDATE v1_definer a1,v1_invoker a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE v1_definer a1,v1_definer a2 SET a1.a=50 WHERE a1.a=a2.a;
UPDATE v1_definer a1,v2_invoker a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE v1_definer a1,v2_definer a2 SET a1.a=50 WHERE a1.a=a2.a;
UPDATE v2_invoker a1,t1 a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE v2_invoker a1,t2 a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE v2_invoker a1,t3 a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE v2_invoker a1,v1_invoker a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE v2_invoker a1,v1_definer a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE v2_invoker a1,v2_invoker a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE v2_invoker a1,v2_definer a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE v2_definer a1,t1 a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE v2_definer a1,t2 a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE v2_definer a1,t3 a2 SET a1.a=50 WHERE a1.a=a2.a;
UPDATE v2_definer a1,v1_invoker a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE v2_definer a1,v1_definer a2 SET a1.a=50 WHERE a1.a=a2.a;
UPDATE v2_definer a1,v2_invoker a2 SET a1.a=50 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
UPDATE v2_definer a1,v2_definer a2 SET a1.a=50 WHERE a1.a=a2.a;
+connection default;
DROP VIEW v1_invoker, v1_definer, v2_invoker, v2_definer;
DROP TABLE t1, t2, t3;
# Testing SQLCOM_DELETE_MULTI
+connection default;
CREATE TABLE t1 (a INT) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
CREATE TABLE t2 (a INT) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t2.fix';
CREATE TABLE t3 (a INT);
@@ -380,98 +416,101 @@ DELETE a1 FROM v2_definer a1,v1_invoker a2 WHERE a1.a=a2.a;
DELETE a1 FROM v2_definer a1,v1_definer a2 WHERE a1.a=a2.a;
DELETE a1 FROM v2_definer a1,v2_invoker a2 WHERE a1.a=a2.a;
DELETE a1 FROM v2_definer a1,v2_definer a2 WHERE a1.a=a2.a;
+connection user;
DELETE a1 FROM t1 a1,t1 a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM t1 a1,t2 a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM t1 a1,t3 a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM t1 a1,v1_invoker a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM t1 a1,v1_definer a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM t1 a1,v2_invoker a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM t1 a1,v2_definer a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM t2 a1,t1 a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM t2 a1,t2 a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM t2 a1,t3 a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM t2 a1,v1_invoker a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM t2 a1,v1_definer a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM t2 a1,v2_invoker a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM t2 a1,v2_definer a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM t3 a1,t1 a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM t3 a1,t2 a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM t3 a1,t3 a2 WHERE a1.a=a2.a;
DELETE a1 FROM t3 a1,v1_invoker a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM t3 a1,v1_definer a2 WHERE a1.a=a2.a;
DELETE a1 FROM t3 a1,v2_invoker a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM t3 a1,v2_definer a2 WHERE a1.a=a2.a;
DELETE a1 FROM v1_invoker a1,t1 a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM v1_invoker a1,t2 a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM v1_invoker a1,t3 a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM v1_invoker a1,v1_invoker a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM v1_invoker a1,v1_definer a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM v1_invoker a1,v2_invoker a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM v1_invoker a1,v2_definer a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM v1_definer a1,t1 a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM v1_definer a1,t2 a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM v1_definer a1,t3 a2 WHERE a1.a=a2.a;
DELETE a1 FROM v1_definer a1,v1_invoker a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM v1_definer a1,v1_definer a2 WHERE a1.a=a2.a;
DELETE a1 FROM v1_definer a1,v2_invoker a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM v1_definer a1,v2_definer a2 WHERE a1.a=a2.a;
DELETE a1 FROM v2_invoker a1,t1 a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM v2_invoker a1,t2 a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM v2_invoker a1,t3 a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM v2_invoker a1,v1_invoker a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM v2_invoker a1,v1_definer a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM v2_invoker a1,v2_invoker a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM v2_invoker a1,v2_definer a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM v2_definer a1,t1 a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM v2_definer a1,t2 a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM v2_definer a1,t3 a2 WHERE a1.a=a2.a;
DELETE a1 FROM v2_definer a1,v1_invoker a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM v2_definer a1,v1_definer a2 WHERE a1.a=a2.a;
DELETE a1 FROM v2_definer a1,v2_invoker a2 WHERE a1.a=a2.a;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE a1 FROM v2_definer a1,v2_definer a2 WHERE a1.a=a2.a;
+connection default;
DROP VIEW v1_invoker, v1_definer, v2_invoker, v2_definer;
DROP TABLE t1, t2, t3;
# Testing SQLCOM_CREATE_VIEW
+connection default;
CREATE TABLE t1 (a INT) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
INSERT INTO t1 VALUES (10);
CREATE SQL SECURITY INVOKER VIEW v1_invoker AS SELECT * FROM t1;
@@ -480,15 +519,18 @@ CREATE VIEW v2 AS SELECT * FROM v1_invoker;
DROP VIEW v2;
CREATE VIEW v2 AS SELECT * FROM v1_definer;
DROP VIEW v2;
+connection user;
CREATE VIEW v2 AS SELECT * FROM t1;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
CREATE VIEW v2 AS SELECT * FROM v1_invoker;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
CREATE VIEW v2 AS SELECT * FROM v1_definer;
DROP VIEW v2;
+connection default;
DROP VIEW v1_invoker, v1_definer;
DROP TABLE t1;
# Testing SQLCOM_INSERT_SELECT
+connection default;
CREATE TABLE t1 (a INT) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
INSERT INTO t1 VALUES (10);
CREATE SQL SECURITY INVOKER VIEW v1_invoker AS SELECT * FROM t1;
@@ -502,26 +544,29 @@ INSERT INTO v1_invoker SELECT * FROM v1_definer WHERE a=20;
INSERT INTO v1_definer SELECT * FROM t1 WHERE a=20;
INSERT INTO v1_definer SELECT * FROM v1_invoker WHERE a=20;
INSERT INTO v1_definer SELECT * FROM v1_definer WHERE a=20;
+connection user;
INSERT INTO t1 SELECT * FROM t1 WHERE a=20;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
INSERT INTO t1 SELECT * FROM v1_invoker WHERE a=20;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
INSERT INTO t1 SELECT * FROM v1_definer WHERE a=20;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
INSERT INTO v1_invoker SELECT * FROM t1 WHERE a=20;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
INSERT INTO v1_invoker SELECT * FROM v1_invoker WHERE a=20;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
INSERT INTO v1_invoker SELECT * FROM v1_definer WHERE a=20;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
INSERT INTO v1_definer SELECT * FROM t1 WHERE a=20;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
INSERT INTO v1_definer SELECT * FROM v1_invoker WHERE a=20;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
INSERT INTO v1_definer SELECT * FROM v1_definer WHERE a=20;
+connection default;
DROP VIEW v1_invoker, v1_definer;
DROP TABLE t1;
# Testing SQLCOM_REPLACE_SELECT
+connection default;
CREATE TABLE t1 (a INT) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
INSERT INTO t1 VALUES (10);
CREATE SQL SECURITY INVOKER VIEW v1_invoker AS SELECT * FROM t1;
@@ -544,27 +589,30 @@ REPLACE INTO v1_definer SELECT * FROM v1_invoker WHERE a=20;
ERROR 42000: CONNECT Unsupported command
REPLACE INTO v1_definer SELECT * FROM v1_definer WHERE a=20;
ERROR 42000: CONNECT Unsupported command
+connection user;
REPLACE INTO t1 SELECT * FROM t1 WHERE a=20;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
REPLACE INTO t1 SELECT * FROM v1_invoker WHERE a=20;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
REPLACE INTO t1 SELECT * FROM v1_definer WHERE a=20;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
REPLACE INTO v1_invoker SELECT * FROM t1 WHERE a=20;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
REPLACE INTO v1_invoker SELECT * FROM v1_invoker WHERE a=20;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
REPLACE INTO v1_invoker SELECT * FROM v1_definer WHERE a=20;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
REPLACE INTO v1_definer SELECT * FROM t1 WHERE a=20;
ERROR 42000: CONNECT Unsupported command
REPLACE INTO v1_definer SELECT * FROM v1_invoker WHERE a=20;
ERROR 42000: CONNECT Unsupported command
REPLACE INTO v1_definer SELECT * FROM v1_definer WHERE a=20;
ERROR 42000: CONNECT Unsupported command
+connection default;
DROP VIEW v1_invoker, v1_definer;
DROP TABLE t1;
# Testing SQLCOM_RENAME_TABLE
+connection default;
CREATE TABLE t1 (a INT) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
INSERT INTO t1 VALUES (10);
RENAME TABLE t1 TO t2;
@@ -574,10 +622,13 @@ t2 CREATE TABLE `t2` (
`a` int(11) DEFAULT NULL
) ENGINE=CONNECT DEFAULT CHARSET=latin1 `TABLE_TYPE`=fix `FILE_NAME`='t1.fix'
RENAME TABLE t2 TO t1;
+connection user;
RENAME TABLE t1 TO t2;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
DROP TABLE t1;
# Testing SQLCOM_ALTER_TABLE (for ALTER..RENAME)
+connection default;
CREATE TABLE t1 (a INT) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
INSERT INTO t1 VALUES (10);
ALTER TABLE t1 RENAME TO t2;
@@ -587,20 +638,26 @@ t2 CREATE TABLE `t2` (
`a` int(11) DEFAULT NULL
) ENGINE=CONNECT DEFAULT CHARSET=latin1 `TABLE_TYPE`=fix `FILE_NAME`='t1.fix'
ALTER TABLE t2 RENAME TO t1;
+connection user;
ALTER TABLE t1 RENAME TO t2;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
DROP TABLE t1;
# Testing SQLCOM_ALTER_TABLE (changing ENGINE to non-CONNECT)
+connection default;
CREATE TABLE t1 (a INT) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
INSERT INTO t1 VALUES (10);
ALTER TABLE t1 ENGINE=MyISAM;
DROP TABLE t1;
CREATE TABLE t1 (a INT) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
INSERT INTO t1 VALUES (10);
+connection user;
ALTER TABLE t1 ENGINE=MyISAM;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
DROP TABLE t1;
# Testing SQLCOM_ALTER_TABLE (changing ENGINE to CONNECT)
+connection default;
CREATE TABLE t1 (a INT) ENGINE=MyISAM;
INSERT INTO t1 VALUES (10);
SELECT * FROM t1;
@@ -610,63 +667,83 @@ ALTER TABLE t1 ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
DROP TABLE t1;
CREATE TABLE t1 (a INT) ENGINE=MyISAM;
INSERT INTO t1 VALUES (10);
+connection user;
ALTER TABLE t1 ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
DROP TABLE t1;
# Testing SQLCOM_OPTIMIZE
+connection default;
CREATE TABLE t1 (a INT NOT NULL, KEY(a)) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
INSERT INTO t1 VALUES (10);
OPTIMIZE TABLE t1;
Table Op Msg_type Msg_text
test.t1 optimize status OK
+connection user;
OPTIMIZE TABLE t1;
Table Op Msg_type Msg_text
-test.t1 optimize Error Access denied for user 'user'@'localhost' (using password: NO)
+test.t1 optimize Error Access denied; you need (at least one of) the FILE privilege(s) for this operation
test.t1 optimize Error Got error 122 'This operation requires the FILE privilege' from CONNECT
test.t1 optimize error Corrupt
+connection default;
DROP TABLE t1;
# Testing SQLCOM_ALTER_TABLE (adding columns)
+connection default;
CREATE TABLE t1 (a INT) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
INSERT INTO t1 VALUES (10);
ALTER TABLE t1 ADD b INT;
Warnings:
Warning 1105 This is an outward table, table data were not modified.
+connection user;
ALTER TABLE t1 ADD c INT;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
DROP TABLE t1;
# Testing SQLCOM_ALTER_TABLE (removing columns)
+connection default;
CREATE TABLE t1 (a INT,b INT,c INT) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
INSERT INTO t1 VALUES (10,10,10);
ALTER TABLE t1 DROP b;
Warnings:
Warning 1105 This is an outward table, table data were not modified.
+connection user;
ALTER TABLE t1 DROP c;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
DROP TABLE t1;
# Testing SQLCOM_ALTER_TABLE (adding keys)
+connection default;
CREATE TABLE t1 (a INT NOT NULL,b INT NOT NULL) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
INSERT INTO t1 VALUES (10,10);
ALTER TABLE t1 ADD KEY(a);
+connection user;
ALTER TABLE t1 ADD KEY(b);
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
DROP TABLE t1;
# Testing SQLCOM_ALTER_TABLE (removing keys)
+connection default;
CREATE TABLE t1 (a INT NOT NULL,b INT NOT NULL, KEY a(a), KEY b(b)) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
INSERT INTO t1 VALUES (10,10);
ALTER TABLE t1 DROP KEY a;
+connection user;
ALTER TABLE t1 DROP KEY b;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
DROP TABLE t1;
# Testing SQLCOM_CREATE_INDEX and SQLCOM_DROP_INDEX
+connection default;
CREATE TABLE t1 (a INT NOT NULL,b INT NOT NULL) ENGINE=CONNECT TABLE_TYPE=fix FILE_NAME='t1.fix';
INSERT INTO t1 VALUES (10,10);
CREATE INDEX a ON t1 (a);
DROP INDEX a ON t1;
CREATE INDEX a ON t1 (a);
+connection user;
CREATE INDEX b ON t1 (b);
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DROP INDEX a ON t1;
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
DROP TABLE t1;
# Testing stored procedures
CREATE PROCEDURE p_definer() SQL SECURITY DEFINER
@@ -680,10 +757,12 @@ DROP TABLE t1;
CALL p_invoker();
DROP TABLE t1;
CALL p_baddefiner();
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection user;
CALL p_invoker();
-ERROR 28000: Access denied for user 'user'@'localhost' (using password: NO)
+ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
CALL p_definer();
+connection default;
DROP TABLE t1;
DROP PROCEDURE p_definer;
DROP PROCEDURE p_invoker;
diff --git a/storage/connect/mysql-test/connect/r/ini_grant.result b/storage/connect/mysql-test/connect/r/ini_grant.result
index ce53b0ef5ca..96d5e192c7d 100644
--- a/storage/connect/mysql-test/connect/r/ini_grant.result
+++ b/storage/connect/mysql-test/connect/r/ini_grant.result
@@ -5,6 +5,8 @@ set sql_mode="";
GRANT ALL PRIVILEGES ON *.* TO user@localhost;
REVOKE FILE ON *.* FROM user@localhost;
set sql_mode=default;
+connect user,localhost,user,,;
+connection user;
SELECT user();
user()
user@localhost
@@ -33,11 +35,13 @@ DROP VIEW v1;
DROP TABLE t1;
CREATE TABLE t1 (sec CHAR(10) NOT NULL FLAG=1, val CHAR(10) NOT NULL) ENGINE=CONNECT TABLE_TYPE=INI FILE_NAME='t1.EXT';
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
SELECT user();
user()
root@localhost
CREATE TABLE t1 (sec CHAR(10) NOT NULL FLAG=1, val CHAR(10) NOT NULL) ENGINE=CONNECT TABLE_TYPE=INI FILE_NAME='t1.EXT';
INSERT INTO t1 VALUES ('sec1','val1');
+connection user;
SELECT user();
user()
user@localhost
@@ -58,10 +62,12 @@ ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for
CREATE VIEW v1 AS SELECT * FROM t1;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
# Testing a VIEW created with FILE privileges but accessed with no FILE
+connection default;
SELECT user();
user()
root@localhost
CREATE SQL SECURITY INVOKER VIEW v1 AS SELECT * FROM t1;
+connection user;
SELECT user();
user()
user@localhost
@@ -73,6 +79,8 @@ UPDATE v1 SET val='val11';
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE FROM v1;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+disconnect user;
+connection default;
DROP VIEW v1;
DROP TABLE t1;
DROP USER user@localhost;
diff --git a/storage/connect/mysql-test/connect/r/jdbc.result b/storage/connect/mysql-test/connect/r/jdbc.result
index 6bf67ec416f..0dbdf851860 100644
--- a/storage/connect/mysql-test/connect/r/jdbc.result
+++ b/storage/connect/mysql-test/connect/r/jdbc.result
@@ -238,9 +238,9 @@ DROP TABLE t1, connect.emp;
CREATE TABLE t2 (command varchar(128) not null,number int(5) not null flag=1,message varchar(255) flag=2) ENGINE=CONNECT TABLE_TYPE=JDBC CONNECTION='jdbc:mariadb://localhost:PORT/connect' OPTION_LIST='User=root,Execsrc=1';
SELECT * FROM t2 WHERE command='drop table tx1';
command number message
-drop table tx1 0 Execute: java.sql.SQLSyntaxErrorException: (conn:23) Unknown table 'connect.tx1'
+drop table tx1 0 Execute: java.sql.SQLSyntaxErrorException: (conn:24) Unknown table 'connect.tx1'
Warnings:
-Warning 1105 Execute: java.sql.SQLSyntaxErrorException: (conn:23) Unknown table 'connect.tx1'
+Warning 1105 Execute: java.sql.SQLSyntaxErrorException: (conn:24) Unknown table 'connect.tx1'
SELECT * FROM t2 WHERE command = 'create table tx1 (a int not null, b char(32), c double(8,2))';
command number message
create table tx1 (a int not null, b char(32), c double(8,2)) 0 Affected rows
diff --git a/storage/connect/mysql-test/connect/r/jdbc_new.result b/storage/connect/mysql-test/connect/r/jdbc_new.result
index f49cb6b6d51..33d8bd3b7d8 100644
--- a/storage/connect/mysql-test/connect/r/jdbc_new.result
+++ b/storage/connect/mysql-test/connect/r/jdbc_new.result
@@ -1,3 +1,7 @@
+connect master,127.0.0.1,root,,test,$MASTER_MYPORT,;
+connect slave,127.0.0.1,root,,test,$SLAVE_MYPORT,;
+connection master;
+connection slave;
SET GLOBAL time_zone='+1:00';
CREATE TABLE t1 (a int, b char(10));
INSERT INTO t1 VALUES (NULL,NULL),(0,'test00'),(1,'test01'),(2,'test02'),(3,'test03');
@@ -11,6 +15,7 @@ NULL NULL
#
# Testing errors
#
+connection master;
SET GLOBAL time_zone='+1:00';
CREATE TABLE t1 ENGINE=CONNECT TABLE_TYPE=JDBC
CONNECTION='jdbc:mysql://127.0.0.1:SLAVE_PORT/test?user=unknown';
@@ -36,10 +41,14 @@ ERROR HY000: Got error 174 'ExecuteQuery: java.sql.SQLSyntaxErrorException: Unkn
DROP TABLE t1;
CREATE TABLE t1 (a int, b char(10)) ENGINE=CONNECT TABLE_TYPE=JDBC
CONNECTION='jdbc:mysql://127.0.0.1:SLAVE_PORT/test?user=root';
+connection slave;
ALTER TABLE t1 RENAME t1backup;
+connection master;
SELECT * FROM t1;
ERROR HY000: Got error 174 'ExecuteQuery: java.sql.SQLSyntaxErrorException: Table 'test.t1' doesn't exist' from CONNECT
+connection slave;
ALTER TABLE t1backup RENAME t1;
+connection master;
DROP TABLE t1;
#
# Testing SELECT, etc.
@@ -108,6 +117,7 @@ NULL NULL
2 0
3 0
DROP TABLE t1;
+connection slave;
DROP TABLE t1;
#
# Testing numeric data types
@@ -126,6 +136,7 @@ t1 CREATE TABLE `t1` (
`h` decimal(20,5) DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
INSERT INTO t1 VALUES(100,3333,41235,1234567890,235000000000,3.14159265,3.14159265,3141.59265);
+connection master;
CREATE TABLE t1 ENGINE=CONNECT TABLE_TYPE=JDBC
CONNECTION='jdbc:mysql://127.0.0.1:SLAVE_PORT/test?user=root';
SHOW CREATE TABLE t1;
@@ -144,6 +155,7 @@ SELECT * FROM t1;
a b c d e f g h
100 3333 41235 1234567890 235000000000 3 3 3141.59265
DROP TABLE t1;
+connection slave;
DROP TABLE t1;
#
# Testing character data types
@@ -159,6 +171,7 @@ INSERT INTO t1 VALUES('Welcome','Hello, World');
SELECT * FROM t1;
a b
Welcome Hello, World
+connection master;
CREATE TABLE t1 ENGINE=CONNECT TABLE_TYPE=JDBC
CONNECTION='jdbc:mysql://127.0.0.1:SLAVE_PORT/test?user=root';
SHOW CREATE TABLE t1;
@@ -171,6 +184,7 @@ SELECT * FROM t1;
a b
Welcome Hello, World
DROP TABLE t1;
+connection slave;
DROP TABLE t1;
#
# Testing temporal data types
@@ -182,7 +196,7 @@ t1 CREATE TABLE `t1` (
`a` date DEFAULT NULL,
`b` datetime DEFAULT NULL,
`c` time DEFAULT NULL,
- `d` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+ `d` timestamp NOT NULL DEFAULT current_timestamp() ON UPDATE current_timestamp(),
`e` year(4) DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
INSERT IGNORE INTO t1 VALUES('2003-05-27 10:45:23','2003-05-27 10:45:23','2003-05-27 10:45:23','2003-05-27 10:45:23','2003-05-27 10:45:23');
@@ -193,6 +207,7 @@ Warning 1265 Data truncated for column 'e' at row 1
SELECT * FROM t1;
a b c d e
2003-05-27 2003-05-27 10:45:23 10:45:23 2003-05-27 10:45:23 2003
+connection master;
CREATE TABLE t1 ENGINE=CONNECT TABLE_TYPE=JDBC
CONNECTION='jdbc:mysql://127.0.0.1:SLAVE_PORT/test?user=root';
SHOW CREATE TABLE t1;
@@ -201,13 +216,15 @@ t1 CREATE TABLE `t1` (
`a` date DEFAULT NULL,
`b` datetime DEFAULT NULL,
`c` time DEFAULT NULL,
- `d` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+ `d` timestamp NOT NULL DEFAULT current_timestamp() ON UPDATE current_timestamp(),
`e` year(4) DEFAULT NULL
) ENGINE=CONNECT DEFAULT CHARSET=latin1 CONNECTION='jdbc:mysql://127.0.0.1:SLAVE_PORT/test?user=root' `TABLE_TYPE`='JDBC'
SELECT * FROM t1;
a b c d e
2003-05-27 2003-05-27 11:45:23 10:45:23 2003-05-27 10:45:23 2003
DROP TABLE t1;
+connection slave;
DROP TABLE t1;
SET GLOBAL time_zone=SYSTEM;
+connection master;
SET GLOBAL time_zone=SYSTEM;
diff --git a/storage/connect/mysql-test/connect/r/jdbc_postgresql.result b/storage/connect/mysql-test/connect/r/jdbc_postgresql.result
index bec1dc8725b..07cc3c465ea 100644
--- a/storage/connect/mysql-test/connect/r/jdbc_postgresql.result
+++ b/storage/connect/mysql-test/connect/r/jdbc_postgresql.result
@@ -1,4 +1,4 @@
-SET GLOBAL connect_class_path='C:/MariaDB-10.0/MariaDB/storage/connect/mysql-test/connect/std_data/JavaWrappers.jar;C:/Jconnectors/postgresql-42.2.1.jar';
+SET GLOBAL connect_class_path='C:/MariaDB-10.2/MariaDB/storage/connect/mysql-test/connect/std_data/JavaWrappers.jar;C:/Jconnectors/postgresql-42.2.1.jar';
CREATE TABLE t2 (
command varchar(128) not null,
number int(5) not null flag=1,
diff --git a/storage/connect/mysql-test/connect/r/mysql_discovery.result b/storage/connect/mysql-test/connect/r/mysql_discovery.result
index 4580c68558f..32bd4761f37 100644
--- a/storage/connect/mysql-test/connect/r/mysql_discovery.result
+++ b/storage/connect/mysql-test/connect/r/mysql_discovery.result
@@ -1,5 +1,10 @@
+connect master,127.0.0.1,root,,test,$MASTER_MYPORT,;
+connect slave,127.0.0.1,root,,test,$SLAVE_MYPORT,;
+connection master;
CREATE DATABASE connect;
+connection slave;
CREATE DATABASE connect;
+connection slave;
CREATE TABLE t1 (
`id` int(20) primary key,
`group` int NOT NULL default 1,
@@ -7,14 +12,15 @@ CREATE TABLE t1 (
`a\\` int unsigned,
`name` varchar(32) default 'name')
DEFAULT CHARSET=latin1;
+connection master;
CREATE TABLE t1 ENGINE=CONNECT TABLE_TYPE=MYSQL
CONNECTION='mysql://root@127.0.0.1:SLAVE_PORT/test/t1';
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`id` int(20) NOT NULL,
- `group` int(11) NOT NULL DEFAULT '1',
- `a\\b` int(11) NOT NULL DEFAULT '2',
+ `group` int(11) NOT NULL DEFAULT 1,
+ `a\\b` int(11) NOT NULL DEFAULT 2,
`a\\` int(10) unsigned DEFAULT NULL,
`name` varchar(32) DEFAULT 'name'
) ENGINE=CONNECT DEFAULT CHARSET=latin1 CONNECTION='mysql://root@127.0.0.1:SLAVE_PORT/test/t1' `TABLE_TYPE`='MYSQL'
@@ -25,12 +31,15 @@ id group a\\b a\\ name
1 1 2 NULL foo
2 1 2 NULL fee
DROP TABLE t1;
+connection slave;
SELECT * FROM t1;
id group a\\b a\\ name
1 1 2 NULL foo
2 1 2 NULL fee
DROP TABLE t1;
+connection master;
DROP TABLE IF EXISTS connect.t1;
DROP DATABASE IF EXISTS connect;
+connection slave;
DROP TABLE IF EXISTS connect.t1;
DROP DATABASE IF EXISTS connect;
diff --git a/storage/connect/mysql-test/connect/r/mysql_exec.result b/storage/connect/mysql-test/connect/r/mysql_exec.result
index add98a6235d..cc77240503b 100644
--- a/storage/connect/mysql-test/connect/r/mysql_exec.result
+++ b/storage/connect/mysql-test/connect/r/mysql_exec.result
@@ -1,8 +1,13 @@
+connect master,127.0.0.1,root,,test,$MASTER_MYPORT,;
+connect slave,127.0.0.1,root,,test,$SLAVE_MYPORT,;
+connection master;
CREATE DATABASE connect;
+connection slave;
CREATE DATABASE connect;
#
# Checking Sending Commands
#
+connection master;
CREATE TABLE t1 (
command VARCHAR(128) NOT NULL,
warnings INT(4) NOT NULL FLAG=3,
@@ -54,6 +59,7 @@ Warnings:
Warning 1105 Affected rows
DROP PROCEDURE p1;
DROP TABLE t1;
+connection slave;
SELECT * FROM t1;
id msg
1 One
@@ -62,7 +68,9 @@ id msg
4 Four
5 Five
DROP TABLE t1;
+connection master;
DROP TABLE IF EXISTS connect.t1;
DROP DATABASE IF EXISTS connect;
+connection slave;
DROP TABLE IF EXISTS connect.t1;
DROP DATABASE IF EXISTS connect;
diff --git a/storage/connect/mysql-test/connect/r/mysql_grant.result b/storage/connect/mysql-test/connect/r/mysql_grant.result
index 7bcae585425..83a906afed5 100644
--- a/storage/connect/mysql-test/connect/r/mysql_grant.result
+++ b/storage/connect/mysql-test/connect/r/mysql_grant.result
@@ -5,11 +5,14 @@ set sql_mode="";
GRANT ALL PRIVILEGES ON *.* TO user@localhost;
REVOKE FILE ON *.* FROM user@localhost;
set sql_mode=default;
+connect user,localhost,user,,;
+connection user;
SELECT user();
user()
user@localhost
CREATE TABLE t1 (a INT NOT NULL) ENGINE=CONNECT TABLE_TYPE=MySQL OPTION_LIST='host=localhost,user=root1,port=PORT';
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
SELECT user();
user()
root@localhost
@@ -21,6 +24,7 @@ a
10
20
30
+connection user;
SELECT user();
user()
user@localhost
@@ -39,10 +43,12 @@ ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for
CREATE VIEW v1 AS SELECT * FROM t1;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
# Testing a VIEW created with FILE privileges but accessed with no FILE
+connection default;
SELECT user();
user()
root@localhost
CREATE SQL SECURITY INVOKER VIEW v1 AS SELECT * FROM t1;
+connection user;
SELECT user();
user()
user@localhost
@@ -54,6 +60,8 @@ UPDATE v1 SET a=123;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE FROM v1;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+disconnect user;
+connection default;
SELECT user();
user()
root@localhost
diff --git a/storage/connect/mysql-test/connect/r/mysql_new.result b/storage/connect/mysql-test/connect/r/mysql_new.result
index 4dc394b8960..0394caac8af 100644
--- a/storage/connect/mysql-test/connect/r/mysql_new.result
+++ b/storage/connect/mysql-test/connect/r/mysql_new.result
@@ -1,5 +1,10 @@
+connect master,127.0.0.1,root,,test,$MASTER_MYPORT,;
+connect slave,127.0.0.1,root,,test,$SLAVE_MYPORT,;
+connection master;
CREATE DATABASE connect;
+connection slave;
CREATE DATABASE connect;
+connection slave;
CREATE TABLE t1 (a int, b char(10));
INSERT INTO t1 VALUES (NULL,NULL),(0,'test00'),(1,'test01'),(2,'test02'),(3,'test03');
SELECT * FROM t1;
@@ -12,6 +17,7 @@ NULL NULL
#
# Testing errors
#
+connection master;
CREATE TABLE t1 ENGINE=CONNECT TABLE_TYPE=MYSQL
CONNECTION='mysql://unknown@127.0.0.1:SLAVE_PORT/test/t1';
ERROR HY000: (1045) Access denied for user 'unknown'@'localhost' (using password: NO)
@@ -39,10 +45,14 @@ ERROR HY000: Got error 174 '(1054) Unknown column 'x' in 'field list' [SELECT `x
DROP TABLE t1;
CREATE TABLE t1 (a int, b char(10)) ENGINE=CONNECT TABLE_TYPE=MYSQL
CONNECTION='mysql://root@127.0.0.1:SLAVE_PORT/test/t1';
+connection slave;
ALTER TABLE t1 RENAME t1backup;
+connection master;
SELECT * FROM t1;
ERROR HY000: Got error 174 '(1146) Table 'test.t1' doesn't exist [SELECT `a`, `b` FROM `t1`]' from CONNECT
+connection slave;
ALTER TABLE t1backup RENAME t1;
+connection master;
DROP TABLE t1;
#
# Testing SELECT, etc.
@@ -111,6 +121,7 @@ NULL NULL
2 0
3 0
DROP TABLE t1;
+connection slave;
DROP TABLE t1;
#
# Testing numeric data types
@@ -129,6 +140,7 @@ t1 CREATE TABLE `t1` (
`h` decimal(20,5) DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
INSERT INTO t1 VALUES(100,3333,41235,1234567890,235000000000,3.14159265,3.14159265,3141.59265);
+connection master;
CREATE TABLE t1 ENGINE=CONNECT TABLE_TYPE=MYSQL
OPTION_LIST='host=127.0.0.1,user=root,port=SLAVE_PORT';
SHOW CREATE TABLE t1;
@@ -147,6 +159,7 @@ SELECT * FROM t1;
a b c d e f g h
100 3333 41235 1234567890 235000000000 3.14159 3.14159265 3141.59265
DROP TABLE t1;
+connection slave;
DROP TABLE t1;
#
# Testing character data types
@@ -162,6 +175,7 @@ INSERT INTO t1 VALUES('Welcome','Hello, World');
SELECT * FROM t1;
a b
Welcome Hello, World
+connection master;
CREATE TABLE t1 ENGINE=CONNECT TABLE_TYPE=MYSQL
CONNECTION='mysql://root@127.0.0.1:SLAVE_PORT';
SHOW CREATE TABLE t1;
@@ -174,6 +188,7 @@ SELECT * FROM t1;
a b
Welcome Hello, World
DROP TABLE t1;
+connection slave;
DROP TABLE t1;
#
# Testing temporal data types
@@ -185,7 +200,7 @@ t1 CREATE TABLE `t1` (
`a` date DEFAULT NULL,
`b` datetime DEFAULT NULL,
`c` time DEFAULT NULL,
- `d` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+ `d` timestamp NOT NULL DEFAULT current_timestamp() ON UPDATE current_timestamp(),
`e` year(4) DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
INSERT IGNORE INTO t1 VALUES('2003-05-27 10:45:23','2003-05-27 10:45:23','2003-05-27 10:45:23','2003-05-27 10:45:23','2003-05-27 10:45:23');
@@ -196,6 +211,7 @@ Warning 1265 Data truncated for column 'e' at row 1
SELECT * FROM t1;
a b c d e
2003-05-27 2003-05-27 10:45:23 10:45:23 2003-05-27 10:45:23 2003
+connection master;
CREATE TABLE t1 ENGINE=CONNECT TABLE_TYPE=MYSQL
CONNECTION='mysql://root@127.0.0.1:SLAVE_PORT';
SHOW CREATE TABLE t1;
@@ -204,15 +220,18 @@ t1 CREATE TABLE `t1` (
`a` date DEFAULT NULL,
`b` datetime DEFAULT NULL,
`c` time DEFAULT NULL,
- `d` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+ `d` timestamp NOT NULL DEFAULT current_timestamp() ON UPDATE current_timestamp(),
`e` year(4) DEFAULT NULL
) ENGINE=CONNECT DEFAULT CHARSET=latin1 CONNECTION='mysql://root@127.0.0.1:SLAVE_PORT' `TABLE_TYPE`='MYSQL'
SELECT * FROM t1;
a b c d e
2003-05-27 2003-05-27 10:45:23 10:45:23 2003-05-27 10:45:23 2003
DROP TABLE t1;
+connection slave;
DROP TABLE t1;
+connection master;
DROP TABLE IF EXISTS connect.t1;
DROP DATABASE IF EXISTS connect;
+connection slave;
DROP TABLE IF EXISTS connect.t1;
DROP DATABASE IF EXISTS connect;
diff --git a/storage/connect/mysql-test/connect/r/vcol.result b/storage/connect/mysql-test/connect/r/vcol.result
index 4c59a3b06d8..e0fd37203e4 100644
--- a/storage/connect/mysql-test/connect/r/vcol.result
+++ b/storage/connect/mysql-test/connect/r/vcol.result
@@ -26,4 +26,4 @@ agehired int(3) as (floor(datediff(hired,birth)/365.25)),
index (agehired)
)
engine=CONNECT table_type=FIX file_name='boys.txt' mapped=YES lrecl=47 ending=1;
-ERROR HY000: Key/Index cannot be defined on a non-stored computed column
+ERROR 42000: Table handler doesn't support NULL in given index. Please change column 'agehired' to be NOT NULL or use another handler
diff --git a/storage/connect/mysql-test/connect/r/xml.result b/storage/connect/mysql-test/connect/r/xml.result
index 99739b1ec10..6a0c9db27b3 100644
--- a/storage/connect/mysql-test/connect/r/xml.result
+++ b/storage/connect/mysql-test/connect/r/xml.result
@@ -323,7 +323,7 @@ HEX(c) 3F3F3F3F3F3F3F
Warnings:
Level Warning
Code 1366
-Message Incorrect string value: '\xC3\x81\xC3\x82\xC3\x83...' for column 'c' at row 1
+Message Incorrect string value: '\xC3\x81\xC3\x82\xC3\x83...' for column `test`.`t1`.`c` at row 1
Level Warning
Code 1105
Message Out of range value ÃÂÃÄÅÆÇ for column 'c' at row 1
diff --git a/storage/connect/mysql-test/connect/r/xml2.result b/storage/connect/mysql-test/connect/r/xml2.result
index b8075fa1928..f7bbc17c8a0 100644
--- a/storage/connect/mysql-test/connect/r/xml2.result
+++ b/storage/connect/mysql-test/connect/r/xml2.result
@@ -325,7 +325,7 @@ HEX(c) 3F3F3F3F3F3F3F
Warnings:
Level Warning
Code 1366
-Message Incorrect string value: '\xC3\x81\xC3\x82\xC3\x83...' for column 'c' at row 1
+Message Incorrect string value: '\xC3\x81\xC3\x82\xC3\x83...' for column `test`.`t1`.`c` at row 1
Level Warning
Code 1105
Message Out of range value ÃÂÃÄÅÆÇ for column 'c' at row 1
diff --git a/storage/connect/mysql-test/connect/r/xml2_grant.result b/storage/connect/mysql-test/connect/r/xml2_grant.result
index 74f372f493c..a6e6c150b59 100644
--- a/storage/connect/mysql-test/connect/r/xml2_grant.result
+++ b/storage/connect/mysql-test/connect/r/xml2_grant.result
@@ -6,6 +6,8 @@ Warning 1105 No file name. Table will use t1.xml
CREATE USER user@localhost;
GRANT ALL PRIVILEGES ON *.* TO user@localhost;
REVOKE FILE ON *.* FROM user@localhost;
+connect user,localhost,user,,;
+connection user;
SELECT user();
user()
user@localhost
@@ -34,11 +36,13 @@ DROP VIEW v1;
DROP TABLE t1;
CREATE TABLE t1 (a INT NOT NULL) ENGINE=CONNECT TABLE_TYPE=XML OPTION_LIST='xmlsup=libxml2,rownode=row' FILE_NAME='t1.EXT';
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
SELECT user();
user()
root@localhost
CREATE TABLE t1 (a INT NOT NULL) ENGINE=CONNECT TABLE_TYPE=XML OPTION_LIST='xmlsup=libxml2,rownode=row' FILE_NAME='t1.EXT';
INSERT INTO t1 VALUES (10);
+connection user;
SELECT user();
user()
user@localhost
@@ -61,10 +65,12 @@ ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for
CREATE VIEW v1 AS SELECT * FROM t1;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
# Testing a VIEW created with FILE privileges but accessed with no FILE
+connection default;
SELECT user();
user()
root@localhost
CREATE SQL SECURITY INVOKER VIEW v1 AS SELECT * FROM t1;
+connection user;
SELECT user();
user()
user@localhost
@@ -76,6 +82,7 @@ UPDATE v1 SET a=123;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE FROM v1;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
SELECT user();
user()
root@localhost
@@ -85,12 +92,15 @@ CREATE TABLE t1 (a INT NOT NULL) ENGINE=CONNECT TABLE_TYPE=XML OPTION_LIST='xmls
Warnings:
Warning 1105 No file name. Table will use t1.xml
INSERT INTO t1 VALUES (10);
+connection user;
SELECT user();
user()
user@localhost
ALTER TABLE t1 FILE_NAME='t1.EXT';
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
DROP TABLE t1;
+disconnect user;
DROP USER user@localhost;
#
# End of grant.inc
diff --git a/storage/connect/mysql-test/connect/r/xml2_mdev5261.result b/storage/connect/mysql-test/connect/r/xml2_mdev5261.result
index 23e72c6eb0f..0ee5712dd02 100644
--- a/storage/connect/mysql-test/connect/r/xml2_mdev5261.result
+++ b/storage/connect/mysql-test/connect/r/xml2_mdev5261.result
@@ -19,7 +19,7 @@ SELECT * FROM t1 WHERE i = 5;
i
5
ALTER TABLE t1 DROP INDEX i;
-ERROR 42000: Can't DROP 'i'; check that column/key exists
+ERROR 42000: Can't DROP INDEX `i`; check that it exists
DROP INDEX i ON t1;
-ERROR 42000: Can't DROP 'i'; check that column/key exists
+ERROR 42000: Can't DROP INDEX `i`; check that it exists
DROP TABLE t1;
diff --git a/storage/connect/mysql-test/connect/r/xml_grant.result b/storage/connect/mysql-test/connect/r/xml_grant.result
index d2a2e444e81..f1a6ad101cb 100644
--- a/storage/connect/mysql-test/connect/r/xml_grant.result
+++ b/storage/connect/mysql-test/connect/r/xml_grant.result
@@ -4,6 +4,8 @@
CREATE USER user@localhost;
GRANT ALL PRIVILEGES ON *.* TO user@localhost;
REVOKE FILE ON *.* FROM user@localhost;
+connect user,localhost,user,,;
+connection user;
SELECT user();
user()
user@localhost
@@ -32,11 +34,13 @@ DROP VIEW v1;
DROP TABLE t1;
CREATE TABLE t1 (a INT NOT NULL) ENGINE=CONNECT TABLE_TYPE=XML OPTION_LIST='xmlsup=domdoc,rownode=row' FILE_NAME='t1.EXT';
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
SELECT user();
user()
root@localhost
CREATE TABLE t1 (a INT NOT NULL) ENGINE=CONNECT TABLE_TYPE=XML OPTION_LIST='xmlsup=domdoc,rownode=row' FILE_NAME='t1.EXT';
INSERT INTO t1 VALUES (10);
+connection user;
SELECT user();
user()
user@localhost
@@ -59,10 +63,12 @@ ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for
CREATE VIEW v1 AS SELECT * FROM t1;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
# Testing a VIEW created with FILE privileges but accessed with no FILE
+connection default;
SELECT user();
user()
root@localhost
CREATE SQL SECURITY INVOKER VIEW v1 AS SELECT * FROM t1;
+connection user;
SELECT user();
user()
user@localhost
@@ -74,6 +80,7 @@ UPDATE v1 SET a=123;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
DELETE FROM v1;
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
SELECT user();
user()
root@localhost
@@ -83,12 +90,15 @@ CREATE TABLE t1 (a INT NOT NULL) ENGINE=CONNECT TABLE_TYPE=XML OPTION_LIST='xmls
Warnings:
Warning 1105 No file name. Table will use t1.xml
INSERT INTO t1 VALUES (10);
+connection user;
SELECT user();
user()
user@localhost
ALTER TABLE t1 FILE_NAME='t1.EXT';
ERROR 42000: Access denied; you need (at least one of) the FILE privilege(s) for this operation
+connection default;
DROP TABLE t1;
+disconnect user;
DROP USER user@localhost;
#
# End of grant.inc
diff --git a/storage/connect/mysql-test/connect/r/xml_mdev5261.result b/storage/connect/mysql-test/connect/r/xml_mdev5261.result
index 10959eab951..b5ae32c7784 100644
--- a/storage/connect/mysql-test/connect/r/xml_mdev5261.result
+++ b/storage/connect/mysql-test/connect/r/xml_mdev5261.result
@@ -17,7 +17,7 @@ SELECT * FROM t1 WHERE i = 5;
i
5
ALTER TABLE t1 DROP INDEX i;
-ERROR 42000: Can't DROP 'i'; check that column/key exists
+ERROR 42000: Can't DROP INDEX `i`; check that it exists
DROP INDEX i ON t1;
-ERROR 42000: Can't DROP 'i'; check that column/key exists
+ERROR 42000: Can't DROP INDEX `i`; check that it exists
DROP TABLE t1;
diff --git a/storage/connect/mysql-test/connect/t/dir.test b/storage/connect/mysql-test/connect/t/dir.test
index fb69813d9f0..684b5522b08 100644
--- a/storage/connect/mysql-test/connect/t/dir.test
+++ b/storage/connect/mysql-test/connect/t/dir.test
@@ -23,9 +23,7 @@ ALTER TABLE t1 OPTION_LIST='subdir=0';
SHOW CREATE TABLE t1;
--replace_result $MYSQLD_DATADIR DATADIR/
SELECT fname, ftype, size FROM t1 ORDER BY fname, ftype, size;
-
-# TODO: add a better error message
---error ER_GET_ERRMSG
+--error ER_TRUNCATED_WRONG_VALUE_FOR_FIELD
INSERT INTO t1 VALUES ('','','','');
DROP TABLE t1;
diff --git a/storage/connect/mysql-test/connect/t/vcol.test b/storage/connect/mysql-test/connect/t/vcol.test
index 88b822102d0..cdf37175f41 100644
--- a/storage/connect/mysql-test/connect/t/vcol.test
+++ b/storage/connect/mysql-test/connect/t/vcol.test
@@ -13,7 +13,7 @@ engine=CONNECT table_type=FIX file_name='boys.txt' mapped=YES lrecl=47 ending=1;
select * from t1;
drop table t1;
---error ER_KEY_BASED_ON_GENERATED_VIRTUAL_COLUMN
+--error ER_NULL_COLUMN_IN_INDEX
create table t1 (
#linenum int(6) not null default 0 special=rowid,
name char(12) not null,
diff --git a/storage/connect/tabrest.cpp b/storage/connect/tabrest.cpp
index 9c6b724973f..3ef2a460b9d 100644
--- a/storage/connect/tabrest.cpp
+++ b/storage/connect/tabrest.cpp
@@ -167,8 +167,7 @@ PQRYRES __stdcall ColREST(PGLOBAL g, PTOS tp, char *tab, char *db, bool info)
#endif // !MARIADB
// We used the file name relative to recorded datapath
- strcat(strcat(strcat(strcpy(filename, "."), slash), db), slash);
- strncat(filename, fn, _MAX_PATH - strlen(filename));
+ snprintf(filename, sizeof filename, IF_WIN(".\\%s\\%s","./%s/%s"), db, fn);
// Retrieve the file from the web and copy it locally
if (http && grf(g->Message, trace(515), http, uri, filename)) {
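The tabrest.cpp change above swaps an unbounded strcat() chain for a single length-checked snprintf(). Below is a minimal standalone sketch of that pattern, assuming a fixed-size buffer; the PATH_FMT #ifdef stands in for CONNECT's IF_WIN() macro, and build_datapath() is a hypothetical helper, not CONNECT code:

#include <stdio.h>

/* Bounded path building: one snprintf() call cannot overflow the
   buffer, unlike a chain of strcat() calls whose total length is
   never checked. */
#ifdef _WIN32
#define PATH_FMT ".\\%s\\%s"
#else
#define PATH_FMT "./%s/%s"
#endif

static int build_datapath(char *buf, size_t bufsz,
                          const char *db, const char *fn)
{
  /* snprintf returns the length it wanted to write; a value >= bufsz
     means the result was truncated and the caller can report it. */
  int n = snprintf(buf, bufsz, PATH_FMT, db, fn);
  return (n < 0 || (size_t) n >= bufsz) ? -1 : 0;
}

int main(void)
{
  char filename[260];
  if (build_datapath(filename, sizeof filename, "test", "emp.json") == 0)
    printf("%s\n", filename);
  return 0;
}

The return-value check is the part the strcat() version could not express: truncation becomes a detectable error instead of a silent buffer overrun.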
diff --git a/storage/connect/tabrest.h b/storage/connect/tabrest.h
index 9cf2d10a6b8..f08ac7984c9 100644
--- a/storage/connect/tabrest.h
+++ b/storage/connect/tabrest.h
@@ -5,10 +5,7 @@
/***********************************************************************/
#pragma once
-#if defined(__WIN__)
-static PCSZ slash = "\\";
-#else // !__WIN__
-static PCSZ slash = "/";
+#ifndef __WIN__
#define stricmp strcasecmp
#endif // !__WIN__
diff --git a/storage/federated/ha_federated.cc b/storage/federated/ha_federated.cc
index 26cc63885f3..2e8d5b12e81 100644
--- a/storage/federated/ha_federated.cc
+++ b/storage/federated/ha_federated.cc
@@ -561,8 +561,7 @@ static bool append_ident(String *string, const char *name, size_t length,
for (name_end= name+length; name < name_end; name+= clen)
{
uchar c= *(uchar *) name;
- if (!(clen= my_mbcharlen(system_charset_info, c)))
- clen= 1;
+ clen= my_charlen_fix(system_charset_info, name, name_end);
if (clen == 1 && c == (uchar) quote_char &&
(result= string->append(&quote_char, 1, system_charset_info)))
goto err;
@@ -615,8 +614,8 @@ int get_connection(MEM_ROOT *mem_root, FEDERATED_SHARE *share)
error_num=1;
goto error;
}
- DBUG_PRINT("info", ("get_server_by_name returned server at %lx",
- (long unsigned int) server));
+ DBUG_PRINT("info", ("get_server_by_name returned server at %p",
+ server));
/*
Most of these should never be empty strings, error handling will
@@ -717,15 +716,15 @@ static int parse_url(MEM_ROOT *mem_root, FEDERATED_SHARE *share, TABLE *table,
share->port= 0;
share->socket= 0;
- DBUG_PRINT("info", ("share at %lx", (long unsigned int) share));
+ DBUG_PRINT("info", ("share at %p", share));
DBUG_PRINT("info", ("Length: %u", (uint) table->s->connect_string.length));
DBUG_PRINT("info", ("String: '%.*s'", (int) table->s->connect_string.length,
table->s->connect_string.str));
share->connection_string= strmake_root(mem_root, table->s->connect_string.str,
table->s->connect_string.length);
- DBUG_PRINT("info",("parse_url alloced share->connection_string %lx",
- (long unsigned int) share->connection_string));
+ DBUG_PRINT("info",("parse_url alloced share->connection_string %p",
+ share->connection_string));
DBUG_PRINT("info",("share->connection_string %s",share->connection_string));
/*
@@ -738,9 +737,9 @@ static int parse_url(MEM_ROOT *mem_root, FEDERATED_SHARE *share, TABLE *table,
DBUG_PRINT("info",
("share->connection_string %s internal format \
- share->connection_string %lx",
+ share->connection_string %p",
share->connection_string,
- (long unsigned int) share->connection_string));
+ share->connection_string));
/* ok, so we do a little parsing, but not completely! */
share->parsed= FALSE;
@@ -794,8 +793,8 @@ static int parse_url(MEM_ROOT *mem_root, FEDERATED_SHARE *share, TABLE *table,
// Add a null for later termination of table name
share->connection_string[table->s->connect_string.length]= 0;
share->scheme= share->connection_string;
- DBUG_PRINT("info",("parse_url alloced share->scheme %lx",
- (long unsigned int) share->scheme));
+ DBUG_PRINT("info",("parse_url alloced share->scheme %p",
+ share->scheme));
/*
remove addition of null terminator and store length
@@ -1657,7 +1656,7 @@ public:
public:
bool handle_condition(THD *thd, uint sql_errno, const char* sqlstate,
- Sql_condition::enum_warning_level level,
+ Sql_condition::enum_warning_level *level,
const char* msg, Sql_condition ** cond_hdl)
{
return sql_errno >= ER_ABORTING_CONNECTION &&
@@ -3295,66 +3294,6 @@ int ha_federated::external_lock(THD *thd, int lock_type)
int error= 0;
DBUG_ENTER("ha_federated::external_lock");
- /*
- Support for transactions disabled until WL#2952 fixes it.
- */
-#ifdef XXX_SUPERCEDED_BY_WL2952
- if (lock_type != F_UNLCK)
- {
- ha_federated *trx= (ha_federated *)thd_get_ha_data(thd, ht);
-
- DBUG_PRINT("info",("federated not lock F_UNLCK"));
- if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
- {
- DBUG_PRINT("info",("federated autocommit"));
- /*
- This means we are doing an autocommit
- */
- error= connection_autocommit(TRUE);
- if (error)
- {
- DBUG_PRINT("info", ("error setting autocommit TRUE: %d", error));
- DBUG_RETURN(error);
- }
- trans_register_ha(thd, FALSE, ht);
- }
- else
- {
- DBUG_PRINT("info",("not autocommit"));
- if (!trx)
- {
- /*
- This is where a transaction gets its start
- */
- error= connection_autocommit(FALSE);
- if (error)
- {
- DBUG_PRINT("info", ("error setting autocommit FALSE: %d", error));
- DBUG_RETURN(error);
- }
- thd_set_ha_data(thd, ht, this);
- trans_register_ha(thd, TRUE, ht);
- /*
- Send a lock table to the remote end.
- We do not support this at the moment
- */
- if (thd->options & (OPTION_TABLE_LOCK))
- {
- DBUG_PRINT("info", ("We do not support lock table yet"));
- }
- }
- else
- {
- ha_federated *ptr;
- for (ptr= trx; ptr; ptr= ptr->trx_next)
- if (ptr == this)
- break;
- else if (!ptr->trx_next)
- ptr->trx_next= this;
- }
- }
- }
-#endif /* XXX_SUPERCEDED_BY_WL2952 */
table_will_be_deleted = FALSE;
DBUG_RETURN(error);
}
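Several DBUG_PRINT changes in this file replace "%lx" plus a (long unsigned int) cast with plain "%p". A small sketch of why, assuming an LLP64 target such as 64-bit Windows, where long is 32 bits but pointers are 64:

#include <stdio.h>

int main(void)
{
  int x = 42;
  int *p = &x;
  /* Casting a pointer to unsigned long for "%lx" discards the upper
     32 bits on LLP64 platforms. */
  printf("may truncate: %lx\n", (long unsigned int) p);
  /* "%p" takes the pointer directly and is correct everywhere; the
     standard requires the argument to be a void pointer. */
  printf("portable:     %p\n", (void *) p);
  return 0;
}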
diff --git a/storage/federatedx/ha_federatedx.cc b/storage/federatedx/ha_federatedx.cc
index 0c3d39c1ab5..840a5a68885 100644
--- a/storage/federatedx/ha_federatedx.cc
+++ b/storage/federatedx/ha_federatedx.cc
@@ -499,8 +499,7 @@ bool append_ident(String *string, const char *name, uint length,
for (name_end= name+length; name < name_end; name+= clen)
{
uchar c= *(uchar *) name;
- if (!(clen= my_mbcharlen(system_charset_info, c)))
- clen= 1;
+ clen= my_charlen_fix(system_charset_info, name, name_end);
if (clen == 1 && c == (uchar) quote_char &&
(result= string->append(&quote_char, 1, system_charset_info)))
goto err;
@@ -554,8 +553,8 @@ int get_connection(MEM_ROOT *mem_root, FEDERATEDX_SHARE *share)
error_num=1;
goto error;
}
- DBUG_PRINT("info", ("get_server_by_name returned server at %lx",
- (long unsigned int) server));
+ DBUG_PRINT("info", ("get_server_by_name returned server at %p",
+ server));
/*
Most of these should never be empty strings, error handling will
@@ -656,15 +655,15 @@ static int parse_url(MEM_ROOT *mem_root, FEDERATEDX_SHARE *share,
share->port= 0;
share->socket= 0;
- DBUG_PRINT("info", ("share at %lx", (long unsigned int) share));
+ DBUG_PRINT("info", ("share at %p", share));
DBUG_PRINT("info", ("Length: %u", (uint) table_s->connect_string.length));
DBUG_PRINT("info", ("String: '%.*s'", (int) table_s->connect_string.length,
table_s->connect_string.str));
share->connection_string= strmake_root(mem_root, table_s->connect_string.str,
table_s->connect_string.length);
- DBUG_PRINT("info",("parse_url alloced share->connection_string %lx",
- (long unsigned int) share->connection_string));
+ DBUG_PRINT("info",("parse_url alloced share->connection_string %p",
+ share->connection_string));
DBUG_PRINT("info",("share->connection_string: %s",share->connection_string));
/*
@@ -677,9 +676,9 @@ static int parse_url(MEM_ROOT *mem_root, FEDERATEDX_SHARE *share,
DBUG_PRINT("info",
("share->connection_string: %s internal format "
- "share->connection_string: %lx",
+ "share->connection_string: %p",
share->connection_string,
- (ulong) share->connection_string));
+ share->connection_string));
/* ok, so we do a little parsing, but not completely! */
share->parsed= FALSE;
@@ -732,8 +731,8 @@ static int parse_url(MEM_ROOT *mem_root, FEDERATEDX_SHARE *share,
// Add a null for later termination of table name
share->connection_string[table_s->connect_string.length]= 0;
share->scheme= share->connection_string;
- DBUG_PRINT("info",("parse_url alloced share->scheme: %lx",
- (ulong) share->scheme));
+ DBUG_PRINT("info",("parse_url alloced share->scheme: %p",
+ share->scheme));
/*
Remove addition of null terminator and store length
@@ -1789,7 +1788,7 @@ public:
public:
bool handle_condition(THD *thd, uint sql_errno, const char* sqlstate,
- Sql_condition::enum_warning_level level,
+ Sql_condition::enum_warning_level *level,
const char* msg, Sql_condition ** cond_hdl)
{
return sql_errno >= ER_ABORTING_CONNECTION &&
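Both FEDERATED and FederatedX update handle_condition() to receive Sql_condition::enum_warning_level by pointer rather than by value, tracking a server API change. A hedged sketch of what the by-pointer signature enables — the handler can rewrite the severity and the caller observes it; the enum, error number, and function name here are illustrative stand-ins, not MariaDB's internal API:

#include <stdio.h>

enum warn_level { WARN_NOTE, WARN_WARN, WARN_ERROR };

static int handle_condition(unsigned sql_errno, enum warn_level *level)
{
  if (sql_errno == 2013 /* lost connection */ && *level == WARN_ERROR)
  {
    *level = WARN_WARN;   /* demote: the caller sees the new level */
    return 1;             /* condition handled */
  }
  return 0;
}

int main(void)
{
  enum warn_level lvl = WARN_ERROR;
  int handled = handle_condition(2013, &lvl);
  printf("handled=%d level=%d\n", handled, lvl);  /* handled=1 level=1 */
  return 0;
}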
diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc
index 102293fc801..51e47ea2271 100644
--- a/storage/heap/ha_heap.cc
+++ b/storage/heap/ha_heap.cc
@@ -216,7 +216,7 @@ void ha_heap::update_key_stats()
else
{
ha_rows hash_buckets= file->s->keydef[i].hash_buckets;
- ha_rows no_records= hash_buckets ? (file->s->records/hash_buckets) : 2;
+ ulong no_records= hash_buckets ? (ulong)(file->s->records/hash_buckets) : 2;
if (no_records < 2)
no_records= 2;
key->rec_per_key[key->user_defined_key_parts-1]= no_records;
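The update_key_stats() change above computes the records-per-bucket estimate in ulong (the type of rec_per_key) instead of carrying the wider ha_rows type into a narrower assignment, and clamps it to a minimum of 2. A sketch of the same calculation, with plain C types standing in for ha_rows and ulong:

#include <stdio.h>

static unsigned long est_rec_per_key(unsigned long long records,
                                     unsigned long long hash_buckets)
{
  unsigned long no_records =
    hash_buckets ? (unsigned long)(records / hash_buckets) : 2;
  if (no_records < 2)
    no_records = 2;       /* never promise better than 2 rows per key */
  return no_records;
}

int main(void)
{
  printf("%lu\n", est_rec_per_key(1000000, 1024)); /* 976 */
  printf("%lu\n", est_rec_per_key(10, 1024));      /* clamped to 2 */
  return 0;
}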
diff --git a/storage/heap/heapdef.h b/storage/heap/heapdef.h
index 430fdaa8949..019e9d521b3 100644
--- a/storage/heap/heapdef.h
+++ b/storage/heap/heapdef.h
@@ -86,8 +86,7 @@ extern ulong hp_mask(ulong hashnr,ulong buffmax,ulong maxlength);
extern void hp_movelink(HASH_INFO *pos,HASH_INFO *next_link,
HASH_INFO *newlink);
extern int hp_rec_key_cmp(HP_KEYDEF *keydef,const uchar *rec1,
- const uchar *rec2,
- my_bool diff_if_only_endspace_difference);
+ const uchar *rec2);
extern int hp_key_cmp(HP_KEYDEF *keydef,const uchar *rec,
const uchar *key);
extern void hp_make_key(HP_KEYDEF *keydef,uchar *key,const uchar *rec);
diff --git a/storage/heap/hp_delete.c b/storage/heap/hp_delete.c
index bc3c82e9406..c60db98f59b 100644
--- a/storage/heap/hp_delete.c
+++ b/storage/heap/hp_delete.c
@@ -123,7 +123,7 @@ int hp_delete_key(HP_INFO *info, register HP_KEYDEF *keyinfo,
while (pos->ptr_to_rec != recpos)
{
- if (flag && !hp_rec_key_cmp(keyinfo, record, pos->ptr_to_rec, 0))
+ if (flag && !hp_rec_key_cmp(keyinfo, record, pos->ptr_to_rec))
last_ptr=pos; /* Previous same key */
gpos=pos;
if (!(pos=pos->next_key))
diff --git a/storage/heap/hp_hash.c b/storage/heap/hp_hash.c
index 8b8a20d685f..f0238d06010 100644
--- a/storage/heap/hp_hash.c
+++ b/storage/heap/hp_hash.c
@@ -513,8 +513,6 @@ ulong hp_rec_hashnr(register HP_KEYDEF *keydef, register const uchar *rec)
keydef Key definition
rec1 Record to compare
rec2 Other record to compare
- diff_if_only_endspace_difference
- Different number of end space is significant
NOTES
diff_if_only_endspace_difference is used to allow us to insert
@@ -525,8 +523,7 @@ ulong hp_rec_hashnr(register HP_KEYDEF *keydef, register const uchar *rec)
<> 0 Key differs
*/
-int hp_rec_key_cmp(HP_KEYDEF *keydef, const uchar *rec1, const uchar *rec2,
- my_bool diff_if_only_endspace_difference)
+int hp_rec_key_cmp(HP_KEYDEF *keydef, const uchar *rec1, const uchar *rec2)
{
HA_KEYSEG *seg,*endseg;
@@ -561,7 +558,7 @@ int hp_rec_key_cmp(HP_KEYDEF *keydef, const uchar *rec1, const uchar *rec2,
}
if (seg->charset->coll->strnncollsp(seg->charset,
pos1,char_length1,
- pos2,char_length2, 0))
+ pos2,char_length2))
return 1;
}
else if (seg->type == HA_KEYTYPE_VARTEXT1) /* Any VARCHAR segments */
@@ -601,9 +598,7 @@ int hp_rec_key_cmp(HP_KEYDEF *keydef, const uchar *rec1, const uchar *rec2,
if (cs->coll->strnncollsp(seg->charset,
pos1, char_length1,
- pos2, char_length2,
- seg->flag & HA_END_SPACE_ARE_EQUAL ?
- 0 : diff_if_only_endspace_difference))
+ pos2, char_length2))
return 1;
}
else
@@ -671,7 +666,7 @@ int hp_key_cmp(HP_KEYDEF *keydef, const uchar *rec, const uchar *key)
if (seg->charset->coll->strnncollsp(seg->charset,
(uchar*) pos, char_length_rec,
- (uchar*) key, char_length_key, 0))
+ (uchar*) key, char_length_key))
return 1;
}
else if (seg->type == HA_KEYTYPE_VARTEXT1) /* Any VARCHAR segments */
@@ -699,7 +694,7 @@ int hp_key_cmp(HP_KEYDEF *keydef, const uchar *rec, const uchar *key)
if (cs->coll->strnncollsp(seg->charset,
(uchar*) pos, char_length_rec,
- (uchar*) key, char_length_key, 0))
+ (uchar*) key, char_length_key))
return 1;
}
else
@@ -783,7 +778,6 @@ uint hp_rb_make_key(HP_KEYDEF *keydef, uchar *key,
uchar *pos= (uchar*) rec + seg->start;
DBUG_ASSERT(seg->type != HA_KEYTYPE_BIT);
-#ifdef HAVE_ISNAN
if (seg->type == HA_KEYTYPE_FLOAT)
{
float nr;
@@ -807,7 +801,6 @@ uint hp_rb_make_key(HP_KEYDEF *keydef, uchar *key,
continue;
}
}
-#endif
pos+= length;
while (length--)
{
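The hp_hash.c hunks above drop the diff_if_only_endspace_difference flag from hp_rec_key_cmp() because strnncollsp() no longer takes one: trailing-space handling is now decided by the collation alone. A simplified sketch of PAD SPACE comparison semantics, assuming a binary collation; strnncollsp_bin() is a stand-in for illustration, not the real collation API:

#include <stdio.h>
#include <string.h>

static int strnncollsp_bin(const char *a, size_t alen,
                           const char *b, size_t blen)
{
  while (alen && a[alen - 1] == ' ') alen--;   /* strip pad spaces */
  while (blen && b[blen - 1] == ' ') blen--;
  size_t min = alen < blen ? alen : blen;
  int c = memcmp(a, b, min);
  if (c) return c;
  /* equal prefix: the longer (non-space) string sorts after */
  return (alen > blen) - (alen < blen);
}

int main(void)
{
  printf("%d\n", strnncollsp_bin("abc  ", 5, "abc", 3)); /* 0: equal */
  printf("%d\n", strnncollsp_bin("abd", 3, "abc", 3));   /* > 0 */
  return 0;
}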
diff --git a/storage/heap/hp_scan.c b/storage/heap/hp_scan.c
index 3315cb05b3f..f07efe6cf67 100644
--- a/storage/heap/hp_scan.c
+++ b/storage/heap/hp_scan.c
@@ -50,7 +50,9 @@ int heap_scan(register HP_INFO *info, uchar *record)
}
else
{
- info->next_block+=share->block.records_in_block;
+ /* increase next_block to the next records_in_block boundary */
+ ulong rem= info->next_block % share->block.records_in_block;
+ info->next_block+=share->block.records_in_block - rem;
if (info->next_block >= share->records+share->deleted)
{
info->next_block= share->records+share->deleted;
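The heap_scan() fix above replaces a blind += records_in_block with an advance to the next exact multiple of records_in_block, so a scan position that sits mid-block cannot drift. A sketch of the same arithmetic, matching the committed code's behaviour of advancing a full block when already on a boundary:

#include <stdio.h>

static unsigned long next_block_boundary(unsigned long next_block,
                                         unsigned long records_in_block)
{
  /* round up to the next multiple of records_in_block */
  unsigned long rem = next_block % records_in_block;
  return next_block + (records_in_block - rem);
}

int main(void)
{
  /* with 128 records per block: 130 -> 256, 256 -> 384 */
  printf("%lu\n", next_block_boundary(130, 128));
  printf("%lu\n", next_block_boundary(256, 128));
  return 0;
}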
diff --git a/storage/heap/hp_update.c b/storage/heap/hp_update.c
index dfcb8b1f126..da83a9c76a8 100644
--- a/storage/heap/hp_update.c
+++ b/storage/heap/hp_update.c
@@ -37,7 +37,7 @@ int heap_update(HP_INFO *info, const uchar *old, const uchar *heap_new)
p_lastinx= share->keydef + info->lastinx;
for (keydef= share->keydef, end= keydef + share->keys; keydef < end; keydef++)
{
- if (hp_rec_key_cmp(keydef, old, heap_new, 0))
+ if (hp_rec_key_cmp(keydef, old, heap_new))
{
if ((*keydef->delete_key)(info, keydef, old, pos, keydef == p_lastinx) ||
(*keydef->write_key)(info, keydef, heap_new, pos))
@@ -76,7 +76,7 @@ int heap_update(HP_INFO *info, const uchar *old, const uchar *heap_new)
}
while (keydef >= share->keydef)
{
- if (hp_rec_key_cmp(keydef, old, heap_new, 0))
+ if (hp_rec_key_cmp(keydef, old, heap_new))
{
if ((*keydef->delete_key)(info, keydef, heap_new, pos, 0) ||
(*keydef->write_key)(info, keydef, old, pos))
diff --git a/storage/heap/hp_write.c b/storage/heap/hp_write.c
index 3a2d1cd6784..9ddbb40b706 100644
--- a/storage/heap/hp_write.c
+++ b/storage/heap/hp_write.c
@@ -386,7 +386,7 @@ int hp_write_key(HP_INFO *info, HP_KEYDEF *keyinfo,
do
{
if (pos->hash_of_key == hash_of_key &&
- ! hp_rec_key_cmp(keyinfo, record, pos->ptr_to_rec, 1))
+ ! hp_rec_key_cmp(keyinfo, record, pos->ptr_to_rec))
{
DBUG_RETURN(my_errno=HA_ERR_FOUND_DUPP_KEY);
}
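hp_write_key() above keeps its duplicate check cheap by comparing the stored hash_of_key before running the (now flag-less) full key comparison. A toy sketch of that two-stage filter, with strcmp() standing in for hp_rec_key_cmp() and a flat array standing in for the hash chain:

#include <stdio.h>
#include <string.h>

struct hash_entry { unsigned long hash_of_key; const char *key; };

static int find_dup(const struct hash_entry *chain, size_t n,
                    unsigned long hash_of_key, const char *key)
{
  for (size_t i = 0; i < n; i++)
    /* cheap hash test first; full compare only on a hash match */
    if (chain[i].hash_of_key == hash_of_key &&
        strcmp(chain[i].key, key) == 0)
      return 1;                  /* HA_ERR_FOUND_DUPP_KEY analogue */
  return 0;
}

int main(void)
{
  struct hash_entry chain[] = { {42, "alice"}, {17, "bob"} };
  printf("%d\n", find_dup(chain, 2, 42, "alice"));  /* 1: duplicate */
  printf("%d\n", find_dup(chain, 2, 42, "carol"));  /* 0: hash collision only */
  return 0;
}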
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt
index 6558c3ae0dc..1b5345cbddc 100644
--- a/storage/innobase/CMakeLists.txt
+++ b/storage/innobase/CMakeLists.txt
@@ -1,5 +1,5 @@
# Copyright (c) 2006, 2017, Oracle and/or its affiliates. All rights reserved.
-# Copyright (c) 2019, MariaDB Corporation.
+# Copyright (c) 2014, 2019, MariaDB Corporation.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -16,365 +16,13 @@
# This is the CMakeLists for InnoDB
-INCLUDE(CheckFunctionExists)
-INCLUDE(CheckCSourceCompiles)
-INCLUDE(CheckCSourceRuns)
-INCLUDE(lz4)
-INCLUDE(lzo)
-INCLUDE(lzma)
-INCLUDE(bzip2)
-INCLUDE(snappy)
-MYSQL_CHECK_LZ4()
-MYSQL_CHECK_LZO()
-MYSQL_CHECK_LZMA()
-MYSQL_CHECK_BZIP2()
-MYSQL_CHECK_SNAPPY()
-IF(CMAKE_CROSSCOMPILING)
- # Use CHECK_C_SOURCE_COMPILES instead of CHECK_C_SOURCE_RUNS when
- # cross-compiling. Not as precise, but usually good enough.
- # This only make sense for atomic tests in this file, this trick doesn't
- # work in a general case.
- MACRO(CHECK_C_SOURCE SOURCE VAR)
- CHECK_C_SOURCE_COMPILES("${SOURCE}" "${VAR}")
- ENDMACRO()
-ELSE()
- MACRO(CHECK_C_SOURCE SOURCE VAR)
- CHECK_C_SOURCE_RUNS("${SOURCE}" "${VAR}")
- ENDMACRO()
-ENDIF()
-
-# OS tests
-IF(UNIX)
- IF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
- CHECK_INCLUDE_FILES (libaio.h HAVE_LIBAIO_H)
- CHECK_LIBRARY_EXISTS(aio io_queue_init "" HAVE_LIBAIO)
- ADD_DEFINITIONS("-DUNIV_LINUX -D_GNU_SOURCE=1")
- IF(HAVE_LIBAIO_H AND HAVE_LIBAIO)
- ADD_DEFINITIONS(-DLINUX_NATIVE_AIO=1)
- LINK_LIBRARIES(aio)
- ENDIF()
- IF(HAVE_LIBNUMA)
- LINK_LIBRARIES(numa)
- ENDIF()
- ELSEIF(CMAKE_SYSTEM_NAME MATCHES "HP*")
- ADD_DEFINITIONS("-DUNIV_HPUX")
- ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "AIX")
- ADD_DEFINITIONS("-DUNIV_AIX")
- ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "SunOS")
- ADD_DEFINITIONS("-DUNIV_SOLARIS")
- ENDIF()
-ENDIF()
-
-IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
-# After: WL#5825 Using C++ Standard Library with MySQL code
-# we no longer use -fno-exceptions
-# SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions")
-ENDIF()
-
-# Enable InnoDB's UNIV_DEBUG and UNIV_SYNC_DEBUG in debug builds
-SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DUNIV_DEBUG -DUNIV_SYNC_DEBUG")
-
-# Add -Wconversion if compiling with GCC
-## As of Mar 15 2011 this flag causes 3573+ warnings. If you are reading this
-## please fix them and enable the following code:
-#IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
-#SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wconversion")
-#ENDIF()
-
-CHECK_FUNCTION_EXISTS(sched_getcpu HAVE_SCHED_GETCPU)
-
-MY_CHECK_AND_SET_COMPILER_FLAG("-Wno-class-memaccess")
-
-IF(NOT MSVC)
- # Work around MDEV-18417, MDEV-18656, MDEV-18417
- IF(WITH_ASAN AND CMAKE_COMPILER_IS_GNUCC AND
- CMAKE_C_COMPILER_VERSION VERSION_LESS "6.0.0")
- SET_SOURCE_FILES_PROPERTIES(trx/trx0rec.cc PROPERTIES COMPILE_FLAGS -O1)
- ENDIF()
-
- # either define HAVE_IB_GCC_ATOMIC_BUILTINS or not
- # workaround for old gcc on x86, gcc atomic ops only work under -march=i686
- IF(CMAKE_SYSTEM_PROCESSOR STREQUAL "i686" AND CMAKE_COMPILER_IS_GNUCC AND
- CMAKE_C_COMPILER_VERSION VERSION_LESS "4.4.0")
- SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=i686")
- SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=i686")
- ENDIF()
- CHECK_C_SOURCE(
- "
- int main()
- {
- long x;
- long y;
- long res;
-
- x = 10;
- y = 123;
- res = __sync_bool_compare_and_swap(&x, x, y);
- if (!res || x != y) {
- return(1);
- }
-
- x = 10;
- y = 123;
- res = __sync_bool_compare_and_swap(&x, x + 1, y);
- if (res || x != 10) {
- return(1);
- }
- x = 10;
- y = 123;
- res = __sync_add_and_fetch(&x, y);
- if (res != 123 + 10 || x != 123 + 10) {
- return(1);
- }
- return(0);
- }"
- HAVE_IB_GCC_ATOMIC_BUILTINS
- )
- CHECK_C_SOURCE(
- "
- int main()
- {
- long res;
- char c;
-
- c = 10;
- res = __sync_lock_test_and_set(&c, 123);
- if (res != 10 || c != 123) {
- return(1);
- }
- return(0);
- }"
- HAVE_IB_GCC_ATOMIC_BUILTINS_BYTE
- )
- CHECK_C_SOURCE(
- "#include<stdint.h>
- int main()
- {
- int64_t x,y,res;
-
- x = 10;
- y = 123;
- res = __sync_sub_and_fetch(&y, x);
- if (res != y || y != 113) {
- return(1);
- }
- res = __sync_add_and_fetch(&y, x);
- if (res != y || y != 123) {
- return(1);
- }
- return(0);
- }"
- HAVE_IB_GCC_ATOMIC_BUILTINS_64
- )
- CHECK_C_SOURCE(
- "#include<stdint.h>
- int main()
- {
- __sync_synchronize();
- return(0);
- }"
- HAVE_IB_GCC_SYNC_SYNCHRONISE
- )
- CHECK_C_SOURCE(
- "#include<stdint.h>
- int main()
- {
- __atomic_thread_fence(__ATOMIC_ACQUIRE);
- __atomic_thread_fence(__ATOMIC_RELEASE);
- return(0);
- }"
- HAVE_IB_GCC_ATOMIC_THREAD_FENCE
- )
- CHECK_C_SOURCE(
- "#include<stdint.h>
- int main()
- {
- unsigned char c;
-
- __atomic_test_and_set(&c, __ATOMIC_ACQUIRE);
- __atomic_clear(&c, __ATOMIC_RELEASE);
- return(0);
- }"
- HAVE_IB_GCC_ATOMIC_TEST_AND_SET
- )
-
-IF(HAVE_IB_GCC_ATOMIC_BUILTINS)
- ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS=1)
-ENDIF()
-
-IF(HAVE_IB_GCC_ATOMIC_BUILTINS_BYTE)
- ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS_BYTE=1)
-ENDIF()
-
-IF(HAVE_IB_GCC_ATOMIC_BUILTINS_64)
- ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS_64=1)
-ENDIF()
-
-IF(HAVE_IB_GCC_SYNC_SYNCHRONISE)
- ADD_DEFINITIONS(-DHAVE_IB_GCC_SYNC_SYNCHRONISE=1)
-ENDIF()
-
-IF(HAVE_IB_GCC_ATOMIC_THREAD_FENCE)
- ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_THREAD_FENCE=1)
-ENDIF()
-
-IF(HAVE_IB_GCC_ATOMIC_TEST_AND_SET)
- ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_TEST_AND_SET=1)
-ENDIF()
-
-# either define HAVE_IB_ATOMIC_PTHREAD_T_GCC or not
-CHECK_C_SOURCE(
-"
-#include <pthread.h>
-#include <string.h>
-
-int main() {
- pthread_t x1;
- pthread_t x2;
- pthread_t x3;
-
- memset(&x1, 0x0, sizeof(x1));
- memset(&x2, 0x0, sizeof(x2));
- memset(&x3, 0x0, sizeof(x3));
-
- __sync_bool_compare_and_swap(&x1, x2, x3);
-
- return(0);
-}"
-HAVE_IB_ATOMIC_PTHREAD_T_GCC)
-
-IF(HAVE_IB_ATOMIC_PTHREAD_T_GCC)
- ADD_DEFINITIONS(-DHAVE_IB_ATOMIC_PTHREAD_T_GCC=1)
-ENDIF()
-
-CHECK_C_SOURCE_COMPILES("struct t1{ int a; char *b; }; struct t1 c= { .a=1, .b=0 }; main() { }" HAVE_C99_INITIALIZERS)
-
-ENDIF(NOT MSVC)
-
-CHECK_FUNCTION_EXISTS(asprintf HAVE_ASPRINTF)
-CHECK_FUNCTION_EXISTS(vasprintf HAVE_VASPRINTF)
-
-# Solaris atomics
-IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS")
- CHECK_FUNCTION_EXISTS(atomic_cas_ulong HAVE_ATOMIC_CAS_ULONG)
- CHECK_FUNCTION_EXISTS(atomic_cas_32 HAVE_ATOMIC_CAS_32)
- CHECK_FUNCTION_EXISTS(atomic_cas_64 HAVE_ATOMIC_CAS_64)
- CHECK_FUNCTION_EXISTS(atomic_add_long_nv HAVE_ATOMIC_ADD_LONG_NV)
- CHECK_FUNCTION_EXISTS(atomic_swap_uchar HAVE_ATOMIC_SWAP_UCHAR)
- IF(HAVE_ATOMIC_CAS_ULONG AND
- HAVE_ATOMIC_CAS_32 AND
- HAVE_ATOMIC_CAS_64 AND
- HAVE_ATOMIC_ADD_LONG_NV AND
- HAVE_ATOMIC_SWAP_UCHAR)
- SET(HAVE_IB_SOLARIS_ATOMICS 1)
- ENDIF()
-
- IF(HAVE_IB_SOLARIS_ATOMICS)
- ADD_DEFINITIONS(-DHAVE_IB_SOLARIS_ATOMICS=1)
- ENDIF()
-
- # either define HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS or not
- CHECK_C_SOURCE_COMPILES(
- " #include <pthread.h>
- #include <string.h>
-
- int main(int argc, char** argv) {
- pthread_t x1;
- pthread_t x2;
- pthread_t x3;
-
- memset(&x1, 0x0, sizeof(x1));
- memset(&x2, 0x0, sizeof(x2));
- memset(&x3, 0x0, sizeof(x3));
-
- if (sizeof(pthread_t) == 4) {
-
- atomic_cas_32(&x1, x2, x3);
-
- } else if (sizeof(pthread_t) == 8) {
-
- atomic_cas_64(&x1, x2, x3);
-
- } else {
-
- return(1);
- }
-
- return(0);
- }
- " HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS)
- CHECK_C_SOURCE_COMPILES(
- "#include <mbarrier.h>
- int main() {
- __machine_r_barrier();
- __machine_w_barrier();
- return(0);
- }"
- HAVE_IB_MACHINE_BARRIER_SOLARIS)
-
- IF(HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS)
- ADD_DEFINITIONS(-DHAVE_IB_ATOMIC_PTHREAD_T_SOLARIS=1)
- ENDIF()
- IF(HAVE_IB_MACHINE_BARRIER_SOLARIS)
- ADD_DEFINITIONS(-DHAVE_IB_MACHINE_BARRIER_SOLARIS=1)
- ENDIF()
-ENDIF()
-
-
-IF(UNIX)
-# this is needed to know which one of atomic_cas_32() or atomic_cas_64()
-# to use in the source
-SET(CMAKE_EXTRA_INCLUDE_FILES pthread.h)
-CHECK_TYPE_SIZE(pthread_t SIZEOF_PTHREAD_T)
-SET(CMAKE_EXTRA_INCLUDE_FILES)
-ENDIF()
-
-IF(SIZEOF_PTHREAD_T)
- ADD_DEFINITIONS(-DSIZEOF_PTHREAD_T=${SIZEOF_PTHREAD_T})
-ENDIF()
-
-IF(MSVC)
- ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS)
- ADD_DEFINITIONS(-DHAVE_WINDOWS_MM_FENCE)
-ENDIF()
-
-
-# Include directories under innobase
-INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/innobase/include
- ${CMAKE_SOURCE_DIR}/storage/innobase/handler)
-
-# Sun Studio bug with -xO2
-IF(CMAKE_CXX_COMPILER_ID MATCHES "SunPro"
- AND CMAKE_CXX_FLAGS_RELEASE MATCHES "O2"
- AND NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
- # Sun Studio 12 crashes with -xO2 flag, but not with higher optimization
- # -xO3
- SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/rem/rem0rec.cc
- PROPERTIES COMPILE_FLAGS -xO3)
-ENDIF()
-
-# Removing compiler optimizations for innodb/mem/* files on 64-bit Windows
-# due to 64-bit compiler error, See MySQL Bug #19424, #36366, #34297
-IF (MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 8)
- SET_SOURCE_FILES_PROPERTIES(mem/mem0mem.cc mem/mem0pool.cc
- PROPERTIES COMPILE_FLAGS -Od)
-ENDIF()
-
-IF(MSVC)
- # Avoid "unreferenced label" warning in generated file
- GET_FILENAME_COMPONENT(_SRC_DIR ${CMAKE_CURRENT_LIST_FILE} PATH)
- SET_SOURCE_FILES_PROPERTIES(${_SRC_DIR}/pars/pars0grm.c
- PROPERTIES COMPILE_FLAGS "/wd4102")
- SET_SOURCE_FILES_PROPERTIES(${_SRC_DIR}/pars/lexyy.c
- PROPERTIES COMPILE_FLAGS "/wd4003")
-ENDIF()
+INCLUDE(innodb.cmake)
SET(INNOBASE_SOURCES
- api/api0api.cc
- api/api0misc.cc
btr/btr0btr.cc
+ btr/btr0bulk.cc
btr/btr0cur.cc
btr/btr0pcur.cc
btr/btr0scrub.cc
@@ -398,14 +46,16 @@ SET(INNOBASE_SOURCES
dict/dict0mem.cc
dict/dict0stats.cc
dict/dict0stats_bg.cc
- dyn/dyn0dyn.cc
+ dict/dict0defrag_bg.cc
eval/eval0eval.cc
eval/eval0proc.cc
fil/fil0fil.cc
fil/fil0pagecompress.cc
fil/fil0crypt.cc
fsp/fsp0fsp.cc
- fut/fut0fut.cc
+ fsp/fsp0file.cc
+ fsp/fsp0space.cc
+ fsp/fsp0sysspace.cc
fut/fut0lst.cc
ha/ha0ha.cc
ha/ha0storage.cc
@@ -419,11 +69,16 @@ SET(INNOBASE_SOURCES
fts/fts0que.cc
fts/fts0sql.cc
fts/fts0tlex.cc
+ gis/gis0geo.cc
+ gis/gis0rtree.cc
+ gis/gis0sea.cc
+ fts/fts0plugin.cc
handler/ha_innodb.cc
handler/handler0alter.cc
handler/i_s.cc
ibuf/ibuf0ibuf.cc
lock/lock0iter.cc
+ lock/lock0prdt.cc
lock/lock0lock.cc
lock/lock0wait.cc
log/log0log.cc
@@ -431,12 +86,11 @@ SET(INNOBASE_SOURCES
log/log0crypt.cc
mach/mach0data.cc
mem/mem0mem.cc
- mem/mem0pool.cc
mtr/mtr0log.cc
mtr/mtr0mtr.cc
os/os0file.cc
os/os0proc.cc
- os/os0sync.cc
+ os/os0event.cc
os/os0thread.cc
page/page0cur.cc
page/page0page.cc
@@ -460,6 +114,7 @@ SET(INNOBASE_SOURCES
row/row0purge.cc
row/row0row.cc
row/row0sel.cc
+ row/row0trunc.cc
row/row0uins.cc
row/row0umod.cc
row/row0undo.cc
@@ -472,6 +127,7 @@ SET(INNOBASE_SOURCES
srv/srv0start.cc
sync/sync0arr.cc
sync/sync0rw.cc
+ sync/sync0debug.cc
sync/sync0sync.cc
trx/trx0i_s.cc
trx/trx0purge.cc
@@ -481,46 +137,56 @@ SET(INNOBASE_SOURCES
trx/trx0sys.cc
trx/trx0trx.cc
trx/trx0undo.cc
- usr/usr0sess.cc
- ut/ut0bh.cc
- ut/ut0byte.cc
ut/ut0crc32.cc
ut/ut0dbg.cc
ut/ut0list.cc
ut/ut0mem.cc
+ ut/ut0new.cc
ut/ut0rbt.cc
ut/ut0rnd.cc
ut/ut0ut.cc
ut/ut0vec.cc
ut/ut0wqueue.cc)
-IF(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le")
- enable_language(ASM)
- LIST(APPEND INNOBASE_SOURCES
- ut/crc32_power8/crc32.S
- ut/crc32_power8/crc32_wrapper.c
- )
-ENDIF()
-
-IF(WITH_INNODB)
- # Legacy option
- SET(WITH_INNOBASE_STORAGE_ENGINE TRUE)
+MYSQL_ADD_PLUGIN(innobase ${INNOBASE_SOURCES} STORAGE_ENGINE
+ MODULE_OUTPUT_NAME ha_innodb
+ DEFAULT RECOMPILE_FOR_EMBEDDED
+ LINK_LIBRARIES
+ ${ZLIB_LIBRARY}
+ ${CRC32_VPMSUM_LIBRARY}
+ ${NUMA_LIBRARY}
+ ${LIBSYSTEMD}
+ ${LINKER_SCRIPT})
+
+IF(NOT TARGET innobase)
+ RETURN()
+ENDIF()
+
+# A GCC bug causes a crash when compiling these files on ARM64 with -O1+
+# Compile them with -O0 as a workaround.
+IF(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64"
+ AND CMAKE_C_COMPILER_VERSION VERSION_LESS "5.2.0")
+ ADD_COMPILE_FLAGS(
+ btr/btr0btr.cc
+ btr/btr0cur.cc
+ buf/buf0buf.cc
+ fts/fts0fts.cc
+ gis/gis0sea.cc
+ handler/handler0alter.cc
+ mtr/mtr0mtr.cc
+ row/row0merge.cc
+ row/row0mysql.cc
+ srv/srv0srv.cc
+ COMPILE_FLAGS "-O0"
+ )
ENDIF()
-
-# On solaris, reduce symbol visibility, so loader does not mix
-# the same symbols from builtin innodb and from shared one.
-# Only required for old GCC (3.4.3) that does not support hidden visibility
-IF(CMAKE_SYSTEM_NAME MATCHES "SunOS" AND CMAKE_COMPILER_IS_GNUCC
- AND NOT HAVE_VISIBILITY_HIDDEN)
- SET(LINKER_SCRIPT "-Wl,-M${CMAKE_CURRENT_SOURCE_DIR}/plugin_exports")
-ELSE()
- SET(LINKER_SCRIPT)
+IF(MSVC)
+  # silence the "switch statement contains 'default' but no 'case' label"
+  # warning on a generated file.
+ TARGET_COMPILE_OPTIONS(innobase PRIVATE "/wd4065")
ENDIF()
-MYSQL_ADD_PLUGIN(innobase ${INNOBASE_SOURCES} STORAGE_ENGINE
- MODULE_ONLY
- MODULE_OUTPUT_NAME ha_innodb
- LINK_LIBRARIES ${ZLIB_LIBRARY} ${LIBSYSTEMD} ${LINKER_SCRIPT})
+ADD_SUBDIRECTORY(${CMAKE_SOURCE_DIR}/extra/mariabackup ${CMAKE_BINARY_DIR}/extra/mariabackup)
IF(TARGET innobase)
ADD_DEPENDENCIES(innobase GenError)
diff --git a/storage/innobase/Doxyfile b/storage/innobase/Doxyfile
deleted file mode 100644
index 7cf5048fa52..00000000000
--- a/storage/innobase/Doxyfile
+++ /dev/null
@@ -1,1419 +0,0 @@
-# Doxyfile 1.5.6
-
-# Usage: SVNVERSION=-r$(svnversion) doxygen
-
-# This file describes the settings to be used by the documentation system
-# doxygen (www.doxygen.org) for a project
-#
-# All text after a hash (#) is considered a comment and will be ignored
-# The format is:
-# TAG = value [value, ...]
-# For lists items can also be appended using:
-# TAG += value [value, ...]
-# Values that contain spaces should be placed between quotes (" ")
-
-#---------------------------------------------------------------------------
-# Project related configuration options
-#---------------------------------------------------------------------------
-
-# This tag specifies the encoding used for all characters in the config file
-# that follow. The default is UTF-8 which is also the encoding used for all
-# text before the first occurrence of this tag. Doxygen uses libiconv (or the
-# iconv built into libc) for the transcoding. See
-# http://www.gnu.org/software/libiconv for the list of possible encodings.
-
-DOXYFILE_ENCODING = UTF-8
-
-# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
-# by quotes) that should identify the project.
-
-PROJECT_NAME = "InnoDB Plugin"
-
-# The PROJECT_NUMBER tag can be used to enter a project or revision number.
-# This could be handy for archiving the generated documentation or
-# if some version control system is used.
-
-PROJECT_NUMBER = 1.0$(SVNVERSION)
-
-# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
-# base path where the generated documentation will be put.
-# If a relative path is entered, it will be relative to the location
-# where doxygen was started. If left blank the current directory will be used.
-
-OUTPUT_DIRECTORY = dox
-
-# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
-# 4096 sub-directories (in 2 levels) under the output directory of each output
-# format and will distribute the generated files over these directories.
-# Enabling this option can be useful when feeding doxygen a huge amount of
-# source files, where putting all generated files in the same directory would
-# otherwise cause performance problems for the file system.
-
-CREATE_SUBDIRS = NO
-
-# The OUTPUT_LANGUAGE tag is used to specify the language in which all
-# documentation generated by doxygen is written. Doxygen will use this
-# information to generate all constant output in the proper language.
-# The default language is English, other supported languages are:
-# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
-# Croatian, Czech, Danish, Dutch, Farsi, Finnish, French, German, Greek,
-# Hungarian, Italian, Japanese, Japanese-en (Japanese with English messages),
-# Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, Polish,
-# Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish, Swedish,
-# and Ukrainian.
-
-OUTPUT_LANGUAGE = English
-
-# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
-# include brief member descriptions after the members that are listed in
-# the file and class documentation (similar to JavaDoc).
-# Set to NO to disable this.
-
-BRIEF_MEMBER_DESC = YES
-
-# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
-# the brief description of a member or function before the detailed description.
-# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
-# brief descriptions will be completely suppressed.
-
-REPEAT_BRIEF = YES
-
-# This tag implements a quasi-intelligent brief description abbreviator
-# that is used to form the text in various listings. Each string
-# in this list, if found as the leading text of the brief description, will be
-# stripped from the text and the result, after processing the whole list, is
-# used as the annotated text. Otherwise, the brief description is used as-is.
-# If left blank, the following values are used ("$name" is automatically
-# replaced with the name of the entity): "The $name class" "The $name widget"
-# "The $name file" "is" "provides" "specifies" "contains"
-# "represents" "a" "an" "the"
-
-ABBREVIATE_BRIEF =
-
-# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
-# Doxygen will generate a detailed section even if there is only a brief
-# description.
-
-ALWAYS_DETAILED_SEC = NO
-
-# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
-# inherited members of a class in the documentation of that class as if those
-# members were ordinary class members. Constructors, destructors and assignment
-# operators of the base classes will not be shown.
-
-INLINE_INHERITED_MEMB = NO
-
-# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
-# path before each file name in the file list and in the header files. If set
-# to NO the shortest path that makes the file name unique will be used.
-
-FULL_PATH_NAMES = YES
-
-# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
-# can be used to strip a user-defined part of the path. Stripping is
-# only done if one of the specified strings matches the left-hand part of
-# the path. The tag can be used to show relative paths in the file list.
-# If left blank the directory from which doxygen is run is used as the
-# path to strip.
-
-STRIP_FROM_PATH =
-
-# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
-# the path mentioned in the documentation of a class, which tells
-# the reader which header file to include in order to use a class.
-# If left blank only the name of the header file containing the class
-# definition is used. Otherwise one should specify the include paths that
-# are normally passed to the compiler using the -I flag.
-
-STRIP_FROM_INC_PATH =
-
-# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
-# (but less readable) file names. This can be useful if your file system
-# doesn't support long names, as on DOS, Mac, or CD-ROM.
-
-SHORT_NAMES = NO
-
-# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
-# will interpret the first line (until the first dot) of a JavaDoc-style
-# comment as the brief description. If set to NO, the JavaDoc
-# comments will behave just like regular Qt-style comments
-# (thus requiring an explicit @brief command for a brief description.)
-
-JAVADOC_AUTOBRIEF = NO
-
-# If the QT_AUTOBRIEF tag is set to YES then Doxygen will
-# interpret the first line (until the first dot) of a Qt-style
-# comment as the brief description. If set to NO, the comments
-# will behave just like regular Qt-style comments (thus requiring
-# an explicit \brief command for a brief description.)
-
-QT_AUTOBRIEF = NO
-
-# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
-# treat a multi-line C++ special comment block (i.e. a block of //! or ///
-# comments) as a brief description. This used to be the default behaviour.
-# The new default is to treat a multi-line C++ comment block as a detailed
-# description. Set this tag to YES if you prefer the old behaviour instead.
-
-MULTILINE_CPP_IS_BRIEF = NO
-
-# If the DETAILS_AT_TOP tag is set to YES then Doxygen
-# will output the detailed description near the top, like JavaDoc.
-# If set to NO, the detailed description appears after the member
-# documentation.
-
-DETAILS_AT_TOP = NO
-
-# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
-# member inherits the documentation from any documented member that it
-# re-implements.
-
-INHERIT_DOCS = YES
-
-# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
-# a new page for each member. If set to NO, the documentation of a member will
-# be part of the file/class/namespace that contains it.
-
-SEPARATE_MEMBER_PAGES = NO
-
-# The TAB_SIZE tag can be used to set the number of spaces in a tab.
-# Doxygen uses this value to replace tabs by spaces in code fragments.
-
-TAB_SIZE = 8
-
-# This tag can be used to specify a number of aliases that acts
-# as commands in the documentation. An alias has the form "name=value".
-# For example adding "sideeffect=\par Side Effects:\n" will allow you to
-# put the command \sideeffect (or @sideeffect) in the documentation, which
-# will result in a user-defined paragraph with heading "Side Effects:".
-# You can put \n's in the value part of an alias to insert newlines.
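-# For instance, the "sideeffect" alias described above would be declared as
-# (illustration only):
-#   ALIASES = "sideeffect=\par Side Effects:\n"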
-
-ALIASES =
-
-# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
-# sources only. Doxygen will then generate output that is more tailored for C.
-# For instance, some of the names that are used will be different. The list
-# of all members will be omitted, etc.
-
-OPTIMIZE_OUTPUT_FOR_C = YES
-
-# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
-# sources only. Doxygen will then generate output that is more tailored for
-# Java. For instance, namespaces will be presented as packages, qualified
-# scopes will look different, etc.
-
-OPTIMIZE_OUTPUT_JAVA = NO
-
-# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
-# sources only. Doxygen will then generate output that is more tailored for
-# Fortran.
-
-OPTIMIZE_FOR_FORTRAN = NO
-
-# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
-# sources. Doxygen will then generate output that is tailored for
-# VHDL.
-
-OPTIMIZE_OUTPUT_VHDL = NO
-
-# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
-# to include (a tag file for) the STL sources as input, then you should
-# set this tag to YES in order to let doxygen match functions declarations and
-# definitions whose arguments contain STL classes (e.g. func(std::string); v.s.
-# func(std::string) {}). This also make the inheritance and collaboration
-# diagrams that involve STL classes more complete and accurate.
-
-BUILTIN_STL_SUPPORT = NO
-
-# If you use Microsoft's C++/CLI language, you should set this option to YES to
-# enable parsing support.
-
-CPP_CLI_SUPPORT = NO
-
-# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
-# Doxygen will parse them like normal C++ but will assume all classes use public
-# instead of private inheritance when no explicit protection keyword is present.
-
-SIP_SUPPORT = NO
-
-# For Microsoft's IDL there are propget and propput attributes to indicate getter
-# and setter methods for a property. Setting this option to YES (the default)
-# will make doxygen replace the get and set methods with a property in the
-# documentation. This will only work if the methods are indeed getting or
-# setting a simple type. If this is not the case, or you want to show the
-# methods anyway, you should set this option to NO.
-
-IDL_PROPERTY_SUPPORT = YES
-
-# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
-# tag is set to YES, then doxygen will reuse the documentation of the first
-# member in the group (if any) for the other members of the group. By default
-# all members of a group must be documented explicitly.
-
-DISTRIBUTE_GROUP_DOC = NO
-
-# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
-# the same type (for instance a group of public functions) to be put as a
-# subgroup of that type (e.g. under the Public Functions section). Set it to
-# NO to prevent subgrouping. Alternatively, this can be done per class using
-# the \nosubgrouping command.
-
-SUBGROUPING = YES
-
-# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
-# is documented as struct, union, or enum with the name of the typedef. So
-# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
-# with name TypeT. When disabled the typedef will appear as a member of a file,
-# namespace, or class. And the struct will be named TypeS. This can typically
-# be useful for C code in case the coding convention dictates that all compound
-# types are typedef'ed and only the typedef is referenced, never the tag name.
-
-TYPEDEF_HIDES_STRUCT = NO
-
-#---------------------------------------------------------------------------
-# Build related configuration options
-#---------------------------------------------------------------------------
-
-# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
-# documentation are documented, even if no documentation was available.
-# Private class members and static file members will be hidden unless
-# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
-
-EXTRACT_ALL = NO
-
-# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
-# will be included in the documentation.
-
-EXTRACT_PRIVATE = YES
-
-# If the EXTRACT_STATIC tag is set to YES all static members of a file
-# will be included in the documentation.
-
-EXTRACT_STATIC = YES
-
-# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
-# defined locally in source files will be included in the documentation.
-# If set to NO only classes defined in header files are included.
-
-EXTRACT_LOCAL_CLASSES = YES
-
-# This flag is only useful for Objective-C code. When set to YES local
-# methods, which are defined in the implementation section but not in
-# the interface, are included in the documentation.
-# If set to NO (the default) only methods in the interface are included.
-
-EXTRACT_LOCAL_METHODS = NO
-
-# If this flag is set to YES, the members of anonymous namespaces will be
-# extracted and appear in the documentation as a namespace called
-# 'anonymous_namespace{file}', where file will be replaced with the base
-# name of the file that contains the anonymous namespace. By default
-# anonymous namespaces are hidden.
-
-EXTRACT_ANON_NSPACES = NO
-
-# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
-# undocumented members of documented classes, files or namespaces.
-# If set to NO (the default) these members will be included in the
-# various overviews, but no documentation section is generated.
-# This option has no effect if EXTRACT_ALL is enabled.
-
-HIDE_UNDOC_MEMBERS = NO
-
-# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
-# undocumented classes that are normally visible in the class hierarchy.
-# If set to NO (the default) these classes will be included in the various
-# overviews. This option has no effect if EXTRACT_ALL is enabled.
-
-HIDE_UNDOC_CLASSES = NO
-
-# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
-# friend (class|struct|union) declarations.
-# If set to NO (the default) these declarations will be included in the
-# documentation.
-
-HIDE_FRIEND_COMPOUNDS = NO
-
-# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
-# documentation blocks found inside the body of a function.
-# If set to NO (the default) these blocks will be appended to the
-# function's detailed documentation block.
-
-HIDE_IN_BODY_DOCS = NO
-
-# The INTERNAL_DOCS tag determines if documentation
-# that is typed after a \internal command is included. If the tag is set
-# to NO (the default) then the documentation will be excluded.
-# Set it to YES to include the internal documentation.
-
-INTERNAL_DOCS = NO
-
-# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
-# file names in lower-case letters. If set to YES upper-case letters are also
-# allowed. This is useful if you have classes or files whose names only differ
-# in case and if your file system supports case sensitive file names. Windows
-# and Mac users are advised to set this option to NO.
-
-CASE_SENSE_NAMES = YES
-
-# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
-# will show members with their full class and namespace scopes in the
-# documentation. If set to YES the scope will be hidden.
-
-HIDE_SCOPE_NAMES = NO
-
-# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
-# will put a list of the files that are included by a file in the documentation
-# of that file.
-
-SHOW_INCLUDE_FILES = YES
-
-# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
-# is inserted in the documentation for inline members.
-
-INLINE_INFO = YES
-
-# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
-# will sort the (detailed) documentation of file and class members
-# alphabetically by member name. If set to NO the members will appear in
-# declaration order.
-
-SORT_MEMBER_DOCS = YES
-
-# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
-# brief documentation of file, namespace and class members alphabetically
-# by member name. If set to NO (the default) the members will appear in
-# declaration order.
-
-SORT_BRIEF_DOCS = NO
-
-# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
-# hierarchy of group names into alphabetical order. If set to NO (the default)
-# the group names will appear in their defined order.
-
-SORT_GROUP_NAMES = NO
-
-# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
-# sorted by fully-qualified names, including namespaces. If set to
-# NO (the default), the class list will be sorted only by class name,
-# not including the namespace part.
-# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
-# Note: This option applies only to the class list, not to the
-# alphabetical list.
-
-SORT_BY_SCOPE_NAME = NO
-
-# The GENERATE_TODOLIST tag can be used to enable (YES) or
-# disable (NO) the todo list. This list is created by putting \todo
-# commands in the documentation.
-
-GENERATE_TODOLIST = YES
-
-# The GENERATE_TESTLIST tag can be used to enable (YES) or
-# disable (NO) the test list. This list is created by putting \test
-# commands in the documentation.
-
-GENERATE_TESTLIST = YES
-
-# The GENERATE_BUGLIST tag can be used to enable (YES) or
-# disable (NO) the bug list. This list is created by putting \bug
-# commands in the documentation.
-
-GENERATE_BUGLIST = YES
-
-# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
-# disable (NO) the deprecated list. This list is created by putting
-# \deprecated commands in the documentation.
-
-GENERATE_DEPRECATEDLIST= YES
-
-# The ENABLED_SECTIONS tag can be used to enable conditional
-# documentation sections, marked by \if sectionname ... \endif.
-
-ENABLED_SECTIONS =
-
-# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
-# the initial value of a variable or define consists of for it to appear in
-# the documentation. If the initializer consists of more lines than specified
-# here it will be hidden. Use a value of 0 to hide initializers completely.
-# The appearance of the initializer of individual variables and defines in the
-# documentation can be controlled using \showinitializer or \hideinitializer
-# command in the documentation regardless of this setting.
-
-MAX_INITIALIZER_LINES = 30
-
-# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
-# at the bottom of the documentation of classes and structs. If set to YES the
-# list will mention the files that were used to generate the documentation.
-
-SHOW_USED_FILES = YES
-
-# If the sources in your project are distributed over multiple directories
-# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
-# in the documentation. The default is NO.
-
-SHOW_DIRECTORIES = NO
-
-# Set the SHOW_FILES tag to NO to disable the generation of the Files page.
-# This will remove the Files entry from the Quick Index and from the
-# Folder Tree View (if specified). The default is YES.
-
-SHOW_FILES = YES
-
-# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
-# Namespaces page. This will remove the Namespaces entry from the Quick Index
-# and from the Folder Tree View (if specified). The default is YES.
-
-SHOW_NAMESPACES = YES
-
-# The FILE_VERSION_FILTER tag can be used to specify a program or script that
-# doxygen should invoke to get the current version for each file (typically from
-# the version control system). Doxygen will invoke the program by executing (via
-# popen()) the command <command> <input-file>, where <command> is the value of
-# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
-# provided by doxygen. Whatever the program writes to standard output
-# is used as the file version. See the manual for examples.
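-# For example (hypothetical), a Subversion-based setup in the spirit of the
-# Usage line at the top of this file could use:
-#   FILE_VERSION_FILTER = "svnversion"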
-
-FILE_VERSION_FILTER =
-
-#---------------------------------------------------------------------------
-# configuration options related to warning and progress messages
-#---------------------------------------------------------------------------
-
-# The QUIET tag can be used to turn on/off the messages that are generated
-# by doxygen. Possible values are YES and NO. If left blank NO is used.
-
-QUIET = YES
-
-# The WARNINGS tag can be used to turn on/off the warning messages that are
-# generated by doxygen. Possible values are YES and NO. If left blank
-# NO is used.
-
-WARNINGS = YES
-
-# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
-# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
-# automatically be disabled.
-
-WARN_IF_UNDOCUMENTED = YES
-
-# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
-# potential errors in the documentation, such as not documenting some
-# parameters in a documented function, or documenting parameters that
-# don't exist or using markup commands wrongly.
-
-WARN_IF_DOC_ERROR = YES
-
-# The WARN_NO_PARAMDOC option can be enabled to get warnings for
-# functions that are documented, but have no documentation for their parameters
-# or return value. If set to NO (the default) doxygen will only warn about
-# wrong or incomplete parameter documentation, but not about the absence of
-# documentation.
-
-WARN_NO_PARAMDOC = NO
-
-# The WARN_FORMAT tag determines the format of the warning messages that
-# doxygen can produce. The string should contain the $file, $line, and $text
-# tags, which will be replaced by the file and line number from which the
-# warning originated and the warning text. Optionally the format may contain
-# $version, which will be replaced by the version of the file (if it could
-# be obtained via FILE_VERSION_FILTER)
-
-WARN_FORMAT = "$file:$line: $text"
-
-# The WARN_LOGFILE tag can be used to specify a file to which warning
-# and error messages should be written. If left blank the output is written
-# to stderr.
-
-WARN_LOGFILE =
-
-#---------------------------------------------------------------------------
-# configuration options related to the input files
-#---------------------------------------------------------------------------
-
-# The INPUT tag can be used to specify the files and/or directories that contain
-# documented source files. You may enter file names like "myfile.cpp" or
-# directories like "/usr/src/myproject". Separate the files or directories
-# with spaces.
-
-INPUT = . include/univ.i
-
-# This tag can be used to specify the character encoding of the source files
-# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
-# also the default input encoding. Doxygen uses libiconv (or the iconv built
-# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
-# the list of possible encodings.
-
-INPUT_ENCODING = UTF-8
-
-# If the value of the INPUT tag contains directories, you can use the
-# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
-# and *.h) to filter out the source-files in the directories. If left
-# blank the following patterns are tested:
-# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
-# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
-
-FILE_PATTERNS = *.c *.ic *.h
-
-# The RECURSIVE tag can be used to specify whether or not subdirectories
-# should be searched for input files as well. Possible values are YES and NO.
-# If left blank NO is used.
-
-RECURSIVE = YES
-
-# The EXCLUDE tag can be used to specify files and/or directories that should
-# be excluded from the INPUT source files. This way you can easily exclude a
-# subdirectory from a directory tree whose root is specified with the INPUT tag.
-
-EXCLUDE =
-
-# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
-# directories that are symbolic links (a Unix filesystem feature) are excluded
-# from the input.
-
-EXCLUDE_SYMLINKS = NO
-
-# If the value of the INPUT tag contains directories, you can use the
-# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
-# certain files from those directories. Note that the wildcards are matched
-# against the file with absolute path, so to exclude all test directories
-# for example use the pattern */test/*
-
-EXCLUDE_PATTERNS =
-
-# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
-# (namespaces, classes, functions, etc.) that should be excluded from the
-# output. The symbol name can be a fully qualified name, a word, or if the
-# wildcard * is used, a substring. Examples: ANamespace, AClass,
-# AClass::ANamespace, ANamespace::*Test
-
-EXCLUDE_SYMBOLS =
-
-# The EXAMPLE_PATH tag can be used to specify one or more files or
-# directories that contain example code fragments that are included (see
-# the \include command).
-
-EXAMPLE_PATH =
-
-# If the value of the EXAMPLE_PATH tag contains directories, you can use the
-# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
-# and *.h) to filter out the source-files in the directories. If left
-# blank all files are included.
-
-EXAMPLE_PATTERNS =
-
-# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
-# searched for input files to be used with the \include or \dontinclude
-# commands irrespective of the value of the RECURSIVE tag.
-# Possible values are YES and NO. If left blank NO is used.
-
-EXAMPLE_RECURSIVE = NO
-
-# The IMAGE_PATH tag can be used to specify one or more files or
-# directories that contain images that are included in the documentation (see
-# the \image command).
-
-IMAGE_PATH =
-
-# The INPUT_FILTER tag can be used to specify a program that doxygen should
-# invoke to filter for each input file. Doxygen will invoke the filter program
-# by executing (via popen()) the command <filter> <input-file>, where <filter>
-# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
-# input file. Doxygen will then use the output that the filter program writes
-# to standard output. If FILTER_PATTERNS is specified, this tag will be
-# ignored.
-
-INPUT_FILTER =
-
-# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
-# basis. Doxygen will compare the file name with each pattern and apply the
-# filter if there is a match. The filters are a list of the form:
-# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
-# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER
-# is applied to all files.
-
-FILTER_PATTERNS =
-
-# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
-# INPUT_FILTER) will be used to filter the input files when producing source
-# files to browse (i.e. when SOURCE_BROWSER is set to YES).
-
-FILTER_SOURCE_FILES = NO
-
-#---------------------------------------------------------------------------
-# configuration options related to source browsing
-#---------------------------------------------------------------------------
-
-# If the SOURCE_BROWSER tag is set to YES then a list of source files will
-# be generated. Documented entities will be cross-referenced with these sources.
-# Note: To get rid of all source code in the generated output, make sure also
-# VERBATIM_HEADERS is set to NO.
-
-SOURCE_BROWSER = NO
-
-# Setting the INLINE_SOURCES tag to YES will include the body
-# of functions and classes directly in the documentation.
-
-INLINE_SOURCES = NO
-
-# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
-# doxygen to hide any special comment blocks from generated source code
-# fragments. Normal C and C++ comments will always remain visible.
-
-STRIP_CODE_COMMENTS = YES
-
-# If the REFERENCED_BY_RELATION tag is set to YES
-# then for each documented function all documented
-# functions referencing it will be listed.
-
-REFERENCED_BY_RELATION = NO
-
-# If the REFERENCES_RELATION tag is set to YES
-# then for each documented function all documented entities
-# called/used by that function will be listed.
-
-REFERENCES_RELATION = NO
-
-# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
-# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
-# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
-# link to the source code. Otherwise they will link to the documentation.
-
-REFERENCES_LINK_SOURCE = YES
-
-# If the USE_HTAGS tag is set to YES then the references to source code
-# will point to the HTML generated by the htags(1) tool instead of doxygen's
-# built-in source browser. The htags tool is part of GNU's global source
-# tagging system (see http://www.gnu.org/software/global/global.html). You
-# will need version 4.8.6 or higher.
-
-USE_HTAGS = NO
-
-# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
-# will generate a verbatim copy of the header file for each class for
-# which an include is specified. Set to NO to disable this.
-
-VERBATIM_HEADERS = YES
-
-#---------------------------------------------------------------------------
-# configuration options related to the alphabetical class index
-#---------------------------------------------------------------------------
-
-# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
-# of all compounds will be generated. Enable this if the project
-# contains a lot of classes, structs, unions or interfaces.
-
-ALPHABETICAL_INDEX = NO
-
-# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
-# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
-# in which this list will be split (can be a number in the range [1..20])
-
-COLS_IN_ALPHA_INDEX = 5
-
-# In case all classes in a project start with a common prefix, all
-# classes will be put under the same header in the alphabetical index.
-# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
-# should be ignored while generating the index headers.
-
-IGNORE_PREFIX =
-
-#---------------------------------------------------------------------------
-# configuration options related to the HTML output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
-# generate HTML output.
-
-GENERATE_HTML = YES
-
-# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `html' will be used as the default path.
-
-HTML_OUTPUT = html
-
-# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
-# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
-# doxygen will generate files with .html extension.
-
-HTML_FILE_EXTENSION = .html
-
-# The HTML_HEADER tag can be used to specify a personal HTML header for
-# each generated HTML page. If it is left blank doxygen will generate a
-# standard header.
-
-HTML_HEADER =
-
-# The HTML_FOOTER tag can be used to specify a personal HTML footer for
-# each generated HTML page. If it is left blank doxygen will generate a
-# standard footer.
-
-HTML_FOOTER =
-
-# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
-# style sheet that is used by each HTML page. It can be used to
-# fine-tune the look of the HTML output. If the tag is left blank doxygen
-# will generate a default style sheet. Note that doxygen will try to copy
-# the style sheet file to the HTML output directory, so don't put your own
-# stylesheet in the HTML output directory as well, or it will be erased!
-
-HTML_STYLESHEET =
-
-# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
-# files or namespaces will be aligned in HTML using tables. If set to
-# NO a bullet list will be used.
-
-HTML_ALIGN_MEMBERS = YES
-
-# If the GENERATE_HTMLHELP tag is set to YES, additional index files
-# will be generated that can be used as input for tools like the
-# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
-# of the generated HTML documentation.
-
-GENERATE_HTMLHELP = NO
-
-# If the GENERATE_DOCSET tag is set to YES, additional index files
-# will be generated that can be used as input for Apple's Xcode 3
-# integrated development environment, introduced with OSX 10.5 (Leopard).
-# To create a documentation set, doxygen will generate a Makefile in the
-# HTML output directory. Running make will produce the docset in that
-# directory and running "make install" will install the docset in
-# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
-# it at startup.
-
-GENERATE_DOCSET = NO
-
-# When the GENERATE_DOCSET tag is set to YES, this tag determines the name of the
-# feed. A documentation feed provides an umbrella under which multiple
-# documentation sets from a single provider (such as a company or product suite)
-# can be grouped.
-
-DOCSET_FEEDNAME = "Doxygen generated docs"
-
-# When the GENERATE_DOCSET tag is set to YES, this tag specifies a string that
-# should uniquely identify the documentation set bundle. This should be a
-# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
-# will append .docset to the name.
-
-DOCSET_BUNDLE_ID = org.doxygen.Project
-
-# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
-# documentation will contain sections that can be hidden and shown after the
-# page has loaded. For this to work a browser that supports
-# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox,
-# Netscape 6.0+, Internet Explorer 5.0+, Konqueror, or Safari).
-
-HTML_DYNAMIC_SECTIONS = NO
-
-# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
-# be used to specify the file name of the resulting .chm file. You
-# can add a path in front of the file if the result should not be
-# written to the html output directory.
-
-CHM_FILE =
-
-# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
-# be used to specify the location (absolute path including file name) of
-# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
-# the HTML help compiler on the generated index.hhp.
-
-HHC_LOCATION =
-
-# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
-# controls whether a separate .chi index file is generated (YES) or
-# whether it should be included in the master .chm file (NO).
-
-GENERATE_CHI = NO
-
-# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
-# is used to encode HtmlHelp index (hhk), content (hhc) and project file
-# content.
-
-CHM_INDEX_ENCODING =
-
-# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
-# controls whether a binary table of contents is generated (YES) or a
-# normal table of contents (NO) in the .chm file.
-
-BINARY_TOC = NO
-
-# The TOC_EXPAND flag can be set to YES to add extra items for group members
-# to the contents of the HTML help documentation and to the tree view.
-
-TOC_EXPAND = NO
-
-# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
-# top of each HTML page. The value NO (the default) enables the index and
-# the value YES disables it.
-
-DISABLE_INDEX = NO
-
-# This tag can be used to set the number of enum values (range [1..20])
-# that doxygen will group on one line in the generated HTML documentation.
-
-ENUM_VALUES_PER_LINE = 4
-
-# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
-# structure should be generated to display hierarchical information.
-# If the tag value is set to FRAME, a side panel will be generated
-# containing a tree-like index structure (just like the one that
-# is generated for HTML Help). For this to work a browser that supports
-# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+,
-# Netscape 6.0+, Internet Explorer 5.0+, or Konqueror). Windows users are
-# probably better off using the HTML help feature. Other possible values
-# for this tag are: HIERARCHIES, which will generate the Groups, Directories,
-# and Class Hierarchy pages using a tree view instead of an ordered list;
-# ALL, which combines the behavior of FRAME and HIERARCHIES; and NONE, which
-# disables this behavior completely. For backwards compatibility with previous
-# releases of Doxygen, the values YES and NO are equivalent to FRAME and NONE
-# respectively.
-
-GENERATE_TREEVIEW = NONE
-
-# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
-# used to set the initial width (in pixels) of the frame in which the tree
-# is shown.
-
-TREEVIEW_WIDTH = 250
-
-# Use this tag to change the font size of Latex formulas included
-# as images in the HTML documentation. The default is 10. Note that
-# when you change the font size after a successful doxygen run you need
-# to manually remove any form_*.png images from the HTML output directory
-# to force them to be regenerated.
-
-FORMULA_FONTSIZE = 10
-
-#---------------------------------------------------------------------------
-# configuration options related to the LaTeX output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
-# generate Latex output.
-
-GENERATE_LATEX = NO
-
-# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `latex' will be used as the default path.
-
-LATEX_OUTPUT = latex
-
-# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
-# invoked. If left blank `latex' will be used as the default command name.
-
-LATEX_CMD_NAME = latex
-
-# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
-# generate index for LaTeX. If left blank `makeindex' will be used as the
-# default command name.
-
-MAKEINDEX_CMD_NAME = makeindex
-
-# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
-# LaTeX documents. This may be useful for small projects and may help to
-# save some trees in general.
-
-COMPACT_LATEX = NO
-
-# The PAPER_TYPE tag can be used to set the paper type that is used
-# by the printer. Possible values are: a4, a4wide, letter, legal and
-# executive. If left blank a4wide will be used.
-
-PAPER_TYPE = a4wide
-
-# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
-# packages that should be included in the LaTeX output.
-
-EXTRA_PACKAGES =
-
-# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
-# the generated latex document. The header should contain everything until
-# the first chapter. If it is left blank doxygen will generate a
-# standard header. Notice: only use this tag if you know what you are doing!
-
-LATEX_HEADER =
-
-# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
-# is prepared for conversion to pdf (using ps2pdf). The pdf file will
-# contain links (just like the HTML output) instead of page references.
-# This makes the output suitable for online browsing using a pdf viewer.
-
-PDF_HYPERLINKS = YES
-
-# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
-# plain latex in the generated Makefile. Set this option to YES to get a
-# higher quality PDF documentation.
-
-USE_PDFLATEX = YES
-
-# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode
-# command to the generated LaTeX files. This will instruct LaTeX to keep
-# running if errors occur, instead of asking the user for help.
-# This option is also used when generating formulas in HTML.
-
-LATEX_BATCHMODE = NO
-
-# If LATEX_HIDE_INDICES is set to YES then doxygen will not
-# include the index chapters (such as File Index, Compound Index, etc.)
-# in the output.
-
-LATEX_HIDE_INDICES = NO
-
-#---------------------------------------------------------------------------
-# configuration options related to the RTF output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
-# The RTF output is optimized for Word 97 and may not look very pretty with
-# other RTF readers or editors.
-
-GENERATE_RTF = NO
-
-# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `rtf' will be used as the default path.
-
-RTF_OUTPUT = rtf
-
-# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
-# RTF documents. This may be useful for small projects and may help to
-# save some trees in general.
-
-COMPACT_RTF = NO
-
-# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
-# will contain hyperlink fields. The RTF file will
-# contain links (just like the HTML output) instead of page references.
-# This makes the output suitable for online browsing using WORD or other
-# programs which support those fields.
-# Note: wordpad (write) and others do not support links.
-
-RTF_HYPERLINKS = NO
-
-# Load stylesheet definitions from file. Syntax is similar to doxygen's
-# config file, i.e. a series of assignments. You only have to provide
-# replacements; missing definitions are set to their default value.
-
-RTF_STYLESHEET_FILE =
-
-# Set optional variables used in the generation of an rtf document.
-# Syntax is similar to doxygen's config file.
-
-RTF_EXTENSIONS_FILE =
-
-#---------------------------------------------------------------------------
-# configuration options related to the man page output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
-# generate man pages
-
-GENERATE_MAN = NO
-
-# The MAN_OUTPUT tag is used to specify where the man pages will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `man' will be used as the default path.
-
-MAN_OUTPUT = man
-
-# The MAN_EXTENSION tag determines the extension that is added to
-# the generated man pages (default is the subroutine's section .3)
-
-MAN_EXTENSION = .3
-
-# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
-# then it will generate one additional man file for each entity
-# documented in the real man page(s). These additional files
-# only source the real man page, but without them the man command
-# would be unable to find the correct page. The default is NO.
-
-MAN_LINKS = NO
-
-#---------------------------------------------------------------------------
-# configuration options related to the XML output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_XML tag is set to YES Doxygen will
-# generate an XML file that captures the structure of
-# the code including all documentation.
-
-GENERATE_XML = NO
-
-# The XML_OUTPUT tag is used to specify where the XML pages will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `xml' will be used as the default path.
-
-XML_OUTPUT = xml
-
-# The XML_SCHEMA tag can be used to specify an XML schema,
-# which can be used by a validating XML parser to check the
-# syntax of the XML files.
-
-XML_SCHEMA =
-
-# The XML_DTD tag can be used to specify an XML DTD,
-# which can be used by a validating XML parser to check the
-# syntax of the XML files.
-
-XML_DTD =
-
-# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
-# dump the program listings (including syntax highlighting
-# and cross-referencing information) to the XML output. Note that
-# enabling this will significantly increase the size of the XML output.
-
-XML_PROGRAMLISTING = YES
-
-#---------------------------------------------------------------------------
-# configuration options for the AutoGen Definitions output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
-# generate an AutoGen Definitions (see autogen.sf.net) file
-# that captures the structure of the code including all
-# documentation. Note that this feature is still experimental
-# and incomplete at the moment.
-
-GENERATE_AUTOGEN_DEF = NO
-
-#---------------------------------------------------------------------------
-# configuration options related to the Perl module output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_PERLMOD tag is set to YES Doxygen will
-# generate a Perl module file that captures the structure of
-# the code including all documentation. Note that this
-# feature is still experimental and incomplete at the
-# moment.
-
-GENERATE_PERLMOD = NO
-
-# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
-# the necessary Makefile rules, Perl scripts and LaTeX code to be able
-# to generate PDF and DVI output from the Perl module output.
-
-PERLMOD_LATEX = NO
-
-# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
-# nicely formatted so it can be parsed by a human reader. This is useful
-# if you want to understand what is going on. On the other hand, if this
-# tag is set to NO the size of the Perl module output will be much smaller
-# and Perl will parse it just the same.
-
-PERLMOD_PRETTY = YES
-
-# The names of the make variables in the generated doxyrules.make file
-# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
-# This is useful so different doxyrules.make files included by the same
-# Makefile don't overwrite each other's variables.
-
-PERLMOD_MAKEVAR_PREFIX =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the preprocessor
-#---------------------------------------------------------------------------
-
-# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
-# evaluate all C-preprocessor directives found in the sources and include
-# files.
-
-ENABLE_PREPROCESSING = YES
-
-# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
-# names in the source code. If set to NO (the default) only conditional
-# compilation will be performed. Macro expansion can be done in a controlled
-# way by setting EXPAND_ONLY_PREDEF to YES.
-
-MACRO_EXPANSION = YES
-
-# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
-# then the macro expansion is limited to the macros specified with the
-# PREDEFINED and EXPAND_AS_DEFINED tags.
-
-EXPAND_ONLY_PREDEF = YES
-
-# If the SEARCH_INCLUDES tag is set to YES (the default) the include files
-# in the INCLUDE_PATH (see below) will be searched if a #include is found.
-
-SEARCH_INCLUDES = YES
-
-# The INCLUDE_PATH tag can be used to specify one or more directories that
-# contain include files that are not input files but should be processed by
-# the preprocessor.
-
-INCLUDE_PATH =
-
-# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
-# patterns (like *.h and *.hpp) to filter out the header-files in the
-# directories. If left blank, the patterns specified with FILE_PATTERNS will
-# be used.
-
-INCLUDE_FILE_PATTERNS =
-
-# The PREDEFINED tag can be used to specify one or more macro names that
-# are defined before the preprocessor is started (similar to the -D option of
-# gcc). The argument of the tag is a list of macros of the form: name
-# or name=definition (no spaces). If the definition and the = are
-# omitted =1 is assumed. To prevent a macro definition from being
-# undefined via #undef or recursively expanded use the := operator
-# instead of the = operator.
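-# As an illustration of the name=definition and := forms just described, a
-# hypothetical entry could read:
-#   PREDEFINED += MY_FEATURE MY_BUFSIZE=4096 MY_FIXED:=1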
-
-PREDEFINED = DOXYGEN UNIV_DEBUG UNIV_SYNC_DEBUG __attribute__()=
-
-# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
-# this tag can be used to specify a list of macro names that should be expanded.
-# The macro definition that is found in the sources will be used.
-# Use the PREDEFINED tag if you want to use a different macro definition.
-
-EXPAND_AS_DEFINED = UT_LIST_BASE_NODE_T UT_LIST_NODE_T
-
-# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
-# doxygen's preprocessor will remove all function-like macros that are alone
-# on a line, have an all uppercase name, and do not end with a semicolon. Such
-# function macros are typically used for boiler-plate code, and will confuse
-# the parser if not removed.
-
-SKIP_FUNCTION_MACROS = YES
-
-#---------------------------------------------------------------------------
-# Configuration::additions related to external references
-#---------------------------------------------------------------------------
-
-# The TAGFILES option can be used to specify one or more tagfiles.
-# Optionally an initial location of the external documentation
-# can be added for each tagfile. The format of a tag file without
-# this location is as follows:
-# TAGFILES = file1 file2 ...
-# Adding location for the tag files is done as follows:
-# TAGFILES = file1=loc1 "file2 = loc2" ...
-# where "loc1" and "loc2" can be relative or absolute paths or
-# URLs. If a location is present for each tag, the installdox tool
-# does not have to be run to correct the links.
-# Note that each tag file must have a unique name
-# (where the name does NOT include the path)
-# If a tag file is not located in the directory in which doxygen
-# is run, you must also specify the path to the tagfile here.
-
-TAGFILES =
-
-# When a file name is specified after GENERATE_TAGFILE, doxygen will create
-# a tag file that is based on the input files it reads.
-
-GENERATE_TAGFILE =
-
-# If the ALLEXTERNALS tag is set to YES all external classes will be listed
-# in the class index. If set to NO only the inherited external classes
-# will be listed.
-
-ALLEXTERNALS = NO
-
-# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
-# in the modules index. If set to NO, only the current project's groups will
-# be listed.
-
-EXTERNAL_GROUPS = NO
-
-# The PERL_PATH should be the absolute path and name of the perl script
-# interpreter (i.e. the result of `which perl').
-
-PERL_PATH = /usr/bin/perl
-
-#---------------------------------------------------------------------------
-# Configuration options related to the dot tool
-#---------------------------------------------------------------------------
-
-# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
-# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base
-# or super classes. Setting the tag to NO turns the diagrams off. Note that
-# this option is superseded by the HAVE_DOT option below. This is only a
-# fallback. It is recommended to install and use dot, since it yields more
-# powerful graphs.
-
-CLASS_DIAGRAMS = YES
-
-# You can define message sequence charts within doxygen comments using the \msc
-# command. Doxygen will then run the mscgen tool (see
-# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
-# documentation. The MSCGEN_PATH tag allows you to specify the directory where
-# the mscgen tool resides. If left empty the tool is assumed to be found in the
-# default search path.
-
-MSCGEN_PATH =
-
-# If set to YES, the inheritance and collaboration graphs will hide
-# inheritance and usage relations if the target is undocumented
-# or is not a class.
-
-HIDE_UNDOC_RELATIONS = YES
-
-# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
-# available from the path. This tool is part of Graphviz, a graph visualization
-# toolkit from AT&T and Lucent Bell Labs. The other options in this section
-# have no effect if this option is set to NO (the default)
-
-HAVE_DOT = YES
-
-# By default doxygen will write a font called FreeSans.ttf to the output
-# directory and reference it in all dot files that doxygen generates. This
-# font does not include all possible Unicode characters, however, so when you
-# need these (or just want a different-looking font) you can specify the font
-# name using DOT_FONTNAME. You need to make sure dot is able to find the font,
-# which can be done by putting it in a standard location or by setting the
-# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory
-# containing the font.
-
-DOT_FONTNAME = FreeSans
-
-# By default doxygen will tell dot to use the output directory to look for the
-# FreeSans.ttf font (which doxygen will put there itself). If you specify a
-# different font using DOT_FONTNAME you can set the path where dot
-# can find it using this tag.
-
-DOT_FONTPATH =
-
-# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
-# will generate a graph for each documented class showing the direct and
-# indirect inheritance relations. Setting this tag to YES will force the
-# CLASS_DIAGRAMS tag to NO.
-
-CLASS_GRAPH = YES
-
-# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
-# will generate a graph for each documented class showing the direct and
-# indirect implementation dependencies (inheritance, containment, and
-# class references variables) of the class with other documented classes.
-
-COLLABORATION_GRAPH = YES
-
-# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
-# will generate a graph for groups, showing the direct groups dependencies
-
-GROUP_GRAPHS = NO
-
-# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
-# collaboration diagrams in a style similar to the OMG's Unified Modeling
-# Language.
-
-UML_LOOK = NO
-
-# If set to YES, the inheritance and collaboration graphs will show the
-# relations between templates and their instances.
-
-TEMPLATE_RELATIONS = NO
-
-# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
-# tags are set to YES then doxygen will generate a graph for each documented
-# file showing the direct and indirect include dependencies of the file with
-# other documented files.
-
-INCLUDE_GRAPH = YES
-
-# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
-# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
-# documented header file showing the documented files that directly or
-# indirectly include this file.
-
-INCLUDED_BY_GRAPH = YES
-
-# If the CALL_GRAPH and HAVE_DOT options are set to YES then
-# doxygen will generate a call dependency graph for every global function
-# or class method. Note that enabling this option will significantly increase
-# the time of a run. So in most cases it will be better to enable call graphs
-# for selected functions only using the \callgraph command.
-
-CALL_GRAPH = NO
-
-# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
-# doxygen will generate a caller dependency graph for every global function
-# or class method. Note that enabling this option will significantly increase
-# the time of a run. So in most cases it will be better to enable caller
-# graphs for selected functions only using the \callergraph command.
-
-CALLER_GRAPH = NO
-
-# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
-# will generate a graphical hierarchy of all classes instead of a textual one.
-
-GRAPHICAL_HIERARCHY = YES
-
-# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
-# then doxygen will show the dependencies a directory has on other directories
-# in a graphical way. The dependency relations are determined by the #include
-# relations between the files in the directories.
-
-DIRECTORY_GRAPH = YES
-
-# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
-# generated by dot. Possible values are png, jpg, or gif.
-# If left blank png will be used.
-
-DOT_IMAGE_FORMAT = png
-
-# The tag DOT_PATH can be used to specify the path where the dot tool can be
-# found. If left blank, it is assumed the dot tool can be found in the path.
-
-DOT_PATH =
-
-# The DOTFILE_DIRS tag can be used to specify one or more directories that
-# contain dot files that are included in the documentation (see the
-# \dotfile command).
-
-DOTFILE_DIRS =
-
-# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
-# nodes that will be shown in the graph. If the number of nodes in a graph
-# becomes larger than this value, doxygen will truncate the graph, which is
-# visualized by representing a node as a red box. Note that if the
-# number of direct children of the root node in a graph is already larger than
-# DOT_GRAPH_MAX_NODES, doxygen will not show the graph at all. Also note
-# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
-
-DOT_GRAPH_MAX_NODES = 50
-
-# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
-# graphs generated by dot. A depth value of 3 means that only nodes reachable
-# from the root by following a path via at most 3 edges will be shown. Nodes
-# that lay further from the root node will be omitted. Note that setting this
-# option to 1 or 2 may greatly reduce the computation time needed for large
-# code bases. Also note that the size of a graph can be further restricted by
-# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
-
-MAX_DOT_GRAPH_DEPTH = 3
-
-# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
-# background. This is enabled by default, which results in a transparent
-# background. Warning: Depending on the platform used, enabling this option
-# may lead to badly anti-aliased labels on the edges of a graph (i.e. they
-# become hard to read).
-
-DOT_TRANSPARENT = YES
-
-# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
-# files in one run (i.e. multiple -o and -T options on the command line). This
-# makes dot run faster, but since only newer versions of dot (>1.8.10)
-# support this, this feature is disabled by default.
-
-DOT_MULTI_TARGETS = NO
-
-# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
-# generate a legend page explaining the meaning of the various boxes and
-# arrows in the dot generated graphs.
-
-GENERATE_LEGEND = YES
-
-# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
-# remove the intermediate dot files that are used to generate
-# the various graphs.
-
-DOT_CLEANUP = YES
-
-#---------------------------------------------------------------------------
-# Configuration::additions related to the search engine
-#---------------------------------------------------------------------------
-
-# The SEARCHENGINE tag specifies whether or not a search engine should be
-# used. If set to NO the values of all tags below this one will be ignored.
-
-SEARCHENGINE = NO
diff --git a/storage/innobase/api/api0api.cc b/storage/innobase/api/api0api.cc
deleted file mode 100644
index 98d45c2000f..00000000000
--- a/storage/innobase/api/api0api.cc
+++ /dev/null
@@ -1,3886 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2008, 2015, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file api/api0api.cc
-InnoDB Native API
-
-2008-08-01 Created Sunny Bains
-3/20/2011 Jimmy Yang extracted from Embedded InnoDB
-*******************************************************/
-
-#include "univ.i"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-
-#include "api0api.h"
-#include "api0misc.h"
-#include "srv0start.h"
-#include "dict0dict.h"
-#include "btr0pcur.h"
-#include "row0ins.h"
-#include "row0upd.h"
-#include "row0vers.h"
-#include "trx0roll.h"
-#include "dict0crea.h"
-#include "row0merge.h"
-#include "pars0pars.h"
-#include "lock0types.h"
-#include "row0sel.h"
-#include "lock0lock.h"
-#include "rem0cmp.h"
-#include "ut0dbg.h"
-#include "dict0priv.h"
-#include "ut0ut.h"
-#include "ha_prototypes.h"
-#include "trx0roll.h"
-
-/** configure variable for binlog option with InnoDB APIs */
-my_bool ib_binlog_enabled = FALSE;
-
-/** configure variable for MDL option with InnoDB APIs */
-my_bool ib_mdl_enabled = FALSE;
-
-/** configure variable for disabling row locks with InnoDB APIs */
-my_bool ib_disable_row_lock = FALSE;
-
-/** configure variable for Transaction isolation levels */
-ulong ib_trx_level_setting = IB_TRX_READ_UNCOMMITTED;
-
-/** configure variable for background commit interval in seconds */
-ulong ib_bk_commit_interval = 0;
-
-/** InnoDB tuple types. */
-enum ib_tuple_type_t{
- TPL_TYPE_ROW, /*!< Data row tuple */
- TPL_TYPE_KEY /*!< Index key tuple */
-};
-
-/** Query types supported. */
-enum ib_qry_type_t{
- QRY_NON, /*!< None/Sentinel */
- QRY_INS, /*!< Insert operation */
- QRY_UPD, /*!< Update operation */
- QRY_SEL /*!< Select operation */
-};
-
-/** Query graph types. */
-struct ib_qry_grph_t {
- que_fork_t* ins; /*!< Innobase SQL query graph used
- in inserts */
- que_fork_t* upd; /*!< Innobase SQL query graph used
- in updates or deletes */
- que_fork_t* sel; /*!< dummy query graph used in
- selects */
-};
-
-/** Query node types. */
-struct ib_qry_node_t {
- ins_node_t* ins; /*!< Innobase SQL insert node
- used to perform inserts to the table */
- upd_node_t* upd; /*!< Innobase SQL update node
- used to perform updates and deletes */
- sel_node_t* sel; /*!< Innobase SQL select node
- used to perform selects on the table */
-};
-
-/** Query processing fields. */
-struct ib_qry_proc_t {
-
- ib_qry_node_t node; /*!< Query node*/
-
- ib_qry_grph_t grph; /*!< Query graph */
-};
-
-/** Cursor instance for traversing tables/indexes. This will eventually
-become row_prebuilt_t. */
-struct ib_cursor_t {
- mem_heap_t* heap; /*!< Instance heap */
-
- mem_heap_t* query_heap; /*!< Heap to use for query graphs */
-
- ib_qry_proc_t q_proc; /*!< Query processing info */
-
- ib_match_mode_t match_mode; /*!< ib_cursor_moveto match mode */
-
- row_prebuilt_t* prebuilt; /*!< For reading rows */
-
- bool valid_trx; /*!< Valid transaction attached */
-};
-
-/** InnoDB table columns used during table and index schema creation. */
-struct ib_col_t {
- const char* name; /*!< Name of column */
-
- ib_col_type_t ib_col_type; /*!< Main type of the column */
-
- ulint len; /*!< Length of the column */
-
- ib_col_attr_t ib_col_attr; /*!< Column attributes */
-
-};
-
-/** InnoDB index columns used during index and index schema creation. */
-struct ib_key_col_t {
- const char* name; /*!< Name of column */
-
- ulint prefix_len; /*!< Column index prefix len or 0 */
-};
-
-struct ib_table_def_t;
-
-/** InnoDB index schema used during index creation */
-struct ib_index_def_t {
- mem_heap_t* heap; /*!< Heap used to build this and all
- its columns in the list */
-
- const char* name; /*!< Index name */
-
- dict_table_t* table; /*!< Parent InnoDB table */
-
- ib_table_def_t* schema; /*!< Parent table schema that owns
- this instance */
-
- ibool clustered; /*!< True if clustered index */
-
- ibool unique; /*!< True if unique index */
-
- ib_vector_t* cols; /*!< Vector of columns */
-
-	trx_t*		usr_trx;	/*!< User transaction covering the
- DDL operations */
-};
-
-/** InnoDB table schema used during table creation */
-struct ib_table_def_t {
- mem_heap_t* heap; /*!< Heap used to build this and all
- its columns in the list */
- const char* name; /*!< Table name */
-
- ib_tbl_fmt_t ib_tbl_fmt; /*!< Row format */
-
- ulint page_size; /*!< Page size */
-
- ib_vector_t* cols; /*!< Vector of columns */
-
- ib_vector_t* indexes; /*!< Vector of indexes */
-
- dict_table_t* table; /* Table read from or NULL */
-};
-
-/** InnoDB tuple used for key operations. */
-struct ib_tuple_t {
- mem_heap_t* heap; /*!< Heap used to build
- this and for copying
- the column values. */
-
-	ib_tuple_type_t	type;		/*!< Tuple discriminator. */
-
-	const dict_index_t* index;	/*!< Index for the tuple; either a
-					secondary index or the clustered
-					index. */
-
- dtuple_t* ptr; /*!< The internal tuple
- instance */
-};
-
-/** The following counter is used to convey information to InnoDB
-about server activity: in case of normal DML ops it is not
-sensible to call srv_active_wake_master_thread after each
-operation; we only do it every INNOBASE_WAKE_INTERVAL'th step. */
-
-#define INNOBASE_WAKE_INTERVAL 32
-
-/*****************************************************************//**
-Check whether the Innodb persistent cursor is positioned.
-@return IB_TRUE if positioned */
-UNIV_INLINE
-ib_bool_t
-ib_btr_cursor_is_positioned(
-/*========================*/
- btr_pcur_t* pcur) /*!< in: InnoDB persistent cursor */
-{
- return(pcur->old_stored == BTR_PCUR_OLD_STORED
- && (pcur->pos_state == BTR_PCUR_IS_POSITIONED
- || pcur->pos_state == BTR_PCUR_WAS_POSITIONED));
-}
-
-
-/********************************************************************//**
-Open a table using the table id, if found then increment table ref count.
-@return table instance if found */
-static
-dict_table_t*
-ib_open_table_by_id(
-/*================*/
- ib_id_u64_t tid, /*!< in: table id to lookup */
- ib_bool_t locked) /*!< in: TRUE if own dict mutex */
-{
- dict_table_t* table;
- table_id_t table_id;
-
- table_id = tid;
-
- if (!locked) {
- dict_mutex_enter_for_mysql();
- }
-
- table = dict_table_open_on_id(table_id, TRUE, DICT_TABLE_OP_NORMAL);
-
- if (table != NULL && table->file_unreadable) {
- table = NULL;
- }
-
- if (!locked) {
- dict_mutex_exit_for_mysql();
- }
-
- return(table);
-}
-
-/********************************************************************//**
-Open a table using the table name, if found then increment table ref count.
-@return table instance if found */
-UNIV_INTERN
-void*
-ib_open_table_by_name(
-/*==================*/
- const char* name) /*!< in: table name to lookup */
-{
- dict_table_t* table;
-
- table = dict_table_open_on_name(name, FALSE, FALSE,
- DICT_ERR_IGNORE_NONE);
-
- if (table != NULL && table->file_unreadable) {
- table = NULL;
- }
-
- return(table);
-}
-
-/********************************************************************//**
-Find table using table name.
-@return table instance if found */
-static
-dict_table_t*
-ib_lookup_table_by_name(
-/*====================*/
- const char* name) /*!< in: table name to lookup */
-{
- dict_table_t* table;
-
- table = dict_table_get_low(name);
-
- if (table != NULL && table->file_unreadable) {
- table = NULL;
- }
-
- return(table);
-}
-
-/********************************************************************//**
-Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth
-time calls srv_active_wake_master_thread. This function should be used
-when a single database operation may introduce a small need for
-server utility activity, like checkpointing. */
-UNIV_INLINE
-void
-ib_wake_master_thread(void)
-/*=======================*/
-{
- static ulint ib_signal_counter = 0;
-
- ++ib_signal_counter;
-
- if ((ib_signal_counter % INNOBASE_WAKE_INTERVAL) == 0) {
- srv_active_wake_master_thread();
- }
-}
-
-/*****************************************************************//**
-Read the columns from a rec into a tuple. */
-static
-void
-ib_read_tuple(
-/*==========*/
- const rec_t* rec, /*!< in: Record to read */
- ib_bool_t page_format, /*!< in: IB_TRUE if compressed format */
- ib_tuple_t* tuple, /*!< in: tuple to read into */
- void** rec_buf, /*!< in/out: row buffer */
- ulint* len) /*!< in/out: buffer len */
-{
- ulint i;
- void* ptr;
- rec_t* copy;
- ulint rec_meta_data;
- ulint n_index_fields;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- dtuple_t* dtuple = tuple->ptr;
- const dict_index_t* index = tuple->index;
- ulint offset_size;
-
- rec_offs_init(offsets_);
-
- offsets = rec_get_offsets(
- rec, index, offsets, ULINT_UNDEFINED, &tuple->heap);
-
- rec_meta_data = rec_get_info_bits(rec, page_format);
- dtuple_set_info_bits(dtuple, rec_meta_data);
-
- offset_size = rec_offs_size(offsets);
-
- if (rec_buf && *rec_buf) {
- if (*len < offset_size) {
- free(*rec_buf);
- *rec_buf = malloc(offset_size);
- *len = offset_size;
- }
- ptr = *rec_buf;
- } else {
- /* Make a copy of the rec. */
- ptr = mem_heap_alloc(tuple->heap, offset_size);
- }
-
- copy = rec_copy(ptr, rec, offsets);
-
- n_index_fields = ut_min(
- rec_offs_n_fields(offsets), dtuple_get_n_fields(dtuple));
-
- for (i = 0; i < n_index_fields; ++i) {
- ulint len;
- const byte* data;
- dfield_t* dfield;
-
- if (tuple->type == TPL_TYPE_ROW) {
- const dict_col_t* col;
- ulint col_no;
- const dict_field_t* index_field;
-
- index_field = dict_index_get_nth_field(index, i);
- col = dict_field_get_col(index_field);
- col_no = dict_col_get_no(col);
-
- dfield = dtuple_get_nth_field(dtuple, col_no);
- } else {
- dfield = dtuple_get_nth_field(dtuple, i);
- }
-
- data = rec_get_nth_field(copy, offsets, i, &len);
-
- /* Fetch and copy any externally stored column. */
- if (rec_offs_nth_extern(offsets, i)) {
-
- ulint zip_size;
-
- zip_size = dict_table_zip_size(index->table);
-
- data = btr_rec_copy_externally_stored_field(
- copy, offsets, zip_size, i, &len,
- tuple->heap);
-
- ut_a(len != UNIV_SQL_NULL);
- }
-
- dfield_set_data(dfield, data, len);
- }
-}
-
-/*****************************************************************//**
-Create an InnoDB key tuple.
-@return tuple instance created, or NULL */
-static
-ib_tpl_t
-ib_key_tuple_new_low(
-/*=================*/
- const dict_index_t* index, /*!< in: index for which tuple
- required */
- ulint n_cols, /*!< in: no. of user defined cols */
- mem_heap_t* heap) /*!< in: memory heap */
-{
- ib_tuple_t* tuple;
- ulint i;
- ulint n_cmp_cols;
-
- tuple = static_cast<ib_tuple_t*>(
- mem_heap_alloc(heap, sizeof(*tuple)));
-
- if (tuple == NULL) {
- mem_heap_free(heap);
- return(NULL);
- }
-
- tuple->heap = heap;
- tuple->index = index;
- tuple->type = TPL_TYPE_KEY;
-
-	/* Is it a generated clustered index? */
- if (n_cols == 0) {
- ++n_cols;
- }
-
- tuple->ptr = dtuple_create(heap, n_cols);
-
- /* Copy types and set to SQL_NULL. */
- dict_index_copy_types(tuple->ptr, index, n_cols);
-
- for (i = 0; i < n_cols; i++) {
-
- dfield_t* dfield;
-
- dfield = dtuple_get_nth_field(tuple->ptr, i);
- dfield_set_null(dfield);
- }
-
- n_cmp_cols = dict_index_get_n_ordering_defined_by_user(index);
-
- dtuple_set_n_fields_cmp(tuple->ptr, n_cmp_cols);
-
- return((ib_tpl_t) tuple);
-}
-
-/*****************************************************************//**
-Create an InnoDB key tuple.
-@return tuple instance created, or NULL */
-static
-ib_tpl_t
-ib_key_tuple_new(
-/*=============*/
- const dict_index_t* index, /*!< in: index of tuple */
- ulint n_cols) /*!< in: no. of user defined cols */
-{
- mem_heap_t* heap;
-
- heap = mem_heap_create(64);
-
- if (heap == NULL) {
- return(NULL);
- }
-
- return(ib_key_tuple_new_low(index, n_cols, heap));
-}
-
-/*****************************************************************//**
-Create an InnoDB row tuple.
-@return tuple instance, or NULL */
-static
-ib_tpl_t
-ib_row_tuple_new_low(
-/*=================*/
- const dict_index_t* index, /*!< in: index of tuple */
- ulint n_cols, /*!< in: no. of cols in tuple */
- mem_heap_t* heap) /*!< in: memory heap */
-{
- ib_tuple_t* tuple;
-
- tuple = static_cast<ib_tuple_t*>(mem_heap_alloc(heap, sizeof(*tuple)));
-
- if (tuple == NULL) {
- mem_heap_free(heap);
- return(NULL);
- }
-
- tuple->heap = heap;
- tuple->index = index;
- tuple->type = TPL_TYPE_ROW;
-
- tuple->ptr = dtuple_create(heap, n_cols);
-
- /* Copy types and set to SQL_NULL. */
- dict_table_copy_types(tuple->ptr, index->table);
-
- return((ib_tpl_t) tuple);
-}
-
-/*****************************************************************//**
-Create an InnoDB row tuple.
-@return tuple instance, or NULL */
-static
-ib_tpl_t
-ib_row_tuple_new(
-/*=============*/
- const dict_index_t* index, /*!< in: index of tuple */
- ulint n_cols) /*!< in: no. of cols in tuple */
-{
- mem_heap_t* heap;
-
- heap = mem_heap_create(64);
-
- if (heap == NULL) {
- return(NULL);
- }
-
- return(ib_row_tuple_new_low(index, n_cols, heap));
-}
-
-/*****************************************************************//**
-Begin a transaction.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_trx_start(
-/*=========*/
- ib_trx_t ib_trx, /*!< in: transaction to restart */
- ib_trx_level_t ib_trx_level, /*!< in: trx isolation level */
- ib_bool_t read_write, /*!< in: true if read write
- transaction */
- ib_bool_t auto_commit, /*!< in: auto commit after each
- single DML */
- void* thd) /*!< in: THD */
-{
- ib_err_t err = DB_SUCCESS;
- trx_t* trx = (trx_t*) ib_trx;
-
- ut_a(ib_trx_level <= IB_TRX_SERIALIZABLE);
-
- trx->api_trx = true;
- trx->api_auto_commit = auto_commit;
- trx->read_write = read_write;
-
- trx_start_if_not_started(trx);
-
- trx->isolation_level = ib_trx_level;
-
- /* FIXME: This is a place holder, we should add an arg that comes
- from the client. */
- trx->mysql_thd = static_cast<THD*>(thd);
-
- return(err);
-}
-
-/*****************************************************************//**
-Begin a transaction. This will allocate a new transaction handle and
-put the transaction in the active state.
-@return innobase txn handle */
-UNIV_INTERN
-ib_trx_t
-ib_trx_begin(
-/*=========*/
- ib_trx_level_t ib_trx_level, /*!< in: trx isolation level */
- ib_bool_t read_write, /*!< in: true if read write
- transaction */
- ib_bool_t auto_commit) /*!< in: auto commit after each
- single DML */
-{
- trx_t* trx;
- ib_bool_t started;
-
- trx = trx_allocate_for_mysql();
-
- started = ib_trx_start(static_cast<ib_trx_t>(trx), ib_trx_level,
- read_write, auto_commit, NULL);
- ut_a(started);
-
- return(static_cast<ib_trx_t>(trx));
-}
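-
-/* Illustrative sketch (added commentary, not part of the original file):
-a minimal transaction life cycle built from the functions above. Error
-handling is elided; IB_TRX_SERIALIZABLE is one of the ib_trx_level_t
-values referenced in this file.
-
-	ib_trx_t	ib_trx;
-	ib_err_t	err;
-
-	ib_trx = ib_trx_begin(IB_TRX_SERIALIZABLE, TRUE, FALSE);
-
-	(... operate on cursors attached to ib_trx ...)
-
-	err = ib_trx_commit(ib_trx);	(or ib_trx_rollback(ib_trx))
-	err = ib_trx_release(ib_trx);	(frees the handle)
-*/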
-
-
-/*****************************************************************//**
-Check if transaction is read_only
-@return transaction read_only status */
-UNIV_INTERN
-ib_u32_t
-ib_trx_read_only(
-/*=============*/
- ib_trx_t ib_trx) /*!< in: trx handle */
-{
- trx_t* trx = (trx_t*) ib_trx;
-
- return(trx->read_only);
-}
-
-/*****************************************************************//**
-Get the transaction's state.
-@return transaction state */
-UNIV_INTERN
-ib_trx_state_t
-ib_trx_state(
-/*=========*/
- ib_trx_t ib_trx) /*!< in: trx handle */
-{
- trx_t* trx = (trx_t*) ib_trx;
-
- return((ib_trx_state_t) trx->state);
-}
-
-/*****************************************************************//**
-Get a trx start time.
-@return trx start_time */
-UNIV_INTERN
-ib_u64_t
-ib_trx_get_start_time(
-/*==================*/
- ib_trx_t ib_trx) /*!< in: transaction */
-{
- trx_t* trx = (trx_t*) ib_trx;
- return(static_cast<ib_u64_t>(trx->start_time));
-}
-
-/*****************************************************************//**
-Release the resources of the transaction.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_trx_release(
-/*===========*/
- ib_trx_t ib_trx) /*!< in: trx handle */
-{
- trx_t* trx = (trx_t*) ib_trx;
-
- ut_ad(trx != NULL);
- trx_free_for_mysql(trx);
-
- return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Commit a transaction. This function will also release the schema
-latches.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_trx_commit(
-/*==========*/
- ib_trx_t ib_trx) /*!< in: trx handle */
-{
- ib_err_t err = DB_SUCCESS;
- trx_t* trx = (trx_t*) ib_trx;
-
- if (trx->state == TRX_STATE_NOT_STARTED) {
- return(err);
- }
-
- trx_commit(trx);
-
- return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Rollback a transaction. This function will also release the schema
-latches.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_trx_rollback(
-/*============*/
- ib_trx_t ib_trx) /*!< in: trx handle */
-{
- ib_err_t err;
- trx_t* trx = (trx_t*) ib_trx;
-
- err = static_cast<ib_err_t>(trx_rollback_for_mysql(trx));
-
- /* It should always succeed */
- ut_a(err == DB_SUCCESS);
-
- return(err);
-}
-
-#ifdef __WIN__
-/*****************************************************************//**
-Convert a string to lower case. */
-static
-void
-ib_to_lower_case(
-/*=============*/
- char* ptr) /*!< string to convert to lower case */
-{
- while (*ptr) {
- *ptr = tolower(*ptr);
- ++ptr;
- }
-}
-#endif /* __WIN__ */
-
-/*****************************************************************//**
-Normalizes a table name string. A normalized name consists of the
-database name concatenated with '/' and the table name, for example
-test/mytable. On Windows, normalization also converts both the database
-name and the table name to lower case. This function can be called for
-system tables, which do not have a database component; such names are
-not normalized to lower case on Windows. The assumption is that they
-are system tables that reside in the system tablespace. */
-static
-void
-ib_normalize_table_name(
-/*====================*/
- char* norm_name, /*!< out: normalized name as a
- null-terminated string */
- const char* name) /*!< in: table name string */
-{
- const char* ptr = name;
-
- /* Scan name from the end */
-
- ptr += ut_strlen(name) - 1;
-
- /* Find the start of the table name. */
- while (ptr >= name && *ptr != '\\' && *ptr != '/' && ptr > name) {
- --ptr;
- }
-
-
- /* For system tables there is no '/' or dbname. */
- ut_a(ptr >= name);
-
- if (ptr > name) {
- const char* db_name;
- const char* table_name;
-
- table_name = ptr + 1;
-
- --ptr;
-
- while (ptr >= name && *ptr != '\\' && *ptr != '/') {
- ptr--;
- }
-
- db_name = ptr + 1;
-
- memcpy(norm_name, db_name,
- ut_strlen(name) + 1 - (db_name - name));
-
- norm_name[table_name - db_name - 1] = '/';
-#ifdef __WIN__
- ib_to_lower_case(norm_name);
-#endif
- } else {
- ut_strcpy(norm_name, name);
- }
-}
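-
-/* Example (illustrative only, not part of the original file):
-
-	char	norm_name[512];
-
-	ib_normalize_table_name(norm_name, "./test/mytable");
-
-norm_name now contains "test/mytable" (lower-cased on Windows, per the
-rules above), while a plain system table name such as "SYS_TABLES" is
-copied through unchanged. */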
-
-/*****************************************************************//**
-Check whether the table name conforms to our requirements. Currently
-we only do a simple check for the presence of a '/'.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_table_name_check(
-/*================*/
- const char* name) /*!< in: table name to check */
-{
- const char* slash = NULL;
- ulint len = ut_strlen(name);
-
- if (len < 2
- || *name == '/'
- || name[len - 1] == '/'
- || (name[0] == '.' && name[1] == '/')
- || (name[0] == '.' && name[1] == '.' && name[2] == '/')) {
-
- return(DB_DATA_MISMATCH);
- }
-
- for ( ; *name; ++name) {
-#ifdef __WIN__
- /* Check for reserved characters in DOS filenames. */
- switch (*name) {
- case ':':
- case '|':
- case '"':
- case '*':
- case '<':
- case '>':
- return(DB_DATA_MISMATCH);
- }
-#endif /* __WIN__ */
- if (*name == '/') {
- if (slash) {
- return(DB_DATA_MISMATCH);
- }
- slash = name;
- }
- }
-
- return(slash ? DB_SUCCESS : DB_DATA_MISMATCH);
-}
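-
-/* Examples (illustrative only, not part of the original file):
-
-	"test/t1"	DB_SUCCESS: exactly one '/' between db and table
-	"t1"		DB_DATA_MISMATCH: no database component
-	"a/b/c"		DB_DATA_MISMATCH: more than one '/'
-	"../t1"		DB_DATA_MISMATCH: relative path prefixes rejected
-*/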
-
-
-
-/*****************************************************************//**
-Get a table id. The caller must have acquired the dictionary mutex.
-@return DB_SUCCESS if found */
-static
-ib_err_t
-ib_table_get_id_low(
-/*================*/
- const char* table_name, /*!< in: table to find */
- ib_id_u64_t* table_id) /*!< out: table id if found */
-{
- dict_table_t* table;
- ib_err_t err = DB_TABLE_NOT_FOUND;
-
- *table_id = 0;
-
- table = ib_lookup_table_by_name(table_name);
-
- if (table != NULL) {
- *table_id = (table->id);
-
- err = DB_SUCCESS;
- }
-
- return(err);
-}
-
-/*****************************************************************//**
-Create an internal cursor instance.
-@return DB_SUCCESS or err code */
-static
-ib_err_t
-ib_create_cursor(
-/*=============*/
- ib_crsr_t* ib_crsr, /*!< out: InnoDB cursor */
- dict_table_t* table, /*!< in: table instance */
- dict_index_t* index, /*!< in: index to use */
- trx_t* trx) /*!< in: transaction */
-{
- mem_heap_t* heap;
- ib_cursor_t* cursor;
- ib_err_t err = DB_SUCCESS;
-
- heap = mem_heap_create(sizeof(*cursor) * 2);
-
- if (heap != NULL) {
- row_prebuilt_t* prebuilt;
-
- cursor = static_cast<ib_cursor_t*>(
- mem_heap_zalloc(heap, sizeof(*cursor)));
-
- cursor->heap = heap;
-
- cursor->query_heap = mem_heap_create(64);
-
- if (cursor->query_heap == NULL) {
- mem_heap_free(heap);
-
- return(DB_OUT_OF_MEMORY);
- }
-
- cursor->prebuilt = row_create_prebuilt(table, 0);
-
- prebuilt = cursor->prebuilt;
-
- prebuilt->trx = trx;
-
- cursor->valid_trx = TRUE;
-
- prebuilt->table = table;
- prebuilt->select_lock_type = LOCK_NONE;
- prebuilt->innodb_api = TRUE;
-
- prebuilt->index = index;
-
- ut_a(prebuilt->index != NULL);
-
- if (prebuilt->trx != NULL) {
- ++prebuilt->trx->n_mysql_tables_in_use;
-
- prebuilt->index_usable =
- row_merge_is_index_usable(
- prebuilt->trx, prebuilt->index);
-
- /* Assign a read view if the transaction does
- not have it yet */
-
- trx_assign_read_view(prebuilt->trx);
- }
-
- *ib_crsr = (ib_crsr_t) cursor;
- } else {
- err = DB_OUT_OF_MEMORY;
- }
-
- return(err);
-}
-
-/*****************************************************************//**
-Create an internal cursor instance, and set prebuilt->index to index
-with supplied index_id.
-@return DB_SUCCESS or err code */
-static
-ib_err_t
-ib_create_cursor_with_index_id(
-/*===========================*/
- ib_crsr_t* ib_crsr, /*!< out: InnoDB cursor */
- dict_table_t* table, /*!< in: table instance */
- ib_id_u64_t index_id, /*!< in: index id or 0 */
- trx_t* trx) /*!< in: transaction */
-{
- dict_index_t* index;
-
- if (index_id != 0) {
- mutex_enter(&dict_sys->mutex);
- index = dict_index_find_on_id_low(index_id);
- mutex_exit(&dict_sys->mutex);
- } else {
- index = dict_table_get_first_index(table);
- }
-
- return(ib_create_cursor(ib_crsr, table, index, trx));
-}
-
-/*****************************************************************//**
-Open an InnoDB table and return a cursor handle to it.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_open_table_using_id(
-/*==========================*/
- ib_id_u64_t table_id, /*!< in: table id of table to open */
- ib_trx_t ib_trx, /*!< in: Current transaction handle
- can be NULL */
- ib_crsr_t* ib_crsr) /*!< out,own: InnoDB cursor */
-{
- ib_err_t err;
- dict_table_t* table;
-
- if (ib_trx == NULL || !ib_schema_lock_is_exclusive(ib_trx)) {
- table = ib_open_table_by_id(table_id, FALSE);
- } else {
- table = ib_open_table_by_id(table_id, TRUE);
- }
-
- if (table == NULL) {
-
- return(DB_TABLE_NOT_FOUND);
- }
-
- err = ib_create_cursor_with_index_id(ib_crsr, table, 0,
- (trx_t*) ib_trx);
-
- return(err);
-}
-
-/*****************************************************************//**
-Open an InnoDB index and return a cursor handle to it.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_open_index_using_id(
-/*==========================*/
- ib_id_u64_t index_id, /*!< in: index id of index to open */
- ib_trx_t ib_trx, /*!< in: Current transaction handle
- can be NULL */
- ib_crsr_t* ib_crsr) /*!< out: InnoDB cursor */
-{
- ib_err_t err;
- dict_table_t* table;
-	ulint		table_id = (ulint) (index_id >> 32);
-
- if (ib_trx == NULL || !ib_schema_lock_is_exclusive(ib_trx)) {
- table = ib_open_table_by_id(table_id, FALSE);
- } else {
- table = ib_open_table_by_id(table_id, TRUE);
- }
-
- if (table == NULL) {
-
- return(DB_TABLE_NOT_FOUND);
- }
-
- /* We only return the lower 32 bits of the dulint. */
- err = ib_create_cursor_with_index_id(
- ib_crsr, table, index_id, (trx_t*) ib_trx);
-
- if (ib_crsr != NULL) {
- const ib_cursor_t* cursor;
-
- cursor = *(ib_cursor_t**) ib_crsr;
-
- if (cursor->prebuilt->index == NULL) {
- ib_err_t crsr_err;
-
- crsr_err = ib_cursor_close(*ib_crsr);
- ut_a(crsr_err == DB_SUCCESS);
-
- *ib_crsr = NULL;
- }
- }
-
- return(err);
-}
-
-/*****************************************************************//**
-Open an InnoDB secondary index cursor and return a cursor handle to it.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_open_index_using_name(
-/*============================*/
- ib_crsr_t ib_open_crsr, /*!< in: open/active cursor */
- const char* index_name, /*!< in: secondary index name */
- ib_crsr_t* ib_crsr, /*!< out,own: InnoDB index cursor */
- int* idx_type, /*!< out: index is cluster index */
- ib_id_u64_t* idx_id) /*!< out: index id */
-{
- dict_table_t* table;
- dict_index_t* index;
- index_id_t index_id = 0;
- ib_err_t err = DB_TABLE_NOT_FOUND;
- ib_cursor_t* cursor = (ib_cursor_t*) ib_open_crsr;
-
- *idx_type = 0;
- *idx_id = 0;
- *ib_crsr = NULL;
-
- /* We want to increment the ref count, so we do a redundant search. */
- table = dict_table_open_on_id(cursor->prebuilt->table->id,
- FALSE, DICT_TABLE_OP_NORMAL);
- ut_a(table != NULL);
-
- /* The first index is always the cluster index. */
- index = dict_table_get_first_index(table);
-
- /* Traverse the user defined indexes. */
- while (index != NULL) {
- if (innobase_strcasecmp(index->name, index_name) == 0) {
- index_id = index->id;
- *idx_type = index->type;
- *idx_id = index_id;
- break;
- }
- index = UT_LIST_GET_NEXT(indexes, index);
- }
-
- if (!index_id) {
- dict_table_close(table, FALSE, FALSE);
- return(DB_ERROR);
- }
-
- if (index_id > 0) {
- ut_ad(index->id == index_id);
- err = ib_create_cursor(
- ib_crsr, table, index, cursor->prebuilt->trx);
- }
-
- if (*ib_crsr != NULL) {
- const ib_cursor_t* cursor;
-
- cursor = *(ib_cursor_t**) ib_crsr;
-
- if (cursor->prebuilt->index == NULL) {
- err = ib_cursor_close(*ib_crsr);
- ut_a(err == DB_SUCCESS);
- *ib_crsr = NULL;
- }
- }
-
- return(err);
-}
-
-/*****************************************************************//**
-Open an InnoDB table and return a cursor handle to it.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_open_table(
-/*=================*/
- const char* name, /*!< in: table name */
- ib_trx_t ib_trx, /*!< in: Current transaction handle
- can be NULL */
- ib_crsr_t* ib_crsr) /*!< out,own: InnoDB cursor */
-{
- ib_err_t err;
- dict_table_t* table;
- char* normalized_name;
-
- normalized_name = static_cast<char*>(mem_alloc(ut_strlen(name) + 1));
- ib_normalize_table_name(normalized_name, name);
-
- if (ib_trx != NULL) {
- if (!ib_schema_lock_is_exclusive(ib_trx)) {
- table = (dict_table_t*)ib_open_table_by_name(
- normalized_name);
- } else {
- /* NOTE: We do not acquire MySQL metadata lock */
- table = ib_lookup_table_by_name(normalized_name);
- }
- } else {
- table = (dict_table_t*)ib_open_table_by_name(normalized_name);
- }
-
- mem_free(normalized_name);
- normalized_name = NULL;
-
-	/* It can happen that another thread has created the table but
-	not the cluster index, or that the table definition is broken.
-	Refuse to open the table if that is the case. */
- if (table != NULL && dict_table_get_first_index(table) == NULL) {
- table = NULL;
- }
-
- if (table != NULL) {
- err = ib_create_cursor_with_index_id(ib_crsr, table, 0,
- (trx_t*) ib_trx);
- } else {
- err = DB_TABLE_NOT_FOUND;
- }
-
- return(err);
-}
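-
-/* Sketch of typical use (added commentary, not part of the original
-file); ib_trx comes from ib_trx_begin() above and ib_cursor_close() is
-defined further below:
-
-	ib_crsr_t	ib_crsr;
-
-	err = ib_cursor_open_table("test/t1", ib_trx, &ib_crsr);
-
-	(... read or modify rows through the cursor ...)
-
-	err = ib_cursor_close(ib_crsr);
-*/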
-
-/********************************************************************//**
-Free a context struct for a table handle. */
-static
-void
-ib_qry_proc_free(
-/*=============*/
- ib_qry_proc_t* q_proc) /*!< in, own: qproc struct */
-{
- que_graph_free_recursive(q_proc->grph.ins);
- que_graph_free_recursive(q_proc->grph.upd);
- que_graph_free_recursive(q_proc->grph.sel);
-
- memset(q_proc, 0x0, sizeof(*q_proc));
-}
-
-/*****************************************************************//**
-Set a cursor trx to NULL. */
-UNIV_INTERN
-void
-ib_cursor_clear_trx(
-/*================*/
- ib_crsr_t ib_crsr) /*!< in/out: InnoDB cursor */
-{
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
-
- cursor->prebuilt->trx = NULL;
-}
-
-/*****************************************************************//**
-Reset the cursor.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_reset(
-/*============*/
- ib_crsr_t ib_crsr) /*!< in/out: InnoDB cursor */
-{
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
- row_prebuilt_t* prebuilt = cursor->prebuilt;
-
- if (cursor->valid_trx && prebuilt->trx != NULL
- && prebuilt->trx->n_mysql_tables_in_use > 0) {
-
- --prebuilt->trx->n_mysql_tables_in_use;
- }
-
- /* The fields in this data structure are allocated from
- the query heap and so need to be reset too. */
- ib_qry_proc_free(&cursor->q_proc);
-
- mem_heap_empty(cursor->query_heap);
-
- return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Update the cursor with a new transaction and also reset the cursor.
-@return DB_SUCCESS or err code */
-ib_err_t
-ib_cursor_new_trx(
-/*==============*/
- ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
- ib_trx_t ib_trx) /*!< in: transaction */
-{
- ib_err_t err = DB_SUCCESS;
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
- trx_t* trx = (trx_t*) ib_trx;
-
- row_prebuilt_t* prebuilt = cursor->prebuilt;
-
- row_update_prebuilt_trx(prebuilt, trx);
-
- cursor->valid_trx = TRUE;
-
- trx_assign_read_view(prebuilt->trx);
-
- ib_qry_proc_free(&cursor->q_proc);
-
- mem_heap_empty(cursor->query_heap);
-
- return(err);
-}
-
-/*****************************************************************//**
-Commit the transaction in a cursor
-@return DB_SUCCESS or err code */
-ib_err_t
-ib_cursor_commit_trx(
-/*=================*/
- ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
- ib_trx_t ib_trx) /*!< in: transaction */
-{
- ib_err_t err = DB_SUCCESS;
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
-#ifdef UNIV_DEBUG
- row_prebuilt_t* prebuilt = cursor->prebuilt;
-
- ut_ad(prebuilt->trx == (trx_t*) ib_trx);
-#endif /* UNIV_DEBUG */
- ib_trx_commit(ib_trx);
- cursor->valid_trx = FALSE;
- return(err);
-}
-
-/*****************************************************************//**
-Close an InnoDB table and free the cursor.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_close(
-/*============*/
- ib_crsr_t ib_crsr) /*!< in,own: InnoDB cursor */
-{
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
- row_prebuilt_t* prebuilt;
- trx_t* trx;
-
- if (!cursor) {
- return(DB_SUCCESS);
- }
-
- prebuilt = cursor->prebuilt;
- trx = prebuilt->trx;
-
- ib_qry_proc_free(&cursor->q_proc);
-
- /* The transaction could have been detached from the cursor. */
- if (cursor->valid_trx && trx != NULL
- && trx->n_mysql_tables_in_use > 0) {
- --trx->n_mysql_tables_in_use;
- }
-
- row_prebuilt_free(prebuilt, FALSE);
- cursor->prebuilt = NULL;
-
- mem_heap_free(cursor->query_heap);
- mem_heap_free(cursor->heap);
- cursor = NULL;
-
- return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Close the table and decrement the table reference count (n_ref_count).
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_close_table(
-/*==================*/
- ib_crsr_t ib_crsr) /*!< in,own: InnoDB cursor */
-{
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
- row_prebuilt_t* prebuilt = cursor->prebuilt;
-
- if (prebuilt && prebuilt->table) {
- dict_table_close(prebuilt->table, FALSE, FALSE);
- }
-
- return(DB_SUCCESS);
-}
-
-/**********************************************************************//**
-Run the insert query and do error handling.
-@return DB_SUCCESS or error code */
-UNIV_INLINE
-ib_err_t
-ib_insert_row_with_lock_retry(
-/*==========================*/
- que_thr_t* thr, /*!< in: insert query graph */
- ins_node_t* node, /*!< in: insert node for the query */
- trx_savept_t* savept) /*!< in: savepoint to rollback to
- in case of an error */
-{
- trx_t* trx;
- ib_err_t err;
- ib_bool_t lock_wait;
-
- trx = thr_get_trx(thr);
-
- do {
- thr->run_node = node;
- thr->prev_node = node;
-
- row_ins_step(thr);
-
- err = trx->error_state;
-
- if (err != DB_SUCCESS) {
- que_thr_stop_for_mysql(thr);
-
- thr->lock_state = QUE_THR_LOCK_ROW;
- lock_wait = static_cast<ib_bool_t>(
- ib_handle_errors(&err, trx, thr, savept));
- thr->lock_state = QUE_THR_LOCK_NOLOCK;
- } else {
- lock_wait = FALSE;
- }
- } while (lock_wait);
-
- return(err);
-}
-
-/*****************************************************************//**
-Write a row.
-@return DB_SUCCESS or err code */
-static
-ib_err_t
-ib_execute_insert_query_graph(
-/*==========================*/
- dict_table_t* table, /*!< in: table where to insert */
- que_fork_t* ins_graph, /*!< in: query graph */
- ins_node_t* node) /*!< in: insert node */
-{
- trx_t* trx;
- que_thr_t* thr;
- trx_savept_t savept;
- ib_err_t err = DB_SUCCESS;
-
- trx = ins_graph->trx;
-
- savept = trx_savept_take(trx);
-
- thr = que_fork_get_first_thr(ins_graph);
-
- que_thr_move_to_run_state_for_mysql(thr, trx);
-
- err = ib_insert_row_with_lock_retry(thr, node, &savept);
-
- if (err == DB_SUCCESS) {
- que_thr_stop_for_mysql_no_error(thr, trx);
-
- dict_table_n_rows_inc(table);
-
- if (table->is_system_db) {
- srv_stats.n_system_rows_inserted.inc();
- } else {
- srv_stats.n_rows_inserted.inc();
- }
- }
-
- trx->op_info = "";
-
- return(err);
-}
-
-/*****************************************************************//**
-Create an insert query graph node. */
-static
-void
-ib_insert_query_graph_create(
-/*==========================*/
- ib_cursor_t* cursor) /*!< in: Cursor instance */
-{
- ib_qry_proc_t* q_proc = &cursor->q_proc;
- ib_qry_node_t* node = &q_proc->node;
- trx_t* trx = cursor->prebuilt->trx;
-
- ut_a(trx->state != TRX_STATE_NOT_STARTED);
-
- if (node->ins == NULL) {
- dtuple_t* row;
- ib_qry_grph_t* grph = &q_proc->grph;
- mem_heap_t* heap = cursor->query_heap;
- dict_table_t* table = cursor->prebuilt->table;
-
- node->ins = ins_node_create(INS_DIRECT, table, heap);
-
- node->ins->select = NULL;
- node->ins->values_list = NULL;
-
- row = dtuple_create(heap, dict_table_get_n_cols(table));
- dict_table_copy_types(row, table);
-
- ins_node_set_new_row(node->ins, row);
-
- grph->ins = static_cast<que_fork_t*>(
- que_node_get_parent(
- pars_complete_graph_for_exec(node->ins, trx,
- heap)));
-
- grph->ins->state = QUE_FORK_ACTIVE;
- }
-}
-
-/*****************************************************************//**
-Insert a row to a table.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_insert_row(
-/*=================*/
- ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor instance */
- const ib_tpl_t ib_tpl) /*!< in: tuple to insert */
-{
- ib_ulint_t i;
- ib_qry_node_t* node;
- ib_qry_proc_t* q_proc;
- ulint n_fields;
- dtuple_t* dst_dtuple;
- ib_err_t err = DB_SUCCESS;
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
- const ib_tuple_t* src_tuple = (const ib_tuple_t*) ib_tpl;
-
- ib_insert_query_graph_create(cursor);
-
- ut_ad(src_tuple->type == TPL_TYPE_ROW);
-
- q_proc = &cursor->q_proc;
- node = &q_proc->node;
-
- node->ins->state = INS_NODE_ALLOC_ROW_ID;
- dst_dtuple = node->ins->row;
-
- n_fields = dtuple_get_n_fields(src_tuple->ptr);
- ut_ad(n_fields == dtuple_get_n_fields(dst_dtuple));
-
- /* Do a shallow copy of the data fields and check for NULL
- constraints on columns. */
- for (i = 0; i < n_fields; i++) {
- ulint mtype;
- dfield_t* src_field;
- dfield_t* dst_field;
-
- src_field = dtuple_get_nth_field(src_tuple->ptr, i);
-
- mtype = dtype_get_mtype(dfield_get_type(src_field));
-
- /* Don't touch the system columns. */
- if (mtype != DATA_SYS) {
- ulint prtype;
-
- prtype = dtype_get_prtype(dfield_get_type(src_field));
-
- if ((prtype & DATA_NOT_NULL)
- && dfield_is_null(src_field)) {
-
- err = DB_DATA_MISMATCH;
- break;
- }
-
- dst_field = dtuple_get_nth_field(dst_dtuple, i);
- ut_ad(mtype
- == dtype_get_mtype(dfield_get_type(dst_field)));
-
- /* Do a shallow copy. */
- dfield_set_data(
- dst_field, src_field->data, src_field->len);
-
- if (dst_field->len != IB_SQL_NULL) {
- UNIV_MEM_ASSERT_RW(dst_field->data,
- dst_field->len);
- }
- }
- }
-
- if (err == DB_SUCCESS) {
- err = ib_execute_insert_query_graph(
- src_tuple->index->table, q_proc->grph.ins, node->ins);
- }
-
- ib_wake_master_thread();
-
- return(err);
-}
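-
-/* Sketch of an insert (added commentary, not part of the original
-file). ib_clust_read_tuple_new() is assumed to come from the public
-api0api.h header; it allocates a row tuple shaped like the cursor's
-table. ib_col_set_value() is defined further below in this file.
-
-	ib_tpl_t	ib_tpl;
-
-	ib_tpl = ib_clust_read_tuple_new(ib_crsr);
-	err = ib_col_set_value(ib_tpl, 0, "key", 3, TRUE);
-	err = ib_cursor_insert_row(ib_crsr, ib_tpl);
-	ib_tuple_delete(ib_tpl);
-*/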
-
-/*********************************************************************//**
-Gets pointer to a prebuilt update vector used in updates.
-@return update vector */
-UNIV_INLINE
-upd_t*
-ib_update_vector_create(
-/*====================*/
- ib_cursor_t* cursor) /*!< in: current cursor */
-{
- trx_t* trx = cursor->prebuilt->trx;
- mem_heap_t* heap = cursor->query_heap;
- dict_table_t* table = cursor->prebuilt->table;
- ib_qry_proc_t* q_proc = &cursor->q_proc;
- ib_qry_grph_t* grph = &q_proc->grph;
- ib_qry_node_t* node = &q_proc->node;
-
- ut_a(trx->state != TRX_STATE_NOT_STARTED);
-
- if (node->upd == NULL) {
- node->upd = static_cast<upd_node_t*>(
- row_create_update_node_for_mysql(table, heap));
- }
-
- grph->upd = static_cast<que_fork_t*>(
- que_node_get_parent(
- pars_complete_graph_for_exec(node->upd, trx, heap)));
-
- grph->upd->state = QUE_FORK_ACTIVE;
-
- return(node->upd->update);
-}
-
-/**********************************************************************//**
-Note that a column has changed. */
-static
-void
-ib_update_col(
-/*==========*/
-
- ib_cursor_t* cursor, /*!< in: current cursor */
- upd_field_t* upd_field, /*!< in/out: update field */
- ulint col_no, /*!< in: column number */
- dfield_t* dfield) /*!< in: updated dfield */
-{
- ulint data_len;
- dict_table_t* table = cursor->prebuilt->table;
- dict_index_t* index = dict_table_get_first_index(table);
-
- data_len = dfield_get_len(dfield);
-
- if (data_len == UNIV_SQL_NULL) {
- dfield_set_null(&upd_field->new_val);
- } else {
- dfield_copy_data(&upd_field->new_val, dfield);
- }
-
- upd_field->exp = NULL;
-
- upd_field->orig_len = 0;
-
- upd_field->field_no = dict_col_get_clust_pos(
- &table->cols[col_no], index);
-}
-
-/**********************************************************************//**
-Checks which fields have changed in a row and stores the new data
-to an update vector.
-@return DB_SUCCESS or err code */
-static
-ib_err_t
-ib_calc_diff(
-/*=========*/
- ib_cursor_t* cursor, /*!< in: current cursor */
- upd_t* upd, /*!< in/out: update vector */
- const ib_tuple_t*old_tuple, /*!< in: Old tuple in table */
- const ib_tuple_t*new_tuple) /*!< in: New tuple to update */
-{
- ulint i;
- ulint n_changed = 0;
- ib_err_t err = DB_SUCCESS;
- ulint n_fields = dtuple_get_n_fields(new_tuple->ptr);
-
- ut_a(old_tuple->type == TPL_TYPE_ROW);
- ut_a(new_tuple->type == TPL_TYPE_ROW);
- ut_a(old_tuple->index->table == new_tuple->index->table);
-
- for (i = 0; i < n_fields; ++i) {
- ulint mtype;
- ulint prtype;
- upd_field_t* upd_field;
- dfield_t* new_dfield;
- dfield_t* old_dfield;
-
- new_dfield = dtuple_get_nth_field(new_tuple->ptr, i);
- old_dfield = dtuple_get_nth_field(old_tuple->ptr, i);
-
- mtype = dtype_get_mtype(dfield_get_type(old_dfield));
- prtype = dtype_get_prtype(dfield_get_type(old_dfield));
-
- /* Skip the system columns */
- if (mtype == DATA_SYS) {
- continue;
-
- } else if ((prtype & DATA_NOT_NULL)
- && dfield_is_null(new_dfield)) {
-
- err = DB_DATA_MISMATCH;
- break;
- }
-
- if (dfield_get_len(new_dfield) != dfield_get_len(old_dfield)
- || (!dfield_is_null(old_dfield)
- && memcmp(dfield_get_data(new_dfield),
- dfield_get_data(old_dfield),
- dfield_get_len(old_dfield)) != 0)) {
-
- upd_field = &upd->fields[n_changed];
-
- ib_update_col(cursor, upd_field, i, new_dfield);
-
- ++n_changed;
- }
- }
-
- if (err == DB_SUCCESS) {
- upd->info_bits = 0;
- upd->n_fields = n_changed;
- }
-
- return(err);
-}
-
-/**********************************************************************//**
-Run the update query and do error handling.
-@return DB_SUCCESS or error code */
-UNIV_INLINE
-ib_err_t
-ib_update_row_with_lock_retry(
-/*==========================*/
- que_thr_t* thr, /*!< in: Update query graph */
- upd_node_t* node, /*!< in: Update node for the query */
- trx_savept_t* savept) /*!< in: savepoint to rollback to
- in case of an error */
-
-{
- trx_t* trx;
- ib_err_t err;
- ib_bool_t lock_wait;
-
- trx = thr_get_trx(thr);
-
- do {
- thr->run_node = node;
- thr->prev_node = node;
-
- row_upd_step(thr);
-
- err = trx->error_state;
-
- if (err != DB_SUCCESS) {
- que_thr_stop_for_mysql(thr);
-
- if (err != DB_RECORD_NOT_FOUND) {
- thr->lock_state = QUE_THR_LOCK_ROW;
-
- lock_wait = static_cast<ib_bool_t>(
- ib_handle_errors(&err, trx, thr, savept));
-
- thr->lock_state = QUE_THR_LOCK_NOLOCK;
- } else {
- lock_wait = FALSE;
- }
- } else {
- lock_wait = FALSE;
- }
- } while (lock_wait);
-
- return(err);
-}
-
-/*********************************************************************//**
-Does an update or delete of a row.
-@return DB_SUCCESS or err code */
-UNIV_INLINE
-ib_err_t
-ib_execute_update_query_graph(
-/*==========================*/
- ib_cursor_t* cursor, /*!< in: Cursor instance */
- btr_pcur_t* pcur) /*!< in: Btree persistent cursor */
-{
- ib_err_t err;
- que_thr_t* thr;
- upd_node_t* node;
- trx_savept_t savept;
- trx_t* trx = cursor->prebuilt->trx;
- dict_table_t* table = cursor->prebuilt->table;
- ib_qry_proc_t* q_proc = &cursor->q_proc;
-
- /* The transaction must be running. */
- ut_a(trx->state != TRX_STATE_NOT_STARTED);
-
- node = q_proc->node.upd;
-
- ut_a(dict_index_is_clust(pcur->btr_cur.index));
- btr_pcur_copy_stored_position(node->pcur, pcur);
-
- ut_a(node->pcur->rel_pos == BTR_PCUR_ON);
-
- savept = trx_savept_take(trx);
-
- thr = que_fork_get_first_thr(q_proc->grph.upd);
-
- node->state = UPD_NODE_UPDATE_CLUSTERED;
-
- que_thr_move_to_run_state_for_mysql(thr, trx);
-
- err = ib_update_row_with_lock_retry(thr, node, &savept);
-
- if (err == DB_SUCCESS) {
-
- que_thr_stop_for_mysql_no_error(thr, trx);
-
- if (node->is_delete) {
-
- dict_table_n_rows_dec(table);
-
- if (table->is_system_db) {
- srv_stats.n_system_rows_deleted.inc();
- } else {
- srv_stats.n_rows_deleted.inc();
- }
- } else {
- if (table->is_system_db) {
- srv_stats.n_system_rows_updated.inc();
- } else {
- srv_stats.n_rows_updated.inc();
- }
- }
-
- } else if (err == DB_RECORD_NOT_FOUND) {
- trx->error_state = DB_SUCCESS;
- }
-
- trx->op_info = "";
-
- return(err);
-}
-
-/*****************************************************************//**
-Update a row in a table.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_update_row(
-/*=================*/
- ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
- const ib_tpl_t ib_old_tpl, /*!< in: Old tuple in table */
- const ib_tpl_t ib_new_tpl) /*!< in: New tuple to update */
-{
- upd_t* upd;
- ib_err_t err;
- btr_pcur_t* pcur;
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
- row_prebuilt_t* prebuilt = cursor->prebuilt;
- const ib_tuple_t*old_tuple = (const ib_tuple_t*) ib_old_tpl;
- const ib_tuple_t*new_tuple = (const ib_tuple_t*) ib_new_tpl;
-
- if (dict_index_is_clust(prebuilt->index)) {
- pcur = &cursor->prebuilt->pcur;
- } else if (prebuilt->need_to_access_clustered) {
- pcur = &cursor->prebuilt->clust_pcur;
- } else {
- return(DB_ERROR);
- }
-
- ut_a(old_tuple->type == TPL_TYPE_ROW);
- ut_a(new_tuple->type == TPL_TYPE_ROW);
-
- upd = ib_update_vector_create(cursor);
-
- err = ib_calc_diff(cursor, upd, old_tuple, new_tuple);
-
- if (err == DB_SUCCESS) {
- /* Note that this is not a delete. */
- cursor->q_proc.node.upd->is_delete = FALSE;
-
- err = ib_execute_update_query_graph(cursor, pcur);
- }
-
- ib_wake_master_thread();
-
- return(err);
-}
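-
-/* Sketch of an update (added commentary, not part of the original
-file). Only the columns that differ between the two tuples end up in
-the update vector built by ib_calc_diff() above; the old tuple must
-hold the row as currently stored.
-
-	old_tpl = ib_clust_read_tuple_new(ib_crsr);	(assumed API call)
-	new_tpl = ib_clust_read_tuple_new(ib_crsr);
-
-	err = ib_cursor_read_row(ib_crsr, old_tpl, NULL, NULL);
-	(... copy old_tpl's columns into new_tpl, changing those needed ...)
-	err = ib_cursor_update_row(ib_crsr, old_tpl, new_tpl);
-*/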
-
-/**********************************************************************//**
-Build the update query graph to delete a row from an index.
-@return DB_SUCCESS or err code */
-static
-ib_err_t
-ib_delete_row(
-/*==========*/
- ib_cursor_t* cursor, /*!< in: current cursor */
- btr_pcur_t* pcur, /*!< in: Btree persistent cursor */
- const rec_t* rec) /*!< in: record to delete */
-{
- ulint i;
- upd_t* upd;
- ib_err_t err;
- ib_tuple_t* tuple;
- ib_tpl_t ib_tpl;
- ulint n_cols;
- upd_field_t* upd_field;
- ib_bool_t page_format;
- dict_table_t* table = cursor->prebuilt->table;
- dict_index_t* index = dict_table_get_first_index(table);
-
- n_cols = dict_index_get_n_ordering_defined_by_user(index);
- ib_tpl = ib_key_tuple_new(index, n_cols);
-
- if (!ib_tpl) {
- return(DB_OUT_OF_MEMORY);
- }
-
- tuple = (ib_tuple_t*) ib_tpl;
-
- upd = ib_update_vector_create(cursor);
-
- page_format = static_cast<ib_bool_t>(
- dict_table_is_comp(index->table));
- ib_read_tuple(rec, page_format, tuple, NULL, NULL);
-
- upd->n_fields = ib_tuple_get_n_cols(ib_tpl);
-
- for (i = 0; i < upd->n_fields; ++i) {
- dfield_t* dfield;
-
- upd_field = &upd->fields[i];
- dfield = dtuple_get_nth_field(tuple->ptr, i);
-
- dfield_copy_data(&upd_field->new_val, dfield);
-
- upd_field->exp = NULL;
-
- upd_field->orig_len = 0;
-
- upd->info_bits = 0;
-
- upd_field->field_no = dict_col_get_clust_pos(
- &table->cols[i], index);
- }
-
- /* Note that this is a delete. */
- cursor->q_proc.node.upd->is_delete = TRUE;
-
- err = ib_execute_update_query_graph(cursor, pcur);
-
- ib_tuple_delete(ib_tpl);
-
- return(err);
-}
-
-/*****************************************************************//**
-Delete a row in a table.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_delete_row(
-/*=================*/
- ib_crsr_t ib_crsr) /*!< in: InnoDB cursor instance */
-{
- ib_err_t err;
- btr_pcur_t* pcur;
- dict_index_t* index;
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
- row_prebuilt_t* prebuilt = cursor->prebuilt;
-
- index = dict_table_get_first_index(prebuilt->index->table);
-
- /* Check whether this is a secondary index cursor */
- if (index != prebuilt->index) {
- if (prebuilt->need_to_access_clustered) {
- pcur = &prebuilt->clust_pcur;
- } else {
- return(DB_ERROR);
- }
- } else {
- pcur = &prebuilt->pcur;
- }
-
- if (ib_btr_cursor_is_positioned(pcur)) {
- const rec_t* rec;
- ib_bool_t page_format;
- mtr_t mtr;
- rec_t* copy = NULL;
- byte ptr[UNIV_PAGE_SIZE_MAX];
-
- page_format = static_cast<ib_bool_t>(
- dict_table_is_comp(index->table));
-
- mtr_start(&mtr);
-
- if (btr_pcur_restore_position(
- BTR_SEARCH_LEAF, pcur, &mtr)) {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
-
- rec_offs_init(offsets_);
-
- rec = btr_pcur_get_rec(pcur);
-
-			/* Since mtr will be committed, the rec
- will not be protected. Make a copy of
- the rec. */
- offsets = rec_get_offsets(
- rec, index, offsets, ULINT_UNDEFINED, &heap);
- ut_ad(rec_offs_size(offsets) < UNIV_PAGE_SIZE_MAX);
- copy = rec_copy(ptr, rec, offsets);
- }
-
- mtr_commit(&mtr);
-
- if (copy && !rec_get_deleted_flag(copy, page_format)) {
- err = ib_delete_row(cursor, pcur, copy);
- } else {
- err = DB_RECORD_NOT_FOUND;
- }
- } else {
- err = DB_RECORD_NOT_FOUND;
- }
-
- ib_wake_master_thread();
-
- return(err);
-}
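-
-/* Sketch of a delete (added commentary, not part of the original
-file): position the cursor on the target row first, e.g. with
-ib_cursor_moveto() below, then:
-
-	err = ib_cursor_delete_row(ib_crsr);
-*/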
-
-/*****************************************************************//**
-Read current row.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_read_row(
-/*===============*/
- ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
- ib_tpl_t ib_tpl, /*!< out: read cols into this tuple */
- void** row_buf, /*!< in/out: row buffer */
- ib_ulint_t* row_len) /*!< in/out: row buffer len */
-{
- ib_err_t err;
- ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl;
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
-
- ut_a(cursor->prebuilt->trx->state != TRX_STATE_NOT_STARTED);
-
- /* When searching with IB_EXACT_MATCH set, row_search_for_mysql()
- will not position the persistent cursor but will copy the record
- found into the row cache. It should be the only entry. */
- if (!ib_cursor_is_positioned(ib_crsr) ) {
- err = DB_RECORD_NOT_FOUND;
- } else {
- mtr_t mtr;
- btr_pcur_t* pcur;
- row_prebuilt_t* prebuilt = cursor->prebuilt;
-
- if (prebuilt->need_to_access_clustered
- && tuple->type == TPL_TYPE_ROW) {
- pcur = &prebuilt->clust_pcur;
- } else {
- pcur = &prebuilt->pcur;
- }
-
- if (pcur == NULL) {
- return(DB_ERROR);
- }
-
- mtr_start(&mtr);
-
- if (btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, &mtr)) {
- const rec_t* rec;
- ib_bool_t page_format;
-
- page_format = static_cast<ib_bool_t>(
- dict_table_is_comp(tuple->index->table));
- rec = btr_pcur_get_rec(pcur);
-
- if (prebuilt->innodb_api_rec &&
- prebuilt->innodb_api_rec != rec) {
- rec = prebuilt->innodb_api_rec;
- }
-
- if (!rec_get_deleted_flag(rec, page_format)) {
- ib_read_tuple(rec, page_format, tuple,
- row_buf, (ulint*) row_len);
- err = DB_SUCCESS;
-			} else {
- err = DB_RECORD_NOT_FOUND;
- }
-
- } else {
- err = DB_RECORD_NOT_FOUND;
- }
-
- mtr_commit(&mtr);
- }
-
- return(err);
-}
-
-/*****************************************************************//**
-Move cursor to the first record in the table.
-@return DB_SUCCESS or err code */
-UNIV_INLINE
-ib_err_t
-ib_cursor_position(
-/*===============*/
- ib_cursor_t* cursor, /*!< in: InnoDB cursor instance */
- ib_srch_mode_t mode) /*!< in: Search mode */
-{
- ib_err_t err;
- row_prebuilt_t* prebuilt = cursor->prebuilt;
- unsigned char* buf;
-
- buf = static_cast<unsigned char*>(mem_alloc(UNIV_PAGE_SIZE));
-
- /* We want to position at one of the ends, row_search_for_mysql()
- uses the search_tuple fields to work out what to do. */
- dtuple_set_n_fields(prebuilt->search_tuple, 0);
-
- err = static_cast<ib_err_t>(row_search_for_mysql(
- buf, mode, prebuilt, 0, 0));
-
- mem_free(buf);
-
- return(err);
-}
-
-/*****************************************************************//**
-Move cursor to the first record in the table.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_first(
-/*============*/
- ib_crsr_t ib_crsr) /*!< in: InnoDB cursor instance */
-{
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
-
- return(ib_cursor_position(cursor, IB_CUR_G));
-}
-
-/*****************************************************************//**
-Move cursor to the last record in the table.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_last(
-/*===========*/
- ib_crsr_t ib_crsr) /*!< in: InnoDB cursor instance */
-{
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
-
- return(ib_cursor_position(cursor, IB_CUR_L));
-}
-
-/*****************************************************************//**
-Move cursor to the next user record in the table.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_next(
-/*===========*/
- ib_crsr_t ib_crsr) /*!< in: InnoDB cursor instance */
-{
- ib_err_t err;
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
- row_prebuilt_t* prebuilt = cursor->prebuilt;
- byte buf[UNIV_PAGE_SIZE_MAX];
-
- /* We want to move to the next record */
- dtuple_set_n_fields(prebuilt->search_tuple, 0);
-
- err = static_cast<ib_err_t>(row_search_for_mysql(
- buf, PAGE_CUR_G, prebuilt, 0, ROW_SEL_NEXT));
-
- return(err);
-}
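-
-/* Sketch of a full-table scan (added commentary, not part of the
-original file). ib_tpl is a row tuple created as in the insert sketch
-above; DB_END_OF_INDEX is assumed to be the code returned once the
-cursor moves past the last record.
-
-	err = ib_cursor_first(ib_crsr);
-
-	while (err == DB_SUCCESS) {
-		err = ib_cursor_read_row(ib_crsr, ib_tpl, NULL, NULL);
-
-		(... consume the columns of ib_tpl ...)
-
-		err = ib_cursor_next(ib_crsr);
-	}
-*/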
-
-/*****************************************************************//**
-Search for key.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_moveto(
-/*=============*/
- ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
- ib_tpl_t ib_tpl, /*!< in: Key to search for */
- ib_srch_mode_t ib_srch_mode) /*!< in: search mode */
-{
- ulint i;
- ulint n_fields;
- ib_err_t err = DB_SUCCESS;
- ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl;
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
- row_prebuilt_t* prebuilt = cursor->prebuilt;
- dtuple_t* search_tuple = prebuilt->search_tuple;
- unsigned char* buf;
-
- ut_a(tuple->type == TPL_TYPE_KEY);
-
- n_fields = dict_index_get_n_ordering_defined_by_user(prebuilt->index);
-
- if (n_fields > dtuple_get_n_fields(tuple->ptr)) {
- n_fields = dtuple_get_n_fields(tuple->ptr);
- }
-
- dtuple_set_n_fields(search_tuple, n_fields);
- dtuple_set_n_fields_cmp(search_tuple, n_fields);
-
- /* Do a shallow copy */
- for (i = 0; i < n_fields; ++i) {
- dfield_copy(dtuple_get_nth_field(search_tuple, i),
- dtuple_get_nth_field(tuple->ptr, i));
- }
-
- ut_a(prebuilt->select_lock_type <= LOCK_NUM);
-
- prebuilt->innodb_api_rec = NULL;
-
- buf = static_cast<unsigned char*>(mem_alloc(UNIV_PAGE_SIZE));
-
- err = static_cast<ib_err_t>(row_search_for_mysql(
- buf, ib_srch_mode, prebuilt, cursor->match_mode, 0));
-
- mem_free(buf);
-
- return(err);
-}
-
-/*****************************************************************//**
-Set the cursor match mode. */
-UNIV_INTERN
-void
-ib_cursor_set_match_mode(
-/*=====================*/
- ib_crsr_t ib_crsr, /*!< in: Cursor instance */
- ib_match_mode_t match_mode) /*!< in: ib_cursor_moveto match mode */
-{
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
-
- cursor->match_mode = match_mode;
-}
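-
-/* Sketch of an exact-match point lookup (added commentary, not part of
-the original file). ib_clust_search_tuple_create() and IB_CUR_GE are
-assumed to come from the public api0api.h header.
-
-	ib_u32_t	id = 42;
-	ib_tpl_t	key_tpl;
-
-	key_tpl = ib_clust_search_tuple_create(ib_crsr);
-	err = ib_col_set_value(key_tpl, 0, &id, sizeof(id), TRUE);
-
-	ib_cursor_set_match_mode(ib_crsr, IB_EXACT_MATCH);
-	err = ib_cursor_moveto(ib_crsr, key_tpl, IB_CUR_GE);
-*/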
-
-/*****************************************************************//**
-Get the dfield instance for the column in the tuple.
-@return dfield instance in tuple */
-UNIV_INLINE
-dfield_t*
-ib_col_get_dfield(
-/*==============*/
- ib_tuple_t* tuple, /*!< in: tuple instance */
- ulint col_no) /*!< in: col no. in tuple */
-{
- dfield_t* dfield;
-
- dfield = dtuple_get_nth_field(tuple->ptr, col_no);
-
- return(dfield);
-}
-
-/*****************************************************************//**
-Predicate to check whether a column type holds variable-length data
-with a capped (bounded) maximum length.
-@return nonzero if the column length is capped */
-UNIV_INLINE
-ib_err_t
-ib_col_is_capped(
-/*==============*/
- const dtype_t* dtype) /*!< in: column type */
-{
- return(static_cast<ib_err_t>(
- (dtype_get_mtype(dtype) == DATA_VARCHAR
- || dtype_get_mtype(dtype) == DATA_CHAR
- || dtype_get_mtype(dtype) == DATA_MYSQL
- || dtype_get_mtype(dtype) == DATA_VARMYSQL
- || dtype_get_mtype(dtype) == DATA_FIXBINARY
- || dtype_get_mtype(dtype) == DATA_BINARY)
- && dtype_get_len(dtype) > 0));
-}
-
-/*****************************************************************//**
-Set a column of the tuple. Make a copy using the tuple's heap.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ib_err_t
-ib_col_set_value(
-/*=============*/
- ib_tpl_t ib_tpl, /*!< in: tuple instance */
- ib_ulint_t col_no, /*!< in: column index in tuple */
- const void* src, /*!< in: data value */
- ib_ulint_t len, /*!< in: data value len */
- ib_bool_t need_cpy) /*!< in: if need memcpy */
-{
- const dtype_t* dtype;
- dfield_t* dfield;
- void* dst = NULL;
- ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl;
- ulint col_len;
-
- dfield = ib_col_get_dfield(tuple, col_no);
-
- /* User wants to set the column to NULL. */
- if (len == IB_SQL_NULL) {
- dfield_set_null(dfield);
- return(DB_SUCCESS);
- }
-
- dtype = dfield_get_type(dfield);
- col_len = dtype_get_len(dtype);
-
- /* Not allowed to update system columns. */
- if (dtype_get_mtype(dtype) == DATA_SYS) {
- return(DB_DATA_MISMATCH);
- }
-
- dst = dfield_get_data(dfield);
-
- /* Since TEXT/CLOB also map to DATA_VARCHAR we need to make an
- exception. Perhaps we need to set the precise type and check
- for that. */
- if (ib_col_is_capped(dtype)) {
-
- len = ut_min(len, static_cast<ib_ulint_t>(col_len));
-
- if (dst == NULL || len > dfield_get_len(dfield)) {
- dst = mem_heap_alloc(tuple->heap, col_len);
- ut_a(dst != NULL);
- }
- } else if (dst == NULL || len > dfield_get_len(dfield)) {
- dst = mem_heap_alloc(tuple->heap, len);
- }
-
- if (dst == NULL) {
- return(DB_OUT_OF_MEMORY);
- }
-
- switch (dtype_get_mtype(dtype)) {
- case DATA_INT: {
-
- if (col_len == len) {
- ibool usign;
-
- usign = dtype_get_prtype(dtype) & DATA_UNSIGNED;
- mach_write_int_type(static_cast<byte*>(dst),
- static_cast<const byte*>(src),
- len, usign);
-
- } else {
- return(DB_DATA_MISMATCH);
- }
- break;
- }
-
- case DATA_FLOAT:
- if (len == sizeof(float)) {
- mach_float_write(static_cast<byte*>(dst), *(float*)src);
- } else {
- return(DB_DATA_MISMATCH);
- }
- break;
-
- case DATA_DOUBLE:
- if (len == sizeof(double)) {
- mach_double_write(static_cast<byte*>(dst),
- *(double*)src);
- } else {
- return(DB_DATA_MISMATCH);
- }
- break;
-
- case DATA_SYS:
- ut_error;
- break;
-
- case DATA_CHAR: {
- ulint pad_char = ULINT_UNDEFINED;
-
- pad_char = dtype_get_pad_char(
- dtype_get_mtype(dtype), dtype_get_prtype(dtype));
-
- ut_a(pad_char != ULINT_UNDEFINED);
-
- memset((byte*) dst + len,
- static_cast<int>(pad_char),
- static_cast<size_t>(col_len - len));
-
- memcpy(dst, src, len);
-
- len = static_cast<ib_ulint_t>(col_len);
- break;
- }
- case DATA_BLOB:
- case DATA_BINARY:
- case DATA_DECIMAL:
- case DATA_VARCHAR:
- case DATA_FIXBINARY:
- if (need_cpy) {
- memcpy(dst, src, len);
- } else {
- dfield_set_data(dfield, src, len);
- dst = dfield_get_data(dfield);
- }
- break;
-
- case DATA_MYSQL:
- case DATA_VARMYSQL: {
- ulint cset;
- CHARSET_INFO* cs;
- int error = 0;
- ulint true_len = len;
-
- /* For multi byte character sets we need to
- calculate the true length of the data. */
- cset = dtype_get_charset_coll(
- dtype_get_prtype(dtype));
- cs = all_charsets[cset];
- if (cs) {
- uint pos = (uint)(col_len / cs->mbmaxlen);
-
- if (len > 0 && cs->mbmaxlen > 1) {
- true_len = (ulint)
- cs->cset->well_formed_len(
- cs,
- (const char*)src,
- (const char*)src + len,
- pos,
- &error);
-
- if (true_len < len) {
- len = static_cast<ib_ulint_t>(true_len);
- }
- }
- }
-
- /* All invalid bytes in the data need to be truncated.
- If len == 0, all bytes of the data are invalid, and the
- data will be truncated to empty. */
- memcpy(dst, src, len);
-
- /* For DATA_MYSQL, need to pad the unused
- space with spaces. */
- if (dtype_get_mtype(dtype) == DATA_MYSQL) {
- ulint n_chars;
-
- if (len < col_len) {
- ulint pad_len = col_len - len;
-
- ut_a(cs != NULL);
- ut_a(!(pad_len % cs->mbminlen));
-
- cs->cset->fill(cs, (char*)dst + len,
- pad_len,
- 0x20 /* space */);
- }
-
- /* For the reason why we do the following, see function
- row_mysql_store_col_in_innobase_format(). */
-
- ut_a(!(dtype_get_len(dtype)
- % dtype_get_mbmaxlen(dtype)));
-
- n_chars = dtype_get_len(dtype)
- / dtype_get_mbmaxlen(dtype);
-
- /* Strip space padding. */
- while (col_len > n_chars
- && ((char*)dst)[col_len - 1] == 0x20) {
- col_len--;
- }
-
- len = static_cast<ib_ulint_t>(col_len);
- }
- break;
- }
-
- default:
- ut_error;
- }
-
- if (dst != dfield_get_data(dfield)) {
- dfield_set_data(dfield, dst, len);
- } else {
- dfield_set_len(dfield, len);
- }
-
- return(DB_SUCCESS);
-}
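-
-/* Usage sketch (illustrative, not part of the original API source):
-write a VARCHAR value and a SQL NULL into a row tuple. The tuple
-handle `tpl` and the column numbers are assumptions for the example.
-
- err = ib_col_set_value(tpl, 0, "abc", 3, IB_TRUE);
- ut_a(err == DB_SUCCESS);
-
- err = ib_col_set_value(tpl, 1, NULL, IB_SQL_NULL, IB_TRUE);
- ut_a(err == DB_SUCCESS);
-*/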
-
-/*****************************************************************//**
-Get the size of the data available in a column of the tuple.
-@return bytes available or IB_SQL_NULL */
-UNIV_INTERN
-ib_ulint_t
-ib_col_get_len(
-/*===========*/
- ib_tpl_t ib_tpl, /*!< in: tuple instance */
- ib_ulint_t i) /*!< in: column index in tuple */
-{
- const dfield_t* dfield;
- ulint data_len;
- ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl;
-
- dfield = ib_col_get_dfield(tuple, i);
-
- data_len = dfield_get_len(dfield);
-
- return(static_cast<ib_ulint_t>(
- data_len == UNIV_SQL_NULL ? IB_SQL_NULL : data_len));
-}
-
-/*****************************************************************//**
-Copy a column value from the tuple.
-@return bytes copied or IB_SQL_NULL */
-UNIV_INLINE
-ib_ulint_t
-ib_col_copy_value_low(
-/*==================*/
- ib_tpl_t ib_tpl, /*!< in: tuple instance */
- ib_ulint_t i, /*!< in: column index in tuple */
- void* dst, /*!< out: copied data value */
- ib_ulint_t len) /*!< in: max data value len to copy */
-{
- const void* data;
- const dfield_t* dfield;
- ulint data_len;
- ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl;
-
- dfield = ib_col_get_dfield(tuple, i);
-
- data = dfield_get_data(dfield);
- data_len = dfield_get_len(dfield);
-
- if (data_len != UNIV_SQL_NULL) {
-
- const dtype_t* dtype = dfield_get_type(dfield);
-
- switch (dtype_get_mtype(dfield_get_type(dfield))) {
- case DATA_INT: {
- ibool usign;
- ullint ret;
-
- ut_a(data_len == len);
-
- usign = dtype_get_prtype(dtype) & DATA_UNSIGNED;
- ret = mach_read_int_type(static_cast<const byte*>(data),
- data_len, usign);
-
- if (usign) {
- if (len == 1) {
- *(ib_u8_t*)dst = (ib_u8_t)ret;
- } else if (len == 2) {
- *(ib_u16_t*)dst = (ib_u16_t)ret;
- } else if (len == 4) {
- *(ib_u32_t*)dst = (ib_u32_t)ret;
- } else {
- *(ib_u64_t*)dst = (ib_u64_t)ret;
- }
- } else {
- if (len == 1) {
- *(ib_i8_t*)dst = (ib_i8_t)ret;
- } else if (len == 2) {
- *(ib_i16_t*)dst = (ib_i16_t)ret;
- } else if (len == 4) {
- *(ib_i32_t*)dst = (ib_i32_t)ret;
- } else {
- *(ib_i64_t*)dst = (ib_i64_t)ret;
- }
- }
-
- break;
- }
- case DATA_FLOAT:
- if (len == data_len) {
- float f;
-
- ut_a(data_len == sizeof(f));
- f = mach_float_read(static_cast<const byte*>(
- data));
- memcpy(dst, &f, sizeof(f));
- } else {
- data_len = 0;
- }
- break;
- case DATA_DOUBLE:
- if (len == data_len) {
- double d;
-
- ut_a(data_len == sizeof(d));
- d = mach_double_read(static_cast<const byte*>(
- data));
- memcpy(dst, &d, sizeof(d));
- } else {
- data_len = 0;
- }
- break;
- default:
- data_len = ut_min(data_len, len);
- memcpy(dst, data, data_len);
- }
- } else {
- data_len = IB_SQL_NULL;
- }
-
- return(static_cast<ib_ulint_t>(data_len));
-}
-
-/*****************************************************************//**
-Copy a column value from the tuple.
-@return bytes copied or IB_SQL_NULL */
-UNIV_INTERN
-ib_ulint_t
-ib_col_copy_value(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in: tuple instance */
- ib_ulint_t i, /*!< in: column index in tuple */
- void* dst, /*!< out: copied data value */
- ib_ulint_t len) /*!< in: max data value len to copy */
-{
- return(ib_col_copy_value_low(ib_tpl, i, dst, len));
-}
-
-/*****************************************************************//**
-Get the InnoDB column attribute from the internal column precise type.
-@return column attributes in API format */
-UNIV_INLINE
-ib_col_attr_t
-ib_col_get_attr(
-/*============*/
- ulint prtype) /*!< in: column definition */
-{
- ib_col_attr_t attr = IB_COL_NONE;
-
- if (prtype & DATA_UNSIGNED) {
- attr = static_cast<ib_col_attr_t>(attr | IB_COL_UNSIGNED);
- }
-
- if (prtype & DATA_NOT_NULL) {
- attr = static_cast<ib_col_attr_t>(attr | IB_COL_NOT_NULL);
- }
-
- return(attr);
-}
-
-/*****************************************************************//**
-Get a column name from the tuple.
-@return name of the column */
-UNIV_INTERN
-const char*
-ib_col_get_name(
-/*============*/
- ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
- ib_ulint_t i) /*!< in: column index in tuple */
-{
- const char* name;
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
- dict_table_t* table = cursor->prebuilt->table;
- dict_col_t* col = dict_table_get_nth_col(table, i);
- ulint col_no = dict_col_get_no(col);
-
- name = dict_table_get_col_name(table, col_no);
-
- return(name);
-}
-
-/*****************************************************************//**
-Get an index field name from the cursor.
-@return name of the field */
-UNIV_INTERN
-const char*
-ib_get_idx_field_name(
-/*==================*/
- ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
- ib_ulint_t i) /*!< in: column index in tuple */
-{
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
- dict_index_t* index = cursor->prebuilt->index;
- dict_field_t* field;
-
- if (index) {
- field = dict_index_get_nth_field(cursor->prebuilt->index, i);
-
- if (field) {
- return(field->name);
- }
- }
-
- return(NULL);
-}
-
-/*****************************************************************//**
-Get a column type, length and attributes from the tuple.
-@return len of column data */
-UNIV_INLINE
-ib_ulint_t
-ib_col_get_meta_low(
-/*================*/
- ib_tpl_t ib_tpl, /*!< in: tuple instance */
- ib_ulint_t i, /*!< in: column index in tuple */
- ib_col_meta_t* ib_col_meta) /*!< out: column meta data */
-{
- ib_u16_t prtype;
- const dfield_t* dfield;
- ulint data_len;
- ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl;
-
- dfield = ib_col_get_dfield(tuple, i);
-
- data_len = dfield_get_len(dfield);
-
- /* We assume 1-1 mapping between the ENUM and internal type codes. */
- ib_col_meta->type = static_cast<ib_col_type_t>(
- dtype_get_mtype(dfield_get_type(dfield)));
-
- ib_col_meta->type_len = static_cast<ib_u32_t>(
- dtype_get_len(dfield_get_type(dfield)));
-
- prtype = (ib_u16_t) dtype_get_prtype(dfield_get_type(dfield));
-
- ib_col_meta->attr = ib_col_get_attr(prtype);
- ib_col_meta->client_type = prtype & DATA_MYSQL_TYPE_MASK;
-
- return(static_cast<ib_ulint_t>(data_len));
-}
-
-/*************************************************************//**
-Check that a column of an InnoDB tuple is an integer of the expected
-signedness and size.
-@return DB_SUCCESS if it matches, or an error code */
-UNIV_INLINE
-ib_err_t
-ib_tuple_check_int(
-/*===============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_bool_t usign, /*!< in: true if unsigned */
- ulint size) /*!< in: size of integer */
-{
- ib_col_meta_t ib_col_meta;
-
- ib_col_get_meta_low(ib_tpl, i, &ib_col_meta);
-
- if (ib_col_meta.type != IB_INT) {
- return(DB_DATA_MISMATCH);
- } else if (ib_col_meta.type_len == IB_SQL_NULL) {
- return(DB_UNDERFLOW);
- } else if (ib_col_meta.type_len != size) {
- return(DB_DATA_MISMATCH);
- } else if ((ib_col_meta.attr & IB_COL_UNSIGNED) && !usign) {
- return(DB_DATA_MISMATCH);
- }
-
- return(DB_SUCCESS);
-}
-
-/*************************************************************//**
-Read a signed int 8 bit column from an InnoDB tuple.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_read_i8(
-/*=============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_i8_t* ival) /*!< out: integer value */
-{
- ib_err_t err;
-
- err = ib_tuple_check_int(ib_tpl, i, IB_FALSE, sizeof(*ival));
-
- if (err == DB_SUCCESS) {
- ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
- }
-
- return(err);
-}
-
-/*************************************************************//**
-Read an unsigned int 8 bit column from an InnoDB tuple.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_read_u8(
-/*=============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_u8_t* ival) /*!< out: integer value */
-{
- ib_err_t err;
-
- err = ib_tuple_check_int(ib_tpl, i, IB_TRUE, sizeof(*ival));
-
- if (err == DB_SUCCESS) {
- ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
- }
-
- return(err);
-}
-
-/*************************************************************//**
-Read a signed int 16 bit column from an InnoDB tuple.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_read_i16(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_i16_t* ival) /*!< out: integer value */
-{
- ib_err_t err;
-
- err = ib_tuple_check_int(ib_tpl, i, IB_FALSE, sizeof(*ival));
-
- if (err == DB_SUCCESS) {
- ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
- }
-
- return(err);
-}
-
-/*************************************************************//**
-Read an unsigned int 16 bit column from an InnoDB tuple.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_read_u16(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_u16_t* ival) /*!< out: integer value */
-{
- ib_err_t err;
-
- err = ib_tuple_check_int(ib_tpl, i, IB_TRUE, sizeof(*ival));
-
- if (err == DB_SUCCESS) {
- ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
- }
-
- return(err);
-}
-
-/*************************************************************//**
-Read a signed int 32 bit column from an InnoDB tuple.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_read_i32(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_i32_t* ival) /*!< out: integer value */
-{
- ib_err_t err;
-
- err = ib_tuple_check_int(ib_tpl, i, IB_FALSE, sizeof(*ival));
-
- if (err == DB_SUCCESS) {
- ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
- }
-
- return(err);
-}
-
-/*************************************************************//**
-Read an unsigned int 32 bit column from an InnoDB tuple.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_read_u32(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_u32_t* ival) /*!< out: integer value */
-{
- ib_err_t err;
-
- err = ib_tuple_check_int(ib_tpl, i, IB_TRUE, sizeof(*ival));
-
- if (err == DB_SUCCESS) {
- ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
- }
-
- return(err);
-}
-
-/*************************************************************//**
-Read a signed int 64 bit column from an InnoDB tuple.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_read_i64(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_i64_t* ival) /*!< out: integer value */
-{
- ib_err_t err;
-
- err = ib_tuple_check_int(ib_tpl, i, IB_FALSE, sizeof(*ival));
-
- if (err == DB_SUCCESS) {
- ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
- }
-
- return(err);
-}
-
-/*************************************************************//**
-Read an unsigned int 64 bit column from an InnoDB tuple.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_read_u64(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_u64_t* ival) /*!< out: integer value */
-{
- ib_err_t err;
-
- err = ib_tuple_check_int(ib_tpl, i, IB_TRUE, sizeof(*ival));
-
- if (err == DB_SUCCESS) {
- ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
- }
-
- return(err);
-}
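-
-/* Usage sketch (illustrative): read an unsigned 32 bit column. The
-column is assumed to be an unsigned IB_INT of length 4; otherwise
-ib_tuple_check_int() above makes the call fail with DB_DATA_MISMATCH.
-
- ib_u32_t val;
- ib_err_t err = ib_tuple_read_u32(tpl, 2, &val);
-
- ut_a(err == DB_SUCCESS || err == DB_DATA_MISMATCH);
-*/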
-
-/*****************************************************************//**
-Get a column value pointer from the tuple.
-@return NULL or pointer to buffer */
-UNIV_INTERN
-const void*
-ib_col_get_value(
-/*=============*/
- ib_tpl_t ib_tpl, /*!< in: tuple instance */
- ib_ulint_t i) /*!< in: column index in tuple */
-{
- const void* data;
- const dfield_t* dfield;
- ulint data_len;
- ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl;
-
- dfield = ib_col_get_dfield(tuple, i);
-
- data = dfield_get_data(dfield);
- data_len = dfield_get_len(dfield);
-
- return(data_len != UNIV_SQL_NULL ? data : NULL);
-}
-
-/*****************************************************************//**
-Get a column type, length and attributes from the tuple.
-@return len of column data */
-UNIV_INTERN
-ib_ulint_t
-ib_col_get_meta(
-/*============*/
- ib_tpl_t ib_tpl, /*!< in: tuple instance */
- ib_ulint_t i, /*!< in: column index in tuple */
- ib_col_meta_t* ib_col_meta) /*!< out: column meta data */
-{
- return(ib_col_get_meta_low(ib_tpl, i, ib_col_meta));
-}
-
-/*****************************************************************//**
-"Clear" or reset an InnoDB tuple. We free the heap and recreate the tuple.
-@return new tuple, or NULL */
-UNIV_INTERN
-ib_tpl_t
-ib_tuple_clear(
-/*============*/
- ib_tpl_t ib_tpl) /*!< in,own: tuple (will be freed) */
-{
- const dict_index_t* index;
- ulint n_cols;
- ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl;
- ib_tuple_type_t type = tuple->type;
- mem_heap_t* heap = tuple->heap;
-
- index = tuple->index;
- n_cols = dtuple_get_n_fields(tuple->ptr);
-
- mem_heap_empty(heap);
-
- if (type == TPL_TYPE_ROW) {
- return(ib_row_tuple_new_low(index, n_cols, heap));
- } else {
- return(ib_key_tuple_new_low(index, n_cols, heap));
- }
-}
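-
-/* Usage note (illustrative): ib_tuple_clear() may return a new handle,
-so callers must reassign it rather than reuse the old one:
-
- tpl = ib_tuple_clear(tpl);
- ut_a(tpl != NULL);
-*/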
-
-/*****************************************************************//**
-Create a new cluster key search tuple and copy the contents of the
-secondary index key tuple columns that refer to the cluster index record
-to the cluster key. It does a deep copy of the column data.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ib_err_t
-ib_tuple_get_cluster_key(
-/*=====================*/
- ib_crsr_t ib_crsr, /*!< in: secondary index cursor */
- ib_tpl_t* ib_dst_tpl, /*!< out,own: destination tuple */
- const ib_tpl_t ib_src_tpl) /*!< in: source tuple */
-{
- ulint i;
- ulint n_fields;
- ib_err_t err = DB_SUCCESS;
- ib_tuple_t* dst_tuple = NULL;
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
- ib_tuple_t* src_tuple = (ib_tuple_t*) ib_src_tpl;
- dict_index_t* clust_index;
-
- clust_index = dict_table_get_first_index(cursor->prebuilt->table);
-
- /* We need to ensure that the src tuple belongs to the same table
- as the open cursor and that it's not a tuple for a cluster index. */
- if (src_tuple->type != TPL_TYPE_KEY) {
- return(DB_ERROR);
- } else if (src_tuple->index->table != cursor->prebuilt->table) {
- return(DB_DATA_MISMATCH);
- } else if (src_tuple->index == clust_index) {
- return(DB_ERROR);
- }
-
- /* Create the cluster index key search tuple. */
- *ib_dst_tpl = ib_clust_search_tuple_create(ib_crsr);
-
- if (!*ib_dst_tpl) {
- return(DB_OUT_OF_MEMORY);
- }
-
- dst_tuple = (ib_tuple_t*) *ib_dst_tpl;
- ut_a(dst_tuple->index == clust_index);
-
- n_fields = dict_index_get_n_unique(dst_tuple->index);
-
- /* Do a deep copy of the data fields. */
- for (i = 0; i < n_fields; i++) {
- ulint pos;
- dfield_t* src_field;
- dfield_t* dst_field;
-
- pos = dict_index_get_nth_field_pos(
- src_tuple->index, dst_tuple->index, i);
-
- ut_a(pos != ULINT_UNDEFINED);
-
- src_field = dtuple_get_nth_field(src_tuple->ptr, pos);
- dst_field = dtuple_get_nth_field(dst_tuple->ptr, i);
-
- if (!dfield_is_null(src_field)) {
- UNIV_MEM_ASSERT_RW(src_field->data, src_field->len);
-
- dst_field->data = mem_heap_dup(
- dst_tuple->heap,
- src_field->data,
- src_field->len);
-
- dst_field->len = src_field->len;
- } else {
- dfield_set_null(dst_field);
- }
- }
-
- return(err);
-}
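-
-/* Usage sketch (illustrative): convert a secondary index key tuple into
-a clustered index search tuple; the output tuple is owned by the caller
-and must be freed with ib_tuple_delete(). `sec_crsr` and `sec_tpl` are
-assumptions for the example.
-
- ib_tpl_t clust_tpl;
-
- err = ib_tuple_get_cluster_key(sec_crsr, &clust_tpl, sec_tpl);
-
- if (err == DB_SUCCESS) {
- ... search the clustered index with clust_tpl ...
- ib_tuple_delete(clust_tpl);
- }
-*/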
-
-/*****************************************************************//**
-Copy the contents of source tuple to destination tuple. The tuples
-must be of the same type and belong to the same table/index.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ib_err_t
-ib_tuple_copy(
-/*==========*/
- ib_tpl_t ib_dst_tpl, /*!< in: destination tuple */
- const ib_tpl_t ib_src_tpl) /*!< in: source tuple */
-{
- ulint i;
- ulint n_fields;
- ib_err_t err = DB_SUCCESS;
- const ib_tuple_t*src_tuple = (const ib_tuple_t*) ib_src_tpl;
- ib_tuple_t* dst_tuple = (ib_tuple_t*) ib_dst_tpl;
-
- /* Make sure src and dst are not the same. */
- ut_a(src_tuple != dst_tuple);
-
- /* Make sure they are the same type and refer to the same index. */
- if (src_tuple->type != dst_tuple->type
- || src_tuple->index != dst_tuple->index) {
-
- return(DB_DATA_MISMATCH);
- }
-
- n_fields = dtuple_get_n_fields(src_tuple->ptr);
- ut_ad(n_fields == dtuple_get_n_fields(dst_tuple->ptr));
-
- /* Do a deep copy of the data fields. */
- for (i = 0; i < n_fields; ++i) {
- dfield_t* src_field;
- dfield_t* dst_field;
-
- src_field = dtuple_get_nth_field(src_tuple->ptr, i);
- dst_field = dtuple_get_nth_field(dst_tuple->ptr, i);
-
- if (!dfield_is_null(src_field)) {
- UNIV_MEM_ASSERT_RW(src_field->data, src_field->len);
-
- dst_field->data = mem_heap_dup(
- dst_tuple->heap,
- src_field->data,
- src_field->len);
-
- dst_field->len = src_field->len;
- } else {
- dfield_set_null(dst_field);
- }
- }
-
- return(err);
-}
-
-/*****************************************************************//**
-Create an InnoDB tuple used for index/table search.
-@return own: Tuple for current index */
-UNIV_INTERN
-ib_tpl_t
-ib_sec_search_tuple_create(
-/*=======================*/
- ib_crsr_t ib_crsr) /*!< in: Cursor instance */
-{
- ulint n_cols;
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
- dict_index_t* index = cursor->prebuilt->index;
-
- n_cols = dict_index_get_n_unique_in_tree(index);
- return(ib_key_tuple_new(index, n_cols));
-}
-
-/*****************************************************************//**
-Create an InnoDB tuple used for reading rows via the current index.
-@return own: Tuple for current index */
-UNIV_INTERN
-ib_tpl_t
-ib_sec_read_tuple_create(
-/*=====================*/
- ib_crsr_t ib_crsr) /*!< in: Cursor instance */
-{
- ulint n_cols;
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
- dict_index_t* index = cursor->prebuilt->index;
-
- n_cols = dict_index_get_n_fields(index);
- return(ib_row_tuple_new(index, n_cols));
-}
-
-/*****************************************************************//**
-Create an InnoDB tuple used for table key operations.
-@return own: Tuple for current table */
-UNIV_INTERN
-ib_tpl_t
-ib_clust_search_tuple_create(
-/*=========================*/
- ib_crsr_t ib_crsr) /*!< in: Cursor instance */
-{
- ulint n_cols;
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
- dict_index_t* index;
-
- index = dict_table_get_first_index(cursor->prebuilt->table);
-
- n_cols = dict_index_get_n_ordering_defined_by_user(index);
- return(ib_key_tuple_new(index, n_cols));
-}
-
-/*****************************************************************//**
-Create an InnoDB tuple for table row operations.
-@return own: Tuple for current table */
-UNIV_INTERN
-ib_tpl_t
-ib_clust_read_tuple_create(
-/*=======================*/
- ib_crsr_t ib_crsr) /*!< in: Cursor instance */
-{
- ulint n_cols;
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
- dict_index_t* index;
-
- index = dict_table_get_first_index(cursor->prebuilt->table);
-
- n_cols = dict_table_get_n_cols(cursor->prebuilt->table);
- return(ib_row_tuple_new(index, n_cols));
-}
-
-/*****************************************************************//**
-Return the number of user columns in the tuple definition.
-@return number of user columns */
-UNIV_INTERN
-ib_ulint_t
-ib_tuple_get_n_user_cols(
-/*=====================*/
- const ib_tpl_t ib_tpl) /*!< in: Tuple for current table */
-{
- const ib_tuple_t* tuple = (const ib_tuple_t*) ib_tpl;
-
- if (tuple->type == TPL_TYPE_ROW) {
- return(static_cast<ib_ulint_t>(
- dict_table_get_n_user_cols(tuple->index->table)));
- }
-
- return(static_cast<ib_ulint_t>(
- dict_index_get_n_ordering_defined_by_user(tuple->index)));
-}
-
-/*****************************************************************//**
-Return the number of columns in the tuple definition.
-@return number of columns */
-UNIV_INTERN
-ib_ulint_t
-ib_tuple_get_n_cols(
-/*================*/
- const ib_tpl_t ib_tpl) /*!< in: Tuple for table/index */
-{
- const ib_tuple_t* tuple = (const ib_tuple_t*) ib_tpl;
-
- return(static_cast<ib_ulint_t>(dtuple_get_n_fields(tuple->ptr)));
-}
-
-/*****************************************************************//**
-Destroy an InnoDB tuple. */
-UNIV_INTERN
-void
-ib_tuple_delete(
-/*============*/
- ib_tpl_t ib_tpl) /*!< in,own: Tuple instance to delete */
-{
- ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl;
-
- if (!ib_tpl) {
- return;
- }
-
- mem_heap_free(tuple->heap);
-}
-
-/*****************************************************************//**
-Get a table id. This function will acquire the dictionary mutex.
-@return DB_SUCCESS if found */
-UNIV_INTERN
-ib_err_t
-ib_table_get_id(
-/*============*/
- const char* table_name, /*!< in: table to find */
- ib_id_u64_t* table_id) /*!< out: table id if found */
-{
- ib_err_t err;
-
- dict_mutex_enter_for_mysql();
-
- err = ib_table_get_id_low(table_name, table_id);
-
- dict_mutex_exit_for_mysql();
-
- return(err);
-}
-
-/*****************************************************************//**
-Get an index id.
-@return DB_SUCCESS if found */
-UNIV_INTERN
-ib_err_t
-ib_index_get_id(
-/*============*/
- const char* table_name, /*!< in: find index for this table */
- const char* index_name, /*!< in: index to find */
- ib_id_u64_t* index_id) /*!< out: index id if found */
-{
- dict_table_t* table;
- char* normalized_name;
- ib_err_t err = DB_TABLE_NOT_FOUND;
-
- *index_id = 0;
-
- normalized_name = static_cast<char*>(
- mem_alloc(ut_strlen(table_name) + 1));
- ib_normalize_table_name(normalized_name, table_name);
-
- table = ib_lookup_table_by_name(normalized_name);
-
- mem_free(normalized_name);
- normalized_name = NULL;
-
- if (table != NULL) {
- dict_index_t* index;
-
- index = dict_table_get_index_on_name(table, index_name);
-
- if (index != NULL) {
- /* We only support 32 bit table and index ids, because
- we pack both into a single 64 bit id: the table id in
- the high 32 bits and the index id in the low 32 bits. */
-
- *index_id = (table->id);
- *index_id <<= 32;
- *index_id |= (index->id);
-
- err = DB_SUCCESS;
- }
- }
-
- return(err);
-}
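-
-/* Illustration (not from the original source): with table id 5 and
-index id 3, the combined id is (5 << 32) | 3 = 0x0000000500000003.
-The parts can be recovered with:
-
- ib_id_u64_t table_part = combined >> 32;
- ib_id_u64_t index_part = combined & 0xFFFFFFFF;
-*/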
-
-#ifdef __WIN__
-#define SRV_PATH_SEPARATOR '\\'
-#else
-#define SRV_PATH_SEPARATOR '/'
-#endif
-
-
-/*****************************************************************//**
-Check if cursor is positioned.
-@return IB_TRUE if positioned */
-UNIV_INTERN
-ib_bool_t
-ib_cursor_is_positioned(
-/*====================*/
- const ib_crsr_t ib_crsr) /*!< in: InnoDB cursor instance */
-{
- const ib_cursor_t* cursor = (const ib_cursor_t*) ib_crsr;
- row_prebuilt_t* prebuilt = cursor->prebuilt;
-
- return(ib_btr_cursor_is_positioned(&prebuilt->pcur));
-}
-
-
-/*****************************************************************//**
-Checks if the data dictionary is latched in exclusive mode.
-@return TRUE if exclusive latch */
-UNIV_INTERN
-ib_bool_t
-ib_schema_lock_is_exclusive(
-/*========================*/
- const ib_trx_t ib_trx) /*!< in: transaction */
-{
- const trx_t* trx = (const trx_t*) ib_trx;
-
- return(trx->dict_operation_lock_mode == RW_X_LATCH);
-}
-
-/*****************************************************************//**
-Checks if the data dictionary is latched in shared mode.
-@return TRUE if shared latch */
-UNIV_INTERN
-ib_bool_t
-ib_schema_lock_is_shared(
-/*=====================*/
- const ib_trx_t ib_trx) /*!< in: transaction */
-{
- const trx_t* trx = (const trx_t*) ib_trx;
-
- return(trx->dict_operation_lock_mode == RW_S_LATCH);
-}
-
-/*****************************************************************//**
-Lock an InnoDB cursor/table.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_lock(
-/*===========*/
- ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
- ib_lck_mode_t ib_lck_mode) /*!< in: InnoDB lock mode */
-{
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
- row_prebuilt_t* prebuilt = cursor->prebuilt;
- trx_t* trx = prebuilt->trx;
- dict_table_t* table = prebuilt->table;
-
- return(ib_trx_lock_table_with_retry(
- trx, table, (enum lock_mode) ib_lck_mode));
-}
-
-/*****************************************************************//**
-Lock an InnoDB table using the table id.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ib_err_t
-ib_table_lock(
-/*==========*/
- ib_trx_t ib_trx, /*!< in/out: transaction */
- ib_id_u64_t table_id, /*!< in: table id */
- ib_lck_mode_t ib_lck_mode) /*!< in: InnoDB lock mode */
-{
- ib_err_t err;
- que_thr_t* thr;
- mem_heap_t* heap;
- dict_table_t* table;
- ib_qry_proc_t q_proc;
- trx_t* trx = (trx_t*) ib_trx;
-
- ut_a(trx->state != TRX_STATE_NOT_STARTED);
-
- table = ib_open_table_by_id(table_id, FALSE);
-
- if (table == NULL) {
- return(DB_TABLE_NOT_FOUND);
- }
-
- ut_a(ib_lck_mode <= static_cast<ib_lck_mode_t>(LOCK_NUM));
-
- heap = mem_heap_create(128);
-
- q_proc.node.sel = sel_node_create(heap);
-
- thr = pars_complete_graph_for_exec(q_proc.node.sel, trx, heap);
-
- q_proc.grph.sel = static_cast<que_fork_t*>(que_node_get_parent(thr));
- q_proc.grph.sel->state = QUE_FORK_ACTIVE;
-
- trx->op_info = "setting table lock";
-
- ut_a(ib_lck_mode == IB_LOCK_IS || ib_lck_mode == IB_LOCK_IX);
- err = static_cast<ib_err_t>(
- lock_table(0, table, (enum lock_mode) ib_lck_mode, thr));
-
- trx->error_state = err;
-
- mem_heap_free(heap);
-
- return(err);
-}
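-
-/* Usage sketch (illustrative): take an intention-exclusive table lock
-before row operations. Only IB_LOCK_IS and IB_LOCK_IX are accepted, as
-asserted above; `tid` is assumed to come from ib_table_get_id().
-
- err = ib_table_lock(ib_trx, tid, IB_LOCK_IX);
- ut_a(err == DB_SUCCESS);
-*/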
-
-/*****************************************************************//**
-Unlock an InnoDB table.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_unlock(
-/*=============*/
- ib_crsr_t ib_crsr) /*!< in/out: InnoDB cursor */
-{
- ib_err_t err = DB_SUCCESS;
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
- row_prebuilt_t* prebuilt = cursor->prebuilt;
-
- if (prebuilt->trx->mysql_n_tables_locked > 0) {
- --prebuilt->trx->mysql_n_tables_locked;
- } else {
- err = DB_ERROR;
- }
-
- return(err);
-}
-
-/*****************************************************************//**
-Set the lock mode of the cursor.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_set_lock_mode(
-/*====================*/
- ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
- ib_lck_mode_t ib_lck_mode) /*!< in: InnoDB lock mode */
-{
- ib_err_t err = DB_SUCCESS;
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
- row_prebuilt_t* prebuilt = cursor->prebuilt;
-
- ut_a(ib_lck_mode <= static_cast<ib_lck_mode_t>(LOCK_NUM));
-
- if (ib_lck_mode == IB_LOCK_X) {
- err = ib_cursor_lock(ib_crsr, IB_LOCK_IX);
- } else if (ib_lck_mode == IB_LOCK_S) {
- err = ib_cursor_lock(ib_crsr, IB_LOCK_IS);
- }
-
- if (err == DB_SUCCESS) {
- prebuilt->select_lock_type = (enum lock_mode) ib_lck_mode;
- ut_a(prebuilt->trx->state != TRX_STATE_NOT_STARTED);
- }
-
- return(err);
-}
-
-/*****************************************************************//**
-Set need to access clustered index record. */
-UNIV_INTERN
-void
-ib_cursor_set_cluster_access(
-/*=========================*/
- ib_crsr_t ib_crsr) /*!< in/out: InnoDB cursor */
-{
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
- row_prebuilt_t* prebuilt = cursor->prebuilt;
-
- prebuilt->need_to_access_clustered = TRUE;
-}
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_i8(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- ib_i8_t val) /*!< in: value to write */
-{
- return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
-}
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_i16(
-/*===============*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- ib_i16_t val) /*!< in: value to write */
-{
- return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
-}
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_i32(
-/*===============*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- ib_i32_t val) /*!< in: value to write */
-{
- return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
-}
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_i64(
-/*===============*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- ib_i64_t val) /*!< in: value to write */
-{
- return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
-}
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_u8(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- ib_u8_t val) /*!< in: value to write */
-{
- return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
-}
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_u16(
-/*===============*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- ib_u16_t val) /*!< in: value to write */
-{
- return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
-}
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_u32(
-/*===============*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- ib_u32_t val) /*!< in: value to write */
-{
- return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
-}
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_u64(
-/*===============*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- ib_u64_t val) /*!< in: value to write */
-{
- return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
-}
-
-/*****************************************************************//**
-Inform the cursor that it's the start of an SQL statement. */
-UNIV_INTERN
-void
-ib_cursor_stmt_begin(
-/*=================*/
- ib_crsr_t ib_crsr) /*!< in: cursor */
-{
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
-
- cursor->prebuilt->sql_stat_start = TRUE;
-}
-
-/*****************************************************************//**
-Write a double value to a column.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_double(
-/*==================*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- double val) /*!< in: value to write */
-{
- const dfield_t* dfield;
- ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl;
-
- dfield = ib_col_get_dfield(tuple, col_no);
-
- if (dtype_get_mtype(dfield_get_type(dfield)) == DATA_DOUBLE) {
- return(ib_col_set_value(ib_tpl, col_no,
- &val, sizeof(val), true));
- } else {
- return(DB_DATA_MISMATCH);
- }
-}
-
-/*************************************************************//**
-Read a double column value from an InnoDB tuple.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_read_double(
-/*=================*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t col_no, /*!< in: column number */
- double* dval) /*!< out: double value */
-{
- ib_err_t err;
- const dfield_t* dfield;
- ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl;
-
- dfield = ib_col_get_dfield(tuple, col_no);
-
- if (dtype_get_mtype(dfield_get_type(dfield)) == DATA_DOUBLE) {
- ib_col_copy_value_low(ib_tpl, col_no, dval, sizeof(*dval));
- err = DB_SUCCESS;
- } else {
- err = DB_DATA_MISMATCH;
- }
-
- return(err);
-}
-
-/*****************************************************************//**
-Write a float value to a column.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_float(
-/*=================*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- float val) /*!< in: value to write */
-{
- const dfield_t* dfield;
- ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl;
-
- dfield = ib_col_get_dfield(tuple, col_no);
-
- if (dtype_get_mtype(dfield_get_type(dfield)) == DATA_FLOAT) {
- return(ib_col_set_value(ib_tpl, col_no,
- &val, sizeof(val), true));
- } else {
- return(DB_DATA_MISMATCH);
- }
-}
-
-/*************************************************************//**
-Read a float value from an InnoDB tuple.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_read_float(
-/*================*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t col_no, /*!< in: column number */
- float* fval) /*!< out: float value */
-{
- ib_err_t err;
- const dfield_t* dfield;
- ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl;
-
- dfield = ib_col_get_dfield(tuple, col_no);
-
- if (dtype_get_mtype(dfield_get_type(dfield)) == DATA_FLOAT) {
- ib_col_copy_value_low(ib_tpl, col_no, fval, sizeof(*fval));
- err = DB_SUCCESS;
- } else {
- err = DB_DATA_MISMATCH;
- }
-
- return(err);
-}
-
-/*****************************************************************//**
-Truncate a table. The cursor handle will be closed and set to NULL
-on success.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_truncate(
-/*===============*/
- ib_crsr_t* ib_crsr, /*!< in/out: cursor for table
- to truncate */
- ib_id_u64_t* table_id) /*!< out: new table id */
-{
- ib_err_t err;
- ib_cursor_t* cursor = *(ib_cursor_t**) ib_crsr;
- row_prebuilt_t* prebuilt = cursor->prebuilt;
-
- *table_id = 0;
-
- err = ib_cursor_lock(*ib_crsr, IB_LOCK_X);
-
- if (err == DB_SUCCESS) {
- trx_t* trx;
- dict_table_t* table = prebuilt->table;
-
- /* We are going to free the cursor and the prebuilt. Store
- the transaction handle locally. */
- trx = prebuilt->trx;
- err = ib_cursor_close(*ib_crsr);
- ut_a(err == DB_SUCCESS);
-
- *ib_crsr = NULL;
-
- /* A temporary workaround for the assertion in
- trx_start_for_ddl_low(): we have already started the trx. */
- if (trx->state == TRX_STATE_ACTIVE) {
-#ifdef UNIV_DEBUG
- trx->start_file = 0;
-#endif /* UNIV_DEBUG */
- trx->dict_operation = TRX_DICT_OP_TABLE;
- }
-
- /* This function currently commits the transaction
- on success. */
- err = static_cast<ib_err_t>(
- row_truncate_table_for_mysql(table, trx));
-
- if (err == DB_SUCCESS) {
- *table_id = (table->id);
- }
- }
-
- return(err);
-}
-
-/*****************************************************************//**
-Truncate a table.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ib_err_t
-ib_table_truncate(
-/*==============*/
- const char* table_name, /*!< in: table name */
- ib_id_u64_t* table_id) /*!< out: new table id */
-{
- ib_err_t err;
- dict_table_t* table;
- ib_err_t trunc_err;
- ib_trx_t ib_trx = NULL;
- ib_crsr_t ib_crsr = NULL;
- ib_ulint_t memcached_sync = 0;
-
- ib_trx = ib_trx_begin(IB_TRX_SERIALIZABLE, true, false);
-
- dict_mutex_enter_for_mysql();
-
- table = dict_table_open_on_name(table_name, TRUE, FALSE,
- DICT_ERR_IGNORE_NONE);
-
- if (table != NULL && dict_table_get_first_index(table)) {
- err = ib_create_cursor_with_index_id(&ib_crsr, table, 0,
- (trx_t*) ib_trx);
- } else {
- err = DB_TABLE_NOT_FOUND;
- }
-
- /* Remember the memcached_sync_count and set it to 0, so the
- truncate can be executed. */
- if (table != NULL && err == DB_SUCCESS) {
- memcached_sync = static_cast<ib_ulint_t>(
- table->memcached_sync_count);
- table->memcached_sync_count = 0;
- }
-
- dict_mutex_exit_for_mysql();
-
- if (err == DB_SUCCESS) {
- trunc_err = ib_cursor_truncate(&ib_crsr, table_id);
- ut_a(err == DB_SUCCESS);
- } else {
- trunc_err = err;
- }
-
- if (ib_crsr != NULL) {
- err = ib_cursor_close(ib_crsr);
- ut_a(err == DB_SUCCESS);
- }
-
- if (trunc_err == DB_SUCCESS) {
- ut_a(ib_trx_state(ib_trx) == static_cast<ib_trx_state_t>(
- TRX_STATE_NOT_STARTED));
- } else {
- err = ib_trx_rollback(ib_trx);
- ut_a(err == DB_SUCCESS);
- }
-
- err = ib_trx_release(ib_trx);
- ut_a(err == DB_SUCCESS);
-
- /* Set the memcached_sync_count back. */
- if (table != NULL && memcached_sync != 0) {
- dict_mutex_enter_for_mysql();
-
- table->memcached_sync_count = memcached_sync;
-
- dict_mutex_exit_for_mysql();
- }
-
- return(trunc_err);
-}
-
-/*****************************************************************//**
-Frees a possible InnoDB trx object associated with the current THD.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-ib_err_t
-ib_close_thd(
-/*=========*/
- void* thd) /*!< in: handle to the MySQL thread of the user
- whose resources should be free'd */
-{
- innobase_close_thd(static_cast<THD*>(thd));
-
- return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Return the isolation level configured by "innodb_api_trx_level".
-@return trx isolation level */
-UNIV_INTERN
-ib_trx_state_t
-ib_cfg_trx_level()
-/*==============*/
-{
- return(static_cast<ib_trx_state_t>(ib_trx_level_setting));
-}
-
-/*****************************************************************//**
-Return the configured value for the background commit interval (in seconds).
-@return background commit interval (in seconds) */
-UNIV_INTERN
-ib_ulint_t
-ib_cfg_bk_commit_interval()
-/*=======================*/
-{
- return(static_cast<ib_ulint_t>(ib_bk_commit_interval));
-}
-
-/*****************************************************************//**
-Get the generic configuration status.
-@return configuration status */
-UNIV_INTERN
-int
-ib_cfg_get_cfg()
-/*============*/
-{
- int cfg_status;
-
- cfg_status = (ib_binlog_enabled) ? IB_CFG_BINLOG_ENABLED : 0;
-
- if (ib_mdl_enabled) {
- cfg_status |= IB_CFG_MDL_ENABLED;
- }
-
- if (ib_disable_row_lock) {
- cfg_status |= IB_CFG_DISABLE_ROWLOCK;
- }
-
- return(cfg_status);
-}
-
-/*****************************************************************//**
-Increase/decrease the memcached sync count of table to sync memcached
-DML with SQL DDLs.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-ib_err_t
-ib_cursor_set_memcached_sync(
-/*=========================*/
- ib_crsr_t ib_crsr, /*!< in: cursor */
- ib_bool_t flag) /*!< in: true for increase */
-{
- const ib_cursor_t* cursor = (const ib_cursor_t*) ib_crsr;
- row_prebuilt_t* prebuilt = cursor->prebuilt;
- dict_table_t* table = prebuilt->table;
- ib_err_t err = DB_SUCCESS;
-
- if (table != NULL) {
- /* If memcached_sync_count is DICT_TABLE_IN_DDL (-1), the
- table is undergoing DDL, so we just return an error. */
- if (table->memcached_sync_count == DICT_TABLE_IN_DDL) {
- return(DB_ERROR);
- }
-
- if (flag) {
-#ifdef HAVE_ATOMIC_BUILTINS
- os_atomic_increment_lint(&table->memcached_sync_count, 1);
-#else
- dict_mutex_enter_for_mysql();
- ++table->memcached_sync_count;
- dict_mutex_exit_for_mysql();
-#endif
- } else {
-#ifdef HAVE_ATOMIC_BUILTINS
- os_atomic_decrement_lint(&table->memcached_sync_count, 1);
-#else
- dict_mutex_enter_for_mysql();
- --table->memcached_sync_count;
- dict_mutex_exit_for_mysql();
-#endif
- ut_a(table->memcached_sync_count >= 0);
- }
- } else {
- err = DB_TABLE_NOT_FOUND;
- }
-
- return(err);
-}
diff --git a/storage/innobase/api/api0misc.cc b/storage/innobase/api/api0misc.cc
deleted file mode 100644
index b16abf9b84f..00000000000
--- a/storage/innobase/api/api0misc.cc
+++ /dev/null
@@ -1,203 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2008, 2012, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file api/api0misc.cc
-InnoDB Native API
-
-2008-08-01 Created by Sunny Bains
-3/20/2011 Jimmy Yang extracted from Embedded InnoDB
-*******************************************************/
-
-#include <my_config.h>
-#include <errno.h>
-
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif /* HAVE_UNISTD_H */
-
-#include "api0misc.h"
-#include "trx0roll.h"
-#include "srv0srv.h"
-#include "dict0mem.h"
-#include "dict0dict.h"
-#include "pars0pars.h"
-#include "row0sel.h"
-#include "lock0lock.h"
-#include "ha_prototypes.h"
-#include <m_ctype.h>
-#include <mysys_err.h>
-#include <mysql/plugin.h>
-
-/*********************************************************************//**
-Sets a lock on a table.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-ib_trx_lock_table_with_retry(
-/*=========================*/
- trx_t* trx, /*!< in/out: transaction */
- dict_table_t* table, /*!< in: table to lock */
- enum lock_mode mode) /*!< in: LOCK_X or LOCK_S */
-{
- que_thr_t* thr;
- dberr_t err;
- mem_heap_t* heap;
- sel_node_t* node;
-
- heap = mem_heap_create(512);
-
- trx->op_info = "setting table lock";
-
- node = sel_node_create(heap);
- thr = pars_complete_graph_for_exec(node, trx, heap);
- thr->graph->state = QUE_FORK_ACTIVE;
-
- /* We use the select query graph as the dummy graph needed
- in the lock module call */
-
- thr = que_fork_get_first_thr(static_cast<que_fork_t*>(
- que_node_get_parent(thr)));
- que_thr_move_to_run_state_for_mysql(thr, trx);
-
-run_again:
- thr->run_node = thr;
- thr->prev_node = thr->common.parent;
-
- err = lock_table(0, table, mode, thr);
-
- trx->error_state = err;
-
- if (UNIV_LIKELY(err == DB_SUCCESS)) {
- que_thr_stop_for_mysql_no_error(thr, trx);
- } else {
- que_thr_stop_for_mysql(thr);
-
- if (err != DB_QUE_THR_SUSPENDED) {
- ibool was_lock_wait;
-
- was_lock_wait = ib_handle_errors(&err, trx, thr, NULL);
-
- if (was_lock_wait) {
- goto run_again;
- }
- } else {
- que_thr_t* run_thr;
- que_node_t* parent;
-
- parent = que_node_get_parent(thr);
- run_thr = que_fork_start_command(
- static_cast<que_fork_t*>(parent));
-
- ut_a(run_thr == thr);
-
- /* There was a lock wait but the thread was not
- in a ready to run or running state. */
- trx->error_state = DB_LOCK_WAIT;
-
- goto run_again;
- }
- }
-
- que_graph_free(thr->graph);
- trx->op_info = "";
-
- return(err);
-}
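-
-/* Usage sketch (illustrative): the lock-wait retry is internal to the
-function, so a caller only checks the final result; lock waits and
-rollbacks are handled by ib_handle_errors() below.
-
- dberr_t err = ib_trx_lock_table_with_retry(trx, table, LOCK_X);
-
- (DB_SUCCESS on success; any other error has already been
- handled, possibly by rolling the transaction back.)
-*/
-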
-/****************************************************************//**
-Handles user errors and lock waits detected by the database engine.
-@return TRUE if it was a lock wait and we should continue running
-the query thread */
-UNIV_INTERN
-ibool
-ib_handle_errors(
-/*=============*/
- dberr_t* new_err,/*!< out: possible new error encountered in
- lock wait, or if no new error, the value
- of trx->error_state at the entry of this
- function */
- trx_t* trx, /*!< in: transaction */
- que_thr_t* thr, /*!< in: query thread */
- trx_savept_t* savept) /*!< in: savepoint or NULL */
-{
- dberr_t err;
-handle_new_error:
- err = trx->error_state;
-
- ut_a(err != DB_SUCCESS);
-
- trx->error_state = DB_SUCCESS;
-
- switch (err) {
- case DB_LOCK_WAIT_TIMEOUT:
- trx_rollback_for_mysql(trx);
- break;
- case DB_DUPLICATE_KEY:
- case DB_FOREIGN_DUPLICATE_KEY:
- case DB_TOO_BIG_RECORD:
- case DB_ROW_IS_REFERENCED:
- case DB_NO_REFERENCED_ROW:
- case DB_CANNOT_ADD_CONSTRAINT:
- case DB_TOO_MANY_CONCURRENT_TRXS:
- case DB_OUT_OF_FILE_SPACE:
- if (savept) {
- /* Roll back the latest, possibly incomplete
- insertion or update */
-
- trx_rollback_to_savepoint(trx, savept);
- }
- break;
- case DB_LOCK_WAIT:
- lock_wait_suspend_thread(thr);
-
- if (trx->error_state != DB_SUCCESS) {
- que_thr_stop_for_mysql(thr);
-
- goto handle_new_error;
- }
-
- *new_err = err;
-
- return(TRUE); /* Operation needs to be retried. */
-
- case DB_DEADLOCK:
- case DB_LOCK_TABLE_FULL:
- /* Roll back the whole transaction; this resolution was added
- to version 3.23.43 */
-
- trx_rollback_for_mysql(trx);
- break;
-
- case DB_CORRUPTION:
- case DB_FOREIGN_EXCEED_MAX_CASCADE:
- break;
- default:
- ut_error;
- }
-
- if (trx->error_state != DB_SUCCESS) {
- *new_err = trx->error_state;
- } else {
- *new_err = err;
- }
-
- trx->error_state = DB_SUCCESS;
-
- return(FALSE);
-}
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index 02c613cabbf..0a8ffe23cbb 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -2,7 +2,7 @@
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2014, 2017, MariaDB Corporation.
+Copyright (c) 2014, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,15 +27,10 @@ Created 6/2/1994 Heikki Tuuri
#include "btr0btr.h"
-#ifdef UNIV_NONINL
-#include "btr0btr.ic"
-#endif
-
-#include "fsp0fsp.h"
#include "page0page.h"
#include "page0zip.h"
+#include "gis0rtree.h"
-#ifndef UNIV_HOTBACKUP
#include "btr0cur.h"
#include "btr0sea.h"
#include "btr0pcur.h"
@@ -45,13 +40,16 @@ Created 6/2/1994 Heikki Tuuri
#include "ibuf0ibuf.h"
#include "trx0trx.h"
#include "srv0mon.h"
+#include "gis0geo.h"
+#include "dict0boot.h"
+#include "row0sel.h" /* row_search_max_autoinc() */
/**************************************************************//**
Checks if the page in the cursor can be merged with given page.
If necessary, re-organize the merge_page.
-@return TRUE if possible to merge. */
-UNIV_INTERN
-ibool
+@return true if possible to merge. */
+static
+bool
btr_can_merge_with_page(
/*====================*/
btr_cur_t* cursor, /*!< in: cursor on the page to merge */
@@ -59,603 +57,102 @@ btr_can_merge_with_page(
buf_block_t** merge_block, /*!< out: the merge block */
mtr_t* mtr); /*!< in: mini-transaction */
-#endif /* UNIV_HOTBACKUP */
-
/**************************************************************//**
Report that an index page is corrupted. */
-UNIV_INTERN
void
btr_corruption_report(
/*==================*/
const buf_block_t* block, /*!< in: corrupted block */
const dict_index_t* index) /*!< in: index tree */
{
- fprintf(stderr, "InnoDB: flag mismatch in space %u page %u"
- " index %s of table %s\n",
- (unsigned) buf_block_get_space(block),
- (unsigned) buf_block_get_page_no(block),
- index->name, index->table_name);
- if (block->page.zip.data) {
- buf_page_print(block->page.zip.data,
- buf_block_get_zip_size(block));
- }
- buf_page_print(buf_block_get_frame(block), 0);
-}
-
-#ifndef UNIV_HOTBACKUP
-#ifdef UNIV_BLOB_DEBUG
-# include "srv0srv.h"
-# include "ut0rbt.h"
-
-/** TRUE when messages about index->blobs modification are enabled. */
-static ibool btr_blob_dbg_msg;
-
-/** Issue a message about an operation on index->blobs.
-@param op operation
-@param b the entry being subjected to the operation
-@param ctx the context of the operation */
-#define btr_blob_dbg_msg_issue(op, b, ctx) \
- fprintf(stderr, op " %u:%u:%u->%u %s(%u,%u,%u)\n", \
- (b)->ref_page_no, (b)->ref_heap_no, \
- (b)->ref_field_no, (b)->blob_page_no, ctx, \
- (b)->owner, (b)->always_owner, (b)->del)
-
-/** Insert to index->blobs a reference to an off-page column.
-@param index the index tree
-@param b the reference
-@param ctx context (for logging) */
-UNIV_INTERN
-void
-btr_blob_dbg_rbt_insert(
-/*====================*/
- dict_index_t* index, /*!< in/out: index tree */
- const btr_blob_dbg_t* b, /*!< in: the reference */
- const char* ctx) /*!< in: context (for logging) */
-{
- if (btr_blob_dbg_msg) {
- btr_blob_dbg_msg_issue("insert", b, ctx);
- }
- mutex_enter(&index->blobs_mutex);
- rbt_insert(index->blobs, b, b);
- mutex_exit(&index->blobs_mutex);
-}
-
-/** Remove from index->blobs a reference to an off-page column.
-@param index the index tree
-@param b the reference
-@param ctx context (for logging) */
-UNIV_INTERN
-void
-btr_blob_dbg_rbt_delete(
-/*====================*/
- dict_index_t* index, /*!< in/out: index tree */
- const btr_blob_dbg_t* b, /*!< in: the reference */
- const char* ctx) /*!< in: context (for logging) */
-{
- if (btr_blob_dbg_msg) {
- btr_blob_dbg_msg_issue("delete", b, ctx);
- }
- mutex_enter(&index->blobs_mutex);
- ut_a(rbt_delete(index->blobs, b));
- mutex_exit(&index->blobs_mutex);
-}
-
-/**************************************************************//**
-Comparator for items (btr_blob_dbg_t) in index->blobs.
-The key in index->blobs is (ref_page_no, ref_heap_no, ref_field_no).
-@return negative, 0 or positive if *a<*b, *a=*b, *a>*b */
-static
-int
-btr_blob_dbg_cmp(
-/*=============*/
- const void* a, /*!< in: first btr_blob_dbg_t to compare */
- const void* b) /*!< in: second btr_blob_dbg_t to compare */
-{
- const btr_blob_dbg_t* aa = static_cast<const btr_blob_dbg_t*>(a);
- const btr_blob_dbg_t* bb = static_cast<const btr_blob_dbg_t*>(b);
-
- ut_ad(aa != NULL);
- ut_ad(bb != NULL);
-
- if (aa->ref_page_no != bb->ref_page_no) {
- return(aa->ref_page_no < bb->ref_page_no ? -1 : 1);
- }
- if (aa->ref_heap_no != bb->ref_heap_no) {
- return(aa->ref_heap_no < bb->ref_heap_no ? -1 : 1);
- }
- if (aa->ref_field_no != bb->ref_field_no) {
- return(aa->ref_field_no < bb->ref_field_no ? -1 : 1);
- }
- return(0);
-}
-
-/**************************************************************//**
-Add a reference to an off-page column to the index->blobs map. */
-UNIV_INTERN
-void
-btr_blob_dbg_add_blob(
-/*==================*/
- const rec_t* rec, /*!< in: clustered index record */
- ulint field_no, /*!< in: off-page column number */
- ulint page_no, /*!< in: start page of the column */
- dict_index_t* index, /*!< in/out: index tree */
- const char* ctx) /*!< in: context (for logging) */
-{
- btr_blob_dbg_t b;
- const page_t* page = page_align(rec);
-
- ut_a(index->blobs);
-
- b.blob_page_no = page_no;
- b.ref_page_no = page_get_page_no(page);
- b.ref_heap_no = page_rec_get_heap_no(rec);
- b.ref_field_no = field_no;
- ut_a(b.ref_field_no >= index->n_uniq);
- b.always_owner = b.owner = TRUE;
- b.del = FALSE;
- ut_a(!rec_get_deleted_flag(rec, page_is_comp(page)));
- btr_blob_dbg_rbt_insert(index, &b, ctx);
+ ib::error()
+ << "Flag mismatch in page " << block->page.id
+ << " index " << index->name
+ << " of table " << index->table->name;
}
-/**************************************************************//**
-Add to index->blobs any references to off-page columns from a record.
-@return number of references added */
-UNIV_INTERN
-ulint
-btr_blob_dbg_add_rec(
-/*=================*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in/out: index */
- const ulint* offsets,/*!< in: offsets */
- const char* ctx) /*!< in: context (for logging) */
-{
- ulint count = 0;
- ulint i;
- btr_blob_dbg_t b;
- ibool del;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- if (!rec_offs_any_extern(offsets)) {
- return(0);
- }
+/*
+Latching strategy of the InnoDB B-tree
+--------------------------------------
- b.ref_page_no = page_get_page_no(page_align(rec));
- b.ref_heap_no = page_rec_get_heap_no(rec);
- del = (rec_get_deleted_flag(rec, rec_offs_comp(offsets)) != 0);
+Acquisition of node pointer page latches is protected by the index->lock latch.
- for (i = 0; i < rec_offs_n_fields(offsets); i++) {
- if (rec_offs_nth_extern(offsets, i)) {
- ulint len;
- const byte* field_ref = rec_get_nth_field(
- rec, offsets, i, &len);
+Before MariaDB 10.2.2, all node pointer pages were protected by index->lock
+either in S (shared) or X (exclusive) mode and block->lock was not acquired on
+node pointer pages.
- ut_a(len != UNIV_SQL_NULL);
- ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
- field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;
+After MariaDB 10.2.2, a block->lock S-latch or X-latch is used to protect
+node pointer pages, and acquisition of node pointer page latches is
+protected by index->lock.
- if (!memcmp(field_ref, field_ref_zero,
- BTR_EXTERN_FIELD_REF_SIZE)) {
- /* the column has not been stored yet */
- continue;
- }
+(0) Definition: B-tree level.
- b.ref_field_no = i;
- b.blob_page_no = mach_read_from_4(
- field_ref + BTR_EXTERN_PAGE_NO);
- ut_a(b.ref_field_no >= index->n_uniq);
- b.always_owner = b.owner
- = !(field_ref[BTR_EXTERN_LEN]
- & BTR_EXTERN_OWNER_FLAG);
- b.del = del;
-
- btr_blob_dbg_rbt_insert(index, &b, ctx);
- count++;
- }
- }
+(0.1) The leaf pages of the B-tree are at level 0.
- return(count);
-}
+(0.2) The parent of a page at level L has level L+1. (The level of the
+root page is equal to the tree height.)
-/**************************************************************//**
-Display the references to off-page columns.
-This function is to be called from a debugger,
-for example when a breakpoint on ut_dbg_assertion_failed is hit. */
-UNIV_INTERN
-void
-btr_blob_dbg_print(
-/*===============*/
- const dict_index_t* index) /*!< in: index tree */
-{
- const ib_rbt_node_t* node;
+(0.3) The B-tree lock (index->lock) is the parent of the root page and
+has a level = tree height + 1.
- if (!index->blobs) {
- return;
- }
+Index->lock has 3 possible locking modes:
- /* We intentionally do not acquire index->blobs_mutex here.
- This function is to be called from a debugger, and the caller
- should make sure that the index->blobs_mutex is held. */
+(1) S-latch:
- for (node = rbt_first(index->blobs);
- node != NULL; node = rbt_next(index->blobs, node)) {
- const btr_blob_dbg_t* b
- = rbt_value(btr_blob_dbg_t, node);
- fprintf(stderr, "%u:%u:%u->%u%s%s%s\n",
- b->ref_page_no, b->ref_heap_no, b->ref_field_no,
- b->blob_page_no,
- b->owner ? "" : "(disowned)",
- b->always_owner ? "" : "(has disowned)",
- b->del ? "(deleted)" : "");
- }
-}
+(1.1) All latches for pages must be obtained in descending order of tree level.
-/**************************************************************//**
-Remove from index->blobs any references to off-page columns from a record.
-@return number of references removed */
-UNIV_INTERN
-ulint
-btr_blob_dbg_remove_rec(
-/*====================*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in/out: index */
- const ulint* offsets,/*!< in: offsets */
- const char* ctx) /*!< in: context (for logging) */
-{
- ulint i;
- ulint count = 0;
- btr_blob_dbg_t b;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- if (!rec_offs_any_extern(offsets)) {
- return(0);
- }
-
- b.ref_page_no = page_get_page_no(page_align(rec));
- b.ref_heap_no = page_rec_get_heap_no(rec);
-
- for (i = 0; i < rec_offs_n_fields(offsets); i++) {
- if (rec_offs_nth_extern(offsets, i)) {
- ulint len;
- const byte* field_ref = rec_get_nth_field(
- rec, offsets, i, &len);
-
- ut_a(len != UNIV_SQL_NULL);
- ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
- field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;
-
- b.ref_field_no = i;
- b.blob_page_no = mach_read_from_4(
- field_ref + BTR_EXTERN_PAGE_NO);
-
- switch (b.blob_page_no) {
- case 0:
- /* The column has not been stored yet.
- The BLOB pointer must be all zero.
- There cannot be a BLOB starting at
- page 0, because page 0 is reserved for
- the tablespace header. */
- ut_a(!memcmp(field_ref, field_ref_zero,
- BTR_EXTERN_FIELD_REF_SIZE));
- /* fall through */
- case FIL_NULL:
- /* the column has been freed already */
- continue;
- }
+(1.2) Before obtaining the first node pointer page latch at a given B-tree
+level L, the parent latch (at level L+1) must be held.
- btr_blob_dbg_rbt_delete(index, &b, ctx);
- count++;
- }
- }
+(1.3) If a node pointer page is already latched at some level, the only
+further latch that may be obtained at that level is on its right sibling
+page.
- return(count);
-}
+(1.4) Node pointer page latches must be released in child-to-parent
+order. (This prevents deadlocks when index->lock is obtained in
+SX mode.)
-/**************************************************************//**
-Check that there are no references to off-page columns from or to
-the given page. Invoked when freeing or clearing a page.
-@return TRUE when no orphan references exist */
-UNIV_INTERN
-ibool
-btr_blob_dbg_is_empty(
-/*==================*/
- dict_index_t* index, /*!< in: index */
- ulint page_no) /*!< in: page number */
-{
- const ib_rbt_node_t* node;
- ibool success = TRUE;
+(1.4.1) A node pointer page latch at level L can be released only when
+no latches are held at the child levels, i.e. at levels < L.
- if (!index->blobs) {
- return(success);
- }
+(1.4.2) All node pointer page latches must be released together, so that
+no new latches are obtained in between.
- mutex_enter(&index->blobs_mutex);
+(1.5) [implied by (1.1) and (1.2)] The root page latch must be the first
+node pointer page latch obtained.
- for (node = rbt_first(index->blobs);
- node != NULL; node = rbt_next(index->blobs, node)) {
- const btr_blob_dbg_t* b
- = rbt_value(btr_blob_dbg_t, node);
+(2) SX-latch:
- if (b->ref_page_no != page_no && b->blob_page_no != page_no) {
- continue;
- }
+In this case rules (1.2) and (1.3) from the S-latch case are relaxed and
+merged into (2.2), and rule (1.4) is removed. Thus, latch acquisition
+can be skipped at some tree levels, and latches can be obtained in
+a less restricted order.
- fprintf(stderr,
- "InnoDB: orphan BLOB ref%s%s%s %u:%u:%u->%u\n",
- b->owner ? "" : "(disowned)",
- b->always_owner ? "" : "(has disowned)",
- b->del ? "(deleted)" : "",
- b->ref_page_no, b->ref_heap_no, b->ref_field_no,
- b->blob_page_no);
-
- if (b->blob_page_no != page_no || b->owner || !b->del) {
- success = FALSE;
- }
- }
+(2.1) [identical to (1.1)]: All latches for pages must be obtained in descending
+order of tree level.
- mutex_exit(&index->blobs_mutex);
- return(success);
-}
+(2.2) When a node pointer page latch at level L is obtained,
+either the left sibling page latch at the same level or some ancestor
+page latch (at a level > L) must already be held.
-/**************************************************************//**
-Count and process all references to off-page columns on a page.
-@return number of references processed */
-UNIV_INTERN
-ulint
-btr_blob_dbg_op(
-/*============*/
- const page_t* page, /*!< in: B-tree leaf page */
- const rec_t* rec, /*!< in: record to start from
- (NULL to process the whole page) */
- dict_index_t* index, /*!< in/out: index */
- const char* ctx, /*!< in: context (for logging) */
- const btr_blob_dbg_op_f op) /*!< in: operation on records */
-{
- ulint count = 0;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
+(2.3) [implied by (2.1) and (2.2)] The first node pointer page latch
+obtained may be on any node pointer page.
- ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX);
- ut_a(!rec || page_align(rec) == page);
+(3) X-latch:
- if (!index->blobs || !page_is_leaf(page)
- || !dict_index_is_clust(index)) {
- return(0);
- }
+Node pointer latches can be obtained in any order.
- if (rec == NULL) {
- rec = page_get_infimum_rec(page);
- }
+NOTE: The new rules after MariaDB 10.2.2 do not affect the latching rules
+for leaf pages:
- do {
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- count += op(rec, index, offsets, ctx);
- rec = page_rec_get_next_const(rec);
- } while (!page_rec_is_supremum(rec));
+An index->lock S-latch is needed for reads during the node pointer traversal.
+When the leaf level is reached, index->lock can be released (and, with the
+MariaDB 10.2.2 changes, all node pointer latches as well). Left-to-right
+index traversal at the leaf level can be done safely by obtaining the right
+sibling leaf page latch and only then releasing the old page latch.
- if (heap) {
- mem_heap_free(heap);
- }
+Single leaf page modifications (BTR_MODIFY_LEAF) are protected by the
+index->lock S-latch.
- return(count);
-}
-
-/**************************************************************//**
-Count and add to index->blobs any references to off-page columns
-from records on a page.
-@return number of references added */
-UNIV_INTERN
-ulint
-btr_blob_dbg_add(
-/*=============*/
- const page_t* page, /*!< in: rewritten page */
- dict_index_t* index, /*!< in/out: index */
- const char* ctx) /*!< in: context (for logging) */
-{
- btr_blob_dbg_assert_empty(index, page_get_page_no(page));
-
- return(btr_blob_dbg_op(page, NULL, index, ctx, btr_blob_dbg_add_rec));
-}
-
-/**************************************************************//**
-Count and remove from index->blobs any references to off-page columns
-from records on a page.
-Used when reorganizing a page, before copying the records.
-@return number of references removed */
-UNIV_INTERN
-ulint
-btr_blob_dbg_remove(
-/*================*/
- const page_t* page, /*!< in: b-tree page */
- dict_index_t* index, /*!< in/out: index */
- const char* ctx) /*!< in: context (for logging) */
-{
- ulint count;
-
- count = btr_blob_dbg_op(page, NULL, index, ctx,
- btr_blob_dbg_remove_rec);
-
- /* Check that no references exist. */
- btr_blob_dbg_assert_empty(index, page_get_page_no(page));
-
- return(count);
-}
-
-/**************************************************************//**
-Restore in index->blobs any references to off-page columns
-Used when page reorganize fails due to compressed page overflow. */
-UNIV_INTERN
-void
-btr_blob_dbg_restore(
-/*=================*/
- const page_t* npage, /*!< in: page that failed to compress */
- const page_t* page, /*!< in: copy of original page */
- dict_index_t* index, /*!< in/out: index */
- const char* ctx) /*!< in: context (for logging) */
-{
- ulint removed;
- ulint added;
-
- ut_a(page_get_page_no(npage) == page_get_page_no(page));
- ut_a(page_get_space_id(npage) == page_get_space_id(page));
-
- removed = btr_blob_dbg_remove(npage, index, ctx);
- added = btr_blob_dbg_add(page, index, ctx);
- ut_a(added == removed);
-}
-
-/**************************************************************//**
-Modify the 'deleted' flag of a record. */
-UNIV_INTERN
-void
-btr_blob_dbg_set_deleted_flag(
-/*==========================*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in/out: index */
- const ulint* offsets,/*!< in: rec_get_offs(rec, index) */
- ibool del) /*!< in: TRUE=deleted, FALSE=exists */
-{
- const ib_rbt_node_t* node;
- btr_blob_dbg_t b;
- btr_blob_dbg_t* c;
- ulint i;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_a(dict_index_is_clust(index));
- ut_a(del == !!del);/* must be FALSE==0 or TRUE==1 */
-
- if (!rec_offs_any_extern(offsets) || !index->blobs) {
-
- return;
- }
-
- b.ref_page_no = page_get_page_no(page_align(rec));
- b.ref_heap_no = page_rec_get_heap_no(rec);
-
- for (i = 0; i < rec_offs_n_fields(offsets); i++) {
- if (rec_offs_nth_extern(offsets, i)) {
- ulint len;
- const byte* field_ref = rec_get_nth_field(
- rec, offsets, i, &len);
-
- ut_a(len != UNIV_SQL_NULL);
- ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
- field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;
-
- b.ref_field_no = i;
- b.blob_page_no = mach_read_from_4(
- field_ref + BTR_EXTERN_PAGE_NO);
-
- switch (b.blob_page_no) {
- case 0:
- ut_a(memcmp(field_ref, field_ref_zero,
- BTR_EXTERN_FIELD_REF_SIZE));
- /* page number 0 is for the
- page allocation bitmap */
- case FIL_NULL:
- /* the column has been freed already */
- ut_error;
- }
-
- mutex_enter(&index->blobs_mutex);
- node = rbt_lookup(index->blobs, &b);
- ut_a(node);
-
- c = rbt_value(btr_blob_dbg_t, node);
- /* The flag should be modified. */
- c->del = del;
- if (btr_blob_dbg_msg) {
- b = *c;
- mutex_exit(&index->blobs_mutex);
- btr_blob_dbg_msg_issue("del_mk", &b, "");
- } else {
- mutex_exit(&index->blobs_mutex);
- }
- }
- }
-}
-
-/**************************************************************//**
-Change the ownership of an off-page column. */
-UNIV_INTERN
-void
-btr_blob_dbg_owner(
-/*===============*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in/out: index */
- const ulint* offsets,/*!< in: rec_get_offs(rec, index) */
- ulint i, /*!< in: ith field in rec */
- ibool own) /*!< in: TRUE=owned, FALSE=disowned */
-{
- const ib_rbt_node_t* node;
- btr_blob_dbg_t b;
- const byte* field_ref;
- ulint len;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_a(rec_offs_nth_extern(offsets, i));
-
- field_ref = rec_get_nth_field(rec, offsets, i, &len);
- ut_a(len != UNIV_SQL_NULL);
- ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
- field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;
-
- b.ref_page_no = page_get_page_no(page_align(rec));
- b.ref_heap_no = page_rec_get_heap_no(rec);
- b.ref_field_no = i;
- b.owner = !(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG);
- b.blob_page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO);
-
- ut_a(b.owner == own);
-
- mutex_enter(&index->blobs_mutex);
- node = rbt_lookup(index->blobs, &b);
- /* row_ins_clust_index_entry_by_modify() invokes
- btr_cur_unmark_extern_fields() also for the newly inserted
- references, which are all zero bytes until the columns are stored.
- The node lookup must fail if and only if that is the case. */
- ut_a(!memcmp(field_ref, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE)
- == !node);
-
- if (node) {
- btr_blob_dbg_t* c = rbt_value(btr_blob_dbg_t, node);
- /* Some code sets ownership from TRUE to TRUE.
- We do not allow changing ownership from FALSE to FALSE. */
- ut_a(own || c->owner);
-
- c->owner = own;
- if (!own) {
- c->always_owner = FALSE;
- }
- }
-
- mutex_exit(&index->blobs_mutex);
-}
-#endif /* UNIV_BLOB_DEBUG */
-
-/*
-Latching strategy of the InnoDB B-tree
---------------------------------------
-A tree latch protects all non-leaf nodes of the tree. Each node of a tree
-also has a latch of its own.
-
-A B-tree operation normally first acquires an S-latch on the tree. It
-searches down the tree and releases the tree latch when it has the
-leaf node latch. To save CPU time we do not acquire any latch on
-non-leaf nodes of the tree during a search, those pages are only bufferfixed.
-
-If an operation needs to restructure the tree, it acquires an X-latch on
-the tree before searching to a leaf node. If it needs, for example, to
-split a leaf,
-(1) InnoDB decides the split point in the leaf,
-(2) allocates a new page,
-(3) inserts the appropriate node pointer to the first non-leaf level,
-(4) releases the tree X-latch,
-(5) and then moves records from the leaf to the new allocated page.
+B-tree operations involving page splits or merges (BTR_MODIFY_TREE) and page
+allocations are protected by the index->lock X-latch.
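
A minimal standalone sketch of the S-latch rules above (illustrative only,
not InnoDB code; latch_order_checker and all of its names are hypothetical):
it tracks the levels of the currently held page latches, with levels as in
definition (0), and asserts the ordering rules (1.1)-(1.4).

#include <cassert>
#include <vector>

/* Toy model of the S-latch page latching rules (1.1)-(1.4).
Leaf pages are level 0; the root is at level == tree height.
Rule (1.5) is assumed: the first acquire() is on the root. */
struct latch_order_checker {
	std::vector<int> held;	/* levels of held latches, oldest first */

	void acquire(int level) {
		/* (1.1) latches are obtained in descending level order */
		assert(held.empty() || level <= held.back());
		/* (1.2) the first latch at a lower level requires the
		parent (level + 1) to be held; a repeat at the same
		level models a move to the right sibling, rule (1.3) */
		if (!held.empty() && level < held.back()) {
			assert(held.back() == level + 1);
		}
		held.push_back(level);
	}

	void release_latest() {
		/* releasing in reverse acquisition order satisfies
		(1.4.1): no child-level latch is still held when a
		parent-level latch is released */
		assert(!held.empty());
		held.pop_back();
	}
};

int main() {
	latch_order_checker c;
	c.acquire(3);	/* root of a tree of height 3 */
	c.acquire(2);
	c.acquire(2);	/* right sibling at the same level, rule (1.3) */
	c.acquire(1);
	c.acquire(0);	/* leaf */
	while (!c.held.empty()) {
		c.release_latest();	/* child-to-parent, rule (1.4) */
	}
}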
Node pointers
-------------
@@ -695,7 +192,7 @@ we allocate pages for the non-leaf levels of the tree.
#ifdef UNIV_BTR_DEBUG
/**************************************************************//**
Checks a file segment header within a B-tree root page.
-@return TRUE if valid */
+@return TRUE if valid */
static
ibool
btr_root_fseg_validate(
@@ -714,8 +211,7 @@ btr_root_fseg_validate(
/**************************************************************//**
Gets the root node of a tree and x- or s-latches it.
-@return root page, x- or s-latched */
-static
+@return root page, x- or s-latched */
buf_block_t*
btr_root_block_get(
/*===============*/
@@ -724,16 +220,12 @@ btr_root_block_get(
or RW_X_LATCH */
mtr_t* mtr) /*!< in: mtr */
{
- ulint space;
- ulint zip_size;
- ulint root_page_no;
- buf_block_t* block;
+ const ulint space = dict_index_get_space(index);
+ const page_id_t page_id(space, dict_index_get_page(index));
+ const page_size_t page_size(dict_table_page_size(index->table));
- space = dict_index_get_space(index);
- zip_size = dict_table_zip_size(index->table);
- root_page_no = dict_index_get_page(index);
-
- block = btr_block_get(space, zip_size, root_page_no, mode, (dict_index_t*)index, mtr);
+ buf_block_t* block = btr_block_get(page_id, page_size, mode,
+ index, mtr);
if (!block) {
if (index && index->table) {
@@ -744,7 +236,7 @@ btr_root_block_get(
"Table %s in tablespace %lu is encrypted but encryption service or"
" used key_id is not available. "
" Can't continue reading table.",
- index->table->name, space);
+ index->table->name.m_name, space);
}
return NULL;
@@ -767,22 +259,19 @@ btr_root_block_get(
}
/**************************************************************//**
-Gets the root node of a tree and x-latches it.
-@return root page, x-latched */
-UNIV_INTERN
+Gets the root node of a tree and sx-latches it for segment access.
+@return root page, sx-latched */
page_t*
btr_root_get(
/*=========*/
const dict_index_t* index, /*!< in: index tree */
mtr_t* mtr) /*!< in: mtr */
{
- buf_block_t* root = btr_root_block_get(index, RW_X_LATCH,
- mtr);
-
- if (root && root->page.encrypted == true) {
- root = NULL;
- }
-
+	/* Intended to be used for segment list access.
+	The SX latch does not block other threads from reading user
+	data, but it does block segment list access by others. */
+ buf_block_t* root = btr_root_block_get(index, RW_SX_LATCH,
+ mtr);
return(root ? buf_block_get_frame(root) : NULL);
}
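
A minimal usage sketch of the reworked interface (assuming a valid `index`
pointer; only names visible in this patch are used). The caller is expected
to hold index->lock as described in the latching rules above:

	mtr_t	mtr;
	mtr.start();
	/* The root page comes back SX-latched: concurrent readers of
	user records are not blocked, but concurrent segment list
	access is. */
	page_t*	root = btr_root_get(index, &mtr);
	if (root != NULL) {
		/* e.g. read the segment headers at
		root + PAGE_HEADER + PAGE_BTR_SEG_LEAF and
		root + PAGE_HEADER + PAGE_BTR_SEG_TOP */
	}
	mtr.commit();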
@@ -790,8 +279,7 @@ btr_root_get(
Gets the height of the B-tree (the level of the root, when the leaf
level is assumed to be 0). The caller must hold an S or X latch on
the index.
-@return tree height (level of the root) */
-UNIV_INTERN
+@return tree height (level of the root) */
ulint
btr_height_get(
/*===========*/
@@ -801,22 +289,22 @@ btr_height_get(
ulint height=0;
buf_block_t* root_block;
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_S_LOCK)
- || mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
+ ut_ad(srv_read_only_mode
+ || mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
+ MTR_MEMO_S_LOCK
+ | MTR_MEMO_X_LOCK
+ | MTR_MEMO_SX_LOCK));
- /* S latches the page */
- root_block = btr_root_block_get(index, RW_S_LATCH, mtr);
+ /* S latches the page */
+ root_block = btr_root_block_get(index, RW_S_LATCH, mtr);
if (root_block) {
height = btr_page_get_level(buf_block_get_frame(root_block), mtr);
/* Release the S latch on the root page. */
- mtr_memo_release(mtr, root_block, MTR_MEMO_PAGE_S_FIX);
-#ifdef UNIV_SYNC_DEBUG
- sync_thread_reset_level(&root_block->lock);
-#endif /* UNIV_SYNC_DEBUG */
+ mtr->memo_release(root_block, MTR_MEMO_PAGE_S_FIX);
+
+ ut_d(sync_check_unlock(&root_block->lock));
}
return(height);
@@ -825,7 +313,7 @@ btr_height_get(
/**************************************************************//**
Checks a file segment header within a B-tree root page and updates
the segment header space id.
-@return TRUE if valid */
+@return TRUE if valid */
static
bool
btr_root_fseg_adjust_on_import(
@@ -833,67 +321,57 @@ btr_root_fseg_adjust_on_import(
fseg_header_t* seg_header, /*!< in/out: segment header */
page_zip_des_t* page_zip, /*!< in/out: compressed page,
or NULL */
- ulint space, /*!< in: tablespace identifier */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ ulint space) /*!< in: tablespace identifier */
{
ulint offset = mach_read_from_2(seg_header + FSEG_HDR_OFFSET);
if (offset < FIL_PAGE_DATA
- || offset > UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) {
+ || offset > srv_page_size - FIL_PAGE_DATA_END) {
+ return false;
+ }
- return(FALSE);
+ seg_header += FSEG_HDR_SPACE;
- } else if (page_zip) {
- mach_write_to_4(seg_header + FSEG_HDR_SPACE, space);
- page_zip_write_header(page_zip, seg_header + FSEG_HDR_SPACE,
- 4, mtr);
- } else {
- mlog_write_ulint(seg_header + FSEG_HDR_SPACE,
- space, MLOG_4BYTES, mtr);
+ mach_write_to_4(seg_header, space);
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ memcpy(page_zip->data + page_offset(seg_header), seg_header,
+ 4);
}
- return(TRUE);
+ return true;
}
/**************************************************************//**
Checks and adjusts the root node of a tree during IMPORT TABLESPACE.
@return error code, or DB_SUCCESS */
-UNIV_INTERN
dberr_t
btr_root_adjust_on_import(
/*======================*/
const dict_index_t* index) /*!< in: index tree */
{
- dberr_t err;
- mtr_t mtr;
- page_t* page;
- buf_block_t* block;
- page_zip_des_t* page_zip;
- dict_table_t* table = index->table;
- ulint space_id = dict_index_get_space(index);
- ulint zip_size = dict_table_zip_size(table);
- ulint root_page_no = dict_index_get_page(index);
+ dberr_t err;
+ mtr_t mtr;
+ page_t* page;
+ buf_block_t* block;
+ page_zip_des_t* page_zip;
+ dict_table_t* table = index->table;
+ const ulint space_id = dict_index_get_space(index);
+ const page_id_t page_id(space_id, dict_index_get_page(index));
+ const page_size_t page_size(dict_table_page_size(table));
+
+ DBUG_EXECUTE_IF("ib_import_trigger_corruption_3",
+ return(DB_CORRUPTION););
mtr_start(&mtr);
mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
- DBUG_EXECUTE_IF("ib_import_trigger_corruption_3",
- return(DB_CORRUPTION););
-
- block = btr_block_get(
- space_id, zip_size, root_page_no, RW_X_LATCH, (dict_index_t*)index, &mtr);
+ block = btr_block_get(page_id, page_size, RW_X_LATCH, index, &mtr);
page = buf_block_get_frame(block);
page_zip = buf_block_get_page_zip(block);
- /* Check that this is a B-tree page and both the PREV and NEXT
- pointers are FIL_NULL, because the root page does not have any
- siblings. */
- if (fil_page_get_type(page) != FIL_PAGE_INDEX
- || fil_page_get_prev(page) != FIL_NULL
- || fil_page_get_next(page) != FIL_NULL) {
-
+ if (!fil_page_index_page_check(page) || page_has_siblings(page)) {
err = DB_CORRUPTION;
} else if (dict_index_is_clust(index)) {
@@ -905,18 +383,12 @@ btr_root_adjust_on_import(
if (page_is_compact_format != dict_table_is_comp(table)) {
err = DB_CORRUPTION;
} else {
-
/* Check that the table flags and the tablespace
flags match. */
- ulint flags = fil_space_get_flags(table->space);
-
- if (flags
- && flags != dict_tf_to_fsp_flags(table->flags)) {
-
- err = DB_CORRUPTION;
- } else {
- err = DB_SUCCESS;
- }
+ ulint flags = dict_tf_to_fsp_flags(table->flags);
+ ulint fsp_flags = fil_space_get_flags(table->space);
+ err = flags == fsp_flags
+ ? DB_SUCCESS : DB_CORRUPTION;
}
} else {
err = DB_SUCCESS;
@@ -926,10 +398,10 @@ btr_root_adjust_on_import(
if (err == DB_SUCCESS
&& (!btr_root_fseg_adjust_on_import(
FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
- + page, page_zip, space_id, &mtr)
+ + page, page_zip, space_id)
|| !btr_root_fseg_adjust_on_import(
FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
- + page, page_zip, space_id, &mtr))) {
+ + page, page_zip, space_id))) {
err = DB_CORRUPTION;
}
@@ -939,124 +411,9 @@ btr_root_adjust_on_import(
return(err);
}
-/*************************************************************//**
-Gets pointer to the previous user record in the tree. It is assumed that
-the caller has appropriate latches on the page and its neighbor.
-@return previous user record, NULL if there is none */
-UNIV_INTERN
-rec_t*
-btr_get_prev_user_rec(
-/*==================*/
- rec_t* rec, /*!< in: record on leaf level */
- mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if
- needed, also to the previous page */
-{
- page_t* page;
- page_t* prev_page;
- ulint prev_page_no;
-
- if (!page_rec_is_infimum(rec)) {
-
- rec_t* prev_rec = page_rec_get_prev(rec);
-
- if (!page_rec_is_infimum(prev_rec)) {
-
- return(prev_rec);
- }
- }
-
- page = page_align(rec);
- prev_page_no = btr_page_get_prev(page, mtr);
-
- if (prev_page_no != FIL_NULL) {
-
- ulint space;
- ulint zip_size;
- buf_block_t* prev_block;
-
- space = page_get_space_id(page);
- zip_size = fil_space_get_zip_size(space);
-
- prev_block = buf_page_get_with_no_latch(space, zip_size,
- prev_page_no, mtr);
- prev_page = buf_block_get_frame(prev_block);
- /* The caller must already have a latch to the brother */
- ut_ad(mtr_memo_contains(mtr, prev_block,
- MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains(mtr, prev_block,
- MTR_MEMO_PAGE_X_FIX));
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(prev_page) == page_is_comp(page));
- ut_a(btr_page_get_next(prev_page, mtr)
- == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
- return(page_rec_get_prev(page_get_supremum_rec(prev_page)));
- }
-
- return(NULL);
-}
-
-/*************************************************************//**
-Gets pointer to the next user record in the tree. It is assumed that the
-caller has appropriate latches on the page and its neighbor.
-@return next user record, NULL if there is none */
-UNIV_INTERN
-rec_t*
-btr_get_next_user_rec(
-/*==================*/
- rec_t* rec, /*!< in: record on leaf level */
- mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if
- needed, also to the next page */
-{
- page_t* page;
- page_t* next_page;
- ulint next_page_no;
-
- if (!page_rec_is_supremum(rec)) {
-
- rec_t* next_rec = page_rec_get_next(rec);
-
- if (!page_rec_is_supremum(next_rec)) {
-
- return(next_rec);
- }
- }
-
- page = page_align(rec);
- next_page_no = btr_page_get_next(page, mtr);
-
- if (next_page_no != FIL_NULL) {
- ulint space;
- ulint zip_size;
- buf_block_t* next_block;
-
- space = page_get_space_id(page);
- zip_size = fil_space_get_zip_size(space);
-
- next_block = buf_page_get_with_no_latch(space, zip_size,
- next_page_no, mtr);
- next_page = buf_block_get_frame(next_block);
- /* The caller must already have a latch to the brother */
- ut_ad(mtr_memo_contains(mtr, next_block, MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains(mtr, next_block,
- MTR_MEMO_PAGE_X_FIX));
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(next_page) == page_is_comp(page));
- ut_a(btr_page_get_prev(next_page, mtr)
- == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
- return(page_rec_get_next(page_get_infimum_rec(next_page)));
- }
-
- return(NULL);
-}
-
/**************************************************************//**
Creates a new index page (not the root, and also not
used in page reorganization). @see btr_page_empty(). */
-static
void
btr_page_create(
/*============*/
@@ -1068,18 +425,21 @@ btr_page_create(
{
page_t* page = buf_block_get_frame(block);
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- btr_blob_dbg_assert_empty(index, buf_block_get_page_no(block));
+ ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
if (page_zip) {
- page_create_zip(block, index, level, 0, mtr);
+ page_create_zip(block, index, level, 0, NULL, mtr);
} else {
- page_create(block, mtr, dict_table_is_comp(index->table));
+ page_create(block, mtr, dict_table_is_comp(index->table),
+ dict_index_is_spatial(index));
/* Set the level of the new index page */
btr_page_set_level(page, NULL, level, mtr);
}
- block->check_index_page_at_flush = TRUE;
+ /* For Spatial Index, initialize the Split Sequence Number */
+ if (dict_index_is_spatial(index)) {
+ page_set_ssn_id(block, page_zip, 0, mtr);
+ }
btr_page_set_index_id(page, page_zip, index->id, mtr);
}
@@ -1087,7 +447,7 @@ btr_page_create(
/**************************************************************//**
Allocates a new file page to be used in an ibuf tree. Takes the page from
the free list of the tree, which must contain pages!
-@return new allocated block, x-latched */
+@return new allocated block, x-latched */
static
buf_block_t*
btr_page_alloc_for_ibuf(
@@ -1106,9 +466,11 @@ btr_page_alloc_for_ibuf(
+ PAGE_BTR_IBUF_FREE_LIST, mtr);
ut_a(node_addr.page != FIL_NULL);
- new_block = buf_page_get(dict_index_get_space(index),
- dict_table_zip_size(index->table),
- node_addr.page, RW_X_LATCH, mtr);
+ new_block = buf_page_get(
+ page_id_t(dict_index_get_space(index), node_addr.page),
+ dict_table_page_size(index->table),
+ RW_X_LATCH, mtr);
+
new_page = buf_block_get_frame(new_block);
buf_block_dbg_add_level(new_block, SYNC_IBUF_TREE_NODE_NEW);
@@ -1192,7 +554,6 @@ that the caller has made the reservation for free extents!
@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
(init_mtr == mtr, or the page was not previously freed in mtr)
@retval block (not allocated or initialized) otherwise */
-UNIV_INTERN
buf_block_t*
btr_page_alloc(
/*===========*/
@@ -1227,8 +588,7 @@ btr_page_alloc(
/**************************************************************//**
Gets the number of pages in a B-tree.
-@return number of pages, or ULINT_UNDEFINED if the index is unavailable */
-UNIV_INTERN
+@return number of pages, or ULINT_UNDEFINED if the index is unavailable */
ulint
btr_get_size(
/*=========*/
@@ -1237,16 +597,45 @@ btr_get_size(
mtr_t* mtr) /*!< in/out: mini-transaction where index
is s-latched */
{
- ulint used;
- if (flag == BTR_N_LEAF_PAGES) {
- btr_get_size_and_reserved(index, flag, &used, mtr);
- return used;
- } else if (flag == BTR_TOTAL_SIZE) {
- return btr_get_size_and_reserved(index, flag, &used, mtr);
+ fseg_header_t* seg_header;
+ page_t* root;
+ ulint n=0;
+ ulint dummy;
+
+ ut_ad(srv_read_only_mode
+ || mtr_memo_contains(mtr, dict_index_get_lock(index),
+ MTR_MEMO_S_LOCK));
+
+ if (index->page == FIL_NULL
+ || dict_index_is_online_ddl(index)
+ || !index->is_committed()) {
+ return(ULINT_UNDEFINED);
+ }
+
+ root = btr_root_get(index, mtr);
+
+ if (root) {
+ if (flag == BTR_N_LEAF_PAGES) {
+ seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
+
+ fseg_n_reserved_pages(seg_header, &n, mtr);
+
+ } else if (flag == BTR_TOTAL_SIZE) {
+ seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
+
+ n = fseg_n_reserved_pages(seg_header, &dummy, mtr);
+
+ seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
+
+ n += fseg_n_reserved_pages(seg_header, &dummy, mtr);
+ } else {
+ ut_error;
+ }
} else {
- ut_error;
+ n = ULINT_UNDEFINED;
}
- return (ULINT_UNDEFINED);
+
+ return(n);
}
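
A hedged usage sketch (assuming `index` and a started `mtr` that already
holds the required index->lock S-latch). BTR_TOTAL_SIZE sums the pages
reserved by the non-leaf (TOP) and leaf segments, while BTR_N_LEAF_PAGES
reports the pages used by the leaf segment, so the total can never be
smaller:

	ulint	n_leaf  = btr_get_size(index, BTR_N_LEAF_PAGES, &mtr);
	ulint	n_total = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);

	if (n_leaf != ULINT_UNDEFINED && n_total != ULINT_UNDEFINED) {
		ut_ad(n_total >= n_leaf);	/* reserved >= used */
	}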
/**************************************************************//**
@@ -1273,8 +662,9 @@ btr_get_size_and_reserved(
ut_a(flag == BTR_N_LEAF_PAGES || flag == BTR_TOTAL_SIZE);
- if (index->page == FIL_NULL || dict_index_is_online_ddl(index)
- || *index->name == TEMP_INDEX_PREFIX) {
+ if (index->page == FIL_NULL
+ || dict_index_is_online_ddl(index)
+ || !index->is_committed()) {
return(ULINT_UNDEFINED);
}
@@ -1312,7 +702,7 @@ btr_page_free_for_ibuf(
{
page_t* root;
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
root = btr_root_get(index, mtr);
flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
@@ -1323,153 +713,59 @@ btr_page_free_for_ibuf(
mtr));
}
-/**************************************************************//**
-Frees a file page used in an index tree. Can be used also to (BLOB)
-external storage pages, because the page level 0 can be given as an
-argument. */
-UNIV_INTERN
-void
-btr_page_free_low(
-/*==============*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: block to be freed, x-latched */
- ulint level, /*!< in: page level */
- bool blob, /*!< in: blob page */
- mtr_t* mtr) /*!< in: mtr */
+/** Free an index page.
+@param[in,out] index index tree
+@param[in,out] block block to be freed
+@param[in,out] mtr mini-transaction
+@param[in] blob whether this is freeing a BLOB page */
+void btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr,
+ bool blob)
{
- fseg_header_t* seg_header;
- page_t* root;
+ ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+#ifdef BTR_CUR_HASH_ADAPT
+ ut_ad(!block->index || !blob);
+ ut_ad(!block->index || page_is_leaf(block->frame));
+#endif
+ ut_ad(index->space == block->page.id.space());
+ /* The root page is freed by btr_free_root(). */
+ ut_ad(block->page.id.page_no() != index->page);
+ ut_ad(mtr->is_named_space(index->space));
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
/* The page gets invalid for optimistic searches: increment the frame
modify clock */
buf_block_modify_clock_inc(block);
- btr_blob_dbg_assert_empty(index, buf_block_get_page_no(block));
-
- if (blob) {
- ut_a(level == 0);
- }
-
- bool scrub = srv_immediate_scrub_data_uncompressed;
- /* scrub page */
- if (scrub && blob) {
- /* blob page: scrub entire page */
- // TODO(jonaso): scrub only what is actually needed
- page_t* page = buf_block_get_frame(block);
- memset(page + PAGE_HEADER, 0,
- UNIV_PAGE_SIZE - PAGE_HEADER);
-#ifdef UNIV_DEBUG_SCRUBBING
- fprintf(stderr,
- "btr_page_free_low: scrub blob page %lu/%lu\n",
- buf_block_get_space(block),
- buf_block_get_page_no(block));
-#endif /* UNIV_DEBUG_SCRUBBING */
- } else if (scrub) {
- /* scrub records on page */
-
- /* TODO(jonaso): in theory we could clear full page
- * but, since page still remains in buffer pool, and
- * gets flushed etc. Lots of routines validates consistency
- * of it. And in order to remain structurally consistent
- * we clear each record by it own
- *
- * NOTE: The TODO below mentions removing page from buffer pool
- * and removing redo entries, once that is done, clearing full
- * pages should be possible
- */
- uint cnt = 0;
- uint bytes = 0;
- page_t* page = buf_block_get_frame(block);
- mem_heap_t* heap = NULL;
- ulint* offsets = NULL;
- rec_t* rec = page_rec_get_next(page_get_infimum_rec(page));
- while (!page_rec_is_supremum(rec)) {
- offsets = rec_get_offsets(rec, index,
- offsets, ULINT_UNDEFINED,
- &heap);
- uint size = rec_offs_data_size(offsets);
- memset(rec, 0, size);
- rec = page_rec_get_next(rec);
- cnt++;
- bytes += size;
- }
-#ifdef UNIV_DEBUG_SCRUBBING
- fprintf(stderr,
- "btr_page_free_low: scrub %lu/%lu - "
- "%u records %u bytes\n",
- buf_block_get_space(block),
- buf_block_get_page_no(block),
- cnt, bytes);
-#endif /* UNIV_DEBUG_SCRUBBING */
- if (heap) {
- mem_heap_free(heap);
- }
- }
-
-#ifdef UNIV_DEBUG_SCRUBBING
- if (scrub == false) {
- fprintf(stderr,
- "btr_page_free_low %lu/%lu blob: %u\n",
- buf_block_get_space(block),
- buf_block_get_page_no(block),
- blob);
- }
-#endif /* UNIV_DEBUG_SCRUBBING */
if (dict_index_is_ibuf(index)) {
-
btr_page_free_for_ibuf(index, block, mtr);
-
return;
}
- root = btr_root_get(index, mtr);
-
- if (level == 0) {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
- } else {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
- }
-
- if (scrub) {
- /**
- * Reset page type so that scrub thread won't try to scrub it
- */
- mlog_write_ulint(buf_block_get_frame(block) + FIL_PAGE_TYPE,
- FIL_PAGE_TYPE_ALLOCATED, MLOG_2BYTES, mtr);
- }
-
+ /* TODO: Discard any operations for block from mtr->log.
+ The page will be freed, so previous changes to it by this
+ mini-transaction should not matter. */
+ page_t* root = btr_root_get(index, mtr);
+ fseg_header_t* seg_header = &root[blob || page_is_leaf(block->frame)
+ ? PAGE_HEADER + PAGE_BTR_SEG_LEAF
+ : PAGE_HEADER + PAGE_BTR_SEG_TOP];
fseg_free_page(seg_header,
- buf_block_get_space(block),
- buf_block_get_page_no(block), mtr);
+ block->page.id.space(),
+ block->page.id.page_no(),
+ block->index != NULL, mtr);
/* The page was marked free in the allocation bitmap, but it
- should remain buffer-fixed until mtr_commit(mtr) or until it
+ should remain exclusively latched until mtr_t::commit() or until it
is explicitly freed from the mini-transaction. */
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- /* TODO: Discard any operations on the page from the redo log
- and remove the block from the flush list and the buffer pool.
- This would free up buffer pool earlier and reduce writes to
- both the tablespace and the redo log. */
-}
+ ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
-/**************************************************************//**
-Frees a file page used in an index tree. NOTE: cannot free field external
-storage pages because the page must contain info on its level. */
-UNIV_INTERN
-void
-btr_page_free(
-/*==========*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: block to be freed, x-latched */
- mtr_t* mtr) /*!< in: mtr */
-{
- const page_t* page = buf_block_get_frame(block);
- ulint level = btr_page_get_level(page, mtr);
-
- ut_ad(fil_page_get_type(block->frame) == FIL_PAGE_INDEX);
- btr_page_free_low(index, block, level, false, mtr);
+ if (srv_immediate_scrub_data_uncompressed) {
+ /* In MDEV-15528 this code must be removed and the
+ check in buf_flush_init_for_writing() re-enabled. We
+ should zero out the page after the redo log for this
+ mini-transaction has been durably written. The log
+ would include the 10.4 MLOG_INIT_FREE_PAGE record. */
+ fsp_init_file_page(fil_space_get(index->space), block, mtr);
+ }
}
/**************************************************************//**
@@ -1481,7 +777,7 @@ btr_node_ptr_set_child_page_no(
rec_t* rec, /*!< in: node pointer record */
page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed
part will be updated, or NULL */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
ulint page_no,/*!< in: child node address */
mtr_t* mtr) /*!< in: mtr */
{
@@ -1489,7 +785,7 @@ btr_node_ptr_set_child_page_no(
ulint len;
ut_ad(rec_offs_validate(rec, NULL, offsets));
- ut_ad(!page_is_leaf(page_align(rec)));
+ ut_ad(!page_rec_is_leaf(rec));
ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
/* The child address is in the last field */
@@ -1508,43 +804,44 @@ btr_node_ptr_set_child_page_no(
}
/************************************************************//**
-Returns the child page of a node pointer and x-latches it.
-@return child page, x-latched */
+Returns the child page of a node pointer and sx-latches it.
+@return child page, sx-latched */
static
buf_block_t*
btr_node_ptr_get_child(
/*===================*/
const rec_t* node_ptr,/*!< in: node pointer */
dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
mtr_t* mtr) /*!< in: mtr */
{
- ulint page_no;
- ulint space;
-
ut_ad(rec_offs_validate(node_ptr, index, offsets));
- space = page_get_space_id(page_align(node_ptr));
- page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
- return(btr_block_get(space, dict_table_zip_size(index->table),
- page_no, RW_X_LATCH, index, mtr));
+ const page_id_t page_id(
+ page_get_space_id(page_align(node_ptr)),
+ btr_node_ptr_get_child_page_no(node_ptr, offsets));
+
+ return(btr_block_get(page_id, dict_table_page_size(index->table),
+ RW_SX_LATCH, index, mtr));
}
/************************************************************//**
Returns the upper level node pointer to a page. It is assumed that mtr holds
-an x-latch on the tree.
-@return rec_get_offsets() of the node pointer record */
+an sx-latch on the tree.
+@return rec_get_offsets() of the node pointer record */
static
-ulint*
+offset_t*
btr_page_get_father_node_ptr_func(
/*==============================*/
- ulint* offsets,/*!< in: work area for the return value */
+ offset_t* offsets,/*!< in: work area for the return value */
mem_heap_t* heap, /*!< in: memory heap to use */
btr_cur_t* cursor, /*!< in: cursor pointing to user record,
out: cursor on node pointer record,
its page x-latched */
+ ulint latch_mode,/*!< in: BTR_CONT_MODIFY_TREE
+ or BTR_CONT_SEARCH_TREE */
const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
+ unsigned line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
dtuple_t* tuple;
@@ -1554,11 +851,17 @@ btr_page_get_father_node_ptr_func(
ulint page_no;
dict_index_t* index;
- page_no = buf_block_get_page_no(btr_cur_get_block(cursor));
+ ut_ad(latch_mode == BTR_CONT_MODIFY_TREE
+ || latch_mode == BTR_CONT_SEARCH_TREE);
+
+ page_no = btr_cur_get_block(cursor)->page.id.page_no();
index = btr_cur_get_index(cursor);
+ ut_ad(!dict_index_is_spatial(index));
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
+ ut_ad(srv_read_only_mode
+ || mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
+ MTR_MEMO_X_LOCK
+ | MTR_MEMO_SX_LOCK));
ut_ad(dict_index_get_page(index) != page_no);
@@ -1566,69 +869,78 @@ btr_page_get_father_node_ptr_func(
user_rec = btr_cur_get_rec(cursor);
ut_a(page_rec_is_user_rec(user_rec));
+
tuple = dict_index_build_node_ptr(index, user_rec, 0, heap, level);
+ dberr_t err = DB_SUCCESS;
+
+ err = btr_cur_search_to_nth_level(
+ index, level + 1, tuple,
+ PAGE_CUR_LE, latch_mode, cursor, 0,
+ file, line, mtr);
- btr_cur_search_to_nth_level(index, level + 1, tuple, PAGE_CUR_LE,
- BTR_CONT_MODIFY_TREE, cursor, 0,
- file, line, mtr);
+ if (err != DB_SUCCESS) {
+ ib::warn() << " Error code: " << err
+ << " btr_page_get_father_node_ptr_func "
+ << " level: " << level + 1
+ << " called from file: "
+ << file << " line: " << line
+ << " table: " << index->table->name
+ << " index: " << index->name();
+ }
node_ptr = btr_cur_get_rec(cursor);
- ut_ad(!page_rec_is_comp(node_ptr)
- || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR);
- offsets = rec_get_offsets(node_ptr, index, offsets,
+
+ offsets = rec_get_offsets(node_ptr, index, offsets, false,
ULINT_UNDEFINED, &heap);
if (btr_node_ptr_get_child_page_no(node_ptr, offsets) != page_no) {
rec_t* print_rec;
- fputs("InnoDB: Dump of the child page:\n", stderr);
- buf_page_print(page_align(user_rec), 0);
- fputs("InnoDB: Dump of the parent page:\n", stderr);
- buf_page_print(page_align(node_ptr), 0);
-
- fputs("InnoDB: Corruption of an index tree: table ", stderr);
- ut_print_name(stderr, NULL, TRUE, index->table_name);
- fputs(", index ", stderr);
- ut_print_name(stderr, NULL, FALSE, index->name);
- fprintf(stderr, ",\n"
- "InnoDB: father ptr page no %lu, child page no %lu\n",
- (ulong)
- btr_node_ptr_get_child_page_no(node_ptr, offsets),
- (ulong) page_no);
+
+ ib::error()
+ << "Corruption of an index tree: table "
+ << index->table->name
+ << " index " << index->name
+ << ", father ptr page no "
+ << btr_node_ptr_get_child_page_no(node_ptr, offsets)
+ << ", child page no " << page_no;
+
print_rec = page_rec_get_next(
page_get_infimum_rec(page_align(user_rec)));
- offsets = rec_get_offsets(print_rec, index,
- offsets, ULINT_UNDEFINED, &heap);
+ offsets = rec_get_offsets(print_rec, index, offsets,
+ page_rec_is_leaf(user_rec),
+ ULINT_UNDEFINED, &heap);
page_rec_print(print_rec, offsets);
- offsets = rec_get_offsets(node_ptr, index, offsets,
+ offsets = rec_get_offsets(node_ptr, index, offsets, false,
ULINT_UNDEFINED, &heap);
page_rec_print(node_ptr, offsets);
- fputs("InnoDB: You should dump + drop + reimport the table"
- " to fix the\n"
- "InnoDB: corruption. If the crash happens at "
- "the database startup, see\n"
- "InnoDB: " REFMAN "forcing-innodb-recovery.html about\n"
- "InnoDB: forcing recovery. "
- "Then dump + drop + reimport.\n", stderr);
-
- ut_error;
+ ib::fatal()
+ << "You should dump + drop + reimport the table to"
+ << " fix the corruption. If the crash happens at"
+ << " database startup. " << FORCE_RECOVERY_MSG
+ << " Then dump + drop + reimport.";
}
return(offsets);
}
#define btr_page_get_father_node_ptr(of,heap,cur,mtr) \
- btr_page_get_father_node_ptr_func(of,heap,cur,__FILE__,__LINE__,mtr)
+ btr_page_get_father_node_ptr_func( \
+ of,heap,cur,BTR_CONT_MODIFY_TREE,__FILE__,__LINE__,mtr)
+
+#define btr_page_get_father_node_ptr_for_validate(of,heap,cur,mtr) \
+ btr_page_get_father_node_ptr_func( \
+ of,heap,cur,BTR_CONT_SEARCH_TREE,__FILE__,__LINE__,mtr)
/************************************************************//**
Returns the upper level node pointer to a page. It is assumed that mtr holds
an x-latch on the tree.
-@return rec_get_offsets() of the node pointer record */
+@return rec_get_offsets() of the node pointer record */
static
-ulint*
+offset_t*
btr_page_get_father_block(
/*======================*/
- ulint* offsets,/*!< in: work area for the return value */
+ offset_t* offsets,/*!< in: work area for the return value */
mem_heap_t* heap, /*!< in: memory heap to use */
dict_index_t* index, /*!< in: b-tree index */
buf_block_t* block, /*!< in: child page in the index */
@@ -1643,18 +955,13 @@ btr_page_get_father_block(
return(btr_page_get_father_node_ptr(offsets, heap, cursor, mtr));
}
-/************************************************************//**
-Seeks to the upper level node pointer to a page.
-It is assumed that mtr holds an x-latch on the tree. */
-static
-void
-btr_page_get_father(
-/*================*/
- dict_index_t* index, /*!< in: b-tree index */
- buf_block_t* block, /*!< in: child page in the index */
- mtr_t* mtr, /*!< in: mtr */
- btr_cur_t* cursor) /*!< out: cursor on node pointer record,
- its page x-latched */
+/** Seek to the parent page of a B-tree page.
+@param[in,out] index b-tree
+@param[in] block child page
+@param[in,out] mtr mini-transaction
+@param[out] cursor cursor pointing to the x-latched parent page */
+void btr_page_get_father(dict_index_t* index, buf_block_t* block, mtr_t* mtr,
+ btr_cur_t* cursor)
{
mem_heap_t* heap;
rec_t* rec
@@ -1667,24 +974,107 @@ btr_page_get_father(
mem_heap_free(heap);
}
-/************************************************************//**
-Creates the root node for a new index tree.
-@return page number of the created root, FIL_NULL if did not succeed */
-UNIV_INTERN
+/** PAGE_INDEX_ID value for freed index B-trees */
+static const index_id_t BTR_FREED_INDEX_ID = 0;
+
+/** Free a B-tree root page. btr_free_but_not_root() must already
+have been called.
+In a persistent tablespace, the caller must invoke fsp_init_file_page()
+before mtr.commit().
+@param[in,out] block index root page
+@param[in,out] mtr mini-transaction
+@param[in] invalidate whether to invalidate PAGE_INDEX_ID */
+static void btr_free_root(buf_block_t* block, mtr_t* mtr, bool invalidate)
+{
+ fseg_header_t* header;
+
+ ut_ad(mtr_memo_contains_flagged(mtr, block, MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX));
+ ut_ad(mtr->is_named_space(block->page.id.space()));
+
+ btr_search_drop_page_hash_index(block);
+
+ header = buf_block_get_frame(block) + PAGE_HEADER + PAGE_BTR_SEG_TOP;
+#ifdef UNIV_BTR_DEBUG
+ ut_a(btr_root_fseg_validate(header, block->page.id.space()));
+#endif /* UNIV_BTR_DEBUG */
+ if (invalidate) {
+ btr_page_set_index_id(
+ buf_block_get_frame(block),
+ buf_block_get_page_zip(block),
+ BTR_FREED_INDEX_ID, mtr);
+ }
+
+ while (!fseg_free_step(header, true, mtr)) {
+ /* Free the entire segment in small steps. */
+ }
+}
+
+/** Prepare to free a B-tree.
+@param[in] page_id page id
+@param[in] page_size page size
+@param[in] index_id PAGE_INDEX_ID contents
+@param[in,out] mtr mini-transaction
+@return root block, to invoke btr_free_but_not_root() and btr_free_root()
+@retval NULL if the page is no longer a matching B-tree page */
+static MY_ATTRIBUTE((warn_unused_result))
+buf_block_t*
+btr_free_root_check(
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ index_id_t index_id,
+ mtr_t* mtr)
+{
+ ut_ad(page_id.space() != SRV_TMP_SPACE_ID);
+ ut_ad(index_id != BTR_FREED_INDEX_ID);
+
+ buf_block_t* block = buf_page_get(
+ page_id, page_size, RW_X_LATCH, mtr);
+
+ if (block) {
+ buf_block_dbg_add_level(block, SYNC_TREE_NODE);
+
+ if (fil_page_index_page_check(block->frame)
+ && index_id == btr_page_get_index_id(block->frame)) {
+ /* This should be a root page.
+ It should not be possible to reassign the same
+ index_id for some other index in the tablespace. */
+ ut_ad(!page_has_siblings(block->frame));
+ } else {
+ block = NULL;
+ }
+ }
+
+ return(block);
+}
+
+/** Create the root node for a new index tree.
+@param[in] type type of the index
+@param[in] space space where created
+@param[in] page_size page size
+@param[in] index_id index id
+@param[in] index index, or NULL when applying TRUNCATE
+log record during recovery
+@param[in] btr_redo_create_info used for applying TRUNCATE log
+record during recovery
+@param[in] mtr mini-transaction handle
+@return page number of the created root, FIL_NULL if did not succeed */
ulint
btr_create(
-/*=======*/
- ulint type, /*!< in: type of the index */
- ulint space, /*!< in: space where created */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- index_id_t index_id,/*!< in: index id */
- dict_index_t* index, /*!< in: index */
- mtr_t* mtr) /*!< in: mini-transaction handle */
+ ulint type,
+ ulint space,
+ const page_size_t& page_size,
+ index_id_t index_id,
+ dict_index_t* index,
+ const btr_create_t* btr_redo_create_info,
+ mtr_t* mtr)
{
- buf_block_t* block;
- page_t* page;
- page_zip_des_t* page_zip;
+ buf_block_t* block;
+ page_t* page;
+ page_zip_des_t* page_zip;
+
+ ut_ad(mtr->is_named_space(space));
+ ut_ad(index_id != BTR_FREED_INDEX_ID);
/* Create the two new segments (one, in the case of an ibuf tree) for
the index tree; the segment headers are put on the allocated root page
@@ -1704,7 +1094,7 @@ btr_create(
buf_block_dbg_add_level(
ibuf_hdr_block, SYNC_IBUF_TREE_NODE_NEW);
- ut_ad(buf_block_get_page_no(ibuf_hdr_block)
+ ut_ad(ibuf_hdr_block->page.id.page_no()
== IBUF_HEADER_PAGE_NO);
/* Allocate then the next page to the segment: it will be the
tree root page */
@@ -1719,21 +1109,13 @@ btr_create(
return(FIL_NULL);
}
- ut_ad(buf_block_get_page_no(block) == IBUF_TREE_ROOT_PAGE_NO);
+ ut_ad(block->page.id.page_no() == IBUF_TREE_ROOT_PAGE_NO);
buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE_NEW);
flst_init(block->frame + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
mtr);
} else {
-#ifdef UNIV_BLOB_DEBUG
- if ((type & DICT_CLUSTERED) && !index->blobs) {
- mutex_create(PFS_NOT_INSTRUMENTED,
- &index->blobs_mutex, SYNC_ANY_LATCH);
- index->blobs = rbt_create(sizeof(btr_blob_dbg_t),
- btr_blob_dbg_cmp);
- }
-#endif /* UNIV_BLOB_DEBUG */
block = fseg_create(space, 0,
PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr);
@@ -1743,12 +1125,12 @@ btr_create(
buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
- if (!fseg_create(space, buf_block_get_page_no(block),
+ if (!fseg_create(space, block->page.id.page_no(),
PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr)) {
/* Not enough space for new segment, free root
segment before return. */
- btr_free_root(space, zip_size,
- buf_block_get_page_no(block), mtr);
+ btr_free_root(block, mtr,
+ !index->table->is_temporary());
return(FIL_NULL);
}
@@ -1761,16 +1143,48 @@ btr_create(
page_zip = buf_block_get_page_zip(block);
if (page_zip) {
- page = page_create_zip(block, index, 0, 0, mtr);
+ if (index != NULL) {
+ page = page_create_zip(block, index, 0, 0, NULL, mtr);
+ } else {
+ /* Create a compressed index page when applying
+ TRUNCATE log record during recovery */
+ ut_ad(btr_redo_create_info != NULL);
+
+ redo_page_compress_t page_comp_info;
+
+ page_comp_info.type = type;
+
+ page_comp_info.index_id = index_id;
+
+ page_comp_info.n_fields =
+ btr_redo_create_info->n_fields;
+
+ page_comp_info.field_len =
+ btr_redo_create_info->field_len;
+
+ page_comp_info.fields = btr_redo_create_info->fields;
+
+ page_comp_info.trx_id_pos =
+ btr_redo_create_info->trx_id_pos;
+
+ page = page_create_zip(block, NULL, 0, 0,
+ &page_comp_info, mtr);
+ }
} else {
- page = page_create(block, mtr,
- dict_table_is_comp(index->table));
+ if (index != NULL) {
+ page = page_create(block, mtr,
+ dict_table_is_comp(index->table),
+ dict_index_is_spatial(index));
+ } else {
+ ut_ad(btr_redo_create_info != NULL);
+ page = page_create(
+ block, mtr, btr_redo_create_info->format_flags,
+ type == DICT_SPATIAL);
+ }
/* Set the level of the new index page */
btr_page_set_level(page, NULL, 0, mtr);
}
- block->check_index_page_at_flush = TRUE;
-
/* Set the index id of the page */
btr_page_set_index_id(page, page_zip, index_id, mtr);
@@ -1780,9 +1194,16 @@ btr_create(
/* We reset the free bits for the page to allow creation of several
trees in the same mtr, otherwise the latch on a bitmap page would
- prevent it because of the latching order */
+ prevent it because of the latching order.
+
+ index will be NULL if we are recreating the table during recovery
+ on behalf of TRUNCATE.
+
+	Note: Insert Buffering is disabled for temporary tables, given that
+	most temporary tables are small and short-lived. */
+ if (!(type & DICT_CLUSTERED)
+ && (index == NULL || !dict_table_is_temporary(index->table))) {
- if (!(type & DICT_CLUSTERED)) {
ibuf_reset_free_bits(block);
}
@@ -1792,30 +1213,30 @@ btr_create(
ut_ad(page_get_max_insert_size(page, 2) > 2 * BTR_PAGE_MAX_REC_SIZE);
- return(buf_block_get_page_no(block));
+ return(block->page.id.page_no());
}
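
For the normal, non-recovery path a live index object is passed and the
TRUNCATE redo info is NULL. A hedged sketch (it assumes index->type
carries the DICT_* type flags; `index` and `mtr` are placeholders):

	ulint	root_page_no = btr_create(
		index->type, index->space,
		dict_table_page_size(index->table),
		index->id, index, NULL, &mtr);

	if (root_page_no == FIL_NULL) {
		/* out of tablespace: nothing was created */
	}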
-/************************************************************//**
-Frees a B-tree except the root page, which MUST be freed after this
-by calling btr_free_root. */
-UNIV_INTERN
+/** Free a B-tree except the root page. The root page MUST be freed after
+this by calling btr_free_root.
+@param[in,out] block root page
+@param[in] log_mode mtr logging mode */
+static
void
btr_free_but_not_root(
-/*==================*/
- ulint space, /*!< in: space where created */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint root_page_no) /*!< in: root page number */
+ buf_block_t* block,
+ mtr_log_t log_mode)
{
ibool finished;
- page_t* root;
mtr_t mtr;
+ ut_ad(fil_page_index_page_check(block->frame));
+ ut_ad(!page_has_siblings(block->frame));
leaf_loop:
mtr_start(&mtr);
+ mtr_set_log_mode(&mtr, log_mode);
+ mtr.set_named_space(block->page.id.space());
- root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH,
- NULL, &mtr);
+ page_t* root = block->frame;
if (!root) {
mtr_commit(&mtr);
@@ -1824,16 +1245,16 @@ leaf_loop:
#ifdef UNIV_BTR_DEBUG
ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
- + root, space));
+ + root, block->page.id.space()));
ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
- + root, space));
+ + root, block->page.id.space()));
#endif /* UNIV_BTR_DEBUG */
/* NOTE: page hash indexes are dropped when a page is freed inside
fsp0fsp. */
finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_LEAF,
- &mtr);
+ true, &mtr);
mtr_commit(&mtr);
if (!finished) {
@@ -1842,16 +1263,18 @@ leaf_loop:
}
top_loop:
mtr_start(&mtr);
+ mtr_set_log_mode(&mtr, log_mode);
+ mtr.set_named_space(block->page.id.space());
+
+ root = block->frame;
- root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH,
- NULL, &mtr);
#ifdef UNIV_BTR_DEBUG
ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
- + root, space));
+ + root, block->page.id.space()));
#endif /* UNIV_BTR_DEBUG */
finished = fseg_free_step_not_header(
- root + PAGE_HEADER + PAGE_BTR_SEG_TOP, &mtr);
+ root + PAGE_HEADER + PAGE_BTR_SEG_TOP, true, &mtr);
mtr_commit(&mtr);
if (!finished) {
@@ -1860,38 +1283,162 @@ top_loop:
}
}
-/************************************************************//**
-Frees the B-tree root page. Other tree MUST already have been freed. */
-UNIV_INTERN
+/** Free a persistent index tree if it exists.
+@param[in] page_id root page id
+@param[in] page_size page size
+@param[in] index_id PAGE_INDEX_ID contents
+@param[in,out] mtr mini-transaction */
void
-btr_free_root(
-/*==========*/
- ulint space, /*!< in: space where created */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint root_page_no, /*!< in: root page number */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+btr_free_if_exists(
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ index_id_t index_id,
+ mtr_t* mtr)
{
- buf_block_t* block;
- fseg_header_t* header;
+ buf_block_t* root = btr_free_root_check(
+ page_id, page_size, index_id, mtr);
+
+ if (root == NULL) {
+ return;
+ }
+
+ btr_free_but_not_root(root, mtr->get_log_mode());
+ mtr->set_named_space(page_id.space());
+ btr_free_root(root, mtr, true);
+}
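
Dropping a persistent tree thus reduces to a single call. Sketch only;
`page_id`, `page_size` and `index_id` are assumed to come from the data
dictionary:

	mtr_t	mtr;
	mtr.start();
	/* Verifies that the page is still the root of a matching tree,
	frees all other pages, then frees the root itself and stamps
	PAGE_INDEX_ID with BTR_FREED_INDEX_ID so that a repeated
	attempt (e.g. during crash recovery) finds nothing to do. */
	btr_free_if_exists(page_id, page_size, index_id, &mtr);
	mtr.commit();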
+
+/** Free an index tree in a temporary tablespace or during TRUNCATE TABLE.
+@param[in] page_id root page id
+@param[in] page_size page size */
+void
+btr_free(
+ const page_id_t page_id,
+ const page_size_t& page_size)
+{
+ mtr_t mtr;
+ mtr.start();
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
- block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH,
- NULL, mtr);
+ buf_block_t* block = buf_page_get(
+ page_id, page_size, RW_X_LATCH, &mtr);
if (block) {
- btr_search_drop_page_hash_index(block);
+ btr_free_but_not_root(block, MTR_LOG_NO_REDO);
+ btr_free_root(block, &mtr, false);
+ }
+ mtr.commit();
+}
- header = buf_block_get_frame(block) + PAGE_HEADER + PAGE_BTR_SEG_TOP;
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_root_fseg_validate(header, space));
-#endif /* UNIV_BTR_DEBUG */
+/** Read the last used AUTO_INCREMENT value from PAGE_ROOT_AUTO_INC.
+@param[in,out] index clustered index
+@return the last used AUTO_INCREMENT value
+@retval 0 on error or if no AUTO_INCREMENT value was used yet */
+ib_uint64_t
+btr_read_autoinc(dict_index_t* index)
+{
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(index->table->persistent_autoinc);
+ ut_ad(!dict_table_is_temporary(index->table));
+
+ if (fil_space_t* space = fil_space_acquire(index->space)) {
+ mtr_t mtr;
+ mtr.start();
+ ib_uint64_t autoinc;
+ if (buf_block_t* block = buf_page_get(
+ page_id_t(index->space, index->page),
+ page_size_t(space->flags),
+ RW_S_LATCH, &mtr)) {
+ autoinc = page_get_autoinc(block->frame);
+ } else {
+ autoinc = 0;
+ }
+ mtr.commit();
+ fil_space_release(space);
+ return(autoinc);
+ }
- while (!fseg_free_step(header, mtr)) {
- /* Free the entire segment in small steps. */
+ return(0);
+}
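
On the caller side this is a single call. Note that 0 is returned both on
error and when no AUTO_INCREMENT value has been used yet, which is why the
fallback variant below exists (sketch; `index` is assumed to be the
clustered index of a table with persistent_autoinc set):

	ib_uint64_t	autoinc = btr_read_autoinc(index);
	if (autoinc == 0) {
		/* unused so far, an error, or an old data file whose
		PAGE_ROOT_AUTO_INC was never initialized: see
		btr_read_autoinc_with_fallback() below */
	}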
+
+/** Read the last used AUTO_INCREMENT value from PAGE_ROOT_AUTO_INC,
+or fall back to MAX(auto_increment_column).
+@param[in] table table containing an AUTO_INCREMENT column
+@param[in] col_no index of the AUTO_INCREMENT column
+@return the AUTO_INCREMENT value
+@retval 0 on error or if no AUTO_INCREMENT value was used yet */
+ib_uint64_t
+btr_read_autoinc_with_fallback(const dict_table_t* table, unsigned col_no)
+{
+ ut_ad(table->persistent_autoinc);
+ ut_ad(!dict_table_is_temporary(table));
+
+ dict_index_t* index = dict_table_get_first_index(table);
+
+ if (index == NULL) {
+ } else if (fil_space_t* space = fil_space_acquire(index->space)) {
+ mtr_t mtr;
+ mtr.start();
+ buf_block_t* block = buf_page_get(
+ page_id_t(index->space, index->page),
+ page_size_t(space->flags),
+ RW_S_LATCH, &mtr);
+
+ ib_uint64_t autoinc = block
+ ? page_get_autoinc(block->frame) : 0;
+ const bool retry = block && autoinc == 0
+ && !page_is_empty(block->frame);
+ mtr.commit();
+ fil_space_release(space);
+
+ if (retry) {
+ /* This should be an old data file where
+ PAGE_ROOT_AUTO_INC was initialized to 0.
+ Fall back to reading MAX(autoinc_col).
+ There should be an index on it. */
+ const dict_col_t* autoinc_col
+ = dict_table_get_nth_col(table, col_no);
+ while (index != NULL
+ && index->fields[0].col != autoinc_col) {
+ index = dict_table_get_next_index(index);
+ }
+
+ if (index != NULL && index->space == space->id) {
+ autoinc = row_search_max_autoinc(index);
+ }
}
+
+ return(autoinc);
+ }
+
+ return(0);
+}
+
+/** Write the next available AUTO_INCREMENT value to PAGE_ROOT_AUTO_INC.
+@param[in,out] index clustered index
+@param[in] autoinc the AUTO_INCREMENT value
+@param[in] reset whether to reset the AUTO_INCREMENT
+ to a possibly smaller value than currently
+ exists in the page */
+void
+btr_write_autoinc(dict_index_t* index, ib_uint64_t autoinc, bool reset)
+{
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(index->table->persistent_autoinc);
+ ut_ad(!dict_table_is_temporary(index->table));
+
+ if (fil_space_t* space = fil_space_acquire(index->space)) {
+ mtr_t mtr;
+ mtr.start();
+ mtr.set_named_space(space);
+ page_set_autoinc(buf_page_get(
+ page_id_t(index->space, index->page),
+ page_size_t(space->flags),
+ RW_SX_LATCH, &mtr),
+ index, autoinc, &mtr, reset);
+ mtr.commit();
+ fil_space_release(space);
}
}
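
The write side is symmetric (sketch; `next_value` is a hypothetical
placeholder for the value the caller wants to persist). Per the comment
above, with reset == false the stored value is never lowered, while
reset == true additionally permits storing a smaller value:

	/* Persist the next available AUTO_INCREMENT value in
	PAGE_ROOT_AUTO_INC of the clustered index root page. */
	btr_write_autoinc(index, next_value, /* reset = */ false);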
-#endif /* !UNIV_HOTBACKUP */
/*************************************************************//**
Reorganizes an index page.
@@ -1904,7 +1451,6 @@ IBUF_BITMAP_FREE is unaffected by reorganization.
@retval true if the operation was successful
@retval false if it is a compressed page, and recompression failed */
-UNIV_INTERN
bool
btr_page_reorganize_low(
/*====================*/
@@ -1920,14 +1466,11 @@ btr_page_reorganize_low(
mtr_t* mtr) /*!< in/out: mini-transaction */
{
buf_block_t* block = page_cur_get_block(cursor);
-#ifndef UNIV_HOTBACKUP
buf_pool_t* buf_pool = buf_pool_from_bpage(&block->page);
-#endif /* !UNIV_HOTBACKUP */
page_t* page = buf_block_get_frame(block);
page_zip_des_t* page_zip = buf_block_get_page_zip(block);
buf_block_t* temp_block;
page_t* temp_page;
- ulint log_mode;
ulint data_size1;
ulint data_size2;
ulint max_ins_size1;
@@ -1935,47 +1478,48 @@ btr_page_reorganize_low(
bool success = false;
ulint pos;
bool log_compressed;
+ bool is_spatial;
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
btr_assert_not_corrupted(block, index);
+ ut_ad(fil_page_index_page_check(block->frame));
+ ut_ad(index->is_dummy
+ || block->page.id.space() == index->space);
+ ut_ad(index->is_dummy
+ || block->page.id.page_no() != index->page
+ || !page_has_siblings(page));
#ifdef UNIV_ZIP_DEBUG
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
data_size1 = page_get_data_size(page);
max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
-
/* Turn logging off */
- log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
+ mtr_log_t log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
-#ifndef UNIV_HOTBACKUP
temp_block = buf_block_alloc(buf_pool);
-#else /* !UNIV_HOTBACKUP */
- ut_ad(block == back_block1);
- temp_block = back_block2;
-#endif /* !UNIV_HOTBACKUP */
temp_page = temp_block->frame;
MONITOR_INC(MONITOR_INDEX_REORG_ATTEMPTS);
+	/* This function can be called by redo log apply with a "dummy"
+	index, so the original page's type is more trustworthy. */
+ is_spatial = (fil_page_get_type(page) == FIL_PAGE_RTREE
+ || dict_index_is_spatial(index));
+
/* Copy the old page to temporary space */
buf_frame_copy(temp_page, page);
-#ifndef UNIV_HOTBACKUP
if (!recovery) {
btr_search_drop_page_hash_index(block);
}
- block->check_index_page_at_flush = TRUE;
-#endif /* !UNIV_HOTBACKUP */
- btr_blob_dbg_remove(page, index, "btr_page_reorganize");
-
/* Save the cursor position. */
pos = page_rec_get_n_recs_before(page_cur_get_rec(cursor));
/* Recreate the page: note that global data on page (possible
segment headers, next page-field, etc.) is preserved intact */
- page_create(block, mtr, dict_table_is_comp(index->table));
+ page_create(block, mtr, dict_table_is_comp(index->table), is_spatial);
/* Copy the records from the temporary space to the recreated page;
do not copy the lock bits yet */
@@ -1984,15 +1528,29 @@ btr_page_reorganize_low(
page_get_infimum_rec(temp_page),
index, mtr);
- if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
- /* Copy max trx id to recreated page */
- trx_id_t max_trx_id = page_get_max_trx_id(temp_page);
- page_set_max_trx_id(block, NULL, max_trx_id, mtr);
- /* In crash recovery, dict_index_is_sec_or_ibuf() always
- holds, even for clustered indexes. max_trx_id is
- unused in clustered index pages. */
- ut_ad(max_trx_id != 0 || recovery);
- }
+ /* Copy the PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC. */
+ memcpy(page + (PAGE_HEADER + PAGE_MAX_TRX_ID),
+ temp_page + (PAGE_HEADER + PAGE_MAX_TRX_ID), 8);
+ /* PAGE_MAX_TRX_ID is unused in clustered index pages
+ (other than the root where it is repurposed as PAGE_ROOT_AUTO_INC),
+ non-leaf pages, and in temporary tables. It was always
+ zero-initialized in page_create() in all InnoDB versions.
+ PAGE_MAX_TRX_ID must be nonzero on dict_index_is_sec_or_ibuf()
+ leaf pages.
+
+ During redo log apply, dict_index_is_sec_or_ibuf() always
+ holds, even for clustered indexes. */
+ ut_ad(recovery || dict_table_is_temporary(index->table)
+ || !page_is_leaf(temp_page)
+ || !dict_index_is_sec_or_ibuf(index)
+ || page_get_max_trx_id(page) != 0);
+ /* PAGE_MAX_TRX_ID must be zero on non-leaf pages other than
+ clustered index root pages. */
+ ut_ad(recovery
+ || page_get_max_trx_id(page) == 0
+ || (dict_index_is_sec_or_ibuf(index)
+ ? page_is_leaf(temp_page)
+ : block->page.id.page_no() == index->page));
/* If innodb_log_compressed_pages is ON, page reorganize should log the
compressed page image.*/
@@ -2003,12 +1561,9 @@ btr_page_reorganize_low(
}
if (page_zip
- && !page_zip_compress(page_zip, page, index, z_level, mtr)) {
+ && !page_zip_compress(page_zip, page, index, z_level, NULL, mtr)) {
/* Restore the old page and exit. */
- btr_blob_dbg_restore(page, temp_page, index,
- "btr_page_reorganize_compress_fail");
-
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
/* Check that the bytes that we skip are identical. */
ut_a(!memcmp(page, temp_page, PAGE_HEADER));
@@ -2032,30 +1587,22 @@ btr_page_reorganize_low(
goto func_exit;
}
-#ifndef UNIV_HOTBACKUP
- if (!recovery) {
+ if (!recovery && !dict_table_is_locking_disabled(index->table)) {
/* Update the record lock bitmaps */
lock_move_reorganize_page(block, temp_block);
}
-#endif /* !UNIV_HOTBACKUP */
data_size2 = page_get_data_size(page);
max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1);
if (data_size1 != data_size2 || max_ins_size1 != max_ins_size2) {
- buf_page_print(page, 0);
- buf_page_print(temp_page, 0);
+ ib::error()
+ << "Page old data size " << data_size1
+ << " new data size " << data_size2
+ << ", page old max ins size " << max_ins_size1
+ << " new max ins size " << max_ins_size2;
- fprintf(stderr,
- "InnoDB: Error: page old data size %lu"
- " new data size %lu\n"
- "InnoDB: Error: page old max ins size %lu"
- " new max ins size %lu\n"
- "InnoDB: Submit a detailed bug report"
- " to https://jira.mariadb.org/\n",
- (unsigned long) data_size1, (unsigned long) data_size2,
- (unsigned long) max_ins_size1,
- (unsigned long) max_ins_size2);
+ ib::error() << BUG_REPORT_MSG;
ut_ad(0);
} else {
success = true;
@@ -2072,17 +1619,14 @@ func_exit:
#ifdef UNIV_ZIP_DEBUG
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
-#ifndef UNIV_HOTBACKUP
buf_block_free(temp_block);
-#endif /* !UNIV_HOTBACKUP */
/* Restore logging mode */
mtr_set_log_mode(mtr, log_mode);
-#ifndef UNIV_HOTBACKUP
if (success) {
- byte type;
- byte* log_ptr;
+ mlog_id_t type;
+ byte* log_ptr;
/* Write the log record */
if (page_zip) {
@@ -2108,7 +1652,6 @@ func_exit:
MONITOR_INC(MONITOR_INDEX_REORG_SUCCESSFUL);
}
-#endif /* !UNIV_HOTBACKUP */
return(success);
}
@@ -2124,7 +1667,6 @@ IBUF_BITMAP_FREE is unaffected by reorganization.
@retval true if the operation was successful
@retval false if it is a compressed page, and recompression failed */
-UNIV_INTERN
bool
btr_page_reorganize_block(
/*======================*/
@@ -2145,7 +1687,6 @@ btr_page_reorganize_block(
return(btr_page_reorganize_low(recovery, z_level, &cur, index, mtr));
}
-#ifndef UNIV_HOTBACKUP
/*************************************************************//**
Reorganizes an index page.
@@ -2157,7 +1698,6 @@ IBUF_BITMAP_FREE is unaffected by reorganization.
@retval true if the operation was successful
@retval false if it is a compressed page, and recompression failed */
-UNIV_INTERN
bool
btr_page_reorganize(
/*================*/
@@ -2168,12 +1708,10 @@ btr_page_reorganize(
return(btr_page_reorganize_low(false, page_zip_level,
cursor, index, mtr));
}
-#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
Parses a redo log record of reorganizing a page.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
btr_parse_page_reorganize(
/*======================*/
@@ -2188,6 +1726,7 @@ btr_parse_page_reorganize(
ut_ad(ptr != NULL);
ut_ad(end_ptr != NULL);
+ ut_ad(index != NULL);
/* If dealing with a compressed page the record has the
compression level used during original compression written in
@@ -2212,7 +1751,6 @@ btr_parse_page_reorganize(
return(ptr);
}
-#ifndef UNIV_HOTBACKUP
/*************************************************************//**
Empties an index page. @see btr_page_create(). */
static
@@ -2227,26 +1765,38 @@ btr_page_empty(
{
page_t* page = buf_block_get_frame(block);
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
ut_ad(page_zip == buf_block_get_page_zip(block));
+ ut_ad(!index->is_dummy);
+ ut_ad(index->space == block->page.id.space());
#ifdef UNIV_ZIP_DEBUG
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
btr_search_drop_page_hash_index(block);
- btr_blob_dbg_remove(page, index, "btr_page_empty");
/* Recreate the page: note that global data on page (possible
segment headers, next page-field, etc.) is preserved intact */
+ /* Preserve PAGE_ROOT_AUTO_INC when creating a clustered index
+ root page. */
+ const ib_uint64_t autoinc
+ = dict_index_is_clust(index)
+ && index->page == block->page.id.page_no()
+ ? page_get_autoinc(page)
+ : 0;
+
if (page_zip) {
- page_create_zip(block, index, level, 0, mtr);
+ page_create_zip(block, index, level, autoinc, NULL, mtr);
} else {
- page_create(block, mtr, dict_table_is_comp(index->table));
+ page_create(block, mtr, dict_table_is_comp(index->table),
+ dict_index_is_spatial(index));
btr_page_set_level(page, NULL, level, mtr);
+ if (autoinc) {
+ mlog_write_ull(PAGE_HEADER + PAGE_MAX_TRX_ID + page,
+ autoinc, mtr);
+ }
}
-
- block->check_index_page_at_flush = TRUE;
}
/*************************************************************//**
@@ -2255,8 +1805,7 @@ the tuple. It is assumed that mtr contains an x-latch on the tree.
NOTE that the operation of this function must always succeed,
we cannot reverse it: therefore enough free disk space must be
guaranteed to be available before this function is called.
-@return inserted record or NULL if run out of space */
-UNIV_INTERN
+@return inserted record */
rec_t*
btr_root_raise_and_insert(
/*======================*/
@@ -2265,7 +1814,7 @@ btr_root_raise_and_insert(
on the root page; when the function returns,
the cursor is positioned on the predecessor
of the inserted record */
- ulint** offsets,/*!< out: offsets on inserted record */
+ offset_t** offsets,/*!< out: offsets on inserted record */
mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
@@ -2305,9 +1854,11 @@ btr_root_raise_and_insert(
ut_a(dict_index_get_page(index) == page_get_page_no(root));
#endif /* UNIV_BTR_DEBUG */
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, root_block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
+ MTR_MEMO_X_LOCK
+ | MTR_MEMO_SX_LOCK));
+ ut_ad(mtr_is_block_fix(
+ mtr, root_block, MTR_MEMO_PAGE_X_FIX, index->table));
/* Allocate a new page to the tree. Root splitting is done by first
moving the root records to the new page, emptying the root, putting
@@ -2319,7 +1870,7 @@ btr_root_raise_and_insert(
if (new_block == NULL && os_has_said_disk_full) {
return(NULL);
- }
+ }
new_page = buf_block_get_frame(new_block);
new_page_zip = buf_block_get_page_zip(new_block);
@@ -2351,19 +1902,56 @@ btr_root_raise_and_insert(
/* Update the lock table and possible hash index. */
- lock_move_rec_list_end(new_block, root_block,
- page_get_infimum_rec(root));
+ if (!dict_table_is_locking_disabled(index->table)) {
+ lock_move_rec_list_end(new_block, root_block,
+ page_get_infimum_rec(root));
+ }
+
+ /* Move any existing predicate locks */
+ if (dict_index_is_spatial(index)) {
+ lock_prdt_rec_move(new_block, root_block);
+ }
btr_search_move_or_delete_hash_entries(new_block, root_block,
index);
}
+ if (dict_index_is_sec_or_ibuf(index)) {
+ /* In secondary indexes and the change buffer,
+ PAGE_MAX_TRX_ID can be reset on the root page, because
+		the field only matters on leaf pages, and the root is
+		no longer a leaf page. (Older versions of InnoDB did
+ set PAGE_MAX_TRX_ID on all secondary index pages.) */
+ if (root_page_zip) {
+ byte* p = PAGE_HEADER + PAGE_MAX_TRX_ID + root;
+ memset(p, 0, 8);
+ page_zip_write_header(root_page_zip, p, 8, mtr);
+ } else {
+ mlog_write_ull(PAGE_HEADER + PAGE_MAX_TRX_ID
+ + root, 0, mtr);
+ }
+ } else {
+ /* PAGE_ROOT_AUTO_INC is only present in the clustered index
+ root page; on other clustered index pages, we want to reserve
+ the field PAGE_MAX_TRX_ID for future use. */
+ if (new_page_zip) {
+ byte* p = PAGE_HEADER + PAGE_MAX_TRX_ID + new_page;
+ memset(p, 0, 8);
+ page_zip_write_header(new_page_zip, p, 8, mtr);
+ } else {
+ mlog_write_ull(PAGE_HEADER + PAGE_MAX_TRX_ID
+ + new_page, 0, mtr);
+ }
+ }
+
/* If this is a pessimistic insert which is actually done to
perform a pessimistic update then we have stored the lock
information of the record to be inserted on the infimum of the
root page: we cannot discard the lock structs on the root page */
- lock_update_root_raise(new_block, root_block);
+ if (!dict_table_is_locking_disabled(index->table)) {
+ lock_update_root_raise(new_block, root_block);
+ }
/* Create a memory heap where the node pointer is stored */
if (!*heap) {
@@ -2371,13 +1959,20 @@ btr_root_raise_and_insert(
}
rec = page_rec_get_next(page_get_infimum_rec(new_page));
- new_page_no = buf_block_get_page_no(new_block);
+ new_page_no = new_block->page.id.page_no();
/* Build the node pointer (= node key and page address) for the
child */
+ if (dict_index_is_spatial(index)) {
+ rtr_mbr_t new_mbr;
- node_ptr = dict_index_build_node_ptr(
- index, rec, new_page_no, *heap, level);
+ rtr_page_cal_mbr(index, new_block, &new_mbr, *heap);
+ node_ptr = rtr_index_build_node_ptr(
+ index, &new_mbr, rec, new_page_no, *heap, level);
+ } else {
+ node_ptr = dict_index_build_node_ptr(
+ index, rec, new_page_no, *heap, level);
+ }
/* The node pointer must be marked as the predefined minimum record,
as there is no lower alphabetical limit to records in the leftmost
node of a level: */
@@ -2388,13 +1983,7 @@ btr_root_raise_and_insert(
/* Rebuild the root page to get free space */
btr_page_empty(root_block, root_page_zip, index, level + 1, mtr);
- /* Set the next node and previous node fields, although
- they should already have been set. The previous node field
- must be FIL_NULL if root_page_zip != NULL, because the
- REC_INFO_MIN_REC_FLAG (of the first user record) will be
- set if and only if btr_page_get_prev() == FIL_NULL. */
- btr_page_set_next(root, root_page_zip, FIL_NULL, mtr);
- btr_page_set_prev(root, root_page_zip, FIL_NULL, mtr);
+ ut_ad(!page_has_siblings(root));
page_cursor = btr_cur_get_page_cur(cursor);
@@ -2411,128 +2000,102 @@ btr_root_raise_and_insert(
/* We play safe and reset the free bits for the new page */
-#if 0
- fprintf(stderr, "Root raise new page no %lu\n", new_page_no);
-#endif
-
- if (!dict_index_is_clust(index)) {
+ if (!dict_index_is_clust(index)
+ && !dict_table_is_temporary(index->table)) {
ibuf_reset_free_bits(new_block);
}
if (tuple != NULL) {
/* Reposition the cursor to the child node */
- page_cur_search(new_block, index, tuple,
- PAGE_CUR_LE, page_cursor);
+ page_cur_search(new_block, index, tuple, page_cursor);
} else {
/* Set cursor to first record on child node */
page_cur_set_before_first(new_block, page_cursor);
}
/* Split the child and insert tuple */
- return(btr_page_split_and_insert(flags, cursor, offsets, heap,
- tuple, n_ext, mtr));
+ if (dict_index_is_spatial(index)) {
+ /* Split rtree page and insert tuple */
+ return(rtr_page_split_and_insert(flags, cursor, offsets, heap,
+ tuple, n_ext, mtr));
+ } else {
+ return(btr_page_split_and_insert(flags, cursor, offsets, heap,
+ tuple, n_ext, mtr));
+ }
}
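
/* Root raise in miniature, using toy types (not InnoDB's): the
root's records move to a fresh child and the root retains a single
node pointer, so the root page number never changes. */
#include <memory>
#include <vector>

struct Node {
	std::vector<int>			recs;
	std::vector<std::unique_ptr<Node> >	children;
};

void root_raise(Node& root)
{
	std::unique_ptr<Node>	child(new Node);
	/* Move the root's contents down one level. */
	child->recs = std::move(root.recs);
	child->children = std::move(root.children);
	root.recs.clear();
	root.children.clear();
	/* The root now holds a single pointer to the new child. */
	root.children.push_back(std::move(child));
}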
-/*************************************************************//**
-Decides if the page should be split at the convergence point of inserts
+/** Decide if the page should be split at the convergence point of inserts
converging to the left.
-@return TRUE if split recommended */
-UNIV_INTERN
-ibool
-btr_page_get_split_rec_to_left(
-/*===========================*/
- btr_cur_t* cursor, /*!< in: cursor at which to insert */
- rec_t** split_rec) /*!< out: if split recommended,
- the first record on upper half page,
- or NULL if tuple to be inserted should
- be first */
+@param[in] cursor insert position
+@return the first record to be moved to the right half page
+@retval NULL if no split is recommended */
+rec_t* btr_page_get_split_rec_to_left(const btr_cur_t* cursor)
{
- page_t* page;
- rec_t* insert_point;
- rec_t* infimum;
-
- page = btr_cur_get_page(cursor);
- insert_point = btr_cur_get_rec(cursor);
+ rec_t* split_rec = btr_cur_get_rec(cursor);
+ const page_t* page = page_align(split_rec);
if (page_header_get_ptr(page, PAGE_LAST_INSERT)
- == page_rec_get_next(insert_point)) {
-
- infimum = page_get_infimum_rec(page);
+ != page_rec_get_next(split_rec)) {
+ return NULL;
+ }
- /* If the convergence is in the middle of a page, include also
- the record immediately before the new insert to the upper
- page. Otherwise, we could repeatedly move from page to page
- lots of records smaller than the convergence point. */
+ const rec_t* infimum = page_get_infimum_rec(page);
- if (infimum != insert_point
- && page_rec_get_next(infimum) != insert_point) {
+ /* If the convergence is in the middle of a page, include also
+ the record immediately before the new insert to the upper
+ page. Otherwise, we could repeatedly move from page to page
+ lots of records smaller than the convergence point. */
- *split_rec = insert_point;
- } else {
- *split_rec = page_rec_get_next(insert_point);
- }
-
- return(TRUE);
+ if (split_rec == infimum
+ || split_rec == page_rec_get_next_const(infimum)) {
+ split_rec = page_rec_get_next(split_rec);
}
- return(FALSE);
+ return split_rec;
}
-/*************************************************************//**
-Decides if the page should be split at the convergence point of inserts
+/** Decide if the page should be split at the convergence point of inserts
converging to the right.
-@return TRUE if split recommended */
-UNIV_INTERN
-ibool
-btr_page_get_split_rec_to_right(
-/*============================*/
- btr_cur_t* cursor, /*!< in: cursor at which to insert */
- rec_t** split_rec) /*!< out: if split recommended,
- the first record on upper half page,
- or NULL if tuple to be inserted should
- be first */
+@param[in] cursor insert position
+@param[out] split_rec if split recommended, the first record
+ on the right half page, or
+ NULL if the to-be-inserted record
+ should be first
+@return whether split is recommended */
+bool
+btr_page_get_split_rec_to_right(const btr_cur_t* cursor, rec_t** split_rec)
{
- page_t* page;
- rec_t* insert_point;
-
- page = btr_cur_get_page(cursor);
- insert_point = btr_cur_get_rec(cursor);
+ rec_t* insert_point = btr_cur_get_rec(cursor);
+ const page_t* page = page_align(insert_point);
/* We use eager heuristics: if the new insert would be right after
the previous insert on the same page, we assume that there is a
pattern of sequential inserts here. */
- if (page_header_get_ptr(page, PAGE_LAST_INSERT) == insert_point) {
-
- rec_t* next_rec;
-
- next_rec = page_rec_get_next(insert_point);
-
- if (page_rec_is_supremum(next_rec)) {
-split_at_new:
- /* Split at the new record to insert */
- *split_rec = NULL;
- } else {
- rec_t* next_next_rec = page_rec_get_next(next_rec);
- if (page_rec_is_supremum(next_next_rec)) {
-
- goto split_at_new;
- }
+ if (page_header_get_ptr(page, PAGE_LAST_INSERT) != insert_point) {
+ return false;
+ }
- /* If there are >= 2 user records up from the insert
- point, split all but 1 off. We want to keep one because
- then sequential inserts can use the adaptive hash
- index, as they can do the necessary checks of the right
- search position just by looking at the records on this
- page. */
+ insert_point = page_rec_get_next(insert_point);
- *split_rec = next_next_rec;
+ if (page_rec_is_supremum(insert_point)) {
+ insert_point = NULL;
+ } else {
+ insert_point = page_rec_get_next(insert_point);
+ if (page_rec_is_supremum(insert_point)) {
+ insert_point = NULL;
}
- return(TRUE);
+ /* If there are >= 2 user records up from the insert
+ point, split all but 1 off. We want to keep one because
+ then sequential inserts can use the adaptive hash
+ index, as they can do the necessary checks of the right
+ search position just by looking at the records on this
+ page. */
}
- return(FALSE);
+ *split_rec = insert_point;
+ return true;
}
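
/* The sequential-insert heuristic above, as a self-contained toy:
records are ints at indices 0..n-1, insert_point is the record the
new one would follow, and last_insert models PAGE_LAST_INSERT.
Returns true if a split to the right is recommended; split_at is the
index of the first record for the right half page, or -1 to split at
the new record itself. */
#include <cstddef>
#include <vector>

bool split_to_right_toy(const std::vector<int>& page,
			size_t last_insert, size_t insert_point,
			long& split_at)
{
	if (last_insert != insert_point) {
		return false;	/* no sequential pattern detected */
	}

	/* Keep one record above the insert point on the left page,
	so sequential inserts can keep using the adaptive hash index. */
	if (insert_point + 2 < page.size()) {
		split_at = long(insert_point + 2);
	} else {
		split_at = -1;	/* split at the new record itself */
	}

	return true;
}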
/*************************************************************//**
@@ -2562,7 +2125,7 @@ btr_page_get_split_rec(
rec_t* next_rec;
ulint n;
mem_heap_t* heap;
- ulint* offsets;
+ offset_t* offsets;
page = btr_cur_get_page(cursor);
@@ -2618,9 +2181,9 @@ btr_page_get_split_rec(
/* Include tuple */
incl_data += insert_size;
} else {
- offsets = rec_get_offsets(rec, cursor->index,
- offsets, ULINT_UNDEFINED,
- &heap);
+ offsets = rec_get_offsets(rec, cursor->index, offsets,
+ page_is_leaf(page),
+ ULINT_UNDEFINED, &heap);
incl_data += rec_offs_size(offsets);
}
@@ -2658,7 +2221,7 @@ func_exit:
/*************************************************************//**
Returns TRUE if the insert fits on the appropriate half-page with the
chosen split_rec.
-@return true if fits */
+@return true if fits */
static MY_ATTRIBUTE((nonnull(1,3,4,6), warn_unused_result))
bool
btr_page_insert_fits(
@@ -2668,7 +2231,7 @@ btr_page_insert_fits(
const rec_t* split_rec,/*!< in: suggestion for first record
on upper half-page, or NULL if
tuple to be inserted should be first */
- ulint** offsets,/*!< in: rec_get_offsets(
+ offset_t** offsets,/*!< in: rec_get_offsets(
split_rec, cursor->index); out: garbage */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
@@ -2728,6 +2291,7 @@ btr_page_insert_fits(
space after rec is removed from page. */
*offsets = rec_get_offsets(rec, cursor->index, *offsets,
+ page_is_leaf(page),
ULINT_UNDEFINED, heap);
total_data -= rec_offs_size(*offsets);
@@ -2751,7 +2315,6 @@ btr_page_insert_fits(
/*******************************************************//**
Inserts a data tuple to a tree on a non-leaf level. It is assumed
that mtr holds an x-latch on the tree. */
-UNIV_INTERN
void
btr_insert_on_non_leaf_level_func(
/*==============================*/
@@ -2760,21 +2323,48 @@ btr_insert_on_non_leaf_level_func(
ulint level, /*!< in: level, must be > 0 */
dtuple_t* tuple, /*!< in: the record to be inserted */
const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
+ unsigned line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
big_rec_t* dummy_big_rec;
btr_cur_t cursor;
dberr_t err;
rec_t* rec;
- ulint* offsets = NULL;
mem_heap_t* heap = NULL;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
+ rec_offs_init(offsets_);
+ rtr_info_t rtr_info;
ut_ad(level > 0);
- btr_cur_search_to_nth_level(index, level, tuple, PAGE_CUR_LE,
- BTR_CONT_MODIFY_TREE,
- &cursor, 0, file, line, mtr);
+ if (!dict_index_is_spatial(index)) {
+ dberr_t err = btr_cur_search_to_nth_level(
+ index, level, tuple, PAGE_CUR_LE,
+ BTR_CONT_MODIFY_TREE,
+ &cursor, 0, file, line, mtr);
+
+ if (err != DB_SUCCESS) {
+			ib::warn() << "btr_cur_search_to_nth_level()"
+				" failed with error code: " << err
+				<< " level: " << level
+				<< " called from file: "
+				<< file << " line: " << line
+				<< " table: " << index->table->name
+				<< " index: " << index->name;
+ }
+ } else {
+ /* For spatial index, initialize structures to track
+ its parents etc. */
+ rtr_init_rtr_info(&rtr_info, false, &cursor, index, false);
+
+ rtr_info_update_btr(&cursor, &rtr_info);
+
+ btr_cur_search_to_nth_level(index, level, tuple,
+ PAGE_CUR_RTREE_INSERT,
+ BTR_CONT_MODIFY_TREE,
+ &cursor, 0, file, line, mtr);
+ }
ut_ad(cursor.flag == BTR_CUR_BINARY);
@@ -2796,7 +2386,16 @@ btr_insert_on_non_leaf_level_func(
&dummy_big_rec, 0, NULL, mtr);
ut_a(err == DB_SUCCESS);
}
- mem_heap_free(heap);
+
+ if (heap != NULL) {
+ mem_heap_free(heap);
+ }
+
+ if (dict_index_is_spatial(index)) {
+ ut_ad(cursor.rtr_info);
+
+ rtr_clean_rtr_info(&rtr_info, true);
+ }
}
/**************************************************************//**
@@ -2816,8 +2415,6 @@ btr_attach_half_pages(
ulint direction, /*!< in: FSP_UP or FSP_DOWN */
mtr_t* mtr) /*!< in: mtr */
{
- ulint space;
- ulint zip_size;
ulint prev_page_no;
ulint next_page_no;
ulint level;
@@ -2830,9 +2427,12 @@ btr_attach_half_pages(
page_zip_des_t* upper_page_zip;
dtuple_t* node_ptr_upper;
mem_heap_t* heap;
+ buf_block_t* prev_block = NULL;
+ buf_block_t* next_block = NULL;
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, new_block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+ ut_ad(mtr_is_block_fix(
+ mtr, new_block, MTR_MEMO_PAGE_X_FIX, index->table));
/* Create a memory heap where the data tuple is stored */
heap = mem_heap_create(1024);
@@ -2841,13 +2441,13 @@ btr_attach_half_pages(
if (direction == FSP_DOWN) {
btr_cur_t cursor;
- ulint* offsets;
+ offset_t* offsets;
lower_page = buf_block_get_frame(new_block);
- lower_page_no = buf_block_get_page_no(new_block);
+ lower_page_no = new_block->page.id.page_no();
lower_page_zip = buf_block_get_page_zip(new_block);
upper_page = buf_block_get_frame(block);
- upper_page_no = buf_block_get_page_no(block);
+ upper_page_no = block->page.id.page_no();
upper_page_zip = buf_block_get_page_zip(block);
/* Look up the index for the node pointer to page */
@@ -2864,13 +2464,31 @@ btr_attach_half_pages(
mem_heap_empty(heap);
} else {
lower_page = buf_block_get_frame(block);
- lower_page_no = buf_block_get_page_no(block);
+ lower_page_no = block->page.id.page_no();
lower_page_zip = buf_block_get_page_zip(block);
upper_page = buf_block_get_frame(new_block);
- upper_page_no = buf_block_get_page_no(new_block);
+ upper_page_no = new_block->page.id.page_no();
upper_page_zip = buf_block_get_page_zip(new_block);
}
+ /* Get the previous and next pages of page */
+ prev_page_no = btr_page_get_prev(page);
+ next_page_no = btr_page_get_next(page);
+
+ const ulint space = block->page.id.space();
+
+ /* for consistency, both blocks should be locked, before change */
+ if (prev_page_no != FIL_NULL && direction == FSP_DOWN) {
+ prev_block = btr_block_get(
+ page_id_t(space, prev_page_no), block->page.size,
+ RW_X_LATCH, index, mtr);
+ }
+ if (next_page_no != FIL_NULL && direction != FSP_DOWN) {
+ next_block = btr_block_get(
+ page_id_t(space, next_page_no), block->page.size,
+ RW_X_LATCH, index, mtr);
+ }
+
/* Get the level of the split pages */
level = btr_page_get_level(buf_block_get_frame(block), mtr);
ut_ad(level
@@ -2891,22 +2509,13 @@ btr_attach_half_pages(
/* Free the memory heap */
mem_heap_free(heap);
- /* Get the previous and next pages of page */
-
- prev_page_no = btr_page_get_prev(page, mtr);
- next_page_no = btr_page_get_next(page, mtr);
- space = buf_block_get_space(block);
- zip_size = buf_block_get_zip_size(block);
-
/* Update page links of the level */
- if (prev_page_no != FIL_NULL) {
- buf_block_t* prev_block = btr_block_get(
- space, zip_size, prev_page_no, RW_X_LATCH, index, mtr);
+ if (prev_block) {
#ifdef UNIV_BTR_DEBUG
ut_a(page_is_comp(prev_block->frame) == page_is_comp(page));
- ut_a(btr_page_get_next(prev_block->frame, mtr)
- == buf_block_get_page_no(block));
+ ut_a(btr_page_get_next(prev_block->frame)
+ == block->page.id.page_no());
#endif /* UNIV_BTR_DEBUG */
btr_page_set_next(buf_block_get_frame(prev_block),
@@ -2914,12 +2523,10 @@ btr_attach_half_pages(
lower_page_no, mtr);
}
- if (next_page_no != FIL_NULL) {
- buf_block_t* next_block = btr_block_get(
- space, zip_size, next_page_no, RW_X_LATCH, index, mtr);
+ if (next_block) {
#ifdef UNIV_BTR_DEBUG
ut_a(page_is_comp(next_block->frame) == page_is_comp(page));
- ut_a(btr_page_get_prev(next_block->frame, mtr)
+ ut_a(btr_page_get_prev(next_block->frame)
== page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */
@@ -2928,11 +2535,24 @@ btr_attach_half_pages(
upper_page_no, mtr);
}
- btr_page_set_prev(lower_page, lower_page_zip, prev_page_no, mtr);
- btr_page_set_next(lower_page, lower_page_zip, upper_page_no, mtr);
+ if (direction == FSP_DOWN) {
+ /* lower_page is new */
+ btr_page_set_prev(lower_page, lower_page_zip,
+ prev_page_no, mtr);
+ } else {
+ ut_ad(btr_page_get_prev(lower_page) == prev_page_no);
+ }
+ btr_page_set_next(lower_page, lower_page_zip, upper_page_no, mtr);
btr_page_set_prev(upper_page, upper_page_zip, lower_page_no, mtr);
- btr_page_set_next(upper_page, upper_page_zip, next_page_no, mtr);
+
+ if (direction != FSP_DOWN) {
+ /* upper_page is new */
+ btr_page_set_next(upper_page, upper_page_zip,
+ next_page_no, mtr);
+ } else {
+ ut_ad(btr_page_get_next(upper_page) == next_page_no);
+ }
}
/*************************************************************//**
@@ -2944,7 +2564,7 @@ btr_page_tuple_smaller(
/*===================*/
btr_cur_t* cursor, /*!< in: b-tree cursor */
const dtuple_t* tuple, /*!< in: tuple to consider */
- ulint** offsets,/*!< in/out: temporary storage */
+ offset_t** offsets,/*!< in/out: temporary storage */
ulint n_uniq, /*!< in: number of unique fields
in the index page records */
mem_heap_t** heap) /*!< in/out: heap for offsets */
@@ -2960,7 +2580,7 @@ btr_page_tuple_smaller(
first_rec = page_cur_get_rec(&pcur);
*offsets = rec_get_offsets(
- first_rec, cursor->index, *offsets,
+ first_rec, cursor->index, *offsets, page_is_leaf(block->frame),
n_uniq, heap);
return(cmp_dtuple_rec(tuple, first_rec, *offsets) < 0);
@@ -2984,7 +2604,7 @@ rec_t*
btr_insert_into_right_sibling(
ulint flags,
btr_cur_t* cursor,
- ulint** offsets,
+ offset_t** offsets,
mem_heap_t* heap,
const dtuple_t* tuple,
ulint n_ext,
@@ -2992,11 +2612,13 @@ btr_insert_into_right_sibling(
{
buf_block_t* block = btr_cur_get_block(cursor);
page_t* page = buf_block_get_frame(block);
- ulint next_page_no = btr_page_get_next(page, mtr);
+ const uint32_t next_page_no = btr_page_get_next(page);
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_flagged(
+ mtr, dict_index_get_lock(cursor->index),
+ MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK));
+ ut_ad(mtr_is_block_fix(
+ mtr, block, MTR_MEMO_PAGE_X_FIX, cursor->index->table));
ut_ad(heap);
if (next_page_no == FIL_NULL || !page_rec_is_supremum(
@@ -3010,12 +2632,13 @@ btr_insert_into_right_sibling(
page_t* next_page;
btr_cur_t next_father_cursor;
rec_t* rec = NULL;
- ulint zip_size = buf_block_get_zip_size(block);
ulint max_size;
+ const ulint space = block->page.id.space();
+
next_block = btr_block_get(
- buf_block_get_space(block), zip_size,
- next_page_no, RW_X_LATCH, cursor->index, mtr);
+ page_id_t(space, next_page_no), block->page.size,
+ RW_X_LATCH, cursor->index, mtr);
next_page = buf_block_get_frame(next_block);
bool is_leaf = page_is_leaf(next_page);
@@ -3030,15 +2653,19 @@ btr_insert_into_right_sibling(
max_size = page_get_max_insert_size_after_reorganize(next_page, 1);
/* Extends gap lock for the next page */
- lock_update_split_left(next_block, block);
+ if (!dict_table_is_locking_disabled(cursor->index->table)) {
+ lock_update_split_left(next_block, block);
+ }
rec = page_cur_tuple_insert(
&next_page_cursor, tuple, cursor->index, offsets, &heap,
n_ext, mtr);
if (rec == NULL) {
- if (zip_size && is_leaf
- && !dict_index_is_clust(cursor->index)) {
+ if (is_leaf
+ && next_block->page.size.is_compressed()
+ && !dict_index_is_clust(cursor->index)
+ && !dict_table_is_temporary(cursor->index->table)) {
/* Reset the IBUF_BITMAP_FREE bits, because
page_cur_tuple_insert() will have attempted page
reorganize before failing. */
@@ -3061,7 +2688,7 @@ btr_insert_into_right_sibling(
compressed = btr_cur_pessimistic_delete(
&err, TRUE, &next_father_cursor,
- BTR_CREATE_FLAG, RB_NONE, mtr);
+ BTR_CREATE_FLAG, false, mtr);
ut_a(err == DB_SUCCESS);
@@ -3070,7 +2697,7 @@ btr_insert_into_right_sibling(
}
dtuple_t* node_ptr = dict_index_build_node_ptr(
- cursor->index, rec, buf_block_get_page_no(next_block),
+ cursor->index, rec, next_block->page.id.page_no(),
heap, level);
btr_insert_on_non_leaf_level(
@@ -3078,11 +2705,13 @@ btr_insert_into_right_sibling(
ut_ad(rec_offs_validate(rec, cursor->index, *offsets));
- if (is_leaf && !dict_index_is_clust(cursor->index)) {
+ if (is_leaf
+ && !dict_index_is_clust(cursor->index)
+ && !dict_table_is_temporary(cursor->index->table)) {
/* Update the free bits of the B-tree page in the
insert buffer bitmap. */
- if (zip_size) {
+ if (next_block->page.size.is_compressed()) {
ibuf_update_free_bits_zip(next_block, mtr);
} else {
ibuf_update_free_bits_if_full(
@@ -3101,12 +2730,10 @@ released within this function! NOTE that the operation of this
function must always succeed, we cannot reverse it: therefore enough
free disk space (2 pages) must be guaranteed to be available before
this function is called.
-
NOTE: jonaso added support for calling this function with tuple == NULL,
which causes it to only split a page.
@return inserted record or NULL if run out of space */
-UNIV_INTERN
rec_t*
btr_page_split_and_insert(
/*======================*/
@@ -3114,7 +2741,7 @@ btr_page_split_and_insert(
btr_cur_t* cursor, /*!< in: cursor at which to insert; when the
function returns, the cursor is positioned
on the predecessor of the inserted record */
- ulint** offsets,/*!< out: offsets on inserted record */
+ offset_t** offsets,/*!< out: offsets on inserted record */
mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
@@ -3123,26 +2750,25 @@ btr_page_split_and_insert(
buf_block_t* block;
page_t* page;
page_zip_des_t* page_zip;
- ulint page_no;
- byte direction;
- ulint hint_page_no;
buf_block_t* new_block;
page_t* new_page;
page_zip_des_t* new_page_zip;
rec_t* split_rec;
buf_block_t* left_block;
buf_block_t* right_block;
- buf_block_t* insert_block;
page_cur_t* page_cursor;
rec_t* first_rec;
byte* buf = 0; /* remove warning */
rec_t* move_limit;
- ibool insert_will_fit;
- ibool insert_left;
ulint n_iterations = 0;
- rec_t* rec;
ulint n_uniq;
+ if (dict_index_is_spatial(cursor->index)) {
+ /* Split rtree page and update parent */
+ return(rtr_page_split_and_insert(flags, cursor, offsets, heap,
+ tuple, n_ext, mtr));
+ }
+
if (!*heap) {
*heap = mem_heap_create(1024);
}
@@ -3151,40 +2777,37 @@ func_start:
mem_heap_empty(*heap);
*offsets = NULL;
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
- MTR_MEMO_X_LOCK));
+ ut_ad(mtr_memo_contains_flagged(mtr,
+ dict_index_get_lock(cursor->index),
+ MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK));
ut_ad(!dict_index_is_online_ddl(cursor->index)
|| (flags & BTR_CREATE_FLAG)
|| dict_index_is_clust(cursor->index));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own_flagged(dict_index_get_lock(cursor->index),
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX));
block = btr_cur_get_block(cursor);
page = buf_block_get_frame(block);
page_zip = buf_block_get_page_zip(block);
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_is_block_fix(
+ mtr, block, MTR_MEMO_PAGE_X_FIX, cursor->index->table));
ut_ad(!page_is_empty(page));
/* try to insert to the next page if possible before split */
- rec = btr_insert_into_right_sibling(
- flags, cursor, offsets, *heap, tuple, n_ext, mtr);
-
- if (rec != NULL) {
+ if (rec_t* rec = btr_insert_into_right_sibling(
+ flags, cursor, offsets, *heap, tuple, n_ext, mtr)) {
return(rec);
}
- page_no = buf_block_get_page_no(block);
-
/* 1. Decide the split record; split_rec == NULL means that the
tuple to be inserted should be the first record on the upper
half-page */
- insert_left = FALSE;
+ bool insert_left = false;
+ ulint hint_page_no = block->page.id.page_no() + 1;
+ byte direction = FSP_UP;
- if (tuple != NULL && n_iterations > 0) {
- direction = FSP_UP;
- hint_page_no = page_no + 1;
+ if (tuple && n_iterations > 0) {
split_rec = btr_page_get_split_rec(cursor, tuple, n_ext);
if (split_rec == NULL) {
@@ -3192,17 +2815,10 @@ func_start:
cursor, tuple, offsets, n_uniq, heap);
}
} else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
- direction = FSP_UP;
- hint_page_no = page_no + 1;
-
- } else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) {
+ } else if ((split_rec = btr_page_get_split_rec_to_left(cursor))) {
direction = FSP_DOWN;
- hint_page_no = page_no - 1;
- ut_ad(split_rec);
+ hint_page_no -= 2;
} else {
- direction = FSP_UP;
- hint_page_no = page_no + 1;
-
/* If there is only one record in the index page, we
can't split the node in the middle by default. We need
to determine whether the new record will be inserted
@@ -3221,15 +2837,15 @@ func_start:
DBUG_EXECUTE_IF("disk_is_full",
os_has_said_disk_full = true;
- return(NULL););
+ return(NULL););
/* 2. Allocate a new page to the index */
new_block = btr_page_alloc(cursor->index, hint_page_no, direction,
btr_page_get_level(page, mtr), mtr, mtr);
- if (new_block == NULL && os_has_said_disk_full) {
+ if (!new_block) {
return(NULL);
- }
+ }
new_page = buf_block_get_frame(new_block);
new_page_zip = buf_block_get_page_zip(new_block);
@@ -3250,14 +2866,10 @@ func_start:
first_rec = move_limit = split_rec;
*offsets = rec_get_offsets(split_rec, cursor->index, *offsets,
- n_uniq, heap);
+ page_is_leaf(page), n_uniq, heap);
- if (tuple != NULL) {
- insert_left = cmp_dtuple_rec(
- tuple, split_rec, *offsets) < 0;
- } else {
- insert_left = 1;
- }
+ insert_left = !tuple
+ || cmp_dtuple_rec(tuple, split_rec, *offsets) < 0;
if (!insert_left && new_page_zip && n_iterations > 0) {
/* If a compressed page has already been split,
@@ -3274,8 +2886,9 @@ func_start:
insert_empty:
ut_ad(!split_rec);
ut_ad(!insert_left);
- buf = (byte*) mem_alloc(rec_get_converted_size(cursor->index,
- tuple, n_ext));
+ buf = UT_NEW_ARRAY_NOKEY(
+ byte,
+ rec_get_converted_size(cursor->index, tuple, n_ext));
first_rec = rec_convert_dtuple_to_rec(buf, cursor->index,
tuple, n_ext);
@@ -3291,16 +2904,16 @@ insert_empty:
on the appropriate half-page, we may release the tree x-latch.
We can then move the records after releasing the tree latch,
thus reducing the tree latch contention. */
+ bool insert_will_fit;
if (tuple == NULL) {
- insert_will_fit = 1;
- }
- else if (split_rec) {
+ insert_will_fit = true;
+ } else if (split_rec) {
insert_will_fit = !new_page_zip
&& btr_page_insert_fits(cursor, split_rec,
offsets, tuple, n_ext, heap);
} else {
if (!insert_left) {
- mem_free(buf);
+ UT_DELETE_ARRAY(buf);
buf = NULL;
}
@@ -3309,11 +2922,17 @@ insert_empty:
offsets, tuple, n_ext, heap);
}
- if (insert_will_fit && page_is_leaf(page)
+ if (!srv_read_only_mode
+ && insert_will_fit
+ && page_is_leaf(page)
&& !dict_index_is_online_ddl(cursor->index)) {
- mtr_memo_release(mtr, dict_index_get_lock(cursor->index),
- MTR_MEMO_X_LOCK);
+ mtr->memo_release(
+ dict_index_get_lock(cursor->index),
+ MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK);
+
+	/* NOTE: We cannot release the root block latch here, because
+	it contains the segment headers and has typically already been
+	modified at this point. */
}
/* 5. Move then the records to the new page */
@@ -3342,9 +2961,12 @@ insert_empty:
/* Update the lock table and possible hash index. */
- lock_move_rec_list_start(
- new_block, block, move_limit,
- new_page + PAGE_NEW_INFIMUM);
+ if (!dict_table_is_locking_disabled(
+ cursor->index->table)) {
+ lock_move_rec_list_start(
+ new_block, block, move_limit,
+ new_page + PAGE_NEW_INFIMUM);
+ }
btr_search_move_or_delete_hash_entries(
new_block, block, cursor->index);
@@ -3358,7 +2980,9 @@ insert_empty:
left_block = new_block;
right_block = block;
- lock_update_split_left(right_block, left_block);
+ if (!dict_table_is_locking_disabled(cursor->index->table)) {
+ lock_update_split_left(right_block, left_block);
+ }
} else {
/* fputs("Split right\n", stderr); */
@@ -3382,8 +3006,11 @@ insert_empty:
cursor->index, mtr);
/* Update the lock table and possible hash index. */
-
- lock_move_rec_list_end(new_block, block, move_limit);
+ if (!dict_table_is_locking_disabled(
+ cursor->index->table)) {
+ lock_move_rec_list_end(
+ new_block, block, move_limit);
+ }
btr_search_move_or_delete_hash_entries(
new_block, block, cursor->index);
@@ -3399,7 +3026,9 @@ insert_empty:
left_block = block;
right_block = new_block;
- lock_update_split_right(right_block, left_block);
+ if (!dict_table_is_locking_disabled(cursor->index->table)) {
+ lock_update_split_right(right_block, left_block);
+ }
}
#ifdef UNIV_ZIP_DEBUG
@@ -3414,23 +3043,19 @@ insert_empty:
/* 6. The split and the tree modification is now completed. Decide the
page where the tuple should be inserted */
+ rec_t* rec;
+ buf_block_t* const insert_block = insert_left
+ ? left_block : right_block;
- if (tuple == NULL) {
+ if (UNIV_UNLIKELY(!tuple)) {
rec = NULL;
goto func_exit;
}
- if (insert_left) {
- insert_block = left_block;
- } else {
- insert_block = right_block;
- }
-
/* 7. Reposition the cursor for insert and try insertion */
page_cursor = btr_cur_get_page_cur(cursor);
- page_cur_search(insert_block, cursor->index, tuple,
- PAGE_CUR_LE, page_cursor);
+ page_cur_search(insert_block, cursor->index, tuple, page_cursor);
rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
offsets, heap, n_ext, mtr);
@@ -3471,14 +3096,13 @@ insert_empty:
/* The insert did not fit on the page: loop back to the
start of the function for a new split */
insert_failed:
- /* We play safe and reset the free bits */
- if (!dict_index_is_clust(cursor->index)) {
+	/* We play it safe and reset the free bits for new_page */
+ if (!dict_index_is_clust(cursor->index)
+ && !dict_table_is_temporary(cursor->index->table)) {
ibuf_reset_free_bits(new_block);
ibuf_reset_free_bits(block);
}
- /* fprintf(stderr, "Split second round %lu\n",
- page_get_page_no(page)); */
n_iterations++;
ut_ad(n_iterations < 2
|| buf_block_get_page_zip(insert_block));
@@ -3491,66 +3115,60 @@ func_exit:
/* Insert fit on the page: update the free bits for the
left and right pages in the same mtr */
- if (!dict_index_is_clust(cursor->index) && page_is_leaf(page)) {
+ if (!dict_index_is_clust(cursor->index)
+ && !dict_table_is_temporary(cursor->index->table)
+ && page_is_leaf(page)) {
+
ibuf_update_free_bits_for_two_pages_low(
- buf_block_get_zip_size(left_block),
left_block, right_block, mtr);
}
-#if 0
- fprintf(stderr, "Split and insert done %lu %lu\n",
- buf_block_get_page_no(left_block),
- buf_block_get_page_no(right_block));
-#endif
MONITOR_INC(MONITOR_INDEX_SPLIT);
ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index));
ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index));
- if (tuple == NULL) {
- ut_ad(rec == NULL);
- }
+ ut_ad(tuple || !rec);
ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets));
return(rec);
}
-/*************************************************************//**
-Removes a page from the level list of pages. */
-UNIV_INTERN
+/** Removes a page from the level list of pages.
+@param[in] space space where removed
+@param[in] page_size page size
+@param[in,out] page page to remove
+@param[in] index index tree
+@param[in,out] mtr mini-transaction */
void
btr_level_list_remove_func(
-/*=======================*/
- ulint space, /*!< in: space where removed */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- page_t* page, /*!< in/out: page to remove */
- dict_index_t* index, /*!< in: index tree */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ ulint space,
+ const page_size_t& page_size,
+ page_t* page,
+ dict_index_t* index,
+ mtr_t* mtr)
{
- ulint prev_page_no;
- ulint next_page_no;
-
ut_ad(page != NULL);
ut_ad(mtr != NULL);
- ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_is_page_fix(mtr, page, MTR_MEMO_PAGE_X_FIX, index->table));
ut_ad(space == page_get_space_id(page));
/* Get the previous and next page numbers of page */
- prev_page_no = btr_page_get_prev(page, mtr);
- next_page_no = btr_page_get_next(page, mtr);
+ const uint32_t prev_page_no = btr_page_get_prev(page);
+ const uint32_t next_page_no = btr_page_get_next(page);
/* Update page links of the level */
if (prev_page_no != FIL_NULL) {
buf_block_t* prev_block
- = btr_block_get(space, zip_size, prev_page_no,
- RW_X_LATCH, index, mtr);
+ = btr_block_get(page_id_t(space, prev_page_no),
+ page_size, RW_X_LATCH, index, mtr);
+
page_t* prev_page
= buf_block_get_frame(prev_block);
#ifdef UNIV_BTR_DEBUG
ut_a(page_is_comp(prev_page) == page_is_comp(page));
- ut_a(btr_page_get_next(prev_page, mtr)
- == page_get_page_no(page));
+ ut_a(!memcmp(prev_page + FIL_PAGE_NEXT, page + FIL_PAGE_OFFSET,
+ 4));
#endif /* UNIV_BTR_DEBUG */
btr_page_set_next(prev_page,
@@ -3560,14 +3178,16 @@ btr_level_list_remove_func(
if (next_page_no != FIL_NULL) {
buf_block_t* next_block
- = btr_block_get(space, zip_size, next_page_no,
- RW_X_LATCH, index, mtr);
+ = btr_block_get(
+ page_id_t(space, next_page_no), page_size,
+ RW_X_LATCH, index, mtr);
+
page_t* next_page
= buf_block_get_frame(next_block);
#ifdef UNIV_BTR_DEBUG
ut_a(page_is_comp(next_page) == page_is_comp(page));
- ut_a(btr_page_get_prev(next_page, mtr)
- == page_get_page_no(page));
+ ut_a(!memcmp(next_page + FIL_PAGE_PREV, page + FIL_PAGE_OFFSET,
+ 4));
#endif /* UNIV_BTR_DEBUG */
btr_page_set_prev(next_page,
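
/* The unlink above in miniature: a plain doubly-linked list detach,
minus the page latches, FIL_NULL page numbers and redo logging. The
PageToy type is illustrative, not InnoDB's. */
#include <cstddef>

struct PageToy {
	PageToy*	prev;
	PageToy*	next;
};

void level_list_remove_toy(PageToy& page)
{
	if (page.prev != NULL) {
		page.prev->next = page.next;
	}
	if (page.next != NULL) {
		page.next->prev = page.prev;
	}
	page.prev = page.next = NULL;
}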
@@ -3583,24 +3203,21 @@ UNIV_INLINE
void
btr_set_min_rec_mark_log(
/*=====================*/
- rec_t* rec, /*!< in: record */
- byte type, /*!< in: MLOG_COMP_REC_MIN_MARK or MLOG_REC_MIN_MARK */
- mtr_t* mtr) /*!< in: mtr */
+ rec_t* rec, /*!< in: record */
+ mlog_id_t type, /*!< in: MLOG_COMP_REC_MIN_MARK or
+ MLOG_REC_MIN_MARK */
+ mtr_t* mtr) /*!< in: mtr */
{
mlog_write_initial_log_record(rec, type, mtr);
/* Write rec offset as a 2-byte ulint */
mlog_catenate_ulint(mtr, page_offset(rec), MLOG_2BYTES);
}
-#else /* !UNIV_HOTBACKUP */
-# define btr_set_min_rec_mark_log(rec,comp,mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
/****************************************************************//**
Parses the redo log record for setting an index record as the predefined
minimum record.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
btr_parse_set_min_rec_mark(
/*=======================*/
@@ -3628,61 +3245,28 @@ btr_parse_set_min_rec_mark(
return(ptr + 2);
}
-/****************************************************************//**
-Sets a record as the predefined minimum record. */
-UNIV_INTERN
-void
-btr_set_min_rec_mark(
-/*=================*/
- rec_t* rec, /*!< in: record */
- mtr_t* mtr) /*!< in: mtr */
+/** Sets a record as the predefined minimum record. */
+void btr_set_min_rec_mark(rec_t* rec, mtr_t* mtr)
{
- ulint info_bits;
+ const bool comp = page_rec_is_comp(rec);
- if (page_rec_is_comp(rec)) {
- info_bits = rec_get_info_bits(rec, TRUE);
+ ut_ad(rec == page_rec_get_next_const(page_get_infimum_rec(
+ page_align(rec))));
+ ut_ad(!(rec_get_info_bits(page_rec_get_next(rec), comp)
+ & REC_INFO_MIN_REC_FLAG));
+ size_t info_bits = rec_get_info_bits(rec, comp);
+ if (comp) {
rec_set_info_bits_new(rec, info_bits | REC_INFO_MIN_REC_FLAG);
btr_set_min_rec_mark_log(rec, MLOG_COMP_REC_MIN_MARK, mtr);
} else {
- info_bits = rec_get_info_bits(rec, FALSE);
-
rec_set_info_bits_old(rec, info_bits | REC_INFO_MIN_REC_FLAG);
btr_set_min_rec_mark_log(rec, MLOG_REC_MIN_MARK, mtr);
}
}
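
/* The mark itself is a single info bit; a toy sketch assuming an
8-bit info byte, where 0x10 matches InnoDB's REC_INFO_MIN_REC_FLAG. */
#include <cstdint>

uint8_t set_min_rec_mark_toy(uint8_t info_bits)
{
	return uint8_t(info_bits | 0x10);
}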
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Deletes on the upper level the node pointer to a page. */
-UNIV_INTERN
-void
-btr_node_ptr_delete(
-/*================*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: page whose node pointer is deleted */
- mtr_t* mtr) /*!< in: mtr */
-{
- btr_cur_t cursor;
- ibool compressed;
- dberr_t err;
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-
- /* Delete node pointer on father page */
- btr_page_get_father(index, block, mtr, &cursor);
-
- compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor,
- BTR_CREATE_FLAG, RB_NONE, mtr);
- ut_a(err == DB_SUCCESS);
-
- if (!compressed) {
- btr_cur_compress_if_useful(&cursor, FALSE, mtr);
- }
-}
-
/*************************************************************//**
If page is the only on its level, this function moves its records to the
father page, thus reducing the tree height.
@@ -3710,23 +3294,29 @@ btr_lift_page_up(
bool lift_father_up;
buf_block_t* block_orig = block;
- ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
- ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(!page_has_siblings(page));
+ ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
page_level = btr_page_get_level(page, mtr);
root_page_no = dict_index_get_page(index);
{
btr_cur_t cursor;
- ulint* offsets = NULL;
+ offset_t* offsets = NULL;
mem_heap_t* heap = mem_heap_create(
sizeof(*offsets)
* (REC_OFFS_HEADER_SIZE + 1 + 1 + index->n_fields));
buf_block_t* b;
- offsets = btr_page_get_father_block(offsets, heap, index,
- block, mtr, &cursor);
+ if (dict_index_is_spatial(index)) {
+ offsets = rtr_page_get_father_block(
+ NULL, heap, index, block, mtr,
+ NULL, &cursor);
+ } else {
+ offsets = btr_page_get_father_block(offsets, heap,
+ index, block,
+ mtr, &cursor);
+ }
father_block = btr_cur_get_block(&cursor);
father_page_zip = buf_block_get_page_zip(father_block);
father_page = buf_block_get_frame(father_block);
@@ -3739,12 +3329,20 @@ btr_lift_page_up(
the first level, the tree is in an inconsistent state
and can not be searched. */
for (b = father_block;
- buf_block_get_page_no(b) != root_page_no; ) {
+ b->page.id.page_no() != root_page_no; ) {
ut_a(n_blocks < BTR_MAX_LEVELS);
- offsets = btr_page_get_father_block(offsets, heap,
- index, b,
- mtr, &cursor);
+ if (dict_index_is_spatial(index)) {
+ offsets = rtr_page_get_father_block(
+ NULL, heap, index, b, mtr,
+ NULL, &cursor);
+ } else {
+ offsets = btr_page_get_father_block(offsets,
+ heap,
+ index, b,
+ mtr,
+ &cursor);
+ }
blocks[n_blocks++] = b = btr_cur_get_block(&cursor);
}
@@ -3763,9 +3361,9 @@ btr_lift_page_up(
page = buf_block_get_frame(block);
page_level = btr_page_get_level(page, mtr);
- ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
- ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(!page_has_siblings(page));
+ ut_ad(mtr_is_block_fix(
+ mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
father_block = blocks[0];
father_page_zip = buf_block_get_page_zip(father_block);
@@ -3800,15 +3398,30 @@ btr_lift_page_up(
/* Update the lock table and possible hash index. */
- lock_move_rec_list_end(father_block, block,
- page_get_infimum_rec(page));
+ if (!dict_table_is_locking_disabled(index->table)) {
+ lock_move_rec_list_end(father_block, block,
+ page_get_infimum_rec(page));
+ }
+
+ /* Also update the predicate locks */
+ if (dict_index_is_spatial(index)) {
+ lock_prdt_rec_move(father_block, block);
+ }
btr_search_move_or_delete_hash_entries(father_block, block,
index);
}
- btr_blob_dbg_remove(page, index, "btr_lift_page_up");
- lock_update_copy_and_discard(father_block, block);
+ if (!dict_table_is_locking_disabled(index->table)) {
+ /* Free predicate page locks on the block */
+ if (dict_index_is_spatial(index)) {
+ lock_mutex_enter();
+ lock_prdt_page_free_from_discard(
+ block, lock_sys->prdt_page_hash);
+ lock_mutex_exit();
+ }
+ lock_update_copy_and_discard(father_block, block);
+ }
/* Go upward to root page, decrementing levels by one. */
for (i = lift_father_up ? 1 : 0; i < n_blocks; i++, page_level++) {
@@ -3823,11 +3436,16 @@ btr_lift_page_up(
#endif /* UNIV_ZIP_DEBUG */
}
+ if (dict_index_is_spatial(index)) {
+ rtr_check_discard_page(index, NULL, block);
+ }
+
/* Free the file page */
btr_page_free(index, block, mtr);
/* We play it safe and reset the free bits for the father */
- if (!dict_index_is_clust(index)) {
+ if (!dict_index_is_clust(index)
+ && !dict_table_is_temporary(index->table)) {
ibuf_reset_free_bits(father_block);
}
ut_ad(page_validate(father_page, index));
@@ -3845,8 +3463,7 @@ level lifts the records of the page to the father page, thus reducing the
tree height. It is assumed that mtr holds an x-latch on the tree and on the
page. If cursor is on the leaf level, mtr must also hold x-latches to the
brothers, if they exist.
-@return TRUE on success */
-UNIV_INTERN
+@return TRUE on success */
ibool
btr_compress(
/*=========*/
@@ -3860,7 +3477,6 @@ btr_compress(
{
dict_index_t* index;
ulint space;
- ulint zip_size;
ulint left_page_no;
ulint right_page_no;
buf_block_t* merge_block;
@@ -3871,8 +3487,12 @@ btr_compress(
page_t* page;
btr_cur_t father_cursor;
mem_heap_t* heap;
- ulint* offsets;
+ offset_t* offsets;
ulint nth_rec = 0; /* remove bogus warning */
+ bool mbr_changed = false;
+#ifdef UNIV_DEBUG
+ bool leftmost_child;
+#endif
DBUG_ENTER("btr_compress");
block = btr_cur_get_block(cursor);
@@ -3881,16 +3501,26 @@ btr_compress(
btr_assert_not_corrupted(block, index);
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+#ifdef UNIV_DEBUG
+ if (dict_index_is_spatial(index)) {
+ ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
+ MTR_MEMO_X_LOCK));
+ } else {
+ ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
+ MTR_MEMO_X_LOCK
+ | MTR_MEMO_SX_LOCK));
+ }
+#endif /* UNIV_DEBUG */
+
+ ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
space = dict_index_get_space(index);
- zip_size = dict_table_zip_size(index->table);
+
+ const page_size_t page_size(dict_table_page_size(index->table));
MONITOR_INC(MONITOR_INDEX_MERGE_ATTEMPTS);
- left_page_no = btr_page_get_prev(page, mtr);
- right_page_no = btr_page_get_next(page, mtr);
+ left_page_no = btr_page_get_prev(page);
+ right_page_no = btr_page_get_next(page);
#ifdef UNIV_DEBUG
if (!page_is_leaf(page) && left_page_no == FIL_NULL) {
@@ -3901,8 +3531,27 @@ btr_compress(
#endif /* UNIV_DEBUG */
heap = mem_heap_create(100);
- offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
- &father_cursor);
+
+ if (dict_index_is_spatial(index)) {
+ offsets = rtr_page_get_father_block(
+ NULL, heap, index, block, mtr, cursor, &father_cursor);
+ ut_ad(cursor->page_cur.block->page.id.page_no()
+ == block->page.id.page_no());
+ rec_t* my_rec = father_cursor.page_cur.rec;
+
+ ulint page_no = btr_node_ptr_get_child_page_no(my_rec, offsets);
+
+ if (page_no != block->page.id.page_no()) {
+ ib::info() << "father positioned on page "
+				<< page_no << " instead of "
+ << block->page.id.page_no();
+ offsets = btr_page_get_father_block(
+ NULL, heap, index, block, mtr, &father_cursor);
+ }
+ } else {
+ offsets = btr_page_get_father_block(
+ NULL, heap, index, block, mtr, &father_cursor);
+ }
if (adjust) {
nth_rec = page_rec_get_n_recs_before(btr_cur_get_rec(cursor));
@@ -3917,6 +3566,13 @@ btr_compress(
goto func_exit;
}
+ ut_d(leftmost_child =
+ left_page_no != FIL_NULL
+ && (page_rec_get_next(
+ page_get_infimum_rec(
+ btr_cur_get_page(&father_cursor)))
+ == btr_cur_get_rec(&father_cursor)));
+
/* Decide the page to which we try to merge and which will inherit
the locks */
@@ -3924,10 +3580,13 @@ btr_compress(
&merge_block, mtr);
DBUG_EXECUTE_IF("ib_always_merge_right", is_left = FALSE;);
-
- if(!is_left
+retry:
+ if (!is_left
&& !btr_can_merge_with_page(cursor, right_page_no, &merge_block,
mtr)) {
+ if (!merge_block) {
+ merge_page = NULL;
+ }
goto err_exit;
}
@@ -3935,14 +3594,26 @@ btr_compress(
#ifdef UNIV_BTR_DEBUG
if (is_left) {
- ut_a(btr_page_get_next(merge_page, mtr)
- == buf_block_get_page_no(block));
+ ut_a(btr_page_get_next(merge_page)
+ == block->page.id.page_no());
} else {
- ut_a(btr_page_get_prev(merge_page, mtr)
- == buf_block_get_page_no(block));
+ ut_a(btr_page_get_prev(merge_page)
+ == block->page.id.page_no());
}
#endif /* UNIV_BTR_DEBUG */
+#ifdef UNIV_GIS_DEBUG
+ if (dict_index_is_spatial(index)) {
+ if (is_left) {
+			fprintf(stderr, "GIS_DIAG: merge left %ld to %ld\n",
+				(long) block->page.id.page_no(),
+				(long) left_page_no);
+		} else {
+			fprintf(stderr, "GIS_DIAG: merge right %ld to %ld\n",
+				(long) block->page.id.page_no(),
+				(long) right_page_no);
+ }
+ }
+#endif /* UNIV_GIS_DEBUG */
+
ut_ad(page_validate(merge_page, index));
merge_page_zip = buf_block_get_page_zip(merge_block);
@@ -3958,6 +3629,39 @@ btr_compress(
/* Move records to the merge page */
if (is_left) {
+ btr_cur_t cursor2;
+ rtr_mbr_t new_mbr;
+ offset_t* offsets2 = NULL;
+
+ /* For rtree, we need to update father's mbr. */
+ if (dict_index_is_spatial(index)) {
+ /* We only support merge pages with the same parent
+ page */
+ if (!rtr_check_same_block(
+ index, &cursor2,
+ btr_cur_get_block(&father_cursor),
+ merge_block, heap)) {
+ is_left = false;
+ goto retry;
+ }
+
+ /* Set rtr_info for cursor2, since it is
+ necessary in recursive page merge. */
+ cursor2.rtr_info = cursor->rtr_info;
+ cursor2.tree_height = cursor->tree_height;
+
+ offsets2 = rec_get_offsets(
+ btr_cur_get_rec(&cursor2), index, NULL,
+ page_is_leaf(cursor2.page_cur.block->frame),
+ ULINT_UNDEFINED, &heap);
+
+ /* Check if parent entry needs to be updated */
+ mbr_changed = rtr_merge_mbr_changed(
+ &cursor2, &father_cursor,
+ offsets2, offsets, &new_mbr,
+ merge_block, block, index);
+ }
+
rec_t* orig_pred = page_copy_rec_list_start(
merge_block, block, page_get_supremum_rec(page),
index, mtr);
@@ -3969,10 +3673,53 @@ btr_compress(
btr_search_drop_page_hash_index(block);
/* Remove the page from the level list */
- btr_level_list_remove(space, zip_size, page, index, mtr);
+ btr_level_list_remove(space, page_size, page, index, mtr);
+
+ if (dict_index_is_spatial(index)) {
+ rec_t* my_rec = father_cursor.page_cur.rec;
+
+ ulint page_no = btr_node_ptr_get_child_page_no(
+ my_rec, offsets);
+
+ if (page_no != block->page.id.page_no()) {
+
+ ib::fatal() << "father positioned on "
+ << page_no << " instead of "
+ << block->page.id.page_no();
+
+ ut_ad(0);
+ }
+
+ if (mbr_changed) {
+#ifdef UNIV_DEBUG
+ bool success = rtr_update_mbr_field(
+ &cursor2, offsets2, &father_cursor,
+ merge_page, &new_mbr, NULL, mtr);
+
+ ut_ad(success);
+#else
+ rtr_update_mbr_field(
+ &cursor2, offsets2, &father_cursor,
+ merge_page, &new_mbr, NULL, mtr);
+#endif
+ } else {
+ rtr_node_ptr_delete(
+ index, &father_cursor, block, mtr);
+ }
- btr_node_ptr_delete(index, block, mtr);
- lock_update_merge_left(merge_block, orig_pred, block);
+		/* No GAP locks need to be worried about. */
+ lock_mutex_enter();
+ lock_prdt_page_free_from_discard(
+ block, lock_sys->prdt_page_hash);
+ lock_rec_free_all_from_discard_page(block);
+ lock_mutex_exit();
+ } else {
+ btr_cur_node_ptr_delete(&father_cursor, mtr);
+ if (!dict_table_is_locking_disabled(index->table)) {
+ lock_update_merge_left(
+ merge_block, orig_pred, block);
+ }
+ }
if (adjust) {
nth_rec += page_rec_get_n_recs_before(orig_pred);
@@ -3988,7 +3735,27 @@ btr_compress(
byte fil_page_prev[4];
#endif /* UNIV_BTR_DEBUG */
- btr_page_get_father(index, merge_block, mtr, &cursor2);
+ if (dict_index_is_spatial(index)) {
+ cursor2.rtr_info = NULL;
+
+			/* For a spatial index, we disallow merging blocks
+			with different parents, since the merge would need
+			to update the entry (for the MBR and primary key)
+			in the parent of the block being merged. */
+ if (!rtr_check_same_block(
+ index, &cursor2,
+ btr_cur_get_block(&father_cursor),
+ merge_block, heap)) {
+ goto err_exit;
+ }
+
+ /* Set rtr_info for cursor2, since it is
+ necessary in recursive page merge. */
+ cursor2.rtr_info = cursor->rtr_info;
+ cursor2.tree_height = cursor->tree_height;
+ } else {
+ btr_page_get_father(index, merge_block, mtr, &cursor2);
+ }
if (merge_page_zip && left_page_no == FIL_NULL) {
@@ -4038,7 +3805,11 @@ btr_compress(
#endif /* UNIV_BTR_DEBUG */
/* Remove the page from the level list */
- btr_level_list_remove(space, zip_size, page, index, mtr);
+ btr_level_list_remove(space, page_size, (page_t*)page, index, mtr);
+
+ ut_ad(btr_node_ptr_get_child_page_no(
+ btr_cur_get_rec(&father_cursor), offsets)
+ == block->page.id.page_no());
/* Replace the address of the old child node (= page) with the
address of the merge page to the right */
@@ -4047,21 +3818,83 @@ btr_compress(
btr_cur_get_page_zip(&father_cursor),
offsets, right_page_no, mtr);
- compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor2,
- BTR_CREATE_FLAG,
- RB_NONE, mtr);
- ut_a(err == DB_SUCCESS);
-
- if (!compressed) {
- btr_cur_compress_if_useful(&cursor2, FALSE, mtr);
+#ifdef UNIV_DEBUG
+ if (!page_is_leaf(page) && left_page_no == FIL_NULL) {
+ ut_ad(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
+ page_rec_get_next(page_get_infimum_rec(
+ buf_block_get_frame(merge_block))),
+ page_is_comp(page)));
}
+#endif /* UNIV_DEBUG */
- lock_update_merge_right(merge_block, orig_succ, block);
- }
+	/* For an R-tree, we need to update the father's MBR. */
+ if (dict_index_is_spatial(index)) {
+ offset_t* offsets2;
+ ulint rec_info;
+
+ offsets2 = rec_get_offsets(
+ btr_cur_get_rec(&cursor2), index, NULL,
+ page_is_leaf(cursor2.page_cur.block->frame),
+ ULINT_UNDEFINED, &heap);
+
+ ut_ad(btr_node_ptr_get_child_page_no(
+ btr_cur_get_rec(&cursor2), offsets2)
+ == right_page_no);
+
+ rec_info = rec_get_info_bits(
+ btr_cur_get_rec(&father_cursor),
+ rec_offs_comp(offsets));
+ if (rec_info & REC_INFO_MIN_REC_FLAG) {
+			/* When the father node ptr is the minimal rec,
+			we will keep it and delete the node ptr of the
+			merge page. */
+ rtr_merge_and_update_mbr(&father_cursor,
+ &cursor2,
+ offsets, offsets2,
+ merge_page,
+ merge_block,
+ block, index, mtr);
+ } else {
+			/* Otherwise, we will keep the node ptr of the
+			merge page and delete the father node ptr. This
+			keeps the rec order in the upper level. */
+ rtr_merge_and_update_mbr(&cursor2,
+ &father_cursor,
+ offsets2, offsets,
+ merge_page,
+ merge_block,
+ block, index, mtr);
+ }
+ lock_mutex_enter();
+ lock_prdt_page_free_from_discard(
+ block, lock_sys->prdt_page_hash);
+ lock_rec_free_all_from_discard_page(block);
+ lock_mutex_exit();
+ } else {
- btr_blob_dbg_remove(page, index, "btr_compress");
+ compressed = btr_cur_pessimistic_delete(&err, TRUE,
+ &cursor2,
+ BTR_CREATE_FLAG,
+ false, mtr);
+ ut_a(err == DB_SUCCESS);
- if (!dict_index_is_clust(index) && page_is_leaf(merge_page)) {
+ if (!compressed) {
+ btr_cur_compress_if_useful(&cursor2,
+ FALSE,
+ mtr);
+ }
+
+ if (!dict_table_is_locking_disabled(index->table)) {
+ lock_update_merge_right(
+ merge_block, orig_succ, block);
+ }
+ }
+ }
+
+ if (!dict_index_is_clust(index)
+ && !dict_table_is_temporary(index->table)
+ && page_is_leaf(merge_page)) {
/* Update the free bits of the B-tree page in the
insert buffer bitmap. This has to be done in a
separate mini-transaction that is committed before the
@@ -4084,7 +3917,7 @@ btr_compress(
committed mini-transaction, because in crash recovery,
the free bits could momentarily be set too high. */
- if (zip_size) {
+ if (page_size.is_compressed()) {
/* Because the free bits may be incremented
and we cannot update the insert buffer bitmap
in the same mini-transaction, the only safe
@@ -4108,10 +3941,25 @@ btr_compress(
index));
#endif /* UNIV_ZIP_DEBUG */
+ if (dict_index_is_spatial(index)) {
+#ifdef UNIV_GIS_DEBUG
+ fprintf(stderr, "GIS_DIAG: compressed away %ld\n",
+ (long) block->page.id.page_no());
+ fprintf(stderr, "GIS_DIAG: merged to %ld\n",
+ (long) merge_block->page.id.page_no());
+#endif
+
+ rtr_check_discard_page(index, NULL, block);
+ }
+
/* Free the file page */
btr_page_free(index, block, mtr);
- ut_ad(btr_check_node_ptr(index, merge_block, mtr));
+	/* btr_check_node_ptr() needs the parent block latched.
+	If merge_block's parent block is not the same,
+	we cannot use btr_check_node_ptr(). */
+ ut_ad(leftmost_child
+ || btr_check_node_ptr(index, merge_block, mtr));
func_exit:
mem_heap_free(heap);
@@ -4129,10 +3977,11 @@ func_exit:
err_exit:
/* We play it safe and reset the free bits. */
- if (zip_size
+ if (page_size.is_compressed()
&& merge_page
&& page_is_leaf(merge_page)
&& !dict_index_is_clust(index)) {
+
ibuf_reset_free_bits(merge_block);
}
@@ -4143,8 +3992,9 @@ err_exit:
/*************************************************************//**
Discards a page that is the only page on its level. This will empty
the whole B-tree, leaving just an empty root page. This function
-should never be reached, because btr_compress(), which is invoked in
+should almost never be reached, because btr_compress(), which is invoked in
delete operations, calls btr_lift_page_up() to flatten the B-tree. */
+ATTRIBUTE_COLD
static
void
btr_discard_only_page_on_level(
@@ -4156,26 +4006,37 @@ btr_discard_only_page_on_level(
ulint page_level = 0;
trx_id_t max_trx_id;
+ ut_ad(!index->is_dummy);
+
/* Save the PAGE_MAX_TRX_ID from the leaf page. */
max_trx_id = page_get_max_trx_id(buf_block_get_frame(block));
- while (buf_block_get_page_no(block) != dict_index_get_page(index)) {
+ while (block->page.id.page_no() != dict_index_get_page(index)) {
btr_cur_t cursor;
buf_block_t* father;
const page_t* page = buf_block_get_frame(block);
ut_a(page_get_n_recs(page) == 1);
ut_a(page_level == btr_page_get_level(page, mtr));
- ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
- ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
-
+ ut_a(!page_has_siblings(page));
+ ut_ad(fil_page_index_page_check(page));
+ ut_ad(block->page.id.space() == index->space);
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
btr_search_drop_page_hash_index(block);
- btr_page_get_father(index, block, mtr, &cursor);
+ if (dict_index_is_spatial(index)) {
+ /* Check any concurrent search having this page */
+ rtr_check_discard_page(index, NULL, block);
+ rtr_page_get_father(index, block, mtr, NULL, &cursor);
+ } else {
+ btr_page_get_father(index, block, mtr, &cursor);
+ }
father = btr_cur_get_block(&cursor);
- lock_update_discard(father, PAGE_HEAP_NO_SUPREMUM, block);
+ if (!dict_table_is_locking_disabled(index->table)) {
+ lock_update_discard(
+ father, PAGE_HEAP_NO_SUPREMUM, block);
+ }
/* Free the file page */
btr_page_free(index, block, mtr);
@@ -4186,6 +4047,7 @@ btr_discard_only_page_on_level(
/* block is the root page, which must be empty, except
for the node pointer to the (now discarded) block(s). */
+ ut_ad(!page_has_siblings(block->frame));
#ifdef UNIV_BTR_DEBUG
if (!dict_index_is_ibuf(index)) {
@@ -4201,7 +4063,8 @@ btr_discard_only_page_on_level(
btr_page_empty(block, buf_block_get_page_zip(block), index, 0, mtr);
ut_ad(page_is_leaf(buf_block_get_frame(block)));
- if (!dict_index_is_clust(index)) {
+ if (!dict_index_is_clust(index)
+ && !dict_table_is_temporary(index->table)) {
/* We play it safe and reset the free bits for the root */
ibuf_reset_free_bits(block);
@@ -4216,7 +4079,6 @@ btr_discard_only_page_on_level(
Discards a page from a B-tree. This is used to remove the last record from
a B-tree page: the whole page must be removed at the same time. This cannot
be used for the root page, which is allowed to be empty. */
-UNIV_INTERN
void
btr_discard_page(
/*=============*/
@@ -4225,8 +4087,6 @@ btr_discard_page(
mtr_t* mtr) /*!< in: mtr */
{
dict_index_t* index;
- ulint space;
- ulint zip_size;
ulint left_page_no;
ulint right_page_no;
buf_block_t* merge_block;
@@ -4234,40 +4094,63 @@ btr_discard_page(
buf_block_t* block;
page_t* page;
rec_t* node_ptr;
+ btr_cur_t parent_cursor;
block = btr_cur_get_block(cursor);
index = btr_cur_get_index(cursor);
- ut_ad(dict_index_get_page(index) != buf_block_get_page_no(block));
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- space = dict_index_get_space(index);
- zip_size = dict_table_zip_size(index->table);
+ ut_ad(dict_index_get_page(index) != block->page.id.page_no());
+
+ ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
+ MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK));
+
+ ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+
+ const ulint space = dict_index_get_space(index);
MONITOR_INC(MONITOR_INDEX_DISCARD);
+ if (dict_index_is_spatial(index)) {
+ rtr_page_get_father(index, block, mtr, cursor, &parent_cursor);
+ } else {
+ btr_page_get_father(index, block, mtr, &parent_cursor);
+ }
+
/* Decide the page which will inherit the locks */
- left_page_no = btr_page_get_prev(buf_block_get_frame(block), mtr);
- right_page_no = btr_page_get_next(buf_block_get_frame(block), mtr);
+ left_page_no = btr_page_get_prev(buf_block_get_frame(block));
+ right_page_no = btr_page_get_next(buf_block_get_frame(block));
+ const page_size_t page_size(dict_table_page_size(index->table));
+ ut_d(bool parent_is_different = false);
if (left_page_no != FIL_NULL) {
- merge_block = btr_block_get(space, zip_size, left_page_no,
- RW_X_LATCH, index, mtr);
+ merge_block = btr_block_get(
+ page_id_t(space, left_page_no), page_size,
+ RW_X_LATCH, index, mtr);
+
merge_page = buf_block_get_frame(merge_block);
#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_next(merge_page, mtr)
- == buf_block_get_page_no(block));
+ ut_a(btr_page_get_next(merge_page)
+ == block->page.id.page_no());
#endif /* UNIV_BTR_DEBUG */
+ ut_d(parent_is_different =
+ (page_rec_get_next(
+ page_get_infimum_rec(
+ btr_cur_get_page(
+ &parent_cursor)))
+ == btr_cur_get_rec(&parent_cursor)));
} else if (right_page_no != FIL_NULL) {
- merge_block = btr_block_get(space, zip_size, right_page_no,
- RW_X_LATCH, index, mtr);
+ merge_block = btr_block_get(
+ page_id_t(space, right_page_no), page_size,
+ RW_X_LATCH, index, mtr);
+
merge_page = buf_block_get_frame(merge_block);
#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_prev(merge_page, mtr)
- == buf_block_get_page_no(block));
+ ut_a(btr_page_get_prev(merge_page)
+ == block->page.id.page_no());
#endif /* UNIV_BTR_DEBUG */
+ ut_d(parent_is_different = page_rec_is_supremum(
+ page_rec_get_next(btr_cur_get_rec(&parent_cursor))));
} else {
btr_discard_only_page_on_level(index, block, mtr);
@@ -4294,10 +4177,15 @@ btr_discard_page(
btr_set_min_rec_mark(node_ptr, mtr);
}
- btr_node_ptr_delete(index, block, mtr);
+ if (dict_index_is_spatial(index)) {
+ rtr_node_ptr_delete(index, &parent_cursor, block, mtr);
+ } else {
+ btr_cur_node_ptr_delete(&parent_cursor, mtr);
+ }
/* Remove the page from the level list */
- btr_level_list_remove(space, zip_size, page, index, mtr);
+ btr_level_list_remove(space, page_size, page, index, mtr);
+
#ifdef UNIV_ZIP_DEBUG
{
page_zip_des_t* merge_page_zip
@@ -4307,27 +4195,40 @@ btr_discard_page(
}
#endif /* UNIV_ZIP_DEBUG */
- if (left_page_no != FIL_NULL) {
- lock_update_discard(merge_block, PAGE_HEAP_NO_SUPREMUM,
- block);
- } else {
- lock_update_discard(merge_block,
- lock_get_min_heap_no(merge_block),
- block);
+ if (!dict_table_is_locking_disabled(index->table)) {
+ if (left_page_no != FIL_NULL) {
+ lock_update_discard(merge_block, PAGE_HEAP_NO_SUPREMUM,
+ block);
+ } else {
+ lock_update_discard(merge_block,
+ lock_get_min_heap_no(merge_block),
+ block);
+ }
}
- btr_blob_dbg_remove(page, index, "btr_discard_page");
+ if (dict_index_is_spatial(index)) {
+ rtr_check_discard_page(index, cursor, block);
+ }
/* Free the file page */
btr_page_free(index, block, mtr);
- ut_ad(btr_check_node_ptr(index, merge_block, mtr));
+	/* btr_check_node_ptr() needs the parent block latched.
+	If merge_block's parent block is not the same,
+	we cannot use btr_check_node_ptr(). */
+ ut_ad(parent_is_different
+ || btr_check_node_ptr(index, merge_block, mtr));
+
+ if (btr_cur_get_block(&parent_cursor)->page.id.page_no() == index->page
+ && !page_has_siblings(btr_cur_get_page(&parent_cursor))
+ && page_get_n_recs(btr_cur_get_page(&parent_cursor)) == 1) {
+ btr_lift_page_up(index, merge_block, mtr);
+ }
}
#ifdef UNIV_BTR_PRINT
/*************************************************************//**
Prints size info of a B-tree. */
-UNIV_INTERN
void
btr_print_size(
/*===========*/
@@ -4353,7 +4254,7 @@ btr_print_size(
fputs("INFO OF THE NON-LEAF PAGE SEGMENT\n", stderr);
fseg_print(seg, &mtr);
- if (!dict_index_is_univ(index)) {
+ if (!dict_index_is_ibuf(index)) {
seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
@@ -4375,7 +4276,7 @@ btr_print_recursive(
ulint width, /*!< in: print this many entries from start
and end */
mem_heap_t** heap, /*!< in/out: heap for rec_get_offsets() */
- ulint** offsets,/*!< in/out: buffer for rec_get_offsets() */
+ offset_t** offsets,/*!< in/out: buffer for rec_get_offsets() */
mtr_t* mtr) /*!< in: mtr */
{
const page_t* page = buf_block_get_frame(block);
@@ -4384,10 +4285,10 @@ btr_print_recursive(
ulint i = 0;
mtr_t mtr2;
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- fprintf(stderr, "NODE ON LEVEL %lu page number %lu\n",
- (ulong) btr_page_get_level(page, mtr),
- (ulong) buf_block_get_page_no(block));
+ ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_SX_FIX, index->table));
+
+ ib::info() << "NODE ON LEVEL " << btr_page_get_level(page, mtr)
+ << " page " << block->page.id;
page_print(block, index, width, width);
@@ -4410,8 +4311,9 @@ btr_print_recursive(
node_ptr = page_cur_get_rec(&cursor);
- *offsets = rec_get_offsets(node_ptr, index, *offsets,
- ULINT_UNDEFINED, heap);
+ *offsets = rec_get_offsets(
+ node_ptr, index, *offsets, false,
+ ULINT_UNDEFINED, heap);
btr_print_recursive(index,
btr_node_ptr_get_child(node_ptr,
index,
@@ -4428,7 +4330,6 @@ btr_print_recursive(
/**************************************************************//**
Prints directories and other info of all nodes in the tree. */
-UNIV_INTERN
void
btr_print_index(
/*============*/
@@ -4439,8 +4340,8 @@ btr_print_index(
mtr_t mtr;
buf_block_t* root;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
rec_offs_init(offsets_);
fputs("--------------------------\n"
@@ -4448,7 +4349,7 @@ btr_print_index(
mtr_start(&mtr);
- root = btr_root_block_get(index, RW_X_LATCH, &mtr);
+ root = btr_root_block_get(index, RW_SX_LATCH, &mtr);
btr_print_recursive(index, root, width, &heap, &offsets, &mtr);
if (heap) {
@@ -4457,15 +4358,14 @@ btr_print_index(
mtr_commit(&mtr);
- btr_validate_index(index, 0);
+ ut_ad(btr_validate_index(index, 0, false));
}
#endif /* UNIV_BTR_PRINT */
#ifdef UNIV_DEBUG
/************************************************************//**
Checks that the node pointer to a page is appropriate.
-@return TRUE */
-UNIV_INTERN
+@return TRUE */
ibool
btr_check_node_ptr(
/*===============*/
@@ -4475,19 +4375,26 @@ btr_check_node_ptr(
{
mem_heap_t* heap;
dtuple_t* tuple;
- ulint* offsets;
+ offset_t* offsets;
btr_cur_t cursor;
page_t* page = buf_block_get_frame(block);
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- if (dict_index_get_page(index) == buf_block_get_page_no(block)) {
+ ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+
+ if (dict_index_get_page(index) == block->page.id.page_no()) {
return(TRUE);
}
heap = mem_heap_create(256);
- offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
- &cursor);
+
+ if (dict_index_is_spatial(index)) {
+ offsets = rtr_page_get_father_block(NULL, heap, index, block, mtr,
+ NULL, &cursor);
+ } else {
+ offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
+ &cursor);
+ }
if (page_is_leaf(page)) {
@@ -4498,7 +4405,16 @@ btr_check_node_ptr(
index, page_rec_get_next(page_get_infimum_rec(page)), 0, heap,
btr_page_get_level(page, mtr));
- ut_a(!cmp_dtuple_rec(tuple, btr_cur_get_rec(&cursor), offsets));
+	/* For a spatial index, the MBR in the parent rec could differ
+	from that of the first rec of the child; their relationship
+	should be a "WITHIN" relationship. */
+ if (dict_index_is_spatial(index)) {
+ ut_a(!cmp_dtuple_rec_with_gis(
+ tuple, btr_cur_get_rec(&cursor),
+ offsets, PAGE_CUR_WITHIN));
+ } else {
+ ut_a(!cmp_dtuple_rec(tuple, btr_cur_get_rec(&cursor), offsets));
+ }
func_exit:
mem_heap_free(heap);
@@ -4516,17 +4432,17 @@ btr_index_rec_validate_report(
const rec_t* rec, /*!< in: index record */
const dict_index_t* index) /*!< in: index */
{
- fputs("InnoDB: Record in ", stderr);
- dict_index_name_print(stderr, NULL, index);
- fprintf(stderr, ", page %lu, at offset %lu\n",
- page_get_page_no(page), (ulint) page_offset(rec));
+ ib::info() << "Record in index " << index->name
+ << " of table " << index->table->name
+ << ", page " << page_id_t(page_get_space_id(page),
+ page_get_page_no(page))
+ << ", at offset " << page_offset(rec);
}
/************************************************************//**
Checks the size and number of fields in a record based on the definition of
the index.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
btr_index_rec_validate(
/*===================*/
@@ -4541,13 +4457,13 @@ btr_index_rec_validate(
ulint i;
const page_t* page;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
rec_offs_init(offsets_);
page = page_align(rec);
- if (dict_index_is_univ(index)) {
+ if (dict_index_is_ibuf(index)) {
/* The insert buffer index tree can contain records from any
other index: we cannot check the number of fields or
their length */
@@ -4555,25 +4471,34 @@ btr_index_rec_validate(
return(TRUE);
}
+#ifdef VIRTUAL_INDEX_DEBUG
+ if (dict_index_has_virtual(index)) {
+ fprintf(stderr, "index name is %s\n", index->name());
+ }
+#endif
if ((ibool)!!page_is_comp(page) != dict_table_is_comp(index->table)) {
btr_index_rec_validate_report(page, rec, index);
- fprintf(stderr, "InnoDB: compact flag=%lu, should be %lu\n",
- (ulong) !!page_is_comp(page),
- (ulong) dict_table_is_comp(index->table));
+
+ ib::error() << "Compact flag=" << !!page_is_comp(page)
+ << ", should be " << dict_table_is_comp(index->table);
return(FALSE);
}
n = dict_index_get_n_fields(index);
- if (!page_is_comp(page) && rec_get_n_fields_old(rec) != n) {
+ if (!page_is_comp(page)
+ && (rec_get_n_fields_old(rec) != n
+ /* a record for older SYS_INDEXES table
+ (missing merge_threshold column) is acceptable. */
+ && !(index->id == DICT_INDEXES_ID
+ && rec_get_n_fields_old(rec) == n - 1))) {
btr_index_rec_validate_report(page, rec, index);
- fprintf(stderr, "InnoDB: has %lu fields, should have %lu\n",
- (ulong) rec_get_n_fields_old(rec), (ulong) n);
- if (dump_on_error) {
- buf_page_print(page, 0);
+ ib::error() << "Has " << rec_get_n_fields_old(rec)
+ << " fields, should have " << n;
+ if (dump_on_error) {
fputs("InnoDB: corrupt record ", stderr);
rec_print_old(stderr, rec);
putc('\n', stderr);
@@ -4581,40 +4506,45 @@ btr_index_rec_validate(
return(FALSE);
}
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
+ offsets = rec_get_offsets(rec, index, offsets, page_is_leaf(page),
+ ULINT_UNDEFINED, &heap);
for (i = 0; i < n; i++) {
- ulint fixed_size = dict_col_get_fixed_size(
- dict_index_get_nth_col(index, i), page_is_comp(page));
+ dict_field_t* field = dict_index_get_nth_field(index, i);
+ ulint fixed_size = dict_col_get_fixed_size(
+ dict_field_get_col(field),
+ page_is_comp(page));
rec_get_nth_field_offs(offsets, i, &len);
/* Note that if fixed_size != 0, it equals the
length of a fixed-size column in the clustered index.
+ We should adjust it here.
A prefix index of the column is of fixed, but different
length. When fixed_size == 0, prefix_len is the maximum
length of the prefix index column. */
- if ((dict_index_get_nth_field(index, i)->prefix_len == 0
+ if ((field->prefix_len == 0
&& len != UNIV_SQL_NULL && fixed_size
&& len != fixed_size)
- || (dict_index_get_nth_field(index, i)->prefix_len > 0
+ || (field->prefix_len > 0
&& len != UNIV_SQL_NULL
&& len
- > dict_index_get_nth_field(index, i)->prefix_len)) {
+ > field->prefix_len)) {
btr_index_rec_validate_report(page, rec, index);
- fprintf(stderr,
- "InnoDB: field %lu len is %lu,"
- " should be %lu\n",
- (ulong) i, (ulong) len, (ulong) fixed_size);
- if (dump_on_error) {
- buf_page_print(page, 0);
+ ib::error error;
- fputs("InnoDB: corrupt record ", stderr);
- rec_print_new(stderr, rec, offsets);
- putc('\n', stderr);
+ error << "Field " << i << " len is " << len
+ << ", should be " << fixed_size;
+
+ if (dump_on_error) {
+ error << "; ";
+ rec_print(error.m_oss, rec,
+ rec_get_info_bits(
+ rec, rec_offs_comp(offsets)),
+ offsets);
}
if (heap) {
mem_heap_free(heap);
@@ -4623,6 +4553,12 @@ btr_index_rec_validate(
}
}
+#ifdef VIRTUAL_INDEX_DEBUG
+ if (dict_index_has_virtual(index)) {
+ rec_print_new(stderr, rec, offsets);
+ }
+#endif
+
if (heap) {
mem_heap_free(heap);
}
@@ -4632,7 +4568,7 @@ btr_index_rec_validate(
/************************************************************//**
Checks the size and number of fields in records based on the definition of
the index.
-@return TRUE if ok */
+@return TRUE if ok */
static
ibool
btr_index_page_validate(
@@ -4697,13 +4633,14 @@ btr_validate_report1(
ulint level, /*!< in: B-tree level */
const buf_block_t* block) /*!< in: index page */
{
- fprintf(stderr, "InnoDB: Error in page %lu of ",
- buf_block_get_page_no(block));
- dict_index_name_print(stderr, NULL, index);
- if (level) {
- fprintf(stderr, ", index tree level %lu", level);
+ ib::error error;
+ error << "In page " << block->page.id.page_no()
+ << " of index " << index->name
+ << " of table " << index->table->name;
+
+ if (level > 0) {
+ error << ", index tree level " << level;
}
- putc('\n', stderr);
}
/************************************************************//**
@@ -4717,30 +4654,28 @@ btr_validate_report2(
const buf_block_t* block1, /*!< in: first index page */
const buf_block_t* block2) /*!< in: second index page */
{
- fprintf(stderr, "InnoDB: Error in pages %lu and %lu of ",
- buf_block_get_page_no(block1),
- buf_block_get_page_no(block2));
- dict_index_name_print(stderr, NULL, index);
- if (level) {
- fprintf(stderr, ", index tree level %lu", level);
- }
- putc('\n', stderr);
+ ib::error error;
+ error << "In pages " << block1->page.id
+ << " and " << block2->page.id << " of index " << index->name
+ << " of table " << index->table->name;
+
+ if (level > 0) {
+ error << ", index tree level " << level;
+ }
}
/************************************************************//**
Validates index tree level.
-@return TRUE if ok */
+@return TRUE if ok */
static
bool
btr_validate_level(
/*===============*/
dict_index_t* index, /*!< in: index tree */
const trx_t* trx, /*!< in: transaction or NULL */
- ulint level) /*!< in: level number */
+ ulint level, /*!< in: level number */
+ bool lockout)/*!< in: true if X-latch index is intended */
{
- ulint space;
- ulint space_flags;
- ulint zip_size;
buf_block_t* block;
page_t* page;
buf_block_t* right_block = 0; /* remove warning */
@@ -4756,31 +4691,39 @@ btr_validate_level(
bool ret = true;
mtr_t mtr;
mem_heap_t* heap = mem_heap_create(256);
- fseg_header_t* seg;
- ulint* offsets = NULL;
- ulint* offsets2= NULL;
+ offset_t* offsets = NULL;
+ offset_t* offsets2= NULL;
#ifdef UNIV_ZIP_DEBUG
page_zip_des_t* page_zip;
#endif /* UNIV_ZIP_DEBUG */
+ ulint savepoint = 0;
+ ulint savepoint2 = 0;
+ ulint parent_page_no = FIL_NULL;
+ ulint parent_right_page_no = FIL_NULL;
+ bool rightmost_child = false;
mtr_start(&mtr);
- mtr_x_lock(dict_index_get_lock(index), &mtr);
+ if (!srv_read_only_mode) {
+ if (lockout) {
+ mtr_x_lock(dict_index_get_lock(index), &mtr);
+ } else {
+ mtr_sx_lock(dict_index_get_lock(index), &mtr);
+ }
+ }
- block = btr_root_block_get(index, RW_X_LATCH, &mtr);
+ block = btr_root_block_get(index, RW_SX_LATCH, &mtr);
page = buf_block_get_frame(block);
- seg = page + PAGE_HEADER + PAGE_BTR_SEG_TOP;
-
- space = dict_index_get_space(index);
- zip_size = dict_table_zip_size(index->table);
- fil_space_get_latch(space, &space_flags);
+ fil_space_t* space = fil_space_get(index->space);
+ const page_size_t table_page_size(
+ dict_table_page_size(index->table));
+ const page_size_t space_page_size(space->flags);
- if (zip_size != dict_tf_get_zip_size(space_flags)) {
+ if (!table_page_size.equals_to(space_page_size)) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Flags mismatch: table=%lu, tablespace=%lu",
- (ulint) index->table->flags, (ulint) space_flags);
+ ib::warn() << "Flags mismatch: table=" << index->table->flags
+ << ", tablespace=" << space->flags;
mtr_commit(&mtr);
@@ -4790,18 +4733,17 @@ btr_validate_level(
while (level != btr_page_get_level(page, &mtr)) {
const rec_t* node_ptr;
- if (fseg_page_is_free(seg,
- block->page.space, block->page.offset)) {
+ if (fseg_page_is_free(space, block->page.id.page_no())) {
btr_validate_report1(index, level, block);
- ib_logf(IB_LOG_LEVEL_WARN, "page is free");
+ ib::warn() << "Page is free";
ret = false;
}
- ut_a(space == buf_block_get_space(block));
- ut_a(space == page_get_space_id(page));
+ ut_a(index->space == block->page.id.space());
+ ut_a(index->space == page_get_space_id(page));
#ifdef UNIV_ZIP_DEBUG
page_zip = buf_block_get_page_zip(block);
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
@@ -4812,45 +4754,74 @@ btr_validate_level(
page_cur_move_to_next(&cursor);
node_ptr = page_cur_get_rec(&cursor);
- offsets = rec_get_offsets(node_ptr, index, offsets,
+ offsets = rec_get_offsets(node_ptr, index, offsets, false,
ULINT_UNDEFINED, &heap);
+
+ savepoint2 = mtr_set_savepoint(&mtr);
block = btr_node_ptr_get_child(node_ptr, index, offsets, &mtr);
page = buf_block_get_frame(block);
+
+		/* For an R-tree, since the record order might not match
+		the order of the linked index pages in the lower level,
+		we need to traverse backwards to get the first page rec
+		in this level. This is only used for index validation.
+		A spatial index does not use such a scan for any of its
+		DML or query operations. */
+ if (dict_index_is_spatial(index)) {
+ left_page_no = btr_page_get_prev(page);
+
+ while (left_page_no != FIL_NULL) {
+ page_id_t left_page_id(
+ index->space, left_page_no);
+ /* To obey latch order of tree blocks,
+ we should release the right_block once to
+ obtain lock of the uncle block. */
+ mtr_release_block_at_savepoint(
+ &mtr, savepoint2, block);
+
+ savepoint2 = mtr_set_savepoint(&mtr);
+ block = btr_block_get(
+ left_page_id,
+ table_page_size,
+ RW_SX_LATCH, index, &mtr);
+ page = buf_block_get_frame(block);
+ left_page_no = btr_page_get_prev(page);
+ }
+ }
}
/* Now we are on the desired level. Loop through the pages on that
level. */
- if (level == 0) {
- /* Leaf pages are managed in their own file segment. */
- seg -= PAGE_BTR_SEG_TOP - PAGE_BTR_SEG_LEAF;
- }
-
loop:
mem_heap_empty(heap);
offsets = offsets2 = NULL;
- mtr_x_lock(dict_index_get_lock(index), &mtr);
+ if (!srv_read_only_mode) {
+ if (lockout) {
+ mtr_x_lock(dict_index_get_lock(index), &mtr);
+ } else {
+ mtr_sx_lock(dict_index_get_lock(index), &mtr);
+ }
+ }
#ifdef UNIV_ZIP_DEBUG
page_zip = buf_block_get_page_zip(block);
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
- ut_a(block->page.space == space);
+ ut_a(block->page.id.space() == index->space);
- if (fseg_page_is_free(seg, block->page.space, block->page.offset)) {
+ if (fseg_page_is_free(space, block->page.id.page_no())) {
btr_validate_report1(index, level, block);
- ib_logf(IB_LOG_LEVEL_WARN, "Page is marked as free");
+ ib::warn() << "Page is marked as free";
ret = false;
} else if (btr_page_get_index_id(page) != index->id) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Page index id " IB_ID_FMT " != data dictionary "
- "index id " IB_ID_FMT,
- btr_page_get_index_id(page), index->id);
+ ib::error() << "Page index id " << btr_page_get_index_id(page)
+ << " != data dictionary index id " << index->id;
ret = false;
@@ -4869,8 +4840,8 @@ loop:
ut_a(btr_page_get_level(page, &mtr) == level);
- right_page_no = btr_page_get_next(page, &mtr);
- left_page_no = btr_page_get_prev(page, &mtr);
+ right_page_no = btr_page_get_next(page);
+ left_page_no = btr_page_get_prev(page);
ut_a(!page_is_empty(page)
|| (level == 0
@@ -4878,17 +4849,19 @@ loop:
if (right_page_no != FIL_NULL) {
const rec_t* right_rec;
- right_block = btr_block_get(space, zip_size, right_page_no,
- RW_X_LATCH, index, &mtr);
+ savepoint = mtr_set_savepoint(&mtr);
+
+ right_block = btr_block_get(
+ page_id_t(index->space, right_page_no),
+ table_page_size,
+ RW_SX_LATCH, index, &mtr);
+
right_page = buf_block_get_frame(right_block);
- if (btr_page_get_prev(right_page, &mtr)
- != page_get_page_no(page)) {
+ if (btr_page_get_prev(right_page) != page_get_page_no(page)) {
btr_validate_report2(index, level, block, right_block);
fputs("InnoDB: broken FIL_PAGE_NEXT"
" or FIL_PAGE_PREV links\n", stderr);
- buf_page_print(page, 0);
- buf_page_print(right_page, 0);
ret = false;
}
@@ -4896,8 +4869,6 @@ loop:
if (page_is_comp(right_page) != page_is_comp(page)) {
btr_validate_report2(index, level, block, right_block);
fputs("InnoDB: 'compact' flag mismatch\n", stderr);
- buf_page_print(page, 0);
- buf_page_print(right_page, 0);
ret = false;
@@ -4907,21 +4878,25 @@ loop:
rec = page_rec_get_prev(page_get_supremum_rec(page));
right_rec = page_rec_get_next(page_get_infimum_rec(
right_page));
- offsets = rec_get_offsets(rec, index,
- offsets, ULINT_UNDEFINED, &heap);
- offsets2 = rec_get_offsets(right_rec, index,
- offsets2, ULINT_UNDEFINED, &heap);
- if (cmp_rec_rec(rec, right_rec, offsets, offsets2,
- index) >= 0) {
+ offsets = rec_get_offsets(rec, index, offsets,
+ page_is_leaf(page),
+ ULINT_UNDEFINED, &heap);
+ offsets2 = rec_get_offsets(right_rec, index, offsets2,
+ page_is_leaf(right_page),
+ ULINT_UNDEFINED, &heap);
+
+		/* For a spatial index, we cannot guarantee the key ordering
+		across pages, so skip the record compare verification for
+		now. This will be enhanced in a special R-tree index
+		validation scheme. */
+ if (!dict_index_is_spatial(index)
+ && cmp_rec_rec(rec, right_rec,
+ offsets, offsets2, index) >= 0) {
btr_validate_report2(index, level, block, right_block);
fputs("InnoDB: records in wrong order"
" on adjacent pages\n", stderr);
- buf_page_print(page, 0);
- buf_page_print(right_page, 0);
-
fputs("InnoDB: record ", stderr);
rec = page_rec_get_prev(page_get_supremum_rec(page));
rec_print(stderr, rec, index);
@@ -4942,43 +4917,57 @@ loop:
page_is_comp(page)));
}
- if (buf_block_get_page_no(block) != dict_index_get_page(index)) {
+	/* Similarly, skip the father node check for spatial indexes for now,
+	for a couple of reasons:
+	1) As mentioned, there is no ordering relationship between records
+	in the parent level and linked pages in the child level.
+	2) Searching for the parent from the root is very costly for R-trees.
+	We will add a special validation mechanism for R-trees later
+	(WL #7520). */
+ if (!dict_index_is_spatial(index)
+ && block->page.id.page_no() != dict_index_get_page(index)) {
/* Check father node pointers */
-
rec_t* node_ptr;
- offsets = btr_page_get_father_block(offsets, heap, index,
- block, &mtr, &node_cur);
+ btr_cur_position(
+ index, page_rec_get_next(page_get_infimum_rec(page)),
+ block, &node_cur);
+ offsets = btr_page_get_father_node_ptr_for_validate(
+ offsets, heap, &node_cur, &mtr);
+
father_page = btr_cur_get_page(&node_cur);
node_ptr = btr_cur_get_rec(&node_cur);
+ parent_page_no = page_get_page_no(father_page);
+ parent_right_page_no = btr_page_get_next(father_page);
+ rightmost_child = page_rec_is_supremum(
+ page_rec_get_next(node_ptr));
+
btr_cur_position(
- index, page_rec_get_prev(page_get_supremum_rec(page)),
+ index,
+ page_rec_get_prev(page_get_supremum_rec(page)),
block, &node_cur);
- offsets = btr_page_get_father_node_ptr(offsets, heap,
- &node_cur, &mtr);
+
+ offsets = btr_page_get_father_node_ptr_for_validate(
+ offsets, heap, &node_cur, &mtr);
if (node_ptr != btr_cur_get_rec(&node_cur)
|| btr_node_ptr_get_child_page_no(node_ptr, offsets)
- != buf_block_get_page_no(block)) {
+ != block->page.id.page_no()) {
btr_validate_report1(index, level, block);
fputs("InnoDB: node pointer to the page is wrong\n",
stderr);
- buf_page_print(father_page, 0);
- buf_page_print(page, 0);
-
fputs("InnoDB: node ptr ", stderr);
rec_print(stderr, node_ptr, index);
rec = btr_cur_get_rec(&node_cur);
fprintf(stderr, "\n"
- "InnoDB: node ptr child page n:o %lu\n",
- (ulong) btr_node_ptr_get_child_page_no(
- rec, offsets));
+ "InnoDB: node ptr child page n:o "
+ ULINTPF "\n",
+ btr_node_ptr_get_child_page_no(rec, offsets));
fputs("InnoDB: record on page ", stderr);
rec_print_new(stderr, rec, offsets);
@@ -5001,12 +4990,9 @@ loop:
btr_validate_report1(index, level, block);
- buf_page_print(father_page, 0);
- buf_page_print(page, 0);
+ ib::error() << "Node ptrs differ on levels > 0";
- fputs("InnoDB: Error: node ptrs differ"
- " on levels > 0\n"
- "InnoDB: node ptr ", stderr);
+		fputs("InnoDB: node ptr ", stderr);
rec_print_new(stderr, node_ptr, offsets);
fputs("InnoDB: first rec ", stderr);
rec_print(stderr, first_rec, index);
@@ -5020,20 +5006,49 @@ loop:
if (left_page_no == FIL_NULL) {
ut_a(node_ptr == page_rec_get_next(
page_get_infimum_rec(father_page)));
- ut_a(btr_page_get_prev(father_page, &mtr) == FIL_NULL);
+ ut_a(!page_has_prev(father_page));
}
if (right_page_no == FIL_NULL) {
ut_a(node_ptr == page_rec_get_prev(
page_get_supremum_rec(father_page)));
- ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL);
+ ut_a(!page_has_next(father_page));
} else {
- const rec_t* right_node_ptr
- = page_rec_get_next(node_ptr);
+ const rec_t* right_node_ptr;
+
+ right_node_ptr = page_rec_get_next(node_ptr);
+
+ if (!lockout && rightmost_child) {
+
+ /* To obey latch order of tree blocks,
+ we should release the right_block once to
+ obtain lock of the uncle block. */
+ mtr_release_block_at_savepoint(
+ &mtr, savepoint, right_block);
+
+ btr_block_get(
+ page_id_t(index->space,
+ parent_right_page_no),
+ table_page_size,
+ RW_SX_LATCH, index, &mtr);
+
+ right_block = btr_block_get(
+ page_id_t(index->space,
+ right_page_no),
+ table_page_size,
+ RW_SX_LATCH, index, &mtr);
+ }
+
+ btr_cur_position(
+ index, page_rec_get_next(
+ page_get_infimum_rec(
+ buf_block_get_frame(
+ right_block))),
+ right_block, &right_node_cur);
+
+ offsets = btr_page_get_father_node_ptr_for_validate(
+ offsets, heap, &right_node_cur, &mtr);
- offsets = btr_page_get_father_block(
- offsets, heap, index, right_block,
- &mtr, &right_node_cur);
if (right_node_ptr
!= page_get_supremum_rec(father_page)) {
@@ -5046,10 +5061,6 @@ loop:
btr_validate_report1(index, level,
block);
-
- buf_page_print(father_page, 0);
- buf_page_print(page, 0);
- buf_page_print(right_page, 0);
}
} else {
page_t* right_father_page
@@ -5066,15 +5077,10 @@ loop:
btr_validate_report1(index, level,
block);
-
- buf_page_print(father_page, 0);
- buf_page_print(right_father_page, 0);
- buf_page_print(page, 0);
- buf_page_print(right_page, 0);
}
if (page_get_page_no(right_father_page)
- != btr_page_get_next(father_page, &mtr)) {
+ != btr_page_get_next(father_page)) {
ret = false;
fputs("InnoDB: node pointer 3 to"
@@ -5083,11 +5089,6 @@ loop:
btr_validate_report1(index, level,
block);
-
- buf_page_print(father_page, 0);
- buf_page_print(right_father_page, 0);
- buf_page_print(page, 0);
- buf_page_print(right_page, 0);
}
}
}
@@ -5105,9 +5106,29 @@ node_ptr_fails:
mtr_start(&mtr);
+ if (!lockout) {
+ if (rightmost_child) {
+ if (parent_right_page_no != FIL_NULL) {
+ btr_block_get(
+ page_id_t(
+ index->space,
+ parent_right_page_no),
+ table_page_size,
+ RW_SX_LATCH, index, &mtr);
+ }
+ } else if (parent_page_no != FIL_NULL) {
+ btr_block_get(
+ page_id_t(index->space,
+ parent_page_no),
+ table_page_size,
+ RW_SX_LATCH, index, &mtr);
+ }
+ }
+
block = btr_block_get(
- space, zip_size, right_page_no,
- RW_X_LATCH, index, &mtr);
+ page_id_t(index->space, right_page_no),
+ table_page_size,
+ RW_SX_LATCH, index, &mtr);
page = buf_block_get_frame(block);
@@ -5120,14 +5141,55 @@ node_ptr_fails:
}
/**************************************************************//**
+Do an index-level validation of a spatial index tree.
+@return true if no error found */
+static
+bool
+btr_validate_spatial_index(
+/*=======================*/
+ dict_index_t* index, /*!< in: index */
+ const trx_t* trx) /*!< in: transaction or NULL */
+{
+
+ mtr_t mtr;
+ bool ok = true;
+
+ mtr_start(&mtr);
+
+ mtr_x_lock(dict_index_get_lock(index), &mtr);
+
+ page_t* root = btr_root_get(index, &mtr);
+ ulint n = btr_page_get_level(root, &mtr);
+
+#ifdef UNIV_RTR_DEBUG
+ fprintf(stderr, "R-tree level is %lu\n", n);
+#endif /* UNIV_RTR_DEBUG */
+
+ for (ulint i = 0; i <= n; ++i) {
+#ifdef UNIV_RTR_DEBUG
+ fprintf(stderr, "Level %lu:\n", n - i);
+#endif /* UNIV_RTR_DEBUG */
+
+ if (!btr_validate_level(index, trx, n - i, true)) {
+ ok = false;
+ break;
+ }
+ }
+
+ mtr_commit(&mtr);
+
+ return(ok);
+}
+
+/**************************************************************//**
Checks the consistency of an index tree.
@return DB_SUCCESS if ok, error code if not */
-UNIV_INTERN
dberr_t
btr_validate_index(
/*===============*/
dict_index_t* index, /*!< in: index */
- const trx_t* trx) /*!< in: transaction or NULL */
+ const trx_t* trx, /*!< in: transaction or NULL */
+ bool lockout)/*!< in: true if X-latch index is intended */
{
dberr_t err = DB_SUCCESS;
@@ -5137,16 +5199,29 @@ btr_validate_index(
return(err);
}
+ if (dict_index_is_spatial(index)) {
+		if (!btr_validate_spatial_index(index, trx)) {
+ err = DB_ERROR;
+ }
+ return(err);
+ }
+
mtr_t mtr;
mtr_start(&mtr);
- mtr_x_lock(dict_index_get_lock(index), &mtr);
+ if (!srv_read_only_mode) {
+ if (lockout) {
+ mtr_x_lock(dict_index_get_lock(index), &mtr);
+ } else {
+ mtr_sx_lock(dict_index_get_lock(index), &mtr);
+ }
+ }
page_t* root = btr_root_get(index, &mtr);
- if (root == NULL && index->table->file_unreadable) {
- err = DB_DECRYPTION_FAILED;
+ if (!root) {
+ err = DB_CORRUPTION;
mtr_commit(&mtr);
return err;
}
@@ -5155,7 +5230,7 @@ btr_validate_index(
for (ulint i = 0; i <= n; ++i) {
- if (!btr_validate_level(index, trx, n - i)) {
+ if (!btr_validate_level(index, trx, n - i, lockout)) {
err = DB_CORRUPTION;
break;
}
@@ -5169,9 +5244,9 @@ btr_validate_index(
/**************************************************************//**
Checks if the page in the cursor can be merged with given page.
If necessary, re-organize the merge_page.
-@return TRUE if possible to merge. */
-UNIV_INTERN
-ibool
+@return true if possible to merge. */
+static
+bool
btr_can_merge_with_page(
/*====================*/
btr_cur_t* cursor, /*!< in: cursor on the page to merge */
@@ -5181,34 +5256,33 @@ btr_can_merge_with_page(
{
dict_index_t* index;
page_t* page;
- ulint space;
- ulint zip_size;
ulint n_recs;
ulint data_size;
- ulint max_ins_size_reorg;
+ ulint max_ins_size_reorg;
ulint max_ins_size;
buf_block_t* mblock;
page_t* mpage;
DBUG_ENTER("btr_can_merge_with_page");
if (page_no == FIL_NULL) {
- goto error;
+ *merge_block = NULL;
+ DBUG_RETURN(false);
}
index = btr_cur_get_index(cursor);
- page = btr_cur_get_page(cursor);
- space = dict_index_get_space(index);
- zip_size = dict_table_zip_size(index->table);
+ page = btr_cur_get_page(cursor);
- mblock = btr_block_get(space, zip_size, page_no, RW_X_LATCH, index,
- mtr);
+ const page_id_t page_id(dict_index_get_space(index), page_no);
+ const page_size_t page_size(dict_table_page_size(index->table));
+
+ mblock = btr_block_get(page_id, page_size, RW_X_LATCH, index, mtr);
mpage = buf_block_get_frame(mblock);
- n_recs = page_get_n_recs(page);
- data_size = page_get_data_size(page);
+ n_recs = page_get_n_recs(page);
+ data_size = page_get_data_size(page);
- max_ins_size_reorg = page_get_max_insert_size_after_reorganize(
- mpage, n_recs);
+ max_ins_size_reorg = page_get_max_insert_size_after_reorganize(
+ mpage, n_recs);
if (data_size > max_ins_size_reorg) {
goto error;
@@ -5217,14 +5291,13 @@ btr_can_merge_with_page(
/* If compression padding tells us that merging will result in
too packed up page i.e.: which is likely to cause compression
failure then don't merge the pages. */
- if (zip_size && page_is_leaf(mpage)
+ if (page_size.is_compressed() && page_is_leaf(mpage)
&& (page_get_data_size(mpage) + data_size
>= dict_index_zip_pad_optimal_page_size(index))) {
goto error;
}
-
max_ins_size = page_get_max_insert_size(mpage, n_recs);
if (data_size > max_ins_size) {
@@ -5252,11 +5325,9 @@ btr_can_merge_with_page(
}
*merge_block = mblock;
- DBUG_RETURN(TRUE);
+ DBUG_RETURN(true);
error:
*merge_block = NULL;
- DBUG_RETURN(FALSE);
+ DBUG_RETURN(false);
}
-
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/btr/btr0bulk.cc b/storage/innobase/btr/btr0bulk.cc
new file mode 100644
index 00000000000..de45bd4ca6c
--- /dev/null
+++ b/storage/innobase/btr/btr0bulk.cc
@@ -0,0 +1,1058 @@
+/*****************************************************************************
+
+Copyright (c) 2014, 2019, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file btr/btr0bulk.cc
+The B-tree bulk load
+
+Created 03/11/2014 Shaohua Wang
+*******************************************************/
+
+#include "btr0bulk.h"
+#include "btr0btr.h"
+#include "btr0cur.h"
+#include "btr0pcur.h"
+#include "ibuf0ibuf.h"
+#include "trx0trx.h"
+
+/** InnoDB B-tree index fill factor for bulk load. */
+long innobase_fill_factor;
+/** whether to reduce redo logging during ALTER TABLE */
+my_bool innodb_log_optimize_ddl;
+
+/** Initialize members, allocate page if needed and start mtr.
+Note: we commit all mtrs on failure.
+@return error code. */
+dberr_t
+PageBulk::init()
+{
+ buf_block_t* new_block;
+ page_t* new_page;
+ page_zip_des_t* new_page_zip;
+ ulint new_page_no;
+
+ ut_ad(m_heap == NULL);
+ m_heap = mem_heap_create(1000);
+
+ m_mtr.start();
+
+ if (m_flush_observer) {
+ m_mtr.set_log_mode(MTR_LOG_NO_REDO);
+ m_mtr.set_flush_observer(m_flush_observer);
+ } else {
+ m_mtr.set_named_space(m_index->space);
+ }
+
+ if (m_page_no == FIL_NULL) {
+ mtr_t alloc_mtr;
+
+ /* We commit redo log for allocation by a separate mtr,
+ because we don't guarantee pages are committed following
+ the allocation order, and we will always generate redo log
+ for page allocation, even when creating a new tablespace. */
+ alloc_mtr.start();
+ alloc_mtr.set_named_space(m_index->space);
+
+ ulint n_reserved;
+ bool success;
+ success = fsp_reserve_free_extents(&n_reserved, m_index->space,
+ 1, FSP_NORMAL, &alloc_mtr);
+ if (!success) {
+ alloc_mtr.commit();
+ m_mtr.commit();
+ return(DB_OUT_OF_FILE_SPACE);
+ }
+
+ /* Allocate a new page. */
+ new_block = btr_page_alloc(m_index, 0, FSP_UP, m_level,
+ &alloc_mtr, &m_mtr);
+
+ if (n_reserved > 0) {
+ fil_space_release_free_extents(m_index->space,
+ n_reserved);
+ }
+
+ alloc_mtr.commit();
+
+ new_page = buf_block_get_frame(new_block);
+ new_page_zip = buf_block_get_page_zip(new_block);
+ new_page_no = page_get_page_no(new_page);
+
+ if (new_page_zip) {
+ page_create_zip(new_block, m_index, m_level, 0,
+ NULL, &m_mtr);
+ memset(FIL_PAGE_PREV + new_page, 0xff, 8);
+ page_zip_write_header(new_page_zip,
+ FIL_PAGE_PREV + new_page,
+ 8, &m_mtr);
+ mach_write_to_8(PAGE_HEADER + PAGE_INDEX_ID + new_page,
+ m_index->id);
+ page_zip_write_header(new_page_zip,
+ PAGE_HEADER + PAGE_INDEX_ID
+ + new_page, 8, &m_mtr);
+ } else {
+ ut_ad(!dict_index_is_spatial(m_index));
+ page_create(new_block, &m_mtr,
+ dict_table_is_comp(m_index->table),
+ false);
+ mlog_write_ulint(FIL_PAGE_PREV + new_page, FIL_NULL,
+ MLOG_4BYTES, &m_mtr);
+ mlog_write_ulint(FIL_PAGE_NEXT + new_page, FIL_NULL,
+ MLOG_4BYTES, &m_mtr);
+ mlog_write_ulint(PAGE_HEADER + PAGE_LEVEL + new_page,
+ m_level, MLOG_2BYTES, &m_mtr);
+ mlog_write_ull(PAGE_HEADER + PAGE_INDEX_ID + new_page,
+ m_index->id, &m_mtr);
+ }
+ } else {
+ page_id_t page_id(dict_index_get_space(m_index), m_page_no);
+ page_size_t page_size(dict_table_page_size(m_index->table));
+
+ new_block = btr_block_get(page_id, page_size,
+ RW_X_LATCH, m_index, &m_mtr);
+
+ new_page = buf_block_get_frame(new_block);
+ new_page_zip = buf_block_get_page_zip(new_block);
+ new_page_no = page_get_page_no(new_page);
+ ut_ad(m_page_no == new_page_no);
+
+ ut_ad(page_dir_get_n_heap(new_page) == PAGE_HEAP_NO_USER_LOW);
+
+ btr_page_set_level(new_page, new_page_zip, m_level, &m_mtr);
+ }
+
+ if (!m_level && dict_index_is_sec_or_ibuf(m_index)) {
+ page_update_max_trx_id(new_block, new_page_zip, m_trx_id,
+ &m_mtr);
+ }
+
+ m_block = new_block;
+ m_block->skip_flush_check = true;
+ m_page = new_page;
+ m_page_zip = new_page_zip;
+ m_page_no = new_page_no;
+ m_cur_rec = page_get_infimum_rec(new_page);
+ ut_ad(m_is_comp == !!page_is_comp(new_page));
+ m_free_space = page_get_free_space_of_empty(m_is_comp);
+
+ if (innobase_fill_factor == 100 && dict_index_is_clust(m_index)) {
+ /* Keep default behavior compatible with 5.6 */
+ m_reserved_space = dict_index_get_space_reserve();
+ } else {
+ m_reserved_space =
+ UNIV_PAGE_SIZE * (100 - innobase_fill_factor) / 100;
+ }
+
+ m_padding_space =
+ UNIV_PAGE_SIZE - dict_index_zip_pad_optimal_page_size(m_index);
+ m_heap_top = page_header_get_ptr(new_page, PAGE_HEAP_TOP);
+ m_rec_no = page_header_get_field(new_page, PAGE_N_RECS);
+
+ ut_d(m_total_data = 0);
+ /* See page_copy_rec_list_end_to_created_page() */
+ ut_d(page_header_set_field(m_page, NULL, PAGE_HEAP_TOP,
+ srv_page_size - 1));
+
+ return(DB_SUCCESS);
+}
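
As a reading aid, here is a minimal sketch of the call sequence that the
PageBulk member functions added in this file appear designed for. It is an
illustration only: the constructor arguments and the surrounding driver are
assumptions (the class declaration lives in btr0bulk.h, which this diff does
not show), and error handling is elided.

	/* Hypothetical driver; `index`, `trx_id`, `rec` and `offsets`
	are assumed to have been prepared by the caller. */
	PageBulk page_bulk(index, trx_id,
			   FIL_NULL,	/* allocate a fresh page */
			   0,		/* leaf level */
			   NULL);	/* no flush observer */

	dberr_t err = page_bulk.init();
	if (err == DB_SUCCESS) {
		if (page_bulk.isSpaceAvailable(rec_offs_size(offsets))) {
			page_bulk.insert(rec, offsets);
		}
		page_bulk.finish();	/* build the page directory */
		page_bulk.commit(true);	/* commit the mini-transaction */
	}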
+
+/** Insert a record in the page.
+@param[in] rec record
+@param[in] offsets record offsets */
+void
+PageBulk::insert(
+ const rec_t* rec,
+ offset_t* offsets)
+{
+ ulint rec_size;
+
+ ut_ad(m_heap != NULL);
+
+ rec_size = rec_offs_size(offsets);
+
+#ifdef UNIV_DEBUG
+ /* Check whether records are in order. */
+ if (!page_rec_is_infimum(m_cur_rec)) {
+ rec_t* old_rec = m_cur_rec;
+ offset_t* old_offsets = rec_get_offsets(
+ old_rec, m_index, NULL, page_rec_is_leaf(old_rec),
+ ULINT_UNDEFINED, &m_heap);
+
+ ut_ad(cmp_rec_rec(rec, old_rec, offsets, old_offsets, m_index)
+ > 0);
+ }
+
+ m_total_data += rec_size;
+#endif /* UNIV_DEBUG */
+
+ /* 1. Copy the record to page. */
+ rec_t* insert_rec = rec_copy(m_heap_top, rec, offsets);
+ rec_offs_make_valid(insert_rec, m_index, offsets);
+
+ /* 2. Insert the record in the linked list. */
+ rec_t* next_rec = page_rec_get_next(m_cur_rec);
+
+ page_rec_set_next(insert_rec, next_rec);
+ page_rec_set_next(m_cur_rec, insert_rec);
+
+ /* 3. Set the n_owned field in the inserted record to zero,
+ and set the heap_no field. */
+ if (m_is_comp) {
+ rec_set_n_owned_new(insert_rec, NULL, 0);
+ rec_set_heap_no_new(insert_rec,
+ PAGE_HEAP_NO_USER_LOW + m_rec_no);
+ } else {
+ rec_set_n_owned_old(insert_rec, 0);
+ rec_set_heap_no_old(insert_rec,
+ PAGE_HEAP_NO_USER_LOW + m_rec_no);
+ }
+
+ /* 4. Set member variables. */
+ ulint slot_size;
+ slot_size = page_dir_calc_reserved_space(m_rec_no + 1)
+ - page_dir_calc_reserved_space(m_rec_no);
+
+ ut_ad(m_free_space >= rec_size + slot_size);
+ ut_ad(m_heap_top + rec_size < m_page + UNIV_PAGE_SIZE);
+
+ m_free_space -= rec_size + slot_size;
+ m_heap_top += rec_size;
+ m_rec_no += 1;
+
+ if (!m_flush_observer && !m_page_zip) {
+ /* For ROW_FORMAT=COMPRESSED, redo log may be written
+ in PageBulk::compress(). */
+ page_cur_insert_rec_write_log(insert_rec, rec_size,
+ m_cur_rec, m_index, &m_mtr);
+ }
+
+ m_cur_rec = insert_rec;
+}
+
+/** Mark end of insertion to the page. Scan all records to set page dirs,
+and set page header members.
+Note: we refer to page_copy_rec_list_end_to_created_page. */
+void
+PageBulk::finish()
+{
+ ut_ad(m_rec_no > 0);
+ ut_ad(m_total_data + page_dir_calc_reserved_space(m_rec_no)
+ <= page_get_free_space_of_empty(m_is_comp));
+ /* See page_copy_rec_list_end_to_created_page() */
+ ut_d(page_dir_set_n_slots(m_page, NULL, srv_page_size / 2));
+
+ ulint count = 0;
+ ulint n_recs = 0;
+ ulint slot_index = 0;
+ rec_t* insert_rec = page_rec_get_next(page_get_infimum_rec(m_page));
+ page_dir_slot_t* slot = NULL;
+
+ /* Set owner & dir. */
+ do {
+
+ count++;
+ n_recs++;
+
+ if (count == (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2) {
+
+ slot_index++;
+
+ slot = page_dir_get_nth_slot(m_page, slot_index);
+
+ page_dir_slot_set_rec(slot, insert_rec);
+ page_dir_slot_set_n_owned(slot, NULL, count);
+
+ count = 0;
+ }
+
+ insert_rec = page_rec_get_next(insert_rec);
+ } while (!page_rec_is_supremum(insert_rec));
+
+ if (slot_index > 0
+ && (count + 1 + (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2
+ <= PAGE_DIR_SLOT_MAX_N_OWNED)) {
+		/* We can merge the two last dir slots. This operation is
+		here so that this function imitates exactly the equivalent
+		task done using page_cur_insert_rec, which is used in
+		database recovery to reproduce the task performed by this
+		function. To be able to check the correctness of recovery,
+		it is good that the imitation is exact. */
+
+ count += (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2;
+
+ page_dir_slot_set_n_owned(slot, NULL, 0);
+
+ slot_index--;
+ }
+
+ slot = page_dir_get_nth_slot(m_page, 1 + slot_index);
+ page_dir_slot_set_rec(slot, page_get_supremum_rec(m_page));
+ page_dir_slot_set_n_owned(slot, NULL, count + 1);
+
+ ut_ad(!dict_index_is_spatial(m_index));
+
+ if (!m_flush_observer && !m_page_zip) {
+ mlog_write_ulint(PAGE_HEADER + PAGE_N_DIR_SLOTS + m_page,
+ 2 + slot_index, MLOG_2BYTES, &m_mtr);
+ mlog_write_ulint(PAGE_HEADER + PAGE_HEAP_TOP + m_page,
+ ulint(m_heap_top - m_page),
+ MLOG_2BYTES, &m_mtr);
+ mlog_write_ulint(PAGE_HEADER + PAGE_N_HEAP + m_page,
+ (PAGE_HEAP_NO_USER_LOW + m_rec_no)
+ | ulint(m_is_comp) << 15,
+ MLOG_2BYTES, &m_mtr);
+ mlog_write_ulint(PAGE_HEADER + PAGE_N_RECS + m_page, m_rec_no,
+ MLOG_2BYTES, &m_mtr);
+ mlog_write_ulint(PAGE_HEADER + PAGE_LAST_INSERT + m_page,
+ ulint(m_cur_rec - m_page),
+ MLOG_2BYTES, &m_mtr);
+ mlog_write_ulint(PAGE_HEADER + PAGE_DIRECTION + m_page,
+ PAGE_RIGHT, MLOG_2BYTES, &m_mtr);
+ mlog_write_ulint(PAGE_HEADER + PAGE_N_DIRECTION + m_page, 0,
+ MLOG_2BYTES, &m_mtr);
+ } else {
+ /* For ROW_FORMAT=COMPRESSED, redo log may be written
+ in PageBulk::compress(). */
+ mach_write_to_2(PAGE_HEADER + PAGE_N_DIR_SLOTS + m_page,
+ 2 + slot_index);
+ mach_write_to_2(PAGE_HEADER + PAGE_HEAP_TOP + m_page,
+ ulint(m_heap_top - m_page));
+ mach_write_to_2(PAGE_HEADER + PAGE_N_HEAP + m_page,
+ (PAGE_HEAP_NO_USER_LOW + m_rec_no)
+ | ulint(m_is_comp) << 15);
+ mach_write_to_2(PAGE_HEADER + PAGE_N_RECS + m_page, m_rec_no);
+ mach_write_to_2(PAGE_HEADER + PAGE_LAST_INSERT + m_page,
+ ulint(m_cur_rec - m_page));
+ mach_write_to_2(PAGE_HEADER + PAGE_DIRECTION + m_page,
+ PAGE_RIGHT);
+ mach_write_to_2(PAGE_HEADER + PAGE_N_DIRECTION + m_page, 0);
+ }
+
+ m_block->skip_flush_check = false;
+}
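
A quick worked pass through the slot loop above, assuming the usual InnoDB
constant PAGE_DIR_SLOT_MAX_N_OWNED = 8 from page0page.h (not shown in this
diff): a directory slot is emitted after every (8 + 1) / 2 = 4 records, so
with 10 user records the loop creates slots at r4 and r8 (owning 4 records
each) and leaves count = 2. Because 2 + 1 + 4 <= 8 holds, the r8 slot is
merged away, and the supremum slot ends up owning the last 6 user records
plus the supremum itself, i.e. n_owned = 7.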
+
+/** Commit the inserts done to the page.
+@param[in]	success		whether all inserts succeeded */
+void
+PageBulk::commit(
+ bool success)
+{
+ if (success) {
+ ut_ad(page_validate(m_page, m_index));
+
+ /* Set no free space left and no buffered changes in ibuf. */
+ if (!dict_index_is_clust(m_index) && page_is_leaf(m_page)) {
+ ibuf_set_bitmap_for_bulk_load(
+ m_block, innobase_fill_factor == 100);
+ }
+ }
+
+ m_mtr.commit();
+}
+
+/** Compress a page of a compressed table.
+@return true if compression succeeded or no compression was needed
+@return false if compression failed */
+bool
+PageBulk::compress()
+{
+ ut_ad(m_page_zip != NULL);
+
+ return(page_zip_compress(m_page_zip, m_page, m_index,
+ page_zip_level, NULL, &m_mtr));
+}
+
+/** Get node pointer
+@return node pointer */
+dtuple_t*
+PageBulk::getNodePtr()
+{
+ rec_t* first_rec;
+ dtuple_t* node_ptr;
+
+ /* Create node pointer */
+ first_rec = page_rec_get_next(page_get_infimum_rec(m_page));
+ ut_a(page_rec_is_user_rec(first_rec));
+ node_ptr = dict_index_build_node_ptr(m_index, first_rec, m_page_no,
+ m_heap, m_level);
+
+ return(node_ptr);
+}
+
+/** Get the split rec in the left page. We split a page in half when
+compression fails, and the split rec will be copied to the right page.
+@return split rec */
+rec_t*
+PageBulk::getSplitRec()
+{
+ rec_t* rec;
+ offset_t* offsets;
+ ulint total_used_size;
+ ulint total_recs_size;
+ ulint n_recs;
+
+ ut_ad(m_page_zip != NULL);
+ ut_ad(m_rec_no >= 2);
+
+ ut_ad(page_get_free_space_of_empty(m_is_comp) > m_free_space);
+ total_used_size = page_get_free_space_of_empty(m_is_comp)
+ - m_free_space;
+
+ total_recs_size = 0;
+ n_recs = 0;
+ offsets = NULL;
+ rec = page_get_infimum_rec(m_page);
+
+ do {
+ rec = page_rec_get_next(rec);
+ ut_ad(page_rec_is_user_rec(rec));
+
+ offsets = rec_get_offsets(rec, m_index, offsets,
+ page_is_leaf(m_page),
+ ULINT_UNDEFINED, &m_heap);
+ total_recs_size += rec_offs_size(offsets);
+ n_recs++;
+ } while (total_recs_size + page_dir_calc_reserved_space(n_recs)
+ < total_used_size / 2);
+
+ /* Keep at least one record on left page */
+ if (page_rec_is_infimum(page_rec_get_prev(rec))) {
+ rec = page_rec_get_next(rec);
+ ut_ad(page_rec_is_user_rec(rec));
+ }
+
+ return(rec);
+}
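
To illustrate the split point chosen above: on a hypothetical page holding
ten 100-byte user records r1..r10 (directory reservation ignored for
simplicity), total_used_size is roughly 1000, and the loop stops once the
accumulated record size reaches half of it, i.e. at r5. r5 becomes the
split rec, so r1..r4 stay on the left page while copyIn() moves r5..r10 to
the right page and copyOut() trims them from the left one.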
+
+/** Copy all records after the split rec, including itself.
+@param[in]	split_rec	split rec */
+void
+PageBulk::copyIn(
+ rec_t* split_rec)
+{
+
+ rec_t* rec = split_rec;
+ offset_t* offsets = NULL;
+
+ ut_ad(m_rec_no == 0);
+ ut_ad(page_rec_is_user_rec(rec));
+
+ do {
+ offsets = rec_get_offsets(rec, m_index, offsets,
+ page_rec_is_leaf(split_rec),
+ ULINT_UNDEFINED, &m_heap);
+
+ insert(rec, offsets);
+
+ rec = page_rec_get_next(rec);
+ } while (!page_rec_is_supremum(rec));
+
+ ut_ad(m_rec_no > 0);
+}
+
+/** Remove all records after the split rec, including itself.
+@param[in]	split_rec	split rec */
+void
+PageBulk::copyOut(
+ rec_t* split_rec)
+{
+ rec_t* rec;
+ rec_t* last_rec;
+ ulint n;
+
+ /* Suppose that before copyOut we have 5 records on the page:
+ infimum->r1->r2->r3->r4->r5->supremum, and r3 is the split rec.
+
+ After copyOut, we have 2 records on the page:
+ infimum->r1->r2->supremum. Slot adjustment is not done. */
+
+ rec = page_rec_get_next(page_get_infimum_rec(m_page));
+ last_rec = page_rec_get_prev(page_get_supremum_rec(m_page));
+ n = 0;
+
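+ /* Count the records that precede split_rec; these are the
+ records that will remain on this page. */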
+ while (rec != split_rec) {
+ rec = page_rec_get_next(rec);
+ n++;
+ }
+
+ ut_ad(n > 0);
+
+ /* Set last record's next in page */
+ offset_t* offsets = NULL;
+ rec = page_rec_get_prev(split_rec);
+ offsets = rec_get_offsets(rec, m_index, offsets,
+ page_rec_is_leaf(split_rec),
+ ULINT_UNDEFINED, &m_heap);
+ page_rec_set_next(rec, page_get_supremum_rec(m_page));
+
+ /* Set related members */
+ m_cur_rec = rec;
+ m_heap_top = rec_get_end(rec, offsets);
+
+ offsets = rec_get_offsets(last_rec, m_index, offsets,
+ page_rec_is_leaf(split_rec),
+ ULINT_UNDEFINED, &m_heap);
+
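+ /* Reclaim the space of the removed records (from the new
+ heap top to the end of the last removed record) plus the
+ difference in reserved directory slot space between the old
+ and new record counts. */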
+ m_free_space += rec_get_end(last_rec, offsets)
+ - m_heap_top
+ + page_dir_calc_reserved_space(m_rec_no)
+ - page_dir_calc_reserved_space(n);
+ ut_ad(m_free_space > 0);
+ m_rec_no = n;
+
+#ifdef UNIV_DEBUG
+ m_total_data -= rec_get_end(last_rec, offsets) - m_heap_top;
+#endif /* UNIV_DEBUG */
+}
+
+/** Set next page
+@param[in] next_page_no next page no */
+inline void PageBulk::setNext(ulint next_page_no)
+{
+ if (UNIV_LIKELY_NULL(m_page_zip)) {
+ /* For ROW_FORMAT=COMPRESSED, redo log may be written
+ in PageBulk::compress(). */
+ mach_write_to_4(m_page + FIL_PAGE_NEXT, next_page_no);
+ } else {
+ mlog_write_ulint(m_page + FIL_PAGE_NEXT, next_page_no,
+ MLOG_4BYTES, &m_mtr);
+ }
+}
+
+/** Set previous page
+@param[in] prev_page_no previous page no */
+inline void PageBulk::setPrev(ulint prev_page_no)
+{
+ if (UNIV_LIKELY_NULL(m_page_zip)) {
+ /* For ROW_FORMAT=COMPRESSED, redo log may be written
+ in PageBulk::compress(). */
+ mach_write_to_4(m_page + FIL_PAGE_PREV, prev_page_no);
+ } else {
+ mlog_write_ulint(m_page + FIL_PAGE_PREV, prev_page_no,
+ MLOG_4BYTES, &m_mtr);
+ }
+}
+
+/** Check if the space required for the rec to be inserted is available on
+the page. We check the fill factor and padding here.
+@param[in] rec_size required length
+@return true if space is available */
+bool
+PageBulk::isSpaceAvailable(
+ ulint rec_size)
+{
+ ulint slot_size;
+ ulint required_space;
+
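+ /* Directory slot space is reserved in groups of records;
+ charge this insert with the increase in reserved slot space
+ caused by going from m_rec_no to m_rec_no + 1 records. */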
+ slot_size = page_dir_calc_reserved_space(m_rec_no + 1)
+ - page_dir_calc_reserved_space(m_rec_no);
+
+ required_space = rec_size + slot_size;
+
+ if (required_space > m_free_space) {
+ ut_ad(m_rec_no > 0);
+ return false;
+ }
+
+ /* Fillfactor & Padding apply to both leaf and non-leaf pages.
+ Note: we keep at least 2 records in a page to avoid B-tree level
+ growing too high. */
+ if (m_rec_no >= 2
+ && ((m_page_zip == NULL && m_free_space - required_space
+ < m_reserved_space)
+ || (m_page_zip != NULL && m_free_space - required_space
+ < m_padding_space))) {
+ return(false);
+ }
+
+ return(true);
+}
+
+/** Check whether the record needs to be stored externally.
+@return false if the entire record can be stored locally on the page */
+bool
+PageBulk::needExt(
+ const dtuple_t* tuple,
+ ulint rec_size)
+{
+ return(page_zip_rec_needs_ext(rec_size, m_is_comp,
+ dtuple_get_n_fields(tuple), m_block->page.size));
+}
+
+/** Store an externally stored record.
+Since the record is not logged yet, we don't log the update to the record:
+the blob data is logged first, then the record is logged in bulk mode.
+@param[in] big_rec external record
+@param[in] offsets record offsets
+@return error code */
+dberr_t
+PageBulk::storeExt(
+ const big_rec_t* big_rec,
+ offset_t* offsets)
+{
+ /* Note: not all fields are initialized in btr_pcur. */
+ btr_pcur_t btr_pcur;
+ btr_pcur.pos_state = BTR_PCUR_IS_POSITIONED;
+ btr_pcur.latch_mode = BTR_MODIFY_LEAF;
+ btr_pcur.btr_cur.index = m_index;
+ btr_pcur.btr_cur.page_cur.index = m_index;
+ btr_pcur.btr_cur.page_cur.rec = m_cur_rec;
+ btr_pcur.btr_cur.page_cur.offsets = offsets;
+ btr_pcur.btr_cur.page_cur.block = m_block;
+
+ dberr_t err = btr_store_big_rec_extern_fields(
+ &btr_pcur, offsets, big_rec, &m_mtr, BTR_STORE_INSERT_BULK);
+
+ /* Reset m_block and m_cur_rec from page cursor, because
+ block may be changed during blob insert. (FIXME: Can it really?) */
+ ut_ad(m_block == btr_pcur.btr_cur.page_cur.block);
+
+ m_block = btr_pcur.btr_cur.page_cur.block;
+ m_cur_rec = btr_pcur.btr_cur.page_cur.rec;
+ m_page = buf_block_get_frame(m_block);
+
+ return(err);
+}
+
+/** Release the block by committing the mtr.
+Note: log_free_check requires that the current thread hold no lock/latch. */
+void
+PageBulk::release()
+{
+ ut_ad(!dict_index_is_spatial(m_index));
+
+ /* We buffer-fix the block so that it cannot be evicted before
+ latch() re-acquires it. */
+ buf_block_buf_fix_inc(m_block, __FILE__, __LINE__);
+
+ /* No other threads can modify this block. */
+ m_modify_clock = buf_block_get_modify_clock(m_block);
+
+ m_mtr.commit();
+}
+
+/** Start mtr and latch the block */
+dberr_t
+PageBulk::latch()
+{
+ m_mtr.start();
+
+ if (m_flush_observer) {
+ m_mtr.set_log_mode(MTR_LOG_NO_REDO);
+ m_mtr.set_flush_observer(m_flush_observer);
+ } else {
+ m_mtr.set_named_space(m_index->space);
+ }
+
+ /* The optimistic get may fail, e.g. in case the block is
+ S-latched by the page_cleaner. */
+ if (!buf_page_optimistic_get(RW_X_LATCH, m_block, m_modify_clock,
+ __FILE__, __LINE__, &m_mtr)) {
+ m_block = buf_page_get_gen(page_id_t(m_index->space,
+ m_page_no),
+ univ_page_size, RW_X_LATCH,
+ m_block, BUF_GET_IF_IN_POOL,
+ __FILE__, __LINE__, &m_mtr, &m_err);
+
+ if (m_err != DB_SUCCESS) {
+ return (m_err);
+ }
+
+ ut_ad(m_block != NULL);
+ }
+
+ buf_block_buf_fix_dec(m_block);
+
+ ut_ad(m_cur_rec > m_page && m_cur_rec < m_heap_top);
+
+ return (m_err);
+}
+
+/** Split a page
+@param[in] page_bulk page to split
+@param[in] next_page_bulk next page
+@return error code */
+dberr_t
+BtrBulk::pageSplit(
+ PageBulk* page_bulk,
+ PageBulk* next_page_bulk)
+{
+ ut_ad(page_bulk->getPageZip() != NULL);
+
+ /* 1. Check if we have only one user record on the page. */
+ if (page_bulk->getRecNo() <= 1) {
+ return(DB_TOO_BIG_RECORD);
+ }
+
+ /* 2. create a new page. */
+ PageBulk new_page_bulk(m_index, m_trx->id, FIL_NULL,
+ page_bulk->getLevel(), m_flush_observer);
+ dberr_t err = new_page_bulk.init();
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ /* 3. copy the upper half to new page. */
+ rec_t* split_rec = page_bulk->getSplitRec();
+ new_page_bulk.copyIn(split_rec);
+ page_bulk->copyOut(split_rec);
+
+ /* 4. commit the split page. */
+ err = pageCommit(page_bulk, &new_page_bulk, true);
+ if (err != DB_SUCCESS) {
+ pageAbort(&new_page_bulk);
+ return(err);
+ }
+
+ /* 5. commit the new page. */
+ err = pageCommit(&new_page_bulk, next_page_bulk, true);
+ if (err != DB_SUCCESS) {
+ pageAbort(&new_page_bulk);
+ return(err);
+ }
+
+ return(err);
+}
+
+/** Commit (finish) a page. We set the next/prev page no, compress a page of
+a compressed table and split the page if compression fails, insert a node
+pointer to the father page if needed, and commit the mini-transaction.
+@param[in] page_bulk page to commit
+@param[in] next_page_bulk next page
+@param[in] insert_father false when page_bulk is a root page and
+ true when it's a non-root page
+@return error code */
+dberr_t
+BtrBulk::pageCommit(
+ PageBulk* page_bulk,
+ PageBulk* next_page_bulk,
+ bool insert_father)
+{
+ page_bulk->finish();
+
+ /* Set page links */
+ if (next_page_bulk != NULL) {
+ ut_ad(page_bulk->getLevel() == next_page_bulk->getLevel());
+
+ page_bulk->setNext(next_page_bulk->getPageNo());
+ next_page_bulk->setPrev(page_bulk->getPageNo());
+ } else {
+ /* If a page is released and latched again, we need to
+ mark it modified in the mini-transaction. */
+ page_bulk->setNext(FIL_NULL);
+ }
+
+ ut_ad(!rw_lock_own_flagged(&m_index->lock,
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX
+ | RW_LOCK_FLAG_S));
+
+ /* Compress page if it's a compressed table. */
+ if (page_bulk->getPageZip() != NULL && !page_bulk->compress()) {
+ return(pageSplit(page_bulk, next_page_bulk));
+ }
+
+ /* Insert node pointer to father page. */
+ if (insert_father) {
+ dtuple_t* node_ptr = page_bulk->getNodePtr();
+ dberr_t err = insert(node_ptr, page_bulk->getLevel()+1);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+ }
+
+ /* Commit mtr. */
+ page_bulk->commit(true);
+
+ return(DB_SUCCESS);
+}
+
+/** Log free check */
+inline void BtrBulk::logFreeCheck()
+{
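+ /* log_free_check() must be called while holding no latches,
+ so release all page latches first and re-latch afterwards. */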
+ if (log_sys->check_flush_or_checkpoint) {
+ release();
+
+ log_free_check();
+
+ latch();
+ }
+}
+
+/** Release all latches */
+void
+BtrBulk::release()
+{
+ ut_ad(m_root_level + 1 == m_page_bulks.size());
+
+ for (ulint level = 0; level <= m_root_level; level++) {
+ PageBulk* page_bulk = m_page_bulks.at(level);
+
+ page_bulk->release();
+ }
+}
+
+/** Re-latch all the pages */
+void
+BtrBulk::latch()
+{
+ ut_ad(m_root_level + 1 == m_page_bulks.size());
+
+ for (ulint level = 0; level <= m_root_level; level++) {
+ PageBulk* page_bulk = m_page_bulks.at(level);
+ page_bulk->latch();
+ }
+}
+
+/** Insert a tuple into a page at the given level
+@param[in] tuple tuple to insert
+@param[in] level B-tree level
+@return error code */
+dberr_t
+BtrBulk::insert(
+ dtuple_t* tuple,
+ ulint level)
+{
+ bool is_left_most = false;
+ dberr_t err = DB_SUCCESS;
+
+ /* Check if we need to create a PageBulk for the level. */
+ if (level + 1 > m_page_bulks.size()) {
+ PageBulk* new_page_bulk
+ = UT_NEW_NOKEY(PageBulk(m_index, m_trx->id, FIL_NULL,
+ level, m_flush_observer));
+ err = new_page_bulk->init();
+ if (err != DB_SUCCESS) {
+ UT_DELETE(new_page_bulk);
+ return(err);
+ }
+
+ m_page_bulks.push_back(new_page_bulk);
+ ut_ad(level + 1 == m_page_bulks.size());
+ m_root_level = level;
+
+ is_left_most = true;
+ }
+
+ ut_ad(m_page_bulks.size() > level);
+
+ PageBulk* page_bulk = m_page_bulks.at(level);
+
+ if (is_left_most && level > 0 && page_bulk->getRecNo() == 0) {
+ /* The node pointer must be marked as the predefined minimum
+ record, as there is no lower alphabetical limit to records in
+ the leftmost node of a level: */
+ dtuple_set_info_bits(tuple, dtuple_get_info_bits(tuple)
+ | REC_INFO_MIN_REC_FLAG);
+ }
+
+ ulint n_ext = 0;
+ ulint rec_size = rec_get_converted_size(m_index, tuple, n_ext);
+ big_rec_t* big_rec = NULL;
+ rec_t* rec = NULL;
+ offset_t* offsets = NULL;
+
+ if (page_bulk->needExt(tuple, rec_size)) {
+ /* The record is so big that we have to store some fields
+ externally on separate database pages */
+ big_rec = dtuple_convert_big_rec(m_index, 0, tuple, &n_ext);
+
+ if (big_rec == NULL) {
+ return(DB_TOO_BIG_RECORD);
+ }
+
+ rec_size = rec_get_converted_size(m_index, tuple, n_ext);
+ }
+
+ if (page_bulk->getPageZip() != NULL
+ && page_zip_is_too_big(m_index, tuple)) {
+ err = DB_TOO_BIG_RECORD;
+ goto func_exit;
+ }
+
+ if (!page_bulk->isSpaceAvailable(rec_size)) {
+ /* Create a sibling page_bulk. */
+ PageBulk* sibling_page_bulk;
+ sibling_page_bulk = UT_NEW_NOKEY(PageBulk(m_index, m_trx->id,
+ FIL_NULL, level,
+ m_flush_observer));
+ err = sibling_page_bulk->init();
+ if (err != DB_SUCCESS) {
+ UT_DELETE(sibling_page_bulk);
+ goto func_exit;
+ }
+
+ /* Commit page bulk. */
+ err = pageCommit(page_bulk, sibling_page_bulk, true);
+ if (err != DB_SUCCESS) {
+ pageAbort(sibling_page_bulk);
+ UT_DELETE(sibling_page_bulk);
+ goto func_exit;
+ }
+
+ /* Set new page bulk to page_bulks. */
+ ut_ad(sibling_page_bulk->getLevel() <= m_root_level);
+ m_page_bulks.at(level) = sibling_page_bulk;
+
+ UT_DELETE(page_bulk);
+ page_bulk = sibling_page_bulk;
+
+ /* Important: check via log_free_check() whether we need
+ a checkpoint. */
+ if (page_is_leaf(sibling_page_bulk->getPage())) {
+ if (trx_is_interrupted(m_trx)) {
+ if (m_flush_observer) {
+ m_flush_observer->interrupted();
+ }
+
+ err = DB_INTERRUPTED;
+ goto func_exit;
+ }
+
+ /* Wake up page cleaner to flush dirty pages. */
+ srv_inc_activity_count();
+ os_event_set(buf_flush_event);
+
+ logFreeCheck();
+ }
+
+ }
+
+ /* Convert tuple to rec. */
+ rec = rec_convert_dtuple_to_rec(static_cast<byte*>(mem_heap_alloc(
+ page_bulk->m_heap, rec_size)), m_index, tuple, n_ext);
+ offsets = rec_get_offsets(rec, m_index, offsets, !level,
+ ULINT_UNDEFINED, &page_bulk->m_heap);
+
+ page_bulk->insert(rec, offsets);
+
+ if (big_rec != NULL) {
+ ut_ad(dict_index_is_clust(m_index));
+ ut_ad(page_bulk->getLevel() == 0);
+ ut_ad(page_bulk == m_page_bulks.at(0));
+
+ /* Release all latched pages except the leaf node. */
+ for (ulint level = 1; level <= m_root_level; level++) {
+ PageBulk* page_bulk = m_page_bulks.at(level);
+
+ page_bulk->release();
+ }
+
+ err = page_bulk->storeExt(big_rec, offsets);
+
+ /* Re-latch the released pages. */
+ for (ulint level = 1; level <= m_root_level; level++) {
+ PageBulk* page_bulk = m_page_bulks.at(level);
+ page_bulk->latch();
+ }
+ }
+
+func_exit:
+ if (big_rec != NULL) {
+ dtuple_convert_back_big_rec(m_index, tuple, big_rec);
+ }
+
+ return(err);
+}
+
+/** Finish the B-tree bulk load. We commit the last page at each level
+and copy the last page at the top level to the root page of the index
+if no error occurs.
+@param[in] err whether bulk load was successful until now
+@return error code */
+dberr_t
+BtrBulk::finish(dberr_t err)
+{
+ ulint last_page_no = FIL_NULL;
+
+ ut_ad(!dict_table_is_temporary(m_index->table));
+
+ if (m_page_bulks.size() == 0) {
+ /* The table is empty. The root page of the index tree
+ is already in a consistent state. No need to flush. */
+ return(err);
+ }
+
+ ut_ad(m_root_level + 1 == m_page_bulks.size());
+
+ /* Finish all page bulks */
+ for (ulint level = 0; level <= m_root_level; level++) {
+ PageBulk* page_bulk = m_page_bulks.at(level);
+
+ last_page_no = page_bulk->getPageNo();
+
+ if (err == DB_SUCCESS) {
+ err = pageCommit(page_bulk, NULL,
+ level != m_root_level);
+ }
+
+ if (err != DB_SUCCESS) {
+ pageAbort(page_bulk);
+ }
+
+ UT_DELETE(page_bulk);
+ }
+
+ if (err == DB_SUCCESS) {
+ rec_t* first_rec;
+ mtr_t mtr;
+ buf_block_t* last_block;
+ page_t* last_page;
+ page_id_t page_id(dict_index_get_space(m_index),
+ last_page_no);
+ page_size_t page_size(dict_table_page_size(m_index->table));
+ ulint root_page_no = dict_index_get_page(m_index);
+ PageBulk root_page_bulk(m_index, m_trx->id,
+ root_page_no, m_root_level,
+ m_flush_observer);
+
+ mtr_start(&mtr);
+ mtr.set_named_space(dict_index_get_space(m_index));
+ mtr_x_lock(dict_index_get_lock(m_index), &mtr);
+
+ ut_ad(last_page_no != FIL_NULL);
+ last_block = btr_block_get(page_id, page_size,
+ RW_X_LATCH, m_index, &mtr);
+ last_page = buf_block_get_frame(last_block);
+ first_rec = page_rec_get_next(page_get_infimum_rec(last_page));
+ ut_ad(page_rec_is_user_rec(first_rec));
+
+ /* Copy last page to root page. */
+ err = root_page_bulk.init();
+ if (err != DB_SUCCESS) {
+ mtr_commit(&mtr);
+ return(err);
+ }
+ root_page_bulk.copyIn(first_rec);
+
+ /* Remove last page. */
+ btr_page_free(m_index, last_block, &mtr);
+
+ /* Do not flush the last page. */
+ last_block->page.flush_observer = NULL;
+
+ mtr_commit(&mtr);
+
+ err = pageCommit(&root_page_bulk, NULL, false);
+ ut_ad(err == DB_SUCCESS);
+ }
+
+ ut_ad(!sync_check_iterate(dict_sync_check()));
+
+ ut_ad(err != DB_SUCCESS
+ || btr_validate_index(m_index, NULL, false) == DB_SUCCESS);
+ return(err);
+}
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index 7b2fbfa0f0e..a47121399d2 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -1,9 +1,9 @@
/*****************************************************************************
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2015, 2017, MariaDB Corporation.
+Copyright (c) 2015, 2020, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -44,13 +44,7 @@ Created 10/16/1994 Heikki Tuuri
*******************************************************/
#include "btr0cur.h"
-
-#ifdef UNIV_NONINL
-#include "btr0cur.ic"
-#endif
-
#include "row0upd.h"
-#ifndef UNIV_HOTBACKUP
#include "mtr0log.h"
#include "page0page.h"
#include "page0zip.h"
@@ -63,13 +57,16 @@ Created 10/16/1994 Heikki Tuuri
#include "row0purge.h"
#include "row0upd.h"
#include "trx0rec.h"
-#include "trx0roll.h" /* trx_is_recv() */
+#include "trx0roll.h"
#include "que0que.h"
#include "row0row.h"
#include "srv0srv.h"
#include "ibuf0ibuf.h"
#include "lock0lock.h"
#include "zlib.h"
+#include "srv0start.h"
+#include "mysql_com.h"
+#include "dict0stats.h"
/** Buffered B-tree operation types, introduced as part of delete buffering. */
enum btr_op_t {
@@ -80,29 +77,49 @@ enum btr_op_t {
BTR_DELMARK_OP /*!< Mark a record for deletion */
};
-#ifdef UNIV_DEBUG
-/** If the following is set to TRUE, this module prints a lot of
-trace information of individual record operations */
-UNIV_INTERN ibool btr_cur_print_record_ops = FALSE;
-#endif /* UNIV_DEBUG */
+/** Modification types for the B-tree operation. */
+enum btr_intention_t {
+ BTR_INTENTION_DELETE,
+ BTR_INTENTION_BOTH,
+ BTR_INTENTION_INSERT
+};
+#if BTR_INTENTION_DELETE > BTR_INTENTION_BOTH
+#error "BTR_INTENTION_DELETE > BTR_INTENTION_BOTH"
+#endif
+#if BTR_INTENTION_BOTH > BTR_INTENTION_INSERT
+#error "BTR_INTENTION_BOTH > BTR_INTENTION_INSERT"
+#endif
+
+/** With the index->lock scalability improvement, the only clear performance
+regression observed was caused by the history list growing huge. That is
+because the previously exclusive use of index->lock also had the side effect
+of reserving free blocks and read IO bandwidth for purge with priority. To
+keep the history list from growing much larger than with the previous
+implementation, we prioritize pessimistic tree operations done by purge,
+as before, whenever the list seems to be growing huge.
+
+Experimentally, the history list length starts to clearly affect performance
+throughput from about 100000. */
+#define BTR_CUR_FINE_HISTORY_LENGTH 100000
/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */
-UNIV_INTERN ulint btr_cur_n_non_sea = 0;
-/** Number of successful adaptive hash index lookups in
-btr_cur_search_to_nth_level(). */
-UNIV_INTERN ulint btr_cur_n_sea = 0;
+ulint btr_cur_n_non_sea;
/** Old value of btr_cur_n_non_sea. Copied by
srv_refresh_innodb_monitor_stats(). Referenced by
srv_printf_innodb_monitor(). */
-UNIV_INTERN ulint btr_cur_n_non_sea_old = 0;
+ulint btr_cur_n_non_sea_old;
+#ifdef BTR_CUR_HASH_ADAPT
+/** Number of successful adaptive hash index lookups in
+btr_cur_search_to_nth_level(). */
+ulint btr_cur_n_sea;
/** Old value of btr_cur_n_sea. Copied by
srv_refresh_innodb_monitor_stats(). Referenced by
srv_printf_innodb_monitor(). */
-UNIV_INTERN ulint btr_cur_n_sea_old = 0;
+ulint btr_cur_n_sea_old;
+#endif /* BTR_CUR_HASH_ADAPT */
#ifdef UNIV_DEBUG
/* Flag to limit optimistic insert records */
-UNIV_INTERN uint btr_cur_limit_optimistic_insert_debug = 0;
+uint btr_cur_limit_optimistic_insert_debug;
#endif /* UNIV_DEBUG */
/** In the optimistic insert, if the insert does not fit, but this much space
@@ -121,30 +138,18 @@ can be released by page reorganize, then it is reorganized */
part header, in bytes */
/** Estimated table level stats from sampled value.
-@param value sampled stats
-@param index index being sampled
-@param sample number of sampled rows
-@param ext_size external stored data size
-@param not_empty table not empty
+@param value sampled stats
+@param index index being sampled
+@param sample number of sampled rows
+@param ext_size external stored data size
+@param not_empty table not empty
@return estimated table wide stats from sampled value */
-#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, sample, ext_size, not_empty)\
- (((value) * (ib_int64_t) index->stat_n_leaf_pages \
+#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, sample, ext_size, not_empty) \
+ (((value) * static_cast<ib_uint64_t>(index->stat_n_leaf_pages) \
+ (sample) - 1 + (ext_size) + (not_empty)) / ((sample) + (ext_size)))
/* @} */
-#endif /* !UNIV_HOTBACKUP */
-
-/** A BLOB field reference full of zero, for use in assertions and tests.
-Initially, BLOB field references are set to zero, in
-dtuple_convert_big_rec(). */
-const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE] = {
- 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0,
-};
-#ifndef UNIV_HOTBACKUP
/*******************************************************************//**
Marks all extern fields in a record as owned by the record. This function
should be called if the delete mark of a record is removed: a not delete
@@ -157,7 +162,7 @@ btr_cur_unmark_extern_fields(
part will be updated, or NULL */
rec_t* rec, /*!< in/out: record in a clustered index */
dict_index_t* index, /*!< in: index of the page */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
mtr_t* mtr); /*!< in: mtr, or NULL if not logged */
/*******************************************************************//**
Adds path information to the cursor for the current page, for which
@@ -182,9 +187,9 @@ btr_rec_free_updated_extern_fields(
rec_t* rec, /*!< in: record */
page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
part will be updated, or NULL */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
const upd_t* update, /*!< in: update vector */
- enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
+ bool rollback,/*!< in: performing rollback? */
mtr_t* mtr); /*!< in: mini-transaction handle which contains
an X-latch to record page and to the tree */
/***********************************************************//**
@@ -196,143 +201,654 @@ btr_rec_free_externally_stored_fields(
dict_index_t* index, /*!< in: index of the data, the index
tree MUST be X-latched */
rec_t* rec, /*!< in: record */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
part will be updated, or NULL */
- enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
+ bool rollback,/*!< in: performing rollback? */
mtr_t* mtr); /*!< in: mini-transaction handle which contains
an X-latch to record page and to the index
tree */
-#endif /* !UNIV_HOTBACKUP */
-/******************************************************//**
-The following function is used to set the deleted bit of a record. */
-UNIV_INLINE
-void
-btr_rec_set_deleted_flag(
-/*=====================*/
- rec_t* rec, /*!< in/out: physical record */
- page_zip_des_t* page_zip,/*!< in/out: compressed page (or NULL) */
- ulint flag) /*!< in: nonzero if delete marked */
-{
- if (page_rec_is_comp(rec)) {
- rec_set_deleted_flag_new(rec, page_zip, flag);
- } else {
- ut_ad(!page_zip);
- rec_set_deleted_flag_old(rec, flag);
- }
-}
-
-#ifndef UNIV_HOTBACKUP
/*==================== B-TREE SEARCH =========================*/
-/********************************************************************//**
-Latches the leaf page or pages requested. */
-static
-void
+#if MTR_MEMO_PAGE_S_FIX != RW_S_LATCH
+#error "MTR_MEMO_PAGE_S_FIX != RW_S_LATCH"
+#endif
+#if MTR_MEMO_PAGE_X_FIX != RW_X_LATCH
+#error "MTR_MEMO_PAGE_X_FIX != RW_X_LATCH"
+#endif
+#if MTR_MEMO_PAGE_SX_FIX != RW_SX_LATCH
+#error "MTR_MEMO_PAGE_SX_FIX != RW_SX_LATCH"
+#endif
+
+/** Latches the leaf page or pages requested.
+@param[in] block leaf page where the search converged
+@param[in] page_id page id of the leaf
+@param[in] latch_mode BTR_SEARCH_LEAF, ...
+@param[in] cursor cursor
+@param[in] mtr mini-transaction
+@return blocks and savepoints which were actually latched */
+btr_latch_leaves_t
btr_cur_latch_leaves(
-/*=================*/
- page_t* page, /*!< in: leaf page where the search
- converged */
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number of the leaf */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
- btr_cur_t* cursor, /*!< in: cursor */
- mtr_t* mtr) /*!< in: mtr */
+ buf_block_t* block,
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ ulint latch_mode,
+ btr_cur_t* cursor,
+ mtr_t* mtr)
{
ulint mode;
ulint left_page_no;
ulint right_page_no;
buf_block_t* get_block;
+ page_t* page = buf_block_get_frame(block);
+ bool spatial;
+ btr_latch_leaves_t latch_leaves = {{NULL, NULL, NULL}, {0, 0, 0}};
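+ /* When latched, latch_leaves.blocks[] holds the left sibling
+ in [0], the requested page in [1] and the right sibling in [2],
+ with the corresponding mtr savepoints in savepoints[]. */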
- ut_ad(page && mtr);
+ spatial = dict_index_is_spatial(cursor->index) && cursor->rtr_info;
+ ut_ad(buf_page_in_file(&block->page));
switch (latch_mode) {
case BTR_SEARCH_LEAF:
case BTR_MODIFY_LEAF:
- mode = latch_mode == BTR_SEARCH_LEAF ? RW_S_LATCH : RW_X_LATCH;
- get_block = btr_block_get(
- space, zip_size, page_no, mode, cursor->index, mtr);
+ case BTR_SEARCH_TREE:
+ if (spatial) {
+ cursor->rtr_info->tree_savepoints[RTR_MAX_LEVELS]
+ = mtr_set_savepoint(mtr);
+ }
+
+ mode = latch_mode == BTR_MODIFY_LEAF ? RW_X_LATCH : RW_S_LATCH;
+ latch_leaves.savepoints[1] = mtr_set_savepoint(mtr);
+ get_block = btr_block_get(page_id, page_size, mode,
+ cursor->index, mtr);
+ latch_leaves.blocks[1] = get_block;
#ifdef UNIV_BTR_DEBUG
ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
#endif /* UNIV_BTR_DEBUG */
- get_block->check_index_page_at_flush = TRUE;
- return;
+ if (spatial) {
+ cursor->rtr_info->tree_blocks[RTR_MAX_LEVELS]
+ = get_block;
+ }
+
+ return(latch_leaves);
case BTR_MODIFY_TREE:
- /* x-latch also brothers from left to right */
- left_page_no = btr_page_get_prev(page, mtr);
+ /* This latch is exclusive against other operations
+ that call btr_page_set_prev(). */
+ ut_ad(mtr_memo_contains_flagged(
+ mtr,
+ dict_index_get_lock(cursor->index),
+ MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK));
+ /* x-latch also siblings from left to right */
+ left_page_no = btr_page_get_prev(page);
mode = latch_mode;
if (left_page_no != FIL_NULL) {
+
+ if (spatial) {
+ cursor->rtr_info->tree_savepoints[
+ RTR_MAX_LEVELS] = mtr_set_savepoint(mtr);
+ }
+
+ latch_leaves.savepoints[0] = mtr_set_savepoint(mtr);
get_block = btr_block_get(
- space, zip_size, left_page_no,
- RW_X_LATCH, cursor->index, mtr);
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(get_block->frame)
- == page_is_comp(page));
- ut_a(btr_page_get_next(get_block->frame, mtr)
- == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
- get_block->check_index_page_at_flush = TRUE;
+ page_id_t(page_id.space(), left_page_no),
+ page_size, RW_X_LATCH, cursor->index, mtr);
+ latch_leaves.blocks[0] = get_block;
+
+ if (spatial) {
+ cursor->rtr_info->tree_blocks[RTR_MAX_LEVELS]
+ = get_block;
+ }
+ }
+
+ if (spatial) {
+ cursor->rtr_info->tree_savepoints[RTR_MAX_LEVELS + 1]
+ = mtr_set_savepoint(mtr);
}
+ latch_leaves.savepoints[1] = mtr_set_savepoint(mtr);
get_block = btr_block_get(
- space, zip_size, page_no,
- RW_X_LATCH, cursor->index, mtr);
+ page_id, page_size, RW_X_LATCH, cursor->index, mtr);
+ latch_leaves.blocks[1] = get_block;
+
#ifdef UNIV_BTR_DEBUG
+ /* Sanity check only after both the blocks are latched. */
+ if (latch_leaves.blocks[0] != NULL) {
+ ut_a(page_is_comp(latch_leaves.blocks[0]->frame)
+ == page_is_comp(page));
+ ut_a(btr_page_get_next(latch_leaves.blocks[0]->frame)
+ == page_get_page_no(page));
+ }
ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
#endif /* UNIV_BTR_DEBUG */
- get_block->check_index_page_at_flush = TRUE;
- right_page_no = btr_page_get_next(page, mtr);
+ if (spatial) {
+ cursor->rtr_info->tree_blocks[RTR_MAX_LEVELS + 1]
+ = get_block;
+ }
+
+ right_page_no = btr_page_get_next(page);
if (right_page_no != FIL_NULL) {
+ if (spatial) {
+ cursor->rtr_info->tree_savepoints[
+ RTR_MAX_LEVELS + 2] = mtr_set_savepoint(
+ mtr);
+ }
+ latch_leaves.savepoints[2] = mtr_set_savepoint(mtr);
get_block = btr_block_get(
- space, zip_size, right_page_no,
- RW_X_LATCH, cursor->index, mtr);
+ page_id_t(page_id.space(), right_page_no),
+ page_size, RW_X_LATCH, cursor->index, mtr);
+ latch_leaves.blocks[2] = get_block;
#ifdef UNIV_BTR_DEBUG
ut_a(page_is_comp(get_block->frame)
== page_is_comp(page));
- ut_a(btr_page_get_prev(get_block->frame, mtr)
+ ut_a(btr_page_get_prev(get_block->frame)
== page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */
- get_block->check_index_page_at_flush = TRUE;
+ if (spatial) {
+ cursor->rtr_info->tree_blocks[
+ RTR_MAX_LEVELS + 2] = get_block;
+ }
}
- return;
+ return(latch_leaves);
case BTR_SEARCH_PREV:
case BTR_MODIFY_PREV:
mode = latch_mode == BTR_SEARCH_PREV ? RW_S_LATCH : RW_X_LATCH;
- /* latch also left brother */
- left_page_no = btr_page_get_prev(page, mtr);
+ /* latch also left sibling */
+ rw_lock_s_lock(&block->lock);
+ left_page_no = btr_page_get_prev(page);
+ rw_lock_s_unlock(&block->lock);
if (left_page_no != FIL_NULL) {
+ latch_leaves.savepoints[0] = mtr_set_savepoint(mtr);
get_block = btr_block_get(
- space, zip_size,
- left_page_no, mode, cursor->index, mtr);
+ page_id_t(page_id.space(), left_page_no),
+ page_size, mode, cursor->index, mtr);
+ latch_leaves.blocks[0] = get_block;
cursor->left_block = get_block;
#ifdef UNIV_BTR_DEBUG
ut_a(page_is_comp(get_block->frame)
== page_is_comp(page));
- ut_a(btr_page_get_next(get_block->frame, mtr)
+ ut_a(btr_page_get_next(get_block->frame)
== page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */
- get_block->check_index_page_at_flush = TRUE;
}
- get_block = btr_block_get(
- space, zip_size, page_no, mode, cursor->index, mtr);
+ latch_leaves.savepoints[1] = mtr_set_savepoint(mtr);
+ get_block = btr_block_get(page_id, page_size, mode,
+ cursor->index, mtr);
+ latch_leaves.blocks[1] = get_block;
#ifdef UNIV_BTR_DEBUG
ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
#endif /* UNIV_BTR_DEBUG */
- get_block->check_index_page_at_flush = TRUE;
- return;
+ return(latch_leaves);
+ case BTR_CONT_MODIFY_TREE:
+ ut_ad(dict_index_is_spatial(cursor->index));
+ return(latch_leaves);
}
ut_error;
+ return(latch_leaves);
+}
+
+/** Optimistically latches the leaf page or pages requested.
+@param[in] block guessed buffer block
+@param[in] modify_clock modify clock value
+@param[in,out] latch_mode BTR_SEARCH_LEAF, ...
+@param[in,out] cursor cursor
+@param[in] file file name
+@param[in] line line where called
+@param[in] mtr mini-transaction
+@return true if success */
+bool
+btr_cur_optimistic_latch_leaves(
+ buf_block_t* block,
+ ib_uint64_t modify_clock,
+ ulint* latch_mode,
+ btr_cur_t* cursor,
+ const char* file,
+ unsigned line,
+ mtr_t* mtr)
+{
+ ulint mode;
+ ulint left_page_no;
+
+ switch (*latch_mode) {
+ case BTR_SEARCH_LEAF:
+ case BTR_MODIFY_LEAF:
+ return(buf_page_optimistic_get(*latch_mode, block,
+ modify_clock, file, line, mtr));
+ case BTR_SEARCH_PREV:
+ case BTR_MODIFY_PREV:
+ mode = *latch_mode == BTR_SEARCH_PREV
+ ? RW_S_LATCH : RW_X_LATCH;
+
+ buf_page_mutex_enter(block);
+ if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
+ buf_page_mutex_exit(block);
+ return(false);
+ }
+ /* pin the block so that it cannot be relocated */
+ buf_block_buf_fix_inc(block, file, line);
+ buf_page_mutex_exit(block);
+
+ rw_lock_s_lock(&block->lock);
+ if (block->modify_clock != modify_clock) {
+ rw_lock_s_unlock(&block->lock);
+
+ goto unpin_failed;
+ }
+ left_page_no = btr_page_get_prev(
+ buf_block_get_frame(block));
+ rw_lock_s_unlock(&block->lock);
+
+ if (left_page_no != FIL_NULL) {
+ const page_id_t page_id(
+ dict_index_get_space(cursor->index),
+ left_page_no);
+
+ cursor->left_block = btr_block_get(
+ page_id,
+ dict_table_page_size(cursor->index->table),
+ mode, cursor->index, mtr);
+ } else {
+ cursor->left_block = NULL;
+ }
+
+ if (buf_page_optimistic_get(mode, block, modify_clock,
+ file, line, mtr)) {
+ if (btr_page_get_prev(buf_block_get_frame(block))
+ == left_page_no) {
+ buf_block_buf_fix_dec(block);
+ *latch_mode = mode;
+ return(true);
+ } else {
+ /* release the block */
+ btr_leaf_page_release(block, mode, mtr);
+ }
+ }
+
+ /* release the left block */
+ if (cursor->left_block != NULL) {
+ btr_leaf_page_release(cursor->left_block,
+ mode, mtr);
+ }
+unpin_failed:
+ /* unpin the block */
+ buf_block_buf_fix_dec(block);
+ return(false);
+
+ default:
+ ut_error;
+ return(false);
+ }
+}
+
+/**
+Gets the intention as btr_intention_t from the latch_mode, and clears the
+intention bits in the latch_mode.
+@param latch_mode in/out: pointer to latch_mode
+@return intention for latching the tree */
+static
+btr_intention_t
+btr_cur_get_and_clear_intention(
+ ulint *latch_mode)
+{
+ btr_intention_t intention;
+
+ switch (*latch_mode & (BTR_LATCH_FOR_INSERT | BTR_LATCH_FOR_DELETE)) {
+ case BTR_LATCH_FOR_INSERT:
+ intention = BTR_INTENTION_INSERT;
+ break;
+ case BTR_LATCH_FOR_DELETE:
+ intention = BTR_INTENTION_DELETE;
+ break;
+ default:
+ /* both or unknown */
+ intention = BTR_INTENTION_BOTH;
+ }
+ *latch_mode &= ~(BTR_LATCH_FOR_INSERT | BTR_LATCH_FOR_DELETE);
+
+ return(intention);
+}
+
+/**
+Gets the desired latch type for the root leaf (root page is root leaf)
+at the latch mode.
+@param latch_mode in: BTR_SEARCH_LEAF, ...
+@return latch type */
+static
+rw_lock_type_t
+btr_cur_latch_for_root_leaf(
+ ulint latch_mode)
+{
+ switch (latch_mode) {
+ case BTR_SEARCH_LEAF:
+ case BTR_SEARCH_TREE:
+ case BTR_SEARCH_PREV:
+ return(RW_S_LATCH);
+ case BTR_MODIFY_LEAF:
+ case BTR_MODIFY_TREE:
+ case BTR_MODIFY_PREV:
+ return(RW_X_LATCH);
+ case BTR_CONT_MODIFY_TREE:
+ case BTR_CONT_SEARCH_TREE:
+ /* The root page should already be latched and does
+ not need to be latched here.
+ fall through (RW_NO_LATCH) */
+ case BTR_NO_LATCHES:
+ return(RW_NO_LATCH);
+ }
+
+ ut_error;
+ return(RW_NO_LATCH); /* avoid compiler warnings */
+}
+
+/** Detects whether modifying the record might require modifying the tree structure.
+@param[in] index index
+@param[in] page page
+@param[in] lock_intention lock intention for the tree operation
+@param[in] rec record (current node_ptr)
+@param[in] rec_size size of the record or max size of node_ptr
+@param[in] page_size page size
+@param[in] mtr mtr
+@return true if tree modification is needed */
+static
+bool
+btr_cur_will_modify_tree(
+ dict_index_t* index,
+ const page_t* page,
+ btr_intention_t lock_intention,
+ const rec_t* rec,
+ ulint rec_size,
+ const page_size_t& page_size,
+ mtr_t* mtr)
+{
+ ut_ad(!page_is_leaf(page));
+ ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
+ MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK));
+
+ /* A pessimistic delete of the first record causes a delete & insert
+ of the node_ptr at the upper level, and a subsequent page shrink is
+ possible, which again causes a delete of a node_ptr at the upper
+ level. So we should pay attention not only to the first and last
+ records but also to the 2nd record: if the "delete & insert" happen
+ on a different page, the 2nd record becomes the first record, and a
+ following compress might delete that record, causing the upper level
+ node_ptr modification. */
+
+ const ulint n_recs = page_get_n_recs(page);
+
+ if (lock_intention <= BTR_INTENTION_BOTH) {
+ compile_time_assert(BTR_INTENTION_DELETE < BTR_INTENTION_BOTH);
+ compile_time_assert(BTR_INTENTION_BOTH < BTR_INTENTION_INSERT);
+
+ if (!page_has_siblings(page)) {
+ return true;
+ }
+
+ ulint margin = rec_size;
+
+ if (lock_intention == BTR_INTENTION_BOTH) {
+ ulint level = btr_page_get_level(page, mtr);
+
+ /* This value is the worst-case expectation of how many
+ node_ptr records may be deleted from this page. It is
+ used to estimate whether the cursor position can become
+ the leftmost record on this page. */
+ ulint max_nodes_deleted = 0;
+
+ /* Tree-modifying operations at the levels below this
+ one can logically delete at most (2 ^ (level - 1))
+ records from this page, even in the (extremely rare)
+ worst case. */
+ if (level > 7) {
+ /* TODO: adjust this practical limit. */
+ max_nodes_deleted = 64;
+ } else if (level > 0) {
+ max_nodes_deleted = (ulint)1 << (level - 1);
+ }
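+ /* For example, at level 3 at most 1 << 2 = 4 node
+ pointer records are assumed to be deletable from this
+ page by operations below it. */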
+ /* Check what a delete will cause (BTR_INTENTION_BOTH
+ or BTR_INTENTION_DELETE). */
+ if (n_recs <= max_nodes_deleted * 2
+ || page_rec_is_first(rec, page)) {
+ /* The cursor record can be the left most record
+ in this page. */
+ return true;
+ }
+
+ if (page_has_prev(page)
+ && page_rec_distance_is_at_most(
+ page_get_infimum_rec(page), rec,
+ max_nodes_deleted)) {
+ return true;
+ }
+
+ if (page_has_next(page)
+ && page_rec_distance_is_at_most(
+ rec, page_get_supremum_rec(page),
+ max_nodes_deleted)) {
+ return true;
+ }
+
+ /* Deleting the leftmost record of a page causes a delete
+ & insert at its parent page. After that, the delete
+ might cause btr_compress() and a record delete at the
+ parent page. Thus we should consider the maximum deletes. */
+ margin *= max_nodes_deleted;
+ }
+
+ /* Safe because we already have SX latch of the index tree */
+ if (page_get_data_size(page)
+ < margin + BTR_CUR_PAGE_COMPRESS_LIMIT(index)) {
+ return(true);
+ }
+ }
+
+ if (lock_intention >= BTR_INTENTION_BOTH) {
+ /* Check what an insert will cause (BTR_INTENTION_BOTH
+ or BTR_INTENTION_INSERT). */
+
+ /* Once btr_cur_limit_optimistic_insert_debug is in effect,
+ we should check it here in advance, since the maximum
+ number of records allowed in a page is limited. */
+ LIMIT_OPTIMISTIC_INSERT_DEBUG(n_recs, return true);
+
+ /* We need 2 records' worth of space in case a single
+ split and insert does not fit.
+ page_get_max_insert_size_after_reorganize() already
+ includes space for the page directory. */
+ ulint max_size
+ = page_get_max_insert_size_after_reorganize(page, 2);
+
+ if (max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT + rec_size
+ || max_size < rec_size * 2) {
+ return(true);
+ }
+
+ /* TODO: optimize this condition for ROW_FORMAT=COMPRESSED.
+ This is based on the worst case, and we could invoke
+ page_zip_available() on the block->page.zip. */
+ /* We also need 2 records' worth of space for the worst
+ compression rate. */
+ if (page_size.is_compressed()
+ && page_zip_empty_size(index->n_fields,
+ page_size.physical())
+ <= rec_size * 2 + page_get_data_size(page)
+ + page_dir_calc_reserved_space(n_recs + 2)) {
+ return(true);
+ }
+ }
+
+ return(false);
+}
+
+/** Detects whether modifying the record might require a modification
+opposite to the intention.
+@param[in] page page
+@param[in] lock_intention lock intention for the tree operation
+@param[in] rec record (current node_ptr)
+@return true if tree modification is needed */
+static
+bool
+btr_cur_need_opposite_intention(
+ const page_t* page,
+ btr_intention_t lock_intention,
+ const rec_t* rec)
+{
+ switch (lock_intention) {
+ case BTR_INTENTION_DELETE:
+ return (page_has_prev(page) && page_rec_is_first(rec, page)) ||
+ (page_has_next(page) && page_rec_is_last(rec, page));
+ case BTR_INTENTION_INSERT:
+ return page_has_next(page) && page_rec_is_last(rec, page);
+ case BTR_INTENTION_BOTH:
+ return(false);
+ }
+
+ ut_error;
+ return(false);
+}
+
+/**
+@param[in] index b-tree
+@return maximum size of a node pointer record in bytes */
+static ulint btr_node_ptr_max_size(const dict_index_t* index)
+{
+ if (dict_index_is_ibuf(index)) {
+ /* Cannot estimate accurately. */
+ /* This is the universal index for the change buffer.
+ The max size of an entry is about max key length * 2
+ (the index key + the primary key to be inserted into the index).
+ (The max key length is UNIV_PAGE_SIZE / 16 * 3 at
+ ha_innobase::max_supported_key_length(), considering that
+ MAX_KEY_LENGTH = 3072 in MySQL imposes the historical InnoDB
+ value of 3500 for the 16K page size case.)
+ For the universal index, the node_ptr contains most of the entry,
+ and 512 bytes are enough for the ibuf columns and metadata. */
+ return srv_page_size / 8 * 3 + 512;
+ }
+
+ /* Each record has page_no, length of page_no and header. */
+ ulint comp = dict_table_is_comp(index->table);
+ ulint rec_max_size = comp
+ ? REC_NODE_PTR_SIZE + 1 + REC_N_NEW_EXTRA_BYTES
+ + UT_BITS_IN_BYTES(index->n_nullable)
+ : REC_NODE_PTR_SIZE + 2 + REC_N_OLD_EXTRA_BYTES
+ + 2 * index->n_fields;
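+ /* For example, for ROW_FORMAT=COMPACT with no nullable
+ columns this base is REC_NODE_PTR_SIZE (4) + 1 length byte
+ + REC_N_NEW_EXTRA_BYTES (5) = 10 bytes. */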
+
+ /* Compute the maximum possible record size. */
+ for (ulint i = 0; i < dict_index_get_n_unique_in_tree(index); i++) {
+ const dict_field_t* field
+ = dict_index_get_nth_field(index, i);
+ const dict_col_t* col
+ = dict_field_get_col(field);
+ ulint field_max_size;
+ ulint field_ext_max_size;
+
+ /* Determine the maximum length of the index field. */
+
+ field_max_size = dict_col_get_fixed_size(col, comp);
+ if (field_max_size) {
+ /* dict_index_add_col() should guarantee this */
+ ut_ad(!field->prefix_len
+ || field->fixed_len == field->prefix_len);
+ /* Fixed lengths are not encoded
+ in ROW_FORMAT=COMPACT. */
+ rec_max_size += field_max_size;
+ continue;
+ }
+
+ field_max_size = dict_col_get_max_size(col);
+ if (UNIV_UNLIKELY(!field_max_size)) {
+ switch (col->mtype) {
+ case DATA_VARCHAR:
+ if (!comp
+ && (!strcmp(index->table->name.m_name,
+ "SYS_FOREIGN")
+ || !strcmp(index->table->name.m_name,
+ "SYS_FOREIGN_COLS"))) {
+ break;
+ }
+ /* fall through */
+ case DATA_VARMYSQL:
+ case DATA_CHAR:
+ case DATA_MYSQL:
+ /* CHAR(0) and VARCHAR(0) are possible
+ data type definitions in MariaDB.
+ The InnoDB internal SQL parser maps
+ CHAR to DATA_VARCHAR, so DATA_CHAR (or
+ DATA_MYSQL) is only coming from the
+ MariaDB SQL layer. */
+ if (comp) {
+ /* Add a length byte, because
+ fixed-length empty field are
+ fixed-length empty fields are
+ For ROW_FORMAT=REDUNDANT,
+ these bytes were added to
+ rec_max_size before this loop. */
+ rec_max_size++;
+ }
+ continue;
+ }
+
+ /* SYS_FOREIGN.ID is defined as CHAR in the
+ InnoDB internal SQL parser, which translates
+ into the incorrect VARCHAR(0). InnoDB does
+ not enforce maximum lengths of columns, so
+ that is why any data can be inserted in the
+ first place.
+
+ Likewise, SYS_FOREIGN.FOR_NAME,
+ SYS_FOREIGN.REF_NAME, SYS_FOREIGN_COLS.ID, are
+ defined as CHAR, and also they are part of a key. */
+
+ ut_ad(!strcmp(index->table->name.m_name,
+ "SYS_FOREIGN")
+ || !strcmp(index->table->name.m_name,
+ "SYS_FOREIGN_COLS"));
+ ut_ad(!comp);
+ ut_ad(col->mtype == DATA_VARCHAR);
+
+ rec_max_size += (srv_page_size == UNIV_PAGE_SIZE_MAX)
+ ? REDUNDANT_REC_MAX_DATA_SIZE
+ : page_get_free_space_of_empty(FALSE) / 2;
+ } else if (field_max_size == NAME_LEN && i == 1
+ && (!strcmp(index->table->name.m_name,
+ TABLE_STATS_NAME)
+ || !strcmp(index->table->name.m_name,
+ INDEX_STATS_NAME))) {
+ ut_ad(!strcmp(field->name, "table_name"));
+ /* Interpret "table_name" as VARCHAR(199) even
+ if it was incorrectly defined as VARCHAR(64).
+ While the caller of ha_innobase enforces the
+ maximum length on any data written, the InnoDB
+ internal SQL parser will happily write as much
+ data as is provided. The purpose of this hack
+ is to avoid InnoDB hangs after persistent
+ statistics on partitioned tables are
+ deleted. */
+ field_max_size = 199 * SYSTEM_CHARSET_MBMAXLEN;
+ }
+ field_ext_max_size = field_max_size < 256 ? 1 : 2;
+
+ if (field->prefix_len
+ && field->prefix_len < field_max_size) {
+ field_max_size = field->prefix_len;
+ }
+
+ if (comp) {
+ /* Add the extra size for ROW_FORMAT=COMPACT.
+ For ROW_FORMAT=REDUNDANT, these bytes were
+ added to rec_max_size before this loop. */
+ rec_max_size += field_ext_max_size;
+ }
+
+ rec_max_size += field_max_size;
+ }
+
+ return rec_max_size;
}
/********************************************************************//**
@@ -347,7 +863,6 @@ If mode is PAGE_CUR_LE , cursor is left at the place where an insert of the
search tuple should be performed in the B-tree. InnoDB does an insert
immediately after the cursor. Thus, the cursor may end up on a user record,
or on a page infimum record. */
-UNIV_INTERN
dberr_t
btr_cur_search_to_nth_level(
/*========================*/
@@ -356,7 +871,7 @@ btr_cur_search_to_nth_level(
const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in
tuple must be set so that it cannot get
compared to the node ptr page number field! */
- ulint mode, /*!< in: PAGE_CUR_L, ...;
+ page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...;
Inserts should always be made using
PAGE_CUR_LE to search the position! */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with
@@ -372,45 +887,73 @@ btr_cur_search_to_nth_level(
to protect the record! */
btr_cur_t* cursor, /*!< in/out: tree cursor; the cursor page is
s- or x-latched, but see also above! */
- ulint has_search_latch,/*!< in: info on the latch mode the
- caller currently has on btr_search_latch:
+ ulint has_search_latch,
+ /*!< in: info on the latch mode the
+ caller currently has on search system:
RW_S_LATCH, or 0 */
const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mtr */
+ unsigned line, /*!< in: line where called */
+ mtr_t* mtr, /*!< in: mtr */
+ ib_uint64_t autoinc)/*!< in: PAGE_ROOT_AUTO_INC to be written
+ (0 if none) */
{
- page_t* page;
+ page_t* page = NULL; /* remove warning */
buf_block_t* block;
- ulint space;
buf_block_t* guess;
ulint height;
- ulint page_no;
ulint up_match;
ulint up_bytes;
ulint low_match;
ulint low_bytes;
ulint savepoint;
ulint rw_latch;
- ulint page_mode;
+ page_cur_mode_t page_mode;
+ page_cur_mode_t search_mode = PAGE_CUR_UNSUPP;
ulint buf_mode;
ulint estimate;
- ulint zip_size;
+ ulint node_ptr_max_size = UNIV_PAGE_SIZE / 2;
page_cur_t* page_cursor;
btr_op_t btr_op;
ulint root_height = 0; /* remove warning */
dberr_t err = DB_SUCCESS;
+ ulint upper_rw_latch, root_leaf_rw_latch;
+ btr_intention_t lock_intention;
+ bool modify_external;
+ buf_block_t* tree_blocks[BTR_MAX_LEVELS];
+ ulint tree_savepoints[BTR_MAX_LEVELS];
+ ulint n_blocks = 0;
+ ulint n_releases = 0;
+ bool detected_same_key_root = false;
+
+ bool retrying_for_search_prev = false;
+ ulint leftmost_from_level = 0;
+ buf_block_t** prev_tree_blocks = NULL;
+ ulint* prev_tree_savepoints = NULL;
+ ulint prev_n_blocks = 0;
+ ulint prev_n_releases = 0;
+ bool need_path = true;
+ bool rtree_parent_modified = false;
+ bool mbr_adj = false;
+ bool found = false;
+
+ DBUG_ENTER("btr_cur_search_to_nth_level");
+
#ifdef BTR_CUR_ADAPT
btr_search_t* info;
-#endif
+#endif /* BTR_CUR_ADAPT */
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
+ offset_t offsets2_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets2 = offsets2_;
rec_offs_init(offsets_);
+ rec_offs_init(offsets2_);
/* Currently, PAGE_CUR_LE is the only search mode used for searches
ending to upper levels */
- ut_ad(level == 0 || mode == PAGE_CUR_LE);
+ ut_ad(level == 0 || mode == PAGE_CUR_LE
+ || RTREE_SEARCH_MODE(mode));
ut_ad(dict_index_check_search_tuple(index, tuple));
ut_ad(!dict_index_is_ibuf(index) || ibuf_inside(mtr));
ut_ad(dtuple_check_typed(tuple));
@@ -424,15 +967,18 @@ btr_cur_search_to_nth_level(
#ifdef UNIV_DEBUG
cursor->up_match = ULINT_UNDEFINED;
cursor->low_match = ULINT_UNDEFINED;
-#endif
+#endif /* UNIV_DEBUG */
ibool s_latch_by_caller;
s_latch_by_caller = latch_mode & BTR_ALREADY_S_LATCHED;
ut_ad(!s_latch_by_caller
- || mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_S_LOCK));
+ || srv_read_only_mode
+ || mtr_memo_contains_flagged(mtr,
+ dict_index_get_lock(index),
+ MTR_MEMO_S_LOCK
+ | MTR_MEMO_SX_LOCK));
/* These flags are mutually exclusive, they are lumped together
with the latch mode for historical reasons. It's possible for
@@ -465,16 +1011,33 @@ btr_cur_search_to_nth_level(
ut_ad(btr_op == BTR_NO_OP || !dict_index_is_ibuf(index));
/* Operations on the clustered index cannot be buffered. */
ut_ad(btr_op == BTR_NO_OP || !dict_index_is_clust(index));
+ /* Operations on the temporary table(indexes) cannot be buffered. */
+ ut_ad(btr_op == BTR_NO_OP || !dict_table_is_temporary(index->table));
+ /* Operation on the spatial index cannot be buffered. */
+ ut_ad(btr_op == BTR_NO_OP || !dict_index_is_spatial(index));
estimate = latch_mode & BTR_ESTIMATE;
+ lock_intention = btr_cur_get_and_clear_intention(&latch_mode);
+
+ modify_external = latch_mode & BTR_MODIFY_EXTERNAL;
+
/* Turn the flags unrelated to the latch mode off. */
latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
+ ut_ad(!modify_external || latch_mode == BTR_MODIFY_LEAF);
+
ut_ad(!s_latch_by_caller
|| latch_mode == BTR_SEARCH_LEAF
+ || latch_mode == BTR_SEARCH_TREE
|| latch_mode == BTR_MODIFY_LEAF);
+ ut_ad(autoinc == 0 || dict_index_is_clust(index));
+ ut_ad(autoinc == 0
+ || latch_mode == BTR_MODIFY_TREE
+ || latch_mode == BTR_MODIFY_LEAF);
+ ut_ad(autoinc == 0 || level == 0);
+
cursor->flag = BTR_CUR_BINARY;
cursor->index = index;
@@ -483,24 +1046,35 @@ btr_cur_search_to_nth_level(
#else
info = btr_search_get_info(index);
- guess = info->root_guess;
+ if (!buf_pool_is_obsolete(info->withdraw_clock)) {
+ guess = info->root_guess;
+ } else {
+ guess = NULL;
+ }
#ifdef BTR_CUR_HASH_ADAPT
# ifdef UNIV_SEARCH_PERF_STAT
info->n_searches++;
# endif
- if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_NOT_LOCKED
+ if (autoinc == 0
&& latch_mode <= BTR_MODIFY_LEAF
&& info->last_hash_succ
+# ifdef MYSQL_INDEX_DISABLE_AHI
+ && !index->disable_ahi
+# endif
&& !estimate
# ifdef PAGE_CUR_LE_OR_EXTENDS
&& mode != PAGE_CUR_LE_OR_EXTENDS
# endif /* PAGE_CUR_LE_OR_EXTENDS */
+ && !dict_index_is_spatial(index)
/* If !has_search_latch, we do a dirty read of
btr_search_enabled below, and btr_search_guess_on_hash()
will have to check it again. */
- && UNIV_LIKELY(btr_search_enabled)
+ && btr_search_enabled
+ && !modify_external
+ && rw_lock_get_writer(btr_get_search_latch(index))
+ == RW_LOCK_NOT_LOCKED
&& btr_search_guess_on_hash(index, info, tuple, mode,
latch_mode, cursor,
has_search_latch, mtr)) {
@@ -515,7 +1089,7 @@ btr_cur_search_to_nth_level(
|| mode != PAGE_CUR_LE);
btr_cur_n_sea++;
- return err;
+ DBUG_RETURN(err);
}
# endif /* BTR_CUR_HASH_ADAPT */
#endif /* BTR_CUR_ADAPT */
@@ -526,7 +1100,7 @@ btr_cur_search_to_nth_level(
if (has_search_latch) {
/* Release possible search latch to obey latching order */
- rw_lock_s_unlock(&btr_search_latch);
+ btr_search_s_unlock(index);
}
/* Store the position of the tree latch we push to mtr so that we
@@ -536,23 +1110,76 @@ btr_cur_search_to_nth_level(
switch (latch_mode) {
case BTR_MODIFY_TREE:
- mtr_x_lock(dict_index_get_lock(index), mtr);
+ /* Most delete-intended operations are purges. Free
+ blocks and read IO bandwidth should be granted to them
+ with priority when the history list is growing huge. */
+ if (lock_intention == BTR_INTENTION_DELETE
+ && trx_sys->rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH
+ && buf_get_n_pending_read_ios()) {
+ mtr_x_lock(dict_index_get_lock(index), mtr);
+ } else if (dict_index_is_spatial(index)
+ && lock_intention <= BTR_INTENTION_BOTH) {
+ /* X-lock the index if there is a possibility of a
+ pessimistic delete on the spatial index, as we could
+ lock upward in the tree. */
+
+ mtr_x_lock(dict_index_get_lock(index), mtr);
+ } else {
+ mtr_sx_lock(dict_index_get_lock(index), mtr);
+ }
+ upper_rw_latch = RW_X_LATCH;
break;
case BTR_CONT_MODIFY_TREE:
+ case BTR_CONT_SEARCH_TREE:
/* Do nothing */
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
+ ut_ad(srv_read_only_mode
+ || mtr_memo_contains_flagged(mtr,
+ dict_index_get_lock(index),
+ MTR_MEMO_X_LOCK
+ | MTR_MEMO_SX_LOCK));
+ if (dict_index_is_spatial(index)
+ && latch_mode == BTR_CONT_MODIFY_TREE) {
+ /* If we are about to locate the parent page for a
+ split and/or merge operation on an R-Tree index,
+ X-latch the parent. */
+ upper_rw_latch = RW_X_LATCH;
+ } else {
+ upper_rw_latch = RW_NO_LATCH;
+ }
break;
default:
- if (!s_latch_by_caller) {
- mtr_s_lock(dict_index_get_lock(index), mtr);
+ if (!srv_read_only_mode) {
+ if (s_latch_by_caller) {
+ ut_ad(rw_lock_own(dict_index_get_lock(index),
+ RW_LOCK_S));
+ } else if (!modify_external) {
+ /* BTR_SEARCH_TREE is intended to be used with
+ BTR_ALREADY_S_LATCHED */
+ ut_ad(latch_mode != BTR_SEARCH_TREE);
+
+ mtr_s_lock(dict_index_get_lock(index), mtr);
+ } else {
+ /* BTR_MODIFY_EXTERNAL needs to be excluded */
+ mtr_sx_lock(dict_index_get_lock(index), mtr);
+ }
+ upper_rw_latch = RW_S_LATCH;
+ } else {
+ upper_rw_latch = RW_NO_LATCH;
}
}
+ root_leaf_rw_latch = btr_cur_latch_for_root_leaf(latch_mode);
page_cursor = btr_cur_get_page_cur(cursor);
- space = dict_index_get_space(index);
- page_no = dict_index_get_page(index);
+ const ulint space = dict_index_get_space(index);
+ const page_size_t page_size(dict_table_page_size(index->table));
+
+ /* Start with the root page. */
+ page_id_t page_id(space, dict_index_get_page(index));
+
+ if (root_leaf_rw_latch == RW_X_LATCH) {
+ node_ptr_max_size = btr_node_ptr_max_size(index);
+ }
up_match = 0;
up_bytes = 0;
@@ -575,22 +1202,41 @@ btr_cur_search_to_nth_level(
default:
#ifdef PAGE_CUR_LE_OR_EXTENDS
ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
+ || RTREE_SEARCH_MODE(mode)
|| mode == PAGE_CUR_LE_OR_EXTENDS);
#else /* PAGE_CUR_LE_OR_EXTENDS */
- ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE);
+ ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
+ || RTREE_SEARCH_MODE(mode));
#endif /* PAGE_CUR_LE_OR_EXTENDS */
page_mode = mode;
break;
}
/* Loop and search until we arrive at the desired level */
+ btr_latch_leaves_t latch_leaves = {{NULL, NULL, NULL}, {0, 0, 0}};
search_loop:
buf_mode = BUF_GET;
rw_latch = RW_NO_LATCH;
+ rtree_parent_modified = false;
if (height != 0) {
/* We are about to fetch the root or a non-leaf page. */
+ if ((latch_mode != BTR_MODIFY_TREE || height == level)
+ && !retrying_for_search_prev) {
+ /* If we do not hold an SX or X latch on the index,
+ each page should be latched before reading. */
+ if (height == ULINT_UNDEFINED
+ && upper_rw_latch == RW_S_LATCH
+ && (modify_external || autoinc)) {
+ /* We need an sx-latch on the root page for an
+ fseg operation or for writing
+ PAGE_ROOT_AUTO_INC. */
+ rw_latch = RW_SX_LATCH;
+ } else {
+ rw_latch = upper_rw_latch;
+ }
+ }
} else if (latch_mode <= BTR_MODIFY_LEAF) {
rw_latch = latch_mode;
@@ -606,12 +1252,12 @@ search_loop:
}
}
- zip_size = dict_table_zip_size(index->table);
-
retry_page_get:
- block = buf_page_get_gen(
- space, zip_size, page_no, rw_latch, guess, buf_mode,
- file, line, mtr, &err);
+ ut_ad(n_blocks < BTR_MAX_LEVELS);
+ tree_savepoints[n_blocks] = mtr_set_savepoint(mtr);
+ block = buf_page_get_gen(page_id, page_size, rw_latch, guess,
+ buf_mode, file, line, mtr, &err);
+ tree_blocks[n_blocks] = block;
/* Note that block==NULL signifies either an error or change
buffering. */
@@ -624,7 +1270,7 @@ retry_page_get:
"Table %s is encrypted but encryption service or"
" used key_id is not available. "
" Can't continue reading table.",
- index->table->name);
+ index->table->name.m_name);
index->table->file_unreadable = true;
}
@@ -642,10 +1288,10 @@ retry_page_get:
case BTR_INSERT_OP:
case BTR_INSERT_IGNORE_UNIQUE_OP:
ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
+ ut_ad(!dict_index_is_spatial(index));
if (ibuf_insert(IBUF_OP_INSERT, tuple, index,
- space, zip_size, page_no,
- cursor->thr)) {
+ page_id, page_size, cursor->thr)) {
cursor->flag = BTR_CUR_INSERT_TO_IBUF;
@@ -655,10 +1301,11 @@ retry_page_get:
case BTR_DELMARK_OP:
ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
+ ut_ad(!dict_index_is_spatial(index));
if (ibuf_insert(IBUF_OP_DELETE_MARK, tuple,
- index, space, zip_size,
- page_no, cursor->thr)) {
+ index, page_id, page_size,
+ cursor->thr)) {
cursor->flag = BTR_CUR_DEL_MARK_IBUF;
@@ -669,6 +1316,7 @@ retry_page_get:
case BTR_DELETE_OP:
ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH);
+ ut_ad(!dict_index_is_spatial(index));
if (!row_purge_poss_sec(cursor->purge_node,
index, tuple)) {
@@ -676,19 +1324,18 @@ retry_page_get:
/* The record cannot be purged yet. */
cursor->flag = BTR_CUR_DELETE_REF;
} else if (ibuf_insert(IBUF_OP_DELETE, tuple,
- index, space, zip_size,
- page_no,
+ index, page_id, page_size,
cursor->thr)) {
/* The purge was buffered. */
cursor->flag = BTR_CUR_DELETE_IBUF;
} else {
/* The purge could not be buffered. */
- buf_pool_watch_unset(space, page_no);
+ buf_pool_watch_unset(page_id);
break;
}
- buf_pool_watch_unset(space, page_no);
+ buf_pool_watch_unset(page_id);
goto func_exit;
default:
@@ -703,9 +1350,98 @@ retry_page_get:
goto retry_page_get;
}
- block->check_index_page_at_flush = TRUE;
+ if (retrying_for_search_prev && height != 0) {
+ /* also latch left sibling */
+ ulint left_page_no;
+ buf_block_t* get_block;
+
+ ut_ad(rw_latch == RW_NO_LATCH);
+
+ rw_latch = upper_rw_latch;
+
+ rw_lock_s_lock(&block->lock);
+ left_page_no = btr_page_get_prev(buf_block_get_frame(block));
+ rw_lock_s_unlock(&block->lock);
+
+ if (left_page_no != FIL_NULL) {
+ ut_ad(prev_n_blocks < leftmost_from_level);
+
+ prev_tree_savepoints[prev_n_blocks]
+ = mtr_set_savepoint(mtr);
+ get_block = buf_page_get_gen(
+ page_id_t(page_id.space(), left_page_no),
+ page_size, rw_latch, NULL, buf_mode,
+ file, line, mtr, &err);
+ prev_tree_blocks[prev_n_blocks] = get_block;
+ prev_n_blocks++;
+
+ if (err != DB_SUCCESS) {
+ if (err == DB_DECRYPTION_FAILED) {
+ ib_push_warning((void *)NULL,
+ DB_DECRYPTION_FAILED,
+ "Table %s is encrypted but encryption service or"
+ " used key_id is not available. "
+ " Can't continue reading table.",
+ index->table->name.m_name);
+ index->table->file_unreadable = true;
+ }
+
+ goto func_exit;
+ }
+
+ /* BTR_MODIFY_TREE doesn't update prev/next_page_no
+ without the parent page's lock, so there is no need to
+ retry here, because we hold the parent page's lock. */
+ }
+
+ /* release RW_NO_LATCH page and lock with RW_S_LATCH */
+ mtr_release_block_at_savepoint(
+ mtr, tree_savepoints[n_blocks],
+ tree_blocks[n_blocks]);
+
+ tree_savepoints[n_blocks] = mtr_set_savepoint(mtr);
+ block = buf_page_get_gen(page_id, page_size, rw_latch, NULL,
+ buf_mode, file, line, mtr, &err);
+ tree_blocks[n_blocks] = block;
+
+ if (err != DB_SUCCESS) {
+ if (err == DB_DECRYPTION_FAILED) {
+ ib_push_warning((void *)NULL,
+ DB_DECRYPTION_FAILED,
+ "Table %s is encrypted but encryption service or"
+ " used key_id is not available. "
+ " Can't continue reading table.",
+ index->table->name.m_name);
+ index->table->file_unreadable = true;
+ }
+
+ goto func_exit;
+ }
+ }
+
page = buf_block_get_frame(block);
+ if (height == ULINT_UNDEFINED
+ && page_is_leaf(page)
+ && rw_latch != RW_NO_LATCH
+ && rw_latch != root_leaf_rw_latch) {
+ /* The root page is also a leaf page (root_leaf).
+ We should reacquire the page, because the root page
+ is latched differently from leaf pages. */
+ ut_ad(root_leaf_rw_latch != RW_NO_LATCH);
+ ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_SX_LATCH);
+ ut_ad(rw_latch == RW_S_LATCH || modify_external || autoinc);
+ ut_ad(!autoinc || root_leaf_rw_latch == RW_X_LATCH);
+
+ ut_ad(n_blocks == 0);
+ mtr_release_block_at_savepoint(
+ mtr, tree_savepoints[n_blocks],
+ tree_blocks[n_blocks]);
+
+ upper_rw_latch = root_leaf_rw_latch;
+ goto search_loop;
+ }
+
if (rw_latch != RW_NO_LATCH) {
#ifdef UNIV_ZIP_DEBUG
const page_zip_des_t* page_zip
@@ -718,7 +1454,7 @@ retry_page_get:
? SYNC_IBUF_TREE_NODE : SYNC_TREE_NODE);
}
- ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+ ut_ad(fil_page_index_page_check(page));
ut_ad(index->id == btr_page_get_index_id(page));
if (UNIV_UNLIKELY(height == ULINT_UNDEFINED)) {
@@ -728,9 +1464,32 @@ retry_page_get:
root_height = height;
cursor->tree_height = root_height + 1;
+ if (dict_index_is_spatial(index)) {
+ ut_ad(cursor->rtr_info);
+
+ node_seq_t seq_no = rtr_get_current_ssn_id(index);
+
+ /* If SSN in memory is not initialized, fetch
+ it from root page */
+ if (seq_no < 1) {
+ node_seq_t root_seq_no;
+
+ root_seq_no = page_get_ssn_id(page);
+
+ mutex_enter(&(index->rtr_ssn.mutex));
+ index->rtr_ssn.seq_no = root_seq_no + 1;
+ mutex_exit(&(index->rtr_ssn.mutex));
+ }
+
+ /* Save the MBR */
+ cursor->rtr_info->thr = cursor->thr;
+ rtr_get_mbr_from_tuple(tuple, &cursor->rtr_info->mbr);
+ }
+
#ifdef BTR_CUR_ADAPT
if (block != guess) {
info->root_guess = block;
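+			/* Also remember the buffer pool "withdraw clock",
+			so that a root_guess that was withdrawn during a
+			buffer pool resize can be detected as stale. */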
+ info->withdraw_clock = buf_withdraw_clock;
}
#endif
}
@@ -738,30 +1497,155 @@ retry_page_get:
if (height == 0) {
if (rw_latch == RW_NO_LATCH) {
- btr_cur_latch_leaves(
- page, space, zip_size, page_no, latch_mode,
+ latch_leaves = btr_cur_latch_leaves(
+ block, page_id, page_size, latch_mode,
cursor, mtr);
}
switch (latch_mode) {
case BTR_MODIFY_TREE:
case BTR_CONT_MODIFY_TREE:
+ case BTR_CONT_SEARCH_TREE:
break;
default:
- if (!s_latch_by_caller) {
+ if (!s_latch_by_caller
+ && !srv_read_only_mode
+ && !modify_external) {
/* Release the tree s-latch */
+ /* NOTE: BTR_MODIFY_EXTERNAL
+ needs to keep tree sx-latch */
mtr_release_s_latch_at_savepoint(
mtr, savepoint,
dict_index_get_lock(index));
}
+
+ /* release upper blocks */
+ if (retrying_for_search_prev) {
+ ut_ad(!autoinc);
+ for (;
+ prev_n_releases < prev_n_blocks;
+ prev_n_releases++) {
+ mtr_release_block_at_savepoint(
+ mtr,
+ prev_tree_savepoints[
+ prev_n_releases],
+ prev_tree_blocks[
+ prev_n_releases]);
+ }
+ }
+
+ for (; n_releases < n_blocks; n_releases++) {
+ if (n_releases == 0
+ && (modify_external || autoinc)) {
+ /* keep the root page latch */
+ ut_ad(mtr_memo_contains_flagged(
+ mtr, tree_blocks[n_releases],
+ MTR_MEMO_PAGE_SX_FIX
+ | MTR_MEMO_PAGE_X_FIX));
+ continue;
+ }
+
+ mtr_release_block_at_savepoint(
+ mtr, tree_savepoints[n_releases],
+ tree_blocks[n_releases]);
+ }
}
page_mode = mode;
}
- page_cur_search_with_match(
- block, index, tuple, page_mode, &up_match, &up_bytes,
- &low_match, &low_bytes, page_cursor);
+ if (dict_index_is_spatial(index)) {
+ /* Remember the page search mode */
+ search_mode = page_mode;
+
+		/* Adjust the search mode when the page search mode is
+		PAGE_CUR_RTREE_LOCATE or PAGE_CUR_RTREE_INSERT, as we
+		are searching with MBRs. When we are not at the target
+		level, we should search all sub-trees that "CONTAIN"
+		the search range/MBR. At the target level, the search
+		becomes PAGE_CUR_LE */
+ if (page_mode == PAGE_CUR_RTREE_LOCATE
+ && level == height) {
+ if (level == 0) {
+ page_mode = PAGE_CUR_LE;
+ } else {
+ page_mode = PAGE_CUR_RTREE_GET_FATHER;
+ }
+ }
+
+ if (page_mode == PAGE_CUR_RTREE_INSERT) {
+ page_mode = (level == height)
+ ? PAGE_CUR_LE
+ : PAGE_CUR_RTREE_INSERT;
+
+ ut_ad(!page_is_leaf(page) || page_mode == PAGE_CUR_LE);
+ }
+
+		/* "need_path" indicates whether we need to track the
+		parent pages. If this is not a spatial comparison, there
+		is no need to track them. */
+ if (page_mode < PAGE_CUR_CONTAIN) {
+ need_path = false;
+ }
+
+ up_match = 0;
+ low_match = 0;
+
+ if (latch_mode == BTR_MODIFY_TREE
+ || latch_mode == BTR_CONT_MODIFY_TREE
+ || latch_mode == BTR_CONT_SEARCH_TREE) {
+			/* The tree is locked, so no page lock is needed
+			to protect the "path" */
+ cursor->rtr_info->need_page_lock = false;
+ }
+ }
+
+ if (dict_index_is_spatial(index) && page_mode >= PAGE_CUR_CONTAIN) {
+ ut_ad(need_path);
+ found = rtr_cur_search_with_match(
+ block, index, tuple, page_mode, page_cursor,
+ cursor->rtr_info);
+
+ /* Need to use BTR_MODIFY_TREE to do the MBR adjustment */
+ if (search_mode == PAGE_CUR_RTREE_INSERT
+ && cursor->rtr_info->mbr_adj) {
+ if (latch_mode & BTR_MODIFY_LEAF) {
+				/* The parent MBR needs to be updated;
+				we should retry with BTR_MODIFY_TREE */
+ goto func_exit;
+ } else if (latch_mode & BTR_MODIFY_TREE) {
+ rtree_parent_modified = true;
+ cursor->rtr_info->mbr_adj = false;
+ mbr_adj = true;
+ } else {
+ ut_ad(0);
+ }
+ }
+
+ if (found && page_mode == PAGE_CUR_RTREE_GET_FATHER) {
+ cursor->low_match =
+ DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1;
+ }
+#ifdef BTR_CUR_HASH_ADAPT
+ } else if (height == 0 && btr_search_enabled
+ && !dict_index_is_spatial(index)) {
+ /* The adaptive hash index is only used when searching
+ for leaf pages (height==0), but not in r-trees.
+ We only need the byte prefix comparison for the purpose
+ of updating the adaptive hash index. */
+ page_cur_search_with_match_bytes(
+ block, index, tuple, page_mode, &up_match, &up_bytes,
+ &low_match, &low_bytes, page_cursor);
+#endif /* BTR_CUR_HASH_ADAPT */
+ } else {
+ /* Search for complete index fields. */
+ up_bytes = low_bytes = 0;
+ page_cur_search_with_match(
+ block, index, tuple, page_mode, &up_match,
+ &low_match, page_cursor,
+ need_path ? cursor->rtr_info : NULL);
+ }
if (estimate) {
btr_cur_add_path_info(cursor, height, root_height);
@@ -772,6 +1656,34 @@ retry_page_get:
ut_ad(height == btr_page_get_level(page_cur_get_page(page_cursor),
mtr));
+	/* Add a predicate lock if this is the serializable isolation
+	level, and only in the search case */
+ if (dict_index_is_spatial(index)
+ && cursor->rtr_info->need_prdt_lock
+ && mode != PAGE_CUR_RTREE_INSERT
+ && mode != PAGE_CUR_RTREE_LOCATE
+ && mode >= PAGE_CUR_CONTAIN) {
+ trx_t* trx = thr_get_trx(cursor->thr);
+ lock_prdt_t prdt;
+
+ lock_mutex_enter();
+ lock_init_prdt_from_mbr(
+ &prdt, &cursor->rtr_info->mbr, mode,
+ trx->lock.lock_heap);
+ lock_mutex_exit();
+
+ if (rw_latch == RW_NO_LATCH && height != 0) {
+ rw_lock_s_lock(&(block->lock));
+ }
+
+ lock_prdt_lock(block, &prdt, index, LOCK_S,
+ LOCK_PREDICATE, cursor->thr, mtr);
+
+ if (rw_latch == RW_NO_LATCH && height != 0) {
+ rw_lock_s_unlock(&(block->lock));
+ }
+ }
+
if (level != height) {
const rec_t* node_ptr;
@@ -782,11 +1694,292 @@ retry_page_get:
node_ptr = page_cur_get_rec(page_cursor);
- offsets = rec_get_offsets(
- node_ptr, index, offsets, ULINT_UNDEFINED, &heap);
+ offsets = rec_get_offsets(node_ptr, index, offsets, false,
+ ULINT_UNDEFINED, &heap);
+
+	/* If the rec is the first or last in the page for
+	pessimistic delete intention, it might cause a node_ptr
+	insert at the upper level. We should change the intention
+	and retry. */
+ if (latch_mode == BTR_MODIFY_TREE
+ && btr_cur_need_opposite_intention(
+ page, lock_intention, node_ptr)) {
+
+need_opposite_intention:
+ ut_ad(upper_rw_latch == RW_X_LATCH);
+
+ if (n_releases > 0) {
+ /* release root block */
+ mtr_release_block_at_savepoint(
+ mtr, tree_savepoints[0],
+ tree_blocks[0]);
+ }
+
+ /* release all blocks */
+ for (; n_releases <= n_blocks; n_releases++) {
+ mtr_release_block_at_savepoint(
+ mtr, tree_savepoints[n_releases],
+ tree_blocks[n_releases]);
+ }
+
+ lock_intention = BTR_INTENTION_BOTH;
+
+ page_id = page_id_t(space, dict_index_get_page(index));
+ up_match = 0;
+ low_match = 0;
+ height = ULINT_UNDEFINED;
+
+ n_blocks = 0;
+ n_releases = 0;
+
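+			/* Restart the search from the root, this time
+			with the adjusted latching intention. */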
+ goto search_loop;
+ }
+
+ if (dict_index_is_spatial(index)) {
+ if (page_rec_is_supremum(node_ptr)) {
+ cursor->low_match = 0;
+ cursor->up_match = 0;
+ goto func_exit;
+ }
+
+ /* If we are doing insertion or record locating,
+ remember the tree nodes we visited */
+ if (page_mode == PAGE_CUR_RTREE_INSERT
+ || (search_mode == PAGE_CUR_RTREE_LOCATE
+ && (latch_mode != BTR_MODIFY_LEAF))) {
+ bool add_latch = false;
+
+ if (latch_mode == BTR_MODIFY_TREE
+ && rw_latch == RW_NO_LATCH) {
+ ut_ad(mtr_memo_contains_flagged(
+ mtr, dict_index_get_lock(index),
+ MTR_MEMO_X_LOCK
+ | MTR_MEMO_SX_LOCK));
+ rw_lock_s_lock(&block->lock);
+ add_latch = true;
+ }
+
+ /* Store the parent cursor location */
+#ifdef UNIV_DEBUG
+ ulint num_stored = rtr_store_parent_path(
+ block, cursor, latch_mode,
+ height + 1, mtr);
+#else
+ rtr_store_parent_path(
+ block, cursor, latch_mode,
+ height + 1, mtr);
+#endif
+
+ if (page_mode == PAGE_CUR_RTREE_INSERT) {
+ btr_pcur_t* r_cursor =
+ rtr_get_parent_cursor(
+ cursor, height + 1,
+ true);
+					/* For insertion, there should be
+					exactly one parent at each level
+					traversed */
+#ifdef UNIV_DEBUG
+ ut_ad(num_stored == 1);
+#endif
+
+ node_ptr = btr_pcur_get_rec(r_cursor);
+
+ }
+
+ if (add_latch) {
+ rw_lock_s_unlock(&block->lock);
+ }
+
+ ut_ad(!page_rec_is_supremum(node_ptr));
+ }
+
+ ut_ad(page_mode == search_mode
+ || (page_mode == PAGE_CUR_WITHIN
+ && search_mode == PAGE_CUR_RTREE_LOCATE));
+
+ page_mode = search_mode;
+ }
+
+		/* If node_ptr is the first or the last record of the page,
+		or has the same key value as the first or the last record,
+		another page might be chosen under BTR_CONT_MODIFY_TREE.
+		So, the parent page must not be released, to avoid a
+		deadlock from blocking another search with the same key
+		value. */
+ if (!detected_same_key_root
+ && lock_intention == BTR_INTENTION_BOTH
+ && !dict_index_is_unique(index)
+ && latch_mode == BTR_MODIFY_TREE
+ && (up_match >= rec_offs_n_fields(offsets) - 1
+ || low_match >= rec_offs_n_fields(offsets) - 1)) {
+ const rec_t* first_rec = page_rec_get_next_const(
+ page_get_infimum_rec(page));
+ ulint matched_fields;
+
+ ut_ad(upper_rw_latch == RW_X_LATCH);
+
+ if (node_ptr == first_rec
+ || page_rec_is_last(node_ptr, page)) {
+ detected_same_key_root = true;
+ } else {
+ matched_fields = 0;
+
+ offsets2 = rec_get_offsets(
+ first_rec, index, offsets2,
+ false, ULINT_UNDEFINED, &heap);
+ cmp_rec_rec(node_ptr, first_rec,
+ offsets, offsets2, index, false,
+ &matched_fields);
+
+ if (matched_fields
+ >= rec_offs_n_fields(offsets) - 1) {
+ detected_same_key_root = true;
+ } else {
+ const rec_t* last_rec;
+
+ last_rec = page_rec_get_prev_const(
+ page_get_supremum_rec(page));
+
+ matched_fields = 0;
+
+ offsets2 = rec_get_offsets(
+ last_rec, index, offsets2,
+ false, ULINT_UNDEFINED, &heap);
+ cmp_rec_rec(
+ node_ptr, last_rec,
+ offsets, offsets2, index,
+ false, &matched_fields);
+ if (matched_fields
+ >= rec_offs_n_fields(offsets) - 1) {
+ detected_same_key_root = true;
+ }
+ }
+ }
+ }
+
+		/* If this page might cause a tree modification,
+		we should not release the parent page's lock. */
+ if (!detected_same_key_root
+ && latch_mode == BTR_MODIFY_TREE
+ && !btr_cur_will_modify_tree(
+ index, page, lock_intention, node_ptr,
+ node_ptr_max_size, page_size, mtr)
+ && !rtree_parent_modified) {
+ ut_ad(upper_rw_latch == RW_X_LATCH);
+ ut_ad(n_releases <= n_blocks);
+
+ /* we can release upper blocks */
+ for (; n_releases < n_blocks; n_releases++) {
+ if (n_releases == 0) {
+					/* do not release the root page,
+					so that it stays pinned to the
+					same block. */
+ continue;
+ }
+
+ /* release unused blocks to unpin */
+ mtr_release_block_at_savepoint(
+ mtr, tree_savepoints[n_releases],
+ tree_blocks[n_releases]);
+ }
+ }
+
+ if (height == level
+ && latch_mode == BTR_MODIFY_TREE) {
+ ut_ad(upper_rw_latch == RW_X_LATCH);
+			/* sx-latch the root page if it has already been
+			released; it contains the segment header. */
+ if (n_releases > 0) {
+ mtr_block_sx_latch_at_savepoint(
+ mtr, tree_savepoints[0],
+ tree_blocks[0]);
+ }
+
+ /* x-latch the branch blocks not released yet. */
+ for (ulint i = n_releases; i <= n_blocks; i++) {
+ mtr_block_x_latch_at_savepoint(
+ mtr, tree_savepoints[i],
+ tree_blocks[i]);
+ }
+ }
+
+		/* We should consider the prev_page of the parent page, if
+		node_ptr is the leftmost record of the page, because
+		BTR_SEARCH_PREV and BTR_MODIFY_PREV latch the prev_page
+		of the leaf page. */
+ if ((latch_mode == BTR_SEARCH_PREV
+ || latch_mode == BTR_MODIFY_PREV)
+ && !retrying_for_search_prev) {
+ /* block should be latched for consistent
+ btr_page_get_prev() */
+ ut_ad(mtr_memo_contains_flagged(mtr, block,
+ MTR_MEMO_PAGE_S_FIX
+ | MTR_MEMO_PAGE_X_FIX));
+
+ if (page_has_prev(page)
+ && page_rec_is_first(node_ptr, page)) {
+
+ if (leftmost_from_level == 0) {
+ leftmost_from_level = height + 1;
+ }
+ } else {
+ leftmost_from_level = 0;
+ }
+
+ if (height == 0 && leftmost_from_level > 0) {
+			/* retry, so that we also get the prev_page,
+			starting from level == leftmost_from_level. */
+ retrying_for_search_prev = true;
+
+ prev_tree_blocks = static_cast<buf_block_t**>(
+ ut_malloc_nokey(sizeof(buf_block_t*)
+ * leftmost_from_level));
+
+ prev_tree_savepoints = static_cast<ulint*>(
+ ut_malloc_nokey(sizeof(ulint)
+ * leftmost_from_level));
+
+ /* back to the level (leftmost_from_level+1) */
+ ulint idx = n_blocks
+ - (leftmost_from_level - 1);
+
+ page_id = page_id_t(
+ space,
+ tree_blocks[idx]->page.id.page_no());
+
+ for (ulint i = n_blocks
+ - (leftmost_from_level - 1);
+ i <= n_blocks; i++) {
+ mtr_release_block_at_savepoint(
+ mtr, tree_savepoints[i],
+ tree_blocks[i]);
+ }
+
+ n_blocks -= (leftmost_from_level - 1);
+ height = leftmost_from_level;
+ ut_ad(n_releases == 0);
+
+ /* replay up_match, low_match */
+ up_match = 0;
+ low_match = 0;
+ rtr_info_t* rtr_info = need_path
+ ? cursor->rtr_info : NULL;
+
+ for (ulint i = 0; i < n_blocks; i++) {
+ page_cur_search_with_match(
+ tree_blocks[i], index, tuple,
+ page_mode, &up_match,
+ &low_match, page_cursor,
+ rtr_info);
+ }
+
+ goto search_loop;
+ }
+ }
/* Go to the child node */
- page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
+ page_id = page_id_t(
+ space,
+ btr_node_ptr_get_child_page_no(node_ptr, offsets));
+
+ n_blocks++;
if (UNIV_UNLIKELY(height == 0 && dict_index_is_ibuf(index))) {
/* We're doing a search on an ibuf tree and we're one
@@ -799,31 +1992,136 @@ retry_page_get:
goto retry_page_get;
}
+ if (dict_index_is_spatial(index)
+ && page_mode >= PAGE_CUR_CONTAIN
+ && page_mode != PAGE_CUR_RTREE_INSERT) {
+ ut_ad(need_path);
+ rtr_node_path_t* path =
+ cursor->rtr_info->path;
+
+ if (!path->empty() && found) {
+ ut_ad(path->back().page_no
+ == page_id.page_no());
+ path->pop_back();
+#ifdef UNIV_DEBUG
+ if (page_mode == PAGE_CUR_RTREE_LOCATE
+ && (latch_mode != BTR_MODIFY_LEAF)) {
+ btr_pcur_t* cur
+ = cursor->rtr_info->parent_path->back(
+ ).cursor;
+ rec_t* my_node_ptr
+ = btr_pcur_get_rec(cur);
+
+ offsets = rec_get_offsets(
+ my_node_ptr, index, offsets,
+ false, ULINT_UNDEFINED, &heap);
+
+ ulint my_page_no
+ = btr_node_ptr_get_child_page_no(
+ my_node_ptr, offsets);
+
+ ut_ad(page_id.page_no() == my_page_no);
+ }
+#endif
+ }
+ }
+
goto search_loop;
+ } else if (!dict_index_is_spatial(index)
+ && latch_mode == BTR_MODIFY_TREE
+ && lock_intention == BTR_INTENTION_INSERT
+ && page_has_next(page)
+ && page_rec_is_last(page_cur_get_rec(page_cursor), page)) {
+
+		/* btr_insert_into_right_sibling() might cause the
+		node_ptr at the upper level to be deleted */
+
+ guess = NULL;
+
+ if (height == 0) {
+ /* release the leaf pages if latched */
+ for (uint i = 0; i < 3; i++) {
+ if (latch_leaves.blocks[i] != NULL) {
+ mtr_release_block_at_savepoint(
+ mtr, latch_leaves.savepoints[i],
+ latch_leaves.blocks[i]);
+ latch_leaves.blocks[i] = NULL;
+ }
+ }
+ }
+
+ goto need_opposite_intention;
}
if (level != 0) {
- /* x-latch the page */
- buf_block_t* child_block = btr_block_get(
- space, zip_size, page_no, RW_X_LATCH, index, mtr);
+ ut_ad(!autoinc);
+
+ if (upper_rw_latch == RW_NO_LATCH) {
+ /* latch the page */
+ buf_block_t* child_block;
+
+ if (latch_mode == BTR_CONT_MODIFY_TREE) {
+ child_block = btr_block_get(
+ page_id, page_size, RW_X_LATCH,
+ index, mtr);
+ } else {
+ ut_ad(latch_mode == BTR_CONT_SEARCH_TREE);
+ child_block = btr_block_get(
+ page_id, page_size, RW_SX_LATCH,
+ index, mtr);
+ }
+
+ btr_assert_not_corrupted(child_block, index);
+ } else {
+ ut_ad(mtr_memo_contains(mtr, block, upper_rw_latch));
+ btr_assert_not_corrupted(block, index);
+
+ if (s_latch_by_caller) {
+ ut_ad(latch_mode == BTR_SEARCH_TREE);
+				/* the caller should sx-latch the index to
+				exclude tree-modifying operations. */
+ ut_ad(mtr_memo_contains(
+ mtr, dict_index_get_lock(index),
+ MTR_MEMO_SX_LOCK));
+				/* because the index is sx-latched,
+				the upper blocks can be released. */
+ for (; n_releases < n_blocks; n_releases++) {
+ mtr_release_block_at_savepoint(
+ mtr,
+ tree_savepoints[n_releases],
+ tree_blocks[n_releases]);
+ }
+ }
+ }
- page = buf_block_get_frame(child_block);
- btr_assert_not_corrupted(child_block, index);
+ if (page_mode <= PAGE_CUR_LE) {
+ cursor->low_match = low_match;
+ cursor->up_match = up_match;
+ }
} else {
cursor->low_match = low_match;
cursor->low_bytes = low_bytes;
cursor->up_match = up_match;
cursor->up_bytes = up_bytes;
-#ifdef BTR_CUR_ADAPT
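+		/* Write the requested AUTO_INCREMENT value to the root
+		page header (PAGE_ROOT_AUTO_INC); the root is still
+		latched as tree_blocks[0]. */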
+ if (autoinc) {
+ page_set_autoinc(tree_blocks[0],
+ index, autoinc, mtr, false);
+ }
+
+#ifdef BTR_CUR_HASH_ADAPT
/* We do a dirty read of btr_search_enabled here. We
will properly check btr_search_enabled again in
btr_search_build_page_hash_index() before building a
- page hash index, while holding btr_search_latch. */
- if (btr_search_enabled) {
+ page hash index, while holding search latch. */
+ if (btr_search_enabled
+# ifdef MYSQL_INDEX_DISABLE_AHI
+ && !index->disable_ahi
+# endif
+ ) {
btr_search_info_update(index, cursor);
}
-#endif
+#endif /* BTR_CUR_HASH_ADAPT */
ut_ad(cursor->up_match != ULINT_UNDEFINED
|| mode != PAGE_CUR_GE);
ut_ad(cursor->up_match != ULINT_UNDEFINED
@@ -832,23 +2130,46 @@ retry_page_get:
|| mode != PAGE_CUR_LE);
}
+ /* For spatial index, remember what blocks are still latched */
+ if (dict_index_is_spatial(index)
+ && (latch_mode == BTR_MODIFY_TREE
+ || latch_mode == BTR_MODIFY_LEAF)) {
+ for (ulint i = 0; i < n_releases; i++) {
+ cursor->rtr_info->tree_blocks[i] = NULL;
+ cursor->rtr_info->tree_savepoints[i] = 0;
+ }
+
+ for (ulint i = n_releases; i <= n_blocks; i++) {
+ cursor->rtr_info->tree_blocks[i] = tree_blocks[i];
+ cursor->rtr_info->tree_savepoints[i] = tree_savepoints[i];
+ }
+ }
+
func_exit:
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
+ if (retrying_for_search_prev) {
+ ut_free(prev_tree_blocks);
+ ut_free(prev_tree_savepoints);
+ }
+
if (has_search_latch) {
+ btr_search_s_lock(index);
+ }
- rw_lock_s_lock(&btr_search_latch);
+ if (mbr_adj) {
+ /* remember that we will need to adjust parent MBR */
+ cursor->rtr_info->mbr_adj = true;
}
- return err;
+ DBUG_RETURN(err);
}
/*****************************************************************//**
Opens a cursor at either end of an index. */
-UNIV_INTERN
dberr_t
btr_cur_open_at_index_side_func(
/*============================*/
@@ -860,21 +2181,25 @@ btr_cur_open_at_index_side_func(
ulint level, /*!< in: level to search for
(0=leaf). */
const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
+ unsigned line, /*!< in: line where called */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
page_cur_t* page_cursor;
- ulint page_no;
- ulint space;
- ulint zip_size;
+ ulint node_ptr_max_size = UNIV_PAGE_SIZE / 2;
ulint height;
ulint root_height = 0; /* remove warning */
rec_t* node_ptr;
ulint estimate;
ulint savepoint;
+ ulint upper_rw_latch, root_leaf_rw_latch;
+ btr_intention_t lock_intention;
+ buf_block_t* tree_blocks[BTR_MAX_LEVELS];
+ ulint tree_savepoints[BTR_MAX_LEVELS];
+ ulint n_blocks = 0;
+ ulint n_releases = 0;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
dberr_t err = DB_SUCCESS;
rec_offs_init(offsets_);
@@ -884,6 +2209,22 @@ btr_cur_open_at_index_side_func(
ut_ad(level != ULINT_UNDEFINED);
+ bool s_latch_by_caller;
+
+ s_latch_by_caller = latch_mode & BTR_ALREADY_S_LATCHED;
+ latch_mode &= ~BTR_ALREADY_S_LATCHED;
+
+ lock_intention = btr_cur_get_and_clear_intention(&latch_mode);
+
+ ut_ad(!(latch_mode & BTR_MODIFY_EXTERNAL));
+
+ /* This function doesn't need to lock left page of the leaf page */
+ if (latch_mode == BTR_SEARCH_PREV) {
+ latch_mode = BTR_SEARCH_LEAF;
+ } else if (latch_mode == BTR_MODIFY_PREV) {
+ latch_mode = BTR_MODIFY_LEAF;
+ }
+
/* Store the position of the tree latch we push to mtr so that we
know how to release it when we have latched the leaf node */
@@ -891,37 +2232,75 @@ btr_cur_open_at_index_side_func(
switch (latch_mode) {
case BTR_CONT_MODIFY_TREE:
+ case BTR_CONT_SEARCH_TREE:
+ upper_rw_latch = RW_NO_LATCH;
break;
case BTR_MODIFY_TREE:
- mtr_x_lock(dict_index_get_lock(index), mtr);
- break;
- case BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED:
- case BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED:
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_S_LOCK));
+		/* Most delete-intended operations are purges. Free blocks
+		and read IO bandwidth should be prioritized for them when
+		the history list is growing huge. */
+ if (lock_intention == BTR_INTENTION_DELETE
+ && trx_sys->rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH
+ && buf_get_n_pending_read_ios()) {
+ mtr_x_lock(dict_index_get_lock(index), mtr);
+ } else {
+ mtr_sx_lock(dict_index_get_lock(index), mtr);
+ }
+ upper_rw_latch = RW_X_LATCH;
break;
default:
- mtr_s_lock(dict_index_get_lock(index), mtr);
+ ut_ad(!s_latch_by_caller
+ || mtr_memo_contains_flagged(mtr,
+ dict_index_get_lock(index),
+ MTR_MEMO_SX_LOCK
+ | MTR_MEMO_S_LOCK));
+ if (!srv_read_only_mode) {
+ if (!s_latch_by_caller) {
+ /* BTR_SEARCH_TREE is intended to be used with
+ BTR_ALREADY_S_LATCHED */
+ ut_ad(latch_mode != BTR_SEARCH_TREE);
+
+ mtr_s_lock(dict_index_get_lock(index), mtr);
+ }
+ upper_rw_latch = RW_S_LATCH;
+ } else {
+ upper_rw_latch = RW_NO_LATCH;
+ }
}
+ root_leaf_rw_latch = btr_cur_latch_for_root_leaf(latch_mode);
page_cursor = btr_cur_get_page_cur(cursor);
cursor->index = index;
- space = dict_index_get_space(index);
- zip_size = dict_table_zip_size(index->table);
- page_no = dict_index_get_page(index);
+ page_id_t page_id(dict_index_get_space(index),
+ dict_index_get_page(index));
+ const page_size_t& page_size = dict_table_page_size(index->table);
+
+ if (root_leaf_rw_latch == RW_X_LATCH) {
+ node_ptr_max_size = btr_node_ptr_max_size(index);
+ }
height = ULINT_UNDEFINED;
for (;;) {
- buf_block_t* block=NULL;
- page_t* page=NULL;
+ buf_block_t* block;
+ ulint rw_latch;
- block = buf_page_get_gen(space, zip_size, page_no,
- RW_NO_LATCH, NULL, BUF_GET,
- file, line, mtr, &err);
+ ut_ad(n_blocks < BTR_MAX_LEVELS);
+
+ if (height != 0
+ && (latch_mode != BTR_MODIFY_TREE
+ || height == level)) {
+ rw_latch = upper_rw_latch;
+ } else {
+ rw_latch = RW_NO_LATCH;
+ }
+ tree_savepoints[n_blocks] = mtr_set_savepoint(mtr);
+ block = buf_page_get_gen(page_id, page_size, rw_latch, NULL,
+ BUF_GET, file, line, mtr, &err);
ut_ad((block != NULL) == (err == DB_SUCCESS));
+ tree_blocks[n_blocks] = block;
if (err != DB_SUCCESS) {
if (err == DB_DECRYPTION_FAILED) {
@@ -930,18 +2309,35 @@ btr_cur_open_at_index_side_func(
"Table %s is encrypted but encryption service or"
" used key_id is not available. "
" Can't continue reading table.",
- index->table->name);
+ index->table->name.m_name);
index->table->file_unreadable = true;
}
goto exit_loop;
}
- page = buf_block_get_frame(block);
- ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
- ut_ad(index->id == btr_page_get_index_id(page));
+ const page_t* page = buf_block_get_frame(block);
+
+ if (height == ULINT_UNDEFINED
+ && page_is_leaf(page)
+ && rw_latch != RW_NO_LATCH
+ && rw_latch != root_leaf_rw_latch) {
+		/* We should retry getting the page, because the root page
+		is latched differently from a leaf page. */
+ ut_ad(root_leaf_rw_latch != RW_NO_LATCH);
+ ut_ad(rw_latch == RW_S_LATCH);
+
+ ut_ad(n_blocks == 0);
+ mtr_release_block_at_savepoint(
+ mtr, tree_savepoints[n_blocks],
+ tree_blocks[n_blocks]);
+
+ upper_rw_latch = root_leaf_rw_latch;
+ continue;
+ }
- block->check_index_page_at_flush = TRUE;
+ ut_ad(fil_page_index_page_check(page));
+ ut_ad(index->id == btr_page_get_index_id(page));
if (height == ULINT_UNDEFINED) {
/* We are in the root node */
@@ -955,12 +2351,16 @@ btr_cur_open_at_index_side_func(
}
if (height == level) {
- btr_cur_latch_leaves(
- page, space, zip_size, page_no,
- latch_mode & ~BTR_ALREADY_S_LATCHED,
- cursor, mtr);
-
- if (height == 0) {
+ if (srv_read_only_mode) {
+ btr_cur_latch_leaves(
+ block, page_id, page_size,
+ latch_mode, cursor, mtr);
+ } else if (height == 0) {
+ if (rw_latch == RW_NO_LATCH) {
+ btr_cur_latch_leaves(
+ block, page_id, page_size,
+ latch_mode, cursor, mtr);
+ }
/* In versions <= 3.23.52 we had
forgotten to release the tree latch
here. If in an index scan we had to
@@ -972,15 +2372,55 @@ btr_cur_open_at_index_side_func(
switch (latch_mode) {
case BTR_MODIFY_TREE:
case BTR_CONT_MODIFY_TREE:
- case BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED:
- case BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED:
+ case BTR_CONT_SEARCH_TREE:
break;
default:
- /* Release the tree s-latch */
-
- mtr_release_s_latch_at_savepoint(
- mtr, savepoint,
- dict_index_get_lock(index));
+ if (!s_latch_by_caller) {
+ /* Release the tree s-latch */
+ mtr_release_s_latch_at_savepoint(
+ mtr, savepoint,
+ dict_index_get_lock(
+ index));
+ }
+
+ /* release upper blocks */
+ for (; n_releases < n_blocks;
+ n_releases++) {
+ mtr_release_block_at_savepoint(
+ mtr,
+ tree_savepoints[
+ n_releases],
+ tree_blocks[
+ n_releases]);
+ }
+ }
+ } else { /* height != 0 */
+ /* We already have the block latched. */
+ ut_ad(latch_mode == BTR_SEARCH_TREE);
+ ut_ad(s_latch_by_caller);
+ ut_ad(upper_rw_latch == RW_S_LATCH);
+
+ ut_ad(mtr_memo_contains(mtr, block,
+ upper_rw_latch));
+
+ if (s_latch_by_caller) {
+				/* the caller should sx-latch the index to
+				exclude tree-modifying operations. */
+ ut_ad(mtr_memo_contains(
+ mtr,
+ dict_index_get_lock(index),
+ MTR_MEMO_SX_LOCK));
+				/* because the index is sx-latched,
+				the upper blocks can be released. */
+ for (; n_releases < n_blocks;
+ n_releases++) {
+ mtr_release_block_at_savepoint(
+ mtr,
+ tree_savepoints[
+ n_releases],
+ tree_blocks[
+ n_releases]);
+ }
}
}
}
@@ -1016,9 +2456,82 @@ btr_cur_open_at_index_side_func(
node_ptr = page_cur_get_rec(page_cursor);
offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
- ULINT_UNDEFINED, &heap);
+ false, ULINT_UNDEFINED, &heap);
+
+	/* If the rec is the first or last in the page for
+	pessimistic delete intention, it might cause a node_ptr
+	insert at the upper level. We should change the intention
+	and retry. */
+ if (latch_mode == BTR_MODIFY_TREE
+ && btr_cur_need_opposite_intention(
+ page, lock_intention, node_ptr)) {
+
+ ut_ad(upper_rw_latch == RW_X_LATCH);
+ /* release all blocks */
+ for (; n_releases <= n_blocks; n_releases++) {
+ mtr_release_block_at_savepoint(
+ mtr, tree_savepoints[n_releases],
+ tree_blocks[n_releases]);
+ }
+
+ lock_intention = BTR_INTENTION_BOTH;
+
+ page_id.set_page_no(dict_index_get_page(index));
+
+ height = ULINT_UNDEFINED;
+
+ n_blocks = 0;
+ n_releases = 0;
+
+ continue;
+ }
+
+ if (latch_mode == BTR_MODIFY_TREE
+ && !btr_cur_will_modify_tree(
+ cursor->index, page, lock_intention, node_ptr,
+ node_ptr_max_size, page_size, mtr)) {
+ ut_ad(upper_rw_latch == RW_X_LATCH);
+ ut_ad(n_releases <= n_blocks);
+
+ /* we can release upper blocks */
+ for (; n_releases < n_blocks; n_releases++) {
+ if (n_releases == 0) {
+				/* do not release the root page,
+				so that it stays pinned to the same block. */
+ continue;
+ }
+
+ /* release unused blocks to unpin */
+ mtr_release_block_at_savepoint(
+ mtr, tree_savepoints[n_releases],
+ tree_blocks[n_releases]);
+ }
+ }
+
+ if (height == level
+ && latch_mode == BTR_MODIFY_TREE) {
+ ut_ad(upper_rw_latch == RW_X_LATCH);
+			/* sx-latch the root page if it has already been
+			released; it contains the segment header. */
+ if (n_releases > 0) {
+ mtr_block_sx_latch_at_savepoint(
+ mtr, tree_savepoints[0],
+ tree_blocks[0]);
+ }
+
+ /* x-latch the branch blocks not released yet. */
+ for (ulint i = n_releases; i <= n_blocks; i++) {
+ mtr_block_x_latch_at_savepoint(
+ mtr, tree_savepoints[i],
+ tree_blocks[i]);
+ }
+ }
+
/* Go to the child node */
- page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
+ page_id.set_page_no(
+ btr_node_ptr_get_child_page_no(node_ptr, offsets));
+
+ n_blocks++;
}
exit_loop:
@@ -1030,55 +2543,122 @@ btr_cur_open_at_index_side_func(
}
/**********************************************************************//**
-Positions a cursor at a randomly chosen position within a B-tree. */
-UNIV_INTERN
-void
+Positions a cursor at a randomly chosen position within a B-tree.
+@return true if the index is available and we have put the cursor, false
+if the index is unavailable */
+bool
btr_cur_open_at_rnd_pos_func(
/*=========================*/
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_cur_t* cursor, /*!< in/out: B-tree cursor */
const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
+ unsigned line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
page_cur_t* page_cursor;
- ulint page_no;
- ulint space;
- ulint zip_size;
+ ulint node_ptr_max_size = UNIV_PAGE_SIZE / 2;
ulint height;
rec_t* node_ptr;
+ ulint savepoint;
+ ulint upper_rw_latch, root_leaf_rw_latch;
+ btr_intention_t lock_intention;
+ buf_block_t* tree_blocks[BTR_MAX_LEVELS];
+ ulint tree_savepoints[BTR_MAX_LEVELS];
+ ulint n_blocks = 0;
+ ulint n_releases = 0;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
rec_offs_init(offsets_);
+ ut_ad(!dict_index_is_spatial(index));
+
+ lock_intention = btr_cur_get_and_clear_intention(&latch_mode);
+
+ ut_ad(!(latch_mode & BTR_MODIFY_EXTERNAL));
+
+ savepoint = mtr_set_savepoint(mtr);
+
switch (latch_mode) {
case BTR_MODIFY_TREE:
- mtr_x_lock(dict_index_get_lock(index), mtr);
+		/* Most delete-intended operations are purges. Free blocks
+		and read IO bandwidth should be prioritized for them when
+		the history list is growing huge. */
+ if (lock_intention == BTR_INTENTION_DELETE
+ && trx_sys->rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH
+ && buf_get_n_pending_read_ios()) {
+ mtr_x_lock(dict_index_get_lock(index), mtr);
+ } else {
+ mtr_sx_lock(dict_index_get_lock(index), mtr);
+ }
+ upper_rw_latch = RW_X_LATCH;
break;
+ case BTR_SEARCH_PREV:
+ case BTR_MODIFY_PREV:
+	/* This function does not support latching the left uncle
+	   page, which would be needed for latching the left leaf
+	   page. */
+ case BTR_SEARCH_TREE:
+ case BTR_CONT_MODIFY_TREE:
+ case BTR_CONT_SEARCH_TREE:
+ ut_ad(0);
+ /* fall through */
default:
- ut_ad(latch_mode != BTR_CONT_MODIFY_TREE);
- mtr_s_lock(dict_index_get_lock(index), mtr);
+ if (!srv_read_only_mode) {
+ mtr_s_lock(dict_index_get_lock(index), mtr);
+ upper_rw_latch = RW_S_LATCH;
+ } else {
+ upper_rw_latch = RW_NO_LATCH;
+ }
+ }
+
+ DBUG_EXECUTE_IF("test_index_is_unavailable",
+ return(false););
+
+ if (index->page == FIL_NULL) {
+		/* Since we did not hold the index lock until just now,
+		the index could have been modified by others; for example,
+		if this is a statistics updater for a referenced table,
+		the index could have been marked unavailable by
+		'DROP TABLE' in the meantime, since no lock is held for
+		the statistics updater */
+ return(false);
}
+ root_leaf_rw_latch = btr_cur_latch_for_root_leaf(latch_mode);
+
page_cursor = btr_cur_get_page_cur(cursor);
cursor->index = index;
- space = dict_index_get_space(index);
- zip_size = dict_table_zip_size(index->table);
- page_no = dict_index_get_page(index);
+ page_id_t page_id(dict_index_get_space(index),
+ dict_index_get_page(index));
+ const page_size_t& page_size = dict_table_page_size(index->table);
+ dberr_t err = DB_SUCCESS;
+
+ if (root_leaf_rw_latch == RW_X_LATCH) {
+ node_ptr_max_size = btr_node_ptr_max_size(index);
+ }
height = ULINT_UNDEFINED;
for (;;) {
buf_block_t* block;
page_t* page;
- dberr_t err=DB_SUCCESS;
+ ulint rw_latch;
+
+ ut_ad(n_blocks < BTR_MAX_LEVELS);
+
+ if (height != 0
+ && latch_mode != BTR_MODIFY_TREE) {
+ rw_latch = upper_rw_latch;
+ } else {
+ rw_latch = RW_NO_LATCH;
+ }
- block = buf_page_get_gen(space, zip_size, page_no,
- RW_NO_LATCH, NULL, BUF_GET,
- file, line, mtr, &err);
+ tree_savepoints[n_blocks] = mtr_set_savepoint(mtr);
+ block = buf_page_get_gen(page_id, page_size, rw_latch, NULL,
+ BUF_GET, file, line, mtr, &err);
+ tree_blocks[n_blocks] = block;
ut_ad((block != NULL) == (err == DB_SUCCESS));
@@ -1089,15 +2669,34 @@ btr_cur_open_at_rnd_pos_func(
"Table %s is encrypted but encryption service or"
" used key_id is not available. "
" Can't continue reading table.",
- index->table->name);
+ index->table->name.m_name);
index->table->file_unreadable = true;
}
- goto exit_loop;
+ break;
}
page = buf_block_get_frame(block);
- ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+
+ if (height == ULINT_UNDEFINED
+ && page_is_leaf(page)
+ && rw_latch != RW_NO_LATCH
+ && rw_latch != root_leaf_rw_latch) {
+		/* We should retry getting the page, because the root page
+		is latched differently from a leaf page. */
+ ut_ad(root_leaf_rw_latch != RW_NO_LATCH);
+ ut_ad(rw_latch == RW_S_LATCH);
+
+ ut_ad(n_blocks == 0);
+ mtr_release_block_at_savepoint(
+ mtr, tree_savepoints[n_blocks],
+ tree_blocks[n_blocks]);
+
+ upper_rw_latch = root_leaf_rw_latch;
+ continue;
+ }
+
+ ut_ad(fil_page_index_page_check(page));
ut_ad(index->id == btr_page_get_index_id(page));
if (height == ULINT_UNDEFINED) {
@@ -1107,8 +2706,37 @@ btr_cur_open_at_rnd_pos_func(
}
if (height == 0) {
- btr_cur_latch_leaves(page, space, zip_size, page_no,
- latch_mode, cursor, mtr);
+ if (rw_latch == RW_NO_LATCH
+ || srv_read_only_mode) {
+ btr_cur_latch_leaves(
+ block, page_id, page_size,
+ latch_mode, cursor, mtr);
+ }
+
+			/* btr_cur_open_at_index_side_func() and
+			btr_cur_search_to_nth_level() release the
+			tree s-latch here. */
+ switch (latch_mode) {
+ case BTR_MODIFY_TREE:
+ case BTR_CONT_MODIFY_TREE:
+ case BTR_CONT_SEARCH_TREE:
+ break;
+ default:
+ /* Release the tree s-latch */
+ if (!srv_read_only_mode) {
+ mtr_release_s_latch_at_savepoint(
+ mtr, savepoint,
+ dict_index_get_lock(index));
+ }
+
+ /* release upper blocks */
+ for (; n_releases < n_blocks; n_releases++) {
+ mtr_release_block_at_savepoint(
+ mtr,
+ tree_savepoints[n_releases],
+ tree_blocks[n_releases]);
+ }
+ }
}
page_cur_open_on_rnd_user_rec(block, page_cursor);
@@ -1124,15 +2752,89 @@ btr_cur_open_at_rnd_pos_func(
node_ptr = page_cur_get_rec(page_cursor);
offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
- ULINT_UNDEFINED, &heap);
+ false, ULINT_UNDEFINED, &heap);
+
+		/* If the rec is the first or last in the page for
+		pessimistic delete intention, it might cause a node_ptr
+		insert at the upper level. We should change the intention
+		and retry. */
+ if (latch_mode == BTR_MODIFY_TREE
+ && btr_cur_need_opposite_intention(
+ page, lock_intention, node_ptr)) {
+
+ ut_ad(upper_rw_latch == RW_X_LATCH);
+ /* release all blocks */
+ for (; n_releases <= n_blocks; n_releases++) {
+ mtr_release_block_at_savepoint(
+ mtr, tree_savepoints[n_releases],
+ tree_blocks[n_releases]);
+ }
+
+ lock_intention = BTR_INTENTION_BOTH;
+
+ page_id.set_page_no(dict_index_get_page(index));
+
+ height = ULINT_UNDEFINED;
+
+ n_blocks = 0;
+ n_releases = 0;
+
+ continue;
+ }
+
+ if (latch_mode == BTR_MODIFY_TREE
+ && !btr_cur_will_modify_tree(
+ cursor->index, page, lock_intention, node_ptr,
+ node_ptr_max_size, page_size, mtr)) {
+ ut_ad(upper_rw_latch == RW_X_LATCH);
+ ut_ad(n_releases <= n_blocks);
+
+ /* we can release upper blocks */
+ for (; n_releases < n_blocks; n_releases++) {
+ if (n_releases == 0) {
+				/* do not release the root page,
+				so that it stays pinned to the same block. */
+ continue;
+ }
+
+ /* release unused blocks to unpin */
+ mtr_release_block_at_savepoint(
+ mtr, tree_savepoints[n_releases],
+ tree_blocks[n_releases]);
+ }
+ }
+
+ if (height == 0
+ && latch_mode == BTR_MODIFY_TREE) {
+ ut_ad(upper_rw_latch == RW_X_LATCH);
+			/* sx-latch the root page if it has already been
+			released; it contains the segment header. */
+ if (n_releases > 0) {
+ mtr_block_sx_latch_at_savepoint(
+ mtr, tree_savepoints[0],
+ tree_blocks[0]);
+ }
+
+ /* x-latch the branch blocks not released yet. */
+ for (ulint i = n_releases; i <= n_blocks; i++) {
+ mtr_block_x_latch_at_savepoint(
+ mtr, tree_savepoints[i],
+ tree_blocks[i]);
+ }
+ }
+
/* Go to the child node */
- page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
+ page_id.set_page_no(
+ btr_node_ptr_get_child_page_no(node_ptr, offsets));
+
+ n_blocks++;
}
- exit_loop:
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
+
+ return err == DB_SUCCESS;
}
/*==================== B-TREE INSERT =========================*/
@@ -1148,7 +2850,7 @@ if this is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit().
-@return pointer to inserted record if succeed, else NULL */
+@return pointer to the inserted record on success, else NULL */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
rec_t*
btr_cur_insert_if_possible(
@@ -1157,7 +2859,7 @@ btr_cur_insert_if_possible(
cursor stays valid */
const dtuple_t* tuple, /*!< in: tuple to insert; the size info need not
have been stored to tuple */
- ulint** offsets,/*!< out: offsets on *rec */
+ offset_t** offsets,/*!< out: offsets on *rec */
mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr) /*!< in/out: mini-transaction */
@@ -1167,8 +2869,9 @@ btr_cur_insert_if_possible(
ut_ad(dtuple_check_typed(tuple));
- ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_is_block_fix(
+ mtr, btr_cur_get_block(cursor),
+ MTR_MEMO_PAGE_X_FIX, cursor->index->table));
page_cursor = btr_cur_get_page_cur(cursor);
/* Now, try the insert */
@@ -1191,7 +2894,7 @@ btr_cur_insert_if_possible(
/*************************************************************//**
For an insert, checks the locks and does the undo logging if desired.
-@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
+@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
UNIV_INLINE MY_ATTRIBUTE((warn_unused_result, nonnull(2,3,5,6)))
dberr_t
btr_cur_ins_lock_and_undo(
@@ -1208,7 +2911,7 @@ btr_cur_ins_lock_and_undo(
successor record */
{
dict_index_t* index;
- dberr_t err;
+ dberr_t err = DB_SUCCESS;
rec_t* rec;
roll_ptr_t roll_ptr;
@@ -1221,10 +2924,32 @@ btr_cur_ins_lock_and_undo(
ut_ad(!dict_index_is_online_ddl(index)
|| dict_index_is_clust(index)
|| (flags & BTR_CREATE_FLAG));
+ ut_ad(mtr->is_named_space(index->space));
- err = lock_rec_insert_check_and_lock(flags, rec,
- btr_cur_get_block(cursor),
- index, thr, mtr, inherit);
+	/* Check if there is a predicate or GAP lock preventing the insertion */
+ if (!(flags & BTR_NO_LOCKING_FLAG)) {
+ if (dict_index_is_spatial(index)) {
+ lock_prdt_t prdt;
+ rtr_mbr_t mbr;
+
+ rtr_get_mbr_from_tuple(entry, &mbr);
+
+			/* Use an on-stack MBR variable to test whether a
+			lock is needed. If so, the predicate (MBR) will be
+			allocated from the lock heap in
+			lock_prdt_insert_check_and_lock() */
+ lock_init_prdt_from_mbr(
+ &prdt, &mbr, 0, NULL);
+
+ err = lock_prdt_insert_check_and_lock(
+ flags, rec, btr_cur_get_block(cursor),
+ index, thr, mtr, &prdt);
+ *inherit = false;
+ } else {
+ err = lock_rec_insert_check_and_lock(
+ flags, rec, btr_cur_get_block(cursor),
+ index, thr, mtr, inherit);
+ }
+ }
if (err != DB_SUCCESS
|| !(~flags | (BTR_NO_UNDO_LOG_FLAG | BTR_KEEP_SYS_FLAG))
@@ -1234,7 +2959,7 @@ btr_cur_ins_lock_and_undo(
}
if (flags & BTR_NO_UNDO_LOG_FLAG) {
- roll_ptr = 0;
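+		/* No undo logging was requested; use a dummy roll pointer
+		with the "insert" flag set. */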
+ roll_ptr = roll_ptr_t(1) << ROLL_PTR_INSERT_FLAG_POS;
} else {
err = trx_undo_report_row_operation(thr, index, entry,
NULL, 0, NULL, NULL,
@@ -1245,7 +2970,6 @@ btr_cur_ins_lock_and_undo(
}
/* Now we can fill in the roll ptr field in entry */
-
if (!(flags & BTR_KEEP_SYS_FLAG)) {
row_upd_index_entry_sys_field(entry, index,
@@ -1255,23 +2979,36 @@ btr_cur_ins_lock_and_undo(
return(DB_SUCCESS);
}
-#ifdef UNIV_DEBUG
-/*************************************************************//**
-Report information about a transaction. */
+/**
+Prefetch siblings of the leaf for the pessimistic operation.
+@param block leaf page */
static
void
-btr_cur_trx_report(
-/*===============*/
- trx_id_t trx_id, /*!< in: transaction id */
- const dict_index_t* index, /*!< in: index */
- const char* op) /*!< in: operation */
+btr_cur_prefetch_siblings(
+ buf_block_t* block)
{
- fprintf(stderr, "Trx with id " TRX_ID_FMT " going to ", trx_id);
- fputs(op, stderr);
- dict_index_name_print(stderr, NULL, index);
- putc('\n', stderr);
+ page_t* page = buf_block_get_frame(block);
+
+ ut_ad(page_is_leaf(page));
+
+ ulint left_page_no = fil_page_get_prev(page);
+ ulint right_page_no = fil_page_get_next(page);
+
+ if (left_page_no != FIL_NULL) {
+ buf_read_page_background(
+ page_id_t(block->page.id.space(), left_page_no),
+ block->page.size, false);
+ }
+ if (right_page_no != FIL_NULL) {
+ buf_read_page_background(
+ page_id_t(block->page.id.space(), right_page_no),
+ block->page.size, false);
+ }
+ if (left_page_no != FIL_NULL
+ || right_page_no != FIL_NULL) {
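+		/* Wake the simulated AIO handler threads, so that the
+		queued background reads are serviced. */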
+ os_aio_simulated_wake_handler_threads();
+ }
}
-#endif /* UNIV_DEBUG */
/*************************************************************//**
Tries to perform an insert to a page in an index tree, next to cursor.
@@ -1279,8 +3016,7 @@ It is assumed that mtr holds an x-latch on the page. The operation does
not succeed if there is too little space on the page. If there is just
one record on the page, the insert will always succeed; this is to
prevent trying to split a page with just one record.
-@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
-UNIV_INTERN
+@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
dberr_t
btr_cur_optimistic_insert(
/*======================*/
@@ -1289,7 +3025,7 @@ btr_cur_optimistic_insert(
specified */
btr_cur_t* cursor, /*!< in: cursor on page after which to insert;
cursor stays valid */
- ulint** offsets,/*!< out: offsets on *rec */
+ offset_t** offsets,/*!< out: offsets on *rec */
mem_heap_t** heap, /*!< in/out: pointer to memory heap */
dtuple_t* entry, /*!< in/out: entry to insert */
rec_t** rec, /*!< out: pointer to inserted record if
@@ -1317,7 +3053,6 @@ btr_cur_optimistic_insert(
ibool leaf;
ibool reorg;
ibool inherit = TRUE;
- ulint zip_size;
ulint rec_size;
dberr_t err;
@@ -1328,38 +3063,32 @@ btr_cur_optimistic_insert(
page = buf_block_get_frame(block);
index = cursor->index;
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
ut_ad(!dict_index_is_online_ddl(index)
|| dict_index_is_clust(index)
|| (flags & BTR_CREATE_FLAG));
ut_ad(dtuple_check_typed(entry));
- zip_size = buf_block_get_zip_size(block);
+ const page_size_t& page_size = block->page.size;
+
#ifdef UNIV_DEBUG_VALGRIND
- if (zip_size) {
- UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
- UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
+ if (page_size.is_compressed()) {
+ UNIV_MEM_ASSERT_RW(page, page_size.logical());
+ UNIV_MEM_ASSERT_RW(block->page.zip.data, page_size.physical());
}
#endif /* UNIV_DEBUG_VALGRIND */
-#ifdef UNIV_DEBUG
- if (btr_cur_print_record_ops && thr) {
- btr_cur_trx_report(thr_get_trx(thr)->id, index, "insert ");
- dtuple_print(stderr, entry);
- }
-#endif /* UNIV_DEBUG */
-
leaf = page_is_leaf(page);
/* Calculate the record size when entry is converted to a record */
rec_size = rec_get_converted_size(index, entry, n_ext);
if (page_zip_rec_needs_ext(rec_size, page_is_comp(page),
- dtuple_get_n_fields(entry), zip_size)) {
+ dtuple_get_n_fields(entry), page_size)) {
/* The record is so big that we have to store some fields
externally on separate database pages */
- big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext);
+ big_rec_vec = dtuple_convert_big_rec(index, 0, entry, &n_ext);
if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
@@ -1369,52 +3098,18 @@ btr_cur_optimistic_insert(
rec_size = rec_get_converted_size(index, entry, n_ext);
}
- if (zip_size) {
- /* Estimate the free space of an empty compressed page.
- Subtract one byte for the encoded heap_no in the
- modification log. */
- ulint free_space_zip = page_zip_empty_size(
- cursor->index->n_fields, zip_size);
- ulint n_uniq = dict_index_get_n_unique_in_tree(index);
-
- ut_ad(dict_table_is_comp(index->table));
-
- if (free_space_zip == 0) {
-too_big:
- if (big_rec_vec) {
- dtuple_convert_back_big_rec(
- index, entry, big_rec_vec);
- }
-
- return(DB_TOO_BIG_RECORD);
+ if (page_size.is_compressed() && page_zip_is_too_big(index, entry)) {
+ if (big_rec_vec != NULL) {
+ dtuple_convert_back_big_rec(index, entry, big_rec_vec);
}
- /* Subtract one byte for the encoded heap_no in the
- modification log. */
- free_space_zip--;
-
- /* There should be enough room for two node pointer
- records on an empty non-leaf page. This prevents
- infinite page splits. */
-
- if (entry->n_fields >= n_uniq
- && (REC_NODE_PTR_SIZE
- + rec_get_converted_size_comp_prefix(
- index, entry->fields, n_uniq, NULL)
- /* On a compressed page, there is
- a two-byte entry in the dense
- page directory for every record.
- But there is no record header. */
- - (REC_N_NEW_EXTRA_BYTES - 2)
- > free_space_zip / 2)) {
- goto too_big;
- }
+ return(DB_TOO_BIG_RECORD);
}
LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page),
goto fail);
- if (leaf && zip_size
+ if (leaf && page_size.is_compressed()
&& (page_get_data_size(page) + rec_size
>= dict_index_zip_pad_optimal_page_size(index))) {
/* If compression padding tells us that insertion will
@@ -1423,6 +3118,12 @@ too_big:
insertion. */
fail:
err = DB_FAIL;
+
+		/* prefetch the siblings of the leaf for the pessimistic
+		operation, if the page is a leaf. */
+ if (page_is_leaf(page)) {
+ btr_cur_prefetch_siblings(block);
+ }
fail_err:
if (big_rec_vec) {
@@ -1451,40 +3152,73 @@ fail_err:
we have to split the page to reserve enough free space for
future updates of records. */
- if (leaf && !zip_size && dict_index_is_clust(index)
+ if (leaf && !page_size.is_compressed() && dict_index_is_clust(index)
&& page_get_n_recs(page) >= 2
&& dict_index_get_space_reserve() + rec_size > max_size
&& (btr_page_get_split_rec_to_right(cursor, &dummy)
- || btr_page_get_split_rec_to_left(cursor, &dummy))) {
+ || btr_page_get_split_rec_to_left(cursor))) {
goto fail;
}
- /* Check locks and write to the undo log, if specified */
- err = btr_cur_ins_lock_and_undo(flags, cursor, entry,
- thr, mtr, &inherit);
+ page_cursor = btr_cur_get_page_cur(cursor);
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
+ DBUG_LOG("ib_cur",
+ "insert " << index->name << " (" << index->id << ") by "
+ << ib::hex(thr ? thr->graph->trx->id : 0)
+ << ' ' << rec_printer(entry).str());
+ DBUG_EXECUTE_IF("do_page_reorganize",
+ btr_page_reorganize(page_cursor, index, mtr););
- goto fail_err;
- }
+ /* Now, try the insert */
+ {
+ const rec_t* page_cursor_rec = page_cur_get_rec(page_cursor);
- page_cursor = btr_cur_get_page_cur(cursor);
+ /* Check locks and write to the undo log,
+ if specified */
+ err = btr_cur_ins_lock_and_undo(flags, cursor, entry,
+ thr, mtr, &inherit);
+ if (err != DB_SUCCESS) {
+ goto fail_err;
+ }
- /* Now, try the insert */
+#ifdef UNIV_DEBUG
+ if (!(flags & BTR_CREATE_FLAG)
+ && index->is_primary() && page_is_leaf(page)) {
+ const dfield_t* trx_id = dtuple_get_nth_field(
+ entry, dict_col_get_clust_pos(
+ dict_table_get_sys_col(index->table,
+ DATA_TRX_ID),
+ index));
+
+ ut_ad(trx_id->len == DATA_TRX_ID_LEN);
+ ut_ad(trx_id[1].len == DATA_ROLL_PTR_LEN);
+ ut_ad(*static_cast<const byte*>
+ (trx_id[1].data) & 0x80);
+ if (!(flags & BTR_NO_UNDO_LOG_FLAG)) {
+ ut_ad(thr->graph->trx->id);
+ ut_ad(thr->graph->trx->id
+ == trx_read_trx_id(
+ static_cast<const byte*>(
+ trx_id->data)));
+ }
+ }
+#endif
+
+ *rec = page_cur_tuple_insert(
+ page_cursor, entry, index, offsets, heap,
+ n_ext, mtr);
- {
- const rec_t* page_cursor_rec = page_cur_get_rec(page_cursor);
- *rec = page_cur_tuple_insert(page_cursor, entry, index,
- offsets, heap, n_ext, mtr);
reorg = page_cursor_rec != page_cur_get_rec(page_cursor);
}
if (*rec) {
- } else if (zip_size) {
+ } else if (page_size.is_compressed()) {
+ ut_ad(!dict_table_is_temporary(index->table));
/* Reset the IBUF_BITMAP_FREE bits, because
page_cur_tuple_insert() will have attempted page
reorganize before failing. */
- if (leaf && !dict_index_is_clust(index)) {
+ if (leaf
+ && !dict_index_is_clust(index)) {
ibuf_reset_free_bits(block);
}
@@ -1506,30 +3240,33 @@ fail_err:
offsets, heap, n_ext, mtr);
if (UNIV_UNLIKELY(!*rec)) {
- fputs("InnoDB: Error: cannot insert tuple ", stderr);
- dtuple_print(stderr, entry);
- fputs(" into ", stderr);
- dict_index_name_print(stderr, thr_get_trx(thr), index);
- fprintf(stderr, "\nInnoDB: max insert size %lu\n",
- (ulong) max_size);
- ut_error;
+ ib::fatal() << "Cannot insert tuple " << *entry
+			<< " into index " << index->name
+ << " of table " << index->table->name
+ << ". Max size: " << max_size;
}
}
#ifdef BTR_CUR_HASH_ADAPT
- if (!reorg && leaf && (cursor->flag == BTR_CUR_HASH)) {
+ if (!leaf) {
+# ifdef MYSQL_INDEX_DISABLE_AHI
+ } else if (index->disable_ahi) {
+# endif
+ } else if (!reorg && cursor->flag == BTR_CUR_HASH) {
btr_search_update_hash_node_on_insert(cursor);
} else {
btr_search_update_hash_on_insert(cursor);
}
-#endif
+#endif /* BTR_CUR_HASH_ADAPT */
if (!(flags & BTR_NO_LOCKING_FLAG) && inherit) {
lock_update_insert(block, *rec);
}
- if (leaf && !dict_index_is_clust(index)) {
+ if (leaf
+ && !dict_index_is_clust(index)
+ && !dict_table_is_temporary(index->table)) {
/* Update the free bits of the B-tree page in the
insert buffer bitmap. */
@@ -1543,7 +3280,7 @@ fail_err:
committed mini-transaction, because in crash recovery,
the free bits could momentarily be set too high. */
- if (zip_size) {
+ if (page_size.is_compressed()) {
/* Update the bits in the same mini-transaction. */
ibuf_update_free_bits_zip(block, mtr);
} else {
@@ -1565,8 +3302,7 @@ Performs an insert on a page of an index tree. It is assumed that mtr
holds an x-latch on the tree and on the cursor page. If the insert is
made on the leaf level, to avoid deadlocks, mtr must also own x-latches
to brothers of page, if those brothers exist.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
+@return DB_SUCCESS or error number */
dberr_t
btr_cur_pessimistic_insert(
/*=======================*/
@@ -1578,7 +3314,7 @@ btr_cur_pessimistic_insert(
insertion will certainly succeed */
btr_cur_t* cursor, /*!< in: cursor after which to insert;
cursor stays valid */
- ulint** offsets,/*!< out: offsets on *rec */
+ offset_t** offsets,/*!< out: offsets on *rec */
mem_heap_t** heap, /*!< in/out: pointer to memory heap
that can be emptied */
dtuple_t* entry, /*!< in/out: entry to insert */
@@ -1594,11 +3330,10 @@ btr_cur_pessimistic_insert(
mtr_t* mtr) /*!< in/out: mini-transaction */
{
dict_index_t* index = cursor->index;
- ulint zip_size = dict_table_zip_size(index->table);
big_rec_t* big_rec_vec = NULL;
dberr_t err;
ibool inherit = FALSE;
- ibool success;
+ bool success;
ulint n_reserved = 0;
ut_ad(dtuple_check_typed(entry));
@@ -1606,11 +3341,12 @@ btr_cur_pessimistic_insert(
*big_rec = NULL;
- ut_ad(mtr_memo_contains(mtr,
- dict_index_get_lock(btr_cur_get_index(cursor)),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_flagged(
+ mtr, dict_index_get_lock(btr_cur_get_index(cursor)),
+ MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK));
+ ut_ad(mtr_is_block_fix(
+ mtr, btr_cur_get_block(cursor),
+ MTR_MEMO_PAGE_X_FIX, cursor->index->table));
ut_ad(!dict_index_is_online_ddl(index)
|| dict_index_is_clust(index)
|| (flags & BTR_CREATE_FLAG));
@@ -1644,7 +3380,7 @@ btr_cur_pessimistic_insert(
if (page_zip_rec_needs_ext(rec_get_converted_size(index, entry, n_ext),
dict_table_is_comp(index->table),
dtuple_get_n_fields(entry),
- zip_size)) {
+ dict_table_page_size(index->table))) {
/* The record is so big that we have to store some fields
externally on separate database pages */
@@ -1655,7 +3391,7 @@ btr_cur_pessimistic_insert(
dtuple_convert_back_big_rec(index, entry, big_rec_vec);
}
- big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext);
+ big_rec_vec = dtuple_convert_big_rec(index, 0, entry, &n_ext);
if (big_rec_vec == NULL) {
@@ -1668,7 +3404,7 @@ btr_cur_pessimistic_insert(
}
if (dict_index_get_page(index)
- == buf_block_get_page_no(btr_cur_get_block(cursor))) {
+ == btr_cur_get_block(cursor)->page.id.page_no()) {
/* The page is the root page */
*rec = btr_root_raise_and_insert(
@@ -1682,44 +3418,46 @@ btr_cur_pessimistic_insert(
return(DB_OUT_OF_FILE_SPACE);
}
- ut_ad(page_rec_get_next(btr_cur_get_rec(cursor)) == *rec);
+ ut_ad(page_rec_get_next(btr_cur_get_rec(cursor)) == *rec
+ || dict_index_is_spatial(index));
if (!(flags & BTR_NO_LOCKING_FLAG)) {
- /* The cursor might be moved to the other page,
- and the max trx id field should be updated after
- the cursor was fixed. */
- if (!dict_index_is_clust(index)) {
- page_update_max_trx_id(
- btr_cur_get_block(cursor),
- btr_cur_get_page_zip(cursor),
- thr_get_trx(thr)->id, mtr);
- }
-
- if (!page_rec_is_infimum(btr_cur_get_rec(cursor))) {
- /* split and inserted need to call
- lock_update_insert() always. */
- inherit = TRUE;
- }
-
- buf_block_t* block = btr_cur_get_block(cursor);
- buf_frame_t* frame = NULL;
+ ut_ad(!dict_table_is_temporary(index->table));
+ if (dict_index_is_spatial(index)) {
+ /* Do nothing */
+ } else {
+ /* The cursor might be moved to the other page
+ and the max trx id field should be updated after
+ the cursor was fixed. */
+ if (!dict_index_is_clust(index)) {
+ page_update_max_trx_id(
+ btr_cur_get_block(cursor),
+ btr_cur_get_page_zip(cursor),
+ thr_get_trx(thr)->id, mtr);
+ }
- if (block) {
- frame = buf_block_get_frame(block);
- }
- /* split and inserted need to call
- lock_update_insert() always. */
- if (frame && btr_page_get_prev(frame, mtr) == FIL_NULL) {
- inherit = TRUE;
+ if (!page_rec_is_infimum(btr_cur_get_rec(cursor))
+ || !page_has_prev(btr_cur_get_page(cursor))) {
+ /* split and inserted need to call
+ lock_update_insert() always. */
+ inherit = TRUE;
+ }
}
}
-#ifdef BTR_CUR_ADAPT
- btr_search_update_hash_on_insert(cursor);
-#endif
- if (inherit && !(flags & BTR_NO_LOCKING_FLAG)) {
+ if (!page_is_leaf(btr_cur_get_page(cursor))) {
+ ut_ad(!big_rec_vec);
+ } else {
+#ifdef BTR_CUR_HASH_ADAPT
+# ifdef MYSQL_INDEX_DISABLE_AHI
+ if (index->disable_ahi); else
+# endif
+ btr_search_update_hash_on_insert(cursor);
+#endif /* BTR_CUR_HASH_ADAPT */
+ if (inherit && !(flags & BTR_NO_LOCKING_FLAG)) {
- lock_update_insert(btr_cur_get_block(cursor), *rec);
+ lock_update_insert(btr_cur_get_block(cursor), *rec);
+ }
}
if (n_reserved > 0) {
@@ -1735,14 +3473,14 @@ btr_cur_pessimistic_insert(
/*************************************************************//**
For an update, checks the locks and does the undo logging.
-@return DB_SUCCESS, DB_WAIT_LOCK, or error number */
-UNIV_INLINE MY_ATTRIBUTE((warn_unused_result, nonnull(2,3,6,7)))
+@return DB_SUCCESS, DB_WAIT_LOCK, or error number */
+UNIV_INLINE MY_ATTRIBUTE((warn_unused_result))
dberr_t
btr_cur_upd_lock_and_undo(
/*======================*/
ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor on record to update */
- const ulint* offsets,/*!< in: rec_get_offsets() on cursor */
+ const offset_t* offsets,/*!< in: rec_get_offsets() on cursor */
const upd_t* update, /*!< in: update vector */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
@@ -1761,6 +3499,7 @@ btr_cur_upd_lock_and_undo(
index = cursor->index;
ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(mtr->is_named_space(index->space));
if (!dict_index_is_clust(index)) {
ut_ad(dict_index_is_online_ddl(index)
@@ -1796,7 +3535,6 @@ btr_cur_upd_lock_and_undo(
/***********************************************************//**
Writes a redo log record of updating a record in-place. */
-UNIV_INTERN
void
btr_cur_update_in_place_log(
/*========================*/
@@ -1844,7 +3582,7 @@ btr_cur_update_in_place_log(
trx_write_roll_ptr(log_ptr, 0);
log_ptr += DATA_ROLL_PTR_LEN;
/* TRX_ID */
- log_ptr += mach_ull_write_compressed(log_ptr, 0);
+ log_ptr += mach_u64_write_compressed(log_ptr, 0);
}
mach_write_to_2(log_ptr, page_offset(rec));
@@ -1852,12 +3590,10 @@ btr_cur_update_in_place_log(
row_upd_index_write_log(update, log_ptr, mtr);
}
-#endif /* UNIV_HOTBACKUP */
/***********************************************************//**
Parses a redo log record of updating a record in-place.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
btr_cur_parse_update_in_place(
/*==========================*/
@@ -1875,7 +3611,7 @@ btr_cur_parse_update_in_place(
roll_ptr_t roll_ptr;
ulint rec_offset;
mem_heap_t* heap;
- ulint* offsets;
+ offset_t* offsets;
if (end_ptr < ptr + 1) {
@@ -1914,10 +3650,19 @@ btr_cur_parse_update_in_place(
ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table));
rec = page + rec_offset;
- /* We do not need to reserve btr_search_latch, as the page is only
+ /* We do not need to reserve search latch, as the page is only
being recovered, and there cannot be a hash index to it. */
- offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
+ /* The function rtr_update_mbr_field_in_place() is generating
+ these records on node pointer pages; therefore we have to
+ check if this is a leaf page. */
+
+ offsets = rec_get_offsets(rec, index, NULL,
+ flags != (BTR_NO_UNDO_LOG_FLAG
+ | BTR_NO_LOCKING_FLAG
+ | BTR_KEEP_SYS_FLAG)
+ || page_is_leaf(page),
+ ULINT_UNDEFINED, &heap);
if (!(flags & BTR_KEEP_SYS_FLAG)) {
row_upd_rec_sys_fields_in_recovery(rec, page_zip, offsets,
@@ -1932,20 +3677,18 @@ func_exit:
return(ptr);
}
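The extra boolean threaded into rec_get_offsets() above says whether the record lies on a leaf page, because node-pointer records carry a different field count. The recurring call pattern in these hunks, condensed into a small helper (the helper itself is hypothetical; the rec_get_offsets() signature is the one used throughout this diff):

    /* Hypothetical helper showing the new calling convention: pass true
    when the record is known to be on a leaf page, or page_is_leaf(page)
    when node-pointer records may be encountered as well. */
    static offset_t*
    offsets_for_rec_sketch(const rec_t* rec, const dict_index_t* index,
                           const page_t* page, mem_heap_t** heap)
    {
        return rec_get_offsets(rec, index, NULL,
                               page_is_leaf(page), /* new leaf flag */
                               ULINT_UNDEFINED, heap);
    }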
-#ifndef UNIV_HOTBACKUP
/*************************************************************//**
See if there is enough place in the page modification log to log
an update-in-place.
@retval false if out of space; IBUF_BITMAP_FREE will be reset
outside mtr if the page was recompressed
-@retval true if enough place;
+@retval true if enough space;
IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is
a secondary index leaf page. This has to be done either within the
same mini-transaction, or by invoking ibuf_reset_free_bits() before
mtr_commit(mtr). */
-UNIV_INTERN
bool
btr_cur_update_alloc_zip_func(
/*==========================*/
@@ -1953,7 +3696,7 @@ btr_cur_update_alloc_zip_func(
page_cur_t* cursor, /*!< in/out: B-tree page cursor */
dict_index_t* index, /*!< in: the index corresponding to cursor */
#ifdef UNIV_DEBUG
- ulint* offsets,/*!< in/out: offsets of the cursor record */
+ offset_t* offsets,/*!< in/out: offsets of the cursor record */
#endif /* UNIV_DEBUG */
ulint length, /*!< in: size needed */
bool create, /*!< in: true=delete-and-insert,
@@ -2011,7 +3754,9 @@ out_of_space:
ut_ad(rec_offs_validate(page_cur_get_rec(cursor), index, offsets));
/* Out of space: reset the free bits. */
- if (!dict_index_is_clust(index) && page_is_leaf(page)) {
+ if (!dict_index_is_clust(index)
+ && !dict_table_is_temporary(index->table)
+ && page_is_leaf(page)) {
ibuf_reset_free_bits(page_cur_get_block(cursor));
}
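The contract documented above, that on success the caller must refresh IBUF_BITMAP_FREE for a secondary-index leaf page within the same mini-transaction, can be sketched from the caller's side. btr_cur_update_alloc_zip(), ibuf_update_free_bits_zip() and the types are the symbols used in this file; the wrapper itself is hypothetical:

    /* Hypothetical caller sketch for the IBUF_BITMAP_FREE contract above. */
    static dberr_t
    update_zip_leaf_sketch(page_zip_des_t* page_zip, page_cur_t* cursor,
                           dict_index_t* index, offset_t* offsets,
                           ulint length, mtr_t* mtr)
    {
        if (!btr_cur_update_alloc_zip(page_zip, cursor, index, offsets,
                                      length, /*create=*/false, mtr)) {
            /* Out of space: IBUF_BITMAP_FREE was reset for us above. */
            return(DB_ZIP_OVERFLOW);
        }

        /* ... perform the in-place modification here ... */

        if (!dict_index_is_clust(index)
            && page_is_leaf(page_cur_get_page(cursor))) {
            /* Keep the change-buffer free bits in sync in this mtr. */
            ibuf_update_free_bits_zip(page_cur_get_block(cursor), mtr);
        }
        return(DB_SUCCESS);
    }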
@@ -2025,7 +3770,6 @@ We assume here that the ordering fields of the record do not change.
@retval DB_SUCCESS on success
@retval DB_ZIP_OVERFLOW if there is not enough space left
on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */
-UNIV_INTERN
dberr_t
btr_cur_update_in_place(
/*====================*/
@@ -2033,7 +3777,7 @@ btr_cur_update_in_place(
btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
- ulint* offsets,/*!< in/out: offsets on cursor->page_cur.rec */
+ offset_t* offsets,/*!< in/out: offsets on cursor->page_cur.rec */
const upd_t* update, /*!< in: update vector */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
@@ -2051,35 +3795,36 @@ btr_cur_update_in_place(
rec_t* rec;
roll_ptr_t roll_ptr = 0;
ulint was_delete_marked;
- ibool is_hashed;
+ ut_ad(page_is_leaf(cursor->page_cur.block->frame));
rec = btr_cur_get_rec(cursor);
index = cursor->index;
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
+ ut_ad(trx_id > 0 || (flags & BTR_KEEP_SYS_FLAG));
/* The insert buffer tree should never be updated in place. */
ut_ad(!dict_index_is_ibuf(index));
ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
|| dict_index_is_clust(index));
ut_ad(thr_get_trx(thr)->id == trx_id
- || (flags & ~(BTR_KEEP_POS_FLAG | BTR_KEEP_IBUF_BITMAP))
+ || (flags & ulint(~(BTR_KEEP_POS_FLAG | BTR_KEEP_IBUF_BITMAP)))
== (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
| BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
- ut_ad(fil_page_get_type(btr_cur_get_page(cursor)) == FIL_PAGE_INDEX);
+ ut_ad(fil_page_index_page_check(btr_cur_get_page(cursor)));
ut_ad(btr_page_get_index_id(btr_cur_get_page(cursor)) == index->id);
-#ifdef UNIV_DEBUG
- if (btr_cur_print_record_ops) {
- btr_cur_trx_report(trx_id, index, "update ");
- rec_print_new(stderr, rec, offsets);
- }
-#endif /* UNIV_DEBUG */
+ DBUG_LOG("ib_cur",
+ "update-in-place " << index->name << " (" << index->id
+ << ") by " << ib::hex(trx_id) << ": "
+ << rec_printer(rec, offsets).str());
block = btr_cur_get_block(cursor);
page_zip = buf_block_get_page_zip(block);
/* Check that enough space is available on the compressed page. */
if (page_zip) {
+ ut_ad(!dict_table_is_temporary(index->table));
+
if (!btr_cur_update_alloc_zip(
page_zip, btr_cur_get_page_cur(cursor),
index, offsets, rec_offs_size(offsets),
@@ -2108,34 +3853,47 @@ btr_cur_update_in_place(
was_delete_marked = rec_get_deleted_flag(
rec, page_is_comp(buf_block_get_frame(block)));
+ /* In delete-marked records, DB_TRX_ID must always refer to an
+ existing undo log record. */
+ ut_ad(!was_delete_marked
+ || !dict_index_is_clust(index)
+ || row_get_rec_trx_id(rec, index, offsets));
- is_hashed = (block->index != NULL);
-
- if (is_hashed) {
- /* TO DO: Can we skip this if none of the fields
- index->search_info->curr_n_fields
- are being updated? */
-
- /* The function row_upd_changes_ord_field_binary works only
- if the update vector was built for a clustered index, we must
- NOT call it if index is secondary */
-
- if (!dict_index_is_clust(index)
- || row_upd_changes_ord_field_binary(index, update, thr,
- NULL, NULL)) {
+#ifdef BTR_CUR_HASH_ADAPT
+ {
+ rw_lock_t* ahi_latch = block->index
+ ? btr_get_search_latch(index) : NULL;
+ if (ahi_latch) {
+ /* TO DO: Can we skip this if none of the fields
+ index->search_info->curr_n_fields
+ are being updated? */
+
+ /* The function row_upd_changes_ord_field_binary
+ does not work on a secondary index. */
+
+ if (!dict_index_is_clust(index)
+ || row_upd_changes_ord_field_binary(
+ index, update, thr, NULL, NULL)) {
+
+ /* Remove possible hash index pointer
+ to this record */
+ btr_search_update_hash_on_delete(cursor);
+ }
- /* Remove possible hash index pointer to this record */
- btr_search_update_hash_on_delete(cursor);
+ rw_lock_x_lock(ahi_latch);
}
- rw_lock_x_lock(&btr_search_latch);
- }
+ assert_block_ahi_valid(block);
+#endif /* BTR_CUR_HASH_ADAPT */
- row_upd_rec_in_place(rec, index, offsets, update, page_zip);
+ row_upd_rec_in_place(rec, index, offsets, update, page_zip);
- if (is_hashed) {
- rw_lock_x_unlock(&btr_search_latch);
+#ifdef BTR_CUR_HASH_ADAPT
+ if (ahi_latch) {
+ rw_lock_x_unlock(ahi_latch);
+ }
}
+#endif /* BTR_CUR_HASH_ADAPT */
btr_cur_update_in_place_log(flags, rec, index, update,
trx_id, roll_ptr, mtr);
@@ -2156,12 +3914,10 @@ func_exit:
if (page_zip
&& !(flags & BTR_KEEP_IBUF_BITMAP)
&& !dict_index_is_clust(index)
- && block) {
- buf_frame_t* frame = buf_block_get_frame(block);
- if (frame && page_is_leaf(frame)) {
- /* Update the free bits in the insert buffer. */
- ibuf_update_free_bits_zip(block, mtr);
- }
+ && page_is_leaf(buf_block_get_frame(block))) {
+ /* Update the free bits in the insert buffer. */
+ ut_ad(!dict_table_is_temporary(index->table));
+ ibuf_update_free_bits_zip(block, mtr);
}
return(err);
@@ -2179,7 +3935,6 @@ fields of the record do not change.
@retval DB_UNDERFLOW if the page would become too empty
@retval DB_ZIP_OVERFLOW if there is not enough space left
on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */
-UNIV_INTERN
dberr_t
btr_cur_optimistic_update(
/*======================*/
@@ -2187,7 +3942,7 @@ btr_cur_optimistic_update(
btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
- ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
+ offset_t** offsets,/*!< out: offsets on cursor->page_cur.rec */
mem_heap_t** heap, /*!< in/out: pointer to NULL or memory heap */
const upd_t* update, /*!< in: update vector; this must also
contain trx id and roll ptr fields */
@@ -2214,39 +3969,34 @@ btr_cur_optimistic_update(
dtuple_t* new_entry;
roll_ptr_t roll_ptr;
ulint i;
- ulint n_ext;
block = btr_cur_get_block(cursor);
page = buf_block_get_frame(block);
rec = btr_cur_get_rec(cursor);
index = cursor->index;
+ ut_ad(trx_id > 0 || (flags & BTR_KEEP_SYS_FLAG));
ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+ /* This is intended only for leaf page updates */
+ ut_ad(page_is_leaf(page));
/* The insert buffer tree should never be updated in place. */
ut_ad(!dict_index_is_ibuf(index));
ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
|| dict_index_is_clust(index));
ut_ad(thr_get_trx(thr)->id == trx_id
- || (flags & ~(BTR_KEEP_POS_FLAG | BTR_KEEP_IBUF_BITMAP))
+ || (flags & ulint(~(BTR_KEEP_POS_FLAG | BTR_KEEP_IBUF_BITMAP)))
== (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
| BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
- ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+ ut_ad(fil_page_index_page_check(page));
ut_ad(btr_page_get_index_id(page) == index->id);
- *offsets = rec_get_offsets(rec, index, *offsets,
+ *offsets = rec_get_offsets(rec, index, *offsets, true,
ULINT_UNDEFINED, heap);
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
ut_a(!rec_offs_any_null_extern(rec, *offsets)
|| trx_is_recv(thr_get_trx(thr)));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
-#ifdef UNIV_DEBUG
- if (btr_cur_print_record_ops) {
- btr_cur_trx_report(trx_id, index, "update ");
- rec_print_new(stderr, rec, *offsets);
- }
-#endif /* UNIV_DEBUG */
-
if (!row_upd_changes_field_size_or_external(index, *offsets, update)) {
/* The simplest and the most common case: the update does not
@@ -2264,6 +4014,10 @@ any_extern:
/* Externally stored fields are treated in pessimistic
update */
+ /* prefetch siblings of the leaf for the pessimistic
+ operation. */
+ btr_cur_prefetch_siblings(block);
+
return(DB_OVERFLOW);
}
@@ -2274,6 +4028,11 @@ any_extern:
}
}
+ DBUG_LOG("ib_cur",
+ "update " << index->name << " (" << index->id << ") by "
+ << ib::hex(trx_id) << ": "
+ << rec_printer(rec, *offsets).str());
+
page_cursor = btr_cur_get_page_cur(cursor);
if (!*heap) {
@@ -2282,10 +4041,8 @@ any_extern:
+ DTUPLE_EST_ALLOC(rec_offs_n_fields(*offsets)));
}
- new_entry = row_rec_to_index_entry(rec, index, *offsets,
- &n_ext, *heap);
- /* We checked above that there are no externally stored fields. */
- ut_a(!n_ext);
+ new_entry = row_rec_to_index_entry(rec, index, *offsets, *heap);
+ ut_ad(!dtuple_get_n_ext(new_entry));
/* The page containing the clustered index record
corresponding to new_entry is latched in mtr.
@@ -2301,9 +4058,11 @@ any_extern:
#endif /* UNIV_ZIP_DEBUG */
if (page_zip) {
+ ut_ad(!dict_table_is_temporary(index->table));
+
if (page_zip_rec_needs_ext(new_rec_size, page_is_comp(page),
dict_index_get_n_fields(index),
- page_zip_get_size(page_zip))) {
+ dict_table_page_size(index->table))) {
goto any_extern;
}
@@ -2317,10 +4076,10 @@ any_extern:
}
/* We limit max record size to 16k even for 64k page size. */
- if (new_rec_size >= COMPRESSED_REC_MAX_DATA_SIZE ||
- (!dict_table_is_comp(index->table)
- && new_rec_size >= REDUNDANT_REC_MAX_DATA_SIZE)) {
- err = DB_OVERFLOW;
+ if (new_rec_size >= COMPRESSED_REC_MAX_DATA_SIZE ||
+ (!dict_table_is_comp(index->table)
+ && new_rec_size >= REDUNDANT_REC_MAX_DATA_SIZE)) {
+ err = DB_OVERFLOW;
goto func_exit;
}
@@ -2337,7 +4096,7 @@ any_extern:
if (UNIV_UNLIKELY(page_get_data_size(page)
- old_rec_size + new_rec_size
- < BTR_CUR_PAGE_COMPRESS_LIMIT)) {
+ < BTR_CUR_PAGE_COMPRESS_LIMIT(index))) {
/* We may need to update the IBUF_BITMAP_FREE
bits after a reorganize that was done in
btr_cur_update_alloc_zip(). */
@@ -2355,7 +4114,8 @@ any_extern:
+ page_get_max_insert_size_after_reorganize(page, 1));
if (!page_zip) {
- max_ins_size = page_get_max_insert_size_after_reorganize(page, 1);
+ max_ins_size = page_get_max_insert_size_after_reorganize(
+ page, 1);
}
if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT)
@@ -2388,8 +4148,9 @@ any_extern:
/* Ok, we may do the replacement. Store on the page infimum the
explicit locks on rec, before deleting rec (see the comment in
btr_cur_pessimistic_update). */
-
- lock_rec_store_on_page_infimum(block, rec);
+ if (!dict_table_is_locking_disabled(index->table)) {
+ lock_rec_store_on_page_infimum(block, rec);
+ }
btr_search_update_hash_on_delete(cursor);
@@ -2410,24 +4171,31 @@ any_extern:
ut_a(rec); /* <- We calculated above the insert would fit */
/* Restore the old explicit lock state on the record */
-
- lock_rec_restore_from_page_infimum(block, rec, block);
+ if (!dict_table_is_locking_disabled(index->table)) {
+ lock_rec_restore_from_page_infimum(block, rec, block);
+ }
page_cur_move_to_next(page_cursor);
ut_ad(err == DB_SUCCESS);
func_exit:
if (!(flags & BTR_KEEP_IBUF_BITMAP)
- && !dict_index_is_clust(index)
- && page_is_leaf(page)) {
-
+ && !dict_index_is_clust(index)) {
+ /* Update the free bits in the insert buffer. */
if (page_zip) {
+ ut_ad(!dict_table_is_temporary(index->table));
ibuf_update_free_bits_zip(block, mtr);
- } else {
+ } else if (!dict_table_is_temporary(index->table)) {
ibuf_update_free_bits_low(block, max_ins_size, mtr);
}
}
+ if (err != DB_SUCCESS) {
+ /* prefetch siblings of the leaf for the pessimistic
+ operation. */
+ btr_cur_prefetch_siblings(block);
+ }
+
return(err);
}
@@ -2447,9 +4215,6 @@ btr_cur_pess_upd_restore_supremum(
{
page_t* page;
buf_block_t* prev_block;
- ulint space;
- ulint zip_size;
- ulint prev_page_no;
page = buf_block_get_frame(block);
@@ -2459,16 +4224,14 @@ btr_cur_pess_upd_restore_supremum(
return;
}
- space = buf_block_get_space(block);
- zip_size = buf_block_get_zip_size(block);
- prev_page_no = btr_page_get_prev(page, mtr);
+ const uint32_t prev_page_no = btr_page_get_prev(page);
+
+ const page_id_t page_id(block->page.id.space(), prev_page_no);
ut_ad(prev_page_no != FIL_NULL);
- prev_block = buf_page_get_with_no_latch(space, zip_size,
- prev_page_no, mtr);
+ prev_block = buf_page_get_with_no_latch(page_id, block->page.size, mtr);
#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_next(prev_block->frame, mtr)
- == page_get_page_no(page));
+ ut_a(btr_page_get_next(prev_block->frame) == block->page.id.page_no());
#endif /* UNIV_BTR_DEBUG */
/* We must already have an x-latch on prev_block! */
@@ -2480,45 +4243,12 @@ btr_cur_pess_upd_restore_supremum(
}
/*************************************************************//**
-Check if the total length of the modified blob for the row is within 10%
-of the total redo log size. This constraint on the blob length is to
-avoid overwriting the redo logs beyond the last checkpoint lsn.
-@return DB_SUCCESS or DB_TOO_BIG_FOR_REDO. */
-static
-dberr_t
-btr_check_blob_limit(const big_rec_t* big_rec_vec)
-{
- const ib_uint64_t redo_size = srv_n_log_files * srv_log_file_size
- * UNIV_PAGE_SIZE;
- const ib_uint64_t redo_10p = redo_size / 10;
- ib_uint64_t total_blob_len = 0;
- dberr_t err = DB_SUCCESS;
-
- /* Calculate the total number of bytes for blob data */
- for (ulint i = 0; i < big_rec_vec->n_fields; i++) {
- total_blob_len += big_rec_vec->fields[i].len;
- }
-
- if (total_blob_len > redo_10p) {
- ib_logf(IB_LOG_LEVEL_ERROR, "The total blob data"
- " length (" UINT64PF ") is greater than"
- " 10%% of the total redo log size (" UINT64PF
- "). Please increase total redo log size.",
- total_blob_len, redo_size);
- err = DB_TOO_BIG_FOR_REDO;
- }
-
- return(err);
-}
-
-/*************************************************************//**
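For context on the deletion above: the arithmetic behind the 10% redo-log constraint that btr_check_blob_limit() used to enforce is easy to reproduce standalone. The sizes below are illustrative assumptions, not server defaults:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        /* Assumed configuration: 2 redo files of 3072 pages, 16 KiB pages. */
        const uint64_t n_log_files   = 2;      /* srv_n_log_files */
        const uint64_t log_file_size = 3072;   /* srv_log_file_size, in pages */
        const uint64_t page_size     = 16384;  /* UNIV_PAGE_SIZE */

        const uint64_t redo_size = n_log_files * log_file_size * page_size;
        const uint64_t redo_10p  = redo_size / 10;   /* ~9.6 MiB here */

        const uint64_t total_blob_len = 12ULL << 20; /* 12 MiB of BLOB data */

        /* 12 MiB > 9.6 MiB: the removed check would have failed with
        DB_TOO_BIG_FOR_REDO; after this change the limit is gone. */
        std::printf("%s\n", total_blob_len > redo_10p
                    ? "DB_TOO_BIG_FOR_REDO" : "ok");
        return 0;
    }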
Performs an update of a record on a page of a tree. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. If the
update is made on the leaf level, to avoid deadlocks, mtr must also
own x-latches to brothers of page, if those brothers exist. We assume
here that the ordering fields of the record do not change.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
dberr_t
btr_cur_pessimistic_update(
/*=======================*/
@@ -2527,7 +4257,7 @@ btr_cur_pessimistic_update(
btr_cur_t* cursor, /*!< in/out: cursor on the record to update;
cursor may become invalid if *big_rec == NULL
|| !(flags & BTR_KEEP_POS_FLAG) */
- ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
+ offset_t** offsets,/*!< out: offsets on cursor->page_cur.rec */
mem_heap_t** offsets_heap,
/*!< in/out: pointer to memory heap
that can be emptied */
@@ -2536,9 +4266,10 @@ btr_cur_pessimistic_update(
big_rec and the index tuple */
big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
be stored externally by the caller */
- const upd_t* update, /*!< in: update vector; this is allowed also
- contain trx id and roll ptr fields, but
- the values in update vector have no effect */
+ upd_t* update, /*!< in/out: update vector; this is allowed to
+ also contain trx id and roll ptr fields.
+ Non-updated columns that are moved offpage will
+ be appended to this. */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
que_thr_t* thr, /*!< in: query thread */
@@ -2559,7 +4290,6 @@ btr_cur_pessimistic_update(
roll_ptr_t roll_ptr;
ibool was_first;
ulint n_reserved = 0;
- ulint n_ext;
ulint max_ins_size = 0;
*offsets = NULL;
@@ -2570,18 +4300,22 @@ btr_cur_pessimistic_update(
page_zip = buf_block_get_page_zip(block);
index = cursor->index;
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
+ MTR_MEMO_X_LOCK |
+ MTR_MEMO_SX_LOCK));
+ ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
#ifdef UNIV_ZIP_DEBUG
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
+ ut_ad(!page_zip || !dict_table_is_temporary(index->table));
/* The insert buffer tree should never be updated in place. */
ut_ad(!dict_index_is_ibuf(index));
+ ut_ad(trx_id > 0
+ || (flags & BTR_KEEP_SYS_FLAG));
ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
|| dict_index_is_clust(index));
ut_ad(thr_get_trx(thr)->id == trx_id
- || (flags & ~BTR_KEEP_POS_FLAG)
+ || (flags & ulint(~BTR_KEEP_POS_FLAG))
== (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
| BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
@@ -2605,49 +4339,22 @@ btr_cur_pessimistic_update(
&& optim_err != DB_ZIP_OVERFLOW
&& !dict_index_is_clust(index)
&& page_is_leaf(page)) {
+ ut_ad(!dict_table_is_temporary(index->table));
ibuf_update_free_bits_zip(block, mtr);
}
- return(err);
- }
-
- /* Do lock checking and undo logging */
- err = btr_cur_upd_lock_and_undo(flags, cursor, *offsets,
- update, cmpl_info,
- thr, mtr, &roll_ptr);
- if (err != DB_SUCCESS) {
- goto err_exit;
- }
-
- if (optim_err == DB_OVERFLOW) {
- ulint reserve_flag;
-
- /* First reserve enough free space for the file segments
- of the index tree, so that the update will not fail because
- of lack of space */
-
- ulint n_extents = cursor->tree_height / 16 + 3;
-
- if (flags & BTR_NO_UNDO_LOG_FLAG) {
- reserve_flag = FSP_CLEANING;
- } else {
- reserve_flag = FSP_NORMAL;
+ if (big_rec_vec != NULL) {
+ dtuple_big_rec_free(big_rec_vec);
}
- if (!fsp_reserve_free_extents(&n_reserved, index->space,
- n_extents, reserve_flag, mtr)) {
- err = DB_OUT_OF_FILE_SPACE;
- goto err_exit;
- }
+ return(err);
}
rec = btr_cur_get_rec(cursor);
-
- *offsets = rec_get_offsets(
- rec, index, *offsets, ULINT_UNDEFINED, offsets_heap);
+ ut_ad(rec_offs_validate(rec, index, *offsets));
dtuple_t* new_entry = row_rec_to_index_entry(
- rec, index, *offsets, &n_ext, entry_heap);
+ rec, index, *offsets, entry_heap);
/* The page containing the clustered index record
corresponding to new_entry is latched in mtr. If the
@@ -2657,14 +4364,15 @@ btr_cur_pessimistic_update(
itself. Thus the following call is safe. */
row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
FALSE, entry_heap);
- if (!(flags & BTR_KEEP_SYS_FLAG)) {
- row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
- roll_ptr);
- row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
- trx_id);
- }
- if ((flags & BTR_NO_UNDO_LOG_FLAG) && rec_offs_any_extern(*offsets)) {
+ /* We have to set appropriate extern storage bits in the new
+ record to be inserted: we have to remember which fields were such */
+
+ ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec));
+ ut_ad(rec_offs_validate(rec, index, *offsets));
+
+ if ((flags & BTR_NO_UNDO_LOG_FLAG)
+ && rec_offs_any_extern(*offsets)) {
/* We are in a transaction rollback undoing a row
update: we must free possible externally stored fields
which got new values in the update, if they are not
@@ -2673,35 +4381,24 @@ btr_cur_pessimistic_update(
update it back again. */
ut_ad(big_rec_vec == NULL);
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(thr_get_trx(thr)->in_rollback);
+
+ DEBUG_SYNC_C("blob_rollback_middle");
btr_rec_free_updated_extern_fields(
- index, rec, page_zip, *offsets, update,
- trx_is_recv(thr_get_trx(thr))
- ? RB_RECOVERY : RB_NORMAL, mtr);
+ index, rec, page_zip, *offsets, update, true, mtr);
}
- /* We have to set appropriate extern storage bits in the new
- record to be inserted: we have to remember which fields were such */
+ ulint n_ext = dtuple_get_n_ext(new_entry);
- ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec));
- ut_ad(rec_offs_validate(rec, index, *offsets));
- n_ext += btr_push_update_extern_fields(new_entry, update, entry_heap);
+ if (page_zip_rec_needs_ext(
+ rec_get_converted_size(index, new_entry, n_ext),
+ page_is_comp(page),
+ dict_index_get_n_fields(index),
+ block->page.size)) {
- if (page_zip) {
- ut_ad(page_is_comp(page));
- if (page_zip_rec_needs_ext(
- rec_get_converted_size(index, new_entry, n_ext),
- TRUE,
- dict_index_get_n_fields(index),
- page_zip_get_size(page_zip))) {
-
- goto make_external;
- }
- } else if (page_zip_rec_needs_ext(
- rec_get_converted_size(index, new_entry, n_ext),
- page_is_comp(page), 0, 0)) {
-make_external:
- big_rec_vec = dtuple_convert_big_rec(index, new_entry, &n_ext);
+ big_rec_vec = dtuple_convert_big_rec(index, update, new_entry, &n_ext);
if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
/* We cannot goto return_after_reservations,
@@ -2726,21 +4423,42 @@ make_external:
ut_ad(flags & BTR_KEEP_POS_FLAG);
}
- if (big_rec_vec) {
+ /* Do lock checking and undo logging */
+ err = btr_cur_upd_lock_and_undo(flags, cursor, *offsets,
+ update, cmpl_info,
+ thr, mtr, &roll_ptr);
+ if (err != DB_SUCCESS) {
+ goto err_exit;
+ }
- err = btr_check_blob_limit(big_rec_vec);
+ if (optim_err == DB_OVERFLOW) {
- if (err != DB_SUCCESS) {
- if (n_reserved > 0) {
- fil_space_release_free_extents(
- index->space, n_reserved);
- }
+ /* First reserve enough free space for the file segments
+ of the index tree, so that the update will not fail because
+ of lack of space */
+
+ ulint n_extents = cursor->tree_height / 16 + 3;
+
+ if (!fsp_reserve_free_extents(
+ &n_reserved, index->space, n_extents,
+ flags & BTR_NO_UNDO_LOG_FLAG
+ ? FSP_CLEANING : FSP_NORMAL,
+ mtr)) {
+ err = DB_OUT_OF_FILE_SPACE;
goto err_exit;
}
}
+ if (!(flags & BTR_KEEP_SYS_FLAG)) {
+ row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
+ roll_ptr);
+ row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
+ trx_id);
+ }
+
if (!page_zip) {
- max_ins_size = page_get_max_insert_size_after_reorganize(page, 1);
+ max_ins_size = page_get_max_insert_size_after_reorganize(
+ page, 1);
}
/* Store state of explicit locks on rec on the page infimum record,
@@ -2751,8 +4469,9 @@ make_external:
btr_root_raise_and_insert. Therefore we cannot in the lock system
delete the lock structs set on the root page even if the root
page carries just node pointers. */
-
- lock_rec_store_on_page_infimum(block, rec);
+ if (!dict_table_is_locking_disabled(index->table)) {
+ lock_rec_store_on_page_infimum(block, rec);
+ }
btr_search_update_hash_on_delete(cursor);
@@ -2771,14 +4490,20 @@ make_external:
if (rec) {
page_cursor->rec = rec;
- lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
- rec, block);
+ if (!dict_table_is_locking_disabled(index->table)) {
+ lock_rec_restore_from_page_infimum(
+ btr_cur_get_block(cursor), rec, block);
+ }
if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets))) {
/* The new inserted record owns its possible externally
stored fields */
btr_cur_unmark_extern_fields(
page_zip, rec, index, *offsets, mtr);
+ } else {
+ /* In delete-marked records, DB_TRX_ID must
+ always refer to an existing undo log record. */
+ ut_ad(row_get_rec_trx_id(rec, index, *offsets));
}
bool adjust = big_rec_vec && (flags & BTR_KEEP_POS_FLAG);
@@ -2790,18 +4515,30 @@ make_external:
}
} else if (!dict_index_is_clust(index)
&& page_is_leaf(page)) {
-
/* Update the free bits in the insert buffer.
This is the same block which was skipped by
BTR_KEEP_IBUF_BITMAP. */
if (page_zip) {
+ ut_ad(!dict_table_is_temporary(index->table));
ibuf_update_free_bits_zip(block, mtr);
- } else {
+ } else if (!dict_table_is_temporary(index->table)) {
ibuf_update_free_bits_low(block, max_ins_size,
mtr);
}
}
+ if (!srv_read_only_mode
+ && !big_rec_vec
+ && page_is_leaf(page)
+ && !dict_index_is_online_ddl(index)) {
+
+ mtr_memo_release(mtr, dict_index_get_lock(index),
+ MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK);
+
+ /* NOTE: We cannot release root block latch here, because it
+ has the segment header and has already been modified in most cases. */
+ }
+
err = DB_SUCCESS;
goto return_after_reservations;
} else {
@@ -2815,24 +4552,31 @@ make_external:
/* Out of space: reset the free bits.
This is the same block which was skipped by
BTR_KEEP_IBUF_BITMAP. */
- if (!dict_index_is_clust(index) && page_is_leaf(page)) {
+ if (!dict_index_is_clust(index)
+ && !dict_table_is_temporary(index->table)
+ && page_is_leaf(page)) {
ibuf_reset_free_bits(block);
}
}
- if (big_rec_vec) {
+ if (big_rec_vec != NULL) {
ut_ad(page_is_leaf(page));
ut_ad(dict_index_is_clust(index));
ut_ad(flags & BTR_KEEP_POS_FLAG);
/* btr_page_split_and_insert() in
btr_cur_pessimistic_insert() invokes
- mtr_memo_release(mtr, index->lock, MTR_MEMO_X_LOCK).
+ mtr_memo_release(mtr, index->lock, MTR_MEMO_SX_LOCK).
We must keep the index->lock when we created a
big_rec, so that row_upd_clust_rec() can store the
big_rec in the same mini-transaction. */
- mtr_x_lock(dict_index_get_lock(index), mtr);
+ ut_ad(mtr_memo_contains_flagged(mtr,
+ dict_index_get_lock(index),
+ MTR_MEMO_X_LOCK |
+ MTR_MEMO_SX_LOCK));
+
+ mtr_sx_lock(dict_index_get_lock(index), mtr);
}
/* Was the record to be updated positioned as the first user
@@ -2856,7 +4600,12 @@ make_external:
ut_ad(rec_offs_validate(rec, cursor->index, *offsets));
page_cursor->rec = rec;
- if (dict_index_is_sec_or_ibuf(index)) {
+ /* Multiple transactions cannot operate on the same
+ temp-table in parallel.
+ max_trx_id is ignored for temp tables because it is not required
+ for MVCC. */
+ if (dict_index_is_sec_or_ibuf(index)
+ && !dict_table_is_temporary(index->table)) {
/* Update PAGE_MAX_TRX_ID in the index page header.
It was not updated by btr_cur_pessimistic_insert()
because of BTR_NO_LOCKING_FLAG. */
@@ -2882,17 +4631,23 @@ make_external:
btr_cur_unmark_extern_fields(page_zip,
rec, index, *offsets, mtr);
+ } else {
+ /* In delete-marked records, DB_TRX_ID must
+ always refer to an existing undo log record. */
+ ut_ad(row_get_rec_trx_id(rec, index, *offsets));
}
- lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
- rec, block);
+ if (!dict_table_is_locking_disabled(index->table)) {
+ lock_rec_restore_from_page_infimum(
+ btr_cur_get_block(cursor), rec, block);
+ }
/* If necessary, restore also the correct lock state for a new,
preceding supremum record created in a page split. While the old
record was nonexistent, the supremum might have inherited its locks
from a wrong record. */
- if (!was_first) {
+ if (!was_first && !dict_table_is_locking_disabled(index->table)) {
btr_cur_pess_upd_restore_supremum(btr_cur_get_block(cursor),
rec, mtr);
}
@@ -2929,6 +4684,7 @@ btr_cur_del_mark_set_clust_rec_log(
byte* log_ptr;
ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
+ ut_ad(mtr->is_named_space(index->space));
log_ptr = mlog_open_and_write_index(mtr, rec, index,
page_rec_is_comp(rec)
@@ -2952,13 +4708,11 @@ btr_cur_del_mark_set_clust_rec_log(
mlog_close(mtr, log_ptr);
}
-#endif /* !UNIV_HOTBACKUP */
/****************************************************************//**
Parses the redo log record for delete marking or unmarking of a clustered
index record.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
btr_cur_parse_del_mark_set_clust_rec(
/*=================================*/
@@ -3006,51 +4760,72 @@ btr_cur_parse_del_mark_set_clust_rec(
ut_a(offset <= UNIV_PAGE_SIZE);
+ /* In delete-marked records, DB_TRX_ID must
+ always refer to an existing undo log record. */
+ ut_ad(trx_id || (flags & BTR_KEEP_SYS_FLAG));
+
if (page) {
rec = page + offset;
- /* We do not need to reserve btr_search_latch, as the page
+ /* We do not need to reserve search latch, as the page
is only being recovered, and there cannot be a hash index to
it. Besides, these fields are being updated in place
and the adaptive hash index does not depend on them. */
btr_rec_set_deleted_flag(rec, page_zip, val);
+ /* pos is the offset of DB_TRX_ID in the clustered index.
+ Debug assertions may also access DB_ROLL_PTR at pos+1.
+ Therefore, we must compute offsets for the first pos+2
+ clustered index fields. */
+ ut_ad(pos <= MAX_REF_PARTS);
- if (!(flags & BTR_KEEP_SYS_FLAG)) {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- rec_offs_init(offsets_);
+ offset_t offsets[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + 2];
+ rec_offs_init(offsets);
+ mem_heap_t* heap = NULL;
+ if (!(flags & BTR_KEEP_SYS_FLAG)) {
row_upd_rec_sys_fields_in_recovery(
rec, page_zip,
- rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap),
+ rec_get_offsets(rec, index, offsets, true,
+ pos + 2, &heap),
pos, trx_id, roll_ptr);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
+ } else {
+ /* In delete-marked records, DB_TRX_ID must
+ always refer to an existing undo log record. */
+ ut_ad(memcmp(rec_get_nth_field(
+ rec,
+ rec_get_offsets(rec, index,
+ offsets, true,
+ pos, &heap),
+ pos, &offset),
+ field_ref_zero, DATA_TRX_ID_LEN));
+ ut_ad(offset == DATA_TRX_ID_LEN);
+ }
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
}
}
return(ptr);
}
-#ifndef UNIV_HOTBACKUP
/***********************************************************//**
Marks a clustered index record deleted. Writes an undo log record to
undo log on this delete marking. Writes in the trx id field the id
of the deleting transaction, and in the roll ptr field pointer to the
undo log record created.
-@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
dberr_t
btr_cur_del_mark_set_clust_rec(
/*===========================*/
buf_block_t* block, /*!< in/out: buffer block of the record */
rec_t* rec, /*!< in/out: record */
dict_index_t* index, /*!< in: clustered index of the record */
- const ulint* offsets,/*!< in: rec_get_offsets(rec) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec) */
que_thr_t* thr, /*!< in: query thread */
+ const dtuple_t* entry, /*!< in: dtuple for the deleting record, also
+ contains the virtual cols if there are any */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
roll_ptr_t roll_ptr;
@@ -3062,17 +4837,16 @@ btr_cur_del_mark_set_clust_rec(
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
ut_ad(buf_block_get_frame(block) == page_align(rec));
- ut_ad(page_is_leaf(page_align(rec)));
+ ut_ad(page_rec_is_leaf(rec));
+ ut_ad(mtr->is_named_space(index->space));
-#ifdef UNIV_DEBUG
- if (btr_cur_print_record_ops) {
- btr_cur_trx_report(thr_get_trx(thr)->id, index, "del mark ");
- rec_print_new(stderr, rec, offsets);
+ if (rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
+ /* We may already have delete-marked this record
+ when executing an ON DELETE CASCADE operation. */
+ ut_ad(row_get_rec_trx_id(rec, index, offsets)
+ == thr_get_trx(thr)->id);
+ return(DB_SUCCESS);
}
-#endif /* UNIV_DEBUG */
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
err = lock_clust_rec_modify_check_and_lock(BTR_NO_LOCKING_FLAG, block,
rec, index, offsets, thr);
@@ -3082,25 +4856,30 @@ btr_cur_del_mark_set_clust_rec(
return(err);
}
- err = trx_undo_report_row_operation(thr,
- index, NULL, NULL, 0, rec, offsets,
+ err = trx_undo_report_row_operation(thr, index,
+ entry, NULL, 0, rec, offsets,
&roll_ptr);
if (err != DB_SUCCESS) {
return(err);
}
- /* The btr_search_latch is not needed here, because
+ /* The search latch is not needed here, because
the adaptive hash index does not depend on the delete-mark
and the delete-mark is being updated in place. */
page_zip = buf_block_get_page_zip(block);
- btr_blob_dbg_set_deleted_flag(rec, index, offsets, TRUE);
btr_rec_set_deleted_flag(rec, page_zip, TRUE);
trx = thr_get_trx(thr);
+ DBUG_LOG("ib_cur",
+ "delete-mark clust " << index->table->name
+ << " (" << index->id << ") by "
+ << ib::hex(trx_get_id_for_print(trx)) << ": "
+ << rec_printer(rec, offsets).str());
+
if (dict_index_is_online_ddl(index)) {
row_log_table_delete(rec, index, offsets, NULL);
}
@@ -3145,13 +4924,11 @@ btr_cur_del_mark_set_sec_rec_log(
mlog_close(mtr, log_ptr);
}
-#endif /* !UNIV_HOTBACKUP */
/****************************************************************//**
Parses the redo log record for delete marking or unmarking of a secondary
index record.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
btr_cur_parse_del_mark_set_sec_rec(
/*===============================*/
@@ -3180,7 +4957,7 @@ btr_cur_parse_del_mark_set_sec_rec(
if (page) {
rec = page + offset;
- /* We do not need to reserve btr_search_latch, as the page
+ /* We do not need to reserve search latch, as the page
is only being recovered, and there cannot be a hash index to
it. Besides, the delete-mark flag is being updated in place
and the adaptive hash index does not depend on it. */
@@ -3191,11 +4968,9 @@ btr_cur_parse_del_mark_set_sec_rec(
return(ptr);
}
-#ifndef UNIV_HOTBACKUP
/***********************************************************//**
Sets a secondary index record delete mark to TRUE or FALSE.
-@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
dberr_t
btr_cur_del_mark_set_sec_rec(
/*=========================*/
@@ -3212,14 +4987,6 @@ btr_cur_del_mark_set_sec_rec(
block = btr_cur_get_block(cursor);
rec = btr_cur_get_rec(cursor);
-#ifdef UNIV_DEBUG
- if (btr_cur_print_record_ops) {
- btr_cur_trx_report(thr_get_trx(thr)->id, cursor->index,
- "del mark ");
- rec_print(stderr, rec, cursor->index);
- }
-#endif /* UNIV_DEBUG */
-
err = lock_sec_rec_modify_check_and_lock(flags,
btr_cur_get_block(cursor),
rec, cursor->index, thr, mtr);
@@ -3231,7 +4998,15 @@ btr_cur_del_mark_set_sec_rec(
ut_ad(!!page_rec_is_comp(rec)
== dict_table_is_comp(cursor->index->table));
- /* We do not need to reserve btr_search_latch, as the
+ DBUG_PRINT("ib_cur", ("delete-mark=%u sec %u:%u:%u in %s("
+ IB_ID_FMT ") by " TRX_ID_FMT,
+ unsigned(val),
+ block->page.id.space(), block->page.id.page_no(),
+ unsigned(page_rec_get_heap_no(rec)),
+ cursor->index->name(), cursor->index->id,
+ trx_get_id_for_print(thr_get_trx(thr))));
+
+ /* We do not need to reserve search latch, as the
delete-mark flag is being updated in place and the adaptive
hash index does not depend on it. */
btr_rec_set_deleted_flag(rec, buf_block_get_page_zip(block), val);
@@ -3244,7 +5019,6 @@ btr_cur_del_mark_set_sec_rec(
/***********************************************************//**
Sets a secondary index record's delete mark to the given value. This
function is only used by the insert buffer merge mechanism. */
-UNIV_INTERN
void
btr_cur_set_deleted_flag_for_ibuf(
/*==============================*/
@@ -3256,7 +5030,7 @@ btr_cur_set_deleted_flag_for_ibuf(
ibool val, /*!< in: value to set */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
- /* We do not need to reserve btr_search_latch, as the page
+ /* We do not need to reserve search latch, as the page
has just been read to the buffer pool and there cannot be
a hash index to it. Besides, the delete-mark flag is being
updated in place and the adaptive hash index does not depend
@@ -3275,8 +5049,7 @@ that mtr holds an x-latch on the tree and on the cursor page. To avoid
deadlocks, mtr must also own x-latches to brothers of page, if those
brothers exist. NOTE: it is assumed that the caller has reserved enough
free extents so that the compression will always succeed if done!
-@return TRUE if compression occurred */
-UNIV_INTERN
+@return TRUE if compression occurred */
ibool
btr_cur_compress_if_useful(
/*=======================*/
@@ -3287,11 +5060,27 @@ btr_cur_compress_if_useful(
cursor position even if compression occurs */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
- ut_ad(mtr_memo_contains(mtr,
- dict_index_get_lock(btr_cur_get_index(cursor)),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_flagged(
+ mtr, dict_index_get_lock(btr_cur_get_index(cursor)),
+ MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK));
+ ut_ad(mtr_is_block_fix(
+ mtr, btr_cur_get_block(cursor),
+ MTR_MEMO_PAGE_X_FIX, cursor->index->table));
+
+ if (dict_index_is_spatial(cursor->index)) {
+ const page_t* page = btr_cur_get_page(cursor);
+ const trx_t* trx = NULL;
+
+ if (cursor->rtr_info->thr != NULL) {
+ trx = thr_get_trx(cursor->rtr_info->thr);
+ }
+
+ /* Check whether page lock prevents the compression */
+ if (!lock_test_prdt_page_lock(trx, page_get_space_id(page),
+ page_get_page_no(page))) {
+ return(false);
+ }
+ }
return(btr_cur_compress_recommendation(cursor, mtr)
&& btr_compress(cursor, adjust, mtr));
@@ -3301,8 +5090,7 @@ btr_cur_compress_if_useful(
Removes the record on which the tree cursor is positioned on a leaf page.
It is assumed that the mtr has an x-latch on the page where the cursor is
positioned, but no latch on the whole tree.
-@return TRUE if success, i.e., the page did not become too empty */
-UNIV_INTERN
+@return TRUE if success, i.e., the page did not become too empty */
ibool
btr_cur_optimistic_delete_func(
/*===========================*/
@@ -3321,25 +5109,29 @@ btr_cur_optimistic_delete_func(
buf_block_t* block;
rec_t* rec;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
ibool no_compress_needed;
rec_offs_init(offsets_);
ut_ad(flags == 0 || flags == BTR_CREATE_FLAG);
ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr->is_named_space(cursor->index->space));
+ ut_ad(!cursor->index->is_dummy);
+
/* This is intended only for leaf page deletions */
block = btr_cur_get_block(cursor);
+ ut_ad(block->page.id.space() == cursor->index->space);
ut_ad(page_is_leaf(buf_block_get_frame(block)));
ut_ad(!dict_index_is_online_ddl(cursor->index)
|| dict_index_is_clust(cursor->index)
|| (flags & BTR_CREATE_FLAG));
rec = btr_cur_get_rec(cursor);
- offsets = rec_get_offsets(rec, cursor->index, offsets,
+ offsets = rec_get_offsets(rec, cursor->index, offsets, true,
ULINT_UNDEFINED, &heap);
no_compress_needed = !rec_offs_any_extern(offsets)
@@ -3381,12 +5173,16 @@ btr_cur_optimistic_delete_func(
/* The change buffer does not handle inserts
into non-leaf pages, into clustered indexes,
or into the change buffer. */
- if (page_is_leaf(page)
- && !dict_index_is_clust(cursor->index)
+ if (!dict_index_is_clust(cursor->index)
+ && !dict_table_is_temporary(cursor->index->table)
&& !dict_index_is_ibuf(cursor->index)) {
ibuf_update_free_bits_low(block, max_ins, mtr);
}
}
+ } else {
+ /* prefetch siblings of the leaf for the pessimistic
+ operation. */
+ btr_cur_prefetch_siblings(block);
}
if (UNIV_LIKELY_NULL(heap)) {
@@ -3403,8 +5199,8 @@ or if it is the only page on the level. It is assumed that mtr holds
an x-latch on the tree and on the cursor page. To avoid deadlocks,
mtr must also own x-latches to brothers of page, if those brothers
exist.
-@return TRUE if compression occurred */
-UNIV_INTERN
+@return TRUE if compression occurred and FALSE if it did not, or if
+something went wrong. */
ibool
btr_cur_pessimistic_delete(
/*=======================*/
@@ -3422,7 +5218,7 @@ btr_cur_pessimistic_delete(
stays valid: it points to successor of
deleted record on function exit */
ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */
- enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
+ bool rollback,/*!< in: performing rollback? */
mtr_t* mtr) /*!< in: mtr */
{
buf_block_t* block;
@@ -3431,10 +5227,13 @@ btr_cur_pessimistic_delete(
dict_index_t* index;
rec_t* rec;
ulint n_reserved = 0;
- ibool success;
+ bool success;
ibool ret = FALSE;
mem_heap_t* heap;
- ulint* offsets;
+ offset_t* offsets;
+#ifdef UNIV_DEBUG
+ bool parent_latched = false;
+#endif /* UNIV_DEBUG */
block = btr_cur_get_block(cursor);
page = buf_block_get_frame(block);
@@ -3444,9 +5243,14 @@ btr_cur_pessimistic_delete(
ut_ad(!dict_index_is_online_ddl(index)
|| dict_index_is_clust(index)
|| (flags & BTR_CREATE_FLAG));
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
+ ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
+ MTR_MEMO_X_LOCK
+ | MTR_MEMO_SX_LOCK));
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr->is_named_space(index->space));
+ ut_ad(!index->is_dummy);
+ ut_ad(block->page.id.space() == index->space);
+
if (!has_reserved_extents) {
/* First reserve enough free space for the file segments
of the index tree, so that the node pointer updates will
@@ -3472,24 +5276,32 @@ btr_cur_pessimistic_delete(
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
- offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
+ offsets = rec_get_offsets(rec, index, NULL, page_is_leaf(page),
+ ULINT_UNDEFINED, &heap);
if (rec_offs_any_extern(offsets)) {
btr_rec_free_externally_stored_fields(index,
rec, offsets, page_zip,
- rb_ctx, mtr);
+ rollback, mtr);
#ifdef UNIV_ZIP_DEBUG
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
}
- if (flags == 0) {
- lock_update_delete(block, rec);
+ rec_t* next_rec = NULL;
+ bool min_mark_next_rec = false;
+
+ if (page_is_leaf(page)) {
+ ut_ad(!(rec_get_info_bits(rec, page_rec_is_comp(rec))
+ & REC_INFO_MIN_REC_FLAG));
+ if (flags == 0) {
+ lock_update_delete(block, rec);
+ }
}
if (UNIV_UNLIKELY(page_get_n_recs(page) < 2)
&& UNIV_UNLIKELY(dict_index_get_page(index)
- != buf_block_get_page_no(block))) {
+ != block->page.id.page_no())) {
/* If there is only one record, drop the whole page in
btr_discard_page, if this is not the root page */
@@ -3497,62 +5309,128 @@ btr_cur_pessimistic_delete(
btr_discard_page(cursor, mtr);
ret = TRUE;
-
goto return_after_reservations;
}
- if (!page_is_leaf(page)
- && UNIV_UNLIKELY(rec == page_rec_get_next(
- page_get_infimum_rec(page)))) {
-
- rec_t* next_rec = page_rec_get_next(rec);
-
- if (btr_page_get_prev(page, mtr) == FIL_NULL) {
+ if (page_is_leaf(page)) {
+ btr_search_update_hash_on_delete(cursor);
+ } else if (UNIV_UNLIKELY(page_rec_is_first(rec, page))) {
+ next_rec = page_rec_get_next(rec);
+ if (!page_has_prev(page)) {
/* If we delete the leftmost node pointer on a
non-leaf level, we must mark the new leftmost node
pointer as the predefined minimum record */
- /* This will make page_zip_validate() fail until
- page_cur_delete_rec() completes. This is harmless,
- because everything will take place within a single
- mini-transaction and because writing to the redo log
- is an atomic operation (performed by mtr_commit()). */
- btr_set_min_rec_mark(next_rec, mtr);
+ min_mark_next_rec = true;
+ } else if (dict_index_is_spatial(index)) {
+ /* For rtree, if we delete the leftmost node pointer,
+ we need to update the parent page. */
+ rtr_mbr_t father_mbr;
+ rec_t* father_rec;
+ btr_cur_t father_cursor;
+ offset_t* offsets;
+ bool upd_ret;
+ ulint len;
+
+ rtr_page_get_father_block(NULL, heap, index,
+ block, mtr, NULL,
+ &father_cursor);
+ offsets = rec_get_offsets(
+ btr_cur_get_rec(&father_cursor), index, NULL,
+ false, ULINT_UNDEFINED, &heap);
+
+ father_rec = btr_cur_get_rec(&father_cursor);
+ rtr_read_mbr(rec_get_nth_field(
+ father_rec, offsets, 0, &len), &father_mbr);
+
+ upd_ret = rtr_update_mbr_field(&father_cursor, offsets,
+ NULL, page, &father_mbr,
+ next_rec, mtr);
+
+ if (!upd_ret) {
+ *err = DB_ERROR;
+
+ mem_heap_free(heap);
+ return(FALSE);
+ }
+
+ ut_d(parent_latched = true);
} else {
/* Otherwise, if we delete the leftmost node pointer
- on a page, we have to change the father node pointer
+ on a page, we have to change the parent node pointer
so that it is equal to the new leftmost node pointer
on the page */
ulint level = btr_page_get_level(page, mtr);
- btr_node_ptr_delete(index, block, mtr);
-
+ btr_cur_t cursor;
+ btr_page_get_father(index, block, mtr, &cursor);
+ btr_cur_node_ptr_delete(&cursor, mtr);
+ // FIXME: reuse the node_ptr from above
dtuple_t* node_ptr = dict_index_build_node_ptr(
- index, next_rec, buf_block_get_page_no(block),
+ index, next_rec, block->page.id.page_no(),
heap, level);
btr_insert_on_non_leaf_level(
flags, index, level + 1, node_ptr, mtr);
+
+ ut_d(parent_latched = true);
}
}
- btr_search_update_hash_on_delete(cursor);
+ /* SPATIAL INDEX never uses SX locks; we can allow page merges
+ while holding X lock on the spatial index tree.
+ Do not allow merges of non-leaf B-tree pages unless it is
+ safe to do so. */
+ {
+ const bool allow_merge = page_is_leaf(page)
+ || dict_index_is_spatial(index)
+ || btr_cur_will_modify_tree(
+ index, page, BTR_INTENTION_DELETE, rec,
+ btr_node_ptr_max_size(index),
+ block->page.size, mtr);
+ page_cur_delete_rec(btr_cur_get_page_cur(cursor), index,
+ offsets, mtr);
+
+ if (min_mark_next_rec) {
+ btr_set_min_rec_mark(next_rec, mtr);
+ }
- page_cur_delete_rec(btr_cur_get_page_cur(cursor), index, offsets, mtr);
#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page, index));
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
- ut_ad(btr_check_node_ptr(index, block, mtr));
+ ut_ad(!parent_latched
+ || btr_check_node_ptr(index, block, mtr));
+
+ if (!ret && btr_cur_compress_recommendation(cursor, mtr)) {
+ if (UNIV_LIKELY(allow_merge)) {
+ ret = btr_cur_compress_if_useful(
+ cursor, FALSE, mtr);
+ } else {
+ ib::warn() << "Not merging page "
+ << block->page.id
+ << " in index " << index->name
+ << " of " << index->table->name;
+ ut_ad(!"MDEV-14637");
+ }
+ }
+ }
return_after_reservations:
*err = DB_SUCCESS;
mem_heap_free(heap);
- if (ret == FALSE) {
- ret = btr_cur_compress_if_useful(cursor, FALSE, mtr);
+ if (!srv_read_only_mode
+ && page_is_leaf(page)
+ && !dict_index_is_online_ddl(index)) {
+
+ mtr_memo_release(mtr, dict_index_get_lock(index),
+ MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK);
+
+ /* NOTE: We cannot release root block latch here, because it
+ has the segment header and has already been modified in most cases. */
}
if (n_reserved > 0) {
@@ -3562,6 +5440,23 @@ return_after_reservations:
return(ret);
}
+/** Delete the node pointer in a parent page.
+@param[in,out] parent cursor pointing to parent record
+@param[in,out] mtr mini-transaction */
+void btr_cur_node_ptr_delete(btr_cur_t* parent, mtr_t* mtr)
+{
+ ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(parent),
+ MTR_MEMO_PAGE_X_FIX));
+ dberr_t err;
+ ibool compressed = btr_cur_pessimistic_delete(&err, TRUE, parent,
+ BTR_CREATE_FLAG, false,
+ mtr);
+ ut_a(err == DB_SUCCESS);
+ if (!compressed) {
+ btr_cur_compress_if_useful(parent, FALSE, mtr);
+ }
+}
+
/*******************************************************************//**
Adds path information to the cursor for the current page, for which
the binary search has been performed. */
@@ -3618,63 +5513,63 @@ so far and assume that all pages that we did not scan up to slot2->page
contain the same number of records, then we multiply that average by
the number of pages between slot1->page and slot2->page (which is
n_rows_on_prev_level). In this case we set is_n_rows_exact to FALSE.
-@return number of rows (exact or estimated) */
+@return number of rows, not including the borders (exact or estimated) */
static
-ib_int64_t
+int64_t
btr_estimate_n_rows_in_range_on_level(
/*==================================*/
dict_index_t* index, /*!< in: index */
btr_path_t* slot1, /*!< in: left border */
btr_path_t* slot2, /*!< in: right border */
- ib_int64_t n_rows_on_prev_level, /*!< in: number of rows
+ int64_t n_rows_on_prev_level, /*!< in: number of rows
on the previous level for the
same descend paths; used to
- determine the numbe of pages
+ determine the number of pages
on this level */
ibool* is_n_rows_exact) /*!< out: TRUE if the returned
value is exact i.e. not an
estimation */
{
- ulint space;
- ib_int64_t n_rows;
+ int64_t n_rows;
ulint n_pages_read;
- ulint page_no;
- ulint zip_size;
ulint level;
- space = dict_index_get_space(index);
-
n_rows = 0;
n_pages_read = 0;
/* Assume by default that we will scan all pages between
- slot1->page_no and slot2->page_no */
+ slot1->page_no and slot2->page_no. */
*is_n_rows_exact = TRUE;
- /* add records from slot1->page_no which are to the right of
- the record which serves as a left border of the range, if any */
- if (slot1->nth_rec < slot1->n_recs) {
+ /* Add records from slot1->page_no which are to the right of
+ the record which serves as a left border of the range, if any
+ (we don't include the record itself in this count). */
+ if (slot1->nth_rec <= slot1->n_recs) {
n_rows += slot1->n_recs - slot1->nth_rec;
}
- /* add records from slot2->page_no which are to the left of
- the record which servers as a right border of the range, if any */
+ /* Add records from slot2->page_no which are to the left of
+ the record which serves as a right border of the range, if any
+ (we don't include the record itself in this count). */
if (slot2->nth_rec > 1) {
n_rows += slot2->nth_rec - 1;
}
- /* count the records in the pages between slot1->page_no and
- slot2->page_no (non inclusive), if any */
-
- zip_size = fil_space_get_zip_size(space);
+ /* Count the records in the pages between slot1->page_no and
+ slot2->page_no (non inclusive), if any. */
/* Do not read more than this number of pages in order not to hurt
performance with this code which is just an estimation. If we read
this many pages before reaching slot2->page_no then we estimate the
- average from the pages scanned so far */
+ average from the pages scanned so far. */
# define N_PAGES_READ_LIMIT 10
- page_no = slot1->page_no;
+ page_id_t page_id(
+ dict_index_get_space(index), slot1->page_no);
+ const fil_space_t* space = fil_space_get(index->space);
+ ut_ad(space);
+ const page_size_t page_size(space->flags);
+
level = slot1->page_level;
do {
@@ -3690,7 +5585,7 @@ btr_estimate_n_rows_in_range_on_level(
attempting to read a page that is no longer part of
the B-tree. We pass BUF_GET_POSSIBLY_FREED in order to
silence a debug assertion about this. */
- block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH,
+ block = buf_page_get_gen(page_id, page_size, RW_S_LATCH,
NULL, BUF_GET_POSSIBLY_FREED,
__FILE__, __LINE__, &mtr, &err);
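The hunk above also shows the reworked buffer-pool fetch API: callers now build a page_id_t and a page_size_t instead of passing (space, zip_size, page_no) separately. A sketch of the calling convention, mirroring the call visible above with the error handling trimmed:

    /* Sketch mirroring the new calling convention used above; all names
    are the InnoDB symbols as they appear in this diff. */
    static buf_block_t*
    fetch_page_sketch(dict_index_t* index, uint32_t page_no, mtr_t* mtr)
    {
        dberr_t err = DB_SUCCESS;

        const page_id_t page_id(dict_index_get_space(index), page_no);
        const fil_space_t* space = fil_space_get(index->space);
        const page_size_t page_size(space->flags);

        return buf_page_get_gen(page_id, page_size, RW_S_LATCH,
                                NULL, BUF_GET_POSSIBLY_FREED,
                                __FILE__, __LINE__, mtr, &err);
    }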
@@ -3703,7 +5598,7 @@ btr_estimate_n_rows_in_range_on_level(
"Table %s is encrypted but encryption service or"
" used key_id is not available. "
" Can't continue reading table.",
- index->table->name);
+ index->table->name.m_name);
index->table->file_unreadable = true;
}
@@ -3719,7 +5614,7 @@ btr_estimate_n_rows_in_range_on_level(
this is only an estimate. We are sure that a page with
page_no exists because InnoDB never frees pages, only
reuses them. */
- if (fil_page_get_type(page) != FIL_PAGE_INDEX
+ if (!fil_page_index_page_check(page)
|| btr_page_get_index_id(page) != index->id
|| btr_page_get_level_low(page) != level) {
@@ -3737,18 +5632,18 @@ btr_estimate_n_rows_in_range_on_level(
n_pages_read++;
- if (page_no != slot1->page_no) {
+ if (page_id.page_no() != slot1->page_no) {
/* Do not count the records on slot1->page_no,
we already counted them before this loop. */
n_rows += page_get_n_recs(page);
}
- page_no = btr_page_get_next(page, &mtr);
+ page_id.set_page_no(btr_page_get_next(page));
mtr_commit(&mtr);
if (n_pages_read == N_PAGES_READ_LIMIT
- || page_no == FIL_NULL) {
+ || page_id.page_no() == FIL_NULL) {
/* Either we read too many pages or
we reached the end of the level without passing
through slot2->page_no, the tree must have changed
@@ -3756,7 +5651,7 @@ btr_estimate_n_rows_in_range_on_level(
goto inexact;
}
- } while (page_no != slot2->page_no);
+ } while (page_id.page_no() != slot2->page_no);
return(n_rows);
@@ -3781,18 +5676,40 @@ inexact:
return(n_rows);
}
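The extrapolation performed on the inexact path above reduces to one line of integer arithmetic; a standalone sketch with made-up inputs:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        /* Made-up inputs: N_PAGES_READ_LIMIT was hit after 10 pages that
        held 950 records, and the previous level says the range spans
        40 pages on this level. */
        const int64_t n_rows_on_prev_level = 40;
        const int64_t n_pages_read         = 10;
        const int64_t n_rows               = 950;

        /* Average records per scanned page, scaled to all pages. */
        const int64_t estimate
            = n_rows_on_prev_level * n_rows / n_pages_read;

        std::printf("estimated rows: %lld (inexact)\n",
                    (long long) estimate);  /* prints 3800 */
        return 0;
    }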
-/*******************************************************************//**
-Estimates the number of rows in a given index range.
-@return estimated number of rows */
-UNIV_INTERN
-ib_int64_t
-btr_estimate_n_rows_in_range(
-/*=========================*/
- dict_index_t* index, /*!< in: index */
- const dtuple_t* tuple1, /*!< in: range start, may also be empty tuple */
- ulint mode1, /*!< in: search mode for range start */
- const dtuple_t* tuple2, /*!< in: range end, may also be empty tuple */
- ulint mode2) /*!< in: search mode for range end */
+/** If the tree gets changed too much between the two dives for the left
+and right boundary then btr_estimate_n_rows_in_range_low() will retry
+that many times before giving up and returning the value stored in
+rows_in_range_arbitrary_ret_val. */
+static const unsigned rows_in_range_max_retries = 4;
+
+/** We pretend that a range has that many records if the tree keeps changing
+for rows_in_range_max_retries retries while we try to estimate the records
+in a given range. */
+static const int64_t rows_in_range_arbitrary_ret_val = 10;
+
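Together these two constants implement a bounded-retry policy around the pair of dives; its shape, sketched with a hypothetical probe callback standing in for one estimation attempt:

    #include <cstdint>
    #include <functional>

    static const unsigned max_retries       = 4;   /* rows_in_range_max_retries */
    static const int64_t  arbitrary_ret_val = 10;  /* rows_in_range_arbitrary_ret_val */

    /* `probe` is a hypothetical stand-in for one pair of dives; it returns
    false when the tree changed between them and the attempt is void. */
    static int64_t
    estimate_with_retries(const std::function<bool(int64_t*)>& probe,
                          unsigned nth_attempt = 1)
    {
        int64_t n_rows = 0;
        if (probe(&n_rows)) {
            return n_rows;                 /* consistent pair of dives */
        }
        if (nth_attempt >= max_retries) {
            return arbitrary_ret_val;      /* tree keeps changing; give up */
        }
        return estimate_with_retries(probe, nth_attempt + 1);
    }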
+/** Estimates the number of rows in a given index range.
+@param[in] index index
+@param[in] tuple1 range start, may also be empty tuple
+@param[in] mode1 search mode for range start
+@param[in] tuple2 range end, may also be empty tuple
+@param[in] mode2 search mode for range end
+@param[in] nth_attempt if the tree gets modified too much while
+we are trying to analyze it, then we will retry (this function will call
+itself, incrementing this parameter)
+@return estimated number of rows; if after rows_in_range_max_retries
+retries the tree keeps changing, then we will just return
+rows_in_range_arbitrary_ret_val as a result (if
+nth_attempt >= rows_in_range_max_retries and the tree is modified between
+the two dives). */
+static
+int64_t
+btr_estimate_n_rows_in_range_low(
+ dict_index_t* index,
+ const dtuple_t* tuple1,
+ page_cur_mode_t mode1,
+ const dtuple_t* tuple2,
+ page_cur_mode_t mode2,
+ unsigned nth_attempt)
{
btr_path_t path1[BTR_PATH_ARRAY_N_SLOTS];
btr_path_t path2[BTR_PATH_ARRAY_N_SLOTS];
@@ -3802,64 +5719,160 @@ btr_estimate_n_rows_in_range(
ibool diverged;
ibool diverged_lot;
ulint divergence_level;
- ib_int64_t n_rows;
+ int64_t n_rows;
ibool is_n_rows_exact;
ulint i;
mtr_t mtr;
- ib_int64_t table_n_rows;
+ int64_t table_n_rows;
table_n_rows = dict_table_get_n_rows(index->table);
+ /* Below we dive to the two records specified by tuple1 and tuple2 and
+ we remember the entire dive paths from the tree root. The place where
+ the tuple1 path ends on the leaf level we call "left border" of our
+ interval and the place where the tuple2 path ends on the leaf level -
+ "right border". We take care to either include or exclude the interval
+ boundaries depending on whether <, <=, > or >= was specified. For
+ example if "5 < x AND x <= 10" then we should not include the left
+ boundary, but should include the right one. */
+
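The border rule is easiest to check on a concrete interval: for "5 < x AND x <= 10" over leaf values (3, 5, 7, 9, 10, 12) the answer is 3 (7, 9 and 10), because the left border is excluded and the right one included. A small sketch of exactly that rule on a plain sorted array; rows_in_range below is illustrative only:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Count values in (lo, hi] over sorted data: '<' excludes the left
    // border, '<=' includes the right one, matching the
    // should_count_the_*_border flags used below.
    static size_t rows_in_range(const std::vector<int>& sorted,
                                int lo, int hi)
    {
        auto left  = std::upper_bound(sorted.begin(), sorted.end(), lo);
        auto right = std::upper_bound(sorted.begin(), sorted.end(), hi);
        return static_cast<size_t>(right - left);
    }

    // rows_in_range({3, 5, 7, 9, 10, 12}, 5, 10) == 3  (7, 9 and 10)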
mtr_start(&mtr);
cursor.path_arr = path1;
+ bool should_count_the_left_border;
+
if (dtuple_get_n_fields(tuple1) > 0) {
btr_cur_search_to_nth_level(index, 0, tuple1, mode1,
BTR_SEARCH_LEAF | BTR_ESTIMATE,
&cursor, 0,
__FILE__, __LINE__, &mtr);
+
+ ut_ad(!page_rec_is_infimum(btr_cur_get_rec(&cursor)));
+
+ /* We should count the border if there are any records to
+ match the criteria, i.e. if the maximum record on the tree is
+ 5 and x > 3 is specified then the cursor will be positioned at
+ 5 and we should count the border, but if x > 7 is specified,
+ then the cursor will be positioned at 'sup' on the rightmost
+ leaf page in the tree and we should not count the border. */
+ should_count_the_left_border
+ = !page_rec_is_supremum(btr_cur_get_rec(&cursor));
} else {
- btr_cur_open_at_index_side(true, index,
+ dberr_t err = DB_SUCCESS;
+
+ err = btr_cur_open_at_index_side(true, index,
BTR_SEARCH_LEAF | BTR_ESTIMATE,
&cursor, 0, &mtr);
+
+ if (err != DB_SUCCESS) {
+ ib::warn() << "btr_estimate_n_rows_in_range_low():"
+ << " error code: " << err
+ << " called from file: "
+ << __FILE__ << " line: " << __LINE__
+ << " table: " << index->table->name
+ << " index: " << index->name;
+ }
+
+ ut_ad(page_rec_is_infimum(btr_cur_get_rec(&cursor)));
+
+ /* The range specified is without a left border, just
+ 'x < 123' or 'x <= 123' and btr_cur_open_at_index_side()
+ positioned the cursor on the infimum record on the leftmost
+ page, which must not be counted. */
+ should_count_the_left_border = false;
}
mtr_commit(&mtr);
- if (index->table->file_unreadable) {
- return (0);
+ if (!index->is_readable()) {
+ return 0;
}
mtr_start(&mtr);
cursor.path_arr = path2;
+ bool should_count_the_right_border;
+
if (dtuple_get_n_fields(tuple2) > 0) {
btr_cur_search_to_nth_level(index, 0, tuple2, mode2,
BTR_SEARCH_LEAF | BTR_ESTIMATE,
&cursor, 0,
__FILE__, __LINE__, &mtr);
+
+ const rec_t* rec = btr_cur_get_rec(&cursor);
+
+ ut_ad(!(mode2 == PAGE_CUR_L && page_rec_is_supremum(rec)));
+
+ should_count_the_right_border
+ = (mode2 == PAGE_CUR_LE /* if the range is '<=' */
+ /* and the record was found */
+ && cursor.low_match >= dtuple_get_n_fields(tuple2))
+ || (mode2 == PAGE_CUR_L /* or if the range is '<' */
+ /* and there are any records to match the criteria,
+ i.e. if the minimum record on the tree is 5 and
+ x < 7 is specified then the cursor will be
+ positioned at 5 and we should count the border, but
+ if x < 2 is specified, then the cursor will be
+ positioned at 'inf' and we should not count the
+ border */
+ && !page_rec_is_infimum(rec));
+ /* Notice that for "WHERE col <= 'foo'" MySQL passes to
+ ha_innobase::records_in_range():
+ min_key=NULL (left-unbounded) which is expected
+ max_key='foo' flag=HA_READ_AFTER_KEY (PAGE_CUR_G), which is
+ unexpected - one would expect
+ flag=HA_READ_KEY_OR_PREV (PAGE_CUR_LE). In this case the
+ cursor will be positioned on the first record to the right of
+ the requested one (can also be positioned on the 'sup') and
+ we should not count the right border. */
} else {
- btr_cur_open_at_index_side(false, index,
+ dberr_t err = DB_SUCCESS;
+
+ err = btr_cur_open_at_index_side(false, index,
BTR_SEARCH_LEAF | BTR_ESTIMATE,
&cursor, 0, &mtr);
+
+ if (err != DB_SUCCESS) {
+ ib::warn() << "btr_estimate_n_rows_in_range_low():"
+ << " error code: " << err
+ << " called from file: "
+ << __FILE__ << " line: " << __LINE__
+ << " table: " << index->table->name
+ << " index: " << index->name;
+ }
+
+ ut_ad(page_rec_is_supremum(btr_cur_get_rec(&cursor)));
+
+ /* The range specified is without a right border, just
+ 'x > 123' or 'x >= 123' and btr_cur_open_at_index_side()
+ positioned the cursor on the supremum record on the rightmost
+ page, which must not be counted. */
+ should_count_the_right_border = false;
}
mtr_commit(&mtr);
/* We have the path information for the range in path1 and path2 */
- n_rows = 1;
+ n_rows = 0;
is_n_rows_exact = TRUE;
- diverged = FALSE; /* This becomes true when the path is not
- the same any more */
- diverged_lot = FALSE; /* This becomes true when the paths are
- not the same or adjacent any more */
- divergence_level = 1000000; /* This is the level where paths diverged
- a lot */
+
+ /* This becomes true when the two paths do not pass through the
+ same pages anymore. */
+ diverged = FALSE;
+
+ /* This becomes true when the paths no longer pass through
+ the same or adjacent pages. Until then, they pass through the
+ same or neighboring-on-the-same-level pages only. */
+ diverged_lot = FALSE;
+
+ /* This is the level where paths diverged a lot. */
+ divergence_level = 1000000;
+
for (i = 0; ; i++) {
ut_ad(i < BTR_PATH_ARRAY_N_SLOTS);
@@ -3869,6 +5882,70 @@ btr_estimate_n_rows_in_range(
if (slot1->nth_rec == ULINT_UNDEFINED
|| slot2->nth_rec == ULINT_UNDEFINED) {
+ /* Here none of the borders were counted. For example,
+ if on the leaf level we descended to:
+ (inf, a, b, c, d, e, f, sup)
+ ^ ^
+ path1 path2
+ then n_rows will be 2 (c and d). */
+
+ if (is_n_rows_exact) {
+ /* Only fiddle to adjust this off-by-one
+ if the number is exact, otherwise we do
+ much grosser adjustments below. */
+
+ btr_path_t* last1 = &path1[i - 1];
+ btr_path_t* last2 = &path2[i - 1];
+
+ /* If both paths end up on the same record on
+ the leaf level. */
+ if (last1->page_no == last2->page_no
+ && last1->nth_rec == last2->nth_rec) {
+
+ /* n_rows can be > 0 here if the paths
+ were first different and then converged
+ to the same record on the leaf level.
+ For example:
+ SELECT ... LIKE 'wait/synch/rwlock%'
+ mode1=PAGE_CUR_GE,
+ tuple1="wait/synch/rwlock"
+ path1[0]={nth_rec=58, n_recs=58,
+ page_no=3, page_level=1}
+ path1[1]={nth_rec=56, n_recs=55,
+ page_no=119, page_level=0}
+
+ mode2=PAGE_CUR_G
+ tuple2="wait/synch/rwlock"
+ path2[0]={nth_rec=57, n_recs=57,
+ page_no=3, page_level=1}
+ path2[1]={nth_rec=56, n_recs=55,
+ page_no=119, page_level=0} */
+
+ /* If the range is such that we should
+ count both borders, then avoid
+ counting that record twice - once as a
+ left border and once as a right
+ border. */
+ if (should_count_the_left_border
+ && should_count_the_right_border) {
+
+ n_rows = 1;
+ } else {
+ /* Some of the borders should
+ not be counted, e.g. [3,3). */
+ n_rows = 0;
+ }
+ } else {
+ if (should_count_the_left_border) {
+ n_rows++;
+ }
+
+ if (should_count_the_right_border) {
+ n_rows++;
+ }
+ }
+ }
+
if (i > divergence_level + 1 && !is_n_rows_exact) {
/* In trees whose height is > 1 our algorithm
tends to underestimate: multiply the estimate
@@ -3900,12 +5977,41 @@ btr_estimate_n_rows_in_range(
if (!diverged && slot1->nth_rec != slot2->nth_rec) {
+ /* If both slots do not point to the same page,
+ this means that the tree must have changed between
+ the dive for slot1 and the dive for slot2 at the
+ beginning of this function. */
+ if (slot1->page_no != slot2->page_no
+ || slot1->page_level != slot2->page_level) {
+
+ /* If the tree keeps changing even after a
+ few attempts, then just return some arbitrary
+ number. */
+ if (nth_attempt >= rows_in_range_max_retries) {
+ return(rows_in_range_arbitrary_ret_val);
+ }
+
+ const int64_t ret =
+ btr_estimate_n_rows_in_range_low(
+ index, tuple1, mode1,
+ tuple2, mode2, nth_attempt + 1);
+
+ return(ret);
+ }
+
diverged = TRUE;
if (slot1->nth_rec < slot2->nth_rec) {
- n_rows = slot2->nth_rec - slot1->nth_rec;
-
- if (n_rows > 1) {
+ /* We do not count the borders (nor the left
+ nor the right one), thus "- 1". */
+ n_rows = slot2->nth_rec - slot1->nth_rec - 1;
+
+ if (n_rows > 0) {
+ /* There is at least one row between
+ the two borders pointed to by slot1
+ and slot2, so on the level below the
+ slots will point to non-adjacent
+ pages. */
diverged_lot = TRUE;
divergence_level = i;
}
@@ -3917,8 +6023,10 @@ btr_estimate_n_rows_in_range(
and we select where x > 20 and x < 30;
in this case slot1->nth_rec will point
to the supr record and slot2->nth_rec
- will point to 6 */
+ will point to 6. */
n_rows = 0;
+ should_count_the_left_border = false;
+ should_count_the_right_border = false;
}
} else if (diverged && !diverged_lot) {
@@ -3949,6 +6057,27 @@ btr_estimate_n_rows_in_range(
}
}
+/** Estimates the number of rows in a given index range.
+@param[in] index index
+@param[in] tuple1 range start, may also be empty tuple
+@param[in] mode1 search mode for range start
+@param[in] tuple2 range end, may also be empty tuple
+@param[in] mode2 search mode for range end
+@return estimated number of rows */
+int64_t
+btr_estimate_n_rows_in_range(
+ dict_index_t* index,
+ const dtuple_t* tuple1,
+ page_cur_mode_t mode1,
+ const dtuple_t* tuple2,
+ page_cur_mode_t mode2)
+{
+ const int64_t ret = btr_estimate_n_rows_in_range_low(
+ index, tuple1, mode1, tuple2, mode2, 1 /* first attempt */);
+
+ return(ret);
+}
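The wrapper above seeds nth_attempt with 1. Deeper in btr_estimate_n_rows_in_range_low() the whole estimate is retried when the two dives turn out to have seen different trees, and after rows_in_range_max_retries attempts the arbitrary constant is returned instead. The same control flow as a generic bounded-retry sketch; estimate_once is a hypothetical stand-in for one pair of dives:

    #include <cstdint>
    #include <functional>

    static const unsigned kMaxRetries = 4;   // rows_in_range_max_retries
    static const int64_t  kFallback   = 10;  // rows_in_range_arbitrary_ret_val

    // estimate_once() returns true on success and false when the
    // structure changed between the two dives and a retry is needed.
    int64_t estimate_with_retries(
        const std::function<bool(int64_t*)>& estimate_once,
        unsigned nth_attempt = 1)
    {
        int64_t n_rows = 0;

        if (estimate_once(&n_rows)) {
            return n_rows;
        }
        if (nth_attempt >= kMaxRetries) {
            return kFallback;  // keep the optimizer moving with a guess
        }
        return estimate_with_retries(estimate_once, nth_attempt + 1);
    }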
+
/*******************************************************************//**
Record the number of non_null key values in a given index for
each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).
@@ -3961,7 +6090,7 @@ btr_record_not_null_field_in_rec(
ulint n_unique, /*!< in: dict_index_get_n_unique(index),
number of columns uniquely determine
an index entry */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index),
+ const offset_t* offsets, /*!< in: rec_get_offsets(rec, index),
its size could be for all fields or
that of "n_unique" */
ib_uint64_t* n_not_null) /*!< in/out: array to record number of
@@ -3992,9 +6121,10 @@ The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed
index->stat_n_sample_sizes[].
If innodb_stats_method is nulls_ignored, we also record the number of
non-null values for each prefix and stored the estimates in
-array index->stat_n_non_null_key_vals. */
-UNIV_INTERN
-void
+array index->stat_n_non_null_key_vals.
+@return true if the index is available and we get the estimated numbers,
+false if the index is unavailable. */
+bool
btr_estimate_number_of_different_key_vals(
/*======================================*/
dict_index_t* index) /*!< in: index */
@@ -4003,21 +6133,25 @@ btr_estimate_number_of_different_key_vals(
page_t* page;
rec_t* rec;
ulint n_cols;
- ulint matched_fields;
- ulint matched_bytes;
ib_uint64_t* n_diff;
ib_uint64_t* n_not_null;
ibool stats_null_not_equal;
- ullint n_sample_pages = 1; /* number of pages to sample */
+ uintmax_t n_sample_pages = 1; /* number of pages to sample */
ulint not_empty_flag = 0;
ulint total_external_size = 0;
ulint i;
ulint j;
- ullint add_on;
+ uintmax_t add_on;
mtr_t mtr;
mem_heap_t* heap = NULL;
- ulint* offsets_rec = NULL;
- ulint* offsets_next_rec = NULL;
+ offset_t* offsets_rec = NULL;
+ offset_t* offsets_next_rec = NULL;
+
+ /* For a spatial index, no such statistics can be
+ fetched. */
+ if (dict_index_is_spatial(index)) {
+ return(false);
+ }
n_cols = dict_index_get_n_unique(index);
@@ -4028,7 +6162,7 @@ btr_estimate_number_of_different_key_vals(
+ sizeof *offsets_next_rec));
n_diff = (ib_uint64_t*) mem_heap_zalloc(
- heap, n_cols * sizeof(ib_int64_t));
+ heap, n_cols * sizeof(n_diff[0]));
n_not_null = NULL;
@@ -4053,7 +6187,7 @@ btr_estimate_number_of_different_key_vals(
default:
ut_error;
- }
+ }
if (srv_stats_sample_traditional) {
/* It makes no sense to test more pages than are contained
@@ -4102,8 +6236,8 @@ btr_estimate_number_of_different_key_vals(
*/
if (index->stat_index_size > 1) {
n_sample_pages = (srv_stats_transient_sample_pages < index->stat_index_size) ?
- ut_min(index->stat_index_size,
- ulint(log2(index->stat_index_size)*srv_stats_transient_sample_pages))
+ ut_min(static_cast<ulint>(index->stat_index_size),
+ static_cast<ulint>(log2(index->stat_index_size)*srv_stats_transient_sample_pages))
: index->stat_index_size;
}
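The branch above makes the sample grow with log2 of the index size rather than linearly: a 1024-page index with srv_stats_transient_sample_pages = 8 samples min(1024, 10 * 8) = 80 pages. A sketch of the rule; sample_pages is illustrative, not the patch's code:

    #include <algorithm>
    #include <cmath>

    typedef unsigned long ulint;

    // For indexes larger than one page, sample log2(size) * base_pages
    // leaf pages, capped at the index size (the ':' branch above).
    ulint sample_pages(ulint index_size, ulint base_pages)
    {
        if (index_size <= 1 || base_pages >= index_size) {
            return index_size;
        }
        return std::min(index_size,
                        static_cast<ulint>(
                            std::log2(index_size) * base_pages));
    }

    // sample_pages(1024, 8) == 80; sample_pages(16, 20) == 16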
@@ -4117,7 +6251,17 @@ btr_estimate_number_of_different_key_vals(
for (i = 0; i < n_sample_pages; i++) {
mtr_start(&mtr);
- btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr);
+ bool available;
+
+ available = btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF,
+ &cursor, &mtr);
+
+ if (!available) {
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
+
+ return(false);
+ }
/* Count the number of different key values for each prefix of
the key on this index page. If the prefix does not determine
@@ -4125,7 +6269,7 @@ btr_estimate_number_of_different_key_vals(
because otherwise our algorithm would give a wrong estimate
for an index where there is just one key value. */
- if (index->table->file_unreadable) {
+ if (!index->is_readable()) {
mtr_commit(&mtr);
goto exit_loop;
}
@@ -4133,10 +6277,12 @@ btr_estimate_number_of_different_key_vals(
page = btr_cur_get_page(&cursor);
rec = page_rec_get_next(page_get_infimum_rec(page));
+ ut_d(const bool is_leaf = page_is_leaf(page));
if (!page_rec_is_supremum(rec)) {
not_empty_flag = 1;
offsets_rec = rec_get_offsets(rec, index, offsets_rec,
+ is_leaf,
ULINT_UNDEFINED, &heap);
if (n_not_null != NULL) {
@@ -4146,6 +6292,7 @@ btr_estimate_number_of_different_key_vals(
}
while (!page_rec_is_supremum(rec)) {
+ ulint matched_fields;
rec_t* next_rec = page_rec_get_next(rec);
if (page_rec_is_supremum(next_rec)) {
total_external_size +=
@@ -4154,18 +6301,16 @@ btr_estimate_number_of_different_key_vals(
break;
}
- matched_fields = 0;
- matched_bytes = 0;
offsets_next_rec = rec_get_offsets(next_rec, index,
offsets_next_rec,
+ is_leaf,
ULINT_UNDEFINED,
&heap);
- cmp_rec_rec_with_match(rec, next_rec,
- offsets_rec, offsets_next_rec,
- index, stats_null_not_equal,
- &matched_fields,
- &matched_bytes);
+ cmp_rec_rec(rec, next_rec,
+ offsets_rec, offsets_next_rec,
+ index, stats_null_not_equal,
+ &matched_fields);
for (j = matched_fields; j < n_cols; j++) {
/* We add one if this index record has
@@ -4188,14 +6333,14 @@ btr_estimate_number_of_different_key_vals(
and assign the old offsets_rec buffer to
offsets_next_rec. */
{
- ulint* offsets_tmp = offsets_rec;
+ offset_t* offsets_tmp = offsets_rec;
offsets_rec = offsets_next_rec;
offsets_next_rec = offsets_tmp;
}
}
-
- if (n_cols == dict_index_get_n_unique_in_tree(index)) {
+ if (n_cols == dict_index_get_n_unique_in_tree(index)
+ && page_has_siblings(page)) {
/* If there is more than one leaf page in the tree,
we add one because we know that the first record
@@ -4206,11 +6351,7 @@ btr_estimate_number_of_different_key_vals(
algorithm grossly underestimated the number of rows
in the table. */
- if (btr_page_get_prev(page, &mtr) != FIL_NULL
- || btr_page_get_next(page, &mtr) != FIL_NULL) {
-
- n_diff[n_cols - 1]++;
- }
+ n_diff[n_cols - 1]++;
}
mtr_commit(&mtr);
@@ -4264,18 +6405,20 @@ exit_loop:
}
mem_heap_free(heap);
+
+ return(true);
}
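In outline the function samples n_sample_pages random leaf pages, counts on each page how many adjacent record pairs differ within the first n columns, and extrapolates those per-page counts to all leaf pages (the exit_loop arithmetic above, which additionally folds in external pages and an add_on correction). A deliberately simplified model of the scale-up step, ignoring those corrections:

    #include <cstdint>
    #include <vector>

    // Per-sampled-page counts of "adjacent records differ" events,
    // scaled to an estimate over all leaf pages. Illustrative only.
    uint64_t scale_n_diff(const std::vector<uint64_t>& per_page_diffs,
                          uint64_t n_leaf_pages)
    {
        uint64_t sampled = per_page_diffs.size();
        if (sampled == 0) {
            return 0;
        }

        uint64_t total = 0;
        for (uint64_t d : per_page_diffs) {
            total += d;
        }

        // Unsampled pages are assumed to behave like the sampled average.
        return total * n_leaf_pages / sampled;
    }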
/*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/
/***********************************************************//**
Gets the offset of the pointer to the externally stored part of a field.
-@return offset of the pointer to the externally stored part */
+@return offset of the pointer to the externally stored part */
static
ulint
btr_rec_get_field_ref_offs(
/*=======================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
ulint n) /*!< in: index of the external field */
{
ulint field_ref_offs;
@@ -4290,9 +6433,9 @@ btr_rec_get_field_ref_offs(
}
/** Gets a pointer to the externally stored part of a field.
-@param rec record
-@param offsets rec_get_offsets(rec)
-@param n index of the externally stored field
+@param rec record
+@param offsets rec_get_offsets(rec)
+@param n index of the externally stored field
@return pointer to the externally stored part */
#define btr_rec_get_field_ref(rec, offsets, n) \
((rec) + btr_rec_get_field_ref_offs(offsets, n))
@@ -4300,12 +6443,11 @@ btr_rec_get_field_ref_offs(
/** Gets the externally stored size of a record, in units of a database page.
@param[in] rec record
@param[in] offsets array returned by rec_get_offsets()
-@return externally stored part, in units of a database page */
-
+@return externally stored part, in units of a database page */
ulint
btr_rec_get_externally_stored_len(
const rec_t* rec,
- const ulint* offsets)
+ const offset_t* offsets)
{
ulint n_fields;
ulint total_extern_len = 0;
@@ -4344,7 +6486,7 @@ btr_cur_set_ownership_of_extern_field(
part will be updated, or NULL */
rec_t* rec, /*!< in/out: clustered index record */
dict_index_t* index, /*!< in: index of the page */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
ulint i, /*!< in: field number */
ibool val, /*!< in: value to set */
mtr_t* mtr) /*!< in: mtr, or NULL if not logged */
@@ -4362,12 +6504,12 @@ btr_cur_set_ownership_of_extern_field(
byte_val = mach_read_from_1(data + local_len + BTR_EXTERN_LEN);
if (val) {
- byte_val = byte_val & (~BTR_EXTERN_OWNER_FLAG);
+ byte_val &= ~BTR_EXTERN_OWNER_FLAG;
} else {
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
ut_a(!(byte_val & BTR_EXTERN_OWNER_FLAG));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
- byte_val = byte_val | BTR_EXTERN_OWNER_FLAG;
+ byte_val |= BTR_EXTERN_OWNER_FLAG;
}
if (page_zip) {
@@ -4380,8 +6522,6 @@ btr_cur_set_ownership_of_extern_field(
} else {
mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val);
}
-
- btr_blob_dbg_owner(rec, index, offsets, i, val);
}
/*******************************************************************//**
@@ -4389,7 +6529,6 @@ Marks non-updated off-page fields as disowned by this record. The ownership
must be transferred to the updated record which is inserted elsewhere in the
index tree. In purge only the owner of externally stored field is allowed
to free the field. */
-UNIV_INTERN
void
btr_cur_disown_inherited_fields(
/*============================*/
@@ -4397,7 +6536,7 @@ btr_cur_disown_inherited_fields(
part will be updated, or NULL */
rec_t* rec, /*!< in/out: record in a clustered index */
dict_index_t* index, /*!< in: index of the page */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
const upd_t* update, /*!< in: update vector */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
@@ -4409,7 +6548,7 @@ btr_cur_disown_inherited_fields(
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
if (rec_offs_nth_extern(offsets, i)
- && !upd_get_field_by_field_no(update, i)) {
+ && !upd_get_field_by_field_no(update, i, false)) {
btr_cur_set_ownership_of_extern_field(
page_zip, rec, index, offsets, i, FALSE, mtr);
}
@@ -4428,7 +6567,7 @@ btr_cur_unmark_extern_fields(
part will be updated, or NULL */
rec_t* rec, /*!< in/out: record in a clustered index */
dict_index_t* index, /*!< in: index of the page */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
mtr_t* mtr) /*!< in: mtr, or NULL if not logged */
{
ulint n;
@@ -4452,86 +6591,8 @@ btr_cur_unmark_extern_fields(
}
/*******************************************************************//**
-Flags the data tuple fields that are marked as extern storage in the
-update vector. We use this function to remember which fields we must
-mark as extern storage in a record inserted for an update.
-@return number of flagged external columns */
-UNIV_INTERN
-ulint
-btr_push_update_extern_fields(
-/*==========================*/
- dtuple_t* tuple, /*!< in/out: data tuple */
- const upd_t* update, /*!< in: update vector */
- mem_heap_t* heap) /*!< in: memory heap */
-{
- ulint n_pushed = 0;
- ulint n;
- const upd_field_t* uf;
-
- uf = update->fields;
- n = upd_get_n_fields(update);
-
- for (; n--; uf++) {
- if (dfield_is_ext(&uf->new_val)) {
- dfield_t* field
- = dtuple_get_nth_field(tuple, uf->field_no);
-
- if (!dfield_is_ext(field)) {
- dfield_set_ext(field);
- n_pushed++;
- }
-
- switch (uf->orig_len) {
- byte* data;
- ulint len;
- byte* buf;
- case 0:
- break;
- case BTR_EXTERN_FIELD_REF_SIZE:
- /* Restore the original locally stored
- part of the column. In the undo log,
- InnoDB writes a longer prefix of externally
- stored columns, so that column prefixes
- in secondary indexes can be reconstructed. */
- dfield_set_data(field, (byte*) dfield_get_data(field)
- + dfield_get_len(field)
- - BTR_EXTERN_FIELD_REF_SIZE,
- BTR_EXTERN_FIELD_REF_SIZE);
- dfield_set_ext(field);
- break;
- default:
- /* Reconstruct the original locally
- stored part of the column. The data
- will have to be copied. */
- ut_a(uf->orig_len > BTR_EXTERN_FIELD_REF_SIZE);
-
- data = (byte*) dfield_get_data(field);
- len = dfield_get_len(field);
-
- buf = (byte*) mem_heap_alloc(heap,
- uf->orig_len);
- /* Copy the locally stored prefix. */
- memcpy(buf, data,
- uf->orig_len
- - BTR_EXTERN_FIELD_REF_SIZE);
- /* Copy the BLOB pointer. */
- memcpy(buf + uf->orig_len
- - BTR_EXTERN_FIELD_REF_SIZE,
- data + len - BTR_EXTERN_FIELD_REF_SIZE,
- BTR_EXTERN_FIELD_REF_SIZE);
-
- dfield_set_data(field, buf, uf->orig_len);
- dfield_set_ext(field);
- }
- }
- }
-
- return(n_pushed);
-}
-
-/*******************************************************************//**
Returns the length of a BLOB part stored on the header page.
-@return part length */
+@return part length */
static
ulint
btr_blob_get_part_len(
@@ -4543,7 +6604,7 @@ btr_blob_get_part_len(
/*******************************************************************//**
Returns the page number where the next BLOB part is stored.
-@return page number or FIL_NULL if no more pages */
+@return page number or FIL_NULL if no more pages */
static
ulint
btr_blob_get_next_page_no(
@@ -4559,16 +6620,17 @@ static
void
btr_blob_free(
/*==========*/
+ dict_index_t* index, /*!< in: index */
buf_block_t* block, /*!< in: buffer block */
ibool all, /*!< in: TRUE=remove also the compressed page
if there is one */
mtr_t* mtr) /*!< in: mini-transaction to commit */
{
buf_pool_t* buf_pool = buf_pool_from_block(block);
- ulint space = buf_block_get_space(block);
- ulint page_no = buf_block_get_page_no(block);
+ ulint space = block->page.id.space();
+ ulint page_no = block->page.id.page_no();
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
mtr_commit(mtr);
@@ -4579,8 +6641,8 @@ btr_blob_free(
if (buf_block_get_state(block)
== BUF_BLOCK_FILE_PAGE
- && buf_block_get_space(block) == space
- && buf_block_get_page_no(block) == page_no) {
+ && block->page.id.space() == space
+ && block->page.id.page_no() == page_no) {
if (!buf_LRU_free_page(&block->page, all)
&& all && block->page.zip.data) {
@@ -4594,28 +6656,147 @@ btr_blob_free(
buf_pool_mutex_exit(buf_pool);
}
+/** Helper class used while writing blob pages, during insert or update. */
+struct btr_blob_log_check_t {
+ /** Persistent cursor on a clustered index record with blobs. */
+ btr_pcur_t* m_pcur;
+ /** Mini-transaction holding the latches for m_pcur. */
+ mtr_t* m_mtr;
+ /** rec_get_offsets(rec, index); offset of clust_rec */
+ const offset_t* m_offsets;
+ /** The block containing clustered record */
+ buf_block_t** m_block;
+ /** The clustered record pointer */
+ rec_t** m_rec;
+ /** The blob operation code */
+ enum blob_op m_op;
+
+ /** Constructor
+ @param[in] pcur persistent cursor on a clustered
+ index record with blobs.
+ @param[in] mtr mini-transaction holding latches for
+ pcur.
+ @param[in] offsets offsets of the clust_rec
+ @param[in,out] block record block containing pcur record
+ @param[in,out] rec the clustered record pointer
+ @param[in] op the blob operation code */
+ btr_blob_log_check_t(
+ btr_pcur_t* pcur,
+ mtr_t* mtr,
+ const offset_t* offsets,
+ buf_block_t** block,
+ rec_t** rec,
+ enum blob_op op)
+ : m_pcur(pcur),
+ m_mtr(mtr),
+ m_offsets(offsets),
+ m_block(block),
+ m_rec(rec),
+ m_op(op)
+ {
+ ut_ad(rec_offs_validate(*m_rec, m_pcur->index(), m_offsets));
+ ut_ad((*m_block)->frame == page_align(*m_rec));
+ ut_ad(*m_rec == btr_pcur_get_rec(m_pcur));
+ }
+
+ /** Check if there is enough space in the log file. Commit and
+ restart the mini-transaction. */
+ void check()
+ {
+ dict_index_t* index = m_pcur->index();
+ ulint offs = 0;
+ ulint page_no = ULINT_UNDEFINED;
+ FlushObserver* observer = m_mtr->get_flush_observer();
+
+ if (UNIV_UNLIKELY(m_op == BTR_STORE_INSERT_BULK)) {
+ offs = page_offset(*m_rec);
+ page_no = page_get_page_no(
+ buf_block_get_frame(*m_block));
+
+ buf_block_buf_fix_inc(*m_block, __FILE__, __LINE__);
+ } else {
+ btr_pcur_store_position(m_pcur, m_mtr);
+ }
+ m_mtr->commit();
+
+ DEBUG_SYNC_C("blob_write_middle");
+
+ log_free_check();
+
+ DEBUG_SYNC_C("blob_write_middle_after_check");
+
+ const mtr_log_t log_mode = m_mtr->get_log_mode();
+ m_mtr->start();
+ m_mtr->set_log_mode(log_mode);
+ m_mtr->set_named_space(index->space);
+ m_mtr->set_flush_observer(observer);
+
+ if (UNIV_UNLIKELY(m_op == BTR_STORE_INSERT_BULK)) {
+ page_id_t page_id(dict_index_get_space(index),
+ page_no);
+ page_size_t page_size(dict_table_page_size(
+ index->table));
+ page_cur_t* page_cur = &m_pcur->btr_cur.page_cur;
+
+ page_cur->block = btr_block_get(
+ page_id, page_size, RW_X_LATCH, index, m_mtr);
+ page_cur->rec = buf_block_get_frame(page_cur->block)
+ + offs;
+
+ buf_block_buf_fix_dec(page_cur->block);
+ } else {
+ ut_ad(m_pcur->rel_pos == BTR_PCUR_ON);
+ bool ret = btr_pcur_restore_position(
+ BTR_MODIFY_LEAF | BTR_MODIFY_EXTERNAL,
+ m_pcur, m_mtr);
+
+ ut_a(ret);
+ }
+
+ *m_block = btr_pcur_get_block(m_pcur);
+ *m_rec = btr_pcur_get_rec(m_pcur);
+
+ ut_d(rec_offs_make_valid(
+ *m_rec, index, const_cast<offset_t*>(m_offsets)));
+
+ ut_ad(m_mtr->memo_contains_page_flagged(
+ *m_rec,
+ MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX));
+
+ ut_ad((m_op == BTR_STORE_INSERT_BULK)
+ == !mtr_memo_contains_flagged(m_mtr, &index->lock,
+ MTR_MEMO_SX_LOCK
+ | MTR_MEMO_X_LOCK));
+ }
+};
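check() is invoked from btr_store_big_rec_extern_fields() every commit_freq BLOB pages (visible further down): store the cursor position, commit the mini-transaction, wait for redo log headroom via log_free_check(), then restart the mtr and reposition. The commit-and-reposition pattern in isolation; Txn and Cursor are hypothetical stand-ins for mtr_t and btr_pcur_t:

    // A long-running writer periodically releases its latches so the
    // redo log can advance, then re-acquires its position and continues.
    template <typename Txn, typename Cursor>
    void write_many_pages(Txn& txn, Cursor& cur, unsigned n_pages,
                          unsigned commit_freq = 4)
    {
        for (unsigned i = 0; i < n_pages; ++i) {
            if (i != 0 && i % commit_freq == 0) {
                cur.store_position(txn);   // remember where we were
                txn.commit();              // release latches
                txn.wait_for_log_space();  // log_free_check() equivalent
                txn.start();
                cur.restore_position(txn); // re-latch and continue
            }
            cur.write_next_page(txn);
        }
    }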
+
/*******************************************************************//**
Stores the fields in big_rec_vec to the tablespace and puts pointers to
them in rec. The extern flags in rec will have to be set beforehand.
The fields are stored on pages allocated from leaf node
file segment of the index tree.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE or DB_TOO_BIG_FOR_REDO */
-UNIV_INTERN
+
+TODO: If the allocation extends the tablespace, it will not be redo logged, in
+any mini-transaction. Tablespace extension should be redo-logged, so that
+recovery will not fail when the big_rec was written to the extended portion of
+the file, in case the file was somehow truncated in the crash.
+
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
dberr_t
btr_store_big_rec_extern_fields(
/*============================*/
- dict_index_t* index, /*!< in: index of rec; the index tree
- MUST be X-latched */
- buf_block_t* rec_block, /*!< in/out: block containing rec */
- rec_t* rec, /*!< in/out: record */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index);
- the "external storage" flags in offsets
- will not correspond to rec when
- this function returns */
+ btr_pcur_t* pcur, /*!< in/out: a persistent cursor. if
+ btr_mtr is restarted, then this can
+ be repositioned. */
+ offset_t* offsets, /*!< in/out: rec_get_offsets() on
+ pcur. the "external storage" flags
+ in offsets will correctly correspond
+ to rec when this function returns */
const big_rec_t*big_rec_vec, /*!< in: vector containing fields
to be stored externally */
- mtr_t* btr_mtr, /*!< in: mtr containing the
- latches to the clustered index */
+ mtr_t* btr_mtr, /*!< in/out: mtr containing the
+ latches to the clustered index. can be
+ committed and restarted. */
enum blob_op op) /*!< in: operation code */
{
ulint rec_page_no;
@@ -4624,42 +6805,40 @@ btr_store_big_rec_extern_fields(
ulint store_len;
ulint page_no;
ulint space_id;
- ulint zip_size;
ulint prev_page_no;
ulint hint_page_no;
ulint i;
mtr_t mtr;
- mtr_t* alloc_mtr;
+ mtr_t mtr_bulk;
mem_heap_t* heap = NULL;
page_zip_des_t* page_zip;
z_stream c_stream;
- buf_block_t** freed_pages = NULL;
- ulint n_freed_pages = 0;
dberr_t error = DB_SUCCESS;
+ dict_index_t* index = pcur->index();
+ buf_block_t* rec_block = btr_pcur_get_block(pcur);
+ rec_t* rec = btr_pcur_get_rec(pcur);
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(rec_offs_any_extern(offsets));
- ut_ad(mtr_memo_contains(btr_mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(btr_mtr, rec_block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(op == BTR_STORE_INSERT_BULK
+ || mtr_memo_contains_flagged(btr_mtr, &index->lock,
+ MTR_MEMO_X_LOCK
+ | MTR_MEMO_SX_LOCK));
+ ut_ad(mtr_is_block_fix(
+ btr_mtr, rec_block, MTR_MEMO_PAGE_X_FIX, index->table));
ut_ad(buf_block_get_frame(rec_block) == page_align(rec));
ut_a(dict_index_is_clust(index));
- page_zip = buf_block_get_page_zip(rec_block);
- ut_a(dict_table_zip_size(index->table)
- == buf_block_get_zip_size(rec_block));
-
- space_id = buf_block_get_space(rec_block);
- zip_size = buf_block_get_zip_size(rec_block);
- rec_page_no = buf_block_get_page_no(rec_block);
- ut_a(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX);
+ ut_a(dict_table_page_size(index->table)
+ .equals_to(rec_block->page.size));
- error = btr_check_blob_limit(big_rec_vec);
-
- if (error != DB_SUCCESS) {
- ut_ad(op == BTR_STORE_INSERT);
- return(error);
- }
+ btr_blob_log_check_t redo_log(pcur, btr_mtr, offsets, &rec_block,
+ &rec, op);
+ page_zip = buf_block_get_page_zip(rec_block);
+ space_id = rec_block->page.id.space();
+ rec_page_no = rec_block->page.id.page_no();
+ ut_a(fil_page_index_page_check(page_align(rec))
+ || op == BTR_STORE_INSERT_BULK);
if (page_zip) {
int err;
@@ -4677,52 +6856,13 @@ btr_store_big_rec_extern_fields(
ut_a(err == Z_OK);
}
- if (btr_blob_op_is_update(op)) {
- /* Avoid reusing pages that have been previously freed
- in btr_mtr. */
- if (btr_mtr->n_freed_pages) {
- if (heap == NULL) {
- heap = mem_heap_create(
- btr_mtr->n_freed_pages
- * sizeof *freed_pages);
- }
-
- freed_pages = static_cast<buf_block_t**>(
- mem_heap_alloc(
- heap,
- btr_mtr->n_freed_pages
- * sizeof *freed_pages));
- n_freed_pages = 0;
- }
-
- /* Because btr_mtr will be committed after mtr, it is
- possible that the tablespace has been extended when
- the B-tree record was updated or inserted, or it will
- be extended while allocating pages for big_rec.
-
- TODO: In mtr (not btr_mtr), write a redo log record
- about extending the tablespace to its current size,
- and remember the current size. Whenever the tablespace
- grows as pages are allocated, write further redo log
- records to mtr. (Currently tablespace extension is not
- covered by the redo log. If it were, the record would
- only be written to btr_mtr, which is committed after
- mtr.) */
- alloc_mtr = btr_mtr;
- } else {
- /* Use the local mtr for allocations. */
- alloc_mtr = &mtr;
- }
-
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
/* All pointers to externally stored columns in the record
must either be zero or they must be pointers to inherited
columns, owned by this record or an earlier record version. */
- for (i = 0; i < rec_offs_n_fields(offsets); i++) {
- if (!rec_offs_nth_extern(offsets, i)) {
- continue;
- }
- field_ref = btr_rec_get_field_ref(rec, offsets, i);
+ for (i = 0; i < big_rec_vec->n_fields; i++) {
+ field_ref = btr_rec_get_field_ref(
+ rec, offsets, big_rec_vec->fields[i].field_no);
ut_a(!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG));
/* Either this must be an update in place,
@@ -4734,12 +6874,24 @@ btr_store_big_rec_extern_fields(
BTR_EXTERN_FIELD_REF_SIZE));
}
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+
+ const page_size_t page_size(dict_table_page_size(index->table));
+
+ /* Space available in compressed page to carry blob data */
+ const ulint payload_size_zip = page_size.physical()
+ - FIL_PAGE_DATA;
+
+ /* Space available in uncompressed page to carry blob data */
+ const ulint payload_size = page_size.physical()
+ - FIL_PAGE_DATA - BTR_BLOB_HDR_SIZE - FIL_PAGE_DATA_END;
+
/* We have to create a file segment to the tablespace
for each field and put the pointer to the field in rec */
for (i = 0; i < big_rec_vec->n_fields; i++) {
- field_ref = btr_rec_get_field_ref(
- rec, offsets, big_rec_vec->fields[i].field_no);
+ const ulint field_no = big_rec_vec->fields[i].field_no;
+
+ field_ref = btr_rec_get_field_ref(rec, offsets, field_no);
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
/* A zero BLOB pointer should have been initially inserted. */
ut_a(!memcmp(field_ref, field_ref_zero,
@@ -4762,11 +6914,32 @@ btr_store_big_rec_extern_fields(
c_stream.avail_in = static_cast<uInt>(extern_len);
}
- for (;;) {
+ for (ulint blob_npages = 0;; ++blob_npages) {
buf_block_t* block;
page_t* page;
+ const ulint commit_freq = 4;
+ ulint r_extents;
+
+ ut_ad(page_align(field_ref) == page_align(rec));
+
+ if (!(blob_npages % commit_freq)) {
+
+ redo_log.check();
+
+ field_ref = btr_rec_get_field_ref(
+ rec, offsets, field_no);
+
+ page_zip = buf_block_get_page_zip(rec_block);
+ rec_page_no = rec_block->page.id.page_no();
+ }
mtr_start(&mtr);
+ mtr.set_named_space(index->space);
+ mtr.set_log_mode(btr_mtr->get_log_mode());
+ mtr.set_flush_observer(btr_mtr->get_flush_observer());
+
+ buf_page_get(rec_block->page.id,
+ rec_block->page.size, RW_X_LATCH, &mtr);
if (prev_page_no == FIL_NULL) {
hint_page_no = 1 + rec_page_no;
@@ -4774,36 +6947,48 @@ btr_store_big_rec_extern_fields(
hint_page_no = prev_page_no + 1;
}
-alloc_another:
- block = btr_page_alloc(index, hint_page_no,
- FSP_NO_DIR, 0, alloc_mtr, &mtr);
- if (UNIV_UNLIKELY(block == NULL)) {
- mtr_commit(&mtr);
+ mtr_t *alloc_mtr;
+
+ if (UNIV_UNLIKELY(op == BTR_STORE_INSERT_BULK)) {
+ mtr_start(&mtr_bulk);
+ mtr_bulk.set_spaces(mtr);
+ alloc_mtr = &mtr_bulk;
+ } else {
+ alloc_mtr = &mtr;
+ }
+
+ if (!fsp_reserve_free_extents(&r_extents, space_id, 1,
+ FSP_BLOB, alloc_mtr,
+ 1)) {
+
+ mtr_commit(alloc_mtr);
error = DB_OUT_OF_FILE_SPACE;
goto func_exit;
}
- if (rw_lock_get_x_lock_count(&block->lock) > 1) {
- /* This page must have been freed in
- btr_mtr previously. Put it aside, and
- allocate another page for the BLOB data. */
- ut_ad(alloc_mtr == btr_mtr);
- ut_ad(btr_blob_op_is_update(op));
- ut_ad(n_freed_pages < btr_mtr->n_freed_pages);
- freed_pages[n_freed_pages++] = block;
- goto alloc_another;
+ block = btr_page_alloc(index, hint_page_no, FSP_NO_DIR,
+ 0, alloc_mtr, &mtr);
+
+ alloc_mtr->release_free_extents(r_extents);
+
+ if (UNIV_UNLIKELY(op == BTR_STORE_INSERT_BULK)) {
+ mtr_commit(&mtr_bulk);
}
- page_no = buf_block_get_page_no(block);
+ ut_a(block != NULL);
+
+ page_no = block->page.id.page_no();
page = buf_block_get_frame(block);
if (prev_page_no != FIL_NULL) {
buf_block_t* prev_block;
page_t* prev_page;
- prev_block = buf_page_get(space_id, zip_size,
- prev_page_no,
- RW_X_LATCH, &mtr);
+ prev_block = buf_page_get(
+ page_id_t(space_id, prev_page_no),
+ rec_block->page.size,
+ RW_X_LATCH, &mtr);
+
buf_block_dbg_add_level(prev_block,
SYNC_EXTERN_STORAGE);
prev_page = buf_block_get_frame(prev_block);
@@ -4849,9 +7034,8 @@ alloc_another:
c_stream.next_out = page
+ FIL_PAGE_DATA;
- c_stream.avail_out
- = static_cast<uInt>(page_zip_get_size(page_zip))
- - FIL_PAGE_DATA;
+ c_stream.avail_out = static_cast<uInt>(
+ payload_size_zip);
err = deflate(&c_stream, Z_FINISH);
ut_a(err == Z_OK || err == Z_STREAM_END);
@@ -4864,28 +7048,6 @@ alloc_another:
/* Initialize the unused "prev page" pointer */
mlog_write_ulint(page + FIL_PAGE_PREV,
FIL_NULL, MLOG_4BYTES, &mtr);
- /* Write a back pointer to the record
- into the otherwise unused area. This
- information could be useful in
- debugging. Later, we might want to
- implement the possibility to relocate
- BLOB pages. Then, we would need to be
- able to adjust the BLOB pointer in the
- record. We do not store the heap
- number of the record, because it can
- change in page_zip_reorganize() or
- btr_page_reorganize(). However, also
- the page number of the record may
- change when B-tree nodes are split or
- merged. */
- mlog_write_ulint(page
- + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
- space_id,
- MLOG_4BYTES, &mtr);
- mlog_write_ulint(page
- + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4,
- rec_page_no,
- MLOG_4BYTES, &mtr);
/* Zero out the unused part of the page. */
memset(page + page_zip_get_size(page_zip)
@@ -4911,16 +7073,6 @@ alloc_another:
goto next_zip_page;
}
- if (alloc_mtr == &mtr) {
- rec_block = buf_page_get(
- space_id, zip_size,
- rec_page_no,
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(
- rec_block,
- SYNC_NO_ORDER_CHECK);
- }
-
if (err == Z_STREAM_END) {
mach_write_to_4(field_ref
+ BTR_EXTERN_LEN, 0);
@@ -4933,11 +7085,7 @@ alloc_another:
}
if (prev_page_no == FIL_NULL) {
- btr_blob_dbg_add_blob(
- rec, big_rec_vec->fields[i]
- .field_no, page_no, index,
- "store");
-
+ ut_ad(blob_npages == 0);
mach_write_to_4(field_ref
+ BTR_EXTERN_SPACE_ID,
space_id);
@@ -4951,17 +7099,19 @@ alloc_another:
FIL_PAGE_NEXT);
}
- page_zip_write_blob_ptr(
- page_zip, rec, index, offsets,
- big_rec_vec->fields[i].field_no,
- alloc_mtr);
+ /* The page is compressed only when the bulk insert finishes. */
+ if (UNIV_LIKELY(op != BTR_STORE_INSERT_BULK)) {
+ page_zip_write_blob_ptr(
+ page_zip, rec, index, offsets,
+ field_no, &mtr);
+ }
next_zip_page:
prev_page_no = page_no;
/* Commit mtr and release the
uncompressed page frame to save memory. */
- btr_blob_free(block, FALSE, &mtr);
+ btr_blob_free(index, block, FALSE, &mtr);
if (err == Z_STREAM_END) {
break;
@@ -4971,14 +7121,8 @@ next_zip_page:
FIL_PAGE_TYPE_BLOB,
MLOG_2BYTES, &mtr);
- if (extern_len > (UNIV_PAGE_SIZE
- - FIL_PAGE_DATA
- - BTR_BLOB_HDR_SIZE
- - FIL_PAGE_DATA_END)) {
- store_len = UNIV_PAGE_SIZE
- - FIL_PAGE_DATA
- - BTR_BLOB_HDR_SIZE
- - FIL_PAGE_DATA_END;
+ if (extern_len > payload_size) {
+ store_len = payload_size;
} else {
store_len = extern_len;
}
@@ -4999,45 +7143,31 @@ next_zip_page:
extern_len -= store_len;
- if (alloc_mtr == &mtr) {
- rec_block = buf_page_get(
- space_id, zip_size,
- rec_page_no,
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(
- rec_block,
- SYNC_NO_ORDER_CHECK);
- }
-
- mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0,
- MLOG_4BYTES, alloc_mtr);
+ ut_ad(!mach_read_from_4(BTR_EXTERN_LEN
+ + field_ref));
mlog_write_ulint(field_ref
+ BTR_EXTERN_LEN + 4,
big_rec_vec->fields[i].len
- extern_len,
- MLOG_4BYTES, alloc_mtr);
+ MLOG_4BYTES, &mtr);
if (prev_page_no == FIL_NULL) {
- btr_blob_dbg_add_blob(
- rec, big_rec_vec->fields[i]
- .field_no, page_no, index,
- "store");
-
+ ut_ad(blob_npages == 0);
mlog_write_ulint(field_ref
+ BTR_EXTERN_SPACE_ID,
space_id, MLOG_4BYTES,
- alloc_mtr);
+ &mtr);
mlog_write_ulint(field_ref
+ BTR_EXTERN_PAGE_NO,
page_no, MLOG_4BYTES,
- alloc_mtr);
+ &mtr);
mlog_write_ulint(field_ref
+ BTR_EXTERN_OFFSET,
FIL_PAGE_DATA,
MLOG_4BYTES,
- alloc_mtr);
+ &mtr);
}
prev_page_no = page_no;
@@ -5053,6 +7183,8 @@ next_zip_page:
DBUG_EXECUTE_IF("btr_store_big_rec_extern",
error = DB_OUT_OF_FILE_SPACE;
goto func_exit;);
+
+ rec_offs_make_nth_extern(offsets, field_no);
}
func_exit:
@@ -5060,21 +7192,6 @@ func_exit:
deflateEnd(&c_stream);
}
- if (n_freed_pages) {
- ulint i;
-
- ut_ad(alloc_mtr == btr_mtr);
- ut_ad(btr_blob_op_is_update(op));
-
- for (i = 0; i < n_freed_pages; i++) {
- btr_page_free_low(index, freed_pages[i], 0, true, alloc_mtr);
- }
-
- DBUG_EXECUTE_IF("btr_store_big_rec_extern",
- error = DB_OUT_OF_FILE_SPACE;
- goto func_exit;);
- }
-
if (heap != NULL) {
mem_heap_free(heap);
}
@@ -5130,13 +7247,10 @@ btr_check_blob_fil_page_type(
}
#endif /* !UNIV_DEBUG */
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: FIL_PAGE_TYPE=%lu"
- " on BLOB %s space %lu page %lu flags %lx\n",
- (ulong) type, read ? "read" : "purge",
- (ulong) space_id, (ulong) page_no, (ulong) flags);
- ut_error;
+ ib::fatal() << "FIL_PAGE_TYPE=" << type
+ << " on BLOB " << (read ? "read" : "purge")
+ << " space " << space_id << " page " << page_no
+ << " flags " << flags;
}
}
@@ -5145,7 +7259,6 @@ Frees the space in an externally stored field to the file space
management if the field in data is owned by the externally stored field,
in a rollback we may have the additional condition that the field must
not be inherited. */
-UNIV_INTERN
void
btr_free_externally_stored_field(
/*=============================*/
@@ -5160,14 +7273,14 @@ btr_free_externally_stored_field(
byte* field_ref, /*!< in/out: field reference */
const rec_t* rec, /*!< in: record containing field_ref, for
page_zip_write_blob_ptr(), or NULL */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index),
+ const offset_t* offsets, /*!< in: rec_get_offsets(rec, index),
or NULL */
page_zip_des_t* page_zip, /*!< in: compressed page corresponding
to rec, or NULL if rec == NULL */
ulint i, /*!< in: field number of field_ref;
ignored if rec == NULL */
- enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
- mtr_t* local_mtr MY_ATTRIBUTE((unused))) /*!< in: mtr
+ bool rollback, /*!< in: performing rollback? */
+ mtr_t* local_mtr) /*!< in: mtr
containing the latch to the data and an
X-latch to the index tree */
{
@@ -5176,110 +7289,66 @@ btr_free_externally_stored_field(
field_ref + BTR_EXTERN_SPACE_ID);
const ulint start_page = mach_read_from_4(
field_ref + BTR_EXTERN_PAGE_NO);
- ulint rec_zip_size = dict_table_zip_size(index->table);
- ulint ext_zip_size;
ulint page_no;
ulint next_page_no;
mtr_t mtr;
ut_ad(dict_index_is_clust(index));
- ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains_page(local_mtr, field_ref,
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_flagged(local_mtr, dict_index_get_lock(index),
+ MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK));
+ ut_ad(mtr_is_page_fix(
+ local_mtr, field_ref, MTR_MEMO_PAGE_X_FIX, index->table));
ut_ad(!rec || rec_offs_validate(rec, index, offsets));
ut_ad(!rec || field_ref == btr_rec_get_field_ref(rec, offsets, i));
+ ut_ad(local_mtr->is_named_space(
+ page_get_space_id(page_align(field_ref))));
if (UNIV_UNLIKELY(!memcmp(field_ref, field_ref_zero,
BTR_EXTERN_FIELD_REF_SIZE))) {
/* In the rollback, we may encounter a clustered index
record with some unwritten off-page columns. There is
nothing to free then. */
- if (rb_ctx == RB_NONE) {
- char buf[3 * 512];
- char *bufend;
- ulint ispace = dict_index_get_space(index);
- bufend = innobase_convert_name(buf, sizeof buf,
- index->name, strlen(index->name),
- NULL,
- FALSE);
- buf[bufend - buf]='\0';
- ib_logf(IB_LOG_LEVEL_ERROR, "Unwritten off-page columns in "
- "rollback context %d. Table %s index %s space_id %lu "
- "index space %lu.",
- rb_ctx, index->table->name, buf, space_id, ispace);
- }
-
- ut_a(rb_ctx != RB_NONE);
+ ut_a(rollback);
return;
}
+ ut_ad(!(mach_read_from_4(field_ref + BTR_EXTERN_LEN)
+ & ~((BTR_EXTERN_OWNER_FLAG
+ | BTR_EXTERN_INHERITED_FLAG) << 24)));
ut_ad(space_id == index->space);
- if (UNIV_UNLIKELY(space_id != dict_index_get_space(index))) {
- ext_zip_size = fil_space_get_zip_size(space_id);
- /* This must be an undo log record in the system tablespace,
- that is, in row_purge_upd_exist_or_extern().
- Currently, externally stored records are stored in the
- same tablespace as the referring records. */
- ut_ad(!page_get_space_id(page_align(field_ref)));
- ut_ad(!rec);
- ut_ad(!page_zip);
- } else {
- ext_zip_size = rec_zip_size;
- }
-
- if (!rec) {
+ const page_size_t ext_page_size(dict_table_page_size(index->table));
+ const page_size_t& rec_page_size(rec == NULL
+ ? univ_page_size
+ : ext_page_size);
+ if (rec == NULL) {
/* This is a call from row_purge_upd_exist_or_extern(). */
ut_ad(!page_zip);
- rec_zip_size = 0;
- }
-
-#ifdef UNIV_BLOB_DEBUG
- if (!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG)
- && !((field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_INHERITED_FLAG)
- && (rb_ctx == RB_NORMAL || rb_ctx == RB_RECOVERY))) {
- /* This off-page column will be freed.
- Check that no references remain. */
-
- btr_blob_dbg_t b;
-
- b.blob_page_no = start_page;
-
- if (rec) {
- /* Remove the reference from the record to the
- BLOB. If the BLOB were not freed, the
- reference would be removed when the record is
- removed. Freeing the BLOB will overwrite the
- BTR_EXTERN_PAGE_NO in the field_ref of the
- record with FIL_NULL, which would make the
- btr_blob_dbg information inconsistent with the
- record. */
- b.ref_page_no = page_get_page_no(page_align(rec));
- b.ref_heap_no = page_rec_get_heap_no(rec);
- b.ref_field_no = i;
- btr_blob_dbg_rbt_delete(index, &b, "free");
- }
-
- btr_blob_dbg_assert_empty(index, b.blob_page_no);
}
-#endif /* UNIV_BLOB_DEBUG */
for (;;) {
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
buf_block_t* rec_block;
-#endif /* UNIV_SYNC_DEBUG */
+#endif /* UNIV_DEBUG */
buf_block_t* ext_block;
mtr_start(&mtr);
+ mtr.set_spaces(*local_mtr);
+ mtr.set_log_mode(local_mtr->get_log_mode());
+
+ ut_ad(!dict_table_is_temporary(index->table)
+ || local_mtr->get_log_mode() == MTR_LOG_NO_REDO);
+
+ const page_t* p = page_align(field_ref);
-#ifdef UNIV_SYNC_DEBUG
+ const page_id_t page_id(page_get_space_id(p),
+ page_get_page_no(p));
+
+#ifdef UNIV_DEBUG
rec_block =
-#endif /* UNIV_SYNC_DEBUG */
- buf_page_get(page_get_space_id(page_align(field_ref)),
- rec_zip_size,
- page_get_page_no(page_align(field_ref)),
- RW_X_LATCH, &mtr);
+#endif /* UNIV_DEBUG */
+ buf_page_get(page_id, rec_page_size, RW_X_LATCH, &mtr);
+
buf_block_dbg_add_level(rec_block, SYNC_NO_ORDER_CHECK);
page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO);
@@ -5289,7 +7358,7 @@ btr_free_externally_stored_field(
|| (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
& BTR_EXTERN_OWNER_FLAG)
/* Rollback and inherited field */
- || ((rb_ctx == RB_NORMAL || rb_ctx == RB_RECOVERY)
+ || (rollback
&& (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
& BTR_EXTERN_INHERITED_FLAG))) {
@@ -5303,12 +7372,14 @@ btr_free_externally_stored_field(
row_log_table_blob_free(index, start_page);
}
- ext_block = buf_page_get(space_id, ext_zip_size, page_no,
- RW_X_LATCH, &mtr);
+ ext_block = buf_page_get(
+ page_id_t(space_id, page_no), ext_page_size,
+ RW_X_LATCH, &mtr);
+
buf_block_dbg_add_level(ext_block, SYNC_EXTERN_STORAGE);
page = buf_block_get_frame(ext_block);
- if (ext_zip_size) {
+ if (ext_page_size.is_compressed()) {
/* Note that page_zip will be NULL
in row_purge_upd_exist_or_extern(). */
switch (fil_page_get_type(page)) {
@@ -5320,7 +7391,7 @@ btr_free_externally_stored_field(
}
next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT);
- btr_page_free_low(index, ext_block, 0, true, &mtr);
+ btr_page_free(index, ext_block, &mtr, true);
if (page_zip != NULL) {
mach_write_to_4(field_ref + BTR_EXTERN_PAGE_NO,
@@ -5346,12 +7417,7 @@ btr_free_externally_stored_field(
next_page_no = mach_read_from_4(
page + FIL_PAGE_DATA
+ BTR_BLOB_HDR_NEXT_PAGE_NO);
-
- /* We must supply the page level (= 0) as an argument
- because we did not store it on the page (we save the
- space overhead from an index page header. */
-
- btr_page_free_low(index, ext_block, 0, true, &mtr);
+ btr_page_free(index, ext_block, &mtr, true);
mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO,
next_page_no,
@@ -5367,7 +7433,7 @@ btr_free_externally_stored_field(
}
/* Commit mtr and release the BLOB block to save memory. */
- btr_blob_free(ext_block, TRUE, &mtr);
+ btr_blob_free(index, ext_block, TRUE, &mtr);
}
}
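The loop above releases the chain one page per mini-transaction: re-latch the page holding the field reference, free the head BLOB page, persist the new head in BTR_EXTERN_PAGE_NO, commit. A crash between iterations therefore leaves a shorter but still well-formed chain. The shape of that loop in isolation; Txn and PageStore are hypothetical:

    #include <cstdint>

    // Free a singly linked chain of pages, one small transaction per
    // page, so progress is durable page by page.
    template <typename Txn, typename PageStore>
    void free_chain(PageStore& store, int64_t head, int64_t null_page)
    {
        while (head != null_page) {
            Txn txn;
            txn.start();
            int64_t next = store.read_next(txn, head); // FIL_PAGE_NEXT
            store.free_page(txn, head);                // btr_page_free()
            store.write_head(txn, next);               // new chain head
            txn.commit();                              // durable step
            head = next;
        }
    }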
@@ -5380,10 +7446,10 @@ btr_rec_free_externally_stored_fields(
dict_index_t* index, /*!< in: index of the data, the index
tree MUST be X-latched */
rec_t* rec, /*!< in/out: record */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
part will be updated, or NULL */
- enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
+ bool rollback,/*!< in: performing rollback? */
mtr_t* mtr) /*!< in: mini-transaction handle which contains
an X-latch to record page and to the index
tree */
@@ -5392,7 +7458,9 @@ btr_rec_free_externally_stored_fields(
ulint i;
ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_is_page_fix(mtr, rec, MTR_MEMO_PAGE_X_FIX, index->table));
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(page_rec_is_leaf(rec));
/* Free possible externally stored fields in the record */
ut_ad(dict_table_is_comp(index->table) == !!rec_offs_comp(offsets));
@@ -5402,7 +7470,7 @@ btr_rec_free_externally_stored_fields(
if (rec_offs_nth_extern(offsets, i)) {
btr_free_externally_stored_field(
index, btr_rec_get_field_ref(rec, offsets, i),
- rec, offsets, page_zip, i, rb_ctx, mtr);
+ rec, offsets, page_zip, i, rollback, mtr);
}
}
}
@@ -5419,9 +7487,9 @@ btr_rec_free_updated_extern_fields(
rec_t* rec, /*!< in/out: record */
page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
part will be updated, or NULL */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
const upd_t* update, /*!< in: update vector */
- enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
+ bool rollback,/*!< in: performing rollback? */
mtr_t* mtr) /*!< in: mini-transaction handle which contains
an X-latch to record page and to the tree */
{
@@ -5429,7 +7497,7 @@ btr_rec_free_updated_extern_fields(
ulint i;
ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_is_page_fix(mtr, rec, MTR_MEMO_PAGE_X_FIX, index->table));
/* Free possible externally stored fields in the record */
@@ -5447,7 +7515,7 @@ btr_rec_free_updated_extern_fields(
btr_free_externally_stored_field(
index, data + len - BTR_EXTERN_FIELD_REF_SIZE,
rec, offsets, page_zip,
- ufield->field_no, rb_ctx, mtr);
+ ufield->field_no, rollback, mtr);
}
}
}
@@ -5455,7 +7523,7 @@ btr_rec_free_updated_extern_fields(
/*******************************************************************//**
Copies the prefix of an uncompressed BLOB. The clustered index record
that points to this BLOB must be protected by a lock or a page latch.
-@return number of bytes written to buf */
+@return number of bytes written to buf */
static
ulint
btr_copy_blob_prefix(
@@ -5479,7 +7547,8 @@ btr_copy_blob_prefix(
mtr_start(&mtr);
- block = buf_page_get(space_id, 0, page_no, RW_S_LATCH, &mtr);
+ block = buf_page_get(page_id_t(space_id, page_no),
+ univ_page_size, RW_S_LATCH, &mtr);
buf_block_dbg_add_level(block, SYNC_EXTERN_STORAGE);
page = buf_block_get_frame(block);
@@ -5511,21 +7580,25 @@ btr_copy_blob_prefix(
}
}
-/*******************************************************************//**
-Copies the prefix of a compressed BLOB. The clustered index record
-that points to this BLOB must be protected by a lock or a page latch.
-@return number of bytes written to buf */
+/** Copies the prefix of a compressed BLOB.
+The clustered index record that points to this BLOB must be protected
+by a lock or a page latch.
+@param[out] buf the externally stored part of the field,
+or a prefix of it
+@param[in] len length of buf, in bytes
+@param[in] page_size compressed BLOB page size
+@param[in] space_id space id of the BLOB pages
+@param[in] offset offset on the first BLOB page
+@return number of bytes written to buf */
static
ulint
btr_copy_zblob_prefix(
-/*==================*/
- byte* buf, /*!< out: the externally stored part of
- the field, or a prefix of it */
- ulint len, /*!< in: length of buf, in bytes */
- ulint zip_size,/*!< in: compressed BLOB page size */
- ulint space_id,/*!< in: space id of the BLOB pages */
- ulint page_no,/*!< in: page number of the first BLOB page */
- ulint offset) /*!< in: offset on the first BLOB page */
+ byte* buf,
+ ulint len,
+ const page_size_t& page_size,
+ ulint space_id,
+ ulint page_no,
+ ulint offset)
{
ulint page_type = FIL_PAGE_TYPE_ZBLOB;
mem_heap_t* heap;
@@ -5542,9 +7615,7 @@ btr_copy_zblob_prefix(
heap = mem_heap_create(40000);
page_zip_set_alloc(&d_stream, heap);
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(zip_size >= UNIV_ZIP_SIZE_MIN);
- ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
+ ut_ad(page_size.is_compressed());
ut_ad(space_id);
err = inflateInit(&d_stream);
@@ -5558,27 +7629,23 @@ btr_copy_zblob_prefix(
bpage is protected by the B-tree page latch that
is being held on the clustered index record, or,
in row_merge_copy_blobs(), by an exclusive table lock. */
- bpage = buf_page_get_zip(space_id, zip_size, page_no);
+ bpage = buf_page_get_zip(page_id_t(space_id, page_no),
+ page_size);
if (UNIV_UNLIKELY(!bpage)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Cannot load"
- " compressed BLOB"
- " page %lu space %lu\n",
- (ulong) page_no, (ulong) space_id);
+ ib::error() << "Cannot load compressed BLOB "
+ << page_id_t(space_id, page_no);
goto func_exit;
}
if (UNIV_UNLIKELY
(fil_page_get_type(bpage->zip.data) != page_type)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Unexpected type %lu of"
- " compressed BLOB"
- " page %lu space %lu\n",
- (ulong) fil_page_get_type(bpage->zip.data),
- (ulong) page_no, (ulong) space_id);
+
+ ib::error() << "Unexpected type "
+ << fil_page_get_type(bpage->zip.data)
+ << " of compressed BLOB page "
+ << page_id_t(space_id, page_no);
+
ut_ad(0);
goto end_of_blob;
}
@@ -5595,7 +7662,8 @@ btr_copy_zblob_prefix(
}
d_stream.next_in = bpage->zip.data + offset;
- d_stream.avail_in = static_cast<uInt>(zip_size - offset);
+ d_stream.avail_in = static_cast<uInt>(page_size.physical()
+ - offset);
err = inflate(&d_stream, Z_NO_FLUSH);
switch (err) {
@@ -5611,26 +7679,21 @@ btr_copy_zblob_prefix(
/* fall through */
default:
inflate_error:
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: inflate() of"
- " compressed BLOB"
- " page %lu space %lu returned %d (%s)\n",
- (ulong) page_no, (ulong) space_id,
- err, d_stream.msg);
+ ib::error() << "inflate() of compressed BLOB page "
+ << page_id_t(space_id, page_no)
+ << " returned " << err
+ << " (" << d_stream.msg << ")";
+
case Z_BUF_ERROR:
goto end_of_blob;
}
if (next_page_no == FIL_NULL) {
if (!d_stream.avail_in) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: unexpected end of"
- " compressed BLOB"
- " page %lu space %lu\n",
- (ulong) page_no,
- (ulong) space_id);
+ ib::error()
+ << "Unexpected end of compressed "
+ << "BLOB page "
+ << page_id_t(space_id, page_no);
} else {
err = inflate(&d_stream, Z_FINISH);
switch (err) {
@@ -5664,55 +7727,59 @@ func_exit:
return(d_stream.total_out);
}
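btr_copy_zblob_prefix() drives a single z_stream across the whole page chain: avail_in is refilled from each compressed page while next_out/avail_out keep pointing into the caller's buffer, and Z_STREAM_END or a full output buffer ends the walk. The same zlib pattern over a plain buffer, fed in page-sized chunks (standard zlib API, no InnoDB types):

    #include <zlib.h>
    #include <cstring>

    // Decompress up to out_len bytes of 'in', feeding the input in
    // fixed chunks the way the BLOB reader feeds one page at a time.
    // Returns the number of bytes written to 'out'.
    size_t inflate_prefix(const unsigned char* in, size_t in_len,
                          unsigned char* out, size_t out_len)
    {
        z_stream s;
        memset(&s, 0, sizeof(s));
        if (inflateInit(&s) != Z_OK) {
            return 0;
        }
        s.next_out  = out;
        s.avail_out = static_cast<uInt>(out_len);

        const size_t chunk = 4096;  // stand-in for one BLOB page
        size_t off = 0;
        int    err = Z_OK;

        while (off < in_len && s.avail_out > 0 && err == Z_OK) {
            size_t n = in_len - off < chunk ? in_len - off : chunk;
            s.next_in  = const_cast<unsigned char*>(in + off);
            s.avail_in = static_cast<uInt>(n);
            err = inflate(&s, Z_NO_FLUSH);
            off += n - s.avail_in;
        }

        inflateEnd(&s);
        return out_len - s.avail_out;
    }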
-/*******************************************************************//**
-Copies the prefix of an externally stored field of a record. The
-clustered index record that points to this BLOB must be protected by a
-lock or a page latch.
-@return number of bytes written to buf */
+/** Copies the prefix of an externally stored field of a record.
+The clustered index record that points to this BLOB must be protected
+by a lock or a page latch.
+@param[out] buf the externally stored part of the
+field, or a prefix of it
+@param[in] len length of buf, in bytes
+@param[in] page_size BLOB page size
+@param[in] space_id space id of the first BLOB page
+@param[in] page_no page number of the first BLOB page
+@param[in] offset offset on the first BLOB page
+@return number of bytes written to buf */
static
ulint
btr_copy_externally_stored_field_prefix_low(
-/*========================================*/
- byte* buf, /*!< out: the externally stored part of
- the field, or a prefix of it */
- ulint len, /*!< in: length of buf, in bytes */
- ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
- zero for uncompressed BLOBs */
- ulint space_id,/*!< in: space id of the first BLOB page */
- ulint page_no,/*!< in: page number of the first BLOB page */
- ulint offset) /*!< in: offset on the first BLOB page */
+ byte* buf,
+ ulint len,
+ const page_size_t& page_size,
+ ulint space_id,
+ ulint page_no,
+ ulint offset)
{
- if (UNIV_UNLIKELY(len == 0)) {
+ if (len == 0) {
return(0);
}
- if (zip_size) {
- return(btr_copy_zblob_prefix(buf, len, zip_size,
+ if (page_size.is_compressed()) {
+ return(btr_copy_zblob_prefix(buf, len, page_size,
space_id, page_no, offset));
} else {
+ ut_ad(page_size.equals_to(univ_page_size));
return(btr_copy_blob_prefix(buf, len, space_id,
page_no, offset));
}
}
-/*******************************************************************//**
-Copies the prefix of an externally stored field of a record. The
-clustered index record must be protected by a lock or a page latch.
+/** Copies the prefix of an externally stored field of a record.
+The clustered index record must be protected by a lock or a page latch.
+@param[out] buf the field, or a prefix of it
+@param[in] len length of buf, in bytes
+@param[in] page_size BLOB page size
+@param[in] data 'internally' stored part of the field
+containing also the reference to the external part; must be protected by
+a lock or a page latch
+@param[in] local_len length of data, in bytes
@return the length of the copied field, or 0 if the column was being
or has been deleted */
-UNIV_INTERN
ulint
btr_copy_externally_stored_field_prefix(
-/*====================================*/
- byte* buf, /*!< out: the field, or a prefix of it */
- ulint len, /*!< in: length of buf, in bytes */
- ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
- zero for uncompressed BLOBs */
- const byte* data, /*!< in: 'internally' stored part of the
- field containing also the reference to
- the external part; must be protected by
- a lock or a page latch */
- ulint local_len)/*!< in: length of data, in bytes */
+ byte* buf,
+ ulint len,
+ const page_size_t& page_size,
+ const byte* data,
+ ulint local_len)
{
ulint space_id;
ulint page_no;
@@ -5749,28 +7816,28 @@ btr_copy_externally_stored_field_prefix(
return(local_len
+ btr_copy_externally_stored_field_prefix_low(buf + local_len,
len - local_len,
- zip_size,
+ page_size,
space_id, page_no,
offset));
}
-/*******************************************************************//**
-Copies an externally stored field of a record to mem heap. The
-clustered index record must be protected by a lock or a page latch.
-@return the whole field copied to heap */
-UNIV_INTERN
+/** Copies an externally stored field of a record to mem heap.
+The clustered index record must be protected by a lock or a page latch.
+@param[out] len length of the whole field
+@param[in] data 'internally' stored part of the field
+containing also the reference to the external part; must be protected by
+a lock or a page latch
+@param[in] page_size BLOB page size
+@param[in] local_len length of data
+@param[in,out] heap mem heap
+@return the whole field copied to heap */
byte*
btr_copy_externally_stored_field(
-/*=============================*/
- ulint* len, /*!< out: length of the whole field */
- const byte* data, /*!< in: 'internally' stored part of the
- field containing also the reference to
- the external part; must be protected by
- a lock or a page latch */
- ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
- zero for uncompressed BLOBs */
- ulint local_len,/*!< in: length of data */
- mem_heap_t* heap) /*!< in: mem heap */
+ ulint* len,
+ const byte* data,
+ const page_size_t& page_size,
+ ulint local_len,
+ mem_heap_t* heap)
{
ulint space_id;
ulint page_no;
@@ -5799,28 +7866,30 @@ btr_copy_externally_stored_field(
*len = local_len
+ btr_copy_externally_stored_field_prefix_low(buf + local_len,
extern_len,
- zip_size,
+ page_size,
space_id,
page_no, offset);
return(buf);
}
-/*******************************************************************//**
-Copies an externally stored field of a record to mem heap.
-@return the field copied to heap, or NULL if the field is incomplete */
-UNIV_INTERN
+/** Copies an externally stored field of a record to mem heap.
+@param[in] rec record in a clustered index; must be
+protected by a lock or a page latch
+@param[in]	offsets		array returned by rec_get_offsets()
+@param[in] page_size BLOB page size
+@param[in] no field number
+@param[out] len length of the field
+@param[in,out] heap mem heap
+@return the field copied to heap, or NULL if the field is incomplete */
byte*
btr_rec_copy_externally_stored_field(
-/*=================================*/
- const rec_t* rec, /*!< in: record in a clustered index;
- must be protected by a lock or a page latch */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
- zero for uncompressed BLOBs */
- ulint no, /*!< in: field number */
- ulint* len, /*!< out: length of the field */
- mem_heap_t* heap) /*!< in: mem heap */
+ const rec_t* rec,
+ const offset_t* offsets,
+ const page_size_t& page_size,
+ ulint no,
+ ulint* len,
+ mem_heap_t* heap)
{
ulint local_len;
const byte* data;
@@ -5851,6 +7920,5 @@ btr_rec_copy_externally_stored_field(
}
return(btr_copy_externally_stored_field(len, data,
- zip_size, local_len, heap));
+ page_size, local_len, heap));
}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/btr/btr0defragment.cc b/storage/innobase/btr/btr0defragment.cc
index f679f22dfe4..0f6c7d87ca2 100644
--- a/storage/innobase/btr/btr0defragment.cc
+++ b/storage/innobase/btr/btr0defragment.cc
@@ -26,12 +26,13 @@ Modified 30/07/2014 Jan Lindström jan.lindstrom@mariadb.com
*******************************************************/
#include "btr0defragment.h"
-#ifndef UNIV_HOTBACKUP
+#include "btr0btr.h"
#include "btr0cur.h"
#include "btr0sea.h"
#include "btr0pcur.h"
#include "dict0stats.h"
#include "dict0stats_bg.h"
+#include "dict0defrag_bg.h"
#include "ibuf0ibuf.h"
#include "lock0lock.h"
#include "srv0start.h"
@@ -102,8 +103,7 @@ void
btr_defragment_init()
{
srv_defragment_interval = 1000000000ULL / srv_defragment_frequency;
- mutex_create(btr_defragment_mutex_key, &btr_defragment_mutex,
- SYNC_ANY_LATCH);
+ mutex_create(LATCH_ID_BTR_DEFRAGMENT_MUTEX, &btr_defragment_mutex);
}
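
[Editor's note] The interval computed in btr_defragment_init() is a nanosecond pause derived from a per-second frequency. A quick worked example (the frequency value 40 is assumed for illustration, not taken from this patch):

    #include <cassert>
    #include <cstdint>

    int main()
    {
        /* With a frequency of 40 rounds per second, the minimum pause
        between defragmentation rounds is 25,000,000 ns = 25 ms. */
        uint64_t srv_defragment_frequency = 40;
        uint64_t srv_defragment_interval =
            1000000000ULL / srv_defragment_frequency;
        assert(srv_defragment_interval == 25000000ULL);
    }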
/******************************************************************//**
@@ -112,7 +112,7 @@ void
btr_defragment_shutdown()
{
mutex_enter(&btr_defragment_mutex);
- list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin();
+ std::list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin();
while(iter != btr_defragment_wq.end()) {
btr_defragment_item_t* item = *iter;
iter = btr_defragment_wq.erase(iter);
@@ -134,7 +134,7 @@ btr_defragment_find_index(
dict_index_t* index) /*!< Index to find. */
{
mutex_enter(&btr_defragment_mutex);
- for (list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin();
+ for (std::list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin();
iter != btr_defragment_wq.end();
++iter) {
btr_defragment_item_t* item = *iter;
@@ -162,26 +162,29 @@ btr_defragment_add_index(
dberr_t* err) /*!< out: error code */
{
mtr_t mtr;
- ulint space = dict_index_get_space(index);
- ulint zip_size = dict_table_zip_size(index->table);
ulint page_no = dict_index_get_page(index);
*err = DB_SUCCESS;
mtr_start(&mtr);
// Load index root page.
- buf_block_t* block = btr_block_get(space, zip_size, page_no, RW_NO_LATCH, index, &mtr);
+ const page_id_t page_id(dict_index_get_space(index), page_no);
+ const page_size_t page_size(dict_table_page_size(index->table));
+ buf_block_t* block = btr_block_get(page_id, page_size, RW_NO_LATCH, index, &mtr);
page_t* page = NULL;
if (block) {
page = buf_block_get_frame(block);
}
- if (page == NULL && index->table->file_unreadable) {
+ if (page == NULL && !index->is_readable()) {
mtr_commit(&mtr);
*err = DB_DECRYPTION_FAILED;
return NULL;
}
+ ut_ad(fil_page_index_page_check(page));
+ ut_ad(!page_has_siblings(page));
+
if (page_is_leaf(page)) {
// Index root is a leaf page, no need to defragment.
mtr_commit(&mtr);
@@ -190,7 +193,7 @@ btr_defragment_add_index(
btr_pcur_t* pcur = btr_pcur_create_for_mysql();
os_event_t event = NULL;
if (!async) {
- event = os_event_create();
+ event = os_event_create(0);
}
btr_pcur_open_at_index_side(true, index, BTR_SEARCH_LEAF, pcur,
true, 0, &mtr);
@@ -214,7 +217,7 @@ btr_defragment_remove_table(
dict_table_t* table) /*!< Table to be removed. */
{
mutex_enter(&btr_defragment_mutex);
- for (list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin();
+ for (std::list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin();
iter != btr_defragment_wq.end();
++iter) {
btr_defragment_item_t* item = *iter;
@@ -236,7 +239,7 @@ btr_defragment_remove_index(
dict_index_t* index) /*!< Index to be removed. */
{
mutex_enter(&btr_defragment_mutex);
- for (list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin();
+ for (std::list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin();
iter != btr_defragment_wq.end();
++iter) {
btr_defragment_item_t* item = *iter;
@@ -265,7 +268,7 @@ btr_defragment_remove_item(
btr_defragment_item_t* item) /*!< Item to be removed. */
{
mutex_enter(&btr_defragment_mutex);
- for (list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin();
+ for (std::list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin();
iter != btr_defragment_wq.end();
++iter) {
if (item == *iter) {
@@ -294,7 +297,7 @@ btr_defragment_get_item()
//return nullptr;
}
mutex_enter(&btr_defragment_mutex);
- list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin();
+ std::list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin();
if (iter == btr_defragment_wq.end()) {
iter = btr_defragment_wq.begin();
}
@@ -338,8 +341,8 @@ btr_defragment_calc_n_recs_for_size(
{
page_t* page = buf_block_get_frame(block);
ulint n_recs = 0;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
rec_offs_init(offsets_);
mem_heap_t* heap = NULL;
ulint size = 0;
@@ -350,6 +353,7 @@ btr_defragment_calc_n_recs_for_size(
while (page_cur_get_rec(&cur) != page_get_supremum_rec(page)) {
rec_t* cur_rec = page_cur_get_rec(&cur);
offsets = rec_get_offsets(cur_rec, index, offsets,
+ page_is_leaf(page),
ULINT_UNDEFINED, &heap);
ulint rec_size = rec_offs_size(offsets);
size += rec_size;
@@ -374,7 +378,7 @@ btr_defragment_merge_pages(
dict_index_t* index, /*!< in: index tree */
buf_block_t* from_block, /*!< in: origin of merge */
buf_block_t* to_block, /*!< in: destination of merge */
- ulint zip_size, /*!< in: zip size of the block */
+ const page_size_t page_size, /*!< in: page size of the block */
ulint reserved_space, /*!< in: space reserved for future
insert to avoid immediate page split */
ulint* max_data_size, /*!< in/out: max data size to
@@ -403,7 +407,7 @@ btr_defragment_merge_pages(
// Estimate how many records can be moved from the from_page to
// the to_page.
- if (zip_size) {
+ if (page_size.is_compressed()) {
ulint page_diff = UNIV_PAGE_SIZE - *max_data_size;
max_ins_size_to_use = (max_ins_size_to_use > page_diff)
? max_ins_size_to_use - page_diff : 0;
@@ -447,7 +451,7 @@ btr_defragment_merge_pages(
// n_recs_to_move number of records to to_page. We try to reduce
// the targeted data size on the to_page by
// BTR_DEFRAGMENT_PAGE_REDUCTION_STEP_SIZE and try again.
- os_atomic_increment_ulint(
+ my_atomic_addlint(
&btr_defragment_compression_failures, 1);
max_ins_size_to_use =
move_size > BTR_DEFRAGMENT_PAGE_REDUCTION_STEP_SIZE
@@ -472,7 +476,7 @@ btr_defragment_merge_pages(
// Set ibuf free bits if necessary.
if (!dict_index_is_clust(index)
&& page_is_leaf(to_page)) {
- if (zip_size) {
+ if (page_size.is_compressed()) {
ibuf_reset_free_bits(to_block);
} else {
ibuf_update_free_bits_if_full(
@@ -481,17 +485,18 @@ btr_defragment_merge_pages(
ULINT_UNDEFINED);
}
}
+ btr_cur_t parent;
if (n_recs_to_move == n_recs) {
/* The whole page is merged with the previous page,
free it. */
lock_update_merge_left(to_block, orig_pred,
from_block);
btr_search_drop_page_hash_index(from_block);
- btr_level_list_remove(space, zip_size, from_page,
- index, mtr);
- btr_node_ptr_delete(index, from_block, mtr);
- btr_blob_dbg_remove(from_page, index,
- "btr_defragment_n_pages");
+ btr_level_list_remove(space, page_size, (page_t*)from_page, index, mtr);
+ btr_page_get_father(index, from_block, mtr, &parent);
+ btr_cur_node_ptr_delete(&parent, mtr);
+ /* btr_blob_dbg_remove(from_page, index,
+ "btr_defragment_n_pages"); */
btr_page_free(index, from_block, mtr);
} else {
// There are still records left on the page, so
@@ -507,12 +512,14 @@ btr_defragment_merge_pages(
lock_update_split_and_merge(to_block,
orig_pred,
from_block);
- btr_node_ptr_delete(index, from_block, mtr);
+ // FIXME: reuse the node_ptr!
+ btr_page_get_father(index, from_block, mtr, &parent);
+ btr_cur_node_ptr_delete(&parent, mtr);
rec = page_rec_get_next(
page_get_infimum_rec(from_page));
node_ptr = dict_index_build_node_ptr(
index, rec, page_get_page_no(from_page),
- heap, level + 1);
+ heap, level);
btr_insert_on_non_leaf_level(0, index, level+1,
node_ptr, mtr);
}
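
[Editor's note] The partial-merge branch above maintains the B-tree invariant that each parent entry carries the first key of its child page: after records migrate, the stale node pointer is deleted and a new one is built from the first remaining record. A toy standalone model of that invariant, with std::map standing in for the parent level:

    #include <cassert>
    #include <map>
    #include <string>

    int main()
    {
        /* Parent level: first key of each child page -> child page number. */
        std::map<std::string, unsigned> parent = {{"apple", 3}, {"mango", 4}};

        auto child_for = [&](const std::string& k) {
            auto it = parent.upper_bound(k);
            return (--it)->second;  /* child whose first key is <= k */
        };

        /* Records "mango".."orange" migrate from page 4 to page 3; the
        first record left on page 4 is now "pear". Delete the stale node
        pointer and insert one built from the new first record, mirroring
        btr_cur_node_ptr_delete() + btr_insert_on_non_leaf_level(). */
        parent.erase("mango");
        parent.emplace("pear", 4);

        assert(child_for("orange") == 3); /* moved rows now reached via page 3 */
        assert(child_for("pear") == 4);   /* remaining rows still on page 4 */
    }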
@@ -540,7 +547,6 @@ btr_defragment_n_pages(
mtr_t* mtr) /*!< in/out: mini-transaction */
{
ulint space;
- ulint zip_size;
/* We will need to load the n+1 block because if the last page is freed
we need to modify the prev_page_no of the block that follows it. */
buf_block_t* blocks[BTR_DEFRAGMENT_MAX_N_PAGES + 1];
@@ -561,8 +567,6 @@ btr_defragment_n_pages(
/* It doesn't make sense to call this function with n_pages = 1. */
ut_ad(n_pages > 1);
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
space = dict_index_get_space(index);
if (space == 0) {
/* Ignore space 0. */
@@ -573,9 +577,9 @@ btr_defragment_n_pages(
n_pages = BTR_DEFRAGMENT_MAX_N_PAGES;
}
- zip_size = dict_table_zip_size(index->table);
first_page = buf_block_get_frame(block);
level = btr_page_get_level(first_page, mtr);
+ const page_size_t page_size(dict_table_page_size(index->table));
if (level != 0) {
return NULL;
@@ -585,7 +589,7 @@ btr_defragment_n_pages(
blocks[0] = block;
for (uint i = 1; i <= n_pages; i++) {
page_t* page = buf_block_get_frame(blocks[i-1]);
- ulint page_no = btr_page_get_next(page, mtr);
+ ulint page_no = btr_page_get_next(page);
total_data_size += page_get_data_size(page);
total_n_recs += page_get_n_recs(page);
if (page_no == FIL_NULL) {
@@ -593,12 +597,15 @@ btr_defragment_n_pages(
end_of_index = TRUE;
break;
}
- blocks[i] = btr_block_get(space, zip_size, page_no,
+
+ const page_id_t page_id(dict_index_get_space(index), page_no);
+
+ blocks[i] = btr_block_get(page_id, page_size,
RW_X_LATCH, index, mtr);
}
if (n_pages == 1) {
- if (btr_page_get_prev(first_page, mtr) == FIL_NULL) {
+ if (!page_has_prev(first_page)) {
/* last page in the index */
if (dict_index_get_page(index)
== page_get_page_no(first_page))
@@ -619,7 +626,7 @@ btr_defragment_n_pages(
optimal_page_size = page_get_free_space_of_empty(
page_is_comp(first_page));
// For compressed pages, we take compression failures into account.
- if (zip_size) {
+ if (page_size.is_compressed()) {
ulint size = 0;
int i = 0;
// We estimate the optimal data size of the index using samples of
@@ -636,18 +643,18 @@ btr_defragment_n_pages(
}
if (i != 0) {
size = size / i;
- optimal_page_size = min(optimal_page_size, size);
+ optimal_page_size = ut_min(optimal_page_size, size);
}
max_data_size = optimal_page_size;
}
- reserved_space = min((ulint)(optimal_page_size
+ reserved_space = ut_min((ulint)(optimal_page_size
* (1 - srv_defragment_fill_factor)),
(data_size_per_rec
* srv_defragment_fill_factor_n_recs));
optimal_page_size -= reserved_space;
- n_new_slots = (total_data_size + optimal_page_size - 1)
- / optimal_page_size;
+ n_new_slots = uint((total_data_size + optimal_page_size - 1)
+ / optimal_page_size);
if (n_new_slots >= n_pages) {
/* Can't defragment. */
if (end_of_index)
@@ -662,7 +669,7 @@ btr_defragment_n_pages(
// Start from the second page.
for (uint i = 1; i < n_pages; i ++) {
buf_block_t* new_block = btr_defragment_merge_pages(
- index, blocks[i], current_block, zip_size,
+ index, blocks[i], current_block, page_size,
reserved_space, &max_data_size, heap, mtr);
if (new_block != current_block) {
n_defragmented ++;
@@ -671,10 +678,10 @@ btr_defragment_n_pages(
}
mem_heap_free(heap);
n_defragmented ++;
- os_atomic_increment_ulint(
+ my_atomic_addlint(
&btr_defragment_count, 1);
if (n_pages == n_defragmented) {
- os_atomic_increment_ulint(
+ my_atomic_addlint(
&btr_defragment_failures, 1);
} else {
index->stat_defrag_n_pages_freed += (n_pages - n_defragmented);
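
[Editor's note] The sizing arithmetic earlier in this function reserves headroom per target page according to the fill factor, then computes the target page count by ceiling division. A standalone sketch with illustrative constants (not server defaults):

    #include <algorithm>
    #include <cassert>

    int main()
    {
        unsigned long optimal_page_size = 16000;  /* usable bytes per page */
        double srv_defragment_fill_factor = 0.75;
        unsigned long data_size_per_rec = 100;
        unsigned long srv_defragment_fill_factor_n_recs = 20;

        unsigned long reserved_space = std::min(
            (unsigned long)(optimal_page_size
                            * (1 - srv_defragment_fill_factor)),
            data_size_per_rec * srv_defragment_fill_factor_n_recs);
        assert(reserved_space == 2000);           /* min(4000, 2000) */

        optimal_page_size -= reserved_space;      /* 16000 - 2000 = 14000 */

        unsigned long total_data_size = 100000;
        unsigned long n_new_slots = (total_data_size + optimal_page_size - 1)
                                    / optimal_page_size;
        assert(n_new_slots == 8);                 /* ceil(100000 / 14000) */
    }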
@@ -746,10 +753,17 @@ DECLARE_THREAD(btr_defragment_thread)(void*)
now = my_interval_timer();
mtr_start(&mtr);
- btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, &mtr);
cursor = btr_pcur_get_btr_cur(pcur);
index = btr_cur_get_index(cursor);
+ mtr.set_named_space(index->space);
+ /* To follow the latching order defined in WL#6326, acquire index->lock X-latch.
+ This entitles us to acquire page latches in any order for the index. */
+ mtr_x_lock(&index->lock, &mtr);
+ /* This will acquire index->lock SX-latch, which per WL#6363 is allowed
+ when we are already holding the X-latch. */
+ btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, &mtr);
first_block = btr_cur_get_block(cursor);
+
last_block = btr_defragment_n_pages(first_block, index,
srv_defragment_n_pages,
&mtr);
@@ -768,17 +782,32 @@ DECLARE_THREAD(btr_defragment_thread)(void*)
/* Update the last_processed time of this index. */
item->last_processed = now;
} else {
+ dberr_t err = DB_SUCCESS;
mtr_commit(&mtr);
/* Reaching the end of the index. */
dict_stats_empty_defrag_stats(index);
- dict_stats_save_defrag_stats(index);
- dict_stats_save_defrag_summary(index);
+ err = dict_stats_save_defrag_stats(index);
+ if (err != DB_SUCCESS) {
+ ib::error() << "Saving defragmentation stats for table "
+ << index->table->name
+ << " index " << index->name()
+ << " failed with error " << err;
+ } else {
+ err = dict_stats_save_defrag_summary(index);
+
+ if (err != DB_SUCCESS) {
+ ib::error() << "Saving defragmentation summary for table "
+ << index->table->name
+ << " index " << index->name()
+ << " failed with error " << err;
+ }
+ }
+
btr_defragment_remove_item(item);
}
}
btr_defragment_thread_active = false;
- os_thread_exit(NULL);
+ os_thread_exit();
OS_THREAD_DUMMY_RETURN;
}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/btr/btr0pcur.cc b/storage/innobase/btr/btr0pcur.cc
index 7ba8b8f7412..25a39e9646b 100644
--- a/storage/innobase/btr/btr0pcur.cc
+++ b/storage/innobase/btr/btr0pcur.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2016, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,54 +25,44 @@ Created 2/23/1996 Heikki Tuuri
*******************************************************/
#include "btr0pcur.h"
-
-#ifdef UNIV_NONINL
-#include "btr0pcur.ic"
-#endif
-
#include "ut0byte.h"
#include "rem0cmp.h"
#include "trx0trx.h"
/**************************************************************//**
Allocates memory for a persistent cursor object and initializes the cursor.
-@return own: persistent cursor */
-UNIV_INTERN
+@return own: persistent cursor */
btr_pcur_t*
btr_pcur_create_for_mysql(void)
/*============================*/
{
btr_pcur_t* pcur;
+ DBUG_ENTER("btr_pcur_create_for_mysql");
- pcur = (btr_pcur_t*) mem_alloc(sizeof(btr_pcur_t));
+ pcur = (btr_pcur_t*) ut_malloc_nokey(sizeof(btr_pcur_t));
pcur->btr_cur.index = NULL;
btr_pcur_init(pcur);
- return(pcur);
+ DBUG_PRINT("btr_pcur_create_for_mysql", ("pcur: %p", pcur));
+ DBUG_RETURN(pcur);
}
/**************************************************************//**
Resets a persistent cursor object, freeing ::old_rec_buf if it is
allocated and resetting the other members to their initial values. */
-UNIV_INTERN
void
btr_pcur_reset(
/*===========*/
btr_pcur_t* cursor) /*!< in, out: persistent cursor */
{
- if (cursor->old_rec_buf != NULL) {
-
- mem_free(cursor->old_rec_buf);
-
- cursor->old_rec_buf = NULL;
- }
-
+ btr_pcur_free(cursor);
+ cursor->old_rec_buf = NULL;
cursor->btr_cur.index = NULL;
cursor->btr_cur.page_cur.rec = NULL;
cursor->old_rec = NULL;
cursor->old_n_fields = 0;
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+ cursor->old_stored = false;
cursor->latch_mode = BTR_NO_LATCHES;
cursor->pos_state = BTR_PCUR_NOT_POSITIONED;
@@ -79,14 +70,17 @@ btr_pcur_reset(
/**************************************************************//**
Frees the memory for a persistent cursor object. */
-UNIV_INTERN
void
btr_pcur_free_for_mysql(
/*====================*/
btr_pcur_t* cursor) /*!< in, own: persistent cursor */
{
- btr_pcur_reset(cursor);
- mem_free(cursor);
+ DBUG_ENTER("btr_pcur_free_for_mysql");
+ DBUG_PRINT("btr_pcur_free_for_mysql", ("pcur: %p", cursor));
+
+ btr_pcur_free(cursor);
+ ut_free(cursor);
+ DBUG_VOID_RETURN;
}
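
[Editor's note] Since btr_pcur_create_for_mysql() and btr_pcur_free_for_mysql() must always be paired, a caller-side RAII guard is one way to make the ownership explicit. A hedged standalone sketch, with the cursor and its create/free pair reduced to stand-ins:

    #include <cassert>
    #include <cstdlib>

    /* Stand-ins; the real pair also resets cursor state and frees
    ::old_rec_buf. */
    struct pcur { unsigned char* old_rec_buf = nullptr; };
    static pcur* pcur_create() { return new pcur(); }
    static void  pcur_free(pcur* p) { std::free(p->old_rec_buf); delete p; }

    /* RAII guard: the cursor is freed exactly once, even on early return. */
    class pcur_guard {
        pcur* p_;
    public:
        pcur_guard() : p_(pcur_create()) {}
        ~pcur_guard() { pcur_free(p_); }
        pcur* get() const { return p_; }
        pcur_guard(const pcur_guard&) = delete;
        pcur_guard& operator=(const pcur_guard&) = delete;
    };

    int main()
    {
        pcur_guard guard;
        assert(guard.get()->old_rec_buf == nullptr);
    }   /* cursor freed here, however the scope exits */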
/**************************************************************//**
cursor data structure, or just setting a flag if the cursor is before the
first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the
page where the cursor is positioned must not be empty if the index tree is
not totally empty! */
-UNIV_INTERN
void
btr_pcur_store_position(
/*====================*/
@@ -107,7 +100,6 @@ btr_pcur_store_position(
buf_block_t* block;
rec_t* rec;
dict_index_t* index;
- page_t* page;
ulint offs;
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
@@ -119,23 +111,30 @@ btr_pcur_store_position(
page_cursor = btr_pcur_get_page_cur(cursor);
rec = page_cur_get_rec(page_cursor);
- page = page_align(rec);
- offs = page_offset(rec);
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-
- if (page_is_empty(page)) {
+ offs = rec - block->frame;
+ ut_ad(block->page.id.page_no() == page_get_page_no(block->frame));
+ ut_ad(block->page.buf_fix_count);
+ /* For a spatial index, when we reposition on the parent
+ page buffer, we might not hold page latches, but the
+ tree must be locked to prevent changes to the page */
+ ut_ad(mtr_memo_contains_flagged(mtr, block,
+ MTR_MEMO_PAGE_S_FIX
+ | MTR_MEMO_PAGE_X_FIX)
+ || (dict_index_is_spatial(index)
+ && mtr_memo_contains_flagged(
+ mtr, dict_index_get_lock(index),
+ MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)));
+
+ cursor->old_stored = true;
+
+ if (page_is_empty(block->frame)) {
/* It must be an empty index tree; NOTE that in this case
we do not store the modify_clock, but always do a search
if we restore the cursor position */
- ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
- ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
- ut_ad(page_is_leaf(page));
- ut_ad(page_get_page_no(page) == index->page);
-
- cursor->old_stored = BTR_PCUR_OLD_STORED;
+ ut_a(!page_has_siblings(block->frame));
+ ut_ad(page_is_leaf(block->frame));
+ ut_ad(block->page.id.page_no() == index->page);
if (page_rec_is_supremum_low(offs)) {
@@ -162,18 +161,19 @@ btr_pcur_store_position(
cursor->rel_pos = BTR_PCUR_ON;
}
- cursor->old_stored = BTR_PCUR_OLD_STORED;
cursor->old_rec = dict_index_copy_rec_order_prefix(
index, rec, &cursor->old_n_fields,
&cursor->old_rec_buf, &cursor->buf_size);
cursor->block_when_stored = block;
+
+ /* The function checks that the block is S- or X-latched. */
cursor->modify_clock = buf_block_get_modify_clock(block);
+ cursor->withdraw_clock = buf_withdraw_clock;
}
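
[Editor's note] The stored position above relies on the modify clock: any structural change to a page bumps the clock, and a later restore may reuse the stored block only if the clock still matches. A toy standalone model of that protocol:

    #include <cassert>
    #include <cstdint>

    struct block { uint64_t modify_clock = 0; };

    struct stored_pos {
        block*   blk;
        uint64_t modify_clock;
    };

    static stored_pos store(block* b) { return { b, b->modify_clock }; }

    /* true = optimistic restore succeeded; false = the caller must
    re-search from the index root, as in the pessimistic path of
    btr_pcur_restore_position(). */
    static bool restore_optimistic(const stored_pos& pos)
    {
        return pos.blk->modify_clock == pos.modify_clock;
    }

    int main()
    {
        block b;
        stored_pos pos = store(&b);
        assert(restore_optimistic(pos));     /* nothing changed: fast path */

        b.modify_clock++;                    /* page was reorganized */
        assert(!restore_optimistic(pos));    /* must fall back to a search */
    }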
/**************************************************************//**
Copies the stored position of a pcur to another pcur. */
-UNIV_INTERN
void
btr_pcur_copy_stored_position(
/*==========================*/
@@ -182,16 +182,13 @@ btr_pcur_copy_stored_position(
btr_pcur_t* pcur_donate) /*!< in: pcur from which the info is
copied */
{
- if (pcur_receive->old_rec_buf) {
- mem_free(pcur_receive->old_rec_buf);
- }
-
+ ut_free(pcur_receive->old_rec_buf);
ut_memcpy(pcur_receive, pcur_donate, sizeof(btr_pcur_t));
if (pcur_donate->old_rec_buf) {
pcur_receive->old_rec_buf = (byte*)
- mem_alloc(pcur_donate->buf_size);
+ ut_malloc_nokey(pcur_donate->buf_size);
ut_memcpy(pcur_receive->old_rec_buf, pcur_donate->old_rec_buf,
pcur_donate->buf_size);
@@ -217,25 +214,23 @@ restores to before first or after the last in the tree.
@return TRUE if the cursor position was stored when it was on a user
record and it can be restored on a user record whose ordering fields
are identical to the ones of the original user record */
-UNIV_INTERN
ibool
btr_pcur_restore_position_func(
/*===========================*/
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /*!< in: detached persistent cursor */
const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
+ unsigned line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
dict_index_t* index;
dtuple_t* tuple;
- ulint mode;
- ulint old_mode;
+ page_cur_mode_t mode;
+ page_cur_mode_t old_mode;
mem_heap_t* heap;
- ut_ad(mtr);
- ut_ad(mtr->state == MTR_ACTIVE);
- ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED);
+ ut_ad(mtr->is_active());
+ //ut_ad(cursor->old_stored);
ut_ad(cursor->pos_state == BTR_PCUR_WAS_POSITIONED
|| cursor->pos_state == BTR_PCUR_IS_POSITIONED);
@@ -244,16 +239,27 @@ btr_pcur_restore_position_func(
if (UNIV_UNLIKELY
(cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE
|| cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE)) {
+ dberr_t err = DB_SUCCESS;
/* In these cases we do not try an optimistic restoration,
but always do a search */
- btr_cur_open_at_index_side(
+ err = btr_cur_open_at_index_side(
cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE,
index, latch_mode,
btr_pcur_get_btr_cur(cursor), 0, mtr);
- cursor->latch_mode = latch_mode;
+ if (err != DB_SUCCESS) {
+ ib::warn() << " Error code: " << err
+ << " btr_pcur_restore_position_func "
+ << " called from file: "
+ << file << " line: " << line
+ << " table: " << index->table->name
+ << " index: " << index->name;
+ }
+
+ cursor->latch_mode =
+ BTR_LATCH_MODE_WITHOUT_INTENTION(latch_mode);
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
cursor->block_when_stored = btr_pcur_get_block(cursor);
@@ -263,14 +269,19 @@ btr_pcur_restore_position_func(
ut_a(cursor->old_rec);
ut_a(cursor->old_n_fields);
- if (UNIV_LIKELY(latch_mode == BTR_SEARCH_LEAF)
- || UNIV_LIKELY(latch_mode == BTR_MODIFY_LEAF)) {
+ switch (latch_mode) {
+ case BTR_SEARCH_LEAF:
+ case BTR_MODIFY_LEAF:
+ case BTR_SEARCH_PREV:
+ case BTR_MODIFY_PREV:
/* Try optimistic restoration. */
- if (buf_page_optimistic_get(latch_mode,
- cursor->block_when_stored,
- cursor->modify_clock,
- file, line, mtr)) {
+ if (!buf_pool_is_obsolete(cursor->withdraw_clock)
+ && btr_cur_optimistic_latch_leaves(
+ cursor->block_when_stored, cursor->modify_clock,
+ &latch_mode, btr_pcur_get_btr_cur(cursor),
+ file, line, mtr)) {
+
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
cursor->latch_mode = latch_mode;
@@ -282,16 +293,21 @@ btr_pcur_restore_position_func(
if (cursor->rel_pos == BTR_PCUR_ON) {
#ifdef UNIV_DEBUG
const rec_t* rec;
- const ulint* offsets1;
- const ulint* offsets2;
+ offset_t offsets1_[REC_OFFS_NORMAL_SIZE];
+ offset_t offsets2_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets1 = offsets1_;
+ offset_t* offsets2 = offsets2_;
rec = btr_pcur_get_rec(cursor);
+ rec_offs_init(offsets1_);
+ rec_offs_init(offsets2_);
+
heap = mem_heap_create(256);
offsets1 = rec_get_offsets(
- cursor->old_rec, index, NULL,
+ cursor->old_rec, index, offsets1, true,
cursor->old_n_fields, &heap);
offsets2 = rec_get_offsets(
- rec, index, NULL,
+ rec, index, offsets2, true,
cursor->old_n_fields, &heap);
ut_ad(!cmp_rec_rec(cursor->old_rec,
@@ -316,7 +332,7 @@ btr_pcur_restore_position_func(
heap = mem_heap_create(256);
- tuple = dict_index_build_data_tuple(index, cursor->old_rec,
+ tuple = dict_index_build_data_tuple(cursor->old_rec, index, true,
cursor->old_n_fields, heap);
/* Save the old search mode of the cursor */
@@ -334,7 +350,7 @@ btr_pcur_restore_position_func(
break;
default:
ut_error;
- mode = 0;
+ mode = PAGE_CUR_UNSUPP;
}
btr_pcur_open_with_no_init_func(index, tuple, mode, latch_mode,
@@ -343,39 +359,31 @@ btr_pcur_restore_position_func(
/* Restore the old search mode */
cursor->search_mode = old_mode;
- switch (cursor->rel_pos) {
- case BTR_PCUR_ON:
- if (btr_pcur_is_on_user_rec(cursor)
- && !cmp_dtuple_rec(
- tuple, btr_pcur_get_rec(cursor),
- rec_get_offsets(btr_pcur_get_rec(cursor),
- index, NULL,
- ULINT_UNDEFINED, &heap))) {
-
- /* We have to store the NEW value for
- the modify clock, since the cursor can
- now be on a different page! But we can
- retain the value of old_rec */
-
- cursor->block_when_stored =
- btr_pcur_get_block(cursor);
- cursor->modify_clock =
- buf_block_get_modify_clock(
- cursor->block_when_stored);
- cursor->old_stored = BTR_PCUR_OLD_STORED;
-
- mem_heap_free(heap);
-
- return(TRUE);
- }
-#ifdef UNIV_DEBUG
- /* fall through */
- case BTR_PCUR_BEFORE:
- case BTR_PCUR_AFTER:
- break;
- default:
- ut_error;
-#endif /* UNIV_DEBUG */
+ ut_ad(cursor->rel_pos == BTR_PCUR_ON
+ || cursor->rel_pos == BTR_PCUR_BEFORE
+ || cursor->rel_pos == BTR_PCUR_AFTER);
+ offset_t offsets[REC_OFFS_NORMAL_SIZE];
+ rec_offs_init(offsets);
+ if (cursor->rel_pos == BTR_PCUR_ON
+ && btr_pcur_is_on_user_rec(cursor)
+ && !cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor),
+ rec_get_offsets(btr_pcur_get_rec(cursor),
+ index, offsets, true,
+ ULINT_UNDEFINED, &heap))) {
+
+ /* We have to store the NEW value for the modify clock,
+ since the cursor can now be on a different page!
+ But we can retain the value of old_rec */
+
+ cursor->block_when_stored = btr_pcur_get_block(cursor);
+ cursor->modify_clock = buf_block_get_modify_clock(
+ cursor->block_when_stored);
+ cursor->old_stored = true;
+ cursor->withdraw_clock = buf_withdraw_clock;
+
+ mem_heap_free(heap);
+
+ return(TRUE);
}
mem_heap_free(heap);
@@ -394,7 +402,6 @@ Moves the persistent cursor to the first record on the next page. Releases the
latch on the current page, and bufferunfixes it. Note that there must not be
modifications on the current page, as then the x-latch can be released only in
mtr_commit. */
-UNIV_INTERN
void
btr_pcur_move_to_next_page(
/*=======================*/
@@ -403,17 +410,16 @@ btr_pcur_move_to_next_page(
mtr_t* mtr) /*!< in: mtr */
{
ulint next_page_no;
- ulint space;
- ulint zip_size;
page_t* page;
buf_block_t* next_block;
page_t* next_page;
+ ulint mode;
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
ut_ad(btr_pcur_is_after_last_on_page(cursor));
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+ cursor->old_stored = false;
page = btr_pcur_get_page(cursor);
@@ -421,15 +427,25 @@ btr_pcur_move_to_next_page(
return;
}
- next_page_no = btr_page_get_next(page, mtr);
- space = buf_block_get_space(btr_pcur_get_block(cursor));
- zip_size = buf_block_get_zip_size(btr_pcur_get_block(cursor));
+ next_page_no = btr_page_get_next(page);
ut_ad(next_page_no != FIL_NULL);
- next_block = btr_block_get(space, zip_size, next_page_no,
- cursor->latch_mode,
- btr_pcur_get_btr_cur(cursor)->index, mtr);
+ mode = cursor->latch_mode;
+ switch (mode) {
+ case BTR_SEARCH_TREE:
+ mode = BTR_SEARCH_LEAF;
+ break;
+ case BTR_MODIFY_TREE:
+ mode = BTR_MODIFY_LEAF;
+ }
+
+ buf_block_t* block = btr_pcur_get_block(cursor);
+
+ next_block = btr_block_get(
+ page_id_t(block->page.id.space(), next_page_no),
+ block->page.size, mode,
+ btr_pcur_get_btr_cur(cursor)->index, mtr);
if (UNIV_UNLIKELY(!next_block)) {
return;
@@ -438,17 +454,15 @@ btr_pcur_move_to_next_page(
next_page = buf_block_get_frame(next_block);
#ifdef UNIV_BTR_DEBUG
ut_a(page_is_comp(next_page) == page_is_comp(page));
- ut_a(btr_page_get_prev(next_page, mtr)
- == buf_block_get_page_no(btr_pcur_get_block(cursor)));
+ ut_a(btr_page_get_prev(next_page)
+ == btr_pcur_get_block(cursor)->page.id.page_no());
#endif /* UNIV_BTR_DEBUG */
- next_block->check_index_page_at_flush = TRUE;
- btr_leaf_page_release(btr_pcur_get_block(cursor),
- cursor->latch_mode, mtr);
+ btr_leaf_page_release(btr_pcur_get_block(cursor), mode, mtr);
page_cur_set_before_first(next_block, btr_pcur_get_page_cur(cursor));
- page_check_dir(next_page);
+ ut_d(page_check_dir(next_page));
}
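
[Editor's note] The switch above downgrades a tree-level latch mode to the corresponding leaf-level mode before latching the next page. A standalone sketch of the mapping (the enum values are illustrative):

    #include <cassert>

    enum latch_mode { SEARCH_LEAF, MODIFY_LEAF, SEARCH_TREE, MODIFY_TREE };

    /* Only a leaf latch is needed on the next page, whatever mode the
    cursor was opened with. */
    static latch_mode leaf_mode(latch_mode m)
    {
        switch (m) {
        case SEARCH_TREE: return SEARCH_LEAF;
        case MODIFY_TREE: return MODIFY_LEAF;
        default:          return m;         /* already a leaf mode */
        }
    }

    int main()
    {
        assert(leaf_mode(MODIFY_TREE) == MODIFY_LEAF);
        assert(leaf_mode(SEARCH_LEAF) == SEARCH_LEAF);
    }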
/*********************************************************//**
@@ -460,7 +474,7 @@ alphabetical position of the cursor is guaranteed to be sensible on
return, but it may happen that the cursor is not positioned on the last
record of any page, because the structure of the tree may have changed
during the time when the cursor had no latches. */
-UNIV_INTERN
+static
void
btr_pcur_move_backward_from_page(
/*=============================*/
@@ -502,7 +516,7 @@ btr_pcur_move_backward_from_page(
page = btr_pcur_get_page(cursor);
- prev_page_no = btr_page_get_prev(page, mtr);
+ prev_page_no = btr_page_get_prev(page);
if (prev_page_no == FIL_NULL) {
} else if (btr_pcur_is_before_first_on_page(cursor)) {
@@ -516,9 +530,10 @@ btr_pcur_move_backward_from_page(
btr_pcur_get_page_cur(cursor));
} else {
- /* The repositioned cursor did not end on an infimum record on
- a page. Cursor repositioning acquired a latch also on the
- previous page, but we do not need the latch: release it. */
+ /* The repositioned cursor did not end on an infimum
+ record on a page. Cursor repositioning acquired a latch
+ also on the previous page, but we do not need the latch:
+ release it. */
prev_block = btr_pcur_get_btr_cur(cursor)->left_block;
@@ -526,15 +541,13 @@ btr_pcur_move_backward_from_page(
}
cursor->latch_mode = latch_mode;
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+ cursor->old_stored = false;
}
/*********************************************************//**
Moves the persistent cursor to the previous record in the tree. If no records
are left, the cursor stays 'before first in tree'.
-@return TRUE if the cursor was not before first in tree */
-UNIV_INTERN
+@return TRUE if the cursor was not before first in tree */
ibool
btr_pcur_move_to_prev(
/*==================*/
@@ -545,7 +558,7 @@ btr_pcur_move_to_prev(
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+ cursor->old_stored = false;
if (btr_pcur_is_before_first_on_page(cursor)) {
@@ -571,23 +584,22 @@ PAGE_CUR_LE, on the last user record. If no such user record exists, then
in the first case sets the cursor after last in tree, and in the latter case
before first in tree. The latching mode must be BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF. */
-UNIV_INTERN
void
btr_pcur_open_on_user_rec_func(
/*===========================*/
dict_index_t* index, /*!< in: index */
const dtuple_t* tuple, /*!< in: tuple on which search done */
- ulint mode, /*!< in: PAGE_CUR_L, ... */
+ page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ... */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent
cursor */
const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
+ unsigned line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
btr_pcur_open_low(index, 0, tuple, mode, latch_mode, cursor,
- file, line, mtr);
+ file, line, 0, mtr);
if ((mode == PAGE_CUR_GE) || (mode == PAGE_CUR_G)) {
diff --git a/storage/innobase/btr/btr0scrub.cc b/storage/innobase/btr/btr0scrub.cc
index 0e7c0d5b061..6170ab5188b 100644
--- a/storage/innobase/btr/btr0scrub.cc
+++ b/storage/innobase/btr/btr0scrub.cc
@@ -1,4 +1,5 @@
// Copyright (c) 2014, Google Inc.
+// Copyright (c) 2017, MariaDB Corporation.
/**************************************************//**
@file btr/btr0scrub.cc
@@ -77,6 +78,7 @@ static
void
log_scrub_failure(
/*===============*/
+ dict_index_t* index, /*!< in: index */
btr_scrub_t* scrub_data, /*!< in: data to store statistics on */
buf_block_t* block, /*!< in: block */
dberr_t err) /*!< in: error */
@@ -100,11 +102,10 @@ log_scrub_failure(
reason = "unknown";
scrub_data->scrub_stat.page_split_failures_unknown++;
}
- fprintf(stderr,
- "InnoDB: Warning: Failed to scrub page %lu in space %lu : %s\n",
- buf_block_get_page_no(block),
- buf_block_get_space(block),
- reason);
+
+ ib::warn() << "Failed to scrub index " << index->name
+ << " of table " << index->table->name
+ << " page " << block->page.id << ": " << reason;
}
/****************************************************************
@@ -124,7 +125,7 @@ btr_scrub_lock_dict_func(ulint space_id, bool lock_to_close_table,
This function should be rewritten as part of MDEV-8139:
Fix scrubbing tests. */
- while (mutex_enter_nowait_func(&(dict_sys->mutex), file, line)) {
+ while (mutex_enter_nowait(&(dict_sys->mutex))) {
/* if we lock to close a table, we wait forever
* if we don't lock to close a table, we check if space
* is closing, and then instead give up
@@ -143,12 +144,13 @@ btr_scrub_lock_dict_func(ulint space_id, bool lock_to_close_table,
os_thread_sleep(250000);
time_t now = time(0);
+
if (now >= last + 30) {
fprintf(stderr,
"WARNING: %s:%u waited %ld seconds for"
- " dict_sys lock, space: %lu"
+ " dict_sys lock, space: " ULINTPF
" lock_to_close_table: %d\n",
- file, line, (long)(now - start), space_id,
+ file, line, long(now - start), space_id,
lock_to_close_table);
last = now;
@@ -264,11 +266,10 @@ btr_page_needs_scrubbing(
return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
}
- page_t* page = buf_block_get_frame(block);
- uint type = fil_page_get_type(page);
+ const page_t* page = buf_block_get_frame(block);
if (allocated == BTR_SCRUB_PAGE_ALLOCATED) {
- if (type != FIL_PAGE_INDEX) {
+ if (fil_page_get_type(page) != FIL_PAGE_INDEX) {
/* this function is called from fil-crypt-threads.
* these threads iterate all pages of all tablespaces
* and don't know about fil_page_type.
@@ -285,7 +286,7 @@ btr_page_needs_scrubbing(
return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
}
- if (page_has_garbage(page) == false) {
+ if (!page_has_garbage(page)) {
/* no garbage (from deleted/shrunken records) */
return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
}
@@ -293,11 +294,12 @@ btr_page_needs_scrubbing(
} else if (allocated == BTR_SCRUB_PAGE_FREE ||
allocated == BTR_SCRUB_PAGE_ALLOCATION_UNKNOWN) {
- if (! (type == FIL_PAGE_INDEX ||
- type == FIL_PAGE_TYPE_BLOB ||
- type == FIL_PAGE_TYPE_ZBLOB ||
- type == FIL_PAGE_TYPE_ZBLOB2)) {
-
+ switch (fil_page_get_type(page)) {
+ case FIL_PAGE_INDEX:
+ case FIL_PAGE_TYPE_ZBLOB:
+ case FIL_PAGE_TYPE_ZBLOB2:
+ break;
+ default:
/**
* If this is a dropped page, we also need to scrub
* BLOB pages
@@ -309,7 +311,8 @@ btr_page_needs_scrubbing(
}
}
- if (btr_page_get_index_id(page) == IBUF_INDEX_ID) {
+ if (block->page.id.space() == TRX_SYS_SPACE
+ && btr_page_get_index_id(page) == IBUF_INDEX_ID) {
/* skip ibuf */
return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
}
@@ -362,16 +365,7 @@ btr_optimistic_scrub(
page_get_n_recs(buf_block_get_frame(block)) > 2 &&
(rand() % 100) < test_pessimistic_scrub_pct) {
- fprintf(stderr,
- "scrub: simulate btr_page_reorganize failed %lu:%lu "
- " table: %llu:%s index: %llu:%s get_n_recs(): %lu\n",
- buf_block_get_space(block),
- buf_block_get_page_no(block),
- (ulonglong)scrub_data->current_table->id,
- scrub_data->current_table->name,
- (ulonglong)scrub_data->current_index->id,
- scrub_data->current_index->name,
- page_get_n_recs(buf_block_get_frame(block)));
+ log_scrub_failure(index, scrub_data, block, DB_OVERFLOW);
return DB_OVERFLOW;
}
#endif
@@ -415,11 +409,12 @@ btr_pessimistic_scrub(
mtr_t* mtr) /*!< in: mtr */
{
page_t* page = buf_block_get_frame(block);
+
if (page_get_n_recs(page) < 2) {
/**
* There is no way we can split a page with < 2 records
*/
- log_scrub_failure(scrub_data, block, DB_UNDERFLOW);
+ log_scrub_failure(index, scrub_data, block, DB_UNDERFLOW);
return DB_UNDERFLOW;
}
@@ -430,17 +425,19 @@ btr_pessimistic_scrub(
ulint n_reserved = 0;
if (!fsp_reserve_free_extents(&n_reserved, index->space,
n_extents, FSP_NORMAL, mtr)) {
- log_scrub_failure(scrub_data, block,
+ log_scrub_failure(index, scrub_data, block,
DB_OUT_OF_FILE_SPACE);
return DB_OUT_OF_FILE_SPACE;
}
/* read block variables */
- ulint space = buf_block_get_space(block);
- ulint page_no = buf_block_get_page_no(block);
- ulint zip_size = buf_block_get_zip_size(block);
- ulint left_page_no = btr_page_get_prev(page, mtr);
- ulint right_page_no = btr_page_get_next(page, mtr);
+ const ulint page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
+ const page_id_t page_id(dict_index_get_space(index), page_no);
+ const uint32_t left_page_no = btr_page_get_prev(page);
+ const uint32_t right_page_no = btr_page_get_next(page);
+ const page_id_t lpage_id(dict_index_get_space(index), left_page_no);
+ const page_id_t rpage_id(dict_index_get_space(index), right_page_no);
+ const page_size_t page_size(dict_table_page_size(index->table));
/**
* When splitting page, we need X-latches on left/right brothers
@@ -453,19 +450,17 @@ btr_pessimistic_scrub(
* and re-lock. We still have x-lock on index
* so this should be safe
*/
- mtr_release_buf_page_at_savepoint(mtr, scrub_data->savepoint,
- block);
+ mtr->release_block_at_savepoint(scrub_data->savepoint, block);
- buf_block_t* get_block = btr_block_get(
- space, zip_size, left_page_no,
+ buf_block_t* get_block __attribute__((unused)) = btr_block_get(
+ lpage_id, page_size,
RW_X_LATCH, index, mtr);
- get_block->check_index_page_at_flush = TRUE;
/**
* Refetch block and re-initialize page
*/
block = btr_block_get(
- space, zip_size, page_no,
+ page_id, page_size,
RW_X_LATCH, index, mtr);
page = buf_block_get_frame(block);
@@ -473,21 +468,20 @@ btr_pessimistic_scrub(
/**
* structure should be unchanged
*/
- ut_a(left_page_no == btr_page_get_prev(page, mtr));
- ut_a(right_page_no == btr_page_get_next(page, mtr));
+ ut_a(left_page_no == btr_page_get_prev(page));
+ ut_a(right_page_no == btr_page_get_next(page));
}
if (right_page_no != FIL_NULL) {
- buf_block_t* get_block = btr_block_get(
- space, zip_size, right_page_no,
+ buf_block_t* get_block __attribute__((unused))= btr_block_get(
+ rpage_id, page_size,
RW_X_LATCH, index, mtr);
- get_block->check_index_page_at_flush = TRUE;
}
/* arguments to btr_page_split_and_insert */
mem_heap_t* heap = NULL;
dtuple_t* entry = NULL;
- ulint* offsets = NULL;
+ offset_t* offsets = NULL;
ulint n_ext = 0;
ulint flags = BTR_MODIFY_TREE;
@@ -501,7 +495,7 @@ btr_pessimistic_scrub(
/**
* call split page with NULL as argument for entry to insert
*/
- if (dict_index_get_page(index) == buf_block_get_page_no(block)) {
+ if (dict_index_get_page(index) == page_no) {
/* The page is the root page
* NOTE: ibuf_reset_free_bits is called inside
* btr_root_raise_and_insert */
@@ -638,13 +632,8 @@ btr_scrub_get_table_and_index(
scrub_data->current_table = NULL;
}
- /* argument to dict_table_open_on_index_id */
- bool dict_locked = true;
-
/* open table based on index_id */
- dict_table_t* table = dict_table_open_on_index_id(
- index_id,
- dict_locked);
+ dict_table_t* table = dict_table_open_on_index_id(index_id);
if (table != NULL) {
/* mark table as being scrubbed */
@@ -686,8 +675,9 @@ btr_scrub_free_page(
FIL_PAGE_TYPE_ALLOCATED);
}
- ulint compact = 1;
- page_create(block, mtr, compact);
+ page_create(block, mtr,
+ dict_table_is_comp(scrub_data->current_table),
+ dict_index_is_spatial(scrub_data->current_index));
mtr_commit(mtr);
@@ -852,11 +842,13 @@ btr_scrub_start_space(
ulint space, /*!< in: space */
btr_scrub_t* scrub_data) /*!< in/out: scrub data */
{
+ bool found;
scrub_data->space = space;
scrub_data->current_table = NULL;
scrub_data->current_index = NULL;
+ const page_size_t page_size = fil_space_get_page_size(space, &found);
- scrub_data->compressed = fil_space_get_zip_size(space) > 0;
+ scrub_data->compressed = page_size.is_compressed();
scrub_data->scrubbing = check_scrub_setting(scrub_data);
return scrub_data->scrubbing;
}
@@ -913,8 +905,7 @@ UNIV_INTERN
void
btr_scrub_init()
{
- mutex_create(scrub_stat_mutex_key,
- &scrub_stat_mutex, SYNC_NO_ORDER_CHECK);
+ mutex_create(LATCH_ID_SCRUB_STAT_MUTEX, &scrub_stat_mutex);
memset(&scrub_stat, 0, sizeof(scrub_stat));
}
@@ -927,3 +918,4 @@ btr_scrub_cleanup()
{
mutex_free(&scrub_stat_mutex);
}
+
diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc
index a60c68fc70a..713540ecf81 100644
--- a/storage/innobase/btr/btr0sea.cc
+++ b/storage/innobase/btr/btr0sea.cc
@@ -2,7 +2,7 @@
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
-Copyright (c) 2018, MariaDB Corporation.
+Copyright (c) 2017, 2019, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -32,10 +32,7 @@ Created 2/17/1996 Heikki Tuuri
*************************************************************************/
#include "btr0sea.h"
-#ifdef UNIV_NONINL
-#include "btr0sea.ic"
-#endif
-
+#ifdef BTR_CUR_HASH_ADAPT
#include "buf0buf.h"
#include "page0page.h"
#include "page0cur.h"
@@ -43,57 +40,76 @@ Created 2/17/1996 Heikki Tuuri
#include "btr0pcur.h"
#include "btr0btr.h"
#include "ha0ha.h"
+#include "srv0mon.h"
+#include "sync0sync.h"
-/** Flag: has the search system been enabled?
-Protected by btr_search_latch. */
-UNIV_INTERN char btr_search_enabled = TRUE;
+/** Is the search system enabled?
+The search system is protected by an array of latches. */
+char btr_search_enabled = true;
-/** A dummy variable to fool the compiler */
-UNIV_INTERN ulint btr_search_this_is_zero = 0;
+/** Number of adaptive hash index partitions. */
+ulong btr_ahi_parts = 8;
#ifdef UNIV_SEARCH_PERF_STAT
/** Number of successful adaptive hash index lookups */
-UNIV_INTERN ulint btr_search_n_succ = 0;
+ulint btr_search_n_succ = 0;
/** Number of failed adaptive hash index lookups */
-UNIV_INTERN ulint btr_search_n_hash_fail = 0;
+ulint btr_search_n_hash_fail = 0;
#endif /* UNIV_SEARCH_PERF_STAT */
/** padding to prevent other memory update
hotspots from residing on the same memory
-cache line as btr_search_latch */
-UNIV_INTERN byte btr_sea_pad1[64];
+cache line as btr_search_latches */
+UNIV_INTERN byte btr_sea_pad1[CACHE_LINE_SIZE];
-/** The latch protecting the adaptive search system: this latch protects the
+/** The latches protecting the adaptive search system: these latches protect the
(1) positions of records on those pages where a hash index has been built.
NOTE: It does not protect values of non-ordering fields within a record from
being updated in-place! We can use fact (1) to perform unique searches to
-indexes. */
-
-/* We will allocate the latch from dynamic memory to get it to the
+indexes. We will allocate the latches from dynamic memory to get them to the
same DRAM page as other hotspot semaphores */
-UNIV_INTERN rw_lock_t* btr_search_latch_temp;
+rw_lock_t** btr_search_latches;
/** padding to prevent other memory update hotspots from residing on
the same memory cache line */
-UNIV_INTERN byte btr_sea_pad2[64];
+UNIV_INTERN byte btr_sea_pad2[CACHE_LINE_SIZE];
/** The adaptive hash index */
-UNIV_INTERN btr_search_sys_t* btr_search_sys;
-
-#ifdef UNIV_PFS_RWLOCK
-/* Key to register btr_search_sys with performance schema */
-UNIV_INTERN mysql_pfs_key_t btr_search_latch_key;
-#endif /* UNIV_PFS_RWLOCK */
+btr_search_sys_t* btr_search_sys;
/** If the number of records on the page divided by this parameter
would have been successfully accessed using a hash index, the index
is then built on the page, assuming the global limit has been reached */
-#define BTR_SEARCH_PAGE_BUILD_LIMIT 16
+#define BTR_SEARCH_PAGE_BUILD_LIMIT 16U
/** The global limit for consecutive potentially successful hash searches,
before hash index building is started */
#define BTR_SEARCH_BUILD_LIMIT 100
+/** Determine the number of accessed key fields.
+@param[in] n_fields number of complete fields
+@param[in] n_bytes number of bytes in an incomplete last field
+@return number of complete or incomplete fields */
+inline MY_ATTRIBUTE((warn_unused_result))
+ulint
+btr_search_get_n_fields(
+ ulint n_fields,
+ ulint n_bytes)
+{
+ return(n_fields + (n_bytes > 0 ? 1 : 0));
+}
+
+/** Determine the number of accessed key fields.
+@param[in] cursor b-tree cursor
+@return number of complete or incomplete fields */
+inline MY_ATTRIBUTE((warn_unused_result))
+ulint
+btr_search_get_n_fields(
+ const btr_cur_t* cursor)
+{
+ return(btr_search_get_n_fields(cursor->n_fields, cursor->n_bytes));
+}
+
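
[Editor's note] A quick check of the arithmetic above: an incomplete last field (n_bytes > 0) counts as one more field. Standalone, with the function restated under a hypothetical name:

    #include <cassert>

    static unsigned long n_search_fields(unsigned long n_fields,
                                         unsigned long n_bytes)
    {
        return n_fields + (n_bytes > 0 ? 1 : 0);
    }

    int main()
    {
        assert(n_search_fields(2, 0) == 2); /* two complete fields */
        assert(n_search_fields(2, 3) == 3); /* plus a 3-byte prefix of field 3 */
    }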
/********************************************************************//**
Builds a hash index on a page with the given parameters. If the page already
has a hash index with different parameters, the old hash index is removed.
@@ -111,8 +127,7 @@ btr_search_build_page_hash_index(
field */
ibool left_side);/*!< in: hash for searches from left side? */
-/*****************************************************************//**
-This function should be called before reserving any btr search mutex, if
+/** This function should be called before reserving any btr search mutex, if
the intended operation might add nodes to the search system hash table.
Because of the latching order, once we have reserved the btr search system
latch, we cannot allocate a free frame from the buffer pool. Checks that
@@ -120,21 +135,19 @@ there is a free buffer frame allocated for hash table heap in the btr search
system. If not, allocates a free frame for the heap. This check makes it
probable that, when we have reserved the btr search system latch and we need to
allocate a new node to the hash table, it will succeed. However, the check
-will not guarantee success. */
+will not guarantee success.
+@param[in] index index handler */
static
void
-btr_search_check_free_space_in_heap(void)
-/*=====================================*/
+btr_search_check_free_space_in_heap(dict_index_t* index)
{
hash_table_t* table;
mem_heap_t* heap;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_S));
+ ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));
- table = btr_search_sys->hash_index;
+ table = btr_get_search_table(index);
heap = table->heap;
@@ -145,96 +158,174 @@ btr_search_check_free_space_in_heap(void)
if (heap->free_block == NULL) {
buf_block_t* block = buf_block_alloc(NULL);
- rw_lock_x_lock(&btr_search_latch);
+ btr_search_x_lock(index);
- if (heap->free_block == NULL) {
+ if (btr_search_enabled
+ && heap->free_block == NULL) {
heap->free_block = block;
} else {
buf_block_free(block);
}
- rw_lock_x_unlock(&btr_search_latch);
+ btr_search_x_unlock(index);
}
}
-/*****************************************************************//**
-Creates and initializes the adaptive search system at a database start. */
-UNIV_INTERN
+/** Creates and initializes the adaptive search system at a database start.
+@param[in] hash_size hash table size. */
void
-btr_search_sys_create(
-/*==================*/
- ulint hash_size) /*!< in: hash index hash table size */
+btr_search_sys_create(ulint hash_size)
{
- /* We allocate the search latch from dynamic memory:
- see above at the global variable definition */
+ /* The search system is divided into n parts.
+ Each part controls access to a distinct set of hash buckets in the
+ hash table through its own latch. */
+
+ /* Step-1: Allocate latches (1 per part). */
+ btr_search_latches = reinterpret_cast<rw_lock_t**>(
+ ut_malloc(sizeof(rw_lock_t*) * btr_ahi_parts, mem_key_ahi));
- btr_search_latch_temp = (rw_lock_t*) mem_alloc(sizeof(rw_lock_t));
+ for (ulint i = 0; i < btr_ahi_parts; ++i) {
- rw_lock_create(btr_search_latch_key, &btr_search_latch,
- SYNC_SEARCH_SYS);
+ btr_search_latches[i] = reinterpret_cast<rw_lock_t*>(
+ ut_malloc(sizeof(rw_lock_t), mem_key_ahi));
+
+ rw_lock_create(btr_search_latch_key,
+ btr_search_latches[i], SYNC_SEARCH_SYS);
+ }
- btr_search_sys = (btr_search_sys_t*)
- mem_alloc(sizeof(btr_search_sys_t));
+ /* Step-2: Allocate hash tables. */
+ btr_search_sys = reinterpret_cast<btr_search_sys_t*>(
+ ut_malloc(sizeof(btr_search_sys_t), mem_key_ahi));
+
+ btr_search_sys->hash_tables = reinterpret_cast<hash_table_t**>(
+ ut_malloc(sizeof(hash_table_t*) * btr_ahi_parts, mem_key_ahi));
+
+ for (ulint i = 0; i < btr_ahi_parts; ++i) {
+
+ btr_search_sys->hash_tables[i] =
+ ib_create((hash_size / btr_ahi_parts),
+ LATCH_ID_HASH_TABLE_MUTEX,
+ 0, MEM_HEAP_FOR_BTR_SEARCH);
- btr_search_sys->hash_index = ib_create(hash_size, 0,
- MEM_HEAP_FOR_BTR_SEARCH, 0);
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- btr_search_sys->hash_index->adaptive = TRUE;
+ btr_search_sys->hash_tables[i]->adaptive = TRUE;
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+ }
+}
+
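
[Editor's note] The partitioning works by routing each index to exactly one latch/hash-table pair. A standalone sketch of one plausible routing function; the real btr_get_search_latch() may fold the index id differently, so the plain modulo here is only illustrative:

    #include <cassert>
    #include <cstdint>

    static const unsigned long btr_ahi_parts = 8;

    /* Map an index id to one of btr_ahi_parts partitions. */
    static unsigned long ahi_part(uint64_t index_id)
    {
        return (unsigned long)(index_id % btr_ahi_parts);
    }

    int main()
    {
        /* Two indexes land in different partitions, so hash maintenance
        on one does not block lookups on the other. */
        assert(ahi_part(11) != ahi_part(12));
        assert(ahi_part(11) < btr_ahi_parts);
    }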
+/** Resize hash index hash table.
+@param[in] hash_size hash index hash table size */
+void
+btr_search_sys_resize(ulint hash_size)
+{
+ /* Step-1: Lock all search latches in exclusive mode. */
+ btr_search_x_lock_all();
+
+ if (btr_search_enabled) {
+
+ btr_search_x_unlock_all();
+
+ ib::error() << "btr_search_sys_resize failed because"
+ " hash index hash table is not empty.";
+ ut_ad(0);
+ return;
+ }
+ /* Step-2: Recreate hash tables with new size. */
+ for (ulint i = 0; i < btr_ahi_parts; ++i) {
+
+ mem_heap_free(btr_search_sys->hash_tables[i]->heap);
+ hash_table_free(btr_search_sys->hash_tables[i]);
+
+ btr_search_sys->hash_tables[i] =
+ ib_create((hash_size / btr_ahi_parts),
+ LATCH_ID_HASH_TABLE_MUTEX,
+ 0, MEM_HEAP_FOR_BTR_SEARCH);
+
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+ btr_search_sys->hash_tables[i]->adaptive = TRUE;
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+ }
+
+ /* Step-3: Unlock all search latches from exclusive mode. */
+ btr_search_x_unlock_all();
}
-/*****************************************************************//**
-Frees the adaptive search system at a database shutdown. */
-UNIV_INTERN
+/** Frees the adaptive search system at a database shutdown. */
void
-btr_search_sys_free(void)
-/*=====================*/
+btr_search_sys_free()
{
- rw_lock_free(&btr_search_latch);
- mem_free(btr_search_latch_temp);
- btr_search_latch_temp = NULL;
- mem_heap_free(btr_search_sys->hash_index->heap);
- hash_table_free(btr_search_sys->hash_index);
- mem_free(btr_search_sys);
+ ut_ad(btr_search_sys != NULL && btr_search_latches != NULL);
+
+ /* Step-1: Release the hash tables. */
+ for (ulint i = 0; i < btr_ahi_parts; ++i) {
+
+ mem_heap_free(btr_search_sys->hash_tables[i]->heap);
+ hash_table_free(btr_search_sys->hash_tables[i]);
+
+ }
+
+ ut_free(btr_search_sys->hash_tables);
+ ut_free(btr_search_sys);
btr_search_sys = NULL;
+
+ /* Step-2: Release all allocated latches. */
+ for (ulint i = 0; i < btr_ahi_parts; ++i) {
+
+ rw_lock_free(btr_search_latches[i]);
+ ut_free(btr_search_latches[i]);
+ }
+
+ ut_free(btr_search_latches);
+ btr_search_latches = NULL;
}
-/********************************************************************//**
-Set index->ref_count = 0 on all indexes of a table. */
+/** Set index->ref_count = 0 on all indexes of a table.
+@param[in,out] table table handler */
static
void
btr_search_disable_ref_count(
-/*=========================*/
- dict_table_t* table) /*!< in/out: table */
+ dict_table_t* table)
{
dict_index_t* index;
ut_ad(mutex_own(&dict_sys->mutex));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- for (index = dict_table_get_first_index(table); index;
+ for (index = dict_table_get_first_index(table);
+ index != NULL;
index = dict_table_get_next_index(index)) {
+ ut_ad(rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));
+
index->search_info->ref_count = 0;
}
}
-/********************************************************************//**
-Disable the adaptive hash search system and empty the index. */
-UNIV_INTERN
+/** Disable the adaptive hash search system and empty the index.
+@param[in] need_mutex need to acquire dict_sys->mutex */
void
-btr_search_disable(void)
-/*====================*/
+btr_search_disable(
+ bool need_mutex)
{
dict_table_t* table;
- mutex_enter(&dict_sys->mutex);
- rw_lock_x_lock(&btr_search_latch);
+ if (need_mutex) {
+ mutex_enter(&dict_sys->mutex);
+ }
+
+ ut_ad(mutex_own(&dict_sys->mutex));
+ btr_search_x_lock_all();
+
+ if (!btr_search_enabled) {
+ if (need_mutex) {
+ mutex_exit(&dict_sys->mutex);
+ }
+
+ btr_search_x_unlock_all();
+ return;
+ }
- btr_search_enabled = FALSE;
+ btr_search_enabled = false;
/* Clear the index->search_info->ref_count of every index in
the data dictionary cache. */
@@ -250,120 +341,82 @@ btr_search_disable(void)
btr_search_disable_ref_count(table);
}
- mutex_exit(&dict_sys->mutex);
+ if (need_mutex) {
+ mutex_exit(&dict_sys->mutex);
+ }
/* Set all block->index = NULL. */
buf_pool_clear_hash_index();
/* Clear the adaptive hash index. */
- hash_table_clear(btr_search_sys->hash_index);
- mem_heap_empty(btr_search_sys->hash_index->heap);
+ for (ulint i = 0; i < btr_ahi_parts; ++i) {
+ hash_table_clear(btr_search_sys->hash_tables[i]);
+ mem_heap_empty(btr_search_sys->hash_tables[i]->heap);
+ }
- rw_lock_x_unlock(&btr_search_latch);
+ btr_search_x_unlock_all();
}
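
[Editor's note] The disable sequence above follows a lock-all pattern: acquire every partition latch in a fixed order, flip the flag, clear per-partition state, then release. A standalone sketch of that pattern, with std::mutex standing in for the rw-locks:

    #include <cassert>
    #include <mutex>
    #include <vector>

    static const unsigned N_PARTS = 8;
    static std::vector<std::mutex> part_latch(N_PARTS);
    static bool search_enabled = true;
    static std::vector<int> hash_entries[N_PARTS];

    int main()
    {
        hash_entries[3].push_back(42);

        /* Fixed acquisition order is what makes "lock all" deadlock-free. */
        for (auto& m : part_latch) m.lock();
        search_enabled = false;
        for (auto& h : hash_entries) h.clear(); /* empty every partition */
        for (size_t i = N_PARTS; i--; ) part_latch[i].unlock();

        assert(!search_enabled && hash_entries[3].empty());
    }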
-/********************************************************************//**
-Enable the adaptive hash search system. */
-UNIV_INTERN
+/** Enable the adaptive hash search system. */
void
-btr_search_enable(void)
-/*====================*/
-{
- rw_lock_x_lock(&btr_search_latch);
-
- btr_search_enabled = TRUE;
-
- rw_lock_x_unlock(&btr_search_latch);
-}
-
-/*****************************************************************//**
-Creates and initializes a search info struct.
-@return own: search info struct */
-UNIV_INTERN
-btr_search_t*
-btr_search_info_create(
-/*===================*/
- mem_heap_t* heap) /*!< in: heap where created */
+btr_search_enable()
{
- btr_search_t* info;
-
- info = (btr_search_t*) mem_heap_alloc(heap, sizeof(btr_search_t));
-
-#ifdef UNIV_DEBUG
- info->magic_n = BTR_SEARCH_MAGIC_N;
-#endif /* UNIV_DEBUG */
-
- info->ref_count = 0;
- info->root_guess = NULL;
-
- info->hash_analysis = 0;
- info->n_hash_potential = 0;
-
- info->last_hash_succ = FALSE;
-
-#ifdef UNIV_SEARCH_PERF_STAT
- info->n_hash_succ = 0;
- info->n_hash_fail = 0;
- info->n_patt_succ = 0;
- info->n_searches = 0;
-#endif /* UNIV_SEARCH_PERF_STAT */
-
- /* Set some sensible values */
- info->n_fields = 1;
- info->n_bytes = 0;
-
- info->left_side = TRUE;
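+ /* A buffer pool resize may be in progress (srv_buf_pool_old_size
+ != srv_buf_pool_size); in that case blocks can still be withdrawn
+ and any AHI entry built now could dangle, so enabling is skipped
+ until the resize has completed. */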
+ buf_pool_mutex_enter_all();
+ if (srv_buf_pool_old_size != srv_buf_pool_size) {
+ buf_pool_mutex_exit_all();
+ return;
+ }
+ buf_pool_mutex_exit_all();
- return(info);
+ btr_search_x_lock_all();
+ btr_search_enabled = true;
+ btr_search_x_unlock_all();
}
-/*****************************************************************//**
-Returns the value of ref_count. The value is protected by
-btr_search_latch.
-@return ref_count value. */
-UNIV_INTERN
+/** Returns the value of ref_count. The value is protected by the search
+latch of the index.
+@param[in] info search info
+@param[in] index index to which the search info belongs
+@return ref_count value. */
ulint
btr_search_info_get_ref_count(
-/*==========================*/
- btr_search_t* info) /*!< in: search info. */
+ btr_search_t* info,
+ dict_index_t* index)
{
- ulint ret;
+ ulint ret = 0;
+
+ if (!btr_search_enabled) {
+ return(ret);
+ }
ut_ad(info);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_S));
+ ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));
- rw_lock_s_lock(&btr_search_latch);
+ btr_search_s_lock(index);
ret = info->ref_count;
- rw_lock_s_unlock(&btr_search_latch);
+ btr_search_s_unlock(index);
return(ret);
}
-/*********************************************************************//**
-Updates the search info of an index about hash successes. NOTE that info
+/** Updates the search info of an index about hash successes. NOTE that info
is NOT protected by any semaphore, to save CPU time! Do not assume its fields
-are consistent. */
+are consistent.
+@param[in,out] info search info
+@param[in] cursor cursor which was just positioned */
static
void
btr_search_info_update_hash(
-/*========================*/
- btr_search_t* info, /*!< in/out: search info */
- const btr_cur_t* cursor)/*!< in: cursor which was just positioned */
+ btr_search_t* info,
+ btr_cur_t* cursor)
{
- dict_index_t* index;
+ dict_index_t* index = cursor->index;
ulint n_unique;
int cmp;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- index = cursor->index;
+ ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_S));
+ ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));
if (dict_index_is_ibuf(index)) {
/* So many deletes are performed on an insert buffer tree
@@ -449,7 +502,6 @@ set_new_recomm:
info->n_fields = n_unique;
info->n_bytes = 0;
-
} else if (cursor->low_match > cursor->up_match) {
info->n_fields = cursor->up_match + 1;
@@ -463,27 +515,24 @@ set_new_recomm:
}
}
-/*********************************************************************//**
-Updates the block search info on hash successes. NOTE that info and
-block->n_hash_helps, n_fields, n_bytes, side are NOT protected by any
+/** Update the block search info on hash successes. NOTE that info and
+block->n_hash_helps, n_fields, n_bytes, left_side are NOT protected by any
semaphore, to save CPU time! Do not assume the fields are consistent.
-@return TRUE if building a (new) hash index on the block is recommended */
+@param[in,out] info search info
+@param[in,out] block buffer block
+@param[in] cursor cursor
+@return TRUE if building a (new) hash index on the block is recommended */
static
ibool
btr_search_update_block_hash_info(
-/*==============================*/
- btr_search_t* info, /*!< in: search info */
- buf_block_t* block, /*!< in: buffer block */
- btr_cur_t* cursor MY_ATTRIBUTE((unused)))
- /*!< in: cursor */
+ btr_search_t* info,
+ buf_block_t* block,
+ const btr_cur_t* cursor)
{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
- ut_ad(rw_lock_own(&block->lock, RW_LOCK_SHARED)
- || rw_lock_own(&block->lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(cursor);
+ ut_ad(!rw_lock_own_flagged(btr_get_search_latch(cursor->index),
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
+ ut_ad(rw_lock_own_flagged(&block->lock,
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
info->last_hash_succ = FALSE;
@@ -515,19 +564,13 @@ btr_search_update_block_hash_info(
block->left_side = info->left_side;
}
-#ifdef UNIV_DEBUG
- if (cursor->index->table->does_not_fit_in_memory) {
- block->n_hash_helps = 0;
- }
-#endif /* UNIV_DEBUG */
-
if ((block->n_hash_helps > page_get_n_recs(block->frame)
/ BTR_SEARCH_PAGE_BUILD_LIMIT)
&& (info->n_hash_potential >= BTR_SEARCH_BUILD_LIMIT)) {
if ((!block->index)
|| (block->n_hash_helps
- > 2 * page_get_n_recs(block->frame))
+ > 2U * page_get_n_recs(block->frame))
|| (block->n_fields != block->curr_n_fields)
|| (block->n_bytes != block->curr_n_bytes)
|| (block->left_side != block->curr_left_side)) {
@@ -541,34 +584,34 @@ btr_search_update_block_hash_info(
return(FALSE);
}
-/*********************************************************************//**
-Updates a hash node reference when it has been unsuccessfully used in a
+/** Updates a hash node reference when it has been unsuccessfully used in a
search which could have succeeded with the used hash parameters. This can
happen because when building a hash index for a page, we do not check
what happens at page boundaries, and therefore there can be misleading
hash nodes. Also, collisions in the fold value can lead to misleading
references. This function lazily fixes these imperfections in the hash
-index. */
+index.
+@param[in] info search info
+@param[in] block buffer block where the cursor is positioned
+@param[in] cursor cursor */
static
void
btr_search_update_hash_ref(
-/*=======================*/
- btr_search_t* info, /*!< in: search info */
- buf_block_t* block, /*!< in: buffer block where cursor positioned */
- btr_cur_t* cursor) /*!< in: cursor */
+ const btr_search_t* info,
+ buf_block_t* block,
+ const btr_cur_t* cursor)
{
dict_index_t* index;
ulint fold;
rec_t* rec;
ut_ad(cursor->flag == BTR_CUR_HASH_FAIL);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
- || rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(page_align(btr_cur_get_rec(cursor))
- == buf_block_get_frame(block));
+ ut_ad(rw_lock_own(btr_get_search_latch(cursor->index), RW_LOCK_X));
+ ut_ad(rw_lock_own_flagged(&block->lock,
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
+ ut_ad(page_align(btr_cur_get_rec(cursor)) == block->frame);
+ ut_ad(page_is_leaf(block->frame));
+ assert_block_ahi_valid(block);
index = block->index;
@@ -577,6 +620,7 @@ btr_search_update_hash_ref(
return;
}
+ ut_ad(block->page.id.space() == index->space);
ut_a(index == cursor->index);
ut_a(!dict_index_is_ibuf(index));
@@ -585,7 +629,7 @@ btr_search_update_hash_ref(
&& (block->curr_n_bytes == info->n_bytes)
&& (block->curr_left_side == info->left_side)) {
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
rec_offs_init(offsets_);
rec = btr_cur_get_rec(cursor);
@@ -596,42 +640,35 @@ btr_search_update_hash_ref(
}
fold = rec_fold(rec,
- rec_get_offsets(rec, index, offsets_,
+ rec_get_offsets(rec, index, offsets_, true,
ULINT_UNDEFINED, &heap),
block->curr_n_fields,
block->curr_n_bytes, index->id);
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));
- ha_insert_for_fold(btr_search_sys->hash_index, fold,
+ ha_insert_for_fold(btr_get_search_table(index), fold,
block, rec);
MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_ADDED);
}
}
-/*********************************************************************//**
-Updates the search info. */
-UNIV_INTERN
+/** Updates the search info.
+@param[in,out] info search info
+@param[in] cursor cursor which was just positioned */
void
btr_search_info_update_slow(
-/*========================*/
- btr_search_t* info, /*!< in/out: search info */
- btr_cur_t* cursor) /*!< in: cursor which was just positioned */
+ btr_search_t* info,
+ btr_cur_t* cursor)
{
buf_block_t* block;
ibool build_index;
- ulint* params;
- ulint* params2;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!rw_lock_own_flagged(btr_get_search_latch(cursor->index),
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
block = btr_cur_get_block(cursor);
@@ -646,7 +683,7 @@ btr_search_info_update_slow(
if (build_index || (cursor->flag == BTR_CUR_HASH_FAIL)) {
- btr_search_check_free_space_in_heap();
+ btr_search_check_free_space_in_heap(cursor->index);
}
if (cursor->flag == BTR_CUR_HASH_FAIL) {
@@ -656,72 +693,55 @@ btr_search_info_update_slow(
btr_search_n_hash_fail++;
#endif /* UNIV_SEARCH_PERF_STAT */
- rw_lock_x_lock(&btr_search_latch);
+ btr_search_x_lock(cursor->index);
btr_search_update_hash_ref(info, block, cursor);
- rw_lock_x_unlock(&btr_search_latch);
+ btr_search_x_unlock(cursor->index);
}
if (build_index) {
/* Note that since we did not protect block->n_fields etc.
with any semaphore, the values can be inconsistent. We have
- to check inside the function call that they make sense. We
- also malloc an array and store the values there to make sure
- the compiler does not let the function call parameters change
- inside the called function. It might be that the compiler
- would optimize the call just to pass pointers to block. */
-
- params = (ulint*) mem_alloc(3 * sizeof(ulint));
- params[0] = block->n_fields;
- params[1] = block->n_bytes;
- params[2] = block->left_side;
-
- /* Make sure the compiler cannot deduce the values and do
- optimizations */
-
- params2 = params + btr_search_this_is_zero;
-
- btr_search_build_page_hash_index(cursor->index,
- block,
- params2[0],
- params2[1],
- params2[2]);
- mem_free(params);
+ to check inside the function call that they make sense. */
+ btr_search_build_page_hash_index(cursor->index, block,
+ block->n_fields,
+ block->n_bytes,
+ block->left_side);
}
}
-/******************************************************************//**
-Checks if a guessed position for a tree cursor is right. Note that if
+/** Checks if a guessed position for a tree cursor is right. Note that if
mode is PAGE_CUR_LE, which is used in inserts, and the function returns
TRUE, then cursor->up_match and cursor->low_match both have sensible values.
-@return TRUE if success */
+@param[in,out] cursor guessed cursor position
+@param[in] can_only_compare_to_cursor_rec
+ if we do not have a latch on the page of cursor,
+ but a latch on the corresponding search system, then
+ ONLY the columns of the record UNDER the cursor
+ are protected, not the next or previous record
+ in the chain: we cannot look at the next or
+ previous record to check our guess!
+@param[in] tuple data tuple
+@param[in] mode PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, PAGE_CUR_GE
+@param[in] mtr mini transaction
+@return TRUE if success */
static
ibool
btr_search_check_guess(
-/*===================*/
- btr_cur_t* cursor, /*!< in: guessed cursor position */
+ btr_cur_t* cursor,
ibool can_only_compare_to_cursor_rec,
- /*!< in: if we do not have a latch on the page
- of cursor, but only a latch on
- btr_search_latch, then ONLY the columns
- of the record UNDER the cursor are
- protected, not the next or previous record
- in the chain: we cannot look at the next or
- previous record to check our guess! */
- const dtuple_t* tuple, /*!< in: data tuple */
- ulint mode, /*!< in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
- or PAGE_CUR_GE */
- mtr_t* mtr) /*!< in: mtr */
+ const dtuple_t* tuple,
+ ulint mode,
+ mtr_t* mtr)
{
rec_t* rec;
ulint n_unique;
ulint match;
- ulint bytes;
int cmp;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
ibool success = FALSE;
rec_offs_init(offsets_);
@@ -730,17 +750,16 @@ btr_search_check_guess(
rec = btr_cur_get_rec(cursor);
ut_ad(page_rec_is_user_rec(rec));
+ ut_ad(page_rec_is_leaf(rec));
match = 0;
- bytes = 0;
- offsets = rec_get_offsets(rec, cursor->index, offsets,
+ offsets = rec_get_offsets(rec, cursor->index, offsets, true,
n_unique, &heap);
- cmp = page_cmp_dtuple_rec_with_match(tuple, rec,
- offsets, &match, &bytes);
+ cmp = cmp_dtuple_rec_with_match(tuple, rec, offsets, &match);
if (mode == PAGE_CUR_GE) {
- if (cmp == 1) {
+ if (cmp > 0) {
goto exit_func;
}
@@ -751,18 +770,18 @@ btr_search_check_guess(
goto exit_func;
}
} else if (mode == PAGE_CUR_LE) {
- if (cmp == -1) {
+ if (cmp < 0) {
goto exit_func;
}
cursor->low_match = match;
} else if (mode == PAGE_CUR_G) {
- if (cmp != -1) {
+ if (cmp >= 0) {
goto exit_func;
}
} else if (mode == PAGE_CUR_L) {
- if (cmp != 1) {
+ if (cmp <= 0) {
goto exit_func;
}
}
@@ -774,7 +793,6 @@ btr_search_check_guess(
}
match = 0;
- bytes = 0;
if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE)) {
rec_t* prev_rec;
@@ -784,20 +802,18 @@ btr_search_check_guess(
prev_rec = page_rec_get_prev(rec);
if (page_rec_is_infimum(prev_rec)) {
- success = btr_page_get_prev(page_align(prev_rec), mtr)
- == FIL_NULL;
-
+ success = !page_has_prev(page_align(prev_rec));
goto exit_func;
}
offsets = rec_get_offsets(prev_rec, cursor->index, offsets,
- n_unique, &heap);
- cmp = page_cmp_dtuple_rec_with_match(tuple, prev_rec,
- offsets, &match, &bytes);
+ true, n_unique, &heap);
+ cmp = cmp_dtuple_rec_with_match(
+ tuple, prev_rec, offsets, &match);
if (mode == PAGE_CUR_GE) {
- success = cmp == 1;
+ success = cmp > 0;
} else {
- success = cmp != -1;
+ success = cmp >= 0;
}
goto exit_func;
@@ -809,9 +825,7 @@ btr_search_check_guess(
next_rec = page_rec_get_next(rec);
if (page_rec_is_supremum(next_rec)) {
- if (btr_page_get_next(page_align(next_rec), mtr)
- == FIL_NULL) {
-
+ if (!page_has_next(page_align(next_rec))) {
cursor->up_match = 0;
success = TRUE;
}
@@ -820,14 +834,14 @@ btr_search_check_guess(
}
offsets = rec_get_offsets(next_rec, cursor->index, offsets,
- n_unique, &heap);
- cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec,
- offsets, &match, &bytes);
+ true, n_unique, &heap);
+ cmp = cmp_dtuple_rec_with_match(
+ tuple, next_rec, offsets, &match);
if (mode == PAGE_CUR_LE) {
- success = cmp == -1;
+ success = cmp < 0;
cursor->up_match = match;
} else {
- success = cmp != 1;
+ success = cmp <= 0;
}
}
exit_func:
@@ -837,34 +851,53 @@ exit_func:
return(success);
}
-/******************************************************************//**
-Tries to guess the right search position based on the hash search info
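+/** Mark the hash search as failed on the cursor and update the search
+info: sets cursor->flag to BTR_CUR_HASH_FAIL, adjusts the
+UNIV_SEARCH_PERF_STAT counters and clears info->last_hash_succ.
+@param[in,out] info search info
+@param[in,out] cursor cursor on which the hash search failed */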
+static
+void
+btr_search_failure(btr_search_t* info, btr_cur_t* cursor)
+{
+ cursor->flag = BTR_CUR_HASH_FAIL;
+
+#ifdef UNIV_SEARCH_PERF_STAT
+ ++info->n_hash_fail;
+
+ if (info->n_hash_succ > 0) {
+ --info->n_hash_succ;
+ }
+#endif /* UNIV_SEARCH_PERF_STAT */
+
+ info->last_hash_succ = FALSE;
+}
+
+/** Tries to guess the right search position based on the hash search info
of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
and the function returns TRUE, then cursor->up_match and cursor->low_match
both have sensible values.
-@return TRUE if succeeded */
-UNIV_INTERN
+@param[in,out] index index
+@param[in,out] info index search info
+@param[in] tuple logical record
+@param[in] mode PAGE_CUR_L, ....
+@param[in] latch_mode BTR_SEARCH_LEAF, ...;
+ NOTE that only if has_search_latch is 0, we will
+ have a latch set on the cursor page, otherwise
+ we assume the caller uses its search latch
+ to protect the record!
+@param[out] cursor tree cursor
+@param[in] has_search_latch
+ latch mode the caller currently has on
+ search system: RW_S/X_LATCH or 0
+@param[in] mtr mini transaction
+@return TRUE if succeeded */
ibool
btr_search_guess_on_hash(
-/*=====================*/
- dict_index_t* index, /*!< in: index */
- btr_search_t* info, /*!< in: index search info */
- const dtuple_t* tuple, /*!< in: logical record */
- ulint mode, /*!< in: PAGE_CUR_L, ... */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ...;
- NOTE that only if has_search_latch
- is 0, we will have a latch set on
- the cursor page, otherwise we assume
- the caller uses his search latch
- to protect the record! */
- btr_cur_t* cursor, /*!< out: tree cursor */
- ulint has_search_latch,/*!< in: latch mode the caller
- currently has on btr_search_latch:
- RW_S_LATCH, RW_X_LATCH, or 0 */
- mtr_t* mtr) /*!< in: mtr */
+ dict_index_t* index,
+ btr_search_t* info,
+ const dtuple_t* tuple,
+ ulint mode,
+ ulint latch_mode,
+ btr_cur_t* cursor,
+ ulint has_search_latch,
+ mtr_t* mtr)
{
- buf_pool_t* buf_pool;
- buf_block_t* block;
const rec_t* rec;
ulint fold;
index_id_t index_id;
@@ -872,15 +905,23 @@ btr_search_guess_on_hash(
btr_cur_t cursor2;
btr_pcur_t pcur;
#endif
+
+ if (!btr_search_enabled) {
+ return(FALSE);
+ }
+
ut_ad(index && info && tuple && cursor && mtr);
ut_ad(!dict_index_is_ibuf(index));
ut_ad((latch_mode == BTR_SEARCH_LEAF)
|| (latch_mode == BTR_MODIFY_LEAF));
+ /* Not supported for spatial index */
+ ut_ad(!dict_index_is_spatial(index));
+
/* Note that, for efficiency, the struct info may not be protected by
any latch here! */
- if (UNIV_UNLIKELY(info->n_hash_potential == 0)) {
+ if (info->n_hash_potential == 0) {
return(FALSE);
}
@@ -888,8 +929,7 @@ btr_search_guess_on_hash(
cursor->n_fields = info->n_fields;
cursor->n_bytes = info->n_bytes;
- if (UNIV_UNLIKELY(dtuple_get_n_fields(tuple)
- < cursor->n_fields + (cursor->n_bytes > 0))) {
+ if (dtuple_get_n_fields(tuple) < btr_search_get_n_fields(cursor)) {
return(FALSE);
}
@@ -904,49 +944,69 @@ btr_search_guess_on_hash(
cursor->fold = fold;
cursor->flag = BTR_CUR_HASH;
- if (UNIV_LIKELY(!has_search_latch)) {
- rw_lock_s_lock(&btr_search_latch);
+ if (!has_search_latch) {
+ btr_search_s_lock(index);
+
+ if (!btr_search_enabled) {
+ btr_search_s_unlock(index);
- if (UNIV_UNLIKELY(!btr_search_enabled)) {
- goto failure_unlock;
+ btr_search_failure(info, cursor);
+
+ return(FALSE);
}
}
- ut_ad(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_EX);
- ut_ad(rw_lock_get_reader_count(&btr_search_latch) > 0);
+ ut_ad(rw_lock_get_writer(btr_get_search_latch(index)) != RW_LOCK_X);
+ ut_ad(rw_lock_get_reader_count(btr_get_search_latch(index)) > 0);
+
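+ /* The fold of the tuple prefix is used as the key into the index's
+ AHI partition: a hit yields the buffer pool address of a record and
+ skips the B-tree descent entirely, while a miss makes this function
+ return FALSE so that the caller falls back to the ordinary search. */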
+ rec = (rec_t*) ha_search_and_get_data(
+ btr_get_search_table(index), fold);
+
+ if (rec == NULL) {
+
+ if (!has_search_latch) {
+ btr_search_s_unlock(index);
+ }
- rec = (rec_t*) ha_search_and_get_data(btr_search_sys->hash_index, fold);
+ btr_search_failure(info, cursor);
- if (UNIV_UNLIKELY(!rec)) {
- goto failure_unlock;
+ return(FALSE);
}
- block = buf_block_align(rec);
+ buf_block_t* block = buf_block_from_ahi(rec);
+
+ if (!has_search_latch) {
- if (UNIV_LIKELY(!has_search_latch)) {
+ if (!buf_page_get_known_nowait(
+ latch_mode, block, BUF_MAKE_YOUNG,
+ __FILE__, __LINE__, mtr)) {
+
+ btr_search_s_unlock(index);
- if (UNIV_UNLIKELY(
- !buf_page_get_known_nowait(latch_mode, block,
- BUF_MAKE_YOUNG,
- __FILE__, __LINE__,
- mtr))) {
- goto failure_unlock;
+ btr_search_failure(info, cursor);
+
+ return(FALSE);
}
- rw_lock_s_unlock(&btr_search_latch);
+ btr_search_s_unlock(index);
buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
}
- if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {
+ if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
+
ut_ad(buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH);
- if (UNIV_LIKELY(!has_search_latch)) {
+ if (!has_search_latch) {
btr_leaf_page_release(block, latch_mode, mtr);
}
- goto failure;
+ btr_search_failure(info, cursor);
+
+ return(FALSE);
}
ut_ad(page_rec_is_user_rec(rec));
@@ -955,23 +1015,26 @@ btr_search_guess_on_hash(
/* Check the validity of the guess within the page */
- /* If we only have the latch on btr_search_latch, not on the
+ /* If we only have the latch on search system, not on the
page, it only protects the columns of the record the cursor
is positioned on. We cannot look at the next of the previous
record to determine if our guess for the cursor position is
right. */
- if (UNIV_UNLIKELY(index_id != btr_page_get_index_id(block->frame))
+ if (index_id != btr_page_get_index_id(block->frame)
|| !btr_search_check_guess(cursor,
has_search_latch,
tuple, mode, mtr)) {
- if (UNIV_LIKELY(!has_search_latch)) {
+
+ if (!has_search_latch) {
btr_leaf_page_release(block, latch_mode, mtr);
}
- goto failure;
+ btr_search_failure(info, cursor);
+
+ return(FALSE);
}
- if (UNIV_LIKELY(info->n_hash_potential < BTR_SEARCH_BUILD_LIMIT + 5)) {
+ if (info->n_hash_potential < BTR_SEARCH_BUILD_LIMIT + 5) {
info->n_hash_potential++;
}
@@ -987,8 +1050,9 @@ btr_search_guess_on_hash(
btr_leaf_page_release(block, latch_mode, mtr);
- btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
- &cursor2, 0, mtr);
+ btr_cur_search_to_nth_level(
+ index, 0, tuple, mode, latch_mode, &cursor2, 0, mtr);
+
if (mode == PAGE_CUR_GE
&& page_rec_is_supremum(btr_cur_get_rec(&cursor2))) {
@@ -998,8 +1062,9 @@ btr_search_guess_on_hash(
info->last_hash_succ = FALSE;
- btr_pcur_open_on_user_rec(index, tuple, mode, latch_mode,
- &pcur, mtr);
+ btr_pcur_open_on_user_rec(
+ index, tuple, mode, latch_mode, &pcur, mtr);
+
ut_ad(btr_pcur_get_rec(&pcur) == btr_cur_get_rec(cursor));
} else {
ut_ad(btr_cur_get_rec(&cursor2) == btr_cur_get_rec(cursor));
@@ -1014,94 +1079,103 @@ btr_search_guess_on_hash(
#ifdef UNIV_SEARCH_PERF_STAT
btr_search_n_succ++;
#endif
- if (UNIV_LIKELY(!has_search_latch)
- && buf_page_peek_if_too_old(&block->page)) {
+ if (!has_search_latch && buf_page_peek_if_too_old(&block->page)) {
buf_page_make_young(&block->page);
}
/* Increment the page get statistics though we did not really
fix the page: for user info only */
- buf_pool = buf_pool_from_bpage(&block->page);
- buf_pool->stat.n_page_gets++;
-
- return(TRUE);
-
- /*-------------------------------------------*/
-failure_unlock:
- if (UNIV_LIKELY(!has_search_latch)) {
- rw_lock_s_unlock(&btr_search_latch);
- }
-failure:
- cursor->flag = BTR_CUR_HASH_FAIL;
-
-#ifdef UNIV_SEARCH_PERF_STAT
- info->n_hash_fail++;
+ {
+ buf_pool_t* buf_pool = buf_pool_from_bpage(&block->page);
- if (info->n_hash_succ > 0) {
- info->n_hash_succ--;
+ ++buf_pool->stat.n_page_gets;
}
-#endif
- info->last_hash_succ = FALSE;
- return(FALSE);
+ return(TRUE);
}
-/********************************************************************//**
-Drops a page hash index. */
-UNIV_INTERN
+/** Drop any adaptive hash index entries that point to an index page.
+@param[in,out] block block containing index page, s- or x-latched, or an
+ index page for which we know that
+ block->buf_fix_count == 0 or it is an index page which
+ has already been removed from the buf_pool->page_hash
+ i.e.: it is in state BUF_BLOCK_REMOVE_HASH */
void
-btr_search_drop_page_hash_index(
-/*============================*/
- buf_block_t* block) /*!< in: block containing index page,
- s- or x-latched, or an index page
- for which we know that
- block->buf_fix_count == 0 or it is an
- index page which has already been
- removed from the buf_pool->page_hash
- i.e.: it is in state
- BUF_BLOCK_REMOVE_HASH */
+btr_search_drop_page_hash_index(buf_block_t* block)
{
- hash_table_t* table;
ulint n_fields;
ulint n_bytes;
const page_t* page;
const rec_t* rec;
ulint fold;
ulint prev_fold;
- index_id_t index_id;
ulint n_cached;
ulint n_recs;
ulint* folds;
ulint i;
mem_heap_t* heap;
const dict_index_t* index;
- ulint* offsets;
+ offset_t* offsets;
+ rw_lock_t* latch;
btr_search_t* info;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
+retry:
/* Do a dirty check on block->index, return if the block is
- not in the adaptive hash index. This is to avoid acquiring
- shared btr_search_latch for performance consideration. */
- if (!block->index) {
+ not in the adaptive hash index. */
+ index = block->index;
+ /* This debug check uses a dirty read that could theoretically cause
+ false positives while buf_pool_clear_hash_index() is executing. */
+ assert_block_ahi_valid(block);
+
+ if (index == NULL) {
return;
}
-retry:
- rw_lock_s_lock(&btr_search_latch);
- index = block->index;
+ ut_ad(block->page.buf_fix_count == 0
+ || buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH
+ || rw_lock_own_flagged(&block->lock,
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
+ ut_ad(page_is_leaf(block->frame));
+
+ /* We must not dereference index here, because it could be freed
+ if (index->table->n_ref_count == 0 && !mutex_own(&dict_sys->mutex)).
+ Determine the ahi_slot based on the block contents. */
- if (UNIV_LIKELY(!index)) {
+ const index_id_t index_id
+ = btr_page_get_index_id(block->frame);
+ const ulint ahi_slot
+ = ut_fold_ulint_pair(static_cast<ulint>(index_id),
+ static_cast<ulint>(block->page.id.space()))
+ % btr_ahi_parts;
+ latch = btr_search_latches[ahi_slot];
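+
+ /* Illustration (the numbers are examples only): with the default
+ btr_ahi_parts == 8, a page of index id 42 in tablespace 3 maps to
+ slot ut_fold_ulint_pair(42, 3) % 8; all AHI entries for that page
+ live in btr_search_sys->hash_tables[slot], protected by
+ btr_search_latches[slot]. */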
- rw_lock_s_unlock(&btr_search_latch);
+ ut_ad(!btr_search_own_any(RW_LOCK_S));
+ ut_ad(!btr_search_own_any(RW_LOCK_X));
+ rw_lock_s_lock(latch);
+ assert_block_ahi_valid(block);
+
+ if (block->index == NULL) {
+ rw_lock_s_unlock(latch);
return;
}
+ /* The index associated with a block must remain the
+ same, because we are holding block->lock or the block is
+ not accessible by other threads (BUF_BLOCK_REMOVE_HASH),
+ or the index is not accessible to other threads
+ (buf_fix_count == 0 when DROP TABLE or similar is executing
+ buf_LRU_drop_page_hash_for_tablespace()). */
+ ut_a(index == block->index);
+#ifdef MYSQL_INDEX_DISABLE_AHI
+ ut_ad(!index->disable_ahi);
+#endif
+ ut_ad(btr_search_enabled);
+
+ ut_ad(index->space == FIL_NULL
+ || block->page.id.space() == index->space);
+ ut_a(index_id == index->id);
ut_a(!dict_index_is_ibuf(index));
#ifdef UNIV_DEBUG
switch (dict_index_get_online_status(index)) {
@@ -1124,25 +1198,15 @@ retry:
}
#endif /* UNIV_DEBUG */
- table = btr_search_sys->hash_index;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
- || rw_lock_own(&(block->lock), RW_LOCK_EX)
- || block->page.buf_fix_count == 0
- || buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH);
-#endif /* UNIV_SYNC_DEBUG */
-
n_fields = block->curr_n_fields;
n_bytes = block->curr_n_bytes;
- /* NOTE: The fields of block must not be accessed after
- releasing btr_search_latch, as the index page might only
- be s-latched! */
+ /* NOTE: The AHI fields of block must not be accessed after
+ releasing search latch, as the index page might only be s-latched! */
- rw_lock_s_unlock(&btr_search_latch);
+ rw_lock_s_unlock(latch);
- ut_a(n_fields + n_bytes > 0);
+ ut_a(n_fields > 0 || n_bytes > 0);
page = block->frame;
n_recs = page_get_n_recs(page);
@@ -1150,26 +1214,23 @@ retry:
/* Calculate and cache fold values into an array for fast deletion
from the hash index */
- folds = (ulint*) mem_alloc(n_recs * sizeof(ulint));
+ folds = (ulint*) ut_malloc_nokey(n_recs * sizeof(ulint));
n_cached = 0;
rec = page_get_infimum_rec(page);
rec = page_rec_get_next_low(rec, page_is_comp(page));
- index_id = btr_page_get_index_id(page);
-
- ut_a(index_id == index->id);
-
prev_fold = 0;
heap = NULL;
offsets = NULL;
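+
+ /* rec_fold() hashes the first n_fields complete fields plus
+ n_bytes of the next field together with the index id; records
+ sharing that prefix share a fold value, which is why a fold equal
+ to the previous one is skipped below rather than cached twice. */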
while (!page_rec_is_supremum(rec)) {
- offsets = rec_get_offsets(rec, index, offsets,
- n_fields + (n_bytes > 0), &heap);
- ut_a(rec_offs_n_fields(offsets) == n_fields + (n_bytes > 0));
+ offsets = rec_get_offsets(
+ rec, index, offsets, true,
+ btr_search_get_n_fields(n_fields, n_bytes),
+ &heap);
fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id);
if (fold == prev_fold && prev_fold != 0) {
@@ -1191,7 +1252,7 @@ next_rec:
mem_heap_free(heap);
}
- rw_lock_x_lock(&btr_search_latch);
+ rw_lock_x_lock(latch);
if (UNIV_UNLIKELY(!block->index)) {
/* Someone else has meanwhile dropped the hash index */
@@ -1201,21 +1262,23 @@ next_rec:
ut_a(block->index == index);
- if (UNIV_UNLIKELY(block->curr_n_fields != n_fields)
- || UNIV_UNLIKELY(block->curr_n_bytes != n_bytes)) {
+ if (block->curr_n_fields != n_fields
+ || block->curr_n_bytes != n_bytes) {
/* Someone else has meanwhile built a new hash index on the
page, with different parameters */
- rw_lock_x_unlock(&btr_search_latch);
+ rw_lock_x_unlock(latch);
- mem_free(folds);
+ ut_free(folds);
goto retry;
}
for (i = 0; i < n_cached; i++) {
- ha_remove_all_nodes_to_page(table, folds[i], page);
+ ha_remove_all_nodes_to_page(
+ btr_search_sys->hash_tables[ahi_slot],
+ folds[i], page);
}
info = btr_search_get_info(block->index);
@@ -1228,37 +1291,20 @@ next_rec:
MONITOR_INC_VALUE(MONITOR_ADAPTIVE_HASH_ROW_REMOVED, n_cached);
cleanup:
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- if (UNIV_UNLIKELY(block->n_pointers)) {
- /* Corruption */
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Corruption of adaptive hash index."
- " After dropping\n"
- "InnoDB: the hash index to a page of %s,"
- " still %lu hash nodes remain.\n",
- index->name, (ulong) block->n_pointers);
- rw_lock_x_unlock(&btr_search_latch);
-
- ut_ad(btr_search_validate());
- } else {
- rw_lock_x_unlock(&btr_search_latch);
- }
-#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- rw_lock_x_unlock(&btr_search_latch);
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+ assert_block_ahi_valid(block);
+ rw_lock_x_unlock(latch);
- mem_free(folds);
+ ut_free(folds);
}
/** Drop possible adaptive hash index entries when a page is evicted
-from the buffer pool or freed in a file, or the index is being dropped. */
-UNIV_INTERN
-void
-btr_search_drop_page_hash_when_freed(ulint space, ulint page_no)
+from the buffer pool or freed in a file, or the index is being dropped.
+@param[in] page_id page id */
+void btr_search_drop_page_hash_when_freed(const page_id_t page_id)
{
buf_block_t* block;
mtr_t mtr;
+ dberr_t err = DB_SUCCESS;
mtr_start(&mtr);
@@ -1268,35 +1314,49 @@ btr_search_drop_page_hash_when_freed(ulint space, ulint page_no)
are possibly holding, we cannot s-latch the page, but must
(recursively) x-latch it, even though we are only reading. */
- block = buf_page_get_gen(space, 0, page_no, RW_X_LATCH, NULL,
+ block = buf_page_get_gen(page_id, univ_page_size, RW_X_LATCH, NULL,
BUF_PEEK_IF_IN_POOL, __FILE__, __LINE__,
- &mtr);
+ &mtr, &err);
- if (block && block->index) {
+ if (block) {
+
+ /* If AHI is still valid, page can't be in free state.
+ AHI is dropped when page is freed. */
+ ut_ad(!block->page.file_page_was_freed);
buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
- btr_search_drop_page_hash_index(block);
+ dict_index_t* index = block->index;
+ if (index != NULL) {
+ /* In all our callers, the table handle should
+ be open, or we should be in the process of
+ dropping the table (preventing eviction). */
+ ut_ad(index->table->get_ref_count() > 0
+ || mutex_own(&dict_sys->mutex));
+ btr_search_drop_page_hash_index(block);
+ }
}
mtr_commit(&mtr);
}
-/********************************************************************//**
-Builds a hash index on a page with the given parameters. If the page already
+/** Build a hash index on a page with the given parameters. If the page already
has a hash index with different parameters, the old hash index is removed.
If index is non-NULL, this function checks if n_fields and n_bytes are
-sensible values, and does not build a hash index if not. */
+sensible, and does not build a hash index if not.
+@param[in,out] index index for which to build.
+@param[in,out] block index page, s-/x- latched.
+@param[in] n_fields hash this many full fields
+@param[in] n_bytes hash this many bytes of the next field
+@param[in] left_side hash for searches from left side */
static
void
btr_search_build_page_hash_index(
-/*=============================*/
- dict_index_t* index, /*!< in: index for which to build */
- buf_block_t* block, /*!< in: index page, s- or x-latched */
- ulint n_fields,/*!< in: hash this many full fields */
- ulint n_bytes,/*!< in: hash this many bytes from the next
- field */
- ibool left_side)/*!< in: hash for searches from left side? */
+ dict_index_t* index,
+ buf_block_t* block,
+ ulint n_fields,
+ ulint n_bytes,
+ ibool left_side)
{
hash_table_t* table;
page_t* page;
@@ -1310,65 +1370,66 @@ btr_search_build_page_hash_index(
rec_t** recs;
ulint i;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
+
+#ifdef MYSQL_INDEX_DISABLE_AHI
+ if (index->disable_ahi) return;
+#endif
+ if (!btr_search_enabled) {
+ return;
+ }
+ rec_offs_init(offsets_);
ut_ad(index);
+ ut_ad(block->page.id.space() == index->space);
ut_a(!dict_index_is_ibuf(index));
+ ut_ad(page_is_leaf(block->frame));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
- || rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));
+ ut_ad(rw_lock_own_flagged(&block->lock,
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
- rw_lock_s_lock(&btr_search_latch);
+ btr_search_s_lock(index);
- if (!btr_search_enabled) {
- rw_lock_s_unlock(&btr_search_latch);
- return;
- }
-
- table = btr_search_sys->hash_index;
+ table = btr_get_search_table(index);
page = buf_block_get_frame(block);
if (block->index && ((block->curr_n_fields != n_fields)
|| (block->curr_n_bytes != n_bytes)
|| (block->curr_left_side != left_side))) {
- rw_lock_s_unlock(&btr_search_latch);
+ btr_search_s_unlock(index);
btr_search_drop_page_hash_index(block);
} else {
- rw_lock_s_unlock(&btr_search_latch);
+ btr_search_s_unlock(index);
}
- n_recs = page_get_n_recs(page);
+ /* Check that the values for hash index build are sensible */
- if (n_recs == 0) {
+ if (n_fields == 0 && n_bytes == 0) {
return;
}
- /* Check that the values for hash index build are sensible */
-
- if (n_fields + n_bytes == 0) {
-
+ if (dict_index_get_n_unique_in_tree(index)
+ < btr_search_get_n_fields(n_fields, n_bytes)) {
return;
}
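+
+ /* btr_search_get_n_fields(n_fields, n_bytes) evaluates to
+ n_fields + (n_bytes > 0): hashing some bytes of an extra field
+ consumes one more field position, so e.g. n_fields == 2 with
+ n_bytes == 3 needs at least 3 fields unique in the tree (the
+ numbers are illustrative only). */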
- if (dict_index_get_n_unique_in_tree(index) < n_fields
- || (dict_index_get_n_unique_in_tree(index) == n_fields
- && n_bytes > 0)) {
+ n_recs = page_get_n_recs(page);
+
+ if (n_recs == 0) {
+
return;
}
/* Calculate and cache fold values and corresponding records into
an array for fast insertion to the hash index */
- folds = (ulint*) mem_alloc(n_recs * sizeof(ulint));
- recs = (rec_t**) mem_alloc(n_recs * sizeof(rec_t*));
+ folds = (ulint*) ut_malloc_nokey(n_recs * sizeof(ulint));
+ recs = (rec_t**) ut_malloc_nokey(n_recs * sizeof(rec_t*));
n_cached = 0;
@@ -1376,16 +1437,12 @@ btr_search_build_page_hash_index(
rec = page_rec_get_next(page_get_infimum_rec(page));
- offsets = rec_get_offsets(rec, index, offsets,
- n_fields + (n_bytes > 0), &heap);
-
- if (!page_rec_is_supremum(rec)) {
- ut_a(n_fields <= rec_offs_n_fields(offsets));
-
- if (n_bytes > 0) {
- ut_a(n_fields < rec_offs_n_fields(offsets));
- }
- }
+ offsets = rec_get_offsets(
+ rec, index, offsets, true,
+ btr_search_get_n_fields(n_fields, n_bytes),
+ &heap);
+ ut_ad(page_rec_is_supremum(rec)
+ || n_fields + (n_bytes > 0) == rec_offs_n_fields(offsets));
fold = rec_fold(rec, offsets, n_fields, n_bytes, index->id);
@@ -1411,8 +1468,9 @@ btr_search_build_page_hash_index(
break;
}
- offsets = rec_get_offsets(next_rec, index, offsets,
- n_fields + (n_bytes > 0), &heap);
+ offsets = rec_get_offsets(
+ next_rec, index, offsets, true,
+ btr_search_get_n_fields(n_fields, n_bytes), &heap);
next_fold = rec_fold(next_rec, offsets, n_fields,
n_bytes, index->id);
@@ -1435,11 +1493,11 @@ btr_search_build_page_hash_index(
fold = next_fold;
}
- btr_search_check_free_space_in_heap();
+ btr_search_check_free_space_in_heap(index);
- rw_lock_x_lock(&btr_search_latch);
+ btr_search_x_lock(index);
- if (UNIV_UNLIKELY(!btr_search_enabled)) {
+ if (!btr_search_enabled) {
goto exit_func;
}
@@ -1455,14 +1513,15 @@ btr_search_build_page_hash_index(
have to take care not to increment the counter in that
case. */
if (!block->index) {
+ assert_block_ahi_empty(block);
index->search_info->ref_count++;
}
block->n_hash_helps = 0;
- block->curr_n_fields = n_fields;
- block->curr_n_bytes = n_bytes;
- block->curr_left_side = left_side;
+ block->curr_n_fields = unsigned(n_fields);
+ block->curr_n_bytes = unsigned(n_bytes);
+ block->curr_left_side = unsigned(left_side);
block->index = index;
for (i = 0; i < n_cached; i++) {
@@ -1473,51 +1532,52 @@ btr_search_build_page_hash_index(
MONITOR_INC(MONITOR_ADAPTIVE_HASH_PAGE_ADDED);
MONITOR_INC_VALUE(MONITOR_ADAPTIVE_HASH_ROW_ADDED, n_cached);
exit_func:
- rw_lock_x_unlock(&btr_search_latch);
+ assert_block_ahi_valid(block);
+ btr_search_x_unlock(index);
- mem_free(folds);
- mem_free(recs);
+ ut_free(folds);
+ ut_free(recs);
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
}
-/********************************************************************//**
-Moves or deletes hash entries for moved records. If new_page is already hashed,
-then the hash index for page, if any, is dropped. If new_page is not hashed,
-and page is hashed, then a new hash index is built to new_page with the same
-parameters as page (this often happens when a page is split). */
-UNIV_INTERN
+/** Moves or deletes hash entries for moved records. If new_page is already
+hashed, then the hash index for page, if any, is dropped. If new_page is not
+hashed, and page is hashed, then a new hash index is built to new_page with the
+same parameters as page (this often happens when a page is split).
+@param[in,out] new_block records are copied to this page.
+@param[in,out] block index page from which records are copied, and the
+ copied records will be deleted from this page.
+@param[in,out] index record descriptor */
void
btr_search_move_or_delete_hash_entries(
-/*===================================*/
- buf_block_t* new_block, /*!< in: records are copied
- to this page */
- buf_block_t* block, /*!< in: index page from which
- records were copied, and the
- copied records will be deleted
- from this page */
- dict_index_t* index) /*!< in: record descriptor */
+ buf_block_t* new_block,
+ buf_block_t* block,
+ dict_index_t* index)
{
- ulint n_fields;
- ulint n_bytes;
- ibool left_side;
+#ifdef MYSQL_INDEX_DISABLE_AHI
+ if (index->disable_ahi) return;
+#endif
+ if (!btr_search_enabled) {
+ return;
+ }
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
- ut_ad(rw_lock_own(&(new_block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(&(block->lock), RW_LOCK_X));
+ ut_ad(rw_lock_own(&(new_block->lock), RW_LOCK_X));
- rw_lock_s_lock(&btr_search_latch);
+ btr_search_s_lock(index);
ut_a(!new_block->index || new_block->index == index);
ut_a(!block->index || block->index == index);
ut_a(!(new_block->index || block->index)
|| !dict_index_is_ibuf(index));
+ assert_block_ahi_valid(block);
+ assert_block_ahi_valid(new_block);
if (new_block->index) {
- rw_lock_s_unlock(&btr_search_latch);
+ btr_search_s_unlock(index);
btr_search_drop_page_hash_index(block);
@@ -1525,55 +1585,58 @@ btr_search_move_or_delete_hash_entries(
}
if (block->index) {
-
- n_fields = block->curr_n_fields;
- n_bytes = block->curr_n_bytes;
- left_side = block->curr_left_side;
+ ulint n_fields = block->curr_n_fields;
+ ulint n_bytes = block->curr_n_bytes;
+ ibool left_side = block->curr_left_side;
new_block->n_fields = block->curr_n_fields;
new_block->n_bytes = block->curr_n_bytes;
new_block->left_side = left_side;
- rw_lock_s_unlock(&btr_search_latch);
+ btr_search_s_unlock(index);
- ut_a(n_fields + n_bytes > 0);
+ ut_a(n_fields > 0 || n_bytes > 0);
- btr_search_build_page_hash_index(index, new_block, n_fields,
- n_bytes, left_side);
+ btr_search_build_page_hash_index(
+ index, new_block, n_fields, n_bytes, left_side);
ut_ad(n_fields == block->curr_n_fields);
ut_ad(n_bytes == block->curr_n_bytes);
ut_ad(left_side == block->curr_left_side);
return;
}
- rw_lock_s_unlock(&btr_search_latch);
+ btr_search_s_unlock(index);
}
-/********************************************************************//**
-Updates the page hash index when a single record is deleted from a page. */
-UNIV_INTERN
+/** Updates the page hash index when a single record is deleted from a page.
+@param[in] cursor cursor which was positioned on the record to delete
+ using btr_cur_search_...; the record is not yet deleted. */
void
-btr_search_update_hash_on_delete(
-/*=============================*/
- btr_cur_t* cursor) /*!< in: cursor which was positioned on the
- record to delete using btr_cur_search_...,
- the record is not yet deleted */
+btr_search_update_hash_on_delete(btr_cur_t* cursor)
{
hash_table_t* table;
buf_block_t* block;
const rec_t* rec;
ulint fold;
dict_index_t* index;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
mem_heap_t* heap = NULL;
rec_offs_init(offsets_);
+ ut_ad(page_is_leaf(btr_cur_get_page(cursor)));
+#ifdef MYSQL_INDEX_DISABLE_AHI
+ if (cursor->index->disable_ahi) return;
+#endif
+
+ if (!btr_search_enabled) {
+ return;
+ }
+
block = btr_cur_get_block(cursor);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(&(block->lock), RW_LOCK_X));
+ assert_block_ahi_valid(block);
index = block->index;
if (!index) {
@@ -1581,22 +1644,24 @@ btr_search_update_hash_on_delete(
return;
}
+ ut_ad(block->page.id.space() == index->space);
ut_a(index == cursor->index);
- ut_a(block->curr_n_fields + block->curr_n_bytes > 0);
+ ut_a(block->curr_n_fields > 0 || block->curr_n_bytes > 0);
ut_a(!dict_index_is_ibuf(index));
- table = btr_search_sys->hash_index;
+ table = btr_get_search_table(index);
rec = btr_cur_get_rec(cursor);
- fold = rec_fold(rec, rec_get_offsets(rec, index, offsets_,
+ fold = rec_fold(rec, rec_get_offsets(rec, index, offsets_, true,
ULINT_UNDEFINED, &heap),
block->curr_n_fields, block->curr_n_bytes, index->id);
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
- rw_lock_x_lock(&btr_search_latch);
+ btr_search_x_lock(index);
+ assert_block_ahi_valid(block);
if (block->index) {
ut_a(block->index == index);
@@ -1607,34 +1672,37 @@ btr_search_update_hash_on_delete(
MONITOR_INC(
MONITOR_ADAPTIVE_HASH_ROW_REMOVE_NOT_FOUND);
}
+
+ assert_block_ahi_valid(block);
}
- rw_lock_x_unlock(&btr_search_latch);
+ btr_search_x_unlock(index);
}
-/********************************************************************//**
-Updates the page hash index when a single record is inserted on a page. */
-UNIV_INTERN
+/** Updates the page hash index when a single record is inserted on a page.
+@param[in] cursor cursor which was positioned to the place to insert
+ using btr_cur_search_..., and the new record has been
+ inserted next to the cursor. */
void
-btr_search_update_hash_node_on_insert(
-/*==================================*/
- btr_cur_t* cursor) /*!< in: cursor which was positioned to the
- place to insert using btr_cur_search_...,
- and the new record has been inserted next
- to the cursor */
+btr_search_update_hash_node_on_insert(btr_cur_t* cursor)
{
hash_table_t* table;
buf_block_t* block;
dict_index_t* index;
rec_t* rec;
+#ifdef MYSQL_INDEX_DISABLE_AHI
+ if (cursor->index->disable_ahi) return;
+#endif
+ if (!btr_search_enabled) {
+ return;
+ }
+
rec = btr_cur_get_rec(cursor);
block = btr_cur_get_block(cursor);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(&(block->lock), RW_LOCK_X));
index = block->index;
@@ -1646,7 +1714,7 @@ btr_search_update_hash_node_on_insert(
ut_a(cursor->index == index);
ut_a(!dict_index_is_ibuf(index));
- rw_lock_x_lock(&btr_search_latch);
+ btr_search_x_lock(index);
if (!block->index) {
@@ -1660,7 +1728,7 @@ btr_search_update_hash_node_on_insert(
&& (cursor->n_bytes == block->curr_n_bytes)
&& !block->curr_left_side) {
- table = btr_search_sys->hash_index;
+ table = btr_get_search_table(index);
if (ha_search_and_update_if_found(
table, cursor->fold, rec, block,
@@ -1669,24 +1737,22 @@ btr_search_update_hash_node_on_insert(
}
func_exit:
- rw_lock_x_unlock(&btr_search_latch);
+ assert_block_ahi_valid(block);
+ btr_search_x_unlock(index);
} else {
- rw_lock_x_unlock(&btr_search_latch);
+ btr_search_x_unlock(index);
btr_search_update_hash_on_insert(cursor);
}
}
-/********************************************************************//**
-Updates the page hash index when a single record is inserted on a page. */
-UNIV_INTERN
-void
-btr_search_update_hash_on_insert(
-/*=============================*/
- btr_cur_t* cursor) /*!< in: cursor which was positioned to the
+/** Updates the page hash index when a single record is inserted on a page.
+@param[in,out] cursor cursor which was positioned to the
place to insert using btr_cur_search_...,
and the new record has been inserted next
to the cursor */
+void
+btr_search_update_hash_on_insert(btr_cur_t* cursor)
{
hash_table_t* table;
buf_block_t* block;
@@ -1702,15 +1768,22 @@ btr_search_update_hash_on_insert(
ibool left_side;
ibool locked = FALSE;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
rec_offs_init(offsets_);
+ ut_ad(page_is_leaf(btr_cur_get_page(cursor)));
+#ifdef MYSQL_INDEX_DISABLE_AHI
+ if (cursor->index->disable_ahi) return;
+#endif
+ if (!btr_search_enabled) {
+ return;
+ }
+
block = btr_cur_get_block(cursor);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(&(block->lock), RW_LOCK_X));
+ assert_block_ahi_valid(block);
index = block->index;
@@ -1719,12 +1792,16 @@ btr_search_update_hash_on_insert(
return;
}
- btr_search_check_free_space_in_heap();
+ ut_ad(block->page.id.space() == index->space);
+ btr_search_check_free_space_in_heap(index);
- table = btr_search_sys->hash_index;
+ table = btr_get_search_table(index);
rec = btr_cur_get_rec(cursor);
+#ifdef MYSQL_INDEX_DISABLE_AHI
+ ut_a(!index->disable_ahi);
+#endif
ut_a(index == cursor->index);
ut_a(!dict_index_is_ibuf(index));
@@ -1735,25 +1812,27 @@ btr_search_update_hash_on_insert(
ins_rec = page_rec_get_next_const(rec);
next_rec = page_rec_get_next_const(ins_rec);
- offsets = rec_get_offsets(ins_rec, index, offsets,
+ offsets = rec_get_offsets(ins_rec, index, offsets, true,
ULINT_UNDEFINED, &heap);
ins_fold = rec_fold(ins_rec, offsets, n_fields, n_bytes, index->id);
if (!page_rec_is_supremum(next_rec)) {
- offsets = rec_get_offsets(next_rec, index, offsets,
- n_fields + (n_bytes > 0), &heap);
+ offsets = rec_get_offsets(
+ next_rec, index, offsets, true,
+ btr_search_get_n_fields(n_fields, n_bytes), &heap);
next_fold = rec_fold(next_rec, offsets, n_fields,
n_bytes, index->id);
}
if (!page_rec_is_infimum(rec)) {
- offsets = rec_get_offsets(rec, index, offsets,
- n_fields + (n_bytes > 0), &heap);
+ offsets = rec_get_offsets(
+ rec, index, offsets, true,
+ btr_search_get_n_fields(n_fields, n_bytes), &heap);
fold = rec_fold(rec, offsets, n_fields, n_bytes, index->id);
} else {
if (left_side) {
- rw_lock_x_lock(&btr_search_latch);
+ btr_search_x_lock(index);
locked = TRUE;
@@ -1771,7 +1850,7 @@ btr_search_update_hash_on_insert(
if (!locked) {
- rw_lock_x_lock(&btr_search_latch);
+ btr_search_x_lock(index);
locked = TRUE;
@@ -1793,7 +1872,7 @@ check_next_rec:
if (!left_side) {
if (!locked) {
- rw_lock_x_lock(&btr_search_latch);
+ btr_search_x_lock(index);
locked = TRUE;
@@ -1812,7 +1891,7 @@ check_next_rec:
if (!locked) {
- rw_lock_x_lock(&btr_search_latch);
+ btr_search_x_lock(index);
locked = TRUE;
@@ -1822,13 +1901,7 @@ check_next_rec:
}
if (!left_side) {
-
ha_insert_for_fold(table, ins_fold, block, ins_rec);
- /*
- fputs("Hash insert for ", stderr);
- dict_index_name_print(stderr, index);
- fprintf(stderr, " fold %lu\n", ins_fold);
- */
} else {
ha_insert_for_fold(table, next_fold, block, next_rec);
}
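+
+ /* curr_left_side chooses which record of a group sharing the
+ hashed prefix a fold points to: with right-side hashing the
+ inserted record is added under its own fold, with left-side
+ hashing its successor is added under the successor's fold. */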
@@ -1839,56 +1912,75 @@ function_exit:
mem_heap_free(heap);
}
if (locked) {
- rw_lock_x_unlock(&btr_search_latch);
+ btr_search_x_unlock(index);
}
}
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-/********************************************************************//**
-Validates the search system.
-@return TRUE if ok */
-UNIV_INTERN
+
+/** Validates the search system for the given hash table.
+@param[in] hash_table_id id of the hash table to validate
+@return TRUE if ok */
+static
ibool
-btr_search_validate(void)
-/*=====================*/
+btr_search_hash_table_validate(ulint hash_table_id)
{
ha_node_t* node;
- ulint n_page_dumps = 0;
ibool ok = TRUE;
ulint i;
ulint cell_count;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
+
+ if (!btr_search_enabled) {
+ return(TRUE);
+ }
/* How many cells to check before temporarily releasing
- btr_search_latch. */
+ search latches. */
ulint chunk_size = 10000;
rec_offs_init(offsets_);
- rw_lock_x_lock(&btr_search_latch);
+ btr_search_x_lock_all();
buf_pool_mutex_enter_all();
- cell_count = hash_get_n_cells(btr_search_sys->hash_index);
+ cell_count = hash_get_n_cells(
+ btr_search_sys->hash_tables[hash_table_id]);
for (i = 0; i < cell_count; i++) {
- /* We release btr_search_latch every once in a while to
+ /* We release search latches every once in a while to
give other queries a chance to run. */
if ((i != 0) && ((i % chunk_size) == 0)) {
+
buf_pool_mutex_exit_all();
- rw_lock_x_unlock(&btr_search_latch);
+ btr_search_x_unlock_all();
+
os_thread_yield();
- rw_lock_x_lock(&btr_search_latch);
+
+ btr_search_x_lock_all();
buf_pool_mutex_enter_all();
+
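+ /* The hash table may have been resized while the latches
+ were released, so re-read the cell count before resuming
+ the scan. */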
+ ulint curr_cell_count = hash_get_n_cells(
+ btr_search_sys->hash_tables[hash_table_id]);
+
+ if (cell_count != curr_cell_count) {
+
+ cell_count = curr_cell_count;
+
+ if (i >= cell_count) {
+ break;
+ }
+ }
}
- node = (ha_node_t*)
- hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
+ node = (ha_node_t*) hash_get_nth_cell(
+ btr_search_sys->hash_tables[hash_table_id], i)->node;
for (; node != NULL; node = node->next) {
const buf_block_t* block
- = buf_block_align((byte*) node->data);
+ = buf_block_from_ahi((byte*) node->data);
const buf_block_t* hash_block;
buf_pool_t* buf_pool;
index_id_t page_index_id;
@@ -1905,8 +1997,7 @@ btr_search_validate(void)
assertion and the comment below) */
hash_block = buf_block_hash_get(
buf_pool,
- buf_block_get_space(block),
- buf_block_get_page_no(block));
+ block->page.id);
} else {
hash_block = NULL;
}
@@ -1922,92 +2013,116 @@ btr_search_validate(void)
After that, it invokes
btr_search_drop_page_hash_index() to
remove the block from
- btr_search_sys->hash_index. */
+ btr_search_sys->hash_tables[i]. */
ut_a(buf_block_get_state(block)
== BUF_BLOCK_REMOVE_HASH);
}
ut_a(!dict_index_is_ibuf(block->index));
+ ut_ad(block->page.id.space() == block->index->space);
page_index_id = btr_page_get_index_id(block->frame);
- offsets = rec_get_offsets(node->data,
- block->index, offsets,
- block->curr_n_fields
- + (block->curr_n_bytes > 0),
- &heap);
-
- if (!block->index || node->fold
- != rec_fold(node->data,
- offsets,
- block->curr_n_fields,
- block->curr_n_bytes,
- page_index_id)) {
+ offsets = rec_get_offsets(
+ node->data, block->index, offsets, true,
+ btr_search_get_n_fields(block->curr_n_fields,
+ block->curr_n_bytes),
+ &heap);
+
+ const ulint fold = rec_fold(
+ node->data, offsets,
+ block->curr_n_fields,
+ block->curr_n_bytes,
+ page_index_id);
+
+ if (node->fold != fold) {
const page_t* page = block->frame;
ok = FALSE;
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error in an adaptive hash"
- " index pointer to page %lu\n"
- "InnoDB: ptr mem address %p"
- " index id %llu,"
- " node fold %lu, rec fold %lu\n",
- (ulong) page_get_page_no(page),
- node->data,
- (ullint) page_index_id,
- (ulong) node->fold,
- (ulong) rec_fold(node->data,
- offsets,
- block->curr_n_fields,
- block->curr_n_bytes,
- page_index_id));
+
+ ib::error() << "Error in an adaptive hash"
+ << " index pointer to page "
+ << page_id_t(page_get_space_id(page),
+ page_get_page_no(page))
+ << ", ptr mem address "
+ << reinterpret_cast<const void*>(
+ node->data)
+ << ", index id " << page_index_id
+ << ", node fold " << node->fold
+ << ", rec fold " << fold;
fputs("InnoDB: Record ", stderr);
rec_print_new(stderr, node->data, offsets);
fprintf(stderr, "\nInnoDB: on that page."
" Page mem address %p, is hashed %p,"
- " n fields %lu, n bytes %lu\n"
+ " n fields %lu\n"
"InnoDB: side %lu\n",
(void*) page, (void*) block->index,
(ulong) block->curr_n_fields,
- (ulong) block->curr_n_bytes,
(ulong) block->curr_left_side);
-
- if (n_page_dumps < 20) {
- buf_page_print(page, 0);
- n_page_dumps++;
- }
+ ut_ad(0);
}
}
}
for (i = 0; i < cell_count; i += chunk_size) {
- ulint end_index = ut_min(i + chunk_size - 1, cell_count - 1);
-
- /* We release btr_search_latch every once in a while to
+ /* We release search latches every once in a while to
give other queries a chance to run. */
if (i != 0) {
+
buf_pool_mutex_exit_all();
- rw_lock_x_unlock(&btr_search_latch);
+ btr_search_x_unlock_all();
+
os_thread_yield();
- rw_lock_x_lock(&btr_search_latch);
+
+ btr_search_x_lock_all();
buf_pool_mutex_enter_all();
+
+ ulint curr_cell_count = hash_get_n_cells(
+ btr_search_sys->hash_tables[hash_table_id]);
+
+ if (cell_count != curr_cell_count) {
+
+ cell_count = curr_cell_count;
+
+ if (i >= cell_count) {
+ break;
+ }
+ }
}
- if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
+ ulint end_index = ut_min(i + chunk_size - 1, cell_count - 1);
+
+ if (!ha_validate(btr_search_sys->hash_tables[hash_table_id],
+ i, end_index)) {
ok = FALSE;
}
}
buf_pool_mutex_exit_all();
- rw_lock_x_unlock(&btr_search_latch);
+ btr_search_x_unlock_all();
+
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
return(ok);
}
+
+/** Validate the search system.
+@return true if ok. */
+bool
+btr_search_validate()
+{
+ for (ulint i = 0; i < btr_ahi_parts; ++i) {
+ if (!btr_search_hash_table_validate(i)) {
+ return(false);
+ }
+ }
+
+ return(true);
+}
+
#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
+#endif /* BTR_CUR_HASH_ADAPT */
diff --git a/storage/innobase/buf/buf0buddy.cc b/storage/innobase/buf/buf0buddy.cc
index bcb935ff211..0863facad52 100644
--- a/storage/innobase/buf/buf0buddy.cc
+++ b/storage/innobase/buf/buf0buddy.cc
@@ -24,12 +24,7 @@ Binary buddy allocator for compressed pages
Created December 2006 by Marko Makela
*******************************************************/
-#define THIS_MODULE
#include "buf0buddy.h"
-#ifdef UNIV_NONINL
-# include "buf0buddy.ic"
-#endif
-#undef THIS_MODULE
#include "buf0buf.h"
#include "buf0lru.h"
#include "buf0flu.h"
@@ -72,11 +67,11 @@ are written.*/
/** Value that we stamp on all buffers that are currently on the zip_free
list. This value is stamped at BUF_BUDDY_STAMP_OFFSET offset */
-#define BUF_BUDDY_STAMP_FREE (SRV_LOG_SPACE_FIRST_ID)
+#define BUF_BUDDY_STAMP_FREE SRV_LOG_SPACE_FIRST_ID
/** Stamp value for non-free buffers. Will be overwritten by a non-zero
value by the consumer of the block */
-#define BUF_BUDDY_STAMP_NONFREE (0XFFFFFFFF)
+#define BUF_BUDDY_STAMP_NONFREE 0XFFFFFFFFUL
#if BUF_BUDDY_STAMP_FREE >= BUF_BUDDY_STAMP_NONFREE
# error "BUF_BUDDY_STAMP_FREE >= BUF_BUDDY_STAMP_NONFREE"
@@ -112,7 +107,7 @@ buf_buddy_mem_invalid(
/**********************************************************************//**
Check if a buddy is stamped free.
-@return whether the buddy is free */
+@return whether the buddy is free */
UNIV_INLINE MY_ATTRIBUTE((warn_unused_result))
bool
buf_buddy_stamp_is_free(
@@ -141,7 +136,7 @@ buf_buddy_stamp_free(
/**********************************************************************//**
Stamps a buddy nonfree.
-@param[in/out] buf block to stamp
+@param[in,out] buf block to stamp
@param[in] i block size */
#define buf_buddy_stamp_nonfree(buf, i) do { \
buf_buddy_mem_invalid(buf, i); \
@@ -153,7 +148,7 @@ Stamps a buddy nonfree.
/**********************************************************************//**
Get the offset of the buddy of a compressed page frame.
-@return the buddy relative of page */
+@return the buddy relative of page */
UNIV_INLINE
void*
buf_buddy_get(
@@ -175,23 +170,32 @@ buf_buddy_get(
}
}
+#ifdef UNIV_DEBUG
/** Validate a given zip_free list. */
struct CheckZipFree {
- ulint i;
- CheckZipFree(ulint i) : i (i) {}
+ CheckZipFree(ulint i) : m_i(i) {}
- void operator()(const buf_buddy_free_t* elem) const
+ void operator()(const buf_buddy_free_t* elem) const
{
- ut_a(buf_buddy_stamp_is_free(elem));
- ut_a(elem->stamp.size <= i);
+ ut_ad(buf_buddy_stamp_is_free(elem));
+ ut_ad(elem->stamp.size <= m_i);
}
+
+ const ulint m_i;
};
-#define BUF_BUDDY_LIST_VALIDATE(bp, i) \
- UT_LIST_VALIDATE(list, buf_buddy_free_t, \
- bp->zip_free[i], CheckZipFree(i))
+/** Validate a buddy list.
+@param[in] buf_pool buffer pool instance
+@param[in] i buddy size to validate */
+static
+void
+buf_buddy_list_validate(
+ const buf_pool_t* buf_pool,
+ ulint i)
+{
+ ut_list_validate(buf_pool->zip_free[i], CheckZipFree(i));
+}
-#ifdef UNIV_DEBUG
/**********************************************************************//**
Debug function to validate that a buffer is indeed free i.e.: in the
zip_free[].
@@ -283,8 +287,8 @@ buf_buddy_add_to_free(
ut_ad(buf_pool->zip_free[i].start != buf);
buf_buddy_stamp_free(buf, i);
- UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], buf);
- ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
+ UT_LIST_ADD_FIRST(buf_pool->zip_free[i], buf);
+ ut_d(buf_buddy_list_validate(buf_pool, i));
}
/**********************************************************************//**
@@ -294,20 +298,21 @@ void
buf_buddy_remove_from_free(
/*=======================*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- buf_buddy_free_t* buf, /*!< in,own: block to be freed */
+ buf_buddy_free_t* buf, /*!< in,own: block to be
+ freed */
ulint i) /*!< in: index of
buf_pool->zip_free[] */
{
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(buf_buddy_check_free(buf_pool, buf, i));
- UT_LIST_REMOVE(list, buf_pool->zip_free[i], buf);
+ UT_LIST_REMOVE(buf_pool->zip_free[i], buf);
buf_buddy_stamp_nonfree(buf, i);
}
/**********************************************************************//**
Try to allocate a block from buf_pool->zip_free[].
-@return allocated block, or NULL if buf_pool->zip_free[] was empty */
+@return allocated block, or NULL if buf_pool->zip_free[] was empty */
static
buf_buddy_free_t*
buf_buddy_alloc_zip(
@@ -321,10 +326,22 @@ buf_buddy_alloc_zip(
ut_a(i < BUF_BUDDY_SIZES);
ut_a(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
- ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
+ ut_d(buf_buddy_list_validate(buf_pool, i));
buf = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
+ if (buf_pool->curr_size < buf_pool->old_size
+ && UT_LIST_GET_LEN(buf_pool->withdraw)
+ < buf_pool->withdraw_target) {
+
+ while (buf != NULL
+ && buf_frame_will_withdrawn(
+ buf_pool, reinterpret_cast<byte*>(buf))) {
+			/* This block should be withdrawn, not allocated */
+ buf = UT_LIST_GET_NEXT(list, buf);
+ }
+ }
+
if (buf) {
buf_buddy_remove_from_free(buf_pool, buf, i);
} else if (i + 1 < BUF_BUDDY_SIZES) {
@@ -344,7 +361,7 @@ buf_buddy_alloc_zip(
if (buf) {
/* Trash the page other than the BUF_BUDDY_STAMP_NONFREE. */
- UNIV_MEM_TRASH(buf, ~i, BUF_BUDDY_STAMP_OFFSET);
+ UNIV_MEM_TRASH((void*) buf, ~i, BUF_BUDDY_STAMP_OFFSET);
UNIV_MEM_TRASH(BUF_BUDDY_STAMP_OFFSET + 4
+ buf->stamp.bytes, ~i,
(BUF_BUDDY_LOW << i)
@@ -389,9 +406,9 @@ buf_buddy_block_free(
UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
block = (buf_block_t*) bpage;
- mutex_enter(&block->mutex);
+ buf_page_mutex_enter(block);
buf_LRU_block_free_non_file_page(block);
- mutex_exit(&block->mutex);
+ buf_page_mutex_exit(block);
ut_ad(buf_pool->buddy_n_frames > 0);
ut_d(buf_pool->buddy_n_frames--);
@@ -426,7 +443,7 @@ buf_buddy_block_register(
/**********************************************************************//**
Allocate a block from a bigger object.
-@return allocated block */
+@return allocated block */
static
void*
buf_buddy_alloc_from(
@@ -464,8 +481,7 @@ buf_buddy_alloc_from(
Allocate a block. The thread calling this function must hold
buf_pool->mutex and must not hold buf_pool->zip_mutex or any block->mutex.
The buf_pool_mutex may be released and reacquired.
-@return allocated block, never NULL */
-UNIV_INTERN
+@return allocated block, never NULL */
void*
buf_buddy_alloc_low(
/*================*/
@@ -520,7 +536,7 @@ func_exit:
/**********************************************************************//**
Try to relocate a block.
-@return true if relocated */
+@return true if relocated */
static
bool
buf_buddy_relocate(
@@ -528,11 +544,13 @@ buf_buddy_relocate(
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
void* src, /*!< in: block to relocate */
void* dst, /*!< in: free block to relocate to */
- ulint i) /*!< in: index of
+ ulint i, /*!< in: index of
buf_pool->zip_free[] */
+ bool force) /*!< in: true if we must relocate
+ always */
{
buf_page_t* bpage;
- const ulint size = BUF_BUDDY_LOW << i;
+ const ulint size = BUF_BUDDY_LOW << i;
ulint space;
ulint offset;
@@ -555,12 +573,19 @@ buf_buddy_relocate(
ut_ad(space != BUF_BUDDY_STAMP_FREE);
- ulint fold = buf_page_address_fold(space, offset);
- rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold);
+ const page_id_t page_id(space, offset);
+
+ /* If space,offset is bogus, then we know that the
+ buf_page_hash_get_low() call below will return NULL. */
+ if (!force && buf_pool != buf_pool_get(page_id)) {
+ return(false);
+ }
+
+ rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, page_id);
rw_lock_x_lock(hash_lock);
- bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
+ bpage = buf_page_hash_get_low(buf_pool, page_id);
if (!bpage || bpage->zip.data != src) {
/* The block has probably been freshly
@@ -570,7 +595,27 @@ buf_buddy_relocate(
rw_lock_x_unlock(hash_lock);
- return(false);
+ if (!force || space != 0 || offset != 0) {
+ return(false);
+ }
+
+		/* It might be just an uninitialized page.
+		We should also search the LRU list. */
+
+ bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
+ while (bpage != NULL) {
+ if (bpage->zip.data == src) {
+ hash_lock = buf_page_hash_lock_get(
+ buf_pool, bpage->id);
+ rw_lock_x_lock(hash_lock);
+ break;
+ }
+ bpage = UT_LIST_GET_NEXT(LRU, bpage);
+ }
+
+ if (bpage == NULL) {
+ return(false);
+ }
}
if (page_zip_get_size(&bpage->zip) != size) {
@@ -588,7 +633,7 @@ buf_buddy_relocate(
contain uninitialized data. */
UNIV_MEM_ASSERT_W(src, size);
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ BPageMutex* block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex);
@@ -598,10 +643,7 @@ buf_buddy_relocate(
ut_a(bpage->zip.data == src);
- /* Note: This is potentially expensive, we need a better
- solution here. We go with correctness for now. */
- ::memcpy(dst, src, size);
-
+ memcpy(dst, src, size);
bpage->zip.data = reinterpret_cast<page_zip_t*>(dst);
rw_lock_x_unlock(hash_lock);
@@ -612,7 +654,7 @@ buf_buddy_relocate(
reinterpret_cast<buf_buddy_free_t*>(src), i);
buf_buddy_stat_t* buddy_stat = &buf_pool->buddy_stat[i];
- ++buddy_stat->relocated;
+ buddy_stat->relocated++;
buddy_stat->relocated_usec+= (my_interval_timer() - ns) / 1000;
return(true);
}
@@ -620,13 +662,11 @@ buf_buddy_relocate(
rw_lock_x_unlock(hash_lock);
mutex_exit(block_mutex);
-
return(false);
}
/**********************************************************************//**
Deallocate a block. */
-UNIV_INTERN
void
buf_buddy_free_low(
/*===============*/
@@ -660,7 +700,8 @@ recombine:
/* Do not recombine blocks if there are few free blocks.
We may waste up to 15360*max_len bytes to free blocks
(1024 + 2048 + 4096 + 8192 = 15360) */
- if (UT_LIST_GET_LEN(buf_pool->zip_free[i]) < 16) {
+ if (UT_LIST_GET_LEN(buf_pool->zip_free[i]) < 16
+ && buf_pool->curr_size >= buf_pool->old_size) {
goto func_exit;
}
@@ -681,7 +722,7 @@ buddy_is_free:
goto recombine;
case BUF_BUDDY_STATE_USED:
- ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
+ ut_d(buf_buddy_list_validate(buf_pool, i));
/* The buddy is not free. Is there a free block of
this size? */
@@ -695,7 +736,8 @@ buddy_is_free:
/* Try to relocate the buddy of buf to the free
block. */
- if (buf_buddy_relocate(buf_pool, buddy, zip_buf, i)) {
+ if (buf_buddy_relocate(buf_pool, buddy, zip_buf, i,
+ false)) {
goto buddy_is_free;
}
@@ -716,3 +758,119 @@ func_exit:
reinterpret_cast<buf_buddy_free_t*>(buf),
i);
}
+
+/** Reallocate a block.
+@param[in] buf_pool buffer pool instance
+@param[in] buf block to be reallocated, must be pointed
+to by the buffer pool
+@param[in] size block size, up to UNIV_PAGE_SIZE
+@retval true if a free block was found (even if relocation then failed)
+@retval false if failed because of no free blocks. */
+bool
+buf_buddy_realloc(
+ buf_pool_t* buf_pool,
+ void* buf,
+ ulint size)
+{
+ buf_block_t* block = NULL;
+ ulint i = buf_buddy_get_slot(size);
+
+ ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(!mutex_own(&buf_pool->zip_mutex));
+ ut_ad(i <= BUF_BUDDY_SIZES);
+ ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
+
+ if (i < BUF_BUDDY_SIZES) {
+ /* Try to allocate from the buddy system. */
+ block = reinterpret_cast<buf_block_t*>(
+ buf_buddy_alloc_zip(buf_pool, i));
+ }
+
+ if (block == NULL) {
+ /* Try allocating from the buf_pool->free list. */
+ block = buf_LRU_get_free_only(buf_pool);
+
+ if (block == NULL) {
+ return(false); /* free_list was not enough */
+ }
+
+ buf_buddy_block_register(block);
+
+ block = reinterpret_cast<buf_block_t*>(
+ buf_buddy_alloc_from(
+ buf_pool, block->frame, i, BUF_BUDDY_SIZES));
+ }
+
+ buf_pool->buddy_stat[i].used++;
+
+ /* Try to relocate the buddy of buf to the free block. */
+ if (buf_buddy_relocate(buf_pool, buf, block, i, true)) {
+ /* succeeded */
+ buf_buddy_free_low(buf_pool, buf, i);
+ } else {
+ /* failed */
+ buf_buddy_free_low(buf_pool, block, i);
+ }
+
+ return(true); /* free_list was enough */
+}
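
buf_buddy_realloc() follows a get-new, relocate, free-old pattern: it first tries the buddy system, then falls back to taking a whole frame from the free list, and finally forces a relocation of the old block's payload; whichever of the two blocks ends up unused is handed back. A sketch of that pattern against a hypothetical allocator interface (alloc/relocate/release are stand-ins, not InnoDB calls):

    #include <cstddef>

    // Reallocate by relocation: obtain a new block, try to move the
    // payload, and free whichever of the two blocks is no longer needed.
    template <class Alloc>
    bool realloc_by_relocation(Alloc& a, void*& buf, std::size_t size) {
        void* fresh = a.alloc(size);         // buddy alloc + free-list fallback
        if (fresh == nullptr)
            return false;                    // no free blocks at all

        if (a.relocate(buf, fresh, size)) {  // copy done under proper latches
            a.release(buf, size);            // old block returns to the pool
            buf = fresh;
        } else {
            a.release(fresh, size);          // keep old block, give fresh back
        }
        return true;                         // a free block was found
    }
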
+
+/** Combine all pairs of free buddies.
+@param[in] buf_pool buffer pool instance */
+void
+buf_buddy_condense_free(
+ buf_pool_t* buf_pool)
+{
+ ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(buf_pool->curr_size < buf_pool->old_size);
+
+ for (ulint i = 0; i < UT_ARR_SIZE(buf_pool->zip_free); ++i) {
+ buf_buddy_free_t* buf =
+ UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
+
+ /* seek to withdraw target */
+ while (buf != NULL
+ && !buf_frame_will_withdrawn(
+ buf_pool, reinterpret_cast<byte*>(buf))) {
+ buf = UT_LIST_GET_NEXT(list, buf);
+ }
+
+ while (buf != NULL) {
+ buf_buddy_free_t* next =
+ UT_LIST_GET_NEXT(list, buf);
+
+ buf_buddy_free_t* buddy =
+ reinterpret_cast<buf_buddy_free_t*>(
+ buf_buddy_get(
+ reinterpret_cast<byte*>(buf),
+ BUF_BUDDY_LOW << i));
+
+ /* seek to the next withdraw target */
+ while (true) {
+ while (next != NULL
+ && !buf_frame_will_withdrawn(
+ buf_pool,
+ reinterpret_cast<byte*>(next))) {
+ next = UT_LIST_GET_NEXT(list, next);
+ }
+
+ if (buddy != next) {
+ break;
+ }
+
+ next = UT_LIST_GET_NEXT(list, next);
+ }
+
+ if (buf_buddy_is_free(buddy, i)
+ == BUF_BUDDY_STATE_FREE) {
+ /* Both buf and buddy are free.
+ Try to combine them. */
+ buf_buddy_remove_from_free(buf_pool, buf, i);
+ buf_pool->buddy_stat[i].used++;
+
+ buf_buddy_free_low(buf_pool, buf, i);
+ }
+
+ buf = next;
+ }
+ }
+}
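
Both buf_buddy_alloc_zip() and buf_buddy_condense_free() now skip blocks that lie in the region being withdrawn while the pool shrinks, so those frames drain out of circulation instead of being reissued. A stand-alone sketch of that filtering, with a hypothetical shrink window in place of buf_frame_will_withdrawn():

    #include <cstdint>

    struct ShrinkWindow {
        std::uintptr_t begin;   // first byte of the area being withdrawn
        std::uintptr_t end;     // one past the last byte of that area
        bool active;            // curr_size < old_size, target not reached

        bool must_withdraw(const void* frame) const {
            std::uintptr_t p = reinterpret_cast<std::uintptr_t>(frame);
            return active && p >= begin && p < end;
        }
    };

    // Skip free-list entries inside the shrink window, the way
    // buf_buddy_alloc_zip() skips blocks flagged for withdrawal.
    template <class Node>
    Node* first_usable(Node* head, const ShrinkWindow& w) {
        while (head != nullptr && w.must_withdraw(head))
            head = head->next;
        return head;
    }
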
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 136d46b7027..954b16eb2d2 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -1,8 +1,8 @@
/*****************************************************************************
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2018, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
-Copyright (c) 2013, 2019, MariaDB Corporation.
+Copyright (c) 2013, 2020, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -31,8 +31,9 @@ The database buffer buf_pool
Created 11/5/1995 Heikki Tuuri
*******************************************************/
-#include "univ.i"
+#include "mtr0types.h"
#include "mach0data.h"
+#include "page0size.h"
#include "buf0buf.h"
#include <string.h>
@@ -45,28 +46,32 @@ Created 11/5/1995 Heikki Tuuri
#include "btr0btr.h"
#include "fil0fil.h"
#include "fil0crypt.h"
-#ifndef UNIV_HOTBACKUP
#include "buf0buddy.h"
#include "lock0lock.h"
+#include "sync0rw.h"
#include "btr0sea.h"
#include "ibuf0ibuf.h"
#include "trx0undo.h"
+#include "trx0purge.h"
#include "log0log.h"
-#endif /* !UNIV_HOTBACKUP */
+#include "dict0stats_bg.h"
#include "srv0srv.h"
+#include "srv0start.h"
#include "dict0dict.h"
#include "log0recv.h"
#include "srv0mon.h"
-#ifdef HAVE_LIBNUMA
-#include <numa.h>
-#include <numaif.h>
-#endif // HAVE_LIBNUMA
+#include "log0crypt.h"
#endif /* !UNIV_INNOCHECKSUM */
#include "page0zip.h"
-#include "buf0checksum.h"
+#include "sync0sync.h"
+#include "buf0dump.h"
+#include <new>
+#include <map>
+#include <sstream>
#ifndef UNIV_INNOCHECKSUM
#include "fil0pagecompress.h"
-#include "ha_prototypes.h"
+#include "fsp0pagecompress.h"
+#endif
#include "ut0byte.h"
#include <new>
@@ -78,29 +83,55 @@ Created 11/5/1995 Heikki Tuuri
#include "lzo/lzo1x.h"
#endif
-#ifdef HAVE_SNAPPY
-#include "snappy-c.h"
-#endif
+using st_::span;
-static void *aligned_malloc(size_t size, size_t align)
+#ifdef HAVE_LIBNUMA
+#include <numa.h>
+#include <numaif.h>
+struct set_numa_interleave_t
{
-#ifdef _MSC_VER
- return _aligned_malloc(size, align);
-#else
- void *result;
- if (posix_memalign(&result, align, size))
- result= NULL;
- return result;
-#endif
-}
+ set_numa_interleave_t()
+ {
+ if (srv_numa_interleave) {
+
+ struct bitmask *numa_mems_allowed = numa_get_mems_allowed();
+ ib::info() << "Setting NUMA memory policy to"
+ " MPOL_INTERLEAVE";
+ if (set_mempolicy(MPOL_INTERLEAVE,
+ numa_mems_allowed->maskp,
+ numa_mems_allowed->size) != 0) {
+
+ ib::warn() << "Failed to set NUMA memory"
+ " policy to MPOL_INTERLEAVE: "
+ << strerror(errno);
+ }
+ numa_bitmask_free(numa_mems_allowed);
+ }
+ }
-inline void aligned_free(void *ptr) {
-#ifdef _MSC_VER
- _aligned_free(ptr);
+ ~set_numa_interleave_t()
+ {
+ if (srv_numa_interleave) {
+
+ ib::info() << "Setting NUMA memory policy to"
+ " MPOL_DEFAULT";
+ if (set_mempolicy(MPOL_DEFAULT, NULL, 0) != 0) {
+ ib::warn() << "Failed to set NUMA memory"
+ " policy to MPOL_DEFAULT: "
+ << strerror(errno);
+ }
+ }
+ }
+};
+
+#define NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE set_numa_interleave_t scoped_numa
#else
- free(ptr);
+#define NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE
+#endif /* HAVE_LIBNUMA */
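
set_numa_interleave_t is an RAII guard: the constructor switches the memory policy to MPOL_INTERLEAVE and the destructor restores MPOL_DEFAULT, so the interleave policy covers exactly the scope that declares NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE, on every exit path. The same idiom in generic form (a sketch, not part of the patch):

    #include <functional>
    #include <utility>

    // Apply a setting on scope entry, restore it on every exit path.
    class ScopedPolicy {
    public:
        ScopedPolicy(std::function<void()> apply,
                     std::function<void()> restore)
            : restore_(std::move(restore)) { apply(); }
        ~ScopedPolicy() { restore_(); }
        ScopedPolicy(const ScopedPolicy&) = delete;
        ScopedPolicy& operator=(const ScopedPolicy&) = delete;
    private:
        std::function<void()> restore_;
    };

    // Usage, mirroring NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE:
    //   ScopedPolicy numa([]{ /* set_mempolicy(MPOL_INTERLEAVE, ...) */ },
    //                     []{ /* set_mempolicy(MPOL_DEFAULT, 0, 0) */ });
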
+
+#ifdef HAVE_SNAPPY
+#include "snappy-c.h"
#endif
-}
/*
IMPLEMENTATION OF THE BUFFER POOL
@@ -290,41 +321,58 @@ that the whole area may be needed in the near future, and issue
the read requests for the whole area.
*/
-#ifndef UNIV_HOTBACKUP
+#ifndef UNIV_INNOCHECKSUM
/** Value in microseconds */
static const int WAIT_FOR_READ = 100;
-/** Number of attemtps made to read in a page in the buffer pool */
-static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;
+static const int WAIT_FOR_WRITE = 100;
+/** Number of attempts made to read in a page in the buffer pool */
+static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;
+/** Number of pages to read ahead */
+static const ulint BUF_READ_AHEAD_PAGES = 64;
+/** The maximum portion of the buffer pool that can be used for the
+read-ahead buffer. (Divide buf_pool size by this amount) */
+static const ulint BUF_READ_AHEAD_PORTION = 32;
/** The buffer pools of the database */
-UNIV_INTERN buf_pool_t* buf_pool_ptr;
+buf_pool_t* buf_pool_ptr;
+
+/** true when resizing buffer pool is in the critical path. */
+volatile bool buf_pool_resizing;
+
+/** true when withdrawing buffer pool pages might cause page relocation */
+volatile bool buf_pool_withdrawing;
+
+/** the clock is incremented every time a pointer to a page may become obsolete;
+if the withdraw clock has not changed, the pointer is still valid in the
+buffer pool; if it has changed, the pointer might no longer be in the
+buffer pool. */
+volatile ulint buf_withdraw_clock;
+
+/** Map of buffer pool chunks, keyed by each chunk's first frame address.
+This map is rebuilt by buffer pool initialization and by buf_resize_thread.
+Currently, updates need no mutex protection. */
+typedef std::map<
+ const byte*,
+ buf_chunk_t*,
+ std::less<const byte*>,
+ ut_allocator<std::pair<const byte* const, buf_chunk_t*> > >
+ buf_pool_chunk_map_t;
+
+static buf_pool_chunk_map_t* buf_chunk_map_reg;
+
+/** Chunk map to be used for lookups.
+The map pointed to by this should not be updated */
+static buf_pool_chunk_map_t* buf_chunk_map_ref = NULL;
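
Keying the chunk map by each chunk's first frame address makes "which chunk owns this frame?" an O(log n) query: upper_bound() finds the first chunk starting beyond the frame, and the entry just before it is the only candidate. A sketch of that lookup (the Chunk type here is hypothetical):

    #include <cstddef>
    #include <map>

    struct Chunk {
        const unsigned char* frames;   // first frame of the chunk
        std::size_t          bytes;    // size of the frame area
    };

    using ChunkMap = std::map<const unsigned char*, Chunk*>;

    Chunk* chunk_containing(const ChunkMap& m, const unsigned char* frame) {
        ChunkMap::const_iterator it = m.upper_bound(frame);
        if (it == m.begin())
            return nullptr;                // below every registered chunk
        --it;                              // last chunk starting <= frame
        Chunk* c = it->second;
        return frame < c->frames + c->bytes ? c : nullptr;
    }
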
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-static ulint buf_dbg_counter = 0; /*!< This is used to insert validation
- operations in execution in the
- debug version */
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG
-/** If this is set TRUE, the program prints info whenever
-read-ahead or flush occurs */
-UNIV_INTERN ibool buf_debug_prints = FALSE;
+/** Disable resizing of the buffer pool, so that assertion code is not expensive. */
+my_bool buf_disable_resize_buffer_pool_debug = TRUE;
#endif /* UNIV_DEBUG */
-#ifdef UNIV_PFS_RWLOCK
-/* Keys to register buffer block related rwlocks and mutexes with
-performance schema */
-UNIV_INTERN mysql_pfs_key_t buf_block_lock_key;
-# ifdef UNIV_SYNC_DEBUG
-UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key;
-# endif /* UNIV_SYNC_DEBUG */
-#endif /* UNIV_PFS_RWLOCK */
-
-#ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key;
-UNIV_INTERN mysql_pfs_key_t buf_pool_mutex_key;
-UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key;
-UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/** This is used to insert validation operations in execution
+in the debug version */
+static ulint buf_dbg_counter = 0;
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
# ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
@@ -398,6 +446,56 @@ static void buf_tmp_reserve_compression_buf(buf_tmp_buffer_t* slot)
}
}
+/** Registers a chunk to buf_pool_chunk_map
+@param[in] chunk chunk of buffers */
+static
+void
+buf_pool_register_chunk(
+ buf_chunk_t* chunk)
+{
+ buf_chunk_map_reg->insert(buf_pool_chunk_map_t::value_type(
+ chunk->blocks->frame, chunk));
+}
+
+/** Decrypt a page for temporary tablespace.
+@param[in,out] tmp_frame Temporary buffer
+@param[in] src_frame Page to decrypt
+@return true if the temporary tablespace page was decrypted, false if not */
+static bool buf_tmp_page_decrypt(byte* tmp_frame, byte* src_frame)
+{
+ if (buf_is_zeroes(span<const byte>(src_frame, srv_page_size))) {
+ return true;
+ }
+
+	/* The FIL page header is stored in the clear */
+ uint header_len = FIL_PAGE_DATA;
+
+ /* Copy FIL page header, it is not encrypted */
+ memcpy(tmp_frame, src_frame, header_len);
+
+ /* Calculate the offset where decryption starts */
+ const byte* src = src_frame + header_len;
+ byte* dst = tmp_frame + header_len;
+ uint srclen = uint(srv_page_size)
+ - header_len - FIL_PAGE_DATA_END;
+ ulint offset = mach_read_from_4(src_frame + FIL_PAGE_OFFSET);
+
+ if (!log_tmp_block_decrypt(src, srclen, dst,
+ (offset * srv_page_size))) {
+ return false;
+ }
+
+ memcpy(tmp_frame + srv_page_size - FIL_PAGE_DATA_END,
+ src_frame + srv_page_size - FIL_PAGE_DATA_END,
+ FIL_PAGE_DATA_END);
+
+ memcpy(src_frame, tmp_frame, srv_page_size);
+ srv_stats.pages_decrypted.inc();
+ srv_stats.n_temp_blocks_decrypted.inc();
+
+ return true; /* page was decrypted */
+}
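
buf_tmp_page_decrypt() treats a page as three regions: the FIL header and the end-of-page trailer stay in the clear, and only the bytes in between are ciphertext. A sketch of that split, with a toy XOR standing in for log_tmp_block_decrypt() (the 38/8 byte offsets match FIL_PAGE_DATA and FIL_PAGE_DATA_END; a real page is far larger than their sum):

    #include <cstddef>
    #include <cstring>

    const std::size_t HDR = 38;   // FIL_PAGE_DATA: clear-text page header
    const std::size_t TRL = 8;    // FIL_PAGE_DATA_END: clear-text trailer

    void decrypt_page(unsigned char* dst, const unsigned char* src,
                      std::size_t page_size, unsigned char key) {
        std::memcpy(dst, src, HDR);                  // header copied as-is
        for (std::size_t i = HDR; i < page_size - TRL; i++)
            dst[i] = src[i] ^ key;                   // decrypt the body only
        std::memcpy(dst + page_size - TRL,
                    src + page_size - TRL, TRL);     // trailer copied as-is
    }
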
+
/** Decrypt a page.
@param[in,out] bpage Page control block
@param[in,out] space tablespace
@@ -405,15 +503,31 @@ static void buf_tmp_reserve_compression_buf(buf_tmp_buffer_t* slot)
static bool buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space)
{
ut_ad(space->n_pending_ios > 0);
- ut_ad(space->id == bpage->space);
+ ut_ad(space->id == bpage->id.space());
byte* dst_frame = bpage->zip.data ? bpage->zip.data :
((buf_block_t*) bpage)->frame;
bool page_compressed = fil_page_is_compressed(dst_frame);
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- if (bpage->offset == 0) {
+ if (bpage->id.page_no() == 0) {
/* File header pages are not encrypted/compressed */
+ return (true);
+ }
+
+ if (space->purpose == FIL_TYPE_TEMPORARY
+ && innodb_encrypt_temporary_tables) {
+ buf_tmp_buffer_t* slot = buf_pool_reserve_tmp_slot(buf_pool);
+ buf_tmp_reserve_crypt_buf(slot);
+
+ if (!buf_tmp_page_decrypt(slot->crypt_buf, dst_frame)) {
+ slot->release();
+ ib::error() << "Encrypted page " << bpage->id
+ << " in file " << space->chain.start->name;
+ return false;
+ }
+
+ slot->release();
return true;
}
@@ -447,25 +561,14 @@ decompress_with_slot:
+ dst_frame)) {
/* Verify encryption checksum before we even try to
decrypt. */
- if (!fil_space_verify_crypt_checksum(
- dst_frame, buf_page_get_zip_size(bpage))) {
-
+ if (!fil_space_verify_crypt_checksum(dst_frame, bpage->size)) {
decrypt_failed:
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Encrypted page %u:%u in file %s"
- " looks corrupted; key_version=" ULINTPF,
- bpage->space, bpage->offset,
- space->chain.start->name,
- mach_read_from_4(
- FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
- + dst_frame));
-
- /* Mark page encrypted in case it should be. */
- if (space->crypt_data->type
- != CRYPT_SCHEME_UNENCRYPTED) {
- bpage->encrypted = true;
- }
-
+ ib::error() << "Encrypted page " << bpage->id
+ << " in file " << space->chain.start->name
+ << " looks corrupted; key_version="
+ << mach_read_from_4(
+ FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
+ + dst_frame);
return false;
}
@@ -476,8 +579,7 @@ decrypt_failed:
ut_d(fil_page_type_validate(dst_frame));
/* decrypt using crypt_buf to dst_frame */
- if (!fil_space_decrypt(space, slot->crypt_buf,
- dst_frame, &bpage->encrypted)) {
+ if (!fil_space_decrypt(space, slot->crypt_buf, dst_frame)) {
slot->release();
goto decrypt_failed;
}
@@ -497,15 +599,6 @@ decrypt_failed:
return true;
}
-/********************************************************************//**
-Mark a table with the specified space pointed by bpage->space corrupted.
-Also remove the bpage from LRU list.
-@param[in,out] bpage Block */
-static
-void
-buf_mark_space_corrupt(
- buf_page_t* bpage);
-
/* prototypes for new functions added to ha_innodb.cc */
trx_t* innobase_get_trx();
@@ -513,13 +606,10 @@ trx_t* innobase_get_trx();
Gets the smallest oldest_modification lsn for any page in the pool. Returns
zero if all modified pages have been flushed to disk.
@return oldest modification in pool, zero if none */
-UNIV_INTERN
lsn_t
buf_pool_get_oldest_modification(void)
/*==================================*/
{
- ulint i;
- buf_page_t* bpage;
lsn_t lsn = 0;
lsn_t oldest_lsn = 0;
@@ -527,14 +617,24 @@ buf_pool_get_oldest_modification(void)
thread to add a dirty page to any flush list. */
log_flush_order_mutex_enter();
- for (i = 0; i < srv_buf_pool_instances; i++) {
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool;
buf_pool = buf_pool_from_array(i);
buf_flush_list_mutex_enter(buf_pool);
- bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
+ buf_page_t* bpage;
+
+		/* We don't let a log checkpoint stall just because pages
+		from the system temporary tablespace have not yet been
+		flushed to disk; such objects generate no redo log anyway. */
+ for (bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
+ bpage != NULL
+ && fsp_is_system_temporary(bpage->id.space());
+ bpage = UT_LIST_GET_PREV(list, bpage)) {
+ /* Do nothing. */
+ }
if (bpage != NULL) {
ut_ad(bpage->in_flush_list);
@@ -558,7 +658,6 @@ buf_pool_get_oldest_modification(void)
/********************************************************************//**
Get total buffer pool statistics. */
-UNIV_INTERN
void
buf_get_total_list_len(
/*===================*/
@@ -585,7 +684,6 @@ buf_get_total_list_len(
/********************************************************************//**
Get total list size in bytes from all buffer pools. */
-UNIV_INTERN
void
buf_get_total_list_size_in_bytes(
/*=============================*/
@@ -611,7 +709,6 @@ buf_get_total_list_size_in_bytes(
/********************************************************************//**
Get total buffer pool statistics. */
-UNIV_INTERN
void
buf_get_total_stat(
/*===============*/
@@ -645,7 +742,6 @@ buf_get_total_stat(
/********************************************************************//**
Allocates a buffer block.
@return own: the allocated block, in state BUF_BLOCK_MEMORY */
-UNIV_INTERN
buf_block_t*
buf_block_alloc(
/*============*/
@@ -670,22 +766,20 @@ buf_block_alloc(
return(block);
}
-#endif /* !UNIV_HOTBACKUP */
#endif /* !UNIV_INNOCHECKSUM */
/** Checks if the page is in crc32 checksum format.
-@param[in] read_buf database page
-@param[in] checksum_field1 new checksum field
-@param[in] checksum_field2 old checksum field
-@return true if the page is in crc32 checksum format */
-UNIV_INTERN
+@param[in] read_buf database page
+@param[in] checksum_field1 new checksum field
+@param[in] checksum_field2 old checksum field
+@return true if the page is in crc32 checksum format. */
bool
buf_page_is_checksum_valid_crc32(
- const byte* read_buf,
- ulint checksum_field1,
- ulint checksum_field2)
+ const byte* read_buf,
+ ulint checksum_field1,
+ ulint checksum_field2)
{
- ib_uint32_t crc32 = buf_calc_page_crc32(read_buf);
+ const uint32_t crc32 = buf_calc_page_crc32(read_buf);
#ifdef UNIV_INNOCHECKSUM
if (log_file
@@ -698,29 +792,27 @@ buf_page_is_checksum_valid_crc32(
}
#endif /* UNIV_INNOCHECKSUM */
- if (!(checksum_field1 == crc32 && checksum_field2 == crc32)) {
- DBUG_PRINT("buf_checksum",
- ("Page checksum crc32 not valid field1 " ULINTPF
- " field2 " ULINTPF " crc32 %u.",
- checksum_field1, checksum_field2, crc32));
- return (false);
+ if (checksum_field1 != checksum_field2) {
+ return false;
}
-
- return (true);
+ return checksum_field1 == crc32
+#ifdef INNODB_BUG_ENDIAN_CRC32
+ || checksum_field1 == buf_calc_page_crc32(read_buf, true)
+#endif
+ ;
}
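
After this change the CRC-32C check short-circuits: the two stored checksum fields must agree before anything is compared against the computed value, and with INNODB_BUG_ENDIAN_CRC32 a checksum computed in the legacy byte order is accepted as well. The rule, restated as a small sketch:

    #include <cstdint>

    // A page is in crc32 format iff both stored fields carry the same
    // value and that value matches a computed checksum (normal byte
    // order, or the legacy order kept for INNODB_BUG_ENDIAN_CRC32).
    bool crc32_fields_valid(std::uint32_t field1, std::uint32_t field2,
                            std::uint32_t crc, std::uint32_t legacy_crc) {
        if (field1 != field2)
            return false;            // the two copies must agree first
        return field1 == crc || field1 == legacy_crc;
    }
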
/** Checks if the page is in innodb checksum format.
@param[in] read_buf database page
@param[in] checksum_field1 new checksum field
@param[in] checksum_field2 old checksum field
-@return true if the page is in innodb checksum format */
-UNIV_INTERN
+@return true if the page is in innodb checksum format. */
bool
buf_page_is_checksum_valid_innodb(
- const byte* read_buf,
- ulint checksum_field1,
- ulint checksum_field2)
+ const byte* read_buf,
+ ulint checksum_field1,
+ ulint checksum_field2)
{
/* There are 2 valid formulas for
checksum_field2 (old checksum field) which algo=innodb could have
@@ -765,14 +857,16 @@ buf_page_is_checksum_valid_innodb(
}
#endif /* UNIV_INNOCHECKSUM */
+
if (checksum_field2 != mach_read_from_4(read_buf + FIL_PAGE_LSN)
&& checksum_field2 != old_checksum) {
- DBUG_PRINT("buf_checksum",
- ("Page checksum innodb not valid field1 " ULINTPF
- " field2 " ULINTPF "crc32 " ULINTPF " lsn " ULINTPF ".",
- checksum_field1, checksum_field2, old_checksum,
- mach_read_from_4(read_buf + FIL_PAGE_LSN)));
-
+ DBUG_LOG("checksum",
+ "Page checksum crc32 not valid"
+ << " field1 " << checksum_field1
+ << " field2 " << checksum_field2
+ << " crc32 " << buf_calc_page_old_checksum(read_buf)
+ << " lsn " << mach_read_from_4(
+ read_buf + FIL_PAGE_LSN));
return(false);
}
@@ -781,14 +875,14 @@ buf_page_is_checksum_valid_innodb(
/* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
(always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */
- if (checksum_field1 != 0
- && checksum_field1 != new_checksum) {
- DBUG_PRINT("buf_checksum",
- ("Page checksum innodb not valid field1 " ULINTPF
- " field2 " ULINTPF "crc32 " ULINTPF " lsn " ULINTPF ".",
- checksum_field1, checksum_field2, new_checksum,
- mach_read_from_4(read_buf + FIL_PAGE_LSN)));
-
+ if (checksum_field1 != 0 && checksum_field1 != new_checksum) {
+ DBUG_LOG("checksum",
+ "Page checksum crc32 not valid"
+ << " field1 " << checksum_field1
+ << " field2 " << checksum_field2
+ << " crc32 " << buf_calc_page_new_checksum(read_buf)
+ << " lsn " << mach_read_from_4(
+ read_buf + FIL_PAGE_LSN));
return(false);
}
@@ -799,29 +893,32 @@ buf_page_is_checksum_valid_innodb(
@param[in] read_buf database page
@param[in] checksum_field1 new checksum field
@param[in] checksum_field2 old checksum field
-@return true if the page is in none checksum format */
-UNIV_INTERN
+@return true if the page is in none checksum format. */
bool
buf_page_is_checksum_valid_none(
- const byte* read_buf,
- ulint checksum_field1,
- ulint checksum_field2)
+ const byte* read_buf,
+ ulint checksum_field1,
+ ulint checksum_field2)
{
-
- if (!(checksum_field1 == checksum_field2 && checksum_field1 == BUF_NO_CHECKSUM_MAGIC)) {
- DBUG_PRINT("buf_checksum",
- ("Page checksum none not valid field1 " ULINTPF
- " field2 " ULINTPF "crc32 " ULINTPF " lsn " ULINTPF ".",
- checksum_field1, checksum_field2, BUF_NO_CHECKSUM_MAGIC,
- mach_read_from_4(read_buf + FIL_PAGE_LSN)));
- }
+#ifndef DBUG_OFF
+ if (checksum_field1 != checksum_field2
+ && checksum_field1 != BUF_NO_CHECKSUM_MAGIC) {
+ DBUG_LOG("checksum",
+ "Page checksum crc32 not valid"
+ << " field1 " << checksum_field1
+ << " field2 " << checksum_field2
+ << " crc32 " << BUF_NO_CHECKSUM_MAGIC
+ << " lsn " << mach_read_from_4(read_buf
+ + FIL_PAGE_LSN));
+ }
+#endif /* DBUG_OFF */
#ifdef UNIV_INNOCHECKSUM
if (log_file
&& srv_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_NONE) {
fprintf(log_file,
"page::%llu; none checksum: calculated"
- " = " ULINTPF "; recorded checksum_field1 = " ULINTPF
+ " = %lu; recorded checksum_field1 = " ULINTPF
" recorded checksum_field2 = " ULINTPF "\n",
cur_page_num, BUF_NO_CHECKSUM_MAGIC,
checksum_field1, checksum_field2);
@@ -832,29 +929,64 @@ buf_page_is_checksum_valid_none(
&& checksum_field1 == BUF_NO_CHECKSUM_MAGIC);
}
+#ifdef INNODB_BUG_ENDIAN_CRC32
+/** Validate the CRC-32C checksum of a page.
+@param[in] page buffer page (srv_page_size bytes)
+@param[in] checksum CRC-32C checksum stored on page
+@return computed checksum */
+static uint32_t buf_page_check_crc32(const byte* page, uint32_t checksum)
+{
+ uint32_t crc32 = buf_calc_page_crc32(page);
+
+ if (checksum != crc32) {
+ crc32 = buf_calc_page_crc32(page, true);
+ }
+
+ return crc32;
+}
+#else /* INNODB_BUG_ENDIAN_CRC32 */
+/** Validate the CRC-32C checksum of a page.
+@param[in] page buffer page (srv_page_size bytes)
+@param[in] checksum CRC-32C checksum stored on page
+@return computed checksum */
+# define buf_page_check_crc32(page, checksum) buf_calc_page_crc32(page)
+#endif /* INNODB_BUG_ENDIAN_CRC32 */
+
+
+/** Check if a buffer is all zeroes.
+@param[in] buf data to check
+@return whether the buffer is all zeroes */
+bool buf_is_zeroes(span<const byte> buf)
+{
+ ut_ad(buf.size() <= sizeof field_ref_zero);
+ return memcmp(buf.data(), field_ref_zero, buf.size()) == 0;
+}
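
buf_is_zeroes() compares the page against field_ref_zero, a preallocated buffer of zero bytes, so the check is a single memcmp() rather than a byte-by-byte loop. A stand-alone equivalent, assuming the caller never passes more bytes than the reference buffer holds (as the ut_ad() above asserts):

    #include <cstddef>
    #include <cstring>
    #include <vector>

    bool is_all_zero(const unsigned char* buf, std::size_t len) {
        // A zero-filled reference at least as large as any page checked.
        static const std::vector<unsigned char> zeroes(16384, 0);
        // Assumption: len <= zeroes.size(), mirroring the ut_ad() above.
        return std::memcmp(buf, zeroes.data(), len) == 0;
    }
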
+
/** Check if a page is corrupt.
-@param[in] check_lsn true if LSN should be checked
-@param[in] read_buf Page to be checked
-@param[in] zip_size compressed size or 0
-@param[in] space Pointer to tablespace
-@return true if corrupted, false if not */
-UNIV_INTERN
+@param[in] check_lsn whether the LSN should be checked
+@param[in] read_buf database page
+@param[in] page_size page size
+@param[in] space tablespace
+@return whether the page is corrupted */
bool
buf_page_is_corrupted(
bool check_lsn,
const byte* read_buf,
- ulint zip_size,
+ const page_size_t& page_size,
#ifndef UNIV_INNOCHECKSUM
const fil_space_t* space)
#else
const void* space)
#endif
{
- DBUG_EXECUTE_IF("buf_page_import_corrupt_failure", return(TRUE); );
- ulint checksum_field1 = 0;
- ulint checksum_field2 = 0;
- bool crc32_inited = false;
- ib_uint32_t crc32 = ULINT32_UNDEFINED;
+ ut_ad(page_size.logical() == srv_page_size);
+#ifndef UNIV_INNOCHECKSUM
+ DBUG_EXECUTE_IF("buf_page_import_corrupt_failure", return(true); );
+#endif
+ size_t checksum_field1 = 0;
+ size_t checksum_field2 = 0;
+ uint32_t crc32 = 0;
+ bool crc32_inited = false;
ulint page_type = mach_read_from_2(read_buf + FIL_PAGE_TYPE);
@@ -873,12 +1005,12 @@ buf_page_is_corrupted(
&& space && FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags)
#endif
) {
- return (false);
+ return(false);
}
- if (!zip_size
+ if (!page_size.is_compressed()
&& memcmp(read_buf + FIL_PAGE_LSN + 4,
- read_buf + UNIV_PAGE_SIZE
+ read_buf + page_size.logical()
- FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
/* Stored log sequence numbers at the start and the end
@@ -887,38 +1019,37 @@ buf_page_is_corrupted(
return(true);
}
-#if !defined(UNIV_HOTBACKUP) && !defined(UNIV_INNOCHECKSUM)
+#ifndef UNIV_INNOCHECKSUM
if (check_lsn && recv_lsn_checks_on) {
- lsn_t current_lsn;
+ lsn_t current_lsn;
+ const lsn_t page_lsn
+ = mach_read_from_8(read_buf + FIL_PAGE_LSN);
/* Since we are going to reset the page LSN during the import
phase it makes no sense to spam the log with error messages. */
- if (log_peek_lsn(&current_lsn)
- && current_lsn
- < mach_read_from_8(read_buf + FIL_PAGE_LSN)) {
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: page " ULINTPF " log sequence number"
- " " LSN_PF "\n"
- "InnoDB: is in the future! Current system "
- "log sequence number " LSN_PF ".\n"
- "InnoDB: Your database may be corrupt or "
- "you may have copied the InnoDB\n"
- "InnoDB: tablespace but not the InnoDB "
- "log files. See\n"
- "InnoDB: " REFMAN
- "forcing-innodb-recovery.html\n"
- "InnoDB: for more information.\n",
- (ulint) mach_read_from_4(
- read_buf + FIL_PAGE_OFFSET),
- (lsn_t) mach_read_from_8(
- read_buf + FIL_PAGE_LSN),
- current_lsn);
+ if (log_peek_lsn(&current_lsn) && current_lsn < page_lsn) {
+
+ const ulint space_id = mach_read_from_4(
+ read_buf + FIL_PAGE_SPACE_ID);
+ const ulint page_no = mach_read_from_4(
+ read_buf + FIL_PAGE_OFFSET);
+
+ ib::error() << "Page " << page_id_t(space_id, page_no)
+ << " log sequence number " << page_lsn
+ << " is in the future! Current system"
+ << " log sequence number "
+ << current_lsn << ".";
+
+ ib::error() << "Your database may be corrupt or"
+ " you may have copied the InnoDB"
+ " tablespace but not the InnoDB"
+ " log files. "
+ << FORCE_RECOVERY_MSG;
+
}
}
-#endif
+#endif /* !UNIV_INNOCHECKSUM */
/* Check whether the checksum fields have correct values */
@@ -929,15 +1060,16 @@ buf_page_is_corrupted(
return(false);
}
- if (zip_size) {
- return(!page_zip_verify_checksum(read_buf, zip_size));
+ if (page_size.is_compressed()) {
+ return(!page_zip_verify_checksum(read_buf,
+ page_size.physical()));
}
checksum_field1 = mach_read_from_4(
read_buf + FIL_PAGE_SPACE_OR_CHKSUM);
checksum_field2 = mach_read_from_4(
- read_buf + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM);
+ read_buf + page_size.logical() - FIL_PAGE_END_LSN_OLD_CHKSUM);
#if FIL_PAGE_LSN % 8
#error "FIL_PAGE_LSN must be 64 bit aligned"
@@ -945,7 +1077,7 @@ buf_page_is_corrupted(
/* A page filled with NUL bytes is considered not corrupted.
The FIL_PAGE_FILE_FLUSH_LSN field may be written nonzero for
- the first page of each file of the system tablespace.
+ the first page of the system tablespace.
Ignore it for the system tablespace. */
if (!checksum_field1 && !checksum_field2) {
/* Checksum fields can have valid value as zero.
@@ -974,7 +1106,6 @@ buf_page_is_corrupted(
case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
return !buf_page_is_checksum_valid_crc32(
read_buf, checksum_field1, checksum_field2);
-
case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
return !buf_page_is_checksum_valid_innodb(
read_buf, checksum_field1, checksum_field2);
@@ -983,6 +1114,28 @@ buf_page_is_corrupted(
read_buf, checksum_field1, checksum_field2);
case SRV_CHECKSUM_ALGORITHM_CRC32:
case SRV_CHECKSUM_ALGORITHM_INNODB:
+ if (buf_page_is_checksum_valid_none(read_buf,
+ checksum_field1, checksum_field2)) {
+#ifdef UNIV_INNOCHECKSUM
+ if (log_file) {
+ fprintf(log_file, "page::%llu;"
+ " old style: calculated = %u;"
+ " recorded = " ULINTPF ";\n",
+ cur_page_num,
+ buf_calc_page_old_checksum(read_buf),
+ checksum_field2);
+ fprintf(log_file, "page::%llu;"
+ " new style: calculated = %u;"
+ " crc32 = %u; recorded = " ULINTPF ";\n",
+ cur_page_num,
+ buf_calc_page_new_checksum(read_buf),
+ buf_calc_page_crc32(read_buf),
+ checksum_field1);
+ }
+#endif /* UNIV_INNOCHECKSUM */
+ return false;
+ }
+
/* Very old versions of InnoDB only stored 8 byte lsn to the
start and the end of the page. */
@@ -993,13 +1146,17 @@ buf_page_is_corrupted(
!= mach_read_from_4(read_buf + FIL_PAGE_LSN)
&& checksum_field2 != BUF_NO_CHECKSUM_MAGIC) {
- /* The checksum does not match any of the
- fast to check. First check the selected algorithm
- for writing checksums because we assume that the
- chance of it matching is higher. */
-
if (curr_algo == SRV_CHECKSUM_ALGORITHM_CRC32) {
- crc32 = buf_calc_page_crc32(read_buf);
+ DBUG_EXECUTE_IF(
+ "page_intermittent_checksum_mismatch", {
+ static int page_counter;
+ if (page_counter++ == 2) {
+ checksum_field2++;
+ }
+ });
+
+ crc32 = buf_page_check_crc32(read_buf,
+ checksum_field2);
crc32_inited = true;
if (checksum_field2 != crc32
@@ -1013,8 +1170,8 @@ buf_page_is_corrupted(
if (checksum_field2
!= buf_calc_page_old_checksum(read_buf)) {
-
- crc32 = buf_calc_page_crc32(read_buf);
+ crc32 = buf_page_check_crc32(
+ read_buf, checksum_field2);
crc32_inited = true;
if (checksum_field2 != crc32) {
@@ -1024,53 +1181,38 @@ buf_page_is_corrupted(
}
}
- /* old field is fine, check the new field */
-
- /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
- (always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */
-
- if (checksum_field1 != 0
- && checksum_field1 != BUF_NO_CHECKSUM_MAGIC) {
+ if (checksum_field1 == 0
+ || checksum_field1 == BUF_NO_CHECKSUM_MAGIC) {
+ } else if (curr_algo == SRV_CHECKSUM_ALGORITHM_CRC32) {
+ if (!crc32_inited) {
+ crc32 = buf_page_check_crc32(
+ read_buf, checksum_field2);
+ crc32_inited = true;
+ }
- /* The checksum does not match any of the
- fast to check. First check the selected algorithm
- for writing checksums because we assume that the
- chance of it matching is higher. */
+ if (checksum_field1 != crc32
+ && checksum_field1
+ != buf_calc_page_new_checksum(read_buf)) {
+ return true;
+ }
+ } else {
+ ut_ad(curr_algo == SRV_CHECKSUM_ALGORITHM_INNODB);
- if (curr_algo == SRV_CHECKSUM_ALGORITHM_CRC32) {
+ if (checksum_field1
+ != buf_calc_page_new_checksum(read_buf)) {
if (!crc32_inited) {
- crc32 = buf_calc_page_crc32(read_buf);
+ crc32 = buf_page_check_crc32(
+ read_buf, checksum_field2);
crc32_inited = true;
}
- if (checksum_field1 != crc32
- && checksum_field1
- != buf_calc_page_new_checksum(read_buf)) {
+ if (checksum_field1 != crc32) {
return true;
}
- } else {
- ut_ad(curr_algo
- == SRV_CHECKSUM_ALGORITHM_INNODB);
-
- if (checksum_field1
- != buf_calc_page_new_checksum(read_buf)) {
-
- if (!crc32_inited) {
- crc32 = buf_calc_page_crc32(
- read_buf);
- crc32_inited = true;
- }
-
- if (checksum_field1 != crc32) {
- return true;
- }
- }
}
}
- /* If CRC32 is stored in at least one of the fields then the
- other field must also be CRC32 */
if (crc32_inited
&& ((checksum_field1 == crc32
&& checksum_field2 != crc32)
@@ -1081,10 +1223,8 @@ buf_page_is_corrupted(
break;
case SRV_CHECKSUM_ALGORITHM_NONE:
- /* should have returned FALSE earlier */
- ut_error;
- /* no default so the compiler will emit a warning if new enum
- is added and not handled here */
+ /* should have returned false earlier */
+ break;
}
return false;
@@ -1093,110 +1233,125 @@ buf_page_is_corrupted(
#ifndef UNIV_INNOCHECKSUM
/** Dump a page to stderr.
@param[in] read_buf database page
-@param[in] zip_size compressed page size, or 0 for uncompressed */
+@param[in] page_size page size */
UNIV_INTERN
void
-buf_page_print(const byte* read_buf, ulint zip_size)
+buf_page_print(const byte* read_buf, const page_size_t& page_size)
{
-#ifndef UNIV_HOTBACKUP
dict_index_t* index;
-#endif /* !UNIV_HOTBACKUP */
- ulint size = zip_size;
- if (!size) {
- size = UNIV_PAGE_SIZE;
- }
+#ifndef UNIV_DEBUG
+ ib::info() << "Page dump in ascii and hex ("
+ << page_size.physical() << " bytes):";
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Page dump in ascii and hex (" ULINTPF " bytes):\n",
- size);
- ut_print_buf(stderr, read_buf, size);
+ ut_print_buf(stderr, read_buf, page_size.physical());
fputs("\nInnoDB: End of page dump\n", stderr);
+#endif
- if (zip_size) {
+ if (page_size.is_compressed()) {
/* Print compressed page. */
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Compressed page type (" ULINTPF "); "
- "stored checksum in field1 " ULINTPF "; "
- "calculated checksums for field1: "
- "%s " ULINTPF ", "
- "%s " ULINTPF ", "
- "%s " ULINTPF "; "
- "page LSN " LSN_PF "; "
- "page number (if stored to page already) " ULINTPF "; "
- "space id (if stored to page already) " ULINTPF "\n",
- fil_page_get_type(read_buf),
- mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
- buf_checksum_algorithm_name(
- SRV_CHECKSUM_ALGORITHM_CRC32),
- page_zip_calc_checksum(read_buf, zip_size,
- SRV_CHECKSUM_ALGORITHM_CRC32),
- buf_checksum_algorithm_name(
- SRV_CHECKSUM_ALGORITHM_INNODB),
- page_zip_calc_checksum(read_buf, zip_size,
- SRV_CHECKSUM_ALGORITHM_INNODB),
- buf_checksum_algorithm_name(
- SRV_CHECKSUM_ALGORITHM_NONE),
- page_zip_calc_checksum(read_buf, zip_size,
- SRV_CHECKSUM_ALGORITHM_NONE),
- mach_read_from_8(read_buf + FIL_PAGE_LSN),
- mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
- mach_read_from_4(read_buf
- + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
- } else {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: uncompressed page, "
- "stored checksum in field1 " ULINTPF ", "
- "calculated checksums for field1: "
- "%s " UINT32PF ", "
- "%s " ULINTPF ", "
- "%s " ULINTPF ", "
-
- "stored checksum in field2 " ULINTPF ", "
- "calculated checksums for field2: "
- "%s " UINT32PF ", "
- "%s " ULINTPF ", "
- "%s " ULINTPF ", "
-
- "page LSN " ULINTPF " " ULINTPF ", "
- "low 4 bytes of LSN at page end " ULINTPF ", "
- "page number (if stored to page already) " ULINTPF ", "
- "space id (if created with >= MySQL-4.1.1 "
- "and stored already) " ULINTPF "\n",
- mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
- buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_CRC32),
- buf_calc_page_crc32(read_buf),
- buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_INNODB),
- buf_calc_page_new_checksum(read_buf),
- buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_NONE),
- BUF_NO_CHECKSUM_MAGIC,
-
- mach_read_from_4(read_buf + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM),
- buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_CRC32),
- buf_calc_page_crc32(read_buf),
- buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_INNODB),
- buf_calc_page_old_checksum(read_buf),
- buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_NONE),
- BUF_NO_CHECKSUM_MAGIC,
-
- mach_read_from_4(read_buf + FIL_PAGE_LSN),
- mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
- mach_read_from_4(read_buf + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
- mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
- mach_read_from_4(read_buf
- + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
+ ib::info() << "Compressed page type ("
+ << fil_page_get_type(read_buf)
+ << "); stored checksum in field1 "
+ << mach_read_from_4(
+ read_buf + FIL_PAGE_SPACE_OR_CHKSUM)
+ << "; calculated checksums for field1: "
+ << buf_checksum_algorithm_name(
+ SRV_CHECKSUM_ALGORITHM_CRC32)
+ << " "
+ << page_zip_calc_checksum(
+ read_buf, page_size.physical(),
+ SRV_CHECKSUM_ALGORITHM_CRC32)
+#ifdef INNODB_BUG_ENDIAN_CRC32
+ << "/"
+ << page_zip_calc_checksum(
+ read_buf, page_size.physical(),
+ SRV_CHECKSUM_ALGORITHM_CRC32, true)
+#endif
+ << ", "
+ << buf_checksum_algorithm_name(
+ SRV_CHECKSUM_ALGORITHM_INNODB)
+ << " "
+ << page_zip_calc_checksum(
+ read_buf, page_size.physical(),
+ SRV_CHECKSUM_ALGORITHM_INNODB)
+ << ", "
+ << buf_checksum_algorithm_name(
+ SRV_CHECKSUM_ALGORITHM_NONE)
+ << " "
+ << page_zip_calc_checksum(
+ read_buf, page_size.physical(),
+ SRV_CHECKSUM_ALGORITHM_NONE)
+ << "; page LSN "
+ << mach_read_from_8(read_buf + FIL_PAGE_LSN)
+ << "; page number (if stored to page"
+ << " already) "
+ << mach_read_from_4(read_buf + FIL_PAGE_OFFSET)
+ << "; space id (if stored to page already) "
+ << mach_read_from_4(
+ read_buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+ } else {
+ const uint32_t crc32 = buf_calc_page_crc32(read_buf);
+#ifdef INNODB_BUG_ENDIAN_CRC32
+ const uint32_t crc32_legacy = buf_calc_page_crc32(read_buf,
+ true);
+#endif /* INNODB_BUG_ENDIAN_CRC32 */
ulint page_type = fil_page_get_type(read_buf);
- fprintf(stderr, "InnoDB: page type %ld meaning %s\n", page_type,
- fil_get_page_type_name(page_type));
+ ib::info() << "Uncompressed page, stored checksum in field1 "
+ << mach_read_from_4(
+ read_buf + FIL_PAGE_SPACE_OR_CHKSUM)
+ << ", calculated checksums for field1: "
+ << buf_checksum_algorithm_name(
+ SRV_CHECKSUM_ALGORITHM_CRC32) << " "
+ << crc32
+#ifdef INNODB_BUG_ENDIAN_CRC32
+ << "/" << crc32_legacy
+#endif
+ << ", "
+ << buf_checksum_algorithm_name(
+ SRV_CHECKSUM_ALGORITHM_INNODB) << " "
+ << buf_calc_page_new_checksum(read_buf)
+ << ", "
+ << " page type " << page_type << " == "
+ << fil_get_page_type_name(page_type) << "."
+ << buf_checksum_algorithm_name(
+ SRV_CHECKSUM_ALGORITHM_NONE) << " "
+ << BUF_NO_CHECKSUM_MAGIC
+ << ", stored checksum in field2 "
+ << mach_read_from_4(read_buf + page_size.logical()
+ - FIL_PAGE_END_LSN_OLD_CHKSUM)
+ << ", calculated checksums for field2: "
+ << buf_checksum_algorithm_name(
+ SRV_CHECKSUM_ALGORITHM_CRC32) << " "
+ << crc32
+#ifdef INNODB_BUG_ENDIAN_CRC32
+ << "/" << crc32_legacy
+#endif
+ << ", "
+ << buf_checksum_algorithm_name(
+ SRV_CHECKSUM_ALGORITHM_INNODB) << " "
+ << buf_calc_page_old_checksum(read_buf)
+ << ", "
+ << buf_checksum_algorithm_name(
+ SRV_CHECKSUM_ALGORITHM_NONE) << " "
+ << BUF_NO_CHECKSUM_MAGIC
+ << ", page LSN "
+ << mach_read_from_4(read_buf + FIL_PAGE_LSN)
+ << " "
+ << mach_read_from_4(read_buf + FIL_PAGE_LSN + 4)
+ << ", low 4 bytes of LSN at page end "
+ << mach_read_from_4(read_buf + page_size.logical()
+ - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)
+ << ", page number (if stored to page already) "
+ << mach_read_from_4(read_buf + FIL_PAGE_OFFSET)
+ << ", space id (if created with >= MySQL-4.1.1"
+ " and stored already) "
+ << mach_read_from_4(
+ read_buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
}
-#ifndef UNIV_HOTBACKUP
if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
== TRX_UNDO_INSERT) {
fprintf(stderr,
@@ -1207,24 +1362,22 @@ buf_page_print(const byte* read_buf, ulint zip_size)
fprintf(stderr,
"InnoDB: Page may be an update undo log page\n");
}
-#endif /* !UNIV_HOTBACKUP */
switch (fil_page_get_type(read_buf)) {
index_id_t index_id;
case FIL_PAGE_INDEX:
+ case FIL_PAGE_RTREE:
index_id = btr_page_get_index_id(read_buf);
- fprintf(stderr,
- "InnoDB: Page may be an index page where"
- " index id is %llu\n",
- (ullint) index_id);
-#ifndef UNIV_HOTBACKUP
+ ib::info() << "Page may be an index page where"
+ " index id is " << index_id;
+
index = dict_index_find_on_id_low(index_id);
if (index) {
- fputs("InnoDB: (", stderr);
- dict_index_name_print(stderr, NULL, index);
- fputs(")\n", stderr);
+ ib::info()
+ << "Index " << index_id
+ << " is " << index->name
+ << " in table " << index->table->name;
}
-#endif /* !UNIV_HOTBACKUP */
break;
case FIL_PAGE_INODE:
fputs("InnoDB: Page may be an 'inode' page\n", stderr);
@@ -1269,9 +1422,9 @@ buf_page_print(const byte* read_buf, ulint zip_size)
}
}
-#ifndef UNIV_HOTBACKUP
-
# ifdef PFS_GROUP_BUFFER_SYNC
+extern mysql_pfs_key_t buffer_block_mutex_key;
+
/********************************************************************//**
This function registers mutexes and rwlocks in buffer blocks with
performance schema. If PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER is
@@ -1284,27 +1437,24 @@ pfs_register_buffer_block(
/*======================*/
buf_chunk_t* chunk) /*!< in/out: chunk of buffers */
{
- ulint i;
- ulint num_to_register;
buf_block_t* block;
+ ulint num_to_register;
block = chunk->blocks;
- num_to_register = ut_min(chunk->size,
- PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER);
-
- for (i = 0; i < num_to_register; i++) {
- ib_mutex_t* mutex;
- rw_lock_t* rwlock;
+ num_to_register = ut_min(
+ chunk->size, PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER);
+ for (ulint i = 0; i < num_to_register; i++) {
# ifdef UNIV_PFS_MUTEX
+ BPageMutex* mutex;
+
mutex = &block->mutex;
- ut_a(!mutex->pfs_psi);
- mutex->pfs_psi = (PSI_server)
- ? PSI_server->init_mutex(buffer_block_mutex_key, mutex)
- : NULL;
+ mutex->pfs_add(buffer_block_mutex_key);
# endif /* UNIV_PFS_MUTEX */
+ rw_lock_t* rwlock;
+
# ifdef UNIV_PFS_RWLOCK
rwlock = &block->lock;
ut_a(!rwlock->pfs_psi);
@@ -1312,14 +1462,14 @@ pfs_register_buffer_block(
? PSI_server->init_rwlock(buf_block_lock_key, rwlock)
: NULL;
-# ifdef UNIV_SYNC_DEBUG
+# ifdef UNIV_DEBUG
rwlock = &block->debug_latch;
ut_a(!rwlock->pfs_psi);
rwlock->pfs_psi = (PSI_server)
? PSI_server->init_rwlock(buf_block_debug_latch_key,
rwlock)
: NULL;
-# endif /* UNIV_SYNC_DEBUG */
+# endif /* UNIV_DEBUG */
# endif /* UNIV_PFS_RWLOCK */
block++;
@@ -1339,6 +1489,10 @@ buf_block_init(
{
UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE);
+ /* This function should only be executed at database startup or by
+ buf_pool_resize(). Either way, adaptive hash index must not exist. */
+ assert_block_ahi_empty_on_init(block);
+
block->frame = frame;
block->page.buf_pool_index = buf_pool_index(buf_pool);
@@ -1346,63 +1500,61 @@ buf_block_init(
block->page.state = BUF_BLOCK_NOT_USED;
block->page.buf_fix_count = 0;
block->page.io_fix = BUF_IO_NONE;
- block->page.encrypted = false;
+ block->page.flush_observer = NULL;
block->page.real_size = 0;
block->page.write_size = 0;
block->modify_clock = 0;
block->page.slot = NULL;
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- block->page.file_page_was_freed = FALSE;
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
+ ut_d(block->page.file_page_was_freed = FALSE);
- block->check_index_page_at_flush = FALSE;
+#ifdef BTR_CUR_HASH_ADAPT
block->index = NULL;
+#endif /* BTR_CUR_HASH_ADAPT */
+ block->skip_flush_check = false;
+
+ ut_d(block->page.in_page_hash = FALSE);
+ ut_d(block->page.in_zip_hash = FALSE);
+ ut_d(block->page.in_flush_list = FALSE);
+ ut_d(block->page.in_free_list = FALSE);
+ ut_d(block->page.in_LRU_list = FALSE);
+ ut_d(block->in_unzip_LRU_list = FALSE);
+ ut_d(block->in_withdraw_list = FALSE);
-#ifdef UNIV_DEBUG
- block->page.in_page_hash = FALSE;
- block->page.in_zip_hash = FALSE;
- block->page.in_flush_list = FALSE;
- block->page.in_free_list = FALSE;
- block->page.in_LRU_list = FALSE;
- block->in_unzip_LRU_list = FALSE;
-#endif /* UNIV_DEBUG */
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- block->n_pointers = 0;
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
page_zip_des_init(&block->page.zip);
+ mutex_create(LATCH_ID_BUF_BLOCK_MUTEX, &block->mutex);
+
#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
/* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration
- of buffer block mutex/rwlock with performance schema. If
- PFS_GROUP_BUFFER_SYNC is defined, skip the registration
- since buffer block mutex/rwlock will be registered later in
- pfs_register_buffer_block() */
+ of buffer block rwlock with performance schema.
+
+ If PFS_GROUP_BUFFER_SYNC is defined, skip the registration
+ since buffer block rwlock will be registered later in
+ pfs_register_buffer_block(). */
- mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK);
rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);
-# ifdef UNIV_SYNC_DEBUG
- rw_lock_create(PFS_NOT_INSTRUMENTED,
- &block->debug_latch, SYNC_NO_ORDER_CHECK);
-# endif /* UNIV_SYNC_DEBUG */
+ ut_d(rw_lock_create(PFS_NOT_INSTRUMENTED, &block->debug_latch,
+ SYNC_LEVEL_VARYING));
#else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
- mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK);
+
rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);
-# ifdef UNIV_SYNC_DEBUG
- rw_lock_create(buf_block_debug_latch_key,
- &block->debug_latch, SYNC_NO_ORDER_CHECK);
-# endif /* UNIV_SYNC_DEBUG */
+ ut_d(rw_lock_create(buf_block_debug_latch_key,
+ &block->debug_latch, SYNC_LEVEL_VARYING));
+
#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
+ block->lock.is_block_lock = 1;
+
ut_ad(rw_lock_validate(&(block->lock)));
}
/********************************************************************//**
Allocates a chunk of buffer frames.
-@return chunk, or NULL on failure */
+@return chunk, or NULL on failure */
static
buf_chunk_t*
buf_chunk_init(
@@ -1422,8 +1574,10 @@ buf_chunk_init(
mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
+ (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
- chunk->mem_size = mem_size;
- chunk->mem = os_mem_alloc_large(&chunk->mem_size);
+ DBUG_EXECUTE_IF("ib_buf_chunk_init_fails", return(NULL););
+
+ chunk->mem = buf_pool->allocator.allocate_large(mem_size,
+ &chunk->mem_pfx);
if (UNIV_UNLIKELY(chunk->mem == NULL)) {
@@ -1432,31 +1586,33 @@ buf_chunk_init(
#ifdef HAVE_LIBNUMA
if (srv_numa_interleave) {
- int st = mbind(chunk->mem, chunk->mem_size,
+ struct bitmask *numa_mems_allowed = numa_get_mems_allowed();
+ int st = mbind(chunk->mem, chunk->mem_size(),
MPOL_INTERLEAVE,
- numa_all_nodes_ptr->maskp,
- numa_all_nodes_ptr->size,
+ numa_mems_allowed->maskp,
+ numa_mems_allowed->size,
MPOL_MF_MOVE);
if (st != 0) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Failed to set NUMA memory policy of buffer"
- " pool page frames to MPOL_INTERLEAVE"
- " (error: %s).", strerror(errno));
+ ib::warn() << "Failed to set NUMA memory policy of"
+ " buffer pool page frames to MPOL_INTERLEAVE"
+ " (error: " << strerror(errno) << ").";
}
+ numa_bitmask_free(numa_mems_allowed);
}
-#endif // HAVE_LIBNUMA
+#endif /* HAVE_LIBNUMA */
+
/* Allocate the block descriptors from
the start of the memory block. */
chunk->blocks = (buf_block_t*) chunk->mem;
/* Align a pointer to the first frame. Note that when
- os_large_page_size is smaller than UNIV_PAGE_SIZE,
+ opt_large_page_size is smaller than UNIV_PAGE_SIZE,
we may allocate one fewer block than requested. When
it is bigger, we may allocate more blocks than requested. */
frame = (byte*) ut_align(chunk->mem, UNIV_PAGE_SIZE);
- chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
+ chunk->size = chunk->mem_pfx.m_size / UNIV_PAGE_SIZE
- (frame != chunk->mem);
/* Subtract the space needed for block descriptors. */
@@ -1483,7 +1639,7 @@ buf_chunk_init(
UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
/* Add the block to the free list */
- UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
+ UT_LIST_ADD_LAST(buf_pool->free, &block->page);
ut_d(block->page.in_free_list = TRUE);
ut_ad(buf_pool_from_block(block) == buf_pool);
@@ -1492,9 +1648,11 @@ buf_chunk_init(
frame += UNIV_PAGE_SIZE;
}
+ buf_pool_register_chunk(chunk);
+
#ifdef PFS_GROUP_BUFFER_SYNC
pfs_register_buffer_block(chunk);
-#endif
+#endif /* PFS_GROUP_BUFFER_SYNC */
return(chunk);
}
@@ -1502,7 +1660,7 @@ buf_chunk_init(
/*********************************************************************//**
Finds a block in the given buffer chunk that points to a
given compressed page.
-@return buffer block pointing to the compressed page, or NULL */
+@return buffer block pointing to the compressed page, or NULL */
static
buf_block_t*
buf_chunk_contains_zip(
@@ -1528,8 +1686,7 @@ buf_chunk_contains_zip(
/*********************************************************************//**
Finds a block in the buffer pool that points to a
given compressed page.
-@return buffer block pointing to the compressed page, or NULL */
-UNIV_INTERN
+@return buffer block pointing to the compressed page, or NULL */
buf_block_t*
buf_pool_contains_zip(
/*==================*/
@@ -1556,7 +1713,7 @@ buf_pool_contains_zip(
/*********************************************************************//**
Checks that all file pages in the buffer chunk are in a replaceable state.
-@return address of a non-free block, or NULL if all freed */
+@return address of a non-free block, or NULL if all freed */
static
const buf_block_t*
buf_chunk_not_freed(
@@ -1587,12 +1744,27 @@ buf_chunk_not_freed(
file pages. */
break;
case BUF_BLOCK_FILE_PAGE:
- mutex_enter(&block->mutex);
+ if (srv_read_only_mode) {
+ /* The page cleaner is disabled in
+ read-only mode. No pages can be
+ dirtied, so all of them must be clean. */
+ ut_ad(block->page.oldest_modification
+ == block->page.newest_modification);
+ ut_ad(block->page.oldest_modification == 0
+ || block->page.oldest_modification
+ == recv_sys->recovered_lsn
+ || srv_force_recovery
+ == SRV_FORCE_NO_LOG_REDO);
+ ut_ad(block->page.buf_fix_count == 0);
+ ut_ad(block->page.io_fix == BUF_IO_NONE);
+ break;
+ }
+
+ buf_page_mutex_enter(block);
ready = buf_flush_ready_for_replace(&block->page);
- mutex_exit(&block->mutex);
+ buf_page_mutex_exit(block);
if (!ready) {
-
return(block);
}
@@ -1624,6 +1796,7 @@ buf_pool_set_sizes(void)
srv_buf_pool_curr_size = curr_size;
srv_buf_pool_old_size = srv_buf_pool_size;
+ srv_buf_pool_base_size = srv_buf_pool_size;
buf_pool_mutex_exit_all();
}
@@ -1631,7 +1804,7 @@ buf_pool_set_sizes(void)
/********************************************************************//**
Initialize a buffer pool instance.
@return DB_SUCCESS if all goes well. */
-UNIV_INTERN
+static
ulint
buf_pool_init_instance(
/*===================*/
@@ -1640,48 +1813,99 @@ buf_pool_init_instance(
ulint instance_no) /*!< in: id of the instance */
{
ulint i;
+ ulint chunk_size;
buf_chunk_t* chunk;
+ ut_ad(buf_pool_size % srv_buf_pool_chunk_unit == 0);
+
/* 1. Initialize general fields
------------------------------- */
- mutex_create(buf_pool_mutex_key,
- &buf_pool->mutex, SYNC_BUF_POOL);
- mutex_create(buf_pool_zip_mutex_key,
- &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
+ mutex_create(LATCH_ID_BUF_POOL, &buf_pool->mutex);
+
+ mutex_create(LATCH_ID_BUF_POOL_ZIP, &buf_pool->zip_mutex);
+
+ new(&buf_pool->allocator)
+ ut_allocator<unsigned char>(mem_key_buf_buf_pool);
buf_pool_mutex_enter(buf_pool);
if (buf_pool_size > 0) {
- buf_pool->n_chunks = 1;
+ buf_pool->n_chunks
+ = buf_pool_size / srv_buf_pool_chunk_unit;
+ chunk_size = srv_buf_pool_chunk_unit;
+
+ buf_pool->chunks =
+ reinterpret_cast<buf_chunk_t*>(ut_zalloc_nokey(
+ buf_pool->n_chunks * sizeof(*chunk)));
+ buf_pool->chunks_old = NULL;
+
+ UT_LIST_INIT(buf_pool->LRU, &buf_page_t::LRU);
+ UT_LIST_INIT(buf_pool->free, &buf_page_t::list);
+ UT_LIST_INIT(buf_pool->withdraw, &buf_page_t::list);
+ buf_pool->withdraw_target = 0;
+ UT_LIST_INIT(buf_pool->flush_list, &buf_page_t::list);
+ UT_LIST_INIT(buf_pool->unzip_LRU, &buf_block_t::unzip_LRU);
- buf_pool->chunks = chunk =
- (buf_chunk_t*) mem_zalloc(sizeof *chunk);
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ UT_LIST_INIT(buf_pool->zip_clean, &buf_page_t::list);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- if (!buf_chunk_init(buf_pool, chunk, buf_pool_size)) {
- mem_free(chunk);
- mem_free(buf_pool);
+ for (i = 0; i < UT_ARR_SIZE(buf_pool->zip_free); ++i) {
+ UT_LIST_INIT(
+ buf_pool->zip_free[i], &buf_buddy_free_t::list);
+ }
- buf_pool_mutex_exit(buf_pool);
+ buf_pool->curr_size = 0;
+ chunk = buf_pool->chunks;
- return(DB_ERROR);
- }
+ do {
+ if (!buf_chunk_init(buf_pool, chunk, chunk_size)) {
+ while (--chunk >= buf_pool->chunks) {
+ buf_block_t* block = chunk->blocks;
+
+ for (i = chunk->size; i--; block++) {
+ mutex_free(&block->mutex);
+ rw_lock_free(&block->lock);
+
+ ut_d(rw_lock_free(
+ &block->debug_latch));
+ }
+
+ buf_pool->allocator.deallocate_large(
+ chunk->mem, &chunk->mem_pfx);
+ }
+ ut_free(buf_pool->chunks);
+ buf_pool_mutex_exit(buf_pool);
+
+ return(DB_ERROR);
+ }
+
+ buf_pool->curr_size += chunk->size;
+ } while (++chunk < buf_pool->chunks + buf_pool->n_chunks);
buf_pool->instance_no = instance_no;
- buf_pool->old_pool_size = buf_pool_size;
- buf_pool->curr_size = chunk->size;
+ buf_pool->read_ahead_area =
+ ut_min(BUF_READ_AHEAD_PAGES,
+ ut_2_power_up(buf_pool->curr_size /
+ BUF_READ_AHEAD_PORTION));
buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
+ buf_pool->old_size = buf_pool->curr_size;
+ buf_pool->n_chunks_new = buf_pool->n_chunks;
+
/* Number of locks protecting page_hash must be a
power of two */
srv_n_page_hash_locks = static_cast<ulong>(
- ut_2_power_up(srv_n_page_hash_locks));
+ ut_2_power_up(srv_n_page_hash_locks));
ut_a(srv_n_page_hash_locks != 0);
ut_a(srv_n_page_hash_locks <= MAX_PAGE_HASH_LOCKS);
- buf_pool->page_hash = ib_create(2 * buf_pool->curr_size,
- srv_n_page_hash_locks,
- MEM_HEAP_FOR_PAGE_HASH,
- SYNC_BUF_PAGE_HASH);
+ buf_pool->page_hash = ib_create(
+ 2 * buf_pool->curr_size,
+ LATCH_ID_HASH_TABLE_RW_LOCK,
+ srv_n_page_hash_locks, MEM_HEAP_FOR_PAGE_HASH);
+
+ buf_pool->page_hash_old = NULL;
buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
@@ -1690,17 +1914,20 @@ buf_pool_init_instance(
/* 2. Initialize flushing fields
-------------------------------- */
- mutex_create(flush_list_mutex_key, &buf_pool->flush_list_mutex,
- SYNC_BUF_FLUSH_LIST);
+ mutex_create(LATCH_ID_FLUSH_LIST, &buf_pool->flush_list_mutex);
for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
- buf_pool->no_flush[i] = os_event_create();
+ buf_pool->no_flush[i] = os_event_create(0);
}
- buf_pool->watch = (buf_page_t*) mem_zalloc(
+ buf_pool->watch = (buf_page_t*) ut_zalloc_nokey(
sizeof(*buf_pool->watch) * BUF_POOL_WATCH_SIZE);
+ for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
+ buf_pool->watch[i].buf_pool_index
+ = unsigned(buf_pool->instance_no);
+ }
- /* All fields are initialized by mem_zalloc(). */
+ /* All fields are initialized by ut_zalloc_nokey(). */
buf_pool->try_LRU_scan = TRUE;
@@ -1718,10 +1945,12 @@ buf_pool_init_instance(
new(&buf_pool->single_scan_itr) LRUItr(buf_pool, &buf_pool->mutex);
/* Initialize the temporal memory array and slots */
- buf_pool->tmp_arr = (buf_tmp_array_t *)mem_zalloc(sizeof(buf_tmp_array_t));
+ buf_pool->tmp_arr = (buf_tmp_array_t *)ut_malloc_nokey(sizeof(buf_tmp_array_t));
+ memset(buf_pool->tmp_arr, 0, sizeof(buf_tmp_array_t));
ulint n_slots = (srv_n_read_io_threads + srv_n_write_io_threads) * (8 * OS_AIO_N_PENDING_IOS_PER_THREAD);
buf_pool->tmp_arr->n_slots = n_slots;
- buf_pool->tmp_arr->slots = (buf_tmp_buffer_t*)mem_zalloc(sizeof(buf_tmp_buffer_t) * n_slots);
+ buf_pool->tmp_arr->slots = (buf_tmp_buffer_t*)ut_malloc_nokey(sizeof(buf_tmp_buffer_t) * n_slots);
+ memset(buf_pool->tmp_arr->slots, 0, (sizeof(buf_tmp_buffer_t) * n_slots));
buf_pool_mutex_exit(buf_pool);
@@ -1743,11 +1972,23 @@ buf_pool_free_instance(
buf_chunk_t* chunk;
buf_chunk_t* chunks;
buf_page_t* bpage;
+ buf_page_t* prev_bpage = 0;
- bpage = UT_LIST_GET_LAST(buf_pool->LRU);
- while (bpage != NULL) {
- buf_page_t* prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
- enum buf_page_state state = buf_page_get_state(bpage);
+ mutex_free(&buf_pool->mutex);
+ mutex_free(&buf_pool->zip_mutex);
+ mutex_free(&buf_pool->flush_list_mutex);
+
+ if (buf_pool->flush_rbt) {
+ rbt_free(buf_pool->flush_rbt);
+ buf_pool->flush_rbt = NULL;
+ }
+
+ for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+ bpage != NULL;
+ bpage = prev_bpage) {
+
+ prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
+ buf_page_state state = buf_page_get_state(bpage);
ut_ad(buf_page_in_file(bpage));
ut_ad(bpage->in_LRU_list);
@@ -1759,21 +2000,33 @@ buf_pool_free_instance(
|| srv_fast_shutdown == 2);
buf_page_free_descriptor(bpage);
}
-
- bpage = prev_bpage;
}
- mem_free(buf_pool->watch);
+ ut_free(buf_pool->watch);
buf_pool->watch = NULL;
chunks = buf_pool->chunks;
chunk = chunks + buf_pool->n_chunks;
while (--chunk >= chunks) {
- os_mem_free_large(chunk->mem, chunk->mem_size);
+ buf_block_t* block = chunk->blocks;
+
+ for (ulint i = chunk->size; i--; block++) {
+ mutex_free(&block->mutex);
+ rw_lock_free(&block->lock);
+
+ ut_d(rw_lock_free(&block->debug_latch));
+ }
+
+ buf_pool->allocator.deallocate_large(
+ chunk->mem, &chunk->mem_pfx);
}
- mem_free(buf_pool->chunks);
+ for (ulint i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; ++i) {
+ os_event_destroy(buf_pool->no_flush[i]);
+ }
+
+ ut_free(buf_pool->chunks);
ha_clear(buf_pool->page_hash);
hash_table_free(buf_pool->page_hash);
hash_table_free(buf_pool->zip_hash);
@@ -1792,17 +2045,18 @@ buf_pool_free_instance(
slot->comp_buf = NULL;
}
}
+
+ ut_free(buf_pool->tmp_arr->slots);
+ ut_free(buf_pool->tmp_arr);
+ buf_pool->tmp_arr = NULL;
}
- mem_free(buf_pool->tmp_arr->slots);
- mem_free(buf_pool->tmp_arr);
- buf_pool->tmp_arr = NULL;
+ buf_pool->allocator.~ut_allocator();
}
/********************************************************************//**
Creates the buffer pool.
-@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
-UNIV_INTERN
+@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
dberr_t
buf_pool_init(
/*==========*/
@@ -1816,24 +2070,17 @@ buf_pool_init(
ut_ad(n_instances <= MAX_BUFFER_POOLS);
ut_ad(n_instances == srv_buf_pool_instances);
-#ifdef HAVE_LIBNUMA
- if (srv_numa_interleave) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Setting NUMA memory policy to MPOL_INTERLEAVE");
- if (set_mempolicy(MPOL_INTERLEAVE,
- numa_all_nodes_ptr->maskp,
- numa_all_nodes_ptr->size) != 0) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Failed to set NUMA memory policy to"
- " MPOL_INTERLEAVE (error: %s).",
- strerror(errno));
- }
- }
-#endif // HAVE_LIBNUMA
+ NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE;
- buf_pool_ptr = (buf_pool_t*) mem_zalloc(
+ buf_pool_resizing = false;
+ buf_pool_withdrawing = false;
+ buf_withdraw_clock = 0;
+
+ buf_pool_ptr = (buf_pool_t*) ut_zalloc_nokey(
n_instances * sizeof *buf_pool_ptr);
+ buf_chunk_map_reg = UT_NEW_NOKEY(buf_pool_chunk_map_t());
+
for (i = 0; i < n_instances; i++) {
buf_pool_t* ptr = &buf_pool_ptr[i];
@@ -1846,59 +2093,1088 @@ buf_pool_init(
}
}
+ buf_chunk_map_ref = buf_chunk_map_reg;
+
buf_pool_set_sizes();
buf_LRU_old_ratio_update(100 * 3/ 8, FALSE);
btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64);
-#ifdef HAVE_LIBNUMA
- if (srv_numa_interleave) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Setting NUMA memory policy to MPOL_DEFAULT");
- if (set_mempolicy(MPOL_DEFAULT, NULL, 0) != 0) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Failed to set NUMA memory policy to"
- " MPOL_DEFAULT (error: %s).", strerror(errno));
- }
- }
-#endif // HAVE_LIBNUMA
-
- buf_flush_event = os_event_create();
-
return(DB_SUCCESS);
}
/********************************************************************//**
Frees the buffer pool at shutdown. This must not be invoked before
freeing all mutexes. */
-UNIV_INTERN
void
buf_pool_free(
/*==========*/
	ulint	n_instances)	/*!< in: number of instances to free */
{
- ulint i;
-
- for (i = 0; i < n_instances; i++) {
+ for (ulint i = 0; i < n_instances; i++) {
buf_pool_free_instance(buf_pool_from_array(i));
}
- mem_free(buf_pool_ptr);
+ UT_DELETE(buf_chunk_map_reg);
+ buf_chunk_map_reg = buf_chunk_map_ref = NULL;
+
+ ut_free(buf_pool_ptr);
buf_pool_ptr = NULL;
}
-/********************************************************************//**
-Clears the adaptive hash index on all pages in the buffer pool. */
-UNIV_INTERN
+/** Reallocate a control block.
+@param[in] buf_pool buffer pool instance
+@param[in] block pointer to control block
+@retval true if a free block was available (whether or not the page
+was actually relocated)
+@retval false if failed because no free block was available */
+static
+bool
+buf_page_realloc(
+ buf_pool_t* buf_pool,
+ buf_block_t* block)
+{
+ buf_block_t* new_block;
+
+ ut_ad(buf_pool_withdrawing);
+ ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+
+ new_block = buf_LRU_get_free_only(buf_pool);
+
+ if (new_block == NULL) {
+ return(false); /* free_list was not enough */
+ }
+
+ rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, block->page.id);
+
+ rw_lock_x_lock(hash_lock);
+ mutex_enter(&block->mutex);
+
+ if (buf_page_can_relocate(&block->page)) {
+ mutex_enter(&new_block->mutex);
+
+ memcpy(new_block->frame, block->frame, srv_page_size);
+ new (&new_block->page) buf_page_t(block->page);
+
+ /* relocate LRU list */
+ ut_ad(block->page.in_LRU_list);
+ ut_ad(!block->page.in_zip_hash);
+ ut_d(block->page.in_LRU_list = FALSE);
+
+ buf_LRU_adjust_hp(buf_pool, &block->page);
+
+ buf_page_t* prev_b = UT_LIST_GET_PREV(LRU, &block->page);
+ UT_LIST_REMOVE(buf_pool->LRU, &block->page);
+
+ if (prev_b != NULL) {
+ UT_LIST_INSERT_AFTER(buf_pool->LRU, prev_b, &new_block->page);
+ } else {
+ UT_LIST_ADD_FIRST(buf_pool->LRU, &new_block->page);
+ }
+
+ if (buf_pool->LRU_old == &block->page) {
+ buf_pool->LRU_old = &new_block->page;
+ }
+
+ ut_ad(new_block->page.in_LRU_list);
+
+ /* relocate unzip_LRU list */
+ if (block->page.zip.data != NULL) {
+ ut_ad(block->in_unzip_LRU_list);
+ ut_d(new_block->in_unzip_LRU_list = TRUE);
+ UNIV_MEM_DESC(&new_block->page.zip.data,
+ page_zip_get_size(&new_block->page.zip));
+
+ buf_block_t* prev_block = UT_LIST_GET_PREV(unzip_LRU, block);
+ UT_LIST_REMOVE(buf_pool->unzip_LRU, block);
+
+ ut_d(block->in_unzip_LRU_list = FALSE);
+ block->page.zip.data = NULL;
+ page_zip_set_size(&block->page.zip, 0);
+
+ if (prev_block != NULL) {
+ UT_LIST_INSERT_AFTER(buf_pool->unzip_LRU, prev_block, new_block);
+ } else {
+ UT_LIST_ADD_FIRST(buf_pool->unzip_LRU, new_block);
+ }
+ } else {
+ ut_ad(!block->in_unzip_LRU_list);
+ ut_d(new_block->in_unzip_LRU_list = FALSE);
+ }
+
+ /* relocate buf_pool->page_hash */
+ ut_ad(block->page.in_page_hash);
+ ut_ad(&block->page == buf_page_hash_get_low(buf_pool,
+ block->page.id));
+ ut_d(block->page.in_page_hash = FALSE);
+ ulint fold = block->page.id.fold();
+ ut_ad(fold == new_block->page.id.fold());
+ HASH_REPLACE(buf_page_t, hash, buf_pool->page_hash, fold,
+ &block->page, &new_block->page);
+
+ ut_ad(new_block->page.in_page_hash);
+
+ buf_block_modify_clock_inc(block);
+ memset(block->frame + FIL_PAGE_OFFSET, 0xff, 4);
+ memset(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4);
+ UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
+ buf_block_set_state(block, BUF_BLOCK_REMOVE_HASH);
+ block->page.id
+ = page_id_t(ULINT32_UNDEFINED, ULINT32_UNDEFINED);
+
+ /* Relocate buf_pool->flush_list. */
+ if (block->page.oldest_modification) {
+ buf_flush_relocate_on_flush_list(
+ &block->page, &new_block->page);
+ }
+
+ /* set other flags of buf_block_t */
+
+#ifdef BTR_CUR_HASH_ADAPT
+ /* This code should only be executed by buf_pool_resize(),
+ while the adaptive hash index is disabled. */
+ assert_block_ahi_empty(block);
+ assert_block_ahi_empty_on_init(new_block);
+ ut_ad(!block->index);
+ new_block->index = NULL;
+ new_block->n_hash_helps = 0;
+ new_block->n_fields = 1;
+ new_block->left_side = TRUE;
+#endif /* BTR_CUR_HASH_ADAPT */
+
+ new_block->lock_hash_val = block->lock_hash_val;
+ ut_ad(new_block->lock_hash_val == lock_rec_hash(
+ new_block->page.id.space(),
+ new_block->page.id.page_no()));
+
+ rw_lock_x_unlock(hash_lock);
+ mutex_exit(&new_block->mutex);
+
+ /* free block */
+ buf_block_set_state(block, BUF_BLOCK_MEMORY);
+ buf_LRU_block_free_non_file_page(block);
+
+ mutex_exit(&block->mutex);
+ } else {
+ rw_lock_x_unlock(hash_lock);
+ mutex_exit(&block->mutex);
+
+ /* free new_block */
+ mutex_enter(&new_block->mutex);
+ buf_LRU_block_free_non_file_page(new_block);
+ mutex_exit(&new_block->mutex);
+ }
+
+ return(true); /* free_list was enough */
+}
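/* A minimal sketch, not part of this patch, of the LRU relocation idiom
used in buf_page_realloc() above, expressed with std::list: remember the
position, unlink the old node, and insert the replacement exactly where
the old one was. */
#include <list>

template <typename T>
typename std::list<T>::iterator
relocate(std::list<T>& lru, typename std::list<T>::iterator old_pos,
	 const T& repl)
{
	/* erase() returns the element after the removed one... */
	typename std::list<T>::iterator next = lru.erase(old_pos);
	/* ...so inserting before it puts repl into the old slot. */
	return lru.insert(next, repl);
}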
+
+/** Sets the global variable that feeds MySQL's innodb_buffer_pool_resize_status
+to the specified string. The format and the following parameters are the
+same as the ones used for printf(3).
+@param[in] fmt format
+@param[in] ... extra parameters according to fmt */
+static
+void
+buf_resize_status(
+ const char* fmt,
+ ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+
+ vsnprintf(
+ export_vars.innodb_buffer_pool_resize_status,
+ sizeof(export_vars.innodb_buffer_pool_resize_status),
+ fmt, ap);
+
+ va_end(ap);
+
+ ib::info() << export_vars.innodb_buffer_pool_resize_status;
+}
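/* A minimal sketch, not part of this patch, of the varargs pattern that
buf_resize_status() uses: format into a fixed-size buffer that cannot be
overrun. The buffer and function names are illustrative. */
#include <cstdarg>
#include <cstdio>

static char resize_status_buf[512];

static void set_resize_status(const char* fmt, ...)
{
	va_list	ap;

	va_start(ap, fmt);
	/* vsnprintf() NUL-terminates and truncates on overflow. */
	vsnprintf(resize_status_buf, sizeof resize_status_buf, fmt, ap);
	va_end(ap);
}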
+
+/** Determines if a block is intended to be withdrawn.
+@param[in] buf_pool buffer pool instance
+@param[in] block pointer to control block
+@retval true if the block will be withdrawn */
+bool
+buf_block_will_withdrawn(
+ buf_pool_t* buf_pool,
+ const buf_block_t* block)
+{
+ ut_ad(buf_pool->curr_size < buf_pool->old_size);
+ ut_ad(!buf_pool_resizing || buf_pool_mutex_own(buf_pool));
+
+ const buf_chunk_t* chunk
+ = buf_pool->chunks + buf_pool->n_chunks_new;
+ const buf_chunk_t* echunk
+ = buf_pool->chunks + buf_pool->n_chunks;
+
+ while (chunk < echunk) {
+ if (block >= chunk->blocks
+ && block < chunk->blocks + chunk->size) {
+ return(true);
+ }
+ ++chunk;
+ }
+
+ return(false);
+}
+
+/** Determines if a frame is intended to be withdrawn.
+@param[in] buf_pool buffer pool instance
+@param[in] ptr pointer to a frame
+@retval true if the frame will be withdrawn */
+bool
+buf_frame_will_withdrawn(
+ buf_pool_t* buf_pool,
+ const byte* ptr)
+{
+ ut_ad(buf_pool->curr_size < buf_pool->old_size);
+ ut_ad(!buf_pool_resizing || buf_pool_mutex_own(buf_pool));
+
+ const buf_chunk_t* chunk
+ = buf_pool->chunks + buf_pool->n_chunks_new;
+ const buf_chunk_t* echunk
+ = buf_pool->chunks + buf_pool->n_chunks;
+
+ while (chunk < echunk) {
+ if (ptr >= chunk->blocks->frame
+ && ptr < (chunk->blocks + chunk->size - 1)->frame
+ + UNIV_PAGE_SIZE) {
+ return(true);
+ }
+ ++chunk;
+ }
+
+ return(false);
+}
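/* A minimal sketch, not part of this patch, of the half-open range test
that both predicates above rely on: an object belongs to a chunk iff its
address lies in [base, base + n). */
#include <cstddef>

template <typename T>
static bool in_array(const T* p, const T* base, size_t n)
{
	return p >= base && p < base + n;
}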
+
+/** Withdraw blocks from the end of the buffer pool instance until the
+withdraw list has grown to buf_pool->withdraw_target blocks.
+@param[in] buf_pool buffer pool instance
+@retval true if retry is needed */
+static
+bool
+buf_pool_withdraw_blocks(
+ buf_pool_t* buf_pool)
+{
+ buf_block_t* block;
+ ulint loop_count = 0;
+ ulint i = buf_pool_index(buf_pool);
+
+ ib::info() << "buffer pool " << i
+ << " : start to withdraw the last "
+ << buf_pool->withdraw_target << " blocks.";
+
+ /* Minimize buf_pool->zip_free[i] lists */
+ buf_pool_mutex_enter(buf_pool);
+ buf_buddy_condense_free(buf_pool);
+ buf_pool_mutex_exit(buf_pool);
+
+ while (UT_LIST_GET_LEN(buf_pool->withdraw)
+ < buf_pool->withdraw_target) {
+
+ /* try to withdraw from free_list */
+ ulint count1 = 0;
+
+ buf_pool_mutex_enter(buf_pool);
+ block = reinterpret_cast<buf_block_t*>(
+ UT_LIST_GET_FIRST(buf_pool->free));
+ while (block != NULL
+ && UT_LIST_GET_LEN(buf_pool->withdraw)
+ < buf_pool->withdraw_target) {
+ ut_ad(block->page.in_free_list);
+ ut_ad(!block->page.in_flush_list);
+ ut_ad(!block->page.in_LRU_list);
+ ut_a(!buf_page_in_file(&block->page));
+
+ buf_block_t* next_block;
+ next_block = reinterpret_cast<buf_block_t*>(
+ UT_LIST_GET_NEXT(
+ list, &block->page));
+
+ if (buf_block_will_withdrawn(buf_pool, block)) {
+ /* This should be withdrawn */
+ UT_LIST_REMOVE(
+ buf_pool->free,
+ &block->page);
+ UT_LIST_ADD_LAST(
+ buf_pool->withdraw,
+ &block->page);
+ ut_d(block->in_withdraw_list = TRUE);
+ count1++;
+ }
+
+ block = next_block;
+ }
+ buf_pool_mutex_exit(buf_pool);
+
+		/* not enough withdrawn yet: flush the LRU
+		to replenish the free list */
+ if (UT_LIST_GET_LEN(buf_pool->withdraw)
+ < buf_pool->withdraw_target) {
+ ulint scan_depth;
+ flush_counters_t n;
+
+ /* cap scan_depth with current LRU size. */
+ buf_pool_mutex_enter(buf_pool);
+ scan_depth = UT_LIST_GET_LEN(buf_pool->LRU);
+ buf_pool_mutex_exit(buf_pool);
+
+ scan_depth = ut_min(
+ ut_max(buf_pool->withdraw_target
+ - UT_LIST_GET_LEN(buf_pool->withdraw),
+ static_cast<ulint>(srv_LRU_scan_depth)),
+ scan_depth);
+
+ buf_flush_do_batch(buf_pool, BUF_FLUSH_LRU,
+ scan_depth, 0, &n);
+ buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
+
+ if (n.flushed) {
+ MONITOR_INC_VALUE_CUMULATIVE(
+ MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE,
+ MONITOR_LRU_BATCH_FLUSH_COUNT,
+ MONITOR_LRU_BATCH_FLUSH_PAGES,
+ n.flushed);
+ }
+ }
+
+ /* relocate blocks/buddies in withdrawn area */
+ ulint count2 = 0;
+
+ buf_pool_mutex_enter(buf_pool);
+ buf_page_t* bpage;
+ bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
+ while (bpage != NULL) {
+ BPageMutex* block_mutex;
+ buf_page_t* next_bpage;
+
+ block_mutex = buf_page_get_mutex(bpage);
+ mutex_enter(block_mutex);
+
+ next_bpage = UT_LIST_GET_NEXT(LRU, bpage);
+
+ if (bpage->zip.data != NULL
+ && buf_frame_will_withdrawn(
+ buf_pool,
+ static_cast<byte*>(bpage->zip.data))) {
+
+ if (buf_page_can_relocate(bpage)) {
+ mutex_exit(block_mutex);
+ buf_pool_mutex_exit_forbid(buf_pool);
+ if(!buf_buddy_realloc(
+ buf_pool, bpage->zip.data,
+ page_zip_get_size(
+ &bpage->zip))) {
+
+ /* failed to allocate block */
+ buf_pool_mutex_exit_allow(
+ buf_pool);
+ break;
+ }
+ buf_pool_mutex_exit_allow(buf_pool);
+ mutex_enter(block_mutex);
+ count2++;
+ }
+			/* NOTE: if the page is still in use,
+			it has not been reallocated yet */
+ }
+
+ if (buf_page_get_state(bpage)
+ == BUF_BLOCK_FILE_PAGE
+ && buf_block_will_withdrawn(
+ buf_pool,
+ reinterpret_cast<buf_block_t*>(bpage))) {
+
+ if (buf_page_can_relocate(bpage)) {
+ mutex_exit(block_mutex);
+ buf_pool_mutex_exit_forbid(buf_pool);
+ if(!buf_page_realloc(
+ buf_pool,
+ reinterpret_cast<buf_block_t*>(
+ bpage))) {
+ /* failed to allocate block */
+ buf_pool_mutex_exit_allow(
+ buf_pool);
+ break;
+ }
+ buf_pool_mutex_exit_allow(buf_pool);
+ count2++;
+ } else {
+ mutex_exit(block_mutex);
+ }
+			/* NOTE: if the page is still in use,
+			it has not been reallocated yet */
+ } else {
+ mutex_exit(block_mutex);
+ }
+
+ bpage = next_bpage;
+ }
+ buf_pool_mutex_exit(buf_pool);
+
+ buf_resize_status(
+ "buffer pool %lu : withdrawing blocks. (%lu/%lu)",
+ i, UT_LIST_GET_LEN(buf_pool->withdraw),
+ buf_pool->withdraw_target);
+
+ ib::info() << "buffer pool " << i << " : withdrew "
+ << count1 << " blocks from free list."
+ << " Tried to relocate " << count2 << " pages ("
+ << UT_LIST_GET_LEN(buf_pool->withdraw) << "/"
+ << buf_pool->withdraw_target << ").";
+
+ if (++loop_count >= 10) {
+			/* give up for now;
+			retry after the user threads have paused */
+
+ ib::info() << "buffer pool " << i
+ << " : will retry to withdraw later.";
+
+ /* need retry later */
+ return(true);
+ }
+ }
+
+	/* confirm that enough blocks were withdrawn */
+ const buf_chunk_t* chunk
+ = buf_pool->chunks + buf_pool->n_chunks_new;
+ const buf_chunk_t* echunk
+ = buf_pool->chunks + buf_pool->n_chunks;
+
+ while (chunk < echunk) {
+ block = chunk->blocks;
+ for (ulint j = chunk->size; j--; block++) {
+			/* A block in the withdrawn area that is
+			not in the BUF_BLOCK_NOT_USED state
+			indicates corruption. */
+ ut_a(buf_block_get_state(block)
+ == BUF_BLOCK_NOT_USED);
+ ut_ad(block->in_withdraw_list);
+ }
+ ++chunk;
+ }
+
+ ib::info() << "buffer pool " << i << " : withdrawn target "
+ << UT_LIST_GET_LEN(buf_pool->withdraw) << " blocks.";
+
+ /* retry is not needed */
+ ++buf_withdraw_clock;
+
+ return(false);
+}
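/* A minimal sketch, not part of this patch, of the capped backoff used
when withdrawing has to be retried: the wait doubles (1, 2, 4, 8 s) and
is then pinned at 10 s, mirroring the retry_interval logic above. */
static unsigned next_retry_interval(unsigned cur)
{
	return cur > 5 ? 10 : cur * 2;
}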
+
+/** Resize page_hash and zip_hash for a buffer pool instance.
+@param[in] buf_pool buffer pool instance */
+static
+void
+buf_pool_resize_hash(
+ buf_pool_t* buf_pool)
+{
+ hash_table_t* new_hash_table;
+
+ ut_ad(buf_pool->page_hash_old == NULL);
+
+ /* recreate page_hash */
+ new_hash_table = ib_recreate(
+ buf_pool->page_hash, 2 * buf_pool->curr_size);
+
+ for (ulint i = 0; i < hash_get_n_cells(buf_pool->page_hash); i++) {
+ buf_page_t* bpage;
+
+ bpage = static_cast<buf_page_t*>(
+ HASH_GET_FIRST(
+ buf_pool->page_hash, i));
+
+ while (bpage) {
+ buf_page_t* prev_bpage = bpage;
+ ulint fold;
+
+ bpage = static_cast<buf_page_t*>(
+ HASH_GET_NEXT(
+ hash, prev_bpage));
+
+ fold = prev_bpage->id.fold();
+
+ HASH_DELETE(buf_page_t, hash,
+ buf_pool->page_hash, fold,
+ prev_bpage);
+
+ HASH_INSERT(buf_page_t, hash,
+ new_hash_table, fold,
+ prev_bpage);
+ }
+ }
+
+ buf_pool->page_hash_old = buf_pool->page_hash;
+ buf_pool->page_hash = new_hash_table;
+
+ /* recreate zip_hash */
+ new_hash_table = hash_create(2 * buf_pool->curr_size);
+
+ for (ulint i = 0; i < hash_get_n_cells(buf_pool->zip_hash); i++) {
+ buf_page_t* bpage;
+
+ bpage = static_cast<buf_page_t*>(
+ HASH_GET_FIRST(buf_pool->zip_hash, i));
+
+ while (bpage) {
+ buf_page_t* prev_bpage = bpage;
+ ulint fold;
+
+ bpage = static_cast<buf_page_t*>(
+ HASH_GET_NEXT(
+ hash, prev_bpage));
+
+ fold = BUF_POOL_ZIP_FOLD(
+ reinterpret_cast<buf_block_t*>(
+ prev_bpage));
+
+ HASH_DELETE(buf_page_t, hash,
+ buf_pool->zip_hash, fold,
+ prev_bpage);
+
+ HASH_INSERT(buf_page_t, hash,
+ new_hash_table, fold,
+ prev_bpage);
+ }
+ }
+
+ hash_table_free(buf_pool->zip_hash);
+ buf_pool->zip_hash = new_hash_table;
+}
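/* A minimal sketch, not part of this patch, of the rehash walk above,
using standard containers: every entry is detached from its old cell and
re-inserted into the new table according to its fold value. */
#include <forward_list>
#include <vector>

struct Entry { unsigned long fold; };

static std::vector<std::forward_list<Entry> >
rehash(std::vector<std::forward_list<Entry> >& old_tab, size_t new_cells)
{
	std::vector<std::forward_list<Entry> > new_tab(new_cells);

	for (size_t i = 0; i < old_tab.size(); i++) {
		while (!old_tab[i].empty()) {
			Entry	e = old_tab[i].front();
			old_tab[i].pop_front();		/* HASH_DELETE */
			new_tab[e.fold % new_cells]
				.push_front(e);		/* HASH_INSERT */
		}
	}

	return new_tab;
}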
+
+/** Resize the buffer pool from srv_buf_pool_old_size to
+srv_buf_pool_size. */
+static
void
-buf_pool_clear_hash_index(void)
-/*===========================*/
+buf_pool_resize()
+{
+ buf_pool_t* buf_pool;
+ ulint new_instance_size;
+ bool warning = false;
+
+ NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE;
+
+ ut_ad(!buf_pool_resizing);
+ ut_ad(!buf_pool_withdrawing);
+ ut_ad(srv_buf_pool_chunk_unit > 0);
+
+ new_instance_size = srv_buf_pool_size / srv_buf_pool_instances;
+ new_instance_size /= UNIV_PAGE_SIZE;
+
+ buf_resize_status("Resizing buffer pool from " ULINTPF " to "
+ ULINTPF " (unit=" ULINTPF ").",
+ srv_buf_pool_old_size, srv_buf_pool_size,
+ srv_buf_pool_chunk_unit);
+
+	/* set the new size limit for all buffer pool instances */
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
+ buf_pool = buf_pool_from_array(i);
+ buf_pool_mutex_enter(buf_pool);
+
+ ut_ad(buf_pool->curr_size == buf_pool->old_size);
+ ut_ad(buf_pool->n_chunks_new == buf_pool->n_chunks);
+ ut_ad(UT_LIST_GET_LEN(buf_pool->withdraw) == 0);
+ ut_ad(buf_pool->flush_rbt == NULL);
+
+ buf_pool->curr_size = new_instance_size;
+
+ buf_pool->n_chunks_new = new_instance_size * UNIV_PAGE_SIZE
+ / srv_buf_pool_chunk_unit;
+
+ buf_pool_mutex_exit(buf_pool);
+ }
+#ifdef BTR_CUR_HASH_ADAPT
+ /* disable AHI if needed */
+ bool btr_search_disabled = false;
+
+ buf_resize_status("Disabling adaptive hash index.");
+
+ btr_search_s_lock_all();
+ if (btr_search_enabled) {
+ btr_search_s_unlock_all();
+ btr_search_disabled = true;
+ } else {
+ btr_search_s_unlock_all();
+ }
+
+ btr_search_disable(true);
+
+ if (btr_search_disabled) {
+ ib::info() << "disabled adaptive hash index.";
+ }
+#endif /* BTR_CUR_HASH_ADAPT */
+
+ /* set withdraw target */
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
+ buf_pool = buf_pool_from_array(i);
+ if (buf_pool->curr_size < buf_pool->old_size) {
+ ulint withdraw_target = 0;
+
+ const buf_chunk_t* chunk
+ = buf_pool->chunks + buf_pool->n_chunks_new;
+ const buf_chunk_t* echunk
+ = buf_pool->chunks + buf_pool->n_chunks;
+
+ while (chunk < echunk) {
+ withdraw_target += chunk->size;
+ ++chunk;
+ }
+
+ ut_ad(buf_pool->withdraw_target == 0);
+ buf_pool->withdraw_target = withdraw_target;
+ buf_pool_withdrawing = true;
+ }
+ }
+
+ buf_resize_status("Withdrawing blocks to be shrunken.");
+
+ time_t withdraw_started = time(NULL);
+ ulint message_interval = 60;
+ ulint retry_interval = 1;
+
+withdraw_retry:
+ bool should_retry_withdraw = false;
+
+	/* wait until the number of blocks fits the new size (if needed) */
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
+ buf_pool = buf_pool_from_array(i);
+ if (buf_pool->curr_size < buf_pool->old_size) {
+
+ should_retry_withdraw |=
+ buf_pool_withdraw_blocks(buf_pool);
+ }
+ }
+
+ if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
+		/* abort the resize due to shutdown */
+ buf_pool_withdrawing = false;
+ return;
+ }
+
+ /* abort buffer pool load */
+ buf_load_abort();
+
+ const time_t current_time = time(NULL);
+
+ if (should_retry_withdraw
+ && difftime(current_time, withdraw_started) >= message_interval) {
+
+ if (message_interval > 900) {
+ message_interval = 1800;
+ } else {
+ message_interval *= 2;
+ }
+
+ lock_mutex_enter();
+ trx_sys_mutex_enter();
+ bool found = false;
+ for (trx_t* trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
+ trx != NULL;
+ trx = UT_LIST_GET_NEXT(mysql_trx_list, trx)) {
+ if (trx->state != TRX_STATE_NOT_STARTED
+ && trx->mysql_thd != NULL
+ && withdraw_started > trx->start_time) {
+ if (!found) {
+ ib::warn() <<
+ "The following trx might hold"
+ " the blocks in buffer pool to"
+ " be withdrawn. Buffer pool"
+ " resizing can complete only"
+ " after all the transactions"
+ " below release the blocks.";
+ found = true;
+ }
+
+ lock_trx_print_wait_and_mvcc_state(
+ stderr, trx, current_time);
+ }
+ }
+ trx_sys_mutex_exit();
+ lock_mutex_exit();
+
+ withdraw_started = current_time;
+ }
+
+ if (should_retry_withdraw) {
+ ib::info() << "Will retry to withdraw " << retry_interval
+ << " seconds later.";
+ os_thread_sleep(retry_interval * 1000000);
+
+ if (retry_interval > 5) {
+ retry_interval = 10;
+ } else {
+ retry_interval *= 2;
+ }
+
+ goto withdraw_retry;
+ }
+
+ buf_pool_withdrawing = false;
+
+ buf_resize_status("Latching whole of buffer pool.");
+
+#ifndef DBUG_OFF
+ {
+ bool should_wait = true;
+
+ while (should_wait) {
+ should_wait = false;
+ DBUG_EXECUTE_IF(
+ "ib_buf_pool_resize_wait_before_resize",
+ should_wait = true; os_thread_sleep(10000););
+ }
+ }
+#endif /* !DBUG_OFF */
+
+ if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
+ return;
+ }
+
+ /* Indicate critical path */
+ buf_pool_resizing = true;
+
+ /* Acquire all buf_pool_mutex/hash_lock */
+ for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
+ buf_pool_t* buf_pool = buf_pool_from_array(i);
+
+ buf_pool_mutex_enter(buf_pool);
+ }
+ for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
+ buf_pool_t* buf_pool = buf_pool_from_array(i);
+
+ hash_lock_x_all(buf_pool->page_hash);
+ }
+
+ buf_chunk_map_reg = UT_NEW_NOKEY(buf_pool_chunk_map_t());
+
+ /* add/delete chunks */
+ for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
+ buf_pool_t* buf_pool = buf_pool_from_array(i);
+ buf_chunk_t* chunk;
+ buf_chunk_t* echunk;
+
+ buf_resize_status("buffer pool %lu :"
+ " resizing with chunks %lu to %lu.",
+ i, buf_pool->n_chunks, buf_pool->n_chunks_new);
+
+ if (buf_pool->n_chunks_new < buf_pool->n_chunks) {
+ /* delete chunks */
+ chunk = buf_pool->chunks
+ + buf_pool->n_chunks_new;
+ echunk = buf_pool->chunks + buf_pool->n_chunks;
+
+ ulint sum_freed = 0;
+
+ while (chunk < echunk) {
+ buf_block_t* block = chunk->blocks;
+
+ for (ulint j = chunk->size;
+ j--; block++) {
+ mutex_free(&block->mutex);
+ rw_lock_free(&block->lock);
+
+ ut_d(rw_lock_free(
+ &block->debug_latch));
+ }
+
+ buf_pool->allocator.deallocate_large(
+ chunk->mem, &chunk->mem_pfx);
+
+ sum_freed += chunk->size;
+
+ ++chunk;
+ }
+
+ /* discard withdraw list */
+ UT_LIST_INIT(buf_pool->withdraw,
+ &buf_page_t::list);
+ buf_pool->withdraw_target = 0;
+
+ ib::info() << "buffer pool " << i << " : "
+ << buf_pool->n_chunks - buf_pool->n_chunks_new
+ << " chunks (" << sum_freed
+ << " blocks) were freed.";
+
+ buf_pool->n_chunks = buf_pool->n_chunks_new;
+ }
+
+ {
+ /* reallocate buf_pool->chunks */
+ const ulint new_chunks_size
+ = buf_pool->n_chunks_new * sizeof(*chunk);
+
+ buf_chunk_t* new_chunks
+ = reinterpret_cast<buf_chunk_t*>(
+ ut_zalloc_nokey_nofatal(new_chunks_size));
+
+ DBUG_EXECUTE_IF("buf_pool_resize_chunk_null",
+ ut_free(new_chunks);
+ new_chunks = NULL;);
+
+ if (new_chunks == NULL) {
+ ib::error() << "buffer pool " << i
+ << " : failed to allocate"
+ " the chunk array.";
+ buf_pool->n_chunks_new
+ = buf_pool->n_chunks;
+ warning = true;
+ buf_pool->chunks_old = NULL;
+ for (ulint j = 0; j < buf_pool->n_chunks_new; j++) {
+ buf_pool_register_chunk(&(buf_pool->chunks[j]));
+ }
+ goto calc_buf_pool_size;
+ }
+
+ ulint n_chunks_copy = ut_min(buf_pool->n_chunks_new,
+ buf_pool->n_chunks);
+
+ memcpy(new_chunks, buf_pool->chunks,
+ n_chunks_copy * sizeof(*chunk));
+
+ for (ulint j = 0; j < n_chunks_copy; j++) {
+ buf_pool_register_chunk(&new_chunks[j]);
+ }
+
+ buf_pool->chunks_old = buf_pool->chunks;
+ buf_pool->chunks = new_chunks;
+ }
+
+
+ if (buf_pool->n_chunks_new > buf_pool->n_chunks) {
+ /* add chunks */
+ chunk = buf_pool->chunks + buf_pool->n_chunks;
+ echunk = buf_pool->chunks
+ + buf_pool->n_chunks_new;
+
+ ulint sum_added = 0;
+ ulint n_chunks = buf_pool->n_chunks;
+
+ while (chunk < echunk) {
+ ulong unit = srv_buf_pool_chunk_unit;
+
+ if (!buf_chunk_init(buf_pool, chunk, unit)) {
+
+ ib::error() << "buffer pool " << i
+ << " : failed to allocate"
+ " new memory.";
+
+ warning = true;
+
+ buf_pool->n_chunks_new
+ = n_chunks;
+
+ break;
+ }
+
+ sum_added += chunk->size;
+
+ ++n_chunks;
+ ++chunk;
+ }
+
+ ib::info() << "buffer pool " << i << " : "
+ << buf_pool->n_chunks_new - buf_pool->n_chunks
+ << " chunks (" << sum_added
+ << " blocks) were added.";
+
+ buf_pool->n_chunks = n_chunks;
+ }
+calc_buf_pool_size:
+
+ /* recalc buf_pool->curr_size */
+ ulint new_size = 0;
+
+ chunk = buf_pool->chunks;
+ do {
+ new_size += chunk->size;
+ } while (++chunk < buf_pool->chunks
+ + buf_pool->n_chunks);
+
+ buf_pool->curr_size = new_size;
+ buf_pool->n_chunks_new = buf_pool->n_chunks;
+
+ if (buf_pool->chunks_old) {
+ ut_free(buf_pool->chunks_old);
+ buf_pool->chunks_old = NULL;
+ }
+ }
+
+ buf_pool_chunk_map_t* chunk_map_old = buf_chunk_map_ref;
+ buf_chunk_map_ref = buf_chunk_map_reg;
+
+ /* set instance sizes */
+ {
+ ulint curr_size = 0;
+
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
+ buf_pool = buf_pool_from_array(i);
+
+ ut_ad(UT_LIST_GET_LEN(buf_pool->withdraw) == 0);
+
+ buf_pool->read_ahead_area =
+ ut_min(BUF_READ_AHEAD_PAGES,
+ ut_2_power_up(buf_pool->curr_size /
+ BUF_READ_AHEAD_PORTION));
+ buf_pool->curr_pool_size
+ = buf_pool->curr_size * UNIV_PAGE_SIZE;
+ curr_size += buf_pool->curr_pool_size;
+ buf_pool->old_size = buf_pool->curr_size;
+ }
+ srv_buf_pool_curr_size = curr_size;
+ innodb_set_buf_pool_size(buf_pool_size_align(curr_size));
+ }
+
+ const bool new_size_too_diff
+ = srv_buf_pool_base_size > srv_buf_pool_size * 2
+ || srv_buf_pool_base_size * 2 < srv_buf_pool_size;
+
+ /* Normalize page_hash and zip_hash,
+ if the new size is too different */
+ if (!warning && new_size_too_diff) {
+
+ buf_resize_status("Resizing hash tables.");
+
+ for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
+ buf_pool_t* buf_pool = buf_pool_from_array(i);
+
+ buf_pool_resize_hash(buf_pool);
+
+ ib::info() << "buffer pool " << i
+ << " : hash tables were resized.";
+ }
+ }
+
+ /* Release all buf_pool_mutex/page_hash */
+ for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
+ buf_pool_t* buf_pool = buf_pool_from_array(i);
+
+ hash_unlock_x_all(buf_pool->page_hash);
+ buf_pool_mutex_exit(buf_pool);
+
+ if (buf_pool->page_hash_old != NULL) {
+ hash_table_free(buf_pool->page_hash_old);
+ buf_pool->page_hash_old = NULL;
+ }
+ }
+
+ UT_DELETE(chunk_map_old);
+
+ buf_pool_resizing = false;
+
+ /* Normalize other components, if the new size is too different */
+ if (!warning && new_size_too_diff) {
+ srv_buf_pool_base_size = srv_buf_pool_size;
+
+ buf_resize_status("Resizing also other hash tables.");
+
+ /* normalize lock_sys */
+ srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);
+ lock_sys_resize(srv_lock_table_size);
+
+ /* normalize btr_search_sys */
+ btr_search_sys_resize(
+ buf_pool_get_curr_size() / sizeof(void*) / 64);
+
+ /* normalize dict_sys */
+ dict_resize();
+
+ ib::info() << "Resized hash tables at lock_sys,"
+#ifdef BTR_CUR_HASH_ADAPT
+ " adaptive hash index,"
+#endif /* BTR_CUR_HASH_ADAPT */
+ " dictionary.";
+ }
+
+ /* normalize ibuf->max_size */
+ ibuf_max_size_update(srv_change_buffer_max_size);
+
+ if (srv_buf_pool_old_size != srv_buf_pool_size) {
+
+ ib::info() << "Completed to resize buffer pool from "
+ << srv_buf_pool_old_size
+ << " to " << srv_buf_pool_size << ".";
+ srv_buf_pool_old_size = srv_buf_pool_size;
+ }
+
+#ifdef BTR_CUR_HASH_ADAPT
+ /* enable AHI if needed */
+ if (btr_search_disabled) {
+ btr_search_enable();
+ ib::info() << "Re-enabled adaptive hash index.";
+ }
+#endif /* BTR_CUR_HASH_ADAPT */
+
+ char now[32];
+
+ ut_sprintf_timestamp(now);
+ if (!warning) {
+ buf_resize_status("Completed resizing buffer pool at %s.",
+ now);
+ } else {
+ buf_resize_status("Resizing buffer pool failed,"
+ " finished resizing at %s.", now);
+ }
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ ut_a(buf_validate());
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+ return;
+}
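/* A minimal sketch, not part of this patch, of the "changed by more than
a factor of two" test that decides above whether page_hash, lock_sys, the
adaptive hash index and dict_sys are re-normalized after a resize. */
static bool size_too_diff(unsigned long base, unsigned long cur)
{
	return base > cur * 2 || base * 2 < cur;
}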
+
+/** This is the thread for resizing the buffer pool. It waits for an
+event and, when woken up, performs a resize and then waits again.
+@return this function does not return; it calls os_thread_exit()
+*/
+extern "C"
+os_thread_ret_t
+DECLARE_THREAD(buf_resize_thread)(void*)
+{
+ my_thread_init();
+
+ while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
+ os_event_wait(srv_buf_resize_event);
+ os_event_reset(srv_buf_resize_event);
+
+ if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
+ break;
+ }
+
+ buf_pool_mutex_enter_all();
+ if (srv_buf_pool_old_size == srv_buf_pool_size) {
+ buf_pool_mutex_exit_all();
+ std::ostringstream sout;
+ sout << "Size did not change (old size = new size = "
+ << srv_buf_pool_size << ". Nothing to do.";
+ buf_resize_status(sout.str().c_str());
+
+ /* nothing to do */
+ continue;
+ }
+ buf_pool_mutex_exit_all();
+
+ buf_pool_resize();
+ }
+
+ srv_buf_resize_thread_active = false;
+
+ my_thread_end();
+ os_thread_exit();
+
+ OS_THREAD_DUMMY_RETURN;
+}
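/* A minimal sketch, not part of this patch, of the wait/reset/work loop
that buf_resize_thread() follows, with a condition variable standing in
for the os_event primitives. All names are illustrative. */
#include <condition_variable>
#include <mutex>

static std::mutex		wake_mutex;
static std::condition_variable	wake_cv;
static bool			wake_flag;
static bool			shutdown_flag;

static void resize_worker()
{
	std::unique_lock<std::mutex> lk(wake_mutex);

	while (!shutdown_flag) {
		/* os_event_wait() analogue */
		wake_cv.wait(lk, [] { return wake_flag || shutdown_flag; });
		wake_flag = false;	/* os_event_reset() analogue */

		if (shutdown_flag) {
			break;
		}

		/* ...skip if the size did not change, else resize... */
	}
}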
+
+#ifdef BTR_CUR_HASH_ADAPT
+/** Clear the adaptive hash index on all pages in the buffer pool. */
+void
+buf_pool_clear_hash_index()
{
ulint p;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(btr_search_own_all(RW_LOCK_X));
+ ut_ad(!buf_pool_resizing);
ut_ad(!btr_search_enabled);
for (p = 0; p < srv_buf_pool_instances; p++) {
@@ -1912,30 +3188,49 @@ buf_pool_clear_hash_index(void)
for (; i--; block++) {
dict_index_t* index = block->index;
+ assert_block_ahi_valid(block);
/* We can set block->index = NULL
- when we have an x-latch on btr_search_latch;
- see the comment in buf0buf.h */
+ and block->n_pointers = 0
+ when btr_search_own_all(RW_LOCK_X);
+ see the comments in buf0buf.h */
if (!index) {
- /* Not hashed */
+# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+ ut_a(!block->n_pointers);
+# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
continue;
}
- block->index = NULL;
+ ut_d(buf_page_state state
+ = buf_block_get_state(block));
+ /* Another thread may have set the
+ state to BUF_BLOCK_REMOVE_HASH in
+ buf_LRU_block_remove_hashed().
+
+ The state change in buf_page_realloc()
+ is not observable here, because in
+ that case we would have !block->index.
+
+ In the end, the entire adaptive hash
+ index will be removed. */
+ ut_ad(state == BUF_BLOCK_FILE_PAGE
+ || state == BUF_BLOCK_REMOVE_HASH);
# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
block->n_pointers = 0;
# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+ block->index = NULL;
}
}
}
}
+#endif /* BTR_CUR_HASH_ADAPT */
/********************************************************************//**
Relocate a buffer control block. Relocates the block on the LRU list
and in buf_pool->page_hash. Does not relocate bpage->list.
The caller must take care of relocating bpage->list. */
-UNIV_INTERN
+static
void
buf_relocate(
/*=========*/
@@ -1945,11 +3240,8 @@ buf_relocate(
buf_page_t* dpage) /*!< in/out: destination control block */
{
buf_page_t* b;
- ulint fold;
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- fold = buf_page_address_fold(bpage->space, bpage->offset);
-
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(buf_page_hash_lock_held_x(buf_pool, bpage));
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
@@ -1958,10 +3250,7 @@ buf_relocate(
ut_ad(bpage->in_LRU_list);
ut_ad(!bpage->in_zip_hash);
ut_ad(bpage->in_page_hash);
- ut_ad(bpage == buf_page_hash_get_low(buf_pool,
- bpage->space,
- bpage->offset,
- fold));
+ ut_ad(bpage == buf_page_hash_get_low(buf_pool, bpage->id));
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
#ifdef UNIV_DEBUG
@@ -1990,12 +3279,12 @@ buf_relocate(
/* relocate buf_pool->LRU */
b = UT_LIST_GET_PREV(LRU, bpage);
- UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
+ UT_LIST_REMOVE(buf_pool->LRU, bpage);
- if (b) {
- UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage);
+ if (b != NULL) {
+ UT_LIST_INSERT_AFTER(buf_pool->LRU, b, dpage);
} else {
- UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage);
+ UT_LIST_ADD_FIRST(buf_pool->LRU, dpage);
}
if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
@@ -2015,12 +3304,13 @@ buf_relocate(
#endif /* UNIV_LRU_DEBUG */
}
- ut_d(UT_LIST_VALIDATE(
- LRU, buf_page_t, buf_pool->LRU, CheckInLRUList()));
+ ut_d(CheckInLRUList::validate(buf_pool));
/* relocate buf_pool->page_hash */
- HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
- HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
+ ulint fold = bpage->id.fold();
+ ut_ad(fold == dpage->id.fold());
+ HASH_REPLACE(buf_page_t, hash, buf_pool->page_hash, fold, bpage,
+ dpage);
}
/** Hazard Pointer implementation. */
@@ -2101,15 +3391,14 @@ LRUItr::start()
return(m_hp);
}
-/********************************************************************//**
-Determine if a block is a sentinel for a buffer pool watch.
-@return TRUE if a sentinel for a buffer pool watch, FALSE if not */
-UNIV_INTERN
+/** Determine if a block is a sentinel for a buffer pool watch.
+@param[in] buf_pool buffer pool instance
+@param[in] bpage block
+@return TRUE if a sentinel for a buffer pool watch, FALSE if not */
ibool
buf_pool_watch_is_sentinel(
-/*=======================*/
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- const buf_page_t* bpage) /*!< in: block */
+ const buf_pool_t* buf_pool,
+ const buf_page_t* bpage)
{
/* We must also own the appropriate hash lock. */
ut_ad(buf_page_hash_lock_held_s_or_x(buf_pool, bpage));
@@ -2128,35 +3417,30 @@ buf_pool_watch_is_sentinel(
ut_ad(!bpage->in_zip_hash);
ut_ad(bpage->in_page_hash);
ut_ad(bpage->zip.data == NULL);
- ut_ad(bpage->buf_fix_count > 0);
return(TRUE);
}
-/****************************************************************//**
-Add watch for the given page to be read in. Caller must have
+/** Add watch for the given page to be read in. Caller must have
appropriate hash_lock for the bpage. This function may release the
hash_lock and reacquire it.
+@param[in] page_id page id
+@param[in,out] hash_lock hash_lock currently latched
@return NULL if watch set, block if the page is in the buffer pool */
-UNIV_INTERN
+static
buf_page_t*
buf_pool_watch_set(
-/*===============*/
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: page number */
- ulint fold) /*!< in: buf_page_address_fold(space, offset) */
+ const page_id_t page_id,
+ rw_lock_t** hash_lock)
{
buf_page_t* bpage;
ulint i;
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
- rw_lock_t* hash_lock;
+ buf_pool_t* buf_pool = buf_pool_get(page_id);
- hash_lock = buf_page_hash_lock_get(buf_pool, fold);
+ ut_ad(*hash_lock == buf_page_hash_lock_get(buf_pool, page_id));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(*hash_lock, RW_LOCK_X));
- bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
+ bpage = buf_page_hash_get_low(buf_pool, page_id);
if (bpage != NULL) {
page_found:
@@ -2166,11 +3450,7 @@ page_found:
}
/* Add to an existing watch. */
-#ifdef PAGE_ATOMIC_REF_COUNT
- os_atomic_increment_uint32(&bpage->buf_fix_count, 1);
-#else
- ++bpage->buf_fix_count;
-#endif /* PAGE_ATOMIC_REF_COUNT */
+ buf_block_fix(bpage);
return(NULL);
}
@@ -2182,28 +3462,25 @@ page_found:
buf_pool->watch[]. However, it is not in the critical code path
as this function will be called only by the purge thread. */
-/* Enable this for checksum error messages. Currently on by
-default on UNIV_DEBUG for encryption bugs. */
-#ifdef UNIV_DEBUG
-#define UNIV_DEBUG_LEVEL2 1
-#endif
-
/* To obey latching order first release the hash_lock. */
- rw_lock_x_unlock(hash_lock);
+ rw_lock_x_unlock(*hash_lock);
buf_pool_mutex_enter(buf_pool);
hash_lock_x_all(buf_pool->page_hash);
+	/* If buf_pool->mutex is not held, page_hash can change. */
+ *hash_lock = buf_page_hash_lock_get(buf_pool, page_id);
+
/* We have to recheck that the page
was not loaded or a watch set by some other
purge thread. This is because of the small
	time window between releasing the hash_lock
	and acquiring the buf_pool mutex above. */
- bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
+ bpage = buf_page_hash_get_low(buf_pool, page_id);
if (UNIV_LIKELY_NULL(bpage)) {
buf_pool_mutex_exit(buf_pool);
- hash_unlock_x_all_but(buf_pool->page_hash, hash_lock);
+ hash_unlock_x_all_but(buf_pool->page_hash, *hash_lock);
goto page_found;
}
@@ -2230,20 +3507,19 @@ default on UNIV_DEBUG for encryption bugs. */
buf_block_t::mutex or buf_pool->zip_mutex or both. */
bpage->state = BUF_BLOCK_ZIP_PAGE;
- bpage->space = static_cast<ib_uint32_t>(space);
- bpage->offset = static_cast<ib_uint32_t>(offset);
+ bpage->id = page_id;
bpage->buf_fix_count = 1;
ut_d(bpage->in_page_hash = TRUE);
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
- fold, bpage);
+ page_id.fold(), bpage);
buf_pool_mutex_exit(buf_pool);
/* Once the sentinel is in the page_hash we can
safely release all locks except just the
relevant hash_lock */
hash_unlock_x_all_but(buf_pool->page_hash,
- hash_lock);
+ *hash_lock);
return(NULL);
case BUF_BLOCK_ZIP_PAGE:
@@ -2265,48 +3541,40 @@ default on UNIV_DEBUG for encryption bugs. */
return(NULL);
}
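/* A minimal sketch, not part of this patch, of the drop/upgrade/recheck
idiom used by buf_pool_watch_set() above: the cheap latch is released to
honour the latching order, the stronger latch is taken, and the lookup is
repeated because the table may have changed in between. Names are
illustrative. */
#include <mutex>
#include <unordered_map>

static std::mutex				pool_mutex;
static std::unordered_map<unsigned long, int>	page_hash;

static int* find_or_install_watch(unsigned long id)
{
	/* The caller's first (cheap) lookup missed; take the big latch
	and look again before installing anything. */
	std::lock_guard<std::mutex> lk(pool_mutex);

	std::unordered_map<unsigned long, int>::iterator it
		= page_hash.find(id);
	if (it != page_hash.end()) {
		return &it->second;	/* raced: inserted meanwhile */
	}

	/* Install the sentinel under the strong latch. */
	return &page_hash.emplace(id, 1).first->second;
}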
-/****************************************************************//**
-Remove the sentinel block for the watch before replacing it with a real block.
-buf_page_watch_clear() or buf_page_watch_occurred() will notice that
-the block has been replaced with the real block.
+/** Remove the sentinel block for the watch before replacing it with a
+real block. buf_page_watch_clear() or buf_page_watch_occurred() will notice
+that the block has been replaced with the real block.
+@param[in,out] buf_pool buffer pool instance
+@param[in,out] watch sentinel for watch
@return reference count, to be added to the replacement block */
static
void
buf_pool_watch_remove(
-/*==================*/
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- ulint fold, /*!< in: buf_page_address_fold(
- space, offset) */
- buf_page_t* watch) /*!< in/out: sentinel for watch */
+ buf_pool_t* buf_pool,
+ buf_page_t* watch)
{
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
/* We must also own the appropriate hash_bucket mutex. */
- rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold);
- ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, watch->id);
+ ut_ad(rw_lock_own(hash_lock, RW_LOCK_X));
+#endif /* UNIV_DEBUG */
ut_ad(buf_pool_mutex_own(buf_pool));
- HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
+ HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, watch->id.fold(),
+ watch);
ut_d(watch->in_page_hash = FALSE);
watch->buf_fix_count = 0;
watch->state = BUF_BLOCK_POOL_WATCH;
}
-/****************************************************************//**
-Stop watching if the page has been read in.
-buf_pool_watch_set(space,offset) must have returned NULL before. */
-UNIV_INTERN
-void
-buf_pool_watch_unset(
-/*=================*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: page number */
+/** Stop watching if the page has been read in.
+buf_pool_watch_set(same_page_id) must have returned NULL before.
+@param[in] page_id page id */
+void buf_pool_watch_unset(const page_id_t page_id)
{
buf_page_t* bpage;
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
- ulint fold = buf_page_address_fold(space, offset);
- rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold);
+ buf_pool_t* buf_pool = buf_pool_get(page_id);
/* We only need to have buf_pool mutex in case where we end
up calling buf_pool_watch_remove but to obey latching order
@@ -2315,58 +3583,42 @@ buf_pool_watch_unset(
called from the purge thread. */
buf_pool_mutex_enter(buf_pool);
+ rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, page_id);
rw_lock_x_lock(hash_lock);
- /* The page must exist because buf_pool_watch_set() increments
- buf_fix_count. */
-
- bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
-
- if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
- buf_block_unfix(reinterpret_cast<buf_block_t*>(bpage));
- } else {
-
- ut_ad(bpage->buf_fix_count > 0);
-
-#ifdef PAGE_ATOMIC_REF_COUNT
- os_atomic_decrement_uint32(&bpage->buf_fix_count, 1);
-#else
- --bpage->buf_fix_count;
-#endif /* PAGE_ATOMIC_REF_COUNT */
+ /* The page must exist because buf_pool_watch_set()
+ increments buf_fix_count. */
+ bpage = buf_page_hash_get_low(buf_pool, page_id);
- if (bpage->buf_fix_count == 0) {
- buf_pool_watch_remove(buf_pool, fold, bpage);
- }
+ if (buf_block_unfix(bpage) == 0
+ && buf_pool_watch_is_sentinel(buf_pool, bpage)) {
+ buf_pool_watch_remove(buf_pool, bpage);
}
buf_pool_mutex_exit(buf_pool);
rw_lock_x_unlock(hash_lock);
}
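/* A minimal sketch, not part of this patch, of the refcount-guarded
removal above: only the caller that drops the last reference, and only
for a sentinel, removes the watch. Names are illustrative. */
#include <atomic>

struct Watch {
	std::atomic<unsigned>	fix_count;
	bool			is_sentinel;
};

static bool unfix_and_maybe_remove(Watch& w)
{
	/* fetch_sub() returns the previous value; 1 means that this
	caller dropped the last reference. */
	return w.fix_count.fetch_sub(1) == 1 && w.is_sentinel;
}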
-/****************************************************************//**
-Check if the page has been read in.
-This may only be called after buf_pool_watch_set(space,offset)
-has returned NULL and before invoking buf_pool_watch_unset(space,offset).
-@return FALSE if the given page was not read in, TRUE if it was */
-UNIV_INTERN
-ibool
-buf_pool_watch_occurred(
-/*====================*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: page number */
+/** Check if the page has been read in.
+This may only be called after buf_pool_watch_set(same_page_id)
+has returned NULL and before invoking buf_pool_watch_unset(same_page_id).
+@param[in] page_id page id
+@return false if the given page was not read in, true if it was */
+bool buf_pool_watch_occurred(const page_id_t page_id)
{
- ibool ret;
+ bool ret;
buf_page_t* bpage;
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
- ulint fold = buf_page_address_fold(space, offset);
- rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool,
- fold);
+ buf_pool_t* buf_pool = buf_pool_get(page_id);
+ rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, page_id);
rw_lock_s_lock(hash_lock);
+	/* If buf_pool->mutex is not held, page_hash can change. */
+ hash_lock = buf_page_hash_lock_s_confirm(hash_lock, buf_pool, page_id);
+
/* The page must exist because buf_pool_watch_set()
increments buf_fix_count. */
- bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
+ bpage = buf_page_hash_get_low(buf_pool, page_id);
ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
rw_lock_s_unlock(hash_lock);
@@ -2378,7 +3630,6 @@ buf_pool_watch_occurred(
Moves a page to the start of the buffer pool LRU list. This high-level
function can be used to prevent an important page from slipping out of
the buffer pool. */
-UNIV_INTERN
void
buf_page_make_young(
/*================*/
@@ -2417,54 +3668,24 @@ buf_page_make_young_if_needed(
}
}
-/********************************************************************//**
-Resets the check_index_page_at_flush field of a page if found in the buffer
-pool. */
-UNIV_INTERN
-void
-buf_reset_check_index_page_at_flush(
-/*================================*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: page number */
-{
- buf_block_t* block;
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
-
- buf_pool_mutex_enter(buf_pool);
-
- block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
-
- if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
- ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
- block->check_index_page_at_flush = FALSE;
- }
-
- buf_pool_mutex_exit(buf_pool);
-}
+#ifdef UNIV_DEBUG
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
-/********************************************************************//**
-Sets file_page_was_freed TRUE if the page is found in the buffer pool.
+/** Sets file_page_was_freed TRUE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
reallocated.
-@return control block if found in page hash table, otherwise NULL */
-UNIV_INTERN
-buf_page_t*
-buf_page_set_file_page_was_freed(
-/*=============================*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: page number */
+@param[in] page_id page id
+@return control block if found in page hash table, otherwise NULL */
+buf_page_t* buf_page_set_file_page_was_freed(const page_id_t page_id)
{
buf_page_t* bpage;
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
+ buf_pool_t* buf_pool = buf_pool_get(page_id);
rw_lock_t* hash_lock;
- bpage = buf_page_hash_get_s_locked(buf_pool, space, offset,
- &hash_lock);
+ bpage = buf_page_hash_get_s_locked(buf_pool, page_id, &hash_lock);
if (bpage) {
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ BPageMutex* block_mutex = buf_page_get_mutex(bpage);
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
mutex_enter(block_mutex);
rw_lock_s_unlock(hash_lock);
@@ -2477,27 +3698,21 @@ buf_page_set_file_page_was_freed(
return(bpage);
}
-/********************************************************************//**
-Sets file_page_was_freed FALSE if the page is found in the buffer pool.
+/** Sets file_page_was_freed FALSE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
reallocated.
-@return control block if found in page hash table, otherwise NULL */
-UNIV_INTERN
-buf_page_t*
-buf_page_reset_file_page_was_freed(
-/*===============================*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: page number */
+@param[in] page_id page id
+@return control block if found in page hash table, otherwise NULL */
+buf_page_t* buf_page_reset_file_page_was_freed(const page_id_t page_id)
{
buf_page_t* bpage;
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
+ buf_pool_t* buf_pool = buf_pool_get(page_id);
rw_lock_t* hash_lock;
- bpage = buf_page_hash_get_s_locked(buf_pool, space, offset,
- &hash_lock);
+ bpage = buf_page_hash_get_s_locked(buf_pool, page_id, &hash_lock);
if (bpage) {
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ BPageMutex* block_mutex = buf_page_get_mutex(bpage);
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
mutex_enter(block_mutex);
rw_lock_s_unlock(hash_lock);
@@ -2507,21 +3722,15 @@ buf_page_reset_file_page_was_freed(
return(bpage);
}
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
+#endif /* UNIV_DEBUG */
-/********************************************************************//**
-Attempts to discard the uncompressed frame of a compressed page. The
-caller should not be holding any mutexes when this function is called.
-@return TRUE if successful, FALSE otherwise. */
-static
-void
-buf_block_try_discard_uncompressed(
-/*===============================*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: page number */
+/** Attempts to discard the uncompressed frame of a compressed page.
+The caller should not be holding any mutexes when this function is called.
+@param[in] page_id page id */
+static void buf_block_try_discard_uncompressed(const page_id_t page_id)
{
buf_page_t* bpage;
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
+ buf_pool_t* buf_pool = buf_pool_get(page_id);
/* Since we need to acquire buf_pool mutex to discard
the uncompressed frame and because page_hash mutex resides
@@ -2531,7 +3740,7 @@ buf_block_try_discard_uncompressed(
we need to check again if the block is still in page_hash. */
buf_pool_mutex_enter(buf_pool);
- bpage = buf_page_hash_get(buf_pool, space, offset);
+ bpage = buf_page_hash_get(buf_pool, page_id);
if (bpage) {
buf_LRU_free_page(bpage, false);
@@ -2540,29 +3749,27 @@ buf_block_try_discard_uncompressed(
buf_pool_mutex_exit(buf_pool);
}
-/********************************************************************//**
-Get read access to a compressed page (usually of type
+/** Get read access to a compressed page (usually of type
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
The page must be released with buf_page_release_zip().
NOTE: the page is not protected by any latch. Mutual exclusion has to
be implemented at a higher level. In other words, all possible
accesses to a given page through this function must be protected by
the same set of mutexes or latches.
-@return pointer to the block */
-UNIV_INTERN
+@param[in] page_id page id
+@param[in] page_size page size
+@return pointer to the block */
buf_page_t*
buf_page_get_zip(
-/*=============*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size */
- ulint offset) /*!< in: page number */
+ const page_id_t page_id,
+ const page_size_t& page_size)
{
buf_page_t* bpage;
- ib_mutex_t* block_mutex;
+ BPageMutex* block_mutex;
rw_lock_t* hash_lock;
ibool discard_attempted = FALSE;
ibool must_read;
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
+ buf_pool_t* buf_pool = buf_pool_get(page_id);
buf_pool->stat.n_page_gets++;
@@ -2571,8 +3778,8 @@ lookup:
/* The following call will also grab the page_hash
mutex if the page is found. */
- bpage = buf_page_hash_get_s_locked(buf_pool, space,
- offset, &hash_lock);
+ bpage = buf_page_hash_get_s_locked(buf_pool, page_id,
+ &hash_lock);
if (bpage) {
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
break;
@@ -2581,19 +3788,15 @@ lookup:
/* Page not in buf_pool: needs to be read from file */
ut_ad(!hash_lock);
- dberr_t err = buf_read_page(space, zip_size, offset);
+ dberr_t err = buf_read_page(page_id, page_size);
if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Reading compressed page " ULINTPF
- ":" ULINTPF
- " failed with error: %s.",
- space, offset, ut_strerr(err));
+ ib::error() << "Reading compressed page " << page_id
+ << " failed with error: " << ut_strerr(err);
goto err_exit;
}
-
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
@@ -2611,50 +3814,39 @@ err_exit:
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_POOL_WATCH:
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- ut_error;
-
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
+ buf_block_fix(bpage);
block_mutex = &buf_pool->zip_mutex;
- mutex_enter(block_mutex);
-#ifdef PAGE_ATOMIC_REF_COUNT
- os_atomic_increment_uint32(&bpage->buf_fix_count, 1);
-#else
- ++bpage->buf_fix_count;
-#endif /* PAGE_ATOMIC_REF_COUNT */
goto got_block;
case BUF_BLOCK_FILE_PAGE:
/* Discard the uncompressed page frame if possible. */
if (!discard_attempted) {
rw_lock_s_unlock(hash_lock);
- buf_block_try_discard_uncompressed(space, offset);
+ buf_block_try_discard_uncompressed(page_id);
discard_attempted = TRUE;
goto lookup;
}
- block_mutex = &((buf_block_t*) bpage)->mutex;
-
- mutex_enter(block_mutex);
+ buf_block_buf_fix_inc((buf_block_t*) bpage,
+ __FILE__, __LINE__);
- buf_block_buf_fix_inc((buf_block_t*) bpage, __FILE__, __LINE__);
+ block_mutex = &((buf_block_t*) bpage)->mutex;
goto got_block;
+ default:
+ break;
}
ut_error;
goto err_exit;
got_block:
+ mutex_enter(block_mutex);
must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
rw_lock_s_unlock(hash_lock);
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- ut_a(!bpage->file_page_was_freed);
-#endif /* defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG */
+
+ ut_ad(!bpage->file_page_was_freed);
buf_page_set_accessed(bpage);
@@ -2688,10 +3880,6 @@ got_block:
}
}
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(buf_page_get_space(bpage),
- buf_page_get_page_no(bpage)) == 0);
-#endif
return(bpage);
}
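
A minimal usage sketch for this API, assuming the surrounding InnoDB
declarations; read_zblob_page() is a hypothetical helper, and the
higher-level mutual exclusion described in the comment above remains the
caller's responsibility:

    /* Fetch a compressed BLOB page, read from its frame, and release
    it again. buf_page_get_zip() buffer-fixes the page but takes no
    latch. */
    static void read_zblob_page(const page_id_t page_id,
                                const page_size_t& page_size)
    {
        if (buf_page_t* bpage = buf_page_get_zip(page_id, page_size)) {
            const byte* frame = bpage->zip.data;
            ut_ad(fil_page_get_type(frame) == FIL_PAGE_TYPE_ZBLOB
                  || fil_page_get_type(frame) == FIL_PAGE_TYPE_ZBLOB2);
            /* ... copy the payload out of the compressed frame ... */
            buf_page_release_zip(bpage); /* undo the buffer-fix */
        }
    }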
@@ -2703,20 +3891,23 @@ buf_block_init_low(
/*===============*/
buf_block_t* block) /*!< in: block to init */
{
- block->check_index_page_at_flush = FALSE;
+ block->skip_flush_check = false;
+#ifdef BTR_CUR_HASH_ADAPT
+ /* No adaptive hash index entries may point to a previously
+ unused (and now freshly allocated) block. */
+ assert_block_ahi_empty_on_init(block);
block->index = NULL;
block->n_hash_helps = 0;
block->n_fields = 1;
block->n_bytes = 0;
block->left_side = TRUE;
+#endif /* BTR_CUR_HASH_ADAPT */
}
-#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
Decompress a block.
-@return TRUE if successful */
-UNIV_INTERN
+@return TRUE if successful */
ibool
buf_zip_decompress(
/*===============*/
@@ -2725,40 +3916,49 @@ buf_zip_decompress(
{
const byte* frame = block->page.zip.data;
ulint size = page_zip_get_size(&block->page.zip);
- /* Space is not found if this function is called during IMPORT */
- fil_space_t* space = fil_space_acquire_for_io(block->page.space);
- const unsigned key_version = mach_read_from_4(frame +
- FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
+ /* The tablespace will not be found if this function is called
+ during IMPORT. */
+ fil_space_t* space = fil_space_acquire_for_io(block->page.id.space());
+ const unsigned key_version = mach_read_from_4(
+ frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
fil_space_crypt_t* crypt_data = space ? space->crypt_data : NULL;
const bool encrypted = crypt_data
- && crypt_data->type != CRYPT_SCHEME_UNENCRYPTED
- && (!crypt_data->is_default_encryption()
- || srv_encrypt_tables);
+ && crypt_data->type != CRYPT_SCHEME_UNENCRYPTED
+ && (!crypt_data->is_default_encryption()
+ || srv_encrypt_tables);
- ut_ad(buf_block_get_zip_size(block));
- ut_a(buf_block_get_space(block) != 0);
+ ut_ad(block->page.size.is_compressed());
+ ut_a(block->page.id.space() != 0);
if (UNIV_UNLIKELY(check && !page_zip_verify_checksum(frame, size))) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Compressed page checksum mismatch"
- " for %s [%u:%u]: stored: " ULINTPF ", crc32: " ULINTPF
- " innodb: " ULINTPF ", none: " ULINTPF ".",
- space ? space->chain.start->name : "N/A",
- block->page.space, block->page.offset,
- mach_read_from_4(frame + FIL_PAGE_SPACE_OR_CHKSUM),
- page_zip_calc_checksum(frame, size,
- SRV_CHECKSUM_ALGORITHM_CRC32),
- page_zip_calc_checksum(frame, size,
- SRV_CHECKSUM_ALGORITHM_INNODB),
- page_zip_calc_checksum(frame, size,
- SRV_CHECKSUM_ALGORITHM_NONE));
+ ib::error() << "Compressed page checksum mismatch for "
+ << (space ? space->chain.start->name : "")
+ << block->page.id << ": stored: "
+ << mach_read_from_4(frame + FIL_PAGE_SPACE_OR_CHKSUM)
+ << ", crc32: "
+ << page_zip_calc_checksum(
+ frame, size, SRV_CHECKSUM_ALGORITHM_CRC32)
+#ifdef INNODB_BUG_ENDIAN_CRC32
+ << "/"
+ << page_zip_calc_checksum(
+ frame, size, SRV_CHECKSUM_ALGORITHM_CRC32,
+ true)
+#endif
+ << " innodb: "
+ << page_zip_calc_checksum(
+ frame, size, SRV_CHECKSUM_ALGORITHM_INNODB)
+ << ", none: "
+ << page_zip_calc_checksum(
+ frame, size, SRV_CHECKSUM_ALGORITHM_NONE)
+ << " (algorithm: " << srv_checksum_algorithm << ")";
+
goto err_exit;
}
switch (fil_page_get_type(frame)) {
- case FIL_PAGE_INDEX: {
-
+ case FIL_PAGE_INDEX:
+ case FIL_PAGE_RTREE:
if (page_zip_decompress(&block->page.zip,
block->frame, TRUE)) {
if (space) {
@@ -2767,14 +3967,10 @@ buf_zip_decompress(
return(TRUE);
}
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unable to decompress space %s [%u:%u]",
- space ? space->chain.start->name : "N/A",
- block->page.space,
- block->page.offset);
-
+ ib::error() << "Unable to decompress "
+ << (space ? space->chain.start->name : "")
+ << block->page.id;
goto err_exit;
- }
case FIL_PAGE_TYPE_ALLOCATED:
case FIL_PAGE_INODE:
case FIL_PAGE_IBUF_BITMAP:
@@ -2783,9 +3979,7 @@ buf_zip_decompress(
case FIL_PAGE_TYPE_ZBLOB:
case FIL_PAGE_TYPE_ZBLOB2:
/* Copy to uncompressed storage. */
- memcpy(block->frame, frame,
- buf_block_get_zip_size(block));
-
+ memcpy(block->frame, frame, block->page.size.physical());
if (space) {
fil_space_release_for_io(space);
}
@@ -2793,22 +3987,18 @@ buf_zip_decompress(
return(TRUE);
}
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unknown compressed page in %s [%u:%u]"
- " type %s [" ULINTPF "].",
- space ? space->chain.start->name : "N/A",
- block->page.space, block->page.offset,
- fil_get_page_type_name(fil_page_get_type(frame)), fil_page_get_type(frame));
+ ib::error() << "Unknown compressed page type "
+ << fil_page_get_type(frame)
+ << " in " << (space ? space->chain.start->name : "")
+ << block->page.id;
err_exit:
if (encrypted) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Row compressed page could be encrypted with key_version %u.",
- key_version);
- block->page.encrypted = true;
- dict_set_encrypted_by_space(block->page.space);
+ ib::info() << "Row compressed page could be encrypted"
+ " with key_version " << key_version;
+ dict_set_encrypted_by_space(block->page.id.space());
} else {
- dict_set_corrupted_by_space(block->page.space);
+ dict_set_corrupted_by_space(block->page.id.space());
}
if (space) {
@@ -2818,142 +4008,56 @@ err_exit:
return(FALSE);
}
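
The checksum check above reports the stored value against every supported
algorithm before giving up; the actual acceptance rules live in
page_zip_verify_checksum(). A condensed sketch of the comparison pattern,
using the same helpers (zip_checksum_matches() itself is hypothetical):

    static bool zip_checksum_matches(const byte* frame, ulint size)
    {
        /* Checksum stored in the page header. */
        const ulint stored = mach_read_from_4(
            frame + FIL_PAGE_SPACE_OR_CHKSUM);
        static const srv_checksum_algorithm_t algos[] = {
            SRV_CHECKSUM_ALGORITHM_CRC32,
            SRV_CHECKSUM_ALGORITHM_INNODB,
            SRV_CHECKSUM_ALGORITHM_NONE
        };
        /* Accept the page if any algorithm reproduces the stored
        value. */
        for (size_t i = 0; i < sizeof algos / sizeof *algos; i++) {
            if (stored == page_zip_calc_checksum(frame, size,
                                                 algos[i])) {
                return true;
            }
        }
        return false;
    }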
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Gets the block to whose frame the pointer is pointing to if found
-in this buffer pool instance.
-@return pointer to block */
-UNIV_INTERN
+#ifdef BTR_CUR_HASH_ADAPT
+/** Get a buffer block from an adaptive hash index pointer.
+This function does not return if the block is not identified.
+@param[in] ptr pointer to within a page frame
+@return pointer to block, never NULL */
buf_block_t*
-buf_block_align_instance(
-/*=====================*/
- buf_pool_t* buf_pool, /*!< in: buffer in which the block
- resides */
- const byte* ptr) /*!< in: pointer to a frame */
+buf_block_from_ahi(const byte* ptr)
{
- buf_chunk_t* chunk;
- ulint i;
-
- /* TODO: protect buf_pool->chunks with a mutex (it will
- currently remain constant after buf_pool_init()) */
- for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) {
- ulint offs;
-
- if (UNIV_UNLIKELY(ptr < chunk->blocks->frame)) {
-
- continue;
- }
- /* else */
+ buf_pool_chunk_map_t::iterator it;
- offs = ptr - chunk->blocks->frame;
-
- offs >>= UNIV_PAGE_SIZE_SHIFT;
-
- if (UNIV_LIKELY(offs < chunk->size)) {
- buf_block_t* block = &chunk->blocks[offs];
-
- /* The function buf_chunk_init() invokes
- buf_block_init() so that block[n].frame ==
- block->frame + n * UNIV_PAGE_SIZE. Check it. */
- ut_ad(block->frame == page_align(ptr));
-#ifdef UNIV_DEBUG
- /* A thread that updates these fields must
- hold buf_pool->mutex and block->mutex. Acquire
- only the latter. */
- mutex_enter(&block->mutex);
-
- switch (buf_block_get_state(block)) {
- case BUF_BLOCK_POOL_WATCH:
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY:
- /* These types should only be used in
- the compressed buffer pool, whose
- memory is allocated from
- buf_pool->chunks, in UNIV_PAGE_SIZE
- blocks flagged as BUF_BLOCK_MEMORY. */
- ut_error;
- break;
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- /* Some data structures contain
- "guess" pointers to file pages. The
- file pages may have been freed and
- reused. Do not complain. */
- break;
- case BUF_BLOCK_REMOVE_HASH:
- /* buf_LRU_block_remove_hashed_page()
- will overwrite the FIL_PAGE_OFFSET and
- FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID with
- 0xff and set the state to
- BUF_BLOCK_REMOVE_HASH. */
- ut_ad(page_get_space_id(page_align(ptr))
- == 0xffffffff);
- ut_ad(page_get_page_no(page_align(ptr))
- == 0xffffffff);
- break;
- case BUF_BLOCK_FILE_PAGE: {
- ulint space = page_get_space_id(page_align(ptr));
- ulint offset = page_get_page_no(page_align(ptr));
-
- if (block->page.space != space ||
- block->page.offset != offset) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Corruption: Block space_id " ULINTPF " != page space_id " ULINTPF " or "
- "Block offset " ULINTPF " != page offset " ULINTPF " ",
- (ulint)block->page.space, space,
- (ulint)block->page.offset, offset);
- }
+ buf_pool_chunk_map_t* chunk_map = buf_chunk_map_ref;
+ ut_ad(buf_chunk_map_ref == buf_chunk_map_reg);
+ ut_ad(!buf_pool_resizing);
- ut_ad(block->page.space
- == page_get_space_id(page_align(ptr)));
- ut_ad(block->page.offset
- == page_get_page_no(page_align(ptr)));
- break;
- }
- }
+ buf_chunk_t* chunk;
+ it = chunk_map->upper_bound(ptr);
- mutex_exit(&block->mutex);
-#endif /* UNIV_DEBUG */
+ ut_a(it != chunk_map->begin());
- return(block);
- }
+ if (it == chunk_map->end()) {
+ chunk = chunk_map->rbegin()->second;
+ } else {
+ chunk = (--it)->second;
}
- return(NULL);
-}
+ ulint offs = ptr - chunk->blocks->frame;
-/*******************************************************************//**
-Gets the block to whose frame the pointer is pointing to.
-@return pointer to block, never NULL */
-UNIV_INTERN
-buf_block_t*
-buf_block_align(
-/*============*/
- const byte* ptr) /*!< in: pointer to a frame */
-{
- ulint i;
+ offs >>= UNIV_PAGE_SIZE_SHIFT;
- for (i = 0; i < srv_buf_pool_instances; i++) {
- buf_block_t* block;
+ ut_a(offs < chunk->size);
- block = buf_block_align_instance(
- buf_pool_from_array(i), ptr);
- if (block) {
- return(block);
- }
- }
+ buf_block_t* block = &chunk->blocks[offs];
- /* The block should always be found. */
- ut_error;
- return(NULL);
+ /* The function buf_chunk_init() invokes buf_block_init() so that
+ block[n].frame == block->frame + n * UNIV_PAGE_SIZE. Check it. */
+ ut_ad(block->frame == page_align(ptr));
+	/* Read the state of the block without holding a mutex.
+	A state transition from BUF_BLOCK_FILE_PAGE to
+	BUF_BLOCK_REMOVE_HASH is possible while this code runs. */
+ ut_d(const buf_page_state state = buf_block_get_state(block));
+ ut_ad(state == BUF_BLOCK_FILE_PAGE || state == BUF_BLOCK_REMOVE_HASH);
+ return(block);
}
+#endif /* BTR_CUR_HASH_ADAPT */
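
buf_block_from_ahi() leans on a std::map keyed by each chunk's first frame
address: upper_bound(ptr) returns the first chunk that starts after ptr,
so stepping back one entry yields the chunk containing it. A
self-contained sketch of that lookup idiom (Chunk and chunk_containing()
are illustrative stand-ins, not InnoDB types):

    #include <cassert>
    #include <cstddef>
    #include <map>

    struct Chunk { const char* base; std::size_t bytes; };

    /* Map from a chunk's base address to its descriptor. */
    typedef std::map<const char*, Chunk> ChunkMap;

    const Chunk* chunk_containing(const ChunkMap& chunks, const char* ptr)
    {
        ChunkMap::const_iterator it = chunks.upper_bound(ptr);
        assert(it != chunks.begin());   /* ptr below the lowest chunk */
        --it;                           /* chunk whose base is <= ptr */
        const Chunk& c = it->second;
        assert(ptr < c.base + c.bytes); /* ptr inside this chunk */
        return &c;
    }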
/********************************************************************//**
Find out if a pointer belongs to a buf_block_t. It can be a pointer to
the buf_block_t itself or a member of it. This functions checks one of
the buffer pool instances.
-@return TRUE if ptr belongs to a buf_block_t struct */
+@return TRUE if ptr belongs to a buf_block_t struct */
static
ibool
buf_pointer_is_block_field_instance(
@@ -2962,10 +4066,11 @@ buf_pointer_is_block_field_instance(
const void* ptr) /*!< in: pointer not dereferenced */
{
const buf_chunk_t* chunk = buf_pool->chunks;
- const buf_chunk_t* const echunk = chunk + buf_pool->n_chunks;
+ const buf_chunk_t* const echunk = chunk + ut_min(
+ buf_pool->n_chunks, buf_pool->n_chunks_new);
- /* TODO: protect buf_pool->chunks with a mutex (it will
- currently remain constant after buf_pool_init()) */
+	/* TODO: protect buf_pool->chunks with a mutex (the old pointer will
+	currently remain valid during buf_pool_resize()) */
while (chunk < echunk) {
if (ptr >= (void*) chunk->blocks
&& ptr < (void*) (chunk->blocks + chunk->size)) {
@@ -2982,8 +4087,7 @@ buf_pointer_is_block_field_instance(
/********************************************************************//**
Find out if a pointer belongs to a buf_block_t. It can be a pointer to
the buf_block_t itself or a member of it
-@return TRUE if ptr belongs to a buf_block_t struct */
-UNIV_INTERN
+@return TRUE if ptr belongs to a buf_block_t struct */
ibool
buf_pointer_is_block_field(
/*=======================*/
@@ -3006,7 +4110,7 @@ buf_pointer_is_block_field(
/********************************************************************//**
Find out if a buffer block was created by buf_chunk_init().
-@return TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */
+@return TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */
static
ibool
buf_block_is_uncompressed(
@@ -3045,14 +4149,14 @@ buf_debug_execute_is_force_flush()
}
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-/**
-Wait for the block to be read in.
-@param block The block to check */
+/** Wait for the block to be read in.
+@param[in] block The block to check */
static
void
-buf_wait_for_read(buf_block_t* block)
+buf_wait_for_read(
+ buf_block_t* block)
{
- /* Note: For the PAGE_ATOMIC_REF_COUNT case:
+ /* Note:
We are using the block->lock to check for IO state (and a dirty read).
We set the IO_READ state under the protection of the hash_lock
@@ -3064,7 +4168,7 @@ buf_wait_for_read(buf_block_t* block)
/* Wait until the read operation completes */
- ib_mutex_t* mutex = buf_page_get_mutex(&block->page);
+ BPageMutex* mutex = buf_page_get_mutex(&block->page);
for (;;) {
buf_io_fix io_fix;
@@ -3086,41 +4190,78 @@ buf_wait_for_read(buf_block_t* block)
}
}
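
buf_wait_for_read() polls the block's I/O state under the block mutex
until the read completes. A generic sketch of that poll-under-mutex loop;
the types and the sleep interval are illustrative, not InnoDB's:

    #include <chrono>
    #include <mutex>
    #include <thread>

    enum IoFix { IO_NONE, IO_READ, IO_WRITE };

    struct Page {
        std::mutex mu;  /* stand-in for the block mutex */
        IoFix io_fix;
    };

    /* Re-read the I/O state under the mutex and sleep briefly between
    checks until the read has completed. */
    void wait_for_read(Page& page)
    {
        for (;;) {
            {
                std::lock_guard<std::mutex> lock(page.mu);
                if (page.io_fix != IO_READ) {
                    return; /* read finished; frame is valid */
                }
            }
            std::this_thread::sleep_for(
                std::chrono::microseconds(100));
        }
    }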
-/********************************************************************//**
-This is the general function used to get access to a database page.
-@return pointer to the block or NULL */
-UNIV_INTERN
+/** Lock the page with the given latch type.
+@param[in,out] block block to be locked
+@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
+@param[in] mtr mini-transaction
+@param[in] file file name
+@param[in] line line where called
+@return pointer to locked block */
+static buf_block_t* buf_page_mtr_lock(buf_block_t *block,
+ ulint rw_latch,
+ mtr_t* mtr,
+ const char *file,
+ unsigned line)
+{
+ mtr_memo_type_t fix_type;
+ switch (rw_latch)
+ {
+ case RW_NO_LATCH:
+ fix_type= MTR_MEMO_BUF_FIX;
+ break;
+ case RW_S_LATCH:
+ rw_lock_s_lock_inline(&block->lock, 0, file, line);
+ fix_type= MTR_MEMO_PAGE_S_FIX;
+ break;
+ case RW_SX_LATCH:
+ rw_lock_sx_lock_inline(&block->lock, 0, file, line);
+ fix_type= MTR_MEMO_PAGE_SX_FIX;
+ break;
+ default:
+ ut_ad(rw_latch == RW_X_LATCH);
+ rw_lock_x_lock_inline(&block->lock, 0, file, line);
+ fix_type= MTR_MEMO_PAGE_X_FIX;
+ break;
+ }
+
+ mtr_memo_push(mtr, block, fix_type);
+ return block;
+}
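
A hedged usage sketch: once a block has been buffer-fixed, this helper
both latches it and records the fix in the mini-transaction, so a call
site reduces to a single line (the RW_S_LATCH choice here is
illustrative):

    block = buf_page_mtr_lock(block, RW_S_LATCH, mtr,
                              __FILE__, __LINE__);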
+
+/** This is the low-level function used to get access to a database page.
+@param[in] page_id page id
+@param[in] page_size page size
+@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
+@param[in] guess guessed block or NULL
+@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
+BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH
+@param[in] file file name
+@param[in] line line where called
+@param[in] mtr mini-transaction
+@param[out] err DB_SUCCESS or error code
+@return pointer to the block or NULL */
buf_block_t*
-buf_page_get_gen(
-/*=============*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint offset, /*!< in: page number */
- ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
- buf_block_t* guess, /*!< in: guessed block or NULL */
- ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
- BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or
- BUF_GET_IF_IN_POOL_OR_WATCH */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr, /*!< in: mini-transaction */
- dberr_t* err) /*!< out: error code */
+buf_page_get_low(
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ ulint rw_latch,
+ buf_block_t* guess,
+ ulint mode,
+ const char* file,
+ unsigned line,
+ mtr_t* mtr,
+ dberr_t* err)
{
buf_block_t* block;
- ulint fold;
unsigned access_time;
- ulint fix_type;
rw_lock_t* hash_lock;
- ulint retries = 0;
buf_block_t* fix_block;
- ib_mutex_t* fix_mutex = NULL;
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
+ ulint retries = 0;
+ buf_pool_t* buf_pool = buf_pool_get(page_id);
ut_ad((mtr == NULL) == (mode == BUF_EVICT_IF_IN_POOL));
- ut_ad(!mtr || mtr->state == MTR_ACTIVE);
+ ut_ad(!mtr || mtr->is_active());
ut_ad((rw_latch == RW_S_LATCH)
|| (rw_latch == RW_X_LATCH)
+ || (rw_latch == RW_SX_LATCH)
|| (rw_latch == RW_NO_LATCH));
if (err) {
@@ -3130,40 +4271,45 @@ buf_page_get_gen(
#ifdef UNIV_DEBUG
switch (mode) {
case BUF_EVICT_IF_IN_POOL:
- case BUF_PEEK_IF_IN_POOL:
/* After DISCARD TABLESPACE, the tablespace would not exist,
but in IMPORT TABLESPACE, PageConverter::operator() must
replace any old pages, which were not evicted during DISCARD.
- Similarly, btr_search_drop_page_hash_when_freed() must
- remove any old pages. Skip the assertion on zip_size. */
+ Skip the assertion on space_page_size. */
break;
+ case BUF_PEEK_IF_IN_POOL:
+ case BUF_GET_IF_IN_POOL:
+ /* The caller may pass a dummy page size,
+ because it does not really matter. */
+ break;
+ default:
+ ut_error;
case BUF_GET_NO_LATCH:
ut_ad(rw_latch == RW_NO_LATCH);
/* fall through */
case BUF_GET:
- case BUF_GET_IF_IN_POOL:
case BUF_GET_IF_IN_POOL_OR_WATCH:
case BUF_GET_POSSIBLY_FREED:
- ut_ad(zip_size == fil_space_get_zip_size(space));
- break;
- default:
- ut_error;
+ bool found;
+ const page_size_t& space_page_size
+ = fil_space_get_page_size(page_id.space(), &found);
+ ut_ad(found);
+ ut_ad(page_size.equals_to(space_page_size));
}
#endif /* UNIV_DEBUG */
- ut_ad(ut_is_2pow(zip_size));
-#ifndef UNIV_LOG_DEBUG
+
ut_ad(!mtr || !ibuf_inside(mtr)
- || ibuf_page_low(space, zip_size, offset,
- FALSE, file, line, NULL));
-#endif
+ || ibuf_page_low(page_id, page_size, FALSE, file, line, NULL));
+
buf_pool->stat.n_page_gets++;
- fold = buf_page_address_fold(space, offset);
- hash_lock = buf_page_hash_lock_get(buf_pool, fold);
+ hash_lock = buf_page_hash_lock_get(buf_pool, page_id);
loop:
block = guess;
rw_lock_s_lock(hash_lock);
+	/* The page_hash can change while buf_pool->mutex is not held. */
+ hash_lock = buf_page_hash_lock_s_confirm(hash_lock, buf_pool, page_id);
+
if (block != NULL) {
/* If the guess is a compressed page descriptor that
@@ -3171,8 +4317,7 @@ loop:
it may have been freed by buf_relocate(). */
if (!buf_block_is_uncompressed(buf_pool, block)
- || offset != block->page.offset
- || space != block->page.space
+ || page_id != block->page.id
|| buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
/* Our guess was bogus or things have changed
@@ -3184,8 +4329,7 @@ loop:
}
if (block == NULL) {
- block = (buf_block_t*) buf_page_hash_get_low(
- buf_pool, space, offset, fold);
+ block = (buf_block_t*) buf_page_hash_get_low(buf_pool, page_id);
}
if (!block || buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
@@ -3199,15 +4343,39 @@ loop:
if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
rw_lock_x_lock(hash_lock);
+
+			/* The page_hash can change while
+			buf_pool->mutex is not held. */
+ hash_lock = buf_page_hash_lock_x_confirm(
+ hash_lock, buf_pool, page_id);
+
block = (buf_block_t*) buf_pool_watch_set(
- space, offset, fold);
+ page_id, &hash_lock);
- if (UNIV_LIKELY_NULL(block)) {
+ if (block) {
/* We can release hash_lock after we
increment the fix count to make
sure that no state change takes place. */
fix_block = block;
- buf_block_fix(fix_block);
+
+ if (fsp_is_system_temporary(page_id.space())) {
+				/* For the temporary tablespace,
+				the block mutex is used for
+				synchronization between the
+				user thread and the flush
+				thread, instead of block->lock.
+				See buf_flush_page() for the
+				flush thread counterpart. */
+
+ BPageMutex* fix_mutex
+ = buf_page_get_mutex(
+ &fix_block->page);
+ mutex_enter(fix_mutex);
+ buf_block_fix(fix_block);
+ mutex_exit(fix_mutex);
+ } else {
+ buf_block_fix(fix_block);
+ }
/* Now safe to release page_hash mutex */
rw_lock_x_unlock(hash_lock);
@@ -3222,26 +4390,27 @@ loop:
case BUF_GET_IF_IN_POOL_OR_WATCH:
case BUF_PEEK_IF_IN_POOL:
case BUF_EVICT_IF_IN_POOL:
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
- ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!rw_lock_own_flagged(
+ hash_lock,
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
return(NULL);
}
- /* Call path is buf_read_page() -> buf_read_page_low()
- (_fil_io()) -> buf_page_io_complete() ->
- buf_decrypt_after_read() here fil_space_t* is used
- and we decrypt -> buf_page_check_corrupt() where
- page checksums are compared. Decryption/decompression
- is handled lower level, error handling is handled on lower
- level, here we need only to know is page really corrupted
- or encrypted page with correct checksum. */
+		/* The call path is buf_read_page() ->
+		buf_read_page_low() (fil_io()) ->
+		buf_page_io_complete() ->
+		buf_decrypt_after_read(). Here fil_space_t* is used
+		and we decrypt -> buf_page_check_corrupt() where page
+		checksums are compared. Decryption, decompression and
+		error handling all take place at a lower level. Here
+		we only need to know whether the page really is
+		corrupted, or whether it is an encrypted page with a
+		valid checksum that cannot be decrypted. */
- dberr_t local_err = buf_read_page(space, zip_size, offset);
+ dberr_t local_err = buf_read_page(page_id, page_size);
if (local_err == DB_SUCCESS) {
- buf_read_ahead_random(space, zip_size, offset,
+ buf_read_ahead_random(page_id, page_size,
ibuf_inside(mtr));
retries = 0;
@@ -3273,66 +4442,66 @@ loop:
return (NULL);
}
+ if (local_err == DB_PAGE_CORRUPTED
+ && srv_force_recovery) {
+ return NULL;
+ }
+
/* Try to set table as corrupted instead of
asserting. */
- if (space > TRX_SYS_SPACE &&
- dict_set_corrupted_by_space(space)) {
+ if (page_id.space() != TRX_SYS_SPACE &&
+ dict_set_corrupted_by_space(page_id.space())) {
return (NULL);
}
- ib_logf(IB_LOG_LEVEL_FATAL, "Unable"
- " to read tablespace " ULINTPF " page no "
- ULINTPF " into the buffer pool after "
- ULINTPF " attempts."
- " The most probable cause"
+ ib::fatal() << "Unable to read page " << page_id
+ << " into the buffer pool after "
+ << BUF_PAGE_READ_MAX_RETRIES
+ << ". The most probable cause"
" of this error may be that the"
" table has been corrupted."
- " You can try to fix this"
- " problem by using"
- " innodb_force_recovery."
- " Please see " REFMAN " for more"
- " details. Aborting...",
- space, offset,
- BUF_PAGE_READ_MAX_RETRIES);
+ " See https://mariadb.com/kb/en/library/innodb-recovery-modes/";
}
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(++buf_dbg_counter % 5771 || buf_validate());
+ ut_a(fsp_skip_sanity_check(page_id.space())
+ || ++buf_dbg_counter % 5771
+ || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
goto loop;
} else {
fix_block = block;
}
- buf_block_fix(fix_block);
+ if (fsp_is_system_temporary(page_id.space())) {
+		/* For the temporary tablespace, the block mutex is
+		used for synchronization between the user thread and
+		the flush thread, instead of block->lock. See
+		buf_flush_page() for the flush thread counterpart. */
+ BPageMutex* fix_mutex = buf_page_get_mutex(
+ &fix_block->page);
+ mutex_enter(fix_mutex);
+ buf_block_fix(fix_block);
+ mutex_exit(fix_mutex);
+ } else {
+ buf_block_fix(fix_block);
+ }
/* Now safe to release page_hash mutex */
rw_lock_s_unlock(hash_lock);
got_block:
- fix_mutex = buf_page_get_mutex(&fix_block->page);
-
- ut_ad(page_zip_get_size(&block->page.zip) == zip_size
- || mode == BUF_PEEK_IF_IN_POOL);
-
switch (mode) {
case BUF_GET_IF_IN_POOL:
case BUF_PEEK_IF_IN_POOL:
case BUF_EVICT_IF_IN_POOL:
- bool must_read;
-
- {
- buf_page_t* fix_page = &fix_block->page;
-
- mutex_enter(fix_mutex);
-
- buf_io_fix io_fix = buf_page_get_io_fix(fix_page);
-
- must_read = (io_fix == BUF_IO_READ);
-
- mutex_exit(fix_mutex);
- }
+ buf_page_t* fix_page = &fix_block->page;
+ BPageMutex* fix_mutex = buf_page_get_mutex(fix_page);
+ mutex_enter(fix_mutex);
+ const bool must_read
+ = (buf_page_get_io_fix(fix_page) == BUF_IO_READ);
+ mutex_exit(fix_mutex);
if (must_read) {
/* The page is being read to buffer pool,
@@ -3344,10 +4513,21 @@ got_block:
}
}
- switch(buf_block_get_state(fix_block)) {
+ switch (buf_block_get_state(fix_block)) {
buf_page_t* bpage;
case BUF_BLOCK_FILE_PAGE:
+ bpage = &block->page;
+ if (fsp_is_system_temporary(page_id.space())
+ && buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
+			/* This suggests that the page is being flushed.
+			Avoid returning a reference to this page; instead,
+			wait for the flush operation to complete. */
+ buf_block_unfix(fix_block);
+ os_thread_sleep(WAIT_FOR_WRITE);
+ goto loop;
+ }
+
if (UNIV_UNLIKELY(mode == BUF_EVICT_IF_IN_POOL)) {
evict_from_pool:
ut_ad(!fix_block->page.oldest_modification);
@@ -3405,24 +4585,19 @@ evict_from_pool:
buf_pool_mutex_enter(buf_pool);
+		/* The page_hash can change while buf_pool->mutex is not held. */
+ hash_lock = buf_page_hash_lock_get(buf_pool, page_id);
+
rw_lock_x_lock(hash_lock);
/* Buffer-fixing prevents the page_hash from changing. */
- ut_ad(bpage == buf_page_hash_get_low(
- buf_pool, space, offset, fold));
+ ut_ad(bpage == buf_page_hash_get_low(buf_pool, page_id));
- buf_block_mutex_enter(block);
+ buf_block_unfix(fix_block);
+ buf_page_mutex_enter(block);
mutex_enter(&buf_pool->zip_mutex);
- ut_ad(fix_block->page.buf_fix_count > 0);
-
-#ifdef PAGE_ATOMIC_REF_COUNT
- os_atomic_decrement_uint32(&fix_block->page.buf_fix_count, 1);
-#else
- --fix_block->page.buf_fix_count;
-#endif /* PAGE_ATOMIC_REF_COUNT */
-
fix_block = block;
if (bpage->buf_fix_count > 0
@@ -3438,7 +4613,7 @@ evict_from_pool:
buf_LRU_block_free_non_file_page(block);
buf_pool_mutex_exit(buf_pool);
rw_lock_x_unlock(hash_lock);
- buf_block_mutex_exit(block);
+ buf_page_mutex_exit(block);
/* Try again */
goto loop;
@@ -3454,18 +4629,18 @@ evict_from_pool:
buf_block_init_low(block);
- /* Set after relocate(). */
+ /* Set after buf_relocate(). */
block->page.buf_fix_count = 1;
- block->lock_hash_val = lock_rec_hash(space, offset);
+ block->lock_hash_val = lock_rec_hash(page_id.space(),
+ page_id.page_no());
UNIV_MEM_DESC(&block->page.zip.data,
- page_zip_get_size(&block->page.zip));
+ page_zip_get_size(&block->page.zip));
if (buf_page_get_state(&block->page) == BUF_BLOCK_ZIP_PAGE) {
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- UT_LIST_REMOVE(list, buf_pool->zip_clean,
- &block->page);
+ UT_LIST_REMOVE(buf_pool->zip_clean, &block->page);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
ut_ad(!block->page.in_flush_list);
} else {
@@ -3487,15 +4662,13 @@ evict_from_pool:
UNIV_MEM_INVALID(bpage, sizeof *bpage);
rw_lock_x_unlock(hash_lock);
-
- ++buf_pool->n_pend_unzip;
-
+ buf_pool->n_pend_unzip++;
mutex_exit(&buf_pool->zip_mutex);
buf_pool_mutex_exit(buf_pool);
access_time = buf_page_is_accessed(&block->page);
- buf_block_mutex_exit(block);
+ buf_page_mutex_exit(block);
buf_page_free_descriptor(bpage);
@@ -3507,39 +4680,34 @@ evict_from_pool:
if (!success) {
buf_pool_mutex_enter(buf_pool);
- buf_block_mutex_enter(fix_block);
+ buf_page_mutex_enter(fix_block);
buf_block_set_io_fix(fix_block, BUF_IO_NONE);
- buf_block_mutex_exit(fix_block);
+ buf_page_mutex_exit(fix_block);
--buf_pool->n_pend_unzip;
buf_block_unfix(fix_block);
buf_pool_mutex_exit(buf_pool);
rw_lock_x_unlock(&fix_block->lock);
- *err = DB_PAGE_CORRUPTED;
+ if (err) {
+ *err = DB_PAGE_CORRUPTED;
+ }
return NULL;
}
}
- if (!recv_no_ibuf_operations) {
- if (access_time) {
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(space, offset) == 0);
-#endif /* UNIV_IBUF_COUNT_DEBUG */
- } else {
- ibuf_merge_or_delete_for_page(
- block, space, offset, zip_size, TRUE);
- }
+ if (!access_time && !recv_no_ibuf_operations) {
+ ibuf_merge_or_delete_for_page(
+ block, page_id, &page_size, TRUE);
}
buf_pool_mutex_enter(buf_pool);
- /* Unfix and unlatch the block. */
- buf_block_mutex_enter(fix_block);
+ buf_page_mutex_enter(fix_block);
buf_block_set_io_fix(fix_block, BUF_IO_NONE);
- buf_block_mutex_exit(fix_block);
+ buf_page_mutex_exit(fix_block);
--buf_pool->n_pend_unzip;
@@ -3561,10 +4729,8 @@ evict_from_pool:
ut_ad(block == fix_block);
ut_ad(fix_block->page.buf_fix_count > 0);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
- ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!rw_lock_own_flagged(hash_lock,
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
ut_ad(buf_block_get_state(fix_block) == BUF_BLOCK_FILE_PAGE);
@@ -3586,18 +4752,29 @@ evict_from_pool:
are holding the buf_pool->mutex. */
if (buf_LRU_free_page(&fix_block->page, true)) {
+
buf_pool_mutex_exit(buf_pool);
+
+			/* The page_hash can change while
+			buf_pool->mutex is not held. */
+ hash_lock = buf_page_hash_lock_get(buf_pool, page_id);
+
rw_lock_x_lock(hash_lock);
+			/* The page_hash can change while
+			buf_pool->mutex is not held. */
+ hash_lock = buf_page_hash_lock_x_confirm(
+ hash_lock, buf_pool, page_id);
+
if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
/* Set the watch, as it would have
been set if the page were not in the
buffer pool in the first place. */
block = (buf_block_t*) buf_pool_watch_set(
- space, offset, fold);
+ page_id, &hash_lock);
} else {
block = (buf_block_t*) buf_page_hash_get_low(
- buf_pool, space, offset, fold);
+ buf_pool, page_id);
}
rw_lock_x_unlock(hash_lock);
@@ -3609,20 +4786,22 @@ evict_from_pool:
and before we acquire the hash_lock
above. Try again. */
guess = block;
+
goto loop;
}
return(NULL);
}
- mutex_enter(&fix_block->mutex);
+ buf_page_mutex_enter(fix_block);
if (buf_flush_page_try(buf_pool, fix_block)) {
guess = fix_block;
+
goto loop;
}
- buf_block_mutex_exit(fix_block);
+ buf_page_mutex_exit(fix_block);
buf_block_fix(fix_block);
@@ -3634,30 +4813,40 @@ evict_from_pool:
ut_ad(fix_block->page.buf_fix_count > 0);
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
/* We have already buffer fixed the page, and we are committed to
- returning this page to the caller. Register for debugging. */
- {
- ibool ret;
- ret = rw_lock_s_lock_nowait(&fix_block->debug_latch, file, line);
+ returning this page to the caller. Register for debugging.
+	Avoid debug latching if the page/block belongs to the system
+	temporary tablespace (hardly needed for a table with
+	single-threaded access). */
+ if (!fsp_is_system_temporary(page_id.space())) {
+ ibool ret;
+ ret = rw_lock_s_lock_nowait(
+ &fix_block->debug_latch, file, line);
ut_a(ret);
}
-#endif /* UNIV_SYNC_DEBUG */
+#endif /* UNIV_DEBUG */
+
+	/* While a tablespace is being re-initialized, its indexes have
+	already been freed, but blocks belonging to it may still reside
+	in the buffer pool. Removing such a block from the buffer pool
+	would trigger the removal of the AHI entries associated with it,
+	and that logic would try to load a block that is already in the
+	free state. Handle this case with mode = BUF_PEEK_IF_IN_POOL,
+	which is used by btr_search_drop_page_hash_when_freed(). */
+ ut_ad(mode == BUF_GET_POSSIBLY_FREED
+ || mode == BUF_PEEK_IF_IN_POOL
+ || !fix_block->page.file_page_was_freed);
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- ut_a(mode == BUF_GET_POSSIBLY_FREED
- || !fix_block->page.file_page_was_freed);
-#endif
/* Check if this is the first access to the page */
access_time = buf_page_is_accessed(&fix_block->page);
/* This is a heuristic and we don't care about ordering issues. */
if (access_time == 0) {
- buf_block_mutex_enter(fix_block);
+ buf_page_mutex_enter(fix_block);
buf_page_set_accessed(&fix_block->page);
- buf_block_mutex_exit(fix_block);
+ buf_page_mutex_exit(fix_block);
}
if (mode != BUF_PEEK_IF_IN_POOL) {
@@ -3665,68 +4854,89 @@ evict_from_pool:
}
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(++buf_dbg_counter % 5771 || buf_validate());
- ut_a(fix_block->page.buf_fix_count > 0);
+ ut_a(fsp_skip_sanity_check(page_id.space())
+ || ++buf_dbg_counter % 5771
+ || buf_validate());
ut_a(buf_block_get_state(fix_block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-#ifdef PAGE_ATOMIC_REF_COUNT
/* We have to wait here because the IO_READ state was set
- under the protection of the hash_lock and the block->mutex
- but not the block->lock. */
+	under the protection of the hash_lock, not of the block->mutex
+	or block->lock. */
buf_wait_for_read(fix_block);
-#endif /* PAGE_ATOMIC_REF_COUNT */
- switch (rw_latch) {
- case RW_NO_LATCH:
+ if (fix_block->page.id != page_id) {
-#ifndef PAGE_ATOMIC_REF_COUNT
- buf_wait_for_read(fix_block);
-#endif /* !PAGE_ATOMIC_REF_COUNT */
-
- fix_type = MTR_MEMO_BUF_FIX;
- break;
-
- case RW_S_LATCH:
- rw_lock_s_lock_inline(&fix_block->lock, 0, file, line);
+ buf_block_unfix(fix_block);
- fix_type = MTR_MEMO_PAGE_S_FIX;
- break;
+#ifdef UNIV_DEBUG
+ if (!fsp_is_system_temporary(page_id.space())) {
+ rw_lock_s_unlock(&fix_block->debug_latch);
+ }
+#endif /* UNIV_DEBUG */
- default:
- ut_ad(rw_latch == RW_X_LATCH);
- rw_lock_x_lock_inline(&fix_block->lock, 0, file, line);
+ if (err) {
+ *err = DB_PAGE_CORRUPTED;
+ }
- fix_type = MTR_MEMO_PAGE_X_FIX;
- break;
+ return NULL;
}
- mtr_memo_push(mtr, fix_block, fix_type);
+ fix_block = buf_page_mtr_lock(fix_block, rw_latch, mtr, file, line);
if (mode != BUF_PEEK_IF_IN_POOL && !access_time) {
/* In the case of a first access, try to apply linear
read-ahead */
- buf_read_ahead_linear(
- space, zip_size, offset, ibuf_inside(mtr));
+ buf_read_ahead_linear(page_id, page_size, ibuf_inside(mtr));
}
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(buf_block_get_space(fix_block),
- buf_block_get_page_no(fix_block)) == 0);
-#endif
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
- ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!rw_lock_own_flagged(hash_lock,
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
+
return(fix_block);
}
+/** This is the general function used to get access to a database page.
+It does page initialization and applies the buffered redo logs.
+@param[in] page_id page id
+@param[in] page_size page size
+@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
+@param[in] guess guessed block or NULL
+@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
+BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH
+@param[in] file file name
+@param[in] line line where called
+@param[in] mtr mini-transaction
+@param[out] err DB_SUCCESS or error code
+@return pointer to the block or NULL */
+buf_block_t*
+buf_page_get_gen(
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ ulint rw_latch,
+ buf_block_t* guess,
+ ulint mode,
+ const char* file,
+ unsigned line,
+ mtr_t* mtr,
+ dberr_t* err)
+{
+ if (buf_block_t *block = recv_recovery_create_page(page_id))
+ {
+ buf_block_fix(block);
+ ut_ad(rw_lock_s_lock_nowait(&block->debug_latch, file, line));
+ block= buf_page_mtr_lock(block, rw_latch, mtr, file, line);
+ return block;
+ }
+
+ return buf_page_get_low(page_id, page_size, rw_latch,
+ guess, mode, file, line, mtr, err);
+}
+
/********************************************************************//**
This is the general function used to get optimistic access to a database
page.
-@return TRUE if success */
-UNIV_INTERN
+@return TRUE if success */
ibool
buf_page_optimistic_get(
/*====================*/
@@ -3734,24 +4944,23 @@ buf_page_optimistic_get(
buf_block_t* block, /*!< in: guessed buffer block */
ib_uint64_t modify_clock,/*!< in: modify clock value */
const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
+ unsigned line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mini-transaction */
{
buf_pool_t* buf_pool;
unsigned access_time;
ibool success;
- ulint fix_type;
ut_ad(block);
ut_ad(mtr);
- ut_ad(mtr->state == MTR_ACTIVE);
+ ut_ad(mtr->is_active());
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
- mutex_enter(&block->mutex);
+ buf_page_mutex_enter(block);
if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {
- mutex_exit(&block->mutex);
+ buf_page_mutex_exit(block);
return(FALSE);
}
@@ -3762,73 +4971,71 @@ buf_page_optimistic_get(
buf_page_set_accessed(&block->page);
- mutex_exit(&block->mutex);
+ buf_page_mutex_exit(block);
buf_page_make_young_if_needed(&block->page);
ut_ad(!ibuf_inside(mtr)
- || ibuf_page(buf_block_get_space(block),
- buf_block_get_zip_size(block),
- buf_block_get_page_no(block), NULL));
+ || ibuf_page(block->page.id, block->page.size, NULL));
+
+ mtr_memo_type_t fix_type;
+
+ switch (rw_latch) {
+ case RW_S_LATCH:
+ success = rw_lock_s_lock_nowait(&block->lock, file, line);
- if (rw_latch == RW_S_LATCH) {
- success = rw_lock_s_lock_nowait(&(block->lock),
- file, line);
fix_type = MTR_MEMO_PAGE_S_FIX;
- } else {
- success = rw_lock_x_lock_func_nowait_inline(&(block->lock),
- file, line);
+ break;
+ case RW_X_LATCH:
+ success = rw_lock_x_lock_func_nowait_inline(
+ &block->lock, file, line);
+
fix_type = MTR_MEMO_PAGE_X_FIX;
+ break;
+ default:
+ ut_error; /* RW_SX_LATCH is not implemented yet */
}
- if (UNIV_UNLIKELY(!success)) {
+ if (!success) {
buf_block_buf_fix_dec(block);
-
return(FALSE);
}
- if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
+ if (modify_clock != block->modify_clock) {
+
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
if (rw_latch == RW_S_LATCH) {
- rw_lock_s_unlock(&(block->lock));
+ rw_lock_s_unlock(&block->lock);
} else {
- rw_lock_x_unlock(&(block->lock));
+ rw_lock_x_unlock(&block->lock);
}
buf_block_buf_fix_dec(block);
-
return(FALSE);
}
mtr_memo_push(mtr, block, fix_type);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(++buf_dbg_counter % 5771 || buf_validate());
+ ut_a(fsp_skip_sanity_check(block->page.id.space())
+ || ++buf_dbg_counter % 5771
+ || buf_validate());
ut_a(block->page.buf_fix_count > 0);
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- mutex_enter(&block->mutex);
- ut_a(!block->page.file_page_was_freed);
- mutex_exit(&block->mutex);
-#endif
+ ut_d(buf_page_mutex_enter(block));
+ ut_ad(!block->page.file_page_was_freed);
+ ut_d(buf_page_mutex_exit(block));
if (!access_time) {
/* In the case of a first access, try to apply linear
read-ahead */
-
- buf_read_ahead_linear(buf_block_get_space(block),
- buf_block_get_zip_size(block),
- buf_block_get_page_no(block),
+ buf_read_ahead_linear(block->page.id, block->page.size,
ibuf_inside(mtr));
}
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(buf_block_get_space(block),
- buf_block_get_page_no(block)) == 0);
-#endif
buf_pool = buf_pool_from_block(block);
buf_pool->stat.n_page_gets++;
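
buf_page_optimistic_get() is a version-check pattern: buffer-fix the
guessed block, try the latch without waiting, then compare the caller's
remembered modify_clock with the block's current value; any mismatch means
the page may have been rewritten and the caller must fall back to a
regular lookup. A generic sketch of the same idea, with simplified types
that are not InnoDB's:

    #include <cstdint>
    #include <mutex>

    struct Frame {
        std::mutex    latch;        /* stand-in for block->lock */
        std::uint64_t modify_clock; /* bumped on every change */
    };

    /* Succeed only if the latch is free and the frame's version still
    matches the caller's guess; on success the caller holds the latch. */
    bool optimistic_get(Frame& f, std::uint64_t guessed_clock)
    {
        if (!f.latch.try_lock()) {
            return false; /* would have to wait: give up */
        }
        if (f.modify_clock != guessed_clock) {
            f.latch.unlock(); /* page changed since the guess */
            return false;
        }
        return true;
    }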
@@ -3839,8 +5046,7 @@ buf_page_optimistic_get(
This is used to get access to a known database page, when no waiting can be
done. For example, if a search in an adaptive hash index leads us to this
frame.
-@return TRUE if success */
-UNIV_INTERN
+@return TRUE if success */
ibool
buf_page_get_known_nowait(
/*======================*/
@@ -3848,18 +5054,16 @@ buf_page_get_known_nowait(
buf_block_t* block, /*!< in: the known page */
ulint mode, /*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
+ unsigned line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mini-transaction */
{
buf_pool_t* buf_pool;
ibool success;
- ulint fix_type;
- ut_ad(mtr);
- ut_ad(mtr->state == MTR_ACTIVE);
+ ut_ad(mtr->is_active());
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
- mutex_enter(&block->mutex);
+ buf_page_mutex_enter(block);
if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) {
/* Another thread is just freeing the block from the LRU list
@@ -3869,7 +5073,7 @@ buf_page_get_known_nowait(
we have already removed it from the page address hash table
of the buffer pool. */
- mutex_exit(&block->mutex);
+ buf_page_mutex_exit(block);
return(FALSE);
}
@@ -3880,7 +5084,7 @@ buf_page_get_known_nowait(
buf_page_set_accessed(&block->page);
- mutex_exit(&block->mutex);
+ buf_page_mutex_exit(block);
buf_pool = buf_pool_from_block(block);
@@ -3890,19 +5094,25 @@ buf_page_get_known_nowait(
ut_ad(!ibuf_inside(mtr) || mode == BUF_KEEP_OLD);
- if (rw_latch == RW_S_LATCH) {
- success = rw_lock_s_lock_nowait(&(block->lock),
- file, line);
+ mtr_memo_type_t fix_type;
+
+ switch (rw_latch) {
+ case RW_S_LATCH:
+ success = rw_lock_s_lock_nowait(&block->lock, file, line);
fix_type = MTR_MEMO_PAGE_S_FIX;
- } else {
- success = rw_lock_x_lock_func_nowait_inline(&(block->lock),
- file, line);
+ break;
+ case RW_X_LATCH:
+ success = rw_lock_x_lock_func_nowait_inline(
+ &block->lock, file, line);
+
fix_type = MTR_MEMO_PAGE_X_FIX;
+ break;
+ default:
+ ut_error; /* RW_SX_LATCH is not implemented yet */
}
if (!success) {
buf_block_buf_fix_dec(block);
-
return(FALSE);
}
@@ -3913,7 +5123,8 @@ buf_page_get_known_nowait(
ut_a(block->page.buf_fix_count > 0);
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
+
+#ifdef UNIV_DEBUG
if (mode != BUF_KEEP_OLD) {
/* If mode == BUF_KEEP_OLD, we are executing an I/O
completion routine. Avoid a bogus assertion failure
@@ -3922,50 +5133,41 @@ buf_page_get_known_nowait(
deleting a record from SYS_INDEXES. This check will be
skipped in recv_recover_page() as well. */
- mutex_enter(&block->mutex);
+ buf_page_mutex_enter(block);
ut_a(!block->page.file_page_was_freed);
- mutex_exit(&block->mutex);
+ buf_page_mutex_exit(block);
}
-#endif
+#endif /* UNIV_DEBUG */
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a((mode == BUF_KEEP_OLD)
- || (ibuf_count_get(buf_block_get_space(block),
- buf_block_get_page_no(block)) == 0));
-#endif
buf_pool->stat.n_page_gets++;
return(TRUE);
}
-/*******************************************************************//**
-Given a tablespace id and page number tries to get that page. If the
+/** Given a tablespace id and page number tries to get that page. If the
page is not in the buffer pool it is not loaded and NULL is returned.
Suitable for using when holding the lock_sys_t::mutex.
-@return pointer to a page or NULL */
-UNIV_INTERN
+@param[in] page_id page id
+@param[in] file file name
+@param[in] line line where called
+@param[in] mtr mini-transaction
+@return pointer to a page or NULL */
buf_block_t*
buf_page_try_get_func(
-/*==================*/
- ulint space_id,/*!< in: tablespace id */
- ulint page_no,/*!< in: page number */
- ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
- bool possibly_freed,
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mini-transaction */
+ const page_id_t page_id,
+ const char* file,
+ unsigned line,
+ mtr_t* mtr)
{
buf_block_t* block;
ibool success;
- ulint fix_type;
- buf_pool_t* buf_pool = buf_pool_get(space_id, page_no);
+ buf_pool_t* buf_pool = buf_pool_get(page_id);
rw_lock_t* hash_lock;
ut_ad(mtr);
- ut_ad(mtr->state == MTR_ACTIVE);
+ ut_ad(mtr->is_active());
- block = buf_block_hash_get_s_locked(buf_pool, space_id,
- page_no, &hash_lock);
+ block = buf_block_hash_get_s_locked(buf_pool, page_id, &hash_lock);
if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
if (block) {
@@ -3976,24 +5178,19 @@ buf_page_try_get_func(
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
- mutex_enter(&block->mutex);
+ buf_page_mutex_enter(block);
rw_lock_s_unlock(hash_lock);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- ut_a(buf_block_get_space(block) == space_id);
- ut_a(buf_block_get_page_no(block) == page_no);
+ ut_a(page_id == block->page.id);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
buf_block_buf_fix_inc(block, file, line);
- mutex_exit(&block->mutex);
+ buf_page_mutex_exit(block);
- if (rw_latch == RW_S_LATCH) {
- fix_type = MTR_MEMO_PAGE_S_FIX;
- success = rw_lock_s_lock_nowait(&block->lock, file, line);
- } else {
- success = false;
- }
+ mtr_memo_type_t fix_type = MTR_MEMO_PAGE_S_FIX;
+ success = rw_lock_s_lock_nowait(&block->lock, file, line);
if (!success) {
/* Let us try to get an X-latch. If the current thread
@@ -4007,32 +5204,27 @@ buf_page_try_get_func(
if (!success) {
buf_block_buf_fix_dec(block);
-
return(NULL);
}
mtr_memo_push(mtr, block, fix_type);
+
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(++buf_dbg_counter % 5771 || buf_validate());
+ ut_a(fsp_skip_sanity_check(block->page.id.space())
+ || ++buf_dbg_counter % 5771
+ || buf_validate());
ut_a(block->page.buf_fix_count > 0);
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- if (!possibly_freed) {
- mutex_enter(&block->mutex);
- ut_a(!block->page.file_page_was_freed);
- mutex_exit(&block->mutex);
- }
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
+
+ ut_d(buf_page_mutex_enter(block));
+ ut_d(ut_a(!block->page.file_page_was_freed));
+ ut_d(buf_page_mutex_exit(block));
+
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
buf_pool->stat.n_page_gets++;
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(buf_block_get_space(block),
- buf_block_get_page_no(block)) == 0);
-#endif
-
return(block);
}
@@ -4053,48 +5245,42 @@ buf_page_init_low(
bpage->newest_modification = 0;
bpage->oldest_modification = 0;
bpage->write_size = 0;
- bpage->encrypted = false;
bpage->real_size = 0;
bpage->slot = NULL;
HASH_INVALIDATE(bpage, hash);
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- bpage->file_page_was_freed = FALSE;
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
+
+ ut_d(bpage->file_page_was_freed = FALSE);
}
-/********************************************************************//**
-Inits a page to the buffer buf_pool. */
-static MY_ATTRIBUTE((nonnull))
+/** Initializes a page in the buffer pool.
+@param[in,out] buf_pool buffer pool
+@param[in] page_id page id
+@param[in,out] block block to init */
+static
void
buf_page_init(
-/*==========*/
- buf_pool_t* buf_pool,/*!< in/out: buffer pool */
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: offset of the page within space
- in units of a page */
- ulint fold, /*!< in: buf_page_address_fold(space,offset) */
- ulint zip_size,/*!< in: compressed page size, or 0 */
- buf_block_t* block) /*!< in/out: block to init */
+ buf_pool_t* buf_pool,
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ buf_block_t* block)
{
buf_page_t* hash_page;
- ut_ad(buf_pool == buf_pool_get(space, offset));
+ ut_ad(buf_pool == buf_pool_get(page_id));
ut_ad(buf_pool_mutex_own(buf_pool));
- ut_ad(mutex_own(&(block->mutex)));
+ ut_ad(buf_page_mutex_own(block));
ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(buf_page_hash_lock_get(buf_pool, fold),
- RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(buf_page_hash_lock_get(buf_pool, page_id),
+ RW_LOCK_X));
/* Set the state of the block */
- buf_block_set_file_page(block, space, offset);
+ buf_block_set_file_page(block, page_id);
#ifdef UNIV_DEBUG_VALGRIND
- if (!space) {
+ if (is_system_tablespace(page_id.space())) {
/* Silence valid Valgrind warnings about uninitialized
data being written to data files. There are some unused
bytes on some pages that InnoDB does not initialize. */
@@ -4104,44 +5290,38 @@ buf_page_init(
buf_block_init_low(block);
- block->lock_hash_val = lock_rec_hash(space, offset);
+ block->lock_hash_val = lock_rec_hash(page_id.space(),
+ page_id.page_no());
buf_page_init_low(&block->page);
/* Insert into the hash table of file pages */
- hash_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
+ hash_page = buf_page_hash_get_low(buf_pool, page_id);
if (hash_page == NULL) {
- /* Block not found in the hash table */
+ /* Block not found in hash table */
} else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
+ /* Preserve the reference count. */
ib_uint32_t buf_fix_count = hash_page->buf_fix_count;
- ut_a(buf_fix_count > 0);
+ ut_a(buf_fix_count > 0);
-#ifdef PAGE_ATOMIC_REF_COUNT
- os_atomic_increment_uint32(
- &block->page.buf_fix_count, buf_fix_count);
-#else
- block->page.buf_fix_count += ulint(buf_fix_count);
-#endif /* PAGE_ATOMIC_REF_COUNT */
+ my_atomic_add32((int32*) &block->page.buf_fix_count, buf_fix_count);
- buf_pool_watch_remove(buf_pool, fold, hash_page);
+ buf_pool_watch_remove(buf_pool, hash_page);
} else {
- fprintf(stderr,
- "InnoDB: Error: page %lu %lu already found"
- " in the hash table: %p, %p\n",
- space,
- offset,
- (const void*) hash_page, (const void*) block);
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- mutex_exit(&block->mutex);
- buf_pool_mutex_exit(buf_pool);
- buf_print();
- buf_LRU_print();
- buf_validate();
- buf_LRU_validate();
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+ ib::error() << "Page " << page_id
+ << " already found in the hash table: "
+ << hash_page << ", " << block;
+
+ ut_d(buf_page_mutex_exit(block));
+ ut_d(buf_pool_mutex_exit(buf_pool));
+ ut_d(buf_print());
+ ut_d(buf_LRU_print());
+ ut_d(buf_validate());
+ ut_d(buf_LRU_validate());
ut_error;
}
@@ -4149,15 +5329,18 @@ buf_page_init(
ut_ad(!block->page.in_page_hash);
ut_d(block->page.in_page_hash = TRUE);
- HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, &block->page);
+ block->page.id = page_id;
+ block->page.size.copy_from(page_size);
- if (zip_size) {
- page_zip_set_size(&block->page.zip, zip_size);
+ HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
+ page_id.fold(), &block->page);
+
+ if (page_size.is_compressed()) {
+ page_zip_set_size(&block->page.zip, page_size.physical());
}
}
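
The HASH_INSERT above keys the page hash on page_id.fold(), which mixes
the tablespace id and page number into one cell index. A minimal sketch of
such a fold; the shift-and-add shape mirrors page_id_t::fold(), but treat
the exact formula as illustrative:

    #include <stdint.h>

    /* Combine a tablespace id and page number into one hash key. */
    static inline uint64_t page_id_fold(uint32_t space, uint32_t page_no)
    {
        return (static_cast<uint64_t>(space) << 20) + space + page_no;
    }

    /* Usage: cell = page_id_fold(space, page_no) % n_cells; */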
-/********************************************************************//**
-Function which inits a page for read to the buffer buf_pool. If the page is
+/** Initialize a page to be read into the buffer pool. If the page is
(1) already in buf_pool, or
(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
(3) if the space is deleted or being deleted,
@@ -4165,31 +5348,29 @@ then this function does nothing.
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
on the buffer frame. The io-handler must take care that the flag is cleared
and the lock released later.
-@return pointer to the block or NULL */
-UNIV_INTERN
+@param[out] err DB_SUCCESS or DB_TABLESPACE_DELETED
+@param[in] mode BUF_READ_IBUF_PAGES_ONLY, ...
+@param[in] page_id page id
+@param[in] page_size page size
+@param[in] unzip whether the uncompressed page is
+ requested (for ROW_FORMAT=COMPRESSED)
+@return pointer to the block
+@retval NULL in case of an error */
buf_page_t*
buf_page_init_for_read(
-/*===================*/
- dberr_t* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
- ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size, or 0 */
- ibool unzip, /*!< in: TRUE=request uncompressed page */
- ib_int64_t tablespace_version,
- /*!< in: prevents reading from a wrong
- version of the tablespace in case we have done
- DISCARD + IMPORT */
- ulint offset) /*!< in: page number */
+ dberr_t* err,
+ ulint mode,
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ bool unzip)
{
buf_block_t* block;
buf_page_t* bpage = NULL;
buf_page_t* watch_page;
rw_lock_t* hash_lock;
mtr_t mtr;
- ulint fold;
ibool lru = FALSE;
void* data;
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
+ buf_pool_t* buf_pool = buf_pool_get(page_id);
ut_ad(buf_pool);
@@ -4198,12 +5379,12 @@ buf_page_init_for_read(
if (mode == BUF_READ_IBUF_PAGES_ONLY) {
/* It is a read-ahead within an ibuf routine */
- ut_ad(!ibuf_bitmap_page(zip_size, offset));
+ ut_ad(!ibuf_bitmap_page(page_id, page_size));
ibuf_mtr_start(&mtr);
- if (!recv_no_ibuf_operations
- && !ibuf_page(space, zip_size, offset, &mtr)) {
+ if (!recv_no_ibuf_operations &&
+ !ibuf_page(page_id, page_size, &mtr)) {
ibuf_mtr_commit(&mtr);
@@ -4213,7 +5394,7 @@ buf_page_init_for_read(
ut_ad(mode == BUF_READ_ANY_PAGE);
}
- if (zip_size && !unzip && !recv_recovery_is_on()) {
+ if (page_size.is_compressed() && !unzip && !recv_recovery_is_on()) {
block = NULL;
} else {
block = buf_LRU_get_free_block(buf_pool);
@@ -4221,53 +5402,40 @@ buf_page_init_for_read(
ut_ad(buf_pool_from_block(block) == buf_pool);
}
- fold = buf_page_address_fold(space, offset);
- hash_lock = buf_page_hash_lock_get(buf_pool, fold);
-
buf_pool_mutex_enter(buf_pool);
+
+ hash_lock = buf_page_hash_lock_get(buf_pool, page_id);
rw_lock_x_lock(hash_lock);
- watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
+ watch_page = buf_page_hash_get_low(buf_pool, page_id);
if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
/* The page is already in the buffer pool. */
watch_page = NULL;
-err_exit:
rw_lock_x_unlock(hash_lock);
if (block) {
- mutex_enter(&block->mutex);
+ buf_page_mutex_enter(block);
buf_LRU_block_free_non_file_page(block);
- mutex_exit(&block->mutex);
+ buf_page_mutex_exit(block);
}
bpage = NULL;
goto func_exit;
}
- if (fil_tablespace_deleted_or_being_deleted_in_mem(
- space, tablespace_version)) {
- /* The page belongs to a space which has been
- deleted or is being deleted. */
- *err = DB_TABLESPACE_DELETED;
-
- goto err_exit;
- }
-
if (block) {
bpage = &block->page;
- mutex_enter(&block->mutex);
+ buf_page_mutex_enter(block);
ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
- buf_page_init(buf_pool, space, offset, fold, zip_size, block);
+ buf_page_init(buf_pool, page_id, page_size, block);
-#ifdef PAGE_ATOMIC_REF_COUNT
- /* Note: We set the io state without the protection of
- the block->lock. This is because other threads cannot
- access this block unless it is in the hash table. */
+		/* Note: We are using the hash_lock for protection. This is
+		safe because no other thread can look up the block in the
+		page hash table yet. */
buf_page_set_io_fix(bpage, BUF_IO_READ);
-#endif /* PAGE_ATOMIC_REF_COUNT */
rw_lock_x_unlock(hash_lock);
@@ -4285,11 +5453,7 @@ err_exit:
rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
-#ifndef PAGE_ATOMIC_REF_COUNT
- buf_page_set_io_fix(bpage, BUF_IO_READ);
-#endif /* !PAGE_ATOMIC_REF_COUNT */
-
- if (zip_size) {
+ if (page_size.is_compressed()) {
/* buf_pool->mutex may be released and
reacquired by buf_buddy_alloc(). Thus, we
must release block->mutex in order not to
@@ -4298,9 +5462,10 @@ err_exit:
operation until after the block descriptor has
been added to buf_pool->LRU and
buf_pool->page_hash. */
- mutex_exit(&block->mutex);
- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
- mutex_enter(&block->mutex);
+ buf_page_mutex_exit(block);
+ data = buf_buddy_alloc(buf_pool, page_size.physical(),
+ &lru);
+ buf_page_mutex_enter(block);
block->page.zip.data = (page_zip_t*) data;
/* To maintain the invariant
@@ -4312,7 +5477,7 @@ err_exit:
buf_unzip_LRU_add_block(block, TRUE);
}
- mutex_exit(&block->mutex);
+ buf_page_mutex_exit(block);
} else {
rw_lock_x_unlock(hash_lock);
@@ -4320,7 +5485,7 @@ err_exit:
control block (bpage), in order to avoid the
invocation of buf_buddy_relocate_block() on
uninitialized data. */
- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
+ data = buf_buddy_alloc(buf_pool, page_size.physical(), &lru);
rw_lock_x_lock(hash_lock);
@@ -4329,8 +5494,7 @@ err_exit:
check the page_hash again, as it may have been modified. */
if (UNIV_UNLIKELY(lru)) {
- watch_page = buf_page_hash_get_low(
- buf_pool, space, offset, fold);
+ watch_page = buf_page_hash_get_low(buf_pool, page_id);
if (UNIV_UNLIKELY(watch_page
&& !buf_pool_watch_is_sentinel(buf_pool,
@@ -4339,7 +5503,8 @@ err_exit:
/* The block was added by some other thread. */
rw_lock_x_unlock(hash_lock);
watch_page = NULL;
- buf_buddy_free(buf_pool, data, zip_size);
+ buf_buddy_free(buf_pool, data,
+ page_size.physical());
bpage = NULL;
goto func_exit;
@@ -4352,28 +5517,25 @@ err_exit:
bpage->buf_pool_index = buf_pool_index(buf_pool);
page_zip_des_init(&bpage->zip);
- page_zip_set_size(&bpage->zip, zip_size);
+ page_zip_set_size(&bpage->zip, page_size.physical());
bpage->zip.data = (page_zip_t*) data;
- bpage->slot = NULL;
+ bpage->size.copy_from(page_size);
mutex_enter(&buf_pool->zip_mutex);
- UNIV_MEM_DESC(bpage->zip.data,
- page_zip_get_size(&bpage->zip));
+ UNIV_MEM_DESC(bpage->zip.data, bpage->size.physical());
buf_page_init_low(bpage);
- bpage->state = BUF_BLOCK_ZIP_PAGE;
- bpage->space = static_cast<ib_uint32_t>(space);
- bpage->offset = static_cast<ib_uint32_t>(offset);
+ bpage->state = BUF_BLOCK_ZIP_PAGE;
+ bpage->id = page_id;
+ bpage->flush_observer = NULL;
-#ifdef UNIV_DEBUG
- bpage->in_page_hash = FALSE;
- bpage->in_zip_hash = FALSE;
- bpage->in_flush_list = FALSE;
- bpage->in_free_list = FALSE;
- bpage->in_LRU_list = FALSE;
-#endif /* UNIV_DEBUG */
+ ut_d(bpage->in_page_hash = FALSE);
+ ut_d(bpage->in_zip_hash = FALSE);
+ ut_d(bpage->in_flush_list = FALSE);
+ ut_d(bpage->in_free_list = FALSE);
+ ut_d(bpage->in_LRU_list = FALSE);
ut_d(bpage->in_page_hash = TRUE);
@@ -4386,24 +5548,19 @@ err_exit:
ut_a(buf_fix_count > 0);
-#ifdef PAGE_ATOMIC_REF_COUNT
- os_atomic_increment_uint32(
- &bpage->buf_fix_count, buf_fix_count);
-#else
- bpage->buf_fix_count += buf_fix_count;
-#endif /* PAGE_ATOMIC_REF_COUNT */
+ my_atomic_add32((int32*) &bpage->buf_fix_count, buf_fix_count);
ut_ad(buf_pool_watch_is_sentinel(buf_pool, watch_page));
- buf_pool_watch_remove(buf_pool, fold, watch_page);
+ buf_pool_watch_remove(buf_pool, watch_page);
}
- HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
- bpage);
+ HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
+ bpage->id.fold(), bpage);
rw_lock_x_unlock(hash_lock);
/* The block must be put to the LRU list, to the old blocks.
- The zip_size is already set into the page zip */
+ The zip size is already set into the page zip */
buf_LRU_add_block(bpage, TRUE/* to old blocks */);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
buf_LRU_insert_zip_clean(bpage);
@@ -4423,63 +5580,49 @@ func_exit:
ibuf_mtr_commit(&mtr);
}
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
- ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
+ ut_ad(!rw_lock_own_flagged(hash_lock,
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
ut_ad(!bpage || buf_page_in_file(bpage));
+
return(bpage);
}
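
Across these buf0buf.cc hunks the old (space, offset, fold) argument triple is collapsed into a single page_id_t key, and the hash fold is recomputed from the id on demand rather than threaded through every caller. The minimal sketch below is illustrative only; the fold formula mirrors the one InnoDB uses, but treat it as an assumption rather than the authoritative definition:

    /* Sketch, not part of the patch: a page_id_t-like key replacing
    the (space, offset, fold) triple. */
    class page_id_sketch {
    public:
        page_id_sketch(uint32_t space, uint32_t page_no)
            : m_space(space), m_page_no(page_no) {}
        uint32_t space() const { return m_space; }
        uint32_t page_no() const { return m_page_no; }
        /* Assumed fold: any mixing function keyed on both members
        works for the purposes of this sketch. */
        ulint fold() const { return (m_space << 20) + m_space + m_page_no; }
    private:
        uint32_t m_space;
        uint32_t m_page_no;
    };

With the fold derived from the id itself, call sites such as buf_page_hash_get_low(buf_pool, page_id) above no longer need a separate fold parameter.
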
-/********************************************************************//**
-Initializes a page to the buffer buf_pool. The page is usually not read
+/** Initializes a page to the buffer buf_pool. The page is usually not read
from a file even if it cannot be found in the buffer buf_pool. This is one
of the functions which perform a state transition NOT_USED => FILE_PAGE on
a block (the other is buf_page_get_gen).
-@return pointer to the block, page bufferfixed */
-UNIV_INTERN
+@param[in] page_id page id
+@param[in] page_size page size
+@param[in] mtr mini-transaction
+@return pointer to the block, page buffer-fixed */
buf_block_t*
buf_page_create(
-/*============*/
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: offset of the page within space in units of
- a page */
- ulint zip_size,/*!< in: compressed page size, or 0 */
- mtr_t* mtr) /*!< in: mini-transaction handle */
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ mtr_t* mtr)
{
buf_frame_t* frame;
buf_block_t* block;
- ulint fold;
buf_block_t* free_block = NULL;
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
+ buf_pool_t* buf_pool = buf_pool_get(page_id);
rw_lock_t* hash_lock;
- ut_ad(mtr);
- ut_ad(mtr->state == MTR_ACTIVE);
- ut_ad(space || !zip_size);
+ ut_ad(mtr->is_active());
+ ut_ad(page_id.space() != 0 || !page_size.is_compressed());
free_block = buf_LRU_get_free_block(buf_pool);
- fold = buf_page_address_fold(space, offset);
- hash_lock = buf_page_hash_lock_get(buf_pool, fold);
-
buf_pool_mutex_enter(buf_pool);
+
+ hash_lock = buf_page_hash_lock_get(buf_pool, page_id);
rw_lock_x_lock(hash_lock);
- block = (buf_block_t*) buf_page_hash_get_low(
- buf_pool, space, offset, fold);
+ block = (buf_block_t*) buf_page_hash_get_low(buf_pool, page_id);
if (block
&& buf_page_in_file(&block->page)
&& !buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(space, offset) == 0);
-#endif
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- block->page.file_page_was_freed = FALSE;
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
+ ut_d(block->page.file_page_was_freed = FALSE);
/* Page can be found in buf_pool */
buf_pool_mutex_exit(buf_pool);
@@ -4487,23 +5630,27 @@ buf_page_create(
buf_block_free(free_block);
- return(buf_page_get_with_no_latch(space, zip_size, offset, mtr));
+ if (!recv_recovery_is_on()) {
+ return buf_page_get_with_no_latch(page_id, page_size,
+ mtr);
+ }
+
+ mutex_exit(&recv_sys->mutex);
+ block = buf_page_get_with_no_latch(page_id, page_size, mtr);
+ mutex_enter(&recv_sys->mutex);
+ return block;
}
/* If we get here, the page was not in buf_pool: init it there */
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr, "Creating space %lu page %lu to buffer\n",
- space, offset);
- }
-#endif /* UNIV_DEBUG */
+ DBUG_PRINT("ib_buf", ("create page %u:%u",
+ page_id.space(), page_id.page_no()));
block = free_block;
- mutex_enter(&block->mutex);
+ buf_page_mutex_enter(block);
- buf_page_init(buf_pool, space, offset, fold, zip_size, block);
+ buf_page_init(buf_pool, page_id, page_size, block);
rw_lock_x_unlock(hash_lock);
@@ -4513,7 +5660,7 @@ buf_page_create(
buf_block_buf_fix_inc(block, __FILE__, __LINE__);
buf_pool->stat.n_pages_created++;
- if (zip_size) {
+ if (page_size.is_compressed()) {
void* data;
ibool lru;
@@ -4524,15 +5671,15 @@ buf_page_create(
buf_page_set_io_fix(&block->page, BUF_IO_READ);
rw_lock_x_lock(&block->lock);
- mutex_exit(&block->mutex);
+ buf_page_mutex_exit(block);
/* buf_pool->mutex may be released and reacquired by
buf_buddy_alloc(). Thus, we must release block->mutex
in order not to break the latching order in
the reacquisition of buf_pool->mutex. We also must
defer this operation until after the block descriptor
has been added to buf_pool->LRU and buf_pool->page_hash. */
- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
- mutex_enter(&block->mutex);
+ data = buf_buddy_alloc(buf_pool, page_size.physical(), &lru);
+ buf_page_mutex_enter(block);
block->page.zip.data = (page_zip_t*) data;
/* To maintain the invariant
@@ -4553,12 +5700,13 @@ buf_page_create(
buf_page_set_accessed(&block->page);
- mutex_exit(&block->mutex);
+ buf_page_mutex_exit(block);
/* Delete possible entries for the page from the insert buffer:
such can exist if the page belonged to an index which was dropped */
-
- ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);
+ if (!recv_recovery_is_on()) {
+ ibuf_merge_or_delete_for_page(NULL, page_id, &page_size, TRUE);
+ }
frame = block->frame;
@@ -4573,14 +5721,11 @@ buf_page_create(
(3) key_version on encrypted pages (not page 0:0) */
memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8);
+ memset(frame + FIL_PAGE_LSN, 0, 8);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(buf_block_get_space(block),
- buf_block_get_page_no(block)) == 0);
-#endif
return(block);
}
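
The recovery branch added above releases recv_sys->mutex around the buffer-pool re-entry to keep the latching order intact. Isolated into a hypothetical helper (not in the patch), the pattern looks like this, assuming the caller holds recv_sys->mutex whenever recv_recovery_is_on() returns true:

    /* Sketch of the lock-juggling pattern from buf_page_create(). */
    buf_block_t* get_during_recovery(const page_id_t id,
                                     const page_size_t& size, mtr_t* mtr)
    {
        if (!recv_recovery_is_on()) {
            return buf_page_get_with_no_latch(id, size, mtr);
        }
        /* Drop recv_sys->mutex before re-entering the buffer pool,
        then reacquire it afterwards. */
        mutex_exit(&recv_sys->mutex);
        buf_block_t* block = buf_page_get_with_no_latch(id, size, mtr);
        mutex_enter(&recv_sys->mutex);
        return block;
    }
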
@@ -4613,6 +5758,7 @@ buf_page_monitor(
ulint level;
case FIL_PAGE_INDEX:
+ case FIL_PAGE_RTREE:
level = btr_page_get_level_low(frame);
/* Check if it is an index page for insert buffer */
@@ -4637,49 +5783,49 @@ buf_page_monitor(
}
break;
- case FIL_PAGE_UNDO_LOG:
+ case FIL_PAGE_UNDO_LOG:
counter = MONITOR_RW_COUNTER(io_type, MONITOR_UNDO_LOG_PAGE);
break;
- case FIL_PAGE_INODE:
+ case FIL_PAGE_INODE:
counter = MONITOR_RW_COUNTER(io_type, MONITOR_INODE_PAGE);
break;
- case FIL_PAGE_IBUF_FREE_LIST:
+ case FIL_PAGE_IBUF_FREE_LIST:
counter = MONITOR_RW_COUNTER(io_type,
MONITOR_IBUF_FREELIST_PAGE);
break;
- case FIL_PAGE_IBUF_BITMAP:
+ case FIL_PAGE_IBUF_BITMAP:
counter = MONITOR_RW_COUNTER(io_type,
MONITOR_IBUF_BITMAP_PAGE);
break;
- case FIL_PAGE_TYPE_SYS:
+ case FIL_PAGE_TYPE_SYS:
counter = MONITOR_RW_COUNTER(io_type, MONITOR_SYSTEM_PAGE);
break;
- case FIL_PAGE_TYPE_TRX_SYS:
+ case FIL_PAGE_TYPE_TRX_SYS:
counter = MONITOR_RW_COUNTER(io_type, MONITOR_TRX_SYSTEM_PAGE);
break;
- case FIL_PAGE_TYPE_FSP_HDR:
+ case FIL_PAGE_TYPE_FSP_HDR:
counter = MONITOR_RW_COUNTER(io_type, MONITOR_FSP_HDR_PAGE);
break;
- case FIL_PAGE_TYPE_XDES:
+ case FIL_PAGE_TYPE_XDES:
counter = MONITOR_RW_COUNTER(io_type, MONITOR_XDES_PAGE);
break;
- case FIL_PAGE_TYPE_BLOB:
+ case FIL_PAGE_TYPE_BLOB:
counter = MONITOR_RW_COUNTER(io_type, MONITOR_BLOB_PAGE);
break;
- case FIL_PAGE_TYPE_ZBLOB:
+ case FIL_PAGE_TYPE_ZBLOB:
counter = MONITOR_RW_COUNTER(io_type, MONITOR_ZBLOB_PAGE);
break;
- case FIL_PAGE_TYPE_ZBLOB2:
+ case FIL_PAGE_TYPE_ZBLOB2:
counter = MONITOR_RW_COUNTER(io_type, MONITOR_ZBLOB2_PAGE);
break;
@@ -4690,26 +5836,46 @@ buf_page_monitor(
MONITOR_INC_NOCHECK(counter);
}
-/********************************************************************//**
-Mark a table with the specified space pointed by bpage->space corrupted.
-Also remove the bpage from LRU list.
-@param[in,out] bpage Block */
+/** Mark a table corrupted.
+@param[in] bpage corrupted page
+@param[in] space tablespace of the corrupted page */
+ATTRIBUTE_COLD
+static void buf_mark_space_corrupt(buf_page_t* bpage, const fil_space_t& space)
+{
+ /* If block is not encrypted find the table with specified
+ space id, and mark it corrupted. Encrypted tables
+ are marked unusable later e.g. in ::open(). */
+ if (!space.crypt_data
+ || space.crypt_data->type == CRYPT_SCHEME_UNENCRYPTED) {
+ dict_set_corrupted_by_space(bpage->id.space());
+ } else {
+ dict_set_encrypted_by_space(bpage->id.space());
+ }
+}
+
+/** Release a corrupted page after a failed read, removing it from
+the LRU list and, unless innodb_force_recovery is set, marking the
+table that owns it corrupted.
+@param[in]	bpage	corrupted page
+@param[in]	space	tablespace of the corrupted page */
static
void
-buf_mark_space_corrupt(
- buf_page_t* bpage)
+buf_corrupt_page_release(buf_page_t* bpage, const fil_space_t* space)
{
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
const ibool uncompressed = (buf_page_get_state(bpage)
== BUF_BLOCK_FILE_PAGE);
- ulint space = bpage->space;
+ page_id_t old_page_id = bpage->id;
/* First unfix and release lock on the bpage */
buf_pool_mutex_enter(buf_pool);
mutex_enter(buf_page_get_mutex(bpage));
ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ);
- ut_ad(bpage->buf_fix_count == 0);
+ ut_ad(bpage->id.space() == space->id);
+ /* buf_fix_count can be greater than zero, because another thread
+ may be waiting in buf_page_wait_read() for the read to complete. */
+
+ bpage->id.set_corrupt_id();
/* Set BUF_IO_NONE before we remove the block from LRU list */
buf_page_set_io_fix(bpage, BUF_IO_NONE);
@@ -4721,17 +5887,12 @@ buf_mark_space_corrupt(
mutex_exit(buf_page_get_mutex(bpage));
- /* If block is not encrypted find the table with specified
- space id, and mark it corrupted. Encrypted tables
- are marked unusable later e.g. in ::open(). */
- if (!bpage->encrypted) {
- dict_set_corrupted_by_space(space);
- } else {
- dict_set_encrypted_by_space(space);
+ if (!srv_force_recovery) {
+ buf_mark_space_corrupt(bpage, *space);
}
/* After this point bpage can't be referenced. */
- buf_LRU_free_one_page(bpage);
+ buf_LRU_free_one_page(bpage, old_page_id);
ut_ad(buf_pool->n_pend_reads > 0);
buf_pool->n_pend_reads--;
@@ -4754,11 +5915,9 @@ static dberr_t buf_page_check_corrupt(buf_page_t* bpage, fil_space_t* space)
{
ut_ad(space->n_pending_ios > 0);
- ulint zip_size = buf_page_get_zip_size(bpage);
- byte* dst_frame = (zip_size) ? bpage->zip.data :
+ byte* dst_frame = (bpage->zip.data) ? bpage->zip.data :
((buf_block_t*) bpage)->frame;
dberr_t err = DB_SUCCESS;
- bool corrupted = false;
/* In buf_decrypt_after_read we have either decrypted the page if
page post encryption checksum matches and used key_id is found
@@ -4766,51 +5925,40 @@ static dberr_t buf_page_check_corrupt(buf_page_t* bpage, fil_space_t* space)
not decrypted, in which case it could be encrypted and corrupted,
merely corrupted, or a good page. Even if we decrypted it, the page
could still be corrupted if the key used does not match. */
- const bool still_encrypted = mach_read_from_4(
+ const bool seems_encrypted = mach_read_from_4(
dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION)
&& space->crypt_data
- && space->crypt_data->type != CRYPT_SCHEME_UNENCRYPTED
- && !bpage->encrypted
- && fil_space_verify_crypt_checksum(dst_frame, zip_size);
-
- if (!still_encrypted) {
- /* If traditional checksums match, we assume that page is
- not anymore encrypted. */
- corrupted = buf_page_is_corrupted(true, dst_frame, zip_size,
- space);
- if (!corrupted) {
- bpage->encrypted = false;
- } else {
- err = DB_PAGE_CORRUPTED;
- }
+ && space->crypt_data->type != CRYPT_SCHEME_UNENCRYPTED;
+
+ /* If the traditional checksums match, we assume that the page
+ is no longer encrypted. */
+ if (buf_page_is_corrupted(
+ true, dst_frame, bpage->size, space)) {
+ err = DB_PAGE_CORRUPTED;
}
- /* Pages that we think are unencrypted but do not match the checksum
- checks could be corrupted or encrypted or both. */
- if (corrupted && !bpage->encrypted) {
- /* An error will be reported by
- buf_page_io_complete(). */
- } else if (still_encrypted || (bpage->encrypted && corrupted)) {
- bpage->encrypted = true;
+ if (seems_encrypted && err == DB_PAGE_CORRUPTED
+ && bpage->id.page_no() != 0) {
err = DB_DECRYPTION_FAILED;
- ib_logf(IB_LOG_LEVEL_ERROR,
- "The page [page id: space=%u"
- ", page number=%u]"
- " in file '%s' cannot be decrypted.",
- bpage->space, bpage->offset,
- space->chain.start->name);
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "However key management plugin or used key_version " ULINTPF
- " is not found or"
- " used encryption algorithm or method does not match.",
- mach_read_from_4(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION));
-
- if (bpage->space > TRX_SYS_SPACE) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Marking tablespace as missing. You may drop this table or"
- " install correct key management plugin and key file.");
+ ib::error()
+ << "The page " << bpage->id << " in file '"
+ << space->chain.start->name
+ << "' cannot be decrypted.";
+
+ ib::info()
+ << "However key management plugin or used key_version "
+ << mach_read_from_4(dst_frame
+ + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION)
+ << " is not found or"
+ " used encryption algorithm or method does not match.";
+
+ if (bpage->id.space() != TRX_SYS_SPACE) {
+ ib::info()
+ << "Marking tablespace as missing."
+ " You may drop this table or"
+ " install correct key management plugin"
+ " and key file.";
}
}
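
The rewritten classification above reduces to three outcomes. A condensed sketch follows; the classify() helper is hypothetical and only restates the control flow of buf_page_check_corrupt():

    /* seems_encrypted: key_version field nonzero and crypt_data says
    the tablespace is encrypted. Page 0 is never reported as a
    decryption failure, matching the page_no() != 0 check above. */
    dberr_t classify(bool checksum_ok, bool seems_encrypted, bool is_page_0)
    {
        if (checksum_ok) {
            return DB_SUCCESS;
        }
        if (seems_encrypted && !is_page_0) {
            return DB_DECRYPTION_FAILED;  /* wrong or missing key likely */
        }
        return DB_PAGE_CORRUPTED;
    }
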
@@ -4818,25 +5966,24 @@ static dberr_t buf_page_check_corrupt(buf_page_t* bpage, fil_space_t* space)
}
/** Complete a read or write request of a file page to or from the buffer pool.
-@param[in,out] bpage Page to complete
-@param[in] evict whether or not to evict the page
- from LRU list.
+@param[in,out] bpage page to complete
+@param[in] dblwr whether the doublewrite buffer was used (on write)
+@param[in] evict whether or not to evict the page from LRU list
@return whether the operation succeeded
@retval DB_SUCCESS always when writing, or if a read page was OK
+@retval DB_TABLESPACE_DELETED if the tablespace does not exist
@retval DB_PAGE_CORRUPTED if the checksum fails on a page read
@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
after decryption normal page checksum does
not match */
UNIV_INTERN
dberr_t
-buf_page_io_complete(buf_page_t* bpage, bool evict)
+buf_page_io_complete(buf_page_t* bpage, bool dblwr, bool evict)
{
enum buf_io_fix io_type;
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- const ibool uncompressed = (buf_page_get_state(bpage)
+ const bool uncompressed = (buf_page_get_state(bpage)
== BUF_BLOCK_FILE_PAGE);
- dberr_t err = DB_SUCCESS;
-
ut_a(buf_page_in_file(bpage));
/* We do not need protect io_fix here by mutex to read
@@ -4847,47 +5994,43 @@ buf_page_io_complete(buf_page_t* bpage, bool evict)
io_type = buf_page_get_io_fix(bpage);
ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);
+ ut_ad(bpage->size.is_compressed() == (bpage->zip.data != NULL));
+ ut_ad(uncompressed || bpage->zip.data);
if (io_type == BUF_IO_READ) {
ulint read_page_no = 0;
ulint read_space_id = 0;
- uint key_version = 0;
byte* frame = bpage->zip.data
? bpage->zip.data
: reinterpret_cast<buf_block_t*>(bpage)->frame;
ut_ad(frame);
- fil_space_t* space = fil_space_acquire_for_io(bpage->space);
+ fil_space_t* space = fil_space_acquire_for_io(
+ bpage->id.space());
if (!space) {
- return(DB_TABLESPACE_DELETED);
+ return DB_TABLESPACE_DELETED;
}
+ dberr_t err;
+
if (!buf_page_decrypt_after_read(bpage, space)) {
err = DB_DECRYPTION_FAILED;
goto database_corrupted;
}
- if (buf_page_get_zip_size(bpage)) {
- frame = bpage->zip.data;
- os_atomic_increment_ulint(&buf_pool->n_pend_unzip, 1);
- if (uncompressed
- && !buf_zip_decompress((buf_block_t*) bpage,
- FALSE)) {
+ if (bpage->zip.data && uncompressed) {
+ my_atomic_addlint(&buf_pool->n_pend_unzip, 1);
+ ibool ok = buf_zip_decompress((buf_block_t*) bpage,
+ FALSE);
+ my_atomic_addlint(&buf_pool->n_pend_unzip, -1);
- os_atomic_decrement_ulint(&buf_pool->n_pend_unzip, 1);
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Page %u in tablespace %u zip_decompress failure.",
- bpage->offset, bpage->space);
+ if (!ok) {
+ ib::info() << "Page "
+ << bpage->id
+ << " zip_decompress failure.";
err = DB_PAGE_CORRUPTED;
-
goto database_corrupted;
}
-
- os_atomic_decrement_ulint(&buf_pool->n_pend_unzip, 1);
- } else {
- ut_a(uncompressed);
- frame = ((buf_block_t*) bpage)->frame;
}
/* If this page is not uninitialized and not in the
@@ -4896,37 +6039,27 @@ buf_page_io_complete(buf_page_t* bpage, bool evict)
read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
read_space_id = mach_read_from_4(
frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- key_version = mach_read_from_4(
- frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
-
- if (bpage->space == TRX_SYS_SPACE
- && buf_dblwr_page_inside(bpage->offset)) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: reading page %u\n"
- "InnoDB: which is in the"
- " doublewrite buffer!\n",
- bpage->offset);
- } else if (!read_space_id && !read_page_no) {
+
+ if (bpage->id.space() == TRX_SYS_SPACE
+ && buf_dblwr_page_inside(bpage->id.page_no())) {
+
+ ib::error() << "Reading page " << bpage->id
+ << ", which is in the doublewrite buffer!";
+
+ } else if (read_space_id == 0 && read_page_no == 0) {
/* This is likely an uninitialized page. */
- } else if ((bpage->space
- && bpage->space != read_space_id)
- || bpage->offset != read_page_no) {
+ } else if ((bpage->id.space() != TRX_SYS_SPACE
+ && bpage->id.space() != read_space_id)
+ || bpage->id.page_no() != read_page_no) {
/* We did not compare space_id to read_space_id
- if bpage->space == 0, because the field on the
- page may contain garbage in MySQL < 4.1.1,
- which only supported bpage->space == 0. */
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Space id and page n:o"
- " stored in the page"
- " read in are " ULINTPF ":" ULINTPF ","
- " should be %u:%u!",
- read_space_id,
- read_page_no,
- bpage->space,
- bpage->offset);
+ in the system tablespace, because the field
+ was written as garbage before MySQL 4.1.1,
+ which did not support innodb_file_per_table. */
+
+ ib::error() << "Space id and page no stored in "
+ "the page, read in are "
+ << page_id_t(read_space_id, read_page_no)
+ << ", should be " << bpage->id;
}
err = buf_page_check_corrupt(bpage, space);
@@ -4936,11 +6069,13 @@ database_corrupted:
if (err != DB_SUCCESS) {
/* Not a real corruption if it was triggered by
error injection */
- DBUG_EXECUTE_IF("buf_page_import_corrupt_failure",
- if (bpage->space > TRX_SYS_SPACE) {
- buf_mark_space_corrupt(bpage);
- ib_logf(IB_LOG_LEVEL_INFO,
- "Simulated page corruption");
+ DBUG_EXECUTE_IF(
+ "buf_page_import_corrupt_failure",
+ if (!is_predefined_tablespace(
+ bpage->id.space())) {
+ buf_corrupt_page_release(bpage, space);
+ ib::info() << "Simulated IMPORT "
+ "corruption";
fil_space_release_for_io(space);
return(err);
}
@@ -4949,21 +6084,17 @@ database_corrupted:
);
if (err == DB_PAGE_CORRUPTED) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Database page corruption on disk"
- " or a failed file read of tablespace %s"
- " page [page id: space=%u"
- ", page number=%u]"
- ". You may have to recover from "
- "a backup.",
- space->name,
- bpage->space, bpage->offset);
-
- buf_page_print(frame,
- buf_page_get_zip_size(bpage));
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "It is also possible that your"
+ ib::error()
+ << "Database page corruption on disk"
+ " or a failed file read of tablespace "
+ << space->name << " page " << bpage->id
+ << ". You may have to recover from "
+ << "a backup.";
+
+ buf_page_print(frame, bpage->size);
+
+ ib::info()
+ << "It is also possible that your"
" operating system has corrupted"
" its own file cache and rebooting"
" your computer removes the error."
@@ -4973,53 +6104,59 @@ database_corrupted:
" and reimporting the corrupt table."
" You can use CHECK TABLE to scan"
" your table for corruption. "
- "Please refer to " REFMAN "forcing-innodb-recovery.html"
- " for information about forcing recovery.");
+ << FORCE_RECOVERY_MSG;
}
- if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
+ if (!srv_force_recovery) {
+
/* If page space id is larger than TRX_SYS_SPACE
(0), we will attempt to mark the corresponding
table as corrupted instead of crashing the server */
- if (bpage->space > TRX_SYS_SPACE) {
- buf_mark_space_corrupt(bpage);
- fil_space_release_for_io(space);
- return(err);
- } else {
- ib_logf(IB_LOG_LEVEL_FATAL,
- "Ending processing because of a corrupt database page.");
+ if (bpage->id.space() == TRX_SYS_SPACE) {
+ ib::fatal() << "Aborting because of"
+ " a corrupt database page.";
}
+
+ buf_corrupt_page_release(bpage, space);
+ fil_space_release_for_io(space);
+ return(err);
}
}
DBUG_EXECUTE_IF("buf_page_import_corrupt_failure",
- page_not_corrupt: bpage = bpage; );
+ page_not_corrupt: bpage = bpage; );
+
+ if (err == DB_PAGE_CORRUPTED
+ || err == DB_DECRYPTION_FAILED) {
+ const page_id_t corrupt_page_id = bpage->id;
+
+ buf_corrupt_page_release(bpage, space);
+
+ if (recv_recovery_is_on()) {
+ recv_recover_corrupt_page(corrupt_page_id);
+ }
+
+ fil_space_release_for_io(space);
+ return err;
+ }
if (recv_recovery_is_on()) {
- /* Pages must be uncompressed for crash recovery. */
- ut_a(uncompressed);
- recv_recover_page(TRUE, (buf_block_t*) bpage);
+ recv_recover_page(bpage);
}
- if (uncompressed && !recv_no_ibuf_operations
+ /* If the space is being truncated, avoid ibuf operations.
+ During re-initialization we have already freed the ibuf entries. */
+ if (uncompressed
+ && !recv_no_ibuf_operations
+ && (bpage->id.space() == 0
+ || !is_predefined_tablespace(bpage->id.space()))
+ && !srv_is_tablespace_truncated(bpage->id.space())
&& fil_page_get_type(frame) == FIL_PAGE_INDEX
&& page_is_leaf(frame)) {
- if (bpage->encrypted) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Table in tablespace " ULINTPF " encrypted."
- "However key management plugin or used key_version %u is not found or"
- " used encryption algorithm or method does not match."
- " Can't continue opening the table.",
- read_space_id, key_version);
- } else {
-
- ibuf_merge_or_delete_for_page(
- (buf_block_t*)bpage, bpage->space,
- bpage->offset, buf_page_get_zip_size(bpage),
- TRUE);
- }
-
+ ibuf_merge_or_delete_for_page(
+ (buf_block_t*) bpage, bpage->id,
+ &bpage->size, TRUE);
}
fil_space_release_for_io(space);
@@ -5032,17 +6169,10 @@ database_corrupted:
}
}
+ BPageMutex* block_mutex = buf_page_get_mutex(bpage);
buf_pool_mutex_enter(buf_pool);
- mutex_enter(buf_page_get_mutex(bpage));
+ mutex_enter(block_mutex);
-#ifdef UNIV_IBUF_COUNT_DEBUG
- if (io_type == BUF_IO_WRITE || uncompressed) {
- /* For BUF_IO_READ of compressed-only blocks, the
- buffered operations will be merged by buf_page_get_gen()
- after the block has been uncompressed. */
- ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
- }
-#endif
/* Because this thread which does the unlocking is not the same that
did the locking, we use a pass value != 0 in unlock, which simply
removes the newest lock debug record, without checking the thread
@@ -5051,8 +6181,7 @@ database_corrupted:
buf_page_set_io_fix(bpage, BUF_IO_NONE);
buf_page_monitor(bpage, io_type);
- switch (io_type) {
- case BUF_IO_READ:
+ if (io_type == BUF_IO_READ) {
/* NOTE that the call to ibuf may have moved the ownership of
the x-latch to this OS thread: do not let this confuse you in
debugging! */
@@ -5066,64 +6195,49 @@ database_corrupted:
BUF_IO_READ);
}
- mutex_exit(buf_page_get_mutex(bpage));
-
- break;
-
- case BUF_IO_WRITE:
+ mutex_exit(block_mutex);
+ } else {
/* Write means a flush operation: call the completion
routine in the flush system */
- buf_flush_write_complete(bpage);
+ buf_flush_write_complete(bpage, dblwr);
if (uncompressed) {
- rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
- BUF_IO_WRITE);
+ rw_lock_sx_unlock_gen(&((buf_block_t*) bpage)->lock,
+ BUF_IO_WRITE);
}
buf_pool->stat.n_pages_written++;
- /* In case of flush batches i.e.: BUF_FLUSH_LIST and
- BUF_FLUSH_LRU this function is always called from IO
- helper thread. In this case, we decide whether or not
- to evict the page based on flush type. The value
- passed as evict is the default value in function
- definition which is false.
- We always evict in case of LRU batch and never evict
- in case of flush list batch. For single page flush
- the caller sets the appropriate value. */
+ /* We decide whether or not to evict the page from the
+ LRU list based on the flush_type.
+ * BUF_FLUSH_LIST: don't evict
+ * BUF_FLUSH_LRU: always evict
+ * BUF_FLUSH_SINGLE_PAGE: eviction preference is passed
+ by the caller explicitly. */
if (buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU) {
evict = true;
}
- mutex_exit(buf_page_get_mutex(bpage));
+ mutex_exit(block_mutex);
+
if (evict) {
buf_LRU_free_page(bpage, true);
}
-
- break;
-
- default:
- ut_error;
}
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr, "Has %s page space %lu page no %lu\n",
- io_type == BUF_IO_READ ? "read" : "written",
- buf_page_get_space(bpage),
- buf_page_get_page_no(bpage));
- }
-#endif /* UNIV_DEBUG */
+ DBUG_PRINT("ib_buf", ("%s page %u:%u",
+ io_type == BUF_IO_READ ? "read" : "wrote",
+ bpage->id.space(), bpage->id.page_no()));
buf_pool_mutex_exit(buf_pool);
- return(err);
+ return DB_SUCCESS;
}
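
For the write path, the eviction comment above boils down to a small decision rule. Sketched as a hypothetical helper (not part of the patch; the enum values appear elsewhere in this diff):

    /* Whether buf_page_io_complete() should evict the page after a
    completed write, given the flush type and the caller's request. */
    bool should_evict(buf_flush_t flush_type, bool caller_evict)
    {
        switch (flush_type) {
        case BUF_FLUSH_LRU:         return true;         /* always evict */
        case BUF_FLUSH_LIST:        return false;        /* never evict */
        case BUF_FLUSH_SINGLE_PAGE: return caller_evict; /* caller decides */
        default:                    return caller_evict;
        }
    }
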
/*********************************************************************//**
Asserts that all file pages in the buffer are in a replaceable state.
-@return TRUE */
+@return TRUE */
static
ibool
buf_all_freed_instance(
@@ -5141,24 +6255,9 @@ buf_all_freed_instance(
for (i = buf_pool->n_chunks; i--; chunk++) {
- const buf_block_t* block = buf_chunk_not_freed(chunk);
-
- if (UNIV_LIKELY_NULL(block)) {
- fil_space_t* space = fil_space_get(block->page.space);
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Page %u %u still fixed or dirty.",
- block->page.space,
- block->page.offset);
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Page oldest_modification " LSN_PF
- " fix_count %d io_fix %d.",
- block->page.oldest_modification,
- block->page.buf_fix_count,
- buf_page_get_io_fix(&block->page));
- ib_logf(IB_LOG_LEVEL_FATAL,
- "Page space_id %u name %s.",
- block->page.space,
- space->name ? space->name : "NULL");
+ if (const buf_block_t* block = buf_chunk_not_freed(chunk)) {
+ ib::fatal() << "Page " << block->page.id
+ << " still fixed or dirty";
}
}
@@ -5167,6 +6266,17 @@ buf_all_freed_instance(
return(TRUE);
}
+/** Refreshes the statistics used to print per-second averages.
+@param[in,out] buf_pool buffer pool instance */
+static
+void
+buf_refresh_io_stats(
+ buf_pool_t* buf_pool)
+{
+ buf_pool->last_printout_time = time(NULL);
+ buf_pool->old_stat = buf_pool->stat;
+}
+
/*********************************************************************//**
Invalidates file pages in one buffer pool instance */
static
@@ -5207,7 +6317,7 @@ buf_pool_invalidate_instance(
buf_pool_mutex_enter(buf_pool);
- while (buf_LRU_scan_and_free_block(buf_pool, TRUE)) {
+ while (buf_LRU_scan_and_free_block(buf_pool, true)) {
}
ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
@@ -5227,7 +6337,6 @@ buf_pool_invalidate_instance(
Invalidates the file pages in the buffer pool when an archive recovery is
completed. All the file pages buffered must be in a replaceable state when
this function is called: not latched and not modified. */
-UNIV_INTERN
void
buf_pool_invalidate(void)
/*=====================*/
@@ -5242,7 +6351,7 @@ buf_pool_invalidate(void)
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/*********************************************************************//**
Validates data in one buffer pool instance
-@return TRUE */
+@return TRUE */
static
ibool
buf_pool_validate_instance(
@@ -5259,9 +6368,6 @@ buf_pool_validate_instance(
ulint n_flush = 0;
ulint n_free = 0;
ulint n_zip = 0;
- ulint fold = 0;
- ulint space = 0;
- ulint offset = 0;
ut_ad(buf_pool);
@@ -5279,7 +6385,7 @@ buf_pool_validate_instance(
for (j = chunk->size; j--; block++) {
- mutex_enter(&block->mutex);
+ buf_page_mutex_enter(block);
switch (buf_block_get_state(block)) {
case BUF_BLOCK_POOL_WATCH:
@@ -5291,23 +6397,10 @@ buf_pool_validate_instance(
break;
case BUF_BLOCK_FILE_PAGE:
- space = buf_block_get_space(block);
- offset = buf_block_get_page_no(block);
- fold = buf_page_address_fold(space, offset);
- ut_a(buf_page_hash_get_low(buf_pool,
- space,
- offset,
- fold)
+ ut_a(buf_page_hash_get_low(
+ buf_pool, block->page.id)
== &block->page);
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(buf_page_get_io_fix(&block->page)
- == BUF_IO_READ
- || !ibuf_count_get(buf_block_get_space(
- block),
- buf_block_get_page_no(
- block)));
-#endif
switch (buf_page_get_io_fix(&block->page)) {
case BUF_IO_NONE:
break;
@@ -5323,7 +6416,10 @@ buf_pool_validate_instance(
assert_s_latched:
ut_a(rw_lock_is_locked(
&block->lock,
- RW_LOCK_SHARED));
+ RW_LOCK_S)
+ || rw_lock_is_locked(
+ &block->lock,
+ RW_LOCK_SX));
break;
case BUF_FLUSH_LIST:
n_list_flush++;
@@ -5337,7 +6433,7 @@ assert_s_latched:
case BUF_IO_READ:
ut_a(rw_lock_is_locked(&block->lock,
- RW_LOCK_EX));
+ RW_LOCK_X));
break;
case BUF_IO_PIN:
@@ -5358,7 +6454,7 @@ assert_s_latched:
break;
}
- mutex_exit(&block->mutex);
+ buf_page_mutex_exit(block);
}
}
@@ -5389,9 +6485,7 @@ assert_s_latched:
we have acquired buf_pool->zip_mutex above which acts
as the 'block->mutex' for these bpages. */
ut_a(!b->oldest_modification);
- fold = buf_page_address_fold(b->space, b->offset);
- ut_a(buf_page_hash_get_low(buf_pool, b->space, b->offset,
- fold) == b);
+ ut_a(buf_page_hash_get_low(buf_pool, b->id) == b);
n_lru++;
n_zip++;
}
@@ -5443,9 +6537,7 @@ assert_s_latched:
ut_error;
break;
}
- fold = buf_page_address_fold(b->space, b->offset);
- ut_a(buf_page_hash_get_low(buf_pool, b->space, b->offset,
- fold) == b);
+ ut_a(buf_page_hash_get_low(buf_pool, b->id) == b);
}
ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
@@ -5455,19 +6547,21 @@ assert_s_latched:
mutex_exit(&buf_pool->zip_mutex);
- if (n_lru + n_free > buf_pool->curr_size + n_zip) {
- fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",
- n_lru, n_free,
- buf_pool->curr_size, n_zip);
- ut_error;
+ if (buf_pool->curr_size == buf_pool->old_size
+ && n_lru + n_free > buf_pool->curr_size + n_zip) {
+
+ ib::fatal() << "n_LRU " << n_lru << ", n_free " << n_free
+ << ", pool " << buf_pool->curr_size
+ << " zip " << n_zip << ". Aborting...";
}
ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
- if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
- fprintf(stderr, "Free list len %lu, free blocks %lu\n",
- UT_LIST_GET_LEN(buf_pool->free),
- n_free);
- ut_error;
+ if (buf_pool->curr_size == buf_pool->old_size
+ && UT_LIST_GET_LEN(buf_pool->free) != n_free) {
+
+ ib::fatal() << "Free list len "
+ << UT_LIST_GET_LEN(buf_pool->free)
+ << ", free blocks " << n_free << ". Aborting...";
}
ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
@@ -5484,8 +6578,7 @@ assert_s_latched:
/*********************************************************************//**
Validates the buffer buf_pool data structure.
-@return TRUE */
-UNIV_INTERN
+@return TRUE */
ibool
buf_validate(void)
/*==============*/
@@ -5528,37 +6621,14 @@ buf_print_instance(
size = buf_pool->curr_size;
index_ids = static_cast<index_id_t*>(
- mem_alloc(size * sizeof *index_ids));
+ ut_malloc_nokey(size * sizeof *index_ids));
- counts = static_cast<ulint*>(mem_alloc(sizeof(ulint) * size));
+ counts = static_cast<ulint*>(ut_malloc_nokey(sizeof(ulint) * size));
buf_pool_mutex_enter(buf_pool);
buf_flush_list_mutex_enter(buf_pool);
- fprintf(stderr,
- "buf_pool size %lu\n"
- "database pages %lu\n"
- "free pages %lu\n"
- "modified database pages %lu\n"
- "n pending decompressions %lu\n"
- "n pending reads %lu\n"
- "n pending flush LRU %lu list %lu single page %lu\n"
- "pages made young %lu, not young %lu\n"
- "pages read %lu, created %lu, written %lu\n",
- (ulint) size,
- (ulint) UT_LIST_GET_LEN(buf_pool->LRU),
- (ulint) UT_LIST_GET_LEN(buf_pool->free),
- (ulint) UT_LIST_GET_LEN(buf_pool->flush_list),
- (ulint) buf_pool->n_pend_unzip,
- (ulint) buf_pool->n_pend_reads,
- (ulint) buf_pool->n_flush[BUF_FLUSH_LRU],
- (ulint) buf_pool->n_flush[BUF_FLUSH_LIST],
- (ulint) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
- (ulint) buf_pool->stat.n_pages_made_young,
- (ulint) buf_pool->stat.n_pages_not_made_young,
- (ulint) buf_pool->stat.n_pages_read,
- (ulint) buf_pool->stat.n_pages_created,
- (ulint) buf_pool->stat.n_pages_written);
+ ib::info() << *buf_pool;
buf_flush_list_mutex_exit(buf_pool);
@@ -5575,7 +6645,7 @@ buf_print_instance(
for (; n_blocks--; block++) {
const buf_frame_t* frame = block->frame;
- if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {
+ if (fil_page_index_page_check(frame)) {
id = btr_page_get_index_id(frame);
@@ -5606,28 +6676,26 @@ buf_print_instance(
for (i = 0; i < n_found; i++) {
index = dict_index_get_if_in_cache(index_ids[i]);
- fprintf(stderr,
- "Block count for index %llu in buffer is about %lu",
- (ullint) index_ids[i],
- (ulint) counts[i]);
-
- if (index) {
- putc(' ', stderr);
- dict_index_name_print(stderr, NULL, index);
+ if (!index) {
+ ib::info() << "Block count for index "
+ << index_ids[i] << " in buffer is about "
+ << counts[i];
+ } else {
+ ib::info() << "Block count for index " << index_ids[i]
+ << " in buffer is about " << counts[i]
+ << ", index " << index->name
+ << " of table " << index->table->name;
}
-
- putc('\n', stderr);
}
- mem_free(index_ids);
- mem_free(counts);
+ ut_free(index_ids);
+ ut_free(counts);
ut_a(buf_pool_validate_instance(buf_pool));
}
/*********************************************************************//**
Prints info of the buffer buf_pool data structure. */
-UNIV_INTERN
void
buf_print(void)
/*===========*/
@@ -5646,8 +6714,8 @@ buf_print(void)
#ifdef UNIV_DEBUG
/*********************************************************************//**
Returns the number of latched pages in the buffer pool.
-@return number of latched pages */
-UNIV_INTERN
+@return number of latched pages */
+static
ulint
buf_get_latched_pages_number_instance(
/*==================================*/
@@ -5675,7 +6743,7 @@ buf_get_latched_pages_number_instance(
continue;
}
- mutex_enter(&block->mutex);
+ buf_page_mutex_enter(block);
if (block->page.buf_fix_count != 0
|| buf_page_get_io_fix(&block->page)
@@ -5683,7 +6751,7 @@ buf_get_latched_pages_number_instance(
fixed_pages_number++;
}
- mutex_exit(&block->mutex);
+ buf_page_mutex_exit(block);
}
}
@@ -5737,8 +6805,7 @@ buf_get_latched_pages_number_instance(
/*********************************************************************//**
Returns the number of latched pages in all the buffer pools.
-@return number of latched pages */
-UNIV_INTERN
+@return number of latched pages */
ulint
buf_get_latched_pages_number(void)
/*==============================*/
@@ -5762,16 +6829,14 @@ buf_get_latched_pages_number(void)
/*********************************************************************//**
Returns the number of pending buf pool read ios.
-@return number of pending read I/O operations */
-UNIV_INTERN
+@return number of pending read I/O operations */
ulint
buf_get_n_pending_read_ios(void)
/*============================*/
{
- ulint i;
ulint pend_ios = 0;
- for (i = 0; i < srv_buf_pool_instances; i++) {
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
pend_ios += buf_pool_from_array(i)->n_pend_reads;
}
@@ -5781,24 +6846,24 @@ buf_get_n_pending_read_ios(void)
/*********************************************************************//**
Returns the ratio in percents of modified pages in the buffer pool /
database pages in the buffer pool.
-@return modified page percentage ratio */
-UNIV_INTERN
+@return modified page percentage ratio */
double
buf_get_modified_ratio_pct(void)
/*============================*/
{
- double percentage = 0.0;
+ double ratio;
ulint lru_len = 0;
ulint free_len = 0;
ulint flush_list_len = 0;
buf_get_total_list_len(&lru_len, &free_len, &flush_list_len);
- percentage = (100.0 * flush_list_len) / (1.0 + lru_len + free_len);
+ ratio = static_cast<double>(100 * flush_list_len)
+ / (1 + lru_len + free_len);
/* 1 + is there to avoid division by zero */
- return(percentage);
+ return(ratio);
}
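
As a worked example of the rewritten formula: with 150 pages on the flush list, 800 on the LRU list and 49 free buffers, the function returns 100 * 150 / (1 + 800 + 49) ≈ 17.65 percent dirty; the "1 +" term exists only to guard against division by zero on an empty pool.

    /* Sketch: flush_list_len = 150, lru_len = 800, free_len = 49 */
    double ratio = static_cast<double>(100 * 150) / (1 + 800 + 49); /* ~17.65 */
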
/*******************************************************************//**
@@ -5861,7 +6926,6 @@ buf_stats_aggregate_pool_info(
Collect buffer pool stats information for a buffer pool. Also
record aggregated stats if there are more than one buffer pool
in the server */
-UNIV_INTERN
void
buf_stats_get_pool_info(
/*====================*/
@@ -5870,7 +6934,7 @@ buf_stats_get_pool_info(
buf_pool_info_t* all_pool_info) /*!< in/out: buffer pool info
to fill */
{
- buf_pool_info_t* pool_info;
+ buf_pool_info_t* pool_info;
time_t current_time;
double time_elapsed;
@@ -5996,7 +7060,7 @@ buf_stats_get_pool_info(
/*********************************************************************//**
Prints info of the buffer i/o. */
-UNIV_INTERN
+static
void
buf_print_io_instance(
/*==================*/
@@ -6006,15 +7070,16 @@ buf_print_io_instance(
ut_ad(pool_info);
fprintf(file,
- "Buffer pool size %lu\n"
- "Free buffers %lu\n"
- "Database pages %lu\n"
- "Old database pages %lu\n"
- "Modified db pages %lu\n"
+ "Buffer pool size " ULINTPF "\n"
+ "Free buffers " ULINTPF "\n"
+ "Database pages " ULINTPF "\n"
+ "Old database pages " ULINTPF "\n"
+ "Modified db pages " ULINTPF "\n"
"Percent of dirty pages(LRU & free pages): %.3f\n"
"Max dirty pages percent: %.3f\n"
- "Pending reads %lu\n"
- "Pending writes: LRU %lu, flush list %lu, single page %lu\n",
+ "Pending reads " ULINTPF "\n"
+ "Pending writes: LRU " ULINTPF ", flush list " ULINTPF
+ ", single page " ULINTPF "\n",
pool_info->pool_size,
pool_info->free_list_len,
pool_info->lru_len,
@@ -6029,9 +7094,10 @@ buf_print_io_instance(
pool_info->n_pending_flush_single_page);
fprintf(file,
- "Pages made young %lu, not young %lu\n"
+ "Pages made young " ULINTPF ", not young " ULINTPF "\n"
"%.2f youngs/s, %.2f non-youngs/s\n"
- "Pages read %lu, created %lu, written %lu\n"
+ "Pages read " ULINTPF ", created " ULINTPF
+ ", written " ULINTPF "\n"
"%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
pool_info->n_pages_made_young,
pool_info->n_pages_not_made_young,
@@ -6078,8 +7144,9 @@ buf_print_io_instance(
/* Print some values to help us with visualizing what is
happening with LRU eviction. */
fprintf(file,
- "LRU len: %lu, unzip_LRU len: %lu\n"
- "I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
+ "LRU len: " ULINTPF ", unzip_LRU len: " ULINTPF "\n"
+ "I/O sum[" ULINTPF "]:cur[" ULINTPF "], "
+ "unzip sum[" ULINTPF "]:cur[" ULINTPF "]\n",
pool_info->lru_len, pool_info->unzip_lru_len,
pool_info->io_sum, pool_info->io_cur,
pool_info->unzip_sum, pool_info->unzip_cur);
@@ -6087,7 +7154,6 @@ buf_print_io_instance(
/*********************************************************************//**
Prints info of the buffer i/o. */
-UNIV_INTERN
void
buf_print_io(
/*=========*/
@@ -6101,7 +7167,7 @@ buf_print_io(
one extra buf_pool_info_t, the last one stores
aggregated/total values from all pools */
if (srv_buf_pool_instances > 1) {
- pool_info = (buf_pool_info_t*) mem_zalloc((
+ pool_info = (buf_pool_info_t*) ut_zalloc_nokey((
srv_buf_pool_instances + 1) * sizeof *pool_info);
pool_info_total = &pool_info[srv_buf_pool_instances];
@@ -6110,7 +7176,7 @@ buf_print_io(
pool_info_total = pool_info =
static_cast<buf_pool_info_t*>(
- mem_zalloc(sizeof *pool_info));
+ ut_zalloc_nokey(sizeof *pool_info));
}
for (i = 0; i < srv_buf_pool_instances; i++) {
@@ -6141,29 +7207,16 @@ buf_print_io(
"----------------------\n", file);
for (i = 0; i < srv_buf_pool_instances; i++) {
- fprintf(file, "---BUFFER POOL %lu\n", i);
+ fprintf(file, "---BUFFER POOL " ULINTPF "\n", i);
buf_print_io_instance(&pool_info[i], file);
}
}
- mem_free(pool_info);
-}
-
-/**********************************************************************//**
-Refreshes the statistics used to print per-second averages. */
-UNIV_INTERN
-void
-buf_refresh_io_stats(
-/*=================*/
- buf_pool_t* buf_pool) /*!< in: buffer pool instance */
-{
- buf_pool->last_printout_time = time(NULL);
- buf_pool->old_stat = buf_pool->stat;
+ ut_free(pool_info);
}
/**********************************************************************//**
Refreshes the statistics used to print per-second averages. */
-UNIV_INTERN
void
buf_refresh_io_stats_all(void)
/*==========================*/
@@ -6180,7 +7233,6 @@ buf_refresh_io_stats_all(void)
/**********************************************************************//**
Check if all pages in all buffer pools are in a replaceable state.
@return FALSE if not */
-UNIV_INTERN
ibool
buf_all_freed(void)
/*===============*/
@@ -6201,8 +7253,7 @@ buf_all_freed(void)
/*********************************************************************//**
Checks that there currently are no pending i/o-operations for the buffer
pool.
-@return number of pending i/o */
-UNIV_INTERN
+@return number of pending i/o */
ulint
buf_pool_check_no_pending_io(void)
/*==============================*/
@@ -6229,58 +7280,85 @@ buf_pool_check_no_pending_io(void)
return(pending_io);
}
-#if 0
-Code currently not used
-/*********************************************************************//**
-Gets the current length of the free list of buffer blocks.
-@return length of the free list */
-UNIV_INTERN
-ulint
-buf_get_free_list_len(void)
-/*=======================*/
+/** Print the given page_id_t object.
+@param[in,out] out the output stream
+@param[in] page_id the page_id_t object to be printed
+@return the output stream */
+std::ostream&
+operator<<(
+ std::ostream& out,
+ const page_id_t page_id)
{
- ulint len;
-
- buf_pool_mutex_enter(buf_pool);
-
- len = UT_LIST_GET_LEN(buf_pool->free);
-
- buf_pool_mutex_exit(buf_pool);
+ out << "[page id: space=" << page_id.m_space
+ << ", page number=" << page_id.m_page_no << "]";
+ return(out);
+}
- return(len);
+/** Print the given buf_pool_t object.
+@param[in,out] out the output stream
+@param[in] buf_pool the buf_pool_t object to be printed
+@return the output stream */
+std::ostream&
+operator<<(
+ std::ostream& out,
+ const buf_pool_t& buf_pool)
+{
+ out << "[buffer pool instance: "
+ << "buf_pool size=" << buf_pool.curr_size
+ << ", database pages=" << UT_LIST_GET_LEN(buf_pool.LRU)
+ << ", free pages=" << UT_LIST_GET_LEN(buf_pool.free)
+ << ", modified database pages="
+ << UT_LIST_GET_LEN(buf_pool.flush_list)
+ << ", n pending decompressions=" << buf_pool.n_pend_unzip
+ << ", n pending reads=" << buf_pool.n_pend_reads
+ << ", n pending flush LRU=" << buf_pool.n_flush[BUF_FLUSH_LRU]
+ << " list=" << buf_pool.n_flush[BUF_FLUSH_LIST]
+ << " single page=" << buf_pool.n_flush[BUF_FLUSH_SINGLE_PAGE]
+ << ", pages made young=" << buf_pool.stat.n_pages_made_young
+ << ", not young=" << buf_pool.stat.n_pages_not_made_young
+ << ", pages read=" << buf_pool.stat.n_pages_read
+ << ", created=" << buf_pool.stat.n_pages_created
+ << ", written=" << buf_pool.stat.n_pages_written << "]";
+ return(out);
}
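
A usage sketch for the two stream operators above; the values are invented for illustration, and ib::info() is the logger used elsewhere in this patch:

    page_id_t id(5, 42);
    ib::info() << "evicting " << id;
    /* prints: evicting [page id: space=5, page number=42] */

    ib::info() << *buf_pool;
    /* prints the one-line pool summary that replaces the old
    fprintf() block in buf_print_instance() */
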
-#endif
-#else /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Inits a page to the buffer buf_pool, for use in mysqlbackup --restore. */
-UNIV_INTERN
-void
-buf_page_init_for_backup_restore(
-/*=============================*/
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: offset of the page within space
- in units of a page */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- buf_block_t* block) /*!< in: block to init */
+/** Encrypt a page buffer of the temporary tablespace
+@param[in] offset Page offset
+@param[in] src_frame Page to encrypt
+@param[in,out] dst_frame Output buffer
+@return encrypted buffer or NULL */
+static byte* buf_tmp_page_encrypt(
+ ulint offset,
+ byte* src_frame,
+ byte* dst_frame)
{
- block->page.state = BUF_BLOCK_FILE_PAGE;
- block->page.space = space;
- block->page.offset = offset;
+ uint header_len = FIL_PAGE_DATA;
+ /* FIL page header is not encrypted */
+ memcpy(dst_frame, src_frame, header_len);
- page_zip_des_init(&block->page.zip);
+ /* Calculate the start offset in a page */
+ uint unencrypted_bytes = header_len + FIL_PAGE_DATA_END;
+ uint srclen = srv_page_size - unencrypted_bytes;
+ const byte* src = src_frame + header_len;
+ byte* dst = dst_frame + header_len;
- /* We assume that block->page.data has been allocated
- with zip_size == UNIV_PAGE_SIZE. */
- ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
- ut_ad(ut_is_2pow(zip_size));
- page_zip_set_size(&block->page.zip, zip_size);
- if (zip_size) {
- block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
+ if (!log_tmp_block_encrypt(src, srclen, dst, (offset * srv_page_size),
+ true)) {
+ return NULL;
}
+
+ memcpy(dst_frame + srv_page_size - FIL_PAGE_DATA_END,
+ src_frame + srv_page_size - FIL_PAGE_DATA_END,
+ FIL_PAGE_DATA_END);
+
+ /* Handle post encryption checksum */
+ mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4,
+ buf_calc_page_crc32(dst_frame));
+
+ srv_stats.pages_encrypted.inc();
+ srv_stats.n_temp_blocks_encrypted.inc();
+ return dst_frame;
}
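
The on-page layout assumed by buf_tmp_page_encrypt() above, sketched as comments; offsets follow the FIL_PAGE_* constants already used in the function:

    /* [0, FIL_PAGE_DATA)                         header, copied as-is
       [FIL_PAGE_DATA, size - FIL_PAGE_DATA_END)  payload, encrypted via
                                                  log_tmp_block_encrypt()
       [size - FIL_PAGE_DATA_END, size)           trailer, copied as-is

       Finally, a CRC-32C of the whole destination frame is stored at
       FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4 as the
       post-encryption checksum. */
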
-#endif /* !UNIV_HOTBACKUP */
/** Encryption and page_compression hook that is called just before
a page is written to disk.
@@ -6296,17 +7374,17 @@ buf_page_encrypt_before_write(
buf_page_t* bpage,
byte* src_frame)
{
- ut_ad(space->id == bpage->space);
+ ut_ad(space->id == bpage->id.space());
bpage->real_size = UNIV_PAGE_SIZE;
fil_page_type_validate(src_frame);
- switch (bpage->offset) {
+ switch (bpage->id.page_no()) {
case 0:
/* Page 0 of a tablespace is not encrypted/compressed */
return src_frame;
case TRX_SYS_PAGE_NO:
- if (bpage->space == TRX_SYS_SPACE) {
+ if (bpage->id.space() == TRX_SYS_SPACE) {
/* don't encrypt/compress page as it contains
address to dblwr buffer */
return src_frame;
@@ -6315,13 +7393,21 @@ buf_page_encrypt_before_write(
fil_space_crypt_t* crypt_data = space->crypt_data;
- const bool encrypted = crypt_data
- && !crypt_data->not_encrypted()
- && crypt_data->type != CRYPT_SCHEME_UNENCRYPTED
- && (!crypt_data->is_default_encryption()
- || srv_encrypt_tables);
+ bool encrypted, page_compressed;
- bool page_compressed = FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags);
+ if (space->purpose == FIL_TYPE_TEMPORARY) {
+ ut_ad(!crypt_data);
+ encrypted = innodb_encrypt_temporary_tables;
+ page_compressed = false;
+ } else {
+ encrypted = crypt_data
+ && !crypt_data->not_encrypted()
+ && crypt_data->type != CRYPT_SCHEME_UNENCRYPTED
+ && (!crypt_data->is_default_encryption()
+ || srv_encrypt_tables);
+
+ page_compressed = FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags);
+ }
if (!encrypted && !page_compressed) {
/* No need to encrypt or page compress the page.
@@ -6330,8 +7416,7 @@ buf_page_encrypt_before_write(
return src_frame;
}
- ulint zip_size = buf_page_get_zip_size(bpage);
- ut_ad(!zip_size || !page_compressed);
+ ut_ad(!bpage->size.is_compressed() || !page_compressed);
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
/* Find free slot from temporary memory array */
buf_tmp_buffer_t* slot = buf_pool_reserve_tmp_slot(buf_pool);
@@ -6343,25 +7428,32 @@ buf_page_encrypt_before_write(
if (!page_compressed) {
not_compressed:
- /* Encrypt page content */
- byte* tmp = fil_space_encrypt(space,
- bpage->offset,
- bpage->newest_modification,
- src_frame,
- dst_frame);
+ byte* tmp;
+ if (space->purpose == FIL_TYPE_TEMPORARY) {
+ /* Encrypt temporary tablespace page content */
+ tmp = buf_tmp_page_encrypt(bpage->id.page_no(),
+ src_frame, dst_frame);
+ } else {
+ /* Encrypt page content */
+ tmp = fil_space_encrypt(
+ space, bpage->id.page_no(),
+ bpage->newest_modification,
+ src_frame, dst_frame);
+ }
bpage->real_size = UNIV_PAGE_SIZE;
slot->out_buf = dst_frame = tmp;
ut_d(fil_page_type_validate(tmp));
} else {
+ ut_ad(space->purpose != FIL_TYPE_TEMPORARY);
/* First we compress the page content */
buf_tmp_reserve_compression_buf(slot);
byte* tmp = slot->comp_buf;
ulint out_len = fil_page_compress(
src_frame, tmp,
fsp_flags_get_page_compression_level(space->flags),
- fil_space_get_block_size(space, bpage->offset),
+ fil_space_get_block_size(space, bpage->id.page_no()),
encrypted);
if (!out_len) {
goto not_compressed;
@@ -6369,12 +7461,14 @@ not_compressed:
bpage->real_size = out_len;
+ /* Workaround for MDEV-15527. */
+ memset(tmp + out_len, 0 , srv_page_size - out_len);
ut_d(fil_page_type_validate(tmp));
if (encrypted) {
/* And then we encrypt the page content */
tmp = fil_space_encrypt(space,
- bpage->offset,
+ bpage->id.page_no(),
bpage->newest_modification,
tmp,
dst_frame);
@@ -6388,4 +7482,28 @@ not_compressed:
// return dst_frame which will be written
return dst_frame;
}
+
+/**
+Determine whether to punch a hole to deallocate the unused portion
+of the page.
+@param[in]	bpage	page control block
+@return true if a punch hole should be used, false if not */
+bool
+buf_page_should_punch_hole(
+ const buf_page_t* bpage)
+{
+ return (bpage->real_size != bpage->size.physical());
+}
+
+/**
+Calculate the length of the trim (punch-hole) operation.
+@param[in]	bpage	page control block
+@param[in]	write_length	write length
+@return length of the trim, or zero if nothing can be trimmed */
+ulint
+buf_page_get_trim_length(
+ const buf_page_t* bpage,
+ ulint write_length)
+{
+ return (bpage->size.physical() - write_length);
+}
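
A usage sketch for the two helpers above, with numbers invented for illustration: a 16KiB page whose page-compressed payload was written out as a single 8KiB chunk leaves half of the on-disk slot deallocatable.

    ulint physical = 16384;      /* bpage->size.physical() */
    ulint write_length = 8192;   /* bytes actually written */
    ulint trim = physical - write_length;  /* 8192 bytes to punch */
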
#endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/innobase/buf/buf0checksum.cc b/storage/innobase/buf/buf0checksum.cc
index bc9a96bf6d3..9e81b0384c6 100644
--- a/storage/innobase/buf/buf0checksum.cc
+++ b/storage/innobase/buf/buf0checksum.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,66 +24,79 @@ Buffer pool checksum functions, also linked from /extra/innochecksum.cc
Created Aug 11, 2011 Vasil Dimov
*******************************************************/
-#include "univ.i"
-#include "fil0fil.h" /* FIL_* */
-#include "ut0crc32.h" /* ut_crc32() */
-#include "ut0rnd.h" /* ut_fold_binary() */
#include "buf0checksum.h"
+#include "fil0fil.h"
+#include "ut0crc32.h"
+#include "ut0rnd.h"
#ifndef UNIV_INNOCHECKSUM
-
-#include "srv0srv.h" /* SRV_CHECKSUM_* */
-#include "buf0types.h"
-
+#include "srv0srv.h"
#endif /* !UNIV_INNOCHECKSUM */
/** the macro MYSQL_SYSVAR_ENUM() requires "long unsigned int" and if we
use srv_checksum_algorithm_t here then we get a compiler error:
ha_innodb.cc:12251: error: cannot convert 'srv_checksum_algorithm_t*' to
'long unsigned int*' in initialization */
-UNIV_INTERN ulong srv_checksum_algorithm = SRV_CHECKSUM_ALGORITHM_INNODB;
-
-/********************************************************************//**
-Calculates a page CRC32 which is stored to the page when it is written
-to a file. Note that we must be careful to calculate the same value on
-32-bit and 64-bit architectures.
-@return checksum */
-UNIV_INTERN
-ib_uint32_t
-buf_calc_page_crc32(
-/*================*/
- const byte* page) /*!< in: buffer page */
+ulong srv_checksum_algorithm = SRV_CHECKSUM_ALGORITHM_INNODB;
+
+#ifdef INNODB_BUG_ENDIAN_CRC32
+/** Calculate the CRC32 checksum of a page. The value is stored to the page
+when it is written to a file and also checked for a match when reading from
+the file. Note that we must be careful to calculate the same value on all
+architectures.
+@param[in] page buffer page (srv_page_size bytes)
+@param[in] bug_endian whether to use big endian byteorder
+when converting byte strings to integers, for bug-compatibility with
+big-endian architecture running MySQL 5.6, MariaDB 10.0 or MariaDB 10.1
+@return CRC-32C */
+uint32_t buf_calc_page_crc32(const byte* page, bool bug_endian)
{
- ib_uint32_t checksum;
-
- /* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
- FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, are written outside the buffer pool
- to the first pages of data files, we have to skip them in the page
- checksum calculation.
- We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the
- checksum is stored, and also the last 8 bytes of page because
- there we store the old formula checksum. */
-
- checksum = ut_crc32(page + FIL_PAGE_OFFSET,
- FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
- - FIL_PAGE_OFFSET)
+ return bug_endian
+ ? ut_crc32_legacy_big_endian(
+ page + FIL_PAGE_OFFSET,
+ FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
+ - FIL_PAGE_OFFSET)
+ ^ ut_crc32_legacy_big_endian(page + FIL_PAGE_DATA,
+ srv_page_size
+ - (FIL_PAGE_DATA
+ + FIL_PAGE_END_LSN_OLD_CHKSUM))
+ : ut_crc32(page + FIL_PAGE_OFFSET,
+ FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
+ - FIL_PAGE_OFFSET)
^ ut_crc32(page + FIL_PAGE_DATA,
- UNIV_PAGE_SIZE - FIL_PAGE_DATA
- - FIL_PAGE_END_LSN_OLD_CHKSUM);
-
- return(checksum);
+ srv_page_size
+ - (FIL_PAGE_DATA + FIL_PAGE_END_LSN_OLD_CHKSUM));
+}
+#else
+/** Calculate the CRC32 checksum of a page. The value is stored to the page
+when it is written to a file and also checked for a match when reading from
+the file. Note that we must be careful to calculate the same value on all
+architectures.
+@param[in] page buffer page (srv_page_size bytes)
+@return CRC-32C */
+uint32_t buf_calc_page_crc32(const byte* page)
+{
+ /* Note: innodb_checksum_algorithm=crc32 could and should have
+ included the entire page in the checksum, and CRC-32 values
+ should be combined with the CRC-32 function, not with
+ exclusive OR. We stick to the current algorithm in order to
+ remain compatible with old data files. */
+ return ut_crc32(page + FIL_PAGE_OFFSET,
+ FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
+ - FIL_PAGE_OFFSET)
+ ^ ut_crc32(page + FIL_PAGE_DATA,
+ srv_page_size
+ - (FIL_PAGE_DATA + FIL_PAGE_END_LSN_OLD_CHKSUM));
}
+#endif
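
Both variants above use the same two-range composition: checksum everything except the stored checksum field at the start of the page, the FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION field, and the trailing 8-byte legacy checksum, then combine the two CRCs with XOR. A standalone sketch that mirrors the non-bug-endian branch:

    /* Sketch of the checksum composition used by buf_calc_page_crc32(). */
    uint32_t page_crc32_sketch(const byte* page, ulint page_size)
    {
        uint32_t c1 = ut_crc32(page + FIL_PAGE_OFFSET,
                               FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
                               - FIL_PAGE_OFFSET);
        uint32_t c2 = ut_crc32(page + FIL_PAGE_DATA,
                               page_size - (FIL_PAGE_DATA
                                            + FIL_PAGE_END_LSN_OLD_CHKSUM));
        /* XOR rather than a proper CRC combine: kept for compatibility
        with existing data files, as the comment above notes. */
        return c1 ^ c2;
    }
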
-/********************************************************************//**
-Calculates a page checksum which is stored to the page when it is written
+/** Calculate a checksum which is stored to the page when it is written
to a file. Note that we must be careful to calculate the same value on
32-bit and 64-bit architectures.
-@return checksum */
-UNIV_INTERN
-ulint
-buf_calc_page_new_checksum(
-/*=======================*/
- const byte* page) /*!< in: buffer page */
+@param[in] page file page (srv_page_size bytes)
+@return checksum */
+uint32_t
+buf_calc_page_new_checksum(const byte* page)
{
ulint checksum;
@@ -100,42 +114,29 @@ buf_calc_page_new_checksum(
+ ut_fold_binary(page + FIL_PAGE_DATA,
UNIV_PAGE_SIZE - FIL_PAGE_DATA
- FIL_PAGE_END_LSN_OLD_CHKSUM);
- checksum = checksum & 0xFFFFFFFFUL;
-
- return(checksum);
+ return(static_cast<uint32_t>(checksum));
}
-/********************************************************************//**
-In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
-looked at the first few bytes of the page. This calculates that old
-checksum.
+/** In MySQL before 4.0.14 or 4.1.1 there was an InnoDB bug that
+the checksum only looked at the first few bytes of the page.
+This calculates that old checksum.
NOTE: we must first store the new formula checksum to
FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
because this takes that field as an input!
-@return checksum */
-UNIV_INTERN
-ulint
-buf_calc_page_old_checksum(
-/*=======================*/
- const byte* page) /*!< in: buffer page */
+@param[in] page file page (srv_page_size bytes)
+@return checksum */
+uint32_t
+buf_calc_page_old_checksum(const byte* page)
{
- ulint checksum;
-
- checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
-
- checksum = checksum & 0xFFFFFFFFUL;
-
- return(checksum);
+ return(static_cast<uint32_t>
+ (ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION)));
}
-/********************************************************************//**
-Return a printable string describing the checksum algorithm.
-@return algorithm name */
-UNIV_INTERN
+/** Return a printable string describing the checksum algorithm.
+@param[in] algo algorithm
+@return algorithm name */
const char*
-buf_checksum_algorithm_name(
-/*========================*/
- srv_checksum_algorithm_t algo) /*!< in: algorithm */
+buf_checksum_algorithm_name(srv_checksum_algorithm_t algo)
{
switch (algo) {
case SRV_CHECKSUM_ALGORITHM_CRC32:
@@ -155,4 +156,3 @@ buf_checksum_algorithm_name(
ut_error;
return(NULL);
}
-
diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc
index 32b4399b41d..7fb4cf9a9d3 100644
--- a/storage/innobase/buf/buf0dblwr.cc
+++ b/storage/innobase/buf/buf0dblwr.cc
@@ -25,11 +25,6 @@ Created 2011/12/19
*******************************************************/
#include "buf0dblwr.h"
-
-#ifdef UNIV_NONINL
-#include "buf0buf.ic"
-#endif
-
#include "buf0buf.h"
#include "buf0checksum.h"
#include "srv0start.h"
@@ -39,18 +34,13 @@ Created 2011/12/19
#include "fil0crypt.h"
#include "fil0pagecompress.h"
-#ifndef UNIV_HOTBACKUP
-
-#ifdef UNIV_PFS_MUTEX
-/* Key to register the mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t buf_dblwr_mutex_key;
-#endif /* UNIV_PFS_RWLOCK */
+using st_::span;
/** The doublewrite buffer */
-UNIV_INTERN buf_dblwr_t* buf_dblwr = NULL;
+buf_dblwr_t* buf_dblwr = NULL;
/** Set to TRUE when the doublewrite buffer is being created */
-UNIV_INTERN ibool buf_dblwr_being_created = FALSE;
+ibool buf_dblwr_being_created = FALSE;
#define TRX_SYS_DOUBLEWRITE_BLOCKS 2
@@ -58,7 +48,6 @@ UNIV_INTERN ibool buf_dblwr_being_created = FALSE;
Determines if a page number is located inside the doublewrite buffer.
@return TRUE if the location is inside the two blocks of the
doublewrite buffer */
-UNIV_INTERN
ibool
buf_dblwr_page_inside(
/*==================*/
@@ -87,7 +76,7 @@ buf_dblwr_page_inside(
/****************************************************************//**
Calls buf_page_get() on the TRX_SYS_PAGE and returns a pointer to the
doublewrite buffer within it.
-@return pointer to the doublewrite buffer within the filespace header
+@return pointer to the doublewrite buffer within the filespace header
page. */
UNIV_INLINE
byte*
@@ -97,8 +86,9 @@ buf_dblwr_get(
{
buf_block_t* block;
- block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
- RW_X_LATCH, mtr);
+ block = buf_page_get(page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO),
+ univ_page_size, RW_X_LATCH, mtr);
+
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
return(buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE);
@@ -107,7 +97,6 @@ buf_dblwr_get(
/********************************************************************//**
Flush a batch of writes to the datafiles that have already been
written to the dblwr buffer on disk. */
-UNIV_INLINE
void
buf_dblwr_sync_datafiles()
/*======================*/
@@ -119,9 +108,6 @@ buf_dblwr_sync_datafiles()
/* Wait that all async writes to tablespaces have been posted to
the OS */
os_aio_wait_until_no_pending_writes();
-
- /* Now we flush the data to disk (for example, with fsync) */
- fil_flush_file_spaces(FIL_TABLESPACE);
}
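After this hunk buf_dblwr_sync_datafiles() only waits for the posted asynchronous writes; the fsync moves to the caller (see the change in buf_dblwr_flush_buffered_writes() further down). The resulting call sequence on the no-doublewrite path:

	buf_dblwr_sync_datafiles();	/* wait for posted page writes */
	fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
					/* then fsync the data files */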
/****************************************************************//**
@@ -136,7 +122,7 @@ buf_dblwr_init(
ulint buf_size;
buf_dblwr = static_cast<buf_dblwr_t*>(
- mem_zalloc(sizeof(buf_dblwr_t)));
+ ut_zalloc_nokey(sizeof(buf_dblwr_t)));
/* There are two blocks of same size in the doublewrite
buffer. */
@@ -147,11 +133,10 @@ buf_dblwr_init(
ut_a(srv_doublewrite_batch_size > 0
&& srv_doublewrite_batch_size < buf_size);
- mutex_create(buf_dblwr_mutex_key,
- &buf_dblwr->mutex, SYNC_DOUBLEWRITE);
+ mutex_create(LATCH_ID_BUF_DBLWR, &buf_dblwr->mutex);
- buf_dblwr->b_event = os_event_create();
- buf_dblwr->s_event = os_event_create();
+ buf_dblwr->b_event = os_event_create("dblwr_batch_event");
+ buf_dblwr->s_event = os_event_create("dblwr_single_event");
buf_dblwr->first_free = 0;
buf_dblwr->s_reserved = 0;
buf_dblwr->b_reserved = 0;
@@ -162,17 +147,17 @@ buf_dblwr_init(
doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK2);
buf_dblwr->in_use = static_cast<bool*>(
- mem_zalloc(buf_size * sizeof(bool)));
+ ut_zalloc_nokey(buf_size * sizeof(bool)));
buf_dblwr->write_buf_unaligned = static_cast<byte*>(
- ut_malloc((1 + buf_size) * UNIV_PAGE_SIZE));
+ ut_malloc_nokey((1 + buf_size) * UNIV_PAGE_SIZE));
buf_dblwr->write_buf = static_cast<byte*>(
ut_align(buf_dblwr->write_buf_unaligned,
UNIV_PAGE_SIZE));
buf_dblwr->buf_block_arr = static_cast<buf_page_t**>(
- mem_zalloc(buf_size * sizeof(void*)));
+ ut_zalloc_nokey(buf_size * sizeof(void*)));
}
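buf_dblwr_init() over-allocates one extra page so that the working buffer can be aligned to a page boundary. A sketch of the invariant, assuming ut_align() rounds the pointer up to the given power-of-two boundary:

	byte*	raw = static_cast<byte*>(
		ut_malloc_nokey((1 + buf_size) * UNIV_PAGE_SIZE));
	byte*	aligned = static_cast<byte*>(
		ut_align(raw, UNIV_PAGE_SIZE));

	/* The aligned buffer of buf_size pages always fits inside
	the raw allocation of (1 + buf_size) pages. */
	ut_ad(aligned >= raw);
	ut_ad(aligned + buf_size * UNIV_PAGE_SIZE
	      <= raw + (1 + buf_size) * UNIV_PAGE_SIZE);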
/** Create the doublewrite buffer if the doublewrite buffer header
@@ -180,7 +165,6 @@ is not present in the TRX_SYS page.
@return whether the operation succeeded
@retval true if the doublewrite buffer exists or was created
@retval false if the creation failed (too small first data file) */
-UNIV_INTERN
bool
buf_dblwr_create()
{
@@ -199,7 +183,7 @@ buf_dblwr_create()
}
start_again:
- mtr_start(&mtr);
+ mtr.start();
buf_dblwr_being_created = TRUE;
doublewrite = buf_dblwr_get(&mtr);
@@ -211,21 +195,9 @@ start_again:
buf_dblwr_init(doublewrite);
- mtr_commit(&mtr);
+ mtr.commit();
buf_dblwr_being_created = FALSE;
return(true);
- }
-
- if (buf_pool_get_curr_size()
- < ((TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
- + FSP_EXTENT_SIZE / 2 + 100)
- * UNIV_PAGE_SIZE)) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot create doublewrite buffer: "
- "innodb_buffer_pool_size is too small.");
- mtr_commit(&mtr);
- return(false);
} else {
fil_space_t* space = fil_space_acquire(TRX_SYS_SPACE);
const bool fail = UT_LIST_GET_FIRST(space->chain)->size
@@ -243,17 +215,17 @@ start_again:
if (block2 == NULL) {
too_small:
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot create doublewrite buffer: "
+ ib::error()
+ << "Cannot create doublewrite buffer: "
"the first file in innodb_data_file_path"
- " must be at least %luM.",
- 3 * (FSP_EXTENT_SIZE * UNIV_PAGE_SIZE) >> 20);
- mtr_commit(&mtr);
+ " must be at least "
+ << (3 * (FSP_EXTENT_SIZE * UNIV_PAGE_SIZE) >> 20)
+ << "M.";
+ mtr.commit();
return(false);
}
- ib_logf(IB_LOG_LEVEL_INFO,
- "Doublewrite buffer not found: creating new");
+ ib::info() << "Doublewrite buffer not found: creating new";
/* FIXME: After this point, the doublewrite buffer creation
is not atomic. The doublewrite buffer should not exist in
@@ -274,10 +246,16 @@ too_small:
new_block = fseg_alloc_free_page(
fseg_header, prev_page_no + 1, FSP_UP, &mtr);
if (new_block == NULL) {
- ib_logf(IB_LOG_LEVEL_FATAL,
- "Cannot create doublewrite buffer: you must "
- "increase your tablespace size. "
- "Cannot continue operation.");
+ ib::error() << "Cannot create doublewrite buffer: "
+				"you must increase your tablespace size."
+ " Cannot continue operation.";
+ /* This may essentially corrupt the doublewrite
+ buffer. However, usually the doublewrite buffer
+ is created at database initialization, and it
+ should not matter (just remove all newly created
+ InnoDB files and restart). */
+ mtr.commit();
+ return(false);
}
/* We read the allocated pages to the buffer pool;
@@ -290,7 +268,7 @@ too_small:
has not been written to in doublewrite. */
ut_ad(rw_lock_get_x_lock_count(&new_block->lock) == 1);
- page_no = buf_block_get_page_no(new_block);
+ page_no = new_block->page.id.page_no();
/* We only do this in the debug build, to ensure that
both the check in buf_flush_init_for_writing() and
recv_parse_or_apply_log_rec_body() will see a valid
@@ -359,70 +337,70 @@ too_small:
mtr_commit(&mtr);
/* Flush the modified pages to disk and make a checkpoint */
- log_make_checkpoint_at(LSN_MAX, TRUE);
+ log_make_checkpoint();
buf_dblwr_being_created = FALSE;
/* Remove doublewrite pages from LRU */
buf_pool_invalidate();
- ib_logf(IB_LOG_LEVEL_INFO, "Doublewrite buffer created");
+ ib::info() << "Doublewrite buffer created";
goto start_again;
}
-/** Check if a page is all zeroes.
-@param[in] read_buf database page
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@return whether the page is all zeroes */
-static bool buf_page_is_zeroes(const byte* read_buf, ulint zip_size)
-{
- const ulint page_size = zip_size ? zip_size : UNIV_PAGE_SIZE;
-
- for (ulint i = 0; i < page_size; i++) {
- if (read_buf[i] != 0) {
- return false;
- }
- }
- return true;
-}
-
-/****************************************************************//**
-At a database startup initializes the doublewrite buffer memory structure if
+/**
+At database startup, initialize the doublewrite buffer memory structure if
we already have a doublewrite buffer created in the data files. If we are
upgrading to an InnoDB version which supports multiple tablespaces, then this
function performs the necessary update operations. If we are in a crash
-recovery, this function loads the pages from double write buffer into memory. */
-void
+recovery, this function loads the pages from the doublewrite buffer into memory.
+@param[in] file File handle
+@param[in] path Path name of file
+@return DB_SUCCESS or error code */
+dberr_t
buf_dblwr_init_or_load_pages(
-/*=========================*/
pfs_os_file_t file,
- char* path,
- bool load_corrupt_pages)
+ const char* path)
{
- byte* buf;
- byte* read_buf;
- byte* unaligned_read_buf;
- ulint block1;
- ulint block2;
- byte* page;
- ibool reset_space_ids = FALSE;
- byte* doublewrite;
- ulint space_id;
- ulint i;
- ulint block_bytes = 0;
- recv_dblwr_t& recv_dblwr = recv_sys->dblwr;
+ byte* buf;
+ byte* page;
+ ulint block1;
+ ulint block2;
+ ulint space_id;
+ byte* read_buf;
+ byte* doublewrite;
+ byte* unaligned_read_buf;
+ ibool reset_space_ids = FALSE;
+ recv_dblwr_t& recv_dblwr = recv_sys->dblwr;
/* We do the file i/o past the buffer pool */
- unaligned_read_buf = static_cast<byte*>(ut_malloc(3 * UNIV_PAGE_SIZE));
+ unaligned_read_buf = static_cast<byte*>(
+ ut_malloc_nokey(3 * UNIV_PAGE_SIZE));
read_buf = static_cast<byte*>(
ut_align(unaligned_read_buf, UNIV_PAGE_SIZE));
/* Read the trx sys header to check if we are using the doublewrite
buffer */
- off_t trx_sys_page = TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE;
- os_file_read(file, read_buf, trx_sys_page, UNIV_PAGE_SIZE);
+ dberr_t err;
+
+ IORequest read_request(IORequest::READ);
+
+ err = os_file_read(
+ read_request,
+ file, read_buf, TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE,
+ UNIV_PAGE_SIZE);
+
+ if (err != DB_SUCCESS) {
+
+ ib::error()
+ << "Failed to read the system tablespace header page";
+
+ ut_free(unaligned_read_buf);
+
+ return(err);
+ }
doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
@@ -439,7 +417,8 @@ buf_dblwr_init_or_load_pages(
buf = buf_dblwr->write_buf;
} else {
- goto leave_func;
+ ut_free(unaligned_read_buf);
+ return(DB_SUCCESS);
}
if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED)
@@ -453,32 +432,56 @@ buf_dblwr_init_or_load_pages(
reset_space_ids = TRUE;
- ib_logf(IB_LOG_LEVEL_INFO,
- "Resetting space id's in the doublewrite buffer");
+ ib::info() << "Resetting space id's in the doublewrite buffer";
}
/* Read the pages from the doublewrite buffer to memory */
+ err = os_file_read(
+ read_request,
+ file, buf, block1 * UNIV_PAGE_SIZE,
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE);
+
+ if (err != DB_SUCCESS) {
+
+ ib::error()
+ << "Failed to read the first double write buffer "
+ "extent";
+
+ ut_free(unaligned_read_buf);
+
+ return(err);
+ }
+
+ err = os_file_read(
+ read_request,
+ file,
+ buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
+ block2 * UNIV_PAGE_SIZE,
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE);
- block_bytes = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
+ if (err != DB_SUCCESS) {
- os_file_read(file, buf, block1 * UNIV_PAGE_SIZE, block_bytes);
- os_file_read(file, buf + block_bytes, block2 * UNIV_PAGE_SIZE,
- block_bytes);
+ ib::error()
+ << "Failed to read the second double write buffer "
+ "extent";
+
+ ut_free(unaligned_read_buf);
+
+ return(err);
+ }
/* Check if any of these pages is half-written in data files, in the
intended position */
page = buf;
- for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * TRX_SYS_DOUBLEWRITE_BLOCKS; i++) {
-
- ulint source_page_no;
-
+ for (ulint i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) {
if (reset_space_ids) {
+ ulint source_page_no;
space_id = 0;
- mach_write_to_4(page
- + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id);
+ mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
+ space_id);
/* We do not need to calculate new checksums for the
pages because the field .._SPACE_ID does not affect
them. Write the page back to where we read it from. */
@@ -490,99 +493,124 @@ buf_dblwr_init_or_load_pages(
+ i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
}
- os_file_write(path, file, page,
- source_page_no * UNIV_PAGE_SIZE,
- UNIV_PAGE_SIZE);
- } else if (load_corrupt_pages
- && !buf_page_is_zeroes(page, FIL_PAGE_DATA)) {
- /* Each valid page header must contain some
- nonzero bytes, such as FIL_PAGE_OFFSET
- or FIL_PAGE_LSN. */
+ IORequest write_request(IORequest::WRITE);
+
+ err = os_file_write(
+ write_request, path, file, page,
+ source_page_no * UNIV_PAGE_SIZE,
+ UNIV_PAGE_SIZE);
+ if (err != DB_SUCCESS) {
+
+ ib::error()
+ << "Failed to write to the double write"
+ " buffer";
+
+ ut_free(unaligned_read_buf);
+
+ return(err);
+ }
+
+ } else if (memcmp(field_ref_zero, page + FIL_PAGE_LSN, 8)) {
+ /* Each valid page header must contain
+ a nonzero FIL_PAGE_LSN field. */
recv_dblwr.add(page);
}
- page += UNIV_PAGE_SIZE;
+ page += univ_page_size.physical();
}
if (reset_space_ids) {
os_file_flush(file);
}
-leave_func:
ut_free(unaligned_read_buf);
+
+ return(DB_SUCCESS);
}
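With the new signature the caller owns error handling. A hedged sketch of the startup sequence (the real call site is in srv0start.cc; the logging here is illustrative):

	dberr_t	err = buf_dblwr_init_or_load_pages(file, path);

	if (err != DB_SUCCESS) {
		ib::error() << "Doublewrite buffer initialization failed";
		return(err);
	}

	/* Later, once redo has been scanned and the tablespaces are
	open, restore any torn pages: */
	buf_dblwr_process();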
-/****************************************************************//**
-Process the double write buffer pages. */
+/** Process and remove the double write buffer pages for all tablespaces. */
void
buf_dblwr_process()
-/*===============*/
{
- ulint space_id;
- ulint page_no;
- ulint page_no_dblwr = 0;
- byte* page;
- byte* read_buf;
- byte* unaligned_read_buf;
- recv_dblwr_t& recv_dblwr = recv_sys->dblwr;
+ ulint page_no_dblwr = 0;
+ byte* read_buf;
+ byte* unaligned_read_buf;
+ recv_dblwr_t& recv_dblwr = recv_sys->dblwr;
if (!buf_dblwr) {
return;
}
- ib_logf(IB_LOG_LEVEL_INFO,
- "Restoring possible half-written data pages "
- "from the doublewrite buffer...");
-
- unaligned_read_buf = static_cast<byte*>(ut_malloc(3 * UNIV_PAGE_SIZE));
+ unaligned_read_buf = static_cast<byte*>(
+ ut_malloc_nokey(3 * UNIV_PAGE_SIZE));
read_buf = static_cast<byte*>(
ut_align(unaligned_read_buf, UNIV_PAGE_SIZE));
byte* const buf = read_buf + UNIV_PAGE_SIZE;
- for (std::list<byte*>::iterator i = recv_dblwr.pages.begin();
- i != recv_dblwr.pages.end(); ++i, ++page_no_dblwr ) {
- page = *i;
- page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
- space_id = mach_read_from_4(page + FIL_PAGE_SPACE_ID);
-
- FilSpace space(space_id, true);
+ for (recv_dblwr_t::list::iterator i = recv_dblwr.pages.begin();
+ i != recv_dblwr.pages.end();
+ ++i, ++page_no_dblwr) {
+ byte* page = *i;
+ ulint space_id = page_get_space_id(page);
+ fil_space_t* space = fil_space_get(space_id);
- if (!space()) {
- /* Maybe we have dropped the single-table tablespace
+ if (space == NULL) {
+ /* Maybe we have dropped the tablespace
and this page once belonged to it: do nothing */
continue;
}
- if (!fil_check_adress_in_tablespace(space_id, page_no)) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "A copy of page " ULINTPF ":" ULINTPF
- " in the doublewrite buffer slot " ULINTPF
- " is not within space bounds",
- space_id, page_no, page_no_dblwr);
+ fil_space_open_if_needed(space);
+
+ const ulint page_no = page_get_page_no(page);
+ const page_id_t page_id(space_id, page_no);
+
+ if (page_no >= space->size) {
+
+ /* Do not report the warning if the tablespace
+ is scheduled for truncation or was truncated
+ and we have parsed an MLOG_TRUNCATE record. */
+ if (!srv_is_tablespace_truncated(space_id)
+ && !srv_was_tablespace_truncated(space)
+ && !srv_is_undo_tablespace(space_id)) {
+ ib::warn() << "A copy of page " << page_id
+ << " in the doublewrite buffer slot "
+ << page_no_dblwr
+ << " is not within space bounds";
+ }
continue;
}
- ulint zip_size = fsp_flags_get_zip_size(space()->flags);
- ut_ad(!buf_page_is_zeroes(page, zip_size));
+ const page_size_t page_size(space->flags);
+ ut_ad(!buf_is_zeroes(span<const byte>(page,
+ page_size.physical())));
+
+ /* We want to ensure that for partial reads the
+ unread portion of the page is NUL. */
+ memset(read_buf, 0x0, page_size.physical());
+
+ IORequest request;
+
+ request.dblwr_recover();
/* Read in the actual page from the file */
- fil_io(OS_FILE_READ,
- true,
- space_id,
- zip_size,
- page_no,
- 0,
- zip_size ? zip_size : UNIV_PAGE_SIZE,
- read_buf,
- NULL,
- 0);
+ dberr_t err = fil_io(
+ request, true,
+ page_id, page_size,
+ 0, page_size.physical(), read_buf, NULL);
+
+ if (err != DB_SUCCESS) {
+ ib::warn()
+ << "Double write buffer recovery: "
+ << page_id << " read failed with "
+ << "error: " << ut_strerr(err);
+ }
- const bool is_all_zero = buf_page_is_zeroes(
- read_buf, zip_size);
- const bool expect_encrypted = space()->crypt_data
- && space()->crypt_data->type
- != CRYPT_SCHEME_UNENCRYPTED;
+ const bool is_all_zero = buf_is_zeroes(
+ span<const byte>(read_buf, page_size.physical()));
+ const bool expect_encrypted = space->crypt_data
+ && space->crypt_data->type != CRYPT_SCHEME_UNENCRYPTED;
if (is_all_zero) {
/* We will check if the copy in the
@@ -593,7 +621,8 @@ buf_dblwr_process()
/* Decompress the page before
validating the checksum. */
ulint decomp = fil_page_decompress(buf, read_buf);
- if (!decomp || (decomp != srv_page_size && zip_size)) {
+ if (!decomp || (decomp != srv_page_size
+ && page_size.is_compressed())) {
goto bad;
}
@@ -601,9 +630,9 @@ buf_dblwr_process()
read_buf
+ FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION)
? fil_space_verify_crypt_checksum(read_buf,
- zip_size)
+ page_size)
: !buf_page_is_corrupted(true, read_buf,
- zip_size, space())) {
+ page_size, space)) {
/* The page is good; there is no need
to consult the doublewrite buffer. */
continue;
@@ -612,21 +641,21 @@ buf_dblwr_process()
bad:
/* We intentionally skip this message for
is_all_zero pages. */
- ib_logf(IB_LOG_LEVEL_INFO,
- "Trying to recover page " ULINTPF ":" ULINTPF
- " from the doublewrite buffer.",
- space_id, page_no);
+ ib::info()
+ << "Trying to recover page " << page_id
+ << " from the doublewrite buffer.";
}
ulint decomp = fil_page_decompress(buf, page);
- if (!decomp || (decomp != srv_page_size && zip_size)) {
+ if (!decomp || (decomp != srv_page_size
+ && page_size.is_compressed())) {
continue;
}
if (expect_encrypted && mach_read_from_4(
page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION)
- ? !fil_space_verify_crypt_checksum(page, zip_size)
- : buf_page_is_corrupted(true, page, zip_size, space())) {
+ ? !fil_space_verify_crypt_checksum(page, page_size)
+ : buf_page_is_corrupted(true, page, page_size, space)) {
/* Theoretically we could have another good
copy for this page in the doublewrite
buffer. If not, we will report a fatal error
@@ -641,10 +670,10 @@ bad:
if (!fsp_flags_is_valid(flags, space_id)
&& fsp_flags_convert_from_101(flags)
== ULINT_UNDEFINED) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Ignoring a doublewrite copy of page "
- ULINTPF ":0 due to invalid flags 0x%x",
- space_id, int(flags));
+ ib::warn() << "Ignoring a doublewrite copy"
+ " of page " << page_id
+ << " due to invalid flags "
+ << ib::hex(flags);
continue;
}
/* The flags on the page should be converted later. */
@@ -653,77 +682,60 @@ bad:
/* Write the good page from the doublewrite buffer to
the intended position. */
- fil_io(OS_FILE_WRITE, true, space_id, zip_size, page_no, 0,
- zip_size ? zip_size : UNIV_PAGE_SIZE,
- page, NULL, 0);
+ IORequest write_request(IORequest::WRITE);
- ib_logf(IB_LOG_LEVEL_INFO,
- "Recovered page " ULINTPF ":" ULINTPF " from"
- " the doublewrite buffer.",
- space_id, page_no);
- }
-
- ut_free(unaligned_read_buf);
- fil_flush_file_spaces(FIL_TABLESPACE);
+ fil_io(write_request, true, page_id, page_size,
+ 0, page_size.physical(),
+ const_cast<byte*>(page), NULL);
- {
- size_t bytes = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
- byte *unaligned_buf = static_cast<byte*>(
- ut_malloc(bytes + UNIV_PAGE_SIZE - 1));
-
- byte *buf = static_cast<byte*>(
- ut_align(unaligned_buf, UNIV_PAGE_SIZE));
- memset(buf, 0, bytes);
+ ib::info() << "Recovered page " << page_id
+ << " from the doublewrite buffer.";
+ }
- fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
- buf_dblwr->block1, 0, bytes, buf, NULL, NULL);
- fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
- buf_dblwr->block2, 0, bytes, buf, NULL, NULL);
+ recv_dblwr.pages.clear();
- ut_free(unaligned_buf);
- }
+ fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
+ ut_free(unaligned_read_buf);
}
/****************************************************************//**
Frees doublewrite buffer. */
-UNIV_INTERN
void
-buf_dblwr_free(void)
-/*================*/
+buf_dblwr_free()
{
/* Free the double write data structures. */
ut_a(buf_dblwr != NULL);
ut_ad(buf_dblwr->s_reserved == 0);
ut_ad(buf_dblwr->b_reserved == 0);
- os_event_free(buf_dblwr->b_event);
- os_event_free(buf_dblwr->s_event);
+ os_event_destroy(buf_dblwr->b_event);
+ os_event_destroy(buf_dblwr->s_event);
ut_free(buf_dblwr->write_buf_unaligned);
buf_dblwr->write_buf_unaligned = NULL;
- mem_free(buf_dblwr->buf_block_arr);
+ ut_free(buf_dblwr->buf_block_arr);
buf_dblwr->buf_block_arr = NULL;
- mem_free(buf_dblwr->in_use);
+ ut_free(buf_dblwr->in_use);
buf_dblwr->in_use = NULL;
mutex_free(&buf_dblwr->mutex);
- mem_free(buf_dblwr);
+ ut_free(buf_dblwr);
buf_dblwr = NULL;
}
/********************************************************************//**
Updates the doublewrite buffer when an IO request is completed. */
-UNIV_INTERN
void
buf_dblwr_update(
/*=============*/
const buf_page_t* bpage, /*!< in: buffer block descriptor */
buf_flush_t flush_type)/*!< in: flush type */
{
- if (!srv_use_doublewrite_buf || buf_dblwr == NULL) {
- return;
- }
+ ut_ad(srv_use_doublewrite_buf);
+ ut_ad(buf_dblwr);
+ ut_ad(!fsp_is_system_temporary(bpage->id.space()));
+ ut_ad(!srv_read_only_mode);
switch (flush_type) {
case BUF_FLUSH_LIST:
@@ -740,7 +752,7 @@ buf_dblwr_update(
mutex_exit(&buf_dblwr->mutex);
/* This will finish the batch. Sync data files
to the disk. */
- fil_flush_file_spaces(FIL_TABLESPACE);
+ fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
mutex_enter(&buf_dblwr->mutex);
/* We can now reuse the doublewrite memory buffer: */
@@ -798,18 +810,16 @@ buf_dblwr_check_page_lsn(
- FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
4)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ERROR: The page to be written"
- " seems corrupt!\n"
- "InnoDB: The low 4 bytes of LSN fields do not match "
- "(" ULINTPF " != " ULINTPF ")!"
- " Noticed in the buffer pool.\n",
- mach_read_from_4(
- page + FIL_PAGE_LSN + 4),
- mach_read_from_4(
- page + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM + 4));
+ const ulint lsn1 = mach_read_from_4(
+ page + FIL_PAGE_LSN + 4);
+ const ulint lsn2 = mach_read_from_4(
+ page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM
+ + 4);
+
+ ib::error() << "The page to be written seems corrupt!"
+ " The low 4 bytes of LSN fields do not match"
+ " (" << lsn1 << " != " << lsn2 << ")!"
+ " Noticed in the buffer pool.";
}
}
@@ -822,21 +832,13 @@ buf_dblwr_assert_on_corrupt_block(
/*==============================*/
const buf_block_t* block) /*!< in: block to check */
{
- buf_page_print(block->frame, 0);
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Apparent corruption of an"
- " index page n:o %lu in space %lu\n"
- "InnoDB: to be written to data file."
- " We intentionally crash server\n"
- "InnoDB: to prevent corrupt data"
- " from ending up in data\n"
- "InnoDB: files.\n",
- (ulong) buf_block_get_page_no(block),
- (ulong) buf_block_get_space(block));
-
- ut_error;
+ buf_page_print(block->frame, univ_page_size);
+
+ ib::fatal() << "Apparent corruption of an index page "
+ << block->page.id
+ << " to be written to data file. We intentionally crash"
+ " the server to prevent corrupt data from ending up in"
+ " data files.";
}
/********************************************************************//**
@@ -848,26 +850,50 @@ buf_dblwr_check_block(
/*==================*/
const buf_block_t* block) /*!< in: block to check */
{
- if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE
- || block->page.zip.data) {
- /* No simple validate for compressed pages exists. */
- return;
- }
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- buf_dblwr_check_page_lsn(block->frame);
-
- if (!block->check_index_page_at_flush) {
+ if (block->skip_flush_check) {
return;
}
- if (page_is_comp(block->frame)) {
- if (!page_simple_validate_new(block->frame)) {
- buf_dblwr_assert_on_corrupt_block(block);
+ switch (fil_page_get_type(block->frame)) {
+ case FIL_PAGE_INDEX:
+ case FIL_PAGE_RTREE:
+ if (page_is_comp(block->frame)) {
+ if (page_simple_validate_new(block->frame)) {
+ return;
+ }
+ } else if (page_simple_validate_old(block->frame)) {
+ return;
}
- } else if (!page_simple_validate_old(block->frame)) {
-
- buf_dblwr_assert_on_corrupt_block(block);
+ /* While it is possible that this is not an index page
+ but just happens to have wrongly set FIL_PAGE_TYPE,
+	such pages should never be modified without also
+ adjusting the page type during page allocation or
+ buf_flush_init_for_writing() or fil_block_reset_type(). */
+ break;
+ case FIL_PAGE_TYPE_FSP_HDR:
+ case FIL_PAGE_IBUF_BITMAP:
+ case FIL_PAGE_TYPE_UNKNOWN:
+ /* Do not complain again, we already reset this field. */
+ case FIL_PAGE_UNDO_LOG:
+ case FIL_PAGE_INODE:
+ case FIL_PAGE_IBUF_FREE_LIST:
+ case FIL_PAGE_TYPE_SYS:
+ case FIL_PAGE_TYPE_TRX_SYS:
+ case FIL_PAGE_TYPE_XDES:
+ case FIL_PAGE_TYPE_BLOB:
+ case FIL_PAGE_TYPE_ZBLOB:
+ case FIL_PAGE_TYPE_ZBLOB2:
+ /* TODO: validate also non-index pages */
+ return;
+ case FIL_PAGE_TYPE_ALLOCATED:
+ /* empty pages should never be flushed */
+ return;
+ break;
}
+
+ buf_dblwr_assert_on_corrupt_block(block);
}
/********************************************************************//**
@@ -881,45 +907,43 @@ buf_dblwr_write_block_to_datafile(
bool sync) /*!< in: true if sync IO
is requested */
{
- ut_a(bpage);
ut_a(buf_page_in_file(bpage));
- const ulint flags = sync
- ? OS_FILE_WRITE
- : OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER;
+ ulint type = IORequest::WRITE;
+
+ if (sync) {
+ type |= IORequest::DO_NOT_WAKE;
+ }
+ IORequest request(type, const_cast<buf_page_t*>(bpage));
+
+ /* We request frame here to get correct buffer in case of
+ encryption and/or page compression */
void * frame = buf_page_get_frame(bpage);
- if (bpage->zip.data) {
- fil_io(flags,
- sync,
- buf_page_get_space(bpage),
- buf_page_get_zip_size(bpage),
- buf_page_get_page_no(bpage),
- 0,
- buf_page_get_zip_size(bpage),
- frame,
- (void*) bpage,
- 0);
+ if (bpage->zip.data != NULL) {
+ ut_ad(bpage->size.is_compressed());
- return;
- }
+ fil_io(request, sync, bpage->id, bpage->size, 0,
+ bpage->size.physical(),
+ (void*) frame,
+ (void*) bpage);
+ } else {
+ ut_ad(!bpage->size.is_compressed());
+
+ /* Our IO API is common for both reads and writes and is
+ therefore geared towards a non-const parameter. */
+
+ buf_block_t* block = reinterpret_cast<buf_block_t*>(
+ const_cast<buf_page_t*>(bpage));
+ ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+ buf_dblwr_check_page_lsn(block->frame);
- const buf_block_t* block = (buf_block_t*) bpage;
- ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- buf_dblwr_check_page_lsn(block->frame);
-
- fil_io(flags,
- sync,
- buf_block_get_space(block),
- 0,
- buf_block_get_page_no(block),
- 0,
- bpage->real_size,
- frame,
- (void*) block,
- (ulint *)&bpage->write_size);
+ fil_io(request,
+ sync, bpage->id, bpage->size, 0, bpage->real_size,
+ frame, block);
+ }
}
/********************************************************************//**
@@ -928,10 +952,8 @@ and also wakes up the aio thread if simulated aio is used. It is very
important to call this function after a batch of writes has been posted,
and also when we may have to wait for a page latch! Otherwise a deadlock
of threads can occur. */
-UNIV_INTERN
void
-buf_dblwr_flush_buffered_writes(void)
-/*=================================*/
+buf_dblwr_flush_buffered_writes()
{
byte* write_buf;
ulint first_free;
@@ -940,9 +962,13 @@ buf_dblwr_flush_buffered_writes(void)
if (!srv_use_doublewrite_buf || buf_dblwr == NULL) {
/* Sync the writes to the disk. */
buf_dblwr_sync_datafiles();
+ /* Now we flush the data to disk (for example, with fsync) */
+ fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
return;
}
+ ut_ad(!srv_read_only_mode);
+
try_again:
mutex_enter(&buf_dblwr->mutex);
@@ -954,13 +980,19 @@ try_again:
mutex_exit(&buf_dblwr->mutex);
+ /* Wake possible simulated aio thread as there could be
+ system temporary tablespace pages active for flushing.
+ Note: system temporary tablespace pages are not scheduled
+ for doublewrite. */
+ os_aio_simulated_wake_handler_threads();
+
return;
}
if (buf_dblwr->batch_running) {
/* Another thread is running the batch right now. Wait
for it to finish. */
- ib_int64_t sig_count = os_event_reset(buf_dblwr->b_event);
+ int64_t sig_count = os_event_reset(buf_dblwr->b_event);
mutex_exit(&buf_dblwr->mutex);
os_aio_simulated_wake_handler_threads();
@@ -968,7 +1000,6 @@ try_again:
goto try_again;
}
- ut_a(!buf_dblwr->batch_running);
ut_ad(buf_dblwr->first_free == buf_dblwr->b_reserved);
/* Disallow anyone else to post to doublewrite buffer or to
@@ -1012,9 +1043,9 @@ try_again:
len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE,
buf_dblwr->first_free) * UNIV_PAGE_SIZE;
- fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
- buf_dblwr->block1, 0, len,
- (void*) write_buf, NULL, 0);
+ fil_io(IORequestWrite, true,
+ page_id_t(TRX_SYS_SPACE, buf_dblwr->block1), univ_page_size,
+ 0, len, (void*) write_buf, NULL);
if (buf_dblwr->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
/* No unwritten pages in the second block. */
@@ -1028,9 +1059,9 @@ try_again:
write_buf = buf_dblwr->write_buf
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
- fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
- buf_dblwr->block2, 0, len,
- (void*) write_buf, NULL, 0);
+ fil_io(IORequestWrite, true,
+ page_id_t(TRX_SYS_SPACE, buf_dblwr->block2), univ_page_size,
+ 0, len, (void*) write_buf, NULL);
flush:
/* increment the doublewrite flushed pages counter */
@@ -1038,7 +1069,7 @@ flush:
srv_stats.dblwr_writes.inc();
/* Now flush the doublewrite buffer data to disk */
- fil_flush(ulint(TRX_SYS_SPACE));
+ fil_flush(TRX_SYS_SPACE);
/* We know that the writes have been flushed to disk now
and in recovery we will find them in the doublewrite buffer
@@ -1072,14 +1103,11 @@ flush:
Posts a buffer page for writing. If the doublewrite memory buffer is
full, calls buf_dblwr_flush_buffered_writes and waits for free
space to appear. */
-UNIV_INTERN
void
buf_dblwr_add_to_batch(
/*====================*/
buf_page_t* bpage) /*!< in: buffer block to write */
{
- ulint zip_size;
-
ut_a(buf_page_in_file(bpage));
try_again:
@@ -1095,7 +1123,7 @@ try_again:
point. The only exception is when a user thread is
forced to do a flush batch because of a sync
checkpoint. */
- ib_int64_t sig_count = os_event_reset(buf_dblwr->b_event);
+ int64_t sig_count = os_event_reset(buf_dblwr->b_event);
mutex_exit(&buf_dblwr->mutex);
os_aio_simulated_wake_handler_threads();
@@ -1111,26 +1139,28 @@ try_again:
goto try_again;
}
- zip_size = buf_page_get_zip_size(bpage);
+ byte* p = buf_dblwr->write_buf
+ + univ_page_size.physical() * buf_dblwr->first_free;
+
+ /* We request frame here to get correct buffer in case of
+ encryption and/or page compression */
void * frame = buf_page_get_frame(bpage);
- if (zip_size) {
- UNIV_MEM_ASSERT_RW(bpage->zip.data, zip_size);
+ if (bpage->size.is_compressed()) {
+ UNIV_MEM_ASSERT_RW(bpage->zip.data, bpage->size.physical());
/* Copy the compressed page and clear the rest. */
- memcpy(buf_dblwr->write_buf
- + UNIV_PAGE_SIZE * buf_dblwr->first_free,
- frame, zip_size);
- memset(buf_dblwr->write_buf
- + UNIV_PAGE_SIZE * buf_dblwr->first_free
- + zip_size, 0, UNIV_PAGE_SIZE - zip_size);
+
+ memcpy(p, frame, bpage->size.physical());
+
+ memset(p + bpage->size.physical(), 0x0,
+ univ_page_size.physical() - bpage->size.physical());
} else {
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
- UNIV_MEM_ASSERT_RW(((buf_block_t*) bpage)->frame,
- UNIV_PAGE_SIZE);
- memcpy(buf_dblwr->write_buf
- + UNIV_PAGE_SIZE * buf_dblwr->first_free,
- frame, UNIV_PAGE_SIZE);
+ UNIV_MEM_ASSERT_RW(frame,
+ bpage->size.logical());
+
+ memcpy(p, frame, bpage->size.logical());
}
buf_dblwr->buf_block_arr[buf_dblwr->first_free] = bpage;
@@ -1161,7 +1191,6 @@ flushes in the doublewrite buffer are in use we wait here for one to
become free. We are guaranteed that a slot will become free because any
thread that is using a slot must also release the slot before leaving
this function. */
-UNIV_INTERN
void
buf_dblwr_write_single_page(
/*========================*/
@@ -1170,7 +1199,6 @@ buf_dblwr_write_single_page(
{
ulint n_slots;
ulint size;
- ulint zip_size;
ulint offset;
ulint i;
@@ -1204,8 +1232,7 @@ retry:
if (buf_dblwr->s_reserved == n_slots) {
/* All slots are reserved. */
- ib_int64_t sig_count =
- os_event_reset(buf_dblwr->s_event);
+ int64_t sig_count = os_event_reset(buf_dblwr->s_event);
mutex_exit(&buf_dblwr->mutex);
os_event_wait_low(buf_dblwr->s_event, sig_count);
@@ -1251,44 +1278,44 @@ retry:
write it. This is so because we want to pad the remaining
bytes in the doublewrite page with zeros. */
- zip_size = buf_page_get_zip_size(bpage);
+ /* We request frame here to get correct buffer in case of
+ encryption and/or page compression */
void * frame = buf_page_get_frame(bpage);
- if (zip_size) {
- memcpy(buf_dblwr->write_buf + UNIV_PAGE_SIZE * i,
- frame, zip_size);
- memset(buf_dblwr->write_buf + UNIV_PAGE_SIZE * i
- + zip_size, 0, UNIV_PAGE_SIZE - zip_size);
-
- fil_io(OS_FILE_WRITE,
- true,
- TRX_SYS_SPACE, 0,
- offset,
- 0,
- UNIV_PAGE_SIZE,
- (void*) (buf_dblwr->write_buf + UNIV_PAGE_SIZE * i),
- NULL,
- 0);
+ if (bpage->size.is_compressed()) {
+ memcpy(buf_dblwr->write_buf + univ_page_size.physical() * i,
+ frame, bpage->size.physical());
+
+ memset(buf_dblwr->write_buf + univ_page_size.physical() * i
+ + bpage->size.physical(), 0x0,
+ univ_page_size.physical() - bpage->size.physical());
+
+ fil_io(IORequestWrite,
+ true,
+ page_id_t(TRX_SYS_SPACE, offset),
+ univ_page_size,
+ 0,
+ univ_page_size.physical(),
+ (void *)(buf_dblwr->write_buf + univ_page_size.physical() * i),
+ NULL);
} else {
/* It is a regular page. Write it directly to the
doublewrite buffer */
- fil_io(OS_FILE_WRITE,
- true,
- TRX_SYS_SPACE, 0,
- offset,
- 0,
- bpage->real_size,
- frame,
- NULL,
- 0);
+ fil_io(IORequestWrite,
+ true,
+ page_id_t(TRX_SYS_SPACE, offset),
+ univ_page_size,
+ 0,
+ univ_page_size.physical(),
+ (void*) frame,
+ NULL);
}
/* Now flush the doublewrite buffer data to disk */
- fil_flush(ulint(TRX_SYS_SPACE));
+ fil_flush(TRX_SYS_SPACE);
/* We know that the write has been flushed to disk now
and during recovery we will find it in the doublewrite buffer
blocks. Next do the write to the intended position. */
buf_dblwr_write_block_to_datafile(bpage, sync);
}
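The single-page path follows the same write-twice discipline as the batch path. A condensed sketch of the durability order (dblwr_slot and size are stand-ins for the slot arithmetic above):

	memcpy(dblwr_slot, frame, size);	/* 1. copy into the slot */
	fil_io(IORequestWrite, true,		/* 2. write the slot */
	       page_id_t(TRX_SYS_SPACE, offset), univ_page_size,
	       0, univ_page_size.physical(), dblwr_slot, NULL);
	fil_flush(TRX_SYS_SPACE);		/* 3. make it durable */
	buf_dblwr_write_block_to_datafile(	/* 4. final position */
		bpage, sync);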
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/buf/buf0dump.cc b/storage/innobase/buf/buf0dump.cc
index eabc554036a..3e01b66eb76 100644
--- a/storage/innobase/buf/buf0dump.cc
+++ b/storage/innobase/buf/buf0dump.cc
@@ -24,34 +24,33 @@ Implements a buffer pool dump/load.
Created April 08, 2011 Vasil Dimov
*******************************************************/
-#include "univ.i"
+#include "my_global.h"
+#include "my_sys.h"
-#include <stdarg.h> /* va_* */
-#include <string.h> /* strerror() */
+#include "mysql/psi/mysql_stage.h"
+#include "mysql/psi/psi.h"
-#include "buf0buf.h" /* buf_pool_mutex_enter(), srv_buf_pool_instances */
+#include "buf0buf.h"
#include "buf0dump.h"
-#include "db0err.h"
-#include "dict0dict.h" /* dict_operation_lock */
-#include "os0file.h" /* OS_FILE_MAX_PATH */
-#include "os0sync.h" /* os_event* */
-#include "os0thread.h" /* os_thread_* */
-#include "srv0srv.h" /* srv_fast_shutdown, srv_buf_dump* */
-#include "srv0start.h" /* srv_shutdown_state */
-#include "sync0rw.h" /* rw_lock_s_lock() */
-#include "ut0byte.h" /* ut_ull_create() */
-#include "ut0sort.h" /* UT_SORT_FUNCTION_BODY */
+#include "dict0dict.h"
+#include "os0file.h"
+#include "os0thread.h"
+#include "srv0srv.h"
+#include "srv0start.h"
+#include "sync0rw.h"
+#include "ut0byte.h"
+
+#include <algorithm>
+
#include "mysql/service_wsrep.h" /* wsrep_recovery */
#include <my_service_manager.h>
enum status_severity {
STATUS_INFO,
- STATUS_NOTICE,
STATUS_ERR
};
-#define SHUTTING_DOWN() (UNIV_UNLIKELY(srv_shutdown_state \
- != SRV_SHUTDOWN_NONE))
+#define SHUTTING_DOWN() (srv_shutdown_state != SRV_SHUTDOWN_NONE)
/* Flags that tell the buffer pool dump/load thread which action it should
take after being woken up. */
@@ -76,7 +75,6 @@ Wakes up the buffer pool dump/load thread and instructs it to start
a dump. This function is called by MySQL code via buffer_pool_dump_now()
and it should return immediately because the whole MySQL is frozen during
its execution. */
-UNIV_INTERN
void
buf_dump_start()
/*============*/
@@ -90,7 +88,6 @@ Wakes up the buffer pool dump/load thread and instructs it to start
a load. This function is called by MySQL code via buffer_pool_load_now()
and it should return immediately because the whole MySQL is frozen during
its execution. */
-UNIV_INTERN
void
buf_load_start()
/*============*/
@@ -121,12 +118,20 @@ buf_dump_status(
va_start(ap, fmt);
- ut_vsnprintf(
+ vsnprintf(
export_vars.innodb_buffer_pool_dump_status,
sizeof(export_vars.innodb_buffer_pool_dump_status),
fmt, ap);
- ib_logf((ib_log_level_t) severity, "%s", export_vars.innodb_buffer_pool_dump_status);
+ switch (severity) {
+ case STATUS_INFO:
+ ib::info() << export_vars.innodb_buffer_pool_dump_status;
+ break;
+
+ case STATUS_ERR:
+ ib::error() << export_vars.innodb_buffer_pool_dump_status;
+ break;
+ }
va_end(ap);
}
@@ -152,15 +157,19 @@ buf_load_status(
va_start(ap, fmt);
- ut_vsnprintf(
+ vsnprintf(
export_vars.innodb_buffer_pool_load_status,
sizeof(export_vars.innodb_buffer_pool_load_status),
fmt, ap);
- if (severity == STATUS_NOTICE || severity == STATUS_ERR) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: %s\n",
- export_vars.innodb_buffer_pool_load_status);
+ switch (severity) {
+ case STATUS_INFO:
+ ib::info() << export_vars.innodb_buffer_pool_load_status;
+ break;
+
+ case STATUS_ERR:
+ ib::error() << export_vars.innodb_buffer_pool_load_status;
+ break;
}
va_end(ap);
@@ -185,6 +194,56 @@ get_buf_dump_dir()
return(dump_dir);
}
+/** Generate the path to the buffer pool dump/load file.
+@param[out] path generated path
+@param[in] path_size size of 'path', used as in snprintf(3). */
+static
+void
+buf_dump_generate_path(
+ char* path,
+ size_t path_size)
+{
+ char buf[FN_REFLEN];
+
+ snprintf(buf, sizeof(buf), "%s%c%s", get_buf_dump_dir(),
+ OS_PATH_SEPARATOR, srv_buf_dump_filename);
+
+ os_file_type_t type;
+ bool exists = false;
+ bool ret;
+
+ ret = os_file_status(buf, &exists, &type);
+
+ /* For realpath() to succeed the file must exist. */
+
+ if (ret && exists) {
+ /* my_realpath() assumes the destination buffer is big enough
+ to hold FN_REFLEN bytes. */
+ ut_a(path_size >= FN_REFLEN);
+
+ my_realpath(path, buf, 0);
+ } else {
+ /* If it does not exist, then resolve only srv_data_home
+ and append srv_buf_dump_filename to it. */
+ char srv_data_home_full[FN_REFLEN];
+
+ my_realpath(srv_data_home_full, get_buf_dump_dir(), 0);
+
+ if (srv_data_home_full[strlen(srv_data_home_full) - 1]
+ == OS_PATH_SEPARATOR) {
+
+ snprintf(path, path_size, "%s%s",
+ srv_data_home_full,
+ srv_buf_dump_filename);
+ } else {
+ snprintf(path, path_size, "%s%c%s",
+ srv_data_home_full,
+ OS_PATH_SEPARATOR,
+ srv_buf_dump_filename);
+ }
+ }
+}
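Both call sites resolve the path exactly once, as in:

	char	full_filename[OS_FILE_MAX_PATH];

	buf_dump_generate_path(full_filename, sizeof(full_filename));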
+
/*****************************************************************//**
Perform a buffer pool dump into the file specified by
innodb_buffer_pool_filename. If any errors occur then the value of
@@ -207,14 +266,12 @@ buf_dump(
ulint i;
int ret;
- ut_snprintf(full_filename, sizeof(full_filename),
- "%s%c%s", get_buf_dump_dir(), SRV_PATH_SEPARATOR,
- srv_buf_dump_filename);
+ buf_dump_generate_path(full_filename, sizeof(full_filename));
- ut_snprintf(tmp_filename, sizeof(tmp_filename),
- "%s.incomplete", full_filename);
+ snprintf(tmp_filename, sizeof(tmp_filename),
+ "%s.incomplete", full_filename);
- buf_dump_status(STATUS_NOTICE, "Dumping buffer pool(s) to %s",
+ buf_dump_status(STATUS_INFO, "Dumping buffer pool(s) to %s",
full_filename);
#if defined(__GLIBC__) || defined(__WIN__) || O_CLOEXEC == 0
@@ -246,8 +303,6 @@ buf_dump(
buf_dump_t* dump;
ulint n_pages;
ulint j;
- ulint limit;
- ulint counter;
buf_pool = buf_pool_from_array(i);
@@ -264,17 +319,32 @@ buf_dump(
}
if (srv_buf_pool_dump_pct != 100) {
+ ulint t_pages;
+
ut_ad(srv_buf_pool_dump_pct < 100);
- n_pages = n_pages * srv_buf_pool_dump_pct / 100;
+			/* Limit the total number of pages dumped to X%
+			of the total number of pages. */
+ t_pages = buf_pool->curr_size
+ * srv_buf_pool_dump_pct / 100;
+ if (n_pages > t_pages) {
+ buf_dump_status(STATUS_INFO,
+ "Instance " ULINTPF
+ ", restricted to " ULINTPF
+ " pages due to "
+					"innodb_buffer_pool_dump_pct=%lu",
+ i, t_pages,
+ srv_buf_pool_dump_pct);
+ n_pages = t_pages;
+ }
if (n_pages == 0) {
n_pages = 1;
}
}
- dump = static_cast<buf_dump_t*>(
- ut_malloc(n_pages * sizeof(*dump))) ;
+ dump = static_cast<buf_dump_t*>(ut_malloc_nokey(
+ n_pages * sizeof(*dump)));
if (dump == NULL) {
buf_pool_mutex_exit(buf_pool);
@@ -289,20 +359,22 @@ buf_dump(
for (bpage = UT_LIST_GET_FIRST(buf_pool->LRU), j = 0;
bpage != NULL && j < n_pages;
- bpage = UT_LIST_GET_NEXT(LRU, bpage), j++) {
+ bpage = UT_LIST_GET_NEXT(LRU, bpage)) {
ut_a(buf_page_in_file(bpage));
+ if (bpage->id.space() >= SRV_LOG_SPACE_FIRST_ID) {
+ /* Ignore the innodb_temporary tablespace. */
+ continue;
+ }
- dump[j] = BUF_DUMP_CREATE(buf_page_get_space(bpage),
- buf_page_get_page_no(bpage));
+ dump[j++] = BUF_DUMP_CREATE(bpage->id.space(),
+ bpage->id.page_no());
}
- ut_a(j == n_pages);
-
buf_pool_mutex_exit(buf_pool);
- limit = (ulint)((double)n_pages * ((double)srv_buf_dump_status_frequency / (double)100));
- counter = 0;
+ ut_a(j <= n_pages);
+ n_pages = j;
for (j = 0; j < n_pages && !SHOULD_QUIT(); j++) {
ret = fprintf(f, ULINTPF "," ULINTPF "\n",
@@ -317,27 +389,10 @@ buf_dump(
/* leave tmp_filename to exist */
return;
}
-
- counter++;
-
- /* Print buffer pool dump status only if
- srv_buf_dump_status_frequency is > 0 and
- we have processed that amount of pages. */
- if (srv_buf_dump_status_frequency &&
- counter == limit) {
- counter = 0;
- buf_dump_status(
- STATUS_INFO,
- "Dumping buffer pool "
- ULINTPF "/" ULINTPF ", "
- "page " ULINTPF "/" ULINTPF,
- i + 1, srv_buf_pool_instances,
- j + 1, n_pages);
- }
if (SHUTTING_DOWN() && !(j % 1024)) {
service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
"Dumping buffer pool "
- ULINTPF "/" ULINTPF ", "
+ ULINTPF "/%lu, "
"page " ULINTPF "/" ULINTPF,
i + 1, srv_buf_pool_instances,
j + 1, n_pages);
@@ -381,47 +436,11 @@ buf_dump(
ut_sprintf_timestamp(now);
- buf_dump_status(STATUS_NOTICE,
+ buf_dump_status(STATUS_INFO,
"Buffer pool(s) dump completed at %s", now);
}
/*****************************************************************//**
-Compare two buffer pool dump entries, used to sort the dump on
-space_no,page_no before loading in order to increase the chance for
-sequential IO.
-@return -1/0/1 if entry 1 is smaller/equal/bigger than entry 2 */
-static
-lint
-buf_dump_cmp(
-/*=========*/
- const buf_dump_t d1, /*!< in: buffer pool dump entry 1 */
- const buf_dump_t d2) /*!< in: buffer pool dump entry 2 */
-{
- if (d1 < d2) {
- return(-1);
- } else if (d1 == d2) {
- return(0);
- } else {
- return(1);
- }
-}
-
-/*****************************************************************//**
-Sort a buffer pool dump on space_no, page_no. */
-static
-void
-buf_dump_sort(
-/*==========*/
- buf_dump_t* dump, /*!< in/out: buffer pool dump to sort */
- buf_dump_t* tmp, /*!< in/out: temp storage */
- ulint low, /*!< in: lowest index (inclusive) */
- ulint high) /*!< in: highest index (non-inclusive) */
-{
- UT_SORT_FUNCTION_BODY(buf_dump_sort, dump, tmp, low, high,
- buf_dump_cmp);
-}
-
-/*****************************************************************//**
Artificially delay the buffer pool loading if necessary. The idea of
this function is to prevent hogging the server with IO and slowing down
too much normal client queries. */
@@ -429,7 +448,7 @@ UNIV_INLINE
void
buf_load_throttle_if_needed(
/*========================*/
- ulint* last_check_time, /*!< in/out: miliseconds since epoch
+ ulint* last_check_time, /*!< in/out: milliseconds since epoch
of the last time we did check if
throttling is needed, we do the check
every srv_io_capacity IO ops. */
@@ -479,7 +498,7 @@ buf_load_throttle_if_needed(
"cur_activity_count == *last_activity_count" check and calling
ut_time_ms() that often may turn out to be too expensive. */
- if (elapsed_time < 1000 /* 1 sec (1000 mili secs) */) {
+ if (elapsed_time < 1000 /* 1 sec (1000 milli secs) */) {
os_thread_sleep((1000 - elapsed_time) * 1000 /* micro secs */);
}
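The throttle amounts to: issue at most srv_io_capacity page reads per wall-clock second. A condensed sketch of the loop shape (n_reads and last_ms are hypothetical locals):

	if (++n_reads >= srv_io_capacity) {
		ulint	elapsed_ms = ut_time_ms() - last_ms;

		if (elapsed_ms < 1000) {
			os_thread_sleep((1000 - elapsed_ms) * 1000);
		}

		last_ms = ut_time_ms();
		n_reads = 0;
	}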
@@ -502,7 +521,6 @@ buf_load()
char now[32];
FILE* f;
buf_dump_t* dump;
- buf_dump_t* dump_tmp;
ulint dump_n;
ulint total_buffer_pools_pages;
ulint i;
@@ -513,16 +531,14 @@ buf_load()
/* Ignore any leftovers from before */
buf_load_abort_flag = FALSE;
- ut_snprintf(full_filename, sizeof(full_filename),
- "%s%c%s", get_buf_dump_dir(), SRV_PATH_SEPARATOR,
- srv_buf_dump_filename);
+ buf_dump_generate_path(full_filename, sizeof(full_filename));
- buf_load_status(STATUS_NOTICE,
+ buf_load_status(STATUS_INFO,
"Loading buffer pool(s) from %s", full_filename);
f = fopen(full_filename, "r");
if (f == NULL) {
- buf_load_status(STATUS_ERR,
+ buf_load_status(STATUS_INFO,
"Cannot open '%s' for reading: %s",
full_filename, strerror(errno));
return;
@@ -547,41 +563,38 @@ buf_load()
what = "parsing";
}
fclose(f);
- buf_load_status(STATUS_ERR, "Error %s '%s', "
- "unable to load buffer pool (stage 1)",
+ buf_load_status(STATUS_ERR, "Error %s '%s',"
+ " unable to load buffer pool (stage 1)",
what, full_filename);
return;
}
/* If dump is larger than the buffer pool(s), then we ignore the
extra trailing. This could happen if a dump is made, then buffer
- pool is shrunk and then load it attempted. */
+ pool is shrunk and then load is attempted. */
total_buffer_pools_pages = buf_pool_get_n_pages()
* srv_buf_pool_instances;
if (dump_n > total_buffer_pools_pages) {
dump_n = total_buffer_pools_pages;
}
- dump = static_cast<buf_dump_t*>(ut_malloc(dump_n * sizeof(*dump)));
-
- if (dump == NULL) {
+	if (dump_n != 0) {
+ dump = static_cast<buf_dump_t*>(ut_malloc_nokey(
+ dump_n * sizeof(*dump)));
+ } else {
fclose(f);
- buf_load_status(STATUS_ERR,
- "Cannot allocate " ULINTPF " bytes: %s",
- (ulint) (dump_n * sizeof(*dump)),
- strerror(errno));
+ ut_sprintf_timestamp(now);
+ buf_load_status(STATUS_INFO,
+ "Buffer pool(s) load completed at %s"
+ " (%s was empty)", now, full_filename);
return;
}
- dump_tmp = static_cast<buf_dump_t*>(
- ut_malloc(dump_n * sizeof(*dump_tmp)));
-
- if (dump_tmp == NULL) {
- ut_free(dump);
+ if (dump == NULL) {
fclose(f);
buf_load_status(STATUS_ERR,
"Cannot allocate " ULINTPF " bytes: %s",
- (ulint) (dump_n * sizeof(*dump_tmp)),
+ dump_n * sizeof(*dump),
strerror(errno));
return;
}
@@ -599,24 +612,22 @@ buf_load()
/* else */
ut_free(dump);
- ut_free(dump_tmp);
fclose(f);
buf_load_status(STATUS_ERR,
- "Error parsing '%s', unable "
- "to load buffer pool (stage 2)",
+ "Error parsing '%s', unable"
+ " to load buffer pool (stage 2)",
full_filename);
return;
}
if (space_id > ULINT32_MASK || page_no > ULINT32_MASK) {
ut_free(dump);
- ut_free(dump_tmp);
fclose(f);
buf_load_status(STATUS_ERR,
- "Error parsing '%s': bogus "
- "space,page " ULINTPF "," ULINTPF
- " at line " ULINTPF ", "
- "unable to load buffer pool",
+ "Error parsing '%s': bogus"
+ " space,page " ULINTPF "," ULINTPF
+ " at line " ULINTPF ","
+ " unable to load buffer pool",
full_filename,
space_id, page_no,
i);
@@ -635,44 +646,98 @@ buf_load()
if (dump_n == 0) {
ut_free(dump);
- ut_free(dump_tmp);
ut_sprintf_timestamp(now);
- buf_load_status(STATUS_NOTICE,
- "Buffer pool(s) load completed at %s "
- "(%s was empty)", now, full_filename);
+ buf_load_status(STATUS_INFO,
+ "Buffer pool(s) load completed at %s"
+ " (%s was empty)", now, full_filename);
return;
}
if (!SHUTTING_DOWN()) {
- buf_dump_sort(dump, dump_tmp, 0, dump_n);
+ std::sort(dump, dump + dump_n);
}
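Plain std::sort() suffices here because buf_dump_t is a single packed integer key. Assuming the existing BUF_DUMP_CREATE() encoding (space id in the high 32 bits, page number in the low 32 bits), ordering by the key is exactly ordering by (space, page):

	ut_ad(BUF_DUMP_CREATE(1, 0) > BUF_DUMP_CREATE(0, 0xFFFFFFFFUL));
	ut_ad(BUF_DUMP_SPACE(BUF_DUMP_CREATE(5, 7)) == 5);
	ut_ad(BUF_DUMP_PAGE(BUF_DUMP_CREATE(5, 7)) == 7);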
- ut_free(dump_tmp);
-
- ulint last_check_time = 0;
- ulint last_activity_cnt = 0;
+ ulint last_check_time = 0;
+ ulint last_activity_cnt = 0;
+
+ /* Avoid calling the expensive fil_space_acquire_silent() for each
+ page within the same tablespace. dump[] is sorted by (space, page),
+ so all pages from a given tablespace are consecutive. */
+ ulint cur_space_id = BUF_DUMP_SPACE(dump[0]);
+ fil_space_t* space = fil_space_acquire_silent(cur_space_id);
+ page_size_t page_size(space ? space->flags : 0);
+
+ /* JAN: TODO: MySQL 5.7 PSI
+#ifdef HAVE_PSI_STAGE_INTERFACE
+ PSI_stage_progress* pfs_stage_progress
+ = mysql_set_stage(srv_stage_buffer_pool_load.m_key);
+ #endif*/ /* HAVE_PSI_STAGE_INTERFACE */
+ /*
+ mysql_stage_set_work_estimated(pfs_stage_progress, dump_n);
+ mysql_stage_set_work_completed(pfs_stage_progress, 0);
+ */
for (i = 0; i < dump_n && !SHUTTING_DOWN(); i++) {
- buf_read_page_async(BUF_DUMP_SPACE(dump[i]),
- BUF_DUMP_PAGE(dump[i]));
+ /* space_id for this iteration of the loop */
+ const ulint this_space_id = BUF_DUMP_SPACE(dump[i]);
- if (i % 64 == 63) {
- os_aio_simulated_wake_handler_threads();
+ if (this_space_id >= SRV_LOG_SPACE_FIRST_ID) {
+ /* Ignore the innodb_temporary tablespace. */
+ continue;
+ }
+
+ if (this_space_id != cur_space_id) {
+ if (space != NULL) {
+ fil_space_release(space);
+ }
+
+ cur_space_id = this_space_id;
+ space = fil_space_acquire_silent(cur_space_id);
+
+ if (space != NULL) {
+ const page_size_t cur_page_size(
+ space->flags);
+ page_size.copy_from(cur_page_size);
+ }
}
- if (i % 128 == 0) {
- buf_load_status(STATUS_INFO,
- "Loaded " ULINTPF "/" ULINTPF " pages",
- i + 1, dump_n);
+	/* JAN: TODO: As we use a background page read below,
+	if the tablespace is encrypted we cannot use it. */
+ if (space == NULL ||
+ (space && space->crypt_data &&
+ space->crypt_data->encryption != FIL_ENCRYPTION_OFF &&
+ space->crypt_data->type != CRYPT_SCHEME_UNENCRYPTED)) {
+ continue;
+ }
+
+ buf_read_page_background(
+ page_id_t(this_space_id, BUF_DUMP_PAGE(dump[i])),
+ page_size, true);
+
+ if (i % 64 == 63) {
+ os_aio_simulated_wake_handler_threads();
}
if (buf_load_abort_flag) {
+ if (space != NULL) {
+ fil_space_release(space);
+ }
buf_load_abort_flag = FALSE;
ut_free(dump);
buf_load_status(
- STATUS_NOTICE,
+ STATUS_INFO,
"Buffer pool(s) load aborted on request");
+ /* Premature end, set estimated = completed = i and
+ end the current stage event. */
+ /*
+ mysql_stage_set_work_estimated(pfs_stage_progress, i);
+ mysql_stage_set_work_completed(pfs_stage_progress,
+ i);
+ */
+#ifdef HAVE_PSI_STAGE_INTERFACE
+ /* mysql_end_stage(); */
+#endif /* HAVE_PSI_STAGE_INTERFACE */
return;
}
@@ -680,19 +745,29 @@ buf_load()
&last_check_time, &last_activity_cnt, i);
}
+ if (space != NULL) {
+ fil_space_release(space);
+ }
+
ut_free(dump);
ut_sprintf_timestamp(now);
- buf_load_status(STATUS_NOTICE,
+ buf_load_status(STATUS_INFO,
"Buffer pool(s) load completed at %s", now);
+
+ /* Make sure that estimated = completed when we end. */
+ /* mysql_stage_set_work_completed(pfs_stage_progress, dump_n); */
+ /* End the stage progress event. */
+#ifdef HAVE_PSI_STAGE_INTERFACE
+ /* mysql_end_stage(); */
+#endif /* HAVE_PSI_STAGE_INTERFACE */
}
/*****************************************************************//**
Aborts a currently running buffer pool load. This function is called by
MySQL code via buffer_pool_load_abort() and it should return immediately
because the whole MySQL is frozen during its execution. */
-UNIV_INTERN
void
buf_load_abort()
/*============*/
@@ -705,15 +780,16 @@ This is the main thread for buffer pool dump/load. It waits for an
event and, when woken up, either performs a dump or a load and sleeps
again.
@return this function does not return, it calls os_thread_exit() */
-extern "C" UNIV_INTERN
+extern "C"
os_thread_ret_t
DECLARE_THREAD(buf_dump_thread)(void*)
{
my_thread_init();
ut_ad(!srv_read_only_mode);
-
- buf_dump_status(STATUS_INFO, "Dumping buffer pool(s) not yet started");
- buf_load_status(STATUS_INFO, "Loading buffer pool(s) not yet started");
+ /* JAN: TODO: MySQL 5.7 PSI
+#ifdef UNIV_PFS_THREAD
+ pfs_register_thread(buf_dump_thread_key);
+ #endif */ /* UNIV_PFS_THREAD */
if (srv_buffer_pool_load_at_startup) {
@@ -763,7 +839,7 @@ DECLARE_THREAD(buf_dump_thread)(void*)
my_thread_end();
/* We count the number of threads in os_thread_exit(). A created
thread should always use that to exit and not use return() to exit. */
- os_thread_exit(NULL);
+ os_thread_exit();
OS_THREAD_DUMMY_RETURN;
}
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index 1325a938dd6..3c3cc99de72 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -25,21 +25,18 @@ The database buffer buf_pool flush algorithm
Created 11/11/1995 Heikki Tuuri
*******************************************************/
-#include "buf0flu.h"
-
-#ifdef UNIV_NONINL
-#include "buf0flu.ic"
-#endif
+#include "univ.i"
+#include <mysql/service_thd_wait.h>
+#include <sql_class.h>
+#include "buf0flu.h"
#include "buf0buf.h"
#include "buf0mtflu.h"
#include "buf0checksum.h"
#include "srv0start.h"
#include "srv0srv.h"
#include "page0zip.h"
-#ifndef UNIV_HOTBACKUP
#include "ut0byte.h"
-#include "ut0lst.h"
#include "page0page.h"
#include "fil0fil.h"
#include "buf0lru.h"
@@ -47,12 +44,22 @@ Created 11/11/1995 Heikki Tuuri
#include "ibuf0ibuf.h"
#include "log0log.h"
#include "os0file.h"
-#include "os0sync.h"
#include "trx0sys.h"
#include "srv0mon.h"
-#include "mysql/plugin.h"
-#include "mysql/service_thd_wait.h"
+#include "ut0stage.h"
#include "fil0pagecompress.h"
+#ifdef UNIV_LINUX
+/* include defs for CPU time priority settings */
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+static const int buf_flush_page_cleaner_priority = -20;
+#endif /* UNIV_LINUX */
+
+/** Sleep time in microseconds for loop waiting for the oldest
+modification lsn */
+static const ulint buf_flush_wait_flushed_sleep_time = 10000;
#include <my_service_manager.h>
@@ -64,14 +71,125 @@ is set to TRUE by the page_cleaner thread when it is spawned and is set
back to FALSE at shutdown by the page_cleaner as well. Therefore no
need to protect it by a mutex. It is only ever read by the thread
doing the shutdown */
-UNIV_INTERN bool buf_page_cleaner_is_active;
+bool buf_page_cleaner_is_active;
+
+/** Factor for scan length to determine n_pages for intended oldest LSN
+progress */
+static ulint buf_flush_lsn_scan_factor = 3;
+
+/** Average redo generation rate */
+static lsn_t lsn_avg_rate = 0;
+
+/** Target oldest LSN for the requested flush_sync */
+static lsn_t buf_flush_sync_lsn = 0;
#ifdef UNIV_PFS_THREAD
-UNIV_INTERN mysql_pfs_key_t buf_page_cleaner_thread_key;
+mysql_pfs_key_t page_cleaner_thread_key;
#endif /* UNIV_PFS_THREAD */
/** Event to synchronise with the flushing. */
- os_event_t buf_flush_event;
+os_event_t buf_flush_event;
+
+/** State for page cleaner array slot */
+enum page_cleaner_state_t {
+ /** Not requested any yet.
+ Moved from FINISHED by the coordinator. */
+	/** No flushing requested yet.
+ /** Requested but not started flushing.
+ Moved from NONE by the coordinator. */
+ PAGE_CLEANER_STATE_REQUESTED,
+ /** Flushing is on going.
+ Moved from REQUESTED by the worker. */
+ PAGE_CLEANER_STATE_FLUSHING,
+ /** Flushing was finished.
+ Moved from FLUSHING by the worker. */
+ PAGE_CLEANER_STATE_FINISHED
+};
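The states form a cycle whose transitions alternate between the coordinator and the workers. A minimal sketch (not part of the patch) of the transition discipline the comments above describe:

	static bool
	page_cleaner_valid_transition(
		page_cleaner_state_t	from,
		page_cleaner_state_t	to)
	{
		switch (from) {
		case PAGE_CLEANER_STATE_NONE:		/* coordinator */
			return(to == PAGE_CLEANER_STATE_REQUESTED);
		case PAGE_CLEANER_STATE_REQUESTED:	/* worker */
			return(to == PAGE_CLEANER_STATE_FLUSHING);
		case PAGE_CLEANER_STATE_FLUSHING:	/* worker */
			return(to == PAGE_CLEANER_STATE_FINISHED);
		case PAGE_CLEANER_STATE_FINISHED:	/* coordinator */
			return(to == PAGE_CLEANER_STATE_NONE);
		}
		return(false);
	}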
+
+/** Page cleaner request state for each buffer pool instance */
+struct page_cleaner_slot_t {
+ page_cleaner_state_t state; /*!< state of the request.
+ protected by page_cleaner_t::mutex
+ if the worker thread got the slot and
+ set to PAGE_CLEANER_STATE_FLUSHING,
+ n_flushed_lru and n_flushed_list can be
+ updated only by the worker thread */
+ /* This value is set during state==PAGE_CLEANER_STATE_NONE */
+ ulint n_pages_requested;
+ /*!< number of requested pages
+ for the slot */
+ /* These values are updated during state==PAGE_CLEANER_STATE_FLUSHING,
+	and committed with state==PAGE_CLEANER_STATE_FINISHED.
+ The consistency is protected by the 'state' */
+ ulint n_flushed_lru;
+ /*!< number of flushed pages
+ by LRU scan flushing */
+ ulint n_flushed_list;
+ /*!< number of flushed pages
+ by flush_list flushing */
+ bool succeeded_list;
+ /*!< true if flush_list flushing
+ succeeded. */
+ ulint flush_lru_time;
+ /*!< elapsed time for LRU flushing */
+ ulint flush_list_time;
+ /*!< elapsed time for flush_list
+ flushing */
+ ulint flush_lru_pass;
+ /*!< count to attempt LRU flushing */
+ ulint flush_list_pass;
+ /*!< count to attempt flush_list
+ flushing */
+};
+
+/** Page cleaner structure common for all threads */
+struct page_cleaner_t {
+ ib_mutex_t mutex; /*!< mutex to protect whole of
+ page_cleaner_t struct and
+ page_cleaner_slot_t slots. */
+ os_event_t is_requested; /*!< event to activate worker
+ threads. */
+ os_event_t is_finished; /*!< event to signal that all
+ slots were finished. */
+ volatile ulint n_workers; /*!< number of worker threads
+ in existence */
+ bool requested; /*!< true if requested pages
+ to flush */
+ lsn_t lsn_limit; /*!< upper limit of LSN to be
+ flushed */
+ ulint n_slots; /*!< total number of slots */
+ ulint n_slots_requested;
+ /*!< number of slots
+ in the state
+ PAGE_CLEANER_STATE_REQUESTED */
+ ulint n_slots_flushing;
+ /*!< number of slots
+ in the state
+ PAGE_CLEANER_STATE_FLUSHING */
+ ulint n_slots_finished;
+ /*!< number of slots
+ in the state
+ PAGE_CLEANER_STATE_FINISHED */
+ ulint flush_time; /*!< elapsed time to flush
+ requests for all slots */
+ ulint flush_pass; /*!< count to finish to flush
+ requests for all slots */
+ page_cleaner_slot_t slots[MAX_BUFFER_POOLS];
+ bool is_running; /*!< false if attempt
+ to shutdown */
+
+#ifdef UNIV_DEBUG
+ ulint n_disabled_debug;
+					/*!< how many page cleaner
+					threads have been disabled */
+#endif /* UNIV_DEBUG */
+};
+
+static page_cleaner_t page_cleaner;
+
+#ifdef UNIV_DEBUG
+my_bool innodb_page_cleaner_disabled_debug;
+#endif /* UNIV_DEBUG */
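Taken together, the events and counters above imply a coordinator/worker handshake. A sketch of one worker pass, under the assumption that slot i belongs to buffer pool instance i (helper name hypothetical; buf_flush_do_batch is introduced later in this patch):

	static void
	sketch_worker_once()
	{
		os_event_wait(page_cleaner.is_requested);

		mutex_enter(&page_cleaner.mutex);

		for (ulint i = 0; i < page_cleaner.n_slots; i++) {
			page_cleaner_slot_t*	slot = &page_cleaner.slots[i];

			if (slot->state != PAGE_CLEANER_STATE_REQUESTED) {
				continue;
			}

			slot->state = PAGE_CLEANER_STATE_FLUSHING;
			page_cleaner.n_slots_requested--;
			page_cleaner.n_slots_flushing++;
			mutex_exit(&page_cleaner.mutex);

			/* Flush up to the requested number of pages
			from this buffer pool instance. */
			flush_counters_t	n;
			buf_flush_do_batch(buf_pool_from_array(i),
					   BUF_FLUSH_LIST,
					   slot->n_pages_requested,
					   page_cleaner.lsn_limit, &n);

			mutex_enter(&page_cleaner.mutex);
			slot->n_flushed_list = n.flushed;
			slot->state = PAGE_CLEANER_STATE_FINISHED;
			page_cleaner.n_slots_flushing--;
			page_cleaner.n_slots_finished++;

			if (page_cleaner.n_slots_requested == 0
			    && page_cleaner.n_slots_flushing == 0) {
				/* All slots done: wake the coordinator. */
				os_event_set(page_cleaner.is_finished);
			}
		}

		mutex_exit(&page_cleaner.mutex);
	}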
/** If LRU list of a buf_pool is less than this size then LRU eviction
should not happen. This is because when we do LRU flushing we also put
@@ -82,8 +200,7 @@ in thrashing. */
/* @} */
/******************************************************************//**
-Increases flush_list size in bytes with zip_size for compressed page,
-UNIV_PAGE_SIZE for uncompressed page in inline function */
+Increases flush_list size in bytes with the page size in inline function */
static inline
void
incr_flush_list_size_in_bytes(
@@ -92,15 +209,16 @@ incr_flush_list_size_in_bytes(
buf_pool_t* buf_pool) /*!< in: buffer pool instance */
{
ut_ad(buf_flush_list_mutex_own(buf_pool));
- ulint zip_size = page_zip_get_size(&block->page.zip);
- buf_pool->stat.flush_list_bytes += zip_size ? zip_size : UNIV_PAGE_SIZE;
+
+ buf_pool->stat.flush_list_bytes += block->page.size.physical();
+
ut_ad(buf_pool->stat.flush_list_bytes <= buf_pool->curr_pool_size);
}
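page_size_t::physical() returns the on-disk size (the zip size for a compressed page, the logical page size otherwise), so one statement now covers both branches of the old code. An illustration, assuming the constructor signature (physical, logical, is_compressed):

	const page_size_t	plain(16384, 16384, false);	/* uncompressed */
	const page_size_t	zipped(8192, 16384, true);	/* 8K zip page */

	ut_a(plain.physical() == 16384);	/* UNIV_PAGE_SIZE before */
	ut_a(zipped.physical() == 8192);	/* zip_size before */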
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/******************************************************************//**
Validates the flush list.
-@return TRUE if ok */
+@return TRUE if ok */
static
ibool
buf_flush_validate_low(
@@ -109,7 +227,7 @@ buf_flush_validate_low(
/******************************************************************//**
Validates the flush list some of the time.
-@return TRUE if ok or the check was skipped */
+@return TRUE if ok or the check was skipped */
static
ibool
buf_flush_validate_skip(
@@ -140,7 +258,7 @@ buf_flush_validate_skip(
Inserts a block into the flush_rbt and returns a pointer to its
predecessor or NULL if no predecessor. The ordering is maintained
on the basis of the <oldest_modification, space, offset> key.
-@return pointer to the predecessor or NULL if no predecessor. */
+@return pointer to the predecessor or NULL if no predecessor. */
static
buf_page_t*
buf_flush_insert_in_flush_rbt(
@@ -204,7 +322,7 @@ buf_pool->flush_rbt.
Note that for the purpose of flush_rbt, we only need to order blocks
on the oldest_modification. The other two fields are used to uniquely
identify the blocks.
-@return < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */
+@return < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */
static
int
buf_flush_block_cmp(
@@ -215,13 +333,14 @@ buf_flush_block_cmp(
int ret;
const buf_page_t* b1 = *(const buf_page_t**) p1;
const buf_page_t* b2 = *(const buf_page_t**) p2;
-#ifdef UNIV_DEBUG
- buf_pool_t* buf_pool = buf_pool_from_bpage(b1);
-#endif /* UNIV_DEBUG */
ut_ad(b1 != NULL);
ut_ad(b2 != NULL);
+#ifdef UNIV_DEBUG
+ buf_pool_t* buf_pool = buf_pool_from_bpage(b1);
+#endif /* UNIV_DEBUG */
+
ut_ad(buf_flush_list_mutex_own(buf_pool));
ut_ad(b1->in_flush_list);
@@ -234,17 +353,16 @@ buf_flush_block_cmp(
}
/* If oldest_modification is same then decide on the space. */
- ret = (int)(b2->space - b1->space);
+ ret = (int)(b2->id.space() - b1->id.space());
- /* Or else decide ordering on the offset field. */
- return(ret ? ret : (int)(b2->offset - b1->offset));
+ /* Or else decide ordering on the page number. */
+ return(ret ? ret : (int) (b2->id.page_no() - b1->id.page_no()));
}
/********************************************************************//**
Initialize the red-black tree to speed up insertions into the flush_list
during recovery process. Should be called at the start of recovery
process before any page has been read/written. */
-UNIV_INTERN
void
buf_flush_init_flush_rbt(void)
/*==========================*/
@@ -258,6 +376,8 @@ buf_flush_init_flush_rbt(void)
buf_flush_list_mutex_enter(buf_pool);
+ ut_ad(buf_pool->flush_rbt == NULL);
+
/* Create red black tree for speedy insertions in flush list. */
buf_pool->flush_rbt = rbt_create(
sizeof(buf_page_t*), buf_flush_block_cmp);
@@ -268,7 +388,6 @@ buf_flush_init_flush_rbt(void)
/********************************************************************//**
Frees up the red-black tree. */
-UNIV_INTERN
void
buf_flush_free_flush_rbt(void)
/*==========================*/
@@ -295,7 +414,6 @@ buf_flush_free_flush_rbt(void)
/********************************************************************//**
Inserts a modified block into the flush list. */
-UNIV_INTERN
void
buf_flush_insert_into_flush_list(
/*=============================*/
@@ -305,7 +423,7 @@ buf_flush_insert_into_flush_list(
{
ut_ad(!buf_pool_mutex_own(buf_pool));
ut_ad(log_flush_order_mutex_own());
- ut_ad(mutex_own(&block->mutex));
+ ut_ad(buf_page_mutex_own(block));
buf_flush_list_mutex_enter(buf_pool);
@@ -315,7 +433,7 @@ buf_flush_insert_into_flush_list(
/* If we are in the recovery then we need to update the flush
red-black tree as well. */
- if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+ if (buf_pool->flush_rbt != NULL) {
buf_flush_list_mutex_exit(buf_pool);
buf_flush_insert_sorted_into_flush_list(buf_pool, block, lsn);
return;
@@ -326,20 +444,23 @@ buf_flush_insert_into_flush_list(
ut_d(block->page.in_flush_list = TRUE);
block->page.oldest_modification = lsn;
- UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
+
+ UT_LIST_ADD_FIRST(buf_pool->flush_list, &block->page);
+
incr_flush_list_size_in_bytes(block, buf_pool);
#ifdef UNIV_DEBUG_VALGRIND
- {
- ulint zip_size = buf_block_get_zip_size(block);
+ void* p;
- if (zip_size) {
- UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
- } else {
- UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
- }
+ if (block->page.size.is_compressed()) {
+ p = block->page.zip.data;
+ } else {
+ p = block->frame;
}
+
+ UNIV_MEM_ASSERT_RW(p, block->page.size.physical());
#endif /* UNIV_DEBUG_VALGRIND */
+
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_flush_validate_skip(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
@@ -351,7 +472,6 @@ buf_flush_insert_into_flush_list(
Inserts a modified block into the flush list in the right sorted position.
This function is used by recovery, because there the modifications do not
necessarily come in the order of lsn's. */
-UNIV_INTERN
void
buf_flush_insert_sorted_into_flush_list(
/*====================================*/
@@ -365,7 +485,7 @@ buf_flush_insert_sorted_into_flush_list(
ut_ad(srv_shutdown_state != SRV_SHUTDOWN_FLUSH_PHASE);
ut_ad(!buf_pool_mutex_own(buf_pool));
ut_ad(log_flush_order_mutex_own());
- ut_ad(mutex_own(&block->mutex));
+ ut_ad(buf_page_mutex_own(block));
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
buf_flush_list_mutex_enter(buf_pool);
@@ -391,15 +511,15 @@ buf_flush_insert_sorted_into_flush_list(
block->page.oldest_modification = lsn;
#ifdef UNIV_DEBUG_VALGRIND
- {
- ulint zip_size = buf_block_get_zip_size(block);
+ void* p;
- if (zip_size) {
- UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
- } else {
- UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
- }
+ if (block->page.size.is_compressed()) {
+ p = block->page.zip.data;
+ } else {
+ p = block->frame;
}
+
+ UNIV_MEM_ASSERT_RW(p, block->page.size.physical());
#endif /* UNIV_DEBUG_VALGRIND */
prev_b = NULL;
@@ -408,9 +528,9 @@ buf_flush_insert_sorted_into_flush_list(
should not be NULL. In a very rare boundary case it is possible
that the flush_rbt has already been freed by the recovery thread
before the last page was hooked up in the flush_list by the
- io-handler thread. In that case we'll just do a simple
+ io-handler thread. In that case we'll just do a simple
linear search in the else block. */
- if (buf_pool->flush_rbt) {
+ if (buf_pool->flush_rbt != NULL) {
prev_b = buf_flush_insert_in_flush_rbt(&block->page);
@@ -418,8 +538,9 @@ buf_flush_insert_sorted_into_flush_list(
b = UT_LIST_GET_FIRST(buf_pool->flush_list);
- while (b && b->oldest_modification
+ while (b != NULL && b->oldest_modification
> block->page.oldest_modification) {
+
ut_ad(b->in_flush_list);
prev_b = b;
b = UT_LIST_GET_NEXT(list, b);
@@ -427,10 +548,9 @@ buf_flush_insert_sorted_into_flush_list(
}
if (prev_b == NULL) {
- UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
+ UT_LIST_ADD_FIRST(buf_pool->flush_list, &block->page);
} else {
- UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
- prev_b, &block->page);
+ UT_LIST_INSERT_AFTER(buf_pool->flush_list, prev_b, &block->page);
}
incr_flush_list_size_in_bytes(block, buf_pool);
@@ -445,8 +565,7 @@ buf_flush_insert_sorted_into_flush_list(
/********************************************************************//**
Returns TRUE if the file page block is immediately suitable for replacement,
i.e., the transition FILE_PAGE => NOT_USED allowed.
-@return TRUE if can replace immediately */
-UNIV_INTERN
+@return TRUE if can replace immediately */
ibool
buf_flush_ready_for_replace(
/*========================*/
@@ -467,21 +586,15 @@ buf_flush_ready_for_replace(
&& buf_page_get_io_fix(bpage) == BUF_IO_NONE);
}
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: buffer block state %lu"
- " in the LRU list!\n",
- (ulong) buf_page_get_state(bpage));
- ut_print_buf(stderr, bpage, sizeof(buf_page_t));
- putc('\n', stderr);
+ ib::fatal() << "Buffer block " << bpage << " state " << bpage->state
+ << " in the LRU list!";
return(FALSE);
}
/********************************************************************//**
Returns true if the block is modified and ready for flushing.
-@return true if can flush immediately */
-UNIV_INTERN
+@return true if can flush immediately */
bool
buf_flush_ready_for_flush(
/*======================*/
@@ -521,14 +634,12 @@ buf_flush_ready_for_flush(
/********************************************************************//**
Remove a block from the flush list of modified blocks. */
-UNIV_INTERN
void
buf_flush_remove(
/*=============*/
buf_page_t* bpage) /*!< in: pointer to the block in question */
{
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- ulint zip_size;
#if 0 // FIXME: Rate-limit the output. Move this to the page cleaner?
if (UNIV_UNLIKELY(srv_shutdown_state == SRV_SHUTDOWN_FLUSH_PHASE)) {
@@ -563,18 +674,18 @@ buf_flush_remove(
return;
case BUF_BLOCK_ZIP_DIRTY:
buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
+ UT_LIST_REMOVE(buf_pool->flush_list, bpage);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
buf_LRU_insert_zip_clean(bpage);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
break;
case BUF_BLOCK_FILE_PAGE:
- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
+ UT_LIST_REMOVE(buf_pool->flush_list, bpage);
break;
}
/* If the flush_rbt is active then delete from there as well. */
- if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+ if (buf_pool->flush_rbt != NULL) {
buf_flush_delete_from_flush_rbt(bpage);
}
@@ -582,8 +693,7 @@ buf_flush_remove(
because we assert on in_flush_list in comparison function. */
ut_d(bpage->in_flush_list = FALSE);
- zip_size = page_zip_get_size(&bpage->zip);
- buf_pool->stat.flush_list_bytes -= zip_size ? zip_size : UNIV_PAGE_SIZE;
+ buf_pool->stat.flush_list_bytes -= bpage->size.physical();
bpage->oldest_modification = 0;
@@ -591,6 +701,14 @@ buf_flush_remove(
ut_a(buf_flush_validate_skip(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+	/* If there is an observer that wants to know whether the
+	asynchronous flushing was done, then notify it. */
+ if (bpage->flush_observer != NULL) {
+ bpage->flush_observer->notify_remove(buf_pool, bpage);
+
+ bpage->flush_observer = NULL;
+ }
+
buf_flush_list_mutex_exit(buf_pool);
}
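notify_remove is one half of a two-call contract; the other half, notify_flush, appears in buf_flush_page() below. A sketch of the observer interface these call sites assume (signatures inferred from the calls; the real class is declared elsewhere in the patch):

	class FlushObserver {
	public:
		/* Called under the flush list mutex when a dirty page
		leaves the flush list (write completed or page removed). */
		void notify_remove(buf_pool_t* buf_pool, buf_page_t* bpage);

		/* Called after a write request for the page was
		dispatched. */
		void notify_flush(buf_pool_t* buf_pool, buf_page_t* bpage);
	};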
@@ -605,7 +723,6 @@ use the current list node (bpage) to do the list manipulation because
the list pointers could have changed between the time that we copied
the contents of bpage to the dpage and the flush list manipulation
below. */
-UNIV_INTERN
void
buf_flush_relocate_on_flush_list(
/*=============================*/
@@ -636,7 +753,7 @@ buf_flush_relocate_on_flush_list(
/* If recovery is active we must swap the control blocks in
the flush_rbt as well. */
- if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+ if (buf_pool->flush_rbt != NULL) {
buf_flush_delete_from_flush_rbt(bpage);
prev_b = buf_flush_insert_in_flush_rbt(dpage);
}
@@ -650,24 +767,18 @@ buf_flush_relocate_on_flush_list(
ut_d(bpage->in_flush_list = FALSE);
prev = UT_LIST_GET_PREV(list, bpage);
- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
+ UT_LIST_REMOVE(buf_pool->flush_list, bpage);
if (prev) {
ut_ad(prev->in_flush_list);
- UT_LIST_INSERT_AFTER(
- list,
- buf_pool->flush_list,
- prev, dpage);
+		UT_LIST_INSERT_AFTER(buf_pool->flush_list, prev, dpage);
} else {
- UT_LIST_ADD_FIRST(
- list,
- buf_pool->flush_list,
- dpage);
+ UT_LIST_ADD_FIRST(buf_pool->flush_list, dpage);
}
/* Just an extra check. Previous in flush_list
should be the same control block as in flush_rbt. */
- ut_a(!buf_pool->flush_rbt || prev_b == prev);
+ ut_a(buf_pool->flush_rbt == NULL || prev_b == prev);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_flush_validate_low(buf_pool));
@@ -676,13 +787,10 @@ buf_flush_relocate_on_flush_list(
buf_flush_list_mutex_exit(buf_pool);
}
-/********************************************************************//**
-Updates the flush system data structures when a write is completed. */
-UNIV_INTERN
-void
-buf_flush_write_complete(
-/*=====================*/
- buf_page_t* bpage) /*!< in: pointer to the block in question */
+/** Update the flush system data structures when a write is completed.
+@param[in,out] bpage flushed page
+@param[in] dblwr whether the doublewrite buffer was used */
+void buf_flush_write_complete(buf_page_t* bpage, bool dblwr)
{
buf_flush_t flush_type;
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
@@ -705,83 +813,94 @@ buf_flush_write_complete(
os_event_set(buf_pool->no_flush[flush_type]);
}
- buf_dblwr_update(bpage, flush_type);
+ if (dblwr) {
+ buf_dblwr_update(bpage, flush_type);
+ }
}
-#endif /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Calculate the checksum of a page from compressed table and update the page. */
-UNIV_INTERN
+/** Calculate the checksum of a page from compressed table and update
+the page.
+@param[in,out] page page to update
+@param[in] size compressed page size
+@param[in] lsn LSN to stamp on the page */
void
buf_flush_update_zip_checksum(
-/*==========================*/
- buf_frame_t* page, /*!< in/out: Page to update */
- ulint zip_size, /*!< in: Compressed page size */
- lsn_t lsn) /*!< in: Lsn to stamp on the page */
+ buf_frame_t* page,
+ ulint size,
+ lsn_t lsn)
{
- ut_a(zip_size > 0);
+ ut_a(size > 0);
- ib_uint32_t checksum = static_cast<ib_uint32_t>(
- page_zip_calc_checksum(
- page, zip_size,
- static_cast<srv_checksum_algorithm_t>(
- srv_checksum_algorithm)));
+ const uint32_t checksum = page_zip_calc_checksum(
+ page, size,
+ static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm));
mach_write_to_8(page + FIL_PAGE_LSN, lsn);
mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
}
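A usage illustration: the LSN is stamped before the checksum is computed, so the checksum covers it and the field can be read back afterwards (the size value 8192 is hypothetical):

	buf_flush_update_zip_checksum(page_zip->data, 8192, newest_lsn);

	/* The stamped LSN is part of the checksummed data. */
	ut_ad(mach_read_from_8(page_zip->data + FIL_PAGE_LSN)
	      == newest_lsn);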
-/********************************************************************//**
-Initializes a page for writing to the tablespace. */
-UNIV_INTERN
+/** Initialize a page for writing to the tablespace.
+@param[in] block buffer block; NULL if bypassing the buffer pool
+@param[in,out] page page frame
+@param[in,out] page_zip_ compressed page, or NULL if uncompressed
+@param[in] newest_lsn newest modification LSN to the page */
void
buf_flush_init_for_writing(
-/*=======================*/
- byte* page, /*!< in/out: page */
- void* page_zip_, /*!< in/out: compressed page, or NULL */
- lsn_t newest_lsn) /*!< in: newest modification lsn
- to the page */
+ const buf_block_t* block,
+ byte* page,
+ void* page_zip_,
+ lsn_t newest_lsn)
{
- ib_uint32_t checksum = 0 /* silence bogus gcc warning */;
-
+ ut_ad(block == NULL || block->frame == page);
+ ut_ad(block == NULL || page_zip_ == NULL
+ || &block->page.zip == page_zip_);
+ ut_ad(!srv_safe_truncate || !block || newest_lsn);
ut_ad(page);
+#if 0 /* MDEV-15528 TODO: reinstate this check */
+ /* innodb_immediate_scrub_data_uncompressed=ON would cause
+ fsp_init_file_page() to be called on freed pages, and thus
+ cause them to be written as almost-all-zeroed.
+	In MDEV-15528 we should instead implement an option to
+	make freed pages appear all-zero, bypassing this code. */
+ ut_ad(!srv_safe_truncate || !newest_lsn || fil_page_get_type(page));
+#endif
if (page_zip_) {
page_zip_des_t* page_zip;
- ulint zip_size;
+ ulint size;
page_zip = static_cast<page_zip_des_t*>(page_zip_);
- zip_size = page_zip_get_size(page_zip);
+ size = page_zip_get_size(page_zip);
- ut_ad(zip_size);
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
+ ut_ad(size);
+ ut_ad(ut_is_2pow(size));
+ ut_ad(size <= UNIV_ZIP_SIZE_MAX);
- switch (UNIV_EXPECT(fil_page_get_type(page), FIL_PAGE_INDEX)) {
+ switch (fil_page_get_type(page)) {
case FIL_PAGE_TYPE_ALLOCATED:
case FIL_PAGE_INODE:
case FIL_PAGE_IBUF_BITMAP:
case FIL_PAGE_TYPE_FSP_HDR:
case FIL_PAGE_TYPE_XDES:
/* These are essentially uncompressed pages. */
- memcpy(page_zip->data, page, zip_size);
+ memcpy(page_zip->data, page, size);
/* fall through */
case FIL_PAGE_TYPE_ZBLOB:
case FIL_PAGE_TYPE_ZBLOB2:
case FIL_PAGE_INDEX:
+ case FIL_PAGE_RTREE:
buf_flush_update_zip_checksum(
- page_zip->data, zip_size, newest_lsn);
+ page_zip->data, size, newest_lsn);
return;
}
- ut_print_timestamp(stderr);
- fputs(" InnoDB: ERROR: The compressed page to be written"
- " seems corrupt:", stderr);
- ut_print_buf(stderr, page, zip_size);
+ ib::error() << "The compressed page to be written"
+ " seems corrupt:";
+ ut_print_buf(stderr, page, size);
fputs("\nInnoDB: Possibly older version of the page:", stderr);
- ut_print_buf(stderr, page_zip->data, zip_size);
+ ut_print_buf(stderr, page_zip->data, size);
putc('\n', stderr);
ut_error;
}
@@ -792,47 +911,101 @@ buf_flush_init_for_writing(
mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
newest_lsn);
- /* Store the new formula checksum */
+ if (block && srv_page_size == 16384) {
+ /* The page type could be garbage in old files
+ created before MySQL 5.5. Such files always
+ had a page size of 16 kilobytes. */
+ ulint page_type = fil_page_get_type(page);
+ ulint reset_type = page_type;
+
+ switch (block->page.id.page_no() % 16384) {
+ case 0:
+ reset_type = block->page.id.page_no() == 0
+ ? FIL_PAGE_TYPE_FSP_HDR
+ : FIL_PAGE_TYPE_XDES;
+ break;
+ case 1:
+ reset_type = FIL_PAGE_IBUF_BITMAP;
+ break;
+ case FSP_TRX_SYS_PAGE_NO:
+ if (block->page.id.page_no()
+ == TRX_SYS_PAGE_NO
+ && block->page.id.space()
+ == TRX_SYS_SPACE) {
+ reset_type = FIL_PAGE_TYPE_TRX_SYS;
+ break;
+ }
+ /* fall through */
+ default:
+ switch (page_type) {
+ case FIL_PAGE_INDEX:
+ case FIL_PAGE_RTREE:
+ case FIL_PAGE_UNDO_LOG:
+ case FIL_PAGE_INODE:
+ case FIL_PAGE_IBUF_FREE_LIST:
+ case FIL_PAGE_TYPE_ALLOCATED:
+ case FIL_PAGE_TYPE_SYS:
+ case FIL_PAGE_TYPE_TRX_SYS:
+ case FIL_PAGE_TYPE_BLOB:
+ case FIL_PAGE_TYPE_ZBLOB:
+ case FIL_PAGE_TYPE_ZBLOB2:
+ break;
+ case FIL_PAGE_TYPE_FSP_HDR:
+ case FIL_PAGE_TYPE_XDES:
+ case FIL_PAGE_IBUF_BITMAP:
+ /* These pages should have
+ predetermined page numbers
+ (see above). */
+ default:
+ reset_type = FIL_PAGE_TYPE_UNKNOWN;
+ break;
+ }
+ }
+
+ if (UNIV_UNLIKELY(page_type != reset_type)) {
+ ib::info()
+ << "Resetting invalid page "
+ << block->page.id << " type "
+ << page_type << " to "
+ << reset_type << " when flushing.";
+ fil_page_set_type(page, reset_type);
+ }
+ }
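The switch encodes a fixed layout: in a 16KiB-page file, every 16384th page is an extent descriptor page and the page after it an ibuf bitmap page. A simplified sketch of the expected-type rule (it deliberately ignores the allow-list above that keeps other valid types; helper name hypothetical):

	static ulint
	sketch_expected_type(ulint space, ulint page_no)
	{
		switch (page_no % 16384) {
		case 0:
			/* page 0 is the file space header; every later
			multiple of 16384 is an extent descriptor page */
			return(page_no == 0
			       ? FIL_PAGE_TYPE_FSP_HDR
			       : FIL_PAGE_TYPE_XDES);
		case 1:
			return(FIL_PAGE_IBUF_BITMAP);
		case FSP_TRX_SYS_PAGE_NO:
			if (space == TRX_SYS_SPACE
			    && page_no == TRX_SYS_PAGE_NO) {
				return(FIL_PAGE_TYPE_TRX_SYS);
			}
			/* fall through */
		default:
			/* anything else not on the allow-list */
			return(FIL_PAGE_TYPE_UNKNOWN);
		}
	}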
+
+ uint32_t checksum = BUF_NO_CHECKSUM_MAGIC;
- switch ((srv_checksum_algorithm_t) srv_checksum_algorithm) {
+ switch (srv_checksum_algorithm_t(srv_checksum_algorithm)) {
+ case SRV_CHECKSUM_ALGORITHM_INNODB:
+ case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
+ checksum = buf_calc_page_new_checksum(page);
+ mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
+ checksum);
+ /* With the InnoDB checksum, we overwrite the first 4 bytes of
+ the end lsn field to store the old formula checksum. Since it
+ depends also on the field FIL_PAGE_SPACE_OR_CHKSUM, it has to
+ be calculated after storing the new formula checksum. */
+ checksum = buf_calc_page_old_checksum(page);
+ break;
case SRV_CHECKSUM_ALGORITHM_CRC32:
case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
+ /* In other cases we write the same checksum to both fields. */
checksum = buf_calc_page_crc32(page);
- mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
- break;
- case SRV_CHECKSUM_ALGORITHM_INNODB:
- case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
- checksum = (ib_uint32_t) buf_calc_page_new_checksum(page);
- mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
- checksum = (ib_uint32_t) buf_calc_page_old_checksum(page);
+ mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
+ checksum);
break;
case SRV_CHECKSUM_ALGORITHM_NONE:
case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
- checksum = BUF_NO_CHECKSUM_MAGIC;
- mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
+ mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
+ checksum);
break;
- /* no default so the compiler will emit a warning if new enum
- is added and not handled here */
+ /* no default so the compiler will emit a warning if
+ new enum is added and not handled here */
}
- /* With the InnoDB checksum, we overwrite the first 4 bytes of
- the end lsn field to store the old formula checksum. Since it
- depends also on the field FIL_PAGE_SPACE_OR_CHKSUM, it has to
- be calculated after storing the new formula checksum.
-
- In other cases we write the same value to both fields.
- If CRC32 is used then it is faster to use that checksum
- (calculated above) instead of calculating another one.
- We can afford to store something other than
- buf_calc_page_old_checksum() or BUF_NO_CHECKSUM_MAGIC in
- this field because the file will not be readable by old
- versions of MySQL/InnoDB anyway (older than MySQL 5.6.3) */
-
mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
checksum);
}
-#ifndef UNIV_HOTBACKUP
/********************************************************************//**
Does an asynchronous write of a buffer page. NOTE: in simulated aio and
also when the doublewrite buffer is used, we must call
@@ -846,20 +1019,24 @@ buf_flush_write_block_low(
buf_flush_t flush_type, /*!< in: type of flush */
bool sync) /*!< in: true if sync IO request */
{
- fil_space_t* space = fil_space_acquire_for_io(bpage->space);
+ fil_space_t* space = fil_space_acquire_for_io(bpage->id.space());
if (!space) {
return;
}
- ulint zip_size = buf_page_get_zip_size(bpage);
- page_t* frame = NULL;
+ ut_ad(space->purpose == FIL_TYPE_TEMPORARY
+ || space->purpose == FIL_TYPE_IMPORT
+ || space->purpose == FIL_TYPE_TABLESPACE);
+ ut_ad((space->purpose == FIL_TYPE_TEMPORARY)
+ == fsp_is_system_temporary(space->id));
+ page_t* frame = NULL;
#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
ut_ad(!buf_pool_mutex_own(buf_pool));
-#endif
+#endif /* UNIV_DEBUG */
-#ifdef UNIV_LOG_DEBUG
- static ibool univ_log_debug_warned;
-#endif /* UNIV_LOG_DEBUG */
+ DBUG_PRINT("ib_buf", ("flush %s %u page %u:%u",
+ sync ? "sync" : "async", (unsigned) flush_type,
+ bpage->id.space(), bpage->id.page_no()));
ut_ad(buf_page_in_file(bpage));
@@ -870,27 +1047,16 @@ buf_flush_write_block_low(
LRU_list. */
ut_ad(!buf_pool_mutex_own(buf_pool));
ut_ad(!buf_flush_list_mutex_own(buf_pool));
- ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
+ ut_ad(!buf_page_get_mutex(bpage)->is_owned());
ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
ut_ad(bpage->oldest_modification != 0);
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
-#endif
ut_ad(bpage->newest_modification != 0);
-#ifdef UNIV_LOG_DEBUG
- if (!univ_log_debug_warned) {
- univ_log_debug_warned = TRUE;
- fputs("Warning: cannot force log to disk if"
- " UNIV_LOG_DEBUG is defined!\n"
- "Crash recovery will not work!\n",
- stderr);
- }
-#else
/* Force the log to the disk before writing the modified block */
- log_write_up_to(bpage->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE);
-#endif
+ if (!srv_read_only_mode) {
+ log_write_up_to(bpage->newest_modification, true);
+ }
+
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_ZIP_PAGE: /* The page should be dirty. */
@@ -902,10 +1068,11 @@ buf_flush_write_block_low(
break;
case BUF_BLOCK_ZIP_DIRTY:
frame = bpage->zip.data;
+
mach_write_to_8(frame + FIL_PAGE_LSN,
bpage->newest_modification);
- ut_a(page_zip_verify_checksum(frame, zip_size));
+ ut_a(page_zip_verify_checksum(frame, bpage->size.physical()));
break;
case BUF_BLOCK_FILE_PAGE:
frame = bpage->zip.data;
@@ -913,48 +1080,31 @@ buf_flush_write_block_low(
frame = ((buf_block_t*) bpage)->frame;
}
- buf_flush_init_for_writing(((buf_block_t*) bpage)->frame,
- bpage->zip.data
- ? &bpage->zip : NULL,
- bpage->newest_modification);
+ buf_flush_init_for_writing(
+ reinterpret_cast<const buf_block_t*>(bpage),
+ reinterpret_cast<const buf_block_t*>(bpage)->frame,
+ bpage->zip.data ? &bpage->zip : NULL,
+ bpage->newest_modification);
break;
}
frame = buf_page_encrypt_before_write(space, bpage, frame);
- if (!srv_use_doublewrite_buf || !buf_dblwr) {
- fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
- sync,
- buf_page_get_space(bpage),
- zip_size,
- buf_page_get_page_no(bpage),
- 0,
- zip_size ? zip_size : bpage->real_size,
- frame,
- bpage,
- &bpage->write_size);
+ ut_ad(space->purpose == FIL_TYPE_TABLESPACE
+ || space->atomic_write_supported);
+ if (!space->use_doublewrite()) {
+ ulint type = IORequest::WRITE | IORequest::DO_NOT_WAKE;
+
+ IORequest request(type, bpage);
+
+ /* TODO: pass the tablespace to fil_io() */
+ fil_io(request,
+ sync, bpage->id, bpage->size, 0, bpage->size.physical(),
+ frame, bpage);
} else {
+ ut_ad(!srv_read_only_mode);
- /* InnoDB uses doublewrite buffer and doublewrite buffer
- is initialized. User can define do we use atomic writes
- on a file space (table) or not. If atomic writes are
- not used we should use doublewrite buffer and if
- atomic writes should be used, no doublewrite buffer
- is used. */
-
- if (fsp_flags_get_atomic_writes(space->flags)
- == ATOMIC_WRITES_ON) {
- fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
- FALSE,
- buf_page_get_space(bpage),
- zip_size,
- buf_page_get_page_no(bpage),
- 0,
- zip_size ? zip_size : bpage->real_size,
- frame,
- bpage,
- &bpage->write_size);
- } else if (flush_type == BUF_FLUSH_SINGLE_PAGE) {
+ if (flush_type == BUF_FLUSH_SINGLE_PAGE) {
buf_dblwr_write_single_page(bpage, sync);
} else {
ut_ad(!sync);
@@ -967,7 +1117,9 @@ buf_flush_write_block_low(
are working on. */
if (sync) {
ut_ad(flush_type == BUF_FLUSH_SINGLE_PAGE);
- fil_flush(space);
+ if (space->purpose != FIL_TYPE_TEMPORARY) {
+ fil_flush(space);
+ }
/* The tablespace could already have been dropped,
because fil_io(request, sync) would already have
@@ -975,13 +1127,12 @@ buf_flush_write_block_low(
buf_page_io_complete() only needs to look up the
tablespace during read requests, not during writes. */
ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
-
- /* true means we want to evict this page from the
- LRU list as well. */
#ifdef UNIV_DEBUG
dberr_t err =
#endif
- buf_page_io_complete(bpage, true);
+ /* true means we want to evict this page from the
+ LRU list as well. */
+ buf_page_io_complete(bpage, space->use_doublewrite(), true);
ut_ad(err == DB_SUCCESS);
}
@@ -1001,8 +1152,7 @@ writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be
held upon entering this function, and they will be released by this
function if it returns true.
@return TRUE if the page was flushed */
-UNIV_INTERN
-bool
+ibool
buf_flush_page(
/*===========*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
@@ -1010,47 +1160,50 @@ buf_flush_page(
buf_flush_t flush_type, /*!< in: type of flush */
bool sync) /*!< in: true if sync IO request */
{
+ BPageMutex* block_mutex;
+
ut_ad(flush_type < BUF_FLUSH_N_TYPES);
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(buf_page_in_file(bpage));
ut_ad(!sync || flush_type == BUF_FLUSH_SINGLE_PAGE);
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
-
+ block_mutex = buf_page_get_mutex(bpage);
ut_ad(mutex_own(block_mutex));
ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
- bool is_uncompressed;
+ bool is_uncompressed;
- is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
- ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex));
+ is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
+ ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex));
- ibool flush;
- rw_lock_t* rw_lock;
- bool no_fix_count = bpage->buf_fix_count == 0;
+ ibool flush;
+ rw_lock_t* rw_lock;
+ bool no_fix_count = bpage->buf_fix_count == 0;
- if (!is_uncompressed) {
- flush = TRUE;
+ if (!is_uncompressed) {
+ flush = TRUE;
rw_lock = NULL;
-
- } else if (!(no_fix_count || flush_type == BUF_FLUSH_LIST)) {
- /* This is a heuristic, to avoid expensive S attempts. */
+ } else if (!(no_fix_count || flush_type == BUF_FLUSH_LIST)
+ || (!no_fix_count
+ && srv_shutdown_state <= SRV_SHUTDOWN_CLEANUP
+ && fsp_is_system_temporary(bpage->id.space()))) {
+ /* This is a heuristic, to avoid expensive SX attempts. */
+		/* For a table residing in the temporary tablespace,
+		sync is done using IO_FIX, so before scheduling a
+		flush ensure that the page is not fixed. */
flush = FALSE;
} else {
-
rw_lock = &reinterpret_cast<buf_block_t*>(bpage)->lock;
-
if (flush_type != BUF_FLUSH_LIST) {
- flush = rw_lock_s_lock_gen_nowait(
- rw_lock, BUF_IO_WRITE);
+ flush = rw_lock_sx_lock_nowait(rw_lock, BUF_IO_WRITE);
} else {
- /* Will S lock later */
+ /* Will SX lock later */
flush = TRUE;
}
}
- if (flush) {
+ if (flush) {
/* We are committed to flushing by the time we get here */
@@ -1059,7 +1212,6 @@ buf_flush_page(
buf_page_set_flush_type(bpage, flush_type);
if (buf_pool->n_flush[flush_type] == 0) {
-
os_event_reset(buf_pool->no_flush[flush_type]);
}
@@ -1067,26 +1219,45 @@ buf_flush_page(
ut_ad(buf_pool->n_flush[flush_type] != 0);
mutex_exit(block_mutex);
+
buf_pool_mutex_exit(buf_pool);
if (flush_type == BUF_FLUSH_LIST
&& is_uncompressed
- && !rw_lock_s_lock_gen_nowait(rw_lock, BUF_IO_WRITE)) {
- /* avoiding deadlock possibility involves doublewrite
- buffer, should flush it, because it might hold the
- another block->lock. */
- buf_dblwr_flush_buffered_writes();
+ && !rw_lock_sx_lock_nowait(rw_lock, BUF_IO_WRITE)) {
- rw_lock_s_lock_gen(rw_lock, BUF_IO_WRITE);
- }
+ if (!fsp_is_system_temporary(bpage->id.space())) {
+			/* To avoid a possible deadlock involving the
+			doublewrite buffer, flush it first: it might
+			be holding another block->lock. */
+ buf_dblwr_flush_buffered_writes();
+ } else {
+ buf_dblwr_sync_datafiles();
+ }
+
+ rw_lock_sx_lock_gen(rw_lock, BUF_IO_WRITE);
+ }
+
+	/* If there is an observer that wants to know whether the
+	asynchronous flushing was dispatched, then notify it.
+	Note: we set the flush observer on a page with the x-latch held,
+	so we can guarantee that notify_flush and notify_remove are
+	called in pairs with an s-latch held on an uncompressed page. */
+ if (bpage->flush_observer != NULL) {
+ buf_pool_mutex_enter(buf_pool);
+
+ bpage->flush_observer->notify_flush(buf_pool, bpage);
+
+ buf_pool_mutex_exit(buf_pool);
+ }
/* Even though bpage is not protected by any mutex at this
point, it is safe to access bpage, because it is io_fixed and
oldest_modification != 0. Thus, it cannot be relocated in the
buffer pool or removed from flush_list or LRU_list. */
- buf_flush_write_block_low(bpage, flush_type, sync);
- }
+ buf_flush_write_block_low(bpage, flush_type, sync);
+ }
return(flush);
}
@@ -1098,7 +1269,6 @@ NOTE: buf_pool->mutex and block->mutex must be held upon entering this
function, and they will be released by this function after flushing.
This is loosely based on buf_flush_batch() and buf_flush_page().
@return TRUE if the page was flushed and the mutexes released */
-UNIV_INTERN
ibool
buf_flush_page_try(
/*===============*/
@@ -1107,7 +1277,7 @@ buf_flush_page_try(
{
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- ut_ad(mutex_own(&block->mutex));
+ ut_ad(buf_page_mutex_own(block));
if (!buf_flush_ready_for_flush(&block->page, BUF_FLUSH_SINGLE_PAGE)) {
return(FALSE);
@@ -1116,23 +1286,23 @@ buf_flush_page_try(
/* The following call will release the buffer pool and
block mutex. */
return(buf_flush_page(
- buf_pool, &block->page, BUF_FLUSH_SINGLE_PAGE, true));
+ buf_pool, &block->page,
+ BUF_FLUSH_SINGLE_PAGE, true));
}
# endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-/***********************************************************//**
-Check the page is in buffer pool and can be flushed.
-@return true if the page can be flushed. */
+
+/** Check the page is in buffer pool and can be flushed.
+@param[in] page_id page id
+@param[in] flush_type BUF_FLUSH_LRU or BUF_FLUSH_LIST
+@return true if the page can be flushed. */
static
bool
buf_flush_check_neighbor(
-/*=====================*/
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: page offset */
- buf_flush_t flush_type) /*!< in: BUF_FLUSH_LRU or
- BUF_FLUSH_LIST */
+ const page_id_t page_id,
+ buf_flush_t flush_type)
{
buf_page_t* bpage;
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
+ buf_pool_t* buf_pool = buf_pool_get(page_id);
bool ret;
ut_ad(flush_type == BUF_FLUSH_LRU
@@ -1141,7 +1311,7 @@ buf_flush_check_neighbor(
buf_pool_mutex_enter(buf_pool);
/* We only want to flush pages from this buffer pool. */
- bpage = buf_page_hash_get(buf_pool, space, offset);
+ bpage = buf_page_hash_get(buf_pool, page_id);
if (!bpage) {
@@ -1156,7 +1326,7 @@ buf_flush_check_neighbor(
ret = false;
if (flush_type != BUF_FLUSH_LRU || buf_page_is_old(bpage)) {
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ BPageMutex* block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex);
if (buf_flush_ready_for_flush(bpage, flush_type)) {
@@ -1169,26 +1339,25 @@ buf_flush_check_neighbor(
return(ret);
}
-/***********************************************************//**
-Flushes to disk all flushable pages within the flush area.
-@return number of pages flushed */
+/** Flushes to disk all flushable pages within the flush area.
+@param[in] page_id page id
+@param[in] flush_type BUF_FLUSH_LRU or BUF_FLUSH_LIST
+@param[in] n_flushed number of pages flushed so far in this batch
+@param[in] n_to_flush maximum number of pages we are allowed to flush
+@return number of pages flushed */
static
ulint
buf_flush_try_neighbors(
-/*====================*/
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: page offset */
- buf_flush_t flush_type, /*!< in: BUF_FLUSH_LRU or
- BUF_FLUSH_LIST */
- ulint n_flushed, /*!< in: number of pages
- flushed so far in this batch */
- ulint n_to_flush) /*!< in: maximum number of pages
- we are allowed to flush */
+ const page_id_t page_id,
+ buf_flush_t flush_type,
+ ulint n_flushed,
+ ulint n_to_flush)
{
ulint i;
ulint low;
ulint high;
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
+ ulint count = 0;
+ buf_pool_t* buf_pool = buf_pool_get(page_id);
ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
@@ -1196,8 +1365,8 @@ buf_flush_try_neighbors(
|| srv_flush_neighbors == 0) {
/* If there is little space or neighbor flushing is
not enabled then just flush the victim. */
- low = offset;
- high = offset + 1;
+ low = page_id.page_no();
+ high = page_id.page_no() + 1;
} else {
/* When flushed, dirty blocks are searched in
neighborhoods of this size, and flushed along with the
@@ -1209,27 +1378,38 @@ buf_flush_try_neighbors(
BUF_READ_AHEAD_AREA(buf_pool),
buf_pool->curr_size / 16);
- low = (offset / buf_flush_area) * buf_flush_area;
- high = (offset / buf_flush_area + 1) * buf_flush_area;
+ low = (page_id.page_no() / buf_flush_area) * buf_flush_area;
+ high = (page_id.page_no() / buf_flush_area + 1) * buf_flush_area;
if (srv_flush_neighbors == 1) {
/* adjust 'low' and 'high' to limit
for contiguous dirty area */
- if (offset > low) {
- for (i = offset - 1;
- i >= low
- && buf_flush_check_neighbor(
- space, i, flush_type);
- i--) {
- /* do nothing */
+ if (page_id.page_no() > low) {
+ for (i = page_id.page_no() - 1; i >= low; i--) {
+ if (!buf_flush_check_neighbor(
+ page_id_t(page_id.space(), i),
+ flush_type)) {
+
+ break;
+ }
+
+ if (i == low) {
+					/* Avoid wrap-around when
+					low == 0, which would call
+					buf_flush_check_neighbor()
+					with i == (ulint) -1 */
+ i--;
+ break;
+ }
}
low = i + 1;
}
- for (i = offset + 1;
+ for (i = page_id.page_no() + 1;
i < high
&& buf_flush_check_neighbor(
- space, i, flush_type);
+ page_id_t(page_id.space(), i),
+ flush_type);
i++) {
/* do nothing */
}
@@ -1237,17 +1417,17 @@ buf_flush_try_neighbors(
}
}
-#ifdef UNIV_DEBUG
- /* fprintf(stderr, "Flush area: low %lu high %lu\n", low, high); */
-#endif
-
- if (high > fil_space_get_size(space)) {
- high = fil_space_get_size(space);
+ const ulint space_size = fil_space_get_size(page_id.space());
+ if (high > space_size) {
+ high = space_size;
}
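Worked numbers for the area arithmetic above, assuming buf_flush_area == 64 (a hypothetical value):

	/* victim page 1000: low  = (1000 / 64) * 64      =  960
			     high = (1000 / 64 + 1) * 64  = 1024
	   victim page   63: low  = 0, high = 64 */
	ut_ad((1000 / 64) * 64 == 960);
	ut_ad((1000 / 64 + 1) * 64 == 1024);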
- ulint count = 0;
+ DBUG_PRINT("ib_buf", ("flush %u:%u..%u",
+ page_id.space(),
+ (unsigned) low, (unsigned) high));
- for (i = low; i < high; i++) {
+ for (ulint i = low; i < high; i++) {
+ buf_page_t* bpage;
if ((count + n_flushed) >= n_to_flush) {
@@ -1257,19 +1437,21 @@ buf_flush_try_neighbors(
are flushing has not been flushed yet then
we'll try to flush the victim that we
selected originally. */
- if (i <= offset) {
- i = offset;
+ if (i <= page_id.page_no()) {
+ i = page_id.page_no();
} else {
break;
}
}
- buf_pool = buf_pool_get(space, i);
+ const page_id_t cur_page_id(page_id.space(), i);
+
+ buf_pool = buf_pool_get(cur_page_id);
buf_pool_mutex_enter(buf_pool);
/* We only want to flush pages from this buffer pool. */
- buf_page_t* bpage = buf_page_hash_get(buf_pool, space, i);
+ bpage = buf_page_hash_get(buf_pool, cur_page_id);
if (bpage == NULL) {
@@ -1283,70 +1465,76 @@ buf_flush_try_neighbors(
because the flushed blocks are soon freed */
if (flush_type != BUF_FLUSH_LRU
- || i == offset
+ || i == page_id.page_no()
|| buf_page_is_old(bpage)) {
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ BPageMutex* block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex);
if (buf_flush_ready_for_flush(bpage, flush_type)
- && (i == offset || bpage->buf_fix_count == 0)
- && buf_flush_page(
+ && (i == page_id.page_no()
+ || bpage->buf_fix_count == 0)) {
+
+ /* We also try to flush those
+ neighbors != offset */
+
+ if (buf_flush_page(
buf_pool, bpage, flush_type, false)) {
- ++count;
+ ++count;
+ } else {
+ mutex_exit(block_mutex);
+ buf_pool_mutex_exit(buf_pool);
+ }
continue;
+ } else {
+ mutex_exit(block_mutex);
}
-
- mutex_exit(block_mutex);
}
-
buf_pool_mutex_exit(buf_pool);
}
- if (count > 0) {
+ if (count > 1) {
MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE,
- MONITOR_FLUSH_NEIGHBOR_COUNT,
- MONITOR_FLUSH_NEIGHBOR_PAGES,
- (count - 1));
+ MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE,
+ MONITOR_FLUSH_NEIGHBOR_COUNT,
+ MONITOR_FLUSH_NEIGHBOR_PAGES,
+ (count - 1));
}
return(count);
}
-/********************************************************************//**
-Check if the block is modified and ready for flushing. If the the block
-is ready to flush then flush the page and try o flush its neighbors.
-
-@return TRUE if buf_pool mutex was released during this function.
+/** Check if the block is modified and ready for flushing.
+If the block is ready to flush then flush the page and try to flush
+its neighbors.
+@param[in] bpage buffer control block,
+must be buf_page_in_file(bpage)
+@param[in] flush_type BUF_FLUSH_LRU or BUF_FLUSH_LIST
+@param[in] n_to_flush number of pages to flush
+@param[in,out] count number of pages flushed
+@return TRUE if buf_pool mutex was released during this function.
This does not guarantee that some pages were written as well.
The number of pages written is added to *count. */
static
-ibool
+bool
buf_flush_page_and_try_neighbors(
-/*=============================*/
- buf_page_t* bpage, /*!< in: buffer control block,
- must be
- buf_page_in_file(bpage) */
- buf_flush_t flush_type, /*!< in: BUF_FLUSH_LRU
- or BUF_FLUSH_LIST */
- ulint n_to_flush, /*!< in: number of pages to
- flush */
- ulint* count) /*!< in/out: number of pages
- flushed */
+ buf_page_t* bpage,
+ buf_flush_t flush_type,
+ ulint n_to_flush,
+ ulint* count)
{
- ibool flushed;
- ib_mutex_t* block_mutex;
#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
-#endif /* UNIV_DEBUG */
ut_ad(buf_pool_mutex_own(buf_pool));
+#endif /* UNIV_DEBUG */
+
+ bool flushed;
+ BPageMutex* block_mutex = buf_page_get_mutex(bpage);
- block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex);
ut_a(buf_page_in_file(bpage));
@@ -1356,26 +1544,22 @@ buf_flush_page_and_try_neighbors(
buf_pool = buf_pool_from_bpage(bpage);
- buf_pool_mutex_exit(buf_pool);
-
- /* These fields are protected by both the
- buffer pool mutex and block mutex. */
- ulint space = buf_page_get_space(bpage);
- ulint offset = buf_page_get_page_no(bpage);
+ const page_id_t page_id = bpage->id;
mutex_exit(block_mutex);
+ buf_pool_mutex_exit(buf_pool);
+
/* Try to flush also all the neighbors */
*count += buf_flush_try_neighbors(
- space, offset, flush_type, *count, n_to_flush);
+ page_id, flush_type, *count, n_to_flush);
buf_pool_mutex_enter(buf_pool);
-
flushed = TRUE;
-
} else {
mutex_exit(block_mutex);
- flushed = FALSE;
+
+ flushed = false;
}
ut_ad(buf_pool_mutex_own(buf_pool));
@@ -1400,7 +1584,6 @@ buf_free_from_unzip_LRU_list_batch(
ulint max) /*!< in: desired number of
blocks in the free_list */
{
- buf_block_t* block;
ulint scanned = 0;
ulint count = 0;
ulint free_len = UT_LIST_GET_LEN(buf_pool->free);
@@ -1408,8 +1591,10 @@ buf_free_from_unzip_LRU_list_batch(
ut_ad(buf_pool_mutex_own(buf_pool));
- block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
- while (block != NULL && count < max
+ buf_block_t* block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
+
+ while (block != NULL
+ && count < max
&& free_len < srv_LRU_scan_depth
&& lru_len > UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
@@ -1448,7 +1633,7 @@ The calling thread is not allowed to own any latches on pages!
It attempts to make 'max' blocks available in the free list. Note that
it is a best effort attempt and it is not guaranteed that after a call
to this function there will be 'max' blocks in the free list.*/
-__attribute__((nonnull))
+
void
buf_flush_LRU_list_batch(
/*=====================*/
@@ -1462,16 +1647,21 @@ buf_flush_LRU_list_batch(
ulint scanned = 0;
ulint free_len = UT_LIST_GET_LEN(buf_pool->free);
ulint lru_len = UT_LIST_GET_LEN(buf_pool->LRU);
+ ulint withdraw_depth = 0;
n->flushed = 0;
n->evicted = 0;
n->unzip_LRU_evicted = 0;
-
ut_ad(buf_pool_mutex_own(buf_pool));
+ if (buf_pool->curr_size < buf_pool->old_size
+ && buf_pool->withdraw_target > 0) {
+ withdraw_depth = buf_pool->withdraw_target
+ - UT_LIST_GET_LEN(buf_pool->withdraw);
+ }
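withdraw_depth widens the LRU scan while the buffer pool is being shrunk, so that pages in the to-be-withdrawn range are drained faster. Illustrative numbers (all hypothetical):

	/* shrinking: old_size 8192 pages, curr_size 4096 pages,
	   withdraw_target 4096, 3000 pages already withdrawn */
	const ulint	withdraw_depth = 4096 - 3000;	/* = 1096 */

	/* the loop below may now keep scanning up to
	   srv_LRU_scan_depth + 1096 pages per batch */
	ut_a(withdraw_depth == 1096);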
for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
- bpage != NULL && (n->evicted + n->flushed) < max
- && free_len < srv_LRU_scan_depth
+ bpage != NULL && n->flushed + n->evicted < max
+ && free_len < srv_LRU_scan_depth + withdraw_depth
&& lru_len > BUF_LRU_MIN_LEN;
++scanned,
bpage = buf_pool->lru_hp.get()) {
@@ -1479,23 +1669,29 @@ buf_flush_LRU_list_batch(
buf_page_t* prev = UT_LIST_GET_PREV(LRU, bpage);
buf_pool->lru_hp.set(prev);
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ BPageMutex* block_mutex = buf_page_get_mutex(bpage);
+
mutex_enter(block_mutex);
- bool evict = buf_flush_ready_for_replace(bpage);
- mutex_exit(block_mutex);
- if (evict) {
+ if (buf_flush_ready_for_replace(bpage)) {
/* block is ready for eviction i.e., it is
clean and is not IO-fixed or buffer fixed. */
+ mutex_exit(block_mutex);
if (buf_LRU_free_page(bpage, true)) {
- n->evicted++;
+ ++n->evicted;
}
- } else {
+ } else if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_LRU)) {
/* Block is ready for flush. Dispatch an IO
request. The IO helper thread will put it on
free list in IO completion routine. */
+ mutex_exit(block_mutex);
buf_flush_page_and_try_neighbors(
bpage, BUF_FLUSH_LRU, max, &n->flushed);
+ } else {
+ /* Can't evict or dispatch this block. Go to
+ previous. */
+ ut_ad(buf_pool->lru_hp.is_hp(prev));
+ mutex_exit(block_mutex);
}
ut_ad(!mutex_own(block_mutex));
@@ -1514,6 +1710,14 @@ buf_flush_LRU_list_batch(
ut_ad(buf_pool_mutex_own(buf_pool));
+ if (n->evicted) {
+ MONITOR_INC_VALUE_CUMULATIVE(
+ MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE,
+ MONITOR_LRU_BATCH_EVICT_COUNT,
+ MONITOR_LRU_BATCH_EVICT_PAGES,
+ n->evicted);
+ }
+
if (scanned) {
MONITOR_INC_VALUE_CUMULATIVE(
MONITOR_LRU_BATCH_SCANNED,
@@ -1526,7 +1730,7 @@ buf_flush_LRU_list_batch(
/*******************************************************************//**
Flush and move pages from LRU or unzip_LRU list to the free list.
Whether LRU or unzip_LRU is used depends on the state of the system.*/
-__attribute__((nonnull))
+
static
void
buf_do_LRU_batch(
@@ -1555,26 +1759,22 @@ buf_do_LRU_batch(
n->evicted += n->unzip_LRU_evicted;
}
-/*******************************************************************//**
-This utility flushes dirty blocks from the end of the flush_list.
-the calling thread is not allowed to own any latches on pages!
+/** This utility flushes dirty blocks from the end of the flush_list.
+The calling thread is not allowed to own any latches on pages!
+@param[in] buf_pool buffer pool instance
+@param[in]	min_n		wished minimum number of blocks flushed (it is
+not guaranteed that the actual number is that big, though)
+@param[in] lsn_limit all blocks whose oldest_modification is smaller
+than this should be flushed (if their number does not exceed min_n)
@return number of blocks for which the write request was queued;
ULINT_UNDEFINED if there was a flush of the same type already
running */
static
ulint
buf_do_flush_list_batch(
-/*====================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- ulint min_n, /*!< in: wished minimum mumber
- of blocks flushed (it is not
- guaranteed that the actual
- number is that big, though) */
- lsn_t lsn_limit) /*!< all blocks whose
- oldest_modification is smaller
- than this should be flushed (if
- their number does not exceed
- min_n) */
+ buf_pool_t* buf_pool,
+ ulint min_n,
+ lsn_t lsn_limit)
{
ulint count = 0;
ulint scanned = 0;
@@ -1622,47 +1822,53 @@ buf_do_flush_list_batch(
buf_pool->flush_hp.set(NULL);
buf_flush_list_mutex_exit(buf_pool);
- MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_BATCH_SCANNED,
- MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL,
- MONITOR_FLUSH_BATCH_SCANNED_PER_CALL,
- scanned);
+ if (scanned) {
+ MONITOR_INC_VALUE_CUMULATIVE(
+ MONITOR_FLUSH_BATCH_SCANNED,
+ MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL,
+ MONITOR_FLUSH_BATCH_SCANNED_PER_CALL,
+ scanned);
+ }
+
+ if (count) {
+ MONITOR_INC_VALUE_CUMULATIVE(
+ MONITOR_FLUSH_BATCH_TOTAL_PAGE,
+ MONITOR_FLUSH_BATCH_COUNT,
+ MONITOR_FLUSH_BATCH_PAGES,
+ count);
+ }
ut_ad(buf_pool_mutex_own(buf_pool));
return(count);
}
-/*******************************************************************//**
-This utility flushes dirty blocks from the end of the LRU list or flush_list.
+/** This utility flushes dirty blocks from the end of the LRU list or
+flush_list.
NOTE 1: in the case of an LRU flush the calling thread may own latches to
pages: to avoid deadlocks, this function must be written so that it cannot
end up waiting for these latches! NOTE 2: in the case of a flush list flush,
-the calling thread is not allowed to own any latches on pages! */
-__attribute__((nonnull))
+the calling thread is not allowed to own any latches on pages!
+@param[in] buf_pool buffer pool instance
+@param[in] flush_type BUF_FLUSH_LRU or BUF_FLUSH_LIST; if
+BUF_FLUSH_LIST, then the caller must not own any latches on pages
+@param[in]	min_n		wished minimum number of blocks flushed (it is
+not guaranteed that the actual number is that big, though)
+@param[in] lsn_limit in the case of BUF_FLUSH_LIST all blocks whose
+oldest_modification is smaller than this should be flushed (if their number
+does not exceed min_n), otherwise ignored */
void
buf_flush_batch(
-/*============*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- buf_flush_t flush_type, /*!< in: BUF_FLUSH_LRU or
- BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
- then the caller must not own any
- latches on pages */
- ulint min_n, /*!< in: wished minimum mumber of blocks
- flushed (it is not guaranteed that the
- actual number is that big, though) */
- lsn_t lsn_limit, /*!< in: in the case of BUF_FLUSH_LIST
- all blocks whose oldest_modification is
- smaller than this should be flushed
- (if their number does not exceed
- min_n), otherwise ignored */
+ buf_pool_t* buf_pool,
+ buf_flush_t flush_type,
+ ulint min_n,
+ lsn_t lsn_limit,
flush_counters_t* n) /*!< out: flushed/evicted page
counts */
{
ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad((flush_type != BUF_FLUSH_LIST)
- || sync_thread_levels_empty_except_dict());
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(flush_type == BUF_FLUSH_LRU
+ || !sync_check_iterate(dict_sync_check()));
buf_pool_mutex_enter(buf_pool);
@@ -1682,38 +1888,26 @@ buf_flush_batch(
buf_pool_mutex_exit(buf_pool);
-#ifdef UNIV_DEBUG
- if (buf_debug_prints && n->flushed > 0) {
- fprintf(stderr, flush_type == BUF_FLUSH_LRU
- ? "Flushed %lu pages in LRU flush\n"
- : "Flushed %lu pages in flush list flush\n",
- (ulong) n->flushed);
- }
-#endif /* UNIV_DEBUG */
+ DBUG_LOG("ib_buf", "flush " << flush_type << " completed");
}
/******************************************************************//**
-Gather the aggregated stats for both flush list and LRU list flushing */
+Gather the aggregated stats for both flush list and LRU list flushing.
+@param page_count_flush number of pages flushed from the end of the flush_list
+@param page_count_LRU number of pages flushed from the end of the LRU list
+*/
void
-buf_flush_common(
-/*=============*/
- buf_flush_t flush_type, /*!< in: type of flush */
- ulint page_count) /*!< in: number of pages flushed */
+buf_flush_stats(
+/*============*/
+ ulint page_count_flush,
+ ulint page_count_LRU)
{
- buf_dblwr_flush_buffered_writes();
+ DBUG_PRINT("ib_buf", ("flush completed, from flush_list %u pages, "
+ "from LRU_list %u pages",
+ unsigned(page_count_flush),
+ unsigned(page_count_LRU)));
- ut_a(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints && page_count > 0) {
- fprintf(stderr, flush_type == BUF_FLUSH_LRU
- ? "Flushed %lu pages in LRU flush\n"
- : "Flushed %lu pages in flush list flush\n",
- (ulong) page_count);
- }
-#endif /* UNIV_DEBUG */
-
- srv_stats.buf_pool_flushed.add(page_count);
+ srv_stats.buf_pool_flushed.add(page_count_flush + page_count_LRU);
}
/******************************************************************//**
@@ -1725,6 +1919,8 @@ buf_flush_start(
buf_flush_t flush_type) /*!< in: BUF_FLUSH_LRU
or BUF_FLUSH_LIST */
{
+ ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
+
buf_pool_mutex_enter(buf_pool);
if (buf_pool->n_flush[flush_type] > 0
@@ -1739,12 +1935,29 @@ buf_flush_start(
buf_pool->init_flush[flush_type] = TRUE;
+ os_event_reset(buf_pool->no_flush[flush_type]);
+
buf_pool_mutex_exit(buf_pool);
return(TRUE);
}
/******************************************************************//**
+Gather the aggregated stats for both flush list and LRU list flushing */
+void
+buf_flush_common(
+/*=============*/
+ buf_flush_t flush_type, /*!< in: type of flush */
+ ulint page_count) /*!< in: number of pages flushed */
+{
+ buf_dblwr_flush_buffered_writes();
+
+ ut_a(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
+
+ srv_stats.buf_pool_flushed.add(page_count);
+}
+
+/******************************************************************//**
End a buffer flush batch for LRU or flush list */
void
buf_flush_end(
@@ -1767,11 +1980,16 @@ buf_flush_end(
}
buf_pool_mutex_exit(buf_pool);
+
+ if (!srv_read_only_mode) {
+ buf_dblwr_flush_buffered_writes();
+ } else {
+ os_aio_simulated_wake_handler_threads();
+ }
}
/******************************************************************//**
Waits until a flush batch of the given type ends */
-UNIV_INTERN
void
buf_flush_wait_batch_end(
/*=====================*/
@@ -1800,31 +2018,116 @@ buf_flush_wait_batch_end(
}
}
-/*******************************************************************//**
-This utility flushes dirty blocks from the end of the flush list of
-all buffer pool instances.
+/** Do flushing batch of a given type.
NOTE: The calling thread is not allowed to own any latches on pages!
+@param[in,out] buf_pool buffer pool instance
+@param[in] type flush type
+@param[in]	min_n		wished minimum number of blocks flushed
+(it is not guaranteed that the actual number is that big, though)
+@param[in] lsn_limit in the case BUF_FLUSH_LIST all blocks whose
+oldest_modification is smaller than this should be flushed (if their number
+does not exceed min_n), otherwise ignored
+@param[out]	n		the flushed/evicted page counts are passed
+back to the caller. Ignored if NULL
+@retval true if a batch was queued successfully.
+@retval false if another batch of same type was already running. */
+bool
+buf_flush_do_batch(
+ buf_pool_t* buf_pool,
+ buf_flush_t type,
+ ulint min_n,
+ lsn_t lsn_limit,
+ flush_counters_t* n)
+{
+ ut_ad(type == BUF_FLUSH_LRU || type == BUF_FLUSH_LIST);
+
+ if (n != NULL) {
+ n->flushed = 0;
+ }
+
+ if (!buf_flush_start(buf_pool, type)) {
+ return(false);
+ }
+
+ buf_flush_batch(buf_pool, type, min_n, lsn_limit, n);
+
+ buf_flush_end(buf_pool, type);
+
+ return(true);
+}
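A caller sketch for the new wrapper, mirroring how buf_flush_lists() uses it below (the min_n and lsn values are illustrative):

	flush_counters_t	n;
	memset(&n, 0, sizeof(flush_counters_t));

	if (buf_flush_do_batch(buf_pool_from_array(0), BUF_FLUSH_LIST,
			       100, LSN_MAX, &n)) {
		/* the batch was queued; n.flushed pages were dispatched */
	} else {
		/* a BUF_FLUSH_LIST batch was already running */
	}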
+/**
+Waits until a flush batch of the given lsn ends
+@param[in] new_oldest target oldest_modified_lsn to wait for */
+
+void
+buf_flush_wait_flushed(
+ lsn_t new_oldest)
+{
+ for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
+ buf_pool_t* buf_pool;
+ lsn_t oldest;
+
+ buf_pool = buf_pool_from_array(i);
+
+ for (;;) {
+			/* We need not wait for an fsync of the flushed
+			blocks: the checkpoint will fsync anyway. So,
+			we don't need to wait for the batch end here. */
+
+ buf_flush_list_mutex_enter(buf_pool);
+
+ buf_page_t* bpage;
+
+ /* We don't need to wait for system temporary pages */
+ for (bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
+ bpage != NULL
+ && fsp_is_system_temporary(bpage->id.space());
+ bpage = UT_LIST_GET_PREV(list, bpage)) {
+ /* Do nothing. */
+ }
+
+ if (bpage != NULL) {
+ ut_ad(bpage->in_flush_list);
+ oldest = bpage->oldest_modification;
+ } else {
+ oldest = 0;
+ }
+
+ buf_flush_list_mutex_exit(buf_pool);
+
+ if (oldest == 0 || oldest >= new_oldest) {
+ break;
+ }
+
+ /* sleep and retry */
+ os_thread_sleep(buf_flush_wait_flushed_sleep_time);
+
+ MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
+ }
+ }
+}
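A typical caller pairs this wait with a preceding flush request, for example when advancing a checkpoint (sketch; the target-LSN source is an assumption):

	const lsn_t	new_oldest = log_get_lsn();	/* hypothetical target */

	/* request writes of everything older than the target ... */
	buf_flush_lists(ULINT_MAX, new_oldest, NULL);

	/* ... then block until no dirty page older than it remains */
	buf_flush_wait_flushed(new_oldest);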
+
+/** This utility flushes dirty blocks from the end of the flush list of all
+buffer pool instances.
+NOTE: The calling thread is not allowed to own any latches on pages!
+@param[in]	min_n		wished minimum number of blocks flushed (it is
+not guaranteed that the actual number is that big, though)
+@param[in] lsn_limit in the case BUF_FLUSH_LIST all blocks whose
+oldest_modification is smaller than this should be flushed (if their number
+does not exceed min_n), otherwise ignored
+@param[out] n_processed the number of pages which were processed is
+passed back to caller. Ignored if NULL.
@return true if a batch was queued successfully for each buffer pool
instance. false if another batch of same type was already running in
at least one of the buffer pool instance */
-UNIV_INTERN
bool
-buf_flush_list(
-/*===========*/
- ulint min_n, /*!< in: wished minimum mumber of blocks
- flushed (it is not guaranteed that the
- actual number is that big, though) */
- lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all
- blocks whose oldest_modification is
- smaller than this should be flushed
- (if their number does not exceed
- min_n), otherwise ignored */
- ulint* n_processed) /*!< out: the number of pages
- which were processed is passed
- back to caller. Ignored if NULL */
-
+buf_flush_lists(
+ ulint min_n,
+ lsn_t lsn_limit,
+ ulint* n_processed)
{
ulint i;
+ ulint n_flushed = 0;
bool success = true;
if (buf_mtflu_init_done()) {
@@ -1849,9 +2152,14 @@ buf_flush_list(
buf_pool_t* buf_pool;
flush_counters_t n;
+ memset(&n, 0, sizeof(flush_counters_t));
buf_pool = buf_pool_from_array(i);
- if (!buf_flush_start(buf_pool, BUF_FLUSH_LIST)) {
+ if (!buf_flush_do_batch(buf_pool,
+ BUF_FLUSH_LIST,
+ min_n,
+ lsn_limit,
+ &n)) {
/* We have two choices here. If lsn_limit was
specified then skipping an instance of buffer
pool means we cannot guarantee that all pages
@@ -1864,26 +2172,15 @@ buf_flush_list(
failure. */
success = false;
- continue;
}
- buf_flush_batch(
- buf_pool, BUF_FLUSH_LIST, min_n, lsn_limit, &n);
-
- buf_flush_end(buf_pool, BUF_FLUSH_LIST);
-
- buf_flush_common(BUF_FLUSH_LIST, n.flushed);
+ n_flushed += n.flushed;
+ }
+ if (n_flushed) {
+ buf_flush_stats(n_flushed, 0);
if (n_processed) {
- *n_processed += n.flushed;
- }
-
- if (n.flushed) {
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_FLUSH_BATCH_TOTAL_PAGE,
- MONITOR_FLUSH_BATCH_COUNT,
- MONITOR_FLUSH_BATCH_PAGES,
- n.flushed);
+ *n_processed = n_flushed;
}
}
@@ -1897,9 +2194,8 @@ list and puts it on the free list. It is called from user threads when
they are unable to find a replaceable page at the tail of the LRU
list i.e.: when the background LRU flushing in the page_cleaner thread
is not fast enough to keep pace with the workload.
-@return TRUE if success. */
-UNIV_INTERN
-ibool
+@return true if success. */
+bool
buf_flush_single_page_from_LRU(
/*===========================*/
buf_pool_t* buf_pool) /*!< in/out: buffer pool instance */
@@ -1910,48 +2206,56 @@ buf_flush_single_page_from_LRU(
buf_pool_mutex_enter(buf_pool);
- for (bpage = buf_pool->single_scan_itr.start(),
- scanned = 0, freed = FALSE;
+ for (bpage = buf_pool->single_scan_itr.start(), scanned = 0,
+ freed = false;
bpage != NULL;
++scanned, bpage = buf_pool->single_scan_itr.get()) {
ut_ad(buf_pool_mutex_own(buf_pool));
- buf_page_t* prev = UT_LIST_GET_PREV(LRU, bpage);
+ buf_page_t* prev = UT_LIST_GET_PREV(LRU, bpage);
buf_pool->single_scan_itr.set(prev);
+ BPageMutex* block_mutex;
+
+ block_mutex = buf_page_get_mutex(bpage);
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex);
if (buf_flush_ready_for_replace(bpage)) {
/* block is ready for eviction i.e., it is
clean and is not IO-fixed or buffer fixed. */
mutex_exit(block_mutex);
+
if (buf_LRU_free_page(bpage, true)) {
buf_pool_mutex_exit(buf_pool);
- freed = TRUE;
+ freed = true;
break;
}
+
} else if (buf_flush_ready_for_flush(
- bpage, BUF_FLUSH_SINGLE_PAGE)) {
- /* Block is ready for flush. Dispatch an IO
- request. We'll put it on free list in IO
- completion routine. The following call, if
- successful, will release the buffer pool and
- block mutex. */
- freed = buf_flush_page(buf_pool, bpage,
- BUF_FLUSH_SINGLE_PAGE, true);
+ bpage, BUF_FLUSH_SINGLE_PAGE)) {
+
+ /* Block is ready for flush. Try to dispatch an IO
+ request. We'll put it on the free list in the IO
+ completion routine if it is not buffer-fixed. The
+ following call will release the buffer pool and block
+ mutex.
+
+ Note: There is no guarantee that this page has actually
+ been freed, only that it has been flushed to disk. */
+
+ freed = buf_flush_page(
+ buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, true);
+
if (freed) {
- /* block and buffer pool mutex have
- already been reelased. */
break;
}
+
mutex_exit(block_mutex);
} else {
mutex_exit(block_mutex);
}
+ ut_ad(!mutex_own(block_mutex));
}
-
if (!freed) {
/* Can't find a single flushable page. */
ut_ad(!bpage);
@@ -1970,78 +2274,60 @@ buf_flush_single_page_from_LRU(
return(freed);
}
-/*********************************************************************//**
-Clears up tail of the LRU lists:
+/**
+Clears up tail of the LRU list of a given buffer pool instance:
* Put replaceable pages at the tail of LRU to the free list
* Flush dirty pages at the tail of LRU to the disk
The depth to which we scan each buffer pool is controlled by dynamic
config parameter innodb_LRU_scan_depth.
+@param buf_pool buffer pool instance
@return total pages flushed */
-UNIV_INTERN
+static
ulint
-buf_flush_LRU_tail(void)
-/*====================*/
+buf_flush_LRU_list(
+ buf_pool_t* buf_pool)
{
- ulint total_flushed = 0;
+ ulint scan_depth, withdraw_depth;
+ flush_counters_t n;
+
+ memset(&n, 0, sizeof(flush_counters_t));
if(buf_mtflu_init_done())
{
return(buf_mtflu_flush_LRU_tail());
}
- for (ulint i = 0; i < srv_buf_pool_instances; i++) {
-
- buf_pool_t* buf_pool = buf_pool_from_array(i);
- ulint scan_depth;
- flush_counters_t n;
-
- /* srv_LRU_scan_depth can be arbitrarily large value.
- We cap it with current LRU size. */
- buf_pool_mutex_enter(buf_pool);
- scan_depth = UT_LIST_GET_LEN(buf_pool->LRU);
- buf_pool_mutex_exit(buf_pool);
-
- scan_depth = ut_min(srv_LRU_scan_depth, scan_depth);
-
- /* Currently page_cleaner is the only thread
- that can trigger an LRU flush. It is possible
- that a batch triggered during last iteration is
- still running, */
- if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) {
- continue;
- }
-
- buf_flush_batch(buf_pool, BUF_FLUSH_LRU, scan_depth, 0, &n);
-
- buf_flush_end(buf_pool, BUF_FLUSH_LRU);
-
- buf_flush_common(BUF_FLUSH_LRU, n.flushed);
-
- if (n.flushed) {
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE,
- MONITOR_LRU_BATCH_FLUSH_COUNT,
- MONITOR_LRU_BATCH_FLUSH_PAGES,
- n.flushed);
- }
-
- if (n.evicted) {
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE,
- MONITOR_LRU_BATCH_EVICT_COUNT,
- MONITOR_LRU_BATCH_EVICT_PAGES,
- n.evicted);
- }
-
- total_flushed += (n.flushed + n.evicted);
+ ut_ad(buf_pool);
+ /* srv_LRU_scan_depth can be arbitrarily large value.
+ We cap it with current LRU size. */
+ buf_pool_mutex_enter(buf_pool);
+ scan_depth = UT_LIST_GET_LEN(buf_pool->LRU);
+ if (buf_pool->curr_size < buf_pool->old_size
+ && buf_pool->withdraw_target > 0) {
+ withdraw_depth = buf_pool->withdraw_target
+ - UT_LIST_GET_LEN(buf_pool->withdraw);
+ } else {
+ withdraw_depth = 0;
}
+ buf_pool_mutex_exit(buf_pool);
+ if (withdraw_depth > srv_LRU_scan_depth) {
+ scan_depth = ut_min(withdraw_depth, scan_depth);
+ } else {
+ scan_depth = ut_min(static_cast<ulint>(srv_LRU_scan_depth),
+ scan_depth);
+ }
+ /* Currently only one page_cleaner thread can trigger an
+ LRU flush for a given buffer pool instance at a time, so a
+ batch triggered during the last iteration cannot still be
+ running. */
+ buf_flush_do_batch(buf_pool, BUF_FLUSH_LRU, scan_depth,
+ 0, &n);
- return(total_flushed);
+ return(n.flushed);
}
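
The scan-depth selection above reads as a small pure function: cap innodb_lru_scan_depth by the current LRU length, unless a buffer pool shrink is in progress and the withdraw backlog demands a deeper scan. A hedged standalone sketch, with illustrative names:

#include <algorithm>

typedef unsigned long ulint_t;	/* stand-in for ulint */

ulint_t lru_scan_depth(ulint_t lru_len, ulint_t configured_depth,
		       ulint_t withdraw_depth)
{
	/* While shrinking the pool, let the withdraw backlog widen
	the scan so that shrinking keeps making progress. */
	if (withdraw_depth > configured_depth) {
		return std::min(withdraw_depth, lru_len);
	}
	return std::min(configured_depth, lru_len);
}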
/*********************************************************************//**
Wait for any possible LRU flushes that are in progress to end. */
-UNIV_INTERN
void
buf_flush_wait_LRU_batch_end(void)
/*==============================*/
@@ -2065,26 +2351,6 @@ buf_flush_wait_LRU_batch_end(void)
}
/*********************************************************************//**
-Flush a batch of dirty pages from the flush list
-@return number of pages flushed, 0 if no page is flushed or if another
-flush_list type batch is running */
-static
-ulint
-page_cleaner_do_flush_batch(
-/*========================*/
- ulint n_to_flush, /*!< in: number of pages that
- we should attempt to flush. */
- lsn_t lsn_limit) /*!< in: LSN up to which flushing
- must happen */
-{
- ulint n_flushed;
-
- buf_flush_list(n_to_flush, lsn_limit, &n_flushed);
-
- return(n_flushed);
-}
-
-/*********************************************************************//**
Calculates if flushing is required based on number of dirty pages in
the buffer pool.
@return percent of io_capacity to flush to manage dirty page ratio */
@@ -2093,10 +2359,11 @@ ulint
af_get_pct_for_dirty()
/*==================*/
{
- ulint dirty_pct = buf_get_modified_ratio_pct();
+ double dirty_pct = buf_get_modified_ratio_pct();
- if (dirty_pct > 0 && srv_max_buf_pool_modified_pct == 0) {
- return(100);
+ if (dirty_pct == 0.0) {
+ /* No pages modified */
+ return(0);
}
ut_a(srv_max_dirty_pages_pct_lwm
@@ -2105,16 +2372,16 @@ af_get_pct_for_dirty()
if (srv_max_dirty_pages_pct_lwm == 0) {
/* The user has not set the option to preflush dirty
pages as we approach the high water mark. */
- if (dirty_pct > srv_max_buf_pool_modified_pct) {
+ if (dirty_pct >= srv_max_buf_pool_modified_pct) {
/* We have crossed the high water mark of dirty
pages In this case we start flushing at 100% of
innodb_io_capacity. */
return(100);
}
- } else if (dirty_pct > srv_max_dirty_pages_pct_lwm) {
+ } else if (dirty_pct >= srv_max_dirty_pages_pct_lwm) {
/* We should start flushing pages gradually. */
- return((dirty_pct * 100)
- / (srv_max_buf_pool_modified_pct + 1));
+ return(static_cast<ulint>((dirty_pct * 100)
+ / (srv_max_buf_pool_modified_pct + 1)));
}
return(0);
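
The ramp above scales flushing effort with the dirty-page ratio. A standalone sketch with a worked example (simplified: the lwm==0 branch that jumps straight to 100% is omitted):

#include <cassert>

unsigned pct_for_dirty(double dirty_pct, double lwm, double max_pct)
{
	if (dirty_pct == 0.0 || dirty_pct < lwm) {
		return 0;	/* too few dirty pages to bother */
	}
	return (unsigned) ((dirty_pct * 100) / (max_pct + 1));
}

int main()
{
	/* lwm=10, hard limit=75: 30% dirty => 3000/76 = 39% of
	innodb_io_capacity; at the 75% limit => 7500/76 = 98%. */
	assert(pct_for_dirty(30.0, 10.0, 75.0) == 39);
	assert(pct_for_dirty(75.0, 10.0, 75.0) == 98);
}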
@@ -2131,8 +2398,8 @@ af_get_pct_for_lsn(
{
lsn_t max_async_age;
lsn_t lsn_age_factor;
- lsn_t af_lwm = (srv_adaptive_flushing_lwm
- * log_get_capacity()) / 100;
+ lsn_t af_lwm = (lsn_t) ((srv_adaptive_flushing_lwm
+ * log_get_capacity()) / 100);
if (age < af_lwm) {
/* No adaptive flushing. */
@@ -2163,22 +2430,23 @@ af_get_pct_for_lsn(
/*********************************************************************//**
This function is called approximately once every second by the
page_cleaner thread. Based on various factors it decides if there is a
-need to do flushing. If flushing is needed it is performed and the
-number of pages flushed is returned.
-@return number of pages flushed */
+need to do flushing.
+@return number of pages recommended to be flushed
+@param lsn_limit pointer to return LSN up to which flushing must happen
+@param last_pages_in the number of pages flushed by the last flush_list
+ flushing. */
static
ulint
-page_cleaner_flush_pages_if_needed(void)
+page_cleaner_flush_pages_recommendation(
/*====================================*/
+ lsn_t* lsn_limit,
+ ulint last_pages_in)
{
- static lsn_t lsn_avg_rate = 0;
static lsn_t prev_lsn = 0;
- static lsn_t last_lsn = 0;
static ulint sum_pages = 0;
- static ulint last_pages = 0;
- static ulint prev_pages = 0;
static ulint avg_page_rate = 0;
static ulint n_iterations = 0;
+ static time_t prev_time;
lsn_t oldest_lsn;
lsn_t cur_lsn;
lsn_t age;
@@ -2187,7 +2455,6 @@ page_cleaner_flush_pages_if_needed(void)
ulint pct_for_dirty = 0;
ulint pct_for_lsn = 0;
ulint pct_total = 0;
- int age_factor = 0;
cur_lsn = log_get_lsn_nowait();
@@ -2201,6 +2468,7 @@ page_cleaner_flush_pages_if_needed(void)
if (prev_lsn == 0) {
/* First time around. */
prev_lsn = cur_lsn;
+ prev_time = time(NULL);
return(0);
}
@@ -2208,19 +2476,110 @@ page_cleaner_flush_pages_if_needed(void)
return(0);
}
+ sum_pages += last_pages_in;
+
+ time_t curr_time = time(NULL);
+ double time_elapsed = difftime(curr_time, prev_time);
+
/* We update our variables every srv_flushing_avg_loops
iterations to smooth out transition in workload. */
- if (++n_iterations >= srv_flushing_avg_loops) {
+ if (++n_iterations >= srv_flushing_avg_loops
+ || time_elapsed >= srv_flushing_avg_loops) {
- avg_page_rate = ((sum_pages / srv_flushing_avg_loops)
- + avg_page_rate) / 2;
+ if (time_elapsed < 1) {
+ time_elapsed = 1;
+ }
+
+ avg_page_rate = static_cast<ulint>(
+ ((static_cast<double>(sum_pages)
+ / time_elapsed)
+ + avg_page_rate) / 2);
/* How much LSN we have generated since last call. */
- lsn_rate = (cur_lsn - prev_lsn) / srv_flushing_avg_loops;
+ lsn_rate = static_cast<lsn_t>(
+ static_cast<double>(cur_lsn - prev_lsn)
+ / time_elapsed);
lsn_avg_rate = (lsn_avg_rate + lsn_rate) / 2;
+ /* aggregate stats of all slots */
+ mutex_enter(&page_cleaner.mutex);
+
+ ulint flush_tm = page_cleaner.flush_time;
+ ulint flush_pass = page_cleaner.flush_pass;
+
+ page_cleaner.flush_time = 0;
+ page_cleaner.flush_pass = 0;
+
+ ulint lru_tm = 0;
+ ulint list_tm = 0;
+ ulint lru_pass = 0;
+ ulint list_pass = 0;
+
+ for (ulint i = 0; i < page_cleaner.n_slots; i++) {
+ page_cleaner_slot_t* slot;
+
+ slot = &page_cleaner.slots[i];
+
+ lru_tm += slot->flush_lru_time;
+ lru_pass += slot->flush_lru_pass;
+ list_tm += slot->flush_list_time;
+ list_pass += slot->flush_list_pass;
+
+ slot->flush_lru_time = 0;
+ slot->flush_lru_pass = 0;
+ slot->flush_list_time = 0;
+ slot->flush_list_pass = 0;
+ }
+
+ mutex_exit(&page_cleaner.mutex);
+
+ /* minimum values are 1, to avoid dividing by zero. */
+ if (lru_tm < 1) {
+ lru_tm = 1;
+ }
+ if (list_tm < 1) {
+ list_tm = 1;
+ }
+ if (flush_tm < 1) {
+ flush_tm = 1;
+ }
+
+ if (lru_pass < 1) {
+ lru_pass = 1;
+ }
+ if (list_pass < 1) {
+ list_pass = 1;
+ }
+ if (flush_pass < 1) {
+ flush_pass = 1;
+ }
+
+ MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_TIME_SLOT,
+ list_tm / list_pass);
+ MONITOR_SET(MONITOR_LRU_BATCH_FLUSH_AVG_TIME_SLOT,
+ lru_tm / lru_pass);
+
+ MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_TIME_THREAD,
+ list_tm / (srv_n_page_cleaners * flush_pass));
+ MONITOR_SET(MONITOR_LRU_BATCH_FLUSH_AVG_TIME_THREAD,
+ lru_tm / (srv_n_page_cleaners * flush_pass));
+ MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_TIME_EST,
+ flush_tm * list_tm / flush_pass
+ / (list_tm + lru_tm));
+ MONITOR_SET(MONITOR_LRU_BATCH_FLUSH_AVG_TIME_EST,
+ flush_tm * lru_tm / flush_pass
+ / (list_tm + lru_tm));
+ MONITOR_SET(MONITOR_FLUSH_AVG_TIME, flush_tm / flush_pass);
+
+ MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_PASS,
+ list_pass / page_cleaner.n_slots);
+ MONITOR_SET(MONITOR_LRU_BATCH_FLUSH_AVG_PASS,
+ lru_pass / page_cleaner.n_slots);
+ MONITOR_SET(MONITOR_FLUSH_AVG_PASS, flush_pass);
+
prev_lsn = cur_lsn;
+ prev_time = curr_time;
n_iterations = 0;
@@ -2238,121 +2597,735 @@ page_cleaner_flush_pages_if_needed(void)
pct_total = ut_max(pct_for_dirty, pct_for_lsn);
+	/* Estimate the number of pages to be flushed for lsn progress. */
+ ulint sum_pages_for_lsn = 0;
+ lsn_t target_lsn = oldest_lsn
+ + lsn_avg_rate * buf_flush_lsn_scan_factor;
+
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
+ buf_pool_t* buf_pool = buf_pool_from_array(i);
+ ulint pages_for_lsn = 0;
+
+ buf_flush_list_mutex_enter(buf_pool);
+ for (buf_page_t* b = UT_LIST_GET_LAST(buf_pool->flush_list);
+ b != NULL;
+ b = UT_LIST_GET_PREV(list, b)) {
+ if (b->oldest_modification > target_lsn) {
+ break;
+ }
+ ++pages_for_lsn;
+ }
+ buf_flush_list_mutex_exit(buf_pool);
+
+ sum_pages_for_lsn += pages_for_lsn;
+
+ mutex_enter(&page_cleaner.mutex);
+ ut_ad(page_cleaner.slots[i].state
+ == PAGE_CLEANER_STATE_NONE);
+ page_cleaner.slots[i].n_pages_requested
+ = pages_for_lsn / buf_flush_lsn_scan_factor + 1;
+ mutex_exit(&page_cleaner.mutex);
+ }
+
+ sum_pages_for_lsn /= buf_flush_lsn_scan_factor;
+	if (sum_pages_for_lsn < 1) {
+ sum_pages_for_lsn = 1;
+ }
+
/* Cap the maximum IO capacity that we are going to use by
- max_io_capacity. */
- n_pages = (PCT_IO(pct_total) + avg_page_rate) / 2;
+	max_io_capacity. Limit the value to avoid increasing it too quickly. */
+ ulint pages_for_lsn =
+ std::min<ulint>(sum_pages_for_lsn, srv_max_io_capacity * 2);
+
+ n_pages = (PCT_IO(pct_total) + avg_page_rate + pages_for_lsn) / 3;
if (n_pages > srv_max_io_capacity) {
n_pages = srv_max_io_capacity;
}
- if (last_pages && cur_lsn - last_lsn > lsn_avg_rate / 2) {
- age_factor = static_cast<int>(prev_pages / last_pages);
+ /* Normalize request for each instance */
+ mutex_enter(&page_cleaner.mutex);
+ ut_ad(page_cleaner.n_slots_requested == 0);
+ ut_ad(page_cleaner.n_slots_flushing == 0);
+ ut_ad(page_cleaner.n_slots_finished == 0);
+
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
+		/* If the redo log has enough free space, ignore the
+		age distribution of pages. */
+ page_cleaner.slots[i].n_pages_requested = pct_for_lsn > 30 ?
+ page_cleaner.slots[i].n_pages_requested
+ * n_pages / sum_pages_for_lsn + 1
+ : n_pages / srv_buf_pool_instances;
}
+ mutex_exit(&page_cleaner.mutex);
MONITOR_SET(MONITOR_FLUSH_N_TO_FLUSH_REQUESTED, n_pages);
- prev_pages = n_pages;
- n_pages = page_cleaner_do_flush_batch(
- n_pages, oldest_lsn + lsn_avg_rate * (age_factor + 1));
-
- last_lsn= cur_lsn;
- last_pages= n_pages + 1;
+ MONITOR_SET(MONITOR_FLUSH_N_TO_FLUSH_BY_AGE, sum_pages_for_lsn);
MONITOR_SET(MONITOR_FLUSH_AVG_PAGE_RATE, avg_page_rate);
MONITOR_SET(MONITOR_FLUSH_LSN_AVG_RATE, lsn_avg_rate);
MONITOR_SET(MONITOR_FLUSH_PCT_FOR_DIRTY, pct_for_dirty);
MONITOR_SET(MONITOR_FLUSH_PCT_FOR_LSN, pct_for_lsn);
- if (n_pages) {
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
- MONITOR_FLUSH_ADAPTIVE_COUNT,
- MONITOR_FLUSH_ADAPTIVE_PAGES,
- n_pages);
-
- sum_pages += n_pages;
- }
+ *lsn_limit = LSN_MAX;
return(n_pages);
}
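
The recommendation above blends three signals with equal weight: the io_capacity percentage derived from dirty-page and LSN pressure, the smoothed historical page rate, and the LSN-progress estimate (itself capped at twice the maximum io capacity). A standalone sketch of that final computation, with illustrative names:

#include <algorithm>

typedef unsigned long ulint_t;

ulint_t recommend_pages(ulint_t io_capacity, ulint_t max_io_capacity,
			ulint_t pct_total, ulint_t avg_page_rate,
			ulint_t pages_for_lsn)
{
	/* Limit the lsn-based estimate to avoid ramping up too fast. */
	pages_for_lsn = std::min(pages_for_lsn, max_io_capacity * 2);

	ulint_t n_pages = (io_capacity * pct_total / 100
			   + avg_page_rate + pages_for_lsn) / 3;

	return std::min(n_pages, max_io_capacity);
}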
/*********************************************************************//**
Puts the page_cleaner thread to sleep if it has finished work in less
-than a second */
+than a second
+@retval 0 if woken up by the event being set,
+@retval OS_SYNC_TIME_EXCEEDED if the timeout was exceeded
+@param next_loop_time time when next loop iteration should start
+@param sig_count zero or the value returned by previous call of
+ os_event_reset()
+@param cur_time current time as in ut_time_ms() */
static
-void
-page_cleaner_sleep_if_needed(
-/*=========================*/
- ulint next_loop_time) /*!< in: time when next loop iteration
- should start */
+ulint
+pc_sleep_if_needed(
+/*===============*/
+ ulint next_loop_time,
+ int64_t sig_count,
+ ulint cur_time)
{
/* No sleep if we are cleaning the buffer pool during the shutdown
with everything else finished */
if (srv_shutdown_state == SRV_SHUTDOWN_FLUSH_PHASE)
- return;
-
- ulint cur_time = ut_time_ms();
+ return OS_SYNC_TIME_EXCEEDED;
if (next_loop_time > cur_time) {
/* Get sleep interval in micro seconds. We use
ut_min() to avoid long sleep in case of wrap around. */
ulint sleep_us;
- sleep_us = ut_min(1000000, (next_loop_time - cur_time) * 1000);
+ sleep_us = ut_min(static_cast<ulint>(1000000),
+ (next_loop_time - cur_time) * 1000);
+
+ return(os_event_wait_time_low(buf_flush_event,
+ sleep_us, sig_count));
+ }
+
+ return(OS_SYNC_TIME_EXCEEDED);
+}
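
The sleep computation above is simple but deliberate: wait out only the remainder of the 1-second loop budget, capped at one second so a wrapped ut_time_ms() cannot produce an enormous wait. As a sketch:

#include <algorithm>

unsigned long sleep_us(unsigned long next_loop_time, unsigned long cur_time)
{
	if (next_loop_time <= cur_time) {
		return 0;	/* already late: do not sleep */
	}
	return std::min(1000000ul, (next_loop_time - cur_time) * 1000);
}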
+
+/******************************************************************//**
+Initialize page_cleaner. */
+void
+buf_flush_page_cleaner_init(void)
+/*=============================*/
+{
+ ut_ad(!page_cleaner.is_running);
+
+ mutex_create(LATCH_ID_PAGE_CLEANER, &page_cleaner.mutex);
+
+ page_cleaner.is_requested = os_event_create("pc_is_requested");
+ page_cleaner.is_finished = os_event_create("pc_is_finished");
+
+ page_cleaner.n_slots = static_cast<ulint>(srv_buf_pool_instances);
+
+ ut_d(page_cleaner.n_disabled_debug = 0);
+
+ page_cleaner.is_running = true;
+}
+
+/**
+Requests all slots to flush their buffer pool instances.
+@param min_n	wished minimum number of blocks flushed
+ (it is not guaranteed that the actual number is that big)
+@param lsn_limit in the case BUF_FLUSH_LIST all blocks whose
+ oldest_modification is smaller than this should be flushed
+ (if their number does not exceed min_n), otherwise ignored
+*/
+static
+void
+pc_request(
+ ulint min_n,
+ lsn_t lsn_limit)
+{
+ if (min_n != ULINT_MAX) {
+ /* Ensure that flushing is spread evenly amongst the
+ buffer pool instances. When min_n is ULINT_MAX
+ we need to flush everything up to the lsn limit
+ so no limit here. */
+ min_n = (min_n + srv_buf_pool_instances - 1)
+ / srv_buf_pool_instances;
+ }
+
+ mutex_enter(&page_cleaner.mutex);
+
+ ut_ad(page_cleaner.n_slots_requested == 0);
+ ut_ad(page_cleaner.n_slots_flushing == 0);
+ ut_ad(page_cleaner.n_slots_finished == 0);
+
+ page_cleaner.requested = (min_n > 0);
+ page_cleaner.lsn_limit = lsn_limit;
+
+ for (ulint i = 0; i < page_cleaner.n_slots; i++) {
+ page_cleaner_slot_t* slot = &page_cleaner.slots[i];
+
+ ut_ad(slot->state == PAGE_CLEANER_STATE_NONE);
+
+ if (min_n == ULINT_MAX) {
+ slot->n_pages_requested = ULINT_MAX;
+ } else if (min_n == 0) {
+ slot->n_pages_requested = 0;
+ }
+
+ /* slot->n_pages_requested was already set by
+ page_cleaner_flush_pages_recommendation() */
+
+ slot->state = PAGE_CLEANER_STATE_REQUESTED;
+ }
+
+ page_cleaner.n_slots_requested = page_cleaner.n_slots;
+ page_cleaner.n_slots_flushing = 0;
+ page_cleaner.n_slots_finished = 0;
+
+ os_event_set(page_cleaner.is_requested);
+
+ mutex_exit(&page_cleaner.mutex);
+}
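
The per-instance split in pc_request() uses ceiling division, so the slots together always cover at least min_n pages. A tiny sketch with examples:

#include <cassert>

unsigned long per_instance(unsigned long min_n, unsigned long n_instances)
{
	return (min_n + n_instances - 1) / n_instances;
}

int main()
{
	assert(per_instance(1000, 8) == 125);	/* divides evenly */
	assert(per_instance(10, 8) == 2);	/* rounds up: 8 * 2 >= 10 */
}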
+
+/**
+Performs the flush for one slot.
+@return the number of slots that have not been processed yet. */
+static
+ulint
+pc_flush_slot(void)
+{
+ ulint lru_tm = 0;
+ ulint list_tm = 0;
+ int lru_pass = 0;
+ int list_pass = 0;
+
+ mutex_enter(&page_cleaner.mutex);
+
+ if (!page_cleaner.n_slots_requested) {
+ os_event_reset(page_cleaner.is_requested);
+ } else {
+ page_cleaner_slot_t* slot = NULL;
+ ulint i;
+
+ for (i = 0; i < page_cleaner.n_slots; i++) {
+ slot = &page_cleaner.slots[i];
+
+ if (slot->state == PAGE_CLEANER_STATE_REQUESTED) {
+ break;
+ }
+ }
+
+ /* slot should be found because
+ page_cleaner.n_slots_requested > 0 */
+ ut_a(i < page_cleaner.n_slots);
+
+ buf_pool_t* buf_pool = buf_pool_from_array(i);
+
+ page_cleaner.n_slots_requested--;
+ page_cleaner.n_slots_flushing++;
+ slot->state = PAGE_CLEANER_STATE_FLUSHING;
+
+ if (UNIV_UNLIKELY(!page_cleaner.is_running)) {
+ slot->n_flushed_lru = 0;
+ slot->n_flushed_list = 0;
+ goto finish_mutex;
+ }
+
+ if (page_cleaner.n_slots_requested == 0) {
+ os_event_reset(page_cleaner.is_requested);
+ }
+
+ mutex_exit(&page_cleaner.mutex);
+
+ lru_tm = ut_time_ms();
+
+ /* Flush pages from end of LRU if required */
+ slot->n_flushed_lru = buf_flush_LRU_list(buf_pool);
+
+ lru_tm = ut_time_ms() - lru_tm;
+ lru_pass++;
+
+ if (UNIV_UNLIKELY(!page_cleaner.is_running)) {
+ slot->n_flushed_list = 0;
+ goto finish;
+ }
+
+ /* Flush pages from flush_list if required */
+ if (page_cleaner.requested) {
+ flush_counters_t n;
+ memset(&n, 0, sizeof(flush_counters_t));
+ list_tm = ut_time_ms();
+
+ slot->succeeded_list = buf_flush_do_batch(
+ buf_pool, BUF_FLUSH_LIST,
+ slot->n_pages_requested,
+ page_cleaner.lsn_limit,
+ &n);
+
+ slot->n_flushed_list = n.flushed;
+
+ list_tm = ut_time_ms() - list_tm;
+ list_pass++;
+ } else {
+ slot->n_flushed_list = 0;
+ slot->succeeded_list = true;
+ }
+finish:
+ mutex_enter(&page_cleaner.mutex);
+finish_mutex:
+ page_cleaner.n_slots_flushing--;
+ page_cleaner.n_slots_finished++;
+ slot->state = PAGE_CLEANER_STATE_FINISHED;
+
+ slot->flush_lru_time += lru_tm;
+ slot->flush_list_time += list_tm;
+ slot->flush_lru_pass += lru_pass;
+ slot->flush_list_pass += list_pass;
+
+ if (page_cleaner.n_slots_requested == 0
+ && page_cleaner.n_slots_flushing == 0) {
+ os_event_set(page_cleaner.is_finished);
+ }
+ }
+
+ ulint ret = page_cleaner.n_slots_requested;
+
+ mutex_exit(&page_cleaner.mutex);
+
+ return(ret);
+}
+
+/**
+Wait until all flush requests are finished.
+@param n_flushed_lru number of pages flushed from the end of the LRU list.
+@param n_flushed_list number of pages flushed from the end of the
+ flush_list.
+@return true if all flush_list flush batches succeeded. */
+static
+bool
+pc_wait_finished(
+ ulint* n_flushed_lru,
+ ulint* n_flushed_list)
+{
+ bool all_succeeded = true;
+
+ *n_flushed_lru = 0;
+ *n_flushed_list = 0;
+
+ os_event_wait(page_cleaner.is_finished);
+
+ mutex_enter(&page_cleaner.mutex);
+
+ ut_ad(page_cleaner.n_slots_requested == 0);
+ ut_ad(page_cleaner.n_slots_flushing == 0);
+ ut_ad(page_cleaner.n_slots_finished == page_cleaner.n_slots);
+
+ for (ulint i = 0; i < page_cleaner.n_slots; i++) {
+ page_cleaner_slot_t* slot = &page_cleaner.slots[i];
+
+ ut_ad(slot->state == PAGE_CLEANER_STATE_FINISHED);
+
+ *n_flushed_lru += slot->n_flushed_lru;
+ *n_flushed_list += slot->n_flushed_list;
+ all_succeeded &= slot->succeeded_list;
+
+ slot->state = PAGE_CLEANER_STATE_NONE;
+
+ slot->n_pages_requested = 0;
+ }
+
+ page_cleaner.n_slots_finished = 0;
+
+ os_event_reset(page_cleaner.is_finished);
+
+ mutex_exit(&page_cleaner.mutex);
+
+ return(all_succeeded);
+}
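
Taken together, pc_request(), pc_flush_slot() and pc_wait_finished() form a slot-per-instance handshake: slots move REQUESTED -> FLUSHING -> FINISHED and the coordinator waits until all are finished. The following condensed standalone model uses std::condition_variable in place of os_event and omits the is_requested event on which workers park; it is an illustration, not the InnoDB implementation:

#include <condition_variable>
#include <mutex>
#include <vector>

enum slot_state { NONE, REQUESTED, FLUSHING, FINISHED };

struct cleaner_t {
	std::mutex		mtx;
	std::condition_variable	is_finished;
	std::vector<slot_state>	slots;
	size_t			n_requested = 0;
	size_t			n_flushing = 0;
	size_t			n_finished = 0;
};

void request(cleaner_t& pc)
{
	std::lock_guard<std::mutex> g(pc.mtx);
	for (slot_state& s : pc.slots) s = REQUESTED;
	pc.n_requested = pc.slots.size();
}

/* Claim one REQUESTED slot, "flush" it, mark it FINISHED.
Returns the number of slots still waiting to be claimed. */
size_t flush_slot(cleaner_t& pc)
{
	std::unique_lock<std::mutex> g(pc.mtx);
	if (pc.n_requested > 0) {
		size_t i = 0;
		while (pc.slots[i] != REQUESTED) i++;
		pc.slots[i] = FLUSHING;
		pc.n_requested--; pc.n_flushing++;
		g.unlock();
		/* ... flush the LRU and flush_list of instance i ... */
		g.lock();
		pc.slots[i] = FINISHED;
		pc.n_flushing--; pc.n_finished++;
		if (pc.n_requested == 0 && pc.n_flushing == 0) {
			pc.is_finished.notify_all();
		}
	}
	return pc.n_requested;
}

void wait_finished(cleaner_t& pc)
{
	std::unique_lock<std::mutex> g(pc.mtx);
	pc.is_finished.wait(g, [&] {
		return pc.n_finished == pc.slots.size();
	});
	for (slot_state& s : pc.slots) s = NONE;
	pc.n_finished = 0;
}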
+
+#ifdef UNIV_LINUX
+/**
+Set priority for page_cleaner threads.
+@param[in] priority priority intended to set
+@return true if set as intended */
+static
+bool
+buf_flush_page_cleaner_set_priority(
+ int priority)
+{
+ setpriority(PRIO_PROCESS, (pid_t)syscall(SYS_gettid),
+ priority);
+ return(getpriority(PRIO_PROCESS, (pid_t)syscall(SYS_gettid))
+ == priority);
+}
+#endif /* UNIV_LINUX */
+
+#ifdef UNIV_DEBUG
+/** Loop used to disable page cleaner threads. */
+static
+void
+buf_flush_page_cleaner_disabled_loop(void)
+{
+ if (!innodb_page_cleaner_disabled_debug) {
+		/* Return early to avoid entering and exiting the mutex. */
+ return;
+ }
+
+ mutex_enter(&page_cleaner.mutex);
+ page_cleaner.n_disabled_debug++;
+ mutex_exit(&page_cleaner.mutex);
- ib_int64_t sig_count = os_event_reset(buf_flush_event);
+ while (innodb_page_cleaner_disabled_debug
+ && srv_shutdown_state == SRV_SHUTDOWN_NONE
+ && page_cleaner.is_running) {
- os_event_wait_time_low(buf_flush_event, sleep_us, sig_count);
+ os_thread_sleep(100000); /* [A] */
}
+
+ /* We need to wait for threads exiting here, otherwise we would
+ encounter problem when we quickly perform following steps:
+ 1) SET GLOBAL innodb_page_cleaner_disabled_debug = 1;
+ 2) SET GLOBAL innodb_page_cleaner_disabled_debug = 0;
+ 3) SET GLOBAL innodb_page_cleaner_disabled_debug = 1;
+ That's because after step 1 this thread could still be sleeping
+ inside the loop above at [A] and steps 2, 3 could happen before
+	this thread wakes up from [A]. In that case this thread would
+	not re-increment n_disabled_debug and we would wait for
+	it forever in buf_flush_page_cleaner_disabled_debug_update(...).
+
+ Therefore we are waiting in step 2 for this thread exiting here. */
+
+ mutex_enter(&page_cleaner.mutex);
+ page_cleaner.n_disabled_debug--;
+ mutex_exit(&page_cleaner.mutex);
}
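
The counting protocol above can be modeled standalone: each thread increments a shared counter before parking and decrements it after leaving, so the toggling side can wait on the counter rather than guess whether a thread is still asleep. A sketch:

#include <atomic>
#include <chrono>
#include <thread>

std::atomic<bool> disabled{false};
std::atomic<int>  n_disabled{0};

void disabled_loop()
{
	if (!disabled.load()) {
		return;
	}

	n_disabled.fetch_add(1);	/* visible before we park */
	while (disabled.load()) {
		std::this_thread::sleep_for(std::chrono::milliseconds(100));
	}
	n_disabled.fetch_sub(1);	/* the toggler waits for zero */
}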
+/** Disables page cleaner threads (coordinator and workers).
+It's used by: SET GLOBAL innodb_page_cleaner_disabled_debug = 1 (0).
+@param[in] thd thread handle
+@param[in] var pointer to system variable
+@param[out] var_ptr where the formal string goes
+@param[in] save immediate result from check function */
+void
+buf_flush_page_cleaner_disabled_debug_update(
+ THD* thd,
+ struct st_mysql_sys_var* var,
+ void* var_ptr,
+ const void* save)
+{
+ if (!page_cleaner.is_running) {
+ return;
+ }
+
+ if (!*static_cast<const my_bool*>(save)) {
+ if (!innodb_page_cleaner_disabled_debug) {
+ return;
+ }
+ innodb_page_cleaner_disabled_debug = false;
+
+ /* Enable page cleaner threads. */
+ while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
+ mutex_enter(&page_cleaner.mutex);
+ const ulint n = page_cleaner.n_disabled_debug;
+ mutex_exit(&page_cleaner.mutex);
+			/* Check that all threads have been enabled, to
+			avoid problems if we decide to re-disable them
+			soon. */
+ if (n == 0) {
+ break;
+ }
+ }
+ return;
+ }
+
+ if (innodb_page_cleaner_disabled_debug) {
+ return;
+ }
+
+ innodb_page_cleaner_disabled_debug = true;
+
+ while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
+ /* Workers are possibly sleeping on is_requested.
+
+		We have to wake them, otherwise they might never
+		notice that they should be disabled, and we would
+		wait for them here forever.
+
+		That is why we use a sleep-loop instead of simply
+		waiting on some disabled_debug_event. */
+ os_event_set(page_cleaner.is_requested);
+
+ mutex_enter(&page_cleaner.mutex);
+
+ ut_ad(page_cleaner.n_disabled_debug
+ <= srv_n_page_cleaners);
+
+ if (page_cleaner.n_disabled_debug
+ == srv_n_page_cleaners) {
+
+ mutex_exit(&page_cleaner.mutex);
+ break;
+ }
+
+ mutex_exit(&page_cleaner.mutex);
+
+ os_thread_sleep(100000);
+ }
+}
+#endif /* UNIV_DEBUG */
/******************************************************************//**
page_cleaner thread tasked with flushing dirty pages from the buffer
-pools. As of now we'll have only one instance of this thread.
+pools. As of now we'll have only one coordinator.
@return a dummy parameter */
-extern "C" UNIV_INTERN
+extern "C"
os_thread_ret_t
-DECLARE_THREAD(buf_flush_page_cleaner_thread)(
-/*==========================================*/
- void* arg MY_ATTRIBUTE((unused)))
- /*!< in: a dummy parameter required by
- os_thread_create */
+DECLARE_THREAD(buf_flush_page_cleaner_coordinator)(void*)
{
my_thread_init();
- ulint next_loop_time = ut_time_ms() + 1000;
- ulint n_flushed = 0;
- ulint last_activity = srv_get_activity_count();
-
- ut_ad(!srv_read_only_mode);
-
#ifdef UNIV_PFS_THREAD
- pfs_register_thread(buf_page_cleaner_thread_key);
+ pfs_register_thread(page_cleaner_thread_key);
#endif /* UNIV_PFS_THREAD */
+ ut_ad(!srv_read_only_mode);
#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "InnoDB: page_cleaner thread running, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
+ ib::info() << "page_cleaner thread running, id "
+ << os_thread_pf(os_thread_get_curr_id());
#endif /* UNIV_DEBUG_THREAD_CREATION */
+#ifdef UNIV_LINUX
+	/* Linux allows a different priority to be set for each thread,
+	so it is worth trying to set a high priority for the page
+	cleaner threads. */
+ if (buf_flush_page_cleaner_set_priority(
+ buf_flush_page_cleaner_priority)) {
+
+ ib::info() << "page_cleaner coordinator priority: "
+ << buf_flush_page_cleaner_priority;
+ } else {
+ ib::info() << "If the mysqld execution user is authorized,"
+ " page cleaner thread priority can be changed."
+ " See the man page of setpriority().";
+ }
+ /* Signal that setpriority() has been attempted. */
+ os_event_set(recv_sys->flush_end);
+#endif /* UNIV_LINUX */
+
+ do {
+		/* Handle flushing requests during recovery. */
+ ulint n_flushed_lru = 0;
+ ulint n_flushed_list = 0;
+
+ os_event_wait(recv_sys->flush_start);
+
+ if (!recv_writer_thread_active) {
+ break;
+ }
+
+ switch (recv_sys->flush_type) {
+ case BUF_FLUSH_LRU:
+ /* Flush pages from end of LRU if required */
+ pc_request(0, LSN_MAX);
+ while (pc_flush_slot() > 0) {}
+ pc_wait_finished(&n_flushed_lru, &n_flushed_list);
+ break;
+
+ case BUF_FLUSH_LIST:
+ /* Flush all pages */
+ do {
+ pc_request(ULINT_MAX, LSN_MAX);
+ while (pc_flush_slot() > 0) {}
+ } while (!pc_wait_finished(&n_flushed_lru,
+ &n_flushed_list));
+ break;
+
+ default:
+ ut_ad(0);
+ }
+
+ os_event_reset(recv_sys->flush_start);
+ os_event_set(recv_sys->flush_end);
+ } while (recv_writer_thread_active);
+
+ os_event_wait(buf_flush_event);
+
+ ulint ret_sleep = 0;
+ ulint n_evicted = 0;
+ ulint n_flushed_last = 0;
+ ulint warn_interval = 1;
+ ulint warn_count = 0;
+ int64_t sig_count = os_event_reset(buf_flush_event);
+ ulint next_loop_time = ut_time_ms() + 1000;
+ ulint n_flushed = 0;
+ ulint last_activity = srv_get_activity_count();
+ ulint last_pages = 0;
while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
+ ulint curr_time = ut_time_ms();
+
+ /* The page_cleaner skips sleep if the server is
+ idle and there are no pending IOs in the buffer pool
+ and there is work to do. */
+ if (srv_check_activity(last_activity)
+ || buf_get_n_pending_read_ios()
+ || n_flushed == 0) {
+
+ ret_sleep = pc_sleep_if_needed(
+ next_loop_time, sig_count, curr_time);
+ } else if (curr_time > next_loop_time) {
+ ret_sleep = OS_SYNC_TIME_EXCEEDED;
+ } else {
+ ret_sleep = 0;
+ }
+
+ if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
+ break;
+ }
+
+ sig_count = os_event_reset(buf_flush_event);
+
+ if (ret_sleep == OS_SYNC_TIME_EXCEEDED) {
+ if (global_system_variables.log_warnings > 2
+ && curr_time > next_loop_time + 3000
+ && !(test_flags & TEST_SIGINT)) {
+ if (warn_count == 0) {
+ ib::info() << "page_cleaner: 1000ms"
+ " intended loop took "
+ << 1000 + curr_time
+ - next_loop_time
+ << "ms. The settings might not"
+ " be optimal. (flushed="
+ << n_flushed_last
+ << " and evicted="
+ << n_evicted
+ << ", during the time.)";
+ if (warn_interval > 300) {
+ warn_interval = 600;
+ } else {
+ warn_interval *= 2;
+ }
+
+ warn_count = warn_interval;
+ } else {
+ --warn_count;
+ }
+ } else {
+ /* reset counter */
+ warn_interval = 1;
+ warn_count = 0;
+ }
+
+ next_loop_time = curr_time + 1000;
+ n_flushed_last = n_evicted = 0;
+ }
+
+ if (ret_sleep != OS_SYNC_TIME_EXCEEDED
+ && srv_flush_sync
+ && buf_flush_sync_lsn > 0) {
+ /* woke up for flush_sync */
+ mutex_enter(&page_cleaner.mutex);
+ lsn_t lsn_limit = buf_flush_sync_lsn;
+ buf_flush_sync_lsn = 0;
+ mutex_exit(&page_cleaner.mutex);
+
+ /* Request flushing for threads */
+ pc_request(ULINT_MAX, lsn_limit);
+
+ ulint tm = ut_time_ms();
+
+			/* The coordinator also processes requests. */
+ while (pc_flush_slot() > 0) {}
+
+ /* only coordinator is using these counters,
+ so no need to protect by lock. */
+ page_cleaner.flush_time += ut_time_ms() - tm;
+ page_cleaner.flush_pass++;
+
+ /* Wait for all slots to be finished */
+ ulint n_flushed_lru = 0;
+ ulint n_flushed_list = 0;
+ pc_wait_finished(&n_flushed_lru, &n_flushed_list);
+
+ if (n_flushed_list > 0 || n_flushed_lru > 0) {
+ buf_flush_stats(n_flushed_list, n_flushed_lru);
+
+ MONITOR_INC_VALUE_CUMULATIVE(
+ MONITOR_FLUSH_SYNC_TOTAL_PAGE,
+ MONITOR_FLUSH_SYNC_COUNT,
+ MONITOR_FLUSH_SYNC_PAGES,
+ n_flushed_lru + n_flushed_list);
+ }
+
+ n_flushed = n_flushed_lru + n_flushed_list;
- page_cleaner_sleep_if_needed(next_loop_time);
+ } else if (srv_check_activity(last_activity)) {
+ ulint n_to_flush;
+ lsn_t lsn_limit = 0;
- next_loop_time = ut_time_ms() + 1000;
+ /* Estimate pages from flush_list to be flushed */
+ if (ret_sleep == OS_SYNC_TIME_EXCEEDED) {
+ last_activity = srv_get_activity_count();
+ n_to_flush =
+ page_cleaner_flush_pages_recommendation(
+ &lsn_limit, last_pages);
+ } else {
+ n_to_flush = 0;
+ }
+
+ /* Request flushing for threads */
+ pc_request(n_to_flush, lsn_limit);
+
+ ulint tm = ut_time_ms();
+
+			/* The coordinator also processes requests. */
+ while (pc_flush_slot() > 0) {
+ /* No op */
+ }
+
+ /* only coordinator is using these counters,
+ so no need to protect by lock. */
+ page_cleaner.flush_time += ut_time_ms() - tm;
+			page_cleaner.flush_pass++;
+
+ /* Wait for all slots to be finished */
+ ulint n_flushed_lru = 0;
+ ulint n_flushed_list = 0;
+
+ pc_wait_finished(&n_flushed_lru, &n_flushed_list);
+
+ if (n_flushed_list > 0 || n_flushed_lru > 0) {
+ buf_flush_stats(n_flushed_list, n_flushed_lru);
+ }
- if (srv_check_activity(last_activity)) {
- last_activity = srv_get_activity_count();
+ if (ret_sleep == OS_SYNC_TIME_EXCEEDED) {
+ last_pages = n_flushed_list;
+ }
+
+ n_evicted += n_flushed_lru;
+ n_flushed_last += n_flushed_list;
- /* Flush pages from flush_list if required */
- n_flushed += page_cleaner_flush_pages_if_needed();
+ n_flushed = n_flushed_lru + n_flushed_list;
- } else if (srv_idle_flush_pct) {
- n_flushed = page_cleaner_do_flush_batch(
- PCT_IO(100),
- LSN_MAX);
+ if (n_flushed_lru) {
+ MONITOR_INC_VALUE_CUMULATIVE(
+ MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE,
+ MONITOR_LRU_BATCH_FLUSH_COUNT,
+ MONITOR_LRU_BATCH_FLUSH_PAGES,
+ n_flushed_lru);
+ }
+
+ if (n_flushed_list) {
+ MONITOR_INC_VALUE_CUMULATIVE(
+ MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
+ MONITOR_FLUSH_ADAPTIVE_COUNT,
+ MONITOR_FLUSH_ADAPTIVE_PAGES,
+ n_flushed_list);
+ }
+
+ } else if (ret_sleep == OS_SYNC_TIME_EXCEEDED) {
+ /* no activity, slept enough */
+ buf_flush_lists(PCT_IO(100), LSN_MAX, &n_flushed);
+
+ n_flushed_last += n_flushed;
if (n_flushed) {
MONITOR_INC_VALUE_CUMULATIVE(
@@ -2360,18 +3333,23 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)(
MONITOR_FLUSH_BACKGROUND_COUNT,
MONITOR_FLUSH_BACKGROUND_PAGES,
n_flushed);
+
}
+
+ } else {
+ /* no activity, but woken up by event */
+ n_flushed = 0;
}
- /* Flush pages from end of LRU if required */
- buf_flush_LRU_tail();
+ ut_d(buf_flush_page_cleaner_disabled_loop());
}
ut_ad(srv_shutdown_state > 0);
-
- if (srv_fast_shutdown == 2) {
- /* In very fast shutdown we simulate a crash of
- buffer pool. We are not required to do any flushing */
+ if (srv_fast_shutdown == 2
+ || srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
+ /* In very fast shutdown or when innodb failed to start, we
+ simulate a crash of the buffer pool. We are not required to do
+ any flushing. */
goto thread_exit;
}
@@ -2388,7 +3366,15 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)(
dirtied until we enter SRV_SHUTDOWN_FLUSH_PHASE phase. */
do {
- n_flushed = page_cleaner_do_flush_batch(PCT_IO(100), LSN_MAX);
+ pc_request(ULINT_MAX, LSN_MAX);
+
+ while (pc_flush_slot() > 0) {}
+
+ ulint n_flushed_lru = 0;
+ ulint n_flushed_list = 0;
+ pc_wait_finished(&n_flushed_lru, &n_flushed_list);
+
+ n_flushed = n_flushed_lru + n_flushed_list;
/* We sleep only if there are no pages to flush */
if (n_flushed == 0) {
@@ -2414,15 +3400,25 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)(
bool success;
do {
+ pc_request(ULINT_MAX, LSN_MAX);
+
+ while (pc_flush_slot() > 0) {}
+
+ ulint n_flushed_lru = 0;
+ ulint n_flushed_list = 0;
+ success = pc_wait_finished(&n_flushed_lru, &n_flushed_list);
+
+ n_flushed = n_flushed_lru + n_flushed_list;
- success = buf_flush_list(PCT_IO(100), LSN_MAX, &n_flushed);
buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
+ buf_flush_wait_LRU_batch_end();
} while (!success || n_flushed > 0);
/* Some sanity checks */
ut_a(srv_get_active_thread_type() == SRV_NONE);
ut_a(srv_shutdown_state == SRV_SHUTDOWN_FLUSH_PHASE);
+
for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool = buf_pool_from_array(i);
ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == 0);
@@ -2431,21 +3427,122 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)(
/* We have lived our life. Time to die. */
thread_exit:
+	/* All worker threads are waiting on this event and no longer
+	access the page_cleaner structure. Wake them up just to make
+	them exit. */
+ page_cleaner.is_running = false;
+
+	/* Wait for all worker threads to exit. */
+ while (page_cleaner.n_workers) {
+ os_event_set(page_cleaner.is_requested);
+ os_thread_sleep(10000);
+ }
+
+ mutex_destroy(&page_cleaner.mutex);
+
+ os_event_destroy(page_cleaner.is_finished);
+ os_event_destroy(page_cleaner.is_requested);
+
buf_page_cleaner_is_active = false;
my_thread_end();
/* We count the number of threads in os_thread_exit(). A created
thread should always use that to exit and not use return() to exit. */
- os_thread_exit(NULL);
+ os_thread_exit();
OS_THREAD_DUMMY_RETURN;
}
+/******************************************************************//**
+Worker thread of page_cleaner.
+@return a dummy parameter */
+extern "C"
+os_thread_ret_t
+DECLARE_THREAD(buf_flush_page_cleaner_worker)(
+/*==========================================*/
+ void* arg MY_ATTRIBUTE((unused)))
+ /*!< in: a dummy parameter required by
+ os_thread_create */
+{
+ my_thread_init();
+
+ mutex_enter(&page_cleaner.mutex);
+ page_cleaner.n_workers++;
+ mutex_exit(&page_cleaner.mutex);
+
+#ifdef UNIV_LINUX
+	/* Linux allows a different priority to be set for each thread,
+	so it is worth trying to set a high priority for the page
+	cleaner threads. */
+ if (buf_flush_page_cleaner_set_priority(
+ buf_flush_page_cleaner_priority)) {
+
+ ib::info() << "page_cleaner worker priority: "
+ << buf_flush_page_cleaner_priority;
+ }
+#endif /* UNIV_LINUX */
+
+ while (true) {
+ os_event_wait(page_cleaner.is_requested);
+
+ ut_d(buf_flush_page_cleaner_disabled_loop());
+
+ if (!page_cleaner.is_running) {
+ break;
+ }
+
+ pc_flush_slot();
+ }
+
+ mutex_enter(&page_cleaner.mutex);
+ page_cleaner.n_workers--;
+ mutex_exit(&page_cleaner.mutex);
+
+ my_thread_end();
+
+ os_thread_exit();
+
+ OS_THREAD_DUMMY_RETURN;
+}
+
+/*******************************************************************//**
+Synchronously flush dirty blocks from the end of the flush list of all buffer
+pool instances.
+NOTE: The calling thread is not allowed to own any latches on pages! */
+void
+buf_flush_sync_all_buf_pools(void)
+/*==============================*/
+{
+ bool success;
+ do {
+ success = buf_flush_lists(ULINT_MAX, LSN_MAX, NULL);
+ buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
+ } while (!success);
+
+ ut_a(success);
+}
+
+/** Request IO burst and wake page_cleaner up.
+@param[in] lsn_limit upper limit of LSN to be flushed */
+void
+buf_flush_request_force(
+ lsn_t lsn_limit)
+{
+	/* Advance the target by lsn_avg_rate so that it does not go
+	stale before the page cleaner reacts. */
+ lsn_t lsn_target = lsn_limit + lsn_avg_rate * 3;
+
+ mutex_enter(&page_cleaner.mutex);
+ if (lsn_target > buf_flush_sync_lsn) {
+ buf_flush_sync_lsn = lsn_target;
+ }
+ mutex_exit(&page_cleaner.mutex);
+
+ os_event_set(buf_flush_event);
+}
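
buf_flush_request_force() only ever moves the shared sync target forward, so concurrent requesters cannot shrink each other's LSN targets. The update can be sketched standalone as:

#include <algorithm>
#include <mutex>

typedef unsigned long long lsn_t;

std::mutex	pc_mutex;
lsn_t		flush_sync_lsn = 0;

void request_force(lsn_t lsn_limit, lsn_t lsn_avg_rate)
{
	/* Overshoot by the recent lsn generation rate so that the
	target is still meaningful by the time the page cleaner acts. */
	lsn_t target = lsn_limit + lsn_avg_rate * 3;

	std::lock_guard<std::mutex> g(pc_mutex);
	flush_sync_lsn = std::max(flush_sync_lsn, target);
	/* ... then wake the page cleaner (os_event_set) ... */
}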
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/** Functor to validate the flush list. */
struct Check {
- void operator()(const buf_page_t* elem)
+ void operator()(const buf_page_t* elem) const
{
ut_a(elem->in_flush_list);
}
@@ -2453,7 +3550,7 @@ struct Check {
/******************************************************************//**
Validates the flush list.
-@return TRUE if ok */
+@return TRUE if ok */
static
ibool
buf_flush_validate_low(
@@ -2465,14 +3562,14 @@ buf_flush_validate_low(
ut_ad(buf_flush_list_mutex_own(buf_pool));
- UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list, Check());
+ ut_list_validate(buf_pool->flush_list, Check());
bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
/* If we are in recovery mode i.e.: flush_rbt != NULL
then each block in the flush_list must also be present
in the flush_rbt. */
- if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+ if (buf_pool->flush_rbt != NULL) {
rnode = rbt_first(buf_pool->flush_rbt);
}
@@ -2493,20 +3590,20 @@ buf_flush_validate_low(
|| buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);
ut_a(om > 0);
- if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
- buf_page_t** prpage;
+ if (buf_pool->flush_rbt != NULL) {
+ buf_page_t** prpage;
- ut_a(rnode);
+ ut_a(rnode != NULL);
prpage = rbt_value(buf_page_t*, rnode);
- ut_a(*prpage);
+ ut_a(*prpage != NULL);
ut_a(*prpage == bpage);
rnode = rbt_next(buf_pool->flush_rbt, rnode);
}
bpage = UT_LIST_GET_NEXT(list, bpage);
- ut_a(!bpage || om >= bpage->oldest_modification);
+ ut_a(bpage == NULL || om >= bpage->oldest_modification);
}
/* By this time we must have exhausted the traversal of
@@ -2518,8 +3615,7 @@ buf_flush_validate_low(
/******************************************************************//**
Validates the flush list.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
buf_flush_validate(
/*===============*/
@@ -2537,20 +3633,17 @@ buf_flush_validate(
}
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_DEBUG
/******************************************************************//**
Check if there are any dirty pages that belong to a space id in the flush
list in a particular buffer pool.
-@return number of dirty pages present in a single buffer pool */
-UNIV_INTERN
+@return number of dirty pages present in a single buffer pool */
ulint
buf_pool_get_dirty_pages_count(
/*===========================*/
buf_pool_t* buf_pool, /*!< in: buffer pool */
- ulint id) /*!< in: space id to check */
+ ulint id, /*!< in: space id to check */
+ FlushObserver* observer) /*!< in: flush observer to check */
{
ulint count = 0;
@@ -2568,7 +3661,10 @@ buf_pool_get_dirty_pages_count(
ut_ad(bpage->in_flush_list);
ut_ad(bpage->oldest_modification > 0);
- if (buf_page_get_space(bpage) == id) {
+ if ((observer != NULL
+ && observer == bpage->flush_observer)
+ || (observer == NULL
+ && id == bpage->id.space())) {
++count;
}
}
@@ -2581,13 +3677,13 @@ buf_pool_get_dirty_pages_count(
/******************************************************************//**
Check if there are any dirty pages that belong to a space id in the flush list.
-@return number of dirty pages present in all the buffer pools */
-UNIV_INTERN
+@return number of dirty pages present in all the buffer pools */
+static
ulint
buf_flush_get_dirty_pages_count(
/*============================*/
- ulint id) /*!< in: space id to check */
-
+ ulint id, /*!< in: space id to check */
+ FlushObserver* observer) /*!< in: flush observer to check */
{
ulint count = 0;
@@ -2596,9 +3692,110 @@ buf_flush_get_dirty_pages_count(
buf_pool = buf_pool_from_array(i);
- count += buf_pool_get_dirty_pages_count(buf_pool, id);
+ count += buf_pool_get_dirty_pages_count(buf_pool, id, observer);
}
return(count);
}
-#endif /* UNIV_DEBUG */
+
+/** FlushObserver constructor
+@param[in] space_id table space id
+@param[in] trx trx instance
+@param[in] stage performance schema accounting object,
+used by ALTER TABLE. It is passed to log_preflush_pool_modified_pages()
+for accounting. */
+FlushObserver::FlushObserver(
+ ulint space_id,
+ trx_t* trx,
+ ut_stage_alter_t* stage)
+ :
+ m_space_id(space_id),
+ m_trx(trx),
+ m_stage(stage),
+ m_interrupted(false)
+{
+ m_flushed = UT_NEW_NOKEY(std::vector<ulint>(srv_buf_pool_instances));
+ m_removed = UT_NEW_NOKEY(std::vector<ulint>(srv_buf_pool_instances));
+
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
+ m_flushed->at(i) = 0;
+ m_removed->at(i) = 0;
+ }
+
+ DBUG_LOG("flush", "FlushObserver(): trx->id=" << m_trx->id);
+}
+
+/** FlushObserver destructor */
+FlushObserver::~FlushObserver()
+{
+ ut_ad(buf_flush_get_dirty_pages_count(m_space_id, this) == 0);
+
+ UT_DELETE(m_flushed);
+ UT_DELETE(m_removed);
+
+ DBUG_LOG("flush", "~FlushObserver(): trx->id=" << m_trx->id);
+}
+
+/** Check whether the operation has been interrupted */
+void FlushObserver::check_interrupted()
+{
+ if (trx_is_interrupted(m_trx)) {
+ interrupted();
+ }
+}
+
+/** Notify observer of a flush
+@param[in] buf_pool buffer pool instance
+@param[in] bpage buffer page to flush */
+void
+FlushObserver::notify_flush(
+ buf_pool_t* buf_pool,
+ buf_page_t* bpage)
+{
+ ut_ad(buf_pool_mutex_own(buf_pool));
+
+ m_flushed->at(buf_pool->instance_no)++;
+
+ if (m_stage != NULL) {
+ m_stage->inc();
+ }
+
+ DBUG_LOG("flush", "Flush " << bpage->id);
+}
+
+/** Notify observer of a remove
+@param[in] buf_pool buffer pool instance
+@param[in] bpage buffer page flushed */
+void
+FlushObserver::notify_remove(
+ buf_pool_t* buf_pool,
+ buf_page_t* bpage)
+{
+ ut_ad(buf_pool_mutex_own(buf_pool));
+
+ m_removed->at(buf_pool->instance_no)++;
+
+ DBUG_LOG("flush", "Remove " << bpage->id);
+}
+
+/** Flush dirty pages and wait. */
+void
+FlushObserver::flush()
+{
+ ut_ad(m_trx);
+
+ if (!m_interrupted && m_stage) {
+ m_stage->begin_phase_flush(buf_flush_get_dirty_pages_count(
+ m_space_id, this));
+ }
+
+ buf_LRU_flush_or_remove_pages(m_space_id, this);
+
+	/* Wait until all dirty pages have been flushed. */
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
+ while (!is_complete(i)) {
+
+ os_thread_sleep(2000);
+ }
+ }
+}
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
index 29fe81dc920..9feebd9a08e 100644
--- a/storage/innobase/buf/buf0lru.cc
+++ b/storage/innobase/buf/buf0lru.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2019, MariaDB Corporation.
+Copyright (c) 2017, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,19 +25,11 @@ Created 11/5/1995 Heikki Tuuri
*******************************************************/
#include "buf0lru.h"
-
-#ifndef UNIV_HOTBACKUP
-#ifdef UNIV_NONINL
-#include "buf0lru.ic"
-#endif
-
#include "ut0byte.h"
-#include "ut0lst.h"
#include "ut0rnd.h"
-#include "sync0sync.h"
#include "sync0rw.h"
#include "hash0hash.h"
-#include "os0sync.h"
+#include "os0event.h"
#include "fil0fil.h"
#include "btr0btr.h"
#include "buf0buddy.h"
@@ -52,9 +44,6 @@ Created 11/5/1995 Heikki Tuuri
#include "log0recv.h"
#include "srv0srv.h"
#include "srv0mon.h"
-#include "lock0lock.h"
-
-#include "ha_prototypes.h"
/** The number of blocks from the LRU_old pointer onward, including
the block pointed to, must be buf_pool->LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
@@ -63,7 +52,7 @@ is allowed. Note that the tolerance must be small enough such that for
even the BUF_LRU_OLD_MIN_LEN long LRU list, the LRU_old pointer is not
allowed to point to either end of the LRU list. */
-#define BUF_LRU_OLD_TOLERANCE 20
+static const ulint BUF_LRU_OLD_TOLERANCE = 20;
/** The minimum amount of non-old blocks when the LRU_old list exists
(that is, when there are more than BUF_LRU_OLD_MIN_LEN blocks).
@@ -73,6 +62,7 @@ allowed to point to either end of the LRU list. */
# error "BUF_LRU_NON_OLD_MIN_LEN >= BUF_LRU_OLD_MIN_LEN"
#endif
+#ifdef BTR_CUR_HASH_ADAPT
/** When dropping the search hash index entries before deleting an ibd
file, we build a local array of pages belonging to that tablespace
in the buffer pool. Following is the size of that array.
@@ -80,15 +70,16 @@ We also release buf_pool->mutex after scanning this many pages of the
flush_list when dropping a table. This is to ensure that other threads
are not blocked for extended period of time when using very large
buffer pools. */
-#define BUF_LRU_DROP_SEARCH_SIZE 1024
+static const ulint BUF_LRU_DROP_SEARCH_SIZE = 1024;
+#endif /* BTR_CUR_HASH_ADAPT */
/** We scan these many blocks when looking for a clean page to evict
during LRU eviction. */
-#define BUF_LRU_SEARCH_SCAN_THRESHOLD 100
+static const ulint BUF_LRU_SEARCH_SCAN_THRESHOLD = 100;
/** If we switch on the InnoDB monitor because there are too few available
frames in the buffer pool, we set this to TRUE */
-static ibool buf_lru_switched_on_innodb_mon = FALSE;
+static bool buf_lru_switched_on_innodb_mon = false;
/** True if diagnostic message about difficult to find free blocks
in the buffer bool has already printed. */
@@ -108,11 +99,11 @@ uncompressed and compressed data), which must be clean. */
/** Number of intervals for which we keep the history of these stats.
Each interval is 1 second, defined by the rate at which
srv_error_monitor_thread() calls buf_LRU_stat_update(). */
-#define BUF_LRU_STAT_N_INTERVAL 50
+static const ulint BUF_LRU_STAT_N_INTERVAL = 50;
/** Co-efficient with which we multiply I/O operations to equate them
with page_zip_decompress() operations. */
-#define BUF_LRU_IO_TO_UNZIP_FACTOR 50
+static const ulint BUF_LRU_IO_TO_UNZIP_FACTOR = 50;
/** Sampled values buf_LRU_stat_cur.
Not protected by any mutex. Updated by buf_LRU_stat_update(). */
@@ -123,18 +114,18 @@ static ulint buf_LRU_stat_arr_ind;
/** Current operation counters. Not protected by any mutex. Cleared
by buf_LRU_stat_update(). */
-UNIV_INTERN buf_LRU_stat_t buf_LRU_stat_cur;
+buf_LRU_stat_t buf_LRU_stat_cur;
/** Running sum of past values of buf_LRU_stat_cur.
Updated by buf_LRU_stat_update(). Not Protected by any mutex. */
-UNIV_INTERN buf_LRU_stat_t buf_LRU_stat_sum;
+buf_LRU_stat_t buf_LRU_stat_sum;
/* @} */
/** @name Heuristics for detecting index scan @{ */
/** Move blocks to "new" LRU list only if the first access was at
least this many milliseconds ago. Not protected by any mutex or latch. */
-UNIV_INTERN uint buf_LRU_old_threshold_ms;
+uint buf_LRU_old_threshold_ms;
/* @} */
/******************************************************************//**
@@ -151,7 +142,7 @@ If a compressed page is freed other compressed pages may be relocated.
caller needs to free the page to the free list
@retval false if BUF_BLOCK_ZIP_PAGE was removed from page_hash. In
this case the block is already returned to the buddy allocator. */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
+static MY_ATTRIBUTE((warn_unused_result))
bool
buf_LRU_block_remove_hashed(
/*========================*/
@@ -170,8 +161,7 @@ buf_LRU_block_free_hashed_page(
be in a state where it can be freed */
/******************************************************************//**
-Increases LRU size in bytes with zip_size for compressed page,
-UNIV_PAGE_SIZE for uncompressed page in inline function */
+Increases the LRU size in bytes by the page size (inline function). */
static inline
void
incr_LRU_size_in_bytes(
@@ -180,24 +170,21 @@ incr_LRU_size_in_bytes(
buf_pool_t* buf_pool) /*!< in: buffer pool instance */
{
ut_ad(buf_pool_mutex_own(buf_pool));
- ulint zip_size = page_zip_get_size(&bpage->zip);
- buf_pool->stat.LRU_bytes += zip_size ? zip_size : UNIV_PAGE_SIZE;
+
+ buf_pool->stat.LRU_bytes += bpage->size.physical();
+
ut_ad(buf_pool->stat.LRU_bytes <= buf_pool->curr_pool_size);
}
/******************************************************************//**
Determines if the unzip_LRU list should be used for evicting a victim
instead of the general LRU list.
-@return TRUE if should use unzip_LRU */
-UNIV_INTERN
+@return TRUE if should use unzip_LRU */
ibool
buf_LRU_evict_from_unzip_LRU(
/*=========================*/
buf_pool_t* buf_pool)
{
- ulint io_avg;
- ulint unzip_avg;
-
ut_ad(buf_pool_mutex_own(buf_pool));
/* If the unzip_LRU list is empty, we can only use the LRU. */
@@ -221,9 +208,10 @@ buf_LRU_evict_from_unzip_LRU(
/* Calculate the average over past intervals, and add the values
of the current interval. */
- io_avg = buf_LRU_stat_sum.io / BUF_LRU_STAT_N_INTERVAL
+ ulint io_avg = buf_LRU_stat_sum.io / BUF_LRU_STAT_N_INTERVAL
+ buf_LRU_stat_cur.io;
- unzip_avg = buf_LRU_stat_sum.unzip / BUF_LRU_STAT_N_INTERVAL
+
+ ulint unzip_avg = buf_LRU_stat_sum.unzip / BUF_LRU_STAT_N_INTERVAL
+ buf_LRU_stat_cur.unzip;
/* Decide based on our formula. If the load is I/O bound
@@ -233,24 +221,25 @@ buf_LRU_evict_from_unzip_LRU(
return(unzip_avg <= io_avg * BUF_LRU_IO_TO_UNZIP_FACTOR);
}
-/******************************************************************//**
-Attempts to drop page hash index on a batch of pages belonging to a
-particular space id. */
+#ifdef BTR_CUR_HASH_ADAPT
+/** Attempts to drop page hash index on a batch of pages belonging to a
+particular space id.
+@param[in] space_id space id
+@param[in] arr array of page_no
+@param[in] count number of entries in array */
static
void
-buf_LRU_drop_page_hash_batch(
-/*=========================*/
- ulint space_id, /*!< in: space id */
- const ulint* arr, /*!< in: array of page_no */
- ulint count) /*!< in: number of entries in array */
+buf_LRU_drop_page_hash_batch(ulint space_id, const ulint* arr, ulint count)
{
- ulint i;
-
- ut_ad(arr != NULL);
ut_ad(count <= BUF_LRU_DROP_SEARCH_SIZE);
- for (i = 0; i < count; ++i) {
- btr_search_drop_page_hash_when_freed(space_id, arr[i]);
+ for (const ulint* const end = arr + count; arr != end; ) {
+ /* While our only caller
+ buf_LRU_drop_page_hash_for_tablespace()
+ is being executed for DROP TABLE or similar,
+ the table cannot be evicted from the buffer pool. */
+ btr_search_drop_page_hash_when_freed(
+ page_id_t(space_id, *arr++));
}
}
@@ -266,29 +255,24 @@ buf_LRU_drop_page_hash_for_tablespace(
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
ulint id) /*!< in: space id */
{
- buf_page_t* bpage;
- ulint* page_arr;
- ulint num_entries;
+ ulint* page_arr = static_cast<ulint*>(ut_malloc_nokey(
+ sizeof(ulint) * BUF_LRU_DROP_SEARCH_SIZE));
- page_arr = static_cast<ulint*>(ut_malloc(
- sizeof(ulint) * BUF_LRU_DROP_SEARCH_SIZE));
+ ulint num_entries = 0;
buf_pool_mutex_enter(buf_pool);
- num_entries = 0;
scan_again:
- bpage = UT_LIST_GET_LAST(buf_pool->LRU);
-
- while (bpage != NULL) {
- buf_page_t* prev_bpage;
- ibool is_fixed;
+ for (buf_page_t* bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+ bpage != NULL;
+ /* No op */) {
- prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
+ buf_page_t* prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
ut_a(buf_page_in_file(bpage));
if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
- || bpage->space != id
+ || bpage->id.space() != id
|| bpage->io_fix != BUF_IO_NONE) {
/* Compressed pages are never hashed.
Skip blocks of other tablespaces.
@@ -298,18 +282,34 @@ next_page:
continue;
}
- mutex_enter(&((buf_block_t*) bpage)->mutex);
- is_fixed = bpage->buf_fix_count > 0
- || !((buf_block_t*) bpage)->index;
- mutex_exit(&((buf_block_t*) bpage)->mutex);
+ buf_block_t* block = reinterpret_cast<buf_block_t*>(bpage);
- if (is_fixed) {
+ mutex_enter(&block->mutex);
+
+ /* This debug check uses a dirty read that could
+ theoretically cause false positives while
+ buf_pool_clear_hash_index() is executing.
+ (Other conflicting access paths to the adaptive hash
+ index should not be possible, because when a
+ tablespace is being discarded or dropped, there must
+		be no concurrent access to the contained tables.) */
+ assert_block_ahi_valid(block);
+
+ bool skip = bpage->buf_fix_count > 0 || !block->index;
+
+ mutex_exit(&block->mutex);
+
+ if (skip) {
+ /* Skip this block, because there are
+ no adaptive hash index entries
+ pointing to it, or because we cannot
+ drop them due to the buffer-fix. */
goto next_page;
}
/* Store the page number so that we can drop the hash
index in a batch later. */
- page_arr[num_entries] = bpage->offset;
+ page_arr[num_entries] = bpage->id.page_no();
ut_a(num_entries < BUF_LRU_DROP_SEARCH_SIZE);
++num_entries;
@@ -343,8 +343,9 @@ next_page:
/* If, however, bpage has been removed from LRU list
to the free list then we should restart the scan.
bpage->state is protected by buf_pool mutex. */
- if (bpage
+ if (bpage != NULL
&& buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
+
goto scan_again;
}
}
@@ -359,13 +360,13 @@ next_page:
/** Try to drop the adaptive hash index for a tablespace.
@param[in,out] table table
@return whether anything was dropped */
-UNIV_INTERN bool buf_LRU_drop_page_hash_for_tablespace(dict_table_t* table)
+bool buf_LRU_drop_page_hash_for_tablespace(dict_table_t* table)
{
for (dict_index_t* index = dict_table_get_first_index(table);
index != NULL;
index = dict_table_get_next_index(index)) {
- if (btr_search_info_get_ref_count(
- btr_search_get_info(index))) {
+ if (btr_search_info_get_ref_count(btr_search_get_info(index),
+ index)) {
goto drop_ahi;
}
}
@@ -387,14 +388,14 @@ want to hog the CPU and resources. Release the buffer pool and block
mutex and try to force a context switch. Then reacquire the same mutexes.
The current page is "fixed" before the release of the mutexes and then
"unfixed" again once we have reacquired the mutexes. */
-static MY_ATTRIBUTE((nonnull))
+static
void
buf_flush_yield(
/*============*/
buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
buf_page_t* bpage) /*!< in/out: current page */
{
- ib_mutex_t* block_mutex;
+ BPageMutex* block_mutex;
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(buf_page_in_file(bpage));
@@ -402,6 +403,7 @@ buf_flush_yield(
block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex);
+
/* "Fix" the block so that the position cannot be
changed after we release the buffer pool and
block mutexes. */
@@ -417,6 +419,7 @@ buf_flush_yield(
buf_pool_mutex_enter(buf_pool);
mutex_enter(block_mutex);
+
/* "Unfix" the block now that we have both the
buffer pool and block mutex again. */
buf_page_unset_sticky(bpage);
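/* A minimal sketch of the fix/yield/unfix idiom above, assuming a
simplified model (Page and pin_count are hypothetical). The invariant
mirrored here: the page is pinned before the outer lock is dropped, so it
cannot be relocated while this thread yields the CPU. */

#include <mutex>
#include <thread>

struct Page { int pin_count = 0; };

/* pool_mutex must be held by the caller, and is held again on return */
void yield_while_pinned(std::mutex& pool_mutex, std::mutex& page_mutex,
			Page& page)
{
	{
		std::lock_guard<std::mutex> g(page_mutex);
		++page.pin_count;	/* "fix": position cannot change now */
	}

	pool_mutex.unlock();		/* give waiting threads a chance */
	std::this_thread::yield();
	pool_mutex.lock();		/* reacquire in the same order */

	std::lock_guard<std::mutex> g(page_mutex);
	--page.pin_count;		/* "unfix" under both locks again */
}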
@@ -428,7 +431,7 @@ If we have hogged the resources for too long then release the buffer
pool and flush list mutex and do a thread yield. Set the current page
to "sticky" so that it is not relocated during the yield.
@return true if yielded */
-static MY_ATTRIBUTE((nonnull(1), warn_unused_result))
+static MY_ATTRIBUTE((warn_unused_result))
bool
buf_flush_try_yield(
/*================*/
@@ -466,12 +469,13 @@ buf_flush_try_yield(
return(false);
}
+#endif /* BTR_CUR_HASH_ADAPT */
/******************************************************************//**
Removes a single page from a given tablespace inside a specific
buffer pool instance.
@return true if page was removed. */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
+static MY_ATTRIBUTE((warn_unused_result))
bool
buf_flush_or_remove_page(
/*=====================*/
@@ -494,8 +498,14 @@ buf_flush_or_remove_page(
yet; maybe the system is currently reading it
in, or flushing the modifications to the file */
return(false);
+
}
+ BPageMutex* block_mutex;
+ bool processed = false;
+
+ block_mutex = buf_page_get_mutex(bpage);
+
/* We have to release the flush_list_mutex to obey the
latching order. We are however guaranteed that the page
will stay in the flush_list and won't be relocated because
@@ -504,9 +514,6 @@ buf_flush_or_remove_page(
buf_flush_list_mutex_exit(buf_pool);
- bool processed;
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
-
mutex_enter(block_mutex);
ut_ad(bpage->oldest_modification != 0);
@@ -514,28 +521,30 @@ buf_flush_or_remove_page(
if (!flush) {
buf_flush_remove(bpage);
- processed = true;
-
- } else if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_SINGLE_PAGE)
- && buf_flush_page(
- buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, false)) {
- /* Wake possible simulated aio thread to actually
- post the writes to the operating system */
- os_aio_simulated_wake_handler_threads();
+ mutex_exit(block_mutex);
- buf_pool_mutex_enter(buf_pool);
+ processed = true;
- buf_flush_list_mutex_enter(buf_pool);
+ } else if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_SINGLE_PAGE)) {
- return(true);
+ /* The following call will release the buffer pool
+ and block mutex. */
+ processed = buf_flush_page(
+ buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, false);
+ if (processed) {
+			/* Wake a possible simulated AIO thread to
+			actually post the writes to the operating system */
+ os_aio_simulated_wake_handler_threads();
+ buf_pool_mutex_enter(buf_pool);
+ } else {
+ mutex_exit(block_mutex);
+ }
} else {
- processed = false;
+ mutex_exit(block_mutex);
}
- mutex_exit(block_mutex);
-
buf_flush_list_mutex_enter(buf_pool);
ut_ad(!mutex_own(block_mutex));
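/* Sketch of the locking discipline that the rewrite above enforces: every
branch releases the block mutex exactly once before the function returns.
flush_page() is a hypothetical stand-in for buf_flush_page(), which
likewise consumes (releases) the block mutex on success. */

#include <mutex>

static bool flush_page(std::unique_lock<std::mutex>&& lk)
{
	lk.unlock();	/* stand-in for posting the asynchronous write */
	return true;
}

bool flush_or_remove(std::mutex& block_mutex, bool flush, bool ready)
{
	std::unique_lock<std::mutex> lk(block_mutex);

	if (!flush) {
		return true;	/* removed; lk unlocks on scope exit */
	}

	if (ready) {
		/* hand lock ownership to the flushing call */
		return flush_page(std::move(lk));
	}

	return false;	/* not flushable now; lk unlocks on scope exit */
}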
@@ -550,14 +559,17 @@ tablespace. The pages still remain a part of LRU and are evicted from
the list as they age towards the tail of the LRU.
@param[in,out] buf_pool buffer pool
@param[in] id tablespace identifier
-@param[in] trx transaction (to check for interrupt),
+@param[in] observer flush observer (to check for interrupt),
or NULL if the files should not be written to
-@retval DB_SUCCESS if all freed
-@retval DB_FAIL if not all freed
-@retval DB_INTERRUPTED if the transaction was interrupted */
-static MY_ATTRIBUTE((nonnull(1), warn_unused_result))
-dberr_t
-buf_flush_or_remove_pages(buf_pool_t* buf_pool, ulint id, const trx_t* trx)
+@param[in] first first page to be flushed or evicted
+@return whether all matching dirty pages were removed */
+static MY_ATTRIBUTE((warn_unused_result))
+bool
+buf_flush_or_remove_pages(
+ buf_pool_t* buf_pool,
+ ulint id,
+ FlushObserver* observer,
+ ulint first)
{
buf_page_t* prev;
buf_page_t* bpage;
@@ -579,12 +591,29 @@ rescan:
prev = UT_LIST_GET_PREV(list, bpage);
- if (buf_page_get_space(bpage) != id) {
-
- /* Skip this block, as it does not belong to
- the target space. */
-
- } else if (!buf_flush_or_remove_page(buf_pool, bpage, trx)) {
+ /* Flush the pages matching space id,
+ or pages matching the flush observer. */
+ if (observer && observer->is_partial_flush()) {
+ if (observer != bpage->flush_observer) {
+ /* Skip this block. */
+ } else if (!buf_flush_or_remove_page(
+ buf_pool, bpage,
+ !observer->is_interrupted())) {
+ all_freed = false;
+ } else if (!observer->is_interrupted()) {
+ /* The processing was successful. And during the
+ processing we have released the buf_pool mutex
+			when calling buf_flush_page(). We cannot trust
+			the prev pointer. */
+ goto rescan;
+ }
+ } else if (id != bpage->id.space()) {
+ /* Skip this block, because it is for a
+ different tablespace. */
+ } else if (bpage->id.page_no() < first) {
+ /* Skip this block, because it is below the limit. */
+ } else if (!buf_flush_or_remove_page(
+ buf_pool, bpage, observer != NULL)) {
/* Remove was unsuccessful, we have to try again
by scanning the entire list from the end.
@@ -607,7 +636,7 @@ rescan:
iteration. */
all_freed = false;
- } else if (trx) {
+ } else if (observer) {
/* The processing was successful. And during the
processing we have released the buf_pool mutex
@@ -616,6 +645,7 @@ rescan:
goto rescan;
}
+#ifdef BTR_CUR_HASH_ADAPT
++processed;
/* Yield if we have hogged the CPU and mutexes for too long. */
@@ -625,24 +655,18 @@ rescan:
processed = 0;
}
+#endif /* BTR_CUR_HASH_ADAPT */
- if (trx) {
- DBUG_EXECUTE_IF("ib_export_flush_crash",
- static ulint n_pages;
- if (++n_pages == 4) {DBUG_SUICIDE();});
-
- /* The check for trx is interrupted is
- expensive, we want to check every N iterations. */
- if (!processed && trx_is_interrupted(trx)) {
- buf_flush_list_mutex_exit(buf_pool);
- return(DB_INTERRUPTED);
- }
+		/* Checking whether the flush observer has been
+		interrupted is expensive; we only do it every N
+		iterations. */
+ if (!processed && observer) {
+ observer->check_interrupted();
}
}
buf_flush_list_mutex_exit(buf_pool);
- return(all_freed ? DB_SUCCESS : DB_FAIL);
+ return(all_freed);
}
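/* The page-selection rule introduced above, condensed into a sketch. All
type names here are hypothetical: during a partial flush, pages are
matched by flush-observer identity; otherwise by tablespace id plus the
new lower bound on the page number. */

struct ObserverStub { bool partial; };

struct PageStub {
	unsigned		space;
	unsigned		page_no;
	const ObserverStub*	observer;
};

bool matches(const PageStub& p, unsigned space_id, unsigned first,
	     const ObserverStub* obs)
{
	if (obs != 0 && obs->partial) {
		return p.observer == obs;	/* partial flush: by observer */
	}

	return p.space == space_id		/* full scan: by tablespace */
		&& p.page_no >= first;		/* ...at or above the cutoff */
}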
/** Remove or flush all the dirty pages that belong to a given tablespace
@@ -651,50 +675,57 @@ list and will be evicted from the LRU list as they age and move towards
the tail of the LRU list.
@param[in,out] buf_pool buffer pool
@param[in] id tablespace identifier
-@param[in] trx transaction (to check for interrupt),
+@param[in] observer flush observer,
or NULL if the files should not be written to
-*/
-static MY_ATTRIBUTE((nonnull(1)))
+@param[in] first first page to be flushed or evicted */
+static
void
-buf_flush_dirty_pages(buf_pool_t* buf_pool, ulint id, const trx_t* trx)
+buf_flush_dirty_pages(
+ buf_pool_t* buf_pool,
+ ulint id,
+ FlushObserver* observer,
+ ulint first)
{
- dberr_t err;
-
- do {
+ for (;;) {
buf_pool_mutex_enter(buf_pool);
- err = buf_flush_or_remove_pages(buf_pool, id, trx);
+ bool freed = buf_flush_or_remove_pages(buf_pool, id, observer,
+ first);
buf_pool_mutex_exit(buf_pool);
ut_ad(buf_flush_validate(buf_pool));
- if (err == DB_FAIL) {
- os_thread_sleep(2000);
+ if (freed) {
+ break;
}
- /* DB_FAIL is a soft error, it means that the task wasn't
- completed, needs to be retried. */
-
+ os_thread_sleep(2000);
ut_ad(buf_flush_validate(buf_pool));
+ }
- } while (err == DB_FAIL);
-
- ut_ad(err == DB_INTERRUPTED
- || buf_pool_get_dirty_pages_count(buf_pool, id) == 0);
+ ut_ad((observer && observer->is_interrupted())
+ || first
+ || buf_pool_get_dirty_pages_count(buf_pool, id, observer) == 0);
}
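/* Sketch of the retry loop above: one full pass either frees every
matching dirty page or the thread backs off for 2 ms (os_thread_sleep()
takes microseconds) and rescans. flush_or_remove_all() is a hypothetical
stand-in for one buf_flush_or_remove_pages() pass. */

#include <chrono>
#include <thread>

static bool flush_or_remove_all()
{
	return true;	/* stub: pretend one pass freed everything */
}

void flush_dirty_pages_sketch()
{
	for (;;) {
		if (flush_or_remove_all()) {
			break;	/* all matching dirty pages are gone */
		}
		/* soft failure: some page was latched; back off, rescan */
		std::this_thread::sleep_for(std::chrono::microseconds(2000));
	}
}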
/** Empty the flush list for all pages belonging to a tablespace.
@param[in] id tablespace identifier
-@param[in] trx transaction, for checking for user interrupt;
- or NULL if nothing is to be written */
-UNIV_INTERN void buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx)
+@param[in] observer flush observer,
+ or NULL if nothing is to be written
+@param[in] first first page to be flushed or evicted */
+void buf_LRU_flush_or_remove_pages(ulint id, FlushObserver* observer,
+ ulint first)
{
+ /* Pages in the system tablespace must never be discarded. */
+ ut_ad(id || observer);
+
for (ulint i = 0; i < srv_buf_pool_instances; i++) {
- buf_flush_dirty_pages(buf_pool_from_array(i), id, trx);
+ buf_flush_dirty_pages(buf_pool_from_array(i), id, observer,
+ first);
}
- if (trx && !trx_is_interrupted(trx)) {
+ if (observer && !observer->is_interrupted()) {
/* Ensure that all asynchronous IO is completed. */
os_aio_wait_until_no_pending_writes();
fil_flush(id);
@@ -704,13 +735,11 @@ UNIV_INTERN void buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx)
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/********************************************************************//**
Insert a compressed block into buf_pool->zip_clean in the LRU order. */
-UNIV_INTERN
void
buf_LRU_insert_zip_clean(
/*=====================*/
buf_page_t* bpage) /*!< in: pointer to the block in question */
{
- buf_page_t* b;
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
ut_ad(buf_pool_mutex_own(buf_pool));
@@ -718,20 +747,21 @@ buf_LRU_insert_zip_clean(
/* Find the first successor of bpage in the LRU list
that is in the zip_clean list. */
- b = bpage;
+ buf_page_t* b = bpage;
+
do {
b = UT_LIST_GET_NEXT(LRU, b);
} while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
/* Insert bpage before b, i.e., after the predecessor of b. */
- if (b) {
+ if (b != NULL) {
b = UT_LIST_GET_PREV(list, b);
}
- if (b) {
- UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
+ if (b != NULL) {
+ UT_LIST_INSERT_AFTER(buf_pool->zip_clean, b, bpage);
} else {
- UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
+ UT_LIST_ADD_FIRST(buf_pool->zip_clean, bpage);
}
}
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
@@ -739,34 +769,34 @@ buf_LRU_insert_zip_clean(
/******************************************************************//**
Try to free an uncompressed page of a compressed block from the unzip
LRU list. The compressed page is preserved, and it need not be clean.
-@return TRUE if freed */
-UNIV_INLINE
-ibool
+@return true if freed */
+static
+bool
buf_LRU_free_from_unzip_LRU_list(
/*=============================*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- ibool scan_all) /*!< in: scan whole LRU list
- if TRUE, otherwise scan only
+ bool scan_all) /*!< in: scan whole LRU list
+ if true, otherwise scan only
srv_LRU_scan_depth / 2 blocks. */
{
- buf_block_t* block;
- ibool freed;
- ulint scanned;
-
ut_ad(buf_pool_mutex_own(buf_pool));
if (!buf_LRU_evict_from_unzip_LRU(buf_pool)) {
- return(FALSE);
+ return(false);
}
- for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU),
- scanned = 0, freed = FALSE;
- block != NULL && !freed
+ ulint scanned = 0;
+ bool freed = false;
+
+ for (buf_block_t* block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
+ block != NULL
+ && !freed
&& (scan_all || scanned < srv_LRU_scan_depth);
++scanned) {
- buf_block_t* prev_block = UT_LIST_GET_PREV(unzip_LRU,
- block);
+ buf_block_t* prev_block;
+
+ prev_block = UT_LIST_GET_PREV(unzip_LRU, block);
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->in_unzip_LRU_list);
@@ -784,43 +814,44 @@ buf_LRU_free_from_unzip_LRU_list(
MONITOR_LRU_UNZIP_SEARCH_SCANNED_PER_CALL,
scanned);
}
+
return(freed);
}
/******************************************************************//**
Try to free a clean page from the common LRU list.
-@return TRUE if freed */
-UNIV_INLINE
-ibool
+@return true if freed */
+static
+bool
buf_LRU_free_from_common_LRU_list(
/*==============================*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- ibool scan_all) /*!< in: scan whole LRU list
- if TRUE, otherwise scan only
- srv_LRU_scan_depth / 2 blocks. */
+ bool scan_all) /*!< in: scan whole LRU list
+ if true, otherwise scan only
+ up to BUF_LRU_SEARCH_SCAN_THRESHOLD */
{
- buf_page_t* bpage;
- ibool freed;
- ulint scanned;
-
ut_ad(buf_pool_mutex_own(buf_pool));
- for (bpage = buf_pool->lru_scan_itr.start(),
- scanned = 0, freed = false;
- bpage != NULL && !freed
+ ulint scanned = 0;
+ bool freed = false;
+
+ for (buf_page_t* bpage = buf_pool->lru_scan_itr.start();
+ bpage != NULL
+ && !freed
&& (scan_all || scanned < BUF_LRU_SEARCH_SCAN_THRESHOLD);
++scanned, bpage = buf_pool->lru_scan_itr.get()) {
- buf_page_t* prev = UT_LIST_GET_PREV(LRU, bpage);
+ buf_page_t* prev = UT_LIST_GET_PREV(LRU, bpage);
+ BPageMutex* mutex = buf_page_get_mutex(bpage);
+
buf_pool->lru_scan_itr.set(prev);
- ib_mutex_t* mutex = buf_page_get_mutex(bpage);
mutex_enter(mutex);
ut_ad(buf_page_in_file(bpage));
ut_ad(bpage->in_LRU_list);
- unsigned accessed = buf_page_is_accessed(bpage);
+ unsigned accessed = buf_page_is_accessed(bpage);
if (buf_flush_ready_for_replace(bpage)) {
mutex_exit(mutex);
@@ -853,47 +884,45 @@ buf_LRU_free_from_common_LRU_list(
/******************************************************************//**
Try to free a replaceable block.
-@return TRUE if found and freed */
-UNIV_INTERN
-ibool
+@return true if found and freed */
+bool
buf_LRU_scan_and_free_block(
/*========================*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- ibool scan_all) /*!< in: scan whole LRU list
- if TRUE, otherwise scan only
- 'old' blocks. */
+ bool scan_all) /*!< in: scan whole LRU list
+ if true, otherwise scan only
+ BUF_LRU_SEARCH_SCAN_THRESHOLD
+ blocks. */
{
ut_ad(buf_pool_mutex_own(buf_pool));
return(buf_LRU_free_from_unzip_LRU_list(buf_pool, scan_all)
- || buf_LRU_free_from_common_LRU_list(
- buf_pool, scan_all));
+ || buf_LRU_free_from_common_LRU_list(buf_pool, scan_all));
}
/******************************************************************//**
Returns TRUE if less than 25 % of the buffer pool in any instance is
available. This can be used in heuristics to prevent huge transactions
eating up the whole buffer pool for their locks.
-@return TRUE if less than 25 % of buffer pool left */
-UNIV_INTERN
+@return TRUE if less than 25 % of buffer pool left */
ibool
buf_LRU_buf_pool_running_out(void)
/*==============================*/
{
- ulint i;
ibool ret = FALSE;
- for (i = 0; i < srv_buf_pool_instances && !ret; i++) {
+ for (ulint i = 0; i < srv_buf_pool_instances && !ret; i++) {
buf_pool_t* buf_pool;
buf_pool = buf_pool_from_array(i);
buf_pool_mutex_enter(buf_pool);
- if (!recv_recovery_on
+ if (!recv_recovery_is_on()
&& UT_LIST_GET_LEN(buf_pool->free)
+ UT_LIST_GET_LEN(buf_pool->LRU)
- < buf_pool->curr_size / 4) {
+ < ut_min(buf_pool->curr_size,
+ buf_pool->old_size) / 4) {
ret = TRUE;
}
@@ -907,8 +936,7 @@ buf_LRU_buf_pool_running_out(void)
/******************************************************************//**
Returns a free block from the buf_pool. The block is taken off the
free list. If it is empty, returns NULL.
-@return a free control block, or NULL if the buf_block->free list is empty */
-UNIV_INTERN
+@return a free control block, or NULL if the buf_block->free list is empty */
buf_block_t*
buf_LRU_get_free_only(
/*==================*/
@@ -918,25 +946,45 @@ buf_LRU_get_free_only(
ut_ad(buf_pool_mutex_own(buf_pool));
- block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
+ block = reinterpret_cast<buf_block_t*>(
+ UT_LIST_GET_FIRST(buf_pool->free));
- if (block) {
+ while (block != NULL) {
ut_ad(block->page.in_free_list);
ut_d(block->page.in_free_list = FALSE);
ut_ad(!block->page.in_flush_list);
ut_ad(!block->page.in_LRU_list);
ut_a(!buf_page_in_file(&block->page));
- UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
+ UT_LIST_REMOVE(buf_pool->free, &block->page);
- mutex_enter(&block->mutex);
+ if (buf_pool->curr_size >= buf_pool->old_size
+ || UT_LIST_GET_LEN(buf_pool->withdraw)
+ >= buf_pool->withdraw_target
+ || !buf_block_will_withdrawn(buf_pool, block)) {
+ /* found valid free block */
+ buf_page_mutex_enter(block);
+ /* No adaptive hash index entries may point to
+ a free block. */
+ assert_block_ahi_empty(block);
- buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
- UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
+ buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
+ UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
- ut_ad(buf_pool_from_block(block) == buf_pool);
+ ut_ad(buf_pool_from_block(block) == buf_pool);
- mutex_exit(&block->mutex);
+ buf_page_mutex_exit(block);
+ break;
+ }
+
+ /* This should be withdrawn */
+ UT_LIST_ADD_LAST(
+ buf_pool->withdraw,
+ &block->page);
+ ut_d(block->in_withdraw_list = TRUE);
+
+ block = reinterpret_cast<buf_block_t*>(
+ UT_LIST_GET_FIRST(buf_pool->free));
}
return(block);
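/* The withdraw filter above, sketched with std::list. While the pool is
shrinking, free blocks that live in the area being withdrawn are diverted
to a separate list instead of being handed out; should_withdraw() is a
hypothetical stand-in for buf_block_will_withdrawn(). */

#include <list>

struct BlockStub { bool in_withdraw_area; };

static bool should_withdraw(const BlockStub& b)
{
	return b.in_withdraw_area;
}

BlockStub* get_free_only_sketch(std::list<BlockStub*>& free_list,
				std::list<BlockStub*>& withdraw_list)
{
	while (!free_list.empty()) {
		BlockStub* b = free_list.front();
		free_list.pop_front();

		if (!should_withdraw(*b)) {
			return b;	/* found a usable free block */
		}

		/* earmarked for withdrawal: keep it out of circulation */
		withdraw_list.push_back(b);
	}

	return 0;	/* free list exhausted */
}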
@@ -955,28 +1003,23 @@ buf_LRU_check_size_of_non_data_objects(
{
ut_ad(buf_pool_mutex_own(buf_pool));
- if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
+ if (!recv_recovery_is_on()
+ && buf_pool->curr_size == buf_pool->old_size
+ && UT_LIST_GET_LEN(buf_pool->free)
+ UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: ERROR: over 95 percent of the buffer pool"
- " is occupied by\n"
- "InnoDB: lock heaps or the adaptive hash index!"
- " Check that your\n"
- "InnoDB: transactions do not set too many row locks.\n"
- "InnoDB: Your buffer pool size is %lu MB."
- " Maybe you should make\n"
- "InnoDB: the buffer pool bigger?\n"
- "InnoDB: We intentionally generate a seg fault"
- " to print a stack trace\n"
- "InnoDB: on Linux!\n",
- (ulong) (buf_pool->curr_size
- / (1024 * 1024 / UNIV_PAGE_SIZE)));
- ut_error;
-
- } else if (!recv_recovery_on
+ ib::fatal() << "Over 95 percent of the buffer pool is"
+ " occupied by lock heaps"
+#ifdef BTR_CUR_HASH_ADAPT
+ " or the adaptive hash index!"
+#endif /* BTR_CUR_HASH_ADAPT */
+ " Check that your transactions do not set too many"
+ " row locks, or review if"
+ " innodb_buffer_pool_size="
+ << (buf_pool->curr_size >> (20 - UNIV_PAGE_SIZE_SHIFT))
+ << "M could be bigger.";
+ } else if (!recv_recovery_is_on()
+ && buf_pool->curr_size == buf_pool->old_size
&& (UT_LIST_GET_LEN(buf_pool->free)
+ UT_LIST_GET_LEN(buf_pool->LRU))
< buf_pool->curr_size / 3) {
@@ -987,27 +1030,24 @@ buf_LRU_check_size_of_non_data_objects(
heaps or the adaptive hash index. This may be a memory
leak! */
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: WARNING: over 67 percent of"
- " the buffer pool is occupied by\n"
- "InnoDB: lock heaps or the adaptive"
- " hash index! Check that your\n"
- "InnoDB: transactions do not set too many"
- " row locks.\n"
- "InnoDB: Your buffer pool size is %lu MB."
- " Maybe you should make\n"
- "InnoDB: the buffer pool bigger?\n"
- "InnoDB: Starting the InnoDB Monitor to print"
- " diagnostics, including\n"
- "InnoDB: lock heap and hash index sizes.\n",
- (ulong) (buf_pool->curr_size
- / (1024 * 1024 / UNIV_PAGE_SIZE)));
-
- buf_lru_switched_on_innodb_mon = TRUE;
+ ib::warn() << "Over 67 percent of the buffer pool is"
+ " occupied by lock heaps"
+#ifdef BTR_CUR_HASH_ADAPT
+ " or the adaptive hash index!"
+#endif /* BTR_CUR_HASH_ADAPT */
+ " Check that your transactions do not"
+ " set too many row locks."
+ " innodb_buffer_pool_size="
+ << (buf_pool->curr_size >>
+ (20 - UNIV_PAGE_SIZE_SHIFT)) << "M."
+ " Starting the InnoDB Monitor to print"
+ " diagnostics.";
+
+ buf_lru_switched_on_innodb_mon = true;
srv_print_innodb_monitor = TRUE;
os_event_set(srv_monitor_event);
}
+
} else if (buf_lru_switched_on_innodb_mon) {
/* Switch off the InnoDB Monitor; this is a simple way
@@ -1015,7 +1055,7 @@ buf_LRU_check_size_of_non_data_objects(
but may also surprise users if the user also switched on the
monitor! */
- buf_lru_switched_on_innodb_mon = FALSE;
+ buf_lru_switched_on_innodb_mon = false;
srv_print_innodb_monitor = FALSE;
}
}
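/* The shift in the messages above converts a page count to mebibytes
without a division: pages occupy pages << shift bytes, and dividing by
2^20 gives pages >> (20 - shift). A worked check for the default 16 KiB
page size (UNIV_PAGE_SIZE_SHIFT == 14): */

static_assert((65536UL >> (20 - 14)) == 1024,
	      "65536 sixteen-KiB pages are 1024 MiB, i.e. a 1 GiB pool");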
@@ -1043,15 +1083,14 @@ we put it to free list to be used.
* scan LRU list even if buf_pool->try_LRU_scan is not set
* iteration > 1:
* same as iteration 1 but sleep 10ms
-@return the free control block, in state BUF_BLOCK_READY_FOR_USE */
-UNIV_INTERN
+@return the free control block, in state BUF_BLOCK_READY_FOR_USE */
buf_block_t*
buf_LRU_get_free_block(
/*===================*/
buf_pool_t* buf_pool) /*!< in/out: buffer pool instance */
{
buf_block_t* block = NULL;
- ibool freed = FALSE;
+ bool freed = false;
ulint n_iterations = 0;
ulint flush_failures = 0;
@@ -1069,24 +1108,27 @@ loop:
/* If there is a block in the free list, take it */
block = buf_LRU_get_free_only(buf_pool);
- if (block) {
+ if (block != NULL) {
buf_pool_mutex_exit(buf_pool);
ut_ad(buf_pool_from_block(block) == buf_pool);
memset(&block->page.zip, 0, sizeof block->page.zip);
+
+ block->skip_flush_check = false;
+ block->page.flush_observer = NULL;
return(block);
}
- freed = FALSE;
-
+	MONITOR_INC(MONITOR_LRU_GET_FREE_LOOPS);
+ freed = false;
if (buf_pool->try_LRU_scan || n_iterations > 0) {
/* If no block was in the free list, search from the
end of the LRU list and try to free a block there.
If we are doing for the first time we'll scan only
tail of the LRU list otherwise we scan the whole LRU
list. */
- freed = buf_LRU_scan_and_free_block(buf_pool,
- n_iterations > 0);
+ freed = buf_LRU_scan_and_free_block(
+ buf_pool, n_iterations > 0);
if (!freed && n_iterations == 0) {
/* Tell other threads that there is no point
@@ -1109,27 +1151,24 @@ not_found:
if (freed) {
goto loop;
-
}
- if (n_iterations > 20 && !buf_lru_free_blocks_error_printed) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Difficult to find free blocks in"
- " the buffer pool (" ULINTPF " search iterations)! "
- ULINTPF " failed attempts to flush a page!",
- n_iterations, flush_failures);
- ib_logf(IB_LOG_LEVEL_INFO,
- "Consider increasing the buffer pool size.");
- ib_logf(IB_LOG_LEVEL_INFO,
- "Pending flushes (fsync) log: " ULINTPF
- " buffer pool: " ULINTPF
- " OS file reads: " ULINTPF " OS file writes: "
- ULINTPF " OS fsyncs: " ULINTPF "",
- fil_n_pending_log_flushes,
- fil_n_pending_tablespace_flushes,
- os_n_file_reads,
- os_n_file_writes,
- os_n_fsyncs);
+ if (n_iterations > 20 && !buf_lru_free_blocks_error_printed
+ && srv_buf_pool_old_size == srv_buf_pool_size) {
+
+ ib::warn() << "Difficult to find free blocks in the buffer pool"
+ " (" << n_iterations << " search iterations)! "
+ << flush_failures << " failed attempts to"
+ " flush a page!"
+ " Consider increasing innodb_buffer_pool_size."
+ " Pending flushes (fsync) log: "
+ << fil_n_pending_log_flushes
+ << "; buffer pool: "
+ << fil_n_pending_tablespace_flushes
+ << ". " << os_n_file_reads << " OS file reads, "
+ << os_n_file_writes << " OS file writes, "
+ << os_n_fsyncs
+ << " OS fsyncs.";
buf_lru_free_blocks_error_printed = true;
}
@@ -1138,7 +1177,13 @@ not_found:
find a free block then we should sleep here to let the
page_cleaner do an LRU batch for us. */
+ if (!srv_read_only_mode) {
+ os_event_set(buf_flush_event);
+ }
+
if (n_iterations > 1) {
+
+		MONITOR_INC(MONITOR_LRU_GET_FREE_WAITS);
os_thread_sleep(10000);
}
@@ -1146,11 +1191,13 @@ not_found:
This call will flush one page from the LRU and put it on the
free list. That means that the free block is up for grabs for
all user threads.
+
TODO: A more elegant way would have been to return the freed
up block to the caller here but the code that deals with
removing the block from page_hash and LRU_list is fairly
involved (particularly in case of compressed pages). We
can do that in a separate patch sometime in future. */
+
if (!buf_flush_single_page_from_LRU(buf_pool)) {
MONITOR_INC(MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT);
++flush_failures;
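/* The overall acquisition strategy of buf_LRU_get_free_block(), sketched:
free list first, then an LRU scan (tail only on the first pass, the whole
list afterwards), then a single-page flush, sleeping 10 ms from the second
iteration on. Every helper below is a hypothetical stub standing in for
the corresponding buf_LRU_* / buf_flush_* call. */

#include <chrono>
#include <thread>

struct FreeBlock;

static FreeBlock* take_from_free_list()		{ return 0; }
static bool scan_lru_and_free(bool scan_all)	{ (void) scan_all; return false; }
static void flush_single_page_from_lru()	{ }

FreeBlock* get_free_block_sketch()
{
	for (unsigned n_iterations = 0;; ++n_iterations) {
		if (FreeBlock* b = take_from_free_list()) {
			return b;	/* fast path: free list hit */
		}

		if (scan_lru_and_free(/*scan_all=*/ n_iterations > 0)) {
			continue;	/* freed one; retry the free list */
		}

		if (n_iterations > 1) {
			std::this_thread::sleep_for(
				std::chrono::milliseconds(10));
		}

		flush_single_page_from_lru();	/* make progress ourselves */
	}
}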
@@ -1240,8 +1287,6 @@ buf_LRU_old_init(
/*=============*/
buf_pool_t* buf_pool)
{
- buf_page_t* bpage;
-
ut_ad(buf_pool_mutex_own(buf_pool));
ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
@@ -1249,10 +1294,13 @@ buf_LRU_old_init(
the adjust function to move the LRU_old pointer to the right
position */
- for (bpage = UT_LIST_GET_LAST(buf_pool->LRU); bpage != NULL;
+ for (buf_page_t* bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+ bpage != NULL;
bpage = UT_LIST_GET_PREV(LRU, bpage)) {
+
ut_ad(bpage->in_LRU_list);
ut_ad(buf_page_in_file(bpage));
+
/* This loop temporarily violates the
assertions of buf_page_set_old(). */
bpage->old = TRUE;
@@ -1274,24 +1322,21 @@ buf_unzip_LRU_remove_block_if_needed(
{
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- ut_ad(buf_pool);
- ut_ad(bpage);
ut_ad(buf_page_in_file(bpage));
ut_ad(buf_pool_mutex_own(buf_pool));
if (buf_page_belongs_to_unzip_LRU(bpage)) {
- buf_block_t* block = (buf_block_t*) bpage;
+ buf_block_t* block = reinterpret_cast<buf_block_t*>(bpage);
ut_ad(block->in_unzip_LRU_list);
ut_d(block->in_unzip_LRU_list = FALSE);
- UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
+ UT_LIST_REMOVE(buf_pool->unzip_LRU, block);
}
}
/******************************************************************//**
Adjust LRU hazard pointers if needed. */
-
void
buf_LRU_adjust_hp(
/*==============*/
@@ -1312,10 +1357,7 @@ buf_LRU_remove_block(
buf_page_t* bpage) /*!< in: control block */
{
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- ulint zip_size;
- ut_ad(buf_pool);
- ut_ad(bpage);
ut_ad(buf_pool_mutex_own(buf_pool));
ut_a(buf_page_in_file(bpage));
@@ -1329,7 +1371,7 @@ buf_LRU_remove_block(
/* If the LRU_old pointer is defined and points to just this block,
move it backward one step */
- if (UNIV_UNLIKELY(bpage == buf_pool->LRU_old)) {
+ if (bpage == buf_pool->LRU_old) {
/* Below: the previous block is guaranteed to exist,
because the LRU_old pointer is only allowed to differ
@@ -1349,11 +1391,10 @@ buf_LRU_remove_block(
}
/* Remove the block from the LRU list */
- UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
+ UT_LIST_REMOVE(buf_pool->LRU, bpage);
ut_d(bpage->in_LRU_list = FALSE);
- zip_size = page_zip_get_size(&bpage->zip);
- buf_pool->stat.LRU_bytes -= zip_size ? zip_size : UNIV_PAGE_SIZE;
+ buf_pool->stat.LRU_bytes -= bpage->size.physical();
buf_unzip_LRU_remove_block_if_needed(bpage);
@@ -1361,8 +1402,10 @@ buf_LRU_remove_block(
clear the "old" flags and return */
if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
- for (bpage = UT_LIST_GET_FIRST(buf_pool->LRU); bpage != NULL;
+ for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
+ bpage != NULL;
bpage = UT_LIST_GET_NEXT(LRU, bpage)) {
+
/* This loop temporarily violates the
assertions of buf_page_set_old(). */
bpage->old = FALSE;
@@ -1388,7 +1431,6 @@ buf_LRU_remove_block(
/******************************************************************//**
Adds a block to the LRU list of decompressed zip pages. */
-UNIV_INTERN
void
buf_unzip_LRU_add_block(
/*====================*/
@@ -1398,8 +1440,6 @@ buf_unzip_LRU_add_block(
{
buf_pool_t* buf_pool = buf_pool_from_block(block);
- ut_ad(buf_pool);
- ut_ad(block);
ut_ad(buf_pool_mutex_own(buf_pool));
ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
@@ -1408,69 +1448,16 @@ buf_unzip_LRU_add_block(
ut_d(block->in_unzip_LRU_list = TRUE);
if (old) {
- UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
- } else {
- UT_LIST_ADD_FIRST(unzip_LRU, buf_pool->unzip_LRU, block);
- }
-}
-
-/******************************************************************//**
-Adds a block to the LRU list end. Please make sure that the zip_size is
-already set into the page zip when invoking the function, so that we
-can get correct zip_size from the buffer page when adding a block
-into LRU */
-UNIV_INLINE
-void
-buf_LRU_add_block_to_end_low(
-/*=========================*/
- buf_page_t* bpage) /*!< in: control block */
-{
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
-
- ut_ad(buf_pool);
- ut_ad(bpage);
- ut_ad(buf_pool_mutex_own(buf_pool));
-
- ut_a(buf_page_in_file(bpage));
-
- ut_ad(!bpage->in_LRU_list);
- UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage);
- ut_d(bpage->in_LRU_list = TRUE);
-
- incr_LRU_size_in_bytes(bpage, buf_pool);
-
- if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
-
- ut_ad(buf_pool->LRU_old);
-
- /* Adjust the length of the old block list if necessary */
-
- buf_page_set_old(bpage, TRUE);
- buf_pool->LRU_old_len++;
- buf_LRU_old_adjust_len(buf_pool);
-
- } else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
-
- /* The LRU list is now long enough for LRU_old to become
- defined: init it */
-
- buf_LRU_old_init(buf_pool);
+ UT_LIST_ADD_LAST(buf_pool->unzip_LRU, block);
} else {
- buf_page_set_old(bpage, buf_pool->LRU_old != NULL);
- }
-
- /* If this is a zipped block with decompressed frame as well
- then put it on the unzip_LRU list */
- if (buf_page_belongs_to_unzip_LRU(bpage)) {
- buf_unzip_LRU_add_block((buf_block_t*) bpage, TRUE);
+ UT_LIST_ADD_FIRST(buf_pool->unzip_LRU, block);
}
}
/******************************************************************//**
-Adds a block to the LRU list. Please make sure that the zip_size is
-already set into the page zip when invoking the function, so that we
-can get correct zip_size from the buffer page when adding a block
-into LRU */
+Adds a block to the LRU list. Please make sure that the page_size is
+already set when invoking the function, so that we can get correct
+page_size from the buffer page when adding a block into LRU */
UNIV_INLINE
void
buf_LRU_add_block_low(
@@ -1490,7 +1477,7 @@ buf_LRU_add_block_low(
if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) {
- UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, bpage);
+ UT_LIST_ADD_FIRST(buf_pool->LRU, bpage);
bpage->freed_page_clock = buf_pool->freed_page_clock;
} else {
@@ -1503,8 +1490,9 @@ buf_LRU_add_block_low(
ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
|| UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
#endif /* UNIV_LRU_DEBUG */
- UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old,
- bpage);
+ UT_LIST_INSERT_AFTER(buf_pool->LRU, buf_pool->LRU_old,
+ bpage);
+
buf_pool->LRU_old_len++;
}
@@ -1539,11 +1527,9 @@ buf_LRU_add_block_low(
}
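/* Midpoint ("old" sublist) insertion, sketched with std::list. Young
pages go to the head; once the list is long enough, old arrivals are
inserted just behind the midpoint iterator, the analogue of LRU_old in
the function above. The 512 threshold stands in for BUF_LRU_OLD_MIN_LEN,
and keeping old_mid valid is left to an adjust step, as in
buf_LRU_old_adjust_len(). */

#include <iterator>
#include <list>

struct LruSketch {
	std::list<int>			pages;
	std::list<int>::iterator	old_mid;

	LruSketch() : pages(), old_mid(pages.end()) {}

	void add(int page_no, bool old)
	{
		if (!old || pages.size() < 512 || old_mid == pages.end()) {
			pages.push_front(page_no);	/* young end */
		} else {
			/* insert after the midpoint: the old sublist */
			pages.insert(std::next(old_mid), page_no);
		}
	}
};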
/******************************************************************//**
-Adds a block to the LRU list. Please make sure that the zip_size is
-already set into the page zip when invoking the function, so that we
-can get correct zip_size from the buffer page when adding a block
-into LRU */
-UNIV_INTERN
+Adds a block to the LRU list. Please make sure that the page_size is
+already set when invoking the function, so that we can get correct
+page_size from the buffer page when adding a block into LRU */
void
buf_LRU_add_block(
/*==============*/
@@ -1559,7 +1545,6 @@ buf_LRU_add_block(
/******************************************************************//**
Moves a block to the start of the LRU list. */
-UNIV_INTERN
void
buf_LRU_make_block_young(
/*=====================*/
@@ -1578,18 +1563,6 @@ buf_LRU_make_block_young(
}
/******************************************************************//**
-Moves a block to the end of the LRU list. */
-UNIV_INTERN
-void
-buf_LRU_make_block_old(
-/*===================*/
- buf_page_t* bpage) /*!< in: control block */
-{
- buf_LRU_remove_block(bpage);
- buf_LRU_add_block_to_end_low(bpage);
-}
-
-/******************************************************************//**
Try to free a block. If bpage is a descriptor of a compressed-only
page, the descriptor object will be freed as well.
@@ -1600,7 +1573,6 @@ accessible via bpage.
The caller must hold buf_pool->mutex and must not hold any
buf_page_get_mutex() when calling this function.
@return true if freed, false otherwise. */
-UNIV_INTERN
bool
buf_LRU_free_page(
/*===============*/
@@ -1610,11 +1582,10 @@ buf_LRU_free_page(
{
buf_page_t* b = NULL;
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- const ulint fold = buf_page_address_fold(bpage->space,
- bpage->offset);
- rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold);
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, bpage->id);
+
+ BPageMutex* block_mutex = buf_page_get_mutex(bpage);
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(buf_page_in_file(bpage));
@@ -1625,14 +1596,10 @@ buf_LRU_free_page(
if (!buf_page_can_relocate(bpage)) {
- /* Do not free buffer fixed or I/O-fixed blocks. */
+ /* Do not free buffer fixed and I/O-fixed blocks. */
goto func_exit;
}
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
-#endif /* UNIV_IBUF_COUNT_DEBUG */
-
if (zip || !bpage->zip.data) {
/* This would completely free the block. */
/* Do not completely free dirty blocks. */
@@ -1661,28 +1628,19 @@ func_exit:
ut_ad(bpage->in_LRU_list);
ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr, "Putting space %lu page %lu to free list\n",
- (ulong) buf_page_get_space(bpage),
- (ulong) buf_page_get_page_no(bpage));
- }
-#endif /* UNIV_DEBUG */
+ DBUG_PRINT("ib_buf", ("free page %u:%u",
+ bpage->id.space(), bpage->id.page_no()));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(hash_lock, RW_LOCK_X));
ut_ad(buf_page_can_relocate(bpage));
if (!buf_LRU_block_remove_hashed(bpage, zip)) {
return(true);
}
-#ifdef UNIV_SYNC_DEBUG
/* buf_LRU_block_remove_hashed() releases the hash_lock */
- ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)
- && !rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!rw_lock_own_flagged(hash_lock,
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
/* We have just freed a BUF_BLOCK_FILE_PAGE. If b != NULL
then it was a compressed page with an uncompressed frame and
@@ -1691,20 +1649,22 @@ func_exit:
into the LRU and page_hash (and possibly flush_list).
if b == NULL then it was a regular page that has been freed */
- if (b) {
+ if (b != NULL) {
buf_page_t* prev_b = UT_LIST_GET_PREV(LRU, b);
rw_lock_x_lock(hash_lock);
+
mutex_enter(block_mutex);
- ut_a(!buf_page_hash_get_low(
- buf_pool, b->space, b->offset, fold));
+ ut_a(!buf_page_hash_get_low(buf_pool, b->id));
b->state = b->oldest_modification
? BUF_BLOCK_ZIP_DIRTY
: BUF_BLOCK_ZIP_PAGE;
- UNIV_MEM_DESC(b->zip.data,
- page_zip_get_size(&b->zip));
+
+ ut_ad(b->size.is_compressed());
+
+ UNIV_MEM_DESC(b->zip.data, b->size.physical());
/* The fields in_page_hash and in_LRU_list of
the to-be-freed block descriptor should have
@@ -1713,6 +1673,7 @@ func_exit:
invokes buf_LRU_remove_block(). */
ut_ad(!bpage->in_page_hash);
ut_ad(!bpage->in_LRU_list);
+
/* bpage->state was BUF_BLOCK_FILE_PAGE because
b != NULL. The type cast below is thus valid. */
ut_ad(!((buf_block_t*) bpage)->in_unzip_LRU_list);
@@ -1723,25 +1684,24 @@ func_exit:
ut_ad(b->in_page_hash);
ut_ad(b->in_LRU_list);
- HASH_INSERT(buf_page_t, hash,
- buf_pool->page_hash, fold, b);
+ HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
+ b->id.fold(), b);
/* Insert b where bpage was in the LRU list. */
- if (UNIV_LIKELY(prev_b != NULL)) {
+ if (prev_b != NULL) {
ulint lru_len;
ut_ad(prev_b->in_LRU_list);
ut_ad(buf_page_in_file(prev_b));
- UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU,
- prev_b, b);
+
+ UT_LIST_INSERT_AFTER(buf_pool->LRU, prev_b, b);
incr_LRU_size_in_bytes(b, buf_pool);
if (buf_page_is_old(b)) {
buf_pool->LRU_old_len++;
- if (UNIV_UNLIKELY
- (buf_pool->LRU_old
- == UT_LIST_GET_NEXT(LRU, b))) {
+ if (buf_pool->LRU_old
+ == UT_LIST_GET_NEXT(LRU, b)) {
buf_pool->LRU_old = b;
}
@@ -1780,33 +1740,27 @@ func_exit:
}
bpage->zip.data = NULL;
+
page_zip_set_size(&bpage->zip, 0);
+
+ bpage->size.copy_from(page_size_t(bpage->size.logical(),
+ bpage->size.logical(),
+ false));
+
mutex_exit(block_mutex);
/* Prevent buf_page_get_gen() from
decompressing the block while we release
buf_pool->mutex and block_mutex. */
block_mutex = buf_page_get_mutex(b);
+
mutex_enter(block_mutex);
+
buf_page_set_sticky(b);
+
mutex_exit(block_mutex);
rw_lock_x_unlock(hash_lock);
-
- } else {
-
- /* There can be multiple threads doing an LRU scan to
- free a block. The page_cleaner thread can be doing an
- LRU batch whereas user threads can potentially be doing
- multiple single page flushes. As we release
- buf_pool->mutex below we need to make sure that no one
- else considers this block as a victim for page
- replacement. This block is already out of page_hash
- and we are about to remove it from the LRU list and put
- it on the free list. */
- mutex_enter(block_mutex);
- buf_page_set_sticky(bpage);
- mutex_exit(block_mutex);
}
buf_pool_mutex_exit(buf_pool);
@@ -1823,8 +1777,8 @@ func_exit:
UNIV_MEM_INVALID(((buf_block_t*) bpage)->frame,
UNIV_PAGE_SIZE);
- if (b) {
- ib_uint32_t checksum;
+ if (b != NULL) {
+
/* Compute and stamp the compressed page
checksum while not holding any mutex. The
block is already half-freed
@@ -1832,12 +1786,13 @@ func_exit:
buf_pool->page_hash, thus inaccessible by any
other thread. */
- checksum = static_cast<ib_uint32_t>(
- page_zip_calc_checksum(
- b->zip.data,
- page_zip_get_size(&b->zip),
- static_cast<srv_checksum_algorithm_t>(
- srv_checksum_algorithm)));
+ ut_ad(b->size.is_compressed());
+
+ const uint32_t checksum = page_zip_calc_checksum(
+ b->zip.data,
+ b->size.physical(),
+ static_cast<srv_checksum_algorithm_t>(
+ srv_checksum_algorithm));
mach_write_to_4(b->zip.data + FIL_PAGE_SPACE_OR_CHKSUM,
checksum);
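/* Stamping a checksum into a frame while holding no mutex, sketched; as
the comment above notes, this is safe only because the block is already
unreachable via buf_pool->page_hash. The explicit big-endian store mirrors
mach_write_to_4() at FIL_PAGE_SPACE_OR_CHKSUM (byte offset 0). */

#include <cstddef>
#include <cstdint>

void stamp_checksum(unsigned char* frame, std::size_t physical_size,
		    std::uint32_t (*calc)(const unsigned char*, std::size_t))
{
	const std::uint32_t c = calc(frame, physical_size);

	frame[0] = static_cast<unsigned char>(c >> 24);	/* most significant */
	frame[1] = static_cast<unsigned char>(c >> 16);
	frame[2] = static_cast<unsigned char>(c >> 8);
	frame[3] = static_cast<unsigned char>(c);
}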
@@ -1845,17 +1800,21 @@ func_exit:
buf_pool_mutex_enter(buf_pool);
- mutex_enter(block_mutex);
- buf_page_unset_sticky(b != NULL ? b : bpage);
- mutex_exit(block_mutex);
+ if (b != NULL) {
+ mutex_enter(block_mutex);
+
+ buf_page_unset_sticky(b);
+
+ mutex_exit(block_mutex);
+ }
buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
+
return(true);
}
/******************************************************************//**
Puts a block back to the free list. */
-UNIV_INTERN
void
buf_LRU_block_free_non_file_page(
/*=============================*/
@@ -1864,23 +1823,18 @@ buf_LRU_block_free_non_file_page(
void* data;
buf_pool_t* buf_pool = buf_pool_from_block(block);
- ut_ad(block);
ut_ad(buf_pool_mutex_own(buf_pool));
- ut_ad(mutex_own(&block->mutex));
+ ut_ad(buf_page_mutex_own(block));
switch (buf_block_get_state(block)) {
case BUF_BLOCK_MEMORY:
case BUF_BLOCK_READY_FOR_USE:
break;
default:
- fprintf(stderr, "InnoDB: Error: Block %p incorrect state %s in buf_LRU_block_free_non_file_page()\n",
- block, buf_get_state_name(block));
- return; /* Continue */
+ ut_error;
}
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- ut_a(block->n_pointers == 0);
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+ assert_block_ahi_empty(block);
ut_ad(!block->page.in_free_list);
ut_ad(!block->page.in_flush_list);
ut_ad(!block->page.in_LRU_list);
@@ -1895,24 +1849,42 @@ buf_LRU_block_free_non_file_page(
/* Wipe page_no and space_id */
memset(block->frame + FIL_PAGE_OFFSET, 0xfe, 4);
memset(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xfe, 4);
-#endif
+#endif /* UNIV_DEBUG */
data = block->page.zip.data;
- if (data) {
+ if (data != NULL) {
block->page.zip.data = NULL;
- mutex_exit(&block->mutex);
+ buf_page_mutex_exit(block);
buf_pool_mutex_exit_forbid(buf_pool);
- buf_buddy_free(
- buf_pool, data, page_zip_get_size(&block->page.zip));
+ ut_ad(block->page.size.is_compressed());
+
+ buf_buddy_free(buf_pool, data, block->page.size.physical());
buf_pool_mutex_exit_allow(buf_pool);
- mutex_enter(&block->mutex);
+ buf_page_mutex_enter(block);
+
page_zip_set_size(&block->page.zip, 0);
+
+ block->page.size.copy_from(
+ page_size_t(block->page.size.logical(),
+ block->page.size.logical(),
+ false));
+ }
+
+ if (buf_pool->curr_size < buf_pool->old_size
+ && UT_LIST_GET_LEN(buf_pool->withdraw) < buf_pool->withdraw_target
+ && buf_block_will_withdrawn(buf_pool, block)) {
+ /* This should be withdrawn */
+ UT_LIST_ADD_LAST(
+ buf_pool->withdraw,
+ &block->page);
+ ut_d(block->in_withdraw_list = TRUE);
+ } else {
+ UT_LIST_ADD_FIRST(buf_pool->free, &block->page);
+ ut_d(block->page.in_free_list = TRUE);
}
- UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
- ut_d(block->page.in_free_list = TRUE);
UNIV_MEM_FREE(block->frame, UNIV_PAGE_SIZE);
}
@@ -1940,20 +1912,16 @@ buf_LRU_block_remove_hashed(
bool zip) /*!< in: true if should remove also the
compressed page of an uncompressed page */
{
- ulint fold;
const buf_page_t* hashed_bpage;
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
rw_lock_t* hash_lock;
- ut_ad(bpage);
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
- fold = buf_page_address_fold(bpage->space, bpage->offset);
- hash_lock = buf_page_hash_lock_get(buf_pool, fold);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ hash_lock = buf_page_hash_lock_get(buf_pool, bpage->id);
+
+ ut_ad(rw_lock_own(hash_lock, RW_LOCK_X));
ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
ut_a(bpage->buf_fix_count == 0);
@@ -1970,13 +1938,11 @@ buf_LRU_block_remove_hashed(
buf_block_modify_clock_inc((buf_block_t*) bpage);
if (bpage->zip.data) {
const page_t* page = ((buf_block_t*) bpage)->frame;
- const ulint zip_size
- = page_zip_get_size(&bpage->zip);
ut_a(!zip || bpage->oldest_modification == 0);
+ ut_ad(bpage->size.is_compressed());
- switch (UNIV_EXPECT(fil_page_get_type(page),
- FIL_PAGE_INDEX)) {
+ switch (fil_page_get_type(page)) {
case FIL_PAGE_TYPE_ALLOCATED:
case FIL_PAGE_INODE:
case FIL_PAGE_IBUF_BITMAP:
@@ -1989,13 +1955,14 @@ buf_LRU_block_remove_hashed(
to the compressed page, which will
be preserved. */
memcpy(bpage->zip.data, page,
- zip_size);
+ bpage->size.physical());
}
break;
case FIL_PAGE_TYPE_ZBLOB:
case FIL_PAGE_TYPE_ZBLOB2:
break;
case FIL_PAGE_INDEX:
+ case FIL_PAGE_RTREE:
#ifdef UNIV_ZIP_DEBUG
ut_a(page_zip_validate(
&bpage->zip, page,
@@ -2003,14 +1970,16 @@ buf_LRU_block_remove_hashed(
#endif /* UNIV_ZIP_DEBUG */
break;
default:
- ut_print_timestamp(stderr);
- fputs(" InnoDB: ERROR: The compressed page"
- " to be evicted seems corrupt:", stderr);
- ut_print_buf(stderr, page, zip_size);
- fputs("\nInnoDB: Possibly older version"
- " of the page:", stderr);
+ ib::error() << "The compressed page to be"
+ " evicted seems corrupt:";
+ ut_print_buf(stderr, page,
+ bpage->size.logical());
+
+ ib::error() << "Possibly older version of"
+ " the page:";
+
ut_print_buf(stderr, bpage->zip.data,
- zip_size);
+ bpage->size.physical());
putc('\n', stderr);
ut_error;
}
@@ -2020,8 +1989,10 @@ buf_LRU_block_remove_hashed(
/* fall through */
case BUF_BLOCK_ZIP_PAGE:
ut_a(bpage->oldest_modification == 0);
- UNIV_MEM_ASSERT_W(bpage->zip.data,
- page_zip_get_size(&bpage->zip));
+ if (bpage->size.is_compressed()) {
+ UNIV_MEM_ASSERT_W(bpage->zip.data,
+ bpage->size.physical());
+ }
break;
case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_ZIP_DIRTY:
@@ -2033,41 +2004,35 @@ buf_LRU_block_remove_hashed(
break;
}
- hashed_bpage = buf_page_hash_get_low(buf_pool, bpage->space,
- bpage->offset, fold);
+ hashed_bpage = buf_page_hash_get_low(buf_pool, bpage->id);
+ if (bpage != hashed_bpage) {
+ ib::error() << "Page " << bpage->id
+ << " not found in the hash table";
- if (UNIV_UNLIKELY(bpage != hashed_bpage)) {
- fprintf(stderr,
- "InnoDB: Error: page %lu %lu not found"
- " in the hash table\n",
- (ulong) bpage->space,
- (ulong) bpage->offset);
#ifdef UNIV_DEBUG
- fprintf(stderr,
- "InnoDB: in_page_hash %lu in_zip_hash %lu\n"
- " in_free_list %lu in_flush_list %lu in_LRU_list %lu\n"
- " zip.data %p zip_size %lu page_state %d\n",
- bpage->in_page_hash, bpage->in_zip_hash,
- bpage->in_free_list, bpage->in_flush_list,
- bpage->in_LRU_list, bpage->zip.data,
- buf_page_get_zip_size(bpage),
- buf_page_get_state(bpage));
+
+ ib::error()
+ << "in_page_hash:" << bpage->in_page_hash
+ << " in_zip_hash:" << bpage->in_zip_hash
+			// << " in_free_list:" << bpage->in_free_list
+ << " in_flush_list:" << bpage->in_flush_list
+ << " in_LRU_list:" << bpage->in_LRU_list
+ << " zip.data:" << bpage->zip.data
+ << " zip_size:" << bpage->size.logical()
+ << " page_state:" << buf_page_get_state(bpage);
#else
- fprintf(stderr,
- "InnoDB: zip.data %p zip_size %lu page_state %d\n",
- bpage->zip.data,
- buf_page_get_zip_size(bpage),
- buf_page_get_state(bpage));
+ ib::error()
+ << " zip.data:" << bpage->zip.data
+ << " zip_size:" << bpage->size.logical()
+ << " page_state:" << buf_page_get_state(bpage);
#endif
if (hashed_bpage) {
- fprintf(stderr,
- "InnoDB: In hash table we find block"
- " %p of %lu %lu which is not %p\n",
- (const void*) hashed_bpage,
- (ulong) hashed_bpage->space,
- (ulong) hashed_bpage->offset,
- (const void*) bpage);
+
+ ib::error() << "In hash table we find block "
+ << hashed_bpage << " of " << hashed_bpage->id
+ << " which is not " << bpage;
}
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
@@ -2085,26 +2050,28 @@ buf_LRU_block_remove_hashed(
ut_ad(!bpage->in_zip_hash);
ut_ad(bpage->in_page_hash);
ut_d(bpage->in_page_hash = FALSE);
- HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
+
+ HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, bpage->id.fold(),
+ bpage);
+
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_ZIP_PAGE:
ut_ad(!bpage->in_free_list);
ut_ad(!bpage->in_flush_list);
ut_ad(!bpage->in_LRU_list);
ut_a(bpage->zip.data);
- ut_a(buf_page_get_zip_size(bpage));
+ ut_a(bpage->size.is_compressed());
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
+ UT_LIST_REMOVE(buf_pool->zip_clean, bpage);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
mutex_exit(&buf_pool->zip_mutex);
rw_lock_x_unlock(hash_lock);
buf_pool_mutex_exit_forbid(buf_pool);
- buf_buddy_free(
- buf_pool, bpage->zip.data,
- page_zip_get_size(&bpage->zip));
+ buf_buddy_free(buf_pool, bpage->zip.data,
+ bpage->size.physical());
buf_pool_mutex_exit_allow(buf_pool);
buf_page_free_descriptor(bpage);
@@ -2119,11 +2086,6 @@ buf_LRU_block_remove_hashed(
UNIV_PAGE_SIZE);
buf_page_set_state(bpage, BUF_BLOCK_REMOVE_HASH);
- if (buf_pool->flush_rbt == NULL) {
- bpage->space = ULINT32_UNDEFINED;
- bpage->offset = ULINT32_UNDEFINED;
- }
-
/* Question: If we release bpage and hash mutex here
then what protects us against:
1) Some other thread buffer fixing this page
@@ -2156,12 +2118,16 @@ buf_LRU_block_remove_hashed(
ut_ad(!bpage->in_LRU_list);
buf_pool_mutex_exit_forbid(buf_pool);
- buf_buddy_free(
- buf_pool, data,
- page_zip_get_size(&bpage->zip));
+ buf_buddy_free(buf_pool, data, bpage->size.physical());
buf_pool_mutex_exit_allow(buf_pool);
+
page_zip_set_size(&bpage->zip, 0);
+
+ bpage->size.copy_from(
+ page_size_t(bpage->size.logical(),
+ bpage->size.logical(),
+ false));
}
return(true);
@@ -2188,54 +2154,60 @@ buf_LRU_block_free_hashed_page(
buf_block_t* block) /*!< in: block, must contain a file page and
be in a state where it can be freed */
{
-#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_block(block);
ut_ad(buf_pool_mutex_own(buf_pool));
-#endif
- mutex_enter(&block->mutex);
+ buf_page_mutex_enter(block);
+
+ if (buf_pool->flush_rbt == NULL) {
+ block->page.id
+ = page_id_t(ULINT32_UNDEFINED, ULINT32_UNDEFINED);
+ }
+
buf_block_set_state(block, BUF_BLOCK_MEMORY);
buf_LRU_block_free_non_file_page(block);
- mutex_exit(&block->mutex);
+ buf_page_mutex_exit(block);
}
-/******************************************************************//**
-Remove one page from LRU list and put it to free list */
-UNIV_INTERN
-void
-buf_LRU_free_one_page(
-/*==================*/
- buf_page_t* bpage) /*!< in/out: block, must contain a file page and
- be in a state where it can be freed; there
- may or may not be a hash index to the page */
+/** Remove one page from LRU list and put it to free list.
+@param[in,out] bpage block, must contain a file page and be in
+ a freeable state; there may or may not be a
+ hash index to the page
+@param[in] old_page_id page number before bpage->id was invalidated */
+void buf_LRU_free_one_page(buf_page_t* bpage, page_id_t old_page_id)
{
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- const ulint fold = buf_page_address_fold(bpage->space,
- bpage->offset);
- rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold);
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool,
+ old_page_id);
+ BPageMutex* block_mutex = buf_page_get_mutex(bpage);
ut_ad(buf_pool_mutex_own(buf_pool));
rw_lock_x_lock(hash_lock);
+
+ while (buf_block_get_fix(bpage) > 0) {
+		/* Wait for other threads to release their buffer-fix
+		before removing bpage from the LRU list. */
+ }
+
mutex_enter(block_mutex);
+ bpage->id = old_page_id;
+
if (buf_LRU_block_remove_hashed(bpage, true)) {
buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
}
/* buf_LRU_block_remove_hashed() releases hash_lock and block_mutex */
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)
- && !rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!rw_lock_own_flagged(hash_lock,
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
ut_ad(!mutex_own(block_mutex));
}
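/* The bare while loop above is a spin-wait on the buffer-fix count. With
a std::atomic counter the same wait looks like this sketch; acquire
ordering makes the releasing threads' writes visible to the waiter. */

#include <atomic>

void wait_until_unfixed(const std::atomic<unsigned>& fix_count)
{
	while (fix_count.load(std::memory_order_acquire) > 0) {
		/* spin: other threads still hold a fix on the page */
	}
}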
/**********************************************************************//**
Updates buf_pool->LRU_old_ratio for one buffer pool instance.
-@return updated old_pct */
+@return updated old_pct */
static
uint
buf_LRU_old_ratio_update_instance(
@@ -2263,7 +2235,7 @@ buf_LRU_old_ratio_update_instance(
buf_pool->LRU_old_ratio = ratio;
if (UT_LIST_GET_LEN(buf_pool->LRU)
- >= BUF_LRU_OLD_MIN_LEN) {
+ >= BUF_LRU_OLD_MIN_LEN) {
buf_LRU_old_adjust_len(buf_pool);
}
@@ -2280,8 +2252,7 @@ buf_LRU_old_ratio_update_instance(
/**********************************************************************//**
Updates buf_pool->LRU_old_ratio.
-@return updated old_pct */
-UNIV_INTERN
+@return updated old_pct */
uint
buf_LRU_old_ratio_update(
/*=====================*/
@@ -2291,10 +2262,9 @@ buf_LRU_old_ratio_update(
false=just assign buf_pool->LRU_old_ratio
during the initialization of InnoDB */
{
- ulint i;
uint new_ratio = 0;
- for (i = 0; i < srv_buf_pool_instances; i++) {
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool;
buf_pool = buf_pool_from_array(i);
@@ -2309,24 +2279,22 @@ buf_LRU_old_ratio_update(
/********************************************************************//**
Update the historical stats that we are collecting for LRU eviction
policy at the end of each interval. */
-UNIV_INTERN
void
buf_LRU_stat_update(void)
/*=====================*/
{
- ulint i;
buf_LRU_stat_t* item;
buf_pool_t* buf_pool;
- ibool evict_started = FALSE;
+	bool		evict_started = false;
buf_LRU_stat_t cur_stat;
/* If we haven't started eviction yet then don't update stats. */
- for (i = 0; i < srv_buf_pool_instances; i++) {
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool = buf_pool_from_array(i);
if (buf_pool->freed_page_clock != 0) {
- evict_started = TRUE;
+ evict_started = true;
break;
}
}
@@ -2368,33 +2336,32 @@ buf_LRU_validate_instance(
/*======================*/
buf_pool_t* buf_pool)
{
- buf_page_t* bpage;
- buf_block_t* block;
ulint old_len;
ulint new_len;
- ut_ad(buf_pool);
buf_pool_mutex_enter(buf_pool);
if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
ut_a(buf_pool->LRU_old);
old_len = buf_pool->LRU_old_len;
+
new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU)
* buf_pool->LRU_old_ratio
/ BUF_LRU_OLD_RATIO_DIV,
UT_LIST_GET_LEN(buf_pool->LRU)
- (BUF_LRU_OLD_TOLERANCE
+ BUF_LRU_NON_OLD_MIN_LEN));
+
ut_a(old_len >= new_len - BUF_LRU_OLD_TOLERANCE);
ut_a(old_len <= new_len + BUF_LRU_OLD_TOLERANCE);
}
- UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU, CheckInLRUList());
+ CheckInLRUList::validate(buf_pool);
old_len = 0;
- for (bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
+ for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
bpage != NULL;
bpage = UT_LIST_GET_NEXT(LRU, bpage)) {
@@ -2432,21 +2399,19 @@ buf_LRU_validate_instance(
ut_a(buf_pool->LRU_old_len == old_len);
- UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free, CheckInFreeList());
+ CheckInFreeList::validate(buf_pool);
- for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
+ for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool->free);
bpage != NULL;
bpage = UT_LIST_GET_NEXT(list, bpage)) {
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
}
- UT_LIST_VALIDATE(
- unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
- CheckUnzipLRUAndLRUList());
+ CheckUnzipLRUAndLRUList::validate(buf_pool);
- for (block = UT_LIST_GET_FIRST(buf_pool->unzip_LRU);
- block;
+ for (buf_block_t* block = UT_LIST_GET_FIRST(buf_pool->unzip_LRU);
+ block != NULL;
block = UT_LIST_GET_NEXT(unzip_LRU, block)) {
ut_ad(block->in_unzip_LRU_list);
@@ -2459,15 +2424,12 @@ buf_LRU_validate_instance(
/**********************************************************************//**
Validates the LRU list.
-@return TRUE */
-UNIV_INTERN
+@return TRUE */
ibool
buf_LRU_validate(void)
/*==================*/
{
- ulint i;
-
- for (i = 0; i < srv_buf_pool_instances; i++) {
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool;
buf_pool = buf_pool_from_array(i);
@@ -2481,38 +2443,35 @@ buf_LRU_validate(void)
#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/**********************************************************************//**
Prints the LRU list for one buffer pool instance. */
-UNIV_INTERN
+static
void
buf_LRU_print_instance(
/*===================*/
buf_pool_t* buf_pool)
{
- const buf_page_t* bpage;
-
- ut_ad(buf_pool);
buf_pool_mutex_enter(buf_pool);
- bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
-
- while (bpage != NULL) {
+ for (const buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
+ bpage != NULL;
+ bpage = UT_LIST_GET_NEXT(LRU, bpage)) {
mutex_enter(buf_page_get_mutex(bpage));
- fprintf(stderr, "BLOCK space %lu page %lu ",
- (ulong) buf_page_get_space(bpage),
- (ulong) buf_page_get_page_no(bpage));
+
+ fprintf(stderr, "BLOCK space %u page %u ",
+ bpage->id.space(), bpage->id.page_no());
if (buf_page_is_old(bpage)) {
fputs("old ", stderr);
}
if (bpage->buf_fix_count) {
- fprintf(stderr, "buffix count %lu ",
- (ulong) bpage->buf_fix_count);
+ fprintf(stderr, "buffix count %u ",
+ bpage->buf_fix_count);
}
if (buf_page_get_io_fix(bpage)) {
- fprintf(stderr, "io_fix %lu ",
- (ulong) buf_page_get_io_fix(bpage));
+ fprintf(stderr, "io_fix %d ",
+ buf_page_get_io_fix(bpage));
}
if (bpage->oldest_modification) {
@@ -2523,28 +2482,26 @@ buf_LRU_print_instance(
const byte* frame;
case BUF_BLOCK_FILE_PAGE:
frame = buf_block_get_frame((buf_block_t*) bpage);
- fprintf(stderr, "\ntype %lu"
- " index id %llu\n",
- (ulong) fil_page_get_type(frame),
- (ullint) btr_page_get_index_id(frame));
+ fprintf(stderr, "\ntype %u index id " IB_ID_FMT "\n",
+ fil_page_get_type(frame),
+ btr_page_get_index_id(frame));
break;
case BUF_BLOCK_ZIP_PAGE:
frame = bpage->zip.data;
- fprintf(stderr, "\ntype %lu size %lu"
- " index id %llu\n",
- (ulong) fil_page_get_type(frame),
- (ulong) buf_page_get_zip_size(bpage),
- (ullint) btr_page_get_index_id(frame));
+ fprintf(stderr, "\ntype %u size " ULINTPF
+ " index id " IB_ID_FMT "\n",
+ fil_page_get_type(frame),
+ bpage->size.physical(),
+ btr_page_get_index_id(frame));
break;
default:
- fprintf(stderr, "\n!state %lu!\n",
- (ulong) buf_page_get_state(bpage));
+ fprintf(stderr, "\n!state %d!\n",
+ buf_page_get_state(bpage));
break;
}
mutex_exit(buf_page_get_mutex(bpage));
- bpage = UT_LIST_GET_NEXT(LRU, bpage);
}
buf_pool_mutex_exit(buf_pool);
@@ -2552,18 +2509,15 @@ buf_LRU_print_instance(
/**********************************************************************//**
Prints the LRU list. */
-UNIV_INTERN
void
buf_LRU_print(void)
/*===============*/
{
- ulint i;
- buf_pool_t* buf_pool;
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
+ buf_pool_t* buf_pool;
- for (i = 0; i < srv_buf_pool_instances; i++) {
buf_pool = buf_pool_from_array(i);
buf_LRU_print_instance(buf_pool);
}
}
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/buf/buf0mtflu.cc b/storage/innobase/buf/buf0mtflu.cc
index 0ecc55ed4a9..aae90e48168 100644
--- a/storage/innobase/buf/buf0mtflu.cc
+++ b/storage/innobase/buf/buf0mtflu.cc
@@ -43,7 +43,6 @@ Modified 06/02/2014 Jan Lindström jan.lindstrom@skysql.com
#include "ibuf0ibuf.h"
#include "log0log.h"
#include "os0file.h"
-#include "os0sync.h"
#include "trx0sys.h"
#include "srv0mon.h"
#include "mysql/plugin.h"
@@ -119,18 +118,67 @@ typedef struct wrk_itm
mem_heap_t *rheap;
} wrk_t;
-typedef struct thread_data
+struct thread_data_t
{
os_thread_id_t wthread_id; /*!< Identifier */
- os_thread_t wthread; /*!< Thread id */
wthr_status_t wt_status; /*!< Worker thread status */
-} thread_data_t;
+};
-/* Thread syncronization data */
-typedef struct thread_sync
+/** Flush dirty pages when multi-threaded flush is used. */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(mtflush_io_thread)(void* arg);
+
+/** Thread synchronization data */
+struct thread_sync_t
{
+ /** Constructor */
+ thread_sync_t(ulint n_threads, mem_heap_t* wheap, mem_heap_t* rheap) :
+ thread_global_mtx(), n_threads(n_threads),
+ wq(ib_wqueue_create()),
+ wr_cq(ib_wqueue_create()),
+ rd_cq(ib_wqueue_create()),
+ wheap(wheap), rheap(rheap), gwt_status(),
+ thread_data(static_cast<thread_data_t*>(
+ mem_heap_zalloc(wheap, n_threads
+ * sizeof *thread_data)))
+ {
+ ut_a(wq);
+ ut_a(wr_cq);
+ ut_a(rd_cq);
+ ut_a(thread_data);
+
+ mutex_create(LATCH_ID_MTFLUSH_THREAD_MUTEX,
+ &thread_global_mtx);
+
+ /* Create threads for page-compression-flush */
+ for(ulint i = 0; i < n_threads; i++) {
+ thread_data[i].wt_status = WTHR_INITIALIZED;
+ os_thread_create(mtflush_io_thread, this,
+ &thread_data[i].wthread_id);
+ }
+ }
+
+ /** Destructor */
+ ~thread_sync_t()
+ {
+ ut_a(ib_wqueue_is_empty(wq));
+ ut_a(ib_wqueue_is_empty(wr_cq));
+ ut_a(ib_wqueue_is_empty(rd_cq));
+
+ /* Free all queues */
+ ib_wqueue_free(wq);
+ ib_wqueue_free(wr_cq);
+ ib_wqueue_free(rd_cq);
+
+ mutex_free(&thread_global_mtx);
+
+ mem_heap_free(rheap);
+ mem_heap_free(wheap);
+ }
+
/* Global variables used by all threads */
- os_fast_mutex_t thread_global_mtx; /*!< Mutex used protecting below
+ ib_mutex_t thread_global_mtx; /*!< Mutex protecting the following
variables */
ulint n_threads; /*!< Number of threads */
ib_wqueue_t *wq; /*!< Work Queue */
@@ -144,22 +192,10 @@ typedef struct thread_sync
/* Variables used by only one thread at a time */
thread_data_t* thread_data; /*!< Thread specific data */
+};
-} thread_sync_t;
-
-static int mtflush_work_initialized = -1;
-static thread_sync_t* mtflush_ctx=NULL;
-static os_fast_mutex_t mtflush_mtx;
-
-/******************************************************************//**
-Set multi-threaded flush work initialized. */
-static inline
-void
-buf_mtflu_work_init(void)
-/*=====================*/
-{
- mtflush_work_initialized = 1;
-}
+static thread_sync_t* mtflush_ctx;
+static ib_mutex_t mtflush_mtx;
/******************************************************************//**
Return true if multi-threaded flush is initialized
@@ -168,7 +204,7 @@ bool
buf_mtflu_init_done(void)
/*=====================*/
{
- return(mtflush_work_initialized == 1);
+ return(mtflush_ctx != NULL);
}
/******************************************************************//**
@@ -211,7 +247,7 @@ buf_mtflu_flush_pool_instance(
buf_pool_mutex_enter(work_item->wr.buf_pool);
work_item->wr.min = UT_LIST_GET_LEN(work_item->wr.buf_pool->LRU);
buf_pool_mutex_exit(work_item->wr.buf_pool);
- work_item->wr.min = ut_min(srv_LRU_scan_depth,work_item->wr.min);
+ work_item->wr.min = ut_min((ulint)srv_LRU_scan_depth,(ulint)work_item->wr.min);
}
buf_flush_batch(work_item->wr.buf_pool,
@@ -309,22 +345,17 @@ mtflush_service_io(
}
}
-/******************************************************************//**
-Thead used to flush dirty pages when multi-threaded flush is
-used.
-@return a dummy parameter*/
+/** Flush dirty pages when multi-threaded flush is used. */
extern "C" UNIV_INTERN
os_thread_ret_t
-DECLARE_THREAD(mtflush_io_thread)(
-/*==============================*/
- void * arg)
+DECLARE_THREAD(mtflush_io_thread)(void* arg)
{
thread_sync_t *mtflush_io = ((thread_sync_t *)arg);
thread_data_t *this_thread_data = NULL;
ulint i;
/* Find correct slot for this thread */
- os_fast_mutex_lock(&(mtflush_io->thread_global_mtx));
+ mutex_enter(&(mtflush_io->thread_global_mtx));
for(i=0; i < mtflush_io->n_threads; i ++) {
if (mtflush_io->thread_data[i].wthread_id == os_thread_get_curr_id()) {
break;
@@ -333,7 +364,7 @@ DECLARE_THREAD(mtflush_io_thread)(
ut_a(i <= mtflush_io->n_threads);
this_thread_data = &mtflush_io->thread_data[i];
- os_fast_mutex_unlock(&(mtflush_io->thread_global_mtx));
+ mutex_exit(&(mtflush_io->thread_global_mtx));
while (TRUE) {
@@ -352,7 +383,7 @@ DECLARE_THREAD(mtflush_io_thread)(
}
}
- os_thread_exit(NULL);
+ os_thread_exit();
OS_THREAD_DUMMY_RETURN;
}
@@ -389,7 +420,7 @@ buf_mtflu_io_thread_exit(void)
been processed. Thus, we can get this mutex if and only if work
queue is empty. */
- os_fast_mutex_lock(&mtflush_mtx);
+ mutex_enter(&mtflush_mtx);
/* Make sure the work queue is empty */
ut_a(ib_wqueue_is_empty(mtflush_io->wq));
@@ -408,7 +439,7 @@ buf_mtflu_io_thread_exit(void)
}
/* Requests sent */
- os_fast_mutex_unlock(&mtflush_mtx);
+ mutex_exit(&mtflush_mtx);
/* Wait until all work items on a work queue are processed */
while(!ib_wqueue_is_empty(mtflush_io->wq)) {
@@ -440,29 +471,10 @@ buf_mtflu_io_thread_exit(void)
ib_wqueue_nowait(mtflush_io->wq);
}
- os_fast_mutex_lock(&mtflush_mtx);
+ mtflush_ctx->~thread_sync_t();
+ mtflush_ctx = NULL;
- ut_a(ib_wqueue_is_empty(mtflush_io->wq));
- ut_a(ib_wqueue_is_empty(mtflush_io->wr_cq));
- ut_a(ib_wqueue_is_empty(mtflush_io->rd_cq));
-
- /* Free all queues */
- ib_wqueue_free(mtflush_io->wq);
- ib_wqueue_free(mtflush_io->wr_cq);
- ib_wqueue_free(mtflush_io->rd_cq);
-
- mtflush_io->wq = NULL;
- mtflush_io->wr_cq = NULL;
- mtflush_io->rd_cq = NULL;
- mtflush_work_initialized = 0;
-
- /* Free heap */
- mem_heap_free(mtflush_io->wheap);
- mem_heap_free(mtflush_io->rheap);
-
- os_fast_mutex_unlock(&mtflush_mtx);
- os_fast_mutex_free(&mtflush_mtx);
- os_fast_mutex_free(&mtflush_io->thread_global_mtx);
+ mutex_free(&mtflush_mtx);
}
/******************************************************************//**
@@ -474,7 +486,6 @@ buf_mtflu_handler_init(
ulint n_threads, /*!< in: Number of threads to create */
ulint wrk_cnt) /*!< in: Number of work items */
{
- ulint i;
mem_heap_t* mtflush_heap;
mem_heap_t* mtflush_heap2;
@@ -486,42 +497,10 @@ buf_mtflu_handler_init(
mtflush_heap2 = mem_heap_create(0);
ut_a(mtflush_heap2 != NULL);
- mtflush_ctx = (thread_sync_t *)mem_heap_zalloc(mtflush_heap,
- sizeof(thread_sync_t));
-
- ut_a(mtflush_ctx != NULL);
- mtflush_ctx->thread_data = (thread_data_t*)mem_heap_zalloc(
- mtflush_heap, sizeof(thread_data_t) * n_threads);
- ut_a(mtflush_ctx->thread_data);
-
- mtflush_ctx->n_threads = n_threads;
- mtflush_ctx->wq = ib_wqueue_create();
- ut_a(mtflush_ctx->wq);
- mtflush_ctx->wr_cq = ib_wqueue_create();
- ut_a(mtflush_ctx->wr_cq);
- mtflush_ctx->rd_cq = ib_wqueue_create();
- ut_a(mtflush_ctx->rd_cq);
- mtflush_ctx->wheap = mtflush_heap;
- mtflush_ctx->rheap = mtflush_heap2;
-
- os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_ctx->thread_global_mtx);
- os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx);
-
- /* Create threads for page-compression-flush */
- for(i=0; i < n_threads; i++) {
- os_thread_id_t new_thread_id;
-
- mtflush_ctx->thread_data[i].wt_status = WTHR_INITIALIZED;
-
- mtflush_ctx->thread_data[i].wthread = os_thread_create(
- mtflush_io_thread,
- ((void *) mtflush_ctx),
- &new_thread_id);
-
- mtflush_ctx->thread_data[i].wthread_id = new_thread_id;
- }
+ mutex_create(LATCH_ID_MTFLUSH_MUTEX, &mtflush_mtx);
- buf_mtflu_work_init();
+ mtflush_ctx = new (mem_heap_zalloc(mtflush_heap, sizeof *mtflush_ctx))
+ thread_sync_t(n_threads, mtflush_heap, mtflush_heap2);
return((void *)mtflush_ctx);
}
@@ -646,11 +625,11 @@ buf_mtflu_flush_list(
}
/* This lock is to safeguard against re-entry if any. */
- os_fast_mutex_lock(&mtflush_mtx);
+ mutex_enter(&mtflush_mtx);
buf_mtflu_flush_work_items(srv_buf_pool_instances,
cnt, BUF_FLUSH_LIST,
min_n, lsn_limit);
- os_fast_mutex_unlock(&mtflush_mtx);
+ mutex_exit(&mtflush_mtx);
for (i = 0; i < srv_buf_pool_instances; i++) {
if (n_processed) {
@@ -703,10 +682,10 @@ buf_mtflu_flush_LRU_tail(void)
}
/* This lock is to safeguard against re-entry if any */
- os_fast_mutex_lock(&mtflush_mtx);
+ mutex_enter(&mtflush_mtx);
buf_mtflu_flush_work_items(srv_buf_pool_instances,
cnt, BUF_FLUSH_LRU, srv_LRU_scan_depth, 0);
- os_fast_mutex_unlock(&mtflush_mtx);
+ mutex_exit(&mtflush_mtx);
for (i = 0; i < srv_buf_pool_instances; i++) {
total_flushed += cnt[i].flushed+cnt[i].evicted;
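
The hunks above convert the flush context from hand-rolled init/teardown into a
constructor/destructor pair that is placement-new'ed into a mem_heap (see the
new (mem_heap_zalloc(...)) thread_sync_t(...) call in buf_mtflu_handler_init
and the explicit mtflush_ctx->~thread_sync_t() in buf_mtflu_io_thread_exit).
A minimal standalone sketch of that lifecycle, using simplified stand-in types
rather than the real InnoDB ones:

    #include <new>
    #include <cstdlib>

    // Stand-in for thread_sync_t; the real constructor also creates the
    // work queues and starts the worker threads.
    struct flush_ctx {
        explicit flush_ctx(unsigned n) : n_threads(n) {}
        ~flush_ctx() {}        // the real one drains and frees the queues
        unsigned n_threads;
    };

    int main()
    {
        void* buf = std::malloc(sizeof(flush_ctx)); // mem_heap_zalloc() in the patch
        flush_ctx* ctx = new (buf) flush_ctx(4);    // construct in place
        ctx->~flush_ctx();                          // explicit destructor call
        std::free(buf);                             // storage freed separately
        return 0;
    }

One subtlety of the patch: ~thread_sync_t() frees wheap and rheap itself, and
the object lives inside wheap, so the destructor call also releases the
object's own storage; the caller has nothing further to free.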
diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc
index 3b4d21f7507..43831dedb2c 100644
--- a/storage/innobase/buf/buf0rea.cc
+++ b/storage/innobase/buf/buf0rea.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2017, MariaDB Corporation.
+Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2015, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,11 +24,12 @@ The database buffer read
Created 11/5/1995 Heikki Tuuri
*******************************************************/
-#include "buf0rea.h"
+#include "univ.i"
+#include <mysql/service_thd_wait.h>
+#include "buf0rea.h"
#include "fil0fil.h"
#include "mtr0mtr.h"
-
#include "buf0buf.h"
#include "buf0flu.h"
#include "buf0lru.h"
@@ -39,8 +40,6 @@ Created 11/5/1995 Heikki Tuuri
#include "os0file.h"
#include "srv0start.h"
#include "srv0srv.h"
-#include "mysql/plugin.h"
-#include "mysql/service_thd_wait.h"
/** There must be at least this many pages in buf_pool in the area to start
a random read-ahead */
@@ -64,13 +63,14 @@ buf_read_page_handle_error(
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
const bool uncompressed = (buf_page_get_state(bpage)
== BUF_BLOCK_FILE_PAGE);
+ const page_id_t old_page_id = bpage->id;
/* First unfix and release lock on the bpage */
buf_pool_mutex_enter(buf_pool);
mutex_enter(buf_page_get_mutex(bpage));
ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ);
- ut_ad(bpage->buf_fix_count == 0);
+ bpage->id.set_corrupt_id();
/* Set BUF_IO_NONE before we remove the block from LRU list */
buf_page_set_io_fix(bpage, BUF_IO_NONE);
@@ -83,7 +83,7 @@ buf_read_page_handle_error(
mutex_exit(buf_page_get_mutex(bpage));
/* remove the block from LRU list */
- buf_LRU_free_one_page(bpage);
+ buf_LRU_free_one_page(bpage, old_page_id);
ut_ad(buf_pool->n_pend_reads > 0);
buf_pool->n_pend_reads--;
@@ -91,72 +91,51 @@ buf_read_page_handle_error(
buf_pool_mutex_exit(buf_pool);
}
-/********************************************************************//**
-Low-level function which reads a page asynchronously from a file to the
+/** Low-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there, in which case does nothing.
Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
flag is cleared and the x-lock released by an i/o-handler thread.
+
+@param[out] err DB_SUCCESS, DB_TABLESPACE_DELETED or
+ DB_TABLESPACE_TRUNCATED if we are trying
+ to read from a non-existent tablespace, a
+ tablespace which is just now being dropped,
+ or a tablespace which is truncated
+@param[in] sync true if synchronous aio is desired
+@param[in] type IO type, SIMULATED, IGNORE_MISSING
+@param[in] mode BUF_READ_IBUF_PAGES_ONLY, ...,
+@param[in] page_id page id
+@param[in] page_size page size
+@param[in] unzip true=request uncompressed page
+@param[in] ignore_missing_space true=ignore missing space when reading
@return 1 if a read request was queued, 0 if the page already resided
in buf_pool, or if the page is in the doublewrite buffer blocks in
which case it is never read into the pool, or if the tablespace does
-not exist or is being dropped
-
-@param[out] err DB_SUCCESS, DB_TABLESPACE_DELETED if we are
- trying to read from a non-existent tablespace, or a
- tablespace which is just now being dropped,
- DB_PAGE_CORRUPTED if page based on checksum
- check is corrupted, or DB_DECRYPTION_FAILED
- if page post encryption checksum matches but
- after decryption normal page checksum does not match.
-@param[in] sync true if synchronous aio is desired
-@param[in] mode BUF_READ_IBUF_PAGES_ONLY, ...,
- ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
- at read-ahead functions)
-@param[in] space space id
-@param[in] zip_size compressed page size, or 0
-@param[in] unzip TRUE=request uncompressed page
-@param[in] tablespace_version if the space memory object has
- this timestamp different from what we are giving here,
- treat the tablespace as dropped; this is a timestamp we
- use to stop dangling page reads from a tablespace
- which we have DISCARDed + IMPORTed back
-@param[in] offset page number
-@return 1 if read request is issued. 0 if it is not */
+not exist or is being dropped */
static
ulint
buf_read_page_low(
- dberr_t* err,
- bool sync,
- ulint mode,
- ulint space,
- ulint zip_size,
- ibool unzip,
- ib_int64_t tablespace_version,
- ulint offset)
+ dberr_t* err,
+ bool sync,
+ ulint type,
+ ulint mode,
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ bool unzip,
+ bool ignore_missing_space = false)
{
buf_page_t* bpage;
- ulint wake_later;
- ibool ignore_nonexistent_pages;
- *err = DB_SUCCESS;
-
- wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
- mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER;
- ignore_nonexistent_pages = mode & BUF_READ_IGNORE_NONEXISTENT_PAGES;
- mode &= ~BUF_READ_IGNORE_NONEXISTENT_PAGES;
+ *err = DB_SUCCESS;
- if (space == TRX_SYS_SPACE && buf_dblwr_page_inside(offset)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: trying to read"
- " doublewrite buffer page %lu\n",
- (ulong) offset);
+ if (page_id.space() == TRX_SYS_SPACE
+ && buf_dblwr_page_inside(page_id.page_no())) {
+ ib::error() << "Trying to read doublewrite buffer page "
+ << page_id;
return(0);
}
- if (ibuf_bitmap_page(zip_size, offset)
- || trx_sys_hdr_page(space, offset)) {
+ if (ibuf_bitmap_page(page_id, page_size) || trx_sys_hdr_page(page_id)) {
/* Trx sys header is so low in the latching order that we play
safe and do not leave the i/o-completion to an asynchronous
@@ -171,54 +150,62 @@ buf_read_page_low(
or is being dropped; if we succeed in initing the page in the buffer
pool for read, then DISCARD cannot proceed until the read has
completed */
- bpage = buf_page_init_for_read(err, mode, space, zip_size, unzip,
- tablespace_version, offset);
+ bpage = buf_page_init_for_read(err, mode, page_id, page_size, unzip);
+
if (bpage == NULL) {
return(0);
}
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr,
- "Posting read request for page %lu, sync %s\n",
- (ulong) offset, sync ? "true" : "false");
- }
-#endif
+ DBUG_LOG("ib_buf",
+ "read page " << page_id << " size=" << page_size.physical()
+ << " unzip=" << unzip << ',' << (sync ? "sync" : "async"));
ut_ad(buf_page_in_file(bpage));
- byte* frame = zip_size ? bpage->zip.data : ((buf_block_t*) bpage)->frame;
-
if (sync) {
thd_wait_begin(NULL, THD_WAIT_DISKIO);
}
- if (zip_size) {
- *err = fil_io(OS_FILE_READ | wake_later
- | ignore_nonexistent_pages,
- sync, space, zip_size, offset, 0, zip_size,
- frame, bpage, &bpage->write_size);
+ void* dst;
+
+ if (page_size.is_compressed()) {
+ dst = bpage->zip.data;
} else {
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
- *err = fil_io(OS_FILE_READ | wake_later
- | ignore_nonexistent_pages,
- sync, space, 0, offset, 0, UNIV_PAGE_SIZE,
- frame, bpage,
- &bpage->write_size);
+ dst = ((buf_block_t*) bpage)->frame;
}
+ IORequest request(type | IORequest::READ);
+
+ *err = fil_io(
+ request, sync, page_id, page_size, 0, page_size.physical(),
+ dst, bpage, ignore_missing_space);
+
if (sync) {
thd_wait_end(NULL);
}
- if (*err != DB_SUCCESS) {
- if (ignore_nonexistent_pages || *err == DB_TABLESPACE_DELETED) {
+ if (UNIV_UNLIKELY(*err != DB_SUCCESS)) {
+ if (*err == DB_TABLESPACE_TRUNCATED) {
+ /* Remove the page which is outside the
+ truncated tablespace bounds when recovering
+ from a crash happened during a truncation */
+ buf_read_page_handle_error(bpage);
+ if (recv_recovery_is_on()) {
+ mutex_enter(&recv_sys->mutex);
+ ut_ad(recv_sys->n_addrs > 0);
+ recv_sys->n_addrs--;
+ mutex_exit(&recv_sys->mutex);
+ }
+ return(0);
+ } else if (IORequest::ignore_missing(type)
+ || *err == DB_TABLESPACE_DELETED) {
buf_read_page_handle_error(bpage);
return(0);
}
- /* else */
+
ut_error;
}
@@ -235,8 +222,7 @@ buf_read_page_low(
return(1);
}
-/********************************************************************//**
-Applies a random read-ahead in buf_pool if there are at least a threshold
+/** Applies a random read-ahead in buf_pool if there are at least a threshold
value of accessed pages from the random read-ahead area. Does not read any
page, not even the one at the position (space, offset), if the read-ahead
mechanism is not activated. NOTE 1: the calling thread may own latches on
@@ -245,24 +231,20 @@ end up waiting for these latches! NOTE 2: the calling thread must want
access to the page given: this rule is set to prevent unintended read-aheads
performed by ibuf routines, a situation which could result in a deadlock if
the OS does not support asynchronous i/o.
+@param[in] page_id page id of a page which the current thread
+wants to access
+@param[in] page_size page size
+@param[in] inside_ibuf TRUE if we are inside ibuf routine
@return number of page read requests issued; NOTE that if we read ibuf
pages, it may happen that the page at the given page number does not
-get read even if we return a positive value!
-@return number of page read requests issued */
-UNIV_INTERN
+get read even if we return a positive value! */
ulint
buf_read_ahead_random(
-/*==================*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes,
- or 0 */
- ulint offset, /*!< in: page number of a page which
- the current thread wants to access */
- ibool inside_ibuf) /*!< in: TRUE if we are inside ibuf
- routine */
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ ibool inside_ibuf)
{
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
- ib_int64_t tablespace_version;
+ buf_pool_t* buf_pool = buf_pool_get(page_id);
ulint recent_blocks = 0;
ulint ibuf_mode;
ulint count;
@@ -282,8 +264,7 @@ buf_read_ahead_random(
return(0);
}
- if (ibuf_bitmap_page(zip_size, offset)
- || trx_sys_hdr_page(space, offset)) {
+ if (ibuf_bitmap_page(page_id, page_size) || trx_sys_hdr_page(page_id)) {
/* If it is an ibuf bitmap page or trx sys hdr, we do
no read-ahead, as that could break the ibuf page access
@@ -292,19 +273,40 @@ buf_read_ahead_random(
return(0);
}
- /* Remember the tablespace version before we ask te tablespace size
+ low = (page_id.page_no() / buf_read_ahead_random_area)
+ * buf_read_ahead_random_area;
+
+ high = (page_id.page_no() / buf_read_ahead_random_area + 1)
+ * buf_read_ahead_random_area;
+
+ /* Remember the tablespace version before we ask the tablespace size
below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
do not try to read outside the bounds of the tablespace! */
+ if (fil_space_t* space = fil_space_acquire(page_id.space())) {
- tablespace_version = fil_space_get_version(space);
+#ifdef UNIV_DEBUG
+ if (srv_file_per_table) {
+ ulint size = 0;
- low = (offset / buf_read_ahead_random_area)
- * buf_read_ahead_random_area;
- high = (offset / buf_read_ahead_random_area + 1)
- * buf_read_ahead_random_area;
- if (high > fil_space_get_size(space)) {
+ for (const fil_node_t* node =
+ UT_LIST_GET_FIRST(space->chain);
+ node != NULL;
+ node = UT_LIST_GET_NEXT(chain, node)) {
+
+ size += ulint(os_file_get_size(node->handle)
+ / page_size.physical());
+ }
+
+ ut_ad(size == space->size);
+ }
+#endif /* UNIV_DEBUG */
- high = fil_space_get_size(space);
+ if (high > space->size) {
+ high = space->size;
+ }
+ fil_space_release(space);
+ } else {
+ return(0);
}
buf_pool_mutex_enter(buf_pool);
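
The low/high bounds computed above round the requested page number down and up
to its enclosing read-ahead area. A quick standalone illustration, assuming a
64-page area (the real granularity comes from buf_read_ahead_random_area):

    #include <cstdio>

    int main()
    {
        const unsigned long area = 64;      // assumed area size
        const unsigned long page_no = 150;

        unsigned long low = (page_no / area) * area;        // 128
        unsigned long high = (page_no / area + 1) * area;   // 192

        std::printf("page %lu -> area [%lu, %lu)\n", page_no, low, high);
        return 0;
    }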
@@ -320,10 +322,10 @@ buf_read_ahead_random(
that is, reside near the start of the LRU list. */
for (i = low; i < high; i++) {
- const buf_page_t* bpage =
- buf_page_hash_get(buf_pool, space, i);
+ const buf_page_t* bpage = buf_page_hash_get(
+ buf_pool, page_id_t(page_id.space(), i));
- if (bpage
+ if (bpage != NULL
&& buf_page_is_accessed(bpage)
&& buf_page_peek_if_young(bpage)) {
@@ -357,35 +359,28 @@ read_ahead:
/* It is only sensible to do read-ahead in the non-sync aio
mode: hence FALSE as the first parameter */
- if (!ibuf_bitmap_page(zip_size, i)) {
+ const page_id_t cur_page_id(page_id.space(), i);
+
+ if (!ibuf_bitmap_page(cur_page_id, page_size)) {
count += buf_read_page_low(
&err, false,
- ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
- space, zip_size, FALSE,
- tablespace_version, i);
+ IORequest::DO_NOT_WAKE,
+ ibuf_mode,
+ cur_page_id, page_size, false);
- switch(err) {
+ switch (err) {
case DB_SUCCESS:
+ case DB_TABLESPACE_TRUNCATED:
case DB_ERROR:
break;
case DB_TABLESPACE_DELETED:
- ib_logf(IB_LOG_LEVEL_WARN,
- "In random"
- " readahead trying to access"
- " tablespace " ULINTPF " page " ULINTPF
- " but the tablespace does not"
- " exist or is just being dropped.",
- space, i);
- break;
- case DB_DECRYPTION_FAILED:
- case DB_PAGE_CORRUPTED:
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Random readahead failed to decrypt page or page corrupted "
- ULINTPF ":" ULINTPF " .",
- space, i);
+ ib::info() << "Random readahead trying to"
+ " access page " << cur_page_id
+ << " in nonexisting or"
+ " being-dropped tablespace";
break;
default:
- ib_logf(IB_LOG_LEVEL_FATAL, "Error %u (%s) in random readahead", err, ut_strerr(err));
+ ut_error;
}
}
}
@@ -396,14 +391,12 @@ read_ahead:
os_aio_simulated_wake_handler_threads();
-#ifdef UNIV_DEBUG
- if (buf_debug_prints && (count > 0)) {
- fprintf(stderr,
- "Random read-ahead space %lu offset %lu pages %lu\n",
- (ulong) space, (ulong) offset,
- (ulong) count);
+ if (count) {
+ DBUG_PRINT("ib_buf", ("random read-ahead %u pages, %u:%u",
+ (unsigned) count,
+ (unsigned) page_id.space(),
+ (unsigned) page_id.page_no()));
}
-#endif /* UNIV_DEBUG */
/* Read ahead is considered one I/O operation for the purpose of
LRU policy decision. */
@@ -414,56 +407,40 @@ read_ahead:
return(count);
}
-/********************************************************************//**
-High-level function which reads a page asynchronously from a file to the
+/** High-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
released by the i/o-handler thread.
-
-@param[in] space_id space_id
-@param[in] zip_size compressed page size in bytes, or 0
-@param[in] offset page number
-@return DB_SUCCESS if page has been read and is not corrupted,
+@param[in] page_id page id
+@param[in] page_size page size
+@retval DB_SUCCESS if the page was read and is not corrupted,
@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
after decryption normal page checksum does not match.
@retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */
-UNIV_INTERN
dberr_t
buf_read_page(
- ulint space_id,
- ulint zip_size,
- ulint offset)
+ const page_id_t page_id,
+ const page_size_t& page_size)
{
- ib_int64_t tablespace_version;
ulint count;
dberr_t err = DB_SUCCESS;
- tablespace_version = fil_space_get_version(space_id);
+ /* We do synchronous IO because our AIO completion code
+ is sub-optimal. See buf_page_io_complete(), we have to
+ acquire the buffer pool mutex before acquiring the block
+ mutex, required for updating the page state. The acquire
+ of the buffer pool mutex becomes an expensive bottleneck. */
- FilSpace space(space_id, true);
+ count = buf_read_page_low(
+ &err, true,
+ 0, BUF_READ_ANY_PAGE, page_id, page_size, false);
- if (space()) {
-
- /* We do the i/o in the synchronous aio mode to save thread
- switches: hence TRUE */
- count = buf_read_page_low(&err, true, BUF_READ_ANY_PAGE, space_id,
- zip_size, FALSE,
- tablespace_version, offset);
-
- srv_stats.buf_pool_reads.add(count);
- }
+ srv_stats.buf_pool_reads.add(count);
- /* Page corruption and decryption failures are already reported
- in above function. */
- if (!space() || err == DB_TABLESPACE_DELETED) {
- err = DB_TABLESPACE_DELETED;
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Trying to access"
- " page [space=" ULINTPF ": page=" ULINTPF
- "] but the tablespace does not exist"
- " or is just being dropped.",
- space_id, offset);
+ if (err == DB_TABLESPACE_DELETED) {
+ ib::info() << "trying to read page " << page_id
+ << " in nonexisting or being-dropped tablespace";
}
/* Increment number of I/O operations used for LRU policy. */
@@ -472,58 +449,47 @@ buf_read_page(
return(err);
}
-/********************************************************************//**
-High-level function which reads a page asynchronously from a file to the
+/** High-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
released by the i/o-handler thread.
-@param[in] space Tablespace id
-@param[in] offset Page no */
-UNIV_INTERN
+@param[in] page_id page id
+@param[in] page_size page size
+@param[in] sync true if synchronous aio is desired */
void
-buf_read_page_async(
- ulint space,
- ulint offset)
+buf_read_page_background(
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ bool sync)
{
- ulint zip_size;
- ib_int64_t tablespace_version;
ulint count;
- dberr_t err = DB_SUCCESS;
+ dberr_t err;
- zip_size = fil_space_get_zip_size(space);
+ count = buf_read_page_low(
+ &err, sync,
+ IORequest::DO_NOT_WAKE | IORequest::IGNORE_MISSING,
+ BUF_READ_ANY_PAGE,
+ page_id, page_size, false);
- if (zip_size == ULINT_UNDEFINED) {
- return;
- }
-
- tablespace_version = fil_space_get_version(space);
-
- count = buf_read_page_low(&err, true, BUF_READ_ANY_PAGE
- | OS_AIO_SIMULATED_WAKE_LATER
- | BUF_READ_IGNORE_NONEXISTENT_PAGES,
- space, zip_size, FALSE,
- tablespace_version, offset);
- switch(err) {
+ switch (err) {
case DB_SUCCESS:
+ case DB_TABLESPACE_TRUNCATED:
case DB_ERROR:
break;
case DB_TABLESPACE_DELETED:
- ib_logf(IB_LOG_LEVEL_ERROR,
- "In async page read "
- "trying to access "
- "page " ULINTPF ":" ULINTPF
- " in nonexisting or being-dropped tablespace",
- space, offset);
+ ib::info() << "trying to read page " << page_id
+ << " in the background"
+ " in a non-existing or being-dropped tablespace";
break;
- case DB_DECRYPTION_FAILED:
case DB_PAGE_CORRUPTED:
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Async page read failed to decrypt page or page corrupted "
- ULINTPF ":" ULINTPF ".",
- space, offset);
+ case DB_DECRYPTION_FAILED:
+ ib::error()
+ << "Background Page read failed to "
+ "read or decrypt " << page_id;
break;
default:
- ib_logf(IB_LOG_LEVEL_FATAL, "Error %u (%s) in async page read", err, ut_strerr(err));
+ ib::fatal() << "Error " << err << " in background read of "
+ << page_id;
}
srv_stats.buf_pool_reads.add(count);
@@ -536,8 +502,7 @@ buf_read_page_async(
ignore these in our heuristics. */
}
-/********************************************************************//**
-Applies linear read-ahead if in the buf_pool the page is a border page of
+/** Applies linear read-ahead if in the buf_pool the page is a border page of
a linear read-ahead area and all the pages in the area have been accessed.
Does not read any page if the read-ahead mechanism is not activated. Note
that the algorithm looks at the 'natural' adjacent successor and
@@ -559,28 +524,25 @@ latches!
NOTE 3: the calling thread must want access to the page given: this rule is
set to prevent unintended read-aheads performed by ibuf routines, a situation
which could result in a deadlock if the OS does not support asynchronous io.
-@return number of page read requests issued */
-UNIV_INTERN
+@param[in] page_id page id; see NOTE 3 above
+@param[in] page_size page size
+@param[in] inside_ibuf TRUE if we are inside ibuf routine
+@return number of page read requests issued */
ulint
buf_read_ahead_linear(
-/*==================*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes, or 0 */
- ulint offset, /*!< in: page number; see NOTE 3 above */
- ibool inside_ibuf) /*!< in: TRUE if we are inside ibuf routine */
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ ibool inside_ibuf)
{
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
- ib_int64_t tablespace_version;
+ buf_pool_t* buf_pool = buf_pool_get(page_id);
buf_page_t* bpage;
buf_frame_t* frame;
buf_page_t* pred_bpage = NULL;
ulint pred_offset;
ulint succ_offset;
- ulint count;
int asc_or_desc;
ulint new_offset;
ulint fail_count;
- ulint ibuf_mode;
ulint low, high;
dberr_t err = DB_SUCCESS;
ulint i;
@@ -593,24 +555,23 @@ buf_read_ahead_linear(
return(0);
}
- if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
+ if (srv_startup_is_before_trx_rollback_phase) {
/* No read-ahead to avoid thread deadlocks */
return(0);
}
- low = (offset / buf_read_ahead_linear_area)
+ low = (page_id.page_no() / buf_read_ahead_linear_area)
* buf_read_ahead_linear_area;
- high = (offset / buf_read_ahead_linear_area + 1)
+ high = (page_id.page_no() / buf_read_ahead_linear_area + 1)
* buf_read_ahead_linear_area;
- if ((offset != low) && (offset != high - 1)) {
+ if ((page_id.page_no() != low) && (page_id.page_no() != high - 1)) {
/* This is not a border page of the area: return */
return(0);
}
- if (ibuf_bitmap_page(zip_size, offset)
- || trx_sys_hdr_page(space, offset)) {
+ if (ibuf_bitmap_page(page_id, page_size) || trx_sys_hdr_page(page_id)) {
/* If it is an ibuf bitmap page or trx sys hdr, we do
no read-ahead, as that could break the ibuf page access
@@ -622,18 +583,22 @@ buf_read_ahead_linear(
/* Remember the tablespace version before we ask the tablespace size
below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
do not try to read outside the bounds of the tablespace! */
+ ulint space_size;
- tablespace_version = fil_space_get_version(space);
-
- buf_pool_mutex_enter(buf_pool);
-
- if (high > fil_space_get_size(space)) {
- buf_pool_mutex_exit(buf_pool);
- /* The area is not whole, return */
+ if (fil_space_t* space = fil_space_acquire(page_id.space())) {
+ space_size = space->size;
+ fil_space_release(space);
+ if (high > space_size) {
+ /* The area is not whole */
+ return(0);
+ }
+ } else {
return(0);
}
+ buf_pool_mutex_enter(buf_pool);
+
if (buf_pool->n_pend_reads
> buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
buf_pool_mutex_exit(buf_pool);
@@ -647,19 +612,20 @@ buf_read_ahead_linear(
asc_or_desc = 1;
- if (offset == low) {
+ if (page_id.page_no() == low) {
asc_or_desc = -1;
}
/* How many out of order accessed pages can we ignore
when working out the access pattern for linear readahead */
- threshold = ut_min((64 - srv_read_ahead_threshold),
+ threshold = ut_min(static_cast<ulint>(64 - srv_read_ahead_threshold),
BUF_READ_AHEAD_AREA(buf_pool));
fail_count = 0;
for (i = low; i < high; i++) {
- bpage = buf_page_hash_get(buf_pool, space, i);
+ bpage = buf_page_hash_get(buf_pool,
+ page_id_t(page_id.space(), i));
if (bpage == NULL || !buf_page_is_accessed(bpage)) {
/* Not accessed */
@@ -697,7 +663,7 @@ buf_read_ahead_linear(
/* If we got this far, we know that enough pages in the area have
been accessed in the right order: linear read-ahead can be sensible */
- bpage = buf_page_hash_get(buf_pool, space, offset);
+ bpage = buf_page_hash_get(buf_pool, page_id);
if (bpage == NULL) {
buf_pool_mutex_exit(buf_pool);
@@ -728,12 +694,14 @@ buf_read_ahead_linear(
buf_pool_mutex_exit(buf_pool);
- if ((offset == low) && (succ_offset == offset + 1)) {
+ if ((page_id.page_no() == low)
+ && (succ_offset == page_id.page_no() + 1)) {
/* This is ok, we can continue */
new_offset = pred_offset;
- } else if ((offset == high - 1) && (pred_offset == offset - 1)) {
+ } else if ((page_id.page_no() == high - 1)
+ && (pred_offset == page_id.page_no() - 1)) {
/* This is ok, we can continue */
new_offset = succ_offset;
@@ -754,19 +722,19 @@ buf_read_ahead_linear(
return(0);
}
- if (high > fil_space_get_size(space)) {
+ if (high > space_size) {
/* The area is not whole, return */
return(0);
}
+ ulint count = 0;
+
/* If we got this far, read-ahead can be sensible: do it */
- ibuf_mode = inside_ibuf
- ? BUF_READ_IBUF_PAGES_ONLY | OS_AIO_SIMULATED_WAKE_LATER
- : BUF_READ_ANY_PAGE | OS_AIO_SIMULATED_WAKE_LATER;
+ ulint ibuf_mode;
- count = 0;
+ ibuf_mode = inside_ibuf ? BUF_READ_IBUF_PAGES_ONLY : BUF_READ_ANY_PAGE;
/* Since Windows XP seems to schedule the i/o handler thread
very eagerly, and consequently it does not wait for the
@@ -778,35 +746,28 @@ buf_read_ahead_linear(
/* It is only sensible to do read-ahead in the non-sync
aio mode: hence FALSE as the first parameter */
- if (!ibuf_bitmap_page(zip_size, i)) {
+ const page_id_t cur_page_id(page_id.space(), i);
+ if (!ibuf_bitmap_page(cur_page_id, page_size)) {
count += buf_read_page_low(
&err, false,
- ibuf_mode,
- space, zip_size, FALSE, tablespace_version, i);
+ IORequest::DO_NOT_WAKE,
+ ibuf_mode, cur_page_id, page_size, false);
- switch(err) {
+ switch (err) {
case DB_SUCCESS:
- case DB_ERROR:
- break;
+ case DB_TABLESPACE_TRUNCATED:
case DB_TABLESPACE_DELETED:
- ib_logf(IB_LOG_LEVEL_WARN,
- "In linear "
- " readahead trying to access "
- " tablespace " ULINTPF ":" ULINTPF
- " but the tablespace does not"
- " exist or is just being dropped.",
- space, i);
+ case DB_ERROR:
break;
- case DB_DECRYPTION_FAILED:
case DB_PAGE_CORRUPTED:
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Linear readahead failed to decrypt page or page corrupted"
- ULINTPF ":" ULINTPF ".",
- space, i);
+ case DB_DECRYPTION_FAILED:
+ ib::error() << "linear readahead failed to"
+ " read or decrypt "
+ << page_id_t(page_id.space(), i);
break;
default:
- ib_logf(IB_LOG_LEVEL_FATAL, "Error %u (%s) in linear readahead", err, ut_strerr(err));
+ ut_error;
}
}
}
@@ -817,13 +778,13 @@ buf_read_ahead_linear(
os_aio_simulated_wake_handler_threads();
-#ifdef UNIV_DEBUG
- if (buf_debug_prints && (count > 0)) {
- fprintf(stderr,
- "LINEAR read-ahead space %lu offset %lu pages %lu\n",
- (ulong) space, (ulong) offset, (ulong) count);
+ if (count) {
+ DBUG_PRINT("ib_buf", ("linear read-ahead " ULINTPF " pages, "
+ "%u:%u",
+ count,
+ page_id.space(),
+ page_id.page_no()));
}
-#endif /* UNIV_DEBUG */
/* Read ahead is considered one I/O operation for the purpose of
LRU policy decision. */
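
Linear read-ahead only fires when the requested page is the first or the last
page of its aligned area, as checked near the top of the function. The border
test restated standalone, again assuming a 64-page area:

    #include <cstdio>

    static bool is_border(unsigned long page_no, unsigned long area)
    {
        unsigned long low = (page_no / area) * area;
        unsigned long high = low + area;
        return page_no == low || page_no == high - 1;
    }

    int main()
    {
        std::printf("%d %d %d\n",
                    is_border(64, 64),    // 1: first page of its area
                    is_border(127, 64),   // 1: last page of its area
                    is_border(100, 64));  // 0: interior page, no read-ahead
        return 0;
    }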
@@ -837,7 +798,6 @@ buf_read_ahead_linear(
Issues read requests for pages which the ibuf module wants to read in, in
order to contract the insert buffer tree. Technically, this function is like
a read-ahead function. */
-UNIV_INTERN
void
buf_read_ibuf_merge_pages(
/*======================*/
@@ -847,13 +807,6 @@ buf_read_ibuf_merge_pages(
to get read in, before this
function returns */
const ulint* space_ids, /*!< in: array of space ids */
- const ib_int64_t* space_versions,/*!< in: the spaces must have
- this version number
- (timestamp), otherwise we
- discard the read; we use this
- to cancel reads if DISCARD +
- IMPORT may have changed the
- tablespace size */
const ulint* page_nos, /*!< in: array of page numbers
to read, with the highest page
number the last in the
@@ -861,160 +814,162 @@ buf_read_ibuf_merge_pages(
ulint n_stored) /*!< in: number of elements
in the arrays */
{
- ulint i;
-
#ifdef UNIV_IBUF_DEBUG
ut_a(n_stored < UNIV_PAGE_SIZE);
#endif
- for (i = 0; i < n_stored; i++) {
- buf_pool_t* buf_pool;
- ulint zip_size = fil_space_get_zip_size(space_ids[i]);
- dberr_t err = DB_SUCCESS;
+ for (ulint i = 0; i < n_stored; i++) {
+ fil_space_t* space = fil_space_acquire_silent(space_ids[i]);
+ if (!space) {
+tablespace_deleted:
+ /* The tablespace was not found: remove all
+ entries for it */
+ ibuf_delete_for_discarded_space(space_ids[i]);
+ while (i + 1 < n_stored
+ && space_ids[i + 1] == space_ids[i]) {
+ i++;
+ }
+ continue;
+ }
+
+ if (UNIV_UNLIKELY(page_nos[i] >= space->size)) {
+ do {
+ ibuf_delete_recs(page_id_t(space_ids[i],
+ page_nos[i]));
+ } while (++i < n_stored
+ && space_ids[i - 1] == space_ids[i]
+ && page_nos[i] >= space->size);
+ i--;
+next:
+ fil_space_release(space);
+ continue;
+ }
+
+ const page_id_t page_id(space_ids[i], page_nos[i]);
- buf_pool = buf_pool_get(space_ids[i], page_nos[i]);
+ buf_pool_t* buf_pool = buf_pool_get(page_id);
while (buf_pool->n_pend_reads
> buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
os_thread_sleep(500000);
}
- if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
- goto tablespace_deleted;
- }
+ dberr_t err;
- buf_read_page_low(&err, sync && (i + 1 == n_stored),
- BUF_READ_ANY_PAGE, space_ids[i],
- zip_size, TRUE, space_versions[i],
- page_nos[i]);
+ buf_read_page_low(&err,
+ sync && (i + 1 == n_stored),
+ 0,
+ BUF_READ_ANY_PAGE, page_id,
+ page_size_t(space->flags), true);
switch(err) {
case DB_SUCCESS:
+ case DB_TABLESPACE_TRUNCATED:
case DB_ERROR:
break;
case DB_TABLESPACE_DELETED:
-
-tablespace_deleted:
- /* We have deleted or are deleting the single-table
- tablespace: remove the entries for that page */
-
- ibuf_merge_or_delete_for_page(NULL, space_ids[i],
- page_nos[i],
- zip_size, FALSE);
- break;
- case DB_DECRYPTION_FAILED:
+ fil_space_release(space);
+ goto tablespace_deleted;
case DB_PAGE_CORRUPTED:
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Failed to decrypt insert buffer page or page corrupted "
- ULINTPF ":" ULINTPF ".",
- space_ids[i], page_nos[i]);
+ case DB_DECRYPTION_FAILED:
+ ib::error() << "Failed to read or decrypt page "
+ << page_nos[i]
+ << " of '" << space->chain.start->name
+ << "' for change buffer merge";
break;
default:
- ib_logf(IB_LOG_LEVEL_FATAL, "Error %u (%s) in insert buffer read", err, ut_strerr(err));
+ ut_error;
}
+
+ goto next;
}
os_aio_simulated_wake_handler_threads();
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr,
- "Ibuf merge read-ahead space %lu pages %lu\n",
- (ulong) space_ids[0], (ulong) n_stored);
+ if (n_stored) {
+ DBUG_PRINT("ib_buf",
+ ("ibuf merge read-ahead %u pages, space %u",
+ unsigned(n_stored), unsigned(space_ids[0])));
}
-#endif /* UNIV_DEBUG */
}
-/********************************************************************//**
-Issues read requests for pages which recovery wants to read in. */
-UNIV_INTERN
+/** Issues read requests for pages which recovery wants to read in.
+@param[in] sync true if the caller wants this function to wait
+for the highest address page to get read in, before this function returns
+@param[in] space_id tablespace id
+@param[in] page_nos array of page numbers to read, with the
+highest page number the last in the array
+@param[in] n_stored number of page numbers in the array */
void
buf_read_recv_pages(
-/*================*/
- ibool sync, /*!< in: TRUE if the caller
- wants this function to wait
- for the highest address page
- to get read in, before this
- function returns */
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in
- bytes, or 0 */
- const ulint* page_nos, /*!< in: array of page numbers
- to read, with the highest page
- number the last in the
- array */
- ulint n_stored) /*!< in: number of page numbers
- in the array */
+ bool sync,
+ ulint space_id,
+ const ulint* page_nos,
+ ulint n_stored)
{
- ib_int64_t tablespace_version;
- ulint count;
- dberr_t err = DB_SUCCESS;
- ulint i;
-
- zip_size = fil_space_get_zip_size(space);
-
- if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
- /* It is a single table tablespace and the .ibd file is
- missing: do nothing */
+ fil_space_t* space = fil_space_get(space_id);
+ if (space == NULL) {
+ /* The tablespace is missing: do nothing */
return;
}
- tablespace_version = fil_space_get_version(space);
+ fil_space_open_if_needed(space);
+
+ const page_size_t page_size(space->flags);
- for (i = 0; i < n_stored; i++) {
- buf_pool_t* buf_pool;
+ for (ulint i = 0; i < n_stored; i++) {
+ buf_pool_t* buf_pool;
+ const page_id_t cur_page_id(space_id, page_nos[i]);
- count = 0;
+ ulint count = 0;
- os_aio_print_debug = FALSE;
- buf_pool = buf_pool_get(space, page_nos[i]);
- while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) {
+ buf_pool = buf_pool_get(cur_page_id);
+ ulint limit = 0;
+ for (ulint j = 0; j < buf_pool->n_chunks; j++) {
+ limit += buf_pool->chunks[j].size / 2;
+ }
+ while (buf_pool->n_pend_reads >= limit) {
os_aio_simulated_wake_handler_threads();
os_thread_sleep(10000);
count++;
- if (count > 1000) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "waited for 10 seconds for " ULINTPF
- " pending reads to the buffer pool to"
- " be finished",
- buf_pool->n_pend_reads);
+ if (!(count % 1000)) {
- os_aio_print_debug = TRUE;
+ ib::error()
+ << "Waited for " << count / 100
+ << " seconds for "
+ << buf_pool->n_pend_reads
+ << " pending reads";
}
}
- os_aio_print_debug = FALSE;
+ dberr_t err;
- if ((i + 1 == n_stored) && sync) {
- buf_read_page_low(&err, true, BUF_READ_ANY_PAGE, space,
- zip_size, TRUE, tablespace_version,
- page_nos[i]);
+ if (sync && i + 1 == n_stored) {
+ buf_read_page_low(
+ &err, true,
+ 0,
+ BUF_READ_ANY_PAGE,
+ cur_page_id, page_size, true);
} else {
- buf_read_page_low(&err, false, BUF_READ_ANY_PAGE
- | OS_AIO_SIMULATED_WAKE_LATER,
- space, zip_size, TRUE,
- tablespace_version, page_nos[i]);
+ buf_read_page_low(
+ &err, false,
+ IORequest::DO_NOT_WAKE,
+ BUF_READ_ANY_PAGE,
+ cur_page_id, page_size, true);
}
- if (err == DB_DECRYPTION_FAILED) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Recovery failed to decrypt read page "
- ULINTPF ":" ULINTPF ".",
- space, page_nos[i]);
+ if (err == DB_DECRYPTION_FAILED || err == DB_PAGE_CORRUPTED) {
+ ib::error() << "Recovery failed to read or decrypt "
+ << cur_page_id;
}
}
os_aio_simulated_wake_handler_threads();
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr,
- "Recovery applies read-ahead pages %lu\n",
- (ulong) n_stored);
- }
-#endif /* UNIV_DEBUG */
+ DBUG_PRINT("ib_buf", ("recovery read-ahead (%u pages)",
+ unsigned(n_stored)));
}
diff --git a/storage/innobase/bzip2.cmake b/storage/innobase/bzip2.cmake
new file mode 100644
index 00000000000..26fd703120e
--- /dev/null
+++ b/storage/innobase/bzip2.cmake
@@ -0,0 +1,33 @@
+# Copyright (C) 2014, SkySQL Ab. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
+
+SET(WITH_INNODB_BZIP2 AUTO CACHE STRING
+ "Build with bzip2. Possible values are 'ON', 'OFF', 'AUTO' and default is 'AUTO'")
+
+MACRO (MYSQL_CHECK_BZIP2)
+ IF (WITH_INNODB_BZIP2 STREQUAL "ON" OR WITH_INNODB_BZIP2 STREQUAL "AUTO")
+ CHECK_INCLUDE_FILES(bzlib.h HAVE_BZLIB2_H)
+ CHECK_LIBRARY_EXISTS(bz2 BZ2_bzBuffToBuffCompress "" HAVE_BZLIB2_COMPRESS)
+ CHECK_LIBRARY_EXISTS(bz2 BZ2_bzBuffToBuffDecompress "" HAVE_BZLIB2_DECOMPRESS)
+
+ IF (HAVE_BZLIB2_COMPRESS AND HAVE_BZLIB2_DECOMPRESS AND HAVE_BZLIB2_H)
+ ADD_DEFINITIONS(-DHAVE_BZIP2=1)
+ LINK_LIBRARIES(bz2)
+ ELSE()
+ IF (WITH_INNODB_BZIP2 STREQUAL "ON")
+ MESSAGE(FATAL_ERROR "Required bzip2 library is not found")
+ ENDIF()
+ ENDIF()
+ ENDIF()
+ENDMACRO()
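
When the probe succeeds, the macro defines HAVE_BZIP2=1 and links libbz2, so
compression code can be guarded on that define. An illustrative guarded use of
the probed entry point (not the actual InnoDB page-compression call site):

    #ifdef HAVE_BZIP2
    # include <bzlib.h>

    // dst_len is in/out: buffer capacity on entry, compressed size on return.
    static int page_compress_bz2(char* dst, unsigned int* dst_len,
                                 char* src, unsigned int src_len)
    {
        // blockSize100k = 9, verbosity = 0, workFactor = 0 (library default)
        return BZ2_bzBuffToBuffCompress(dst, dst_len, src, src_len, 9, 0, 0);
    }
    #endif /* HAVE_BZIP2 */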
diff --git a/storage/innobase/data/data0data.cc b/storage/innobase/data/data0data.cc
index 6c01176bfdd..17126e38e42 100644
--- a/storage/innobase/data/data0data.cc
+++ b/storage/innobase/data/data0data.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2019, MariaDB Corporation.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,48 +25,34 @@ Created 5/30/1994 Heikki Tuuri
*************************************************************************/
#include "data0data.h"
-
-#ifdef UNIV_NONINL
-#include "data0data.ic"
-#endif
-
-#ifndef UNIV_HOTBACKUP
#include "rem0rec.h"
#include "rem0cmp.h"
#include "page0page.h"
#include "page0zip.h"
#include "dict0dict.h"
#include "btr0cur.h"
-
-#include <ctype.h>
-#endif /* !UNIV_HOTBACKUP */
+#include "row0upd.h"
#ifdef UNIV_DEBUG
/** Dummy variable to catch access to uninitialized fields. In the
debug version, dtuple_create() will make all fields of dtuple_t point
to data_error. */
-UNIV_INTERN byte data_error;
-
-# ifndef UNIV_DEBUG_VALGRIND
-/** this is used to fool the compiler in dtuple_validate */
-UNIV_INTERN ulint data_dummy;
-# endif /* !UNIV_DEBUG_VALGRIND */
+ut_d(byte data_error);
#endif /* UNIV_DEBUG */
-#ifndef UNIV_HOTBACKUP
-/************************************************************//**
-Compare two data tuples, respecting the collation of character fields.
-@return 1, 0 , -1 if tuple1 is greater, equal, less, respectively,
-than tuple2 */
-UNIV_INTERN
+/** Compare two data tuples.
+@param[in] tuple1 first data tuple
+@param[in] tuple2 second data tuple
+@return positive, 0, negative if tuple1 is greater, equal, less, than tuple2,
+respectively */
int
dtuple_coll_cmp(
-/*============*/
- const dtuple_t* tuple1, /*!< in: tuple 1 */
- const dtuple_t* tuple2) /*!< in: tuple 2 */
+ const dtuple_t* tuple1,
+ const dtuple_t* tuple2)
{
ulint n_fields;
ulint i;
+ int cmp;
ut_ad(tuple1 != NULL);
ut_ad(tuple2 != NULL);
@@ -77,30 +63,20 @@ dtuple_coll_cmp(
n_fields = dtuple_get_n_fields(tuple1);
- if (n_fields != dtuple_get_n_fields(tuple2)) {
-
- return(n_fields < dtuple_get_n_fields(tuple2) ? -1 : 1);
- }
+ cmp = (int) n_fields - (int) dtuple_get_n_fields(tuple2);
- for (i = 0; i < n_fields; i++) {
- int cmp;
+ for (i = 0; cmp == 0 && i < n_fields; i++) {
const dfield_t* field1 = dtuple_get_nth_field(tuple1, i);
const dfield_t* field2 = dtuple_get_nth_field(tuple2, i);
-
cmp = cmp_dfield_dfield(field1, field2);
-
- if (cmp) {
- return(cmp);
- }
}
- return(0);
+ return(cmp);
}
/*********************************************************************//**
Sets number of fields used in a tuple. Normally this is set in
dtuple_create, but if you want later to set it smaller, you can use this. */
-UNIV_INTERN
void
dtuple_set_n_fields(
/*================*/
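
The rewritten dtuple_coll_cmp() above seeds cmp with the field-count
difference and keeps looping only while the tuples still compare equal, so the
first non-zero field comparison ends the loop. The same control flow on plain
integers (cmp_int standing in for cmp_dfield_dfield()):

    #include <cstdio>

    static int cmp_int(int a, int b) { return a < b ? -1 : a > b; }

    static int coll_cmp(const int* t1, int n1, const int* t2, int n2)
    {
        int cmp = n1 - n2;
        for (int i = 0; cmp == 0 && i < n1; i++) {
            cmp = cmp_int(t1[i], t2[i]);
        }
        return cmp;
    }

    int main()
    {
        const int a[] = {1, 2, 3}, b[] = {1, 2, 4};
        std::printf("%d\n", coll_cmp(a, 3, b, 3)); // -1: differs on field 2
        return 0;
    }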
@@ -113,20 +89,20 @@ dtuple_set_n_fields(
/**********************************************************//**
Checks that a data field is typed.
-@return TRUE if ok */
+@return TRUE if ok */
static
ibool
dfield_check_typed_no_assert(
/*=========================*/
const dfield_t* field) /*!< in: data field */
{
- if (dfield_get_type(field)->mtype > DATA_MYSQL
- || dfield_get_type(field)->mtype < DATA_VARCHAR) {
+ if (dfield_get_type(field)->mtype > DATA_MTYPE_CURRENT_MAX
+ || dfield_get_type(field)->mtype < DATA_MTYPE_CURRENT_MIN) {
+
+ ib::error() << "Data field type "
+ << dfield_get_type(field)->mtype
+ << ", len " << dfield_get_len(field);
- fprintf(stderr,
- "InnoDB: Error: data field type %lu, len %lu\n",
- (ulong) dfield_get_type(field)->mtype,
- (ulong) dfield_get_len(field));
return(FALSE);
}
@@ -135,8 +111,8 @@ dfield_check_typed_no_assert(
/**********************************************************//**
Checks that a data tuple is typed.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
+static
ibool
dtuple_check_typed_no_assert(
/*=========================*/
@@ -146,9 +122,8 @@ dtuple_check_typed_no_assert(
ulint i;
if (dtuple_get_n_fields(tuple) > REC_MAX_N_FIELDS) {
- fprintf(stderr,
- "InnoDB: Error: index entry has %lu fields\n",
- (ulong) dtuple_get_n_fields(tuple));
+ ib::error() << "Index entry has "
+ << dtuple_get_n_fields(tuple) << " fields";
dump:
fputs("InnoDB: Tuple contents: ", stderr);
dtuple_print(stderr, tuple);
@@ -168,27 +143,22 @@ dump:
return(TRUE);
}
-#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_DEBUG
/**********************************************************//**
Checks that a data field is typed. Asserts an error if not.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
dfield_check_typed(
/*===============*/
const dfield_t* field) /*!< in: data field */
{
- if (dfield_get_type(field)->mtype > DATA_MYSQL
- || dfield_get_type(field)->mtype < DATA_VARCHAR) {
+ if (dfield_get_type(field)->mtype > DATA_MTYPE_CURRENT_MAX
+ || dfield_get_type(field)->mtype < DATA_MTYPE_CURRENT_MIN) {
- fprintf(stderr,
- "InnoDB: Error: data field type %lu, len %lu\n",
- (ulong) dfield_get_type(field)->mtype,
- (ulong) dfield_get_len(field));
-
- ut_error;
+ ib::fatal() << "Data field type "
+ << dfield_get_type(field)->mtype
+ << ", len " << dfield_get_len(field);
}
return(TRUE);
@@ -196,8 +166,7 @@ dfield_check_typed(
/**********************************************************//**
Checks that a data tuple is typed. Asserts an error if not.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
dtuple_check_typed(
/*===============*/
@@ -219,8 +188,7 @@ dtuple_check_typed(
/**********************************************************//**
Validates the consistency of a tuple which must be complete, i.e,
all fields must have been set.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
dtuple_validate(
/*============*/
@@ -252,10 +220,6 @@ dtuple_validate(
ulint j;
for (j = 0; j < len; j++) {
-
- data_dummy += *data; /* fool the compiler not
- to optimize out this
- code */
data++;
}
#endif /* !UNIV_DEBUG_VALGRIND */
@@ -270,10 +234,8 @@ dtuple_validate(
}
#endif /* UNIV_DEBUG */
-#ifndef UNIV_HOTBACKUP
/*************************************************************//**
Pretty prints a dfield value according to its data type. */
-UNIV_INTERN
void
dfield_print(
/*=========*/
@@ -316,7 +278,6 @@ dfield_print(
/*************************************************************//**
Pretty prints a dfield value according to its data type. Also the hex string
is printed if a string contains non-printable characters. */
-UNIV_INTERN
void
dfield_print_also_hex(
/*==================*/
@@ -390,16 +351,16 @@ dfield_print_also_hex(
case 6:
id = mach_read_from_6(data);
- fprintf(stderr, "%llu", (ullint) id);
+ fprintf(stderr, IB_ID_FMT, id);
break;
case 7:
id = mach_read_from_7(data);
- fprintf(stderr, "%llu", (ullint) id);
+ fprintf(stderr, IB_ID_FMT, id);
break;
case 8:
id = mach_read_from_8(data);
- fprintf(stderr, "%llu", (ullint) id);
+ fprintf(stderr, IB_ID_FMT, id);
break;
default:
goto print_hex;
@@ -427,9 +388,7 @@ dfield_print_also_hex(
break;
default:
- id = mach_ull_read_compressed(data);
-
- fprintf(stderr, "mix_id " TRX_ID_FMT, id);
+ goto print_hex;
}
break;
@@ -457,7 +416,7 @@ dfield_print_also_hex(
break;
}
- data = static_cast<byte*>(dfield_get_data(dfield));
+ data = static_cast<const byte*>(dfield_get_data(dfield));
/* fall through */
case DATA_BINARY:
@@ -466,7 +425,7 @@ print_hex:
fputs(" Hex: ",stderr);
for (i = 0; i < len; i++) {
- fprintf(stderr, "%02lx", (ulint) *data++);
+ fprintf(stderr, "%02x", *data++);
}
if (dfield_is_ext(dfield)) {
@@ -486,7 +445,7 @@ dfield_print_raw(
{
ulint len = dfield_get_len(dfield);
if (!dfield_is_null(dfield)) {
- ulint print_len = ut_min(len, 1000);
+ ulint print_len = ut_min(len, static_cast<ulint>(1000));
ut_print_buf(f, dfield_get_data(dfield), print_len);
if (len != print_len) {
fprintf(f, "(total %lu bytes%s)",
@@ -500,7 +459,6 @@ dfield_print_raw(
/**********************************************************//**
The following function prints the contents of a tuple. */
-UNIV_INTERN
void
dtuple_print(
/*=========*/
@@ -526,6 +484,62 @@ dtuple_print(
ut_ad(dtuple_validate(tuple));
}
+/** Print the contents of an array of data fields.
+@param[out] o output stream
+@param[in] field array of data fields
+@param[in] n number of data fields */
+void
+dfield_print(
+ std::ostream& o,
+ const dfield_t* field,
+ ulint n)
+{
+ for (ulint i = 0; i < n; i++, field++) {
+ const void* data = dfield_get_data(field);
+ const ulint len = dfield_get_len(field);
+
+ if (i) {
+ o << ',';
+ }
+
+ if (dfield_is_null(field)) {
+ o << "NULL";
+ } else if (dfield_is_ext(field)) {
+ ulint local_len = len - BTR_EXTERN_FIELD_REF_SIZE;
+ ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+ o << '['
+ << local_len
+ << '+' << BTR_EXTERN_FIELD_REF_SIZE << ']';
+ ut_print_buf(o, data, local_len);
+ ut_print_buf_hex(o, static_cast<const byte*>(data)
+ + local_len,
+ BTR_EXTERN_FIELD_REF_SIZE);
+ } else {
+ o << '[' << len << ']';
+ ut_print_buf(o, data, len);
+ }
+ }
+}
+
+/** Print the contents of a tuple.
+@param[out] o output stream
+@param[in] tuple data tuple */
+void
+dtuple_print(
+ std::ostream& o,
+ const dtuple_t* tuple)
+{
+ const ulint n = dtuple_get_n_fields(tuple);
+
+ o << "TUPLE (info_bits=" << dtuple_get_info_bits(tuple)
+ << ", " << n << " fields): {";
+
+ dfield_print(o, tuple->fields, n);
+
+ o << "}";
+}
+
/**************************************************************//**
Moves parts of long fields in entry to the big record vector so that
the size of tuple drops below the maximum record size allowed in the
@@ -534,11 +548,11 @@ to determine uniquely the insertion place of the tuple in the index.
@return own: created big record vector, NULL if we are not able to
shorten the entry enough, i.e., if there are too many fixed-length or
short fields in entry or the index is not clustered */
-UNIV_INTERN
big_rec_t*
dtuple_convert_big_rec(
/*===================*/
dict_index_t* index, /*!< in: index */
+ upd_t* upd, /*!< in/out: update vector */
dtuple_t* entry, /*!< in/out: index entry */
ulint* n_ext) /*!< in/out: number of
externally stored columns */
@@ -549,30 +563,20 @@ dtuple_convert_big_rec(
dict_field_t* ifield;
ulint size;
ulint n_fields;
- ulint local_len;
ulint local_prefix_len;
if (!dict_index_is_clust(index)) {
return(NULL);
}
- if (dict_table_get_format(index->table) < UNIV_FORMAT_B) {
- /* up to MySQL 5.1: store a 768-byte prefix locally */
- local_len = BTR_EXTERN_FIELD_REF_SIZE
- + DICT_ANTELOPE_MAX_INDEX_COL_LEN;
- } else {
- /* new-format table: do not store any BLOB prefix locally */
- local_len = BTR_EXTERN_FIELD_REF_SIZE;
- }
+ const ulint local_len = index->table->get_overflow_field_local_len();
ut_a(dtuple_check_typed_no_assert(entry));
size = rec_get_converted_size(index, entry, *n_ext);
if (UNIV_UNLIKELY(size > 1000000000)) {
- fprintf(stderr,
- "InnoDB: Warning: tuple size very big: %lu\n",
- (ulong) size);
+ ib::warn() << "Tuple size is very big: " << size;
fputs("InnoDB: Tuple contents: ", stderr);
dtuple_print(stderr, entry);
putc('\n', stderr);
@@ -581,15 +585,7 @@ dtuple_convert_big_rec(
heap = mem_heap_create(size + dtuple_get_n_fields(entry)
* sizeof(big_rec_field_t) + 1000);
- vector = static_cast<big_rec_t*>(
- mem_heap_alloc(heap, sizeof(big_rec_t)));
-
- vector->heap = heap;
-
- vector->fields = static_cast<big_rec_field_t*>(
- mem_heap_alloc(
- heap,
- dtuple_get_n_fields(entry) * sizeof(big_rec_field_t)));
+ vector = big_rec_t::alloc(heap, dtuple_get_n_fields(entry));
/* Decide which fields to shorten: the algorithm is to look for
a variable-length field that yields the biggest savings when
@@ -601,12 +597,12 @@ dtuple_convert_big_rec(
*n_ext),
dict_table_is_comp(index->table),
dict_index_get_n_fields(index),
- dict_table_zip_size(index->table))) {
+ dict_table_page_size(index->table))) {
+
ulint i;
ulint longest = 0;
ulint longest_i = ULINT_MAX;
byte* data;
- big_rec_field_t* b;
for (i = dict_index_get_n_unique_in_tree(index);
i < dtuple_get_n_fields(entry); i++) {
@@ -623,7 +619,7 @@ dtuple_convert_big_rec(
|| dfield_is_ext(dfield)
|| dfield_get_len(dfield) <= local_len
|| dfield_get_len(dfield)
- <= BTR_EXTERN_FIELD_REF_SIZE * 2) {
+ <= BTR_EXTERN_LOCAL_STORED_MAX_SIZE) {
goto skip_field;
}
@@ -644,8 +640,7 @@ dtuple_convert_big_rec(
there we always store locally columns whose
length is up to local_len == 788 bytes.
@see rec_init_offsets_comp_ordinary */
- if (ifield->col->mtype != DATA_BLOB
- && ifield->col->len < 256) {
+ if (!DATA_BIG_COL(ifield->col)) {
goto skip_field;
}
@@ -674,10 +669,12 @@ skip_field:
ifield = dict_index_get_nth_field(index, longest_i);
local_prefix_len = local_len - BTR_EXTERN_FIELD_REF_SIZE;
- b = &vector->fields[n_fields];
- b->field_no = longest_i;
- b->len = dfield_get_len(dfield) - local_prefix_len;
- b->data = (char*) dfield_get_data(dfield) + local_prefix_len;
+ vector->append(
+ big_rec_field_t(
+ longest_i,
+ dfield_get_len(dfield) - local_prefix_len,
+ static_cast<char*>(dfield_get_data(dfield))
+ + local_prefix_len));
/* Allocate the locally stored part of the column. */
data = static_cast<byte*>(mem_heap_alloc(heap, local_len));
@@ -701,9 +698,30 @@ skip_field:
n_fields++;
(*n_ext)++;
ut_ad(n_fields < dtuple_get_n_fields(entry));
+
+ if (upd && !upd->is_modified(longest_i)) {
+
+ DEBUG_SYNC_C("ib_mv_nonupdated_column_offpage");
+
+ upd_field_t upd_field;
+ upd_field.field_no = unsigned(longest_i);
+ upd_field.orig_len = 0;
+ upd_field.exp = NULL;
+ upd_field.old_v_val = NULL;
+ dfield_copy(&upd_field.new_val,
+ dfield->clone(upd->heap));
+ upd->append(upd_field);
+ ut_ad(upd->is_modified(longest_i));
+
+ ut_ad(upd_field.new_val.len
+ >= BTR_EXTERN_FIELD_REF_SIZE);
+ ut_ad(upd_field.new_val.len == local_len);
+ ut_ad(upd_field.new_val.len == dfield_get_len(dfield));
+ }
}
- vector->n_fields = n_fields;
+ ut_ad(n_fields == vector->n_fields);
+
return(vector);
}
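
dtuple_convert_big_rec() above is greedy: while the converted record is still
too large, it picks the longest remaining externalizable column, keeps a
local_len prefix in the record, and moves the rest off-page. A simplified
model of that loop (the sizes and the fit test are illustrative, not
rec_get_converted_size()):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    int main()
    {
        std::vector<std::size_t> field_len = {40, 9000, 300, 20000};
        const std::size_t local_len = 20;      // prefix kept in the record
        const std::size_t max_rec = 8192;      // pretend half-page limit

        std::size_t rec_size = 0;
        for (std::size_t l : field_len) rec_size += l;

        while (rec_size > max_rec) {
            std::size_t longest_i = 0, longest = 0;
            for (std::size_t i = 0; i < field_len.size(); i++) {
                if (field_len[i] > longest) {
                    longest = field_len[i];
                    longest_i = i;
                }
            }
            if (longest <= local_len) break;   // nothing left to shorten
            rec_size -= field_len[longest_i] - local_len;
            field_len[longest_i] = local_len;  // the rest goes off-page
            std::printf("externalized field %zu\n", longest_i);
        }
        return 0;
    }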
@@ -711,7 +729,6 @@ skip_field:
Puts back to entry the data stored in vector. Note that to ensure the
fields in entry can accommodate the data, vector must have been created
from entry with dtuple_convert_big_rec. */
-UNIV_INTERN
void
dtuple_convert_back_big_rec(
/*========================*/
@@ -747,4 +764,54 @@ dtuple_convert_back_big_rec(
mem_heap_free(vector->heap);
}
-#endif /* !UNIV_HOTBACKUP */
+
+/** Allocate a big_rec_t object in the given memory heap, capable of
+storing n_fld fields.
+@param[in] heap memory heap in which this object is allocated
+@param[in] n_fld maximum number of fields that can be stored in
+ this object
+
+@return the allocated object */
+big_rec_t*
+big_rec_t::alloc(
+ mem_heap_t* heap,
+ ulint n_fld)
+{
+ big_rec_t* rec = static_cast<big_rec_t*>(
+ mem_heap_alloc(heap, sizeof(big_rec_t)));
+
+ new(rec) big_rec_t(n_fld);
+
+ rec->heap = heap;
+ rec->fields = static_cast<big_rec_field_t*>(
+ mem_heap_alloc(heap,
+ n_fld * sizeof(big_rec_field_t)));
+
+ rec->n_fields = 0;
+ return(rec);
+}
+
+/** Create a deep copy of this object.
+@param[in,out] heap memory heap in which the clone will be created
+@return the cloned object */
+dfield_t*
+dfield_t::clone(mem_heap_t* heap) const
+{
+ const ulint size = len == UNIV_SQL_NULL ? 0 : len;
+ dfield_t* obj = static_cast<dfield_t*>(
+ mem_heap_alloc(heap, sizeof(dfield_t) + size));
+
+ obj->ext = ext;
+ obj->len = len;
+ obj->type = type;
+ obj->spatial_status = spatial_status;
+
+ if (len != UNIV_SQL_NULL) {
+ obj->data = obj + 1;
+ memcpy(obj->data, data, len);
+ } else {
+ obj->data = 0;
+ }
+
+ return(obj);
+}
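The two helpers added above, big_rec_t::alloc() and dfield_t::clone(), share one allocation pattern: carve both the object header and any variable-length payload out of a single memory heap, construct the header with placement new, and point the payload at the bytes immediately after the header. A minimal standalone sketch of that pattern, with Arena, Field and BigRec as illustrative stand-ins (they are not InnoDB types):

    #include <cstddef>
    #include <cstring>
    #include <new>
    #include <vector>

    // Toy arena standing in for mem_heap_t; frees everything at once.
    struct Arena {
        std::vector<char*> blocks;
        void* alloc(std::size_t n) {
            blocks.push_back(new char[n]);
            return blocks.back();
        }
        ~Arena() { for (char* p : blocks) delete[] p; }
    };

    // A field whose payload is stored inline, as dfield_t::clone() does.
    struct Field {
        void*       data;
        std::size_t len;

        Field* clone(Arena& heap) const {
            Field* obj = static_cast<Field*>(
                heap.alloc(sizeof(Field) + len));
            obj->len  = len;
            obj->data = obj + 1;            // payload right after header
            std::memcpy(obj->data, data, len);
            return obj;
        }
    };

    // Container allocated with placement new, as big_rec_t::alloc() does.
    struct BigRec {
        Field*      fields;
        std::size_t n_fields = 0;

        static BigRec* alloc(Arena& heap, std::size_t n_fld) {
            BigRec* rec = new (heap.alloc(sizeof(BigRec))) BigRec();
            rec->fields = static_cast<Field*>(
                heap.alloc(n_fld * sizeof(Field)));
            return rec;                     // append() bumps n_fields
        }
    };

Because every allocation comes from the same heap, freeing the heap (mem_heap_free(vector->heap) above) releases the container, its field array and every cloned payload in one step.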
diff --git a/storage/innobase/data/data0type.cc b/storage/innobase/data/data0type.cc
index 715649f471e..953a59102c0 100644
--- a/storage/innobase/data/data0type.cc
+++ b/storage/innobase/data/data0type.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, MariaDB Corporation.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,26 +26,18 @@ Created 1/16/1996 Heikki Tuuri
#include "data0type.h"
-#ifdef UNIV_NONINL
-#include "data0type.ic"
-#endif
-
-#ifndef UNIV_HOTBACKUP
-# include "ha_prototypes.h"
-
/* At the database startup we store the default-charset collation number of
this MySQL installation to this global variable. If we have < 4.1.2 format
column definitions, or records in the insert buffer, we use this
charset-collation code for them. */
-UNIV_INTERN ulint data_mysql_default_charset_coll;
+ulint data_mysql_default_charset_coll;
/*********************************************************************//**
Determine how many bytes the first n characters of the given string occupy.
If the string is shorter than n characters, returns the number of bytes
the characters in the string occupy.
-@return length of the prefix, in bytes */
-UNIV_INTERN
+@return length of the prefix, in bytes */
ulint
dtype_get_at_most_n_mbchars(
/*========================*/
@@ -79,13 +71,11 @@ dtype_get_at_most_n_mbchars(
return(data_len);
}
-#endif /* UNIV_HOTBACKUP */
/*********************************************************************//**
Checks if a data main type is a string type. Also a BLOB is considered a
string type.
-@return TRUE if string type */
-UNIV_INTERN
+@return TRUE if string type */
ibool
dtype_is_string_type(
/*=================*/
@@ -105,8 +95,7 @@ dtype_is_string_type(
Checks if a type is a binary string type. Note that for tables created with
< 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For
those DATA_BLOB columns this function currently returns FALSE.
-@return TRUE if binary string type */
-UNIV_INTERN
+@return TRUE if binary string type */
ibool
dtype_is_binary_string_type(
/*========================*/
@@ -128,8 +117,7 @@ Checks if a type is a non-binary string type. That is, dtype_is_string_type is
TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created
with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column.
For those DATA_BLOB columns this function currently returns TRUE.
-@return TRUE if non-binary string type */
-UNIV_INTERN
+@return TRUE if non-binary string type */
ibool
dtype_is_non_binary_string_type(
/*============================*/
@@ -146,27 +134,8 @@ dtype_is_non_binary_string_type(
}
/*********************************************************************//**
-Forms a precise type from the < 4.1.2 format precise type plus the
-charset-collation code.
-@return precise type, including the charset-collation code */
-UNIV_INTERN
-ulint
-dtype_form_prtype(
-/*==============*/
- ulint old_prtype, /*!< in: the MySQL type code and the flags
- DATA_BINARY_TYPE etc. */
- ulint charset_coll) /*!< in: MySQL charset-collation code */
-{
- ut_a(old_prtype < 256 * 256);
- ut_a(charset_coll <= MAX_CHAR_COLL_NUM);
-
- return(old_prtype + (charset_coll << 16));
-}
-
-/*********************************************************************//**
Validates a data type structure.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
dtype_validate(
/*===========*/
@@ -174,27 +143,22 @@ dtype_validate(
{
ut_a(type);
ut_a(type->mtype >= DATA_VARCHAR);
- ut_a(type->mtype <= DATA_MYSQL);
+ ut_a(type->mtype <= DATA_MTYPE_MAX);
if (type->mtype == DATA_SYS) {
ut_a((type->prtype & DATA_MYSQL_TYPE_MASK) < DATA_N_SYS_COLS);
}
-#ifndef UNIV_HOTBACKUP
ut_a(dtype_get_mbminlen(type) <= dtype_get_mbmaxlen(type));
-#endif /* !UNIV_HOTBACKUP */
return(TRUE);
}
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Prints a data type structure. */
-UNIV_INTERN
+#ifdef UNIV_DEBUG
+/** Print a data type structure.
+@param[in] type data type */
void
-dtype_print(
-/*========*/
- const dtype_t* type) /*!< in: type */
+dtype_print(const dtype_t* type)
{
ulint mtype;
ulint prtype;
@@ -226,6 +190,10 @@ dtype_print(
fputs("DATA_BLOB", stderr);
break;
+ case DATA_GEOMETRY:
+ fputs("DATA_GEOMETRY", stderr);
+ break;
+
case DATA_INT:
fputs("DATA_INT", stderr);
break;
@@ -295,4 +263,4 @@ dtype_print(
fprintf(stderr, " len %lu", (ulong) len);
}
-#endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_DEBUG */
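The deleted dtype_form_prtype() packed a 16-bit MySQL charset-collation code into the upper half of the pre-4.1.2 precise type word. The encoding was a plain shift-and-add; a standalone sketch (the assertions mirror the removed ut_a() checks, and the real constants live in data0type.h):

    #include <cassert>
    #include <cstdint>

    // Pack a charset-collation code into bits 16..31 of the precise
    // type, as the removed dtype_form_prtype() did.
    std::uint32_t form_prtype(std::uint32_t old_prtype,
                              std::uint32_t charset_coll)
    {
        assert(old_prtype < 256 * 256);   // only the low 16 bits are used
        assert(charset_coll < 256 * 256); // stand-in for MAX_CHAR_COLL_NUM
        return old_prtype + (charset_coll << 16);
    }

Decoding is the mirror image: charset_coll = prtype >> 16, old_prtype = prtype & 0xFFFF.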
diff --git a/storage/innobase/dict/dict0boot.cc b/storage/innobase/dict/dict0boot.cc
index bd8e6a5af88..9294cf6263c 100644
--- a/storage/innobase/dict/dict0boot.cc
+++ b/storage/innobase/dict/dict0boot.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, MariaDB Corporation.
+Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2016, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,11 +25,6 @@ Created 4/18/1996 Heikki Tuuri
*******************************************************/
#include "dict0boot.h"
-
-#ifdef UNIV_NONINL
-#include "dict0boot.ic"
-#endif
-
#include "dict0crea.h"
#include "btr0btr.h"
#include "dict0load.h"
@@ -42,8 +37,7 @@ Created 4/18/1996 Heikki Tuuri
/**********************************************************************//**
Gets a pointer to the dictionary header and x-latches its page.
-@return pointer to the dictionary header, page x-latched */
-UNIV_INTERN
+@return pointer to the dictionary header, page x-latched */
dict_hdr_t*
dict_hdr_get(
/*=========*/
@@ -52,8 +46,8 @@ dict_hdr_get(
buf_block_t* block;
dict_hdr_t* header;
- block = buf_page_get(DICT_HDR_SPACE, 0, DICT_HDR_PAGE_NO,
- RW_X_LATCH, mtr);
+ block = buf_page_get(page_id_t(DICT_HDR_SPACE, DICT_HDR_PAGE_NO),
+ univ_page_size, RW_X_LATCH, mtr);
header = DICT_HDR + buf_block_get_frame(block);
buf_block_dbg_add_level(block, SYNC_DICT_HEADER);
@@ -63,23 +57,59 @@ dict_hdr_get(
/**********************************************************************//**
Returns a new table, index, or space id. */
-UNIV_INTERN
void
dict_hdr_get_new_id(
/*================*/
- table_id_t* table_id, /*!< out: table id
- (not assigned if NULL) */
- index_id_t* index_id, /*!< out: index id
- (not assigned if NULL) */
- ulint* space_id) /*!< out: space id
- (not assigned if NULL) */
+ table_id_t* table_id, /*!< out: table id
+ (not assigned if NULL) */
+ index_id_t* index_id, /*!< out: index id
+ (not assigned if NULL) */
+ ulint* space_id, /*!< out: space id
+ (not assigned if NULL) */
+ const dict_table_t* table, /*!< in: table */
+ bool disable_redo) /*!< in: if true and table
+ object is NULL
+ then disable-redo */
{
dict_hdr_t* dict_hdr;
ib_id_t id;
mtr_t mtr;
mtr_start(&mtr);
+ if (table) {
+ if (table->is_temporary()) {
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
+ }
+ } else if (disable_redo) {
+		/* In non-read-only mode we need to ensure that the space-id
+		header page is written to disk; otherwise, if the page is
+		evicted from the buffer cache and re-loaded, it would assign
+		the temporary tablespace id to another tablespace.
+		This is not the case in read-only mode, as no new object
+		is created except the temporary tablespace. */
+ mtr.set_log_mode(srv_read_only_mode
+ ? MTR_LOG_NONE : MTR_LOG_NO_REDO);
+ }
+ /* Server started and let's say space-id = x
+ - table created with file-per-table
+ - space-id = x + 1
+ - crash
+ Case 1: If it was redo logged then we know that it will be
+ restored to x + 1
+ Case 2: if not redo-logged
+ Header will have the old space-id = x
+ This is OK because on restart there is no object with
+ space id = x + 1
+ Case 3:
+ space-id = x (on start)
+ space-id = x+1 (temp-table allocation) - no redo logging
+	space-id = x+2 (non-temp-table allocation), this gets
+	redo logged.
+	If there is a crash there will be only 2 entries,
+	x (original) and x+2 (new), and the on-disk header will be
+	updated to reflect the x + 2 entry.
+	We cannot allocate the same space id to different objects. */
dict_hdr = dict_hdr_get(&mtr);
if (table_id) {
@@ -97,8 +127,7 @@ dict_hdr_get_new_id(
}
if (space_id) {
- *space_id = mtr_read_ulint(dict_hdr + DICT_HDR_MAX_SPACE_ID,
- MLOG_4BYTES, &mtr);
+ *space_id = mach_read_from_4(dict_hdr + DICT_HDR_MAX_SPACE_ID);
if (fil_assign_new_space_id(space_id)) {
mlog_write_ulint(dict_hdr + DICT_HDR_MAX_SPACE_ID,
*space_id, MLOG_4BYTES, &mtr);
@@ -111,7 +140,6 @@ dict_hdr_get_new_id(
/**********************************************************************//**
Writes the current value of the row id counter to the dictionary header file
page. */
-UNIV_INTERN
void
dict_hdr_flush_row_id(void)
/*=======================*/
@@ -120,7 +148,7 @@ dict_hdr_flush_row_id(void)
row_id_t id;
mtr_t mtr;
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
id = dict_sys->row_id;
@@ -136,7 +164,7 @@ dict_hdr_flush_row_id(void)
/*****************************************************************//**
Creates the file page for the dictionary header. This function is
called only at the database creation.
-@return TRUE if succeed */
+@return TRUE if succeed */
static
ibool
dict_hdr_create(
@@ -154,7 +182,7 @@ dict_hdr_create(
block = fseg_create(DICT_HDR_SPACE, 0,
DICT_HDR + DICT_HDR_FSEG_HEADER, mtr);
- ut_a(DICT_HDR_PAGE_NO == buf_block_get_page_no(block));
+ ut_a(DICT_HDR_PAGE_NO == block->page.id.page_no());
dict_header = dict_hdr_get(mtr);
@@ -169,8 +197,7 @@ dict_hdr_create(
mlog_write_ull(dict_header + DICT_HDR_INDEX_ID,
DICT_HDR_FIRST_ID, mtr);
- mlog_write_ulint(dict_header + DICT_HDR_MAX_SPACE_ID,
- 0, MLOG_4BYTES, mtr);
+ ut_ad(mach_read_from_4(dict_header + DICT_HDR_MAX_SPACE_ID) == 0);
/* Obsolete, but we must initialize it anyway. */
mlog_write_ulint(dict_header + DICT_HDR_MIX_ID_LOW,
@@ -180,9 +207,9 @@ dict_hdr_create(
system tables */
/*--------------------------*/
- root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
- DICT_HDR_SPACE, 0, DICT_TABLES_ID,
- dict_ind_redundant, mtr);
+ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, DICT_HDR_SPACE,
+ univ_page_size, DICT_TABLES_ID,
+ dict_ind_redundant, NULL, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
@@ -191,9 +218,9 @@ dict_hdr_create(
mlog_write_ulint(dict_header + DICT_HDR_TABLES, root_page_no,
MLOG_4BYTES, mtr);
/*--------------------------*/
- root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE, 0,
- DICT_TABLE_IDS_ID,
- dict_ind_redundant, mtr);
+ root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE,
+ univ_page_size, DICT_TABLE_IDS_ID,
+ dict_ind_redundant, NULL, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
@@ -202,9 +229,9 @@ dict_hdr_create(
mlog_write_ulint(dict_header + DICT_HDR_TABLE_IDS, root_page_no,
MLOG_4BYTES, mtr);
/*--------------------------*/
- root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
- DICT_HDR_SPACE, 0, DICT_COLUMNS_ID,
- dict_ind_redundant, mtr);
+ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, DICT_HDR_SPACE,
+ univ_page_size, DICT_COLUMNS_ID,
+ dict_ind_redundant, NULL, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
@@ -213,9 +240,9 @@ dict_hdr_create(
mlog_write_ulint(dict_header + DICT_HDR_COLUMNS, root_page_no,
MLOG_4BYTES, mtr);
/*--------------------------*/
- root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
- DICT_HDR_SPACE, 0, DICT_INDEXES_ID,
- dict_ind_redundant, mtr);
+ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, DICT_HDR_SPACE,
+ univ_page_size, DICT_INDEXES_ID,
+ dict_ind_redundant, NULL, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
@@ -224,9 +251,9 @@ dict_hdr_create(
mlog_write_ulint(dict_header + DICT_HDR_INDEXES, root_page_no,
MLOG_4BYTES, mtr);
/*--------------------------*/
- root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
- DICT_HDR_SPACE, 0, DICT_FIELDS_ID,
- dict_ind_redundant, mtr);
+ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, DICT_HDR_SPACE,
+ univ_page_size, DICT_FIELDS_ID,
+ dict_ind_redundant, NULL, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
@@ -243,7 +270,6 @@ dict_hdr_create(
Initializes the data dictionary memory structures when the database is
started. This function is also called when the data dictionary is created.
@return DB_SUCCESS or error code. */
-UNIV_INTERN
dberr_t
dict_boot(void)
/*===========*/
@@ -263,8 +289,8 @@ dict_boot(void)
ut_ad(DICT_NUM_FIELDS__SYS_TABLE_IDS == 2);
ut_ad(DICT_NUM_COLS__SYS_COLUMNS == 7);
ut_ad(DICT_NUM_FIELDS__SYS_COLUMNS == 9);
- ut_ad(DICT_NUM_COLS__SYS_INDEXES == 7);
- ut_ad(DICT_NUM_FIELDS__SYS_INDEXES == 9);
+ ut_ad(DICT_NUM_COLS__SYS_INDEXES == 8);
+ ut_ad(DICT_NUM_FIELDS__SYS_INDEXES == 10);
ut_ad(DICT_NUM_COLS__SYS_FIELDS == 3);
ut_ad(DICT_NUM_FIELDS__SYS_FIELDS == 5);
ut_ad(DICT_NUM_COLS__SYS_FOREIGN == 4);
@@ -280,7 +306,7 @@ dict_boot(void)
heap = mem_heap_create(450);
- mutex_enter(&(dict_sys->mutex));
+ mutex_enter(&dict_sys->mutex);
/* Get the dictionary header */
dict_hdr = dict_hdr_get(&mtr);
@@ -302,10 +328,11 @@ dict_boot(void)
/* Insert into the dictionary cache the descriptions of the basic
system tables */
/*-------------------------*/
- table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, 0, 0);
+ table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, 0, 0, 0);
- dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
+ dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0,
+ MAX_FULL_NAME_LEN);
+ dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 8);
/* ROW_FORMAT = (N_COLS >> 31) ? COMPACT : REDUNDANT */
dict_mem_table_add_col(table, heap, "N_COLS", DATA_INT, 0, 4);
/* The low order bit of TYPE is always set to 1. If the format
@@ -313,8 +340,7 @@ dict_boot(void)
dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "MIX_ID", DATA_BINARY, 0, 0);
/* MIX_LEN may contain additional table flags when
- ROW_FORMAT!=REDUNDANT. Currently, these flags include
- DICT_TF2_TEMPORARY. */
+ ROW_FORMAT!=REDUNDANT. */
dict_mem_table_add_col(table, heap, "MIX_LEN", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "CLUSTER_NAME", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
@@ -334,10 +360,8 @@ dict_boot(void)
index->id = DICT_TABLES_ID;
error = dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr
- + DICT_HDR_TABLES,
- MLOG_4BYTES, &mtr),
- FALSE);
+ mach_read_from_4(dict_hdr
+ + DICT_HDR_TABLES));
ut_a(error == DB_SUCCESS);
/*-------------------------*/
@@ -346,17 +370,15 @@ dict_boot(void)
dict_mem_index_add_field(index, "ID", 0);
index->id = DICT_TABLE_IDS_ID;
- error = dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr
- + DICT_HDR_TABLE_IDS,
- MLOG_4BYTES, &mtr),
- FALSE);
+ error = dict_index_add_to_cache(
+ table, index, mach_read_from_4(dict_hdr + DICT_HDR_TABLE_IDS));
ut_a(error == DB_SUCCESS);
/*-------------------------*/
- table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, 0, 0);
+ table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE,
+ 7, 0, 0, 0);
- dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0);
+ dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 8);
dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "MTYPE", DATA_INT, 0, 4);
@@ -379,22 +401,22 @@ dict_boot(void)
index->id = DICT_COLUMNS_ID;
error = dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr
- + DICT_HDR_COLUMNS,
- MLOG_4BYTES, &mtr),
- FALSE);
+ mach_read_from_4(dict_hdr
+ + DICT_HDR_COLUMNS));
ut_a(error == DB_SUCCESS);
/*-------------------------*/
- table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, 0, 0);
+ table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE,
+ DICT_NUM_COLS__SYS_INDEXES, 0, 0, 0);
- dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
+ dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 8);
+ dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 8);
dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "N_FIELDS", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "PAGE_NO", DATA_INT, 0, 4);
+ dict_mem_table_add_col(table, heap, "MERGE_THRESHOLD", DATA_INT, 0, 4);
table->id = DICT_INDEXES_ID;
@@ -411,16 +433,14 @@ dict_boot(void)
index->id = DICT_INDEXES_ID;
error = dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr
- + DICT_HDR_INDEXES,
- MLOG_4BYTES, &mtr),
- FALSE);
+ mach_read_from_4(dict_hdr
+ + DICT_HDR_INDEXES));
ut_a(error == DB_SUCCESS);
/*-------------------------*/
- table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, 0, 0);
+ table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, 0, 0, 0);
- dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0);
+ dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 8);
dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "COL_NAME", DATA_BINARY, 0, 0);
@@ -439,10 +459,8 @@ dict_boot(void)
index->id = DICT_FIELDS_ID;
error = dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr
- + DICT_HDR_FIELDS,
- MLOG_4BYTES, &mtr),
- FALSE);
+ mach_read_from_4(dict_hdr
+ + DICT_HDR_FIELDS));
ut_a(error == DB_SUCCESS);
mtr_commit(&mtr);
@@ -456,20 +474,20 @@ dict_boot(void)
err = ibuf_init_at_db_start();
if (err == DB_SUCCESS) {
- if (srv_read_only_mode && !ibuf_is_empty()) {
+ if (srv_read_only_mode
+ && srv_force_recovery != SRV_FORCE_NO_LOG_REDO
+ && !ibuf_is_empty()) {
if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Change buffer must be empty when --innodb-read-only "
- "is set! "
- "You can try to recover the database with innodb_force_recovery=5");
+ ib::error() << "Change buffer must be empty when"
+ " --innodb-read-only is set!"
+ "You can try to recover the database with innodb_force_recovery=5";
err = DB_ERROR;
} else {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Change buffer not empty when --innodb-read-only "
- "is set! but srv_force_recovery = %lu, ignoring.",
- srv_force_recovery);
+ ib::warn() << "Change buffer not empty when --innodb-read-only "
+ "is set! but srv_force_recovery = " << srv_force_recovery
+ << " , ignoring.";
}
}
@@ -481,10 +499,10 @@ dict_boot(void)
dict_load_sys_table(dict_sys->sys_indexes);
dict_load_sys_table(dict_sys->sys_fields);
}
-
- mutex_exit(&(dict_sys->mutex));
}
+ mutex_exit(&dict_sys->mutex);
+
return(err);
}
@@ -502,7 +520,6 @@ dict_insert_initial_data(void)
/*****************************************************************//**
Creates and initializes the data dictionary at the server bootstrap.
@return DB_SUCCESS or error code. */
-UNIV_INTERN
dberr_t
dict_create(void)
/*=============*/
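A central change in dict0boot.cc is that dict_hdr_get_new_id() now picks the mini-transaction log mode from its arguments: updates on behalf of a temporary table skip the redo log, and a bare temp-tablespace ID allocation (no table object, disable_redo set) skips redo in read-write mode but logs nothing at all in read-only mode, where no new persistent object can appear. A condensed sketch of that decision, with LogMode and the parameter names as illustrative stand-ins for MTR_LOG_* and the real arguments:

    enum class LogMode { ALL, NO_REDO, NONE };

    struct Table { bool is_temporary; };

    // Mirrors the log-mode selection added to dict_hdr_get_new_id().
    LogMode pick_log_mode(const Table* table, bool disable_redo,
                          bool read_only)
    {
        if (table) {
            // Changes for temporary tables generate no redo log.
            return table->is_temporary ? LogMode::NO_REDO : LogMode::ALL;
        }
        if (disable_redo) {
            // Temp tablespace id allocation: keep the header page
            // flushable without redo, or skip logging entirely when
            // the server is read-only.
            return read_only ? LogMode::NONE : LogMode::NO_REDO;
        }
        return LogMode::ALL;
    }

This is what makes the crash cases enumerated in the comment safe: a non-logged ID bump may be lost on crash, but only temporary tablespaces used it, and any later redo-logged allocation re-establishes a higher on-disk maximum.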
diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc
index 5ac528dea44..54d2eca245f 100644
--- a/storage/innobase/dict/dict0crea.cc
+++ b/storage/innobase/dict/dict0crea.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, MariaDB Corporation.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,11 +25,6 @@ Created 1/8/1996 Heikki Tuuri
*******************************************************/
#include "dict0crea.h"
-
-#ifdef UNIV_NONINL
-#include "dict0crea.ic"
-#endif
-
#include "btr0pcur.h"
#include "btr0btr.h"
#include "page0page.h"
@@ -41,16 +36,16 @@ Created 1/8/1996 Heikki Tuuri
#include "row0mysql.h"
#include "pars0pars.h"
#include "trx0roll.h"
-#include "usr0sess.h"
+#include "trx0undo.h"
#include "ut0vec.h"
#include "dict0priv.h"
#include "fts0priv.h"
-#include "ha_prototypes.h"
+#include "srv0start.h"
/*****************************************************************//**
Based on a table object, this function builds the entry to be inserted
in the SYS_TABLES system table.
-@return the tuple which should be inserted */
+@return the tuple which should be inserted */
static
dtuple_t*
dict_create_sys_tables_tuple(
@@ -79,7 +74,8 @@ dict_create_sys_tables_tuple(
dfield = dtuple_get_nth_field(
entry, DICT_COL__SYS_TABLES__NAME);
- dfield_set_data(dfield, table->name, ut_strlen(table->name));
+ dfield_set_data(dfield,
+ table->name.m_name, strlen(table->name.m_name));
/* 1: DB_TRX_ID added later */
/* 2: DB_ROLL_PTR added later */
@@ -97,7 +93,11 @@ dict_create_sys_tables_tuple(
entry, DICT_COL__SYS_TABLES__N_COLS);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
- mach_write_to_4(ptr, table->n_def
+
+ /* If there is any virtual column, encode it in N_COLS */
+ mach_write_to_4(ptr, dict_table_encode_n_col(
+ static_cast<ulint>(table->n_def),
+ static_cast<ulint>(table->n_v_def))
| ((table->flags & DICT_TF_COMPACT) << 31));
dfield_set_data(dfield, ptr, 4);
@@ -130,7 +130,7 @@ dict_create_sys_tables_tuple(
ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
/* Be sure all non-used bits are zero. */
- ut_a(!(table->flags2 & ~DICT_TF2_BIT_MASK));
+ ut_a(!(table->flags2 & DICT_TF2_UNUSED_BIT_MASK));
mach_write_to_4(ptr, table->flags2);
dfield_set_data(dfield, ptr, 4);
@@ -156,7 +156,7 @@ dict_create_sys_tables_tuple(
/*****************************************************************//**
Based on a table object, this function builds the entry to be inserted
in the SYS_COLUMNS system table.
-@return the tuple which should be inserted */
+@return the tuple which should be inserted */
static
dtuple_t*
dict_create_sys_columns_tuple(
@@ -173,11 +173,23 @@ dict_create_sys_columns_tuple(
dfield_t* dfield;
byte* ptr;
const char* col_name;
+ ulint num_base = 0;
+ ulint v_col_no = ULINT_UNDEFINED;
ut_ad(table);
ut_ad(heap);
- column = dict_table_get_nth_col(table, i);
+	/* Any column beyond table->n_def is a virtual column */
+ if (i >= table->n_def) {
+ dict_v_col_t* v_col = dict_table_get_nth_v_col(
+ table, i - table->n_def);
+ column = &v_col->m_col;
+ num_base = v_col->num_base;
+ v_col_no = column->ind;
+ } else {
+ column = dict_table_get_nth_col(table, i);
+ ut_ad(!dict_col_is_virtual(column));
+ }
sys_columns = dict_sys->sys_columns;
@@ -197,7 +209,15 @@ dict_create_sys_columns_tuple(
dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__POS);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
- mach_write_to_4(ptr, i);
+
+ if (v_col_no != ULINT_UNDEFINED) {
+ /* encode virtual column's position in MySQL table and InnoDB
+ table in "POS" */
+ mach_write_to_4(ptr, dict_create_v_col_pos(
+ i - table->n_def, v_col_no));
+ } else {
+ mach_write_to_4(ptr, i);
+ }
dfield_set_data(dfield, ptr, 4);
@@ -206,7 +226,12 @@ dict_create_sys_columns_tuple(
/* 4: NAME ---------------------------*/
dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__NAME);
- col_name = dict_table_get_col_name(table, i);
+ if (i >= table->n_def) {
+ col_name = dict_table_get_v_col_name(table, i - table->n_def);
+ } else {
+ col_name = dict_table_get_col_name(table, i);
+ }
+
dfield_set_data(dfield, col_name, ut_strlen(col_name));
/* 5: MTYPE --------------------------*/
@@ -237,7 +262,7 @@ dict_create_sys_columns_tuple(
dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__PREC);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
- mach_write_to_4(ptr, 0/* unused */);
+ mach_write_to_4(ptr, num_base);
dfield_set_data(dfield, ptr, 4);
/*---------------------------------*/
@@ -245,9 +270,77 @@ dict_create_sys_columns_tuple(
return(entry);
}
+/** Based on a table object, this function builds the entry to be inserted
+in the SYS_VIRTUAL system table. Each row maps a virtual column to one of
+its base columns.
+@param[in] table table
+@param[in] v_col_n virtual column number
+@param[in] b_col_n base column sequence num
+@param[in] heap memory heap
+@return the tuple which should be inserted */
+static
+dtuple_t*
+dict_create_sys_virtual_tuple(
+ const dict_table_t* table,
+ ulint v_col_n,
+ ulint b_col_n,
+ mem_heap_t* heap)
+{
+ dict_table_t* sys_virtual;
+ dtuple_t* entry;
+ const dict_col_t* base_column;
+ dfield_t* dfield;
+ byte* ptr;
+
+ ut_ad(table);
+ ut_ad(heap);
+
+ ut_ad(v_col_n < table->n_v_def);
+ dict_v_col_t* v_col = dict_table_get_nth_v_col(table, v_col_n);
+ base_column = v_col->base_col[b_col_n];
+
+ sys_virtual = dict_sys->sys_virtual;
+
+ entry = dtuple_create(heap, DICT_NUM_COLS__SYS_VIRTUAL
+ + DATA_N_SYS_COLS);
+
+ dict_table_copy_types(entry, sys_virtual);
+
+ /* 0: TABLE_ID -----------------------*/
+ dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_VIRTUAL__TABLE_ID);
+
+ ptr = static_cast<byte*>(mem_heap_alloc(heap, 8));
+ mach_write_to_8(ptr, table->id);
+
+ dfield_set_data(dfield, ptr, 8);
+
+ /* 1: POS ---------------------------*/
+ dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_VIRTUAL__POS);
+
+ ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
+ ulint v_col_no = dict_create_v_col_pos(v_col_n, v_col->m_col.ind);
+ mach_write_to_4(ptr, v_col_no);
+
+ dfield_set_data(dfield, ptr, 4);
+
+ /* 2: BASE_POS ----------------------------*/
+ dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_VIRTUAL__BASE_POS);
+
+ ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
+ mach_write_to_4(ptr, base_column->ind);
+
+ dfield_set_data(dfield, ptr, 4);
+
+ /* 3: DB_TRX_ID added later */
+ /* 4: DB_ROLL_PTR added later */
+
+ /*---------------------------------*/
+ return(entry);
+}
+
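Each SYS_VIRTUAL row built above maps one virtual column to one of its base columns, so a virtual column with num_base base columns yields num_base rows, and a virtual column with no base columns yields none. A small sketch of that fan-out, mirroring the col_no/base_col_no walk in dict_create_table_step() further below (VCol and the pair encoding are illustrative; the on-disk POS encoding is dict_create_v_col_pos(), not reproduced here):

    #include <cstdint>
    #include <utility>
    #include <vector>

    struct VCol {
        std::uint32_t              pos;   // virtual column position
        std::vector<std::uint32_t> base;  // base column positions
    };

    // One (virtual column, base column) pair per SYS_VIRTUAL row.
    std::vector<std::pair<std::uint32_t, std::uint32_t>>
    sys_virtual_rows(const std::vector<VCol>& v_cols)
    {
        std::vector<std::pair<std::uint32_t, std::uint32_t>> rows;
        for (const VCol& v : v_cols) {        // virtual columns in order
            for (std::uint32_t b : v.base) {  // one row per base column
                rows.emplace_back(v.pos, b);
            }
        }
        return rows;
    }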
/***************************************************************//**
Builds a table definition to insert.
-@return DB_SUCCESS or error code */
+@return DB_SUCCESS or error code */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
dict_build_table_def_step(
@@ -256,40 +349,72 @@ dict_build_table_def_step(
tab_node_t* node) /*!< in: table create node */
{
dict_table_t* table;
- dtuple_t* row;
- dberr_t error;
- const char* path;
- mtr_t mtr;
- ulint space = 0;
- bool use_tablespace;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
table = node->table;
- use_tablespace = DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_TABLESPACE);
+ ut_ad(!dict_table_is_temporary(table));
- dict_hdr_get_new_id(&table->id, NULL, NULL);
+ trx_t* trx = thr_get_trx(thr);
+ dict_table_assign_new_id(table, trx);
- thr_get_trx(thr)->table_id = table->id;
+ ut_ad(mutex_own(&dict_sys->mutex));
- /* Always set this bit for all new created tables */
+	/* Always set this bit for all newly created tables */
DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME);
DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name",
DICT_TF2_FLAG_UNSET(table,
DICT_TF2_FTS_AUX_HEX_NAME););
- if (use_tablespace) {
- /* This table will not use the system tablespace.
- Get a new space id. */
- dict_hdr_get_new_id(NULL, NULL, &space);
+ if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_FILE_PER_TABLE)) {
+ /* This table will need a new tablespace. */
+
+ ut_ad(dict_table_get_format(table) <= UNIV_FORMAT_MAX);
+ ut_ad(DICT_TF_GET_ZIP_SSIZE(table->flags) == 0
+ || dict_table_get_format(table) >= UNIV_FORMAT_B);
+ ut_ad(trx->table_id);
+ mtr_t mtr;
+ trx_undo_t* undo = trx->rsegs.m_redo.insert_undo;
+ if (undo && !undo->table_id
+ && trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE) {
+ /* This must be a TRUNCATE operation where
+ the empty table is created after the old table
+ was renamed. Be sure to mark the transaction
+ associated with the new empty table, so that
+ we can remove it on recovery. */
+ mtr.start();
+ trx_undo_mark_as_dict(trx, undo, &mtr);
+ mtr.commit();
+ log_write_up_to(mtr.commit_lsn(), true);
+ }
+ ulint space;
+ /* Get a new tablespace ID */
+ dict_hdr_get_new_id(NULL, NULL, &space, table, false);
DBUG_EXECUTE_IF(
"ib_create_table_fail_out_of_space_ids",
space = ULINT_UNDEFINED;
);
- if (UNIV_UNLIKELY(space == ULINT_UNDEFINED)) {
- return(DB_ERROR);
+ if (space == ULINT_UNDEFINED) {
+ return DB_ERROR;
+ }
+ table->space = unsigned(space);
+
+ /* Determine the tablespace flags. */
+ bool has_data_dir = DICT_TF_HAS_DATA_DIR(table->flags);
+ ulint fsp_flags = dict_tf_to_fsp_flags(table->flags);
+ char* filepath;
+
+ if (has_data_dir) {
+ ut_ad(table->data_dir_path);
+ filepath = fil_make_filepath(
+ table->data_dir_path,
+ table->name.m_name, IBD, true);
+
+ } else {
+ /* Make the tablespace file in the default dir
+ using the table name */
+ filepath = fil_make_filepath(
+ NULL, table->name.m_name, IBD, false);
}
/* We create a new single-table tablespace for the table.
@@ -297,45 +422,35 @@ dict_build_table_def_step(
- page 0 is the fsp header and an extent descriptor page,
- page 1 is an ibuf bitmap page,
- page 2 is the first inode page,
- - page 3 will contain the root of the clustered index of the
- table we create here. */
-
- path = table->data_dir_path ? table->data_dir_path
- : table->dir_path_of_temp_table;
-
- ut_ad(dict_table_get_format(table) <= UNIV_FORMAT_MAX);
- ut_ad(!dict_table_zip_size(table)
- || dict_table_get_format(table) >= UNIV_FORMAT_B);
+ - page 3 will contain the root of the clustered index of
+ the table we create here. */
- error = fil_create_new_single_table_tablespace(
- space, table->name, path,
- dict_tf_to_fsp_flags(table->flags),
- table->flags2,
- FIL_IBD_FILE_INITIAL_SIZE,
- node->mode, node->key_id);
+ dberr_t err = fil_ibd_create(
+ space, table->name.m_name, filepath, fsp_flags,
+ FIL_IBD_FILE_INITIAL_SIZE, node->mode, node->key_id);
- table->space = (unsigned int) space;
+ ut_free(filepath);
- if (error != DB_SUCCESS) {
-
- return(error);
+ if (err != DB_SUCCESS) {
+ return err;
}
- mtr_start(&mtr);
+ mtr.start();
+ mtr.set_named_space(table->space);
fsp_header_init(table->space, FIL_IBD_FILE_INITIAL_SIZE, &mtr);
- mtr_commit(&mtr);
+ mtr.commit();
} else {
- /* Create in the system tablespace */
- ut_ad(table->space == TRX_SYS_SPACE);
+ ut_ad(dict_tf_get_rec_format(table->flags)
+ != REC_FORMAT_COMPRESSED);
+ ut_ad(table->space == srv_sys_space.space_id());
}
- row = dict_create_sys_tables_tuple(table, node->heap);
+ ins_node_set_new_row(node->tab_def,
+ dict_create_sys_tables_tuple(table, node->heap));
- ins_node_set_new_row(node->tab_def, row);
-
- return(DB_SUCCESS);
+ return DB_SUCCESS;
}
/***************************************************************//**
@@ -353,10 +468,25 @@ dict_build_col_def_step(
ins_node_set_new_row(node->col_def, row);
}
+/** Builds a SYS_VIRTUAL row definition to insert.
+@param[in] node table create node */
+static
+void
+dict_build_v_col_def_step(
+ tab_node_t* node)
+{
+ dtuple_t* row;
+
+ row = dict_create_sys_virtual_tuple(node->table, node->col_no,
+ node->base_col_no,
+ node->heap);
+ ins_node_set_new_row(node->v_col_def, row);
+}
+
/*****************************************************************//**
Based on an index object, this function builds the entry to be inserted
in the SYS_INDEXES system table.
-@return the tuple which should be inserted */
+@return the tuple which should be inserted */
static
dtuple_t*
dict_create_sys_indexes_tuple(
@@ -372,7 +502,7 @@ dict_create_sys_indexes_tuple(
dfield_t* dfield;
byte* ptr;
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
ut_ad(index);
ut_ad(heap);
@@ -380,7 +510,8 @@ dict_create_sys_indexes_tuple(
table = dict_table_get_low(index->table_name);
- entry = dtuple_create(heap, 7 + DATA_N_SYS_COLS);
+ entry = dtuple_create(
+ heap, DICT_NUM_COLS__SYS_INDEXES + DATA_N_SYS_COLS);
dict_table_copy_types(entry, sys_indexes);
@@ -408,7 +539,16 @@ dict_create_sys_indexes_tuple(
dfield = dtuple_get_nth_field(
entry, DICT_COL__SYS_INDEXES__NAME);
- dfield_set_data(dfield, index->name, ut_strlen(index->name));
+ if (!index->is_committed()) {
+ ulint len = strlen(index->name) + 1;
+ char* name = static_cast<char*>(
+ mem_heap_alloc(heap, len));
+ *name = *TEMP_INDEX_PREFIX_STR;
+ memcpy(name + 1, index->name, len - 1);
+ dfield_set_data(dfield, name, len);
+ } else {
+ dfield_set_data(dfield, index->name, strlen(index->name));
+ }
/* 5: N_FIELDS ----------------------*/
dfield = dtuple_get_nth_field(
@@ -448,6 +588,16 @@ dict_create_sys_indexes_tuple(
dfield_set_data(dfield, ptr, 4);
+ /* 9: MERGE_THRESHOLD ----------------*/
+
+ dfield = dtuple_get_nth_field(
+ entry, DICT_COL__SYS_INDEXES__MERGE_THRESHOLD);
+
+ ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
+ mach_write_to_4(ptr, DICT_INDEX_MERGE_THRESHOLD_DEFAULT);
+
+ dfield_set_data(dfield, ptr, 4);
+
/*--------------------------------*/
return(entry);
@@ -456,7 +606,7 @@ dict_create_sys_indexes_tuple(
/*****************************************************************//**
Based on an index object, this function builds the entry to be inserted
in the SYS_FIELDS system table.
-@return the tuple which should be inserted */
+@return the tuple which should be inserted */
static
dtuple_t*
dict_create_sys_fields_tuple(
@@ -538,7 +688,7 @@ dict_create_sys_fields_tuple(
/*****************************************************************//**
Creates the tuple with which the index entry is searched for writing the index
tree root page number, if such a tree is created.
-@return the tuple for search */
+@return the tuple for search */
static
dtuple_t*
dict_create_search_tuple(
@@ -573,7 +723,7 @@ dict_create_search_tuple(
/***************************************************************//**
Builds an index definition row to insert.
-@return DB_SUCCESS or error code */
+@return DB_SUCCESS or error code */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
dict_build_index_def_step(
@@ -586,7 +736,7 @@ dict_build_index_def_step(
dtuple_t* row;
trx_t* trx;
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
trx = thr_get_trx(thr);
@@ -608,7 +758,7 @@ dict_build_index_def_step(
ut_ad((UT_LIST_GET_LEN(table->indexes) > 0)
|| dict_index_is_clust(index));
- dict_hdr_get_new_id(NULL, &index->id, NULL);
+ dict_hdr_get_new_id(NULL, &index->id, NULL, table, false);
/* Inherit the space id from the table; we store all indexes of a
table in the same tablespace */
@@ -629,6 +779,37 @@ dict_build_index_def_step(
}
/***************************************************************//**
+Builds an index definition without updating SYSTEM TABLES. */
+void
+dict_build_index_def(
+/*=================*/
+ const dict_table_t* table, /*!< in: table */
+ dict_index_t* index, /*!< in/out: index */
+ trx_t* trx) /*!< in/out: InnoDB transaction handle */
+{
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ if (trx->table_id == 0) {
+ /* Record only the first table id. */
+ trx->table_id = table->id;
+ }
+
+ ut_ad((UT_LIST_GET_LEN(table->indexes) > 0)
+ || dict_index_is_clust(index));
+
+ dict_hdr_get_new_id(NULL, &index->id, NULL, table, false);
+
+ /* Inherit the space id from the table; we store all indexes of a
+ table in the same tablespace */
+
+ index->space = table->space;
+
+ /* Note that the index was created by this transaction. */
+ index->trx_id = trx->id;
+}
+
+/***************************************************************//**
Builds a field definition row to insert. */
static
void
@@ -648,20 +829,20 @@ dict_build_field_def_step(
/***************************************************************//**
Creates an index tree for the index if it is not a member of a cluster.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
dict_create_index_tree_step(
/*========================*/
ind_node_t* node) /*!< in: index create node */
{
+ mtr_t mtr;
+ btr_pcur_t pcur;
dict_index_t* index;
dict_table_t* sys_indexes;
dtuple_t* search_tuple;
- btr_pcur_t pcur;
- mtr_t mtr;
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
index = node->index;
@@ -678,6 +859,13 @@ dict_create_index_tree_step(
mtr_start(&mtr);
+ const bool missing = !index->is_readable()
+ || dict_table_is_discarded(index->table);
+
+ if (!missing) {
+ mtr.set_named_space(index->space);
+ }
+
search_tuple = dict_create_search_tuple(node->ind_row, node->heap);
btr_pcur_open(UT_LIST_GET_FIRST(sys_indexes->indexes),
@@ -688,16 +876,14 @@ dict_create_index_tree_step(
dberr_t err = DB_SUCCESS;
- ulint zip_size = dict_table_zip_size(index->table);
-
- if (node->index->table->file_unreadable
- || dict_table_is_discarded(node->index->table)) {
+ if (missing) {
node->page_no = FIL_NULL;
} else {
node->page_no = btr_create(
- index->type, index->space, zip_size,
- index->id, index, &mtr);
+ index->type, index->space,
+ dict_table_page_size(index->table),
+ index->id, index, NULL, &mtr);
if (node->page_no == FIL_NULL) {
err = DB_OUT_OF_FILE_SPACE;
@@ -708,180 +894,219 @@ dict_create_index_tree_step(
err = DB_OUT_OF_FILE_SPACE; );
}
- page_rec_write_field(
- btr_pcur_get_rec(&pcur), DICT_FLD__SYS_INDEXES__PAGE_NO,
- node->page_no, &mtr);
+ ulint len;
+ byte* data = rec_get_nth_field_old(btr_pcur_get_rec(&pcur),
+ DICT_FLD__SYS_INDEXES__PAGE_NO,
+ &len);
+ ut_ad(len == 4);
+ if (mach_read_from_4(data) != node->page_no) {
+ mlog_write_ulint(data, node->page_no, MLOG_4BYTES, &mtr);
+ }
+
+ mtr_commit(&mtr);
+
+ return(err);
+}
+
+/***************************************************************//**
+Creates an index tree for the index if it is not a member of a cluster.
+Don't update SYSTEM TABLES.
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+dberr_t
+dict_create_index_tree_in_mem(
+/*==========================*/
+ dict_index_t* index, /*!< in/out: index */
+ const trx_t* trx) /*!< in: InnoDB transaction handle */
+{
+ mtr_t mtr;
+ ulint page_no;
+
+ ut_ad(mutex_own(&dict_sys->mutex));
+ ut_ad(!(index->type & DICT_FTS));
+
+ mtr_start(&mtr);
+ mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
+
+ dberr_t err = DB_SUCCESS;
+
+	/* Currently this function is used by temp-tables only.
+	Import/Discard of temp-tables is blocked, hence these asserts. */
+ ut_ad(index->is_readable());
+ ut_ad(!dict_table_is_discarded(index->table));
+
+ page_no = btr_create(
+ index->type, index->space,
+ dict_table_page_size(index->table),
+ index->id, index, NULL, &mtr);
+
+ index->page = page_no;
+ index->trx_id = trx->id;
- btr_pcur_close(&pcur);
+ if (page_no == FIL_NULL) {
+ err = DB_OUT_OF_FILE_SPACE;
+ }
mtr_commit(&mtr);
return(err);
}
-/*******************************************************************//**
-Drops the index tree associated with a row in SYS_INDEXES table. */
-UNIV_INTERN
-void
-dict_drop_index_tree(
-/*=================*/
- rec_t* rec, /*!< in/out: record in the clustered index
- of SYS_INDEXES table */
- mtr_t* mtr) /*!< in: mtr having the latch on the record page */
+/** Drop the index tree associated with a row in SYS_INDEXES table.
+@param[in,out] rec SYS_INDEXES record
+@param[in,out] pcur persistent cursor on rec
+@param[in,out] trx dictionary transaction
+@param[in,out] mtr mini-transaction
+@return whether freeing the B-tree was attempted */
+bool dict_drop_index_tree(rec_t* rec, btr_pcur_t* pcur, trx_t* trx, mtr_t* mtr)
{
- ulint root_page_no;
- ulint space;
- ulint zip_size;
const byte* ptr;
ulint len;
+ ulint root_page_no;
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
ut_a(!dict_table_is_comp(dict_sys->sys_indexes));
- ptr = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len);
+
+ ptr = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len);
ut_ad(len == 4);
+ btr_pcur_store_position(pcur, mtr);
+
root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
if (root_page_no == FIL_NULL) {
/* The tree has already been freed */
- return;
+ return(false);
}
+ mlog_write_ulint(const_cast<byte*>(ptr), FIL_NULL, MLOG_4BYTES, mtr);
+
ptr = rec_get_nth_field_old(
rec, DICT_FLD__SYS_INDEXES__SPACE, &len);
ut_ad(len == 4);
- space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
- zip_size = fil_space_get_zip_size(space);
+ const uint32_t space_id = mach_read_from_4(ptr);
+ ut_ad(space_id < SRV_TMP_SPACE_ID);
+ if (space_id != TRX_SYS_SPACE
+ && srv_safe_truncate
+ && trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE) {
+ /* We are about to delete the entire .ibd file;
+ do not bother to free pages inside it. */
+ return false;
+ }
+
+ ptr = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_INDEXES__ID, &len);
+
+ ut_ad(len == 8);
+
+ bool found;
+ const page_size_t page_size(fil_space_get_page_size(space_id,
+ &found));
- if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
+ if (!found) {
/* It is a single table tablespace and the .ibd file is
missing: do nothing */
- return;
+ return(false);
}
- /* We free all the pages but the root page first; this operation
- may span several mini-transactions */
-
- btr_free_but_not_root(space, zip_size, root_page_no);
+ /* If tablespace is scheduled for truncate, do not try to drop
+ the indexes in that tablespace. There is a truncate fixup action
+ which will take care of it. */
+ if (srv_is_tablespace_truncated(space_id)) {
+ return(false);
+ }
- /* Then we free the root page in the same mini-transaction where
- we write FIL_NULL to the appropriate field in the SYS_INDEXES
- record: this mini-transaction marks the B-tree totally freed */
+ btr_free_if_exists(page_id_t(space_id, root_page_no), page_size,
+ mach_read_from_8(ptr), mtr);
- /* printf("Dropping index tree in space %lu root page %lu\n", space,
- root_page_no); */
- btr_free_root(space, zip_size, root_page_no, mtr);
+ return(true);
+}
- page_rec_write_field(rec, DICT_FLD__SYS_INDEXES__PAGE_NO,
- FIL_NULL, mtr);
+/*******************************************************************//**
+Drops the index tree but don't update SYS_INDEXES table. */
+void
+dict_drop_index_tree_in_mem(
+/*========================*/
+ const dict_index_t* index, /*!< in: index */
+ ulint page_no) /*!< in: index page-no */
+{
+ ut_ad(mutex_own(&dict_sys->mutex));
+ ut_ad(dict_table_is_temporary(index->table));
+
+ ulint root_page_no = page_no;
+ ulint space = index->space;
+ bool found;
+ const page_size_t page_size(fil_space_get_page_size(space,
+ &found));
+
+	/* If the tree has already been freed, or it is a single-table
+	tablespace and the .ibd file is missing, do nothing;
+	else free all the pages. */
+ if (root_page_no != FIL_NULL && found) {
+ btr_free(page_id_t(space, root_page_no), page_size);
+ }
}
/*******************************************************************//**
-Truncates the index tree associated with a row in SYS_INDEXES table.
+Recreate the index tree associated with a row in SYS_INDEXES table.
@return new root page number, or FIL_NULL on failure */
-UNIV_INTERN
ulint
-dict_truncate_index_tree(
+dict_recreate_index_tree(
/*=====================*/
- dict_table_t* table, /*!< in: the table the index belongs to */
- ulint space, /*!< in: 0=truncate,
- nonzero=create the index tree in the
- given tablespace */
+ const dict_table_t*
+ table, /*!< in/out: the table the index belongs to */
btr_pcur_t* pcur, /*!< in/out: persistent cursor pointing to
record in the clustered index of
SYS_INDEXES table. The cursor may be
repositioned in this call. */
- mtr_t* mtr) /*!< in: mtr having the latch
- on the record page. The mtr may be
- committed and restarted in this call. */
+ mtr_t* mtr) /*!< in/out: mtr having the latch
+ on the record page. */
{
- ulint root_page_no;
- ibool drop = !space;
- ulint zip_size;
- ulint type;
- index_id_t index_id;
- rec_t* rec;
- const byte* ptr;
+ ut_ad(mutex_own(&dict_sys->mutex));
+ ut_a(!dict_table_is_comp(dict_sys->sys_indexes));
+
ulint len;
- dict_index_t* index;
- bool has_been_dropped = false;
+ rec_t* rec = btr_pcur_get_rec(pcur);
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_a(!dict_table_is_comp(dict_sys->sys_indexes));
- rec = btr_pcur_get_rec(pcur);
- ptr = rec_get_nth_field_old(
+ const byte* ptr = rec_get_nth_field_old(
rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len);
ut_ad(len == 4);
- root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
-
- if (drop && root_page_no == FIL_NULL) {
- has_been_dropped = true;
- drop = FALSE;
- }
-
- ptr = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_INDEXES__SPACE, &len);
+ ulint root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
+ ptr = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__SPACE, &len);
ut_ad(len == 4);
- if (drop) {
- space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
- }
+ ut_a(table->space == mtr_read_ulint(ptr, MLOG_4BYTES, mtr));
- zip_size = fil_space_get_zip_size(space);
+ ulint space = table->space;
+ bool found;
+ const page_size_t page_size(fil_space_get_page_size(space,
+ &found));
- if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
- /* It is a single table tablespace and the .ibd file is
- missing: do nothing */
+ if (!found) {
+		/* It is a single table tablespace and the .ibd file is
+ missing: do nothing. */
+
+ ib::warn()
+ << "Trying to TRUNCATE a missing .ibd file of table "
+ << table->name << "!";
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Trying to TRUNCATE"
- " a missing .ibd file of table %s!\n", table->name);
return(FIL_NULL);
}
- ptr = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_INDEXES__TYPE, &len);
+ ptr = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__TYPE, &len);
ut_ad(len == 4);
- type = mach_read_from_4(ptr);
+ ulint type = mach_read_from_4(ptr);
ptr = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__ID, &len);
ut_ad(len == 8);
- index_id = mach_read_from_8(ptr);
-
- if (!drop) {
-
- goto create;
- }
-
- /* We free all the pages but the root page first; this operation
- may span several mini-transactions */
-
- btr_free_but_not_root(space, zip_size, root_page_no);
-
- /* Then we free the root page in the same mini-transaction where
- we create the b-tree and write its new root page number to the
- appropriate field in the SYS_INDEXES record: this mini-transaction
- marks the B-tree totally truncated */
-
- btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, NULL, mtr);
-
- btr_free_root(space, zip_size, root_page_no, mtr);
-create:
- /* We will temporarily write FIL_NULL to the PAGE_NO field
- in SYS_INDEXES, so that the database will not get into an
- inconsistent state in case it crashes between the mtr_commit()
- below and the following mtr_commit() call. */
- page_rec_write_field(rec, DICT_FLD__SYS_INDEXES__PAGE_NO,
- FIL_NULL, mtr);
+ index_id_t index_id = mach_read_from_8(ptr);
/* We will need to commit the mini-transaction in order to avoid
deadlocks in the btr_create() call, because otherwise we would
@@ -890,55 +1115,110 @@ create:
mtr_commit(mtr);
mtr_start(mtr);
+ mtr->set_named_space(space);
btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
/* Find the index corresponding to this SYS_INDEXES record. */
- for (index = UT_LIST_GET_FIRST(table->indexes);
- index;
+ for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
+ index != NULL;
index = UT_LIST_GET_NEXT(indexes, index)) {
if (index->id == index_id) {
if (index->type & DICT_FTS) {
return(FIL_NULL);
} else {
- if (has_been_dropped) {
- fprintf(stderr, " InnoDB: Trying to"
- " TRUNCATE a missing index of"
- " table %s!\n",
- index->table->name);
- }
-
- root_page_no = btr_create(type, space, zip_size,
- index_id, index, mtr);
+ root_page_no = btr_create(
+ type, space, page_size, index_id,
+ index, NULL, mtr);
index->page = (unsigned int) root_page_no;
return(root_page_no);
}
}
}
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Index %llu of table %s is missing\n"
- "InnoDB: from the data dictionary during TRUNCATE!\n",
- (ullint) index_id,
- table->name);
+ ib::error() << "Failed to create index with index id " << index_id
+ << " of table " << table->name;
return(FIL_NULL);
}
+/*******************************************************************//**
+Truncates the index tree but don't update SYSTEM TABLES.
+@return DB_SUCCESS or error */
+dberr_t
+dict_truncate_index_tree_in_mem(
+/*============================*/
+ dict_index_t* index) /*!< in/out: index */
+{
+ mtr_t mtr;
+ bool truncate;
+ ulint space = index->space;
+
+ ut_ad(mutex_own(&dict_sys->mutex));
+ ut_ad(dict_table_is_temporary(index->table));
+
+ ulint type = index->type;
+ ulint root_page_no = index->page;
+
+ if (root_page_no == FIL_NULL) {
+
+ /* The tree has been freed. */
+ ib::warn() << "Trying to TRUNCATE a missing index of table "
+ << index->table->name << "!";
+
+ truncate = false;
+ } else {
+ truncate = true;
+ }
+
+ bool found;
+ const page_size_t page_size(fil_space_get_page_size(space,
+ &found));
+
+ if (!found) {
+
+ /* It is a single table tablespace and the .ibd file is
+ missing: do nothing */
+
+ ib::warn()
+ << "Trying to TRUNCATE a missing .ibd file of table "
+ << index->table->name << "!";
+ }
+
+	/* If the table to truncate resides in its own tablespace that will
+	be re-created on truncate, then we can ignore freeing of existing
+	tablespace objects. */
+
+ if (truncate) {
+ btr_free(page_id_t(space, root_page_no), page_size);
+ }
+
+ mtr_start(&mtr);
+ mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
+
+ root_page_no = btr_create(
+ type, space, page_size, index->id, index, NULL, &mtr);
+
+ DBUG_EXECUTE_IF("ib_err_trunc_temp_recreate_index",
+ root_page_no = FIL_NULL;);
+
+ index->page = root_page_no;
+
+ mtr_commit(&mtr);
+
+ return(index->page == FIL_NULL ? DB_ERROR : DB_SUCCESS);
+}
+
/*********************************************************************//**
Creates a table create graph.
-@return own: table create node */
-UNIV_INTERN
+@return own: table create node */
tab_node_t*
tab_create_graph_create(
/*====================*/
dict_table_t* table, /*!< in: table to create, built as a memory data
structure */
mem_heap_t* heap, /*!< in: heap where created */
- bool commit, /*!< in: true if the commit node should be
- added to the query graph */
fil_encryption_t mode, /*!< in: encryption mode */
- ulint key_id) /*!< in: encryption key_id */
+ uint32_t key_id) /*!< in: encryption key_id */
{
tab_node_t* node;
@@ -962,28 +1242,24 @@ tab_create_graph_create(
heap);
node->col_def->common.parent = node;
- if (commit) {
- node->commit_node = trx_commit_node_create(heap);
- node->commit_node->common.parent = node;
- } else {
- node->commit_node = 0;
- }
+ node->v_col_def = ins_node_create(INS_DIRECT, dict_sys->sys_virtual,
+ heap);
+ node->v_col_def->common.parent = node;
return(node);
}
-/*********************************************************************//**
-Creates an index create graph.
-@return own: index create node */
-UNIV_INTERN
+/** Creates an index create graph.
+@param[in] index index to create, built as a memory data structure
+@param[in,out] heap heap where created
+@param[in]	add_v	new virtual columns added in the same clause as
+			the ADD INDEX
+@return own: index create node */
ind_node_t*
ind_create_graph_create(
-/*====================*/
- dict_index_t* index, /*!< in: index to create, built as a memory data
- structure */
- mem_heap_t* heap, /*!< in: heap where created */
- bool commit) /*!< in: true if the commit node should be
- added to the query graph */
+ dict_index_t* index,
+ mem_heap_t* heap,
+ const dict_add_v_col_t* add_v)
{
ind_node_t* node;
@@ -994,6 +1270,8 @@ ind_create_graph_create(
node->index = index;
+ node->add_v = add_v;
+
node->state = INDEX_BUILD_INDEX_DEF;
node->page_no = FIL_NULL;
node->heap = mem_heap_create(256);
@@ -1006,20 +1284,12 @@ ind_create_graph_create(
dict_sys->sys_fields, heap);
node->field_def->common.parent = node;
- if (commit) {
- node->commit_node = trx_commit_node_create(heap);
- node->commit_node->common.parent = node;
- } else {
- node->commit_node = 0;
- }
-
return(node);
}
/***********************************************************//**
Creates a table. This is a high-level function used in SQL execution graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
dict_create_table_step(
/*===================*/
@@ -1030,7 +1300,7 @@ dict_create_table_step(
trx_t* trx;
ut_ad(thr);
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
trx = thr_get_trx(thr);
@@ -1062,7 +1332,8 @@ dict_create_table_step(
if (node->state == TABLE_BUILD_COL_DEF) {
- if (node->col_no < (node->table)->n_def) {
+ if (node->col_no < (static_cast<ulint>(node->table->n_def)
+ + static_cast<ulint>(node->table->n_v_def))) {
dict_build_col_def_step(node);
@@ -1072,24 +1343,57 @@ dict_create_table_step(
return(thr);
} else {
- node->state = TABLE_COMMIT_WORK;
+ /* Move on to SYS_VIRTUAL table */
+ node->col_no = 0;
+ node->base_col_no = 0;
+ node->state = TABLE_BUILD_V_COL_DEF;
}
}
- if (node->state == TABLE_COMMIT_WORK) {
+ if (node->state == TABLE_BUILD_V_COL_DEF) {
- /* Table was correctly defined: do NOT commit the transaction
- (CREATE TABLE does NOT do an implicit commit of the current
- transaction) */
+ if (node->col_no < static_cast<ulint>(node->table->n_v_def)) {
+ dict_v_col_t* v_col = dict_table_get_nth_v_col(
+ node->table, node->col_no);
- node->state = TABLE_ADD_TO_CACHE;
+ /* If no base column */
+ while (v_col->num_base == 0) {
+ node->col_no++;
+ if (node->col_no == static_cast<ulint>(
+ (node->table)->n_v_def)) {
+ node->state = TABLE_ADD_TO_CACHE;
+ break;
+ }
+
+ v_col = dict_table_get_nth_v_col(
+ node->table, node->col_no);
+ node->base_col_no = 0;
+ }
- /* thr->run_node = node->commit_node;
+ if (node->state != TABLE_ADD_TO_CACHE) {
+ ut_ad(node->col_no == v_col->v_pos);
+ dict_build_v_col_def_step(node);
+
+ if (node->base_col_no < v_col->num_base - 1) {
+ /* move on to next base column */
+ node->base_col_no++;
+ } else {
+ /* move on to next virtual column */
+ node->col_no++;
+ node->base_col_no = 0;
+ }
- return(thr); */
+ thr->run_node = node->v_col_def;
+
+ return(thr);
+ }
+ } else {
+ node->state = TABLE_ADD_TO_CACHE;
+ }
}
if (node->state == TABLE_ADD_TO_CACHE) {
+ DBUG_EXECUTE_IF("ib_ddl_crash_during_create", DBUG_SUICIDE(););
dict_table_add_to_cache(node->table, TRUE, node->heap);
@@ -1119,8 +1423,7 @@ function_exit:
/***********************************************************//**
Creates an index. This is a high-level function used in SQL execution
graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
dict_create_index_step(
/*===================*/
@@ -1131,7 +1434,7 @@ dict_create_index_step(
trx_t* trx;
ut_ad(thr);
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
trx = thr_get_trx(thr);
@@ -1177,17 +1480,10 @@ dict_create_index_step(
}
if (node->state == INDEX_ADD_TO_CACHE) {
-
- index_id_t index_id = node->index->id;
-
err = dict_index_add_to_cache(
- node->table, node->index, FIL_NULL,
- trx_is_strict(trx)
- || dict_table_get_format(node->table)
- >= UNIV_FORMAT_B);
+ node->table, node->index, FIL_NULL, node->add_v);
- node->index = dict_index_get_if_in_cache_low(index_id);
- ut_a((node->index == NULL) == (err != DB_SUCCESS));
+ ut_ad((node->index == NULL) == (err != DB_SUCCESS));
if (err != DB_SUCCESS) {
@@ -1244,20 +1540,6 @@ dict_create_index_step(
dict_index_add_to_cache(). */
ut_ad(node->index->trx_id == trx->id);
ut_ad(node->index->table->def_trx_id == trx->id);
- node->state = INDEX_COMMIT_WORK;
- }
-
- if (node->state == INDEX_COMMIT_WORK) {
-
- /* Index was correctly defined: do NOT commit the transaction
- (CREATE INDEX does NOT currently do an implicit commit of
- the current transaction) */
-
- node->state = INDEX_CREATE_INDEX_TREE;
-
- /* thr->run_node = node->commit_node;
-
- return(thr); */
}
function_exit:
@@ -1316,7 +1598,7 @@ dict_check_if_system_table_exists(
/* This table has already been created, and it is OK.
Ensure that it can't be evicted from the table LRU cache. */
- dict_table_move_from_lru_to_non_lru(sys_table);
+ dict_table_prevent_eviction(sys_table);
}
mutex_exit(&dict_sys->mutex);
@@ -1328,8 +1610,7 @@ dict_check_if_system_table_exists(
Creates the foreign key constraints system tables inside InnoDB
at server bootstrap or server start if they are not found or are
not of the right form.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
dberr_t
dict_create_or_check_foreign_constraint_tables(void)
/*================================================*/
@@ -1355,6 +1636,11 @@ dict_create_or_check_foreign_constraint_tables(void)
return(DB_SUCCESS);
}
+ if (srv_read_only_mode
+ || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
+ return(DB_READ_ONLY);
+ }
+
trx = trx_allocate_for_mysql();
trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
@@ -1375,28 +1661,20 @@ dict_create_or_check_foreign_constraint_tables(void)
" ON \"test/#sql-ib-garbage\"(ID);\n"
"END;\n", FALSE, trx);
ut_ad(err == DB_SUCCESS);
- row_drop_table_for_mysql("test/#sql-ib-garbage",
- trx, TRUE, TRUE););
+ row_drop_table_for_mysql("test/#sql-ib-garbage", trx,
+ SQLCOM_DROP_DB, true););
/* Check which incomplete table definition to drop. */
if (sys_foreign_err == DB_CORRUPTION) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Dropping incompletely created "
- "SYS_FOREIGN table.");
- row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE, TRUE);
+ row_drop_table_after_create_fail("SYS_FOREIGN", trx);
}
if (sys_foreign_cols_err == DB_CORRUPTION) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Dropping incompletely created "
- "SYS_FOREIGN_COLS table.");
-
- row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE, TRUE);
+ row_drop_table_after_create_fail("SYS_FOREIGN_COLS", trx);
}
- ib_logf(IB_LOG_LEVEL_WARN,
- "Creating foreign key constraint system tables.");
+ ib::info() << "Creating foreign key constraint system tables.";
/* NOTE: in dict_load_foreigns we use the fact that
there are 2 secondary indexes on SYS_FOREIGN, and they
@@ -1437,17 +1715,16 @@ dict_create_or_check_foreign_constraint_tables(void)
FALSE, trx);
if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Creation of SYS_FOREIGN and SYS_FOREIGN_COLS "
- "has failed with error %lu. Tablespace is full. "
- "Dropping incompletely created tables.",
- (ulong) err);
+
+ ib::error() << "Creation of SYS_FOREIGN and SYS_FOREIGN_COLS"
+ " failed: " << ut_strerr(err) << ". Tablespace is"
+ " full. Dropping incompletely created tables.";
ut_ad(err == DB_OUT_OF_FILE_SPACE
|| err == DB_TOO_MANY_CONCURRENT_TRXS);
- row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE, TRUE);
- row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE, TRUE);
+ row_drop_table_after_create_fail("SYS_FOREIGN", trx);
+ row_drop_table_after_create_fail("SYS_FOREIGN_COLS", trx);
if (err == DB_OUT_OF_FILE_SPACE) {
err = DB_MUST_GET_MORE_FILE_SPACE;
@@ -1462,11 +1739,6 @@ dict_create_or_check_foreign_constraint_tables(void)
srv_file_per_table = srv_file_per_table_backup;
- if (err == DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Foreign key constraint system tables created");
- }
-
/* Note: The master thread has not been started at this point. */
/* Confirm and move to the non-LRU part of the table LRU list. */
sys_foreign_err = dict_check_if_system_table_exists(
@@ -1480,9 +1752,110 @@ dict_create_or_check_foreign_constraint_tables(void)
return(err);
}
+/** Creates the virtual column system table (SYS_VIRTUAL) inside InnoDB
+at server bootstrap or server start if the table is not found or is
+not of the right form.
+@return DB_SUCCESS or error code */
+dberr_t
+dict_create_or_check_sys_virtual()
+{
+ trx_t* trx;
+ my_bool srv_file_per_table_backup;
+ dberr_t err;
+
+ ut_a(srv_get_active_thread_type() == SRV_NONE);
+
+ /* Note: The master thread has not been started at this point. */
+ err = dict_check_if_system_table_exists(
+ "SYS_VIRTUAL", DICT_NUM_FIELDS__SYS_VIRTUAL + 1, 1);
+
+ if (err == DB_SUCCESS) {
+ mutex_enter(&dict_sys->mutex);
+ dict_sys->sys_virtual = dict_table_get_low("SYS_VIRTUAL");
+ mutex_exit(&dict_sys->mutex);
+ return(DB_SUCCESS);
+ }
+
+ if (srv_read_only_mode
+ || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
+ return(DB_READ_ONLY);
+ }
+
+ trx = trx_allocate_for_mysql();
+
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+
+ trx->op_info = "creating sys_virtual tables";
+
+ row_mysql_lock_data_dictionary(trx);
+
+ /* Check which incomplete table definition to drop. */
+
+ if (err == DB_CORRUPTION) {
+ row_drop_table_after_create_fail("SYS_VIRTUAL", trx);
+ }
+
+ ib::info() << "Creating sys_virtual system tables.";
+
+ srv_file_per_table_backup = srv_file_per_table;
+
+ /* We always want SYSTEM tables to be created inside the system
+ tablespace. */
+
+ srv_file_per_table = 0;
+
+ err = que_eval_sql(
+ NULL,
+ "PROCEDURE CREATE_SYS_VIRTUAL_TABLES_PROC () IS\n"
+ "BEGIN\n"
+ "CREATE TABLE\n"
+ "SYS_VIRTUAL(TABLE_ID BIGINT, POS INT,"
+ " BASE_POS INT);\n"
+ "CREATE UNIQUE CLUSTERED INDEX BASE_IDX"
+ " ON SYS_VIRTUAL(TABLE_ID, POS, BASE_POS);\n"
+ "END;\n",
+ FALSE, trx);
+
+ if (err != DB_SUCCESS) {
+
+ ib::error() << "Creation of SYS_VIRTUAL"
+ " failed: " << ut_strerr(err) << ". Tablespace is"
+ " full or too many transactions."
+ " Dropping incompletely created tables.";
+
+ ut_ad(err == DB_OUT_OF_FILE_SPACE
+ || err == DB_TOO_MANY_CONCURRENT_TRXS);
+
+ row_drop_table_after_create_fail("SYS_VIRTUAL", trx);
+
+ if (err == DB_OUT_OF_FILE_SPACE) {
+ err = DB_MUST_GET_MORE_FILE_SPACE;
+ }
+ }
+
+ trx_commit_for_mysql(trx);
+
+ row_mysql_unlock_data_dictionary(trx);
+
+ trx_free_for_mysql(trx);
+
+ srv_file_per_table = srv_file_per_table_backup;
+
+ /* Note: The master thread has not been started at this point. */
+ /* Confirm and move to the non-LRU part of the table LRU list. */
+ dberr_t sys_virtual_err = dict_check_if_system_table_exists(
+ "SYS_VIRTUAL", DICT_NUM_FIELDS__SYS_VIRTUAL + 1, 1);
+ ut_a(sys_virtual_err == DB_SUCCESS);
+ mutex_enter(&dict_sys->mutex);
+ dict_sys->sys_virtual = dict_table_get_low("SYS_VIRTUAL");
+ mutex_exit(&dict_sys->mutex);
+
+ return(err);
+}
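/* [Editorial sketch, not part of this patch] Each SYS_VIRTUAL row links
one virtual column (POS) to one of its base columns (BASE_POS), so a
virtual column with N base columns occupies N rows. For a hypothetical
table id 17 whose virtual column at position 3 is computed from the
columns at positions 0 and 1: */
#include <cstdint>
#include <cstdio>
#include <vector>

struct SysVirtualRow {           /* mirrors (TABLE_ID, POS, BASE_POS) */
	std::uint64_t table_id;
	std::uint32_t pos;       /* position of the virtual column */
	std::uint32_t base_pos;  /* position of one base column */
};

int main()
{
	std::vector<SysVirtualRow> rows = {
		{17, 3, 0},      /* v-col 3 depends on column 0 */
		{17, 3, 1},      /* v-col 3 depends on column 1 */
	};
	for (size_t i = 0; i < rows.size(); i++) {
		std::printf("%llu %u %u\n",
			    (unsigned long long) rows[i].table_id,
			    rows[i].pos, rows[i].base_pos);
	}
}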
+
/****************************************************************//**
Evaluate the given foreign key SQL statement.
-@return error code or DB_SUCCESS */
+@return error code or DB_SUCCESS */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
dict_foreign_eval_sql(
@@ -1504,9 +1877,9 @@ dict_foreign_eval_sql(
ut_print_timestamp(ef);
fputs(" Error in foreign key constraint creation for table ",
ef);
- ut_print_name(ef, trx, TRUE, name);
+ ut_print_name(ef, trx, name);
fputs(".\nA foreign key constraint of name ", ef);
- ut_print_name(ef, trx, TRUE, id);
+ ut_print_name(ef, trx, id);
fputs("\nalready exists."
" (Note that internally InnoDB adds 'databasename'\n"
"in front of the user-defined constraint name.)\n"
@@ -1525,15 +1898,14 @@ dict_foreign_eval_sql(
}
if (error != DB_SUCCESS) {
- fprintf(stderr,
- "InnoDB: Foreign key constraint creation failed:\n"
- "InnoDB: internal error number %lu\n", (ulong) error);
+ ib::error() << "Foreign key constraint creation failed: "
+ << ut_strerr(error);
mutex_enter(&dict_foreign_err_mutex);
ut_print_timestamp(ef);
fputs(" Internal error in foreign key constraint creation"
" for table ", ef);
- ut_print_name(ef, trx, TRUE, name);
+ ut_print_name(ef, trx, name);
fputs(".\n"
"See the MySQL .err log in the datadir"
" for more information.\n", ef);
@@ -1548,7 +1920,7 @@ dict_foreign_eval_sql(
/********************************************************************//**
Add a single foreign key field definition to the data dictionary tables in
the database.
-@return error code or DB_SUCCESS */
+@return error code or DB_SUCCESS */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
dict_create_add_foreign_field_to_dictionary(
@@ -1558,6 +1930,8 @@ dict_create_add_foreign_field_to_dictionary(
const dict_foreign_t* foreign, /*!< in: foreign */
trx_t* trx) /*!< in/out: transaction */
{
+ DBUG_ENTER("dict_create_add_foreign_field_to_dictionary");
+
pars_info_t* info = pars_info_create();
pars_info_add_str_literal(info, "id", foreign->id);
@@ -1570,7 +1944,7 @@ dict_create_add_foreign_field_to_dictionary(
pars_info_add_str_literal(info, "ref_col_name",
foreign->referenced_col_names[field_nr]);
- return(dict_foreign_eval_sql(
+ DBUG_RETURN(dict_foreign_eval_sql(
info,
"PROCEDURE P () IS\n"
"BEGIN\n"
@@ -1593,12 +1967,12 @@ dict_foreign_def_get(
char* fk_def = (char *)mem_heap_alloc(foreign->heap, 4*1024);
const char* tbname;
char tablebuf[MAX_TABLE_NAME_LEN + 1] = "";
- int i;
+ unsigned i;
char* bufend;
tbname = dict_remove_db_name(foreign->id);
bufend = innobase_convert_name(tablebuf, MAX_TABLE_NAME_LEN,
- tbname, strlen(tbname), trx->mysql_thd, FALSE);
+ tbname, strlen(tbname), trx->mysql_thd);
tablebuf[bufend - tablebuf] = '\0';
sprintf(fk_def,
@@ -1609,9 +1983,9 @@ dict_foreign_def_get(
innobase_convert_name(buf, MAX_TABLE_NAME_LEN,
foreign->foreign_col_names[i],
strlen(foreign->foreign_col_names[i]),
- trx->mysql_thd, FALSE);
+ trx->mysql_thd);
strcat(fk_def, buf);
- if (i < foreign->n_fields-1) {
+ if (i < static_cast<unsigned>(foreign->n_fields-1)) {
strcat(fk_def, (char *)",");
}
}
@@ -1621,7 +1995,7 @@ dict_foreign_def_get(
bufend = innobase_convert_name(tablebuf, MAX_TABLE_NAME_LEN,
foreign->referenced_table_name,
strlen(foreign->referenced_table_name),
- trx->mysql_thd, TRUE);
+ trx->mysql_thd);
tablebuf[bufend - tablebuf] = '\0';
strcat(fk_def, tablebuf);
@@ -1632,10 +2006,10 @@ dict_foreign_def_get(
bufend = innobase_convert_name(buf, MAX_TABLE_NAME_LEN,
foreign->referenced_col_names[i],
strlen(foreign->referenced_col_names[i]),
- trx->mysql_thd, FALSE);
+ trx->mysql_thd);
buf[bufend - buf] = '\0';
strcat(fk_def, buf);
- if (i < foreign->n_fields-1) {
+ if (i < static_cast<unsigned>(foreign->n_fields-1)) {
strcat(fk_def, (char *)",");
}
}
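/* [Editorial sketch, not part of this patch] dict_foreign_def_get()
above assembles a human-readable FOREIGN KEY definition into a fixed
4 KiB heap buffer with sprintf/strcat. The standalone sketch below
reproduces only the comma-joining of the column lists; names are
hypothetical and the exact output of the real function may differ. */
#include <cstdio>
#include <string>
#include <vector>

static std::string join_cols(const std::vector<std::string>& cols)
{
	std::string out;
	for (size_t i = 0; i < cols.size(); i++) {
		out += cols[i];
		if (i < cols.size() - 1) {
			out += ",";  /* same guard as i < n_fields-1 */
		}
	}
	return out;
}

int main()
{
	std::vector<std::string> fk = {"a", "b"}, ref = {"x", "y"};
	std::printf("FOREIGN KEY (%s) REFERENCES t2 (%s)\n",
		    join_cols(fk).c_str(), join_cols(ref).c_str());
}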
@@ -1655,7 +2029,7 @@ dict_foreign_def_get_fields(
trx_t* trx, /*!< in: trx */
char** field, /*!< out: foreign column */
char** field2, /*!< out: referenced column */
- int col_no) /*!< in: column number */
+ ulint col_no) /*!< in: column number */
{
char* bufend;
char* fieldbuf = (char *)mem_heap_alloc(foreign->heap, MAX_TABLE_NAME_LEN+1);
@@ -1664,14 +2038,14 @@ dict_foreign_def_get_fields(
bufend = innobase_convert_name(fieldbuf, MAX_TABLE_NAME_LEN,
foreign->foreign_col_names[col_no],
strlen(foreign->foreign_col_names[col_no]),
- trx->mysql_thd, FALSE);
+ trx->mysql_thd);
fieldbuf[bufend - fieldbuf] = '\0';
bufend = innobase_convert_name(fieldbuf2, MAX_TABLE_NAME_LEN,
foreign->referenced_col_names[col_no],
strlen(foreign->referenced_col_names[col_no]),
- trx->mysql_thd, FALSE);
+ trx->mysql_thd);
fieldbuf2[bufend - fieldbuf2] = '\0';
*field = fieldbuf;
@@ -1680,17 +2054,18 @@ dict_foreign_def_get_fields(
/********************************************************************//**
Add a foreign key definition to the data dictionary tables.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
dberr_t
dict_create_add_foreign_to_dictionary(
/*==================================*/
- dict_table_t* table, /*!< in: table */
const char* name, /*!< in: table name */
const dict_foreign_t* foreign,/*!< in: foreign key */
trx_t* trx) /*!< in/out: dictionary transaction */
{
dberr_t error;
+
+ DBUG_ENTER("dict_create_add_foreign_to_dictionary");
+
pars_info_t* info = pars_info_create();
pars_info_add_str_literal(info, "id", foreign->id);
@@ -1703,6 +2078,11 @@ dict_create_add_foreign_to_dictionary(
pars_info_add_int4_literal(info, "n_cols",
foreign->n_fields + (foreign->type << 24));
+ DBUG_PRINT("dict_create_add_foreign_to_dictionary",
+ ("'%s', '%s', '%s', %d", foreign->id, name,
+ foreign->referenced_table_name,
+ foreign->n_fields + (foreign->type << 24)));
+
error = dict_foreign_eval_sql(info,
"PROCEDURE P () IS\n"
"BEGIN\n"
@@ -1719,11 +2099,10 @@ dict_create_add_foreign_to_dictionary(
char* fk_def;
innobase_convert_name(tablename, MAX_TABLE_NAME_LEN,
- table->name, strlen(table->name),
- trx->mysql_thd, TRUE);
+ name, strlen(name), trx->mysql_thd);
innobase_convert_name(buf, MAX_TABLE_NAME_LEN,
- foreign->id, strlen(foreign->id), trx->mysql_thd, FALSE);
+ foreign->id, strlen(foreign->id), trx->mysql_thd);
fk_def = dict_foreign_def_get((dict_foreign_t*)foreign, trx);
@@ -1736,7 +2115,7 @@ dict_create_add_foreign_to_dictionary(
tablename, buf, fk_def);
}
- return(error);
+ DBUG_RETURN(error);
}
for (ulint i = 0; i < foreign->n_fields; i++) {
@@ -1751,10 +2130,9 @@ dict_create_add_foreign_to_dictionary(
char* fk_def;
innobase_convert_name(tablename, MAX_TABLE_NAME_LEN,
- table->name, strlen(table->name),
- trx->mysql_thd, TRUE);
+ name, strlen(name), trx->mysql_thd);
innobase_convert_name(buf, MAX_TABLE_NAME_LEN,
- foreign->id, strlen(foreign->id), trx->mysql_thd, FALSE);
+ foreign->id, strlen(foreign->id), trx->mysql_thd);
fk_def = dict_foreign_def_get((dict_foreign_t*)foreign, trx);
dict_foreign_def_get_fields((dict_foreign_t*)foreign, trx, &field, &field2, i);
@@ -1765,11 +2143,85 @@ dict_create_add_foreign_to_dictionary(
" Error in foreign key definition: %s.",
tablename, buf, i+1, fk_def);
- return(error);
+ DBUG_RETURN(error);
}
}
- return(error);
+ DBUG_RETURN(error);
+}
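/* [Editorial sketch, not part of this patch] The :n_cols literal bound
above packs two values into a single INT4 column of SYS_FOREIGN: the
field count in the low bits and the ON DELETE/ON UPDATE type flags in
the high byte (foreign->type << 24). Encode/decode sketch with a
hypothetical flag value: */
#include <cassert>
#include <cstdint>

static std::uint32_t pack_n_cols(std::uint32_t n_fields, std::uint32_t type)
{
	return n_fields + (type << 24);  /* same expression as the patch */
}

int main()
{
	std::uint32_t packed = pack_n_cols(2, 0x05 /* hypothetical flags */);
	assert((packed & 0xFFFFFF) == 2);  /* field count */
	assert((packed >> 24) == 0x05);    /* type flags */
}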
+
+/** Check whether a column is a base column of some stored column.
+@param[in] col_name column name to check
+@param[in] table table containing the stored columns
+@return true if the column is a base column of a stored column,
+false otherwise */
+static
+bool
+dict_foreign_base_for_stored(
+ const char* col_name,
+ const dict_table_t* table)
+{
+ /* Loop through each stored column and check whether any of its
+ base columns matches the given column name */
+ dict_s_col_list::const_iterator it;
+ for (it = table->s_cols->begin();
+ it != table->s_cols->end(); ++it) {
+ dict_s_col_t s_col = *it;
+
+ for (ulint j = 0; j < s_col.num_base; j++) {
+ if (strcmp(col_name, dict_table_get_col_name(
+ table,
+ s_col.base_col[j]->ind)) == 0) {
+ return(true);
+ }
+ }
+ }
+
+ return(false);
+}
+
+/** Check if any foreign constraint is defined on columns that serve as
+base columns of a stored column. This is used to prevent creating
+SET NULL or CASCADE constraints on such columns.
+@param[in] local_fk_set set of foreign key objects, to be added to
+the dictionary tables
+@param[in] table table to which the foreign key objects in
+local_fk_set belong
+@return true if such a constraint exists, false otherwise */
+bool
+dict_foreigns_has_s_base_col(
+ const dict_foreign_set& local_fk_set,
+ const dict_table_t* table)
+{
+ dict_foreign_t* foreign;
+
+ if (table->s_cols == NULL) {
+ return (false);
+ }
+
+ for (dict_foreign_set::const_iterator it = local_fk_set.begin();
+ it != local_fk_set.end(); ++it) {
+
+ foreign = *it;
+ ulint type = foreign->type;
+
+ type &= ~(DICT_FOREIGN_ON_DELETE_NO_ACTION
+ | DICT_FOREIGN_ON_UPDATE_NO_ACTION);
+
+ if (type == 0) {
+ continue;
+ }
+
+ for (ulint i = 0; i < foreign->n_fields; i++) {
+ /* Check if the constraint is on a column that
+ is a base column of any stored column */
+ if (dict_foreign_base_for_stored(
+ foreign->foreign_col_names[i], table)) {
+ return(true);
+ }
+ }
+ }
+
+ return(false);
}
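/* [Editorial sketch, not part of this patch] The masking above treats a
constraint as harmless when only the NO ACTION bits remain; any other
bit (CASCADE, SET NULL, ...) forces the base-column scan. Standalone
model with hypothetical bit assignments: */
#include <cstdio>

enum {
	ON_DELETE_CASCADE   = 1,   /* hypothetical values */
	ON_DELETE_SET_NULL  = 2,
	ON_UPDATE_CASCADE   = 4,
	ON_UPDATE_SET_NULL  = 8,
	ON_DELETE_NO_ACTION = 16,
	ON_UPDATE_NO_ACTION = 32
};

static bool needs_base_col_check(unsigned type)
{
	type &= ~unsigned(ON_DELETE_NO_ACTION | ON_UPDATE_NO_ACTION);
	return type != 0;  /* some action remains: scan the base columns */
}

int main()
{
	std::printf("%d %d\n",
		    needs_base_col_check(ON_DELETE_NO_ACTION),  /* 0: skip */
		    needs_base_col_check(ON_UPDATE_CASCADE));   /* 1: check */
}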
/** Adds the given set of foreign key objects to the dictionary tables
@@ -1782,7 +2234,6 @@ the dictionary tables
local_fk_set belong to
@param[in,out] trx transaction
@return error code or DB_SUCCESS */
-UNIV_INTERN
dberr_t
dict_create_add_foreigns_to_dictionary(
/*===================================*/
@@ -1793,12 +2244,12 @@ dict_create_add_foreigns_to_dictionary(
dict_foreign_t* foreign;
dberr_t error;
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
if (NULL == dict_table_get_low("SYS_FOREIGN")) {
- fprintf(stderr,
- "InnoDB: table SYS_FOREIGN not found"
- " in internal data dictionary\n");
+
+ ib::error() << "Table SYS_FOREIGN not found"
+ " in internal data dictionary";
return(DB_ERROR);
}
@@ -1810,8 +2261,8 @@ dict_create_add_foreigns_to_dictionary(
foreign = *it;
ut_ad(foreign->id != NULL);
- error = dict_create_add_foreign_to_dictionary((dict_table_t*)table, table->name,
- foreign, trx);
+ error = dict_create_add_foreign_to_dictionary(
+ table->name.m_name, foreign, trx);
if (error != DB_SUCCESS) {
@@ -1819,12 +2270,6 @@ dict_create_add_foreigns_to_dictionary(
}
}
- trx->op_info = "committing foreign key definitions";
-
- trx_commit(trx);
-
- trx->op_info = "";
-
return(DB_SUCCESS);
}
@@ -1832,8 +2277,7 @@ dict_create_add_foreigns_to_dictionary(
Creates the tablespaces and datafiles system tables inside InnoDB
at server bootstrap or server start if they are not found or are
not of the right form.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
dberr_t
dict_create_or_check_sys_tablespace(void)
/*=====================================*/
@@ -1855,9 +2299,15 @@ dict_create_or_check_sys_tablespace(void)
if (sys_tablespaces_err == DB_SUCCESS
&& sys_datafiles_err == DB_SUCCESS) {
+ srv_sys_tablespaces_open = true;
return(DB_SUCCESS);
}
+ if (srv_read_only_mode
+ || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
+ return(DB_READ_ONLY);
+ }
+
trx = trx_allocate_for_mysql();
trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
@@ -1869,22 +2319,14 @@ dict_create_or_check_sys_tablespace(void)
/* Check which incomplete table definition to drop. */
if (sys_tablespaces_err == DB_CORRUPTION) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Dropping incompletely created "
- "SYS_TABLESPACES table.");
- row_drop_table_for_mysql("SYS_TABLESPACES", trx, TRUE, TRUE);
+ row_drop_table_after_create_fail("SYS_TABLESPACES", trx);
}
if (sys_datafiles_err == DB_CORRUPTION) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Dropping incompletely created "
- "SYS_DATAFILES table.");
-
- row_drop_table_for_mysql("SYS_DATAFILES", trx, TRUE, TRUE);
+ row_drop_table_after_create_fail("SYS_DATAFILES", trx);
}
- ib_logf(IB_LOG_LEVEL_INFO,
- "Creating tablespace and datafile system tables.");
+ ib::info() << "Creating tablespace and datafile system tables.";
/* We always want SYSTEM tables to be created inside the system
tablespace. */
@@ -1907,17 +2349,17 @@ dict_create_or_check_sys_tablespace(void)
FALSE, trx);
if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Creation of SYS_TABLESPACES and SYS_DATAFILES "
- "has failed with error %lu. Tablespace is full. "
- "Dropping incompletely created tables.",
- (ulong) err);
+
+ ib::error() << "Creation of SYS_TABLESPACES and SYS_DATAFILES"
+ " has failed with error " << ut_strerr(err)
+ << ". Dropping incompletely created tables.";
ut_a(err == DB_OUT_OF_FILE_SPACE
+ || err == DB_DUPLICATE_KEY
|| err == DB_TOO_MANY_CONCURRENT_TRXS);
- row_drop_table_for_mysql("SYS_TABLESPACES", trx, TRUE, TRUE);
- row_drop_table_for_mysql("SYS_DATAFILES", trx, TRUE, TRUE);
+ row_drop_table_after_create_fail("SYS_TABLESPACES", trx);
+ row_drop_table_after_create_fail("SYS_DATAFILES", trx);
if (err == DB_OUT_OF_FILE_SPACE) {
err = DB_MUST_GET_MORE_FILE_SPACE;
@@ -1933,8 +2375,7 @@ dict_create_or_check_sys_tablespace(void)
srv_file_per_table = srv_file_per_table_backup;
if (err == DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Tablespace and datafile system tables created.");
+ srv_sys_tablespaces_open = true;
}
/* Note: The master thread has not been started at this point. */
@@ -1942,38 +2383,41 @@ dict_create_or_check_sys_tablespace(void)
sys_tablespaces_err = dict_check_if_system_table_exists(
"SYS_TABLESPACES", DICT_NUM_FIELDS__SYS_TABLESPACES + 1, 1);
- ut_a(sys_tablespaces_err == DB_SUCCESS);
+ ut_a(sys_tablespaces_err == DB_SUCCESS || err != DB_SUCCESS);
sys_datafiles_err = dict_check_if_system_table_exists(
"SYS_DATAFILES", DICT_NUM_FIELDS__SYS_DATAFILES + 1, 1);
- ut_a(sys_datafiles_err == DB_SUCCESS);
+ ut_a(sys_datafiles_err == DB_SUCCESS || err != DB_SUCCESS);
return(err);
}
-/********************************************************************//**
-Add a single tablespace definition to the data dictionary tables in the
-database.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+/** Put a tablespace definition into the data dictionary,
+replacing what was there previously.
+@param[in] space Tablespace id
+@param[in] name Tablespace name
+@param[in] flags Tablespace flags
+@param[in] path Tablespace path
+@param[in] trx Transaction
+@return error code or DB_SUCCESS */
dberr_t
-dict_create_add_tablespace_to_dictionary(
-/*=====================================*/
- ulint space, /*!< in: tablespace id */
- const char* name, /*!< in: tablespace name */
- ulint flags, /*!< in: tablespace flags */
- const char* path, /*!< in: tablespace path */
- trx_t* trx, /*!< in/out: transaction */
- bool commit) /*!< in: if true then commit the
- transaction */
+dict_replace_tablespace_in_dictionary(
+ ulint space_id,
+ const char* name,
+ ulint flags,
+ const char* path,
+ trx_t* trx)
{
+ if (!srv_sys_tablespaces_open) {
+ /* Startup procedure is not yet ready for updates. */
+ return(DB_SUCCESS);
+ }
+
dberr_t error;
pars_info_t* info = pars_info_create();
- ut_a(space > TRX_SYS_SPACE);
-
- pars_info_add_int4_literal(info, "space", space);
+ pars_info_add_int4_literal(info, "space", space_id);
pars_info_add_str_literal(info, "name", name);
@@ -1983,11 +2427,27 @@ dict_create_add_tablespace_to_dictionary(
error = que_eval_sql(info,
"PROCEDURE P () IS\n"
+ "p CHAR;\n"
+
+ "DECLARE CURSOR c IS\n"
+ " SELECT PATH FROM SYS_DATAFILES\n"
+ " WHERE SPACE=:space FOR UPDATE;\n"
+
"BEGIN\n"
- "INSERT INTO SYS_TABLESPACES VALUES"
+ "OPEN c;\n"
+ "FETCH c INTO p;\n"
+
+ "IF (SQL % NOTFOUND) THEN"
+ " DELETE FROM SYS_TABLESPACES "
+ "WHERE SPACE=:space;\n"
+ " INSERT INTO SYS_TABLESPACES VALUES"
"(:space, :name, :flags);\n"
- "INSERT INTO SYS_DATAFILES VALUES"
+ " INSERT INTO SYS_DATAFILES VALUES"
"(:space, :path);\n"
+ "ELSIF p <> :path THEN\n"
+ " UPDATE SYS_DATAFILES SET PATH=:path"
+ " WHERE CURRENT OF c;\n"
+ "END IF;\n"
"END;\n",
FALSE, trx);
@@ -1995,12 +2455,19 @@ dict_create_add_tablespace_to_dictionary(
return(error);
}
- if (commit) {
- trx->op_info = "committing tablespace and datafile definition";
- trx_commit(trx);
- }
-
trx->op_info = "";
return(error);
}
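/* [Editorial sketch, not part of this patch] The InnoDB-SQL procedure
above is an upsert: the cursor probes SYS_DATAFILES for the space id;
on NOTFOUND it inserts fresh SYS_TABLESPACES/SYS_DATAFILES rows, and
when a row exists with a different PATH it updates that row in place.
The same decision tree over a plain map: */
#include <map>
#include <string>

static void replace_datafile(std::map<int, std::string>& sys_datafiles,
			     int space, const std::string& path)
{
	std::map<int, std::string>::iterator it = sys_datafiles.find(space);
	if (it == sys_datafiles.end()) {
		sys_datafiles[space] = path;  /* INSERT (SQL % NOTFOUND) */
	} else if (it->second != path) {
		it->second = path;            /* UPDATE ... WHERE CURRENT OF c */
	}                                     /* equal path: nothing to do */
}

int main()
{
	std::map<int, std::string> sys_datafiles;
	replace_datafile(sys_datafiles, 42, "./d1.ibd");  /* inserts */
	replace_datafile(sys_datafiles, 42, "./d2.ibd");  /* updates path */
	return sys_datafiles[42] == "./d2.ibd" ? 0 : 1;
}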
+
+/** Assign a new table ID and put it into the table cache and the transaction.
+@param[in,out] table Table that needs an ID
+@param[in,out] trx Transaction */
+void
+dict_table_assign_new_id(
+ dict_table_t* table,
+ trx_t* trx)
+{
+ dict_hdr_get_new_id(&table->id, NULL, NULL, table, false);
+ trx->table_id = table->id;
+}
diff --git a/storage/innobase/dict/dict0defrag_bg.cc b/storage/innobase/dict/dict0defrag_bg.cc
new file mode 100644
index 00000000000..73f55cc8667
--- /dev/null
+++ b/storage/innobase/dict/dict0defrag_bg.cc
@@ -0,0 +1,333 @@
+/*****************************************************************************
+
+Copyright (c) 2016, 2019, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file dict/dict0defrag_bg.cc
+Defragmentation routines.
+
+Created 25/08/2016 Jan Lindström
+*******************************************************/
+
+#include "dict0dict.h"
+#include "dict0stats.h"
+#include "dict0stats_bg.h"
+#include "dict0defrag_bg.h"
+#include "btr0btr.h"
+#include "srv0start.h"
+
+static ib_mutex_t defrag_pool_mutex;
+
+#ifdef MYSQL_PFS
+static mysql_pfs_key_t defrag_pool_mutex_key;
+#endif
+
+/** Iterator type for iterating over the elements of defrag_pool_t. */
+typedef defrag_pool_t::iterator defrag_pool_iterator_t;
+
+/** Pool where we store information on which tables are to be processed
+by background defragmentation. */
+defrag_pool_t defrag_pool;
+
+extern bool dict_stats_start_shutdown;
+
+/*****************************************************************//**
+Initialize the defrag pool, called once during thread initialization. */
+void
+dict_defrag_pool_init(void)
+/*=======================*/
+{
+ ut_ad(!srv_read_only_mode);
+
+ /* We choose SYNC_STATS_DEFRAG to be below SYNC_FSP_PAGE. */
+ mutex_create(LATCH_ID_DEFRAGMENT_MUTEX, &defrag_pool_mutex);
+}
+
+/*****************************************************************//**
+Free the resources occupied by the defrag pool, called once during
+thread de-initialization. */
+void
+dict_defrag_pool_deinit(void)
+/*=========================*/
+{
+ ut_ad(!srv_read_only_mode);
+
+ mutex_free(&defrag_pool_mutex);
+}
+
+/*****************************************************************//**
+Get an index from the auto defrag pool. The returned index id is removed
+from the pool.
+@return true if the pool was non-empty and "table_id" and "index_id"
+were set, false otherwise */
+static
+bool
+dict_stats_defrag_pool_get(
+/*=======================*/
+ table_id_t* table_id, /*!< out: table id, or unmodified if
+ list is empty */
+ index_id_t* index_id) /*!< out: index id, or unmodified if
+ list is empty */
+{
+ ut_ad(!srv_read_only_mode);
+
+ mutex_enter(&defrag_pool_mutex);
+
+ if (defrag_pool.empty()) {
+ mutex_exit(&defrag_pool_mutex);
+ return(false);
+ }
+
+ defrag_pool_item_t& item = defrag_pool.back();
+ *table_id = item.table_id;
+ *index_id = item.index_id;
+
+ defrag_pool.pop_back();
+
+ mutex_exit(&defrag_pool_mutex);
+
+ return(true);
+}
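/* [Editorial sketch, not part of this patch] Note that the pool is
drained from the back, so the most recently queued index is processed
first. A minimal standalone model of get-or-fail under a lock: */
#include <mutex>
#include <utility>
#include <vector>

static std::mutex pool_mutex;
static std::vector<std::pair<int, int> > pool;  /* (table_id, index_id) */

static bool pool_get(int* table_id, int* index_id)
{
	std::lock_guard<std::mutex> guard(pool_mutex);
	if (pool.empty()) {
		return false;            /* ids stay unmodified, as above */
	}
	*table_id = pool.back().first;
	*index_id = pool.back().second;
	pool.pop_back();
	return true;
}

int main()
{
	pool.push_back(std::make_pair(1, 2));
	int t, i;
	return pool_get(&t, &i) ? 0 : 1;
}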
+
+/*****************************************************************//**
+Add an index in a table to the defrag pool, which is processed by the
+background stats gathering thread. Only the table id and index id are
+added to the list, so the table can be closed after being enqueued and
+it will be opened when needed. If the table or index does not exist later
+(has been DROPped), then it will be removed from the pool and skipped. */
+void
+dict_stats_defrag_pool_add(
+/*=======================*/
+ const dict_index_t* index) /*!< in: index to add */
+{
+ defrag_pool_item_t item;
+
+ ut_ad(!srv_read_only_mode);
+
+ mutex_enter(&defrag_pool_mutex);
+
+ /* quit if already in the list */
+ for (defrag_pool_iterator_t iter = defrag_pool.begin();
+ iter != defrag_pool.end();
+ ++iter) {
+ if ((*iter).table_id == index->table->id
+ && (*iter).index_id == index->id) {
+ mutex_exit(&defrag_pool_mutex);
+ return;
+ }
+ }
+
+ item.table_id = index->table->id;
+ item.index_id = index->id;
+ defrag_pool.push_back(item);
+
+ mutex_exit(&defrag_pool_mutex);
+
+ os_event_set(dict_stats_event);
+}
+
+/*****************************************************************//**
+Delete a given index, or all entries for a given table, from the auto
+defrag pool. */
+void
+dict_stats_defrag_pool_del(
+/*=======================*/
+ const dict_table_t* table, /*!<in: if given, remove
+ all entries for the table */
+ const dict_index_t* index) /*!< in: if given, remove this index */
+{
+ ut_a((table && !index) || (!table && index));
+ ut_ad(!srv_read_only_mode);
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ mutex_enter(&defrag_pool_mutex);
+
+ defrag_pool_iterator_t iter = defrag_pool.begin();
+ while (iter != defrag_pool.end()) {
+ if ((table && (*iter).table_id == table->id)
+ || (index
+ && (*iter).table_id == index->table->id
+ && (*iter).index_id == index->id)) {
+ /* erase() invalidates the iterator */
+ iter = defrag_pool.erase(iter);
+ if (index)
+ break;
+ } else {
+ iter++;
+ }
+ }
+
+ mutex_exit(&defrag_pool_mutex);
+}
+
+/*****************************************************************//**
+Pop an index from the auto defrag pool and, if its table and index are
+still available, save its defragmentation stats. */
+static
+void
+dict_stats_process_entry_from_defrag_pool()
+{
+ table_id_t table_id;
+ index_id_t index_id;
+
+ ut_ad(!srv_read_only_mode);
+
+ /* pop the first index from the auto defrag pool */
+ if (!dict_stats_defrag_pool_get(&table_id, &index_id)) {
+ /* no index in defrag pool */
+ return;
+ }
+
+ dict_table_t* table;
+
+ mutex_enter(&dict_sys->mutex);
+
+ /* If the table is no longer cached, we've already lost the in
+ memory stats so there's nothing really to write to disk. */
+ table = dict_table_open_on_id(table_id, TRUE,
+ DICT_TABLE_OP_OPEN_ONLY_IF_CACHED);
+
+ dict_index_t* index = table && !table->corrupted
+ ? dict_table_find_index_on_id(table, index_id)
+ : NULL;
+
+ if (!index || index->is_corrupted()) {
+ if (table) {
+ dict_table_close(table, TRUE, FALSE);
+ }
+ mutex_exit(&dict_sys->mutex);
+ return;
+ }
+
+ mutex_exit(&dict_sys->mutex);
+ dict_stats_save_defrag_stats(index);
+ dict_table_close(table, FALSE, FALSE);
+}
+
+/*****************************************************************//**
+Process all entries in the defrag pool: save the defragmentation stats
+of each queued index until the pool is empty or shutdown begins. */
+void
+dict_defrag_process_entries_from_defrag_pool()
+/*==========================================*/
+{
+ while (defrag_pool.size() && !dict_stats_start_shutdown) {
+ dict_stats_process_entry_from_defrag_pool();
+ }
+}
+
+/*********************************************************************//**
+Save defragmentation result.
+@return DB_SUCCESS or error code */
+dberr_t
+dict_stats_save_defrag_summary(
+/*============================*/
+ dict_index_t* index) /*!< in: index */
+{
+ dberr_t ret = DB_SUCCESS;
+
+ if (dict_index_is_ibuf(index)) {
+ return DB_SUCCESS;
+ }
+
+ rw_lock_x_lock(&dict_operation_lock);
+ mutex_enter(&dict_sys->mutex);
+
+ ret = dict_stats_save_index_stat(index, time(NULL), "n_pages_freed",
+ index->stat_defrag_n_pages_freed,
+ NULL,
+ "Number of pages freed during"
+ " last defragmentation run.",
+ NULL);
+
+ mutex_exit(&dict_sys->mutex);
+ rw_lock_x_unlock(&dict_operation_lock);
+
+ return(ret);
+}
+
+/*********************************************************************//**
+Save defragmentation stats for a given index.
+@return DB_SUCCESS or error code */
+dberr_t
+dict_stats_save_defrag_stats(
+/*============================*/
+ dict_index_t* index) /*!< in: index */
+{
+ dberr_t ret;
+
+ if (dict_index_is_ibuf(index)) {
+ return DB_SUCCESS;
+ }
+
+ if (!index->is_readable()) {
+ return dict_stats_report_error(index->table, true);
+ }
+
+ const time_t now = time(NULL);
+ mtr_t mtr;
+ ulint n_leaf_pages;
+ ulint n_leaf_reserved;
+ mtr_start(&mtr);
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+ n_leaf_reserved = btr_get_size_and_reserved(index, BTR_N_LEAF_PAGES,
+ &n_leaf_pages, &mtr);
+ mtr_commit(&mtr);
+
+ if (n_leaf_reserved == ULINT_UNDEFINED) {
+ /* The index name is different during fast index creation,
+ so the stats won't be associated with the right index
+ for later use. We just return without saving. */
+ return DB_SUCCESS;
+ }
+
+ rw_lock_x_lock(&dict_operation_lock);
+
+ mutex_enter(&dict_sys->mutex);
+ ret = dict_stats_save_index_stat(index, now, "n_page_split",
+ index->stat_defrag_n_page_split,
+ NULL,
+ "Number of new page splits on leaves"
+ " since last defragmentation.",
+ NULL);
+ if (ret != DB_SUCCESS) {
+ goto end;
+ }
+
+ ret = dict_stats_save_index_stat(
+ index, now, "n_leaf_pages_defrag",
+ n_leaf_pages,
+ NULL,
+ "Number of leaf pages when this stat is saved to disk",
+ NULL);
+ if (ret != DB_SUCCESS) {
+ goto end;
+ }
+
+ ret = dict_stats_save_index_stat(
+ index, now, "n_leaf_pages_reserved",
+ n_leaf_reserved,
+ NULL,
+ "Number of pages reserved for this index leaves when this stat "
+ "is saved to disk",
+ NULL);
+
+end:
+ mutex_exit(&dict_sys->mutex);
+ rw_lock_x_unlock(&dict_operation_lock);
+
+ return(ret);
+}
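/* [Editorial sketch, not part of this patch] dict_stats_save_index_stat()
persists these counters as ordinary rows of the persistent statistics
storage, so a defragmentation run leaves inspectable records. Assuming
they land in mysql.innodb_index_stats like other persistent index stats,
a query such as the following would show them: */
#include <cstdio>

int main()
{
	std::printf("%s\n",
		    "SELECT stat_name, stat_value"
		    " FROM mysql.innodb_index_stats"
		    " WHERE table_name='t1' AND stat_name IN"
		    " ('n_page_split', 'n_leaf_pages_defrag',"
		    " 'n_leaf_pages_reserved', 'n_pages_freed');");
}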
diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc
index faa804220d3..455d51af438 100644
--- a/storage/innobase/dict/dict0dict.cc
+++ b/storage/innobase/dict/dict0dict.cc
@@ -25,73 +25,62 @@ Data dictionary system
Created 1/8/1996 Heikki Tuuri
***********************************************************************/
+#include <my_config.h>
+#include <string>
+
+#include "ha_prototypes.h"
+#include <mysqld.h>
+#include <strfunc.h>
+
#include "dict0dict.h"
#include "fts0fts.h"
#include "fil0fil.h"
#include <algorithm>
-#include <string>
-
-#ifdef UNIV_NONINL
-#include "dict0dict.ic"
-#include "dict0priv.ic"
-#endif
/** dummy index for ROW_FORMAT=REDUNDANT supremum and infimum records */
-UNIV_INTERN dict_index_t* dict_ind_redundant;
-/** dummy index for ROW_FORMAT=COMPACT supremum and infimum records */
-UNIV_INTERN dict_index_t* dict_ind_compact;
+dict_index_t* dict_ind_redundant;
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
/** Flag to control insert buffer debugging. */
-extern UNIV_INTERN uint ibuf_debug;
+extern uint ibuf_debug;
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-/**********************************************************************
-Issue a warning that the row is too big. */
-void
-ib_warn_row_too_big(const dict_table_t* table);
-
-#ifndef UNIV_HOTBACKUP
+#include "btr0btr.h"
+#include "btr0cur.h"
+#include "btr0sea.h"
#include "buf0buf.h"
#include "data0type.h"
-#include "mach0data.h"
#include "dict0boot.h"
-#include "dict0mem.h"
#include "dict0crea.h"
+#include "dict0mem.h"
+#include "dict0priv.h"
#include "dict0stats.h"
-#include "trx0undo.h"
-#include "btr0btr.h"
-#include "btr0cur.h"
-#include "btr0sea.h"
+#include "fts0fts.h"
+#include "fts0types.h"
+#include "lock0lock.h"
+#include "mach0data.h"
+#include "mem0mem.h"
#include "os0once.h"
-#include "page0zip.h"
#include "page0page.h"
+#include "page0zip.h"
#include "pars0pars.h"
#include "pars0sym.h"
#include "que0que.h"
#include "rem0cmp.h"
-#include "fts0fts.h"
-#include "fts0types.h"
-#include "m_ctype.h" /* my_isspace() */
-#include "ha_prototypes.h" /* innobase_strcasecmp(), innobase_casedn_str() */
+#include "row0log.h"
+#include "row0merge.h"
+#include "row0mysql.h"
+#include "row0upd.h"
#include "srv0mon.h"
#include "srv0start.h"
-#include "lock0lock.h"
-#include "dict0priv.h"
-#include "row0upd.h"
-#include "row0mysql.h"
-#include "row0merge.h"
-#include "row0log.h"
-#include "ut0ut.h" /* ut_format_name() */
-#include "m_string.h"
-#include "my_sys.h"
-#include "mysqld.h" /* system_charset_info */
-#include "strfunc.h" /* strconvert() */
+#include "sync0sync.h"
+#include "trx0undo.h"
-#include <ctype.h>
+#include <vector>
+#include <algorithm>
/** the dictionary system */
-UNIV_INTERN dict_sys_t* dict_sys = NULL;
+dict_sys_t* dict_sys = NULL;
/** @brief the data dictionary rw-latch protecting dict_sys
@@ -101,29 +90,15 @@ in S-mode; we cannot trust that MySQL protects implicit or background
operations a table drop since MySQL does not know of them; therefore
we need this; NOTE: a transaction which reserves this must keep book
on the mode in trx_t::dict_operation_lock_mode */
-UNIV_INTERN rw_lock_t dict_operation_lock;
+rw_lock_t dict_operation_lock;
/** Percentage of compression failures that are allowed in a single
round */
-UNIV_INTERN ulong zip_failure_threshold_pct = 5;
+ulong zip_failure_threshold_pct = 5;
/** Maximum percentage of a page that can be allowed as a pad to avoid
compression failures */
-UNIV_INTERN ulong zip_pad_max = 50;
-
-/* Keys to register rwlocks and mutexes with performance schema */
-#ifdef UNIV_PFS_RWLOCK
-UNIV_INTERN mysql_pfs_key_t dict_operation_lock_key;
-UNIV_INTERN mysql_pfs_key_t index_tree_rw_lock_key;
-UNIV_INTERN mysql_pfs_key_t index_online_log_key;
-UNIV_INTERN mysql_pfs_key_t dict_table_stats_key;
-#endif /* UNIV_PFS_RWLOCK */
-
-#ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t zip_pad_mutex_key;
-UNIV_INTERN mysql_pfs_key_t dict_sys_mutex_key;
-UNIV_INTERN mysql_pfs_key_t dict_foreign_err_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
+ulong zip_pad_max = 50;
#define DICT_HEAP_SIZE 100 /*!< initial memory heap size when
creating a table or index object */
@@ -143,17 +118,20 @@ static bool innodb_index_stats_not_found_reported = false;
/*******************************************************************//**
Tries to find column names for the index and sets the col field of the
index.
+@param[in] table table
+@param[in] index index
+@param[in] add_v new virtual columns added along with an add index call
@return TRUE if the column names were found */
static
ibool
dict_index_find_cols(
-/*=================*/
- dict_table_t* table, /*!< in: table */
- dict_index_t* index); /*!< in: index */
+ const dict_table_t* table,
+ dict_index_t* index,
+ const dict_add_v_col_t* add_v);
/*******************************************************************//**
Builds the internal dictionary cache representation for a clustered
index, containing also system fields not defined by the user.
-@return own: the internal representation of the clustered index */
+@return own: the internal representation of the clustered index */
static
dict_index_t*
dict_index_build_internal_clust(
@@ -164,7 +142,7 @@ dict_index_build_internal_clust(
/*******************************************************************//**
Builds the internal dictionary cache representation for a non-clustered
index, containing also system fields not defined by the user.
-@return own: the internal representation of the non-clustered index */
+@return own: the internal representation of the non-clustered index */
static
dict_index_t*
dict_index_build_internal_non_clust(
@@ -174,35 +152,13 @@ dict_index_build_internal_non_clust(
a non-clustered index */
/**********************************************************************//**
Builds the internal dictionary cache representation for an FTS index.
-@return own: the internal representation of the FTS index */
+@return own: the internal representation of the FTS index */
static
dict_index_t*
dict_index_build_internal_fts(
/*==========================*/
dict_table_t* table, /*!< in: table */
dict_index_t* index); /*!< in: user representation of an FTS index */
-/**********************************************************************//**
-Prints a column data. */
-static
-void
-dict_col_print_low(
-/*===============*/
- const dict_table_t* table, /*!< in: table */
- const dict_col_t* col); /*!< in: column */
-/**********************************************************************//**
-Prints an index data. */
-static
-void
-dict_index_print_low(
-/*=================*/
- dict_index_t* index); /*!< in: index */
-/**********************************************************************//**
-Prints a field data. */
-static
-void
-dict_field_print_low(
-/*=================*/
- const dict_field_t* field); /*!< in: field */
/**********************************************************************//**
Removes an index from the dictionary cache. */
@@ -242,25 +198,13 @@ dict_non_lru_find_table(
/* Stream for storing detailed information about the latest foreign key
and unique key errors. Only created if !srv_read_only_mode */
-UNIV_INTERN FILE* dict_foreign_err_file = NULL;
+FILE* dict_foreign_err_file = NULL;
/* mutex protecting the foreign and unique error buffers */
-UNIV_INTERN ib_mutex_t dict_foreign_err_mutex;
-
-/******************************************************************//**
-Makes all characters in a NUL-terminated UTF-8 string lower case. */
-UNIV_INTERN
-void
-dict_casedn_str(
-/*============*/
- char* a) /*!< in/out: string to put in lower case */
-{
- innobase_casedn_str(a);
-}
+ib_mutex_t dict_foreign_err_mutex;
/********************************************************************//**
Checks if the database name in two table names is the same.
-@return TRUE if same db name */
-UNIV_INTERN
+@return TRUE if same db name */
ibool
dict_tables_have_same_db(
/*=====================*/
@@ -280,8 +224,7 @@ dict_tables_have_same_db(
/********************************************************************//**
Return the end of table name where we have removed dbname and '/'.
-@return table name */
-UNIV_INTERN
+@return table name */
const char*
dict_remove_db_name(
/*================*/
@@ -296,8 +239,7 @@ dict_remove_db_name(
/********************************************************************//**
Get the database name length in a table name.
-@return database name length */
-UNIV_INTERN
+@return database name length */
ulint
dict_get_db_name_len(
/*=================*/
@@ -310,24 +252,20 @@ dict_get_db_name_len(
return(s - name);
}
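/* [Editorial sketch, not part of this patch] InnoDB stores table names
as "database/table"; dict_remove_db_name() returns the suffix after '/'
and dict_get_db_name_len() the prefix length. Equivalent standalone
logic: */
#include <cassert>
#include <cstring>

static const char* remove_db_name(const char* name)
{
	const char* s = std::strchr(name, '/');
	return s ? s + 1 : name;
}

int main()
{
	const char* name = "test/t1";
	assert(std::strcmp(remove_db_name(name), "t1") == 0);
	assert(std::strchr(name, '/') - name == 4);  /* db name length */
}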
-/********************************************************************//**
-Reserves the dictionary system mutex for MySQL. */
-UNIV_INTERN
+/** Reserve the dictionary system mutex. */
void
-dict_mutex_enter_for_mysql_func(const char * file, ulint line)
-/*============================*/
+dict_mutex_enter_for_mysql_func(const char *file, unsigned line)
{
- mutex_enter_func(&(dict_sys->mutex), file, line);
+ mutex_enter_loc(&dict_sys->mutex, file, line);
}
/********************************************************************//**
Releases the dictionary system mutex for MySQL. */
-UNIV_INTERN
void
dict_mutex_exit_for_mysql(void)
/*===========================*/
{
- mutex_exit(&(dict_sys->mutex));
+ mutex_exit(&dict_sys->mutex);
}
/** Allocate and init a dict_table_t's stats latch.
@@ -340,7 +278,10 @@ dict_table_stats_latch_alloc(
{
dict_table_t* table = static_cast<dict_table_t*>(table_void);
- table->stats_latch = new(std::nothrow) rw_lock_t;
+ /* Note: rw_lock_create() will call the constructor */
+
+ table->stats_latch = static_cast<rw_lock_t*>(
+ ut_malloc_nokey(sizeof(rw_lock_t)));
ut_a(table->stats_latch != NULL);
@@ -357,7 +298,7 @@ dict_table_stats_latch_free(
dict_table_t* table)
{
rw_lock_free(table->stats_latch);
- delete table->stats_latch;
+ ut_free(table->stats_latch);
}
/** Create a dict_table_t's stats latch or delay for lazy creation.
@@ -366,7 +307,6 @@ or from a thread that has not shared the table object with other threads.
@param[in,out] table table whose stats latch to create
@param[in] enabled if false then the latch is disabled
and dict_table_stats_lock()/unlock() become noop on this table. */
-
void
dict_table_stats_latch_create(
dict_table_t* table,
@@ -378,23 +318,15 @@ dict_table_stats_latch_create(
return;
}
-#ifdef HAVE_ATOMIC_BUILTINS
/* We create this lazily the first time it is used. */
table->stats_latch = NULL;
table->stats_latch_created = os_once::NEVER_DONE;
-#else /* HAVE_ATOMIC_BUILTINS */
-
- dict_table_stats_latch_alloc(table);
-
- table->stats_latch_created = os_once::DONE;
-#endif /* HAVE_ATOMIC_BUILTINS */
}
/** Destroy a dict_table_t's stats latch.
This function is only called from either single threaded environment
or from a thread that has not shared the table object with other threads.
@param[in,out] table table whose stats latch to destroy */
-
void
dict_table_stats_latch_destroy(
dict_table_t* table)
@@ -406,25 +338,20 @@ dict_table_stats_latch_destroy(
}
}
-/**********************************************************************//**
-Lock the appropriate latch to protect a given table's statistics. */
-UNIV_INTERN
+/** Lock the appropriate latch to protect a given table's statistics.
+@param[in] table table whose stats to lock
+@param[in] latch_mode RW_S_LATCH or RW_X_LATCH */
void
dict_table_stats_lock(
-/*==================*/
- dict_table_t* table, /*!< in: table */
- ulint latch_mode) /*!< in: RW_S_LATCH or RW_X_LATCH */
+ dict_table_t* table,
+ ulint latch_mode)
{
ut_ad(table != NULL);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-#ifdef HAVE_ATOMIC_BUILTINS
os_once::do_or_wait_for_done(
&table->stats_latch_created,
dict_table_stats_latch_alloc, table);
-#else /* HAVE_ATOMIC_BUILTINS */
- ut_ad(table->stats_latch_created == os_once::DONE);
-#endif /* HAVE_ATOMIC_BUILTINS */
if (table->stats_latch == NULL) {
/* This is a dummy table object that is private in the current
@@ -447,15 +374,13 @@ dict_table_stats_lock(
}
}
-/**********************************************************************//**
-Unlock the latch that has been locked by dict_table_stats_lock() */
-UNIV_INTERN
+/** Unlock the latch that has been locked by dict_table_stats_lock().
+@param[in] table table whose stats to unlock
+@param[in] latch_mode RW_S_LATCH or RW_X_LATCH */
void
dict_table_stats_unlock(
-/*====================*/
- dict_table_t* table, /*!< in: table */
- ulint latch_mode) /*!< in: RW_S_LATCH or
- RW_X_LATCH */
+ dict_table_t* table,
+ ulint latch_mode)
{
ut_ad(table != NULL);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
@@ -491,7 +416,7 @@ dict_table_try_drop_aborted(
dict_table_t* table, /*!< in: table, or NULL if it
needs to be looked up again */
table_id_t table_id, /*!< in: table identifier */
- ulint ref_count) /*!< in: expected table->n_ref_count */
+ int32 ref_count) /*!< in: expected table->n_ref_count */
{
trx_t* trx;
@@ -502,18 +427,18 @@ dict_table_try_drop_aborted(
if (table == NULL) {
table = dict_table_open_on_id_low(
- table_id, DICT_ERR_IGNORE_NONE, FALSE);
+ table_id, DICT_ERR_IGNORE_FK_NOKEY, FALSE);
} else {
ut_ad(table->id == table_id);
}
- if (table && table->n_ref_count == ref_count && table->drop_aborted
+ if (table && table->get_ref_count() == ref_count && table->drop_aborted
&& !UT_LIST_GET_FIRST(table->locks)) {
/* Silence a debug assertion in row_merge_drop_indexes(). */
- ut_d(table->n_ref_count++);
+ ut_d(table->acquire());
row_merge_drop_indexes(trx, table, TRUE);
- ut_d(table->n_ref_count--);
- ut_ad(table->n_ref_count == ref_count);
+ ut_d(table->release());
+ ut_ad(table->get_ref_count() == ref_count);
trx_commit_for_mysql(trx);
}
@@ -537,7 +462,7 @@ dict_table_try_drop_aborted_and_mutex_exit(
if (try_drop
&& table != NULL
&& table->drop_aborted
- && table->n_ref_count == 1
+ && table->get_ref_count() == 1
&& dict_table_get_first_index(table)) {
/* Attempt to drop the indexes whose online creation
@@ -554,7 +479,6 @@ dict_table_try_drop_aborted_and_mutex_exit(
/********************************************************************//**
Decrements the count of open handles to a table. */
-UNIV_INTERN
void
dict_table_close(
/*=============*/
@@ -569,16 +493,16 @@ dict_table_close(
}
ut_ad(mutex_own(&dict_sys->mutex));
- ut_a(table->n_ref_count > 0);
+ ut_a(table->get_ref_count() > 0);
- const bool last_handle = !--table->n_ref_count;
+ const bool last_handle = table->release();
/* Force persistent stats re-read upon next open of the table
so that FLUSH TABLE can be used to forcibly fetch stats from disk
if they have been manually modified. We reset table->stat_initialized
only if table reference count is 0 because we do not want too frequent
stats re-reads (e.g. in other cases than FLUSH TABLE). */
- if (last_handle && strchr(table->name, '/') != NULL
+ if (last_handle && strchr(table->name.m_name, '/') != NULL
&& dict_stats_is_persistent_enabled(table)) {
dict_stats_deinit(table);
@@ -612,13 +536,85 @@ dict_table_close(
}
}
}
-#endif /* !UNIV_HOTBACKUP */
+
+/********************************************************************//**
+Closes the only open handle to a table and drops a table while ensuring
+that dict_sys->mutex is held the whole time. This ensures that the table
+is not evicted after the close when the count of open handles goes to zero.
+Because dict_sys->mutex is held, we do not need to call
+dict_table_prevent_eviction(). */
+void
+dict_table_close_and_drop(
+/*======================*/
+ trx_t* trx, /*!< in: data dictionary transaction */
+ dict_table_t* table) /*!< in/out: table */
+{
+ dberr_t err = DB_SUCCESS;
+
+ ut_ad(mutex_own(&dict_sys->mutex));
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
+ ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
+ ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
+
+ dict_table_close(table, TRUE, FALSE);
+
+#if defined UNIV_DEBUG || defined UNIV_DDL_DEBUG
+ /* Nobody should have initialized the stats of the newly created
+ table when this is called. So we know that it has not been added
+ for background stats gathering. */
+ ut_a(!table->stat_initialized);
+#endif /* UNIV_DEBUG || UNIV_DDL_DEBUG */
+
+ err = row_merge_drop_table(trx, table);
+
+ if (err != DB_SUCCESS) {
+ ib::error() << "At " << __FILE__ << ":" << __LINE__
+ << " row_merge_drop_table returned error: " << err
+ << " table: " << table->name;
+ }
+}
+
+/** Check if the table has a given (non-virtual) column.
+@param[in] table table object
+@param[in] col_name column name
+@param[in] col_nr guessed column position, 0 by default
+@return column number if the table has the specified column,
+otherwise table->n_def */
+ulint
+dict_table_has_column(
+ const dict_table_t* table,
+ const char* col_name,
+ ulint col_nr)
+{
+ ulint col_max = table->n_def;
+
+ ut_ad(table);
+ ut_ad(col_name);
+ ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+ if (col_nr < col_max
+ && innobase_strcasecmp(
+ col_name, dict_table_get_col_name(table, col_nr)) == 0) {
+ return(col_nr);
+ }
+
+ /* The column order may have changed; check the other columns. */
+ for (ulint i = 0; i < col_max; i++) {
+ if (i != col_nr
+ && innobase_strcasecmp(
+ col_name, dict_table_get_col_name(table, i)) == 0) {
+
+ return(i);
+ }
+ }
+
+ return(col_max);
+}
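/* [Editorial sketch, not part of this patch] dict_table_has_column()
above first tests the caller's guessed position (the cheap common case)
and only then scans every other slot, returning the column count as a
"not found" sentinel. Standalone model: */
#include <cassert>
#include <string>
#include <vector>

static size_t find_col(const std::vector<std::string>& cols,
		       const std::string& name, size_t guess)
{
	if (guess < cols.size() && cols[guess] == name) {
		return guess;                     /* fast path */
	}
	for (size_t i = 0; i < cols.size(); i++) {
		if (i != guess && cols[i] == name) {
			return i;                 /* order changed */
		}
	}
	return cols.size();                       /* like table->n_def */
}

int main()
{
	std::vector<std::string> cols = {"a", "b", "c"};
	assert(find_col(cols, "b", 1) == 1);
	assert(find_col(cols, "c", 0) == 2);
	assert(find_col(cols, "z", 0) == 3);      /* sentinel */
}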
/**********************************************************************//**
Returns a column's name.
@return column name. NOTE: not guaranteed to stay valid if table is
modified in any way (columns added, etc.). */
-UNIV_INTERN
const char*
dict_table_get_col_name(
/*====================*/
@@ -642,94 +638,169 @@ dict_table_get_col_name(
return(s);
}
-#ifndef UNIV_HOTBACKUP
+/** Returns a virtual column's name.
+@param[in] table target table
+@param[in] col_nr virtual column number (nth virtual column)
+@return column name or NULL if column number out of range. */
+const char*
+dict_table_get_v_col_name(
+ const dict_table_t* table,
+ ulint col_nr)
+{
+ const char* s;
+
+ ut_ad(table);
+ ut_ad(col_nr < table->n_v_def);
+ ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+ if (col_nr >= table->n_v_def) {
+ return(NULL);
+ }
+
+ s = table->v_col_names;
+
+ if (s != NULL) {
+ for (ulint i = 0; i < col_nr; i++) {
+ s += strlen(s) + 1;
+ }
+ }
+
+ return(s);
+}
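/* [Editorial sketch, not part of this patch] v_col_names above is a
single buffer of NUL-terminated names laid end to end ("v1\0v2\0..."),
so fetching the nth name means skipping n terminators. Standalone
model: */
#include <cassert>
#include <cstring>

static const char* nth_packed_name(const char* s, unsigned n)
{
	for (unsigned i = 0; i < n; i++) {
		s += std::strlen(s) + 1;  /* hop over one name + its NUL */
	}
	return s;
}

int main()
{
	const char packed[] = "v1\0vcol2\0v3";  /* three packed names */
	assert(std::strcmp(nth_packed_name(packed, 1), "vcol2") == 0);
}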
+
+/** Search for a virtual column's position in InnoDB by its position
+in the original MySQL table
+@param[in] table target table
+@param[in] col_nr column number (nth column in the MySQL table)
+@return virtual column's position in InnoDB, ULINT_UNDEFINED if not found */
+static
+ulint
+dict_table_get_v_col_pos_for_mysql(
+ const dict_table_t* table,
+ ulint col_nr)
+{
+ ulint i;
+
+ ut_ad(table);
+ ut_ad(col_nr < static_cast<ulint>(table->n_t_def));
+ ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+ for (i = 0; i < table->n_v_def; i++) {
+ if (col_nr == dict_get_v_col_mysql_pos(
+ table->v_cols[i].m_col.ind)) {
+ break;
+ }
+ }
+
+ if (i == table->n_v_def) {
+ return(ULINT_UNDEFINED);
+ }
+
+ return(i);
+}
+
+/** Returns a virtual column's name according to its original
+MySQL table position.
+@param[in] table target table
+@param[in] col_nr column number (nth column in the table)
+@return column name. */
+static
+const char*
+dict_table_get_v_col_name_mysql(
+ const dict_table_t* table,
+ ulint col_nr)
+{
+ ulint i = dict_table_get_v_col_pos_for_mysql(table, col_nr);
+
+ if (i == ULINT_UNDEFINED) {
+ return(NULL);
+ }
+
+ return(dict_table_get_v_col_name(table, i));
+}
+
+/** Get nth virtual column according to its original MySQL table position
+@param[in] table target table
+@param[in] col_nr column number in MySQL Table definition
+@return dict_v_col_t ptr */
+dict_v_col_t*
+dict_table_get_nth_v_col_mysql(
+ const dict_table_t* table,
+ ulint col_nr)
+{
+ ulint i = dict_table_get_v_col_pos_for_mysql(table, col_nr);
+
+ if (i == ULINT_UNDEFINED) {
+ return(NULL);
+ }
+
+ return(dict_table_get_nth_v_col(table, i));
+}
+
/** Allocate and init the autoinc latch of a given table.
This function must not be called concurrently on the same table object.
@param[in,out] table_void table whose autoinc latch to create */
+static
void
dict_table_autoinc_alloc(
void* table_void)
{
dict_table_t* table = static_cast<dict_table_t*>(table_void);
- table->autoinc_mutex = new (std::nothrow) ib_mutex_t();
+ table->autoinc_mutex = UT_NEW_NOKEY(ib_mutex_t());
ut_a(table->autoinc_mutex != NULL);
- mutex_create(autoinc_mutex_key,
- table->autoinc_mutex, SYNC_DICT_AUTOINC_MUTEX);
+ mutex_create(LATCH_ID_AUTOINC, table->autoinc_mutex);
}
/** Allocate and init the zip_pad_mutex of a given index.
This function must not be called concurrently on the same index object.
@param[in,out] index_void index whose zip_pad_mutex to create */
+static
void
dict_index_zip_pad_alloc(
void* index_void)
{
dict_index_t* index = static_cast<dict_index_t*>(index_void);
- index->zip_pad.mutex = new (std::nothrow) os_fast_mutex_t;
+ index->zip_pad.mutex = UT_NEW_NOKEY(SysMutex());
ut_a(index->zip_pad.mutex != NULL);
- os_fast_mutex_init(zip_pad_mutex_key, index->zip_pad.mutex);
+ mutex_create(LATCH_ID_ZIP_PAD_MUTEX, index->zip_pad.mutex);
}
/********************************************************************//**
Acquire the autoinc lock. */
-UNIV_INTERN
void
dict_table_autoinc_lock(
/*====================*/
dict_table_t* table) /*!< in/out: table */
{
-#ifdef HAVE_ATOMIC_BUILTINS
os_once::do_or_wait_for_done(
&table->autoinc_mutex_created,
dict_table_autoinc_alloc, table);
-#else /* HAVE_ATOMIC_BUILTINS */
- ut_ad(table->autoinc_mutex_created == os_once::DONE);
-#endif /* HAVE_ATOMIC_BUILTINS */
mutex_enter(table->autoinc_mutex);
}
/** Acquire the zip_pad_mutex latch.
@param[in,out] index the index whose zip_pad_mutex to acquire.*/
+static
void
dict_index_zip_pad_lock(
dict_index_t* index)
{
-#ifdef HAVE_ATOMIC_BUILTINS
os_once::do_or_wait_for_done(
&index->zip_pad.mutex_created,
dict_index_zip_pad_alloc, index);
-#else /* HAVE_ATOMIC_BUILTINS */
- ut_ad(index->zip_pad.mutex_created == os_once::DONE);
-#endif /* HAVE_ATOMIC_BUILTINS */
-
- os_fast_mutex_lock(index->zip_pad.mutex);
-}
-
-/********************************************************************//**
-Unconditionally set the autoinc counter. */
-UNIV_INTERN
-void
-dict_table_autoinc_initialize(
-/*==========================*/
- dict_table_t* table, /*!< in/out: table */
- ib_uint64_t value) /*!< in: next value to assign to a row */
-{
- ut_ad(dict_table_autoinc_own(table));
- table->autoinc = value;
+ mutex_enter(index->zip_pad.mutex);
}
-/************************************************************************
-Get all the FTS indexes on a table.
-@return number of FTS indexes */
-UNIV_INTERN
+/** Get all the FTS indexes on a table.
+@param[in] table table
+@param[out] indexes all FTS indexes on this table
+@return number of FTS indexes */
ulint
dict_table_get_all_fts_indexes(
-/*===========================*/
- dict_table_t* table, /*!< in: table */
- ib_vector_t* indexes) /*!< out: all FTS indexes on this
- table */
+ const dict_table_t* table,
+ ib_vector_t* indexes)
{
dict_index_t* index;
@@ -747,82 +818,8 @@ dict_table_get_all_fts_indexes(
return(ib_vector_size(indexes));
}
-/** Store autoinc value when the table is evicted.
-@param[in] table table evicted */
-UNIV_INTERN
-void
-dict_table_autoinc_store(
- const dict_table_t* table)
-{
- ut_ad(mutex_own(&dict_sys->mutex));
-
- if (table->autoinc != 0) {
- ut_ad(dict_sys->autoinc_map->find(table->id)
- == dict_sys->autoinc_map->end());
-
- dict_sys->autoinc_map->insert(
- std::pair<table_id_t, ib_uint64_t>(
- table->id, table->autoinc));
- }
-}
-
-/** Restore autoinc value when the table is loaded.
-@param[in] table table loaded */
-UNIV_INTERN
-void
-dict_table_autoinc_restore(
- dict_table_t* table)
-{
- ut_ad(mutex_own(&dict_sys->mutex));
-
- autoinc_map_t::iterator it;
- it = dict_sys->autoinc_map->find(table->id);
-
- if (it != dict_sys->autoinc_map->end()) {
- table->autoinc = it->second;
- ut_ad(table->autoinc != 0);
-
- dict_sys->autoinc_map->erase(it);
- }
-}
-
-/********************************************************************//**
-Reads the next autoinc value (== autoinc counter value), 0 if not yet
-initialized.
-@return value for a new row, or 0 */
-UNIV_INTERN
-ib_uint64_t
-dict_table_autoinc_read(
-/*====================*/
- const dict_table_t* table) /*!< in: table */
-{
- ut_ad(dict_table_autoinc_own(table));
-
- return(table->autoinc);
-}
-
-/********************************************************************//**
-Updates the autoinc counter if the value supplied is greater than the
-current value. */
-UNIV_INTERN
-void
-dict_table_autoinc_update_if_greater(
-/*=================================*/
-
- dict_table_t* table, /*!< in/out: table */
- ib_uint64_t value) /*!< in: value which was assigned to a row */
-{
- ut_ad(dict_table_autoinc_own(table));
-
- if (value > table->autoinc) {
-
- table->autoinc = value;
- }
-}
-
/********************************************************************//**
Release the autoinc lock. */
-UNIV_INTERN
void
dict_table_autoinc_unlock(
/*======================*/
@@ -830,37 +827,40 @@ dict_table_autoinc_unlock(
{
mutex_exit(table->autoinc_mutex);
}
-#endif /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Looks for column n in an index.
+/** Looks for column n in an index.
+@param[in] index index
+@param[in] n column number
+@param[in] inc_prefix true=consider column prefixes too
+@param[in]	is_virtual	true=the column is virtual
+@param[out] prefix_col_pos col num if prefix
@return position in internal representation of the index;
ULINT_UNDEFINED if not contained */
-UNIV_INTERN
ulint
dict_index_get_nth_col_or_prefix_pos(
-/*=================================*/
- const dict_index_t* index, /*!< in: index */
- ulint n, /*!< in: column number */
- ibool inc_prefix, /*!< in: TRUE=consider
- column prefixes too */
- ulint* prefix_col_pos) /*!< out: col num if prefix */
+ const dict_index_t* index,
+ ulint n,
+ bool inc_prefix,
+ bool is_virtual,
+ ulint* prefix_col_pos)
{
const dict_field_t* field;
const dict_col_t* col;
ulint pos;
ulint n_fields;
- ulint prefixed_pos_dummy;
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- if (!prefix_col_pos) {
- prefix_col_pos = &prefixed_pos_dummy;
+ if (prefix_col_pos) {
+ *prefix_col_pos = ULINT_UNDEFINED;
}
- *prefix_col_pos = ULINT_UNDEFINED;
- col = dict_table_get_nth_col(index->table, n);
+ if (is_virtual) {
+ col = &(dict_table_get_nth_v_col(index->table, n)->m_col);
+ } else {
+ col = dict_table_get_nth_col(index->table, n);
+ }
if (dict_index_is_clust(index)) {
@@ -873,7 +873,9 @@ dict_index_get_nth_col_or_prefix_pos(
field = dict_index_get_nth_field(index, pos);
if (col == field->col) {
- *prefix_col_pos = pos;
+ if (prefix_col_pos) {
+ *prefix_col_pos = pos;
+ }
if (inc_prefix || field->prefix_len == 0) {
return(pos);
}
@@ -883,16 +885,16 @@ dict_index_get_nth_col_or_prefix_pos(
return(ULINT_UNDEFINED);
}
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Returns TRUE if the index contains a column or a prefix of that column.
-@return TRUE if contains the column or its prefix */
-UNIV_INTERN
+/** Returns TRUE if the index contains a column or a prefix of that column.
+@param[in] index index
+@param[in] n column number
+@param[in] is_virtual whether it is a virtual col
+@return TRUE if contains the column or its prefix */
ibool
dict_index_contains_col_or_prefix(
-/*==============================*/
- const dict_index_t* index, /*!< in: index */
- ulint n) /*!< in: column number */
+ const dict_index_t* index,
+ ulint n,
+ bool is_virtual)
{
const dict_field_t* field;
const dict_col_t* col;
@@ -903,11 +905,14 @@ dict_index_contains_col_or_prefix(
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
if (dict_index_is_clust(index)) {
-
- return(TRUE);
+ return(!is_virtual);
}
- col = dict_table_get_nth_col(index->table, n);
+ if (is_virtual) {
+ col = &dict_table_get_nth_v_col(index->table, n)->m_col;
+ } else {
+ col = dict_table_get_nth_col(index->table, n);
+ }
n_fields = dict_index_get_n_fields(index);
@@ -930,7 +935,6 @@ column in index2. That is, we must be able to construct the prefix in index2
from the prefix in index.
@return position in internal representation of the index;
ULINT_UNDEFINED if not contained */
-UNIV_INTERN
ulint
dict_index_get_nth_field_pos(
/*=========================*/
@@ -949,9 +953,22 @@ dict_index_get_nth_field_pos(
n_fields = dict_index_get_n_fields(index);
+	/* Are we looking for an MBR (minimum bounding rectangle)
+	field of a spatial index? */
+ bool is_mbr_fld = (n == 0 && dict_index_is_spatial(index2));
+
for (pos = 0; pos < n_fields; pos++) {
field = dict_index_get_nth_field(index, pos);
+	/* The first field of a spatial index is a transformed
+	MBR (minimum bounding rectangle) field built from the
+	original column, so its field->col still points to the
+	original clustered index column while the actual content
+	differs. The fields can therefore only be considered
+	equal when both of them are MBR fields; an MBR sketch
+	follows this hunk. */
+ if (pos == 0 && dict_index_is_spatial(index) && !is_mbr_fld) {
+ continue;
+ }
+
if (field->col == field2->col
&& (field->prefix_len == 0
|| (field->prefix_len >= field2->prefix_len
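For reference, the MBR in the hunk above is the minimum bounding rectangle of a geometry: the smallest axis-aligned box that contains it, which spatial (R-tree) indexes store as the first field instead of the full geometry. A hedged sketch of the concept in plain C++ (not InnoDB code; assumes a non-empty point set):

#include <algorithm>
#include <vector>

struct point { double x, y; };
struct mbr   { double xmin, ymin, xmax, ymax; };

/* Compute the minimum bounding rectangle of a point set: the kind of
transformed value a spatial index stores for its first field. */
static mbr compute_mbr(const std::vector<point>& pts)
{
	mbr m = {pts[0].x, pts[0].y, pts[0].x, pts[0].y};
	for (const point& p : pts) {
		m.xmin = std::min(m.xmin, p.x);
		m.ymin = std::min(m.ymin, p.y);
		m.xmax = std::max(m.xmax, p.x);
		m.ymax = std::max(m.ymax, p.y);
	}
	return m;
}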
@@ -966,8 +983,7 @@ dict_index_get_nth_field_pos(
/**********************************************************************//**
Returns a table object based on table id.
-@return table, NULL if does not exist */
-UNIV_INTERN
+@return table, NULL if does not exist */
dict_table_t*
dict_table_open_on_id(
/*==================*/
@@ -987,7 +1003,7 @@ dict_table_open_on_id(
table_id,
table_op == DICT_TABLE_OP_LOAD_TABLESPACE
? DICT_ERR_IGNORE_RECOVER_LOCK
- : DICT_ERR_IGNORE_NONE,
+ : DICT_ERR_IGNORE_FK_NOKEY,
table_op == DICT_TABLE_OP_OPEN_ONLY_IF_CACHED);
if (table != NULL) {
@@ -996,7 +1012,7 @@ dict_table_open_on_id(
dict_move_to_mru(table);
}
- ++table->n_ref_count;
+ table->acquire();
MONITOR_INC(MONITOR_TABLE_REFERENCE);
}
@@ -1011,23 +1027,22 @@ dict_table_open_on_id(
/********************************************************************//**
Looks for column n position in the clustered index.
-@return position in internal representation of the clustered index */
-UNIV_INTERN
+@return position in internal representation of the clustered index */
ulint
dict_table_get_nth_col_pos(
/*=======================*/
const dict_table_t* table, /*!< in: table */
- ulint n) /*!< in: column number */
+ ulint n, /*!< in: column number */
+ ulint* prefix_col_pos)
{
return(dict_index_get_nth_col_pos(dict_table_get_first_index(table),
- n, NULL));
+ n, prefix_col_pos));
}
/********************************************************************//**
Checks if a column is in the ordering columns of the clustered index of a
table. Column prefixes are treated like whole columns.
-@return TRUE if the column, or its prefix, is in the clustered key */
-UNIV_INTERN
+@return TRUE if the column, or its prefix, is in the clustered key */
ibool
dict_table_col_in_clustered_key(
/*============================*/
@@ -1060,38 +1075,38 @@ dict_table_col_in_clustered_key(
/**********************************************************************//**
Inits the data dictionary module. */
-UNIV_INTERN
void
dict_init(void)
/*===========*/
{
- dict_sys = static_cast<dict_sys_t*>(mem_zalloc(sizeof(*dict_sys)));
+ dict_sys = static_cast<dict_sys_t*>(ut_zalloc_nokey(sizeof(*dict_sys)));
+
+ UT_LIST_INIT(dict_sys->table_LRU, &dict_table_t::table_LRU);
+ UT_LIST_INIT(dict_sys->table_non_LRU, &dict_table_t::table_LRU);
+
+ mutex_create(LATCH_ID_DICT_SYS, &dict_sys->mutex);
- mutex_create(dict_sys_mutex_key, &dict_sys->mutex, SYNC_DICT);
+ dict_sys->table_hash = hash_create(
+ buf_pool_get_curr_size()
+ / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE));
+
+ dict_sys->table_id_hash = hash_create(
+ buf_pool_get_curr_size()
+ / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE));
- dict_sys->table_hash = hash_create(buf_pool_get_curr_size()
- / (DICT_POOL_PER_TABLE_HASH
- * UNIV_WORD_SIZE));
- dict_sys->table_id_hash = hash_create(buf_pool_get_curr_size()
- / (DICT_POOL_PER_TABLE_HASH
- * UNIV_WORD_SIZE));
rw_lock_create(dict_operation_lock_key,
&dict_operation_lock, SYNC_DICT_OPERATION);
if (!srv_read_only_mode) {
dict_foreign_err_file = os_file_create_tmpfile(NULL);
ut_a(dict_foreign_err_file);
-
- mutex_create(dict_foreign_err_mutex_key,
- &dict_foreign_err_mutex, SYNC_NO_ORDER_CHECK);
}
- dict_sys->autoinc_map = new autoinc_map_t();
+ mutex_create(LATCH_ID_DICT_FOREIGN_ERR, &dict_foreign_err_mutex);
}
/**********************************************************************//**
Move to the most recently used segment of the LRU list. */
-UNIV_INTERN
void
dict_move_to_mru(
/*=============*/
@@ -1103,9 +1118,9 @@ dict_move_to_mru(
ut_a(table->can_be_evicted);
- UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table);
+ UT_LIST_REMOVE(dict_sys->table_LRU, table);
- UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table);
+ UT_LIST_ADD_FIRST(dict_sys->table_LRU, table);
ut_ad(dict_lru_validate());
}
@@ -1115,8 +1130,7 @@ Returns a table object and increment its open handle count.
NOTE! This is a high-level function to be used mainly from outside the
'dict' module. Inside this directory dict_table_get_low
is usually the appropriate function.
-@return table, NULL if does not exist */
-UNIV_INTERN
+@return table, NULL if does not exist */
dict_table_t*
dict_table_open_on_name(
/*====================*/
@@ -1130,9 +1144,11 @@ dict_table_open_on_name(
loading a table definition */
{
dict_table_t* table;
+ DBUG_ENTER("dict_table_open_on_name");
+ DBUG_PRINT("dict_table_open_on_name", ("table: '%s'", table_name));
if (!dict_locked) {
- mutex_enter(&(dict_sys->mutex));
+ mutex_enter(&dict_sys->mutex);
}
ut_ad(table_name);
@@ -1141,7 +1157,7 @@ dict_table_open_on_name(
table = dict_table_check_if_in_cache_low(table_name);
if (table == NULL) {
- table = dict_load_table(table_name, TRUE, ignore_err);
+ table = dict_load_table(table_name, ignore_err);
}
ut_ad(!table || table->cached);
@@ -1149,48 +1165,41 @@ dict_table_open_on_name(
if (table != NULL) {
/* If table is encrypted or corrupted */
- if (ignore_err == DICT_ERR_IGNORE_NONE
+ if (!(ignore_err & ~DICT_ERR_IGNORE_FK_NOKEY)
&& !table->is_readable()) {
/* Make life easy for drop table. */
- if (table->can_be_evicted) {
- dict_table_move_from_lru_to_non_lru(table);
- }
+ dict_table_prevent_eviction(table);
if (table->corrupted) {
+				ib::error() << "Table " << table->name
+					<< " is corrupted. Please "
+					"drop the table and recreate it.";
if (!dict_locked) {
mutex_exit(&dict_sys->mutex);
}
- char buf[MAX_FULL_NAME_LEN];
- ut_format_name(table->name, TRUE, buf, sizeof(buf));
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Table %s is corrupted. Please "
- "drop the table and recreate.",
- buf);
-
- return(NULL);
+ DBUG_RETURN(NULL);
}
if (table->can_be_evicted) {
dict_move_to_mru(table);
}
- ++table->n_ref_count;
+ table->acquire();
if (!dict_locked) {
mutex_exit(&dict_sys->mutex);
}
- return (table);
+ DBUG_RETURN(table);
}
if (table->can_be_evicted) {
dict_move_to_mru(table);
}
- ++table->n_ref_count;
+ table->acquire();
MONITOR_INC(MONITOR_TABLE_REFERENCE);
}
@@ -1201,13 +1210,11 @@ dict_table_open_on_name(
dict_table_try_drop_aborted_and_mutex_exit(table, try_drop);
}
- return(table);
+ DBUG_RETURN(table);
}
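The function now uses DBUG_ENTER()/DBUG_RETURN() from the MySQL dbug library; once a function calls DBUG_ENTER(), every exit path must go through DBUG_RETURN() so the debug call stack stays balanced, which is why all three returns above were converted. A minimal usage sketch (hypothetical function; assumes the standard dbug conventions):

#include <my_dbug.h>

static int lookup_widget(const char* name)	/* hypothetical example */
{
	DBUG_ENTER("lookup_widget");	/* push a frame on the debug stack */
	DBUG_PRINT("lookup_widget", ("name: '%s'", name));

	if (name == NULL) {
		DBUG_RETURN(-1);	/* never a plain 'return' after DBUG_ENTER */
	}

	DBUG_RETURN(0);			/* pops the frame pushed above */
}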
-#endif /* !UNIV_HOTBACKUP */
/**********************************************************************//**
Adds system columns to a table object. */
-UNIV_INTERN
void
dict_table_add_system_columns(
/*==========================*/
@@ -1221,12 +1228,13 @@ dict_table_add_system_columns(
/* NOTE: the system columns MUST be added in the following order
(so that they can be indexed by the numerical value of DATA_ROW_ID,
etc.) and as the last columns of the table memory object.
- The clustered index will not always physically contain all
- system columns. */
+ The clustered index will not always physically contain all system
+ columns. */
dict_mem_table_add_col(table, heap, "DB_ROW_ID", DATA_SYS,
DATA_ROW_ID | DATA_NOT_NULL,
DATA_ROW_ID_LEN);
+
#if DATA_ROW_ID != 0
#error "DATA_ROW_ID != 0"
#endif
@@ -1236,6 +1244,7 @@ dict_table_add_system_columns(
#if DATA_TRX_ID != 1
#error "DATA_TRX_ID != 1"
#endif
+
dict_mem_table_add_col(table, heap, "DB_ROLL_PTR", DATA_SYS,
DATA_ROLL_PTR | DATA_NOT_NULL,
DATA_ROLL_PTR_LEN);
@@ -1250,60 +1259,34 @@ dict_table_add_system_columns(
#endif
}
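The #if/#error guards above pin the system columns to fixed slots (DATA_ROW_ID = 0, DATA_TRX_ID = 1, DATA_ROLL_PTR = 2) so they can be addressed by position. The same compile-time check can be written with C++11 static_assert; a sketch with stand-in constants:

/* Stand-in values mirroring the required ordering. */
enum { DATA_ROW_ID = 0, DATA_TRX_ID = 1, DATA_ROLL_PTR = 2 };

/* Compilation fails if the system columns are ever reordered,
exactly like the preprocessor #error guards above. */
static_assert(DATA_ROW_ID == 0, "DB_ROW_ID must be column 0");
static_assert(DATA_TRX_ID == 1, "DB_TRX_ID must be column 1");
static_assert(DATA_ROLL_PTR == 2, "DB_ROLL_PTR must be column 2");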
-#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Adds a table object to the dictionary cache. */
-UNIV_INTERN
void
dict_table_add_to_cache(
/*====================*/
dict_table_t* table, /*!< in: table */
- ibool can_be_evicted, /*!< in: TRUE if can be evicted */
+ bool can_be_evicted, /*!< in: whether can be evicted */
mem_heap_t* heap) /*!< in: temporary heap */
{
ulint fold;
ulint id_fold;
- ulint i;
- ulint row_len;
ut_ad(dict_lru_validate());
-
- /* The lower limit for what we consider a "big" row */
-#define BIG_ROW_SIZE 1024
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
dict_table_add_system_columns(table, heap);
table->cached = TRUE;
- fold = ut_fold_string(table->name);
+ fold = ut_fold_string(table->name.m_name);
id_fold = ut_fold_ull(table->id);
- row_len = 0;
- for (i = 0; i < table->n_def; i++) {
- ulint col_len = dict_col_get_max_size(
- dict_table_get_nth_col(table, i));
-
- row_len += col_len;
-
- /* If we have a single unbounded field, or several gigantic
- fields, mark the maximum row size as BIG_ROW_SIZE. */
- if (row_len >= BIG_ROW_SIZE || col_len >= BIG_ROW_SIZE) {
- row_len = BIG_ROW_SIZE;
-
- break;
- }
- }
-
- table->big_rows = row_len >= BIG_ROW_SIZE;
-
/* Look for a table with the same name: error if such exists */
{
dict_table_t* table2;
HASH_SEARCH(name_hash, dict_sys->table_hash, fold,
dict_table_t*, table2, ut_ad(table2->cached),
- ut_strcmp(table2->name, table->name) == 0);
+ !strcmp(table2->name.m_name, table->name.m_name));
ut_a(table2 == NULL);
#ifdef UNIV_DEBUG
@@ -1343,13 +1326,11 @@ dict_table_add_to_cache(
table->can_be_evicted = can_be_evicted;
if (table->can_be_evicted) {
- UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table);
+ UT_LIST_ADD_FIRST(dict_sys->table_LRU, table);
} else {
- UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_non_LRU, table);
+ UT_LIST_ADD_FIRST(dict_sys->table_non_LRU, table);
}
- dict_table_autoinc_restore(table);
-
ut_ad(dict_lru_validate());
}
@@ -1360,20 +1341,16 @@ static
ibool
dict_table_can_be_evicted(
/*======================*/
- const dict_table_t* table) /*!< in: table to test */
+ dict_table_t* table) /*!< in: table to test */
{
ut_ad(mutex_own(&dict_sys->mutex));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
ut_a(table->can_be_evicted);
ut_a(table->foreign_set.empty());
ut_a(table->referenced_set.empty());
- if (table->n_ref_count == 0) {
- dict_index_t* index;
-
+ if (table->get_ref_count() == 0) {
/* The transaction commit and rollback are called from
outside the handler interface. This means that there is
a window where the table->n_ref_count can be zero but
@@ -1383,7 +1360,8 @@ dict_table_can_be_evicted(
return(FALSE);
}
- for (index = dict_table_get_first_index(table);
+#ifdef BTR_CUR_HASH_ADAPT
+ for (dict_index_t* index = dict_table_get_first_index(table);
index != NULL;
index = dict_table_get_next_index(index)) {
@@ -1401,10 +1379,11 @@ dict_table_can_be_evicted(
See also: dict_index_remove_from_cache_low() */
- if (btr_search_info_get_ref_count(info) > 0) {
+ if (btr_search_info_get_ref_count(info, index) > 0) {
return(FALSE);
}
}
+#endif /* BTR_CUR_HASH_ADAPT */
return(TRUE);
}
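Eviction is refused while any handle still references the table or, under BTR_CUR_HASH_ADAPT, while the adaptive hash index still points into its pages. A stripped-down sketch of that gate (illustrative types, not InnoDB's):

#include <atomic>

struct cached_table {			/* illustrative stand-in */
	std::atomic<unsigned>	ref_count{0};
	unsigned	ahi_ref_count = 0;	/* adaptive hash references */
};

/* Mirrors the logic above: evictable only when nothing references the
table and no index pages remain in the adaptive hash index. */
static bool can_be_evicted(const cached_table& t)
{
	if (t.ref_count.load() != 0) {
		return false;	/* an open handle still uses the table */
	}
	return t.ahi_ref_count == 0;
}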
@@ -1418,7 +1397,6 @@ should not be part of FK relationship and currently not used in any user
transaction. There is no guarantee that it will remove a table.
@return number of tables evicted. If the number of tables in the dict_LRU
is less than max_tables it will not do anything. */
-UNIV_INTERN
ulint
dict_make_room_in_cache(
/*====================*/
@@ -1434,9 +1412,7 @@ dict_make_room_in_cache(
ut_a(pct_check > 0);
ut_a(pct_check <= 100);
ut_ad(mutex_own(&dict_sys->mutex));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
ut_ad(dict_lru_validate());
i = len = UT_LIST_GET_LEN(dict_sys->table_LRU);
@@ -1464,7 +1440,7 @@ dict_make_room_in_cache(
prev_table = UT_LIST_GET_PREV(table_LRU, table);
if (dict_table_can_be_evicted(table)) {
-
+ ut_ad(!table->fts);
dict_table_remove_from_cache_low(table, TRUE);
++n_evicted;
@@ -1478,7 +1454,6 @@ dict_make_room_in_cache(
/**********************************************************************//**
Move a table to the non-LRU list from the LRU list. */
-UNIV_INTERN
void
dict_table_move_from_lru_to_non_lru(
/*================================*/
@@ -1489,42 +1464,21 @@ dict_table_move_from_lru_to_non_lru(
ut_a(table->can_be_evicted);
- UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table);
+ UT_LIST_REMOVE(dict_sys->table_LRU, table);
- UT_LIST_ADD_LAST(table_LRU, dict_sys->table_non_LRU, table);
+ UT_LIST_ADD_LAST(dict_sys->table_non_LRU, table);
table->can_be_evicted = FALSE;
}
-/**********************************************************************//**
-Move a table to the LRU list from the non-LRU list. */
-UNIV_INTERN
-void
-dict_table_move_from_non_lru_to_lru(
-/*================================*/
- dict_table_t* table) /*!< in: table to move from non-LRU to LRU */
-{
- ut_ad(mutex_own(&dict_sys->mutex));
- ut_ad(dict_non_lru_find_table(table));
-
- ut_a(!table->can_be_evicted);
-
- UT_LIST_REMOVE(table_LRU, dict_sys->table_non_LRU, table);
-
- UT_LIST_ADD_LAST(table_LRU, dict_sys->table_LRU, table);
-
- table->can_be_evicted = TRUE;
-}
-
-/**********************************************************************//**
-Looks for an index with the given id given a table instance.
-@return index or NULL */
-UNIV_INTERN
+/** Looks for an index with the given id given a table instance.
+@param[in] table table instance
+@param[in] id index id
+@return index or NULL */
dict_index_t*
dict_table_find_index_on_id(
-/*========================*/
- const dict_table_t* table, /*!< in: table instance */
- index_id_t id) /*!< in: index id */
+ const dict_table_t* table,
+ index_id_t id)
{
dict_index_t* index;
@@ -1546,8 +1500,7 @@ dict_table_find_index_on_id(
Looks for an index with the given id. NOTE that we do not reserve
the dictionary mutex: this function is for emergency purposes like
printing info of a corrupt database page!
-@return index or NULL if not found in cache */
-UNIV_INTERN
+@return index or NULL if not found in cache */
dict_index_t*
dict_index_find_on_id_low(
/*======================*/
@@ -1603,16 +1556,20 @@ struct dict_foreign_remove_partial
/**********************************************************************//**
Renames a table object.
-@return TRUE if success */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
dberr_t
dict_table_rename_in_cache(
/*=======================*/
dict_table_t* table, /*!< in/out: table */
const char* new_name, /*!< in: new name */
- ibool rename_also_foreigns)/*!< in: in ALTER TABLE we want
+ bool rename_also_foreigns,
+ /*!< in: in ALTER TABLE we want
to preserve the original table name
in constraints which reference it */
+ bool replace_new_file)
+ /*!< in: whether to replace the
+ file with the new name
+ (as part of rolling back TRUNCATE) */
{
dberr_t err;
dict_foreign_t* foreign;
@@ -1620,13 +1577,12 @@ dict_table_rename_in_cache(
ulint fold;
char old_name[MAX_FULL_NAME_LEN + 1];
os_file_type_t ftype;
- ibool exists;
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
/* store the old/current name to an automatic variable */
- ut_a(strlen(table->name) < sizeof old_name);
- strcpy(old_name, table->name);
+ ut_a(strlen(table->name.m_name) < sizeof old_name);
+ strcpy(old_name, table->name.m_name);
fold = ut_fold_string(new_name);
@@ -1634,16 +1590,15 @@ dict_table_rename_in_cache(
dict_table_t* table2;
HASH_SEARCH(name_hash, dict_sys->table_hash, fold,
dict_table_t*, table2, ut_ad(table2->cached),
- (ut_strcmp(table2->name, new_name) == 0));
+ (ut_strcmp(table2->name.m_name, new_name) == 0));
DBUG_EXECUTE_IF("dict_table_rename_in_cache_failure",
if (table2 == NULL) {
table2 = (dict_table_t*) -1;
} );
if (table2) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot rename table '%s' to '%s' since the "
- "dictionary cache already contains '%s'.",
- old_name, new_name, new_name);
+ ib::error() << "Cannot rename table '" << old_name
+ << "' to '" << new_name << "' since the"
+ " dictionary cache already contains '" << new_name << "'.";
return(DB_ERROR);
}
@@ -1651,87 +1606,88 @@ dict_table_rename_in_cache(
.ibd file and rebuild the .isl file if needed. */
if (dict_table_is_discarded(table)) {
+ bool exists;
char* filepath;
- ut_ad(table->space != TRX_SYS_SPACE);
+ ut_ad(dict_table_is_file_per_table(table));
+ ut_ad(!dict_table_is_temporary(table));
- if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ /* Make sure the data_dir_path is set. */
+ dict_get_and_save_data_dir_path(table, true);
- dict_get_and_save_data_dir_path(table, true);
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
ut_a(table->data_dir_path);
- filepath = os_file_make_remote_pathname(
- table->data_dir_path, table->name, "ibd");
+ filepath = fil_make_filepath(
+ table->data_dir_path, table->name.m_name,
+ IBD, true);
} else {
- filepath = fil_make_ibd_name(table->name, false);
+ filepath = fil_make_filepath(
+ NULL, table->name.m_name, IBD, false);
+ }
+
+ if (filepath == NULL) {
+ return(DB_OUT_OF_MEMORY);
}
- fil_delete_tablespace(table->space);
+ fil_delete_tablespace(table->space,
+ dict_table_is_discarded(table));
/* Delete any temp file hanging around. */
if (os_file_status(filepath, &exists, &ftype)
&& exists
- && !os_file_delete_if_exists(innodb_file_temp_key,
- filepath)) {
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Delete of %s failed.", filepath);
- }
-
- mem_free(filepath);
-
- } else if (table->space != TRX_SYS_SPACE) {
- if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY)) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: trying to rename a"
- " TEMPORARY TABLE ", stderr);
- ut_print_name(stderr, NULL, TRUE, old_name);
- if (table->dir_path_of_temp_table != NULL) {
- fputs(" (", stderr);
- ut_print_filename(
- stderr, table->dir_path_of_temp_table);
- fputs(" )\n", stderr);
- }
+ && !os_file_delete_if_exists(innodb_temp_file_key,
+ filepath, NULL)) {
- return(DB_ERROR);
+ ib::info() << "Delete of " << filepath << " failed.";
}
+ ut_free(filepath);
+ } else if (dict_table_is_file_per_table(table)) {
char* new_path = NULL;
char* old_path = fil_space_get_first_path(table->space);
+ ut_ad(!dict_table_is_temporary(table));
+
if (DICT_TF_HAS_DATA_DIR(table->flags)) {
new_path = os_file_make_new_pathname(
old_path, new_name);
+ err = RemoteDatafile::create_link_file(
+ new_name, new_path);
- err = fil_create_link_file(new_name, new_path);
if (err != DB_SUCCESS) {
- mem_free(new_path);
- mem_free(old_path);
+ ut_free(new_path);
+ ut_free(old_path);
return(DB_TABLESPACE_EXISTS);
}
} else {
- new_path = fil_make_ibd_name(new_name, false);
+ new_path = fil_make_filepath(
+ NULL, new_name, IBD, false);
}
/* New filepath must not exist. */
err = fil_rename_tablespace_check(
- table->space, old_path, new_path, false);
+ table->space, old_path, new_path, false,
+ replace_new_file);
if (err != DB_SUCCESS) {
- mem_free(old_path);
- mem_free(new_path);
+ ut_free(old_path);
+ ut_free(new_path);
return(err);
}
- ibool success = fil_rename_tablespace(
- old_name, table->space, new_name, new_path);
+ fil_name_write_rename(table->space, old_path, new_path);
+
+ bool success = fil_rename_tablespace(
+ table->space, old_path, new_name, new_path);
- mem_free(old_path);
- mem_free(new_path);
+ ut_free(old_path);
+ ut_free(new_path);
/* If the tablespace is remote, a new .isl file was created
- If success, delete the old one. If not, delete the new one. */
+	If successful, delete the old one; if not, delete the new one. */
if (DICT_TF_HAS_DATA_DIR(table->flags)) {
- fil_delete_link_file(success ? old_name : new_name);
+ RemoteDatafile::delete_link_file(
+ success ? old_name : new_name);
}
if (!success) {
@@ -1743,16 +1699,16 @@ dict_table_rename_in_cache(
HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash,
ut_fold_string(old_name), table);
- if (strlen(new_name) > strlen(table->name)) {
+ if (strlen(new_name) > strlen(table->name.m_name)) {
		/* We allocate MAX_FULL_NAME_LEN + 1 bytes here to avoid
		memory fragmentation; we assume that repeated calls of
		ut_realloc() with the same size do not cause fragmentation */
ut_a(strlen(new_name) <= MAX_FULL_NAME_LEN);
- table->name = static_cast<char*>(
- ut_realloc(table->name, MAX_FULL_NAME_LEN + 1));
+ table->name.m_name = static_cast<char*>(
+ ut_realloc(table->name.m_name, MAX_FULL_NAME_LEN + 1));
}
- memcpy(table->name, new_name, strlen(new_name) + 1);
+ strcpy(table->name.m_name, new_name);
/* Add table to hash table of tables */
HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold,
@@ -1763,7 +1719,7 @@ dict_table_rename_in_cache(
index != NULL;
index = dict_table_get_next_index(index)) {
- index->table_name = table->name;
+ index->table_name = table->name.m_name;
}
if (!rename_also_foreigns) {
@@ -1821,15 +1777,16 @@ dict_table_rename_in_cache(
}
if (ut_strlen(foreign->foreign_table_name)
- < ut_strlen(table->name)) {
+ < ut_strlen(table->name.m_name)) {
/* Allocate a longer name buffer;
TODO: store buf len to save memory */
foreign->foreign_table_name = mem_heap_strdup(
- foreign->heap, table->name);
+ foreign->heap, table->name.m_name);
dict_mem_foreign_table_name_lookup_set(foreign, TRUE);
} else {
- strcpy(foreign->foreign_table_name, table->name);
+ strcpy(foreign->foreign_table_name,
+ table->name.m_name);
dict_mem_foreign_table_name_lookup_set(foreign, FALSE);
}
if (strchr(foreign->id, '/')) {
@@ -1906,21 +1863,22 @@ dict_table_rename_in_cache(
char table_name[MAX_TABLE_NAME_LEN + 1];
uint errors = 0;
- if (strlen(table->name) > strlen(old_name)) {
+ if (strlen(table->name.m_name)
+ > strlen(old_name)) {
foreign->id = static_cast<char*>(
mem_heap_alloc(
foreign->heap,
- strlen(table->name)
+ strlen(table->name.m_name)
+ strlen(old_id) + 1));
}
/* Convert the table name to UTF-8 */
- strncpy(table_name, table->name,
+ strncpy(table_name, table->name.m_name,
MAX_TABLE_NAME_LEN);
table_name[MAX_TABLE_NAME_LEN] = '\0';
innobase_convert_to_system_charset(
strchr(table_name, '/') + 1,
- strchr(table->name, '/') + 1,
+ strchr(table->name.m_name, '/') + 1,
MAX_TABLE_NAME_LEN, &errors);
if (errors) {
@@ -1928,7 +1886,7 @@ dict_table_rename_in_cache(
from charset my_charset_filename to
UTF-8. This means that the table name
is already in UTF-8 (#mysql50#). */
- strncpy(table_name, table->name,
+ strncpy(table_name, table->name.m_name,
MAX_TABLE_NAME_LEN);
table_name[MAX_TABLE_NAME_LEN] = '\0';
}
@@ -1949,9 +1907,10 @@ dict_table_rename_in_cache(
} else {
/* This is a >= 4.0.18 format id where the user
gave the id name */
- db_len = dict_get_db_name_len(table->name) + 1;
+ db_len = dict_get_db_name_len(
+ table->name.m_name) + 1;
- if (dict_get_db_name_len(table->name)
+ if (db_len - 1
> dict_get_db_name_len(foreign->id)) {
foreign->id = static_cast<char*>(
@@ -1963,13 +1922,14 @@ dict_table_rename_in_cache(
/* Replace the database prefix in id with the
one from table->name */
- ut_memcpy(foreign->id, table->name, db_len);
+ ut_memcpy(foreign->id,
+ table->name.m_name, db_len);
strcpy(foreign->id + db_len,
dict_remove_db_name(old_id));
}
- mem_free(old_id);
+ ut_free(old_id);
}
table->foreign_set.erase(it);
@@ -1990,18 +1950,19 @@ dict_table_rename_in_cache(
foreign = *it;
if (ut_strlen(foreign->referenced_table_name)
- < ut_strlen(table->name)) {
+ < ut_strlen(table->name.m_name)) {
/* Allocate a longer name buffer;
TODO: store buf len to save memory */
foreign->referenced_table_name = mem_heap_strdup(
- foreign->heap, table->name);
+ foreign->heap, table->name.m_name);
dict_mem_referenced_table_name_lookup_set(
foreign, TRUE);
} else {
/* Use the same buffer */
- strcpy(foreign->referenced_table_name, table->name);
+ strcpy(foreign->referenced_table_name,
+ table->name.m_name);
dict_mem_referenced_table_name_lookup_set(
foreign, FALSE);
@@ -2014,7 +1975,6 @@ dict_table_rename_in_cache(
/**********************************************************************//**
Change the id of a table object in the dictionary cache. This is used in
DISCARD TABLESPACE. */
-UNIV_INTERN
void
dict_table_change_id_in_cache(
/*==========================*/
@@ -2048,9 +2008,9 @@ dict_table_remove_from_cache_low(
dict_index_t* index;
ut_ad(dict_lru_validate());
- ut_a(table->n_ref_count == 0);
+ ut_a(table->get_ref_count() == 0);
ut_a(table->n_rec_locks == 0);
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
/* Remove the foreign constraints from the cache */
@@ -2080,7 +2040,7 @@ dict_table_remove_from_cache_low(
/* Remove table from the hash tables of tables */
HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash,
- ut_fold_string(table->name), table);
+ ut_fold_string(table->name.m_name), table);
HASH_DELETE(dict_table_t, id_hash, dict_sys->table_id_hash,
ut_fold_ull(table->id), table);
@@ -2088,18 +2048,14 @@ dict_table_remove_from_cache_low(
/* Remove table from LRU or non-LRU list. */
if (table->can_be_evicted) {
ut_ad(dict_lru_find_table(table));
- UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table);
+ UT_LIST_REMOVE(dict_sys->table_LRU, table);
} else {
ut_ad(dict_non_lru_find_table(table));
- UT_LIST_REMOVE(table_LRU, dict_sys->table_non_LRU, table);
+ UT_LIST_REMOVE(dict_sys->table_non_LRU, table);
}
ut_ad(dict_lru_validate());
- if (lru_evict) {
- dict_table_autoinc_store(table);
- }
-
if (lru_evict && table->drop_aborted) {
/* When evicting the table definition,
drop the orphan indexes from the data dictionary
@@ -2107,9 +2063,8 @@ dict_table_remove_from_cache_low(
trx_t* trx = trx_allocate_for_background();
ut_ad(mutex_own(&dict_sys->mutex));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
+
/* Mimic row_mysql_lock_data_dictionary(). */
trx->dict_operation_lock_mode = RW_X_LATCH;
@@ -2120,12 +2075,17 @@ dict_table_remove_from_cache_low(
trx_free_for_background(trx);
}
+ /* Free virtual column template if any */
+ if (table->vc_templ != NULL) {
+ dict_free_vc_templ(table->vc_templ);
+ UT_DELETE(table->vc_templ);
+ }
+
dict_mem_table_free(table);
}
/**********************************************************************//**
Removes a table object from the dictionary cache. */
-UNIV_INTERN
void
dict_table_remove_from_cache(
/*=========================*/
@@ -2137,8 +2097,7 @@ dict_table_remove_from_cache(
/****************************************************************//**
If the given column name is reserved for InnoDB system columns, return
TRUE.
-@return TRUE if name is reserved */
-UNIV_INTERN
+@return TRUE if name is reserved */
ibool
dict_col_name_is_reserved(
/*======================*/
@@ -2166,326 +2125,58 @@ dict_col_name_is_reserved(
return(FALSE);
}
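The check above compares the candidate case-insensitively against the fixed list of system column names. A compact sketch of the same pattern (hypothetical helper; strcasecmp() is POSIX):

#include <strings.h>	/* strcasecmp(), POSIX */

/* The three column names InnoDB reserves for system columns. */
static const char* const reserved_names[] = {
	"DB_ROW_ID", "DB_TRX_ID", "DB_ROLL_PTR",
};

static bool col_name_is_reserved(const char* name)
{
	for (const char* const r : reserved_names) {
		if (strcasecmp(name, r) == 0) {
			return true;	/* matches a system column name */
		}
	}
	return false;
}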
-#if 1 /* This function is not very accurate at determining
- whether an UNDO record will be too big. See innodb_4k.test,
- Bug 13336585, for a testcase that shows an index that can
- be created but cannot be updated. */
-
-/****************************************************************//**
-If an undo log record for this table might not fit on a single page,
-return TRUE.
-@return TRUE if the undo log record could become too big */
-static
-ibool
-dict_index_too_big_for_undo(
-/*========================*/
- const dict_table_t* table, /*!< in: table */
- const dict_index_t* new_index) /*!< in: index */
-{
- /* Make sure that all column prefixes will fit in the undo log record
- in trx_undo_page_report_modify() right after trx_undo_page_init(). */
-
- ulint i;
- const dict_index_t* clust_index
- = dict_table_get_first_index(table);
- ulint undo_page_len
- = TRX_UNDO_PAGE_HDR - TRX_UNDO_PAGE_HDR_SIZE
- + 2 /* next record pointer */
- + 1 /* type_cmpl */
- + 11 /* trx->undo_no */ + 11 /* table->id */
- + 1 /* rec_get_info_bits() */
- + 11 /* DB_TRX_ID */
- + 11 /* DB_ROLL_PTR */
- + 10 + FIL_PAGE_DATA_END /* trx_undo_left() */
- + 2/* pointer to previous undo log record */;
-
- /* FTS index consists of auxiliary tables, they shall be excluded from
- index row size check */
- if (new_index->type & DICT_FTS) {
- return(false);
- }
-
- if (!clust_index) {
- ut_a(dict_index_is_clust(new_index));
- clust_index = new_index;
+/** Clears the virtual column's index list before the index
+is freed.
+@param[in] index Index being freed */
+void
+dict_index_remove_from_v_col_list(dict_index_t* index) {
+ /* Index is not completely formed */
+ if (!index->cached) {
+ return;
}
-
- /* Add the size of the ordering columns in the
- clustered index. */
- for (i = 0; i < clust_index->n_uniq; i++) {
- const dict_col_t* col
- = dict_index_get_nth_col(clust_index, i);
-
- /* Use the maximum output size of
- mach_write_compressed(), although the encoded
- length should always fit in 2 bytes. */
- undo_page_len += 5 + dict_col_get_max_size(col);
- }
-
- /* Add the old values of the columns to be updated.
- First, the amount and the numbers of the columns.
- These are written by mach_write_compressed() whose
- maximum output length is 5 bytes. However, given that
- the quantities are below REC_MAX_N_FIELDS (10 bits),
- the maximum length is 2 bytes per item. */
- undo_page_len += 2 * (dict_table_get_n_cols(table) + 1);
-
- for (i = 0; i < clust_index->n_def; i++) {
- const dict_col_t* col
- = dict_index_get_nth_col(clust_index, i);
- ulint max_size
- = dict_col_get_max_size(col);
- ulint fixed_size
- = dict_col_get_fixed_size(col,
- dict_table_is_comp(table));
- ulint max_prefix
- = col->max_prefix;
-
- if (fixed_size) {
- /* Fixed-size columns are stored locally. */
- max_size = fixed_size;
- } else if (max_size <= BTR_EXTERN_FIELD_REF_SIZE * 2) {
- /* Short columns are stored locally. */
- } else if (!col->ord_part
- || (col->max_prefix
- < (ulint) DICT_MAX_FIELD_LEN_BY_FORMAT(table))) {
- /* See if col->ord_part would be set
- because of new_index. Also check if the new
- index could have longer prefix on columns
- that already had ord_part set */
- ulint j;
-
- for (j = 0; j < new_index->n_uniq; j++) {
- if (dict_index_get_nth_col(
- new_index, j) == col) {
- const dict_field_t* field
- = dict_index_get_nth_field(
- new_index, j);
-
- if (field->prefix_len
- > col->max_prefix) {
- max_prefix =
- field->prefix_len;
- }
-
- goto is_ord_part;
+ if (dict_index_has_virtual(index)) {
+ const dict_col_t* col;
+ const dict_v_col_t* vcol;
+
+ for (ulint i = 0; i < dict_index_get_n_fields(index); i++) {
+ col = dict_index_get_nth_col(index, i);
+ if (dict_col_is_virtual(col)) {
+ vcol = reinterpret_cast<const dict_v_col_t*>(
+ col);
+				/* This can be NULL when a virtual column
+				and an index on it are added in the same
+				ALTER TABLE; then we do not need to track
+				this virtual column's index. */
+ if (vcol->v_indexes == NULL) {
+ continue;
+ }
+ dict_v_idx_list::iterator it;
+ for (it = vcol->v_indexes->begin();
+ it != vcol->v_indexes->end(); ++it) {
+ dict_v_idx_t v_index = *it;
+ if (v_index.index == index) {
+ vcol->v_indexes->erase(it);
+ break;
+ }
}
}
-
- if (col->ord_part) {
- goto is_ord_part;
- }
-
- /* This is not an ordering column in any index.
- Thus, it can be stored completely externally. */
- max_size = BTR_EXTERN_FIELD_REF_SIZE;
- } else {
- ulint max_field_len;
-is_ord_part:
- max_field_len = DICT_MAX_FIELD_LEN_BY_FORMAT(table);
-
- /* This is an ordering column in some index.
- A long enough prefix must be written to the
- undo log. See trx_undo_page_fetch_ext(). */
- max_size = ut_min(max_size, max_field_len);
-
- /* We only store the needed prefix length in undo log */
- if (max_prefix) {
- ut_ad(dict_table_get_format(table)
- >= UNIV_FORMAT_B);
-
- max_size = ut_min(max_prefix, max_size);
- }
-
- max_size += BTR_EXTERN_FIELD_REF_SIZE;
- }
-
- undo_page_len += 5 + max_size;
- }
-
- return(undo_page_len >= UNIV_PAGE_SIZE);
-}
-#endif
-
-/****************************************************************//**
-If a record of this index might not fit on a single B-tree page,
-return TRUE.
-@return TRUE if the index record could become too big */
-static
-ibool
-dict_index_too_big_for_tree(
-/*========================*/
- const dict_table_t* table, /*!< in: table */
- const dict_index_t* new_index) /*!< in: index */
-{
- ulint zip_size;
- ulint comp;
- ulint i;
- /* maximum possible storage size of a record */
- ulint rec_max_size;
- /* maximum allowed size of a record on a leaf page */
- ulint page_rec_max;
- /* maximum allowed size of a node pointer record */
- ulint page_ptr_max;
-
- /* FTS index consists of auxiliary tables, they shall be excluded from
- index row size check */
- if (new_index->type & DICT_FTS) {
- return(false);
- }
-
- DBUG_EXECUTE_IF(
- "ib_force_create_table",
- return(FALSE););
-
- comp = dict_table_is_comp(table);
- zip_size = dict_table_zip_size(table);
-
- if (zip_size && zip_size < UNIV_PAGE_SIZE) {
- /* On a compressed page, two records must fit in the
- uncompressed page modification log. On compressed
- pages with zip_size == UNIV_PAGE_SIZE, this limit will
- never be reached. */
- ut_ad(comp);
- /* The maximum allowed record size is the size of
- an empty page, minus a byte for recoding the heap
- number in the page modification log. The maximum
- allowed node pointer size is half that. */
- page_rec_max = page_zip_empty_size(new_index->n_fields,
- zip_size);
- if (page_rec_max) {
- page_rec_max--;
- }
- page_ptr_max = page_rec_max / 2;
- /* On a compressed page, there is a two-byte entry in
- the dense page directory for every record. But there
- is no record header. */
- rec_max_size = 2;
- } else {
- /* The maximum allowed record size is half a B-tree
- page(16k for 64k page size). No additional sparse
- page directory entry will be generated for the first
- few user records. */
- page_rec_max = (comp || UNIV_PAGE_SIZE < UNIV_PAGE_SIZE_MAX)
- ? page_get_free_space_of_empty(comp) / 2
- : REDUNDANT_REC_MAX_DATA_SIZE;
-
- page_ptr_max = page_rec_max;
- /* Each record has a header. */
- rec_max_size = comp
- ? REC_N_NEW_EXTRA_BYTES
- : REC_N_OLD_EXTRA_BYTES;
- }
-
- if (comp) {
- /* Include the "null" flags in the
- maximum possible record size. */
- rec_max_size += UT_BITS_IN_BYTES(new_index->n_nullable);
- } else {
- /* For each column, include a 2-byte offset and a
- "null" flag. The 1-byte format is only used in short
- records that do not contain externally stored columns.
- Such records could never exceed the page limit, even
- when using the 2-byte format. */
- rec_max_size += 2 * new_index->n_fields;
- }
-
- /* Compute the maximum possible record size. */
- for (i = 0; i < new_index->n_fields; i++) {
- const dict_field_t* field
- = dict_index_get_nth_field(new_index, i);
- const dict_col_t* col
- = dict_field_get_col(field);
- ulint field_max_size;
- ulint field_ext_max_size;
-
- /* In dtuple_convert_big_rec(), variable-length columns
- that are longer than BTR_EXTERN_FIELD_REF_SIZE * 2
- may be chosen for external storage.
-
- Fixed-length columns, and all columns of secondary
- index records are always stored inline. */
-
- /* Determine the maximum length of the index field.
- The field_ext_max_size should be computed as the worst
- case in rec_get_converted_size_comp() for
- REC_STATUS_ORDINARY records. */
-
- field_max_size = dict_col_get_fixed_size(col, comp);
- if (field_max_size) {
- /* dict_index_add_col() should guarantee this */
- ut_ad(!field->prefix_len
- || field->fixed_len == field->prefix_len);
- /* Fixed lengths are not encoded
- in ROW_FORMAT=COMPACT. */
- field_ext_max_size = 0;
- goto add_field_size;
- }
-
- field_max_size = dict_col_get_max_size(col);
- field_ext_max_size = field_max_size < 256 ? 1 : 2;
-
- if (field->prefix_len) {
- if (field->prefix_len < field_max_size) {
- field_max_size = field->prefix_len;
- }
- } else if (field_max_size > BTR_EXTERN_FIELD_REF_SIZE * 2
- && dict_index_is_clust(new_index)) {
-
- /* In the worst case, we have a locally stored
- column of BTR_EXTERN_FIELD_REF_SIZE * 2 bytes.
- The length can be stored in one byte. If the
- column were stored externally, the lengths in
- the clustered index page would be
- BTR_EXTERN_FIELD_REF_SIZE and 2. */
- field_max_size = BTR_EXTERN_FIELD_REF_SIZE * 2;
- field_ext_max_size = 1;
- }
-
- if (comp) {
- /* Add the extra size for ROW_FORMAT=COMPACT.
- For ROW_FORMAT=REDUNDANT, these bytes were
- added to rec_max_size before this loop. */
- rec_max_size += field_ext_max_size;
- }
-add_field_size:
- rec_max_size += field_max_size;
-
- /* Check the size limit on leaf pages. */
- if (UNIV_UNLIKELY(rec_max_size >= page_rec_max)) {
- return(TRUE);
- }
-
- /* Check the size limit on non-leaf pages. Records
- stored in non-leaf B-tree pages consist of the unique
- columns of the record (the key columns of the B-tree)
- and a node pointer field. When we have processed the
- unique columns, rec_max_size equals the size of the
- node pointer record minus the node pointer column. */
- if (i + 1 == dict_index_get_n_unique_in_tree(new_index)
- && rec_max_size + REC_NODE_PTR_SIZE >= page_ptr_max) {
-
- return(TRUE);
}
}
-
- return(FALSE);
}
-/**********************************************************************//**
-Adds an index to the dictionary cache.
-@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
-UNIV_INTERN
+/** Adds an index to the dictionary cache, possibly indexing a newly
+added column.
+@param[in,out] table table on which the index is
+@param[in,out] index index; NOTE! The index memory
+ object is freed in this function!
+@param[in] page_no root page number of the index
+@param[in] add_v virtual columns being added along with ADD INDEX
+@return DB_SUCCESS, or DB_CORRUPTION */
dberr_t
dict_index_add_to_cache(
-/*====================*/
- dict_table_t* table, /*!< in: table on which the index is */
- dict_index_t* index, /*!< in, own: index; NOTE! The index memory
- object is freed in this function! */
- ulint page_no,/*!< in: root page number of the index */
- ibool strict) /*!< in: TRUE=refuse to create the index
- if records could be too big to fit in
- an B-tree page */
+ dict_table_t* table,
+ dict_index_t*& index,
+ ulint page_no,
+ const dict_add_v_col_t* add_v)
{
dict_index_t* new_index;
ulint n_ord;
@@ -2495,15 +2186,17 @@ dict_index_add_to_cache(
ut_ad(index->n_def == index->n_fields);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
ut_ad(!dict_index_is_online_ddl(index));
+ ut_ad(!dict_index_is_ibuf(index));
- ut_ad(mem_heap_validate(index->heap));
+ ut_d(mem_heap_validate(index->heap));
ut_a(!dict_index_is_clust(index)
|| UT_LIST_GET_LEN(table->indexes) == 0);
- if (!dict_index_find_cols(table, index)) {
+ if (!dict_index_find_cols(table, index, add_v)) {
dict_mem_index_free(index);
- return(DB_CORRUPTION);
+ index = NULL;
+ return DB_CORRUPTION;
}
/* Build the cache internal representation of the index,
@@ -2522,125 +2215,57 @@ dict_index_add_to_cache(
new_index->n_fields = new_index->n_def;
new_index->trx_id = index->trx_id;
-
- if (dict_index_too_big_for_tree(table, new_index)) {
-
- if (strict) {
-too_big:
- dict_mem_index_free(new_index);
- dict_mem_index_free(index);
- return(DB_TOO_BIG_RECORD);
- } else if (current_thd != NULL) {
- /* Avoid the warning to be printed
- during recovery. */
- ib_warn_row_too_big(table);
- }
- }
-
- if (dict_index_is_univ(index)) {
- n_ord = new_index->n_fields;
- } else {
- n_ord = new_index->n_uniq;
- }
-
-#if 1 /* The following code predetermines whether to call
- dict_index_too_big_for_undo(). This function is not
- accurate. See innodb_4k.test, Bug 13336585, for a
- testcase that shows an index that can be created but
- cannot be updated. */
-
- switch (dict_table_get_format(table)) {
- case UNIV_FORMAT_A:
- /* ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT store
- prefixes of externally stored columns locally within
- the record. There are no special considerations for
- the undo log record size. */
- goto undo_size_ok;
-
- case UNIV_FORMAT_B:
- /* In ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPRESSED,
- column prefix indexes require that prefixes of
- externally stored columns are written to the undo log.
- This may make the undo log record bigger than the
- record on the B-tree page. The maximum size of an
- undo log record is the page size. That must be
- checked for below. */
- break;
-
-#if UNIV_FORMAT_B != UNIV_FORMAT_MAX
-# error "UNIV_FORMAT_B != UNIV_FORMAT_MAX"
+ new_index->set_committed(index->is_committed());
+ new_index->nulls_equal = index->nulls_equal;
+#ifdef MYSQL_INDEX_DISABLE_AHI
+ new_index->disable_ahi = index->disable_ahi;
#endif
- }
-
- for (i = 0; i < n_ord; i++) {
- const dict_field_t* field
- = dict_index_get_nth_field(new_index, i);
- const dict_col_t* col
- = dict_field_get_col(field);
-
- /* In dtuple_convert_big_rec(), variable-length columns
- that are longer than BTR_EXTERN_FIELD_REF_SIZE * 2
- may be chosen for external storage. If the column appears
- in an ordering column of an index, a longer prefix determined
- by dict_max_field_len_store_undo() will be copied to the undo
- log by trx_undo_page_report_modify() and
- trx_undo_page_fetch_ext(). It suffices to check the
- capacity of the undo log whenever new_index includes
- a column prefix on a column that may be stored externally. */
-
- if (field->prefix_len /* prefix index */
- && (!col->ord_part /* not yet ordering column */
- || field->prefix_len > col->max_prefix)
- && !dict_col_get_fixed_size(col, TRUE) /* variable-length */
- && dict_col_get_max_size(col)
- > BTR_EXTERN_FIELD_REF_SIZE * 2 /* long enough */) {
-
- if (dict_index_too_big_for_undo(table, new_index)) {
- /* An undo log record might not fit in
- a single page. Refuse to create this index. */
-
- goto too_big;
- }
-
- break;
- }
- }
-undo_size_ok:
-#endif
+ n_ord = new_index->n_uniq;
/* Flag the ordering columns and also set column max_prefix */
for (i = 0; i < n_ord; i++) {
const dict_field_t* field
= dict_index_get_nth_field(new_index, i);
- field->col->ord_part = 1;
-
- if (field->prefix_len > field->col->max_prefix) {
+		/* If the column is added to an index for the first
+		time, flag it as an ordering column; the max_prefix
+		rules are sketched after this function. */
+		if (field->col->ord_part == 0) {
+ field->col->max_prefix = field->prefix_len;
+ field->col->ord_part = 1;
+ } else if (field->prefix_len == 0) {
+ /* Set the max_prefix for a column to 0 if
+ its prefix length is 0 (for this index)
+ even if it was a part of any other index
+ with some prefix length. */
+ field->col->max_prefix = 0;
+ } else if (field->col->max_prefix != 0
+ && field->prefix_len
+ > field->col->max_prefix) {
+ /* Set the max_prefix value based on the
+ prefix_len. */
field->col->max_prefix = field->prefix_len;
}
+ ut_ad(field->col->ord_part == 1);
}
- if (!dict_index_is_univ(new_index)) {
-
- new_index->stat_n_diff_key_vals =
- static_cast<ib_uint64_t*>(mem_heap_zalloc(
+ new_index->stat_n_diff_key_vals =
+ static_cast<ib_uint64_t*>(mem_heap_zalloc(
new_index->heap,
dict_index_get_n_unique(new_index)
* sizeof(*new_index->stat_n_diff_key_vals)));
- new_index->stat_n_sample_sizes =
- static_cast<ib_uint64_t*>(mem_heap_zalloc(
+ new_index->stat_n_sample_sizes =
+ static_cast<ib_uint64_t*>(mem_heap_zalloc(
new_index->heap,
dict_index_get_n_unique(new_index)
* sizeof(*new_index->stat_n_sample_sizes)));
- new_index->stat_n_non_null_key_vals =
- static_cast<ib_uint64_t*>(mem_heap_zalloc(
+ new_index->stat_n_non_null_key_vals =
+ static_cast<ib_uint64_t*>(mem_heap_zalloc(
new_index->heap,
dict_index_get_n_unique(new_index)
* sizeof(*new_index->stat_n_non_null_key_vals)));
- }
new_index->stat_index_size = 1;
new_index->stat_n_leaf_pages = 1;
@@ -2654,19 +2279,20 @@ undo_size_ok:
/* Add the new index as the last index for the table */
- UT_LIST_ADD_LAST(indexes, table->indexes, new_index);
+ UT_LIST_ADD_LAST(table->indexes, new_index);
new_index->table = table;
- new_index->table_name = table->name;
+ new_index->table_name = table->name.m_name;
+#ifdef BTR_CUR_ADAPT
new_index->search_info = btr_search_info_create(new_index->heap);
+#endif /* BTR_CUR_ADAPT */
- new_index->page = page_no;
+ new_index->page = unsigned(page_no);
rw_lock_create(index_tree_rw_lock_key, &new_index->lock,
- dict_index_is_ibuf(index)
- ? SYNC_IBUF_INDEX_TREE : SYNC_INDEX_TREE);
+ SYNC_INDEX_TREE);
dict_mem_index_free(index);
-
- return(DB_SUCCESS);
+ index = new_index;
+ return DB_SUCCESS;
}
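The ordering-column loop above applies three rules to col->max_prefix: the first index to use the column adopts that field's prefix length; a full-column use (prefix_len == 0) clamps max_prefix to 0 for good; otherwise a longer prefix widens it. A condensed sketch of just that decision (illustrative struct, not dict_col_t):

struct col_info {		/* illustrative stand-in for dict_col_t */
	unsigned ord_part   = 0;
	unsigned max_prefix = 0;
};

static void flag_ordering_column(col_info& col, unsigned prefix_len)
{
	if (col.ord_part == 0) {
		/* First index using this column as an ordering column. */
		col.max_prefix = prefix_len;
		col.ord_part = 1;
	} else if (prefix_len == 0) {
		/* Some index needs the full column, so max_prefix stays 0
		even if other indexes use only a prefix. */
		col.max_prefix = 0;
	} else if (col.max_prefix != 0 && prefix_len > col.max_prefix) {
		/* Widen to the longest prefix any index requires. */
		col.max_prefix = prefix_len;
	}
}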
/**********************************************************************//**
@@ -2680,13 +2306,10 @@ dict_index_remove_from_cache_low(
ibool lru_evict) /*!< in: TRUE if index being evicted
to make room in the table LRU list */
{
- ulint retries = 0;
- btr_search_t* info;
-
ut_ad(table && index);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
/* No need to acquire the dict_index_t::lock here because
there can't be any active operations on this index (or table). */
@@ -2696,9 +2319,11 @@ dict_index_remove_from_cache_low(
row_log_free(index->online_log);
}
+#ifdef BTR_CUR_HASH_ADAPT
/* We always create search info whether or not adaptive
hash index is enabled or not. */
- info = btr_search_get_info(index);
+ btr_search_t* info = btr_search_get_info(index);
+ ulint retries = 0;
ut_ad(info);
/* We are not allowed to free the in-memory index struct
@@ -2711,25 +2336,35 @@ dict_index_remove_from_cache_low(
zero. See also: dict_table_can_be_evicted() */
do {
- if (!btr_search_info_get_ref_count(info)
+ if (!btr_search_info_get_ref_count(info, index)
|| !buf_LRU_drop_page_hash_for_tablespace(table)) {
break;
}
ut_a(++retries < 10000);
} while (srv_shutdown_state == SRV_SHUTDOWN_NONE || !lru_evict);
+#endif /* BTR_CUR_HASH_ADAPT */
rw_lock_free(&index->lock);
+	/* The index is being dropped; remove any compression stats for it. */
+ if (!lru_evict && DICT_TF_GET_ZIP_SSIZE(index->table->flags)) {
+ mutex_enter(&page_zip_stat_per_index_mutex);
+ page_zip_stat_per_index.erase(index->id);
+ mutex_exit(&page_zip_stat_per_index_mutex);
+ }
+
/* Remove the index from the list of indexes of the table */
- UT_LIST_REMOVE(indexes, table->indexes, index);
+ UT_LIST_REMOVE(table->indexes, index);
+
+ /* Remove the index from affected virtual column index list */
+ index->detach_columns();
dict_mem_index_free(index);
}
/**********************************************************************//**
Removes an index from the dictionary cache. */
-UNIV_INTERN
void
dict_index_remove_from_cache(
/*=========================*/
@@ -2739,43 +2374,97 @@ dict_index_remove_from_cache(
dict_index_remove_from_cache_low(table, index, FALSE);
}
-/*******************************************************************//**
-Tries to find column names for the index and sets the col field of the
+/** Tries to find column names for the index and sets the col field of the
index.
+@param[in] table table
+@param[in,out] index index
+@param[in] add_v new virtual columns added along with an add index call
@return TRUE if the column names were found */
static
ibool
dict_index_find_cols(
-/*=================*/
- dict_table_t* table, /*!< in: table */
- dict_index_t* index) /*!< in: index */
+ const dict_table_t* table,
+ dict_index_t* index,
+ const dict_add_v_col_t* add_v)
{
- ulint i;
+ std::vector<ulint, ut_allocator<ulint> > col_added;
+ std::vector<ulint, ut_allocator<ulint> > v_col_added;
- ut_ad(table && index);
+ ut_ad(table != NULL && index != NULL);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
- for (i = 0; i < index->n_fields; i++) {
+ for (ulint i = 0; i < index->n_fields; i++) {
ulint j;
dict_field_t* field = dict_index_get_nth_field(index, i);
for (j = 0; j < table->n_cols; j++) {
if (!innobase_strcasecmp(dict_table_get_col_name(table, j),
field->name)) {
+
+				/* Check if the same column is assigned
+				again, which suggests a duplicate column
+				name. */
+ bool exists =
+ std::find(col_added.begin(),
+ col_added.end(), j)
+ != col_added.end();
+
+ if (exists) {
+ /* Duplicate column found. */
+ goto dup_err;
+ }
+
field->col = dict_table_get_nth_col(table, j);
+ col_added.push_back(j);
+
+ goto found;
+ }
+ }
+
+ /* Let's check if it is a virtual column */
+ for (j = 0; j < table->n_v_cols; j++) {
+ if (!strcmp(dict_table_get_v_col_name(table, j),
+ field->name)) {
+
+			/* Check if the same column is assigned
+			again, which suggests a duplicate column
+			name. */
+ bool exists =
+ std::find(v_col_added.begin(),
+ v_col_added.end(), j)
+ != v_col_added.end();
+
+ if (exists) {
+ /* Duplicate column found. */
+ break;
+ }
+
+ field->col = reinterpret_cast<dict_col_t*>(
+ dict_table_get_nth_v_col(table, j));
+
+ v_col_added.push_back(j);
+
goto found;
}
}
+ if (add_v) {
+ for (j = 0; j < add_v->n_v_col; j++) {
+ if (!strcmp(add_v->v_col_name[j],
+ field->name)) {
+ field->col = const_cast<dict_col_t*>(
+ &add_v->v_col[j].m_col);
+ goto found;
+ }
+ }
+ }
+
+dup_err:
#ifdef UNIV_DEBUG
/* It is an error not to find a matching column. */
- fputs("InnoDB: Error: no matching column for ", stderr);
- ut_print_name(stderr, NULL, FALSE, field->name);
- fputs(" in ", stderr);
- dict_index_name_print(stderr, NULL, index);
- fputs("!\n", stderr);
+ ib::error() << "No matching column for " << field->name
+ << " in index " << index->name
+ << " of table " << table->name;
#endif /* UNIV_DEBUG */
return(FALSE);
@@ -2785,11 +2474,9 @@ found:
return(TRUE);
}
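dict_index_find_cols() now records every matched column number in a vector and treats a repeated match as a duplicate column name. The same detection in isolation (plain C++ sketch):

#include <algorithm>
#include <vector>

/* Returns true if column number j was already matched for this index,
i.e. two index fields resolved to the same table column. */
static bool already_matched(std::vector<unsigned long>& matched,
			    unsigned long j)
{
	if (std::find(matched.begin(), matched.end(), j) != matched.end()) {
		return true;	/* duplicate column name */
	}
	matched.push_back(j);	/* remember the first use */
	return false;
}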
-#endif /* !UNIV_HOTBACKUP */
/*******************************************************************//**
Adds a column to index. */
-UNIV_INTERN
void
dict_index_add_col(
/*===============*/
@@ -2801,15 +2488,37 @@ dict_index_add_col(
dict_field_t* field;
const char* col_name;
- col_name = dict_table_get_col_name(table, dict_col_get_no(col));
+ if (dict_col_is_virtual(col)) {
+ dict_v_col_t* v_col = reinterpret_cast<dict_v_col_t*>(col);
+
+ /* When v_col->v_indexes==NULL,
+ ha_innobase::commit_inplace_alter_table(commit=true)
+ will evict and reload the table definition, and
+ v_col->v_indexes will not be NULL for the new table. */
+ if (v_col->v_indexes != NULL) {
+ /* Register the index with the virtual column index
+ list */
+ struct dict_v_idx_t new_idx
+ = {index, index->n_def};
+
+ v_col->v_indexes->push_back(new_idx);
+
+ }
+
+ col_name = dict_table_get_v_col_name_mysql(
+ table, dict_col_get_no(col));
+ } else {
+ col_name = dict_table_get_col_name(table, dict_col_get_no(col));
+ }
dict_mem_index_add_field(index, col_name, prefix_len);
field = dict_index_get_nth_field(index, index->n_def - 1);
field->col = col;
- field->fixed_len = (unsigned int) dict_col_get_fixed_size(
- col, dict_table_is_comp(table));
+ field->fixed_len = static_cast<unsigned int>(
+ dict_col_get_fixed_size(
+ col, dict_table_is_comp(table)));
if (prefix_len && field->fixed_len > prefix_len) {
field->fixed_len = (unsigned int) prefix_len;
@@ -2834,7 +2543,6 @@ dict_index_add_col(
}
}
-#ifndef UNIV_HOTBACKUP
/*******************************************************************//**
Copies fields contained in index2 to index1. */
static
@@ -2855,6 +2563,7 @@ dict_index_copy(
for (i = start; i < end; i++) {
field = dict_index_get_nth_field(index2, i);
+
dict_index_add_col(index1, table, field->col,
field->prefix_len);
}
@@ -2862,7 +2571,6 @@ dict_index_copy(
/*******************************************************************//**
Copies types of fields contained in index to tuple. */
-UNIV_INTERN
void
dict_index_copy_types(
/*==================*/
@@ -2873,7 +2581,7 @@ dict_index_copy_types(
{
ulint i;
- if (dict_index_is_univ(index)) {
+ if (dict_index_is_ibuf(index)) {
dtuple_set_types_binary(tuple, n_fields);
return;
@@ -2886,14 +2594,45 @@ dict_index_copy_types(
ifield = dict_index_get_nth_field(index, i);
dfield_type = dfield_get_type(dtuple_get_nth_field(tuple, i));
dict_col_copy_type(dict_field_get_col(ifield), dfield_type);
+ if (dict_index_is_spatial(index)
+ && DATA_GEOMETRY_MTYPE(dfield_type->mtype)) {
+ dfield_type->prtype |= DATA_GIS_MBR;
+ }
}
}
+/** Copies types of virtual columns contained in table to tuple and sets all
+fields of the tuple to the SQL NULL value. This function should
+be called right after dtuple_create().
+@param[in,out] tuple data tuple
+@param[in] table table
+*/
+void
+dict_table_copy_v_types(
+ dtuple_t* tuple,
+ const dict_table_t* table)
+{
+	/* The tuple can have more virtual columns than the existing
+	table when this is called while creating an index together
+	with adding virtual columns. */
+ ulint n_fields = ut_min(dtuple_get_n_v_fields(tuple),
+ static_cast<ulint>(table->n_v_def));
+
+ for (ulint i = 0; i < n_fields; i++) {
+
+ dfield_t* dfield = dtuple_get_nth_v_field(tuple, i);
+ dtype_t* dtype = dfield_get_type(dfield);
+
+ dfield_set_null(dfield);
+ dict_col_copy_type(
+ &(dict_table_get_nth_v_col(table, i)->m_col),
+ dtype);
+ }
+}
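
A usage sketch for the new function, following the call order described in its comment (dtuple_create() and dict_table_get_n_cols() are the names used elsewhere in this file):

	dtuple_t*	row = dtuple_create(heap, dict_table_get_n_cols(table));

	dict_table_copy_types(row, table);
	/* With this patch dict_table_copy_types() ends by calling
	dict_table_copy_v_types(), so any virtual fields the tuple
	carries are typed and set to SQL NULL as well; a tuple with
	no virtual fields is unaffected (the loop runs zero times). */
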
/*******************************************************************//**
Copies types of columns contained in table to tuple and sets all
fields of the tuple to the SQL NULL value. This function should
be called right after dtuple_create(). */
-UNIV_INTERN
void
dict_table_copy_types(
/*==================*/
@@ -2910,12 +2649,14 @@ dict_table_copy_types(
dfield_set_null(dfield);
dict_col_copy_type(dict_table_get_nth_col(table, i), dtype);
}
+
+ dict_table_copy_v_types(tuple, table);
}
/*******************************************************************//**
Builds the internal dictionary cache representation for a clustered
index, containing also system fields not defined by the user.
-@return own: the internal representation of the clustered index */
+@return own: the internal representation of the clustered index */
static
dict_index_t*
dict_index_build_internal_clust(
@@ -2932,11 +2673,13 @@ dict_index_build_internal_clust(
ut_ad(table && index);
ut_ad(dict_index_is_clust(index));
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(!dict_index_is_ibuf(index));
+
+ ut_ad(mutex_own(&dict_sys->mutex));
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
/* Create a new index object with certainly enough fields */
- new_index = dict_mem_index_create(table->name,
+ new_index = dict_mem_index_create(table->name.m_name,
index->name, table->space,
index->type,
index->n_fields + table->n_cols);
@@ -2951,12 +2694,7 @@ dict_index_build_internal_clust(
/* Copy the fields of index */
dict_index_copy(new_index, index, table, 0, index->n_fields);
- if (dict_index_is_univ(index)) {
- /* No fixed number of fields determines an entry uniquely */
-
- new_index->n_uniq = REC_MAX_N_FIELDS;
-
- } else if (dict_index_is_unique(index)) {
+ if (dict_index_is_unique(index)) {
/* Only the fields defined so far are needed to identify
the index entry uniquely */
@@ -2968,10 +2706,9 @@ dict_index_build_internal_clust(
new_index->trx_id_offset = 0;
- if (!dict_index_is_ibuf(index)) {
- /* Add system columns, trx id first */
+ /* Add system columns, trx id first */
- trx_id_pos = new_index->n_def;
+ trx_id_pos = new_index->n_def;
#if DATA_ROW_ID != 0
# error "DATA_ROW_ID != 0"
@@ -2983,63 +2720,61 @@ dict_index_build_internal_clust(
# error "DATA_ROLL_PTR != 2"
#endif
- if (!dict_index_is_unique(index)) {
- dict_index_add_col(new_index, table,
- dict_table_get_sys_col(
- table, DATA_ROW_ID),
- 0);
- trx_id_pos++;
- }
-
+ if (!dict_index_is_unique(index)) {
dict_index_add_col(new_index, table,
- dict_table_get_sys_col(table, DATA_TRX_ID),
+ dict_table_get_sys_col(
+ table, DATA_ROW_ID),
0);
+ trx_id_pos++;
+ }
- dict_index_add_col(new_index, table,
- dict_table_get_sys_col(table,
- DATA_ROLL_PTR),
- 0);
+ dict_index_add_col(
+ new_index, table,
+ dict_table_get_sys_col(table, DATA_TRX_ID), 0);
- for (i = 0; i < trx_id_pos; i++) {
+ for (i = 0; i < trx_id_pos; i++) {
- ulint fixed_size = dict_col_get_fixed_size(
- dict_index_get_nth_col(new_index, i),
- dict_table_is_comp(table));
+ ulint fixed_size = dict_col_get_fixed_size(
+ dict_index_get_nth_col(new_index, i),
+ dict_table_is_comp(table));
- if (fixed_size == 0) {
- new_index->trx_id_offset = 0;
+ if (fixed_size == 0) {
+ new_index->trx_id_offset = 0;
- break;
- }
+ break;
+ }
- if (dict_index_get_nth_field(new_index, i)->prefix_len
- > 0) {
- new_index->trx_id_offset = 0;
+ dict_field_t* field = dict_index_get_nth_field(
+ new_index, i);
+ if (field->prefix_len > 0) {
+ new_index->trx_id_offset = 0;
- break;
- }
+ break;
+ }
- /* Add fixed_size to new_index->trx_id_offset.
- Because the latter is a bit-field, an overflow
- can theoretically occur. Check for it. */
- fixed_size += new_index->trx_id_offset;
+ /* Add fixed_size to new_index->trx_id_offset.
+ Because the latter is a bit-field, an overflow
+ can theoretically occur. Check for it. */
+ fixed_size += new_index->trx_id_offset;
- new_index->trx_id_offset = fixed_size;
+ new_index->trx_id_offset = unsigned(fixed_size);
- if (new_index->trx_id_offset != fixed_size) {
- /* Overflow. Pretend that this is a
- variable-length PRIMARY KEY. */
- ut_ad(0);
- new_index->trx_id_offset = 0;
- break;
- }
+ if (new_index->trx_id_offset != fixed_size) {
+ /* Overflow. Pretend that this is a
+ variable-length PRIMARY KEY. */
+ ut_ad(0);
+ new_index->trx_id_offset = 0;
+ break;
}
-
}
+ dict_index_add_col(
+ new_index, table,
+ dict_table_get_sys_col(table, DATA_ROLL_PTR), 0);
+
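
The offset computation above can be read in isolation: the offset of DATA_TRX_ID within a clustered index record is the sum of the fixed sizes of the fields that precede it, and any variable-length or prefixed field forces the offset back to 0 ("not precomputable"). A standalone sketch with a hypothetical field descriptor (the bit-field overflow check is omitted):

	#include <vector>

	struct FieldDesc { unsigned fixed_size; unsigned prefix_len; };

	std::vector<FieldDesc> before_trx_id = {{4, 0}, {8, 0}};  /* illustrative */
	unsigned trx_id_offset = 0;

	for (const FieldDesc& f : before_trx_id) {
		if (f.fixed_size == 0 || f.prefix_len > 0) {
			trx_id_offset = 0;	/* cannot be precomputed */
			break;
		}
		trx_id_offset += f.fixed_size;	/* here: 4 + 8 = 12 */
	}
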
/* Remember the table columns already contained in new_index */
indexed = static_cast<ibool*>(
- mem_zalloc(table->n_cols * sizeof *indexed));
+ ut_zalloc_nokey(table->n_cols * sizeof *indexed));
/* Mark the table columns already contained in new_index */
for (i = 0; i < new_index->n_def; i++) {
@@ -3057,8 +2792,7 @@ dict_index_build_internal_clust(
/* Add to new_index non-system columns of table not yet included
there */
- for (i = 0; i + DATA_N_SYS_COLS < (ulint) table->n_cols; i++) {
-
+ for (i = 0; i + DATA_N_SYS_COLS < ulint(table->n_cols); i++) {
dict_col_t* col = dict_table_get_nth_col(table, i);
ut_ad(col->mtype != DATA_SYS);
@@ -3067,10 +2801,9 @@ dict_index_build_internal_clust(
}
}
- mem_free(indexed);
+ ut_free(indexed);
- ut_ad(dict_index_is_ibuf(index)
- || (UT_LIST_GET_LEN(table->indexes) == 0));
+ ut_ad(UT_LIST_GET_LEN(table->indexes) == 0);
new_index->cached = TRUE;
@@ -3080,7 +2813,7 @@ dict_index_build_internal_clust(
/*******************************************************************//**
Builds the internal dictionary cache representation for a non-clustered
index, containing also system fields not defined by the user.
-@return own: the internal representation of the non-clustered index */
+@return own: the internal representation of the non-clustered index */
static
dict_index_t*
dict_index_build_internal_non_clust(
@@ -3097,7 +2830,8 @@ dict_index_build_internal_non_clust(
ut_ad(table && index);
ut_ad(!dict_index_is_clust(index));
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(!dict_index_is_ibuf(index));
+ ut_ad(mutex_own(&dict_sys->mutex));
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
/* The clustered index should be the first in the list of indexes */
@@ -3105,11 +2839,11 @@ dict_index_build_internal_non_clust(
ut_ad(clust_index);
ut_ad(dict_index_is_clust(clust_index));
- ut_ad(!dict_index_is_univ(clust_index));
+ ut_ad(!dict_index_is_ibuf(clust_index));
/* Create a new index */
new_index = dict_mem_index_create(
- table->name, index->name, index->space, index->type,
+ table->name.m_name, index->name, index->space, index->type,
index->n_fields + 1 + clust_index->n_uniq);
/* Copy other relevant data from the old index
@@ -3124,13 +2858,17 @@ dict_index_build_internal_non_clust(
/* Remember the table columns already contained in new_index */
indexed = static_cast<ibool*>(
- mem_zalloc(table->n_cols * sizeof *indexed));
+ ut_zalloc_nokey(table->n_cols * sizeof *indexed));
/* Mark the table columns already contained in new_index */
for (i = 0; i < new_index->n_def; i++) {
field = dict_index_get_nth_field(new_index, i);
+ if (dict_col_is_virtual(field->col)) {
+ continue;
+ }
+
/* If there is only a prefix of the column in the index
field, do not mark the column as contained in the index */
@@ -3150,10 +2888,15 @@ dict_index_build_internal_non_clust(
if (!indexed[field->col->ind]) {
dict_index_add_col(new_index, table, field->col,
field->prefix_len);
+ } else if (dict_index_is_spatial(index)) {
+			/* For a spatial index, we still need to add
+			the field to the index. */
+ dict_index_add_col(new_index, table, field->col,
+ field->prefix_len);
}
}
- mem_free(indexed);
+ ut_free(indexed);
if (dict_index_is_unique(index)) {
new_index->n_uniq = index->n_fields;
@@ -3173,7 +2916,7 @@ dict_index_build_internal_non_clust(
/***********************************************************************
Builds the internal dictionary cache representation for an FTS index.
-@return own: the internal representation of the FTS index */
+@return own: the internal representation of the FTS index */
static
dict_index_t*
dict_index_build_internal_fts(
@@ -3185,14 +2928,12 @@ dict_index_build_internal_fts(
ut_ad(table && index);
ut_ad(index->type == DICT_FTS);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(mutex_own(&(dict_sys->mutex)));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(mutex_own(&dict_sys->mutex));
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
/* Create a new index */
new_index = dict_mem_index_create(
- table->name, index->name, index->space, index->type,
+ table->name.m_name, index->name, index->space, index->type,
index->n_fields);
/* Copy other relevant data from the old index struct to the new
@@ -3221,15 +2962,26 @@ dict_index_build_internal_fts(
}
/*====================== FOREIGN KEY PROCESSING ========================*/
-#define DB_FOREIGN_KEY_IS_PREFIX_INDEX 200
-#define DB_FOREIGN_KEY_COL_NOT_NULL 201
-#define DB_FOREIGN_KEY_COLS_NOT_EQUAL 202
-#define DB_FOREIGN_KEY_INDEX_NOT_FOUND 203
+/** Check whether the dict_table_t is a partition.
+A partitioned table on the SQL level is composed of InnoDB tables,
+where each InnoDB table is a [sub]partition, including its secondary indexes,
+which belong to the partition.
+@param[in] table Table to check.
+@return true if the dict_table_t is a partition else false. */
+UNIV_INLINE
+bool
+dict_table_is_partition(
+ const dict_table_t* table)
+{
+ /* Check both P and p on all platforms in case it was moved to/from
+ WIN. */
+ return(strstr(table->name.m_name, "#p#")
+ || strstr(table->name.m_name, "#P#"));
+}
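
The check reduces to a plain substring test on the internal table name. A standalone analogue (hypothetical helper, not the InnoDB declaration):

	#include <cstring>

	static bool is_partition_name(const char* name)
	{
		/* same test as dict_table_is_partition() above */
		return strstr(name, "#p#") || strstr(name, "#P#");
	}
	/* is_partition_name("test/t1#p#p0") -> true
	   is_partition_name("test/t1")     -> false */
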
/*********************************************************************//**
Checks if a table is referenced by foreign keys.
-@return TRUE if table is referenced by a foreign key */
-UNIV_INTERN
+@return TRUE if table is referenced by a foreign key */
ibool
dict_table_is_referenced_by_foreign_key(
/*====================================*/
@@ -3240,13 +2992,12 @@ dict_table_is_referenced_by_foreign_key(
/**********************************************************************//**
Removes a foreign constraint struct from the dictionary cache. */
-UNIV_INTERN
void
dict_foreign_remove_from_cache(
/*===========================*/
dict_foreign_t* foreign) /*!< in, own: foreign constraint */
{
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
ut_a(foreign);
if (foreign->referenced_table != NULL) {
@@ -3263,7 +3014,7 @@ dict_foreign_remove_from_cache(
/**********************************************************************//**
Looks for the foreign constraint from the foreign and referenced lists
of a table.
-@return foreign constraint */
+@return foreign constraint */
static
dict_foreign_t*
dict_foreign_find(
@@ -3271,7 +3022,7 @@ dict_foreign_find(
dict_table_t* table, /*!< in: table object */
dict_foreign_t* foreign) /*!< in: foreign constraint */
{
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
ut_ad(dict_foreign_set_validate(table->foreign_set));
ut_ad(dict_foreign_set_validate(table->referenced_set));
@@ -3291,13 +3042,11 @@ dict_foreign_find(
return(NULL);
}
-
/*********************************************************************//**
Tries to find an index whose first fields are the columns in the array,
in the same order and is not marked for deletion and is not the same
as types_idx.
-@return matching index, NULL if not found */
-UNIV_INTERN
+@return matching index, NULL if not found */
dict_index_t*
dict_foreign_find_index(
/*====================*/
@@ -3319,69 +3068,41 @@ dict_foreign_find_index(
/*!< in: nonzero if none of
the columns must be declared
NOT NULL */
- ulint* error, /*!< out: error code */
+ fkerr_t* error, /*!< out: error code */
ulint* err_col_no,
/*!< out: column number where
error happened */
dict_index_t** err_index)
- /*!< out: index where error
+ /*!< out: index where error
happened */
{
- dict_index_t* index;
-
ut_ad(mutex_own(&dict_sys->mutex));
if (error) {
- *error = DB_FOREIGN_KEY_INDEX_NOT_FOUND;
+ *error = FK_INDEX_NOT_FOUND;
}
- index = dict_table_get_first_index(table);
-
- while (index != NULL) {
+ for (dict_index_t* index = dict_table_get_first_index(table);
+ index;
+ index = dict_table_get_next_index(index)) {
if (types_idx != index
- && !(index->type & DICT_FTS)
&& !index->to_be_dropped
&& !dict_index_is_online_ddl(index)
&& dict_foreign_qualify_index(
table, col_names, columns, n_cols,
index, types_idx,
check_charsets, check_null,
- error, err_col_no,err_index)) {
+ error, err_col_no, err_index)) {
if (error) {
- *error = DB_SUCCESS;
+ *error = FK_SUCCESS;
}
return(index);
}
-
- index = dict_table_get_next_index(index);
}
return(NULL);
}
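
A call sketch using the out-parameters introduced above; all three can carry NULL (and appear to default to it, given the shortened calls in dict_foreign_add_to_cache() below) when the failure details are not needed:

	fkerr_t		error;
	ulint		err_col;
	dict_index_t*	err_index = NULL;

	dict_index_t*	index = dict_foreign_find_index(
		table, NULL /* use table->col_names */,
		columns, n_cols, NULL /* types_idx */,
		TRUE /* check_charsets */, 0 /* check_null */,
		&error, &err_col, &err_index);

	if (index == NULL) {
		/* `error` describes the failure, e.g. FK_INDEX_NOT_FOUND */
	}
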
-#ifdef WITH_WSREP
-dict_index_t*
-wsrep_dict_foreign_find_index(
-/*====================*/
- dict_table_t* table, /*!< in: table */
- const char** col_names, /*!< in: column names, or NULL
- to use table->col_names */
- const char** columns,/*!< in: array of column names */
- ulint n_cols, /*!< in: number of columns */
- dict_index_t* types_idx, /*!< in: NULL or an index to whose types the
- column types must match */
- ibool check_charsets,
- /*!< in: whether to check charsets.
- only has an effect if types_idx != NULL */
- ulint check_null)
- /*!< in: nonzero if none of the columns must
- be declared NOT NULL */
-{
- return dict_foreign_find_index(
- table, col_names, columns, n_cols, types_idx, check_charsets,
- check_null, NULL, NULL, NULL);
-}
-#endif /* WITH_WSREP */
/**********************************************************************//**
Report an error in a foreign key definition. */
static
@@ -3416,12 +3137,9 @@ dict_foreign_error_report(
fputs(fk_str.c_str(), file);
putc('\n', file);
if (fk->foreign_index) {
- fputs("The index in the foreign key in table is ", file);
- ut_print_name(file, NULL, FALSE, fk->foreign_index->name);
- fputs("\n"
- "See " REFMAN "innodb-foreign-key-constraints.html\n"
- "for correct foreign key definition.\n",
- file);
+ fprintf(file, "The index in the foreign key in table is"
+ " %s\n%s\n", fk->foreign_index->name(),
+ FOREIGN_KEY_CONSTRAINTS_MSG);
}
mutex_exit(&dict_foreign_err_mutex);
}
@@ -3431,8 +3149,7 @@ Adds a foreign key constraint object to the dictionary cache. May free
the object if there already is an object with the same identifier in.
At least one of the foreign table and the referenced table must already
be in the dictionary cache!
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
dberr_t
dict_foreign_add_to_cache(
/*======================*/
@@ -3454,7 +3171,10 @@ dict_foreign_add_to_cache(
ibool added_to_referenced_list= FALSE;
FILE* ef = dict_foreign_err_file;
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ DBUG_ENTER("dict_foreign_add_to_cache");
+ DBUG_PRINT("dict_foreign_add_to_cache", ("id: %s", foreign->id));
+
+ ut_ad(mutex_own(&dict_sys->mutex));
for_table = dict_table_check_if_in_cache_low(
foreign->foreign_table_name_lookup);
@@ -3475,18 +3195,15 @@ dict_foreign_add_to_cache(
dict_foreign_free(foreign);
} else {
for_in_cache = foreign;
+
}
if (ref_table && !for_in_cache->referenced_table) {
- ulint index_error;
- ulint err_col;
- dict_index_t *err_index=NULL;
-
index = dict_foreign_find_index(
ref_table, NULL,
for_in_cache->referenced_col_names,
for_in_cache->n_fields, for_in_cache->foreign_index,
- check_charsets, false, &index_error, &err_col, &err_index);
+ check_charsets, false);
if (index == NULL
&& !(ignore_err & DICT_ERR_IGNORE_FK_NOKEY)) {
@@ -3503,7 +3220,7 @@ dict_foreign_add_to_cache(
dict_foreign_free(foreign);
}
- return(DB_CANNOT_ADD_CONSTRAINT);
+ DBUG_RETURN(DB_CANNOT_ADD_CONSTRAINT);
}
for_in_cache->referenced_table = ref_table;
@@ -3518,10 +3235,6 @@ dict_foreign_add_to_cache(
}
if (for_table && !for_in_cache->foreign_table) {
- ulint index_error;
- ulint err_col;
- dict_index_t *err_index=NULL;
-
index = dict_foreign_find_index(
for_table, col_names,
for_in_cache->foreign_col_names,
@@ -3529,8 +3242,7 @@ dict_foreign_add_to_cache(
for_in_cache->referenced_index, check_charsets,
for_in_cache->type
& (DICT_FOREIGN_ON_DELETE_SET_NULL
- | DICT_FOREIGN_ON_UPDATE_SET_NULL),
- &index_error, &err_col, &err_index);
+ | DICT_FOREIGN_ON_UPDATE_SET_NULL));
if (index == NULL
&& !(ignore_err & DICT_ERR_IGNORE_FK_NOKEY)) {
@@ -3547,8 +3259,8 @@ dict_foreign_add_to_cache(
if (for_in_cache == foreign) {
if (added_to_referenced_list) {
- const dict_foreign_set::size_type n
- = ref_table->referenced_set
+ const dict_foreign_set::size_type
+ n = ref_table->referenced_set
.erase(for_in_cache);
ut_a(n == 1); /* the number of
@@ -3559,11 +3271,12 @@ dict_foreign_add_to_cache(
dict_foreign_free(foreign);
}
- return(DB_CANNOT_ADD_CONSTRAINT);
+ DBUG_RETURN(DB_CANNOT_ADD_CONSTRAINT);
}
for_in_cache->foreign_table = for_table;
for_in_cache->foreign_index = index;
+
std::pair<dict_foreign_set::iterator, bool> ret
= for_table->foreign_set.insert(for_in_cache);
@@ -3574,24 +3287,23 @@ dict_foreign_add_to_cache(
/* We need to move the table to the non-LRU end of the table LRU
list. Otherwise it will be evicted from the cache. */
- if (ref_table != NULL && ref_table->can_be_evicted) {
- dict_table_move_from_lru_to_non_lru(ref_table);
+ if (ref_table != NULL) {
+ dict_table_prevent_eviction(ref_table);
}
- if (for_table != NULL && for_table->can_be_evicted) {
- dict_table_move_from_lru_to_non_lru(for_table);
+ if (for_table != NULL) {
+ dict_table_prevent_eviction(for_table);
}
ut_ad(dict_lru_validate());
-
- return(DB_SUCCESS);
+ DBUG_RETURN(DB_SUCCESS);
}
/*********************************************************************//**
Scans from pointer onwards. Stops if is at the start of a copy of
'string' where characters are compared without case sensitivity, and
only outside `` or "" quotes. Stops also at NUL.
-@return scanned up to this */
+@return scanned up to this */
static
const char*
dict_scan_to(
@@ -3651,7 +3363,7 @@ static
const char*
dict_accept(
/*========*/
- struct charset_info_st* cs,/*!< in: the character set of ptr */
+ CHARSET_INFO* cs, /*!< in: the character set of ptr */
const char* ptr, /*!< in: scan from this */
const char* string, /*!< in: accept only this string as the next
non-whitespace string */
@@ -3682,12 +3394,12 @@ dict_accept(
/*********************************************************************//**
Scans an id. For the lexical definition of an 'id', see the code below.
Strips backquotes or double quotes from around the id.
-@return scanned to */
+@return scanned to */
static
const char*
dict_scan_id(
/*=========*/
- struct charset_info_st* cs,/*!< in: the character set of ptr */
+ CHARSET_INFO* cs, /*!< in: the character set of ptr */
const char* ptr, /*!< in: scanned to */
mem_heap_t* heap, /*!< in: heap where to allocate the id
(NULL=id will not be allocated, but it
@@ -3750,7 +3462,7 @@ dict_scan_id(
len = ptr - s;
}
- if (UNIV_UNLIKELY(!heap)) {
+ if (heap == NULL) {
/* no heap given: id will point to source string */
*id = s;
return(ptr);
@@ -3804,12 +3516,12 @@ convert_id:
/*********************************************************************//**
Tries to scan a column name.
-@return scanned to */
+@return scanned to */
static
const char*
dict_scan_col(
/*==========*/
- struct charset_info_st* cs, /*!< in: the character set of ptr */
+ CHARSET_INFO* cs, /*!< in: the character set of ptr */
const char* ptr, /*!< in: scanned to */
ibool* success,/*!< out: TRUE if success */
dict_table_t* table, /*!< in: table in which the column is */
@@ -3848,18 +3560,33 @@ dict_scan_col(
break;
}
}
+
+ for (i = 0; i < dict_table_get_n_v_cols(table); i++) {
+
+ const char* col_name = dict_table_get_v_col_name(
+ table, i);
+
+ if (0 == innobase_strcasecmp(col_name, *name)) {
+ /* Found */
+ dict_v_col_t * vcol;
+ *success = TRUE;
+ vcol = dict_table_get_nth_v_col(table, i);
+ *column = &vcol->m_col;
+ strcpy((char*) *name, col_name);
+
+ break;
+ }
+ }
}
return(ptr);
}
-
/*********************************************************************//**
Open a table from its database and table name, this is currently used by
foreign constraint parser to get the referenced table.
@return complete table name with database and table name, allocated from
heap memory passed in */
-UNIV_INTERN
char*
dict_get_referenced_table(
/*======================*/
@@ -3902,13 +3629,13 @@ dict_get_referenced_table(
memcpy(ref + database_name_len + 1, table_name, table_name_len + 1);
} else {
-#ifndef __WIN__
+#ifndef _WIN32
if (innobase_get_lower_case_table_names() == 1) {
innobase_casedn_str(ref);
}
#else
innobase_casedn_str(ref);
-#endif /* !__WIN__ */
+#endif /* !_WIN32 */
*table = dict_table_get_low(ref);
}
@@ -3916,12 +3643,12 @@ dict_get_referenced_table(
}
/*********************************************************************//**
Scans a table name from an SQL string.
-@return scanned to */
+@return scanned to */
static
const char*
dict_scan_table_name(
/*=================*/
- struct charset_info_st* cs,/*!< in: the character set of ptr */
+ CHARSET_INFO* cs, /*!< in: the character set of ptr */
const char* ptr, /*!< in: scanned to */
dict_table_t** table, /*!< out: table object or NULL */
const char* name, /*!< in: foreign key table name */
@@ -3991,12 +3718,12 @@ dict_scan_table_name(
/*********************************************************************//**
Skips one id. The id is allowed to contain also '.'.
-@return scanned to */
+@return scanned to */
static
const char*
dict_skip_word(
/*===========*/
- struct charset_info_st* cs,/*!< in: the character set of ptr */
+ CHARSET_INFO* cs, /*!< in: the character set of ptr */
const char* ptr, /*!< in: scanned to */
ibool* success)/*!< out: TRUE if success, FALSE if just spaces
left in string or a syntax error */
@@ -4021,7 +3748,7 @@ Removes MySQL comments from an SQL string. A comment is either
(c) '[slash][asterisk]' till the next '[asterisk][slash]' (like the familiar
C comment syntax).
@return own: SQL string stripped from comments; the caller must free
-this with mem_free()! */
+this with ut_free()! */
static
char*
dict_strip_comments(
@@ -4041,7 +3768,7 @@ dict_strip_comments(
DBUG_PRINT("dict_strip_comments", ("%s", sql_string));
- str = static_cast<char*>(mem_alloc(sql_length + 1));
+ str = static_cast<char*>(ut_malloc_nokey(sql_length + 1));
sptr = sql_string;
ptr = str;
@@ -4130,8 +3857,7 @@ end_of_string:
Finds the highest [number] for foreign key constraints of the table. Looks
only at the >= 4.0.18-format id's, which are of the form
databasename/tablename_ibfk_[number].
-@return highest number, 0 if table has no new format foreign key constraints */
-UNIV_INTERN
+@return highest number, 0 if table has no new format foreign key constraints */
ulint
dict_table_get_highest_foreign_id(
/*==============================*/
@@ -4143,9 +3869,11 @@ dict_table_get_highest_foreign_id(
ulint id;
ulint len;
+ DBUG_ENTER("dict_table_get_highest_foreign_id");
+
ut_a(table);
- len = ut_strlen(table->name);
+ len = ut_strlen(table->name.m_name);
for (dict_foreign_set::iterator it = table->foreign_set.begin();
it != table->foreign_set.end();
@@ -4162,7 +3890,7 @@ dict_table_get_highest_foreign_id(
MAX_TABLE_NAME_LEN);
if (ut_strlen(fkid) > ((sizeof dict_ibfk) - 1) + len
- && 0 == ut_memcmp(fkid, table->name, len)
+ && 0 == ut_memcmp(fkid, table->name.m_name, len)
&& 0 == ut_memcmp(fkid + len,
dict_ibfk, (sizeof dict_ibfk) - 1)
&& fkid[len + ((sizeof dict_ibfk) - 1)] != '0') {
@@ -4181,7 +3909,10 @@ dict_table_get_highest_foreign_id(
}
}
- return(biggest_id);
+ DBUG_PRINT("dict_table_get_highest_foreign_id",
+ ("id: " ULINTPF, biggest_id));
+
+ DBUG_RETURN(biggest_id);
}
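
A worked example of the id format the function parses (constraint names illustrative): for a table "test/parent" whose dictionary already holds constraints "test/parent_ibfk_3" and "test/parent_ibfk_12", the call returns 12:

	ulint	biggest = dict_table_get_highest_foreign_id(table);	/* 12 */
	/* dict_create_add_foreign_id() would then generate
	test/parent_ibfk_13 for the next anonymous constraint. */
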
/*********************************************************************//**
@@ -4223,7 +3954,7 @@ dict_foreign_push_index_error(
const char* latest_foreign, /*!< in: start of latest foreign key
constraint name */
const char** columns, /*!< in: foreign key columns */
- ulint index_error, /*!< in: error code */
+ fkerr_t index_error, /*!< in: error code */
ulint err_col, /*!< in: column where error happened
*/
dict_index_t* err_index, /*!< in: index where error happened
@@ -4232,37 +3963,37 @@ dict_foreign_push_index_error(
FILE* ef) /*!< in: output stream */
{
switch (index_error) {
- case DB_FOREIGN_KEY_INDEX_NOT_FOUND: {
+ case FK_SUCCESS:
+ break;
+ case FK_INDEX_NOT_FOUND:
fprintf(ef,
- "%s table '%s' with foreign key constraint"
+ "%s table %s with foreign key constraint"
" failed. There is no index in the referenced"
" table where the referenced columns appear"
" as the first columns near '%s'.\n",
operation, create_name, latest_foreign);
ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
- "%s table '%s' with foreign key constraint"
+ "%s table %s with foreign key constraint"
" failed. There is no index in the referenced"
" table where the referenced columns appear"
" as the first columns near '%s'.",
operation, create_name, latest_foreign);
- break;
- }
- case DB_FOREIGN_KEY_IS_PREFIX_INDEX: {
+ return;
+ case FK_IS_PREFIX_INDEX:
fprintf(ef,
- "%s table '%s' with foreign key constraint"
+			"%s table %s with foreign key constraint"
			" failed. There is only a prefix index in the referenced"
" table where the referenced columns appear"
" as the first columns near '%s'.\n",
operation, create_name, latest_foreign);
ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
- "%s table '%s' with foreign key constraint"
+			"%s table %s with foreign key constraint"
			" failed. There is only a prefix index in the referenced"
" table where the referenced columns appear"
" as the first columns near '%s'.",
operation, create_name, latest_foreign);
- break;
- }
- case DB_FOREIGN_KEY_COL_NOT_NULL: {
+ return;
+ case FK_COL_NOT_NULL:
fprintf(ef,
"%s table %s with foreign key constraint"
" failed. You have defined a SET NULL condition but "
@@ -4273,15 +4004,16 @@ dict_foreign_push_index_error(
			" failed. You have defined a SET NULL condition but "
			"column '%s' in the index is defined as NOT NULL near '%s'.",
			operation, create_name, columns[err_col], latest_foreign);
- break;
- }
- case DB_FOREIGN_KEY_COLS_NOT_EQUAL: {
+ return;
+ case FK_COLS_NOT_EQUAL:
dict_field_t* field;
const char* col_name;
field = dict_index_get_nth_field(err_index, err_col);
- col_name = dict_table_get_col_name(
- table, dict_col_get_no(field->col));
+ col_name = dict_col_is_virtual(field->col)
+ ? "(null)"
+ : dict_table_get_col_name(
+ table, dict_col_get_no(field->col));
		fprintf(ef,
			"%s table %s with foreign key constraint"
			" failed. Field type or character set for column '%s' "
			"does not match referenced column '%s' near '%s'.\n",
			" failed. Field type or character set for column '%s' "
			"does not match referenced column '%s' near '%s'.",
operation, create_name, columns[err_col], col_name, latest_foreign);
- break;
- }
- default:
- ut_error;
+ return;
}
+ DBUG_ASSERT(!"unknown error");
}
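
A note on the rewritten switch: with every fkerr_t value handled by a `return` and no `default:` label, compilers that warn on unhandled enumerators (e.g. gcc/clang -Wswitch) flag a newly added value at build time, while the trailing assertion still catches corrupt values in debug builds. The shape of the pattern, in miniature:

	switch (index_error) {
	case FK_SUCCESS:
		return;
	case FK_INDEX_NOT_FOUND:
		/* report, then */ return;
	/* ... every enumerator handled, no default: ... */
	}
	DBUG_ASSERT(!"unknown error");
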
/*********************************************************************//**
-Scans a table create SQL string and adds to the data dictionary the foreign
-key constraints declared in the string. This function should be called after
-the indexes for a table have been created. Each foreign key constraint must
-be accompanied with indexes in both participating tables. The indexes are
-allowed to contain more fields than mentioned in the constraint.
-@return error code or DB_SUCCESS */
+Scans a table create SQL string and adds to the data dictionary the foreign key
+constraints declared in the string. This function should be called after the
+indexes for a table have been created. Each foreign key constraint must be
+accompanied with indexes in both participating tables. The indexes are allowed
+to contain more fields than mentioned in the constraint.
+@return error code or DB_SUCCESS */
static
dberr_t
dict_create_foreign_constraints_low(
-/*================================*/
- trx_t* trx, /*!< in: transaction */
- mem_heap_t* heap, /*!< in: memory heap */
- struct charset_info_st* cs,/*!< in: the character set of sql_string */
- const char* sql_string,
- /*!< in: CREATE TABLE or ALTER TABLE statement
- where foreign keys are declared like:
- FOREIGN KEY (a, b) REFERENCES table2(c, d),
- table2 can be written also with the database
- name before it: test.table2; the default
- database is the database of parameter name */
- const char* name, /*!< in: table full name in the normalized form
- database_name/table_name */
- ibool reject_fks)
- /*!< in: if TRUE, fail with error code
- DB_CANNOT_ADD_CONSTRAINT if any foreign
- keys are found. */
+ trx_t* trx,
+ mem_heap_t* heap,
+ CHARSET_INFO* cs,
+ const char* sql_string,
+ const char* name,
+ ibool reject_fks)
{
dict_table_t* table = NULL;
dict_table_t* referenced_table = NULL;
@@ -4339,7 +4058,7 @@ dict_create_foreign_constraints_low(
const char* start_of_latest_foreign = sql_string;
const char* start_of_latest_set = NULL;
FILE* ef = dict_foreign_err_file;
- ulint index_error = DB_SUCCESS;
+ fkerr_t index_error = FK_SUCCESS;
dict_index_t* err_index = NULL;
ulint err_col;
const char* constraint_name;
@@ -4363,7 +4082,7 @@ dict_create_foreign_constraints_low(
char create_name[MAX_TABLE_NAME_LEN + 1];
ut_ad(!srv_read_only_mode);
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
table = dict_table_get_low(name);
/* First check if we are actually doing an ALTER TABLE, and in that
@@ -4388,7 +4107,7 @@ dict_create_foreign_constraints_low(
ptr = orig;
const char* n = create_table_name ? create_table_name : name;
char *bufend = innobase_convert_name(create_name, MAX_TABLE_NAME_LEN,
- n, strlen(n), trx->mysql_thd, TRUE);
+ n, strlen(n), trx->mysql_thd);
create_name[bufend-create_name] = '\0';
} else {
strncpy(create_name, name, sizeof create_name);
@@ -4442,22 +4161,16 @@ dict_create_foreign_constraints_low(
{
const char* n = table_to_alter
- ? table_to_alter->name : referenced_table_name;
+ ? table_to_alter->name.m_name : referenced_table_name;
char* bufend = innobase_convert_name(
create_name, MAX_TABLE_NAME_LEN, n, strlen(n),
- trx->mysql_thd, TRUE);
+ trx->mysql_thd);
create_name[bufend-create_name]='\0';
}
if (!success) {
- mutex_enter(&dict_foreign_err_mutex);
- dict_foreign_error_report_low(ef, create_name);
- fprintf(ef,
- "%s table %s with foreign key constraint"
- " failed. Table %s not found from data dictionary."
- " Error close to %s.\n",
- operation, create_name, create_name, orig);
- mutex_exit(&dict_foreign_err_mutex);
+		ib::error() << "Could not find the table " << create_name
+			<< " being " << operation << " near " << orig;
ib_push_warning(trx, DB_ERROR,
"%s table %s with foreign key constraint"
@@ -4548,6 +4261,10 @@ loop:
return(DB_CANNOT_ADD_CONSTRAINT);
}
+ if (dict_foreigns_has_s_base_col(local_fk_set, table)) {
+ return(DB_NO_FK_ON_S_BASE_COL);
+ }
+
/**********************************************************/
/* The following call adds the foreign key constraints
to the data dictionary system tables on disk */
@@ -4563,6 +4280,8 @@ loop:
local_fk_set.end(),
dict_foreign_add_to_referenced_table());
local_fk_set.clear();
+
+ dict_mem_table_fill_foreign_vcol_set(table);
}
return(error);
}
@@ -4588,53 +4307,52 @@ loop:
}
if (my_isspace(cs, *ptr)) {
- ptr1 = dict_accept(cs, ptr, "IF", &success);
-
- if (success) {
- if (!my_isspace(cs, *ptr1)) {
- goto loop;
- }
- ptr1 = dict_accept(cs, ptr1, "NOT", &success);
- if (!success) {
- goto loop;
- }
- ptr1 = dict_accept(cs, ptr1, "EXISTS", &success);
- if (!success) {
- goto loop;
- }
- ptr = ptr1;
- }
+ ptr1 = dict_accept(cs, ptr, "IF", &success);
+
+ if (success) {
+ if (!my_isspace(cs, *ptr1)) {
+ goto loop;
+ }
+ ptr1 = dict_accept(cs, ptr1, "NOT", &success);
+ if (!success) {
+ goto loop;
+ }
+ ptr1 = dict_accept(cs, ptr1, "EXISTS", &success);
+ if (!success) {
+ goto loop;
+ }
+ ptr = ptr1;
+ }
}
orig = ptr;
ptr = dict_accept(cs, ptr, "(", &success);
if (!success) {
- if (constraint_name) {
- /* MySQL allows also an index id before the '('; we
- skip it */
- ptr = dict_skip_word(cs, ptr, &success);
- if (!success) {
- dict_foreign_report_syntax_err(
- "%s table %s with foreign key constraint"
- " failed. Parse error in '%s'"
- " near '%s'.\n",
- operation, create_name, start_of_latest_foreign, orig);
-
- ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
- "%s table %s with foreign key constraint"
- " failed. Parse error in '%s'"
- " near '%s'.",
- operation, create_name, start_of_latest_foreign, orig);
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
- }
- else {
- while (my_isspace(cs, *ptr)) {
- ptr++;
- }
+ if (constraint_name) {
+ /* MySQL allows also an index id before the '('; we
+ skip it */
+ ptr = dict_skip_word(cs, ptr, &success);
+ if (!success) {
+ dict_foreign_report_syntax_err(
+ "%s table %s with foreign key constraint"
+ " failed. Parse error in '%s'"
+ " near '%s'.\n",
+ operation, create_name, start_of_latest_foreign, orig);
+
+ ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
+ "%s table %s with foreign key constraint"
+ " failed. Parse error in '%s'"
+ " near '%s'.",
+ operation, create_name, start_of_latest_foreign, orig);
+ return(DB_CANNOT_ADD_CONSTRAINT);
+ }
+ } else {
+ while (my_isspace(cs, *ptr)) {
+ ptr++;
+ }
- ptr = dict_scan_id(cs, ptr, heap,
+ ptr = dict_scan_id(cs, ptr, heap,
&constraint_name, FALSE, FALSE);
}
@@ -4717,12 +4435,11 @@ col_loop1:
mutex_enter(&dict_foreign_err_mutex);
dict_foreign_error_report_low(ef, create_name);
fputs("There is no index in table ", ef);
- ut_print_name(ef, NULL, TRUE, create_name);
+ ut_print_name(ef, NULL, create_name);
fprintf(ef, " where the columns appear\n"
- "as the first columns. Constraint:\n%s\n"
- "See " REFMAN "innodb-foreign-key-constraints.html\n"
- "for correct foreign key definition.\n",
- start_of_latest_foreign);
+ "as the first columns. Constraint:\n%s\n%s",
+ start_of_latest_foreign,
+ FOREIGN_KEY_CONSTRAINTS_MSG);
dict_foreign_push_index_error(trx, operation, create_name, start_of_latest_foreign,
column_names, index_error, err_col, err_index, table, ef);
@@ -4748,6 +4465,40 @@ col_loop1:
return(DB_CANNOT_ADD_CONSTRAINT);
}
+ /* Don't allow foreign keys on partitioned tables yet. */
+ ptr1 = dict_scan_to(ptr, "PARTITION");
+ if (ptr1) {
+ ptr1 = dict_accept(cs, ptr1, "PARTITION", &success);
+ if (success && my_isspace(cs, *ptr1)) {
+ ptr2 = dict_accept(cs, ptr1, "BY", &success);
+ if (success) {
+ my_error(ER_FOREIGN_KEY_ON_PARTITIONED,MYF(0));
+ return(DB_CANNOT_ADD_CONSTRAINT);
+ }
+ }
+ }
+ if (dict_table_is_partition(table)) {
+ my_error(ER_FOREIGN_KEY_ON_PARTITIONED,MYF(0));
+ return(DB_CANNOT_ADD_CONSTRAINT);
+ }
+
/* Let us create a constraint struct */
foreign = dict_mem_foreign_create();
@@ -4760,19 +4511,19 @@ col_loop1:
same MySQL 'database' as the table itself. We store the name
to foreign->id. */
- db_len = dict_get_db_name_len(table->name);
+ db_len = dict_get_db_name_len(table->name.m_name);
foreign->id = static_cast<char*>(mem_heap_alloc(
foreign->heap, db_len + strlen(constraint_name) + 2));
- ut_memcpy(foreign->id, table->name, db_len);
+ ut_memcpy(foreign->id, table->name.m_name, db_len);
foreign->id[db_len] = '/';
strcpy(foreign->id + db_len + 1, constraint_name);
}
if (foreign->id == NULL) {
- error = dict_create_add_foreign_id(&number,
- table->name, foreign);
+ error = dict_create_add_foreign_id(
+ &number, table->name.m_name, foreign);
if (error != DB_SUCCESS) {
dict_foreign_free(foreign);
return(error);
@@ -4790,7 +4541,7 @@ col_loop1:
foreign->foreign_table = table;
foreign->foreign_table_name = mem_heap_strdup(
- foreign->heap, table->name);
+ foreign->heap, table->name.m_name);
dict_mem_foreign_table_name_lookup_set(foreign, TRUE);
foreign->foreign_index = index;
@@ -4801,9 +4552,7 @@ col_loop1:
for (i = 0; i < foreign->n_fields; i++) {
foreign->foreign_col_names[i] = mem_heap_strdup(
- foreign->heap,
- dict_table_get_col_name(table,
- dict_col_get_no(columns[i])));
+ foreign->heap, column_names[i]);
}
ptr = dict_scan_table_name(cs, ptr, &referenced_table, name,
@@ -4818,7 +4567,7 @@ col_loop1:
bufend = innobase_convert_name(buf, MAX_TABLE_NAME_LEN,
referenced_table_name, strlen(referenced_table_name),
- trx->mysql_thd, TRUE);
+ trx->mysql_thd);
buf[bufend - buf] = '\0';
ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
@@ -4837,7 +4586,14 @@ col_loop1:
return(DB_CANNOT_ADD_CONSTRAINT);
}
- orig = ptr;
+ /* Don't allow foreign keys on partitioned tables yet. */
+ if (referenced_table && dict_table_is_partition(referenced_table)) {
+		/* How could a referenced table be a partition? */
+ ut_ad(0);
+ my_error(ER_FOREIGN_KEY_ON_PARTITIONED,MYF(0));
+ return(DB_CANNOT_ADD_CONSTRAINT);
+ }
+
ptr = dict_accept(cs, ptr, "(", &success);
if (!success) {
@@ -5092,6 +4848,7 @@ try_find_index:
" failed. You have more than one on delete or on update clause"
" in '%s' near '%s'.\n",
operation, create_name, start_of_latest_foreign, start_of_latest_set);
+ mutex_exit(&dict_foreign_err_mutex);
ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
"%s table %s with foreign key constraint"
@@ -5100,7 +4857,6 @@ try_find_index:
operation, create_name, start_of_latest_foreign, start_of_latest_set);
dict_foreign_free(foreign);
- mutex_exit(&dict_foreign_err_mutex);
return(DB_CANNOT_ADD_CONSTRAINT);
}
@@ -5114,6 +4870,7 @@ try_find_index:
ref_column_names, i,
foreign->foreign_index,
TRUE, FALSE, &index_error, &err_col, &err_index);
+
if (!index) {
mutex_enter(&dict_foreign_err_mutex);
dict_foreign_error_report_low(ef, create_name);
@@ -5129,11 +4886,9 @@ try_find_index:
"tables created with >= InnoDB-4.1.12,"
" and such columns in old tables\n"
"cannot be referenced by such columns"
- " in new tables.\n"
- "See " REFMAN
- "innodb-foreign-key-constraints.html\n"
- "for correct foreign key definition.\n",
- start_of_latest_foreign);
+ " in new tables.\n%s\n",
+ start_of_latest_foreign,
+ FOREIGN_KEY_CONSTRAINTS_MSG);
dict_foreign_push_index_error(trx, operation, create_name, start_of_latest_foreign,
column_names, index_error, err_col, err_index, referenced_table, ef);
@@ -5165,51 +4920,33 @@ try_find_index:
goto loop;
}
-/**************************************************************************
-Determines whether a string starts with the specified keyword.
-@return TRUE if str starts with keyword */
-UNIV_INTERN
-ibool
-dict_str_starts_with_keyword(
-/*=========================*/
- THD* thd, /*!< in: MySQL thread handle */
- const char* str, /*!< in: string to scan for keyword */
- const char* keyword) /*!< in: keyword to look for */
-{
- struct charset_info_st* cs = innobase_get_charset(thd);
- ibool success;
-
- dict_accept(cs, str, keyword, &success);
- return(success);
-}
+/** Scans a table create SQL string and adds to the data dictionary
+the foreign key constraints declared in the string. This function
+should be called after the indexes for a table have been created.
+Each foreign key constraint must be accompanied with indexes in
+both participating tables. The indexes are allowed to contain more
+fields than mentioned in the constraint.
-/*********************************************************************//**
-Scans a table create SQL string and adds to the data dictionary the foreign
-key constraints declared in the string. This function should be called after
-the indexes for a table have been created. Each foreign key constraint must
-be accompanied with indexes in both participating tables. The indexes are
-allowed to contain more fields than mentioned in the constraint.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@param[in] trx transaction
+@param[in] sql_string table create statement where
+ foreign keys are declared like:
+ FOREIGN KEY (a, b) REFERENCES table2(c, d),
+ table2 can be written also with the database
+ name before it: test.table2; the default
+			database is the database of parameter name
+@param[in] sql_length length of sql_string
+@param[in] name table full name in normalized form
+@param[in] reject_fks if TRUE, fail with error code
+ DB_CANNOT_ADD_CONSTRAINT if any
+ foreign keys are found.
+@return error code or DB_SUCCESS */
dberr_t
dict_create_foreign_constraints(
-/*============================*/
- trx_t* trx, /*!< in: transaction */
- const char* sql_string, /*!< in: table create statement where
- foreign keys are declared like:
- FOREIGN KEY (a, b) REFERENCES
- table2(c, d), table2 can be written
- also with the database
- name before it: test.table2; the
- default database id the database of
- parameter name */
- size_t sql_length, /*!< in: length of sql_string */
- const char* name, /*!< in: table full name in the
- normalized form
- database_name/table_name */
- ibool reject_fks) /*!< in: if TRUE, fail with error
- code DB_CANNOT_ADD_CONSTRAINT if
- any foreign keys are found. */
+ trx_t* trx,
+ const char* sql_string,
+ size_t sql_length,
+ const char* name,
+ ibool reject_fks)
{
char* str;
dberr_t err;
@@ -5222,11 +4959,11 @@ dict_create_foreign_constraints(
heap = mem_heap_create(10000);
err = dict_create_foreign_constraints_low(
- trx, heap, innobase_get_charset(trx->mysql_thd), str, name,
- reject_fks);
+ trx, heap, innobase_get_charset(trx->mysql_thd),
+ str, name, reject_fks);
mem_heap_free(heap);
- mem_free(str);
+ ut_free(str);
return(err);
}
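
A call sketch (arguments illustrative; the transaction and the SQL text come from the DDL caller):

	dberr_t	err = dict_create_foreign_constraints(
		trx, create_sql, strlen(create_sql),
		"test/child" /* normalized name */,
		FALSE /* reject_fks */);

	if (err != DB_SUCCESS) {
		/* e.g. DB_CANNOT_ADD_CONSTRAINT or DB_NO_FK_ON_S_BASE_COL */
	}
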
@@ -5235,7 +4972,6 @@ dict_create_foreign_constraints(
Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement.
@return DB_SUCCESS or DB_CANNOT_DROP_CONSTRAINT if syntax error or the
constraint id does not match */
-UNIV_INTERN
dberr_t
dict_foreign_parse_drop_constraints(
/*================================*/
@@ -5252,9 +4988,9 @@ dict_foreign_parse_drop_constraints(
char* str;
size_t len;
const char* ptr;
- const char* ptr1;
+ const char* ptr1;
const char* id;
- struct charset_info_st* cs;
+ CHARSET_INFO* cs;
ut_a(trx->mysql_thd);
@@ -5265,18 +5001,18 @@ dict_foreign_parse_drop_constraints(
*constraints_to_drop = static_cast<const char**>(
mem_heap_alloc(heap, 1000 * sizeof(char*)));
- ptr = innobase_get_stmt(trx->mysql_thd, &len);
+ ptr = innobase_get_stmt_unsafe(trx->mysql_thd, &len);
str = dict_strip_comments(ptr, len);
ptr = str;
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
loop:
ptr = dict_scan_to(ptr, "DROP");
if (*ptr == '\0') {
- mem_free(str);
+ ut_free(str);
return(DB_SUCCESS);
}
@@ -5305,11 +5041,10 @@ loop:
ptr1 = dict_accept(cs, ptr, "IF", &success);
if (success && my_isspace(cs, *ptr1)) {
- ptr1 = dict_accept(cs, ptr1, "EXISTS", &success);
- if (success) {
-
- ptr = ptr1;
- }
+ ptr1 = dict_accept(cs, ptr1, "EXISTS", &success);
+ if (success) {
+ ptr = ptr1;
+ }
}
ptr = dict_scan_id(cs, ptr, heap, &id, FALSE, TRUE);
@@ -5334,19 +5069,16 @@ loop:
mutex_enter(&dict_foreign_err_mutex);
rewind(ef);
ut_print_timestamp(ef);
- fputs(" Error in dropping of a foreign key "
- "constraint of table ", ef);
- ut_print_name(ef, NULL, TRUE, table->name);
- fputs(",\nin SQL command\n", ef);
- fputs(str, ef);
- fputs("\nCannot find a constraint with the "
- "given id ", ef);
- ut_print_name(ef, NULL, FALSE, id);
- fputs(".\n", ef);
+ fputs(" Error in dropping of a foreign key"
+ " constraint of table ", ef);
+ ut_print_name(ef, NULL, table->name.m_name);
+ fprintf(ef, ",\nin SQL command\n%s"
+ "\nCannot find a constraint with the"
+ " given id %s.\n", str, id);
mutex_exit(&dict_foreign_err_mutex);
}
- mem_free(str);
+ ut_free(str);
return(DB_CANNOT_DROP_CONSTRAINT);
}
@@ -5362,13 +5094,13 @@ syntax_error:
ut_print_timestamp(ef);
fputs(" Syntax error in dropping of a"
" foreign key constraint of table ", ef);
- ut_print_name(ef, NULL, TRUE, table->name);
+ ut_print_name(ef, NULL, table->name.m_name);
fprintf(ef, ",\n"
"close to:\n%s\n in SQL command\n%s\n", ptr, str);
mutex_exit(&dict_foreign_err_mutex);
}
- mem_free(str);
+ ut_free(str);
return(DB_CANNOT_DROP_CONSTRAINT);
}
@@ -5378,14 +5110,13 @@ syntax_error:
/**********************************************************************//**
Returns an index object if it is found in the dictionary cache.
Assumes that dict_sys->mutex is already being held.
-@return index, NULL if not found */
-UNIV_INTERN
+@return index, NULL if not found */
dict_index_t*
dict_index_get_if_in_cache_low(
/*===========================*/
index_id_t index_id) /*!< in: index id */
{
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
return(dict_index_find_on_id_low(index_id));
}
@@ -5393,8 +5124,7 @@ dict_index_get_if_in_cache_low(
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/**********************************************************************//**
Returns an index object if it is found in the dictionary cache.
-@return index, NULL if not found */
-UNIV_INTERN
+@return index, NULL if not found */
dict_index_t*
dict_index_get_if_in_cache(
/*=======================*/
@@ -5406,11 +5136,11 @@ dict_index_get_if_in_cache(
return(NULL);
}
- mutex_enter(&(dict_sys->mutex));
+ mutex_enter(&dict_sys->mutex);
index = dict_index_get_if_in_cache_low(index_id);
- mutex_exit(&(dict_sys->mutex));
+ mutex_exit(&dict_sys->mutex);
return(index);
}
@@ -5420,8 +5150,7 @@ dict_index_get_if_in_cache(
/**********************************************************************//**
Checks that a tuple has n_fields_cmp value in a sensible range, so that
no comparison can occur with the page number field in a node pointer.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
dict_index_check_search_tuple(
/*==========================*/
@@ -5436,8 +5165,7 @@ dict_index_check_search_tuple(
/**********************************************************************//**
Builds a node pointer out of a physical record and a page number.
-@return own: node pointer */
-UNIV_INTERN
+@return own: node pointer */
dtuple_t*
dict_index_build_node_ptr(
/*======================*/
@@ -5456,7 +5184,7 @@ dict_index_build_node_ptr(
byte* buf;
ulint n_unique;
- if (dict_index_is_univ(index)) {
+ if (dict_index_is_ibuf(index)) {
/* In a universal index tree, we take the whole record as
the node pointer if the record is on the leaf level,
on non-leaf levels we remove the last field, which
@@ -5470,7 +5198,7 @@ dict_index_build_node_ptr(
n_unique--;
}
} else {
- n_unique = dict_index_get_n_unique_in_tree(index);
+ n_unique = dict_index_get_n_unique_in_tree_nonleaf(index);
}
tuple = dtuple_create(heap, n_unique + 1);
@@ -5494,7 +5222,7 @@ dict_index_build_node_ptr(
dtype_set(dfield_get_type(field), DATA_SYS_CHILD, DATA_NOT_NULL, 4);
- rec_copy_prefix_to_dtuple(tuple, rec, index, n_unique, heap);
+ rec_copy_prefix_to_dtuple(tuple, rec, index, !level, n_unique, heap);
dtuple_set_info_bits(tuple, dtuple_get_info_bits(tuple)
| REC_STATUS_NODE_PTR);
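
A call sketch, assuming the parameter list is otherwise unchanged (index, record, child page number, heap, level): the `level` argument both selects the n_unique computation above and, through the new rec_copy_prefix_to_dtuple() argument, says whether `rec` is a leaf record:

	dtuple_t*	node_ptr = dict_index_build_node_ptr(
		index, rec, page_no, heap, level /* 0 = leaf */);
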
@@ -5506,8 +5234,7 @@ dict_index_build_node_ptr(
/**********************************************************************//**
Copies an initial segment of a physical record, long enough to specify an
index entry uniquely.
-@return pointer to the prefix record */
-UNIV_INTERN
+@return pointer to the prefix record */
rec_t*
dict_index_copy_rec_order_prefix(
/*=============================*/
@@ -5523,28 +5250,43 @@ dict_index_copy_rec_order_prefix(
UNIV_PREFETCH_R(rec);
- if (dict_index_is_univ(index)) {
+ if (dict_index_is_ibuf(index)) {
ut_a(!dict_table_is_comp(index->table));
n = rec_get_n_fields_old(rec);
} else {
- n = dict_index_get_n_unique_in_tree(index);
+ if (page_rec_is_leaf(rec)) {
+ n = dict_index_get_n_unique_in_tree(index);
+ } else {
+ n = dict_index_get_n_unique_in_tree_nonleaf(index);
+			/* For an internal node of an R-tree we also
+			need to compare the child page number field,
+			so copy that field as well. */
+ if (dict_index_is_spatial(index)) {
+ n++;
+ }
+ }
}
*n_fields = n;
return(rec_copy_prefix_to_buf(rec, index, n, buf, buf_size));
}
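
A call sketch, assuming the unchanged parameter list (rec, index, n_fields out, buf in/out, buf_size in/out); after the call, n_fields reflects the leaf/non-leaf distinction above, including the extra page-number field for a non-leaf R-tree record:

	ulint	n_fields;
	rec_t*	prefix = dict_index_copy_rec_order_prefix(
		rec, index, &n_fields, &buf, &buf_size);
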
-/**********************************************************************//**
-Builds a typed data tuple out of a physical record.
-@return own: data tuple */
-UNIV_INTERN
+/** Convert a physical record into a search tuple.
+@param[in] rec index record (not necessarily in an index page)
+@param[in] index index
+@param[in] leaf whether rec is in a leaf page
+@param[in] n_fields number of data fields
+@param[in,out] heap memory heap for allocation
+@return own: data tuple */
dtuple_t*
-dict_index_build_data_tuple(
-/*========================*/
- dict_index_t* index, /*!< in: index tree */
- rec_t* rec, /*!< in: record for which to build data tuple */
- ulint n_fields,/*!< in: number of data fields */
- mem_heap_t* heap) /*!< in: memory heap where tuple created */
+dict_index_build_data_tuple_func(
+ const rec_t* rec,
+ const dict_index_t* index,
+#ifdef UNIV_DEBUG
+ bool leaf,
+#endif /* UNIV_DEBUG */
+ ulint n_fields,
+ mem_heap_t* heap)
{
dtuple_t* tuple;
@@ -5555,7 +5297,7 @@ dict_index_build_data_tuple(
dict_index_copy_types(tuple, index, n_fields);
- rec_copy_prefix_to_dtuple(tuple, rec, index, n_fields, heap);
+ rec_copy_prefix_to_dtuple(tuple, rec, index, leaf, n_fields, heap);
ut_ad(dtuple_check_typed(tuple));
@@ -5564,7 +5306,6 @@ dict_index_build_data_tuple(
/*********************************************************************//**
Calculates the minimum record length in an index. */
-UNIV_INTERN
ulint
dict_index_calc_min_rec_len(
/*========================*/
@@ -5614,184 +5355,8 @@ dict_index_calc_min_rec_len(
}
/**********************************************************************//**
-Prints info of a foreign key constraint. */
-static
-void
-dict_foreign_print_low(
-/*===================*/
- dict_foreign_t* foreign) /*!< in: foreign key constraint */
-{
- ulint i;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- fprintf(stderr, " FOREIGN KEY CONSTRAINT %s: %s (",
- foreign->id, foreign->foreign_table_name);
-
- for (i = 0; i < foreign->n_fields; i++) {
- fprintf(stderr, " %s", foreign->foreign_col_names[i]);
- }
-
- fprintf(stderr, " )\n"
- " REFERENCES %s (",
- foreign->referenced_table_name);
-
- for (i = 0; i < foreign->n_fields; i++) {
- fprintf(stderr, " %s", foreign->referenced_col_names[i]);
- }
-
- fputs(" )\n", stderr);
-}
-
-/**********************************************************************//**
-Prints a table data. */
-UNIV_INTERN
-void
-dict_table_print(
-/*=============*/
- dict_table_t* table) /*!< in: table */
-{
- dict_index_t* index;
- ulint i;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- dict_table_stats_lock(table, RW_X_LATCH);
-
- if (!table->stat_initialized) {
- dict_stats_update_transient(table);
- }
-
- fprintf(stderr,
- "--------------------------------------\n"
- "TABLE: name %s, id %llu, flags %lx, columns %lu,"
- " indexes %lu, appr.rows " UINT64PF "\n"
- " COLUMNS: ",
- table->name,
- (ullint) table->id,
- (ulong) table->flags,
- (ulong) table->n_cols,
- (ulong) UT_LIST_GET_LEN(table->indexes),
- table->stat_n_rows);
-
- for (i = 0; i < (ulint) table->n_cols; i++) {
- dict_col_print_low(table, dict_table_get_nth_col(table, i));
- fputs("; ", stderr);
- }
-
- putc('\n', stderr);
-
- index = UT_LIST_GET_FIRST(table->indexes);
-
- while (index != NULL) {
- dict_index_print_low(index);
- index = UT_LIST_GET_NEXT(indexes, index);
- }
-
- dict_table_stats_unlock(table, RW_X_LATCH);
-
- std::for_each(table->foreign_set.begin(),
- table->foreign_set.end(),
- dict_foreign_print_low);
-
- std::for_each(table->referenced_set.begin(),
- table->referenced_set.end(),
- dict_foreign_print_low);
-}
-
-/**********************************************************************//**
-Prints a column data. */
-static
-void
-dict_col_print_low(
-/*===============*/
- const dict_table_t* table, /*!< in: table */
- const dict_col_t* col) /*!< in: column */
-{
- dtype_t type;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- dict_col_copy_type(col, &type);
- fprintf(stderr, "%s: ", dict_table_get_col_name(table,
- dict_col_get_no(col)));
-
- dtype_print(&type);
-}
-
-/**********************************************************************//**
-Prints an index data. */
-static
-void
-dict_index_print_low(
-/*=================*/
- dict_index_t* index) /*!< in: index */
-{
- ib_int64_t n_vals;
- ulint i;
-
- ut_a(index->table->stat_initialized);
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- if (index->n_user_defined_cols > 0) {
- n_vals = index->stat_n_diff_key_vals[
- index->n_user_defined_cols - 1];
- } else {
- n_vals = index->stat_n_diff_key_vals[0];
- }
-
- fprintf(stderr,
- " INDEX: name %s, id %llu, fields %lu/%lu,"
- " uniq %lu, type %lu\n"
- " root page %lu, appr.key vals %lu,"
- " leaf pages %lu, size pages %lu\n"
- " FIELDS: ",
- index->name,
- (ullint) index->id,
- (ulong) index->n_user_defined_cols,
- (ulong) index->n_fields,
- (ulong) index->n_uniq,
- (ulong) index->type,
- (ulong) index->page,
- (ulong) n_vals,
- (ulong) index->stat_n_leaf_pages,
- (ulong) index->stat_index_size);
-
- for (i = 0; i < index->n_fields; i++) {
- dict_field_print_low(dict_index_get_nth_field(index, i));
- }
-
- putc('\n', stderr);
-
-#ifdef UNIV_BTR_PRINT
- btr_print_size(index);
-
- btr_print_index(index, 7);
-#endif /* UNIV_BTR_PRINT */
-}
-
-/**********************************************************************//**
-Prints a field data. */
-static
-void
-dict_field_print_low(
-/*=================*/
- const dict_field_t* field) /*!< in: field */
-{
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- fprintf(stderr, " %s", field->name);
-
- if (field->prefix_len != 0) {
- fprintf(stderr, "(%lu)", (ulong) field->prefix_len);
- }
-}
-
-/**********************************************************************//**
Outputs info on a foreign key of a table in a format suitable for
CREATE TABLE. */
-UNIV_INTERN
std::string
dict_print_info_on_foreign_key_in_create_format(
/*============================================*/
@@ -5822,11 +5387,12 @@ dict_print_info_on_foreign_key_in_create_format(
str.append(" CONSTRAINT ");
- str.append(ut_get_name(trx, FALSE, stripped_id));
+ str.append(innobase_quote_identifier(trx, stripped_id));
str.append(" FOREIGN KEY (");
for (i = 0;;) {
- str.append(ut_get_name(trx, FALSE, foreign->foreign_col_names[i]));
+ str.append(innobase_quote_identifier(trx, foreign->foreign_col_names[i]));
+
if (++i < foreign->n_fields) {
str.append(", ");
} else {
@@ -5839,18 +5405,18 @@ dict_print_info_on_foreign_key_in_create_format(
if (dict_tables_have_same_db(foreign->foreign_table_name_lookup,
foreign->referenced_table_name_lookup)) {
/* Do not print the database name of the referenced table */
- str.append(ut_get_name(trx, TRUE,
+ str.append(ut_get_name(trx,
dict_remove_db_name(
foreign->referenced_table_name)));
} else {
- str.append(ut_get_name(trx, TRUE,
+ str.append(ut_get_name(trx,
foreign->referenced_table_name));
}
str.append(" (");
for (i = 0;;) {
- str.append(ut_get_name(trx, FALSE,
+ str.append(innobase_quote_identifier(trx,
foreign->referenced_col_names[i]));
if (++i < foreign->n_fields) {
@@ -5891,7 +5457,6 @@ dict_print_info_on_foreign_key_in_create_format(
/**********************************************************************//**
Outputs info on foreign keys of a table. */
-UNIV_INTERN
std::string
dict_print_info_on_foreign_keys(
/*============================*/
@@ -5905,7 +5470,7 @@ dict_print_info_on_foreign_keys(
dict_foreign_t* foreign;
std::string str;
- mutex_enter(&(dict_sys->mutex));
+ mutex_enter(&dict_sys->mutex);
for (dict_foreign_set::iterator it = table->foreign_set.begin();
it != table->foreign_set.end();
@@ -5926,12 +5491,12 @@ dict_print_info_on_foreign_keys(
str.append(" ");
}
- str.append(ut_get_name(trx, FALSE,
+ str.append(innobase_quote_identifier(trx,
foreign->foreign_col_names[i]));
}
str.append(") REFER ");
- str.append(ut_get_name(trx, TRUE,
+ str.append(ut_get_name(trx,
foreign->referenced_table_name));
str.append(")");
@@ -5939,8 +5504,8 @@ dict_print_info_on_foreign_keys(
if (i) {
str.append(" ");
}
- str.append(ut_get_name(
- trx, FALSE,
+ str.append(innobase_quote_identifier(
+ trx,
foreign->referenced_col_names[i]));
}
@@ -5972,37 +5537,20 @@ dict_print_info_on_foreign_keys(
}
}
- mutex_exit(&(dict_sys->mutex));
-
+ mutex_exit(&dict_sys->mutex);
return str;
}
-/********************************************************************//**
-Displays the names of the index and the table. */
-UNIV_INTERN
-void
-dict_index_name_print(
-/*==================*/
- FILE* file, /*!< in: output stream */
- const trx_t* trx, /*!< in: transaction */
- const dict_index_t* index) /*!< in: index to print */
-{
- fputs("index ", file);
- ut_print_name(file, trx, FALSE, index->name);
- fputs(" of table ", file);
- ut_print_name(file, trx, TRUE, index->table_name);
-}
-
-/**********************************************************************//**
-Find a table in dict_sys->table_LRU list with specified space id
+/** Given a space_id of a file-per-table tablespace, search the
+dict_sys->table_LRU list and return the dict_table_t* pointer for it.
+@param space_id Tablespace ID
@return table if found, NULL if not */
static
dict_table_t*
-dict_find_table_by_space(
-/*=====================*/
- ulint space_id) /*!< in: space ID */
+dict_find_single_table_by_space(
+ ulint space_id)
{
- dict_table_t* table;
+ dict_table_t* table;
ulint num_item;
ulint count = 0;
@@ -6018,11 +5566,14 @@ dict_find_table_by_space(
/* This function intentionally does not acquire mutex as it is used
by error handling code in deep call stack as last means to avoid
- killing the server, so it worth to risk some consequencies for
+	killing the server, so it is worth risking some consequences for
the action. */
while (table && count < num_item) {
if (table->space == space_id) {
- return(table);
+ if (dict_table_is_file_per_table(table)) {
+ return(table);
+ }
+ return(NULL);
}
table = UT_LIST_GET_NEXT(table_LRU, table);
@@ -6036,7 +5587,6 @@ dict_find_table_by_space(
Flags a table with specified space_id corrupted in the data dictionary
cache
@return TRUE if successful */
-UNIV_INTERN
ibool
dict_set_corrupted_by_space(
/*========================*/
@@ -6044,7 +5594,7 @@ dict_set_corrupted_by_space(
{
dict_table_t* table;
- table = dict_find_table_by_space(space_id);
+ table = dict_find_single_table_by_space(space_id);
if (!table) {
return(FALSE);
@@ -6059,7 +5609,7 @@ dict_set_corrupted_by_space(
}
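
A hedged usage sketch for the corruption flagging above: error-handling code deep in the I/O path typically has only a tablespace id in hand.

	/* Sketch: mark the owner of a corrupted file-per-table
	tablespace; returns FALSE if no such table is cached. */
	if (!dict_set_corrupted_by_space(space_id)) {
		/* fall back to other recovery actions */
	}
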
-/** Flags a table with specified space_id encrypted in the data dictionary
+/** Flag a table with specified space_id encrypted in the data dictionary
cache
@param[in] space_id Tablespace id */
UNIV_INTERN
@@ -6068,7 +5618,7 @@ dict_set_encrypted_by_space(ulint space_id)
{
dict_table_t* table;
- table = dict_find_table_by_space(space_id);
+ table = dict_find_single_table_by_space(space_id);
if (table) {
table->file_unreadable = true;
@@ -6078,7 +5628,6 @@ dict_set_encrypted_by_space(ulint space_id)
/**********************************************************************//**
Flags an index corrupted both in the data dictionary cache
and in the SYS_INDEXES */
-UNIV_INTERN
void
dict_set_corrupted(
/*===============*/
@@ -6092,7 +5641,6 @@ dict_set_corrupted(
dtuple_t* tuple;
dfield_t* dfield;
byte* buf;
- char* table_name;
const char* status;
btr_cur_t cursor;
bool locked = RW_X_LATCH == trx->dict_operation_lock_mode;
@@ -6104,10 +5652,7 @@ dict_set_corrupted(
ut_ad(mutex_own(&dict_sys->mutex));
ut_ad(!dict_table_is_comp(dict_sys->sys_tables));
ut_ad(!dict_table_is_comp(dict_sys->sys_indexes));
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(sync_thread_levels_empty_except_dict());
-#endif
+ ut_ad(!sync_check_iterate(dict_sync_check()));
/* Mark the table as corrupted only if the clustered index
is corrupted */
@@ -6121,6 +5666,13 @@ dict_set_corrupted(
goto func_exit;
}
+	/* If the server is in read-only mode, do not update SYS_INDEXES;
+	just mark the index as corrupted in memory */
+ if (srv_read_only_mode) {
+ index->type |= DICT_CORRUPT;
+ goto func_exit;
+ }
+
heap = mem_heap_create(sizeof(dtuple_t) + 2 * (sizeof(dfield_t)
+ sizeof(que_fork_t) + sizeof(upd_node_t)
+ sizeof(upd_t) + 12));
@@ -6167,15 +5719,8 @@ fail:
mtr_commit(&mtr);
mem_heap_empty(heap);
- table_name = static_cast<char*>(mem_heap_alloc(heap, FN_REFLEN + 1));
- *innobase_convert_name(
- table_name, FN_REFLEN,
- index->table_name, strlen(index->table_name),
- NULL, TRUE) = 0;
-
- ib_logf(IB_LOG_LEVEL_ERROR, "%s corruption of %s in table %s in %s",
- status, index->name, table_name, ctx);
-
+ ib::error() << status << " corruption of " << index->name
+ << " in table " << index->table->name << " in " << ctx;
mem_heap_free(heap);
func_exit:
@@ -6184,19 +5729,16 @@ func_exit:
}
}
-/**********************************************************************//**
-Flags an index corrupted in the data dictionary cache only. This
+/** Flags an index corrupted in the data dictionary cache only. This
is used mostly to mark a corrupted index when index's own dictionary
-is corrupted, and we force to load such index for repair purpose */
-UNIV_INTERN
+is corrupted, and we force loading of such an index for repair purposes
+@param[in,out] index index which is corrupted */
void
dict_set_corrupted_index_cache_only(
-/*================================*/
- dict_index_t* index, /*!< in/out: index */
- dict_table_t* table) /*!< in/out: table */
+ dict_index_t* index)
{
ut_ad(index != NULL);
- ut_ad(table != NULL);
+ ut_ad(index->table != NULL);
ut_ad(mutex_own(&dict_sys->mutex));
ut_ad(!dict_table_is_comp(dict_sys->sys_tables));
ut_ad(!dict_table_is_comp(dict_sys->sys_indexes));
@@ -6204,24 +5746,133 @@ dict_set_corrupted_index_cache_only(
/* Mark the table as corrupted only if the clustered index
is corrupted */
if (dict_index_is_clust(index)) {
- table->corrupted = TRUE;
+ index->table->corrupted = TRUE;
}
index->type |= DICT_CORRUPT;
}
-#endif /* !UNIV_HOTBACKUP */
-/**********************************************************************//**
-Inits dict_ind_redundant and dict_ind_compact. */
-UNIV_INTERN
+/** Sets merge_threshold in the SYS_INDEXES table
+@param[in,out] index index
+@param[in] merge_threshold value to set */
void
-dict_ind_init(void)
-/*===============*/
+dict_index_set_merge_threshold(
+ dict_index_t* index,
+ ulint merge_threshold)
+{
+ mem_heap_t* heap;
+ mtr_t mtr;
+ dict_index_t* sys_index;
+ dtuple_t* tuple;
+ dfield_t* dfield;
+ byte* buf;
+ btr_cur_t cursor;
+
+ ut_ad(index != NULL);
+ ut_ad(!dict_table_is_comp(dict_sys->sys_tables));
+ ut_ad(!dict_table_is_comp(dict_sys->sys_indexes));
+
+ rw_lock_x_lock(&dict_operation_lock);
+ mutex_enter(&(dict_sys->mutex));
+
+ heap = mem_heap_create(sizeof(dtuple_t) + 2 * (sizeof(dfield_t)
+ + sizeof(que_fork_t) + sizeof(upd_node_t)
+ + sizeof(upd_t) + 12));
+
+ mtr_start(&mtr);
+
+ sys_index = UT_LIST_GET_FIRST(dict_sys->sys_indexes->indexes);
+
+ /* Find the index row in SYS_INDEXES */
+ tuple = dtuple_create(heap, 2);
+
+ dfield = dtuple_get_nth_field(tuple, 0);
+ buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
+ mach_write_to_8(buf, index->table->id);
+ dfield_set_data(dfield, buf, 8);
+
+ dfield = dtuple_get_nth_field(tuple, 1);
+ buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
+ mach_write_to_8(buf, index->id);
+ dfield_set_data(dfield, buf, 8);
+
+ dict_index_copy_types(tuple, sys_index, 2);
+
+ btr_cur_search_to_nth_level(sys_index, 0, tuple, PAGE_CUR_GE,
+ BTR_MODIFY_LEAF,
+ &cursor, 0, __FILE__, __LINE__, &mtr);
+
+ if (cursor.up_match == dtuple_get_n_fields(tuple)
+ && rec_get_n_fields_old(btr_cur_get_rec(&cursor))
+ == DICT_NUM_FIELDS__SYS_INDEXES) {
+ ulint len;
+ byte* field = rec_get_nth_field_old(
+ btr_cur_get_rec(&cursor),
+ DICT_FLD__SYS_INDEXES__MERGE_THRESHOLD, &len);
+
+ ut_ad(len == 4);
+
+ if (len == 4) {
+ mlog_write_ulint(field, merge_threshold,
+ MLOG_4BYTES, &mtr);
+ }
+ }
+
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
+
+ mutex_exit(&(dict_sys->mutex));
+ rw_lock_x_unlock(&dict_operation_lock);
+}
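
A hedged usage sketch for the setter above; in practice the threshold (a percentage of the page size that triggers page merges) would come from parsed table or index options, which is an assumption here:

	dict_index_t*	index = dict_table_get_index_on_name(table, "PRIMARY");

	if (index != NULL) {
		/* persist a 40% merge threshold into SYS_INDEXES */
		dict_index_set_merge_threshold(index, 40);
	}
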
+
+#ifdef UNIV_DEBUG
+/** Sets merge_threshold for all indexes in the list of tables
+@param[in]	list			pointer to the list of tables
+@param[in]	merge_threshold_all	value to set for all indexes */
+inline
+void
+dict_set_merge_threshold_list_debug(
+ UT_LIST_BASE_NODE_T(dict_table_t)* list,
+ uint merge_threshold_all)
+{
+ for (dict_table_t* table = UT_LIST_GET_FIRST(*list);
+ table != NULL;
+ table = UT_LIST_GET_NEXT(table_LRU, table)) {
+ for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
+ index != NULL;
+ index = UT_LIST_GET_NEXT(indexes, index)) {
+ rw_lock_x_lock(dict_index_get_lock(index));
+ index->merge_threshold = merge_threshold_all;
+ rw_lock_x_unlock(dict_index_get_lock(index));
+ }
+ }
+}
+
+/** Sets merge_threshold for all indexes in dictionary cache for debug.
+@param[in] merge_threshold_all value to set for all indexes */
+void
+dict_set_merge_threshold_all_debug(
+ uint merge_threshold_all)
+{
+ mutex_enter(&dict_sys->mutex);
+
+ dict_set_merge_threshold_list_debug(
+ &dict_sys->table_LRU, merge_threshold_all);
+ dict_set_merge_threshold_list_debug(
+ &dict_sys->table_non_LRU, merge_threshold_all);
+
+ mutex_exit(&dict_sys->mutex);
+}
+
+#endif /* UNIV_DEBUG */
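
For the debug-only bulk setter above, a minimal sketch (how it is wired to a debug system variable is an assumption):

	#ifdef UNIV_DEBUG
	/* force every cached index to use a 50% merge threshold */
	dict_set_merge_threshold_all_debug(50);
	#endif
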
+
+/** Initialize dict_ind_redundant. */
+void
+dict_ind_init()
{
dict_table_t* table;
/* create dummy table and index for REDUNDANT infimum and supremum */
- table = dict_mem_table_create("SYS_DUMMY1", DICT_HDR_SPACE, 1, 0, 0);
+ table = dict_mem_table_create("SYS_DUMMY1", DICT_HDR_SPACE, 1, 0, 0, 0);
dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR,
DATA_ENGLISH | DATA_NOT_NULL, 8);
@@ -6230,61 +5881,33 @@ dict_ind_init(void)
dict_index_add_col(dict_ind_redundant, table,
dict_table_get_nth_col(table, 0), 0);
dict_ind_redundant->table = table;
-
- /* create dummy table and index for COMPACT infimum and supremum */
- table = dict_mem_table_create("SYS_DUMMY2",
- DICT_HDR_SPACE, 1,
- DICT_TF_COMPACT, 0);
- dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR,
- DATA_ENGLISH | DATA_NOT_NULL, 8);
- dict_ind_compact = dict_mem_index_create("SYS_DUMMY2", "SYS_DUMMY2",
- DICT_HDR_SPACE, 0, 1);
- dict_index_add_col(dict_ind_compact, table,
- dict_table_get_nth_col(table, 0), 0);
- dict_ind_compact->table = table;
-
/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
- dict_ind_redundant->cached = dict_ind_compact->cached = TRUE;
+ dict_ind_redundant->cached = TRUE;
}
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Frees dict_ind_redundant and dict_ind_compact. */
-static
+/** Free dict_ind_redundant. */
void
-dict_ind_free(void)
-/*===============*/
+dict_ind_free()
{
- dict_table_t* table;
-
- table = dict_ind_compact->table;
- dict_mem_index_free(dict_ind_compact);
- dict_ind_compact = NULL;
- dict_mem_table_free(table);
-
- table = dict_ind_redundant->table;
+ dict_table_t* table = dict_ind_redundant->table;
dict_mem_index_free(dict_ind_redundant);
dict_ind_redundant = NULL;
dict_mem_table_free(table);
}
-/**********************************************************************//**
-Get index by name
-@return index, NULL if does not exist */
-UNIV_INTERN
+/** Get an index by name.
+@param[in] table the table where to look for the index
+@param[in] name the index name to look for
+@return index, NULL if does not exist */
dict_index_t*
-dict_table_get_index_on_name(
-/*=========================*/
- dict_table_t* table, /*!< in: table */
- const char* name) /*!< in: name of the index to find */
+dict_table_get_index_on_name(dict_table_t* table, const char* name)
{
dict_index_t* index;
index = dict_table_get_first_index(table);
while (index != NULL) {
- if (innobase_strcasecmp(index->name, name) == 0) {
-
+ if (index->is_committed() && !strcmp(index->name, name)) {
return(index);
}
@@ -6298,7 +5921,6 @@ dict_table_get_index_on_name(
Replace the index passed in with another equivalent index in the
foreign key lists of the table.
@return whether all replacements were found */
-UNIV_INTERN
bool
dict_foreign_replace_index(
/*=======================*/
@@ -6369,42 +5991,9 @@ dict_foreign_replace_index(
return(found);
}
-/**********************************************************************//**
-In case there is more than one index with the same name return the index
-with the min(id).
-@return index, NULL if does not exist */
-UNIV_INTERN
-dict_index_t*
-dict_table_get_index_on_name_and_min_id(
-/*=====================================*/
- dict_table_t* table, /*!< in: table */
- const char* name) /*!< in: name of the index to find */
-{
- dict_index_t* index;
- dict_index_t* min_index; /* Index with matching name and min(id) */
-
- min_index = NULL;
- index = dict_table_get_first_index(table);
-
- while (index != NULL) {
- if (ut_strcmp(index->name, name) == 0) {
- if (!min_index || index->id < min_index->id) {
-
- min_index = index;
- }
- }
-
- index = dict_table_get_next_index(index);
- }
-
- return(min_index);
-
-}
-
#ifdef UNIV_DEBUG
/**********************************************************************//**
Check for duplicate index entries in a table [using the index name] */
-UNIV_INTERN
void
dict_table_check_for_dup_indexes(
/*=============================*/
@@ -6427,7 +6016,7 @@ dict_table_check_for_dup_indexes(
index1 = UT_LIST_GET_FIRST(table->indexes);
do {
- if (*index1->name == TEMP_INDEX_PREFIX) {
+ if (!index1->is_committed()) {
ut_a(!dict_index_is_clust(index1));
switch (check) {
@@ -6452,7 +6041,9 @@ dict_table_check_for_dup_indexes(
for (index2 = UT_LIST_GET_NEXT(indexes, index1);
index2 != NULL;
index2 = UT_LIST_GET_NEXT(indexes, index2)) {
- ut_ad(ut_strcmp(index1->name, index2->name));
+ ut_ad(index1->is_committed()
+ != index2->is_committed()
+ || strcmp(index1->name, index2->name) != 0);
}
index1 = UT_LIST_GET_NEXT(indexes, index1);
@@ -6478,7 +6069,6 @@ types. The order of the columns does not matter.
The caller must own the dictionary mutex.
dict_table_schema_check() @{
@return DB_SUCCESS if the table exists and contains the necessary columns */
-UNIV_INTERN
dberr_t
dict_table_schema_check(
/*====================*/
@@ -6520,10 +6110,10 @@ dict_table_schema_check(
}
if (should_print) {
- ut_snprintf(errstr, errstr_sz,
+ snprintf(errstr, errstr_sz,
"Table %s not found.",
ut_format_name(req_schema->table_name,
- TRUE, buf, sizeof(buf)));
+ buf, sizeof(buf)));
return(DB_TABLE_NOT_FOUND);
} else {
return(DB_STATS_DO_NOT_EXIST);
@@ -6534,24 +6124,23 @@ dict_table_schema_check(
fil_space_get(table->space) == NULL) {
/* missing tablespace */
- ut_snprintf(errstr, errstr_sz,
+ snprintf(errstr, errstr_sz,
"Tablespace for table %s is missing.",
ut_format_name(req_schema->table_name,
- TRUE, buf, sizeof(buf)));
+ buf, sizeof(buf)));
return(DB_TABLE_NOT_FOUND);
}
- if ((ulint) table->n_def - DATA_N_SYS_COLS != req_schema->n_cols) {
- /* the table has a different number of columns than
- required */
-
- ut_snprintf(errstr, errstr_sz,
- "%s has %d columns but should have %lu.",
- ut_format_name(req_schema->table_name,
- TRUE, buf, sizeof(buf)),
- table->n_def - DATA_N_SYS_COLS,
- req_schema->n_cols);
+ if (ulint(table->n_def) - DATA_N_SYS_COLS != req_schema->n_cols) {
+ /* the table has a different number of columns than required */
+ snprintf(errstr, errstr_sz,
+ "%s has " ULINTPF " columns but should have "
+ ULINTPF ".",
+ ut_format_name(req_schema->table_name, buf,
+ sizeof buf),
+ ulint(table->n_def) - DATA_N_SYS_COLS,
+ req_schema->n_cols);
return(DB_ERROR);
}
@@ -6562,60 +6151,45 @@ dict_table_schema_check(
be O(n_cols) if the columns are in the same order in both arrays. */
for (i = 0; i < req_schema->n_cols; i++) {
- ulint j;
-
- /* check if i'th column is the same in both arrays */
- if (innobase_strcasecmp(req_schema->columns[i].name,
- dict_table_get_col_name(table, i)) == 0) {
-
- /* we found the column in table->cols[] quickly */
- j = i;
- } else {
-
- /* columns in both arrays are not in the same order,
- do a full scan of the second array */
- for (j = 0; j < table->n_def; j++) {
- const char* name;
+ ulint j = dict_table_has_column(
+ table, req_schema->columns[i].name, i);
- name = dict_table_get_col_name(table, j);
+ if (j == table->n_def) {
- if (innobase_strcasecmp(name,
- req_schema->columns[i].name) == 0) {
-
- /* found the column on j'th
- position */
- break;
- }
- }
-
- if (j == table->n_def) {
-
- ut_snprintf(errstr, errstr_sz,
- "required column %s "
- "not found in table %s.",
- req_schema->columns[i].name,
- ut_format_name(
- req_schema->table_name,
- TRUE, buf, sizeof(buf)));
+ snprintf(errstr, errstr_sz,
+ "required column %s"
+ " not found in table %s.",
+ req_schema->columns[i].name,
+ ut_format_name(
+ req_schema->table_name,
+ buf, sizeof(buf)));
- return(DB_ERROR);
- }
+ return(DB_ERROR);
}
/* we found a column with the same name on j'th position,
compare column types and flags */
/* check length for exact match */
- if (req_schema->columns[i].len != table->cols[j].len) {
-
+ if (req_schema->columns[i].len == table->cols[j].len) {
+ } else if (!strcmp(req_schema->table_name, TABLE_STATS_NAME)
+ || !strcmp(req_schema->table_name,
+ INDEX_STATS_NAME)) {
+ ut_ad(table->cols[j].len < req_schema->columns[i].len);
+ ib::warn() << "Table " << req_schema->table_name
+ << " has length mismatch in the"
+ << " column name "
+ << req_schema->columns[i].name
+ << ". Please run mysql_upgrade";
+ } else {
CREATE_TYPES_NAMES();
- ut_snprintf(errstr, errstr_sz,
- "Column %s in table %s is %s "
- "but should be %s (length mismatch).",
+ snprintf(errstr, errstr_sz,
+ "Column %s in table %s is %s"
+ " but should be %s (length mismatch).",
req_schema->columns[i].name,
ut_format_name(req_schema->table_name,
- TRUE, buf, sizeof(buf)),
+ buf, sizeof(buf)),
actual_type, req_type);
return(DB_ERROR);
@@ -6634,12 +6208,12 @@ dict_table_schema_check(
{
CREATE_TYPES_NAMES();
- ut_snprintf(errstr, errstr_sz,
- "Column %s in table %s is %s "
- "but should be %s (type mismatch).",
+ snprintf(errstr, errstr_sz,
+ "Column %s in table %s is %s"
+ " but should be %s (type mismatch).",
req_schema->columns[i].name,
ut_format_name(req_schema->table_name,
- TRUE, buf, sizeof(buf)),
+ buf, sizeof(buf)),
actual_type, req_type);
return(DB_ERROR);
@@ -6653,12 +6227,12 @@ dict_table_schema_check(
CREATE_TYPES_NAMES();
- ut_snprintf(errstr, errstr_sz,
- "Column %s in table %s is %s "
- "but should be %s (flags mismatch).",
+ snprintf(errstr, errstr_sz,
+ "Column %s in table %s is %s"
+ " but should be %s (flags mismatch).",
req_schema->columns[i].name,
ut_format_name(req_schema->table_name,
- TRUE, buf, sizeof(buf)),
+ buf, sizeof(buf)),
actual_type, req_type);
return(DB_ERROR);
@@ -6666,25 +6240,25 @@ dict_table_schema_check(
}
if (req_schema->n_foreign != table->foreign_set.size()) {
- ut_snprintf(
+ snprintf(
errstr, errstr_sz,
"Table %s has " ULINTPF " foreign key(s) pointing"
- " to other tables, but it must have %lu.",
+ " to other tables, but it must have " ULINTPF ".",
ut_format_name(req_schema->table_name,
- TRUE, buf, sizeof(buf)),
+ buf, sizeof(buf)),
static_cast<ulint>(table->foreign_set.size()),
req_schema->n_foreign);
return(DB_ERROR);
}
if (req_schema->n_referenced != table->referenced_set.size()) {
- ut_snprintf(
+ snprintf(
errstr, errstr_sz,
"There are " ULINTPF " foreign key(s) pointing to %s, "
- "but there must be %lu.",
+ "but there must be " ULINTPF ".",
static_cast<ulint>(table->referenced_set.size()),
ut_format_name(req_schema->table_name,
- TRUE, buf, sizeof(buf)),
+ buf, sizeof(buf)),
req_schema->n_referenced);
return(DB_ERROR);
}
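
A hedged usage sketch of dict_table_schema_check(), using only what the code above reads from req_schema (table_name, columns[].name/.len, n_cols, n_foreign, n_referenced); the schema object is assumed to be pre-built elsewhere:

	char	errstr[512];

	if (dict_table_schema_check(&table_stats_schema,	/* hypothetical */
				    errstr, sizeof errstr)
	    != DB_SUCCESS) {
		ib::warn() << errstr;
	}
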
@@ -6698,7 +6272,6 @@ Converts a database and table name from filesystem encoding
(e.g. d@i1b/a@q1b@1Kc, same format as used in dict_table_t::name) in two
strings in UTF8 encoding (e.g. dцb and aюbØc). The output buffers must be
at least MAX_DB_UTF8_LEN and MAX_TABLE_UTF8_LEN bytes. */
-UNIV_INTERN
void
dict_fs2utf8(
/*=========*/
@@ -6721,8 +6294,8 @@ dict_fs2utf8(
db[db_len] = '\0';
strconvert(
- &my_charset_filename, db, db_len, system_charset_info,
- db_utf8, static_cast<uint>(db_utf8_size), &errors);
+ &my_charset_filename, db, uint(db_len), system_charset_info,
+ db_utf8, uint(db_utf8_size), &errors);
/* convert each # to @0023 in table name and store the result in buf */
const char* table = dict_remove_db_name(db_and_table);
@@ -6747,19 +6320,66 @@ dict_fs2utf8(
errors = 0;
strconvert(
- &my_charset_filename, buf, (uint) (buf_p - buf), system_charset_info,
- table_utf8, static_cast<uint>(table_utf8_size),
+ &my_charset_filename, buf, (uint) (buf_p - buf),
+ system_charset_info,
+ table_utf8, uint(table_utf8_size),
&errors);
if (errors != 0) {
- ut_snprintf(table_utf8, table_utf8_size, "%s%s",
+ snprintf(table_utf8, table_utf8_size, "%s%s",
srv_mysql50_table_name_prefix, table);
}
}
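
The '#' handling above (each '#' in a table name is stored as '@0023' in filesystem encoding) is easy to illustrate with a standalone decoder; this sketch covers only that one escape, not the full strconvert() charset conversion:

	#include <string>

	std::string decode_hash(const std::string& in)
	{
		std::string out;
		for (std::string::size_type i = 0; i < in.size(); i++) {
			if (in.compare(i, 5, "@0023") == 0) {
				out += '#';	/* consume the escape */
				i += 4;
			} else {
				out += in[i];
			}
		}
		return out;	/* "t@0023mp" decodes to "t#mp" */
	}
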
+/** Resize the hash tables based on the current buffer pool size. */
+void
+dict_resize()
+{
+ dict_table_t* table;
+
+ mutex_enter(&dict_sys->mutex);
+
+ /* all table entries are in table_LRU and table_non_LRU lists */
+ hash_table_free(dict_sys->table_hash);
+ hash_table_free(dict_sys->table_id_hash);
+
+ dict_sys->table_hash = hash_create(
+ buf_pool_get_curr_size()
+ / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE));
+
+ dict_sys->table_id_hash = hash_create(
+ buf_pool_get_curr_size()
+ / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE));
+
+ for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU); table;
+ table = UT_LIST_GET_NEXT(table_LRU, table)) {
+ ulint fold = ut_fold_string(table->name.m_name);
+ ulint id_fold = ut_fold_ull(table->id);
+
+ HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash,
+ fold, table);
+
+ HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash,
+ id_fold, table);
+ }
+
+ for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU); table;
+ table = UT_LIST_GET_NEXT(table_LRU, table)) {
+ ulint fold = ut_fold_string(table->name.m_name);
+ ulint id_fold = ut_fold_ull(table->id);
+
+ HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash,
+ fold, table);
+
+ HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash,
+ id_fold, table);
+ }
+
+ mutex_exit(&dict_sys->mutex);
+}
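
dict_resize() frees both hash arrays and re-creates them sized from buf_pool_get_curr_size(), then re-inserts every cached table; the entries themselves are not copied, only re-hashed. The pattern, reduced to a generic sketch (item_t, hash_chain and fold() are placeholders):

	hash_table_free(old_hash);
	hash_table_t*	new_hash = hash_create(new_n_cells);

	for (item_t* it = first_item; it != NULL; it = it->next) {
		HASH_INSERT(item_t, hash_chain, new_hash, fold(it->key), it);
	}
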
+
/**********************************************************************//**
Closes the data dictionary module. */
-UNIV_INTERN
void
dict_close(void)
/*============*/
@@ -6779,9 +6399,7 @@ dict_close(void)
table = static_cast<dict_table_t*>(
HASH_GET_NEXT(name_hash, prev_table));
-#ifdef UNIV_DEBUG
- ut_a(prev_table->magic_n == DICT_TABLE_MAGIC_N);
-#endif
+ ut_ad(prev_table->magic_n == DICT_TABLE_MAGIC_N);
/* Acquire only because it's a pre-condition. */
mutex_enter(&dict_sys->mutex);
@@ -6797,27 +6415,21 @@ dict_close(void)
therefore we don't delete the individual elements. */
hash_table_free(dict_sys->table_id_hash);
- dict_ind_free();
-
mutex_free(&dict_sys->mutex);
rw_lock_free(&dict_operation_lock);
- memset(&dict_operation_lock, 0x0, sizeof(dict_operation_lock));
- if (!srv_read_only_mode) {
- mutex_free(&dict_foreign_err_mutex);
- }
+ mutex_free(&dict_foreign_err_mutex);
- delete dict_sys->autoinc_map;
+ ut_free(dict_sys);
- mem_free(dict_sys);
dict_sys = NULL;
}
#ifdef UNIV_DEBUG
/**********************************************************************//**
Validate the dictionary table LRU list.
-@return TRUE if valid */
+@return TRUE if valid */
static
ibool
dict_lru_validate(void)
@@ -6905,7 +6517,6 @@ Check an index to see whether its first fields are the columns in the array,
in the same order and is not marked for deletion and is not the same
as types_idx.
@return true if the index qualifies, otherwise false */
-UNIV_INTERN
bool
dict_foreign_qualify_index(
/*=======================*/
@@ -6928,7 +6539,7 @@ dict_foreign_qualify_index(
/*!< in: nonzero if none of
the columns must be declared
NOT NULL */
- ulint* error, /*!< out: error code */
+ fkerr_t* error, /*!< out: error code */
ulint* err_col_no,
/*!< out: column number where
error happened */
@@ -6940,6 +6551,10 @@ dict_foreign_qualify_index(
return(false);
}
+ if (index->type & (DICT_SPATIAL | DICT_FTS)) {
+ return false;
+ }
+
for (ulint i = 0; i < n_cols; i++) {
dict_field_t* field;
const char* col_name;
@@ -6952,7 +6567,7 @@ dict_foreign_qualify_index(
/* We do not accept column prefix
indexes here */
if (error && err_col_no && err_index) {
- *error = DB_FOREIGN_KEY_IS_PREFIX_INDEX;
+ *error = FK_IS_PREFIX_INDEX;
*err_col_no = i;
*err_index = (dict_index_t*)index;
}
@@ -6962,16 +6577,26 @@ dict_foreign_qualify_index(
if (check_null
&& (field->col->prtype & DATA_NOT_NULL)) {
if (error && err_col_no && err_index) {
- *error = DB_FOREIGN_KEY_COL_NOT_NULL;
+ *error = FK_COL_NOT_NULL;
*err_col_no = i;
*err_index = (dict_index_t*)index;
}
return(false);
}
- col_name = col_names
- ? col_names[col_no]
- : dict_table_get_col_name(table, col_no);
+ if (dict_col_is_virtual(field->col)) {
+ col_name = "";
+ for (ulint j = 0; j < table->n_v_def; j++) {
+ col_name = dict_table_get_v_col_name(table, j);
+				if (innobase_strcasecmp(field->name, col_name) == 0) {
+ break;
+ }
+ }
+ } else {
+ col_name = col_names
+ ? col_names[col_no]
+ : dict_table_get_col_name(table, col_no);
+ }
if (0 != innobase_strcasecmp(columns[i], col_name)) {
return(false);
@@ -6982,7 +6607,7 @@ dict_foreign_qualify_index(
dict_index_get_nth_col(types_idx, i),
check_charsets)) {
if (error && err_col_no && err_index) {
- *error = DB_FOREIGN_KEY_COLS_NOT_EQUAL;
+ *error = FK_COLS_NOT_EQUAL;
*err_col_no = i;
*err_index = (dict_index_t*)index;
}
@@ -7014,7 +6639,7 @@ dict_index_zip_pad_update(
ut_ad(total > 0);
- if(zip_threshold == 0) {
+ if (zip_threshold == 0) {
/* User has just disabled the padding. */
return;
}
@@ -7040,15 +6665,10 @@ dict_index_zip_pad_update(
beyond max pad size. */
if (info->pad + ZIP_PAD_INCR
< (UNIV_PAGE_SIZE * zip_pad_max) / 100) {
-#ifdef HAVE_ATOMIC_BUILTINS
/* Use atomics even though we have the mutex.
This is to ensure that we are able to read
- info->pad atomically where atomics are
- supported. */
- os_atomic_increment_ulint(&info->pad, ZIP_PAD_INCR);
-#else /* HAVE_ATOMIC_BUILTINS */
- info->pad += ZIP_PAD_INCR;
-#endif /* HAVE_ATOMIC_BUILTINS */
+ info->pad atomically. */
+ my_atomic_addlint(&info->pad, ZIP_PAD_INCR);
MONITOR_INC(MONITOR_PAD_INCREMENTS);
}
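
Both pad adjustments use my_atomic_addlint() even though the pad mutex is held, so that readers such as dict_index_zip_pad_optimal_page_size() further below can load info->pad without locking; the decrement in the next hunk is simply an addition of a negative delta, which is well defined for unsigned integers modulo 2^n. A standalone sketch of the same pattern:

	#include <atomic>

	std::atomic<unsigned long>	pad{0};

	/* writer, under the pad mutex in InnoDB: */
	pad.fetch_add(128);		/* grow */
	pad.fetch_add(-128UL);		/* shrink via negative delta */

	/* reader, lock-free: */
	unsigned long	snapshot = pad.load();
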
@@ -7067,15 +6687,10 @@ dict_index_zip_pad_update(
&& info->pad > 0) {
ut_ad(info->pad % ZIP_PAD_INCR == 0);
-#ifdef HAVE_ATOMIC_BUILTINS
/* Use atomics even though we have the mutex.
This is to ensure that we are able to read
- info->pad atomically where atomics are
- supported. */
- os_atomic_decrement_ulint(&info->pad, ZIP_PAD_INCR);
-#else /* HAVE_ATOMIC_BUILTINS */
- info->pad -= ZIP_PAD_INCR;
-#endif /* HAVE_ATOMIC_BUILTINS */
+ info->pad atomically. */
+ my_atomic_addlint(&info->pad, -ZIP_PAD_INCR);
info->n_rounds = 0;
@@ -7087,7 +6702,6 @@ dict_index_zip_pad_update(
/*********************************************************************//**
This function should be called whenever a page is successfully
compressed. Updates the compression padding information. */
-UNIV_INTERN
void
dict_index_zip_success(
/*===================*/
@@ -7108,7 +6722,6 @@ dict_index_zip_success(
/*********************************************************************//**
This function should be called whenever a page compression attempt
fails. Updates the compression padding information. */
-UNIV_INTERN
void
dict_index_zip_failure(
/*===================*/
@@ -7126,11 +6739,9 @@ dict_index_zip_failure(
dict_index_zip_pad_unlock(index);
}
-
/*********************************************************************//**
Return the optimal page size, for which page will likely compress.
@return page size beyond which page might not compress */
-UNIV_INTERN
ulint
dict_index_zip_pad_optimal_page_size(
/*=================================*/
@@ -7146,17 +6757,7 @@ dict_index_zip_pad_optimal_page_size(
return(UNIV_PAGE_SIZE);
}
- /* We use atomics to read index->zip_pad.pad. Here we use zero
- as increment as are not changing the value of the 'pad'. On
- platforms where atomics are not available we grab the mutex. */
-
-#ifdef HAVE_ATOMIC_BUILTINS
- pad = os_atomic_increment_ulint(&index->zip_pad.pad, 0);
-#else /* HAVE_ATOMIC_BUILTINS */
- dict_index_zip_pad_lock(index);
- pad = index->zip_pad.pad;
- dict_index_zip_pad_unlock(index);
-#endif /* HAVE_ATOMIC_BUILTINS */
+ pad = my_atomic_loadlint(&index->zip_pad.pad);
ut_ad(pad < UNIV_PAGE_SIZE);
sz = UNIV_PAGE_SIZE - pad;
@@ -7171,7 +6772,6 @@ dict_index_zip_pad_optimal_page_size(
/*************************************************************//**
Convert table flag to row format string.
@return row format name. */
-UNIV_INTERN
const char*
dict_tf_to_row_format_string(
/*=========================*/
@@ -7191,7 +6791,6 @@ dict_tf_to_row_format_string(
ut_error;
return(0);
}
-#endif /* !UNIV_HOTBACKUP */
/** Calculate the used memory occupied by the data dictionary
table and index objects.
@@ -7211,3 +6810,153 @@ dict_sys_get_size()
return size;
}
+
+/** Look for any dictionary objects that are found in the given tablespace.
+@param[in] space_id Tablespace ID to search for.
+@return true if tablespace is empty. */
+bool
+dict_space_is_empty(
+ ulint space_id)
+{
+ btr_pcur_t pcur;
+ const rec_t* rec;
+ mtr_t mtr;
+ bool found = false;
+
+ rw_lock_x_lock(&dict_operation_lock);
+ mutex_enter(&dict_sys->mutex);
+ mtr_start(&mtr);
+
+ for (rec = dict_startscan_system(&pcur, &mtr, SYS_TABLES);
+ rec != NULL;
+ rec = dict_getnext_system(&pcur, &mtr)) {
+ const byte* field;
+ ulint len;
+ ulint space_id_for_table;
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLES__SPACE, &len);
+ ut_ad(len == 4);
+ space_id_for_table = mach_read_from_4(field);
+
+ if (space_id_for_table == space_id) {
+ found = true;
+ }
+ }
+
+ mtr_commit(&mtr);
+ mutex_exit(&dict_sys->mutex);
+ rw_lock_x_unlock(&dict_operation_lock);
+
+ return(!found);
+}
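
A hedged usage sketch: a caller dropping a shared tablespace could gate on this check (the surrounding logic is an assumption):

	if (!dict_space_is_empty(space_id)) {
		/* refuse the drop: SYS_TABLES still references it */
		return(DB_ERROR);
	}
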
+
+/** Find the space_id for the given name in sys_tablespaces.
+@param[in] name Tablespace name to search for.
+@return the tablespace ID. */
+ulint
+dict_space_get_id(
+ const char* name)
+{
+ btr_pcur_t pcur;
+ const rec_t* rec;
+ mtr_t mtr;
+ ulint name_len = strlen(name);
+ ulint id = ULINT_UNDEFINED;
+
+ rw_lock_x_lock(&dict_operation_lock);
+ mutex_enter(&dict_sys->mutex);
+ mtr_start(&mtr);
+
+ for (rec = dict_startscan_system(&pcur, &mtr, SYS_TABLESPACES);
+ rec != NULL;
+ rec = dict_getnext_system(&pcur, &mtr)) {
+ const byte* field;
+ ulint len;
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLESPACES__NAME, &len);
+ ut_ad(len > 0);
+ ut_ad(len < OS_FILE_MAX_PATH);
+
+ if (len == name_len && ut_memcmp(name, field, len) == 0) {
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLESPACES__SPACE, &len);
+ ut_ad(len == 4);
+ id = mach_read_from_4(field);
+
+ /* This is normally called by dict_getnext_system()
+ at the end of the index. */
+ btr_pcur_close(&pcur);
+ break;
+ }
+ }
+
+ mtr_commit(&mtr);
+ mutex_exit(&dict_sys->mutex);
+ rw_lock_x_unlock(&dict_operation_lock);
+
+ return(id);
+}
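
Usage follows directly from the return convention above:

	ulint	space_id = dict_space_get_id(tablespace_name);

	if (space_id == ULINT_UNDEFINED) {
		/* no SYS_TABLESPACES record with that name */
	}
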
+
+/** Determine the extent size (in pages) for the given table
+@param[in] table the table whose extent size is being
+ calculated.
+@return extent size in pages (256, 128 or 64) */
+ulint
+dict_table_extent_size(
+ const dict_table_t* table)
+{
+ const ulint mb_1 = 1024 * 1024;
+ const ulint mb_2 = 2 * mb_1;
+ const ulint mb_4 = 4 * mb_1;
+
+ page_size_t page_size = dict_table_page_size(table);
+ ulint pages_in_extent = FSP_EXTENT_SIZE;
+
+ if (page_size.is_compressed()) {
+
+ ulint disk_page_size = page_size.physical();
+
+ switch (disk_page_size) {
+ case 1024:
+ pages_in_extent = mb_1/1024;
+ break;
+ case 2048:
+ pages_in_extent = mb_1/2048;
+ break;
+ case 4096:
+ pages_in_extent = mb_1/4096;
+ break;
+ case 8192:
+ pages_in_extent = mb_1/8192;
+ break;
+ case 16384:
+ pages_in_extent = mb_1/16384;
+ break;
+ case 32768:
+ pages_in_extent = mb_2/32768;
+ break;
+ case 65536:
+ pages_in_extent = mb_4/65536;
+ break;
+ default:
+ ut_ad(0);
+ }
+ }
+
+ return(pages_in_extent);
+}
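
Worked values from the switch above: a compressed physical page size of 4096 bytes gives 1MiB/4096 = 256 pages per extent, 8192 gives 128, and 16384 gives 64; the 32KiB and 64KiB cases grow the extent to 2MiB and 4MiB, so both also come out at 64 pages. Uncompressed tables keep FSP_EXTENT_SIZE unchanged.
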
+
+size_t
+dict_table_t::get_overflow_field_local_len() const
+{
+ if (dict_table_get_format(this) < UNIV_FORMAT_B) {
+ /* up to MySQL 5.1: store a 768-byte prefix locally */
+ return BTR_EXTERN_FIELD_REF_SIZE
+ + DICT_ANTELOPE_MAX_INDEX_COL_LEN;
+ }
+ /* new-format table: do not store any BLOB prefix locally */
+ return BTR_EXTERN_FIELD_REF_SIZE;
+}
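
Assuming the usual constants (BTR_EXTERN_FIELD_REF_SIZE = 20 and DICT_ANTELOPE_MAX_INDEX_COL_LEN = 768), an Antelope-format table therefore keeps 788 bytes of an overflowing column in the clustered index record, while a newer-format table keeps only the 20-byte external field reference.
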
diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc
index 484c4c5e966..486f3c1081c 100644
--- a/storage/innobase/dict/dict0load.cc
+++ b/storage/innobase/dict/dict0load.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2018, MariaDB Corporation.
+Copyright (c) 2016, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,26 +26,23 @@ Created 4/24/1996 Heikki Tuuri
*******************************************************/
#include "dict0load.h"
-#include "mysql_version.h"
-
-#ifdef UNIV_NONINL
-#include "dict0load.ic"
-#endif
+#include "mysql_version.h"
#include "btr0pcur.h"
#include "btr0btr.h"
-#include "page0page.h"
-#include "mach0data.h"
-#include "dict0dict.h"
#include "dict0boot.h"
+#include "dict0crea.h"
+#include "dict0dict.h"
+#include "dict0mem.h"
+#include "dict0priv.h"
#include "dict0stats.h"
+#include "fsp0file.h"
+#include "fts0priv.h"
+#include "mach0data.h"
+#include "page0page.h"
#include "rem0cmp.h"
#include "srv0start.h"
#include "srv0srv.h"
-#include "dict0crea.h"
-#include "dict0priv.h"
-#include "ha_prototypes.h" /* innobase_casedn_str() */
-#include "fts0priv.h"
#include "fts0opt.h"
/** Following are the InnoDB system tables. The positions in
@@ -58,17 +55,137 @@ static const char* SYSTEM_TABLE_NAME[] = {
"SYS_FOREIGN",
"SYS_FOREIGN_COLS",
"SYS_TABLESPACES",
- "SYS_DATAFILES"
+ "SYS_DATAFILES",
+ "SYS_VIRTUAL"
};
+/** Loads a table definition and also all its index definitions.
+
+Loads those foreign key constraints whose referenced table is already in
+dictionary cache. If a foreign key constraint is not loaded, then the
+referenced table is pushed into the output stack (fk_tables), if it is not
+NULL. These tables must be subsequently loaded so that all the foreign
+key constraints are loaded into memory.
+
+@param[in] name Table name in the db/tablename format
+@param[in] ignore_err Error to be ignored when loading table
+ and its index definition
+@param[out] fk_tables Related table names that must also be
+ loaded to ensure that all foreign key
+ constraints are loaded.
+@return table, NULL if does not exist; if the table is stored in an
+.ibd file, but the file does not exist, then we set the
+file_unreadable flag in the table object we return */
+static
+dict_table_t*
+dict_load_table_one(
+ const table_name_t& name,
+ dict_err_ignore_t ignore_err,
+ dict_names_t& fk_tables);
+
+/** Load a table definition from a SYS_TABLES record to dict_table_t.
+Do not load any columns or indexes.
+@param[in] name Table name
+@param[in] rec SYS_TABLES record
+@param[out,own] table table, or NULL
+@return error message
+@retval NULL on success */
+static const char* dict_load_table_low(const table_name_t& name,
+ const rec_t* rec, dict_table_t** table)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** Load an index definition from a SYS_INDEXES record to dict_index_t.
+If allocate=TRUE, we will create a dict_index_t structure and fill it
+accordingly. If allocated=FALSE, the dict_index_t will be supplied by
+the caller and filled with information read from the record.
+@return error message
+@retval NULL on success */
+static
+const char*
+dict_load_index_low(
+ byte* table_id, /*!< in/out: table id (8 bytes),
+ an "in" value if allocate=TRUE
+ and "out" when allocate=FALSE */
+ const char* table_name, /*!< in: table name */
+ mem_heap_t* heap, /*!< in/out: temporary memory heap */
+ const rec_t* rec, /*!< in: SYS_INDEXES record */
+ ibool allocate, /*!< in: TRUE=allocate *index,
+ FALSE=fill in a pre-allocated
+ *index */
+ dict_index_t** index); /*!< out,own: index, or NULL */
+
+/** Load a table column definition from a SYS_COLUMNS record to dict_table_t.
+@return error message
+@retval NULL on success */
+static
+const char*
+dict_load_column_low(
+ dict_table_t* table, /*!< in/out: table, could be NULL
+ if we just populate a dict_column_t
+ struct with information from
+ a SYS_COLUMNS record */
+ mem_heap_t* heap, /*!< in/out: memory heap
+ for temporary storage */
+ dict_col_t* column, /*!< out: dict_column_t to fill,
+ or NULL if table != NULL */
+ table_id_t* table_id, /*!< out: table id */
+ const char** col_name, /*!< out: column name */
+ const rec_t* rec, /*!< in: SYS_COLUMNS record */
+ ulint* nth_v_col); /*!< out: if not NULL, this
+ records the "n" of "nth" virtual
+ column */
+
+/** Load a virtual column "mapping" (to base columns) information
+from a SYS_VIRTUAL record
+@param[in,out] table table
+@param[in,out] heap memory heap
+@param[in,out] column mapped base column's dict_column_t
+@param[in,out] table_id table id
+@param[in,out] pos virtual column position
+@param[in,out] base_pos base column position
+@param[in] rec SYS_VIRTUAL record
+@return error message
+@retval NULL on success */
+static
+const char*
+dict_load_virtual_low(
+ dict_table_t* table,
+ mem_heap_t* heap,
+ dict_col_t** column,
+ table_id_t* table_id,
+ ulint* pos,
+ ulint* base_pos,
+ const rec_t* rec);
+
+/** Load an index field definition from a SYS_FIELDS record to dict_index_t.
+@return error message
+@retval NULL on success */
+static
+const char*
+dict_load_field_low(
+ byte* index_id, /*!< in/out: index id (8 bytes)
+ an "in" value if index != NULL
+ and "out" if index == NULL */
+ dict_index_t* index, /*!< in/out: index, could be NULL
+ if we just populate a dict_field_t
+ struct with information from
+ a SYS_FIELDS record */
+ dict_field_t* sys_field, /*!< out: dict_field_t to be
+ filled */
+ ulint* pos, /*!< out: Field position */
+ byte* last_index_id, /*!< in: last index id */
+ mem_heap_t* heap, /*!< in/out: memory heap
+ for temporary storage */
+ const rec_t* rec); /*!< in: SYS_FIELDS record */
+
/* If this flag is TRUE, then we will load the cluster index's (and tables')
metadata even if it is marked as "corrupted". */
-UNIV_INTERN my_bool srv_load_corrupted = FALSE;
+my_bool srv_load_corrupted;
#ifdef UNIV_DEBUG
/****************************************************************//**
Compare the name of an index column.
-@return TRUE if the i'th column of index is 'name'. */
+@return TRUE if the i'th column of index is 'name'. */
static
ibool
name_of_col_is(
@@ -90,7 +207,6 @@ name_of_col_is(
Finds the first table name in the given database.
@return own: table name, NULL if does not exist; the caller must free
the memory in the string! */
-UNIV_INTERN
char*
dict_get_first_table_name_in_db(
/*============================*/
@@ -107,7 +223,7 @@ dict_get_first_table_name_in_db(
ulint len;
mtr_t mtr;
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
heap = mem_heap_create(1000);
@@ -171,68 +287,8 @@ loop:
}
/********************************************************************//**
-Prints to the standard output information on all tables found in the data
-dictionary system table. */
-UNIV_INTERN
-void
-dict_print(void)
-/*============*/
-{
- dict_table_t* table;
- btr_pcur_t pcur;
- const rec_t* rec;
- mem_heap_t* heap;
- mtr_t mtr;
-
- /* Enlarge the fatal semaphore wait timeout during the InnoDB table
- monitor printout */
-
- os_increment_counter_by_amount(
- server_mutex,
- srv_fatal_semaphore_wait_threshold,
- SRV_SEMAPHORE_WAIT_EXTENSION);
-
- heap = mem_heap_create(1000);
- mutex_enter(&(dict_sys->mutex));
- mtr_start(&mtr);
-
- rec = dict_startscan_system(&pcur, &mtr, SYS_TABLES);
-
- while (rec) {
- const char* err_msg;
-
- err_msg = static_cast<const char*>(
- dict_process_sys_tables_rec_and_mtr_commit(
- heap, rec, &table, DICT_TABLE_LOAD_FROM_CACHE,
- &mtr));
-
- if (!err_msg) {
- dict_table_print(table);
- } else {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: %s\n", err_msg);
- }
-
- mem_heap_empty(heap);
-
- mtr_start(&mtr);
- rec = dict_getnext_system(&pcur, &mtr);
- }
-
- mtr_commit(&mtr);
- mutex_exit(&(dict_sys->mutex));
- mem_heap_free(heap);
-
- /* Restore the fatal semaphore wait timeout */
- os_decrement_counter_by_amount(
- server_mutex,
- srv_fatal_semaphore_wait_threshold,
- SRV_SEMAPHORE_WAIT_EXTENSION);
-}
-
-/********************************************************************//**
This function gets the next system table record as it scans the table.
-@return the next record if found, NULL if end of scan */
+@return the next record if found, NULL if end of scan */
static
const rec_t*
dict_getnext_system_low(
@@ -264,8 +320,7 @@ dict_getnext_system_low(
/********************************************************************//**
This function opens a system table, and returns the first record.
-@return first record of the system table */
-UNIV_INTERN
+@return first record of the system table */
const rec_t*
dict_startscan_system(
/*==================*/
@@ -294,8 +349,7 @@ dict_startscan_system(
/********************************************************************//**
This function gets the next system table record as it scans the table.
-@return the next record if found, NULL if end of scan */
-UNIV_INTERN
+@return the next record if found, NULL if end of scan */
const rec_t*
dict_getnext_system(
/*================*/
@@ -316,10 +370,8 @@ dict_getnext_system(
/********************************************************************//**
This function processes one SYS_TABLES record and populate the dict_table_t
-struct for the table. Extracted out of dict_print() to be used by
-both monitor table output and information schema innodb_sys_tables output.
+struct for the table.
@return error message, or NULL on success */
-UNIV_INTERN
const char*
dict_process_sys_tables_rec_and_mtr_commit(
/*=======================================*/
@@ -336,7 +388,6 @@ dict_process_sys_tables_rec_and_mtr_commit(
ulint len;
const char* field;
const char* err_msg = NULL;
- char* table_name;
field = (const char*) rec_get_nth_field_old(
rec, DICT_FLD__SYS_TABLES__NAME, &len);
@@ -346,7 +397,7 @@ dict_process_sys_tables_rec_and_mtr_commit(
ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
/* Get the table name */
- table_name = mem_heap_strdupl(heap, field, len);
+ table_name_t table_name(mem_heap_strdupl(heap, field, len));
/* If DICT_TABLE_LOAD_FROM_CACHE is set, first check
whether there is cached dict_table_t struct */
@@ -355,7 +406,7 @@ dict_process_sys_tables_rec_and_mtr_commit(
/* Commit before load the table again */
mtr_commit(mtr);
- *table = dict_table_get_low(table_name);
+ *table = dict_table_get_low(table_name.m_name);
if (!(*table)) {
err_msg = "Table not found in cache";
@@ -377,7 +428,6 @@ This function parses a SYS_INDEXES record and populate a dict_index_t
structure with the information from the record. For detail information
about SYS_INDEXES fields, please refer to dict_boot() function.
@return error message, or NULL on success */
-UNIV_INTERN
const char*
dict_process_sys_indexes_rec(
/*=========================*/
@@ -404,7 +454,6 @@ dict_process_sys_indexes_rec(
This function parses a SYS_COLUMNS record and populate a dict_column_t
structure with the information from the record.
@return error message, or NULL on success */
-UNIV_INTERN
const char*
dict_process_sys_columns_rec(
/*=========================*/
@@ -412,13 +461,40 @@ dict_process_sys_columns_rec(
const rec_t* rec, /*!< in: current SYS_COLUMNS rec */
dict_col_t* column, /*!< out: dict_col_t to be filled */
table_id_t* table_id, /*!< out: table id */
- const char** col_name) /*!< out: column name */
+ const char** col_name, /*!< out: column name */
+ ulint* nth_v_col) /*!< out: if virtual col, this is
+ record's sequence number */
{
const char* err_msg;
/* Parse the record, and get "dict_col_t" struct filled */
err_msg = dict_load_column_low(NULL, heap, column,
- table_id, col_name, rec);
+ table_id, col_name, rec, nth_v_col);
+
+ return(err_msg);
+}
+
+/** This function parses a SYS_VIRTUAL record and extracts virtual column
+information
+@param[in,out] heap heap memory
+@param[in]	rec		current SYS_VIRTUAL rec
+@param[in,out] table_id table id
+@param[in,out] pos virtual column position
+@param[in,out] base_pos base column position
+@return error message, or NULL on success */
+const char*
+dict_process_sys_virtual_rec(
+ mem_heap_t* heap,
+ const rec_t* rec,
+ table_id_t* table_id,
+ ulint* pos,
+ ulint* base_pos)
+{
+ const char* err_msg;
+
+	/* Parse the record and extract the virtual column mapping */
+ err_msg = dict_load_virtual_low(NULL, heap, NULL, table_id,
+ pos, base_pos, rec);
return(err_msg);
}
@@ -427,7 +503,6 @@ dict_process_sys_columns_rec(
This function parses a SYS_FIELDS record and populates a dict_field_t
structure with the information from the record.
@return error message, or NULL on success */
-UNIV_INTERN
const char*
dict_process_sys_fields_rec(
/*========================*/
@@ -462,7 +537,6 @@ This function parses a SYS_FOREIGN record and populate a dict_foreign_t
structure with the information from the record. For detail information
about SYS_FOREIGN fields, please refer to dict_load_foreign() function.
@return error message, or NULL on success */
-UNIV_INTERN
const char*
dict_process_sys_foreign_rec(
/*=========================*/
@@ -490,7 +564,7 @@ err_len:
return("incorrect column length in SYS_FOREIGN");
}
- /* This recieves a dict_foreign_t* that points to a stack variable.
+ /* This receives a dict_foreign_t* that points to a stack variable.
So dict_foreign_free(foreign) is not used as elsewhere.
Since the heap used here is freed elsewhere, foreign->heap
is not assigned. */
@@ -543,7 +617,6 @@ err_len:
This function parses a SYS_FOREIGN_COLS record and extract necessary
information from the record and return to caller.
@return error message, or NULL on success */
-UNIV_INTERN
const char*
dict_process_sys_foreign_col_rec(
/*=============================*/
@@ -613,7 +686,6 @@ err_len:
This function parses a SYS_TABLESPACES record, extracts necessary
information from the record and returns to caller.
@return error message, or NULL on success */
-UNIV_INTERN
const char*
dict_process_sys_tablespaces(
/*=========================*/
@@ -680,7 +752,6 @@ err_len:
This function parses a SYS_DATAFILES record, extracts necessary
information from the record and returns it to the caller.
@return error message, or NULL on success */
-UNIV_INTERN
const char*
dict_process_sys_datafiles(
/*=======================*/
@@ -730,65 +801,13 @@ err_len:
return(NULL);
}
-/********************************************************************//**
-Determine the flags of a table as stored in SYS_TABLES.TYPE and N_COLS.
-@return ULINT_UNDEFINED if error, else a valid dict_table_t::flags. */
-static
-ulint
-dict_sys_tables_get_flags(
-/*======================*/
- const rec_t* rec) /*!< in: a record of SYS_TABLES */
-{
- const byte* field;
- ulint len;
- ulint type;
- ulint n_cols;
-
- /* read the 4 byte flags from the TYPE field */
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__TYPE, &len);
- ut_a(len == 4);
- type = mach_read_from_4(field);
-
- /* The low order bit of SYS_TABLES.TYPE is always set to 1. But in
- dict_table_t::flags the low order bit is used to determine if the
- row format is Redundant or Compact when the format is Antelope.
- Read the 4 byte N_COLS field and look at the high order bit. It
- should be set for COMPACT and later. It should not be set for
- REDUNDANT. */
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__N_COLS, &len);
- ut_a(len == 4);
- n_cols = mach_read_from_4(field);
-
- /* This validation function also combines the DICT_N_COLS_COMPACT
- flag in n_cols into the type field to effectively make it a
- dict_table_t::flags. */
-
- if (ULINT_UNDEFINED == dict_sys_tables_type_validate(type, n_cols)) {
- return(ULINT_UNDEFINED);
- }
-
- return(dict_sys_tables_type_to_tf(type, n_cols));
-}
-
-/********************************************************************//**
-Gets the filepath for a spaceid from SYS_DATAFILES and checks it against
-the contents of a link file. This function is called when there is no
-fil_node_t entry for this space ID so both durable locations on disk
-must be checked and compared.
-We use a temporary heap here for the table lookup, but not for the path
-returned which the caller must free.
-This function can return NULL if the space ID is not found in SYS_DATAFILES,
-then the caller will assume that the ibd file is in the normal datadir.
-@return own: A copy of the first datafile found in SYS_DATAFILES.PATH for
-the given space ID. NULL if space ID is zero or not found. */
-UNIV_INTERN
-char*
+/** Get the first filepath from SYS_DATAFILES for a given space_id.
+@param[in] space_id Tablespace ID
+@return First filepath (caller must invoke ut_free() on it)
+@retval NULL if no SYS_DATAFILES entry was found. */
+static char*
dict_get_first_path(
-/*================*/
- ulint space, /*!< in: space id */
- const char* name) /*!< in: tablespace name */
+ ulint space_id)
{
mtr_t mtr;
dict_table_t* sys_datafiles;
@@ -800,15 +819,16 @@ dict_get_first_path(
const rec_t* rec;
const byte* field;
ulint len;
- char* dict_filepath = NULL;
+ char* filepath = NULL;
mem_heap_t* heap = mem_heap_create(1024);
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
mtr_start(&mtr);
sys_datafiles = dict_table_get_low("SYS_DATAFILES");
sys_index = UT_LIST_GET_FIRST(sys_datafiles->indexes);
+
ut_ad(!dict_table_is_comp(sys_datafiles));
ut_ad(name_of_col_is(sys_datafiles, sys_index,
DICT_FLD__SYS_DATAFILES__SPACE, "SPACE"));
@@ -819,7 +839,7 @@ dict_get_first_path(
dfield = dtuple_get_nth_field(tuple, DICT_FLD__SYS_DATAFILES__SPACE);
buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
- mach_write_to_4(buf, space);
+ mach_write_to_4(buf, space_id);
dfield_set_data(dfield, buf, 4);
dict_index_copy_types(tuple, sys_index, 1);
@@ -829,44 +849,59 @@ dict_get_first_path(
rec = btr_pcur_get_rec(&pcur);
- /* If the file-per-table tablespace was created with
- an earlier version of InnoDB, then this record is not
- in SYS_DATAFILES. But a link file still might exist. */
-
+ /* Get the filepath from this SYS_DATAFILES record. */
if (btr_pcur_is_on_user_rec(&pcur)) {
- /* A record for this space ID was found. */
field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_DATAFILES__PATH, &len);
- ut_a(len > 0 || len == UNIV_SQL_NULL);
- ut_a(len < OS_FILE_MAX_PATH);
- dict_filepath = mem_strdupl((char*) field, len);
- ut_a(dict_filepath);
+ rec, DICT_FLD__SYS_DATAFILES__SPACE, &len);
+ ut_a(len == 4);
+
+ if (space_id == mach_read_from_4(field)) {
+ /* A record for this space ID was found. */
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_DATAFILES__PATH, &len);
+
+ ut_ad(len > 0);
+ ut_ad(len < OS_FILE_MAX_PATH);
+
+ if (len > 0 && len < UNIV_SQL_NULL) {
+ filepath = mem_strdupl(
+ reinterpret_cast<const char*>(field),
+ len);
+ ut_ad(filepath != NULL);
+
+ /* The dictionary may have been written on
+ another OS. */
+ os_normalize_path(filepath);
+ }
+ }
}
btr_pcur_close(&pcur);
mtr_commit(&mtr);
mem_heap_free(heap);
- return(dict_filepath);
+ return(filepath);
}
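
Per the new contract above, the caller owns the returned string:

	if (char* path = dict_get_first_path(space_id)) {
		/* validate or open the .ibd file at 'path' */
		ut_free(path);
	}
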
-/********************************************************************//**
-Update the record for space_id in SYS_TABLESPACES to this filepath.
-@return DB_SUCCESS if OK, dberr_t if the insert failed */
-UNIV_INTERN
+/** Update the record for space_id in SYS_TABLESPACES to this filepath.
+@param[in] space_id Tablespace ID
+@param[in] filepath Tablespace filepath
+@return DB_SUCCESS if OK, dberr_t if the insert failed */
dberr_t
dict_update_filepath(
-/*=================*/
- ulint space_id, /*!< in: space id */
- const char* filepath) /*!< in: filepath */
+ ulint space_id,
+ const char* filepath)
{
+ if (!srv_sys_tablespaces_open) {
+ /* Startup procedure is not yet ready for updates. */
+ return(DB_SUCCESS);
+ }
+
dberr_t err = DB_SUCCESS;
trx_t* trx;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
+ ut_ad(mutex_own(&dict_sys->mutex));
trx = trx_allocate_for_background();
trx->op_info = "update filepath";
@@ -893,39 +928,48 @@ dict_update_filepath(
if (err == DB_SUCCESS) {
/* We just updated SYS_DATAFILES due to the contents in
a link file. Make a note that we did this. */
- ib_logf(IB_LOG_LEVEL_INFO,
- "The InnoDB data dictionary table SYS_DATAFILES "
- "for tablespace ID %lu was updated to use file %s.",
- (ulong) space_id, filepath);
+ ib::info() << "The InnoDB data dictionary table SYS_DATAFILES"
+ " for tablespace ID " << space_id
+ << " was updated to use file " << filepath << ".";
} else {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Problem updating InnoDB data dictionary table "
- "SYS_DATAFILES for tablespace ID %lu to file %s.",
- (ulong) space_id, filepath);
+ ib::warn() << "Error occurred while updating InnoDB data"
+ " dictionary table SYS_DATAFILES for tablespace ID "
+ << space_id << " to file " << filepath << ": "
+ << ut_strerr(err) << ".";
}
return(err);
}
-/********************************************************************//**
-Insert records into SYS_TABLESPACES and SYS_DATAFILES.
-@return DB_SUCCESS if OK, dberr_t if the insert failed */
-UNIV_INTERN
+/** Replace records in SYS_TABLESPACES and SYS_DATAFILES associated with
+the given space_id using an independent transaction.
+@param[in] space_id Tablespace ID
+@param[in] name Tablespace name
+@param[in] filepath First filepath
+@param[in] fsp_flags Tablespace flags
+@return DB_SUCCESS if OK, dberr_t if the insert failed */
dberr_t
-dict_insert_tablespace_and_filepath(
-/*================================*/
- ulint space, /*!< in: space id */
- const char* name, /*!< in: talespace name */
- const char* filepath, /*!< in: filepath */
- ulint fsp_flags) /*!< in: tablespace flags */
+dict_replace_tablespace_and_filepath(
+ ulint space_id,
+ const char* name,
+ const char* filepath,
+ ulint fsp_flags)
{
+ if (!srv_sys_tablespaces_open) {
+ /* Startup procedure is not yet ready for updates.
+ Return success since this will likely get updated
+ later. */
+ return(DB_SUCCESS);
+ }
+
dberr_t err = DB_SUCCESS;
trx_t* trx;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ DBUG_EXECUTE_IF("innodb_fail_to_update_tablespace_dict",
+ return(DB_INTERRUPTED););
+
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
+ ut_ad(mutex_own(&dict_sys->mutex));
ut_ad(filepath);
trx = trx_allocate_for_background();
@@ -935,9 +979,9 @@ dict_insert_tablespace_and_filepath(
/* A record for this space ID was not found in
SYS_DATAFILES. Assume the record is also missing in
- SYS_TABLESPACES. Insert records onto them both. */
- err = dict_create_add_tablespace_to_dictionary(
- space, name, fsp_flags, filepath, trx, false);
+ SYS_TABLESPACES. Insert records into them both. */
+ err = dict_replace_tablespace_in_dictionary(
+ space_id, name, fsp_flags, filepath, trx);
trx_commit_for_mysql(trx);
trx->dict_operation_lock_mode = 0;
@@ -946,270 +990,562 @@ dict_insert_tablespace_and_filepath(
return(err);
}
-/********************************************************************//**
-This function looks at each table defined in SYS_TABLES. It checks the
-tablespace for any table with a space_id > 0. It looks up the tablespace
-in SYS_DATAFILES to ensure the correct path.
+/** Check the validity of a SYS_TABLES record
+Make sure the fields are the right length and that they
+do not contain invalid contents.
+@param[in] rec SYS_TABLES record
+@return error message, or NULL on success */
+static
+const char*
+dict_sys_tables_rec_check(
+ const rec_t* rec)
+{
+ const byte* field;
+ ulint len;
-In a crash recovery we already have all the tablespace objects created.
-This function compares the space id information in the InnoDB data dictionary
-to what we already read with fil_load_single_table_tablespaces().
+ ut_ad(mutex_own(&dict_sys->mutex));
-In a normal startup, we create the tablespace objects for every table in
-InnoDB's data dictionary, if the corresponding .ibd file exists.
-We also scan the biggest space id, and store it to fil_system. */
-UNIV_INTERN
-void
-dict_check_tablespaces_and_store_max_id(
-/*====================================*/
- dict_check_t dict_check) /*!< in: how to check */
-{
- dict_table_t* sys_tables;
- dict_index_t* sys_index;
- btr_pcur_t pcur;
- const rec_t* rec;
- ulint max_space_id;
- mtr_t mtr;
+ if (rec_get_deleted_flag(rec, 0)) {
+ return("delete-marked record in SYS_TABLES");
+ }
- rw_lock_x_lock(&dict_operation_lock);
- mutex_enter(&(dict_sys->mutex));
+ if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_TABLES) {
+ return("wrong number of columns in SYS_TABLES record");
+ }
- mtr_start(&mtr);
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_TABLES__NAME, &len);
+ if (len == 0 || len == UNIV_SQL_NULL) {
+err_len:
+ return("incorrect column length in SYS_TABLES");
+ }
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_TABLES__DB_TRX_ID, &len);
+ if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) {
+ goto err_len;
+ }
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_TABLES__DB_ROLL_PTR, &len);
+ if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) {
+ goto err_len;
+ }
- sys_tables = dict_table_get_low("SYS_TABLES");
- sys_index = UT_LIST_GET_FIRST(sys_tables->indexes);
- ut_ad(!dict_table_is_comp(sys_tables));
+ rec_get_nth_field_offs_old(rec, DICT_FLD__SYS_TABLES__ID, &len);
+ if (len != 8) {
+ goto err_len;
+ }
- max_space_id = mtr_read_ulint(dict_hdr_get(&mtr)
- + DICT_HDR_MAX_SPACE_ID,
- MLOG_4BYTES, &mtr);
- fil_set_max_space_id_if_bigger(max_space_id);
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLES__N_COLS, &len);
+ if (field == NULL || len != 4) {
+ goto err_len;
+ }
- btr_pcur_open_at_index_side(true, sys_index, BTR_SEARCH_LEAF, &pcur,
- true, 0, &mtr);
-loop:
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+ rec_get_nth_field_offs_old(rec, DICT_FLD__SYS_TABLES__TYPE, &len);
+ if (len != 4) {
+ goto err_len;
+ }
- rec = btr_pcur_get_rec(&pcur);
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_TABLES__MIX_ID, &len);
+ if (len != 8) {
+ goto err_len;
+ }
- if (!btr_pcur_is_on_user_rec(&pcur)) {
- /* end of index */
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLES__MIX_LEN, &len);
+ if (field == NULL || len != 4) {
+ goto err_len;
+ }
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_TABLES__CLUSTER_ID, &len);
+ if (len != UNIV_SQL_NULL) {
+ goto err_len;
+ }
- /* We must make the tablespace cache aware of the biggest
- known space id */
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLES__SPACE, &len);
+ if (field == NULL || len != 4) {
+ goto err_len;
+ }
- /* printf("Biggest space id in data dictionary %lu\n",
- max_space_id); */
- fil_set_max_space_id_if_bigger(max_space_id);
+ return(NULL);
+}
- mutex_exit(&(dict_sys->mutex));
- rw_lock_x_unlock(&dict_operation_lock);
+/** Read and return the contents of a SYS_TABLESPACES record.
+@param[in] rec A record of SYS_TABLESPACES
+@param[out] id Pointer to the space_id for this table
+@param[in,out] name Buffer for Tablespace Name of length NAME_LEN
+@param[out] flags Pointer to tablespace flags
+@return true if the record was read correctly, false if not. */
+bool
+dict_sys_tablespaces_rec_read(
+ const rec_t* rec,
+ ulint* id,
+ char* name,
+ ulint* flags)
+{
+ const byte* field;
+ ulint len;
- return;
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLESPACES__SPACE, &len);
+ if (len != DICT_FLD_LEN_SPACE) {
+ ib::error() << "Wrong field length in SYS_TABLESPACES.SPACE: "
+ << len;
+ return(false);
}
+ *id = mach_read_from_4(field);
- if (!rec_get_deleted_flag(rec, 0)) {
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLESPACES__NAME, &len);
+ if (len == 0 || len == UNIV_SQL_NULL) {
+ ib::error() << "Wrong field length in SYS_TABLESPACES.NAME: "
+ << len;
+ return(false);
+ }
+ strncpy(name, reinterpret_cast<const char*>(field), NAME_LEN);
- /* We found one */
- const byte* field;
- ulint len;
- ulint space_id;
- ulint flags;
- char* name;
+	/* Read the 4 byte flags from the TYPE field */
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLESPACES__FLAGS, &len);
+ if (len != 4) {
+ ib::error() << "Wrong field length in SYS_TABLESPACES.FLAGS: "
+ << len;
+ return(false);
+ }
+ *flags = mach_read_from_4(field);
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__NAME, &len);
+ return(true);
+}
+
+/** Check if SYS_TABLES.TYPE is valid
+@param[in] type SYS_TABLES.TYPE
+@param[in] not_redundant whether ROW_FORMAT=REDUNDANT is not used
+@return whether the SYS_TABLES.TYPE value is valid */
+static
+bool
+dict_sys_tables_type_valid(ulint type, bool not_redundant)
+{
+ /* The DATA_DIRECTORY flag can be assigned fully independently
+ of all other persistent table flags. */
+ type &= ~DICT_TF_MASK_DATA_DIR;
+
+ if (type == 1) {
+ return(true); /* ROW_FORMAT=REDUNDANT or ROW_FORMAT=COMPACT */
+ }
- name = mem_strdupl((char*) field, len);
+ if (!(type & 1)) {
+ /* For ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT,
+ SYS_TABLES.TYPE=1. Else, it is the same as
+ dict_table_t::flags, and the least significant bit
+ would be set. So, the bit never can be 0. */
+ return(false);
+ }
- char table_name[MAX_FULL_NAME_LEN + 1];
+ if (!not_redundant) {
+ /* SYS_TABLES.TYPE must be 1 for ROW_FORMAT=REDUNDANT. */
+ return(false);
+ }
- innobase_format_name(
- table_name, sizeof(table_name), name, FALSE);
+ if (type >= 1U << DICT_TF_POS_UNUSED) {
+ /* Some unknown bits are set. */
+ return(false);
+ }
- flags = dict_sys_tables_get_flags(rec);
- if (UNIV_UNLIKELY(flags == ULINT_UNDEFINED)) {
- /* Read again the 4 bytes from rec. */
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__TYPE, &len);
- ut_ad(len == 4); /* this was checked earlier */
- flags = mach_read_from_4(field);
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Table '%s' in InnoDB data dictionary"
- " has unknown type %lx", table_name, flags);
- mem_free(name);
- goto loop;
+ /* ATOMIC_WRITES cannot be 3; it is the 10.3 NO_ROLLBACK flag. */
+ if (!(~type & DICT_TF_MASK_ATOMIC_WRITES)) {
+ return(false);
+ }
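+
+	/* Two worked examples, assuming the usual bit layout
+	(ZIP_SSIZE in bits 1..4, ATOMIC_BLOBS in bit 5):
+	TYPE=0x21 is a plain ROW_FORMAT=DYNAMIC table and passes the
+	checks above; TYPE=0x29 adds ZIP_SSIZE=4, i.e.
+	ROW_FORMAT=COMPRESSED with KEY_BLOCK_SIZE=8. TYPE=0x20 would
+	already have been rejected, because its least significant
+	bit is 0. */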
+
+ return(dict_tf_is_valid_not_redundant(type));
+}
+
+/** Convert SYS_TABLES.TYPE to dict_table_t::flags.
+@param[in] type SYS_TABLES.TYPE
+@param[in] not_redundant whether ROW_FORMAT=REDUNDANT is not used
+@return table flags */
+static
+ulint
+dict_sys_tables_type_to_tf(ulint type, bool not_redundant)
+{
+ ut_ad(dict_sys_tables_type_valid(type, not_redundant));
+ ulint flags = not_redundant ? 1 : 0;
+
+ /* ZIP_SSIZE, ATOMIC_BLOBS, DATA_DIR, PAGE_COMPRESSION,
+ PAGE_COMPRESSION_LEVEL are the same. */
+ flags |= type & (DICT_TF_MASK_ZIP_SSIZE
+ | DICT_TF_MASK_ATOMIC_BLOBS
+ | DICT_TF_MASK_DATA_DIR
+ | DICT_TF_MASK_PAGE_COMPRESSION
+ | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL);
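+
+	/* For example, TYPE=1 with not_redundant=false yields flags=0
+	(ROW_FORMAT=REDUNDANT), and TYPE=0x21 yields flags=0x21
+	(ROW_FORMAT=DYNAMIC), because the copied bit ranges coincide
+	in the two encodings. */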
+
+ ut_ad(dict_tf_is_valid(flags));
+ return(flags);
+}
+
+/** Read and return 5 integer fields from a SYS_TABLES record.
+@param[in] rec A record of SYS_TABLES
+@param[in] name Table Name, the same as SYS_TABLES.NAME
+@param[out] table_id Pointer to the table_id for this table
+@param[out] space_id Pointer to the space_id for this table
+@param[out] n_cols Pointer to number of columns for this table.
+@param[out] flags Pointer to table flags
+@param[out] flags2 Pointer to table flags2
+@return true if the record was read correctly, false if not. */
+MY_ATTRIBUTE((warn_unused_result))
+static
+bool
+dict_sys_tables_rec_read(
+ const rec_t* rec,
+ const table_name_t& table_name,
+ table_id_t* table_id,
+ ulint* space_id,
+ ulint* n_cols,
+ ulint* flags,
+ ulint* flags2)
+{
+ const byte* field;
+ ulint len;
+ ulint type;
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLES__ID, &len);
+ ut_ad(len == 8);
+ *table_id = static_cast<table_id_t>(mach_read_from_8(field));
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLES__SPACE, &len);
+ ut_ad(len == 4);
+ *space_id = mach_read_from_4(field);
+
+ /* Read the 4 byte flags from the TYPE field */
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLES__TYPE, &len);
+ ut_a(len == 4);
+ type = mach_read_from_4(field);
+
+ /* Handle MDEV-12873 InnoDB SYS_TABLES.TYPE incompatibility
+ for PAGE_COMPRESSED=YES in MariaDB 10.2.2 to 10.2.6.
+
+ MariaDB 10.2.2 introduced the SHARED_SPACE flag from MySQL 5.7,
+ shifting the flags PAGE_COMPRESSION, PAGE_COMPRESSION_LEVEL,
+ ATOMIC_WRITES by one bit. The SHARED_SPACE flag would always
+ be written as 0 by MariaDB, because MariaDB does not support
+ CREATE TABLESPACE or CREATE TABLE...TABLESPACE for InnoDB.
+
+ So, instead of the bits AALLLLCxxxxxxx we would have
+ AALLLLC0xxxxxxx if the table was created with MariaDB 10.2.2
+ to 10.2.6. (AA=ATOMIC_WRITES, LLLL=PAGE_COMPRESSION_LEVEL,
+ C=PAGE_COMPRESSED, xxxxxxx=7 bits that were not moved.)
+
+ The case LLLLC=00000 is not a problem. The problem is the case
+ AALLLL10DB00001 where D is the (mostly ignored) DATA_DIRECTORY
+ flag and B is the ATOMIC_BLOBS flag (1 for ROW_FORMAT=DYNAMIC
+	and 0 for ROW_FORMAT=COMPACT in this case). The other low-order
+	bits must take these values, because PAGE_COMPRESSED=YES is only allowed
+ for ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPACT, not for
+ ROW_FORMAT=REDUNDANT or ROW_FORMAT=COMPRESSED.
+
+ Starting with MariaDB 10.2.4, the flags would be
+ 00LLLL10DB00001, because ATOMIC_WRITES is always written as 0.
+
+ We will concentrate on the PAGE_COMPRESSION_LEVEL and
+ PAGE_COMPRESSED=YES. PAGE_COMPRESSED=NO implies
+ PAGE_COMPRESSION_LEVEL=0, and in that case all the affected
+ bits will be 0. For PAGE_COMPRESSED=YES, the values 1..9 are
+ allowed for PAGE_COMPRESSION_LEVEL. That is, we must interpret
+ the bits AALLLL10DB00001 as AALLLL1DB00001.
+
+ If someone created a table in MariaDB 10.2.2 or 10.2.3 with
+ the attribute ATOMIC_WRITES=OFF (value 2) and without
+ PAGE_COMPRESSED=YES or PAGE_COMPRESSION_LEVEL, that should be
+ rejected. The value ATOMIC_WRITES=ON (1) would look like
+ ATOMIC_WRITES=OFF, but it would be ignored starting with
+ MariaDB 10.2.4. */
+ compile_time_assert(DICT_TF_POS_PAGE_COMPRESSION == 7);
+ compile_time_assert(DICT_TF_POS_UNUSED == 14);
+
+ if ((type & 0x19f) != 0x101) {
+ /* The table cannot have been created with MariaDB
+ 10.2.2 to 10.2.6, because they would write the
+ low-order bits of SYS_TABLES.TYPE as 0b10xx00001 for
+ PAGE_COMPRESSED=YES. No adjustment is applicable. */
+ } else if (type >= 3 << 13) {
+ /* 10.2.2 and 10.2.3 write ATOMIC_WRITES less than 3,
+ and no other flags above that can be set for the
+ SYS_TABLES.TYPE to be in the 10.2.2..10.2.6 format.
+ This would in any case be invalid format for 10.2 and
+ earlier releases. */
+ ut_ad(!dict_sys_tables_type_valid(type, true));
+ } else {
+ /* SYS_TABLES.TYPE is of the form AALLLL10DB00001. We
+ must still validate that the LLLL bits are between 0
+ and 9 before we can discard the extraneous 0 bit. */
+ ut_ad(!DICT_TF_GET_PAGE_COMPRESSION(type));
+
+ if ((((type >> 9) & 0xf) - 1) < 9) {
+ ut_ad(DICT_TF_GET_PAGE_COMPRESSION_LEVEL(type) & 1);
+
+ type = (type & 0x7fU) | (type >> 1 & ~0x7fU);
+
+ ut_ad(DICT_TF_GET_PAGE_COMPRESSION(type));
+ ut_ad(DICT_TF_GET_PAGE_COMPRESSION_LEVEL(type) >= 1);
+ ut_ad(DICT_TF_GET_PAGE_COMPRESSION_LEVEL(type) <= 9);
+ } else {
+ ut_ad(!dict_sys_tables_type_valid(type, true));
}
+ }
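+
+	/* Worked example of the adjustment above, using the bit layout
+	described in the big comment: a MariaDB 10.2.4 table with
+	ROW_FORMAT=DYNAMIC, PAGE_COMPRESSED=YES and
+	PAGE_COMPRESSION_LEVEL=6 stores TYPE=0xd21. The low 7 bits
+	(0x21) are preserved and the rest is shifted right by one,
+	giving type=0x6a1: PAGE_COMPRESSED at bit 7 and level 6 at
+	bits 8..11, which dict_sys_tables_type_valid() accepts. */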
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__SPACE, &len);
- ut_a(len == 4);
+ /* The low order bit of SYS_TABLES.TYPE is always set to 1. But in
+ dict_table_t::flags the low order bit is used to determine if the
+ row format is Redundant (0) or Compact (1) when the format is Antelope.
+ Read the 4 byte N_COLS field and look at the high order bit. It
+ should be set for COMPACT and later. It should not be set for
+ REDUNDANT. */
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLES__N_COLS, &len);
+ ut_a(len == 4);
+ *n_cols = mach_read_from_4(field);
- space_id = mach_read_from_4(field);
+ const bool not_redundant = 0 != (*n_cols & DICT_N_COLS_COMPACT);
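+
+	/* For instance, a ROW_FORMAT=COMPACT table with 4 user columns
+	stores N_COLS = 4 with the DICT_N_COLS_COMPACT (high) bit set,
+	whereas the same table in ROW_FORMAT=REDUNDANT stores plain 4. */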
- btr_pcur_store_position(&pcur, &mtr);
+ if (!dict_sys_tables_type_valid(type, not_redundant)) {
+ ib::error() << "Table " << table_name << " in InnoDB"
+ " data dictionary contains invalid flags."
+ " SYS_TABLES.TYPE=" << type <<
+ " SYS_TABLES.N_COLS=" << *n_cols;
+ return(false);
+ }
- /* For tables created with old versions of InnoDB,
- SYS_TABLES.MIX_LEN may contain garbage. Such tables
- would always be in ROW_FORMAT=REDUNDANT. Pretend that
- all such tables are non-temporary. That is, do not
- suppress error printouts about temporary or discarded
- tablespaces not being found. */
+ *flags = dict_sys_tables_type_to_tf(type, not_redundant);
+ /* For tables created before MySQL 4.1, there may be
+ garbage in SYS_TABLES.MIX_LEN where flags2 are found. Such tables
+would always be in ROW_FORMAT=REDUNDANT, which does not have the
+ high bit set in n_cols, and flags would be zero.
+ MySQL 4.1 was the first version to support innodb_file_per_table,
+ that is, *space_id != 0. */
+ if (not_redundant || *space_id != 0 || *n_cols & DICT_N_COLS_COMPACT) {
+
+ /* Get flags2 from SYS_TABLES.MIX_LEN */
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_TABLES__MIX_LEN, &len);
+ *flags2 = mach_read_from_4(field);
+
+ if (!dict_tf2_is_valid(*flags, *flags2)) {
+ ib::error() << "Table " << table_name << " in InnoDB"
+ " data dictionary contains invalid flags."
+ " SYS_TABLES.TYPE=" << type
+ << " SYS_TABLES.MIX_LEN=" << *flags2;
+ return(false);
+ }
- bool is_temp = false;
- bool discarded = false;
- ib_uint32_t flags2 = static_cast<ib_uint32_t>(
- mach_read_from_4(field));
+ /* DICT_TF2_FTS will be set when indexes are being loaded */
+ *flags2 &= ~DICT_TF2_FTS;
- /* Check that the tablespace (the .ibd file) really
- exists; print a warning to the .err log if not.
- Do not print warnings for temporary tables or for
- tablespaces that have been discarded. */
+ /* Now that we have used this bit, unset it. */
+ *n_cols &= ~DICT_N_COLS_COMPACT;
+ } else {
+ *flags2 = 0;
+ }
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__N_COLS, &len);
+ return(true);
+}
- /* MIX_LEN valid only for ROW_FORMAT > REDUNDANT. */
- if (mach_read_from_4(field) & DICT_N_COLS_COMPACT) {
+/** Load and check each non-predefined tablespace mentioned in SYS_TABLES.
+Search SYS_TABLES and check each tablespace mentioned that has not
+already been added to the fil_system. If it is valid, add it to the
+fil_system list.
+@return the highest space ID found. */
+static ulint dict_check_sys_tables()
+{
+ ulint max_space_id = 0;
+ btr_pcur_t pcur;
+ const rec_t* rec;
+ mtr_t mtr;
- is_temp = !!(flags2 & DICT_TF2_TEMPORARY);
- discarded = !!(flags2 & DICT_TF2_DISCARDED);
- }
+ DBUG_ENTER("dict_check_sys_tables");
- if (space_id == 0) {
- /* The system tablespace always exists. */
- ut_ad(!discarded);
- mem_free(name);
- goto loop;
- }
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
+ ut_ad(mutex_own(&dict_sys->mutex));
- mtr_commit(&mtr);
+ mtr_start(&mtr);
- switch (dict_check) {
- case DICT_CHECK_ALL_LOADED:
- /* All tablespaces should have been found in
- fil_load_single_table_tablespaces(). */
- if (fil_space_for_table_exists_in_mem(
- space_id, name, !(is_temp || discarded),
- false, NULL, 0, flags)
- && !(is_temp || discarded)) {
- /* If user changes the path of .ibd files in
- *.isl files before doing crash recovery ,
- then this leads to inconsistency in
- SYS_DATAFILES system table because the
- tables are loaded from the updated path
- but the SYS_DATAFILES still points to the
- old path.Therefore after crash recovery
- update SYS_DATAFILES with the updated path.*/
- ut_ad(space_id);
- ut_ad(recv_needed_recovery);
- char *dict_path = dict_get_first_path(space_id,
- name);
- char *remote_path = fil_read_link_file(name);
- if(dict_path && remote_path) {
- if(strcmp(dict_path,remote_path)) {
- dict_update_filepath(space_id,
- remote_path);
- }
- }
- if(dict_path)
- mem_free(dict_path);
- if(remote_path)
- mem_free(remote_path);
- }
- break;
+ /* Before traversing SYS_TABLES, let's make sure we have
+ SYS_TABLESPACES and SYS_DATAFILES loaded. */
+ dict_table_t* sys_tablespaces;
+ dict_table_t* sys_datafiles;
+ sys_tablespaces = dict_table_get_low("SYS_TABLESPACES");
+ ut_a(sys_tablespaces != NULL);
+ sys_datafiles = dict_table_get_low("SYS_DATAFILES");
+ ut_a(sys_datafiles != NULL);
- case DICT_CHECK_SOME_LOADED:
- /* Some tablespaces may have been opened in
- trx_resurrect_table_locks(). */
- if (fil_space_for_table_exists_in_mem(
- space_id, name, false,
- false, NULL, 0, flags)) {
- break;
- }
- /* fall through */
- case DICT_CHECK_NONE_LOADED:
- if (discarded) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "DISCARD flag set for table '%s',"
- " ignored.",
- table_name);
- break;
- }
+ const bool validate = recv_needed_recovery
+ && !srv_safe_truncate
+ && !srv_force_recovery;
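+
+	/* The extra validation in fil_ibd_open() below is requested
+	only when crash recovery was needed and neither
+	srv_safe_truncate nor srv_force_recovery is set. */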
- /* It is a normal database startup: create the
- space object and check that the .ibd file exists.
- If the table uses a remote tablespace, look for the
- space_id in SYS_DATAFILES to find the filepath */
+ for (rec = dict_startscan_system(&pcur, &mtr, SYS_TABLES);
+ rec != NULL;
+ rec = dict_getnext_system(&pcur, &mtr)) {
+ const byte* field;
+ ulint len;
+ table_id_t table_id;
+ ulint space_id;
+ ulint n_cols;
+ ulint flags;
+ ulint flags2;
- /* Use the remote filepath if known. */
- char* filepath = NULL;
- if (DICT_TF_HAS_DATA_DIR(flags)) {
- filepath = dict_get_first_path(
- space_id, name);
- }
+		/* If a table record is not usable, ignore it and continue
+ on to the next record. Error messages were logged. */
+ if (dict_sys_tables_rec_check(rec) != NULL) {
+ continue;
+ }
- /* We could read page 0 to get (optional) IV
- if encryption is turned on, if it's off
- we will read the page 0 later and find out
- if we should decrypt a potentially
- already encrypted table
- bool read_page_0 = srv_encrypt_tables; */
-
- bool read_page_0 = false;
-
- /* We set the 2nd param (fix_dict = true)
- here because we already have an x-lock on
- dict_operation_lock and dict_sys->mutex. Besides,
- this is at startup and we are now single threaded.
- If the filepath is not known, it will need to
- be discovered. */
- dberr_t err = fil_open_single_table_tablespace(
- read_page_0, srv_read_only_mode ? false : true,
- space_id, dict_tf_to_fsp_flags(flags),
- name, filepath);
-
- if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Tablespace open failed for '%s', "
- "ignored.", table_name);
- }
+ /* Copy the table name from rec */
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLES__NAME, &len);
- if (filepath) {
- mem_free(filepath);
- }
+ table_name_t table_name(mem_strdupl((char*) field, len));
+ DBUG_PRINT("dict_check_sys_tables",
+ ("name: %p, '%s'", table_name.m_name,
+ table_name.m_name));
+
+ if (!dict_sys_tables_rec_read(rec, table_name,
+ &table_id, &space_id,
+ &n_cols, &flags, &flags2)
+ || space_id == TRX_SYS_SPACE) {
+next:
+ ut_free(table_name.m_name);
+ continue;
+ }
- break;
+ if (srv_safe_truncate
+ && strstr(table_name.m_name, "/" TEMP_FILE_PREFIX "-")) {
+ /* This table will be dropped by
+ row_mysql_drop_garbage_tables().
+ We do not care if the file exists. */
+ goto next;
}
- if (space_id > max_space_id) {
- max_space_id = space_id;
+ if (flags2 & DICT_TF2_DISCARDED) {
+ ib::info() << "Ignoring tablespace for " << table_name
+				<< " because the DISCARD flag is set.";
+ goto next;
}
- mem_free(name);
- mtr_start(&mtr);
+ /* For tables or partitions using .ibd files, the flag
+ DICT_TF2_USE_FILE_PER_TABLE was not set in MIX_LEN
+ before MySQL 5.6.5. The flag should not have been
+ introduced in persistent storage. MariaDB will keep
+ setting the flag when writing SYS_TABLES entries for
+ newly created or rebuilt tables or partitions, but
+ will otherwise ignore the flag. */
+
+ /* Now that we have the proper name for this tablespace,
+ look to see if it is already in the tablespace cache. */
+ if (fil_space_for_table_exists_in_mem(
+ space_id, table_name.m_name, flags)) {
+ /* Recovery can open a datafile that does not
+ match SYS_DATAFILES. If they don't match, update
+ SYS_DATAFILES. */
+ char *dict_path = dict_get_first_path(space_id);
+ char *fil_path = fil_space_get_first_path(space_id);
+ if (dict_path && fil_path
+ && strcmp(dict_path, fil_path)) {
+ dict_update_filepath(space_id, fil_path);
+ }
+ ut_free(dict_path);
+ ut_free(fil_path);
+ ut_free(table_name.m_name);
+ continue;
+ }
+
+ /* Set the expected filepath from the data dictionary.
+ If the file is found elsewhere (from an ISL or the default
+ location) or this path is the same file but looks different,
+ fil_ibd_open() will update the dictionary with what is
+ opened. */
+ char* filepath = dict_get_first_path(space_id);
+
+ /* Check that the .ibd file exists. */
+ dberr_t err = fil_ibd_open(
+ validate,
+ !srv_read_only_mode && srv_log_file_size != 0,
+ FIL_TYPE_TABLESPACE,
+ space_id, dict_tf_to_fsp_flags(flags),
+ table_name.m_name,
+ filepath);
+
+ if (err != DB_SUCCESS) {
+ ib::warn() << "Ignoring tablespace for "
+ << table_name
+ << " because it could not be opened.";
+ }
- btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr);
+ max_space_id = ut_max(max_space_id, space_id);
+
+ ut_free(table_name.m_name);
+ ut_free(filepath);
}
- goto loop;
+ mtr_commit(&mtr);
+
+ DBUG_RETURN(max_space_id);
}
-/********************************************************************//**
-Loads a table column definition from a SYS_COLUMNS record to
-dict_table_t.
-@return error message, or NULL on success */
-UNIV_INTERN
+/** Check each tablespace found in the data dictionary.
+Then look at each table defined in SYS_TABLES that has a space_id > 0
+to find all the file-per-table tablespaces.
+
+During crash recovery we already have some tablespace objects created from
+processing the REDO log. Any other tablespace in SYS_TABLESPACES not
+previously used in recovery will be opened here. We will compare the
+space_id information in the data dictionary to what we find in the
+tablespace file. In addition, more validation will be done if recovery
+was needed and force_recovery is not set.
+
+We also scan for the biggest space id and store it in fil_system. */
+void dict_check_tablespaces_and_store_max_id()
+{
+ mtr_t mtr;
+
+ DBUG_ENTER("dict_check_tablespaces_and_store_max_id");
+
+ rw_lock_x_lock(&dict_operation_lock);
+ mutex_enter(&dict_sys->mutex);
+
+ /* Initialize the max space_id from sys header */
+ mtr_start(&mtr);
+ ulint max_space_id = mtr_read_ulint(
+ dict_hdr_get(&mtr) + DICT_HDR_MAX_SPACE_ID,
+ MLOG_4BYTES, &mtr);
+ mtr_commit(&mtr);
+
+ fil_set_max_space_id_if_bigger(max_space_id);
+
+ /* Open all tablespaces referenced in SYS_TABLES.
+ This will update SYS_TABLESPACES and SYS_DATAFILES if it
+ finds any file-per-table tablespaces not already there. */
+ max_space_id = dict_check_sys_tables();
+ fil_set_max_space_id_if_bigger(max_space_id);
+
+ mutex_exit(&dict_sys->mutex);
+ rw_lock_x_unlock(&dict_operation_lock);
+
+ DBUG_VOID_RETURN;
+}
+
+/** Error message for a delete-marked record in dict_load_column_low() */
+static const char* dict_load_column_del = "delete-marked record in SYS_COLUMNS";
+
+/** Load a table column definition from a SYS_COLUMNS record to dict_table_t.
+@return error message
+@retval NULL on success */
+static
const char*
dict_load_column_low(
-/*=================*/
dict_table_t* table, /*!< in/out: table, could be NULL
if we just populate a dict_column_t
struct with information from
@@ -1220,7 +1556,10 @@ dict_load_column_low(
or NULL if table != NULL */
table_id_t* table_id, /*!< out: table id */
const char** col_name, /*!< out: column name */
- const rec_t* rec) /*!< in: SYS_COLUMNS record */
+ const rec_t* rec, /*!< in: SYS_COLUMNS record */
+ ulint* nth_v_col) /*!< out: if not NULL, this
+ records the "n" of "nth" virtual
+ column */
{
char* name;
const byte* field;
@@ -1229,11 +1568,12 @@ dict_load_column_low(
ulint prtype;
ulint col_len;
ulint pos;
+ ulint num_base;
ut_ad(table || column);
if (rec_get_deleted_flag(rec, 0)) {
- return("delete-marked record in SYS_COLUMNS");
+ return(dict_load_column_del);
}
if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_COLUMNS) {
@@ -1256,16 +1596,11 @@ err_len:
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_COLUMNS__POS, &len);
if (len != 4) {
-
goto err_len;
}
pos = mach_read_from_4(field);
- if (table && table->n_def != pos) {
- return("SYS_COLUMNS.POS mismatch");
- }
-
rec_get_nth_field_offs_old(
rec, DICT_FLD__SYS_COLUMNS__DB_TRX_ID, &len);
if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) {
@@ -1325,6 +1660,10 @@ err_len:
}
}
+ if (table && table->n_def != pos && !(prtype & DATA_VIRTUAL)) {
+ return("SYS_COLUMNS.POS mismatch");
+ }
+
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_COLUMNS__LEN, &len);
if (len != 4) {
@@ -1336,15 +1675,124 @@ err_len:
if (len != 4) {
goto err_len;
}
+ num_base = mach_read_from_4(field);
- if (!column) {
- dict_mem_table_add_col(table, heap, name, mtype,
- prtype, col_len);
+ if (column == NULL) {
+ if (prtype & DATA_VIRTUAL) {
+#ifdef UNIV_DEBUG
+ dict_v_col_t* vcol =
+#endif
+ dict_mem_table_add_v_col(
+ table, heap, name, mtype,
+ prtype, col_len,
+ dict_get_v_col_mysql_pos(pos), num_base);
+ ut_ad(vcol->v_pos == dict_get_v_col_pos(pos));
+ } else {
+ ut_ad(num_base == 0);
+ dict_mem_table_add_col(table, heap, name, mtype,
+ prtype, col_len);
+ }
} else {
dict_mem_fill_column_struct(column, pos, mtype,
prtype, col_len);
}
+ /* Report the virtual column number */
+ if ((prtype & DATA_VIRTUAL) && nth_v_col != NULL) {
+ *nth_v_col = dict_get_v_col_pos(pos);
+ }
+
+ return(NULL);
+}
+
+/** Error message for a delete-marked record in dict_load_virtual_low() */
+static const char* dict_load_virtual_del = "delete-marked record in SYS_VIRTUAL";
+
+/** Load a virtual column "mapping" (to base columns) information
+from a SYS_VIRTUAL record.
+@param[in,out] table table
+@param[in,out] heap memory heap
+@param[in,out] column mapped base column's dict_col_t
+@param[in,out] table_id table id
+@param[in,out] pos virtual column position
+@param[in,out] base_pos base column position
+@param[in] rec SYS_VIRTUAL record
+@return error message
+@retval NULL on success */
+static
+const char*
+dict_load_virtual_low(
+ dict_table_t* table,
+ mem_heap_t* heap,
+ dict_col_t** column,
+ table_id_t* table_id,
+ ulint* pos,
+ ulint* base_pos,
+ const rec_t* rec)
+{
+ const byte* field;
+ ulint len;
+ ulint base;
+
+ if (rec_get_deleted_flag(rec, 0)) {
+ return(dict_load_virtual_del);
+ }
+
+ if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_VIRTUAL) {
+ return("wrong number of columns in SYS_VIRTUAL record");
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_VIRTUAL__TABLE_ID, &len);
+ if (len != 8) {
+err_len:
+ return("incorrect column length in SYS_VIRTUAL");
+ }
+
+ if (table_id != NULL) {
+ *table_id = mach_read_from_8(field);
+ } else if (table->id != mach_read_from_8(field)) {
+ return("SYS_VIRTUAL.TABLE_ID mismatch");
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_VIRTUAL__POS, &len);
+ if (len != 4) {
+ goto err_len;
+ }
+
+ if (pos != NULL) {
+ *pos = mach_read_from_4(field);
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_VIRTUAL__BASE_POS, &len);
+ if (len != 4) {
+ goto err_len;
+ }
+
+ base = mach_read_from_4(field);
+
+ if (base_pos != NULL) {
+ *base_pos = base;
+ }
+
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_VIRTUAL__DB_TRX_ID, &len);
+ if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) {
+ goto err_len;
+ }
+
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_VIRTUAL__DB_ROLL_PTR, &len);
+ if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) {
+ goto err_len;
+ }
+
+ if (column != NULL) {
+ *column = dict_table_get_nth_col(table, base);
+ }
+
return(NULL);
}
@@ -1367,8 +1815,9 @@ dict_load_columns(
byte* buf;
ulint i;
mtr_t mtr;
+ ulint n_skipped = 0;
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
mtr_start(&mtr);
@@ -1392,26 +1841,37 @@ dict_load_columns(
btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
BTR_SEARCH_LEAF, &pcur, &mtr);
- for (i = 0; i + DATA_N_SYS_COLS < (ulint) table->n_cols; i++) {
+
+ ut_ad(table->n_t_cols == static_cast<ulint>(
+ table->n_cols) + static_cast<ulint>(table->n_v_cols));
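+
+	/* The loop below visits one SYS_COLUMNS record per non-system
+	column; delete-marked records increment n_skipped, which keeps
+	both the loop bound and the FTS doc_col position in step. */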
+
+ for (i = 0;
+ i + DATA_N_SYS_COLS < table->n_t_cols + n_skipped;
+ i++) {
const char* err_msg;
const char* name = NULL;
+ ulint nth_v_col = ULINT_UNDEFINED;
rec = btr_pcur_get_rec(&pcur);
ut_a(btr_pcur_is_on_user_rec(&pcur));
err_msg = dict_load_column_low(table, heap, NULL, NULL,
- &name, rec);
+ &name, rec, &nth_v_col);
- if (err_msg) {
- fprintf(stderr, "InnoDB: %s\n", err_msg);
- ut_error;
+ if (err_msg == dict_load_column_del) {
+ n_skipped++;
+ goto next_rec;
+ } else if (err_msg) {
+ ib::fatal() << err_msg;
}
/* Note: Currently we have one DOC_ID column that is
- shared by all FTS indexes on a table. */
+ shared by all FTS indexes on a table. And only non-virtual
+ column can be used for FULLTEXT index */
if (innobase_strcasecmp(name,
- FTS_DOC_ID_COL_NAME) == 0) {
+ FTS_DOC_ID_COL_NAME) == 0
+ && nth_v_col == ULINT_UNDEFINED) {
dict_col_t* col;
/* As part of normal loading of tables the
table->flag is not set for tables with FTS
@@ -1428,7 +1888,7 @@ dict_load_columns(
ut_a(table->fts->doc_col == ULINT_UNDEFINED);
- col = dict_table_get_nth_col(table, i);
+ col = dict_table_get_nth_col(table, i - n_skipped);
ut_ad(col->len == sizeof(doc_id_t));
@@ -1439,7 +1899,103 @@ dict_load_columns(
table, DICT_TF2_FTS_ADD_DOC_ID);
}
- table->fts->doc_col = i;
+ table->fts->doc_col = i - n_skipped;
+ }
+next_rec:
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+ }
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+}
+
+/** Loads SYS_VIRTUAL info for one virtual column
+@param[in,out] table table
+@param[in] nth_v_col virtual column sequence num
+@param[in,out] v_col virtual column
+@param[in,out] heap memory heap
+*/
+static
+void
+dict_load_virtual_one_col(
+ dict_table_t* table,
+ ulint nth_v_col,
+ dict_v_col_t* v_col,
+ mem_heap_t* heap)
+{
+ dict_table_t* sys_virtual;
+ dict_index_t* sys_virtual_index;
+ btr_pcur_t pcur;
+ dtuple_t* tuple;
+ dfield_t* dfield;
+ const rec_t* rec;
+ byte* buf;
+ ulint i = 0;
+ mtr_t mtr;
+ ulint skipped = 0;
+
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ if (v_col->num_base == 0) {
+ return;
+ }
+
+ mtr_start(&mtr);
+
+ sys_virtual = dict_table_get_low("SYS_VIRTUAL");
+ sys_virtual_index = UT_LIST_GET_FIRST(sys_virtual->indexes);
+ ut_ad(!dict_table_is_comp(sys_virtual));
+
+ ut_ad(name_of_col_is(sys_virtual, sys_virtual_index,
+ DICT_FLD__SYS_VIRTUAL__POS, "POS"));
+
+ tuple = dtuple_create(heap, 2);
+
+ /* table ID field */
+ dfield = dtuple_get_nth_field(tuple, 0);
+
+ buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
+ mach_write_to_8(buf, table->id);
+
+ dfield_set_data(dfield, buf, 8);
+
+ /* virtual column pos field */
+ dfield = dtuple_get_nth_field(tuple, 1);
+
+ buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
+ ulint vcol_pos = dict_create_v_col_pos(nth_v_col, v_col->m_col.ind);
+ mach_write_to_4(buf, vcol_pos);
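+
+	/* SYS_VIRTUAL.POS stores this encoded position, as produced by
+	dict_create_v_col_pos(), rather than a raw column index; the
+	ut_ad(pos == vcol_pos) check in the loop below relies on it. */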
+
+ dfield_set_data(dfield, buf, 4);
+
+ dict_index_copy_types(tuple, sys_virtual_index, 2);
+
+ btr_pcur_open_on_user_rec(sys_virtual_index, tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ for (i = 0; i < v_col->num_base + skipped; i++) {
+ const char* err_msg;
+ ulint pos;
+
+ ut_ad(btr_pcur_is_on_user_rec(&pcur));
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ ut_a(btr_pcur_is_on_user_rec(&pcur));
+
+ err_msg = dict_load_virtual_low(table, heap,
+ &v_col->base_col[i - skipped],
+ NULL,
+ &pos, NULL, rec);
+
+ if (err_msg) {
+ if (err_msg != dict_load_virtual_del) {
+ ib::fatal() << err_msg;
+ } else {
+ skipped++;
+ }
+ } else {
+ ut_ad(pos == vcol_pos);
}
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
@@ -1449,17 +2005,32 @@ dict_load_columns(
mtr_commit(&mtr);
}
+/** Loads info from SYS_VIRTUAL for virtual columns.
+@param[in,out] table table
+@param[in] heap memory heap
+*/
+static
+void
+dict_load_virtual(
+ dict_table_t* table,
+ mem_heap_t* heap)
+{
+ for (ulint i = 0; i < table->n_v_cols; i++) {
+ dict_v_col_t* v_col = dict_table_get_nth_v_col(table, i);
+
+ dict_load_virtual_one_col(table, i, v_col, heap);
+ }
+}
+
/** Error message for a delete-marked record in dict_load_field_low() */
static const char* dict_load_field_del = "delete-marked record in SYS_FIELDS";
-/********************************************************************//**
-Loads an index field definition from a SYS_FIELDS record to
-dict_index_t.
-@return error message, or NULL on success */
-UNIV_INTERN
+/** Load an index field definition from a SYS_FIELDS record to dict_index_t.
+@return error message
+@retval NULL on success */
+static
const char*
dict_load_field_low(
-/*================*/
byte* index_id, /*!< in/out: index id (8 bytes)
an "in" value if index != NULL
and "out" if index == NULL */
@@ -1477,8 +2048,8 @@ dict_load_field_low(
{
const byte* field;
ulint len;
- ulint pos_and_prefix_len;
- ulint prefix_len;
+ unsigned pos_and_prefix_len;
+ unsigned prefix_len;
ibool first_field;
ulint position;
@@ -1596,7 +2167,7 @@ dict_load_fields(
mtr_t mtr;
dberr_t error;
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
mtr_start(&mtr);
@@ -1634,7 +2205,7 @@ dict_load_fields(
goto next_rec;
} else if (err_msg) {
- fprintf(stderr, "InnoDB: %s\n", err_msg);
+ ib::error() << err_msg;
error = DB_CORRUPTION;
goto func_exit;
}
@@ -1653,17 +2224,18 @@ func_exit:
static const char* dict_load_index_del = "delete-marked record in SYS_INDEXES";
/** Error message for table->id mismatch in dict_load_index_low() */
static const char* dict_load_index_id_err = "SYS_INDEXES.TABLE_ID mismatch";
+/** Error message for SYS_TABLES flags mismatch in dict_load_table_low() */
+static const char* dict_load_table_flags = "incorrect flags in SYS_TABLES";
-/********************************************************************//**
-Loads an index definition from a SYS_INDEXES record to dict_index_t.
+/** Load an index definition from a SYS_INDEXES record to dict_index_t.
If allocate=TRUE, we will create a dict_index_t structure and fill it
accordingly. If allocate=FALSE, the dict_index_t will be supplied by
-the caller and filled with information read from the record. @return
-error message, or NULL on success */
-UNIV_INTERN
+the caller and filled with information read from the record.
+@return error message
+@retval NULL on success */
+static
const char*
dict_load_index_low(
-/*================*/
byte* table_id, /*!< in/out: table id (8 bytes),
an "in" value if allocate=TRUE
and "out" when allocate=FALSE */
@@ -1683,6 +2255,7 @@ dict_load_index_low(
ulint n_fields;
ulint type;
ulint space;
+ unsigned merge_threshold;
if (allocate) {
/* If allocate=TRUE, no dict_index_t will
@@ -1694,7 +2267,27 @@ dict_load_index_low(
return(dict_load_index_del);
}
- if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_INDEXES) {
+ if (rec_get_n_fields_old(rec) == DICT_NUM_FIELDS__SYS_INDEXES) {
+ /* MERGE_THRESHOLD exists */
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_INDEXES__MERGE_THRESHOLD, &len);
+ switch (len) {
+ case 4:
+ merge_threshold = mach_read_from_4(field);
+ break;
+ case UNIV_SQL_NULL:
+ merge_threshold = DICT_INDEX_MERGE_THRESHOLD_DEFAULT;
+ break;
+ default:
+ return("incorrect MERGE_THRESHOLD length"
+ " in SYS_INDEXES");
+ }
+ } else if (rec_get_n_fields_old(rec)
+ == DICT_NUM_FIELDS__SYS_INDEXES - 1) {
+ /* MERGE_THRESHOLD doesn't exist */
+
+ merge_threshold = DICT_INDEX_MERGE_THRESHOLD_DEFAULT;
+ } else {
return("wrong number of columns in SYS_INDEXES record");
}
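+
+	/* Accepting both field counts keeps data dictionaries that were
+	created before the MERGE_THRESHOLD column existed loadable;
+	those records simply fall back to the default threshold. */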
@@ -1785,6 +2378,7 @@ err_len:
(*index)->id = id;
(*index)->page = mach_read_from_4(field);
ut_ad((*index)->page);
+ (*index)->merge_threshold = merge_threshold;
return(NULL);
}
@@ -1814,7 +2408,7 @@ dict_load_indexes(
mtr_t mtr;
dberr_t error = DB_SUCCESS;
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
mtr_start(&mtr);
@@ -1849,11 +2443,10 @@ dict_load_indexes(
for drop table */
if (dict_table_get_first_index(table) == NULL
&& !(ignore_err & DICT_ERR_IGNORE_CORRUPT)) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Cannot load table %s "
- "because it has no indexes in "
- "InnoDB internal data dictionary.",
- table->name);
+ ib::warn() << "Cannot load table "
+ << table->name
+ << " because it has no indexes in"
+ " InnoDB internal data dictionary.";
error = DB_CORRUPTION;
goto func_exit;
}
@@ -1864,24 +2457,30 @@ dict_load_indexes(
rec = btr_pcur_get_rec(&pcur);
if ((ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK)
- && rec_get_n_fields_old(rec)
- == DICT_NUM_FIELDS__SYS_INDEXES) {
+ && (rec_get_n_fields_old(rec)
+ == DICT_NUM_FIELDS__SYS_INDEXES
+ /* a record for older SYS_INDEXES table
+ (missing merge_threshold column) is acceptable. */
+ || rec_get_n_fields_old(rec)
+ == DICT_NUM_FIELDS__SYS_INDEXES - 1)) {
const byte* field;
ulint len;
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_INDEXES__NAME, &len);
if (len != UNIV_SQL_NULL
- && char(*field) == char(TEMP_INDEX_PREFIX)) {
+ && static_cast<char>(*field)
+ == static_cast<char>(*TEMP_INDEX_PREFIX_STR)) {
/* Skip indexes whose name starts with
- TEMP_INDEX_PREFIX, because they will
- be dropped during crash recovery. */
+ TEMP_INDEX_PREFIX_STR, because they will
+ be dropped by row_merge_drop_temp_indexes()
+ during crash recovery. */
goto next_rec;
}
}
- err_msg = dict_load_index_low(buf, table->name, heap, rec,
- TRUE, &index);
+ err_msg = dict_load_index_low(
+ buf, table->name.m_name, heap, rec, TRUE, &index);
ut_ad((index == NULL && err_msg != NULL)
|| (index != NULL && err_msg == NULL));
@@ -1891,13 +2490,15 @@ dict_load_indexes(
if (dict_table_get_first_index(table) == NULL
&& !(ignore_err & DICT_ERR_IGNORE_CORRUPT)) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Failed to load the "
- "clustered index for table %s "
- "because of the following error: %s. "
- "Refusing to load the rest of the "
- "indexes (if any) and the whole table "
- "altogether.", table->name, err_msg);
+
+ ib::warn() << "Failed to load the"
+ " clustered index for table "
+ << table->name
+ << " because of the following error: "
+ << err_msg << "."
+ " Refusing to load the rest of the"
+ " indexes (if any) and the whole table"
+ " altogether.";
error = DB_CORRUPTION;
goto func_exit;
}
@@ -1907,7 +2508,7 @@ dict_load_indexes(
/* Skip delete-marked records. */
goto next_rec;
} else if (err_msg) {
- fprintf(stderr, "InnoDB: %s\n", err_msg);
+ ib::error() << err_msg;
if (ignore_err & DICT_ERR_IGNORE_CORRUPT) {
goto next_rec;
}
@@ -1916,13 +2517,13 @@ dict_load_indexes(
}
ut_ad(index);
+ ut_ad(!dict_index_is_online_ddl(index));
/* Check whether the index is corrupted */
- if (dict_index_is_corrupted(index)) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: ", stderr);
- dict_index_name_print(stderr, NULL, index);
- fputs(" is corrupted\n", stderr);
+ if (index->is_corrupted()) {
+ ib::error() << "Index " << index->name
+ << " of table " << table->name
+ << " is corrupted";
if (!srv_load_corrupted
&& !(ignore_err & DICT_ERR_IGNORE_CORRUPT)
@@ -1938,15 +2539,14 @@ dict_load_indexes(
DICT_ERR_IGNORE_CORRUPT
3) if the index corrupted is a secondary
index */
- ut_print_timestamp(stderr);
- fputs(" InnoDB: load corrupted index ", stderr);
- dict_index_name_print(stderr, NULL, index);
- putc('\n', stderr);
+ ib::info() << "Load corrupted index "
+ << index->name
+ << " of table " << table->name;
}
}
if (index->type & DICT_FTS
- && !DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS)) {
+ && !dict_table_has_fts_index(table)) {
/* This should have been created by now. */
ut_a(table->fts != NULL);
DICT_TF2_FLAG_SET(table, DICT_TF2_FTS);
@@ -1955,23 +2555,23 @@ dict_load_indexes(
/* We check for unsupported types first, so that the
subsequent checks are relevant for the supported types. */
if (index->type & ~(DICT_CLUSTERED | DICT_UNIQUE
- | DICT_CORRUPT | DICT_FTS)) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unknown type %lu of index %s of table %s",
- (ulong) index->type, index->name, table->name);
+ | DICT_CORRUPT | DICT_FTS
+ | DICT_SPATIAL | DICT_VIRTUAL)) {
+
+ ib::error() << "Unknown type " << index->type
+ << " of index " << index->name
+ << " of table " << table->name;
error = DB_UNSUPPORTED;
dict_mem_index_free(index);
goto func_exit;
} else if (index->page == FIL_NULL
- && !table->file_unreadable
+ && table->is_readable()
&& (!(index->type & DICT_FTS))) {
- fprintf(stderr,
- "InnoDB: Error: trying to load index %s"
- " for table %s\n"
- "InnoDB: but the index tree has been freed!\n",
- index->name, table->name);
+ ib::error() << "Trying to load index " << index->name
+ << " for table " << table->name
+ << ", but the index tree has been freed!";
if (ignore_err & DICT_ERR_IGNORE_INDEX_ROOT) {
/* If caller can tolerate this error,
@@ -1982,12 +2582,11 @@ dict_load_indexes(
dictionary cache for such metadata corruption,
since we would always be able to set it
when loading the dictionary cache */
- dict_set_corrupted_index_cache_only(
- index, table);
+ index->table = table;
+ dict_set_corrupted_index_cache_only(index);
- fprintf(stderr,
- "InnoDB: Index is corrupt but forcing"
- " load into data dictionary\n");
+ ib::info() << "Index is corrupt but forcing"
+ " load into data dictionary";
} else {
corrupted:
dict_mem_index_free(index);
@@ -1997,13 +2596,9 @@ corrupted:
} else if (!dict_index_is_clust(index)
&& NULL == dict_table_get_first_index(table)) {
- fputs("InnoDB: Error: trying to load index ",
- stderr);
- ut_print_name(stderr, NULL, FALSE, index->name);
- fputs(" for table ", stderr);
- ut_print_name(stderr, NULL, TRUE, table->name);
- fputs("\nInnoDB: but the first index"
- " is not clustered!\n", stderr);
+ ib::error() << "Trying to load index " << index->name
+ << " for table " << table->name
+ << ", but the first index is not clustered!";
goto corrupted;
} else if (dict_is_sys_table(table->id)
@@ -2034,8 +2629,16 @@ next_rec:
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
}
+ ut_ad(table->fts_doc_id_index == NULL);
+
+ if (table->fts != NULL) {
+ table->fts_doc_id_index = dict_table_get_index_on_name(
+ table, FTS_DOC_ID_INDEX_NAME);
+ }
+
/* If the table contains FTS indexes, populate table->fts->indexes */
- if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS)) {
+ if (dict_table_has_fts_index(table)) {
+ ut_ad(table->fts_doc_id_index != NULL);
/* table->fts->indexes should have been created. */
ut_a(table->fts->indexes != NULL);
dict_table_get_all_fts_indexes(table, table->fts->indexes);
@@ -2048,154 +2651,40 @@ func_exit:
return(error);
}
-/********************************************************************//**
-Loads a table definition from a SYS_TABLES record to dict_table_t.
-Does not load any columns or indexes.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_load_table_low(
-/*================*/
- const char* name, /*!< in: table name */
- const rec_t* rec, /*!< in: SYS_TABLES record */
- dict_table_t** table) /*!< out,own: table, or NULL */
+/** Load a table definition from a SYS_TABLES record to dict_table_t.
+Do not load any columns or indexes.
+@param[in] name Table name
+@param[in] rec SYS_TABLES record
+@param[out,own] table table, or NULL
+@return error message
+@retval NULL on success */
+static const char* dict_load_table_low(const table_name_t& name,
+ const rec_t* rec, dict_table_t** table)
{
- const byte* field;
- ulint len;
- ulint space;
+ table_id_t table_id;
+ ulint space_id;
ulint n_cols;
- ulint flags = 0;
+ ulint t_num;
+ ulint flags;
ulint flags2;
+ ulint n_v_col;
- if (rec_get_deleted_flag(rec, 0)) {
+ if (const char* error_text = dict_sys_tables_rec_check(rec)) {
*table = NULL;
- return("delete-marked record in SYS_TABLES");
- }
-
- if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_TABLES) {
- *table = NULL;
- return("wrong number of columns in SYS_TABLES record");
- }
-
- rec_get_nth_field_offs_old(
- rec, DICT_FLD__SYS_TABLES__NAME, &len);
- if (len == 0 || len == UNIV_SQL_NULL) {
-err_len:
- *table = NULL;
- return("incorrect column length in SYS_TABLES");
- }
- rec_get_nth_field_offs_old(
- rec, DICT_FLD__SYS_TABLES__DB_TRX_ID, &len);
- if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) {
- goto err_len;
- }
- rec_get_nth_field_offs_old(
- rec, DICT_FLD__SYS_TABLES__DB_ROLL_PTR, &len);
- if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) {
- goto err_len;
- }
-
- rec_get_nth_field_offs_old(rec, DICT_FLD__SYS_TABLES__ID, &len);
- if (len != 8) {
- goto err_len;
- }
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__N_COLS, &len);
- if (len != 4) {
- goto err_len;
- }
-
- n_cols = mach_read_from_4(field);
-
- rec_get_nth_field_offs_old(rec, DICT_FLD__SYS_TABLES__TYPE, &len);
- if (len != 4) {
- goto err_len;
- }
-
- rec_get_nth_field_offs_old(
- rec, DICT_FLD__SYS_TABLES__MIX_ID, &len);
- if (len != 8) {
- goto err_len;
+ return(error_text);
}
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__MIX_LEN, &len);
- if (len != 4) {
- goto err_len;
- }
-
- /* MIX_LEN may hold additional flags in post-antelope file formats. */
- flags2 = mach_read_from_4(field);
-
- /* DICT_TF2_FTS will be set when indexes is being loaded */
- flags2 &= ~DICT_TF2_FTS;
-
- rec_get_nth_field_offs_old(
- rec, DICT_FLD__SYS_TABLES__CLUSTER_ID, &len);
- if (len != UNIV_SQL_NULL) {
- goto err_len;
- }
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__SPACE, &len);
- if (len != 4) {
- goto err_len;
- }
-
- space = mach_read_from_4(field);
-
- /* Check if the tablespace exists and has the right name */
- flags = dict_sys_tables_get_flags(rec);
-
- if (UNIV_UNLIKELY(flags == ULINT_UNDEFINED)) {
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__TYPE, &len);
- ut_ad(len == 4); /* this was checked earlier */
- flags = mach_read_from_4(field);
-
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_filename(stderr, name);
- fprintf(stderr, "\n"
- "InnoDB: in InnoDB data dictionary"
- " has unknown type %lx.\n",
- (ulong) flags);
+ if (!dict_sys_tables_rec_read(rec, name, &table_id, &space_id,
+ &t_num, &flags, &flags2)) {
*table = NULL;
- return("incorrect flags in SYS_TABLES");
+ return(dict_load_table_flags);
}
- /* The high-order bit of N_COLS is the "compact format" flag.
- For tables in that format, MIX_LEN may hold additional flags. */
- if (n_cols & DICT_N_COLS_COMPACT) {
- ut_ad(flags & DICT_TF_COMPACT);
-
- if (flags2 & ~DICT_TF2_BIT_MASK) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: table ", stderr);
- ut_print_filename(stderr, name);
- fprintf(stderr, "\n"
- "InnoDB: in InnoDB data dictionary"
- " has unknown flags %lx.\n",
- (ulong) flags2);
-
- /* Clean it up and keep going */
- flags2 &= DICT_TF2_BIT_MASK;
- }
- } else {
- /* Do not trust the MIX_LEN field when the
- row format is Redundant. */
- flags2 = 0;
- }
+ dict_table_decode_n_col(t_num, &n_cols, &n_v_col);
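+
+	/* SYS_TABLES.N_COLS encodes the ordinary and virtual column
+	counts together; dict_table_decode_n_col() splits t_num into
+	n_cols and n_v_col before the in-memory table is created. */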
- /* See if the tablespace is available. */
*table = dict_mem_table_create(
- name, space, n_cols & ~DICT_N_COLS_COMPACT, flags, flags2);
-
- field = rec_get_nth_field_old(rec, DICT_FLD__SYS_TABLES__ID, &len);
- ut_ad(len == 8); /* this was checked earlier */
-
- (*table)->id = mach_read_from_8(field);
+ name.m_name, space_id, n_cols + n_v_col, n_v_col, flags, flags2);
+ (*table)->id = table_id;
(*table)->file_unreadable = false;
return(NULL);
@@ -2207,65 +2696,71 @@ table->data_dir_path and replace the 'databasename/tablename.ibd'
portion with 'tablename'.
This allows SHOW CREATE TABLE to return the correct DATA DIRECTORY path.
Make this data directory path only if it has not yet been saved. */
-UNIV_INTERN
+static
void
dict_save_data_dir_path(
/*====================*/
dict_table_t* table, /*!< in/out: table */
char* filepath) /*!< in: filepath of tablespace */
{
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
ut_a(DICT_TF_HAS_DATA_DIR(table->flags));
ut_a(!table->data_dir_path);
ut_a(filepath);
/* Be sure this filepath is not the default filepath. */
- char* default_filepath = fil_make_ibd_name(table->name, false);
- if (strcmp(filepath, default_filepath)) {
- ulint pathlen = strlen(filepath);
- ut_a(pathlen < OS_FILE_MAX_PATH);
- ut_a(0 == strcmp(filepath + pathlen - 4, ".ibd"));
-
- table->data_dir_path = mem_heap_strdup(table->heap, filepath);
- os_file_make_data_dir_path(table->data_dir_path);
- } else {
- /* This does not change SYS_DATAFILES or SYS_TABLES
- or FSP_FLAGS on the header page of the tablespace,
- but it makes dict_table_t consistent */
- table->flags &= ~DICT_TF_MASK_DATA_DIR;
+ char* default_filepath = fil_make_filepath(
+ NULL, table->name.m_name, IBD, false);
+ if (default_filepath) {
+ if (0 != strcmp(filepath, default_filepath)) {
+ ulint pathlen = strlen(filepath);
+ ut_a(pathlen < OS_FILE_MAX_PATH);
+ ut_a(0 == strcmp(filepath + pathlen - 4, DOT_IBD));
+
+ table->data_dir_path = mem_heap_strdup(
+ table->heap, filepath);
+ os_file_make_data_dir_path(table->data_dir_path);
+ }
+
+ ut_free(default_filepath);
}
- mem_free(default_filepath);
}
-/*****************************************************************//**
-Make sure the data_file_name is saved in dict_table_t if needed. Try to
-read it from the file dictionary first, then from SYS_DATAFILES. */
-UNIV_INTERN
+/** Make sure the data_dir_path is saved in dict_table_t if DATA DIRECTORY
+was used. Try to read it from the fil_system first, then from SYS_DATAFILES.
+@param[in] table Table object
+@param[in] dict_mutex_own true if dict_sys->mutex is owned already */
void
dict_get_and_save_data_dir_path(
-/*============================*/
- dict_table_t* table, /*!< in/out: table */
- bool dict_mutex_own) /*!< in: true if dict_sys->mutex
- is owned already */
+ dict_table_t* table,
+ bool dict_mutex_own)
{
- bool is_temp = DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY);
+ ut_ad(!dict_table_is_temporary(table));
- if (!is_temp && !table->data_dir_path && table->space) {
+ if (!table->data_dir_path && table->space) {
char* path = fil_space_get_first_path(table->space);
if (!dict_mutex_own) {
dict_mutex_enter_for_mysql();
}
- if (!path) {
- path = dict_get_first_path(
- table->space, table->name);
+
+ if (path == NULL) {
+ path = dict_get_first_path(table->space);
}
- if (path) {
+ if (path != NULL) {
table->flags |= (1 << DICT_TF_POS_DATA_DIR);
dict_save_data_dir_path(table, path);
- mem_free(path);
+ ut_free(path);
+ }
+
+ if (table->data_dir_path == NULL) {
+ /* Since we did not set the table data_dir_path,
+ unset the flag. This does not change SYS_DATAFILES
+ or SYS_TABLES or FSP_FLAGS on the header page of the
+ tablespace, but it makes dict_table_t consistent. */
+ table->flags &= ~DICT_TF_MASK_DATA_DIR;
}
if (!dict_mutex_own) {
@@ -2274,25 +2769,141 @@ dict_get_and_save_data_dir_path(
}
}
-/********************************************************************//**
-Loads a table definition and also all its index definitions, and also
+/** Loads a table definition and also all its index definitions, and also
the cluster definition if the table is a member of a cluster. Also loads
all foreign key constraints where the foreign key is in the table or where
-a foreign key references columns in this table. Adds all these to the data
-dictionary cache.
+a foreign key references columns in this table.
+@param[in] name Table name in the dbname/tablename format
+@param[in] ignore_err Error to be ignored when loading
+ table and its index definition
+@return table, NULL if it does not exist; if the table is stored in an
+.ibd file, but the file does not exist, then we set the file_unreadable
+flag in the table object we return. */
+dict_table_t* dict_load_table(const char* name, dict_err_ignore_t ignore_err)
+{
+ dict_names_t fk_list;
+ dict_table_t* result;
+ dict_names_t::iterator i;
+
+ DBUG_ENTER("dict_load_table");
+ DBUG_PRINT("dict_load_table", ("loading table: '%s'", name));
+
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ result = dict_table_check_if_in_cache_low(name);
+
+ if (!result) {
+ result = dict_load_table_one(const_cast<char*>(name),
+ ignore_err, fk_list);
+ while (!fk_list.empty()) {
+ if (!dict_table_check_if_in_cache_low(fk_list.front()))
+ dict_load_table_one(
+ const_cast<char*>(fk_list.front()),
+ ignore_err, fk_list);
+ fk_list.pop_front();
+ }
+ }
+
+ DBUG_RETURN(result);
+}
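+
+/* Illustrative call only (the exact call sites vary): the caller must
+hold dict_sys->mutex, per the assertion above.
+
+	mutex_enter(&dict_sys->mutex);
+	dict_table_t* t = dict_load_table("test/t1", DICT_ERR_IGNORE_NONE);
+	mutex_exit(&dict_sys->mutex);
+
+The fk_list loop above then loads any tables referenced by foreign
+keys iteratively, without recursion. */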
+
+/** Opens a tablespace for dict_load_table_one()
+@param[in,out] table A table that refers to the tablespace to open
+@param[in] ignore_err Whether to ignore an error. */
+UNIV_INLINE
+void
+dict_load_tablespace(
+ dict_table_t* table,
+ dict_err_ignore_t ignore_err)
+{
+ ut_ad(!dict_table_is_temporary(table));
+
+ /* The system tablespace is always available. */
+ if (is_system_tablespace(table->space)) {
+ return;
+ }
+
+ if (table->flags2 & DICT_TF2_DISCARDED) {
+ ib::warn() << "Tablespace for table " << table->name
+ << " is set as discarded.";
+ table->file_unreadable = true;
+ return;
+ }
+
+ char* space_name = table->name.m_name;
+
+ /* The tablespace may already be open. */
+ if (fil_space_for_table_exists_in_mem(
+ table->space, space_name, table->flags)) {
+ return;
+ }
+
+ if (ignore_err == DICT_ERR_IGNORE_DROP) {
+ table->file_unreadable = true;
+ return;
+ }
+
+ if (!(ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK)) {
+ ib::error() << "Failed to find tablespace for table "
+ << table->name << " in the cache. Attempting"
+ " to load the tablespace with space id "
+ << table->space;
+ }
+
+ /* Use the remote filepath if needed. This parameter is optional
+ in the call to fil_ibd_open(). If not supplied, it will be built
+ from the space_name. */
+ char* filepath = NULL;
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ /* This will set table->data_dir_path from either
+ fil_system or SYS_DATAFILES */
+ dict_get_and_save_data_dir_path(table, true);
+
+ if (table->data_dir_path) {
+ filepath = fil_make_filepath(
+ table->data_dir_path,
+ table->name.m_name, IBD, true);
+ }
+ }
+
+ /* Try to open the tablespace. We set the 2nd param (fix_dict) to
+ false because we do not have an x-lock on dict_operation_lock */
+ dberr_t err = fil_ibd_open(
+ true, false, FIL_TYPE_TABLESPACE, table->space,
+ dict_tf_to_fsp_flags(table->flags),
+ space_name, filepath);
+
+ if (err != DB_SUCCESS) {
+ /* We failed to find a sensible tablespace file */
+ table->file_unreadable = true;
+ }
+
+ ut_free(filepath);
+}
+
+/** Loads a table definition and also all its index definitions.
+
+Loads those foreign key constraints whose referenced table is already in
+dictionary cache. If a foreign key constraint is not loaded, then the
+referenced table is pushed into the output stack (fk_tables), if it is not
+NULL. These tables must be subsequently loaded so that all the foreign
+key constraints are loaded into memory.
+
+@param[in] name Table name in the db/tablename format
+@param[in] ignore_err Error to be ignored when loading table
+ and its index definition
+@param[out] fk_tables Related table names that must also be
+ loaded to ensure that all foreign key
+ constraints are loaded.
@return table, NULL if it does not exist; if the table is stored in an
.ibd file, but the file does not exist, then we set the
-ibd_file_missing flag TRUE in the table object we return */
-UNIV_INTERN
+file_unreadable flag in the table object we return */
+static
dict_table_t*
-dict_load_table(
-/*============*/
- const char* name, /*!< in: table name in the
- databasename/tablename format */
- ibool cached, /*!< in: TRUE=add to cache, FALSE=do not */
- dict_err_ignore_t ignore_err)
- /*!< in: error to be ignored when loading
- table and its indexes' definition */
+dict_load_table_one(
+ const table_name_t& name,
+ dict_err_ignore_t ignore_err,
+ dict_names_t& fk_tables)
{
dberr_t err;
dict_table_t* table;
@@ -2305,11 +2916,12 @@ dict_load_table(
const rec_t* rec;
const byte* field;
ulint len;
- char* filepath = NULL;
- const char* err_msg;
mtr_t mtr;
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ DBUG_ENTER("dict_load_table_one");
+ DBUG_PRINT("dict_load_table_one", ("table: %s", name.m_name));
+
+ ut_ad(mutex_own(&dict_sys->mutex));
heap = mem_heap_create(32000);
@@ -2332,7 +2944,7 @@ dict_load_table(
tuple = dtuple_create(heap, 1);
dfield = dtuple_get_nth_field(tuple, 0);
- dfield_set_data(dfield, name, ut_strlen(name));
+ dfield_set_data(dfield, name.m_name, ut_strlen(name.m_name));
dict_index_copy_types(tuple, sys_index, 1);
btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
@@ -2347,104 +2959,41 @@ err_exit:
mtr_commit(&mtr);
mem_heap_free(heap);
- return(NULL);
+ DBUG_RETURN(NULL);
}
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_TABLES__NAME, &len);
/* Check if the table name in record is the searched one */
- if (len != ut_strlen(name) || ut_memcmp(name, field, len) != 0) {
+ if (len != ut_strlen(name.m_name)
+ || 0 != ut_memcmp(name.m_name, field, len)) {
goto err_exit;
}
- err_msg = dict_load_table_low(name, rec, &table);
-
- if (err_msg) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: %s\n", err_msg);
+ if (const char* err_msg = dict_load_table_low(name, rec, &table)) {
+ if (err_msg != dict_load_table_flags) {
+ ib::error() << err_msg;
+ }
goto err_exit;
}
- char table_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(table_name, sizeof(table_name), name, FALSE);
-
btr_pcur_close(&pcur);
mtr_commit(&mtr);
- if (table->space == 0) {
- /* The system tablespace is always available. */
- } else if (table->flags2 & DICT_TF2_DISCARDED) {
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "Table '%s' tablespace is set as discarded.",
- table_name);
-
- table->file_unreadable = true;
-
- } else if (!fil_space_for_table_exists_in_mem(
- table->space, name, false, true, heap,
- table->id, table->flags)) {
-
- if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY)) {
- /* Do not bother to retry opening temporary tables. */
- table->file_unreadable = true;
-
- } else {
- if (!(ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK)) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Failed to find tablespace for "
- "table '%s' in the cache. "
- "Attempting to load the tablespace "
- "with space id %lu.",
- table_name, (ulong) table->space);
- }
-
- /* Use the remote filepath if needed. */
- /* This needs to be added to the tablex1
- from SYS_DATAFILES */
- dict_get_and_save_data_dir_path(table, true);
-
- if (table->data_dir_path) {
- filepath = os_file_make_remote_pathname(
- table->data_dir_path,
- table->name, "ibd");
- }
-
- /* Try to open the tablespace. We set the
- 2nd param (fix_dict = false) here because we
- do not have an x-lock on dict_operation_lock */
- err = fil_open_single_table_tablespace(
- true, false, table->space,
- dict_tf_to_fsp_flags(table->flags),
- name, filepath);
-
- if (err != DB_SUCCESS) {
- /* We failed to find a sensible
- tablespace file */
-
- table->file_unreadable = true;
- }
-
- if (filepath) {
- mem_free(filepath);
- }
- }
- }
+ dict_load_tablespace(table, ignore_err);
dict_load_columns(table, heap);
- if (cached) {
- dict_table_add_to_cache(table, TRUE, heap);
- } else {
- dict_table_add_system_columns(table, heap);
- }
+ dict_load_virtual(table, heap);
+
+ dict_table_add_to_cache(table, TRUE, heap);
mem_heap_empty(heap);
+ ut_ad(dict_tf2_is_valid(table->flags, table->flags2));
+
/* If there is no tablespace for the table then we only need to
load the index definitions. So that we can IMPORT the tablespace
later. When recovering table locks for resurrected incomplete
@@ -2452,7 +3001,7 @@ err_exit:
were not allowed while the table is being locked by a transaction. */
dict_err_ignore_t index_load_err =
!(ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK)
- && table->file_unreadable
+ && !table->is_readable()
? DICT_ERR_IGNORE_ALL
: ignore_err;
@@ -2462,22 +3011,17 @@ err_exit:
/* Refuse to load the table if the table has a corrupted
cluster index */
if (!srv_load_corrupted) {
- fprintf(stderr, "InnoDB: Error: Load table ");
- ut_print_name(stderr, NULL, TRUE, table->name);
- fprintf(stderr, " failed, the table has corrupted"
- " clustered indexes. Turn on"
- " 'innodb_force_load_corrupted'"
- " to drop it\n");
+ ib::error() << "Load table " << table->name
+ << " failed, the table has"
+ " corrupted clustered indexes. Turn on"
+ " 'innodb_force_load_corrupted' to drop it";
dict_table_remove_from_cache(table);
table = NULL;
goto func_exit;
} else {
- dict_index_t* clust_index;
- clust_index = dict_table_get_first_index(table);
-
- if (dict_index_is_corrupted(clust_index)) {
- table->corrupted = TRUE;
+ if (table->indexes.start->is_corrupted()) {
+ table->corrupted = true;
}
}
}
@@ -2490,23 +3034,53 @@ err_exit:
of the error condition, since the user may want to dump data from the
clustered index. However we load the foreign key information only if
all indexes were loaded. */
- if (!cached || table->file_unreadable) {
+ if (!table->is_readable()) {
/* Don't attempt to load the indexes from disk. */
} else if (err == DB_SUCCESS) {
- err = dict_load_foreigns(table->name, NULL, true, true,
- ignore_err);
+ err = dict_load_foreigns(table->name.m_name, NULL,
+ true, true,
+ ignore_err, fk_tables);
if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Load table '%s' failed, the table has missing "
- "foreign key indexes. Turn off "
- "'foreign_key_checks' and try again.",
- table->name);
+ ib::warn() << "Load table " << table->name
+ << " failed, the table has missing"
+ " foreign key indexes. Turn off"
+ " 'foreign_key_checks' and try again.";
dict_table_remove_from_cache(table);
table = NULL;
} else {
+ dict_mem_table_fill_foreign_vcol_set(table);
table->fk_max_recusive_level = 0;
+
+ if (table->space
+ && !fil_space_get_size(table->space)) {
+corrupted:
+ table->corrupted = true;
+ table->file_unreadable = true;
+ } else {
+ const page_id_t page_id(
+ table->space,
+ dict_table_get_first_index(table)
+ ->page);
+ mtr.start();
+ buf_block_t* block = buf_page_get(
+ page_id,
+ dict_table_page_size(table),
+ RW_S_LATCH, &mtr);
+ const bool corrupted = !block
+ || page_get_space_id(block->frame)
+ != page_id.space()
+ || page_get_page_no(block->frame)
+ != page_id.page_no()
+ || mach_read_from_2(FIL_PAGE_TYPE
+ + block->frame)
+ != FIL_PAGE_INDEX;
+ mtr.commit();
+ if (corrupted) {
+ goto corrupted;
+ }
+ }
}
} else {
dict_index_t* index;
@@ -2517,14 +3091,11 @@ err_exit:
if (!srv_force_recovery
|| !index
- || !dict_index_is_clust(index)) {
-
+ || !index->is_primary()) {
dict_table_remove_from_cache(table);
table = NULL;
-
- } else if (dict_index_is_corrupted(index)
- && !table->file_unreadable) {
-
+ } else if (index->is_corrupted()
+ && table->is_readable()) {
/* It is possible we force to load a corrupted
clustered index if srv_load_corrupted is set.
Mark the table as corrupted in this case */
@@ -2536,8 +3107,8 @@ func_exit:
mem_heap_free(heap);
ut_ad(!table
- || ignore_err != DICT_ERR_IGNORE_NONE
- || table->file_unreadable
+ || (ignore_err & ~DICT_ERR_IGNORE_FK_NOKEY)
+ || !table->is_readable()
|| !table->corrupted);
if (table && table->fts) {
@@ -2560,13 +3131,12 @@ func_exit:
ut_ad(err != DB_SUCCESS || dict_foreign_set_validate(*table));
- return(table);
+ DBUG_RETURN(table);
}
/***********************************************************************//**
Loads a table object based on the table id.
-@return table; NULL if table does not exist */
-UNIV_INTERN
+@return table; NULL if table does not exist */
dict_table_t*
dict_load_table_on_id(
/*==================*/
@@ -2587,7 +3157,7 @@ dict_load_table_on_id(
dict_table_t* table;
mtr_t mtr;
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
table = NULL;
@@ -2647,10 +3217,9 @@ check_rec:
field = rec_get_nth_field_old(rec,
DICT_FLD__SYS_TABLE_IDS__NAME, &len);
/* Load the table definition to memory */
- table = dict_load_table(
- mem_heap_strdupl(
- heap, (char*) field, len),
- TRUE, ignore_err);
+ char* table_name = mem_heap_strdupl(
+ heap, (char*) field, len);
+ table = dict_load_table(table_name, ignore_err);
}
}
}
@@ -2662,104 +3231,10 @@ check_rec:
return(table);
}
-/***********************************************************************//**
-Loads a table id based on the index id.
-@return true if found */
-static
-bool
-dict_load_table_id_on_index_id(
-/*==================*/
- index_id_t index_id, /*!< in: index id */
- table_id_t* table_id) /*!< out: table id */
-{
- /* check hard coded indexes */
- switch(index_id) {
- case DICT_TABLES_ID:
- case DICT_COLUMNS_ID:
- case DICT_INDEXES_ID:
- case DICT_FIELDS_ID:
- *table_id = index_id;
- return true;
- case DICT_TABLE_IDS_ID:
- /* The following is a secondary index on SYS_TABLES */
- *table_id = DICT_TABLES_ID;
- return true;
- }
-
- bool found = false;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- /* NOTE that the operation of this function is protected by
- the dictionary mutex, and therefore no deadlocks can occur
- with other dictionary operations. */
-
- mtr_start(&mtr);
-
- btr_pcur_t pcur;
- const rec_t* rec = dict_startscan_system(&pcur, &mtr, SYS_INDEXES);
-
- while (rec) {
- ulint len;
- const byte* field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_INDEXES__ID, &len);
- ut_ad(len == 8);
-
- /* Check if the index id is the one searched for */
- if (index_id == mach_read_from_8(field)) {
- found = true;
- /* Now we get the table id */
- const byte* field = rec_get_nth_field_old(
- rec,
- DICT_FLD__SYS_INDEXES__TABLE_ID,
- &len);
- *table_id = mach_read_from_8(field);
- break;
- }
- mtr_commit(&mtr);
- mtr_start(&mtr);
- rec = dict_getnext_system(&pcur, &mtr);
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- return(found);
-}
-
-UNIV_INTERN
-dict_table_t*
-dict_table_open_on_index_id(
-/*==================*/
- index_id_t index_id, /*!< in: index id */
- bool dict_locked) /*!< in: dict locked */
-{
- if (!dict_locked) {
- mutex_enter(&dict_sys->mutex);
- }
-
- ut_ad(mutex_own(&dict_sys->mutex));
- table_id_t table_id;
- dict_table_t * table = NULL;
- if (dict_load_table_id_on_index_id(index_id, &table_id)) {
- bool local_dict_locked = true;
- table = dict_table_open_on_id(table_id,
- local_dict_locked,
- DICT_TABLE_OP_LOAD_TABLESPACE);
- }
-
- if (!dict_locked) {
- mutex_exit(&dict_sys->mutex);
- }
- return table;
-}
-
/********************************************************************//**
This function is called when the database is booted. Loads system table
index definitions except for the clustered index which is added to the
dictionary cache at booting before calling this function. */
-UNIV_INTERN
void
dict_load_sys_table(
/*================*/
@@ -2767,7 +3242,7 @@ dict_load_sys_table(
{
mem_heap_t* heap;
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
heap = mem_heap_create(1000);
@@ -2804,7 +3279,7 @@ dict_load_foreign_cols(
mtr_t mtr;
size_t id_len;
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
id_len = strlen(foreign->id);
@@ -2861,20 +3336,21 @@ dict_load_foreign_cols(
rec, DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME,
&ref_col_name_len);
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unable to load columns names for foreign "
- "key '%s' because it was not found in "
- "InnoDB internal table SYS_FOREIGN_COLS. The "
- "closest entry we found is: "
- "(ID='%.*s', POS=%lu, FOR_COL_NAME='%.*s', "
- "REF_COL_NAME='%.*s')",
- foreign->id,
- (int) len, field,
- mach_read_from_4(pos),
- (int) for_col_name_len, for_col_name,
- (int) ref_col_name_len, ref_col_name);
-
- ut_error;
+ ib::fatal sout;
+
+ sout << "Unable to load column names for foreign"
+ " key '" << foreign->id
+ << "' because it was not found in"
+ " InnoDB internal table SYS_FOREIGN_COLS. The"
+ " closest entry we found is:"
+ " (ID='";
+ sout.write(field, len);
+ sout << "', POS=" << mach_read_from_4(pos)
+ << ", FOR_COL_NAME='";
+ sout.write(for_col_name, for_col_name_len);
+ sout << "', REF_COL_NAME='";
+ sout.write(ref_col_name, ref_col_name_len);
+ sout << "')";
}
field = rec_get_nth_field_old(
@@ -2900,8 +3376,9 @@ dict_load_foreign_cols(
}
/***********************************************************************//**
-Loads a foreign key constraint to the dictionary cache.
-@return DB_SUCCESS or error code */
+Loads a foreign key constraint to the dictionary cache. If the referenced
+table is not yet loaded, it is added in the output parameter (fk_tables).
+@return DB_SUCCESS or error code */
static MY_ATTRIBUTE((nonnull(1), warn_unused_result))
dberr_t
dict_load_foreign(
@@ -2919,8 +3396,15 @@ dict_load_foreign(
bool check_charsets,
/*!< in: whether to check charset
compatibility */
- dict_err_ignore_t ignore_err)
+ dict_err_ignore_t ignore_err,
/*!< in: error to be ignored */
+ dict_names_t& fk_tables)
+ /*!< out: the foreign key constraint is added
+ to the dictionary cache only if the referenced
+ table is already in cache. Otherwise, the
+ foreign key constraint is not added to cache,
+ and the referenced table is added to this
+ stack. */
{
dict_foreign_t* foreign;
dict_table_t* sys_foreign;
@@ -2938,7 +3422,11 @@ dict_load_foreign(
dict_table_t* ref_table;
size_t id_len;
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ DBUG_ENTER("dict_load_foreign");
+ DBUG_PRINT("dict_load_foreign",
+ ("id: '%s', check_recursive: %d", id, check_recursive));
+
+ ut_ad(mutex_own(&dict_sys->mutex));
id_len = strlen(id);
@@ -2965,16 +3453,15 @@ dict_load_foreign(
|| rec_get_deleted_flag(rec, 0)) {
/* Not found */
- fprintf(stderr,
- "InnoDB: Error: cannot load foreign constraint "
- "%s: could not find the relevant record in "
- "SYS_FOREIGN\n", id);
+ ib::error() << "Cannot load foreign constraint " << id
+ << ": could not find the relevant record in "
+ << "SYS_FOREIGN";
btr_pcur_close(&pcur);
mtr_commit(&mtr);
mem_heap_free(heap2);
- return(DB_ERROR);
+ DBUG_RETURN(DB_ERROR);
}
field = rec_get_nth_field_old(rec, DICT_FLD__SYS_FOREIGN__ID, &len);
@@ -2982,16 +3469,19 @@ dict_load_foreign(
/* Check if the id in record is the searched one */
if (len != id_len || ut_memcmp(id, field, len) != 0) {
- fprintf(stderr,
- "InnoDB: Error: cannot load foreign constraint "
- "%s: found %.*s instead in SYS_FOREIGN\n",
- id, (int) len, field);
+ {
+ ib::error err;
+ err << "Cannot load foreign constraint " << id
+ << ": found ";
+ err.write(field, len);
+ err << " instead in SYS_FOREIGN";
+ }
btr_pcur_close(&pcur);
mtr_commit(&mtr);
mem_heap_free(heap2);
- return(DB_ERROR);
+ DBUG_RETURN(DB_ERROR);
}
/* Read the table names and the number of columns associated
@@ -3021,6 +3511,8 @@ dict_load_foreign(
foreign->heap, (char*) field, len);
dict_mem_foreign_table_name_lookup_set(foreign, TRUE);
+ const ulint foreign_table_name_len = len;
+
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_FOREIGN__REF_NAME, &len);
foreign->referenced_table_name = mem_heap_strdupl(
@@ -3033,54 +3525,28 @@ dict_load_foreign(
dict_load_foreign_cols(foreign);
ref_table = dict_table_check_if_in_cache_low(
- foreign->referenced_table_name_lookup);
-
- /* We could possibly wind up in a deep recursive calls if
- we call dict_table_get_low() again here if there
- is a chain of tables concatenated together with
- foreign constraints. In such case, each table is
- both a parent and child of the other tables, and
- act as a "link" in such table chains.
- To avoid such scenario, we would need to check the
- number of ancesters the current table has. If that
- exceeds DICT_FK_MAX_CHAIN_LEN, we will stop loading
- the child table.
- Foreign constraints are loaded in a Breath First fashion,
- that is, the index on FOR_NAME is scanned first, and then
- index on REF_NAME. So foreign constrains in which
- current table is a child (foreign table) are loaded first,
- and then those constraints where current table is a
- parent (referenced) table.
- Thus we could check the parent (ref_table) table's
- reference count (fk_max_recusive_level) to know how deep the
- recursive call is. If the parent table (ref_table) is already
- loaded, and its fk_max_recusive_level is larger than
- DICT_FK_MAX_CHAIN_LEN, we will stop the recursive loading
- by skipping loading the child table. It will not affect foreign
- constraint check for DMLs since child table will be loaded
- at that time for the constraint check. */
- if (!ref_table
- || ref_table->fk_max_recusive_level < DICT_FK_MAX_RECURSIVE_LOAD) {
-
- /* If the foreign table is not yet in the dictionary cache, we
- have to load it so that we are able to make type comparisons
- in the next function call. */
-
- for_table = dict_table_get_low(foreign->foreign_table_name_lookup);
-
- if (for_table && ref_table && check_recursive) {
- /* This is to record the longest chain of ancesters
- this table has, if the parent has more ancesters
- than this table has, record it after add 1 (for this
- parent */
- if (ref_table->fk_max_recusive_level
- >= for_table->fk_max_recusive_level) {
- for_table->fk_max_recusive_level =
- ref_table->fk_max_recusive_level + 1;
- }
- }
+ foreign->referenced_table_name_lookup);
+ for_table = dict_table_check_if_in_cache_low(
+ foreign->foreign_table_name_lookup);
+
+ if (!for_table) {
+ /* To avoid recursively loading the tables related through
+ the foreign key constraints, the child table name is saved
+ here. The child table will be loaded later, along with its
+ foreign key constraint. */
+
+ ut_a(ref_table != NULL);
+ fk_tables.push_back(
+ mem_heap_strdupl(ref_table->heap,
+ foreign->foreign_table_name_lookup,
+ foreign_table_name_len));
+
+ dict_foreign_remove_from_cache(foreign);
+ DBUG_RETURN(DB_SUCCESS);
}
+ ut_a(for_table || ref_table);
+
/* Note that there may already be a foreign constraint object in
the dictionary cache for this constraint: then the following
call only sets the pointers in it to point to the appropriate table
@@ -3089,18 +3555,21 @@ dict_load_foreign(
a new foreign key constraint but loading one from the data
dictionary. */
- return(dict_foreign_add_to_cache(foreign, col_names, check_charsets,
- ignore_err));
+ DBUG_RETURN(dict_foreign_add_to_cache(foreign, col_names,
+ check_charsets,
+ ignore_err));
}
/***********************************************************************//**
Loads foreign key constraints where the table is either the foreign key
holder or where the table is referenced by a foreign key. Adds these
-constraints to the data dictionary. Note that we know that the dictionary
-cache already contains all constraints where the other relevant table is
-already in the dictionary cache.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+constraints to the data dictionary.
+
+The foreign key constraint is loaded only if the referenced table is also
+in the dictionary cache. If the referenced table is not in dictionary
+cache, then it is added to the output parameter (fk_tables).
+
+@return DB_SUCCESS or error code */
dberr_t
dict_load_foreigns(
const char* table_name, /*!< in: table name */
@@ -3111,8 +3580,12 @@ dict_load_foreigns(
chained by FK */
bool check_charsets, /*!< in: whether to check
charset compatibility */
- dict_err_ignore_t ignore_err) /*!< in: error to be ignored */
-/*===============*/
+ dict_err_ignore_t ignore_err, /*!< in: error to be ignored */
+ dict_names_t& fk_tables)
+ /*!< out: stack of table
+ names which must be loaded
+ subsequently to load all the
+ foreign key constraints. */
{
ulint tuple_buf[(DTUPLE_EST_ALLOC(1) + sizeof(ulint) - 1)
/ sizeof(ulint)];
@@ -3127,18 +3600,17 @@ dict_load_foreigns(
dberr_t err;
mtr_t mtr;
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ DBUG_ENTER("dict_load_foreigns");
+
+ ut_ad(mutex_own(&dict_sys->mutex));
sys_foreign = dict_table_get_low("SYS_FOREIGN");
if (sys_foreign == NULL) {
/* No foreign keys defined yet in this database */
- fprintf(stderr,
- "InnoDB: Error: no foreign key system tables"
- " in the database\n");
-
- return(DB_ERROR);
+ ib::info() << "No foreign key system tables in the database";
+ DBUG_RETURN(DB_ERROR);
}
ut_ad(!dict_table_is_comp(sys_foreign));
@@ -3152,7 +3624,7 @@ dict_load_foreigns(
ut_ad(!dict_index_is_clust(sec_index));
start_load:
- tuple = dtuple_create_from_mem(tuple_buf, sizeof(tuple_buf), 1);
+ tuple = dtuple_create_from_mem(tuple_buf, sizeof(tuple_buf), 1, 0);
dfield = dtuple_get_nth_field(tuple, 0);
dfield_set_data(dfield, table_name, ut_strlen(table_name));
@@ -3224,12 +3696,13 @@ loop:
/* Load the foreign constraint definition to the dictionary cache */
err = dict_load_foreign(fk_id, col_names,
- check_recursive, check_charsets, ignore_err);
+ check_recursive, check_charsets, ignore_err,
+ fk_tables);
if (err != DB_SUCCESS) {
btr_pcur_close(&pcur);
- return(err);
+ DBUG_RETURN(err);
}
mtr_start(&mtr);
@@ -3258,5 +3731,83 @@ load_next_index:
goto start_load;
}
- return(DB_SUCCESS);
+ DBUG_RETURN(DB_SUCCESS);
+}
+
+/***********************************************************************//**
+Loads a table id based on the index id.
+@return true if found */
+static
+bool
+dict_load_table_id_on_index_id(
+/*===========================*/
+ index_id_t index_id, /*!< in: index id */
+ table_id_t* table_id) /*!< out: table id */
+{
+ /* check hard coded indexes */
+ switch(index_id) {
+ case DICT_TABLES_ID:
+ case DICT_COLUMNS_ID:
+ case DICT_INDEXES_ID:
+ case DICT_FIELDS_ID:
+ *table_id = index_id;
+ return true;
+ case DICT_TABLE_IDS_ID:
+ /* The following is a secondary index on SYS_TABLES */
+ *table_id = DICT_TABLES_ID;
+ return true;
+ }
+
+ bool found = false;
+ mtr_t mtr;
+
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ /* NOTE that the operation of this function is protected by
+ the dictionary mutex, and therefore no deadlocks can occur
+ with other dictionary operations. */
+
+ mtr_start(&mtr);
+
+ btr_pcur_t pcur;
+ const rec_t* rec = dict_startscan_system(&pcur, &mtr, SYS_INDEXES);
+
+ while (rec) {
+ ulint len;
+ const byte* field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_INDEXES__ID, &len);
+ ut_ad(len == 8);
+
+ /* Check if the index id is the one searched for */
+ if (index_id == mach_read_from_8(field)) {
+ found = true;
+ /* Now we get the table id */
+ const byte* field = rec_get_nth_field_old(
+ rec,
+ DICT_FLD__SYS_INDEXES__TABLE_ID,
+ &len);
+ *table_id = mach_read_from_8(field);
+ break;
+ }
+ mtr_commit(&mtr);
+ mtr_start(&mtr);
+ rec = dict_getnext_system(&pcur, &mtr);
+ }
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ return(found);
+}
+
+dict_table_t* dict_table_open_on_index_id(index_id_t index_id)
+{
+ table_id_t table_id;
+ dict_table_t * table = NULL;
+ if (dict_load_table_id_on_index_id(index_id, &table_id)) {
+ table = dict_table_open_on_id(table_id, true,
+ DICT_TABLE_OP_LOAD_TABLESPACE);
+ }
+
+ return table;
}
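
As a reading aid between the two files: the fk_tables list threaded through dict_load_table_one(), dict_load_foreign() and dict_load_foreigns() above replaces the old bounded recursion (DICT_FK_MAX_RECURSIVE_LOAD). A minimal sketch of the wrapper loop that drains it, assuming the dict_load_table(name, ignore_err) entry point seen in dict_load_table_on_id(); this is an illustration, not the committed implementation:

	dict_table_t*
	dict_load_table(const char* name, dict_err_ignore_t ignore_err)
	{
		dict_names_t	fk_list;
		table_name_t	table_name;

		table_name.m_name = const_cast<char*>(name);

		dict_table_t*	result = dict_load_table_one(
			table_name, ignore_err, fk_list);

		/* dict_load_foreign() pushes the name of a child table
		that is not yet in the cache instead of recursing into
		it; each iteration may push further names. */
		while (!fk_list.empty()) {
			table_name_t	fk_name;
			fk_name.m_name = const_cast<char*>(fk_list.front());

			if (!dict_table_check_if_in_cache_low(
				    fk_name.m_name)) {
				dict_load_table_one(fk_name, ignore_err,
						    fk_list);
			}

			fk_list.pop_front();
		}

		return(result);
	}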
diff --git a/storage/innobase/dict/dict0mem.cc b/storage/innobase/dict/dict0mem.cc
index bc955fb13b9..9190272a4c5 100644
--- a/storage/innobase/dict/dict0mem.cc
+++ b/storage/innobase/dict/dict0mem.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2018, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
Copyright (c) 2013, 2020, MariaDB Corporation.
@@ -25,37 +25,23 @@ Data dictionary memory object creation
Created 1/8/1996 Heikki Tuuri
***********************************************************************/
-#include "dict0mem.h"
-
-#ifdef UNIV_NONINL
-#include "dict0mem.ic"
-#endif
+#include "ha_prototypes.h"
+#include <mysql_com.h>
+#include "dict0mem.h"
#include "rem0rec.h"
#include "data0type.h"
#include "mach0data.h"
#include "dict0dict.h"
#include "fts0priv.h"
#include "ut0crc32.h"
-#ifndef UNIV_HOTBACKUP
-# include "ha_prototypes.h" /* innobase_casedn_str(),
- innobase_get_lower_case_table_names */
-# include "mysql_com.h" /* NAME_LEN */
-# include "lock0lock.h"
-#endif /* !UNIV_HOTBACKUP */
-#ifdef UNIV_BLOB_DEBUG
-# include "ut0rbt.h"
-#endif /* UNIV_BLOB_DEBUG */
+#include "lock0lock.h"
+#include "sync0sync.h"
#include <iostream>
#define DICT_HEAP_SIZE 100 /*!< initial memory heap size when
creating a table or index object */
-#ifdef UNIV_PFS_MUTEX
-/* Key to register autoinc_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t autoinc_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
/** System databases */
static const char* innobase_system_databases[] = {
"mysql/",
@@ -64,21 +50,64 @@ static const char* innobase_system_databases[] = {
NullS
};
+/** The start of the table basename suffix for partitioned tables */
+const char table_name_t::part_suffix[4]
+#ifdef _WIN32
+= "#p#";
+#else
+= "#P#";
+#endif
+
/** An integer randomly initialized at startup used to make a temporary
table name as unique as possible. */
static ib_uint32_t dict_temp_file_num;
+/** Display an identifier.
+@param[in,out] s output stream
+@param[in] id_name SQL identifier (other than table name)
+@return the output stream */
+std::ostream&
+operator<<(
+ std::ostream& s,
+ const id_name_t& id_name)
+{
+ const char q = '`';
+ const char* c = id_name;
+ s << q;
+ for (; *c != 0; c++) {
+ if (*c == q) {
+ s << *c;
+ }
+ s << *c;
+ }
+ s << q;
+ return(s);
+}
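
For illustration: the loop above doubles an embedded quote character, so an identifier that itself contains a backquote still prints as a valid quoted name. A hypothetical use, assuming <sstream> is included and the implicit id_name_t(const char*) conversion implied by the `const char* c = id_name` line:

	std::ostringstream	os;
	os << id_name_t("c1`c2");
	/* os.str() is "`c1``c2`": the embedded quote is doubled,
	the usual escaping for ` inside a quoted identifier. */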
+
+/** Display a table name.
+@param[in,out] s output stream
+@param[in] table_name table name
+@return the output stream */
+std::ostream&
+operator<<(
+ std::ostream& s,
+ const table_name_t& table_name)
+{
+ return(s << ut_get_name(NULL, table_name.m_name));
+}
+
/**********************************************************************//**
Creates a table memory object.
-@return own: table object */
-UNIV_INTERN
+@return own: table object */
dict_table_t*
dict_mem_table_create(
/*==================*/
const char* name, /*!< in: table name */
ulint space, /*!< in: space where the clustered index of
the table is placed */
- ulint n_cols, /*!< in: number of columns */
+ ulint n_cols, /*!< in: total number of columns including
+ virtual and non-virtual columns */
+ ulint n_v_cols,/*!< in: number of virtual columns */
ulint flags, /*!< in: table flags */
ulint flags2) /*!< in: table flags2 */
{
@@ -86,47 +115,46 @@ dict_mem_table_create(
mem_heap_t* heap;
ut_ad(name);
- ut_a(dict_tf_is_valid(flags));
- ut_a(!(flags2 & ~DICT_TF2_BIT_MASK));
+ ut_a(dict_tf2_is_valid(flags, flags2));
+ ut_a(!(flags2 & DICT_TF2_UNUSED_BIT_MASK));
heap = mem_heap_create(DICT_HEAP_SIZE);
table = static_cast<dict_table_t*>(
- mem_heap_zalloc(heap, sizeof(dict_table_t)));
+ mem_heap_zalloc(heap, sizeof(*table)));
+
+ lock_table_lock_list_init(&table->locks);
+
+ UT_LIST_INIT(table->indexes, &dict_index_t::indexes);
table->heap = heap;
+ ut_d(table->magic_n = DICT_TABLE_MAGIC_N);
+
table->flags = (unsigned int) flags;
table->flags2 = (unsigned int) flags2;
- table->name = static_cast<char*>(ut_malloc(strlen(name) + 1));
- memcpy(table->name, name, strlen(name) + 1);
- table->is_system_db = dict_mem_table_is_system(table->name);
+ table->name.m_name = mem_strdup(name);
+ table->is_system_db = dict_mem_table_is_system(table->name.m_name);
table->space = (unsigned int) space;
- table->n_cols = (unsigned int) (n_cols + DATA_N_SYS_COLS);
+ table->n_t_cols = unsigned(n_cols + DATA_N_SYS_COLS);
+ table->n_v_cols = (unsigned int) (n_v_cols);
+ table->n_cols = table->n_t_cols - table->n_v_cols;
table->cols = static_cast<dict_col_t*>(
- mem_heap_alloc(heap,
- (n_cols + DATA_N_SYS_COLS)
- * sizeof(dict_col_t)));
-
- ut_d(table->magic_n = DICT_TABLE_MAGIC_N);
+ mem_heap_alloc(heap, table->n_cols * sizeof(dict_col_t)));
+ table->v_cols = static_cast<dict_v_col_t*>(
+ mem_heap_alloc(heap, n_v_cols * sizeof(*table->v_cols)));
/* true means that the stats latch will be enabled -
dict_table_stats_lock() will not be noop. */
dict_table_stats_latch_create(table, true);
-#ifndef UNIV_HOTBACKUP
table->autoinc_lock = static_cast<ib_lock_t*>(
mem_heap_alloc(heap, lock_get_size()));
+ /* lazy creation of table autoinc latch */
dict_table_autoinc_create_lazy(table);
- table->autoinc = 0;
-
- /* The number of transactions that are either waiting on the
- AUTOINC lock or have been granted the lock. */
- table->n_waiting_or_granted_auto_inc_locks = 0;
-
/* If the table has an FTS index or we are in the process
of building one, create the table->fts */
if (dict_table_has_fts_index(table)
@@ -137,7 +165,6 @@ dict_mem_table_create(
} else {
table->fts = NULL;
}
-#endif /* !UNIV_HOTBACKUP */
new(&table->foreign_set) dict_foreign_set();
new(&table->referenced_set) dict_foreign_set();
@@ -146,38 +173,7 @@ dict_mem_table_create(
}
/****************************************************************//**
-Determines if a table belongs to a system database
-@return */
-UNIV_INTERN
-bool
-dict_mem_table_is_system(
-/*================*/
- char *name) /*!< in: table name */
-{
- ut_ad(name);
-
- /* table has the following format: database/table
- and some system table are of the form SYS_* */
- if (strchr(name, '/')) {
- int table_len = strlen(name);
- const char *system_db;
- int i = 0;
- while ((system_db = innobase_system_databases[i++])
- && (system_db != NullS)) {
- int len = strlen(system_db);
- if (table_len > len && !strncmp(name, system_db, len)) {
- return true;
- }
- }
- return false;
- } else {
- return true;
- }
-}
-
-/****************************************************************//**
Free a table memory object. */
-UNIV_INTERN
void
dict_mem_table_free(
/*================*/
@@ -187,33 +183,45 @@ dict_mem_table_free(
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
ut_d(table->cached = FALSE);
- if (dict_table_has_fts_index(table)
- || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
- || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
+ if (dict_table_has_fts_index(table)
+ || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
+ || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
if (table->fts) {
- if (table->cached) {
- fts_optimize_remove_table(table);
- }
+ fts_optimize_remove_table(table);
fts_free(table);
}
}
-#ifndef UNIV_HOTBACKUP
- dict_table_autoinc_destroy(table);
-#endif /* UNIV_HOTBACKUP */
+ dict_table_autoinc_destroy(table);
+ dict_mem_table_free_foreign_vcol_set(table);
dict_table_stats_latch_destroy(table);
table->foreign_set.~dict_foreign_set();
table->referenced_set.~dict_foreign_set();
- ut_free(table->name);
+ ut_free(table->name.m_name);
+ table->name.m_name = NULL;
+
+ /* Clean up virtual index info structures that are registered
+ with virtual columns */
+ for (ulint i = 0; i < table->n_v_def; i++) {
+ dict_v_col_t* vcol
+ = dict_table_get_nth_v_col(table, i);
+
+ UT_DELETE(vcol->v_indexes);
+ }
+
+ if (table->s_cols != NULL) {
+ UT_DELETE(table->s_cols);
+ }
+
mem_heap_free(table->heap);
}
/****************************************************************//**
Append 'name' to 'col_names'. @see dict_table_t::col_names
-@return new column names array */
+@return new column names array */
static
const char*
dict_add_col_name(
@@ -261,7 +269,6 @@ dict_add_col_name(
/**********************************************************************//**
Adds a column definition to a table. */
-UNIV_INTERN
void
dict_mem_table_add_col(
/*===================*/
@@ -278,13 +285,17 @@ dict_mem_table_add_col(
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
ut_ad(!heap == !name);
+ ut_ad(!(prtype & DATA_VIRTUAL));
+
i = table->n_def++;
+ table->n_t_def++;
+
if (name) {
- if (UNIV_UNLIKELY(table->n_def == table->n_cols)) {
+ if (table->n_def == table->n_cols) {
heap = table->heap;
}
- if (UNIV_LIKELY(i != 0) && UNIV_UNLIKELY(table->col_names == NULL)) {
+ if (i && !table->col_names) {
/* All preceding column names are empty. */
char* s = static_cast<char*>(
mem_heap_zalloc(heap, table->n_def));
@@ -301,6 +312,114 @@ dict_mem_table_add_col(
dict_mem_fill_column_struct(col, i, mtype, prtype, len);
}
+/** Adds a virtual column definition to a table.
+@param[in,out] table table
+@param[in,out] heap temporary memory heap, or NULL. It is
+			used to store the name when we have not
+			finished adding all columns. When all columns
+			are added, the whole name is copied into
+			memory allocated from table->heap
+@param[in] name column name
+@param[in] mtype main datatype
+@param[in] prtype precise type
+@param[in] len length
+@param[in] pos position in a table
+@param[in] num_base number of base columns
+@return the virtual column definition */
+dict_v_col_t*
+dict_mem_table_add_v_col(
+ dict_table_t* table,
+ mem_heap_t* heap,
+ const char* name,
+ ulint mtype,
+ ulint prtype,
+ ulint len,
+ ulint pos,
+ ulint num_base)
+{
+ dict_v_col_t* v_col;
+ ulint i;
+
+ ut_ad(table);
+ ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+ ut_ad(!heap == !name);
+
+ ut_ad(prtype & DATA_VIRTUAL);
+
+ i = table->n_v_def++;
+
+ table->n_t_def++;
+
+ if (name != NULL) {
+ if (table->n_v_def == table->n_v_cols) {
+ heap = table->heap;
+ }
+
+ if (i && !table->v_col_names) {
+ /* All preceding column names are empty. */
+ char* s = static_cast<char*>(
+ mem_heap_zalloc(heap, table->n_v_def));
+
+ table->v_col_names = s;
+ }
+
+ table->v_col_names = dict_add_col_name(table->v_col_names,
+ i, name, heap);
+ }
+
+ v_col = dict_table_get_nth_v_col(table, i);
+
+ dict_mem_fill_column_struct(&v_col->m_col, pos, mtype, prtype, len);
+ v_col->v_pos = i;
+
+ if (num_base != 0) {
+ v_col->base_col = static_cast<dict_col_t**>(mem_heap_zalloc(
+ table->heap, num_base * sizeof(
+ *v_col->base_col)));
+ } else {
+ v_col->base_col = NULL;
+ }
+
+ v_col->num_base = num_base;
+
+ /* Initialize the index list for virtual columns */
+ v_col->v_indexes = UT_NEW_NOKEY(dict_v_idx_list());
+
+ return(v_col);
+}
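
A minimal usage sketch of the bookkeeping above; the column types, lengths and zero flags are placeholders, not values taken from this patch:

	/* Two regular columns plus one virtual column: n_cols = 3
	is the total, of which n_v_cols = 1 is virtual. */
	dict_table_t*	t = dict_mem_table_create(
		"test/t1", 0, 3, 1, 0, 0);

	dict_mem_table_add_col(t, t->heap, "a", DATA_INT, 0, 4);
	dict_mem_table_add_col(t, t->heap, "b", DATA_INT, 0, 4);
	dict_mem_table_add_v_col(t, t->heap, "v", DATA_INT,
				 DATA_VIRTUAL, 4,
				 2 /* position among all columns */,
				 1 /* one base column */);

	/* Each call incremented n_t_def; only the last one
	incremented n_v_def. */
	ut_ad(t->n_t_def == 3 && t->n_v_def == 1);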
+
+/** Adds a stored column definition to a table.
+@param[in] table table
+@param[in] num_base number of base columns. */
+void
+dict_mem_table_add_s_col(
+ dict_table_t* table,
+ ulint num_base)
+{
+ ulint i = table->n_def - 1;
+ dict_col_t* col = dict_table_get_nth_col(table, i);
+ dict_s_col_t s_col;
+
+ ut_ad(col != NULL);
+
+ if (table->s_cols == NULL) {
+ table->s_cols = UT_NEW_NOKEY(dict_s_col_list());
+ }
+
+ s_col.m_col = col;
+ s_col.s_pos = i + table->n_v_def;
+
+ if (num_base != 0) {
+ s_col.base_col = static_cast<dict_col_t**>(mem_heap_zalloc(
+ table->heap, num_base * sizeof(dict_col_t*)));
+ } else {
+ s_col.base_col = NULL;
+ }
+
+ s_col.num_base = num_base;
+ table->s_cols->push_back(s_col);
+}
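
A worked example of the s_pos arithmetic above, with hypothetical counts:

	/* If the stored column was just added as the 4th non-virtual
	column (n_def = 4, so i = 3) and the table has n_v_def = 2
	virtual columns, then s_pos = i + n_v_def = 5: the column's
	position when virtual columns are counted as well. */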
+
/**********************************************************************//**
Renames a column of a table in the data dictionary cache. */
static MY_ATTRIBUTE((nonnull))
@@ -310,11 +429,19 @@ dict_mem_table_col_rename_low(
dict_table_t* table, /*!< in/out: table */
unsigned i, /*!< in: column offset corresponding to s */
const char* to, /*!< in: new column name */
- const char* s) /*!< in: pointer to table->col_names */
+ const char* s, /*!< in: pointer to table->col_names */
+ bool is_virtual)
+ /*!< in: if this is a virtual column */
{
+ char* t_col_names = const_cast<char*>(
+ is_virtual ? table->v_col_names : table->col_names);
+ ulint n_col = is_virtual ? table->n_v_def : table->n_def;
+
size_t from_len = strlen(s), to_len = strlen(to);
- ut_ad(i < table->n_def);
+ ut_ad(i < table->n_def || is_virtual);
+ ut_ad(i < table->n_v_def || !is_virtual);
+
ut_ad(from_len <= NAME_LEN);
ut_ad(to_len <= NAME_LEN);
@@ -330,13 +457,13 @@ dict_mem_table_col_rename_low(
/* We need to adjust all affected index->field
pointers, as in dict_index_add_col(). First, copy
table->col_names. */
- ulint prefix_len = s - table->col_names;
+ ulint prefix_len = s - t_col_names;
- for (; i < table->n_def; i++) {
+ for (; i < n_col; i++) {
s += strlen(s) + 1;
}
- ulint full_len = s - table->col_names;
+ ulint full_len = s - t_col_names;
char* col_names;
if (to_len > from_len) {
@@ -345,14 +472,14 @@ dict_mem_table_col_rename_low(
table->heap,
full_len + to_len - from_len));
- memcpy(col_names, table->col_names, prefix_len);
+ memcpy(col_names, t_col_names, prefix_len);
} else {
- col_names = const_cast<char*>(table->col_names);
+ col_names = const_cast<char*>(t_col_names);
}
memcpy(col_names + prefix_len, to, to_len);
memmove(col_names + prefix_len + to_len,
- table->col_names + (prefix_len + from_len),
+ t_col_names + (prefix_len + from_len),
full_len - (prefix_len + from_len));
/* Replace the field names in every index. */
@@ -365,8 +492,16 @@ dict_mem_table_col_rename_low(
dict_field_t* field
= dict_index_get_nth_field(
index, i);
+
+			/* Skip this field if its virtual-column
+			flag does not match is_virtual */
+ if ((!is_virtual) !=
+ (!dict_col_is_virtual(field->col))) {
+ continue;
+ }
+
ulint name_ofs
- = field->name - table->col_names;
+ = field->name - t_col_names;
if (name_ofs <= prefix_len) {
field->name = col_names + name_ofs;
} else {
@@ -377,7 +512,16 @@ dict_mem_table_col_rename_low(
}
}
- table->col_names = col_names;
+ if (is_virtual) {
+ table->v_col_names = col_names;
+ } else {
+ table->col_names = col_names;
+ }
+ }
+
+ /* Virtual columns are not allowed for foreign key */
+ if (is_virtual) {
+ return;
}
dict_foreign_t* foreign;
@@ -480,20 +624,22 @@ dict_mem_table_col_rename_low(
/**********************************************************************//**
Renames a column of a table in the data dictionary cache. */
-UNIV_INTERN
void
dict_mem_table_col_rename(
/*======================*/
dict_table_t* table, /*!< in/out: table */
- unsigned nth_col,/*!< in: column index */
+ ulint nth_col,/*!< in: column index */
const char* from, /*!< in: old column name */
- const char* to) /*!< in: new column name */
+ const char* to, /*!< in: new column name */
+ bool is_virtual)
+ /*!< in: if this is a virtual column */
{
- const char* s = table->col_names;
+ const char* s = is_virtual ? table->v_col_names : table->col_names;
- ut_ad(nth_col < table->n_def);
+ ut_ad((!is_virtual && nth_col < table->n_def)
+ || (is_virtual && nth_col < table->n_v_def));
- for (unsigned i = 0; i < nth_col; i++) {
+ for (ulint i = 0; i < nth_col; i++) {
size_t len = strlen(s);
ut_ad(len > 0);
s += len + 1;
@@ -501,13 +647,13 @@ dict_mem_table_col_rename(
ut_ad(!my_strcasecmp(system_charset_info, from, s));
- dict_mem_table_col_rename_low(table, nth_col, to, s);
+ dict_mem_table_col_rename_low(table, static_cast<unsigned>(nth_col),
+ to, s, is_virtual);
}
/**********************************************************************//**
This function populates a dict_col_t memory structure with
supplied information. */
-UNIV_INTERN
void
dict_mem_fill_column_struct(
/*========================*/
@@ -518,10 +664,8 @@ dict_mem_fill_column_struct(
ulint prtype, /*!< in: precise type */
ulint col_len) /*!< in: column length */
{
-#ifndef UNIV_HOTBACKUP
ulint mbminlen;
ulint mbmaxlen;
-#endif /* !UNIV_HOTBACKUP */
column->ind = (unsigned int) col_pos;
column->ord_part = 0;
@@ -529,17 +673,14 @@ dict_mem_fill_column_struct(
column->mtype = (unsigned int) mtype;
column->prtype = (unsigned int) prtype;
column->len = (unsigned int) col_len;
-#ifndef UNIV_HOTBACKUP
dtype_get_mblen(mtype, prtype, &mbminlen, &mbmaxlen);
column->mbminlen = mbminlen;
column->mbmaxlen = mbmaxlen;
-#endif /* !UNIV_HOTBACKUP */
}
/**********************************************************************//**
Creates an index memory object.
-@return own: index object */
-UNIV_INTERN
+@return own: index object */
dict_index_t*
dict_mem_index_create(
/*==================*/
@@ -566,20 +707,31 @@ dict_mem_index_create(
space, type, n_fields);
dict_index_zip_pad_mutex_create_lazy(index);
+
+ if (type & DICT_SPATIAL) {
+ mutex_create(LATCH_ID_RTR_SSN_MUTEX, &index->rtr_ssn.mutex);
+ index->rtr_track = static_cast<rtr_info_track_t*>(
+ mem_heap_alloc(
+ heap,
+ sizeof(*index->rtr_track)));
+ mutex_create(LATCH_ID_RTR_ACTIVE_MUTEX,
+ &index->rtr_track->rtr_active_mutex);
+ index->rtr_track->rtr_active = UT_NEW_NOKEY(rtr_info_active());
+ }
+
return(index);
}
-#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Creates and initializes a foreign constraint memory object.
-@return own: foreign constraint struct */
-UNIV_INTERN
+@return own: foreign constraint struct */
dict_foreign_t*
dict_mem_foreign_create(void)
/*=========================*/
{
dict_foreign_t* foreign;
mem_heap_t* heap;
+ DBUG_ENTER("dict_mem_foreign_create");
heap = mem_heap_create(100);
@@ -588,7 +740,11 @@ dict_mem_foreign_create(void)
foreign->heap = heap;
- return(foreign);
+ foreign->v_cols = NULL;
+
+ DBUG_PRINT("dict_mem_foreign_create", ("heap: %p", heap));
+
+ DBUG_RETURN(foreign);
}
/**********************************************************************//**
@@ -596,7 +752,6 @@ Sets the foreign_table_name_lookup pointer based on the value of
lower_case_table_names. If that is 0 or 1, foreign_table_name_lookup
will point to foreign_table_name. If 2, then another string is
allocated from foreign->heap and set to lower case. */
-UNIV_INTERN
void
dict_mem_foreign_table_name_lookup_set(
/*===================================*/
@@ -627,7 +782,6 @@ Sets the referenced_table_name_lookup pointer based on the value of
lower_case_table_names. If that is 0 or 1, referenced_table_name_lookup
will point to referenced_table_name. If 2, then another string is
allocated from foreign->heap and set to lower case. */
-UNIV_INTERN
void
dict_mem_referenced_table_name_lookup_set(
/*======================================*/
@@ -652,13 +806,190 @@ dict_mem_referenced_table_name_lookup_set(
= foreign->referenced_table_name;
}
}
-#endif /* !UNIV_HOTBACKUP */
+
+/** Fill the virtual column set with virtual column information
+present in the given virtual index.
+@param[in] index virtual index
+@param[out] v_cols virtual column set. */
+static
+void
+dict_mem_fill_vcol_has_index(
+ const dict_index_t* index,
+ dict_vcol_set** v_cols)
+{
+ for (ulint i = 0; i < index->table->n_v_cols; i++) {
+ dict_v_col_t* v_col = dict_table_get_nth_v_col(
+ index->table, i);
+ if (!v_col->m_col.ord_part) {
+ continue;
+ }
+
+ dict_v_idx_list::iterator it;
+ for (it = v_col->v_indexes->begin();
+ it != v_col->v_indexes->end(); ++it) {
+ dict_v_idx_t v_idx = *it;
+
+ if (v_idx.index != index) {
+ continue;
+ }
+
+ if (*v_cols == NULL) {
+ *v_cols = UT_NEW_NOKEY(dict_vcol_set());
+ }
+
+ (*v_cols)->insert(v_col);
+ }
+ }
+}
+
+/** Fill the virtual column set with the virtual column of the index
+if the index contains given column name.
+@param[in] col_name column name
+@param[in] table innodb table object
+@param[out] v_cols set of virtual column information. */
+static
+void
+dict_mem_fill_vcol_from_v_indexes(
+ const char* col_name,
+ const dict_table_t* table,
+ dict_vcol_set** v_cols)
+{
+	/* A virtual column cannot be part of the primary key,
+	so start with the secondary indexes */
+ for (dict_index_t* index = dict_table_get_next_index(
+ dict_table_get_first_index(table));
+ index;
+ index = dict_table_get_next_index(index)) {
+
+		/* Skip the index if it has a newly added
+		virtual column, because its field name is
+		NULL. The virtual column set will be
+		refreshed later, when the table is loaded. */
+ if (!dict_index_has_virtual(index)
+ || index->has_new_v_col) {
+ continue;
+ }
+
+ for (ulint i = 0; i < index->n_fields; i++) {
+ dict_field_t* field =
+ dict_index_get_nth_field(index, i);
+
+ if (strcmp(field->name, col_name) == 0) {
+ dict_mem_fill_vcol_has_index(
+ index, v_cols);
+ }
+ }
+ }
+}
+
+/** Fill the virtual column set with virtual columns which have base columns
+as the given col_name
+@param[in] col_name column name
+@param[in] table table object
+@param[out] v_cols set of virtual columns. */
+static
+void
+dict_mem_fill_vcol_set_for_base_col(
+ const char* col_name,
+ const dict_table_t* table,
+ dict_vcol_set** v_cols)
+{
+ for (ulint i = 0; i < table->n_v_cols; i++) {
+ dict_v_col_t* v_col = dict_table_get_nth_v_col(table, i);
+
+ if (!v_col->m_col.ord_part) {
+ continue;
+ }
+
+ for (ulint j = 0; j < v_col->num_base; j++) {
+ if (strcmp(col_name, dict_table_get_col_name(
+ table,
+ v_col->base_col[j]->ind)) == 0) {
+
+ if (*v_cols == NULL) {
+ *v_cols = UT_NEW_NOKEY(dict_vcol_set());
+ }
+
+ (*v_cols)->insert(v_col);
+ }
+ }
+ }
+}
+
+/** Fills the dependent virtual columns in a set.
+A virtual column is considered dependent because
+1) an FK can be present on a base column of a virtual column
+2) an FK can be present on a column which is part of a virtual index
+@param[in,out] foreign foreign key information. */
+void
+dict_mem_foreign_fill_vcol_set(
+ dict_foreign_t* foreign)
+{
+ ulint type = foreign->type;
+
+ if (type == 0) {
+ return;
+ }
+
+ for (ulint i = 0; i < foreign->n_fields; i++) {
+ /** FK can be present on base columns
+ of virtual columns. */
+ dict_mem_fill_vcol_set_for_base_col(
+ foreign->foreign_col_names[i],
+ foreign->foreign_table,
+ &foreign->v_cols);
+
+ /** FK can be present on the columns
+ which can be a part of virtual index. */
+ dict_mem_fill_vcol_from_v_indexes(
+ foreign->foreign_col_names[i],
+ foreign->foreign_table,
+ &foreign->v_cols);
+ }
+}
+
+/** Fill virtual columns set in each fk constraint present in the table.
+@param[in,out] table innodb table object. */
+void
+dict_mem_table_fill_foreign_vcol_set(
+ dict_table_t* table)
+{
+ dict_foreign_set fk_set = table->foreign_set;
+ dict_foreign_t* foreign;
+
+ dict_foreign_set::iterator it;
+ for (it = fk_set.begin(); it != fk_set.end(); ++it) {
+ foreign = *it;
+
+ dict_mem_foreign_fill_vcol_set(foreign);
+ }
+}
+
+/** Free the vcol_set from all foreign key constraint on the table.
+@param[in,out] table innodb table object. */
+void
+dict_mem_table_free_foreign_vcol_set(
+ dict_table_t* table)
+{
+ dict_foreign_set fk_set = table->foreign_set;
+ dict_foreign_t* foreign;
+
+ dict_foreign_set::iterator it;
+ for (it = fk_set.begin(); it != fk_set.end(); ++it) {
+
+ foreign = *it;
+
+ if (foreign->v_cols != NULL) {
+ UT_DELETE(foreign->v_cols);
+ foreign->v_cols = NULL;
+ }
+ }
+}
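
Read together with the dict0load.cc hunks above, the intended lifecycle of these sets appears to be the following (a sketch assembled from this diff, not a verbatim excerpt):

	/* In dict_load_table_one(), once the indexes are loaded: */
	err = dict_load_foreigns(table->name.m_name, NULL, true, true,
				 ignore_err, fk_tables);
	if (err == DB_SUCCESS) {
		dict_mem_table_fill_foreign_vcol_set(table);
	}

	/* In dict_mem_table_free(), before the heap is released: */
	dict_mem_table_free_foreign_vcol_set(table);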
/**********************************************************************//**
Adds a field definition to an index. NOTE: does not take a copy
of the column name if the field is a column. The memory occupied
by the column name may be released only after publishing the index. */
-UNIV_INTERN
void
dict_mem_index_add_field(
/*=====================*/
@@ -683,7 +1014,6 @@ dict_mem_index_add_field(
/**********************************************************************//**
Frees an index memory object. */
-UNIV_INTERN
void
dict_mem_index_free(
/*================*/
@@ -691,31 +1021,34 @@ dict_mem_index_free(
{
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-#ifdef UNIV_BLOB_DEBUG
- if (index->blobs) {
- mutex_free(&index->blobs_mutex);
- rbt_free(index->blobs);
- }
-#endif /* UNIV_BLOB_DEBUG */
dict_index_zip_pad_mutex_destroy(index);
+ if (dict_index_is_spatial(index)) {
+ rtr_info_active::iterator it;
+ rtr_info_t* rtr_info;
+
+ for (it = index->rtr_track->rtr_active->begin();
+ it != index->rtr_track->rtr_active->end(); ++it) {
+ rtr_info = *it;
+
+ rtr_info->index = NULL;
+ }
+
+ mutex_destroy(&index->rtr_ssn.mutex);
+ mutex_destroy(&index->rtr_track->rtr_active_mutex);
+ UT_DELETE(index->rtr_track->rtr_active);
+ }
+
+ dict_index_remove_from_v_col_list(index);
mem_heap_free(index->heap);
}
-/** Create a temporary tablename like "#sql-ibtid-inc where
- tid = the Table ID
- inc = a randomly initialized number that is incremented for each file
-The table ID is a 64 bit integer, can use up to 20 digits, and is
-initialized at bootstrap. The second number is 32 bits, can use up to 10
-digits, and is initialized at startup to a randomly distributed number.
-It is hoped that the combination of these two numbers will provide a
-reasonably unique temporary file name.
+/** Create a temporary tablename like "#sql-ibNNN".
@param[in] heap A memory heap
@param[in] dbtab Table name in the form database/table name
@param[in] id Table id
@return A unique temporary tablename suitable for InnoDB use */
-UNIV_INTERN
char*
dict_mem_create_temporary_tablename(
mem_heap_t* heap,
@@ -724,21 +1057,28 @@ dict_mem_create_temporary_tablename(
{
size_t size;
char* name;
- const char* dbend = strchr(dbtab, '/');
+ const char* dbend = strchr(dbtab, '/');
ut_ad(dbend);
- size_t dblen = dbend - dbtab + 1;
-
-#ifdef HAVE_ATOMIC_BUILTINS
- /* Increment a randomly initialized number for each temp file. */
- os_atomic_increment_uint32(&dict_temp_file_num, 1);
-#else /* HAVE_ATOMIC_BUILTINS */
- dict_temp_file_num++;
-#endif /* HAVE_ATOMIC_BUILTINS */
+ size_t dblen = size_t(dbend - dbtab) + 1;
+
+ if (srv_safe_truncate) {
+ /* InnoDB will drop all #sql tables at startup.
+ Therefore, the id alone should generate a unique
+ and previously non-existent name. */
+ size = dblen + (sizeof(TEMP_FILE_PREFIX) + 3 + 20);
+ name = static_cast<char*>(mem_heap_alloc(heap, size));
+ memcpy(name, dbtab, dblen);
+ snprintf(name + dblen, size - dblen,
+ TEMP_FILE_PREFIX_INNODB UINT64PF, id);
+ return name;
+ }
+ /* Increment a randomly initialized number for each temp file. */
+ my_atomic_add32((int32*) &dict_temp_file_num, 1);
- size = tmp_file_prefix_length + 3 + 20 + 1 + 10 + dblen;
+ size = dblen + (sizeof(TEMP_FILE_PREFIX) + 3 + 20 + 1 + 10);
name = static_cast<char*>(mem_heap_alloc(heap, size));
memcpy(name, dbtab, dblen);
- ut_snprintf(name + dblen, size - dblen,
+ snprintf(name + dblen, size - dblen,
TEMP_FILE_PREFIX_INNODB UINT64PF "-" UINT32PF,
id, dict_temp_file_num);
@@ -746,15 +1086,13 @@ dict_mem_create_temporary_tablename(
}
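
For concreteness, a sketch of the two name shapes produced above; the literal "#sql-ib" expansion of TEMP_FILE_PREFIX_INNODB is an assumption here, not shown in this hunk:

	mem_heap_t*	heap = mem_heap_create(100);

	/* With srv_safe_truncate the result is "test/#sql-ib57"
	(table id only); otherwise a "-<counter>" suffix is
	appended, for example "test/#sql-ib57-1989112647". */
	char*	tmp_name = dict_mem_create_temporary_tablename(
		heap, "test/t1", 57);

	mem_heap_free(heap);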
/** Initialize dict memory variables */
-
void
dict_mem_init(void)
{
/* Initialize a randomly distributed temporary file number */
ib_uint32_t now = static_cast<ib_uint32_t>(time(NULL));
- const byte* buf = reinterpret_cast<const byte*>(&now);
- ut_ad(ut_crc32 != NULL);
+ const byte* buf = reinterpret_cast<const byte*>(&now);
dict_temp_file_num = ut_crc32(buf, sizeof(now));
@@ -820,3 +1158,31 @@ operator<< (std::ostream& out, const dict_foreign_set& fk_set)
return(out);
}
+/****************************************************************//**
+Determines if a table belongs to a system database
+@return */
+bool
+dict_mem_table_is_system(
+/*================*/
+ char *name) /*!< in: table name */
+{
+ ut_ad(name);
+
+ /* table has the following format: database/table
+ and some system table are of the form SYS_* */
+ if (strchr(name, '/')) {
+ size_t table_len = strlen(name);
+ const char *system_db;
+ int i = 0;
+ while ((system_db = innobase_system_databases[i++])
+ && (system_db != NullS)) {
+ size_t len = strlen(system_db);
+ if (table_len > len && !strncmp(name, system_db, len)) {
+ return true;
+ }
+ }
+ return false;
+ } else {
+ return true;
+ }
+}
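
A few illustrative calls, assuming innobase_system_databases still lists "mysql/" among its prefixes:

	ut_ad(dict_mem_table_is_system(const_cast<char*>("mysql/user")));
	ut_ad(!dict_mem_table_is_system(const_cast<char*>("test/t1")));
	/* No '/' means an internal table such as SYS_FOREIGN: */
	ut_ad(dict_mem_table_is_system(const_cast<char*>("SYS_FOREIGN")));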
diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc
index 59c661514f5..2e2156865ef 100644
--- a/storage/innobase/dict/dict0stats.cc
+++ b/storage/innobase/dict/dict0stats.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2009, 2019, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2017, MariaDB Corporation.
+Copyright (c) 2015, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,28 +24,15 @@ Code used for calculating and manipulating table statistics.
Created Jan 06, 2010 Vasil Dimov
*******************************************************/
-#ifndef UNIV_HOTBACKUP
-
-#include "univ.i"
-
-#include "btr0btr.h" /* btr_get_size() */
-#include "btr0cur.h" /* btr_estimate_number_of_different_key_vals() */
-#include "dict0dict.h" /* dict_table_get_first_index(), dict_fs2utf8() */
-#include "dict0mem.h" /* DICT_TABLE_MAGIC_N */
#include "dict0stats.h"
-#include "data0type.h" /* dtype_t */
-#include "db0err.h" /* dberr_t */
-#include "page0page.h" /* page_align() */
-#include "pars0pars.h" /* pars_info_create() */
-#include "pars0types.h" /* pars_info_t */
-#include "que0que.h" /* que_eval_sql() */
-#include "rem0cmp.h" /* REC_MAX_N_FIELDS,cmp_rec_rec_with_match() */
-#include "row0sel.h" /* sel_node_t */
-#include "row0types.h" /* sel_node_t */
-#include "trx0trx.h" /* trx_create() */
-#include "trx0roll.h" /* trx_rollback_to_savepoint() */
-#include "ut0rnd.h" /* ut_rnd_interval() */
-#include "ut0ut.h" /* ut_format_name() */
+#include "ut0ut.h"
+#include "ut0rnd.h"
+#include "dyn0buf.h"
+#include "row0sel.h"
+#include "trx0trx.h"
+#include "pars0pars.h"
+#include <mysql_com.h>
+#include "btr0btr.h"
#include <algorithm>
#include <map>
@@ -117,9 +104,7 @@ where n=1..n_uniq.
@} */
/* names of the tables from the persistent statistics storage */
-#define TABLE_STATS_NAME "mysql/innodb_table_stats"
#define TABLE_STATS_NAME_PRINT "mysql.innodb_table_stats"
-#define INDEX_STATS_NAME "mysql/innodb_index_stats"
#define INDEX_STATS_NAME_PRINT "mysql.innodb_index_stats"
#ifdef UNIV_STATS_DEBUG
@@ -145,18 +130,15 @@ of keys. For example if a btree level is:
index: 0,1,2,3,4,5,6,7,8,9,10,11,12
data: b,b,b,b,b,b,g,g,j,j,j, x, y
then we would store 5,7,10,11,12 in the array. */
-typedef std::vector<ib_uint64_t> boundaries_t;
+typedef std::vector<ib_uint64_t, ut_allocator<ib_uint64_t> > boundaries_t;
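
A small self-contained sketch of the storage scheme described in the comment above (plain C++, not InnoDB code):

	boundaries_t	bounds;
	const char	data[] = { 'b','b','b','b','b','b',
				   'g','g','j','j','j','x','y' };

	for (size_t i = 0; i < sizeof data; i++) {
		if (i + 1 == sizeof data || data[i] != data[i + 1]) {
			/* record the last occurrence of each key */
			bounds.push_back(i);
		}
	}
	/* bounds now holds 5,7,10,11,12 as in the example above. */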
-/* This is used to arrange the index based on the index name.
-@return true if index_name1 is smaller than index_name2. */
-struct index_cmp
-{
- bool operator()(const char* index_name1, const char* index_name2) const {
- return(strcmp(index_name1, index_name2) < 0);
- }
-};
+/** Allocator type used for index_map_t. */
+typedef ut_allocator<std::pair<const char* const, dict_index_t*> >
+ index_map_t_allocator;
-typedef std::map<const char*, dict_index_t*, index_cmp> index_map_t;
+/** Auxiliary map used for sorting indexes by name in dict_stats_save(). */
+typedef std::map<const char*, dict_index_t*, ut_strcmp_functor,
+ index_map_t_allocator> index_map_t;
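
Because index_map_t now carries an explicit allocator type, constructing one needs both a comparator and an allocator instance; a sketch, assuming a default-constructed ut_allocator is acceptable and that id_name_t converts to const char*:

	index_map_t	indexes_by_name(
		(ut_strcmp_functor()),
		index_map_t_allocator());

	for (dict_index_t* index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {
		indexes_by_name[index->name] = index;
	}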
/*********************************************************************//**
Checks whether an index should be ignored in stats manipulations:
@@ -170,10 +152,10 @@ dict_stats_should_ignore_index(
/*===========================*/
const dict_index_t* index) /*!< in: index */
{
- return((index->type & DICT_FTS)
- || dict_index_is_corrupted(index)
+ return((index->type & (DICT_FTS | DICT_SPATIAL))
+ || index->is_corrupted()
|| index->to_be_dropped
- || *index->name == TEMP_INDEX_PREFIX);
+ || !index->is_committed());
}
/*********************************************************************//**
@@ -193,7 +175,7 @@ dict_stats_persistent_storage_check(
DATA_NOT_NULL, 192},
{"table_name", DATA_VARMYSQL,
- DATA_NOT_NULL, 192},
+ DATA_NOT_NULL, 597},
{"last_update", DATA_FIXBINARY,
DATA_NOT_NULL, 4},
@@ -221,7 +203,7 @@ dict_stats_persistent_storage_check(
DATA_NOT_NULL, 192},
{"table_name", DATA_VARMYSQL,
- DATA_NOT_NULL, 192},
+ DATA_NOT_NULL, 597},
{"index_name", DATA_VARMYSQL,
DATA_NOT_NULL, 192},
@@ -253,7 +235,7 @@ dict_stats_persistent_storage_check(
dberr_t ret;
if (!caller_has_dict_sys_mutex) {
- mutex_enter(&(dict_sys->mutex));
+ mutex_enter(&dict_sys->mutex);
}
ut_ad(mutex_own(&dict_sys->mutex));
@@ -268,12 +250,11 @@ dict_stats_persistent_storage_check(
}
if (!caller_has_dict_sys_mutex) {
- mutex_exit(&(dict_sys->mutex));
+ mutex_exit(&dict_sys->mutex);
}
if (ret != DB_SUCCESS && ret != DB_STATS_DO_NOT_EXIST) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: %s\n", errstr);
+ ib::error() << errstr;
return(false);
} else if (ret == DB_STATS_DO_NOT_EXIST) {
return false;
@@ -301,9 +282,8 @@ dict_stats_exec_sql(
{
dberr_t err;
bool trx_started = false;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
ut_ad(mutex_own(&dict_sys->mutex));
if (!dict_stats_persistent_storage_check(true)) {
@@ -313,8 +293,13 @@ dict_stats_exec_sql(
if (trx == NULL) {
trx = trx_allocate_for_background();
- trx_start_if_not_started(trx);
trx_started = true;
+
+ if (srv_read_only_mode) {
+ trx_start_internal_read_only(trx);
+ } else {
+ trx_start_internal(trx);
+ }
}
err = que_eval_sql(pinfo, sql, FALSE, trx); /* pinfo is freed here */
@@ -389,7 +374,7 @@ dict_stats_table_clone_create(
heap_size = 0;
heap_size += sizeof(dict_table_t);
- heap_size += strlen(table->name) + 1;
+ heap_size += strlen(table->name.m_name) + 1;
for (index = dict_table_get_first_index(table);
index != NULL;
@@ -399,7 +384,7 @@ dict_stats_table_clone_create(
continue;
}
- ut_ad(!dict_index_is_univ(index));
+ ut_ad(!dict_index_is_ibuf(index));
ulint n_uniq = dict_index_get_n_unique(index);
@@ -429,8 +414,7 @@ dict_stats_table_clone_create(
t->heap = heap;
- UNIV_MEM_ASSERT_RW_ABORT(table->name, strlen(table->name) + 1);
- t->name = (char*) mem_heap_strdup(heap, table->name);
+ t->name.m_name = mem_heap_strdup(heap, table->name.m_name);
t->corrupted = table->corrupted;
@@ -439,7 +423,7 @@ dict_stats_table_clone_create(
dict_table_stats_lock()/unlock() routines will do nothing. */
dict_table_stats_latch_create(t, false);
- UT_LIST_INIT(t->indexes);
+ UT_LIST_INIT(t->indexes, &dict_index_t::indexes);
for (index = dict_table_get_first_index(table);
index != NULL;
@@ -449,7 +433,7 @@ dict_stats_table_clone_create(
continue;
}
- ut_ad(!dict_index_is_univ(index));
+ ut_ad(!dict_index_is_ibuf(index));
dict_index_t* idx;
@@ -458,10 +442,9 @@ dict_stats_table_clone_create(
UNIV_MEM_ASSERT_RW_ABORT(&index->id, sizeof(index->id));
idx->id = index->id;
- UNIV_MEM_ASSERT_RW_ABORT(index->name, strlen(index->name) + 1);
- idx->name = (char*) mem_heap_strdup(heap, index->name);
+ idx->name = mem_heap_strdup(heap, index->name);
- idx->table_name = t->name;
+ idx->table_name = t->name.m_name;
idx->table = t;
@@ -470,6 +453,7 @@ dict_stats_table_clone_create(
idx->to_be_dropped = 0;
idx->online_status = ONLINE_INDEX_COMPLETE;
+ idx->set_committed(true);
idx->n_uniq = index->n_uniq;
@@ -477,13 +461,12 @@ dict_stats_table_clone_create(
heap, idx->n_uniq * sizeof(idx->fields[0]));
for (ulint i = 0; i < idx->n_uniq; i++) {
- UNIV_MEM_ASSERT_RW_ABORT(index->fields[i].name, strlen(index->fields[i].name) + 1);
- idx->fields[i].name = (char*) mem_heap_strdup(
+ idx->fields[i].name = mem_heap_strdup(
heap, index->fields[i].name);
}
/* hook idx into t->indexes */
- UT_LIST_ADD_LAST(indexes, t->indexes, idx);
+ UT_LIST_ADD_LAST(t->indexes, idx);
idx->stat_n_diff_key_vals = (ib_uint64_t*) mem_heap_alloc(
heap,
@@ -534,7 +517,7 @@ dict_stats_empty_index(
/*!< in: whether to empty defrag stats */
{
ut_ad(!(index->type & DICT_FTS));
- ut_ad(!dict_index_is_univ(index));
+ ut_ad(!dict_index_is_ibuf(index));
ulint n_uniq = index->n_uniq;
@@ -553,29 +536,6 @@ dict_stats_empty_index(
}
}
-/**********************************************************************//**
-Clear defragmentation summary. */
-UNIV_INTERN
-void
-dict_stats_empty_defrag_summary(
-/*==================*/
- dict_index_t* index) /*!< in: index to clear defragmentation stats */
-{
- index->stat_defrag_n_pages_freed = 0;
-}
-
-/**********************************************************************//**
-Clear defragmentation related index stats. */
-UNIV_INTERN
-void
-dict_stats_empty_defrag_stats(
-/*==================*/
- dict_index_t* index) /*!< in: index to clear defragmentation stats */
-{
- index->stat_defrag_modified_counter = 0;
- index->stat_defrag_n_page_split = 0;
-}
-
/*********************************************************************//**
Write all zeros (or 1 where it makes sense) into a table and its indexes'
statistics members. The resulting stats correspond to an empty table. */
@@ -608,7 +568,7 @@ dict_stats_empty_table(
continue;
}
- ut_ad(!dict_index_is_univ(index));
+ ut_ad(!dict_index_is_ibuf(index));
dict_stats_empty_index(index, empty_defrag_stats);
}
@@ -711,7 +671,7 @@ dict_stats_copy(
dict_table_t* dst, /*!< in/out: destination table */
const dict_table_t* src, /*!< in: source table */
bool reset_ignored_indexes) /*!< in: if true, set ignored indexes
- to have the same statistics as if
+ to have the same statistics as if
the table was empty */
{
dst->stats_last_recalc = src->stats_last_recalc;
@@ -743,7 +703,7 @@ dict_stats_copy(
}
}
- ut_ad(!dict_index_is_univ(dst_idx));
+ ut_ad(!dict_index_is_ibuf(dst_idx));
if (!INDEX_EQ(src_idx, dst_idx)) {
for (src_idx = dict_table_get_first_index(src);
@@ -800,8 +760,7 @@ dict_stats_copy(
dst->stat_initialized = TRUE;
}
-/*********************************************************************//**
-Duplicate the stats of a table and its indexes.
+/** Duplicate the stats of a table and its indexes.
This function creates a dummy dict_table_t object and copies the input
table's stats into it. The returned table object is not in the dictionary
cache and cannot be accessed by any other threads. In addition to the
@@ -823,12 +782,12 @@ dict_index_t::stat_defrag_n_pages_freed
dict_index_t::stat_defrag_n_page_split
The returned object should be freed with dict_stats_snapshot_free()
when no longer needed.
+@param[in] table table whose stats to copy
@return incomplete table object */
static
dict_table_t*
dict_stats_snapshot_create(
-/*=======================*/
- dict_table_t* table) /*!< in: table whose stats to copy */
+ dict_table_t* table)
{
mutex_enter(&dict_sys->mutex);
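/* Typical call pattern (a sketch; dict_stats_snapshot_free() is the
counterpart named in the comment above):

	dict_table_t*	t = dict_stats_snapshot_create(table);
	... read t->stat_n_rows etc. without touching the cached table ...
	dict_stats_snapshot_free(t);
*/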
@@ -896,7 +855,9 @@ dict_stats_update_transient_for_index(
} else {
mtr_t mtr;
ulint size;
+
mtr_start(&mtr);
+
mtr_s_lock(dict_index_get_lock(index), &mtr);
size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);
@@ -924,6 +885,10 @@ dict_stats_update_transient_for_index(
/* Do not continue if table decryption has failed or
table is already marked as corrupted. */
if (index->is_readable()) {
+			/* We do not handle the return value, since
+			it will be false only when some thread is
+			dropping the table, and then we need not
+			empty the statistics of the index being
+			dropped. */
btr_estimate_number_of_different_key_vals(index);
}
}
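/* For context: btr_estimate_number_of_different_key_vals() samples
leaf pages to fill index->stat_n_diff_key_vals[] for every n-column
prefix, while the btr_get_size() call above supplies the index size in
pages. This is a summary of the contract, not a full specification. */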
@@ -935,7 +900,7 @@ is relatively quick and is used to calculate transient statistics that
are not saved on disk.
This was the only way to calculate statistics before the
Persistent Statistics feature was introduced. */
-UNIV_INTERN
+static
void
dict_stats_update_transient(
/*========================*/
@@ -951,25 +916,22 @@ dict_stats_update_transient(
if (dict_table_is_discarded(table)) {
/* Nothing to do. */
- dict_stats_empty_table(table, false);
+ dict_stats_empty_table(table, true);
return;
} else if (index == NULL) {
/* Table definition is corrupt */
- char buf[MAX_FULL_NAME_LEN];
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: table %s has no indexes. "
- "Cannot calculate statistics.\n",
- ut_format_name(table->name, TRUE, buf, sizeof(buf)));
- dict_stats_empty_table(table, false);
+ ib::warn() << "Table " << table->name
+ << " has no indexes. Cannot calculate statistics.";
+ dict_stats_empty_table(table, true);
return;
}
for (; index != NULL; index = dict_table_get_next_index(index)) {
- ut_ad(!dict_index_is_univ(index));
+ ut_ad(!dict_index_is_ibuf(index));
- if (index->type & DICT_FTS) {
+ if (index->type & DICT_FTS || dict_index_is_spatial(index)) {
continue;
}
@@ -1056,15 +1018,15 @@ dict_stats_analyze_index_level(
bool prev_rec_is_copied;
byte* prev_rec_buf = NULL;
ulint prev_rec_buf_size = 0;
- ulint* rec_offsets;
- ulint* prev_rec_offsets;
+ offset_t* rec_offsets;
+ offset_t* prev_rec_offsets;
ulint i;
- DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu)\n", __func__,
- index->table->name, index->name, level);
+ DEBUG_PRINTF(" %s(table=%s, index=%s, level=" ULINTPF ")\n",
+ __func__, index->table->name, index->name, level);
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_S_LOCK));
+ MTR_MEMO_SX_LOCK));
n_uniq = dict_index_get_n_unique(index);
@@ -1078,9 +1040,9 @@ dict_stats_analyze_index_level(
i = (REC_OFFS_HEADER_SIZE + 1 + 1) + index->n_fields;
heap = mem_heap_create((2 * sizeof *rec_offsets) * i);
- rec_offsets = static_cast<ulint*>(
+ rec_offsets = static_cast<offset_t*>(
mem_heap_alloc(heap, i * sizeof *rec_offsets));
- prev_rec_offsets = static_cast<ulint*>(
+ prev_rec_offsets = static_cast<offset_t*>(
mem_heap_alloc(heap, i * sizeof *prev_rec_offsets));
rec_offs_set_n_alloc(rec_offsets, i);
rec_offs_set_n_alloc(prev_rec_offsets, i);
@@ -1098,7 +1060,7 @@ dict_stats_analyze_index_level(
on the desired level. */
btr_pcur_open_at_index_side(
- true, index, BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED,
+ true, index, BTR_SEARCH_TREE_ALREADY_S_LATCHED,
&pcur, true, level, mtr);
btr_pcur_move_to_next_on_page(&pcur);
@@ -1114,7 +1076,7 @@ dict_stats_analyze_index_level(
ut_a(btr_page_get_level(page, mtr) == level);
/* there should not be any pages on the left */
- ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
+ ut_a(!page_has_prev(page));
/* check whether the first record on the leftmost page is marked
as such, if we are on a non-leaf level */
@@ -1137,8 +1099,6 @@ dict_stats_analyze_index_level(
btr_pcur_is_on_user_rec(&pcur);
btr_pcur_move_to_next_user_rec(&pcur, mtr)) {
- ulint matched_fields = 0;
- ulint matched_bytes = 0;
bool rec_is_last_on_page;
rec = btr_pcur_get_rec(&pcur);
@@ -1181,6 +1141,7 @@ dict_stats_analyze_index_level(
prev_rec_offsets = rec_get_offsets(
prev_rec, index, prev_rec_offsets,
+ true,
n_uniq, &heap);
prev_rec = rec_copy_prefix_to_buf(
@@ -1193,25 +1154,21 @@ dict_stats_analyze_index_level(
continue;
}
-
rec_offsets = rec_get_offsets(
- rec, index, rec_offsets, n_uniq, &heap);
+ rec, index, rec_offsets, !level, n_uniq, &heap);
(*total_recs)++;
if (prev_rec != NULL) {
+ ulint matched_fields;
+
prev_rec_offsets = rec_get_offsets(
- prev_rec, index, prev_rec_offsets,
+ prev_rec, index, prev_rec_offsets, !level,
n_uniq, &heap);
- cmp_rec_rec_with_match(rec,
- prev_rec,
- rec_offsets,
- prev_rec_offsets,
- index,
- FALSE,
- &matched_fields,
- &matched_bytes);
+ cmp_rec_rec(prev_rec, rec,
+ prev_rec_offsets, rec_offsets, index,
+ false, &matched_fields);
for (i = matched_fields; i < n_uniq; i++) {
@@ -1309,7 +1266,7 @@ dict_stats_analyze_index_level(
DEBUG_PRINTF(" %s(): total recs: " UINT64PF
", total pages: " UINT64PF
- ", n_diff[%lu]: " UINT64PF "\n",
+ ", n_diff[" ULINTPF "]: " UINT64PF "\n",
__func__, *total_recs,
*total_pages,
i, n_diff[i]);
@@ -1340,30 +1297,10 @@ dict_stats_analyze_index_level(
btr_leaf_page_release(btr_pcur_get_block(&pcur), BTR_SEARCH_LEAF, mtr);
btr_pcur_close(&pcur);
-
- if (prev_rec_buf != NULL) {
-
- mem_free(prev_rec_buf);
- }
-
+ ut_free(prev_rec_buf);
mem_heap_free(heap);
}
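/* A minimal standalone sketch (hypothetical helper, not InnoDB code)
of the counting rule in the loop above: when two neighbouring records
first differ at field number matched_fields, every prefix longer than
that has seen one more distinct value. */

static void
note_prefix_boundary(
	ulint		matched_fields,
	ulint		n_uniq,
	ib_uint64_t*	n_diff)
{
	for (ulint i = matched_fields; i < n_uniq; i++) {
		/* prefixes of length i + 1 gained a new value */
		n_diff[i]++;
	}
}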
-/* aux enum for controlling the behavior of dict_stats_scan_page() @{ */
-enum page_scan_method_t {
- COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED,/* scan all records on
- the given page and count the number of
- distinct ones, also ignore delete marked
- records */
- QUIT_ON_FIRST_NON_BORING,/* quit when the first record that differs
- from its right neighbor is found */
- COUNT_ALL_NON_BORING_INCLUDE_DEL_MARKED/* scan all records on
- the given page and count the number of
- distinct ones, include delete marked
- records */
-};
-/* @} */
-
/** Scan a page, reading records from left to right and counting the number
of distinct records (looking only at the first n_prefix
columns) and the number of external pages pointed by records from this page.
@@ -1380,40 +1317,38 @@ be big enough)
@param[in] index index of the page
@param[in] page the page to scan
@param[in] n_prefix look at the first n_prefix columns
-@param[in] scan_method scan to the end of the page or not
+@param[in] is_leaf whether this is the leaf page
@param[out] n_diff number of distinct records encountered
@param[out] n_external_pages if this is non-NULL then it will be set
to the number of externally stored pages which were encountered
@return offsets1 or offsets2 (the offsets of *out_rec),
or NULL if the page is empty and does not contain user records. */
UNIV_INLINE
-ulint*
+offset_t*
dict_stats_scan_page(
const rec_t** out_rec,
- ulint* offsets1,
- ulint* offsets2,
- dict_index_t* index,
+ offset_t* offsets1,
+ offset_t* offsets2,
+ const dict_index_t* index,
const page_t* page,
ulint n_prefix,
- page_scan_method_t scan_method,
+ bool is_leaf,
ib_uint64_t* n_diff,
ib_uint64_t* n_external_pages)
{
- ulint* offsets_rec = offsets1;
- ulint* offsets_next_rec = offsets2;
+ offset_t* offsets_rec = offsets1;
+ offset_t* offsets_next_rec = offsets2;
const rec_t* rec;
const rec_t* next_rec;
/* A dummy heap, to be passed to rec_get_offsets().
	Because offsets1, offsets2 should be big enough,
this memory heap should never be used. */
mem_heap_t* heap = NULL;
- const rec_t* (*get_next)(const rec_t*);
-
- if (scan_method == COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED) {
- get_next = page_rec_get_next_non_del_marked;
- } else {
- get_next = page_rec_get_next_const;
- }
+ ut_ad(is_leaf == page_is_leaf(page));
+ const rec_t* (*get_next)(const rec_t*)
+ = !is_leaf || srv_stats_include_delete_marked
+ ? page_rec_get_next_const
+ : page_rec_get_next_non_del_marked;
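	/* Rough mapping from the removed page_scan_method_t (a sketch
	of the old semantics): QUIT_ON_FIRST_NON_BORING corresponds to
	!is_leaf, COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED to
	is_leaf && !srv_stats_include_delete_marked, and
	COUNT_ALL_NON_BORING_INCLUDE_DEL_MARKED to
	is_leaf && srv_stats_include_delete_marked. */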
const bool should_count_external_pages = n_external_pages != NULL;
@@ -1430,7 +1365,7 @@ dict_stats_scan_page(
return(NULL);
}
- offsets_rec = rec_get_offsets(rec, index, offsets_rec,
+ offsets_rec = rec_get_offsets(rec, index, offsets_rec, is_leaf,
ULINT_UNDEFINED, &heap);
if (should_count_external_pages) {
@@ -1444,49 +1379,39 @@ dict_stats_scan_page(
while (!page_rec_is_supremum(next_rec)) {
- ulint matched_fields = 0;
- ulint matched_bytes = 0;
+ ulint matched_fields;
offsets_next_rec = rec_get_offsets(next_rec, index,
- offsets_next_rec,
+ offsets_next_rec, is_leaf,
ULINT_UNDEFINED,
&heap);
/* check whether rec != next_rec when looking at
the first n_prefix fields */
- cmp_rec_rec_with_match(rec, next_rec,
- offsets_rec, offsets_next_rec,
- index, FALSE, &matched_fields,
- &matched_bytes);
+ cmp_rec_rec(rec, next_rec, offsets_rec, offsets_next_rec,
+ index, false, &matched_fields);
if (matched_fields < n_prefix) {
/* rec != next_rec, => rec is non-boring */
(*n_diff)++;
- if (scan_method == QUIT_ON_FIRST_NON_BORING) {
- goto func_exit;
+ if (!is_leaf) {
+ break;
}
}
rec = next_rec;
- {
- /* Assign offsets_rec = offsets_next_rec
- so that offsets_rec matches with rec which
- was just assigned rec = next_rec above.
- Also need to point offsets_next_rec to the
- place where offsets_rec was pointing before
- because we have just 2 placeholders where
- data is actually stored:
- offsets_onstack1 and offsets_onstack2 and we
- are using them in circular fashion
- (offsets[_next]_rec are just pointers to
- those placeholders). */
- ulint* offsets_tmp;
- offsets_tmp = offsets_rec;
- offsets_rec = offsets_next_rec;
- offsets_next_rec = offsets_tmp;
- }
+ /* Assign offsets_rec = offsets_next_rec so that
+ offsets_rec matches with rec which was just assigned
+ rec = next_rec above. Also need to point
+ offsets_next_rec to the place where offsets_rec was
+ pointing before because we have just 2 placeholders
+ where data is actually stored: offsets1 and offsets2
+ and we are using them in circular fashion
+ (offsets[_next]_rec are just pointers to those
+ placeholders). */
+ std::swap(offsets_rec, offsets_next_rec);
if (should_count_external_pages) {
*n_external_pages += btr_rec_get_externally_stored_len(
@@ -1496,7 +1421,6 @@ dict_stats_scan_page(
next_rec = get_next(next_rec);
}
-func_exit:
/* offsets1,offsets2 should have been big enough */
ut_a(heap == NULL);
*out_rec = rec;
@@ -1522,16 +1446,13 @@ dict_stats_analyze_index_below_cur(
ib_uint64_t* n_external_pages)
{
dict_index_t* index;
- ulint space;
- ulint zip_size;
buf_block_t* block;
- ulint page_no;
const page_t* page;
mem_heap_t* heap;
const rec_t* rec;
- ulint* offsets1;
- ulint* offsets2;
- ulint* offsets_rec;
+ offset_t* offsets1;
+ offset_t* offsets2;
+ offset_t* offsets_rec;
ulint size;
mtr_t mtr;
@@ -1549,24 +1470,25 @@ dict_stats_analyze_index_below_cur(
heap = mem_heap_create(size * (sizeof *offsets1 + sizeof *offsets2));
- offsets1 = static_cast<ulint*>(mem_heap_alloc(
+ offsets1 = static_cast<offset_t*>(mem_heap_alloc(
heap, size * sizeof *offsets1));
- offsets2 = static_cast<ulint*>(mem_heap_alloc(
+ offsets2 = static_cast<offset_t*>(mem_heap_alloc(
heap, size * sizeof *offsets2));
rec_offs_set_n_alloc(offsets1, size);
rec_offs_set_n_alloc(offsets2, size);
- space = dict_index_get_space(index);
- zip_size = dict_table_zip_size(index->table);
-
rec = btr_cur_get_rec(cur);
+ ut_ad(!page_rec_is_leaf(rec));
- offsets_rec = rec_get_offsets(rec, index, offsets1,
+ offsets_rec = rec_get_offsets(rec, index, offsets1, false,
ULINT_UNDEFINED, &heap);
- page_no = btr_node_ptr_get_child_page_no(rec, offsets_rec);
+ page_id_t page_id(dict_index_get_space(index),
+ btr_node_ptr_get_child_page_no(
+ rec, offsets_rec));
+ const page_size_t page_size(dict_table_page_size(index->table));
/* assume no external pages by default - in case we quit from this
function without analyzing any leaf pages */
@@ -1577,9 +1499,11 @@ dict_stats_analyze_index_below_cur(
/* descend to the leaf level on the B-tree */
for (;;) {
- block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH,
+ dberr_t err = DB_SUCCESS;
+
+ block = buf_page_get_gen(page_id, page_size, RW_S_LATCH,
NULL /* no guessed block */,
- BUF_GET, __FILE__, __LINE__, &mtr);
+ BUF_GET, __FILE__, __LINE__, &mtr, &err);
page = buf_block_get_frame(block);
@@ -1592,7 +1516,7 @@ dict_stats_analyze_index_below_cur(
/* search for the first non-boring record on the page */
offsets_rec = dict_stats_scan_page(
&rec, offsets1, offsets2, index, page, n_prefix,
- QUIT_ON_FIRST_NON_BORING, n_diff, NULL);
+ false, n_diff, NULL);
/* pages on level > 0 are not allowed to be empty */
ut_a(offsets_rec != NULL);
@@ -1623,7 +1547,8 @@ dict_stats_analyze_index_below_cur(
/* we have a non-boring record in rec, descend below it */
- page_no = btr_node_ptr_get_child_page_no(rec, offsets_rec);
+ page_id.set_page_no(
+ btr_node_ptr_get_child_page_no(rec, offsets_rec));
}
/* make sure we got a leaf page as a result from the above loop */
@@ -1636,9 +1561,7 @@ dict_stats_analyze_index_below_cur(
offsets_rec = dict_stats_scan_page(
&rec, offsets1, offsets2, index, page, n_prefix,
- srv_stats_include_delete_marked ?
- COUNT_ALL_NON_BORING_INCLUDE_DEL_MARKED:
- COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED, n_diff,
+ true, n_diff,
n_external_pages);
#if 0
@@ -1719,20 +1642,20 @@ dict_stats_analyze_index_for_n_prefix(
ib_uint64_t i;
#if 0
- DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu, n_prefix=%lu, "
- "n_diff_on_level=" UINT64PF ")\n",
+ DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu, n_prefix=%lu,"
+ " n_diff_on_level=" UINT64PF ")\n",
__func__, index->table->name, index->name, level,
n_prefix, n_diff_data->n_diff_on_level);
#endif
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_S_LOCK));
+ MTR_MEMO_SX_LOCK));
/* Position pcur on the leftmost record on the leftmost page
on the desired level. */
btr_pcur_open_at_index_side(
- true, index, BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED,
+ true, index, BTR_SEARCH_TREE_ALREADY_S_LATCHED,
&pcur, true, n_diff_data->level, mtr);
btr_pcur_move_to_next_on_page(&pcur);
@@ -1754,7 +1677,7 @@ dict_stats_analyze_index_for_n_prefix(
ut_a(btr_page_get_level(page, mtr) == n_diff_data->level);
/* there should not be any pages on the left */
- ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
+ ut_a(!page_has_prev(page));
/* check whether the first record on the leftmost page is marked
as such; we are on a non-leaf level */
@@ -1812,7 +1735,7 @@ dict_stats_analyze_index_for_n_prefix(
ut_rnd_interval() to work with too big numbers since
ib_uint64_t could be bigger than ulint */
const ulint rnd = ut_rnd_interval(
- 0, static_cast<ulint>(right - left));
+ static_cast<ulint>(right - left));
const ib_uint64_t dive_below_idx
= boundaries->at(static_cast<unsigned>(left + rnd));
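		/* Example (sketch): with left == 10, right == 20 and
		rnd == 4, the dive happens below the record whose group
		ends at boundaries->at(14). */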
@@ -1859,7 +1782,7 @@ dict_stats_analyze_index_for_n_prefix(
&n_external_pages);
/* We adjust n_diff_on_leaf_page here to avoid counting
- one record twice - once as the last on some page and once
+ one value twice - once as the last on some page and once
as the first on another page. Consider the following example:
Leaf level:
page: (2,2,2,2,3,3)
@@ -1907,7 +1830,7 @@ dict_stats_index_set_n_diff(
ut_ad(data->n_leaf_pages_to_analyze > 0);
ut_ad(data->n_recs_on_level > 0);
- ulint n_ordinary_leaf_pages;
+ ib_uint64_t n_ordinary_leaf_pages;
if (data->level == 1) {
/* If we know the number of records on level 1, then
@@ -1943,8 +1866,9 @@ dict_stats_index_set_n_diff(
index->stat_n_sample_sizes[n_prefix - 1]
= data->n_leaf_pages_to_analyze;
- DEBUG_PRINTF(" %s(): n_diff=" UINT64PF " for n_prefix=%lu"
- " (%lu"
+ DEBUG_PRINTF(" %s(): n_diff=" UINT64PF
+ " for n_prefix=" ULINTPF
+ " (" ULINTPF
" * " UINT64PF " / " UINT64PF
" * " UINT64PF " / " UINT64PF ")\n",
__func__,
@@ -1979,10 +1903,15 @@ dict_stats_analyze_index(
ulint size;
DBUG_ENTER("dict_stats_analyze_index");
- DBUG_PRINT("info", ("index: %s, online status: %d", index->name,
+ DBUG_PRINT("info", ("index: %s, online status: %d", index->name(),
dict_index_get_online_status(index)));
- DEBUG_PRINTF(" %s(index=%s)\n", __func__, index->name);
+ /* Disable update statistic for Rtree */
+ if (dict_index_is_spatial(index)) {
+ DBUG_VOID_RETURN;
+ }
+
+ DEBUG_PRINTF(" %s(index=%s)\n", __func__, index->name());
dict_stats_empty_index(index, false);
@@ -2013,7 +1942,7 @@ dict_stats_analyze_index(
mtr_start(&mtr);
- mtr_s_lock(dict_index_get_lock(index), &mtr);
+ mtr_sx_lock(dict_index_get_lock(index), &mtr);
root_level = btr_height_get(index, &mtr);
@@ -2032,11 +1961,11 @@ dict_stats_analyze_index(
|| N_SAMPLE_PAGES(index) * n_uniq > index->stat_n_leaf_pages) {
if (root_level == 0) {
- DEBUG_PRINTF(" %s(): just one page, "
- "doing full scan\n", __func__);
+ DEBUG_PRINTF(" %s(): just one page,"
+ " doing full scan\n", __func__);
} else {
- DEBUG_PRINTF(" %s(): too many pages requested for "
- "sampling, doing full scan\n", __func__);
+ DEBUG_PRINTF(" %s(): too many pages requested for"
+ " sampling, doing full scan\n", __func__);
}
/* do full scan of level 0; save results directly
@@ -2062,16 +1991,18 @@ dict_stats_analyze_index(
/* For each level that is being scanned in the btree, this contains the
number of different key values for all possible n-column prefixes. */
- ib_uint64_t* n_diff_on_level = new ib_uint64_t[n_uniq];
+ ib_uint64_t* n_diff_on_level = UT_NEW_ARRAY(
+ ib_uint64_t, n_uniq, mem_key_dict_stats_n_diff_on_level);
/* For each level that is being scanned in the btree, this contains the
index of the last record from each group of equal records (when
comparing only the first n columns, n=1..n_uniq). */
- boundaries_t* n_diff_boundaries = new boundaries_t[n_uniq];
+ boundaries_t* n_diff_boundaries = UT_NEW_ARRAY_NOKEY(boundaries_t,
+ n_uniq);
/* For each n-column prefix this array contains the input data that is
used to calculate dict_index_t::stat_n_diff_key_vals[]. */
- n_diff_data_t* n_diff_data = new n_diff_data_t[n_uniq];
+ n_diff_data_t* n_diff_data = UT_NEW_ARRAY_NOKEY(n_diff_data_t, n_uniq);
/* total_recs is also used to estimate the number of pages on one
level below, so at the start we have 1 page (the root) */
@@ -2093,14 +2024,14 @@ dict_stats_analyze_index(
for (n_prefix = n_uniq; n_prefix >= 1; n_prefix--) {
DEBUG_PRINTF(" %s(): searching level with >=%llu "
- "distinct records, n_prefix=%lu\n",
+ "distinct records, n_prefix=" ULINTPF "\n",
__func__, N_DIFF_REQUIRED(index), n_prefix);
/* Commit the mtr to release the tree S lock to allow
other threads to do some work too. */
mtr_commit(&mtr);
mtr_start(&mtr);
- mtr_s_lock(dict_index_get_lock(index), &mtr);
+ mtr_sx_lock(dict_index_get_lock(index), &mtr);
if (root_level != btr_height_get(index, &mtr)) {
/* Just quit if the tree has changed beyond
recognition here. The old stats from previous
@@ -2197,8 +2128,9 @@ dict_stats_analyze_index(
}
found_level:
- DEBUG_PRINTF(" %s(): found level %lu that has " UINT64PF
- " distinct records for n_prefix=%lu\n",
+ DEBUG_PRINTF(" %s(): found level " ULINTPF
+ " that has " UINT64PF
+ " distinct records for n_prefix=" ULINTPF "\n",
__func__, level, n_diff_on_level[n_prefix - 1],
n_prefix);
/* here we are either on level 1 or the level that we are on
@@ -2239,9 +2171,9 @@ found_level:
mtr_commit(&mtr);
- delete[] n_diff_boundaries;
+ UT_DELETE_ARRAY(n_diff_boundaries);
- delete[] n_diff_on_level;
+ UT_DELETE_ARRAY(n_diff_on_level);
/* n_prefix == 0 means that the above loop did not end prematurely
due to the tree being changed, and so n_diff_data[] is set up. */
@@ -2249,7 +2181,7 @@ found_level:
dict_stats_index_set_n_diff(n_diff_data, index);
}
- delete[] n_diff_data;
+ UT_DELETE_ARRAY(n_diff_data);
dict_stats_assert_initialized_index(index);
DBUG_VOID_RETURN;
@@ -2277,7 +2209,7 @@ dict_stats_update_persistent(
index = dict_table_get_first_index(table);
if (index == NULL
- || dict_index_is_corrupted(index)
+ || index->is_corrupted()
|| (index->type | DICT_UNIQUE) != (DICT_CLUSTERED | DICT_UNIQUE)) {
/* Table definition is corrupt */
@@ -2287,7 +2219,7 @@ dict_stats_update_persistent(
return(DB_CORRUPTION);
}
- ut_ad(!dict_index_is_univ(index));
+ ut_ad(!dict_index_is_ibuf(index));
dict_stats_analyze_index(index);
@@ -2305,9 +2237,9 @@ dict_stats_update_persistent(
index != NULL;
index = dict_table_get_next_index(index)) {
- ut_ad(!dict_index_is_univ(index));
+ ut_ad(!dict_index_is_ibuf(index));
- if (index->type & DICT_FTS) {
+ if (index->type & DICT_FTS || dict_index_is_spatial(index)) {
continue;
}
@@ -2351,37 +2283,34 @@ storage.
allocate and free the trx object. If it is not NULL then it will be
rolled back only in the case of error, but not freed.
@return DB_SUCCESS or error code */
-static
dberr_t
dict_stats_save_index_stat(
dict_index_t* index,
- lint last_update,
+ time_t last_update,
const char* stat_name,
ib_uint64_t stat_value,
ib_uint64_t* sample_size,
const char* stat_description,
trx_t* trx)
{
- pars_info_t* pinfo;
dberr_t ret;
+ pars_info_t* pinfo;
char db_utf8[MAX_DB_UTF8_LEN];
char table_utf8[MAX_TABLE_UTF8_LEN];
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!trx || trx->internal || trx->in_mysql_trx_list);
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
ut_ad(mutex_own(&dict_sys->mutex));
- dict_fs2utf8(index->table->name, db_utf8, sizeof(db_utf8),
+ dict_fs2utf8(index->table->name.m_name, db_utf8, sizeof(db_utf8),
table_utf8, sizeof(table_utf8));
pinfo = pars_info_create();
pars_info_add_str_literal(pinfo, "database_name", db_utf8);
pars_info_add_str_literal(pinfo, "table_name", table_utf8);
- UNIV_MEM_ASSERT_RW_ABORT(index->name, strlen(index->name));
pars_info_add_str_literal(pinfo, "index_name", index->name);
UNIV_MEM_ASSERT_RW_ABORT(&last_update, 4);
- pars_info_add_int4_literal(pinfo, "last_update", last_update);
+ pars_info_add_int4_literal(pinfo, "last_update", (lint)last_update);
UNIV_MEM_ASSERT_RW_ABORT(stat_name, strlen(stat_name));
pars_info_add_str_literal(pinfo, "stat_name", stat_name);
UNIV_MEM_ASSERT_RW_ABORT(&stat_value, 8);
@@ -2426,17 +2355,11 @@ dict_stats_save_index_stat(
if (ret != DB_SUCCESS) {
if (innodb_index_stats_not_found == false &&
index->stats_error_printed == false) {
- char buf_table[MAX_FULL_NAME_LEN];
- char buf_index[MAX_FULL_NAME_LEN];
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Cannot save index statistics for table "
- "%s, index %s, stat name \"%s\": %s\n",
- ut_format_name(index->table->name, TRUE,
- buf_table, sizeof(buf_table)),
- ut_format_name(index->name, FALSE,
- buf_index, sizeof(buf_index)),
- stat_name, ut_strerr(ret));
+ ib::error() << "Cannot save index statistics for table "
+ << index->table->name
+ << ", index " << index->name
+ << ", stat name \"" << stat_name << "\": "
+ << ut_strerr(ret);
index->stats_error_printed = true;
}
}
@@ -2444,104 +2367,81 @@ dict_stats_save_index_stat(
return(ret);
}
-/** Report error if statistic update for a table failed because
+/** Report an error if updating table statistics failed because
.ibd file is missing, table decryption failed or table is corrupted.
@param[in,out] table Table
@param[in] defragment true if statistics is for defragment
-@return DB_DECRYPTION_FAILED, DB_TABLESPACE_DELETED or DB_CORRUPTION
@retval DB_DECRYPTION_FAILED if decryption of the table failed
@retval DB_TABLESPACE_DELETED if .ibd file is missing
@retval DB_CORRUPTION if table is marked as corrupted */
-static
dberr_t
-dict_stats_report_error(
- dict_table_t* table,
- bool defragment = false)
+dict_stats_report_error(dict_table_t* table, bool defragment)
{
- char buf[3 * NAME_LEN];
dberr_t err;
- innobase_format_name(buf, sizeof buf,
- table->name,
- true);
-
FilSpace space(table->space);
+ const char* df = defragment ? " defragment" : "";
- if (space()) {
- if (table->corrupted) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Cannot save%s statistics because "
- " table %s in file %s is corrupted.",
- defragment ? " defragment" : " ",
- buf, space()->chain.start->name);
- err = DB_CORRUPTION;
- } else {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Cannot save%s statistics because "
- " table %s in file %s can't be decrypted.",
- defragment ? " defragment" : " ",
- buf, space()->chain.start->name);
- err = DB_DECRYPTION_FAILED;
- }
- } else {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Cannot save%s statistics for "
- " table %s because .ibd file is missing."
- " For help, please "
- "refer to " REFMAN "innodb-troubleshooting.html.",
- defragment ? " defragment" : " ",
- buf);
+ if (!space()) {
+ ib::warn() << "Cannot save" << df << " statistics for table "
+ << table->name
+ << " because the .ibd file is missing. "
+ << TROUBLESHOOTING_MSG;
err = DB_TABLESPACE_DELETED;
+ } else {
+ ib::warn() << "Cannot save" << df << " statistics for table "
+ << table->name
+ << " because file " << space()->chain.start->name
+ << (table->corrupted
+ ? " is corrupted."
+ : " cannot be decrypted.");
+ err = table->corrupted ? DB_CORRUPTION : DB_DECRYPTION_FAILED;
}
dict_stats_empty_table(table, defragment);
-
- return (err);
+ return err;
}
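/* Summary of the mapping above:

	.ibd file missing               -> DB_TABLESPACE_DELETED
	file present, table corrupted   -> DB_CORRUPTION
	file present, decryption failed -> DB_DECRYPTION_FAILED

In every case the in-memory statistics are reset via
dict_stats_empty_table(). */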
/** Save the table's statistics into the persistent statistics storage.
-@param[in] table_orig table whose stats to save
-@param[in] only_for_index if this is non-NULL, then stats for indexes
-that are not equal to it will not be saved, if NULL, then all
-indexes' stats are saved
+@param[in] table_orig table whose stats to save
+@param[in] only_for_index if this is non-NULL, then stats for indexes
+that are not equal to it will not be saved, if NULL, then all indexes' stats
+are saved
@return DB_SUCCESS or error code */
static
dberr_t
dict_stats_save(
-/*============*/
dict_table_t* table_orig,
const index_id_t* only_for_index)
{
pars_info_t* pinfo;
- lint now;
dberr_t ret;
dict_table_t* table;
char db_utf8[MAX_DB_UTF8_LEN];
char table_utf8[MAX_TABLE_UTF8_LEN];
- if (table_orig->is_readable()) {
- } else {
+ if (high_level_read_only) {
+ return DB_READ_ONLY;
+ }
+
+ if (!table_orig->is_readable()) {
return (dict_stats_report_error(table_orig));
}
table = dict_stats_snapshot_create(table_orig);
- dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8),
+ dict_fs2utf8(table->name.m_name, db_utf8, sizeof(db_utf8),
table_utf8, sizeof(table_utf8));
+ const time_t now = time(NULL);
rw_lock_x_lock(&dict_operation_lock);
mutex_enter(&dict_sys->mutex);
- /* MySQL's timestamp is 4 byte, so we use
- pars_info_add_int4_literal() which takes a lint arg, so "now" is
- lint */
- now = (lint) time(NULL);
-
pinfo = pars_info_create();
pars_info_add_str_literal(pinfo, "database_name", db_utf8);
pars_info_add_str_literal(pinfo, "table_name", table_utf8);
- pars_info_add_int4_literal(pinfo, "last_update", now);
+ pars_info_add_int4_literal(pinfo, "last_update", (lint)now);
pars_info_add_ull_literal(pinfo, "n_rows", table->stat_n_rows);
pars_info_add_ull_literal(pinfo, "clustered_index_size",
table->stat_clustered_index_size);
@@ -2571,13 +2471,8 @@ dict_stats_save(
"END;", NULL);
if (ret != DB_SUCCESS) {
- char buf[MAX_FULL_NAME_LEN];
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Cannot save table statistics for table "
- "%s: %s\n",
- ut_format_name(table->name, TRUE, buf, sizeof(buf)),
- ut_strerr(ret));
+ ib::error() << "Cannot save table statistics for table "
+ << table->name << ": " << ut_strerr(ret);
mutex_exit(&dict_sys->mutex);
rw_lock_x_unlock(&dict_operation_lock);
@@ -2588,10 +2483,12 @@ dict_stats_save(
}
trx_t* trx = trx_allocate_for_background();
- trx_start_if_not_started(trx);
+ trx_start_internal(trx);
dict_index_t* index;
- index_map_t indexes;
+ index_map_t indexes(
+ (ut_strcmp_functor()),
+ index_map_t_allocator(mem_key_dict_stats_index_map_t));
/* Below we do all the modifications in innodb_index_stats in a single
transaction for performance reasons. Modifying more than one row in a
@@ -2626,29 +2523,27 @@ dict_stats_save(
continue;
}
- ut_ad(!dict_index_is_univ(index));
+ ut_ad(!dict_index_is_ibuf(index));
- for (ulint i = 0; i < index->n_uniq; i++) {
+ for (unsigned i = 0; i < index->n_uniq; i++) {
char stat_name[16];
char stat_description[1024];
- ulint j;
- ut_snprintf(stat_name, sizeof(stat_name),
- "n_diff_pfx%02lu", i + 1);
+ snprintf(stat_name, sizeof(stat_name),
+ "n_diff_pfx%02u", i + 1);
- /* craft a string that contains the columns names */
- ut_snprintf(stat_description,
- sizeof(stat_description),
- "%s", index->fields[0].name);
- for (j = 1; j <= i; j++) {
+ /* craft a string that contains the column names */
+ snprintf(stat_description, sizeof(stat_description),
+ "%s", index->fields[0].name());
+ for (unsigned j = 1; j <= i; j++) {
size_t len;
len = strlen(stat_description);
- ut_snprintf(stat_description + len,
- sizeof(stat_description) - len,
- ",%s", index->fields[j].name);
+ snprintf(stat_description + len,
+ sizeof(stat_description) - len,
+ ",%s", index->fields[j].name());
}
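			/* Illustration (hypothetical index on columns
			(c1, c2)): i == 0 yields stat_name
			"n_diff_pfx01" with stat_description "c1";
			i == 1 yields "n_diff_pfx02" with "c1,c2". */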
ret = dict_stats_save_index_stat(
@@ -2846,7 +2741,8 @@ dict_stats_fetch_index_stats_step(
index != NULL;
index = dict_table_get_next_index(index)) {
- if (strlen(index->name) == len
+ if (index->is_committed()
+ && strlen(index->name) == len
&& memcmp(index->name, data, len) == 0) {
/* the corresponding index was found */
break;
@@ -2972,24 +2868,19 @@ dict_stats_fetch_index_stats_step(
char db_utf8[MAX_DB_UTF8_LEN];
char table_utf8[MAX_TABLE_UTF8_LEN];
- dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8),
+ dict_fs2utf8(table->name.m_name,
+ db_utf8, sizeof(db_utf8),
table_utf8, sizeof(table_utf8));
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Ignoring strange row from "
- "%s WHERE "
- "database_name = '%s' AND "
- "table_name = '%s' AND "
- "index_name = '%s' AND "
- "stat_name = '%.*s'; because stat_name "
- "is malformed\n",
- INDEX_STATS_NAME_PRINT,
- db_utf8,
- table_utf8,
- index->name,
- (int) stat_name_len,
- stat_name);
+ ib::info out;
+ out << "Ignoring strange row from "
+ << INDEX_STATS_NAME_PRINT << " WHERE"
+ " database_name = '" << db_utf8
+ << "' AND table_name = '" << table_utf8
+ << "' AND index_name = '" << index->name()
+ << "' AND stat_name = '";
+ out.write(stat_name, stat_name_len);
+ out << "'; because stat_name is malformed";
return(TRUE);
}
/* else */
@@ -3005,26 +2896,21 @@ dict_stats_fetch_index_stats_step(
char db_utf8[MAX_DB_UTF8_LEN];
char table_utf8[MAX_TABLE_UTF8_LEN];
- dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8),
+ dict_fs2utf8(table->name.m_name,
+ db_utf8, sizeof(db_utf8),
table_utf8, sizeof(table_utf8));
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Ignoring strange row from "
- "%s WHERE "
- "database_name = '%s' AND "
- "table_name = '%s' AND "
- "index_name = '%s' AND "
- "stat_name = '%.*s'; because stat_name is "
- "out of range, the index has %lu unique "
- "columns\n",
- INDEX_STATS_NAME_PRINT,
- db_utf8,
- table_utf8,
- index->name,
- (int) stat_name_len,
- stat_name,
- n_uniq);
+ ib::info out;
+ out << "Ignoring strange row from "
+ << INDEX_STATS_NAME_PRINT << " WHERE"
+ " database_name = '" << db_utf8
+ << "' AND table_name = '" << table_utf8
+ << "' AND index_name = '" << index->name()
+ << "' AND stat_name = '";
+ out.write(stat_name, stat_name_len);
+ out << "'; because stat_name is out of range, the index"
+ " has " << n_uniq << " unique columns";
+
return(TRUE);
}
/* else */
@@ -3083,9 +2969,13 @@ dict_stats_fetch_from_ps(
trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
- trx_start_if_not_started(trx);
+ if (srv_read_only_mode) {
+ trx_start_internal_read_only(trx);
+ } else {
+ trx_start_internal(trx);
+ }
- dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8),
+ dict_fs2utf8(table->name.m_name, db_utf8, sizeof(db_utf8),
table_utf8, sizeof(table_utf8));
pinfo = pars_info_create();
@@ -3192,7 +3082,6 @@ dict_stats_empty_defrag_modified_counter(
/*********************************************************************//**
Fetches or calculates new estimates for index statistics. */
-UNIV_INTERN
void
dict_stats_update_for_index(
/*========================*/
@@ -3219,18 +3108,13 @@ dict_stats_update_for_index(
index->stats_error_printed == false) {
/* Fall back to transient stats since the persistent
storage is not present or is corrupted */
- char buf_table[MAX_FULL_NAME_LEN];
- char buf_index[MAX_FULL_NAME_LEN];
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Recalculation of persistent statistics "
- "requested for table %s index %s but the required "
- "persistent statistics storage is not present or is "
- "corrupted. Using transient stats instead.\n",
- ut_format_name(index->table->name, TRUE,
- buf_table, sizeof(buf_table)),
- ut_format_name(index->name, FALSE,
- buf_index, sizeof(buf_index)));
+
+ ib::info() << "Recalculation of persistent statistics"
+ " requested for table " << index->table->name
+ << " index " << index->name
+ << " but the required"
+ " persistent statistics storage is not present or is"
+ " corrupted. Using transient stats instead.";
index->stats_error_printed = false;
}
}
@@ -3246,7 +3130,6 @@ dict_stats_update_for_index(
Calculates new estimates for table and index statistics. The statistics
are used in query optimization.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
dict_stats_update(
/*==============*/
@@ -3257,8 +3140,6 @@ dict_stats_update(
the persistent statistics
storage */
{
- char buf[MAX_FULL_NAME_LEN];
-
ut_ad(!mutex_own(&dict_sys->mutex));
if (!table->is_readable()) {
@@ -3286,7 +3167,7 @@ dict_stats_update(
/* InnoDB internal tables (e.g. SYS_TABLES) cannot have
persistent stats enabled */
- ut_a(strchr(table->name, '/') != NULL);
+ ut_a(strchr(table->name.m_name, '/') != NULL);
/* check if the persistent statistics storage exists
before calling the potentially slow function
@@ -3312,13 +3193,12 @@ dict_stats_update(
if (innodb_table_stats_not_found == false &&
table->stats_error_printed == false) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Recalculation of persistent statistics "
- "requested for table %s but the required persistent "
- "statistics storage is not present or is corrupted. "
- "Using transient stats instead.\n",
- ut_format_name(table->name, TRUE, buf, sizeof(buf)));
+ ib::warn() << "Recalculation of persistent statistics"
+ " requested for table "
+ << table->name
+ << " but the required persistent"
+ " statistics storage is not present or is corrupted."
+ " Using transient stats instead.";
table->stats_error_printed = true;
}
@@ -3358,7 +3238,7 @@ dict_stats_update(
/* InnoDB internal tables (e.g. SYS_TABLES) cannot have
persistent stats enabled */
- ut_a(strchr(table->name, '/') != NULL);
+ ut_a(strchr(table->name.m_name, '/') != NULL);
if (!dict_stats_persistent_storage_check(false)) {
/* persistent statistics storage does not exist
@@ -3366,18 +3246,15 @@ dict_stats_update(
if (innodb_table_stats_not_found == false &&
table->stats_error_printed == false) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: Fetch of persistent "
- "statistics requested for table %s but the "
- "required system tables %s and %s are not "
- "present or have unexpected structure. "
- "Using transient stats instead.\n",
- ut_format_name(table->name, TRUE,
- buf, sizeof(buf)),
- TABLE_STATS_NAME_PRINT,
- INDEX_STATS_NAME_PRINT);
- table->stats_error_printed = true;
+ ib::error() << "Fetch of persistent statistics"
+ " requested for table "
+ << table->name
+ << " but the required system tables "
+ << TABLE_STATS_NAME_PRINT
+ << " and " << INDEX_STATS_NAME_PRINT
+ << " are not present or have unexpected"
+ " structure. Using transient stats instead.";
+ table->stats_error_printed = true;
}
goto transient;
@@ -3426,20 +3303,18 @@ dict_stats_update(
DICT_STATS_RECALC_PERSISTENT));
}
- ut_format_name(table->name, TRUE, buf, sizeof(buf));
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Trying to use table %s which has "
- "persistent statistics enabled, but auto "
- "recalculation turned off and the statistics "
- "do not exist in %s and %s. Please either run "
- "\"ANALYZE TABLE %s;\" manually or enable the "
- "auto recalculation with "
- "\"ALTER TABLE %s STATS_AUTO_RECALC=1;\". "
- "InnoDB will now use transient statistics for "
- "%s.\n",
- buf, TABLE_STATS_NAME, INDEX_STATS_NAME, buf,
- buf, buf);
+ ib::info() << "Trying to use table " << table->name
+ << " which has persistent statistics enabled,"
+ " but auto recalculation turned off and the"
+ " statistics do not exist in "
+ TABLE_STATS_NAME_PRINT
+ " and " INDEX_STATS_NAME_PRINT
+ ". Please either run \"ANALYZE TABLE "
+ << table->name << ";\" manually or enable the"
+ " auto recalculation with \"ALTER TABLE "
+ << table->name << " STATS_AUTO_RECALC=1;\"."
+ " InnoDB will now use transient statistics for "
+ << table->name << ".";
goto transient;
default:
@@ -3448,16 +3323,12 @@ dict_stats_update(
if (innodb_table_stats_not_found == false &&
table->stats_error_printed == false) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error fetching persistent statistics "
- "for table %s from %s and %s: %s. "
- "Using transient stats method instead.\n",
- ut_format_name(table->name, TRUE, buf,
- sizeof(buf)),
- TABLE_STATS_NAME,
- INDEX_STATS_NAME,
- ut_strerr(err));
+ ib::error() << "Error fetching persistent statistics"
+ " for table "
+ << table->name
+ << " from " TABLE_STATS_NAME_PRINT " and "
+ INDEX_STATS_NAME_PRINT ": " << ut_strerr(err)
+ << ". Using transient stats method instead.";
}
goto transient;
@@ -3488,7 +3359,6 @@ marko: If ibuf merges are not disabled, we need to scan the *.ibd files.
But we shouldn't open *.ibd files before we have rolled back dict
transactions and opened the SYS_* records for the *.ibd files.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
dict_stats_drop_index(
/*==================*/
@@ -3544,23 +3414,23 @@ dict_stats_drop_index(
}
if (ret != DB_SUCCESS) {
- ut_snprintf(errstr, errstr_sz,
- "Unable to delete statistics for index %s "
- "from %s%s: %s. They can be deleted later using "
- "DELETE FROM %s WHERE "
- "database_name = '%s' AND "
- "table_name = '%s' AND "
- "index_name = '%s';",
- iname,
- INDEX_STATS_NAME_PRINT,
- (ret == DB_LOCK_WAIT_TIMEOUT
- ? " because the rows are locked"
- : ""),
- ut_strerr(ret),
- INDEX_STATS_NAME_PRINT,
- db_utf8,
- table_utf8,
- iname);
+ snprintf(errstr, errstr_sz,
+ "Unable to delete statistics for index %s"
+ " from %s%s: %s. They can be deleted later using"
+ " DELETE FROM %s WHERE"
+ " database_name = '%s' AND"
+ " table_name = '%s' AND"
+ " index_name = '%s';",
+ iname,
+ INDEX_STATS_NAME_PRINT,
+ (ret == DB_LOCK_WAIT_TIMEOUT
+ ? " because the rows are locked"
+ : ""),
+ ut_strerr(ret),
+ INDEX_STATS_NAME_PRINT,
+ db_utf8,
+ table_utf8,
+ iname);
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: %s\n", errstr);
@@ -3585,9 +3455,7 @@ dict_stats_delete_from_table_stats(
pars_info_t* pinfo;
dberr_t ret;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
ut_ad(mutex_own(&dict_sys->mutex));
pinfo = pars_info_create();
@@ -3623,9 +3491,7 @@ dict_stats_delete_from_index_stats(
pars_info_t* pinfo;
dberr_t ret;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
ut_ad(mutex_own(&dict_sys->mutex));
pinfo = pars_info_create();
@@ -3650,7 +3516,6 @@ Removes the statistics for a table and all of its indexes from the
persistent statistics storage if it exists and if there is data stored for
the table. This function creates its own transaction and commits it.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
dict_stats_drop_table(
/*==================*/
@@ -3663,9 +3528,7 @@ dict_stats_drop_table(
char table_utf8[MAX_TABLE_UTF8_LEN];
dberr_t ret;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
ut_ad(mutex_own(&dict_sys->mutex));
/* skip tables that do not contain a database name
@@ -3697,26 +3560,26 @@ dict_stats_drop_table(
if (ret != DB_SUCCESS) {
- ut_snprintf(errstr, errstr_sz,
- "Unable to delete statistics for table %s.%s: %s. "
- "They can be deleted later using "
+ snprintf(errstr, errstr_sz,
+ "Unable to delete statistics for table %s.%s: %s."
+ " They can be deleted later using"
- "DELETE FROM %s WHERE "
- "database_name = '%s' AND "
- "table_name = '%s'; "
+ " DELETE FROM %s WHERE"
+ " database_name = '%s' AND"
+ " table_name = '%s';"
- "DELETE FROM %s WHERE "
- "database_name = '%s' AND "
- "table_name = '%s';",
+ " DELETE FROM %s WHERE"
+ " database_name = '%s' AND"
+ " table_name = '%s';",
- db_utf8, table_utf8,
- ut_strerr(ret),
+ db_utf8, table_utf8,
+ ut_strerr(ret),
- INDEX_STATS_NAME_PRINT,
- db_utf8, table_utf8,
+ INDEX_STATS_NAME_PRINT,
+ db_utf8, table_utf8,
- TABLE_STATS_NAME_PRINT,
- db_utf8, table_utf8);
+ TABLE_STATS_NAME_PRINT,
+ db_utf8, table_utf8);
}
return(ret);
@@ -3731,8 +3594,8 @@ Creates its own transaction and commits it.
@return DB_SUCCESS or error code */
UNIV_INLINE
dberr_t
-dict_stats_rename_in_table_stats(
-/*=============================*/
+dict_stats_rename_table_in_table_stats(
+/*===================================*/
const char* old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */
const char* old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */
const char* new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */
@@ -3741,9 +3604,7 @@ dict_stats_rename_in_table_stats(
pars_info_t* pinfo;
dberr_t ret;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
ut_ad(mutex_own(&dict_sys->mutex));
pinfo = pars_info_create();
@@ -3755,7 +3616,7 @@ dict_stats_rename_in_table_stats(
ret = dict_stats_exec_sql(
pinfo,
- "PROCEDURE RENAME_IN_TABLE_STATS () IS\n"
+ "PROCEDURE RENAME_TABLE_IN_TABLE_STATS () IS\n"
"BEGIN\n"
"UPDATE \"" TABLE_STATS_NAME "\" SET\n"
"database_name = :new_dbname_utf8,\n"
@@ -3777,8 +3638,8 @@ Creates its own transaction and commits it.
@return DB_SUCCESS or error code */
UNIV_INLINE
dberr_t
-dict_stats_rename_in_index_stats(
-/*=============================*/
+dict_stats_rename_table_in_index_stats(
+/*===================================*/
const char* old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */
const char* old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */
const char* new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */
@@ -3787,9 +3648,7 @@ dict_stats_rename_in_index_stats(
pars_info_t* pinfo;
dberr_t ret;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
ut_ad(mutex_own(&dict_sys->mutex));
pinfo = pars_info_create();
@@ -3801,7 +3660,7 @@ dict_stats_rename_in_index_stats(
ret = dict_stats_exec_sql(
pinfo,
- "PROCEDURE RENAME_IN_INDEX_STATS () IS\n"
+ "PROCEDURE RENAME_TABLE_IN_INDEX_STATS () IS\n"
"BEGIN\n"
"UPDATE \"" INDEX_STATS_NAME "\" SET\n"
"database_name = :new_dbname_utf8,\n"
@@ -3818,7 +3677,6 @@ dict_stats_rename_in_index_stats(
Renames a table in InnoDB persistent stats storage.
This function creates its own transaction and commits it.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
dict_stats_rename_table(
/*====================*/
@@ -3834,9 +3692,7 @@ dict_stats_rename_table(
char new_table_utf8[MAX_TABLE_UTF8_LEN];
dberr_t ret;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_X));
ut_ad(!mutex_own(&dict_sys->mutex));
/* skip innodb_table_stats and innodb_index_stats themselves */
@@ -3861,7 +3717,7 @@ dict_stats_rename_table(
do {
n_attempts++;
- ret = dict_stats_rename_in_table_stats(
+ ret = dict_stats_rename_table_in_table_stats(
old_db_utf8, old_table_utf8,
new_db_utf8, new_table_utf8);
@@ -3887,26 +3743,26 @@ dict_stats_rename_table(
&& n_attempts < 5);
if (ret != DB_SUCCESS) {
- ut_snprintf(errstr, errstr_sz,
- "Unable to rename statistics from "
- "%s.%s to %s.%s in %s: %s. "
- "They can be renamed later using "
-
- "UPDATE %s SET "
- "database_name = '%s', "
- "table_name = '%s' "
- "WHERE "
- "database_name = '%s' AND "
- "table_name = '%s';",
-
- old_db_utf8, old_table_utf8,
- new_db_utf8, new_table_utf8,
- TABLE_STATS_NAME_PRINT,
- ut_strerr(ret),
-
- TABLE_STATS_NAME_PRINT,
- new_db_utf8, new_table_utf8,
- old_db_utf8, old_table_utf8);
+ snprintf(errstr, errstr_sz,
+ "Unable to rename statistics from"
+ " %s.%s to %s.%s in %s: %s."
+ " They can be renamed later using"
+
+ " UPDATE %s SET"
+ " database_name = '%s',"
+ " table_name = '%s'"
+ " WHERE"
+ " database_name = '%s' AND"
+ " table_name = '%s';",
+
+ old_db_utf8, old_table_utf8,
+ new_db_utf8, new_table_utf8,
+ TABLE_STATS_NAME_PRINT,
+ ut_strerr(ret),
+
+ TABLE_STATS_NAME_PRINT,
+ new_db_utf8, new_table_utf8,
+ old_db_utf8, old_table_utf8);
mutex_exit(&dict_sys->mutex);
rw_lock_x_unlock(&dict_operation_lock);
return(ret);
@@ -3917,7 +3773,7 @@ dict_stats_rename_table(
do {
n_attempts++;
- ret = dict_stats_rename_in_index_stats(
+ ret = dict_stats_rename_table_in_index_stats(
old_db_utf8, old_table_utf8,
new_db_utf8, new_table_utf8);
@@ -3946,134 +3802,33 @@ dict_stats_rename_table(
rw_lock_x_unlock(&dict_operation_lock);
if (ret != DB_SUCCESS) {
- ut_snprintf(errstr, errstr_sz,
- "Unable to rename statistics from "
- "%s.%s to %s.%s in %s: %s. "
- "They can be renamed later using "
-
- "UPDATE %s SET "
- "database_name = '%s', "
- "table_name = '%s' "
- "WHERE "
- "database_name = '%s' AND "
- "table_name = '%s';",
-
- old_db_utf8, old_table_utf8,
- new_db_utf8, new_table_utf8,
- INDEX_STATS_NAME_PRINT,
- ut_strerr(ret),
-
- INDEX_STATS_NAME_PRINT,
- new_db_utf8, new_table_utf8,
- old_db_utf8, old_table_utf8);
+ snprintf(errstr, errstr_sz,
+ "Unable to rename statistics from"
+ " %s.%s to %s.%s in %s: %s."
+ " They can be renamed later using"
+
+ " UPDATE %s SET"
+ " database_name = '%s',"
+ " table_name = '%s'"
+ " WHERE"
+ " database_name = '%s' AND"
+ " table_name = '%s';",
+
+ old_db_utf8, old_table_utf8,
+ new_db_utf8, new_table_utf8,
+ INDEX_STATS_NAME_PRINT,
+ ut_strerr(ret),
+
+ INDEX_STATS_NAME_PRINT,
+ new_db_utf8, new_table_utf8,
+ old_db_utf8, old_table_utf8);
}
return(ret);
}
-/*********************************************************************//**
-Save defragmentation result.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-dict_stats_save_defrag_summary(
- dict_index_t* index) /*!< in: index */
-{
- dberr_t ret;
- lint now = (lint) time(NULL);
- if (dict_index_is_univ(index)) {
- return DB_SUCCESS;
- }
- rw_lock_x_lock(&dict_operation_lock);
- mutex_enter(&dict_sys->mutex);
- ret = dict_stats_save_index_stat(index, now, "n_pages_freed",
- index->stat_defrag_n_pages_freed,
- NULL,
- "Number of pages freed during"
- " last defragmentation run.",
- NULL);
-
- mutex_exit(&dict_sys->mutex);
- rw_lock_x_unlock(&dict_operation_lock);
- return (ret);
-}
-
-/*********************************************************************//**
-Save defragmentation stats for a given index.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-dict_stats_save_defrag_stats(
- dict_index_t* index) /*!< in: index */
-{
- dberr_t ret;
-
- if (index->is_readable()) {
- } else {
- return (dict_stats_report_error(index->table, true));
- }
-
- if (dict_index_is_univ(index)) {
- return DB_SUCCESS;
- }
-
- lint now = (lint) time(NULL);
- mtr_t mtr;
- ulint n_leaf_pages;
- ulint n_leaf_reserved;
- mtr_start(&mtr);
- mtr_s_lock(dict_index_get_lock(index), &mtr);
- n_leaf_reserved = btr_get_size_and_reserved(index, BTR_N_LEAF_PAGES,
- &n_leaf_pages, &mtr);
- mtr_commit(&mtr);
-
- if (n_leaf_reserved == ULINT_UNDEFINED) {
- // The index name is different during fast index creation,
- // so the stats won't be associated with the right index
- // for later use. We just return without saving.
- return DB_SUCCESS;
- }
-
- rw_lock_x_lock(&dict_operation_lock);
-
- mutex_enter(&dict_sys->mutex);
- ret = dict_stats_save_index_stat(index, now, "n_page_split",
- index->stat_defrag_n_page_split,
- NULL,
- "Number of new page splits on leaves"
- " since last defragmentation.",
- NULL);
- if (ret != DB_SUCCESS) {
- goto end;
- }
-
- ret = dict_stats_save_index_stat(
- index, now, "n_leaf_pages_defrag",
- n_leaf_pages,
- NULL,
- "Number of leaf pages when this stat is saved to disk",
- NULL);
- if (ret != DB_SUCCESS) {
- goto end;
- }
-
- ret = dict_stats_save_index_stat(
- index, now, "n_leaf_pages_reserved",
- n_leaf_reserved,
- NULL,
- "Number of pages reserved for this index leaves when this stat "
- "is saved to disk",
- NULL);
-
-end:
- mutex_exit(&dict_sys->mutex);
- rw_lock_x_unlock(&dict_operation_lock);
-
- return (ret);
-}
-
/* tests @{ */
-#ifdef UNIV_COMPILE_TEST_FUNCS
+#ifdef UNIV_ENABLE_UNIT_TEST_DICT_STATS
/* The following unit tests test some of the functions in this file
individually; such testing cannot be performed by the mysql-test framework
@@ -4112,12 +3867,12 @@ test_dict_table_schema_check()
};
char errstr[512];
- ut_snprintf(errstr, sizeof(errstr), "Table not found");
+ snprintf(errstr, sizeof(errstr), "Table not found");
/* prevent any data dictionary modifications while we are checking
the tables' structure */
- mutex_enter(&(dict_sys->mutex));
+ mutex_enter(&dict_sys->mutex);
/* check that a valid table is reported as valid */
schema.n_cols = 7;
@@ -4134,11 +3889,11 @@ test_dict_table_schema_check()
schema.columns[1].len = 8;
if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
!= DB_SUCCESS) {
- printf("OK: test.tcheck.c02 has different length and is "
- "reported as corrupted\n");
+ printf("OK: test.tcheck.c02 has different length and is"
+ " reported as corrupted\n");
} else {
- printf("OK: test.tcheck.c02 has different length but is "
- "reported as ok\n");
+ printf("OK: test.tcheck.c02 has different length but is"
+ " reported as ok\n");
goto test_dict_table_schema_check_end;
}
schema.columns[1].len = 4;
@@ -4148,11 +3903,11 @@ test_dict_table_schema_check()
schema.columns[1].prtype_mask |= DATA_NOT_NULL;
if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
!= DB_SUCCESS) {
- printf("OK: test.tcheck.c02 does not have NOT NULL while "
- "it should and is reported as corrupted\n");
+ printf("OK: test.tcheck.c02 does not have NOT NULL while"
+ " it should and is reported as corrupted\n");
} else {
- printf("ERROR: test.tcheck.c02 does not have NOT NULL while "
- "it should and is not reported as corrupted\n");
+ printf("ERROR: test.tcheck.c02 does not have NOT NULL while"
+ " it should and is not reported as corrupted\n");
goto test_dict_table_schema_check_end;
}
schema.columns[1].prtype_mask &= ~DATA_NOT_NULL;
@@ -4161,23 +3916,23 @@ test_dict_table_schema_check()
schema.n_cols = 6;
if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
== DB_SUCCESS) {
- printf("ERROR: test.tcheck has more columns but is not "
- "reported as corrupted\n");
+ printf("ERROR: test.tcheck has more columns but is not"
+ " reported as corrupted\n");
goto test_dict_table_schema_check_end;
} else {
- printf("OK: test.tcheck has more columns and is "
- "reported as corrupted\n");
+ printf("OK: test.tcheck has more columns and is"
+ " reported as corrupted\n");
}
/* check a table that has some columns missing */
schema.n_cols = 8;
if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
!= DB_SUCCESS) {
- printf("OK: test.tcheck has missing columns and is "
- "reported as corrupted\n");
+ printf("OK: test.tcheck has missing columns and is"
+ " reported as corrupted\n");
} else {
- printf("ERROR: test.tcheck has missing columns but is "
- "reported as ok\n");
+ printf("ERROR: test.tcheck has missing columns but is"
+ " reported as ok\n");
goto test_dict_table_schema_check_end;
}
@@ -4193,7 +3948,7 @@ test_dict_table_schema_check()
test_dict_table_schema_check_end:
- mutex_exit(&(dict_sys->mutex));
+ mutex_exit(&dict_sys->mutex);
}
/* @} */
@@ -4245,13 +4000,13 @@ test_dict_stats_save()
dberr_t ret;
/* craft a dummy dict_table_t */
- table.name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
+ table.name.m_name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
table.stat_n_rows = TEST_N_ROWS;
table.stat_clustered_index_size = TEST_CLUSTERED_INDEX_SIZE;
table.stat_sum_of_other_index_sizes = TEST_SUM_OF_OTHER_INDEX_SIZES;
- UT_LIST_INIT(table.indexes);
- UT_LIST_ADD_LAST(indexes, table.indexes, &index1);
- UT_LIST_ADD_LAST(indexes, table.indexes, &index2);
+ UT_LIST_INIT(table.indexes, &dict_index_t::indexes);
+ UT_LIST_ADD_LAST(table.indexes, &index1);
+ UT_LIST_ADD_LAST(table.indexes, &index2);
ut_d(table.magic_n = DICT_TABLE_MAGIC_N);
ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);
@@ -4295,8 +4050,8 @@ test_dict_stats_save()
ut_a(ret == DB_SUCCESS);
- printf("\nOK: stats saved successfully, now go ahead and read "
- "what's inside %s and %s:\n\n",
+ printf("\nOK: stats saved successfully, now go ahead and read"
+ " what's inside %s and %s:\n\n",
TABLE_STATS_NAME_PRINT,
INDEX_STATS_NAME_PRINT);
@@ -4397,10 +4152,10 @@ test_dict_stats_fetch_from_ps()
dberr_t ret;
/* craft a dummy dict_table_t */
- table.name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
- UT_LIST_INIT(table.indexes);
- UT_LIST_ADD_LAST(indexes, table.indexes, &index1);
- UT_LIST_ADD_LAST(indexes, table.indexes, &index2);
+ table.name.m_name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
+ UT_LIST_INIT(table.indexes, &dict_index_t::indexes);
+ UT_LIST_ADD_LAST(table.indexes, &index1);
+ UT_LIST_ADD_LAST(table.indexes, &index2);
ut_d(table.magic_n = DICT_TABLE_MAGIC_N);
index1.name = TEST_IDX1_NAME;
@@ -4458,7 +4213,5 @@ test_dict_stats_all()
}
/* @} */
-#endif /* UNIV_COMPILE_TEST_FUNCS */
+#endif /* UNIV_ENABLE_UNIT_TEST_DICT_STATS */
/* @} */
-
-#endif /* UNIV_HOTBACKUP */
diff --git a/storage/innobase/dict/dict0stats_bg.cc b/storage/innobase/dict/dict0stats_bg.cc
index b4923364cac..ec3f0a8eae6 100644
--- a/storage/innobase/dict/dict0stats_bg.cc
+++ b/storage/innobase/dict/dict0stats_bg.cc
@@ -24,14 +24,18 @@ Code used for background table and index stats gathering.
Created Apr 25, 2012 Vasil Dimov
*******************************************************/
-#include "row0mysql.h"
-#include "srv0start.h"
#include "dict0dict.h"
#include "dict0stats.h"
#include "dict0stats_bg.h"
-
-#ifdef UNIV_NONINL
-# include "dict0stats_bg.ic"
+#include "dict0defrag_bg.h"
+#include "row0mysql.h"
+#include "srv0start.h"
+#include "fil0fil.h"
+#ifdef WITH_WSREP
+# include "mysql/service_wsrep.h"
+# include "wsrep.h"
+# include "log.h"
+# include "wsrep_mysqld.h"
#endif
#include <vector>
@@ -41,61 +45,48 @@ Created Apr 25, 2012 Vasil Dimov
/** Event to wake up dict_stats_thread on dict_stats_recalc_pool_add()
or shutdown. Not protected by any mutex. */
-UNIV_INTERN os_event_t dict_stats_event;
+os_event_t dict_stats_event;
/** Variable to initiate shutdown of the dict stats thread. Note we don't
use 'srv_shutdown_state' because we want to shut down the dict stats thread
before the purge thread. */
-static bool dict_stats_start_shutdown;
+bool dict_stats_start_shutdown;
/** Event to wait for shutdown of the dict stats thread */
-static os_event_t dict_stats_shutdown_event;
+os_event_t dict_stats_shutdown_event;
+
+#ifdef UNIV_DEBUG
+/** Used by SET GLOBAL innodb_dict_stats_disabled_debug = 1; */
+my_bool innodb_dict_stats_disabled_debug;
+
+static os_event_t dict_stats_disabled_event;
+#endif /* UNIV_DEBUG */
/** This mutex protects the "recalc_pool" variable. */
static ib_mutex_t recalc_pool_mutex;
-static ib_mutex_t defrag_pool_mutex;
-#ifdef HAVE_PSI_INTERFACE
-static mysql_pfs_key_t recalc_pool_mutex_key;
-static mysql_pfs_key_t defrag_pool_mutex_key;
-#endif /* HAVE_PSI_INTERFACE */
-/** The number of tables that can be added to "recalc_pool" before
-it is enlarged */
-static const ulint RECALC_POOL_INITIAL_SLOTS = 128;
+/** Allocator type, used by std::vector */
+typedef ut_allocator<table_id_t>
+ recalc_pool_allocator_t;
/** The multitude of tables whose stats are to be automatically
recalculated - an STL vector */
-typedef std::vector<table_id_t> recalc_pool_t;
-static recalc_pool_t recalc_pool;
-
-typedef recalc_pool_t::iterator recalc_pool_iterator_t;
-
-/** Indices whose defrag stats need to be saved to persistent storage.*/
-struct defrag_pool_item_t {
- table_id_t table_id;
- index_id_t index_id;
-};
-typedef std::vector<defrag_pool_item_t> defrag_pool_t;
-static defrag_pool_t defrag_pool;
-typedef defrag_pool_t::iterator defrag_pool_iterator_t;
+typedef std::vector<table_id_t, recalc_pool_allocator_t>
+ recalc_pool_t;
-/*****************************************************************//**
-Initialize the recalc pool, called once during thread initialization. */
-static
-void
-dict_stats_pool_init()
-/*=========================*/
-{
- ut_ad(!srv_read_only_mode);
+/** Iterator type for iterating over the elements of objects of type
+recalc_pool_t. */
+typedef recalc_pool_t::iterator
+ recalc_pool_iterator_t;
- recalc_pool.reserve(RECALC_POOL_INITIAL_SLOTS);
- defrag_pool.reserve(RECALC_POOL_INITIAL_SLOTS);
-}
+/** Pool where we store information on which tables are to be processed
+by background statistics gathering. */
+static recalc_pool_t recalc_pool;
/*****************************************************************//**
Free the resources occupied by the recalc pool, called once during
thread de-initialization. */
-static void dict_stats_pool_deinit()
+static void dict_stats_recalc_pool_deinit()
{
ut_ad(!srv_read_only_mode);
@@ -121,7 +112,7 @@ background stats gathering thread. Only the table id is added to the
list, so the table can be closed after being enqueued and it will be
opened when needed. If the table does not exist later (has been DROPped),
then it will be removed from the pool and skipped. */
-UNIV_INTERN
+static
void
dict_stats_recalc_pool_add(
/*=======================*/
@@ -149,6 +140,76 @@ dict_stats_recalc_pool_add(
os_event_set(dict_stats_event);
}
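
For illustration, here is a minimal standalone sketch of the id-based work pool described in the comment above: only table ids are queued, duplicates are skipped on add, and the consumer pops the oldest id. The names (`RecalcPool`, `table_id_t`) are simplified stand-ins, not the actual InnoDB types.

```cpp
#include <algorithm>
#include <cstdint>
#include <mutex>
#include <vector>

typedef std::uint64_t table_id_t;

// A minimal analogue of the recalc pool: ids only, deduplicated on add.
class RecalcPool {
public:
	// Returns true if the id was newly enqueued.
	bool add(table_id_t id) {
		std::lock_guard<std::mutex> lock(mutex_);
		if (std::find(pool_.begin(), pool_.end(), id) != pool_.end()) {
			return false;	// already queued
		}
		pool_.push_back(id);
		return true;
	}

	// Pops the oldest id; returns false if the pool is empty.
	bool get(table_id_t* id) {
		std::lock_guard<std::mutex> lock(mutex_);
		if (pool_.empty()) {
			return false;
		}
		*id = pool_.front();
		pool_.erase(pool_.begin());
		return true;
	}

private:
	std::mutex mutex_;
	std::vector<table_id_t> pool_;
};
```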
+#ifdef WITH_WSREP
+/** Update the table modification counter and if necessary,
+schedule new estimates for table and index statistics to be calculated.
+@param[in,out] table persistent or temporary table
+@param[in] thd current session */
+void dict_stats_update_if_needed(dict_table_t* table, THD* thd)
+#else
+/** Update the table modification counter and if necessary,
+schedule new estimates for table and index statistics to be calculated.
+@param[in,out] table persistent or temporary table */
+void dict_stats_update_if_needed_func(dict_table_t* table)
+#endif
+{
+ ut_ad(table->stat_initialized);
+ ut_ad(!mutex_own(&dict_sys->mutex));
+
+ ulonglong counter = table->stat_modified_counter++;
+ ulonglong n_rows = dict_table_get_n_rows(table);
+
+ if (dict_stats_is_persistent_enabled(table)) {
+ if (counter > n_rows / 10 /* 10% */
+ && dict_stats_auto_recalc_is_enabled(table)) {
+
+#ifdef WITH_WSREP
+ /* Do not add the table to background
+ statistics calculation if this thread is
+ brute-force (BF) but not an applier. (All
+ replicated DDL, i.e. DDL that is binlogged
+ on the master node, is executed with high
+ priority, a.k.a. BF, on slave nodes.)
+ Letting appliers enqueue tables can again
+ lead to BF lock waits on the applier node,
+ but that is better than having no persistent
+ index/table statistics on applier nodes.
+ TODO: let BF threads wait for these InnoDB
+ internal SQL-parser generated row locks, and
+ enqueue BF lock waits at the head of the
+ waiting queue. */
+ if (thd
+ && !wsrep_thd_is_applier(thd)
+ && wsrep_on(thd)
+ && wsrep_thd_is_BF(thd, 0)) {
+ WSREP_DEBUG("Avoiding background statistics"
+ " calculation for table %s.",
+ table->name.m_name);
+ return;
+ }
+#endif /* WITH_WSREP */
+
+ dict_stats_recalc_pool_add(table);
+ table->stat_modified_counter = 0;
+ }
+ return;
+ }
+
+ /* Calculate new statistics if 1/16 of the table has been modified
+ since the last time a statistics batch was run.
+ We calculate statistics at most every 16th round, since we may have
+ a counter table which is very small and updated very often. */
+ ulonglong threshold = 16 + n_rows / 16; /* 6.25% */
+
+ if (srv_stats_modified_counter) {
+ threshold = std::min(srv_stats_modified_counter, threshold);
+ }
+
+ if (counter > threshold) {
+ /* this will reset table->stat_modified_counter to 0 */
+ dict_stats_update(table, DICT_STATS_RECALC_TRANSIENT);
+ }
+}
+
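
The two thresholds used above — roughly 10% of rows for persistent auto-recalc, and 16 + n_rows/16 (about 6.25%) capped by srv_stats_modified_counter for transient stats — can be condensed into a small predicate. A sketch under simplified assumptions (plain integers instead of dict_table_t, and ignoring the per-table auto-recalc flag):

```cpp
#include <algorithm>
#include <cstdint>

// Decide whether a table's statistics should be refreshed, mirroring
// the thresholds described above.
// counter:  modifications since the last statistics run
// n_rows:   estimated row count
// user_cap: srv_stats_modified_counter analogue; 0 means "not set"
bool stats_need_refresh(std::uint64_t counter, std::uint64_t n_rows,
			bool persistent, std::uint64_t user_cap) {
	if (persistent) {
		// Persistent stats: recalculate after ~10% of rows changed.
		return counter > n_rows / 10;
	}
	// Transient stats: 16 + n_rows/16 (~6.25%); the "+ 16" keeps tiny,
	// frequently updated tables from being recalculated on every change.
	std::uint64_t threshold = 16 + n_rows / 16;
	if (user_cap != 0) {
		threshold = std::min(user_cap, threshold);
	}
	return counter > threshold;
}
```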
/*****************************************************************//**
Get a table from the auto recalc pool. The returned table id is removed
from the pool.
@@ -169,7 +230,7 @@ dict_stats_recalc_pool_get(
return(false);
}
- *id = recalc_pool[0];
+ *id = recalc_pool.at(0);
recalc_pool.erase(recalc_pool.begin());
@@ -181,7 +242,6 @@ dict_stats_recalc_pool_get(
/*****************************************************************//**
Delete a given table from the auto recalc pool.
dict_stats_recalc_pool_del() */
-UNIV_INTERN
void
dict_stats_recalc_pool_del(
/*=======================*/
@@ -209,111 +269,6 @@ dict_stats_recalc_pool_del(
}
/*****************************************************************//**
-Add an index in a table to the defrag pool, which is processed by the
-background stats gathering thread. Only the table id and index id are
-added to the list, so the table can be closed after being enqueued and
-it will be opened when needed. If the table or index does not exist later
-(has been DROPped), then it will be removed from the pool and skipped. */
-UNIV_INTERN
-void
-dict_stats_defrag_pool_add(
-/*=======================*/
- const dict_index_t* index) /*!< in: table to add */
-{
- defrag_pool_item_t item;
-
- ut_ad(!srv_read_only_mode);
-
- mutex_enter(&defrag_pool_mutex);
-
- /* quit if already in the list */
- for (defrag_pool_iterator_t iter = defrag_pool.begin();
- iter != defrag_pool.end();
- ++iter) {
- if ((*iter).table_id == index->table->id
- && (*iter).index_id == index->id) {
- mutex_exit(&defrag_pool_mutex);
- return;
- }
- }
-
- item.table_id = index->table->id;
- item.index_id = index->id;
- defrag_pool.push_back(item);
-
- mutex_exit(&defrag_pool_mutex);
-
- os_event_set(dict_stats_event);
-}
-
-/*****************************************************************//**
-Get an index from the auto defrag pool. The returned index id is removed
-from the pool.
-@return true if the pool was non-empty and "id" was set, false otherwise */
-static
-bool
-dict_stats_defrag_pool_get(
-/*=======================*/
- table_id_t* table_id, /*!< out: table id, or unmodified if
- list is empty */
- index_id_t* index_id) /*!< out: index id, or unmodified if
- list is empty */
-{
- ut_ad(!srv_read_only_mode);
-
- mutex_enter(&defrag_pool_mutex);
-
- if (defrag_pool.empty()) {
- mutex_exit(&defrag_pool_mutex);
- return(false);
- }
-
- defrag_pool_item_t& item = defrag_pool.back();
- *table_id = item.table_id;
- *index_id = item.index_id;
-
- defrag_pool.pop_back();
-
- mutex_exit(&defrag_pool_mutex);
-
- return(true);
-}
-
-/*****************************************************************//**
-Delete a given index from the auto defrag pool. */
-UNIV_INTERN
-void
-dict_stats_defrag_pool_del(
-/*=======================*/
- const dict_table_t* table, /*!<in: if given, remove
- all entries for the table */
- const dict_index_t* index) /*!< in: if given, remove this index */
-{
- ut_a((table && !index) || (!table && index));
- ut_ad(!srv_read_only_mode);
- ut_ad(mutex_own(&dict_sys->mutex));
-
- mutex_enter(&defrag_pool_mutex);
-
- defrag_pool_iterator_t iter = defrag_pool.begin();
- while (iter != defrag_pool.end()) {
- if ((table && (*iter).table_id == table->id)
- || (index
- && (*iter).table_id == index->table->id
- && (*iter).index_id == index->id)) {
- /* erase() invalidates the iterator */
- iter = defrag_pool.erase(iter);
- if (index)
- break;
- } else {
- iter++;
- }
- }
-
- mutex_exit(&defrag_pool_mutex);
-}
-
-/*****************************************************************//**
Wait until background stats thread has stopped using the specified table.
The caller must have locked the data dictionary using
row_mysql_lock_data_dictionary() and this function may unlock it temporarily
@@ -322,7 +277,6 @@ The background stats thread is guaranteed not to start using the specified
table after this function returns and before the caller unlocks the data
dictionary because it sets the BG_STAT_IN_PROGRESS bit in table->stats_bg_flag
under dict_sys->mutex. */
-UNIV_INTERN
void
dict_stats_wait_bg_to_stop_using_table(
/*===================================*/
@@ -338,41 +292,38 @@ dict_stats_wait_bg_to_stop_using_table(
/*****************************************************************//**
Initialize global variables needed for the operation of dict_stats_thread().
Must be called before dict_stats_thread() is started. */
-UNIV_INTERN
void
dict_stats_thread_init()
{
ut_a(!srv_read_only_mode);
- dict_stats_event = os_event_create();
- dict_stats_shutdown_event = os_event_create();
+ dict_stats_event = os_event_create(0);
+ dict_stats_shutdown_event = os_event_create(0);
+
+ ut_d(dict_stats_disabled_event = os_event_create(0));
/* The recalc_pool_mutex is acquired from:
1) the background stats gathering thread before any other latch
and released without latching anything else in between (thus
any level would do here)
- 2) from row_update_statistics_if_needed()
+ 2) from dict_stats_update_if_needed()
and released without latching anything else in between. We know
that dict_sys->mutex (SYNC_DICT) is not acquired when
- row_update_statistics_if_needed() is called and it may be acquired
+ dict_stats_update_if_needed() is called and it may be acquired
inside that function (thus a level <=SYNC_DICT would do).
3) from row_drop_table_for_mysql() after dict_sys->mutex (SYNC_DICT)
and dict_operation_lock (SYNC_DICT_OPERATION) have been locked
(thus a level <SYNC_DICT && <SYNC_DICT_OPERATION would do)
So we choose SYNC_STATS_AUTO_RECALC to be about below SYNC_DICT. */
- mutex_create(recalc_pool_mutex_key, &recalc_pool_mutex,
- SYNC_STATS_AUTO_RECALC);
- /* We choose SYNC_STATS_DEFRAG to be below SYNC_FSP_PAGE. */
- mutex_create(defrag_pool_mutex_key, &defrag_pool_mutex,
- SYNC_STATS_DEFRAG);
- dict_stats_pool_init();
+ mutex_create(LATCH_ID_RECALC_POOL, &recalc_pool_mutex);
+
+ dict_defrag_pool_init();
}
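
The latching-order comment above is an instance of a general deadlock-avoidance rule: every latch has a level, and a thread may only acquire a latch whose level is strictly below the levels it already holds. A toy sketch of that rule — the checker here is illustrative, not InnoDB's sync0 framework, and it assumes latches are released in LIFO order:

```cpp
#include <cassert>
#include <mutex>
#include <vector>

// Toy latch-order checker: acquire in strictly descending level order.
struct LeveledMutex {
	int		level;
	std::mutex	mutex;
};

thread_local std::vector<int> held_levels;

void enter(LeveledMutex& m) {
	// Deadlock-avoidance rule: only go "down" in the latch hierarchy.
	assert(held_levels.empty() || m.level < held_levels.back());
	m.mutex.lock();
	held_levels.push_back(m.level);
}

void leave(LeveledMutex& m) {
	// Assumes LIFO release of held latches.
	m.mutex.unlock();
	held_levels.pop_back();
}
```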
/*****************************************************************//**
Free resources allocated by dict_stats_thread_init(), must be called
after dict_stats_thread() has exited. */
-UNIV_INTERN
void
dict_stats_thread_deinit()
/*======================*/
@@ -380,18 +331,14 @@ dict_stats_thread_deinit()
ut_a(!srv_read_only_mode);
ut_ad(!srv_dict_stats_thread_active);
- dict_stats_pool_deinit();
+ dict_stats_recalc_pool_deinit();
+ dict_defrag_pool_deinit();
mutex_free(&recalc_pool_mutex);
- memset(&recalc_pool_mutex, 0x0, sizeof(recalc_pool_mutex));
- mutex_free(&defrag_pool_mutex);
- memset(&defrag_pool_mutex, 0x0, sizeof(defrag_pool_mutex));
-
- os_event_free(dict_stats_event);
- dict_stats_event = NULL;
- os_event_free(dict_stats_shutdown_event);
- dict_stats_shutdown_event = NULL;
+ ut_d(os_event_destroy(dict_stats_disabled_event));
+ os_event_destroy(dict_stats_event);
+ os_event_destroy(dict_stats_shutdown_event);
dict_stats_start_shutdown = false;
}
@@ -426,8 +373,9 @@ dict_stats_process_entry_from_recalc_pool()
return;
}
- /* Check whether table is corrupted */
- if (table->corrupted) {
+ ut_ad(!dict_table_is_temporary(table));
+
+ if (!fil_table_accessible(table)) {
dict_table_close(table, TRUE, FALSE);
mutex_exit(&dict_sys->mutex);
return;
@@ -460,69 +408,62 @@ dict_stats_process_entry_from_recalc_pool()
mutex_enter(&dict_sys->mutex);
- table->stats_bg_flag &= ~BG_STAT_IN_PROGRESS;
+ table->stats_bg_flag = BG_STAT_NONE;
dict_table_close(table, TRUE, FALSE);
mutex_exit(&dict_sys->mutex);
}
-/*****************************************************************//**
-Get the first index that has been added for updating persistent defrag
-stats and eventually save its stats. */
-static
+#ifdef UNIV_DEBUG
+/** Disables dict stats thread. It's used by:
+ SET GLOBAL innodb_dict_stats_disabled_debug = 1 (0).
+@param[in] thd thread handle
+@param[in] var pointer to system variable
+@param[out] var_ptr where the formal string goes
+@param[in] save immediate result from check function */
void
-dict_stats_process_entry_from_defrag_pool()
+dict_stats_disabled_debug_update(
+ THD* thd,
+ struct st_mysql_sys_var* var,
+ void* var_ptr,
+ const void* save)
{
- table_id_t table_id;
- index_id_t index_id;
+ /* This method is protected by a mutex, as is every SET GLOBAL. */
+ ut_ad(dict_stats_disabled_event != NULL);
- ut_ad(!srv_read_only_mode);
-
- /* pop the first index from the auto defrag pool */
- if (!dict_stats_defrag_pool_get(&table_id, &index_id)) {
- /* no index in defrag pool */
- return;
- }
+ const bool disable = *static_cast<const my_bool*>(save);
- dict_table_t* table;
-
- mutex_enter(&dict_sys->mutex);
+ const int64_t sig_count = os_event_reset(dict_stats_disabled_event);
- /* If the table is no longer cached, we've already lost the in
- memory stats so there's nothing really to write to disk. */
- table = dict_table_open_on_id(table_id, TRUE,
- DICT_TABLE_OP_OPEN_ONLY_IF_CACHED);
+ innodb_dict_stats_disabled_debug = disable;
- dict_index_t* index = table && !table->corrupted
- ? dict_table_find_index_on_id(table, index_id)
- : NULL;
-
- if (!index || dict_index_is_corrupted(index)) {
- if (table) {
- dict_table_close(table, TRUE, FALSE);
- }
- mutex_exit(&dict_sys->mutex);
- return;
+ if (disable) {
+ os_event_set(dict_stats_event);
+ os_event_wait_low(dict_stats_disabled_event, sig_count);
}
-
- mutex_exit(&dict_sys->mutex);
- dict_stats_save_defrag_stats(index);
- dict_table_close(table, FALSE, FALSE);
}
+#endif /* UNIV_DEBUG */
+
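
The debug hook above is a small rendezvous: the SET GLOBAL thread resets an event, flips the flag, wakes the stats thread, then waits until the stats thread confirms it has parked. A sketch of the same handshake using standard C++ primitives (illustrative only; InnoDB uses its own os_event API):

```cpp
#include <condition_variable>
#include <mutex>

std::mutex		m;
std::condition_variable	cv;
bool			disabled = false;	// the flag being toggled
bool			parked = false;		// "stats thread is idle" ack

// Caller side (SET GLOBAL ... = 1): request the pause, wait for the ack.
void disable_and_wait() {
	std::unique_lock<std::mutex> lock(m);
	disabled = true;
	parked = false;
	cv.notify_all();			// wake the worker
	cv.wait(lock, [] { return parked; });	// wait until it parks
}

// Caller side (SET GLOBAL ... = 0): release the parked worker.
void enable() {
	std::lock_guard<std::mutex> lock(m);
	disabled = false;
	cv.notify_all();
}

// Worker side: park while disabled, acknowledging once per pause.
void worker_poll() {
	std::unique_lock<std::mutex> lock(m);
	while (disabled) {
		parked = true;
		cv.notify_all();		// acknowledge the pause
		cv.wait(lock);
	}
}
```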
/*****************************************************************//**
This is the thread for background stats gathering. It pops tables from
the auto recalc list and processes them, eventually recalculating their
statistics.
@return this function does not return, it calls os_thread_exit() */
-extern "C" UNIV_INTERN
+extern "C"
os_thread_ret_t
DECLARE_THREAD(dict_stats_thread)(void*)
{
my_thread_init();
ut_a(!srv_read_only_mode);
+#ifdef UNIV_PFS_THREAD
+ /* JAN: TODO: MySQL 5.7 PSI
+ pfs_register_thread(dict_stats_thread_key);
+ */
+#endif /* UNIV_PFS_THREAD */
+
while (!dict_stats_start_shutdown) {
/* Wake up periodically even if not signaled. This is
@@ -533,14 +474,23 @@ DECLARE_THREAD(dict_stats_thread)(void*)
os_event_wait_time(
dict_stats_event, MIN_RECALC_INTERVAL * 1000000);
+#ifdef UNIV_DEBUG
+ while (innodb_dict_stats_disabled_debug) {
+ os_event_set(dict_stats_disabled_event);
+ if (dict_stats_start_shutdown) {
+ break;
+ }
+ os_event_wait_time(
+ dict_stats_event, 100000);
+ }
+#endif /* UNIV_DEBUG */
+
if (dict_stats_start_shutdown) {
break;
}
dict_stats_process_entry_from_recalc_pool();
-
- while (defrag_pool.size())
- dict_stats_process_entry_from_defrag_pool();
+ dict_defrag_process_entries_from_defrag_pool();
os_event_reset(dict_stats_event);
}
@@ -549,9 +499,10 @@ DECLARE_THREAD(dict_stats_thread)(void*)
os_event_set(dict_stats_shutdown_event);
my_thread_end();
+
/* We count the number of threads in os_thread_exit(). A created
thread should always use that to exit instead of return(). */
- os_thread_exit(NULL);
+ os_thread_exit();
OS_THREAD_DUMMY_RETURN;
}
diff --git a/storage/innobase/dyn/dyn0dyn.cc b/storage/innobase/dyn/dyn0dyn.cc
deleted file mode 100644
index 8302de53eb9..00000000000
--- a/storage/innobase/dyn/dyn0dyn.cc
+++ /dev/null
@@ -1,65 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file dyn/dyn0dyn.cc
-The dynamically allocated array
-
-Created 2/5/1996 Heikki Tuuri
-*******************************************************/
-
-#include "dyn0dyn.h"
-#ifdef UNIV_NONINL
-#include "dyn0dyn.ic"
-#endif
-
-/************************************************************//**
-Adds a new block to a dyn array.
-@return created block */
-UNIV_INTERN
-dyn_block_t*
-dyn_array_add_block(
-/*================*/
- dyn_array_t* arr) /*!< in/out: dyn array */
-{
- mem_heap_t* heap;
- dyn_block_t* block;
-
- ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
-
- if (arr->heap == NULL) {
- UT_LIST_INIT(arr->base);
- UT_LIST_ADD_FIRST(list, arr->base, arr);
-
- arr->heap = mem_heap_create(sizeof(dyn_block_t));
- }
-
- block = dyn_array_get_last_block(arr);
- block->used = block->used | DYN_BLOCK_FULL_FLAG;
-
- heap = arr->heap;
-
- block = static_cast<dyn_block_t*>(
- mem_heap_alloc(heap, sizeof(dyn_block_t)));
-
- block->used = 0;
-
- UT_LIST_ADD_LAST(list, arr->base, block);
-
- return(block);
-}
diff --git a/storage/innobase/eval/eval0eval.cc b/storage/innobase/eval/eval0eval.cc
index d62febd466d..6f709707f7f 100644
--- a/storage/innobase/eval/eval0eval.cc
+++ b/storage/innobase/eval/eval0eval.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
@@ -26,18 +26,10 @@ Created 12/29/1997 Heikki Tuuri
*******************************************************/
#include "eval0eval.h"
-
-#ifdef UNIV_NONINL
-#include "eval0eval.ic"
-#endif
-
#include "data0data.h"
#include "row0sel.h"
#include "rem0cmp.h"
-/** The RND function seed */
-static ulint eval_rnd = 128367121;
-
/** Dummy address used when we should allocate a buffer of size 0 in
eval_node_alloc_val_buf */
@@ -61,8 +53,7 @@ NOTE that this memory must be explicitly freed when the query graph is
freed. If the node already has an allocated buffer, that buffer is freed
here. NOTE that this is the only function where dynamic memory should be
allocated for a query node val field.
-@return pointer to allocated buffer */
-UNIV_INTERN
+@return pointer to allocated buffer */
byte*
eval_node_alloc_val_buf(
/*====================*/
@@ -81,14 +72,14 @@ eval_node_alloc_val_buf(
data = static_cast<byte*>(dfield_get_data(dfield));
- if (data && data != &eval_dummy) {
- mem_free(data);
+ if (data != &eval_dummy) {
+ ut_free(data);
}
if (size == 0) {
data = &eval_dummy;
} else {
- data = static_cast<byte*>(mem_alloc(size));
+ data = static_cast<byte*>(ut_malloc_nokey(size));
}
que_node_set_val_buf_size(node, size);
@@ -102,7 +93,6 @@ eval_node_alloc_val_buf(
Free the buffer from global dynamic memory for a value of a que_node,
if it has been allocated in the above function. The freeing for pushed
column values is done in sel_col_prefetch_buf_free. */
-UNIV_INTERN
void
eval_node_free_val_buf(
/*===================*/
@@ -121,7 +111,7 @@ eval_node_free_val_buf(
if (que_node_get_val_buf_size(node) > 0) {
ut_a(data);
- mem_free(data);
+ ut_free(data);
}
}
@@ -136,12 +126,9 @@ eval_cmp_like(
que_node_t* arg2) /* !< in: right operand */
{
ib_like_t op;
- int res;
que_node_t* arg3;
que_node_t* arg4;
- dfield_t* dfield;
- dtype_t* dtype;
- ibool val = TRUE;
+ const dfield_t* dfield;
arg3 = que_node_get_like_node(arg2);
@@ -149,51 +136,23 @@ eval_cmp_like(
ut_a(arg3);
dfield = que_node_get_val(arg3);
- dtype = dfield_get_type(dfield);
-
- ut_a(dtype_get_mtype(dtype) == DATA_INT);
- op = static_cast<ib_like_t>(mach_read_from_4(static_cast<const unsigned char*>(dfield_get_data(dfield))));
+ ut_ad(dtype_get_mtype(dfield_get_type(dfield)) == DATA_INT);
+ op = static_cast<ib_like_t>(
+ mach_read_from_4(static_cast<const byte*>(
+ dfield_get_data(dfield))));
switch (op) {
- case IB_LIKE_PREFIX:
-
- arg4 = que_node_get_next(arg3);
- res = cmp_dfield_dfield_like_prefix(
- que_node_get_val(arg1),
- que_node_get_val(arg4));
- break;
-
- case IB_LIKE_SUFFIX:
-
- arg4 = que_node_get_next(arg3);
- res = cmp_dfield_dfield_like_suffix(
- que_node_get_val(arg1),
- que_node_get_val(arg4));
- break;
-
- case IB_LIKE_SUBSTR:
-
+ case IB_LIKE_PREFIX:
arg4 = que_node_get_next(arg3);
- res = cmp_dfield_dfield_like_substr(
- que_node_get_val(arg1),
- que_node_get_val(arg4));
- break;
-
- case IB_LIKE_EXACT:
- res = cmp_dfield_dfield(
- que_node_get_val(arg1),
- que_node_get_val(arg2));
- break;
-
- default:
- ut_error;
- }
-
- if (res != 0) {
- val = FALSE;
+ return(!cmp_dfield_dfield_like_prefix(que_node_get_val(arg1),
+ que_node_get_val(arg4)));
+ case IB_LIKE_EXACT:
+ return(!cmp_dfield_dfield(que_node_get_val(arg1),
+ que_node_get_val(arg2)));
}
- return(val);
+ ut_error;
+ return(FALSE);
}
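
After this rewrite only two LIKE forms survive in the internal SQL parser: exact match and prefix match ("abc%"). The prefix case reduces to a bounded comparison, roughly as in this sketch (plain std::string stand-ins for dfields):

```cpp
#include <string>

// Evaluate the two remaining LIKE forms: exact match and "prefix%".
bool like_exact(const std::string& value, const std::string& pattern) {
	return value == pattern;
}

bool like_prefix(const std::string& value, const std::string& prefix) {
	// "abc%" matches any value whose first prefix.size() bytes are "abc";
	// compare() clamps to value's length, so shorter values never match.
	return value.compare(0, prefix.size(), prefix) == 0;
}
```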
/*********************************************************************
@@ -207,53 +166,47 @@ eval_cmp(
que_node_t* arg1;
que_node_t* arg2;
int res;
- int func;
- ibool val = TRUE;
+ ibool val = FALSE; /* remove warning */
ut_ad(que_node_get_type(cmp_node) == QUE_NODE_FUNC);
arg1 = cmp_node->args;
arg2 = que_node_get_next(arg1);
- func = cmp_node->func;
-
- if (func == PARS_LIKE_TOKEN_EXACT
- || func == PARS_LIKE_TOKEN_PREFIX
- || func == PARS_LIKE_TOKEN_SUFFIX
- || func == PARS_LIKE_TOKEN_SUBSTR) {
-
- val = eval_cmp_like(arg1, arg2);
- } else {
+ switch (cmp_node->func) {
+ case '<':
+ case '=':
+ case '>':
+ case PARS_LE_TOKEN:
+ case PARS_NE_TOKEN:
+ case PARS_GE_TOKEN:
res = cmp_dfield_dfield(
que_node_get_val(arg1), que_node_get_val(arg2));
- if (func == '=') {
- if (res != 0) {
- val = FALSE;
- }
- } else if (func == '<') {
- if (res != -1) {
- val = FALSE;
- }
- } else if (func == PARS_LE_TOKEN) {
- if (res == 1) {
- val = FALSE;
- }
- } else if (func == PARS_NE_TOKEN) {
- if (res == 0) {
- val = FALSE;
- }
- } else if (func == PARS_GE_TOKEN) {
- if (res == -1) {
- val = FALSE;
- }
- } else {
- ut_ad(func == '>');
-
- if (res != 1) {
- val = FALSE;
- }
+ switch (cmp_node->func) {
+ case '<':
+ val = (res < 0);
+ break;
+ case '=':
+ val = (res == 0);
+ break;
+ case '>':
+ val = (res > 0);
+ break;
+ case PARS_LE_TOKEN:
+ val = (res <= 0);
+ break;
+ case PARS_NE_TOKEN:
+ val = (res != 0);
+ break;
+ case PARS_GE_TOKEN:
+ val = (res >= 0);
+ break;
}
+ break;
+ default:
+ val = eval_cmp_like(arg1, arg2);
+ break;
}
eval_node_set_ibool_val(cmp_node, val);
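
The refactoring above replaces the if/else chains with a direct mapping from the three-way comparison result to a boolean per operator. The mapping in isolation, with illustrative names:

```cpp
// Map a three-way comparison result (res < 0, == 0, > 0) to the value of
// each SQL comparison operator, as in the rewritten eval_cmp().
enum class Op { LT, LE, EQ, NE, GE, GT };

bool eval_op(Op op, int res) {
	switch (op) {
	case Op::LT: return res < 0;
	case Op::LE: return res <= 0;
	case Op::EQ: return res == 0;
	case Op::NE: return res != 0;
	case Op::GE: return res >= 0;
	case Op::GT: return res > 0;
	}
	return false;	// unreachable
}
```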
@@ -354,120 +307,18 @@ eval_aggregate(
/*===========*/
func_node_t* node) /*!< in: aggregate operation node */
{
- que_node_t* arg;
lint val;
- lint arg_val;
- int func;
ut_ad(que_node_get_type(node) == QUE_NODE_FUNC);
val = eval_node_get_int_val(node);
- func = node->func;
-
- if (func == PARS_COUNT_TOKEN) {
-
- val = val + 1;
- } else {
- ut_ad(func == PARS_SUM_TOKEN);
-
- arg = node->args;
- arg_val = eval_node_get_int_val(arg);
-
- val = val + arg_val;
- }
-
+ ut_a(node->func == PARS_COUNT_TOKEN);
+ val = val + 1;
eval_node_set_int_val(node, val);
}
/*****************************************************************//**
-Evaluates a predefined function node where the function is not relevant
-in benchmarks. */
-static
-void
-eval_predefined_2(
-/*==============*/
- func_node_t* func_node) /*!< in: predefined function node */
-{
- que_node_t* arg;
- que_node_t* arg1;
- que_node_t* arg2 = 0; /* remove warning (??? bug ???) */
- lint int_val;
- byte* data;
- ulint len1;
- ulint len2;
- int func;
- ulint i;
-
- ut_ad(que_node_get_type(func_node) == QUE_NODE_FUNC);
-
- arg1 = func_node->args;
-
- if (arg1) {
- arg2 = que_node_get_next(arg1);
- }
-
- func = func_node->func;
-
- if (func == PARS_PRINTF_TOKEN) {
-
- arg = arg1;
-
- while (arg) {
- dfield_print(que_node_get_val(arg));
-
- arg = que_node_get_next(arg);
- }
-
- putc('\n', stderr);
-
- } else if (func == PARS_ASSERT_TOKEN) {
-
- if (!eval_node_get_ibool_val(arg1)) {
- fputs("SQL assertion fails in a stored procedure!\n",
- stderr);
- }
-
- ut_a(eval_node_get_ibool_val(arg1));
-
- /* This function, or more precisely, a debug procedure,
- returns no value */
-
- } else if (func == PARS_RND_TOKEN) {
-
- len1 = (ulint) eval_node_get_int_val(arg1);
- len2 = (ulint) eval_node_get_int_val(arg2);
-
- ut_ad(len2 >= len1);
-
- if (len2 > len1) {
- int_val = (lint) (len1
- + (eval_rnd % (len2 - len1 + 1)));
- } else {
- int_val = (lint) len1;
- }
-
- eval_rnd = ut_rnd_gen_next_ulint(eval_rnd);
-
- eval_node_set_int_val(func_node, int_val);
-
- } else if (func == PARS_RND_STR_TOKEN) {
-
- len1 = (ulint) eval_node_get_int_val(arg1);
-
- data = eval_node_ensure_val_buf(func_node, len1);
-
- for (i = 0; i < len1; i++) {
- data[i] = (byte)(97 + (eval_rnd % 3));
-
- eval_rnd = ut_rnd_gen_next_ulint(eval_rnd);
- }
- } else {
- ut_error;
- }
-}
-
-/*****************************************************************//**
Evaluates a notfound-function node. */
UNIV_INLINE
void
@@ -538,46 +389,6 @@ eval_substr(
}
/*****************************************************************//**
-Evaluates a replstr-procedure node. */
-static
-void
-eval_replstr(
-/*=========*/
- func_node_t* func_node) /*!< in: function node */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- que_node_t* arg3;
- que_node_t* arg4;
- byte* str1;
- byte* str2;
- ulint len1;
- ulint len2;
-
- arg1 = func_node->args;
- arg2 = que_node_get_next(arg1);
-
- ut_ad(que_node_get_type(arg1) == QUE_NODE_SYMBOL);
-
- arg3 = que_node_get_next(arg2);
- arg4 = que_node_get_next(arg3);
-
- str1 = static_cast<byte*>(dfield_get_data(que_node_get_val(arg1)));
- str2 = static_cast<byte*>(dfield_get_data(que_node_get_val(arg2)));
-
- len1 = (ulint) eval_node_get_int_val(arg3);
- len2 = (ulint) eval_node_get_int_val(arg4);
-
- if ((dfield_get_len(que_node_get_val(arg1)) < len1 + len2)
- || (dfield_get_len(que_node_get_val(arg2)) < len2)) {
-
- ut_error;
- }
-
- ut_memcpy(str1 + len1, str2, len2);
-}
-
-/*****************************************************************//**
Evaluates an instr-function node. */
static
void
@@ -651,44 +462,6 @@ match_found:
/*****************************************************************//**
Evaluates a predefined function node. */
-UNIV_INLINE
-void
-eval_binary_to_number(
-/*==================*/
- func_node_t* func_node) /*!< in: function node */
-{
- que_node_t* arg1;
- dfield_t* dfield;
- byte* str1;
- byte* str2;
- ulint len1;
- ulint int_val;
-
- arg1 = func_node->args;
-
- dfield = que_node_get_val(arg1);
-
- str1 = static_cast<byte*>(dfield_get_data(dfield));
- len1 = dfield_get_len(dfield);
-
- if (len1 > 4) {
- ut_error;
- }
-
- if (len1 == 4) {
- str2 = str1;
- } else {
- int_val = 0;
- str2 = (byte*) &int_val;
-
- ut_memcpy(str2 + (4 - len1), str1, len1);
- }
-
- eval_node_copy_and_alloc_val(func_node, str2, 4);
-}
-
-/*****************************************************************//**
-Evaluates a predefined function node. */
static
void
eval_concat(
@@ -778,100 +551,16 @@ eval_to_binary(
}
/*****************************************************************//**
-Evaluates a predefined function node. */
-UNIV_INLINE
-void
-eval_predefined(
-/*============*/
- func_node_t* func_node) /*!< in: function node */
+Evaluate LENGTH(). */
+inline void eval_length(func_node_t* func_node)
{
- que_node_t* arg1;
- lint int_val;
- byte* data;
- int func;
-
- func = func_node->func;
-
- arg1 = func_node->args;
-
- if (func == PARS_LENGTH_TOKEN) {
-
- int_val = (lint) dfield_get_len(que_node_get_val(arg1));
-
- } else if (func == PARS_TO_CHAR_TOKEN) {
-
- /* Convert number to character string as a
- signed decimal integer. */
-
- ulint uint_val;
- int int_len;
-
- int_val = eval_node_get_int_val(arg1);
-
- /* Determine the length of the string. */
-
- if (int_val == 0) {
- int_len = 1; /* the number 0 occupies 1 byte */
- } else {
- int_len = 0;
- if (int_val < 0) {
- uint_val = ((ulint) -int_val - 1) + 1;
- int_len++; /* reserve space for minus sign */
- } else {
- uint_val = (ulint) int_val;
- }
- for (; uint_val > 0; int_len++) {
- uint_val /= 10;
- }
- }
-
- /* allocate the string */
- data = eval_node_ensure_val_buf(func_node, int_len + 1);
-
- /* add terminating NUL character */
- data[int_len] = 0;
-
- /* convert the number */
-
- if (int_val == 0) {
- data[0] = '0';
- } else {
- int tmp;
- if (int_val < 0) {
- data[0] = '-'; /* preceding minus sign */
- uint_val = ((ulint) -int_val - 1) + 1;
- } else {
- uint_val = (ulint) int_val;
- }
- for (tmp = int_len; uint_val > 0; uint_val /= 10) {
- data[--tmp] = (byte)
- ('0' + (byte)(uint_val % 10));
- }
- }
-
- dfield_set_len(que_node_get_val(func_node), int_len);
-
- return;
-
- } else if (func == PARS_TO_NUMBER_TOKEN) {
-
- int_val = atoi((char*)
- dfield_get_data(que_node_get_val(arg1)));
-
- } else if (func == PARS_SYSDATE_TOKEN) {
- int_val = (lint) time(NULL);
- } else {
- eval_predefined_2(func_node);
-
- return;
- }
-
- eval_node_set_int_val(func_node, int_val);
+ eval_node_set_int_val(func_node,
+ dfield_get_len(que_node_get_val
+ (func_node->args)));
}
/*****************************************************************//**
Evaluates a function node. */
-UNIV_INTERN
void
eval_func(
/*======*/
@@ -897,8 +586,7 @@ eval_func(
if (dfield_is_null(que_node_get_val(arg))
&& (fclass != PARS_FUNC_CMP)
- && (func != PARS_NOTFOUND_TOKEN)
- && (func != PARS_PRINTF_TOKEN)) {
+ && (func != PARS_NOTFOUND_TOKEN)) {
ut_error;
}
@@ -923,24 +611,20 @@ eval_func(
case PARS_SUBSTR_TOKEN:
eval_substr(func_node);
return;
- case PARS_REPLSTR_TOKEN:
- eval_replstr(func_node);
- return;
case PARS_INSTR_TOKEN:
eval_instr(func_node);
return;
- case PARS_BINARY_TO_NUMBER_TOKEN:
- eval_binary_to_number(func_node);
- return;
case PARS_CONCAT_TOKEN:
eval_concat(func_node);
return;
case PARS_TO_BINARY_TOKEN:
eval_to_binary(func_node);
return;
- default:
- eval_predefined(func_node);
+ case PARS_LENGTH_TOKEN:
+ eval_length(func_node);
return;
+ default:
+ ut_error;
}
case PARS_FUNC_LOGICAL:
eval_logical(func_node);
diff --git a/storage/innobase/eval/eval0proc.cc b/storage/innobase/eval/eval0proc.cc
index 7adfe1e6389..7e39443f3d6 100644
--- a/storage/innobase/eval/eval0proc.cc
+++ b/storage/innobase/eval/eval0proc.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1998, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1998, 2016, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,14 +25,9 @@ Created 1/20/1998 Heikki Tuuri
#include "eval0proc.h"
-#ifdef UNIV_NONINL
-#include "eval0proc.ic"
-#endif
-
/**********************************************************************//**
Performs an execution step of an if-statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
if_step(
/*====*/
@@ -108,8 +103,7 @@ if_step(
/**********************************************************************//**
Performs an execution step of a while-statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
while_step(
/*=======*/
@@ -144,8 +138,7 @@ while_step(
/**********************************************************************//**
Performs an execution step of an assignment statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
assign_step(
/*========*/
@@ -171,8 +164,7 @@ assign_step(
/**********************************************************************//**
Performs an execution step of a for-loop node.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
for_step(
/*=====*/
@@ -233,8 +225,7 @@ for_step(
/**********************************************************************//**
Performs an execution step of an exit statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
exit_step(
/*======*/
@@ -265,8 +256,7 @@ exit_step(
/**********************************************************************//**
Performs an execution step of a return-statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
return_step(
/*========*/
diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc
index 901b28c8c62..a28c9f63797 100644
--- a/storage/innobase/fil/fil0crypt.cc
+++ b/storage/innobase/fil/fil0crypt.cc
@@ -24,14 +24,12 @@ Modified Jan Lindström jan.lindstrom@mariadb.com
*******************************************************/
#include "fil0fil.h"
-#include "fil0crypt.h"
+#include "mtr0types.h"
#include "mach0data.h"
+#include "page0size.h"
#include "page0zip.h"
-#include "buf0buf.h"
-#include "buf0checksum.h"
-
#ifndef UNIV_INNOCHECKSUM
-
+#include "fil0crypt.h"
#include "srv0srv.h"
#include "srv0start.h"
#include "log0recv.h"
@@ -41,7 +39,6 @@ Modified Jan Lindström jan.lindstrom@mariadb.com
#include "btr0scrub.h"
#include "fsp0fsp.h"
#include "fil0pagecompress.h"
-#include "ha_prototypes.h" // IB_LOG_
#include <my_crypt.h>
/** Mutex for keys */
@@ -49,10 +46,6 @@ static ib_mutex_t fil_crypt_key_mutex;
static bool fil_crypt_threads_inited = false;
-#ifdef UNIV_PFS_MUTEX
-static mysql_pfs_key_t fil_crypt_key_mutex_key;
-#endif
-
/** Is encryption enabled/disabled */
UNIV_INTERN ulong srv_encrypt_tables = 0;
@@ -77,10 +70,6 @@ static os_event_t fil_crypt_throttle_sleep_event;
/** Mutex for key rotation threads. */
UNIV_INTERN ib_mutex_t fil_crypt_threads_mutex;
-#ifdef UNIV_PFS_MUTEX
-static mysql_pfs_key_t fil_crypt_threads_mutex_key;
-#endif
-
/** Variable ensuring only 1 thread at time does initial conversion */
static bool fil_crypt_start_converting = false;
@@ -99,15 +88,6 @@ extern uint srv_background_scrub_data_check_interval;
static fil_crypt_stat_t crypt_stat;
static ib_mutex_t crypt_stat_mutex;
-#ifdef UNIV_PFS_MUTEX
-static mysql_pfs_key_t fil_crypt_stat_mutex_key;
-
-/**
- * key for crypt data mutex
-*/
-UNIV_INTERN mysql_pfs_key_t fil_crypt_data_mutex_key;
-#endif
-
/** Is background scrubbing enabled, defined on btr0scrub.cc */
extern my_bool srv_background_scrub_data_uncompressed;
extern my_bool srv_background_scrub_data_compressed;
@@ -133,14 +113,11 @@ UNIV_INTERN
void
fil_space_crypt_init()
{
- mutex_create(fil_crypt_key_mutex_key,
- &fil_crypt_key_mutex, SYNC_NO_ORDER_CHECK);
-
- fil_crypt_throttle_sleep_event = os_event_create();
+ mutex_create(LATCH_ID_FIL_CRYPT_MUTEX, &fil_crypt_key_mutex);
- mutex_create(fil_crypt_stat_mutex_key,
- &crypt_stat_mutex, SYNC_NO_ORDER_CHECK);
+ fil_crypt_throttle_sleep_event = os_event_create(0);
+ mutex_create(LATCH_ID_FIL_CRYPT_STAT_MUTEX, &crypt_stat_mutex);
memset(&crypt_stat, 0, sizeof(crypt_stat));
}
@@ -150,8 +127,7 @@ UNIV_INTERN
void
fil_space_crypt_cleanup()
{
- os_event_free(fil_crypt_throttle_sleep_event);
- fil_crypt_throttle_sleep_event = NULL;
+ os_event_destroy(fil_crypt_throttle_sleep_event);
mutex_free(&fil_crypt_key_mutex);
mutex_free(&crypt_stat_mutex);
}
@@ -240,10 +216,8 @@ fil_space_create_crypt_data(
uint min_key_version,
uint key_id)
{
- void* buf = mem_zalloc(sizeof(fil_space_crypt_t));
fil_space_crypt_t* crypt_data = NULL;
-
- if (buf) {
+ if (void* buf = ut_zalloc_nokey(sizeof(fil_space_crypt_t))) {
crypt_data = new(buf)
fil_space_crypt_t(
type,
@@ -299,59 +273,40 @@ fil_space_merge_crypt_data(
mutex_exit(&dst->mutex);
}
-/******************************************************************
-Read crypt data from a page (0)
-@param[in] space space_id
-@param[in] page Page 0
-@param[in] offset Offset to crypt data
-@return crypt data from page 0 or NULL. */
+/** Initialize encryption parameters from a tablespace header page.
+@param[in] page_size page size of the tablespace
+@param[in] page first page of the tablespace
+@return crypt data from page 0
+@retval NULL if not present or not valid */
UNIV_INTERN
fil_space_crypt_t*
-fil_space_read_crypt_data(
- ulint space,
- const byte* page,
- ulint offset)
+fil_space_read_crypt_data(const page_size_t& page_size, const byte* page)
{
+ const ulint offset = FSP_HEADER_OFFSET
+ + fsp_header_get_encryption_offset(page_size);
+
if (memcmp(page + offset, CRYPT_MAGIC, MAGIC_SZ) != 0) {
/* Crypt data is not stored. */
return NULL;
}
- ulint type = mach_read_from_1(page + offset + MAGIC_SZ + 0);
-
- if (! (type == CRYPT_SCHEME_UNENCRYPTED ||
- type == CRYPT_SCHEME_1)) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Found non sensible crypt scheme: " ULINTPF " for space " ULINTPF
- " offset: " ULINTPF " bytes: "
- "[ %.2x %.2x %.2x %.2x %.2x %.2x ].",
- type, space, offset,
- page[offset + 0 + MAGIC_SZ],
- page[offset + 1 + MAGIC_SZ],
- page[offset + 2 + MAGIC_SZ],
- page[offset + 3 + MAGIC_SZ],
- page[offset + 4 + MAGIC_SZ],
- page[offset + 5 + MAGIC_SZ]);
- ut_error;
- }
-
+ uint8_t type = mach_read_from_1(page + offset + MAGIC_SZ + 0);
+ uint8_t iv_length = mach_read_from_1(page + offset + MAGIC_SZ + 1);
fil_space_crypt_t* crypt_data;
- ulint iv_length = mach_read_from_1(page + offset + MAGIC_SZ + 1);
-
- if (! (iv_length == sizeof(crypt_data->iv))) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Found non sensible iv length: %lu for space %lu "
- " offset: %lu type: %lu bytes: "
- "[ %.2x %.2x %.2x %.2x %.2x %.2x ].",
- iv_length, space, offset, type,
- page[offset + 0 + MAGIC_SZ],
- page[offset + 1 + MAGIC_SZ],
- page[offset + 2 + MAGIC_SZ],
- page[offset + 3 + MAGIC_SZ],
- page[offset + 4 + MAGIC_SZ],
- page[offset + 5 + MAGIC_SZ]);
- ut_error;
+
+ if (!(type == CRYPT_SCHEME_UNENCRYPTED ||
+ type == CRYPT_SCHEME_1)
+ || iv_length != sizeof crypt_data->iv) {
+ ib::error() << "Found non sensible crypt scheme: "
+ << type << "," << iv_length << " for space: "
+ << page_get_space_id(page) << " offset: "
+ << offset << " bytes: ["
+ << page[offset + 2 + MAGIC_SZ]
+ << page[offset + 3 + MAGIC_SZ]
+ << page[offset + 4 + MAGIC_SZ]
+ << page[offset + 5 + MAGIC_SZ]
+ << "].";
+ return NULL;
}
uint min_key_version = mach_read_from_4
@@ -383,28 +338,67 @@ fil_space_destroy_crypt_data(
fil_space_crypt_t **crypt_data)
{
if (crypt_data != NULL && (*crypt_data) != NULL) {
- fil_space_crypt_t* c = *crypt_data;
- c->~fil_space_crypt_t();
- mem_free(c);
- *crypt_data = NULL;
+ fil_space_crypt_t* c;
+ if (UNIV_LIKELY(fil_crypt_threads_inited)) {
+ mutex_enter(&fil_crypt_threads_mutex);
+ c = *crypt_data;
+ *crypt_data = NULL;
+ mutex_exit(&fil_crypt_threads_mutex);
+ } else {
+ ut_ad(srv_read_only_mode || !srv_was_started);
+ c = *crypt_data;
+ *crypt_data = NULL;
+ }
+ if (c) {
+ c->~fil_space_crypt_t();
+ ut_free(c);
+ }
}
}
+/** Write the crypt data information to the given page.
+It should be called during .ibd file creation.
+@param[in] flags tablespace flags
+@param[in,out] page first page of the tablespace */
+void
+fil_space_crypt_t::fill_page0(
+ ulint flags,
+ byte* page)
+{
+ const uint len = sizeof(iv);
+ const ulint offset = FSP_HEADER_OFFSET
+ + fsp_header_get_encryption_offset(page_size_t(flags));
+ page0_offset = offset;
+
+ memcpy(page + offset, CRYPT_MAGIC, MAGIC_SZ);
+ mach_write_to_1(page + offset + MAGIC_SZ, type);
+ mach_write_to_1(page + offset + MAGIC_SZ + 1, len);
+ memcpy(page + offset + MAGIC_SZ + 2, &iv, len);
+
+ mach_write_to_4(page + offset + MAGIC_SZ + 2 + len,
+ min_key_version);
+ mach_write_to_4(page + offset + MAGIC_SZ + 2 + len + 4,
+ key_id);
+ mach_write_to_1(page + offset + MAGIC_SZ + 2 + len + 8,
+ encryption);
+}
+
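
fill_page0() and fil_space_read_crypt_data() must agree on one serialized layout at the crypt-data offset of page 0: magic, type, IV length, IV, minimum key version, key id, and the encryption mode byte. A sketch of that layout as a plain encoder — big-endian integers as mach_write_to_4 produces; the magic bytes, offsets, and names here are illustrative stand-ins:

```cpp
#include <cstdint>
#include <cstring>

// Simplified layout written at the crypt-data offset in page 0:
//   magic[6] | type(1) | iv_len(1) | iv[iv_len] | min_key_version(4)
//   | key_id(4) | encryption(1)
static const unsigned char MAGIC[6] = {'s', 0xE, 0xC, 'R', 'E', 't'};

inline void write_be32(unsigned char* p, std::uint32_t v) {
	p[0] = (unsigned char)(v >> 24);
	p[1] = (unsigned char)(v >> 16);
	p[2] = (unsigned char)(v >> 8);
	p[3] = (unsigned char)(v);
}

std::size_t write_crypt_data(unsigned char* page, std::size_t offset,
			     std::uint8_t type, const unsigned char* iv,
			     std::uint8_t iv_len,
			     std::uint32_t min_key_version,
			     std::uint32_t key_id, std::uint8_t encryption) {
	unsigned char* p = page + offset;
	std::memcpy(p, MAGIC, sizeof MAGIC);	p += sizeof MAGIC;
	*p++ = type;
	*p++ = iv_len;
	std::memcpy(p, iv, iv_len);		p += iv_len;
	write_be32(p, min_key_version);		p += 4;
	write_be32(p, key_id);			p += 4;
	*p++ = encryption;
	return std::size_t(p - page);	// end offset of the record
}
```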
/******************************************************************
Write crypt data to a page (0)
-@param[in,out] page0 Page 0 where to write
-@param[in,out] mtr Minitransaction */
+@param[in] space tablespace
+@param[in,out] page0 first page of the tablespace
+@param[in,out] mtr mini-transaction */
UNIV_INTERN
void
fil_space_crypt_t::write_page0(
+ const fil_space_t* space,
byte* page,
mtr_t* mtr)
{
- ulint space_id = mach_read_from_4(
- page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+ ut_ad(this == space->crypt_data);
const uint len = sizeof(iv);
- ulint zip_size = fsp_header_get_zip_size(page);
- const ulint offset = fsp_header_get_crypt_offset(zip_size);
+ const ulint offset = FSP_HEADER_OFFSET
+ + fsp_header_get_encryption_offset(page_size_t(space->flags));
page0_offset = offset;
/*
@@ -431,7 +425,7 @@ fil_space_crypt_t::write_page0(
page,
MLOG_FILE_WRITE_CRYPT_DATA,
log_ptr, mtr);
- mach_write_to_4(log_ptr, space_id);
+ mach_write_to_4(log_ptr, space->id);
log_ptr += 4;
mach_write_to_2(log_ptr, offset);
log_ptr += 2;
@@ -452,45 +446,6 @@ fil_space_crypt_t::write_page0(
}
/******************************************************************
-Set crypt data for a tablespace
-@param[in,out] space Tablespace
-@param[in,out] crypt_data Crypt data to be set
-@return crypt_data in tablespace */
-static
-fil_space_crypt_t*
-fil_space_set_crypt_data(
- fil_space_t* space,
- fil_space_crypt_t* crypt_data)
-{
- fil_space_crypt_t* free_crypt_data = NULL;
- fil_space_crypt_t* ret_crypt_data = NULL;
-
- /* Provided space is protected using fil_space_acquire()
- from concurrent operations. */
- if (space->crypt_data != NULL) {
- /* There is already crypt data present,
- merge new crypt_data */
- fil_space_merge_crypt_data(space->crypt_data,
- crypt_data);
- ret_crypt_data = space->crypt_data;
- free_crypt_data = crypt_data;
- } else {
- space->crypt_data = crypt_data;
- ret_crypt_data = space->crypt_data;
- }
-
- if (free_crypt_data != NULL) {
- /* there was already crypt data present and the new crypt
- * data provided as argument to this function has been merged
- * into that => free new crypt data
- */
- fil_space_destroy_crypt_data(&free_crypt_data);
- }
-
- return ret_crypt_data;
-}
-
-/******************************************************************
Parse a MLOG_FILE_WRITE_CRYPT_DATA log entry
@param[in] ptr Log entry start
@param[in] end_ptr Log entry end
@@ -548,65 +503,69 @@ fil_parse_write_crypt_data(
return NULL;
}
- fil_space_crypt_t* crypt_data = fil_space_create_crypt_data(encryption, key_id);
- /* Need to overwrite these as above will initialize fields. */
+ mutex_enter(&fil_system->mutex);
+
+ fil_space_t* space = fil_space_get_by_id(space_id);
+
+ if (!space) {
+ mutex_exit(&fil_system->mutex);
+ return ptr + len;
+ }
+
+ fil_space_crypt_t* crypt_data = fil_space_create_crypt_data(
+ encryption, key_id);
+
crypt_data->page0_offset = offset;
crypt_data->min_key_version = min_key_version;
- crypt_data->encryption = encryption;
crypt_data->type = type;
memcpy(crypt_data->iv, ptr, len);
ptr += len;
- /* update fil_space memory cache with crypt_data */
- if (fil_space_t* space = fil_space_acquire_silent(space_id)) {
- crypt_data = fil_space_set_crypt_data(space, crypt_data);
- fil_space_release(space);
- /* Check is used key found from encryption plugin */
- if (crypt_data->should_encrypt()
- && !crypt_data->is_key_found()) {
- *err = DB_DECRYPTION_FAILED;
- }
- } else {
+ if (space->crypt_data) {
+ fil_space_merge_crypt_data(space->crypt_data, crypt_data);
fil_space_destroy_crypt_data(&crypt_data);
+ crypt_data = space->crypt_data;
+ } else {
+ space->crypt_data = crypt_data;
+ }
+
+ mutex_exit(&fil_system->mutex);
+
+ if (crypt_data->should_encrypt() && !crypt_data->is_key_found()) {
+ *err = DB_DECRYPTION_FAILED;
}
return ptr;
}
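
When the redo-log record is applied, the parsed crypt data is either merged into the copy already cached for the tablespace or installed as that copy, all under the fil_system mutex; a record for an unknown tablespace is skipped. A sketch of that merge-or-install step with hypothetical stand-ins for the registry types:

```cpp
#include <map>
#include <memory>
#include <mutex>

// Hypothetical stand-ins for the fil_system registry and crypt data.
struct CryptData { unsigned type; unsigned min_key_version; };
struct Space { std::unique_ptr<CryptData> crypt_data; };

std::mutex fil_mutex;			// fil_system->mutex analogue
std::map<unsigned, Space> spaces;	// space_id -> space

// Mirrors the merge-or-install step: under the system mutex, either merge
// the parsed crypt data into the cached copy or install it as the cache.
CryptData* install_or_merge(unsigned space_id,
			    std::unique_ptr<CryptData> parsed) {
	std::lock_guard<std::mutex> lock(fil_mutex);
	auto it = spaces.find(space_id);
	if (it == spaces.end()) {
		return nullptr;		// space unknown: skip the record
	}
	Space& space = it->second;
	if (space.crypt_data) {
		// merge: keep the existing object, absorb the new fields
		space.crypt_data->type = parsed->type;
		space.crypt_data->min_key_version = parsed->min_key_version;
	} else {
		space.crypt_data = std::move(parsed);
	}
	return space.crypt_data.get();
}
```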
-/******************************************************************
-Encrypt a buffer
+/** Encrypt a buffer.
@param[in,out] crypt_data Crypt data
@param[in] space space_id
@param[in] offset Page offset
@param[in] lsn Log sequence number
@param[in] src_frame Page to encrypt
-@param[in] zip_size Compressed size or 0
+@param[in] page_size Page size
@param[in,out] dst_frame Output buffer
@return encrypted buffer or NULL */
UNIV_INTERN
byte*
fil_encrypt_buf(
- fil_space_crypt_t* crypt_data,
- ulint space,
- ulint offset,
- lsn_t lsn,
- const byte* src_frame,
- ulint zip_size,
- byte* dst_frame)
+ fil_space_crypt_t* crypt_data,
+ ulint space,
+ ulint offset,
+ lsn_t lsn,
+ const byte* src_frame,
+ const page_size_t& page_size,
+ byte* dst_frame)
{
- ulint page_size = (zip_size) ? zip_size : UNIV_PAGE_SIZE;
+ uint size = uint(page_size.physical());
uint key_version = fil_crypt_get_latest_key_version(crypt_data);
- if (key_version == ENCRYPTION_KEY_VERSION_INVALID) {
- ib_logf(IB_LOG_LEVEL_FATAL,
- "Unknown key id %u. Can't continue!\n",
- crypt_data->key_id);
- ut_error;
- }
+ ut_a(key_version != ENCRYPTION_KEY_VERSION_INVALID);
ulint orig_page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE);
ibool page_compressed = (orig_page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED);
- ulint header_len = FIL_PAGE_DATA;
+ uint header_len = FIL_PAGE_DATA;
if (page_compressed) {
header_len += (FIL_PAGE_COMPRESSED_SIZE + FIL_PAGE_COMPRESSION_METHOD_SIZE);
@@ -619,8 +578,8 @@ fil_encrypt_buf(
mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, key_version);
/* Calculate the start offset in a page */
- ulint unencrypted_bytes = header_len + FIL_PAGE_DATA_END;
- ulint srclen = page_size - unencrypted_bytes;
+ uint unencrypted_bytes = header_len + FIL_PAGE_DATA_END;
+ uint srclen = size - unencrypted_bytes;
const byte* src = src_frame + header_len;
byte* dst = dst_frame + header_len;
uint32 dstlen = 0;
@@ -632,16 +591,8 @@ fil_encrypt_buf(
int rc = encryption_scheme_encrypt(src, srclen, dst, &dstlen,
crypt_data, key_version,
space, offset, lsn);
-
- if (! ((rc == MY_AES_OK) && ((ulint) dstlen == srclen))) {
- ib_logf(IB_LOG_LEVEL_FATAL,
- "Unable to encrypt data-block "
- " src: %p srclen: %ld buf: %p buflen: %d."
- " return-code: %d. Can't continue!\n",
- src, (long)srclen,
- dst, dstlen, rc);
- ut_error;
- }
+ ut_a(rc == MY_AES_OK);
+ ut_a(dstlen == srclen);
/* For compressed tables we do not store the FIL header because
the whole page is not stored to the disk. In compressed tables only
@@ -649,23 +600,24 @@ fil_encrypt_buf(
to sector boundary is written. */
if (!page_compressed) {
/* FIL page trailer is also not encrypted */
- memcpy(dst_frame + page_size - FIL_PAGE_DATA_END,
- src_frame + page_size - FIL_PAGE_DATA_END,
+ memcpy(dst_frame + page_size.physical() - FIL_PAGE_DATA_END,
+ src_frame + page_size.physical() - FIL_PAGE_DATA_END,
FIL_PAGE_DATA_END);
} else {
/* Clean up rest of buffer */
- memset(dst_frame+header_len+srclen, 0, page_size - (header_len+srclen));
+ memset(dst_frame+header_len+srclen, 0,
+ page_size.physical() - (header_len + srclen));
}
/* handle post encryption checksum */
ib_uint32_t checksum = 0;
- checksum = fil_crypt_calculate_checksum(zip_size, dst_frame);
+ checksum = fil_crypt_calculate_checksum(page_size, dst_frame);
// store the post-encryption checksum after the key-version
mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4, checksum);
- ut_ad(fil_space_verify_crypt_checksum(dst_frame, zip_size));
+ ut_ad(fil_space_verify_crypt_checksum(dst_frame, page_size));
srv_stats.pages_encrypted.inc();
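
fil_encrypt_buf() encrypts only the payload between the FIL header and trailer: the header stays plaintext (it carries the key version and the post-encryption checksum), and for uncompressed pages the trailer is copied through as well. The region arithmetic in isolation, with header_len and trailer_len as stand-ins for FIL_PAGE_DATA and FIL_PAGE_DATA_END:

```cpp
#include <cstddef>

// Illustrative page regions for an uncompressed page, mirroring the
// arithmetic in fil_encrypt_buf(): header and trailer stay plaintext.
struct EncryptRegion {
	std::size_t src_off;	// first encrypted byte
	std::size_t len;	// number of encrypted bytes
};

EncryptRegion encrypt_region(std::size_t physical_size,
			     std::size_t header_len,   // FIL_PAGE_DATA
			     std::size_t trailer_len)  // FIL_PAGE_DATA_END
{
	EncryptRegion r;
	r.src_off = header_len;
	r.len = physical_size - header_len - trailer_len;
	return r;
}
```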
@@ -685,33 +637,31 @@ UNIV_INTERN
byte*
fil_space_encrypt(
const fil_space_t* space,
- ulint offset,
- lsn_t lsn,
- byte* src_frame,
- byte* dst_frame)
+ ulint offset,
+ lsn_t lsn,
+ byte* src_frame,
+ byte* dst_frame)
{
- ulint orig_page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE);
-
- if (orig_page_type==FIL_PAGE_TYPE_FSP_HDR
- || orig_page_type==FIL_PAGE_TYPE_XDES) {
- /* File space header or extent descriptor do not need to be
- encrypted. */
- return (src_frame);
+ switch (mach_read_from_2(src_frame+FIL_PAGE_TYPE)) {
+ case FIL_PAGE_TYPE_FSP_HDR:
+ case FIL_PAGE_TYPE_XDES:
+ case FIL_PAGE_RTREE:
+ /* File space header, extent descriptor or spatial index
+ are not encrypted. */
+ return src_frame;
}
if (!space->crypt_data || !space->crypt_data->is_encrypted()) {
return (src_frame);
}
- fil_space_crypt_t* crypt_data = space->crypt_data;
ut_ad(space->n_pending_ios > 0);
- ulint zip_size = fsp_flags_get_zip_size(space->flags);
- return fil_encrypt_buf(crypt_data, space->id, offset, lsn,
- src_frame, zip_size, dst_frame);
+ return fil_encrypt_buf(space->crypt_data, space->id, offset, lsn,
+ src_frame, page_size_t(space->flags),
+ dst_frame);
}
-/******************************************************************
-Decrypt a page
+/** Decrypt a page.
@param[in] crypt_data crypt_data
@param[in] tmp_frame Temporary buffer
@param[in] page_size Page size
@@ -723,15 +673,15 @@ bool
fil_space_decrypt(
fil_space_crypt_t* crypt_data,
byte* tmp_frame,
- ulint page_size,
+ const page_size_t& page_size,
byte* src_frame,
dberr_t* err)
{
ulint page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE);
uint key_version = mach_read_from_4(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
bool page_compressed = (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED);
- ulint offset = mach_read_from_4(src_frame + FIL_PAGE_OFFSET);
- ulint space = mach_read_from_4(src_frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+ uint offset = mach_read_from_4(src_frame + FIL_PAGE_OFFSET);
+ uint space = mach_read_from_4(src_frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
ib_uint64_t lsn = mach_read_from_8(src_frame + FIL_PAGE_LSN);
*err = DB_SUCCESS;
@@ -743,7 +693,7 @@ fil_space_decrypt(
ut_a(crypt_data != NULL && crypt_data->is_encrypted());
/* read space & lsn */
- ulint header_len = FIL_PAGE_DATA;
+ uint header_len = FIL_PAGE_DATA;
if (page_compressed) {
header_len += (FIL_PAGE_COMPRESSED_SIZE + FIL_PAGE_COMPRESSION_METHOD_SIZE);
@@ -756,7 +706,8 @@ fil_space_decrypt(
const byte* src = src_frame + header_len;
byte* dst = tmp_frame + header_len;
uint32 dstlen = 0;
- ulint srclen = page_size - (header_len + FIL_PAGE_DATA_END);
+ uint srclen = uint(page_size.physical())
+ - header_len - FIL_PAGE_DATA_END;
if (page_compressed) {
srclen = mach_read_from_2(src_frame + FIL_PAGE_DATA);
@@ -773,13 +724,11 @@ fil_space_decrypt(
return false;
}
- ib_logf(IB_LOG_LEVEL_FATAL,
- "Unable to decrypt data-block "
- " src: %p srclen: %ld buf: %p buflen: %d."
- " return-code: %d. Can't continue!\n",
- src, (long)srclen,
- dst, dstlen, rc);
- ut_error;
+ ib::fatal() << "Unable to decrypt data-block"
+ << " src: " << src << " srclen: "
+ << srclen << " buf: " << dst << " buflen: "
+ << dstlen << " return-code: " << rc
+ << ". Can't continue!";
}
/* For compressed tables we do not store the FIL header because
@@ -788,8 +737,8 @@ fil_space_decrypt(
to sector boundary is written. */
if (!page_compressed) {
/* Copy FIL trailer */
- memcpy(tmp_frame + page_size - FIL_PAGE_DATA_END,
- src_frame + page_size - FIL_PAGE_DATA_END,
+ memcpy(tmp_frame + page_size.physical() - FIL_PAGE_DATA_END,
+ src_frame + page_size.physical() - FIL_PAGE_DATA_END,
FIL_PAGE_DATA_END);
}
@@ -798,13 +747,11 @@ fil_space_decrypt(
return true; /* page was decrypted */
}
-/******************************************************************
-Decrypt a page
+/**
+Decrypt a page.
@param[in] space Tablespace
@param[in] tmp_frame Temporary buffer used for decrypting
-@param[in] page_size Page size
@param[in,out] src_frame Page to decrypt
-@param[out] decrypted true if page was decrypted
@return decrypted page, or original not encrypted page if decryption is
not needed.*/
UNIV_INTERN
@@ -812,31 +759,23 @@ byte*
fil_space_decrypt(
const fil_space_t* space,
byte* tmp_frame,
- byte* src_frame,
- bool* decrypted)
+ byte* src_frame)
{
dberr_t err = DB_SUCCESS;
byte* res = NULL;
- ulint zip_size = fsp_flags_get_zip_size(space->flags);
- ulint size = zip_size ? zip_size : UNIV_PAGE_SIZE;
- *decrypted = false;
+ const page_size_t page_size(space->flags);
ut_ad(space->crypt_data != NULL && space->crypt_data->is_encrypted());
ut_ad(space->n_pending_ios > 0);
- bool encrypted = fil_space_decrypt(
- space->crypt_data,
- tmp_frame,
- size,
- src_frame,
- &err);
+ bool encrypted = fil_space_decrypt(space->crypt_data, tmp_frame,
+ page_size, src_frame, &err);
if (err == DB_SUCCESS) {
if (encrypted) {
- *decrypted = true;
/* Copy the decrypted page back to page buffer, not
really any other options. */
- memcpy(src_frame, tmp_frame, size);
+ memcpy(src_frame, tmp_frame, page_size.physical());
}
res = src_frame;
@@ -847,27 +786,21 @@ fil_space_decrypt(
/******************************************************************
Calculate post encryption checksum
-@param[in] zip_size zip_size or 0
+@param[in] page_size page size
@param[in] dst_frame Block where checksum is calculated
@return page checksum */
UNIV_INTERN
-ulint
+uint32_t
fil_crypt_calculate_checksum(
- ulint zip_size,
- const byte* dst_frame)
+ const page_size_t& page_size,
+ const byte* dst_frame)
{
- ib_uint32_t checksum = 0;
-
/* For encrypted tables we use only crc32 and strict_crc32 */
- if (zip_size == 0) {
- checksum = buf_calc_page_crc32(dst_frame);
- } else {
- checksum = page_zip_calc_checksum(dst_frame, zip_size,
- SRV_CHECKSUM_ALGORITHM_CRC32);
- }
-
- return checksum;
+ return page_size.is_compressed()
+ ? page_zip_calc_checksum(dst_frame, page_size.physical(),
+ SRV_CHECKSUM_ALGORITHM_CRC32)
+ : buf_calc_page_crc32(dst_frame);
}
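
The rewritten fil_crypt_calculate_checksum() is a straight two-way selection: for encrypted tables only the crc32 family is used, with the routine chosen by page format. A sketch with hypothetical declarations standing in for buf_calc_page_crc32() and page_zip_calc_checksum():

```cpp
#include <cstddef>
#include <cstdint>

// Hypothetical stand-ins for buf_calc_page_crc32() and
// page_zip_calc_checksum(..., CRC32); declarations only.
std::uint32_t page_crc32(const unsigned char* frame);
std::uint32_t page_zip_crc32(const unsigned char* frame, std::size_t size);

// Mirrors fil_crypt_calculate_checksum(): encrypted tables always use a
// crc32-family checksum; only the routine differs by page format.
std::uint32_t crypt_checksum(const unsigned char* frame,
			     std::size_t physical_size, bool compressed) {
	return compressed ? page_zip_crc32(frame, physical_size)
			  : page_crc32(frame);
}
```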
/***********************************************************************/
@@ -939,11 +872,15 @@ fil_crypt_needs_rotation(
if (crypt_data->encryption == FIL_ENCRYPTION_DEFAULT
&& crypt_data->type == CRYPT_SCHEME_1
- && srv_encrypt_tables == 0 ) {
+ && !srv_encrypt_tables) {
/* This is rotation encrypted => unencrypted */
return true;
}
+ if (rotate_key_age == 0) {
+ return false;
+ }
+
/* this is rotation encrypted => encrypted,
* only reencrypt if key is sufficiently old */
if (key_version + rotate_key_age < latest_key_version) {
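
Collecting the conditions in this hunk: a default-encrypted space rotates encrypted -> unencrypted when srv_encrypt_tables is turned off, age-based rotation is disabled entirely when rotate_key_age is 0, and otherwise re-encryption happens only once the key is older than rotate_key_age versions. A condensed sketch, with parameters as simplified stand-ins for the crypt_data fields; conditions handled elsewhere in the function are omitted:

```cpp
#include <cstdint>

// Condensed sketch of the rotation checks visible in this hunk.
bool needs_rotation(bool default_encryption, bool encrypted_scheme,
		    bool srv_encrypt_tables,
		    std::uint32_t key_version,
		    std::uint32_t latest_key_version,
		    std::uint32_t rotate_key_age) {
	if (default_encryption && encrypted_scheme && !srv_encrypt_tables) {
		return true;	// rotation encrypted -> unencrypted
	}
	if (rotate_key_age == 0) {
		return false;	// age-based rotation is disabled
	}
	// Rotation encrypted -> encrypted: re-encrypt only if the key
	// is sufficiently old.
	return key_version + rotate_key_age < latest_key_version;
}
```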
@@ -964,8 +901,8 @@ fil_crypt_read_crypt_data(fil_space_t* space)
/* The encryption metadata has already been read, or
the tablespace is not encrypted and the file has been
opened already, or the file cannot be accessed,
- likely due to a concurrent TRUNCATE or
- RENAME or DROP (possibly as part of ALTER TABLE).
+ likely due to a concurrent DROP
+ (possibly as part of TRUNCATE or ALTER TABLE).
FIXME: The file can become unaccessible any time
after this check! We should really remove this
function and instead make crypt_data an integral
@@ -973,31 +910,25 @@ fil_crypt_read_crypt_data(fil_space_t* space)
return;
}
+ const page_size_t page_size(space->flags);
mtr_t mtr;
- mtr_start(&mtr);
- ulint zip_size = fsp_flags_get_zip_size(space->flags);
- ulint offset = fsp_header_get_crypt_offset(zip_size);
- if (buf_block_t* block = buf_page_get(space->id, zip_size, 0,
- RW_S_LATCH, &mtr)) {
+ mtr.start();
+ if (buf_block_t* block = buf_page_get(page_id_t(space->id, 0),
+ page_size, RW_S_LATCH, &mtr)) {
mutex_enter(&fil_system->mutex);
if (!space->crypt_data) {
space->crypt_data = fil_space_read_crypt_data(
- space->id, block->frame, offset);
+ page_size, block->frame);
}
mutex_exit(&fil_system->mutex);
}
-
- mtr_commit(&mtr);
+ mtr.commit();
}
-/***********************************************************************
-Start encrypting a space
+/** Start encrypting a space
@param[in,out] space Tablespace
-@return true if a recheck is needed */
-static
-bool
-fil_crypt_start_encrypting_space(
- fil_space_t* space)
+@return true if a recheck of the tablespace is needed by the encryption thread. */
+static bool fil_crypt_start_encrypting_space(fil_space_t* space)
{
bool recheck = false;
@@ -1028,7 +959,8 @@ fil_crypt_start_encrypting_space(
* crypt data in page 0 */
/* 1 - create crypt data */
- crypt_data = fil_space_create_crypt_data(FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
+ crypt_data = fil_space_create_crypt_data(
+ FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
if (crypt_data == NULL) {
mutex_exit(&fil_crypt_threads_mutex);
@@ -1041,9 +973,9 @@ fil_crypt_start_encrypting_space(
crypt_data->rotate_state.starting = true;
crypt_data->rotate_state.active_threads = 1;
- mutex_enter(&crypt_data->mutex);
- crypt_data = fil_space_set_crypt_data(space, crypt_data);
- mutex_exit(&crypt_data->mutex);
+ mutex_enter(&fil_system->mutex);
+ space->crypt_data = crypt_data;
+ mutex_exit(&fil_system->mutex);
fil_crypt_start_converting = true;
mutex_exit(&fil_crypt_threads_mutex);
@@ -1051,26 +983,27 @@ fil_crypt_start_encrypting_space(
do
{
mtr_t mtr;
- mtr_start(&mtr);
+ mtr.start();
+ mtr.set_named_space(space);
/* 2 - get page 0 */
- ulint zip_size = fsp_flags_get_zip_size(space->flags);
- buf_block_t* block = buf_page_get_gen(space->id, zip_size, 0,
- RW_X_LATCH,
- NULL,
- BUF_GET,
- __FILE__, __LINE__,
- &mtr);
+ dberr_t err = DB_SUCCESS;
+ buf_block_t* block = buf_page_get_gen(
+ page_id_t(space->id, 0), page_size_t(space->flags),
+ RW_X_LATCH, NULL, BUF_GET,
+ __FILE__, __LINE__,
+ &mtr, &err);
/* 3 - write crypt data to page 0 */
byte* frame = buf_block_get_frame(block);
crypt_data->type = CRYPT_SCHEME_1;
- crypt_data->write_page0(frame, &mtr);
- mtr_commit(&mtr);
+ crypt_data->write_page0(space, frame, &mtr);
+
+ mtr.commit();
/* record lsn of update */
- lsn_t end_lsn = mtr.end_lsn;
+ lsn_t end_lsn = mtr.commit_lsn();
/* 4 - sync tablespace before publishing crypt data */
@@ -1079,7 +1012,7 @@ fil_crypt_start_encrypting_space(
do {
ulint n_pages = 0;
- success = buf_flush_list(ULINT_MAX, end_lsn, &n_pages);
+ success = buf_flush_lists(ULINT_MAX, end_lsn, &n_pages);
buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
sum_pages += n_pages;
} while (!success);
@@ -1130,8 +1063,8 @@ struct rotate_thread_t {
uint estimated_max_iops; /*!< estimation of max iops */
uint allocated_iops; /*!< allocated iops */
- uint cnt_waited; /*!< #times waited during this slot */
- uint sum_waited_us; /*!< wait time during this slot */
+ ulint cnt_waited; /*!< #times waited during this slot */
+ uintmax_t sum_waited_us; /*!< wait time during this slot */
fil_crypt_stat_t crypt_stat; // statistics
@@ -1143,11 +1076,12 @@ struct rotate_thread_t {
switch (srv_shutdown_state) {
case SRV_SHUTDOWN_NONE:
return thread_no >= srv_n_fil_crypt_threads;
+ case SRV_SHUTDOWN_EXIT_THREADS:
+ /* srv_init_abort() must have been invoked */
case SRV_SHUTDOWN_CLEANUP:
return true;
case SRV_SHUTDOWN_FLUSH_PHASE:
case SRV_SHUTDOWN_LAST_PHASE:
- case SRV_SHUTDOWN_EXIT_THREADS:
break;
}
ut_ad(0);
@@ -1171,7 +1105,7 @@ fil_crypt_space_needs_rotation(
fil_space_t* space = state->space;
/* Make sure that tablespace is normal tablespace */
- if (space->purpose != FIL_TABLESPACE) {
+ if (space->purpose != FIL_TYPE_TABLESPACE) {
return false;
}
@@ -1342,7 +1276,7 @@ fil_crypt_realloc_iops(
if (10 * state->cnt_waited > state->batch) {
/* if we waited more than 10% re-estimate max_iops */
ulint avg_wait_time_us =
- state->sum_waited_us / state->cnt_waited;
+ ulint(state->sum_waited_us / state->cnt_waited);
if (avg_wait_time_us == 0) {
avg_wait_time_us = 1; // prevent division by zero
@@ -1359,11 +1293,12 @@ fil_crypt_realloc_iops(
state->cnt_waited = 0;
state->sum_waited_us = 0;
} else {
-
DBUG_PRINT("ib_crypt",
- ("thr_no: %u only waited %lu%% skip re-estimate.",
- state->thread_no,
- (100 * state->cnt_waited) / (state->batch ? state->batch : 1)));
+ ("thr_no: %u only waited " ULINTPF
+ "%% skip re-estimate.",
+ state->thread_no,
+ (100 * state->cnt_waited)
+ / (state->batch ? state->batch : 1)));
}
if (state->estimated_max_iops <= state->allocated_iops) {
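The re-estimation trigger and the guarded average in this hunk amount to the following arithmetic (the conversion of the average into a new estimated_max_iops is elided by the hunk, so it is not reproduced here):

#include <cstdint>

// Re-estimate only when more than 10% of the batch was spent waiting.
static bool should_reestimate(uintmax_t cnt_waited, uintmax_t batch)
{
	return 10 * cnt_waited > batch;
}

// Average wait per page, clamped to at least 1 to prevent the later
// division by zero, as in the code above.
static uintmax_t avg_wait_us(uintmax_t sum_waited_us, uintmax_t cnt_waited)
{
	uintmax_t avg = cnt_waited ? sum_waited_us / cnt_waited : 0;
	return avg ? avg : 1;
}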
@@ -1451,14 +1386,12 @@ fil_crypt_return_iops(
fil_crypt_update_total_stat(state);
}
-/***********************************************************************
-Search for a space needing rotation
-@param[in,out] key_state Key state
-@param[in,out] state Rotation state
-@param[in,out] recheck recheck ? */
-static
-bool
-fil_crypt_find_space_to_rotate(
+/** Search for a space needing rotation
+@param[in,out] key_state Key state
+@param[in,out] state Rotation state
+@param[in,out]	recheck		whether a recheck of the tablespace is needed,
+			or the encryption thread is still writing page 0 */
+static bool fil_crypt_find_space_to_rotate(
key_state_t* key_state,
rotate_thread_t* state,
bool* recheck)
@@ -1491,11 +1424,18 @@ fil_crypt_find_space_to_rotate(
if (srv_fil_crypt_rotate_key_age) {
state->space = fil_space_next(state->space);
} else {
- state->space = fil_space_keyrotate_next(state->space);
+ state->space = fil_system->keyrotate_next(
+ state->space, *recheck,
+ key_state->key_version);
}
while (!state->should_shutdown() && state->space) {
- fil_crypt_read_crypt_data(state->space);
+ /* If there is no crypt data and we have not yet read
+ page 0 for this tablespace, we need to read it before
+ we can continue. */
+ if (!state->space->crypt_data) {
+ fil_crypt_read_crypt_data(state->space);
+ }
if (fil_crypt_space_needs_rotation(state, key_state, recheck)) {
ut_ad(key_state->key_id);
@@ -1508,7 +1448,9 @@ fil_crypt_find_space_to_rotate(
if (srv_fil_crypt_rotate_key_age) {
state->space = fil_space_next(state->space);
} else {
- state->space = fil_space_keyrotate_next(state->space);
+ state->space = fil_system->keyrotate_next(
+ state->space, *recheck,
+ key_state->key_version);
}
}
@@ -1621,7 +1563,6 @@ fil_crypt_find_page_to_rotate(
/***********************************************************************
Get a page and compute sleep time
@param[in,out] state Rotation state
-@param[in] zip_size compressed size or 0
@param[in] offset Page offset
@param[in,out] mtr Minitransaction
@param[out] sleeptime_ms Sleep time
@@ -1636,23 +1577,30 @@ fil_crypt_get_page_throttle_func(
mtr_t* mtr,
ulint* sleeptime_ms,
const char* file,
- ulint line)
+ unsigned line)
{
fil_space_t* space = state->space;
- ulint zip_size = fsp_flags_get_zip_size(space->flags);
+ const page_size_t page_size = page_size_t(space->flags);
+ const page_id_t page_id(space->id, offset);
ut_ad(space->n_pending_ops > 0);
- buf_block_t* block = buf_page_try_get_func(space->id, offset, RW_X_LATCH,
- true,
- file, line, mtr);
+ /* Before reading from tablespace we need to make sure that
+ the tablespace is not about to be dropped. */
+ if (space->is_stopping()) {
+ return NULL;
+ }
+
+ dberr_t err = DB_SUCCESS;
+ buf_block_t* block = buf_page_get_gen(page_id, page_size, RW_X_LATCH,
+ NULL,
+ BUF_PEEK_IF_IN_POOL, file, line,
+ mtr, &err);
if (block != NULL) {
/* page was in buffer pool */
state->crypt_stat.pages_read_from_cache++;
return block;
}
- /* Before reading from tablespace we need to make sure that
- tablespace exists and is not is just being dropped. */
if (space->is_stopping()) {
return NULL;
}
@@ -1660,10 +1608,10 @@ fil_crypt_get_page_throttle_func(
state->crypt_stat.pages_read_from_disk++;
const ulonglong start = my_interval_timer();
- block = buf_page_get_gen(space->id, zip_size, offset,
+ block = buf_page_get_gen(page_id, page_size,
RW_X_LATCH,
NULL, BUF_GET_POSSIBLY_FREED,
- file, line, mtr);
+ file, line, mtr, &err);
const ulonglong end = my_interval_timer();
state->cnt_waited++;
@@ -1674,7 +1622,7 @@ fil_crypt_get_page_throttle_func(
/* average page load */
ulint add_sleeptime_ms = 0;
- ulint avg_wait_time_us = state->sum_waited_us / state->cnt_waited;
+	ulint avg_wait_time_us = ulint(state->sum_waited_us / state->cnt_waited);
ulint alloc_wait_us = 1000000 / state->allocated_iops;
if (avg_wait_time_us < alloc_wait_us) {
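The throttle compares the measured average page wait with the per-page time budget implied by the allocated iops. A sketch of that budget computation follows; the branch that turns the surplus into add_sleeptime_ms is cut off by the hunk, so the conversion below is an assumption:

#include <cstdint>

// Each allocated iop corresponds to a budget of 1000000/allocated_iops
// microseconds per page. If the observed average came in under budget,
// the surplus is (assumed here) converted into extra sleep time.
static uint64_t extra_sleep_ms(uint64_t avg_wait_time_us,
                               uint64_t allocated_iops)
{
	uint64_t alloc_wait_us = 1000000 / allocated_iops;
	return avg_wait_time_us < alloc_wait_us
		? (alloc_wait_us - avg_wait_time_us) / 1000
		: 0;
}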
@@ -1697,7 +1645,6 @@ note: innodb locks fil_space_latch and then block when allocating page
but locks block and then fil_space_latch when freeing page.
@param[in,out] state Rotation state
-@param[in] zip_size Compressed size or 0
@param[in] offset Page offset
@param[in,out] mtr Minitransaction
@param[out] allocation_status Allocation status
@@ -1708,7 +1655,6 @@ static
buf_block_t*
btr_scrub_get_block_and_allocation_status(
rotate_thread_t* state,
- uint zip_size,
ulint offset,
mtr_t* mtr,
btr_scrub_page_allocation_status_t *allocation_status,
@@ -1719,11 +1665,10 @@ btr_scrub_get_block_and_allocation_status(
fil_space_t* space = state->space;
ut_ad(space->n_pending_ops > 0);
- ut_ad(zip_size == fsp_flags_get_zip_size(space->flags));
mtr_start(&local_mtr);
- *allocation_status = fsp_page_is_free(space->id, offset, &local_mtr) ?
+ *allocation_status = fseg_page_is_free(space, offset) ?
BTR_SCRUB_PAGE_FREE :
BTR_SCRUB_PAGE_ALLOCATED;
@@ -1768,7 +1713,6 @@ fil_crypt_rotate_page(
fil_space_t*space = state->space;
ulint space_id = space->id;
ulint offset = state->offset;
- const uint zip_size = fsp_flags_get_zip_size(space->flags);
ulint sleeptime_ms = 0;
fil_space_crypt_t *crypt_data = space->crypt_data;
@@ -1789,13 +1733,10 @@ fil_crypt_rotate_page(
}
mtr_t mtr;
- mtr_start(&mtr);
- buf_block_t* block = fil_crypt_get_page_throttle(state,
- offset, &mtr,
- &sleeptime_ms);
-
- if (block) {
-
+ mtr.start();
+ if (buf_block_t* block = fil_crypt_get_page_throttle(state,
+ offset, &mtr,
+ &sleeptime_ms)) {
bool modified = false;
int needs_scrubbing = BTR_SCRUB_SKIP_PAGE;
lsn_t block_lsn = block->page.newest_modification;
@@ -1805,9 +1746,9 @@ fil_crypt_rotate_page(
if (space->is_stopping()) {
/* The tablespace is closing (in DROP TABLE or
TRUNCATE TABLE or similar): avoid further access */
- } else if (!*reinterpret_cast<uint32_t*>(FIL_PAGE_OFFSET
- + frame)) {
- /* It looks like this page was never
+ } else if (!kv && !*reinterpret_cast<uint16_t*>
+ (&frame[FIL_PAGE_TYPE])) {
+ /* It looks like this page is not
allocated. Because key rotation is accessing
pages in a pattern that is unlike the normal
B-tree and undo log access pattern, we cannot
@@ -1817,14 +1758,27 @@ fil_crypt_rotate_page(
tablespace latch before acquiring block->lock,
then the fseg_page_is_free() information
could be stale already. */
- ut_ad(kv == 0);
- ut_ad(page_get_space_id(frame) == 0);
+
+ /* If the data file was originally created
+ before MariaDB 10.0 or MySQL 5.6, some
+ allocated data pages could carry 0 in
+ FIL_PAGE_TYPE. The FIL_PAGE_TYPE on those
+ pages will be updated in
+ buf_flush_init_for_writing() when the page
+ is modified the next time.
+
+ Also, when the doublewrite buffer pages are
+ allocated on bootstrap in a non-debug build,
+ some dummy pages will be allocated, with 0 in
+ the FIL_PAGE_TYPE. Those pages should be
+ skipped from key rotation forever. */
} else if (fil_crypt_needs_rotation(
crypt_data,
kv,
key_state->key_version,
key_state->rotate_key_age)) {
+ mtr.set_named_space(space);
modified = true;
/* force rotation by dummy updating page */
@@ -1845,21 +1799,23 @@ fil_crypt_rotate_page(
BTR_SCRUB_PAGE_ALLOCATION_UNKNOWN);
}
- mtr_commit(&mtr);
- lsn_t end_lsn = mtr.end_lsn;
+ mtr.commit();
+ lsn_t end_lsn = mtr.commit_lsn();
if (needs_scrubbing == BTR_SCRUB_PAGE) {
- mtr_start(&mtr);
+ mtr.start();
/*
* refetch page and allocation status
*/
btr_scrub_page_allocation_status_t allocated;
+
block = btr_scrub_get_block_and_allocation_status(
- state, zip_size, offset, &mtr,
+ state, offset, &mtr,
&allocated,
&sleeptime_ms);
if (block) {
+ mtr.set_named_space(space);
/* get required table/index and index-locks */
needs_scrubbing = btr_scrub_recheck_page(
@@ -1869,7 +1825,7 @@ fil_crypt_rotate_page(
/* we need to refetch it once more now that we have
* index locked */
block = btr_scrub_get_block_and_allocation_status(
- state, zip_size, offset, &mtr,
+ state, offset, &mtr,
&allocated,
&sleeptime_ms);
@@ -1919,9 +1875,11 @@ fil_crypt_rotate_page(
}
} else {
/* If block read failed mtr memo and log should be empty. */
- ut_ad(dyn_array_get_data_size(&mtr.memo) == 0);
- ut_ad(dyn_array_get_data_size(&mtr.log) == 0);
- mtr_commit(&mtr);
+ ut_ad(!mtr.has_modifications());
+ ut_ad(!mtr.is_dirty());
+ ut_ad(mtr.get_memo()->size() == 0);
+ ut_ad(mtr.get_log()->size() == 0);
+ mtr.commit();
}
if (sleeptime_ms) {
@@ -1942,7 +1900,8 @@ fil_crypt_rotate_pages(
rotate_thread_t* state)
{
ulint space = state->space->id;
- ulint end = state->offset + state->batch;
+ ulint end = std::min(state->offset + state->batch,
+ state->space->free_limit);
ut_ad(state->space->n_pending_ops > 0);
@@ -1995,7 +1954,7 @@ fil_crypt_flush_space(
const ulonglong start = my_interval_timer();
do {
- success = buf_flush_list(ULINT_MAX, end_lsn, &n_pages);
+ success = buf_flush_lists(ULINT_MAX, end_lsn, &n_pages);
buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
sum_pages += n_pages;
} while (!success && !space->is_stopping());
@@ -2015,22 +1974,25 @@ fil_crypt_flush_space(
crypt_data->type = CRYPT_SCHEME_UNENCRYPTED;
}
- if (!space->is_stopping()) {
- /* update page 0 */
- mtr_t mtr;
- mtr_start(&mtr);
-
- const uint zip_size = fsp_flags_get_zip_size(state->space->flags);
+ if (space->is_stopping()) {
+ return;
+ }
- buf_block_t* block = buf_page_get_gen(space->id, zip_size, 0,
- RW_X_LATCH, NULL, BUF_GET,
- __FILE__, __LINE__, &mtr);
- byte* frame = buf_block_get_frame(block);
+ /* update page 0 */
+ mtr_t mtr;
+ mtr.start();
- crypt_data->write_page0(frame, &mtr);
+ dberr_t err;
- mtr_commit(&mtr);
+ if (buf_block_t* block = buf_page_get_gen(
+ page_id_t(space->id, 0), page_size_t(space->flags),
+ RW_X_LATCH, NULL, BUF_GET,
+ __FILE__, __LINE__, &mtr, &err)) {
+ mtr.set_named_space(space);
+ crypt_data->write_page0(space, block->frame, &mtr);
}
+
+ mtr.commit();
}
/***********************************************************************
@@ -2127,13 +2089,8 @@ A thread which monitors global key state and rotates tablespaces accordingly
@return a dummy parameter */
extern "C" UNIV_INTERN
os_thread_ret_t
-DECLARE_THREAD(fil_crypt_thread)(
-/*=============================*/
- void* arg __attribute__((unused))) /*!< in: a dummy parameter required
- * by os_thread_create */
+DECLARE_THREAD(fil_crypt_thread)(void*)
{
- UT_NOT_USED(arg);
-
mutex_enter(&fil_crypt_threads_mutex);
uint thread_no = srv_n_fil_crypt_threads_started;
srv_n_fil_crypt_threads_started++;
@@ -2245,7 +2202,7 @@ DECLARE_THREAD(fil_crypt_thread)(
/* We count the number of threads in os_thread_exit(). A created
thread should always use that to exit and not use return() to exit. */
- os_thread_exit(NULL);
+ os_thread_exit();
OS_THREAD_DUMMY_RETURN;
}
@@ -2270,10 +2227,10 @@ fil_crypt_set_thread_cnt(
for (uint i = 0; i < add; i++) {
os_thread_id_t rotation_thread_id;
os_thread_create(fil_crypt_thread, NULL, &rotation_thread_id);
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Creating #%d encryption thread id %lu total threads %u.",
- i+1, os_thread_pf(rotation_thread_id), new_cnt);
+ ib::info() << "Creating #"
+ << i+1 << " encryption thread id "
+ << os_thread_pf(rotation_thread_id)
+ << " total threads " << new_cnt << ".";
}
} else if (new_cnt < srv_n_fil_crypt_threads) {
srv_n_fil_crypt_threads = new_cnt;
@@ -2294,6 +2251,64 @@ fil_crypt_set_thread_cnt(
}
}
+/** Initialize the tablespace rotation_list
+if innodb_encryption_rotate_key_age=0. */
+static void fil_crypt_rotation_list_fill()
+{
+ ut_ad(mutex_own(&fil_system->mutex));
+
+ for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system->space_list);
+ space != NULL;
+ space = UT_LIST_GET_NEXT(space_list, space)) {
+ if (space->purpose != FIL_TYPE_TABLESPACE
+ || space->is_in_rotation_list()
+ || space->is_stopping()
+ || UT_LIST_GET_LEN(space->chain) == 0) {
+ continue;
+ }
+
+ /* Ensure that crypt_data has been initialized. */
+ if (!space->size) {
+ /* Protect the tablespace while we may
+ release fil_system->mutex. */
+ space->n_pending_ops++;
+ ut_d(const fil_space_t* s=)
+ fil_system->read_page0(space->id);
+ ut_ad(!s || s == space);
+ space->n_pending_ops--;
+ if (!space->size) {
+ /* Page 0 was not loaded.
+ Skip this tablespace. */
+ continue;
+ }
+ }
+
+ /* Skip ENCRYPTION!=DEFAULT tablespaces. */
+ if (space->crypt_data
+ && !space->crypt_data->is_default_encryption()) {
+ continue;
+ }
+
+ if (srv_encrypt_tables) {
+ /* Skip encrypted tablespaces if
+ innodb_encrypt_tables!=OFF */
+ if (space->crypt_data
+ && space->crypt_data->min_key_version) {
+ continue;
+ }
+ } else {
+ /* Skip unencrypted tablespaces if
+ innodb_encrypt_tables=OFF */
+ if (!space->crypt_data
+ || !space->crypt_data->min_key_version) {
+ continue;
+ }
+ }
+
+ fil_system->rotation_list.push_back(*space);
+ }
+}
+
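The filtering rules in fil_crypt_rotation_list_fill() can be summarized as one predicate: a tablespace joins the rotation list only if its current state disagrees with the target state implied by innodb_encrypt_tables. A simplified sketch (the struct is a stand-in, not an InnoDB type):

// Stand-in for the crypt_data checks above.
struct crypt_state_t {
	bool has_crypt_data;      // space->crypt_data != NULL
	bool default_encryption;  // ENCRYPTION=DEFAULT
	bool encrypted;           // min_key_version != 0
};

static bool belongs_in_rotation_list(const crypt_state_t& s,
                                     bool srv_encrypt_tables)
{
	/* ENCRYPTION=YES/NO tablespaces are never rotated here */
	if (s.has_crypt_data && !s.default_encryption) {
		return false;
	}
	return srv_encrypt_tables
		? !(s.has_crypt_data && s.encrypted)  // encrypt the unencrypted
		: (s.has_crypt_data && s.encrypted);  // decrypt the encrypted
}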
/*********************************************************************
Adjust max key age
@param[in] val New max key age */
@@ -2302,7 +2317,12 @@ void
fil_crypt_set_rotate_key_age(
uint val)
{
+ mutex_enter(&fil_system->mutex);
srv_fil_crypt_rotate_key_age = val;
+ if (val == 0) {
+ fil_crypt_rotation_list_fill();
+ }
+ mutex_exit(&fil_system->mutex);
os_event_set(fil_crypt_threads_event);
}
@@ -2326,7 +2346,16 @@ void
fil_crypt_set_encrypt_tables(
uint val)
{
+ mutex_enter(&fil_system->mutex);
+
srv_encrypt_tables = val;
+
+ if (srv_fil_crypt_rotate_key_age == 0) {
+ fil_crypt_rotation_list_fill();
+ }
+
+ mutex_exit(&fil_system->mutex);
+
os_event_set(fil_crypt_threads_event);
}
@@ -2336,12 +2365,11 @@ UNIV_INTERN
void
fil_crypt_threads_init()
{
- ut_ad(mutex_own(&fil_system->mutex));
if (!fil_crypt_threads_inited) {
- fil_crypt_event = os_event_create();
- fil_crypt_threads_event = os_event_create();
- mutex_create(fil_crypt_threads_mutex_key,
- &fil_crypt_threads_mutex, SYNC_NO_ORDER_CHECK);
+ fil_crypt_event = os_event_create(0);
+ fil_crypt_threads_event = os_event_create(0);
+ mutex_create(LATCH_ID_FIL_CRYPT_THREADS_MUTEX,
+ &fil_crypt_threads_mutex);
uint cnt = srv_n_fil_crypt_threads;
srv_n_fil_crypt_threads = 0;
@@ -2360,10 +2388,8 @@ fil_crypt_threads_cleanup()
return;
}
ut_a(!srv_n_fil_crypt_threads_started);
- os_event_free(fil_crypt_event);
- fil_crypt_event = NULL;
- os_event_free(fil_crypt_threads_event);
- fil_crypt_threads_event = NULL;
+ os_event_destroy(fil_crypt_event);
+ os_event_destroy(fil_crypt_threads_event);
mutex_free(&fil_crypt_threads_mutex);
fil_crypt_threads_inited = false;
}
@@ -2376,22 +2402,22 @@ void
fil_space_crypt_close_tablespace(
const fil_space_t* space)
{
- if (!srv_encrypt_tables || !space->crypt_data
- || srv_n_fil_crypt_threads == 0) {
+ fil_space_crypt_t* crypt_data = space->crypt_data;
+
+ if (!crypt_data || srv_n_fil_crypt_threads == 0
+ || !fil_crypt_threads_inited) {
return;
}
mutex_enter(&fil_crypt_threads_mutex);
- fil_space_crypt_t* crypt_data = space->crypt_data;
-
time_t start = time(0);
time_t last = start;
mutex_enter(&crypt_data->mutex);
mutex_exit(&fil_crypt_threads_mutex);
- uint cnt = crypt_data->rotate_state.active_threads;
+ ulint cnt = crypt_data->rotate_state.active_threads;
bool flushing = crypt_data->rotate_state.flushing;
while (cnt > 0 || flushing) {
@@ -2412,10 +2438,13 @@ fil_space_crypt_close_tablespace(
time_t now = time(0);
if (now >= last + 30) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Waited %ld seconds to drop space: %s (" ULINTPF
- ") active threads %u flushing=%d.",
- (long)(now - start), space->name, space->id, cnt, flushing);
+ ib::warn() << "Waited "
+ << now - start
+ << " seconds to drop space: "
+ << space->name << " ("
+ << space->id << ") active threads "
+				<< cnt << " flushing="
+ << flushing << ".";
last = now;
}
}
@@ -2436,7 +2465,14 @@ fil_space_crypt_get_status(
memset(status, 0, sizeof(*status));
ut_ad(space->n_pending_ops > 0);
- fil_crypt_read_crypt_data(const_cast<fil_space_t*>(space));
+
+ /* If there is no crypt data and we have not yet read
+ page 0 for this tablespace, we need to read it before
+ we can continue. */
+ if (!space->crypt_data) {
+ fil_crypt_read_crypt_data(const_cast<fil_space_t*>(space));
+ }
+
status->space = ULINT_UNDEFINED;
if (fil_space_crypt_t* crypt_data = space->crypt_data) {
@@ -2499,7 +2535,7 @@ fil_space_get_scrub_status(
status->space = space->id;
if (crypt_data != NULL) {
- status->compressed = fsp_flags_get_zip_size(space->flags) > 0;
+ status->compressed = FSP_FLAGS_GET_ZIP_SSIZE(space->flags) > 0;
mutex_enter(&crypt_data->mutex);
status->last_scrub_completed =
crypt_data->rotate_state.scrubbing.last_scrub_completed;
@@ -2519,10 +2555,9 @@ fil_space_get_scrub_status(
mutex_exit(&crypt_data->mutex);
}
}
+#endif /* UNIV_INNOCHECKSUM */
-#endif /* !UNIV_INNOCHECKSUM */
-
-/*********************************************************************
+/**
Verify that the post-encryption checksum matches the calculated checksum.
This function should be called only if the tablespace contains crypt_data
metadata (this is strong indication that tablespace is encrypted).
@@ -2530,11 +2565,11 @@ Function also verifies that traditional checksum does not match
the calculated checksum; if it did, the page could be valid unencrypted,
encrypted, or corrupted.
-@param[in] page Page to verify
-@param[in] zip_size zip size
+@param[in,out] page page frame (checksum is temporarily modified)
+@param[in] page_size page size
@return whether the encrypted page is OK */
-UNIV_INTERN
-bool fil_space_verify_crypt_checksum(const byte* page, ulint zip_size)
+bool
+fil_space_verify_crypt_checksum(const byte* page, const page_size_t& page_size)
{
ut_ad(mach_read_from_4(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION));
@@ -2555,12 +2590,23 @@ bool fil_space_verify_crypt_checksum(const byte* page, ulint zip_size)
switch (srv_checksum_algorithm_t(srv_checksum_algorithm)) {
case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
- if (zip_size) {
+ if (page_size.is_compressed()) {
return checksum == page_zip_calc_checksum(
- page, zip_size, SRV_CHECKSUM_ALGORITHM_CRC32);
+ page, page_size.physical(),
+ SRV_CHECKSUM_ALGORITHM_CRC32)
+#ifdef INNODB_BUG_ENDIAN_CRC32
+ || checksum == page_zip_calc_checksum(
+ page, page_size.physical(),
+ SRV_CHECKSUM_ALGORITHM_CRC32, true)
+#endif
+ ;
}
- return checksum == buf_calc_page_crc32(page);
+ return checksum == buf_calc_page_crc32(page)
+#ifdef INNODB_BUG_ENDIAN_CRC32
+ || checksum == buf_calc_page_crc32(page, true)
+#endif
+ ;
case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
/* Starting with MariaDB 10.1.25, 10.2.7, 10.3.1,
due to MDEV-12114, fil_crypt_calculate_checksum()
@@ -2578,15 +2624,24 @@ bool fil_space_verify_crypt_checksum(const byte* page, ulint zip_size)
if (checksum == BUF_NO_CHECKSUM_MAGIC) {
return true;
}
- if (zip_size) {
+ if (page_size.is_compressed()) {
return checksum == page_zip_calc_checksum(
- page, zip_size, SRV_CHECKSUM_ALGORITHM_CRC32)
+ page, page_size.physical(),
+ SRV_CHECKSUM_ALGORITHM_CRC32)
+#ifdef INNODB_BUG_ENDIAN_CRC32
|| checksum == page_zip_calc_checksum(
- page, zip_size,
+ page, page_size.physical(),
+ SRV_CHECKSUM_ALGORITHM_CRC32, true)
+#endif
+ || checksum == page_zip_calc_checksum(
+ page, page_size.physical(),
SRV_CHECKSUM_ALGORITHM_INNODB);
}
return checksum == buf_calc_page_crc32(page)
+#ifdef INNODB_BUG_ENDIAN_CRC32
+ || checksum == buf_calc_page_crc32(page, true)
+#endif
|| checksum == buf_calc_page_new_checksum(page);
}
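For uncompressed pages under the non-strict algorithms, the acceptance rule above boils down to: the stored value may match the CRC32 checksum, the endian-bug-compatible CRC32 variant when INNODB_BUG_ENDIAN_CRC32 is defined, or the legacy InnoDB checksum. A sketch of that rule (the checksum values are taken as inputs here, not recomputed):

#include <cstdint>

static bool crypt_checksum_ok(uint32_t stored, uint32_t crc32,
                              uint32_t crc32_big_endian,
                              uint32_t legacy_innodb,
                              bool accept_endian_bug)
{
	if (stored == crc32) {
		return true;
	}
	if (accept_endian_bug && stored == crc32_big_endian) {
		return true;
	}
	return stored == legacy_innodb;
}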
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index 7234a6c4102..d4b0d3f4377 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -27,43 +27,44 @@ Created 10/25/1995 Heikki Tuuri
#include "fil0fil.h"
#include "fil0crypt.h"
-#include <debug_sync.h>
-#include <my_dbug.h>
-
-#include "mem0mem.h"
-#include "hash0hash.h"
-#include "os0file.h"
-#include "mach0data.h"
+#include "btr0btr.h"
+#include "btr0sea.h"
#include "buf0buf.h"
-#include "buf0flu.h"
-#include "log0recv.h"
+#include "dict0boot.h"
+#include "dict0dict.h"
+#include "dict0load.h"
+#include "fsp0file.h"
#include "fsp0fsp.h"
-#include "srv0srv.h"
-#include "srv0start.h"
-#include "mtr0mtr.h"
+#include "hash0hash.h"
+#include "log0log.h"
+#include "log0recv.h"
+#include "mach0data.h"
#include "mtr0log.h"
-#include "dict0dict.h"
-#include "page0page.h"
+#include "os0file.h"
#include "page0zip.h"
-#include "trx0sys.h"
-#include "row0mysql.h"
-#ifndef UNIV_HOTBACKUP
-# include "buf0lru.h"
-# include "ibuf0ibuf.h"
-# include "sync0sync.h"
-#else /* !UNIV_HOTBACKUP */
-# include "srv0srv.h"
-static ulint srv_data_read, srv_data_written;
-#endif /* !UNIV_HOTBACKUP */
-#include "zlib.h"
-#ifdef __linux__
-#include <linux/fs.h>
-#include <sys/ioctl.h>
-#include <fcntl.h>
-#endif
#include "row0mysql.h"
+#include "row0trunc.h"
+#include "srv0start.h"
+#include "trx0purge.h"
+#include "buf0lru.h"
+#include "ibuf0ibuf.h"
+#include "os0event.h"
+#include "sync0sync.h"
+#include "buf0flu.h"
+#include "os0api.h"
-MYSQL_PLUGIN_IMPORT extern my_bool lower_case_file_system;
+/** Tries to close a file in the LRU list. The caller must hold the fil_sys
+mutex.
+@return true if success, false if should retry later; since i/o's
+generally complete in < 100 ms, and as InnoDB writes at most 128 pages
+from the buffer pool in a batch, and then immediately flushes the
+files, there is a good chance that we will find a suitable
+node from the LRU list the next time.
+@param[in] print_info if true, prints information why it
+ cannot close a file */
+static
+bool
+fil_try_to_close_file_in_LRU(bool print_info);
/*
IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE
@@ -121,34 +122,24 @@ out of the LRU-list and keep a count of pending operations. When an operation
completes, we decrement the count and return the file node to the LRU-list if
the count drops to zero. */
-/** When mysqld is run, the default directory "." is the mysqld datadir,
-but in the MySQL Embedded Server Library and mysqlbackup it is not the default
-directory, and we must set the base file path explicitly */
-UNIV_INTERN const char* fil_path_to_mysql_datadir = ".";
+/** Reference to the server data directory. Usually it is the
+current working directory ".", but in the MySQL Embedded Server Library
+it is an absolute path. */
+const char* fil_path_to_mysql_datadir;
+
+/** Common InnoDB file extensions */
+const char* dot_ext[] = { "", ".ibd", ".isl", ".cfg" };
/** The number of fsyncs done to the log */
-UNIV_INTERN ulint fil_n_log_flushes = 0;
+ulint fil_n_log_flushes = 0;
/** Number of pending redo log flushes */
-UNIV_INTERN ulint fil_n_pending_log_flushes = 0;
+ulint fil_n_pending_log_flushes = 0;
/** Number of pending tablespace flushes */
-UNIV_INTERN ulint fil_n_pending_tablespace_flushes = 0;
-
-/** Number of files currently open */
-UNIV_INTERN ulint fil_n_file_opened = 0;
+ulint fil_n_pending_tablespace_flushes = 0;
/** The null file address */
-UNIV_INTERN fil_addr_t fil_addr_null = {FIL_NULL, 0};
-
-#ifdef UNIV_PFS_MUTEX
-/* Key to register fil_system_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t fil_system_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-#ifdef UNIV_PFS_RWLOCK
-/* Key to register file space latch with performance schema */
-UNIV_INTERN mysql_pfs_key_t fil_space_latch_key;
-#endif /* UNIV_PFS_RWLOCK */
+fil_addr_t fil_addr_null = {FIL_NULL, 0};
/** The tablespace memory cache. This variable is NULL before the module is
initialized. */
@@ -158,19 +149,23 @@ UNIV_INTERN fil_system_t* fil_system = NULL;
UNIV_INTERN extern uint srv_fil_crypt_rotate_key_age;
UNIV_INTERN extern ib_mutex_t fil_crypt_threads_mutex;
-/** Determine if (i) is a user tablespace id or not. */
-# define fil_is_user_tablespace_id(i) (i != 0 \
- && !srv_is_undo_tablespace(i))
-
/** Determine if user has explicitly disabled fsync(). */
-#ifndef __WIN__
# define fil_buffering_disabled(s) \
- ((s)->purpose == FIL_TABLESPACE \
- && srv_unix_file_flush_method \
- == SRV_UNIX_O_DIRECT_NO_FSYNC)
-#else /* __WIN__ */
-# define fil_buffering_disabled(s) (0)
-#endif /* __WIN__ */
+ ((s)->purpose == FIL_TYPE_TABLESPACE \
+ && srv_file_flush_method \
+ == SRV_O_DIRECT_NO_FSYNC)
+
+/** Determine if the space id is a user tablespace id or not.
+@param[in] space_id Space ID to check
+@return true if it is a user tablespace ID */
+inline
+bool
+fil_is_user_tablespace_id(ulint space_id)
+{
+ return(space_id != TRX_SYS_SPACE
+ && space_id != SRV_TMP_SPACE_ID
+ && !srv_is_undo_tablespace(space_id));
+}
#ifdef UNIV_DEBUG
/** Try fil_validate() every this many times */
@@ -178,9 +173,9 @@ UNIV_INTERN extern ib_mutex_t fil_crypt_threads_mutex;
/******************************************************************//**
Checks the consistency of the tablespace cache some of the time.
-@return TRUE if ok or the check was skipped */
+@return true if ok or the check was skipped */
static
-ibool
+bool
fil_validate_skip(void)
/*===================*/
{
@@ -193,7 +188,7 @@ fil_validate_skip(void)
reduce the call frequency of the costly fil_validate() check
in debug builds. */
if (--fil_validate_count > 0) {
- return(TRUE);
+ return(true);
}
fil_validate_count = FIL_VALIDATE_SKIP;
@@ -203,15 +198,25 @@ fil_validate_skip(void)
/********************************************************************//**
Determines if a file node belongs to the least-recently-used list.
-@return TRUE if the file belongs to fil_system->LRU mutex. */
+@return true if the file belongs to fil_system->LRU mutex. */
UNIV_INLINE
-ibool
+bool
fil_space_belongs_in_lru(
/*=====================*/
const fil_space_t* space) /*!< in: file space */
{
- return(space->purpose == FIL_TABLESPACE
- && fil_is_user_tablespace_id(space->id));
+ switch (space->purpose) {
+ case FIL_TYPE_TEMPORARY:
+ case FIL_TYPE_LOG:
+ return(false);
+ case FIL_TYPE_TABLESPACE:
+ return(fil_is_user_tablespace_id(space->id));
+ case FIL_TYPE_IMPORT:
+ return(true);
+ }
+
+ ut_ad(0);
+ return(false);
}
/********************************************************************//**
@@ -229,96 +234,66 @@ fil_node_prepare_for_io(
fil_node_t* node, /*!< in: file node */
fil_system_t* system, /*!< in: tablespace memory cache */
fil_space_t* space); /*!< in: space */
-/********************************************************************//**
-Updates the data structures when an i/o operation finishes. Updates the
-pending i/o's field in the node appropriately. */
+
+/** Update the data structures when an i/o operation finishes.
+@param[in,out] node file node
+@param[in] type IO context */
static
void
-fil_node_complete_io(
-/*=================*/
- fil_node_t* node, /*!< in: file node */
- fil_system_t* system, /*!< in: tablespace memory cache */
- ulint type); /*!< in: OS_FILE_WRITE or OS_FILE_READ; marks
- the node as modified if
- type == OS_FILE_WRITE */
-/** Free a space object from the tablespace memory cache. Close the files in
-the chain but do not delete them. There must not be any pending i/o's or
-flushes on the files.
-The fil_system->mutex will be released.
-@param[in] id tablespace ID
-@param[in] x_latched whether the caller holds exclusive space->latch
-@return whether the tablespace existed */
-static
-bool
-fil_space_free_and_mutex_exit(ulint id, bool x_latched);
-/********************************************************************//**
-Reads data from a space to a buffer. Remember that the possible incomplete
+fil_node_complete_io(fil_node_t* node, const IORequest& type);
+
+/** Reads data from a space to a buffer. Remember that the possible incomplete
blocks at the end of file are ignored: they are not taken into account when
calculating the byte offset within a space.
+@param[in] page_id page id
+@param[in] page_size page size
+@param[in] byte_offset remainder of offset in bytes; in aio this
+must be divisible by the OS block size
+@param[in] len how many bytes to read; this must not cross a
+file boundary; in aio this must be a block size multiple
+@param[in,out] buf buffer where to store data read; in aio this
+must be appropriately aligned
@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
i/o on a tablespace which does not exist */
UNIV_INLINE
dberr_t
fil_read(
-/*=====*/
- bool sync, /*!< in: true if synchronous aio is desired */
- ulint space_id, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint block_offset, /*!< in: offset in number of blocks */
- ulint byte_offset, /*!< in: remainder of offset in bytes; in aio
- this must be divisible by the OS block size */
- ulint len, /*!< in: how many bytes to read; this must not
- cross a file boundary; in aio this must be a
- block size multiple */
- void* buf, /*!< in/out: buffer where to store data read;
- in aio this must be appropriately aligned */
- void* message, /*!< in: message for aio handler if non-sync
- aio used, else ignored */
- ulint* write_size) /*!< in/out: Actual write size initialized
- after fist successfull trim
- operation for this page and if
- initialized we do not trim again if
- actual page size does not decrease. */
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ ulint byte_offset,
+ ulint len,
+ void* buf)
{
- return(fil_io(OS_FILE_READ, sync, space_id, zip_size, block_offset,
- byte_offset, len, buf, message, write_size));
+ return(fil_io(IORequestRead, true, page_id, page_size,
+ byte_offset, len, buf, NULL));
}
-/********************************************************************//**
-Writes data to a space from a buffer. Remember that the possible incomplete
+/** Writes data to a space from a buffer. Remember that the possible incomplete
blocks at the end of file are ignored: they are not taken into account when
calculating the byte offset within a space.
+@param[in] page_id page id
+@param[in] page_size page size
+@param[in] byte_offset remainder of offset in bytes; in aio this
+must be divisible by the OS block size
+@param[in] len how many bytes to write; this must not cross
+a file boundary; in aio this must be a block size multiple
+@param[in] buf buffer from which to write; in aio this must
+be appropriately aligned
@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
i/o on a tablespace which does not exist */
UNIV_INLINE
dberr_t
fil_write(
-/*======*/
- bool sync, /*!< in: true if synchronous aio is desired */
- ulint space_id, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint block_offset, /*!< in: offset in number of blocks */
- ulint byte_offset, /*!< in: remainder of offset in bytes; in aio
- this must be divisible by the OS block size */
- ulint len, /*!< in: how many bytes to write; this must
- not cross a file boundary; in aio this must
- be a block size multiple */
- void* buf, /*!< in: buffer from which to write; in aio
- this must be appropriately aligned */
- void* message, /*!< in: message for aio handler if non-sync
- aio used, else ignored */
- ulint* write_size) /*!< in/out: Actual write size initialized
- after fist successfull trim
- operation for this page and if
- initialized we do not trim again if
- actual page size does not decrease. */
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ ulint byte_offset,
+ ulint len,
+ void* buf)
{
ut_ad(!srv_read_only_mode);
- return(fil_io(OS_FILE_WRITE, sync, space_id, zip_size, block_offset,
- byte_offset, len, buf, message, write_size));
+ return(fil_io(IORequestWrite, true, page_id, page_size,
+ byte_offset, len, buf, NULL));
}
/*******************************************************************//**
@@ -342,13 +317,12 @@ fil_space_get_by_id(
return(space);
}
-/*******************************************************************//**
-Returns the table space by a given name, NULL if not found. */
-UNIV_INLINE
+/** Look up a tablespace.
+@param[in] name tablespace name
+@return tablespace
+@retval NULL if not found */
fil_space_t*
-fil_space_get_by_name(
-/*==================*/
- const char* name) /*!< in: space name */
+fil_space_get_by_name(const char* name)
{
fil_space_t* space;
ulint fold;
@@ -365,44 +339,34 @@ fil_space_get_by_name(
return(space);
}
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Returns the version number of a tablespace, -1 if not found.
-@return version number, -1 if the tablespace does not exist in the
-memory cache */
-UNIV_INTERN
-ib_int64_t
-fil_space_get_version(
-/*==================*/
- ulint id) /*!< in: space id */
+/** Look up a tablespace.
+The caller should hold an InnoDB table lock or a MDL that prevents
+the tablespace from being dropped during the operation,
+or the caller should be in single-threaded crash recovery mode
+(no user connections that could drop tablespaces).
+If this is not the case, fil_space_acquire() and fil_space_release()
+should be used instead.
+@param[in] id tablespace ID
+@return tablespace, or NULL if not found */
+fil_space_t*
+fil_space_get(
+ ulint id)
{
- fil_space_t* space;
- ib_int64_t version = -1;
-
- ut_ad(fil_system);
-
mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- if (space) {
- version = space->tablespace_version;
- }
-
+ fil_space_t* space = fil_space_get_by_id(id);
mutex_exit(&fil_system->mutex);
-
- return(version);
+ ut_ad(space == NULL || space->purpose != FIL_TYPE_LOG);
+ return(space);
}
-/*******************************************************************//**
-Returns the latch of a file space.
-@return latch protecting storage allocation */
-UNIV_INTERN
+/** Returns the latch of a file space.
+@param[in] id space id
+@param[out] flags tablespace flags
+@return latch protecting storage allocation */
rw_lock_t*
fil_space_get_latch(
-/*================*/
- ulint id, /*!< in: space id */
- ulint* flags) /*!< out: tablespace flags */
+ ulint id,
+ ulint* flags)
{
fil_space_t* space;
@@ -423,17 +387,14 @@ fil_space_get_latch(
return(&(space->latch));
}
-/*******************************************************************//**
-Returns the type of a file space.
-@return ULINT_UNDEFINED, or FIL_TABLESPACE or FIL_LOG */
-UNIV_INTERN
-ulint
+/** Gets the type of a file space.
+@param[in] id tablespace identifier
+@return file type */
+fil_type_t
fil_space_get_type(
-/*===============*/
- ulint id) /*!< in: space id */
+ ulint id)
{
fil_space_t* space;
- ulint type = ULINT_UNDEFINED;
ut_ad(fil_system);
@@ -441,369 +402,396 @@ fil_space_get_type(
space = fil_space_get_by_id(id);
+ ut_a(space);
+
mutex_exit(&fil_system->mutex);
- if (space) {
- type = space->purpose;
- }
+ return(space->purpose);
+}
- return(type);
+/** Note that a tablespace has been imported.
+It is initially marked as FIL_TYPE_IMPORT so that no logging is
+done during the import process when the space ID is stamped to each page.
+Now we change it to FIL_TYPE_TABLESPACE to start redo and undo logging.
+NOTE: temporary tablespaces are never imported.
+@param[in] id tablespace identifier */
+void
+fil_space_set_imported(
+ ulint id)
+{
+ ut_ad(fil_system != NULL);
+
+ mutex_enter(&fil_system->mutex);
+
+ fil_space_t* space = fil_space_get_by_id(id);
+ const fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
+
+ ut_ad(space->purpose == FIL_TYPE_IMPORT);
+ space->purpose = FIL_TYPE_TABLESPACE;
+ space->atomic_write_supported = node->atomic_write
+ && srv_use_atomic_writes
+ && my_test_if_atomic_write(node->handle,
+ int(page_size_t(space->flags)
+ .physical()));
+ mutex_exit(&fil_system->mutex);
}
-#endif /* !UNIV_HOTBACKUP */
/**********************************************************************//**
Checks if all the file nodes in a space are flushed. The caller must hold
the fil_system mutex.
-@return true if all are flushed */
+@return true if all are flushed */
static
bool
fil_space_is_flushed(
/*=================*/
fil_space_t* space) /*!< in: space */
{
- fil_node_t* node;
-
ut_ad(mutex_own(&fil_system->mutex));
- node = UT_LIST_GET_FIRST(space->chain);
+ for (const fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
+ node != NULL;
+ node = UT_LIST_GET_NEXT(chain, node)) {
- while (node) {
- if (node->modification_counter > node->flush_counter) {
+ if (node->needs_flush) {
ut_ad(!fil_buffering_disabled(space));
return(false);
}
-
- node = UT_LIST_GET_NEXT(chain, node);
}
return(true);
}
-/*******************************************************************//**
-Appends a new file to the chain of files of a space. File must be closed.
-@return pointer to the file name, or NULL on error */
-UNIV_INTERN
-char*
-fil_node_create(
-/*============*/
- const char* name, /*!< in: file name (file must be closed) */
- ulint size, /*!< in: file size in database blocks, rounded
- downwards to an integer */
- ulint id, /*!< in: space id where to append */
- ibool is_raw) /*!< in: TRUE if a raw device or
- a raw disk partition */
+
+/** Append a file to the chain of files of a space.
+@param[in] name file name of a file that is not open
+@param[in] handle file handle, or OS_FILE_CLOSED
+@param[in] size file size in entire database pages
+@param[in] is_raw whether this is a raw device
+@param[in] atomic_write true if atomic write could be enabled
+@param[in] max_pages maximum number of pages in file,
+or ULINT_MAX for unlimited
+@return file object */
+fil_node_t* fil_space_t::add(const char* name, pfs_os_file_t handle,
+ ulint size, bool is_raw, bool atomic_write,
+ ulint max_pages)
{
fil_node_t* node;
- fil_space_t* space;
- ut_a(fil_system);
- ut_a(name);
+ ut_ad(name != NULL);
+ ut_ad(fil_system != NULL);
- mutex_enter(&fil_system->mutex);
+ node = reinterpret_cast<fil_node_t*>(ut_zalloc_nokey(sizeof(*node)));
- node = static_cast<fil_node_t*>(mem_zalloc(sizeof(fil_node_t)));
+ node->handle = handle;
node->name = mem_strdup(name);
ut_a(!is_raw || srv_start_raw_disk_in_use);
- node->sync_event = os_event_create();
node->is_raw_disk = is_raw;
+
node->size = size;
+
node->magic_n = FIL_NODE_MAGIC_N;
- space = fil_space_get_by_id(id);
+ node->init_size = size;
+ node->max_size = max_pages;
- if (!space) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: Could not find tablespace %lu for\n"
- "InnoDB: file ", (ulong) id);
- ut_print_filename(stderr, name);
- fputs(" in the tablespace memory cache.\n", stderr);
- mem_free(node->name);
+ node->space = this;
- mem_free(node);
+ node->atomic_write = atomic_write;
- mutex_exit(&fil_system->mutex);
-
- return(NULL);
+ mutex_enter(&fil_system->mutex);
+ this->size += size;
+ UT_LIST_ADD_LAST(chain, node);
+ if (node->is_open()) {
+ fil_system->n_open++;
}
+ mutex_exit(&fil_system->mutex);
- space->size += size;
-
- node->space = space;
+ return node;
+}
- UT_LIST_ADD_LAST(chain, space->chain, node);
+/** Read the first page of a data file.
+@param[in] first whether this is the very first read
+@return whether the page was found valid */
+bool fil_node_t::read_page0(bool first)
+{
+ ut_ad(mutex_own(&fil_system->mutex));
+ ut_a(space->purpose != FIL_TYPE_LOG);
+ const page_size_t page_size(space->flags);
+ const ulint psize = page_size.physical();
- if (id < SRV_LOG_SPACE_FIRST_ID && fil_system->max_assigned_id < id) {
+ os_offset_t size_bytes = os_file_get_size(handle);
+ ut_a(size_bytes != (os_offset_t) -1);
+ const ulint min_size = FIL_IBD_FILE_INITIAL_SIZE * psize;
- fil_system->max_assigned_id = id;
+ if (size_bytes < min_size) {
+ ib::error() << "The size of the file " << name
+ << " is only " << size_bytes
+ << " bytes, should be at least " << min_size;
+ return false;
}
- mutex_exit(&fil_system->mutex);
+ byte* buf2 = static_cast<byte*>(ut_malloc_nokey(2 * psize));
- return(node->name);
-}
-
-/********************************************************************//**
-Opens a file of a node of a tablespace. The caller must own the fil_system
-mutex.
-@return false if the file can't be opened, otherwise true */
-static
-bool
-fil_node_open_file(
-/*===============*/
- fil_node_t* node, /*!< in: file node */
- fil_system_t* system, /*!< in: tablespace memory cache */
- fil_space_t* space) /*!< in: space */
-{
- os_offset_t size_bytes;
- ibool ret;
- ibool success;
- byte* buf2;
- byte* page;
+ /* Align the memory for file i/o if we might have O_DIRECT set */
+ byte* page = static_cast<byte*>(ut_align(buf2, psize));
+ IORequest request(IORequest::READ);
+ if (os_file_read(request, handle, page, 0, psize) != DB_SUCCESS) {
+ ib::error() << "Unable to read first page of file " << name;
+ ut_free(buf2);
+ return false;
+ }
+ srv_stats.page0_read.add(1);
+ const ulint space_id = fsp_header_get_space_id(page);
+ ulint flags = fsp_header_get_flags(page);
+ const ulint size = fsp_header_get_field(page, FSP_SIZE);
+ const ulint free_limit = fsp_header_get_field(page, FSP_FREE_LIMIT);
+ const ulint free_len = flst_get_len(FSP_HEADER_OFFSET + FSP_FREE
+ + page);
+ /* Try to read crypt_data from page 0 if it is not yet read. */
+ if (!space->crypt_data) {
+ space->crypt_data = fil_space_read_crypt_data(page_size, page);
+ }
+ ut_free(buf2);
- ut_ad(mutex_own(&(system->mutex)));
- ut_a(node->n_pending == 0);
- ut_a(node->open == FALSE);
+ if (!fsp_flags_is_valid(flags, space->id)) {
+ ulint cflags = fsp_flags_convert_from_101(flags);
+ if (cflags == ULINT_UNDEFINED
+ || (cflags ^ space->flags) & ~FSP_FLAGS_MEM_MASK) {
+ ib::error()
+ << "Expected tablespace flags "
+ << ib::hex(space->flags)
+ << " but found " << ib::hex(flags)
+ << " in the file " << name;
+ return false;
+ }
- if (node->size == 0) {
- /* It must be a single-table tablespace and we do not know the
- size of the file yet. First we open the file in the normal
- mode, no async I/O here, for simplicity. Then do some checks,
- and close the file again.
- NOTE that we could not use the simple file read function
- os_file_read() in Windows to read from a file opened for
- async I/O! */
+ flags = cflags;
+ }
- node->handle = os_file_create_simple_no_error_handling(
- innodb_file_data_key, node->name, OS_FILE_OPEN,
- OS_FILE_READ_ONLY, &success, 0);
+ if (UNIV_UNLIKELY(space_id != space->id)) {
+ ib::error() << "Expected tablespace id " << space->id
+ << " but found " << space_id
+ << " in the file " << name;
+ return false;
+ }
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(true);
+ if (first) {
+ ut_ad(space->id != TRX_SYS_SPACE);
- ib_logf(IB_LOG_LEVEL_WARN, "InnoDB: Error: cannot "
- "open %s\n. InnoDB: Have you deleted .ibd "
- "files under a running mysqld server?\n",
- node->name);
+ /* Truncate the size to a multiple of extent size. */
+ ulint mask = psize * FSP_EXTENT_SIZE - 1;
- return(false);
+ if (size_bytes <= mask) {
+ /* .ibd files start smaller than an
+ extent size. Do not truncate valid data. */
+ } else {
+ size_bytes &= ~os_offset_t(mask);
}
- size_bytes = os_file_get_size(node->handle);
- ut_a(size_bytes != (os_offset_t) -1);
-
- node->file_block_size = os_file_get_block_size(
- node->handle, node->name);
- space->file_block_size = node->file_block_size;
+ this->size = ulint(size_bytes / psize);
+ space->size += this->size;
+ } else if (space->id != TRX_SYS_SPACE || space->size_in_header) {
+ /* If this is not the first-time open, do nothing.
+ For the system tablespace, we always get invoked as
+ first=false, so we detect the true first-time-open based
+		on size_in_header and proceed to initialize the data. */
+ return true;
+ }
+
+ ut_ad(space->free_limit == 0 || space->free_limit == free_limit);
+ ut_ad(space->free_len == 0 || space->free_len == free_len);
+ space->size_in_header = size;
+ space->free_limit = free_limit;
+ space->free_len = free_len;
+ return true;
+}
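The first-time-open size truncation above rounds a file larger than one extent down to a whole number of extents, while leaving smaller .ibd files untouched so that valid data is never cut off. The arithmetic, as a standalone sketch:

#include <cstdint>

// psize is the physical page size; extent_pages plays the role of
// FSP_EXTENT_SIZE. Files of at most one extent are returned unchanged.
static uint64_t rounded_size_bytes(uint64_t size_bytes, uint64_t psize,
                                   uint64_t extent_pages)
{
	uint64_t mask = psize * extent_pages - 1;
	return size_bytes <= mask ? size_bytes : size_bytes & ~mask;
}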
-#ifdef UNIV_HOTBACKUP
- if (space->id == 0) {
- node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
- os_file_close(node->handle);
- goto add_size;
- }
-#endif /* UNIV_HOTBACKUP */
- ut_a(space->purpose != FIL_LOG);
- ut_a(fil_is_user_tablespace_id(space->id));
-
- if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "The size of the file %s is only " UINT64PF
- " bytes, should be at least " ULINTPF,
- node->name, size_bytes,
- FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE);
- os_file_close(node->handle);
- return(false);
- }
+/** Open a file node of a tablespace.
+The caller must own the fil_system mutex.
+@param[in,out] node File node
+@return false if the file can't be opened, otherwise true */
+static bool fil_node_open_file(fil_node_t* node)
+{
+ bool success;
+ bool read_only_mode;
+ fil_space_t* space = node->space;
- /* Read the first page of the tablespace */
+ ut_ad(mutex_own(&fil_system->mutex));
+ ut_a(node->n_pending == 0);
+ ut_a(!node->is_open());
- buf2 = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
- /* Align the memory for file i/o if we might have O_DIRECT
- set */
- page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
+ read_only_mode = space->purpose != FIL_TYPE_TEMPORARY
+ && srv_read_only_mode;
- success = os_file_read(node->handle, page, 0, UNIV_PAGE_SIZE);
- srv_stats.page0_read.add(1);
+ const bool first_time_open = node->size == 0;
- const ulint space_id = fsp_header_get_space_id(page);
- ulint flags = fsp_header_get_flags(page);
+ if (first_time_open
+ || (space->purpose == FIL_TYPE_TABLESPACE
+ && node == UT_LIST_GET_FIRST(space->chain)
+ && srv_startup_is_before_trx_rollback_phase
+ && !undo::Truncate::was_tablespace_truncated(space->id))) {
+ /* We do not know the size of the file yet. First we
+ open the file in the normal mode, no async I/O here,
+ for simplicity. Then do some checks, and close the
+ file again. NOTE that we could not use the simple
+ file read function os_file_read() in Windows to read
+ from a file opened for async I/O! */
- /* Try to read crypt_data from page 0 if it is not yet
- read. */
- if (!node->space->crypt_data) {
- const ulint offset = fsp_header_get_crypt_offset(
- fsp_flags_get_zip_size(flags));
- node->space->crypt_data = fil_space_read_crypt_data(space_id, page, offset);
- }
+retry:
+ node->handle = os_file_create(
+ innodb_data_file_key, node->name,
+ node->is_raw_disk
+ ? OS_FILE_OPEN_RAW | OS_FILE_ON_ERROR_NO_EXIT
+ : OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT,
+ OS_FILE_AIO, OS_DATA_FILE, read_only_mode, &success);
- ut_free(buf2);
- os_file_close(node->handle);
-
- if (!fsp_flags_is_valid(flags, space->id)) {
- ulint cflags = fsp_flags_convert_from_101(flags);
- if (cflags == ULINT_UNDEFINED) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Expected tablespace flags 0x%x"
- " but found 0x%x in the file %s",
- int(space->flags), int(flags),
- node->name);
- return(false);
+ if (!success) {
+ /* The following call prints an error message */
+ ulint err = os_file_get_last_error(true);
+ if (err == EMFILE + 100) {
+ if (fil_try_to_close_file_in_LRU(true))
+ goto retry;
}
- flags = cflags;
- }
-
- if (UNIV_UNLIKELY(space_id != space->id)) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "tablespace id is " ULINTPF " in the data dictionary"
- " but in file %s it is " ULINTPF "!\n",
- space->id, node->name, space_id);
+ ib::warn() << "Cannot open '" << node->name << "'."
+ " Have you deleted .ibd files under a"
+ " running mysqld server?";
return(false);
}
- if (ulint zip_size = fsp_flags_get_zip_size(flags)) {
- node->size = ulint(size_bytes / zip_size);
- } else {
- node->size = ulint(size_bytes / UNIV_PAGE_SIZE);
+ if (!node->read_page0(first_time_open)) {
+ os_file_close(node->handle);
+ node->handle = OS_FILE_CLOSED;
+ return false;
}
-
-#ifdef UNIV_HOTBACKUP
-add_size:
-#endif /* UNIV_HOTBACKUP */
- space->size += node->size;
- }
-
- ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(space->flags);
-
- /* printf("Opening file %s\n", node->name); */
-
- /* Open the file for reading and writing, in Windows normally in the
- unbuffered async I/O mode, though global variables may make
- os_file_create() to fall back to the normal file I/O mode. */
-
- if (space->purpose == FIL_LOG) {
- node->handle = os_file_create(innodb_file_log_key,
- node->name, OS_FILE_OPEN,
- OS_FILE_AIO, OS_LOG_FILE,
- &ret, atomic_writes);
- } else if (node->is_raw_disk) {
- node->handle = os_file_create(innodb_file_data_key,
- node->name,
- OS_FILE_OPEN_RAW,
- OS_FILE_AIO, OS_DATA_FILE,
- &ret, atomic_writes);
+ } else if (space->purpose == FIL_TYPE_LOG) {
+ node->handle = os_file_create(
+ innodb_log_file_key, node->name, OS_FILE_OPEN,
+ OS_FILE_AIO, OS_LOG_FILE, read_only_mode, &success);
} else {
- node->handle = os_file_create(innodb_file_data_key,
- node->name, OS_FILE_OPEN,
- OS_FILE_AIO, OS_DATA_FILE,
- &ret, atomic_writes);
- }
-
- if (node->file_block_size == 0) {
- node->file_block_size = os_file_get_block_size(
- node->handle, node->name);
- space->file_block_size = node->file_block_size;
- }
-
- ut_a(ret);
-
- node->open = TRUE;
-
- system->n_open++;
- fil_n_file_opened++;
+ node->handle = os_file_create(
+ innodb_data_file_key, node->name,
+ node->is_raw_disk
+ ? OS_FILE_OPEN_RAW | OS_FILE_ON_ERROR_NO_EXIT
+ : OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT,
+ OS_FILE_AIO, OS_DATA_FILE, read_only_mode, &success);
+ }
+
+ if (space->purpose != FIL_TYPE_LOG) {
+ /*
+ For the temporary tablespace and during the
+ non-redo-logged adjustments in
+ IMPORT TABLESPACE, we do not care about
+ the atomicity of writes.
+
+		Atomic writes are supported if the file can be used
+		with atomic_writes (not a log file), O_DIRECT is
+		used (tested in ha_innodb.cc), and the file is on a
+		device and file system that support atomic writes
+		for the given block size.
+ */
+ space->atomic_write_supported
+ = space->purpose == FIL_TYPE_TEMPORARY
+ || space->purpose == FIL_TYPE_IMPORT
+ || (node->atomic_write
+ && srv_use_atomic_writes
+ && my_test_if_atomic_write(
+ node->handle,
+ int(page_size_t(space->flags)
+ .physical())));
+ }
+
+ ut_a(success);
+ ut_a(node->is_open());
+
+ fil_system->n_open++;
if (fil_space_belongs_in_lru(space)) {
/* Put the node to the LRU list */
- UT_LIST_ADD_FIRST(LRU, system->LRU, node);
+ UT_LIST_ADD_FIRST(fil_system->LRU, node);
}
return(true);
}
-/**********************************************************************//**
-Closes a file. */
+/** Close a file node.
+@param[in,out] node File node */
static
void
fil_node_close_file(
-/*================*/
- fil_node_t* node, /*!< in: file node */
- fil_system_t* system) /*!< in: tablespace memory cache */
+ fil_node_t* node)
{
- ibool ret;
+ bool ret;
- ut_ad(node && system);
- ut_ad(mutex_own(&(system->mutex)));
- ut_a(node->open);
+ ut_ad(mutex_own(&(fil_system->mutex)));
+ ut_a(node->is_open());
ut_a(node->n_pending == 0);
ut_a(node->n_pending_flushes == 0);
ut_a(!node->being_extended);
-#ifndef UNIV_HOTBACKUP
- ut_a(node->modification_counter == node->flush_counter
- || srv_fast_shutdown == 2);
-#endif /* !UNIV_HOTBACKUP */
+ ut_a(!node->needs_flush
+ || node->space->purpose == FIL_TYPE_TEMPORARY
+ || srv_fast_shutdown == 2
+ || !srv_was_started);
ret = os_file_close(node->handle);
ut_a(ret);
/* printf("Closing file %s\n", node->name); */
- node->open = FALSE;
- ut_a(system->n_open > 0);
- system->n_open--;
- fil_n_file_opened--;
+ node->handle = OS_FILE_CLOSED;
+ ut_ad(!node->is_open());
+ ut_a(fil_system->n_open > 0);
+ fil_system->n_open--;
if (fil_space_belongs_in_lru(node->space)) {
- ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
+ ut_a(UT_LIST_GET_LEN(fil_system->LRU) > 0);
/* The node is in the LRU list, remove it */
- UT_LIST_REMOVE(LRU, system->LRU, node);
+ UT_LIST_REMOVE(fil_system->LRU, node);
}
}
-/********************************************************************//**
-Tries to close a file in the LRU list. The caller must hold the fil_sys
+/** Tries to close a file in the LRU list. The caller must hold the fil_sys
mutex.
-@return TRUE if success, FALSE if should retry later; since i/o's
+@return true if success, false if should retry later; since i/o's
generally complete in < 100 ms, and as InnoDB writes at most 128 pages
from the buffer pool in a batch, and then immediately flushes the
files, there is a good chance that we will find a suitable
-node from the LRU list */
+node from the LRU list the next time.
+@param[in]	print_info	if true, prints information why it
+			cannot close a file */
static
-ibool
+bool
fil_try_to_close_file_in_LRU(
-/*=========================*/
- ibool print_info) /*!< in: if TRUE, prints information why it
- cannot close a file */
+	bool print_info)
{
fil_node_t* node;
ut_ad(mutex_own(&fil_system->mutex));
if (print_info) {
- fprintf(stderr,
- "InnoDB: fil_sys open file LRU len %lu\n",
- (ulong) UT_LIST_GET_LEN(fil_system->LRU));
+ ib::info() << "fil_sys open file LRU len "
+ << UT_LIST_GET_LEN(fil_system->LRU);
}
for (node = UT_LIST_GET_LAST(fil_system->LRU);
node != NULL;
node = UT_LIST_GET_PREV(LRU, node)) {
- if (node->modification_counter == node->flush_counter
+ if (!node->needs_flush
&& node->n_pending_flushes == 0
&& !node->being_extended) {
- fil_node_close_file(node, fil_system);
+ fil_node_close_file(node);
- return(TRUE);
+ return(true);
}
if (!print_info) {
@@ -811,30 +799,24 @@ fil_try_to_close_file_in_LRU(
}
if (node->n_pending_flushes > 0) {
- fputs("InnoDB: cannot close file ", stderr);
- ut_print_filename(stderr, node->name);
- fprintf(stderr, ", because n_pending_flushes %lu\n",
- (ulong) node->n_pending_flushes);
- }
- if (node->modification_counter != node->flush_counter) {
- fputs("InnoDB: cannot close file ", stderr);
- ut_print_filename(stderr, node->name);
- fprintf(stderr,
- ", because mod_count %ld != fl_count %ld\n",
- (long) node->modification_counter,
- (long) node->flush_counter);
+ ib::info() << "Cannot close file " << node->name
+ << ", because n_pending_flushes "
+ << node->n_pending_flushes;
+ }
+ if (node->needs_flush) {
+ ib::warn() << "Cannot close file " << node->name
+				<< ", because it should be flushed first";
}
if (node->being_extended) {
- fputs("InnoDB: cannot close file ", stderr);
- ut_print_filename(stderr, node->name);
- fprintf(stderr, ", because it is being extended\n");
+ ib::info() << "Cannot close file " << node->name
+ << ", because it is being extended";
}
}
- return(FALSE);
+ return(false);
}
/** Flush any writes cached by the file system.
@@ -850,7 +832,7 @@ static void fil_flush_low(fil_space_t* space, bool metadata = false)
/* No need to flush. User has explicitly disabled
buffering. */
- ut_ad(!space->is_in_unflushed_spaces);
+ ut_ad(!space->is_in_unflushed_spaces());
ut_ad(fil_space_is_flushed(space));
ut_ad(space->n_pending_flushes == 0);
@@ -858,8 +840,7 @@ static void fil_flush_low(fil_space_t* space, bool metadata = false)
for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
node != NULL;
node = UT_LIST_GET_NEXT(chain, node)) {
- ut_ad(node->modification_counter
- == node->flush_counter);
+ ut_ad(!node->needs_flush);
ut_ad(node->n_pending_flushes == 0);
}
#endif /* UNIV_DEBUG */
@@ -874,52 +855,34 @@ static void fil_flush_low(fil_space_t* space, bool metadata = false)
node != NULL;
node = UT_LIST_GET_NEXT(chain, node)) {
- ib_int64_t old_mod_counter = node->modification_counter;
-
- if (old_mod_counter <= node->flush_counter) {
+ if (!node->needs_flush) {
continue;
}
- ut_a(node->open);
+ ut_a(node->is_open());
- if (space->purpose == FIL_TABLESPACE) {
+ switch (space->purpose) {
+ case FIL_TYPE_TEMPORARY:
+ ut_ad(0); // we already checked for this
+ case FIL_TYPE_TABLESPACE:
+ case FIL_TYPE_IMPORT:
fil_n_pending_tablespace_flushes++;
- } else {
+ break;
+ case FIL_TYPE_LOG:
fil_n_pending_log_flushes++;
fil_n_log_flushes++;
+ break;
}
-#ifdef __WIN__
+#ifdef _WIN32
if (node->is_raw_disk) {
goto skip_flush;
}
-#endif /* __WIN__ */
-retry:
- if (node->n_pending_flushes > 0) {
- /* We want to avoid calling os_file_flush() on
- the file twice at the same time, because we do
- not know what bugs OS's may contain in file
- i/o */
-
- ib_int64_t sig_count =
- os_event_reset(node->sync_event);
-
- mutex_exit(&fil_system->mutex);
-
- os_event_wait_low(node->sync_event, sig_count);
-
- mutex_enter(&fil_system->mutex);
-
- if (node->flush_counter >= old_mod_counter) {
-
- goto skip_flush;
- }
-
- goto retry;
- }
+#endif /* _WIN32 */
- ut_a(node->open);
+ ut_a(node->is_open());
node->n_pending_flushes++;
+ node->needs_flush = false;
mutex_exit(&fil_system->mutex);
@@ -927,30 +890,31 @@ retry:
mutex_enter(&fil_system->mutex);
- os_event_set(node->sync_event);
-
node->n_pending_flushes--;
+#ifdef _WIN32
skip_flush:
- if (node->flush_counter < old_mod_counter) {
- node->flush_counter = old_mod_counter;
-
- if (space->is_in_unflushed_spaces
+#endif /* _WIN32 */
+ if (!node->needs_flush) {
+ if (space->is_in_unflushed_spaces()
&& fil_space_is_flushed(space)) {
- space->is_in_unflushed_spaces = false;
-
- UT_LIST_REMOVE(
- unflushed_spaces,
- fil_system->unflushed_spaces,
- space);
+ fil_system->unflushed_spaces.remove(*space);
}
}
- if (space->purpose == FIL_TABLESPACE) {
+ switch (space->purpose) {
+ case FIL_TYPE_TEMPORARY:
+ break;
+ case FIL_TYPE_TABLESPACE:
+ case FIL_TYPE_IMPORT:
fil_n_pending_tablespace_flushes--;
- } else {
+ continue;
+ case FIL_TYPE_LOG:
fil_n_pending_log_flushes--;
+ continue;
}
+
+ ut_ad(0);
}
space->n_pending_flushes--;
@@ -962,13 +926,13 @@ skip_flush:
@param[in] size desired size in number of pages
@param[out] success whether the operation succeeded
@return whether the operation should be retried */
-static UNIV_COLD __attribute__((warn_unused_result, nonnull))
+static ATTRIBUTE_COLD __attribute__((warn_unused_result, nonnull))
bool
fil_space_extend_must_retry(
fil_space_t* space,
fil_node_t* node,
ulint size,
- ibool* success)
+ bool* success)
{
ut_ad(mutex_own(&fil_system->mutex));
ut_ad(UT_LIST_GET_LAST(space->chain) == node);
@@ -1000,28 +964,26 @@ fil_space_extend_must_retry(
}
/* At this point it is safe to release fil_system mutex. No
- other thread can rename, delete or close the file because
+ other thread can rename, delete, close or extend the file because
we have set the node->being_extended flag. */
mutex_exit(&fil_system->mutex);
- ulint start_page_no = space->size;
- const ulint file_start_page_no = start_page_no - node->size;
+ ut_ad(size >= space->size);
+
+ ulint last_page_no = space->size;
+ const ulint file_start_page_no = last_page_no - node->size;
/* Determine correct file block size */
- if (node->file_block_size == 0) {
- node->file_block_size = os_file_get_block_size(
+ if (node->block_size == 0) {
+ node->block_size = os_file_get_block_size(
node->handle, node->name);
- space->file_block_size = node->file_block_size;
}
- ulint page_size = fsp_flags_get_zip_size(space->flags);
- if (!page_size) {
- page_size = UNIV_PAGE_SIZE;
- }
+ const page_size_t pageSize(space->flags);
+ const ulint page_size = pageSize.physical();
/* fil_read_first_page() expects UNIV_PAGE_SIZE bytes.
fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes.*/
-
os_offset_t new_size = std::max(
os_offset_t(size - file_start_page_no) * page_size,
os_offset_t(FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE));
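+	/* For example, if size - file_start_page_no were smaller than
+	FIL_IBD_FILE_INITIAL_SIZE (4 pages), the file would still be
+	sized to 4 * UNIV_PAGE_SIZE bytes, the minimum that
+	fil_node_open_file() expects. */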
@@ -1029,41 +991,56 @@ fil_space_extend_must_retry(
*success = os_file_set_size(node->name, node->handle, new_size,
FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags));
- DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
- *success = FALSE;
- os_has_said_disk_full = TRUE;);
-
+	os_has_said_disk_full = !*success;
if (*success) {
os_file_flush(node->handle);
- os_has_said_disk_full = FALSE;
- start_page_no = size;
- }
+ last_page_no = size;
+ } else {
+ /* Let us measure the size of the file
+ to determine how much we were able to
+ extend it */
+ os_offset_t fsize = os_file_get_size(node->handle);
+ ut_a(fsize != os_offset_t(-1));
+ last_page_no = ulint(fsize / page_size)
+ + file_start_page_no;
+ }
mutex_enter(&fil_system->mutex);
ut_a(node->being_extended);
- ut_a(start_page_no - file_start_page_no >= node->size);
+ node->being_extended = false;
+ ut_a(last_page_no - file_start_page_no >= node->size);
- ulint file_size = start_page_no - file_start_page_no;
+ ulint file_size = last_page_no - file_start_page_no;
space->size += file_size - node->size;
node->size = file_size;
+ const ulint pages_in_MiB = node->size
+ & ~((1 << (20 - UNIV_PAGE_SIZE_SHIFT)) - 1);
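+	/* E.g. with the default UNIV_PAGE_SIZE_SHIFT of 14 (16KiB
+	pages), the mask clears the low 20 - 14 = 6 bits, rounding
+	node->size down to a multiple of 64 pages, i.e. to whole
+	mebibytes. */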
- fil_node_complete_io(node, fil_system, OS_FILE_READ);
+	fil_node_complete_io(node, IORequestRead);
- node->being_extended = FALSE;
+ /* Keep the last data file size info up to date, rounded to
+ full megabytes */
- if (space->id == 0) {
- ulint pages_per_mb = (1024 * 1024) / page_size;
-
- /* Keep the last data file size info up to date, rounded to
- full megabytes */
-
- srv_data_file_sizes[srv_n_data_files - 1]
- = (node->size / pages_per_mb) * pages_per_mb;
+ switch (space->id) {
+ case TRX_SYS_SPACE:
+ srv_sys_space.set_last_file_size(pages_in_MiB);
+ fil_flush_low(space, true);
+ return(false);
+ default:
+ ut_ad(space->purpose == FIL_TYPE_TABLESPACE
+ || space->purpose == FIL_TYPE_IMPORT);
+ if (space->purpose == FIL_TYPE_TABLESPACE
+ && !space->is_being_truncated) {
+ fil_flush_low(space, true);
+ }
+ return(false);
+ case SRV_TMP_SPACE_ID:
+ ut_ad(space->purpose == FIL_TYPE_TEMPORARY);
+ srv_tmp_space.set_last_file_size(pages_in_MiB);
+ return(false);
}
- fil_flush_low(space, true);
- return(false);
}
/*******************************************************************//**
@@ -1076,285 +1053,359 @@ fil_mutex_enter_and_prepare_for_io(
/*===============================*/
ulint space_id) /*!< in: space id */
{
- fil_space_t* space;
- ulint count = 0;
+ for (ulint count = 0;;) {
+ mutex_enter(&fil_system->mutex);
-retry:
- mutex_enter(&fil_system->mutex);
+ if (space_id >= SRV_LOG_SPACE_FIRST_ID) {
+ /* We keep log files always open. */
+ break;
+ }
- if (space_id >= SRV_LOG_SPACE_FIRST_ID) {
- /* We keep log files always open. */
- return;
- }
+ fil_space_t* space = fil_space_get_by_id(space_id);
- space = fil_space_get_by_id(space_id);
+ if (space == NULL) {
+ break;
+ }
- if (space == NULL) {
- return;
- }
+ fil_node_t* node = UT_LIST_GET_LAST(space->chain);
+ ut_ad(space->id == 0
+ || node == UT_LIST_GET_FIRST(space->chain));
- fil_node_t* node = UT_LIST_GET_LAST(space->chain);
+ if (space->id == 0) {
+ /* We keep the system tablespace files always
+ open; this is important in preventing
+ deadlocks in this module, as a page read
+ completion often performs another read from
+ the insert buffer. The insert buffer is in
+ tablespace 0, and we cannot end up waiting in
+ this function. */
+ } else if (!node || node->is_open()) {
+ /* If the file is already open, no need to do
+ anything; if the space does not exist, we handle the
+ situation in the function which called this
+ function */
+ } else {
+ while (fil_system->n_open >= fil_system->max_n_open) {
+ /* Too many files are open */
+ if (fil_try_to_close_file_in_LRU(count > 1)) {
+ /* No problem */
+ } else if (count >= 2) {
+ ib::warn() << "innodb_open_files="
+ << fil_system->max_n_open
+ << " is exceeded ("
+ << fil_system->n_open
+ << ") files stay open)";
+ break;
+ } else {
+ mutex_exit(&fil_system->mutex);
+ os_aio_simulated_wake_handler_threads();
+ os_thread_sleep(20000);
+ /* Flush tablespaces so that we can
+ close modified files in the LRU list */
+ fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
+
+ count++;
+ mutex_enter(&fil_system->mutex);
+ continue;
+ }
+ }
+ }
- ut_ad(space->id == 0 || node == UT_LIST_GET_FIRST(space->chain));
+ if (ulint size = ulint(UNIV_UNLIKELY(space->recv_size))) {
+ ut_ad(node);
+ bool success;
+ if (fil_space_extend_must_retry(space, node, size,
+ &success)) {
+ continue;
+ }
- if (space->id == 0) {
- /* We keep the system tablespace files always open;
- this is important in preventing deadlocks in this module, as
- a page read completion often performs another read from the
- insert buffer. The insert buffer is in tablespace 0, and we
- cannot end up waiting in this function. */
- } else if (!node || node->open) {
- /* If the file is already open, no need to do
- anything; if the space does not exist, we handle the
- situation in the function which called this
- function */
- } else {
- /* Too many files are open, try to close some */
- while (fil_system->n_open >= fil_system->max_n_open) {
- if (fil_try_to_close_file_in_LRU(count > 1)) {
- /* No problem */
- } else if (count >= 2) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "innodb_open_files=%lu is exceeded"
- " (%lu files stay open)",
- fil_system->max_n_open,
- fil_system->n_open);
- break;
- } else {
- mutex_exit(&fil_system->mutex);
+ ut_ad(mutex_own(&fil_system->mutex));
+ /* Crash recovery requires the file extension
+ to succeed. */
+ ut_a(success);
+ /* InnoDB data files cannot shrink. */
+ ut_a(space->size >= size);
- /* Wake the i/o-handler threads to
- make sure pending i/o's are
- performed */
- os_aio_simulated_wake_handler_threads();
- os_thread_sleep(20000);
+ /* There could be multiple concurrent I/O requests for
+ this tablespace (multiple threads trying to extend
+ this tablespace).
- /* Flush tablespaces so that we can
- close modified files in the LRU list */
- fil_flush_file_spaces(FIL_TABLESPACE);
+ Also, fil_space_set_recv_size() may have been invoked
+ again during the file extension while fil_system->mutex
+ was not being held by us.
- count++;
- goto retry;
- }
- }
- }
+ Only if space->recv_size matches what we read
+ originally, reset the field. In this way, a
+ subsequent I/O request will handle any pending
+ fil_space_set_recv_size(). */
- if (ulint size = UNIV_UNLIKELY(space->recv_size)) {
- ut_ad(node);
- ibool success;
- if (fil_space_extend_must_retry(space, node, size, &success)) {
- goto retry;
+ if (size == space->recv_size) {
+ space->recv_size = 0;
+ }
}
- ut_ad(mutex_own(&fil_system->mutex));
- /* Crash recovery requires the file extension to succeed. */
- ut_a(success);
- /* InnoDB data files cannot shrink. */
- ut_a(space->size >= size);
+ break;
+ }
+}
- /* There could be multiple concurrent I/O requests for
- this tablespace (multiple threads trying to extend
- this tablespace).
+/** Try to extend a tablespace if it is smaller than the specified size.
+@param[in,out] space tablespace
+@param[in] size desired size in pages
+@return whether the tablespace is at least as big as requested */
+bool
+fil_space_extend(
+ fil_space_t* space,
+ ulint size)
+{
+ ut_ad(!srv_read_only_mode || space->purpose == FIL_TYPE_TEMPORARY);
- Also, fil_space_set_recv_size() may have been invoked
- again during the file extension while fil_system->mutex
- was not being held by us.
+ bool success;
- Only if space->recv_size matches what we read originally,
- reset the field. In this way, a subsequent I/O request
- will handle any pending fil_space_set_recv_size(). */
+ do {
+ fil_mutex_enter_and_prepare_for_io(space->id);
+ } while (fil_space_extend_must_retry(
+ space, UT_LIST_GET_LAST(space->chain), size,
+ &success));
- if (size == space->recv_size) {
- space->recv_size = 0;
- }
- }
+ mutex_exit(&fil_system->mutex);
+ return(success);
}
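+
+/* A hypothetical caller grows a tablespace before a bulk insert and
+backs off if the disk is full (sketch; n_new_pages is a placeholder):
+
+	if (!fil_space_extend(space, space->size + n_new_pages)) {
+		return(DB_OUT_OF_FILE_SPACE);
+	}
+*/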
-/** Prepare a data file object for freeing.
-@param[in,out] space tablespace
-@param[in,out] node data file */
+/** Prepare to free a file node object from a tablespace memory cache.
+@param[in,out] node file node
+@param[in] space tablespace */
static
void
-fil_node_free_part1(fil_space_t* space, fil_node_t* node)
+fil_node_close_to_free(
+ fil_node_t* node,
+ fil_space_t* space)
{
ut_ad(mutex_own(&fil_system->mutex));
ut_a(node->magic_n == FIL_NODE_MAGIC_N);
ut_a(node->n_pending == 0);
ut_a(!node->being_extended);
- if (node->open) {
+ if (node->is_open()) {
/* We fool the assertion in fil_node_close_file() to think
there are no unflushed modifications in the file */
- node->modification_counter = node->flush_counter;
- os_event_set(node->sync_event);
+ node->needs_flush = false;
if (fil_buffering_disabled(space)) {
- ut_ad(!space->is_in_unflushed_spaces);
+ ut_ad(!space->is_in_unflushed_spaces());
ut_ad(fil_space_is_flushed(space));
- } else if (space->is_in_unflushed_spaces
+ } else if (space->is_in_unflushed_spaces()
&& fil_space_is_flushed(space)) {
- space->is_in_unflushed_spaces = false;
-
- UT_LIST_REMOVE(unflushed_spaces,
- fil_system->unflushed_spaces,
- space);
+ fil_system->unflushed_spaces.remove(*space);
}
- fil_node_close_file(node, fil_system);
+ fil_node_close_file(node);
}
}
-/** Free a data file object.
-@param[in,out] space tablespace
-@param[in] node data file */
+/** Detach a space object from the tablespace memory cache.
+Closes the files in the chain but does not delete them.
+There must not be any pending i/o's or flushes on the files.
+@param[in,out] space tablespace */
static
void
-fil_node_free_part2(fil_space_t* space, fil_node_t* node)
+fil_space_detach(
+ fil_space_t* space)
{
- ut_ad(!node->open);
+ ut_ad(mutex_own(&fil_system->mutex));
+
+ HASH_DELETE(fil_space_t, hash, fil_system->spaces, space->id, space);
+
+ fil_space_t* fnamespace = fil_space_get_by_name(space->name);
- space->size -= node->size;
+ ut_a(space == fnamespace);
+
+ HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
+ ut_fold_string(space->name), space);
+
+ if (space->is_in_unflushed_spaces()) {
+
+ ut_ad(!fil_buffering_disabled(space));
+
+ fil_system->unflushed_spaces.remove(*space);
+ }
+
+ if (space->is_in_rotation_list()) {
+
+ fil_system->rotation_list.remove(*space);
+ }
- UT_LIST_REMOVE(chain, space->chain, node);
+ UT_LIST_REMOVE(fil_system->space_list, space);
- os_event_free(node->sync_event);
- mem_free(node->name);
- mem_free(node);
+ ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
+ ut_a(space->n_pending_flushes == 0);
+
+ for (fil_node_t* fil_node = UT_LIST_GET_FIRST(space->chain);
+ fil_node != NULL;
+ fil_node = UT_LIST_GET_NEXT(chain, fil_node)) {
+
+ fil_node_close_to_free(fil_node, space);
+ }
}
-#ifdef UNIV_LOG_ARCHIVE
-/****************************************************************//**
-Drops files from the start of a file space, so that its size is cut by
-the amount given. */
-UNIV_INTERN
+/** Free a tablespace object on which fil_space_detach() was invoked.
+There must not be any pending i/o's or flushes on the files.
+@param[in,out] space tablespace */
+static
void
-fil_space_truncate_start(
-/*=====================*/
- ulint id, /*!< in: space id */
- ulint trunc_len) /*!< in: truncate by this much; it is an error
- if this does not equal to the combined size of
- some initial files in the space */
+fil_space_free_low(
+ fil_space_t* space)
{
- fil_node_t* node;
- fil_space_t* space;
+ /* The tablespace must not be in fil_system->named_spaces. */
+ ut_ad(srv_fast_shutdown == 2 || !srv_was_started
+ || space->max_lsn == 0);
- mutex_enter(&fil_system->mutex);
+ /* Wait for fil_space_release_for_io(); after
+ fil_space_detach(), the tablespace cannot be found, so
+ fil_space_acquire_for_io() would return NULL */
+ while (space->n_pending_ios) {
+ os_thread_sleep(100);
+ }
- space = fil_space_get_by_id(id);
+ for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
+ node != NULL; ) {
+ ut_d(space->size -= node->size);
+ ut_free(node->name);
+ fil_node_t* old_node = node;
+ node = UT_LIST_GET_NEXT(chain, node);
+ ut_free(old_node);
+ }
- ut_a(space);
+ ut_ad(space->size == 0);
- while (trunc_len > 0) {
- node = UT_LIST_GET_FIRST(space->chain);
+ rw_lock_free(&space->latch);
+ fil_space_destroy_crypt_data(&space->crypt_data);
- ut_a(node->size * UNIV_PAGE_SIZE <= trunc_len);
+ ut_free(space->name);
+ ut_free(space);
+}
- trunc_len -= node->size * UNIV_PAGE_SIZE;
+/** Frees a space object from the tablespace memory cache.
+Closes the files in the chain but does not delete them.
+There must not be any pending i/o's or flushes on the files.
+@param[in] id tablespace identifier
+@param[in] x_latched whether the caller holds X-mode space->latch
+@return true if success */
+bool
+fil_space_free(
+ ulint id,
+ bool x_latched)
+{
+ ut_ad(id != TRX_SYS_SPACE);
+
+ mutex_enter(&fil_system->mutex);
+ fil_space_t* space = fil_space_get_by_id(id);
- fil_node_free_part1(space, node);
- fil_node_free_part2(space, node);
+ if (space != NULL) {
+ fil_space_detach(space);
}
mutex_exit(&fil_system->mutex);
+
+ if (space != NULL) {
+ if (x_latched) {
+ rw_lock_x_unlock(&space->latch);
+ }
+
+ if (!recv_recovery_is_on()) {
+ log_mutex_enter();
+ }
+
+ ut_ad(log_mutex_own());
+
+ if (space->max_lsn != 0) {
+ ut_d(space->max_lsn = 0);
+ UT_LIST_REMOVE(fil_system->named_spaces, space);
+ }
+
+ if (!recv_recovery_is_on()) {
+ log_mutex_exit();
+ }
+
+ fil_space_free_low(space);
+ }
+
+ return(space != NULL);
}
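+
+/* The ordering in fil_space_free() is deliberate: fil_space_detach()
+makes the tablespace unreachable under fil_system->mutex, the
+named_spaces unlinking happens under the log mutex, and only then does
+fil_space_free_low() wait out pending I/Os and release the memory. */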
-#endif /* UNIV_LOG_ARCHIVE */
-/*******************************************************************//**
-Creates a space memory object and puts it to the 'fil system' hash table.
-If there is an error, prints an error message to the .err log.
-@param[in] name Space name
-@param[in] id Space id
-@param[in] flags Tablespace flags
-@param[in] purpose FIL_TABLESPACE or FIL_LOG if log
-@param[in] crypt_data Encryption information
-@param[in] create_table True if this is create table
-@param[in] mode Encryption mode
-@return TRUE if success */
-UNIV_INTERN
-bool
+/** Create a space memory object and put it to the fil_system hash table.
+Error messages are issued to the server log.
+@param[in] name tablespace name
+@param[in] id tablespace identifier
+@param[in] flags tablespace flags
+@param[in] purpose tablespace purpose
+@param[in,out] crypt_data encryption information
+@param[in] mode encryption mode
+@return pointer to created tablespace, to be filled in with fil_space_t::add()
+@retval NULL on failure (such as when the same tablespace exists) */
+fil_space_t*
fil_space_create(
const char* name,
ulint id,
ulint flags,
- ulint purpose,
+ fil_type_t purpose,
fil_space_crypt_t* crypt_data,
- bool create_table,
fil_encryption_t mode)
{
fil_space_t* space;
- DBUG_EXECUTE_IF("fil_space_create_failure", return(false););
-
- ut_a(fil_system);
-
- /* Look for a matching tablespace and if found free it. */
- do {
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_name(name);
-
- if (space != 0) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Tablespace '%s' exists in the cache "
- "with id %lu != %lu",
- name, (ulong) space->id, (ulong) id);
+ ut_ad(fil_system);
+ ut_ad(fsp_flags_is_valid(flags & ~FSP_FLAGS_MEM_MASK, id));
+ ut_ad(purpose == FIL_TYPE_LOG
+ || srv_page_size == UNIV_PAGE_SIZE_ORIG || flags != 0);
- if (id == 0 || purpose != FIL_TABLESPACE) {
+ DBUG_EXECUTE_IF("fil_space_create_failure", return(NULL););
- mutex_exit(&fil_system->mutex);
+ mutex_enter(&fil_system->mutex);
- return(false);
- }
+ /* Look for a matching tablespace. */
+ space = fil_space_get_by_name(name);
- ib_logf(IB_LOG_LEVEL_WARN,
- "Freeing existing tablespace '%s' entry "
- "from the cache with id %lu",
- name, (ulong) id);
+ if (space != NULL) {
+ mutex_exit(&fil_system->mutex);
- bool success = fil_space_free_and_mutex_exit(
- space->id, false);
- ut_a(success);
- }
+ ib::warn() << "Tablespace '" << name << "' exists in the"
+ " cache with id " << space->id << " != " << id;
- } while (space != 0);
+ return(NULL);
+ }
space = fil_space_get_by_id(id);
- if (space != 0) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Trying to add tablespace '%s' with id %lu "
- "to the tablespace memory cache, but tablespace '%s' "
- "with id %lu already exists in the cache!",
- name, (ulong) id, space->name, (ulong) space->id);
-
+ if (space != NULL) {
+ ib::error() << "Trying to add tablespace '" << name
+ << "' with id " << id
+ << " to the tablespace memory cache, but tablespace '"
+ << space->name << "' already exists in the cache!";
mutex_exit(&fil_system->mutex);
-
- return(false);
+ return(NULL);
}
- space = static_cast<fil_space_t*>(mem_zalloc(sizeof(*space)));
+ space = static_cast<fil_space_t*>(ut_zalloc_nokey(sizeof(*space)));
- space->name = mem_strdup(name);
space->id = id;
+ space->name = mem_strdup(name);
- fil_system->tablespace_version++;
- space->tablespace_version = fil_system->tablespace_version;
+ UT_LIST_INIT(space->chain, &fil_node_t::chain);
- if (purpose == FIL_TABLESPACE && !recv_recovery_on
+ if ((purpose == FIL_TYPE_TABLESPACE || purpose == FIL_TYPE_IMPORT)
+ && !recv_recovery_is_on()
&& id > fil_system->max_assigned_id) {
if (!fil_system->space_id_reuse_warned) {
- fil_system->space_id_reuse_warned = TRUE;
+ fil_system->space_id_reuse_warned = true;
- ib_logf(IB_LOG_LEVEL_WARN,
- "Allocated tablespace %lu, old maximum "
- "was %lu",
- (ulong) id,
- (ulong) fil_system->max_assigned_id);
+ ib::warn() << "Allocated tablespace ID " << id
+ << " for " << name << ", old maximum was "
+ << fil_system->max_assigned_id;
}
fil_system->max_assigned_id = id;
@@ -1366,25 +1417,51 @@ fil_space_create(
space->magic_n = FIL_SPACE_MAGIC_N;
space->crypt_data = crypt_data;
+ DBUG_LOG("tablespace",
+ "Created metadata for " << id << " name " << name);
+ if (crypt_data) {
+ DBUG_LOG("crypt",
+ "Tablespace " << id << " name " << name
+ << " encryption " << crypt_data->encryption
+ << " key id " << crypt_data->key_id
+ << ":" << fil_crypt_get_mode(crypt_data)
+ << " " << fil_crypt_get_type(crypt_data));
+ }
+
rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP);
+ if (space->purpose == FIL_TYPE_TEMPORARY) {
+ ut_d(space->latch.set_temp_fsp());
+ /* SysTablespace::open_or_create() would pass
+ size!=0 to fil_space_t::add(), so first_time_open
+ would not hold in fil_node_open_file(), and we
+ must assign this manually. We do not care about
+ the durability or atomicity of writes to the
+ temporary tablespace files. */
+ space->atomic_write_supported = true;
+ }
+
HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space);
HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
ut_fold_string(name), space);
- UT_LIST_ADD_LAST(space_list, fil_system->space_list, space);
+ UT_LIST_ADD_LAST(fil_system->space_list, space);
+ if (id < SRV_LOG_SPACE_FIRST_ID && id > fil_system->max_assigned_id) {
+
+ fil_system->max_assigned_id = id;
+ }
/* Inform key rotation that there could be something
to do */
- if (purpose == FIL_TABLESPACE && !srv_fil_crypt_rotate_key_age && fil_crypt_threads_event &&
+ if (purpose == FIL_TYPE_TABLESPACE
+ && !srv_fil_crypt_rotate_key_age && fil_crypt_threads_event &&
(mode == FIL_ENCRYPTION_ON || mode == FIL_ENCRYPTION_OFF ||
srv_encrypt_tables)) {
/* Key rotation is not enabled, need to inform background
encryption threads. */
- UT_LIST_ADD_LAST(rotation_list, fil_system->rotation_list, space);
- space->is_in_rotation_list = true;
+ fil_system->rotation_list.push_back(*space);
mutex_exit(&fil_system->mutex);
mutex_enter(&fil_crypt_threads_mutex);
os_event_set(fil_crypt_threads_event);
@@ -1393,22 +1470,21 @@ fil_space_create(
mutex_exit(&fil_system->mutex);
}
- return(true);
+ return(space);
}
/*******************************************************************//**
Assigns a new space id for a new single-table tablespace. This works simply by
incrementing the global counter. If 4 billion id's is not enough, we may need
to recycle id's.
-@return TRUE if assigned, FALSE if not */
-UNIV_INTERN
-ibool
+@return true if assigned, false if not */
+bool
fil_assign_new_space_id(
/*====================*/
ulint* space_id) /*!< in/out: space id */
{
ulint id;
- ibool success;
+ bool success;
mutex_enter(&fil_system->mutex);
@@ -1421,17 +1497,12 @@ fil_assign_new_space_id(
id++;
if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- "InnoDB: Warning: you are running out of new"
- " single-table tablespace id's.\n"
- "InnoDB: Current counter is %lu and it"
- " must not exceed %lu!\n"
- "InnoDB: To reset the counter to zero"
- " you have to dump all your tables and\n"
- "InnoDB: recreate the whole InnoDB installation.\n",
- (ulong) id,
- (ulong) SRV_LOG_SPACE_FIRST_ID);
+ ib::warn() << "You are running out of new single-table"
+ " tablespace id's. Current counter is " << id
+ << " and it must not exceed" << SRV_LOG_SPACE_FIRST_ID
+ << "! To reset the counter to zero you have to dump"
+ " all your tables and recreate the whole InnoDB"
+ " installation.";
}
success = (id < SRV_LOG_SPACE_FIRST_ID);
@@ -1439,15 +1510,11 @@ fil_assign_new_space_id(
if (success) {
*space_id = fil_system->max_assigned_id = id;
} else {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- "InnoDB: You have run out of single-table"
- " tablespace id's!\n"
- "InnoDB: Current counter is %lu.\n"
- "InnoDB: To reset the counter to zero you"
- " have to dump all your tables and\n"
- "InnoDB: recreate the whole InnoDB installation.\n",
- (ulong) id);
+ ib::warn() << "You have run out of single-table tablespace"
+ " id's! Current counter is " << id
+ << ". To reset the counter to zero"
+ " you have to dump all your tables and"
+ " recreate the whole InnoDB installation.";
*space_id = ULINT_UNDEFINED;
}
@@ -1456,195 +1523,83 @@ fil_assign_new_space_id(
return(success);
}
-/** Free a space object from the tablespace memory cache. Close the files in
-the chain but do not delete them. There must not be any pending i/o's or
-flushes on the files.
-The fil_system->mutex will be released.
-@param[in] id tablespace ID
-@param[in] x_latched whether the caller holds exclusive space->latch
-@return whether the tablespace existed */
-static
-bool
-fil_space_free_and_mutex_exit(ulint id, bool x_latched)
+/** Trigger a call to fil_node_t::read_page0()
+@param[in] id tablespace identifier
+@return tablespace
+@retval NULL if the tablespace does not exist or cannot be read */
+fil_space_t* fil_system_t::read_page0(ulint id)
{
- fil_space_t* space;
- fil_space_t* fnamespace;
-
- ut_ad(mutex_own(&fil_system->mutex));
-
- space = fil_space_get_by_id(id);
-
- if (!space) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "trying to remove non-existing tablespace " ULINTPF,
- id);
- mutex_exit(&fil_system->mutex);
- return(false);
- }
-
- HASH_DELETE(fil_space_t, hash, fil_system->spaces, id, space);
+ mutex_exit(&mutex);
- fnamespace = fil_space_get_by_name(space->name);
- ut_a(fnamespace);
- ut_a(space == fnamespace);
-
- HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
- ut_fold_string(space->name), space);
-
- if (space->is_in_unflushed_spaces) {
-
- ut_ad(!fil_buffering_disabled(space));
- space->is_in_unflushed_spaces = false;
-
- UT_LIST_REMOVE(unflushed_spaces, fil_system->unflushed_spaces,
- space);
- }
+ ut_ad(id != 0);
- if (space->is_in_rotation_list) {
- space->is_in_rotation_list = false;
- ut_a(UT_LIST_GET_LEN(fil_system->rotation_list) > 0);
- UT_LIST_REMOVE(rotation_list, fil_system->rotation_list, space);
- }
-
- UT_LIST_REMOVE(space_list, fil_system->space_list, space);
+ /* It is possible that the tablespace is dropped while we are
+ not holding the mutex. */
+ fil_mutex_enter_and_prepare_for_io(id);
- ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
- ut_a(0 == space->n_pending_flushes);
+ fil_space_t* space = fil_space_get_by_id(id);
- for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
- node != NULL;
- node = UT_LIST_GET_NEXT(chain, node)) {
- fil_node_free_part1(space, node);
+ if (space == NULL || UT_LIST_GET_LEN(space->chain) == 0) {
+ return(NULL);
}
- mutex_exit(&fil_system->mutex);
-
- /* Wait for fil_space_release_for_io(); after
- fil_space_detach(), the tablespace cannot be found, so
- fil_space_acquire_for_io() would return NULL */
- while (space->n_pending_ios) {
- os_thread_sleep(100);
- }
+ /* The following code must change when InnoDB supports
+ multiple datafiles per tablespace. */
+ ut_a(1 == UT_LIST_GET_LEN(space->chain));
- for (fil_node_t* fil_node = UT_LIST_GET_FIRST(space->chain);
- fil_node != NULL;
- fil_node = UT_LIST_GET_FIRST(space->chain)) {
- fil_node_free_part2(space, fil_node);
- }
+ fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
- ut_a(0 == UT_LIST_GET_LEN(space->chain));
+ /* It must be a single-table tablespace and we have not opened
+ the file yet; the following calls will open it and update the
+ size fields */
- if (x_latched) {
- rw_lock_x_unlock(&space->latch);
+ if (!fil_node_prepare_for_io(node, fil_system, space)) {
+ /* The single-table tablespace can't be opened,
+ because the ibd file is missing. */
+ return(NULL);
}
- rw_lock_free(&(space->latch));
-
- fil_space_destroy_crypt_data(&(space->crypt_data));
+ fil_node_complete_io(node, IORequestRead);
- mem_free(space->name);
- mem_free(space);
-
- return(TRUE);
-}
-
-/*******************************************************************//**
-Returns a pointer to the file_space_t that is in the memory cache
-associated with a space id.
-@return file_space_t pointer, NULL if space not found */
-fil_space_t*
-fil_space_get(
-/*==========*/
- ulint id) /*!< in: space id */
-{
- fil_space_t* space;
-
- ut_ad(fil_system);
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- mutex_exit(&fil_system->mutex);
-
- return (space);
+	return(space);
}
/*******************************************************************//**
-Returns a pointer to the file_space_t that is in the memory cache
+Returns a pointer to the fil_space_t that is in the memory cache
associated with a space id. The caller must lock fil_system->mutex.
-@return file_space_t pointer, NULL if space not found */
+@return fil_space_t pointer, NULL if space not found */
UNIV_INLINE
fil_space_t*
fil_space_get_space(
/*================*/
ulint id) /*!< in: space id */
{
- fil_space_t* space;
- fil_node_t* node;
-
- ut_ad(fil_system);
-
- space = fil_space_get_by_id(id);
- if (space == NULL) {
- return(NULL);
+ fil_space_t* space = fil_space_get_by_id(id);
+ if (space == NULL || space->size != 0) {
+ return(space);
}
- if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
- ut_a(id != 0);
-
- mutex_exit(&fil_system->mutex);
-
- /* It is possible that the space gets evicted at this point
- before the fil_mutex_enter_and_prepare_for_io() acquires
- the fil_system->mutex. Check for this after completing the
- call to fil_mutex_enter_and_prepare_for_io(). */
- fil_mutex_enter_and_prepare_for_io(id);
-
- /* We are still holding the fil_system->mutex. Check if
- the space is still in memory cache. */
- space = fil_space_get_by_id(id);
- if (space == NULL) {
- return(NULL);
- }
-
- /* The following code must change when InnoDB supports
- multiple datafiles per tablespace. Note that there is small
- change that space is found from tablespace list but
- we have not yet created node for it and as we hold
- fil_system mutex here fil_node_create can't continue. */
- ut_a(UT_LIST_GET_LEN(space->chain) == 1 || UT_LIST_GET_LEN(space->chain) == 0);
-
- node = UT_LIST_GET_FIRST(space->chain);
-
- if (node) {
- /* It must be a single-table tablespace and we have not opened
- the file yet; the following calls will open it and update the
- size fields */
-
- if (!fil_node_prepare_for_io(node, fil_system, space)) {
- /* The single-table tablespace can't be opened,
- because the ibd file is missing. */
- return(NULL);
- }
- fil_node_complete_io(node, fil_system, OS_FILE_READ);
- }
+ switch (space->purpose) {
+ case FIL_TYPE_LOG:
+ break;
+ case FIL_TYPE_TEMPORARY:
+ case FIL_TYPE_TABLESPACE:
+ case FIL_TYPE_IMPORT:
+ space = fil_system->read_page0(id);
}
return(space);
}
-/*******************************************************************//**
-Returns the path from the first fil_node_t found for the space ID sent.
+/** Returns the path from the first fil_node_t found with this space ID.
The caller is responsible for freeing the memory allocated here for the
value returned.
-@return own: A copy of fil_node_t::path, NULL if space ID is zero
+@param[in] id Tablespace ID
+@return own: A copy of fil_node_t::path, NULL if space ID is zero
or not found. */
-UNIV_INTERN
char*
fil_space_get_first_path(
-/*=====================*/
- ulint id) /*!< in: space id */
+ ulint id)
{
fil_space_t* space;
fil_node_t* node;
@@ -1695,8 +1650,7 @@ fil_space_set_recv_size(ulint id, ulint size)
/*******************************************************************//**
Returns the size of the space in pages. The tablespace must be cached in the
memory cache.
-@return space size, 0 if space not found */
-UNIV_INTERN
+@return space size, 0 if space not found */
ulint
fil_space_get_size(
/*===============*/
@@ -1720,8 +1674,7 @@ fil_space_get_size(
/*******************************************************************//**
Returns the flags of the space. The tablespace must be cached
in the memory cache.
-@return flags, ULINT_UNDEFINED if space not found */
-UNIV_INTERN
+@return flags, ULINT_UNDEFINED if space not found */
ulint
fil_space_get_flags(
/*================*/
@@ -1732,10 +1685,6 @@ fil_space_get_flags(
ut_ad(fil_system);
- if (!id) {
- return(0);
- }
-
mutex_enter(&fil_system->mutex);
space = fil_space_get_space(id);
@@ -1753,50 +1702,90 @@ fil_space_get_flags(
return(flags);
}
-/*******************************************************************//**
-Returns the compressed page size of the space, or 0 if the space
-is not compressed. The tablespace must be cached in the memory cache.
-@return compressed page size, ULINT_UNDEFINED if space not found */
-UNIV_INTERN
-ulint
-fil_space_get_zip_size(
-/*===================*/
- ulint id) /*!< in: space id */
+/** Open each fil_node_t of a named fil_space_t if not already open.
+@param[in] name Tablespace name
+@return true if all nodes are open */
+bool
+fil_space_open(
+ const char* name)
{
- ulint flags;
+ ut_ad(fil_system != NULL);
- flags = fil_space_get_flags(id);
+ mutex_enter(&fil_system->mutex);
- if (flags && flags != ULINT_UNDEFINED) {
+ fil_space_t* space = fil_space_get_by_name(name);
+ fil_node_t* node;
- return(fsp_flags_get_zip_size(flags));
+ for (node = UT_LIST_GET_FIRST(space->chain);
+ node != NULL;
+ node = UT_LIST_GET_NEXT(chain, node)) {
+
+ if (!node->is_open()
+ && !fil_node_open_file(node)) {
+ mutex_exit(&fil_system->mutex);
+ return(false);
+ }
}
- return(flags);
+ mutex_exit(&fil_system->mutex);
+
+ return(true);
}
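+
+/* Note that, unlike fil_space_close() below, this function assumes the
+caller guarantees that a tablespace with the given name exists: the
+fil_space_get_by_name() result is dereferenced without a NULL check. */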
-/*******************************************************************//**
-Checks if the pair space, page_no refers to an existing page in a tablespace
-file space. The tablespace must be cached in the memory cache.
-@return TRUE if the address is meaningful */
-UNIV_INTERN
-ibool
-fil_check_adress_in_tablespace(
-/*===========================*/
- ulint id, /*!< in: space id */
- ulint page_no)/*!< in: page number */
+/** Close each fil_node_t of a named fil_space_t if open.
+@param[in] name Tablespace name */
+void
+fil_space_close(
+ const char* name)
+{
+ if (fil_system == NULL) {
+ return;
+ }
+
+ mutex_enter(&fil_system->mutex);
+
+ fil_space_t* space = fil_space_get_by_name(name);
+ if (space == NULL) {
+ mutex_exit(&fil_system->mutex);
+ return;
+ }
+
+ for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
+ node != NULL;
+ node = UT_LIST_GET_NEXT(chain, node)) {
+
+ if (node->is_open()) {
+ fil_node_close_file(node);
+ }
+ }
+
+ mutex_exit(&fil_system->mutex);
+}
+
+/** Returns the page size of the space and whether it is compressed or not.
+The tablespace must be cached in the memory cache.
+@param[in] id space id
+@param[out] found true if tablespace was found
+@return page size */
+const page_size_t
+fil_space_get_page_size(
+ ulint id,
+ bool* found)
{
- if (fil_space_get_size(id) > page_no) {
+ const ulint flags = fil_space_get_flags(id);
- return(TRUE);
+ if (flags == ULINT_UNDEFINED) {
+ *found = false;
+ return(univ_page_size);
}
- return(FALSE);
+ *found = true;
+
+ return(page_size_t(flags));
}
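+
+/* Callers follow the out-parameter idiom, e.g. (as in
+fil_recreate_table() below):
+
+	bool found;
+	const page_size_t page_size(fil_space_get_page_size(id, &found));
+	if (!found) {
+		return(DB_ERROR);
+	}
+*/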
/****************************************************************//**
Initializes the tablespace memory cache. */
-UNIV_INTERN
void
fil_init(
/*=====*/
@@ -1808,15 +1797,17 @@ fil_init(
ut_a(hash_size > 0);
ut_a(max_n_open > 0);
- fil_system = static_cast<fil_system_t*>(
- mem_zalloc(sizeof(fil_system_t)));
+ fil_system = new fil_system_t();
- mutex_create(fil_system_mutex_key,
- &fil_system->mutex, SYNC_ANY_LATCH);
+ mutex_create(LATCH_ID_FIL_SYSTEM, &fil_system->mutex);
fil_system->spaces = hash_create(hash_size);
fil_system->name_hash = hash_create(hash_size);
+ UT_LIST_INIT(fil_system->LRU, &fil_node_t::LRU);
+ UT_LIST_INIT(fil_system->space_list, &fil_space_t::space_list);
+ UT_LIST_INIT(fil_system->named_spaces, &fil_space_t::named_spaces);
+
fil_system->max_n_open = max_n_open;
fil_space_crypt_init();
@@ -1828,7 +1819,6 @@ database server shutdown. This should be called at a server startup after the
space objects for the log and the system tablespace have been created. The
purpose of this operation is to make sure we never run out of file descriptors
if we need to read from the insert buffer or to write to the log. */
-UNIV_INTERN
void
fil_open_log_and_system_tablespace_files(void)
/*==========================================*/
@@ -1852,9 +1842,8 @@ fil_open_log_and_system_tablespace_files(void)
node != NULL;
node = UT_LIST_GET_NEXT(chain, node)) {
- if (!node->open) {
- if (!fil_node_open_file(node, fil_system,
- space)) {
+ if (!node->is_open()) {
+ if (!fil_node_open_file(node)) {
/* This func is called during server's
startup. If some file of log or system
tablespace is missing, the server
@@ -1866,25 +1855,20 @@ fil_open_log_and_system_tablespace_files(void)
if (fil_system->max_n_open < 10 + fil_system->n_open) {
- fprintf(stderr,
- "InnoDB: Warning: you must"
- " raise the value of"
- " innodb_open_files in\n"
- "InnoDB: my.cnf! Remember that"
- " InnoDB keeps all log files"
- " and all system\n"
- "InnoDB: tablespace files open"
+ ib::warn() << "You must raise the value of"
+ " innodb_open_files in my.cnf!"
+ " Remember that InnoDB keeps all"
+ " log files and all system"
+ " tablespace files open"
" for the whole time mysqld is"
- " running, and\n"
- "InnoDB: needs to open also"
+ " running, and needs to open also"
" some .ibd files if the"
- " file-per-table storage\n"
- "InnoDB: model is used."
- " Current open files %lu,"
- " max allowed"
- " open files %lu.\n",
- (ulong) fil_system->n_open,
- (ulong) fil_system->max_n_open);
+ " file-per-table storage model is used."
+ " Current open files "
+ << fil_system->n_open
+ << ", max allowed open files "
+ << fil_system->max_n_open
+ << ".";
}
}
}
@@ -1895,18 +1879,21 @@ fil_open_log_and_system_tablespace_files(void)
/*******************************************************************//**
Closes all open files. There must not be any pending i/o's or not flushed
modifications in the files. */
-UNIV_INTERN
void
fil_close_all_files(void)
/*=====================*/
{
fil_space_t* space;
- mutex_enter(&fil_system->mutex);
+ /* At shutdown, we should not have any files in this list. */
+ ut_ad(srv_fast_shutdown == 2
+ || !srv_was_started
+ || UT_LIST_GET_LEN(fil_system->named_spaces) == 0);
- space = UT_LIST_GET_FIRST(fil_system->space_list);
+ mutex_enter(&fil_system->mutex);
- while (space != NULL) {
+ for (space = UT_LIST_GET_FIRST(fil_system->space_list);
+ space != NULL; ) {
fil_node_t* node;
fil_space_t* prev_space = space;
@@ -1914,27 +1901,26 @@ fil_close_all_files(void)
node != NULL;
node = UT_LIST_GET_NEXT(chain, node)) {
- if (node->open) {
- fil_node_close_file(node, fil_system);
+ if (node->is_open()) {
+ fil_node_close_file(node);
}
}
space = UT_LIST_GET_NEXT(space_list, space);
-
- /* This is executed during shutdown. No other thread
- can create or remove tablespaces while we are not
- holding fil_system->mutex. */
- fil_space_free_and_mutex_exit(prev_space->id, false);
- mutex_enter(&fil_system->mutex);
+ fil_space_detach(prev_space);
+ fil_space_free_low(prev_space);
}
mutex_exit(&fil_system->mutex);
+
+ ut_ad(srv_fast_shutdown == 2
+ || !srv_was_started
+ || UT_LIST_GET_LEN(fil_system->named_spaces) == 0);
}
/*******************************************************************//**
Closes the redo log files. There must not be any pending i/o's or not
flushed modifications in the files. */
-UNIV_INTERN
void
fil_close_log_files(
/*================*/
@@ -1950,28 +1936,28 @@ fil_close_log_files(
fil_node_t* node;
fil_space_t* prev_space = space;
- if (space->purpose != FIL_LOG) {
+ if (space->purpose != FIL_TYPE_LOG) {
space = UT_LIST_GET_NEXT(space_list, space);
continue;
}
+ /* Log files are not in the fil_system->named_spaces list. */
+ ut_ad(space->max_lsn == 0);
+
for (node = UT_LIST_GET_FIRST(space->chain);
node != NULL;
node = UT_LIST_GET_NEXT(chain, node)) {
- if (node->open) {
- fil_node_close_file(node, fil_system);
+ if (node->is_open()) {
+ fil_node_close_file(node);
}
}
space = UT_LIST_GET_NEXT(space_list, space);
if (free) {
- /* This is executed during startup. No other thread
- can create or remove tablespaces while we are not
- holding fil_system->mutex. */
- fil_space_free_and_mutex_exit(prev_space->id, false);
- mutex_enter(&fil_system->mutex);
+ fil_space_detach(prev_space);
+ fil_space_free_low(prev_space);
}
}
@@ -1981,17 +1967,13 @@ fil_close_log_files(
/*******************************************************************//**
Sets the max tablespace id counter if the given number is bigger than the
previous value. */
-UNIV_INTERN
void
fil_set_max_space_id_if_bigger(
/*===========================*/
ulint max_id) /*!< in: maximum known id */
{
if (max_id >= SRV_LOG_SPACE_FIRST_ID) {
- fprintf(stderr,
- "InnoDB: Fatal error: max tablespace id"
- " is too high, %lu\n", (ulong) max_id);
- ut_error;
+ ib::fatal() << "Max tablespace id is too high, " << max_id;
}
mutex_enter(&fil_system->mutex);
@@ -2016,251 +1998,105 @@ fil_write_flushed_lsn(
byte* buf;
dberr_t err = DB_TABLESPACE_NOT_FOUND;
- buf1 = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
+ buf1 = static_cast<byte*>(ut_malloc_nokey(2 * UNIV_PAGE_SIZE));
buf = static_cast<byte*>(ut_align(buf1, UNIV_PAGE_SIZE));
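+	/* buf1 is sized at twice UNIV_PAGE_SIZE so that buf can be
+	aligned on a page boundary, which a read from a raw device
+	may require. */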
- /* Acquire system tablespace */
- fil_space_t* space = fil_space_acquire(0);
-
- /* If tablespace is not encrypted, stamp flush_lsn to
- first page of all system tablespace datafiles to avoid
- unnecessary error messages on possible downgrade. */
- if (!space->crypt_data
- || !space->crypt_data->should_encrypt()) {
-
- fil_node_t* node;
- ulint sum_of_sizes = 0;
+ const page_id_t page_id(TRX_SYS_SPACE, 0);
- for (node = UT_LIST_GET_FIRST(space->chain);
- node != NULL;
- node = UT_LIST_GET_NEXT(chain, node)) {
-
- err = fil_read(TRUE, 0, 0, sum_of_sizes, 0,
- UNIV_PAGE_SIZE, buf, NULL, 0);
-
- if (err == DB_SUCCESS) {
- mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
- lsn);
+ err = fil_read(page_id, univ_page_size, 0, univ_page_size.physical(),
+ buf);
- err = fil_write(TRUE, 0, 0, sum_of_sizes, 0,
- UNIV_PAGE_SIZE, buf, NULL, 0);
-
- sum_of_sizes += node->size;
- }
- }
- } else {
- /* When system tablespace is encrypted stamp flush_lsn to
- only the first page of the first datafile (rest of pages
- are encrypted). */
- err = fil_read(TRUE, 0, 0, 0, 0,
- UNIV_PAGE_SIZE, buf, NULL, 0);
-
- if (err == DB_SUCCESS) {
- mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
- lsn);
-
- err = fil_write(TRUE, 0, 0, 0, 0,
- UNIV_PAGE_SIZE, buf, NULL, 0);
- }
+ if (err == DB_SUCCESS) {
+		mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
+				lsn);
+ err = fil_write(page_id, univ_page_size, 0,
+ univ_page_size.physical(), buf);
+ fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
}
- fil_flush_file_spaces(FIL_TABLESPACE);
- fil_space_release(space);
-
ut_free(buf1);
-
return(err);
}
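+
+/* In other words, fil_write_flushed_lsn() is a read-modify-write of
+page 0 of the system tablespace: only the 8-byte
+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION field is updated, and the write
+is forced to disk via fil_flush_file_spaces(FIL_TYPE_TABLESPACE). */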
-/** Check the consistency of the first data page of a tablespace
-at database startup.
-@param[in] page page frame
-@param[in] space_id tablespace identifier
-@param[in] flags tablespace flags
-@retval NULL on success, or if innodb_force_recovery is set
-@return pointer to an error message string */
-static MY_ATTRIBUTE((warn_unused_result))
-const char*
-fil_check_first_page(const page_t* page, ulint space_id, ulint flags)
+/** Acquire a tablespace when it could be dropped concurrently.
+Used by background threads that do not necessarily hold proper locks
+for concurrency control.
+@param[in] id tablespace ID
+@param[in] silent whether to silently ignore missing tablespaces
+@return the tablespace
+@retval NULL if missing or being deleted or truncated */
+UNIV_INTERN
+fil_space_t*
+fil_space_acquire_low(ulint id, bool silent)
{
- if (srv_force_recovery >= SRV_FORCE_IGNORE_CORRUPT) {
- return(NULL);
- }
-
- if (UNIV_PAGE_SIZE != fsp_flags_get_page_size(flags)) {
- fprintf(stderr,
- "InnoDB: Error: Current page size %lu != "
- " page size on page %lu\n",
- UNIV_PAGE_SIZE, fsp_flags_get_page_size(flags));
-
- return("innodb-page-size mismatch");
- }
+ fil_space_t* space;
- if (!space_id && !flags) {
- ulint nonzero_bytes = UNIV_PAGE_SIZE;
- const byte* b = page;
+ mutex_enter(&fil_system->mutex);
- while (!*b && --nonzero_bytes) {
- b++;
- }
+ space = fil_space_get_by_id(id);
- if (!nonzero_bytes) {
- return("space header page consists of zero bytes");
+ if (space == NULL) {
+ if (!silent) {
+ ib::warn() << "Trying to access missing"
+ " tablespace " << id;
}
+ } else if (space->is_stopping()) {
+ space = NULL;
+ } else {
+ space->n_pending_ops++;
}
- if (buf_page_is_corrupted(
- false, page, fsp_flags_get_zip_size(flags), NULL)) {
- return("checksum mismatch");
- }
-
- if (page_get_space_id(page) == space_id
- && page_get_page_no(page) == 0) {
- return(NULL);
- }
+ mutex_exit(&fil_system->mutex);
- return("inconsistent data in space header");
+ return(space);
}
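+
+/* The matching release is fil_space_release() below. A background
+thread typically brackets its access like this (sketch; do_work() is a
+placeholder):
+
+	if (fil_space_t* space = fil_space_acquire_low(id, true)) {
+		do_work(space);
+		fil_space_release(space);
+	}
+*/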
-/** Reads the flushed lsn, arch no, space_id and tablespace flag fields from
-the first page of a first data file at database startup.
-@param[in] data_file open data file
-@param[in] one_read_only true if first datafile is already
- read
-@param[out] flags FSP_SPACE_FLAGS
-@param[out] space_id tablepspace ID
-@param[out] min_arch_log_no min of archived log numbers in
- data files
-@param[out] max_arch_log_no max of archived log numbers in
- data files
-@param[out] flushed_lsn flushed lsn value
-@param[out] crypt_data encryption crypt data
-@param[in] check_first_page true if first page contents
- should be checked
-@return NULL on success, or if innodb_force_recovery is set
-@retval pointer to an error message string */
-UNIV_INTERN
-const char*
-fil_read_first_page(
- pfs_os_file_t data_file,
- ibool one_read_already,
- ulint* flags,
- ulint* space_id,
-#ifdef UNIV_LOG_ARCHIVE
- ulint* min_arch_log_no,
- ulint* max_arch_log_no,
-#endif /* UNIV_LOG_ARCHIVE */
- lsn_t* flushed_lsn,
- fil_space_crypt_t** crypt_data,
- bool check_first_page)
+/** Release a tablespace acquired with fil_space_acquire().
+@param[in,out] space tablespace to release */
+void
+fil_space_release(fil_space_t* space)
{
- byte* buf;
- byte* page;
- const char* check_msg = NULL;
- fil_space_crypt_t* cdata;
-
- buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
-
- /* Align the memory for a possible read from a raw device */
-
- page = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE));
-
- os_file_read(data_file, page, 0, UNIV_PAGE_SIZE);
-
- srv_stats.page0_read.add(1);
-
- /* The FSP_HEADER on page 0 is only valid for the first file
- in a tablespace. So if this is not the first datafile, leave
- *flags and *space_id as they were read from the first file and
- do not validate the first page. */
- if (!one_read_already) {
- /* Undo tablespace does not contain correct FSP_HEADER,
- and actually we really need to read only crypt_data. */
- if (check_first_page) {
- *space_id = fsp_header_get_space_id(page);
- *flags = fsp_header_get_flags(page);
-
- if (flushed_lsn) {
- *flushed_lsn = mach_read_from_8(page +
- FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
- }
-
- if (!fsp_flags_is_valid(*flags, *space_id)) {
- ulint cflags = fsp_flags_convert_from_101(*flags);
- if (cflags == ULINT_UNDEFINED) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Invalid flags 0x%x in tablespace %u",
- unsigned(*flags), unsigned(*space_id));
- return "invalid tablespace flags";
- } else {
- *flags = cflags;
- }
- }
-
- check_msg = fil_check_first_page(page, *space_id, *flags);
- }
-
- /* Possible encryption crypt data is also stored only to first page
- of the first datafile. */
-
- const ulint offset = fsp_header_get_crypt_offset(
- fsp_flags_get_zip_size(*flags));
-
- cdata = fil_space_read_crypt_data(*space_id, page, offset);
-
- if (crypt_data) {
- *crypt_data = cdata;
- }
-
- /* If file space is encrypted we need to have at least some
- encryption service available where to get keys */
- if (cdata && cdata->should_encrypt()) {
-
- if (!encryption_key_id_exists(cdata->key_id)) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Tablespace id " ULINTPF
- " is encrypted but encryption service"
- " or used key_id %u is not available. "
- "Can't continue opening tablespace.",
- *space_id, cdata->key_id);
-
- return ("table encrypted but encryption service not available.");
- }
- }
- }
-
- ut_free(buf);
+ mutex_enter(&fil_system->mutex);
+ ut_ad(space->magic_n == FIL_SPACE_MAGIC_N);
+ ut_ad(space->n_pending_ops > 0);
+ space->n_pending_ops--;
+ mutex_exit(&fil_system->mutex);
+}
- if (check_msg) {
- return(check_msg);
- }
+/** Acquire a tablespace for reading or writing a block,
+when it could be dropped concurrently.
+@param[in] id tablespace ID
+@return the tablespace
+@retval NULL if missing */
+fil_space_t*
+fil_space_acquire_for_io(ulint id)
+{
+ mutex_enter(&fil_system->mutex);
- if (!one_read_already) {
-#ifdef UNIV_LOG_ARCHIVE
- *min_arch_log_no = arch_log_no;
- *max_arch_log_no = arch_log_no;
-#endif /* UNIV_LOG_ARCHIVE */
- return(NULL);
- }
+ fil_space_t* space = fil_space_get_by_id(id);
-#ifdef UNIV_LOG_ARCHIVE
- if (*min_arch_log_no > arch_log_no) {
- *min_arch_log_no = arch_log_no;
+ if (space) {
+ space->n_pending_ios++;
}
- if (*max_arch_log_no < arch_log_no) {
- *max_arch_log_no = arch_log_no;
- }
-#endif /* UNIV_LOG_ARCHIVE */
+ mutex_exit(&fil_system->mutex);
- return(NULL);
+ return(space);
}
-/*================ SINGLE-TABLE TABLESPACES ==========================*/
+/** Release a tablespace acquired with fil_space_acquire_for_io().
+@param[in,out] space tablespace to release */
+void
+fil_space_release_for_io(fil_space_t* space)
+{
+ mutex_enter(&fil_system->mutex);
+ ut_ad(space->magic_n == FIL_SPACE_MAGIC_N);
+ ut_ad(space->n_pending_ios > 0);
+ space->n_pending_ios--;
+ mutex_exit(&fil_system->mutex);
+}
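+
+/* Holding an n_pending_ios reference is what keeps a detached
+tablespace alive during a concurrent drop: fil_space_free_low() above
+busy-waits until the count drops to zero before releasing memory. */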
/********************************************************//**
Creates the database directory for a table if it does not exist yet. */
-static
void
fil_create_directory_for_tablename(
/*===============================*/
@@ -2274,303 +2110,525 @@ fil_create_directory_for_tablename(
len = strlen(fil_path_to_mysql_datadir);
namend = strchr(name, '/');
ut_a(namend);
- path = static_cast<char*>(mem_alloc(len + (namend - name) + 2));
+ path = static_cast<char*>(ut_malloc_nokey(len + (namend - name) + 2));
memcpy(path, fil_path_to_mysql_datadir, len);
path[len] = '/';
memcpy(path + len + 1, name, namend - name);
path[len + (namend - name) + 1] = 0;
- srv_normalize_path_for_win(path);
+ os_normalize_path(path);
+
+ bool success = os_file_create_directory(path, false);
+ ut_a(success);
- ut_a(os_file_create_directory(path, FALSE));
- mem_free(path);
+ ut_free(path);
}
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Writes a log record about an .ibd file create/rename/delete. */
+/** Write a log record about an operation on a tablespace file.
+@param[in] type MLOG_FILE_NAME or MLOG_FILE_DELETE
+or MLOG_FILE_CREATE2 or MLOG_FILE_RENAME2
+@param[in] space_id tablespace identifier
+@param[in] first_page_no first page number in the file
+@param[in] path file path
+@param[in] new_path if type is MLOG_FILE_RENAME2, the new name
+@param[in] flags if type is MLOG_FILE_CREATE2, the space flags
+@param[in,out] mtr mini-transaction */
static
void
fil_op_write_log(
-/*=============*/
- ulint type, /*!< in: MLOG_FILE_CREATE,
- MLOG_FILE_CREATE2,
- MLOG_FILE_DELETE, or
- MLOG_FILE_RENAME */
- ulint space_id, /*!< in: space id */
- ulint log_flags, /*!< in: redo log flags (stored
- in the page number field) */
- ulint flags, /*!< in: compressed page size
- and file format
- if type==MLOG_FILE_CREATE2, or 0 */
- const char* name, /*!< in: table name in the familiar
- 'databasename/tablename' format, or
- the file path in the case of
- MLOG_FILE_DELETE */
- const char* new_name, /*!< in: if type is MLOG_FILE_RENAME,
- the new table name in the
- 'databasename/tablename' format */
- mtr_t* mtr) /*!< in: mini-transaction handle */
+ mlog_id_t type,
+ ulint space_id,
+ ulint first_page_no,
+ const char* path,
+ const char* new_path,
+ ulint flags,
+ mtr_t* mtr)
{
- byte* log_ptr;
- ulint len;
+ byte* log_ptr;
+ ulint len;
- log_ptr = mlog_open(mtr, 11 + 2 + 1);
+ ut_ad(first_page_no == 0 || type == MLOG_FILE_CREATE2);
ut_ad(fsp_flags_is_valid(flags, space_id));
- if (!log_ptr) {
+ /* fil_name_parse() requires that there be at least one path
+ separator and that the file path end with ".ibd". */
+ ut_ad(strchr(path, OS_PATH_SEPARATOR) != NULL);
+ ut_ad(first_page_no /* trimming an undo tablespace */
+ || !strcmp(&path[strlen(path) - strlen(DOT_IBD)], DOT_IBD));
+
+ log_ptr = mlog_open(mtr, 11 + 4 + 2 + 1);
+
+ if (log_ptr == NULL) {
/* Logging in mtr is switched off during crash recovery:
in that case mlog_open returns NULL */
return;
}
- log_ptr = mlog_write_initial_log_record_for_file_op(
- type, space_id, log_flags, log_ptr, mtr);
+ log_ptr = mlog_write_initial_log_record_low(
+ type, space_id, first_page_no, log_ptr, mtr);
+
if (type == MLOG_FILE_CREATE2) {
mach_write_to_4(log_ptr, flags);
log_ptr += 4;
}
+
/* Let us store the strings as null-terminated for easier readability
and handling */
- len = strlen(name) + 1;
+ len = strlen(path) + 1;
mach_write_to_2(log_ptr, len);
log_ptr += 2;
mlog_close(mtr, log_ptr);
- mlog_catenate_string(mtr, (byte*) name, len);
+ mlog_catenate_string(
+ mtr, reinterpret_cast<const byte*>(path), len);
- if (type == MLOG_FILE_RENAME) {
- len = strlen(new_name) + 1;
+ switch (type) {
+ case MLOG_FILE_RENAME2:
+ ut_ad(strchr(new_path, OS_PATH_SEPARATOR) != NULL);
+ len = strlen(new_path) + 1;
log_ptr = mlog_open(mtr, 2 + len);
ut_a(log_ptr);
mach_write_to_2(log_ptr, len);
log_ptr += 2;
mlog_close(mtr, log_ptr);
- mlog_catenate_string(mtr, (byte*) new_name, len);
+ mlog_catenate_string(
+ mtr, reinterpret_cast<const byte*>(new_path), len);
+ break;
+ case MLOG_FILE_NAME:
+ case MLOG_FILE_DELETE:
+ case MLOG_FILE_CREATE2:
+ break;
+ default:
+ ut_ad(0);
}
}
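+
+/* The resulting redo record layout is therefore (sketch):
+
+	initial record			type, space_id, first_page_no
+	4 bytes: flags			MLOG_FILE_CREATE2 only
+	2 bytes: len			strlen(path) + 1
+	len bytes: path			NUL-terminated
+	2 bytes + new path		MLOG_FILE_RENAME2 only
+
+fil_name_parse() on the recovery side consumes the fields in the same
+order. */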
-#endif
-/*******************************************************************//**
-Parses the body of a log record written about an .ibd file operation. That is,
-the log record part after the standard (type, space id, page no) header of the
-log record.
+/** Write redo log for renaming a file.
+@param[in] space_id tablespace id
+@param[in] first_page_no first page number in the file
+@param[in] old_name tablespace file name
+@param[in] new_name tablespace file name after renaming
+@param[in,out] mtr mini-transaction */
+static
+void
+fil_name_write_rename_low(
+ ulint space_id,
+ ulint first_page_no,
+ const char* old_name,
+ const char* new_name,
+ mtr_t* mtr)
+{
+ ut_ad(!is_predefined_tablespace(space_id));
-If desired, also replays the delete or rename operation if the .ibd file
-exists and the space id in it matches. Replays the create operation if a file
-at that path does not exist yet. If the database directory for the file to be
-created does not exist, then we create the directory, too.
+ fil_op_write_log(
+ MLOG_FILE_RENAME2,
+ space_id, first_page_no, old_name, new_name, 0, mtr);
+}
-Note that mysqlbackup --apply-log sets fil_path_to_mysql_datadir to point to
-the datadir that we should use in replaying the file operations.
+/** Write redo log for renaming a file.
+@param[in] space_id tablespace id
+@param[in] old_name tablespace file name
+@param[in] new_name tablespace file name after renaming */
+void
+fil_name_write_rename(
+ ulint space_id,
+ const char* old_name,
+ const char* new_name)
+{
+ mtr_t mtr;
+ mtr.start();
+ fil_name_write_rename_low(space_id, 0, old_name, new_name, &mtr);
+ mtr.commit();
+ log_write_up_to(mtr.commit_lsn(), true);
+}
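+
+/* The final log_write_up_to() makes the MLOG_FILE_RENAME2 record
+durable before the caller performs the file system rename, so that a
+crash in between can be repaired during recovery. */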
-InnoDB recovery does not replay these fully since it always sets the space id
-to zero. But mysqlbackup does replay them. TODO: If remote tablespaces are
-used, mysqlbackup will only create tables in the default directory since
-MLOG_FILE_CREATE and MLOG_FILE_CREATE2 only know the tablename, not the path.
+/** Write MLOG_FILE_NAME for a file.
+@param[in] space_id tablespace id
+@param[in] first_page_no first page number in the file
+@param[in] name tablespace file name
+@param[in,out] mtr mini-transaction */
+static
+void
+fil_name_write(
+ ulint space_id,
+ ulint first_page_no,
+ const char* name,
+ mtr_t* mtr)
+{
+ fil_op_write_log(
+ MLOG_FILE_NAME, space_id, first_page_no, name, NULL, 0, mtr);
+}
+/** Write MLOG_FILE_NAME for a file.
+@param[in] space tablespace
+@param[in] first_page_no first page number in the file
+@param[in] file tablespace file
+@param[in,out] mtr mini-transaction */
+static
+void
+fil_name_write(
+ const fil_space_t* space,
+ ulint first_page_no,
+ const fil_node_t* file,
+ mtr_t* mtr)
+{
+ fil_name_write(space->id, first_page_no, file->name, mtr);
+}
-@return end of log record, or NULL if the record was not completely
-contained between ptr and end_ptr */
-UNIV_INTERN
-byte*
-fil_op_log_parse_or_replay(
-/*=======================*/
- byte* ptr, /*!< in: buffer containing the log record body,
- or an initial segment of it, if the record does
- not fir completely between ptr and end_ptr */
- byte* end_ptr, /*!< in: buffer end */
- ulint type, /*!< in: the type of this log record */
- ulint space_id, /*!< in: the space id of the tablespace in
- question, or 0 if the log record should
- only be parsed but not replayed */
- ulint log_flags) /*!< in: redo log flags
- (stored in the page number parameter) */
+/********************************************************//**
+Recreates table indexes by applying
+TRUNCATE log record during recovery.
+@return DB_SUCCESS or error code */
+dberr_t
+fil_recreate_table(
+/*===============*/
+ ulint space_id, /*!< in: space id */
+ ulint format_flags, /*!< in: page format */
+ ulint flags, /*!< in: tablespace flags */
+ const char* name, /*!< in: table name */
+	truncate_t&	truncate)	/*!< in: the information from
+					the TRUNCATE log record */
{
- ulint name_len;
- ulint new_name_len;
- const char* name;
- const char* new_name = NULL;
- ulint flags = 0;
+ dberr_t err = DB_SUCCESS;
+ bool found;
+ const page_size_t page_size(fil_space_get_page_size(space_id,
+ &found));
+
+ if (!found) {
+ ib::info() << "Missing .ibd file for table '" << name
+ << "' with tablespace " << space_id;
+ return(DB_ERROR);
+ }
- if (type == MLOG_FILE_CREATE2) {
- if (end_ptr < ptr + 4) {
+ ut_ad(!truncate_t::s_fix_up_active);
+ truncate_t::s_fix_up_active = true;
- return(NULL);
- }
+	/* Step-1: Scan the REDO log for active indexes and drop all of
+	them using a low-level function that takes the root_page_no and
+	space id. */
+ truncate.drop_indexes(space_id);
- flags = mach_read_from_4(ptr);
- ptr += 4;
+ /* Step-2: Scan for active indexes and re-create them. */
+ err = truncate.create_indexes(
+ name, space_id, page_size, flags, format_flags);
+ if (err != DB_SUCCESS) {
+ ib::info() << "Failed to create indexes for the table '"
+ << name << "' with tablespace " << space_id
+ << " while fixing up truncate action";
+ return(err);
}
- if (end_ptr < ptr + 2) {
+ truncate_t::s_fix_up_active = false;
- return(NULL);
- }
+ return(err);
+}
+
+/********************************************************//**
+Recreates the tablespace and table indexes by applying
+TRUNCATE log record during recovery.
+@return DB_SUCCESS or error code */
+dberr_t
+fil_recreate_tablespace(
+/*====================*/
+ ulint space_id, /*!< in: space id */
+ ulint format_flags, /*!< in: page format */
+ ulint flags, /*!< in: tablespace flags */
+ const char* name, /*!< in: table name */
+	truncate_t&	truncate,	/*!< in: the information from
+					the TRUNCATE log record */
+ lsn_t recv_lsn) /*!< in: the end LSN of
+ the log record */
+{
+ dberr_t err = DB_SUCCESS;
+ mtr_t mtr;
+
+ ut_ad(!truncate_t::s_fix_up_active);
+ truncate_t::s_fix_up_active = true;
- name_len = mach_read_from_2(ptr);
+	/* Step-1: Invalidate buffer pool pages belonging to the
+	tablespace that is being re-created. */
+ buf_LRU_flush_or_remove_pages(space_id, NULL);
- ptr += 2;
+ /* Remove all insert buffer entries for the tablespace */
+ ibuf_delete_for_discarded_space(space_id);
- if (end_ptr < ptr + name_len) {
+	/* Step-2: Truncate the tablespace (reset the size back to the
+	original or default size). */
+ err = truncate.truncate(
+ space_id, truncate.get_dir_path(), name, flags, true);
- return(NULL);
+ if (err != DB_SUCCESS) {
+
+ ib::info() << "Cannot access .ibd file for table '"
+ << name << "' with tablespace " << space_id
+ << " while truncating";
+ return(DB_ERROR);
}
- name = (const char*) ptr;
+ bool found;
+ const page_size_t& page_size =
+ fil_space_get_page_size(space_id, &found);
+
+ if (!found) {
+ ib::info() << "Missing .ibd file for table '" << name
+ << "' with tablespace " << space_id;
+ return(DB_ERROR);
+ }
- ptr += name_len;
+ /* Step-3: Initialize Header. */
+ if (page_size.is_compressed()) {
+ byte* buf;
+ page_t* page;
- if (type == MLOG_FILE_RENAME) {
- if (end_ptr < ptr + 2) {
+ buf = static_cast<byte*>(ut_zalloc_nokey(3 * UNIV_PAGE_SIZE));
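+		/* Three pages are allocated: slack for alignment, one
+		frame for the uncompressed page, and one for the
+		compressed copy that page_zip.data will point to. */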
- return(NULL);
- }
+ /* Align the memory for file i/o */
+ page = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE));
- new_name_len = mach_read_from_2(ptr);
+ flags |= FSP_FLAGS_PAGE_SSIZE();
- ptr += 2;
+ fsp_header_init_fields(page, space_id, flags);
- if (end_ptr < ptr + new_name_len) {
+ mach_write_to_4(
+ page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id);
- return(NULL);
- }
+ page_zip_des_t page_zip;
+ page_zip_set_size(&page_zip, page_size.physical());
+ page_zip.data = page + UNIV_PAGE_SIZE;
- new_name = (const char*) ptr;
+#ifdef UNIV_DEBUG
+ page_zip.m_start =
+#endif /* UNIV_DEBUG */
+ page_zip.m_end = page_zip.m_nonempty = page_zip.n_blobs = 0;
+ buf_flush_init_for_writing(NULL, page, &page_zip, 0);
- ptr += new_name_len;
- }
+ err = fil_write(page_id_t(space_id, 0), page_size, 0,
+ page_size.physical(), page_zip.data);
- /* We managed to parse a full log record body */
- /*
- printf("Parsed log rec of type %lu space %lu\n"
- "name %s\n", type, space_id, name);
+ ut_free(buf);
- if (type == MLOG_FILE_RENAME) {
- printf("new name %s\n", new_name);
+ if (err != DB_SUCCESS) {
+ ib::info() << "Failed to clean header of the"
+ " table '" << name << "' with tablespace "
+ << space_id;
+ return(err);
+ }
}
- */
- if (!space_id) {
- return(ptr);
+
+ mtr_start(&mtr);
+	/* Do not log the operation while fixing up the truncate
+	operation, as a crash at this point can still be recovered by
+	restarting from the last checkpoint. */
+ mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
+
+ /* Initialize the first extent descriptor page and
+ the second bitmap page for the new tablespace. */
+ fsp_header_init(space_id, FIL_IBD_FILE_INITIAL_SIZE, &mtr);
+ mtr_commit(&mtr);
+
+	/* Step-4: Re-create the indexes in the newly re-created
+	tablespace. This restores the tablespace to the state it had
+	right after CREATE TABLE. */
+ err = truncate.create_indexes(
+ name, space_id, page_size, flags, format_flags);
+ if (err != DB_SUCCESS) {
+ return(err);
}
- /* Let us try to perform the file operation, if sensible. Note that
- mysqlbackup has at this stage already read in all space id info to the
- fil0fil.cc data structures.
+	/* Step-5: Write the newly created pages into the .ibd file and
+	flush them to disk, in case the i/o-handler thread removes the
+	bitmap page from the buffer pool. */
+ mtr_start(&mtr);
- NOTE that our algorithm is not guaranteed to work correctly if there
- were renames of tables during the backup. See mysqlbackup code for more
- on the problem. */
+ mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
- switch (type) {
- case MLOG_FILE_DELETE:
- if (fil_tablespace_exists_in_mem(space_id)) {
- dberr_t err = fil_delete_tablespace(space_id);
- ut_a(err == DB_SUCCESS);
- }
+ mutex_enter(&fil_system->mutex);
- break;
+ fil_space_t* space = fil_space_get_by_id(space_id);
- case MLOG_FILE_RENAME:
- /* In order to replay the rename, the following must hold:
- * The new name is not already used.
- * A tablespace is open in memory with the old name.
- * The space ID for that tablepace matches this log entry.
- This will prevent unintended renames during recovery. */
-
- if (fil_get_space_id_for_table(new_name) == ULINT_UNDEFINED
- && space_id == fil_get_space_id_for_table(name)) {
- /* Create the database directory for the new name, if
- it does not exist yet */
- fil_create_directory_for_tablename(new_name);
-
- if (!fil_rename_tablespace(name, space_id,
- new_name, NULL)) {
- ut_error;
- }
- }
+ mutex_exit(&fil_system->mutex);
- break;
+ fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
- case MLOG_FILE_CREATE:
- case MLOG_FILE_CREATE2:
- if (fil_tablespace_exists_in_mem(space_id)) {
- /* Do nothing */
- } else if (fil_get_space_id_for_table(name)
- != ULINT_UNDEFINED) {
- /* Do nothing */
- } else if (log_flags & MLOG_FILE_FLAG_TEMP) {
- /* Temporary table, do nothing */
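+	/* Stamp every page of the data file with the recovery LSN and
+	write it back, so that older redo log records will not be
+	re-applied to the re-created tablespace. */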
+ for (ulint page_no = 0; page_no < node->size; ++page_no) {
+
+ const page_id_t cur_page_id(space_id, page_no);
+
+ buf_block_t* block = buf_page_get(cur_page_id, page_size,
+ RW_X_LATCH, &mtr);
+
+ byte* page = buf_block_get_frame(block);
+
+ if (!FSP_FLAGS_GET_ZIP_SSIZE(flags)) {
+ ut_ad(!page_size.is_compressed());
+
+ buf_flush_init_for_writing(
+ block, page, NULL, recv_lsn);
+
+ err = fil_write(cur_page_id, page_size, 0,
+ page_size.physical(), page);
} else {
- /* Create the database directory for name, if it does
- not exist yet */
- fil_create_directory_for_tablename(name);
-
- if (fil_create_new_single_table_tablespace(
- space_id, name, NULL, flags,
- DICT_TF2_USE_TABLESPACE,
- FIL_IBD_FILE_INITIAL_SIZE,
- FIL_ENCRYPTION_DEFAULT,
- FIL_DEFAULT_ENCRYPTION_KEY) != DB_SUCCESS) {
- ut_error;
+ ut_ad(page_size.is_compressed());
+
+ /* We don't want to rewrite empty pages. */
+
+ if (fil_page_get_type(page) != 0) {
+ page_zip_des_t* page_zip =
+ buf_block_get_page_zip(block);
+
+ buf_flush_init_for_writing(
+ block, page, page_zip, recv_lsn);
+
+ err = fil_write(cur_page_id, page_size, 0,
+ page_size.physical(),
+ page_zip->data);
+ } else {
+#ifdef UNIV_DEBUG
+ const byte* data = block->page.zip.data;
+
+ /* Make sure that the page is really empty */
+ for (ulint i = 0;
+ i < page_size.physical();
+ ++i) {
+
+ ut_a(data[i] == 0);
+ }
+#endif /* UNIV_DEBUG */
}
}
- break;
-
- default:
- ut_error;
+ if (err != DB_SUCCESS) {
+ ib::info() << "Cannot write page " << page_no
+ << " into a .ibd file for table '"
+ << name << "' with tablespace " << space_id;
+ }
}
- return(ptr);
+ mtr_commit(&mtr);
+
+ truncate_t::s_fix_up_active = false;
+
+ return(err);
}
-/*******************************************************************//**
-Allocates a file name for the EXPORT/IMPORT config file name. The
-string must be freed by caller with mem_free().
-@return own: file name */
-static
-char*
-fil_make_cfg_name(
-/*==============*/
- const char* filepath) /*!< in: .ibd file name */
+/** Replay a file rename operation if possible.
+@param[in] space_id tablespace identifier
+@param[in] first_page_no first page number in the file
+@param[in] name old file name
+@param[in] new_name new file name
+@return whether the operation was successfully applied
+(the name did not exist, or new_name did not exist and
+name was successfully renamed to new_name) */
+bool
+fil_op_replay_rename(
+ ulint space_id,
+ ulint first_page_no,
+ const char* name,
+ const char* new_name)
{
- char* cfg_name;
+ ut_ad(first_page_no == 0);
+
+ /* In order to replay the rename, the following must hold:
+ * The new name is not already used.
+ * A tablespace exists with the old name.
+	* The space ID for that tablespace matches this log entry.
+ This will prevent unintended renames during recovery. */
+ fil_space_t* space = fil_space_get(space_id);
+
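+	/* If the tablespace is not in the cache, there is nothing to
+	rename; report the operation as successfully applied. */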
+ if (space == NULL) {
+ return(true);
+ }
+
+ const bool name_match
+ = strcmp(name, UT_LIST_GET_FIRST(space->chain)->name) == 0;
+
+ if (!name_match) {
+ return(true);
+ }
- /* Create a temporary file path by replacing the .ibd suffix
- with .cfg. */
+ /* Create the database directory for the new name, if
+ it does not exist yet */
- ut_ad(strlen(filepath) > 4);
+ const char* namend = strrchr(new_name, OS_PATH_SEPARATOR);
+ ut_a(namend != NULL);
- cfg_name = mem_strdup(filepath);
- ut_snprintf(cfg_name + strlen(cfg_name) - 3, 4, "cfg");
- return(cfg_name);
+ char* dir = static_cast<char*>(
+ ut_malloc_nokey(namend - new_name + 1));
+
+ memcpy(dir, new_name, namend - new_name);
+ dir[namend - new_name] = '\0';
+
+ bool success = os_file_create_directory(dir, false);
+ ut_a(success);
+
+ ulint dirlen = 0;
+
+ if (const char* dirend = strrchr(dir, OS_PATH_SEPARATOR)) {
+ dirlen = dirend - dir + 1;
+ }
+
+ ut_free(dir);
+
+ /* New path must not exist. */
+ dberr_t err = fil_rename_tablespace_check(
+ space_id, name, new_name, false);
+ if (err != DB_SUCCESS) {
+		ib::error() << "Cannot replay file rename."
+			" Remove either file and try again.";
+ return(false);
+ }
+
+ char* new_table = mem_strdupl(
+ new_name + dirlen,
+ strlen(new_name + dirlen)
+ - 4 /* remove ".ibd" */);
+
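+	/* new_table now holds the name in "db/table" form, derived from
+	the file path by stripping the directory prefix and the ".ibd"
+	suffix; normalize the separator to '/'. */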
+ ut_ad(new_table[namend - new_name - dirlen]
+ == OS_PATH_SEPARATOR);
+#if OS_PATH_SEPARATOR != '/'
+ new_table[namend - new_name - dirlen] = '/';
+#endif
+
+ if (!fil_rename_tablespace(
+ space_id, name, new_table, new_name)) {
+ ut_error;
+ }
+
+ ut_free(new_table);
+ return(true);
}
-/*******************************************************************//**
-Check for change buffer merges.
-@return 0 if no merges else count + 1. */
+/** File operations for tablespace */
+enum fil_operation_t {
+ FIL_OPERATION_DELETE, /*!< delete a single-table tablespace */
+ FIL_OPERATION_CLOSE, /*!< close a single-table tablespace */
+ FIL_OPERATION_TRUNCATE /*!< truncate an undo tablespace */
+};
+
+/** Check for pending operations.
+@param[in] space tablespace
+@param[in] count number of attempts so far
+@return 0 if no operations else count + 1. */
static
ulint
-fil_ibuf_check_pending_ops(
-/*=======================*/
- fil_space_t* space, /*!< in/out: Tablespace to check */
- ulint count) /*!< in: number of attempts so far */
+fil_check_pending_ops(const fil_space_t* space, ulint count)
{
ut_ad(mutex_own(&fil_system->mutex));
- if (space != 0 && space->n_pending_ops != 0) {
+ if (space == NULL) {
+ return 0;
+ }
+
+ if (ulint n_pending_ops = space->n_pending_ops) {
if (count > 5000) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Trying to close/delete tablespace "
- "'%s' but there are %lu pending change "
- "buffer merges on it.",
- space->name,
- (ulong) space->n_pending_ops);
+ ib::warn() << "Trying to close/delete/truncate"
+ " tablespace '" << space->name
+ << "' but there are " << n_pending_ops
+ << " pending operations on it.";
}
return(count + 1);
@@ -2586,13 +2644,23 @@ static
ulint
fil_check_pending_io(
/*=================*/
- fil_space_t* space, /*!< in/out: Tablespace to check */
- fil_node_t** node, /*!< out: Node in space list */
- ulint count) /*!< in: number of attempts so far */
+ fil_operation_t operation, /*!< in: File operation */
+ fil_space_t* space, /*!< in/out: Tablespace to check */
+ fil_node_t** node, /*!< out: Node in space list */
+ ulint count) /*!< in: number of attempts so far */
{
ut_ad(mutex_own(&fil_system->mutex));
ut_a(space->n_pending_ops == 0);
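+	/* For truncate, mark the tablespace so that new I/O requests
+	are refused while the truncation is in progress. */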
+ switch (operation) {
+ case FIL_OPERATION_DELETE:
+ case FIL_OPERATION_CLOSE:
+ break;
+ case FIL_OPERATION_TRUNCATE:
+ space->is_being_truncated = true;
+ break;
+ }
+
/* The following code must change when InnoDB supports
multiple datafiles per tablespace. */
ut_a(UT_LIST_GET_LEN(space->chain) == 1);
@@ -2604,13 +2672,12 @@ fil_check_pending_io(
ut_a(!(*node)->being_extended);
if (count > 1000) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Trying to close/delete tablespace '%s' "
- "but there are %lu flushes "
- " and %lu pending i/o's on it.",
- space->name,
- (ulong) space->n_pending_flushes,
- (ulong) (*node)->n_pending);
+ ib::warn() << "Trying to delete/close/truncate"
+ " tablespace '" << space->name
+ << "' but there are "
+ << space->n_pending_flushes
+ << " flushes and " << (*node)->n_pending
+ << " pending i/o's on it.";
}
return(count + 1);
@@ -2626,13 +2693,15 @@ static
dberr_t
fil_check_pending_operations(
/*=========================*/
- ulint id, /*!< in: space id */
- fil_space_t** space, /*!< out: tablespace instance in memory */
- char** path) /*!< out/own: tablespace path */
+ ulint id, /*!< in: space id */
+ fil_operation_t operation, /*!< in: File operation */
+ fil_space_t** space, /*!< out: tablespace instance
+ in memory */
+ char** path) /*!< out/own: tablespace path */
{
ulint count = 0;
- ut_a(id != TRX_SYS_SPACE);
+ ut_a(!is_system_tablespace(id));
ut_ad(space);
*space = 0;
@@ -2642,30 +2711,22 @@ fil_check_pending_operations(
if (sp) {
sp->stop_new_ops = true;
- /* space could be freed by other threads as soon
- as n_pending_ops reaches 0, thus increment pending
- ops here. */
- sp->n_pending_ops++;
- }
-
- mutex_exit(&fil_system->mutex);
-
- /* Wait for crypt threads to stop accessing space */
- if (sp) {
- fil_space_crypt_close_tablespace(sp);
- /* We have "acquired" this space and must
- free it now as below we compare n_pending_ops. */
- fil_space_release(sp);
+ if (sp->crypt_data) {
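+			/* Take a reference so that the tablespace
+			cannot be freed while fil_system->mutex is
+			released for the crypt threads to stop. */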
+ sp->n_pending_ops++;
+ mutex_exit(&fil_system->mutex);
+ fil_space_crypt_close_tablespace(sp);
+ mutex_enter(&fil_system->mutex);
+ ut_ad(sp->n_pending_ops > 0);
+ sp->n_pending_ops--;
+ }
}
- /* Check for pending change buffer merges. */
+ /* Check for pending operations. */
do {
- mutex_enter(&fil_system->mutex);
-
sp = fil_space_get_by_id(id);
- count = fil_ibuf_check_pending_ops(sp, count);
+ count = fil_check_pending_ops(sp, count);
mutex_exit(&fil_system->mutex);
@@ -2673,15 +2734,12 @@ fil_check_pending_operations(
os_thread_sleep(20000);
}
+ mutex_enter(&fil_system->mutex);
} while (count > 0);
/* Check for pending IO. */
- *path = 0;
-
- do {
- mutex_enter(&fil_system->mutex);
-
+ for (;;) {
sp = fil_space_get_by_id(id);
if (sp == NULL) {
@@ -2691,19 +2749,21 @@ fil_check_pending_operations(
fil_node_t* node;
- count = fil_check_pending_io(sp, &node, count);
+ count = fil_check_pending_io(operation, sp, &node, count);
- if (count == 0) {
+ if (count == 0 && path) {
*path = mem_strdup(node->name);
}
mutex_exit(&fil_system->mutex);
- if (count > 0) {
- os_thread_sleep(20000);
+ if (count == 0) {
+ break;
}
- } while (count > 0);
+ os_thread_sleep(20000);
+ mutex_enter(&fil_system->mutex);
+ }
ut_ad(sp);
@@ -2714,8 +2774,7 @@ fil_check_pending_operations(
/*******************************************************************//**
Closes a single-table tablespace. The tablespace must be cached in the
memory cache. Free all pages used by the tablespace.
-@return DB_SUCCESS or error */
-UNIV_INTERN
+@return DB_SUCCESS or error */
dberr_t
fil_close_tablespace(
/*=================*/
@@ -2724,10 +2783,12 @@ fil_close_tablespace(
{
char* path = 0;
fil_space_t* space = 0;
+ dberr_t err;
- ut_a(id != TRX_SYS_SPACE);
+ ut_a(!is_system_tablespace(id));
- dberr_t err = fil_check_pending_operations(id, &space, &path);
+ err = fil_check_pending_operations(id, FIL_OPERATION_CLOSE,
+ &space, &path);
if (err != DB_SUCCESS) {
return(err);
@@ -2738,22 +2799,22 @@ fil_close_tablespace(
rw_lock_x_lock(&space->latch);
-#ifndef UNIV_HOTBACKUP
/* Invalidate in the buffer pool all pages belonging to the
- tablespace. Since we have set space->stop_new_ops = TRUE, readahead
+ tablespace. Since we have set space->stop_new_ops = true, readahead
or ibuf merge can no longer read more pages of this tablespace to the
buffer pool. Thus we can clean the tablespace out of the buffer pool
completely and permanently. The flag stop_new_ops also prevents
fil_flush() from being applied to this tablespace. */
- buf_LRU_flush_or_remove_pages(id, trx);
-#endif
- mutex_enter(&fil_system->mutex);
+ {
+ FlushObserver observer(id, trx, NULL);
+ buf_LRU_flush_or_remove_pages(id, &observer);
+ }
/* If the free is successful, the X lock will be released before
the space memory data structure is freed. */
- if (!fil_space_free_and_mutex_exit(id, TRUE)) {
+ if (!fil_space_free(id, true)) {
rw_lock_x_unlock(&space->latch);
err = DB_TABLESPACE_NOT_FOUND;
} else {
@@ -2763,12 +2824,13 @@ fil_close_tablespace(
/* If it is a delete then also delete any generated files, otherwise
when we drop the database the remove directory will fail. */
- char* cfg_name = fil_make_cfg_name(path);
-
- os_file_delete_if_exists(innodb_file_data_key, cfg_name);
+ char* cfg_name = fil_make_filepath(path, NULL, CFG, false);
+ if (cfg_name != NULL) {
+ os_file_delete_if_exists(innodb_data_file_key, cfg_name, NULL);
+ ut_free(cfg_name);
+ }
- mem_free(path);
- mem_free(cfg_name);
+ ut_free(path);
return(err);
}
@@ -2781,7 +2843,7 @@ but only by InnoDB table locks, which may be broken by
lock_remove_all_on_table().)
@param[in]	table	persistent table to be checked
@return whether the table is accessible */
-UNIV_INTERN bool fil_table_accessible(const dict_table_t* table)
+bool fil_table_accessible(const dict_table_t* table)
{
if (UNIV_UNLIKELY(!table->is_readable() || table->corrupted)) {
return(false);
@@ -2790,6 +2852,7 @@ UNIV_INTERN bool fil_table_accessible(const dict_table_t* table)
if (fil_space_t* space = fil_space_acquire(table->space)) {
bool accessible = !space->is_stopping();
fil_space_release(space);
+ ut_ad(accessible || dict_table_is_file_per_table(table));
return(accessible);
} else {
return(false);
@@ -2798,25 +2861,24 @@ UNIV_INTERN bool fil_table_accessible(const dict_table_t* table)
/** Delete a tablespace and associated .ibd file.
@param[in] id tablespace identifier
-@param[in] drop_ahi whether to drop the adaptive hash index
+@param[in] if_exists whether to ignore missing tablespace
@return DB_SUCCESS or error */
-UNIV_INTERN
-dberr_t
-fil_delete_tablespace(ulint id, bool drop_ahi)
+dberr_t fil_delete_tablespace(ulint id, bool if_exists)
{
char* path = 0;
fil_space_t* space = 0;
- ut_a(id != TRX_SYS_SPACE);
+ ut_a(!is_system_tablespace(id));
- dberr_t err = fil_check_pending_operations(id, &space, &path);
+ dberr_t err = fil_check_pending_operations(
+ id, FIL_OPERATION_DELETE, &space, &path);
if (err != DB_SUCCESS) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot delete tablespace %lu because it is not "
- "found in the tablespace memory cache.",
- (ulong) id);
+ if (!if_exists) {
+ ib::error() << "Cannot delete tablespace " << id
+ << " because it is not found"
+ " in the tablespace memory cache.";
+ }
return(err);
}
@@ -2824,19 +2886,6 @@ fil_delete_tablespace(ulint id, bool drop_ahi)
ut_a(space);
ut_a(path != 0);
- /* Important: We rely on the data dictionary mutex to ensure
- that a race is not possible here. It should serialize the tablespace
- drop/free. We acquire an X latch only to avoid a race condition
- when accessing the tablespace instance via:
-
- fsp_get_available_space_in_free_extents().
-
- There our main motivation is to reduce the contention on the
- dictionary mutex. */
-
- rw_lock_x_lock(&space->latch);
-
-#ifndef UNIV_HOTBACKUP
/* IMPORTANT: Because we have set space::stop_new_ops there
can't be any new ibuf merges, reads or flushes. We are here
because node::n_pending was zero above. However, it is still
@@ -2852,89 +2901,270 @@ fil_delete_tablespace(ulint id, bool drop_ahi)
We deal with pending write requests in the following function
where we'd minimally evict all dirty pages belonging to this
- space from the flush_list. Not that if a block is IO-fixed
+ space from the flush_list. Note that if a block is IO-fixed
we'll wait for IO to complete.
- To deal with potential read requests by checking the
- ::stop_new_ops flag in fil_io() */
+ To deal with potential read requests, we will check the
+ ::stop_new_ops flag in fil_io(). */
buf_LRU_flush_or_remove_pages(id, NULL);
-#endif /* !UNIV_HOTBACKUP */
-
/* If it is a delete then also delete any generated files, otherwise
when we drop the database the remove directory will fail. */
{
- char* cfg_name = fil_make_cfg_name(path);
- os_file_delete_if_exists(innodb_file_data_key, cfg_name);
- mem_free(cfg_name);
+ /* Before deleting the file, write a log record about
+ it, so that InnoDB crash recovery will expect the file
+ to be gone. */
+ mtr_t mtr;
+
+ mtr_start(&mtr);
+ fil_op_write_log(MLOG_FILE_DELETE, id, 0, path, NULL, 0, &mtr);
+ mtr_commit(&mtr);
+ /* Even if we got killed shortly after deleting the
+ tablespace file, the record must have already been
+ written to the redo log. */
+ log_write_up_to(mtr.commit_lsn(), true);
+
+ char* cfg_name = fil_make_filepath(path, NULL, CFG, false);
+ if (cfg_name != NULL) {
+ os_file_delete_if_exists(innodb_data_file_key, cfg_name, NULL);
+ ut_free(cfg_name);
+ }
}
/* Delete the link file pointing to the ibd file we are deleting. */
if (FSP_FLAGS_HAS_DATA_DIR(space->flags)) {
- fil_delete_link_file(space->name);
+ RemoteDatafile::delete_link_file(space->name);
}
mutex_enter(&fil_system->mutex);
/* Double check the sanity of pending ops after reacquiring
the fil_system::mutex. */
- if (fil_space_get_by_id(id)) {
+ if (const fil_space_t* s = fil_space_get_by_id(id)) {
+ ut_a(s == space);
ut_a(space->n_pending_ops == 0);
ut_a(UT_LIST_GET_LEN(space->chain) == 1);
fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
ut_a(node->n_pending == 0);
- }
- if (!fil_space_free_and_mutex_exit(id, true)) {
+ fil_space_detach(space);
+ mutex_exit(&fil_system->mutex);
+
+ log_mutex_enter();
+
+ if (space->max_lsn != 0) {
+ ut_d(space->max_lsn = 0);
+ UT_LIST_REMOVE(fil_system->named_spaces, space);
+ }
+
+ log_mutex_exit();
+ fil_space_free_low(space);
+
+ if (!os_file_delete(innodb_data_file_key, path)
+ && !os_file_delete_if_exists(
+ innodb_data_file_key, path, NULL)) {
+
+ /* Note: This is because we have removed the
+ tablespace instance from the cache. */
+
+ err = DB_IO_ERROR;
+ }
+ } else {
+ mutex_exit(&fil_system->mutex);
err = DB_TABLESPACE_NOT_FOUND;
}
- if (err != DB_SUCCESS) {
- rw_lock_x_unlock(&space->latch);
- } else if (!os_file_delete(innodb_file_data_key, path)
- && !os_file_delete_if_exists(innodb_file_data_key, path)) {
+ ut_free(path);
- /* Note: This is because we have removed the
- tablespace instance from the cache. */
+ return(err);
+}
- err = DB_IO_ERROR;
+/** Prepare to truncate an undo tablespace.
+@param[in] space_id undo tablespace id
+@return the tablespace
+@retval NULL if tablespace not found */
+fil_space_t* fil_truncate_prepare(ulint space_id)
+{
+ /* Stop all I/O on the tablespace and ensure that related
+ pages are flushed to disk. */
+ fil_space_t* space;
+ if (fil_check_pending_operations(space_id, FIL_OPERATION_TRUNCATE,
+ &space, NULL) != DB_SUCCESS) {
+ return NULL;
+ }
+ ut_ad(space != NULL);
+ return space;
+}
+
+/** Write redo log about an undo tablespace truncate operation.
+@param[in]	space	undo tablespace
+@param[in]	size	new size in pages
+@param[in,out]	mtr	mini-transaction */
+void fil_truncate_log(fil_space_t* space, ulint size, mtr_t* mtr)
+{
+ /* Write a MLOG_FILE_CREATE2 record with the new size, so that
+ recovery and backup will ignore any preceding redo log records
+ for writing pages that are after the new end of the tablespace. */
+ ut_ad(UT_LIST_GET_LEN(space->chain) == 1);
+ const fil_node_t* file = UT_LIST_GET_FIRST(space->chain);
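+	/* The new size travels in the "first page number" field of the
+	record; fil_op_write_log() only allows a nonzero value there for
+	MLOG_FILE_CREATE2, when trimming an undo tablespace. */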
+ fil_op_write_log(MLOG_FILE_CREATE2, space->id, size, file->name,
+ NULL, space->flags & ~FSP_FLAGS_MEM_MASK, mtr);
+}
+
+/** Truncate the tablespace to needed size.
+@param[in] space_id id of tablespace to truncate
+@param[in]	size_in_pages	new size of the tablespace in pages
+@return true if truncate was successful. */
+bool
+fil_truncate_tablespace(
+ ulint space_id,
+ ulint size_in_pages)
+{
+	/* Step-1: Prepare the tablespace for truncation. This involves
+	stopping all new operations and I/O on the tablespace and
+	ensuring that the related pages are flushed to disk. */
+ if (fil_prepare_for_truncate(space_id) != DB_SUCCESS) {
+ return(false);
}
- if (err == DB_SUCCESS) {
-#ifndef UNIV_HOTBACKUP
- /* Write a log record about the deletion of the .ibd
- file, so that mysqlbackup can replay it in the
- --apply-log phase. We use a dummy mtr and the familiar
- log write mechanism. */
- mtr_t mtr;
+	/* Step-2: Invalidate the buffer pool pages belonging to the
+	tablespace and remove all insert buffer entries for it. */
+ buf_LRU_flush_or_remove_pages(space_id, NULL);
- /* When replaying the operation in mysqlbackup, do not try
- to write any log record */
- mtr_start(&mtr);
+ /* Step-3: Truncate the tablespace and accordingly update
+ the fil_space_t handler that is used to access this tablespace. */
+ mutex_enter(&fil_system->mutex);
+ fil_space_t* space = fil_space_get_by_id(space_id);
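+	/* Extend the file to the initial size, converting pages to
+	bytes; page-compressed tablespaces are created sparse so that
+	unwritten space need not be allocated. */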
- fil_op_write_log(MLOG_FILE_DELETE, id, 0, 0, path, NULL, &mtr);
- mtr_commit(&mtr);
-#endif
- err = DB_SUCCESS;
+ /* The following code must change when InnoDB supports
+ multiple datafiles per tablespace. */
+ ut_a(UT_LIST_GET_LEN(space->chain) == 1);
+
+ fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
+
+ ut_ad(node->is_open());
+
+ space->size = node->size = size_in_pages;
+
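+	/* Shrink the file to zero length first and then extend it to
+	the requested size, so that the previous contents are discarded
+	(assuming os_file_truncate() truncates to the given length). */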
+ bool success = os_file_truncate(node->name, node->handle, 0);
+ if (success) {
+
+ os_offset_t size = os_offset_t(size_in_pages) * UNIV_PAGE_SIZE;
+
+ success = os_file_set_size(
+ node->name, node->handle, size,
+ FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags));
+
+ if (success) {
+ space->stop_new_ops = false;
+ space->is_being_truncated = false;
+ }
}
- mem_free(path);
+ mutex_exit(&fil_system->mutex);
- return(err);
+ return(success);
}
/*******************************************************************//**
-Returns TRUE if a single-table tablespace is being deleted.
-@return TRUE if being deleted */
-UNIV_INTERN
-ibool
-fil_tablespace_is_being_deleted(
-/*============================*/
- ulint id) /*!< in: space id */
+Prepare for truncating a single-table tablespace.
+1) Check pending operations on a tablespace;
+2) Remove all insert buffer entries for the tablespace;
+@return DB_SUCCESS or error */
+dberr_t
+fil_prepare_for_truncate(
+/*=====================*/
+ ulint id) /*!< in: space id */
+{
+ char* path = 0;
+ fil_space_t* space = 0;
+
+ ut_a(!is_system_tablespace(id));
+
+ dberr_t err = fil_check_pending_operations(
+ id, FIL_OPERATION_TRUNCATE, &space, &path);
+
+ ut_free(path);
+
+ if (err == DB_TABLESPACE_NOT_FOUND) {
+ ib::error() << "Cannot truncate tablespace " << id
+ << " because it is not found in the tablespace"
+ " memory cache.";
+ }
+
+ return(err);
+}
+
+/** Reinitialize the original tablespace header with the same space id
+for a single-table tablespace.
+@param[in]	table	table that resides in the tablespace
+@param[in]	size	size in blocks
+@param[in]	trx	transaction covering the truncate */
+void
+fil_reinit_space_header_for_table(
+ dict_table_t* table,
+ ulint size,
+ trx_t* trx)
+{
+ ulint id = table->space;
+
+ ut_a(!is_system_tablespace(id));
+
+	/* Invalidate in the buffer pool all pages belonging
+	to the tablespace. The buffer pool scan may take a long
+	time to complete, therefore we release dict_sys->mutex
+	and the dict operation lock during the scan and acquire
+	them again after the buffer pool scan. */
+
+ /* Release the lock on the indexes too. So that
+ they won't violate the latch ordering. */
+ dict_table_x_unlock_indexes(table);
+ row_mysql_unlock_data_dictionary(trx);
+
+	/* Lock the search latch in shared mode to prevent the user
+	from disabling the AHI during the scan. */
+ btr_search_s_lock_all();
+ DEBUG_SYNC_C("buffer_pool_scan");
+ buf_LRU_flush_or_remove_pages(id, NULL);
+ btr_search_s_unlock_all();
+
+ row_mysql_lock_data_dictionary(trx);
+
+ dict_table_x_lock_indexes(table);
+
+ /* Remove all insert buffer entries for the tablespace */
+ ibuf_delete_for_discarded_space(id);
+
+ mutex_enter(&fil_system->mutex);
+
+ fil_space_t* space = fil_space_get_by_id(id);
+
+ /* The following code must change when InnoDB supports
+ multiple datafiles per tablespace. */
+ ut_a(UT_LIST_GET_LEN(space->chain) == 1);
+
+ fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
+
+ space->size = node->size = size;
+
+ mutex_exit(&fil_system->mutex);
+
+ mtr_t mtr;
+
+ mtr_start(&mtr);
+ mtr.set_named_space(id);
+
+ fsp_header_init(id, size, &mtr);
+
+ mtr_commit(&mtr);
+}
+
+#ifdef UNIV_DEBUG
+/** Increase redo skipped count for a tablespace.
+@param[in] id space id */
+void
+fil_space_inc_redo_skipped_count(
+ ulint id)
{
fil_space_t* space;
- ibool is_being_deleted;
mutex_enter(&fil_system->mutex);
@@ -2942,14 +3172,32 @@ fil_tablespace_is_being_deleted(
ut_a(space != NULL);
- is_being_deleted = space->stop_new_ops;
+ space->redo_skipped_count++;
mutex_exit(&fil_system->mutex);
+}
+
+/** Decrease redo skipped count for a tablespace.
+@param[in] id space id */
+void
+fil_space_dec_redo_skipped_count(
+ ulint id)
+{
+ fil_space_t* space;
+
+ mutex_enter(&fil_system->mutex);
+
+ space = fil_space_get_by_id(id);
+
+ ut_a(space != NULL);
+ ut_a(space->redo_skipped_count > 0);
- return(is_being_deleted);
+ space->redo_skipped_count--;
+
+ mutex_exit(&fil_system->mutex);
}
+#endif /* UNIV_DEBUG */
-#ifndef UNIV_HOTBACKUP
/*******************************************************************//**
Discards a single-table tablespace. The tablespace must be cached in the
memory cache. Discarding is like deleting a tablespace, but
@@ -2960,8 +3208,7 @@ memory cache. Discarding is like deleting a tablespace, but
in DROP TABLE they are only removed gradually in the background;
3. Free all the pages in use by the tablespace.
-@return DB_SUCCESS or error */
-UNIV_INTERN
+@return DB_SUCCESS or error */
dberr_t
fil_discard_tablespace(
/*===================*/
@@ -2974,17 +3221,14 @@ fil_discard_tablespace(
break;
case DB_IO_ERROR:
- ib_logf(IB_LOG_LEVEL_WARN,
- "While deleting tablespace %lu in DISCARD TABLESPACE."
- " File rename/delete failed: %s",
- (ulong) id, ut_strerr(err));
+ ib::warn() << "While deleting tablespace " << id
+ << " in DISCARD TABLESPACE. File rename/delete"
+ " failed: " << ut_strerr(err);
break;
case DB_TABLESPACE_NOT_FOUND:
- ib_logf(IB_LOG_LEVEL_WARN,
- "Cannot delete tablespace %lu in DISCARD "
- "TABLESPACE. %s",
- (ulong) id, ut_strerr(err));
+ ib::warn() << "Cannot delete tablespace " << id
+ << " in DISCARD TABLESPACE: " << ut_strerr(err);
break;
default:
@@ -2997,111 +3241,109 @@ fil_discard_tablespace(
return(err);
}
-#endif /* !UNIV_HOTBACKUP */
/*******************************************************************//**
-Renames the memory cache structures of a single-table tablespace.
-@return TRUE if success */
-static
-ibool
-fil_rename_tablespace_in_mem(
-/*=========================*/
- fil_space_t* space, /*!< in: tablespace memory object */
- fil_node_t* node, /*!< in: file node of that tablespace */
- const char* new_name, /*!< in: new name */
- const char* new_path) /*!< in: new file path */
+Allocates and builds a file name from a path, a table or tablespace name
+and a suffix. The string must be freed by the caller with ut_free().
+@param[in] path NULL, or the directory path, or the full path and filename
+@param[in] name NULL if path is full, or the table/tablespace name
+@param[in] ext the file extension to use
+@param[in] trim_name true if the last name on the path should be trimmed
+@return own: file name */
+char*
+fil_make_filepath(
+ const char* path,
+ const char* name,
+ ib_extention ext,
+ bool trim_name)
{
- fil_space_t* space2;
- const char* old_name = space->name;
+	/* The path may contain the basename of the file; if so, we do
+	not need the name. If the path is NULL, we can use the default
+	path, but then there must be a name. */
+ ut_ad(path != NULL || name != NULL);
- ut_ad(mutex_own(&fil_system->mutex));
-
- space2 = fil_space_get_by_name(old_name);
- if (space != space2) {
- fputs("InnoDB: Error: cannot find ", stderr);
- ut_print_filename(stderr, old_name);
- fputs(" in tablespace memory cache\n", stderr);
+	/* If we are going to strip a name off the path, there had
+	better be a path and a new name to put back on. */
+ ut_ad(!trim_name || (path != NULL && name != NULL));
- return(FALSE);
+ if (path == NULL) {
+ path = fil_path_to_mysql_datadir;
}
- space2 = fil_space_get_by_name(new_name);
- if (space2 != NULL) {
- fputs("InnoDB: Error: ", stderr);
- ut_print_filename(stderr, new_name);
- fputs(" is already in tablespace memory cache\n", stderr);
+ ulint len = 0; /* current length */
+ ulint path_len = strlen(path);
+ ulint name_len = (name ? strlen(name) : 0);
+ const char* suffix = dot_ext[ext];
+ ulint suffix_len = strlen(suffix);
+ ulint full_len = path_len + 1 + name_len + suffix_len + 1;
- return(FALSE);
+ char* full_name = static_cast<char*>(ut_malloc_nokey(full_len));
+ if (full_name == NULL) {
+ return NULL;
}
- HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
- ut_fold_string(space->name), space);
- mem_free(space->name);
- mem_free(node->name);
-
- space->name = mem_strdup(new_name);
- node->name = mem_strdup(new_path);
-
- HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
- ut_fold_string(new_name), space);
- return(TRUE);
-}
-
-/*******************************************************************//**
-Allocates a file name for a single-table tablespace. The string must be freed
-by caller with mem_free().
-@return own: file name */
-UNIV_INTERN
-char*
-fil_make_ibd_name(
-/*==============*/
- const char* name, /*!< in: table name or a dir path */
- bool is_full_path) /*!< in: TRUE if it is a dir path */
-{
- char* filename;
- ulint namelen = strlen(name);
- ulint dirlen = strlen(fil_path_to_mysql_datadir);
- ulint pathlen = dirlen + namelen + sizeof "/.ibd";
-
- filename = static_cast<char*>(mem_alloc(pathlen));
-
- if (is_full_path) {
- memcpy(filename, name, namelen);
- memcpy(filename + namelen, ".ibd", sizeof ".ibd");
- } else {
- ut_snprintf(filename, pathlen, "%s/%s.ibd",
- fil_path_to_mysql_datadir, name);
+ /* If the name is a relative path, do not prepend "./". */
+ if (path[0] == '.'
+ && (path[1] == '\0' || path[1] == OS_PATH_SEPARATOR)
+ && name != NULL && name[0] == '.') {
+ path = NULL;
+ path_len = 0;
+ }
+ if (path != NULL) {
+ memcpy(full_name, path, path_len);
+ len = path_len;
+ full_name[len] = '\0';
+ os_normalize_path(full_name);
}
- srv_normalize_path_for_win(filename);
+ if (trim_name) {
+ /* Find the offset of the last DIR separator and set it to
+ null in order to strip off the old basename from this path. */
+ char* last_dir_sep = strrchr(full_name, OS_PATH_SEPARATOR);
+ if (last_dir_sep) {
+ last_dir_sep[0] = '\0';
+ len = strlen(full_name);
+ }
+ }
- return(filename);
-}
+ if (name != NULL) {
+ if (len && full_name[len - 1] != OS_PATH_SEPARATOR) {
+ /* Add a DIR separator */
+ full_name[len] = OS_PATH_SEPARATOR;
+ full_name[++len] = '\0';
+ }
-/*******************************************************************//**
-Allocates a file name for a tablespace ISL file (InnoDB Symbolic Link).
-The string must be freed by caller with mem_free().
-@return own: file name */
-UNIV_INTERN
-char*
-fil_make_isl_name(
-/*==============*/
- const char* name) /*!< in: table name */
-{
- char* filename;
- ulint namelen = strlen(name);
- ulint dirlen = strlen(fil_path_to_mysql_datadir);
- ulint pathlen = dirlen + namelen + sizeof "/.isl";
+ char* ptr = &full_name[len];
+ memcpy(ptr, name, name_len);
+ len += name_len;
+ full_name[len] = '\0';
+ os_normalize_path(ptr);
+ }
- filename = static_cast<char*>(mem_alloc(pathlen));
+ /* Make sure that the specified suffix is at the end of the filepath
+ string provided. This assumes that the suffix starts with '.'.
+ If the first char of the suffix is found in the filepath at the same
+ length as the suffix from the end, then we will assume that there is
+ a previous suffix that needs to be replaced. */
+ if (suffix != NULL) {
+ /* Need room for the trailing null byte. */
+ ut_ad(len < full_len);
- ut_snprintf(filename, pathlen, "%s/%s.isl",
- fil_path_to_mysql_datadir, name);
+ if ((len > suffix_len)
+ && (full_name[len - suffix_len] == suffix[0])) {
+ /* Another suffix exists, make it the one requested. */
+ memcpy(&full_name[len - suffix_len], suffix, suffix_len);
- srv_normalize_path_for_win(filename);
+ } else {
+ /* No previous suffix, add it. */
+ ut_ad(len + suffix_len < full_len);
+ memcpy(&full_name[len], suffix, suffix_len);
+ full_name[len + suffix_len] = '\0';
+ }
+ }
- return(filename);
+ return(full_name);
}
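+
+/* Usage sketch (not part of this patch; assumes the default datadir "."):
+   fil_make_filepath(NULL, "db/t1", IBD, false) yields "./db/t1.ibd";
+   fil_make_filepath("/data/db/t1.ibd", NULL, CFG, false) yields
+   "/data/db/t1.cfg", replacing the old suffix. */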
/** Test if a tablespace file can be renamed to a new filepath by checking
@@ -3110,480 +3352,295 @@ if that the old filepath exists and the new filepath does not exist.
@param[in] old_path old filepath
@param[in] new_path new filepath
@param[in] is_discarded whether the tablespace is discarded
+@param[in] replace_new whether to ignore the existence of new_path
@return innodb error code */
dberr_t
fil_rename_tablespace_check(
ulint space_id,
const char* old_path,
const char* new_path,
- bool is_discarded)
+ bool is_discarded,
+ bool replace_new)
{
- ulint exists = false;
+ bool exists = false;
os_file_type_t ftype;
if (!is_discarded
&& os_file_status(old_path, &exists, &ftype)
&& !exists) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot rename '%s' to '%s' for space ID %lu"
- " because the source file does not exist.",
- old_path, new_path, space_id);
-
+ ib::error() << "Cannot rename '" << old_path
+ << "' to '" << new_path
+ << "' for space ID " << space_id
+ << " because the source file"
+ << " does not exist.";
return(DB_TABLESPACE_NOT_FOUND);
}
exists = false;
- if (!os_file_status(new_path, &exists, &ftype) || exists) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot rename '%s' to '%s' for space ID %lu"
- " because the target file exists."
- " Remove the target file and try again.",
- old_path, new_path, space_id);
+ if (os_file_status(new_path, &exists, &ftype) && !exists) {
+ return DB_SUCCESS;
+ }
+ if (!replace_new) {
+ ib::error() << "Cannot rename '" << old_path
+ << "' to '" << new_path
+ << "' for space ID " << space_id
+ << " because the target file exists."
+ " Remove the target file and try again.";
return(DB_TABLESPACE_EXISTS);
}
+ /* This must be during the ROLLBACK of TRUNCATE TABLE.
+ Because InnoDB only allows at most one data dictionary
+ transaction at a time, and because this incomplete TRUNCATE
+ would have created a new tablespace file, we must remove
+ a possibly existing tablespace that is associated with the
+ new tablespace file. */
+retry:
+ mutex_enter(&fil_system->mutex);
+ for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system->space_list);
+ space; space = UT_LIST_GET_NEXT(space_list, space)) {
+ ulint id = space->id;
+ if (id && id < SRV_LOG_SPACE_FIRST_ID
+ && space->purpose == FIL_TYPE_TABLESPACE
+ && !strcmp(new_path,
+ UT_LIST_GET_FIRST(space->chain)->name)) {
+ ib::info() << "TRUNCATE rollback: " << id
+ << "," << new_path;
+ mutex_exit(&fil_system->mutex);
+ dberr_t err = fil_delete_tablespace(id);
+ if (err != DB_SUCCESS) {
+ return err;
+ }
+ goto retry;
+ }
+ }
+ mutex_exit(&fil_system->mutex);
+ fil_delete_file(new_path);
+
return(DB_SUCCESS);
}
-/*******************************************************************//**
-Renames a single-table tablespace. The tablespace must be cached in the
-tablespace memory cache.
-@return TRUE if success */
-UNIV_INTERN
-ibool
+/** Rename a single-table tablespace.
+The tablespace must exist in the memory cache.
+@param[in] id tablespace identifier
+@param[in] old_path old file name
+@param[in] new_name new table name in the
+databasename/tablename format
+@param[in] new_path_in new file name,
+or NULL if it is located in the normal data directory
+@return true if success */
+bool
fil_rename_tablespace(
-/*==================*/
- const char* old_name_in, /*!< in: old table name in the
- standard databasename/tablename
- format of InnoDB, or NULL if we
- do the rename based on the space
- id only */
- ulint id, /*!< in: space id */
- const char* new_name, /*!< in: new table name in the
- standard databasename/tablename
- format of InnoDB */
- const char* new_path_in) /*!< in: new full datafile path
- if the tablespace is remotely
- located, or NULL if it is located
- in the normal data directory. */
+ ulint id,
+ const char* old_path,
+ const char* new_name,
+ const char* new_path_in)
{
- ibool success;
fil_space_t* space;
fil_node_t* node;
- char* new_path;
- char* old_name;
- char* old_path;
- const char* not_given = "(name not specified)";
-
ut_a(id != 0);
+ ut_ad(strchr(new_name, '/') != NULL);
+
mutex_enter(&fil_system->mutex);
space = fil_space_get_by_id(id);
if (space == NULL) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot find space id %lu in the tablespace "
- "memory cache, though the table '%s' in a "
- "rename operation should have that id.",
- (ulong) id, old_name_in ? old_name_in : not_given);
+ ib::error() << "Cannot find space id " << id
+ << " in the tablespace memory cache, though the file '"
+ << old_path
+ << "' in a rename operation should have that id.";
+func_exit:
mutex_exit(&fil_system->mutex);
+ return(false);
+ }
+
+ if (space != fil_space_get_by_name(space->name)) {
+ ib::error() << "Cannot find " << space->name
+ << " in tablespace memory cache";
+ goto func_exit;
+ }
- return(FALSE);
+ if (fil_space_get_by_name(new_name)) {
+ ib::error() << new_name
+ << " is already in tablespace memory cache";
+ goto func_exit;
}
/* The following code must change when InnoDB supports
multiple datafiles per tablespace. */
ut_a(UT_LIST_GET_LEN(space->chain) == 1);
node = UT_LIST_GET_FIRST(space->chain);
-
- /* Check that the old name in the space is right */
-
- if (old_name_in) {
- old_name = mem_strdup(old_name_in);
- ut_a(strcmp(space->name, old_name) == 0);
- } else {
- old_name = mem_strdup(space->name);
- }
- old_path = mem_strdup(node->name);
-
- /* Rename the tablespace and the node in the memory cache */
- new_path = new_path_in ? mem_strdup(new_path_in)
- : fil_make_ibd_name(new_name, false);
-
- success = fil_rename_tablespace_in_mem(
- space, node, new_name, new_path);
-
- if (success) {
- DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2",
- goto skip_second_rename; );
- success = os_file_rename(
- innodb_file_data_key, old_path, new_path);
- DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2",
-skip_second_rename:
- success = FALSE; );
-
- if (!success) {
- /* We have to revert the changes we made
- to the tablespace memory cache */
-
- ut_a(fil_rename_tablespace_in_mem(
- space, node, old_name, old_path));
- }
- }
+ space->n_pending_ops++;
mutex_exit(&fil_system->mutex);
-#ifndef UNIV_HOTBACKUP
- if (success && !recv_recovery_on) {
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- fil_op_write_log(MLOG_FILE_RENAME, id, 0, 0, old_name, new_name,
- &mtr);
- mtr_commit(&mtr);
- }
-#endif /* !UNIV_HOTBACKUP */
-
- mem_free(new_path);
- mem_free(old_path);
- mem_free(old_name);
-
- return(success);
-}
-
-/*******************************************************************//**
-Creates a new InnoDB Symbolic Link (ISL) file. It is always created
-under the 'datadir' of MySQL. The datadir is the directory of a
-running mysqld program. We can refer to it by simply using the path '.'.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fil_create_link_file(
-/*=================*/
- const char* tablename, /*!< in: tablename */
- const char* filepath) /*!< in: pathname of tablespace */
-{
- dberr_t err = DB_SUCCESS;
- char* link_filepath;
- char* prev_filepath = fil_read_link_file(tablename);
+ char* new_file_name = new_path_in == NULL
+ ? fil_make_filepath(NULL, new_name, IBD, false)
+ : mem_strdup(new_path_in);
+ char* old_file_name = node->name;
+ char* new_space_name = mem_strdup(new_name);
+ char* old_space_name = space->name;
+ ulint old_fold = ut_fold_string(old_space_name);
+ ulint new_fold = ut_fold_string(new_space_name);
- ut_ad(!srv_read_only_mode);
+ ut_ad(strchr(old_file_name, OS_PATH_SEPARATOR) != NULL);
+ ut_ad(strchr(new_file_name, OS_PATH_SEPARATOR) != NULL);
- if (prev_filepath) {
- /* Truncate will call this with an existing
- link file which contains the same filepath. */
- if (0 == strcmp(prev_filepath, filepath)) {
- mem_free(prev_filepath);
- return(DB_SUCCESS);
- }
- mem_free(prev_filepath);
+ if (!recv_recovery_is_on()) {
+ fil_name_write_rename(id, old_file_name, new_file_name);
+ log_mutex_enter();
}
- link_filepath = fil_make_isl_name(tablename);
-
- /** Check if the file already exists. */
- FILE* file = NULL;
- ibool exists;
- os_file_type_t ftype;
-
- bool success = os_file_status(link_filepath, &exists, &ftype);
-
- ulint error = 0;
- if (success && !exists) {
- file = fopen(link_filepath, "w");
- if (file == NULL) {
- /* This call will print its own error message */
- error = os_file_get_last_error(true);
- }
- } else {
- error = OS_FILE_ALREADY_EXISTS;
- }
- if (error != 0) {
+ /* log_sys->mutex is above fil_system->mutex in the latching order */
+ ut_ad(log_mutex_own());
+ mutex_enter(&fil_system->mutex);
+ ut_ad(space->n_pending_ops);
+ space->n_pending_ops--;
+ ut_ad(space->name == old_space_name);
+ /* We already checked these. */
+ ut_ad(space == fil_space_get_by_name(old_space_name));
+ ut_ad(!fil_space_get_by_name(new_space_name));
+ ut_ad(node->name == old_file_name);
+ bool success;
+ DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2",
+ goto skip_second_rename; );
+ success = os_file_rename(innodb_data_file_key,
+ old_file_name,
+ new_file_name);
+ DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2",
+skip_second_rename:
+ success = false; );
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Cannot create file ", stderr);
- ut_print_filename(stderr, link_filepath);
- fputs(".\n", stderr);
+ ut_ad(node->name == old_file_name);
- if (error == OS_FILE_ALREADY_EXISTS) {
- fputs("InnoDB: The link file: ", stderr);
- ut_print_filename(stderr, filepath);
- fputs(" already exists.\n", stderr);
- err = DB_TABLESPACE_EXISTS;
- } else if (error == OS_FILE_DISK_FULL) {
- err = DB_OUT_OF_FILE_SPACE;
- } else if (error == OS_FILE_OPERATION_NOT_SUPPORTED) {
- err = DB_UNSUPPORTED;
- } else {
- err = DB_ERROR;
- }
-
- /* file is not open, no need to close it. */
- mem_free(link_filepath);
- return(err);
+ if (success) {
+ node->name = new_file_name;
}
- ulint rbytes = fwrite(filepath, 1, strlen(filepath), file);
- if (rbytes != strlen(filepath)) {
- os_file_get_last_error(true);
- ib_logf(IB_LOG_LEVEL_ERROR,
- "cannot write link file "
- "%s",filepath);
- err = DB_ERROR;
+ if (!recv_recovery_is_on()) {
+ log_mutex_exit();
}
- /* Close the file, we only need it at startup */
- fclose(file);
-
- mem_free(link_filepath);
-
- return(err);
-}
-
-/*******************************************************************//**
-Deletes an InnoDB Symbolic Link (ISL) file. */
-UNIV_INTERN
-void
-fil_delete_link_file(
-/*=================*/
- const char* tablename) /*!< in: name of table */
-{
- char* link_filepath = fil_make_isl_name(tablename);
-
- os_file_delete_if_exists(innodb_file_data_key, link_filepath);
-
- mem_free(link_filepath);
-}
-
-/*******************************************************************//**
-Reads an InnoDB Symbolic Link (ISL) file.
-It is always created under the 'datadir' of MySQL. The name is of the
-form {databasename}/{tablename}. and the isl file is expected to be in a
-'{databasename}' directory called '{tablename}.isl'. The caller must free
-the memory of the null-terminated path returned if it is not null.
-@return own: filepath found in link file, NULL if not found. */
-UNIV_INTERN
-char*
-fil_read_link_file(
-/*===============*/
- const char* name) /*!< in: tablespace name */
-{
- char* filepath = NULL;
- char* link_filepath;
- FILE* file = NULL;
-
- /* The .isl file is in the 'normal' tablespace location. */
- link_filepath = fil_make_isl_name(name);
-
- file = fopen(link_filepath, "r+b");
-
- mem_free(link_filepath);
-
- if (file) {
- filepath = static_cast<char*>(mem_alloc(OS_FILE_MAX_PATH));
-
- os_file_read_string(file, filepath, OS_FILE_MAX_PATH);
- fclose(file);
-
- if (strlen(filepath)) {
- /* Trim whitespace from end of filepath */
- ulint lastch = strlen(filepath) - 1;
- while (lastch > 4 && filepath[lastch] <= 0x20) {
- filepath[lastch--] = 0x00;
- }
- srv_normalize_path_for_win(filepath);
- }
- }
-
- return(filepath);
-}
-
-/*******************************************************************//**
-Opens a handle to the file linked to in an InnoDB Symbolic Link file.
-@return TRUE if remote linked tablespace file is found and opened. */
-UNIV_INTERN
-ibool
-fil_open_linked_file(
-/*===============*/
- const char* tablename, /*!< in: database/tablename */
- char** remote_filepath,/*!< out: remote filepath */
- pfs_os_file_t* remote_file, /*!< out: remote file handle */
- ulint atomic_writes) /*!< in: atomic writes table option
- value */
-{
- ibool success;
-
- *remote_filepath = fil_read_link_file(tablename);
- if (*remote_filepath == NULL) {
- return(FALSE);
+ ut_ad(space->name == old_space_name);
+ if (success) {
+ HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
+ old_fold, space);
+ space->name = new_space_name;
+ HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
+ new_fold, space);
+ } else {
+ /* Because nothing was renamed, we must free the new
+ names, not the old ones. */
+ old_file_name = new_file_name;
+ old_space_name = new_space_name;
}
- /* The filepath provided is different from what was
- found in the link file. */
- *remote_file = os_file_create_simple_no_error_handling(
- innodb_file_data_key, *remote_filepath,
- OS_FILE_OPEN, OS_FILE_READ_ONLY,
- &success, atomic_writes);
-
- if (!success) {
- char* link_filepath = fil_make_isl_name(tablename);
-
- /* The following call prints an error message */
- os_file_get_last_error(true);
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "A link file was found named '%s' "
- "but the linked tablespace '%s' "
- "could not be opened.",
- link_filepath, *remote_filepath);
+ mutex_exit(&fil_system->mutex);
- mem_free(link_filepath);
- mem_free(*remote_filepath);
- *remote_filepath = NULL;
- }
+ ut_free(old_file_name);
+ ut_free(old_space_name);
return(success);
}
-/*******************************************************************//**
-Creates a new single-table tablespace to a database directory of MySQL.
-Database directories are under the 'datadir' of MySQL. The datadir is the
-directory of a running mysqld program. We can refer to it by simply the
-path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
-dir of the mysqld server.
-
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+/** Create a tablespace file.
+@param[in] space_id Tablespace ID
+@param[in] name Tablespace name in dbname/tablename format.
+@param[in] path Path and filename of the datafile to create.
+@param[in] flags Tablespace flags
+@param[in] size Initial size of the tablespace file in
+ pages, must be >= FIL_IBD_FILE_INITIAL_SIZE
+@param[in] mode MariaDB encryption mode
+@param[in] key_id MariaDB encryption key_id
+@return DB_SUCCESS or error code */
dberr_t
-fil_create_new_single_table_tablespace(
-/*===================================*/
- ulint space_id, /*!< in: space id */
- const char* tablename, /*!< in: the table name in the usual
- databasename/tablename format
- of InnoDB */
- const char* dir_path, /*!< in: NULL or a dir path */
- ulint flags, /*!< in: tablespace flags */
- ulint flags2, /*!< in: table flags2 */
- ulint size, /*!< in: the initial size of the
- tablespace file in pages,
- must be >= FIL_IBD_FILE_INITIAL_SIZE */
- fil_encryption_t mode, /*!< in: encryption mode */
- ulint key_id) /*!< in: encryption key_id */
+fil_ibd_create(
+ ulint space_id,
+ const char* name,
+ const char* path,
+ ulint flags,
+ ulint size,
+ fil_encryption_t mode,
+ uint32_t key_id)
{
pfs_os_file_t file;
-
- ibool ret;
dberr_t err;
byte* buf2;
byte* page;
- char* path;
- ibool success;
- /* TRUE if a table is created with CREATE TEMPORARY TABLE */
- bool is_temp = !!(flags2 & DICT_TF2_TEMPORARY);
+ bool success;
bool has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags) != 0;
- ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags);
+ fil_space_t* space = NULL;
fil_space_crypt_t *crypt_data = NULL;
- ut_a(space_id > 0);
+ ut_ad(!is_system_tablespace(space_id));
ut_ad(!srv_read_only_mode);
ut_a(space_id < SRV_LOG_SPACE_FIRST_ID);
ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE);
ut_a(fsp_flags_is_valid(flags & ~FSP_FLAGS_MEM_MASK, space_id));
- if (is_temp) {
- /* Temporary table filepath */
- ut_ad(dir_path);
- path = fil_make_ibd_name(dir_path, true);
- } else if (has_data_dir) {
- ut_ad(dir_path);
- path = os_file_make_remote_pathname(dir_path, tablename, "ibd");
-
- /* Since this tablespace file will be created in a
- remote directory, let's create the subdirectories
- in the path, if they are not there already. */
- success = os_file_create_subdirs_if_needed(path);
- if (!success) {
- err = DB_ERROR;
- goto error_exit_3;
- }
- } else {
- path = fil_make_ibd_name(tablename, false);
+ /* Create the subdirectories in the path, if they are
+ not there already. */
+ err = os_file_create_subdirs_if_needed(path);
+ if (err != DB_SUCCESS) {
+ return(err);
}
file = os_file_create(
- innodb_file_data_key, path,
+ innodb_data_file_key, path,
OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT,
OS_FILE_NORMAL,
OS_DATA_FILE,
- &ret,
- atomic_writes);
+ srv_read_only_mode,
+ &success);
- if (ret == FALSE) {
+ if (!success) {
/* The following call will print an error message */
ulint error = os_file_get_last_error(true);
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot create file '%s'\n", path);
+ ib::error() << "Cannot create file '" << path << "'";
if (error == OS_FILE_ALREADY_EXISTS) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "The file '%s' already exists though the "
- "corresponding table did not exist "
- "in the InnoDB data dictionary. "
- "Have you moved InnoDB .ibd files "
- "around without using the SQL commands "
- "DISCARD TABLESPACE and IMPORT TABLESPACE, "
- "or did mysqld crash in the middle of "
- "CREATE TABLE? "
- "You can resolve the problem by removing "
- "the file '%s' under the 'datadir' of MySQL.",
- path, path);
-
- err = DB_TABLESPACE_EXISTS;
- goto error_exit_3;
- }
-
- if (error == OS_FILE_OPERATION_NOT_SUPPORTED) {
- err = DB_UNSUPPORTED;
- goto error_exit_3;
+ ib::info() << "The file '" << path << "'"
+ " already exists though the"
+ " corresponding table did not exist"
+ " in the InnoDB data dictionary."
+ " You can resolve the problem by removing"
+ " the file.";
+
+ return(DB_TABLESPACE_EXISTS);
}
if (error == OS_FILE_DISK_FULL) {
- err = DB_OUT_OF_FILE_SPACE;
- goto error_exit_3;
+ return(DB_OUT_OF_FILE_SPACE);
}
- err = DB_ERROR;
- goto error_exit_3;
+ return(DB_ERROR);
}
- {
- /* fil_read_first_page() expects UNIV_PAGE_SIZE bytes.
- fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes.
- Do not create too short ROW_FORMAT=COMPRESSED files. */
- const ulint zip_size = fsp_flags_get_zip_size(flags);
- const ulint page_size = zip_size ? zip_size : UNIV_PAGE_SIZE;
- const os_offset_t fsize = std::max(
- os_offset_t(size) * page_size,
- os_offset_t(FIL_IBD_FILE_INITIAL_SIZE
- * UNIV_PAGE_SIZE));
- /* ROW_FORMAT=COMPRESSED files never use page_compression
- (are never sparse). */
- ut_ad(!zip_size || !FSP_FLAGS_HAS_PAGE_COMPRESSION(flags));
+ const bool is_compressed = FSP_FLAGS_HAS_PAGE_COMPRESSION(flags);
- ret = os_file_set_size(path, file, fsize,
- FSP_FLAGS_HAS_PAGE_COMPRESSION(flags));
+#ifdef _WIN32
+ if (is_compressed) {
+ os_file_set_sparse_win32(file);
}
+#endif
+
+ success = os_file_set_size(
+ path, file,
+ os_offset_t(size) << UNIV_PAGE_SIZE_SHIFT, is_compressed);
- if (!ret) {
- err = DB_OUT_OF_FILE_SPACE;
- goto error_exit_2;
+ if (!success) {
+ os_file_close(file);
+ os_file_delete(innodb_data_file_key, path);
+ return(DB_OUT_OF_FILE_SPACE);
}
- /* printf("Creating tablespace %s id %lu\n", path, space_id); */
+ bool punch_hole = os_is_sparse_file_supported(file);
+
+ ulint block_size = os_file_get_block_size(file, path);
/* We have to write the space id to the file immediately and flush the
file to disk. This is because in crash recovery we must be aware what
@@ -3594,7 +3651,7 @@ fil_create_new_single_table_tablespace(
with zeros from the call of os_file_set_size(), until a buffer pool
flush would write to it. */
- buf2 = static_cast<byte*>(ut_malloc(3 * UNIV_PAGE_SIZE));
+ buf2 = static_cast<byte*>(ut_malloc_nokey(3 * UNIV_PAGE_SIZE));
/* Align the memory for file i/o if we might have O_DIRECT set */
page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
@@ -3604,181 +3661,120 @@ fil_create_new_single_table_tablespace(
fsp_header_init_fields(page, space_id, flags);
mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id);
- if (const ulint zip_size = fsp_flags_get_zip_size(flags)) {
- page_zip_des_t page_zip;
+	/* Create crypt data if the tablespace is either encrypted or the
+	user has requested it to remain unencrypted. */
+ if (mode == FIL_ENCRYPTION_ON || mode == FIL_ENCRYPTION_OFF ||
+ srv_encrypt_tables) {
+ crypt_data = fil_space_create_crypt_data(mode, key_id);
+ }
- page_zip_set_size(&page_zip, zip_size);
+ if (crypt_data) {
+ /* Write crypt data information in page0 while creating
+ ibd file. */
+ crypt_data->fill_page0(flags, page);
+ }
+
+ const page_size_t page_size(flags);
+ IORequest request(IORequest::WRITE);
+
+ if (!page_size.is_compressed()) {
+
+ buf_flush_init_for_writing(NULL, page, NULL, 0);
+
+ err = os_file_write(
+ request, path, file, page, 0, page_size.physical());
+ } else {
+ page_zip_des_t page_zip;
+ page_zip_set_size(&page_zip, page_size.physical());
page_zip.data = page + UNIV_PAGE_SIZE;
#ifdef UNIV_DEBUG
page_zip.m_start =
#endif /* UNIV_DEBUG */
page_zip.m_end = page_zip.m_nonempty =
page_zip.n_blobs = 0;
- buf_flush_init_for_writing(page, &page_zip, 0);
- ret = os_file_write(path, file, page_zip.data, 0, zip_size);
- } else {
- buf_flush_init_for_writing(page, NULL, 0);
- ret = os_file_write(path, file, page, 0, UNIV_PAGE_SIZE);
+
+ buf_flush_init_for_writing(NULL, page, &page_zip, 0);
+
+ err = os_file_write(
+ request, path, file, page_zip.data, 0,
+ page_size.physical());
}
ut_free(buf2);
- if (!ret) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Could not write the first page to tablespace "
- "'%s'", path);
+ if (err != DB_SUCCESS) {
- err = DB_ERROR;
- goto error_exit_2;
+ ib::error()
+ << "Could not write the first page to"
+ << " tablespace '" << path << "'";
+
+ os_file_close(file);
+ os_file_delete(innodb_data_file_key, path);
+
+ return(DB_ERROR);
}
- ret = os_file_flush(file);
+ success = os_file_flush(file);
- if (!ret) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "File flush of tablespace '%s' failed", path);
- err = DB_ERROR;
- goto error_exit_2;
+ if (!success) {
+ ib::error() << "File flush of tablespace '"
+ << path << "' failed";
+ os_file_close(file);
+ os_file_delete(innodb_data_file_key, path);
+ return(DB_ERROR);
}
if (has_data_dir) {
- /* Now that the IBD file is created, make the ISL file. */
- err = fil_create_link_file(tablename, path);
+ /* Make the ISL file if the IBD file is not
+ in the default location. */
+ err = RemoteDatafile::create_link_file(name, path);
if (err != DB_SUCCESS) {
- goto error_exit_2;
+ os_file_close(file);
+ os_file_delete(innodb_data_file_key, path);
+ return(err);
}
}
- /* Create crypt data if the tablespace is either encrypted or user has
- requested it to remain unencrypted. */
- if (mode == FIL_ENCRYPTION_ON || mode == FIL_ENCRYPTION_OFF ||
- srv_encrypt_tables) {
- crypt_data = fil_space_create_crypt_data(mode, key_id);
- }
-
- success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE,
- crypt_data, true, mode);
+ space = fil_space_create(name, space_id, flags, FIL_TYPE_TABLESPACE,
+ crypt_data, mode);
+ if (!space) {
+ if (crypt_data) {
+ free(crypt_data);
+ }
- if (!success || !fil_node_create(path, size, space_id, FALSE)) {
err = DB_ERROR;
- goto error_exit_1;
- }
-
-#ifndef UNIV_HOTBACKUP
- {
- mtr_t mtr;
- ulint mlog_file_flag = 0;
-
- if (is_temp) {
- mlog_file_flag |= MLOG_FILE_FLAG_TEMP;
- }
+ } else {
+ mtr_t mtr;
+ fil_node_t* file = space->add(path, OS_FILE_CLOSED, size,
+ false, true);
- mtr_start(&mtr);
+ mtr.start();
+ fil_op_write_log(
+ MLOG_FILE_CREATE2, space_id, 0, file->name,
+ NULL, space->flags & ~FSP_FLAGS_MEM_MASK, &mtr);
+ fil_name_write(space, 0, file, &mtr);
+ mtr.commit();
- fil_op_write_log(flags
- ? MLOG_FILE_CREATE2
- : MLOG_FILE_CREATE,
- space_id, mlog_file_flag,
- flags & ~FSP_FLAGS_MEM_MASK,
- tablename, NULL, &mtr);
+ file->block_size = block_size;
+ space->punch_hole = punch_hole;
- mtr_commit(&mtr);
+ err = DB_SUCCESS;
}
-#endif
- err = DB_SUCCESS;
- /* Error code is set. Cleanup the various variables used.
- These labels reflect the order in which variables are assigned or
- actions are done. */
-error_exit_1:
- if (has_data_dir && err != DB_SUCCESS) {
- fil_delete_link_file(tablename);
- }
-error_exit_2:
os_file_close(file);
- if (err != DB_SUCCESS) {
- os_file_delete(innodb_file_data_key, path);
- }
-error_exit_3:
- mem_free(path);
- return(err);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Report information about a bad tablespace. */
-static
-void
-fil_report_bad_tablespace(
-/*======================*/
- const char* filepath, /*!< in: filepath */
- const char* check_msg, /*!< in: fil_check_first_page() */
- ulint found_id, /*!< in: found space ID */
- ulint found_flags, /*!< in: found flags */
- ulint expected_id, /*!< in: expected space id */
- ulint expected_flags) /*!< in: expected flags */
-{
- if (check_msg) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Error %s in file '%s',"
- "tablespace id=%lu, flags=%lu. "
- "Please refer to "
- REFMAN "innodb-troubleshooting-datadict.html "
- "for how to resolve the issue.",
- check_msg, filepath,
- (ulong) expected_id, (ulong) expected_flags);
- return;
- }
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "In file '%s', tablespace id and flags are %lu and %lu, "
- "but in the InnoDB data dictionary they are %lu and %lu. "
- "Have you moved InnoDB .ibd files around without using the "
- "commands DISCARD TABLESPACE and IMPORT TABLESPACE? "
- "Please refer to "
- REFMAN "innodb-troubleshooting-datadict.html "
- "for how to resolve the issue.",
- filepath, (ulong) found_id, (ulong) found_flags,
- (ulong) expected_id, (ulong) expected_flags);
-}
-
-/** Try to adjust FSP_SPACE_FLAGS if they differ from the expectations.
-(Typically when upgrading from MariaDB 10.1.0..10.1.20.)
-@param[in] space_id tablespace ID
-@param[in] flags desired tablespace flags */
-UNIV_INTERN
-void
-fsp_flags_try_adjust(ulint space_id, ulint flags)
-{
- ut_ad(!srv_read_only_mode);
- ut_ad(fsp_flags_is_valid(flags, space_id));
-
- mtr_t mtr;
- mtr_start(&mtr);
- if (buf_block_t* b = buf_page_get(
- space_id, fsp_flags_get_zip_size(flags), 0, RW_X_LATCH,
- &mtr)) {
- ulint f = fsp_header_get_flags(b->frame);
- /* Suppress the message if only the DATA_DIR flag to differs. */
- if ((f ^ flags) & ~(1U << FSP_FLAGS_POS_RESERVED)) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "adjusting FSP_SPACE_FLAGS of tablespace "
- ULINTPF " from 0x%x to 0x%x",
- space_id, int(f), int(flags));
- }
- if (f != flags) {
- mlog_write_ulint(FSP_HEADER_OFFSET
- + FSP_SPACE_FLAGS + b->frame,
- flags, MLOG_4BYTES, &mtr);
+ if (err != DB_SUCCESS) {
+ if (has_data_dir) {
+ RemoteDatafile::delete_link_file(name);
}
+
+ os_file_delete(innodb_data_file_key, path);
}
- mtr_commit(&mtr);
+ return(err);
}
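
A hedged sketch of driving the new fil_ibd_create() entry point. The table name, path and flags below are illustrative placeholders, FIL_ENCRYPTION_DEFAULT / FIL_DEFAULT_ENCRYPTION_KEY are assumed to be the stock MariaDB encryption defaults, and the argument order follows the signature introduced above.

	/* Illustrative only: create a minimal file-per-table tablespace. */
	dberr_t	err = fil_ibd_create(
		space_id,			/* preallocated tablespace ID */
		"test/t1",			/* dbname/tablename (example) */
		"./test/t1.ibd",		/* datafile path (example) */
		flags,				/* FSP_SPACE_FLAGS */
		FIL_IBD_FILE_INITIAL_SIZE,	/* smallest legal size */
		FIL_ENCRYPTION_DEFAULT,		/* follow srv_encrypt_tables */
		FIL_DEFAULT_ENCRYPTION_KEY);

	if (err != DB_SUCCESS) {
		/* The function has already deleted the datafile and any
		ISL file it created, so no caller-side cleanup is needed. */
	}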
-/********************************************************************//**
-Tries to open a single-table tablespace and optionally checks that the
+/** Try to open a single-table tablespace and optionally check that the
space id in it is correct. If this does not succeed, print an error message
to the .err log. This function is used to open a tablespace when we start
mysqld after the dictionary has been booted, and also in IMPORT TABLESPACE.
@@ -3792,40 +3788,47 @@ If the validate boolean is set, we read the first page of the file and
check that the space id in the file is what we expect. We assume that
this function runs much faster if no check is made, since accessing the
file inode probably is much faster (the OS caches them) than accessing
-the first page of the file. This boolean may be initially FALSE, but if
+the first page of the file. This boolean may be initially false, but if
a remote tablespace is found it will be changed to true.
If the fix_dict boolean is set, then it is safe to use an internal SQL
statement to update the dictionary tables if they are incorrect.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+@param[in] validate true if we should validate the tablespace
+@param[in] fix_dict true if the dictionary is available to be fixed
+@param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_TEMPORARY
+@param[in] id tablespace ID
+@param[in] flags expected FSP_SPACE_FLAGS
+@param[in] space_name tablespace name of the datafile
+If file-per-table, it is the table name in the databasename/tablename format
+@param[in] path_in expected filepath, usually read from dictionary
+@return DB_SUCCESS or error code */
dberr_t
-fil_open_single_table_tablespace(
-/*=============================*/
- bool validate, /*!< in: Do we validate tablespace? */
- bool fix_dict, /*!< in: Can we fix the dictionary? */
- ulint id, /*!< in: space id */
- ulint flags, /*!< in: expected FSP_SPACE_FLAGS */
- const char* tablename, /*!< in: table name in the
- databasename/tablename format */
- const char* path_in) /*!< in: table */
+fil_ibd_open(
+ bool validate,
+ bool fix_dict,
+ fil_type_t purpose,
+ ulint id,
+ ulint flags,
+ const char* space_name,
+ const char* path_in)
{
dberr_t err = DB_SUCCESS;
bool dict_filepath_same_as_default = false;
bool link_file_found = false;
bool link_file_is_bad = false;
- fsp_open_info def;
- fsp_open_info dict;
- fsp_open_info remote;
+ Datafile df_default; /* default location */
+ Datafile df_dict; /* dictionary location */
+ RemoteDatafile df_remote; /* remote location */
ulint tablespaces_found = 0;
ulint valid_tablespaces_found = 0;
- fil_space_crypt_t* crypt_data = NULL;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!fix_dict || rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(!fix_dict || mutex_own(&(dict_sys->mutex)));
+ ut_ad(!fix_dict || rw_lock_own(&dict_operation_lock, RW_LOCK_X));
+
+ ut_ad(!fix_dict || mutex_own(&dict_sys->mutex));
+ ut_ad(!fix_dict || !srv_read_only_mode);
+ ut_ad(!fix_dict || srv_log_file_size != 0);
+ ut_ad(fil_type_is_data(purpose));
/* Table flags can be ULINT_UNDEFINED if
dict_tf_to_fsp_flags_failure is set. */
@@ -3834,199 +3837,128 @@ fil_open_single_table_tablespace(
}
ut_ad(fsp_flags_is_valid(flags & ~FSP_FLAGS_MEM_MASK, id));
- const ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags);
+ df_default.init(space_name, flags);
+ df_dict.init(space_name, flags);
+ df_remote.init(space_name, flags);
- memset(&def, 0, sizeof(def));
- memset(&dict, 0, sizeof(dict));
- memset(&remote, 0, sizeof(remote));
+ /* Discover the correct file by looking in three possible locations
+	while avoiding unnecessary effort. */
- /* Discover the correct filepath. We will always look for an ibd
- in the default location. If it is remote, it should not be here. */
- def.filepath = fil_make_ibd_name(tablename, false);
+ /* We will always look for an ibd in the default location. */
+ df_default.make_filepath(NULL, space_name, IBD);
- /* The path_in was read from SYS_DATAFILES. */
- if (path_in) {
- if (strcmp(def.filepath, path_in)) {
- dict.filepath = mem_strdup(path_in);
- /* possibility of multiple files. */
- validate = true;
- } else {
- dict_filepath_same_as_default = true;
- }
- }
+ /* Look for a filepath embedded in an ISL where the default file
+ would be. */
+ if (df_remote.open_read_only(true) == DB_SUCCESS) {
+ ut_ad(df_remote.is_open());
- link_file_found = fil_open_linked_file(
- tablename, &remote.filepath, &remote.file, atomic_writes);
- remote.success = link_file_found;
- if (remote.success) {
- /* possibility of multiple files. */
+ /* Always validate a file opened from an ISL pointer */
+ validate = true;
+ ++tablespaces_found;
+ link_file_found = true;
+ } else if (df_remote.filepath() != NULL) {
+		/* An ISL file was found, but it contained a bad filepath.
+		Better validate anything we do find. */
validate = true;
- tablespaces_found++;
-
- /* A link file was found. MySQL does not allow a DATA
- DIRECTORY to be be the same as the default filepath. */
- ut_a(strcmp(def.filepath, remote.filepath));
-
- /* If there was a filepath found in SYS_DATAFILES,
- we hope it was the same as this remote.filepath found
- in the ISL file. */
- if (dict.filepath
- && (0 == strcmp(dict.filepath, remote.filepath))) {
- remote.success = FALSE;
- os_file_close(remote.file);
- mem_free(remote.filepath);
- remote.filepath = NULL;
- tablespaces_found--;
- }
}
- /* Attempt to open the tablespace at other possible filepaths. */
- if (dict.filepath) {
- dict.file = os_file_create_simple_no_error_handling(
- innodb_file_data_key, dict.filepath, OS_FILE_OPEN,
- OS_FILE_READ_ONLY, &dict.success, atomic_writes);
- if (dict.success) {
- /* possibility of multiple files. */
+ /* Attempt to open the tablespace at the dictionary filepath. */
+ if (path_in) {
+ if (df_default.same_filepath_as(path_in)) {
+ dict_filepath_same_as_default = true;
+ } else {
+			/* Dict path is not the default path. Always validate
+			remote files. If the default file also opens, the
+			tablespace was moved. */
validate = true;
- tablespaces_found++;
+ df_dict.set_filepath(path_in);
+ if (df_dict.open_read_only(true) == DB_SUCCESS) {
+ ut_ad(df_dict.is_open());
+ ++tablespaces_found;
+ }
}
}
- /* Always look for a file at the default location. */
- ut_a(def.filepath);
- def.file = os_file_create_simple_no_error_handling(
- innodb_file_data_key, def.filepath, OS_FILE_OPEN,
- OS_FILE_READ_ONLY, &def.success, atomic_writes);
+ /* Always look for a file at the default location. But don't log
+ an error if the tablespace is already open in remote or dict. */
+ ut_a(df_default.filepath());
+ const bool strict = (tablespaces_found == 0);
+ if (df_default.open_read_only(strict) == DB_SUCCESS) {
+ ut_ad(df_default.is_open());
+ ++tablespaces_found;
+ }
- if (def.success) {
- tablespaces_found++;
+ /* Check if multiple locations point to the same file. */
+ if (tablespaces_found > 1 && df_default.same_as(df_remote)) {
+ /* A link file was found with the default path in it.
+ Use the default path and delete the link file. */
+ --tablespaces_found;
+ df_remote.delete_link_file();
+ df_remote.close();
+ }
+ if (tablespaces_found > 1 && df_default.same_as(df_dict)) {
+ --tablespaces_found;
+ df_dict.close();
+ }
+ if (tablespaces_found > 1 && df_remote.same_as(df_dict)) {
+ --tablespaces_found;
+ df_dict.close();
}
/* We have now checked all possible tablespace locations and
- have a count of how many we found. If things are normal, we
- only found 1. */
+ have a count of how many unique files we found. If things are
+ normal, we only found 1. */
+	/* For an encrypted tablespace, we need to check the
+	encryption information in the header of the first page. */
if (!validate && tablespaces_found == 1) {
goto skip_validate;
}
- /* Read the first page of the datadir tablespace, if found. */
- if (def.success) {
- def.check_msg = fil_read_first_page(
- def.file, false, &def.flags, &def.id,
-#ifdef UNIV_LOG_ARCHIVE
- &space_arch_log_no, &space_arch_log_no,
-#endif /* UNIV_LOG_ARCHIVE */
- NULL, &def.crypt_data);
-
- def.valid = !def.check_msg && def.id == id
- && fsp_flags_match(flags, def.flags);
-
- if (def.valid) {
- valid_tablespaces_found++;
- } else {
- /* Do not use this tablespace. */
- fil_report_bad_tablespace(
- def.filepath, def.check_msg, def.id,
- def.flags, id, flags);
- }
- }
-
- /* Read the first page of the remote tablespace */
- if (remote.success) {
- remote.check_msg = fil_read_first_page(
- remote.file, false, &remote.flags, &remote.id,
-#ifdef UNIV_LOG_ARCHIVE
- &remote.arch_log_no, &remote.arch_log_no,
-#endif /* UNIV_LOG_ARCHIVE */
- NULL, &remote.crypt_data);
+ /* Read and validate the first page of these three tablespace
+ locations, if found. */
+ valid_tablespaces_found +=
+ (df_remote.validate_to_dd(id, flags) == DB_SUCCESS);
- /* Validate this single-table-tablespace with SYS_TABLES. */
- remote.valid = !remote.check_msg && remote.id == id
- && fsp_flags_match(flags, remote.flags);
+ valid_tablespaces_found +=
+ (df_default.validate_to_dd(id, flags) == DB_SUCCESS);
- if (remote.valid) {
- valid_tablespaces_found++;
- } else {
- /* Do not use this linked tablespace. */
- fil_report_bad_tablespace(
- remote.filepath, remote.check_msg, remote.id,
- remote.flags, id, flags);
- link_file_is_bad = true;
- }
- }
-
- /* Read the first page of the datadir tablespace, if found. */
- if (dict.success) {
- dict.check_msg = fil_read_first_page(
- dict.file, false, &dict.flags, &dict.id,
-#ifdef UNIV_LOG_ARCHIVE
- &dict.arch_log_no, &dict.arch_log_no,
-#endif /* UNIV_LOG_ARCHIVE */
- NULL, &dict.crypt_data);
-
- /* Validate this single-table-tablespace with SYS_TABLES. */
- dict.valid = !dict.check_msg && dict.id == id
- && fsp_flags_match(flags, dict.flags);
-
- if (dict.valid) {
- valid_tablespaces_found++;
- } else {
- /* Do not use this tablespace. */
- fil_report_bad_tablespace(
- dict.filepath, dict.check_msg, dict.id,
- dict.flags, id, flags);
- }
- }
+ valid_tablespaces_found +=
+ (df_dict.validate_to_dd(id, flags) == DB_SUCCESS);
/* Make sense of these three possible locations.
First, bail out if no tablespace files were found. */
if (valid_tablespaces_found == 0) {
- /* The following call prints an error message */
os_file_get_last_error(true);
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Could not find a valid tablespace file for '%s'. "
- "See " REFMAN "innodb-troubleshooting-datadict.html "
- "for how to resolve the issue.",
- tablename);
-
- err = DB_CORRUPTION;
-
- goto cleanup_and_exit;
+ ib::error() << "Could not find a valid tablespace file for `"
+ << space_name << "`. " << TROUBLESHOOT_DATADICT_MSG;
+ return(DB_CORRUPTION);
+ }
+ if (!validate) {
+ goto skip_validate;
}
/* Do not open any tablespaces if more than one tablespace with
the correct space ID and flags were found. */
if (tablespaces_found > 1) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "A tablespace for %s has been found in "
- "multiple places;", tablename);
-
- if (def.success) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Default location; %s"
- ", Space ID=" ULINTPF " , Flags=" ULINTPF " .",
- def.filepath,
- def.id,
- def.flags);
+ ib::error() << "A tablespace for `" << space_name
+ << "` has been found in multiple places;";
+
+ if (df_default.is_open()) {
+ ib::error() << "Default location: "
+ << df_default.filepath()
+ << ", Space ID=" << df_default.space_id()
+ << ", Flags=" << df_default.flags();
}
-
- if (remote.success) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Remote location; %s"
- ", Space ID=" ULINTPF " , Flags=" ULINTPF " .",
- remote.filepath,
- remote.id,
- remote.flags);
+ if (df_remote.is_open()) {
+ ib::error() << "Remote location: "
+ << df_remote.filepath()
+ << ", Space ID=" << df_remote.space_id()
+ << ", Flags=" << df_remote.flags();
}
-
- if (dict.success) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Dictionary location; %s"
- ", Space ID=" ULINTPF " , Flags=" ULINTPF " .",
- dict.filepath,
- dict.id,
- dict.flags);
+ if (df_dict.is_open()) {
+ ib::error() << "Dictionary location: "
+ << df_dict.filepath()
+ << ", Space ID=" << df_dict.space_id()
+ << ", Flags=" << df_dict.flags();
}
/* Force-recovery will allow some tablespaces to be
@@ -4037,43 +3969,44 @@ fil_open_single_table_tablespace(
recovery and there is only one good tablespace, ignore
any bad tablespaces. */
if (valid_tablespaces_found > 1 || srv_force_recovery > 0) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Will not open the tablespace for '%s'",
- tablename);
-
- if (def.success != def.valid
- || dict.success != dict.valid
- || remote.success != remote.valid) {
- err = DB_CORRUPTION;
- } else {
- err = DB_ERROR;
+ ib::error() << "Will not open tablespace `"
+ << space_name << "`";
+
+ /* If the file is not open it cannot be valid. */
+ ut_ad(df_default.is_open() || !df_default.is_valid());
+ ut_ad(df_dict.is_open() || !df_dict.is_valid());
+ ut_ad(df_remote.is_open() || !df_remote.is_valid());
+
+ /* Having established that, this is an easy way to
+ look for corrupted data files. */
+ if (df_default.is_open() != df_default.is_valid()
+ || df_dict.is_open() != df_dict.is_valid()
+ || df_remote.is_open() != df_remote.is_valid()) {
+ return(DB_CORRUPTION);
}
- goto cleanup_and_exit;
+error:
+ return(DB_ERROR);
}
/* There is only one valid tablespace found and we did
not use srv_force_recovery during REDO. Use this one
tablespace and clean up invalid tablespace pointers */
- if (def.success && !def.valid) {
- def.success = false;
- os_file_close(def.file);
+ if (df_default.is_open() && !df_default.is_valid()) {
+ df_default.close();
tablespaces_found--;
}
- if (dict.success && !dict.valid) {
- dict.success = false;
- os_file_close(dict.file);
+ if (df_dict.is_open() && !df_dict.is_valid()) {
+ df_dict.close();
/* Leave dict.filepath so that SYS_DATAFILES
can be corrected below. */
tablespaces_found--;
}
- if (remote.success && !remote.valid) {
- remote.success = false;
- os_file_close(remote.file);
- mem_free(remote.filepath);
- remote.filepath = NULL;
+ if (df_remote.is_open() && !df_remote.is_valid()) {
+ df_remote.close();
tablespaces_found--;
+ link_file_is_bad = true;
}
}
@@ -4087,735 +4020,442 @@ fil_open_single_table_tablespace(
goto skip_validate;
}
- /* We may need to change what is stored in SYS_DATAFILES or
- SYS_TABLESPACES or adjust the link file.
- Since a failure to update SYS_TABLESPACES or SYS_DATAFILES does
- not prevent opening and using the single_table_tablespace either
- this time or the next, we do not check the return code or fail
- to open the tablespace. But dict_update_filepath() will issue a
- warning to the log. */
- if (dict.filepath) {
- if (remote.success) {
- dict_update_filepath(id, remote.filepath);
- } else if (def.success) {
- dict_update_filepath(id, def.filepath);
+ /* We may need to update what is stored in SYS_DATAFILES or
+ SYS_TABLESPACES or adjust the link file. Since a failure to
+ update SYS_TABLESPACES or SYS_DATAFILES does not prevent opening
+ and using the tablespace either this time or the next, we do not
+ check the return code or fail to open the tablespace. But if it
+ fails, dict_update_filepath() will issue a warning to the log. */
+ if (df_dict.filepath()) {
+ ut_ad(path_in != NULL);
+ ut_ad(df_dict.same_filepath_as(path_in));
+
+ if (df_remote.is_open()) {
+ if (!df_remote.same_filepath_as(path_in)) {
+ dict_update_filepath(id, df_remote.filepath());
+ }
+
+ } else if (df_default.is_open()) {
+ ut_ad(!dict_filepath_same_as_default);
+ dict_update_filepath(id, df_default.filepath());
if (link_file_is_bad) {
- fil_delete_link_file(tablename);
+ RemoteDatafile::delete_link_file(space_name);
}
+
} else if (!link_file_found || link_file_is_bad) {
- ut_ad(dict.success);
+ ut_ad(df_dict.is_open());
/* Fix the link file if we got our filepath
from the dictionary but a link file did not
exist or it did not point to a valid file. */
- fil_delete_link_file(tablename);
- fil_create_link_file(tablename, dict.filepath);
+ RemoteDatafile::delete_link_file(space_name);
+ RemoteDatafile::create_link_file(
+ space_name, df_dict.filepath());
}
- } else if (remote.success && dict_filepath_same_as_default) {
- dict_update_filepath(id, remote.filepath);
+ } else if (df_remote.is_open()) {
+ if (dict_filepath_same_as_default) {
+ dict_update_filepath(id, df_remote.filepath());
- } else if (remote.success && path_in == NULL) {
- /* SYS_DATAFILES record for this space ID was not found. */
- dict_insert_tablespace_and_filepath(
- id, tablename, remote.filepath, flags);
+ } else if (path_in == NULL) {
+ /* SYS_DATAFILES record for this space ID
+ was not found. */
+ dict_replace_tablespace_and_filepath(
+ id, space_name, df_remote.filepath(), flags);
+ }
+
+ } else if (df_default.is_open()) {
+ /* We opened the tablespace in the default location.
+ SYS_DATAFILES.PATH needs to be updated if it is different
+ from this default path or if the SYS_DATAFILES.PATH was not
+ supplied and it should have been. Also update the dictionary
+ if we found an ISL file (since !df_remote.is_open). Since
+	path_in is not supplied for file-per-table, we must assume
+ that it matched the ISL. */
+ if ((path_in != NULL && !dict_filepath_same_as_default)
+ || (path_in == NULL && DICT_TF_HAS_DATA_DIR(flags))
+ || df_remote.filepath() != NULL) {
+ dict_replace_tablespace_and_filepath(
+ id, space_name, df_default.filepath(), flags);
+ }
}
skip_validate:
- if (remote.success)
- crypt_data = remote.crypt_data;
- else if (dict.success)
- crypt_data = dict.crypt_data;
- else if (def.success)
- crypt_data = def.crypt_data;
+ if (err == DB_SUCCESS) {
+ const byte* first_page =
+ df_default.is_open() ? df_default.get_first_page() :
+ df_dict.is_open() ? df_dict.get_first_page() :
+ df_remote.get_first_page();
+
+ fil_space_crypt_t* crypt_data = first_page
+ ? fil_space_read_crypt_data(page_size_t(flags),
+ first_page)
+ : NULL;
+
+ fil_space_t* space = fil_space_create(
+ space_name, id, flags, purpose, crypt_data);
+ if (!space) {
+ goto error;
+ }
- if (err != DB_SUCCESS) {
- ; // Don't load the tablespace into the cache
- } else if (!fil_space_create(tablename, id, flags, FIL_TABLESPACE,
- crypt_data, false)) {
- err = DB_ERROR;
- } else {
/* We do not measure the size of the file, that is why
we pass the 0 below */
- if (!fil_node_create(remote.success ? remote.filepath :
- dict.success ? dict.filepath :
- def.filepath, 0, id, FALSE)) {
- err = DB_ERROR;
- }
- }
-
-cleanup_and_exit:
- if (remote.success) {
- os_file_close(remote.file);
- }
- if (remote.filepath) {
- mem_free(remote.filepath);
- }
- if (remote.crypt_data && remote.crypt_data != crypt_data) {
- if (err == DB_SUCCESS) {
- fil_space_destroy_crypt_data(&remote.crypt_data);
- }
- }
- if (dict.success) {
- os_file_close(dict.file);
- }
- if (dict.filepath) {
- mem_free(dict.filepath);
- }
- if (dict.crypt_data && dict.crypt_data != crypt_data) {
- fil_space_destroy_crypt_data(&dict.crypt_data);
- }
- if (def.success) {
- os_file_close(def.file);
- }
- if (def.crypt_data && def.crypt_data != crypt_data) {
- if (err == DB_SUCCESS) {
- fil_space_destroy_crypt_data(&def.crypt_data);
+ space->add(
+ df_remote.is_open() ? df_remote.filepath() :
+ df_dict.is_open() ? df_dict.filepath() :
+ df_default.filepath(), OS_FILE_CLOSED, 0, false, true);
+
+ if (err == DB_SUCCESS && validate
+ && purpose != FIL_TYPE_IMPORT && !srv_read_only_mode) {
+ df_remote.close();
+ df_dict.close();
+ df_default.close();
+ fsp_flags_try_adjust(id, flags & ~FSP_FLAGS_MEM_MASK);
}
}
- mem_free(def.filepath);
-
- if (err == DB_SUCCESS && validate && !srv_read_only_mode) {
- fsp_flags_try_adjust(id, flags & ~FSP_FLAGS_MEM_MASK);
- }
-
return(err);
}
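
The discovery logic above boils down to: open up to three candidate files (default, remote/ISL, dictionary), drop duplicates, validate each first page against the expected space ID and flags, and open the tablespace only when exactly one usable file remains. A self-contained model of that arbitration; Candidate is a hypothetical stand-in for the Datafile objects, not an InnoDB type.

	#include <cstddef>

	struct Candidate { bool open; bool valid; const char* path; };

	/* Return the single usable path, or NULL to refuse opening. */
	static const char* pick_datafile(const Candidate c[3], bool force_recovery)
	{
		int found = 0, valid = 0;
		const char* chosen = NULL;
		for (int i = 0; i < 3; i++) {
			if (!c[i].open) {
				continue;
			}
			found++;
			if (c[i].valid) {
				valid++;
				chosen = c[i].path;
			}
		}
		/* When several distinct files exist, only exactly one
		valid copy, without forced recovery, is unambiguous. */
		if (found > 1 && (valid != 1 || force_recovery)) {
			return NULL;
		}
		return chosen;
	}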
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_HOTBACKUP
-/*******************************************************************//**
-Allocates a file name for an old version of a single-table tablespace.
-The string must be freed by caller with mem_free()!
-@return own: file name */
-static
-char*
-fil_make_ibbackup_old_name(
-/*=======================*/
- const char* name) /*!< in: original file name */
-{
- static const char suffix[] = "_ibbackup_old_vers_";
- char* path;
- ulint len = strlen(name);
-
- path = static_cast<char*>(mem_alloc(len + (15 + sizeof suffix)));
-
- memcpy(path, name, len);
- memcpy(path + len, suffix, (sizeof suffix) - 1);
- ut_sprintf_timestamp_without_extra_chars(
- path + len + ((sizeof suffix) - 1));
- return(path);
-}
-#endif /* UNIV_HOTBACKUP */
-
-/*******************************************************************//**
-Determine the space id of the given file descriptor by reading a few
-pages from the beginning of the .ibd file.
-@return true if space id was successfully identified, or false. */
-static
+/** Looks for a pre-existing fil_space_t with the given tablespace ID
+and, if found, returns the name and filepath in newly allocated buffers
+that the caller must free.
+@param[in] space_id The tablespace ID to search for.
+@param[out] name Name of the tablespace found.
+@param[out] filepath The filepath of the first datafile for the
+tablespace.
+@return true if tablespace is found, false if not. */
bool
-fil_user_tablespace_find_space_id(
-/*==============================*/
- fsp_open_info* fsp) /* in/out: contains file descriptor, which is
- used as input. contains space_id, which is
- the output */
+fil_space_read_name_and_filepath(
+ ulint space_id,
+ char** name,
+ char** filepath)
{
- bool st;
- os_offset_t file_size;
-
- file_size = os_file_get_size(fsp->file);
-
- if (file_size == (os_offset_t) -1) {
- ib_logf(IB_LOG_LEVEL_ERROR, "Could not get file size: %s",
- fsp->filepath);
- return(false);
- }
-
- /* Assuming a page size, read the space_id from each page and store it
- in a map. Find out which space_id is agreed on by majority of the
- pages. Choose that space_id. */
- for (ulint page_size = UNIV_ZIP_SIZE_MIN;
- page_size <= UNIV_PAGE_SIZE_MAX; page_size <<= 1) {
-
- /* map[space_id] = count of pages */
- std::map<ulint, ulint> verify;
-
- ulint page_count = 64;
- ulint valid_pages = 0;
-
- /* Adjust the number of pages to analyze based on file size */
- while ((page_count * page_size) > file_size) {
- --page_count;
- }
-
- ib_logf(IB_LOG_LEVEL_INFO, "Page size:%lu Pages to analyze:"
- "%lu", page_size, page_count);
-
- byte* buf = static_cast<byte*>(ut_malloc(2*page_size));
- byte* page = static_cast<byte*>(ut_align(buf, page_size));
-
- for (ulint j = 0; j < page_count; ++j) {
-
- st = os_file_read(fsp->file, page, (j* page_size), page_size);
-
- if (!st) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "READ FAIL: page_no:%lu", j);
- continue;
- }
-
- bool uncompressed_ok = false;
-
- /* For uncompressed pages, the page size must be equal
- to UNIV_PAGE_SIZE. */
- if (page_size == UNIV_PAGE_SIZE) {
- uncompressed_ok = !buf_page_is_corrupted(
- false, page, 0, NULL);
- }
-
- bool compressed_ok = false;
- if (page_size <= UNIV_PAGE_SIZE_DEF) {
- compressed_ok = !buf_page_is_corrupted(
- false, page, page_size, NULL);
- }
-
- if (uncompressed_ok || compressed_ok) {
-
- ulint space_id = mach_read_from_4(page
- + FIL_PAGE_SPACE_ID);
-
- if (space_id > 0) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "VALID: space:%lu "
- "page_no:%lu page_size:%lu",
- space_id, j, page_size);
- verify[space_id]++;
- ++valid_pages;
- }
- }
- }
+ bool success = false;
+ *name = NULL;
+ *filepath = NULL;
- ut_free(buf);
-
- ib_logf(IB_LOG_LEVEL_INFO, "Page size: %lu, Possible space_id "
- "count:%lu", page_size, (ulint) verify.size());
+ mutex_enter(&fil_system->mutex);
- const ulint pages_corrupted = 3;
- for (ulint missed = 0; missed <= pages_corrupted; ++missed) {
+ fil_space_t* space = fil_space_get_by_id(space_id);
- for (std::map<ulint, ulint>::iterator
- m = verify.begin(); m != verify.end(); ++m ) {
+ if (space != NULL) {
+ *name = mem_strdup(space->name);
- ib_logf(IB_LOG_LEVEL_INFO, "space_id:%lu, "
- "Number of pages matched: %lu/%lu "
- "(%lu)", m->first, m->second,
- valid_pages, page_size);
+ fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
+ *filepath = mem_strdup(node->name);
- if (m->second == (valid_pages - missed)) {
+ success = true;
+ }
- ib_logf(IB_LOG_LEVEL_INFO,
- "Chosen space:%lu\n", m->first);
+ mutex_exit(&fil_system->mutex);
- fsp->id = m->first;
- return(true);
- }
- }
+ return(success);
+}
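
A usage sketch for the lookup above, assuming a space_id variable in scope. Both out-parameters are mem_strdup() copies, so the caller releases them with ut_free(), which tolerates NULL in this codebase.

	char*	name = NULL;
	char*	filepath = NULL;

	if (fil_space_read_name_and_filepath(space_id, &name, &filepath)) {
		ib::info() << "space " << space_id << " is '" << name
			<< "' at '" << filepath << "'";
	}

	ut_free(name);
	ut_free(filepath);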
- }
- }
+/** Convert a file name to a tablespace name.
+@param[in] filename directory/databasename/tablename.ibd
+@return database/tablename string, to be freed with ut_free() */
+char*
+fil_path_to_space_name(
+ const char* filename)
+{
+ /* Strip the file name prefix and suffix, leaving
+ only databasename/tablename. */
+ ulint filename_len = strlen(filename);
+ const char* end = filename + filename_len;
+#ifdef HAVE_MEMRCHR
+ const char* tablename = 1 + static_cast<const char*>(
+ memrchr(filename, OS_PATH_SEPARATOR,
+ filename_len));
+ const char* dbname = 1 + static_cast<const char*>(
+ memrchr(filename, OS_PATH_SEPARATOR,
+ tablename - filename - 1));
+#else /* HAVE_MEMRCHR */
+ const char* tablename = filename;
+ const char* dbname = NULL;
+
+ while (const char* t = static_cast<const char*>(
+ memchr(tablename, OS_PATH_SEPARATOR,
+ end - tablename))) {
+ dbname = tablename;
+ tablename = t + 1;
+ }
+#endif /* HAVE_MEMRCHR */
+
+ ut_ad(dbname != NULL);
+ ut_ad(tablename > dbname);
+ ut_ad(tablename < end);
+ ut_ad(end - tablename > 4);
+ ut_ad(memcmp(end - 4, DOT_IBD, 4) == 0);
+
+ char* name = mem_strdupl(dbname, end - dbname - 4);
+
+ ut_ad(name[tablename - dbname - 1] == OS_PATH_SEPARATOR);
+#if OS_PATH_SEPARATOR != '/'
+ /* space->name uses '/', not OS_PATH_SEPARATOR. */
+ name[tablename - dbname - 1] = '/';
+#endif
- return(false);
+ return(name);
}
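
So "./test/t1.ibd" maps to "test/t1", with the separator normalized to '/' where OS_PATH_SEPARATOR differs. A standalone approximation of the same parsing, runnable outside InnoDB; like the ut_ad() checks above, it assumes at least two separators and a four-character ".ibd" suffix.

	#include <cassert>
	#include <string>

	/* Keep the last two path components and drop the ".ibd" suffix. */
	static std::string path_to_space_name(const std::string& path, char sep)
	{
		std::string::size_type tbl = path.rfind(sep);
		std::string::size_type db = path.rfind(sep, tbl - 1);
		std::string name = path.substr(db + 1, path.size() - db - 5);
		if (sep != '/') {
			name[tbl - db - 1] = '/';	/* normalize separator */
		}
		return name;
	}

	int main()
	{
		assert(path_to_space_name("./test/t1.ibd", '/') == "test/t1");
		return 0;
	}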
-/*******************************************************************//**
-Finds the given page_no of the given space id from the double write buffer,
-and copies it to the corresponding .ibd file.
-@return true if copy was successful, or false. */
+/** Discover the correct IBD file to open given a remote or missing
+filepath from the REDO log. Administrators can move a crashed
+database to another location on the same machine and try to recover it.
+Remote IBD files might be moved as well to the new location.
+ The problem with this is that the REDO log contains the old location
+which may be still accessible. During recovery, if files are found in
+both locations, we can chose on based on these priorities;
+1. Default location
+2. ISL location
+3. REDO location
+@param[in] space_id tablespace ID
+@param[in] df Datafile object with path from redo
+@return true if a valid datafile was found, false if not */
+static
bool
-fil_user_tablespace_restore_page(
-/*==============================*/
- fsp_open_info* fsp, /* in: contains space id and .ibd
- file information */
- ulint page_no) /* in: page_no to obtain from double
- write buffer */
+fil_ibd_discover(
+ ulint space_id,
+ Datafile& df)
{
- bool err;
- ulint flags;
- ulint zip_size;
- ulint page_size;
- ulint buflen;
- byte* page;
-
- ib_logf(IB_LOG_LEVEL_INFO, "Restoring page %lu of tablespace %lu",
- page_no, fsp->id);
-
- // find if double write buffer has page_no of given space id
- page = recv_sys->dblwr.find_page(fsp->id, page_no);
-
- if (!page) {
- ib_logf(IB_LOG_LEVEL_WARN, "Doublewrite does not have "
- "page_no=%lu of space: %lu", page_no, fsp->id);
- err = false;
- goto out;
- }
-
- flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page);
-
- if (!fsp_flags_is_valid(flags, fsp->id)) {
- ulint cflags = fsp_flags_convert_from_101(flags);
- if (cflags == ULINT_UNDEFINED) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Ignoring a doublewrite copy of page "
- ULINTPF ":" ULINTPF
- " due to invalid flags 0x%x",
- fsp->id, page_no, int(flags));
- err = false;
- goto out;
+ Datafile df_def_per; /* default file-per-table datafile */
+ RemoteDatafile df_rem_per; /* remote file-per-table datafile */
+
+ /* Look for the datafile in the default location. */
+ const char* filename = df.filepath();
+ const char* basename = base_name(filename);
+
+ /* If this datafile is file-per-table it will have a schema dir. */
+ ulint sep_found = 0;
+ const char* db = basename;
+ for (; db > filename && sep_found < 2; db--) {
+ if (db[0] == OS_PATH_SEPARATOR) {
+ sep_found++;
}
- flags = cflags;
- /* The flags on the page should be converted later. */
}
+ if (sep_found == 2) {
+ db += 2;
+ df_def_per.init(db, 0);
+ df_def_per.make_filepath(NULL, db, IBD);
+ if (df_def_per.open_read_only(false) == DB_SUCCESS
+ && df_def_per.validate_for_recovery() == DB_SUCCESS
+ && df_def_per.space_id() == space_id) {
+ df.set_filepath(df_def_per.filepath());
+ df.open_read_only(false);
+ return(true);
+ }
- zip_size = fsp_flags_get_zip_size(flags);
- page_size = fsp_flags_get_page_size(flags);
-
- ut_ad(page_no == page_get_page_no(page));
-
- buflen = zip_size ? zip_size: page_size;
-
- ib_logf(IB_LOG_LEVEL_INFO, "Writing %lu bytes into file: %s",
- buflen, fsp->filepath);
-
- err = os_file_write(fsp->filepath, fsp->file, page,
- (zip_size ? zip_size : page_size) * page_no,
- buflen);
+ /* Look for a remote file-per-table tablespace. */
- os_file_flush(fsp->file);
-out:
- return(err);
-}
+ switch (srv_operation) {
+ case SRV_OPERATION_BACKUP:
+ case SRV_OPERATION_RESTORE_DELTA:
+ ut_ad(0);
+ break;
+ case SRV_OPERATION_RESTORE_ROLLBACK_XA:
+ case SRV_OPERATION_RESTORE_EXPORT:
+ case SRV_OPERATION_RESTORE:
+ break;
+ case SRV_OPERATION_NORMAL:
+ df_rem_per.set_name(db);
+ if (df_rem_per.open_link_file() != DB_SUCCESS) {
+ break;
+ }
-/********************************************************************//**
-Opens an .ibd file and adds the associated single-table tablespace to the
-InnoDB fil0fil.cc data structures.
-Set fsp->success to TRUE if tablespace is valid, FALSE if not. */
-static
-void
-fil_validate_single_table_tablespace(
-/*=================================*/
- const char* tablename, /*!< in: database/tablename */
- fsp_open_info* fsp) /*!< in/out: tablespace info */
-{
- bool restore_attempted = false;
-
-check_first_page:
- fsp->success = TRUE;
- fsp->encryption_error = 0;
- if (const char* check_msg = fil_read_first_page(
- fsp->file, FALSE, &fsp->flags, &fsp->id,
-#ifdef UNIV_LOG_ARCHIVE
- &fsp->arch_log_no, &fsp->arch_log_no,
-#endif /* UNIV_LOG_ARCHIVE */
- NULL, &fsp->crypt_data)) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "%s in tablespace %s (table %s)",
- check_msg, fsp->filepath, tablename);
- fsp->success = FALSE;
- }
-
- if (!fsp->success) {
- if (!restore_attempted) {
- if (!fil_user_tablespace_find_space_id(fsp)) {
- return;
+ /* An ISL file was found with contents. */
+ if (df_rem_per.open_read_only(false) != DB_SUCCESS
+ || df_rem_per.validate_for_recovery()
+ != DB_SUCCESS) {
+
+ /* Assume that this ISL file is intended to
+ be used. Do not continue looking for another
+ if this file cannot be opened or is not
+ a valid IBD file. */
+ ib::error() << "ISL file '"
+ << df_rem_per.link_filepath()
+ << "' was found but the linked file '"
+ << df_rem_per.filepath()
+ << "' could not be opened or is"
+ " not correct.";
+ return(false);
}
- restore_attempted = true;
- if (fsp->id > 0
- && !fil_user_tablespace_restore_page(fsp, 0)) {
- return;
+ /* Use this file if it has the space_id from the
+ MLOG record. */
+ if (df_rem_per.space_id() == space_id) {
+ df.set_filepath(df_rem_per.filepath());
+ df.open_read_only(false);
+ return(true);
}
- goto check_first_page;
- }
- return;
- }
- if (fsp->id == ULINT_UNDEFINED || fsp->id == 0) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Tablespace is not sensible;"
- " Table: %s Space ID: %lu Filepath: %s\n",
- tablename, (ulong) fsp->id, fsp->filepath);
- fsp->success = FALSE;
- return;
+ /* Since old MLOG records can use the same basename
+ in multiple CREATE/DROP TABLE sequences, this ISL
+ file could be pointing to a later version of this
+ basename.ibd file which has a different space_id.
+ Keep looking. */
+ }
}
- mutex_enter(&fil_system->mutex);
- fil_space_t* space = fil_space_get_by_id(fsp->id);
- mutex_exit(&fil_system->mutex);
- if (space != NULL) {
- char* prev_filepath = fil_space_get_first_path(fsp->id);
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Attempted to open a previously opened tablespace. "
- "Previous tablespace %s uses space ID: %lu at "
- "filepath: %s. Cannot open tablespace %s which uses "
- "space ID: %lu at filepath: %s",
- space->name, (ulong) space->id, prev_filepath,
- tablename, (ulong) fsp->id, fsp->filepath);
-
- mem_free(prev_filepath);
- fsp->success = FALSE;
- return;
+ /* No ISL files were found in the default location. Use the location
+ given in the redo log. */
+ if (df.open_read_only(false) == DB_SUCCESS
+ && df.validate_for_recovery() == DB_SUCCESS
+ && df.space_id() == space_id) {
+ return(true);
}
- fsp->success = TRUE;
+ /* A datafile was not discovered for the filename given. */
+ return(false);
}
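
fil_ibd_load() below is the only caller of this helper; a hedged sketch of that interaction, where redo_path stands for a filepath taken from an MLOG_FILE_ record:

	Datafile	df;
	df.set_filepath(redo_path);

	if (df.open_read_only(false) != DB_SUCCESS
	    && !fil_ibd_discover(space_id, df)) {
		/* No usable file in the default, ISL or redo-log
		location; fil_ibd_load() reports FIL_LOAD_NOT_FOUND. */
	}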
-
-
-/********************************************************************//**
-Opens an .ibd file and adds the associated single-table tablespace to the
-InnoDB fil0fil.cc data structures. */
-static
-void
-fil_load_single_table_tablespace(
-/*=============================*/
- const char* dbname, /*!< in: database name */
- const char* filename) /*!< in: file name (not a path),
- including the .ibd or .isl extension */
+/** Open an ibd tablespace and add it to the InnoDB data structures.
+This is similar to fil_ibd_open() except that it is used while processing
+the REDO log, so the data dictionary is not available and very little
+validation is done. The tablespace name is extracted from the
+dbname/tablename.ibd portion of the filename, which assumes that the file
+is a file-per-table tablespace. Any name will do for now. General
+tablespace names will be read from the dictionary after it has been
+recovered. The tablespace flags are read at this time from the first page
+of the file in validate_for_recovery().
+@param[in] space_id tablespace ID
+@param[in] filename path/to/databasename/tablename.ibd
+@param[out] space the tablespace, or NULL on error
+@return status of the operation */
+enum fil_load_status
+fil_ibd_load(
+ ulint space_id,
+ const char* filename,
+ fil_space_t*& space)
{
- char* tablename;
- ulint tablename_len;
- ulint dbname_len = strlen(dbname);
- ulint filename_len = strlen(filename);
- fsp_open_info def;
- fsp_open_info remote;
- os_offset_t size;
- fil_space_t* space;
-
- memset(&def, 0, sizeof(def));
- memset(&remote, 0, sizeof(remote));
-
- /* The caller assured that the extension is ".ibd" or ".isl". */
- ut_ad(0 == memcmp(filename + filename_len - 4, ".ibd", 4)
- || 0 == memcmp(filename + filename_len - 4, ".isl", 4));
-
- /* Build up the tablename in the standard form database/table. */
- tablename = static_cast<char*>(
- mem_alloc(dbname_len + filename_len + 2));
-
- /* When lower_case_table_names = 2 it is possible that the
- dbname is in upper case ,but while storing it in fil_space_t
- we must convert it into lower case */
- sprintf(tablename, "%s" , dbname);
- tablename[dbname_len] = '\0';
-
- if (lower_case_file_system) {
- dict_casedn_str(tablename);
- }
-
- sprintf(tablename+dbname_len,"/%s",filename);
- tablename_len = strlen(tablename) - strlen(".ibd");
- tablename[tablename_len] = '\0';
-
- /* There may be both .ibd and .isl file in the directory.
- And it is possible that the .isl file refers to a different
- .ibd file. If so, we open and compare them the first time
- one of them is sent to this function. So if this table has
- already been loaded, there is nothing to do.*/
+	/* If a space is already in the file system cache with this
+ space ID, then there is nothing to do. */
mutex_enter(&fil_system->mutex);
- space = fil_space_get_by_name(tablename);
- if (space) {
- mem_free(tablename);
- mutex_exit(&fil_system->mutex);
- return;
- }
+ space = fil_space_get_by_id(space_id);
mutex_exit(&fil_system->mutex);
- /* Build up the filepath of the .ibd tablespace in the datadir.
- This must be freed independent of def.success. */
- def.filepath = fil_make_ibd_name(tablename, false);
-
-#ifdef __WIN__
-# ifndef UNIV_HOTBACKUP
- /* If lower_case_table_names is 0 or 2, then MySQL allows database
- directory names with upper case letters. On Windows, all table and
- database names in InnoDB are internally always in lower case. Put the
- file path to lower case, so that we are consistent with InnoDB's
- internal data dictionary. */
-
- dict_casedn_str(def.filepath);
-# endif /* !UNIV_HOTBACKUP */
-#endif
-
- /* Check for a link file which locates a remote tablespace. */
- remote.success = fil_open_linked_file(
- tablename, &remote.filepath, &remote.file, FALSE);
-
- /* Read the first page of the remote tablespace */
- if (remote.success) {
- fil_validate_single_table_tablespace(tablename, &remote);
- if (!remote.success) {
- os_file_close(remote.file);
- mem_free(remote.filepath);
- }
- }
-
-
- /* Try to open the tablespace in the datadir. */
- def.file = os_file_create_simple_no_error_handling(
- innodb_file_data_key, def.filepath, OS_FILE_OPEN,
- OS_FILE_READ_WRITE, &def.success, FALSE);
-
- /* Read the first page of the remote tablespace */
- if (def.success) {
- fil_validate_single_table_tablespace(tablename, &def);
- if (!def.success) {
- os_file_close(def.file);
+ if (space != NULL) {
+ /* Compare the filename we are trying to open with the
+ filename from the first node of the tablespace we opened
+ previously. Fail if it is different. */
+ fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
+ if (0 != strcmp(innobase_basename(filename),
+ innobase_basename(node->name))) {
+ ib::info()
+ << "Ignoring data file '" << filename
+ << "' with space ID " << space->id
+ << ". Another data file called " << node->name
+ << " exists with the same space ID.";
+ space = NULL;
+ return(FIL_LOAD_ID_CHANGED);
}
+ return(FIL_LOAD_OK);
}
- if (!def.success && !remote.success) {
-
- if (def.encryption_error || remote.encryption_error) {
- fprintf(stderr,
- "InnoDB: Error: could not open single-table"
- " tablespace file %s. Encryption error!\n", def.filepath);
- return;
- }
-
- /* The following call prints an error message */
- os_file_get_last_error(true);
- fprintf(stderr,
- "InnoDB: Error: could not open single-table"
- " tablespace file %s\n", def.filepath);
-
- if (!strncmp(filename,
- tmp_file_prefix, tmp_file_prefix_length)) {
- /* Ignore errors for #sql tablespaces. */
- mem_free(tablename);
- if (remote.filepath) {
- mem_free(remote.filepath);
+ if (is_mariabackup_restore()) {
+ /* Replace absolute DATA DIRECTORY file paths with
+ short names relative to the backup directory. */
+ if (const char* name = strrchr(filename, OS_PATH_SEPARATOR)) {
+ while (--name > filename
+ && *name != OS_PATH_SEPARATOR);
+ if (name > filename) {
+ filename = name + 1;
}
- if (def.filepath) {
- mem_free(def.filepath);
- }
- return;
- }
-no_good_file:
- fprintf(stderr,
- "InnoDB: We do not continue the crash recovery,"
- " because the table may become\n"
- "InnoDB: corrupt if we cannot apply the log"
- " records in the InnoDB log to it.\n"
- "InnoDB: To fix the problem and start mysqld:\n"
- "InnoDB: 1) If there is a permission problem"
- " in the file and mysqld cannot\n"
- "InnoDB: open the file, you should"
- " modify the permissions.\n"
- "InnoDB: 2) If the table is not needed, or you"
- " can restore it from a backup,\n"
- "InnoDB: then you can remove the .ibd file,"
- " and InnoDB will do a normal\n"
- "InnoDB: crash recovery and ignore that table.\n"
- "InnoDB: 3) If the file system or the"
- " disk is broken, and you cannot remove\n"
- "InnoDB: the .ibd file, you can set"
- " innodb_force_recovery > 0 in my.cnf\n"
- "InnoDB: and force InnoDB to continue crash"
- " recovery here.\n");
-will_not_choose:
- mem_free(tablename);
- if (remote.filepath) {
- mem_free(remote.filepath);
- }
- if (def.filepath) {
- mem_free(def.filepath);
}
-
- if (srv_force_recovery > 0) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "innodb_force_recovery was set to %lu. "
- "Continuing crash recovery even though we "
- "cannot access the .ibd file of this table.",
- srv_force_recovery);
- return;
- }
-
- abort();
}
- if (def.success && remote.success) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Tablespaces for %s have been found in two places;\n"
- "Location 1: SpaceID: " ULINTPF " File: %s\n"
- "Location 2: SpaceID: " ULINTPF " File: %s\n"
- "You must delete one of them.",
- tablename, def.id,
- def.filepath, remote.id,
- remote.filepath);
+ Datafile file;
+ file.set_filepath(filename);
+ file.open_read_only(false);
- def.success = FALSE;
- os_file_close(def.file);
- os_file_close(remote.file);
- goto will_not_choose;
- }
-
- /* At this point, only one tablespace is open */
- ut_a(def.success == !remote.success);
-
- fsp_open_info* fsp = def.success ? &def : &remote;
-
- /* Get and test the file size. */
- size = os_file_get_size(fsp->file);
-
- if (size == (os_offset_t) -1) {
- /* The following call prints an error message */
- os_file_get_last_error(true);
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "could not measure the size of single-table "
- "tablespace file %s", fsp->filepath);
-
- os_file_close(fsp->file);
- goto no_good_file;
+ if (!file.is_open()) {
+ /* The file has been moved or it is a remote datafile. */
+ if (!fil_ibd_discover(space_id, file)
+ || !file.is_open()) {
+ return(FIL_LOAD_NOT_FOUND);
+ }
}
- /* Every .ibd file is created >= 4 pages in size. Smaller files
- cannot be ok. */
- ulong minimum_size = FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE;
- if (size < minimum_size) {
-#ifndef UNIV_HOTBACKUP
- ib_logf(IB_LOG_LEVEL_ERROR,
- "The size of single-table tablespace file %s "
- "is only " UINT64PF ", should be at least %lu!",
- fsp->filepath, size, minimum_size);
- os_file_close(fsp->file);
- goto no_good_file;
-#else
- fsp->id = ULINT_UNDEFINED;
- fsp->flags = 0;
-#endif /* !UNIV_HOTBACKUP */
- }
+ os_offset_t size;
-#ifdef UNIV_HOTBACKUP
- if (fsp->id == ULINT_UNDEFINED || fsp->id == 0) {
- char* new_path;
+ /* Read and validate the first page of the tablespace.
+ Assign a tablespace name based on the tablespace type. */
+ switch (file.validate_for_recovery()) {
+ os_offset_t minimum_size;
+ case DB_SUCCESS:
+ if (file.space_id() != space_id) {
+ ib::info()
+ << "Ignoring data file '"
+ << file.filepath()
+ << "' with space ID " << file.space_id()
+ << ", since the redo log references "
+ << file.filepath() << " with space ID "
+ << space_id << ".";
+ return(FIL_LOAD_ID_CHANGED);
+ }
+ /* Get and test the file size. */
+ size = os_file_get_size(file.handle());
- fprintf(stderr,
- "InnoDB: Renaming tablespace %s of id %lu,\n"
- "InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
- "InnoDB: because its size %" PRId64 " is too small"
- " (< 4 pages 16 kB each),\n"
- "InnoDB: or the space id in the file header"
- " is not sensible.\n"
- "InnoDB: This can happen in an mysqlbackup run,"
- " and is not dangerous.\n",
- fsp->filepath, fsp->id, fsp->filepath, size);
- os_file_close(fsp->file);
+ /* Every .ibd file is created >= 4 pages in size.
+ Smaller files cannot be OK. */
+ minimum_size = FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE;
- new_path = fil_make_ibbackup_old_name(fsp->filepath);
+ if (size == static_cast<os_offset_t>(-1)) {
+ /* The following call prints an error message */
+ os_file_get_last_error(true);
- bool success = os_file_rename(
- innodb_file_data_key, fsp->filepath, new_path);
+ ib::error() << "Could not measure the size of"
+ " single-table tablespace file '"
+ << file.filepath() << "'";
+ } else if (size < minimum_size) {
+ ib::error() << "The size of tablespace file '"
+ << file.filepath() << "' is only " << size
+ << ", should be at least " << minimum_size
+ << "!";
+ } else {
+ /* Everything is fine so far. */
+ break;
+ }
- ut_a(success);
+ /* fall through */
- mem_free(new_path);
+ case DB_TABLESPACE_EXISTS:
+ return(FIL_LOAD_INVALID);
- goto func_exit_after_close;
+ default:
+ return(FIL_LOAD_NOT_FOUND);
}
- /* A backup may contain the same space several times, if the space got
- renamed at a sensitive time. Since it is enough to have one version of
- the space, we rename the file if a space with the same space id
- already exists in the tablespace memory cache. We rather rename the
- file than delete it, because if there is a bug, we do not want to
- destroy valuable data. */
-
- mutex_enter(&fil_system->mutex);
+ ut_ad(space == NULL);
- space = fil_space_get_by_id(fsp->id);
-
- if (space) {
- char* new_path;
-
- fprintf(stderr,
- "InnoDB: Renaming tablespace %s of id %lu,\n"
- "InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
- "InnoDB: because space %s with the same id\n"
- "InnoDB: was scanned earlier. This can happen"
- " if you have renamed tables\n"
- "InnoDB: during an mysqlbackup run.\n",
- fsp->filepath, fsp->id, fsp->filepath,
- space->name);
- os_file_close(fsp->file);
-
- new_path = fil_make_ibbackup_old_name(fsp->filepath);
-
- mutex_exit(&fil_system->mutex);
-
- bool success = os_file_rename(
- innodb_file_data_key, fsp->filepath, new_path);
-
- ut_a(success);
-
- mem_free(new_path);
-
- goto func_exit_after_close;
- }
- mutex_exit(&fil_system->mutex);
-#endif /* UNIV_HOTBACKUP */
/* Adjust the memory-based flags that would normally be set by
dict_tf_to_fsp_flags(). In recovery, we have no data dictionary. */
- if (FSP_FLAGS_HAS_PAGE_COMPRESSION(fsp->flags)) {
- fsp->flags |= page_zip_level
+ ulint flags = file.flags();
+ if (FSP_FLAGS_HAS_PAGE_COMPRESSION(flags)) {
+ flags |= page_zip_level
<< FSP_FLAGS_MEM_COMPRESSION_LEVEL;
}
- remote.flags |= 1U << FSP_FLAGS_MEM_DATA_DIR;
- /* We will leave atomic_writes at ATOMIC_WRITES_DEFAULT.
- That will be adjusted in fil_space_for_table_exists_in_mem(). */
-
- ibool file_space_create_success = fil_space_create(
- tablename, fsp->id, fsp->flags, FIL_TABLESPACE,
- fsp->crypt_data, false);
-
- if (!file_space_create_success) {
- if (srv_force_recovery > 0) {
- fprintf(stderr,
- "InnoDB: innodb_force_recovery was set"
- " to %lu. Continuing crash recovery\n"
- "InnoDB: even though the tablespace"
- " creation of this table failed.\n",
- srv_force_recovery);
- goto func_exit;
- }
- /* Exit here with a core dump, stack, etc. */
- ut_a(file_space_create_success);
+ const byte* first_page = file.get_first_page();
+ fil_space_crypt_t* crypt_data = first_page
+ ? fil_space_read_crypt_data(page_size_t(flags), first_page)
+ : NULL;
+ space = fil_space_create(
+ file.name(), space_id, flags, FIL_TYPE_TABLESPACE, crypt_data);
+
+ if (space == NULL) {
+ return(FIL_LOAD_INVALID);
}
+ ut_ad(space->id == file.space_id());
+ ut_ad(space->id == space_id);
+
/* We do not use the size information we have about the file, because
the rounding formula for extents and pages is somewhat complex; we
let fil_node_open() do that task. */
- if (!fil_node_create(fsp->filepath, 0, fsp->id, FALSE)) {
- ut_error;
- }
+ space->add(file.filepath(), OS_FILE_CLOSED, 0, false, false);
-func_exit:
- os_file_close(fsp->file);
-
-#ifdef UNIV_HOTBACKUP
-func_exit_after_close:
-#else
- ut_ad(!mutex_own(&fil_system->mutex));
-#endif
- mem_free(tablename);
- if (remote.success) {
- mem_free(remote.filepath);
- }
- mem_free(def.filepath);
+ return(FIL_LOAD_OK);
}
/***********************************************************************//**
@@ -4824,7 +4464,6 @@ directory. We retry 100 times if os_file_readdir_next_file() returns -1. The
idea is to read as much good data as we can and jump over bad data.
@return 0 if ok, -1 if error even after the retries, 1 if at the end
of the directory */
-static
int
fil_file_readdir_next_file(
/*=======================*/
@@ -4843,10 +4482,10 @@ fil_file_readdir_next_file(
return(ret);
}
- ib_logf(IB_LOG_LEVEL_ERROR,
- "os_file_readdir_next_file() returned -1 in "
- "directory %s, crash recovery may have failed "
- "for some .ibd files!", dirname);
+ ib::error() << "os_file_readdir_next_file() returned -1 in"
+ " directory " << dirname
+ << ", crash recovery may have failed"
+ " for some .ibd files!";
*err = DB_ERROR;
}
@@ -4854,244 +4493,56 @@ fil_file_readdir_next_file(
return(-1);
}
-/********************************************************************//**
-At the server startup, if we need crash recovery, scans the database
-directories under the MySQL datadir, looking for .ibd files. Those files are
-single-table tablespaces. We need to know the space id in each of them so that
-we know into which file we should look to check the contents of a page stored
-in the doublewrite buffer, also to know where to apply log records where the
-space id is != 0.
-@return DB_SUCCESS or error number */
+/** Try to adjust FSP_SPACE_FLAGS if they differ from the expectations.
+(Typically when upgrading from MariaDB 10.1.0..10.1.20.)
+@param[in] space_id tablespace ID
+@param[in] flags desired tablespace flags */
UNIV_INTERN
-dberr_t
-fil_load_single_table_tablespaces(void)
-/*===================================*/
+void
+fsp_flags_try_adjust(ulint space_id, ulint flags)
{
- int ret;
- char* dbpath = NULL;
- ulint dbpath_len = 100;
- os_file_dir_t dir;
- os_file_dir_t dbdir;
- os_file_stat_t dbinfo;
- os_file_stat_t fileinfo;
- dberr_t err = DB_SUCCESS;
-
- /* The datadir of MySQL is always the default directory of mysqld */
-
- dir = os_file_opendir(fil_path_to_mysql_datadir, TRUE);
-
- if (dir == NULL) {
-
- return(DB_ERROR);
+ ut_ad(!srv_read_only_mode);
+ ut_ad(fsp_flags_is_valid(flags, space_id));
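+	/* If the tablespace is unknown to the cache or empty,
+	there is no first page whose flags could be adjusted. */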
+ if (!fil_space_get_size(space_id)) {
+ return;
}
-
- dbpath = static_cast<char*>(mem_alloc(dbpath_len));
-
- /* Scan all directories under the datadir. They are the database
- directories of MySQL. */
-
- ret = fil_file_readdir_next_file(&err, fil_path_to_mysql_datadir, dir,
- &dbinfo);
- while (ret == 0) {
- ulint len;
- /* printf("Looking at %s in datadir\n", dbinfo.name); */
-
- if (dbinfo.type == OS_FILE_TYPE_FILE
- || dbinfo.type == OS_FILE_TYPE_UNKNOWN) {
-
- goto next_datadir_item;
- }
-
- /* We found a symlink or a directory; try opening it to see
- if a symlink is a directory */
-
- len = strlen(fil_path_to_mysql_datadir)
- + strlen (dbinfo.name) + 2;
- if (len > dbpath_len) {
- dbpath_len = len;
-
- if (dbpath) {
- mem_free(dbpath);
- }
-
- dbpath = static_cast<char*>(mem_alloc(dbpath_len));
+ mtr_t mtr;
+ mtr.start();
+ if (buf_block_t* b = buf_page_get(
+ page_id_t(space_id, 0), page_size_t(flags),
+ RW_X_LATCH, &mtr)) {
+ ulint f = fsp_header_get_flags(b->frame);
+		/* Suppress the message if only the DATA_DIR flag differs. */
+ if ((f ^ flags) & ~(1U << FSP_FLAGS_POS_RESERVED)) {
+ ib::warn()
+ << "adjusting FSP_SPACE_FLAGS of tablespace "
+ << space_id
+ << " from " << ib::hex(f)
+ << " to " << ib::hex(flags);
}
- ut_snprintf(dbpath, dbpath_len,
- "%s/%s", fil_path_to_mysql_datadir, dbinfo.name);
- srv_normalize_path_for_win(dbpath);
-
- dbdir = os_file_opendir(dbpath, FALSE);
-
- if (dbdir != NULL) {
-
- /* We found a database directory; loop through it,
- looking for possible .ibd files in it */
-
- ret = fil_file_readdir_next_file(&err, dbpath, dbdir,
- &fileinfo);
- while (ret == 0) {
-
- if (fileinfo.type == OS_FILE_TYPE_DIR) {
-
- goto next_file_item;
- }
-
- /* We found a symlink or a file */
- if (strlen(fileinfo.name) > 4
- && (0 == strcmp(fileinfo.name
- + strlen(fileinfo.name) - 4,
- ".ibd")
- || 0 == strcmp(fileinfo.name
- + strlen(fileinfo.name) - 4,
- ".isl"))) {
- /* The name ends in .ibd or .isl;
- try opening the file */
- fil_load_single_table_tablespace(
- dbinfo.name, fileinfo.name);
- }
-next_file_item:
- ret = fil_file_readdir_next_file(&err,
- dbpath, dbdir,
- &fileinfo);
- }
-
- if (0 != os_file_closedir(dbdir)) {
- fputs("InnoDB: Warning: could not"
- " close database directory ", stderr);
- ut_print_filename(stderr, dbpath);
- putc('\n', stderr);
-
- err = DB_ERROR;
- }
+ if (f != flags) {
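+			/* Overwrite FSP_SPACE_FLAGS on page 0 of the
+			tablespace, generating redo for the change. */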
+ mtr.set_named_space(space_id);
+ mlog_write_ulint(FSP_HEADER_OFFSET
+ + FSP_SPACE_FLAGS + b->frame,
+ flags, MLOG_4BYTES, &mtr);
}
-
-next_datadir_item:
- ret = fil_file_readdir_next_file(&err,
- fil_path_to_mysql_datadir,
- dir, &dbinfo);
}
-
- mem_free(dbpath);
-
- if (0 != os_file_closedir(dir)) {
- fprintf(stderr,
- "InnoDB: Error: could not close MySQL datadir\n");
-
- return(DB_ERROR);
- }
-
- return(err);
-}
-
-/*******************************************************************//**
-Returns TRUE if a single-table tablespace does not exist in the memory cache,
-or is being deleted there.
-@return TRUE if does not exist or is being deleted */
-UNIV_INTERN
-ibool
-fil_tablespace_deleted_or_being_deleted_in_mem(
-/*===========================================*/
- ulint id, /*!< in: space id */
- ib_int64_t version)/*!< in: tablespace_version should be this; if
- you pass -1 as the value of this, then this
- parameter is ignored */
-{
- fil_space_t* space;
-
- ut_ad(fil_system);
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- if (space == NULL || space->is_stopping()) {
- mutex_exit(&fil_system->mutex);
-
- return(TRUE);
- }
-
- if (version != ((ib_int64_t)-1)
- && space->tablespace_version != version) {
- mutex_exit(&fil_system->mutex);
-
- return(TRUE);
- }
-
- mutex_exit(&fil_system->mutex);
-
- return(FALSE);
-}
-
-/*******************************************************************//**
-Returns TRUE if a single-table tablespace exists in the memory cache.
-@return TRUE if exists */
-UNIV_INTERN
-ibool
-fil_tablespace_exists_in_mem(
-/*=========================*/
- ulint id) /*!< in: space id */
-{
- fil_space_t* space;
-
- ut_ad(fil_system);
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- mutex_exit(&fil_system->mutex);
-
- return(space != NULL);
-}
-
-/*******************************************************************//**
-Report that a tablespace for a table was not found. */
-static
-void
-fil_report_missing_tablespace(
-/*===========================*/
- const char* name, /*!< in: table name */
- ulint space_id) /*!< in: table's space id */
-{
- char index_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(index_name, sizeof(index_name), name, TRUE);
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Table %s in the InnoDB data dictionary has tablespace id %lu, "
- "but tablespace with that id or name does not exist. Have "
- "you deleted or moved .ibd files? This may also be a table "
- "created with CREATE TEMPORARY TABLE whose .ibd and .frm "
- "files MySQL automatically removed, but the table still "
- "exists in the InnoDB internal data dictionary.",
- name, space_id);
+ mtr.commit();
}
-/** Check if a matching tablespace exists in the InnoDB tablespace memory
-cache. Note that if we have not done a crash recovery at the database startup,
-there may be many tablespaces which are not yet in the memory cache.
-@return whether a matching tablespace exists in the memory cache */
-UNIV_INTERN
+/** Determine if a matching tablespace exists in the InnoDB tablespace
+memory cache. Note that if we have not done a crash recovery at the database
+startup, there may be many tablespaces which are not yet in the memory cache.
+@param[in] id Tablespace ID
+@param[in] name Tablespace name used in fil_space_create().
+@param[in] table_flags table flags
+@return true if a matching tablespace exists in the memory cache */
bool
fil_space_for_table_exists_in_mem(
-/*==============================*/
- ulint id, /*!< in: space id */
- const char* name, /*!< in: table name used in
- fil_space_create(). Either the
- standard 'dbname/tablename' format
- or table->dir_path_of_temp_table */
- bool print_error_if_does_not_exist,
- /*!< in: print detailed error
- information to the .err log if a
- matching tablespace is not found from
- memory */
- bool adjust_space, /*!< in: whether to adjust space id
- when find table space mismatch */
- mem_heap_t* heap, /*!< in: heap memory */
- table_id_t table_id, /*!< in: table id */
- ulint table_flags) /*!< in: table flags */
+ ulint id,
+ const char* name,
+ ulint table_flags)
{
- fil_space_t* fnamespace;
fil_space_t* space;
const ulint expected_flags = dict_tf_to_fsp_flags(table_flags);
@@ -5105,105 +4556,10 @@ fil_space_for_table_exists_in_mem(
/* Look if there is a space with the same name; the name is the
directory path from the datadir to the file */
- fnamespace = fil_space_get_by_name(name);
- bool valid = space && !((space->flags ^ expected_flags)
- & ~FSP_FLAGS_MEM_MASK);
-
- if (!space) {
- } else if (!valid || space == fnamespace) {
- /* Found with the same file name, or got a flag mismatch. */
- goto func_exit;
- } else if (adjust_space
- && row_is_mysql_tmp_table_name(space->name)
- && !row_is_mysql_tmp_table_name(name)) {
- /* Info from fnamespace comes from the ibd file
- itself, it can be different from data obtained from
- System tables since renaming files is not
- transactional. We shall adjust the ibd file name
- according to system table info. */
- mutex_exit(&fil_system->mutex);
+ const bool valid = space
+ && !((space->flags ^ expected_flags) & ~FSP_FLAGS_MEM_MASK)
+ && space == fil_space_get_by_name(name);
- DBUG_EXECUTE_IF("ib_crash_before_adjust_fil_space",
- DBUG_SUICIDE(););
-
- char* tmp_name = dict_mem_create_temporary_tablename(
- heap, name, table_id);
-
- fil_rename_tablespace(fnamespace->name, fnamespace->id,
- tmp_name, NULL);
-
- DBUG_EXECUTE_IF("ib_crash_after_adjust_one_fil_space",
- DBUG_SUICIDE(););
-
- fil_rename_tablespace(space->name, id, name, NULL);
-
- DBUG_EXECUTE_IF("ib_crash_after_adjust_fil_space",
- DBUG_SUICIDE(););
-
- mutex_enter(&fil_system->mutex);
- fnamespace = fil_space_get_by_name(name);
- ut_ad(space == fnamespace);
- goto func_exit;
- }
-
- if (!print_error_if_does_not_exist) {
- valid = false;
- goto func_exit;
- }
-
- if (space == NULL) {
- if (fnamespace == NULL) {
- if (print_error_if_does_not_exist) {
- fil_report_missing_tablespace(name, id);
- }
- } else {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_filename(stderr, name);
- fprintf(stderr, "\n"
- "InnoDB: in InnoDB data dictionary has"
- " tablespace id %lu,\n"
- "InnoDB: but a tablespace with that id"
- " does not exist. There is\n"
- "InnoDB: a tablespace of name %s and id %lu,"
- " though. Have\n"
- "InnoDB: you deleted or moved .ibd files?\n",
- (ulong) id, fnamespace->name,
- (ulong) fnamespace->id);
- }
-error_exit:
- fputs("InnoDB: Please refer to\n"
- "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
- "InnoDB: for how to resolve the issue.\n", stderr);
- valid = false;
- goto func_exit;
- }
-
- if (0 != strcmp(space->name, name)) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_filename(stderr, name);
- fprintf(stderr, "\n"
- "InnoDB: in InnoDB data dictionary has"
- " tablespace id %lu,\n"
- "InnoDB: but the tablespace with that id"
- " has name %s.\n"
- "InnoDB: Have you deleted or moved .ibd files?\n",
- (ulong) id, space->name);
-
- if (fnamespace != NULL) {
- fputs("InnoDB: There is a tablespace"
- " with the right name\n"
- "InnoDB: ", stderr);
- ut_print_filename(stderr, fnamespace->name);
- fprintf(stderr, ", but its id is %lu.\n",
- (ulong) fnamespace->id);
- }
-
- goto error_exit;
- }
-
-func_exit:
if (valid) {
/* Adjust the flags that are in FSP_FLAGS_MEM_MASK.
FSP_SPACE_FLAGS will not be written back here. */
@@ -5218,143 +4574,32 @@ func_exit:
return(valid);
}
-/*******************************************************************//**
-Checks if a single-table tablespace for a given table name exists in the
-tablespace memory cache.
-@return space id, ULINT_UNDEFINED if not found */
-UNIV_INTERN
+/** Return the space ID based on the tablespace name.
+The tablespace must be found in the tablespace memory cache.
+This call is made from outside this module, so the mutex is not owned.
+@param[in]	tablespace	Tablespace name
+@return space ID if tablespace found, ULINT_UNDEFINED if not found. */
ulint
-fil_get_space_id_for_table(
-/*=======================*/
- const char* tablename) /*!< in: table name in the standard
- 'databasename/tablename' format */
+fil_space_get_id_by_name(
+ const char* tablespace)
{
- fil_space_t* fnamespace;
- ulint id = ULINT_UNDEFINED;
-
- ut_ad(fil_system);
-
mutex_enter(&fil_system->mutex);
- /* Look if there is a space with the same name. */
-
- fnamespace = fil_space_get_by_name(tablename);
-
- if (fnamespace) {
- id = fnamespace->id;
- }
+ /* Search for a space with the same name. */
+ fil_space_t* space = fil_space_get_by_name(tablespace);
+ ulint id = (space == NULL) ? ULINT_UNDEFINED : space->id;
mutex_exit(&fil_system->mutex);
return(id);
}
-/**********************************************************************//**
-Tries to extend a data file so that it would accommodate the number of pages
-given. The tablespace must be cached in the memory cache. If the space is big
-enough already, does nothing.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_extend_space_to_desired_size(
-/*=============================*/
- ulint* actual_size, /*!< out: size of the space after extension;
- if we ran out of disk space this may be lower
- than the desired size */
- ulint space_id, /*!< in: space id */
- ulint size_after_extend)/*!< in: desired size in pages after the
- extension; if the current space size is bigger
- than this already, the function does nothing */
-{
- ut_ad(!srv_read_only_mode);
-
- for (;;) {
- fil_mutex_enter_and_prepare_for_io(space_id);
-
- fil_space_t* space = fil_space_get_by_id(space_id);
- ut_a(space);
- ibool success;
-
- if (!fil_space_extend_must_retry(
- space, UT_LIST_GET_LAST(space->chain),
- size_after_extend, &success)) {
- *actual_size = space->size;
- mutex_exit(&fil_system->mutex);
- return(success);
- }
- }
-}
-
-#ifdef UNIV_HOTBACKUP
-/********************************************************************//**
-Extends all tablespaces to the size stored in the space header. During the
-mysqlbackup --apply-log phase we extended the spaces on-demand so that log
-records could be applied, but that may have left spaces still too small
-compared to the size stored in the space header. */
-UNIV_INTERN
-void
-fil_extend_tablespaces_to_stored_len(void)
-/*======================================*/
-{
- fil_space_t* space;
- byte* buf;
- ulint actual_size;
- ulint size_in_header;
- dberr_t error;
- ibool success;
-
- buf = mem_alloc(UNIV_PAGE_SIZE);
-
- mutex_enter(&fil_system->mutex);
-
- space = UT_LIST_GET_FIRST(fil_system->space_list);
-
- while (space) {
- ut_a(space->purpose == FIL_TABLESPACE);
-
- mutex_exit(&fil_system->mutex); /* no need to protect with a
- mutex, because this is a
- single-threaded operation */
- error = fil_read(TRUE, space->id,
- fsp_flags_get_zip_size(space->flags),
- 0, 0, UNIV_PAGE_SIZE, buf, NULL, 0);
- ut_a(error == DB_SUCCESS);
-
- size_in_header = fsp_get_size_low(buf);
-
- success = fil_extend_space_to_desired_size(
- &actual_size, space->id, size_in_header);
- if (!success) {
- fprintf(stderr,
- "InnoDB: Error: could not extend the"
- " tablespace of %s\n"
- "InnoDB: to the size stored in header,"
- " %lu pages;\n"
- "InnoDB: size after extension %lu pages\n"
- "InnoDB: Check that you have free disk space"
- " and retry!\n",
- space->name, size_in_header, actual_size);
- ut_a(success);
- }
-
- mutex_enter(&fil_system->mutex);
-
- space = UT_LIST_GET_NEXT(space_list, space);
- }
-
- mutex_exit(&fil_system->mutex);
-
- mem_free(buf);
-}
-#endif
-
/*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/
/*******************************************************************//**
Tries to reserve free extents in a file space.
-@return TRUE if succeed */
-UNIV_INTERN
-ibool
+@return true on success */
+bool
fil_space_reserve_free_extents(
/*===========================*/
ulint id, /*!< in: space id */
@@ -5362,7 +4607,7 @@ fil_space_reserve_free_extents(
ulint n_to_reserve) /*!< in: how many one wants to reserve */
{
fil_space_t* space;
- ibool success;
+ bool success;
ut_ad(fil_system);
@@ -5373,10 +4618,10 @@ fil_space_reserve_free_extents(
ut_a(space);
if (space->n_reserved_extents + n_to_reserve > n_free_now) {
- success = FALSE;
+ success = false;
} else {
space->n_reserved_extents += n_to_reserve;
- success = TRUE;
+ success = true;
}
mutex_exit(&fil_system->mutex);
@@ -5386,7 +4631,6 @@ fil_space_reserve_free_extents(
/*******************************************************************//**
Releases free extents in a file space. */
-UNIV_INTERN
void
fil_space_release_free_extents(
/*===========================*/
@@ -5412,7 +4656,6 @@ fil_space_release_free_extents(
/*******************************************************************//**
Gets the number of reserved extents. If the database is silent, this number
should be zero. */
-UNIV_INTERN
ulint
fil_space_get_n_reserved_extents(
/*=============================*/
@@ -5458,19 +4701,15 @@ fil_node_prepare_for_io(
ut_ad(mutex_own(&(system->mutex)));
if (system->n_open > system->max_n_open + 5) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: open files %lu"
- " exceeds the limit %lu\n",
- (ulong) system->n_open,
- (ulong) system->max_n_open);
+ ib::warn() << "Open files " << system->n_open
+ << " exceeds the limit " << system->max_n_open;
}
- if (node->open == FALSE) {
+ if (!node->is_open()) {
/* File is closed: open it */
ut_a(node->n_pending == 0);
- if (!fil_node_open_file(node, system, space)) {
+ if (!fil_node_open_file(node)) {
return(false);
}
}
@@ -5480,7 +4719,7 @@ fil_node_prepare_for_io(
ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
- UT_LIST_REMOVE(LRU, system->LRU, node);
+ UT_LIST_REMOVE(system->LRU, node);
}
node->n_pending++;
@@ -5488,219 +4727,113 @@ fil_node_prepare_for_io(
return(true);
}
-/********************************************************************//**
-Updates the data structures when an i/o operation finishes. Updates the
-pending i/o's field in the node appropriately. */
+/** Update the data structures when an i/o operation finishes.
+@param[in,out] node file node
+@param[in] type IO context */
static
void
-fil_node_complete_io(
-/*=================*/
- fil_node_t* node, /*!< in: file node */
- fil_system_t* system, /*!< in: tablespace memory cache */
- ulint type) /*!< in: OS_FILE_WRITE or OS_FILE_READ; marks
- the node as modified if
- type == OS_FILE_WRITE */
+fil_node_complete_io(fil_node_t* node, const IORequest& type)
{
- ut_ad(node);
- ut_ad(system);
- ut_ad(mutex_own(&(system->mutex)));
-
+ ut_ad(mutex_own(&fil_system->mutex));
ut_a(node->n_pending > 0);
- node->n_pending--;
+ --node->n_pending;
+
+ ut_ad(type.validate());
- if (type == OS_FILE_WRITE) {
- ut_ad(!srv_read_only_mode);
- system->modification_counter++;
- node->modification_counter = system->modification_counter;
+ if (type.is_write()) {
+
+ ut_ad(!srv_read_only_mode
+ || fsp_is_system_temporary(node->space->id));
if (fil_buffering_disabled(node->space)) {
/* We don't need to keep track of unflushed
changes as user has explicitly disabled
buffering. */
- ut_ad(!node->space->is_in_unflushed_spaces);
- node->flush_counter = node->modification_counter;
+ ut_ad(!node->space->is_in_unflushed_spaces());
+ ut_ad(node->needs_flush == false);
- } else if (!node->space->is_in_unflushed_spaces) {
+ } else {
+ node->needs_flush = true;
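+
+			/* Remember the tablespace, so that a later
+			fil_flush_file_spaces() call will fsync() this file. */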
- node->space->is_in_unflushed_spaces = true;
- UT_LIST_ADD_FIRST(unflushed_spaces,
- system->unflushed_spaces,
- node->space);
+ if (!node->space->is_in_unflushed_spaces()) {
+
+ fil_system->unflushed_spaces.push_front(
+ *node->space);
+ }
}
}
if (node->n_pending == 0 && fil_space_belongs_in_lru(node->space)) {
/* The node must be put back to the LRU list */
- UT_LIST_ADD_FIRST(LRU, system->LRU, node);
+ UT_LIST_ADD_FIRST(fil_system->LRU, node);
}
}
-/********************************************************************//**
-Report information about an invalid page access. */
+/** Report information about an invalid page access. */
static
void
fil_report_invalid_page_access(
-/*===========================*/
ulint block_offset, /*!< in: block offset */
ulint space_id, /*!< in: space id */
const char* space_name, /*!< in: space name */
ulint byte_offset, /*!< in: byte offset */
ulint len, /*!< in: I/O length */
- ulint type) /*!< in: I/O type */
+ bool is_read) /*!< in: I/O type */
{
- ib_logf(IB_LOG_LEVEL_FATAL,
- "Trying to access page number " ULINTPF
- " in space " ULINTPF
- " space name %s,"
- " which is outside the tablespace bounds."
- " Byte offset " ULINTPF ", len " ULINTPF
- " i/o type " ULINTPF ".%s",
- block_offset, space_id, space_name,
- byte_offset, len, type,
- space_id == 0 && !srv_was_started
+ ib::fatal()
+ << "Trying to " << (is_read ? "read" : "write")
+ << " page number " << block_offset << " in"
+ " space " << space_id << ", space name " << space_name << ","
+ " which is outside the tablespace bounds. Byte offset "
+		<< byte_offset << ", len " << len << ". " <<
+ (space_id == 0 && !srv_was_started
? "Please check that the configuration matches"
" the InnoDB system tablespace location (ibdata files)"
: "");
}
-/********************************************************************//**
-Find correct node from file space
-@return node */
-static
-fil_node_t*
-fil_space_get_node(
- fil_space_t* space, /*!< in: file spage */
- ulint space_id, /*!< in: space id */
- ulint* block_offset, /*!< in/out: offset in number of blocks */
- ulint byte_offset, /*!< in: remainder of offset in bytes; in
- aio this must be divisible by the OS block
- size */
- ulint len) /*!< in: how many bytes to read or write; this
- must not cross a file boundary; in aio this
- must be a block size multiple */
-{
- fil_node_t* node;
- ut_ad(mutex_own(&fil_system->mutex));
-
- node = UT_LIST_GET_FIRST(space->chain);
-
- for (;;) {
- if (node == NULL) {
- return(NULL);
- } else if (fil_is_user_tablespace_id(space->id)
- && node->size == 0) {
-
- /* We do not know the size of a single-table tablespace
- before we open the file */
- break;
- } else if (node->size > *block_offset) {
- /* Found! */
- break;
- } else {
- *block_offset -= node->size;
- node = UT_LIST_GET_NEXT(chain, node);
- }
- }
-
- return (node);
-}
-
-/** Determine the block size of the data file.
-@param[in] space tablespace
-@param[in] offset page number
-@return block size */
-UNIV_INTERN
-ulint
-fil_space_get_block_size(const fil_space_t* space, unsigned offset)
-{
- ut_ad(space->n_pending_ios > 0);
-
- ulint block_size = 512;
-
- for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
- node != NULL;
- node = UT_LIST_GET_NEXT(chain, node)) {
- block_size = node->file_block_size;
- if (node->size > offset) {
- break;
- }
- offset -= node->size;
- }
-
- /* Currently supporting block size up to 4K,
- fall back to default if bigger requested. */
- if (block_size > 4096) {
- block_size = 512;
- }
-
- return block_size;
-}
-
-/********************************************************************//**
-Reads or writes data. This operation is asynchronous (aio).
-@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
-i/o on a tablespace which does not exist */
-UNIV_INTERN
+/** Reads or writes data. This operation could be asynchronous (aio).
+
+@param[in,out] type IO context
+@param[in] sync true if synchronous aio is desired
+@param[in] page_id page id
+@param[in] page_size page size
+@param[in] byte_offset remainder of offset in bytes; in aio this
+ must be divisible by the OS block size
+@param[in] len how many bytes to read or write; this must
+ not cross a file boundary; in aio this must
+ be a block size multiple
+@param[in,out] buf buffer where to store read data or from where
+ to write; in aio this must be appropriately
+ aligned
+@param[in] message message for aio handler if non-sync aio
+ used, else ignored
+@param[in]	ignore_missing_space	true=ignore missing space during read
+@return DB_SUCCESS; DB_TABLESPACE_DELETED if we are trying to do i/o on a
+	tablespace which does not exist; DB_TABLESPACE_TRUNCATED if the
+	requested page lies beyond the bounds of a truncated tablespace */
dberr_t
fil_io(
-/*===*/
- ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE,
- ORed to OS_FILE_LOG, if a log i/o
- and ORed to OS_AIO_SIMULATED_WAKE_LATER
- if simulated aio and we want to post a
- batch of i/os; NOTE that a simulated batch
- may introduce hidden chances of deadlocks,
- because i/os are not actually handled until
- all have been posted: use with great
- caution! */
- bool sync, /*!< in: true if synchronous aio is desired */
- ulint space_id, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint block_offset, /*!< in: offset in number of blocks */
- ulint byte_offset, /*!< in: remainder of offset in bytes; in
- aio this must be divisible by the OS block
- size */
- ulint len, /*!< in: how many bytes to read or write; this
- must not cross a file boundary; in aio this
- must be a block size multiple */
- void* buf, /*!< in/out: buffer where to store read data
- or from where to write; in aio this must be
- appropriately aligned */
- void* message, /*!< in: message for aio handler if non-sync
- aio used, else ignored */
- ulint* write_size) /*!< in/out: Actual write size initialized
- after fist successfull trim
- operation for this page and if
- initialized we do not trim again if
- actual page size does not decrease. */
+ const IORequest& type,
+ bool sync,
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ ulint byte_offset,
+ ulint len,
+ void* buf,
+ void* message,
+ bool ignore_missing_space)
{
- ulint mode;
- fil_space_t* space;
- fil_node_t* node;
- ibool ret;
- ulint is_log;
- ulint wake_later;
- os_offset_t offset;
- bool ignore_nonexistent_pages;
+ os_offset_t offset;
+ IORequest req_type(type);
- is_log = type & OS_FILE_LOG;
- type = type & ~OS_FILE_LOG;
+ ut_ad(req_type.validate());
- wake_later = type & OS_AIO_SIMULATED_WAKE_LATER;
- type = type & ~OS_AIO_SIMULATED_WAKE_LATER;
-
- ignore_nonexistent_pages = type & BUF_READ_IGNORE_NONEXISTENT_PAGES;
- type &= ~BUF_READ_IGNORE_NONEXISTENT_PAGES;
-
- ut_ad(byte_offset < UNIV_PAGE_SIZE);
- ut_ad(!zip_size || !byte_offset);
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(buf);
ut_ad(len > 0);
+ ut_ad(byte_offset < UNIV_PAGE_SIZE);
+ ut_ad(!page_size.is_compressed() || byte_offset == 0);
ut_ad(UNIV_PAGE_SIZE == (ulong)(1 << UNIV_PAGE_SIZE_SHIFT));
#if (1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX
# error "(1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX"
@@ -5709,100 +4842,148 @@ fil_io(
# error "(1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN"
#endif
ut_ad(fil_validate_skip());
-#ifndef UNIV_HOTBACKUP
-# ifndef UNIV_LOG_DEBUG
- /* ibuf bitmap pages must be read in the sync aio mode: */
+
+ /* ibuf bitmap pages must be read in the sync AIO mode: */
ut_ad(recv_no_ibuf_operations
- || type == OS_FILE_WRITE
- || !ibuf_bitmap_page(zip_size, block_offset)
+ || req_type.is_write()
+ || !ibuf_bitmap_page(page_id, page_size)
|| sync
- || is_log);
-# endif /* UNIV_LOG_DEBUG */
+ || req_type.is_log());
+
+ ulint mode;
+
if (sync) {
+
mode = OS_AIO_SYNC;
- } else if (is_log) {
+
+ } else if (req_type.is_log()) {
+
mode = OS_AIO_LOG;
- } else if (type == OS_FILE_READ
+
+ } else if (req_type.is_read()
&& !recv_no_ibuf_operations
- && ibuf_page(space_id, zip_size, block_offset, NULL)) {
+ && ibuf_page(page_id, page_size, NULL)) {
+
mode = OS_AIO_IBUF;
+
+ /* Reduce probability of deadlock bugs in connection with ibuf:
+ do not let the ibuf i/o handler sleep */
+
+ req_type.clear_do_not_wake();
} else {
mode = OS_AIO_NORMAL;
}
-#else /* !UNIV_HOTBACKUP */
- ut_a(sync);
- mode = OS_AIO_SYNC;
-#endif /* !UNIV_HOTBACKUP */
- if (type == OS_FILE_READ) {
+ if (req_type.is_read()) {
+
srv_stats.data_read.add(len);
- } else if (type == OS_FILE_WRITE) {
- ut_ad(!srv_read_only_mode);
+
+ } else if (req_type.is_write()) {
+
+ ut_ad(!srv_read_only_mode
+ || fsp_is_system_temporary(page_id.space()));
+
srv_stats.data_written.add(len);
- if (mach_read_from_2(static_cast<const byte*>(buf)
- + FIL_PAGE_TYPE) == FIL_PAGE_INDEX) {
- srv_stats.index_pages_written.inc();
- } else {
- srv_stats.non_index_pages_written.inc();
- }
}
/* Reserve the fil_system mutex and make sure that we can open at
least one file while holding it, if the file is not already open */
- fil_mutex_enter_and_prepare_for_io(space_id);
+ fil_mutex_enter_and_prepare_for_io(page_id.space());
- space = fil_space_get_by_id(space_id);
+ fil_space_t* space = fil_space_get_by_id(page_id.space());
/* If we are deleting a tablespace we don't allow async read operations
- on that. However, we do allow write and sync read operations */
- if (space == 0
- || (type == OS_FILE_READ
+ on that. However, we do allow write operations and sync read operations. */
+ if (space == NULL
+ || (req_type.is_read()
&& !sync
- && space->stop_new_ops)) {
+ && space->stop_new_ops
+ && !space->is_being_truncated)) {
+
mutex_exit(&fil_system->mutex);
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Trying to do i/o to a tablespace which does "
- "not exist. i/o type " ULINTPF
- ", space id " ULINTPF " , "
- "page no. " ULINTPF
- ", i/o length " ULINTPF " bytes",
- type, space_id, block_offset,
- len);
+ if (!req_type.ignore_missing() && !ignore_missing_space) {
+ ib::error()
+ << "Trying to do I/O to a tablespace which"
+ " does not exist. I/O type: "
+ << (req_type.is_read() ? "read" : "write")
+ << ", page: " << page_id
+ << ", I/O length: " << len << " bytes";
+ }
return(DB_TABLESPACE_DELETED);
}
- ut_ad(mode != OS_AIO_IBUF || space->purpose == FIL_TABLESPACE);
+ ut_ad(mode != OS_AIO_IBUF || fil_type_is_data(space->purpose));
- node = fil_space_get_node(space, space_id, &block_offset, byte_offset, len);
+ ulint cur_page_no = page_id.page_no();
+ fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
- if (!node) {
- if (ignore_nonexistent_pages) {
- mutex_exit(&fil_system->mutex);
- return(DB_ERROR);
- }
+ for (;;) {
- fil_report_invalid_page_access(
- block_offset, space_id, space->name,
- byte_offset, len, type);
+ if (node == NULL) {
+
+ if (req_type.ignore_missing()) {
+ mutex_exit(&fil_system->mutex);
+ return(DB_ERROR);
+ }
+
+ fil_report_invalid_page_access(
+ page_id.page_no(), page_id.space(),
+ space->name, byte_offset, len,
+ req_type.is_read());
+
+ } else if (fil_is_user_tablespace_id(space->id)
+ && node->size == 0) {
+
+ /* We do not know the size of a single-table tablespace
+ before we open the file */
+ break;
+
+ } else if (node->size > cur_page_no) {
+ /* Found! */
+ break;
+
+ } else {
+ if (space->id != TRX_SYS_SPACE
+ && UT_LIST_GET_LEN(space->chain) == 1
+ && (srv_is_tablespace_truncated(space->id)
+ || space->is_being_truncated
+ || srv_was_tablespace_truncated(space))
+ && req_type.is_read()) {
+
+				/* Handle a page which is outside the truncated
+				tablespace bounds when recovering from a crash
+				that happened during a truncation. */
+ mutex_exit(&fil_system->mutex);
+ return(DB_TABLESPACE_TRUNCATED);
+ }
+
+ cur_page_no -= node->size;
+
+ node = UT_LIST_GET_NEXT(chain, node);
+ }
}
/* Open file if closed */
if (!fil_node_prepare_for_io(node, fil_system, space)) {
- if (space->purpose == FIL_TABLESPACE
+ if (fil_type_is_data(space->purpose)
&& fil_is_user_tablespace_id(space->id)) {
mutex_exit(&fil_system->mutex);
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Trying to do i/o to a tablespace which "
- "exists without .ibd data file. "
- "i/o type " ULINTPF ", space id "
- ULINTPF ", page no " ULINTPF ", "
- "i/o length " ULINTPF " bytes",
- type, space_id,
- block_offset, len);
+ if (!req_type.ignore_missing()) {
+ ib::error()
+ << "Trying to do I/O to a tablespace"
+ " which exists without .ibd data file."
+ " I/O type: "
+ << (req_type.is_read()
+ ? "read" : "write")
+ << ", page: "
+ << page_id_t(page_id.space(),
+ cur_page_no)
+ << ", I/O length: " << len << " bytes";
+ }
return(DB_TABLESPACE_DELETED);
}
@@ -5816,12 +4997,22 @@ fil_io(
/* Check that at least the start offset is within the bounds of a
single-table tablespace, including rollback tablespaces. */
- if (UNIV_UNLIKELY(node->size <= block_offset)
- && space->id != 0 && space->purpose == FIL_TABLESPACE) {
+ if (node->size <= cur_page_no
+ && space->id != TRX_SYS_SPACE
+ && fil_type_is_data(space->purpose)) {
+
+ if (req_type.ignore_missing()) {
+ /* If we can tolerate the non-existent pages, we
+ should return with DB_ERROR and let caller decide
+ what to do. */
+ fil_node_complete_io(node, req_type);
+ mutex_exit(&fil_system->mutex);
+ return(DB_ERROR);
+ }
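+
+		/* Note: the following call does not return;
+		fil_report_invalid_page_access() raises ib::fatal(),
+		which aborts the server. */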
fil_report_invalid_page_access(
- block_offset, space_id, space->name, byte_offset,
- len, type);
+ page_id.page_no(), page_id.space(),
+ space->name, byte_offset, len, req_type.is_read());
}
/* Now we have made the changes in the data structures of fil_system */
@@ -5829,117 +5020,101 @@ fil_io(
/* Calculate the low 32 bits and the high 32 bits of the file offset */
- if (!zip_size) {
- offset = ((os_offset_t) block_offset << UNIV_PAGE_SIZE_SHIFT)
- + byte_offset;
+ if (!page_size.is_compressed()) {
+
+ offset = ((os_offset_t) cur_page_no
+ << UNIV_PAGE_SIZE_SHIFT) + byte_offset;
- ut_a(node->size - block_offset
+ ut_a(node->size - cur_page_no
>= ((byte_offset + len + (UNIV_PAGE_SIZE - 1))
/ UNIV_PAGE_SIZE));
} else {
- ulint zip_size_shift;
- switch (zip_size) {
- case 1024: zip_size_shift = 10; break;
- case 2048: zip_size_shift = 11; break;
- case 4096: zip_size_shift = 12; break;
- case 8192: zip_size_shift = 13; break;
- case 16384: zip_size_shift = 14; break;
- case 32768: zip_size_shift = 15; break;
- case 65536: zip_size_shift = 16; break;
+ ulint size_shift;
+
+ switch (page_size.physical()) {
+ case 1024: size_shift = 10; break;
+ case 2048: size_shift = 11; break;
+ case 4096: size_shift = 12; break;
+ case 8192: size_shift = 13; break;
+ case 16384: size_shift = 14; break;
+ case 32768: size_shift = 15; break;
+ case 65536: size_shift = 16; break;
default: ut_error;
}
- offset = ((os_offset_t) block_offset << zip_size_shift)
+
+ offset = ((os_offset_t) cur_page_no << size_shift)
+ byte_offset;
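+
+		/* For example, with an 8192-byte physical page size,
+		size_shift is 13, so page number 100 maps to byte offset
+		(100 << 13) + byte_offset = 819200 + byte_offset. */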
- ut_a(node->size - block_offset
- >= (len + (zip_size - 1)) / zip_size);
+
+ ut_a(node->size - cur_page_no
+ >= (len + (page_size.physical() - 1))
+ / page_size.physical());
}
- /* Do aio */
+ /* Do AIO */
ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
const char* name = node->name == NULL ? space->name : node->name;
-#ifdef UNIV_HOTBACKUP
- /* In mysqlbackup do normal i/o, not aio */
- if (type == OS_FILE_READ) {
- ret = os_file_read(node->handle, buf, offset, len);
- } else {
- ut_ad(!srv_read_only_mode);
- ret = os_file_write(name, node->handle, buf,
- offset, len);
- }
-#else
+ req_type.set_fil_node(node);
+
+ ut_ad(!req_type.is_write()
+ || page_id.space() == SRV_LOG_SPACE_FIRST_ID
+ || !fil_is_user_tablespace_id(page_id.space())
+ || offset == page_id.page_no() * page_size.physical());
+
/* Queue the aio request */
- ret = os_aio(type, is_log, mode | wake_later, name, node->handle, buf,
- offset, len, zip_size ? zip_size : UNIV_PAGE_SIZE, node,
- message, write_size);
+ dberr_t err = os_aio(
+ req_type,
+ mode, name, node->handle, buf, offset, len,
+ space->purpose != FIL_TYPE_TEMPORARY
+ && srv_read_only_mode,
+ node, message);
-#endif /* UNIV_HOTBACKUP */
+	/* We can try to recover the page from the doublewrite buffer if
+	the decompression fails or the page is corrupt. */
+ ut_a(req_type.is_dblwr_recover() || err == DB_SUCCESS);
- if (mode == OS_AIO_SYNC) {
+ if (sync) {
/* The i/o operation is already completed when we return from
os_aio: */
mutex_enter(&fil_system->mutex);
- fil_node_complete_io(node, fil_system, type);
+ fil_node_complete_io(node, req_type);
mutex_exit(&fil_system->mutex);
ut_ad(fil_validate_skip());
}
- if (!ret) {
- return(DB_OUT_OF_FILE_SPACE);
- }
-
- return(DB_SUCCESS);
+ return(err);
}
-#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Waits for an aio operation to complete. This function is used by the i/o
handler threads to serve completed requests. The aio array of pending requests is divided
into segments (see os0file.cc for more info). The thread specifies which
segment it wants to wait for. */
-UNIV_INTERN
void
fil_aio_wait(
/*=========*/
ulint segment) /*!< in: the number of the segment in the aio
array to wait for */
{
- ibool ret;
- fil_node_t* fil_node;
+ fil_node_t* node;
+ IORequest type;
void* message;
- ulint type;
ut_ad(fil_validate_skip());
- if (srv_use_native_aio) {
- srv_set_io_thread_op_info(segment, "native aio handle");
-#ifdef WIN_ASYNC_IO
- ret = os_aio_windows_handle(
- segment, 0, &fil_node, &message, &type);
-#elif defined(LINUX_NATIVE_AIO)
- ret = os_aio_linux_handle(
- segment, &fil_node, &message, &type);
-#else
- ut_error;
- ret = 0; /* Eliminate compiler warning */
-#endif /* WIN_ASYNC_IO */
- } else {
- srv_set_io_thread_op_info(segment, "simulated aio handle");
+ dberr_t err = os_aio_handler(segment, &node, &message, &type);
- ret = os_aio_simulated_handle(
- segment, &fil_node, &message, &type);
- }
+ ut_a(err == DB_SUCCESS);
- ut_a(ret);
- if (fil_node == NULL) {
+ if (node == NULL) {
ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
return;
}
@@ -5948,8 +5123,10 @@ fil_aio_wait(
mutex_enter(&fil_system->mutex);
- fil_node_complete_io(fil_node, fil_system, type);
- ulint purpose = fil_node->space->purpose;
+ fil_node_complete_io(node, type);
+ const fil_type_t purpose = node->space->purpose;
+	const ulint space_id = node->space->id;
+ const bool dblwr = node->space->use_doublewrite();
mutex_exit(&fil_system->mutex);
@@ -5961,38 +5138,53 @@ fil_aio_wait(
deadlocks in the i/o system. We keep tablespace 0 data files always
open, and use a special i/o thread to serve insert buffer requests. */
- if (purpose == FIL_TABLESPACE) {
+ switch (purpose) {
+ case FIL_TYPE_LOG:
+ srv_set_io_thread_op_info(segment, "complete io for log");
+ log_io_complete(static_cast<log_group_t*>(message));
+ return;
+ case FIL_TYPE_TABLESPACE:
+ case FIL_TYPE_TEMPORARY:
+ case FIL_TYPE_IMPORT:
srv_set_io_thread_op_info(segment, "complete io for buf page");
+
+	/* Async single-page writes from the doublewrite buffer do not
+	have access to the page. */
buf_page_t* bpage = static_cast<buf_page_t*>(message);
- ulint offset = bpage->offset;
- dberr_t err = buf_page_io_complete(bpage);
+ if (!bpage) {
+ return;
+ }
- if (err != DB_SUCCESS) {
- ut_ad(type == OS_FILE_READ);
- /* In crash recovery set log corruption on
- and produce only an error to fail InnoDB startup. */
- if (recv_recovery_is_on() && !srv_force_recovery) {
- recv_sys->found_corrupt_log = true;
+ ulint offset = bpage->id.page_no();
+ dberr_t err = buf_page_io_complete(bpage, dblwr);
+ if (err == DB_SUCCESS) {
+ return;
+ }
+
+ ut_ad(type.is_read());
+ if (recv_recovery_is_on() && !srv_force_recovery) {
+ recv_sys->found_corrupt_fs = true;
+ }
+
+ if (fil_space_t* space = fil_space_acquire_for_io(space_id)) {
+ if (space == node->space) {
+ ib::error() << "Failed to read file '"
+ << node->name
+ << "' at offset " << offset
+ << ": " << ut_strerr(err);
}
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Read operation failed for tablespace %s"
- " offset " ULINTPF " with error %s",
- fil_node->name,
- offset,
- ut_strerr(err));
+ fil_space_release_for_io(space);
}
- } else {
- srv_set_io_thread_op_info(segment, "complete io for log");
- log_io_complete(static_cast<log_group_t*>(message));
+ return;
}
+
+ ut_ad(0);
}
-#endif /* UNIV_HOTBACKUP */
/**********************************************************************//**
Flushes to disk possible writes cached by the OS. If the space does not exist
or is being dropped, does not do anything. */
-UNIV_INTERN
void
fil_flush(
/*======*/
@@ -6002,7 +5194,8 @@ fil_flush(
mutex_enter(&fil_system->mutex);
if (fil_space_t* space = fil_space_get_by_id(space_id)) {
- if (!space->stop_new_ops) {
+ if (space->purpose != FIL_TYPE_TEMPORARY
+ && !space->is_stopping()) {
fil_flush_low(space);
}
}
@@ -6012,11 +5205,12 @@ fil_flush(
/** Flush a tablespace.
@param[in,out] space tablespace to flush */
-UNIV_INTERN
void
fil_flush(fil_space_t* space)
{
ut_ad(space->n_pending_ios > 0);
+ ut_ad(space->purpose == FIL_TYPE_TABLESPACE
+ || space->purpose == FIL_TYPE_IMPORT);
if (!space->is_stopping()) {
mutex_enter(&fil_system->mutex);
@@ -6030,39 +5224,36 @@ fil_flush(fil_space_t* space)
/** Flush to disk the writes in file spaces of the given type
possibly cached by the OS.
@param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_LOG */
-UNIV_INTERN
void
-fil_flush_file_spaces(ulint purpose)
+fil_flush_file_spaces(
+ fil_type_t purpose)
{
- fil_space_t* space;
ulint* space_ids;
ulint n_space_ids;
- ulint i;
+
+ ut_ad(purpose == FIL_TYPE_TABLESPACE || purpose == FIL_TYPE_LOG);
mutex_enter(&fil_system->mutex);
- n_space_ids = UT_LIST_GET_LEN(fil_system->unflushed_spaces);
+ n_space_ids = fil_system->unflushed_spaces.size();
if (n_space_ids == 0) {
mutex_exit(&fil_system->mutex);
return;
}
- /* Assemble a list of space ids to flush. Previously, we
- traversed fil_system->unflushed_spaces and called UT_LIST_GET_NEXT()
- on a space that was just removed from the list by fil_flush().
- Thus, the space could be dropped and the memory overwritten. */
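+	/* Assemble a list of space ids to flush; calling fil_flush()
+	while iterating unflushed_spaces directly would be unsafe,
+	because fil_flush() can remove a space from the list and
+	invalidate the iterator. */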
space_ids = static_cast<ulint*>(
- mem_alloc(n_space_ids * sizeof *space_ids));
+ ut_malloc_nokey(n_space_ids * sizeof(*space_ids)));
n_space_ids = 0;
- for (space = UT_LIST_GET_FIRST(fil_system->unflushed_spaces);
- space;
- space = UT_LIST_GET_NEXT(unflushed_spaces, space)) {
+ for (intrusive::list<fil_space_t, unflushed_spaces_tag_t>::iterator it
+ = fil_system->unflushed_spaces.begin(),
+ end = fil_system->unflushed_spaces.end();
+ it != end; ++it) {
- if (space->purpose == purpose && !space->is_stopping()) {
- space_ids[n_space_ids++] = space->id;
+ if (it->purpose == purpose && !it->is_stopping()) {
+ space_ids[n_space_ids++] = it->id;
}
}
@@ -6070,68 +5261,76 @@ fil_flush_file_spaces(ulint purpose)
/* Flush the spaces. It will not hurt to call fil_flush() on
a non-existing space id. */
- for (i = 0; i < n_space_ids; i++) {
+ for (ulint i = 0; i < n_space_ids; i++) {
fil_flush(space_ids[i]);
}
- mem_free(space_ids);
+ ut_free(space_ids);
}
-/** Functor to validate the space list. */
+/** Functor to validate the file node list of a tablespace. */
struct Check {
+ /** Total size of file nodes visited so far */
+ ulint size;
+ /** Total number of open files visited so far */
+ ulint n_open;
+
+ /** Constructor */
+ Check() : size(0), n_open(0) {}
+
+ /** Visit a file node
+ @param[in] elem file node to visit */
void operator()(const fil_node_t* elem)
{
- ut_a(elem->open || !elem->n_pending);
+ ut_a(elem->is_open() || !elem->n_pending);
+ n_open += elem->is_open();
+ size += elem->size;
+ }
+
+ /** Validate a tablespace.
+ @param[in] space tablespace to validate
+ @return number of open file nodes */
+ static ulint validate(const fil_space_t* space)
+ {
+ ut_ad(mutex_own(&fil_system->mutex));
+ Check check;
+ ut_list_validate(space->chain, check);
+ ut_a(space->size == check.size);
+ return(check.n_open);
}
};
/******************************************************************//**
Checks the consistency of the tablespace cache.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
+@return true if ok */
+bool
fil_validate(void)
/*==============*/
{
fil_space_t* space;
fil_node_t* fil_node;
ulint n_open = 0;
- ulint i;
mutex_enter(&fil_system->mutex);
/* Look for spaces in the hash table */
- for (i = 0; i < hash_get_n_cells(fil_system->spaces); i++) {
+ for (ulint i = 0; i < hash_get_n_cells(fil_system->spaces); i++) {
for (space = static_cast<fil_space_t*>(
HASH_GET_FIRST(fil_system->spaces, i));
space != 0;
space = static_cast<fil_space_t*>(
- HASH_GET_NEXT(hash, space))) {
-
- UT_LIST_VALIDATE(
- chain, fil_node_t, space->chain, Check());
+ HASH_GET_NEXT(hash, space))) {
- for (fil_node = UT_LIST_GET_FIRST(space->chain);
- fil_node != 0;
- fil_node = UT_LIST_GET_NEXT(chain, fil_node)) {
-
- if (fil_node->n_pending > 0) {
- ut_a(fil_node->open);
- }
-
- if (fil_node->open) {
- n_open++;
- }
- }
+ n_open += Check::validate(space);
}
}
ut_a(fil_system->n_open == n_open);
- UT_LIST_CHECK(LRU, fil_node_t, fil_system->LRU);
+ ut_list_validate(fil_system->LRU);
for (fil_node = UT_LIST_GET_FIRST(fil_system->LRU);
fil_node != 0;
@@ -6139,20 +5338,19 @@ fil_validate(void)
ut_a(fil_node->n_pending == 0);
ut_a(!fil_node->being_extended);
- ut_a(fil_node->open);
+ ut_a(fil_node->is_open());
ut_a(fil_space_belongs_in_lru(fil_node->space));
}
mutex_exit(&fil_system->mutex);
- return(TRUE);
+ return(true);
}
/********************************************************************//**
-Returns TRUE if file address is undefined.
-@return TRUE if undefined */
-UNIV_INTERN
-ibool
+Returns true if file address is undefined.
+@return true if undefined */
+bool
fil_addr_is_null(
/*=============*/
fil_addr_t addr) /*!< in: address */
@@ -6162,8 +5360,7 @@ fil_addr_is_null(
/********************************************************************//**
Get the predecessor of a file page.
-@return FIL_PAGE_PREV */
-UNIV_INTERN
+@return FIL_PAGE_PREV */
ulint
fil_page_get_prev(
/*==============*/
@@ -6174,8 +5371,7 @@ fil_page_get_prev(
/********************************************************************//**
Get the successor of a file page.
-@return FIL_PAGE_NEXT */
-UNIV_INTERN
+@return FIL_PAGE_NEXT */
ulint
fil_page_get_next(
/*==============*/
@@ -6186,7 +5382,6 @@ fil_page_get_next(
/*********************************************************************//**
Sets the file page type. */
-UNIV_INTERN
void
fil_page_set_type(
/*==============*/
@@ -6198,66 +5393,51 @@ fil_page_set_type(
mach_write_to_2(page + FIL_PAGE_TYPE, type);
}
-/*********************************************************************//**
-Gets the file page type.
-@return type; NOTE that if the type has not been written to page, the
-return value not defined */
-UNIV_INTERN
-ulint
-fil_page_get_type(
-/*==============*/
- const byte* page) /*!< in: file page */
-{
- ut_ad(page);
-
- return(mach_read_from_2(page + FIL_PAGE_TYPE));
-}
-
/****************************************************************//**
Closes the tablespace memory cache. */
-UNIV_INTERN
void
fil_close(void)
/*===========*/
{
- fil_space_crypt_cleanup();
+ if (fil_system) {
+ hash_table_free(fil_system->spaces);
- mutex_free(&fil_system->mutex);
+ hash_table_free(fil_system->name_hash);
- hash_table_free(fil_system->spaces);
+ ut_a(UT_LIST_GET_LEN(fil_system->LRU) == 0);
+ ut_a(fil_system->unflushed_spaces.size() == 0);
+ ut_a(UT_LIST_GET_LEN(fil_system->space_list) == 0);
- hash_table_free(fil_system->name_hash);
+ mutex_free(&fil_system->mutex);
- ut_a(UT_LIST_GET_LEN(fil_system->LRU) == 0);
- ut_a(UT_LIST_GET_LEN(fil_system->unflushed_spaces) == 0);
- ut_a(UT_LIST_GET_LEN(fil_system->space_list) == 0);
+ delete fil_system;
+ fil_system = NULL;
- mem_free(fil_system);
-
- fil_system = NULL;
+ fil_space_crypt_cleanup();
+ }
}
/********************************************************************//**
Delete the tablespace file and any related files like .cfg.
-This should not be called for temporary tables. */
-UNIV_INTERN
+This should not be called for temporary tables.
+@param[in] ibd_filepath File path of the IBD tablespace */
void
fil_delete_file(
/*============*/
- const char* ibd_name) /*!< in: filepath of the ibd
- tablespace */
+ const char* ibd_filepath)
{
/* Force a delete of any stale .ibd files that are lying around. */
- ib_logf(IB_LOG_LEVEL_INFO, "Deleting %s", ibd_name);
-
- os_file_delete_if_exists(innodb_file_data_key, ibd_name);
+ ib::info() << "Deleting " << ibd_filepath;
+ os_file_delete_if_exists(innodb_data_file_key, ibd_filepath, NULL);
- char* cfg_name = fil_make_cfg_name(ibd_name);
-
- os_file_delete_if_exists(innodb_file_data_key, cfg_name);
-
- mem_free(cfg_name);
+ char* cfg_filepath = fil_make_filepath(
+ ibd_filepath, NULL, CFG, false);
+ if (cfg_filepath != NULL) {
+ os_file_delete_if_exists(
+ innodb_data_file_key, cfg_filepath, NULL);
+ ut_free(cfg_filepath);
+ }
}
/**
@@ -6265,7 +5445,6 @@ Iterate over all the spaces in the space list and fetch the
tablespace names. It will return a copy of the name that must be
freed by the caller using: delete[].
@return DB_SUCCESS if all OK. */
-UNIV_INTERN
dberr_t
fil_get_space_names(
/*================*/
@@ -6281,12 +5460,12 @@ fil_get_space_names(
space != NULL;
space = UT_LIST_GET_NEXT(space_list, space)) {
- if (space->purpose == FIL_TABLESPACE) {
+ if (space->purpose == FIL_TYPE_TABLESPACE) {
ulint len;
char* name;
- len = strlen(space->name);
- name = new(std::nothrow) char[len + 1];
+ len = ::strlen(space->name);
+ name = UT_NEW_ARRAY_NOKEY(char, len + 1);
if (name == 0) {
/* Caller to free elements allocated so far. */
@@ -6312,7 +5491,6 @@ fil_get_space_names(
@param[in] tmp_name temporary table name
@param[in,out] mtr mini-transaction
@return innodb error code */
-UNIV_INTERN
dberr_t
fil_mtr_rename_log(
const dict_table_t* old_table,
@@ -6320,39 +5498,35 @@ fil_mtr_rename_log(
const char* tmp_name,
mtr_t* mtr)
{
- dberr_t err = DB_SUCCESS;
- char* old_path;
+ dberr_t err;
+
+ bool old_is_file_per_table =
+ !is_system_tablespace(old_table->space);
+
+ bool new_is_file_per_table =
+ !is_system_tablespace(new_table->space);
/* If neither table is file-per-table,
there will be no renaming of files. */
- if (old_table->space == TRX_SYS_SPACE
- && new_table->space == TRX_SYS_SPACE) {
+ if (!old_is_file_per_table && !new_is_file_per_table) {
return(DB_SUCCESS);
}
- if (DICT_TF_HAS_DATA_DIR(old_table->flags)) {
- old_path = os_file_make_remote_pathname(
- old_table->data_dir_path, old_table->name, "ibd");
- } else {
- old_path = fil_make_ibd_name(old_table->name, false);
- }
+ const char* old_dir = DICT_TF_HAS_DATA_DIR(old_table->flags)
+ ? old_table->data_dir_path
+ : NULL;
+
+ char* old_path = fil_make_filepath(
+ old_dir, old_table->name.m_name, IBD, (old_dir != NULL));
if (old_path == NULL) {
return(DB_OUT_OF_MEMORY);
}
- if (old_table->space != TRX_SYS_SPACE) {
- char* tmp_path;
-
- if (DICT_TF_HAS_DATA_DIR(old_table->flags)) {
- tmp_path = os_file_make_remote_pathname(
- old_table->data_dir_path, tmp_name, "ibd");
- }
- else {
- tmp_path = fil_make_ibd_name(tmp_name, false);
- }
-
+ if (old_is_file_per_table) {
+ char* tmp_path = fil_make_filepath(
+ old_dir, tmp_name, IBD, (old_dir != NULL));
if (tmp_path == NULL) {
- mem_free(old_path);
+ ut_free(old_path);
return(DB_OUT_OF_MEMORY);
}
@@ -6360,137 +5534,386 @@ fil_mtr_rename_log(
err = fil_rename_tablespace_check(
old_table->space, old_path, tmp_path,
dict_table_is_discarded(old_table));
- mem_free(tmp_path);
if (err != DB_SUCCESS) {
- mem_free(old_path);
+ ut_free(old_path);
+ ut_free(tmp_path);
return(err);
}
- fil_op_write_log(MLOG_FILE_RENAME, old_table->space,
- 0, 0, old_table->name, tmp_name, mtr);
+ fil_name_write_rename_low(
+ old_table->space, 0, old_path, tmp_path, mtr);
+
+ ut_free(tmp_path);
}
- if (new_table->space != TRX_SYS_SPACE) {
+ if (new_is_file_per_table) {
+ const char* new_dir = DICT_TF_HAS_DATA_DIR(new_table->flags)
+ ? new_table->data_dir_path
+ : NULL;
+ char* new_path = fil_make_filepath(
+ new_dir, new_table->name.m_name,
+ IBD, (new_dir != NULL));
+ if (new_path == NULL) {
+ ut_free(old_path);
+ return(DB_OUT_OF_MEMORY);
+ }
/* Destination filepath must not exist unless this ALTER
		TABLE starts and ends with a file-per-table tablespace. */
- if (old_table->space == TRX_SYS_SPACE) {
- char* new_path = NULL;
-
- if (DICT_TF_HAS_DATA_DIR(new_table->flags)) {
- new_path = os_file_make_remote_pathname(
- new_table->data_dir_path,
- new_table->name, "ibd");
- }
- else {
- new_path = fil_make_ibd_name(
- new_table->name, false);
- }
-
- if (new_path == NULL) {
- mem_free(old_path);
- return(DB_OUT_OF_MEMORY);
- }
-
+ if (!old_is_file_per_table) {
err = fil_rename_tablespace_check(
new_table->space, new_path, old_path,
dict_table_is_discarded(new_table));
- mem_free(new_path);
if (err != DB_SUCCESS) {
- mem_free(old_path);
+ ut_free(old_path);
+ ut_free(new_path);
return(err);
}
}
- fil_op_write_log(MLOG_FILE_RENAME, new_table->space,
- 0, 0, new_table->name, old_table->name, mtr);
+ fil_name_write_rename_low(
+ new_table->space, 0, new_path, old_path, mtr);
+ ut_free(new_path);
}
- mem_free(old_path);
+ ut_free(old_path);
- return(err);
+ return(DB_SUCCESS);
}
-/** Acquire a tablespace when it could be dropped concurrently.
-Used by background threads that do not necessarily hold proper locks
-for concurrency control.
-@param[in] id tablespace ID
-@param[in] silent whether to silently ignore missing tablespaces
-@return the tablespace
-@retval NULL if missing or being deleted or truncated */
-UNIV_INTERN
-fil_space_t*
-fil_space_acquire_low(ulint id, bool silent)
+#ifdef UNIV_DEBUG
+/** Check that a tablespace is valid for mtr_commit().
+@param[in] space persistent tablespace that has been changed */
+static
+void
+fil_space_validate_for_mtr_commit(
+ const fil_space_t* space)
{
- fil_space_t* space;
+ ut_ad(!mutex_own(&fil_system->mutex));
+ ut_ad(space != NULL);
+ ut_ad(space->purpose == FIL_TYPE_TABLESPACE);
+ ut_ad(!is_predefined_tablespace(space->id));
+
+ /* We are serving mtr_commit(). While there is an active
+ mini-transaction, we should have !space->stop_new_ops. This is
+ guaranteed by meta-data locks or transactional locks, or
+ dict_operation_lock (X-lock in DROP, S-lock in purge).
+
+ However, a file I/O thread can invoke change buffer merge
+ while fil_check_pending_operations() is waiting for operations
+ to quiesce. This is not a problem, because
+ ibuf_merge_or_delete_for_page() would call
+ fil_space_acquire() before mtr_start() and
+ fil_space_release() after mtr_commit(). This is why
+ n_pending_ops should not be zero if stop_new_ops is set. */
+ ut_ad(!space->stop_new_ops
+ || space->is_being_truncated /* fil_truncate_prepare() */
+ || space->n_pending_ops > 0);
+}
+#endif /* UNIV_DEBUG */
- mutex_enter(&fil_system->mutex);
+/** Write a MLOG_FILE_NAME record for a persistent tablespace.
+@param[in] space tablespace
+@param[in,out] mtr mini-transaction */
+static
+void
+fil_names_write(
+ const fil_space_t* space,
+ mtr_t* mtr)
+{
+ ut_ad(UT_LIST_GET_LEN(space->chain) == 1);
+ fil_name_write(space, 0, UT_LIST_GET_FIRST(space->chain), mtr);
+}
- space = fil_space_get_by_id(id);
+/** Note that a non-predefined persistent tablespace has been modified
+by redo log.
+@param[in,out] space tablespace */
+void
+fil_names_dirty(
+ fil_space_t* space)
+{
+ ut_ad(log_mutex_own());
+ ut_ad(recv_recovery_is_on());
+ ut_ad(log_sys->lsn != 0);
+ ut_ad(space->max_lsn == 0);
+ ut_d(fil_space_validate_for_mtr_commit(space));
+
+ UT_LIST_ADD_LAST(fil_system->named_spaces, space);
+ space->max_lsn = log_sys->lsn;
+}
- if (space == NULL) {
- if (!silent) {
- ib_logf(IB_LOG_LEVEL_WARN, "Trying to access missing"
- " tablespace " ULINTPF ".", id);
+/** Write MLOG_FILE_NAME records when a non-predefined persistent
+tablespace was modified for the first time since the latest
+fil_names_clear().
+@param[in,out] space tablespace
+@param[in,out] mtr mini-transaction */
+void
+fil_names_dirty_and_write(
+ fil_space_t* space,
+ mtr_t* mtr)
+{
+ ut_ad(log_mutex_own());
+ ut_d(fil_space_validate_for_mtr_commit(space));
+ ut_ad(space->max_lsn == log_sys->lsn);
+
+ UT_LIST_ADD_LAST(fil_system->named_spaces, space);
+ fil_names_write(space, mtr);
+
+ DBUG_EXECUTE_IF("fil_names_write_bogus",
+ {
+ char bogus_name[] = "./test/bogus file.ibd";
+ os_normalize_path(bogus_name);
+ fil_name_write(
+ SRV_LOG_SPACE_FIRST_ID, 0,
+ bogus_name, mtr);
+ });
+}
+
+/** On a log checkpoint, reset fil_names_dirty_and_write() flags
+and write out MLOG_FILE_NAME and MLOG_CHECKPOINT if needed.
+@param[in] lsn checkpoint LSN
+@param[in] do_write whether to always write MLOG_CHECKPOINT
+@return whether anything was written to the redo log
+@retval false if no flags were set and nothing written
+@retval true if anything was written to the redo log */
+bool
+fil_names_clear(
+ lsn_t lsn,
+ bool do_write)
+{
+ mtr_t mtr;
+ ulint mtr_checkpoint_size = LOG_CHECKPOINT_FREE_PER_THREAD;
+
+ DBUG_EXECUTE_IF(
+ "increase_mtr_checkpoint_size",
+ mtr_checkpoint_size = 75 * 1024;
+ );
+
+ ut_ad(log_mutex_own());
+
+ if (log_sys->append_on_checkpoint) {
+ mtr_write_log(log_sys->append_on_checkpoint);
+ do_write = true;
+ }
+
+ mtr.start();
+
+ for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system->named_spaces);
+ space != NULL; ) {
+ fil_space_t* next = UT_LIST_GET_NEXT(named_spaces, space);
+
+ ut_ad(space->max_lsn > 0);
+ if (space->max_lsn < lsn) {
+ /* The tablespace was last dirtied before the
+ checkpoint LSN. Remove it from the list, so
+ that if the tablespace is not going to be
+ modified any more, subsequent checkpoints will
+ avoid calling fil_names_write() on it. */
+ space->max_lsn = 0;
+ UT_LIST_REMOVE(fil_system->named_spaces, space);
}
- } else if (space->is_stopping()) {
- space = NULL;
- } else {
- space->n_pending_ops++;
+
+ /* max_lsn is the last LSN where fil_names_dirty_and_write()
+ was called. If we kept track of "min_lsn" (the first LSN
+ where max_lsn turned nonzero), we could avoid the
+ fil_names_write() call if min_lsn > lsn. */
+
+ fil_names_write(space, &mtr);
+ do_write = true;
+
+ const mtr_buf_t* mtr_log = mtr_get_log(&mtr);
+
+ /* If the mtr buffer size exceeds
+ LOG_CHECKPOINT_FREE_PER_THREAD, commit the multi-record
+ mini-transaction and start a new one, to avoid a parsing
+ buffer overflow during recovery. */
+
+ if (mtr_log->size() > mtr_checkpoint_size) {
+ ut_ad(mtr_log->size() < (RECV_PARSING_BUF_SIZE / 2));
+ mtr.commit_checkpoint(lsn, false);
+ mtr.start();
+ }
+
+ space = next;
}
- mutex_exit(&fil_system->mutex);
+ if (do_write) {
+ mtr.commit_checkpoint(lsn, true);
+ } else {
+ ut_ad(!mtr.has_modifications());
+ }
- return(space);
+ return(do_write);
}
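For orientation, a hedged sketch of the checkpoint-side invocation (checkpoint_lsn is an assumed variable; the real caller would live in log0log.cc and manage the log mutex lifetime around the mtr_t::commit_checkpoint() calls made above):

	/* Sketch: at checkpoint time, emit MLOG_FILE_NAME records for
	every tablespace dirtied since the previous checkpoint,
	followed by MLOG_CHECKPOINT. */
	ut_ad(log_mutex_own());
	fil_names_clear(checkpoint_lsn, true);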
-/** Release a tablespace acquired with fil_space_acquire().
-@param[in,out] space tablespace to release */
-UNIV_INTERN
-void
-fil_space_release(fil_space_t* space)
+/** Truncate a single-table tablespace. The tablespace must be cached
+in the memory cache.
+@param space_id space id
+@param dir_path directory path
+@param tablename the table name in the usual
+ databasename/tablename format of InnoDB
+@param flags tablespace flags
+@param trunc_to_default truncate to default size if tablespace
+ is being newly re-initialized.
+@return DB_SUCCESS or error */
+dberr_t
+truncate_t::truncate(
+/*=================*/
+ ulint space_id,
+ const char* dir_path,
+ const char* tablename,
+ ulint flags,
+ bool trunc_to_default)
{
- mutex_enter(&fil_system->mutex);
- ut_ad(space->magic_n == FIL_SPACE_MAGIC_N);
- ut_ad(space->n_pending_ops > 0);
- space->n_pending_ops--;
- mutex_exit(&fil_system->mutex);
-}
+ dberr_t err = DB_SUCCESS;
+ char* path;
+
+ ut_a(!is_system_tablespace(space_id));
+
+ if (FSP_FLAGS_HAS_DATA_DIR(flags)) {
+ ut_ad(dir_path != NULL);
+ path = fil_make_filepath(dir_path, tablename, IBD, true);
+ } else {
+ path = fil_make_filepath(NULL, tablename, IBD, false);
+ }
+
+ if (path == NULL) {
+ return(DB_OUT_OF_MEMORY);
+ }
-/** Acquire a tablespace for reading or writing a block,
-when it could be dropped concurrently.
-@param[in] id tablespace ID
-@return the tablespace
-@retval NULL if missing */
-UNIV_INTERN
-fil_space_t*
-fil_space_acquire_for_io(ulint id)
-{
mutex_enter(&fil_system->mutex);
- fil_space_t* space = fil_space_get_by_id(id);
+ fil_space_t* space = fil_space_get_by_id(space_id);
- if (space) {
- space->n_pending_ios++;
+ /* The following code must change when InnoDB supports
+ multiple datafiles per tablespace. */
+ ut_a(UT_LIST_GET_LEN(space->chain) == 1);
+
+ fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
+
+ if (trunc_to_default) {
+ space->size = node->size = FIL_IBD_FILE_INITIAL_SIZE;
+ }
+
+ const bool already_open = node->is_open();
+
+ if (!already_open) {
+
+ bool ret;
+
+ node->handle = os_file_create_simple_no_error_handling(
+ innodb_data_file_key, path, OS_FILE_OPEN,
+ OS_FILE_READ_WRITE,
+ fsp_is_system_temporary(space_id)
+ ? false : srv_read_only_mode, &ret);
+
+ if (!ret) {
+ ib::error() << "Failed to open tablespace file "
+ << path << ".";
+
+ ut_free(path);
+
+ return(DB_ERROR);
+ }
+
+ ut_a(node->is_open());
+ }
+
+ os_offset_t trunc_size = trunc_to_default
+ ? FIL_IBD_FILE_INITIAL_SIZE
+ : space->size;
+
+ const bool success = os_file_truncate(
+ path, node->handle, trunc_size * UNIV_PAGE_SIZE);
+
+ if (!success) {
+ ib::error() << "Cannot truncate file " << path
+ << " in TRUNCATE TABLESPACE.";
+ err = DB_ERROR;
+ }
+
+ space->stop_new_ops = false;
+ space->is_being_truncated = false;
+
+ /* If we opened the file in this function, close it. */
+ if (!already_open) {
+ bool closed = os_file_close(node->handle);
+
+ if (!closed) {
+
+ ib::error() << "Failed to close tablespace file "
+ << path << ".";
+
+ err = DB_ERROR;
+ } else {
+ node->handle = OS_FILE_CLOSED;
+ }
}
mutex_exit(&fil_system->mutex);
- return(space);
+ ut_free(path);
+
+ return(err);
}
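A hedged usage sketch (space_id and flags are assumed inputs; with a NULL dir_path the flags must not carry the DATA_DIR bit, per the branch at the top of the function):

	/* Sketch: shrink a file-per-table tablespace back to its
	initial size while re-initializing it for TRUNCATE. */
	dberr_t	err = truncate_t::truncate(
		space_id, NULL, "test/t1", flags,
		true /* trunc_to_default */);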
-/** Release a tablespace acquired with fil_space_acquire_for_io().
-@param[in,out] space tablespace to release */
-UNIV_INTERN
+/* Unit Tests */
+#ifdef UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH
+#define MF fil_make_filepath
+#define DISPLAY ib::info() << path
void
-fil_space_release_for_io(fil_space_t* space)
+test_make_filepath()
{
- mutex_enter(&fil_system->mutex);
- ut_ad(space->magic_n == FIL_SPACE_MAGIC_N);
- ut_ad(space->n_pending_ios > 0);
- space->n_pending_ios--;
- mutex_exit(&fil_system->mutex);
+ char* path;
+ const char* long_path =
+ "this/is/a/very/long/path/including/a/very/"
+ "looooooooooooooooooooooooooooooooooooooooooooooooo"
+ "oooooooooooooooooooooooooooooooooooooooooooooooooo"
+ "oooooooooooooooooooooooooooooooooooooooooooooooooo"
+ "oooooooooooooooooooooooooooooooooooooooooooooooooo"
+ "oooooooooooooooooooooooooooooooooooooooooooooooooo"
+ "oooooooooooooooooooooooooooooooooooooooooooooooooo"
+ "oooooooooooooooooooooooooooooooooooooooooooooooooo"
+ "oooooooooooooooooooooooooooooooooooooooooooooooooo"
+ "oooooooooooooooooooooooooooooooooooooooooooooooooo"
+ "oooooooooooooooooooooooooooooooooooooooooooooooong"
+ "/folder/name";
+ path = MF("/this/is/a/path/with/a/filename", NULL, IBD, false); DISPLAY;
+ path = MF("/this/is/a/path/with/a/filename", NULL, ISL, false); DISPLAY;
+ path = MF("/this/is/a/path/with/a/filename", NULL, CFG, false); DISPLAY;
+ path = MF("/this/is/a/path/with/a/filename.ibd", NULL, IBD, false); DISPLAY;
+ path = MF("/this/is/a/path/with/a/filename.ibd", NULL, IBD, false); DISPLAY;
+ path = MF("/this/is/a/path/with/a/filename.dat", NULL, IBD, false); DISPLAY;
+ path = MF(NULL, "tablespacename", NO_EXT, false); DISPLAY;
+ path = MF(NULL, "tablespacename", IBD, false); DISPLAY;
+ path = MF(NULL, "dbname/tablespacename", NO_EXT, false); DISPLAY;
+ path = MF(NULL, "dbname/tablespacename", IBD, false); DISPLAY;
+ path = MF(NULL, "dbname/tablespacename", ISL, false); DISPLAY;
+ path = MF(NULL, "dbname/tablespacename", CFG, false); DISPLAY;
+ path = MF(NULL, "dbname\\tablespacename", NO_EXT, false); DISPLAY;
+ path = MF(NULL, "dbname\\tablespacename", IBD, false); DISPLAY;
+ path = MF("/this/is/a/path", "dbname/tablespacename", IBD, false); DISPLAY;
+ path = MF("/this/is/a/path", "dbname/tablespacename", IBD, true); DISPLAY;
+ path = MF("./this/is/a/path", "dbname/tablespacename.ibd", IBD, true); DISPLAY;
+ path = MF("this\\is\\a\\path", "dbname/tablespacename", IBD, true); DISPLAY;
+ path = MF("/this/is/a/path", "dbname\\tablespacename", IBD, true); DISPLAY;
+ path = MF(long_path, NULL, IBD, false); DISPLAY;
+ path = MF(long_path, "tablespacename", IBD, false); DISPLAY;
+ path = MF(long_path, "tablespacename", IBD, true); DISPLAY;
+}
+#endif /* UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH */
+/* @} */
+
+/** Release the reserved free extents.
+@param[in] n_reserved number of reserved extents */
+void
+fil_space_t::release_free_extents(ulint n_reserved)
+{
+ ut_ad(rw_lock_own(&latch, RW_LOCK_X));
+
+ ut_a(n_reserved_extents >= n_reserved);
+ n_reserved_extents -= n_reserved;
}
/** Return the next fil_space_t.
@@ -6501,7 +5924,6 @@ blocks a concurrent operation from dropping the tablespace.
If NULL, use the first fil_space_t on fil_system->space_list.
@return pointer to the next fil_space_t.
@retval NULL if this was the last*/
-UNIV_INTERN
fil_space_t*
fil_space_next(fil_space_t* prev_space)
{
@@ -6527,7 +5949,7 @@ fil_space_next(fil_space_t* prev_space)
while (space != NULL
&& (UT_LIST_GET_LEN(space->chain) == 0
|| space->is_stopping()
- || space->purpose != FIL_TABLESPACE)) {
+ || space->purpose != FIL_TYPE_TABLESPACE)) {
space = UT_LIST_GET_NEXT(space_list, space);
}
@@ -6544,19 +5966,17 @@ fil_space_next(fil_space_t* prev_space)
/**
Remove space from key rotation list if there are no more
pending operations.
-@param[in] space Tablespace */
+@param[in,out] space Tablespace */
static
void
-fil_space_remove_from_keyrotation(
- fil_space_t* space)
+fil_space_remove_from_keyrotation(fil_space_t* space)
{
ut_ad(mutex_own(&fil_system->mutex));
ut_ad(space);
- if (space->n_pending_ops == 0 && space->is_in_rotation_list) {
- space->is_in_rotation_list = false;
- ut_a(UT_LIST_GET_LEN(fil_system->rotation_list) > 0);
- UT_LIST_REMOVE(rotation_list, fil_system->rotation_list, space);
+ if (space->n_pending_ops == 0 && space->is_in_rotation_list()) {
+ ut_a(!fil_system->rotation_list.empty());
+ fil_system->rotation_list.remove(*space);
}
}
@@ -6565,64 +5985,156 @@ fil_space_remove_from_keyrotation(
Once started, the caller must keep calling this until it returns NULL.
fil_space_acquire() and fil_space_release() are invoked here which
blocks a concurrent operation from dropping the tablespace.
-@param[in] prev_space Pointer to the previous fil_space_t.
+@param[in] prev_space Previous tablespace or NULL to start
+ from beginning of fil_system->rotation list
+@param[in] recheck whether the tablespace needs to be rechecked
+ because an encryption thread may still be writing page 0 for it
+@param[in] key_version key version of the key state thread
If NULL, use the first fil_space_t on fil_system->space_list.
@return pointer to the next fil_space_t.
-@retval NULL if this was the last*/
-UNIV_INTERN
-fil_space_t*
-fil_space_keyrotate_next(
- fil_space_t* prev_space)
+@retval NULL if this was the last */
+fil_space_t *fil_system_t::keyrotate_next(fil_space_t *prev_space,
+ bool recheck, uint key_version)
{
- fil_space_t* space = prev_space;
- fil_space_t* old = NULL;
+ mutex_enter(&fil_system->mutex);
- mutex_enter(&fil_system->mutex);
+ /* If one of the encryption threads has already started encrypting
+ the table, do not remove the unencrypted spaces from the rotation list.
- if (UT_LIST_GET_LEN(fil_system->rotation_list) == 0) {
- if (space) {
- ut_ad(space->n_pending_ops > 0);
- space->n_pending_ops--;
- fil_space_remove_from_keyrotation(space);
- }
- mutex_exit(&fil_system->mutex);
- return(NULL);
- }
+ If the value of innodb_encrypt_tables has changed, do not remove
+ the last processed tablespace from the rotation list. */
+ const bool remove= (!recheck || prev_space->crypt_data) &&
+ !key_version == !srv_encrypt_tables;
+ intrusive::list<fil_space_t, rotation_list_tag_t>::iterator it=
+ prev_space == NULL ? fil_system->rotation_list.end() : prev_space;
- if (prev_space == NULL) {
- space = UT_LIST_GET_FIRST(fil_system->rotation_list);
+ if (it == fil_system->rotation_list.end())
+ {
+ it= fil_system->rotation_list.begin();
+ }
+ else
+ {
+ ut_ad(prev_space->n_pending_ops > 0);
- /* We can trust that space is not NULL because we
- checked list length above */
- } else {
- ut_ad(space->n_pending_ops > 0);
+ /* Move on to the next fil_space_t */
+ prev_space->n_pending_ops--;
- /* Move on to the next fil_space_t */
- space->n_pending_ops--;
+ ++it;
- old = space;
- space = UT_LIST_GET_NEXT(rotation_list, space);
+ while (it != fil_system->rotation_list.end() &&
+ (UT_LIST_GET_LEN(it->chain) == 0 || it->is_stopping()))
+ {
+ ++it;
+ }
- fil_space_remove_from_keyrotation(old);
- }
+ if (remove)
+ {
+ fil_space_remove_from_keyrotation(prev_space);
+ }
+ }
+
+ fil_space_t *space= it == fil_system->rotation_list.end() ? NULL : &*it;
- /* Skip spaces that are being created by fil_ibd_create(),
- or dropped. Note that rotation_list contains only
- space->purpose == FIL_TABLESPACE. */
- while (space != NULL
- && (UT_LIST_GET_LEN(space->chain) == 0
- || space->is_stopping())) {
+ if (space)
+ {
+ space->n_pending_ops++;
+ }
- old = space;
- space = UT_LIST_GET_NEXT(rotation_list, space);
- fil_space_remove_from_keyrotation(old);
+ mutex_exit(&fil_system->mutex);
+
+ return space;
+}
+
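The iteration contract ("keep calling until it returns NULL") would look like this in a key-rotation thread (sketch; recheck and key_version are assumed inputs; the initial call passes recheck=false, so prev_space is never dereferenced while NULL):

	/* Sketch: each call releases the previous space's reference
	(n_pending_ops) and pins the next one. */
	for (fil_space_t* space =
		fil_system->keyrotate_next(NULL, false, key_version);
	     space != NULL;
	     space = fil_system->keyrotate_next(space, recheck, key_version))
	{
		/* ... rotate encryption keys for this tablespace ... */
	}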
+/** Determine the block size of the data file.
+@param[in] space tablespace
+@param[in] offset page number
+@return block size */
+UNIV_INTERN
+ulint
+fil_space_get_block_size(const fil_space_t* space, unsigned offset)
+{
+ ulint block_size = 512;
+
+ for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
+ node != NULL;
+ node = UT_LIST_GET_NEXT(chain, node)) {
+ block_size = node->block_size;
+ if (node->size > offset) {
+ ut_ad(node->size <= 0xFFFFFFFFU);
+ break;
+ }
+ offset -= static_cast<unsigned>(node->size);
}
- if (space != NULL) {
- space->n_pending_ops++;
+ /* We currently support block sizes up to 4K;
+ fall back to the default if a larger size is requested. */
+ if (block_size > 4096) {
+ block_size = 512;
+ }
+
+ return block_size;
+}
+
+/*******************************************************************//**
+Returns the tablespace for a given id, or NULL if not found. */
+fil_space_t*
+fil_space_found_by_id(
+/*==================*/
+ ulint id) /*!< in: space id */
+{
+ fil_space_t* space = NULL;
+ mutex_enter(&fil_system->mutex);
+ space = fil_space_get_by_id(id);
+
+ /* Not found if space is being deleted */
+ if (space && space->stop_new_ops) {
+ space = NULL;
}
mutex_exit(&fil_system->mutex);
+ return space;
+}
- return(space);
+/**
+Check whether punch hole should be attempted for the node's tablespace.
+@param[in] node File node
+@return true if punch hole should be attempted, false if not. */
+bool
+fil_node_should_punch_hole(
+ const fil_node_t* node)
+{
+ return (node->space->punch_hole);
+}
+
+/**
+Set the punch-hole setting of the node's tablespace to the given value.
+@param[in] node File node
+@param[in] val value to be set. */
+void
+fil_space_set_punch_hole(
+ fil_node_t* node,
+ bool val)
+{
+ node->space->punch_hole = val;
+}
+
+/** Checks whether this tablespace is in the list of unflushed tablespaces.
+@return true if it is in the list */
+bool fil_space_t::is_in_unflushed_spaces() const
+{
+ ut_ad(mutex_own(&fil_system->mutex));
+
+ return static_cast<const intrusive::list_node<unflushed_spaces_tag_t> *>(
+ this)
+ ->next;
+}
+
+/** Checks whether this tablespace needs key rotation.
+@return true if it is in the rotation list */
+bool fil_space_t::is_in_rotation_list() const
+{
+ ut_ad(mutex_own(&fil_system->mutex));
+
+ return static_cast<const intrusive::list_node<rotation_list_tag_t> *>(this)
+ ->next;
}
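Both membership tests above lean on an intrusive-list convention: a node linked into a sentinel-based (circular) list always has a non-null next pointer, while a detached node's next is NULL. A standalone sketch with hypothetical types:

	/* Sketch: O(1) membership test for an intrusive list node,
	assuming the list is circular around a sentinel so that every
	linked node has next != NULL. */
	struct list_node {
		list_node*	prev;
		list_node*	next;	/* NULL while detached */
	};

	static bool is_linked(const list_node* n) { return n->next != NULL; }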
diff --git a/storage/innobase/fil/fil0pagecompress.cc b/storage/innobase/fil/fil0pagecompress.cc
index edc1fa913e7..89b8efc4e9b 100644
--- a/storage/innobase/fil/fil0pagecompress.cc
+++ b/storage/innobase/fil/fil0pagecompress.cc
@@ -47,16 +47,9 @@ Updated 14/02/2015
#include "page0zip.h"
#include "trx0sys.h"
#include "row0mysql.h"
-#include "ha_prototypes.h" // IB_LOG_
-#ifndef UNIV_HOTBACKUP
-# include "buf0lru.h"
-# include "ibuf0ibuf.h"
-# include "sync0sync.h"
-# include "os0sync.h"
-#else /* !UNIV_HOTBACKUP */
-# include "srv0srv.h"
-static ulint srv_data_read, srv_data_written;
-#endif /* !UNIV_HOTBACKUP */
+#include "buf0lru.h"
+#include "ibuf0ibuf.h"
+#include "sync0sync.h"
#include "zlib.h"
#ifdef __linux__
#include <linux/fs.h>
@@ -88,8 +81,8 @@ static ulint srv_data_read, srv_data_written;
@param[in] encrypted whether the page will be subsequently encrypted
@return actual length of compressed page
@retval 0 if the page was not compressed */
-UNIV_INTERN ulint fil_page_compress(const byte* buf, byte* out_buf, ulint level,
- ulint block_size, bool encrypted)
+ulint fil_page_compress(const byte* buf, byte* out_buf, ulint level,
+ ulint block_size, bool encrypted)
{
int comp_level = int(level);
ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE;
@@ -257,7 +250,8 @@ success:
page_t page[UNIV_PAGE_SIZE_MAX];
memcpy(page, out_buf, srv_page_size);
ut_ad(fil_page_decompress(tmp_buf, page));
- ut_ad(!buf_page_is_corrupted(false, page, 0, NULL));
+ ut_ad(!buf_page_is_corrupted(false, page, univ_page_size,
+ NULL));
}
#endif /* UNIV_DEBUG */
@@ -299,11 +293,11 @@ success:
@return size of the compressed data
@retval 0 if decompression failed
@retval srv_page_size if the page was not compressed */
-UNIV_INTERN ulint fil_page_decompress(byte* tmp_buf, byte* buf)
+ulint fil_page_decompress(byte* tmp_buf, byte* buf)
{
const unsigned ptype = mach_read_from_2(buf+FIL_PAGE_TYPE);
ulint header_len;
- ib_uint64_t compression_alg;
+ uint64_t compression_alg;
switch (ptype) {
case FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED:
header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE
@@ -334,9 +328,8 @@ UNIV_INTERN ulint fil_page_decompress(byte* tmp_buf, byte* buf)
switch (compression_alg) {
default:
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unknown compression algorithm " UINT64PF,
- compression_alg);
+ ib::error() << "Unknown compression algorithm "
+ << compression_alg;
return 0;
case PAGE_ZLIB_ALGORITHM:
{
diff --git a/storage/innobase/fsp/fsp0file.cc b/storage/innobase/fsp/fsp0file.cc
new file mode 100644
index 00000000000..673e74cfb3d
--- /dev/null
+++ b/storage/innobase/fsp/fsp0file.cc
@@ -0,0 +1,1060 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2018, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file fsp/fsp0file.cc
+Tablespace data file implementation
+
+Created 2013-7-26 by Kevin Lewis
+*******************************************************/
+
+#include "fil0fil.h"
+#include "fsp0types.h"
+#include "os0file.h"
+#include "page0page.h"
+#include "srv0start.h"
+
+/** Initialize the name, size and order of this datafile
+@param[in] name tablespace name, will be copied
+@param[in] flags tablespace flags */
+void
+Datafile::init(
+ const char* name,
+ ulint flags)
+{
+ ut_ad(m_name == NULL);
+ ut_ad(name != NULL);
+
+ m_name = mem_strdup(name);
+ m_flags = flags;
+}
+
+/** Release the resources. */
+void
+Datafile::shutdown()
+{
+ close();
+
+ ut_free(m_name);
+ m_name = NULL;
+ free_filepath();
+ free_first_page();
+}
+
+/** Create/open a data file.
+@param[in] read_only_mode if true, then readonly mode checks are enforced.
+@return DB_SUCCESS or error code */
+dberr_t
+Datafile::open_or_create(bool read_only_mode)
+{
+ bool success;
+ ut_a(m_filepath != NULL);
+ ut_ad(m_handle == OS_FILE_CLOSED);
+
+ m_handle = os_file_create(
+ innodb_data_file_key, m_filepath, m_open_flags,
+ OS_FILE_NORMAL, OS_DATA_FILE, read_only_mode, &success);
+
+ if (!success) {
+ m_last_os_error = os_file_get_last_error(true);
+ ib::error() << "Cannot open datafile '" << m_filepath << "'";
+ return(DB_CANNOT_OPEN_FILE);
+ }
+
+ return(DB_SUCCESS);
+}
+
+/** Open a data file in read-only mode to check if it exists so that it
+can be validated.
+@param[in] strict whether to issue error messages
+@return DB_SUCCESS or error code */
+dberr_t
+Datafile::open_read_only(bool strict)
+{
+ bool success = false;
+ ut_ad(m_handle == OS_FILE_CLOSED);
+
+ /* This function can be called for file objects that do not need
+ to be opened, which is the case when the m_filepath is NULL */
+ if (m_filepath == NULL) {
+ return(DB_ERROR);
+ }
+
+ set_open_flags(OS_FILE_OPEN);
+ m_handle = os_file_create_simple_no_error_handling(
+ innodb_data_file_key, m_filepath, m_open_flags,
+ OS_FILE_READ_ONLY, true, &success);
+
+ if (success) {
+ m_exists = true;
+ init_file_info();
+
+ return(DB_SUCCESS);
+ }
+
+ if (strict) {
+ m_last_os_error = os_file_get_last_error(true);
+ ib::error() << "Cannot open datafile for read-only: '"
+ << m_filepath << "' OS error: " << m_last_os_error;
+ }
+
+ return(DB_CANNOT_OPEN_FILE);
+}
+
+/** Open a data file in read-write mode during start-up so that
+doublewrite pages can be restored and then it can be validated.
+@param[in] read_only_mode if true, then readonly mode checks are enforced.
+@return DB_SUCCESS or error code */
+dberr_t
+Datafile::open_read_write(bool read_only_mode)
+{
+ bool success = false;
+ ut_ad(m_handle == OS_FILE_CLOSED);
+
+ /* This function can be called for file objects that do not need
+ to be opened, which is the case when the m_filepath is NULL */
+ if (m_filepath == NULL) {
+ return(DB_ERROR);
+ }
+
+ set_open_flags(OS_FILE_OPEN);
+ m_handle = os_file_create_simple_no_error_handling(
+ innodb_data_file_key, m_filepath, m_open_flags,
+ OS_FILE_READ_WRITE, read_only_mode, &success);
+
+ if (!success) {
+ m_last_os_error = os_file_get_last_error(true);
+ ib::error() << "Cannot open datafile for read-write: '"
+ << m_filepath << "'";
+ return(DB_CANNOT_OPEN_FILE);
+ }
+
+ m_exists = true;
+
+ init_file_info();
+
+ return(DB_SUCCESS);
+}
+
+/** Initialize OS specific file info. */
+void
+Datafile::init_file_info()
+{
+#ifdef _WIN32
+ GetFileInformationByHandle(m_handle, &m_file_info);
+#else
+ fstat(m_handle, &m_file_info);
+#endif /* _WIN32 */
+}
+
+/** Close a data file.
+@return DB_SUCCESS or error code */
+dberr_t
+Datafile::close()
+{
+ if (m_handle != OS_FILE_CLOSED) {
+ ibool success = os_file_close(m_handle);
+ ut_a(success);
+
+ m_handle = OS_FILE_CLOSED;
+ }
+
+ return(DB_SUCCESS);
+}
+
+/** Make a full filepath from a directory path and a filename.
+Prepend the dirpath to filename using the extension given.
+If dirpath is NULL, prepend the default datadir to filepath.
+Store the result in m_filepath.
+@param[in] dirpath directory path
+@param[in] filename filename or filepath
+@param[in] ext filename extension */
+void
+Datafile::make_filepath(
+ const char* dirpath,
+ const char* filename,
+ ib_extention ext)
+{
+ ut_ad(dirpath != NULL || filename != NULL);
+
+ free_filepath();
+
+ m_filepath = fil_make_filepath(dirpath, filename, ext, false);
+
+ ut_ad(m_filepath != NULL);
+
+ set_filename();
+}
+
+/** Set the filepath by duplicating the filepath sent in. This is the
+name of the file with its extension and absolute or relative path.
+@param[in] filepath filepath to set */
+void
+Datafile::set_filepath(const char* filepath)
+{
+ free_filepath();
+ m_filepath = static_cast<char*>(ut_malloc_nokey(strlen(filepath) + 1));
+ ::strcpy(m_filepath, filepath);
+ set_filename();
+}
+
+/** Free the filepath buffer. */
+void
+Datafile::free_filepath()
+{
+ if (m_filepath != NULL) {
+ ut_free(m_filepath);
+ m_filepath = NULL;
+ m_filename = NULL;
+ }
+}
+
+/** Do a quick byte-by-byte test of whether the provided filepath looks
+the same as this filepath. If they are two different-looking paths to the
+same file, same_as() can establish that after the files are opened.
+@param[in] other filepath to compare with
+@retval true if it is the same filename by byte comparison
+@retval false if it looks different */
+bool
+Datafile::same_filepath_as(
+ const char* other) const
+{
+ return(0 == strcmp(m_filepath, other));
+}
+
+/** Test if another opened datafile is the same file as this object.
+@param[in] other Datafile to compare with
+@return true if it is the same file, else false */
+bool
+Datafile::same_as(
+ const Datafile& other) const
+{
+#ifdef _WIN32
+ return(m_file_info.dwVolumeSerialNumber
+ == other.m_file_info.dwVolumeSerialNumber
+ && m_file_info.nFileIndexHigh
+ == other.m_file_info.nFileIndexHigh
+ && m_file_info.nFileIndexLow
+ == other.m_file_info.nFileIndexLow);
+#else
+ return(m_file_info.st_ino == other.m_file_info.st_ino
+ && m_file_info.st_dev == other.m_file_info.st_dev);
+#endif /* _WIN32 */
+}
+
+/** Allocate and set the datafile or tablespace name in m_name.
+If a name is provided, use it; else extract a file-per-table
+tablespace name from m_filepath. The value of m_name
+will be freed in the destructor.
+@param[in] name tablespace name if known, NULL if not */
+void
+Datafile::set_name(const char* name)
+{
+ ut_free(m_name);
+
+ if (name != NULL) {
+ m_name = mem_strdup(name);
+ } else {
+ m_name = fil_path_to_space_name(m_filepath);
+ }
+}
+
+/** Reads a few significant fields from the first page of the first
+datafile. The Datafile must already be open.
+@param[in] read_only_mode If true, then readonly mode checks are enforced.
+@return DB_SUCCESS or DB_IO_ERROR if page cannot be read */
+dberr_t
+Datafile::read_first_page(bool read_only_mode)
+{
+ if (m_handle == OS_FILE_CLOSED) {
+
+ dberr_t err = open_or_create(read_only_mode);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+ }
+
+ m_first_page_buf = static_cast<byte*>(
+ ut_malloc_nokey(2 * UNIV_PAGE_SIZE_MAX));
+
+ /* Align the memory for a possible read from a raw device */
+
+ m_first_page = static_cast<byte*>(
+ ut_align(m_first_page_buf, UNIV_PAGE_SIZE));
+
+ IORequest request;
+ dberr_t err = DB_ERROR;
+ size_t page_size = UNIV_PAGE_SIZE_MAX;
+
+ /* Don't want unnecessary complaints about partial reads. */
+
+ request.disable_partial_io_warnings();
+
+ while (page_size >= UNIV_PAGE_SIZE_MIN) {
+
+ ulint n_read = 0;
+
+ err = os_file_read_no_error_handling(
+ request, m_handle, m_first_page, 0, page_size, &n_read);
+
+ if (err == DB_IO_ERROR && n_read >= UNIV_PAGE_SIZE_MIN) {
+
+ page_size >>= 1;
+
+ } else if (err == DB_SUCCESS) {
+
+ ut_a(n_read == page_size);
+
+ break;
+
+ } else if (srv_operation == SRV_OPERATION_BACKUP) {
+ break;
+ } else {
+
+ ib::error()
+ << "Cannot read first page of '"
+ << m_filepath << "' "
+ << ut_strerr(err);
+ break;
+ }
+ }
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ if (m_order == 0) {
+ m_space_id = fsp_header_get_space_id(m_first_page);
+ m_flags = fsp_header_get_flags(m_first_page);
+ if (!fsp_flags_is_valid(m_flags, m_space_id)) {
+ ulint cflags = fsp_flags_convert_from_101(m_flags);
+ if (cflags == ULINT_UNDEFINED) {
+ ib::error()
+ << "Invalid flags " << ib::hex(m_flags)
+ << " in " << m_filepath;
+ return(DB_CORRUPTION);
+ } else {
+ m_flags = cflags;
+ }
+ }
+ }
+
+ const page_size_t ps(m_flags);
+ if (ps.physical() > page_size) {
+ ib::error() << "File " << m_filepath
+ << " should be longer than "
+ << page_size << " bytes";
+ return(DB_CORRUPTION);
+ }
+
+ return(err);
+}
+
+/** Free the first page from memory when it is no longer needed. */
+void
+Datafile::free_first_page()
+{
+ if (m_first_page_buf) {
+ ut_free(m_first_page_buf);
+ m_first_page_buf = NULL;
+ m_first_page = NULL;
+ }
+}
+
+/** Validates the datafile and checks that it conforms with the expected
+space ID and flags. The file should exist and be successfully opened
+in order for this function to validate it.
+@param[in] space_id The expected tablespace ID.
+@param[in] flags The expected tablespace flags.
+@retval DB_SUCCESS if tablespace is valid, DB_ERROR if not.
+m_is_valid is also set true on success, else false. */
+dberr_t
+Datafile::validate_to_dd(ulint space_id, ulint flags)
+{
+ dberr_t err;
+
+ if (!is_open()) {
+ return DB_ERROR;
+ }
+
+ /* Validate this single-table-tablespace with the data dictionary,
+ but do not compare the DATA_DIR flag, in case the tablespace was
+ remotely located. */
+ err = validate_first_page(0);
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ flags &= ~FSP_FLAGS_MEM_MASK;
+
+ /* Make sure the datafile we found matched the space ID.
+ If the datafile is a file-per-table tablespace then also match
+ the row format and zip page size. */
+ if (m_space_id == space_id && m_flags == flags) {
+ /* Datafile matches the tablespace expected. */
+ return(DB_SUCCESS);
+ }
+
+ /* else do not use this tablespace. */
+ m_is_valid = false;
+
+ ib::error() << "Refusing to load '" << m_filepath << "' (id="
+ << m_space_id << ", flags=" << ib::hex(m_flags)
+ << "); dictionary contains id="
+ << space_id << ", flags=" << ib::hex(flags);
+
+ return(DB_ERROR);
+}
+
+/** Validates this datafile for the purpose of recovery. The file should
+exist and be successfully opened. We initially open it in read-only mode
+because we just want to read the space ID. However, if the first page is
+corrupt and needs to be restored from the doublewrite buffer, we will
+reopen it in write mode and try to restore that page.
+@retval DB_SUCCESS if tablespace is valid, DB_ERROR if not.
+m_is_valid is also set true on success, else false. */
+dberr_t
+Datafile::validate_for_recovery()
+{
+ dberr_t err;
+
+ ut_ad(is_open());
+ ut_ad(!srv_read_only_mode);
+
+ err = validate_first_page(0);
+
+ switch (err) {
+ case DB_SUCCESS:
+ case DB_TABLESPACE_EXISTS:
+ break;
+
+ default:
+ /* Re-open the file in read-write mode. Attempt to restore
+ page 0 from doublewrite and read the space ID from a survey
+ of the first few pages. */
+ close();
+ err = open_read_write(srv_read_only_mode);
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ err = find_space_id();
+ if (err != DB_SUCCESS || m_space_id == 0) {
+ ib::error() << "Datafile '" << m_filepath << "' is"
+ " corrupted. Cannot determine the space ID from"
+ " the first 64 pages.";
+ return(err);
+ }
+
+ if (restore_from_doublewrite()) {
+ return(DB_CORRUPTION);
+ }
+
+ /* Free the previously read first page and then re-validate. */
+ free_first_page();
+ err = validate_first_page(0);
+ }
+
+ if (err == DB_SUCCESS) {
+ set_name(NULL);
+ }
+
+ return(err);
+}
+
+/** Check the consistency of the first page of a datafile when the
+tablespace is opened. This occurs before the fil_space_t is created
+so the space ID found here must not belong to an already-open tablespace.
+m_is_valid is set true on success, else false.
+@param[out] flush_lsn contents of FIL_PAGE_FILE_FLUSH_LSN
+@retval DB_SUCCESS if the datafile is valid
+@retval DB_CORRUPTION if the datafile is not readable
+@retval DB_TABLESPACE_EXISTS if there is a duplicate space_id */
+dberr_t
+Datafile::validate_first_page(lsn_t* flush_lsn)
+{
+ char* prev_name;
+ char* prev_filepath;
+ const char* error_txt = NULL;
+
+ m_is_valid = true;
+
+ if (m_first_page == NULL
+ && read_first_page(srv_read_only_mode) != DB_SUCCESS) {
+
+ error_txt = "Cannot read first page";
+ } else {
+ ut_ad(m_first_page_buf);
+ ut_ad(m_first_page);
+
+ if (flush_lsn != NULL) {
+
+ *flush_lsn = mach_read_from_8(
+ m_first_page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
+ }
+ }
+
+ if (error_txt != NULL) {
+err_exit:
+ ib::info() << error_txt << " in datafile: " << m_filepath
+ << ", Space ID:" << m_space_id << ", Flags: "
+ << m_flags;
+ m_is_valid = false;
+ free_first_page();
+ return(DB_CORRUPTION);
+ }
+
+ /* Check if the whole page is blank. */
+ if (!m_space_id && !m_flags) {
+ const byte* b = m_first_page;
+ ulint nonzero_bytes = UNIV_PAGE_SIZE;
+
+ while (*b == '\0' && --nonzero_bytes != 0) {
+
+ b++;
+ }
+
+ if (nonzero_bytes == 0) {
+ error_txt = "Header page consists of zero bytes";
+ goto err_exit;
+ }
+ }
+
+ if (!fsp_flags_is_valid(m_flags, m_space_id)) {
+ /* Tablespace flags must be valid. */
+ error_txt = "Tablespace flags are invalid";
+ goto err_exit;
+ }
+
+ const page_size_t page_size(m_flags);
+
+ if (univ_page_size.logical() != page_size.logical()) {
+ /* Page size must be univ_page_size. */
+ ib::error()
+ << "Data file '" << m_filepath << "' uses page size "
+ << page_size.logical() << ", but the innodb_page_size"
+ " start-up parameter is "
+ << univ_page_size.logical();
+ free_first_page();
+ return(DB_ERROR);
+ }
+
+ if (page_get_page_no(m_first_page) != 0) {
+ /* First page must be number 0 */
+ error_txt = "Header page contains inconsistent data";
+ goto err_exit;
+ }
+
+ if (m_space_id >= SRV_LOG_SPACE_FIRST_ID) {
+ error_txt = "A bad Space ID was found";
+ goto err_exit;
+ }
+
+ if (buf_page_is_corrupted(false, m_first_page, page_size)) {
+ /* Look for checksum and other corruptions. */
+ error_txt = "Checksum mismatch";
+ goto err_exit;
+ }
+
+ if (fil_space_read_name_and_filepath(
+ m_space_id, &prev_name, &prev_filepath)) {
+
+ if (0 == strcmp(m_filepath, prev_filepath)) {
+ ut_free(prev_name);
+ ut_free(prev_filepath);
+ return(DB_SUCCESS);
+ }
+
+ /* Make sure the space_id has not already been opened. */
+ ib::error() << "Attempted to open a previously opened"
+ " tablespace. Previous tablespace " << prev_name
+ << " at filepath: " << prev_filepath
+ << " uses space ID: " << m_space_id
+ << ". Cannot open filepath: " << m_filepath
+ << " which uses the same space ID.";
+
+ ut_free(prev_name);
+ ut_free(prev_filepath);
+
+ m_is_valid = false;
+
+ free_first_page();
+
+ return(is_predefined_tablespace(m_space_id)
+ ? DB_CORRUPTION
+ : DB_TABLESPACE_EXISTS);
+ }
+
+ return(DB_SUCCESS);
+}
+
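A hedged sketch of a validation sequence built from the functions above (the default constructor and the exact accessor names on Datafile are assumptions):

	/* Sketch: open a candidate .ibd read-only and validate its
	first page before trusting its space ID and flags. */
	Datafile	df;
	df.init("test/t1", 0);
	df.make_filepath(NULL, "test/t1", IBD);

	if (df.open_read_only(true) == DB_SUCCESS) {
		lsn_t	flush_lsn;
		if (df.validate_first_page(&flush_lsn) == DB_SUCCESS) {
			/* df now carries a verified space ID and flags */
		}
		df.close();
	}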
+/** Determine the space id of the given file descriptor by reading a few
+pages from the beginning of the .ibd file.
+@return DB_SUCCESS if space id was successfully identified, else DB_ERROR. */
+dberr_t
+Datafile::find_space_id()
+{
+ os_offset_t file_size;
+
+ ut_ad(m_handle != OS_FILE_CLOSED);
+
+ file_size = os_file_get_size(m_handle);
+
+ if (file_size == (os_offset_t) -1) {
+ ib::error() << "Could not get file size of datafile '"
+ << m_filepath << "'";
+ return(DB_CORRUPTION);
+ }
+
+ /* Assuming a page size, read the space_id from each page and store it
+ in a map. Find out which space_id is agreed on by majority of the
+ pages. Choose that space_id. */
+ for (ulint page_size = UNIV_ZIP_SIZE_MIN;
+ page_size <= UNIV_PAGE_SIZE_MAX;
+ page_size <<= 1) {
+
+ /* map[space_id] = count of pages */
+ typedef std::map<
+ ulint,
+ ulint,
+ std::less<ulint>,
+ ut_allocator<std::pair<const ulint, ulint> > >
+ Pages;
+
+ Pages verify;
+ ulint page_count = 64;
+ ulint valid_pages = 0;
+
+ /* Adjust the number of pages to analyze based on file size */
+ while ((page_count * page_size) > file_size) {
+ --page_count;
+ }
+
+ ib::info()
+ << "Page size:" << page_size
+ << ". Pages to analyze:" << page_count;
+
+ byte* buf = static_cast<byte*>(
+ ut_malloc_nokey(2 * UNIV_PAGE_SIZE_MAX));
+
+ byte* page = static_cast<byte*>(
+ ut_align(buf, UNIV_SECTOR_SIZE));
+
+ for (ulint j = 0; j < page_count; ++j) {
+
+ dberr_t err;
+ ulint n_bytes = j * page_size;
+ IORequest request(IORequest::READ);
+
+ err = os_file_read(
+ request, m_handle, page, n_bytes, page_size);
+
+ if (err != DB_SUCCESS) {
+
+ ib::info()
+ << "READ FAIL: page_no:" << j;
+
+ continue;
+ }
+
+ bool noncompressed_ok = false;
+
+ /* For noncompressed pages, the page size must be
+ equal to univ_page_size.physical(). */
+ if (page_size == univ_page_size.physical()) {
+ noncompressed_ok = !buf_page_is_corrupted(
+ false, page, univ_page_size, NULL);
+ }
+
+ bool compressed_ok = false;
+
+ /* file-per-table tablespaces can be compressed with
+ the same physical and logical page size. General
+ tablespaces must have different physical and logical
+ page sizes in order to be compressed. For this check,
+ assume the page is compressed if univ_page_size.
+ logical() is equal to or less than 16k and the
+ page_size we are checking is equal to or less than
+ univ_page_size.logical(). */
+ if (univ_page_size.logical() <= UNIV_PAGE_SIZE_DEF
+ && page_size <= univ_page_size.logical()) {
+ const page_size_t compr_page_size(
+ page_size, univ_page_size.logical(),
+ true);
+
+ compressed_ok = !buf_page_is_corrupted(
+ false, page, compr_page_size, NULL);
+ }
+
+ if (noncompressed_ok || compressed_ok) {
+
+ ulint space_id = mach_read_from_4(page
+ + FIL_PAGE_SPACE_ID);
+
+ if (space_id > 0) {
+
+ ib::info()
+ << "VALID: space:"
+ << space_id << " page_no:" << j
+ << " page_size:" << page_size;
+
+ ++valid_pages;
+
+ ++verify[space_id];
+ }
+ }
+ }
+
+ ut_free(buf);
+
+ ib::info()
+ << "Page size: " << page_size
+ << ". Possible space_id count:" << verify.size();
+
+ const ulint pages_corrupted = 3;
+
+ for (ulint missed = 0; missed <= pages_corrupted; ++missed) {
+
+ for (Pages::const_iterator it = verify.begin();
+ it != verify.end();
+ ++it) {
+
+ ib::info() << "space_id:" << it->first
+ << ", Number of pages matched: "
+ << it->second << "/" << valid_pages
+ << " (" << page_size << ")";
+
+ if (it->second == (valid_pages - missed)) {
+ ib::info() << "Chosen space:"
+ << it->first;
+
+ m_space_id = it->first;
+ return(DB_SUCCESS);
+ }
+ }
+
+ }
+ }
+
+ return(DB_CORRUPTION);
+}
+
+
+/** Restore the first page of the tablespace from
+the double write buffer.
+@return whether the operation failed */
+bool
+Datafile::restore_from_doublewrite()
+{
+ if (srv_operation != SRV_OPERATION_NORMAL) {
+ return true;
+ }
+
+ /* Find if double write buffer contains page_no of given space id. */
+ const byte* page = recv_sys->dblwr.find_page(m_space_id, 0);
+ const page_id_t page_id(m_space_id, 0);
+
+ if (page == NULL) {
+ /* If the first page of the given user tablespace is not there
+ in the doublewrite buffer, then the recovery is going to fail
+ now. Hence this is treated as an error. */
+
+ ib::error()
+ << "Corrupted page " << page_id
+ << " of datafile '" << m_filepath
+ << "' could not be found in the doublewrite buffer.";
+
+ return(true);
+ }
+
+ ulint flags = mach_read_from_4(
+ FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page);
+
+ if (!fsp_flags_is_valid(flags, m_space_id)) {
+ ulint cflags = fsp_flags_convert_from_101(flags);
+ if (cflags == ULINT_UNDEFINED) {
+ ib::warn()
+ << "Ignoring a doublewrite copy of page "
+ << page_id
+ << " due to invalid flags " << ib::hex(flags);
+ return(true);
+ }
+ flags = cflags;
+ /* The flags on the page should be converted later. */
+ }
+
+ const page_size_t page_size(flags);
+
+ ut_a(page_get_page_no(page) == page_id.page_no());
+
+ ib::info() << "Restoring page " << page_id
+ << " of datafile '" << m_filepath
+ << "' from the doublewrite buffer. Writing "
+ << page_size.physical() << " bytes into file '"
+ << m_filepath << "'";
+
+ IORequest request(IORequest::WRITE);
+
+ return(os_file_write(
+ request,
+ m_filepath, m_handle, page, 0, page_size.physical())
+ != DB_SUCCESS);
+}
+
+/** Create a link filename based on the contents of m_name,
+open that file, and read the contents into m_filepath.
+@retval DB_SUCCESS if remote linked tablespace file is opened and read.
+@retval DB_CANNOT_OPEN_FILE if the link file does not exist. */
+dberr_t
+RemoteDatafile::open_link_file()
+{
+ set_link_filepath(NULL);
+ m_filepath = read_link_file(m_link_filepath);
+
+ return(m_filepath == NULL ? DB_CANNOT_OPEN_FILE : DB_SUCCESS);
+}
+
+/** Opens a handle to the file linked to in an InnoDB Symbolic Link file
+in read-only mode so that it can be validated.
+@param[in] strict whether to issue error messages
+@return DB_SUCCESS if remote linked tablespace file is found and opened. */
+dberr_t
+RemoteDatafile::open_read_only(bool strict)
+{
+ if (m_filepath == NULL && open_link_file() == DB_CANNOT_OPEN_FILE) {
+ return(DB_ERROR);
+ }
+
+ dberr_t err = Datafile::open_read_only(strict);
+
+ if (err != DB_SUCCESS && strict) {
+ /* The following call prints an error message */
+ os_file_get_last_error(true);
+ ib::error() << "A link file was found named '"
+ << m_link_filepath << "' but the linked tablespace '"
+ << m_filepath << "' could not be opened read-only.";
+ }
+
+ return(err);
+}
+
+/** Opens a handle to the file linked to in an InnoDB Symbolic Link file
+in read-write mode so that it can be restored from doublewrite and validated.
+@param[in] read_only_mode If true, then readonly mode checks are enforced.
+@return DB_SUCCESS if remote linked tablespace file is found and opened. */
+dberr_t
+RemoteDatafile::open_read_write(bool read_only_mode)
+{
+ if (m_filepath == NULL && open_link_file() == DB_CANNOT_OPEN_FILE) {
+ return(DB_ERROR);
+ }
+
+ dberr_t err = Datafile::open_read_write(read_only_mode);
+
+ if (err != DB_SUCCESS) {
+ /* The following call prints an error message */
+ m_last_os_error = os_file_get_last_error(true);
+ ib::error() << "A link file was found named '"
+ << m_link_filepath << "' but the linked data file '"
+ << m_filepath << "' could not be opened for writing.";
+ }
+
+ return(err);
+}
+
+/** Release the resources. */
+void
+RemoteDatafile::shutdown()
+{
+ Datafile::shutdown();
+
+ if (m_link_filepath != 0) {
+ ut_free(m_link_filepath);
+ m_link_filepath = 0;
+ }
+}
+
+/** Set the link filepath. Use default datadir, the base name of
+the path provided without its suffix, plus DOT_ISL.
+@param[in] path filepath which contains a basename to use.
+ If NULL, use m_name as the basename. */
+void
+RemoteDatafile::set_link_filepath(const char* path)
+{
+ if (m_link_filepath == NULL) {
+ m_link_filepath = fil_make_filepath(NULL, name(), ISL, false);
+ }
+}
+
+/** Creates a new InnoDB Symbolic Link (ISL) file. It is always created
+under the 'datadir' of MySQL. The datadir is the directory of a
+running mysqld program. We can refer to it by simply using the path ".".
+@param[in] name tablespace name
+@param[in] filepath remote filepath of tablespace datafile
+@return DB_SUCCESS or error code */
+dberr_t
+RemoteDatafile::create_link_file(
+ const char* name,
+ const char* filepath)
+{
+ bool success;
+ dberr_t err = DB_SUCCESS;
+ char* link_filepath = NULL;
+ char* prev_filepath = NULL;
+
+ ut_ad(!srv_read_only_mode);
+ ut_ad(0 == strcmp(&filepath[strlen(filepath) - 4], DOT_IBD));
+
+ link_filepath = fil_make_filepath(NULL, name, ISL, false);
+
+ if (link_filepath == NULL) {
+ return(DB_ERROR);
+ }
+
+ prev_filepath = read_link_file(link_filepath);
+ if (prev_filepath) {
+ /* Truncate will call this with an existing
+ link file which contains the same filepath. */
+ bool same = !strcmp(prev_filepath, filepath);
+ ut_free(prev_filepath);
+ if (same) {
+ ut_free(link_filepath);
+ return(DB_SUCCESS);
+ }
+ }
+
+ /* Check if the file already exists. */
+ FILE* file = NULL;
+ bool exists;
+ os_file_type_t ftype;
+
+ success = os_file_status(link_filepath, &exists, &ftype);
+ ulint error = 0;
+
+ if (success && !exists) {
+
+ file = fopen(link_filepath, "w");
+ if (file == NULL) {
+ /* This call will print its own error message */
+ error = os_file_get_last_error(true);
+ }
+ } else {
+ error = OS_FILE_ALREADY_EXISTS;
+ }
+
+ if (error != 0) {
+
+ ib::error() << "Cannot create file " << link_filepath << ".";
+
+ if (error == OS_FILE_ALREADY_EXISTS) {
+ ib::error() << "The link file: " << link_filepath
+ << " already exists.";
+ err = DB_TABLESPACE_EXISTS;
+
+ } else if (error == OS_FILE_DISK_FULL) {
+ err = DB_OUT_OF_FILE_SPACE;
+
+ } else {
+ err = DB_ERROR;
+ }
+
+ /* file is not open, no need to close it. */
+ ut_free(link_filepath);
+ return(err);
+ }
+
+ ulint rbytes = fwrite(filepath, 1, strlen(filepath), file);
+
+ if (rbytes != strlen(filepath)) {
+ error = os_file_get_last_error(true);
+ ib::error() <<
+ "Cannot write link file: "
+ << link_filepath << " filepath: " << filepath;
+ err = DB_ERROR;
+ }
+
+ /* Close the file, we only need it at startup */
+ fclose(file);
+
+ ut_free(link_filepath);
+
+ return(err);
+}
+
+/** Delete an InnoDB Symbolic Link (ISL) file. */
+void
+RemoteDatafile::delete_link_file(void)
+{
+ ut_ad(m_link_filepath != NULL);
+
+ if (m_link_filepath != NULL) {
+ os_file_delete_if_exists(innodb_data_file_key,
+ m_link_filepath, NULL);
+ }
+}
+
+/** Delete an InnoDB Symbolic Link (ISL) file by name.
+@param[in] name tablespace name */
+void
+RemoteDatafile::delete_link_file(
+ const char* name)
+{
+ char* link_filepath = fil_make_filepath(NULL, name, ISL, false);
+
+ if (link_filepath != NULL) {
+ os_file_delete_if_exists(
+ innodb_data_file_key, link_filepath, NULL);
+
+ ut_free(link_filepath);
+ }
+}
+
+/** Read an InnoDB Symbolic Link (ISL) file by name.
+It is always created under the datadir of MySQL.
+For file-per-table tablespaces, the isl file is expected to be
+in a 'database' directory and called 'tablename.isl'.
+The caller must free the memory returned if it is not null.
+@param[in] link_filepath filepath of the ISL file
+@return Filepath of the IBD file read from the ISL file */
+char*
+RemoteDatafile::read_link_file(
+ const char* link_filepath)
+{
+ FILE* file = fopen(link_filepath, "r+b");
+ if (file == NULL) {
+ return(NULL);
+ }
+
+ char* filepath = static_cast<char*>(ut_malloc_nokey(OS_FILE_MAX_PATH));
+
+ os_file_read_string(file, filepath, OS_FILE_MAX_PATH);
+ fclose(file);
+
+ if (filepath[0] != '\0') {
+ /* Trim whitespace from end of filepath */
+ ulint last_ch = strlen(filepath) - 1;
+ while (last_ch > 4 && filepath[last_ch] <= 0x20) {
+ filepath[last_ch--] = 0x00;
+ }
+ os_normalize_path(filepath);
+ }
+
+ return(filepath);
+}
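For illustration, a hedged round trip of the ISL helpers above (hypothetical paths; create_link_file() asserts that the target path ends in ".ibd"):

	/* Sketch: record a remote datafile location, then resolve it. */
	dberr_t	err = RemoteDatafile::create_link_file(
		"test/t1", "/ssd1/data/test/t1.ibd");

	if (err == DB_SUCCESS) {
		char*	ibd_path = RemoteDatafile::read_link_file(
			"./test/t1.isl");
		/* ibd_path is "/ssd1/data/test/t1.ibd" unless the open
		failed; the caller must free it. */
		ut_free(ibd_path);
	}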
diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc
index 13ac923eb3f..78d562be9a3 100644
--- a/storage/innobase/fsp/fsp0fsp.cc
+++ b/storage/innobase/fsp/fsp0fsp.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2018, MariaDB Corporation.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,73 +25,42 @@ Created 11/29/1995 Heikki Tuuri
***********************************************************************/
#include "fsp0fsp.h"
-
-#ifdef UNIV_NONINL
-#include "fsp0fsp.ic"
-#endif
-
#include "buf0buf.h"
#include "fil0fil.h"
#include "fil0crypt.h"
#include "mtr0log.h"
#include "ut0byte.h"
#include "page0page.h"
-#include "page0zip.h"
-#ifdef UNIV_HOTBACKUP
-# include "fut0lst.h"
-#else /* UNIV_HOTBACKUP */
-# include "sync0sync.h"
-# include "fut0fut.h"
-# include "srv0srv.h"
-# include "ibuf0ibuf.h"
-# include "btr0btr.h"
-# include "btr0sea.h"
-# include "dict0boot.h"
-# include "log0log.h"
-#endif /* UNIV_HOTBACKUP */
-#include "dict0mem.h"
+#include "fut0fut.h"
+#include "srv0srv.h"
#include "srv0start.h"
+#include "ibuf0ibuf.h"
+#include "btr0btr.h"
+#include "btr0sea.h"
+#include "dict0boot.h"
+#include "log0log.h"
+#include "dict0mem.h"
+#include "fsp0types.h"
+// JAN: MySQL 5.7 Encryption
+// #include <my_aes.h>
-#ifndef UNIV_HOTBACKUP
-/** Flag to indicate if we have printed the tablespace full error. */
-static ibool fsp_tbs_full_error_printed = FALSE;
+typedef ulint page_no_t;
-/**********************************************************************//**
-Returns an extent to the free list of a space. */
+/** Return an extent to the free list of a space.
+@param[in,out] space tablespace
+@param[in] offset page number in the extent
+@param[in] page_size page size
+@param[in,out] mtr mini-transaction */
+MY_ATTRIBUTE((nonnull))
static
void
fsp_free_extent(
-/*============*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page, /*!< in: page offset in the extent */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/**********************************************************************//**
-Frees an extent of a segment to the space free list. */
-static
-void
-fseg_free_extent(
-/*=============*/
- fseg_inode_t* seg_inode, /*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page, /*!< in: page offset in the extent */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/**********************************************************************//**
-Calculates the number of pages reserved by a segment, and how
-many pages are currently used.
-@return number of reserved pages */
-static
-ulint
-fseg_n_reserved_pages_low(
-/*======================*/
- fseg_inode_t* header, /*!< in: segment inode */
- ulint* used, /*!< out: number of pages used (not
- more than reserved) */
- mtr_t* mtr); /*!< in/out: mini-transaction */
+ fil_space_t* space,
+ page_no_t offset,
+ const page_size_t& page_size,
+ mtr_t* mtr);
+
/********************************************************************//**
Marks a page used. The page must reside within the extents of the given
segment. */
@@ -100,118 +69,127 @@ void
fseg_mark_page_used(
/*================*/
fseg_inode_t* seg_inode,/*!< in: segment inode */
- ulint page, /*!< in: page offset */
+ page_no_t page, /*!< in: page offset */
xdes_t* descr, /*!< in: extent descriptor */
mtr_t* mtr); /*!< in/out: mini-transaction */
-/**********************************************************************//**
-Returns the first extent descriptor for a segment. We think of the extent
-lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL
--> FSEG_FREE.
-@return the first extent descriptor, or NULL if none */
+
+/** Returns the first extent descriptor for a segment.
+We think of the extent lists of the segment catenated in the order
+FSEG_FULL -> FSEG_NOT_FULL -> FSEG_FREE.
+@param[in] inode segment inode
+@param[in] space tablespace
+@param[in] page_size page size
+@param[in,out] mtr mini-transaction
+@return the first extent descriptor, or NULL if none */
+MY_ATTRIBUTE((nonnull, warn_unused_result))
static
xdes_t*
fseg_get_first_extent(
-/*==================*/
- fseg_inode_t* inode, /*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/**********************************************************************//**
-Puts new extents to the free list if
-there are free extents above the free limit. If an extent happens
-to contain an extent descriptor page, the extent is put to
-the FSP_FREE_FRAG list with the page marked as used. */
-static
+ fseg_inode_t* inode,
+ const fil_space_t* space,
+ const page_size_t& page_size,
+ mtr_t* mtr);
+
+/** Put new extents to the free list if there are free extents above the free
+limit. If an extent happens to contain an extent descriptor page, the extent
+is put to the FSP_FREE_FRAG list with the page marked as used.
+@param[in] init_space true if this is a single-table tablespace
+and we are only initializing the first extent and the first bitmap pages;
+then we will not allocate more extents
+@param[in,out] space tablespace
+@param[in,out] header tablespace header
+@param[in,out] mtr mini-transaction */
+static ATTRIBUTE_COLD
void
fsp_fill_free_list(
-/*===============*/
- ibool init_space, /*!< in: TRUE if this is a single-table
- tablespace and we are only initing
- the tablespace's first extent
- descriptor page and ibuf bitmap page;
- then we do not allocate more extents */
- ulint space, /*!< in: space */
- fsp_header_t* header, /*!< in/out: space header */
- mtr_t* mtr) /*!< in/out: mini-transaction */
- UNIV_COLD MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
-Allocates a single free page from a segment. This function implements
-the intelligent allocation strategy which tries to minimize file space
-fragmentation.
-@retval NULL if no page could be allocated
-@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
+ bool init_space,
+ fil_space_t* space,
+ fsp_header_t* header,
+ mtr_t* mtr);
+
+/** Allocates a single free page from a segment.
+This function implements the intelligent allocation strategy which tries
+to minimize file space fragmentation.
+@param[in,out] space tablespace
+@param[in] page_size page size
+@param[in,out] seg_inode segment inode
+@param[in] hint hint of which page would be desirable
+@param[in] direction if the new page is needed because of
+an index page split, and records are inserted there in order, into which
+direction they go alphabetically: FSP_DOWN, FSP_UP, FSP_NO_DIR
+@param[in] rw_latch RW_SX_LATCH, RW_X_LATCH
+@param[in,out] mtr mini-transaction
+@param[in,out] init_mtr mtr or another mini-transaction in
+which the page should be initialized. If init_mtr != mtr, but the page is
+already latched in mtr, do not initialize the page
+@param[in] has_done_reservation TRUE if the space has already been
+reserved, in this case we will never return NULL
+@retval NULL if no page could be allocated
+@retval block rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
(init_mtr == mtr, or the page was not previously freed in mtr)
-@retval block (not allocated or initialized) otherwise */
+@retval block (not allocated or initialized) otherwise */
static
buf_block_t*
fseg_alloc_free_page_low(
-/*=====================*/
- ulint space, /*!< in: space */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- fseg_inode_t* seg_inode, /*!< in/out: segment inode */
- ulint hint, /*!< in: hint of which page would be
- desirable */
- byte direction, /*!< in: if the new page is needed because
- of an index page split, and records are
- inserted there in order, into which
- direction they go alphabetically: FSP_DOWN,
- FSP_UP, FSP_NO_DIR */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- mtr_t* init_mtr)/*!< in/out: mtr or another mini-transaction
- in which the page should be initialized.
- If init_mtr!=mtr, but the page is already
- latched in mtr, do not initialize the page. */
- MY_ATTRIBUTE((warn_unused_result, nonnull));
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Reads the file space size stored in the header page.
-@return tablespace size stored in the space header */
-UNIV_INTERN
-ulint
-fsp_get_size_low(
-/*=============*/
- page_t* page) /*!< in: header page (page 0 in the tablespace) */
-{
- return(mach_read_from_4(page + FSP_HEADER_OFFSET + FSP_SIZE));
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Gets a pointer to the space header and x-locks its page.
-@return pointer to the space header, page x-locked */
+ fil_space_t* space,
+ const page_size_t& page_size,
+ fseg_inode_t* seg_inode,
+ ulint hint,
+ byte direction,
+ rw_lock_type_t rw_latch,
+ mtr_t* mtr,
+ mtr_t* init_mtr
+#ifdef UNIV_DEBUG
+ , ibool has_done_reservation
+#endif /* UNIV_DEBUG */
+)
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Gets a pointer to the space header and x-locks its page.
+@param[in] space tablespace
+@param[in] page_size page size
+@param[in,out] mtr mini-transaction
+@return pointer to the space header, page x-locked */
UNIV_INLINE
fsp_header_t*
fsp_get_space_header(
-/*=================*/
- ulint id, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ const fil_space_t* space,
+ const page_size_t& page_size,
+ mtr_t* mtr)
{
buf_block_t* block;
fsp_header_t* header;
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
- ut_ad(!zip_size || zip_size >= UNIV_ZIP_SIZE_MIN);
- ut_ad(id || !zip_size);
+ ut_ad(space->purpose != FIL_TYPE_LOG);
+ ut_ad(!FSP_FLAGS_GET_ZIP_SSIZE(space->flags)
+ == !page_size.is_compressed());
- block = buf_page_get(id, zip_size, 0, RW_X_LATCH, mtr);
+ block = buf_page_get(page_id_t(space->id, 0), page_size,
+ RW_SX_LATCH, mtr);
header = FSP_HEADER_OFFSET + buf_block_get_frame(block);
buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
- ut_ad(id == mach_read_from_4(FSP_SPACE_ID + header));
- ut_ad(zip_size == fsp_flags_get_zip_size(
- mach_read_from_4(FSP_SPACE_FLAGS + header)));
+ ut_ad(space->id == mach_read_from_4(FSP_SPACE_ID + header));
return(header);
}
+#ifdef UNIV_DEBUG
+/** Skip some of the sanity checks that are time consuming even in debug mode
+and can affect frequent verification runs that are done to ensure stability of
+the product.
+@return true if check should be skipped for given space. */
+bool
+fsp_skip_sanity_check(
+ ulint space_id)
+{
+ return(srv_skip_temp_table_checks_debug
+ && fsp_is_system_temporary(space_id));
+}
+#endif /* UNIV_DEBUG */
+
/**********************************************************************//**
Gets a descriptor bit of a page.
-@return TRUE if free */
+@return TRUE if free */
UNIV_INLINE
ibool
xdes_mtr_get_bit(
@@ -222,8 +200,8 @@ xdes_mtr_get_bit(
0 ... FSP_EXTENT_SIZE - 1 */
mtr_t* mtr) /*!< in: mini-transaction */
{
- ut_ad(mtr->state == MTR_ACTIVE);
- ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr->is_active());
+ ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX));
return(xdes_get_bit(descr, bit, offset));
}
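
xdes_mtr_get_bit() and xdes_set_bit() address the descriptor bitmap with two bits per page. The following self-contained sketch reproduces that index arithmetic, assuming XDES_BITS_PER_PAGE == 2 (XDES_FREE_BIT and XDES_CLEAN_BIT) as declared in fsp0fsp.h:

#include <cstdio>

const unsigned XDES_BITS_PER_PAGE = 2;	// free bit + clean bit per page

struct XdesBitPos { unsigned byte_index; unsigned bit_index; };

// Position of a descriptor bit inside the XDES_BITMAP array,
// mirroring the index/byte_index/bit_index math in xdes_set_bit().
XdesBitPos xdes_bit_pos(unsigned bit, unsigned page_offset_in_extent)
{
	unsigned index = bit + XDES_BITS_PER_PAGE * page_offset_in_extent;
	return { index / 8, index % 8 };
}

int main()
{
	// XDES_FREE_BIT (0) of the 13th page in the extent:
	XdesBitPos p = xdes_bit_pos(0, 13);
	std::printf("byte %u, bit %u\n", p.byte_index, p.bit_index); // byte 3, bit 2
}
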
@@ -246,7 +224,7 @@ xdes_set_bit(
ulint bit_index;
ulint descr_byte;
- ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX));
ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT));
ut_ad(offset < FSP_EXTENT_SIZE);
@@ -255,8 +233,7 @@ xdes_set_bit(
byte_index = index / 8;
bit_index = index % 8;
- descr_byte = mtr_read_ulint(descr + XDES_BITMAP + byte_index,
- MLOG_1BYTE, mtr);
+ descr_byte = mach_read_from_1(descr + XDES_BITMAP + byte_index);
descr_byte = ut_bit_set_nth(descr_byte, bit_index, val);
mlog_write_ulint(descr + XDES_BITMAP + byte_index, descr_byte,
@@ -267,7 +244,7 @@ xdes_set_bit(
Looks for a descriptor bit having the desired value. Starts from hint
and scans upward; at the end of the extent the search is wrapped to
the start of the extent.
-@return bit index of the bit, ULINT_UNDEFINED if not found */
+@return bit index of the bit, ULINT_UNDEFINED if not found */
UNIV_INLINE
ulint
xdes_find_bit(
@@ -284,7 +261,7 @@ xdes_find_bit(
ut_ad(descr && mtr);
ut_ad(val <= TRUE);
ut_ad(hint < FSP_EXTENT_SIZE);
- ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX));
for (i = hint; i < FSP_EXTENT_SIZE; i++) {
if (val == xdes_mtr_get_bit(descr, bit, i, mtr)) {
@@ -304,7 +281,7 @@ xdes_find_bit(
/**********************************************************************//**
Returns the number of used pages in a descriptor.
-@return number of pages used */
+@return number of pages used */
UNIV_INLINE
ulint
xdes_get_n_used(
@@ -315,7 +292,7 @@ xdes_get_n_used(
ulint count = 0;
ut_ad(descr && mtr);
- ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX));
for (ulint i = 0; i < FSP_EXTENT_SIZE; ++i) {
if (FALSE == xdes_mtr_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
count++;
@@ -327,7 +304,7 @@ xdes_get_n_used(
/**********************************************************************//**
Returns true if extent contains no used pages.
-@return TRUE if totally free */
+@return TRUE if totally free */
UNIV_INLINE
ibool
xdes_is_free(
@@ -345,7 +322,7 @@ xdes_is_free(
/**********************************************************************//**
Returns true if extent contains no free pages.
-@return TRUE if full */
+@return TRUE if full */
UNIV_INLINE
ibool
xdes_is_full(
@@ -374,14 +351,14 @@ xdes_set_state(
ut_ad(descr && mtr);
ut_ad(state >= XDES_FREE);
ut_ad(state <= XDES_FSEG);
- ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX));
mlog_write_ulint(descr + XDES_STATE, state, MLOG_4BYTES, mtr);
}
/**********************************************************************//**
Gets the state of an xdes.
-@return state */
+@return state */
UNIV_INLINE
ulint
xdes_get_state(
@@ -392,9 +369,9 @@ xdes_get_state(
ulint state;
ut_ad(descr && mtr);
- ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX));
- state = mtr_read_ulint(descr + XDES_STATE, MLOG_4BYTES, mtr);
+ state = mach_read_from_4(descr + XDES_STATE);
ut_ad(state - 1 < XDES_FSEG);
return(state);
}
@@ -411,7 +388,7 @@ xdes_init(
ulint i;
ut_ad(descr && mtr);
- ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX));
ut_ad((XDES_SIZE - XDES_BITMAP) % 4 == 0);
for (i = XDES_BITMAP; i < XDES_SIZE; i += 4) {
@@ -421,126 +398,183 @@ xdes_init(
xdes_set_state(descr, XDES_FREE, mtr);
}
-/********************************************************************//**
-Gets pointer to a the extent descriptor of a page. The page where the extent
-descriptor resides is x-locked. This function no longer extends the data
-file.
+/** Get a pointer to the extent descriptor of a page.
+@param[in,out] sp_header tablespace header page, x-latched
+@param[in] space tablespace
+@param[in] offset page offset
+@param[in,out] mtr mini-transaction
+@param[in] init_space whether the tablespace is being initialized
+@param[out] desc_block descriptor block, or NULL if it is
+the same as the tablespace header
@return pointer to the extent descriptor, NULL if the page does not
-exist in the space or if the offset is >= the free limit */
-UNIV_INLINE MY_ATTRIBUTE((nonnull, warn_unused_result))
+exist in the space or if the offset exceeds the free limit */
+UNIV_INLINE MY_ATTRIBUTE((warn_unused_result))
xdes_t*
xdes_get_descriptor_with_space_hdr(
-/*===============================*/
- fsp_header_t* sp_header, /*!< in/out: space header, x-latched
- in mtr */
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: page offset; if equal
- to the free limit, we try to
- add new extents to the space
- free list */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ fsp_header_t* sp_header,
+ const fil_space_t* space,
+ page_no_t offset,
+ mtr_t* mtr,
+ bool init_space = false,
+ buf_block_t** desc_block = NULL)
{
ulint limit;
ulint size;
- ulint zip_size;
ulint descr_page_no;
page_t* descr_page;
-
- ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains(mtr, &space->latch, MTR_MEMO_X_LOCK));
+ ut_ad(mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_SX_FIX));
ut_ad(page_offset(sp_header) == FSP_HEADER_OFFSET);
/* Read free limit and space size */
limit = mach_read_from_4(sp_header + FSP_FREE_LIMIT);
size = mach_read_from_4(sp_header + FSP_SIZE);
- zip_size = fsp_flags_get_zip_size(
- mach_read_from_4(sp_header + FSP_SPACE_FLAGS));
+ ut_ad(limit == space->free_limit
+ || (space->free_limit == 0
+ && (init_space
+ || space->purpose == FIL_TYPE_TEMPORARY
+ || (srv_startup_is_before_trx_rollback_phase
+ && (space->id == TRX_SYS_SPACE
+ || srv_is_undo_tablespace(space->id))))));
+ ut_ad(size == space->size_in_header);
if ((offset >= size) || (offset >= limit)) {
return(NULL);
}
- descr_page_no = xdes_calc_descriptor_page(zip_size, offset);
+ const page_size_t page_size(space->flags);
+
+ descr_page_no = xdes_calc_descriptor_page(page_size, offset);
+
+ buf_block_t* block;
if (descr_page_no == 0) {
/* It is on the space header page */
descr_page = page_align(sp_header);
+ block = NULL;
} else {
- buf_block_t* block;
+ block = buf_page_get(
+ page_id_t(space->id, descr_page_no), page_size,
+ RW_SX_LATCH, mtr);
- block = buf_page_get(space, zip_size, descr_page_no,
- RW_X_LATCH, mtr);
buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
descr_page = buf_block_get_frame(block);
}
- return(descr_page + XDES_ARR_OFFSET
- + XDES_SIZE * xdes_calc_descriptor_index(zip_size, offset));
-}
+ if (desc_block != NULL) {
+ *desc_block = block;
+ }
-/********************************************************************//**
-Gets pointer to a the extent descriptor of a page. The page where the
-extent descriptor resides is x-locked. If the page offset is equal to
-the free limit of the space, adds new extents from above the free limit
-to the space free list, if not free limit == space size. This adding
-is necessary to make the descriptor defined, as they are uninitialized
-above the free limit.
-@return pointer to the extent descriptor, NULL if the page does not
-exist in the space or if the offset exceeds the free limit */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
+ return(descr_page + XDES_ARR_OFFSET
+ + XDES_SIZE * xdes_calc_descriptor_index(page_size, offset));
+}
+
+/** Get the extent descriptor of a page.
+The page where the extent descriptor resides is x-locked. If the page
+offset is equal to the free limit of the space, we will add new
+extents from above the free limit to the space free list, unless the
+free limit already equals the space size. This is necessary to make the
+descriptor defined, as descriptors are uninitialized above the free limit.
+@param[in] space tablespace
+@param[in] offset page offset; if equal to the free limit, we
+try to add new extents to the space free list
+@param[in] page_size page size
+@param[in,out] mtr mini-transaction
+@return the extent descriptor */
+MY_ATTRIBUTE((warn_unused_result))
+static
xdes_t*
xdes_get_descriptor(
-/*================*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint offset, /*!< in: page offset; if equal to the free limit,
- we try to add new extents to the space free list */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ const fil_space_t* space,
+ page_no_t offset,
+ const page_size_t& page_size,
+ mtr_t* mtr)
{
buf_block_t* block;
fsp_header_t* sp_header;
- block = buf_page_get(space, zip_size, 0, RW_X_LATCH, mtr);
+ block = buf_page_get(page_id_t(space->id, 0), page_size,
+ RW_SX_LATCH, mtr);
+
buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
sp_header = FSP_HEADER_OFFSET + buf_block_get_frame(block);
- return(xdes_get_descriptor_with_space_hdr(sp_header, space, offset,
- mtr));
+ return(xdes_get_descriptor_with_space_hdr(
+ sp_header, space, offset, mtr));
+}
+
+/** Get the extent descriptor of a page, for read-only access.
+The page where the extent descriptor resides is s-latched. Unlike
+xdes_get_descriptor(), this function never extends the free list;
+the offset must already be below the free limit.
+@param[in] space tablespace
+@param[in] page descriptor page offset
+@param[in] offset page offset
+@param[in] page_size page size
+@param[in,out] mtr mini-transaction
+@return the extent descriptor
+@retval NULL if the descriptor is not available */
+MY_ATTRIBUTE((warn_unused_result))
+static
+const xdes_t*
+xdes_get_descriptor_const(
+ const fil_space_t* space,
+ page_no_t page,
+ page_no_t offset,
+ const page_size_t& page_size,
+ mtr_t* mtr)
+{
+ ut_ad(mtr_memo_contains(mtr, &space->latch, MTR_MEMO_S_LOCK));
+ ut_ad(offset < space->free_limit);
+ ut_ad(offset < space->size_in_header);
+
+ if (buf_block_t* block = buf_page_get(page_id_t(space->id, page),
+ page_size, RW_S_LATCH, mtr)) {
+ buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
+
+ ut_ad(page != 0 || space->free_limit == mach_read_from_4(
+ FSP_FREE_LIMIT + FSP_HEADER_OFFSET
+ + block->frame));
+ ut_ad(page != 0 || space->size_in_header == mach_read_from_4(
+ FSP_SIZE + FSP_HEADER_OFFSET
+ + block->frame));
+
+ return(block->frame + XDES_ARR_OFFSET + XDES_SIZE
+ * xdes_calc_descriptor_index(page_size, offset));
+ }
+
+ return(NULL);
}
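
Both descriptor getters ultimately rely on xdes_calc_descriptor_page() and xdes_calc_descriptor_index(). This is a back-of-the-envelope model of that arithmetic, assuming the default 16KiB physical page size and 64-page extents; compressed page sizes change the constants but not the shape:

#include <cstdio>

const unsigned PHYSICAL_PAGE_SIZE = 16384;	// pages covered per XDES page
const unsigned FSP_EXTENT_SIZE = 64;		// pages per extent

// The XDES page for a given page number: round down to the interval start.
unsigned descriptor_page(unsigned offset)
{
	return offset - offset % PHYSICAL_PAGE_SIZE;
}

// The slot within that XDES page: one slot per extent in the interval.
unsigned descriptor_index(unsigned offset)
{
	return (offset % PHYSICAL_PAGE_SIZE) / FSP_EXTENT_SIZE;
}

int main()
{
	// Page 40000 is described on XDES page 32768, slot (40000-32768)/64 = 113.
	std::printf("page %u, slot %u\n",
		    descriptor_page(40000), descriptor_index(40000));
}
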
-/********************************************************************//**
-Gets pointer to a the extent descriptor if the file address
-of the descriptor list node is known. The page where the
+/** Get a pointer to the extent descriptor. The page where the
extent descriptor resides is x-locked.
-@return pointer to the extent descriptor */
+@param[in] space tablespace
+@param[in] page_size page size
+@param[in] lst_node file address of the list node
+ contained in the descriptor
+@param[in,out] mtr mini-transaction
+@return pointer to the extent descriptor */
+MY_ATTRIBUTE((nonnull, warn_unused_result))
UNIV_INLINE
xdes_t*
xdes_lst_get_descriptor(
-/*====================*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- fil_addr_t lst_node,/*!< in: file address of the list node
- contained in the descriptor */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ const fil_space_t* space,
+ const page_size_t& page_size,
+ fil_addr_t lst_node,
+ mtr_t* mtr)
{
- xdes_t* descr;
-
- ut_ad(mtr);
- ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL),
- MTR_MEMO_X_LOCK));
- descr = fut_get_ptr(space, zip_size, lst_node, RW_X_LATCH, mtr)
- - XDES_FLST_NODE;
-
- return(descr);
+ ut_ad(mtr_memo_contains(mtr, &space->latch, MTR_MEMO_X_LOCK));
+ ut_ad(page_size.equals_to(page_size_t(space->flags)));
+ return(fut_get_ptr(space->id, page_size, lst_node, RW_SX_LATCH, mtr)
+ - XDES_FLST_NODE);
}
/********************************************************************//**
Returns page offset of the first page in extent described by a descriptor.
-@return offset of the first page in extent */
+@return offset of the first page in extent */
UNIV_INLINE
ulint
xdes_get_offset(
@@ -553,84 +587,63 @@ xdes_get_offset(
+ ((page_offset(descr) - XDES_ARR_OFFSET) / XDES_SIZE)
* FSP_EXTENT_SIZE);
}
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************//**
-Inits a file page whose prior contents should be ignored. */
-static
-void
-fsp_init_file_page_low(
-/*===================*/
- buf_block_t* block) /*!< in: pointer to a page */
+/** Initialize a file page whose prior contents should be ignored.
+@param[in,out] block buffer pool block */
+void fsp_apply_init_file_page(buf_block_t* block)
{
page_t* page = buf_block_get_frame(block);
- page_zip_des_t* page_zip= buf_block_get_page_zip(block);
-#ifndef UNIV_HOTBACKUP
- block->check_index_page_at_flush = FALSE;
-#endif /* !UNIV_HOTBACKUP */
+ memset(page, 0, UNIV_PAGE_SIZE);
+
+ mach_write_to_4(page + FIL_PAGE_OFFSET, block->page.id.page_no());
+ mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
+ block->page.id.space());
- if (page_zip) {
- memset(page, 0, UNIV_PAGE_SIZE);
+ if (page_zip_des_t* page_zip= buf_block_get_page_zip(block)) {
memset(page_zip->data, 0, page_zip_get_size(page_zip));
- mach_write_to_4(page + FIL_PAGE_OFFSET,
- buf_block_get_page_no(block));
- mach_write_to_4(page
- + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
- buf_block_get_space(block));
memcpy(page_zip->data + FIL_PAGE_OFFSET,
page + FIL_PAGE_OFFSET, 4);
memcpy(page_zip->data + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 4);
- return;
}
-
- memset(page, 0, UNIV_PAGE_SIZE);
- mach_write_to_4(page + FIL_PAGE_OFFSET, buf_block_get_page_no(block));
- mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
- buf_block_get_space(block));
}
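
A freshly initialized page is therefore a zero-filled frame carrying only its own page number and space id. This standalone sketch mirrors the uncompressed path of fsp_apply_init_file_page(), using the FIL header offsets from fil0fil.h (FIL_PAGE_OFFSET = 4, FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID = 34) and the default 16KiB page size:

#include <cstdint>
#include <cstring>
#include <cassert>

const size_t UNIV_PAGE_SIZE = 16384;
const size_t FIL_PAGE_OFFSET = 4;
const size_t FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID = 34;

static void write_be32(unsigned char* b, uint32_t n)
{
	b[0] = (unsigned char)(n >> 24);
	b[1] = (unsigned char)(n >> 16);
	b[2] = (unsigned char)(n >> 8);
	b[3] = (unsigned char)(n);
}

// Zero the frame and stamp the page number and space id big-endian,
// as the function above does for an uncompressed block.
void apply_init_file_page(unsigned char* page, uint32_t page_no, uint32_t space_id)
{
	std::memset(page, 0, UNIV_PAGE_SIZE);
	write_be32(page + FIL_PAGE_OFFSET, page_no);
	write_be32(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id);
}

int main()
{
	unsigned char frame[UNIV_PAGE_SIZE];
	apply_init_file_page(frame, 3, 42);
	assert(frame[FIL_PAGE_OFFSET + 3] == 3);
	assert(frame[FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID + 3] == 42);
}
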
-#ifndef UNIV_HOTBACKUP
-/***********************************************************//**
-Inits a file page whose prior contents should be ignored. */
-static
-void
-fsp_init_file_page(
-/*===============*/
- buf_block_t* block, /*!< in: pointer to a page */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- fsp_init_file_page_low(block);
-
- mlog_write_initial_log_record(buf_block_get_frame(block),
- MLOG_INIT_FILE_PAGE, mtr);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Parses a redo log record of a file page init.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-fsp_parse_init_file_page(
-/*=====================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr MY_ATTRIBUTE((unused)), /*!< in: buffer end */
- buf_block_t* block) /*!< in: block or NULL */
-{
- ut_ad(ptr && end_ptr);
-
- if (block) {
- fsp_init_file_page_low(block);
+#ifdef UNIV_DEBUG
+/** Assert that the mini-transaction is compatible with
+updating an allocation bitmap page.
+@param[in] mtr mini-transaction */
+void fil_space_t::modify_check(const mtr_t& mtr) const
+{
+ switch (mtr.get_log_mode()) {
+ case MTR_LOG_SHORT_INSERTS:
+ case MTR_LOG_NONE:
+ /* These modes are only allowed within a non-bitmap page
+ when there is a higher-level redo log record written. */
+ ut_ad(purpose == FIL_TYPE_TABLESPACE
+ || purpose == FIL_TYPE_TEMPORARY);
+ break;
+ case MTR_LOG_NO_REDO:
+ ut_ad(purpose == FIL_TYPE_TEMPORARY
+ || purpose == FIL_TYPE_IMPORT
+ || redo_skipped_count
+ || is_being_truncated
+ || srv_is_tablespace_truncated(id));
+ return;
+ case MTR_LOG_ALL:
+ /* We may only write redo log for a persistent
+ tablespace. */
+ ut_ad(purpose == FIL_TYPE_TABLESPACE);
+ ut_ad(mtr.is_named_space(id));
+ return;
}
- return(ptr);
+ ut_ad(!"invalid log mode");
}
+#endif
/**********************************************************************//**
Initializes the fsp system. */
-UNIV_INTERN
void
fsp_init(void)
/*==========*/
@@ -653,7 +666,6 @@ fsp_init(void)
Writes the space id and flags to a tablespace header. The flags contain
row type, physical/compressed page size, and logical/uncompressed page
size of the tablespace. */
-UNIV_INTERN
void
fsp_header_init_fields(
/*===================*/
@@ -670,33 +682,35 @@ fsp_header_init_fields(
flags);
}
-#ifndef UNIV_HOTBACKUP
/** Initialize a tablespace header.
@param[in] space_id space id
@param[in] size current size in blocks
@param[in,out] mtr mini-transaction */
-UNIV_INTERN
void
fsp_header_init(ulint space_id, ulint size, mtr_t* mtr)
{
fsp_header_t* header;
buf_block_t* block;
page_t* page;
- ulint flags;
- ulint zip_size;
ut_ad(mtr);
- mtr_x_lock(fil_space_get_latch(space_id, &flags), mtr);
+ fil_space_t* space = mtr_x_lock_space(space_id, mtr);
- zip_size = fsp_flags_get_zip_size(flags);
- block = buf_page_create(space_id, 0, zip_size, mtr);
- buf_page_get(space_id, zip_size, 0, RW_X_LATCH, mtr);
+ const page_id_t page_id(space_id, 0);
+ const page_size_t page_size(space->flags);
+
+ block = buf_page_create(page_id, page_size, mtr);
+ buf_page_get(page_id, page_size, RW_SX_LATCH, mtr);
buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
+ space->size_in_header = size;
+ space->free_len = 0;
+ space->free_limit = 0;
+
/* The prior contents of the file page should be ignored */
- fsp_init_file_page(block, mtr);
+ fsp_init_file_page(space, block, mtr);
page = buf_block_get_frame(block);
mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_FSP_HDR,
@@ -709,7 +723,8 @@ fsp_header_init(ulint space_id, ulint size, mtr_t* mtr)
mlog_write_ulint(header + FSP_SIZE, size, MLOG_4BYTES, mtr);
mlog_write_ulint(header + FSP_FREE_LIMIT, 0, MLOG_4BYTES, mtr);
- mlog_write_ulint(header + FSP_SPACE_FLAGS, flags & ~FSP_FLAGS_MEM_MASK,
+ mlog_write_ulint(header + FSP_SPACE_FLAGS,
+ space->flags & ~FSP_FLAGS_MEM_MASK,
MLOG_4BYTES, mtr);
mlog_write_ulint(header + FSP_FRAG_N_USED, 0, MLOG_4BYTES, mtr);
@@ -721,28 +736,21 @@ fsp_header_init(ulint space_id, ulint size, mtr_t* mtr)
mlog_write_ull(header + FSP_SEG_ID, 1, mtr);
- fsp_fill_free_list(space_id != TRX_SYS_SPACE, space_id, header, mtr);
-
- fil_space_t* space = fil_space_acquire(space_id);
- ut_ad(space);
+ fsp_fill_free_list(!is_system_tablespace(space_id),
+ space, header, mtr);
/* Write encryption metadata to page 0 if tablespace is
encrypted or encryption is disabled by table option. */
if (space->crypt_data &&
(space->crypt_data->should_encrypt() ||
space->crypt_data->not_encrypted())) {
- space->crypt_data->write_page0(page, mtr);
+ space->crypt_data->write_page0(space, page, mtr);
}
-
- fil_space_release(space);
}
-#endif /* !UNIV_HOTBACKUP */
-
/**********************************************************************//**
Reads the space id from the first page of a tablespace.
-@return space id, ULINT UNDEFINED if error */
-UNIV_INTERN
+@return space id, ULINT UNDEFINED if error */
ulint
fsp_header_get_space_id(
/*====================*/
@@ -759,10 +767,8 @@ fsp_header_get_space_id(
id = ULINT_UNDEFINED;);
if (id != fsp_id) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Space id in fsp header %lu,but in the page header "
- "%lu", fsp_id, id);
-
+ ib::error() << "Space ID in fsp header is " << fsp_id
+ << ", but in the page header it is " << id << ".";
return(ULINT_UNDEFINED);
}
@@ -770,60 +776,32 @@ fsp_header_get_space_id(
}
/**********************************************************************//**
-Reads the space flags from the first page of a tablespace.
-@return flags */
-UNIV_INTERN
-ulint
-fsp_header_get_flags(
-/*=================*/
- const page_t* page) /*!< in: first page of a tablespace */
-{
- ut_ad(!page_offset(page));
-
- return(mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page));
-}
-
-/**********************************************************************//**
-Reads the compressed page size from the first page of a tablespace.
-@return compressed page size in bytes, or 0 if uncompressed */
-UNIV_INTERN
-ulint
-fsp_header_get_zip_size(
-/*====================*/
- const page_t* page) /*!< in: first page of a tablespace */
-{
- ulint flags = fsp_header_get_flags(page);
-
- return(fsp_flags_get_zip_size(flags));
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
Increases the space size field of a space. */
-UNIV_INTERN
void
fsp_header_inc_size(
/*================*/
- ulint space, /*!< in: space id */
+ ulint space_id, /*!< in: space id */
ulint size_inc, /*!< in: size increment in pages */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
fsp_header_t* header;
ulint size;
- ulint flags;
ut_ad(mtr);
- mtr_x_lock(fil_space_get_latch(space, &flags), mtr);
+ fil_space_t* space = mtr_x_lock_space(space_id, mtr);
+ ut_d(space->modify_check(*mtr));
- header = fsp_get_space_header(space,
- fsp_flags_get_zip_size(flags),
- mtr);
+ header = fsp_get_space_header(
+ space, page_size_t(space->flags), mtr);
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
+ size = mach_read_from_4(header + FSP_SIZE);
+ ut_ad(size == space->size_in_header);
- mlog_write_ulint(header + FSP_SIZE, size + size_inc, MLOG_4BYTES,
- mtr);
+ size += size_inc;
+
+ mlog_write_ulint(header + FSP_SIZE, size, MLOG_4BYTES, mtr);
+ space->size_in_header = size;
}
/**********************************************************************//**
@@ -831,8 +809,7 @@ Gets the size of the system tablespace from the tablespace header. If
we do not have an auto-extending data file, this should be equal to
the size of the data files. If there is an auto-extending data file,
this can be smaller.
-@return size in pages */
-UNIV_INTERN
+@return size in pages */
ulint
fsp_header_get_tablespace_size(void)
/*================================*/
@@ -843,261 +820,258 @@ fsp_header_get_tablespace_size(void)
mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(0, NULL), &mtr);
+ fil_space_t* space = mtr_x_lock_space(TRX_SYS_SPACE, &mtr);
- header = fsp_get_space_header(0, 0, &mtr);
+ header = fsp_get_space_header(space, univ_page_size, &mtr);
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
+ size = mach_read_from_4(header + FSP_SIZE);
+ ut_ad(space->size_in_header == size);
mtr_commit(&mtr);
return(size);
}
-/***********************************************************************//**
-Tries to extend a single-table tablespace so that a page would fit in the
+/** Try to extend a single-table tablespace so that a page would fit in the
data file.
-@return TRUE if success */
-static UNIV_COLD MY_ATTRIBUTE((nonnull, warn_unused_result))
-ibool
+@param[in,out] space tablespace
+@param[in] page_no page number
+@param[in,out] header tablespace header
+@param[in,out] mtr mini-transaction
+@return true if success */
+static ATTRIBUTE_COLD __attribute__((warn_unused_result))
+bool
fsp_try_extend_data_file_with_pages(
-/*================================*/
- ulint space, /*!< in: space */
- ulint page_no, /*!< in: page number */
- fsp_header_t* header, /*!< in/out: space header */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ fil_space_t* space,
+ ulint page_no,
+ fsp_header_t* header,
+ mtr_t* mtr)
{
- ibool success;
- ulint actual_size;
+ bool success;
ulint size;
- ut_a(space != 0);
+ ut_a(!is_system_tablespace(space->id));
+ ut_d(space->modify_check(*mtr));
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
+ size = mach_read_from_4(header + FSP_SIZE);
+ ut_ad(size == space->size_in_header);
ut_a(page_no >= size);
- success = fil_extend_space_to_desired_size(&actual_size, space,
- page_no + 1);
- /* actual_size now has the space size in pages; it may be less than
- we wanted if we ran out of disk space */
-
- mlog_write_ulint(header + FSP_SIZE, actual_size, MLOG_4BYTES, mtr);
+ success = fil_space_extend(space, page_no + 1);
+ /* The size may be less than we wanted if we ran out of disk space. */
+ mlog_write_ulint(header + FSP_SIZE, space->size, MLOG_4BYTES, mtr);
+ space->size_in_header = space->size;
return(success);
}
-/***********************************************************************//**
-Tries to extend the last data file of a tablespace if it is auto-extending.
-@return FALSE if not auto-extending */
-static UNIV_COLD MY_ATTRIBUTE((nonnull))
-ibool
-fsp_try_extend_data_file(
-/*=====================*/
- ulint* actual_increase,/*!< out: actual increase in pages, where
- we measure the tablespace size from
- what the header field says; it may be
- the actual file size rounded down to
- megabyte */
- ulint space, /*!< in: space */
- fsp_header_t* header, /*!< in/out: space header */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+/** Try to extend the last data file of a tablespace if it is auto-extending.
+@param[in,out] space tablespace
+@param[in,out] header tablespace header
+@param[in,out] mtr mini-transaction
+@return number of pages added
+@retval 0 if the tablespace was not extended */
+ATTRIBUTE_COLD __attribute__((nonnull))
+static
+ulint
+fsp_try_extend_data_file(fil_space_t* space, fsp_header_t* header, mtr_t* mtr)
{
- ulint size;
- ulint zip_size;
- ulint new_size;
- ulint old_size;
- ulint size_increase;
- ulint actual_size;
- ibool success;
+ ulint size; /* current number of pages in the datafile */
+ ulint size_increase; /* number of pages to extend this file */
+ const char* OUT_OF_SPACE_MSG =
+ "ran out of space. Please add another file or use"
+ " 'autoextend' for the last file in setting";
- *actual_increase = 0;
+ ut_d(space->modify_check(*mtr));
- if (space == 0 && !srv_auto_extend_last_data_file) {
+ if (space->id == TRX_SYS_SPACE
+ && !srv_sys_space.can_auto_extend_last_file()) {
/* We print the error message only once to avoid
spamming the error log. Note that we don't need
- to reset the flag to FALSE as dealing with this
+ to reset the flag to false as dealing with this
error requires server restart. */
- if (fsp_tbs_full_error_printed == FALSE) {
- fprintf(stderr,
- "InnoDB: Error: Data file(s) ran"
- " out of space.\n"
- "Please add another data file or"
- " use \'autoextend\' for the last"
- " data file.\n");
- fsp_tbs_full_error_printed = TRUE;
+ if (!srv_sys_space.get_tablespace_full_status()) {
+ ib::error() << "The InnoDB system tablespace "
+ << OUT_OF_SPACE_MSG
+ << " innodb_data_file_path.";
+ srv_sys_space.set_tablespace_full_status(true);
}
- return(FALSE);
- }
-
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
- zip_size = fsp_flags_get_zip_size(
- mach_read_from_4(header + FSP_SPACE_FLAGS));
-
- old_size = size;
-
- if (space == 0) {
- if (!srv_last_file_size_max) {
- size_increase = SRV_AUTO_EXTEND_INCREMENT;
- } else {
- if (srv_last_file_size_max
- < srv_data_file_sizes[srv_n_data_files - 1]) {
-
- fprintf(stderr,
- "InnoDB: Error: Last data file size"
- " is %lu, max size allowed %lu\n",
- (ulong) srv_data_file_sizes[
- srv_n_data_files - 1],
- (ulong) srv_last_file_size_max);
- }
+ return(0);
+ } else if (space->id == SRV_TMP_SPACE_ID
+ && !srv_tmp_space.can_auto_extend_last_file()) {
- size_increase = srv_last_file_size_max
- - srv_data_file_sizes[srv_n_data_files - 1];
- if (size_increase > SRV_AUTO_EXTEND_INCREMENT) {
- size_increase = SRV_AUTO_EXTEND_INCREMENT;
- }
- }
- } else {
- /* We extend single-table tablespaces first one extent
- at a time, but 4 at a time for bigger tablespaces. It is
- not enough to extend always by one extent, because we need
- to add at least one extent to FSP_FREE.
- A single extent descriptor page will track many extents.
- And the extent that uses its extent descriptor page is
- put onto the FSP_FREE_FRAG list. Extents that do not
- use their extent descriptor page are added to FSP_FREE.
- The physical page size is used to determine how many
- extents are tracked on one extent descriptor page. */
- ulint extent_size; /*!< one megabyte, in pages */
- ulint threshold; /*!< The size of the tablespace
- (in number of pages) where we
- start allocating more than one
- extent at a time. */
-
- if (!zip_size) {
- extent_size = FSP_EXTENT_SIZE;
- } else {
- extent_size = FSP_EXTENT_SIZE
- * UNIV_PAGE_SIZE / zip_size;
+ /* We print the error message only once to avoid
+ spamming the error log. Note that we don't need
+ to reset the flag to false as dealing with this
+ error requires server restart. */
+ if (!srv_tmp_space.get_tablespace_full_status()) {
+ ib::error() << "The InnoDB temporary tablespace "
+ << OUT_OF_SPACE_MSG
+ << " innodb_temp_data_file_path.";
+ srv_tmp_space.set_tablespace_full_status(true);
}
+ return(0);
+ }
- /* Threshold is set at 32mb except when the page
- size is small enough that it must be done sooner.
- For page size less than 4k, we may reach the
- extent contains extent descriptor page before
- 32 mb. */
- threshold = ut_min((32 * extent_size),
- (zip_size ? zip_size : UNIV_PAGE_SIZE));
-
- if (size < extent_size) {
- /* Let us first extend the file to extent_size */
- success = fsp_try_extend_data_file_with_pages(
- space, extent_size - 1, header, mtr);
- if (!success) {
- new_size = mtr_read_ulint(header + FSP_SIZE,
- MLOG_4BYTES, mtr);
+ size = mach_read_from_4(header + FSP_SIZE);
+ ut_ad(size == space->size_in_header);
- *actual_increase = new_size - old_size;
+ const page_size_t page_size(
+ mach_read_from_4(header + FSP_SPACE_FLAGS));
- return(FALSE);
+ switch (space->id) {
+ case TRX_SYS_SPACE:
+ size_increase = srv_sys_space.get_increment();
+ break;
+ case SRV_TMP_SPACE_ID:
+ size_increase = srv_tmp_space.get_increment();
+ break;
+ default:
+ ulint extent_pages
+ = fsp_get_extent_size_in_pages(page_size);
+ if (size < extent_pages) {
+ /* Let us first extend the file to extent_size */
+ if (!fsp_try_extend_data_file_with_pages(
+ space, extent_pages - 1, header, mtr)) {
+ return(0);
}
- size = extent_size;
+ size = extent_pages;
}
- if (size < threshold) {
- size_increase = extent_size;
- } else {
- /* Below in fsp_fill_free_list() we assume
- that we add at most FSP_FREE_ADD extents at
- a time */
- size_increase = FSP_FREE_ADD * extent_size;
- }
+ size_increase = fsp_get_pages_to_extend_ibd(page_size, size);
}
if (size_increase == 0) {
-
- return(TRUE);
+ return(0);
}
- success = fil_extend_space_to_desired_size(&actual_size, space,
- size + size_increase);
- if (!success) {
-
- return(false);
+ if (!fil_space_extend(space, size + size_increase)) {
+ return(0);
}
/* We ignore any fragments of a full megabyte when storing the size
to the space header */
- if (!zip_size) {
- new_size = ut_calc_align_down(actual_size,
- (1024 * 1024) / UNIV_PAGE_SIZE);
+ space->size_in_header = ut_2pow_round(
+ space->size, (1024 * 1024) / page_size.physical());
+
+ mlog_write_ulint(
+ header + FSP_SIZE, space->size_in_header, MLOG_4BYTES, mtr);
+
+ return(size_increase);
+}
+
+/** Calculate the number of pages to extend a datafile.
+We extend single-table tablespaces first one extent at a time,
+but 4 at a time for bigger tablespaces. It is not enough to extend always
+by one extent, because we need to add at least one extent to FSP_FREE.
+A single extent descriptor page will track many extents. And the extent
+that uses its extent descriptor page is put onto the FSP_FREE_FRAG list.
+Extents that do not use their extent descriptor page are added to FSP_FREE.
+The physical page size is used to determine how many extents are tracked
+on one extent descriptor page. See xdes_calc_descriptor_page().
+@param[in] page_size page_size of the datafile
+@param[in] size current number of pages in the datafile
+@return number of pages to extend the file. */
+ulint
+fsp_get_pages_to_extend_ibd(
+ const page_size_t& page_size,
+ ulint size)
+{
+ ulint size_increase; /* number of pages to extend this file */
+ ulint extent_size; /* one megabyte, in pages */
+ ulint threshold; /* The size of the tablespace (in number
+ of pages) where we start allocating more
+ than one extent at a time. */
+
+ extent_size = fsp_get_extent_size_in_pages(page_size);
+
+ /* The threshold is set at 32MiB except when the physical page
+ size is small enough that it must be done sooner. */
+ threshold = ut_min(32 * extent_size, page_size.physical());
+
+ if (size < threshold) {
+ size_increase = extent_size;
} else {
- new_size = ut_calc_align_down(actual_size,
- (1024 * 1024) / zip_size);
+ /* Below in fsp_fill_free_list() we assume
+ that we add at most FSP_FREE_ADD extents at
+ a time */
+ size_increase = FSP_FREE_ADD * extent_size;
}
- mlog_write_ulint(header + FSP_SIZE, new_size, MLOG_4BYTES, mtr);
- *actual_increase = new_size - old_size;
+ return(size_increase);
+}
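
For the default 16KiB page size this policy works out to: extent_size = 64 pages (1MiB), threshold = min(32 * 64, 16384) = 2048 pages (32MiB), so files under 32MiB grow one extent at a time and larger files grow by FSP_FREE_ADD = 4 extents (4MiB). A standalone model under those assumed fsp0fsp.h constants:

#include <algorithm>
#include <cstdio>

const unsigned long EXTENT_SIZE = 64;		// pages per extent (1MiB at 16KiB)
const unsigned long FSP_FREE_ADD = 4;		// extents added per batch
const unsigned long PHYSICAL_PAGE_SIZE = 16384;

// Same shape as fsp_get_pages_to_extend_ibd() for an uncompressed file.
unsigned long pages_to_extend_ibd(unsigned long size_in_pages)
{
	unsigned long threshold = std::min(32 * EXTENT_SIZE, PHYSICAL_PAGE_SIZE);
	return size_in_pages < threshold ? EXTENT_SIZE
					 : FSP_FREE_ADD * EXTENT_SIZE;
}

int main()
{
	std::printf("%lu\n", pages_to_extend_ibd(1000));	// 64  (file < 32MiB)
	std::printf("%lu\n", pages_to_extend_ibd(4096));	// 256 (file >= 32MiB)
}
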
- return(TRUE);
+/** Reset the page type.
+Data files created before MySQL 5.1.48 may contain garbage in FIL_PAGE_TYPE.
+In MySQL 3.23.53, only undo log pages and index pages were tagged.
+Any other pages were written with uninitialized bytes in FIL_PAGE_TYPE.
+@param[in] block block with invalid FIL_PAGE_TYPE
+@param[in] type expected page type
+@param[in,out] mtr mini-transaction */
+ATTRIBUTE_COLD
+void fil_block_reset_type(const buf_block_t& block, ulint type, mtr_t* mtr)
+{
+ ib::info()
+ << "Resetting invalid page " << block.page.id << " type "
+ << fil_page_get_type(block.frame) << " to " << type << ".";
+ mlog_write_ulint(block.frame + FIL_PAGE_TYPE, type, MLOG_2BYTES, mtr);
}
-/**********************************************************************//**
-Puts new extents to the free list if there are free extents above the free
+/** Put new extents to the free list if there are free extents above the free
limit. If an extent happens to contain an extent descriptor page, the extent
-is put to the FSP_FREE_FRAG list with the page marked as used. */
+is put to the FSP_FREE_FRAG list with the page marked as used.
+@param[in] init_space true if this is a single-table tablespace
+and we are only initializing the first extent and the first bitmap pages;
+then we will not allocate more extents
+@param[in,out] space tablespace
+@param[in,out] header tablespace header
+@param[in,out] mtr mini-transaction */
static
void
fsp_fill_free_list(
-/*===============*/
- ibool init_space, /*!< in: TRUE if this is a single-table
- tablespace and we are only initing
- the tablespace's first extent
- descriptor page and ibuf bitmap page;
- then we do not allocate more extents */
- ulint space, /*!< in: space */
- fsp_header_t* header, /*!< in/out: space header */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ bool init_space,
+ fil_space_t* space,
+ fsp_header_t* header,
+ mtr_t* mtr)
{
ulint limit;
ulint size;
- ulint zip_size;
xdes_t* descr;
ulint count = 0;
ulint frag_n_used;
- ulint actual_increase;
ulint i;
- mtr_t ibuf_mtr;
ut_ad(page_offset(header) == FSP_HEADER_OFFSET);
+ ut_d(space->modify_check(*mtr));
/* Check if we can fill free list from above the free list limit */
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
- limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr);
-
- zip_size = fsp_flags_get_zip_size(
- mach_read_from_4(FSP_SPACE_FLAGS + header));
- ut_a(ut_is_2pow(zip_size));
- ut_a(zip_size <= UNIV_ZIP_SIZE_MAX);
- ut_a(!zip_size || zip_size >= UNIV_ZIP_SIZE_MIN);
-
- if (space == 0 && srv_auto_extend_last_data_file
- && size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) {
-
- /* Try to increase the last data file size */
- fsp_try_extend_data_file(&actual_increase, space, header, mtr);
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
- }
-
- if (space != 0 && !init_space
- && size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) {
+ size = mach_read_from_4(header + FSP_SIZE);
+ limit = mach_read_from_4(header + FSP_FREE_LIMIT);
+
+ ut_ad(size == space->size_in_header);
+ ut_ad(limit == space->free_limit);
+
+ const page_size_t page_size(space->flags);
+
+ if (size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) {
+ bool skip_resize = init_space;
+ switch (space->id) {
+ case TRX_SYS_SPACE:
+ skip_resize = !srv_sys_space.can_auto_extend_last_file();
+ break;
+ case SRV_TMP_SPACE_ID:
+ skip_resize = !srv_tmp_space.can_auto_extend_last_file();
+ break;
+ }
- /* Try to increase the .ibd file size */
- fsp_try_extend_data_file(&actual_increase, space, header, mtr);
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
+ if (!skip_resize) {
+ fsp_try_extend_data_file(space, header, mtr);
+ size = space->size_in_header;
+ }
}
i = limit;
@@ -1105,17 +1079,14 @@ fsp_fill_free_list(
while ((init_space && i < 1)
|| ((i + FSP_EXTENT_SIZE <= size) && (count < FSP_FREE_ADD))) {
- ibool init_xdes;
- if (zip_size) {
- init_xdes = ut_2pow_remainder(i, zip_size) == 0;
- } else {
- init_xdes = ut_2pow_remainder(i, UNIV_PAGE_SIZE) == 0;
- }
+ bool init_xdes
+ = (ut_2pow_remainder(i, page_size.physical()) == 0);
+ space->free_limit = i + FSP_EXTENT_SIZE;
mlog_write_ulint(header + FSP_FREE_LIMIT, i + FSP_EXTENT_SIZE,
MLOG_4BYTES, mtr);
- if (UNIV_UNLIKELY(init_xdes)) {
+ if (init_xdes) {
buf_block_t* block;
@@ -1124,14 +1095,17 @@ fsp_fill_free_list(
pages should be ignored. */
if (i > 0) {
+ const page_id_t page_id(space->id, i);
+
block = buf_page_create(
- space, i, zip_size, mtr);
- buf_page_get(space, zip_size, i,
- RW_X_LATCH, mtr);
- buf_block_dbg_add_level(block,
- SYNC_FSP_PAGE);
+ page_id, page_size, mtr);
+
+ buf_page_get(
+ page_id, page_size, RW_SX_LATCH, mtr);
+
+ buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
- fsp_init_file_page(block, mtr);
+ fsp_init_file_page(space, block, mtr);
mlog_write_ulint(buf_block_get_frame(block)
+ FIL_PAGE_TYPE,
FIL_PAGE_TYPE_XDES,
@@ -1140,28 +1114,50 @@ fsp_fill_free_list(
/* Initialize the ibuf bitmap page in a separate
mini-transaction because it is low in the latching
- order, and we must be able to release its latch
- before returning from the fsp routine */
+ order, and we must be able to release its latch.
+ Note: insert buffering is disabled for tables that
+ reside in the temporary tablespace. */
+ if (space->purpose != FIL_TYPE_TEMPORARY) {
+ mtr_t ibuf_mtr;
+
+ mtr_start(&ibuf_mtr);
+ ibuf_mtr.set_named_space(space);
+
+ /* Avoid logging while truncate table
+ fix-up is active. */
+ if (srv_is_tablespace_truncated(space->id)) {
+ mtr_set_log_mode(
+ &ibuf_mtr, MTR_LOG_NO_REDO);
+ }
+
+ const page_id_t page_id(
+ space->id,
+ i + FSP_IBUF_BITMAP_OFFSET);
- mtr_start(&ibuf_mtr);
+ block = buf_page_create(
+ page_id, page_size, &ibuf_mtr);
+
+ buf_page_get(
+ page_id, page_size, RW_SX_LATCH,
+ &ibuf_mtr);
- block = buf_page_create(space,
- i + FSP_IBUF_BITMAP_OFFSET,
- zip_size, &ibuf_mtr);
- buf_page_get(space, zip_size,
- i + FSP_IBUF_BITMAP_OFFSET,
- RW_X_LATCH, &ibuf_mtr);
- buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
+ buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
- fsp_init_file_page(block, &ibuf_mtr);
+ fsp_init_file_page(space, block, &ibuf_mtr);
- ibuf_bitmap_page_init(block, &ibuf_mtr);
+ ibuf_bitmap_page_init(block, &ibuf_mtr);
- mtr_commit(&ibuf_mtr);
+ mtr_commit(&ibuf_mtr);
+ }
}
- descr = xdes_get_descriptor_with_space_hdr(header, space, i,
- mtr);
+ buf_block_t* desc_block = NULL;
+ descr = xdes_get_descriptor_with_space_hdr(
+ header, space, i, mtr, init_space, &desc_block);
+ if (desc_block != NULL) {
+ fil_block_check_type(
+ *desc_block, FIL_PAGE_TYPE_XDES, mtr);
+ }
xdes_init(descr, mtr);
if (UNIV_UNLIKELY(init_xdes)) {
@@ -1177,8 +1173,8 @@ fsp_fill_free_list(
flst_add_last(header + FSP_FREE_FRAG,
descr + XDES_FLST_NODE, mtr);
- frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED,
- MLOG_4BYTES, mtr);
+ frag_n_used = mach_read_from_4(
+ header + FSP_FRAG_N_USED);
mlog_write_ulint(header + FSP_FRAG_N_USED,
frag_n_used + 2, MLOG_4BYTES, mtr);
} else {
@@ -1189,32 +1185,38 @@ fsp_fill_free_list(
i += FSP_EXTENT_SIZE;
}
+
+ space->free_len += count;
}
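
The init_xdes branch above fires on every physical-page-size boundary: page i becomes an extent descriptor page and page i + FSP_IBUF_BITMAP_OFFSET the change buffer bitmap page. A small sketch of the predicates, assuming 16KiB pages and FSP_IBUF_BITMAP_OFFSET == 1:

#include <cstdio>

const unsigned PHYSICAL_PAGE_SIZE = 16384;
const unsigned FSP_IBUF_BITMAP_OFFSET = 1;

// Start of a descriptor interval: the page holds the XDES bitmap.
bool is_xdes_page(unsigned page_no)
{
	return page_no % PHYSICAL_PAGE_SIZE == 0;
}

// The page right after it holds the change buffer bitmap.
bool is_ibuf_bitmap_page(unsigned page_no)
{
	return page_no % PHYSICAL_PAGE_SIZE == FSP_IBUF_BITMAP_OFFSET;
}

int main()
{
	std::printf("%d %d\n", is_xdes_page(16384),
		    is_ibuf_bitmap_page(16385));	// 1 1
}
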
-/**********************************************************************//**
-Allocates a new free extent.
-@return extent descriptor, NULL if cannot be allocated */
+/** Allocates a new free extent.
+@param[in,out] space tablespace
+@param[in] page_size page size
+@param[in]	hint		hint of which extent would be desirable: any
+page offset within the extent will do; the hint must not exceed FSP_FREE_LIMIT
+@param[in,out] mtr mini-transaction
+@return extent descriptor, NULL if cannot be allocated */
static
xdes_t*
fsp_alloc_free_extent(
-/*==================*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint hint, /*!< in: hint of which extent would be desirable: any
- page offset in the extent goes; the hint must not
- be > FSP_FREE_LIMIT */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ fil_space_t* space,
+ const page_size_t& page_size,
+ ulint hint,
+ mtr_t* mtr)
{
fsp_header_t* header;
fil_addr_t first;
xdes_t* descr;
+ buf_block_t* desc_block = NULL;
- ut_ad(mtr);
+ header = fsp_get_space_header(space, page_size, mtr);
- header = fsp_get_space_header(space, zip_size, mtr);
+ descr = xdes_get_descriptor_with_space_hdr(
+ header, space, hint, mtr, false, &desc_block);
- descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr);
+ if (desc_block != NULL) {
+ fil_block_check_type(*desc_block, FIL_PAGE_TYPE_XDES, mtr);
+ }
if (descr && (xdes_get_state(descr, mtr) == XDES_FREE)) {
/* Ok, we can take this extent */
@@ -1223,7 +1225,7 @@ fsp_alloc_free_extent(
first = flst_get_first(header + FSP_FREE, mtr);
if (fil_addr_is_null(first)) {
- fsp_fill_free_list(FALSE, space, header, mtr);
+ fsp_fill_free_list(false, space, header, mtr);
first = flst_get_first(header + FSP_FREE, mtr);
}
@@ -1233,10 +1235,12 @@ fsp_alloc_free_extent(
return(NULL); /* No free extents left */
}
- descr = xdes_lst_get_descriptor(space, zip_size, first, mtr);
+ descr = xdes_lst_get_descriptor(
+ space, page_size, first, mtr);
}
flst_remove(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);
+ space->free_len--;
return(descr);
}
@@ -1259,8 +1263,7 @@ fsp_alloc_from_free_frag(
xdes_set_bit(descr, XDES_FREE_BIT, bit, FALSE, mtr);
/* Update the FRAG_N_USED field */
- frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
- mtr);
+ frag_n_used = mach_read_from_4(header + FSP_FRAG_N_USED);
frag_n_used++;
mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES,
mtr);
@@ -1278,83 +1281,96 @@ fsp_alloc_from_free_frag(
}
}
-/**********************************************************************//**
-Gets a buffer block for an allocated page.
-
+/** Gets a buffer block for an allocated page.
NOTE: If init_mtr != mtr, the block will only be initialized if it was
not previously x-latched. It is assumed that the block has been
x-latched only by mtr, and freed in mtr in that case.
-
+@param[in,out] space tablespace
+@param[in] offset page number of the allocated page
+@param[in] page_size page size of the allocated page
+@param[in] rw_latch RW_SX_LATCH, RW_X_LATCH
+@param[in,out] mtr mini-transaction of the allocation
+@param[in,out] init_mtr mini-transaction for initializing the page
@return block, initialized if init_mtr==mtr
or rw_lock_x_lock_count(&block->lock) == 1 */
static
buf_block_t*
fsp_page_create(
-/*============*/
- ulint space, /*!< in: space id of the allocated page */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number of the allocated page */
- mtr_t* mtr, /*!< in: mini-transaction of the allocation */
- mtr_t* init_mtr) /*!< in: mini-transaction for initializing
- the page */
+ fil_space_t* space,
+ page_no_t offset,
+ const page_size_t& page_size,
+ rw_lock_type_t rw_latch,
+ mtr_t* mtr,
+ mtr_t* init_mtr)
{
- buf_block_t* block
- = buf_page_create(space, page_no, zip_size, init_mtr);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)
- == rw_lock_own(&block->lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(page_size.equals_to(page_size_t(space->flags)));
+
+ buf_block_t* block = buf_page_create(page_id_t(space->id, offset),
+ page_size, init_mtr);
+
+ ut_d(bool latched = mtr_memo_contains_flagged(mtr, block,
+ MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX));
+
+ ut_ad(rw_latch == RW_X_LATCH || rw_latch == RW_SX_LATCH);
/* Mimic buf_page_get(), but avoid the buf_pool->page_hash lookup. */
- rw_lock_x_lock(&block->lock);
- mutex_enter(&block->mutex);
+ if (rw_latch == RW_X_LATCH) {
+ rw_lock_x_lock(&block->lock);
+ } else {
+ rw_lock_sx_lock(&block->lock);
+ }
+
buf_block_buf_fix_inc(block, __FILE__, __LINE__);
- mutex_exit(&block->mutex);
- mtr_memo_push(init_mtr, block, MTR_MEMO_PAGE_X_FIX);
+ mtr_memo_push(init_mtr, block, rw_latch == RW_X_LATCH
+ ? MTR_MEMO_PAGE_X_FIX : MTR_MEMO_PAGE_SX_FIX);
if (init_mtr == mtr
- || rw_lock_get_x_lock_count(&block->lock) == 1) {
+ || (rw_latch == RW_X_LATCH
+ ? rw_lock_get_x_lock_count(&block->lock) == 1
+ : rw_lock_get_sx_lock_count(&block->lock) == 1)) {
/* Initialize the page, unless it was already
- X-latched in mtr. (In this case, we would want to
+ SX-latched in mtr. (In this case, we would want to
allocate another page that has not been freed in mtr.) */
- ut_ad(init_mtr == mtr
- || !mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-
- fsp_init_file_page(block, init_mtr);
+ ut_ad(init_mtr == mtr || !latched);
+ fsp_init_file_page(space, block, init_mtr);
}
return(block);
}
-/**********************************************************************//**
-Allocates a single free page from a space. The page is marked as used.
-@retval NULL if no page could be allocated
-@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
+/** Allocates a single free page from a space.
+The page is marked as used.
+@param[in,out] space tablespace
+@param[in] page_size page size
+@param[in] hint hint of which page would be desirable
+@param[in] rw_latch RW_SX_LATCH, RW_X_LATCH
+@param[in,out] mtr mini-transaction
+@param[in,out] init_mtr mini-transaction in which the page should be
+initialized (may be the same as mtr)
+@retval NULL if no page could be allocated
+@retval block rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
(init_mtr == mtr, or the page was not previously freed in mtr)
-@retval block (not allocated or initialized) otherwise */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
+@retval block (not allocated or initialized) otherwise */
+static MY_ATTRIBUTE((warn_unused_result, nonnull))
buf_block_t*
fsp_alloc_free_page(
-/*================*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint hint, /*!< in: hint of which page would be desirable */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- mtr_t* init_mtr)/*!< in/out: mini-transaction in which the
- page should be initialized
- (may be the same as mtr) */
+ fil_space_t* space,
+ const page_size_t& page_size,
+ ulint hint,
+ rw_lock_type_t rw_latch,
+ mtr_t* mtr,
+ mtr_t* init_mtr)
{
fsp_header_t* header;
fil_addr_t first;
xdes_t* descr;
ulint free;
- ulint page_no;
- ulint space_size;
+ const ulint space_id = space->id;
- header = fsp_get_space_header(space, zip_size, mtr);
+ ut_d(space->modify_check(*mtr));
+ header = fsp_get_space_header(space, page_size, mtr);
/* Get the hinted descriptor */
descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr);
@@ -1373,7 +1389,7 @@ fsp_alloc_free_page(
FREE_FRAG list. But we will allocate our page from the
the free extent anyway. */
- descr = fsp_alloc_free_extent(space, zip_size,
+ descr = fsp_alloc_free_extent(space, page_size,
hint, mtr);
if (descr == NULL) {
@@ -1386,7 +1402,7 @@ fsp_alloc_free_page(
flst_add_last(header + FSP_FREE_FRAG,
descr + XDES_FLST_NODE, mtr);
} else {
- descr = xdes_lst_get_descriptor(space, zip_size,
+ descr = xdes_lst_get_descriptor(space, page_size,
first, mtr);
}
@@ -1407,25 +1423,26 @@ fsp_alloc_free_page(
ut_error;
}
- page_no = xdes_get_offset(descr) + free;
+ page_no_t page_no = xdes_get_offset(descr) + free;
- space_size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
+ page_no_t space_size = mach_read_from_4(header + FSP_SIZE);
+ ut_ad(space_size == space->size_in_header
+ || (space_id == TRX_SYS_SPACE
+ && srv_startup_is_before_trx_rollback_phase));
if (space_size <= page_no) {
/* It must be that we are extending a single-table tablespace
whose size is still < 64 pages */
- ut_a(space != 0);
+ ut_a(!is_system_tablespace(space_id));
if (page_no >= FSP_EXTENT_SIZE) {
- fprintf(stderr,
- "InnoDB: Error: trying to extend a"
- " single-table tablespace %lu\n"
- "InnoDB: by single page(s) though the"
- " space size %lu. Page no %lu.\n",
- (ulong) space, (ulong) space_size,
- (ulong) page_no);
+ ib::error() << "Trying to extend a single-table"
+ " tablespace " << space << " , by single"
+ " page(s) though the space size " << space_size
+ << ". Page no " << page_no << ".";
return(NULL);
}
+
if (!fsp_try_extend_data_file_with_pages(space, page_no,
header, mtr)) {
/* No disk space left */
@@ -1434,20 +1451,23 @@ fsp_alloc_free_page(
}
fsp_alloc_from_free_frag(header, descr, free, mtr);
- return(fsp_page_create(space, zip_size, page_no, mtr, init_mtr));
+ return(fsp_page_create(space, page_no, page_size, rw_latch,
+ mtr, init_mtr));
}
-/**********************************************************************//**
-Frees a single page of a space. The page is marked as free and clean. */
+/** Frees a single page of a space.
+The page is marked as free and clean.
+@param[in,out] space tablespace
+@param[in] page_id page id
+@param[in] page_size page size
+@param[in,out] mtr mini-transaction */
static
void
fsp_free_page(
-/*==========*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page, /*!< in: page offset */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ fil_space_t* space,
+ ulint offset,
+ const page_size_t& page_size,
+ mtr_t* mtr)
{
fsp_header_t* header;
xdes_t* descr;
@@ -1455,24 +1475,22 @@ fsp_free_page(
ulint frag_n_used;
ut_ad(mtr);
+ ut_d(space->modify_check(*mtr));
/* fprintf(stderr, "Freeing page %lu in space %lu\n", page, space); */
- header = fsp_get_space_header(space, zip_size, mtr);
+ header = fsp_get_space_header(space, page_size, mtr);
- descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr);
+ descr = xdes_get_descriptor_with_space_hdr(
+ header, space, offset, mtr);
state = xdes_get_state(descr, mtr);
- if (state != XDES_FREE_FRAG && state != XDES_FULL_FRAG) {
- fprintf(stderr,
- "InnoDB: Error: File space extent descriptor"
- " of page %lu has state %lu\n",
- (ulong) page,
- (ulong) state);
- fputs("InnoDB: Dump of descriptor: ", stderr);
- ut_print_buf(stderr, ((byte*) descr) - 50, 200);
- putc('\n', stderr);
+ if (UNIV_UNLIKELY(state != XDES_FREE_FRAG
+ && state != XDES_FULL_FRAG)) {
+ ib::error() << "File space extent descriptor of page "
+ << page_id_t(space->id, offset)
+ << " has state " << state;
/* Crash in debug version, so that we get a core dump
of this corruption. */
ut_ad(0);
@@ -1488,14 +1506,11 @@ fsp_free_page(
}
if (xdes_mtr_get_bit(descr, XDES_FREE_BIT,
- page % FSP_EXTENT_SIZE, mtr)) {
+ offset % FSP_EXTENT_SIZE, mtr)) {
- fprintf(stderr,
- "InnoDB: Error: File space extent descriptor"
- " of page %lu says it is free\n"
- "InnoDB: Dump of descriptor: ", (ulong) page);
- ut_print_buf(stderr, ((byte*) descr) - 50, 200);
- putc('\n', stderr);
+ ib::error() << "File space extent descriptor of page "
+ << page_id_t(space->id, offset)
+ << " says it is free.";
/* Crash in debug version, so that we get a core dump
of this corruption. */
ut_ad(0);
@@ -1506,8 +1521,11 @@ fsp_free_page(
return;
}
- xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
- xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
+ const ulint bit = offset % FSP_EXTENT_SIZE;
+
+ xdes_set_bit(descr, XDES_FREE_BIT, bit, TRUE, mtr);
+ /* xdes_init() should have set all XDES_CLEAN_BIT */
+ ut_ad(xdes_get_bit(descr, XDES_CLEAN_BIT, bit));
frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
mtr);
@@ -1531,84 +1549,80 @@ fsp_free_page(
/* The extent has become free: move it to another list */
flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
mtr);
- fsp_free_extent(space, zip_size, page, mtr);
+ fsp_free_extent(space, offset, page_size, mtr);
}
-
- mtr->n_freed_pages++;
}
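
The state changes performed here follow the extent life cycle declared in fsp0fsp.h. The sketch below models the fragment-extent transitions only (segment-owned XDES_FSEG extents are freed through the fseg_* routines instead), assuming the usual state values:

#include <cassert>

enum xdes_state { XDES_FREE = 1, XDES_FREE_FRAG, XDES_FULL_FRAG, XDES_FSEG };

// Transitions fsp_free_page() performs on a fragment extent:
// a full extent reopens onto FSP_FREE_FRAG; a wholly free one
// is reinitialized and returned to FSP_FREE via fsp_free_extent().
xdes_state after_page_freed(xdes_state state, unsigned n_used_after)
{
	if (state == XDES_FULL_FRAG) {
		state = XDES_FREE_FRAG;
	}
	if (n_used_after == 0) {
		state = XDES_FREE;
	}
	return state;
}

int main()
{
	assert(after_page_freed(XDES_FULL_FRAG, 63) == XDES_FREE_FRAG);
	assert(after_page_freed(XDES_FREE_FRAG, 0) == XDES_FREE);
}
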
-/**********************************************************************//**
-Returns an extent to the free list of a space. */
+/** Return an extent to the free list of a space.
+@param[in,out] space tablespace
+@param[in] offset page number in the extent
+@param[in] page_size page size
+@param[in,out] mtr mini-transaction */
static
void
fsp_free_extent(
-/*============*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page, /*!< in: page offset in the extent */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ fil_space_t* space,
+ page_no_t offset,
+ const page_size_t& page_size,
+ mtr_t* mtr)
{
fsp_header_t* header;
xdes_t* descr;
- ut_ad(mtr);
-
- header = fsp_get_space_header(space, zip_size, mtr);
+ ut_ad(mtr_memo_contains(mtr, &space->latch, MTR_MEMO_X_LOCK));
- descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr);
+ header = fsp_get_space_header(space, page_size, mtr);
- if (xdes_get_state(descr, mtr) == XDES_FREE) {
+ descr = xdes_get_descriptor_with_space_hdr(
+ header, space, offset, mtr);
- ut_print_buf(stderr, (byte*) descr - 500, 1000);
- putc('\n', stderr);
-
- ut_error;
- }
+ ut_a(xdes_get_state(descr, mtr) != XDES_FREE);
xdes_init(descr, mtr);
flst_add_last(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);
+ space->free_len++;
}
-/**********************************************************************//**
-Returns the nth inode slot on an inode page.
-@return segment inode */
+/** Returns the nth inode slot on an inode page.
+@param[in] page segment inode page
+@param[in] i inode index on page
+@param[in] page_size page size
+@param[in,out] mtr mini-transaction
+@return segment inode */
UNIV_INLINE
fseg_inode_t*
fsp_seg_inode_page_get_nth_inode(
-/*=============================*/
- page_t* page, /*!< in: segment inode page */
- ulint i, /*!< in: inode index on page */
- ulint zip_size MY_ATTRIBUTE((unused)),
- /*!< in: compressed page size, or 0 */
- mtr_t* mtr MY_ATTRIBUTE((unused)))
- /*!< in/out: mini-transaction */
+ page_t* page,
+ ulint i,
+ const page_size_t& page_size,
+ mtr_t* mtr)
{
- ut_ad(i < FSP_SEG_INODES_PER_PAGE(zip_size));
- ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(i < FSP_SEG_INODES_PER_PAGE(page_size));
+ ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_SX_FIX));
return(page + FSEG_ARR_OFFSET + FSEG_INODE_SIZE * i);
}
-/**********************************************************************//**
-Looks for a used segment inode on a segment inode page.
-@return segment inode index, or ULINT_UNDEFINED if not found */
+/** Looks for a used segment inode on a segment inode page.
+@param[in] page segment inode page
+@param[in] page_size page size
+@param[in,out] mtr mini-transaction
+@return segment inode index, or ULINT_UNDEFINED if not found */
static
ulint
fsp_seg_inode_page_find_used(
-/*=========================*/
- page_t* page, /*!< in: segment inode page */
- ulint zip_size,/*!< in: compressed page size, or 0 */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ page_t* page,
+ const page_size_t& page_size,
+ mtr_t* mtr)
{
ulint i;
fseg_inode_t* inode;
- for (i = 0; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) {
+ for (i = 0; i < FSP_SEG_INODES_PER_PAGE(page_size); i++) {
inode = fsp_seg_inode_page_get_nth_inode(
- page, i, zip_size, mtr);
+ page, i, page_size, mtr);
if (mach_read_from_8(inode + FSEG_ID)) {
/* This is used */
@@ -1622,24 +1636,26 @@ fsp_seg_inode_page_find_used(
return(ULINT_UNDEFINED);
}
-/**********************************************************************//**
-Looks for an unused segment inode on a segment inode page.
-@return segment inode index, or ULINT_UNDEFINED if not found */
+/** Looks for an unused segment inode on a segment inode page.
+@param[in] page segment inode page
+@param[in] i search forward starting from this index
+@param[in] page_size page size
+@param[in,out] mtr mini-transaction
+@return segment inode index, or ULINT_UNDEFINED if not found */
static
ulint
fsp_seg_inode_page_find_free(
-/*=========================*/
- page_t* page, /*!< in: segment inode page */
- ulint i, /*!< in: search forward starting from this index */
- ulint zip_size,/*!< in: compressed page size, or 0 */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ page_t* page,
+ ulint i,
+ const page_size_t& page_size,
+ mtr_t* mtr)
{
- for (; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) {
+ for (; i < FSP_SEG_INODES_PER_PAGE(page_size); i++) {
fseg_inode_t* inode;
inode = fsp_seg_inode_page_get_nth_inode(
- page, i, zip_size, mtr);
+ page, i, page_size, mtr);
if (!mach_read_from_8(inode + FSEG_ID)) {
/* This is unused */
@@ -1653,110 +1669,100 @@ fsp_seg_inode_page_find_free(
return(ULINT_UNDEFINED);
}
-/**********************************************************************//**
-Allocates a new file segment inode page.
-@return TRUE if could be allocated */
+/** Allocate a file segment inode page.
+@param[in,out] space tablespace
+@param[in,out] space_header tablespace header
+@param[in,out] mtr mini-transaction
+@return whether the allocation succeeded */
+MY_ATTRIBUTE((nonnull, warn_unused_result))
static
-ibool
+bool
fsp_alloc_seg_inode_page(
-/*=====================*/
- fsp_header_t* space_header, /*!< in: space header */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ fil_space_t* space,
+ fsp_header_t* space_header,
+ mtr_t* mtr)
{
- fseg_inode_t* inode;
buf_block_t* block;
- page_t* page;
- ulint space;
- ulint zip_size;
ut_ad(page_offset(space_header) == FSP_HEADER_OFFSET);
+ ut_ad(page_get_space_id(page_align(space_header)) == space->id);
- space = page_get_space_id(page_align(space_header));
+ const page_size_t page_size(space->flags);
- zip_size = fsp_flags_get_zip_size(
- mach_read_from_4(FSP_SPACE_FLAGS + space_header));
-
- block = fsp_alloc_free_page(space, zip_size, 0, mtr, mtr);
+ block = fsp_alloc_free_page(
+ space, page_size, 0, RW_SX_LATCH, mtr, mtr);
if (block == NULL) {
- return(FALSE);
+ return(false);
}
buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
- ut_ad(rw_lock_get_x_lock_count(&block->lock) == 1);
-
- block->check_index_page_at_flush = FALSE;
-
- page = buf_block_get_frame(block);
+ ut_ad(rw_lock_get_sx_lock_count(&block->lock) == 1);
- mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_INODE,
+ mlog_write_ulint(block->frame + FIL_PAGE_TYPE, FIL_PAGE_INODE,
MLOG_2BYTES, mtr);
- for (ulint i = 0; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) {
-
- inode = fsp_seg_inode_page_get_nth_inode(
- page, i, zip_size, mtr);
-
- mlog_write_ull(inode + FSEG_ID, 0, mtr);
+#ifdef UNIV_DEBUG
+ const byte* inode = FSEG_ID + FSEG_ARR_OFFSET + block->frame;
+ for (ulint i = FSP_SEG_INODES_PER_PAGE(page_size); i--;
+ inode += FSEG_INODE_SIZE) {
+ ut_ad(!mach_read_from_8(inode));
}
+#endif
flst_add_last(
space_header + FSP_SEG_INODES_FREE,
- page + FSEG_INODE_PAGE_NODE, mtr);
+ block->frame + FSEG_INODE_PAGE_NODE, mtr);
- return(TRUE);
+ return(true);
}
-/**********************************************************************//**
-Allocates a new file segment inode.
-@return segment inode, or NULL if not enough space */
+/** Allocate a file segment inode.
+@param[in,out] space tablespace
+@param[in,out] space_header tablespace header
+@param[in,out] mtr mini-transaction
+@return segment inode
+@retval NULL if not enough space */
+MY_ATTRIBUTE((nonnull, warn_unused_result))
static
fseg_inode_t*
fsp_alloc_seg_inode(
-/*================*/
- fsp_header_t* space_header, /*!< in: space header */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ fil_space_t* space,
+ fsp_header_t* space_header,
+ mtr_t* mtr)
{
- ulint page_no;
buf_block_t* block;
page_t* page;
fseg_inode_t* inode;
- ibool success;
- ulint zip_size;
ulint n;
ut_ad(page_offset(space_header) == FSP_HEADER_OFFSET);
- if (flst_get_len(space_header + FSP_SEG_INODES_FREE, mtr) == 0) {
- /* Allocate a new segment inode page */
-
- success = fsp_alloc_seg_inode_page(space_header, mtr);
-
- if (!success) {
-
- return(NULL);
- }
+ /* Allocate a new segment inode page if needed. */
+ if (flst_get_len(space_header + FSP_SEG_INODES_FREE) == 0
+ && !fsp_alloc_seg_inode_page(space, space_header, mtr)) {
+ return(NULL);
}
+ const page_size_t page_size(space->flags);
+ const page_id_t page_id(
+ space->id,
+ flst_get_first(space_header + FSP_SEG_INODES_FREE, mtr).page);
- page_no = flst_get_first(space_header + FSP_SEG_INODES_FREE, mtr).page;
-
- zip_size = fsp_flags_get_zip_size(
- mach_read_from_4(FSP_SPACE_FLAGS + space_header));
- block = buf_page_get(page_get_space_id(page_align(space_header)),
- zip_size, page_no, RW_X_LATCH, mtr);
+ block = buf_page_get(page_id, page_size, RW_SX_LATCH, mtr);
buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
+ fil_block_check_type(*block, FIL_PAGE_INODE, mtr);
page = buf_block_get_frame(block);
- n = fsp_seg_inode_page_find_free(page, 0, zip_size, mtr);
+ n = fsp_seg_inode_page_find_free(page, 0, page_size, mtr);
ut_a(n != ULINT_UNDEFINED);
- inode = fsp_seg_inode_page_get_nth_inode(page, n, zip_size, mtr);
+ inode = fsp_seg_inode_page_get_nth_inode(page, n, page_size, mtr);
if (ULINT_UNDEFINED == fsp_seg_inode_page_find_free(page, n + 1,
- zip_size, mtr)) {
+ page_size, mtr)) {
/* There are no other unused headers left on the page: move it
to another list */
@@ -1772,29 +1778,32 @@ fsp_alloc_seg_inode(
return(inode);
}
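
fsp_alloc_seg_inode above moves an inode page off the FSP_SEG_INODES_FREE list once its last unused slot is taken. A toy model of that bookkeeping, with std::list standing in for the on-page flst_* lists (85 slots per page is only the usual 16KiB-page figure, assumed here):

        #include <cstdio>
        #include <list>

        struct inode_page { unsigned used, slots; };

        int main()
        {
                std::list<inode_page> free_pages{{84, 85}}, full_pages;

                inode_page& pg = free_pages.front();
                pg.used++;                      /* take the last unused slot */
                if (pg.used == pg.slots) {      /* no free slots remain */
                        full_pages.splice(full_pages.end(), free_pages,
                                          free_pages.begin());
                }
                std::printf("free pages: %zu, full pages: %zu\n",
                            free_pages.size(), full_pages.size());
        }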
-/**********************************************************************//**
-Frees a file segment inode. */
+/** Frees a file segment inode.
+@param[in,out] space tablespace
+@param[in] page_size page size
+@param[in,out] inode segment inode
+@param[in,out] mtr mini-transaction */
static
void
fsp_free_seg_inode(
-/*===============*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- fseg_inode_t* inode, /*!< in: segment inode */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ fil_space_t* space,
+ const page_size_t& page_size,
+ fseg_inode_t* inode,
+ mtr_t* mtr)
{
page_t* page;
fsp_header_t* space_header;
+ ut_d(space->modify_check(*mtr));
+
page = page_align(inode);
- space_header = fsp_get_space_header(space, zip_size, mtr);
+ space_header = fsp_get_space_header(space, page_size, mtr);
ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
if (ULINT_UNDEFINED
- == fsp_seg_inode_page_find_free(page, 0, zip_size, mtr)) {
+ == fsp_seg_inode_page_find_free(page, 0, page_size, mtr)) {
/* Move the page to another list */
@@ -1809,29 +1818,32 @@ fsp_free_seg_inode(
mlog_write_ulint(inode + FSEG_MAGIC_N, 0xfa051ce3, MLOG_4BYTES, mtr);
if (ULINT_UNDEFINED
- == fsp_seg_inode_page_find_used(page, zip_size, mtr)) {
+ == fsp_seg_inode_page_find_used(page, page_size, mtr)) {
/* There are no other used headers left on the page: free it */
flst_remove(space_header + FSP_SEG_INODES_FREE,
page + FSEG_INODE_PAGE_NODE, mtr);
- fsp_free_page(space, zip_size, page_get_page_no(page), mtr);
+ fsp_free_page(space, page_get_page_no(page), page_size, mtr);
}
}
-/**********************************************************************//**
-Returns the file segment inode, page x-latched.
-@return segment inode, page x-latched; NULL if the inode is free */
+/** Returns the file segment inode, page SX-latched.
+@param[in] header segment header
+@param[in] space space id
+@param[in] page_size page size
+@param[in,out] mtr mini-transaction
+@param[out] block inode block, or NULL to ignore
+@return segment inode, page SX-latched; NULL if the inode is free */
static
fseg_inode_t*
fseg_inode_try_get(
-/*===============*/
- fseg_header_t* header, /*!< in: segment header */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ fseg_header_t* header,
+ ulint space,
+ const page_size_t& page_size,
+ mtr_t* mtr,
+ buf_block_t** block)
{
fil_addr_t inode_addr;
fseg_inode_t* inode;
@@ -1840,7 +1852,8 @@ fseg_inode_try_get(
inode_addr.boffset = mach_read_from_2(header + FSEG_HDR_OFFSET);
ut_ad(space == mach_read_from_4(header + FSEG_HDR_SPACE));
- inode = fut_get_ptr(space, zip_size, inode_addr, RW_X_LATCH, mtr);
+ inode = fut_get_ptr(space, page_size, inode_addr, RW_SX_LATCH, mtr,
+ block);
if (UNIV_UNLIKELY(!mach_read_from_8(inode + FSEG_ID))) {
@@ -1853,28 +1866,31 @@ fseg_inode_try_get(
return(inode);
}
-/**********************************************************************//**
-Returns the file segment inode, page x-latched.
-@return segment inode, page x-latched */
+/** Returns the file segment inode, page SX-latched.
+@param[in] header segment header
+@param[in] space space id
+@param[in] page_size page size
+@param[in,out] mtr mini-transaction
+@param[out] block inode block
+@return segment inode, page SX-latched */
static
fseg_inode_t*
fseg_inode_get(
-/*===========*/
- fseg_header_t* header, /*!< in: segment header */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ fseg_header_t* header,
+ ulint space,
+ const page_size_t& page_size,
+ mtr_t* mtr,
+ buf_block_t** block = NULL)
{
fseg_inode_t* inode
- = fseg_inode_try_get(header, space, zip_size, mtr);
+ = fseg_inode_try_get(header, space, page_size, mtr, block);
ut_a(inode);
return(inode);
}
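
The new block out-parameter of fseg_inode_get defaults to NULL, so call sites that do not need the buffer block compile unchanged. A minimal illustration of the pattern, with hypothetical types:

        #include <cstdio>

        struct block_t { int id; };

        /* Optional out-parameter: pass nothing if the block is not needed. */
        static int get_inode(block_t** block = nullptr)
        {
                static block_t b = {42};
                if (block != nullptr) {
                        *block = &b;
                }
                return b.id;
        }

        int main()
        {
                block_t* blk = nullptr;
                std::printf("%d\n", get_inode());      /* legacy call site */
                std::printf("%d\n", get_inode(&blk));  /* wants the block */
        }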
/**********************************************************************//**
Gets the page number from the nth fragment page slot.
-@return page number, FIL_NULL if not in use */
+@return page number, FIL_NULL if not in use */
UNIV_INLINE
ulint
fseg_get_nth_frag_page_no(
@@ -1886,7 +1902,7 @@ fseg_get_nth_frag_page_no(
{
ut_ad(inode && mtr);
ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
- ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_SX_FIX));
ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
return(mach_read_from_4(inode + FSEG_FRAG_ARR
+ n * FSEG_FRAG_SLOT_SIZE));
@@ -1905,7 +1921,7 @@ fseg_set_nth_frag_page_no(
{
ut_ad(inode && mtr);
ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
- ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_SX_FIX));
ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
mlog_write_ulint(inode + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE,
@@ -1914,7 +1930,7 @@ fseg_set_nth_frag_page_no(
/**********************************************************************//**
Finds a fragment page slot which is free.
-@return slot index; ULINT_UNDEFINED if none found */
+@return slot index; ULINT_UNDEFINED if none found */
static
ulint
fseg_find_free_frag_page_slot(
@@ -1941,7 +1957,7 @@ fseg_find_free_frag_page_slot(
/**********************************************************************//**
Finds a fragment page slot which is used and last in the array.
-@return slot index; ULINT_UNDEFINED if none found */
+@return slot index; ULINT_UNDEFINED if none found */
static
ulint
fseg_find_last_used_frag_page_slot(
@@ -1969,7 +1985,7 @@ fseg_find_last_used_frag_page_slot(
/**********************************************************************//**
Calculates reserved fragment page slots.
-@return number of fragment pages */
+@return number of fragment pages */
static
ulint
fseg_get_n_frag_pages(
@@ -1995,11 +2011,10 @@ fseg_get_n_frag_pages(
Creates a new segment.
@return the block where the segment header is placed, x-latched, NULL
if could not create segment because of lack of space */
-UNIV_INTERN
buf_block_t*
fseg_create_general(
/*================*/
- ulint space, /*!< in: space id */
+ ulint space_id,/*!< in: space id */
ulint page, /*!< in: page where the segment header is placed: if
this is != 0, the page must belong to another segment,
if this is 0, a new page will be allocated and it
@@ -2014,43 +2029,47 @@ fseg_create_general(
operation */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
- ulint flags;
- ulint zip_size;
fsp_header_t* space_header;
fseg_inode_t* inode;
ib_id_t seg_id;
buf_block_t* block = 0; /* remove warning */
fseg_header_t* header = 0; /* remove warning */
- rw_lock_t* latch;
- ibool success;
ulint n_reserved;
ulint i;
+ DBUG_ENTER("fseg_create_general");
+
ut_ad(mtr);
ut_ad(byte_offset + FSEG_HEADER_SIZE
<= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END);
- latch = fil_space_get_latch(space, &flags);
- zip_size = fsp_flags_get_zip_size(flags);
+ fil_space_t* space = mtr_x_lock_space(space_id, mtr);
+ const page_size_t page_size(space->flags);
+ ut_d(space->modify_check(*mtr));
if (page != 0) {
- block = buf_page_get(space, zip_size, page, RW_X_LATCH, mtr);
+ block = buf_page_get(page_id_t(space_id, page), page_size,
+ RW_SX_LATCH, mtr);
+
header = byte_offset + buf_block_get_frame(block);
- }
- mtr_x_lock(latch, mtr);
+ const ulint type = space_id == TRX_SYS_SPACE
+ && page == TRX_SYS_PAGE_NO
+ ? FIL_PAGE_TYPE_TRX_SYS
+ : FIL_PAGE_TYPE_SYS;
- if (!has_done_reservation) {
- success = fsp_reserve_free_extents(&n_reserved, space, 2,
- FSP_NORMAL, mtr);
- if (!success) {
- return(NULL);
- }
+ fil_block_check_type(*block, type, mtr);
}
- space_header = fsp_get_space_header(space, zip_size, mtr);
+ if (!has_done_reservation
+ && !fsp_reserve_free_extents(&n_reserved, space_id, 2,
+ FSP_NORMAL, mtr)) {
+ DBUG_RETURN(NULL);
+ }
+
+ space_header = fsp_get_space_header(space, page_size, mtr);
- inode = fsp_alloc_seg_inode(space_header, mtr);
+ inode = fsp_alloc_seg_inode(space, space_header, mtr);
if (inode == NULL) {
goto funct_exit;
@@ -2077,17 +2096,26 @@ fseg_create_general(
}
if (page == 0) {
- block = fseg_alloc_free_page_low(space, zip_size,
- inode, 0, FSP_UP, mtr, mtr);
+ block = fseg_alloc_free_page_low(space, page_size,
+ inode, 0, FSP_UP, RW_SX_LATCH,
+ mtr, mtr
+#ifdef UNIV_DEBUG
+ , has_done_reservation
+#endif /* UNIV_DEBUG */
+ );
+
+ /* The allocation cannot fail if we have already reserved
+ space for the page. */
+ ut_ad(!has_done_reservation || block != NULL);
if (block == NULL) {
- fsp_free_seg_inode(space, zip_size, inode, mtr);
+ fsp_free_seg_inode(space, page_size, inode, mtr);
goto funct_exit;
}
- ut_ad(rw_lock_get_x_lock_count(&block->lock) == 1);
+ ut_ad(rw_lock_get_sx_lock_count(&block->lock) == 1);
header = byte_offset + buf_block_get_frame(block);
mlog_write_ulint(buf_block_get_frame(block) + FIL_PAGE_TYPE,
@@ -2101,22 +2129,21 @@ fseg_create_general(
page_get_page_no(page_align(inode)),
MLOG_4BYTES, mtr);
- mlog_write_ulint(header + FSEG_HDR_SPACE, space, MLOG_4BYTES, mtr);
+ mlog_write_ulint(header + FSEG_HDR_SPACE, space_id, MLOG_4BYTES, mtr);
funct_exit:
if (!has_done_reservation) {
- fil_space_release_free_extents(space, n_reserved);
+ fil_space_release_free_extents(space_id, n_reserved);
}
- return(block);
+ DBUG_RETURN(block);
}
/**********************************************************************//**
Creates a new segment.
@return the block where the segment header is placed, x-latched, NULL
if could not create segment because of lack of space */
-UNIV_INTERN
buf_block_t*
fseg_create(
/*========*/
@@ -2135,7 +2162,7 @@ fseg_create(
/**********************************************************************//**
Calculates the number of pages reserved by a segment, and how many pages are
currently used.
-@return number of reserved pages */
+@return number of reserved pages */
static
ulint
fseg_n_reserved_pages_low(
@@ -2148,16 +2175,16 @@ fseg_n_reserved_pages_low(
ulint ret;
ut_ad(inode && used && mtr);
- ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_SX_FIX));
- *used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED, MLOG_4BYTES, mtr)
- + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL, mtr)
+ *used = mach_read_from_4(inode + FSEG_NOT_FULL_N_USED)
+ + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL)
+ fseg_get_n_frag_pages(inode, mtr);
ret = fseg_get_n_frag_pages(inode, mtr)
- + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FREE, mtr)
- + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_NOT_FULL, mtr)
- + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL, mtr);
+ + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FREE)
+ + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_NOT_FULL)
+ + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL);
return(ret);
}
@@ -2165,8 +2192,7 @@ fseg_n_reserved_pages_low(
/**********************************************************************//**
Calculates the number of pages reserved by a segment, and how many pages are
currently used.
-@return number of reserved pages */
-UNIV_INTERN
+@return number of reserved pages */
ulint
fseg_n_reserved_pages(
/*==================*/
@@ -2176,40 +2202,39 @@ fseg_n_reserved_pages(
{
ulint ret;
fseg_inode_t* inode;
- ulint space;
- ulint flags;
- ulint zip_size;
- rw_lock_t* latch;
+ ulint space_id;
+ fil_space_t* space;
- space = page_get_space_id(page_align(header));
- latch = fil_space_get_latch(space, &flags);
- zip_size = fsp_flags_get_zip_size(flags);
+ space_id = page_get_space_id(page_align(header));
+ space = mtr_x_lock_space(space_id, mtr);
- mtr_x_lock(latch, mtr);
+ const page_size_t page_size(space->flags);
- inode = fseg_inode_get(header, space, zip_size, mtr);
+ inode = fseg_inode_get(header, space_id, page_size, mtr);
ret = fseg_n_reserved_pages_low(inode, used, mtr);
return(ret);
}
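
The counting in fseg_n_reserved_pages_low is easy to verify by hand. A hedged sketch with made-up list lengths (FSP_EXTENT_SIZE = 64 is the usual value for 16KiB pages, assumed here):

        #include <cstdio>

        static const unsigned FSP_EXTENT_SIZE_ = 64; /* pages per extent */

        int main()
        {
                /* Hypothetical segment state. */
                unsigned n_frag        = 5;  /* used fragment-array slots */
                unsigned len_free      = 1;  /* extents on FSEG_FREE */
                unsigned len_not_full  = 2;  /* extents on FSEG_NOT_FULL */
                unsigned len_full      = 3;  /* extents on FSEG_FULL */
                unsigned not_full_used = 70; /* FSEG_NOT_FULL_N_USED */

                unsigned used = not_full_used
                        + FSP_EXTENT_SIZE_ * len_full
                        + n_frag;
                unsigned reserved = n_frag
                        + FSP_EXTENT_SIZE_
                        * (len_free + len_not_full + len_full);

                /* used = 70 + 192 + 5 = 267; reserved = 5 + 384 = 389 */
                std::printf("used=%u reserved=%u\n", used, reserved);
        }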
-/*********************************************************************//**
-Tries to fill the free list of a segment with consecutive free extents.
+/** Tries to fill the free list of a segment with consecutive free extents.
This happens if the segment is big enough to allow extents in the free list,
the free list is empty, and the extents can be allocated consecutively from
-the hint onward. */
+the hint onward.
+@param[in] inode segment inode
+@param[in] space tablespace
+@param[in] page_size page size
+@param[in] hint hint of which extent would be good as the first
+extent
+@param[in,out] mtr mini-transaction */
static
void
fseg_fill_free_list(
-/*================*/
- fseg_inode_t* inode, /*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint hint, /*!< in: hint which extent would be good as
- the first extent */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ fseg_inode_t* inode,
+ fil_space_t* space,
+ const page_size_t& page_size,
+ ulint hint,
+ mtr_t* mtr)
{
xdes_t* descr;
ulint i;
@@ -2219,6 +2244,7 @@ fseg_fill_free_list(
ut_ad(inode && mtr);
ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
+ ut_d(space->modify_check(*mtr));
reserved = fseg_n_reserved_pages_low(inode, &used, mtr);
@@ -2229,14 +2255,14 @@ fseg_fill_free_list(
return;
}
- if (flst_get_len(inode + FSEG_FREE, mtr) > 0) {
+ if (flst_get_len(inode + FSEG_FREE) > 0) {
/* Free list is not empty */
return;
}
for (i = 0; i < FSEG_FREE_LIST_MAX_LEN; i++) {
- descr = xdes_get_descriptor(space, zip_size, hint, mtr);
+ descr = xdes_get_descriptor(space, hint, page_size, mtr);
if ((descr == NULL)
|| (XDES_FREE != xdes_get_state(descr, mtr))) {
@@ -2246,7 +2272,7 @@ fseg_fill_free_list(
return;
}
- descr = fsp_alloc_free_extent(space, zip_size, hint, mtr);
+ descr = fsp_alloc_free_extent(space, page_size, hint, mtr);
xdes_set_state(descr, XDES_FSEG, mtr);
@@ -2260,23 +2286,25 @@ fseg_fill_free_list(
}
}
-/*********************************************************************//**
-Allocates a free extent for the segment: looks first in the free list of the
-segment, then tries to allocate from the space free list. NOTE that the extent
-returned still resides in the segment free list, it is not yet taken off it!
-@retval NULL if no page could be allocated
-@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
+/** Allocates a free extent for the segment: looks first in the free list of
+the segment, then tries to allocate from the space free list.
+NOTE that the extent returned still resides in the segment free list, it is
+not yet taken off it!
+@param[in] inode segment inode
+@param[in,out] space tablespace
+@param[in] page_size page size
+@param[in,out] mtr mini-transaction
-(init_mtr == mtr, or the page was not previously freed in mtr)
-@retval block (not allocated or initialized) otherwise */
+@retval NULL if no extent could be allocated
+@retval the allocated extent descriptor otherwise */
static
xdes_t*
fseg_alloc_free_extent(
-/*===================*/
- fseg_inode_t* inode, /*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ fseg_inode_t* inode,
+ fil_space_t* space,
+ const page_size_t& page_size,
+ mtr_t* mtr)
{
xdes_t* descr;
ib_id_t seg_id;
@@ -2284,16 +2312,17 @@ fseg_alloc_free_extent(
ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
+ ut_d(space->modify_check(*mtr));
- if (flst_get_len(inode + FSEG_FREE, mtr) > 0) {
+ if (flst_get_len(inode + FSEG_FREE) > 0) {
/* Segment free list is not empty, allocate from it */
first = flst_get_first(inode + FSEG_FREE, mtr);
- descr = xdes_lst_get_descriptor(space, zip_size, first, mtr);
+ descr = xdes_lst_get_descriptor(space, page_size, first, mtr);
} else {
/* Segment free list was empty, allocate from space */
- descr = fsp_alloc_free_extent(space, zip_size, 0, mtr);
+ descr = fsp_alloc_free_extent(space, page_size, 0, mtr);
if (descr == NULL) {
@@ -2307,7 +2336,7 @@ fseg_alloc_free_extent(
flst_add_last(inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr);
/* Try to fill the segment free list */
- fseg_fill_free_list(inode, space, zip_size,
+ fseg_fill_free_list(inode, space, page_size,
xdes_get_offset(descr) + FSP_EXTENT_SIZE,
mtr);
}
@@ -2315,37 +2344,44 @@ fseg_alloc_free_extent(
return(descr);
}
-/**********************************************************************//**
-Allocates a single free page from a segment. This function implements
-the intelligent allocation strategy which tries to minimize file space
-fragmentation.
-@retval NULL if no page could be allocated
-@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
+/** Allocates a single free page from a segment.
+This function implements the intelligent allocation strategy which tries to
+minimize file space fragmentation.
+@param[in,out] space tablespace
+@param[in] page_size page size
+@param[in,out] seg_inode segment inode
+@param[in] hint hint of which page would be desirable
+@param[in] direction if the new page is needed because of
+an index page split, and records are inserted there in order, into which
+direction they go alphabetically: FSP_DOWN, FSP_UP, FSP_NO_DIR
+@param[in] rw_latch RW_SX_LATCH, RW_X_LATCH
+@param[in,out] mtr mini-transaction
+@param[in,out] init_mtr mtr or another mini-transaction in
+which the page should be initialized. If init_mtr != mtr, but the page is
+already latched in mtr, do not initialize the page
+@param[in] has_done_reservation TRUE if the space has already been
+reserved; in this case we will never return NULL
+@retval NULL if no page could be allocated
+@retval block rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
(init_mtr == mtr, or the page was not previously freed in mtr)
-@retval block (not allocated or initialized) otherwise */
+@retval block (not allocated or initialized) otherwise */
static
buf_block_t*
fseg_alloc_free_page_low(
-/*=====================*/
- ulint space, /*!< in: space */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- fseg_inode_t* seg_inode, /*!< in/out: segment inode */
- ulint hint, /*!< in: hint of which page would be
- desirable */
- byte direction, /*!< in: if the new page is needed because
- of an index page split, and records are
- inserted there in order, into which
- direction they go alphabetically: FSP_DOWN,
- FSP_UP, FSP_NO_DIR */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- mtr_t* init_mtr)/*!< in/out: mtr or another mini-transaction
- in which the page should be initialized.
- If init_mtr!=mtr, but the page is already
- latched in mtr, do not initialize the page. */
+ fil_space_t* space,
+ const page_size_t& page_size,
+ fseg_inode_t* seg_inode,
+ ulint hint,
+ byte direction,
+ rw_lock_type_t rw_latch,
+ mtr_t* mtr,
+ mtr_t* init_mtr
+#ifdef UNIV_DEBUG
+ , ibool has_done_reservation
+#endif /* UNIV_DEBUG */
+)
{
fsp_header_t* space_header;
- ulint space_size;
ib_id_t seg_id;
ulint used;
ulint reserved;
@@ -2353,8 +2389,8 @@ fseg_alloc_free_page_low(
ulint ret_page; /*!< the allocated page offset, FIL_NULL
if could not be allocated */
xdes_t* ret_descr; /*!< the extent of the allocated page */
- ibool success;
ulint n;
+ const ulint space_id = space->id;
ut_ad((direction >= FSP_UP) && (direction <= FSP_NO_DIR));
ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
@@ -2363,10 +2399,12 @@ fseg_alloc_free_page_low(
seg_id = mach_read_from_8(seg_inode + FSEG_ID);
ut_ad(seg_id);
+ ut_d(space->modify_check(*mtr));
+ ut_ad(fil_page_get_type(page_align(seg_inode)) == FIL_PAGE_INODE);
reserved = fseg_n_reserved_pages_low(seg_inode, &used, mtr);
- space_header = fsp_get_space_header(space, zip_size, mtr);
+ space_header = fsp_get_space_header(space, page_size, mtr);
descr = xdes_get_descriptor_with_space_hdr(space_header, space,
hint, mtr);
@@ -2375,7 +2413,7 @@ fseg_alloc_free_page_low(
hint */
/* The file space header page is always allocated. */
hint = 0;
- descr = xdes_get_descriptor(space, zip_size, hint, mtr);
+ descr = xdes_get_descriptor(space, hint, page_size, mtr);
}
/* In the big if-else below we look for ret_page and ret_descr */
@@ -2402,7 +2440,7 @@ take_hinted_page:
=========================================================
the hinted page
===============*/
- ret_descr = fsp_alloc_free_extent(space, zip_size, hint, mtr);
+ ret_descr = fsp_alloc_free_extent(space, page_size, hint, mtr);
ut_a(ret_descr == descr);
@@ -2412,7 +2450,7 @@ take_hinted_page:
ret_descr + XDES_FLST_NODE, mtr);
/* Try to fill the segment free list */
- fseg_fill_free_list(seg_inode, space, zip_size,
+ fseg_fill_free_list(seg_inode, space, page_size,
hint + FSP_EXTENT_SIZE, mtr);
goto take_hinted_page;
/*-----------------------------------------------------------*/
@@ -2420,8 +2458,8 @@ take_hinted_page:
&& ((reserved - used) < reserved / FSEG_FILLFACTOR)
&& (used >= FSEG_FRAG_LIMIT)
&& (!!(ret_descr
- = fseg_alloc_free_extent(seg_inode,
- space, zip_size, mtr)))) {
+ = fseg_alloc_free_extent(
+ seg_inode, space, page_size, mtr)))) {
/* 3. We take any free extent (which was already assigned above
===============================================================
@@ -2434,6 +2472,7 @@ take_hinted_page:
if (direction == FSP_DOWN) {
ret_page += FSP_EXTENT_SIZE - 1;
}
+ ut_ad(!has_done_reservation || ret_page != FIL_NULL);
/*-----------------------------------------------------------*/
} else if ((xdes_get_state(descr, mtr) == XDES_FSEG)
&& mach_read_from_8(descr + XDES_ID) == seg_id
@@ -2449,33 +2488,37 @@ take_hinted_page:
ret_page = xdes_get_offset(ret_descr)
+ xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE,
hint % FSP_EXTENT_SIZE, mtr);
+ ut_ad(!has_done_reservation || ret_page != FIL_NULL);
/*-----------------------------------------------------------*/
} else if (reserved - used > 0) {
/* 5. We take any unused page from the segment
==============================================*/
fil_addr_t first;
- if (flst_get_len(seg_inode + FSEG_NOT_FULL, mtr) > 0) {
+ if (flst_get_len(seg_inode + FSEG_NOT_FULL) > 0) {
first = flst_get_first(seg_inode + FSEG_NOT_FULL,
mtr);
- } else if (flst_get_len(seg_inode + FSEG_FREE, mtr) > 0) {
+ } else if (flst_get_len(seg_inode + FSEG_FREE) > 0) {
first = flst_get_first(seg_inode + FSEG_FREE, mtr);
} else {
- ut_error;
+ ut_ad(!has_done_reservation);
return(NULL);
}
- ret_descr = xdes_lst_get_descriptor(space, zip_size,
+ ret_descr = xdes_lst_get_descriptor(space, page_size,
first, mtr);
ret_page = xdes_get_offset(ret_descr)
+ xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE,
0, mtr);
+ ut_ad(!has_done_reservation || ret_page != FIL_NULL);
/*-----------------------------------------------------------*/
} else if (used < FSEG_FRAG_LIMIT) {
/* 6. We allocate an individual page from the space
===================================================*/
buf_block_t* block = fsp_alloc_free_page(
- space, zip_size, hint, mtr, init_mtr);
+ space, page_size, hint, rw_latch, mtr, init_mtr);
+
+ ut_ad(!has_done_reservation || block != NULL);
if (block != NULL) {
/* Put the page in the fragment page array of the
@@ -2484,7 +2527,7 @@ take_hinted_page:
ut_a(n != ULINT_UNDEFINED);
fseg_set_nth_frag_page_no(
- seg_inode, n, buf_block_get_page_no(block),
+ seg_inode, n, block->page.id.page_no(),
mtr);
}
@@ -2496,45 +2539,43 @@ take_hinted_page:
/* 7. We allocate a new extent and take its first page
======================================================*/
ret_descr = fseg_alloc_free_extent(seg_inode,
- space, zip_size, mtr);
+ space, page_size, mtr);
if (ret_descr == NULL) {
ret_page = FIL_NULL;
+ ut_ad(!has_done_reservation);
} else {
ret_page = xdes_get_offset(ret_descr);
+ ut_ad(!has_done_reservation || ret_page != FIL_NULL);
}
}
if (ret_page == FIL_NULL) {
/* Page could not be allocated */
+ ut_ad(!has_done_reservation);
return(NULL);
}
- if (space != 0) {
- space_size = fil_space_get_size(space);
-
- if (space_size <= ret_page) {
- /* It must be that we are extending a single-table
- tablespace whose size is still < 64 pages */
-
- if (ret_page >= FSP_EXTENT_SIZE) {
- fprintf(stderr,
- "InnoDB: Error (2): trying to extend"
- " a single-table tablespace %lu\n"
- "InnoDB: by single page(s) though"
- " the space size %lu. Page no %lu.\n",
- (ulong) space, (ulong) space_size,
- (ulong) ret_page);
- return(NULL);
- }
+ if (space->size <= ret_page && !is_system_tablespace(space_id)) {
+ /* It must be that we are extending a single-table
+ tablespace whose size is still < FSP_EXTENT_SIZE pages */
- success = fsp_try_extend_data_file_with_pages(
- space, ret_page, space_header, mtr);
- if (!success) {
- /* No disk space left */
- return(NULL);
- }
+ if (ret_page >= FSP_EXTENT_SIZE) {
+ ib::error() << "Error (2): trying to extend"
+ " a single-table tablespace " << space_id
+ << " by single page(s) though the"
+ << " space size " << space->size
+ << ". Page no " << ret_page << ".";
+ ut_ad(!has_done_reservation);
+ return(NULL);
+ }
+
+ if (!fsp_try_extend_data_file_with_pages(
+ space, ret_page, space_header, mtr)) {
+ /* No disk space left */
+ ut_ad(!has_done_reservation);
+ return(NULL);
}
}
@@ -2546,7 +2587,7 @@ got_hinted_page:
The extent is still in the appropriate list (FSEG_NOT_FULL
or FSEG_FREE), and the page is not yet marked as used. */
- ut_ad(xdes_get_descriptor(space, zip_size, ret_page, mtr)
+ ut_ad(xdes_get_descriptor(space, ret_page, page_size, mtr)
== ret_descr);
ut_ad(xdes_mtr_get_bit(
@@ -2556,11 +2597,8 @@ got_hinted_page:
fseg_mark_page_used(seg_inode, ret_page, ret_descr, mtr);
}
- return(fsp_page_create(
- space, fsp_flags_get_zip_size(
- mach_read_from_4(FSP_SPACE_FLAGS
- + space_header)),
- ret_page, mtr, init_mtr));
+ return(fsp_page_create(space, ret_page, page_size, rw_latch,
+ mtr, init_mtr));
}
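
The big if-else above works through seven numbered cases; collapsing each guard into a named boolean gives a compact reading aid. This is a paraphrase of the control flow, not the real predicates:

        #include <cstdio>

        /* Case numbers match the numbered comments in
        fseg_alloc_free_page_low. */
        enum fseg_alloc_case {
                HINTED_PAGE = 1,       /* 1.-2. hinted page (or its extent) */
                ANY_FREE_EXTENT = 3,   /* 3. any free extent, first page */
                NEAR_HINT = 4,         /* 4. free page in the hinted extent */
                UNUSED_IN_SEGMENT = 5, /* 5. any unused page of the segment */
                FRAGMENT_PAGE = 6,     /* 6. individual page from the space */
                NEW_EXTENT = 7         /* 7. new extent; may still fail */
        };

        static fseg_alloc_case
        classify(bool hinted_page_usable, bool segment_may_claim_extent,
                 bool hinted_extent_has_free_page,
                 unsigned reserved, unsigned used, unsigned frag_limit)
        {
                if (hinted_page_usable)          return HINTED_PAGE;
                if (segment_may_claim_extent)    return ANY_FREE_EXTENT;
                if (hinted_extent_has_free_page) return NEAR_HINT;
                if (reserved > used)             return UNUSED_IN_SEGMENT;
                if (used < frag_limit)           return FRAGMENT_PAGE;
                return NEW_EXTENT;
        }

        int main()
        {
                /* Small segment, nothing reserved: a fragment page. */
                std::printf("%d\n", classify(false, false, false, 10, 10, 32));
        }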
/**********************************************************************//**
@@ -2571,7 +2609,6 @@ fragmentation.
@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
(init_mtr == mtr, or the page was not previously freed in mtr)
@retval block (not allocated or initialized) otherwise */
-UNIV_INTERN
buf_block_t*
fseg_alloc_free_page_general(
/*=========================*/
@@ -2595,80 +2632,84 @@ fseg_alloc_free_page_general(
latched in mtr, do not initialize the page. */
{
fseg_inode_t* inode;
- ulint space;
- ulint flags;
- ulint zip_size;
- rw_lock_t* latch;
+ ulint space_id;
+ fil_space_t* space;
+ buf_block_t* iblock;
buf_block_t* block;
ulint n_reserved;
- space = page_get_space_id(page_align(seg_header));
-
- latch = fil_space_get_latch(space, &flags);
-
- zip_size = fsp_flags_get_zip_size(flags);
+ space_id = page_get_space_id(page_align(seg_header));
+ space = mtr_x_lock_space(space_id, mtr);
+ const page_size_t page_size(space->flags);
- mtr_x_lock(latch, mtr);
-
- inode = fseg_inode_get(seg_header, space, zip_size, mtr);
+ inode = fseg_inode_get(seg_header, space_id, page_size, mtr, &iblock);
+ fil_block_check_type(*iblock, FIL_PAGE_INODE, mtr);
if (!has_done_reservation
- && !fsp_reserve_free_extents(&n_reserved, space, 2,
+ && !fsp_reserve_free_extents(&n_reserved, space_id, 2,
FSP_NORMAL, mtr)) {
return(NULL);
}
- block = fseg_alloc_free_page_low(space, zip_size,
+ block = fseg_alloc_free_page_low(space, page_size,
inode, hint, direction,
- mtr, init_mtr);
+ RW_X_LATCH, mtr, init_mtr
+#ifdef UNIV_DEBUG
+ , has_done_reservation
+#endif /* UNIV_DEBUG */
+ );
+
+ /* The allocation cannot fail if we have already reserved
+ space for the page. */
+ ut_ad(!has_done_reservation || block != NULL);
+
if (!has_done_reservation) {
- fil_space_release_free_extents(space, n_reserved);
+ fil_space_release_free_extents(space_id, n_reserved);
}
return(block);
}
-/**********************************************************************//**
-Checks that we have at least 2 frag pages free in the first extent of a
-single-table tablespace, and they are also physically initialized to the data
-file. That is we have already extended the data file so that those pages are
-inside the data file. If not, this function extends the tablespace with
-pages.
-@return TRUE if there were >= 3 free pages, or we were able to extend */
+/** Check that we have at least n_pages frag pages free in the first extent
+of a single-table tablespace, and they are also physically initialized to
+the data file. That is we have already extended the data file so that those
+pages are inside the data file. If not, this function extends the tablespace
+with pages.
+@param[in,out] space tablespace
+@param[in,out] space_header tablespace header, x-latched
+@param[in] size size of the tablespace in pages,
+must be less than FSP_EXTENT_SIZE
+@param[in,out] mtr mini-transaction
+@param[in] n_pages number of pages to reserve
+@return true if there were at least n_pages free pages, or we were able
+to extend */
static
-ibool
+bool
fsp_reserve_free_pages(
-/*===================*/
- ulint space, /*!< in: space id, must be != 0 */
- fsp_header_t* space_header, /*!< in: header of that space,
- x-latched */
- ulint size, /*!< in: size of the tablespace in
- pages, must be < FSP_EXTENT_SIZE/2 */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ fil_space_t* space,
+ fsp_header_t* space_header,
+ ulint size,
+ mtr_t* mtr,
+ ulint n_pages)
{
xdes_t* descr;
ulint n_used;
- ut_a(space != 0);
- ut_a(size < FSP_EXTENT_SIZE / 2);
+ ut_a(!is_system_tablespace(space->id));
+ ut_a(size < FSP_EXTENT_SIZE);
- descr = xdes_get_descriptor_with_space_hdr(space_header, space, 0,
- mtr);
+ descr = xdes_get_descriptor_with_space_hdr(
+ space_header, space, 0, mtr);
n_used = xdes_get_n_used(descr, mtr);
ut_a(n_used <= size);
- if (size >= n_used + 2) {
-
- return(TRUE);
- }
-
- return(fsp_try_extend_data_file_with_pages(space, n_used + 1,
- space_header, mtr));
+ return(size >= n_used + n_pages
+ || fsp_try_extend_data_file_with_pages(
+ space, n_used + n_pages - 1, space_header, mtr));
}
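
The small-tablespace rule above boils down to one comparison plus an optional file extension. A hedged sketch, with a stand-in for fsp_try_extend_data_file_with_pages:

        #include <cstdio>

        /* Stand-in: pretend the file system always has room. */
        static bool try_extend_to_page(unsigned page_no)
        {
                std::printf("extending data file to cover page %u\n",
                            page_no);
                return true;
        }

        static bool reserve_free_pages(unsigned size, unsigned n_used,
                                       unsigned n_pages)
        {
                return size >= n_used + n_pages
                        || try_extend_to_page(n_used + n_pages - 1);
        }

        int main()
        {
                std::printf("%d\n", reserve_free_pages(7, 5, 2)); /* fits */
                std::printf("%d\n", reserve_free_pages(6, 5, 2)); /* extend */
        }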
-/**********************************************************************//**
-Reserves free pages from a tablespace. All mini-transactions which may
+/** Reserves free pages from a tablespace. All mini-transactions which may
use several pages from the tablespace should call this function beforehand
and reserve enough free extents so that they certainly will be able
to do their operation, like a B-tree page split, fully. Reservations
@@ -2687,81 +2728,89 @@ The purpose is to avoid dead end where the database is full but the
user cannot free any space because these freeing operations temporarily
reserve some space.
-Single-table tablespaces whose size is < 32 pages are a special case. In this
-function we would liberally reserve several 64 page extents for every page
-split or merge in a B-tree. But we do not want to waste disk space if the table
-only occupies < 32 pages. That is why we apply different rules in that special
-case, just ensuring that there are 3 free pages available.
-@return TRUE if we were able to make the reservation */
-UNIV_INTERN
-ibool
+Single-table tablespaces whose size is < FSP_EXTENT_SIZE pages are a special
+case. In this function we would liberally reserve several extents for
+every page split or merge in a B-tree. But we do not want to waste disk space
+if the table only occupies < FSP_EXTENT_SIZE pages. That is why we apply
+different rules in that special case, just ensuring that there are n_pages
+free pages available.
+
+@param[out] n_reserved number of extents actually reserved; if we
+ return true and the tablespace size is <
+ FSP_EXTENT_SIZE pages, then this can be 0,
+ otherwise it is n_ext
+@param[in] space_id tablespace identifier
+@param[in] n_ext number of extents to reserve
+@param[in] alloc_type page reservation type (FSP_BLOB, etc.)
+@param[in,out] mtr the mini-transaction
+@param[in] n_pages for small tablespaces (tablespace size is
+ less than FSP_EXTENT_SIZE), number of free
+ pages to reserve.
+@return true if we were able to make the reservation */
+bool
fsp_reserve_free_extents(
-/*=====================*/
- ulint* n_reserved,/*!< out: number of extents actually reserved; if we
- return TRUE and the tablespace size is < 64 pages,
- then this can be 0, otherwise it is n_ext */
- ulint space, /*!< in: space id */
- ulint n_ext, /*!< in: number of extents to reserve */
- ulint alloc_type,/*!< in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ ulint* n_reserved,
+ ulint space_id,
+ ulint n_ext,
+ fsp_reserve_t alloc_type,
+ mtr_t* mtr,
+ ulint n_pages)
{
fsp_header_t* space_header;
- rw_lock_t* latch;
ulint n_free_list_ext;
ulint free_limit;
ulint size;
- ulint flags;
- ulint zip_size;
ulint n_free;
ulint n_free_up;
ulint reserve;
- ibool success;
- ulint n_pages_added;
size_t total_reserved = 0;
ut_ad(mtr);
*n_reserved = n_ext;
- latch = fil_space_get_latch(space, &flags);
- zip_size = fsp_flags_get_zip_size(flags);
-
- mtr_x_lock(latch, mtr);
+ fil_space_t* space = mtr_x_lock_space(space_id, mtr);
+ const page_size_t page_size(space->flags);
- space_header = fsp_get_space_header(space, zip_size, mtr);
+ space_header = fsp_get_space_header(space, page_size, mtr);
try_again:
- size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, mtr);
+ size = mach_read_from_4(space_header + FSP_SIZE);
+ ut_ad(size == space->size_in_header);
- if (size < FSP_EXTENT_SIZE / 2) {
+ if (size < FSP_EXTENT_SIZE && n_pages < FSP_EXTENT_SIZE / 2) {
/* Use different rules for small single-table tablespaces */
*n_reserved = 0;
- return(fsp_reserve_free_pages(space, space_header, size, mtr));
+ return(fsp_reserve_free_pages(space, space_header, size,
+ mtr, n_pages));
}
- n_free_list_ext = flst_get_len(space_header + FSP_FREE, mtr);
+ n_free_list_ext = flst_get_len(space_header + FSP_FREE);
+ ut_ad(space->free_len == n_free_list_ext);
free_limit = mtr_read_ulint(space_header + FSP_FREE_LIMIT,
MLOG_4BYTES, mtr);
+ ut_ad(space->free_limit == free_limit);
/* Below we play safe when counting free extents above the free limit:
some of them will contain extent descriptor pages, and therefore
will not be free extents */
- n_free_up = (size - free_limit) / FSP_EXTENT_SIZE;
+ if (size >= free_limit) {
+ n_free_up = (size - free_limit) / FSP_EXTENT_SIZE;
+ } else {
+ ut_ad(alloc_type == FSP_BLOB);
+ n_free_up = 0;
+ }
if (n_free_up > 0) {
n_free_up--;
- if (!zip_size) {
- n_free_up -= n_free_up
- / (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE);
- } else {
- n_free_up -= n_free_up
- / (zip_size / FSP_EXTENT_SIZE);
- }
+ n_free_up -= n_free_up / (page_size.physical()
+ / FSP_EXTENT_SIZE);
}
n_free = n_free_list_ext + n_free_up;
- if (alloc_type == FSP_NORMAL) {
+ switch (alloc_type) {
+ case FSP_NORMAL:
/* We reserve 1 extent + 0.5 % of the space size to undo logs
and 1 extent + 0.5 % to cleaning operations; NOTE: this source
code is duplicated in the function below! */
@@ -2772,7 +2821,8 @@ try_again:
goto try_to_extend;
}
- } else if (alloc_type == FSP_UNDO) {
+ break;
+ case FSP_UNDO:
/* We reserve 0.5 % of the space size to cleaning operations */
reserve = 1 + ((size / FSP_EXTENT_SIZE) * 1) / 200;
@@ -2781,124 +2831,57 @@ try_again:
goto try_to_extend;
}
- } else {
- ut_a(alloc_type == FSP_CLEANING);
+ break;
+ case FSP_CLEANING:
+ case FSP_BLOB:
reserve = 0;
+ break;
+ default:
+ ut_error;
}
- success = fil_space_reserve_free_extents(space, n_free, n_ext);
- *n_reserved = n_ext;
-
- if (success) {
- return(TRUE);
+ if (fil_space_reserve_free_extents(space_id, n_free, n_ext)) {
+ return(true);
}
try_to_extend:
- success = fsp_try_extend_data_file(&n_pages_added, space,
- space_header, mtr);
-
- if (success && n_pages_added > 0) {
- total_reserved += n_pages_added;
+ if (ulint n = fsp_try_extend_data_file(space, space_header, mtr)) {
+ total_reserved += n;
goto try_again;
}
- return(FALSE);
+ return(false);
}
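
A worked example of the FSP_NORMAL arithmetic above, under the common 16KiB-page assumptions (FSP_EXTENT_SIZE = 64, physical page size 16384; both assumed, not guaranteed):

        #include <cstdio>

        static const unsigned FSP_EXTENT_SIZE_ = 64;
        static const unsigned PHYS_PAGE_SIZE_  = 16384;

        int main()
        {
                unsigned size       = 64000; /* FSP_SIZE, in pages */
                unsigned free_limit = 51200; /* FSP_FREE_LIMIT */
                unsigned free_len   = 10;    /* extents on FSP_FREE */
                unsigned n_ext      = 2;     /* extents the caller wants */

                /* Extents above the free limit, minus those that will
                hold extent descriptor pages. */
                unsigned n_free_up = (size - free_limit) / FSP_EXTENT_SIZE_;
                if (n_free_up > 0) {
                        n_free_up--;
                        n_free_up -= n_free_up
                                / (PHYS_PAGE_SIZE_ / FSP_EXTENT_SIZE_);
                }
                unsigned n_free = free_len + n_free_up; /* 10 + 199 = 209 */

                /* FSP_NORMAL: 2 extents + 1 % of the space size. */
                unsigned reserve = 2 + ((size / FSP_EXTENT_SIZE_) * 2) / 200;

                std::printf("n_free=%u reserve=%u ok=%d\n",
                            n_free, reserve, n_free > reserve + n_ext);
        }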
-/**********************************************************************//**
-This function should be used to get information on how much we still
-will be able to insert new data to the database without running out the
-tablespace. Only free extents are taken into account and we also subtract
-the safety margin required by the above function fsp_reserve_free_extents.
-@return available space in kB */
-UNIV_INTERN
-ullint
+/** Calculate how many KiB of new data we will be able to insert into the
+tablespace without running out of space.
+@param[in] space_id tablespace ID
+@return available space in KiB
+@retval UINTMAX_MAX if unknown */
+uintmax_t
fsp_get_available_space_in_free_extents(
-/*====================================*/
- ulint space) /*!< in: space id */
+ ulint space_id)
{
- fsp_header_t* space_header;
- ulint n_free_list_ext;
- ulint free_limit;
- ulint size;
- ulint flags;
- ulint zip_size;
- ulint n_free;
- ulint n_free_up;
- ulint reserve;
- rw_lock_t* latch;
- mtr_t mtr;
-
- /* The convoluted mutex acquire is to overcome latching order
- issues: The problem is that the fil_mutex is at a lower level
- than the tablespace latch and the buffer pool mutex. We have to
- first prevent any operations on the file system by acquiring the
- dictionary mutex. Then acquire the tablespace latch to obey the
- latching order and then release the dictionary mutex. That way we
- ensure that the tablespace instance can't be freed while we are
- examining its contents (see fil_space_free()).
-
- However, there is one further complication, we release the fil_mutex
- when we need to invalidate the the pages in the buffer pool and we
- reacquire the fil_mutex when deleting and freeing the tablespace
- instance in fil0fil.cc. Here we need to account for that situation
- too. */
-
- mutex_enter(&dict_sys->mutex);
-
- /* At this stage there is no guarantee that the tablespace even
- exists in the cache. */
-
- if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) {
-
- mutex_exit(&dict_sys->mutex);
-
- return(ULLINT_UNDEFINED);
- }
-
- mtr_start(&mtr);
-
- latch = fil_space_get_latch(space, &flags);
-
- /* This should ensure that the tablespace instance can't be freed
- by another thread. However, the tablespace pages can still be freed
- from the buffer pool. We need to check for that again. */
-
- zip_size = fsp_flags_get_zip_size(flags);
-
- mtr_x_lock(latch, &mtr);
-
- mutex_exit(&dict_sys->mutex);
-
- /* At this point it is possible for the tablespace to be deleted and
- its pages removed from the buffer pool. We need to check for that
- situation. However, the tablespace instance can't be deleted because
- our latching above should ensure that. */
-
- if (fil_tablespace_is_being_deleted(space)) {
-
- mtr_commit(&mtr);
-
- return(ULLINT_UNDEFINED);
+ FilSpace space(space_id);
+ if (space() == NULL) {
+ return(UINTMAX_MAX);
}
- /* From here on even if the user has dropped the tablespace, the
- pages _must_ still exist in the buffer pool and the tablespace
- instance _must_ be in the file system hash table. */
-
- space_header = fsp_get_space_header(space, zip_size, &mtr);
-
- size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, &mtr);
-
- n_free_list_ext = flst_get_len(space_header + FSP_FREE, &mtr);
-
- free_limit = mtr_read_ulint(space_header + FSP_FREE_LIMIT,
- MLOG_4BYTES, &mtr);
- mtr_commit(&mtr);
+ return(fsp_get_available_space_in_free_extents(space));
+}
- if (size < FSP_EXTENT_SIZE) {
- ut_a(space != 0); /* This must be a single-table
- tablespace */
+/** Calculate how many KiB of new data we will be able to insert into the
+tablespace without running out of space. Start with a space object that has
+been acquired by the caller, who holds it for the calculation.
+@param[in] space tablespace object from fil_space_acquire()
+@return available space in KiB */
+uintmax_t
+fsp_get_available_space_in_free_extents(
+ const fil_space_t* space)
+{
+ ut_ad(space->n_pending_ops > 0);
+ ulint size_in_header = space->size_in_header;
+ if (size_in_header < FSP_EXTENT_SIZE) {
return(0); /* TODO: count free frag pages and
return a value based on that */
}
@@ -2906,41 +2889,30 @@ fsp_get_available_space_in_free_extents(
/* Below we play safe when counting free extents above the free limit:
some of them will contain extent descriptor pages, and therefore
will not be free extents */
+ ut_ad(size_in_header >= space->free_limit);
+ ulint n_free_up =
+ (size_in_header - space->free_limit) / FSP_EXTENT_SIZE;
- n_free_up = (size - free_limit) / FSP_EXTENT_SIZE;
-
+ page_size_t page_size(space->flags);
if (n_free_up > 0) {
n_free_up--;
- if (!zip_size) {
- n_free_up -= n_free_up
- / (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE);
- } else {
- n_free_up -= n_free_up
- / (zip_size / FSP_EXTENT_SIZE);
- }
+ n_free_up -= n_free_up / (page_size.physical()
+ / FSP_EXTENT_SIZE);
}
- n_free = n_free_list_ext + n_free_up;
-
/* We reserve 1 extent + 0.5 % of the space size to undo logs
and 1 extent + 0.5 % to cleaning operations; NOTE: this source
code is duplicated in the function above! */
- reserve = 2 + ((size / FSP_EXTENT_SIZE) * 2) / 200;
+ ulint reserve = 2 + ((size_in_header / FSP_EXTENT_SIZE) * 2) / 200;
+ ulint n_free = space->free_len + n_free_up;
if (reserve > n_free) {
return(0);
}
- if (!zip_size) {
- return((ullint) (n_free - reserve)
- * FSP_EXTENT_SIZE
- * (UNIV_PAGE_SIZE / 1024));
- } else {
- return((ullint) (n_free - reserve)
- * FSP_EXTENT_SIZE
- * (zip_size / 1024));
- }
+ return(static_cast<uintmax_t>(n_free - reserve)
+ * FSP_EXTENT_SIZE * (page_size.physical() / 1024));
}
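
The final conversion to KiB, in isolation, continuing the numbers from the sketch above (16KiB physical pages assumed):

        #include <cstdint>
        #include <cstdio>

        int main()
        {
                const uint64_t FSP_EXTENT_SIZE_ = 64;
                const uint64_t PAGE_KIB         = 16384 / 1024;
                uint64_t n_free  = 209; /* from the sketch above */
                uint64_t reserve = 12;

                uint64_t kib = n_free > reserve
                        ? (n_free - reserve) * FSP_EXTENT_SIZE_ * PAGE_KIB
                        : 0;
                /* (209 - 12) * 64 * 16 = 201728 KiB */
                std::printf("%llu KiB available\n",
                            (unsigned long long) kib);
        }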
/********************************************************************//**
@@ -2957,6 +2929,7 @@ fseg_mark_page_used(
{
ulint not_full_n_used;
+ ut_ad(fil_page_get_type(page_align(seg_inode)) == FIL_PAGE_INODE);
ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
== FSEG_MAGIC_N_VALUE);
@@ -2998,59 +2971,59 @@ fseg_mark_page_used(
}
}
-/**********************************************************************//**
-Frees a single page of a segment. */
+/** Frees a single page of a segment.
+@param[in] seg_inode segment inode
+@param[in,out] space tablespace
+@param[in] offset page number
+@param[in] page_size page size
+@param[in] ahi whether we may need to drop the adaptive
+hash index
+@param[in,out] mtr mini-transaction */
static
void
fseg_free_page_low(
-/*===============*/
- fseg_inode_t* seg_inode, /*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page, /*!< in: page offset */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ fseg_inode_t* seg_inode,
+ fil_space_t* space,
+ page_no_t offset,
+ const page_size_t& page_size,
+#ifdef BTR_CUR_HASH_ADAPT
+ bool ahi,
+#endif /* BTR_CUR_HASH_ADAPT */
+ mtr_t* mtr)
{
xdes_t* descr;
ulint not_full_n_used;
ulint state;
ib_id_t descr_id;
ib_id_t seg_id;
- ulint i;
ut_ad(seg_inode != NULL);
ut_ad(mtr != NULL);
ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
== FSEG_MAGIC_N_VALUE);
ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
-
+ ut_d(space->modify_check(*mtr));
+#ifdef BTR_CUR_HASH_ADAPT
/* Drop search system page hash index if the page is found in
the pool and is hashed */
- btr_search_drop_page_hash_when_freed(space, page);
+ if (ahi) {
+ btr_search_drop_page_hash_when_freed(
+ page_id_t(space->id, offset));
+ }
+#endif /* BTR_CUR_HASH_ADAPT */
- descr = xdes_get_descriptor(space, zip_size, page, mtr);
+ descr = xdes_get_descriptor(space, offset, page_size, mtr);
if (xdes_mtr_get_bit(descr, XDES_FREE_BIT,
- page % FSP_EXTENT_SIZE, mtr)) {
- fputs("InnoDB: Dump of the tablespace extent descriptor: ",
- stderr);
- ut_print_buf(stderr, descr, 40);
-
- fprintf(stderr, "\n"
- "InnoDB: Serious error! InnoDB is trying to"
- " free page %lu\n"
- "InnoDB: though it is already marked as free"
- " in the tablespace!\n"
- "InnoDB: The tablespace free space info is corrupt.\n"
- "InnoDB: You may need to dump your"
- " InnoDB tables and recreate the whole\n"
- "InnoDB: database!\n", (ulong) page);
-crash:
- fputs("InnoDB: Please refer to\n"
- "InnoDB: " REFMAN "forcing-innodb-recovery.html\n"
- "InnoDB: about forcing recovery.\n", stderr);
- ut_error;
+ offset % FSP_EXTENT_SIZE, mtr)) {
+ ib::fatal() << "InnoDB is trying to free page "
+ << page_id_t(space->id, offset)
+ << " though it is already marked as free in the"
+ " tablespace! The tablespace free space info is"
+ " corrupt. You may need to dump your tables and"
+ " recreate the whole database!"
+ << FORCE_RECOVERY_MSG;
}
state = xdes_get_state(descr, mtr);
@@ -3058,9 +3031,9 @@ crash:
if (state != XDES_FSEG) {
/* The page is in the fragment pages of the segment */
- for (i = 0;; i++) {
+ for (ulint i = 0;; i++) {
if (fseg_get_nth_frag_page_no(seg_inode, i, mtr)
- == page) {
+ == offset) {
fseg_set_nth_frag_page_no(seg_inode, i,
FIL_NULL, mtr);
@@ -3068,7 +3041,7 @@ crash:
}
}
- fsp_free_page(space, zip_size, page, mtr);
+ fsp_free_page(space, offset, page_size, mtr);
return;
}
@@ -3077,15 +3050,7 @@ crash:
descr_id = mach_read_from_8(descr + XDES_ID);
seg_id = mach_read_from_8(seg_inode + FSEG_ID);
-#if 0
- fprintf(stderr,
- "InnoDB: InnoDB is freeing space %lu page %lu,\n"
- "InnoDB: which belongs to descr seg %llu\n"
- "InnoDB: segment %llu.\n",
- (ulong) space, (ulong) page,
- (ullint) descr_id,
- (ullint) seg_id);
-#endif /* 0 */
+
if (UNIV_UNLIKELY(descr_id != seg_id)) {
fputs("InnoDB: Dump of the tablespace extent descriptor: ",
stderr);
@@ -3094,16 +3059,11 @@ crash:
ut_print_buf(stderr, seg_inode, 40);
putc('\n', stderr);
- fprintf(stderr,
- "InnoDB: Serious error: InnoDB is trying to"
- " free space %lu page %lu,\n"
- "InnoDB: which does not belong to"
- " segment %llu but belongs\n"
- "InnoDB: to segment %llu.\n",
- (ulong) space, (ulong) page,
- (ullint) descr_id,
- (ullint) seg_id);
- goto crash;
+ ib::fatal() << "InnoDB is trying to free page "
+ << page_id_t(space->id, offset)
+ << ", which does not belong to segment " << descr_id
+ << " but belongs to segment " << seg_id << "."
+ << FORCE_RECOVERY_MSG;
}
not_full_n_used = mtr_read_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
@@ -3123,133 +3083,141 @@ crash:
not_full_n_used - 1, MLOG_4BYTES, mtr);
}
- xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
- xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
+ const ulint bit = offset % FSP_EXTENT_SIZE;
+
+ xdes_set_bit(descr, XDES_FREE_BIT, bit, TRUE, mtr);
+ /* xdes_init() should have set all XDES_CLEAN_BIT */
+ ut_ad(xdes_get_bit(descr, XDES_CLEAN_BIT, bit));
if (xdes_is_free(descr, mtr)) {
/* The extent has become free: free it to space */
flst_remove(seg_inode + FSEG_NOT_FULL,
descr + XDES_FLST_NODE, mtr);
- fsp_free_extent(space, zip_size, page, mtr);
+ fsp_free_extent(space, offset, page_size, mtr);
}
-
- mtr->n_freed_pages++;
}
+#ifndef BTR_CUR_HASH_ADAPT
+# define fseg_free_page_low(inode, space, offset, page_size, ahi, mtr) \
+ fseg_free_page_low(inode, space, offset, page_size, mtr)
+#endif /* !BTR_CUR_HASH_ADAPT */
+
/**********************************************************************//**
Frees a single page of a segment. */
-UNIV_INTERN
void
-fseg_free_page(
-/*===========*/
+fseg_free_page_func(
fseg_header_t* seg_header, /*!< in: segment header */
- ulint space, /*!< in: space id */
+ ulint space_id,/*!< in: space id */
ulint page, /*!< in: page offset */
+#ifdef BTR_CUR_HASH_ADAPT
+ bool ahi, /*!< in: whether we may need to drop
+ the adaptive hash index */
+#endif /* BTR_CUR_HASH_ADAPT */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
- ulint flags;
- ulint zip_size;
- fseg_inode_t* seg_inode;
- rw_lock_t* latch;
+ DBUG_ENTER("fseg_free_page");
+ fseg_inode_t* seg_inode;
+ buf_block_t* iblock;
+ fil_space_t* space = mtr_x_lock_space(space_id, mtr);
+ const page_size_t page_size(space->flags);
- latch = fil_space_get_latch(space, &flags);
- zip_size = fsp_flags_get_zip_size(flags);
+ DBUG_LOG("fseg_free_page", "space_id: " << space_id
+ << ", page_no: " << page);
- mtr_x_lock(latch, mtr);
+ seg_inode = fseg_inode_get(seg_header, space_id, page_size, mtr,
+ &iblock);
+ fil_block_check_type(*iblock, FIL_PAGE_INODE, mtr);
- seg_inode = fseg_inode_get(seg_header, space, zip_size, mtr);
+ fseg_free_page_low(seg_inode, space, page, page_size, ahi, mtr);
- fseg_free_page_low(seg_inode, space, zip_size, page, mtr);
+ ut_d(buf_page_set_file_page_was_freed(page_id_t(space_id, page)));
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- buf_page_set_file_page_was_freed(space, page);
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
+ DBUG_VOID_RETURN;
}
-/**********************************************************************//**
-Checks if a single page of a segment is free.
-@return true if free */
-UNIV_INTERN
+/** Determine whether a page is free.
+@param[in,out] space tablespace
+@param[in] page page number
+@return whether the page is marked as free */
bool
-fseg_page_is_free(
-/*==============*/
- fseg_header_t* seg_header, /*!< in: segment header */
- ulint space, /*!< in: space id */
- ulint page) /*!< in: page offset */
+fseg_page_is_free(fil_space_t* space, unsigned page)
{
+ bool is_free;
mtr_t mtr;
- ibool is_free;
- ulint flags;
- rw_lock_t* latch;
- xdes_t* descr;
- ulint zip_size;
- fseg_inode_t* seg_inode;
-
- latch = fil_space_get_latch(space, &flags);
- zip_size = dict_tf_get_zip_size(flags);
-
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- seg_inode = fseg_inode_get(seg_header, space, zip_size, &mtr);
-
- ut_a(seg_inode);
- ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
- == FSEG_MAGIC_N_VALUE);
- ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
-
- descr = xdes_get_descriptor(space, zip_size, page, &mtr);
- ut_a(descr);
-
- is_free = xdes_mtr_get_bit(
- descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, &mtr);
-
- mtr_commit(&mtr);
+ page_size_t page_size(space->flags);
+ page_no_t dpage = xdes_calc_descriptor_page(page_size, page);
+
+ mtr.start();
+ mtr_s_lock(&space->latch, &mtr);
+
+ if (page >= space->free_limit || page >= space->size_in_header) {
+ is_free = true;
+ } else if (const xdes_t* descr = xdes_get_descriptor_const(
+ space, dpage, page, page_size, &mtr)) {
+ is_free = xdes_get_bit(descr, XDES_FREE_BIT,
+ page % FSP_EXTENT_SIZE);
+ } else {
+ is_free = true;
+ }
+ mtr.commit();
return(is_free);
}
-/**********************************************************************//**
-Frees an extent of a segment to the space free list. */
+/** Free an extent of a segment to the space free list.
+@param[in,out] seg_inode segment inode
+@param[in,out] space tablespace
+@param[in] page_size page size
+@param[in] page page number in the extent
+@param[in] ahi whether we may need to drop
+ the adaptive hash index
+@param[in,out] mtr mini-transaction */
+MY_ATTRIBUTE((nonnull))
static
void
fseg_free_extent(
-/*=============*/
- fseg_inode_t* seg_inode, /*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page, /*!< in: a page in the extent */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ fseg_inode_t* seg_inode,
+ fil_space_t* space,
+ const page_size_t& page_size,
+ ulint page,
+#ifdef BTR_CUR_HASH_ADAPT
+ bool ahi,
+#endif /* BTR_CUR_HASH_ADAPT */
+ mtr_t* mtr)
{
ulint first_page_in_extent;
xdes_t* descr;
ulint not_full_n_used;
ulint descr_n_used;
- ulint i;
- ut_ad(seg_inode != NULL);
ut_ad(mtr != NULL);
- descr = xdes_get_descriptor(space, zip_size, page, mtr);
+ descr = xdes_get_descriptor(space, page, page_size, mtr);
ut_a(xdes_get_state(descr, mtr) == XDES_FSEG);
ut_a(!memcmp(descr + XDES_ID, seg_inode + FSEG_ID, 8));
ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
== FSEG_MAGIC_N_VALUE);
+ ut_d(space->modify_check(*mtr));
first_page_in_extent = page - (page % FSP_EXTENT_SIZE);
- for (i = 0; i < FSP_EXTENT_SIZE; i++) {
- if (!xdes_mtr_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
+#ifdef BTR_CUR_HASH_ADAPT
+ if (ahi) {
+ for (ulint i = 0; i < FSP_EXTENT_SIZE; i++) {
+ if (!xdes_mtr_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
- /* Drop search system page hash index if the page is
- found in the pool and is hashed */
+ /* Drop search system page hash index
+ if the page is found in the pool and
+ is hashed */
- btr_search_drop_page_hash_when_freed(
- space, first_page_in_extent + i);
+ btr_search_drop_page_hash_when_freed(
+ page_id_t(space->id,
+ first_page_in_extent + i));
+ }
}
}
+#endif /* BTR_CUR_HASH_ADAPT */
if (xdes_is_full(descr, mtr)) {
flst_remove(seg_inode + FSEG_FULL,
@@ -3271,76 +3239,82 @@ fseg_free_extent(
MLOG_4BYTES, mtr);
}
- fsp_free_extent(space, zip_size, page, mtr);
+ fsp_free_extent(space, page, page_size, mtr);
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- for (i = 0; i < FSP_EXTENT_SIZE; i++) {
+#ifdef UNIV_DEBUG
+ for (ulint i = 0; i < FSP_EXTENT_SIZE; i++) {
- buf_page_set_file_page_was_freed(space,
- first_page_in_extent + i);
+ buf_page_set_file_page_was_freed(
+ page_id_t(space->id, first_page_in_extent + i));
}
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
+#endif /* UNIV_DEBUG */
}
+#ifndef BTR_CUR_HASH_ADAPT
+# define fseg_free_extent(inode, space, page_size, page, ahi, mtr) \
+ fseg_free_extent(inode, space, page_size, page, mtr)
+#endif /* !BTR_CUR_HASH_ADAPT */
+
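/* Illustrative call site (not part of this patch): thanks to the
   forwarding macro above, the same six-argument call compiles whether
   or not BTR_CUR_HASH_ADAPT is defined; without it, the preprocessor
   silently drops the `ahi` argument. */
fseg_free_extent(inode, space, page_size, page,
                 true /* may need to drop AHI entries */, mtr);
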
/**********************************************************************//**
Frees part of a segment. A whole segment can be freed by calling this
function repeatedly in separate mini-transactions; doing all the
freeing in a single mini-transaction might make that mini-transaction
too big.
-@return TRUE if freeing completed */
-UNIV_INTERN
+@return TRUE if freeing completed */
ibool
-fseg_free_step(
-/*===========*/
+fseg_free_step_func(
fseg_header_t* header, /*!< in, own: segment header; NOTE: if the header
resides on the first page of the frag list
of the segment, this pointer becomes obsolete
after the last freeing step */
+#ifdef BTR_CUR_HASH_ADAPT
+ bool ahi, /*!< in: whether we may need to drop
+ the adaptive hash index */
+#endif /* BTR_CUR_HASH_ADAPT */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
ulint n;
ulint page;
xdes_t* descr;
fseg_inode_t* inode;
- ulint space;
- ulint flags;
- ulint zip_size;
+ ulint space_id;
ulint header_page;
- rw_lock_t* latch;
- space = page_get_space_id(page_align(header));
- header_page = page_get_page_no(page_align(header));
+ DBUG_ENTER("fseg_free_step");
- latch = fil_space_get_latch(space, &flags);
- zip_size = fsp_flags_get_zip_size(flags);
+ space_id = page_get_space_id(page_align(header));
+ header_page = page_get_page_no(page_align(header));
- mtr_x_lock(latch, mtr);
+ fil_space_t* space = mtr_x_lock_space(space_id, mtr);
+ const page_size_t page_size(space->flags);
- descr = xdes_get_descriptor(space, zip_size, header_page, mtr);
+ descr = xdes_get_descriptor(space, header_page, page_size, mtr);
/* Check that the header resides on a page which has not been
freed yet */
ut_a(xdes_mtr_get_bit(descr, XDES_FREE_BIT,
header_page % FSP_EXTENT_SIZE, mtr) == FALSE);
+ buf_block_t* iblock;
- inode = fseg_inode_try_get(header, space, zip_size, mtr);
+ inode = fseg_inode_try_get(header, space_id, page_size, mtr, &iblock);
- if (UNIV_UNLIKELY(inode == NULL)) {
- fprintf(stderr, "double free of inode from %u:%u\n",
- (unsigned) space, (unsigned) header_page);
- return(TRUE);
+ if (inode == NULL) {
+ ib::info() << "Double free of inode from "
+ << page_id_t(space_id, header_page);
+ DBUG_RETURN(TRUE);
}
- descr = fseg_get_first_extent(inode, space, zip_size, mtr);
+ fil_block_check_type(*iblock, FIL_PAGE_INODE, mtr);
+ descr = fseg_get_first_extent(inode, space, page_size, mtr);
if (descr != NULL) {
/* Free the extent held by the segment */
page = xdes_get_offset(descr);
- fseg_free_extent(inode, space, zip_size, page, mtr);
+ fseg_free_extent(inode, space, page_size, page, ahi, mtr);
- return(FALSE);
+ DBUG_RETURN(FALSE);
}
/* Free a frag page */
@@ -3348,64 +3322,66 @@ fseg_free_step(
if (n == ULINT_UNDEFINED) {
/* Freeing completed: free the segment inode */
- fsp_free_seg_inode(space, zip_size, inode, mtr);
+ fsp_free_seg_inode(space, page_size, inode, mtr);
- return(TRUE);
+ DBUG_RETURN(TRUE);
}
- fseg_free_page_low(inode, space, zip_size,
- fseg_get_nth_frag_page_no(inode, n, mtr), mtr);
+ fseg_free_page_low(
+ inode, space,
+ fseg_get_nth_frag_page_no(inode, n, mtr),
+ page_size, ahi, mtr);
n = fseg_find_last_used_frag_page_slot(inode, mtr);
if (n == ULINT_UNDEFINED) {
/* Freeing completed: free the segment inode */
- fsp_free_seg_inode(space, zip_size, inode, mtr);
+ fsp_free_seg_inode(space, page_size, inode, mtr);
- return(TRUE);
+ DBUG_RETURN(TRUE);
}
- return(FALSE);
+ DBUG_RETURN(FALSE);
}
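
/* Illustrative driver loop (a sketch, not part of this patch): freeing
   a whole segment one mini-transaction at a time, as the comment above
   prescribes. Assumes a caller-facing fseg_free_step() that forwards to
   fseg_free_step_func(), following the same macro pattern as
   fseg_free_extent() above. */
mtr_t   mtr;
ibool   finished;
do {
        mtr.start();
        finished = fseg_free_step(header, true, &mtr);
        mtr.commit();
} while (!finished);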
/**********************************************************************//**
Frees part of a segment. Differs from fseg_free_step because this function
leaves the header page unfreed.
-@return TRUE if freeing completed, except the header page */
-UNIV_INTERN
+@return TRUE if freeing completed, except the header page */
ibool
-fseg_free_step_not_header(
-/*======================*/
+fseg_free_step_not_header_func(
fseg_header_t* header, /*!< in: segment header which must reside on
the first fragment page of the segment */
+#ifdef BTR_CUR_HASH_ADAPT
+ bool ahi, /*!< in: whether we may need to drop
+ the adaptive hash index */
+#endif /* BTR_CUR_HASH_ADAPT */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
ulint n;
ulint page;
xdes_t* descr;
fseg_inode_t* inode;
- ulint space;
- ulint flags;
- ulint zip_size;
+ ulint space_id;
ulint page_no;
- rw_lock_t* latch;
- space = page_get_space_id(page_align(header));
+ space_id = page_get_space_id(page_align(header));
+ ut_ad(mtr->is_named_space(space_id));
- latch = fil_space_get_latch(space, &flags);
- zip_size = fsp_flags_get_zip_size(flags);
+ fil_space_t* space = mtr_x_lock_space(space_id, mtr);
+ const page_size_t page_size(space->flags);
+ buf_block_t* iblock;
- mtr_x_lock(latch, mtr);
+ inode = fseg_inode_get(header, space_id, page_size, mtr, &iblock);
+ fil_block_check_type(*iblock, FIL_PAGE_INODE, mtr);
- inode = fseg_inode_get(header, space, zip_size, mtr);
-
- descr = fseg_get_first_extent(inode, space, zip_size, mtr);
+ descr = fseg_get_first_extent(inode, space, page_size, mtr);
if (descr != NULL) {
/* Free the extent held by the segment */
page = xdes_get_offset(descr);
- fseg_free_extent(inode, space, zip_size, page, mtr);
+ fseg_free_extent(inode, space, page_size, page, ahi, mtr);
return(FALSE);
}
@@ -3425,194 +3401,55 @@ fseg_free_step_not_header(
return(TRUE);
}
- fseg_free_page_low(inode, space, zip_size, page_no, mtr);
+ fseg_free_page_low(inode, space, page_no, page_size, ahi, mtr);
return(FALSE);
}
-/**********************************************************************//**
-Returns the first extent descriptor for a segment. We think of the extent
-lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL
--> FSEG_FREE.
-@return the first extent descriptor, or NULL if none */
+/** Returns the first extent descriptor for a segment.
+We think of the extent lists of the segment catenated in the order
+FSEG_FULL -> FSEG_NOT_FULL -> FSEG_FREE.
+@param[in] inode segment inode
+@param[in] space tablespace
+@param[in] page_size page size
+@param[in,out] mtr mini-transaction
+@return the first extent descriptor, or NULL if none */
+MY_ATTRIBUTE((nonnull, warn_unused_result))
static
xdes_t*
fseg_get_first_extent(
-/*==================*/
- fseg_inode_t* inode, /*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ fseg_inode_t* inode,
+ const fil_space_t* space,
+ const page_size_t& page_size,
+ mtr_t* mtr)
{
fil_addr_t first;
- xdes_t* descr;
- ut_ad(inode && mtr);
-
- ut_ad(space == page_get_space_id(page_align(inode)));
+ ut_ad(space->id == page_get_space_id(page_align(inode)));
ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
- first = fil_addr_null;
-
- if (flst_get_len(inode + FSEG_FULL, mtr) > 0) {
+ if (flst_get_len(inode + FSEG_FULL) > 0) {
first = flst_get_first(inode + FSEG_FULL, mtr);
- } else if (flst_get_len(inode + FSEG_NOT_FULL, mtr) > 0) {
+ } else if (flst_get_len(inode + FSEG_NOT_FULL) > 0) {
first = flst_get_first(inode + FSEG_NOT_FULL, mtr);
- } else if (flst_get_len(inode + FSEG_FREE, mtr) > 0) {
+ } else if (flst_get_len(inode + FSEG_FREE) > 0) {
first = flst_get_first(inode + FSEG_FREE, mtr);
- }
-
- if (first.page == FIL_NULL) {
-
+ } else {
return(NULL);
}
- descr = xdes_lst_get_descriptor(space, zip_size, first, mtr);
- return(descr);
-}
-
-/*******************************************************************//**
-Validates a segment.
-@return TRUE if ok */
-static
-ibool
-fseg_validate_low(
-/*==============*/
- fseg_inode_t* inode, /*!< in: segment inode */
- mtr_t* mtr2) /*!< in/out: mini-transaction */
-{
- ulint space;
- ib_id_t seg_id;
- mtr_t mtr;
- xdes_t* descr;
- fil_addr_t node_addr;
- ulint n_used = 0;
- ulint n_used2 = 0;
-
- ut_ad(mtr_memo_contains_page(mtr2, inode, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
-
- space = page_get_space_id(page_align(inode));
-
- seg_id = mach_read_from_8(inode + FSEG_ID);
- n_used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED,
- MLOG_4BYTES, mtr2);
- flst_validate(inode + FSEG_FREE, mtr2);
- flst_validate(inode + FSEG_NOT_FULL, mtr2);
- flst_validate(inode + FSEG_FULL, mtr2);
-
- /* Validate FSEG_FREE list */
- node_addr = flst_get_first(inode + FSEG_FREE, mtr2);
-
- while (!fil_addr_is_null(node_addr)) {
- ulint flags;
- ulint zip_size;
-
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space, &flags), &mtr);
- zip_size = fsp_flags_get_zip_size(flags);
-
- descr = xdes_lst_get_descriptor(space, zip_size,
- node_addr, &mtr);
-
- ut_a(xdes_get_n_used(descr, &mtr) == 0);
- ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
- ut_a(mach_read_from_8(descr + XDES_ID) == seg_id);
-
- node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
- mtr_commit(&mtr);
- }
-
- /* Validate FSEG_NOT_FULL list */
-
- node_addr = flst_get_first(inode + FSEG_NOT_FULL, mtr2);
-
- while (!fil_addr_is_null(node_addr)) {
- ulint flags;
- ulint zip_size;
-
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space, &flags), &mtr);
- zip_size = fsp_flags_get_zip_size(flags);
-
- descr = xdes_lst_get_descriptor(space, zip_size,
- node_addr, &mtr);
-
- ut_a(xdes_get_n_used(descr, &mtr) > 0);
- ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE);
- ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
- ut_a(mach_read_from_8(descr + XDES_ID) == seg_id);
-
- n_used2 += xdes_get_n_used(descr, &mtr);
-
- node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
- mtr_commit(&mtr);
- }
-
- /* Validate FSEG_FULL list */
-
- node_addr = flst_get_first(inode + FSEG_FULL, mtr2);
+ ut_ad(first.page != FIL_NULL);
- while (!fil_addr_is_null(node_addr)) {
- ulint flags;
- ulint zip_size;
-
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space, &flags), &mtr);
- zip_size = fsp_flags_get_zip_size(flags);
-
- descr = xdes_lst_get_descriptor(space, zip_size,
- node_addr, &mtr);
-
- ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE);
- ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
- ut_a(mach_read_from_8(descr + XDES_ID) == seg_id);
-
- node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
- mtr_commit(&mtr);
- }
-
- ut_a(n_used == n_used2);
-
- return(TRUE);
+ return(first.page == FIL_NULL ? NULL
+ : xdes_lst_get_descriptor(space, page_size, first, mtr));
}
-#ifdef UNIV_DEBUG
-/*******************************************************************//**
-Validates a segment.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-fseg_validate(
-/*==========*/
- fseg_header_t* header, /*!< in: segment header */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- fseg_inode_t* inode;
- ibool ret;
- ulint space;
- ulint flags;
- ulint zip_size;
-
- space = page_get_space_id(page_align(header));
-
- mtr_x_lock(fil_space_get_latch(space, &flags), mtr);
- zip_size = fsp_flags_get_zip_size(flags);
-
- inode = fseg_inode_get(header, space, zip_size, mtr);
-
- ret = fseg_validate_low(inode, mtr);
-
- return(ret);
-}
-#endif /* UNIV_DEBUG */
-
+#ifdef UNIV_BTR_PRINT
/*******************************************************************//**
Writes info of a segment. */
static
@@ -3633,7 +3470,7 @@ fseg_print_low(
ulint page_no;
ib_id_t seg_id;
- ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_SX_FIX));
space = page_get_space_id(page_align(inode));
page_no = page_get_page_no(page_align(inode));
@@ -3644,27 +3481,24 @@ fseg_print_low(
n_used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED,
MLOG_4BYTES, mtr);
n_frag = fseg_get_n_frag_pages(inode, mtr);
- n_free = flst_get_len(inode + FSEG_FREE, mtr);
- n_not_full = flst_get_len(inode + FSEG_NOT_FULL, mtr);
- n_full = flst_get_len(inode + FSEG_FULL, mtr);
-
- fprintf(stderr,
- "SEGMENT id %llu space %lu; page %lu;"
- " res %lu used %lu; full ext %lu\n"
- "fragm pages %lu; free extents %lu;"
- " not full extents %lu: pages %lu\n",
- (ullint) seg_id,
- (ulong) space, (ulong) page_no,
- (ulong) reserved, (ulong) used, (ulong) n_full,
- (ulong) n_frag, (ulong) n_free, (ulong) n_not_full,
- (ulong) n_used);
+ n_free = flst_get_len(inode + FSEG_FREE);
+ n_not_full = flst_get_len(inode + FSEG_NOT_FULL);
+ n_full = flst_get_len(inode + FSEG_FULL);
+
+ ib::info() << "SEGMENT id " << seg_id
+ << " space " << space << ";"
+ << " page " << page_no << ";"
+ << " res " << reserved << " used " << used << ";"
+ << " full ext " << n_full << ";"
+ << " fragm pages " << n_frag << ";"
+ << " free extents " << n_free << ";"
+ << " not full extents " << n_not_full << ": pages " << n_used;
+
ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
}
-#ifdef UNIV_BTR_PRINT
/*******************************************************************//**
Writes info of a segment. */
-UNIV_INTERN
void
fseg_print(
/*=======*/
@@ -3672,461 +3506,36 @@ fseg_print(
mtr_t* mtr) /*!< in/out: mini-transaction */
{
fseg_inode_t* inode;
- ulint space;
- ulint flags;
- ulint zip_size;
-
- space = page_get_space_id(page_align(header));
+ ulint space_id;
- mtr_x_lock(fil_space_get_latch(space, &flags), mtr);
- zip_size = fsp_flags_get_zip_size(flags);
+ space_id = page_get_space_id(page_align(header));
+ const fil_space_t* space = mtr_x_lock_space(space_id, mtr);
+ const page_size_t page_size(space->flags);
- inode = fseg_inode_get(header, space, zip_size, mtr);
+ inode = fseg_inode_get(header, space_id, page_size, mtr);
fseg_print_low(inode, mtr);
}
#endif /* UNIV_BTR_PRINT */
-/*******************************************************************//**
-Validates the file space system and its segments.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-fsp_validate(
-/*=========*/
- ulint space) /*!< in: space id */
-{
- fsp_header_t* header;
- fseg_inode_t* seg_inode;
- page_t* seg_inode_page;
- rw_lock_t* latch;
- ulint size;
- ulint flags;
- ulint zip_size;
- ulint free_limit;
- ulint frag_n_used;
- mtr_t mtr;
- mtr_t mtr2;
- xdes_t* descr;
- fil_addr_t node_addr;
- fil_addr_t next_node_addr;
- ulint descr_count = 0;
- ulint n_used = 0;
- ulint n_used2 = 0;
- ulint n_full_frag_pages;
- ulint n;
- ulint seg_inode_len_free;
- ulint seg_inode_len_full;
-
- latch = fil_space_get_latch(space, &flags);
- zip_size = fsp_flags_get_zip_size(flags);
- ut_a(ut_is_2pow(zip_size));
- ut_a(zip_size <= UNIV_ZIP_SIZE_MAX);
- ut_a(!zip_size || zip_size >= UNIV_ZIP_SIZE_MIN);
-
- /* Start first a mini-transaction mtr2 to lock out all other threads
- from the fsp system */
- mtr_start(&mtr2);
- mtr_x_lock(latch, &mtr2);
-
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
-
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
- free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT,
- MLOG_4BYTES, &mtr);
- frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED,
- MLOG_4BYTES, &mtr);
-
- n_full_frag_pages = FSP_EXTENT_SIZE
- * flst_get_len(header + FSP_FULL_FRAG, &mtr);
-
- if (UNIV_UNLIKELY(free_limit > size)) {
-
- ut_a(space != 0);
- ut_a(size < FSP_EXTENT_SIZE);
- }
-
- flst_validate(header + FSP_FREE, &mtr);
- flst_validate(header + FSP_FREE_FRAG, &mtr);
- flst_validate(header + FSP_FULL_FRAG, &mtr);
-
- mtr_commit(&mtr);
-
- /* Validate FSP_FREE list */
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
- node_addr = flst_get_first(header + FSP_FREE, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- descr_count++;
- descr = xdes_lst_get_descriptor(space, zip_size,
- node_addr, &mtr);
-
- ut_a(xdes_get_n_used(descr, &mtr) == 0);
- ut_a(xdes_get_state(descr, &mtr) == XDES_FREE);
-
- node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
- mtr_commit(&mtr);
- }
-
- /* Validate FSP_FREE_FRAG list */
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
- node_addr = flst_get_first(header + FSP_FREE_FRAG, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- descr_count++;
- descr = xdes_lst_get_descriptor(space, zip_size,
- node_addr, &mtr);
-
- ut_a(xdes_get_n_used(descr, &mtr) > 0);
- ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE);
- ut_a(xdes_get_state(descr, &mtr) == XDES_FREE_FRAG);
-
- n_used += xdes_get_n_used(descr, &mtr);
- node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
-
- mtr_commit(&mtr);
- }
-
- /* Validate FSP_FULL_FRAG list */
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
- node_addr = flst_get_first(header + FSP_FULL_FRAG, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- descr_count++;
- descr = xdes_lst_get_descriptor(space, zip_size,
- node_addr, &mtr);
-
- ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE);
- ut_a(xdes_get_state(descr, &mtr) == XDES_FULL_FRAG);
-
- node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
- mtr_commit(&mtr);
- }
-
- /* Validate segments */
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
-
- node_addr = flst_get_first(header + FSP_SEG_INODES_FULL, &mtr);
-
- seg_inode_len_full = flst_get_len(header + FSP_SEG_INODES_FULL, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
-
- n = 0;
- do {
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- seg_inode_page = fut_get_ptr(
- space, zip_size, node_addr, RW_X_LATCH, &mtr)
- - FSEG_INODE_PAGE_NODE;
-
- seg_inode = fsp_seg_inode_page_get_nth_inode(
- seg_inode_page, n, zip_size, &mtr);
- ut_a(mach_read_from_8(seg_inode + FSEG_ID) != 0);
- fseg_validate_low(seg_inode, &mtr);
-
- descr_count += flst_get_len(seg_inode + FSEG_FREE,
- &mtr);
- descr_count += flst_get_len(seg_inode + FSEG_FULL,
- &mtr);
- descr_count += flst_get_len(seg_inode + FSEG_NOT_FULL,
- &mtr);
-
- n_used2 += fseg_get_n_frag_pages(seg_inode, &mtr);
-
- next_node_addr = flst_get_next_addr(
- seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr);
- mtr_commit(&mtr);
- } while (++n < FSP_SEG_INODES_PER_PAGE(zip_size));
-
- node_addr = next_node_addr;
- }
-
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
-
- node_addr = flst_get_first(header + FSP_SEG_INODES_FREE, &mtr);
-
- seg_inode_len_free = flst_get_len(header + FSP_SEG_INODES_FREE, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
-
- n = 0;
-
- do {
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- seg_inode_page = fut_get_ptr(
- space, zip_size, node_addr, RW_X_LATCH, &mtr)
- - FSEG_INODE_PAGE_NODE;
-
- seg_inode = fsp_seg_inode_page_get_nth_inode(
- seg_inode_page, n, zip_size, &mtr);
- if (mach_read_from_8(seg_inode + FSEG_ID)) {
- fseg_validate_low(seg_inode, &mtr);
-
- descr_count += flst_get_len(
- seg_inode + FSEG_FREE, &mtr);
- descr_count += flst_get_len(
- seg_inode + FSEG_FULL, &mtr);
- descr_count += flst_get_len(
- seg_inode + FSEG_NOT_FULL, &mtr);
- n_used2 += fseg_get_n_frag_pages(
- seg_inode, &mtr);
- }
-
- next_node_addr = flst_get_next_addr(
- seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr);
- mtr_commit(&mtr);
- } while (++n < FSP_SEG_INODES_PER_PAGE(zip_size));
-
- node_addr = next_node_addr;
- }
-
- ut_a(descr_count * FSP_EXTENT_SIZE == free_limit);
- if (!zip_size) {
- ut_a(n_used + n_full_frag_pages
- == n_used2 + 2 * ((free_limit + (UNIV_PAGE_SIZE - 1))
- / UNIV_PAGE_SIZE)
- + seg_inode_len_full + seg_inode_len_free);
- } else {
- ut_a(n_used + n_full_frag_pages
- == n_used2 + 2 * ((free_limit + (zip_size - 1))
- / zip_size)
- + seg_inode_len_full + seg_inode_len_free);
- }
- ut_a(frag_n_used == n_used);
-
- mtr_commit(&mtr2);
-
- return(TRUE);
-}
-
-/*******************************************************************//**
-Prints info of a file space. */
-UNIV_INTERN
-void
-fsp_print(
-/*======*/
- ulint space) /*!< in: space id */
-{
- fsp_header_t* header;
- fseg_inode_t* seg_inode;
- page_t* seg_inode_page;
- rw_lock_t* latch;
- ulint flags;
- ulint zip_size;
- ulint size;
- ulint free_limit;
- ulint frag_n_used;
- fil_addr_t node_addr;
- fil_addr_t next_node_addr;
- ulint n_free;
- ulint n_free_frag;
- ulint n_full_frag;
- ib_id_t seg_id;
- ulint n;
- ulint n_segs = 0;
- mtr_t mtr;
- mtr_t mtr2;
-
- latch = fil_space_get_latch(space, &flags);
- zip_size = fsp_flags_get_zip_size(flags);
-
- /* Start first a mini-transaction mtr2 to lock out all other threads
- from the fsp system */
-
- mtr_start(&mtr2);
-
- mtr_x_lock(latch, &mtr2);
-
- mtr_start(&mtr);
-
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
-
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
-
- free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES,
- &mtr);
- frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
- &mtr);
- n_free = flst_get_len(header + FSP_FREE, &mtr);
- n_free_frag = flst_get_len(header + FSP_FREE_FRAG, &mtr);
- n_full_frag = flst_get_len(header + FSP_FULL_FRAG, &mtr);
-
- seg_id = mach_read_from_8(header + FSP_SEG_ID);
-
- fprintf(stderr,
- "FILE SPACE INFO: id %lu\n"
- "size %lu, free limit %lu, free extents %lu\n"
- "not full frag extents %lu: used pages %lu,"
- " full frag extents %lu\n"
- "first seg id not used %llu\n",
- (ulong) space,
- (ulong) size, (ulong) free_limit, (ulong) n_free,
- (ulong) n_free_frag, (ulong) frag_n_used, (ulong) n_full_frag,
- (ullint) seg_id);
-
- mtr_commit(&mtr);
-
- /* Print segments */
-
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
-
- node_addr = flst_get_first(header + FSP_SEG_INODES_FULL, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
-
- n = 0;
-
- do {
-
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- seg_inode_page = fut_get_ptr(
- space, zip_size, node_addr, RW_X_LATCH, &mtr)
- - FSEG_INODE_PAGE_NODE;
-
- seg_inode = fsp_seg_inode_page_get_nth_inode(
- seg_inode_page, n, zip_size, &mtr);
- ut_a(mach_read_from_8(seg_inode + FSEG_ID) != 0);
- fseg_print_low(seg_inode, &mtr);
-
- n_segs++;
-
- next_node_addr = flst_get_next_addr(
- seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr);
- mtr_commit(&mtr);
- } while (++n < FSP_SEG_INODES_PER_PAGE(zip_size));
-
- node_addr = next_node_addr;
- }
-
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
-
- node_addr = flst_get_first(header + FSP_SEG_INODES_FREE, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
-
- n = 0;
-
- do {
-
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- seg_inode_page = fut_get_ptr(
- space, zip_size, node_addr, RW_X_LATCH, &mtr)
- - FSEG_INODE_PAGE_NODE;
-
- seg_inode = fsp_seg_inode_page_get_nth_inode(
- seg_inode_page, n, zip_size, &mtr);
- if (mach_read_from_8(seg_inode + FSEG_ID)) {
-
- fseg_print_low(seg_inode, &mtr);
- n_segs++;
- }
-
- next_node_addr = flst_get_next_addr(
- seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr);
- mtr_commit(&mtr);
- } while (++n < FSP_SEG_INODES_PER_PAGE(zip_size));
-
- node_addr = next_node_addr;
- }
-
- mtr_commit(&mtr2);
-
- fprintf(stderr, "NUMBER of file segments: %lu\n", (ulong) n_segs);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Compute offset after xdes where crypt data can be stored
-@param[in] zip_size Compressed size or 0
-@return offset */
-ulint
-fsp_header_get_crypt_offset(
- const ulint zip_size)
-{
- return (FSP_HEADER_OFFSET + (XDES_ARR_OFFSET + XDES_SIZE *
- (zip_size ? zip_size : UNIV_PAGE_SIZE) / FSP_EXTENT_SIZE));
-}
-
-/**********************************************************************//**
-Checks if a single page is free.
-@return true if free */
-UNIV_INTERN
-bool
-fsp_page_is_free_func(
-/*==============*/
- ulint space, /*!< in: space id */
- ulint page_no, /*!< in: page offset */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- const char *file,
- ulint line)
+#ifdef UNIV_DEBUG
+/** Print the file segment header to the given output stream.
+@param[in] out the output stream into which the object is printed.
+@return the output stream into which the object was printed. */
+std::ostream&
+fseg_header::to_stream(std::ostream& out) const
{
- ulint flags;
+ const ulint space = mtr_read_ulint(m_header + FSEG_HDR_SPACE,
+ MLOG_4BYTES, m_mtr);
+ const ulint page_no = mtr_read_ulint(m_header + FSEG_HDR_PAGE_NO,
+ MLOG_4BYTES, m_mtr);
- ut_ad(mtr);
+ const ulint offset = mtr_read_ulint(m_header + FSEG_HDR_OFFSET,
+ MLOG_2BYTES, m_mtr);
- mtr_x_lock_func(fil_space_get_latch(space, &flags), file, line, mtr);
- ulint zip_size = fsp_flags_get_zip_size(flags);
+ out << "[fseg_header_t: space=" << space << ", page="
+ << page_no << ", offset=" << offset << "]";
- xdes_t* descr = xdes_get_descriptor(space, zip_size, page_no, mtr);
- ut_a(descr);
-
- return xdes_mtr_get_bit(
- descr, XDES_FREE_BIT, page_no % FSP_EXTENT_SIZE, mtr);
+ return(out);
}
+#endif /* UNIV_DEBUG */
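
/* Example output of the stream operator above (hypothetical values):
   [fseg_header_t: space=4, page=3, offset=242] */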
diff --git a/storage/innobase/fsp/fsp0space.cc b/storage/innobase/fsp/fsp0space.cc
new file mode 100644
index 00000000000..757eeaf90ae
--- /dev/null
+++ b/storage/innobase/fsp/fsp0space.cc
@@ -0,0 +1,218 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file fsp/fsp0space.cc
+Shared tablespace implementation.
+
+Created 2012-11-16 by Sunny Bains as srv/srv0space.cc
+*******************************************************/
+
+#include "fsp0sysspace.h"
+#include "fsp0fsp.h"
+#include "os0file.h"
+#include "my_sys.h"
+
+/** Check if two tablespaces have common data file names.
+@param[in] other_space tablespace to check against this one
+@return true if any data file name and path is shared */
+bool
+Tablespace::intersection(
+ const Tablespace* other_space)
+{
+ for (files_t::const_iterator it(other_space->begin()),
+ end(other_space->end()); it != end; ++it) {
+
+ if (find(it->m_filename)) {
+
+ return(true);
+ }
+ }
+
+ return(false);
+}
+
+/** Frees the memory allocated by the Tablespace object. */
+void
+Tablespace::shutdown()
+{
+ for (iterator it = begin(); it != end(); ++it) {
+ it->shutdown();
+ }
+
+ m_files.clear();
+ ut_free(m_path);
+ m_path = NULL;
+ m_space_id = ULINT_UNDEFINED;
+}
+
+/** Note that the data file was found.
+@param[in,out] file Data file object to set */
+void
+Tablespace::file_found(Datafile& file)
+{
+ /* Note that the file exists and can be opened
+ in the appropriate mode. */
+ file.m_exists = true;
+
+ file.set_open_flags(
+ &file == &m_files.front()
+ ? OS_FILE_OPEN_RETRY : OS_FILE_OPEN);
+}
+
+/** Open the data files, or create them if they do not exist.
+@param[in] is_temp whether this is a temporary tablespace
+@return DB_SUCCESS or error code */
+dberr_t
+Tablespace::open_or_create(bool is_temp)
+{
+ fil_space_t* space = NULL;
+ dberr_t err = DB_SUCCESS;
+
+ ut_ad(!m_files.empty());
+
+ for (iterator it = begin(); it != end(); ++it) {
+
+ if (it->m_exists) {
+ err = it->open_or_create(
+ m_ignore_read_only
+ ? false : srv_read_only_mode);
+ } else {
+ err = it->open_or_create(
+ m_ignore_read_only
+ ? false : srv_read_only_mode);
+
+ /* Set the correct open flags now that we have
+ successfully created the file. */
+ if (err == DB_SUCCESS) {
+ file_found(*it);
+ }
+ }
+
+ if (err != DB_SUCCESS) {
+ break;
+ }
+
+ /* We can close the handle now and open the tablespace
+ the proper way. */
+ it->close();
+
+ if (it == begin()) {
+ /* First data file. */
+
+ /* Create the tablespace entry for the multi-file
+ tablespace in the tablespace manager. */
+ space = fil_space_create(
+ m_name, m_space_id, FSP_FLAGS_PAGE_SSIZE(),
+ is_temp
+ ? FIL_TYPE_TEMPORARY : FIL_TYPE_TABLESPACE,
+ NULL);
+ if (!space) {
+ return DB_ERROR;
+ }
+ }
+
+ ut_a(fil_validate());
+
+ space->add(it->m_filepath, OS_FILE_CLOSED, it->m_size,
+ false, true);
+ }
+
+ return(err);
+}
+
+/** Find a filename in the list of Datafiles for a tablespace
+@param[in] filename filename to look for
+@return true if the filename exists in the data files */
+bool
+Tablespace::find(const char* filename) const
+{
+ for (const_iterator it = begin(); it != end(); ++it) {
+
+ if (innobase_strcasecmp(filename, it->m_filename) == 0) {
+ return(true);
+ }
+ }
+
+ return(false);
+}
+
+/** Delete all the data files. */
+void
+Tablespace::delete_files()
+{
+ for (iterator it = begin(); it != end(); ++it) {
+
+ it->close();
+
+ bool file_pre_exists;
+ bool success = os_file_delete_if_exists(
+ innodb_data_file_key, it->m_filepath, &file_pre_exists);
+
+ if (success && file_pre_exists) {
+ ib::info() << "Removed temporary tablespace data"
+ " file: \"" << it->m_name << "\"";
+ }
+ }
+}
+
+/** Use the ADD DATAFILE path to create a Datafile object and add it to
+m_files.
+Parse the datafile path into a directory path and a filename with the
+extension '.ibd'. The path provided may or may not be absolute, but it
+must end with the extension .ibd and have a basename of at least 1 byte.
+
+Set the tablespace m_path member and add a Datafile with the filename.
+@param[in] datafile_added full path of the tablespace file
+@return DB_SUCCESS or error code */
+dberr_t
+Tablespace::add_datafile(
+ const char* datafile_added)
+{
+ /* The path provided ends in ".ibd". This was assured by
+ validate_create_tablespace_info() */
+ ut_d(const char* dot = strrchr(datafile_added, '.'));
+ ut_ad(dot != NULL && 0 == strcmp(dot, DOT_IBD));
+
+ char* filepath = mem_strdup(datafile_added);
+ os_normalize_path(filepath);
+
+ /* If the path is an absolute path, separate it onto m_path and a
+ basename. For relative paths, make the whole thing a basename so that
+ it can be appended to the datadir. */
+ bool is_abs_path = is_absolute_path(filepath);
+ size_t dirlen = (is_abs_path ? dirname_length(filepath) : 0);
+ const char* basename = filepath + dirlen;
+
+ /* If the pathname contains a directory separator, fill the
+ m_path member which is the default directory for files in this
+ tablespace. Leave it null otherwise. */
+ if (dirlen > 0) {
+ set_path(filepath, dirlen);
+ }
+
+ /* Now add a new Datafile and set the filepath
+ using the m_path created above. */
+ m_files.push_back(Datafile(m_name, m_flags,
+ FIL_IBD_FILE_INITIAL_SIZE, 0));
+ Datafile* datafile = &m_files.back();
+ datafile->make_filepath(m_path, basename, IBD);
+
+ ut_free(filepath);
+
+ return(DB_SUCCESS);
+}
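
/* Illustrative examples (hypothetical paths) of the split performed by
   add_datafile() above:
     "/data/ts/ts1.ibd" -> m_path = "/data/ts/", basename = "ts1.ibd"
     "ts1.ibd"          -> m_path left unset,    basename = "ts1.ibd"
   A relative name keeps everything in the basename so that it can be
   resolved against the data directory later. */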
diff --git a/storage/innobase/fsp/fsp0sysspace.cc b/storage/innobase/fsp/fsp0sysspace.cc
new file mode 100644
index 00000000000..dc5a27e2f2c
--- /dev/null
+++ b/storage/innobase/fsp/fsp0sysspace.cc
@@ -0,0 +1,1000 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file fsp/fsp0space.cc
+Multi file, shared, system tablespace implementation.
+
+Created 2012-11-16 by Sunny Bains as srv/srv0space.cc
+Refactored 2013-7-26 by Kevin Lewis
+*******************************************************/
+
+#include "fsp0sysspace.h"
+#include "srv0start.h"
+#include "trx0sys.h"
+#include "dict0load.h"
+#include "mem0mem.h"
+#include "os0file.h"
+#include "row0mysql.h"
+
+/** The server header file is included to access the opt_initialize global
+variable. If the server ever passes the create/open-DB option down to the
+storage engine, we should remove this direct reference to a server header
+and global variable. */
+#include "mysqld.h"
+
+/** The control info of the system tablespace. */
+SysTablespace srv_sys_space;
+
+/** The control info of a temporary table shared tablespace. */
+SysTablespace srv_tmp_space;
+
+/** If the last data file is auto-extended, we add this many pages to it
+at a time. We have to make this public because it is a config variable. */
+ulong sys_tablespace_auto_extend_increment;
+
+#ifdef UNIV_DEBUG
+/** Control whether extra debug checks are done for the temporary tablespace.
+Default = true, that is, such checks are disabled.
+This variable is not exposed to the end user, but is kept as a variable so
+that a developer can enable it during debugging. */
+#endif /* UNIV_DEBUG */
+
+/** Convert a numeric string that optionally ends in G, M or K
+to a number of megabytes.
+@param[in] ptr string with a quantity (bytes unless suffixed)
+@param[out] megs the quantity in megabytes
+@return pointer to the next character in the string */
+char*
+SysTablespace::parse_units(
+ char* ptr,
+ ulint* megs)
+{
+ char* endp;
+
+ *megs = strtoul(ptr, &endp, 10);
+
+ ptr = endp;
+
+ switch (*ptr) {
+ case 'G': case 'g':
+ *megs *= 1024;
+ /* fall through */
+ case 'M': case 'm':
+ ++ptr;
+ break;
+ case 'K': case 'k':
+ *megs /= 1024;
+ ++ptr;
+ break;
+ default:
+ *megs /= 1024 * 1024;
+ break;
+ }
+
+ return(ptr);
+}
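
/* Illustrative inputs (hypothetical) and the resulting *megs, per the
   switch above:
     "1G"      -> 1024 (gigabytes are scaled up by 1024)
     "512M"    -> 512
     "2048K"   -> 2    (kilobytes are scaled down by 1024)
     "1048576" -> 1    (a bare number is taken as bytes) */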
+
+/** Parse the input parameters and populate member variables.
+@param[in] filepath_spec path and size specification of data files
+@param[in] supports_raw true if the tablespace supports raw devices
+@return true if parsing succeeded */
+bool
+SysTablespace::parse_params(
+ const char* filepath_spec,
+ bool supports_raw)
+{
+ char* filepath;
+ ulint size;
+ char* input_str;
+ ulint n_files = 0;
+
+ ut_ad(m_last_file_size_max == 0);
+ ut_ad(!m_auto_extend_last_file);
+
+ char* new_str = mem_strdup(filepath_spec);
+ char* str = new_str;
+
+ input_str = str;
+
+ /*---------------------- PASS 1 ---------------------------*/
+ /* First calculate the number of data files and check syntax:
+ filepath:size[K|M|G];filepath:size[K|M|G]...
+ Note that a Windows path may contain a drive name and a ':'. */
+ while (*str != '\0') {
+ filepath = str;
+
+ while ((*str != ':' && *str != '\0')
+ || (*str == ':'
+ && (*(str + 1) == '\\' || *(str + 1) == '/'
+ || *(str + 1) == ':'))) {
+ str++;
+ }
+
+ if (*str == '\0') {
+ ut_free(new_str);
+
+ ib::error()
+ << "syntax error in file path or size"
+ " specified is less than 1 megabyte";
+ return(false);
+ }
+
+ str++;
+
+ str = parse_units(str, &size);
+
+ if (0 == strncmp(str, ":autoextend",
+ (sizeof ":autoextend") - 1)) {
+
+ str += (sizeof ":autoextend") - 1;
+
+ if (0 == strncmp(str, ":max:",
+ (sizeof ":max:") - 1)) {
+
+ str += (sizeof ":max:") - 1;
+
+ str = parse_units(str, &size);
+ }
+
+ if (*str != '\0') {
+ ut_free(new_str);
+ ib::error()
+ << "syntax error in file path or"
+ << " size specified is less than"
+ << " 1 megabyte";
+ return(false);
+ }
+ }
+
+ if (::strlen(str) >= 6
+ && *str == 'n'
+ && *(str + 1) == 'e'
+ && *(str + 2) == 'w') {
+
+ if (!supports_raw) {
+ ib::error()
+ << "Tablespace doesn't support raw"
+ " devices";
+ ut_free(new_str);
+ return(false);
+ }
+
+ str += 3;
+ }
+
+ if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
+ str += 3;
+
+ if (!supports_raw) {
+ ib::error()
+ << "Tablespace doesn't support raw"
+ " devices";
+ ut_free(new_str);
+ return(false);
+ }
+ }
+
+ if (size == 0) {
+
+ ut_free(new_str);
+
+ ib::error()
+ << "syntax error in file path or size"
+ " specified is less than 1 megabyte";
+
+ return(false);
+ }
+
+ ++n_files;
+
+ if (*str == ';') {
+ str++;
+ } else if (*str != '\0') {
+ ut_free(new_str);
+
+ ib::error()
+ << "syntax error in file path or size"
+ " specified is less than 1 megabyte";
+ return(false);
+ }
+ }
+
+ if (n_files == 0) {
+
+ /* filepath_spec must contain at least one data file
+ definition */
+
+ ut_free(new_str);
+
+ ib::error()
+ << "syntax error in file path or size specified"
+ " is less than 1 megabyte";
+
+ return(false);
+ }
+
+ /*---------------------- PASS 2 ---------------------------*/
+ /* Then store the actual values to our arrays */
+ str = input_str;
+ ulint order = 0;
+
+ while (*str != '\0') {
+ filepath = str;
+
+ /* Note that we must step over the ':' in a Windows filepath;
+ a Windows path normally looks like C:\ibdata\ibdata1:1G, but
+ a Windows raw partition may have a specification like
+ \\.\C::1Gnewraw or \\.\PHYSICALDRIVE2:1Gnewraw */
+
+ while ((*str != ':' && *str != '\0')
+ || (*str == ':'
+ && (*(str + 1) == '\\' || *(str + 1) == '/'
+ || *(str + 1) == ':'))) {
+ str++;
+ }
+
+ if (*str == ':') {
+ /* Make filepath a null-terminated string */
+ *str = '\0';
+ str++;
+ }
+
+ str = parse_units(str, &size);
+
+ if (0 == strncmp(str, ":autoextend",
+ (sizeof ":autoextend") - 1)) {
+
+ m_auto_extend_last_file = true;
+
+ str += (sizeof ":autoextend") - 1;
+
+ if (0 == strncmp(str, ":max:",
+ (sizeof ":max:") - 1)) {
+
+ str += (sizeof ":max:") - 1;
+
+ str = parse_units(str, &m_last_file_size_max);
+ }
+
+ if (*str != '\0') {
+ ut_free(new_str);
+ ib::error() << "syntax error in file path or"
+ " size specified is less than 1"
+ " megabyte";
+ return(false);
+ }
+ }
+
+ m_files.push_back(Datafile(filepath, flags(), size, order));
+ Datafile* datafile = &m_files.back();
+ datafile->make_filepath(path(), filepath, NO_EXT);
+
+ if (::strlen(str) >= 6
+ && *str == 'n'
+ && *(str + 1) == 'e'
+ && *(str + 2) == 'w') {
+
+ ut_a(supports_raw);
+
+ str += 3;
+
+ /* Initialize new raw device only during initialize */
+ /* JAN: TODO: MySQL 5.7 used opt_initialize */
+ m_files.back().m_type =
+ opt_bootstrap ? SRV_NEW_RAW : SRV_OLD_RAW;
+ }
+
+ if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
+
+ ut_a(supports_raw);
+
+ str += 3;
+
+ /* Initialize new raw device only during initialize */
+ if (m_files.back().m_type == SRV_NOT_RAW) {
+ /* JAN: TODO: MySQL 5.7 used opt_initialize */
+ m_files.back().m_type =
+ opt_bootstrap ? SRV_NEW_RAW : SRV_OLD_RAW;
+ }
+ }
+
+ if (*str == ';') {
+ ++str;
+ }
+ order++;
+ }
+
+ ut_ad(n_files == ulint(m_files.size()));
+
+ ut_free(new_str);
+
+ return(true);
+}
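
/* Illustrative specification strings (hypothetical values) accepted by
   the two-pass parser above:
     "ibdata1:12M:autoextend"
     "ibdata1:1G;ibdata2:1G:autoextend:max:2G"
     "/dev/hdd1:20Gnewraw"   (only when supports_raw is true)
   Entries are filepath:size[K|M|G] separated by ';'; the optional
   :autoextend[:max:size] suffix applies to the last file only. */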
+
+/** Frees the memory allocated by the parse method. */
+void
+SysTablespace::shutdown()
+{
+ Tablespace::shutdown();
+
+ m_auto_extend_last_file = 0;
+ m_last_file_size_max = 0;
+ m_created_new_raw = 0;
+ m_is_tablespace_full = false;
+ m_sanity_checks_done = false;
+}
+
+/** Verify the size of the physical file.
+@param[in] file data file object
+@return DB_SUCCESS if OK else error code. */
+dberr_t
+SysTablespace::check_size(
+ Datafile& file)
+{
+ os_offset_t size = os_file_get_size(file.m_handle);
+ ut_a(size != (os_offset_t) -1);
+
+ /* Under some error conditions, such as a full disk or the
+ file size reaching a filesystem limit, the data file could
+ contain an incomplete extent at the end. The same can happen
+ if a failure occurs while we are extending the data file.
+ So we round the size down to whole pages. */
+
+ const ulint rounded_size_pages = static_cast<ulint>(
+ size >> UNIV_PAGE_SIZE_SHIFT);
+
+ /* If last file */
+ if (&file == &m_files.back() && m_auto_extend_last_file) {
+
+ if (file.m_size > rounded_size_pages
+ || (m_last_file_size_max > 0
+ && m_last_file_size_max < rounded_size_pages)) {
+ ib::error() << "The Auto-extending " << name()
+ << " data file '" << file.filepath() << "' is"
+ " of a different size " << rounded_size_pages
+ << " pages than specified"
+ " in the .cnf file: initial " << file.m_size
+ << " pages, max " << m_last_file_size_max
+ << " (relevant if non-zero) pages!";
+ return(DB_ERROR);
+ }
+
+ file.m_size = rounded_size_pages;
+ }
+
+ if (rounded_size_pages != file.m_size) {
+ ib::error() << "The " << name() << " data file '"
+ << file.filepath() << "' is of a different size "
+ << rounded_size_pages << " pages"
+ " than the " << file.m_size << " pages specified in"
+ " the .cnf file!";
+ return(DB_ERROR);
+ }
+
+ return(DB_SUCCESS);
+}
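
/* Worked example (assuming 16 KiB pages, UNIV_PAGE_SIZE_SHIFT = 14):
   a file of 12 MB plus 3000 stray bytes yields
   rounded_size_pages = 12585912 >> 14 = 768; the trailing partial page
   left by an interrupted extension is simply ignored. */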
+
+/** Set the size of the file.
+@param[in] file data file object
+@return DB_SUCCESS or error code */
+dberr_t
+SysTablespace::set_size(
+ Datafile& file)
+{
+ ut_a(!srv_read_only_mode || m_ignore_read_only);
+
+ /* We created the data file and now write it full of zeros */
+ ib::info() << "Setting file '" << file.filepath() << "' size to "
+ << (file.m_size >> (20 - UNIV_PAGE_SIZE_SHIFT)) << " MB."
+ " Physically writing the file full; Please wait ...";
+
+ bool success = os_file_set_size(
+ file.m_filepath, file.m_handle,
+ static_cast<os_offset_t>(file.m_size) << UNIV_PAGE_SIZE_SHIFT);
+
+ if (success) {
+ ib::info() << "File '" << file.filepath() << "' size is now "
+ << (file.m_size >> (20 - UNIV_PAGE_SIZE_SHIFT))
+ << " MB.";
+ } else {
+ ib::error() << "Could not set the file size of '"
+ << file.filepath() << "'. Probably out of disk space";
+
+ return(DB_ERROR);
+ }
+
+ return(DB_SUCCESS);
+}
+
+/** Create a data file.
+@param[in] file data file object
+@return DB_SUCCESS or error code */
+dberr_t
+SysTablespace::create_file(
+ Datafile& file)
+{
+ dberr_t err = DB_SUCCESS;
+
+ ut_a(!file.m_exists);
+ ut_a(!srv_read_only_mode || m_ignore_read_only);
+
+ switch (file.m_type) {
+ case SRV_NEW_RAW:
+
+ /* The partition is opened, not created; then it is
+ written over */
+ m_created_new_raw = true;
+
+ /* Fall through. */
+
+ case SRV_OLD_RAW:
+
+ srv_start_raw_disk_in_use = TRUE;
+
+ /* Fall through. */
+
+ case SRV_NOT_RAW:
+ err = file.open_or_create(
+ m_ignore_read_only ? false : srv_read_only_mode);
+ break;
+ }
+
+
+ if (err == DB_SUCCESS && file.m_type != SRV_OLD_RAW) {
+ err = set_size(file);
+ }
+
+ return(err);
+}
+
+/** Open a data file.
+@param[in] file data file object
+@return DB_SUCCESS or error code */
+dberr_t
+SysTablespace::open_file(
+ Datafile& file)
+{
+ dberr_t err = DB_SUCCESS;
+
+ ut_a(file.m_exists);
+
+ switch (file.m_type) {
+ case SRV_NEW_RAW:
+ /* The partition is opened, not created; then it is
+ written over */
+ m_created_new_raw = true;
+
+ /* Fall through */
+
+ case SRV_OLD_RAW:
+ srv_start_raw_disk_in_use = TRUE;
+
+ if (srv_read_only_mode && !m_ignore_read_only) {
+ ib::error() << "Can't open a raw device '"
+ << file.m_filepath << "' when"
+ " --innodb-read-only is set";
+
+ return(DB_ERROR);
+ }
+
+ /* Fall through */
+
+ case SRV_NOT_RAW:
+ err = file.open_or_create(
+ m_ignore_read_only ? false : srv_read_only_mode);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+ break;
+ }
+
+ switch (file.m_type) {
+ case SRV_NEW_RAW:
+ /* Set file size for new raw device. */
+ err = set_size(file);
+ break;
+
+ case SRV_NOT_RAW:
+ /* Check file size for existing file. */
+ err = check_size(file);
+ break;
+
+ case SRV_OLD_RAW:
+ err = DB_SUCCESS;
+ break;
+
+ }
+
+ if (err != DB_SUCCESS) {
+ file.close();
+ }
+
+ return(err);
+}
+
+/** Check the tablespace header for this tablespace.
+@param[out] flushed_lsn the value of FIL_PAGE_FILE_FLUSH_LSN
+@return DB_SUCCESS or error code */
+dberr_t
+SysTablespace::read_lsn_and_check_flags(lsn_t* flushed_lsn)
+{
+ dberr_t err;
+
+ /* Only relevant for the system tablespace. */
+ ut_ad(space_id() == TRX_SYS_SPACE);
+
+ files_t::iterator it = m_files.begin();
+
+ ut_a(it->m_exists);
+
+ if (it->m_handle == OS_FILE_CLOSED) {
+
+ err = it->open_or_create(
+ m_ignore_read_only ? false : srv_read_only_mode);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+ }
+
+ err = it->read_first_page(
+ m_ignore_read_only ? false : srv_read_only_mode);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ ut_a(it->order() == 0);
+
+ if (srv_operation == SRV_OPERATION_NORMAL) {
+ buf_dblwr_init_or_load_pages(it->handle(), it->filepath());
+ }
+
+ /* Check the contents of the first page of the
+ first datafile. */
+ for (int retry = 0; retry < 2; ++retry) {
+
+ err = it->validate_first_page(flushed_lsn);
+
+ if (err != DB_SUCCESS
+ && (retry == 1
+ || it->restore_from_doublewrite())) {
+
+ it->close();
+
+ return(err);
+ }
+ }
+
+ /* Make sure the tablespace space ID matches the
+ space ID on the first page of the first datafile. */
+ if (space_id() != it->m_space_id) {
+
+ ib::error()
+ << "The " << name() << " data file '" << it->name()
+ << "' has the wrong space ID. It should be "
+ << space_id() << ", but " << it->m_space_id
+ << " was found";
+
+ it->close();
+
+ return(err);
+ }
+
+ it->close();
+
+ return(DB_SUCCESS);
+}
+
+/** Check if a file can be opened in the correct mode.
+@param[in] file data file object
+@param[out] reason exact reason if file_status check failed.
+@return DB_SUCCESS or error code. */
+dberr_t
+SysTablespace::check_file_status(
+ const Datafile& file,
+ file_status_t& reason)
+{
+ os_file_stat_t stat;
+
+ memset(&stat, 0x0, sizeof(stat));
+
+ dberr_t err = os_file_get_status(
+ file.m_filepath, &stat, true,
+ m_ignore_read_only ? false : srv_read_only_mode);
+
+ reason = FILE_STATUS_VOID;
+ /* File exists but we can't read the rw-permission settings. */
+ switch (err) {
+ case DB_FAIL:
+ ib::error() << "os_file_get_status() failed on '"
+ << file.filepath()
+ << "'. Can't determine file permissions";
+ err = DB_ERROR;
+ reason = FILE_STATUS_RW_PERMISSION_ERROR;
+ break;
+
+ case DB_SUCCESS:
+
+ /* Note: stat.rw_perm is only valid for "regular" files */
+
+ if (stat.type == OS_FILE_TYPE_FILE) {
+
+ if (!stat.rw_perm) {
+ const char *p = (!srv_read_only_mode
+ || m_ignore_read_only)
+ ? "writable"
+ : "readable";
+
+ ib::error() << "The " << name() << " data file"
+ << " '" << file.name() << "' must be "
+ << p;
+
+ err = DB_ERROR;
+ reason = FILE_STATUS_READ_WRITE_ERROR;
+ }
+
+ } else {
+ /* Not a regular file, bail out. */
+ ib::error() << "The " << name() << " data file '"
+ << file.name() << "' is not a regular"
+ " InnoDB data file.";
+
+ err = DB_ERROR;
+ reason = FILE_STATUS_NOT_REGULAR_FILE_ERROR;
+ }
+ break;
+
+ case DB_NOT_FOUND:
+ break;
+
+ default:
+ ut_ad(0);
+ }
+
+ return(err);
+}
+
+/** Note that the data file was not found.
+@param[in] file data file object
+@param[out] create_new_db true if a new instance is to be created
+@return DB_SUCCESS or error code */
+dberr_t
+SysTablespace::file_not_found(
+ Datafile& file,
+ bool* create_new_db)
+{
+ file.m_exists = false;
+
+ if (srv_read_only_mode && !m_ignore_read_only) {
+ ib::error() << "Can't create file '" << file.filepath()
+ << "' when --innodb-read-only is set";
+
+ return(DB_ERROR);
+
+ } else if (&file == &m_files.front()) {
+
+ /* First data file. */
+ ut_a(!*create_new_db);
+ *create_new_db = TRUE;
+
+ if (space_id() == TRX_SYS_SPACE) {
+ ib::info() << "The first " << name() << " data file '"
+ << file.name() << "' did not exist."
+ " A new tablespace will be created!";
+ }
+
+ } else {
+ ib::info() << "Need to create a new " << name()
+ << " data file '" << file.name() << "'.";
+ }
+
+ /* Set the file create mode. */
+ switch (file.m_type) {
+ case SRV_NOT_RAW:
+ file.set_open_flags(OS_FILE_CREATE);
+ break;
+
+ case SRV_NEW_RAW:
+ case SRV_OLD_RAW:
+ file.set_open_flags(OS_FILE_OPEN_RAW);
+ break;
+ }
+
+ return(DB_SUCCESS);
+}
+
+/** Note that the data file was found.
+@param[in,out] file data file object
+@return true if a new instance is to be created */
+bool
+SysTablespace::file_found(
+ Datafile& file)
+{
+ /* Note that the file exists and can be opened
+ in the appropriate mode. */
+ file.m_exists = true;
+
+ /* Set the file open mode */
+ switch (file.m_type) {
+ case SRV_NOT_RAW:
+ file.set_open_flags(
+ &file == &m_files.front()
+ ? OS_FILE_OPEN_RETRY : OS_FILE_OPEN);
+ break;
+
+ case SRV_NEW_RAW:
+ case SRV_OLD_RAW:
+ file.set_open_flags(OS_FILE_OPEN_RAW);
+ break;
+ }
+
+ /* Need to create the system tablespace for new raw device. */
+ return(file.m_type == SRV_NEW_RAW);
+}
+
+/** Check the data file specification.
+@param[out] create_new_db true if a new database is to be created
+@param[in] min_expected_size Minimum expected tablespace size in bytes
+@return DB_SUCCESS if all OK else error code */
+dberr_t
+SysTablespace::check_file_spec(
+ bool* create_new_db,
+ ulint min_expected_size)
+{
+ *create_new_db = FALSE;
+
+ if (m_files.size() >= 1000) {
+ ib::error() << "There must be < 1000 data files in "
+ << name() << " but " << m_files.size() << " have been"
+ " defined.";
+
+ return(DB_ERROR);
+ }
+
+ if (!m_auto_extend_last_file
+ && get_sum_of_sizes() < min_expected_size / UNIV_PAGE_SIZE) {
+
+ ib::error() << "Tablespace size must be at least "
+ << min_expected_size / (1024 * 1024) << " MB";
+
+ return(DB_ERROR);
+ }
+
+ dberr_t err = DB_SUCCESS;
+
+ ut_a(!m_files.empty());
+
+ /* If there is more than one data file and the last data file
+ doesn't exist, that is OK. We allow adding of new data files. */
+
+ files_t::iterator begin = m_files.begin();
+ files_t::iterator end = m_files.end();
+
+ for (files_t::iterator it = begin; it != end; ++it) {
+
+ file_status_t reason_if_failed;
+ err = check_file_status(*it, reason_if_failed);
+
+ if (err == DB_NOT_FOUND) {
+
+ err = file_not_found(*it, create_new_db);
+
+ if (err != DB_SUCCESS) {
+ break;
+ }
+
+ } else if (err != DB_SUCCESS) {
+ if (reason_if_failed == FILE_STATUS_READ_WRITE_ERROR) {
+ const char* p = (!srv_read_only_mode
+ || m_ignore_read_only)
+ ? "writable" : "readable";
+ ib::error() << "The " << name() << " data file"
+ << " '" << it->name() << "' must be "
+ << p;
+ }
+
+ ut_a(err != DB_FAIL);
+ break;
+
+ } else if (*create_new_db) {
+ ib::error() << "The " << name() << " data file '"
+ << begin->m_name << "' was not found but"
+ " one of the other data files '" << it->m_name
+ << "' exists.";
+
+ err = DB_ERROR;
+ break;
+
+ } else {
+ *create_new_db = file_found(*it);
+ }
+ }
+
+ return(err);
+}
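
/* Illustrative outcomes of the loop above for a hypothetical two-file
   setup:
     ibdata1 missing, ibdata2 missing -> *create_new_db = true
     ibdata1 present, ibdata2 missing -> OK, the new file will be added
     ibdata1 missing, ibdata2 present -> error: inconsistent file set */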
+
+/** Open or create the data files
+@param[in] is_temp whether this is a temporary tablespace
+@param[in] create_new_db whether we are creating a new database
+@param[out] sum_new_sizes sum of sizes of the new files added
+@param[out] flush_lsn FIL_PAGE_FILE_FLUSH_LSN of first file
+@return DB_SUCCESS or error code */
+dberr_t
+SysTablespace::open_or_create(
+ bool is_temp,
+ bool create_new_db,
+ ulint* sum_new_sizes,
+ lsn_t* flush_lsn)
+{
+ dberr_t err = DB_SUCCESS;
+ fil_space_t* space = NULL;
+
+ ut_ad(!m_files.empty());
+
+ if (sum_new_sizes) {
+ *sum_new_sizes = 0;
+ }
+
+ files_t::iterator begin = m_files.begin();
+ files_t::iterator end = m_files.end();
+
+ ut_ad(begin->order() == 0);
+
+ for (files_t::iterator it = begin; it != end; ++it) {
+
+ if (it->m_exists) {
+ err = open_file(*it);
+
+ /* For new raw device increment new size. */
+ if (sum_new_sizes && it->m_type == SRV_NEW_RAW) {
+
+ *sum_new_sizes += it->m_size;
+ }
+
+ } else {
+ err = create_file(*it);
+
+ if (sum_new_sizes) {
+ *sum_new_sizes += it->m_size;
+ }
+
+ /* Set the correct open flags now that we have
+ successfully created the file. */
+ if (err == DB_SUCCESS) {
+ /* We ignore new_db OUT parameter here
+ as the information is known at this stage */
+ file_found(*it);
+ }
+ }
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ }
+
+ if (!create_new_db && flush_lsn) {
+ /* Validate the header page in the first datafile
+ and read LSNs from the others. */
+ err = read_lsn_and_check_flags(flush_lsn);
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+ }
+
+ /* Close the current handles, add space and file info to the
+ fil_system cache and the Data Dictionary, and re-open them
+ in the fil_system cache so that they stay open until shutdown. */
+ ulint node_counter = 0;
+ for (files_t::iterator it = begin; it != end; ++it) {
+ it->close();
+ it->m_exists = true;
+
+ if (it == begin) {
+ /* First data file. */
+
+ /* Create the tablespace entry for the multi-file
+ tablespace in the tablespace manager. */
+ space = fil_space_create(
+ name(), space_id(), flags(), is_temp
+ ? FIL_TYPE_TEMPORARY : FIL_TYPE_TABLESPACE,
+ NULL);
+ if (!space) {
+ return DB_ERROR;
+ }
+ }
+
+ ut_a(fil_validate());
+
+ ulint max_size = (++node_counter == m_files.size()
+ ? (m_last_file_size_max == 0
+ ? ULINT_MAX
+ : m_last_file_size_max)
+ : it->m_size);
+
+ space->add(it->m_filepath, OS_FILE_CLOSED, it->m_size,
+ it->m_type != SRV_NOT_RAW, true, max_size);
+ }
+
+ return(err);
+}
+
+/** Normalize the file sizes, converting from megabytes to number of pages. */
+void
+SysTablespace::normalize()
+{
+ files_t::iterator end = m_files.end();
+
+ for (files_t::iterator it = m_files.begin(); it != end; ++it) {
+
+ it->m_size *= (1024 * 1024) / UNIV_PAGE_SIZE;
+ }
+
+ m_last_file_size_max *= (1024 * 1024) / UNIV_PAGE_SIZE;
+}
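
/* Worked example (assuming UNIV_PAGE_SIZE = 16 KiB): a data file
   configured as 12 (megabytes) becomes 12 * (1048576 / 16384)
   = 768 pages. */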
+
+
+/**
+@return next increment size */
+ulint
+SysTablespace::get_increment() const
+{
+ ulint increment;
+
+ if (m_last_file_size_max == 0) {
+ increment = get_autoextend_increment();
+ } else {
+
+ if (!is_valid_size()) {
+ ib::error() << "The last data file in " << name()
+ << " has a size of " << last_file_size()
+ << " but the max size allowed is "
+ << m_last_file_size_max;
+ }
+
+ increment = m_last_file_size_max - last_file_size();
+ }
+
+ if (increment > get_autoextend_increment()) {
+ increment = get_autoextend_increment();
+ }
+
+ return(increment);
+}
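
/* Illustrative example (hypothetical values): with last_file_size()
   = 700 pages, m_last_file_size_max = 768 pages and a larger
   autoextend increment, the function returns 768 - 700 = 68 pages,
   so the file never grows past its configured maximum. */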
+
+
+/**
+@return true if configured to use raw devices */
+bool
+SysTablespace::has_raw_device()
+{
+ files_t::iterator end = m_files.end();
+
+ for (files_t::iterator it = m_files.begin(); it != end; ++it) {
+
+ if (it->is_raw_device()) {
+ return(true);
+ }
+ }
+
+ return(false);
+}
diff --git a/storage/innobase/fts/Makefile.query b/storage/innobase/fts/Makefile.query
index 12dcd833064..d91b1b92de1 100644
--- a/storage/innobase/fts/Makefile.query
+++ b/storage/innobase/fts/Makefile.query
@@ -9,23 +9,9 @@ fts0blex.cc: fts0blex.l
fts0tlex.cc: fts0tlex.l
.l.cc:
- $(LEX) -P$(subst lex,,$*) -o $*.cc --header-file=../include/$*.h $<
-
-.y.cc:
- $(YACC) -p $(PREFIX) -o $*.cc -d $<
- mv $*.h ../include
-LEX=flex
-YACC=bison
-PREFIX=fts
-
-all: fts0pars.cc fts0blex.cc fts0tlex.cc
-
-fts0par.cc: fts0pars.y
-fts0blex.cc: fts0blex.l
-fts0tlex.cc: fts0tlex.l
-
-.l.cc:
- $(LEX) -P$(subst lex,,$*) -o $*.cc --header-file=../include/$*.h $<
+ echo '#include "univ.i"' > $*.cc
+ $(LEX) --stdout -P$(subst lex,,$*) -o $*.cc \
+ --header-file=../include/$*.h $< >> $*.cc
.y.cc:
$(YACC) -p $(PREFIX) -o $*.cc -d $<
diff --git a/storage/innobase/fts/fts0ast.cc b/storage/innobase/fts/fts0ast.cc
index c69eabc0ff8..e22613a265b 100644
--- a/storage/innobase/fts/fts0ast.cc
+++ b/storage/innobase/fts/fts0ast.cc
@@ -24,7 +24,6 @@ Full Text Search parser helper file.
Created 2007/3/16 Sunny Bains.
***********************************************************************/
-#include "mem0mem.h"
#include "fts0ast.h"
#include "fts0pars.h"
#include "fts0fts.h"
@@ -51,16 +50,31 @@ fts_ast_node_create(void)
{
fts_ast_node_t* node;
- node = (fts_ast_node_t*) ut_malloc(sizeof(*node));
- memset(node, 0x0, sizeof(*node));
+ node = (fts_ast_node_t*) ut_zalloc_nokey(sizeof(*node));
return(node);
}
+/** Track node allocations, in case there is an error during parsing. */
+static
+void
+fts_ast_state_add_node(
+ fts_ast_state_t*state, /*!< in: ast instance */
+ fts_ast_node_t* node) /*!< in: node to add to ast */
+{
+ if (!state->list.head) {
+ ut_a(!state->list.tail);
+
+ state->list.head = state->list.tail = node;
+ } else {
+ state->list.tail->next_alloc = node;
+ state->list.tail = node;
+ }
+}
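
Each node created through the public constructors is appended to state->list via next_alloc, so nodes that never make it into the final tree remain reachable for cleanup after a parse error. A plausible sketch of the consuming loop (the real one lives in fts_ast_state_free, only partially visible in this diff; fts_ast_free_node is documented below as returning the next node to free):

	fts_ast_node_t*	node = state->list.head;
	while (node != NULL) {
		/* frees node and returns node->next_alloc */
		node = fts_ast_free_node(node);
	}
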
+
/******************************************************************//**
Create an operator fts_ast_node_t.
@return new node */
-UNIV_INTERN
fts_ast_node_t*
fts_ast_create_node_oper(
/*=====================*/
@@ -81,7 +95,6 @@ fts_ast_create_node_oper(
This function takes ownership of the ptr and is responsible
for free'ing it
@return new node or a node list with tokenized words */
-UNIV_INTERN
fts_ast_node_t*
fts_ast_create_node_term(
/*=====================*/
@@ -98,14 +111,12 @@ fts_ast_create_node_term(
/* Scan the incoming string and filter out any "non-word" characters */
while (cur_pos < len) {
fts_string_t str;
- ulint offset;
ulint cur_len;
cur_len = innobase_mysql_fts_get_token(
state->charset,
reinterpret_cast<const byte*>(ptr->str) + cur_pos,
- reinterpret_cast<const byte*>(ptr->str) + len,
- &str, &offset);
+ reinterpret_cast<const byte*>(ptr->str) + len, &str);
if (cur_len == 0) {
break;
@@ -154,10 +165,40 @@ fts_ast_create_node_term(
}
/******************************************************************//**
+Create an AST term node, making a copy of ptr for the plugin parser
+@return node */
+fts_ast_node_t*
+fts_ast_create_node_term_for_parser(
+/*================================*/
+ void* arg, /*!< in: ast state */
+ const char* ptr, /*!< in: term string */
+ const ulint len) /*!< in: term string length */
+{
+ fts_ast_node_t* node = NULL;
+
+	/* '%' as the first char is forbidden for LIKE in the internal SQL
+	parser; '%' as the last char is reserved for wildcard search. */
+ if (len == 0 || len > FTS_MAX_WORD_LEN
+ || ptr[0] == '%' || ptr[len - 1] == '%') {
+ return(NULL);
+ }
+
+ node = fts_ast_node_create();
+
+ node->type = FTS_AST_TERM;
+
+ node->term.ptr = fts_ast_string_create(
+ reinterpret_cast<const byte*>(ptr), len);
+
+ fts_ast_state_add_node(static_cast<fts_ast_state_t*>(arg), node);
+
+ return(node);
+}
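
The guard at the top rejects tokens the engine cannot index: empty or over-long terms, and terms whose first or last byte is '%'. Only the edge positions are checked, so (hypothetical inputs):

	fts_ast_create_node_term_for_parser(state, "%abc", 4);	/* NULL: leading '%' */
	fts_ast_create_node_term_for_parser(state, "abc%", 4);	/* NULL: trailing '%' */
	fts_ast_create_node_term_for_parser(state, "ab%c", 4);	/* accepted: '%' not at an edge */

where state stands in for the parser's fts_ast_state_t* argument.
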
+
+/******************************************************************//**
This function takes ownership of the ptr and is responsible
for free'ing it.
@return new node */
-UNIV_INTERN
fts_ast_node_t*
fts_ast_create_node_text(
/*=====================*/
@@ -196,10 +237,29 @@ fts_ast_create_node_text(
}
/******************************************************************//**
+Create an AST phrase list node for the plugin parser
+@return node */
+fts_ast_node_t*
+fts_ast_create_node_phrase_list(
+/*============================*/
+ void* arg) /*!< in: ast state */
+{
+ fts_ast_node_t* node = fts_ast_node_create();
+
+ node->type = FTS_AST_PARSER_PHRASE_LIST;
+
+ node->text.distance = ULINT_UNDEFINED;
+ node->list.head = node->list.tail = NULL;
+
+ fts_ast_state_add_node(static_cast<fts_ast_state_t*>(arg), node);
+
+ return(node);
+}
+
+/******************************************************************//**
This function takes ownership of the expr and is responsible
for free'ing it.
@return new node */
-UNIV_INTERN
fts_ast_node_t*
fts_ast_create_node_list(
/*=====================*/
@@ -220,7 +280,6 @@ fts_ast_create_node_list(
Create a sub-expression list node. This function takes ownership of
expr and is responsible for deleting it.
@return new node */
-UNIV_INTERN
fts_ast_node_t*
fts_ast_create_node_subexp_list(
/*============================*/
@@ -246,7 +305,8 @@ fts_ast_free_list(
fts_ast_node_t* node) /*!< in: ast node to free */
{
ut_a(node->type == FTS_AST_LIST
- || node->type == FTS_AST_SUBEXP_LIST);
+ || node->type == FTS_AST_SUBEXP_LIST
+ || node->type == FTS_AST_PARSER_PHRASE_LIST);
for (node = node->list.head;
node != NULL;
@@ -259,7 +319,6 @@ fts_ast_free_list(
/********************************************************************//**
Free a fts_ast_node_t instance.
@return next node to free */
-UNIV_INTERN
fts_ast_node_t*
fts_ast_free_node(
/*==============*/
@@ -284,6 +343,7 @@ fts_ast_free_node(
case FTS_AST_LIST:
case FTS_AST_SUBEXP_LIST:
+ case FTS_AST_PARSER_PHRASE_LIST:
fts_ast_free_list(node);
node->list.head = node->list.tail = NULL;
break;
@@ -307,7 +367,6 @@ fts_ast_free_node(
This AST takes ownership of the expr and is responsible
for free'ing it.
@return in param "list" */
-UNIV_INTERN
fts_ast_node_t*
fts_ast_add_node(
/*=============*/
@@ -320,7 +379,8 @@ fts_ast_add_node(
ut_a(!elem->next);
ut_a(node->type == FTS_AST_LIST
- || node->type == FTS_AST_SUBEXP_LIST);
+ || node->type == FTS_AST_SUBEXP_LIST
+ || node->type == FTS_AST_PARSER_PHRASE_LIST);
if (!node->list.head) {
ut_a(!node->list.tail);
@@ -337,28 +397,7 @@ fts_ast_add_node(
}
/******************************************************************//**
-For tracking node allocations, in case there is an error during
-parsing. */
-UNIV_INTERN
-void
-fts_ast_state_add_node(
-/*===================*/
- fts_ast_state_t*state, /*!< in: ast instance */
- fts_ast_node_t* node) /*!< in: node to add to ast */
-{
- if (!state->list.head) {
- ut_a(!state->list.tail);
-
- state->list.head = state->list.tail = node;
- } else {
- state->list.tail->next_alloc = node;
- state->list.tail = node;
- }
-}
-
-/******************************************************************//**
Set the wildcard attribute of a term. */
-UNIV_INTERN
void
fts_ast_term_set_wildcard(
/*======================*/
@@ -383,9 +422,8 @@ fts_ast_term_set_wildcard(
/******************************************************************//**
Set the proximity attribute of a text node. */
-UNIV_INTERN
void
-fts_ast_term_set_distance(
+fts_ast_text_set_distance(
/*======================*/
fts_ast_node_t* node, /*!< in/out: text node */
ulint distance) /*!< in: the text proximity
@@ -403,7 +441,6 @@ fts_ast_term_set_distance(
/******************************************************************//**
Free node and expr allocations. */
-UNIV_INTERN
void
fts_ast_state_free(
/*===============*/
@@ -430,14 +467,34 @@ fts_ast_state_free(
state->root = state->list.head = state->list.tail = NULL;
}
+/** Print the AST string
+@param[in]	ast_str	string to print */
+static
+void
+fts_ast_string_print(
+ const fts_ast_string_t* ast_str)
+{
+ for (ulint i = 0; i < ast_str->len; ++i) {
+ printf("%c", ast_str->str[i]);
+ }
+
+ printf("\n");
+}
+
/******************************************************************//**
-Print an ast node. */
-UNIV_INTERN
+Print an ast node recursively. */
+static
void
-fts_ast_node_print(
-/*===============*/
- fts_ast_node_t* node) /*!< in: ast node to print */
+fts_ast_node_print_recursive(
+/*=========================*/
+ fts_ast_node_t* node, /*!< in: ast node to print */
+ ulint level) /*!< in: recursive level */
{
+	/* Print leading blanks for alignment */
+ for (ulint i = 0; i < level; i++) {
+ printf(" ");
+ }
+
switch (node->type) {
case FTS_AST_TEXT:
printf("TEXT: ");
@@ -450,38 +507,83 @@ fts_ast_node_print(
break;
case FTS_AST_LIST:
- printf("LIST: ");
- node = node->list.head;
+ printf("LIST: \n");
- while (node) {
- fts_ast_node_print(node);
- node = node->next;
+ for (node = node->list.head; node; node = node->next) {
+ fts_ast_node_print_recursive(node, level + 1);
}
break;
case FTS_AST_SUBEXP_LIST:
- printf("SUBEXP_LIST: ");
- node = node->list.head;
+ printf("SUBEXP_LIST: \n");
- while (node) {
- fts_ast_node_print(node);
- node = node->next;
+ for (node = node->list.head; node; node = node->next) {
+ fts_ast_node_print_recursive(node, level + 1);
}
+ break;
+
case FTS_AST_OPER:
printf("OPER: %d\n", node->oper);
break;
+ case FTS_AST_PARSER_PHRASE_LIST:
+ printf("PARSER_PHRASE_LIST: \n");
+
+ for (node = node->list.head; node; node = node->next) {
+ fts_ast_node_print_recursive(node, level + 1);
+ }
+ break;
+
default:
ut_error;
}
}
/******************************************************************//**
+Print an ast node */
+void
+fts_ast_node_print(
+/*===============*/
+ fts_ast_node_t* node) /*!< in: ast node to print */
+{
+ fts_ast_node_print_recursive(node, 0);
+}
+
+/** Check whether the node involves only union operations
+@param[in]	node	ast node to check
+@return true if the node contains only unions, else false. */
+bool
+fts_ast_node_check_union(
+ fts_ast_node_t* node)
+{
+ if (node->type == FTS_AST_LIST
+ || node->type == FTS_AST_SUBEXP_LIST
+ || node->type == FTS_AST_PARSER_PHRASE_LIST) {
+
+ for (node = node->list.head; node; node = node->next) {
+ if (!fts_ast_node_check_union(node)) {
+ return(false);
+ }
+ }
+
+ } else if (node->type == FTS_AST_OPER
+ && (node->oper == FTS_IGNORE
+ || node->oper == FTS_EXIST)) {
+
+ return(false);
+ } else if (node->type == FTS_AST_TEXT) {
+ /* Distance or phrase search query. */
+ return(false);
+ }
+
+ return(true);
+}
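
In boolean-query terms the function returns true only for plain disjunctions: any FTS_EXIST/FTS_IGNORE operator (+word/-word) or any FTS_AST_TEXT node (phrase or proximity search) forces false, and list nodes recurse over their children. Illustrative outcomes, assuming the usual boolean-mode parse:

	/* "apple banana"     -> union-only terms        -> true
	   "+apple -banana"   -> FTS_EXIST / FTS_IGNORE  -> false
	   "\"apple banana\"" -> FTS_AST_TEXT (phrase)   -> false */
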
+
+/******************************************************************//**
Traverse the AST - in-order traversal, except for the FTS_EXIST and FTS_IGNORE
nodes, which will be ignored in the first pass of each level, and visited in a
second and third pass after all other nodes in the same level are visited.
@return DB_SUCCESS if all went well */
-UNIV_INTERN
dberr_t
fts_ast_visit(
/*==========*/
@@ -534,7 +636,7 @@ fts_ast_visit(
node && (error == DB_SUCCESS);
node = node->next) {
- switch(node->type) {
+ switch (node->type) {
case FTS_AST_LIST:
if (visit_pass != FTS_PASS_FIRST) {
break;
@@ -637,7 +739,6 @@ has one more byte than len
@param[in] str pointer to string
@param[in] len length of the string
@return ast string with NUL-terminator */
-UNIV_INTERN
fts_ast_string_t*
fts_ast_string_create(
const byte* str,
@@ -647,9 +748,10 @@ fts_ast_string_create(
ut_ad(len > 0);
- ast_str = static_cast<fts_ast_string_t*>
- (ut_malloc(sizeof(fts_ast_string_t)));
- ast_str->str = static_cast<byte*>(ut_malloc(len + 1));
+ ast_str = static_cast<fts_ast_string_t*>(
+ ut_malloc_nokey(sizeof(fts_ast_string_t)));
+
+ ast_str->str = static_cast<byte*>(ut_malloc_nokey(len + 1));
ast_str->len = len;
memcpy(ast_str->str, str, len);
@@ -661,7 +763,6 @@ fts_ast_string_create(
/**
Free an ast string instance
@param[in,out] ast_str string to free */
-UNIV_INTERN
void
fts_ast_string_free(
fts_ast_string_t* ast_str)
@@ -677,7 +778,6 @@ Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul
@param[in] str string to translate
@param[in] base the base
@return translated number */
-UNIV_INTERN
ulint
fts_ast_string_to_ul(
const fts_ast_string_t* ast_str,
@@ -687,49 +787,8 @@ fts_ast_string_to_ul(
NULL, base));
}
-/**
-Print the ast string
-@param[in] str string to print */
-UNIV_INTERN
-void
-fts_ast_string_print(
- const fts_ast_string_t* ast_str)
-{
- for (ulint i = 0; i < ast_str->len; ++i) {
- printf("%c", ast_str->str[i]);
- }
-
- printf("\n");
-}
-
#ifdef UNIV_DEBUG
const char*
-fts_ast_oper_name_get(fts_ast_oper_t oper)
-{
- switch(oper) {
- case FTS_NONE:
- return("FTS_NONE");
- case FTS_IGNORE:
- return("FTS_IGNORE");
- case FTS_EXIST:
- return("FTS_EXIST");
- case FTS_NEGATE:
- return("FTS_NEGATE");
- case FTS_INCR_RATING:
- return("FTS_INCR_RATING");
- case FTS_DECR_RATING:
- return("FTS_DECR_RATING");
- case FTS_DISTANCE:
- return("FTS_DISTANCE");
- case FTS_IGNORE_SKIP:
- return("FTS_IGNORE_SKIP");
- case FTS_EXIST_SKIP:
- return("FTS_EXIST_SKIP");
- }
- ut_ad(0);
-}
-
-const char*
fts_ast_node_type_get(fts_ast_type_t type)
{
switch (type) {
@@ -745,7 +804,10 @@ fts_ast_node_type_get(fts_ast_type_t type)
return("FTS_AST_LIST");
case FTS_AST_SUBEXP_LIST:
return("FTS_AST_SUBEXP_LIST");
+ case FTS_AST_PARSER_PHRASE_LIST:
+ return("FTS_AST_PARSER_PHRASE_LIST");
}
ut_ad(0);
+ return("FTS_UNKNOWN");
}
#endif /* UNIV_DEBUG */
diff --git a/storage/innobase/fts/fts0blex.cc b/storage/innobase/fts/fts0blex.cc
index 508069a328f..2f66e9740aa 100644
--- a/storage/innobase/fts/fts0blex.cc
+++ b/storage/innobase/fts/fts0blex.cc
@@ -1,3 +1,4 @@
+#include "univ.i"
#line 2 "fts0blex.cc"
#line 4 "fts0blex.cc"
@@ -669,10 +670,11 @@ this program; if not, write to the Free Software Foundation, Inc.,
/* Required for reentrant parser */
#define YY_DECL int fts_blexer(YYSTYPE* val, yyscan_t yyscanner)
+#define exit(A) ut_error
-#line 674 "fts0blex.cc"
+#line 675 "fts0blex.cc"
#define YY_NO_INPUT 1
-#line 676 "fts0blex.cc"
+#line 677 "fts0blex.cc"
#define INITIAL 0
@@ -930,10 +932,10 @@ YY_DECL
}
{
-#line 43 "fts0blex.l"
+#line 44 "fts0blex.l"
-#line 937 "fts0blex.cc"
+#line 938 "fts0blex.cc"
while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */
{
@@ -988,12 +990,12 @@ do_action: /* This label is used only to access EOF actions. */
case 1:
YY_RULE_SETUP
-#line 45 "fts0blex.l"
+#line 46 "fts0blex.l"
/* Ignore whitespace */ ;
YY_BREAK
case 2:
YY_RULE_SETUP
-#line 47 "fts0blex.l"
+#line 48 "fts0blex.l"
{
val->oper = fts0bget_text(yyscanner)[0];
@@ -1002,7 +1004,7 @@ YY_RULE_SETUP
YY_BREAK
case 3:
YY_RULE_SETUP
-#line 53 "fts0blex.l"
+#line 54 "fts0blex.l"
{
val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner));
@@ -1011,7 +1013,7 @@ YY_RULE_SETUP
YY_BREAK
case 4:
YY_RULE_SETUP
-#line 59 "fts0blex.l"
+#line 60 "fts0blex.l"
{
val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner));
@@ -1020,7 +1022,7 @@ YY_RULE_SETUP
YY_BREAK
case 5:
YY_RULE_SETUP
-#line 65 "fts0blex.l"
+#line 66 "fts0blex.l"
{
val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner));
@@ -1030,15 +1032,15 @@ YY_RULE_SETUP
case 6:
/* rule 6 can match eol */
YY_RULE_SETUP
-#line 71 "fts0blex.l"
+#line 72 "fts0blex.l"
YY_BREAK
case 7:
YY_RULE_SETUP
-#line 73 "fts0blex.l"
+#line 74 "fts0blex.l"
ECHO;
YY_BREAK
-#line 1042 "fts0blex.cc"
+#line 1043 "fts0blex.cc"
case YY_STATE_EOF(INITIAL):
yyterminate();
@@ -2170,6 +2172,6 @@ void yyfree (void * ptr , yyscan_t yyscanner)
#define YYTABLES_NAME "yytables"
-#line 73 "fts0blex.l"
+#line 74 "fts0blex.l"
diff --git a/storage/innobase/fts/fts0blex.l b/storage/innobase/fts/fts0blex.l
index 0d1788f89f0..cf19cd0fee5 100644
--- a/storage/innobase/fts/fts0blex.l
+++ b/storage/innobase/fts/fts0blex.l
@@ -30,6 +30,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
/* Required for reentrant parser */
#define YY_DECL int fts_blexer(YYSTYPE* val, yyscan_t yyscanner)
+#define exit(A) ut_error
%}
diff --git a/storage/innobase/fts/fts0config.cc b/storage/innobase/fts/fts0config.cc
index b39d568575c..6130546e963 100644
--- a/storage/innobase/fts/fts0config.cc
+++ b/storage/innobase/fts/fts0config.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
@@ -29,11 +29,6 @@ Created 2007/5/9 Sunny Bains
#include "fts0priv.h"
-#ifndef UNIV_NONINL
-#include "fts0types.ic"
-#include "fts0vlc.ic"
-#endif
-
/******************************************************************//**
Callback function for fetching the config value.
@return always returns TRUE */
@@ -70,7 +65,6 @@ fts_config_fetch_value(
Get value from the config table. The caller must ensure that enough
space is allocated for value to hold the column contents.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_config_get_value(
/*=================*/
@@ -136,7 +130,6 @@ fts_config_get_value(
/*********************************************************************//**
Create the config table name for retrieving index specific value.
@return index config parameter name */
-UNIV_INTERN
char*
fts_config_create_index_param_name(
/*===============================*/
@@ -150,9 +143,9 @@ fts_config_create_index_param_name(
len = strlen(param);
/* Caller is responsible for deleting name. */
- name = static_cast<char*>(ut_malloc(
+ name = static_cast<char*>(ut_malloc_nokey(
len + FTS_AUX_MIN_TABLE_ID_LENGTH + 2));
- strcpy(name, param);
+ ::strcpy(name, param);
name[len] = '_';
fts_write_object_id(index->id, name + len + 1,
@@ -167,7 +160,6 @@ Get value specific to an FTS index from the config table. The caller
must ensure that enough space is allocated for value to hold the
column contents.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_config_get_index_value(
/*=======================*/
@@ -198,7 +190,6 @@ fts_config_get_index_value(
/******************************************************************//**
Set the value in the config table for name.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_config_set_value(
/*=================*/
@@ -232,8 +223,8 @@ fts_config_set_value(
graph = fts_parse_sql(
fts_table, info,
- "BEGIN UPDATE $table_name SET value = :value "
- "WHERE key = :name;");
+ "BEGIN UPDATE $table_name SET value = :value"
+ " WHERE key = :name;");
trx->op_info = "setting FTS config value";
@@ -276,7 +267,6 @@ fts_config_set_value(
/******************************************************************//**
Set the value specific to an FTS index in the config table.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_config_set_index_value(
/*=======================*/
@@ -304,10 +294,10 @@ fts_config_set_index_value(
return(error);
}
+#ifdef FTS_OPTIMIZE_DEBUG
/******************************************************************//**
Get an ulint value from the config table.
@return DB_SUCCESS if all OK else error code */
-UNIV_INTERN
dberr_t
fts_config_get_index_ulint(
/*=======================*/
@@ -322,15 +312,14 @@ fts_config_get_index_ulint(
/* We set the length of value to the max bytes it can hold. This
information is used by the callback that reads the value.*/
value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
- value.f_str = static_cast<byte*>(ut_malloc(value.f_len + 1));
+ value.f_str = static_cast<byte*>(ut_malloc_nokey(value.f_len + 1));
error = fts_config_get_index_value(trx, index, name, &value);
if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%s) reading `%s'\n",
- ut_strerr(error), name);
+ ib::error() << "(" << ut_strerr(error) << ") reading `"
+ << name << "'";
} else {
*int_value = strtoul((char*) value.f_str, NULL, 10);
}
@@ -343,7 +332,6 @@ fts_config_get_index_ulint(
/******************************************************************//**
Set an ulint value in the config table.
@return DB_SUCCESS if all OK else error code */
-UNIV_INTERN
dberr_t
fts_config_set_index_ulint(
/*=======================*/
@@ -358,32 +346,31 @@ fts_config_set_index_ulint(
/* We set the length of value to the max bytes it can hold. This
information is used by the callback that reads the value.*/
value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
- value.f_str = static_cast<byte*>(ut_malloc(value.f_len + 1));
+ value.f_str = static_cast<byte*>(ut_malloc_nokey(value.f_len + 1));
// FIXME: Get rid of snprintf
ut_a(FTS_MAX_INT_LEN < FTS_MAX_CONFIG_VALUE_LEN);
- value.f_len = ut_snprintf(
- (char*) value.f_str, FTS_MAX_INT_LEN, "%lu", int_value);
+ value.f_len = snprintf(
+ (char*) value.f_str, FTS_MAX_INT_LEN, ULINTPF, int_value);
error = fts_config_set_index_value(trx, index, name, &value);
if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%s) writing `%s'\n",
- ut_strerr(error), name);
+ ib::error() << "(" << ut_strerr(error) << ") writing `"
+ << name << "'";
}
ut_free(value.f_str);
return(error);
}
+#endif /* FTS_OPTIMIZE_DEBUG */
/******************************************************************//**
Get an ulint value from the config table.
@return DB_SUCCESS if all OK else error code */
-UNIV_INTERN
dberr_t
fts_config_get_ulint(
/*=================*/
@@ -399,15 +386,13 @@ fts_config_get_ulint(
/* We set the length of value to the max bytes it can hold. This
information is used by the callback that reads the value.*/
value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
- value.f_str = static_cast<byte*>(ut_malloc(value.f_len + 1));
+ value.f_str = static_cast<byte*>(ut_malloc_nokey(value.f_len + 1));
error = fts_config_get_value(trx, fts_table, name, &value);
if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
- ut_print_timestamp(stderr);
-
- fprintf(stderr, " InnoDB: Error: (%s) reading `%s'\n",
- ut_strerr(error), name);
+ ib::error() << "(" << ut_strerr(error) << ") reading `"
+ << name << "'";
} else {
*int_value = strtoul((char*) value.f_str, NULL, 10);
}
@@ -420,7 +405,6 @@ fts_config_get_ulint(
/******************************************************************//**
Set an ulint value in the config table.
@return DB_SUCCESS if all OK else error code */
-UNIV_INTERN
dberr_t
fts_config_set_ulint(
/*=================*/
@@ -436,145 +420,21 @@ fts_config_set_ulint(
/* We set the length of value to the max bytes it can hold. This
information is used by the callback that reads the value.*/
value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
- value.f_str = static_cast<byte*>(ut_malloc(value.f_len + 1));
+ value.f_str = static_cast<byte*>(ut_malloc_nokey(value.f_len + 1));
- // FIXME: Get rid of snprintf
ut_a(FTS_MAX_INT_LEN < FTS_MAX_CONFIG_VALUE_LEN);
value.f_len = snprintf(
- (char*) value.f_str, FTS_MAX_INT_LEN, "%lu", int_value);
+ (char*) value.f_str, FTS_MAX_INT_LEN, ULINTPF, int_value);
error = fts_config_set_value(trx, fts_table, name, &value);
if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
- ut_print_timestamp(stderr);
-
- fprintf(stderr, " InnoDB: Error: (%s) writing `%s'\n",
- ut_strerr(error), name);
- }
-
- ut_free(value.f_str);
-
- return(error);
-}
-
-/******************************************************************//**
-Increment the value in the config table for column name.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_config_increment_value(
-/*=======================*/
- trx_t* trx, /*!< transaction */
- fts_table_t* fts_table, /*!< in: the indexed
- FTS table */
- const char* name, /*!< in: increment config value
- for this parameter name */
- ulint delta) /*!< in: increment by this
- much */
-{
- dberr_t error;
- fts_string_t value;
- que_t* graph = NULL;
- ulint name_len = strlen(name);
- pars_info_t* info = pars_info_create();
- char table_name[MAX_FULL_NAME_LEN];
-
- /* We set the length of value to the max bytes it can hold. This
- information is used by the callback that reads the value.*/
- value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
- value.f_str = static_cast<byte*>(ut_malloc(value.f_len + 1));
-
- *value.f_str = '\0';
-
- pars_info_bind_varchar_literal(info, "name", (byte*) name, name_len);
-
- pars_info_bind_function(
- info, "my_func", fts_config_fetch_value, &value);
-
- fts_table->suffix = "CONFIG";
- fts_get_table_name(fts_table, table_name);
- pars_info_bind_id(info, true, "config_table", table_name);
-
- graph = fts_parse_sql(
- fts_table, info,
- "DECLARE FUNCTION my_func;\n"
- "DECLARE CURSOR c IS SELECT value FROM $config_table"
- " WHERE key = :name FOR UPDATE;\n"
- "BEGIN\n"
- ""
- "OPEN c;\n"
- "WHILE 1 = 1 LOOP\n"
- " FETCH c INTO my_func();\n"
- " IF c % NOTFOUND THEN\n"
- " EXIT;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE c;");
-
- trx->op_info = "read FTS config value";
-
- error = fts_eval_sql(trx, graph);
-
- fts_que_graph_free_check_lock(fts_table, NULL, graph);
-
- if (UNIV_UNLIKELY(error == DB_SUCCESS)) {
- ulint int_value;
-
- int_value = strtoul((char*) value.f_str, NULL, 10);
-
- int_value += delta;
-
- ut_a(FTS_MAX_CONFIG_VALUE_LEN > FTS_MAX_INT_LEN);
-
- // FIXME: Get rid of snprintf
- value.f_len = snprintf(
- (char*) value.f_str, FTS_MAX_INT_LEN, "%lu", int_value);
-
- fts_config_set_value(trx, fts_table, name, &value);
- }
-
- if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr, " InnoDB: Error: (%s) "
- "while incrementing %s.\n", ut_strerr(error), name);
+ ib::error() << "(" << ut_strerr(error) << ") writing `"
+ << name << "'";
}
ut_free(value.f_str);
return(error);
}
-
-/******************************************************************//**
-Increment the per index value in the config table for column name.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_config_increment_index_value(
-/*=============================*/
- trx_t* trx, /*!< transaction */
- dict_index_t* index, /*!< in: FTS index */
- const char* param, /*!< in: increment config value
- for this parameter name */
- ulint delta) /*!< in: increment by this
- much */
-{
- char* name;
- dberr_t error;
- fts_table_t fts_table;
-
- FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE,
- index->table);
-
- /* We are responsible for free'ing name. */
- name = fts_config_create_index_param_name(param, index);
-
- error = fts_config_increment_value(trx, &fts_table, name, delta);
-
- ut_free(name);
-
- return(error);
-}
-
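
Config values are stored as strings, so the ulint setters and getters above round-trip through text: snprintf with ULINTPF on the write path, strtoul on the read path. Minimal sketch of that round trip (ULINTPF expands to the printf conversion for ulint; "%lu" is assumed here purely for illustration):

	#include <cstdio>
	#include <cstdlib>

	int main()
	{
		char		buf[21];	/* FTS_MAX_INT_LEN-style bound */
		unsigned long	in = 1234;	/* value handed to fts_config_set_ulint */
		snprintf(buf, sizeof buf, "%lu", in);		/* write path */
		unsigned long	out = strtoul(buf, NULL, 10);	/* read path */
		printf("%lu -> \"%s\" -> %lu\n", in, buf, out);
		return 0;
	}
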
diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc
index 6dbe5e0e2a0..330424e4ab7 100644
--- a/storage/innobase/fts/fts0fts.cc
+++ b/storage/innobase/fts/fts0fts.cc
@@ -28,21 +28,18 @@ Full Text Search interface
#include "dict0types.h"
#include "dict0stats_bg.h"
#include "row0sel.h"
-
#include "fts0fts.h"
#include "fts0priv.h"
#include "fts0types.h"
-
#include "fts0types.ic"
#include "fts0vlc.ic"
+#include "fts0plugin.h"
#include "dict0priv.h"
#include "dict0stats.h"
#include "btr0pcur.h"
-#include <vector>
+#include "sync0sync.h"
-#include "ha_prototypes.h"
-
-#define FTS_MAX_ID_LEN 32
+static const ulint FTS_MAX_ID_LEN = 32;
/** Column name from the FTS config table */
#define FTS_MAX_CACHE_SIZE_IN_MB "cache_size_in_mb"
@@ -56,57 +53,41 @@ by looking up the key word in the obsolete table names */
/** The maximum FTS cache size for each table; this is
a configurable variable */
-UNIV_INTERN ulong fts_max_cache_size;
+ulong fts_max_cache_size;
/** Whether the total memory used for FTS cache is exhausted, and we will
need a sync to free some memory */
-UNIV_INTERN bool fts_need_sync = false;
+bool fts_need_sync = false;
/** Variable specifying the total memory allocated for FTS cache */
-UNIV_INTERN ulong fts_max_total_cache_size;
+ulong fts_max_total_cache_size;
/** The FTS result cache limit for each query; this is
a configurable variable */
-UNIV_INTERN size_t fts_result_cache_limit;
+size_t fts_result_cache_limit;
/** Variable specifying the maximum FTS max token size */
-UNIV_INTERN ulong fts_max_token_size;
+ulong fts_max_token_size;
/** Variable specifying the minimum FTS max token size */
-UNIV_INTERN ulong fts_min_token_size;
+ulong fts_min_token_size;
// FIXME: testing
static time_t elapsed_time;
static ulint n_nodes;
-/** Error condition reported by fts_utf8_decode() */
-const ulint UTF8_ERROR = 0xFFFFFFFF;
-
#ifdef FTS_CACHE_SIZE_DEBUG
/** The cache size permissible lower limit (1K) */
static const ulint FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB = 1;
/** The cache size permissible upper limit (1G) */
static const ulint FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB = 1024;
-#endif /* FTS_CACHE_SIZE_DEBUG */
+#endif
/** Time to sleep after DEADLOCK error before retrying operation. */
static const ulint FTS_DEADLOCK_RETRY_WAIT = 100000;
-#ifdef UNIV_PFS_RWLOCK
-UNIV_INTERN mysql_pfs_key_t fts_cache_rw_lock_key;
-UNIV_INTERN mysql_pfs_key_t fts_cache_init_rw_lock_key;
-#endif /* UNIV_PFS_RWLOCK */
-
-#ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t fts_delete_mutex_key;
-UNIV_INTERN mysql_pfs_key_t fts_optimize_mutex_key;
-UNIV_INTERN mysql_pfs_key_t fts_bg_threads_mutex_key;
-UNIV_INTERN mysql_pfs_key_t fts_doc_id_mutex_key;
-UNIV_INTERN mysql_pfs_key_t fts_pll_tokenize_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
/** InnoDB default stopword list:
There are different versions of stopword lists; the stop words listed
below come from the "Google Stopword" list. Reference:
@@ -162,63 +143,6 @@ struct fts_aux_table_t {
char* name; /*!< Name of the table */
};
-/** SQL statements for creating the ancillary common FTS tables.
-The table name here shall be consistent with fts_common_tables. */
-static const char* fts_create_common_tables_sql = {
- "BEGIN\n"
- ""
- "CREATE TABLE $DELETED (\n"
- " doc_id BIGINT UNSIGNED\n"
- ") COMPACT;\n"
- "CREATE UNIQUE CLUSTERED INDEX IND ON $DELETED (doc_id);\n"
- ""
- "CREATE TABLE $DELETED_CACHE (\n"
- " doc_id BIGINT UNSIGNED\n"
- ") COMPACT;\n"
- "CREATE UNIQUE CLUSTERED INDEX IND "
- "ON $DELETED_CACHE(doc_id);\n"
- ""
- "CREATE TABLE $BEING_DELETED (\n"
- " doc_id BIGINT UNSIGNED\n"
- ") COMPACT;\n"
- "CREATE UNIQUE CLUSTERED INDEX IND "
- "ON $BEING_DELETED(doc_id);\n"
- ""
- "CREATE TABLE $BEING_DELETED_CACHE (\n"
- " doc_id BIGINT UNSIGNED\n"
- ") COMPACT;\n"
- "CREATE UNIQUE CLUSTERED INDEX IND "
- "ON $BEING_DELETED_CACHE(doc_id);\n"
- ""
- "CREATE TABLE $CONFIG (\n"
- " key CHAR(50),\n"
- " value CHAR(200) NOT NULL\n"
- ") COMPACT;\n"
- "CREATE UNIQUE CLUSTERED INDEX IND ON $CONFIG(key);\n"
-};
-
-#ifdef FTS_DOC_STATS_DEBUG
-/** Template for creating the FTS auxiliary index specific tables. This is
-mainly designed for the statistics work in the future */
-static const char* fts_create_index_tables_sql = {
- "BEGIN\n"
- ""
- "CREATE TABLE $doc_id_table (\n"
- " doc_id BIGINT UNSIGNED,\n"
- " word_count INTEGER UNSIGNED NOT NULL\n"
- ") COMPACT;\n"
- "CREATE UNIQUE CLUSTERED INDEX IND ON $doc_id_table(doc_id);\n"
-};
-#endif
-
-/** Template for creating the ancillary FTS tables word index tables. */
-static const char* fts_create_index_sql = {
- "BEGIN\n"
- ""
- "CREATE UNIQUE CLUSTERED INDEX FTS_INDEX_TABLE_IND "
- "ON $table (word, first_doc_id);\n"
-};
-
/** FTS auxiliary table suffixes that are common to all FT indexes. */
const char* fts_common_tables[] = {
"BEING_DELETED",
@@ -259,20 +183,24 @@ static const char* fts_config_table_insert_values_sql =
"INSERT INTO $config_table VALUES ('"
FTS_TABLE_STATE "', '0');\n";
+/** FTS tokenize parameter for the plugin parser */
+struct fts_tokenize_param_t {
+ fts_doc_t* result_doc; /*!< Result doc for tokens */
+ ulint add_pos; /*!< Added position for tokens */
+};
+
/** Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
@param[in,out] sync sync state
@param[in] unlock_cache whether unlock cache lock when write node
@param[in] wait whether wait when a sync is in progress
-@param[in] has_dict whether has dict operation lock
@return DB_SUCCESS if all OK */
static
dberr_t
fts_sync(
fts_sync_t* sync,
bool unlock_cache,
- bool wait,
- bool has_dict);
+ bool wait);
/****************************************************************//**
Release all resources held by the words rb tree, e.g., the node ilist. */
@@ -305,22 +233,6 @@ fts_add_doc_by_id(
doc_id_t doc_id, /*!< in: doc id */
ib_vector_t* fts_indexes MY_ATTRIBUTE((unused)));
/*!< in: affected fts indexes */
-#ifdef FTS_DOC_STATS_DEBUG
-/****************************************************************//**
-Check whether a particular word (term) exists in the FTS index.
-@return DB_SUCCESS if all went fine */
-static
-dberr_t
-fts_is_word_in_index(
-/*=================*/
- trx_t* trx, /*!< in: FTS query state */
- que_t** graph, /*!< out: Query graph */
- fts_table_t* fts_table, /*!< in: table instance */
- const fts_string_t* word, /*!< in: the word to check */
- ibool* found) /*!< out: TRUE if exists */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#endif /* FTS_DOC_STATS_DEBUG */
-
/******************************************************************//**
Update the last document id. This function could create a new
transaction to update the last document id.
@@ -334,6 +246,95 @@ fts_update_sync_doc_id(
trx_t* trx) /*!< in: update trx, or NULL */
MY_ATTRIBUTE((nonnull(1)));
+/** Tokenize a document.
+@param[in,out] doc document to tokenize
+@param[out] result tokenization result
+@param[in] parser pluggable parser */
+static
+void
+fts_tokenize_document(
+ fts_doc_t* doc,
+ fts_doc_t* result,
+ st_mysql_ftparser* parser);
+
+/** Continue to tokenize a document.
+@param[in,out] doc document to tokenize
+@param[in] add_pos add this position to all tokens from this tokenization
+@param[out] result tokenization result
+@param[in] parser pluggable parser */
+static
+void
+fts_tokenize_document_next(
+ fts_doc_t* doc,
+ ulint add_pos,
+ fts_doc_t* result,
+ st_mysql_ftparser* parser);
+
+/** Create the vector of fts_get_doc_t instances.
+@param[in,out] cache fts cache
+@return vector of fts_get_doc_t instances */
+static
+ib_vector_t*
+fts_get_docs_create(
+ fts_cache_t* cache);
+
+/** Free the FTS cache.
+@param[in,out]	cache	cache to be freed */
+static
+void
+fts_cache_destroy(fts_cache_t* cache)
+{
+ rw_lock_free(&cache->lock);
+ rw_lock_free(&cache->init_lock);
+ mutex_free(&cache->optimize_lock);
+ mutex_free(&cache->deleted_lock);
+ mutex_free(&cache->doc_id_lock);
+ os_event_destroy(cache->sync->event);
+
+ if (cache->stopword_info.cached_stopword) {
+ rbt_free(cache->stopword_info.cached_stopword);
+ }
+
+ if (cache->sync_heap->arg) {
+ mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
+ }
+
+ mem_heap_free(cache->cache_heap);
+}
+
+/** Get a character set based on precise type.
+@param prtype precise type
+@return the corresponding character set */
+UNIV_INLINE
+CHARSET_INFO*
+fts_get_charset(ulint prtype)
+{
+#ifdef UNIV_DEBUG
+ switch (prtype & DATA_MYSQL_TYPE_MASK) {
+ case MYSQL_TYPE_BIT:
+ case MYSQL_TYPE_STRING:
+ case MYSQL_TYPE_VAR_STRING:
+ case MYSQL_TYPE_TINY_BLOB:
+ case MYSQL_TYPE_MEDIUM_BLOB:
+ case MYSQL_TYPE_BLOB:
+ case MYSQL_TYPE_LONG_BLOB:
+ case MYSQL_TYPE_VARCHAR:
+ break;
+ default:
+ ut_error;
+ }
+#endif /* UNIV_DEBUG */
+
+ uint cs_num = (uint) dtype_get_charset_coll(prtype);
+
+ if (CHARSET_INFO* cs = get_charset(cs_num, MYF(MY_WME))) {
+ return(cs);
+ }
+
+ ib::fatal() << "Unable to find charset-collation " << cs_num;
+ return(NULL);
+}
+
/****************************************************************//**
This function loads the default InnoDB stopword list */
static
@@ -351,9 +352,9 @@ fts_load_default_stopword(
heap = static_cast<mem_heap_t*>(allocator->arg);
if (!stopword_info->cached_stopword) {
- /* For default stopword, we always use fts_utf8_string_cmp() */
- stopword_info->cached_stopword = rbt_create(
- sizeof(fts_tokenizer_word_t), fts_utf8_string_cmp);
+ stopword_info->cached_stopword = rbt_create_arg_cmp(
+ sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
+ &my_charset_latin1);
}
stop_words = stopword_info->cached_stopword;
@@ -373,7 +374,7 @@ fts_load_default_stopword(
str.f_len = ut_strlen(word);
str.f_str = reinterpret_cast<byte*>(word);
- fts_utf8_string_dup(&new_word.text, &str, heap);
+ fts_string_dup(&new_word.text, &str, heap);
rbt_insert(stop_words, &new_word, &new_word);
}
@@ -494,7 +495,7 @@ fts_load_user_stopword(
info,
"DECLARE FUNCTION my_func;\n"
"DECLARE CURSOR c IS"
- " SELECT value "
+ " SELECT value"
" FROM $table_stopword;\n"
"BEGIN\n"
"\n"
@@ -518,18 +519,15 @@ fts_load_user_stopword(
fts_sql_rollback(trx);
- ut_print_timestamp(stderr);
-
if (error == DB_LOCK_WAIT_TIMEOUT) {
- fprintf(stderr, " InnoDB: Warning: lock wait "
- "timeout reading user stopword table. "
- "Retrying!\n");
+ ib::warn() << "Lock wait timeout reading user"
+ " stopword table. Retrying!";
trx->error_state = DB_SUCCESS;
} else {
- fprintf(stderr, " InnoDB: Error '%s' "
- "while reading user stopword table.\n",
- ut_strerr(error));
+ ib::error() << "Error '" << ut_strerr(error)
+ << "' while reading user stopword"
+ " table.";
ret = FALSE;
break;
}
@@ -569,7 +567,7 @@ fts_index_cache_init(
index_cache->doc_stats = ib_vector_create(
allocator, sizeof(fts_doc_stats_t), 4);
- for (i = 0; fts_index_selector[i].value; ++i) {
+ for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
ut_a(index_cache->ins_graph[i] == NULL);
ut_a(index_cache->sel_graph[i] == NULL);
}
@@ -577,7 +575,6 @@ fts_index_cache_init(
/*********************************************************************//**
Initialize FTS cache. */
-UNIV_INTERN
void
fts_cache_init(
/*===========*/
@@ -610,7 +607,6 @@ fts_cache_init(
/****************************************************************//**
Create an FTS cache. */
-UNIV_INTERN
fts_cache_t*
fts_cache_create(
/*=============*/
@@ -632,15 +628,11 @@ fts_cache_create(
fts_cache_init_rw_lock_key, &cache->init_lock,
SYNC_FTS_CACHE_INIT);
- mutex_create(
- fts_delete_mutex_key, &cache->deleted_lock, SYNC_FTS_OPTIMIZE);
+ mutex_create(LATCH_ID_FTS_DELETE, &cache->deleted_lock);
- mutex_create(
- fts_optimize_mutex_key, &cache->optimize_lock,
- SYNC_FTS_OPTIMIZE);
+ mutex_create(LATCH_ID_FTS_OPTIMIZE, &cache->optimize_lock);
- mutex_create(
- fts_doc_id_mutex_key, &cache->doc_id_lock, SYNC_FTS_OPTIMIZE);
+ mutex_create(LATCH_ID_FTS_DOC_ID, &cache->doc_id_lock);
/* This is the heap used to create the cache itself. */
cache->self_heap = ib_heap_allocator_create(heap);
@@ -649,13 +641,11 @@ fts_cache_create(
cache->sync_heap = ib_heap_allocator_create(heap);
cache->sync_heap->arg = NULL;
- fts_need_sync = false;
-
cache->sync = static_cast<fts_sync_t*>(
mem_heap_zalloc(heap, sizeof(fts_sync_t)));
cache->sync->table = table;
- cache->sync->event = os_event_create();
+ cache->sync->event = os_event_create(0);
/* Create the index cache vector that will hold the inverted indexes. */
cache->indexes = ib_vector_create(
@@ -675,7 +665,6 @@ fts_cache_create(
/*******************************************************************//**
Add a newly created index into the FTS cache */
-UNIV_INTERN
void
fts_add_index(
/*==========*/
@@ -714,9 +703,8 @@ fts_reset_get_doc(
fts_get_doc_t* get_doc;
ulint i;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_EX));
-#endif
+ ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
+
ib_vector_reset(cache->get_docs);
for (i = 0; i < ib_vector_size(cache->indexes); i++) {
@@ -792,7 +780,6 @@ fts_in_index_cache(
Check indexes in the fts->indexes is also present in index cache and
table->indexes list
@return TRUE if all indexes match */
-UNIV_INTERN
ibool
fts_check_cached_index(
/*===================*/
@@ -828,7 +815,6 @@ fts_check_cached_index(
/*******************************************************************//**
Drop auxiliary tables related to an FTS index
@return DB_SUCCESS or error number */
-UNIV_INTERN
dberr_t
fts_drop_index(
/*===========*/
@@ -925,7 +911,6 @@ fts_drop_index(
/****************************************************************//**
Free the query graph but check whether dict_sys->mutex is already
held */
-UNIV_INTERN
void
fts_que_graph_free_check_lock(
/*==========================*/
@@ -960,7 +945,6 @@ fts_que_graph_free_check_lock(
/****************************************************************//**
Get the character set of an FTS index. */
-UNIV_INTERN
CHARSET_INFO*
fts_index_get_charset(
/*==================*/
@@ -973,9 +957,7 @@ fts_index_get_charset(
field = dict_index_get_nth_field(index, 0);
prtype = field->col->prtype;
- charset = innobase_get_fts_charset(
- (int) (prtype & DATA_MYSQL_TYPE_MASK),
- (uint) dtype_get_charset_coll(prtype));
+ charset = fts_get_charset(prtype);
#ifdef FTS_DEBUG
/* Set up charset info for this index. Please note all
@@ -986,9 +968,7 @@ fts_index_get_charset(
field = dict_index_get_nth_field(index, i);
prtype = field->col->prtype;
- fld_charset = innobase_get_fts_charset(
- (int)(prtype & DATA_MYSQL_TYPE_MASK),
- (uint) dtype_get_charset_coll(prtype));
+ fld_charset = fts_get_charset(prtype);
/* All FTS columns should have the same charset */
if (charset) {
@@ -1005,7 +985,6 @@ fts_index_get_charset(
/****************************************************************//**
Create an FTS index cache.
@return Index Cache */
-UNIV_INTERN
fts_index_cache_t*
fts_cache_index_cache_create(
/*=========================*/
@@ -1018,9 +997,7 @@ fts_cache_index_cache_create(
ut_a(cache != NULL);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_EX));
-#endif
+ ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
/* Must not already exist in the cache vector. */
ut_a(fts_find_index_cache(cache, index) == NULL);
@@ -1034,7 +1011,7 @@ fts_cache_index_cache_create(
index_cache->charset = fts_index_get_charset(index);
- n_bytes = sizeof(que_t*) * sizeof(fts_index_selector);
+ n_bytes = sizeof(que_t*) * FTS_NUM_AUX_INDEX;
index_cache->ins_graph = static_cast<que_t**>(
mem_heap_zalloc(static_cast<mem_heap_t*>(
@@ -1090,7 +1067,6 @@ fts_words_free(
/** Clear cache.
@param[in,out] cache fts cache */
-UNIV_INTERN
void
fts_cache_clear(
fts_cache_t* cache)
@@ -1110,7 +1086,7 @@ fts_cache_clear(
index_cache->words = NULL;
- for (j = 0; fts_index_selector[j].value; ++j) {
+ for (j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
if (index_cache->ins_graph[j] != NULL) {
@@ -1137,6 +1113,8 @@ fts_cache_clear(
mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
cache->sync_heap->arg = NULL;
+ fts_need_sync = false;
+
cache->total_size = 0;
mutex_enter((ib_mutex_t*) &cache->deleted_lock);
@@ -1156,10 +1134,8 @@ fts_get_index_cache(
{
ulint i;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_EX)
- || rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_EX));
-#endif
+ ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_X)
+ || rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X));
for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
fts_index_cache_t* index_cache;
@@ -1189,9 +1165,7 @@ fts_get_index_get_doc(
{
ulint i;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_EX));
-#endif
+ ut_ad(rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X));
for (i = 0; i < ib_vector_size(cache->get_docs); ++i) {
fts_get_doc_t* get_doc;
@@ -1210,32 +1184,6 @@ fts_get_index_get_doc(
#endif
/**********************************************************************//**
-Free the FTS cache. */
-UNIV_INTERN
-void
-fts_cache_destroy(
-/*==============*/
- fts_cache_t* cache) /*!< in: cache*/
-{
- rw_lock_free(&cache->lock);
- rw_lock_free(&cache->init_lock);
- mutex_free(&cache->optimize_lock);
- mutex_free(&cache->deleted_lock);
- mutex_free(&cache->doc_id_lock);
- os_event_free(cache->sync->event);
-
- if (cache->stopword_info.cached_stopword) {
- rbt_free(cache->stopword_info.cached_stopword);
- }
-
- if (cache->sync_heap->arg) {
- mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
- }
-
- mem_heap_free(cache->cache_heap);
-}
-
-/**********************************************************************//**
Find an existing word, or if not found, create one and return it.
@return specified word token */
static
@@ -1250,14 +1198,12 @@ fts_tokenizer_word_get(
fts_tokenizer_word_t* word;
ib_rbt_bound_t parent;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&cache->lock, RW_LOCK_EX));
-#endif
+ ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
/* If it is a stopword, do not index it */
- if (cache->stopword_info.cached_stopword != NULL
- && rbt_search(cache->stopword_info.cached_stopword,
- &parent, text) == 0) {
+ if (!fts_check_token(text,
+ cache->stopword_info.cached_stopword,
+ index_cache->charset)) {
return(NULL);
}
@@ -1272,7 +1218,7 @@ fts_tokenizer_word_get(
new_word.nodes = ib_vector_create(
cache->sync_heap, sizeof(fts_node_t), 4);
- fts_utf8_string_dup(&new_word.text, text, heap);
+ fts_string_dup(&new_word.text, text, heap);
parent.last = rbt_add_node(
index_cache->words, &parent, &new_word);
@@ -1294,7 +1240,6 @@ fts_tokenizer_word_get(
/**********************************************************************//**
Add the given doc_id/word positions to the given node's ilist. */
-UNIV_INTERN
void
fts_cache_node_add_positions(
/*=========================*/
@@ -1311,11 +1256,12 @@ fts_cache_node_add_positions(
byte* ptr_start;
ulint doc_id_delta;
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
if (cache) {
- ut_ad(rw_lock_own(&cache->lock, RW_LOCK_EX));
+ ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
}
-#endif
+#endif /* UNIV_DEBUG */
+
ut_ad(doc_id >= node->last_doc_id);
/* Calculate the space required to store the ilist. */
@@ -1356,7 +1302,7 @@ fts_cache_node_add_positions(
new_size = (ulint)(1.2 * new_size);
}
- ilist = static_cast<byte*>(ut_malloc(new_size));
+ ilist = static_cast<byte*>(ut_malloc_nokey(new_size));
ptr = ilist + node->ilist_size;
node->ilist_size_alloc = new_size;
@@ -1425,9 +1371,7 @@ fts_cache_add_doc(
return;
}
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&cache->lock, RW_LOCK_EX));
-#endif
+ ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
n_words = rbt_size(tokens);
@@ -1514,12 +1458,12 @@ fts_drop_table(
	/* Pass nonatomic=false (don't allow data dict unlock),
because the transaction may hold locks on SYS_* tables from
previous calls to fts_drop_table(). */
- error = row_drop_table_for_mysql(table_name, trx, true, false);
+ error = row_drop_table_for_mysql(table_name, trx,
+ SQLCOM_DROP_DB, false, false);
if (error != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unable to drop FTS index aux table %s: %s",
- table_name, ut_strerr(error));
+ ib::error() << "Unable to drop FTS index aux table "
+ << table_name << ": " << ut_strerr(error);
}
} else {
error = DB_FAIL;
@@ -1558,15 +1502,14 @@ fts_rename_one_aux_table(
table_new_name_len - new_db_name_len);
fts_table_new_name[table_new_name_len] = 0;
- return(row_rename_table_for_mysql(
- fts_table_old_name, fts_table_new_name, trx, false));
+ return row_rename_table_for_mysql(
+ fts_table_old_name, fts_table_new_name, trx, false, false);
}
/****************************************************************//**
Rename the auxiliary tables of all FTS indexes of a table. This rename
is due to a database name change
@return DB_SUCCESS or error code */
-
dberr_t
fts_rename_aux_tables(
/*==================*/
@@ -1585,7 +1528,6 @@ fts_rename_aux_tables(
/* Rename common auxiliary tables */
for (i = 0; fts_common_tables[i] != NULL; ++i) {
fts_table.suffix = fts_common_tables[i];
-
fts_get_table_name(&fts_table, old_table_name, true);
err = fts_rename_one_aux_table(new_name, old_table_name, trx);
@@ -1607,7 +1549,7 @@ fts_rename_aux_tables(
FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
- for (ulint j = 0; fts_index_selector[j].value; ++j) {
+ for (ulint j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
fts_table.suffix = fts_get_suffix(j);
fts_get_table_name(&fts_table, old_table_name, true);
@@ -1648,7 +1590,6 @@ fts_drop_common_tables(
char table_name[MAX_FULL_NAME_LEN];
fts_table->suffix = fts_common_tables[i];
-
fts_get_table_name(fts_table, table_name, true);
err = fts_drop_table(trx, table_name);
@@ -1666,7 +1607,7 @@ fts_drop_common_tables(
Since we do a horizontal split on the index table, we need to drop
all the split tables.
@return DB_SUCCESS or error code */
-UNIV_INTERN
+static
dberr_t
fts_drop_index_split_tables(
/*========================*/
@@ -1680,12 +1621,11 @@ fts_drop_index_split_tables(
FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
- for (i = 0; fts_index_selector[i].value; ++i) {
+ for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
dberr_t err;
char table_name[MAX_FULL_NAME_LEN];
fts_table.suffix = fts_get_suffix(i);
-
fts_get_table_name(&fts_table, table_name, true);
err = fts_drop_table(trx, table_name);
@@ -1702,50 +1642,13 @@ fts_drop_index_split_tables(
/****************************************************************//**
Drops FTS auxiliary tables for an FTS index
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_drop_index_tables(
/*==================*/
trx_t* trx, /*!< in: transaction */
dict_index_t* index) /*!< in: Index to drop */
{
- dberr_t error = DB_SUCCESS;
-
-#ifdef FTS_DOC_STATS_DEBUG
- fts_table_t fts_table;
- static const char* index_tables[] = {
- "DOC_ID",
- NULL
- };
-#endif /* FTS_DOC_STATS_DEBUG */
-
- dberr_t err = fts_drop_index_split_tables(trx, index);
-
- /* We only return the status of the last error. */
- if (err != DB_SUCCESS) {
- error = err;
- }
-
-#ifdef FTS_DOC_STATS_DEBUG
- FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
-
- for (ulint i = 0; index_tables[i] != NULL; ++i) {
- char table_name[MAX_FULL_NAME_LEN];
-
- fts_table.suffix = index_tables[i];
-
- fts_get_table_name(&fts_table, table_name, true);
-
- err = fts_drop_table(trx, table_name);
-
- /* We only return the status of the last error. */
- if (err != DB_SUCCESS && err != DB_FAIL) {
- error = err;
- }
- }
-#endif /* FTS_DOC_STATS_DEBUG */
-
- return(error);
+ return(fts_drop_index_split_tables(trx, index));
}
/****************************************************************//**
@@ -1787,7 +1690,6 @@ Drops the ancillary tables needed for supporting an FTS index on a
given table. row_mysql_lock_data_dictionary must have been called before
this.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_drop_tables(
/*============*/
@@ -1810,19 +1712,137 @@ fts_drop_tables(
return(error);
}
-/*********************************************************************//**
-Creates the common ancillary tables needed for supporting an FTS index
+/** Create dict_table_t object for FTS Aux tables.
+@param[in] aux_table_name FTS Aux table name
+@param[in] table table object of FTS Index
+@param[in] n_cols number of columns for FTS Aux table
+@return table object for FTS Aux table */
+static
+dict_table_t*
+fts_create_in_mem_aux_table(
+ const char* aux_table_name,
+ const dict_table_t* table,
+ ulint n_cols)
+{
+ dict_table_t* new_table = dict_mem_table_create(
+ aux_table_name, table->space, n_cols, 0, table->flags,
+ table->space == TRX_SYS_SPACE
+ ? 0 : table->space == SRV_TMP_SPACE_ID
+ ? DICT_TF2_TEMPORARY : DICT_TF2_USE_FILE_PER_TABLE);
+
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ ut_ad(table->data_dir_path != NULL);
+ new_table->data_dir_path = mem_heap_strdup(
+ new_table->heap, table->data_dir_path);
+ }
+
+ return(new_table);
+}
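
The nested conditional choosing the flags2 argument above is easier to read unfolded; same logic, no behavior change:

	ulint	flags2;
	if (table->space == TRX_SYS_SPACE) {
		flags2 = 0;				/* aux table in the system tablespace */
	} else if (table->space == SRV_TMP_SPACE_ID) {
		flags2 = DICT_TF2_TEMPORARY;		/* temporary tablespace */
	} else {
		flags2 = DICT_TF2_USE_FILE_PER_TABLE;	/* ordinary file-per-table */
	}
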
+
+/** Function to create one FTS common table.
+@param[in,out] trx InnoDB transaction
+@param[in] table Table that has FTS Index
+@param[in] fts_table_name FTS AUX table name
+@param[in] fts_suffix FTS AUX table suffix
+@param[in] heap heap
+@return table object if created, else NULL */
+static
+dict_table_t*
+fts_create_one_common_table(
+ trx_t* trx,
+ const dict_table_t* table,
+ const char* fts_table_name,
+ const char* fts_suffix,
+ mem_heap_t* heap)
+{
+ dict_table_t* new_table;
+ dberr_t error;
+ bool is_config = strcmp(fts_suffix, "CONFIG") == 0;
+
+ if (!is_config) {
+
+ new_table = fts_create_in_mem_aux_table(
+ fts_table_name, table, FTS_DELETED_TABLE_NUM_COLS);
+
+ dict_mem_table_add_col(
+ new_table, heap, "doc_id", DATA_INT, DATA_UNSIGNED,
+ FTS_DELETED_TABLE_COL_LEN);
+ } else {
+ /* Config table has different schema. */
+ new_table = fts_create_in_mem_aux_table(
+ fts_table_name, table, FTS_CONFIG_TABLE_NUM_COLS);
+
+ dict_mem_table_add_col(
+ new_table, heap, "key", DATA_VARCHAR, 0,
+ FTS_CONFIG_TABLE_KEY_COL_LEN);
+
+ dict_mem_table_add_col(
+ new_table, heap, "value", DATA_VARCHAR, DATA_NOT_NULL,
+ FTS_CONFIG_TABLE_VALUE_COL_LEN);
+ }
+
+ error = row_create_table_for_mysql(new_table, trx,
+ FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
+
+ if (error == DB_SUCCESS) {
+
+ dict_index_t* index = dict_mem_index_create(
+ fts_table_name, "FTS_COMMON_TABLE_IND",
+ new_table->space, DICT_UNIQUE|DICT_CLUSTERED, 1);
+
+ if (!is_config) {
+ dict_mem_index_add_field(index, "doc_id", 0);
+ } else {
+ dict_mem_index_add_field(index, "key", 0);
+ }
+
+ /* We save and restore trx->dict_operation because
+ row_create_index_for_mysql() changes the operation to
+ TRX_DICT_OP_TABLE. */
+ trx_dict_op_t op = trx_get_dict_operation(trx);
+
+ error = row_create_index_for_mysql(index, trx, NULL);
+
+ trx->dict_operation = op;
+ }
+
+ if (error != DB_SUCCESS) {
+ dict_mem_table_free(new_table);
+ new_table = NULL;
+ ib::warn() << "Failed to create FTS common table "
+ << fts_table_name;
+ trx->error_state = DB_SUCCESS;
+ row_drop_table_for_mysql(fts_table_name, trx, SQLCOM_DROP_DB);
+ trx->error_state = error;
+ }
+ return(new_table);
+}
+
+/** Creates the common auxiliary tables needed for supporting an FTS index
on the given table. row_mysql_lock_data_dictionary must have been called
before this.
+The following tables are created.
+CREATE TABLE $FTS_PREFIX_DELETED
+ (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
+CREATE TABLE $FTS_PREFIX_DELETED_CACHE
+ (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
+CREATE TABLE $FTS_PREFIX_BEING_DELETED
+ (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
+CREATE TABLE $FTS_PREFIX_BEING_DELETED_CACHE
+ (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
+CREATE TABLE $FTS_PREFIX_CONFIG
+ (key CHAR(50), value CHAR(200), UNIQUE CLUSTERED INDEX on key)
+@param[in,out] trx transaction
+@param[in] table table with FTS index
+@param[in] name table name normalized
+@param[in] skip_doc_id_index Skip index on doc id
@return DB_SUCCESS if succeed */
-UNIV_INTERN
dberr_t
fts_create_common_tables(
-/*=====================*/
- trx_t* trx, /*!< in: transaction */
- const dict_table_t* table, /*!< in: table with FTS index */
- const char* name, /*!< in: table name normalized.*/
- bool skip_doc_id_index)/*!< in: Skip index on doc id */
+ trx_t* trx,
+ const dict_table_t* table,
+ const char* name,
+ bool skip_doc_id_index)
{
dberr_t error;
que_t* graph;
@@ -1832,7 +1852,13 @@ fts_create_common_tables(
char fts_name[MAX_FULL_NAME_LEN];
char full_name[sizeof(fts_common_tables) / sizeof(char*)]
[MAX_FULL_NAME_LEN];
- ulint i;
+
+ dict_index_t* index = NULL;
+ trx_dict_op_t op;
+ /* common_tables vector is used for dropping FTS common tables
+ on error condition. */
+ std::vector<dict_table_t*> common_tables;
+ std::vector<dict_table_t*>::const_iterator it;
FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
@@ -1844,27 +1870,28 @@ fts_create_common_tables(
}
/* Create the FTS tables that are common to an FTS index. */
- info = pars_info_create();
-
- for (i = 0; fts_common_tables[i] != NULL; ++i) {
+ for (ulint i = 0; fts_common_tables[i] != NULL; ++i) {
fts_table.suffix = fts_common_tables[i];
fts_get_table_name(&fts_table, full_name[i], true);
+ dict_table_t* common_table = fts_create_one_common_table(
+ trx, table, full_name[i], fts_table.suffix, heap);
- pars_info_bind_id(info, true,
- fts_common_tables[i], full_name[i]);
- }
-
- graph = fts_parse_sql_no_dict_lock(NULL, info,
- fts_create_common_tables_sql);
-
- error = fts_eval_sql(trx, graph);
-
- que_graph_free(graph);
+ if (common_table == NULL) {
+ error = DB_ERROR;
+ goto func_exit;
+ } else {
+ common_tables.push_back(common_table);
+ }
- if (error != DB_SUCCESS) {
+ DBUG_EXECUTE_IF("ib_fts_aux_table_error",
+ /* Return error after creating FTS_AUX_CONFIG table. */
+ if (i == 4) {
+ error = DB_ERROR;
+ goto func_exit;
+ }
+ );
- goto func_exit;
}
/* Write the default settings to the config table. */
@@ -1886,82 +1913,63 @@ fts_create_common_tables(
goto func_exit;
}
- info = pars_info_create();
+ index = dict_mem_index_create(
+ name, FTS_DOC_ID_INDEX_NAME, table->space,
+ DICT_UNIQUE, 1);
+ dict_mem_index_add_field(index, FTS_DOC_ID_COL_NAME, 0);
- pars_info_bind_id(info, TRUE, "table_name", name);
- pars_info_bind_id(info, TRUE, "index_name", FTS_DOC_ID_INDEX_NAME);
- pars_info_bind_id(info, TRUE, "doc_id_col_name", FTS_DOC_ID_COL_NAME);
+ op = trx_get_dict_operation(trx);
- /* Create the FTS DOC_ID index on the hidden column. Currently this
- is common for any FT index created on the table. */
- graph = fts_parse_sql_no_dict_lock(
- NULL,
- info,
- mem_heap_printf(
- heap,
- "BEGIN\n"
- ""
- "CREATE UNIQUE INDEX $index_name ON $table_name("
- "$doc_id_col_name);\n"));
+ error = row_create_index_for_mysql(index, trx, NULL);
- error = fts_eval_sql(trx, graph);
- que_graph_free(graph);
+ trx->dict_operation = op;
func_exit:
if (error != DB_SUCCESS) {
- /* We have special error handling here */
-
- trx->error_state = DB_SUCCESS;
-
- trx_rollback_to_savepoint(trx, NULL);
-
- row_drop_table_for_mysql(table->name, trx, FALSE, TRUE);
-
- trx->error_state = DB_SUCCESS;
+ for (it = common_tables.begin(); it != common_tables.end();
+ ++it) {
+ row_drop_table_for_mysql((*it)->name.m_name, trx,
+ SQLCOM_DROP_DB);
+ }
}
+ common_tables.clear();
mem_heap_free(heap);
return(error);
}
-/*************************************************************//**
-Wrapper function of fts_create_index_tables_low(), create auxiliary
-tables for an FTS index
-
+/** Create one FTS auxiliary index table for an FTS index.
+@param[in,out] trx transaction
+@param[in] index the index instance
+@param[in] fts_table fts_table structure
+@param[in,out] heap memory heap
@see row_merge_create_fts_sort_index()
-@return: DB_SUCCESS or error code */
+@return DB_SUCCESS or error code */
static
dict_table_t*
fts_create_one_index_table(
-/*=======================*/
- trx_t* trx, /*!< in: transaction */
- const dict_index_t*
- index, /*!< in: the index instance */
- fts_table_t* fts_table, /*!< in: fts_table structure */
- mem_heap_t* heap) /*!< in: heap */
+ trx_t* trx,
+ const dict_index_t* index,
+ const fts_table_t* fts_table,
+ mem_heap_t* heap)
{
dict_field_t* field;
- dict_table_t* new_table = NULL;
+ dict_table_t* new_table;
char table_name[MAX_FULL_NAME_LEN];
dberr_t error;
CHARSET_INFO* charset;
- ulint flags2 = 0;
ut_ad(index->type & DICT_FTS);
fts_get_table_name(fts_table, table_name, true);
- if (srv_file_per_table) {
- flags2 = DICT_TF2_USE_TABLESPACE;
- }
-
- new_table = dict_mem_table_create(table_name, 0, 5, 1, flags2);
+ new_table = fts_create_in_mem_aux_table(
+ table_name, fts_table->table,
+ FTS_AUX_INDEX_TABLE_NUM_COLS);
field = dict_index_get_nth_field(index, 0);
- charset = innobase_get_fts_charset(
- (int)(field->col->prtype & DATA_MYSQL_TYPE_MASK),
- (uint) dtype_get_charset_coll(field->col->prtype));
+ charset = fts_get_charset(field->col->prtype);
dict_mem_table_add_col(new_table, heap, "word",
charset == &my_charset_latin1
@@ -1972,82 +1980,87 @@ fts_create_one_index_table(
dict_mem_table_add_col(new_table, heap, "first_doc_id", DATA_INT,
DATA_NOT_NULL | DATA_UNSIGNED,
- sizeof(doc_id_t));
+ FTS_INDEX_FIRST_DOC_ID_LEN);
dict_mem_table_add_col(new_table, heap, "last_doc_id", DATA_INT,
DATA_NOT_NULL | DATA_UNSIGNED,
- sizeof(doc_id_t));
+ FTS_INDEX_LAST_DOC_ID_LEN);
dict_mem_table_add_col(new_table, heap, "doc_count", DATA_INT,
- DATA_NOT_NULL | DATA_UNSIGNED, 4);
+ DATA_NOT_NULL | DATA_UNSIGNED,
+ FTS_INDEX_DOC_COUNT_LEN);
- dict_mem_table_add_col(new_table, heap, "ilist", DATA_BLOB,
- 4130048, 0);
+	/* The precise type calculation is as follows:
+	least significant byte: MySQL type code (not applicable for sys cols)
+	second least significant byte: DATA_NOT_NULL | DATA_BINARY_TYPE
+	third least significant byte: the MySQL charset-collation code
+	(DATA_MTYPE_MAX) */
- error = row_create_table_for_mysql(new_table, trx, false,
+ dict_mem_table_add_col(
+ new_table, heap, "ilist", DATA_BLOB,
+ (DATA_MTYPE_MAX << 16) | DATA_UNSIGNED | DATA_NOT_NULL,
+ FTS_INDEX_ILIST_LEN);
+
+ error = row_create_table_for_mysql(new_table, trx,
FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
+ if (error == DB_SUCCESS) {
+ dict_index_t* index = dict_mem_index_create(
+ table_name, "FTS_INDEX_TABLE_IND", new_table->space,
+ DICT_UNIQUE|DICT_CLUSTERED, 2);
+ dict_mem_index_add_field(index, "word", 0);
+ dict_mem_index_add_field(index, "first_doc_id", 0);
+
+ trx_dict_op_t op = trx_get_dict_operation(trx);
+
+ error = row_create_index_for_mysql(index, trx, NULL);
+
+ trx->dict_operation = op;
+ }
+
if (error != DB_SUCCESS) {
- trx->error_state = error;
dict_mem_table_free(new_table);
new_table = NULL;
- ib_logf(IB_LOG_LEVEL_WARN,
- "Fail to create FTS index table %s", table_name);
+ ib::warn() << "Failed to create FTS index table "
+ << table_name;
+ trx->error_state = DB_SUCCESS;
+ row_drop_table_for_mysql(table_name, trx, SQLCOM_DROP_DB);
+ trx->error_state = error;
}
return(new_table);
}
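+
+/* A standalone illustration of the precise-type packing described in
+fts_create_one_index_table() above: flag bits occupy the second least
+significant byte of prtype and the charset-collation code the third.
+The constant values below match the usual InnoDB definitions but are
+shown here only to make the bit layout concrete. */
+#include <cstdio>
+
+static void show_prtype_layout_sketch(void)
+{
+	const unsigned not_null		= 256;	/* DATA_NOT_NULL */
+	const unsigned is_unsigned	= 512;	/* DATA_UNSIGNED */
+	const unsigned charset_coll	= 63;	/* e.g. DATA_MTYPE_MAX */
+
+	unsigned prtype = (charset_coll << 16) | is_unsigned | not_null;
+
+	printf("charset byte: %u, flag byte: 0x%x\n",
+	       (prtype >> 16) & 0xFF, (prtype >> 8) & 0xFF);
+}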
-/*************************************************************//**
-Wrapper function of fts_create_index_tables_low(), create auxiliary
-tables for an FTS index
-@return: DB_SUCCESS or error code */
-UNIV_INTERN
+/** Create auxiliary index tables for an FTS index.
+@param[in,out] trx transaction
+@param[in] index the index instance
+@param[in] table_name table name
+@param[in] table_id the table id
+@return DB_SUCCESS or error code */
dberr_t
fts_create_index_tables_low(
-/*========================*/
- trx_t* trx, /*!< in: transaction */
- const dict_index_t*
- index, /*!< in: the index instance */
- const char* table_name, /*!< in: the table name */
- table_id_t table_id) /*!< in: the table id */
-
+ trx_t* trx,
+ const dict_index_t* index,
+ const char* table_name,
+ table_id_t table_id)
{
ulint i;
- que_t* graph;
fts_table_t fts_table;
dberr_t error = DB_SUCCESS;
- pars_info_t* info;
mem_heap_t* heap = mem_heap_create(1024);
- char fts_name[MAX_FULL_NAME_LEN];
fts_table.type = FTS_INDEX_TABLE;
fts_table.index_id = index->id;
fts_table.table_id = table_id;
fts_table.table = index->table;
-#ifdef FTS_DOC_STATS_DEBUG
- /* Create the FTS auxiliary tables that are specific
- to an FTS index. */
- info = pars_info_create();
-
- fts_table.suffix = "DOC_ID";
- fts_get_table_name(&fts_table, fts_name, true);
-
- pars_info_bind_id(info, true, "doc_id_table", fts_name);
-
- graph = fts_parse_sql_no_dict_lock(NULL, info,
- fts_create_index_tables_sql);
-
- error = fts_eval_sql(trx, graph);
- que_graph_free(graph);
-#endif /* FTS_DOC_STATS_DEBUG */
+	/* The aux_idx_tables vector records the FTS auxiliary index tables
+	created so far, so that they can be dropped if an error occurs. */
+ std::vector<dict_table_t*> aux_idx_tables;
+ std::vector<dict_table_t*>::const_iterator it;
- for (i = 0; fts_index_selector[i].value && error == DB_SUCCESS; ++i) {
+ for (i = 0; i < FTS_NUM_AUX_INDEX && error == DB_SUCCESS; ++i) {
dict_table_t* new_table;
- info = pars_info_create();
-
/* Create the FTS auxiliary tables that are specific
to an FTS index. We need to preserve the table_id %s
which fts_parse_sql_no_dict_lock() will fill in for us. */
@@ -2056,50 +2069,57 @@ fts_create_index_tables_low(
new_table = fts_create_one_index_table(
trx, index, &fts_table, heap);
- if (!new_table) {
+ if (new_table == NULL) {
error = DB_FAIL;
break;
+ } else {
+ aux_idx_tables.push_back(new_table);
}
- fts_get_table_name(&fts_table, fts_name, true);
-
- pars_info_bind_id(info, true, "table", fts_name);
-
- graph = fts_parse_sql_no_dict_lock(
- &fts_table, info, fts_create_index_sql);
-
- error = fts_eval_sql(trx, graph);
- que_graph_free(graph);
+ DBUG_EXECUTE_IF("ib_fts_index_table_error",
+ /* Return error after creating FTS_INDEX_5
+ aux table. */
+ if (i == 4) {
+ error = DB_FAIL;
+ break;
+ }
+ );
}
if (error != DB_SUCCESS) {
- /* We have special error handling here */
-
- trx->error_state = DB_SUCCESS;
-
- trx_rollback_to_savepoint(trx, NULL);
-
- row_drop_table_for_mysql(table_name, trx, FALSE, TRUE);
- trx->error_state = DB_SUCCESS;
+ for (it = aux_idx_tables.begin(); it != aux_idx_tables.end();
+ ++it) {
+ row_drop_table_for_mysql((*it)->name.m_name, trx,
+ SQLCOM_DROP_DB);
+ }
}
+ aux_idx_tables.clear();
mem_heap_free(heap);
return(error);
}
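+
+/* The loop above creates FTS_NUM_AUX_INDEX auxiliary tables per FTS
+index; the "_[1-6]" naming in the schema comment below suggests six
+partitions, each word being routed to exactly one of them. A
+self-contained sketch of such routing; the selector below is
+hypothetical, not the InnoDB one, which is charset-aware: */
+static unsigned pick_aux_partition_sketch(const unsigned char* word,
+	unsigned n_partitions)
+{
+	/* Route by the first byte of the word. */
+	return word[0] % n_partitions;
+}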
-/******************************************************************//**
-Creates the column specific ancillary tables needed for supporting an
+/** Creates the column specific ancillary tables needed for supporting an
FTS index on the given table. row_mysql_lock_data_dictionary must have
been called before this.
+
+All FTS AUX Index tables have the following schema.
+CREATE TABLE $FTS_PREFIX_INDEX_[1-6](
+	word		VARCHAR(FTS_MAX_WORD_LEN),
+	first_doc_id	INT UNSIGNED NOT NULL,
+	last_doc_id	INT UNSIGNED NOT NULL,
+	doc_count	INT UNSIGNED NOT NULL,
+ ilist VARBINARY NOT NULL,
+ UNIQUE CLUSTERED INDEX ON (word, first_doc_id))
+@param[in,out] trx transaction
+@param[in] index index instance
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_create_index_tables(
-/*====================*/
- trx_t* trx, /*!< in: transaction */
- const dict_index_t* index) /*!< in: the index instance */
+ trx_t* trx,
+ const dict_index_t* index)
{
dberr_t err;
dict_table_t* table;
@@ -2107,7 +2127,8 @@ fts_create_index_tables(
table = dict_table_get_low(index->table_name);
ut_a(table != NULL);
- err = fts_create_index_tables_low(trx, index, table->name, table->id);
+ err = fts_create_index_tables_low(
+ trx, index, table->name.m_name, table->id);
if (err == DB_SUCCESS) {
trx_commit(trx);
@@ -2253,8 +2274,7 @@ fts_savepoint_create(
/******************************************************************//**
Create an FTS trx.
-@return FTS trx */
-static
+@return FTS trx */
fts_trx_t*
fts_trx_create(
/*===========*/
@@ -2436,7 +2456,6 @@ fts_trx_table_add_op(
/******************************************************************//**
Notify the FTS system about an operation on an FTS-indexed table. */
-UNIV_INTERN
void
fts_trx_add_op(
/*===========*/
@@ -2509,7 +2528,7 @@ fts_get_max_cache_size(
{
dberr_t error;
fts_string_t value;
- ulint cache_size_in_mb;
+ ulong cache_size_in_mb;
/* Set to the default value. */
cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;
@@ -2518,7 +2537,7 @@ fts_get_max_cache_size(
information is used by the callback that reads the value. */
value.f_n_char = 0;
value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
- value.f_str = ut_malloc(value.f_len + 1);
+ value.f_str = ut_malloc_nokey(value.f_len + 1);
error = fts_config_get_value(
trx, fts_table, FTS_MAX_CACHE_SIZE_IN_MB, &value);
@@ -2530,35 +2549,32 @@ fts_get_max_cache_size(
if (cache_size_in_mb > FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Warning: FTS max cache size "
- " (%lu) out of range. Minimum value is "
- "%luMB and the maximum values is %luMB, "
- "setting cache size to upper limit\n",
- cache_size_in_mb,
- FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB,
- FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB);
+ ib::warn() << "FTS max cache size ("
+ << cache_size_in_mb << ") out of range."
+ " Minimum value is "
+ << FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB
+ << "MB and the maximum value is "
+ << FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
+ << "MB, setting cache size to upper limit";
cache_size_in_mb = FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB;
} else if (cache_size_in_mb
< FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Warning: FTS max cache size "
- " (%lu) out of range. Minimum value is "
- "%luMB and the maximum values is %luMB, "
- "setting cache size to lower limit\n",
- cache_size_in_mb,
- FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB,
- FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB);
+ ib::warn() << "FTS max cache size ("
+ << cache_size_in_mb << ") out of range."
+ " Minimum value is "
+ << FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB
+ << "MB and the maximum value is"
+ << FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
+ << "MB, setting cache size to lower limit";
cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;
}
} else {
- ut_print_timestamp(stderr);
- fprintf(stderr, "InnoDB: Error: (%lu) reading max cache "
- "config value from config table\n", error);
+ ib::error() << "(" << ut_strerr(error) << ") reading max"
+ " cache config value from config table";
}
ut_free(value.f_str);
@@ -2567,53 +2583,10 @@ fts_get_max_cache_size(
}
#endif
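+
+/* The cache size read back from the CONFIG table arrives as text, so
+fts_get_max_cache_size() parses it and clamps it into the allowed
+range, warning on either overflow direction. A standalone sketch of
+that parse-and-clamp step; the limits here are placeholders, not the
+real FTS_CACHE_SIZE_*_LIMIT_IN_MB values: */
+#include <cstdlib>
+
+static unsigned long parse_cache_size_mb_sketch(const char* value)
+{
+	const unsigned long lower = 1;		/* placeholder limit */
+	const unsigned long upper = 80000;	/* placeholder limit */
+
+	unsigned long mb = strtoul(value, NULL, 10);
+	if (mb > upper) {
+		mb = upper;	/* out of range: clamp to upper limit */
+	} else if (mb < lower) {
+		mb = lower;	/* out of range: clamp to lower limit */
+	}
+	return(mb);
+}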
-#ifdef FTS_DOC_STATS_DEBUG
-/*********************************************************************//**
-Get the total number of words in the FTS for a particular FTS index.
-@return DB_SUCCESS if all OK else error code */
-UNIV_INTERN
-dberr_t
-fts_get_total_word_count(
-/*=====================*/
- trx_t* trx, /*!< in: transaction */
- dict_index_t* index, /*!< in: for this index */
- ulint* total) /* out: total words */
-{
- dberr_t error;
- fts_string_t value;
-
- *total = 0;
-
- /* We set the length of value to the max bytes it can hold. This
- information is used by the callback that reads the value. */
- value.f_n_char = 0;
- value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
- value.f_str = static_cast<byte*>(ut_malloc(value.f_len + 1));
-
- error = fts_config_get_index_value(
- trx, index, FTS_TOTAL_WORD_COUNT, &value);
-
- if (error == DB_SUCCESS) {
-
- value.f_str[value.f_len] = 0;
- *total = strtoul((char*) value.f_str, NULL, 10);
- } else {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%s) reading total words "
- "value from config table\n", ut_strerr(error));
- }
-
- ut_free(value.f_str);
-
- return(error);
-}
-#endif /* FTS_DOC_STATS_DEBUG */
-
/*********************************************************************//**
Update the next and last Doc ID in the CONFIG table to be the input
"doc_id" value (+ 1). We would do so after each FTS index build or
table truncate */
-UNIV_INTERN
void
fts_update_next_doc_id(
/*===================*/
@@ -2634,7 +2607,6 @@ fts_update_next_doc_id(
/*********************************************************************//**
Get the next available document id.
@return DB_SUCCESS if OK */
-UNIV_INTERN
dberr_t
fts_get_next_doc_id(
/*================*/
@@ -2646,19 +2618,15 @@ fts_get_next_doc_id(
/* If the Doc ID system has not yet been initialized, we
will consult the CONFIG table and user table to re-establish
the initial value of the Doc ID */
+ if (cache->first_doc_id == FTS_NULL_DOC_ID) {
+ fts_init_doc_id(table);
+ }
if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
- if (cache->first_doc_id == FTS_NULL_DOC_ID) {
- fts_init_doc_id(table);
- }
*doc_id = FTS_NULL_DOC_ID;
return(DB_SUCCESS);
}
- if (cache->first_doc_id == FTS_NULL_DOC_ID) {
- fts_init_doc_id(table);
- }
-
DEBUG_SYNC_C("get_next_FTS_DOC_ID");
mutex_enter(&cache->doc_id_lock);
*doc_id = cache->next_doc_id++;
@@ -2700,6 +2668,11 @@ retry:
fts_table.table = table;
trx = trx_allocate_for_background();
+ if (srv_read_only_mode) {
+ trx_start_internal_read_only(trx);
+ } else {
+ trx_start_internal(trx);
+ }
trx->op_info = "update the next FTS document id";
@@ -2774,10 +2747,8 @@ func_exit:
} else {
*doc_id = 0;
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%s) "
- "while getting next doc id.\n", ut_strerr(error));
-
+ ib::error() << "(" << ut_strerr(error) << ") while getting"
+ " next doc id.";
fts_sql_rollback(trx);
if (error == DB_DEADLOCK) {
@@ -2813,6 +2784,10 @@ fts_update_sync_doc_id(
fts_cache_t* cache = table->fts->cache;
char fts_name[MAX_FULL_NAME_LEN];
+ if (srv_read_only_mode) {
+ return DB_READ_ONLY;
+ }
+
fts_table.suffix = "CONFIG";
fts_table.table_id = table->id;
fts_table.type = FTS_COMMON_TABLE;
@@ -2820,6 +2795,7 @@ fts_update_sync_doc_id(
if (!trx) {
trx = trx_allocate_for_background();
+ trx_start_internal(trx);
trx->op_info = "setting last FTS document id";
local_trx = TRUE;
@@ -2827,7 +2803,7 @@ fts_update_sync_doc_id(
info = pars_info_create();
- id_len = ut_snprintf(
+ id_len = snprintf(
(char*) id, sizeof(id), FTS_DOC_ID_FORMAT, doc_id + 1);
pars_info_bind_varchar_literal(info, "doc_id", id, id_len);
@@ -2838,8 +2814,8 @@ fts_update_sync_doc_id(
graph = fts_parse_sql(
&fts_table, info,
- "BEGIN "
- "UPDATE $table_name SET value = :doc_id"
+ "BEGIN"
+ " UPDATE $table_name SET value = :doc_id"
" WHERE key = 'synced_doc_id';");
error = fts_eval_sql(trx, graph);
@@ -2852,9 +2828,8 @@ fts_update_sync_doc_id(
cache->synced_doc_id = doc_id;
} else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "(%s) while updating last doc id.",
- ut_strerr(error));
+ ib::error() << "(" << ut_strerr(error) << ") while"
+ " updating last doc id.";
fts_sql_rollback(trx);
}
@@ -2867,7 +2842,6 @@ fts_update_sync_doc_id(
/*********************************************************************//**
Create a new fts_doc_ids_t.
@return new fts_doc_ids_t */
-UNIV_INTERN
fts_doc_ids_t*
fts_doc_ids_create(void)
/*====================*/
@@ -3039,12 +3013,18 @@ fts_commit_table(
/*=============*/
fts_trx_table_t* ftt) /*!< in: FTS table to commit*/
{
+ if (srv_read_only_mode) {
+ return DB_READ_ONLY;
+ }
+
const ib_rbt_node_t* node;
ib_rbt_t* rows;
dberr_t error = DB_SUCCESS;
fts_cache_t* cache = ftt->table->fts->cache;
trx_t* trx = trx_allocate_for_background();
+ trx_start_internal(trx);
+
rows = ftt->rows;
ftt->fts_trx->trx = trx;
@@ -3092,7 +3072,6 @@ fts_commit_table(
The given transaction is about to be committed; do whatever is necessary
from the FTS system's POV.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_commit(
/*=======*/
@@ -3123,7 +3102,6 @@ fts_commit(
/*********************************************************************//**
Initialize a document. */
-UNIV_INTERN
void
fts_doc_init(
/*=========*/
@@ -3138,7 +3116,6 @@ fts_doc_init(
/*********************************************************************//**
Free document. */
-UNIV_INTERN
void
fts_doc_free(
/*=========*/
@@ -3150,44 +3127,15 @@ fts_doc_free(
rbt_free(doc->tokens);
}
-#ifdef UNIV_DEBUG
- memset(doc, 0, sizeof(*doc));
-#endif /* UNIV_DEBUG */
+ ut_d(memset(doc, 0, sizeof(*doc)));
mem_heap_free(heap);
}
/*********************************************************************//**
-Callback function for fetch that stores a row id to the location pointed.
-The column's type must be DATA_FIXBINARY, DATA_BINARY_TYPE, length = 8.
-@return always returns NULL */
-UNIV_INTERN
-void*
-fts_fetch_row_id(
-/*=============*/
- void* row, /*!< in: sel_node_t* */
- void* user_arg) /*!< in: data pointer */
-{
- sel_node_t* node = static_cast<sel_node_t*>(row);
-
- dfield_t* dfield = que_node_get_val(node->select_list);
- dtype_t* type = dfield_get_type(dfield);
- ulint len = dfield_get_len(dfield);
-
- ut_a(dtype_get_mtype(type) == DATA_FIXBINARY);
- ut_a(dtype_get_prtype(type) & DATA_BINARY_TYPE);
- ut_a(len == 8);
-
- memcpy(user_arg, dfield_get_data(dfield), 8);
-
- return(NULL);
-}
-
-/*********************************************************************//**
Callback function for fetch that stores the text of an FTS document,
converting each column to UTF-16.
@return always FALSE */
-UNIV_INTERN
ibool
fts_query_expansion_fetch_doc(
/*==========================*/
@@ -3226,10 +3174,7 @@ fts_query_expansion_fetch_doc(
}
if (!doc_charset) {
- ulint prtype = dfield->type.prtype;
- doc_charset = innobase_get_fts_charset(
- (int)(prtype & DATA_MYSQL_TYPE_MASK),
- (uint) dtype_get_charset_coll(prtype));
+ doc_charset = fts_get_charset(dfield->type.prtype);
}
doc.charset = doc_charset;
@@ -3249,9 +3194,11 @@ fts_query_expansion_fetch_doc(
}
if (field_no == 0) {
- fts_tokenize_document(&doc, result_doc);
+ fts_tokenize_document(&doc, result_doc,
+ result_doc->parser);
} else {
- fts_tokenize_document_next(&doc, doc_len, result_doc);
+ fts_tokenize_document_next(&doc, doc_len, result_doc,
+ result_doc->parser);
}
exp = que_node_get_next(exp);
@@ -3282,7 +3229,7 @@ fts_fetch_doc_from_rec(
dict_index_t* clust_index, /*!< in: cluster index */
btr_pcur_t* pcur, /*!< in: cursor whose position
has been stored */
- ulint* offsets, /*!< in: offsets */
+ offset_t* offsets, /*!< in: offsets */
fts_doc_t* doc) /*!< out: fts doc to hold parsed
documents */
{
@@ -3296,6 +3243,7 @@ fts_fetch_doc_from_rec(
ulint i;
ulint doc_len = 0;
ulint processed_doc = 0;
+ st_mysql_ftparser* parser;
if (!get_doc) {
return;
@@ -3303,6 +3251,7 @@ fts_fetch_doc_from_rec(
index = get_doc->index_cache->index;
table = get_doc->index_cache->index->table;
+ parser = get_doc->index_cache->index->parser;
clust_rec = btr_pcur_get_rec(pcur);
@@ -3314,19 +3263,15 @@ fts_fetch_doc_from_rec(
clust_pos = dict_col_get_clust_pos(col, clust_index);
if (!get_doc->index_cache->charset) {
- ulint prtype = ifield->col->prtype;
-
- get_doc->index_cache->charset =
- innobase_get_fts_charset(
- (int) (prtype & DATA_MYSQL_TYPE_MASK),
- (uint) dtype_get_charset_coll(prtype));
+ get_doc->index_cache->charset = fts_get_charset(
+ ifield->col->prtype);
}
if (rec_offs_nth_extern(offsets, clust_pos)) {
doc->text.f_str =
btr_rec_copy_externally_stored_field(
clust_rec, offsets,
- dict_table_zip_size(table),
+ dict_table_page_size(table),
clust_pos, &doc->text.f_len,
static_cast<mem_heap_t*>(
doc->self_heap->arg));
@@ -3345,9 +3290,9 @@ fts_fetch_doc_from_rec(
}
if (processed_doc == 0) {
- fts_tokenize_document(doc, NULL);
+ fts_tokenize_document(doc, NULL, parser);
} else {
- fts_tokenize_document_next(doc, doc_len, NULL);
+ fts_tokenize_document_next(doc, doc_len, NULL, parser);
}
processed_doc++;
@@ -3355,6 +3300,143 @@ fts_fetch_doc_from_rec(
}
}
+/** Fetch the data from tuple and tokenize the document.
+@param[in] get_doc FTS index's get_doc struct
+@param[in]	tuple	tuple whose fields are arranged in table schema order
+@param[out] doc fts doc to hold parsed documents. */
+static
+void
+fts_fetch_doc_from_tuple(
+ fts_get_doc_t* get_doc,
+ const dtuple_t* tuple,
+ fts_doc_t* doc)
+{
+ dict_index_t* index;
+ st_mysql_ftparser* parser;
+ ulint doc_len = 0;
+ ulint processed_doc = 0;
+ ulint num_field;
+
+ if (get_doc == NULL) {
+ return;
+ }
+
+ index = get_doc->index_cache->index;
+ parser = get_doc->index_cache->index->parser;
+ num_field = dict_index_get_n_fields(index);
+
+ for (ulint i = 0; i < num_field; i++) {
+ const dict_field_t* ifield;
+ const dict_col_t* col;
+ ulint pos;
+
+ ifield = dict_index_get_nth_field(index, i);
+ col = dict_field_get_col(ifield);
+ pos = dict_col_get_no(col);
+ const dfield_t* field = dtuple_get_nth_field(tuple, pos);
+
+ if (!get_doc->index_cache->charset) {
+ get_doc->index_cache->charset = fts_get_charset(
+ ifield->col->prtype);
+ }
+
+ ut_ad(!dfield_is_ext(field));
+
+ doc->text.f_str = (byte*) dfield_get_data(field);
+ doc->text.f_len = dfield_get_len(field);
+ doc->found = TRUE;
+ doc->charset = get_doc->index_cache->charset;
+
+		/* Skip fields whose data is NULL or empty. */
+ if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
+ continue;
+ }
+
+ if (processed_doc == 0) {
+ fts_tokenize_document(doc, NULL, parser);
+ } else {
+ fts_tokenize_document_next(doc, doc_len, NULL, parser);
+ }
+
+ processed_doc++;
+ doc_len += doc->text.f_len + 1;
+ }
+}
+
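+/* Note on the doc_len bookkeeping above: when a document spans several
+indexed columns, the second and later columns are tokenized with
+fts_tokenize_document_next(), whose add_pos argument is the accumulated
+length of the earlier columns plus one separator character per column
+(hence "doc_len += doc->text.f_len + 1"). A worked example, assuming
+single-byte text: columns "ab" and "cd" yield token positions 0 and 3,
+exactly as if the document were the single string "ab cd". */
+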
+/** Fetch the document from a tuple, tokenize the text data and
+insert the text data into the FTS auxiliary table and its cache.
+The tuple's fields carry no information about externally stored
+columns; the tuple holds data directly converted from MySQL.
+@param[in]	ftt	FTS transaction table
+@param[in]	doc_id	doc id
+@param[in]	tuple	tuple from which the data is retrieved; its
+			fields must be arranged in table schema
+			order. */
+void
+fts_add_doc_from_tuple(
+ fts_trx_table_t*ftt,
+ doc_id_t doc_id,
+ const dtuple_t* tuple)
+{
+ mtr_t mtr;
+ fts_cache_t* cache = ftt->table->fts->cache;
+
+ ut_ad(cache->get_docs);
+
+ if (!ftt->table->fts->added_synced) {
+ fts_init_index(ftt->table, FALSE);
+ }
+
+ mtr_start(&mtr);
+
+ ulint num_idx = ib_vector_size(cache->get_docs);
+
+ for (ulint i = 0; i < num_idx; ++i) {
+ fts_doc_t doc;
+ dict_table_t* table;
+ fts_get_doc_t* get_doc;
+
+ get_doc = static_cast<fts_get_doc_t*>(
+ ib_vector_get(cache->get_docs, i));
+ table = get_doc->index_cache->index->table;
+
+ fts_doc_init(&doc);
+ fts_fetch_doc_from_tuple(
+ get_doc, tuple, &doc);
+
+ if (doc.found) {
+ mtr_commit(&mtr);
+ rw_lock_x_lock(&table->fts->cache->lock);
+
+ if (table->fts->cache->stopword_info.status
+ & STOPWORD_NOT_INIT) {
+ fts_load_stopword(table, NULL, NULL,
+ NULL, TRUE, TRUE);
+ }
+
+ fts_cache_add_doc(
+ table->fts->cache,
+ get_doc->index_cache,
+ doc_id, doc.tokens);
+
+ rw_lock_x_unlock(&table->fts->cache->lock);
+
+ if (cache->total_size > fts_max_cache_size / 5
+ || fts_need_sync) {
+ fts_sync(cache->sync, true, false);
+ }
+
+ mtr_start(&mtr);
+
+ }
+
+ fts_doc_free(&doc);
+ }
+
+ mtr_commit(&mtr);
+}
+
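+/* A standalone restatement of the sync trigger used above: the cache
+is flushed early, once it has grown past one fifth of the configured
+budget, or when a sync has been explicitly requested. The parameter
+names are illustrative. */
+static bool fts_should_sync_sketch(unsigned long total_size,
+	unsigned long max_cache_size, bool need_sync)
+{
+	return(total_size > max_cache_size / 5 || need_sync);
+}
+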
/*********************************************************************//**
This function fetches the document inserted during the committing
transaction, and tokenize the inserted text data and insert into
@@ -3401,8 +3483,7 @@ fts_add_doc_by_id(
heap = mem_heap_create(512);
clust_index = dict_table_get_first_index(table);
- fts_id_index = dict_table_get_index_on_name(
- table, FTS_DOC_ID_INDEX_NAME);
+ fts_id_index = table->fts_doc_id_index;
/* Check whether the index on FTS_DOC_ID is cluster index */
is_id_cluster = (clust_index == fts_id_index);
@@ -3430,7 +3511,7 @@ fts_add_doc_by_id(
btr_pcur_t* doc_pcur;
const rec_t* clust_rec;
btr_pcur_t clust_pcur;
- ulint* offsets = NULL;
+ offset_t* offsets = NULL;
ulint num_idx = ib_vector_size(cache->get_docs);
rec = btr_pcur_get_rec(&pcur);
@@ -3467,10 +3548,10 @@ fts_add_doc_by_id(
}
- offsets = rec_get_offsets(clust_rec, clust_index,
- NULL, ULINT_UNDEFINED, &heap);
+ offsets = rec_get_offsets(clust_rec, clust_index, NULL, true,
+ ULINT_UNDEFINED, &heap);
- for (ulint i = 0; i < num_idx; ++i) {
+ for (ulint i = 0; i < num_idx; ++i) {
fts_doc_t doc;
dict_table_t* table;
fts_get_doc_t* get_doc;
@@ -3521,7 +3602,7 @@ fts_add_doc_by_id(
DBUG_EXECUTE_IF(
"fts_instrument_sync_debug",
- fts_sync(cache->sync, true, true, false);
+ fts_sync(cache->sync, true, true);
);
DEBUG_SYNC_C("fts_instrument_sync_request");
@@ -3588,7 +3669,6 @@ fts_read_ulint(
/*********************************************************************//**
Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists
@return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */
-UNIV_INTERN
doc_id_t
fts_get_max_doc_id(
/*===============*/
@@ -3600,7 +3680,7 @@ fts_get_max_doc_id(
mtr_t mtr;
btr_pcur_t pcur;
- index = dict_table_get_index_on_name(table, FTS_DOC_ID_INDEX_NAME);
+ index = table->fts_doc_id_index;
if (!index) {
return(0);
@@ -3620,8 +3700,8 @@ fts_get_max_doc_id(
if (!page_is_empty(btr_pcur_get_page(&pcur))) {
const rec_t* rec = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
mem_heap_t* heap = NULL;
ulint len;
const void* data;
@@ -3641,7 +3721,7 @@ fts_get_max_doc_id(
}
offsets = rec_get_offsets(
- rec, index, offsets, ULINT_UNDEFINED, &heap);
+ rec, index, offsets, true, ULINT_UNDEFINED, &heap);
data = rec_get_nth_field(rec, offsets, 0, &len);
@@ -3658,7 +3738,6 @@ func_exit:
/*********************************************************************//**
Fetch document with the given document id.
@return DB_SUCCESS if OK else error */
-UNIV_INTERN
dberr_t
fts_doc_fetch_by_doc_id(
/*====================*/
@@ -3768,13 +3847,7 @@ fts_doc_fetch_by_doc_id(
}
error = fts_eval_sql(trx, graph);
-
- if (error == DB_SUCCESS) {
- fts_sql_commit(trx);
- } else {
- fts_sql_rollback(trx);
- }
-
+ fts_sql_commit(trx);
trx_free_for_background(trx);
if (!get_doc) {
@@ -3787,7 +3860,6 @@ fts_doc_fetch_by_doc_id(
/*********************************************************************//**
Write out a single word's data as new entry/entries in the INDEX table.
@return DB_SUCCESS if all OK. */
-UNIV_INTERN
dberr_t
fts_write_node(
/*===========*/
@@ -3805,6 +3877,8 @@ fts_write_node(
doc_id_t first_doc_id;
char table_name[MAX_FULL_NAME_LEN];
+ ut_a(node->ilist != NULL);
+
if (*graph) {
info = (*graph)->info;
} else {
@@ -3842,9 +3916,9 @@ fts_write_node(
fts_table,
info,
"BEGIN\n"
- "INSERT INTO $index_table_name VALUES "
- "(:token, :first_doc_id,"
- " :last_doc_id, :doc_count, :ilist);");
+ "INSERT INTO $index_table_name VALUES"
+ " (:token, :first_doc_id,"
+ " :last_doc_id, :doc_count, :ilist);");
}
start_time = time(NULL);
@@ -3930,9 +4004,6 @@ fts_sync_write_words(
dberr_t error = DB_SUCCESS;
ibool print_error = FALSE;
dict_table_t* table = index_cache->index->table;
-#ifdef FTS_DOC_STATS_DEBUG
- ulint n_new_words = 0;
-#endif /* FTS_DOC_STATS_DEBUG */
FTS_INIT_INDEX_TABLE(
&fts_table, NULL, FTS_INDEX_TABLE, index_cache->index);
@@ -3960,25 +4031,6 @@ fts_sync_write_words(
fts_table.suffix = fts_get_suffix(selected);
-#ifdef FTS_DOC_STATS_DEBUG
- /* Check if the word exists in the FTS index and if not
- then we need to increment the total word count stats. */
- if (error == DB_SUCCESS && fts_enable_diag_print) {
- ibool found = FALSE;
-
- error = fts_is_word_in_index(
- trx,
- &index_cache->sel_graph[selected],
- &fts_table,
- &word->text, &found);
-
- if (error == DB_SUCCESS && !found) {
-
- ++n_new_words;
- }
- }
-#endif /* FTS_DOC_STATS_DEBUG */
-
/* We iterate over all the nodes even if there was an error */
for (i = 0; i < ib_vector_size(word->nodes); ++i) {
@@ -4021,28 +4073,12 @@ fts_sync_write_words(
n_nodes += ib_vector_size(word->nodes);
if (error != DB_SUCCESS && !print_error) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error (%s) writing "
- "word node to FTS auxiliary index "
- "table.\n", ut_strerr(error));
-
+ ib::error() << "(" << ut_strerr(error) << ") writing"
+ " word node to FTS auxiliary index table.";
print_error = TRUE;
}
}
-#ifdef FTS_DOC_STATS_DEBUG
- if (error == DB_SUCCESS && n_new_words > 0 && fts_enable_diag_print) {
- fts_table_t fts_table;
-
- FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
-
- /* Increment the total number of words in the FTS index */
- error = fts_config_increment_index_value(
- trx, index_cache->index, FTS_TOTAL_WORD_COUNT,
- n_new_words);
- }
-#endif /* FTS_DOC_STATS_DEBUG */
-
if (fts_enable_diag_print) {
printf("Avg number of nodes: %lf\n",
(double) n_nodes / (double) (n_words > 1 ? n_words : 1));
@@ -4051,242 +4087,6 @@ fts_sync_write_words(
return(error);
}
-#ifdef FTS_DOC_STATS_DEBUG
-/*********************************************************************//**
-Write a single documents statistics to disk.
-@return DB_SUCCESS if all went well else error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_sync_write_doc_stat(
-/*====================*/
- trx_t* trx, /*!< in: transaction */
- dict_index_t* index, /*!< in: index */
- que_t** graph, /* out: query graph */
- const fts_doc_stats_t* doc_stat) /*!< in: doc stats to write */
-{
- pars_info_t* info;
- doc_id_t doc_id;
- dberr_t error = DB_SUCCESS;
- ib_uint32_t word_count;
- char table_name[MAX_FULL_NAME_LEN];
-
- if (*graph) {
- info = (*graph)->info;
- } else {
- info = pars_info_create();
- }
-
- /* Convert to "storage" byte order. */
- mach_write_to_4((byte*) &word_count, doc_stat->word_count);
- pars_info_bind_int4_literal(
- info, "count", (const ib_uint32_t*) &word_count);
-
- /* Convert to "storage" byte order. */
- fts_write_doc_id((byte*) &doc_id, doc_stat->doc_id);
- fts_bind_doc_id(info, "doc_id", &doc_id);
-
- if (!*graph) {
- fts_table_t fts_table;
-
- FTS_INIT_INDEX_TABLE(
- &fts_table, "DOC_ID", FTS_INDEX_TABLE, index);
-
- fts_get_table_name(&fts_table, table_name);
-
- pars_info_bind_id(info, true, "doc_id_table", table_name);
-
- *graph = fts_parse_sql(
- &fts_table,
- info,
- "BEGIN "
- "INSERT INTO $doc_id_table VALUES (:doc_id, :count);");
- }
-
- for (;;) {
- error = fts_eval_sql(trx, *graph);
-
- if (error == DB_SUCCESS) {
-
- break; /* Exit the loop. */
- } else {
- ut_print_timestamp(stderr);
-
- if (error == DB_LOCK_WAIT_TIMEOUT) {
- fprintf(stderr, " InnoDB: Warning: lock wait "
- "timeout writing to FTS doc_id. "
- "Retrying!\n");
-
- trx->error_state = DB_SUCCESS;
- } else {
- fprintf(stderr, " InnoDB: Error: (%s) "
- "while writing to FTS doc_id.\n",
- ut_strerr(error));
-
- break; /* Exit the loop. */
- }
- }
- }
-
- return(error);
-}
-
-/*********************************************************************//**
-Write document statistics to disk.
-@return DB_SUCCESS if all OK */
-static
-ulint
-fts_sync_write_doc_stats(
-/*=====================*/
- trx_t* trx, /*!< in: transaction */
- const fts_index_cache_t*index_cache) /*!< in: index cache */
-{
- dberr_t error = DB_SUCCESS;
- que_t* graph = NULL;
- fts_doc_stats_t* doc_stat;
-
- if (ib_vector_is_empty(index_cache->doc_stats)) {
- return(DB_SUCCESS);
- }
-
- doc_stat = static_cast<ts_doc_stats_t*>(
- ib_vector_pop(index_cache->doc_stats));
-
- while (doc_stat) {
- error = fts_sync_write_doc_stat(
- trx, index_cache->index, &graph, doc_stat);
-
- if (error != DB_SUCCESS) {
- break;
- }
-
- if (ib_vector_is_empty(index_cache->doc_stats)) {
- break;
- }
-
- doc_stat = static_cast<ts_doc_stats_t*>(
- ib_vector_pop(index_cache->doc_stats));
- }
-
- if (graph != NULL) {
- fts_que_graph_free_check_lock(NULL, index_cache, graph);
- }
-
- return(error);
-}
-
-/*********************************************************************//**
-Callback to check the existince of a word.
-@return always return NULL */
-static
-ibool
-fts_lookup_word(
-/*============*/
- void* row, /*!< in: sel_node_t* */
- void* user_arg) /*!< in: fts_doc_t* */
-{
-
- que_node_t* exp;
- sel_node_t* node = static_cast<sel_node_t*>(row);
- ibool* found = static_cast<ibool*>(user_arg);
-
- exp = node->select_list;
-
- while (exp) {
- dfield_t* dfield = que_node_get_val(exp);
- ulint len = dfield_get_len(dfield);
-
- if (len != UNIV_SQL_NULL && len != 0) {
- *found = TRUE;
- }
-
- exp = que_node_get_next(exp);
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Check whether a particular word (term) exists in the FTS index.
-@return DB_SUCCESS if all went well else error code */
-static
-dberr_t
-fts_is_word_in_index(
-/*=================*/
- trx_t* trx, /*!< in: FTS query state */
- que_t** graph, /* out: Query graph */
- fts_table_t* fts_table, /*!< in: table instance */
- const fts_string_t*
- word, /*!< in: the word to check */
- ibool* found) /* out: TRUE if exists */
-{
- pars_info_t* info;
- dberr_t error;
- char table_name[MAX_FULL_NAME_LEN];
-
- trx->op_info = "looking up word in FTS index";
-
- if (*graph) {
- info = (*graph)->info;
- } else {
- info = pars_info_create();
- }
-
- fts_get_table_name(fts_table, table_name);
- pars_info_bind_id(info, true, "table_name", table_name);
- pars_info_bind_function(info, "my_func", fts_lookup_word, found);
- pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len);
-
- if (*graph == NULL) {
- *graph = fts_parse_sql(
- fts_table,
- info,
- "DECLARE FUNCTION my_func;\n"
- "DECLARE CURSOR c IS"
- " SELECT doc_count\n"
- " FROM $table_name\n"
- " WHERE word = :word "
- " ORDER BY first_doc_id;\n"
- "BEGIN\n"
- "\n"
- "OPEN c;\n"
- "WHILE 1 = 1 LOOP\n"
- " FETCH c INTO my_func();\n"
- " IF c % NOTFOUND THEN\n"
- " EXIT;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE c;");
- }
-
- for (;;) {
- error = fts_eval_sql(trx, *graph);
-
- if (error == DB_SUCCESS) {
-
- break; /* Exit the loop. */
- } else {
- ut_print_timestamp(stderr);
-
- if (error == DB_LOCK_WAIT_TIMEOUT) {
- fprintf(stderr, " InnoDB: Warning: lock wait "
- "timeout reading FTS index. "
- "Retrying!\n");
-
- trx->error_state = DB_SUCCESS;
- } else {
- fprintf(stderr, " InnoDB: Error: (%s) "
- "while reading FTS index.\n",
- ut_strerr(error));
-
- break; /* Exit the loop. */
- }
- }
- }
-
- return(error);
-}
-#endif /* FTS_DOC_STATS_DEBUG */
-
/*********************************************************************//**
Begin Sync, create transaction, acquire locks, etc. */
static
@@ -4303,14 +4103,13 @@ fts_sync_begin(
sync->start_time = time(NULL);
sync->trx = trx_allocate_for_background();
+ trx_start_internal(sync->trx);
if (fts_enable_diag_print) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "FTS SYNC for table %s, deleted count: %ld size: "
- "%zu bytes",
- sync->table->name,
- ib_vector_size(cache->deleted_doc_ids),
- cache->total_size);
+ ib::info() << "FTS SYNC for table " << sync->table->name
+ << ", deleted count: "
+ << ib_vector_size(cache->deleted_doc_ids)
+ << " size: " << cache->total_size << " bytes";
}
}
@@ -4326,31 +4125,16 @@ fts_sync_index(
fts_index_cache_t* index_cache) /*!< in: index cache */
{
trx_t* trx = sync->trx;
- dberr_t error = DB_SUCCESS;
trx->op_info = "doing SYNC index";
if (fts_enable_diag_print) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "SYNC words: %ld", rbt_size(index_cache->words));
+ ib::info() << "SYNC words: " << rbt_size(index_cache->words);
}
ut_ad(rbt_validate(index_cache->words));
- error = fts_sync_write_words(sync->trx, index_cache, sync->unlock_cache);
-
-#ifdef FTS_DOC_STATS_DEBUG
- /* FTS_RESOLVE: the word counter info in auxiliary table "DOC_ID"
- is not used currently for ranking. We disable fts_sync_write_doc_stats()
- for now */
- /* Write the per doc statistics that will be used for ranking. */
- if (error == DB_SUCCESS) {
-
- error = fts_sync_write_doc_stats(trx, index_cache);
- }
-#endif /* FTS_DOC_STATS_DEBUG */
-
- return(error);
+ return(fts_sync_write_words(trx, index_cache, sync->unlock_cache));
}
/** Check if index cache has been synced completely
@@ -4449,18 +4233,16 @@ fts_sync_commit(
fts_sql_rollback(trx);
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%s) during SYNC.\n",
- ut_strerr(error));
+ ib::error() << "(" << ut_strerr(error) << ") during SYNC.";
}
if (fts_enable_diag_print && elapsed_time) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "SYNC for table %s: SYNC time : %lu secs: "
- "elapsed %lf ins/sec",
- sync->table->name,
- (ulong) (time(NULL) - sync->start_time),
- (double) n_nodes/ (double) elapsed_time);
+ ib::info() << "SYNC for table " << sync->table->name
+ << ": SYNC time: "
+ << (time(NULL) - sync->start_time)
+ << " secs: elapsed "
+ << (double) n_nodes / elapsed_time
+ << " ins/sec";
}
/* Avoid assertion in trx_free(). */
@@ -4527,16 +4309,18 @@ FTS auxiliary INDEX table and clear the cache at the end.
@param[in,out] sync sync state
@param[in] unlock_cache whether unlock cache lock when write node
@param[in] wait whether wait when a sync is in progress
-@param[in] has_dict whether has dict operation lock
@return DB_SUCCESS if all OK */
static
dberr_t
fts_sync(
fts_sync_t* sync,
bool unlock_cache,
- bool wait,
- bool has_dict)
+ bool wait)
{
+ if (srv_read_only_mode) {
+ return DB_READ_ONLY;
+ }
+
ulint i;
dberr_t error = DB_SUCCESS;
fts_cache_t* cache = sync->table->fts->cache;
@@ -4564,12 +4348,6 @@ fts_sync(
DEBUG_SYNC_C("fts_sync_begin");
fts_sync_begin(sync);
- /* When sync in background, we hold dict operation lock
- to prevent DDL like DROP INDEX, etc. */
- if (has_dict) {
- sync->trx->dict_operation_lock_mode = RW_S_LATCH;
- }
-
begin_sync:
if (cache->total_size > fts_max_cache_size) {
/* Avoid the case: sync never finish when
@@ -4625,7 +4403,7 @@ begin_sync:
end_sync:
if (error == DB_SUCCESS && !sync->interrupted) {
error = fts_sync_commit(sync);
- } else {
+ } else {
fts_sync_rollback(sync);
}
@@ -4660,64 +4438,60 @@ end_sync:
/** Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
@param[in,out] table fts table
-@param[in] unlock_cache whether unlock cache when write node
@param[in] wait whether wait for existing sync to finish
-@param[in] has_dict whether has dict operation lock
@return DB_SUCCESS on success, error code on failure. */
-UNIV_INTERN
-dberr_t
-fts_sync_table(
- dict_table_t* table,
- bool unlock_cache,
- bool wait,
- bool has_dict)
+dberr_t fts_sync_table(dict_table_t* table, bool wait)
{
dberr_t err = DB_SUCCESS;
ut_ad(table->fts);
- if (!dict_table_is_discarded(table) && table->fts->cache) {
- err = fts_sync(table->fts->cache->sync,
- unlock_cache, wait, has_dict);
+ if (!dict_table_is_discarded(table) && table->fts->cache
+ && !dict_table_is_corrupted(table)) {
+ err = fts_sync(table->fts->cache->sync, !wait, wait);
}
return(err);
}
-/********************************************************************
-Process next token from document starting at the given position, i.e., add
-the token's start position to the token's list of positions.
-@return number of characters handled in this call */
-static
-ulint
-fts_process_token(
-/*==============*/
- fts_doc_t* doc, /* in/out: document to
- tokenize */
- fts_doc_t* result, /* out: if provided, save
- result here */
- ulint start_pos, /*!< in: start position in text */
- ulint add_pos) /*!< in: add this position to all
- tokens from this tokenization */
+/** Check whether an FTS token is a stopword, or shorter than
+fts_min_token_size, or longer than fts_max_token_size.
+@param[in]	token		token string
+@param[in]	stopwords	stopwords rb tree
+@param[in]	cs		token charset
+@retval	true	if it is not a stopword and its length is in range
+@retval	false	if it is a stopword or its length is out of range */
+bool
+fts_check_token(
+ const fts_string_t* token,
+ const ib_rbt_t* stopwords,
+ const CHARSET_INFO* cs)
{
- ulint ret;
- fts_string_t str;
- ulint offset = 0;
- fts_doc_t* result_doc;
+ ut_ad(cs != NULL || stopwords == NULL);
- /* Determine where to save the result. */
- result_doc = (result) ? result : doc;
+ ib_rbt_bound_t parent;
- /* The length of a string in characters is set here only. */
- ret = innobase_mysql_fts_get_token(
- doc->charset, doc->text.f_str + start_pos,
- doc->text.f_str + doc->text.f_len, &str, &offset);
+ return(token->f_n_char >= fts_min_token_size
+ && token->f_n_char <= fts_max_token_size
+ && (stopwords == NULL
+ || rbt_search(stopwords, &parent, token) != 0));
+}
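+
+/* A standalone analogue of fts_check_token() with std::set standing in
+for the stopword rb-tree: rbt_search() returns 0 on an exact match, so
+"!= 0" above means "not a stopword". The size limits below are
+placeholders for fts_min_token_size / fts_max_token_size. */
+#include <set>
+#include <string>
+
+static bool check_token_sketch(const std::string& token,
+	const std::set<std::string>* stopwords)
+{
+	const std::string::size_type min_size = 3;	/* placeholder */
+	const std::string::size_type max_size = 84;	/* placeholder */
+
+	return(token.size() >= min_size && token.size() <= max_size
+	       && (stopwords == NULL || stopwords->count(token) == 0));
+}
+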
+/** Add the token and its start position to the token's list of positions.
+@param[in,out]	result_doc	result document holding the token rb tree
+@param[in] str token string
+@param[in] position token position */
+static
+void
+fts_add_token(
+ fts_doc_t* result_doc,
+ fts_string_t str,
+ ulint position)
+{
/* Ignore string whose character number is less than
"fts_min_token_size" or more than "fts_max_token_size" */
- if (str.f_n_char >= fts_min_token_size
- && str.f_n_char <= fts_max_token_size) {
+ if (fts_check_token(&str, NULL, result_doc->charset)) {
mem_heap_t* heap;
fts_string_t t_str;
@@ -4729,7 +4503,7 @@ fts_process_token(
t_str.f_n_char = str.f_n_char;
- t_str.f_len = str.f_len * doc->charset->casedn_multiply + 1;
+ t_str.f_len = str.f_len * result_doc->charset->casedn_multiply + 1;
t_str.f_str = static_cast<byte*>(
mem_heap_alloc(heap, t_str.f_len));
@@ -4742,7 +4516,7 @@ fts_process_token(
newlen= str.f_len;
} else {
newlen = innobase_fts_casedn_str(
- doc->charset, (char*) str.f_str, str.f_len,
+ result_doc->charset, (char*) str.f_str, str.f_len,
(char*) t_str.f_str, t_str.f_len);
}
@@ -4761,41 +4535,220 @@ fts_process_token(
new_token.positions = ib_vector_create(
result_doc->self_heap, sizeof(ulint), 32);
- ut_a(new_token.text.f_n_char >= fts_min_token_size);
- ut_a(new_token.text.f_n_char <= fts_max_token_size);
-
parent.last = rbt_add_node(
result_doc->tokens, &parent, &new_token);
ut_ad(rbt_validate(result_doc->tokens));
}
-#ifdef FTS_CHARSET_DEBUG
- offset += start_pos + add_pos;
-#endif /* FTS_CHARSET_DEBUG */
-
- offset += start_pos + ret - str.f_len + add_pos;
-
token = rbt_value(fts_token_t, parent.last);
- ib_vector_push(token->positions, &offset);
+ ib_vector_push(token->positions, &position);
}
+}
+
+/********************************************************************
+Process next token from document starting at the given position, i.e., add
+the token's start position to the token's list of positions.
+@return number of characters handled in this call */
+static
+ulint
+fts_process_token(
+/*==============*/
+ fts_doc_t* doc, /* in/out: document to
+ tokenize */
+ fts_doc_t* result, /* out: if provided, save
+ result here */
+ ulint start_pos, /*!< in: start position in text */
+ ulint add_pos) /*!< in: add this position to all
+ tokens from this tokenization */
+{
+ ulint ret;
+ fts_string_t str;
+ ulint position;
+ fts_doc_t* result_doc;
+ byte buf[FTS_MAX_WORD_LEN + 1];
+
+ str.f_str = buf;
+
+ /* Determine where to save the result. */
+ result_doc = (result != NULL) ? result : doc;
+
+ /* The length of a string in characters is set here only. */
+
+ ret = innobase_mysql_fts_get_token(
+ doc->charset, doc->text.f_str + start_pos,
+ doc->text.f_str + doc->text.f_len, &str);
+
+ position = start_pos + ret - str.f_len + add_pos;
+
+ fts_add_token(result_doc, str, position);
return(ret);
}
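+
+/* A worked example of the position arithmetic above, assuming
+single-byte text: for the document "aa bb", the first call has
+start_pos = 0 and consumes ret = 2 bytes for the token "aa"
+(str.f_len = 2), giving position 0 + 2 - 2 = 0. The next call has
+start_pos = 2 and consumes ret = 3 bytes (one delimiter plus "bb",
+str.f_len = 2), giving position 2 + 3 - 2 = 3, the byte offset at
+which "bb" starts. */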
+/*************************************************************//**
+Get token char size by charset
+@return token size */
+ulint
+fts_get_token_size(
+/*===============*/
+ const CHARSET_INFO* cs, /*!< in: Character set */
+ const char* token, /*!< in: token */
+ ulint len) /*!< in: token length */
+{
+ char* start;
+ char* end;
+ ulint size = 0;
+
+	/* The const_cast enables the reinterpret_cast below. */
+ start = const_cast<char*>(token);
+ end = start + len;
+ while (start < end) {
+ int ctype;
+ int mbl;
+
+ mbl = cs->cset->ctype(
+ cs, &ctype,
+ reinterpret_cast<uchar*>(start),
+ reinterpret_cast<uchar*>(end));
+
+ size++;
+
+ start += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
+ }
+
+ return(size);
+}
+
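+/* A standalone UTF-8 analogue of fts_get_token_size(): advance by each
+character's byte length while counting characters, so a token such as
+"héllo" counts as five characters even though it occupies six bytes.
+This sketch assumes well-formed UTF-8 input. */
+static unsigned long utf8_char_count_sketch(const unsigned char* s,
+	unsigned long len)
+{
+	unsigned long n_chars = 0;
+
+	for (unsigned long i = 0; i < len; n_chars++) {
+		unsigned char c = s[i];
+		/* The lead byte determines the sequence length. */
+		i += (c < 0x80) ? 1 : (c < 0xE0) ? 2 : (c < 0xF0) ? 3 : 4;
+	}
+
+	return(n_chars);
+}
+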
+/*************************************************************//**
+FTS plugin parser 'mysql_parse' callback function for document tokenization.
+Refer to 'st_mysql_ftparser_param' for more detail.
+@return always returns 0 */
+int
+fts_tokenize_document_internal(
+/*===========================*/
+ MYSQL_FTPARSER_PARAM* param, /*!< in: parser parameter */
+ const char* doc,/*!< in/out: document */
+ int len) /*!< in: document length */
+{
+ fts_string_t str;
+ byte buf[FTS_MAX_WORD_LEN + 1];
+ /* JAN: TODO: MySQL 5.7
+ MYSQL_FTPARSER_BOOLEAN_INFO bool_info =
+ { FT_TOKEN_WORD, 0, 0, 0, 0, 0, ' ', 0 };
+ */
+ MYSQL_FTPARSER_BOOLEAN_INFO bool_info =
+ { FT_TOKEN_WORD, 0, 0, 0, 0, ' ', 0};
+
+ ut_ad(len >= 0);
+
+ str.f_str = buf;
+
+ for (ulint i = 0, inc = 0; i < static_cast<ulint>(len); i += inc) {
+ inc = innobase_mysql_fts_get_token(
+ const_cast<CHARSET_INFO*>(param->cs),
+ (uchar*)(doc) + i,
+ (uchar*)(doc) + len,
+ &str);
+
+ if (str.f_len > 0) {
+ /* JAN: TODO: MySQL 5.7
+ bool_info.position =
+ static_cast<int>(i + inc - str.f_len);
+ ut_ad(bool_info.position >= 0);
+ */
+
+ /* Stop when add word fails */
+ if (param->mysql_add_word(
+ param,
+ reinterpret_cast<char*>(str.f_str),
+ static_cast<int>(str.f_len),
+ &bool_info)) {
+ break;
+ }
+ }
+ }
+
+ return(0);
+}
+
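+/* Control flow of the default parse callback above: the server hands
+the whole document to mysql_parse(); the callback walks it token by
+token and reports each token back through param->mysql_add_word(),
+stopping early if that call returns non-zero. A user-supplied plugin
+parser must follow the same contract. */
+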
/******************************************************************//**
-Tokenize a document. */
-UNIV_INTERN
+FTS plugin parser 'mysql_add_word' callback function for document tokenization.
+Refer to 'st_mysql_ftparser_param' for more detail.
+@return always returns 0 */
+static
+int
+fts_tokenize_add_word_for_parser(
+/*=============================*/
+	MYSQL_FTPARSER_PARAM*	param,		/* in: parser parameter */
+ const char* word, /* in: token word */
+ int word_len, /* in: word len */
+ MYSQL_FTPARSER_BOOLEAN_INFO* boolean_info) /* in: word boolean info */
+{
+ fts_string_t str;
+ fts_tokenize_param_t* fts_param;
+ fts_doc_t* result_doc;
+ ulint position;
+
+ fts_param = static_cast<fts_tokenize_param_t*>(param->mysql_ftparam);
+ result_doc = fts_param->result_doc;
+ ut_ad(result_doc != NULL);
+
+ str.f_str = (byte*)(word);
+ str.f_len = word_len;
+ str.f_n_char = fts_get_token_size(
+ const_cast<CHARSET_INFO*>(param->cs), word, word_len);
+
+ /* JAN: TODO: MySQL 5.7 FTS
+ ut_ad(boolean_info->position >= 0);
+ position = boolean_info->position + fts_param->add_pos;
+ */
+ position = fts_param->add_pos;
+
+ fts_add_token(result_doc, str, position);
+
+ return(0);
+}
+
+/******************************************************************//**
+Parse a document using an external / user-supplied parser */
+static
void
-fts_tokenize_document(
-/*==================*/
- fts_doc_t* doc, /* in/out: document to
- tokenize */
- fts_doc_t* result) /* out: if provided, save
- the result token here */
+fts_tokenize_by_parser(
+/*===================*/
+ fts_doc_t* doc, /* in/out: document to tokenize */
+ st_mysql_ftparser* parser, /* in: plugin fts parser */
+ fts_tokenize_param_t* fts_param) /* in: fts tokenize param */
{
- ulint inc;
+ MYSQL_FTPARSER_PARAM param;
+ ut_a(parser);
+
+	/* Set the parameters for the plugin parser */
+ param.mysql_parse = fts_tokenize_document_internal;
+ param.mysql_add_word = fts_tokenize_add_word_for_parser;
+ param.mysql_ftparam = fts_param;
+ param.cs = doc->charset;
+ param.doc = reinterpret_cast<char*>(doc->text.f_str);
+ param.length = static_cast<int>(doc->text.f_len);
+	param.mode = MYSQL_FTPARSER_SIMPLE_MODE;
+
+ PARSER_INIT(parser, &param);
+ parser->parse(&param);
+ PARSER_DEINIT(parser, &param);
+}
+
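+/* The call sequence above -- PARSER_INIT(), parser->parse(),
+PARSER_DEINIT() -- is the per-document life cycle of a plugin parser.
+MYSQL_FTPARSER_SIMPLE_MODE asks the parser for plain word tokens only,
+with no boolean-operator information attached. */
+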
+/** Tokenize a document.
+@param[in,out] doc document to tokenize
+@param[out] result tokenization result
+@param[in] parser pluggable parser */
+static
+void
+fts_tokenize_document(
+ fts_doc_t* doc,
+ fts_doc_t* result,
+ st_mysql_ftparser* parser)
+{
ut_a(!doc->tokens);
ut_a(doc->charset);
@@ -4803,58 +4756,72 @@ fts_tokenize_document(
innobase_fts_text_cmp,
(void*) doc->charset);
- for (ulint i = 0; i < doc->text.f_len; i += inc) {
- inc = fts_process_token(doc, result, i, 0);
- ut_a(inc > 0);
+ if (parser != NULL) {
+ fts_tokenize_param_t fts_param;
+ fts_param.result_doc = (result != NULL) ? result : doc;
+ fts_param.add_pos = 0;
+
+ fts_tokenize_by_parser(doc, parser, &fts_param);
+ } else {
+ ulint inc;
+
+ for (ulint i = 0; i < doc->text.f_len; i += inc) {
+ inc = fts_process_token(doc, result, i, 0);
+ ut_a(inc > 0);
+ }
}
}
-/******************************************************************//**
-Continue to tokenize a document. */
-UNIV_INTERN
+/** Continue to tokenize a document.
+@param[in,out] doc document to tokenize
+@param[in] add_pos add this position to all tokens from this tokenization
+@param[out] result tokenization result
+@param[in] parser pluggable parser */
+static
void
fts_tokenize_document_next(
-/*=======================*/
- fts_doc_t* doc, /*!< in/out: document to
- tokenize */
- ulint add_pos, /*!< in: add this position to all
- tokens from this tokenization */
- fts_doc_t* result) /*!< out: if provided, save
- the result token here */
+ fts_doc_t* doc,
+ ulint add_pos,
+ fts_doc_t* result,
+ st_mysql_ftparser* parser)
{
- ulint inc;
-
ut_a(doc->tokens);
- for (ulint i = 0; i < doc->text.f_len; i += inc) {
- inc = fts_process_token(doc, result, i, add_pos);
- ut_a(inc > 0);
+ if (parser) {
+ fts_tokenize_param_t fts_param;
+
+ fts_param.result_doc = (result != NULL) ? result : doc;
+ fts_param.add_pos = add_pos;
+
+ fts_tokenize_by_parser(doc, parser, &fts_param);
+ } else {
+ ulint inc;
+
+ for (ulint i = 0; i < doc->text.f_len; i += inc) {
+ inc = fts_process_token(doc, result, i, add_pos);
+ ut_a(inc > 0);
+ }
}
}
-/********************************************************************
-Create the vector of fts_get_doc_t instances. */
-UNIV_INTERN
+/** Create the vector of fts_get_doc_t instances.
+@param[in,out] cache fts cache
+@return vector of fts_get_doc_t instances */
+static
ib_vector_t*
fts_get_docs_create(
-/*================*/
- /* out: vector of
- fts_get_doc_t instances */
- fts_cache_t* cache) /*!< in: fts cache */
+ fts_cache_t* cache)
{
- ulint i;
ib_vector_t* get_docs;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_EX));
-#endif
+ ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
+
/* We need one instance of fts_get_doc_t per index. */
- get_docs = ib_vector_create(
- cache->self_heap, sizeof(fts_get_doc_t), 4);
+ get_docs = ib_vector_create(cache->self_heap, sizeof(fts_get_doc_t), 4);
/* Create the get_doc instance, we need one of these
per FTS index. */
- for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
+ for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
dict_index_t** index;
fts_get_doc_t* get_doc;
@@ -4906,7 +4873,6 @@ fts_get_docs_clear(
/*********************************************************************//**
Get the initial Doc ID by consulting the CONFIG table
@return initial Doc ID */
-UNIV_INTERN
doc_id_t
fts_init_doc_id(
/*============*/
@@ -4981,7 +4947,6 @@ fts_is_index_updated(
/*********************************************************************//**
Fetch COUNT(*) from specified table.
@return the number of rows in the table */
-UNIV_INTERN
ulint
fts_get_rows_count(
/*===============*/
@@ -4995,7 +4960,6 @@ fts_get_rows_count(
char table_name[MAX_FULL_NAME_LEN];
trx = trx_allocate_for_background();
-
trx->op_info = "fetching FT table rows count";
info = pars_info_create();
@@ -5010,7 +4974,7 @@ fts_get_rows_count(
info,
"DECLARE FUNCTION my_func;\n"
"DECLARE CURSOR c IS"
- " SELECT COUNT(*) "
+ " SELECT COUNT(*)"
" FROM $table_name;\n"
"BEGIN\n"
"\n"
@@ -5033,18 +4997,14 @@ fts_get_rows_count(
} else {
fts_sql_rollback(trx);
- ut_print_timestamp(stderr);
-
if (error == DB_LOCK_WAIT_TIMEOUT) {
- fprintf(stderr, " InnoDB: Warning: lock wait "
- "timeout reading FTS table. "
- "Retrying!\n");
+ ib::warn() << "lock wait timeout reading"
+ " FTS table. Retrying!";
trx->error_state = DB_SUCCESS;
} else {
- fprintf(stderr, " InnoDB: Error: (%s) "
- "while reading FTS table.\n",
- ut_strerr(error));
+ ib::error() << "(" << ut_strerr(error)
+ << ") while reading FTS table.";
break; /* Exit the loop. */
}
@@ -5165,7 +5125,6 @@ fts_savepoint_free(
/*********************************************************************//**
Free an FTS trx. */
-UNIV_INTERN
void
fts_trx_free(
/*=========*/
@@ -5209,7 +5168,6 @@ fts_trx_free(
/*********************************************************************//**
Extract the doc id from the FTS hidden column.
@return doc id that was extracted from rec */
-UNIV_INTERN
doc_id_t
fts_get_doc_id_from_row(
/*====================*/
@@ -5233,37 +5191,37 @@ fts_get_doc_id_from_row(
return(doc_id);
}
-/*********************************************************************//**
-Extract the doc id from the FTS hidden column.
+/** Extract the doc id from the record that belongs to index.
+@param[in] table table
+@param[in] rec record contains FTS_DOC_ID
+@param[in] index index of rec
+@param[in] heap heap memory
@return doc id that was extracted from rec */
-UNIV_INTERN
doc_id_t
fts_get_doc_id_from_rec(
-/*====================*/
- dict_table_t* table, /*!< in: table */
- const rec_t* rec, /*!< in: rec */
- mem_heap_t* heap) /*!< in: heap */
+ dict_table_t* table,
+ const rec_t* rec,
+ const dict_index_t* index,
+ mem_heap_t* heap)
{
ulint len;
const byte* data;
ulint col_no;
doc_id_t doc_id = 0;
- dict_index_t* clust_index;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
mem_heap_t* my_heap = heap;
ut_a(table->fts->doc_col != ULINT_UNDEFINED);
- clust_index = dict_table_get_first_index(table);
-
rec_offs_init(offsets_);
offsets = rec_get_offsets(
- rec, clust_index, offsets, ULINT_UNDEFINED, &my_heap);
+ rec, index, offsets, true, ULINT_UNDEFINED, &my_heap);
+
+ col_no = dict_col_get_index_pos(
+ &table->cols[table->fts->doc_col], index);
- col_no = dict_col_get_clust_pos(
- &table->cols[table->fts->doc_col], clust_index);
ut_ad(col_no != ULINT_UNDEFINED);
data = rec_get_nth_field(rec, offsets, col_no, &len);
@@ -5282,7 +5240,6 @@ fts_get_doc_id_from_rec(
/*********************************************************************//**
Search the index specific cache for a particular FTS index.
@return the index specific cache else NULL */
-UNIV_INTERN
fts_index_cache_t*
fts_find_index_cache(
/*=================*/
@@ -5298,7 +5255,6 @@ fts_find_index_cache(
/*********************************************************************//**
Search cache for word.
@return the word node vector if found else NULL */
-UNIV_INTERN
const ib_vector_t*
fts_cache_find_word(
/*================*/
@@ -5307,12 +5263,12 @@ fts_cache_find_word(
{
ib_rbt_bound_t parent;
const ib_vector_t* nodes = NULL;
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
dict_table_t* table = index_cache->index->table;
fts_cache_t* cache = table->fts->cache;
- ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_EX));
-#endif
+ ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
+#endif /* UNIV_DEBUG */
/* Lookup the word in the rb tree */
if (rbt_search(index_cache->words, &parent, text) == 0) {
@@ -5327,48 +5283,14 @@ fts_cache_find_word(
}
/*********************************************************************//**
-Check cache for deleted doc id.
-@return TRUE if deleted */
-UNIV_INTERN
-ibool
-fts_cache_is_deleted_doc_id(
-/*========================*/
- const fts_cache_t* cache, /*!< in: cache ito search */
- doc_id_t doc_id) /*!< in: doc id to search for */
-{
- ulint i;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(mutex_own(&cache->deleted_lock));
-#endif
-
- for (i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
- const fts_update_t* update;
-
- update = static_cast<const fts_update_t*>(
- ib_vector_get_const(cache->deleted_doc_ids, i));
-
- if (doc_id == update->doc_id) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
Append deleted doc ids to vector. */
-UNIV_INTERN
void
fts_cache_append_deleted_doc_ids(
/*=============================*/
const fts_cache_t* cache, /*!< in: cache to use */
ib_vector_t* vector) /*!< in: append to this vector */
{
- ulint i;
-
- mutex_enter((ib_mutex_t*) &cache->deleted_lock);
+ mutex_enter(const_cast<ib_mutex_t*>(&cache->deleted_lock));
if (cache->deleted_doc_ids == NULL) {
mutex_exit((ib_mutex_t*) &cache->deleted_lock);
@@ -5376,7 +5298,7 @@ fts_cache_append_deleted_doc_ids(
}
- for (i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
+ for (ulint i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
fts_update_t* update;
update = static_cast<fts_update_t*>(
@@ -5390,7 +5312,6 @@ fts_cache_append_deleted_doc_ids(
/*********************************************************************//**
Add the FTS document id hidden column. */
-UNIV_INTERN
void
fts_add_doc_id_column(
/*==================*/
@@ -5408,16 +5329,23 @@ fts_add_doc_id_column(
DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_HAS_DOC_ID);
}
-/*********************************************************************//**
-Update the query graph with a new document id.
-@return Doc ID used */
-UNIV_INTERN
+/** Add new fts doc id to the update vector.
+@param[in] table the table that contains the FTS index.
+@param[in,out] ufield the fts doc id field in the update vector.
+ No new memory is allocated for this in this
+ function.
+@param[in,out] next_doc_id the fts doc id that has been added to the
+ update vector. If 0, a new fts doc id is
+ automatically generated. The memory provided
+ for this argument will be used by the update
+ vector. Ensure that the life time of this
+ memory matches that of the update vector.
+@return the fts doc id used in the update vector */
doc_id_t
fts_update_doc_id(
-/*==============*/
- dict_table_t* table, /*!< in: table */
- upd_field_t* ufield, /*!< out: update node */
- doc_id_t* next_doc_id) /*!< in/out: buffer for writing */
+ dict_table_t* table,
+ upd_field_t* ufield,
+ doc_id_t* next_doc_id)
{
doc_id_t doc_id;
dberr_t error = DB_SUCCESS;
@@ -5431,6 +5359,8 @@ fts_update_doc_id(
if (error == DB_SUCCESS) {
dict_index_t* clust_index;
+ dict_col_t* col = dict_table_get_nth_col(
+ table, table->fts->doc_col);
ufield->exp = NULL;
@@ -5438,8 +5368,8 @@ fts_update_doc_id(
clust_index = dict_table_get_first_index(table);
- ufield->field_no = dict_col_get_clust_pos(
- &table->cols[table->fts->doc_col], clust_index);
+ ufield->field_no = dict_col_get_clust_pos(col, clust_index);
+ dict_col_copy_type(col, dfield_get_type(&ufield->new_val));
/* It is possible we are updating a record that has
not yet been sync-ed from the last crash. */
@@ -5449,79 +5379,84 @@ fts_update_doc_id(
fts_write_doc_id((byte*) next_doc_id, doc_id);
ufield->new_val.data = next_doc_id;
+ ufield->new_val.ext = 0;
}
return(doc_id);
}
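
/* Illustrative sketch, not part of this patch: per the rewritten comment
above, the buffer passed as next_doc_id is adopted by the update vector
(ufield->new_val.data ends up pointing into it), so it must live at least
as long as the update vector does. A caller would take it from the heap
that owns the update vector; the helper name and the heap parameter are
assumptions for illustration. */
static
doc_id_t
fts_update_doc_id_example(
	dict_table_t*	table,	/*!< in: table with FTS index */
	upd_field_t*	ufield,	/*!< in/out: doc id update field */
	mem_heap_t*	heap)	/*!< in: heap owning the update vector */
{
	doc_id_t*	next_doc_id = static_cast<doc_id_t*>(
		mem_heap_alloc(heap, sizeof(doc_id_t)));

	/* Passing 0 asks fts_update_doc_id() to generate a new doc id. */
	*next_doc_id = 0;

	return(fts_update_doc_id(table, ufield, next_doc_id));
}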
-/*********************************************************************//**
-Check if the table has an FTS index. This is the non-inline version
-of dict_table_has_fts_index().
-@return TRUE if table has an FTS index */
-UNIV_INTERN
-ibool
-fts_dict_table_has_fts_index(
-/*=========================*/
- dict_table_t* table) /*!< in: table */
+/** fts_t constructor.
+@param[in] table table with FTS indexes
+@param[in,out] heap memory heap where 'this' is stored */
+fts_t::fts_t(
+ const dict_table_t* table,
+ mem_heap_t* heap)
+ :
+ added_synced(0), dict_locked(0),
+ bg_threads(0),
+ add_wq(NULL),
+ cache(NULL),
+ doc_col(ULINT_UNDEFINED), in_queue(false),
+ fts_heap(heap)
{
- return(dict_table_has_fts_index(table));
+ ut_a(table->fts == NULL);
+
+ mutex_create(LATCH_ID_FTS_BG_THREADS, &bg_threads_mutex);
+
+ ib_alloc_t* heap_alloc = ib_heap_allocator_create(fts_heap);
+
+ indexes = ib_vector_create(heap_alloc, sizeof(dict_index_t*), 4);
+
+ dict_table_get_all_fts_indexes(table, indexes);
+}
+
+/** fts_t destructor. */
+fts_t::~fts_t()
+{
+ mutex_free(&bg_threads_mutex);
+
+ ut_ad(add_wq == NULL);
+
+ if (cache != NULL) {
+ fts_cache_clear(cache);
+ fts_cache_destroy(cache);
+ cache = NULL;
+ }
+
+ /* There is no need to call ib_vector_free() on this->indexes
+ because it is stored in this->fts_heap. */
}
/*********************************************************************//**
Create an instance of fts_t.
@return instance of fts_t */
-UNIV_INTERN
fts_t*
fts_create(
/*=======*/
dict_table_t* table) /*!< in/out: table with FTS indexes */
{
fts_t* fts;
- ib_alloc_t* heap_alloc;
mem_heap_t* heap;
- ut_a(!table->fts);
-
heap = mem_heap_create(512);
fts = static_cast<fts_t*>(mem_heap_alloc(heap, sizeof(*fts)));
- memset(fts, 0x0, sizeof(*fts));
-
- fts->fts_heap = heap;
-
- fts->doc_col = ULINT_UNDEFINED;
-
- mutex_create(
- fts_bg_threads_mutex_key, &fts->bg_threads_mutex,
- SYNC_FTS_BG_THREADS);
-
- heap_alloc = ib_heap_allocator_create(heap);
- fts->indexes = ib_vector_create(heap_alloc, sizeof(dict_index_t*), 4);
- dict_table_get_all_fts_indexes(table, fts->indexes);
+ new(fts) fts_t(table, heap);
return(fts);
}
/*********************************************************************//**
Free the FTS resources. */
-UNIV_INTERN
void
fts_free(
/*=====*/
dict_table_t* table) /*!< in/out: table with FTS indexes */
{
- fts_t* fts = table->fts;
-
- mutex_free(&fts->bg_threads_mutex);
-
- ut_ad(!fts->add_wq);
+ fts_t* fts = table->fts;
- if (fts->cache) {
- fts_cache_clear(fts->cache);
- fts_cache_destroy(fts->cache);
- fts->cache = NULL;
- }
+ fts->~fts_t();
mem_heap_free(fts->fts_heap);
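
/* Illustrative sketch, not part of this patch: fts_create()/fts_free() now
pair placement new with an explicit destructor call, the usual idiom for an
object whose storage lives inside a mem_heap_t. A generic version of the
same lifecycle, with assumed names (<new> assumed included): */
struct heap_obj_t {
	explicit heap_obj_t(mem_heap_t* h) : heap(h) {}
	mem_heap_t*	heap;
};

static
heap_obj_t*
heap_obj_create()
{
	mem_heap_t*	heap = mem_heap_create(128);
	void*		raw = mem_heap_alloc(heap, sizeof(heap_obj_t));

	/* Construct in place; the heap already owns the bytes. */
	return(new(raw) heap_obj_t(heap));
}

static
void
heap_obj_free(heap_obj_t* obj)
{
	mem_heap_t*	heap = obj->heap;

	/* Run the destructor by hand; operator delete must not be used,
	because the storage belongs to the heap, which is freed wholesale
	below and releases the object's bytes with it. */
	obj->~heap_obj_t();
	mem_heap_free(heap);
}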
@@ -5557,7 +5492,6 @@ fts_savepoint_copy(
/*********************************************************************//**
Take a FTS savepoint. */
-UNIV_INTERN
void
fts_savepoint_take(
/*===============*/
@@ -5617,7 +5551,6 @@ fts_savepoint_lookup(
Release the savepoint data identified by name. All savepoints created
after the named savepoint are kept.
@return DB_SUCCESS or error code */
-UNIV_INTERN
void
fts_savepoint_release(
/*==================*/
@@ -5660,7 +5593,6 @@ fts_savepoint_release(
/**********************************************************************//**
Refresh last statement savepoint. */
-UNIV_INTERN
void
fts_savepoint_laststmt_refresh(
/*===========================*/
@@ -5736,7 +5668,6 @@ fts_undo_last_stmt(
/**********************************************************************//**
Rollback to savepoint identified by name.
@return DB_SUCCESS or error code */
-UNIV_INTERN
void
fts_savepoint_rollback_last_stmt(
/*=============================*/
@@ -5786,7 +5717,6 @@ fts_savepoint_rollback_last_stmt(
/**********************************************************************//**
Rollback to savepoint identified by name.
@return DB_SUCCESS or error code */
-UNIV_INTERN
void
fts_savepoint_rollback(
/*===================*/
@@ -5847,16 +5777,17 @@ fts_savepoint_rollback(
}
}
-/**********************************************************************//**
-Check if a table is an FTS auxiliary table name.
-@return TRUE if the name matches an auxiliary table name pattern */
+/** Check if a table is an FTS auxiliary table name.
+@param[out] table FTS table info
+@param[in] name Table name
+@param[in] len Length of table name
+@return true if the name matches an auxiliary table name pattern */
static
-ibool
+bool
fts_is_aux_table_name(
-/*==================*/
- fts_aux_table_t*table, /*!< out: table info */
- const char* name, /*!< in: table name */
- ulint len) /*!< in: length of table name */
+ fts_aux_table_t* table,
+ const char* name,
+ ulint len)
{
const char* ptr;
char* end;
@@ -5886,14 +5817,14 @@ fts_is_aux_table_name(
/* Try and read the table id. */
if (!fts_read_object_id(&table->parent_id, ptr)) {
- return(FALSE);
+ return(false);
}
/* Skip the table id. */
ptr = static_cast<const char*>(memchr(ptr, '_', len));
if (ptr == NULL) {
- return(FALSE);
+ return(false);
}
/* Skip the underscore. */
@@ -5905,7 +5836,7 @@ fts_is_aux_table_name(
for (i = 0; fts_common_tables[i] != NULL; ++i) {
if (strncmp(ptr, fts_common_tables[i], len) == 0) {
- return(TRUE);
+ return(true);
}
}
@@ -5917,14 +5848,14 @@ fts_is_aux_table_name(
/* Try and read the index id. */
if (!fts_read_object_id(&table->index_id, ptr)) {
- return(FALSE);
+ return(false);
}
/* Skip the table id. */
ptr = static_cast<const char*>(memchr(ptr, '_', len));
if (ptr == NULL) {
- return(FALSE);
+ return(false);
}
/* Skip the underscore. */
@@ -5933,20 +5864,20 @@ fts_is_aux_table_name(
len = end - ptr;
/* Search the FT index specific array. */
- for (i = 0; fts_index_selector[i].value; ++i) {
+ for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
if (strncmp(ptr, fts_get_suffix(i), len) == 0) {
- return(TRUE);
+ return(true);
}
}
/* Other FT index specific table(s). */
if (strncmp(ptr, "DOC_ID", len) == 0) {
- return(TRUE);
+ return(true);
}
}
- return(FALSE);
+ return(false);
}
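
/* Illustrative sketch, not part of this patch: the names accepted above
have the shape "db/FTS_<parent id>_<suffix>" for common tables and
"db/FTS_<parent id>_<index id>_<suffix>" for per-index tables, the ids
being 16-digit numbers (hex on new-format servers; whether a given name is
hex or decimal is exactly what the surrounding code has to untangle). A
self-contained decoder for the hex format only; struct and field names are
assumptions: */
#include <cstdio>
#include <cstring>

struct aux_name_t {
	unsigned long long	parent_id;
	unsigned long long	index_id;	/* 0 for common tables */
	char			suffix[64];
};

static
bool
decode_aux_name(const char* name, aux_name_t* out)
{
	const char*	p = strchr(name, '/');

	p = (p != NULL) ? p + 1 : name;

	/* Per-index shape first: FTS_<parent>_<index>_<suffix>. */
	if (sscanf(p, "FTS_%16llx_%16llx_%63s",
		   &out->parent_id, &out->index_id, out->suffix) == 3) {
		return(true);
	}

	/* Common-table shape: FTS_<parent>_<suffix>. A failed attempt
	above may have scribbled on index_id, so reset it. */
	out->index_id = 0;

	return(sscanf(p, "FTS_%16llx_%63s",
		      &out->parent_id, out->suffix) == 2);
}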
/**********************************************************************//**
@@ -6050,7 +5981,7 @@ fts_set_hex_format(
/*****************************************************************//**
Update the DICT_TF2_FTS_AUX_HEX_NAME flag in SYS_TABLES.
@return DB_SUCCESS or error code. */
-UNIV_INTERN
+static
dberr_t
fts_update_hex_format_flag(
/*=======================*/
@@ -6069,8 +6000,8 @@ fts_update_hex_format_flag(
"PROCEDURE UPDATE_HEX_FORMAT_FLAG() IS\n"
"DECLARE FUNCTION my_func;\n"
"DECLARE CURSOR c IS\n"
- " SELECT MIX_LEN "
- " FROM SYS_TABLES "
+ " SELECT MIX_LEN"
+ " FROM SYS_TABLES"
" WHERE ID = :table_id FOR UPDATE;"
"\n"
"BEGIN\n"
@@ -6105,7 +6036,7 @@ fts_update_hex_format_flag(
ut_a(flags2 != ULINT32_UNDEFINED);
- return (err);
+ return(err);
}
/*********************************************************************//**
@@ -6174,20 +6105,18 @@ fts_rename_one_aux_table_to_hex_format(
}
error = row_rename_table_for_mysql(aux_table->name, new_name, trx,
- FALSE);
+ false, false);
if (error != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Failed to rename aux table \'%s\' to "
- "new format \'%s\'. ",
- aux_table->name, new_name);
+ ib::warn() << "Failed to rename aux table '"
+ << aux_table->name << "' to new format '"
+ << new_name << "'.";
} else {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Renamed aux table \'%s\' to \'%s\'.",
- aux_table->name, new_name);
+ ib::info() << "Renamed aux table '" << aux_table->name
+ << "' to '" << new_name << "'.";
}
- return (error);
+ return(error);
}
/**********************************************************************//**
@@ -6216,12 +6145,10 @@ fts_rename_aux_tables_to_hex_format_low(
error = fts_update_hex_format_flag(trx, parent_table->id, true);
if (error != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Setting parent table %s to hex format failed.",
- parent_table->name);
-
+ ib::warn() << "Setting parent table " << parent_table->name
+ << " to hex format failed.";
fts_sql_rollback(trx);
- return (error);
+ return(error);
}
DICT_TF2_FLAG_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
@@ -6252,10 +6179,9 @@ fts_rename_aux_tables_to_hex_format_low(
if (error != DB_SUCCESS) {
dict_table_close(table, TRUE, FALSE);
- ib_logf(IB_LOG_LEVEL_WARN,
- "Failed to rename one aux table %s "
- "Will revert all successful rename "
- "operations.", aux_table->name);
+ ib::warn() << "Failed to rename one aux table "
+ << aux_table->name << ". Will revert"
+ " all successful rename operations.";
fts_sql_rollback(trx);
break;
@@ -6265,9 +6191,8 @@ fts_rename_aux_tables_to_hex_format_low(
dict_table_close(table, TRUE, FALSE);
if (error != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Setting aux table %s to hex format failed.",
- aux_table->name);
+ ib::warn() << "Setting aux table " << aux_table->name
+ << " to hex format failed.";
fts_sql_rollback(trx);
break;
@@ -6276,10 +6201,13 @@ fts_rename_aux_tables_to_hex_format_low(
if (error != DB_SUCCESS) {
ut_ad(count != ib_vector_size(tables));
+
/* If rename fails, the trx would be rolled back, we can't
use it any more, we'll start a new background trx to do
the reverting. */
- ut_a(trx->state == TRX_STATE_NOT_STARTED);
+
+ ut_ad(!trx_is_started(trx));
+
bool not_rename = false;
/* Try to revert those successful rename operations
@@ -6314,17 +6242,17 @@ fts_rename_aux_tables_to_hex_format_low(
trx_start_for_ddl(trx_bg, TRX_DICT_OP_TABLE);
DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS_AUX_HEX_NAME);
- err = row_rename_table_for_mysql(table->name,
+ err = row_rename_table_for_mysql(table->name.m_name,
aux_table->name,
- trx_bg, FALSE);
+ trx_bg, false, false);
trx_bg->dict_operation_lock_mode = 0;
dict_table_close(table, TRUE, FALSE);
if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_WARN, "Failed to revert "
- "table %s. Please revert manually.",
- table->name);
+ ib::warn() << "Failed to revert table "
+ << table->name << ". Please revert"
+ " manually.";
fts_sql_rollback(trx_bg);
trx_free_for_background(trx_bg);
/* Continue to clear aux tables' flags2 */
@@ -6339,7 +6267,7 @@ fts_rename_aux_tables_to_hex_format_low(
DICT_TF2_FLAG_UNSET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
}
- return (error);
+ return(error);
}
/**********************************************************************//**
@@ -6353,15 +6281,16 @@ fts_fake_hex_to_dec(
{
ib_id_t dec_id = 0;
char tmp_id[FTS_AUX_MIN_TABLE_ID_LENGTH];
- int ret MY_ATTRIBUTE((unused));
- ret = sprintf(tmp_id, UINT64PFx, id);
+#ifdef UNIV_DEBUG
+ int ret =
+#endif /* UNIV_DEBUG */
+ sprintf(tmp_id, UINT64PFx, id);
ut_ad(ret == 16);
-#ifdef _WIN32
- ret = sscanf(tmp_id, "%016llu", &dec_id);
-#else
- ret = sscanf(tmp_id, "%016" PRIu64, &dec_id);
-#endif /* _WIN32 */
+#ifdef UNIV_DEBUG
+ ret =
+#endif /* UNIV_DEBUG */
+ sscanf(tmp_id, "%016" UINT64scan, &dec_id);
ut_ad(ret == 1);
return dec_id;
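
/* Illustrative sketch, not part of this patch: fts_fake_hex_to_dec()
repairs ids taken from old-format names, where a decimal id in the name
was read as if it were hex. The fix is a pure digit reinterpretation:
print the bogus value back as 16 hex digits, then reread those digits as
decimal. This works only because such ids contain no hex digits beyond
'9'. Self-contained: */
#include <cstdio>

static
unsigned long long
fake_hex_to_dec(unsigned long long id)
{
	char			buf[16 + 1];
	unsigned long long	dec = 0;

	/* E.g. 0x123 (the bogus hex reading of the name "...123...")
	prints as "0000000000000123" ... */
	sprintf(buf, "%016llx", id);

	/* ... which rereads as decimal 123. */
	sscanf(buf, "%llu", &dec);

	return(dec);
}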
@@ -6426,7 +6355,7 @@ fts_set_index_corrupt(
}
for (ulint j = 0; j < ib_vector_size(fts->indexes); j++) {
- dict_index_t* index = static_cast<dict_index_t*>(
+ dict_index_t* index = static_cast<dict_index_t*>(
ib_vector_getp_const(fts->indexes, j));
if (index->id == id) {
dict_set_corrupted(index, trx,
@@ -6459,7 +6388,7 @@ fts_check_corrupt_index(
if (index->id == aux_table->index_id) {
ut_ad(index->type & DICT_FTS);
dict_table_close(table, true, false);
- return(dict_index_is_corrupted(index));
+ return index->is_corrupted();
}
}
@@ -6487,8 +6416,8 @@ fts_get_parent_table_name(
if (parent_table != NULL) {
parent_table_name = mem_strdupl(
- parent_table->name,
- strlen(parent_table->name));
+ parent_table->name.m_name,
+ strlen(parent_table->name.m_name));
dict_table_close(parent_table, TRUE, FALSE);
}
@@ -6561,12 +6490,10 @@ fts_rename_aux_tables_to_hex_format(
if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Rollback operations on all aux tables of table %s. "
- "All the fts index associated with the table are "
- "marked as corrupted. Please rebuild the "
- "index again.", parent_table->name);
- fts_sql_rollback(trx_rename);
+ ib::warn() << "Rollback operations on all aux tables of "
+ "table "<< parent_table->name << ". All the fts index "
+ "associated with the table are marked as corrupted. "
+ "Please rebuild the index again.";
/* Corrupting the fts index related to parent table. */
trx_t* trx_corrupt;
@@ -6596,25 +6523,18 @@ fts_set_parent_hex_format_flag(
{
if (!DICT_TF2_FLAG_IS_SET(parent_table,
DICT_TF2_FTS_AUX_HEX_NAME)) {
- DBUG_EXECUTE_IF("parent_table_flag_fail",
- ib_logf(IB_LOG_LEVEL_FATAL,
- "Setting parent table %s to hex format "
- "failed. Please try to restart the server "
- "again, if it doesn't work, the system "
- "tables might be corrupted.",
- parent_table->name);
- return;);
+ DBUG_EXECUTE_IF("parent_table_flag_fail", DBUG_SUICIDE(););
dberr_t err = fts_update_hex_format_flag(
trx, parent_table->id, true);
if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_FATAL,
- "Setting parent table %s to hex format "
- "failed. Please try to restart the server "
- "again, if it doesn't work, the system "
- "tables might be corrupted.",
- parent_table->name);
+ ib::fatal() << "Setting parent table "
+ << parent_table->name
+ << "to hex format failed. Please try "
+ << "to restart the server again, if it "
+ << "doesn't work, the system tables "
+ << "might be corrupted.";
} else {
DICT_TF2_FLAG_SET(
parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
@@ -6643,7 +6563,8 @@ fts_drop_obsolete_aux_table_from_vector(
trx_start_for_ddl(trx_drop, TRX_DICT_OP_TABLE);
err = row_drop_table_for_mysql(
- aux_drop_table->name, trx_drop, false, true);
+ aux_drop_table->name, trx_drop,
+ SQLCOM_DROP_TABLE, true);
trx_drop->dict_operation_lock_mode = 0;
@@ -6652,15 +6573,16 @@ fts_drop_obsolete_aux_table_from_vector(
failure, since server would try to
drop it on next restart, even if
the table was broken. */
- ib_logf(IB_LOG_LEVEL_WARN,
- "Fail to drop obsolete aux table '%s', which "
- "is harmless. will try to drop it on next "
- "restart.", aux_drop_table->name);
+ ib::warn() << "Failed to drop obsolete aux table "
+ << aux_drop_table->name << ", which is "
+ << "harmless. will try to drop it on next "
+ << "restart.";
+
fts_sql_rollback(trx_drop);
} else {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Dropped obsolete aux table '%s'.",
- aux_drop_table->name);
+ ib::info() << "Dropped obsolete aux"
+ " table '" << aux_drop_table->name
+ << "'.";
fts_sql_commit(trx_drop);
}
@@ -6686,16 +6608,22 @@ fts_drop_aux_table_from_vector(
/* Check for the validity of the parent table */
if (!fts_valid_parent_table(aux_drop_table)) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Parent table of FTS auxiliary table %s not "
- "found.", aux_drop_table->name);
+
+ ib::warn() << "Parent table of FTS auxiliary table "
+ << aux_drop_table->name << " not found.";
+
dberr_t err = fts_drop_table(trx, aux_drop_table->name);
if (err == DB_FAIL) {
- char* path = fil_make_ibd_name(
- aux_drop_table->name, false);
- os_file_delete_if_exists(innodb_file_data_key,
- path);
- mem_free(path);
+
+ char* path = fil_make_filepath(
+ NULL, aux_drop_table->name, IBD, false);
+
+ if (path != NULL) {
+ os_file_delete_if_exists(
+ innodb_data_file_key,
+ path, NULL);
+ ut_free(path);
+ }
}
}
}
@@ -6766,7 +6694,8 @@ fts_check_and_drop_orphaned_tables(
orig_parent_id = aux_table->parent_id;
orig_index_id = aux_table->index_id;
- if (table == NULL || strcmp(table->name, aux_table->name)) {
+ if (table == NULL
+ || strcmp(table->name.m_name, aux_table->name)) {
bool fake_aux = false;
@@ -6801,7 +6730,7 @@ fts_check_and_drop_orphaned_tables(
|| orig_parent_id != next_aux_table->parent_id)
&& (!ib_vector_is_empty(aux_tables_to_rename))) {
- ulint parent_id = fts_fake_hex_to_dec(
+ ib_id_t parent_id = fts_fake_hex_to_dec(
aux_table->parent_id);
parent_table = dict_table_open_on_id(
@@ -6863,7 +6792,7 @@ fts_check_and_drop_orphaned_tables(
}
if (table != NULL) {
- dict_table_close(table, true, false);
+ dict_table_close(table, TRUE, FALSE);
}
if (!rename) {
@@ -6874,7 +6803,7 @@ fts_check_and_drop_orphaned_tables(
}
/* Filter out the fake aux table by comparing with the
- current valid auxiliary table name . */
+ current valid auxiliary table name. */
for (ulint count = 0;
count < ib_vector_size(invalid_aux_tables); count++) {
fts_aux_table_t* invalid_aux;
@@ -6896,7 +6825,7 @@ fts_check_and_drop_orphaned_tables(
if (i + 1 < ib_vector_size(tables)) {
next_aux_table = static_cast<fts_aux_table_t*>(
- ib_vector_get(tables, i + 1));
+ ib_vector_get(tables, i + 1));
}
if (next_aux_table == NULL
@@ -6909,7 +6838,6 @@ fts_check_and_drop_orphaned_tables(
if (!ib_vector_is_empty(aux_tables_to_rename)) {
fts_rename_aux_tables_to_hex_format(
aux_tables_to_rename, parent_table);
-
} else {
fts_set_parent_hex_format_flag(
parent_table, trx);
@@ -6925,16 +6853,9 @@ fts_check_and_drop_orphaned_tables(
aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
if (drop) {
- ib_vector_push(drop_aux_tables, aux_table);
+ ib_vector_push(drop_aux_tables, aux_table);
} else {
if (FTS_IS_OBSOLETE_AUX_TABLE(aux_table->name)) {
-
- /* Current table could be one of the three
- obsolete tables, in this case, we should
- always try to drop it but not rename it.
- This could happen when we try to upgrade
- from older server to later one, which doesn't
- contain these obsolete tables. */
ib_vector_push(obsolete_aux_tables, aux_table);
continue;
}
@@ -6943,22 +6864,36 @@ fts_check_and_drop_orphaned_tables(
/* If the aux table is in decimal format, we should
rename it, so push it to aux_tables_to_rename */
if (!drop && rename) {
- ib_vector_push(aux_tables_to_rename, aux_table);
+ bool rename_table = true;
+ for (ulint count = 0;
+ count < ib_vector_size(aux_tables_to_rename);
+ count++) {
+ fts_aux_table_t* rename_aux =
+ static_cast<fts_aux_table_t*>(
+ ib_vector_get(aux_tables_to_rename,
+ count));
+ if (strcmp(rename_aux->name,
+ aux_table->name) == 0) {
+ rename_table = false;
+ break;
+ }
+ }
+
+ if (rename_table) {
+ ib_vector_push(aux_tables_to_rename,
+ aux_table);
+ }
}
if (i + 1 < ib_vector_size(tables)) {
next_aux_table = static_cast<fts_aux_table_t*>(
- ib_vector_get(tables, i + 1));
+ ib_vector_get(tables, i + 1));
}
if ((next_aux_table == NULL
|| orig_parent_id != next_aux_table->parent_id)
&& !ib_vector_is_empty(aux_tables_to_rename)) {
- /* All aux tables of parent table, whose id is
- last_parent_id, have been checked, try to rename
- them if necessary. We had better use a new background
- trx to rename rather than the original trx, in case
- any failure would cause a complete rollback. */
+
ut_ad(rename);
ut_ad(!DICT_TF2_FLAG_IS_SET(
parent_table, DICT_TF2_FTS_AUX_HEX_NAME));
@@ -6973,21 +6908,22 @@ fts_check_and_drop_orphaned_tables(
table = dict_table_open_on_id(
aux_table->id, TRUE, DICT_TABLE_OP_NORMAL);
+
if (table != NULL
- && strcmp(table->name, aux_table->name)) {
+ && strcmp(table->name.m_name, aux_table->name)) {
dict_table_close(table, TRUE, FALSE);
table = NULL;
}
if (table != NULL
&& !DICT_TF2_FLAG_IS_SET(
- table,
- DICT_TF2_FTS_AUX_HEX_NAME)) {
+ table,
+ DICT_TF2_FTS_AUX_HEX_NAME)) {
DBUG_EXECUTE_IF("aux_table_flag_fail",
- ib_logf(IB_LOG_LEVEL_WARN,
- "Setting aux table %s to hex "
- "format failed.", table->name);
+ ib::warn() << "Setting aux table "
+ << table->name << " to hex "
+ "format failed.";
fts_set_index_corrupt(
trx, aux_table->index_id,
parent_table);
@@ -6997,9 +6933,9 @@ fts_check_and_drop_orphaned_tables(
trx, table->id, true);
if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Setting aux table %s to hex "
- "format failed.", table->name);
+ ib::warn() << "Setting aux table "
+ << table->name << " to hex "
+ "format failed.";
fts_set_index_corrupt(
trx, aux_table->index_id,
@@ -7020,7 +6956,7 @@ table_exit:
ut_ad(parent_table != NULL);
fts_set_parent_hex_format_flag(
- parent_table, trx);
+ parent_table, trx);
}
if (parent_table != NULL) {
@@ -7043,7 +6979,6 @@ table_exit:
/**********************************************************************//**
Drop all orphaned FTS auxiliary tables, those that don't have a parent
table or FTS index defined on them. */
-UNIV_INTERN
void
fts_drop_orphaned_tables(void)
/*==========================*/
@@ -7061,8 +6996,7 @@ fts_drop_orphaned_tables(void)
error = fil_get_space_names(space_name_list);
if (error == DB_OUT_OF_MEMORY) {
- ib_logf(IB_LOG_LEVEL_ERROR, "Out of memory");
- ut_error;
+ ib::fatal() << "Out of memory";
}
heap = mem_heap_create(1024);
@@ -7092,7 +7026,7 @@ fts_drop_orphaned_tables(void)
} else {
ulint len = strlen(*it);
- fts_aux_table->id = fil_get_space_id_for_table(*it);
+ fts_aux_table->id = fil_space_get_id_by_name(*it);
/* We got this list from fil0fil.cc. The tablespace
with this name must exist. */
@@ -7118,7 +7052,7 @@ fts_drop_orphaned_tables(void)
info,
"DECLARE FUNCTION my_func;\n"
"DECLARE CURSOR c IS"
- " SELECT NAME, ID "
+ " SELECT NAME, ID"
" FROM SYS_TABLES;\n"
"BEGIN\n"
"\n"
@@ -7142,18 +7076,14 @@ fts_drop_orphaned_tables(void)
fts_sql_rollback(trx);
- ut_print_timestamp(stderr);
-
if (error == DB_LOCK_WAIT_TIMEOUT) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "lock wait timeout reading SYS_TABLES. "
- "Retrying!");
+ ib::warn() << "lock wait timeout reading"
+ " SYS_TABLES. Retrying!";
trx->error_state = DB_SUCCESS;
} else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "(%s) while reading SYS_TABLES.",
- ut_strerr(error));
+ ib::error() << "(" << ut_strerr(error)
+ << ") while reading SYS_TABLES.";
break; /* Exit the loop. */
}
@@ -7175,7 +7105,7 @@ fts_drop_orphaned_tables(void)
it != space_name_list.end();
++it) {
- delete[] *it;
+ UT_DELETE_ARRAY(*it);
}
}
@@ -7183,11 +7113,10 @@ fts_drop_orphaned_tables(void)
Check whether user supplied stopword table is of the right format.
Caller is responsible for holding dictionary locks.
@return the stopword column charset if qualifies */
-UNIV_INTERN
CHARSET_INFO*
fts_valid_stopword_table(
/*=====================*/
- const char* stopword_table_name) /*!< in: Stopword table
+ const char* stopword_table_name) /*!< in: Stopword table
name */
{
dict_table_t* table;
@@ -7200,9 +7129,8 @@ fts_valid_stopword_table(
table = dict_table_get_low(stopword_table_name);
if (!table) {
- fprintf(stderr,
- "InnoDB: user stopword table %s does not exist.\n",
- stopword_table_name);
+ ib::error() << "User stopword table " << stopword_table_name
+ << " does not exist.";
return(NULL);
} else {
@@ -7211,10 +7139,9 @@ fts_valid_stopword_table(
col_name = dict_table_get_col_name(table, 0);
if (ut_strcmp(col_name, "value")) {
- fprintf(stderr,
- "InnoDB: invalid column name for stopword "
- "table %s. Its first column must be named as "
- "'value'.\n", stopword_table_name);
+ ib::error() << "Invalid column name for stopword"
+ " table " << stopword_table_name << ". Its"
+ " first column must be named as 'value'.";
return(NULL);
}
@@ -7223,10 +7150,9 @@ fts_valid_stopword_table(
if (col->mtype != DATA_VARCHAR
&& col->mtype != DATA_VARMYSQL) {
- fprintf(stderr,
- "InnoDB: invalid column type for stopword "
- "table %s. Its first column must be of "
- "varchar type\n", stopword_table_name);
+ ib::error() << "Invalid column type for stopword"
+ " table " << stopword_table_name << ". Its"
+ " first column must be of varchar type";
return(NULL);
}
@@ -7234,9 +7160,7 @@ fts_valid_stopword_table(
ut_ad(col);
- return(innobase_get_fts_charset(
- static_cast<int>(col->prtype & DATA_MYSQL_TYPE_MASK),
- static_cast<uint>(dtype_get_charset_coll(col->prtype))));
+ return(fts_get_charset(col->prtype));
}
/**********************************************************************//**
@@ -7245,7 +7169,6 @@ records/fetches stopword configuration to/from FTS configure
table, depending on whether we are creating or reloading the
FTS.
@return TRUE if load operation is successful */
-UNIV_INTERN
ibool
fts_load_stopword(
/*==============*/
@@ -7281,6 +7204,11 @@ fts_load_stopword(
if (!trx) {
trx = trx_allocate_for_background();
+ if (srv_read_only_mode) {
+ trx_start_internal_read_only(trx);
+ } else {
+ trx_start_internal(trx);
+ }
trx->op_info = "upload FTS stopword";
new_trx = TRUE;
}
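
/* Illustrative sketch, not part of this patch: after this change a
background trx is no longer implicitly started, so the allocate-then-start
pair above recurs wherever one is created, picking the read-only variant
under --innodb-read-only. A helper capturing the pattern; its name is an
assumption: */
static
trx_t*
fts_trx_allocate_started(const char* op_info)
{
	trx_t*	trx = trx_allocate_for_background();

	if (srv_read_only_mode) {
		trx_start_internal_read_only(trx);
	} else {
		trx_start_internal(trx);
	}

	trx->op_info = op_info;

	return(trx);
}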
@@ -7359,8 +7287,9 @@ cleanup:
}
if (!cache->stopword_info.cached_stopword) {
- cache->stopword_info.cached_stopword = rbt_create(
- sizeof(fts_tokenizer_word_t), fts_utf8_string_cmp);
+ cache->stopword_info.cached_stopword = rbt_create_arg_cmp(
+ sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
+ &my_charset_latin1);
}
return(error == DB_SUCCESS);
@@ -7424,6 +7353,7 @@ fts_init_recover_doc(
sel_node_t* node = static_cast<sel_node_t*>(row);
que_node_t* exp = node->select_list;
fts_cache_t* cache = get_doc->cache;
+ st_mysql_ftparser* parser = get_doc->index_cache->index->parser;
fts_doc_init(&doc);
doc.found = TRUE;
@@ -7457,24 +7387,19 @@ fts_init_recover_doc(
ut_ad(get_doc);
if (!get_doc->index_cache->charset) {
- ulint prtype = dfield->type.prtype;
-
- get_doc->index_cache->charset =
- innobase_get_fts_charset(
- (int)(prtype & DATA_MYSQL_TYPE_MASK),
- (uint) dtype_get_charset_coll(prtype));
+ get_doc->index_cache->charset = fts_get_charset(
+ dfield->type.prtype);
}
doc.charset = get_doc->index_cache->charset;
if (dfield_is_ext(dfield)) {
dict_table_t* table = cache->sync->table;
- ulint zip_size = dict_table_zip_size(table);
doc.text.f_str = btr_copy_externally_stored_field(
&doc.text.f_len,
static_cast<byte*>(dfield_get_data(dfield)),
- zip_size, len,
+ dict_table_page_size(table), len,
static_cast<mem_heap_t*>(doc.self_heap->arg));
} else {
doc.text.f_str = static_cast<byte*>(
@@ -7484,9 +7409,9 @@ fts_init_recover_doc(
}
if (field_no == 1) {
- fts_tokenize_document(&doc, NULL);
+ fts_tokenize_document(&doc, NULL, parser);
} else {
- fts_tokenize_document_next(&doc, doc_len, NULL);
+ fts_tokenize_document_next(&doc, doc_len, NULL, parser);
}
exp = que_node_get_next(exp);
@@ -7515,7 +7440,6 @@ used. There are documents that have not yet sync-ed to auxiliary
tables from the last abnormal server shutdown; we will need to bring
such documents into the FTS cache before any further operations
@return TRUE if all OK */
-UNIV_INTERN
ibool
fts_init_index(
/*===========*/
@@ -7559,7 +7483,7 @@ fts_init_index(
dropped, and we re-initialize the Doc ID system for subsequent
insertion */
if (ib_vector_is_empty(cache->get_docs)) {
- index = dict_table_get_index_on_name(table, FTS_DOC_ID_INDEX_NAME);
+ index = table->fts_doc_id_index;
ut_a(index);
@@ -7602,3 +7526,58 @@ func_exit:
return(TRUE);
}
+
+/** Check if all the auxiliary tables associated with an FTS index are in a
+consistent state. For now, consistency is checked only by ensuring
+index->page_no != FIL_NULL.
+@param[out] base_table table that hosts the fts index
+@param[in,out] trx transaction handler */
+void
+fts_check_corrupt(
+ dict_table_t* base_table,
+ trx_t* trx)
+{
+ bool sane = true;
+ fts_table_t fts_table;
+
+ /* Iterate over the common table and check for their sanity. */
+ FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, base_table);
+
+ for (ulint i = 0; fts_common_tables[i] != NULL && sane; ++i) {
+
+ char table_name[MAX_FULL_NAME_LEN];
+
+ fts_table.suffix = fts_common_tables[i];
+ fts_get_table_name(&fts_table, table_name);
+
+ dict_table_t* aux_table = dict_table_open_on_name(
+ table_name, true, FALSE, DICT_ERR_IGNORE_NONE);
+
+ if (aux_table == NULL) {
+ dict_set_corrupted(
+ dict_table_get_first_index(base_table),
+ trx, "FTS_SANITY_CHECK");
+ ut_ad(base_table->corrupted == TRUE);
+ sane = false;
+ continue;
+ }
+
+ for (dict_index_t* aux_table_index =
+ UT_LIST_GET_FIRST(aux_table->indexes);
+ aux_table_index != NULL;
+ aux_table_index =
+ UT_LIST_GET_NEXT(indexes, aux_table_index)) {
+
+ /* Check if the auxiliary table needed for FTS is sane. */
+ if (aux_table_index->page == FIL_NULL) {
+ dict_set_corrupted(
+ dict_table_get_first_index(base_table),
+ trx, "FTS_SANITY_CHECK");
+ ut_ad(base_table->corrupted == TRUE);
+ sane = false;
+ }
+ }
+
+ dict_table_close(aux_table, FALSE, FALSE);
+ }
+}
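
/* Illustrative sketch, not part of this patch: fts_check_corrupt() reports
failure by marking the base table's clustered index corrupted rather than
through a return value, so a caller wanting a boolean answer inspects the
flag afterwards. Helper name is an assumption; trx is assumed started: */
static
bool
fts_aux_tables_are_sane(dict_table_t* base_table, trx_t* trx)
{
	fts_check_corrupt(base_table, trx);

	return(!base_table->corrupted);
}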
diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc
index 7f498443544..deda14fee24 100644
--- a/storage/innobase/fts/fts0opt.cc
+++ b/storage/innobase/fts/fts0opt.cc
@@ -33,14 +33,10 @@ Completed 2011/7/10 Sunny and Jimmy Yang
#include "fts0types.h"
#include "ut0wqueue.h"
#include "srv0start.h"
+#include "ut0list.h"
#include "zlib.h"
#include "fts0opt.h"
-#ifndef UNIV_NONINL
-#include "fts0types.ic"
-#include "fts0vlc.ic"
-#endif
-
/** The FTS optimize thread's work queue. */
ib_wqueue_t* fts_optimize_wq;
@@ -56,6 +52,9 @@ static const ulint FTS_OPTIMIZE_INTERVAL_IN_SECS = 300;
/** Server is shutting down, so we should exit the optimize thread */
static bool fts_opt_start_shutdown = false;
+/** Event to wait for shutdown of the optimize thread */
+static os_event_t fts_opt_shutdown_event = NULL;
+
/** Initial size of nodes in fts_word_t. */
static const ulint FTS_WORD_NODES_INIT_SIZE = 64;
@@ -198,11 +197,6 @@ struct fts_msg_del_t {
this message by the consumer */
};
-/** Stop the optimize thread. */
-struct fts_msg_optimize_t {
- dict_table_t* table; /*!< Table to optimize */
-};
-
/** The FTS optimize message work queue message type. */
struct fts_msg_t {
fts_msg_type_t type; /*!< Message type */
@@ -215,10 +209,10 @@ struct fts_msg_t {
};
/** The number of words to read and optimize in a single pass. */
-UNIV_INTERN ulong fts_num_word_optimize;
+ulong fts_num_word_optimize;
/** Whether to enable additional FTS diagnostic printout. */
-UNIV_INTERN char fts_enable_diag_print;
+char fts_enable_diag_print;
/** ZLib compressed block size.*/
static ulint FTS_ZIP_BLOCK_SIZE = 1024;
@@ -324,7 +318,7 @@ fts_zip_init(
/**********************************************************************//**
Create a fts_optimizer_word_t instance.
@return new instance */
-UNIV_INTERN
+static
fts_word_t*
fts_word_init(
/*==========*/
@@ -391,7 +385,7 @@ fts_optimize_read_node(
case 4: /* ILIST */
node->ilist_size_alloc = node->ilist_size = len;
- node->ilist = static_cast<byte*>(ut_malloc(len));
+ node->ilist = static_cast<byte*>(ut_malloc_nokey(len));
memcpy(node->ilist, data, len);
break;
@@ -409,7 +403,6 @@ fts_optimize_read_node(
/**********************************************************************//**
Callback function to fetch the rows in an FTS INDEX record.
@return always returns non-NULL */
-UNIV_INTERN
ibool
fts_optimize_index_fetch_node(
/*==========================*/
@@ -467,7 +460,6 @@ fts_optimize_index_fetch_node(
/**********************************************************************//**
Read the rows from the FTS index.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_index_fetch_nodes(
/*==================*/
@@ -513,8 +505,8 @@ fts_index_fetch_nodes(
info,
"DECLARE FUNCTION my_func;\n"
"DECLARE CURSOR c IS"
- " SELECT word, doc_count, first_doc_id, last_doc_id, "
- "ilist\n"
+ " SELECT word, doc_count, first_doc_id, last_doc_id,"
+ " ilist\n"
" FROM $table_name\n"
" WHERE word LIKE :word\n"
" ORDER BY first_doc_id;\n"
@@ -530,7 +522,7 @@ fts_index_fetch_nodes(
"CLOSE c;");
}
- for(;;) {
+ for (;;) {
error = fts_eval_sql(trx, *graph);
if (error == DB_SUCCESS) {
@@ -540,18 +532,14 @@ fts_index_fetch_nodes(
} else {
fts_sql_rollback(trx);
- ut_print_timestamp(stderr);
-
if (error == DB_LOCK_WAIT_TIMEOUT) {
- fprintf(stderr, " InnoDB: Warning: lock wait "
- "timeout reading FTS index. "
- "Retrying!\n");
+ ib::warn() << "lock wait timeout reading"
+ " FTS index. Retrying!";
trx->error_state = DB_SUCCESS;
} else {
- fprintf(stderr, " InnoDB: Error: (%s) "
- "while reading FTS index.\n",
- ut_strerr(error));
+ ib::error() << "(" << ut_strerr(error)
+ << ") while reading FTS index.";
break; /* Exit the loop. */
}
@@ -588,7 +576,7 @@ fts_zip_read_word(
/* Finished decompressing block. */
if (zip->zp->avail_in == 0) {
- /* Free the block that's been decompressed. */
+ /* Free the block thats been decompressed. */
if (zip->pos > 0) {
ulint prev = zip->pos - 1;
@@ -609,7 +597,8 @@ fts_zip_read_word(
zip->zp->avail_in =
FTS_MAX_WORD_LEN;
} else {
- zip->zp->avail_in = static_cast<uInt>(zip->block_sz);
+ zip->zp->avail_in =
+ static_cast<uInt>(zip->block_sz);
}
++zip->pos;
@@ -707,7 +696,9 @@ fts_fetch_index_words(
if (zip->zp->avail_out == 0) {
byte* block;
- block = static_cast<byte*>(ut_malloc(zip->block_sz));
+ block = static_cast<byte*>(
+ ut_malloc_nokey(zip->block_sz));
+
ib_vector_push(zip->blocks, &block);
zip->zp->next_out = block;
@@ -763,7 +754,9 @@ fts_zip_deflate_end(
ut_a(zip->zp->avail_out == 0);
- block = static_cast<byte*>(ut_malloc(FTS_MAX_WORD_LEN + 1));
+ block = static_cast<byte*>(
+ ut_malloc_nokey(FTS_MAX_WORD_LEN + 1));
+
ib_vector_push(zip->blocks, &block);
zip->zp->next_out = block;
@@ -811,19 +804,14 @@ fts_index_fetch_words(
}
for (selected = fts_select_index(
- optim->fts_index_table.charset, word->f_str, word->f_len);
- fts_index_selector[selected].value;
+ optim->fts_index_table.charset, word->f_str, word->f_len);
+ selected < FTS_NUM_AUX_INDEX;
selected++) {
char table_name[MAX_FULL_NAME_LEN];
optim->fts_index_table.suffix = fts_get_suffix(selected);
- /* We've search all indexes. */
- if (optim->fts_index_table.suffix == NULL) {
- return(DB_TABLE_NOT_FOUND);
- }
-
info = pars_info_create();
pars_info_bind_function(
@@ -857,15 +845,13 @@ fts_index_fetch_words(
zip = optim->zip;
- for(;;) {
+ for (;;) {
int err;
if (!inited && ((err = deflateInit(zip->zp, 9))
!= Z_OK)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: ZLib deflateInit() "
- "failed: %d\n", err);
+ ib::error() << "ZLib deflateInit() failed: "
+ << err;
error = DB_ERROR;
break;
@@ -880,13 +866,9 @@ fts_index_fetch_words(
} else {
//FIXME fts_sql_rollback(optim->trx);
- ut_print_timestamp(stderr);
-
if (error == DB_LOCK_WAIT_TIMEOUT) {
- fprintf(stderr, " InnoDB: "
- "Warning: lock wait "
- "timeout reading document. "
- "Retrying!\n");
+ ib::warn() << "Lock wait timeout"
+ " reading document. Retrying!";
/* We need to reset the ZLib state. */
inited = FALSE;
@@ -895,9 +877,8 @@ fts_index_fetch_words(
optim->trx->error_state = DB_SUCCESS;
} else {
- fprintf(stderr, " InnoDB: Error: (%s) "
- "while reading document.\n",
- ut_strerr(error));
+ ib::error() << "(" << ut_strerr(error)
+ << ") while reading document.";
break; /* Exit the loop. */
}
@@ -971,7 +952,6 @@ fts_fetch_doc_ids(
/**********************************************************************//**
Read the rows from a FTS common auxiliary table.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_table_fetch_doc_ids(
/*====================*/
@@ -1018,17 +998,14 @@ fts_table_fetch_doc_ids(
"CLOSE c;");
error = fts_eval_sql(trx, graph);
+ fts_sql_commit(trx);
mutex_enter(&dict_sys->mutex);
que_graph_free(graph);
mutex_exit(&dict_sys->mutex);
if (error == DB_SUCCESS) {
- fts_sql_commit(trx);
-
ib_vector_sort(doc_ids->doc_ids, fts_update_doc_id_cmp);
- } else {
- fts_sql_rollback(trx);
}
if (alloc_bk_trx) {
@@ -1042,7 +1019,6 @@ fts_table_fetch_doc_ids(
Do a binary search for a doc id in the array
@return +ve index if found, -ve index where it should be inserted
if not found */
-UNIV_INTERN
int
fts_bsearch(
/*========*/
@@ -1079,7 +1055,7 @@ fts_bsearch(
}
/* Not found. */
- return( (lower == 0) ? -1 : -lower);
+ return( (lower == 0) ? -1 : -(lower));
}
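
/* Illustrative sketch, not part of this patch: decoding fts_bsearch()'s
return convention. A non-negative value is the index of the match; a
negative value encodes the insertion point, with -1 standing in for
"insert at 0" since -0 is indistinguishable from 0. Assumed helper: */
static
ulint
fts_bsearch_insert_pos(int pos)
{
	ut_ad(pos < 0);	/* pos >= 0 means the doc id was found */

	return((pos == -1) ? 0 : (ulint) -pos);
}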
/**********************************************************************//**
@@ -1178,12 +1154,12 @@ fts_optimize_encode_node(
new_size = enc_len > FTS_ILIST_MAX_SIZE
? enc_len : FTS_ILIST_MAX_SIZE;
- node->ilist = static_cast<byte*>(ut_malloc(new_size));
+ node->ilist = static_cast<byte*>(ut_malloc_nokey(new_size));
node->ilist_size_alloc = new_size;
} else if ((node->ilist_size + enc_len) > node->ilist_size_alloc) {
ulint new_size = node->ilist_size + enc_len;
- byte* ilist = static_cast<byte*>(ut_malloc(new_size));
+ byte* ilist = static_cast<byte*>(ut_malloc_nokey(new_size));
memcpy(ilist, node->ilist, node->ilist_size);
@@ -1383,8 +1359,8 @@ fts_optimize_word(
if (fts_enable_diag_print) {
word->text.f_str[word->text.f_len] = 0;
- fprintf(stderr, "FTS_OPTIMIZE: optimize \"%s\"\n",
- word->text.f_str);
+ ib::info() << "FTS_OPTIMIZE: optimize \"" << word->text.f_str
+ << "\"";
}
while (i < size) {
@@ -1465,8 +1441,8 @@ fts_optimize_write_word(
ut_ad(fts_table->charset);
if (fts_enable_diag_print) {
- fprintf(stderr, "FTS_OPTIMIZE: processed \"%s\"\n",
- word->f_str);
+ ib::info() << "FTS_OPTIMIZE: processed \"" << word->f_str
+ << "\"";
}
pars_info_bind_varchar_literal(
@@ -1487,10 +1463,8 @@ fts_optimize_write_word(
error = fts_eval_sql(trx, graph);
if (error != DB_SUCCESS) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%s) during optimize, "
- "when deleting a word from the FTS index.\n",
- ut_strerr(error));
+ ib::error() << "(" << ut_strerr(error) << ") during optimize,"
+ " when deleting a word from the FTS index.";
}
fts_que_graph_free(graph);
@@ -1503,15 +1477,19 @@ fts_optimize_write_word(
fts_node_t* node = (fts_node_t*) ib_vector_get(nodes, i);
if (error == DB_SUCCESS) {
+ /* Skip empty node. */
+ if (node->ilist == NULL) {
+ ut_ad(node->ilist_size == 0);
+ continue;
+ }
+
error = fts_write_node(
trx, &graph, fts_table, word, node);
if (error != DB_SUCCESS) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%s) "
- "during optimize, while adding a "
- "word to the FTS index.\n",
- ut_strerr(error));
+ ib::error() << "(" << ut_strerr(error) << ")"
+ " during optimize, while adding a"
+ " word to the FTS index.";
}
}
@@ -1529,7 +1507,6 @@ fts_optimize_write_word(
/**********************************************************************//**
Free fts_optimizer_word_t instance.*/
-UNIV_INTERN
void
fts_word_free(
/*==========*/
@@ -1623,6 +1600,7 @@ fts_optimize_create(
optim->table = table;
optim->trx = trx_allocate_for_background();
+ trx_start_internal(optim->trx);
optim->fts_common_table.table_id = table->id;
optim->fts_common_table.type = FTS_COMMON_TABLE;
@@ -1743,6 +1721,7 @@ fts_optimize_free(
{
mem_heap_t* heap = static_cast<mem_heap_t*>(optim->self_heap->arg);
+ trx_commit_for_mysql(optim->trx);
trx_free_for_background(optim->trx);
fts_doc_ids_free(optim->to_delete);
@@ -1802,7 +1781,7 @@ fts_optimize_words(
fetch.read_arg = optim->words;
fetch.read_record = fts_optimize_index_fetch_node;
- while(!optim->done) {
+ while (!optim->done) {
dberr_t error;
trx_t* trx = optim->trx;
ulint selected;
@@ -1849,13 +1828,12 @@ fts_optimize_words(
}
}
} else if (error == DB_LOCK_WAIT_TIMEOUT) {
- fprintf(stderr, "InnoDB: Warning: lock wait timeout "
- "during optimize. Retrying!\n");
+ ib::warn() << "Lock wait timeout during optimize."
+ " Retrying!";
trx->error_state = DB_SUCCESS;
} else if (error == DB_DEADLOCK) {
- fprintf(stderr, "InnoDB: Warning: deadlock "
- "during optimize. Retrying!\n");
+ ib::warn() << "Deadlock during optimize. Retrying!";
trx->error_state = DB_SUCCESS;
} else {
@@ -1869,42 +1847,6 @@ fts_optimize_words(
}
/**********************************************************************//**
-Select the FTS index to search.
-@return TRUE if last index */
-static
-ibool
-fts_optimize_set_next_word(
-/*=======================*/
- CHARSET_INFO* charset, /*!< in: charset */
- fts_string_t* word) /*!< in: current last word */
-{
- ulint selected;
- ibool last = FALSE;
-
- selected = fts_select_next_index(charset, word->f_str, word->f_len);
-
- /* If this was the last index then reset to start. */
- if (fts_index_selector[selected].value == 0) {
- /* Reset the last optimized word to '' if no
- more words could be read from the FTS index. */
- word->f_len = 0;
- *word->f_str = 0;
-
- last = TRUE;
- } else {
- ulint value = fts_index_selector[selected].value;
-
- ut_a(value <= 0xff);
-
- /* Set to the first character of the next slot. */
- word->f_len = 1;
- *word->f_str = (byte) value;
- }
-
- return(last);
-}
-
-/**********************************************************************//**
Optimize is complete. Set the completion time, and reset the optimize
start string for this FTS index to "".
@return DB_SUCCESS if all OK */
@@ -1936,8 +1878,8 @@ fts_optimize_index_completed(
if (error != DB_SUCCESS) {
- fprintf(stderr, "InnoDB: Error: (%s) while "
- "updating last optimized word!\n", ut_strerr(error));
+ ib::error() << "(" << ut_strerr(error) << ") while updating"
+ " last optimized word!";
}
return(error);
@@ -1980,21 +1922,14 @@ fts_optimize_index_read_words(
optim, word, fts_num_word_optimize);
if (error == DB_SUCCESS) {
-
- /* If the search returned an empty set
- try the next index in the horizontal split. */
- if (optim->zip->n_words > 0) {
- break;
- } else {
-
- fts_optimize_set_next_word(
- optim->fts_index_table.charset,
- word);
-
- if (word->f_len == 0) {
- break;
- }
+ /* Reset the last optimized word to '' if no
+ more words could be read from the FTS index. */
+ if (optim->zip->n_words == 0) {
+ word->f_len = 0;
+ *word->f_str = 0;
}
+
+ break;
}
}
@@ -2482,18 +2417,22 @@ fts_optimize_table_bk(
/*********************************************************************//**
Run OPTIMIZE on the given table.
@return DB_SUCCESS if all OK */
-UNIV_INTERN
dberr_t
fts_optimize_table(
/*===============*/
dict_table_t* table) /*!< in: table to optimize */
{
+ if (srv_read_only_mode) {
+ return DB_READ_ONLY;
+ }
+
dberr_t error = DB_SUCCESS;
fts_optimize_t* optim = NULL;
fts_t* fts = table->fts;
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: FTS start optimize %s\n", table->name);
+ if (fts_enable_diag_print) {
+ ib::info() << "FTS start optimize " << table->name;
+ }
optim = fts_optimize_create(table);
@@ -2544,9 +2483,8 @@ fts_optimize_table(
&& optim->n_completed == ib_vector_size(fts->indexes)) {
if (fts_enable_diag_print) {
- fprintf(stderr, "FTS_OPTIMIZE: Completed "
- "Optimize, cleanup DELETED "
- "table\n");
+ ib::info() << "FTS_OPTIMIZE: Completed"
+ " Optimize, cleanup DELETED table";
}
if (ib_vector_size(optim->to_delete->doc_ids) > 0) {
@@ -2567,8 +2505,9 @@ fts_optimize_table(
fts_optimize_free(optim);
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: FTS end optimize %s\n", table->name);
+ if (fts_enable_diag_print) {
+ ib::info() << "FTS end optimize " << table->name;
+ }
return(error);
}
@@ -2598,7 +2537,7 @@ fts_optimize_create_msg(
/** Add the table to add to the OPTIMIZER's list.
@param[in] table table to add */
-UNIV_INTERN void fts_optimize_add_table(dict_table_t* table)
+void fts_optimize_add_table(dict_table_t* table)
{
fts_msg_t* msg;
@@ -2613,9 +2552,7 @@ UNIV_INTERN void fts_optimize_add_table(dict_table_t* table)
}
/* Make sure table with FTS index cannot be evicted */
- if (table->can_be_evicted) {
- dict_table_move_from_lru_to_non_lru(table);
- }
+ dict_table_prevent_eviction(table);
msg = fts_optimize_create_msg(FTS_MSG_ADD_TABLE, table);
@@ -2631,7 +2568,6 @@ UNIV_INTERN void fts_optimize_add_table(dict_table_t* table)
/**********************************************************************//**
Remove the table from the OPTIMIZER's list. We do wait for
acknowledgement from the consumer of the message. */
-UNIV_INTERN
void
fts_optimize_remove_table(
/*======================*/
@@ -2648,9 +2584,8 @@ fts_optimize_remove_table(
/* FTS optimizer thread is already exited */
if (fts_opt_start_shutdown) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Try to remove table %s after FTS optimize"
- " thread exiting.", table->name);
+ ib::info() << "Try to remove table " << table->name
+ << " after FTS optimize thread exiting.";
return;
}
@@ -2664,7 +2599,7 @@ fts_optimize_remove_table(
msg = fts_optimize_create_msg(FTS_MSG_DEL_TABLE, NULL);
/* We will wait on this event until signalled by the consumer. */
- event = os_event_create();
+ event = os_event_create(0);
remove = static_cast<fts_msg_del_t*>(
mem_heap_alloc(msg->heap, sizeof(*remove)));
@@ -2679,7 +2614,7 @@ fts_optimize_remove_table(
os_event_wait(event);
- os_event_free(event);
+ os_event_destroy(event);
ut_d(mutex_enter(&fts_optimize_wq->mutex));
ut_ad(!table->fts->in_queue);
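
/* Illustrative sketch, not part of this patch: the removal above is a
synchronous handshake. The producer embeds a private event in the
FTS_MSG_DEL_TABLE payload, blocks on it, and the optimize thread sets it
once the table has been deregistered; only then is the event destroyed.
In outline, assuming fts_msg_del_t carries the event member documented
earlier in this file: */
static
void
fts_msg_del_send_and_wait(fts_msg_t* msg, fts_msg_del_t* remove)
{
	remove->event = os_event_create(0);

	ib_wqueue_add(fts_optimize_wq, msg, msg->heap);

	/* The consumer calls os_event_set(remove->event) when done. */
	os_event_wait(remove->event);
	os_event_destroy(remove->event);
}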
@@ -2688,13 +2623,10 @@ fts_optimize_remove_table(
/** Send sync fts cache for the table.
@param[in] table table to sync */
-UNIV_INTERN
void
fts_optimize_request_sync_table(
dict_table_t* table)
{
- fts_msg_t* msg;
-
/* if the optimize system not yet initialized, return */
if (!fts_optimize_wq) {
return;
@@ -2702,20 +2634,17 @@ fts_optimize_request_sync_table(
/* FTS optimizer thread is already exited */
if (fts_opt_start_shutdown) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Try to sync table %s after FTS optimize"
- " thread exiting.", table->name);
+ ib::info() << "Try to sync table " << table->name
+ << " after FTS optimize thread exiting.";
return;
}
- msg = fts_optimize_create_msg(FTS_MSG_SYNC_TABLE, table);
+ fts_msg_t* msg = fts_optimize_create_msg(FTS_MSG_SYNC_TABLE, table);
mutex_enter(&fts_optimize_wq->mutex);
ib_wqueue_add(fts_optimize_wq, msg, msg->heap, true);
- table->fts->in_queue = true;
-
mutex_exit(&fts_optimize_wq->mutex);
}
@@ -2754,6 +2683,7 @@ static bool fts_optimize_new_table(dict_table_t* table)
@param[in,out] table table to be removed from fts_slots */
static bool fts_optimize_del_table(const dict_table_t* table)
{
+ ut_ad(table);
for (ulint i = 0; i < ib_vector_size(fts_slots); ++i) {
fts_slot_t* slot;
@@ -2761,15 +2691,13 @@ static bool fts_optimize_del_table(const dict_table_t* table)
if (slot->table == table) {
if (fts_enable_diag_print) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "FTS Optimize Removing table %s",
- table->name);
+ ib::info() << "FTS Optimize Removing table "
+ << table->name;
}
mutex_enter(&fts_optimize_wq->mutex);
slot->table->fts->in_queue = false;
mutex_exit(&fts_optimize_wq->mutex);
-
slot->table = NULL;
return true;
}
@@ -2845,42 +2773,37 @@ static bool fts_is_sync_needed()
@param[in,out] table table to be synced */
static void fts_optimize_sync_table(dict_table_t* table)
{
- if (fil_table_accessible(table)
- && table->fts && table->fts->cache) {
- fts_sync_table(table, true, false, false);
+ if (table->fts && table->fts->cache && fil_table_accessible(table)) {
+ fts_sync_table(table, false);
}
- DBUG_EXECUTE_IF("ib_optimize_wq_hang",
- os_thread_sleep(6000000););
+ DBUG_EXECUTE_IF("ib_optimize_wq_hang", os_thread_sleep(6000000););
}
/**********************************************************************//**
Optimize all FTS tables.
@return Dummy return */
-UNIV_INTERN
+static
os_thread_ret_t
fts_optimize_thread(
/*================*/
void* arg) /*!< in: work queue*/
{
- mem_heap_t* heap;
- ib_alloc_t* heap_alloc;
ulint current = 0;
ibool done = FALSE;
ulint n_tables = 0;
- os_event_t exit_event = 0;
ulint n_optimize = 0;
ib_wqueue_t* wq = (ib_wqueue_t*) arg;
ut_ad(!srv_read_only_mode);
my_thread_init();
- heap = mem_heap_create(sizeof(dict_table_t*) * 64);
- heap_alloc = ib_heap_allocator_create(heap);
+ ut_ad(fts_slots);
- fts_slots = ib_vector_create(heap_alloc, sizeof(fts_slot_t), 4);
+ /* Assign the number of tables already added to fts_slots to n_tables */
+ n_tables = ib_vector_size(fts_slots);
- while(!done && srv_shutdown_state == SRV_SHUTDOWN_NONE) {
+ while (!done && srv_shutdown_state == SRV_SHUTDOWN_NONE) {
/* If there is no message in the queue and we have tables
to optimize then optimize the tables. */
@@ -2922,7 +2845,6 @@ fts_optimize_thread(
switch (msg->type) {
case FTS_MSG_STOP:
done = TRUE;
- exit_event = (os_event_t) msg->ptr;
break;
case FTS_MSG_ADD_TABLE:
@@ -2981,70 +2903,74 @@ fts_optimize_thread(
ib_vector_free(fts_slots);
fts_slots = NULL;
- ib_logf(IB_LOG_LEVEL_INFO, "FTS optimize thread exiting.");
+ ib::info() << "FTS optimize thread exiting.";
- os_event_set(exit_event);
+ os_event_set(fts_opt_shutdown_event);
my_thread_end();
/* We count the number of threads in os_thread_exit(). A created
thread should always use that to exit and not use return() to exit. */
- os_thread_exit(NULL);
+ os_thread_exit();
OS_THREAD_DUMMY_RETURN;
}
/**********************************************************************//**
Startup the optimize thread and create the work queue. */
-UNIV_INTERN
void
fts_optimize_init(void)
/*===================*/
{
+ mem_heap_t* heap;
+ ib_alloc_t* heap_alloc;
+
ut_ad(!srv_read_only_mode);
/* For now we only support one optimize thread. */
ut_a(!fts_optimize_wq);
+ /* Create FTS optimize work queue */
fts_optimize_wq = ib_wqueue_create();
ut_a(fts_optimize_wq != NULL);
- last_check_sync_time = time(NULL);
- /* Add fts tables to fts slots which could be skipped
- during dict_load_table() because fts_optimize_thread
+ /* Create FTS vector to store fts_slot_t */
+ heap = mem_heap_create(sizeof(dict_table_t*) * 64);
+ heap_alloc = ib_heap_allocator_create(heap);
+ fts_slots = ib_vector_create(heap_alloc, sizeof(fts_slot_t), 4);
+
+ /* Add fts tables to fts_slots which could be skipped
+ during dict_load_table_one() because fts_optimize_thread
wasn't even started. */
mutex_enter(&dict_sys->mutex);
-
for (dict_table_t* table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
table != NULL;
table = UT_LIST_GET_NEXT(table_LRU, table)) {
-
if (!table->fts || !dict_table_has_fts_index(table)) {
continue;
}
/* fts_optimize_thread is not started yet. So there is no
- need to acqquire fts_optimize_wq->mutex for adding the fts
+ need to acquire fts_optimize_wq->mutex for adding the fts
table to the fts slots. */
ut_ad(!table->can_be_evicted);
fts_optimize_new_table(table);
table->fts->in_queue = true;
}
-
mutex_exit(&dict_sys->mutex);
+
+ fts_opt_shutdown_event = os_event_create(0);
+ last_check_sync_time = time(NULL);
+
os_thread_create(fts_optimize_thread, fts_optimize_wq, NULL);
}
-/**********************************************************************//**
-Signal the optimize thread to prepare for shutdown. */
-UNIV_INTERN
+/** Shutdown fts optimize thread. */
void
-fts_optimize_start_shutdown(void)
-/*=============================*/
+fts_optimize_shutdown()
{
ut_ad(!srv_read_only_mode);
fts_msg_t* msg;
- os_event_t event;
/* If there is an ongoing activity on dictionary, such as
srv_master_evict_from_table_cache(), wait for it */
@@ -3059,30 +2985,15 @@ fts_optimize_start_shutdown(void)
/* We tell the OPTIMIZE thread to switch to state done, we
can't delete the work queue here because the add thread needs to
deregister the FTS tables. */
- event = os_event_create();
msg = fts_optimize_create_msg(FTS_MSG_STOP, NULL);
- msg->ptr = event;
ib_wqueue_add(fts_optimize_wq, msg, msg->heap);
- os_event_wait(event);
- os_event_free(event);
-
- ib_wqueue_free(fts_optimize_wq);
+ os_event_wait(fts_opt_shutdown_event);
-}
+ os_event_destroy(fts_opt_shutdown_event);
-/**********************************************************************//**
-Reset the work queue. */
-UNIV_INTERN
-void
-fts_optimize_end(void)
-/*==================*/
-{
- ut_ad(!srv_read_only_mode);
-
- // FIXME: Potential race condition here: We should wait for
- // the optimize thread to confirm shutdown.
+ ib_wqueue_free(fts_optimize_wq);
fts_optimize_wq = NULL;
}
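
/* Illustrative sketch, not part of this patch: the shutdown protocol
above, reduced to its ordering. The queue may only be freed after the exit
event fires, because until then the optimize thread can still dequeue from
it; this closes the race the removed fts_optimize_end() comment warned
about. Helper name is an assumption: */
static
void
worker_stop_and_join(ib_wqueue_t* wq, os_event_t exit_event)
{
	fts_msg_t*	msg = fts_optimize_create_msg(FTS_MSG_STOP, NULL);

	ib_wqueue_add(wq, msg, msg->heap);	/* 1. request exit */

	os_event_wait(exit_event);		/* 2. join the worker */
	os_event_destroy(exit_event);

	ib_wqueue_free(wq);			/* 3. now safe to free */
}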
diff --git a/storage/innobase/fts/fts0pars.cc b/storage/innobase/fts/fts0pars.cc
index 19917ccd26a..56cc8d6052c 100644
--- a/storage/innobase/fts/fts0pars.cc
+++ b/storage/innobase/fts/fts0pars.cc
@@ -76,12 +76,13 @@
/* Line 268 of yacc.c */
#line 26 "fts0pars.y"
-
+#include "ha_prototypes.h"
#include "mem0mem.h"
#include "fts0ast.h"
#include "fts0blex.h"
#include "fts0tlex.h"
#include "fts0pars.h"
+#include <my_sys.h>
extern int fts_lexer(YYSTYPE*, fts_lexer_t*);
extern int fts_blexer(YYSTYPE*, yyscan_t);
@@ -271,8 +272,6 @@ YYID (yyi)
# define YYSTACK_ALLOC __builtin_alloca
# elif defined __BUILTIN_VA_ARG_INCR
# include <alloca.h> /* INFRINGES ON USER NAME SPACE */
-# elif defined _AIX
-# define YYSTACK_ALLOC __alloca
# elif defined _MSC_VER
# include <malloc.h> /* INFRINGES ON USER NAME SPACE */
# define alloca _alloca
@@ -1541,7 +1540,7 @@ yyreduce:
/* Line 1806 of yacc.c */
#line 141 "fts0pars.y"
{
- fts_ast_term_set_distance((yyvsp[(1) - (3)].node), fts_ast_string_to_ul((yyvsp[(3) - (3)].token), 10));
+ fts_ast_text_set_distance((yyvsp[(1) - (3)].node), fts_ast_string_to_ul((yyvsp[(3) - (3)].token), 10));
fts_ast_string_free((yyvsp[(3) - (3)].token));
}
break;
@@ -1574,7 +1573,7 @@ yyreduce:
{
(yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (4)].node));
fts_ast_add_node((yyval.node), (yyvsp[(2) - (4)].node));
- fts_ast_term_set_distance((yyvsp[(2) - (4)].node), fts_ast_string_to_ul((yyvsp[(4) - (4)].token), 10));
+ fts_ast_text_set_distance((yyvsp[(2) - (4)].node), fts_ast_string_to_ul((yyvsp[(4) - (4)].token), 10));
fts_ast_string_free((yyvsp[(4) - (4)].token));
}
break;
@@ -1933,7 +1932,6 @@ ftserror(
/********************************************************************
Create a fts_lexer_t instance.*/
-
fts_lexer_t*
fts_lexer_create(
/*=============*/
@@ -1942,7 +1940,7 @@ fts_lexer_create(
ulint query_len)
{
fts_lexer_t* fts_lexer = static_cast<fts_lexer_t*>(
- ut_malloc(sizeof(fts_lexer_t)));
+ ut_malloc_nokey(sizeof(fts_lexer_t)));
if (boolean_mode) {
fts0blex_init(&fts_lexer->yyscanner);
@@ -1984,7 +1982,6 @@ fts_lexer_free(
/********************************************************************
Call the appropriate scanner.*/
-
int
fts_lexer(
/*======*/
diff --git a/storage/innobase/fts/fts0pars.y b/storage/innobase/fts/fts0pars.y
index 31895e8a86c..deebc79e4c4 100644
--- a/storage/innobase/fts/fts0pars.y
+++ b/storage/innobase/fts/fts0pars.y
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2014, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,12 +24,13 @@ this program; if not, write to the Free Software Foundation, Inc.,
*/
%{
-
+#include "ha_prototypes.h"
#include "mem0mem.h"
#include "fts0ast.h"
#include "fts0blex.h"
#include "fts0tlex.h"
#include "fts0pars.h"
+#include <my_sys.h>
extern int fts_lexer(YYSTYPE*, fts_lexer_t*);
extern int fts_blexer(YYSTYPE*, yyscan_t);
@@ -139,7 +140,7 @@ expr : term {
}
| text '@' FTS_NUMB {
- fts_ast_term_set_distance($1, fts_ast_string_to_ul($3, 10));
+ fts_ast_text_set_distance($1, fts_ast_string_to_ul($3, 10));
fts_ast_string_free($3);
}
@@ -157,7 +158,7 @@ expr : term {
| prefix text '@' FTS_NUMB {
$$ = fts_ast_create_node_list(state, $1);
fts_ast_add_node($$, $2);
- fts_ast_term_set_distance($2, fts_ast_string_to_ul($4, 10));
+ fts_ast_text_set_distance($2, fts_ast_string_to_ul($4, 10));
fts_ast_string_free($4);
}
@@ -224,7 +225,6 @@ ftserror(
/********************************************************************
Create a fts_lexer_t instance.*/
-
fts_lexer_t*
fts_lexer_create(
/*=============*/
@@ -233,17 +233,17 @@ fts_lexer_create(
ulint query_len)
{
fts_lexer_t* fts_lexer = static_cast<fts_lexer_t*>(
- ut_malloc(sizeof(fts_lexer_t)));
+ ut_malloc_nokey(sizeof(fts_lexer_t)));
if (boolean_mode) {
fts0blex_init(&fts_lexer->yyscanner);
- fts0b_scan_bytes((char*) query, query_len, fts_lexer->yyscanner);
+ fts0b_scan_bytes((char*) query, (int) query_len, fts_lexer->yyscanner);
fts_lexer->scanner = fts_blexer;
/* FIXME: Debugging */
/* fts0bset_debug(1 , fts_lexer->yyscanner); */
} else {
fts0tlex_init(&fts_lexer->yyscanner);
- fts0t_scan_bytes((char*) query, query_len, fts_lexer->yyscanner);
+ fts0t_scan_bytes((char*) query, (int) query_len, fts_lexer->yyscanner);
fts_lexer->scanner = fts_tlexer;
}
@@ -269,7 +269,6 @@ fts_lexer_free(
/********************************************************************
Call the appropriate scanner.*/
-
int
fts_lexer(
/*======*/
diff --git a/storage/innobase/fts/fts0plugin.cc b/storage/innobase/fts/fts0plugin.cc
new file mode 100644
index 00000000000..9a37ec52516
--- /dev/null
+++ b/storage/innobase/fts/fts0plugin.cc
@@ -0,0 +1,297 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file fts/fts0plugin.cc
+Full Text Search plugin support.
+
+Created 2013/06/04 Shaohua Wang
+***********************************************************************/
+
+#include "fts0ast.h"
+#include "fts0plugin.h"
+#include "fts0tokenize.h"
+
+#include "ft_global.h"
+
+/******************************************************************//**
+FTS default parser init
+@return 0 */
+static
+int
+fts_default_parser_init(
+/*====================*/
+ MYSQL_FTPARSER_PARAM *param) /*!< in: plugin parser param */
+{
+ return(0);
+}
+
+/******************************************************************//**
+FTS default parser deinit
+@return 0 */
+static
+int
+fts_default_parser_deinit(
+/*======================*/
+ MYSQL_FTPARSER_PARAM *param) /*!< in: plugin parser param */
+{
+ return(0);
+}
+
+/******************************************************************//**
+FTS default parser parse function, from ft_static.c in MYISAM.
+@return 0 if parse successfully, or return non-zero */
+static
+int
+fts_default_parser_parse(
+/*=====================*/
+ MYSQL_FTPARSER_PARAM *param) /*!< in: plugin parser param */
+{
+ return(param->mysql_parse(param, param->doc, param->length));
+}
+
+/* FTS default parser from ft_static.c in MYISAM. */
+struct st_mysql_ftparser fts_default_parser =
+{
+ MYSQL_FTPARSER_INTERFACE_VERSION,
+ fts_default_parser_parse,
+ fts_default_parser_init,
+ fts_default_parser_deinit
+};
+
+/******************************************************************//**
+Get a operator node from token boolean info
+@return node */
+static
+fts_ast_node_t*
+fts_query_get_oper_node(
+/*====================*/
+ MYSQL_FTPARSER_BOOLEAN_INFO* info, /*!< in: token info */
+ fts_ast_state_t* state) /*!< in/out: query parse state*/
+{
+ fts_ast_node_t* oper_node = NULL;
+
+ if (info->yesno > 0) {
+ oper_node = fts_ast_create_node_oper(state, FTS_EXIST);
+ } else if (info->yesno < 0) {
+ oper_node = fts_ast_create_node_oper(state, FTS_IGNORE);
+ } else if (info->weight_adjust > 0) {
+ oper_node = fts_ast_create_node_oper(state, FTS_INCR_RATING);
+ } else if (info->weight_adjust < 0) {
+ oper_node = fts_ast_create_node_oper(state, FTS_DECR_RATING);
+ } else if (info->wasign > 0) {
+ oper_node = fts_ast_create_node_oper(state, FTS_NEGATE);
+ }
+
+ return(oper_node);
+}
+
+/******************************************************************//**
+FTS plugin parser 'mysql_add_word' callback function for query parse.
+Refer to 'st_mysql_ftparser_param' for more detail.
+Note:
+a. Parse logic refers to 'ftb_query_add_word' from ft_boolean_search.c in MYISAM;
+b. Parse node or tree refers to fts0pars.y.
+@return 0 if add successfully, or return non-zero. */
+static
+int
+fts_query_add_word_for_parser(
+/*==========================*/
+ MYSQL_FTPARSER_PARAM* param, /*!< in: parser param */
+ const char* word, /*!< in: token */
+ int word_len, /*!< in: token length */
+ MYSQL_FTPARSER_BOOLEAN_INFO* info) /*!< in: token info */
+{
+ fts_ast_state_t* state =
+ static_cast<fts_ast_state_t*>(param->mysql_ftparam);
+ fts_ast_node_t* cur_node = state->cur_node;
+ fts_ast_node_t* oper_node = NULL;
+ fts_ast_node_t* term_node = NULL;
+ fts_ast_node_t* node = NULL;
+
+ switch (info->type) {
+ case FT_TOKEN_STOPWORD:
+		/* We only handle stopwords inside a phrase */
+ if (cur_node->type != FTS_AST_PARSER_PHRASE_LIST) {
+ break;
+ }
+ /* fall through */
+
+ case FT_TOKEN_WORD:
+ term_node = fts_ast_create_node_term_for_parser(
+ state, word, word_len);
+
+ if (info->trunc) {
+ fts_ast_term_set_wildcard(term_node);
+ }
+
+ if (cur_node->type == FTS_AST_PARSER_PHRASE_LIST) {
+ /* Ignore operator inside phrase */
+ fts_ast_add_node(cur_node, term_node);
+ } else {
+ ut_ad(cur_node->type == FTS_AST_LIST
+ || cur_node->type == FTS_AST_SUBEXP_LIST);
+ oper_node = fts_query_get_oper_node(info, state);
+
+ if (oper_node) {
+ node = fts_ast_create_node_list(state, oper_node);
+ fts_ast_add_node(node, term_node);
+ fts_ast_add_node(cur_node, node);
+ } else {
+ fts_ast_add_node(cur_node, term_node);
+ }
+ }
+
+ break;
+
+ case FT_TOKEN_LEFT_PAREN:
+ /* Check parse error */
+ if (cur_node->type != FTS_AST_LIST
+ && cur_node->type != FTS_AST_SUBEXP_LIST) {
+ return(1);
+ }
+
+ /* Set operator */
+ oper_node = fts_query_get_oper_node(info, state);
+ if (oper_node != NULL) {
+ node = fts_ast_create_node_list(state, oper_node);
+ fts_ast_add_node(cur_node, node);
+ node->go_up = true;
+ node->up_node = cur_node;
+ cur_node = node;
+ }
+
+ if (info->quot) {
+ /* Phrase node */
+ node = fts_ast_create_node_phrase_list(state);
+ } else {
+ /* Subexp list node */
+ node = fts_ast_create_node_subexp_list(state, NULL);
+ }
+
+ fts_ast_add_node(cur_node, node);
+
+ node->up_node = cur_node;
+ state->cur_node = node;
+ state->depth += 1;
+
+ break;
+
+ case FT_TOKEN_RIGHT_PAREN:
+ info->quot = 0;
+
+ if (cur_node->up_node != NULL) {
+ cur_node = cur_node->up_node;
+
+ if (cur_node->go_up) {
+ ut_a(cur_node->up_node
+ && !(cur_node->up_node->go_up));
+ cur_node = cur_node->up_node;
+ }
+ }
+
+ state->cur_node = cur_node;
+
+ if (state->depth > 0) {
+ state->depth--;
+ } else {
+ /* Parentheses mismatch */
+ return(1);
+ }
+
+ break;
+
+ case FT_TOKEN_EOF:
+ default:
+ break;
+ }
+
+ return(0);
+}
+
+/******************************************************************//**
+FTS plugin parser 'mysql_parse' callback function for query parse.
+Refer to 'st_mysql_ftparser_param' for more detail.
+@return 0 if parse successfully */
+static
+int
+fts_parse_query_internal(
+/*=====================*/
+ MYSQL_FTPARSER_PARAM* param, /*!< in: parser param */
+ const char* query, /*!< in: query string */
+ int len) /*!< in: query length */
+{
+ MYSQL_FTPARSER_BOOLEAN_INFO info;
+ const CHARSET_INFO* cs = param->cs;
+ uchar** start = (uchar**)(&query);
+ uchar* end = (uchar*)(query + len);
+ FT_WORD w = {NULL, 0, 0};
+
+ info.prev = ' ';
+ info.quot = 0;
+ memset(&w, 0, sizeof(w));
+ /* Note: We don't handle simple parser mode here,
+	but a user-supplied plugin parser should handle it. */
+ while (fts_get_word(cs, start, end, &w, &info)) {
+ int ret = param->mysql_add_word(
+ param,
+ reinterpret_cast<char*>(w.pos),
+ w.len, &info);
+ if (ret) {
+ return(ret);
+ }
+ }
+
+ return(0);
+}
+
+/******************************************************************//**
+Parse an FTS query by plugin parser.
+@return 0 if parse successfully, or return non-zero. */
+int
+fts_parse_by_parser(
+/*================*/
+ ibool mode, /*!< in: parse boolean mode */
+ uchar* query_str, /*!< in: query string */
+ ulint query_len, /*!< in: query string length */
+ st_mysql_ftparser* parser, /*!< in: fts plugin parser */
+ fts_ast_state_t* state) /*!< in/out: parser state */
+{
+ MYSQL_FTPARSER_PARAM param;
+ int ret;
+
+ ut_ad(parser);
+
+	/* Initialize the parser param */
+ param.mysql_parse = fts_parse_query_internal;
+ param.mysql_add_word = fts_query_add_word_for_parser;
+ param.mysql_ftparam = static_cast<void*>(state);
+ param.cs = state->charset;
+ param.doc = reinterpret_cast<char*>(query_str);
+ param.length = static_cast<int>(query_len);
+ param.flags = 0;
+ param.mode = mode ?
+ MYSQL_FTPARSER_FULL_BOOLEAN_INFO :
+ MYSQL_FTPARSER_SIMPLE_MODE;
+
+ PARSER_INIT(parser, &param);
+ ret = parser->parse(&param);
+ PARSER_DEINIT(parser, &param);
+
+ return(ret | state->depth);
+}
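
The new file gives InnoDB a bridge to server-side fulltext plugin
parsers: fts_parse_by_parser() fills a MYSQL_FTPARSER_PARAM whose
mysql_add_word callback (fts_query_add_word_for_parser) builds the
query AST node by node. As a minimal sketch of the other side of that
contract (a hypothetical plugin, not shipped with this patch), a
pass-through parser only has to delegate to the param's mysql_parse
callback, exactly as fts_default_parser does above:

	/* Hypothetical pass-through plugin parser: hand the whole
	document back to the tokenizer supplied in the param. */
	static int
	my_passthrough_parse(MYSQL_FTPARSER_PARAM* param)
	{
		return(param->mysql_parse(param, param->doc,
					  param->length));
	}

	static struct st_mysql_ftparser my_passthrough_parser = {
		MYSQL_FTPARSER_INTERFACE_VERSION,
		my_passthrough_parse,
		NULL,	/* init: nothing to set up */
		NULL	/* deinit: nothing to tear down */
	};

Anything more elaborate (n-gram splitting, custom stopword handling)
would do its own tokenization and call param->mysql_add_word once per
token instead.
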
diff --git a/storage/innobase/fts/fts0que.cc b/storage/innobase/fts/fts0que.cc
index ce6151e63b8..6baec777270 100644
--- a/storage/innobase/fts/fts0que.cc
+++ b/storage/innobase/fts/fts0que.cc
@@ -25,7 +25,7 @@ Created 2007/03/27 Sunny Bains
Completed 2011/7/10 Sunny and Jimmy Yang
*******************************************************/
-#include "dict0dict.h" /* dict_table_get_n_rows() */
+#include "dict0dict.h"
#include "ut0rbt.h"
#include "row0sel.h"
#include "fts0fts.h"
@@ -33,14 +33,9 @@ Completed 2011/7/10 Sunny and Jimmy Yang
#include "fts0ast.h"
#include "fts0pars.h"
#include "fts0types.h"
-#include "ha_prototypes.h"
-#include <ctype.h>
-
-#ifndef UNIV_NONINL
-#include "fts0types.ic"
-#include "fts0vlc.ic"
-#endif
+#include "fts0plugin.h"
+#include <iomanip>
#include <vector>
#define FTS_ELEM(t, n, i, j) (t[(i) * n + (j)])
@@ -60,7 +55,7 @@ Completed 2011/7/10 Sunny and Jimmy Yang
// FIXME: Need to have a generic iterator that traverses the ilist.
-typedef std::vector<fts_string_t> word_vector_t;
+typedef std::vector<fts_string_t, ut_allocator<fts_string_t> > word_vector_t;
struct fts_word_freq_t;
@@ -72,6 +67,7 @@ struct fts_query_t {
dict_index_t* index; /*!< The FTS index to search */
/*!< FTS auxiliary common table def */
+
fts_table_t fts_common_table;
fts_table_t fts_index_table;/*!< FTS auxiliary index table def */
@@ -145,7 +141,11 @@ struct fts_query_t {
document, its elements are of type
fts_word_freq_t */
+ ib_rbt_t* wildcard_words; /*!< words with wildcard */
+
bool multi_exist; /*!< multiple FTS_EXIST oper */
+
+ st_mysql_ftparser* parser; /*!< fts plugin parser */
};
/** For phrase matching, first we collect the documents and the positions
@@ -179,7 +179,7 @@ struct fts_select_t {
the FTS index */
};
-typedef std::vector<ulint> pos_vector_t;
+typedef std::vector<ulint, ut_allocator<ulint> > pos_vector_t;
/** structure defines a set of ranges for original documents, each of which
has a minimum position and maximum position. Text in such range should
@@ -198,22 +198,54 @@ struct fts_proximity_t {
/** The match positions and tokens to match */
struct fts_phrase_t {
- ibool found; /*!< Match result */
-
- const fts_match_t*
- match; /*!< Positions within text */
-
- const ib_vector_t*
- tokens; /*!< Tokens to match */
-
- ulint distance; /*!< For matching on proximity
- distance. Can be 0 for exact match */
- CHARSET_INFO* charset; /*!< Phrase match charset */
- mem_heap_t* heap; /*!< Heap for word processing */
- ulint zip_size; /*!< row zip size */
- fts_proximity_t*proximity_pos; /*!< position info for proximity
- search verification. Records the min
- and max position of words matched */
+ fts_phrase_t(const dict_table_t* table)
+ :
+ found(false),
+ match(NULL),
+ tokens(NULL),
+ distance(0),
+ charset(NULL),
+ heap(NULL),
+ page_size(dict_table_page_size(table)),
+ proximity_pos(NULL),
+ parser(NULL)
+ {
+ }
+
+ /** Match result */
+ ibool found;
+
+ /** Positions within text */
+ const fts_match_t* match;
+
+ /** Tokens to match */
+ const ib_vector_t* tokens;
+
+ /** For matching on proximity distance. Can be 0 for exact match */
+ ulint distance;
+
+ /** Phrase match charset */
+ CHARSET_INFO* charset;
+
+ /** Heap for word processing */
+ mem_heap_t* heap;
+
+ /** Row page size */
+ const page_size_t page_size;
+
+ /** Position info for proximity search verification. Records the
+ min and max position of words matched */
+ fts_proximity_t* proximity_pos;
+
+ /** FTS plugin parser */
+ st_mysql_ftparser* parser;
+};
+
+/** Parameter passed to FTS phrase match by parser */
+struct fts_phrase_param_t {
+ fts_phrase_t* phrase; /*!< Match phrase instance */
+ ulint token_index; /*!< Index of token to match next */
+ mem_heap_t* heap; /*!< Heap for word processing */
};
/** For storing the frequency of a word/term in a document */
@@ -261,6 +293,20 @@ fts_query_filter_doc_ids(
ibool calc_doc_count);/*!< in: whether to remember doc
count */
+/** Process (nested) sub-expression, create a new result set to store the
+sub-expression result by processing nodes under current sub-expression
+list. Merge the sub-expression result with that of parent expression list.
+@param[in,out] node current root node
+@param[in,out] visitor callback function
+@param[in,out] arg argument for callback
+@return DB_SUCCESS if all goes well */
+static
+dberr_t
+fts_ast_visit_sub_exp(
+ fts_ast_node_t* node,
+ fts_ast_callback visitor,
+ void* arg);
+
#if 0
/*****************************************************************//***
Find a doc_id in a word's ilist.
@@ -396,7 +442,7 @@ fts_query_lcs(
ulint r = len_p1;
ulint c = len_p2;
ulint size = (r + 1) * (c + 1) * sizeof(ulint);
- ulint* table = (ulint*) ut_malloc(size);
+ ulint* table = (ulint*) ut_malloc_nokey(size);
/* Traverse the table backwards, from the last row to the first and
also from the last column to the first. We compute the smaller
@@ -432,7 +478,7 @@ fts_query_lcs(
len = FTS_ELEM(table, c, 0, 0);
fts_print_lcs_table(table, r, c);
- printf("\nLen=%lu\n", len);
+ printf("\nLen=" ULINTPF "\n", len);
ut_free(table);
@@ -443,7 +489,7 @@ fts_query_lcs(
/*******************************************************************//**
Compare two fts_ranking_t instance on their rank value and doc ids in
descending order on the rank and ascending order on doc id.
-@return 0 if p1 == p2, < 0 if p1 < p2, > 0 if p1 > p2 */
+@return 0 if p1 == p2, < 0 if p1 < p2, > 0 if p1 > p2 */
static
int
fts_query_compare_rank(
@@ -470,67 +516,6 @@ fts_query_compare_rank(
return(1);
}
-#ifdef FTS_UTF8_DEBUG
-/*******************************************************************//**
-Convert string to lowercase.
-@return lower case string, callers responsibility to delete using
-ut_free() */
-static
-byte*
-fts_tolower(
-/*========*/
- const byte* src, /*!< in: src string */
- ulint len) /*!< in: src string length */
-{
- fts_string_t str;
- byte* lc_str = ut_malloc(len + 1);
-
- str.f_len = len;
- str.f_str = lc_str;
-
- memcpy(str.f_str, src, len);
-
- /* Make sure the last byte is NUL terminated */
- str.f_str[len] = '\0';
-
- fts_utf8_tolower(&str);
-
- return(lc_str);
-}
-
-/*******************************************************************//**
-Do a case insensitive search. Doesn't check for NUL byte end marker
-only relies on len. Convert str2 to lower case before comparing.
-@return 0 if p1 == p2, < 0 if p1 < p2, > 0 if p1 > p2 */
-static
-int
-fts_utf8_strcmp(
-/*============*/
- const fts_string_t*
- str1, /*!< in: should be lower case*/
-
- fts_string_t* str2) /*!< in: any case. We will use the length
- of this string during compare as it
- should be the min of the two strings */
-{
- byte b = str2->f_str[str2->f_len];
-
- ut_a(str2->f_len <= str1->f_len);
-
- /* We need to write a NUL byte at the end of the string because the
- string is converted to lowercase by a MySQL function which doesn't
- care about the length. */
- str2->f_str[str2->f_len] = 0;
-
- fts_utf8_tolower(str2);
-
- /* Restore the value we replaced above. */
- str2->f_str[str2->f_len] = b;
-
- return(memcmp(str1->f_str, str2->f_str, str2->f_len));
-}
-#endif
-
/*******************************************************************//**
Create words in ranking */
static
@@ -594,11 +579,7 @@ fts_ranking_words_add(
pos = rbt_size(query->word_map);
- new_word.f_str = static_cast<byte*>(mem_heap_alloc(query->heap,
- word->f_len + 1));
- memcpy(new_word.f_str, word->f_str, word->f_len);
- new_word.f_str[word->f_len] = 0;
- new_word.f_len = word->f_len;
+ fts_string_dup(&new_word, word, query->heap);
new_word.f_n_char = pos;
rbt_add_node(query->word_map, &parent, &new_word);
@@ -685,11 +666,7 @@ fts_query_add_word_freq(
memset(&word_freq, 0, sizeof(word_freq));
- word_freq.word.f_str = static_cast<byte*>(
- mem_heap_alloc(query->heap, word->f_len + 1));
- memcpy(word_freq.word.f_str, word->f_str, word->f_len);
- word_freq.word.f_str[word->f_len] = 0;
- word_freq.word.f_len = word->f_len;
+ fts_string_dup(&word_freq.word, word, query->heap);
word_freq.doc_count = 0;
@@ -1143,8 +1120,12 @@ fts_query_difference(
ut_a(query->oper == FTS_IGNORE);
#ifdef FTS_INTERNAL_DIAG_PRINT
- fprintf(stderr, "DIFFERENCE: Searching: '%.*s'\n",
- (int) token->f_len, token->f_str);
+ {
+ ib::info out;
+ out << "DIFFERENCE: Searching: '";
+ out.write(token->f_str, token->f_len);
+ out << "'";
+ }
#endif
if (query->doc_ids) {
@@ -1234,8 +1215,12 @@ fts_query_intersect(
ut_a(query->oper == FTS_EXIST);
#ifdef FTS_INTERNAL_DIAG_PRINT
- fprintf(stderr, "INTERSECT: Searching: '%.*s'\n",
- (int) token->f_len, token->f_str);
+ {
+ ib::info out;
+ out << "INTERSECT: Searching: '";
+ out.write(token->f_str, token->f_len);
+ out << "'";
+ }
#endif
/* If the words set is not empty and multi exist is true,
@@ -1416,8 +1401,12 @@ fts_query_union(
query->oper == FTS_NEGATE || query->oper == FTS_INCR_RATING);
#ifdef FTS_INTERNAL_DIAG_PRINT
- fprintf(stderr, "UNION: Searching: '%.*s'\n",
- (int) token->f_len, token->f_str);
+ {
+ ib::info out;
+ out << "UNION: Searching: '";
+ out.write(token->f_str, token->f_len);
+ out << "'";
+ }
#endif
if (query->doc_ids) {
@@ -1428,10 +1417,6 @@ fts_query_union(
return(query->error);
}
- /* Single '%' would confuse parser in pars_like_rebind(). In addition,
- our wildcard search only supports prefix search */
- ut_ad(*token->f_str != '%');
-
fts_query_cache(query, token);
/* Setup the callback args for filtering and
@@ -1627,18 +1612,17 @@ fts_query_match_phrase_terms(
const fts_string_t* token;
int result;
ulint ret;
- ulint offset;
ret = innobase_mysql_fts_get_token(
- phrase->charset, ptr, (byte*) end,
- &match, &offset);
+ phrase->charset, ptr,
+ const_cast<byte*>(end), &match);
if (match.f_len > 0) {
/* Get next token to match. */
token = static_cast<const fts_string_t*>(
ib_vector_get_const(tokens, i));
- fts_utf8_string_dup(&cmp_str, &match, heap);
+ fts_string_dup(&cmp_str, &match, heap);
result = innobase_fts_text_case_cmp(
phrase->charset, token, &cmp_str);
@@ -1719,12 +1703,11 @@ fts_proximity_is_word_in_range(
while (cur_pos <= proximity_pos->max_pos[i]) {
ulint len;
fts_string_t str;
- ulint offset = 0;
len = innobase_mysql_fts_get_token(
phrase->charset,
start + cur_pos,
- start + total_len, &str, &offset);
+ start + total_len, &str);
if (len == 0) {
break;
@@ -1754,6 +1737,103 @@ fts_proximity_is_word_in_range(
}
/*****************************************************************//**
+FTS plugin parser 'mysql_add_word' callback function for phrase match.
+Refer to 'st_mysql_ftparser_param' for more detail.
+@return 0 if match, or return non-zero */
+static
+int
+fts_query_match_phrase_add_word_for_parser(
+/*=======================================*/
+ MYSQL_FTPARSER_PARAM* param, /*!< in: parser param */
+ const char* word, /*!< in: token */
+ int word_len, /*!< in: token length */
+ MYSQL_FTPARSER_BOOLEAN_INFO* info) /*!< in: token info */
+{
+ fts_phrase_param_t* phrase_param;
+ fts_phrase_t* phrase;
+ const ib_vector_t* tokens;
+ fts_string_t match;
+ fts_string_t cmp_str;
+ const fts_string_t* token;
+ int result;
+ mem_heap_t* heap;
+
+ phrase_param = static_cast<fts_phrase_param_t*>(param->mysql_ftparam);
+ heap = phrase_param->heap;
+ phrase = phrase_param->phrase;
+ tokens = phrase->tokens;
+
+	/* In case the plugin parser doesn't check the return value */
+ if (phrase_param->token_index == ib_vector_size(tokens)) {
+ return(1);
+ }
+
+ match.f_str = (uchar *)(word);
+ match.f_len = word_len;
+ match.f_n_char = fts_get_token_size(phrase->charset, word, word_len);
+
+ if (match.f_len > 0) {
+ /* Get next token to match. */
+ ut_a(phrase_param->token_index < ib_vector_size(tokens));
+ token = static_cast<const fts_string_t*>(
+ ib_vector_get_const(tokens, phrase_param->token_index));
+
+ fts_string_dup(&cmp_str, &match, heap);
+
+ result = innobase_fts_text_case_cmp(
+ phrase->charset, token, &cmp_str);
+
+ if (result == 0) {
+ phrase_param->token_index++;
+ } else {
+ return(1);
+ }
+ }
+
+ /* Can't be greater than the number of elements. */
+ ut_a(phrase_param->token_index <= ib_vector_size(tokens));
+
+ /* This is the case for multiple words. */
+ if (phrase_param->token_index == ib_vector_size(tokens)) {
+ phrase->found = TRUE;
+ }
+
+ return(static_cast<int>(phrase->found));
+}
+
+/*****************************************************************//**
+Check whether the terms in the phrase match the text.
+@return TRUE if matched else FALSE */
+static
+ibool
+fts_query_match_phrase_terms_by_parser(
+/*===================================*/
+ fts_phrase_param_t* phrase_param, /* in/out: phrase param */
+ st_mysql_ftparser* parser, /* in: plugin fts parser */
+ byte* text, /* in: text to check */
+ ulint len) /* in: text length */
+{
+ MYSQL_FTPARSER_PARAM param;
+
+ ut_a(parser);
+
+	/* Set parameters for param */
+ param.mysql_parse = fts_tokenize_document_internal;
+ param.mysql_add_word = fts_query_match_phrase_add_word_for_parser;
+ param.mysql_ftparam = phrase_param;
+ param.cs = phrase_param->phrase->charset;
+ param.doc = reinterpret_cast<char*>(text);
+ param.length = static_cast<int>(len);
+	param.mode = MYSQL_FTPARSER_WITH_STOPWORDS;
+
+ PARSER_INIT(parser, &param);
+ parser->parse(&param);
+ PARSER_DEINIT(parser, &param);
+
+ return(phrase_param->phrase->found);
+}
+
+/*****************************************************************//**
Callback function to fetch and search the document.
@return TRUE if matched else FALSE */
static
@@ -1787,11 +1867,7 @@ fts_query_match_phrase(
for (i = phrase->match->start; i < ib_vector_size(positions); ++i) {
ulint pos;
- fts_string_t match;
- fts_string_t cmp_str;
byte* ptr = start;
- ulint ret;
- ulint offset;
pos = *(ulint*) ib_vector_get_const(positions, i);
@@ -1808,39 +1884,60 @@ fts_query_match_phrase(
searched field to adjust the doc position when search
phrases. */
pos -= prev_len;
- ptr = match.f_str = start + pos;
+ ptr = start + pos;
/* Within limits ? */
if (ptr >= end) {
break;
}
- ret = innobase_mysql_fts_get_token(
- phrase->charset, start + pos, (byte*) end,
- &match, &offset);
+ if (phrase->parser) {
+ fts_phrase_param_t phrase_param;
- if (match.f_len == 0) {
- break;
- }
+ phrase_param.phrase = phrase;
+ phrase_param.token_index = 0;
+ phrase_param.heap = heap;
- fts_utf8_string_dup(&cmp_str, &match, heap);
+ if (fts_query_match_phrase_terms_by_parser(
+ &phrase_param,
+ phrase->parser,
+ ptr,
+ (end - ptr))) {
+ break;
+ }
+ } else {
+ fts_string_t match;
+ fts_string_t cmp_str;
+ ulint ret;
- if (innobase_fts_text_case_cmp(
- phrase->charset, first, &cmp_str) == 0) {
+ match.f_str = ptr;
+ ret = innobase_mysql_fts_get_token(
+ phrase->charset, start + pos,
+ const_cast<byte*>(end), &match);
- /* This is the case for the single word
- in the phrase. */
- if (ib_vector_size(phrase->tokens) == 1) {
- phrase->found = TRUE;
+ if (match.f_len == 0) {
break;
}
- ptr += ret;
+ fts_string_dup(&cmp_str, &match, heap);
- /* Match the remaining terms in the phrase. */
- if (fts_query_match_phrase_terms(phrase, &ptr,
- end, heap)) {
- break;
+ if (innobase_fts_text_case_cmp(
+ phrase->charset, first, &cmp_str) == 0) {
+
+ /* This is the case for the single word
+ in the phrase. */
+ if (ib_vector_size(phrase->tokens) == 1) {
+ phrase->found = TRUE;
+ break;
+ }
+
+ ptr += ret;
+
+ /* Match the remaining terms in the phrase. */
+ if (fts_query_match_phrase_terms(phrase, &ptr,
+ end, heap)) {
+ break;
+ }
}
}
}
@@ -1916,7 +2013,7 @@ fts_query_fetch_document(
if (dfield_is_ext(dfield)) {
data = btr_copy_externally_stored_field(
- &cur_len, data, phrase->zip_size,
+ &cur_len, data, phrase->page_size,
dfield_get_len(dfield), phrase->heap);
} else {
cur_len = dfield_get_len(dfield);
@@ -2074,9 +2171,9 @@ fts_query_find_term(
"DECLARE CURSOR c IS"
" SELECT doc_count, ilist\n"
" FROM $index_table_name\n"
- " WHERE word LIKE :word AND "
- " first_doc_id <= :min_doc_id AND "
- " last_doc_id >= :max_doc_id\n"
+ " WHERE word LIKE :word AND"
+ " first_doc_id <= :min_doc_id AND"
+ " last_doc_id >= :max_doc_id\n"
" ORDER BY first_doc_id;\n"
"BEGIN\n"
"\n"
@@ -2090,24 +2187,22 @@ fts_query_find_term(
"CLOSE c;");
}
- for(;;) {
+ for (;;) {
error = fts_eval_sql(trx, *graph);
if (error == DB_SUCCESS) {
break; /* Exit the loop. */
} else {
- ut_print_timestamp(stderr);
if (error == DB_LOCK_WAIT_TIMEOUT) {
- fprintf(stderr, " InnoDB: Warning: lock wait "
- "timeout reading FTS index. "
- "Retrying!\n");
+ ib::warn() << "lock wait timeout reading FTS"
+ " index. Retrying!";
trx->error_state = DB_SUCCESS;
} else {
- fprintf(stderr, " InnoDB: Error: %lu "
- "while reading FTS index.\n", error);
+ ib::error() << error
+ << " while reading FTS index.";
break; /* Exit the loop. */
}
@@ -2198,7 +2293,7 @@ fts_query_total_docs_containing_term(
"DECLARE CURSOR c IS"
" SELECT doc_count\n"
" FROM $index_table_name\n"
- " WHERE word = :word "
+ " WHERE word = :word"
" ORDER BY first_doc_id;\n"
"BEGIN\n"
"\n"
@@ -2211,24 +2306,22 @@ fts_query_total_docs_containing_term(
"END LOOP;\n"
"CLOSE c;");
- for(;;) {
+ for (;;) {
error = fts_eval_sql(trx, graph);
if (error == DB_SUCCESS) {
break; /* Exit the loop. */
} else {
- ut_print_timestamp(stderr);
if (error == DB_LOCK_WAIT_TIMEOUT) {
- fprintf(stderr, " InnoDB: Warning: lock wait "
- "timeout reading FTS index. "
- "Retrying!\n");
+ ib::warn() << "lock wait timeout reading FTS"
+ " index. Retrying!";
trx->error_state = DB_SUCCESS;
} else {
- fprintf(stderr, " InnoDB: Error: %lu "
- "while reading FTS index.\n", error);
+ ib::error() << error
+ << " while reading FTS index.";
break; /* Exit the loop. */
}
@@ -2283,8 +2376,8 @@ fts_query_terms_in_document(
"DECLARE CURSOR c IS"
" SELECT count\n"
" FROM $index_table_name\n"
- " WHERE doc_id = :doc_id "
- "BEGIN\n"
+ " WHERE doc_id = :doc_id"
+ " BEGIN\n"
"\n"
"OPEN c;\n"
"WHILE 1 = 1 LOOP\n"
@@ -2295,25 +2388,22 @@ fts_query_terms_in_document(
"END LOOP;\n"
"CLOSE c;");
- for(;;) {
+ for (;;) {
error = fts_eval_sql(trx, graph);
if (error == DB_SUCCESS) {
break; /* Exit the loop. */
} else {
- ut_print_timestamp(stderr);
if (error == DB_LOCK_WAIT_TIMEOUT) {
- fprintf(stderr, " InnoDB: Warning: lock wait "
- "timeout reading FTS doc id table. "
- "Retrying!\n");
+ ib::warn() << "lock wait timeout reading FTS"
+ " doc id table. Retrying!";
trx->error_state = DB_SUCCESS;
} else {
- fprintf(stderr, " InnoDB: Error: %lu "
- "while reading FTS doc id table.\n",
- error);
+ ib::error() << error << " while reading FTS"
+ " doc id table.";
break; /* Exit the loop. */
}
@@ -2329,7 +2419,8 @@ fts_query_terms_in_document(
/*****************************************************************//**
Retrieve the document and match the phrase tokens.
@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
+MY_ATTRIBUTE((nonnull(1,2,3,6), warn_unused_result))
+static
dberr_t
fts_query_match_document(
/*=====================*/
@@ -2337,20 +2428,18 @@ fts_query_match_document(
fts_get_doc_t* get_doc, /*!< in: table and prepared statements */
fts_match_t* match, /*!< in: doc id and positions */
ulint distance, /*!< in: proximity distance */
+ st_mysql_ftparser* parser, /*!< in: fts plugin parser */
ibool* found) /*!< out: TRUE if phrase found */
{
dberr_t error;
- fts_phrase_t phrase;
-
- memset(&phrase, 0x0, sizeof(phrase));
+ fts_phrase_t phrase(get_doc->index_cache->index->table);
phrase.match = match; /* Positions to match */
phrase.tokens = tokens; /* Tokens to match */
phrase.distance = distance;
phrase.charset = get_doc->index_cache->charset;
- phrase.zip_size = dict_table_zip_size(
- get_doc->index_cache->index->table);
phrase.heap = mem_heap_create(512);
+ phrase.parser = parser;
*found = phrase.found = FALSE;
@@ -2359,9 +2448,8 @@ fts_query_match_document(
fts_query_fetch_document, &phrase);
if (error != DB_SUCCESS) {
- ut_print_timestamp(stderr);
- fprintf(stderr, "InnoDB: Error: (%s) matching document.\n",
- ut_strerr(error));
+ ib::error() << "(" << ut_strerr(error)
+ << ") matching document.";
} else {
*found = phrase.found;
}
@@ -2384,23 +2472,21 @@ fts_query_is_in_proximity_range(
fts_proximity_t* qualified_pos) /*!< in: position info for
qualified ranges */
{
- fts_get_doc_t get_doc;
- fts_cache_t* cache = query->index->table->fts->cache;
- dberr_t err;
- fts_phrase_t phrase;
+ fts_get_doc_t get_doc;
+ fts_cache_t* cache = query->index->table->fts->cache;
+ dberr_t err;
memset(&get_doc, 0x0, sizeof(get_doc));
- memset(&phrase, 0x0, sizeof(phrase));
rw_lock_x_lock(&cache->lock);
get_doc.index_cache = fts_find_index_cache(cache, query->index);
rw_lock_x_unlock(&cache->lock);
ut_a(get_doc.index_cache != NULL);
+ fts_phrase_t phrase(get_doc.index_cache->index->table);
+
phrase.distance = query->distance;
phrase.charset = get_doc.index_cache->charset;
- phrase.zip_size = dict_table_zip_size(
- get_doc.index_cache->index->table);
phrase.heap = mem_heap_create(512);
phrase.proximity_pos = qualified_pos;
phrase.found = FALSE;
@@ -2410,9 +2496,8 @@ fts_query_is_in_proximity_range(
fts_query_fetch_document, &phrase);
if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Error: (%s) in verification phase of proximity "
- "search", ut_strerr(err));
+ ib::error() << "(" << ut_strerr(err) << ") in verification"
+ " phase of proximity search";
}
/* Free the prepared statement. */
@@ -2463,8 +2548,7 @@ fts_query_search_phrase(
rw_lock_x_unlock(&cache->lock);
#ifdef FTS_INTERNAL_DIAG_PRINT
- ut_print_timestamp(stderr);
- fprintf(stderr, " Start phrase search\n");
+ ib::info() << "Start phrase search";
#endif
/* Read the document from disk and do the actual
@@ -2482,8 +2566,8 @@ fts_query_search_phrase(
if (match->doc_id != 0) {
query->error = fts_query_match_document(
- orig_tokens, &get_doc,
- match, query->distance, &found);
+ orig_tokens, &get_doc, match,
+ query->distance, query->parser, &found);
if (query->error == DB_SUCCESS && found) {
ulint z;
@@ -2515,57 +2599,77 @@ func_exit:
return(query->error);
}
-/*****************************************************************//**
-Text/Phrase search.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_query_phrase_search(
-/*====================*/
- fts_query_t* query, /*!< in: query instance */
- const fts_string_t* phrase) /*!< in: token to search */
+/** Split the phrase into tokens
+@param[in,out] query query instance
+@param[in] node query node to search
+@param[in,out] tokens token vector
+@param[in,out] orig_tokens original node tokens include stopword
+@param[in,out] heap mem heap */
+static
+void
+fts_query_phrase_split(
+ fts_query_t* query,
+ const fts_ast_node_t* node,
+ ib_vector_t* tokens,
+ ib_vector_t* orig_tokens,
+ mem_heap_t* heap)
{
- ib_vector_t* tokens;
- ib_vector_t* orig_tokens;
- mem_heap_t* heap = mem_heap_create(sizeof(fts_string_t));
- ulint len = phrase->f_len;
+ fts_string_t phrase;
+ ulint len = 0;
ulint cur_pos = 0;
- ib_alloc_t* heap_alloc;
- ulint num_token;
- CHARSET_INFO* charset;
-
- charset = query->fts_index_table.charset;
-
- heap_alloc = ib_heap_allocator_create(heap);
-
- tokens = ib_vector_create(heap_alloc, sizeof(fts_string_t), 4);
- orig_tokens = ib_vector_create(heap_alloc, sizeof(fts_string_t), 4);
+ fts_ast_node_t* term_node = NULL;
- if (query->distance != ULINT_UNDEFINED && query->distance > 0) {
- query->flags = FTS_PROXIMITY;
+ if (node->type == FTS_AST_TEXT) {
+ phrase.f_str = node->text.ptr->str;
+ phrase.f_len = node->text.ptr->len;
+ len = phrase.f_len;
} else {
- query->flags = FTS_PHRASE;
+ ut_ad(node->type == FTS_AST_PARSER_PHRASE_LIST);
+ phrase.f_str = NULL;
+ phrase.f_len = 0;
+ term_node = node->list.head;
}
- /* Split the phrase into tokens. */
- while (cur_pos < len) {
+ while (true) {
fts_cache_t* cache = query->index->table->fts->cache;
- ib_rbt_bound_t parent;
- ulint offset;
ulint cur_len;
fts_string_t result_str;
- cur_len = innobase_mysql_fts_get_token(
- charset,
- reinterpret_cast<const byte*>(phrase->f_str) + cur_pos,
- reinterpret_cast<const byte*>(phrase->f_str) + len,
- &result_str, &offset);
+ if (node->type == FTS_AST_TEXT) {
+ if (cur_pos >= len) {
+ break;
+ }
- if (cur_len == 0) {
- break;
- }
+ cur_len = innobase_mysql_fts_get_token(
+ query->fts_index_table.charset,
+ reinterpret_cast<const byte*>(phrase.f_str)
+ + cur_pos,
+ reinterpret_cast<const byte*>(phrase.f_str)
+ + len,
+ &result_str);
+
+ if (cur_len == 0) {
+ break;
+ }
+
+ cur_pos += cur_len;
+ } else {
+ ut_ad(node->type == FTS_AST_PARSER_PHRASE_LIST);
+ /* Term node in parser phrase list */
+ if (term_node == NULL) {
+ break;
+ }
+
+ ut_a(term_node->type == FTS_AST_TERM);
+ result_str.f_str = term_node->term.ptr->str;
+ result_str.f_len = term_node->term.ptr->len;
+ result_str.f_n_char = fts_get_token_size(
+ query->fts_index_table.charset,
+ reinterpret_cast<char*>(result_str.f_str),
+ result_str.f_len);
- cur_pos += cur_len;
+ term_node = term_node->next;
+ }
if (result_str.f_n_char == 0) {
continue;
@@ -2573,19 +2677,12 @@ fts_query_phrase_search(
fts_string_t* token = static_cast<fts_string_t*>(
ib_vector_push(tokens, NULL));
+ fts_string_dup(token, &result_str, heap);
- token->f_str = static_cast<byte*>(
- mem_heap_alloc(heap, result_str.f_len + 1));
- ut_memcpy(token->f_str, result_str.f_str, result_str.f_len);
-
- token->f_len = result_str.f_len;
- token->f_str[token->f_len] = 0;
-
- if (cache->stopword_info.cached_stopword
- && rbt_search(cache->stopword_info.cached_stopword,
- &parent, token) != 0
- && result_str.f_n_char >= fts_min_token_size
- && result_str.f_n_char <= fts_max_token_size) {
+ if (fts_check_token(
+ &result_str,
+ cache->stopword_info.cached_stopword,
+ query->fts_index_table.charset)) {
/* Add the word to the RB tree so that we can
calculate its frequency within a document. */
fts_query_add_word_freq(query, token);
@@ -2604,6 +2701,37 @@ fts_query_phrase_search(
orig_token->f_len = token->f_len;
}
}
+}
+
+/*****************************************************************//**
+Text/Phrase search.
+@return DB_SUCCESS or error code */
+static MY_ATTRIBUTE((warn_unused_result))
+dberr_t
+fts_query_phrase_search(
+/*====================*/
+ fts_query_t* query, /*!< in: query instance */
+ const fts_ast_node_t* node) /*!< in: node to search */
+{
+ ib_vector_t* tokens;
+ ib_vector_t* orig_tokens;
+ mem_heap_t* heap = mem_heap_create(sizeof(fts_string_t));
+ ib_alloc_t* heap_alloc;
+ ulint num_token;
+
+ heap_alloc = ib_heap_allocator_create(heap);
+
+ tokens = ib_vector_create(heap_alloc, sizeof(fts_string_t), 4);
+ orig_tokens = ib_vector_create(heap_alloc, sizeof(fts_string_t), 4);
+
+ if (query->distance != ULINT_UNDEFINED && query->distance > 0) {
+ query->flags = FTS_PROXIMITY;
+ } else {
+ query->flags = FTS_PHRASE;
+ }
+
+ /* Split the phrase into tokens. */
+ fts_query_phrase_split(query, node, tokens, orig_tokens, heap);
num_token = ib_vector_size(tokens);
if (num_token > MAX_PROXIMITY_ITEM) {
@@ -2801,7 +2929,7 @@ fts_query_execute(
/*****************************************************************//**
Create a wildcard string. It's the responsibility of the caller to
-free the byte* pointer. It's allocated using ut_malloc().
+free the byte* pointer. It's allocated using ut_malloc_nokey().
@return ptr to allocated memory */
static
byte*
@@ -2822,7 +2950,7 @@ fts_query_get_token(
if (node->term.wildcard) {
- token->f_str = static_cast<byte*>(ut_malloc(str_len + 2));
+ token->f_str = static_cast<byte*>(ut_malloc_nokey(str_len + 2));
token->f_len = str_len + 1;
memcpy(token->f_str, node->term.ptr->str, str_len);
@@ -2860,8 +2988,7 @@ fts_query_visitor(
switch (node->type) {
case FTS_AST_TEXT:
- token.f_str = node->text.ptr->str;
- token.f_len = node->text.ptr->len;
+ case FTS_AST_PARSER_PHRASE_LIST:
if (query->oper == FTS_EXIST) {
ut_ad(query->intersection == NULL);
@@ -2877,7 +3004,7 @@ fts_query_visitor(
/* Force collection of doc ids and the positions. */
query->collect_positions = TRUE;
- query->error = fts_query_phrase_search(query, &token);
+ query->error = fts_query_phrase_search(query, node);
query->collect_positions = FALSE;
@@ -2893,6 +3020,20 @@ fts_query_visitor(
token.f_str = node->term.ptr->str;
token.f_len = node->term.ptr->len;
+ /* Collect wildcard words for QUERY EXPANSION. */
+ if (node->term.wildcard && query->wildcard_words != NULL) {
+ ib_rbt_bound_t parent;
+
+ if (rbt_search(query->wildcard_words, &parent, &token)
+ != 0) {
+ fts_string_t word;
+
+ fts_string_dup(&word, &token, query->heap);
+ rbt_add_node(query->wildcard_words, &parent,
+ &word);
+ }
+ }
+
/* Add the word to our RB tree that will be used to
calculate this terms per document frequency. */
fts_query_add_word_freq(query, &token);
@@ -2903,6 +3044,7 @@ fts_query_visitor(
if (ptr) {
ut_free(ptr);
}
+
break;
case FTS_AST_SUBEXP_LIST:
@@ -2920,18 +3062,19 @@ fts_query_visitor(
DBUG_RETURN(query->error);
}
-/*****************************************************************//**
-Process (nested) sub-expression, create a new result set to store the
+/** Process (nested) sub-expression, create a new result set to store the
sub-expression result by processing nodes under current sub-expression
list. Merge the sub-expression result with that of parent expression list.
-@return DB_SUCCESS if all well */
-UNIV_INTERN
+@param[in,out] node current root node
+@param[in,out] visitor callback function
+@param[in,out] arg argument for callback
+@return DB_SUCCESS if all goes well */
+static
dberr_t
fts_ast_visit_sub_exp(
-/*==================*/
- fts_ast_node_t* node, /*!< in,out: current root node */
- fts_ast_callback visitor, /*!< in: callback function */
- void* arg) /*!< in,out: arg for callback */
+ fts_ast_node_t* node,
+ fts_ast_callback visitor,
+ void* arg)
{
fts_ast_oper_t cur_oper;
fts_query_t* query = static_cast<fts_query_t*>(arg);
@@ -3194,8 +3337,9 @@ fts_query_read_node(
byte buf[FTS_MAX_WORD_LEN + 1];
dberr_t error = DB_SUCCESS;
- ut_a(query->cur_node->type == FTS_AST_TERM ||
- query->cur_node->type == FTS_AST_TEXT);
+ ut_a(query->cur_node->type == FTS_AST_TERM
+ || query->cur_node->type == FTS_AST_TEXT
+ || query->cur_node->type == FTS_AST_PARSER_PHRASE_LIST);
memset(&node, 0, sizeof(node));
term.f_str = buf;
@@ -3205,6 +3349,7 @@ fts_query_read_node(
to assign the frequency on search string behalf. */
if (query->cur_node->type == FTS_AST_TERM
&& query->cur_node->term.wildcard) {
+
term.f_len = query->cur_node->term.ptr->len;
ut_ad(FTS_MAX_WORD_LEN >= term.f_len);
memcpy(term.f_str, query->cur_node->term.ptr->str, term.f_len);
@@ -3358,11 +3503,11 @@ fts_query_calculate_idf(
}
if (fts_enable_diag_print) {
- fprintf(stderr,"'%s' -> " UINT64PF "/" UINT64PF
- " %6.5lf\n",
- word_freq->word.f_str,
- query->total_docs, word_freq->doc_count,
- word_freq->idf);
+ ib::info() << "'" << word_freq->word.f_str << "' -> "
+ << query->total_docs << "/"
+ << word_freq->doc_count << " "
+ << std::setw(6) << std::setprecision(5)
+ << word_freq->idf;
}
}
}
@@ -3491,9 +3636,8 @@ fts_query_prepare_result(
DBUG_ENTER("fts_query_prepare_result");
if (result == NULL) {
- result = static_cast<fts_result_t*>(ut_malloc(sizeof(*result)));
-
- memset(result, 0x0, sizeof(*result));
+ result = static_cast<fts_result_t*>(
+ ut_zalloc_nokey(sizeof(*result)));
result->rankings_by_id = rbt_create(
sizeof(fts_ranking_t), fts_ranking_doc_id_cmp);
@@ -3619,8 +3763,8 @@ fts_query_get_result(
result = fts_query_prepare_result(query, result);
} else {
/* Create an empty result instance. */
- result = static_cast<fts_result_t*>(ut_malloc(sizeof(*result)));
- memset(result, 0, sizeof(*result));
+ result = static_cast<fts_result_t*>(
+ ut_zalloc_nokey(sizeof(*result)));
}
DBUG_RETURN(result);
@@ -3675,12 +3819,18 @@ fts_query_free(
rbt_free(query->word_freqs);
}
+ if (query->wildcard_words != NULL) {
+ rbt_free(query->wildcard_words);
+ }
+
+ ut_a(!query->intersection);
+
if (query->word_map) {
rbt_free(query->word_map);
}
- if (query->word_vector) {
- delete query->word_vector;
+ if (query->word_vector != NULL) {
+ UT_DELETE(query->word_vector);
}
if (query->heap) {
@@ -3691,7 +3841,8 @@ fts_query_free(
}
/*****************************************************************//**
-Parse the query using flex/bison. */
+Parse the query using flex/bison or plugin parser.
+@return parse tree node. */
static
fts_ast_node_t*
fts_query_parse(
@@ -3707,12 +3858,24 @@ fts_query_parse(
memset(&state, 0x0, sizeof(state));
- /* Setup the scanner to use, this depends on the mode flag. */
- state.lexer = fts_lexer_create(mode, query_str, query_len);
state.charset = query->fts_index_table.charset;
- error = fts_parse(&state);
- fts_lexer_free(state.lexer);
- state.lexer = NULL;
+
+ DBUG_EXECUTE_IF("fts_instrument_query_disable_parser",
+ query->parser = NULL;);
+
+ if (query->parser) {
+ state.root = state.cur_node =
+ fts_ast_create_node_list(&state, NULL);
+ error = fts_parse_by_parser(mode, query_str, query_len,
+ query->parser, &state);
+ } else {
+ /* Setup the scanner to use, this depends on the mode flag. */
+ state.lexer = fts_lexer_create(mode, query_str, query_len);
+ state.charset = query->fts_index_table.charset;
+ error = fts_parse(&state);
+ fts_lexer_free(state.lexer);
+ state.lexer = NULL;
+ }
/* Error during parsing ? */
if (error) {
@@ -3720,6 +3883,10 @@ fts_query_parse(
fts_ast_state_free(&state);
} else {
query->root = state.root;
+
+ if (fts_enable_diag_print && query->root != NULL) {
+ fts_ast_node_print(query->root);
+ }
}
DBUG_RETURN(state.root);
@@ -3749,117 +3916,27 @@ fts_query_can_optimize(
}
}
-/*******************************************************************//**
-Pre-process the query string
-1) make it lower case
-2) in boolean mode, if there is '-' or '+' that is immediately proceeded
-and followed by valid word, make it a space
-@return the processed string */
-static
-byte*
-fts_query_str_preprocess(
-/*=====================*/
- const byte* query_str, /*!< in: FTS query */
- ulint query_len, /*!< in: FTS query string len */
- ulint *result_len, /*!< out: result string length */
- CHARSET_INFO* charset, /*!< in: string charset */
- bool boolean_mode) /*!< in: is boolean mode */
-{
- ulint cur_pos = 0;
- ulint str_len;
- byte* str_ptr;
- bool in_phrase = false;
-
- /* Convert the query string to lower case before parsing. We own
- the ut_malloc'ed result and so remember to free it before return. */
-
- str_len = query_len * charset->casedn_multiply + 1;
- str_ptr = static_cast<byte*>(ut_malloc(str_len));
-
- /* For binary collations, a case sensitive search is
- performed. Hence don't convert to lower case. */
- if (my_binary_compare(charset)) {
- memcpy(str_ptr, query_str, query_len);
- str_ptr[query_len]= 0;
- *result_len= query_len;
- } else {
- *result_len = innobase_fts_casedn_str(
- charset, const_cast<char*>
- (reinterpret_cast<const char*>( query_str)),
- query_len,
- reinterpret_cast<char*>(str_ptr), str_len);
- }
-
- ut_ad(*result_len < str_len);
-
- str_ptr[*result_len] = 0;
-
- /* If it is boolean mode, no need to check for '-/+' */
- if (!boolean_mode) {
- return(str_ptr);
- }
-
- /* Otherwise, we travese the string to find any '-/+' that are
- immediately proceeded and followed by valid search word.
- NOTE: we should not do so for CJK languages, this should
- be taken care of in our CJK implementation */
- while (cur_pos < *result_len) {
- fts_string_t str;
- ulint offset;
- ulint cur_len;
-
- cur_len = innobase_mysql_fts_get_token(
- charset, str_ptr + cur_pos, str_ptr + *result_len,
- &str, &offset);
-
- if (cur_len == 0 || str.f_str == NULL) {
- /* No valid word found */
- break;
- }
-
- /* Check if we are in a phrase, if so, no need to do
- replacement of '-/+'. */
- for (byte* ptr = str_ptr + cur_pos; ptr < str.f_str; ptr++) {
- if ((char) (*ptr) == '"' ) {
- in_phrase = !in_phrase;
- }
- }
-
- /* Find those are not leading '-/+' and also not in a phrase */
- if (cur_pos > 0 && str.f_str - str_ptr - cur_pos == 1
- && !in_phrase) {
- char* last_op = reinterpret_cast<char*>(
- str_ptr + cur_pos);
-
- if (*last_op == '-' || *last_op == '+') {
- *last_op = ' ';
- }
- }
-
- cur_pos += cur_len;
- }
-
- return(str_ptr);
-}
-
-/*******************************************************************//**
-FTS Query entry point.
+/** FTS Query entry point.
+@param[in] trx transaction
+@param[in] index fts index to search
+@param[in] flags FTS search mode
+@param[in] query_str FTS query
+@param[in] query_len FTS query string len in bytes
+@param[in,out] result result doc ids
@return DB_SUCCESS if successful otherwise error code */
-UNIV_INTERN
dberr_t
fts_query(
-/*======*/
- trx_t* trx, /*!< in: transaction */
- dict_index_t* index, /*!< in: The FTS index to search */
- uint flags, /*!< in: FTS search mode */
- const byte* query_str, /*!< in: FTS query */
- ulint query_len, /*!< in: FTS query string len
- in bytes */
- fts_result_t** result) /*!< in/out: result doc ids */
+ trx_t* trx,
+ dict_index_t* index,
+ uint flags,
+ const byte* query_str,
+ ulint query_len,
+ fts_result_t** result)
{
fts_query_t query;
dberr_t error = DB_SUCCESS;
byte* lc_query_str;
+ ulint lc_query_str_len;
ulint result_len;
bool boolean_mode;
trx_t* query_trx;
@@ -3895,35 +3972,25 @@ fts_query(
query.fts_index_table.table = index->table;
query.word_map = rbt_create_arg_cmp(
- sizeof(fts_string_t), innobase_fts_text_cmp,
- (void *) charset);
- query.word_vector = new word_vector_t;
+ sizeof(fts_string_t), innobase_fts_text_cmp, (void*)charset);
+ query.word_vector = UT_NEW_NOKEY(word_vector_t());
query.error = DB_SUCCESS;
/* Setup the RB tree that will be used to collect per term
statistics. */
query.word_freqs = rbt_create_arg_cmp(
- sizeof(fts_word_freq_t), innobase_fts_text_cmp,
+ sizeof(fts_word_freq_t), innobase_fts_text_cmp,
(void*) charset);
+ if (flags & FTS_EXPAND) {
+ query.wildcard_words = rbt_create_arg_cmp(
+ sizeof(fts_string_t), innobase_fts_text_cmp, (void *)charset);
+ }
+
query.total_size += SIZEOF_RBT_CREATE;
query.total_docs = dict_table_get_n_rows(index->table);
-#ifdef FTS_DOC_STATS_DEBUG
- if (ft_enable_diag_print) {
- error = fts_get_total_word_count(
- trx, query.index, &query.total_words);
-
- if (error != DB_SUCCESS) {
- goto func_exit;
- }
-
- fprintf(stderr, "Total docs: " UINT64PF " Total words: %lu\n",
- query.total_docs, query.total_words);
- }
-#endif /* FTS_DOC_STATS_DEBUG */
-
query.fts_common_table.suffix = "DELETED";
/* Read the deleted doc_ids, we need these for filtering. */
@@ -3951,31 +4018,34 @@ fts_query(
/* Sort the vector so that we can do a binary search over the ids. */
ib_vector_sort(query.deleted->doc_ids, fts_update_doc_id_cmp);
-#if 0
/* Convert the query string to lower case before parsing. We own
the ut_malloc'ed result and so remember to free it before return. */
lc_query_str_len = query_len * charset->casedn_multiply + 1;
- lc_query_str = static_cast<byte*>(ut_malloc(lc_query_str_len));
+ lc_query_str = static_cast<byte*>(ut_malloc_nokey(lc_query_str_len));
+ /* For binary collations, a case sensitive search is
+ performed. Hence don't convert to lower case. */
+ if (my_binary_compare(charset)) {
+ memcpy(lc_query_str, query_str, query_len);
+ lc_query_str[query_len]= 0;
+ result_len= query_len;
+ } else {
result_len = innobase_fts_casedn_str(
- charset, (char*) query_str, query_len,
- (char*) lc_query_str, lc_query_str_len);
+		charset, (char*)(query_str), query_len,
+ (char*)(lc_query_str), lc_query_str_len);
+ }
ut_ad(result_len < lc_query_str_len);
lc_query_str[result_len] = 0;
-#endif
-
- lc_query_str = fts_query_str_preprocess(
- query_str, query_len, &result_len, charset, boolean_mode);
-
query.heap = mem_heap_create(128);
/* Create the rb tree for the doc id (current) set. */
query.doc_ids = rbt_create(
sizeof(fts_ranking_t), fts_ranking_doc_id_cmp);
+ query.parser = index->parser;
query.total_size += SIZEOF_RBT_CREATE;
@@ -4024,8 +4094,7 @@ fts_query(
} else {
/* still return an empty result set */
*result = static_cast<fts_result_t*>(
- ut_malloc(sizeof(**result)));
- memset(*result, 0, sizeof(**result));
+ ut_zalloc_nokey(sizeof(**result)));
}
if (trx_is_interrupted(trx)) {
@@ -4041,21 +4110,21 @@ fts_query(
if (fts_enable_diag_print && (*result)) {
ulint diff_time = ut_time_ms() - start_time_ms;
- fprintf(stderr, "FTS Search Processing time: %ld secs:"
- " %ld millisec: row(s) %d \n",
- diff_time / 1000, diff_time % 1000,
- (*result)->rankings_by_id
- ? (int) rbt_size((*result)->rankings_by_id)
- : -1);
+
+ ib::info() << "FTS Search Processing time: "
+ << diff_time / 1000 << " secs: " << diff_time % 1000
+ << " millisec: row(s) "
+ << ((*result)->rankings_by_id
+ ? rbt_size((*result)->rankings_by_id)
+ : -1);
/* Log memory consumption & result size */
- ib_logf(IB_LOG_LEVEL_INFO,
- "Full Search Memory: "
- "%zu (bytes), Row: %lu .",
- query.total_size,
- (*result)->rankings_by_id
- ? rbt_size((*result)->rankings_by_id)
- : 0);
+ ib::info() << "Full Search Memory: " << query.total_size
+ << " (bytes), Row: "
+ << ((*result)->rankings_by_id
+ ? rbt_size((*result)->rankings_by_id)
+ : 0)
+ << ".";
}
func_exit:
@@ -4068,7 +4137,6 @@ func_exit:
/*****************************************************************//**
FTS Query free result, returned by fts_query(). */
-
void
fts_query_free_result(
/*==================*/
@@ -4091,7 +4159,6 @@ fts_query_free_result(
/*****************************************************************//**
FTS Query sort result, returned by fts_query() on fts_ranking_t::rank. */
-
void
fts_query_sort_result_on_rank(
/*==========================*/
@@ -4127,7 +4194,6 @@ fts_query_sort_result_on_rank(
result->rankings_by_rank = ranked;
}
-#ifdef UNIV_DEBUG
/*******************************************************************//**
A debug function to print result doc_id set. */
static
@@ -4145,18 +4211,16 @@ fts_print_doc_id(
fts_ranking_t* ranking;
ranking = rbt_value(fts_ranking_t, node);
- ib_logf(IB_LOG_LEVEL_INFO, "doc_ids info, doc_id: %ld \n",
- (ulint) ranking->doc_id);
+ ib::info() << "doc_ids info, doc_id: " << ranking->doc_id;
ulint pos = 0;
fts_string_t word;
while (fts_ranking_words_get_next(query, ranking, &pos, &word)) {
- ib_logf(IB_LOG_LEVEL_INFO, "doc_ids info, value: %s \n", word.f_str);
+ ib::info() << "doc_ids info, value: " << word.f_str;
}
}
}
-#endif
/*************************************************************//**
This function implements a simple "blind" query expansion search:
@@ -4196,19 +4260,19 @@ fts_expand_query(
(void*) index_cache->charset);
result_doc.charset = index_cache->charset;
+ result_doc.parser = index_cache->index->parser;
query->total_size += SIZEOF_RBT_CREATE;
-#ifdef UNIV_DEBUG
- fts_print_doc_id(query);
-#endif
+
+ if (fts_enable_diag_print) {
+ fts_print_doc_id(query);
+ }
for (node = rbt_first(query->doc_ids);
node;
node = rbt_next(query->doc_ids, node)) {
fts_ranking_t* ranking;
- ulint pos;
- fts_string_t word;
ulint prev_token_size;
ulint estimate_size;
@@ -4227,24 +4291,6 @@ fts_expand_query(
fts_query_expansion_fetch_doc,
&result_doc);
- /* Remove words that have already been searched in the
- first pass */
- pos = 0;
- while (fts_ranking_words_get_next(query, ranking, &pos,
- &word)) {
- ibool ret;
-
- ret = rbt_delete(result_doc.tokens, &word);
-
- /* The word must exist in the doc we found */
- if (!ret) {
- ib_logf(IB_LOG_LEVEL_ERROR, "Did not "
- "find word %s in doc %ld for query "
- "expansion search.\n", word.f_str,
- (ulint) ranking->doc_id);
- }
- }
-
/* Estimate memory used, see fts_process_token and fts_token_t.
We ignore token size here. */
estimate_size = (rbt_size(result_doc.tokens) - prev_token_size)
@@ -4258,6 +4304,30 @@ fts_expand_query(
}
}
+ /* Remove words that have already been searched in the first pass */
+ for (ulint i = 0; i < query->word_vector->size(); i++) {
+ fts_string_t word = query->word_vector->at(i);
+ ib_rbt_bound_t parent;
+
+ if (query->wildcard_words
+ && rbt_search(query->wildcard_words, &parent, &word) == 0) {
+ /* If it's a wildcard word, remove words having
+ it as prefix. */
+ while (rbt_search_cmp(result_doc.tokens,
+ &parent, &word, NULL,
+ innobase_fts_text_cmp_prefix)
+ == 0) {
+ ut_free(rbt_remove_node(result_doc.tokens,
+ parent.last));
+ }
+ } else {
+			/* We don't check the return value, because the word may
+ have been deleted by a previous wildcard word as its
+ prefix, e.g. ('g * good'). */
+ rbt_delete(result_doc.tokens, &word);
+ }
+ }
+
/* Search the table the second time with expanded search list */
for (token_node = rbt_first(result_doc.tokens);
token_node;
@@ -4265,6 +4335,12 @@ fts_expand_query(
fts_token_t* mytoken;
mytoken = rbt_value(fts_token_t, token_node);
+		/* '%' at the end is treated as a prefix search,
+		which can cause an assertion failure, so we skip it. */
+ if (mytoken->text.f_str[mytoken->text.f_len - 1] == '%') {
+ continue;
+ }
+
ut_ad(mytoken->text.f_str[mytoken->text.f_len] == 0);
fts_query_add_word_freq(query, &mytoken->text);
error = fts_query_union(query, &mytoken->text);
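
One structural change in fts0que.cc deserves a note: fts_phrase_t now
has a constructor because the struct gained a const page_size_t
member, which the old memset()-to-zero initialization cannot set. A
sketch of the resulting call pattern in fts_query_match_document(),
with the remaining fields assigned afterwards:

	/* Sketch: page_size is const and derived from the table,
	so the phrase is constructed rather than memset(). */
	fts_phrase_t	phrase(get_doc->index_cache->index->table);

	phrase.match = match;		/* positions to match */
	phrase.tokens = tokens;		/* tokens to match */
	phrase.distance = distance;
	phrase.charset = get_doc->index_cache->charset;
	phrase.heap = mem_heap_create(512);
	phrase.parser = parser;		/* NULL: built-in tokenizer */
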
diff --git a/storage/innobase/fts/fts0sql.cc b/storage/innobase/fts/fts0sql.cc
index 2c91115281b..376662bba8a 100644
--- a/storage/innobase/fts/fts0sql.cc
+++ b/storage/innobase/fts/fts0sql.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
@@ -31,11 +31,6 @@ Created 2007-03-27 Sunny Bains
#include "fts0types.h"
#include "fts0priv.h"
-#ifndef UNIV_NONINL
-#include "fts0types.ic"
-#include "fts0vlc.ic"
-#endif
-
/** SQL statements for creating the ancillary FTS tables. */
/** Preamble to all SQL statements. */
@@ -50,7 +45,6 @@ static const char* fts_sql_end=
/******************************************************************//**
Get the table id.
@return number of bytes written */
-UNIV_INTERN
int
fts_get_table_id(
/*=============*/
@@ -99,20 +93,19 @@ fts_get_table_id(
@param[in] fts_table metadata on fulltext-indexed table
@param[in] dict_locked whether dict_sys->mutex is being held
@return the prefix, must be freed with ut_free() */
-UNIV_INTERN char* fts_get_table_name_prefix(const fts_table_t* fts_table)
+char* fts_get_table_name_prefix(const fts_table_t* fts_table)
{
char table_id[FTS_AUX_MIN_TABLE_ID_LENGTH];
const size_t table_id_len = size_t(fts_get_table_id(fts_table,
table_id)) + 1;
mutex_enter(&dict_sys->mutex);
- const char* slash = strchr(fts_table->table->name, '/');
- ut_ad(slash);
/* Include the separator as well. */
- const size_t dbname_len = (slash - fts_table->table->name) + 1;
+ const size_t dbname_len = fts_table->table->name.dblen() + 1;
ut_ad(dbname_len > 1);
const size_t prefix_name_len = dbname_len + 4 + table_id_len;
- char* prefix_name = static_cast<char*>(ut_malloc(prefix_name_len));
- memcpy(prefix_name, fts_table->table->name, dbname_len);
+ char* prefix_name = static_cast<char*>(
+ ut_malloc_nokey(prefix_name_len));
+ memcpy(prefix_name, fts_table->table->name.m_name, dbname_len);
mutex_exit(&dict_sys->mutex);
memcpy(prefix_name + dbname_len, "FTS_", 4);
memcpy(prefix_name + dbname_len + 4, table_id, table_id_len);
@@ -123,7 +116,6 @@ UNIV_INTERN char* fts_get_table_name_prefix(const fts_table_t* fts_table)
@param[in] fts_table metadata on fulltext-indexed table
@param[out] table_name a name up to MAX_FULL_NAME_LEN
@param[in] dict_locked whether dict_sys->mutex is being held */
-UNIV_INTERN
void fts_get_table_name(const fts_table_t* fts_table, char* table_name,
bool dict_locked)
{
@@ -131,12 +123,10 @@ void fts_get_table_name(const fts_table_t* fts_table, char* table_name,
mutex_enter(&dict_sys->mutex);
}
ut_ad(mutex_own(&dict_sys->mutex));
- const char* slash = strchr(fts_table->table->name, '/');
- ut_ad(slash);
/* Include the separator as well. */
- const size_t dbname_len = (slash - fts_table->table->name) + 1;
+ const size_t dbname_len = fts_table->table->name.dblen() + 1;
ut_ad(dbname_len > 1);
- memcpy(table_name, fts_table->table->name, dbname_len);
+ memcpy(table_name, fts_table->table->name.m_name, dbname_len);
if (!dict_locked) {
mutex_exit(&dict_sys->mutex);
}
@@ -148,9 +138,8 @@ void fts_get_table_name(const fts_table_t* fts_table, char* table_name,
}
/******************************************************************//**
-Parse an SQL string. %s is replaced with the table's id.
+Parse an SQL string.
@return query graph */
-UNIV_INTERN
que_t*
fts_parse_sql(
/*==========*/
@@ -168,7 +157,7 @@ fts_parse_sql(
&& fts_table->table->fts->dict_locked);
if (!dict_locked) {
- ut_ad(!mutex_own(&(dict_sys->mutex)));
+ ut_ad(!mutex_own(&dict_sys->mutex));
/* The InnoDB SQL parser is not re-entrant. */
mutex_enter(&dict_sys->mutex);
@@ -181,7 +170,7 @@ fts_parse_sql(
mutex_exit(&dict_sys->mutex);
}
- mem_free(str);
+ ut_free(str);
return(graph);
}
@@ -189,7 +178,6 @@ fts_parse_sql(
/******************************************************************//**
Parse an SQL string.
@return query graph */
-UNIV_INTERN
que_t*
fts_parse_sql_no_dict_lock(
/*=======================*/
@@ -207,7 +195,7 @@ fts_parse_sql_no_dict_lock(
graph = pars_sql(info, str);
ut_a(graph);
- mem_free(str);
+ ut_free(str);
return(graph);
}
@@ -215,7 +203,6 @@ fts_parse_sql_no_dict_lock(
/******************************************************************//**
Evaluate an SQL query graph.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_eval_sql(
/*=========*/
@@ -249,7 +236,6 @@ Two indexed columns named "subject" and "content":
"$sel0, $sel1",
info/ids: sel0 -> "subject", sel1 -> "content",
@return heap-allocated WHERE string */
-UNIV_INTERN
const char*
fts_get_select_columns_str(
/*=======================*/
@@ -280,7 +266,6 @@ fts_get_select_columns_str(
/******************************************************************//**
Commit a transaction.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_sql_commit(
/*===========*/
@@ -299,7 +284,6 @@ fts_sql_commit(
/******************************************************************//**
Rollback a transaction.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_sql_rollback(
/*=============*/
diff --git a/storage/innobase/fts/fts0tlex.cc b/storage/innobase/fts/fts0tlex.cc
index 9cb46c70e1a..29f73f23b9d 100644
--- a/storage/innobase/fts/fts0tlex.cc
+++ b/storage/innobase/fts/fts0tlex.cc
@@ -1,3 +1,4 @@
+#include "univ.i"
#line 2 "fts0tlex.cc"
#line 4 "fts0tlex.cc"
@@ -665,10 +666,11 @@ this program; if not, write to the Free Software Foundation, Inc.,
/* Required for reentrant parser */
#define YY_DECL int fts_tlexer(YYSTYPE* val, yyscan_t yyscanner)
+#define exit(A) ut_error
-#line 670 "fts0tlex.cc"
+#line 671 "fts0tlex.cc"
#define YY_NO_INPUT 1
-#line 672 "fts0tlex.cc"
+#line 673 "fts0tlex.cc"
#define INITIAL 0
@@ -926,10 +928,10 @@ YY_DECL
}
{
-#line 44 "fts0tlex.l"
+#line 45 "fts0tlex.l"
-#line 933 "fts0tlex.cc"
+#line 934 "fts0tlex.cc"
while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */
{
@@ -984,12 +986,12 @@ do_action: /* This label is used only to access EOF actions. */
case 1:
YY_RULE_SETUP
-#line 46 "fts0tlex.l"
+#line 47 "fts0tlex.l"
/* Ignore whitespace */ ;
YY_BREAK
case 2:
YY_RULE_SETUP
-#line 48 "fts0tlex.l"
+#line 49 "fts0tlex.l"
{
val->oper = fts0tget_text(yyscanner)[0];
@@ -998,7 +1000,7 @@ YY_RULE_SETUP
YY_BREAK
case 3:
YY_RULE_SETUP
-#line 54 "fts0tlex.l"
+#line 55 "fts0tlex.l"
{
val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0tget_text(yyscanner)), fts0tget_leng(yyscanner));
@@ -1007,7 +1009,7 @@ YY_RULE_SETUP
YY_BREAK
case 4:
YY_RULE_SETUP
-#line 60 "fts0tlex.l"
+#line 61 "fts0tlex.l"
{
val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0tget_text(yyscanner)), fts0tget_leng(yyscanner));
@@ -1016,21 +1018,21 @@ YY_RULE_SETUP
YY_BREAK
case 5:
YY_RULE_SETUP
-#line 65 "fts0tlex.l"
+#line 66 "fts0tlex.l"
;
YY_BREAK
case 6:
/* rule 6 can match eol */
YY_RULE_SETUP
-#line 66 "fts0tlex.l"
+#line 67 "fts0tlex.l"
YY_BREAK
case 7:
YY_RULE_SETUP
-#line 68 "fts0tlex.l"
+#line 69 "fts0tlex.l"
ECHO;
YY_BREAK
-#line 1034 "fts0tlex.cc"
+#line 1035 "fts0tlex.cc"
case YY_STATE_EOF(INITIAL):
yyterminate();
@@ -2162,6 +2164,6 @@ void yyfree (void * ptr , yyscan_t yyscanner)
#define YYTABLES_NAME "yytables"
-#line 68 "fts0tlex.l"
+#line 69 "fts0tlex.l"
diff --git a/storage/innobase/fts/fts0tlex.l b/storage/innobase/fts/fts0tlex.l
index bcf5439ae9f..e19e907f8c9 100644
--- a/storage/innobase/fts/fts0tlex.l
+++ b/storage/innobase/fts/fts0tlex.l
@@ -30,6 +30,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
/* Required for reentrant parser */
#define YY_DECL int fts_tlexer(YYSTYPE* val, yyscan_t yyscanner)
+#define exit(A) ut_error
%}
diff --git a/storage/innobase/fut/fut0fut.cc b/storage/innobase/fut/fut0fut.cc
deleted file mode 100644
index 74814acbb1d..00000000000
--- a/storage/innobase/fut/fut0fut.cc
+++ /dev/null
@@ -1,31 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file fut/fut0fut.cc
-File-based utilities
-
-Created 12/13/1995 Heikki Tuuri
-***********************************************************************/
-
-#include "fut0fut.h"
-
-#ifdef UNIV_NONINL
-#include "fut0fut.ic"
-#endif
-
diff --git a/storage/innobase/fut/fut0lst.cc b/storage/innobase/fut/fut0lst.cc
index 76cf3cadd5f..9359e5bf478 100644
--- a/storage/innobase/fut/fut0lst.cc
+++ b/storage/innobase/fut/fut0lst.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,11 +24,6 @@ Created 11/28/1995 Heikki Tuuri
***********************************************************************/
#include "fut0lst.h"
-
-#ifdef UNIV_NONINL
-#include "fut0lst.ic"
-#endif
-
#include "buf0buf.h"
#include "page0page.h"
@@ -49,9 +44,13 @@ flst_add_to_empty(
ut_ad(mtr && base && node);
ut_ad(base != node);
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));
- len = flst_get_len(base, mtr);
+ ut_ad(mtr_memo_contains_page_flagged(mtr, base,
+ MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX));
+ ut_ad(mtr_memo_contains_page_flagged(mtr, node,
+ MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX));
+ len = flst_get_len(base);
ut_a(len == 0);
buf_ptr_get_fsp_addr(node, &space, &node_addr);
@@ -69,8 +68,28 @@ flst_add_to_empty(
}
/********************************************************************//**
+Inserts a node after another in a list. */
+static
+void
+flst_insert_after(
+/*==============*/
+ flst_base_node_t* base, /*!< in: pointer to base node of list */
+ flst_node_t* node1, /*!< in: node to insert after */
+ flst_node_t* node2, /*!< in: node to add */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************************//**
+Inserts a node before another in a list. */
+static
+void
+flst_insert_before(
+/*===============*/
+ flst_base_node_t* base, /*!< in: pointer to base node of list */
+ flst_node_t* node2, /*!< in: node to insert */
+ flst_node_t* node3, /*!< in: node to insert before */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+
+/********************************************************************//**
Adds a node as the last node in a list. */
-UNIV_INTERN
void
flst_add_last(
/*==========*/
@@ -82,26 +101,35 @@ flst_add_last(
fil_addr_t node_addr;
ulint len;
fil_addr_t last_addr;
- flst_node_t* last_node;
ut_ad(mtr && base && node);
ut_ad(base != node);
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));
- len = flst_get_len(base, mtr);
+ ut_ad(mtr_memo_contains_page_flagged(mtr, base,
+ MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX));
+ ut_ad(mtr_memo_contains_page_flagged(mtr, node,
+ MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX));
+ len = flst_get_len(base);
last_addr = flst_get_last(base, mtr);
buf_ptr_get_fsp_addr(node, &space, &node_addr);
/* If the list is not empty, call flst_insert_after */
if (len != 0) {
+ flst_node_t* last_node;
+
if (last_addr.page == node_addr.page) {
last_node = page_align(node) + last_addr.boffset;
} else {
- ulint zip_size = fil_space_get_zip_size(space);
+ bool found;
+ const page_size_t& page_size
+ = fil_space_get_page_size(space, &found);
- last_node = fut_get_ptr(space, zip_size, last_addr,
- RW_X_LATCH, mtr);
+ ut_ad(found);
+
+ last_node = fut_get_ptr(space, page_size, last_addr,
+ RW_SX_LATCH, mtr);
}
flst_insert_after(base, last_node, node, mtr);
@@ -113,7 +141,6 @@ flst_add_last(
/********************************************************************//**
Adds a node as the first node in a list. */
-UNIV_INTERN
void
flst_add_first(
/*===========*/
@@ -129,9 +156,13 @@ flst_add_first(
ut_ad(mtr && base && node);
ut_ad(base != node);
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));
- len = flst_get_len(base, mtr);
+ ut_ad(mtr_memo_contains_page_flagged(mtr, base,
+ MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX));
+ ut_ad(mtr_memo_contains_page_flagged(mtr, node,
+ MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX));
+ len = flst_get_len(base);
first_addr = flst_get_first(base, mtr);
buf_ptr_get_fsp_addr(node, &space, &node_addr);
@@ -141,10 +172,14 @@ flst_add_first(
if (first_addr.page == node_addr.page) {
first_node = page_align(node) + first_addr.boffset;
} else {
- ulint zip_size = fil_space_get_zip_size(space);
+ bool found;
+ const page_size_t& page_size
+ = fil_space_get_page_size(space, &found);
+
+ ut_ad(found);
- first_node = fut_get_ptr(space, zip_size, first_addr,
- RW_X_LATCH, mtr);
+ first_node = fut_get_ptr(space, page_size, first_addr,
+ RW_SX_LATCH, mtr);
}
flst_insert_before(base, node, first_node, mtr);
@@ -156,7 +191,7 @@ flst_add_first(
/********************************************************************//**
Inserts a node after another in a list. */
-UNIV_INTERN
+static
void
flst_insert_after(
/*==============*/
@@ -176,9 +211,15 @@ flst_insert_after(
ut_ad(base != node1);
ut_ad(base != node2);
ut_ad(node2 != node1);
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node1, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page_flagged(mtr, base,
+ MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX));
+ ut_ad(mtr_memo_contains_page_flagged(mtr, node1,
+ MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX));
+ ut_ad(mtr_memo_contains_page_flagged(mtr, node2,
+ MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX));
buf_ptr_get_fsp_addr(node1, &space, &node1_addr);
buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
@@ -191,10 +232,14 @@ flst_insert_after(
if (!fil_addr_is_null(node3_addr)) {
/* Update prev field of node3 */
- ulint zip_size = fil_space_get_zip_size(space);
+ bool found;
+ const page_size_t& page_size
+ = fil_space_get_page_size(space, &found);
- node3 = fut_get_ptr(space, zip_size,
- node3_addr, RW_X_LATCH, mtr);
+ ut_ad(found);
+
+ node3 = fut_get_ptr(space, page_size,
+ node3_addr, RW_SX_LATCH, mtr);
flst_write_addr(node3 + FLST_PREV, node2_addr, mtr);
} else {
/* node1 was last in list: update last field in base */
@@ -205,13 +250,13 @@ flst_insert_after(
flst_write_addr(node1 + FLST_NEXT, node2_addr, mtr);
/* Update len of base node */
- len = flst_get_len(base, mtr);
+ len = flst_get_len(base);
mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
}
/********************************************************************//**
Inserts a node before another in a list. */
-UNIV_INTERN
+static
void
flst_insert_before(
/*===============*/
@@ -231,9 +276,15 @@ flst_insert_before(
ut_ad(base != node2);
ut_ad(base != node3);
ut_ad(node2 != node3);
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node3, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page_flagged(mtr, base,
+ MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX));
+ ut_ad(mtr_memo_contains_page_flagged(mtr, node2,
+ MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX));
+ ut_ad(mtr_memo_contains_page_flagged(mtr, node3,
+ MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX));
buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
buf_ptr_get_fsp_addr(node3, &space, &node3_addr);
@@ -245,10 +296,15 @@ flst_insert_before(
flst_write_addr(node2 + FLST_NEXT, node3_addr, mtr);
if (!fil_addr_is_null(node1_addr)) {
- ulint zip_size = fil_space_get_zip_size(space);
+ bool found;
+ const page_size_t& page_size
+ = fil_space_get_page_size(space, &found);
+
+ ut_ad(found);
+
/* Update next field of node1 */
- node1 = fut_get_ptr(space, zip_size, node1_addr,
- RW_X_LATCH, mtr);
+ node1 = fut_get_ptr(space, page_size, node1_addr,
+ RW_SX_LATCH, mtr);
flst_write_addr(node1 + FLST_NEXT, node2_addr, mtr);
} else {
/* node3 was first in list: update first field in base */
@@ -259,13 +315,12 @@ flst_insert_before(
flst_write_addr(node3 + FLST_PREV, node2_addr, mtr);
/* Update len of base node */
- len = flst_get_len(base, mtr);
+ len = flst_get_len(base);
mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
}
/********************************************************************//**
Removes a node. */
-UNIV_INTERN
void
flst_remove(
/*========*/
@@ -274,7 +329,6 @@ flst_remove(
mtr_t* mtr) /*!< in: mini-transaction handle */
{
ulint space;
- ulint zip_size;
flst_node_t* node1;
fil_addr_t node1_addr;
fil_addr_t node2_addr;
@@ -283,11 +337,20 @@ flst_remove(
ulint len;
ut_ad(mtr && node2 && base);
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page_flagged(mtr, base,
+ MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX));
+ ut_ad(mtr_memo_contains_page_flagged(mtr, node2,
+ MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX));
buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
- zip_size = fil_space_get_zip_size(space);
+
+ bool found;
+ const page_size_t& page_size = fil_space_get_page_size(space,
+ &found);
+
+ ut_ad(found);
node1_addr = flst_get_prev_addr(node2, mtr);
node3_addr = flst_get_next_addr(node2, mtr);
@@ -300,8 +363,8 @@ flst_remove(
node1 = page_align(node2) + node1_addr.boffset;
} else {
- node1 = fut_get_ptr(space, zip_size,
- node1_addr, RW_X_LATCH, mtr);
+ node1 = fut_get_ptr(space, page_size,
+ node1_addr, RW_SX_LATCH, mtr);
}
ut_ad(node1 != node2);
@@ -319,8 +382,8 @@ flst_remove(
node3 = page_align(node2) + node3_addr.boffset;
} else {
- node3 = fut_get_ptr(space, zip_size,
- node3_addr, RW_X_LATCH, mtr);
+ node3 = fut_get_ptr(space, page_size,
+ node3_addr, RW_SX_LATCH, mtr);
}
ut_ad(node2 != node3);
@@ -332,7 +395,7 @@ flst_remove(
}
/* Update len of base node */
- len = flst_get_len(base, mtr);
+ len = flst_get_len(base);
ut_ad(len > 0);
mlog_write_ulint(base + FLST_LEN, len - 1, MLOG_4BYTES, mtr);
@@ -340,8 +403,7 @@ flst_remove(
/********************************************************************//**
Validates a file-based list.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
flst_validate(
/*==========*/
@@ -349,7 +411,6 @@ flst_validate(
mtr_t* mtr1) /*!< in: mtr */
{
ulint space;
- ulint zip_size;
const flst_node_t* node;
fil_addr_t node_addr;
fil_addr_t base_addr;
@@ -358,7 +419,9 @@ flst_validate(
mtr_t mtr2;
ut_ad(base);
- ut_ad(mtr_memo_contains_page(mtr1, base, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page_flagged(mtr1, base,
+ MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX));
/* We use two mini-transaction handles: the first is used to
lock the base node, and prevent other threads from modifying the
@@ -369,16 +432,21 @@ flst_validate(
/* Find out the space id */
buf_ptr_get_fsp_addr(base, &space, &base_addr);
- zip_size = fil_space_get_zip_size(space);
- len = flst_get_len(base, mtr1);
+ bool found;
+ const page_size_t& page_size = fil_space_get_page_size(space,
+ &found);
+
+ ut_ad(found);
+
+ len = flst_get_len(base);
node_addr = flst_get_first(base, mtr1);
for (i = 0; i < len; i++) {
mtr_start(&mtr2);
- node = fut_get_ptr(space, zip_size,
- node_addr, RW_X_LATCH, &mtr2);
+ node = fut_get_ptr(space, page_size,
+ node_addr, RW_SX_LATCH, &mtr2);
node_addr = flst_get_next_addr(node, &mtr2);
mtr_commit(&mtr2); /* Commit mtr2 each round to prevent buffer
@@ -392,8 +460,8 @@ flst_validate(
for (i = 0; i < len; i++) {
mtr_start(&mtr2);
- node = fut_get_ptr(space, zip_size,
- node_addr, RW_X_LATCH, &mtr2);
+ node = fut_get_ptr(space, page_size,
+ node_addr, RW_SX_LATCH, &mtr2);
node_addr = flst_get_prev_addr(node, &mtr2);
mtr_commit(&mtr2); /* Commit mtr2 each round to prevent buffer
@@ -404,29 +472,3 @@ flst_validate(
return(TRUE);
}
-
-/********************************************************************//**
-Prints info of a file-based list. */
-UNIV_INTERN
-void
-flst_print(
-/*=======*/
- const flst_base_node_t* base, /*!< in: pointer to base node of list */
- mtr_t* mtr) /*!< in: mtr */
-{
- const buf_frame_t* frame;
- ulint len;
-
- ut_ad(base && mtr);
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
- frame = page_align((byte*) base);
-
- len = flst_get_len(base, mtr);
-
- fprintf(stderr,
- "FILE-BASED LIST:\n"
- "Base node in space %lu page %lu byte offset %lu; len %lu\n",
- (ulong) page_get_space_id(frame),
- (ulong) page_get_page_no(frame),
- (ulong) page_offset(base), (ulong) len);
-}
diff --git a/storage/innobase/gis/gis0geo.cc b/storage/innobase/gis/gis0geo.cc
new file mode 100644
index 00000000000..3a727185632
--- /dev/null
+++ b/storage/innobase/gis/gis0geo.cc
@@ -0,0 +1,782 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2019, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file gis/gis0geo.cc
+InnoDB R-tree related functions.
+
+Created 2013/03/27 Allen Lai and Jimmy Yang
+*******************************************************/
+
+#include "page0types.h"
+#include "gis0geo.h"
+#include "page0cur.h"
+#include "ut0rnd.h"
+#include "mach0data.h"
+
+#include <spatial.h>
+
+/* These definitions are for comparing 2 mbrs. */
+
+/* Check if a intersects b.
+Return false if a intersects b, otherwise true. */
+#define INTERSECT_CMP(amin, amax, bmin, bmax) \
+(((amin) > (bmax)) || ((bmin) > (amax)))
+
+/* Check if b contains a.
+Return false if b contains a, otherwise true. */
+#define CONTAIN_CMP(amin, amax, bmin, bmax) \
+(((bmin) > (amin)) || ((bmax) < (amax)))
+
+/* Check if b is within a.
+Return false if b is within a, otherwise true. */
+#define WITHIN_CMP(amin, amax, bmin, bmax) \
+(((amin) > (bmin)) || ((amax) < (bmax)))
+
+/* Check if a is disjoint from b.
+Return false if a is disjoint from b, otherwise true. */
+#define DISJOINT_CMP(amin, amax, bmin, bmax) \
+(((amin) <= (bmax)) && ((bmin) <= (amax)))
+
+/* Check if a equals b.
+Return false if equal, otherwise true. */
+#define EQUAL_CMP(amin, amax, bmin, bmax) \
+(((amin) != (bmin)) || ((amax) != (bmax)))
+
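+/* Note that each of the above *_CMP macros is an inverted predicate:
+it evaluates to false when the named relation holds.  A minimal worked
+example with two hypothetical one-dimensional MBRs a = [0, 1] and
+b = [2, 3], which are disjoint:
+
+	INTERSECT_CMP(0.0, 1.0, 2.0, 3.0) == true   (a does not intersect b)
+	DISJOINT_CMP(0.0, 1.0, 2.0, 3.0)  == false  (a is disjoint from b)
+
+Callers such as rtree_key_cmp() below therefore treat a true result as
+"the relation does not hold". */
+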
+/****************************************************************
+Functions for generating mbr
+****************************************************************/
+/*************************************************************//**
+Add one point stored in wkb to a given mbr.
+@return 0 if the point in wkb is valid, otherwise -1. */
+static
+int
+rtree_add_point_to_mbr(
+/*===================*/
+ const uchar** wkb, /*!< in: pointer to wkb,
+ where point is stored */
+ const uchar* end, /*!< in: end of wkb. */
+ uint n_dims, /*!< in: dimensions. */
+ uchar byte_order, /*!< in: byte order. */
+ double* mbr) /*!< in/out: mbr, which
+ must be of length n_dims * 2. */
+{
+ double ord;
+ double* mbr_end = mbr + n_dims * 2;
+
+ while (mbr < mbr_end) {
+ if ((*wkb) + sizeof(double) > end) {
+ return(-1);
+ }
+
+ ord = mach_double_read(*wkb);
+ (*wkb) += sizeof(double);
+
+ if (ord < *mbr) {
+ *mbr = ord;
+ }
+ mbr++;
+
+ if (ord > *mbr) {
+ *mbr = ord;
+ }
+ mbr++;
+ }
+
+ return(0);
+}
+
+/*************************************************************//**
+Get mbr of point stored in wkb.
+@return 0 if ok, otherwise -1. */
+static
+int
+rtree_get_point_mbr(
+/*================*/
+ const uchar** wkb, /*!< in: pointer to wkb,
+ where point is stored. */
+ const uchar* end, /*!< in: end of wkb. */
+ uint n_dims, /*!< in: dimensions. */
+ uchar byte_order, /*!< in: byte order. */
+ double* mbr) /*!< in/out: mbr,
+ must be of length n_dims * 2. */
+{
+ return rtree_add_point_to_mbr(wkb, end, n_dims, byte_order, mbr);
+}
+
+
+/*************************************************************//**
+Get mbr of linestring stored in wkb.
+@return 0 if the linestring is valid, otherwise -1. */
+static
+int
+rtree_get_linestring_mbr(
+/*=====================*/
+ const uchar** wkb, /*!< in: pointer to wkb,
+ where point is stored. */
+ const uchar* end, /*!< in: end of wkb. */
+ uint n_dims, /*!< in: dimensions. */
+ uchar byte_order, /*!< in: byte order. */
+ double* mbr) /*!< in/out: mbr,
+ must be of length n_dims * 2. */
+{
+ uint n_points;
+
+ n_points = uint4korr(*wkb);
+ (*wkb) += 4;
+
+ for (; n_points > 0; --n_points) {
+ /* Add next point to mbr */
+ if (rtree_add_point_to_mbr(wkb, end, n_dims,
+ byte_order, mbr)) {
+ return(-1);
+ }
+ }
+
+ return(0);
+}
+
+/*************************************************************//**
+Get mbr of polygon stored in wkb.
+@return 0 if the polygon is valid, otherwise -1. */
+static
+int
+rtree_get_polygon_mbr(
+/*==================*/
+ const uchar** wkb, /*!< in: pointer to wkb,
+ where point is stored. */
+ const uchar* end, /*!< in: end of wkb. */
+ uint n_dims, /*!< in: dimensions. */
+ uchar byte_order, /*!< in: byte order. */
+ double* mbr) /*!< in/out: mbr,
+ must be of length n_dims * 2. */
+{
+ uint n_linear_rings;
+ uint n_points;
+
+ n_linear_rings = uint4korr((*wkb));
+ (*wkb) += 4;
+
+ for (; n_linear_rings > 0; --n_linear_rings) {
+ n_points = uint4korr((*wkb));
+ (*wkb) += 4;
+
+ for (; n_points > 0; --n_points) {
+ /* Add next point to mbr */
+ if (rtree_add_point_to_mbr(wkb, end, n_dims,
+ byte_order, mbr)) {
+ return(-1);
+ }
+ }
+ }
+
+ return(0);
+}
+
+/*************************************************************//**
+Get mbr of geometry stored in wkb.
+@return 0 if the geometry is valid, otherwise -1. */
+static
+int
+rtree_get_geometry_mbr(
+/*===================*/
+ const uchar** wkb, /*!< in: pointer to wkb,
+ where point is stored. */
+ const uchar* end, /*!< in: end of wkb. */
+ uint n_dims, /*!< in: dimensions. */
+ double* mbr, /*!< in/out: mbr. */
+	int		top)	/*!< in: nonzero if this is the
+				top-level call rather than a
+				recursive invocation. */
+{
+ int res;
+ uchar byte_order = 2;
+ uint wkb_type = 0;
+ uint n_items;
+
+ byte_order = *(*wkb);
+ ++(*wkb);
+
+ wkb_type = uint4korr((*wkb));
+ (*wkb) += 4;
+
+ switch ((enum wkbType) wkb_type) {
+ case wkbPoint:
+ res = rtree_get_point_mbr(wkb, end, n_dims, byte_order, mbr);
+ break;
+ case wkbLineString:
+ res = rtree_get_linestring_mbr(wkb, end, n_dims,
+ byte_order, mbr);
+ break;
+ case wkbPolygon:
+ res = rtree_get_polygon_mbr(wkb, end, n_dims, byte_order, mbr);
+ break;
+ case wkbMultiPoint:
+ n_items = uint4korr((*wkb));
+ (*wkb) += 4;
+ for (; n_items > 0; --n_items) {
+ byte_order = *(*wkb);
+ ++(*wkb);
+ (*wkb) += 4;
+ if (rtree_get_point_mbr(wkb, end, n_dims,
+ byte_order, mbr)) {
+ return(-1);
+ }
+ }
+ res = 0;
+ break;
+ case wkbMultiLineString:
+ n_items = uint4korr((*wkb));
+ (*wkb) += 4;
+ for (; n_items > 0; --n_items) {
+ byte_order = *(*wkb);
+ ++(*wkb);
+ (*wkb) += 4;
+ if (rtree_get_linestring_mbr(wkb, end, n_dims,
+ byte_order, mbr)) {
+ return(-1);
+ }
+ }
+ res = 0;
+ break;
+ case wkbMultiPolygon:
+ n_items = uint4korr((*wkb));
+ (*wkb) += 4;
+ for (; n_items > 0; --n_items) {
+ byte_order = *(*wkb);
+ ++(*wkb);
+ (*wkb) += 4;
+ if (rtree_get_polygon_mbr(wkb, end, n_dims,
+ byte_order, mbr)) {
+ return(-1);
+ }
+ }
+ res = 0;
+ break;
+ case wkbGeometryCollection:
+ if (!top) {
+ return(-1);
+ }
+
+ n_items = uint4korr((*wkb));
+ (*wkb) += 4;
+ for (; n_items > 0; --n_items) {
+ if (rtree_get_geometry_mbr(wkb, end, n_dims,
+ mbr, 0)) {
+ return(-1);
+ }
+ }
+ res = 0;
+ break;
+ default:
+ res = -1;
+ }
+
+ return(res);
+}
+
+/*************************************************************//**
+Calculate Minimal Bounding Rectangle (MBR) of the spatial object
+stored in "well-known binary representation" (wkb) format.
+@return 0 if ok. */
+int
+rtree_mbr_from_wkb(
+/*===============*/
+ const uchar* wkb, /*!< in: wkb */
+ uint size, /*!< in: size of wkb. */
+ uint n_dims, /*!< in: dimensions. */
+ double* mbr) /*!< in/out: mbr, which must
+				be of length n_dims * 2. */
+{
+ for (uint i = 0; i < n_dims; ++i) {
+ mbr[i * 2] = DBL_MAX;
+ mbr[i * 2 + 1] = -DBL_MAX;
+ }
+
+ return rtree_get_geometry_mbr(&wkb, wkb + size, n_dims, mbr, 1);
+}
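+
+/* A minimal usage sketch, assuming a hypothetical 2-dimensional WKB
+point buffer "wkb_buf" (1 byte byte order, 4 bytes wkbType, then
+2 * sizeof(double) coordinates, 21 bytes in total).  For a point the
+MBR degenerates to xmin == xmax and ymin == ymax:
+
+	double	mbr[SPDIMS * 2];
+
+	if (rtree_mbr_from_wkb(wkb_buf, 21, SPDIMS, mbr) == 0) {
+		// mbr[0], mbr[1] = x range; mbr[2], mbr[3] = y range
+	}
+*/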
+
+
+/****************************************************************
+Functions for Rtree split
+****************************************************************/
+/*************************************************************//**
+Join 2 mbrs of dimensions n_dim. */
+static
+void
+mbr_join(
+/*=====*/
+ double* a, /*!< in/out: the first mbr,
+ where the joined result will be. */
+ const double* b, /*!< in: the second mbr. */
+ int n_dim) /*!< in: dimensions. */
+{
+ double* end = a + n_dim * 2;
+
+ do {
+ if (a[0] > b[0]) {
+ a[0] = b[0];
+ }
+
+ if (a[1] < b[1]) {
+ a[1] = b[1];
+ }
+
+ a += 2;
+ b += 2;
+
+ } while (a != end);
+}
+
+/*************************************************************//**
+Counts the square of mbr which is the join of a and b. Both a and b
+are of dimensions n_dim. */
+static
+double
+mbr_join_square(
+/*============*/
+ const double* a, /*!< in: the first mbr. */
+ const double* b, /*!< in: the second mbr. */
+ int n_dim) /*!< in: dimensions. */
+{
+ const double* end = a + n_dim * 2;
+ double square = 1.0;
+
+ do {
+ square *= std::max(a[1], b[1]) - std::min(a[0], b[0]);
+
+ a += 2;
+ b += 2;
+ } while (a != end);
+
+ /* Check if finite (not infinity or NaN),
+ so we don't get NaN in calculations */
+ if (!std::isfinite(square)) {
+ return DBL_MAX;
+ }
+
+ return square;
+}
+
+/*************************************************************//**
+Counts the square of mbr of dimension n_dim. */
+static
+double
+count_square(
+/*=========*/
+ const double* a, /*!< in: the mbr. */
+ int n_dim) /*!< in: dimensions. */
+{
+ const double* end = a + n_dim * 2;
+ double square = 1.0;
+
+ do {
+ square *= a[1] - a[0];
+ a += 2;
+ } while (a != end);
+
+ return square;
+}
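+
+/* Worked example with hypothetical values: for the 2-dimensional MBRs
+a = {0, 2, 0, 2} and b = {1, 3, 1, 3}, stored as min/max pairs per
+dimension, count_square(a, 2) = (2 - 0) * (2 - 0) = 4, and
+mbr_join_square(a, b, 2) = (3 - 0) * (3 - 0) = 9, the area of the
+joined MBR.  mbr_join(a, b, 2) would enlarge a to {0, 3, 0, 3}. */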
+
+/*************************************************************//**
+Copy mbr of dimension n_dim from src to dst. */
+inline
+static
+void
+copy_coords(
+/*========*/
+ double* dst, /*!< in/out: destination. */
+ const double* src, /*!< in: source. */
+ int n_dim) /*!< in: dimensions. */
+{
+ memcpy(dst, src, DATA_MBR_LEN);
+}
+
+/*************************************************************//**
+Select two nodes as seeds around which to build the two groups. */
+static
+void
+pick_seeds(
+/*=======*/
+ rtr_split_node_t* node, /*!< in: split nodes. */
+ int n_entries, /*!< in: entries number. */
+ rtr_split_node_t** seed_a, /*!< out: seed 1. */
+ rtr_split_node_t** seed_b, /*!< out: seed 2. */
+ int n_dim) /*!< in: dimensions. */
+{
+ rtr_split_node_t* cur1;
+ rtr_split_node_t* lim1 = node + (n_entries - 1);
+ rtr_split_node_t* cur2;
+ rtr_split_node_t* lim2 = node + n_entries;
+
+ double max_d = -DBL_MAX;
+ double d;
+
+ *seed_a = node;
+ *seed_b = node + 1;
+
+ for (cur1 = node; cur1 < lim1; ++cur1) {
+ for (cur2 = cur1 + 1; cur2 < lim2; ++cur2) {
+ d = mbr_join_square(cur1->coords, cur2->coords, n_dim) -
+ cur1->square - cur2->square;
+ if (d > max_d) {
+ max_d = d;
+ *seed_a = cur1;
+ *seed_b = cur2;
+ }
+ }
+ }
+}
+
+/*************************************************************//**
+Select the next node and the group to add it to. */
+static
+void
+pick_next(
+/*======*/
+ rtr_split_node_t* node, /*!< in: split nodes. */
+ int n_entries, /*!< in: entries number. */
+ double* g1, /*!< in: mbr of group 1. */
+ double* g2, /*!< in: mbr of group 2. */
+ rtr_split_node_t** choice, /*!< out: the next node.*/
+ int* n_group, /*!< out: group number.*/
+ int n_dim) /*!< in: dimensions. */
+{
+ rtr_split_node_t* cur = node;
+ rtr_split_node_t* end = node + n_entries;
+ double max_diff = -DBL_MAX;
+
+ for (; cur < end; ++cur) {
+ double diff;
+ double abs_diff;
+
+ if (cur->n_node != 0) {
+ continue;
+ }
+
+ diff = mbr_join_square(g1, cur->coords, n_dim) -
+ mbr_join_square(g2, cur->coords, n_dim);
+
+ abs_diff = fabs(diff);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+
+ /* Introduce some randomness if the record
+ is identical */
+ if (diff == 0) {
+ diff = static_cast<double>(ut_rnd_gen() & 1);
+ }
+
+ *n_group = 1 + (diff > 0);
+ *choice = cur;
+ }
+ }
+}
+
+/*************************************************************//**
+Mark entries not yet assigned to a group as belonging to group n_group. */
+static
+void
+mark_all_entries(
+/*=============*/
+ rtr_split_node_t* node, /*!< in/out: split nodes. */
+ int n_entries, /*!< in: entries number. */
+ int n_group) /*!< in: group number. */
+{
+ rtr_split_node_t* cur = node;
+ rtr_split_node_t* end = node + n_entries;
+ for (; cur < end; ++cur) {
+ if (cur->n_node != 0) {
+ continue;
+ }
+ cur->n_node = n_group;
+ }
+}
+
+/*************************************************************//**
+Split rtree node.
+Return which group the first rec is in. */
+int
+split_rtree_node(
+/*=============*/
+ rtr_split_node_t* node, /*!< in: split nodes. */
+ int n_entries, /*!< in: entries number. */
+ int all_size, /*!< in: total key's size. */
+ int key_size, /*!< in: key's size. */
+ int min_size, /*!< in: minimal group size. */
+	int		size1,		/*!< in: initial size of group 1. */
+	int		size2,		/*!< in: initial size of group 2. */
+ double** d_buffer, /*!< in/out: buffer. */
+ int n_dim, /*!< in: dimensions. */
+ uchar* first_rec) /*!< in: the first rec. */
+{
+ rtr_split_node_t* cur;
+ rtr_split_node_t* a = NULL;
+ rtr_split_node_t* b = NULL;
+ double* g1 = reserve_coords(d_buffer, n_dim);
+ double* g2 = reserve_coords(d_buffer, n_dim);
+ rtr_split_node_t* next = NULL;
+ int next_node = 0;
+ int i;
+ int first_rec_group = 1;
+ rtr_split_node_t* end = node + n_entries;
+
+ if (all_size < min_size * 2) {
+ return 1;
+ }
+
+ cur = node;
+ for (; cur < end; ++cur) {
+ cur->square = count_square(cur->coords, n_dim);
+ cur->n_node = 0;
+ }
+
+ pick_seeds(node, n_entries, &a, &b, n_dim);
+ a->n_node = 1;
+ b->n_node = 2;
+
+ copy_coords(g1, a->coords, n_dim);
+ size1 += key_size;
+ copy_coords(g2, b->coords, n_dim);
+ size2 += key_size;
+
+ for (i = n_entries - 2; i > 0; --i) {
+ /* Can't write into group 2 */
+ if (all_size - (size2 + key_size) < min_size) {
+ mark_all_entries(node, n_entries, 1);
+ break;
+ }
+
+ /* Can't write into group 1 */
+ if (all_size - (size1 + key_size) < min_size) {
+ mark_all_entries(node, n_entries, 2);
+ break;
+ }
+
+ pick_next(node, n_entries, g1, g2, &next, &next_node, n_dim);
+ if (next_node == 1) {
+ size1 += key_size;
+ mbr_join(g1, next->coords, n_dim);
+ } else {
+ size2 += key_size;
+ mbr_join(g2, next->coords, n_dim);
+ }
+
+ next->n_node = next_node;
+
+ /* Find out where the first rec (of the page) will be at,
+ and inform the caller */
+ if (first_rec && first_rec == next->key) {
+ first_rec_group = next_node;
+ }
+ }
+
+ return(first_rec_group);
+}
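+
+/* The routine above is, in effect, a quadratic split: pick_seeds()
+chooses the two entries whose joined MBR wastes the most area,
+pick_next() greedily assigns the remaining entry with the strongest
+preference for one group, and mark_all_entries() short-circuits once
+one group must receive all remaining entries to satisfy min_size.
+The return value tells the caller which group (1 or 2) ended up with
+first_rec. */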
+
+/*************************************************************//**
+Compares two keys a and b depending on the comparison mode,
+which denotes one of these relations:
+	MBR_INTERSECT(a,b)	a overlaps b
+	MBR_CONTAIN(a,b)	a contains b
+	MBR_DISJOINT(a,b)	a is disjoint from b
+	MBR_WITHIN(a,b)		a is within b
+	MBR_EQUAL(a,b)		all coordinates of the MBRs are equal
+Return 0 if the relation holds, otherwise 1. */
+int
+rtree_key_cmp(
+/*==========*/
+ page_cur_mode_t mode, /*!< in: compare method. */
+ const uchar* b, /*!< in: first key. */
+ int b_len, /*!< in: first key len. */
+ const uchar* a, /*!< in: second key. */
+ int a_len) /*!< in: second key len. */
+{
+ double amin, amax, bmin, bmax;
+ int key_len;
+ int keyseg_len;
+
+ keyseg_len = 2 * sizeof(double);
+ for (key_len = a_len; key_len > 0; key_len -= keyseg_len) {
+ amin = mach_double_read(a);
+ bmin = mach_double_read(b);
+ amax = mach_double_read(a + sizeof(double));
+ bmax = mach_double_read(b + sizeof(double));
+
+ switch (mode) {
+ case PAGE_CUR_INTERSECT:
+ if (INTERSECT_CMP(amin, amax, bmin, bmax)) {
+ return(1);
+ }
+ break;
+ case PAGE_CUR_CONTAIN:
+ if (CONTAIN_CMP(amin, amax, bmin, bmax)) {
+ return(1);
+ }
+ break;
+ case PAGE_CUR_WITHIN:
+ if (WITHIN_CMP(amin, amax, bmin, bmax)) {
+ return(1);
+ }
+ break;
+ case PAGE_CUR_MBR_EQUAL:
+ if (EQUAL_CMP(amin, amax, bmin, bmax)) {
+ return(1);
+ }
+ break;
+ case PAGE_CUR_DISJOINT:
+ int result;
+
+ result = DISJOINT_CMP(amin, amax, bmin, bmax);
+ if (result == 0) {
+ return(0);
+ }
+
+ if (key_len - keyseg_len <= 0) {
+ return(1);
+ }
+
+ break;
+ default:
+ /* if unknown comparison operator */
+ ut_ad(0);
+ }
+
+ a += keyseg_len;
+ b += keyseg_len;
+ }
+
+ return(0);
+}
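+
+/* A minimal usage sketch with hypothetical buffers "key" and "field",
+each holding one MBR serialized as 2 * sizeof(double) values per
+dimension (as read by mach_double_read()):
+
+	if (rtree_key_cmp(PAGE_CUR_WITHIN, key, DATA_MBR_LEN,
+			  field, DATA_MBR_LEN) == 0) {
+		// the first key lies within the second in all dimensions
+	}
+*/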
+
+/*************************************************************//**
+Calculates MBR_AREA(a+b) - MBR_AREA(a)
+Note: when 'a' and 'b' objects are far from each other,
+the area increase can be really big, so this function
+can return 'inf' as a result.
+Return the area increase. */
+double
+rtree_area_increase(
+ const uchar* a, /*!< in: original mbr. */
+ const uchar* b, /*!< in: new mbr. */
+ int mbr_len, /*!< in: mbr length of a and b. */
+	double*		ab_area)	/*!< out: area of the joined mbr. */
+{
+ double a_area = 1.0;
+ double loc_ab_area = 1.0;
+ double amin, amax, bmin, bmax;
+ int key_len;
+ int keyseg_len;
+ double data_round = 1.0;
+
+ keyseg_len = 2 * sizeof(double);
+
+ for (key_len = mbr_len; key_len > 0; key_len -= keyseg_len) {
+ double area;
+
+ amin = mach_double_read(a);
+ bmin = mach_double_read(b);
+ amax = mach_double_read(a + sizeof(double));
+ bmax = mach_double_read(b + sizeof(double));
+
+ area = amax - amin;
+ if (area == 0) {
+ a_area *= LINE_MBR_WEIGHTS;
+ } else {
+ a_area *= area;
+ }
+
+ area = (double)std::max(amax, bmax) -
+ (double)std::min(amin, bmin);
+ if (area == 0) {
+ loc_ab_area *= LINE_MBR_WEIGHTS;
+ } else {
+ loc_ab_area *= area;
+ }
+
+		/* The value of amax or bmin can be so large that small
+		differences are ignored. For example: 3.2884281489988079e+284
+		- 100 = 3.2884281489988079e+284. As a result, some area
+		differences are not detected. */
+ if (loc_ab_area == a_area) {
+ if (bmin < amin || bmax > amax) {
+ data_round *= ((double)std::max(amax, bmax)
+ - amax
+ + (amin - (double)std::min(
+ amin, bmin)));
+ } else {
+ data_round *= area;
+ }
+ }
+
+ a += keyseg_len;
+ b += keyseg_len;
+ }
+
+ *ab_area = loc_ab_area;
+
+ if (loc_ab_area == a_area && data_round != 1.0) {
+ return(data_round);
+ }
+
+ return(loc_ab_area - a_area);
+}
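+
+/* Worked example with hypothetical values, in one dimension: for
+a = [0, 2] and b = [1, 3], a_area = 2 and the joined extent is
+[0, 3], so *ab_area = 3 and the function returns 3 - 2 = 1, the
+area added by enlarging a to also cover b. */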
+
+/** Calculates overlapping area
+@param[in] a mbr a
+@param[in] b mbr b
+@param[in] mbr_len mbr length
+@return overlapping area */
+double
+rtree_area_overlapping(
+ const uchar* a,
+ const uchar* b,
+ int mbr_len)
+{
+ double area = 1.0;
+ double amin;
+ double amax;
+ double bmin;
+ double bmax;
+ int key_len;
+ int keyseg_len;
+
+ keyseg_len = 2 * sizeof(double);
+
+ for (key_len = mbr_len; key_len > 0; key_len -= keyseg_len) {
+ amin = mach_double_read(a);
+ bmin = mach_double_read(b);
+ amax = mach_double_read(a + sizeof(double));
+ bmax = mach_double_read(b + sizeof(double));
+
+ amin = std::max(amin, bmin);
+ amax = std::min(amax, bmax);
+
+ if (amin > amax) {
+ return(0);
+ } else {
+ area *= (amax - amin);
+ }
+
+ a += keyseg_len;
+ b += keyseg_len;
+ }
+
+ return(area);
+}
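+
+/* Worked example with hypothetical values, in one dimension: for
+a = [0, 2] and b = [1, 3] the overlap is [max(0, 1), min(2, 3)] =
+[1, 2], so the function returns 1.0; if the intervals were disjoint
+in any dimension, it would return 0. */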
diff --git a/storage/innobase/gis/gis0rtree.cc b/storage/innobase/gis/gis0rtree.cc
new file mode 100644
index 00000000000..1808fe851b8
--- /dev/null
+++ b/storage/innobase/gis/gis0rtree.cc
@@ -0,0 +1,1975 @@
+/*****************************************************************************
+
+Copyright (c) 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2019, 2020, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file gis/gis0rtree.cc
+InnoDB R-tree interfaces
+
+Created 2013/03/27 Allen Lai and Jimmy Yang
+***********************************************************************/
+
+#include "fsp0fsp.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "page0zip.h"
+#include "gis0rtree.h"
+#include "btr0cur.h"
+#include "btr0sea.h"
+#include "btr0pcur.h"
+#include "rem0cmp.h"
+#include "lock0lock.h"
+#include "ibuf0ibuf.h"
+#include "trx0undo.h"
+#include "srv0mon.h"
+#include "gis0geo.h"
+
+/*************************************************************//**
+Initial split nodes info for R-tree split.
+@return initialized split nodes array */
+static
+rtr_split_node_t*
+rtr_page_split_initialize_nodes(
+/*============================*/
+ mem_heap_t* heap, /*!< in: pointer to memory heap, or NULL */
+ btr_cur_t* cursor, /*!< in: cursor at which to insert; when the
+ function returns, the cursor is positioned
+ on the predecessor of the inserted record */
+ offset_t** offsets,/*!< in: offsets on inserted record */
+ const dtuple_t* tuple, /*!< in: tuple to insert */
+ double** buf_pos)/*!< in/out: current buffer position */
+{
+ rtr_split_node_t* split_node_array;
+ double* buf;
+ ulint n_recs;
+ rtr_split_node_t* task;
+ rtr_split_node_t* stop;
+ rtr_split_node_t* cur;
+ rec_t* rec;
+ buf_block_t* block;
+ page_t* page;
+ ulint n_uniq;
+ ulint len;
+ const byte* source_cur;
+
+ block = btr_cur_get_block(cursor);
+ page = buf_block_get_frame(block);
+ n_uniq = dict_index_get_n_unique_in_tree(cursor->index);
+
+ n_recs = page_get_n_recs(page) + 1;
+
+	/* We reserve memory space for 2 MBRs to hold the temporary
+	result of the split algorithm. Adding the new mbr that needs
+	to be inserted, we need (n_recs + 3) * MBR size for storing
+	all MBRs. */
+ buf = static_cast<double*>(mem_heap_alloc(
+ heap, DATA_MBR_LEN * (n_recs + 3)
+ + sizeof(rtr_split_node_t) * (n_recs + 1)));
+
+ split_node_array = (rtr_split_node_t*)(buf + SPDIMS * 2 * (n_recs + 3));
+ task = split_node_array;
+ *buf_pos = buf;
+ stop = task + n_recs;
+
+ rec = page_rec_get_next(page_get_infimum_rec(page));
+ ut_d(const bool is_leaf = page_is_leaf(page));
+ *offsets = rec_get_offsets(rec, cursor->index, *offsets, is_leaf,
+ n_uniq, &heap);
+
+ source_cur = rec_get_nth_field(rec, *offsets, 0, &len);
+
+ for (cur = task; cur < stop - 1; ++cur) {
+ cur->coords = reserve_coords(buf_pos, SPDIMS);
+ cur->key = rec;
+
+ memcpy(cur->coords, source_cur, DATA_MBR_LEN);
+
+ rec = page_rec_get_next(rec);
+ *offsets = rec_get_offsets(rec, cursor->index, *offsets,
+ is_leaf, n_uniq, &heap);
+ source_cur = rec_get_nth_field(rec, *offsets, 0, &len);
+ }
+
+ /* Put the insert key to node list */
+ source_cur = static_cast<const byte*>(dfield_get_data(
+ dtuple_get_nth_field(tuple, 0)));
+ cur->coords = reserve_coords(buf_pos, SPDIMS);
+ rec = (byte*) mem_heap_alloc(
+ heap, rec_get_converted_size(cursor->index, tuple, 0));
+
+ rec = rec_convert_dtuple_to_rec(rec, cursor->index, tuple, 0);
+ cur->key = rec;
+
+ memcpy(cur->coords, source_cur, DATA_MBR_LEN);
+
+ return split_node_array;
+}
+
+/**********************************************************************//**
+Builds an R-tree node pointer out of a physical record and a page number.
+Note: for an R-tree, we keep only the mbr and the page no field in a
+non-leaf level page. This differs from a B-tree, which still keeps the
+PK fields so far.
+@return own: node pointer */
+dtuple_t*
+rtr_index_build_node_ptr(
+/*=====================*/
+ const dict_index_t* index, /*!< in: index */
+ const rtr_mbr_t* mbr, /*!< in: mbr of lower page */
+ const rec_t* rec, /*!< in: record for which to build node
+ pointer */
+ ulint page_no,/*!< in: page number to put in node
+ pointer */
+ mem_heap_t* heap, /*!< in: memory heap where pointer
+ created */
+ ulint level) /*!< in: level of rec in tree:
+ 0 means leaf level */
+{
+ dtuple_t* tuple;
+ dfield_t* field;
+ byte* buf;
+ ulint n_unique;
+ ulint info_bits;
+
+ ut_ad(dict_index_is_spatial(index));
+
+ n_unique = DICT_INDEX_SPATIAL_NODEPTR_SIZE;
+
+ tuple = dtuple_create(heap, n_unique + 1);
+
+ /* For rtree internal node, we need to compare page number
+ fields. */
+ dtuple_set_n_fields_cmp(tuple, n_unique + 1);
+
+ dict_index_copy_types(tuple, index, n_unique);
+
+ /* Write page no field */
+ buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
+
+ mach_write_to_4(buf, page_no);
+
+ field = dtuple_get_nth_field(tuple, n_unique);
+ dfield_set_data(field, buf, 4);
+
+ dtype_set(dfield_get_type(field), DATA_SYS_CHILD, DATA_NOT_NULL, 4);
+
+ /* Set info bits. */
+ info_bits = rec_get_info_bits(rec, dict_table_is_comp(index->table));
+ dtuple_set_info_bits(tuple, info_bits | REC_STATUS_NODE_PTR);
+
+ /* Set mbr as index entry data */
+ field = dtuple_get_nth_field(tuple, 0);
+
+ buf = static_cast<byte*>(mem_heap_alloc(heap, DATA_MBR_LEN));
+
+ rtr_write_mbr(buf, mbr);
+
+ dfield_set_data(field, buf, DATA_MBR_LEN);
+
+ ut_ad(dtuple_check_typed(tuple));
+
+ return(tuple);
+}
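+
+/* The node pointer built above thus carries two fields: field 0 holds
+the DATA_MBR_LEN-byte MBR and field 1 the 4-byte child page number,
+with REC_STATUS_NODE_PTR set in the info bits; both fields take part
+in comparisons, since dtuple_set_n_fields_cmp() is given
+n_unique + 1. */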
+
+/**************************************************************//**
+In-place update the mbr field of a spatial index row.
+@return true if update is successful */
+static
+bool
+rtr_update_mbr_field_in_place(
+/*==========================*/
+ dict_index_t* index, /*!< in: spatial index. */
+ rec_t* rec, /*!< in/out: rec to be modified.*/
+ offset_t* offsets, /*!< in/out: offsets on rec. */
+ rtr_mbr_t* mbr, /*!< in: the new mbr. */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ void* new_mbr_ptr;
+ double new_mbr[SPDIMS * 2];
+ byte* log_ptr;
+ page_t* page = page_align(rec);
+ ulint len = DATA_MBR_LEN;
+ ulint flags = BTR_NO_UNDO_LOG_FLAG
+ | BTR_NO_LOCKING_FLAG
+ | BTR_KEEP_SYS_FLAG;
+ ulint rec_info;
+
+ rtr_write_mbr(reinterpret_cast<byte*>(&new_mbr), mbr);
+ new_mbr_ptr = static_cast<void*>(new_mbr);
+ /* Otherwise, set the mbr to the new_mbr. */
+ rec_set_nth_field(rec, offsets, 0, new_mbr_ptr, len);
+
+ rec_info = rec_get_info_bits(rec, rec_offs_comp(offsets));
+
+ /* Write redo log. */
+ /* For now, we use LOG_REC_UPDATE_IN_PLACE to log this enlarge.
+ In the future, we may need to add a new log type for this. */
+ log_ptr = mlog_open_and_write_index(mtr, rec, index, page_is_comp(page)
+ ? MLOG_COMP_REC_UPDATE_IN_PLACE
+ : MLOG_REC_UPDATE_IN_PLACE,
+ 1 + DATA_ROLL_PTR_LEN + 14 + 2
+ + MLOG_BUF_MARGIN);
+
+ if (!log_ptr) {
+ /* Logging in mtr is switched off during
+ crash recovery */
+ return(false);
+ }
+
+ /* Flags */
+ mach_write_to_1(log_ptr, flags);
+ log_ptr++;
+ /* TRX_ID Position */
+ log_ptr += mach_write_compressed(log_ptr, 0);
+ /* ROLL_PTR */
+ trx_write_roll_ptr(log_ptr, 0);
+ log_ptr += DATA_ROLL_PTR_LEN;
+ /* TRX_ID */
+ log_ptr += mach_u64_write_compressed(log_ptr, 0);
+
+ /* Offset */
+ mach_write_to_2(log_ptr, page_offset(rec));
+ log_ptr += 2;
+ /* Info bits */
+ mach_write_to_1(log_ptr, rec_info);
+ log_ptr++;
+ /* N fields */
+ log_ptr += mach_write_compressed(log_ptr, 1);
+ /* Field no, len */
+ log_ptr += mach_write_compressed(log_ptr, 0);
+ log_ptr += mach_write_compressed(log_ptr, len);
+ /* Data */
+ memcpy(log_ptr, new_mbr_ptr, len);
+ log_ptr += len;
+
+ mlog_close(mtr, log_ptr);
+
+ return(true);
+}
+
+/**************************************************************//**
+Update the mbr field of a spatial index row.
+@return true if update is successful */
+bool
+rtr_update_mbr_field(
+/*=================*/
+	btr_cur_t*	cursor,		/*!< in/out: cursor pointing to rec.*/
+ offset_t* offsets, /*!< in/out: offsets on rec. */
+	btr_cur_t*	cursor2,	/*!< in/out: cursor pointing to rec
+					that should be deleted.
+					This cursor is used by btr_compress
+					to delete the merged page's father rec.*/
+ page_t* child_page, /*!< in: child page. */
+ rtr_mbr_t* mbr, /*!< in: the new mbr. */
+ rec_t* new_rec, /*!< in: rec to use */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ dict_index_t* index = cursor->index;
+ mem_heap_t* heap;
+ page_t* page;
+ rec_t* rec;
+ ulint flags = BTR_NO_UNDO_LOG_FLAG
+ | BTR_NO_LOCKING_FLAG
+ | BTR_KEEP_SYS_FLAG;
+ dberr_t err;
+ big_rec_t* dummy_big_rec;
+ buf_block_t* block;
+ rec_t* child_rec;
+ ulint up_match = 0;
+ ulint low_match = 0;
+ ulint child;
+ ulint level;
+ ulint rec_info;
+ page_zip_des_t* page_zip;
+ bool ins_suc = true;
+ ulint cur2_pos = 0;
+ ulint del_page_no = 0;
+ offset_t* offsets2;
+
+ rec = btr_cur_get_rec(cursor);
+ page = page_align(rec);
+
+ rec_info = rec_get_info_bits(rec, rec_offs_comp(offsets));
+
+ heap = mem_heap_create(100);
+ block = btr_cur_get_block(cursor);
+ ut_ad(page == buf_block_get_frame(block));
+ page_zip = buf_block_get_page_zip(block);
+
+ child = btr_node_ptr_get_child_page_no(rec, offsets);
+ level = btr_page_get_level(buf_block_get_frame(block), mtr);
+
+ if (new_rec) {
+ child_rec = new_rec;
+ } else {
+ child_rec = page_rec_get_next(page_get_infimum_rec(child_page));
+ }
+
+ dtuple_t* node_ptr = rtr_index_build_node_ptr(
+ index, mbr, child_rec, child, heap, level);
+
+	/* We need to remember the child page no of cursor2, since the
+	page could be reorganized or a new rec could be inserted before
+	it. */
+ if (cursor2) {
+ rec_t* del_rec = btr_cur_get_rec(cursor2);
+ offsets2 = rec_get_offsets(btr_cur_get_rec(cursor2),
+ index, NULL, false,
+ ULINT_UNDEFINED, &heap);
+ del_page_no = btr_node_ptr_get_child_page_no(del_rec, offsets2);
+ cur2_pos = page_rec_get_n_recs_before(btr_cur_get_rec(cursor2));
+ }
+
+ if (rec_info & REC_INFO_MIN_REC_FLAG) {
+		/* When the rec is the minimal rec on this level, we
+		update it in place to avoid moving it elsewhere. */
+
+ if (page_zip) {
+ /* Check if there's enough space for in-place
+ update the zip page. */
+ if (!btr_cur_update_alloc_zip(
+ page_zip,
+ btr_cur_get_page_cur(cursor),
+ index, offsets,
+ rec_offs_size(offsets),
+ false, mtr)) {
+
+				/* If there's not enough space for an
+				in-place update on the zip page, we do
+				delete + insert. */
+ ins_suc = false;
+
+				/* Since btr_cur_update_alloc_zip could
+				reorganize the page, we need to
+				reposition cursor2. */
+ if (cursor2) {
+ cursor2->page_cur.rec =
+ page_rec_get_nth(page,
+ cur2_pos);
+ }
+
+ goto update_mbr;
+ }
+
+ /* Record could be repositioned */
+ rec = btr_cur_get_rec(cursor);
+
+#ifdef UNIV_DEBUG
+ /* Make sure it is still the first record */
+ rec_info = rec_get_info_bits(
+ rec, rec_offs_comp(offsets));
+ ut_ad(rec_info & REC_INFO_MIN_REC_FLAG);
+#endif /* UNIV_DEBUG */
+ }
+
+ if (!rtr_update_mbr_field_in_place(index, rec,
+ offsets, mbr, mtr)) {
+ return(false);
+ }
+
+ if (page_zip) {
+ page_zip_write_rec(page_zip, rec, index, offsets, 0);
+ }
+
+ if (cursor2) {
+ offset_t* offsets2;
+
+ if (page_zip) {
+ cursor2->page_cur.rec
+ = page_rec_get_nth(page, cur2_pos);
+ }
+ offsets2 = rec_get_offsets(btr_cur_get_rec(cursor2),
+ index, NULL, false,
+ ULINT_UNDEFINED, &heap);
+ ut_ad(del_page_no == btr_node_ptr_get_child_page_no(
+ cursor2->page_cur.rec,
+ offsets2));
+
+ page_cur_delete_rec(btr_cur_get_page_cur(cursor2),
+ index, offsets2, mtr);
+ }
+ } else if (page_get_n_recs(page) == 1) {
+ /* When there's only one rec in the page, we do insert/delete to
+ avoid page merge. */
+
+ page_cur_t page_cur;
+ rec_t* insert_rec;
+ offset_t* insert_offsets = NULL;
+ ulint old_pos;
+ rec_t* old_rec;
+
+ ut_ad(cursor2 == NULL);
+
+ /* Insert the new mbr rec. */
+ old_pos = page_rec_get_n_recs_before(rec);
+
+ err = btr_cur_optimistic_insert(
+ flags,
+ cursor, &insert_offsets, &heap,
+ node_ptr, &insert_rec, &dummy_big_rec, 0, NULL, mtr);
+
+ ut_ad(err == DB_SUCCESS);
+
+ btr_cur_position(index, insert_rec, block, cursor);
+
+ /* Delete the old mbr rec. */
+ old_rec = page_rec_get_nth(page, old_pos);
+ ut_ad(old_rec != insert_rec);
+
+ page_cur_position(old_rec, block, &page_cur);
+ offsets2 = rec_get_offsets(old_rec, index, NULL, !level,
+ ULINT_UNDEFINED, &heap);
+ page_cur_delete_rec(&page_cur, index, offsets2, mtr);
+
+ } else {
+update_mbr:
+		/* When there is more than one rec in the page, we do
+		delete/insert to avoid a page split. */
+ rec_t* insert_rec;
+ offset_t* insert_offsets = NULL;
+ rec_t* next_rec;
+
+		/* Delete the rec which the cursor points to. */
+ next_rec = page_rec_get_next(rec);
+ page_cur_delete_rec(btr_cur_get_page_cur(cursor),
+ index, offsets, mtr);
+ if (!ins_suc) {
+ ut_ad(rec_info & REC_INFO_MIN_REC_FLAG);
+
+ btr_set_min_rec_mark(next_rec, mtr);
+ }
+
+		/* If there's more than 1 rec left in the page, delete
+		the rec which cursor2 points to. Otherwise, delete it later. */
+ if (cursor2 && page_get_n_recs(page) > 1) {
+ ulint cur2_rec_info;
+ rec_t* cur2_rec;
+
+ cur2_rec = cursor2->page_cur.rec;
+ offsets2 = rec_get_offsets(cur2_rec, index, NULL,
+ !level,
+ ULINT_UNDEFINED, &heap);
+
+ cur2_rec_info = rec_get_info_bits(cur2_rec,
+ rec_offs_comp(offsets2));
+ if (cur2_rec_info & REC_INFO_MIN_REC_FLAG) {
+ /* If we delete the leftmost node
+ pointer on a non-leaf level, we must
+ mark the new leftmost node pointer as
+ the predefined minimum record */
+ rec_t* next_rec = page_rec_get_next(cur2_rec);
+ btr_set_min_rec_mark(next_rec, mtr);
+ }
+
+ ut_ad(del_page_no
+ == btr_node_ptr_get_child_page_no(cur2_rec,
+ offsets2));
+ page_cur_delete_rec(btr_cur_get_page_cur(cursor2),
+ index, offsets2, mtr);
+ cursor2 = NULL;
+ }
+
+ /* Insert the new rec. */
+ page_cur_search_with_match(block, index, node_ptr,
+ PAGE_CUR_LE , &up_match, &low_match,
+ btr_cur_get_page_cur(cursor), NULL);
+
+ err = btr_cur_optimistic_insert(flags, cursor, &insert_offsets,
+ &heap, node_ptr, &insert_rec,
+ &dummy_big_rec, 0, NULL, mtr);
+
+ if (!ins_suc && err == DB_SUCCESS) {
+ ins_suc = true;
+ }
+
+		/* If the optimistic insert fails, try to reorganize
+		the page and insert again. */
+ if (err != DB_SUCCESS && ins_suc) {
+ btr_page_reorganize(btr_cur_get_page_cur(cursor),
+ index, mtr);
+
+ err = btr_cur_optimistic_insert(flags,
+ cursor,
+ &insert_offsets,
+ &heap,
+ node_ptr,
+ &insert_rec,
+ &dummy_big_rec,
+ 0, NULL, mtr);
+
+ /* Will do pessimistic insert */
+ if (err != DB_SUCCESS) {
+ ins_suc = false;
+ }
+ }
+
+		/* If the insert succeeded, position the cursor on the inserted rec. */
+ if (ins_suc) {
+ btr_cur_position(index, insert_rec, block, cursor);
+ offsets = rec_get_offsets(insert_rec,
+ index, offsets, !level,
+ ULINT_UNDEFINED, &heap);
+ }
+
+		/* Delete the rec which cursor2 points to. */
+ if (cursor2) {
+ ulint cur2_pno;
+ rec_t* cur2_rec;
+
+ cursor2->page_cur.rec = page_rec_get_nth(page,
+ cur2_pos);
+
+ cur2_rec = btr_cur_get_rec(cursor2);
+
+ offsets2 = rec_get_offsets(cur2_rec, index, NULL,
+ !level,
+ ULINT_UNDEFINED, &heap);
+
+			/* If cursor2 is positioned on the wrong rec, we
+			need to reposition it. */
+ cur2_pno = btr_node_ptr_get_child_page_no(cur2_rec, offsets2);
+ if ((del_page_no != cur2_pno)
+ || (cur2_rec == insert_rec)) {
+ cur2_rec = page_rec_get_next(
+ page_get_infimum_rec(page));
+
+ while (!page_rec_is_supremum(cur2_rec)) {
+ offsets2 = rec_get_offsets(cur2_rec, index,
+ NULL,
+ !level,
+ ULINT_UNDEFINED,
+ &heap);
+ cur2_pno = btr_node_ptr_get_child_page_no(
+ cur2_rec, offsets2);
+ if (cur2_pno == del_page_no) {
+ if (insert_rec != cur2_rec) {
+ cursor2->page_cur.rec =
+ cur2_rec;
+ break;
+ }
+ }
+ cur2_rec = page_rec_get_next(cur2_rec);
+ }
+
+ ut_ad(!page_rec_is_supremum(cur2_rec));
+ }
+
+ rec_info = rec_get_info_bits(cur2_rec,
+ rec_offs_comp(offsets2));
+ if (rec_info & REC_INFO_MIN_REC_FLAG) {
+ /* If we delete the leftmost node
+ pointer on a non-leaf level, we must
+ mark the new leftmost node pointer as
+ the predefined minimum record */
+ rec_t* next_rec = page_rec_get_next(cur2_rec);
+ btr_set_min_rec_mark(next_rec, mtr);
+ }
+
+ ut_ad(cur2_pno == del_page_no && cur2_rec != insert_rec);
+
+ page_cur_delete_rec(btr_cur_get_page_cur(cursor2),
+ index, offsets2, mtr);
+ }
+
+ if (!ins_suc) {
+ mem_heap_t* new_heap = NULL;
+
+ err = btr_cur_pessimistic_insert(
+ flags,
+ cursor, &insert_offsets, &new_heap,
+ node_ptr, &insert_rec, &dummy_big_rec,
+ 0, NULL, mtr);
+
+ ut_ad(err == DB_SUCCESS);
+
+ if (new_heap) {
+ mem_heap_free(new_heap);
+ }
+
+ }
+
+ if (cursor2) {
+ btr_cur_compress_if_useful(cursor, FALSE, mtr);
+ }
+ }
+
+ ut_ad(page_has_prev(page)
+ || (REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
+ page_rec_get_next(page_get_infimum_rec(page)),
+ page_is_comp(page))));
+
+ mem_heap_free(heap);
+
+ return(true);
+}
+
+/**************************************************************//**
+Update parent page's MBR and Predicate lock information during a split */
+static MY_ATTRIBUTE((nonnull))
+void
+rtr_adjust_upper_level(
+/*===================*/
+ btr_cur_t* sea_cur, /*!< in: search cursor */
+ ulint flags, /*!< in: undo logging and
+ locking flags */
+ buf_block_t* block, /*!< in/out: page to be split */
+ buf_block_t* new_block, /*!< in/out: the new half page */
+ rtr_mbr_t* mbr, /*!< in: MBR on the old page */
+ rtr_mbr_t* new_mbr, /*!< in: MBR on the new page */
+ ulint direction, /*!< in: FSP_UP or FSP_DOWN */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ page_t* page;
+ page_t* new_page;
+ ulint page_no;
+ ulint new_page_no;
+ page_zip_des_t* page_zip;
+ page_zip_des_t* new_page_zip;
+ dict_index_t* index = sea_cur->index;
+ btr_cur_t cursor;
+ offset_t* offsets;
+ mem_heap_t* heap;
+ ulint level;
+ dtuple_t* node_ptr_upper;
+ page_cur_t* page_cursor;
+ rtr_mbr_t parent_mbr;
+ lock_prdt_t prdt;
+ lock_prdt_t new_prdt;
+ lock_prdt_t parent_prdt;
+ dberr_t err;
+ big_rec_t* dummy_big_rec;
+ rec_t* rec;
+
+ /* Create a memory heap where the data tuple is stored */
+ heap = mem_heap_create(1024);
+ cursor.init();
+
+ cursor.thr = sea_cur->thr;
+
+ /* Get the level of the split pages */
+ level = btr_page_get_level(buf_block_get_frame(block), mtr);
+ ut_ad(level
+ == btr_page_get_level(buf_block_get_frame(new_block), mtr));
+
+ page = buf_block_get_frame(block);
+ page_no = block->page.id.page_no();
+ page_zip = buf_block_get_page_zip(block);
+
+ new_page = buf_block_get_frame(new_block);
+ new_page_no = new_block->page.id.page_no();
+ new_page_zip = buf_block_get_page_zip(new_block);
+
+ /* Set new mbr for the old page on the upper level. */
+ /* Look up the index for the node pointer to page */
+ offsets = rtr_page_get_father_block(
+ NULL, heap, index, block, mtr, sea_cur, &cursor);
+
+ page_cursor = btr_cur_get_page_cur(&cursor);
+
+ rtr_get_mbr_from_rec(page_cursor->rec, offsets, &parent_mbr);
+
+ rtr_update_mbr_field(&cursor, offsets, NULL, page, mbr, NULL, mtr);
+
+	/* The parent MBR has already been updated; reset it in our path */
+ if (sea_cur->rtr_info) {
+ node_visit_t* node_visit = rtr_get_parent_node(
+ sea_cur, level + 1, true);
+ if (node_visit) {
+ node_visit->mbr_inc = 0;
+ }
+ }
+
+ /* Insert the node for the new page. */
+ node_ptr_upper = rtr_index_build_node_ptr(
+ index, new_mbr,
+ page_rec_get_next(page_get_infimum_rec(new_page)),
+ new_page_no, heap, level);
+
+ ulint up_match = 0;
+ ulint low_match = 0;
+
+ buf_block_t* father_block = btr_cur_get_block(&cursor);
+
+ page_cur_search_with_match(
+ father_block, index, node_ptr_upper,
+ PAGE_CUR_LE , &up_match, &low_match,
+ btr_cur_get_page_cur(&cursor), NULL);
+
+ err = btr_cur_optimistic_insert(
+ flags
+ | BTR_NO_LOCKING_FLAG
+ | BTR_KEEP_SYS_FLAG
+ | BTR_NO_UNDO_LOG_FLAG,
+ &cursor, &offsets, &heap,
+ node_ptr_upper, &rec, &dummy_big_rec, 0, NULL, mtr);
+
+ if (err == DB_FAIL) {
+ cursor.rtr_info = sea_cur->rtr_info;
+ cursor.tree_height = sea_cur->tree_height;
+
+ /* Recreate a memory heap as input parameter for
+ btr_cur_pessimistic_insert(), because the heap may be
+ emptied in btr_cur_pessimistic_insert(). */
+ mem_heap_t* new_heap = mem_heap_create(1024);
+
+ err = btr_cur_pessimistic_insert(flags
+ | BTR_NO_LOCKING_FLAG
+ | BTR_KEEP_SYS_FLAG
+ | BTR_NO_UNDO_LOG_FLAG,
+ &cursor, &offsets, &new_heap,
+ node_ptr_upper, &rec,
+ &dummy_big_rec, 0, NULL, mtr);
+ cursor.rtr_info = NULL;
+ ut_a(err == DB_SUCCESS);
+
+ mem_heap_free(new_heap);
+ }
+
+ prdt.data = static_cast<void*>(mbr);
+ prdt.op = 0;
+ new_prdt.data = static_cast<void*>(new_mbr);
+ new_prdt.op = 0;
+ parent_prdt.data = static_cast<void*>(&parent_mbr);
+ parent_prdt.op = 0;
+
+ lock_prdt_update_parent(block, new_block, &prdt, &new_prdt,
+ &parent_prdt, dict_index_get_space(index),
+ page_cursor->block->page.id.page_no());
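+	/* The predicate locks recorded on the parent entry are now
+	re-distributed between the old and the new half page, based on
+	the old, new and parent MBRs passed above. */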
+
+ mem_heap_free(heap);
+
+ const uint32_t next_page_no = btr_page_get_next(page);
+
+ if (next_page_no != FIL_NULL) {
+ page_id_t next_page_id(block->page.id.space(),
+ next_page_no);
+
+ buf_block_t* next_block = btr_block_get(
+ next_page_id, dict_table_page_size(index->table),
+ RW_X_LATCH, index, mtr);
+#ifdef UNIV_BTR_DEBUG
+ ut_a(page_is_comp(next_block->frame) == page_is_comp(page));
+ ut_a(btr_page_get_prev(next_block->frame)
+ == block->page.id.page_no());
+#endif /* UNIV_BTR_DEBUG */
+
+ btr_page_set_prev(buf_block_get_frame(next_block),
+ buf_block_get_page_zip(next_block),
+ new_page_no, mtr);
+ }
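+	/* Link the new page into the doubly-linked list of pages on
+	this level: old page -> new page -> former right sibling. */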
+
+ btr_page_set_next(page, page_zip, new_page_no, mtr);
+
+ btr_page_set_prev(new_page, new_page_zip, page_no, mtr);
+ btr_page_set_next(new_page, new_page_zip, next_page_no, mtr);
+}
+
+/*************************************************************//**
+Moves record list to another page for rtree splitting.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
+@return TRUE on success; FALSE on compression failure */
+static
+ibool
+rtr_split_page_move_rec_list(
+/*=========================*/
+ rtr_split_node_t* node_array, /*!< in: split node array. */
+ int first_rec_group,/*!< in: group number of the
+ first rec. */
+ buf_block_t* new_block, /*!< in/out: index page
+ where to move */
+ buf_block_t* block, /*!< in/out: page containing
+ split_rec */
+ rec_t* first_rec, /*!< in: first record not to
+ move */
+ dict_index_t* index, /*!< in: record descriptor */
+ mem_heap_t* heap, /*!< in: pointer to memory
+ heap, or NULL */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ rtr_split_node_t* cur_split_node;
+ rtr_split_node_t* end_split_node;
+ page_cur_t page_cursor;
+ page_cur_t new_page_cursor;
+ page_t* page;
+ page_t* new_page;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
+ page_zip_des_t* new_page_zip
+ = buf_block_get_page_zip(new_block);
+ rec_t* rec;
+ rec_t* ret;
+ ulint moved = 0;
+ ulint max_to_move = 0;
+ rtr_rec_move_t* rec_move = NULL;
+
+ ut_ad(!dict_index_is_ibuf(index));
+ ut_ad(dict_index_is_spatial(index));
+
+ rec_offs_init(offsets_);
+
+ page_cur_set_before_first(block, &page_cursor);
+ page_cur_set_before_first(new_block, &new_page_cursor);
+
+ page = buf_block_get_frame(block);
+ new_page = buf_block_get_frame(new_block);
+ ret = page_rec_get_prev(page_get_supremum_rec(new_page));
+
+ end_split_node = node_array + page_get_n_recs(page);
+
+ mtr_log_t log_mode = MTR_LOG_NONE;
+
+ if (new_page_zip) {
+ log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
+ }
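+	/* For a compressed page the individual inserts are not redo
+	logged here; the whole page image is logged when the page is
+	compressed (or reorganized) below. */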
+
+ max_to_move = page_get_n_recs(
+ buf_block_get_frame(block));
+ rec_move = static_cast<rtr_rec_move_t*>(mem_heap_alloc(
+ heap,
+ sizeof (*rec_move) * max_to_move));
+ const bool is_leaf = page_is_leaf(page);
+
+	/* Insert the records of the second group into the new page. */
+ for (cur_split_node = node_array;
+ cur_split_node < end_split_node; ++cur_split_node) {
+ if (cur_split_node->n_node != first_rec_group) {
+ lock_rec_store_on_page_infimum(
+ block, cur_split_node->key);
+
+ offsets = rec_get_offsets(cur_split_node->key,
+ index, offsets, is_leaf,
+ ULINT_UNDEFINED, &heap);
+
+ ut_ad(!is_leaf || cur_split_node->key != first_rec);
+
+ rec = page_cur_insert_rec_low(
+ page_cur_get_rec(&new_page_cursor),
+ index,
+ cur_split_node->key,
+ offsets,
+ mtr);
+
+ ut_a(rec);
+
+ lock_rec_restore_from_page_infimum(
+ new_block, rec, block);
+
+ page_cur_move_to_next(&new_page_cursor);
+
+ rec_move[moved].new_rec = rec;
+ rec_move[moved].old_rec = cur_split_node->key;
+ rec_move[moved].moved = false;
+ moved++;
+
+ if (moved > max_to_move) {
+ ut_ad(0);
+ break;
+ }
+ }
+ }
+
+ /* Update PAGE_MAX_TRX_ID on the uncompressed page.
+ Modifications will be redo logged and copied to the compressed
+ page in page_zip_compress() or page_zip_reorganize() below.
+ Multiple transactions cannot simultaneously operate on the
+ same temp-table in parallel.
+	max_trx_id is ignored for temp tables because it is not required
+	for MVCC. */
+ if (is_leaf && !dict_table_is_temporary(index->table)) {
+ page_update_max_trx_id(new_block, NULL,
+ page_get_max_trx_id(page),
+ mtr);
+ }
+
+ if (new_page_zip) {
+ mtr_set_log_mode(mtr, log_mode);
+
+ if (!page_zip_compress(new_page_zip, new_page, index,
+ page_zip_level, NULL, mtr)) {
+ ulint ret_pos;
+
+ /* Before trying to reorganize the page,
+ store the number of preceding records on the page. */
+ ret_pos = page_rec_get_n_recs_before(ret);
+ /* Before copying, "ret" was the predecessor
+ of the predefined supremum record. If it was
+ the predefined infimum record, then it would
+ still be the infimum, and we would have
+ ret_pos == 0. */
+
+ if (UNIV_UNLIKELY
+ (!page_zip_reorganize(new_block, index, mtr))) {
+
+ if (UNIV_UNLIKELY
+ (!page_zip_decompress(new_page_zip,
+ new_page, FALSE))) {
+ ut_error;
+ }
+#ifdef UNIV_GIS_DEBUG
+ ut_ad(page_validate(new_page, index));
+#endif
+
+ return(false);
+ }
+
+ /* The page was reorganized: Seek to ret_pos. */
+ ret = page_rec_get_nth(new_page, ret_pos);
+ }
+ }
+
+ /* Update the lock table */
+ lock_rtr_move_rec_list(new_block, block, rec_move, moved);
+
+ /* Delete recs in second group from the old page. */
+ for (cur_split_node = node_array;
+ cur_split_node < end_split_node; ++cur_split_node) {
+ if (cur_split_node->n_node != first_rec_group) {
+ page_cur_position(cur_split_node->key,
+ block, &page_cursor);
+ offsets = rec_get_offsets(
+ page_cur_get_rec(&page_cursor), index,
+ offsets, is_leaf, ULINT_UNDEFINED,
+ &heap);
+ page_cur_delete_rec(&page_cursor,
+ index, offsets, mtr);
+ }
+ }
+
+ return(true);
+}
+
+/*************************************************************//**
+Splits an R-tree index page to halves and inserts the tuple. It is assumed
+that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is
+released within this function! NOTE that the operation of this
+function must always succeed, we cannot reverse it: therefore enough
+free disk space (2 pages) must be guaranteed to be available before
+this function is called.
+@return inserted record */
+rec_t*
+rtr_page_split_and_insert(
+/*======================*/
+ ulint flags, /*!< in: undo logging and locking flags */
+ btr_cur_t* cursor, /*!< in/out: cursor at which to insert; when the
+ function returns, the cursor is positioned
+ on the predecessor of the inserted record */
+ offset_t** offsets,/*!< out: offsets on inserted record */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
+ const dtuple_t* tuple, /*!< in: tuple to insert */
+ ulint n_ext, /*!< in: number of externally stored columns */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ buf_block_t* block;
+ page_t* page;
+ page_t* new_page;
+ ulint page_no;
+ byte direction;
+ ulint hint_page_no;
+ buf_block_t* new_block;
+ page_zip_des_t* page_zip;
+ page_zip_des_t* new_page_zip;
+ buf_block_t* insert_block;
+ page_cur_t* page_cursor;
+ rec_t* rec = 0;
+ ulint n_recs;
+ ulint total_data;
+ ulint insert_size;
+ rtr_split_node_t* rtr_split_node_array;
+ rtr_split_node_t* cur_split_node;
+ rtr_split_node_t* end_split_node;
+ double* buf_pos;
+ ulint page_level;
+ node_seq_t current_ssn;
+ node_seq_t next_ssn;
+ buf_block_t* root_block;
+ rtr_mbr_t mbr;
+ rtr_mbr_t new_mbr;
+ lock_prdt_t prdt;
+ lock_prdt_t new_prdt;
+ rec_t* first_rec = NULL;
+ int first_rec_group = 1;
+ ulint n_iterations = 0;
+
+ if (!*heap) {
+ *heap = mem_heap_create(1024);
+ }
+
+func_start:
+ mem_heap_empty(*heap);
+ *offsets = NULL;
+
+ ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(cursor->index),
+ MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK));
+ ut_ad(!dict_index_is_online_ddl(cursor->index)
+ || (flags & BTR_CREATE_FLAG)
+ || dict_index_is_clust(cursor->index));
+ ut_ad(rw_lock_own_flagged(dict_index_get_lock(cursor->index),
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX));
+
+ block = btr_cur_get_block(cursor);
+ page = buf_block_get_frame(block);
+ page_zip = buf_block_get_page_zip(block);
+ page_level = btr_page_get_level(page, mtr);
+ current_ssn = page_get_ssn_id(page);
+
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(page_get_n_recs(page) >= 1);
+
+ page_no = block->page.id.page_no();
+
+ if (!page_has_prev(page) && !page_is_leaf(page)) {
+ first_rec = page_rec_get_next(
+ page_get_infimum_rec(buf_block_get_frame(block)));
+ }
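+	/* first_rec is only set for the leftmost non-leaf page: its
+	first record carries the minimum record flag and must stay on
+	the original page, so the split algorithm keeps it in the
+	first group. */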
+
+	/* Initialize the split nodes array. */
+ rtr_split_node_array = rtr_page_split_initialize_nodes(
+ *heap, cursor, offsets, tuple, &buf_pos);
+
+	/* Divide all MBRs into two groups. */
+ n_recs = page_get_n_recs(page) + 1;
+
+ end_split_node = rtr_split_node_array + n_recs;
+
+#ifdef UNIV_GIS_DEBUG
+ fprintf(stderr, "Before split a page:\n");
+ for (cur_split_node = rtr_split_node_array;
+ cur_split_node < end_split_node; ++cur_split_node) {
+ for (int i = 0; i < SPDIMS * 2; i++) {
+ fprintf(stderr, "%.2lf ",
+ *(cur_split_node->coords + i));
+ }
+ fprintf(stderr, "\n");
+ }
+#endif
+
+ insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
+ total_data = page_get_data_size(page) + insert_size;
+ first_rec_group = split_rtree_node(rtr_split_node_array,
+ static_cast<int>(n_recs),
+ static_cast<int>(total_data),
+ static_cast<int>(insert_size),
+ 0, 2, 2, &buf_pos, SPDIMS,
+ static_cast<uchar*>(first_rec));
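+	/* Records assigned to first_rec_group stay on the original
+	page; the records of the other group are moved to the new
+	page below. */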
+
+ /* Allocate a new page to the index */
+ direction = FSP_UP;
+ hint_page_no = page_no + 1;
+ new_block = btr_page_alloc(cursor->index, hint_page_no, direction,
+ page_level, mtr, mtr);
+ new_page_zip = buf_block_get_page_zip(new_block);
+ btr_page_create(new_block, new_page_zip, cursor->index,
+ page_level, mtr);
+
+ new_page = buf_block_get_frame(new_block);
+ ut_ad(page_get_ssn_id(new_page) == 0);
+
+	/* The new page inherits the current SSN; the old (split) page
+	gets a fresh SSN. */
+ page_set_ssn_id(new_block, new_page_zip, current_ssn, mtr);
+ next_ssn = rtr_get_new_ssn_id(cursor->index);
+
+ page_set_ssn_id(block, page_zip, next_ssn, mtr);
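+	/* The split sequence numbers (SSN) let a concurrent search
+	detect this split: if it finds a page SSN newer than the one
+	recorded in its search path, it will visit the right sibling
+	page as well. */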
+
+	/* Keep the records in the first group on the old page; move the
+	records in the second group to the new page. */
+ if (0
+#ifdef UNIV_ZIP_COPY
+ || page_zip
+#endif
+ || !rtr_split_page_move_rec_list(rtr_split_node_array,
+ first_rec_group,
+ new_block, block, first_rec,
+ cursor->index, *heap, mtr)) {
+ ulint n = 0;
+ rec_t* rec;
+ ulint moved = 0;
+ ulint max_to_move = 0;
+ rtr_rec_move_t* rec_move = NULL;
+ ulint pos;
+
+ /* For some reason, compressing new_page failed,
+ even though it should contain fewer records than
+ the original page. Copy the page byte for byte
+ and then delete the records from both pages
+ as appropriate. Deleting will always succeed. */
+ ut_a(new_page_zip);
+
+ page_zip_copy_recs(new_page_zip, new_page,
+ page_zip, page, cursor->index, mtr);
+
+ page_cursor = btr_cur_get_page_cur(cursor);
+
+ /* Move locks on recs. */
+ max_to_move = page_get_n_recs(page);
+ rec_move = static_cast<rtr_rec_move_t*>(mem_heap_alloc(
+ *heap,
+ sizeof (*rec_move) * max_to_move));
+
+ /* Init the rec_move array for moving lock on recs. */
+ for (cur_split_node = rtr_split_node_array;
+ cur_split_node < end_split_node - 1; ++cur_split_node) {
+ if (cur_split_node->n_node != first_rec_group) {
+ pos = page_rec_get_n_recs_before(
+ cur_split_node->key);
+ rec = page_rec_get_nth(new_page, pos);
+ ut_a(rec);
+
+ rec_move[moved].new_rec = rec;
+ rec_move[moved].old_rec = cur_split_node->key;
+ rec_move[moved].moved = false;
+ moved++;
+
+ if (moved > max_to_move) {
+ ut_ad(0);
+ break;
+ }
+ }
+ }
+
+ /* Update the lock table */
+ lock_rtr_move_rec_list(new_block, block, rec_move, moved);
+
+ /* Delete recs in first group from the new page. */
+ for (cur_split_node = rtr_split_node_array;
+ cur_split_node < end_split_node - 1; ++cur_split_node) {
+ if (cur_split_node->n_node == first_rec_group) {
+ ulint pos;
+
+ pos = page_rec_get_n_recs_before(
+ cur_split_node->key);
+ ut_a(pos > 0);
+ rec_t* new_rec = page_rec_get_nth(new_page,
+ pos - n);
+
+ ut_a(new_rec && page_rec_is_user_rec(new_rec));
+ page_cur_position(new_rec, new_block,
+ page_cursor);
+
+ *offsets = rec_get_offsets(
+ page_cur_get_rec(page_cursor),
+ cursor->index, *offsets, !page_level,
+ ULINT_UNDEFINED, heap);
+
+ page_cur_delete_rec(page_cursor,
+ cursor->index, *offsets, mtr);
+ n++;
+ }
+ }
+
+ /* Delete recs in second group from the old page. */
+ for (cur_split_node = rtr_split_node_array;
+ cur_split_node < end_split_node - 1; ++cur_split_node) {
+ if (cur_split_node->n_node != first_rec_group) {
+ page_cur_position(cur_split_node->key,
+ block, page_cursor);
+ *offsets = rec_get_offsets(
+ page_cur_get_rec(page_cursor),
+ cursor->index, *offsets, !page_level,
+ ULINT_UNDEFINED, heap);
+ page_cur_delete_rec(page_cursor,
+ cursor->index, *offsets, mtr);
+ }
+ }
+
+#ifdef UNIV_GIS_DEBUG
+ ut_ad(page_validate(new_page, cursor->index));
+ ut_ad(page_validate(page, cursor->index));
+#endif
+ }
+
+ /* Insert the new rec to the proper page. */
+ cur_split_node = end_split_node - 1;
+ if (cur_split_node->n_node != first_rec_group) {
+ insert_block = new_block;
+ } else {
+ insert_block = block;
+ }
+
+ /* Reposition the cursor for insert and try insertion */
+ page_cursor = btr_cur_get_page_cur(cursor);
+
+ page_cur_search(insert_block, cursor->index, tuple,
+ PAGE_CUR_LE, page_cursor);
+
+	/* It is possible that the new record is too big to be inserted
+	into the page, in which case a second round of splitting is
+	needed. We test this scenario here. */
+ DBUG_EXECUTE_IF("rtr_page_need_second_split",
+ if (n_iterations == 0) {
+ rec = NULL;
+ goto after_insert; }
+ );
+
+ rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
+ offsets, heap, n_ext, mtr);
+
+ /* If insert did not fit, try page reorganization.
+ For compressed pages, page_cur_tuple_insert() will have
+ attempted this already. */
+ if (rec == NULL) {
+ if (!page_cur_get_page_zip(page_cursor)
+ && btr_page_reorganize(page_cursor, cursor->index, mtr)) {
+ rec = page_cur_tuple_insert(page_cursor, tuple,
+ cursor->index, offsets,
+ heap, n_ext, mtr);
+
+ }
+		/* If the insert fails, we will try to split insert_block
+		again. */
+ }
+
+#ifdef UNIV_DEBUG
+after_insert:
+#endif
+	/* Calculate the MBR of the original page and the MBR of the
+	new page. */
+ rtr_page_cal_mbr(cursor->index, block, &mbr, *heap);
+ rtr_page_cal_mbr(cursor->index, new_block, &new_mbr, *heap);
+ prdt.data = &mbr;
+ new_prdt.data = &new_mbr;
+
+	/* Check whether any predicate locks need to be moved or copied
+	to the new page */
+ lock_prdt_update_split(block, new_block, &prdt, &new_prdt,
+ dict_index_get_space(cursor->index), page_no);
+
+ /* Adjust the upper level. */
+ rtr_adjust_upper_level(cursor, flags, block, new_block,
+ &mbr, &new_mbr, direction, mtr);
+
+	/* Save the new SSN to the root page, since we need to
+	reinitialize the first SSN value from it after server restart. */
+
+ root_block = btr_root_block_get(cursor->index, RW_SX_LATCH, mtr);
+
+ page_zip = buf_block_get_page_zip(root_block);
+ page_set_ssn_id(root_block, page_zip, next_ssn, mtr);
+
+ /* Insert fit on the page: update the free bits for the
+ left and right pages in the same mtr */
+
+ if (page_is_leaf(page)) {
+ ibuf_update_free_bits_for_two_pages_low(
+ block, new_block, mtr);
+ }
+
+	/* If the insert of the new record failed, we need to do
+	another split. */
+ if (!rec) {
+ /* We play safe and reset the free bits for new_page */
+ if (!dict_index_is_clust(cursor->index)
+ && !dict_table_is_temporary(cursor->index->table)) {
+ ibuf_reset_free_bits(new_block);
+ ibuf_reset_free_bits(block);
+ }
+
+		/* We need to clean up the parent path here and search for
+		the father node later; otherwise we might find a wrong
+		parent. */
+ rtr_clean_rtr_info(cursor->rtr_info, true);
+ cursor->rtr_info = NULL;
+ n_iterations++;
+
+ rec_t* i_rec = page_rec_get_next(page_get_infimum_rec(
+ buf_block_get_frame(block)));
+ btr_cur_position(cursor->index, i_rec, block, cursor);
+
+ goto func_start;
+ }
+
+#ifdef UNIV_GIS_DEBUG
+ ut_ad(page_validate(buf_block_get_frame(block), cursor->index));
+ ut_ad(page_validate(buf_block_get_frame(new_block), cursor->index));
+
+ ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets));
+#endif
+ MONITOR_INC(MONITOR_INDEX_SPLIT);
+
+ return(rec);
+}
+
+/****************************************************************//**
+Enlarge the MBRs of the parent pages along the search path when the
+child page's MBR has grown during an insert.
+@return DB_SUCCESS on success, error code on failure */
+dberr_t
+rtr_ins_enlarge_mbr(
+/*================*/
+ btr_cur_t* btr_cur, /*!< in: btr cursor */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ dberr_t err = DB_SUCCESS;
+ rtr_mbr_t new_mbr;
+ buf_block_t* block;
+ mem_heap_t* heap;
+ dict_index_t* index = btr_cur->index;
+ page_cur_t* page_cursor;
+ offset_t* offsets;
+ node_visit_t* node_visit;
+ btr_cur_t cursor;
+ page_t* page;
+
+ ut_ad(dict_index_is_spatial(index));
+
+	/* If there is no rtr_info, or the R-tree is a one-level tree,
+	return. */
+ if (!btr_cur->rtr_info || btr_cur->tree_height == 1) {
+ return(err);
+ }
+
+ /* Check path info is not empty. */
+ ut_ad(!btr_cur->rtr_info->parent_path->empty());
+
+ /* Create a memory heap. */
+ heap = mem_heap_create(1024);
+
+ /* Leaf level page is stored in cursor */
+ page_cursor = btr_cur_get_page_cur(btr_cur);
+ block = page_cur_get_block(page_cursor);
+
+ for (ulint i = 1; i < btr_cur->tree_height; i++) {
+ node_visit = rtr_get_parent_node(btr_cur, i, true);
+ ut_ad(node_visit != NULL);
+
+		/* If there is no MBR enlargement at this level,
+		move on to the next level. */
+ if (node_visit->mbr_inc == 0) {
+ block = btr_pcur_get_block(node_visit->cursor);
+ continue;
+ }
+
+ /* Calculate the mbr of the child page. */
+ rtr_page_cal_mbr(index, block, &new_mbr, heap);
+
+ /* Get father block. */
+ cursor.init();
+ offsets = rtr_page_get_father_block(
+ NULL, heap, index, block, mtr, btr_cur, &cursor);
+
+ page = buf_block_get_frame(block);
+
+ /* Update the mbr field of the rec. */
+ if (!rtr_update_mbr_field(&cursor, offsets, NULL, page,
+ &new_mbr, NULL, mtr)) {
+ err = DB_ERROR;
+ break;
+ }
+
+ page_cursor = btr_cur_get_page_cur(&cursor);
+ block = page_cur_get_block(page_cursor);
+ }
+
+ mem_heap_free(heap);
+
+ return(err);
+}
+
+/*************************************************************//**
+Copy recs from a page to new_block of rtree.
+Differs from page_copy_rec_list_end, because this function does not
+touch the lock table and max trx id on page or compress the page.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit(). */
+void
+rtr_page_copy_rec_list_end_no_locks(
+/*================================*/
+ buf_block_t* new_block, /*!< in: index page to copy to */
+ buf_block_t* block, /*!< in: index page of rec */
+ rec_t* rec, /*!< in: record on page */
+ dict_index_t* index, /*!< in: record descriptor */
+ mem_heap_t* heap, /*!< in/out: heap memory */
+ rtr_rec_move_t* rec_move, /*!< in: recording records moved */
+ ulint max_move, /*!< in: num of rec to move */
+ ulint* num_moved, /*!< out: num of rec to move */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ page_t* new_page = buf_block_get_frame(new_block);
+ page_cur_t page_cur;
+ page_cur_t cur1;
+ rec_t* cur_rec;
+ offset_t offsets_1[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets1 = offsets_1;
+ offset_t offsets_2[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets2 = offsets_2;
+ ulint moved = 0;
+ bool is_leaf = page_is_leaf(new_page);
+
+ rec_offs_init(offsets_1);
+ rec_offs_init(offsets_2);
+
+ page_cur_position(rec, block, &cur1);
+
+ if (page_cur_is_before_first(&cur1)) {
+ page_cur_move_to_next(&cur1);
+ }
+
+ btr_assert_not_corrupted(new_block, index);
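+	/* The first page directory slot of a valid index page always
+	points to the infimum record; the assertions below verify that
+	the new page is in a sane state before copying. */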
+ ut_a(page_is_comp(new_page) == page_rec_is_comp(rec));
+ ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint)
+ (page_is_comp(new_page) ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM));
+
+ cur_rec = page_rec_get_next(
+ page_get_infimum_rec(buf_block_get_frame(new_block)));
+ page_cur_position(cur_rec, new_block, &page_cur);
+
+ /* Copy records from the original page to the new page */
+ while (!page_cur_is_after_last(&cur1)) {
+ rec_t* cur1_rec = page_cur_get_rec(&cur1);
+ rec_t* ins_rec;
+
+ if (page_rec_is_infimum(cur_rec)) {
+ cur_rec = page_rec_get_next(cur_rec);
+ }
+
+ offsets1 = rec_get_offsets(cur1_rec, index, offsets1, is_leaf,
+ ULINT_UNDEFINED, &heap);
+ while (!page_rec_is_supremum(cur_rec)) {
+ ulint cur_matched_fields = 0;
+ int cmp;
+
+ offsets2 = rec_get_offsets(cur_rec, index, offsets2,
+ is_leaf,
+ ULINT_UNDEFINED, &heap);
+ cmp = cmp_rec_rec(cur1_rec, cur_rec,
+ offsets1, offsets2, index, false,
+ &cur_matched_fields);
+ if (cmp < 0) {
+ page_cur_move_to_prev(&page_cur);
+ break;
+ } else if (cmp > 0) {
+ /* Skip small recs. */
+ page_cur_move_to_next(&page_cur);
+ cur_rec = page_cur_get_rec(&page_cur);
+ } else if (is_leaf) {
+ if (rec_get_deleted_flag(cur1_rec,
+ dict_table_is_comp(index->table))) {
+ goto next;
+ } else {
+				/* We have two identical leaf records;
+				instead of copying the source record,
+				clear the delete mark on the record
+				already on the new page */
+ btr_rec_set_deleted_flag(
+ cur_rec, NULL, FALSE);
+ goto next;
+ }
+ }
+ }
+
+		/* If the position is on the supremum rec, we need to move
+		to the previous rec. */
+ if (page_rec_is_supremum(cur_rec)) {
+ page_cur_move_to_prev(&page_cur);
+ }
+
+ cur_rec = page_cur_get_rec(&page_cur);
+
+ offsets1 = rec_get_offsets(cur1_rec, index, offsets1, is_leaf,
+ ULINT_UNDEFINED, &heap);
+
+ ins_rec = page_cur_insert_rec_low(cur_rec, index,
+ cur1_rec, offsets1, mtr);
+ if (UNIV_UNLIKELY(!ins_rec)) {
+ fprintf(stderr, "page number %ld and %ld\n",
+ (long)new_block->page.id.page_no(),
+ (long)block->page.id.page_no());
+
+ ib::fatal() << "rec offset " << page_offset(rec)
+ << ", cur1 offset "
+ << page_offset(page_cur_get_rec(&cur1))
+ << ", cur_rec offset "
+ << page_offset(cur_rec);
+ }
+
+ rec_move[moved].new_rec = ins_rec;
+ rec_move[moved].old_rec = cur1_rec;
+ rec_move[moved].moved = false;
+ moved++;
+next:
+ if (moved > max_move) {
+ ut_ad(0);
+ break;
+ }
+
+ page_cur_move_to_next(&cur1);
+ }
+
+ *num_moved = moved;
+}
+
+/*************************************************************//**
+Copy recs till a specified rec from a page to new_block of rtree. */
+void
+rtr_page_copy_rec_list_start_no_locks(
+/*==================================*/
+ buf_block_t* new_block, /*!< in: index page to copy to */
+ buf_block_t* block, /*!< in: index page of rec */
+ rec_t* rec, /*!< in: record on page */
+ dict_index_t* index, /*!< in: record descriptor */
+ mem_heap_t* heap, /*!< in/out: heap memory */
+ rtr_rec_move_t* rec_move, /*!< in: recording records moved */
+ ulint max_move, /*!< in: num of rec to move */
+ ulint* num_moved, /*!< out: num of rec to move */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ page_cur_t cur1;
+ rec_t* cur_rec;
+ offset_t offsets_1[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets1 = offsets_1;
+ offset_t offsets_2[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets2 = offsets_2;
+ page_cur_t page_cur;
+ ulint moved = 0;
+ bool is_leaf = page_is_leaf(buf_block_get_frame(block));
+
+ rec_offs_init(offsets_1);
+ rec_offs_init(offsets_2);
+
+ page_cur_set_before_first(block, &cur1);
+ page_cur_move_to_next(&cur1);
+
+ cur_rec = page_rec_get_next(
+ page_get_infimum_rec(buf_block_get_frame(new_block)));
+ page_cur_position(cur_rec, new_block, &page_cur);
+
+ while (page_cur_get_rec(&cur1) != rec) {
+ rec_t* cur1_rec = page_cur_get_rec(&cur1);
+ rec_t* ins_rec;
+
+ if (page_rec_is_infimum(cur_rec)) {
+ cur_rec = page_rec_get_next(cur_rec);
+ }
+
+ offsets1 = rec_get_offsets(cur1_rec, index, offsets1, is_leaf,
+ ULINT_UNDEFINED, &heap);
+
+ while (!page_rec_is_supremum(cur_rec)) {
+ ulint cur_matched_fields = 0;
+
+ offsets2 = rec_get_offsets(cur_rec, index, offsets2,
+ is_leaf,
+ ULINT_UNDEFINED, &heap);
+ int cmp = cmp_rec_rec(cur1_rec, cur_rec,
+ offsets1, offsets2, index, false,
+ &cur_matched_fields);
+ if (cmp < 0) {
+ page_cur_move_to_prev(&page_cur);
+ cur_rec = page_cur_get_rec(&page_cur);
+ break;
+ } else if (cmp > 0) {
+ /* Skip small recs. */
+ page_cur_move_to_next(&page_cur);
+ cur_rec = page_cur_get_rec(&page_cur);
+ } else if (is_leaf) {
+ if (rec_get_deleted_flag(
+ cur1_rec,
+ dict_table_is_comp(index->table))) {
+ goto next;
+ } else {
+				/* We have two identical leaf records;
+				instead of copying the source record,
+				clear the delete mark on the record
+				already on the new page */
+ btr_rec_set_deleted_flag(
+ cur_rec, NULL, FALSE);
+ goto next;
+ }
+ }
+ }
+
+		/* If the position is on the supremum rec, we need to move
+		to the previous rec. */
+ if (page_rec_is_supremum(cur_rec)) {
+ page_cur_move_to_prev(&page_cur);
+ }
+
+ cur_rec = page_cur_get_rec(&page_cur);
+
+ offsets1 = rec_get_offsets(cur1_rec, index, offsets1, is_leaf,
+ ULINT_UNDEFINED, &heap);
+
+ ins_rec = page_cur_insert_rec_low(cur_rec, index,
+ cur1_rec, offsets1, mtr);
+ if (UNIV_UNLIKELY(!ins_rec)) {
+ fprintf(stderr, "page number %ld and %ld\n",
+ (long)new_block->page.id.page_no(),
+ (long)block->page.id.page_no());
+
+ ib::fatal() << "rec offset " << page_offset(rec)
+ << ", cur1 offset "
+ << page_offset(page_cur_get_rec(&cur1))
+ << ", cur_rec offset "
+ << page_offset(cur_rec);
+ }
+
+ rec_move[moved].new_rec = ins_rec;
+ rec_move[moved].old_rec = cur1_rec;
+ rec_move[moved].moved = false;
+ moved++;
+next:
+ if (moved > max_move) {
+ ut_ad(0);
+ break;
+ }
+
+ page_cur_move_to_next(&cur1);
+ }
+
+ *num_moved = moved;
+}
+
+/****************************************************************//**
+Check whether two MBRs are identical or need to be merged */
+bool
+rtr_merge_mbr_changed(
+/*==================*/
+ btr_cur_t* cursor, /*!< in/out: cursor */
+ btr_cur_t* cursor2, /*!< in: the other cursor */
+ offset_t* offsets, /*!< in: rec offsets */
+ offset_t* offsets2, /*!< in: rec offsets */
+ rtr_mbr_t* new_mbr, /*!< out: MBR to update */
+ buf_block_t* merge_block, /*!< in: page to merge */
+	buf_block_t*	block,		/*!< in: page being merged */
+ dict_index_t* index) /*!< in: index */
+{
+ double* mbr;
+ double mbr1[SPDIMS * 2];
+ double mbr2[SPDIMS * 2];
+ rec_t* rec;
+ ulint len;
+ bool changed = false;
+
+ ut_ad(dict_index_is_spatial(cursor->index));
+
+ rec = btr_cur_get_rec(cursor);
+
+ rtr_read_mbr(rec_get_nth_field(rec, offsets, 0, &len),
+ reinterpret_cast<rtr_mbr_t*>(mbr1));
+
+ rec = btr_cur_get_rec(cursor2);
+
+ rtr_read_mbr(rec_get_nth_field(rec, offsets2, 0, &len),
+ reinterpret_cast<rtr_mbr_t*>(mbr2));
+
+ mbr = reinterpret_cast<double*>(new_mbr);
+
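+	/* For each dimension, take the smaller low bound and the
+	larger high bound of the two MBRs; flag a change whenever the
+	source MBRs differ in any coordinate. */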
+ for (int i = 0; i < SPDIMS * 2; i += 2) {
+ changed = (changed || mbr1[i] != mbr2[i]);
+ *mbr = mbr1[i] < mbr2[i] ? mbr1[i] : mbr2[i];
+ mbr++;
+		changed = (changed || mbr1[i + 1] != mbr2[i + 1]);
+ *mbr = mbr1[i + 1] > mbr2[i + 1] ? mbr1[i + 1] : mbr2[i + 1];
+ mbr++;
+ }
+
+ return(changed);
+}
+
+/****************************************************************//**
+Merge two MBRs and update the MBR that the cursor is on. */
+dberr_t
+rtr_merge_and_update_mbr(
+/*=====================*/
+ btr_cur_t* cursor, /*!< in/out: cursor */
+ btr_cur_t* cursor2, /*!< in: the other cursor */
+ offset_t* offsets, /*!< in: rec offsets */
+ offset_t* offsets2, /*!< in: rec offsets */
+ page_t* child_page, /*!< in: the page. */
+ buf_block_t* merge_block, /*!< in: page to merge */
+	buf_block_t*	block,		/*!< in: page being merged */
+ dict_index_t* index, /*!< in: index */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ dberr_t err = DB_SUCCESS;
+ rtr_mbr_t new_mbr;
+ bool changed = false;
+
+ ut_ad(dict_index_is_spatial(cursor->index));
+
+ changed = rtr_merge_mbr_changed(cursor, cursor2, offsets, offsets2,
+ &new_mbr, merge_block,
+ block, index);
+
+	/* Update the MBR field of the rec; this will also delete the
+	record pointed to by cursor2 */
+ if (changed) {
+ if (!rtr_update_mbr_field(cursor, offsets, cursor2, child_page,
+ &new_mbr, NULL, mtr)) {
+ err = DB_ERROR;
+ }
+ } else {
+ rtr_node_ptr_delete(cursor2->index, cursor2, block, mtr);
+ }
+
+ return(err);
+}
+
+/*************************************************************//**
+Deletes on the upper level the node pointer to a page. */
+void
+rtr_node_ptr_delete(
+/*================*/
+ dict_index_t* index, /*!< in: index tree */
+ btr_cur_t* cursor, /*!< in: search cursor, contains information
+ about parent nodes in search */
+ buf_block_t* block, /*!< in: page whose node pointer is deleted */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ ibool compressed;
+ dberr_t err;
+
+ compressed = btr_cur_pessimistic_delete(&err, TRUE, cursor,
+ BTR_CREATE_FLAG, false, mtr);
+ ut_a(err == DB_SUCCESS);
+
+ if (!compressed) {
+ btr_cur_compress_if_useful(cursor, FALSE, mtr);
+ }
+}
+
+/**************************************************************//**
+Check whether an R-tree page is a child of a parent page
+@return true if there is a child/parent relationship */
+bool
+rtr_check_same_block(
+/*================*/
+ dict_index_t* index, /*!< in: index tree */
+ btr_cur_t* cursor, /*!< in/out: position at the parent entry
+ pointing to the child if successful */
+ buf_block_t* parentb,/*!< in: parent page to check */
+ buf_block_t* childb, /*!< in: child Page */
+ mem_heap_t* heap) /*!< in: memory heap */
+{
+ ulint page_no = childb->page.id.page_no();
+ offset_t* offsets;
+ rec_t* rec = page_rec_get_next(page_get_infimum_rec(
+ buf_block_get_frame(parentb)));
+
+ while (!page_rec_is_supremum(rec)) {
+ offsets = rec_get_offsets(
+ rec, index, NULL, false, ULINT_UNDEFINED, &heap);
+
+ if (btr_node_ptr_get_child_page_no(rec, offsets) == page_no) {
+ btr_cur_position(index, rec, parentb, cursor);
+ return(true);
+ }
+
+ rec = page_rec_get_next(rec);
+ }
+
+ return(false);
+}
+
+/****************************************************************//**
+Calculate the area increase caused by a new record
+@return area increase */
+double
+rtr_rec_cal_increase(
+/*=================*/
+ const dtuple_t* dtuple, /*!< in: data tuple to insert, which
+ cause area increase */
+ const rec_t* rec, /*!< in: physical record which differs from
+ dtuple in some of the common fields, or which
+ has an equal number or more fields than
+ dtuple */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
+ double* area) /*!< out: increased area */
+{
+ const dfield_t* dtuple_field;
+ ulint dtuple_f_len;
+ ulint rec_f_len;
+ const byte* rec_b_ptr;
+ double ret = 0;
+
+ ut_ad(!page_rec_is_supremum(rec));
+ ut_ad(!page_rec_is_infimum(rec));
+
+ dtuple_field = dtuple_get_nth_field(dtuple, 0);
+ dtuple_f_len = dfield_get_len(dtuple_field);
+
+ rec_b_ptr = rec_get_nth_field(rec, offsets, 0, &rec_f_len);
+ ret = rtree_area_increase(
+ rec_b_ptr,
+ static_cast<const byte*>(dfield_get_data(dtuple_field)),
+ static_cast<int>(dtuple_f_len), area);
+
+ return(ret);
+}
+
+/** Estimates the number of rows in a given area.
+@param[in] index index
+@param[in] tuple range tuple containing mbr, may also be empty tuple
+@param[in] mode search mode
+@return estimated number of rows */
+int64_t
+rtr_estimate_n_rows_in_range(
+ dict_index_t* index,
+ const dtuple_t* tuple,
+ page_cur_mode_t mode)
+{
+ /* Check tuple & mode */
+ if (tuple->n_fields == 0) {
+ return(HA_POS_ERROR);
+ }
+
+ switch (mode) {
+ case PAGE_CUR_DISJOINT:
+ case PAGE_CUR_CONTAIN:
+ case PAGE_CUR_INTERSECT:
+ case PAGE_CUR_WITHIN:
+ case PAGE_CUR_MBR_EQUAL:
+ break;
+ default:
+ return(HA_POS_ERROR);
+ }
+
+ DBUG_EXECUTE_IF("rtr_pcur_move_to_next_return",
+ return(2);
+ );
+
+ /* Read mbr from tuple. */
+ const dfield_t* dtuple_field;
+ ulint dtuple_f_len MY_ATTRIBUTE((unused));
+ rtr_mbr_t range_mbr;
+ double range_area;
+
+ dtuple_field = dtuple_get_nth_field(tuple, 0);
+ dtuple_f_len = dfield_get_len(dtuple_field);
+ const byte* range_mbr_ptr = static_cast<const byte*>(
+ dfield_get_data(dtuple_field));
+
+ ut_ad(dtuple_f_len >= DATA_MBR_LEN);
+ rtr_read_mbr(range_mbr_ptr, &range_mbr);
+ range_area = (range_mbr.xmax - range_mbr.xmin)
+ * (range_mbr.ymax - range_mbr.ymin);
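+	/* The estimate is area based: each entry on the root page
+	contributes a fraction of its rows according to how much of its
+	MBR satisfies the search mode, and the sum is finally scaled by
+	the number of table rows per root page entry. */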
+
+ /* Get index root page. */
+ page_size_t page_size(dict_table_page_size(index->table));
+ page_id_t page_id(dict_index_get_space(index),
+ dict_index_get_page(index));
+ mtr_t mtr;
+ buf_block_t* block;
+ page_t* page;
+ ulint n_recs;
+
+ mtr_start(&mtr);
+ mtr.set_named_space(dict_index_get_space(index));
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+
+ block = btr_block_get(page_id, page_size, RW_S_LATCH, index, &mtr);
+ page = buf_block_get_frame(block);
+ n_recs = page_header_get_field(page, PAGE_N_RECS);
+
+ if (n_recs == 0) {
+ mtr_commit(&mtr);
+ return(HA_POS_ERROR);
+ }
+
+ rec_t* rec;
+ byte* field;
+ ulint len;
+ offset_t* offsets = NULL;
+ mem_heap_t* heap;
+
+ heap = mem_heap_create(512);
+ rec = page_rec_get_next(page_get_infimum_rec(page));
+ offsets = rec_get_offsets(rec, index, offsets, page_rec_is_leaf(rec),
+ ULINT_UNDEFINED, &heap);
+
+ /* Scan records in root page and calculate area. */
+ double area = 0;
+ while (!page_rec_is_supremum(rec)) {
+ rtr_mbr_t mbr;
+ double rec_area;
+
+ field = rec_get_nth_field(rec, offsets, 0, &len);
+ ut_ad(len == DATA_MBR_LEN);
+
+ rtr_read_mbr(field, &mbr);
+
+ rec_area = (mbr.xmax - mbr.xmin) * (mbr.ymax - mbr.ymin);
+
+ if (rec_area == 0) {
+ switch (mode) {
+ case PAGE_CUR_CONTAIN:
+ case PAGE_CUR_INTERSECT:
+ area += 1;
+ break;
+
+ case PAGE_CUR_DISJOINT:
+ break;
+
+ case PAGE_CUR_WITHIN:
+ case PAGE_CUR_MBR_EQUAL:
+ if (rtree_key_cmp(
+ PAGE_CUR_WITHIN, range_mbr_ptr,
+ DATA_MBR_LEN, field, DATA_MBR_LEN)
+ == 0) {
+ area += 1;
+ }
+
+ break;
+
+ default:
+ ut_error;
+ }
+ } else {
+ switch (mode) {
+ case PAGE_CUR_CONTAIN:
+ case PAGE_CUR_INTERSECT:
+ area += rtree_area_overlapping(range_mbr_ptr,
+ field, DATA_MBR_LEN) / rec_area;
+ break;
+
+ case PAGE_CUR_DISJOINT:
+ area += 1;
+ area -= rtree_area_overlapping(range_mbr_ptr,
+ field, DATA_MBR_LEN) / rec_area;
+ break;
+
+ case PAGE_CUR_WITHIN:
+ case PAGE_CUR_MBR_EQUAL:
+ if (rtree_key_cmp(
+ PAGE_CUR_WITHIN, range_mbr_ptr,
+ DATA_MBR_LEN, field, DATA_MBR_LEN)
+ == 0) {
+ area += range_area / rec_area;
+ }
+
+ break;
+ default:
+ ut_error;
+ }
+ }
+
+ rec = page_rec_get_next(rec);
+ }
+
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
+
+ if (!std::isfinite(area)) {
+ return(HA_POS_ERROR);
+ }
+
+ return(static_cast<int64_t>(dict_table_get_n_rows(index->table)
+ * area / n_recs));
+}
diff --git a/storage/innobase/gis/gis0sea.cc b/storage/innobase/gis/gis0sea.cc
new file mode 100644
index 00000000000..7ac529ed0db
--- /dev/null
+++ b/storage/innobase/gis/gis0sea.cc
@@ -0,0 +1,2017 @@
+/*****************************************************************************
+
+Copyright (c) 2016, 2018, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2018, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file gis/gis0sea.cc
+InnoDB R-tree search interfaces
+
+Created 2014/01/16 Jimmy Yang
+***********************************************************************/
+
+#include "fsp0fsp.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "page0zip.h"
+#include "gis0rtree.h"
+#include "btr0cur.h"
+#include "btr0sea.h"
+#include "btr0pcur.h"
+#include "rem0cmp.h"
+#include "lock0lock.h"
+#include "ibuf0ibuf.h"
+#include "trx0trx.h"
+#include "srv0mon.h"
+#include "que0que.h"
+#include "gis0geo.h"
+
+/** Restore the stored position of a persistent cursor, buffer-fixing
+the page */
+static
+bool
+rtr_cur_restore_position(
+ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
+ btr_cur_t* cursor, /*!< in: detached persistent cursor */
+ ulint level, /*!< in: index level */
+ mtr_t* mtr); /*!< in: mtr */
+
+/*************************************************************//**
+Pop out used parent path entries until we find the parent entry whose
+child page number matches */
+static
+void
+rtr_adjust_parent_path(
+/*===================*/
+ rtr_info_t* rtr_info, /* R-Tree info struct */
+ ulint page_no) /* page number to look for */
+{
+ while (!rtr_info->parent_path->empty()) {
+ if (rtr_info->parent_path->back().child_no == page_no) {
+ break;
+ } else {
+ if (rtr_info->parent_path->back().cursor) {
+ btr_pcur_close(
+ rtr_info->parent_path->back().cursor);
+ ut_free(rtr_info->parent_path->back().cursor);
+ }
+
+ rtr_info->parent_path->pop_back();
+ }
+ }
+}
+
+/*************************************************************//**
+Find the next matching record. This function is used by search
+or record locating during index delete/update.
+@return true if a suitable record is found, otherwise false */
+static
+bool
+rtr_pcur_getnext_from_path(
+/*=======================*/
+ const dtuple_t* tuple, /*!< in: data tuple */
+ page_cur_mode_t mode, /*!< in: cursor search mode */
+ btr_cur_t* btr_cur,/*!< in: persistent cursor; NOTE that the
+ function may release the page latch */
+ ulint target_level,
+ /*!< in: target level */
+ ulint latch_mode,
+ /*!< in: latch_mode */
+ bool index_locked,
+ /*!< in: index tree locked */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ dict_index_t* index = btr_cur->index;
+ bool found = false;
+ ulint space = dict_index_get_space(index);
+ page_cur_t* page_cursor;
+ ulint level = 0;
+ node_visit_t next_rec;
+ rtr_info_t* rtr_info = btr_cur->rtr_info;
+ node_seq_t page_ssn;
+ ulint my_latch_mode;
+ ulint skip_parent = false;
+ bool new_split = false;
+ bool need_parent;
+ bool for_delete = false;
+ bool for_undo_ins = false;
+
+	/* Return if all the pages to be searched have been exhausted */
+ if (rtr_info->path->empty()) {
+ return(false);
+ }
+
+ ut_ad(dtuple_get_n_fields_cmp(tuple));
+
+ my_latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
+
+ for_delete = latch_mode & BTR_RTREE_DELETE_MARK;
+ for_undo_ins = latch_mode & BTR_RTREE_UNDO_INS;
+
+	/* No insert should come through this function; the only
+	BTR_MODIFY_* operations expected here are deletes */
+ ut_ad(mode != PAGE_CUR_RTREE_INSERT);
+ ut_ad(my_latch_mode == BTR_SEARCH_LEAF
+ || my_latch_mode == BTR_MODIFY_LEAF
+ || my_latch_mode == BTR_MODIFY_TREE
+ || my_latch_mode == BTR_CONT_MODIFY_TREE);
+
+	/* Decide whether we need to track parent information. This is
+	only needed for tree-altering operations (such as index page
+	merge) */
+ need_parent = ((my_latch_mode == BTR_MODIFY_TREE
+ || my_latch_mode == BTR_CONT_MODIFY_TREE)
+ && mode == PAGE_CUR_RTREE_LOCATE);
+
+ if (!index_locked) {
+ ut_ad(latch_mode & BTR_SEARCH_LEAF
+ || latch_mode & BTR_MODIFY_LEAF);
+ mtr_s_lock(dict_index_get_lock(index), mtr);
+ } else {
+ ut_ad(mtr_memo_contains_flagged(mtr, &index->lock,
+ MTR_MEMO_SX_LOCK
+ | MTR_MEMO_S_LOCK
+ | MTR_MEMO_X_LOCK));
+ }
+
+ const page_size_t& page_size = dict_table_page_size(index->table);
+
+	/* Pop each node/page to be searched from the "path" structure
+	and do a search on it. Please note that any pages in the
+	"path" structure are protected by "page" locks, so they
+	cannot be shrunk away */
+ do {
+ buf_block_t* block;
+ node_seq_t path_ssn;
+ const page_t* page;
+ ulint rw_latch = RW_X_LATCH;
+ ulint tree_idx;
+
+ mutex_enter(&rtr_info->rtr_path_mutex);
+ next_rec = rtr_info->path->back();
+ rtr_info->path->pop_back();
+ level = next_rec.level;
+ path_ssn = next_rec.seq_no;
+ tree_idx = btr_cur->tree_height - level - 1;
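+		/* tree_idx maps the level to a slot in the
+		tree_blocks[] and tree_savepoints[] arrays: the root
+		level uses slot 0, the leaf level slot
+		tree_height - 1. */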
+
+ /* Maintain the parent path info as well, if needed */
+ if (need_parent && !skip_parent && !new_split) {
+ ulint old_level;
+ ulint new_level;
+
+ ut_ad(!rtr_info->parent_path->empty());
+
+ /* Cleanup unused parent info */
+ if (rtr_info->parent_path->back().cursor) {
+ btr_pcur_close(
+ rtr_info->parent_path->back().cursor);
+ ut_free(rtr_info->parent_path->back().cursor);
+ }
+
+ old_level = rtr_info->parent_path->back().level;
+
+ rtr_info->parent_path->pop_back();
+
+ ut_ad(!rtr_info->parent_path->empty());
+
+ /* check whether there is a level change. If so,
+ the current parent path needs to pop enough
+ nodes to adjust to the new search page */
+ new_level = rtr_info->parent_path->back().level;
+
+ if (old_level < new_level) {
+ rtr_adjust_parent_path(
+ rtr_info, next_rec.page_no);
+ }
+
+ ut_ad(!rtr_info->parent_path->empty());
+
+ ut_ad(next_rec.page_no
+ == rtr_info->parent_path->back().child_no);
+ }
+
+ mutex_exit(&rtr_info->rtr_path_mutex);
+
+ skip_parent = false;
+ new_split = false;
+
+		/* Once we have pages in "path", these pages are
+		predicate-page-locked, so they cannot be shrunk away.
+		They also carry an SSN (split sequence number) for
+		detecting splits, so we can directly latch a single page
+		while fetching it; the page can be unlatched if it does
+		not qualify. One reason for pre-latching is that we might
+		need to position a parent cursor (which requires a latch)
+		during the search */
+ if (level == 0) {
+ /* S latched for SEARCH_LEAF, and X latched
+ for MODIFY_LEAF */
+ if (my_latch_mode <= BTR_MODIFY_LEAF) {
+ rw_latch = my_latch_mode;
+ }
+
+ if (my_latch_mode == BTR_CONT_MODIFY_TREE
+ || my_latch_mode == BTR_MODIFY_TREE) {
+ rw_latch = RW_NO_LATCH;
+ }
+
+ } else if (level == target_level) {
+ rw_latch = RW_X_LATCH;
+ }
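+		/* With RW_NO_LATCH the block is only buffer-fixed here;
+		for BTR_MODIFY_TREE the leaf latches are taken later via
+		btr_cur_latch_leaves() once a matching record is
+		found. */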
+
+		/* Release previously locked blocks */
+ if (my_latch_mode != BTR_SEARCH_LEAF) {
+ for (ulint idx = 0; idx < btr_cur->tree_height;
+ idx++) {
+ if (rtr_info->tree_blocks[idx]) {
+ mtr_release_block_at_savepoint(
+ mtr,
+ rtr_info->tree_savepoints[idx],
+ rtr_info->tree_blocks[idx]);
+ rtr_info->tree_blocks[idx] = NULL;
+ }
+ }
+ for (ulint idx = RTR_MAX_LEVELS; idx < RTR_MAX_LEVELS + 3;
+ idx++) {
+ if (rtr_info->tree_blocks[idx]) {
+ mtr_release_block_at_savepoint(
+ mtr,
+ rtr_info->tree_savepoints[idx],
+ rtr_info->tree_blocks[idx]);
+ rtr_info->tree_blocks[idx] = NULL;
+ }
+ }
+ }
+
+ /* set up savepoint to record any locks to be taken */
+ rtr_info->tree_savepoints[tree_idx] = mtr_set_savepoint(mtr);
+
+#ifdef UNIV_RTR_DEBUG
+ ut_ad(!(rw_lock_own_flagged(&btr_cur->page_cur.block->lock,
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_S))
+ || my_latch_mode == BTR_MODIFY_TREE
+ || my_latch_mode == BTR_CONT_MODIFY_TREE
+ || !page_is_leaf(buf_block_get_frame(
+ btr_cur->page_cur.block)));
+#endif /* UNIV_RTR_DEBUG */
+
+ page_id_t page_id(space, next_rec.page_no);
+ dberr_t err = DB_SUCCESS;
+
+ block = buf_page_get_gen(
+ page_id, page_size,
+ rw_latch, NULL, BUF_GET, __FILE__, __LINE__, mtr, &err);
+
+ if (block == NULL) {
+ continue;
+ } else if (rw_latch != RW_NO_LATCH) {
+ ut_ad(!dict_index_is_ibuf(index));
+ buf_block_dbg_add_level(block, SYNC_TREE_NODE);
+ }
+
+ rtr_info->tree_blocks[tree_idx] = block;
+
+ page = buf_block_get_frame(block);
+ page_ssn = page_get_ssn_id(page);
+
+		/* If the page has been split, push the right sibling
+		page as well. Note that we hold an SX lock on
+		index->lock, so no split/shrink can be happening
+		concurrently */
+ if (page_ssn > path_ssn) {
+ uint32_t next_page_no = btr_page_get_next(page);
+ rtr_non_leaf_stack_push(
+ rtr_info->path, next_page_no, path_ssn,
+ level, 0, NULL, 0);
+
+ if (!srv_read_only_mode
+ && mode != PAGE_CUR_RTREE_INSERT
+ && mode != PAGE_CUR_RTREE_LOCATE) {
+ ut_ad(rtr_info->thr);
+ lock_place_prdt_page_lock(
+ space, next_page_no, index,
+ rtr_info->thr);
+ }
+ new_split = true;
+#ifdef UNIV_GIS_DEBUG
+			fprintf(stderr,
+				"GIS_DIAG: Split page found: %d, %ld\n",
+ static_cast<int>(need_parent), next_page_no);
+#endif
+ }
+
+ page_cursor = btr_cur_get_page_cur(btr_cur);
+ page_cursor->rec = NULL;
+
+ if (mode == PAGE_CUR_RTREE_LOCATE) {
+ if (level == target_level && level == 0) {
+ ulint low_match;
+
+ found = false;
+
+ low_match = page_cur_search(
+ block, index, tuple,
+ PAGE_CUR_LE,
+ btr_cur_get_page_cur(btr_cur));
+
+ if (low_match == dtuple_get_n_fields_cmp(
+ tuple)) {
+ rec_t* rec = btr_cur_get_rec(btr_cur);
+
+ if (!rec_get_deleted_flag(rec,
+ dict_table_is_comp(index->table))
+ || (!for_delete && !for_undo_ins)) {
+ found = true;
+ btr_cur->low_match = low_match;
+ } else {
+ /* mark we found deleted row */
+ btr_cur->rtr_info->fd_del
+ = true;
+ }
+ }
+ } else {
+ page_cur_mode_t page_mode = mode;
+
+ if (level == target_level
+ && target_level != 0) {
+ page_mode = PAGE_CUR_RTREE_GET_FATHER;
+ }
+ found = rtr_cur_search_with_match(
+ block, index, tuple, page_mode,
+ page_cursor, btr_cur->rtr_info);
+
+ /* Save the position of parent if needed */
+ if (found && need_parent) {
+ btr_pcur_t* r_cursor =
+ rtr_get_parent_cursor(
+ btr_cur, level, false);
+
+ rec_t* rec = page_cur_get_rec(
+ page_cursor);
+ page_cur_position(
+ rec, block,
+ btr_pcur_get_page_cur(r_cursor));
+ r_cursor->pos_state =
+ BTR_PCUR_IS_POSITIONED;
+ r_cursor->latch_mode = my_latch_mode;
+ btr_pcur_store_position(r_cursor, mtr);
+#ifdef UNIV_DEBUG
+ ulint num_stored =
+ rtr_store_parent_path(
+ block, btr_cur,
+ rw_latch, level, mtr);
+ ut_ad(num_stored > 0);
+#else
+ rtr_store_parent_path(
+ block, btr_cur, rw_latch,
+ level, mtr);
+#endif /* UNIV_DEBUG */
+ }
+ }
+ } else {
+ found = rtr_cur_search_with_match(
+ block, index, tuple, mode, page_cursor,
+ btr_cur->rtr_info);
+ }
+
+		/* Attach a predicate lock if needed, regardless of
+		whether any records matched */
+ if (mode != PAGE_CUR_RTREE_INSERT
+ && mode != PAGE_CUR_RTREE_LOCATE
+ && mode >= PAGE_CUR_CONTAIN
+ && btr_cur->rtr_info->need_prdt_lock) {
+ lock_prdt_t prdt;
+
+ trx_t* trx = thr_get_trx(
+ btr_cur->rtr_info->thr);
+ lock_mutex_enter();
+ lock_init_prdt_from_mbr(
+ &prdt, &btr_cur->rtr_info->mbr,
+ mode, trx->lock.lock_heap);
+ lock_mutex_exit();
+
+ if (rw_latch == RW_NO_LATCH) {
+ rw_lock_s_lock(&(block->lock));
+ }
+
+ lock_prdt_lock(block, &prdt, index, LOCK_S,
+ LOCK_PREDICATE, btr_cur->rtr_info->thr,
+ mtr);
+
+ if (rw_latch == RW_NO_LATCH) {
+ rw_lock_s_unlock(&(block->lock));
+ }
+ }
+
+ if (found) {
+ if (level == target_level) {
+				page_cur_t*	r_cur;
+
+ if (my_latch_mode == BTR_MODIFY_TREE
+ && level == 0) {
+ ut_ad(rw_latch == RW_NO_LATCH);
+ page_id_t my_page_id(
+ space, block->page.id.page_no());
+
+ btr_cur_latch_leaves(
+ block, my_page_id,
+ page_size, BTR_MODIFY_TREE,
+ btr_cur, mtr);
+ }
+
+ r_cur = btr_cur_get_page_cur(btr_cur);
+
+ page_cur_position(
+ page_cur_get_rec(page_cursor),
+ page_cur_get_block(page_cursor),
+ r_cur);
+
+ btr_cur->low_match = level != 0 ?
+ DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1
+ : btr_cur->low_match;
+ break;
+ }
+
+ /* Keep the parent path node, which points to
+ last node just located */
+ skip_parent = true;
+ } else {
+ /* Release latch on the current page */
+ ut_ad(rtr_info->tree_blocks[tree_idx]);
+
+ mtr_release_block_at_savepoint(
+ mtr, rtr_info->tree_savepoints[tree_idx],
+ rtr_info->tree_blocks[tree_idx]);
+ rtr_info->tree_blocks[tree_idx] = NULL;
+ }
+
+ } while (!rtr_info->path->empty());
+
+ const rec_t* rec = btr_cur_get_rec(btr_cur);
+
+ if (page_rec_is_infimum(rec) || page_rec_is_supremum(rec)) {
+ mtr_commit(mtr);
+ mtr_start(mtr);
+ } else if (!index_locked) {
+ mtr_memo_release(mtr, dict_index_get_lock(index),
+ MTR_MEMO_X_LOCK);
+ }
+
+ return(found);
+}
+
+/*************************************************************//**
+Find the next matching record. This function will first exhaust
+the copied records listed in the rtr_info->matches vector before
+moving on to the next page
+@return true if a suitable record is found, otherwise false */
+bool
+rtr_pcur_move_to_next(
+/*==================*/
+ const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in
+ tuple must be set so that it cannot get
+ compared to the node ptr page number field! */
+ page_cur_mode_t mode, /*!< in: cursor search mode */
+ btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the
+ function may release the page latch */
+ ulint level, /*!< in: target level */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ rtr_info_t* rtr_info = cursor->btr_cur.rtr_info;
+
+ ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+
+ mutex_enter(&rtr_info->matches->rtr_match_mutex);
+ /* First retrieve the next record on the current page */
+ if (!rtr_info->matches->matched_recs->empty()) {
+ rtr_rec_t rec;
+ rec = rtr_info->matches->matched_recs->back();
+ rtr_info->matches->matched_recs->pop_back();
+ mutex_exit(&rtr_info->matches->rtr_match_mutex);
+
+ cursor->btr_cur.page_cur.rec = rec.r_rec;
+ cursor->btr_cur.page_cur.block = &rtr_info->matches->block;
+
+ DEBUG_SYNC_C("rtr_pcur_move_to_next_return");
+ return(true);
+ }
+
+ mutex_exit(&rtr_info->matches->rtr_match_mutex);
+
+ /* Fetch the next page */
+ return(rtr_pcur_getnext_from_path(tuple, mode, &cursor->btr_cur,
+ level, cursor->latch_mode,
+ false, mtr));
+}
+
+/*************************************************************//**
+Check if the cursor holds a record pointing to the specified child page
+@return true if it points to the child page, false otherwise */
+static
+bool
+rtr_compare_cursor_rec(
+/*===================*/
+ dict_index_t* index, /*!< in: index */
+ btr_cur_t* cursor, /*!< in: Cursor to check */
+ ulint page_no, /*!< in: desired child page number */
+ mem_heap_t** heap) /*!< in: memory heap */
+{
+ const rec_t* rec;
+ offset_t* offsets;
+
+ rec = btr_cur_get_rec(cursor);
+
+ offsets = rec_get_offsets(
+ rec, index, NULL, false, ULINT_UNDEFINED, heap);
+
+ return(btr_node_ptr_get_child_page_no(rec, offsets) == page_no);
+}
+
+/**************************************************************//**
+Initializes and opens a persistent cursor to an index tree. It should be
+closed with btr_pcur_close. Mainly called by row_search_index_entry() */
+void
+rtr_pcur_open_low(
+/*==============*/
+ dict_index_t* index, /*!< in: index */
+ ulint level, /*!< in: level in the rtree */
+ const dtuple_t* tuple, /*!< in: tuple on which search done */
+ page_cur_mode_t mode, /*!< in: PAGE_CUR_RTREE_LOCATE, ... */
+ ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
+ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
+ const char* file, /*!< in: file name */
+ unsigned line, /*!< in: line where called */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ btr_cur_t* btr_cursor;
+ ulint n_fields;
+ ulint low_match;
+ rec_t* rec;
+ bool tree_latched = false;
+ bool for_delete = false;
+ bool for_undo_ins = false;
+
+ ut_ad(level == 0);
+
+ ut_ad(latch_mode & BTR_MODIFY_LEAF || latch_mode & BTR_MODIFY_TREE);
+ ut_ad(mode == PAGE_CUR_RTREE_LOCATE);
+
+ /* Initialize the cursor */
+
+ btr_pcur_init(cursor);
+
+ for_delete = latch_mode & BTR_RTREE_DELETE_MARK;
+ for_undo_ins = latch_mode & BTR_RTREE_UNDO_INS;
+
+ cursor->latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
+ cursor->search_mode = mode;
+
+ /* Search with the tree cursor */
+
+ btr_cursor = btr_pcur_get_btr_cur(cursor);
+
+ btr_cursor->rtr_info = rtr_create_rtr_info(false, false,
+ btr_cursor, index);
+
+	/* Purge will SX-lock the tree instead of taking page locks */
+ if (btr_cursor->thr) {
+ btr_cursor->rtr_info->need_page_lock = true;
+ btr_cursor->rtr_info->thr = btr_cursor->thr;
+ }
+
+ btr_cur_search_to_nth_level(index, level, tuple, mode, latch_mode,
+ btr_cursor, 0, file, line, mtr);
+ cursor->pos_state = BTR_PCUR_IS_POSITIONED;
+
+ cursor->trx_if_known = NULL;
+
+ low_match = btr_pcur_get_low_match(cursor);
+
+ rec = btr_pcur_get_rec(cursor);
+
+ n_fields = dtuple_get_n_fields(tuple);
+
+ if (latch_mode & BTR_ALREADY_S_LATCHED) {
+ ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
+ MTR_MEMO_S_LOCK));
+ tree_latched = true;
+ }
+
+ if (latch_mode & BTR_MODIFY_TREE) {
+ ut_ad(mtr_memo_contains_flagged(mtr, &index->lock,
+ MTR_MEMO_X_LOCK
+ | MTR_MEMO_SX_LOCK));
+ tree_latched = true;
+ }
+
+ if (page_rec_is_infimum(rec) || low_match != n_fields
+ || (rec_get_deleted_flag(rec, dict_table_is_comp(index->table))
+ && (for_delete || for_undo_ins))) {
+
+ if (rec_get_deleted_flag(rec, dict_table_is_comp(index->table))
+ && for_delete) {
+ btr_cursor->rtr_info->fd_del = true;
+ btr_cursor->low_match = 0;
+ }
+		/* Did not find a matching row in the first dive. Release
+		any latched block before searching more pages */
+ if (latch_mode & BTR_MODIFY_LEAF) {
+ ulint tree_idx = btr_cursor->tree_height - 1;
+ rtr_info_t* rtr_info = btr_cursor->rtr_info;
+
+ ut_ad(level == 0);
+
+ if (rtr_info->tree_blocks[tree_idx]) {
+ mtr_release_block_at_savepoint(
+ mtr,
+ rtr_info->tree_savepoints[tree_idx],
+ rtr_info->tree_blocks[tree_idx]);
+ rtr_info->tree_blocks[tree_idx] = NULL;
+ }
+ }
+
+ bool ret = rtr_pcur_getnext_from_path(
+ tuple, mode, btr_cursor, level, latch_mode,
+ tree_latched, mtr);
+
+ if (ret) {
+ low_match = btr_pcur_get_low_match(cursor);
+ ut_ad(low_match == n_fields);
+ }
+ }
+}
+
+/** Get the rtree page father.
+@param[in] index rtree index
+@param[in] block child page in the index
+@param[in] mtr mtr
+@param[in] sea_cur search cursor, contains information
+ about parent nodes in search
+@param[in] cursor cursor on node pointer record,
+ its page x-latched */
+void
+rtr_page_get_father(
+ dict_index_t* index,
+ buf_block_t* block,
+ mtr_t* mtr,
+ btr_cur_t* sea_cur,
+ btr_cur_t* cursor)
+{
+ mem_heap_t* heap = mem_heap_create(100);
+#ifdef UNIV_DEBUG
+ offset_t* offsets;
+
+ offsets = rtr_page_get_father_block(
+ NULL, heap, index, block, mtr, sea_cur, cursor);
+
+ ulint page_no = btr_node_ptr_get_child_page_no(cursor->page_cur.rec,
+ offsets);
+
+ ut_ad(page_no == block->page.id.page_no());
+#else
+ rtr_page_get_father_block(
+ NULL, heap, index, block, mtr, sea_cur, cursor);
+#endif
+
+ mem_heap_free(heap);
+}
+
+/** Returns the upper level node pointer to an R-Tree page. It is assumed
+that mtr holds an SX-latch or X-latch on the tree.
+@return rec_get_offsets() of the node pointer record */
+static
+offset_t*
+rtr_page_get_father_node_ptr(
+ offset_t* offsets,/*!< in: work area for the return value */
+ mem_heap_t* heap, /*!< in: memory heap to use */
+ btr_cur_t* sea_cur,/*!< in: search cursor */
+ btr_cur_t* cursor, /*!< in: cursor pointing to user record,
+ out: cursor on node pointer record,
+ its page x-latched */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ dtuple_t* tuple;
+ rec_t* user_rec;
+ rec_t* node_ptr;
+ ulint level;
+ ulint page_no;
+ dict_index_t* index;
+ rtr_mbr_t mbr;
+
+ page_no = btr_cur_get_block(cursor)->page.id.page_no();
+ index = btr_cur_get_index(cursor);
+
+ ut_ad(srv_read_only_mode
+ || mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
+ MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK));
+
+ ut_ad(dict_index_get_page(index) != page_no);
+
+ level = btr_page_get_level(btr_cur_get_page(cursor), mtr);
+
+ user_rec = btr_cur_get_rec(cursor);
+ ut_a(page_rec_is_user_rec(user_rec));
+
+ offsets = rec_get_offsets(user_rec, index, offsets, !level,
+ ULINT_UNDEFINED, &heap);
+ rtr_get_mbr_from_rec(user_rec, offsets, &mbr);
+
+ tuple = rtr_index_build_node_ptr(
+ index, &mbr, user_rec, page_no, heap, level);
+
+ if (sea_cur && !sea_cur->rtr_info) {
+ sea_cur = NULL;
+ }
+
+ rtr_get_father_node(index, level + 1, tuple, sea_cur, cursor,
+ page_no, mtr);
+
+ node_ptr = btr_cur_get_rec(cursor);
+ ut_ad(!page_rec_is_comp(node_ptr)
+ || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR);
+ offsets = rec_get_offsets(node_ptr, index, offsets, false,
+ ULINT_UNDEFINED, &heap);
+
+ ulint child_page = btr_node_ptr_get_child_page_no(node_ptr, offsets);
+
+ if (child_page != page_no) {
+ const rec_t* print_rec;
+
+ ib::fatal error;
+
+ error << "Corruption of index " << index->name
+ << " of table " << index->table->name
+ << " parent page " << page_no
+ << " child page " << child_page;
+
+ print_rec = page_rec_get_next(
+ page_get_infimum_rec(page_align(user_rec)));
+ offsets = rec_get_offsets(print_rec, index, offsets,
+ page_rec_is_leaf(user_rec),
+ ULINT_UNDEFINED, &heap);
+ error << "; child ";
+ rec_print(error.m_oss, print_rec,
+ rec_get_info_bits(print_rec, rec_offs_comp(offsets)),
+ offsets);
+ offsets = rec_get_offsets(node_ptr, index, offsets, false,
+ ULINT_UNDEFINED, &heap);
+ error << "; parent ";
+		rec_print(error.m_oss, node_ptr,
+			  rec_get_info_bits(node_ptr, rec_offs_comp(offsets)),
+			  offsets);
+
+ error << ". You should dump + drop + reimport the table to"
+ " fix the corruption. If the crash happens at"
+ " database startup, see "
+ "https://mariadb.com/kb/en/library/innodb-recovery-modes/"
+ " about forcing"
+ " recovery. Then dump + drop + reimport.";
+ }
+
+ return(offsets);
+}
+
+/************************************************************//**
+Returns the father block to a page. It is assumed that mtr holds
+an X or SX latch on the tree.
+@return rec_get_offsets() of the node pointer record */
+offset_t*
+rtr_page_get_father_block(
+/*======================*/
+ offset_t* offsets,/*!< in: work area for the return value */
+ mem_heap_t* heap, /*!< in: memory heap to use */
+ dict_index_t* index, /*!< in: b-tree index */
+ buf_block_t* block, /*!< in: child page in the index */
+ mtr_t* mtr, /*!< in: mtr */
+ btr_cur_t* sea_cur,/*!< in: search cursor, contains information
+ about parent nodes in search */
+ btr_cur_t* cursor) /*!< out: cursor on node pointer record,
+ its page x-latched */
+{
+ rec_t* rec = page_rec_get_next(
+ page_get_infimum_rec(buf_block_get_frame(block)));
+ btr_cur_position(index, rec, block, cursor);
+
+ return(rtr_page_get_father_node_ptr(offsets, heap, sea_cur,
+ cursor, mtr));
+}
+
+/********************************************************************//**
+Returns the upper level node pointer to an R-Tree page. It is assumed
+that mtr holds an x-latch on the tree. */
+void
+rtr_get_father_node(
+/*================*/
+ dict_index_t* index, /*!< in: index */
+ ulint level, /*!< in: the tree level of search */
+ const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in
+ tuple must be set so that it cannot get
+ compared to the node ptr page number field! */
+ btr_cur_t* sea_cur,/*!< in: search cursor */
+ btr_cur_t* btr_cur,/*!< in/out: tree cursor; the cursor page is
+ s- or x-latched, but see also above! */
+	ulint		page_no,/*!< in: current page no */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ mem_heap_t* heap = NULL;
+ bool ret = false;
+ const rec_t* rec;
+ ulint n_fields;
+ bool new_rtr = false;
+
+	/* Try to optimally locate the parent node. Level should always
+	be less than sea_cur->tree_height unless the root is splitting */
+ if (sea_cur && sea_cur->tree_height > level) {
+
+ ut_ad(mtr_memo_contains_flagged(mtr,
+ dict_index_get_lock(index),
+ MTR_MEMO_X_LOCK
+ | MTR_MEMO_SX_LOCK));
+ ret = rtr_cur_restore_position(
+ BTR_CONT_MODIFY_TREE, sea_cur, level, mtr);
+
+		/* Since we block the shrinking of tree nodes while there
+		are active searches on them, this optimal locating should
+		always succeed */
+ ut_ad(ret);
+
+ if (ret) {
+ btr_pcur_t* r_cursor = rtr_get_parent_cursor(
+ sea_cur, level, false);
+
+ rec = btr_pcur_get_rec(r_cursor);
+
+ ut_ad(r_cursor->rel_pos == BTR_PCUR_ON);
+ page_cur_position(rec,
+ btr_pcur_get_block(r_cursor),
+ btr_cur_get_page_cur(btr_cur));
+ btr_cur->rtr_info = sea_cur->rtr_info;
+ btr_cur->tree_height = sea_cur->tree_height;
+ ut_ad(rtr_compare_cursor_rec(
+ index, btr_cur, page_no, &heap));
+ goto func_exit;
+ }
+ }
+
+	/* We arrive here in one of two scenarios:
+	1) check table and btr_validate
+	2) the index root page is being raised */
+ ut_ad(!sea_cur || sea_cur->tree_height == level);
+
+ if (btr_cur->rtr_info) {
+ rtr_clean_rtr_info(btr_cur->rtr_info, true);
+ } else {
+ new_rtr = true;
+ }
+
+ btr_cur->rtr_info = rtr_create_rtr_info(false, false, btr_cur, index);
+
+ if (sea_cur && sea_cur->tree_height == level) {
+ /* root split, and search the new root */
+ btr_cur_search_to_nth_level(
+ index, level, tuple, PAGE_CUR_RTREE_LOCATE,
+ BTR_CONT_MODIFY_TREE, btr_cur, 0,
+ __FILE__, __LINE__, mtr);
+
+ } else {
+ /* btr_validate */
+ ut_ad(level >= 1);
+ ut_ad(!sea_cur);
+
+ btr_cur_search_to_nth_level(
+ index, level, tuple, PAGE_CUR_RTREE_LOCATE,
+ BTR_CONT_MODIFY_TREE, btr_cur, 0,
+ __FILE__, __LINE__, mtr);
+
+ rec = btr_cur_get_rec(btr_cur);
+ n_fields = dtuple_get_n_fields_cmp(tuple);
+
+ if (page_rec_is_infimum(rec)
+ || (btr_cur->low_match != n_fields)) {
+ ret = rtr_pcur_getnext_from_path(
+ tuple, PAGE_CUR_RTREE_LOCATE, btr_cur,
+ level, BTR_CONT_MODIFY_TREE,
+ true, mtr);
+
+ ut_ad(ret && btr_cur->low_match == n_fields);
+ }
+ }
+
+ ret = rtr_compare_cursor_rec(
+ index, btr_cur, page_no, &heap);
+
+ ut_ad(ret);
+
+func_exit:
+ if (heap) {
+ mem_heap_free(heap);
+ }
+
+ if (new_rtr && btr_cur->rtr_info) {
+ rtr_clean_rtr_info(btr_cur->rtr_info, true);
+ btr_cur->rtr_info = NULL;
+ }
+}
+
+/*******************************************************************//**
+Create an R-Tree search info structure */
+rtr_info_t*
+rtr_create_rtr_info(
+/******************/
+ bool need_prdt, /*!< in: Whether predicate lock
+ is needed */
+ bool init_matches, /*!< in: Whether to initiate the
+ "matches" structure for collecting
+ matched leaf records */
+ btr_cur_t* cursor, /*!< in: tree search cursor */
+ dict_index_t* index) /*!< in: index struct */
+{
+ rtr_info_t* rtr_info;
+
+ index = index ? index : cursor->index;
+ ut_ad(index);
+
+ rtr_info = static_cast<rtr_info_t*>(ut_zalloc_nokey(sizeof(*rtr_info)));
+
+ rtr_info->allocated = true;
+ rtr_info->cursor = cursor;
+ rtr_info->index = index;
+
+ if (init_matches) {
+ rtr_info->heap = mem_heap_create(sizeof(*(rtr_info->matches)));
+ rtr_info->matches = static_cast<matched_rec_t*>(
+ mem_heap_zalloc(
+ rtr_info->heap,
+ sizeof(*rtr_info->matches)));
+
+ rtr_info->matches->matched_recs
+ = UT_NEW_NOKEY(rtr_rec_vector());
+
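+		/* Point bufp at a page-aligned address inside rec_buf,
+		so that the copied records can be addressed as if they
+		resided on a real page. */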
+ rtr_info->matches->bufp = page_align(rtr_info->matches->rec_buf
+ + UNIV_PAGE_SIZE_MAX + 1);
+ mutex_create(LATCH_ID_RTR_MATCH_MUTEX,
+ &rtr_info->matches->rtr_match_mutex);
+ rw_lock_create(PFS_NOT_INSTRUMENTED,
+ &(rtr_info->matches->block.lock),
+ SYNC_LEVEL_VARYING);
+ }
+
+ rtr_info->path = UT_NEW_NOKEY(rtr_node_path_t());
+ rtr_info->parent_path = UT_NEW_NOKEY(rtr_node_path_t());
+ rtr_info->need_prdt_lock = need_prdt;
+ mutex_create(LATCH_ID_RTR_PATH_MUTEX,
+ &rtr_info->rtr_path_mutex);
+
+ mutex_enter(&index->rtr_track->rtr_active_mutex);
+ index->rtr_track->rtr_active->push_back(rtr_info);
+ mutex_exit(&index->rtr_track->rtr_active_mutex);
+ return(rtr_info);
+}
+
+/*******************************************************************//**
+Update a btr_cur_t with rtr_info */
+void
+rtr_info_update_btr(
+/******************/
+ btr_cur_t* cursor, /*!< in/out: tree cursor */
+ rtr_info_t* rtr_info) /*!< in: rtr_info to set to the
+ cursor */
+{
+ ut_ad(rtr_info);
+
+ cursor->rtr_info = rtr_info;
+}
+
+/*******************************************************************//**
+Initialize an R-Tree search structure */
+void
+rtr_init_rtr_info(
+/****************/
+ rtr_info_t* rtr_info, /*!< in: rtr_info to set to the
+ cursor */
+ bool need_prdt, /*!< in: Whether predicate lock is
+ needed */
+ btr_cur_t* cursor, /*!< in: tree search cursor */
+ dict_index_t* index, /*!< in: index structure */
+ bool reinit) /*!< in: Whether this is a reinit */
+{
+ ut_ad(rtr_info);
+
+ if (!reinit) {
+ /* Reset all members. */
+ rtr_info->path = NULL;
+ rtr_info->parent_path = NULL;
+ rtr_info->matches = NULL;
+
+ mutex_create(LATCH_ID_RTR_PATH_MUTEX,
+ &rtr_info->rtr_path_mutex);
+
+ memset(rtr_info->tree_blocks, 0x0,
+ sizeof(rtr_info->tree_blocks));
+ memset(rtr_info->tree_savepoints, 0x0,
+ sizeof(rtr_info->tree_savepoints));
+ rtr_info->mbr.xmin = 0.0;
+ rtr_info->mbr.xmax = 0.0;
+ rtr_info->mbr.ymin = 0.0;
+ rtr_info->mbr.ymax = 0.0;
+ rtr_info->thr = NULL;
+ rtr_info->heap = NULL;
+ rtr_info->cursor = NULL;
+ rtr_info->index = NULL;
+ rtr_info->need_prdt_lock = false;
+ rtr_info->need_page_lock = false;
+ rtr_info->allocated = false;
+ rtr_info->mbr_adj = false;
+ rtr_info->fd_del = false;
+ rtr_info->search_tuple = NULL;
+ rtr_info->search_mode = PAGE_CUR_UNSUPP;
+ }
+
+ ut_ad(!rtr_info->matches || rtr_info->matches->matched_recs->empty());
+
+ rtr_info->path = UT_NEW_NOKEY(rtr_node_path_t());
+ rtr_info->parent_path = UT_NEW_NOKEY(rtr_node_path_t());
+ rtr_info->need_prdt_lock = need_prdt;
+ rtr_info->cursor = cursor;
+ rtr_info->index = index;
+
+ mutex_enter(&index->rtr_track->rtr_active_mutex);
+ index->rtr_track->rtr_active->push_back(rtr_info);
+ mutex_exit(&index->rtr_track->rtr_active_mutex);
+}
+
+/**************************************************************//**
+Clean up R-Tree search structure */
+void
+rtr_clean_rtr_info(
+/*===============*/
+ rtr_info_t* rtr_info, /*!< in: RTree search info */
+ bool free_all) /*!< in: need to free rtr_info itself */
+{
+ dict_index_t* index;
+ bool initialized = false;
+
+ if (!rtr_info) {
+ return;
+ }
+
+ index = rtr_info->index;
+
+ if (index) {
+ mutex_enter(&index->rtr_track->rtr_active_mutex);
+ }
+
+ while (rtr_info->parent_path && !rtr_info->parent_path->empty()) {
+ btr_pcur_t* cur = rtr_info->parent_path->back().cursor;
+ rtr_info->parent_path->pop_back();
+
+ if (cur) {
+ btr_pcur_close(cur);
+ ut_free(cur);
+ }
+ }
+
+ UT_DELETE(rtr_info->parent_path);
+ rtr_info->parent_path = NULL;
+
+ if (rtr_info->path != NULL) {
+ UT_DELETE(rtr_info->path);
+ rtr_info->path = NULL;
+ initialized = true;
+ }
+
+ if (rtr_info->matches) {
+ rtr_info->matches->used = false;
+ rtr_info->matches->locked = false;
+ rtr_info->matches->valid = false;
+ rtr_info->matches->matched_recs->clear();
+ }
+
+ if (index) {
+ index->rtr_track->rtr_active->remove(rtr_info);
+ mutex_exit(&index->rtr_track->rtr_active_mutex);
+ }
+
+ if (free_all) {
+ if (rtr_info->matches) {
+ if (rtr_info->matches->matched_recs != NULL) {
+ UT_DELETE(rtr_info->matches->matched_recs);
+ }
+
+ rw_lock_free(&(rtr_info->matches->block.lock));
+
+ mutex_destroy(&rtr_info->matches->rtr_match_mutex);
+ }
+
+ if (rtr_info->heap) {
+ mem_heap_free(rtr_info->heap);
+ }
+
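+		/* rtr_path_mutex is created together with the path
+		vector, so it only needs to be destroyed if the path
+		vector existed. */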
+ if (initialized) {
+ mutex_destroy(&rtr_info->rtr_path_mutex);
+ }
+
+ if (rtr_info->allocated) {
+ ut_free(rtr_info);
+ }
+ }
+}
+
+/**************************************************************//**
+Rebuild the "path" to exclude the page number being removed */
+static
+void
+rtr_rebuild_path(
+/*=============*/
+ rtr_info_t* rtr_info, /*!< in: RTree search info */
+	ulint		page_no)	/*!< in: page number to exclude */
+{
+ rtr_node_path_t* new_path
+ = UT_NEW_NOKEY(rtr_node_path_t());
+
+ rtr_node_path_t::iterator rit;
+#ifdef UNIV_DEBUG
+ ulint before_size = rtr_info->path->size();
+#endif /* UNIV_DEBUG */
+
+ for (rit = rtr_info->path->begin();
+ rit != rtr_info->path->end(); ++rit) {
+ node_visit_t next_rec = *rit;
+
+ if (next_rec.page_no == page_no) {
+ continue;
+ }
+
+ new_path->push_back(next_rec);
+#ifdef UNIV_DEBUG
+ node_visit_t rec = new_path->back();
+ ut_ad(rec.level < rtr_info->cursor->tree_height
+ && rec.page_no > 0);
+#endif /* UNIV_DEBUG */
+ }
+
+ UT_DELETE(rtr_info->path);
+
+ ut_ad(new_path->size() == before_size - 1);
+
+ rtr_info->path = new_path;
+
+ if (!rtr_info->parent_path->empty()) {
+ rtr_node_path_t* new_parent_path = UT_NEW_NOKEY(
+ rtr_node_path_t());
+
+ for (rit = rtr_info->parent_path->begin();
+ rit != rtr_info->parent_path->end(); ++rit) {
+ node_visit_t next_rec = *rit;
+
+ if (next_rec.child_no == page_no) {
+ btr_pcur_t* cur = next_rec.cursor;
+
+ if (cur) {
+ btr_pcur_close(cur);
+ ut_free(cur);
+ }
+
+ continue;
+ }
+
+ new_parent_path->push_back(next_rec);
+ }
+ UT_DELETE(rtr_info->parent_path);
+ rtr_info->parent_path = new_parent_path;
+ }
+
+}
+
+/**************************************************************//**
+Check whether a page being discarded is in anyone's search path */
+void
+rtr_check_discard_page(
+/*===================*/
+ dict_index_t* index, /*!< in: index */
+ btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on
+ the root page */
+ buf_block_t* block) /*!< in: block of page to be discarded */
+{
+ ulint pageno = block->page.id.page_no();
+ rtr_info_t* rtr_info;
+ rtr_info_active::iterator it;
+
+ mutex_enter(&index->rtr_track->rtr_active_mutex);
+
+ for (it = index->rtr_track->rtr_active->begin();
+ it != index->rtr_track->rtr_active->end(); ++it) {
+ rtr_info = *it;
+ rtr_node_path_t::iterator rit;
+ bool found = false;
+
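+		/* Skip the cursor doing the discard; its own rtr_info
+		is maintained by the caller. */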
+ if (cursor && rtr_info == cursor->rtr_info) {
+ continue;
+ }
+
+ mutex_enter(&rtr_info->rtr_path_mutex);
+ for (rit = rtr_info->path->begin();
+ rit != rtr_info->path->end(); ++rit) {
+ node_visit_t node = *rit;
+
+ if (node.page_no == pageno) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found) {
+ rtr_rebuild_path(rtr_info, pageno);
+ }
+ mutex_exit(&rtr_info->rtr_path_mutex);
+
+ if (rtr_info->matches) {
+ mutex_enter(&rtr_info->matches->rtr_match_mutex);
+
+ if ((&rtr_info->matches->block)->page.id.page_no()
+ == pageno) {
+ if (!rtr_info->matches->matched_recs->empty()) {
+ rtr_info->matches->matched_recs->clear();
+ }
+ ut_ad(rtr_info->matches->matched_recs->empty());
+ rtr_info->matches->valid = false;
+ }
+
+ mutex_exit(&rtr_info->matches->rtr_match_mutex);
+ }
+ }
+
+ mutex_exit(&index->rtr_track->rtr_active_mutex);
+
+ lock_mutex_enter();
+ lock_prdt_page_free_from_discard(block, lock_sys->prdt_hash);
+ lock_prdt_page_free_from_discard(block, lock_sys->prdt_page_hash);
+ lock_mutex_exit();
+}
+
+/** Restore the stored position of a persistent cursor, buffer-fixing the page */
+static
+bool
+rtr_cur_restore_position(
+ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
+ btr_cur_t* btr_cur, /*!< in: detached persistent cursor */
+ ulint level, /*!< in: index level */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ dict_index_t* index;
+ mem_heap_t* heap;
+ btr_pcur_t* r_cursor = rtr_get_parent_cursor(btr_cur, level, false);
+ dtuple_t* tuple;
+ bool ret = false;
+
+ ut_ad(mtr);
+ ut_ad(r_cursor);
+ ut_ad(mtr->is_active());
+
+ index = btr_cur_get_index(btr_cur);
+
+ if (r_cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE
+ || r_cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE) {
+ return(false);
+ }
+
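+	/* For testing: force a modify_clock mismatch so that the
+	optimistic restore below fails and the pessimistic search
+	path is exercised. */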
+ DBUG_EXECUTE_IF(
+ "rtr_pessimistic_position",
+ r_cursor->modify_clock = 100;
+ );
+
+ ut_ad(latch_mode == BTR_CONT_MODIFY_TREE);
+
+ if (!buf_pool_is_obsolete(r_cursor->withdraw_clock)
+ && buf_page_optimistic_get(RW_X_LATCH,
+ r_cursor->block_when_stored,
+ r_cursor->modify_clock,
+ __FILE__, __LINE__, mtr)) {
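+		/* The block was neither withdrawn nor modified since
+		the position was stored: the stored position is still
+		valid. */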
+ ut_ad(r_cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+
+ ut_ad(r_cursor->rel_pos == BTR_PCUR_ON);
+#ifdef UNIV_DEBUG
+ do {
+ const rec_t* rec;
+ const offset_t* offsets1;
+ const offset_t* offsets2;
+ ulint comp;
+
+ rec = btr_pcur_get_rec(r_cursor);
+
+ heap = mem_heap_create(256);
+ offsets1 = rec_get_offsets(
+ r_cursor->old_rec, index, NULL, !level,
+ r_cursor->old_n_fields, &heap);
+ offsets2 = rec_get_offsets(
+ rec, index, NULL, !level,
+ r_cursor->old_n_fields, &heap);
+
+ comp = rec_offs_comp(offsets1);
+
+ if (rec_get_info_bits(r_cursor->old_rec, comp)
+ & REC_INFO_MIN_REC_FLAG) {
+ ut_ad(rec_get_info_bits(rec, comp)
+ & REC_INFO_MIN_REC_FLAG);
+ } else {
+
+ ut_ad(!cmp_rec_rec(r_cursor->old_rec,
+ rec, offsets1, offsets2,
+ index));
+ }
+
+ mem_heap_free(heap);
+ } while (0);
+#endif /* UNIV_DEBUG */
+
+ return(true);
+ }
+
+	/* The page has changed. For an R-Tree, the page cannot be shrunk
+	away, so we search the page and its right siblings */
+ buf_block_t* block;
+ node_seq_t page_ssn;
+ const page_t* page;
+ page_cur_t* page_cursor;
+ node_visit_t* node = rtr_get_parent_node(btr_cur, level, false);
+ ulint space = dict_index_get_space(index);
+ node_seq_t path_ssn = node->seq_no;
+ page_size_t page_size = dict_table_page_size(index->table);
+
+ ulint page_no = node->page_no;
+
+ heap = mem_heap_create(256);
+
+ tuple = dict_index_build_data_tuple(r_cursor->old_rec, index, !level,
+ r_cursor->old_n_fields, heap);
+
+ page_cursor = btr_pcur_get_page_cur(r_cursor);
+ ut_ad(r_cursor == node->cursor);
+
+search_again:
+ page_id_t page_id(space, page_no);
+ dberr_t err = DB_SUCCESS;
+
+ block = buf_page_get_gen(
+ page_id, page_size, RW_X_LATCH, NULL,
+ BUF_GET, __FILE__, __LINE__, mtr, &err);
+
+ ut_ad(block);
+
+ /* Get the page SSN */
+ page = buf_block_get_frame(block);
+ page_ssn = page_get_ssn_id(page);
+
+ ulint low_match = page_cur_search(
+ block, index, tuple, PAGE_CUR_LE, page_cursor);
+
+ if (low_match == r_cursor->old_n_fields) {
+ const rec_t* rec;
+ const offset_t* offsets1;
+ const offset_t* offsets2;
+ ulint comp;
+
+ rec = btr_pcur_get_rec(r_cursor);
+
+ offsets1 = rec_get_offsets(
+ r_cursor->old_rec, index, NULL, !level,
+ r_cursor->old_n_fields, &heap);
+ offsets2 = rec_get_offsets(
+ rec, index, NULL, !level,
+ r_cursor->old_n_fields, &heap);
+
+ comp = rec_offs_comp(offsets1);
+
+ if ((rec_get_info_bits(r_cursor->old_rec, comp)
+ & REC_INFO_MIN_REC_FLAG)
+ && (rec_get_info_bits(rec, comp) & REC_INFO_MIN_REC_FLAG)) {
+ r_cursor->pos_state = BTR_PCUR_IS_POSITIONED;
+ ret = true;
+ } else if (!cmp_rec_rec(r_cursor->old_rec, rec, offsets1, offsets2,
+ index)) {
+ r_cursor->pos_state = BTR_PCUR_IS_POSITIONED;
+ ret = true;
+ }
+ }
+
+	/* Check the page SSN to see if the page has been split; if so,
+	search the page to the right */
+ if (!ret && page_ssn > path_ssn) {
+ page_no = btr_page_get_next(page);
+ goto search_again;
+ }
+
+ mem_heap_free(heap);
+
+ return(ret);
+}
+
+/****************************************************************//**
+Copy the leaf level R-tree record, and push it to matched_rec in rtr_info */
+static
+void
+rtr_leaf_push_match_rec(
+/*====================*/
+ const rec_t* rec, /*!< in: record to copy */
+ rtr_info_t* rtr_info, /*!< in/out: search stack */
+ offset_t* offsets, /*!< in: offsets */
+ bool is_comp) /*!< in: is compact format */
+{
+ byte* buf;
+ matched_rec_t* match_rec = rtr_info->matches;
+ rec_t* copy;
+ ulint data_len;
+ rtr_rec_t rtr_rec;
+
+ buf = match_rec->block.frame + match_rec->used;
+ ut_ad(page_rec_is_leaf(rec));
+
+ copy = rec_copy(buf, rec, offsets);
+
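+	/* Link the copied record to the supremum, so that the shadow
+	page can be traversed like a normal index page. */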
+ if (is_comp) {
+ rec_set_next_offs_new(copy, PAGE_NEW_SUPREMUM);
+ } else {
+ rec_set_next_offs_old(copy, PAGE_OLD_SUPREMUM);
+ }
+
+ rtr_rec.r_rec = copy;
+ rtr_rec.locked = false;
+
+ match_rec->matched_recs->push_back(rtr_rec);
+ match_rec->valid = true;
+
+ data_len = rec_offs_data_size(offsets) + rec_offs_extra_size(offsets);
+ match_rec->used += data_len;
+
+ ut_ad(match_rec->used < UNIV_PAGE_SIZE);
+}
+
+/**************************************************************//**
+Store the parent path cursor
+@return number of cursors stored */
+ulint
+rtr_store_parent_path(
+/*==================*/
+ const buf_block_t* block, /*!< in: block of the page */
+ btr_cur_t* btr_cur,/*!< in/out: persistent cursor */
+ ulint latch_mode,
+ /*!< in: latch_mode */
+ ulint level, /*!< in: index level */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ ulint num = btr_cur->rtr_info->parent_path->size();
+ ulint num_stored = 0;
+
+ while (num >= 1) {
+ node_visit_t* node = &(*btr_cur->rtr_info->parent_path)[
+ num - 1];
+ btr_pcur_t* r_cursor = node->cursor;
+ buf_block_t* cur_block;
+
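+		/* parent_path is stored top-down; scanning it from the
+		back visits the lowest levels first, so stop once we are
+		above the requested level. */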
+ if (node->level > level) {
+ break;
+ }
+
+ r_cursor->pos_state = BTR_PCUR_IS_POSITIONED;
+ r_cursor->latch_mode = latch_mode;
+
+ cur_block = btr_pcur_get_block(r_cursor);
+
+ if (cur_block == block) {
+ btr_pcur_store_position(r_cursor, mtr);
+ num_stored++;
+ } else {
+ break;
+ }
+
+ num--;
+ }
+
+ return(num_stored);
+}
+/**************************************************************//**
+Push a non-leaf index node to the search path for insertion */
+static
+void
+rtr_non_leaf_insert_stack_push(
+/*===========================*/
+ dict_index_t* index, /*!< in: index descriptor */
+ rtr_node_path_t* path, /*!< in/out: search path */
+ ulint level, /*!< in: index page level */
+ ulint child_no,/*!< in: child page no */
+ const buf_block_t* block, /*!< in: block of the page */
+ const rec_t* rec, /*!< in: positioned record */
+ double mbr_inc)/*!< in: MBR needs to be enlarged */
+{
+ node_seq_t new_seq;
+ btr_pcur_t* my_cursor;
+ ulint page_no = block->page.id.page_no();
+
+ my_cursor = static_cast<btr_pcur_t*>(
+ ut_malloc_nokey(sizeof(*my_cursor)));
+
+ btr_pcur_init(my_cursor);
+
+ page_cur_position(rec, block, btr_pcur_get_page_cur(my_cursor));
+
+ (btr_pcur_get_btr_cur(my_cursor))->index = index;
+
+ new_seq = rtr_get_current_ssn_id(index);
+ rtr_non_leaf_stack_push(path, page_no, new_seq, level, child_no,
+ my_cursor, mbr_inc);
+}
+
+/** Copy a buf_block_t structure, except "block->lock" and "block->mutex".
+@param[in,out] matches copy to match->block
+@param[in] block block to copy */
+static
+void
+rtr_copy_buf(
+ matched_rec_t* matches,
+ const buf_block_t* block)
+{
+ /* Copy all members of "block" to "matches->block" except "mutex"
+ and "lock". We skip "mutex" and "lock" because they are not used
+ from the dummy buf_block_t we create here and because memcpy()ing
+ them generates (valid) compiler warnings that the vtable pointer
+ will be copied. It is also undefined what will happen with the
+ newly memcpy()ed mutex if the source mutex was acquired by
+ (another) thread while it was copied. */
+ new (&matches->block.page) buf_page_t(block->page);
+ matches->block.frame = block->frame;
+ matches->block.unzip_LRU = block->unzip_LRU;
+
+ ut_d(matches->block.in_unzip_LRU_list = block->in_unzip_LRU_list);
+ ut_d(matches->block.in_withdraw_list = block->in_withdraw_list);
+
+ /* Skip buf_block_t::mutex */
+ /* Skip buf_block_t::lock */
+ matches->block.lock_hash_val = block->lock_hash_val;
+ matches->block.modify_clock = block->modify_clock;
+#ifdef BTR_CUR_HASH_ADAPT
+ matches->block.n_hash_helps = block->n_hash_helps;
+ matches->block.n_fields = block->n_fields;
+ matches->block.left_side = block->left_side;
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+ matches->block.n_pointers = block->n_pointers;
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+ matches->block.curr_n_fields = block->curr_n_fields;
+ matches->block.curr_left_side = block->curr_left_side;
+ matches->block.index = block->index;
+#endif /* BTR_CUR_HASH_ADAPT */
+ ut_d(matches->block.debug_latch = block->debug_latch);
+
+}
+
+/****************************************************************//**
+Generate a shadow copy of the page block header to save the
+matched records */
+static
+void
+rtr_init_match(
+/*===========*/
+ matched_rec_t* matches,/*!< in/out: match to initialize */
+ const buf_block_t* block, /*!< in: buffer block */
+ const page_t* page) /*!< in: buffer page */
+{
+ ut_ad(matches->matched_recs->empty());
+ matches->locked = false;
+ rtr_copy_buf(matches, block);
+ matches->block.frame = matches->bufp;
+ matches->valid = false;
+	/* We have to copy PAGE_*_SUPREMUM_END bytes so that we can
+	use the infimum/supremum of this page as a normal btr page
+	for search. */
+ memcpy(matches->block.frame, page, page_is_comp(page)
+ ? PAGE_NEW_SUPREMUM_END
+ : PAGE_OLD_SUPREMUM_END);
+ matches->used = page_is_comp(page)
+ ? PAGE_NEW_SUPREMUM_END
+ : PAGE_OLD_SUPREMUM_END;
+#ifdef RTR_SEARCH_DIAGNOSTIC
+ ulint pageno = page_get_page_no(page);
+ fprintf(stderr, "INNODB_RTR: Searching leaf page %d\n",
+ static_cast<int>(pageno));
+#endif /* RTR_SEARCH_DIAGNOSTIC */
+}
+
+/****************************************************************//**
+Get the bounding box content from an index record */
+void
+rtr_get_mbr_from_rec(
+/*=================*/
+	const rec_t*	rec,	/*!< in: index record */
+	const offset_t*	offsets,/*!< in: offsets array */
+	rtr_mbr_t*	mbr)	/*!< out: MBR */
+{
+ ulint rec_f_len;
+ const byte* data;
+
+ data = rec_get_nth_field(rec, offsets, 0, &rec_f_len);
+
+ rtr_read_mbr(data, mbr);
+}
+
+/****************************************************************//**
+Get the bounding box content from an MBR data record */
+void
+rtr_get_mbr_from_tuple(
+/*===================*/
+ const dtuple_t* dtuple, /*!< in: data tuple */
+ rtr_mbr* mbr) /*!< out: mbr to fill */
+{
+ const dfield_t* dtuple_field;
+ ulint dtuple_f_len;
+
+ dtuple_field = dtuple_get_nth_field(dtuple, 0);
+ dtuple_f_len = dfield_get_len(dtuple_field);
+ ut_a(dtuple_f_len >= 4 * sizeof(double));
+
+ rtr_read_mbr(static_cast<const byte*>(dfield_get_data(dtuple_field)),
+ mbr);
+}
+
+/****************************************************************//**
+Searches the right position in rtree for a page cursor. */
+bool
+rtr_cur_search_with_match(
+/*======================*/
+ const buf_block_t* block, /*!< in: buffer block */
+ dict_index_t* index, /*!< in: index descriptor */
+ const dtuple_t* tuple, /*!< in: data tuple */
+ page_cur_mode_t mode, /*!< in: PAGE_CUR_RTREE_INSERT,
+ PAGE_CUR_RTREE_LOCATE etc. */
+ page_cur_t* cursor, /*!< in/out: page cursor */
+ rtr_info_t* rtr_info)/*!< in/out: search stack */
+{
+ bool found = false;
+ const page_t* page;
+ const rec_t* rec;
+ const rec_t* last_rec;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
+ mem_heap_t* heap = NULL;
+ int cmp = 1;
+ double least_inc = DBL_MAX;
+ const rec_t* best_rec;
+ const rec_t* last_match_rec = NULL;
+ bool match_init = false;
+ ulint space = block->page.id.space();
+ page_cur_mode_t orig_mode = mode;
+ const rec_t* first_rec = NULL;
+
+ rec_offs_init(offsets_);
+
+ ut_ad(RTREE_SEARCH_MODE(mode));
+
+ ut_ad(dict_index_is_spatial(index));
+
+ page = buf_block_get_frame(block);
+
+	const ulint level = btr_page_get_level(page);
+ const bool is_leaf = !level;
+
+ if (mode == PAGE_CUR_RTREE_LOCATE) {
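+		/* When locating a node pointer, the child MBR must lie
+		within the parent MBR, so the search degenerates to
+		PAGE_CUR_WITHIN on non-leaf pages. */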
+ ut_ad(level != 0);
+ mode = PAGE_CUR_WITHIN;
+ }
+
+ rec = page_dir_slot_get_rec(page_dir_get_nth_slot(page, 0));
+
+ last_rec = rec;
+ best_rec = rec;
+
+ if (page_rec_is_infimum(rec)) {
+ rec = page_rec_get_next_const(rec);
+ }
+
+	/* Check whether the insert tuple is larger than the first rec
+	on the page, and try to avoid using that rec if possible */
+ if (mode == PAGE_CUR_RTREE_INSERT && !page_rec_is_supremum(rec)) {
+
+ ulint new_rec_size = rec_get_converted_size(index, tuple, 0);
+
+ offsets = rec_get_offsets(rec, index, offsets, is_leaf,
+ dtuple_get_n_fields_cmp(tuple),
+ &heap);
+
+ if (rec_offs_size(offsets) < new_rec_size) {
+ first_rec = rec;
+ }
+
+		/* If this is the left-most page of this index level
+		and the table is a compressed table, try to avoid the
+		first page as much as possible, as there will be problems
+		when updating the MIN_REC record in a compressed table */
+ if (buf_block_get_page_zip(block)
+ && !page_has_prev(page)
+ && page_get_n_recs(page) >= 2) {
+
+ rec = page_rec_get_next_const(rec);
+ }
+ }
+
+ while (!page_rec_is_supremum(rec)) {
+ offsets = rec_get_offsets(rec, index, offsets, is_leaf,
+ dtuple_get_n_fields_cmp(tuple),
+ &heap);
+ if (!is_leaf) {
+ switch (mode) {
+ case PAGE_CUR_CONTAIN:
+ case PAGE_CUR_INTERSECT:
+ case PAGE_CUR_MBR_EQUAL:
+				/* At the non-leaf level, we need to check
+				both CONTAIN and INTERSECT for either of
+				these search modes */
+ cmp = cmp_dtuple_rec_with_gis(
+ tuple, rec, offsets, PAGE_CUR_CONTAIN);
+
+ if (cmp != 0) {
+ cmp = cmp_dtuple_rec_with_gis(
+ tuple, rec, offsets,
+ PAGE_CUR_INTERSECT);
+ }
+ break;
+ case PAGE_CUR_DISJOINT:
+ cmp = cmp_dtuple_rec_with_gis(
+ tuple, rec, offsets, mode);
+
+ if (cmp != 0) {
+ cmp = cmp_dtuple_rec_with_gis(
+ tuple, rec, offsets,
+ PAGE_CUR_INTERSECT);
+ }
+ break;
+ case PAGE_CUR_RTREE_INSERT:
+ double increase;
+ double area;
+
+ cmp = cmp_dtuple_rec_with_gis(
+ tuple, rec, offsets, PAGE_CUR_WITHIN);
+
+ if (cmp != 0) {
+ increase = rtr_rec_cal_increase(
+ tuple, rec, offsets, &area);
+					/* Once it goes beyond DBL_MAX,
+					it makes no sense to record such
+					a value; just cap it at
+					DBL_MAX / 2 */
+ if (increase >= DBL_MAX) {
+ increase = DBL_MAX / 2;
+ }
+
+ if (increase < least_inc) {
+ least_inc = increase;
+ best_rec = rec;
+ } else if (best_rec
+ && best_rec == first_rec) {
+ /* if first_rec is set,
+ we will try to avoid it */
+ least_inc = increase;
+ best_rec = rec;
+ }
+ }
+ break;
+ case PAGE_CUR_RTREE_GET_FATHER:
+ cmp = cmp_dtuple_rec_with_gis_internal(
+ tuple, rec, offsets);
+ break;
+ default:
+ /* WITHIN etc. */
+ cmp = cmp_dtuple_rec_with_gis(
+ tuple, rec, offsets, mode);
+ }
+ } else {
+ /* At leaf level, INSERT should translate to LE */
+ ut_ad(mode != PAGE_CUR_RTREE_INSERT);
+
+ cmp = cmp_dtuple_rec_with_gis(
+ tuple, rec, offsets, mode);
+ }
+
+ if (cmp == 0) {
+ found = true;
+
+ /* If located, the matching node/rec will be pushed
+ to rtr_info->path for non-leaf nodes, or
+ rtr_info->matches for leaf nodes */
+ if (rtr_info && mode != PAGE_CUR_RTREE_INSERT) {
+ if (!is_leaf) {
+ ulint page_no;
+ node_seq_t new_seq;
+ bool is_loc;
+
+ is_loc = (orig_mode
+ == PAGE_CUR_RTREE_LOCATE
+ || orig_mode
+ == PAGE_CUR_RTREE_GET_FATHER);
+
+ offsets = rec_get_offsets(
+ rec, index, offsets, false,
+ ULINT_UNDEFINED, &heap);
+
+ page_no = btr_node_ptr_get_child_page_no(
+ rec, offsets);
+
+ ut_ad(level >= 1);
+
+ /* Get current SSN, before we insert
+ it into the path stack */
+ new_seq = rtr_get_current_ssn_id(index);
+
+ rtr_non_leaf_stack_push(
+ rtr_info->path,
+ page_no,
+ new_seq, level - 1, 0,
+ NULL, 0);
+
+ if (is_loc) {
+ rtr_non_leaf_insert_stack_push(
+ index,
+ rtr_info->parent_path,
+ level, page_no, block,
+ rec, 0);
+ }
+
+ if (!srv_read_only_mode
+ && (rtr_info->need_page_lock
+ || !is_loc)) {
+
+ /* Lock the page, preventing it
+ from being shrunk */
+ lock_place_prdt_page_lock(
+ space, page_no, index,
+ rtr_info->thr);
+ }
+ } else {
+ ut_ad(orig_mode
+ != PAGE_CUR_RTREE_LOCATE);
+
+ if (!match_init) {
+ rtr_init_match(
+ rtr_info->matches,
+ block, page);
+ match_init = true;
+ }
+
+ /* Collect matched records on page */
+ offsets = rec_get_offsets(
+ rec, index, offsets, true,
+ ULINT_UNDEFINED, &heap);
+ rtr_leaf_push_match_rec(
+ rec, rtr_info, offsets,
+ page_is_comp(page));
+ }
+
+ last_match_rec = rec;
+ } else {
+			/* This is the insertion case; it will break
+			once it finds the first MBR that can accommodate
+			the record to be inserted */
+ break;
+ }
+ }
+
+ last_rec = rec;
+
+ rec = page_rec_get_next_const(rec);
+ }
+
+	/* All records on the page have been searched */
+ if (page_rec_is_supremum(rec)) {
+ if (!is_leaf) {
+ if (!found) {
+			/* No match; if this is for insertion,
+			then we select the record that results in the
+			least increased area */
+ if (mode == PAGE_CUR_RTREE_INSERT) {
+ ulint child_no;
+ ut_ad(least_inc < DBL_MAX);
+ offsets = rec_get_offsets(
+ best_rec, index, offsets,
+ false, ULINT_UNDEFINED, &heap);
+ child_no =
+ btr_node_ptr_get_child_page_no(
+ best_rec, offsets);
+
+ rtr_non_leaf_insert_stack_push(
+ index, rtr_info->parent_path,
+ level, child_no, block,
+ best_rec, least_inc);
+
+ page_cur_position(best_rec, block,
+ cursor);
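+				/* The MBRs of the parent nodes may need
+				to be enlarged to cover the new entry. */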
+ rtr_info->mbr_adj = true;
+ } else {
+ /* Position at the last rec of the
+ page, if it is not the leaf page */
+ page_cur_position(last_rec, block,
+ cursor);
+ }
+ } else {
+				/* There are matching records; position
+				on the last matching record */
+ if (rtr_info) {
+ rec = last_match_rec;
+ page_cur_position(
+ rec, block, cursor);
+ }
+ }
+ } else if (rtr_info) {
+ /* Leaf level, no match, position at the
+ last (supremum) rec */
+ if (!last_match_rec) {
+ page_cur_position(rec, block, cursor);
+ goto func_exit;
+ }
+
+ /* There are matched records */
+ matched_rec_t* match_rec = rtr_info->matches;
+
+ rtr_rec_t test_rec;
+
+ test_rec = match_rec->matched_recs->back();
+#ifdef UNIV_DEBUG
+ offset_t offsets_2[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets2 = offsets_2;
+ rec_offs_init(offsets_2);
+
+ ut_ad(found);
+
+ /* Verify the record to be positioned is the same
+ as the last record in matched_rec vector */
+ offsets2 = rec_get_offsets(test_rec.r_rec, index,
+ offsets2, true,
+ ULINT_UNDEFINED, &heap);
+
+ offsets = rec_get_offsets(last_match_rec, index,
+ offsets, true,
+ ULINT_UNDEFINED, &heap);
+
+ ut_ad(cmp_rec_rec(test_rec.r_rec, last_match_rec,
+ offsets2, offsets, index) == 0);
+#endif /* UNIV_DEBUG */
+ /* Pop the last match record and position on it */
+ match_rec->matched_recs->pop_back();
+ page_cur_position(test_rec.r_rec, &match_rec->block,
+ cursor);
+ }
+ } else {
+
+ if (mode == PAGE_CUR_RTREE_INSERT) {
+ ulint child_no;
+ ut_ad(!last_match_rec && rec);
+
+ offsets = rec_get_offsets(
+ rec, index, offsets, false,
+ ULINT_UNDEFINED, &heap);
+
+ child_no = btr_node_ptr_get_child_page_no(rec, offsets);
+
+ rtr_non_leaf_insert_stack_push(
+ index, rtr_info->parent_path, level, child_no,
+ block, rec, 0);
+
+ } else if (rtr_info && found && !is_leaf) {
+ rec = last_match_rec;
+ }
+
+ page_cur_position(rec, block, cursor);
+ }
+
+#ifdef UNIV_DEBUG
+ /* Verify that we are positioned at the same child page as pushed in
+ the path stack */
+ if (!is_leaf && (!page_rec_is_supremum(rec) || found)
+ && mode != PAGE_CUR_RTREE_INSERT) {
+ ulint page_no;
+
+ offsets = rec_get_offsets(rec, index, offsets, false,
+ ULINT_UNDEFINED, &heap);
+ page_no = btr_node_ptr_get_child_page_no(rec, offsets);
+
+ if (rtr_info && found) {
+ rtr_node_path_t* path = rtr_info->path;
+ node_visit_t last_visit = path->back();
+
+ ut_ad(last_visit.page_no == page_no);
+ }
+ }
+#endif /* UNIV_DEBUG */
+
+func_exit:
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+
+ return(found);
+}
diff --git a/storage/innobase/ha/ha0ha.cc b/storage/innobase/ha/ha0ha.cc
index 499412ade12..cf9a454ad8d 100644
--- a/storage/innobase/ha/ha0ha.cc
+++ b/storage/innobase/ha/ha0ha.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,41 +25,29 @@ Created 8/22/1994 Heikki Tuuri
*************************************************************************/
#include "ha0ha.h"
-#ifdef UNIV_NONINL
-#include "ha0ha.ic"
-#endif
-#ifndef UNIV_HOTBACKUP
#ifdef UNIV_DEBUG
# include "buf0buf.h"
#endif /* UNIV_DEBUG */
-# include "btr0sea.h"
+#include "btr0sea.h"
#include "page0page.h"
/*************************************************************//**
Creates a hash table with at least n array cells. The actual number
of cells is chosen to be a prime number slightly bigger than n.
-@return own: created table */
-UNIV_INTERN
+@return own: created table */
hash_table_t*
-ha_create_func(
-/*===========*/
- ulint n, /*!< in: number of array cells */
-#ifdef UNIV_SYNC_DEBUG
- ulint sync_level, /*!< in: level of the mutexes or rw_locks
- in the latching order: this is used in the
- debug version */
-#endif /* UNIV_SYNC_DEBUG */
- ulint n_sync_obj, /*!< in: number of mutexes or rw_locks
- to protect the hash table: must be a
- power of 2, or 0 */
- ulint type) /*!< in: type of datastructure for which
- the memory heap is going to be used e.g.:
- MEM_HEAP_FOR_BTR_SEARCH or
+ib_create(
+/*======*/
+ ulint n, /*!< in: number of array cells */
+ latch_id_t id, /*!< in: latch ID */
+ ulint n_sync_obj,
+ /*!< in: number of mutexes to protect the
+ hash table: must be a power of 2, or 0 */
+	ulint	type)	/*!< in: type of datastructure for which
+			the memory heap is going to be used e.g.:
+			MEM_HEAP_FOR_BTR_SEARCH or
MEM_HEAP_FOR_PAGE_HASH */
{
hash_table_t* table;
- ulint i;
ut_a(type == MEM_HEAP_FOR_BTR_SEARCH
|| type == MEM_HEAP_FOR_PAGE_HASH);
@@ -71,7 +60,10 @@ ha_create_func(
if (n_sync_obj == 0) {
table->heap = mem_heap_create_typed(
- ut_min(4096, MEM_MAX_ALLOC_IN_BUF), type);
+ ut_min(static_cast<ulint>(4096),
+ MEM_MAX_ALLOC_IN_BUF / 2
+ - MEM_BLOCK_HEADER_SIZE - MEM_SPACE_NEEDED(0)),
+ type);
ut_a(table->heap);
return(table);
@@ -80,61 +72,103 @@ ha_create_func(
if (type == MEM_HEAP_FOR_PAGE_HASH) {
/* We create a hash table protected by rw_locks for
buf_pool->page_hash. */
- hash_create_sync_obj(table, HASH_TABLE_SYNC_RW_LOCK,
- n_sync_obj, sync_level);
+ hash_create_sync_obj(
+ table, HASH_TABLE_SYNC_RW_LOCK, id, n_sync_obj);
} else {
- hash_create_sync_obj(table, HASH_TABLE_SYNC_MUTEX,
- n_sync_obj, sync_level);
+ hash_create_sync_obj(
+ table, HASH_TABLE_SYNC_MUTEX, id, n_sync_obj);
}
table->heaps = static_cast<mem_heap_t**>(
- mem_alloc(n_sync_obj * sizeof(void*)));
-
- for (i = 0; i < n_sync_obj; i++) {
- table->heaps[i] = mem_heap_create_typed(4096, type);
+ ut_malloc_nokey(n_sync_obj * sizeof(void*)));
+
+ for (ulint i = 0; i < n_sync_obj; i++) {
+ table->heaps[i] = mem_heap_create_typed(
+ ut_min(static_cast<ulint>(4096),
+ MEM_MAX_ALLOC_IN_BUF / 2
+ - MEM_BLOCK_HEADER_SIZE - MEM_SPACE_NEEDED(0)),
+ type);
ut_a(table->heaps[i]);
}
return(table);
}
+/** Recreate a hash table with at least n array cells. The actual number
+of cells is chosen to be a prime number slightly bigger than n.
+The new cells are all cleared. The heaps are recreated.
+The sync objects are reused.
+@param[in,out]	table	hash table to be resized (to be freed later)
+@param[in] n number of array cells
+@return resized new table */
+hash_table_t*
+ib_recreate(
+ hash_table_t* table,
+ ulint n)
+{
+	/* For now, this function is used only for the page_hash table */
+ ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK);
+ ut_ad(table->n_sync_obj > 0);
+
+ hash_table_t* new_table = hash_create(n);
+
+ new_table->type = table->type;
+ new_table->n_sync_obj = table->n_sync_obj;
+ new_table->sync_obj = table->sync_obj;
+
+ for (ulint i = 0; i < table->n_sync_obj; i++) {
+ mem_heap_free(table->heaps[i]);
+ }
+ ut_free(table->heaps);
+
+ new_table->heaps = static_cast<mem_heap_t**>(
+ ut_malloc_nokey(new_table->n_sync_obj * sizeof(void*)));
+
+ for (ulint i = 0; i < new_table->n_sync_obj; i++) {
+ new_table->heaps[i] = mem_heap_create_typed(
+ ut_min(static_cast<ulint>(4096),
+ MEM_MAX_ALLOC_IN_BUF / 2
+ - MEM_BLOCK_HEADER_SIZE - MEM_SPACE_NEEDED(0)),
+ MEM_HEAP_FOR_PAGE_HASH);
+ ut_a(new_table->heaps[i]);
+ }
+
+ return(new_table);
+}
+
/*************************************************************//**
Empties a hash table and frees the memory heaps. */
-UNIV_INTERN
void
ha_clear(
/*=====*/
hash_table_t* table) /*!< in, own: hash table */
{
- ulint i;
- ulint n;
-
- ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!table->adaptive
- || rw_lock_own(&btr_search_latch, RW_LOCK_EXCLUSIVE));
-#endif /* UNIV_SYNC_DEBUG */
-
- /* Free the memory heaps. */
- n = table->n_sync_obj;
+#ifdef BTR_CUR_HASH_ADAPT
+ ut_ad(!table->adaptive || btr_search_own_all(RW_LOCK_X));
+#endif /* BTR_CUR_HASH_ADAPT */
- for (i = 0; i < n; i++) {
+ for (ulint i = 0; i < table->n_sync_obj; i++) {
mem_heap_free(table->heaps[i]);
}
- if (table->heaps) {
- mem_free(table->heaps);
- }
+ ut_free(table->heaps);
switch (table->type) {
case HASH_TABLE_SYNC_MUTEX:
- mem_free(table->sync_obj.mutexes);
+ for (ulint i = 0; i < table->n_sync_obj; ++i) {
+ mutex_destroy(&table->sync_obj.mutexes[i]);
+ }
+ ut_free(table->sync_obj.mutexes);
table->sync_obj.mutexes = NULL;
break;
case HASH_TABLE_SYNC_RW_LOCK:
- mem_free(table->sync_obj.rw_locks);
+ for (ulint i = 0; i < table->n_sync_obj; ++i) {
+ rw_lock_free(&table->sync_obj.rw_locks[i]);
+ }
+
+ ut_free(table->sync_obj.rw_locks);
table->sync_obj.rw_locks = NULL;
break;
@@ -148,20 +182,26 @@ ha_clear(
/* Clear the hash table. */
- n = hash_get_n_cells(table);
+ ulint n = hash_get_n_cells(table);
- for (i = 0; i < n; i++) {
+ for (ulint i = 0; i < n; i++) {
hash_get_nth_cell(table, i)->node = NULL;
}
}
+#ifdef BTR_CUR_HASH_ADAPT
+# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+/** Maximum number of records in a page */
+static const lint MAX_N_POINTERS
+ = UNIV_PAGE_SIZE_MAX / REC_N_NEW_EXTRA_BYTES;
+# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+
/*************************************************************//**
Inserts an entry into a hash table. If an entry with the same fold number
is found, its node is updated to point to the new data, and no new node
is inserted. If btr_search_enabled is set to FALSE, we will only allow
updating existing nodes, but no new node is allowed to be added.
-@return TRUE if succeed, FALSE if no more memory could be allocated */
-UNIV_INTERN
+@return TRUE if succeed, FALSE if no more memory could be allocated */
ibool
ha_insert_for_fold_func(
/*====================*/
@@ -202,9 +242,11 @@ ha_insert_for_fold_func(
buf_block_t* prev_block = prev_node->block;
ut_a(prev_block->frame
== page_align(prev_node->data));
- ut_a(prev_block->n_pointers > 0);
- prev_block->n_pointers--;
- block->n_pointers++;
+ ut_a(my_atomic_addlint(
+ &prev_block->n_pointers, -1)
+ < MAX_N_POINTERS);
+ ut_a(my_atomic_addlint(&block->n_pointers, 1)
+ < MAX_N_POINTERS);
}
prev_node->block = block;
@@ -235,7 +277,8 @@ ha_insert_for_fold_func(
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
if (table->adaptive) {
- block->n_pointers++;
+ ut_a(my_atomic_addlint(&block->n_pointers, 1)
+ < MAX_N_POINTERS);
}
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
@@ -262,9 +305,27 @@ ha_insert_for_fold_func(
return(TRUE);
}
+#ifdef UNIV_DEBUG
+/** Verify if latch corresponding to the hash table is x-latched
+@param[in] table hash table */
+static
+void
+ha_btr_search_latch_x_locked(const hash_table_t* table)
+{
+ ulint i;
+ for (i = 0; i < btr_ahi_parts; ++i) {
+ if (btr_search_sys->hash_tables[i] == table) {
+ break;
+ }
+ }
+
+ ut_ad(i < btr_ahi_parts);
+ ut_ad(rw_lock_own(btr_search_latches[i], RW_LOCK_X));
+}
+#endif /* UNIV_DEBUG */
+
/***********************************************************//**
Deletes a hash node. */
-UNIV_INTERN
void
ha_delete_hash_node(
/*================*/
@@ -273,15 +334,13 @@ ha_delete_hash_node(
{
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_d(ha_btr_search_latch_x_locked(table));
ut_ad(btr_search_enabled);
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
if (table->adaptive) {
ut_a(del_node->block->frame = page_align(del_node->data));
- ut_a(del_node->block->n_pointers > 0);
- del_node->block->n_pointers--;
+ ut_a(my_atomic_addlint(&del_node->block->n_pointers, -1)
+ < MAX_N_POINTERS);
}
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
@@ -292,7 +351,6 @@ ha_delete_hash_node(
Looks for an element when we know the pointer to the data, and updates
the pointer to data, if found.
@return TRUE if found */
-UNIV_INTERN
ibool
ha_search_and_update_if_found_func(
/*===============================*/
@@ -312,9 +370,8 @@ ha_search_and_update_if_found_func(
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
ut_a(new_block->frame == page_align(new_data));
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+
+ ut_d(ha_btr_search_latch_x_locked(table));
if (!btr_search_enabled) {
return(FALSE);
@@ -325,9 +382,10 @@ ha_search_and_update_if_found_func(
if (node) {
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
if (table->adaptive) {
- ut_a(node->block->n_pointers > 0);
- node->block->n_pointers--;
- new_block->n_pointers++;
+ ut_a(my_atomic_addlint(&node->block->n_pointers, -1)
+ < MAX_N_POINTERS);
+ ut_a(my_atomic_addlint(&new_block->n_pointers, 1)
+ < MAX_N_POINTERS);
}
node->block = new_block;
@@ -343,7 +401,6 @@ ha_search_and_update_if_found_func(
/*****************************************************************//**
Removes from the chain determined by fold all nodes whose data pointer
points to the page given. */
-UNIV_INTERN
void
ha_remove_all_nodes_to_page(
/*========================*/
@@ -386,14 +443,13 @@ ha_remove_all_nodes_to_page(
node = ha_chain_get_next(node);
}
-#endif
+#endif /* UNIV_DEBUG */
}
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
/*************************************************************//**
Validates a given range of the cells in hash table.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
ha_validate(
/*========*/
@@ -421,12 +477,9 @@ ha_validate(
node = node->next) {
if (hash_calc_hash(node->fold, table) != i) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- "InnoDB: Error: hash table node"
- " fold value %lu does not\n"
- "InnoDB: match the cell number %lu.\n",
- (ulong) node->fold, (ulong) i);
+ ib::error() << "Hash table node fold value "
+ << node->fold << " does not match the"
+ " cell number " << i << ".";
ok = FALSE;
}
@@ -436,63 +489,4 @@ ha_validate(
return(ok);
}
#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
-
-/*************************************************************//**
-Prints info of a hash table. */
-UNIV_INTERN
-void
-ha_print_info(
-/*==========*/
- FILE* file, /*!< in: file where to print */
- hash_table_t* table) /*!< in: hash table */
-{
-#ifdef UNIV_DEBUG
-/* Some of the code here is disabled for performance reasons in production
-builds, see http://bugs.mysql.com/36941 */
-#define PRINT_USED_CELLS
-#endif /* UNIV_DEBUG */
-
-#ifdef PRINT_USED_CELLS
- hash_cell_t* cell;
- ulint cells = 0;
- ulint i;
-#endif /* PRINT_USED_CELLS */
- ulint n_bufs;
-
- ut_ad(table);
- ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
-#ifdef PRINT_USED_CELLS
- for (i = 0; i < hash_get_n_cells(table); i++) {
-
- cell = hash_get_nth_cell(table, i);
-
- if (cell->node) {
-
- cells++;
- }
- }
-#endif /* PRINT_USED_CELLS */
-
- fprintf(file, "Hash table size %lu",
- (ulong) hash_get_n_cells(table));
-
-#ifdef PRINT_USED_CELLS
- fprintf(file, ", used cells %lu", (ulong) cells);
-#endif /* PRINT_USED_CELLS */
-
- if (table->heaps == NULL && table->heap != NULL) {
-
- /* This calculation is intended for the adaptive hash
- index: how many buffer frames we have reserved? */
-
- n_bufs = UT_LIST_GET_LEN(table->heap->base) - 1;
-
- if (table->heap->free_block) {
- n_bufs++;
- }
-
- fprintf(file, ", node heap has %lu buffer(s)\n",
- (ulong) n_bufs);
- }
-}
-#endif /* !UNIV_HOTBACKUP */
+#endif /* BTR_CUR_HASH_ADAPT */
diff --git a/storage/innobase/ha/ha0storage.cc b/storage/innobase/ha/ha0storage.cc
index 2a8ee8b2432..8857b81f2d2 100644
--- a/storage/innobase/ha/ha0storage.cc
+++ b/storage/innobase/ha/ha0storage.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,16 +25,11 @@ its own storage, avoiding duplicates.
Created September 22, 2007 Vasil Dimov
*******************************************************/
-#include "univ.i"
#include "ha0storage.h"
#include "hash0hash.h"
#include "mem0mem.h"
#include "ut0rnd.h"
-#ifdef UNIV_NONINL
-#include "ha0storage.ic"
-#endif
-
/*******************************************************************//**
Retrieves a data from a storage. If it is present, a pointer to the
stored copy of data is returned, otherwise NULL is returned. */
@@ -83,7 +78,6 @@ data_len bytes need to be allocated) and the size of storage is going to
become more than "memlim" then "data" is not added and NULL is returned.
To disable this behavior "memlim" can be set to 0, which stands for
"no limit". */
-UNIV_INTERN
const void*
ha_storage_put_memlim(
/*==================*/
@@ -169,14 +163,13 @@ test_ha_storage()
p = ha_storage_put(storage, buf, sizeof(buf));
if (p != stored[i]) {
-
- fprintf(stderr, "ha_storage_put() returned %p "
- "instead of %p, i=%d\n", p, stored[i], i);
+ ib::warn() << "ha_storage_put() returned " << p
+ << " instead of " << stored[i] << ", i=" << i;
return;
}
}
- fprintf(stderr, "all ok\n");
+ ib::info() << "all ok";
ha_storage_free(storage);
}
diff --git a/storage/innobase/ha/hash0hash.cc b/storage/innobase/ha/hash0hash.cc
index 40354aafa43..51f3db09922 100644
--- a/storage/innobase/ha/hash0hash.cc
+++ b/storage/innobase/ha/hash0hash.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,212 +24,24 @@ Created 5/20/1997 Heikki Tuuri
*******************************************************/
#include "hash0hash.h"
-#ifdef UNIV_NONINL
-#include "hash0hash.ic"
-#endif
-
#include "mem0mem.h"
-
-#ifndef UNIV_HOTBACKUP
-
-# ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t hash_table_mutex_key;
-# endif /* UNIV_PFS_MUTEX */
-
-# ifdef UNIV_PFS_RWLOCK
-UNIV_INTERN mysql_pfs_key_t hash_table_rw_lock_key;
-# endif /* UNIV_PFS_RWLOCK */
-/************************************************************//**
-Reserves the mutex for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_mutex_enter(
-/*=============*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold */
-{
- ut_ad(table->type == HASH_TABLE_SYNC_MUTEX);
- mutex_enter(hash_get_mutex(table, fold));
-}
-
-/************************************************************//**
-Releases the mutex for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_mutex_exit(
-/*============*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold */
-{
- ut_ad(table->type == HASH_TABLE_SYNC_MUTEX);
- mutex_exit(hash_get_mutex(table, fold));
-}
-
-/************************************************************//**
-Reserves all the mutexes of a hash table, in an ascending order. */
-UNIV_INTERN
-void
-hash_mutex_enter_all(
-/*=================*/
- hash_table_t* table) /*!< in: hash table */
-{
- ulint i;
-
- ut_ad(table->type == HASH_TABLE_SYNC_MUTEX);
- for (i = 0; i < table->n_sync_obj; i++) {
-
- mutex_enter(table->sync_obj.mutexes + i);
- }
-}
-
-/************************************************************//**
-Releases all the mutexes of a hash table. */
-UNIV_INTERN
-void
-hash_mutex_exit_all(
-/*================*/
- hash_table_t* table) /*!< in: hash table */
-{
- ulint i;
-
- ut_ad(table->type == HASH_TABLE_SYNC_MUTEX);
- for (i = 0; i < table->n_sync_obj; i++) {
-
- mutex_exit(table->sync_obj.mutexes + i);
- }
-}
-
-/************************************************************//**
-Releases all but the passed in mutex of a hash table. */
-UNIV_INTERN
-void
-hash_mutex_exit_all_but(
-/*====================*/
- hash_table_t* table, /*!< in: hash table */
- ib_mutex_t* keep_mutex) /*!< in: mutex to keep */
-{
- ulint i;
-
- ut_ad(table->type == HASH_TABLE_SYNC_MUTEX);
- for (i = 0; i < table->n_sync_obj; i++) {
-
- ib_mutex_t* mutex = table->sync_obj.mutexes + i;
- if (UNIV_LIKELY(keep_mutex != mutex)) {
- mutex_exit(mutex);
- }
- }
-
- ut_ad(mutex_own(keep_mutex));
-}
-
-/************************************************************//**
-s-lock a lock for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_lock_s(
-/*========*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold */
-{
-
- rw_lock_t* lock = hash_get_lock(table, fold);
-
- ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK);
- ut_ad(lock);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- rw_lock_s_lock(lock);
-}
-
-/************************************************************//**
-x-lock a lock for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_lock_x(
-/*========*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold */
-{
-
- rw_lock_t* lock = hash_get_lock(table, fold);
-
- ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK);
- ut_ad(lock);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- rw_lock_x_lock(lock);
-}
-
-/************************************************************//**
-unlock an s-lock for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_unlock_s(
-/*==========*/
-
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold */
-{
-
- rw_lock_t* lock = hash_get_lock(table, fold);
-
- ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK);
- ut_ad(lock);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- rw_lock_s_unlock(lock);
-}
-
-/************************************************************//**
-unlock x-lock for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_unlock_x(
-/*==========*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold */
-{
- rw_lock_t* lock = hash_get_lock(table, fold);
-
- ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK);
- ut_ad(lock);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- rw_lock_x_unlock(lock);
-}
+#include "sync0sync.h"
/************************************************************//**
Reserves all the locks of a hash table, in an ascending order. */
-UNIV_INTERN
void
hash_lock_x_all(
/*============*/
hash_table_t* table) /*!< in: hash table */
{
- ulint i;
-
ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK);
- for (i = 0; i < table->n_sync_obj; i++) {
+
+ for (ulint i = 0; i < table->n_sync_obj; i++) {
rw_lock_t* lock = table->sync_obj.rw_locks + i;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+
+ ut_ad(!rw_lock_own(lock, RW_LOCK_S));
+ ut_ad(!rw_lock_own(lock, RW_LOCK_X));
rw_lock_x_lock(lock);
}
@@ -237,21 +49,18 @@ hash_lock_x_all(
/************************************************************//**
Releases all the locks of a hash table, in an ascending order. */
-UNIV_INTERN
void
hash_unlock_x_all(
/*==============*/
hash_table_t* table) /*!< in: hash table */
{
- ulint i;
-
ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK);
- for (i = 0; i < table->n_sync_obj; i++) {
+
+ for (ulint i = 0; i < table->n_sync_obj; i++) {
rw_lock_t* lock = table->sync_obj.rw_locks + i;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+
+ ut_ad(rw_lock_own(lock, RW_LOCK_X));
rw_lock_x_unlock(lock);
}
@@ -259,36 +68,30 @@ hash_unlock_x_all(
/************************************************************//**
Releases all but passed in lock of a hash table, */
-UNIV_INTERN
void
hash_unlock_x_all_but(
/*==================*/
hash_table_t* table, /*!< in: hash table */
rw_lock_t* keep_lock) /*!< in: lock to keep */
{
- ulint i;
-
ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK);
- for (i = 0; i < table->n_sync_obj; i++) {
+
+ for (ulint i = 0; i < table->n_sync_obj; i++) {
rw_lock_t* lock = table->sync_obj.rw_locks + i;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- if (UNIV_LIKELY(keep_lock != lock)) {
+ ut_ad(rw_lock_own(lock, RW_LOCK_X));
+
+ if (keep_lock != lock) {
rw_lock_x_unlock(lock);
}
}
}
-#endif /* !UNIV_HOTBACKUP */
-
/*************************************************************//**
Creates a hash table with >= n array cells. The actual number of cells is
chosen to be a prime number slightly bigger than n.
-@return own: created table */
-UNIV_INTERN
+@return own: created table */
hash_table_t*
hash_create(
/*========*/
@@ -300,24 +103,25 @@ hash_create(
prime = ut_find_prime(n);
- table = static_cast<hash_table_t*>(mem_alloc(sizeof(hash_table_t)));
+ table = static_cast<hash_table_t*>(
+ ut_malloc_nokey(sizeof(hash_table_t)));
array = static_cast<hash_cell_t*>(
- ut_malloc(sizeof(hash_cell_t) * prime));
+ ut_malloc_nokey(sizeof(hash_cell_t) * prime));
/* The default type of hash_table is HASH_TABLE_SYNC_NONE i.e.:
the caller is responsible for access control to the table. */
table->type = HASH_TABLE_SYNC_NONE;
table->array = array;
table->n_cells = prime;
-#ifndef UNIV_HOTBACKUP
+#ifdef BTR_CUR_HASH_ADAPT
# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
table->adaptive = FALSE;
# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+#endif /* BTR_CUR_HASH_ADAPT */
table->n_sync_obj = 0;
table->sync_obj.mutexes = NULL;
table->heaps = NULL;
-#endif /* !UNIV_HOTBACKUP */
table->heap = NULL;
ut_d(table->magic_n = HASH_TABLE_MAGIC_N);
@@ -329,70 +133,64 @@ hash_create(
/*************************************************************//**
Frees a hash table. */
-UNIV_INTERN
void
hash_table_free(
/*============*/
hash_table_t* table) /*!< in, own: hash table */
{
- ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
ut_free(table->array);
- mem_free(table);
+ ut_free(table);
}
-#ifndef UNIV_HOTBACKUP
/*************************************************************//**
Creates a sync object array to protect a hash table.
::sync_obj can be mutexes or rw_locks depening on the type of
hash table. */
-UNIV_INTERN
void
-hash_create_sync_obj_func(
-/*======================*/
+hash_create_sync_obj(
+/*=================*/
hash_table_t* table, /*!< in: hash table */
enum hash_table_sync_t type, /*!< in: HASH_TABLE_SYNC_MUTEX
or HASH_TABLE_SYNC_RW_LOCK */
-#ifdef UNIV_SYNC_DEBUG
- ulint sync_level,/*!< in: latching order level
- of the mutexes: used in the
- debug version */
-#endif /* UNIV_SYNC_DEBUG */
+ latch_id_t id, /*!< in: latch ID */
ulint n_sync_obj)/*!< in: number of sync objects,
must be a power of 2 */
{
- ulint i;
-
- ut_ad(table);
- ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
ut_a(n_sync_obj > 0);
ut_a(ut_is_2pow(n_sync_obj));
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
table->type = type;
- switch (type) {
+ switch (table->type) {
case HASH_TABLE_SYNC_MUTEX:
table->sync_obj.mutexes = static_cast<ib_mutex_t*>(
- mem_alloc(n_sync_obj * sizeof(ib_mutex_t)));
+ ut_malloc_nokey(n_sync_obj * sizeof(ib_mutex_t)));
- for (i = 0; i < n_sync_obj; i++) {
- mutex_create(hash_table_mutex_key,
- table->sync_obj.mutexes + i, sync_level);
+ for (ulint i = 0; i < n_sync_obj; i++) {
+ mutex_create(id, table->sync_obj.mutexes + i);
}
break;
- case HASH_TABLE_SYNC_RW_LOCK:
+ case HASH_TABLE_SYNC_RW_LOCK: {
+
+ latch_level_t level = sync_latch_get_level(id);
+
+ ut_a(level != SYNC_UNKNOWN);
+
table->sync_obj.rw_locks = static_cast<rw_lock_t*>(
- mem_alloc(n_sync_obj * sizeof(rw_lock_t)));
+ ut_malloc_nokey(n_sync_obj * sizeof(rw_lock_t)));
- for (i = 0; i < n_sync_obj; i++) {
- rw_lock_create(hash_table_rw_lock_key,
- table->sync_obj.rw_locks + i, sync_level);
+ for (ulint i = 0; i < n_sync_obj; i++) {
+ rw_lock_create(hash_table_locks_key,
+ table->sync_obj.rw_locks + i, level);
}
break;
+ }
case HASH_TABLE_SYNC_NONE:
ut_error;
@@ -400,4 +198,3 @@ hash_create_sync_obj_func(
table->n_sync_obj = n_sync_obj;
}
-#endif /* !UNIV_HOTBACKUP */
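
For orientation, a minimal usage sketch of the hash-table API touched above. This is editorial illustration, not part of the patch; the latch ID LATCH_ID_HASH_TABLE_RW_LOCK is an assumption based on how hash_create_sync_obj() resolves its level via sync_latch_get_level():

	/* Sketch: a table of >= 1000 cells (prime-sized internally),
	protected by 16 rw-locks (n_sync_obj must be a power of 2). */
	hash_table_t*	table = hash_create(1000);
	hash_create_sync_obj(table, HASH_TABLE_SYNC_RW_LOCK,
			     LATCH_ID_HASH_TABLE_RW_LOCK, 16);

	hash_lock_x_all(table);	  /* X-latch every lock, ascending order */
	/* ... mutate all cells, e.g. while resizing ... */
	hash_unlock_x_all(table); /* release in the same ascending order */

	/* hash_table_free() releases only the cell array and the table
	itself; the rw-locks would be torn down separately. */
	hash_table_free(table);
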
diff --git a/storage/innobase/ha_innodb.def b/storage/innobase/ha_innodb.def
deleted file mode 100644
index e0faa62deb1..00000000000
--- a/storage/innobase/ha_innodb.def
+++ /dev/null
@@ -1,4 +0,0 @@
-EXPORTS
- _mysql_plugin_interface_version_
- _mysql_sizeof_struct_st_plugin_
- _mysql_plugin_declarations_
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index f9bad3a128c..31841ccff2f 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -33,95 +33,116 @@ this program; if not, write to the Free Software Foundation, Inc.,
*****************************************************************************/
-#define lower_case_file_system lower_case_file_system_server
-#define mysql_unpacked_real_data_home mysql_unpacked_real_data_home_server
-#include <sql_table.h> // explain_filename, nz2, EXPLAIN_PARTITIONS_AS_COMMENT,
- // EXPLAIN_FILENAME_MAX_EXTRA_LENGTH
-
-#include <sql_acl.h> // PROCESS_ACL
-#include <debug_sync.h> // DEBUG_SYNC
-#include <my_base.h> // HA_OPTION_*
+/** @file ha_innodb.cc */
+
+#include "univ.i"
+
+/* Include necessary SQL headers */
+#include "ha_prototypes.h"
+#include <debug_sync.h>
+#include <gstream.h>
+#include <log.h>
#include <mysys_err.h>
#include <innodb_priv.h>
+#include <strfunc.h>
+#include <sql_acl.h>
+#include <sql_class.h>
+#include <sql_show.h>
+#include <sql_table.h>
#include <table_cache.h>
#include <my_check_opt.h>
+#include <my_bitmap.h>
+#include <mysql/service_thd_alloc.h>
+#include <mysql/service_thd_wait.h>
-#undef lower_case_file_system
-#undef mysql_unpacked_real_data_home
-MYSQL_PLUGIN_IMPORT extern my_bool lower_case_file_system;
-MYSQL_PLUGIN_IMPORT extern char mysql_unpacked_real_data_home[];
-
-#ifdef _WIN32
-#include <io.h>
-#endif
+// MYSQL_PLUGIN_IMPORT extern my_bool lower_case_file_system;
+// MYSQL_PLUGIN_IMPORT extern char mysql_unpacked_real_data_home[];
#include <my_service_manager.h>
-
-/** @file ha_innodb.cc */
+#include <key.h>
/* Include necessary InnoDB headers */
-#include "univ.i"
+#include "btr0btr.h"
+#include "btr0cur.h"
+#include "btr0bulk.h"
+#include "btr0sea.h"
+#include "buf0dblwr.h"
#include "buf0dump.h"
-#include "buf0lru.h"
#include "buf0flu.h"
-#include "buf0dblwr.h"
-#include "btr0sea.h"
+#include "buf0lru.h"
+#include "dict0boot.h"
+#include "dict0load.h"
#include "btr0defragment.h"
+#include "dict0crea.h"
+#include "dict0dict.h"
+#include "dict0stats.h"
+#include "dict0stats_bg.h"
+#include "fil0fil.h"
+#include "fsp0fsp.h"
+#include "fts0fts.h"
+#include "fts0plugin.h"
+#include "fts0priv.h"
+#include "fts0types.h"
+#include "ibuf0ibuf.h"
+#include "lock0lock.h"
+#include "log0crypt.h"
+#include "mtr0mtr.h"
#include "os0file.h"
-#include "os0thread.h"
-#include "srv0start.h"
-#include "srv0srv.h"
-#include "trx0roll.h"
-#include "trx0trx.h"
-#include "trx0sys.h"
+#include "page0zip.h"
+#include "pars0pars.h"
#include "rem0types.h"
+#include "row0import.h"
#include "row0ins.h"
+#include "row0merge.h"
#include "row0mysql.h"
+#include "row0quiesce.h"
#include "row0sel.h"
+#include "row0trunc.h"
#include "row0upd.h"
-#include "log0log.h"
-#include "lock0lock.h"
-#include "dict0crea.h"
-#include "btr0cur.h"
-#include "btr0btr.h"
-#include "fsp0fsp.h"
-#include "sync0sync.h"
-#include "fil0fil.h"
#include "fil0crypt.h"
-#include "trx0xa.h"
-#include "row0merge.h"
-#include "dict0boot.h"
-#include "dict0stats.h"
-#include "dict0stats_bg.h"
-#include "ha_prototypes.h"
-#include "ut0mem.h"
-#include "ibuf0ibuf.h"
-#include "dict0dict.h"
#include "srv0mon.h"
-#include "api0api.h"
-#include "api0misc.h"
-#include "pars0pars.h"
-#include "fts0fts.h"
-#include "fts0types.h"
-#include "row0import.h"
-#include "row0quiesce.h"
+#include "srv0srv.h"
+#include "srv0start.h"
#ifdef UNIV_DEBUG
#include "trx0purge.h"
#endif /* UNIV_DEBUG */
-#include "fts0priv.h"
-#include "page0zip.h"
+#include "trx0roll.h"
+#include "trx0sys.h"
+#include "trx0trx.h"
#include "fil0pagecompress.h"
-#include "dict0priv.h"
+#include "trx0xa.h"
+#include "ut0mem.h"
+#include "row0ext.h"
#define thd_get_trx_isolation(X) ((enum_tx_isolation)thd_tx_isolation(X))
+extern "C" void thd_mark_transaction_to_rollback(MYSQL_THD thd, bool all);
+unsigned long long thd_get_query_id(const MYSQL_THD thd);
+void thd_clear_error(MYSQL_THD thd);
+
+TABLE *find_fk_open_table(THD *thd, const char *db, size_t db_len,
+ const char *table, size_t table_len);
+MYSQL_THD create_thd();
+void destroy_thd(MYSQL_THD thd);
+void reset_thd(MYSQL_THD thd);
+TABLE *open_purge_table(THD *thd, const char *db, size_t dblen,
+ const char *tb, size_t tblen);
+void close_thread_tables(THD* thd);
+
+/** Check if the user has used an XtraDB extended system variable that
+is not currently supported by InnoDB or is marked as deprecated. */
+static
+void
+innodb_check_deprecated(void);
+
#ifdef MYSQL_DYNAMIC_PLUGIN
-#define tc_size 2000
+#define tc_size 400
#define tdc_size 400
#endif
#include "ha_innodb.h"
#include "i_s.h"
+#include "sync0sync.h"
#include <string>
#include <sstream>
@@ -129,17 +150,11 @@ MYSQL_PLUGIN_IMPORT extern char mysql_unpacked_real_data_home[];
#include <mysql/plugin.h>
#include <mysql/service_wsrep.h>
-# ifndef MYSQL_PLUGIN_IMPORT
-# define MYSQL_PLUGIN_IMPORT /* nothing */
-# endif /* MYSQL_PLUGIN_IMPORT */
-
#ifdef WITH_WSREP
-#include "../../../wsrep/wsrep_api.h"
-#include "../storage/innobase/include/ut0byte.h"
+#include "dict0priv.h"
+#include "ut0byte.h"
#include <mysql/service_md5.h>
-
-class binlog_trx_data;
-extern handlerton *binlog_hton;
+#include "wsrep_sst.h"
extern MYSQL_PLUGIN_IMPORT MYSQL_BIN_LOG mysql_bin_log;
@@ -160,90 +175,78 @@ static int innobase_wsrep_set_checkpoint(handlerton* hton, const XID* xid);
static int innobase_wsrep_get_checkpoint(handlerton* hton, XID* xid);
#endif /* WITH_WSREP */
-/** to protect innobase_open_files */
-static mysql_mutex_t innobase_share_mutex;
/** to force correct commit order in binlog */
static ulong commit_threads = 0;
static mysql_cond_t commit_cond;
static mysql_mutex_t commit_cond_m;
static mysql_mutex_t pending_checkpoint_mutex;
-static bool innodb_inited = 0;
#define INSIDE_HA_INNOBASE_CC
#define EQ_CURRENT_THD(thd) ((thd) == current_thd)
-static struct handlerton* innodb_hton_ptr;
+struct handlerton* innodb_hton_ptr;
static const long AUTOINC_OLD_STYLE_LOCKING = 0;
static const long AUTOINC_NEW_STYLE_LOCKING = 1;
static const long AUTOINC_NO_LOCKING = 2;
-static long innobase_mirrored_log_groups;
static long innobase_log_buffer_size;
-static long innobase_additional_mem_pool_size;
-static long innobase_file_io_threads;
-static long innobase_open_files;
+static long innobase_open_files=0;
static long innobase_autoinc_lock_mode;
static ulong innobase_commit_concurrency = 0;
static ulong innobase_read_io_threads;
static ulong innobase_write_io_threads;
-static long innobase_buffer_pool_instances = 1;
-static long long innobase_buffer_pool_size, innobase_log_file_size;
+static long long innobase_buffer_pool_size;
/** Percentage of the buffer pool to reserve for 'old' blocks.
Connected to buf_LRU_old_ratio. */
static uint innobase_old_blocks_pct;
-/** Maximum on-disk size of change buffer in terms of percentage
-of the buffer pool. */
-static uint innobase_change_buffer_max_size = CHANGE_BUFFER_DEFAULT_SIZE;
-
/* The default values for the following char* start-up parameters
are determined in innobase_init below: */
-static char* innobase_data_home_dir = NULL;
-static char* innobase_data_file_path = NULL;
-static char* innobase_file_format_name = NULL;
-static char* innobase_change_buffering = NULL;
-static char* innobase_enable_monitor_counter = NULL;
-static char* innobase_disable_monitor_counter = NULL;
-static char* innobase_reset_monitor_counter = NULL;
-static char* innobase_reset_all_monitor_counter = NULL;
+static char* innobase_data_home_dir;
+static char* innobase_data_file_path;
+static char* innobase_temp_data_file_path;
+static char* innobase_file_format_name;
+static char* innobase_change_buffering;
+static char* innobase_enable_monitor_counter;
+static char* innobase_disable_monitor_counter;
+static char* innobase_reset_monitor_counter;
+static char* innobase_reset_all_monitor_counter;
/* The highest file format being used in the database. The value can be
set by the user; however, it will be adjusted to the newer file format if
a table of such a format is created/opened. */
-static char* innobase_file_format_max = NULL;
+char* innobase_file_format_max;
+
+/** Default value of innodb_file_format */
+static const char* innodb_file_format_default = "Barracuda";
+/** Default value of innodb_file_format_max */
+static const char* innodb_file_format_max_default = "Antelope";
-static char* innobase_file_flush_method = NULL;
+static char* innobase_file_flush_method;
/* This variable can be set in the server configuration file, specifying
the stopword table to be used */
-static char* innobase_server_stopword_table = NULL;
+static char* innobase_server_stopword_table;
/* Below we have boolean-valued start-up parameters, and their default
values */
-static ulong innobase_fast_shutdown = 1;
-static my_bool innobase_file_format_check = TRUE;
-#ifdef UNIV_LOG_ARCHIVE
-static my_bool innobase_log_archive = FALSE;
-static char* innobase_log_arch_dir = NULL;
-#endif /* UNIV_LOG_ARCHIVE */
-static my_bool innobase_use_atomic_writes = FALSE;
-static my_bool innobase_use_fallocate = TRUE;
-static my_bool innobase_use_doublewrite = TRUE;
-static my_bool innobase_use_checksums = TRUE;
-static my_bool innobase_locks_unsafe_for_binlog = FALSE;
-static my_bool innobase_rollback_on_timeout = FALSE;
-static my_bool innobase_create_status_file = FALSE;
-static my_bool innobase_stats_on_metadata = TRUE;
-static my_bool innobase_large_prefix = FALSE;
-static my_bool innodb_optimize_fulltext_only = FALSE;
-
-static char* internal_innobase_data_file_path = NULL;
+static my_bool innobase_file_format_check;
+static my_bool innobase_use_atomic_writes;
+static my_bool innobase_use_fallocate;
+static my_bool innobase_use_doublewrite;
+static my_bool innobase_use_checksums;
+static my_bool innobase_locks_unsafe_for_binlog;
+static my_bool innobase_rollback_on_timeout;
+static my_bool innobase_create_status_file;
+my_bool innobase_stats_on_metadata;
+static my_bool innobase_large_prefix;
+static my_bool innodb_optimize_fulltext_only;
static char* innodb_version_str = (char*) INNODB_VERSION_STR;
@@ -259,6 +262,118 @@ extern uint srv_background_scrub_data_check_interval;
extern my_bool srv_scrub_force_testing;
#endif
+/** Note we cannot use rec_format_enum because we do not allow the
+COMPRESSED row format for the innodb_default_row_format option. */
+enum default_row_format_enum {
+ DEFAULT_ROW_FORMAT_REDUNDANT = 0,
+ DEFAULT_ROW_FORMAT_COMPACT = 1,
+ DEFAULT_ROW_FORMAT_DYNAMIC = 2,
+};
+
+static
+void set_my_errno(int err)
+{
+ errno = err;
+}
+
+/** Checks whether the file name belongs to a partition of a table.
+@param[in] file_name file name
+@return pointer to the end of the table name part of the file name, or NULL */
+static
+char*
+is_partition(
+/*=========*/
+ char* file_name)
+{
+	/* We look for the pattern #P# to see if the table is a
+	partitioned MariaDB table. */
+ return strstr(file_name, table_name_t::part_suffix);
+}
+
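
An illustrative check of the pattern match (not part of the patch), assuming table_name_t::part_suffix is the string "#P#":

	char	name[] = "test/t1#P#p0";
	ut_ad(is_partition(name) == name + 7);	/* points at "#P#p0" */

	char	plain[] = "test/t1";
	ut_ad(is_partition(plain) == NULL);	/* not a partition */
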
+/** Signal to shut down InnoDB (NULL if shutdown was already signaled,
+or if running in innodb_read_only mode, i.e. srv_read_only_mode) */
+st_my_thread_var *srv_running;
+/** Service thread that waits for the server shutdown and stops purge threads.
+Purge workers have THDs that are needed to calculate virtual columns.
+These THDs must be destroyed rather early in the server shutdown sequence.
+This service thread creates a THD and idly waits for it to get a signal to
+die. Then it notifies all purge workers to shut down.
+*/
+static pthread_t thd_destructor_thread;
+
+pthread_handler_t
+thd_destructor_proxy(void *)
+{
+ mysql_mutex_t thd_destructor_mutex;
+ mysql_cond_t thd_destructor_cond;
+
+ my_thread_init();
+ mysql_mutex_init(PSI_NOT_INSTRUMENTED, &thd_destructor_mutex, 0);
+ mysql_cond_init(PSI_NOT_INSTRUMENTED, &thd_destructor_cond, 0);
+
+ st_my_thread_var *myvar= _my_thread_var();
+ myvar->current_mutex = &thd_destructor_mutex;
+ myvar->current_cond = &thd_destructor_cond;
+
+ THD *thd= create_thd();
+ thd_proc_info(thd, "InnoDB shutdown handler");
+
+ mysql_mutex_lock(&thd_destructor_mutex);
+ my_atomic_storeptr_explicit(reinterpret_cast<void**>(&srv_running),
+ myvar,
+ MY_MEMORY_ORDER_RELAXED);
+ /* wait until the server wakes the THD to abort and die */
+ while (!srv_running->abort)
+ mysql_cond_wait(&thd_destructor_cond, &thd_destructor_mutex);
+ mysql_mutex_unlock(&thd_destructor_mutex);
+ my_atomic_storeptr_explicit(reinterpret_cast<void**>(&srv_running),
+ NULL,
+ MY_MEMORY_ORDER_RELAXED);
+
+ while (srv_fast_shutdown == 0 &&
+ (trx_sys_any_active_transactions() ||
+ (uint)thread_count > srv_n_purge_threads + 1)) {
+ thd_proc_info(thd, "InnoDB slow shutdown wait");
+ os_thread_sleep(1000);
+ }
+
+ /* Some background threads might generate undo pages that will
+ need to be purged, so they have to be shut down before purge
+ threads if slow shutdown is requested. */
+ srv_shutdown_bg_undo_sources();
+ srv_purge_shutdown();
+
+ destroy_thd(thd);
+ mysql_cond_destroy(&thd_destructor_cond);
+ mysql_mutex_destroy(&thd_destructor_mutex);
+ my_thread_end();
+ return 0;
+}
+
+/** Return the InnoDB ROW_FORMAT enum value
+@param[in] row_format row_format from "innodb_default_row_format"
+@return InnoDB ROW_FORMAT value from rec_format_t enum. */
+static
+rec_format_t
+get_row_format(
+ ulong row_format)
+{
+ switch(row_format) {
+ case DEFAULT_ROW_FORMAT_REDUNDANT:
+ return(REC_FORMAT_REDUNDANT);
+ case DEFAULT_ROW_FORMAT_COMPACT:
+ return(REC_FORMAT_COMPACT);
+ case DEFAULT_ROW_FORMAT_DYNAMIC:
+ return(REC_FORMAT_DYNAMIC);
+ default:
+ ut_ad(0);
+ return(REC_FORMAT_DYNAMIC);
+ }
+}
+
+static ulong innodb_default_row_format = DEFAULT_ROW_FORMAT_DYNAMIC;
+
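
A hypothetical call site (not in the patch) showing the mapping with the shipped default:

	/* innodb_default_row_format defaults to DYNAMIC, see above. */
	const rec_format_t	fmt = get_row_format(innodb_default_row_format);
	ut_ad(fmt == REC_FORMAT_DYNAMIC);
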
/** Possible values for the system variable "innodb_stats_method". The values
are defined the same as for the corresponding MyISAM system variable
"myisam_stats_method" (see "myisam_stats_method_names"), for better usability */
@@ -278,42 +393,26 @@ static TYPELIB innodb_stats_method_typelib = {
NULL
};
-/** Possible values for system variable "innodb_checksum_algorithm". */
-static const char* innodb_checksum_algorithm_names[] = {
- "CRC32",
- "STRICT_CRC32",
- "INNODB",
- "STRICT_INNODB",
- "NONE",
- "STRICT_NONE",
+/** Possible values of the parameter innodb_checksum_algorithm */
+const char* innodb_checksum_algorithm_names[] = {
+ "crc32",
+ "strict_crc32",
+ "innodb",
+ "strict_innodb",
+ "none",
+ "strict_none",
NullS
};
/** Used to define an enumerate type of the system variable
innodb_checksum_algorithm. */
-static TYPELIB innodb_checksum_algorithm_typelib = {
+TYPELIB innodb_checksum_algorithm_typelib = {
array_elements(innodb_checksum_algorithm_names) - 1,
"innodb_checksum_algorithm_typelib",
innodb_checksum_algorithm_names,
NULL
};
-/** Possible values of the parameter innodb_lock_schedule_algorithm */
-static const char* innodb_lock_schedule_algorithm_names[] = {
- "fcfs",
- "vats",
- NullS
-};
-
-/** Used to define an enumerate type of the system variable
-innodb_lock_schedule_algorithm. */
-static TYPELIB innodb_lock_schedule_algorithm_typelib = {
- array_elements(innodb_lock_schedule_algorithm_names) - 1,
- "innodb_lock_schedule_algorithm_typelib",
- innodb_lock_schedule_algorithm_names,
- NULL
-};
-
/** Possible values for system variable "innodb_default_row_format". */
static const char* innodb_default_row_format_names[] = {
"redundant",
@@ -331,6 +430,22 @@ static TYPELIB innodb_default_row_format_typelib = {
NULL
};
+/** Possible values of the parameter innodb_lock_schedule_algorithm */
+static const char* innodb_lock_schedule_algorithm_names[] = {
+ "fcfs",
+ "vats",
+ NullS
+};
+
+/** Used to define an enumerate type of the system variable
+innodb_lock_schedule_algorithm. */
+static TYPELIB innodb_lock_schedule_algorithm_typelib = {
+ array_elements(innodb_lock_schedule_algorithm_names) - 1,
+ "innodb_lock_schedule_algorithm_typelib",
+ innodb_lock_schedule_algorithm_names,
+ NULL
+};
+
/* The following counter is used to convey information to InnoDB
about server activity: in case of normal DML ops it is not
sensible to call srv_active_wake_master_thread after each
@@ -339,8 +454,6 @@ operation, we only do it every INNOBASE_WAKE_INTERVAL'th step. */
#define INNOBASE_WAKE_INTERVAL 32
static ulong innobase_active_counter = 0;
-static hash_table_t* innobase_open_tables;
-
/** Allowed values of innodb_change_buffering */
static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = {
"none", /* IBUF_USE_NONE */
@@ -351,34 +464,30 @@ static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = {
"all" /* IBUF_USE_ALL */
};
-
-/** Note we cannot use rec_format_enum because we do not allow
-COMPRESSED row format for innodb_default_row_format option. */
-enum default_row_format_enum {
- DEFAULT_ROW_FORMAT_REDUNDANT = 0,
- DEFAULT_ROW_FORMAT_COMPACT = 1,
- DEFAULT_ROW_FORMAT_DYNAMIC = 2
-};
-
-/** Convert an InnoDB ROW_FORMAT value.
-@param[in] row_format row_format from "innodb_default_row_format"
-@return converted ROW_FORMAT */
-static rec_format_t get_row_format(ulong row_format)
-{
- switch (row_format) {
- case DEFAULT_ROW_FORMAT_REDUNDANT:
- return REC_FORMAT_REDUNDANT;
- case DEFAULT_ROW_FORMAT_COMPACT:
- return REC_FORMAT_COMPACT;
- case DEFAULT_ROW_FORMAT_DYNAMIC:
- return REC_FORMAT_DYNAMIC;
- default:
- ut_ad(0);
- return REC_FORMAT_COMPACT;
- }
-}
-
-static ulong innodb_default_row_format;
+/** Retrieve the FTS Relevance Ranking result for doc with doc_id
+of m_prebuilt->fts_doc_id
+@param[in,out] fts_hdl FTS handler
+@return the relevance ranking value */
+static
+float
+innobase_fts_retrieve_ranking(
+ FT_INFO* fts_hdl);
+/** Free the memory for the FTS handler
+@param[in,out] fts_hdl FTS handler */
+static
+void
+innobase_fts_close_ranking(
+ FT_INFO* fts_hdl);
+/** Find and Retrieve the FTS Relevance Ranking result for doc with doc_id
+of m_prebuilt->fts_doc_id
+@param[in,out] fts_hdl FTS handler
+@return the relevance ranking value */
+static
+float
+innobase_fts_find_ranking(
+ FT_INFO* fts_hdl,
+ uchar*,
+ uint);
/* Call back function array defined by MySQL and used to
retrieve FTS results. */
@@ -388,26 +497,74 @@ const struct _ft_vft ft_vft_result = {NULL,
innobase_fts_retrieve_ranking,
NULL};
+/** @return version of the extended FTS API */
+static
+uint
+innobase_fts_get_version()
+{
+	/* Currently this doesn't make much sense as returning
+	HA_CAN_FULLTEXT_EXT automatically means this version is supported.
+	This is supposed to ease future extensions. */
+ return(2);
+}
+
+/** @return Which part of the extended FTS API is supported */
+static
+ulonglong
+innobase_fts_flags()
+{
+ return(FTS_ORDERED_RESULT | FTS_DOCID_IN_RESULT);
+}
+
+/** Find and Retrieve the FTS doc_id for the current result row
+@param[in,out] fts_hdl FTS handler
+@return the document ID */
+static
+ulonglong
+innobase_fts_retrieve_docid(
+ FT_INFO_EXT* fts_hdl);
+
+/** Find and retrieve the size of the current result
+@param[in,out] fts_hdl FTS handler
+@return number of matching rows */
+static
+ulonglong
+innobase_fts_count_matches(
+ FT_INFO_EXT* fts_hdl) /*!< in: FTS handler */
+{
+ NEW_FT_INFO* handle = reinterpret_cast<NEW_FT_INFO*>(fts_hdl);
+
+ if (handle->ft_result->rankings_by_id != NULL) {
+ return(rbt_size(handle->ft_result->rankings_by_id));
+ } else {
+ return(0);
+ }
+}
+
const struct _ft_vft_ext ft_vft_ext_result = {innobase_fts_get_version,
innobase_fts_flags,
innobase_fts_retrieve_docid,
innobase_fts_count_matches};
#ifdef HAVE_PSI_INTERFACE
+# define PSI_KEY(n) {&n##_key, #n, 0}
+/* All RWLOCKs used in InnoDB are SX-locks */
+# define PSI_RWLOCK_KEY(n) {&n##_key, #n, PSI_RWLOCK_FLAG_SX}
+
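To make the arrays below easier to read: both macros stringize their argument and take the address of the matching *_key variable, so for example

	/* PSI_KEY(commit_cond_mutex) expands to */
	{&commit_cond_mutex_key, "commit_cond_mutex", 0}

	/* PSI_RWLOCK_KEY(btr_search_latch) expands to */
	{&btr_search_latch_key, "btr_search_latch", PSI_RWLOCK_FLAG_SX}
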
/* Keys to register pthread mutexes/cond in the current file with
performance schema */
-static mysql_pfs_key_t innobase_share_mutex_key;
static mysql_pfs_key_t commit_cond_mutex_key;
static mysql_pfs_key_t commit_cond_key;
static mysql_pfs_key_t pending_checkpoint_mutex_key;
+static mysql_pfs_key_t thd_destructor_thread_key;
static PSI_mutex_info all_pthread_mutexes[] = {
- {&commit_cond_mutex_key, "commit_cond_mutex", 0},
- {&innobase_share_mutex_key, "innobase_share_mutex", 0}
+ PSI_KEY(commit_cond_mutex),
+ PSI_KEY(pending_checkpoint_mutex),
};
static PSI_cond_info all_innodb_conds[] = {
- {&commit_cond_key, "commit_cond", 0}
+ PSI_KEY(commit_cond)
};
# ifdef UNIV_PFS_MUTEX
@@ -415,76 +572,63 @@ static PSI_cond_info all_innodb_conds[] = {
performance schema instrumented if "UNIV_PFS_MUTEX"
is defined */
static PSI_mutex_info all_innodb_mutexes[] = {
- {&autoinc_mutex_key, "autoinc_mutex", 0},
+ PSI_KEY(autoinc_mutex),
# ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
- {&buffer_block_mutex_key, "buffer_block_mutex", 0},
+ PSI_KEY(buffer_block_mutex),
# endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
- {&buf_pool_mutex_key, "buf_pool_mutex", 0},
- {&buf_pool_zip_mutex_key, "buf_pool_zip_mutex", 0},
- {&cache_last_read_mutex_key, "cache_last_read_mutex", 0},
- {&dict_foreign_err_mutex_key, "dict_foreign_err_mutex", 0},
- {&dict_sys_mutex_key, "dict_sys_mutex", 0},
- {&file_format_max_mutex_key, "file_format_max_mutex", 0},
- {&fil_system_mutex_key, "fil_system_mutex", 0},
- {&flush_list_mutex_key, "flush_list_mutex", 0},
- {&fts_bg_threads_mutex_key, "fts_bg_threads_mutex", 0},
- {&fts_delete_mutex_key, "fts_delete_mutex", 0},
- {&fts_optimize_mutex_key, "fts_optimize_mutex", 0},
- {&fts_doc_id_mutex_key, "fts_doc_id_mutex", 0},
- {&fts_pll_tokenize_mutex_key, "fts_pll_tokenize_mutex", 0},
- {&log_flush_order_mutex_key, "log_flush_order_mutex", 0},
- {&hash_table_mutex_key, "hash_table_mutex", 0},
- {&ibuf_bitmap_mutex_key, "ibuf_bitmap_mutex", 0},
- {&ibuf_mutex_key, "ibuf_mutex", 0},
- {&ibuf_pessimistic_insert_mutex_key,
- "ibuf_pessimistic_insert_mutex", 0},
-# ifndef HAVE_ATOMIC_BUILTINS
- {&server_mutex_key, "server_mutex", 0},
-# endif /* !HAVE_ATOMIC_BUILTINS */
- {&log_sys_mutex_key, "log_sys_mutex", 0},
-# ifdef UNIV_MEM_DEBUG
- {&mem_hash_mutex_key, "mem_hash_mutex", 0},
-# endif /* UNIV_MEM_DEBUG */
- {&mem_pool_mutex_key, "mem_pool_mutex", 0},
- {&mutex_list_mutex_key, "mutex_list_mutex", 0},
- {&page_zip_stat_per_index_mutex_key, "page_zip_stat_per_index_mutex", 0},
- {&purge_sys_bh_mutex_key, "purge_sys_bh_mutex", 0},
- {&recv_sys_mutex_key, "recv_sys_mutex", 0},
- {&recv_writer_mutex_key, "recv_writer_mutex", 0},
- {&rseg_mutex_key, "rseg_mutex", 0},
-# ifdef UNIV_SYNC_DEBUG
- {&rw_lock_debug_mutex_key, "rw_lock_debug_mutex", 0},
-# endif /* UNIV_SYNC_DEBUG */
- {&rw_lock_list_mutex_key, "rw_lock_list_mutex", 0},
- {&rw_lock_mutex_key, "rw_lock_mutex", 0},
- {&srv_dict_tmpfile_mutex_key, "srv_dict_tmpfile_mutex", 0},
- {&srv_innodb_monitor_mutex_key, "srv_innodb_monitor_mutex", 0},
- {&srv_misc_tmpfile_mutex_key, "srv_misc_tmpfile_mutex", 0},
- {&srv_monitor_file_mutex_key, "srv_monitor_file_mutex", 0},
-# ifdef UNIV_SYNC_DEBUG
- {&sync_thread_mutex_key, "sync_thread_mutex", 0},
-# endif /* UNIV_SYNC_DEBUG */
- {&buf_dblwr_mutex_key, "buf_dblwr_mutex", 0},
- {&trx_undo_mutex_key, "trx_undo_mutex", 0},
- {&srv_sys_mutex_key, "srv_sys_mutex", 0},
- {&lock_sys_mutex_key, "lock_mutex", 0},
- {&lock_sys_wait_mutex_key, "lock_wait_mutex", 0},
- {&trx_mutex_key, "trx_mutex", 0},
- {&srv_sys_tasks_mutex_key, "srv_threads_mutex", 0},
- /* mutex with os_fast_mutex_ interfaces */
+ PSI_KEY(buf_pool_mutex),
+ PSI_KEY(buf_pool_zip_mutex),
+ PSI_KEY(cache_last_read_mutex),
+ PSI_KEY(dict_foreign_err_mutex),
+ PSI_KEY(dict_sys_mutex),
+ PSI_KEY(recalc_pool_mutex),
+ PSI_KEY(file_format_max_mutex),
+ PSI_KEY(fil_system_mutex),
+ PSI_KEY(flush_list_mutex),
+ PSI_KEY(fts_bg_threads_mutex),
+ PSI_KEY(fts_delete_mutex),
+ PSI_KEY(fts_optimize_mutex),
+ PSI_KEY(fts_doc_id_mutex),
+ PSI_KEY(log_flush_order_mutex),
+ PSI_KEY(hash_table_mutex),
+ PSI_KEY(ibuf_bitmap_mutex),
+ PSI_KEY(ibuf_mutex),
+ PSI_KEY(ibuf_pessimistic_insert_mutex),
+ PSI_KEY(log_sys_mutex),
+ PSI_KEY(log_sys_write_mutex),
+ PSI_KEY(mutex_list_mutex),
+ PSI_KEY(page_zip_stat_per_index_mutex),
+ PSI_KEY(purge_sys_pq_mutex),
+ PSI_KEY(recv_sys_mutex),
+ PSI_KEY(recv_writer_mutex),
+ PSI_KEY(redo_rseg_mutex),
+ PSI_KEY(noredo_rseg_mutex),
+# ifdef UNIV_DEBUG
+ PSI_KEY(rw_lock_debug_mutex),
+# endif /* UNIV_DEBUG */
+ PSI_KEY(rw_lock_list_mutex),
+ PSI_KEY(rw_lock_mutex),
+ PSI_KEY(srv_innodb_monitor_mutex),
+ PSI_KEY(srv_misc_tmpfile_mutex),
+ PSI_KEY(srv_monitor_file_mutex),
+ PSI_KEY(buf_dblwr_mutex),
+ PSI_KEY(trx_undo_mutex),
+ PSI_KEY(trx_pool_mutex),
+ PSI_KEY(trx_pool_manager_mutex),
+ PSI_KEY(srv_sys_mutex),
+ PSI_KEY(lock_mutex),
+ PSI_KEY(lock_wait_mutex),
+ PSI_KEY(trx_mutex),
+ PSI_KEY(srv_threads_mutex),
# ifndef PFS_SKIP_EVENT_MUTEX
- {&event_os_mutex_key, "event_os_mutex", 0},
+ PSI_KEY(event_mutex),
# endif /* PFS_SKIP_EVENT_MUTEX */
- {&os_mutex_key, "os_mutex", 0},
-#ifndef HAVE_ATOMIC_BUILTINS
- {&srv_conc_mutex_key, "srv_conc_mutex", 0},
-#endif /* !HAVE_ATOMIC_BUILTINS */
-#ifndef HAVE_ATOMIC_BUILTINS_64
- {&monitor_mutex_key, "monitor_mutex", 0},
-#endif /* !HAVE_ATOMIC_BUILTINS_64 */
- {&ut_list_mutex_key, "ut_list_mutex", 0},
- {&trx_sys_mutex_key, "trx_sys_mutex", 0},
- {&zip_pad_mutex_key, "zip_pad_mutex", 0},
+ PSI_KEY(rtr_active_mutex),
+ PSI_KEY(rtr_match_mutex),
+ PSI_KEY(rtr_path_mutex),
+ PSI_KEY(rtr_ssn_mutex),
+ PSI_KEY(trx_sys_mutex),
+ PSI_KEY(zip_pad_mutex)
};
# endif /* UNIV_PFS_MUTEX */
@@ -493,27 +637,24 @@ static PSI_mutex_info all_innodb_mutexes[] = {
performance schema instrumented if "UNIV_PFS_RWLOCK"
is defined */
static PSI_rwlock_info all_innodb_rwlocks[] = {
-# ifdef UNIV_LOG_ARCHIVE
- {&archive_lock_key, "archive_lock", 0},
-# endif /* UNIV_LOG_ARCHIVE */
- {&btr_search_latch_key, "btr_search_latch", 0},
+ PSI_RWLOCK_KEY(btr_search_latch),
# ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
- {&buf_block_lock_key, "buf_block_lock", 0},
+ PSI_RWLOCK_KEY(buf_block_lock),
# endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
-# ifdef UNIV_SYNC_DEBUG
- {&buf_block_debug_latch_key, "buf_block_debug_latch", 0},
-# endif /* UNIV_SYNC_DEBUG */
- {&dict_operation_lock_key, "dict_operation_lock", 0},
- {&fil_space_latch_key, "fil_space_latch", 0},
- {&checkpoint_lock_key, "checkpoint_lock", 0},
- {&fts_cache_rw_lock_key, "fts_cache_rw_lock", 0},
- {&fts_cache_init_rw_lock_key, "fts_cache_init_rw_lock", 0},
- {&trx_i_s_cache_lock_key, "trx_i_s_cache_lock", 0},
- {&trx_purge_latch_key, "trx_purge_latch", 0},
- {&index_tree_rw_lock_key, "index_tree_rw_lock", 0},
- {&index_online_log_key, "index_online_log", 0},
- {&dict_table_stats_key, "dict_table_stats", 0},
- {&hash_table_rw_lock_key, "hash_table_locks", 0}
+# ifdef UNIV_DEBUG
+ PSI_RWLOCK_KEY(buf_block_debug_latch),
+# endif /* UNIV_DEBUG */
+ PSI_RWLOCK_KEY(dict_operation_lock),
+ PSI_RWLOCK_KEY(fil_space_latch),
+ PSI_RWLOCK_KEY(checkpoint_lock),
+ PSI_RWLOCK_KEY(fts_cache_rw_lock),
+ PSI_RWLOCK_KEY(fts_cache_init_rw_lock),
+ PSI_RWLOCK_KEY(trx_i_s_cache_lock),
+ PSI_RWLOCK_KEY(trx_purge_latch),
+ PSI_RWLOCK_KEY(index_tree_rw_lock),
+ PSI_RWLOCK_KEY(index_online_log),
+ PSI_RWLOCK_KEY(dict_table_stats),
+ PSI_RWLOCK_KEY(hash_table_locks)
};
# endif /* UNIV_PFS_RWLOCK */
@@ -522,15 +663,23 @@ static PSI_rwlock_info all_innodb_rwlocks[] = {
performance schema instrumented if "UNIV_PFS_THREAD"
is defined */
static PSI_thread_info all_innodb_threads[] = {
- {&trx_rollback_clean_thread_key, "trx_rollback_clean_thread", 0},
- {&io_handler_thread_key, "io_handler_thread", 0},
- {&srv_lock_timeout_thread_key, "srv_lock_timeout_thread", 0},
- {&srv_error_monitor_thread_key, "srv_error_monitor_thread", 0},
- {&srv_monitor_thread_key, "srv_monitor_thread", 0},
- {&srv_master_thread_key, "srv_master_thread", 0},
- {&srv_purge_thread_key, "srv_purge_thread", 0},
- {&buf_page_cleaner_thread_key, "page_cleaner_thread", 0},
- {&recv_writer_thread_key, "recv_writer_thread", 0}
+ PSI_KEY(buf_dump_thread),
+ PSI_KEY(dict_stats_thread),
+ PSI_KEY(io_handler_thread),
+ PSI_KEY(io_ibuf_thread),
+ PSI_KEY(io_log_thread),
+ PSI_KEY(io_read_thread),
+ PSI_KEY(io_write_thread),
+ PSI_KEY(page_cleaner_thread),
+ PSI_KEY(recv_writer_thread),
+ PSI_KEY(srv_error_monitor_thread),
+ PSI_KEY(srv_lock_timeout_thread),
+ PSI_KEY(srv_master_thread),
+ PSI_KEY(srv_monitor_thread),
+ PSI_KEY(srv_purge_thread),
+ PSI_KEY(srv_worker_thread),
+ PSI_KEY(trx_rollback_clean_thread),
+ PSI_KEY(thd_destructor_thread),
};
# endif /* UNIV_PFS_THREAD */
@@ -538,152 +687,21 @@ static PSI_thread_info all_innodb_threads[] = {
/* all_innodb_files array contains the type of files that are
performance schema instrumented if "UNIV_PFS_IO" is defined */
static PSI_file_info all_innodb_files[] = {
- {&innodb_file_data_key, "innodb_data_file", 0},
- {&innodb_file_log_key, "innodb_log_file", 0},
- {&innodb_file_temp_key, "innodb_temp_file", 0}
+ PSI_KEY(innodb_data_file),
+ PSI_KEY(innodb_log_file),
+ PSI_KEY(innodb_temp_file)
};
# endif /* UNIV_PFS_IO */
#endif /* HAVE_PSI_INTERFACE */
-/** Always normalize table name to lower case on Windows */
-#ifdef __WIN__
-#define normalize_table_name(norm_name, name) \
- normalize_table_name_low(norm_name, name, TRUE)
-#else
-#define normalize_table_name(norm_name, name) \
- normalize_table_name_low(norm_name, name, FALSE)
-#endif /* __WIN__ */
-
-/** Set up InnoDB API callback function array */
-ib_cb_t innodb_api_cb[] = {
- (ib_cb_t) ib_cursor_open_table,
- (ib_cb_t) ib_cursor_read_row,
- (ib_cb_t) ib_cursor_insert_row,
- (ib_cb_t) ib_cursor_delete_row,
- (ib_cb_t) ib_cursor_update_row,
- (ib_cb_t) ib_cursor_moveto,
- (ib_cb_t) ib_cursor_first,
- (ib_cb_t) ib_cursor_next,
- (ib_cb_t) ib_cursor_last,
- (ib_cb_t) ib_cursor_set_match_mode,
- (ib_cb_t) ib_sec_search_tuple_create,
- (ib_cb_t) ib_clust_read_tuple_create,
- (ib_cb_t) ib_tuple_delete,
- (ib_cb_t) ib_tuple_copy,
- (ib_cb_t) ib_tuple_read_u8,
- (ib_cb_t) ib_tuple_write_u8,
- (ib_cb_t) ib_tuple_read_u16,
- (ib_cb_t) ib_tuple_write_u16,
- (ib_cb_t) ib_tuple_read_u32,
- (ib_cb_t) ib_tuple_write_u32,
- (ib_cb_t) ib_tuple_read_u64,
- (ib_cb_t) ib_tuple_write_u64,
- (ib_cb_t) ib_tuple_read_i8,
- (ib_cb_t) ib_tuple_write_i8,
- (ib_cb_t) ib_tuple_read_i16,
- (ib_cb_t) ib_tuple_write_i16,
- (ib_cb_t) ib_tuple_read_i32,
- (ib_cb_t) ib_tuple_write_i32,
- (ib_cb_t) ib_tuple_read_i64,
- (ib_cb_t) ib_tuple_write_i64,
- (ib_cb_t) ib_tuple_get_n_cols,
- (ib_cb_t) ib_col_set_value,
- (ib_cb_t) ib_col_get_value,
- (ib_cb_t) ib_col_get_meta,
- (ib_cb_t) ib_trx_begin,
- (ib_cb_t) ib_trx_commit,
- (ib_cb_t) ib_trx_rollback,
- (ib_cb_t) ib_trx_start,
- (ib_cb_t) ib_trx_release,
- (ib_cb_t) ib_trx_state,
- (ib_cb_t) ib_cursor_lock,
- (ib_cb_t) ib_cursor_close,
- (ib_cb_t) ib_cursor_new_trx,
- (ib_cb_t) ib_cursor_reset,
- (ib_cb_t) ib_open_table_by_name,
- (ib_cb_t) ib_col_get_name,
- (ib_cb_t) ib_table_truncate,
- (ib_cb_t) ib_cursor_open_index_using_name,
- (ib_cb_t) ib_close_thd,
- (ib_cb_t) ib_cfg_get_cfg,
- (ib_cb_t) ib_cursor_set_memcached_sync,
- (ib_cb_t) ib_cursor_set_cluster_access,
- (ib_cb_t) ib_cursor_commit_trx,
- (ib_cb_t) ib_cfg_trx_level,
- (ib_cb_t) ib_tuple_get_n_user_cols,
- (ib_cb_t) ib_cursor_set_lock_mode,
- (ib_cb_t) ib_cursor_clear_trx,
- (ib_cb_t) ib_get_idx_field_name,
- (ib_cb_t) ib_trx_get_start_time,
- (ib_cb_t) ib_cfg_bk_commit_interval,
- (ib_cb_t) ib_cursor_stmt_begin,
- (ib_cb_t) ib_trx_read_only
-};
-
-/**
- Test a file path whether it is same as mysql data directory path.
-
- @param path null terminated character string
-
- @return
- @retval TRUE The path is different from mysql data directory.
- @retval FALSE The path is same as mysql data directory.
-*/
-static bool is_mysql_datadir_path(const char *path)
-{
- if (path == NULL)
- return false;
-
- char mysql_data_dir[FN_REFLEN], path_dir[FN_REFLEN];
- convert_dirname(path_dir, path, NullS);
- convert_dirname(mysql_data_dir, mysql_unpacked_real_data_home, NullS);
- size_t mysql_data_home_len= dirname_length(mysql_data_dir);
- size_t path_len = dirname_length(path_dir);
-
- if (path_len < mysql_data_home_len)
- return true;
-
- if (!lower_case_file_system)
- return(memcmp(mysql_data_dir, path_dir, mysql_data_home_len));
-
- return(files_charset_info->coll->strnncoll(files_charset_info,
- (uchar *) path_dir, path_len,
- (uchar *) mysql_data_dir,
- mysql_data_home_len,
- TRUE));
-
-}
-
-
-static int mysql_tmpfile_path(const char *path, const char *prefix)
-{
- DBUG_ASSERT(path != NULL);
- DBUG_ASSERT((strlen(path) + strlen(prefix)) <= FN_REFLEN);
-
- char filename[FN_REFLEN];
- File fd = create_temp_file(filename, path, prefix,
-#ifdef __WIN__
- O_BINARY | O_TRUNC | O_SEQUENTIAL |
- O_SHORT_LIVED |
-#endif /* __WIN__ */
- O_CREAT | O_EXCL | O_RDWR | O_TEMPORARY,
- MYF(MY_WME));
- if (fd >= 0) {
-#ifndef __WIN__
- /*
- This can be removed once the following bug is fixed:
- Bug #28903 create_temp_file() doesn't honor O_TEMPORARY option
- (file not removed) (Unix)
- */
- unlink(filename);
-#endif /* !__WIN__ */
- }
-
- return fd;
-}
-
static void innodb_remember_check_sysvar_funcs();
mysql_var_check_func check_sysvar_enum;
+mysql_var_check_func check_sysvar_int;
+
+// should page compression be used by default for new tables
+static MYSQL_THDVAR_BOOL(compression_default, PLUGIN_VAR_OPCMDARG,
+ "Is compression the default for new tables",
+ NULL, NULL, FALSE);
/** Update callback for SET [SESSION] innodb_default_encryption_key_id */
static void
@@ -718,12 +736,10 @@ ha_create_table_option innodb_table_option_list[]=
{
/* With this option user can enable page compression feature for the
table */
- HA_TOPTION_BOOL("PAGE_COMPRESSED", page_compressed, 0),
+ HA_TOPTION_SYSVAR("PAGE_COMPRESSED", page_compressed, compression_default),
/* With this option user can set zip compression level for page
compression for this table*/
HA_TOPTION_NUMBER("PAGE_COMPRESSION_LEVEL", page_compression_level, 0, 1, 9, 1),
- /* With this option user can enable atomic writes feature for this table */
- HA_TOPTION_ENUM("ATOMIC_WRITES", atomic_writes, "DEFAULT,ON,OFF", 0),
/* With this option the user can enable encryption for the table */
HA_TOPTION_ENUM("ENCRYPTED", encryption, "DEFAULT,YES,NO", 0),
/* With this option the user defines the key identifier using for the encryption */
@@ -747,6 +763,8 @@ innodb_stopword_table_validate(
for update function */
struct st_mysql_value* value); /*!< in: incoming string */
+static bool is_mysql_datadir_path(const char *path);
+
/** Validate passed-in "value" is a valid directory name.
This function is registered as a callback with MySQL.
@param[in,out] thd thread handle
@@ -797,6 +815,7 @@ innodb_tmpdir_validate(
return(1);
}
+ os_normalize_path(alter_tmp_dir);
my_realpath(tmp_abs_path, alter_tmp_dir, 0);
size_t tmp_abs_len = strlen(tmp_abs_path);
@@ -849,10 +868,6 @@ innodb_tmpdir_validate(
return(0);
}
-/** "GEN_CLUST_INDEX" is the name reserved for InnoDB default
-system clustered index when there is no primary key. */
-const char innobase_index_reserve_name[] = "GEN_CLUST_INDEX";
-
/******************************************************************//**
Maps a MySQL trx isolation level code to the InnoDB isolation level code
@return InnoDB isolation level */
@@ -862,15 +877,19 @@ innobase_map_isolation_level(
/*=========================*/
enum_tx_isolation iso); /*!< in: MySQL isolation level code */
-/******************************************************************//**
-Maps a MySQL trx isolation level code to the InnoDB isolation level code
-@return InnoDB isolation level */
+/** Gets field offset for a field in a table.
+@param[in] table MySQL table object
+@param[in] field MySQL field object (from table->field array)
+@return offset */
static inline
-ulint
-innobase_map_isolation_level(
-/*=========================*/
- enum_tx_isolation iso); /*!< in: MySQL isolation level code
- */
+uint
+get_field_offset(
+ const TABLE* table,
+ const Field* field)
+{
+ return field->offset(table->record[0]);
+}
+
/*************************************************************//**
Check for a valid value of innobase_compression_algorithm.
@@ -891,6 +910,7 @@ static ibool innodb_have_lz4=IF_LZ4(1, 0);
static ibool innodb_have_lzma=IF_LZMA(1, 0);
static ibool innodb_have_bzip2=IF_BZIP2(1, 0);
static ibool innodb_have_snappy=IF_SNAPPY(1, 0);
+static ibool innodb_have_punch_hole=IF_PUNCH_HOLE(1, 0);
static
int
@@ -905,9 +925,40 @@ innodb_encrypt_tables_validate(
static const char innobase_hton_name[]= "InnoDB";
+static const char* deprecated_innodb_support_xa
+ = "Using innodb_support_xa is deprecated and the"
+ " parameter may be removed in future releases.";
+
+static const char* deprecated_innodb_support_xa_off
+ = "Using innodb_support_xa is deprecated and the"
+ " parameter may be removed in future releases."
+ " Only innodb_support_xa=ON is allowed.";
+
+/** Update the session variable innodb_support_xa.
+@param[in] thd current session
+@param[in] var the system variable innodb_support_xa
+@param[in,out] var_ptr the contents of the variable
+@param[in] save the to-be-updated value */
+static
+void
+innodb_support_xa_update(
+ THD* thd,
+ struct st_mysql_sys_var* var,
+ void* var_ptr,
+ const void* save)
+{
+ my_bool innodb_support_xa = *static_cast<const my_bool*>(save);
+
+ push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_ERR_WRONG_COMMAND,
+ innodb_support_xa
+ ? deprecated_innodb_support_xa
+ : deprecated_innodb_support_xa_off);
+}
+
static MYSQL_THDVAR_BOOL(support_xa, PLUGIN_VAR_OPCMDARG,
"Enable InnoDB support for the XA two-phase commit",
- /* check_func */ NULL, /* update_func */ NULL,
+ /* check_func */ NULL, innodb_support_xa_update,
/* default */ TRUE);
static MYSQL_THDVAR_BOOL(table_locks, PLUGIN_VAR_OPCMDARG,
@@ -917,7 +968,7 @@ static MYSQL_THDVAR_BOOL(table_locks, PLUGIN_VAR_OPCMDARG,
static MYSQL_THDVAR_BOOL(strict_mode, PLUGIN_VAR_OPCMDARG,
"Use strict mode when evaluating create options.",
- NULL, NULL, FALSE);
+ NULL, NULL, TRUE);
static MYSQL_THDVAR_BOOL(ft_enable_stopword, PLUGIN_VAR_OPCMDARG,
"Create FTS index with stopword.",
@@ -943,6 +994,8 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_buffer_pool_dump_status, SHOW_CHAR},
{"buffer_pool_load_status",
(char*) &export_vars.innodb_buffer_pool_load_status, SHOW_CHAR},
+ {"buffer_pool_resize_status",
+ (char*) &export_vars.innodb_buffer_pool_resize_status, SHOW_CHAR},
{"buffer_pool_pages_data",
(char*) &export_vars.innodb_buffer_pool_pages_data, SHOW_LONG},
{"buffer_pool_bytes_data",
@@ -997,8 +1050,6 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_dblwr_pages_written, SHOW_LONG},
{"dblwr_writes",
(char*) &export_vars.innodb_dblwr_writes, SHOW_LONG},
- {"have_atomic_builtins",
- (char*) &export_vars.innodb_have_atomic_builtins, SHOW_BOOL},
{"log_waits",
(char*) &export_vars.innodb_log_waits, SHOW_LONG},
{"log_write_requests",
@@ -1055,29 +1106,12 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_truncated_status_writes, SHOW_LONG},
{"available_undo_logs",
(char*) &export_vars.innodb_available_undo_logs, SHOW_LONG},
-#ifdef UNIV_DEBUG
- {"purge_trx_id_age",
- (char*) &export_vars.innodb_purge_trx_id_age, SHOW_LONG},
- {"purge_view_trx_id_age",
- (char*) &export_vars.innodb_purge_view_trx_id_age, SHOW_LONG},
-#endif /* UNIV_DEBUG */
+ {"undo_truncations",
+ (char*) &export_vars.innodb_undo_truncations, SHOW_LONG},
+
/* Status variables for page compression */
{"page_compression_saved",
(char*) &export_vars.innodb_page_compression_saved, SHOW_LONGLONG},
- {"page_compression_trim_sect512",
- (char*) &export_vars.innodb_page_compression_trim_sect512, SHOW_LONGLONG},
- {"page_compression_trim_sect1024",
- (char*) &export_vars.innodb_page_compression_trim_sect1024, SHOW_LONGLONG},
- {"page_compression_trim_sect2048",
- (char*) &export_vars.innodb_page_compression_trim_sect2048, SHOW_LONGLONG},
- {"page_compression_trim_sect4096",
- (char*) &export_vars.innodb_page_compression_trim_sect4096, SHOW_LONGLONG},
- {"page_compression_trim_sect8192",
- (char*) &export_vars.innodb_page_compression_trim_sect8192, SHOW_LONGLONG},
- {"page_compression_trim_sect16384",
- (char*) &export_vars.innodb_page_compression_trim_sect16384, SHOW_LONGLONG},
- {"page_compression_trim_sect32768",
- (char*) &export_vars.innodb_page_compression_trim_sect32768, SHOW_LONGLONG},
{"num_index_pages_written",
(char*) &export_vars.innodb_index_pages_written, SHOW_LONGLONG},
{"num_non_index_pages_written",
@@ -1086,8 +1120,6 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_pages_page_compressed, SHOW_LONGLONG},
{"num_page_compressed_trim_op",
(char*) &export_vars.innodb_page_compressed_trim_op, SHOW_LONGLONG},
- {"num_page_compressed_trim_op_saved",
- (char*) &export_vars.innodb_page_compressed_trim_op_saved, SHOW_LONGLONG},
{"num_pages_page_decompressed",
(char*) &export_vars.innodb_pages_page_decompressed, SHOW_LONGLONG},
{"num_pages_page_compression_error",
@@ -1106,6 +1138,8 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &innodb_have_bzip2, SHOW_BOOL},
{"have_snappy",
(char*) &innodb_have_snappy, SHOW_BOOL},
+ {"have_punch_hole",
+ (char*) &innodb_have_punch_hole, SHOW_BOOL},
/* Defragmentation */
{"defragment_compression_failures",
@@ -1161,6 +1195,12 @@ static SHOW_VAR innodb_status_variables[]= {
{"encryption_n_rowlog_blocks_decrypted",
(char*)&export_vars.innodb_n_rowlog_blocks_decrypted,
SHOW_LONGLONG},
+ {"encryption_n_temp_blocks_encrypted",
+ (char*)&export_vars.innodb_n_temp_blocks_encrypted,
+ SHOW_LONGLONG},
+ {"encryption_n_temp_blocks_decrypted",
+ (char*)&export_vars.innodb_n_temp_blocks_decrypted,
+ SHOW_LONGLONG},
/* scrubing */
{"scrub_background_page_reorganizations",
@@ -1181,37 +1221,23 @@ static SHOW_VAR innodb_status_variables[]= {
{"scrub_background_page_split_failures_unknown",
(char*) &export_vars.innodb_scrub_page_split_failures_unknown,
SHOW_LONG},
+ {"scrub_log",
+ (char*) &export_vars.innodb_scrub_log,
+ SHOW_LONGLONG},
{"encryption_num_key_requests",
(char*) &export_vars.innodb_encryption_key_requests, SHOW_LONGLONG},
{NullS, NullS, SHOW_LONG}
};
-/************************************************************************//**
-Handling the shared INNOBASE_SHARE structure that is needed to provide table
-locking. Register the table name if it doesn't exist in the hash table. */
-static
-INNOBASE_SHARE*
-get_share(
-/*======*/
- const char* table_name); /*!< in: table to lookup */
-
-/************************************************************************//**
-Free the shared object that was registered with get_share(). */
-static
-void
-free_share(
-/*=======*/
- INNOBASE_SHARE* share); /*!< in/own: share to free */
-
/*****************************************************************//**
Frees a possible InnoDB trx object associated with the current THD.
-@return 0 or error number */
+@return 0 or error number */
static
int
innobase_close_connection(
/*======================*/
- handlerton* hton, /*!< in/out: Innodb handlerton */
+ handlerton* hton, /*!< in/out: InnoDB handlerton */
THD* thd); /*!< in: MySQL thread handle for
which to close the connection */
@@ -1223,12 +1249,12 @@ static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
/*****************************************************************//**
Commits a transaction in an InnoDB database or marks an SQL statement
ended.
-@return 0 */
+@return 0 */
static
int
innobase_commit(
/*============*/
- handlerton* hton, /*!< in/out: Innodb handlerton */
+ handlerton* hton, /*!< in/out: InnoDB handlerton */
THD* thd, /*!< in: MySQL thread handle of the
user for whom the transaction should
be committed */
@@ -1244,7 +1270,7 @@ static
int
innobase_rollback(
/*==============*/
- handlerton* hton, /*!< in/out: Innodb handlerton */
+ handlerton* hton, /*!< in/out: InnoDB handlerton */
THD* thd, /*!< in: handle to the MySQL thread
of the user whose transaction should
be rolled back */
@@ -1281,7 +1307,7 @@ innobase_rollback_to_savepoint_can_release_mdl(
/*****************************************************************//**
Sets a transaction savepoint.
-@return always 0, that is, always succeeds */
+@return always 0, that is, always succeeds */
static
int
innobase_savepoint(
@@ -1300,7 +1326,7 @@ static
int
innobase_release_savepoint(
/*=======================*/
- handlerton* hton, /*!< in/out: handlerton for Innodb */
+ handlerton* hton, /*!< in/out: handlerton for InnoDB */
THD* thd, /*!< in: handle to the MySQL thread
of the user whose transaction's
savepoint should be released */
@@ -1308,16 +1334,6 @@ innobase_release_savepoint(
static void innobase_checkpoint_request(handlerton *hton, void *cookie);
-/************************************************************************//**
-Function for constructing an InnoDB table handler instance. */
-static
-handler*
-innobase_create_handler(
-/*====================*/
- handlerton* hton, /*!< in/out: handlerton for Innodb */
- TABLE_SHARE* table,
- MEM_ROOT* mem_root);
-
/** @brief Initialize the default value of innodb_commit_concurrency.
Once InnoDB is running, the innodb_commit_concurrency must not change
@@ -1332,19 +1348,15 @@ void
innobase_commit_concurrency_init_default();
/*=======================================*/
-/** @brief Initialize the default and max value of innodb_undo_logs.
-
-Once InnoDB is running, the default value and the max value of
-innodb_undo_logs must be equal to the available undo logs,
-given by srv_available_undo_logs. */
+/** @brief Adjust some InnoDB startup parameters based on file contents
+or innodb_page_size. */
static
void
-innobase_undo_logs_init_default_max();
-/*==================================*/
+innodb_params_adjust();
/************************************************************//**
Validate the file format name and return its corresponding id.
-@return valid file format id */
+@return valid file format id */
static
uint
innobase_file_format_name_lookup(
@@ -1354,7 +1366,7 @@ innobase_file_format_name_lookup(
/************************************************************//**
Validate the file format check config parameters, as a side effect it
sets the srv_max_file_format_at_startup variable.
-@return the format_id if valid config value, otherwise, return -1 */
+@return the format_id if valid config value, otherwise, return -1 */
static
int
innobase_file_format_validate_and_set(
@@ -1363,7 +1375,7 @@ innobase_file_format_validate_and_set(
/*******************************************************************//**
This function is used to prepare an X/Open XA distributed transaction.
-@return 0 or error number */
+@return 0 or error number */
static
int
innobase_xa_prepare(
@@ -1377,7 +1389,7 @@ innobase_xa_prepare(
ended */
/*******************************************************************//**
This function is used to recover X/Open XA distributed transactions.
-@return number of prepared transactions stored in xid_list */
+@return number of prepared transactions stored in xid_list */
static
int
innobase_xa_recover(
@@ -1388,7 +1400,7 @@ innobase_xa_recover(
/*******************************************************************//**
This function is used to commit one X/Open XA distributed transaction
which is in the prepared state
-@return 0 or error number */
+@return 0 or error number */
static
int
innobase_commit_by_xid(
@@ -1396,66 +1408,17 @@ innobase_commit_by_xid(
handlerton* hton, /*!< in: InnoDB handlerton */
XID* xid); /*!< in: X/Open XA transaction
identification */
-/*******************************************************************//**
-This function is used to rollback one X/Open XA distributed transaction
-which is in the prepared state
-@return 0 or error number */
-static
-int
-innobase_rollback_by_xid(
-/*=====================*/
- handlerton* hton, /*!< in: InnoDB handlerton */
- XID* xid); /*!< in: X/Open XA transaction
- identification */
-/*******************************************************************//**
-Create a consistent view for a cursor based on current transaction
-which is created if the corresponding MySQL thread still lacks one.
-This consistent view is then used inside of MySQL when accessing records
-using a cursor.
-@return pointer to cursor view or NULL */
-static
-void*
-innobase_create_cursor_view(
-/*========================*/
- handlerton* hton, /*!< in: innobase hton */
- THD* thd); /*!< in: user thread handle */
-/*******************************************************************//**
-Set the given consistent cursor view to a transaction which is created
-if the corresponding MySQL thread still lacks one. If the given
-consistent cursor view is NULL global read view of a transaction is
-restored to a transaction read view. */
-static
-void
-innobase_set_cursor_view(
-/*=====================*/
- handlerton* hton, /*!< in: handlerton of Innodb */
- THD* thd, /*!< in: user thread handle */
- void* curview); /*!< in: Consistent cursor view to
- be set */
-/*******************************************************************//**
-Close the given consistent cursor view of a transaction and restore
-global read view to a transaction read view. Transaction is created if the
-corresponding MySQL thread still lacks one. */
-static
-void
-innobase_close_cursor_view(
-/*=======================*/
- handlerton* hton, /*!< in: handlerton of Innodb */
- THD* thd, /*!< in: user thread handle */
- void* curview); /*!< in: Consistent read view to be
- closed */
-/*****************************************************************//**
-Removes all tables in the named database inside InnoDB. */
+/** Remove all tables in the named database inside InnoDB.
+@param[in] hton handlerton from InnoDB
+@param[in] path Database path; Inside InnoDB the name of the last
+directory in the path is used as the database name.
+For example, in 'mysql/data/test' the database name is 'test'. */
static
void
innobase_drop_database(
-/*===================*/
- handlerton* hton, /*!< in: handlerton of Innodb */
- char* path); /*!< in: database path; inside InnoDB
- the name of the last directory in
- the path is used as the database name:
- for example, in 'mysql/data/test' the
- database name is 'test' */
+ handlerton* hton,
+ char* path);
+
/** Shut down the InnoDB storage engine.
@return 0 */
static
@@ -1467,24 +1430,66 @@ Creates an InnoDB transaction struct for the thd if it does not yet have one.
Starts a new InnoDB transaction if a transaction is not yet started. And
assigns a new snapshot for a consistent read if the transaction does not yet
have one.
-@return 0 */
+@return 0 */
static
int
innobase_start_trx_and_assign_read_view(
/*====================================*/
- handlerton* hton, /* in: Innodb handlerton */
+ handlerton* hton, /* in: InnoDB handlerton */
THD* thd); /* in: MySQL thread handle of the
user for whom the transaction should
be committed */
-/****************************************************************//**
-Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes
-the logs, and the name of this function should be innobase_checkpoint.
-@return TRUE if error */
+
+/** Flush InnoDB redo logs to the file system.
+@param[in] hton InnoDB handlerton
+@param[in] binlog_group_flush true if we got invoked by binlog
+group commit during flush stage, false in other cases.
+@return false */
static
bool
innobase_flush_logs(
-/*================*/
- handlerton* hton); /*!< in: InnoDB handlerton */
+ handlerton* hton,
+ bool binlog_group_flush)
+{
+ DBUG_ENTER("innobase_flush_logs");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
+ if (srv_read_only_mode) {
+ DBUG_RETURN(false);
+ }
+
+ /* If !binlog_group_flush, we got invoked by FLUSH LOGS or similar.
+ Else, we got invoked by binlog group commit during flush stage. */
+
+ if (binlog_group_flush && srv_flush_log_at_trx_commit == 0) {
+ /* innodb_flush_log_at_trx_commit=0
+ (write and sync once per second).
+ Do not flush the redo log during binlog group commit. */
+ DBUG_RETURN(false);
+ }
+
+ /* Flush the redo log buffer to the redo log file.
+	Sync it to disk if we are in FLUSH LOGS, or if
+ innodb_flush_log_at_trx_commit=1
+ (write and sync at each commit). */
+ log_buffer_flush_to_disk(!binlog_group_flush
+ || srv_flush_log_at_trx_commit == 1);
+
+ DBUG_RETURN(false);
+}
+
+/** Flush InnoDB redo logs to the file system, as if invoked during
+binlog group commit (convenience overload of the function above).
+@param[in]	hton	InnoDB handlerton
+@return false */
+static
+bool
+innobase_flush_logs(
+ handlerton* hton)
+{
+ return innobase_flush_logs(hton, true);
+}
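
A summary of the effective behaviour of the two functions above, derived from the code (trx_commit stands for the usual 0/1/2 settings of innodb_flush_log_at_trx_commit):

	/* innobase_flush_logs(hton, binlog_group_flush):
	   srv_read_only_mode             -> no-op
	   group flush && trx_commit == 0 -> no-op (redo synced once a second)
	   group flush && trx_commit == 1 -> write redo log and sync
	   group flush && trx_commit == 2 -> write redo log, skip the sync
	   FLUSH LOGS (group flush false) -> write redo log and sync */
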
/************************************************************************//**
Implements the SHOW ENGINE INNODB STATUS command. Sends the output of the
@@ -1511,14 +1516,6 @@ innobase_show_status(
stat_print_fn* stat_print,
enum ha_stat_type stat_type);
-/*****************************************************************//**
-Commits a transaction in an InnoDB database. */
-static
-void
-innobase_commit_low(
-/*================*/
- trx_t* trx); /*!< in: transaction handle */
-
/****************************************************************//**
Parse and enable InnoDB monitor counters during server startup.
User can enable monitor counters/groups by specifying
@@ -1530,23 +1527,28 @@ innodb_enable_monitor_at_startup(
/*=============================*/
char* str); /*!< in: monitor counter enable list */
-/*********************************************************************
-Normalizes a table name string. A normalized name consists of the
-database name catenated to '/' and table name. An example:
-test/mytable. On Windows normalization puts both the database name and the
-table name always to lower case if "set_lower_case" is set to TRUE. */
+#ifdef MYSQL_STORE_FTS_DOC_ID
+/** Store doc_id value into FTS_DOC_ID field
+@param[in,out] tbl table containing FULLTEXT index
+@param[in] doc_id FTS_DOC_ID value */
+static
void
-normalize_table_name_low(
-/*=====================*/
- char* norm_name, /* out: normalized name as a
- null-terminated string */
- const char* name, /* in: table name string */
- ibool set_lower_case); /* in: TRUE if we want to set
- name to lower case */
+innobase_fts_store_docid(
+ TABLE* tbl,
+ ulonglong doc_id)
+{
+ my_bitmap_map* old_map
+ = dbug_tmp_use_all_columns(tbl, tbl->write_set);
+
+ tbl->fts_doc_id_field->store(static_cast<longlong>(doc_id), true);
+
+ dbug_tmp_restore_column_map(tbl->write_set, old_map);
+}
+#endif
/*************************************************************//**
Check for a valid value of innobase_commit_concurrency.
-@return 0 for valid innodb_commit_concurrency */
+@return 0 for valid innodb_commit_concurrency */
static
int
innobase_commit_concurrency_validate(
@@ -1591,15 +1593,15 @@ innobase_create_handler(
/* General functions */
-/*************************************************************//**
-Check that a page_size is correct for InnoDB. If correct, set the
-associated page_size_shift which is the power of 2 for this page size.
-@return an associated page_size_shift if valid, 0 if invalid. */
+/** Check that a page_size is correct for InnoDB.
+If correct, set the associated page_size_shift which is the power of 2
+for this page size.
+@param[in] page_size Page Size to evaluate
+@return an associated page_size_shift if valid, 0 if invalid. */
inline
-int
+ulong
innodb_page_size_validate(
-/*======================*/
- ulong page_size) /*!< in: Page Size to evaluate */
+ ulong page_size)
{
ulong n;
@@ -1608,7 +1610,7 @@ innodb_page_size_validate(
for (n = UNIV_PAGE_SIZE_SHIFT_MIN;
n <= UNIV_PAGE_SIZE_SHIFT_MAX;
n++) {
- if (page_size == (ulong) (1 << n)) {
+ if (page_size == static_cast<ulong>(1 << n)) {
DBUG_RETURN(n);
}
}
@@ -1622,8 +1624,7 @@ server. Used in srv_conc_enter_innodb() to determine if the thread
should be allowed to enter InnoDB - the replication thread is treated
differently than other threads. Also used in
srv_conc_force_exit_innodb().
-@return true if thd is the replication thread */
-UNIV_INTERN
+@return true if thd is the replication thread */
ibool
thd_is_replication_slave_thread(
/*============================*/
@@ -1634,9 +1635,8 @@ thd_is_replication_slave_thread(
/******************************************************************//**
Returns true if transaction should be flagged as read-only.
-@return true if the thd is marked as read-only */
-UNIV_INTERN
-ibool
+@return true if the thd is marked as read-only */
+bool
thd_trx_is_read_only(
/*=================*/
THD* thd) /*!< in: thread handle */
@@ -1644,11 +1644,62 @@ thd_trx_is_read_only(
return(thd != 0 && thd_tx_is_read_only(thd));
}
+static MYSQL_THDVAR_BOOL(background_thread,
+ PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_NOSYSVAR,
+ "Internal (not user visible) flag to mark "
+ "background purge threads", NULL, NULL, 0);
+
+/** Create a MYSQL_THD for a background thread and mark it as such.
+@param name thread info for SHOW PROCESSLIST
+@return new MYSQL_THD */
+MYSQL_THD
+innobase_create_background_thd(const char* name)
+/*============================*/
+{
+ MYSQL_THD thd= create_thd();
+ thd_proc_info(thd, name);
+ THDVAR(thd, background_thread) = true;
+ return thd;
+}
+
+
+/** Destroy a background purge thread THD.
+@param[in] thd MYSQL_THD to destroy */
+void
+innobase_destroy_background_thd(
+/*============================*/
+ MYSQL_THD thd)
+{
+	/* We need to close the connection explicitly; the server will
+	not do it if InnoDB is in the PLUGIN_IS_DYING state. */
+ innobase_close_connection(innodb_hton_ptr, thd);
+ thd_set_ha_data(thd, innodb_hton_ptr, NULL);
+ destroy_thd(thd);
+}
+
+/** Close opened tables, free memory, delete items for a MYSQL_THD.
+@param[in] thd MYSQL_THD to reset */
+void
+innobase_reset_background_thd(MYSQL_THD thd)
+{
+ if (!thd) {
+ thd = current_thd;
+ }
+
+ ut_ad(thd);
+ ut_ad(THDVAR(thd, background_thread));
+
+ /* background purge thread */
+ const char *proc_info= thd_proc_info(thd, "reset");
+ reset_thd(thd);
+ thd_proc_info(thd, proc_info);
+}
+
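+/* Illustrative lifecycle of a background THD using the helpers above
+(a sketch; actual callers may differ):
+
+	MYSQL_THD thd = innobase_create_background_thd("InnoDB worker");
+	// ... perform a unit of background work ...
+	innobase_reset_background_thd(thd);	// close tables, free memory
+	innobase_destroy_background_thd(thd);	// at thread exit
+*/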
+
/******************************************************************//**
Check if the transaction is an auto-commit transaction. TRUE also
implies that it is a SELECT (read-only) transaction.
-@return true if the transaction is an auto commit read-only transaction. */
-UNIV_INTERN
+@return true if the transaction is an auto commit read-only transaction. */
ibool
thd_trx_is_auto_commit(
/*===================*/
@@ -1661,18 +1712,17 @@ thd_trx_is_auto_commit(
&& thd_is_select(thd));
}
-/******************************************************************//**
-Save some CPU by testing the value of srv_thread_concurrency in inline
-functions. */
-static inline
-void
-innobase_srv_conc_enter_innodb(
-/*===========================*/
- trx_t* trx) /*!< in: transaction handle */
+/** Enter InnoDB engine after checking the max number of user threads
+allowed, else the thread is put into sleep.
+@param[in,out] prebuilt row prebuilt handler */
+static inline void innobase_srv_conc_enter_innodb(row_prebuilt_t *prebuilt)
{
+ trx_t* trx = prebuilt->trx;
+
#ifdef WITH_WSREP
if (trx->is_wsrep() && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) return;
#endif /* WITH_WSREP */
+
if (srv_thread_concurrency) {
if (trx->n_tickets_to_enter_innodb > 0) {
@@ -1691,23 +1741,20 @@ innobase_srv_conc_enter_innodb(
os_thread_sleep(2000 /* 2 ms */);
}
} else {
- srv_conc_enter_innodb(trx);
+ srv_conc_enter_innodb(prebuilt);
}
}
}
-/******************************************************************//**
-Note that the thread wants to leave InnoDB only if it doesn't have
-any spare tickets. */
-static inline
-void
-innobase_srv_conc_exit_innodb(
-/*==========================*/
- trx_t* trx) /*!< in: transaction handle */
+/** Note that the thread wants to leave InnoDB only if it doesn't have
+any spare tickets.
+@param[in,out] m_prebuilt row prebuilt handler */
+static inline void innobase_srv_conc_exit_innodb(row_prebuilt_t *prebuilt)
{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!sync_check_iterate(sync_check()));
+
+ trx_t* trx = prebuilt->trx;
+
#ifdef WITH_WSREP
if (trx->is_wsrep() && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) return;
#endif /* WITH_WSREP */
@@ -1728,9 +1775,7 @@ innobase_srv_conc_force_exit_innodb(
/*================================*/
trx_t* trx) /*!< in: transaction handle */
{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!sync_check_iterate(sync_check()));
/* This is to avoid making an unnecessary function call. */
if (trx->declared_to_be_inside_innodb) {
@@ -1740,8 +1785,7 @@ innobase_srv_conc_force_exit_innodb(
/******************************************************************//**
Returns the NUL terminated value of glob_hostname.
-@return pointer to glob_hostname. */
-UNIV_INTERN
+@return pointer to glob_hostname. */
const char*
server_get_hostname()
/*=================*/
@@ -1754,8 +1798,7 @@ Returns true if the transaction this thread is processing has edited
non-transactional tables. Used by the deadlock detector when deciding
which transaction to rollback in case of a deadlock - we try to avoid
rolling back transactions that have edited non-transactional tables.
-@return true if non-transactional tables have been edited */
-UNIV_INTERN
+@return true if non-transactional tables have been edited */
ibool
thd_has_edited_nontrans_tables(
/*===========================*/
@@ -1775,8 +1818,7 @@ thd_query_start_micro(
/******************************************************************//**
Returns true if the thread is executing a SELECT statement.
-@return true if thd is executing SELECT */
-UNIV_INTERN
+@return true if thd is executing SELECT */
ibool
thd_is_select(
/*==========*/
@@ -1786,43 +1828,8 @@ thd_is_select(
}
/******************************************************************//**
-Returns true if the thread supports XA,
-global value of innodb_supports_xa if thd is NULL.
-@return true if thd has XA support */
-UNIV_INTERN
-ibool
-thd_supports_xa(
-/*============*/
- THD* thd) /*!< in: thread handle, or NULL to query
- the global innodb_supports_xa */
-{
- return(THDVAR(thd, support_xa));
-}
-
-/** Get the value of innodb_tmpdir.
-@param[in] thd thread handle, or NULL to query
- the global innodb_tmpdir.
-@retval NULL if innodb_tmpdir="" */
-UNIV_INTERN
-const char*
-thd_innodb_tmpdir(
- THD* thd)
-{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!sync_thread_levels_nonempty_trx(false));
-#endif /* UNIV_SYNC_DEBUG */
-
- const char* tmp_dir = THDVAR(thd, tmpdir);
- if (tmp_dir != NULL && *tmp_dir == '\0') {
- tmp_dir = NULL;
- }
-
- return(tmp_dir);
-}
-/******************************************************************//**
Returns the lock wait timeout for the current connection.
-@return the lock wait timeout, in seconds */
-UNIV_INTERN
+@return the lock wait timeout, in seconds */
ulong
thd_lock_wait_timeout(
/*==================*/
@@ -1836,7 +1843,6 @@ thd_lock_wait_timeout(
/******************************************************************//**
Set the time waited for the lock for the current query. */
-UNIV_INTERN
void
thd_set_lock_wait_time(
/*===================*/
@@ -1848,22 +1854,40 @@ thd_set_lock_wait_time(
}
}
-/********************************************************************//**
-Obtain the InnoDB transaction of a MySQL thread.
-@return reference to transaction pointer */
-MY_ATTRIBUTE((warn_unused_result, nonnull))
-static inline
-trx_t*&
-thd_to_trx(
-/*=======*/
- THD* thd) /*!< in: MySQL thread */
+/** Get the value of innodb_tmpdir.
+@param[in] thd thread handle, or NULL to query
+ the global innodb_tmpdir.
+@retval NULL if innodb_tmpdir="" */
+const char*
+thd_innodb_tmpdir(
+ THD* thd)
{
- return(*(trx_t**) thd_ha_data(thd, innodb_hton_ptr));
+ ut_ad(!sync_check_iterate(sync_check()));
+
+ const char* tmp_dir = THDVAR(thd, tmpdir);
+
+ if (tmp_dir != NULL && *tmp_dir == '\0') {
+ tmp_dir = NULL;
+ }
+
+ return(tmp_dir);
}
+
+/** Obtain the InnoDB transaction of a MySQL thread.
+@param[in,out] thd thread handle
+@return reference to transaction pointer */
+static trx_t* thd_to_trx(THD* thd)
+{
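+	/* The trx, if any, was stored into the connection's ha_data
+	slot for InnoDB by check_trx_exists(). */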
+ return reinterpret_cast<trx_t*>(thd_get_ha_data(thd, innodb_hton_ptr));
+}
+
#ifdef WITH_WSREP
+/********************************************************************//**
+Obtain the InnoDB transaction id of a MySQL thread.
+@return transaction id */
+__attribute__((warn_unused_result, nonnull))
ulonglong
thd_to_trx_id(
-/*=======*/
THD* thd) /*!< in: MySQL thread */
{
return(thd_to_trx(thd)->id);
@@ -1871,39 +1895,11 @@ thd_to_trx_id(
#endif /* WITH_WSREP */
/********************************************************************//**
-Call this function when mysqld passes control to the client. That is to
-avoid deadlocks on the adaptive hash S-latch possibly held by thd. For more
-documentation, see handler.cc.
-@return 0 */
-static
-int
-innobase_release_temporary_latches(
-/*===============================*/
- handlerton* hton, /*!< in: handlerton */
- THD* thd) /*!< in: MySQL thread */
-{
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- if (!innodb_inited) {
-
- return(0);
- }
-
- trx_t* trx = thd_to_trx(thd);
-
- if (trx != NULL) {
- trx_search_latch_release_if_reserved(trx);
- }
-
- return(0);
-}
-
-/********************************************************************//**
Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth
time calls srv_active_wake_master_thread. This function should be used
when a single database operation may introduce a small need for
server utility activity, like checkpointing. */
-static inline
+inline
void
innobase_active_small(void)
/*=======================*/
@@ -1919,8 +1915,7 @@ innobase_active_small(void)
Converts an InnoDB error code to a MySQL error code and also tells to MySQL
about a possible transaction rollback inside InnoDB caused by a lock wait
timeout or a deadlock.
-@return MySQL error code */
-static
+@return MySQL error code */
int
convert_error_code_to_mysql(
/*========================*/
@@ -1933,7 +1928,7 @@ convert_error_code_to_mysql(
return(0);
case DB_INTERRUPTED:
- return(HA_ERR_ABORTED_BY_USER);
+ return(HA_ERR_ABORTED_BY_USER);
case DB_FOREIGN_EXCEED_MAX_CASCADE:
ut_ad(thd);
@@ -1945,12 +1940,15 @@ convert_error_code_to_mysql(
"depth of %d. Please "
"drop extra constraints and try "
"again", DICT_FK_MAX_RECURSIVE_LOAD);
+ return(HA_ERR_FK_DEPTH_EXCEEDED);
- /* fall through */
+ case DB_CANT_CREATE_GEOMETRY_OBJECT:
+ my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, MYF(0));
+ return(HA_ERR_NULL_IN_SPATIAL);
case DB_ERROR:
default:
- return(-1); /* unspecified error */
+ return(HA_ERR_GENERIC); /* unspecified error */
case DB_DUPLICATE_KEY:
/* Be cautious with returning this error, since
@@ -1978,8 +1976,8 @@ convert_error_code_to_mysql(
tell it also to MySQL so that MySQL knows to empty the
cached binlog for this transaction */
- if (thd) {
- thd_mark_transaction_to_rollback(thd, TRUE);
+ if (thd != NULL) {
+ thd_mark_transaction_to_rollback(thd, 1);
}
return(HA_ERR_LOCK_DEADLOCK);
@@ -2002,6 +2000,7 @@ convert_error_code_to_mysql(
case DB_ROW_IS_REFERENCED:
return(HA_ERR_ROW_IS_REFERENCED);
+ case DB_NO_FK_ON_S_BASE_COL:
case DB_CANNOT_ADD_CONSTRAINT:
case DB_CHILD_NO_INDEX:
case DB_PARENT_NO_INDEX:
@@ -2019,10 +2018,10 @@ convert_error_code_to_mysql(
case DB_OUT_OF_FILE_SPACE:
return(HA_ERR_RECORD_FILE_FULL);
- case DB_TEMP_FILE_WRITE_FAILURE:
+ case DB_TEMP_FILE_WRITE_FAIL:
my_error(ER_GET_ERRMSG, MYF(0),
- DB_TEMP_FILE_WRITE_FAILURE,
- ut_strerr(DB_TEMP_FILE_WRITE_FAILURE),
+ DB_TEMP_FILE_WRITE_FAIL,
+ ut_strerr(DB_TEMP_FILE_WRITE_FAIL),
"InnoDB");
return(HA_ERR_INTERNAL_ERROR);
@@ -2032,7 +2031,6 @@ convert_error_code_to_mysql(
case DB_TABLE_IS_BEING_USED:
return(HA_ERR_WRONG_COMMAND);
- case DB_TABLESPACE_DELETED:
case DB_TABLE_NOT_FOUND:
return(HA_ERR_NO_SUCH_TABLE);
@@ -2040,19 +2038,17 @@ convert_error_code_to_mysql(
return(HA_ERR_DECRYPTION_FAILED);
case DB_TABLESPACE_NOT_FOUND:
- return(HA_ERR_NO_SUCH_TABLE);
+ return(HA_ERR_TABLESPACE_MISSING);
case DB_TOO_BIG_RECORD: {
/* If prefix is true then a 768-byte prefix is stored
locally for BLOB fields. Refer to dict_table_get_format().
- Note that in page0zip.ic page_zip_rec_needs_ext() rec_size
- is limited to COMPRESSED_REC_MAX_DATA_SIZE (16K) or
- REDUNDANT_REC_MAX_DATA_SIZE (16K-1). */
+ We limit max record size to 16k for 64k page size. */
bool prefix = (dict_tf_get_format(flags) == UNIV_FORMAT_A);
bool comp = !!(flags & DICT_TF_COMPACT);
ulint free_space = page_get_free_space_of_empty(comp) / 2;
- if (free_space >= (comp ? COMPRESSED_REC_MAX_DATA_SIZE :
+ if (free_space >= ulint(comp ? COMPRESSED_REC_MAX_DATA_SIZE :
REDUNDANT_REC_MAX_DATA_SIZE)) {
free_space = (comp ? COMPRESSED_REC_MAX_DATA_SIZE :
REDUNDANT_REC_MAX_DATA_SIZE) - 1;
@@ -2074,18 +2070,9 @@ convert_error_code_to_mysql(
return(HA_ERR_TO_BIG_ROW);
}
-
- case DB_TOO_BIG_FOR_REDO:
- my_printf_error(ER_TOO_BIG_ROWSIZE, "%s" , MYF(0),
- "The size of BLOB/TEXT data inserted"
- " in one transaction is greater than"
- " 10% of redo log size. Increase the"
- " redo log size using innodb_log_file_size.");
- return(HA_ERR_TO_BIG_ROW);
-
case DB_TOO_BIG_INDEX_COL:
my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0),
- DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags));
+ (ulong) DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags));
return(HA_ERR_INDEX_COL_TOO_LONG);
case DB_NO_SAVEPOINT:
@@ -2097,7 +2084,7 @@ convert_error_code_to_mysql(
cached binlog for this transaction */
if (thd) {
- thd_mark_transaction_to_rollback(thd, TRUE);
+ thd_mark_transaction_to_rollback(thd, 1);
}
return(HA_ERR_LOCK_TABLE_FULL);
@@ -2118,16 +2105,21 @@ convert_error_code_to_mysql(
return(HA_ERR_OUT_OF_MEM);
case DB_TABLESPACE_EXISTS:
return(HA_ERR_TABLESPACE_EXISTS);
+ case DB_TABLESPACE_DELETED:
+ return(HA_ERR_TABLESPACE_MISSING);
case DB_IDENTIFIER_TOO_LONG:
return(HA_ERR_INTERNAL_ERROR);
+ case DB_TABLE_CORRUPT:
+ return(HA_ERR_TABLE_CORRUPT);
case DB_FTS_TOO_MANY_WORDS_IN_PHRASE:
return(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE);
+ case DB_COMPUTE_VALUE_FAILED:
+ return(HA_ERR_GENERIC); // impossible
}
}
/*************************************************************//**
Prints info of a THD object (== user session thread) to the given file. */
-UNIV_INTERN
void
innobase_mysql_print_thd(
/*=====================*/
@@ -2146,7 +2138,6 @@ innobase_mysql_print_thd(
/******************************************************************//**
Get the error message format string.
@return the format string or 0 if not found. */
-UNIV_INTERN
const char*
innobase_get_err_msg(
/*=================*/
@@ -2157,7 +2148,6 @@ innobase_get_err_msg(
/******************************************************************//**
Get the variable length bounds of the given character set. */
-UNIV_INTERN
void
innobase_get_cset_width(
/*====================*/
@@ -2183,11 +2173,11 @@ innobase_get_cset_width(
/* Fix bug#46256: allow tables to be dropped if the
collation is not found, but issue a warning. */
- if ((global_system_variables.log_warnings)
- && (cset != 0)){
+ if (cset != 0) {
sql_print_warning(
- "Unknown collation #%lu.", cset);
+ "Unknown collation #" ULINTPF ".",
+ cset);
}
} else {
@@ -2200,14 +2190,13 @@ innobase_get_cset_width(
/******************************************************************//**
Converts an identifier to a table name. */
-UNIV_INTERN
void
innobase_convert_from_table_id(
/*===========================*/
- struct charset_info_st* cs, /*!< in: the 'from' character set */
- char* to, /*!< out: converted identifier */
- const char* from, /*!< in: identifier to convert */
- ulint len) /*!< in: length of 'to', in bytes */
+ CHARSET_INFO* cs, /*!< in: the 'from' character set */
+ char* to, /*!< out: converted identifier */
+ const char* from, /*!< in: identifier to convert */
+ ulint len) /*!< in: length of 'to', in bytes */
{
uint errors;
@@ -2217,7 +2206,6 @@ innobase_convert_from_table_id(
/**********************************************************************
Check if the length of the identifier exceeds the maximum allowed.
return true when length of identifier is too long. */
-UNIV_INTERN
my_bool
innobase_check_identifier_length(
/*=============================*/
@@ -2228,7 +2216,7 @@ innobase_check_identifier_length(
CHARSET_INFO *cs = system_charset_info;
DBUG_ENTER("innobase_check_identifier_length");
- size_t len = cs->cset->well_formed_len(
+ size_t len = my_well_formed_length(
cs, id, id + strlen(id),
NAME_CHAR_LEN, &well_formed_error);
@@ -2241,14 +2229,13 @@ innobase_check_identifier_length(
/******************************************************************//**
Converts an identifier to UTF-8. */
-UNIV_INTERN
void
innobase_convert_from_id(
/*=====================*/
- struct charset_info_st* cs, /*!< in: the 'from' character set */
- char* to, /*!< out: converted identifier */
- const char* from, /*!< in: identifier to convert */
- ulint len) /*!< in: length of 'to', in bytes */
+ CHARSET_INFO* cs, /*!< in: the 'from' character set */
+ char* to, /*!< out: converted identifier */
+ const char* from, /*!< in: identifier to convert */
+ ulint len) /*!< in: length of 'to', in bytes */
{
uint errors;
@@ -2257,8 +2244,7 @@ innobase_convert_from_id(
/******************************************************************//**
Compares NUL-terminated UTF-8 strings case insensitively.
-@return 0 if a=b, <0 if a<b, >1 if a>b */
-UNIV_INTERN
+@return 0 if a=b, <0 if a<b, >0 if a>b */
int
innobase_strcasecmp(
/*================*/
@@ -2282,7 +2268,7 @@ innobase_strcasecmp(
Compares NUL-terminated UTF-8 strings case insensitively. The
second string contains wildcards.
@return 0 if a match is found, 1 if not */
-UNIV_INTERN
+static
int
innobase_wildcasecmp(
/*=================*/
@@ -2292,14 +2278,12 @@ innobase_wildcasecmp(
return(wild_case_compare(system_charset_info, a, b));
}
-/******************************************************************//**
-Strip dir name from a full path name and return only the file name
+/** Strip dir name from a full path name and return only the file name
+@param[in] path_name full path name
@return file name or "null" if no file name */
-UNIV_INTERN
const char*
innobase_basename(
-/*==============*/
- const char* path_name) /*!< in: full path name */
+ const char* path_name)
{
const char* name = base_name(path_name);
@@ -2308,7 +2292,6 @@ innobase_basename(
/******************************************************************//**
Makes all characters in a NUL-terminated UTF-8 string lower case. */
-UNIV_INTERN
void
innobase_casedn_str(
/*================*/
@@ -2319,9 +2302,8 @@ innobase_casedn_str(
/**********************************************************************//**
Determines the connection character set.
-@return connection character set */
-UNIV_INTERN
-struct charset_info_st*
+@return connection character set */
+CHARSET_INFO*
innobase_get_charset(
/*=================*/
THD* mysql_thd) /*!< in: MySQL thread handle */
@@ -2329,29 +2311,46 @@ innobase_get_charset(
return(thd_charset(mysql_thd));
}
-/**********************************************************************//**
-Determines the current SQL statement.
-@return SQL statement string */
-UNIV_INTERN
+/** Determines the current SQL statement.
+Thread unsafe, can only be called from the thread owning the THD.
+@param[in] thd MySQL thread handle
+@param[out] length Length of the SQL statement
+@return SQL statement string */
const char*
-innobase_get_stmt(
-/*==============*/
- THD* thd, /*!< in: MySQL thread handle */
- size_t* length) /*!< out: length of the SQL statement */
+innobase_get_stmt_unsafe(
+ THD* thd,
+ size_t* length)
{
if (const LEX_STRING *stmt = thd_query_string(thd)) {
*length = stmt->length;
return stmt->str;
}
+
+ *length = 0;
return NULL;
}
+/** Determines the current SQL statement.
+Thread safe, can be called from any thread as the string is copied
+into the provided buffer.
+@param[in] thd MySQL thread handle
+@param[out] buf Buffer containing SQL statement
+@param[in] buflen Length of provided buffer
+@return Length of the SQL statement */
+size_t
+innobase_get_stmt_safe(
+ THD* thd,
+ char* buf,
+ size_t buflen)
+{
+ return thd_query_safe(thd, buf, buflen);
+}
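+
+/* Illustrative use from a monitoring thread (a sketch only):
+
+	char buf[1024];
+	size_t len = innobase_get_stmt_safe(thd, buf, sizeof buf);
+	if (len > 0) {
+		// buf now holds a copy of the current statement
+	}
+*/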
+
/**********************************************************************//**
Get the current setting of the tdc_size global parameter. We do
a dirty read because for one there is no synchronization object and
secondly there is little harm in doing so even if we get a torn read.
@return value of tdc_size */
-UNIV_INTERN
ulint
innobase_get_table_cache_size(void)
/*===============================*/
@@ -2364,8 +2363,7 @@ Get the current setting of the lower_case_table_names global parameter from
mysqld.cc. We do a dirty read because for one there is no synchronization
object and secondly there is little harm in doing so even if we get a torn
read.
-@return value of lower_case_table_names */
-UNIV_INTERN
+@return value of lower_case_table_names */
ulint
innobase_get_lower_case_table_names(void)
/*=====================================*/
@@ -2373,11 +2371,70 @@ innobase_get_lower_case_table_names(void)
return(lower_case_table_names);
}
-/** Create a temporary file in the location specified by the parameter
-path. If the path is null, then it will be created in tmpdir.
+/**
+ Test whether a file path is the same as the MySQL data directory path.
+
+ @param path null-terminated character string
+
+ @return
+ @retval TRUE The path is different from the MySQL data directory.
+ @retval FALSE The path is the same as the MySQL data directory.
+*/
+static bool is_mysql_datadir_path(const char *path)
+{
+ if (path == NULL)
+ return false;
+
+ char mysql_data_dir[FN_REFLEN], path_dir[FN_REFLEN];
+ convert_dirname(path_dir, path, NullS);
+ convert_dirname(mysql_data_dir, mysql_unpacked_real_data_home, NullS);
+ size_t mysql_data_home_len= dirname_length(mysql_data_dir);
+ size_t path_len = dirname_length(path_dir);
+
+ if (path_len < mysql_data_home_len)
+ return true;
+
+ if (!lower_case_file_system)
+ return(memcmp(mysql_data_dir, path_dir, mysql_data_home_len));
+
+ return(files_charset_info->coll->strnncoll(files_charset_info,
+ (uchar *) path_dir, path_len,
+ (uchar *) mysql_data_dir,
+ mysql_data_home_len,
+ TRUE));
+}
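+/* Illustrative examples, assuming datadir = /var/lib/mysql:
+   is_mysql_datadir_path("/var/lib/mysql") -> false (same directory)
+   is_mysql_datadir_path("/mnt/space1")    -> true  (different path) */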
+
+static int mysql_tmpfile_path(const char *path, const char *prefix)
+{
+ DBUG_ASSERT(path != NULL);
+ DBUG_ASSERT((strlen(path) + strlen(prefix)) <= FN_REFLEN);
+
+ char filename[FN_REFLEN];
+ File fd = create_temp_file(filename, path, prefix,
+#ifdef __WIN__
+ O_BINARY | O_TRUNC | O_SEQUENTIAL |
+ O_SHORT_LIVED |
+#endif /* __WIN__ */
+ O_CREAT | O_EXCL | O_RDWR | O_TEMPORARY,
+ MYF(MY_WME));
+ if (fd >= 0) {
+#ifndef __WIN__
+ /*
+ This can be removed once the following bug is fixed:
+ Bug #28903 create_temp_file() doesn't honor O_TEMPORARY option
+ (file not removed) (Unix)
+ */
+ unlink(filename);
+#endif /* !__WIN__ */
+ }
+
+ return fd;
+}
+
+/** Creates a temporary file in the location specified by the parameter
+path. If the path is NULL, then it will be created in tmpdir.
@param[in] path location for creating temporary file
-@return temporary file descriptor, or < 0 on error */
-UNIV_INTERN
+@return temporary file descriptor, or < 0 on error */
int
innobase_mysql_tmpfile(
const char* path)
@@ -2436,11 +2493,13 @@ innobase_mysql_tmpfile(
#endif
#endif
if (fd2 < 0) {
+ char errbuf[MYSYS_STRERROR_SIZE];
DBUG_PRINT("error",("Got error %d on dup",fd2));
- my_errno=errno;
+ set_my_errno(errno);
my_error(EE_OUT_OF_FILERESOURCES,
- MYF(ME_BELL+ME_WAITTANG),
- "ib*", my_errno);
+ MYF(0),
+ "ib*", errno,
+ my_strerror(errbuf, sizeof(errbuf), errno));
}
my_close(fd, MYF(MY_WME));
}
@@ -2449,8 +2508,8 @@ innobase_mysql_tmpfile(
/*********************************************************************//**
Wrapper around MySQL's copy_and_convert function.
-@return number of bytes copied to 'to' */
-UNIV_INTERN
+@return number of bytes copied to 'to' */
+static
ulint
innobase_convert_string(
/*====================*/
@@ -2479,8 +2538,7 @@ Not more than "buf_size" bytes are written to "buf".
The result is always NUL-terminated (provided buf_size > 0) and the
number of bytes that were written to "buf" is returned (including the
terminating NUL).
-@return number of bytes that were written */
-UNIV_INTERN
+@return number of bytes that were written */
ulint
innobase_raw_format(
/*================*/
@@ -2526,8 +2584,7 @@ values we want to reserve for multi-value inserts e.g.,
innobase_next_autoinc() will be called with increment set to 3 where
autoinc_lock_mode != TRADITIONAL because we want to reserve 3 values for
the multi-value INSERT above.
-@return the next value */
-UNIV_INTERN
+@return the next value */
ulonglong
innobase_next_autoinc(
/*==================*/
@@ -2619,6 +2676,64 @@ innobase_next_autoinc(
return(next_value);
}
+/********************************************************************//**
+Reset the autoinc value in the table.
+@return DB_SUCCESS if all went well else error code */
+UNIV_INTERN
+dberr_t
+ha_innobase::innobase_reset_autoinc(
+/*================================*/
+ ulonglong autoinc) /*!< in: value to store */
+{
+ dberr_t error;
+
+ error = innobase_lock_autoinc();
+
+ if (error == DB_SUCCESS) {
+
+ dict_table_autoinc_initialize(m_prebuilt->table, autoinc);
+
+ dict_table_autoinc_unlock(m_prebuilt->table);
+ }
+
+ return(error);
+}
+
+/*******************************************************************//**
+Reset the auto-increment counter to the given value, i.e. the next row
+inserted will get the given value. This is called e.g. after TRUNCATE
+is emulated by doing a 'DELETE FROM t'. HA_ERR_WRONG_COMMAND is
+returned by storage engines that don't support this operation.
+@return 0 or error code */
+UNIV_INTERN
+int
+ha_innobase::reset_auto_increment(
+/*==============================*/
+ ulonglong value) /*!< in: new value for table autoinc */
+{
+ DBUG_ENTER("ha_innobase::reset_auto_increment");
+
+ dberr_t error;
+
+ update_thd(ha_thd());
+
+ error = row_lock_table_autoinc_for_mysql(m_prebuilt);
+
+ if (error != DB_SUCCESS) {
+ DBUG_RETURN(convert_error_code_to_mysql(
+ error, m_prebuilt->table->flags, m_user_thd));
+ }
+
+ /* The next value can never be 0. */
+ if (value == 0) {
+ value = 1;
+ }
+
+ innobase_reset_autoinc(value);
+
+ DBUG_RETURN(0);
+}
+
/*********************************************************************//**
Initializes some fields in an InnoDB transaction object. */
static
@@ -2645,8 +2760,7 @@ innobase_trx_init(
/*********************************************************************//**
Allocates an InnoDB transaction for a MySQL handler object for DML.
-@return InnoDB transaction handle */
-UNIV_INTERN
+@return InnoDB transaction handle */
trx_t*
innobase_trx_allocate(
/*==================*/
@@ -2671,26 +2785,22 @@ innobase_trx_allocate(
Gets the InnoDB transaction handle for a MySQL handler object, creates
an InnoDB transaction struct if the corresponding MySQL thread struct still
lacks one.
-@return InnoDB transaction handle */
+@return InnoDB transaction handle */
static inline
trx_t*
check_trx_exists(
/*=============*/
THD* thd) /*!< in: user thread handle */
{
- trx_t*& trx = thd_to_trx(thd);
-
- if (trx == NULL) {
+ if (trx_t* trx = thd_to_trx(thd)) {
+ ut_a(trx->magic_n == TRX_MAGIC_N);
+ innobase_trx_init(thd, trx);
+ return trx;
+ } else {
trx = innobase_trx_allocate(thd);
thd_set_ha_data(thd, innodb_hton_ptr, trx);
- } else if (UNIV_UNLIKELY(trx->magic_n != TRX_MAGIC_N)) {
- mem_analyze_corruption(trx);
- ut_error;
+ return trx;
}
-
- innobase_trx_init(thd, trx);
-
- return(trx);
}
/*************************************************************************
@@ -2700,8 +2810,7 @@ innobase_get_trx()
{
THD *thd=current_thd;
if (likely(thd != 0)) {
- trx_t*& trx = thd_to_trx(thd);
- return(trx);
+ return thd_to_trx(thd);
} else {
return(NULL);
}
@@ -2713,7 +2822,7 @@ Note that a transaction has been registered with MySQL.
static inline
bool
trx_is_registered_for_2pc(
-/*=========================*/
+/*======================*/
const trx_t* trx) /* in: transaction */
{
return(trx->is_registered == 1);
@@ -2724,7 +2833,7 @@ Note that innobase_commit_ordered() was run. */
static inline
void
trx_set_active_commit_ordered(
-/*==============================*/
+/*==========================*/
trx_t* trx) /* in: transaction */
{
ut_a(trx_is_registered_for_2pc(trx));
@@ -2752,7 +2861,7 @@ trx_deregister_from_2pc(
trx_t* trx) /* in: transaction */
{
trx->is_registered = 0;
- trx->active_commit_ordered = 0;
+ trx->active_commit_ordered = 0;
}
/*********************************************************************//**
@@ -2767,23 +2876,11 @@ trx_is_active_commit_ordered(
}
/*********************************************************************//**
-Check if transaction is started.
-@reutrn true if transaction is in state started */
-static
-bool
-trx_is_started(
-/*===========*/
- trx_t* trx) /* in: transaction */
-{
- return(trx->state != TRX_STATE_NOT_STARTED);
-}
-
-/*********************************************************************//**
Copy table flags from MySQL's HA_CREATE_INFO into an InnoDB table object.
Those flags are stored in .frm file and end up in the MySQL table object,
but are frequently used inside InnoDB so we keep their copies into the
InnoDB table object. */
-UNIV_INTERN
+static
void
innobase_copy_frm_flags_from_create_info(
/*=====================================*/
@@ -2819,7 +2916,6 @@ Copy table flags from MySQL's TABLE_SHARE into an InnoDB table object.
Those flags are stored in .frm file and end up in the MySQL table object,
but are frequently used inside InnoDB so we keep their copies into the
InnoDB table object. */
-UNIV_INTERN
void
innobase_copy_frm_flags_from_table_share(
/*=====================================*/
@@ -2852,30 +2948,43 @@ innobase_copy_frm_flags_from_table_share(
/*********************************************************************//**
Construct ha_innobase handler. */
-UNIV_INTERN
+
ha_innobase::ha_innobase(
/*=====================*/
handlerton* hton,
TABLE_SHARE* table_arg)
:handler(hton, table_arg),
- int_table_flags(HA_REC_NOT_IN_SEQ |
- HA_NULL_IN_KEY | HA_CAN_VIRTUAL_COLUMNS |
- HA_CAN_INDEX_BLOBS | HA_CONCURRENT_OPTIMIZE |
- HA_CAN_SQL_HANDLER |
- HA_PRIMARY_KEY_REQUIRED_FOR_POSITION |
- HA_PRIMARY_KEY_IN_READ_INDEX |
- HA_BINLOG_ROW_CAPABLE |
- HA_CAN_GEOMETRY | HA_PARTIAL_COLUMN_READ |
- HA_TABLE_SCAN_ON_INDEX | HA_CAN_FULLTEXT |
- (srv_force_primary_key ? HA_REQUIRE_PRIMARY_KEY : 0 ) |
- HA_CAN_FULLTEXT_EXT | HA_CAN_EXPORT),
- start_of_scan(0),
- num_write_row(0)
+ m_prebuilt(),
+ m_user_thd(),
+ m_int_table_flags(HA_REC_NOT_IN_SEQ
+ | HA_NULL_IN_KEY
+ | HA_CAN_VIRTUAL_COLUMNS
+ | HA_CAN_INDEX_BLOBS
+ | HA_CAN_SQL_HANDLER
+ | HA_REQUIRES_KEY_COLUMNS_FOR_DELETE
+ | HA_PRIMARY_KEY_REQUIRED_FOR_POSITION
+ | HA_PRIMARY_KEY_IN_READ_INDEX
+ | HA_BINLOG_ROW_CAPABLE
+ | HA_CAN_GEOMETRY
+ | HA_PARTIAL_COLUMN_READ
+ | HA_TABLE_SCAN_ON_INDEX
+ | HA_CAN_FULLTEXT
+ | HA_CAN_FULLTEXT_EXT
+ /* JAN: TODO: MySQL 5.7
+ | HA_CAN_FULLTEXT_HINTS
+ */
+ | HA_CAN_EXPORT
+ | HA_CAN_RTREEKEYS
+ | HA_CONCURRENT_OPTIMIZE
+ | (srv_force_primary_key ? HA_REQUIRE_PRIMARY_KEY : 0)
+ ),
+ m_start_of_scan(),
+ m_mysql_has_locked()
{}
/*********************************************************************//**
Destruct ha_innobase handler. */
-UNIV_INTERN
+
ha_innobase::~ha_innobase()
/*======================*/
{
@@ -2884,38 +2993,42 @@ ha_innobase::~ha_innobase()
/*********************************************************************//**
Updates the user_thd field in a handle and also allocates a new InnoDB
transaction handle if needed, and updates the transaction fields in the
-prebuilt struct. */
-UNIV_INTERN inline
+m_prebuilt struct. */
void
ha_innobase::update_thd(
/*====================*/
THD* thd) /*!< in: thd to use the handle */
{
- trx_t* trx;
-
DBUG_ENTER("ha_innobase::update_thd");
DBUG_PRINT("ha_innobase::update_thd", ("user_thd: %p -> %p",
- user_thd, thd));
+ m_user_thd, thd));
/* The table should have been opened in ha_innobase::open(). */
- DBUG_ASSERT(prebuilt->table->n_ref_count > 0);
+ DBUG_ASSERT(m_prebuilt->table->get_ref_count() > 0);
- trx = check_trx_exists(thd);
+ trx_t* trx = check_trx_exists(thd);
- if (prebuilt->trx != trx) {
+ ut_ad(trx->dict_operation_lock_mode == 0);
+ ut_ad(trx->dict_operation == TRX_DICT_OP_NONE);
- row_update_prebuilt_trx(prebuilt, trx);
+ if (m_prebuilt->trx != trx) {
+
+ row_update_prebuilt_trx(m_prebuilt, trx);
}
- user_thd = thd;
+ m_user_thd = thd;
+
+ DBUG_ASSERT(m_prebuilt->trx->magic_n == TRX_MAGIC_N);
+ DBUG_ASSERT(m_prebuilt->trx == thd_to_trx(m_user_thd));
+
DBUG_VOID_RETURN;
}
/*********************************************************************//**
Updates the user_thd field in a handle and also allocates a new InnoDB
transaction handle if needed, and updates the transaction fields in the
-prebuilt struct. */
-UNIV_INTERN
+m_prebuilt struct. */
+
void
ha_innobase::update_thd()
/*=====================*/
@@ -2941,11 +3054,18 @@ innobase_register_trx(
THD* thd, /* in: MySQL thd (connection) object */
trx_t* trx) /* in: transaction to register */
{
+ /* JAN: TODO: MySQL 5.7 PSI
+ const ulonglong trx_id = static_cast<const ulonglong>(
+ trx_get_id_for_print(trx));
+
+ trans_register_ha(thd, FALSE, hton, &trx_id);
+ */
trans_register_ha(thd, FALSE, hton);
if (!trx_is_registered_for_2pc(trx)
&& thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
+ //trans_register_ha(thd, TRUE, hton, &trx_id);
trans_register_ha(thd, TRUE, hton);
}
@@ -2997,6 +3117,83 @@ AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer
put restrictions on the use of the query cache.
*/
+/** Check if mysql can allow the transaction to read from/store to
+the query cache.
+@param[in] table table object
+@param[in] trx transaction object
+@return whether the storing or retrieving from the query cache is permitted */
+static bool innobase_query_caching_table_check_low(
+ const dict_table_t* table,
+ trx_t* trx)
+{
+	/* The following conditions decide whether the query cache
+	may be used (for retrieval or storing):
+
+	(1) There must not be any locks on the table.
+	(2) No other trx may have invalidated the cache after this
+	transaction started.
+	(3) Either no read view exists, or the view's low_limit_id is
+	greater than or equal to the transaction that last invalidated
+	the cache for this table.
+
+	A read-only transaction must satisfy (1) and (3);
+	a read-write transaction must satisfy (1), (2) and (3). */
+
+ if (lock_table_get_n_locks(table)) {
+ return false;
+ }
+
+ if (trx->id && trx->id < table->query_cache_inv_trx_id) {
+ return false;
+ }
+
+ return !MVCC::is_view_active(trx->read_view)
+ || trx->read_view->low_limit_id()
+ >= table->query_cache_inv_trx_id;
+}
+
+/** Checks if MySQL at the moment is allowed for this table to retrieve a
+consistent read result, or store it to the query cache.
+@param[in,out] trx transaction
+@param[in] norm_name concatenation of database name,
+ '/' char, table name
+@return whether storing or retrieving from the query cache is permitted */
+static bool innobase_query_caching_table_check(
+ trx_t* trx,
+ const char* norm_name)
+{
+ dict_table_t* table = dict_table_open_on_name(
+ norm_name, FALSE, FALSE, DICT_ERR_IGNORE_FK_NOKEY);
+
+ if (table == NULL) {
+ return false;
+ }
+
+ /* Start the transaction if it is not started yet */
+ trx_start_if_not_started(trx, false);
+
+ bool allow = innobase_query_caching_table_check_low(table, trx);
+
+ dict_table_close(table, FALSE, FALSE);
+
+ if (allow) {
+ /* If the isolation level is high, assign a read view for the
+ transaction if it does not yet have one */
+
+ if (trx->isolation_level >= TRX_ISO_REPEATABLE_READ
+ && !srv_read_only_mode
+ && !MVCC::is_view_active(trx->read_view)) {
+
+ /* Start the transaction if it is not started yet */
+ trx_start_if_not_started(trx, false);
+
+ trx_sys->mvcc->view_open(trx->read_view, trx);
+ }
+ }
+
+ return allow;
+}
+
/******************************************************************//**
The MySQL query cache uses this to check from InnoDB if the query cache at
the moment is allowed to operate on an InnoDB table. The SQL query must
@@ -3013,7 +3210,7 @@ Why a deadlock of threads is not possible: the query cache calls this function
at the start of a SELECT processing. Then the calling thread cannot be
holding any InnoDB semaphores. The calling thread is holding the
query cache mutex, and this function will reserve the InnoDB trx_sys->mutex.
-Thus, the 'rank' in sync0sync.h of the MySQL query cache mutex is above
+Thus, the 'rank' in sync0mutex.h of the MySQL query cache mutex is above
the InnoDB trx_sys->mutex.
@return TRUE if permitted, FALSE if not; note that the value FALSE
does not mean we should invalidate the query cache: invalidation is
@@ -3027,44 +3224,25 @@ innobase_query_caching_of_table_permitted(
retrieve it */
char* full_name, /*!< in: normalized path to the table */
uint full_name_len, /*!< in: length of the normalized path
- to the table */
+ to the table */
ulonglong *unused) /*!< unused for this engine */
{
- ibool is_autocommit;
- trx_t* trx;
char norm_name[1000];
+ trx_t* trx = check_trx_exists(thd);
ut_a(full_name_len < 999);
- trx = check_trx_exists(thd);
-
if (trx->isolation_level == TRX_ISO_SERIALIZABLE) {
/* In the SERIALIZABLE mode we add LOCK IN SHARE MODE to every
plain SELECT if AUTOCOMMIT is not on. */
- return((my_bool)FALSE);
- }
-
- if (UNIV_UNLIKELY(trx->has_search_latch)) {
- sql_print_error("The calling thread is holding the adaptive "
- "search, latch though calling "
- "innobase_query_caching_of_table_permitted.");
- trx_print(stderr, trx, 1024);
+ return(false);
}
- trx_search_latch_release_if_reserved(trx);
-
innobase_srv_conc_force_exit_innodb(trx);
- if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
-
- is_autocommit = TRUE;
- } else {
- is_autocommit = FALSE;
-
- }
-
- if (is_autocommit && trx->n_mysql_tables_in_use == 0) {
+ if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)
+ && trx->n_mysql_tables_in_use == 0) {
/* We are going to retrieve the query result from the query
cache. This cannot be a store operation to the query cache
because then MySQL would have locks on tables already.
@@ -3083,7 +3261,7 @@ innobase_query_caching_of_table_permitted(
then trx2 would have already invalidated the cache. Thus we
can trust the result in the cache is ok for this query. */
- return((my_bool)TRUE);
+ return(true);
}
/* Normalize the table name to InnoDB format */
@@ -3091,219 +3269,203 @@ innobase_query_caching_of_table_permitted(
innobase_register_trx(innodb_hton_ptr, thd, trx);
- if (row_search_check_if_query_cache_permitted(trx, norm_name)) {
-
- /* printf("Query cache for %s permitted\n", norm_name); */
-
- return((my_bool)TRUE);
- }
-
- /* printf("Query cache for %s NOT permitted\n", norm_name); */
-
- return((my_bool)FALSE);
+ return innobase_query_caching_table_check(trx, norm_name);
}
/*****************************************************************//**
Invalidates the MySQL query cache for the table. */
-UNIV_INTERN
void
innobase_invalidate_query_cache(
/*============================*/
trx_t* trx, /*!< in: transaction which
modifies the table */
const char* full_name, /*!< in: concatenation of
- database name, null char NUL,
+ database name, path separator,
table name, null char NUL;
NOTE that in Windows this is
always in LOWER CASE! */
ulint full_name_len) /*!< in: full name length where
also the null chars count */
{
- /* Note that the sync0sync.h rank of the query cache mutex is just
+ /* Note that the sync0mutex.h rank of the query cache mutex is just
above the InnoDB trx_sys_t->lock. The caller of this function must
not have latches of a lower rank. */
#ifdef HAVE_QUERY_CACHE
- char qcache_key_name[2 * (NAME_LEN + 1)];
- size_t tabname_len;
- size_t dbname_len;
-
- /* Construct the key("db-name\0table$name\0") for the query cache using
- the path name("db@002dname\0table@0024name\0") of the table in its
+ char qcache_key_name[2 * (NAME_LEN + 1)];
+ char db_name[NAME_CHAR_LEN * MY_CS_MBMAXLEN + 1];
+ const char *key_ptr;
+ size_t tabname_len;
+ size_t dbname_len;
+
+ // Extract the database name.
+ key_ptr= strchr(full_name, '/');
+ DBUG_ASSERT(key_ptr != NULL); // Database name should be present
+ memcpy(db_name, full_name, (dbname_len= (key_ptr - full_name)));
+ db_name[dbname_len]= '\0';
+
+ /* Construct the key("db-name\0table$name\0") for the query cache using
+ the path name("db@002dname\0table@0024name\0") of the table in its
canonical form. */
- dbname_len = filename_to_tablename(full_name, qcache_key_name,
- sizeof(qcache_key_name));
- tabname_len = filename_to_tablename(full_name + strlen(full_name) + 1,
- qcache_key_name + dbname_len + 1,
- sizeof(qcache_key_name)
- - dbname_len - 1);
-
- /* Argument TRUE below means we are using transactions */
- mysql_query_cache_invalidate4(trx->mysql_thd,
- qcache_key_name,
- (dbname_len + tabname_len + 2),
- TRUE);
+ dbname_len = filename_to_tablename(db_name, qcache_key_name,
+ sizeof(qcache_key_name));
+ tabname_len = filename_to_tablename(++key_ptr,
+ (qcache_key_name + dbname_len + 1),
+ sizeof(qcache_key_name) -
+ dbname_len - 1);
+
+ /* Argument TRUE below means we are using transactions */
+ mysql_query_cache_invalidate4(trx->mysql_thd,
+ qcache_key_name,
+ (dbname_len + tabname_len + 2),
+ TRUE);
#endif
}
-/*****************************************************************//**
-Convert an SQL identifier to the MySQL system_charset_info (UTF-8)
-and quote it if needed.
-@return pointer to the end of buf */
-static
-char*
-innobase_convert_identifier(
-/*========================*/
- char* buf, /*!< out: buffer for converted identifier */
- ulint buflen, /*!< in: length of buf, in bytes */
- const char* id, /*!< in: identifier to convert */
- ulint idlen, /*!< in: length of id, in bytes */
- THD* thd, /*!< in: MySQL connection thread, or NULL */
- ibool file_id)/*!< in: TRUE=id is a table or database name;
- FALSE=id is an UTF-8 string */
+/** Quote a standard SQL identifier like index or column name.
+@param[in] file output stream
+@param[in] trx InnoDB transaction, or NULL
+@param[in] id identifier to quote */
+void
+innobase_quote_identifier(
+ FILE* file,
+ trx_t* trx,
+ const char* id)
{
- char nz2[MAX_TABLE_NAME_LEN + 1];
- const char* s = id;
- int q;
-
- if (file_id) {
+ const int q = trx != NULL && trx->mysql_thd != NULL
+ ? get_quote_char_for_identifier(trx->mysql_thd, id, strlen(id))
+ : '`';
- char nz[MAX_TABLE_NAME_LEN + 1];
+ if (q == EOF) {
+ fputs(id, file);
+ } else {
+ putc(q, file);
- /* Decode the table name. The MySQL function expects
- a NUL-terminated string. The input and output strings
- buffers must not be shared. */
- ut_a(idlen <= MAX_TABLE_NAME_LEN);
- memcpy(nz, id, idlen);
- nz[idlen] = 0;
+ while (int c = *id++) {
+ if (c == q) {
+ putc(c, file);
+ }
+ putc(c, file);
+ }
- s = nz2;
- idlen = explain_filename(thd, nz, nz2, sizeof nz2,
- EXPLAIN_PARTITIONS_AS_COMMENT);
- goto no_quote;
+ putc(q, file);
}
+}
- /* See if the identifier needs to be quoted. */
- if (UNIV_UNLIKELY(!thd)) {
- q = '"';
- } else {
- q = get_quote_char_for_identifier(thd, s, (int) idlen);
- }
+/** Quote a standard SQL identifier like tablespace, index or column name.
+@param[in] trx InnoDB transaction, or NULL
+@param[in] id identifier to quote
+@return quoted identifier */
+std::string
+innobase_quote_identifier(
+/*======================*/
+ trx_t* trx,
+ const char* id)
+{
+ std::string quoted_identifier;
+ const int q = trx != NULL && trx->mysql_thd != NULL
+ ? get_quote_char_for_identifier(trx->mysql_thd, id, strlen(id))
+ : '`';
if (q == EOF) {
-no_quote:
- if (UNIV_UNLIKELY(idlen > buflen)) {
- idlen = buflen;
- }
- memcpy(buf, s, idlen);
- return(buf + idlen);
+ quoted_identifier.append(id);
+ } else {
+		quoted_identifier += (unsigned char)q;
+		while (int c = *id++) {
+			if (c == q) {
+				/* double an embedded quote character,
+				as in the FILE* overload above */
+				quoted_identifier += (unsigned char)c;
+			}
+			quoted_identifier += (unsigned char)c;
+		}
+		quoted_identifier += (unsigned char)q;
}
- /* Quote the identifier. */
- if (buflen < 2) {
- return(buf);
- }
+ return (quoted_identifier);
+}
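+
+/* Example (illustrative): with no session (trx == NULL) the quote
+character defaults to '`', so "idx" is returned as `idx`. */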
- *buf++ = q;
- buflen--;
+/** Convert a table name to the MySQL system_charset_info (UTF-8)
+and quote it.
+@param[out] buf buffer for converted identifier
+@param[in] buflen length of buf, in bytes
+@param[in] id identifier to convert
+@param[in] idlen length of id, in bytes
+@param[in] thd MySQL connection thread, or NULL
+@return pointer to the end of buf */
+static
+char*
+innobase_convert_identifier(
+ char* buf,
+ ulint buflen,
+ const char* id,
+ ulint idlen,
+ THD* thd)
+{
+ const char* s = id;
- for (; idlen; idlen--) {
- int c = *s++;
- if (UNIV_UNLIKELY(c == q)) {
- if (UNIV_UNLIKELY(buflen < 3)) {
- break;
- }
+ char nz[MAX_TABLE_NAME_LEN + 1];
+ char nz2[MAX_TABLE_NAME_LEN + 1];
- *buf++ = c;
- *buf++ = c;
- buflen -= 2;
- } else {
- if (UNIV_UNLIKELY(buflen < 2)) {
- break;
- }
+ /* Decode the table name. The MySQL function expects
+ a NUL-terminated string. The input and output strings
+ buffers must not be shared. */
+ ut_a(idlen <= MAX_TABLE_NAME_LEN);
+ memcpy(nz, id, idlen);
+ nz[idlen] = 0;
- *buf++ = c;
- buflen--;
- }
+ s = nz2;
+ idlen = explain_filename(thd, nz, nz2, sizeof nz2,
+ EXPLAIN_PARTITIONS_AS_COMMENT);
+ if (idlen > buflen) {
+ idlen = buflen;
}
-
- *buf++ = q;
- return(buf);
+ memcpy(buf, s, idlen);
+ return(buf + idlen);
}
/*****************************************************************//**
-Convert a table or index name to the MySQL system_charset_info (UTF-8)
-and quote it if needed.
-@return pointer to the end of buf */
-UNIV_INTERN
+Convert a table name to the MySQL system_charset_info (UTF-8).
+@return pointer to the end of buf */
char*
innobase_convert_name(
/*==================*/
char* buf, /*!< out: buffer for converted identifier */
ulint buflen, /*!< in: length of buf, in bytes */
- const char* id, /*!< in: identifier to convert */
+ const char* id, /*!< in: table name to convert */
ulint idlen, /*!< in: length of id, in bytes */
- THD* thd, /*!< in: MySQL connection thread, or NULL */
- ibool table_id)/*!< in: TRUE=id is a table or database name;
- FALSE=id is an index name */
+ THD* thd) /*!< in: MySQL connection thread, or NULL */
{
char* s = buf;
const char* bufend = buf + buflen;
- if (table_id) {
- const char* slash = (const char*) memchr(id, '/', idlen);
- if (!slash) {
-
- goto no_db_name;
- }
+ const char* slash = (const char*) memchr(id, '/', idlen);
- /* Print the database name and table name separately. */
- s = innobase_convert_identifier(s, bufend - s, id, slash - id,
- thd, TRUE);
- if (UNIV_LIKELY(s < bufend)) {
- *s++ = '.';
- s = innobase_convert_identifier(s, bufend - s,
- slash + 1, idlen
- - (slash - id) - 1,
- thd, TRUE);
- }
- } else if (UNIV_UNLIKELY(*id == TEMP_INDEX_PREFIX)) {
- /* Temporary index name (smart ALTER TABLE) */
- const char temp_index_suffix[]= "--temporary--";
+ if (slash == NULL) {
+ return(innobase_convert_identifier(
+ buf, buflen, id, idlen, thd));
+ }
- s = innobase_convert_identifier(buf, buflen, id + 1, idlen - 1,
- thd, FALSE);
- if (s - buf + (sizeof temp_index_suffix - 1) < buflen) {
- memcpy(s, temp_index_suffix,
- sizeof temp_index_suffix - 1);
- s += sizeof temp_index_suffix - 1;
- }
- } else {
-no_db_name:
- s = innobase_convert_identifier(buf, buflen, id, idlen,
- thd, table_id);
+ /* Print the database name and table name separately. */
+ s = innobase_convert_identifier(s, bufend - s, id, slash - id, thd);
+ if (s < bufend) {
+ *s++ = '.';
+ s = innobase_convert_identifier(s, bufend - s,
+ slash + 1, idlen
+ - (slash - id) - 1,
+ thd);
}
return(s);
}
/*****************************************************************//**
-A wrapper function of innobase_convert_name(), convert a table or
-index name to the MySQL system_charset_info (UTF-8) and quote it if needed.
-@return pointer to the end of buf */
-UNIV_INTERN
+A wrapper function of innobase_convert_name(), convert a table name
+to the MySQL system_charset_info (UTF-8) and quote it if needed.
+@return pointer to the end of buf */
void
innobase_format_name(
/*==================*/
char* buf, /*!< out: buffer for converted identifier */
ulint buflen, /*!< in: length of buf, in bytes */
- const char* name, /*!< in: index or table name to format */
- ibool is_index_name) /*!< in: index name */
+ const char* name) /*!< in: table name to format */
{
const char* bufend;
- bufend = innobase_convert_name(buf, buflen, name, strlen(name),
- NULL, !is_index_name);
+ bufend = innobase_convert_name(buf, buflen, name, strlen(name), NULL);
ut_ad((ulint) (bufend - buf) < buflen);
@@ -3312,127 +3474,235 @@ innobase_format_name(
/**********************************************************************//**
Determines if the currently running transaction has been interrupted.
-@return TRUE if interrupted */
-UNIV_INTERN
+@return TRUE if interrupted */
ibool
trx_is_interrupted(
/*===============*/
const trx_t* trx) /*!< in: transaction */
{
- return(trx && trx->mysql_thd && thd_kill_level(trx->mysql_thd));
-}
-
-/**********************************************************************//**
-Determines if the currently running transaction is in strict mode.
-@return TRUE if strict */
-UNIV_INTERN
-ibool
-trx_is_strict(
-/*==========*/
- trx_t* trx) /*!< in: transaction */
-{
- return(trx && trx->mysql_thd && THDVAR(trx->mysql_thd, strict_mode));
+ return(trx && trx->mysql_thd && thd_kill_level(trx->mysql_thd));
}
/**************************************************************//**
-Resets some fields of a prebuilt struct. The template is used in fast
+Resets some fields of the m_prebuilt struct. The template is used in fast
retrieval of just those column values MySQL needs in its processing. */
-inline
void
ha_innobase::reset_template(void)
/*=============================*/
{
- ut_ad(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED);
- ut_ad(prebuilt->magic_n2 == prebuilt->magic_n);
+ ut_ad(m_prebuilt->magic_n == ROW_PREBUILT_ALLOCATED);
+ ut_ad(m_prebuilt->magic_n2 == m_prebuilt->magic_n);
/* Force table to be freed in close_thread_table(). */
DBUG_EXECUTE_IF("free_table_in_fts_query",
- if (prebuilt->in_fts_query) {
+ if (m_prebuilt->in_fts_query) {
table->m_needs_reopen = true;
}
);
- prebuilt->keep_other_fields_on_keyread = 0;
- prebuilt->read_just_key = 0;
- prebuilt->in_fts_query = 0;
+ m_prebuilt->keep_other_fields_on_keyread = 0;
+ m_prebuilt->read_just_key = 0;
+ m_prebuilt->in_fts_query = 0;
+
/* Reset index condition pushdown state. */
- if (prebuilt->idx_cond) {
- prebuilt->idx_cond = NULL;
- prebuilt->idx_cond_n_cols = 0;
- /* Invalidate prebuilt->mysql_template
+ if (m_prebuilt->idx_cond) {
+ m_prebuilt->idx_cond = NULL;
+ m_prebuilt->idx_cond_n_cols = 0;
+ /* Invalidate m_prebuilt->mysql_template
in ha_innobase::write_row(). */
- prebuilt->template_type = ROW_MYSQL_NO_TEMPLATE;
+ m_prebuilt->template_type = ROW_MYSQL_NO_TEMPLATE;
}
}
/*****************************************************************//**
Call this when you have opened a new table handle in HANDLER, before you
-call index_read_idx() etc. Actually, we can let the cursor stay open even
+call index_read_map() etc. Actually, we can let the cursor stay open even
over a transaction commit! Then you should call this before every operation,
fetch next etc. This function inits the necessary things even after a
transaction commit. */
-UNIV_INTERN
+
void
ha_innobase::init_table_handle_for_HANDLER(void)
/*============================================*/
{
/* If current thd does not yet have a trx struct, create one.
- If the current handle does not yet have a prebuilt struct, create
- one. Update the trx pointers in the prebuilt struct. Normally
+	If the current handle does not yet have an m_prebuilt struct, create
+ one. Update the trx pointers in the m_prebuilt struct. Normally
this operation is done in external_lock. */
update_thd(ha_thd());
- /* Initialize the prebuilt struct much like it would be inited in
+ /* Initialize the m_prebuilt struct much like it would be inited in
external_lock */
- trx_search_latch_release_if_reserved(prebuilt->trx);
-
- innobase_srv_conc_force_exit_innodb(prebuilt->trx);
+ innobase_srv_conc_force_exit_innodb(m_prebuilt->trx);
/* If the transaction is not started yet, start it */
- trx_start_if_not_started_xa(prebuilt->trx);
+ trx_start_if_not_started_xa(m_prebuilt->trx, false);
/* Assign a read view if the transaction does not have it yet */
- trx_assign_read_view(prebuilt->trx);
+ trx_assign_read_view(m_prebuilt->trx);
- innobase_register_trx(ht, user_thd, prebuilt->trx);
+ innobase_register_trx(ht, m_user_thd, m_prebuilt->trx);
/* We did the necessary inits in this function, no need to repeat them
in row_search_for_mysql */
- prebuilt->sql_stat_start = FALSE;
+ m_prebuilt->sql_stat_start = FALSE;
/* We let HANDLER always to do the reads as consistent reads, even
if the trx isolation level would have been specified as SERIALIZABLE */
- prebuilt->select_lock_type = LOCK_NONE;
- prebuilt->stored_select_lock_type = LOCK_NONE;
+ m_prebuilt->select_lock_type = LOCK_NONE;
+ m_prebuilt->stored_select_lock_type = LOCK_NONE;
/* Always fetch all columns in the index record */
- prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
+ m_prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
/* We want always to fetch all columns in the whole row? Or do
we???? */
- prebuilt->used_in_HANDLER = TRUE;
+ m_prebuilt->used_in_HANDLER = TRUE;
+
reset_template();
}
+/** Free tablespace resources allocated. */
+void innobase_space_shutdown()
+{
+ DBUG_ENTER("innobase_space_shutdown");
+
+ srv_sys_space.shutdown();
+ if (srv_tmp_space.get_sanity_check_status()) {
+ fil_space_close(srv_tmp_space.name());
+ srv_tmp_space.delete_files();
+ }
+ srv_tmp_space.shutdown();
+
+#ifdef WITH_INNODB_DISALLOW_WRITES
+ os_event_destroy(srv_allow_writes_event);
+#endif /* WITH_INNODB_DISALLOW_WRITES */
+
+ DBUG_VOID_RETURN;
+}
+
+/*********************************************************************//**
+Free any resources that were allocated and return failure.
+@return always return 1 */
+static
+int
+innobase_init_abort()
+/*=================*/
+{
+ DBUG_ENTER("innobase_init_abort");
+ innobase_space_shutdown();
+ DBUG_RETURN(1);
+}
+
+/** Return partitioning flags. */
+static uint innobase_partition_flags()
+{
+ return (0);
+}
+
+/** Deprecation message about InnoDB file format related parameters */
+#define DEPRECATED_FORMAT_PARAMETER(x) \
+ "Using " x " is deprecated and the parameter" \
+ " may be removed in future releases." \
+ " See https://mariadb.com/kb/en/library/xtradbinnodb-file-format/"
+
+/** Deprecation message about innodb_file_format */
+static const char* deprecated_file_format
+ = DEPRECATED_FORMAT_PARAMETER("innodb_file_format");
+
+/** Deprecation message about innodb_large_prefix */
+static const char* deprecated_large_prefix
+ = DEPRECATED_FORMAT_PARAMETER("innodb_large_prefix");
+
+/** Deprecation message about innodb_file_format_check */
+static const char* deprecated_file_format_check
+ = DEPRECATED_FORMAT_PARAMETER("innodb_file_format_check");
+
+/** Deprecation message about innodb_file_format_max */
+static const char* deprecated_file_format_max
+ = DEPRECATED_FORMAT_PARAMETER("innodb_file_format_max");
+
+/** Deprecation message about innodb_use_trim */
+static const char* deprecated_use_trim
+ = "Using innodb_use_trim is deprecated"
+ " and the parameter will be removed in MariaDB 10.3.";
+
+/** Deprecation message about innodb_instrument_semaphores */
+static const char* deprecated_instrument_semaphores
+ = "Using innodb_instrument_semaphores is deprecated"
+ " and the parameter will be removed in MariaDB 10.3.";
+
+static const char* deprecated_use_mtflush
+ = "Using innodb_use_mtflush is deprecated"
+ " and the parameter will be removed in MariaDB 10.3."
+	" Use innodb-page-cleaners instead.";
+
+static const char* deprecated_mtflush_threads
+ = "Using innodb_mtflush_threads is deprecated"
+ " and the parameter will be removed in MariaDB 10.3."
+	" Use innodb-page-cleaners instead.";
+
+static my_bool innodb_instrument_semaphores;
+
+/** Update log_checksum_algorithm_ptr with a pointer to the function
+corresponding to whether checksums are enabled.
+@param[in,out] thd client session, or NULL if at startup
+@param[in] check whether redo log block checksums are enabled
+@return whether redo log block checksums are enabled */
+static inline
+bool
+innodb_log_checksums_func_update(THD* thd, bool check)
+{
+ static const char msg[] = "innodb_encrypt_log implies"
+ " innodb_log_checksums";
+
+ ut_ad(!thd == !srv_was_started);
+
+	if (!check) {
+		check = srv_encrypt_log;
+		if (check) {
+			if (thd) {
+				push_warning_printf(
+					thd, Sql_condition::WARN_LEVEL_WARN,
+					HA_ERR_UNSUPPORTED, msg);
+			} else {
+				sql_print_warning(msg);
+			}
+		}
+	}
+
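+	/* When thd is NULL, we are still in single-threaded startup,
+	so the pointer can be updated without holding the log mutex. */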
+ if (thd) {
+ log_mutex_enter();
+ log_checksum_algorithm_ptr = check
+ ? log_block_calc_checksum_crc32
+ : log_block_calc_checksum_none;
+ log_mutex_exit();
+ } else {
+ log_checksum_algorithm_ptr = check
+ ? log_block_calc_checksum_crc32
+ : log_block_calc_checksum_none;
+ }
+
+ return(check);
+}
+
/****************************************************************//**
Gives the file extension of an InnoDB single-table tablespace. */
static const char* ha_innobase_exts[] = {
- ".ibd",
- ".isl",
- NullS
+ dot_ext[IBD],
+ dot_ext[ISL],
+ NullS
};
/*********************************************************************//**
Opens an InnoDB database.
-@return 0 on success, error code on failure */
+@return 0 on success, 1 on failure */
static
int
innobase_init(
@@ -3441,23 +3711,25 @@ innobase_init(
{
static char current_dir[3]; /*!< Set if using current lib */
int err;
- bool ret;
char *default_path;
uint format_id;
ulong num_pll_degree;
DBUG_ENTER("innobase_init");
- handlerton *innobase_hton= (handlerton*) p;
+ handlerton* innobase_hton= (handlerton*) p;
innodb_hton_ptr = innobase_hton;
innobase_hton->state = SHOW_OPTION_YES;
- innobase_hton->db_type= DB_TYPE_INNODB;
+ innobase_hton->db_type = DB_TYPE_INNODB;
innobase_hton->savepoint_offset = sizeof(trx_named_savept_t);
innobase_hton->close_connection = innobase_close_connection;
+ innobase_hton->kill_query = innobase_kill_query;
innobase_hton->savepoint_set = innobase_savepoint;
innobase_hton->savepoint_rollback = innobase_rollback_to_savepoint;
+
innobase_hton->savepoint_rollback_can_release_mdl =
innobase_rollback_to_savepoint_can_release_mdl;
+
innobase_hton->savepoint_release = innobase_release_savepoint;
innobase_hton->prepare_ordered= NULL;
innobase_hton->commit_ordered= innobase_commit_ordered;
@@ -3468,12 +3740,11 @@ innobase_init(
innobase_hton->commit_by_xid = innobase_commit_by_xid;
innobase_hton->rollback_by_xid = innobase_rollback_by_xid;
innobase_hton->commit_checkpoint_request=innobase_checkpoint_request;
- innobase_hton->create_cursor_read_view = innobase_create_cursor_view;
- innobase_hton->set_cursor_read_view = innobase_set_cursor_view;
- innobase_hton->close_cursor_read_view = innobase_close_cursor_view;
innobase_hton->create = innobase_create_handler;
+
innobase_hton->drop_database = innobase_drop_database;
innobase_hton->panic = innobase_end;
+ innobase_hton->partition_flags= innobase_partition_flags;
innobase_hton->start_consistent_snapshot =
innobase_start_trx_and_assign_read_view;
@@ -3483,19 +3754,14 @@ innobase_init(
innobase_hton->flags =
HTON_SUPPORTS_EXTENDED_KEYS | HTON_SUPPORTS_FOREIGN_KEYS;
- innobase_hton->release_temporary_latches =
- innobase_release_temporary_latches;
#ifdef WITH_WSREP
innobase_hton->abort_transaction=wsrep_abort_transaction;
innobase_hton->set_checkpoint=innobase_wsrep_set_checkpoint;
innobase_hton->get_checkpoint=innobase_wsrep_get_checkpoint;
innobase_hton->fake_trx_id=wsrep_fake_trx_id;
#endif /* WITH_WSREP */
- innobase_hton->kill_query = innobase_kill_query;
-
- if (srv_file_per_table)
- innobase_hton->tablefile_extensions = ha_innobase_exts;
+ innobase_hton->tablefile_extensions = ha_innobase_exts;
innobase_hton->table_options = innodb_table_option_list;
innodb_remember_check_sysvar_funcs();
@@ -3518,8 +3784,7 @@ innobase_init(
test_filename)) {
sql_print_error("tablename encoding has been changed");
-
- goto error;
+ DBUG_RETURN(innobase_init_abort());
}
#endif /* DBUG_OFF */
@@ -3527,22 +3792,27 @@ innobase_init(
if (sizeof(ulint) == 4) {
if (innobase_buffer_pool_size > UINT_MAX32) {
sql_print_error(
- "innobase_buffer_pool_size can't be over 4GB"
+ "innodb_buffer_pool_size can't be over 4GB"
" on 32-bit systems");
- goto error;
+ DBUG_RETURN(innobase_init_abort());
}
}
+ os_file_set_umask(my_umask);
+
+	/* Set up the memory alloc/free tracing mechanisms before calling
+	any functions that could possibly allocate memory. */
+ ut_new_boot();
+
	/* The buffer pool needs to be able to accommodate enough
	pages, even for larger page sizes */
if (UNIV_PAGE_SIZE > UNIV_PAGE_SIZE_DEF
&& innobase_buffer_pool_size < (24 * 1024 * 1024)) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "innodb_page_size= " ULINTPF " requires "
- "innodb_buffer_pool_size > 24M current %lld. ",
- UNIV_PAGE_SIZE,
- innobase_buffer_pool_size);
+ ib::info() << "innodb_page_size="
+ << UNIV_PAGE_SIZE << " requires "
+ << "innodb_buffer_pool_size > 24M current "
+ << innobase_buffer_pool_size;
goto error;
}
@@ -3550,10 +3820,25 @@ innobase_init(
/* Currently, Galera does not support VATS lock schedule algorithm. */
if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
&& global_system_variables.wsrep_on) {
- /* Do not allow InnoDB startup with VATS and Galera */
- sql_print_error("In Galera, innodb_lock_schedule_algorithm=vats"
- " is not supported.");
- goto error;
+ ib::info() << "For Galera, using innodb_lock_schedule_algorithm=fcfs";
+ innodb_lock_schedule_algorithm = INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS;
+ }
+
+ /* Print deprecation info if xtrabackup is used for SST method */
+ if (global_system_variables.wsrep_on
+ && wsrep_sst_method
+ && (!strcmp(wsrep_sst_method, "xtrabackup")
+ || !strcmp(wsrep_sst_method, "xtrabackup-v2"))) {
+		ib::info() << "Galera SST method xtrabackup is deprecated and"
+			" the support for it may be removed in future releases.";
+
+ /* We can't blindly turn on this as it will cause a
+ modification of the redo log format identifier. See
+ MDEV-13564 for more information. */
+ if (!srv_safe_truncate) {
+			ib::info() << "Requested xtrabackup-based SST for Galera, but"
+				<< " innodb_safe_truncate is disabled.";
+ }
}
#endif /* WITH_WSREP */
@@ -3602,14 +3887,16 @@ innobase_init(
}
#endif
- if ((srv_encrypt_tables || srv_encrypt_log)
+ if ((srv_encrypt_tables || srv_encrypt_log
+ || innodb_encrypt_temporary_tables)
&& !encryption_key_id_exists(FIL_DEFAULT_ENCRYPTION_KEY)) {
sql_print_error("InnoDB: cannot enable encryption, "
"encryption plugin is not available");
goto error;
}
- os_innodb_umask = (ulint) my_umask;
+ innodb_check_deprecated();
+
/* First calculate the default path for innodb_data_home_dir etc.,
in case the user has not given any value.
@@ -3619,7 +3906,6 @@ innobase_init(
if (mysqld_embedded) {
default_path = mysql_real_data_home;
- fil_path_to_mysql_datadir = mysql_real_data_home;
} else {
/* It's better to use current lib, to keep paths short */
current_dir[0] = FN_CURLIB;
@@ -3630,46 +3916,94 @@ innobase_init(
ut_a(default_path);
+ fil_path_to_mysql_datadir = default_path;
+
/* Set InnoDB initialization parameters according to the values
read from MySQL .cnf file */
- /*--------------- Data files -------------------------*/
-
/* The default dir for data files is the datadir of MySQL */
- srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir :
- default_path);
-
+ srv_data_home = innobase_data_home_dir
+ ? innobase_data_home_dir : default_path;
#ifdef WITH_WSREP
/* If we use the wsrep API, then we need to tell the server
the path to the data files (for passing it to the SST scripts): */
- wsrep_set_data_home_dir(innobase_data_home_dir);
+ wsrep_set_data_home_dir(srv_data_home);
#endif /* WITH_WSREP */
- /* Set default InnoDB data file size to 12 MB and let it be
- auto-extending. Thus users can use InnoDB in >= 4.0 without having
- to specify any startup options. */
+ /*--------------- Shared tablespaces -------------------------*/
+
+ /* Check that the value of system variable innodb_page_size was
+ set correctly. Its value was put into srv_page_size. If valid,
+ return the associated srv_page_size_shift. */
+ srv_page_size_shift = innodb_page_size_validate(srv_page_size);
+ if (!srv_page_size_shift) {
+ sql_print_error("InnoDB: Invalid page size=%lu.\n",
+ srv_page_size);
+ DBUG_RETURN(innobase_init_abort());
+ }
+
+	/* Set the default InnoDB data file size to 12 MB and let it be
+	auto-extending. */
if (!innobase_data_file_path) {
innobase_data_file_path = (char*) "ibdata1:12M:autoextend";
}
- /* Since InnoDB edits the argument in the next call, we make another
- copy of it: */
+ /* This is the first time univ_page_size is used.
+ It was initialized to 16k pages before srv_page_size was set */
+ univ_page_size.copy_from(
+ page_size_t(srv_page_size, srv_page_size, false));
- internal_innobase_data_file_path = my_strdup(innobase_data_file_path,
- MYF(MY_FAE));
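+	/* Describe the shared system tablespace (TRX_SYS_SPACE,
+	space id 0) before parsing innodb_data_file_path below. */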
+ srv_sys_space.set_space_id(TRX_SYS_SPACE);
+ srv_sys_space.set_flags(FSP_FLAGS_PAGE_SSIZE());
+ srv_sys_space.set_name("innodb_system");
+ srv_sys_space.set_path(srv_data_home);
- ret = (bool) srv_parse_data_file_paths_and_sizes(
- internal_innobase_data_file_path);
- if (ret == FALSE) {
- sql_print_error(
- "InnoDB: syntax error in innodb_data_file_path"
- " or size specified is less than 1 megabyte");
-mem_free_and_error:
- srv_free_paths_and_sizes();
- my_free(internal_innobase_data_file_path);
- goto error;
+ /* Supports raw devices */
+ if (!srv_sys_space.parse_params(innobase_data_file_path, true)) {
+ ib::error() << "Unable to parse innodb_data_file_path="
+ << innobase_data_file_path;
+ DBUG_RETURN(innobase_init_abort());
+ }
+
+ /* Set default InnoDB temp data file size to 12 MB and let it be
+ auto-extending. */
+
+ if (!innobase_temp_data_file_path) {
+ innobase_temp_data_file_path = (char*) "ibtmp1:12M:autoextend";
+ }
+
+	/* We set the temporary tablespace id later, after recovery.
+ The temp tablespace doesn't support raw devices.
+ Set the name and path. */
+ srv_tmp_space.set_name("innodb_temporary");
+ srv_tmp_space.set_path(srv_data_home);
+ srv_tmp_space.set_flags(FSP_FLAGS_PAGE_SSIZE());
+
+ if (!srv_tmp_space.parse_params(innobase_temp_data_file_path, false)) {
+ ib::error() << "Unable to parse innodb_temp_data_file_path="
+ << innobase_temp_data_file_path;
+ DBUG_RETURN(innobase_init_abort());
+ }
+
+	/* Perform all sanity checks before we take the action of deleting files. */
+ if (srv_sys_space.intersection(&srv_tmp_space)) {
+ sql_print_error("%s and %s file names seem to be the same.",
+ srv_tmp_space.name(), srv_sys_space.name());
+ DBUG_RETURN(innobase_init_abort());
+ }
+
+	/* ------------ UNDO tablespace files ---------------------*/
+ if (!srv_undo_dir) {
+ srv_undo_dir = default_path;
+ }
+
+ os_normalize_path(srv_undo_dir);
+
+ if (strchr(srv_undo_dir, ';')) {
+ sql_print_error("syntax error in innodb_undo_directory");
+ DBUG_RETURN(innobase_init_abort());
}
/* -------------- All log files ---------------------------*/
@@ -3680,43 +4014,36 @@ mem_free_and_error:
srv_log_group_home_dir = default_path;
}
-#ifdef UNIV_LOG_ARCHIVE
- /* Since innodb_log_arch_dir has no relevance under MySQL,
- starting from 4.0.6 we always set it the same as
- innodb_log_group_home_dir: */
+ os_normalize_path(srv_log_group_home_dir);
- innobase_log_arch_dir = innobase_log_group_home_dir;
+ if (strchr(srv_log_group_home_dir, ';')) {
+ sql_print_error("syntax error in innodb_log_group_home_dir");
+ DBUG_RETURN(innobase_init_abort());
+ }
- srv_arch_dir = innobase_log_arch_dir;
-#endif /* UNIG_LOG_ARCHIVE */
+ if (!innobase_large_prefix) {
+ ib::warn() << deprecated_large_prefix;
+ }
- srv_normalize_path_for_win(srv_log_group_home_dir);
+ if (!THDVAR(NULL, support_xa)) {
+ ib::warn() << deprecated_innodb_support_xa_off;
+ THDVAR(NULL, support_xa) = TRUE;
+ }
- if (strchr(srv_log_group_home_dir, ';')) {
- sql_print_error("syntax error in innodb_log_group_home_dir");
- goto mem_free_and_error;
+ if (innobase_file_format_name != innodb_file_format_default) {
+ ib::warn() << deprecated_file_format;
}
- if (innobase_mirrored_log_groups == 1) {
- sql_print_warning(
- "innodb_mirrored_log_groups is an unimplemented "
- "feature and the variable will be completely "
- "removed in a future version.");
+ if (innodb_instrument_semaphores) {
+ ib::warn() << deprecated_instrument_semaphores;
}
- if (innobase_mirrored_log_groups > 1) {
- sql_print_error(
- "innodb_mirrored_log_groups is an unimplemented feature and "
- "the variable will be completely removed in a future version. "
- "Using values other than 1 is not supported.");
- goto mem_free_and_error;
+ if (srv_use_mtflush) {
+ ib::warn() << deprecated_use_mtflush;
}
- if (innobase_mirrored_log_groups == 0) {
- /* To throw a deprecation warning message when the option is
- passed, the default was changed to '0' (as a workaround). Since
- the only value accepted for this option is '1', reset it to 1 */
- innobase_mirrored_log_groups = 1;
+ if (srv_use_mtflush && srv_mtflush_threads != MTFLUSH_DEFAULT_WORKER) {
+ ib::warn() << deprecated_mtflush_threads;
}
/* Validate the file format by animal name */
@@ -3729,7 +4056,7 @@ mem_free_and_error:
sql_print_error("InnoDB: wrong innodb_file_format.");
- goto mem_free_and_error;
+ DBUG_RETURN(innobase_init_abort());
}
} else {
/* Set it to the default file format id. Though this
@@ -3749,6 +4076,7 @@ mem_free_and_error:
/* Check innobase_file_format_check variable */
if (!innobase_file_format_check) {
+ ib::warn() << deprecated_file_format_check;
/* Set the value to disable checking. */
srv_max_file_format_at_startup = UNIV_FORMAT_MAX + 1;
@@ -3759,20 +4087,24 @@ mem_free_and_error:
srv_max_file_format_at_startup = UNIV_FORMAT_MIN;
}
+ if (innobase_file_format_max != innodb_file_format_max_default) {
+ ib::warn() << deprecated_file_format_max;
+ }
+
/* Did the user specify a format name that we support?
As a side effect it will update the variable
srv_max_file_format_at_startup */
if (innobase_file_format_validate_and_set(
innobase_file_format_max) < 0) {
- sql_print_error("InnoDB: invalid "
- "innodb_file_format_max value: "
- "should be any value up to %s or its "
- "equivalent numeric id",
+ sql_print_error("InnoDB: invalid"
+ " innodb_file_format_max value:"
+ " should be any value up to %s or its"
+ " equivalent numeric id",
trx_sys_file_format_id_to_name(
UNIV_FORMAT_MAX));
- goto mem_free_and_error;
+ DBUG_RETURN(innobase_init_abort());
}
if (innobase_change_buffering) {
@@ -3789,10 +4121,10 @@ mem_free_and_error:
}
}
- sql_print_error("InnoDB: invalid value "
- "innodb_change_buffering=%s",
+ sql_print_error("InnoDB: invalid value"
+ " innodb_change_buffering=%s",
innobase_change_buffering);
- goto mem_free_and_error;
+ DBUG_RETURN(innobase_init_abort());
}
innobase_change_buffering_inited_ok:
@@ -3820,16 +4152,15 @@ innobase_change_buffering_inited_ok:
} else {
/* The user has not set the value. We should
set it based on innodb_io_capacity. */
- srv_max_io_capacity = static_cast<ulong>(
- ut_max(2 * srv_io_capacity, 2000));
+ srv_max_io_capacity =
+ ut_max(2 * srv_io_capacity, 2000UL);
}
} else if (srv_max_io_capacity < srv_io_capacity) {
sql_print_warning("InnoDB: innodb_io_capacity"
" cannot be set higher than"
- " innodb_io_capacity_max.\n"
- "InnoDB: Setting"
- " innodb_io_capacity to %lu\n",
+ " innodb_io_capacity_max."
+				  " Setting innodb_io_capacity=%lu",
srv_max_io_capacity);
srv_io_capacity = srv_max_io_capacity;
@@ -3839,126 +4170,82 @@ innobase_change_buffering_inited_ok:
strlen(srv_buf_dump_filename), FALSE)) {
		sql_print_error("InnoDB: innodb_buffer_pool_filename"
			" cannot have a colon (:) in the file name.");
- goto mem_free_and_error;
+ DBUG_RETURN(innobase_init_abort());
}
/* --------------------------------------------------*/
srv_file_flush_method_str = innobase_file_flush_method;
- srv_log_file_size = (ib_uint64_t) innobase_log_file_size;
-
-#ifdef UNIV_LOG_ARCHIVE
- srv_log_archive_on = (ulint) innobase_log_archive;
-#endif /* UNIV_LOG_ARCHIVE */
-
- /* Check that the value of system variable innodb_page_size was
- set correctly. Its value was put into srv_page_size. If valid,
- return the associated srv_page_size_shift.*/
- srv_page_size_shift = innodb_page_size_validate(srv_page_size);
- if (!srv_page_size_shift) {
- sql_print_error("InnoDB: Invalid page size=%lu.\n",
- srv_page_size);
- goto mem_free_and_error;
- }
-
if (UNIV_PAGE_SIZE_DEF != srv_page_size) {
- ib_logf(IB_LOG_LEVEL_INFO,
- " innodb-page-size has been changed"
- " from the default value %d to " ULINTPF ".",
- UNIV_PAGE_SIZE_DEF, srv_page_size);
- }
+ ib::info() << "innodb_page_size=" << srv_page_size;
- srv_log_buffer_size = (ulint) innobase_log_buffer_size;
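+		/* With a non-default page size, make sure that
+		srv_max_undo_log_size can hold at least one undo
+		tablespace of SRV_UNDO_TABLESPACE_SIZE_IN_PAGES pages. */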
+ srv_max_undo_log_size = std::max(
+ srv_max_undo_log_size,
+ ulonglong(SRV_UNDO_TABLESPACE_SIZE_IN_PAGES)
+ * srv_page_size);
+ }
- if (innobase_buffer_pool_instances == 0) {
- innobase_buffer_pool_instances = 8;
+ if (srv_log_write_ahead_size > srv_page_size) {
+ srv_log_write_ahead_size = srv_page_size;
+ } else {
+ ulong srv_log_write_ahead_size_tmp = OS_FILE_LOG_BLOCK_SIZE;
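+		/* If the requested innodb_log_write_ahead_size is not
+		a power of two, round it down to the nearest power of
+		two. */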
-#if defined(__WIN__) && !defined(_WIN64)
- if (innobase_buffer_pool_size > 1331 * 1024 * 1024) {
- innobase_buffer_pool_instances
- = ut_min(MAX_BUFFER_POOLS,
- (long) (innobase_buffer_pool_size
- / (128 * 1024 * 1024)));
+ while (srv_log_write_ahead_size_tmp
+ < srv_log_write_ahead_size) {
+ srv_log_write_ahead_size_tmp
+ = srv_log_write_ahead_size_tmp * 2;
+ }
+ if (srv_log_write_ahead_size_tmp
+ != srv_log_write_ahead_size) {
+ srv_log_write_ahead_size
+ = srv_log_write_ahead_size_tmp / 2;
}
-#endif /* defined(__WIN__) && !defined(_WIN64) */
}
- srv_buf_pool_size = (ulint) innobase_buffer_pool_size;
- srv_buf_pool_instances = (ulint) innobase_buffer_pool_instances;
-
- srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
-
- if (innobase_additional_mem_pool_size
- != 8*1024*1024L /* the default */ ) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: Using "
- "innodb_additional_mem_pool_size is DEPRECATED. "
- "This option may be removed in future releases, "
- "together with the option innodb_use_sys_malloc "
- "and with the InnoDB's internal memory "
- "allocator.\n");
- }
+ srv_log_buffer_size = (ulint) innobase_log_buffer_size;
- if (!srv_use_sys_malloc ) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: Setting "
- "innodb_use_sys_malloc to FALSE is DEPRECATED. "
- "This option may be removed in future releases, "
- "together with the InnoDB's internal memory "
- "allocator.\n");
- }
+ srv_buf_pool_size = (ulint) innobase_buffer_pool_size;
- srv_n_file_io_threads = (ulint) innobase_file_io_threads;
srv_n_read_io_threads = (ulint) innobase_read_io_threads;
srv_n_write_io_threads = (ulint) innobase_write_io_threads;
srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
if (!innobase_use_checksums) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: Setting "
- "innodb_checksums to OFF is DEPRECATED. "
- "This option may be removed in future releases. "
- "You should set innodb_checksum_algorithm=NONE "
- "instead.\n");
+ ib::warn() << "Setting innodb_checksums to OFF is DEPRECATED."
+ " This option may be removed in future releases. You"
+ " should set innodb_checksum_algorithm=NONE instead.";
srv_checksum_algorithm = SRV_CHECKSUM_ALGORITHM_NONE;
}
-#ifdef HAVE_LARGE_PAGES
- if ((os_use_large_pages = (ibool) my_use_large_pages)) {
- os_large_page_size = (ulint) opt_large_page_size;
- }
-#endif
+ innodb_log_checksums = innodb_log_checksums_func_update(
+ NULL, innodb_log_checksums);
row_rollback_on_timeout = (ibool) innobase_rollback_on_timeout;
srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog;
if (innobase_locks_unsafe_for_binlog) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: Using "
- "innodb_locks_unsafe_for_binlog is DEPRECATED. "
- "This option may be removed in future releases. "
- "Please use READ COMMITTED transaction isolation "
- "level instead, see " REFMAN "set-transaction.html.\n");
+ ib::warn() << "Using innodb_locks_unsafe_for_binlog is"
+ " DEPRECATED. This option may be removed in future"
+ " releases. Please use READ COMMITTED transaction"
+ " isolation level instead; " << SET_TRANSACTION_MSG;
}
if (innobase_open_files < 10) {
innobase_open_files = 300;
- if (srv_file_per_table && tc_size > 300) {
+ if (srv_file_per_table && tc_size > 300 && tc_size < open_files_limit) {
innobase_open_files = tc_size;
}
}
- if (innobase_open_files > (long) tc_size) {
- fprintf(stderr,
- "innodb_open_files should not be greater"
- " than the open_files_limit.\n");
- innobase_open_files = tc_size;
+ if (innobase_open_files > (long) open_files_limit) {
+ ib::warn() << "innodb_open_files " << innobase_open_files
+ << " should not be greater"
+ << " than the open_files_limit " << open_files_limit;
+ if (innobase_open_files > (long) tc_size) {
+ innobase_open_files = tc_size;
+ }
}
srv_max_n_open_files = (ulint) innobase_open_files;
@@ -3981,50 +4268,36 @@ innobase_change_buffering_inited_ok:
data_mysql_default_charset_coll = (ulint) default_charset_info->number;
- ut_a(DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL ==
- my_charset_latin1.number);
- ut_a(DATA_MYSQL_BINARY_CHARSET_COLL == my_charset_bin.number);
-
- /* Store the latin1_swedish_ci character ordering table to InnoDB. For
- non-latin1_swedish_ci charsets we use the MySQL comparison functions,
- and consequently we do not need to know the ordering internally in
- InnoDB. */
-
- srv_latin1_ordering = my_charset_latin1.sort_order;
-
innobase_commit_concurrency_init_default();
-#ifdef HAVE_POSIX_FALLOCATE
- srv_use_posix_fallocate = (ibool) innobase_use_fallocate;
-#endif
- srv_use_atomic_writes = (ibool) innobase_use_atomic_writes;
-
- if (innobase_use_atomic_writes) {
- fprintf(stderr, "InnoDB: using atomic writes.\n");
+ if (innobase_use_fallocate) {
+ ib::warn() << "innodb_use_fallocate is DEPRECATED"
+ " and has no effect in MariaDB 10.2."
+ " It will be removed in MariaDB 10.3.";
+ }
- /* Force doublewrite buffer off, atomic writes replace it. */
- if (srv_use_doublewrite_buf) {
- fprintf(stderr, "InnoDB: Switching off doublewrite buffer "
- "because of atomic writes.\n");
- innobase_use_doublewrite = srv_use_doublewrite_buf = FALSE;
- }
+ srv_use_atomic_writes
+ = innobase_use_atomic_writes && my_may_have_atomic_write;
+ if (srv_use_atomic_writes && !srv_file_per_table)
+ {
+ fprintf(stderr, "InnoDB: Disabling atomic_writes as file_per_table is not used.\n");
+ srv_use_atomic_writes= 0;
+ }
- /* Force O_DIRECT on Unixes (on Windows writes are always unbuffered)*/
+ if (srv_use_atomic_writes) {
+ fprintf(stderr, "InnoDB: using atomic writes.\n");
+ /*
+ Force O_DIRECT on Unixes (on Windows writes are always
+ unbuffered)
+ */
#ifndef _WIN32
- if(!innobase_file_flush_method ||
+ if (!innobase_file_flush_method ||
!strstr(innobase_file_flush_method, "O_DIRECT")) {
innobase_file_flush_method =
srv_file_flush_method_str = (char*)"O_DIRECT";
fprintf(stderr, "InnoDB: using O_DIRECT due to atomic writes.\n");
}
#endif
-#ifdef HAVE_POSIX_FALLOCATE
- /* Due to a bug in directFS, using atomics needs
- * posix_fallocate to extend the file
- * pwrite() past end of the file won't work
- */
- srv_use_posix_fallocate = TRUE;
-#endif
}
#ifdef HAVE_PSI_INTERFACE
@@ -4032,7 +4305,7 @@ innobase_change_buffering_inited_ok:
int count;
count = array_elements(all_pthread_mutexes);
- mysql_mutex_register("innodb", all_pthread_mutexes, count);
+ mysql_mutex_register("innodb", all_pthread_mutexes, count);
# ifdef UNIV_PFS_MUTEX
count = array_elements(all_innodb_mutexes);
@@ -4058,37 +4331,37 @@ innobase_change_buffering_inited_ok:
mysql_cond_register("innodb", all_innodb_conds, count);
#endif /* HAVE_PSI_INTERFACE */
- /* Since we in this module access directly the fields of a trx
- struct, and due to different headers and flags it might happen that
- ib_mutex_t has a different size in this module and in InnoDB
- modules, we check at run time that the size is the same in
- these compilation modules. */
-
err = innobase_start_or_create_for_mysql();
+ innobase_buffer_pool_size = static_cast<long long>(srv_buf_pool_size);
+
if (err != DB_SUCCESS) {
- goto mem_free_and_error;
+ innodb_shutdown();
+ DBUG_RETURN(innobase_init_abort());
+ } else if (!srv_read_only_mode) {
+ mysql_thread_create(thd_destructor_thread_key,
+ &thd_destructor_thread,
+ NULL, thd_destructor_proxy, NULL);
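+		/* Wait until thd_destructor_proxy has started and
+		registered itself in srv_running. */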
+ while (!my_atomic_loadptr_explicit(reinterpret_cast<void**>
+ (&srv_running),
+ MY_MEMORY_ORDER_RELAXED))
+ os_thread_sleep(20);
}
- /* Adjust the innodb_undo_logs config object */
- innobase_undo_logs_init_default_max();
+ srv_was_started = true;
+ innodb_params_adjust();
innobase_old_blocks_pct = static_cast<uint>(
buf_LRU_old_ratio_update(innobase_old_blocks_pct, TRUE));
- ibuf_max_size_update(innobase_change_buffer_max_size);
+ ibuf_max_size_update(srv_change_buffer_max_size);
- innobase_open_tables = hash_create(200);
- mysql_mutex_init(innobase_share_mutex_key,
- &innobase_share_mutex,
- MY_MUTEX_INIT_FAST);
mysql_mutex_init(commit_cond_mutex_key,
&commit_cond_m, MY_MUTEX_INIT_FAST);
- mysql_cond_init(commit_cond_key, &commit_cond, NULL);
+ mysql_cond_init(commit_cond_key, &commit_cond, 0);
mysql_mutex_init(pending_checkpoint_mutex_key,
&pending_checkpoint_mutex,
MY_MUTEX_INIT_FAST);
- innodb_inited= 1;
#ifdef MYSQL_DYNAMIC_PLUGIN
if (innobase_hton != p) {
innobase_hton = reinterpret_cast<handlerton*>(p);
@@ -4115,9 +4388,30 @@ innobase_change_buffering_inited_ok:
/* Turn on monitor counters that are default on */
srv_mon_default_on();
- DBUG_RETURN(FALSE);
+
+ /* Unit Tests */
+#ifdef UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR
+ unit_test_os_file_get_parent_dir();
+#endif /* UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR */
+
+#ifdef UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH
+ test_make_filepath();
+#endif /*UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH */
+
+#ifdef UNIV_ENABLE_DICT_STATS_TEST
+ test_dict_stats_all();
+#endif /*UNIV_ENABLE_DICT_STATS_TEST */
+
+#ifdef UNIV_ENABLE_UNIT_TEST_ROW_RAW_FORMAT_INT
+# ifdef HAVE_UT_CHRONO_T
+ test_row_raw_format_int();
+# endif /* HAVE_UT_CHRONO_T */
+#endif /* UNIV_ENABLE_UNIT_TEST_ROW_RAW_FORMAT_INT */
+
+ DBUG_RETURN(0);
+
error:
- DBUG_RETURN(TRUE);
+ DBUG_RETURN(1);
}
/** Shut down the InnoDB storage engine.
@@ -4128,8 +4422,7 @@ innobase_end(handlerton*, ha_panic_function)
{
DBUG_ENTER("innobase_end");
- if (innodb_inited) {
-
+ if (srv_was_started) {
THD *thd= current_thd;
if (thd) { // may be UNINSTALL PLUGIN statement
trx_t* trx = thd_to_trx(thd);
@@ -4138,15 +4431,25 @@ innobase_end(handlerton*, ha_panic_function)
}
}
- srv_fast_shutdown = (ulint) innobase_fast_shutdown;
+ st_my_thread_var* r = reinterpret_cast<st_my_thread_var*>(
+ my_atomic_loadptr_explicit(
+ reinterpret_cast<void**>(&srv_running),
+ MY_MEMORY_ORDER_RELAXED));
+ if (r) {
+ ut_ad(!srv_read_only_mode);
+ if (!abort_loop) {
+ // may be UNINSTALL PLUGIN statement
+ mysql_mutex_lock(r->current_mutex);
+ r->abort = 1;
+ mysql_cond_broadcast(r->current_cond);
+ mysql_mutex_unlock(r->current_mutex);
+ }
+ pthread_join(thd_destructor_thread, NULL);
+ }
- innodb_inited = 0;
- hash_table_free(innobase_open_tables);
- innobase_open_tables = NULL;
innodb_shutdown();
- srv_free_paths_and_sizes();
- my_free(internal_innobase_data_file_path);
- mysql_mutex_destroy(&innobase_share_mutex);
+ innobase_space_shutdown();
+
mysql_mutex_destroy(&commit_cond_m);
mysql_cond_destroy(&commit_cond);
mysql_mutex_destroy(&pending_checkpoint_mutex);
@@ -4155,31 +4458,8 @@ innobase_end(handlerton*, ha_panic_function)
DBUG_RETURN(0);
}
-/****************************************************************//**
-Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes
-the logs, and the name of this function should be innobase_checkpoint.
-@return TRUE if error */
-static
-bool
-innobase_flush_logs(
-/*================*/
- handlerton* hton) /*!< in/out: InnoDB handlerton */
-{
- bool result = 0;
-
- DBUG_ENTER("innobase_flush_logs");
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- if (!srv_read_only_mode) {
- log_buffer_flush_to_disk();
- }
-
- DBUG_RETURN(result);
-}
-
/*****************************************************************//**
Commits a transaction in an InnoDB database. */
-static
void
innobase_commit_low(
/*================*/
@@ -4204,6 +4484,7 @@ innobase_commit_low(
trx_commit_for_mysql(trx);
}
+ trx->will_lock = 0;
#ifdef WITH_WSREP
if (trx->is_wsrep()) { thd_proc_info(trx->mysql_thd, tmp); }
#endif /* WITH_WSREP */
@@ -4214,36 +4495,29 @@ Creates an InnoDB transaction struct for the thd if it does not yet have one.
Starts a new InnoDB transaction if a transaction is not yet started. And
assigns a new snapshot for a consistent read if the transaction does not yet
have one.
-@return 0 */
+@return 0 */
static
int
innobase_start_trx_and_assign_read_view(
/*====================================*/
- handlerton* hton, /*!< in: Innodb handlerton */
+ handlerton* hton, /*!< in: InnoDB handlerton */
THD* thd) /*!< in: MySQL thread handle of the user for
whom the transaction should be committed */
{
- trx_t* trx;
-
DBUG_ENTER("innobase_start_trx_and_assign_read_view");
DBUG_ASSERT(hton == innodb_hton_ptr);
/* Create a new trx struct for thd, if it does not yet have one */
- trx = check_trx_exists(thd);
-
- /* This is just to play safe: release a possible FIFO ticket and
- search latch. Since we can potentially reserve the trx_sys->mutex,
- we have to release the search system latch first to obey the latching
- order. */
-
- trx_search_latch_release_if_reserved(trx);
+ trx_t* trx = check_trx_exists(thd);
innobase_srv_conc_force_exit_innodb(trx);
- /* If the transaction is not started yet, start it */
+	/* The transaction should not be active yet; start it. */
+
+ ut_ad(!trx_is_started(trx));
- trx_start_if_not_started_xa(trx);
+ trx_start_if_not_started_xa(trx, false);
/* Assign a read view if the transaction does not have it yet.
Do this only if transaction is using REPEATABLE READ isolation
@@ -4256,10 +4530,10 @@ innobase_start_trx_and_assign_read_view(
} else {
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
HA_ERR_UNSUPPORTED,
- "InnoDB: WITH CONSISTENT SNAPSHOT "
- "was ignored because this phrase "
- "can only be used with "
- "REPEATABLE READ isolation level.");
+ "InnoDB: WITH CONSISTENT SNAPSHOT"
+ " was ignored because this phrase"
+ " can only be used with"
+ " REPEATABLE READ isolation level.");
}
/* Set the MySQL flag to mark that there is an active transaction */
@@ -4272,48 +4546,75 @@ innobase_start_trx_and_assign_read_view(
static
void
innobase_commit_ordered_2(
-/*============*/
+/*======================*/
trx_t* trx, /*!< in: Innodb transaction */
THD* thd) /*!< in: MySQL thread handle */
{
DBUG_ENTER("innobase_commit_ordered_2");
- /* We need current binlog position for mysqlbackup to work.
- Note, the position is current because commit_ordered is guaranteed
- to be called in same sequenece as writing to binlog. */
+ bool read_only = trx->read_only || trx->id == 0;
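+	/* A transaction with trx->id == 0 has made no persistent
+	changes; it can skip the binlog position bookkeeping and the
+	commit-concurrency throttling below. */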
-retry:
- if (innobase_commit_concurrency > 0) {
- mysql_mutex_lock(&commit_cond_m);
- commit_threads++;
+ if (!read_only) {
+
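+		/* Throttle the number of threads inside the commit
+		section to innobase_commit_concurrency; excess threads
+		wait on commit_cond. */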
+ while (innobase_commit_concurrency > 0) {
+
+ mysql_mutex_lock(&commit_cond_m);
+
+ ++commit_threads;
+
+ if (commit_threads
+ <= innobase_commit_concurrency) {
+
+ mysql_mutex_unlock(&commit_cond_m);
+ break;
+ }
+
+ --commit_threads;
+
+ mysql_cond_wait(&commit_cond, &commit_cond_m);
- if (commit_threads > innobase_commit_concurrency) {
- commit_threads--;
- mysql_cond_wait(&commit_cond,
- &commit_cond_m);
- mysql_mutex_unlock(&commit_cond_m);
- goto retry;
- }
- else {
mysql_mutex_unlock(&commit_cond_m);
}
+
+ /* The following call reads the binary log position of
+ the transaction being committed.
+
+	Binary logging of other engines is not relevant to
+	InnoDB, as all that InnoDB requires is that committing
+ InnoDB transactions appear in the same order in the
+ MySQL binary log as they appear in InnoDB logs, which
+ is guaranteed by the server.
+
+ If the binary log is not enabled, or the transaction
+ is not written to the binary log, the file name will
+ be a NULL pointer. */
+ ulonglong pos;
+
+ thd_binlog_pos(thd, &trx->mysql_log_file_name, &pos);
+
+ trx->mysql_log_offset = static_cast<int64_t>(pos);
+
+ /* Don't do write + flush right now. For group commit
+ to work we want to do the flush later. */
+ trx->flush_log_later = true;
}
- unsigned long long pos;
- thd_binlog_pos(thd, &trx->mysql_log_file_name, &pos);
- trx->mysql_log_offset= static_cast<ib_int64_t>(pos);
- /* Don't do write + flush right now. For group commit
- to work we want to do the flush in the innobase_commit()
- method, which runs without holding any locks. */
- trx->flush_log_later = TRUE;
innobase_commit_low(trx);
- trx->flush_log_later = FALSE;
- if (innobase_commit_concurrency > 0) {
- mysql_mutex_lock(&commit_cond_m);
- commit_threads--;
- mysql_cond_signal(&commit_cond);
- mysql_mutex_unlock(&commit_cond_m);
+ if (!read_only) {
+ trx->flush_log_later = false;
+
+ if (innobase_commit_concurrency > 0) {
+
+ mysql_mutex_lock(&commit_cond_m);
+
+ ut_ad(commit_threads > 0);
+ --commit_threads;
+
+ mysql_cond_signal(&commit_cond);
+
+ mysql_mutex_unlock(&commit_cond_m);
+ }
}
DBUG_VOID_RETURN;
@@ -4334,7 +4635,7 @@ the one handling the rest of the transaction. */
static
void
innobase_commit_ordered(
-/*============*/
+/*====================*/
handlerton *hton, /*!< in: Innodb handlerton */
THD* thd, /*!< in: MySQL thread handle of the user for whom
the transaction should be committed */
@@ -4347,12 +4648,6 @@ innobase_commit_ordered(
trx = check_trx_exists(thd);
- /* Since we will reserve the kernel mutex, we must not be holding the
- search system latch, or we will disobey the latching order. But we
- already released it in innobase_xa_prepare() (if not before), so just
- have an assert here.*/
- ut_ad(!trx->has_search_latch);
-
if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
/* We cannot throw error here; instead we will catch this error
again in innobase_commit() and report it from there. */
@@ -4366,7 +4661,7 @@ innobase_commit_ordered(
innobase_commit_ordered_2(trx, thd);
- trx_set_active_commit_ordered(trx);
+ trx_set_active_commit_ordered(trx);
DBUG_VOID_RETURN;
}
@@ -4374,12 +4669,13 @@ innobase_commit_ordered(
/*****************************************************************//**
Commits a transaction in an InnoDB database or marks an SQL statement
ended.
-@return 0 */
+@return 0 or deadlock error if the transaction was aborted by another
+ higher priority transaction. */
static
int
innobase_commit(
/*============*/
- handlerton* hton, /*!< in: Innodb handlerton */
+ handlerton* hton, /*!< in: InnoDB handlerton */
THD* thd, /*!< in: MySQL thread handle of the
user for whom the transaction should
be committed */
@@ -4387,20 +4683,15 @@ innobase_commit(
false - the current SQL statement
ended */
{
- trx_t* trx;
-
DBUG_ENTER("innobase_commit");
+ DBUG_PRINT("enter", ("commit_trx: %d", commit_trx));
DBUG_ASSERT(hton == innodb_hton_ptr);
DBUG_PRINT("trans", ("ending transaction"));
- trx = check_trx_exists(thd);
-
- /* Since we will reserve the trx_sys->mutex, we have to release
- the search system latch first to obey the latching order. */
+ trx_t* trx = check_trx_exists(thd);
- if (trx->has_search_latch && !trx_is_active_commit_ordered(trx)) {
- trx_search_latch_release_if_reserved(trx);
- }
+ ut_ad(trx->dict_operation_lock_mode == 0);
+ ut_ad(trx->dict_operation == TRX_DICT_OP_NONE);
/* Transaction is deregistered only in a commit or a rollback. If
it is deregistered we know there cannot be resources to be freed
@@ -4409,16 +4700,23 @@ innobase_commit(
if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
- sql_print_error("Transaction not registered for MySQL 2PC, "
- "but transaction is active");
+ sql_print_error("Transaction not registered for MariaDB 2PC,"
+ " but transaction is active");
}
+ bool read_only = trx->read_only || trx->id == 0;
+ DBUG_PRINT("info", ("readonly: %d", read_only));
+
if (commit_trx
|| (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
+ DBUG_EXECUTE_IF("crash_innodb_before_commit",
+ DBUG_SUICIDE(););
+
/* Run the fast part of commit if we did not already. */
if (!trx_is_active_commit_ordered(trx)) {
innobase_commit_ordered_2(trx, thd);
+
}
/* We were instructed to commit the whole transaction, or
@@ -4429,10 +4727,10 @@ innobase_commit(
this one, to allow then to group commit with us. */
thd_wakeup_subsequent_commits(thd, 0);
- /* We did the first part already in innobase_commit_ordered(),
- Now finish by doing a write + flush of logs. */
+ /* Now do a write + flush of logs. */
trx_commit_complete_for_mysql(trx);
- trx_deregister_from_2pc(trx);
+
+ trx_deregister_from_2pc(trx);
} else {
/* We just mark the SQL statement ended and do not do a
transaction commit */
@@ -4440,7 +4738,9 @@ innobase_commit(
/* If we had reserved the auto-inc lock for some
table in this SQL statement we release it now */
- lock_unlock_table_autoinc(trx);
+ if (!read_only) {
+ lock_unlock_table_autoinc(trx);
+ }
/* Store the current undo_no of the transaction so that we
know where to roll back if we have to roll back the next
@@ -4449,7 +4749,8 @@ innobase_commit(
trx_mark_sql_stat_end(trx);
}
- trx->n_autoinc_rows = 0; /* Reset the number AUTO-INC rows required */
+	/* Reset the number of AUTO-INC rows required */
+ trx->n_autoinc_rows = 0;
/* This is a statement level variable. */
trx->fts_next_doc_id = 0;
@@ -4461,12 +4762,12 @@ innobase_commit(
/*****************************************************************//**
Rolls back a transaction or the latest SQL statement.
-@return 0 or error number */
+@return 0 or error number */
static
int
innobase_rollback(
/*==============*/
- handlerton* hton, /*!< in: Innodb handlerton */
+ handlerton* hton, /*!< in: InnoDB handlerton */
THD* thd, /*!< in: handle to the MySQL thread
of the user whose transaction should
be rolled back */
@@ -4474,79 +4775,72 @@ innobase_rollback(
transaction FALSE - rollback the current
statement only */
{
- dberr_t error;
- trx_t* trx;
-
DBUG_ENTER("innobase_rollback");
DBUG_ASSERT(hton == innodb_hton_ptr);
DBUG_PRINT("trans", ("aborting transaction"));
- trx = check_trx_exists(thd);
+ trx_t* trx = check_trx_exists(thd);
- /* Release a possible FIFO ticket and search latch. Since we will
- reserve the trx_sys->mutex, we have to release the search system
- latch first to obey the latching order. */
-
- trx_search_latch_release_if_reserved(trx);
+ ut_ad(trx->dict_operation_lock_mode == 0);
+ ut_ad(trx->dict_operation == TRX_DICT_OP_NONE);
innobase_srv_conc_force_exit_innodb(trx);
- trx->n_autoinc_rows = 0; /* Reset the number AUTO-INC rows required */
+	/* Reset the number of AUTO-INC rows required */
+
+ trx->n_autoinc_rows = 0;
/* If we had reserved the auto-inc lock for some table (if
we come here to roll back the latest SQL statement) we
release it now before a possibly lengthy rollback */
-
lock_unlock_table_autoinc(trx);
/* This is a statement level variable. */
+
trx->fts_next_doc_id = 0;
+ dberr_t error;
+
if (rollback_trx
|| !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
error = trx_rollback_for_mysql(trx);
+
trx_deregister_from_2pc(trx);
} else {
+
error = trx_rollback_last_sql_stat_for_mysql(trx);
}
- DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
+ DBUG_RETURN(convert_error_code_to_mysql(error, 0, trx->mysql_thd));
}
/*****************************************************************//**
Rolls back a transaction
-@return 0 or error number */
+@return 0 or error number */
static
int
innobase_rollback_trx(
/*==================*/
trx_t* trx) /*!< in: transaction */
{
- dberr_t error = DB_SUCCESS;
-
DBUG_ENTER("innobase_rollback_trx");
DBUG_PRINT("trans", ("aborting transaction"));
- /* Release a possible FIFO ticket and search latch. Since we will
- reserve the trx_sys->mutex, we have to release the search system
- latch first to obey the latching order. */
-
- trx_search_latch_release_if_reserved(trx);
-
innobase_srv_conc_force_exit_innodb(trx);
/* If we had reserved the auto-inc lock for some table (if
we come here to roll back the latest SQL statement) we
release it now before a possibly lengthy rollback */
-
lock_unlock_table_autoinc(trx);
- if (!trx->read_only) {
- error = trx_rollback_for_mysql(trx);
+ if (!trx->has_logged()) {
+ trx->will_lock = 0;
+ DBUG_RETURN(0);
}
- DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
+ DBUG_RETURN(convert_error_code_to_mysql(trx_rollback_for_mysql(trx),
+ 0, trx->mysql_thd));
}
@@ -4625,7 +4919,7 @@ checkpoint when necessary.*/
UNIV_INTERN
void
innobase_mysql_log_notify(
-/*===============*/
+/*======================*/
ib_uint64_t write_lsn, /*!< in: LSN written to log file */
ib_uint64_t flush_lsn) /*!< in: LSN flushed to disk */
{
@@ -4698,35 +4992,29 @@ static
int
innobase_rollback_to_savepoint(
/*===========================*/
- handlerton* hton, /*!< in: Innodb handlerton */
+ handlerton* hton, /*!< in: InnoDB handlerton */
THD* thd, /*!< in: handle to the MySQL thread
of the user whose transaction should
be rolled back to savepoint */
void* savepoint) /*!< in: savepoint data */
{
- ib_int64_t mysql_binlog_cache_pos;
- dberr_t error;
- trx_t* trx;
- char name[64];
DBUG_ENTER("innobase_rollback_to_savepoint");
DBUG_ASSERT(hton == innodb_hton_ptr);
- trx = check_trx_exists(thd);
-
- /* Release a possible FIFO ticket and search latch. Since we will
- reserve the trx_sys->mutex, we have to release the search system
- latch first to obey the latching order. */
-
- trx_search_latch_release_if_reserved(trx);
+ trx_t* trx = check_trx_exists(thd);
innobase_srv_conc_force_exit_innodb(trx);
/* TODO: use provided savepoint data area to store savepoint data */
+ char name[64];
+
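+	/* The savepoint address itself serves as the savepoint name,
+	encoded in base 36. */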
longlong2str((ulint) savepoint, name, 36);
- error = trx_rollback_to_savepoint_for_mysql(
+ int64_t mysql_binlog_cache_pos;
+
+ dberr_t error = trx_rollback_to_savepoint_for_mysql(
trx, name, &mysql_binlog_cache_pos);
if (error == DB_SUCCESS && trx->fts_trx != NULL) {
@@ -4751,17 +5039,15 @@ innobase_rollback_to_savepoint_can_release_mdl(
of the user whose transaction should
be rolled back to savepoint */
{
- trx_t* trx;
-
DBUG_ENTER("innobase_rollback_to_savepoint_can_release_mdl");
DBUG_ASSERT(hton == innodb_hton_ptr);
- trx = check_trx_exists(thd);
- ut_ad(trx);
+ trx_t* trx = check_trx_exists(thd);
+
+ /* If transaction has not acquired any locks then it is safe
+ to release MDL after rollback to savepoint */
+ if (UT_LIST_GET_LEN(trx->lock.trx_locks) == 0) {
- /* If transaction has not acquired any locks then it is safe
- to release MDL after rollback to savepoint */
- if (!(UT_LIST_GET_LEN(trx->lock.trx_locks))) {
DBUG_RETURN(true);
}
@@ -4776,7 +5062,7 @@ static
int
innobase_release_savepoint(
/*=======================*/
- handlerton* hton, /*!< in: handlerton for Innodb */
+ handlerton* hton, /*!< in: handlerton for InnoDB */
THD* thd, /*!< in: handle to the MySQL thread
of the user whose transaction's
savepoint should be released */
@@ -4791,10 +5077,6 @@ innobase_release_savepoint(
trx = check_trx_exists(thd);
- if (trx->state == TRX_STATE_NOT_STARTED) {
- trx_start_if_not_started(trx);
- }
-
/* TODO: use provided savepoint data area to store savepoint data */
longlong2str((ulint) savepoint, name, 36);
@@ -4810,18 +5092,15 @@ innobase_release_savepoint(
/*****************************************************************//**
Sets a transaction savepoint.
-@return always 0, that is, always succeeds */
+@return always 0, that is, always succeeds */
static
int
innobase_savepoint(
/*===============*/
- handlerton* hton, /*!< in: handle to the Innodb handlerton */
- THD* thd, /*!< in: handle to the MySQL thread */
- void* savepoint) /*!< in: savepoint data */
+ handlerton* hton, /*!< in: handle to the InnoDB handlerton */
+ THD* thd, /*!< in: handle to the MySQL thread */
+ void* savepoint)/*!< in: savepoint data */
{
- dberr_t error;
- trx_t* trx;
-
DBUG_ENTER("innobase_savepoint");
DBUG_ASSERT(hton == innodb_hton_ptr);
@@ -4829,13 +5108,7 @@ innobase_savepoint(
(unless we are in sub-statement), so SQL layer ensures that
this method is never called in such situation. */
- trx = check_trx_exists(thd);
-
- /* Release a possible FIFO ticket and search latch. Since we will
- reserve the trx_sys->mutex, we have to release the search system
- latch first to obey the latching order. */
-
- trx_search_latch_release_if_reserved(trx);
+ trx_t* trx = check_trx_exists(thd);
innobase_srv_conc_force_exit_innodb(trx);
@@ -4843,10 +5116,11 @@ innobase_savepoint(
DBUG_ASSERT(trx_is_registered_for_2pc(trx));
/* TODO: use provided savepoint data area to store savepoint data */
- char name[64];
+ char name[64];
+
	longlong2str((ulint) savepoint, name, 36);
- error = trx_savepoint_for_mysql(trx, name, (ib_int64_t)0);
+ dberr_t error = trx_savepoint_for_mysql(trx, name, 0);
if (error == DB_SUCCESS && trx->fts_trx != NULL) {
fts_savepoint_take(trx, trx->fts_trx, name);
@@ -4857,7 +5131,7 @@ innobase_savepoint(
/*****************************************************************//**
Frees a possible InnoDB trx object associated with the current THD.
-@return 0 or error number */
+@return 0 or error number */
static
int
innobase_close_connection(
@@ -4866,53 +5140,63 @@ innobase_close_connection(
THD* thd) /*!< in: handle to the MySQL thread of the user
whose resources should be free'd */
{
- trx_t* trx;
DBUG_ENTER("innobase_close_connection");
DBUG_ASSERT(hton == innodb_hton_ptr);
- trx = thd_to_trx(thd);
-
- ut_a(trx);
-
- if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
- sql_print_error("Transaction not registered for MySQL 2PC, "
- "but transaction is active");
- }
+ trx_t* trx = thd_to_trx(thd);
- if (trx_is_started(trx) && global_system_variables.log_warnings) {
+	/* During server initialization the MySQL layer will try to open
+	some of the master-slave tables residing in InnoDB. After the
+	MySQL layer is done with the needed checks, these tables are
+	closed, followed by an invocation of close_connection on the
+	associated thd.
- sql_print_warning(
- "MySQL is closing a connection that has an active "
- "InnoDB transaction. " TRX_ID_FMT " row modifications "
- "will roll back.",
- trx->undo_no);
- }
+	close_connection rolls back the trx and then frees it. Once the
+	trx is freed, thd must not keep a reference to it, as that
+	would be a stale reference.
- innobase_rollback_trx(trx);
+	A re-invocation of innodb_close_connection on the same thd
+	should then see trx as NULL. */
- trx_free_for_mysql(trx);
+ if (trx) {
- DBUG_RETURN(0);
-}
+ if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
-/*****************************************************************//**
-Frees a possible InnoDB trx object associated with the current THD.
-@return 0 or error number */
-UNIV_INTERN
-int
-innobase_close_thd(
-/*===============*/
- THD* thd) /*!< in: handle to the MySQL thread of the user
- whose resources should be free'd */
-{
- trx_t* trx = thd_to_trx(thd);
+ sql_print_error("Transaction not registered for MariaDB 2PC, "
+ "but transaction is active");
+ }
- if (!trx) {
- return(0);
+ /* Disconnect causes rollback in the following cases:
+ - trx is not started, or
+		- trx is *not* in PREPARED state, or
+ - trx has not updated any persistent data.
+ TODO/FIXME: it does not make sense to initiate rollback
+ in the 1st and 3rd case. */
+ if (trx_is_started(trx)) {
+ if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
+ if (trx->has_logged_persistent()) {
+ trx_disconnect_prepared(trx);
+ } else {
+ trx_deregister_from_2pc(trx);
+ goto rollback_and_free;
+ }
+ } else {
+ sql_print_warning(
+ "MariaDB is closing a connection that has an active "
+ "InnoDB transaction. " TRX_ID_FMT " row modifications "
+ "will roll back.",
+ trx->undo_no);
+ goto rollback_and_free;
+ }
+ } else {
+rollback_and_free:
+ innobase_rollback_trx(trx);
+ trx_free_for_mysql(trx);
+ }
}
- return(innobase_close_connection(innodb_hton_ptr, thd));
+ DBUG_RETURN(0);
}
UNIV_INTERN void lock_cancel_waiting_and_release(lock_t* lock);
@@ -4936,35 +5220,8 @@ static void innobase_kill_query(handlerton*, THD* thd, enum thd_kill_levels)
if (trx_t* trx = thd_to_trx(thd)) {
ut_ad(trx->mysql_thd == thd);
-
- switch (trx->abort_type) {
-#ifdef WITH_WSREP
- case TRX_WSREP_ABORT:
- break;
-#endif
- case TRX_SERVER_ABORT:
- if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
- lock_mutex_enter();
- }
- /* fall through */
- case TRX_REPLICATION_ABORT:
- trx_mutex_enter(trx);
- }
/* Cancel a pending lock request if there are any */
lock_trx_handle_wait(trx);
- switch (trx->abort_type) {
-#ifdef WITH_WSREP
- case TRX_WSREP_ABORT:
- break;
-#endif
- case TRX_SERVER_ABORT:
- if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
- lock_mutex_exit();
- }
- /* fall through */
- case TRX_REPLICATION_ABORT:
- trx_mutex_exit(trx);
- }
}
DBUG_VOID_RETURN;
@@ -4975,17 +5232,15 @@ static void innobase_kill_query(handlerton*, THD* thd, enum thd_kill_levels)
** InnoDB database tables
*****************************************************************************/
-/****************************************************************//**
-Get the record format from the data dictionary.
+/** Get the record format from the data dictionary.
@return one of ROW_TYPE_REDUNDANT, ROW_TYPE_COMPACT,
ROW_TYPE_COMPRESSED, ROW_TYPE_DYNAMIC */
-UNIV_INTERN
+
enum row_type
ha_innobase::get_row_type() const
-/*=============================*/
{
- if (prebuilt && prebuilt->table) {
- const ulint flags = prebuilt->table->flags;
+ if (m_prebuilt && m_prebuilt->table) {
+ const ulint flags = m_prebuilt->table->flags;
switch (dict_tf_get_rec_format(flags)) {
case REC_FORMAT_REDUNDANT:
@@ -5002,31 +5257,33 @@ ha_innobase::get_row_type() const
return(ROW_TYPE_NOT_USED);
}
-
-
/****************************************************************//**
Get the table flags to use for the statement.
-@return table flags */
-UNIV_INTERN
+@return table flags */
+
handler::Table_flags
ha_innobase::table_flags() const
/*============================*/
{
+ THD* thd = ha_thd();
+ handler::Table_flags flags = m_int_table_flags;
+
/* Need to use tx_isolation here since table flags is (also)
called before prebuilt is inited. */
- ulong const tx_isolation = thd_tx_isolation(ha_thd());
+
+ ulong const tx_isolation = thd_tx_isolation(thd);
if (tx_isolation <= ISO_READ_COMMITTED) {
- return(int_table_flags);
+ return(flags);
}
- return(int_table_flags | HA_BINLOG_STMT_CAPABLE);
+ return(flags | HA_BINLOG_STMT_CAPABLE);
}
/****************************************************************//**
Returns the table type (storage engine name).
-@return table type */
-UNIV_INTERN
+@return table type */
+
const char*
ha_innobase::table_type() const
/*===========================*/
@@ -5037,7 +5294,7 @@ ha_innobase::table_type() const
/****************************************************************//**
Returns the index type.
@return index type */
-UNIV_INTERN
+
const char*
ha_innobase::index_type(
/*====================*/
@@ -5047,6 +5304,8 @@ ha_innobase::index_type(
if (index && index->type & DICT_FTS) {
return("FULLTEXT");
+ } else if (dict_index_is_spatial(index)) {
+ return("SPATIAL");
} else {
return("BTREE");
}
@@ -5054,8 +5313,8 @@ ha_innobase::index_type(
/****************************************************************//**
Returns the table file name extension.
-@return file extension string */
-UNIV_INTERN
+@return file extension string */
+
const char**
ha_innobase::bas_ext() const
/*========================*/
@@ -5065,8 +5324,8 @@ ha_innobase::bas_ext() const
/****************************************************************//**
Returns the operations supported for indexes.
-@return flags of supported operations */
-UNIV_INTERN
+@return flags of supported operations */
+
ulong
ha_innobase::index_flags(
/*=====================*/
@@ -5074,20 +5333,35 @@ ha_innobase::index_flags(
uint,
bool) const
{
- ulong extra_flag= 0;
- if (table && key == table->s->primary_key)
- extra_flag= HA_CLUSTERED_INDEX;
- return((table_share->key_info[key].algorithm == HA_KEY_ALG_FULLTEXT)
- ? 0
- : (HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
- | HA_READ_RANGE | HA_KEYREAD_ONLY | extra_flag
- | HA_DO_INDEX_COND_PUSHDOWN));
+ if (table_share->key_info[key].algorithm == HA_KEY_ALG_FULLTEXT) {
+ return(0);
+ }
+
+ ulong extra_flag= 0;
+
+ if (table && key == table->s->primary_key) {
+ extra_flag= HA_CLUSTERED_INDEX;
+ }
+
+ ulong flags = HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
+ | HA_READ_RANGE | HA_KEYREAD_ONLY
+ | extra_flag
+ | HA_DO_INDEX_COND_PUSHDOWN;
+
+	/* For spatial indexes, we don't support descending scans
+	and ICP so far. */
+ if (table_share->key_info[key].flags & HA_SPATIAL) {
+ flags = HA_READ_NEXT | HA_READ_ORDER| HA_READ_RANGE
+ | HA_KEYREAD_ONLY | HA_KEY_SCAN_NOT_ROR;
+ }
+
+ return(flags);
}
/****************************************************************//**
Returns the maximum number of keys.
-@return MAX_KEY */
-UNIV_INTERN
+@return MAX_KEY */
+
uint
ha_innobase::max_supported_keys() const
/*===================================*/
@@ -5097,8 +5371,8 @@ ha_innobase::max_supported_keys() const
/****************************************************************//**
Returns the maximum key length.
-@return maximum supported key length, in bytes */
-UNIV_INTERN
+@return maximum supported key length, in bytes */
+
uint
ha_innobase::max_supported_key_length() const
/*=========================================*/
@@ -5120,22 +5394,23 @@ ha_innobase::max_supported_key_length() const
switch (UNIV_PAGE_SIZE) {
case 4096:
- return(768);
+ /* Hack: allow mysql.innodb_index_stats to be created. */
+ /* FIXME: rewrite this API, and in sql_table.cc consider
+ that in index-organized tables (such as InnoDB), secondary
+ index records will be padded with the PRIMARY KEY, instead
+ of some short ROWID or record heap address. */
+ return(1173);
case 8192:
return(1536);
default:
-#ifdef WITH_WSREP
return(3500);
-#else
- return(3500);
-#endif
}
}
/****************************************************************//**
Returns the key map of keys that are usable for scanning.
-@return key_map_full */
-UNIV_INTERN
+@return key_map_full */
+
const key_map*
ha_innobase::keys_to_use_for_scanning()
/*===================================*/
@@ -5144,9 +5419,39 @@ ha_innobase::keys_to_use_for_scanning()
}
/****************************************************************//**
+Ensures that if there's a concurrent in-place ADD INDEX, being-indexed virtual
+columns are computed. They are not marked as indexed in the old table, so the
+server won't add them to the vcol_set automatically */
+void
+ha_innobase::column_bitmaps_signal()
+/*================================*/
+{
+ if (!table->vfield || table->current_lock != F_WRLCK) {
+ return;
+ }
+
+ dict_index_t* clust_index = dict_table_get_first_index(m_prebuilt->table);
+ uint num_v = 0;
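+	/* Mark for read each non-stored virtual column that is part
+	of an index, or that a concurrent online ADD INDEX is
+	currently indexing. */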
+ for (uint j = 0; j < table->s->virtual_fields; j++) {
+ if (table->vfield[j]->stored_in_db()) {
+ continue;
+ }
+
+ dict_col_t* col = &m_prebuilt->table->v_cols[num_v].m_col;
+ if (col->ord_part ||
+ (dict_index_is_online_ddl(clust_index) &&
+ row_log_col_is_indexed(clust_index, num_v))) {
+ table->mark_virtual_col(table->vfield[j]);
+ }
+ num_v++;
+ }
+}
+
+
+/****************************************************************//**
Determines if table caching is supported.
-@return HA_CACHE_TBL_ASKTRANSACT */
-UNIV_INTERN
+@return HA_CACHE_TBL_ASKTRANSACT */
+
uint8
ha_innobase::table_cache_type()
/*===========================*/
@@ -5156,8 +5461,8 @@ ha_innobase::table_cache_type()
/****************************************************************//**
Determines if the primary key is clustered index.
-@return true */
-UNIV_INTERN
+@return true */
+
bool
ha_innobase::primary_key_is_clustered()
/*===================================*/
@@ -5165,19 +5470,22 @@ ha_innobase::primary_key_is_clustered()
return(true);
}
-/*****************************************************************//**
-Normalizes a table name string. A normalized name consists of the
-database name catenated to '/' and table name. Example: test/mytable.
-On Windows normalization puts both the database name and the
-table name always to lower case if "set_lower_case" is set to TRUE. */
+/** Normalizes a table name string.
+A normalized name consists of the database name catenated to '/'
+and table name. For example: test/mytable.
+On Windows, normalization always folds both the database name and
+the table name to lower case if "set_lower_case" is set to TRUE.
+@param[out] norm_name Normalized name, null-terminated.
+@param[in] name Name to normalize.
+@param[in] set_lower_case True if we also should fold to lower case. */
void
-normalize_table_name_low(
-/*=====================*/
- char* norm_name, /*!< out: normalized name as a
+normalize_table_name_c_low(
+/*=======================*/
+ char* norm_name, /* out: normalized name as a
null-terminated string */
- const char* name, /*!< in: table name string */
- ibool set_lower_case) /*!< in: TRUE if we want to set name
- to lower case */
+ const char* name, /* in: table name string */
+ ibool set_lower_case) /* in: TRUE if we want to set
+ name to lower case */
{
char* name_ptr;
ulint name_len;
@@ -5230,6 +5538,42 @@ normalize_table_name_low(
}
}
+create_table_info_t::create_table_info_t(
+ THD* thd,
+ const TABLE* form,
+ HA_CREATE_INFO* create_info,
+ char* table_name,
+ char* remote_path,
+ bool file_per_table,
+ trx_t* trx)
+ : m_thd(thd),
+ m_trx(trx),
+ m_form(form),
+ m_default_row_format(innodb_default_row_format),
+ m_create_info(create_info),
+	m_table_name(table_name),
+	m_drop_before_rollback(false),
+ m_remote_path(remote_path),
+ m_innodb_file_per_table(file_per_table)
+{
+}
+
+/** Normalizes a table name string.
+A normalized name consists of the database name catenated to '/'
+and table name. For example: test/mytable.
+On Windows, normalization puts both the database name and the
+table name always to lower case if "set_lower_case" is set to TRUE.
+@param[out] norm_name Normalized name, null-terminated.
+@param[in] name Name to normalize.
+@param[in] set_lower_case True if we also should fold to lower case. */
+void
+create_table_info_t::normalize_table_name_low(
+ char* norm_name,
+ const char* name,
+ ibool set_lower_case)
+{
+ normalize_table_name_c_low(norm_name, name, set_lower_case);
+}
+
#if !defined(DBUG_OFF)
/*********************************************************************
Test normalize_table_name_low(). */
@@ -5280,11 +5624,12 @@ test_normalize_table_name_low()
};
for (size_t i = 0; i < UT_ARR_SIZE(test_data); i++) {
- printf("test_normalize_table_name_low(): "
- "testing \"%s\", expected \"%s\"... ",
+ printf("test_normalize_table_name_low():"
+ " testing \"%s\", expected \"%s\"... ",
test_data[i][0], test_data[i][1]);
- normalize_table_name_low(norm_name, test_data[i][0], FALSE);
+ create_table_info_t::normalize_table_name_low(
+ norm_name, test_data[i][0], FALSE);
if (strcmp(norm_name, test_data[i][1]) == 0) {
printf("ok\n");
@@ -5306,30 +5651,27 @@ test_ut_format_name()
struct {
const char* name;
- ibool is_table;
ulint buf_size;
const char* expected;
} test_data[] = {
- {"test/t1", TRUE, sizeof(buf), "\"test\".\"t1\""},
- {"test/t1", TRUE, 12, "\"test\".\"t1\""},
- {"test/t1", TRUE, 11, "\"test\".\"t1"},
- {"test/t1", TRUE, 10, "\"test\".\"t"},
- {"test/t1", TRUE, 9, "\"test\".\""},
- {"test/t1", TRUE, 8, "\"test\"."},
- {"test/t1", TRUE, 7, "\"test\""},
- {"test/t1", TRUE, 6, "\"test"},
- {"test/t1", TRUE, 5, "\"tes"},
- {"test/t1", TRUE, 4, "\"te"},
- {"test/t1", TRUE, 3, "\"t"},
- {"test/t1", TRUE, 2, "\""},
- {"test/t1", TRUE, 1, ""},
- {"test/t1", TRUE, 0, "BUF_NOT_CHANGED"},
- {"table", TRUE, sizeof(buf), "\"table\""},
- {"ta'le", TRUE, sizeof(buf), "\"ta'le\""},
- {"ta\"le", TRUE, sizeof(buf), "\"ta\"\"le\""},
- {"ta`le", TRUE, sizeof(buf), "\"ta`le\""},
- {"index", FALSE, sizeof(buf), "\"index\""},
- {"ind/ex", FALSE, sizeof(buf), "\"ind/ex\""},
+ {"test/t1", sizeof(buf), "`test`.`t1`"},
+ {"test/t1", 12, "`test`.`t1`"},
+ {"test/t1", 11, "`test`.`t1"},
+ {"test/t1", 10, "`test`.`t"},
+ {"test/t1", 9, "`test`.`"},
+ {"test/t1", 8, "`test`."},
+ {"test/t1", 7, "`test`"},
+ {"test/t1", 6, "`test"},
+ {"test/t1", 5, "`tes"},
+ {"test/t1", 4, "`te"},
+ {"test/t1", 3, "`t"},
+ {"test/t1", 2, "`"},
+ {"test/t1", 1, ""},
+ {"test/t1", 0, "BUF_NOT_CHANGED"},
+ {"table", sizeof(buf), "`table`"},
+ {"ta'le", sizeof(buf), "`ta'le`"},
+ {"ta\"le", sizeof(buf), "`ta\"le`"},
+ {"ta`le", sizeof(buf), "`ta``le`"},
};
for (size_t i = 0; i < UT_ARR_SIZE(test_data); i++) {
@@ -5339,109 +5681,38 @@ test_ut_format_name()
char* ret;
ret = ut_format_name(test_data[i].name,
- test_data[i].is_table,
buf,
test_data[i].buf_size);
ut_a(ret == buf);
if (strcmp(buf, test_data[i].expected) == 0) {
- fprintf(stderr,
- "ut_format_name(%s, %s, buf, %lu), "
- "expected %s, OK\n",
- test_data[i].name,
- test_data[i].is_table ? "TRUE" : "FALSE",
- test_data[i].buf_size,
- test_data[i].expected);
+ ib::info() << "ut_format_name(" << test_data[i].name
+ << ", buf, " << test_data[i].buf_size << "),"
+ " expected " << test_data[i].expected
+ << ", OK";
} else {
- fprintf(stderr,
- "ut_format_name(%s, %s, buf, %lu), "
- "expected %s, ERROR: got %s\n",
- test_data[i].name,
- test_data[i].is_table ? "TRUE" : "FALSE",
- test_data[i].buf_size,
- test_data[i].expected,
- buf);
+ ib::error() << "ut_format_name(" << test_data[i].name
+ << ", buf, " << test_data[i].buf_size << "),"
+ " expected " << test_data[i].expected
+ << ", ERROR: got " << buf;
ut_error;
}
}
}
#endif /* !DBUG_OFF */
-/********************************************************************//**
-Get the upper limit of the MySQL integral and floating-point type.
-@return maximum allowed value for the field */
-UNIV_INTERN
-ulonglong
-innobase_get_int_col_max_value(
-/*===========================*/
- const Field* field) /*!< in: MySQL field */
-{
- ulonglong max_value = 0;
-
- switch (field->key_type()) {
- /* TINY */
- case HA_KEYTYPE_BINARY:
- max_value = 0xFFULL;
- break;
- case HA_KEYTYPE_INT8:
- max_value = 0x7FULL;
- break;
- /* SHORT */
- case HA_KEYTYPE_USHORT_INT:
- max_value = 0xFFFFULL;
- break;
- case HA_KEYTYPE_SHORT_INT:
- max_value = 0x7FFFULL;
- break;
- /* MEDIUM */
- case HA_KEYTYPE_UINT24:
- max_value = 0xFFFFFFULL;
- break;
- case HA_KEYTYPE_INT24:
- max_value = 0x7FFFFFULL;
- break;
- /* LONG */
- case HA_KEYTYPE_ULONG_INT:
- max_value = 0xFFFFFFFFULL;
- break;
- case HA_KEYTYPE_LONG_INT:
- max_value = 0x7FFFFFFFULL;
- break;
- /* BIG */
- case HA_KEYTYPE_ULONGLONG:
- max_value = 0xFFFFFFFFFFFFFFFFULL;
- break;
- case HA_KEYTYPE_LONGLONG:
- max_value = 0x7FFFFFFFFFFFFFFFULL;
- break;
- case HA_KEYTYPE_FLOAT:
- /* We use the maximum as per IEEE754-2008 standard, 2^24 */
- max_value = 0x1000000ULL;
- break;
- case HA_KEYTYPE_DOUBLE:
- /* We use the maximum as per IEEE754-2008 standard, 2^53 */
- max_value = 0x20000000000000ULL;
- break;
- default:
- ut_error;
- }
-
- return(max_value);
-}
-
-/*******************************************************************//**
+/** Match index columns between MySQL and InnoDB.
This function checks whether the index column information
is consistent between KEY info from mysql and that from innodb index.
-@return TRUE if all column types match. */
+@param[in] key_info Index info from mysql
+@param[in] index_info Index info from InnoDB
+@return true if all column types match. */
static
-ibool
+bool
innobase_match_index_columns(
-/*=========================*/
- const KEY* key_info, /*!< in: Index info
- from mysql */
- const dict_index_t* index_info) /*!< in: Index info
- from Innodb */
+ const KEY* key_info,
+ const dict_index_t* index_info)
{
const KEY_PART_INFO* key_part;
const KEY_PART_INFO* key_end;
@@ -5466,7 +5737,7 @@ innobase_match_index_columns(
column name got modified in mysql but such change does not
propagate to InnoDB.
One hidden assumption here is that the index column sequences
- are matched up between those in mysql and Innodb. */
+ are matched up between those in mysql and InnoDB. */
for (; key_part != key_end; ++key_part) {
ulint col_type;
ibool is_unsigned;
@@ -5474,10 +5745,10 @@ innobase_match_index_columns(
/* Need to translate to InnoDB column type before
comparison. */
- col_type = get_innobase_type_from_mysql_type(&is_unsigned,
- key_part->field);
+ col_type = get_innobase_type_from_mysql_type(
+ &is_unsigned, key_part->field);
- /* Ignore Innodb specific system columns. */
+ /* Ignore InnoDB specific system columns. */
while (mtype == DATA_SYS) {
innodb_idx_fld++;
@@ -5486,15 +5757,31 @@ innobase_match_index_columns(
}
}
- // MariaDB-5.5 compatibility
- if ((key_part->field->real_type() == MYSQL_TYPE_ENUM ||
- key_part->field->real_type() == MYSQL_TYPE_SET) &&
- mtype == DATA_FIXBINARY)
- col_type= DATA_FIXBINARY;
+ /* MariaDB-5.5 compatibility */
+ if ((key_part->field->real_type() == MYSQL_TYPE_ENUM ||
+ key_part->field->real_type() == MYSQL_TYPE_SET) &&
+ mtype == DATA_FIXBINARY) {
+ col_type= DATA_FIXBINARY;
+ }
if (col_type != mtype) {
- /* Column Type mismatches */
- DBUG_RETURN(FALSE);
+ /* If the col_type we get from mysql type is a geometry
+ data type, we should check if mtype is a legacy type
+ from 5.6, either upgraded to DATA_GEOMETRY or not.
+ This is indeed not an accurate check, but should be
+ safe, since DATA_BLOB would be upgraded once we create
+ spatial index on it and we intend to use DATA_GEOMETRY
+ for legacy GIS data types which are of var-length. */
+ switch (col_type) {
+ case DATA_GEOMETRY:
+ if (mtype == DATA_BLOB) {
+ break;
+ }
+ /* Fall through */
+ default:
+ /* Column type mismatches */
+ DBUG_RETURN(false);
+ }
}
innodb_idx_fld++;
@@ -5503,80 +5790,233 @@ innobase_match_index_columns(
DBUG_RETURN(TRUE);
}
-/*******************************************************************//**
-This function builds a translation table in INNOBASE_SHARE
-structure for fast index location with mysql array number from its
-table->key_info structure. This also provides the necessary translation
-between the key order in mysql key_info and Innodb ib_table->indexes if
-they are not fully matched with each other.
-Note we do not have any mutex protecting the translation table
-building based on the assumption that there is no concurrent
-index creation/drop and DMLs that requires index lookup. All table
-handle will be closed before the index creation/drop.
-@return TRUE if index translation table built successfully */
+/** Build a template for a base column for a virtual column
+@param[in] table MySQL TABLE
+@param[in] clust_index InnoDB clustered index
+@param[in] field field in MySQL table
+@param[in] col InnoDB column
+@param[in,out] templ template to fill
+@param[in]	col_no		field index for virtual col */
static
-ibool
-innobase_build_index_translation(
-/*=============================*/
- const TABLE* table, /*!< in: table in MySQL data
- dictionary */
- dict_table_t* ib_table,/*!< in: table in Innodb data
- dictionary */
- INNOBASE_SHARE* share) /*!< in/out: share structure
- where index translation table
- will be constructed in. */
+void
+innobase_vcol_build_templ(
+ const TABLE* table,
+ dict_index_t* clust_index,
+ Field* field,
+ const dict_col_t* col,
+ mysql_row_templ_t* templ,
+ ulint col_no)
+{
+ if (dict_col_is_virtual(col)) {
+ templ->is_virtual = true;
+ templ->col_no = col_no;
+ templ->clust_rec_field_no = ULINT_UNDEFINED;
+ templ->rec_field_no = col->ind;
+ } else {
+ templ->is_virtual = false;
+ templ->col_no = col_no;
+ templ->clust_rec_field_no = dict_col_get_clust_pos(
+ col, clust_index);
+ ut_a(templ->clust_rec_field_no != ULINT_UNDEFINED);
+
+ templ->rec_field_no = templ->clust_rec_field_no;
+ }
+
+ if (field->real_maybe_null()) {
+ templ->mysql_null_byte_offset =
+ field->null_offset();
+
+ templ->mysql_null_bit_mask = (ulint) field->null_bit;
+ } else {
+ templ->mysql_null_bit_mask = 0;
+ }
+
+ templ->mysql_col_offset = static_cast<ulint>(
+ get_field_offset(table, field));
+ templ->mysql_col_len = static_cast<ulint>(field->pack_length());
+ templ->type = col->mtype;
+ templ->mysql_type = static_cast<ulint>(field->type());
+
+ if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
+ templ->mysql_length_bytes = static_cast<ulint>(
+ ((Field_varstring*) field)->length_bytes);
+ }
+
+ templ->charset = dtype_get_charset_coll(col->prtype);
+ templ->mbminlen = dict_col_get_mbminlen(col);
+ templ->mbmaxlen = dict_col_get_mbmaxlen(col);
+ templ->is_unsigned = col->prtype & DATA_UNSIGNED;
+}
+
+/** Build template for the virtual columns and their base columns. This
+is done when the table is first opened.
+@param[in] table MySQL TABLE
+@param[in] ib_table InnoDB dict_table_t
+@param[in,out] s_templ InnoDB template structure
+@param[in] add_v new virtual columns added along with
+				an ADD INDEX call
+@param[in] locked true if dict_sys mutex is held */
+void
+innobase_build_v_templ(
+ const TABLE* table,
+ const dict_table_t* ib_table,
+ dict_vcol_templ_t* s_templ,
+ const dict_add_v_col_t* add_v,
+ bool locked)
{
- ulint mysql_num_index;
- ulint ib_num_index;
- dict_index_t** index_mapping;
- ibool ret = TRUE;
+ ulint ncol = ib_table->n_cols - DATA_N_SYS_COLS;
+ ulint n_v_col = ib_table->n_v_cols;
+ bool marker[REC_MAX_N_FIELDS];
- DBUG_ENTER("innobase_build_index_translation");
+ DBUG_ENTER("innobase_build_v_templ");
+ ut_ad(ncol < REC_MAX_N_FIELDS);
- mutex_enter(&dict_sys->mutex);
+ if (add_v != NULL) {
+ n_v_col += add_v->n_v_col;
+ }
- mysql_num_index = table->s->keys;
- ib_num_index = UT_LIST_GET_LEN(ib_table->indexes);
+ ut_ad(n_v_col > 0);
- index_mapping = share->idx_trans_tbl.index_mapping;
+ if (!locked) {
+ mutex_enter(&dict_sys->mutex);
+ }
- /* If there exists inconsistency between MySQL and InnoDB dictionary
- (metadata) information, the number of index defined in MySQL
- could exceed that in InnoDB, do not build index translation
- table in such case */
- if (UNIV_UNLIKELY(ib_num_index < mysql_num_index)) {
- ret = FALSE;
- goto func_exit;
+ if (s_templ->vtempl) {
+ if (!locked) {
+ mutex_exit(&dict_sys->mutex);
+ }
+ DBUG_VOID_RETURN;
}
- /* If index entry count is non-zero, nothing has
- changed since last update, directly return TRUE */
- if (share->idx_trans_tbl.index_count) {
- /* Index entry count should still match mysql_num_index */
- ut_a(share->idx_trans_tbl.index_count == mysql_num_index);
- goto func_exit;
+ memset(marker, 0, sizeof(bool) * ncol);
+
+ s_templ->vtempl = static_cast<mysql_row_templ_t**>(
+ ut_zalloc_nokey((ncol + n_v_col)
+ * sizeof *s_templ->vtempl));
+ s_templ->n_col = ncol;
+ s_templ->n_v_col = n_v_col;
+ s_templ->rec_len = table->s->reclength;
+ s_templ->default_rec = UT_NEW_ARRAY_NOKEY(uchar, s_templ->rec_len);
+	memcpy(s_templ->default_rec, table->s->default_values,
+	       s_templ->rec_len);
+
+	/* Mark the columns that could be base columns */
+ for (ulint i = 0; i < ib_table->n_v_cols; i++) {
+ const dict_v_col_t* vcol = dict_table_get_nth_v_col(
+ ib_table, i);
+
+ for (ulint j = 0; j < vcol->num_base; j++) {
+ ulint col_no = vcol->base_col[j]->ind;
+ marker[col_no] = true;
+ }
}
- /* The number of index increased, rebuild the mapping table */
- if (mysql_num_index > share->idx_trans_tbl.array_size) {
- index_mapping = (dict_index_t**) my_realloc(index_mapping,
- mysql_num_index *
- sizeof(*index_mapping),
- MYF(MY_ALLOW_ZERO_PTR));
-
- if (!index_mapping) {
- /* Report an error if index_mapping continues to be
- NULL and mysql_num_index is a non-zero value */
- sql_print_error("InnoDB: fail to allocate memory for "
- "index translation table. Number of "
- "Index:%lu, array size:%lu",
- mysql_num_index,
- share->idx_trans_tbl.array_size);
- ret = FALSE;
- goto func_exit;
+ if (add_v) {
+ for (ulint i = 0; i < add_v->n_v_col; i++) {
+ const dict_v_col_t* vcol = &add_v->v_col[i];
+
+ for (ulint j = 0; j < vcol->num_base; j++) {
+ ulint col_no = vcol->base_col[j]->ind;
+ marker[col_no] = true;
+ }
}
+ }
+
+ ulint j = 0;
+ ulint z = 0;
+
+ dict_index_t* clust_index = dict_table_get_first_index(ib_table);
+
+ for (ulint i = 0; i < table->s->fields; i++) {
+ Field* field = table->field[i];
+
+ /* Build template for virtual columns */
+ if (!field->stored_in_db()) {
+#ifdef UNIV_DEBUG
+ const char* name;
+
+ if (z >= ib_table->n_v_def) {
+ name = add_v->v_col_name[z - ib_table->n_v_def];
+ } else {
+ name = dict_table_get_v_col_name(ib_table, z);
+ }
+
+ ut_ad(!my_strcasecmp(system_charset_info, name,
+ field->field_name));
+#endif
+ const dict_v_col_t* vcol;
+
+ if (z >= ib_table->n_v_def) {
+ vcol = &add_v->v_col[z - ib_table->n_v_def];
+ } else {
+ vcol = dict_table_get_nth_v_col(ib_table, z);
+ }
+
+ s_templ->vtempl[z + s_templ->n_col]
+ = static_cast<mysql_row_templ_t*>(
+ ut_malloc_nokey(
+ sizeof *s_templ->vtempl[j]));
+
+ innobase_vcol_build_templ(
+ table, clust_index, field,
+ &vcol->m_col,
+ s_templ->vtempl[z + s_templ->n_col],
+ z);
+ z++;
+ continue;
+ }
+
+ ut_ad(j < ncol);
- share->idx_trans_tbl.array_size = mysql_num_index;
+ /* Build template for base columns */
+ if (marker[j]) {
+ dict_col_t* col = dict_table_get_nth_col(
+ ib_table, j);
+
+ ut_ad(!my_strcasecmp(system_charset_info,
+ dict_table_get_col_name(
+ ib_table, j),
+ field->field_name));
+
+ s_templ->vtempl[j] = static_cast<
+ mysql_row_templ_t*>(
+ ut_malloc_nokey(
+ sizeof *s_templ->vtempl[j]));
+
+ innobase_vcol_build_templ(
+ table, clust_index, field, col,
+ s_templ->vtempl[j], j);
+ }
+
+ j++;
+ }
+
+ if (!locked) {
+ mutex_exit(&dict_sys->mutex);
+ }
+
+ s_templ->db_name = table->s->db.str;
+ s_templ->tb_name = table->s->table_name.str;
+ DBUG_VOID_RETURN;
+}
+
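For orientation, a sketch of the vtempl layout this function produces, assuming a hypothetical table t(a INT, b INT, v INT AS (a + b) VIRTUAL):

/*
	vtempl[0]          template for a (base column of v)
	vtempl[1]          template for b (base column of v)
	vtempl[n_col + 0]  template for v (virtual column)

	Slots for stored columns that are not base columns of any
	virtual column stay NULL from the ut_zalloc_nokey() above.
*/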
+/** Check consistency between .frm indexes and InnoDB indexes.
+@param[in] table table object formed from .frm
+@param[in] ib_table InnoDB table definition
+@retval true if no errors were found */
+static bool
+check_index_consistency(const TABLE* table, const dict_table_t* ib_table)
+{
+ ulint mysql_num_index = table->s->keys;
+ ulint ib_num_index = UT_LIST_GET_LEN(ib_table->indexes);
+ bool ret = true;
+
+	/* If there is an inconsistency between the MySQL and InnoDB
+	dictionary (metadata) information, the number of indexes defined
+	in MySQL could exceed that in InnoDB; return an error. */
+ if (ib_num_index < mysql_num_index) {
+ ret = false;
+ goto func_exit;
}
/* For each index in the mysql key_info array, fetch its
@@ -5584,102 +6024,123 @@ innobase_build_index_translation(
array. */
for (ulint count = 0; count < mysql_num_index; count++) {
- /* Fetch index pointers into index_mapping according to mysql
- index sequence */
- index_mapping[count] = dict_table_get_index_on_name(
- ib_table, table->key_info[count].name);
+ const dict_index_t* index = dict_table_get_index_on_name(
+ ib_table, table->key_info[count].name);
- if (!index_mapping[count]) {
- sql_print_error("Cannot find index %s in InnoDB "
- "index dictionary.",
+ if (index == NULL) {
+ sql_print_error("Cannot find index %s in InnoDB"
+ " index dictionary.",
table->key_info[count].name);
- ret = FALSE;
+ ret = false;
goto func_exit;
}
/* Double check fetched index has the same
column info as those in mysql key_info. */
if (!innobase_match_index_columns(&table->key_info[count],
- index_mapping[count])) {
- sql_print_error("Found index %s whose column info "
- "does not match that of MySQL.",
+ index)) {
+ sql_print_error("Found index %s whose column info"
+ " does not match that of MariaDB.",
table->key_info[count].name);
- ret = FALSE;
+ ret = false;
goto func_exit;
}
}
- /* Successfully built the translation table */
- share->idx_trans_tbl.index_count = mysql_num_index;
-
func_exit:
- if (!ret) {
- /* Build translation table failed. */
- my_free(index_mapping);
-
- share->idx_trans_tbl.array_size = 0;
- share->idx_trans_tbl.index_count = 0;
- index_mapping = NULL;
- }
-
- share->idx_trans_tbl.index_mapping = index_mapping;
-
- mutex_exit(&dict_sys->mutex);
-
- DBUG_RETURN(ret);
+ return ret;
}
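
A hypothetical failure case to make the contract concrete:

/*
	Suppose the .frm file defines KEY k1 (a), but k1 was dropped
	inside InnoDB without the .frm being updated.  Then
	dict_table_get_index_on_name(ib_table, "k1") returns NULL,
	check_index_consistency() logs the error and returns false,
	and ha_innobase::open() below reports the inconsistency.
*/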
-/*******************************************************************//**
-This function uses index translation table to quickly locate the
-requested index structure.
-Note we do not have mutex protection for the index translatoin table
-access, it is based on the assumption that there is no concurrent
-translation table rebuild (fter create/drop index) and DMLs that
-require index lookup.
-@return dict_index_t structure for requested index. NULL if
-fail to locate the index structure. */
-static
-dict_index_t*
-innobase_index_lookup(
-/*==================*/
- INNOBASE_SHARE* share, /*!< in: share structure for index
- translation table. */
- uint keynr) /*!< in: index number for the requested
- index */
+/********************************************************************//**
+Get the upper limit of the MySQL integral and floating-point type.
+@return maximum allowed value for the field */
+UNIV_INTERN
+ulonglong
+innobase_get_int_col_max_value(
+/*===========================*/
+ const Field* field) /*!< in: MySQL field */
{
- if (!share->idx_trans_tbl.index_mapping
- || keynr >= share->idx_trans_tbl.index_count) {
- return(NULL);
+ ulonglong max_value = 0;
+
+ switch (field->key_type()) {
+ /* TINY */
+ case HA_KEYTYPE_BINARY:
+ max_value = 0xFFULL;
+ break;
+ case HA_KEYTYPE_INT8:
+ max_value = 0x7FULL;
+ break;
+ /* SHORT */
+ case HA_KEYTYPE_USHORT_INT:
+ max_value = 0xFFFFULL;
+ break;
+ case HA_KEYTYPE_SHORT_INT:
+ max_value = 0x7FFFULL;
+ break;
+ /* MEDIUM */
+ case HA_KEYTYPE_UINT24:
+ max_value = 0xFFFFFFULL;
+ break;
+ case HA_KEYTYPE_INT24:
+ max_value = 0x7FFFFFULL;
+ break;
+ /* LONG */
+ case HA_KEYTYPE_ULONG_INT:
+ max_value = 0xFFFFFFFFULL;
+ break;
+ case HA_KEYTYPE_LONG_INT:
+ max_value = 0x7FFFFFFFULL;
+ break;
+ /* BIG */
+ case HA_KEYTYPE_ULONGLONG:
+ max_value = 0xFFFFFFFFFFFFFFFFULL;
+ break;
+ case HA_KEYTYPE_LONGLONG:
+ max_value = 0x7FFFFFFFFFFFFFFFULL;
+ break;
+ case HA_KEYTYPE_FLOAT:
+ /* We use the maximum as per IEEE754-2008 standard, 2^24 */
+ max_value = 0x1000000ULL;
+ break;
+ case HA_KEYTYPE_DOUBLE:
+ /* We use the maximum as per IEEE754-2008 standard, 2^53 */
+ max_value = 0x20000000000000ULL;
+ break;
+ default:
+ ut_error;
}
- return(share->idx_trans_tbl.index_mapping[keynr]);
+ return(max_value);
}
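
A worked note on the two floating-point cases above:

/*
	2^24 = 16777216 is the largest bound such that every integer
	up to it is exactly representable in a 32-bit IEEE 754 float
	(24-bit significand); 2^53 plays the same role for the 53-bit
	significand of a double.  Beyond those bounds consecutive
	integers are no longer distinguishable, so the counter is
	clamped there. */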
-/************************************************************************
-Set the autoinc column max value. This should only be called once from
-ha_innobase::open(). Therefore there's no need for a covering lock. */
-UNIV_INTERN
+/** Initialize the AUTO_INCREMENT column metadata.
+
+Since a partial table definition for a persistent table can already be
+present in the InnoDB dict_sys cache before it is accessed from SQL,
+we have to initialize the AUTO_INCREMENT counter on the first
+ha_innobase::open().
+
+@param[in,out] table persistent table
+@param[in] field the AUTO_INCREMENT column */
+static
void
-ha_innobase::innobase_initialize_autoinc()
-/*======================================*/
+initialize_auto_increment(dict_table_t* table, const Field* field)
{
- ulonglong auto_inc;
- const Field* field = table->found_next_number_field;
+ ut_ad(!dict_table_is_temporary(table));
- if (field != NULL) {
- auto_inc = innobase_get_int_col_max_value(field);
- } else {
- /* We have no idea what's been passed in to us as the
- autoinc column. We set it to the 0, effectively disabling
- updates to the table. */
- auto_inc = 0;
+ const unsigned col_no = innodb_col_no(field);
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Unable to determine the AUTOINC "
- "column name\n");
- }
+ dict_table_autoinc_lock(table);
- if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
+ table->persistent_autoinc = 1
+ + dict_table_get_nth_col_pos(table, col_no, NULL);
+
+ if (table->autoinc) {
+ /* Already initialized. Our caller checked
+ table->persistent_autoinc without
+ dict_table_autoinc_lock(), and there might be multiple
+ ha_innobase::open() executing concurrently. */
+ } else if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
/* If the recovery level is set so high that writes
are disabled we force the AUTOINC counter to 0
value effectively disabling writes to the table.
@@ -5690,254 +6151,78 @@ ha_innobase::innobase_initialize_autoinc()
tables can be dumped with minimal hassle. If an error
were returned in this case, the first attempt to read
the table would fail and subsequent SELECTs would succeed. */
- auto_inc = 0;
- } else if (field == NULL) {
- /* This is a far more serious error, best to avoid
- opening the table and return failure. */
- my_error(ER_AUTOINC_READ_FAILED, MYF(0));
- } else {
- dict_index_t* index;
- const char* col_name;
- ib_uint64_t read_auto_inc;
- ulint err;
-
- update_thd(ha_thd());
-
- ut_a(prebuilt->trx == thd_to_trx(user_thd));
-
- col_name = field->field_name;
- index = innobase_get_index(table->s->next_number_index);
-
- /* Execute SELECT MAX(col_name) FROM TABLE; */
- err = row_search_max_autoinc(index, col_name, &read_auto_inc);
-
- switch (err) {
- case DB_SUCCESS: {
- ulonglong col_max_value;
-
- col_max_value = innobase_get_int_col_max_value(field);
-
- /* At the this stage we do not know the increment
- nor the offset, so use a default increment of 1. */
-
- auto_inc = innobase_next_autoinc(
- read_auto_inc, 1, 1, 0, col_max_value);
-
- break;
- }
- case DB_RECORD_NOT_FOUND:
- char buf[MAX_FULL_NAME_LEN];
- ut_format_name(index->table->name, TRUE, buf, sizeof(buf));
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "MySQL and InnoDB data "
- "dictionaries are out of sync."
- " Unable to find the AUTOINC column "
- " %s in the InnoDB table %s."
- " We set the next AUTOINC column "
- "value to 0"
- " in effect disabling the AUTOINC "
- "next value generation."
- " You can either set the next "
- "AUTOINC value explicitly using ALTER TABLE "
- " or fix the data dictionary by "
- "recreating the table.",
- col_name, buf);
-
- /* This will disable the AUTOINC generation. */
- auto_inc = 0;
-
- /* We want the open to succeed, so that the user can
- take corrective action. ie. reads should succeed but
- updates should fail. */
- err = DB_SUCCESS;
- break;
- default:
- /* row_search_max_autoinc() should only return
- one of DB_SUCCESS or DB_RECORD_NOT_FOUND. */
- ut_error;
- }
+ } else if (table->persistent_autoinc) {
+ table->autoinc = innobase_next_autoinc(
+ btr_read_autoinc_with_fallback(table, col_no),
+ 1 /* need */,
+ 1 /* auto_increment_increment */,
+ 0 /* auto_increment_offset */,
+ innobase_get_int_col_max_value(field));
}
- dict_table_autoinc_initialize(prebuilt->table, auto_inc);
+ dict_table_autoinc_unlock(table);
}
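
A worked example of the call above, assuming innobase_next_autoinc() keeps its documented semantics (next value after the current maximum, honouring step and offset, clamped to the column maximum):

/*
	If btr_read_autoinc_with_fallback() returns 41 for an INT
	UNSIGNED column (maximum 0xFFFFFFFF), then with need = 1,
	auto_increment_increment = 1 and auto_increment_offset = 0:

		table->autoinc = innobase_next_autoinc(41, 1, 1, 0,
						       0xFFFFFFFF) = 42

	so the first INSERT after this open() allocates 42.
*/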
-/*****************************************************************//**
-Creates and opens a handle to a table which already exists in an InnoDB
-database.
-@return 1 if error, 0 if success */
-UNIV_INTERN
+/** Open an InnoDB table
+@param[in] name table name
+@return error code
+@retval 0 on success */
int
-ha_innobase::open(
-/*==============*/
- const char* name, /*!< in: table name */
- int mode, /*!< in: not used */
- uint test_if_locked) /*!< in: not used */
+ha_innobase::open(const char* name, int, uint)
{
- dict_table_t* ib_table;
char norm_name[FN_REFLEN];
- THD* thd;
- char* is_part = NULL;
- ibool par_case_name_set = FALSE;
- char par_case_name[FN_REFLEN];
- dict_err_ignore_t ignore_err = DICT_ERR_IGNORE_NONE;
DBUG_ENTER("ha_innobase::open");
- UT_NOT_USED(mode);
- UT_NOT_USED(test_if_locked);
-
- thd = ha_thd();
-
- /* Under some cases MySQL seems to call this function while
- holding btr_search_latch. This breaks the latching order as
- we acquire dict_sys->mutex below and leads to a deadlock. */
- if (thd != NULL) {
- innobase_release_temporary_latches(ht, thd);
- }
-
normalize_table_name(norm_name, name);
- user_thd = NULL;
-
- if (!(share=get_share(name))) {
-
- DBUG_RETURN(1);
- }
+ m_user_thd = NULL;
/* Will be allocated if it is needed in ::update_row() */
- upd_buf = NULL;
- upd_buf_size = 0;
+ m_upd_buf = NULL;
+ m_upd_buf_size = 0;
- /* We look for pattern #P# to see if the table is partitioned
- MySQL table. */
-#ifdef __WIN__
- is_part = strstr(norm_name, "#p#");
-#else
- is_part = strstr(norm_name, "#P#");
-#endif /* __WIN__ */
+ char* is_part = is_partition(norm_name);
+ THD* thd = ha_thd();
+ dict_table_t* ib_table = open_dict_table(name, norm_name, is_part,
+ DICT_ERR_IGNORE_FK_NOKEY);
- /* Check whether FOREIGN_KEY_CHECKS is set to 0. If so, the table
- can be opened even if some FK indexes are missing. If not, the table
- can't be opened in the same situation */
- if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
- ignore_err = DICT_ERR_IGNORE_FK_NOKEY;
- }
-
- /* Get pointer to a table object in InnoDB dictionary cache */
- ib_table = dict_table_open_on_name(norm_name, FALSE, TRUE, ignore_err);
-
- if (ib_table
- && (table->s->stored_fields != dict_table_get_n_user_cols(ib_table)
- - !!DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID))) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "table %s contains " ULINTPF " user defined columns "
- "in InnoDB, but %u columns in MySQL. Please "
- "check INFORMATION_SCHEMA.INNODB_SYS_COLUMNS and "
- REFMAN "innodb-troubleshooting.html "
- "for how to resolve it",
- norm_name, dict_table_get_n_user_cols(ib_table)
- - !!DICT_TF2_FLAG_IS_SET(ib_table,
- DICT_TF2_FTS_HAS_DOC_ID),
- table->s->stored_fields);
-
- /* Mark this table as corrupted, so the drop table
- or force recovery can still use it, but not others. */
- ib_table->file_unreadable = true;
- ib_table->corrupted = true;
- dict_table_close(ib_table, FALSE, FALSE);
- ib_table = NULL;
- is_part = NULL;
- }
+ DEBUG_SYNC(thd, "ib_open_after_dict_open");
if (NULL == ib_table) {
- if (is_part) {
- /* MySQL partition engine hard codes the file name
- separator as "#P#". The text case is fixed even if
- lower_case_table_names is set to 1 or 2. This is true
- for sub-partition names as well. InnoDB always
- normalises file names to lower case on Windows, this
- can potentially cause problems when copying/moving
- tables between platforms.
-
- 1) If boot against an installation from Windows
- platform, then its partition table name could
- be in lower case in system tables. So we will
- need to check lower case name when load table.
-
- 2) If we boot an installation from other case
- sensitive platform in Windows, we might need to
- check the existence of table name without lower
- case in the system table. */
- if (innobase_get_lower_case_table_names() == 1) {
-
- if (!par_case_name_set) {
-#ifndef __WIN__
- /* Check for the table using lower
- case name, including the partition
- separator "P" */
- strcpy(par_case_name, norm_name);
- innobase_casedn_str(par_case_name);
-#else
- /* On Windows platfrom, check
- whether there exists table name in
- system table whose name is
- not being normalized to lower case */
- normalize_table_name_low(
- par_case_name, name, FALSE);
-#endif
- par_case_name_set = TRUE;
- }
-
- ib_table = dict_table_open_on_name(
- par_case_name, FALSE, TRUE,
- ignore_err);
- }
-
- if (ib_table) {
-#ifndef __WIN__
- sql_print_warning("Partition table %s opened "
- "after converting to lower "
- "case. The table may have "
- "been moved from a case "
- "in-sensitive file system. "
- "Please recreate table in "
- "the current file system\n",
- norm_name);
-#else
- sql_print_warning("Partition table %s opened "
- "after skipping the step to "
- "lower case the table name. "
- "The table may have been "
- "moved from a case sensitive "
- "file system. Please "
- "recreate table in the "
- "current file system\n",
- norm_name);
-#endif
- goto table_opened;
- }
- }
if (is_part) {
sql_print_error("Failed to open table %s.\n",
norm_name);
}
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "Cannot open table %s from the internal data "
- "dictionary of InnoDB though the .frm file "
- "for the table exists. See "
- REFMAN "innodb-troubleshooting.html for how "
- "you can resolve the problem.", norm_name);
-
- free_share(share);
- my_errno = ENOENT;
+no_such_table:
+ set_my_errno(ENOENT);
DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
}
-table_opened:
+ uint n_fields = omits_virtual_cols(*table_share)
+ ? table_share->stored_fields : table_share->fields;
+ uint n_cols = dict_table_get_n_user_cols(ib_table)
+ + dict_table_get_n_v_cols(ib_table)
+ - !!DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID);
+
+ if (UNIV_UNLIKELY(n_cols != n_fields)) {
+ ib::warn() << "Table " << norm_name << " contains "
+ << n_cols << " user"
+ " defined columns in InnoDB, but " << n_fields
+ << " columns in MariaDB. Please check"
+ " INFORMATION_SCHEMA.INNODB_SYS_COLUMNS and"
+ " https://mariadb.com/kb/en/innodb-data-dictionary-troubleshooting/"
+ " for how to resolve the issue.";
+
+ /* Mark this table as corrupted, so the drop table
+ or force recovery can still use it, but not others. */
+ ib_table->file_unreadable = true;
+ ib_table->corrupted = true;
+ dict_table_close(ib_table, FALSE, FALSE);
+ goto no_such_table;
+ }
innobase_copy_frm_flags_from_table_share(ib_table, table->s);
@@ -5994,9 +6279,8 @@ table_opened:
}
if (!thd_tablespace_op(thd) && no_tablespace) {
- free_share(share);
- my_errno = ENOENT;
- int ret_err = HA_ERR_NO_SUCH_TABLE;
+ set_my_errno(ENOENT);
+ int ret_err = HA_ERR_TABLESPACE_MISSING;
	/* If the table has no tablespace but it has crypt data, check
	whether the tablespace was made inaccessible because the encryption service
@@ -6004,16 +6288,15 @@ table_opened:
if (encrypted) {
bool warning_pushed = false;
- char buf[MAX_FULL_NAME_LEN];
- ut_format_name(ib_table->name, TRUE, buf, sizeof(buf));
-
if (!encryption_key_id_exists(space()->crypt_data->key_id)) {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ push_warning_printf(
+ thd, Sql_condition::WARN_LEVEL_WARN,
HA_ERR_DECRYPTION_FAILED,
"Table %s in file %s is encrypted but encryption service or"
" used key_id %u is not available. "
" Can't continue reading table.",
- buf, space()->chain.start->name,
+ table_share->table_name.str,
+ space()->chain.start->name,
space()->crypt_data->key_id);
ret_err = HA_ERR_DECRYPTION_FAILED;
warning_pushed = true;
@@ -6023,33 +6306,50 @@ table_opened:
warning if it has not been already done as used
key_id might be found but it is incorrect. */
if (!warning_pushed) {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ push_warning_printf(
+ thd, Sql_condition::WARN_LEVEL_WARN,
HA_ERR_DECRYPTION_FAILED,
"Table %s in file %s is encrypted but encryption service or"
" used key_id is not available. "
" Can't continue reading table.",
- buf, space()->chain.start->name);
+ table_share->table_name.str,
+ space()->chain.start->name);
ret_err = HA_ERR_DECRYPTION_FAILED;
}
}
dict_table_close(ib_table, FALSE, FALSE);
-
DBUG_RETURN(ret_err);
}
- prebuilt = row_create_prebuilt(ib_table, table->s->stored_rec_length);
+ m_prebuilt = row_create_prebuilt(ib_table, table->s->reclength);
+
+ m_prebuilt->default_rec = table->s->default_values;
+ ut_ad(m_prebuilt->default_rec);
- prebuilt->default_rec = table->s->default_values;
- ut_ad(prebuilt->default_rec);
+ m_prebuilt->m_mysql_table = table;
/* Looks like MySQL-3.23 sometimes has primary key number != 0 */
- primary_key = table->s->primary_key;
- key_used_on_scan = primary_key;
+ m_primary_key = table->s->primary_key;
+
+ key_used_on_scan = m_primary_key;
+
+ if (ib_table->n_v_cols) {
+ mutex_enter(&dict_sys->mutex);
+ if (ib_table->vc_templ == NULL) {
+ ib_table->vc_templ = UT_NEW_NOKEY(dict_vcol_templ_t());
+ innobase_build_v_templ(
+ table, ib_table, ib_table->vc_templ, NULL,
+ true);
+ }
+
+ mutex_exit(&dict_sys->mutex);
+ }
- if (!innobase_build_index_translation(table, ib_table, share)) {
- sql_print_error("Build InnoDB index translation table for"
- " Table %s failed", name);
+ if (!check_index_consistency(table, ib_table)) {
+		sql_print_error("InnoDB indexes are inconsistent with what is"
+				" defined in the .frm file for table %s",
+				name);
}
/* Allocate a buffer for a 'row reference'. A row reference is
@@ -6057,12 +6357,9 @@ table_opened:
a row in our table. Note that MySQL may also compare two row
references for equality by doing a simple memcmp on the strings
of length ref_length! */
-
- if (!row_table_got_default_clust_index(ib_table)) {
-
- prebuilt->clust_index_was_generated = FALSE;
-
- if (UNIV_UNLIKELY(primary_key >= MAX_KEY)) {
+ if (!(m_prebuilt->clust_index_was_generated
+ = dict_index_is_auto_gen_clust(ib_table->indexes.start))) {
+ if (m_primary_key >= MAX_KEY) {
ib_table->dict_frm_mismatch = DICT_FRM_NO_PK;
/* This mismatch could cause further problems
@@ -6072,7 +6369,7 @@ table_opened:
ib_push_frm_error(thd, ib_table, table, 0, true);
- /* If primary_key >= MAX_KEY, its (primary_key)
+ /* If m_primary_key >= MAX_KEY, its (m_primary_key)
value could be out of bound if continue to index
into key_info[] array. Find InnoDB primary index,
and assign its key_length to ref_length.
@@ -6113,10 +6410,10 @@ table_opened:
save space, because all row reference buffers are
allocated based on ref_length. */
- ref_length = table->key_info[primary_key].key_length;
+ ref_length = table->key_info[m_primary_key].key_length;
}
} else {
- if (primary_key != MAX_KEY) {
+ if (m_primary_key != MAX_KEY) {
ib_table->dict_frm_mismatch = DICT_NO_PK_FRM_HAS;
@@ -6127,8 +6424,6 @@ table_opened:
ib_push_frm_error(thd, ib_table, table, 0, true);
}
- prebuilt->clust_index_was_generated = TRUE;
-
ref_length = DATA_ROW_ID_LEN;
/* If we automatically created the clustered index, then
@@ -6141,74 +6436,180 @@ table_opened:
if (key_used_on_scan != MAX_KEY) {
sql_print_warning(
- "Table %s key_used_on_scan is %lu even "
+ "Table %s key_used_on_scan is %u even "
"though there is no primary key inside "
- "InnoDB.", name, (ulong) key_used_on_scan);
+ "InnoDB.", name, key_used_on_scan);
}
}
/* Index block size in InnoDB: used by MySQL in query optimization */
stats.block_size = UNIV_PAGE_SIZE;
- /* Init table lock structure */
- thr_lock_data_init(&share->lock,&lock,(void*) 0);
-
- if (prebuilt->table) {
+ if (m_prebuilt->table != NULL) {
/* We update the highest file format in the system table
space, if this table has higher file format setting. */
trx_sys_file_format_max_upgrade(
(const char**) &innobase_file_format_max,
- dict_table_get_format(prebuilt->table));
+ dict_table_get_format(m_prebuilt->table));
}
- /* Only if the table has an AUTOINC column. */
- if (prebuilt->table != NULL
- && prebuilt->table->is_readable()
- && table->found_next_number_field != NULL) {
- dict_table_autoinc_lock(prebuilt->table);
+ if (m_prebuilt->table == NULL
+ || dict_table_is_temporary(m_prebuilt->table)
+ || m_prebuilt->table->persistent_autoinc
+ || !m_prebuilt->table->is_readable()) {
+ } else if (const Field* ai = table->found_next_number_field) {
+ initialize_auto_increment(m_prebuilt->table, ai);
+ }
- /* Since a table can already be "open" in InnoDB's internal
- data dictionary, we only init the autoinc counter once, the
- first time the table is loaded. We can safely reuse the
- autoinc value from a previous MySQL open. */
- if (dict_table_autoinc_read(prebuilt->table) == 0) {
+ /* Set plugin parser for fulltext index */
+ for (uint i = 0; i < table->s->keys; i++) {
+ if (table->key_info[i].flags & HA_USES_PARSER) {
+ dict_index_t* index = innobase_get_index(i);
+ plugin_ref parser = table->key_info[i].parser;
- innobase_initialize_autoinc();
- }
+ ut_ad(index->type & DICT_FTS);
+ index->parser =
+ static_cast<st_mysql_ftparser *>(
+ plugin_decl(parser)->info);
- dict_table_autoinc_unlock(prebuilt->table);
+ DBUG_EXECUTE_IF("fts_instrument_use_default_parser",
+ index->parser = &fts_default_parser;);
+ }
}
info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
-
DBUG_RETURN(0);
}
-UNIV_INTERN
+/** Convert MySQL column number to dict_table_t::cols[] offset.
+@param[in] field non-virtual column
+@return column number relative to dict_table_t::cols[] */
+unsigned
+innodb_col_no(const Field* field)
+{
+ ut_ad(!innobase_is_s_fld(field));
+ const TABLE* table = field->table;
+ unsigned col_no = 0;
+ ut_ad(field == table->field[field->field_index]);
+ for (unsigned i = 0; i < field->field_index; i++) {
+ if (table->field[i]->stored_in_db()) {
+ col_no++;
+ }
+ }
+ return(col_no);
+}
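A worked example for a hypothetical table, following the loop above:

/*
	CREATE TABLE t (a INT, v INT AS (a) VIRTUAL, b INT);

	The MariaDB field indexes are a = 0, v = 1, b = 2, but v is
	not stored in the clustered index, so only a is counted and
	innodb_col_no(b) == 1: b lives at dict_table_t::cols[1].
*/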
+
+/** Opens dictionary table object using table name. For partition, we need to
+try alternative lower/upper case names to support moving data files across
+platforms.
+@param[in] table_name name of the table/partition
+@param[in] norm_name normalized name of the table/partition
+@param[in] is_partition if this is a partition of a table
+@param[in] ignore_err error to ignore for loading dictionary object
+@return dictionary table object or NULL if not found */
+dict_table_t*
+ha_innobase::open_dict_table(
+ const char* table_name,
+ const char* norm_name,
+ bool is_partition,
+ dict_err_ignore_t ignore_err)
+{
+ DBUG_ENTER("ha_innobase::open_dict_table");
+ dict_table_t* ib_table = dict_table_open_on_name(norm_name, FALSE,
+ TRUE, ignore_err);
+
+ if (NULL == ib_table && is_partition) {
+ /* MySQL partition engine hard codes the file name
+ separator as "#P#". The text case is fixed even if
+ lower_case_table_names is set to 1 or 2. This is true
+ for sub-partition names as well. InnoDB always
+ normalises file names to lower case on Windows, this
+ can potentially cause problems when copying/moving
+ tables between platforms.
+
+	1) If we boot against an installation from a Windows
+	platform, its partition table names could be in lower
+	case in the system tables, so we will need to check the
+	lower-case name when loading the table.
+
+	2) If we boot an installation from another case-sensitive
+	platform on Windows, we might need to check for the
+	existence of the table name without lower-casing it in
+	the system table. */
+ if (innobase_get_lower_case_table_names() == 1) {
+ char par_case_name[FN_REFLEN];
+
+#ifndef _WIN32
+ /* Check for the table using lower
+ case name, including the partition
+ separator "P" */
+ strcpy(par_case_name, norm_name);
+ innobase_casedn_str(par_case_name);
+#else
+			/* On the Windows platform, check
+			whether there exists a table name in
+			the system tables that was not
+			normalized to lower case */
+ create_table_info_t::
+ normalize_table_name_low(
+ par_case_name,
+ table_name, FALSE);
+#endif
+ ib_table = dict_table_open_on_name(
+ par_case_name, FALSE, TRUE,
+ ignore_err);
+ }
+
+ if (ib_table != NULL) {
+#ifndef _WIN32
+ sql_print_warning("Partition table %s opened"
+ " after converting to lower"
+ " case. The table may have"
+					  " been moved from a case-"
+					  "insensitive file system."
+ " Please recreate table in"
+ " the current file system\n",
+ norm_name);
+#else
+ sql_print_warning("Partition table %s opened"
+ " after skipping the step to"
+ " lower case the table name."
+ " The table may have been"
+ " moved from a case sensitive"
+ " file system. Please"
+ " recreate table in the"
+ " current file system\n",
+ norm_name);
+#endif
+ }
+ }
+
+ DBUG_RETURN(ib_table);
+}
+
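A sketch of the fallback with hypothetical names:

/*
	On a case-sensitive filesystem with lower_case_table_names = 1,
	the first lookup may use "test/t1#P#p0" and fail because the
	dictionary entry was written fully lower-cased on Windows.
	The non-Windows branch above retries with the lower-cased
	"test/t1#p#p0", and a successful retry is reported through
	the sql_print_warning() that follows.
*/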
handler*
ha_innobase::clone(
/*===============*/
const char* name, /*!< in: table name */
MEM_ROOT* mem_root) /*!< in: memory context */
{
- ha_innobase* new_handler;
-
DBUG_ENTER("ha_innobase::clone");
- new_handler = static_cast<ha_innobase*>(handler::clone(name,
- mem_root));
- if (new_handler) {
- DBUG_ASSERT(new_handler->prebuilt != NULL);
+ ha_innobase* new_handler = static_cast<ha_innobase*>(
+ handler::clone(name, mem_root));
+
+ if (new_handler != NULL) {
+ DBUG_ASSERT(new_handler->m_prebuilt != NULL);
- new_handler->prebuilt->select_lock_type
- = prebuilt->select_lock_type;
+ new_handler->m_prebuilt->select_lock_type
+ = m_prebuilt->select_lock_type;
}
DBUG_RETURN(new_handler);
}
-UNIV_INTERN
+
uint
ha_innobase::max_supported_key_part_length() const
/*==============================================*/
@@ -6222,32 +6623,23 @@ ha_innobase::max_supported_key_part_length() const
/******************************************************************//**
Closes a handle to an InnoDB table.
-@return 0 */
-UNIV_INTERN
+@return 0 */
+
int
ha_innobase::close()
/*================*/
{
- THD* thd;
-
DBUG_ENTER("ha_innobase::close");
- thd = ha_thd();
- if (thd != NULL) {
- innobase_release_temporary_latches(ht, thd);
- }
-
- row_prebuilt_free(prebuilt, FALSE);
+ row_prebuilt_free(m_prebuilt, FALSE);
- if (upd_buf != NULL) {
- ut_ad(upd_buf_size != 0);
- my_free(upd_buf);
- upd_buf = NULL;
- upd_buf_size = 0;
+ if (m_upd_buf != NULL) {
+ ut_ad(m_upd_buf_size != 0);
+ my_free(m_upd_buf);
+ m_upd_buf = NULL;
+ m_upd_buf_size = 0;
}
- free_share(share);
-
MONITOR_INC(MONITOR_TABLE_CLOSE);
/* Tell InnoDB server that there might be work for
@@ -6260,24 +6652,11 @@ ha_innobase::close()
/* The following accessor functions should really be inside MySQL code! */
-/**************************************************************//**
-Gets field offset for a field in a table.
-@return offset */
-static inline
-uint
-get_field_offset(
-/*=============*/
- const TABLE* table, /*!< in: MySQL table object */
- const Field* field) /*!< in: MySQL field object */
-{
- return((uint) (field->ptr - table->record[0]));
-}
-
#ifdef WITH_WSREP
UNIV_INTERN
int
wsrep_innobase_mysql_sort(
-/*===============*/
+/*======================*/
/* out: str contains sort string */
int mysql_type, /* in: MySQL type */
uint charset_number, /* in: number of the charset */
@@ -6323,7 +6702,7 @@ wsrep_innobase_mysql_sort(
if (charset == NULL) {
sql_print_error("InnoDB needs charset %lu for doing "
- "a comparison, but MySQL cannot "
+ "a comparison, but MariaDB cannot "
"find that charset.",
(ulong) charset_number);
ut_a(0);
@@ -6334,8 +6713,8 @@ wsrep_innobase_mysql_sort(
memcpy(tmp_str, str, str_length);
tmp_length = charset->coll->strnxfrm(charset, str, str_length,
- str_length, tmp_str,
- tmp_length, 0);
+ str_length, tmp_str,
+ tmp_length, 0);
DBUG_ASSERT(tmp_length <= str_length);
if (wsrep_protocol_version < 3) {
tmp_length = charset->coll->strnxfrm(
@@ -6353,7 +6732,7 @@ wsrep_innobase_mysql_sort(
DBUG_ASSERT(tmp_length <= buf_length);
ret_length = tmp_length;
}
-
+
break;
}
case MYSQL_TYPE_DECIMAL :
@@ -6384,172 +6763,8 @@ wsrep_innobase_mysql_sort(
}
#endif /* WITH_WSREP */
-/*************************************************************//**
-InnoDB uses this function to compare two data fields for which the data type
-is such that we must use MySQL code to compare them. NOTE that the prototype
-of this function is in rem0cmp.cc in InnoDB source code! If you change this
-function, remember to update the prototype there!
-@return 1, 0, -1, if a is greater, equal, less than b, respectively */
-UNIV_INTERN
-int
-innobase_mysql_cmp(
-/*===============*/
- int mysql_type, /*!< in: MySQL type */
- uint charset_number, /*!< in: number of the charset */
- const unsigned char* a, /*!< in: data field */
- unsigned int a_length, /*!< in: data field length,
- not UNIV_SQL_NULL */
- const unsigned char* b, /*!< in: data field */
- unsigned int b_length) /*!< in: data field length,
- not UNIV_SQL_NULL */
-{
- CHARSET_INFO* charset;
- enum_field_types mysql_tp;
- int ret;
-
- DBUG_ASSERT(a_length != UNIV_SQL_NULL);
- DBUG_ASSERT(b_length != UNIV_SQL_NULL);
-
- mysql_tp = (enum_field_types) mysql_type;
-
- switch (mysql_tp) {
-
- case MYSQL_TYPE_BIT:
- case MYSQL_TYPE_STRING:
- case MYSQL_TYPE_VAR_STRING:
- case MYSQL_TYPE_TINY_BLOB:
- case MYSQL_TYPE_MEDIUM_BLOB:
- case MYSQL_TYPE_BLOB:
- case MYSQL_TYPE_LONG_BLOB:
- case MYSQL_TYPE_VARCHAR:
- /* Use the charset number to pick the right charset struct for
- the comparison. Since the MySQL function get_charset may be
- slow before Bar removes the mutex operation there, we first
- look at 2 common charsets directly. */
-
- if (charset_number == default_charset_info->number) {
- charset = default_charset_info;
- } else if (charset_number == my_charset_latin1.number) {
- charset = &my_charset_latin1;
- } else {
- charset = get_charset(charset_number, MYF(MY_WME));
-
- if (charset == NULL) {
- sql_print_error("InnoDB needs charset %lu for doing "
- "a comparison, but MySQL cannot "
- "find that charset.",
- (ulong) charset_number);
- ut_a(0);
- }
- }
-
- /* Starting from 4.1.3, we use strnncollsp() in comparisons of
- non-latin1_swedish_ci strings. NOTE that the collation order
- changes then: 'b\0\0...' is ordered BEFORE 'b ...'. Users
- having indexes on such data need to rebuild their tables! */
-
- ret = charset->coll->strnncollsp(
- charset, a, a_length, b, b_length, 0);
-
- if (ret < 0) {
- return(-1);
- } else if (ret > 0) {
- return(1);
- } else {
- return(0);
- }
- default:
- ut_error;
- }
-
- return(0);
-}
-
-
-/*************************************************************//**
-Get the next token from the given string and store it in *token. */
-UNIV_INTERN
-CHARSET_INFO*
-innobase_get_fts_charset(
-/*=====================*/
- int mysql_type, /*!< in: MySQL type */
- uint charset_number) /*!< in: number of the charset */
-{
- enum_field_types mysql_tp;
- CHARSET_INFO* charset;
-
- mysql_tp = (enum_field_types) mysql_type;
-
- switch (mysql_tp) {
-
- case MYSQL_TYPE_BIT:
- case MYSQL_TYPE_STRING:
- case MYSQL_TYPE_VAR_STRING:
- case MYSQL_TYPE_TINY_BLOB:
- case MYSQL_TYPE_MEDIUM_BLOB:
- case MYSQL_TYPE_BLOB:
- case MYSQL_TYPE_LONG_BLOB:
- case MYSQL_TYPE_VARCHAR:
- /* Use the charset number to pick the right charset struct for
- the comparison. Since the MySQL function get_charset may be
- slow before Bar removes the mutex operation there, we first
- look at 2 common charsets directly. */
-
- if (charset_number == default_charset_info->number) {
- charset = default_charset_info;
- } else if (charset_number == my_charset_latin1.number) {
- charset = &my_charset_latin1;
- } else {
- charset = get_charset(charset_number, MYF(MY_WME));
-
- if (charset == NULL) {
- sql_print_error("InnoDB needs charset %lu for doing "
- "a comparison, but MySQL cannot "
- "find that charset.",
- (ulong) charset_number);
- ut_a(0);
- }
- }
- break;
- default:
- ut_error;
- }
-
- return(charset);
-}
-
-/*************************************************************//**
-InnoDB uses this function to compare two data fields for which the data type
-is such that we must use MySQL code to compare them. NOTE that the prototype
-of this function is in rem0cmp.c in InnoDB source code! If you change this
-function, remember to update the prototype there!
-@return 1, 0, -1, if a is greater, equal, less than b, respectively */
-UNIV_INTERN
-int
-innobase_mysql_cmp_prefix(
-/*======================*/
- int mysql_type, /*!< in: MySQL type */
- uint charset_number, /*!< in: number of the charset */
- const unsigned char* a, /*!< in: data field */
- unsigned int a_length, /*!< in: data field length,
- not UNIV_SQL_NULL */
- const unsigned char* b, /*!< in: data field */
- unsigned int b_length) /*!< in: data field length,
- not UNIV_SQL_NULL */
-{
- CHARSET_INFO* charset;
- int result;
-
- charset = innobase_get_fts_charset(mysql_type, charset_number);
-
- result = ha_compare_text(charset, (uchar*) a, a_length,
- (uchar*) b, b_length, 1, 0);
-
- return(result);
-}
/******************************************************************//**
compare two character string according to their charset. */
-UNIV_INTERN
int
innobase_fts_text_cmp(
/*==================*/
@@ -6563,11 +6778,11 @@ innobase_fts_text_cmp(
return(ha_compare_text(
charset, s1->f_str, static_cast<uint>(s1->f_len),
- s2->f_str, static_cast<uint>(s2->f_len), 0, 0));
+ s2->f_str, static_cast<uint>(s2->f_len), 0));
}
+
/******************************************************************//**
compare two character string case insensitively according to their charset. */
-UNIV_INTERN
int
innobase_fts_text_case_cmp(
/*=======================*/
@@ -6586,11 +6801,11 @@ innobase_fts_text_case_cmp(
return(ha_compare_text(
charset, s1->f_str, static_cast<uint>(s1->f_len),
- s2->f_str, static_cast<uint>(newlen), 0, 0));
+ s2->f_str, static_cast<uint>(newlen), 0));
}
+
/******************************************************************//**
Get the first character's code position for FTS index partition. */
-UNIV_INTERN
ulint
innobase_strnxfrm(
/*==============*/
@@ -6619,7 +6834,6 @@ innobase_strnxfrm(
/******************************************************************//**
compare two character string according to their charset. */
-UNIV_INTERN
int
innobase_fts_text_cmp_prefix(
/*=========================*/
@@ -6634,7 +6848,7 @@ innobase_fts_text_cmp_prefix(
result = ha_compare_text(
charset, s2->f_str, static_cast<uint>(s2->f_len),
- s1->f_str, static_cast<uint>(s1->f_len), 1, 0);
+ s1->f_str, static_cast<uint>(s1->f_len), 1);
/* We switched s1, s2 position in ha_compare_text. So we need
to negate the result */
@@ -6643,7 +6857,6 @@ innobase_fts_text_cmp_prefix(
/******************************************************************//**
Makes all characters in a string lower case. */
-UNIV_INTERN
size_t
innobase_fts_casedn_str(
/*====================*/
@@ -6672,7 +6885,6 @@ innobase_fts_casedn_str(
Get the next token from the given string and store it in *token.
It is mostly copied from MyISAM's doc parsing function ft_simple_get_word()
@return length of string processed */
-UNIV_INTERN
ulint
innobase_mysql_fts_get_token(
/*=========================*/
@@ -6680,10 +6892,7 @@ innobase_mysql_fts_get_token(
const byte* start, /*!< in: start of text */
const byte* end, /*!< in: one character past end of
text */
- fts_string_t* token, /*!< out: token's text */
- ulint* offset) /*!< out: offset to token,
- measured as characters from
- 'start' */
+ fts_string_t* token) /*!< out: token's text */
{
int mbl;
const uchar* doc = start;
@@ -6741,21 +6950,17 @@ innobase_mysql_fts_get_token(
return(doc - start);
}
-/**************************************************************//**
-Converts a MySQL type to an InnoDB type. Note that this function returns
+/** Converts a MySQL type to an InnoDB type. Note that this function returns
the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'.
-@return DATA_BINARY, DATA_VARCHAR, ... */
-UNIV_INTERN
+@param[out] unsigned_flag DATA_UNSIGNED if an 'unsigned type'; at least
+ENUM and SET, and unsigned integer types are 'unsigned types'
+@param[in] f MySQL Field
+@return DATA_BINARY, DATA_VARCHAR, ... */
ulint
get_innobase_type_from_mysql_type(
-/*==============================*/
- ulint* unsigned_flag, /*!< out: DATA_UNSIGNED if an
- 'unsigned type';
- at least ENUM and SET,
- and unsigned integer
- types are 'unsigned types' */
- const void* f) /*!< in: MySQL Field */
+ ulint* unsigned_flag,
+ const void* f)
{
const class Field* field = reinterpret_cast<const class Field*>(f);
@@ -6822,13 +7027,14 @@ get_innobase_type_from_mysql_type(
case MYSQL_TYPE_YEAR:
case MYSQL_TYPE_NEWDATE:
return(DATA_INT);
- case MYSQL_TYPE_TIMESTAMP:
case MYSQL_TYPE_TIME:
case MYSQL_TYPE_DATETIME:
- if (field->key_type() == HA_KEYTYPE_BINARY)
+ case MYSQL_TYPE_TIMESTAMP:
+ if (field->key_type() == HA_KEYTYPE_BINARY) {
return(DATA_FIXBINARY);
- else
+ } else {
return(DATA_INT);
+ }
case MYSQL_TYPE_FLOAT:
return(DATA_FLOAT);
case MYSQL_TYPE_DOUBLE:
@@ -6836,6 +7042,7 @@ get_innobase_type_from_mysql_type(
case MYSQL_TYPE_DECIMAL:
return(DATA_DECIMAL);
case MYSQL_TYPE_GEOMETRY:
+ return(DATA_GEOMETRY);
case MYSQL_TYPE_TINY_BLOB:
case MYSQL_TYPE_MEDIUM_BLOB:
case MYSQL_TYPE_BLOB:
@@ -6854,25 +7061,9 @@ get_innobase_type_from_mysql_type(
}
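A short note on the new GEOMETRY arm above:

/*
	A MYSQL_TYPE_GEOMETRY column now maps to DATA_GEOMETRY instead
	of falling through to DATA_BLOB.  The DATA_GEOMETRY/DATA_BLOB
	compatibility switch added to innobase_match_index_columns()
	earlier in this patch exists precisely so that tables carrying
	legacy 5.6-era GIS columns stored as DATA_BLOB still match.
*/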
/*******************************************************************//**
-Writes an unsigned integer value < 64k to 2 bytes, in the little-endian
-storage format. */
-static inline
-void
-innobase_write_to_2_little_endian(
-/*==============================*/
- byte* buf, /*!< in: where to store */
- ulint val) /*!< in: value to write, must be < 64k */
-{
- ut_a(val < 256 * 256);
-
- buf[0] = (byte)(val & 0xFF);
- buf[1] = (byte)(val / 256);
-}
-
-/*******************************************************************//**
Reads an unsigned integer value < 64k from 2 bytes, in the little-endian
storage format.
-@return value */
+@return value */
static inline
uint
innobase_read_from_2_little_endian(
@@ -6889,7 +7080,7 @@ Stores a key value for a row to a buffer.
UNIV_INTERN
uint
wsrep_store_key_val_for_row(
-/*===============================*/
+/*=========================*/
THD* thd,
TABLE* table,
uint keynr, /*!< in: key number */
@@ -6919,7 +7110,7 @@ wsrep_store_key_val_for_row(
if (key_part->null_bit) {
if (buff_space > 0) {
- if (record[key_part->null_offset]
+ if (record[key_part->null_offset]
& key_part->null_bit) {
*buff = 1;
part_is_null = TRUE;
@@ -6978,11 +7169,11 @@ wsrep_store_key_val_for_row(
the true length of the key */
if (len > 0 && cs->mbmaxlen > 1) {
- true_len = (ulint) cs->cset->well_formed_len(cs,
+ true_len = (ulint) my_well_formed_length(cs,
(const char *) data,
(const char *) data + len,
- (uint) (key_len /
- cs->mbmaxlen),
+ (uint) (key_len /
+ cs->mbmaxlen),
&error);
}
@@ -6995,15 +7186,14 @@ wsrep_store_key_val_for_row(
memcpy(sorted, data, true_len);
true_len = wsrep_innobase_mysql_sort(
- mysql_type, cs->number, sorted, true_len,
+ mysql_type, cs->number, sorted, true_len,
REC_VERSION_56_MAX_INDEX_COL_LEN);
-
if (wsrep_protocol_version > 1) {
- /* Note that we always reserve the maximum possible
- length of the true VARCHAR in the key value, though
- only len first bytes after the 2 length bytes contain
- actual data. The rest of the space was reset to zero
- in the bzero() call above. */
+ /* Note that we always reserve the maximum possible
+ length of the true VARCHAR in the key value, though
+ only len first bytes after the 2 length bytes contain
+ actual data. The rest of the space was reset to zero
+ in the bzero() call above. */
if (true_len > buff_space) {
fprintf (stderr,
"WSREP: key truncated: %s\n",
@@ -7011,11 +7201,11 @@ wsrep_store_key_val_for_row(
true_len = buff_space;
}
memcpy(buff, sorted, true_len);
- buff += true_len;
+ buff += true_len;
buff_space -= true_len;
- } else {
- buff += key_len;
- }
+ } else {
+ buff += key_len;
+ }
} else if (mysql_type == MYSQL_TYPE_TINY_BLOB
|| mysql_type == MYSQL_TYPE_MEDIUM_BLOB
|| mysql_type == MYSQL_TYPE_BLOB
@@ -7065,12 +7255,12 @@ wsrep_store_key_val_for_row(
the true length of the key */
if (blob_len > 0 && cs->mbmaxlen > 1) {
- true_len = (ulint) cs->cset->well_formed_len(cs,
+ true_len = (ulint) my_well_formed_length(cs,
(const char *) blob_data,
(const char *) blob_data
+ blob_len,
- (uint) (key_len /
- cs->mbmaxlen),
+ (uint) (key_len /
+ cs->mbmaxlen),
&error);
}
@@ -7090,7 +7280,7 @@ wsrep_store_key_val_for_row(
/* Note that we always reserve the maximum possible
length of the BLOB prefix in the key value. */
- if (wsrep_protocol_version > 1) {
+ if (wsrep_protocol_version > 1) {
if (true_len > buff_space) {
fprintf (stderr,
"WSREP: key truncated: %s\n",
@@ -7154,12 +7344,12 @@ wsrep_store_key_val_for_row(
if (key_len > 0 && cs->mbmaxlen > 1) {
true_len = (ulint)
- cs->cset->well_formed_len(cs,
+ my_well_formed_length(cs,
(const char *)src_start,
(const char *)src_start
+ key_len,
- (uint) (key_len /
- cs->mbmaxlen),
+ (uint) (key_len /
+ cs->mbmaxlen),
&error);
}
memcpy(sorted, src_start, true_len);
@@ -7187,285 +7377,8 @@ wsrep_store_key_val_for_row(
DBUG_RETURN((uint)(buff - buff_start));
}
#endif /* WITH_WSREP */
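
The recurring change in this function replaces cs->cset->well_formed_len() with the my_well_formed_length() wrapper; both yield the byte length of at most a given number of complete characters. A hedged sketch of that computation, hard-coded for UTF-8 instead of a CHARSET_INFO and without validating continuation bytes:

#include <cstddef>

static std::size_t utf8_prefix_bytes(const unsigned char* b,
				     const unsigned char* e,
				     std::size_t nchars)
{
	const unsigned char* p = b;
	while (nchars-- > 0 && p < e) {
		std::size_t len = (*p < 0x80) ? 1	/* ASCII */
			: ((*p >> 5) == 0x6) ? 2	/* 110xxxxx */
			: ((*p >> 4) == 0xE) ? 3 : 4;	/* 1110xxxx / rest */
		if ((std::size_t) (e - p) < len) {
			break;	/* incomplete trailing sequence: stop */
		}
		p += len;
	}
	return (std::size_t) (p - b);	/* bytes of whole characters */
}
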
-
-/*******************************************************************//**
-Stores a key value for a row to a buffer.
-@return key value length as stored in buff */
-UNIV_INTERN
-uint
-ha_innobase::store_key_val_for_row(
-/*===============================*/
- uint keynr, /*!< in: key number */
- char* buff, /*!< in/out: buffer for the key value (in MySQL
- format) */
- uint buff_len,/*!< in: buffer length */
- const uchar* record)/*!< in: row in MySQL format */
-{
- KEY* key_info = table->key_info + keynr;
- KEY_PART_INFO* key_part = key_info->key_part;
- KEY_PART_INFO* end =
- key_part + key_info->user_defined_key_parts;
- char* buff_start = buff;
- enum_field_types mysql_type;
- Field* field;
- ibool is_null;
-
- DBUG_ENTER("store_key_val_for_row");
-
- /* The format for storing a key field in MySQL is the following:
-
- 1. If the column can be NULL, then in the first byte we put 1 if the
- field value is NULL, 0 otherwise.
-
- 2. If the column is of a BLOB type (it must be a column prefix field
- in this case), then we put the length of the data in the field to the
- next 2 bytes, in the little-endian format. If the field is SQL NULL,
- then these 2 bytes are set to 0. Note that the length of data in the
- field is <= column prefix length.
-
- 3. In a column prefix field, prefix_len next bytes are reserved for
- data. In a normal field the max field length next bytes are reserved
- for data. For a VARCHAR(n) the max field length is n. If the stored
- value is the SQL NULL then these data bytes are set to 0.
-
- 4. We always use a 2 byte length for a true >= 5.0.3 VARCHAR. Note that
- in the MySQL row format, the length is stored in 1 or 2 bytes,
- depending on the maximum allowed length. But in the MySQL key value
- format, the length always takes 2 bytes.
-
- We have to zero-fill the buffer so that MySQL is able to use a
- simple memcmp to compare two key values to determine if they are
- equal. MySQL does this to compare contents of two 'ref' values. */
-
- memset(buff, 0, buff_len);
-
- for (; key_part != end; key_part++) {
- is_null = FALSE;
-
- if (key_part->null_bit) {
- if (record[key_part->null_offset]
- & key_part->null_bit) {
- *buff = 1;
- is_null = TRUE;
- } else {
- *buff = 0;
- }
- buff++;
- }
-
- field = key_part->field;
- mysql_type = field->type();
-
- if (mysql_type == MYSQL_TYPE_VARCHAR) {
- /* >= 5.0.3 true VARCHAR */
- ulint lenlen;
- ulint len;
- const byte* data;
- ulint key_len;
- ulint true_len;
- const CHARSET_INFO* cs;
- int error=0;
-
- key_len = key_part->length;
-
- if (is_null) {
- buff += key_len + 2;
-
- continue;
- }
- cs = field->charset();
-
- lenlen = (ulint)
- (((Field_varstring*) field)->length_bytes);
-
- data = row_mysql_read_true_varchar(&len,
- (byte*) (record
- + (ulint) get_field_offset(table, field)),
- lenlen);
-
- true_len = len;
-
- /* For multi byte character sets we need to calculate
- the true length of the key */
-
- if (len > 0 && cs->mbmaxlen > 1) {
- true_len = (ulint) cs->cset->well_formed_len(cs,
- (const char*) data,
- (const char*) data + len,
- (uint) (key_len / cs->mbmaxlen),
- &error);
- }
-
- /* In a column prefix index, we may need to truncate
- the stored value: */
-
- if (true_len > key_len) {
- true_len = key_len;
- }
-
- /* The length in a key value is always stored in 2
- bytes */
-
- row_mysql_store_true_var_len((byte*) buff, true_len, 2);
- buff += 2;
-
- memcpy(buff, data, true_len);
-
- /* Note that we always reserve the maximum possible
- length of the true VARCHAR in the key value, though
- only len first bytes after the 2 length bytes contain
- actual data. The rest of the space was reset to zero
- in the memset() call above. */
-
- buff += key_len;
-
- } else if (mysql_type == MYSQL_TYPE_TINY_BLOB
- || mysql_type == MYSQL_TYPE_MEDIUM_BLOB
- || mysql_type == MYSQL_TYPE_BLOB
- || mysql_type == MYSQL_TYPE_LONG_BLOB
- /* MYSQL_TYPE_GEOMETRY data is treated
- as BLOB data in innodb. */
- || mysql_type == MYSQL_TYPE_GEOMETRY) {
-
- const CHARSET_INFO* cs;
- ulint key_len;
- ulint true_len;
- int error=0;
- ulint blob_len;
- const byte* blob_data;
-
- ut_a(key_part->key_part_flag & HA_PART_KEY_SEG);
-
- key_len = key_part->length;
-
- if (is_null) {
- buff += key_len + 2;
-
- continue;
- }
-
- cs = field->charset();
-
- blob_data = row_mysql_read_blob_ref(&blob_len,
- (byte*) (record
- + (ulint) get_field_offset(table, field)),
- (ulint) field->pack_length());
-
- true_len = blob_len;
-
- ut_a(get_field_offset(table, field)
- == key_part->offset);
-
- /* For multi byte character sets we need to calculate
- the true length of the key */
-
- if (blob_len > 0 && cs->mbmaxlen > 1) {
- true_len = (ulint) cs->cset->well_formed_len(cs,
- (const char*) blob_data,
- (const char*) blob_data
- + blob_len,
- (uint) (key_len / cs->mbmaxlen),
- &error);
- }
-
- /* All indexes on BLOB and TEXT are column prefix
- indexes, and we may need to truncate the data to be
- stored in the key value: */
-
- if (true_len > key_len) {
- true_len = key_len;
- }
-
- /* MySQL reserves 2 bytes for the length and the
- storage of the number is little-endian */
-
- innobase_write_to_2_little_endian(
- (byte*) buff, true_len);
- buff += 2;
-
- memcpy(buff, blob_data, true_len);
-
- /* Note that we always reserve the maximum possible
- length of the BLOB prefix in the key value. */
-
- buff += key_len;
- } else {
- /* Here we handle all other data types except the
- true VARCHAR, BLOB and TEXT. Note that the column
- value we store may be also in a column prefix
- index. */
-
- const CHARSET_INFO* cs = NULL;
- ulint true_len;
- ulint key_len;
- const uchar* src_start;
- int error=0;
- enum_field_types real_type;
-
- key_len = key_part->length;
-
- if (is_null) {
- buff += key_len;
-
- continue;
- }
-
- src_start = record + key_part->offset;
- real_type = field->real_type();
- true_len = key_len;
-
- /* Character set for the field is defined only
- to fields whose type is string and real field
- type is not enum or set. For these fields check
- if character set is multi byte. */
-
- if (real_type != MYSQL_TYPE_ENUM
- && real_type != MYSQL_TYPE_SET
- && ( mysql_type == MYSQL_TYPE_VAR_STRING
- || mysql_type == MYSQL_TYPE_STRING)) {
-
- cs = field->charset();
-
- /* For multi byte character sets we need to
- calculate the true length of the key */
-
- if (key_len > 0 && cs->mbmaxlen > 1) {
-
- true_len = (ulint)
- cs->cset->well_formed_len(cs,
- (const char*) src_start,
- (const char*) src_start
- + key_len,
- (uint) (key_len
- / cs->mbmaxlen),
- &error);
- }
- }
-
- memcpy(buff, src_start, true_len);
- buff += true_len;
-
- /* Pad the unused space with spaces. */
-
- if (true_len < key_len) {
- ulint pad_len = key_len - true_len;
- ut_a(cs != NULL);
- ut_a(!(pad_len % cs->mbminlen));
-
- cs->cset->fill(cs, buff, pad_len,
- 0x20 /* space */);
- buff += pad_len;
- }
- }
- }
-
- ut_a(buff <= buff_start + buff_len);
-
- DBUG_RETURN((uint)(buff - buff_start));
-}
-
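
The header comment of the removed function lays out the MySQL key-value format in four steps; here is a compact sketch of those steps for a single nullable VARCHAR key part, with illustrative names and a caller-provided buffer of at least 1 + 2 + key_len bytes:

#include <cstring>

static std::size_t store_varchar_key_part(unsigned char* buff,
					  std::size_t key_len, bool is_null,
					  const unsigned char* data,
					  std::size_t len)
{
	std::memset(buff, 0, 1 + 2 + key_len);	/* zero-fill: memcmp()-able */
	buff[0] = is_null ? 1 : 0;		/* 1. NULL indicator byte */
	if (!is_null) {
		if (len > key_len) {
			len = key_len;		/* column prefix truncation */
		}
		buff[1] = (unsigned char) (len & 0xFF);	/* 2. length, 2 bytes */
		buff[2] = (unsigned char) (len >> 8);	/*    little-endian */
		std::memcpy(buff + 3, data, len);	/* 3. data; tail stays 0 */
	}
	return 1 + 2 + key_len;	/* 4. the full width is always reserved */
}
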
/**************************************************************//**
-Determines if a field is needed in a prebuilt struct 'template'.
+Determines if a field is needed in the m_prebuilt struct 'template'.
@return field to use, or NULL if the field is not needed */
static
const Field*
@@ -7489,11 +7402,14 @@ build_template_needs_field(
dict_index_t* index, /*!< in: InnoDB index to use */
const TABLE* table, /*!< in: MySQL table object */
ulint i, /*!< in: field index in InnoDB table */
- ulint sql_idx) /*!< in: field index in SQL table */
+ ulint num_v) /*!< in: num virtual column so far */
{
- const Field* field = table->field[sql_idx];
+ const Field* field = table->field[i];
- ut_ad(index_contains == dict_index_contains_col_or_prefix(index, i));
+ if (!field->stored_in_db()
+ && ha_innobase::omits_virtual_cols(*table->s)) {
+ return NULL;
+ }
if (!index_contains) {
if (read_just_key) {
@@ -7508,17 +7424,17 @@ build_template_needs_field(
return(field);
}
- if (bitmap_is_set(table->read_set, static_cast<uint>(sql_idx))
- || bitmap_is_set(table->write_set, static_cast<uint>(sql_idx))) {
+ if (bitmap_is_set(table->read_set, static_cast<uint>(i))
+ || bitmap_is_set(table->write_set, static_cast<uint>(i))) {
/* This field is needed in the query */
return(field);
}
+ ut_ad(i >= num_v);
if (fetch_primary_key_cols
- && dict_table_col_in_clustered_key(index->table, i)) {
+ && dict_table_col_in_clustered_key(index->table, i - num_v)) {
/* This field is needed in the query */
-
return(field);
}
@@ -7528,7 +7444,7 @@ build_template_needs_field(
}
/**************************************************************//**
-Determines if a field is needed in a prebuilt struct 'template'.
+Determines if a field is needed in the m_prebuilt struct 'template'.
@return whether the field is needed for index condition pushdown */
inline
bool
@@ -7538,17 +7454,19 @@ build_template_needs_field_in_icp(
const row_prebuilt_t* prebuilt,/*!< in: row fetch template */
bool contains,/*!< in: whether the index contains
column i */
- ulint i) /*!< in: column number */
+ ulint i, /*!< in: column number */
+ bool is_virtual)
+ /*!< in: a virtual column or not */
{
- ut_ad(contains == dict_index_contains_col_or_prefix(index, i));
+ ut_ad(contains == dict_index_contains_col_or_prefix(index, i, is_virtual));
return(index == prebuilt->index
? contains
- : dict_index_contains_col_or_prefix(prebuilt->index, i));
+ : dict_index_contains_col_or_prefix(prebuilt->index, i, is_virtual));
}
/**************************************************************//**
-Adds a field to a prebuilt struct 'template'.
+Adds a field to the m_prebuilt struct 'template'.
@return the field template */
static
mysql_row_templ_t*
@@ -7559,94 +7477,107 @@ build_template_field(
dict_index_t* index, /*!< in: InnoDB index to use */
TABLE* table, /*!< in: MySQL table object */
const Field* field, /*!< in: field in MySQL table */
- ulint i) /*!< in: field index in InnoDB table */
+ ulint i, /*!< in: field index in InnoDB table */
+ ulint v_no) /*!< in: field index for virtual col */
{
mysql_row_templ_t* templ;
const dict_col_t* col;
- //ut_ad(field == table->field[i]);
ut_ad(clust_index->table == index->table);
- col = dict_table_get_nth_col(index->table, i);
-
templ = prebuilt->mysql_template + prebuilt->n_template++;
UNIV_MEM_INVALID(templ, sizeof *templ);
- templ->col_no = i;
- templ->clust_rec_field_no = dict_col_get_clust_pos(col, clust_index);
-
- /* If clustered index record field is not found, lets print out
- field names and all the rest to understand why field is not found. */
- if (templ->clust_rec_field_no == ULINT_UNDEFINED) {
- const char* tb_col_name = dict_table_get_col_name(clust_index->table, i);
- dict_field_t* field=NULL;
- size_t size = 0;
-
- for(ulint j=0; j < clust_index->n_user_defined_cols; j++) {
- dict_field_t* ifield = &(clust_index->fields[j]);
- if (ifield && !memcmp(tb_col_name, ifield->name,
- strlen(tb_col_name))) {
- field = ifield;
- break;
+ templ->is_virtual = !field->stored_in_db();
+
+ if (!templ->is_virtual) {
+ templ->col_no = i;
+ col = dict_table_get_nth_col(index->table, i);
+ templ->clust_rec_field_no = dict_col_get_clust_pos(
+ col, clust_index);
+ /* If the clustered index record field is not found, let's print
+ out the field names and related metadata to understand why. */
+ if (templ->clust_rec_field_no == ULINT_UNDEFINED) {
+ const char* tb_col_name = dict_table_get_col_name(clust_index->table, i);
+ dict_field_t* field=NULL;
+ size_t size = 0;
+
+ for(ulint j=0; j < clust_index->n_user_defined_cols; j++) {
+ dict_field_t* ifield = &(clust_index->fields[j]);
+ if (ifield && !memcmp(tb_col_name, ifield->name,
+ strlen(tb_col_name))) {
+ field = ifield;
+ break;
+ }
}
- }
- ib_logf(IB_LOG_LEVEL_INFO,
- "Looking for field %lu name %s from table %s",
- i,
- (tb_col_name ? tb_col_name : "NULL"),
- clust_index->table->name);
-
-
- for(ulint j=0; j < clust_index->n_user_defined_cols; j++) {
- dict_field_t* ifield = &(clust_index->fields[j]);
- ib_logf(IB_LOG_LEVEL_INFO,
- "InnoDB Table %s field %lu name %s",
- clust_index->table->name,
- j,
- (ifield ? ifield->name : "NULL"));
- }
-
- for(ulint j=0; j < table->s->stored_fields; j++) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "MySQL table %s field %lu name %s",
- table->s->table_name.str,
- j,
- table->field[j]->field_name);
- }
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Clustered record field for column %lu"
- " not found table n_user_defined %d"
- " index n_user_defined %d"
- " InnoDB table %s field name %s"
- " MySQL table %s field name %s n_fields %d"
- " query %s",
- i,
- clust_index->n_user_defined_cols,
- clust_index->table->n_cols - DATA_N_SYS_COLS,
- clust_index->table->name,
- (field ? field->name : "NULL"),
- table->s->table_name.str,
- (tb_col_name ? tb_col_name : "NULL"),
- table->s->stored_fields,
- innobase_get_stmt(current_thd, &size));
+ ib::info() << "Looking for field " << i << " name "
+ << (tb_col_name ? tb_col_name : "NULL")
+ << " from table " << clust_index->table->name;
- ut_a(templ->clust_rec_field_no != ULINT_UNDEFINED);
- }
- templ->rec_field_is_prefix = FALSE;
- if (dict_index_is_clust(index)) {
- templ->rec_field_no = templ->clust_rec_field_no;
+ for(ulint j=0; j < clust_index->n_user_defined_cols; j++) {
+ dict_field_t* ifield = &(clust_index->fields[j]);
+ ib::info() << "InnoDB Table "
+ << clust_index->table->name
+ << " field " << j << " name "
+ << (ifield ? ifield->name() : "NULL");
+ }
+
+ for(ulint j=0; j < table->s->stored_fields; j++) {
+ ib::info() << "MySQL table "
+ << table->s->table_name.str
+ << " field " << j << " name "
+ << table->field[j]->field_name;
+ }
+
+ ib::fatal() << "Clustered record field for column " << i
+ << " not found table n_user_defined "
+ << clust_index->n_user_defined_cols
+ << " index n_user_defined "
+ << clust_index->table->n_cols - DATA_N_SYS_COLS
+ << " InnoDB table "
+ << clust_index->table->name
+ << " field name "
+ << (field ? field->name() : "NULL")
+ << " MySQL table "
+ << table->s->table_name.str
+ << " field name "
+ << (tb_col_name ? tb_col_name : "NULL")
+ << " n_fields "
+ << table->s->stored_fields
+ << " query "
+ << innobase_get_stmt_unsafe(current_thd, &size);
+ }
+ templ->rec_field_is_prefix = FALSE;
templ->rec_prefix_field_no = ULINT_UNDEFINED;
- } else {
- /* If we're in a secondary index, keep track
- * of the original index position even if this
- * is just a prefix index; we will use this
- * later to avoid a cluster index lookup in
- * some cases.*/
- templ->rec_field_no = dict_index_get_nth_col_pos(index, i,
+ if (dict_index_is_clust(index)) {
+ templ->rec_field_no = templ->clust_rec_field_no;
+ } else {
+ /* If we're in a secondary index, keep track
+ * of the original index position even if this
+ * is just a prefix index; we will use this
+ * later to avoid a cluster index lookup in
+ * some cases.*/
+
+ templ->rec_field_no = dict_index_get_nth_col_pos(index, i,
&templ->rec_prefix_field_no);
+ }
+ } else {
+ DBUG_ASSERT(!ha_innobase::omits_virtual_cols(*table->s));
+ col = &dict_table_get_nth_v_col(index->table, v_no)->m_col;
+ templ->clust_rec_field_no = v_no;
+ templ->rec_prefix_field_no = ULINT_UNDEFINED;
+
+ if (dict_index_is_clust(index)) {
+ templ->rec_field_no = templ->clust_rec_field_no;
+ } else {
+ templ->rec_field_no
+ = dict_index_get_nth_col_or_prefix_pos(
+ index, v_no, FALSE, true,
+ &templ->rec_prefix_field_no);
+ }
+ templ->icp_rec_field_no = ULINT_UNDEFINED;
}
if (field->real_maybe_null()) {
@@ -7658,8 +7589,8 @@ build_template_field(
templ->mysql_null_bit_mask = 0;
}
- templ->mysql_col_offset = (ulint) get_field_offset(table, field);
+ templ->mysql_col_offset = (ulint) get_field_offset(table, field);
templ->mysql_col_len = (ulint) field->pack_length();
templ->type = col->mtype;
templ->mysql_type = (ulint) field->type();
@@ -7667,6 +7598,8 @@ build_template_field(
if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
templ->mysql_length_bytes = (ulint)
(((Field_varstring*) field)->length_bytes);
+ } else {
+ templ->mysql_length_bytes = 0;
}
templ->charset = dtype_get_charset_coll(col->prtype);
@@ -7686,13 +7619,18 @@ build_template_field(
}
}
+ /* For a spatial index, we need to access the clustered index. */
+ if (dict_index_is_spatial(index)) {
+ prebuilt->need_to_access_clustered = TRUE;
+ }
+
if (prebuilt->mysql_prefix_len < templ->mysql_col_offset
+ templ->mysql_col_len) {
prebuilt->mysql_prefix_len = templ->mysql_col_offset
+ templ->mysql_col_len;
}
- if (templ->type == DATA_BLOB) {
+ if (DATA_LARGE_MTYPE(templ->type)) {
prebuilt->templ_contains_blob = TRUE;
}
@@ -7700,9 +7638,9 @@ build_template_field(
}
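
A reduced model of the per-column template this function fills in; the real mysql_row_templ_t carries many more members, and the names below merely paraphrase the assignments above:

struct row_templ_sketch {
	unsigned long	col_no;			/* InnoDB column number */
	bool		is_virtual;		/* !field->stored_in_db() */
	unsigned long	mysql_col_offset;	/* byte offset in MySQL row */
	unsigned long	mysql_col_len;		/* field->pack_length() */
	unsigned long	mysql_length_bytes;	/* 1 or 2 for true VARCHAR,
						now explicitly 0 otherwise */
	bool		contains_blob;		/* set via DATA_LARGE_MTYPE */
};
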
/**************************************************************//**
-Builds a 'template' to the prebuilt struct. The template is used in fast
+Builds a 'template' in the m_prebuilt struct. The template is used in fast
retrieval of just those column values MySQL needs in its processing. */
-UNIV_INTERN
+
void
ha_innobase::build_template(
/*========================*/
@@ -7711,25 +7649,23 @@ ha_innobase::build_template(
{
dict_index_t* index;
dict_index_t* clust_index;
- ulint n_stored_fields;
ibool fetch_all_in_key = FALSE;
ibool fetch_primary_key_cols = FALSE;
- ulint i, sql_idx;
- if (prebuilt->select_lock_type == LOCK_X) {
+ if (m_prebuilt->select_lock_type == LOCK_X) {
/* We always retrieve the whole clustered index record if we
use exclusive row level locks, for example, if the read is
done in an UPDATE statement. */
whole_row = true;
} else if (!whole_row) {
- if (prebuilt->hint_need_to_fetch_extra_cols
+ if (m_prebuilt->hint_need_to_fetch_extra_cols
== ROW_RETRIEVE_ALL_COLS) {
/* We know we must at least fetch all columns in the
key, or all columns in the table */
- if (prebuilt->read_just_key) {
+ if (m_prebuilt->read_just_key) {
/* MySQL has instructed us that it is enough
to fetch the columns in the key; looks like
MySQL can set this flag also when there is
@@ -7741,68 +7677,80 @@ ha_innobase::build_template(
} else {
whole_row = true;
}
- } else if (prebuilt->hint_need_to_fetch_extra_cols
+ } else if (m_prebuilt->hint_need_to_fetch_extra_cols
== ROW_RETRIEVE_PRIMARY_KEY) {
/* We must at least fetch all primary key cols. Note
that if the clustered index was internally generated
by InnoDB on the row id (no primary key was
defined), then row_search_for_mysql() will always
retrieve the row id to a special buffer in the
- prebuilt struct. */
+ m_prebuilt struct. */
fetch_primary_key_cols = TRUE;
}
}
- clust_index = dict_table_get_first_index(prebuilt->table);
+ clust_index = dict_table_get_first_index(m_prebuilt->table);
- index = whole_row ? clust_index : prebuilt->index;
+ index = whole_row ? clust_index : m_prebuilt->index;
- prebuilt->need_to_access_clustered = (index == clust_index);
+ m_prebuilt->need_to_access_clustered = (index == clust_index);
- /* Either prebuilt->index should be a secondary index, or it
+ /* Either m_prebuilt->index should be a secondary index, or it
should be the clustered index. */
ut_ad(dict_index_is_clust(index) == (index == clust_index));
/* Below we check column by column if we need to access
the clustered index. */
- n_stored_fields= (ulint)table->s->stored_fields; /* number of stored columns */
+ const bool skip_virtual = omits_virtual_cols(*table_share);
+ const ulint n_fields = table_share->fields;
- if (!prebuilt->mysql_template) {
- prebuilt->mysql_template = (mysql_row_templ_t*)
- mem_alloc(n_stored_fields * sizeof(mysql_row_templ_t));
+ if (!m_prebuilt->mysql_template) {
+ m_prebuilt->mysql_template = (mysql_row_templ_t*)
+ ut_malloc_nokey(n_fields * sizeof(mysql_row_templ_t));
}
- prebuilt->template_type = whole_row
+ m_prebuilt->template_type = whole_row
? ROW_MYSQL_WHOLE_ROW : ROW_MYSQL_REC_FIELDS;
- prebuilt->null_bitmap_len = table->s->null_bytes;
+ m_prebuilt->null_bitmap_len = table->s->null_bytes;
- /* Prepare to build prebuilt->mysql_template[]. */
- prebuilt->templ_contains_blob = FALSE;
- prebuilt->mysql_prefix_len = 0;
- prebuilt->n_template = 0;
- prebuilt->idx_cond_n_cols = 0;
+ /* Prepare to build m_prebuilt->mysql_template[]. */
+ m_prebuilt->templ_contains_blob = FALSE;
+ m_prebuilt->mysql_prefix_len = 0;
+ m_prebuilt->n_template = 0;
+ m_prebuilt->idx_cond_n_cols = 0;
/* Note that in InnoDB, i is the column number in the table.
MySQL calls columns 'fields'. */
- if (active_index != MAX_KEY && active_index == pushed_idx_cond_keyno) {
- /* Push down an index condition or an end_range check. */
- for (i = 0, sql_idx = 0; i < n_stored_fields; i++, sql_idx++) {
-
- while (!table->field[sql_idx]->stored_in_db) {
- sql_idx++;
- }
+ ulint num_v = 0;
- const ibool index_contains
- = dict_index_contains_col_or_prefix(index, i);
+ if (active_index != MAX_KEY
+ && active_index == pushed_idx_cond_keyno) {
+ /* Push down an index condition or an end_range check. */
+ for (ulint i = 0; i < n_fields; i++) {
+ const Field* field = table->field[i];
+ const bool is_v = !field->stored_in_db();
+ if (is_v && skip_virtual) {
+ num_v++;
+ continue;
+ }
+ ibool index_contains
+ = dict_index_contains_col_or_prefix(
+ index, is_v ? num_v : i - num_v, is_v);
+ if (is_v && index_contains) {
+ m_prebuilt->n_template = 0;
+ num_v = 0;
+ goto no_icp;
+ }
/* Test if an end_range or an index condition
refers to the field. Note that "index" and
"index_contains" may refer to the clustered index.
- Index condition pushdown is relative to prebuilt->index
- (the index that is being looked up first). */
+ Index condition pushdown is relative to
+ m_prebuilt->index (the index that is being
+ looked up first). */
/* When join_read_always_key() invokes this
code via handler::ha_index_init() and
@@ -7813,43 +7761,47 @@ ha_innobase::build_template(
field->part_of_key.is_set(active_index)
which would be acceptable if end_range==NULL. */
if (build_template_needs_field_in_icp(
- index, prebuilt, index_contains, i)) {
- /* Needed in ICP */
- const Field* field;
- mysql_row_templ_t* templ;
-
- if (whole_row) {
- field = table->field[sql_idx];
- } else {
+ index, m_prebuilt, index_contains,
+ is_v ? num_v : i - num_v, is_v)) {
+ if (!whole_row) {
field = build_template_needs_field(
index_contains,
- prebuilt->read_just_key,
+ m_prebuilt->read_just_key,
fetch_all_in_key,
fetch_primary_key_cols,
- index, table, i, sql_idx);
+ index, table, i, num_v);
if (!field) {
+ if (is_v) {
+ num_v++;
+ }
continue;
}
}
- templ = build_template_field(
- prebuilt, clust_index, index,
- table, field, i);
- prebuilt->idx_cond_n_cols++;
- ut_ad(prebuilt->idx_cond_n_cols
- == prebuilt->n_template);
+ ut_ad(!is_v);
+
+ mysql_row_templ_t* templ= build_template_field(
+ m_prebuilt, clust_index, index,
+ table, field, i - num_v, 0);
+
+ ut_ad(!templ->is_virtual);
- if (index == prebuilt->index) {
+ m_prebuilt->idx_cond_n_cols++;
+ ut_ad(m_prebuilt->idx_cond_n_cols
+ == m_prebuilt->n_template);
+
+ if (index == m_prebuilt->index) {
templ->icp_rec_field_no
= templ->rec_field_no;
} else {
templ->icp_rec_field_no
= dict_index_get_nth_col_pos(
- prebuilt->index, i,
- NULL);
+ m_prebuilt->index,
+ i - num_v,
+ &templ->rec_prefix_field_no);
}
- if (dict_index_is_clust(prebuilt->index)) {
+ if (dict_index_is_clust(m_prebuilt->index)) {
ut_ad(templ->icp_rec_field_no
!= ULINT_UNDEFINED);
/* If the primary key includes
@@ -7860,7 +7812,7 @@ ha_innobase::build_template(
off-page (externally stored)
columns. */
if (templ->icp_rec_field_no
- < prebuilt->index->n_uniq) {
+ < m_prebuilt->index->n_uniq) {
/* This is a key column;
all set. */
continue;
@@ -7876,7 +7828,9 @@ ha_innobase::build_template(
templ->icp_rec_field_no
= dict_index_get_nth_col_or_prefix_pos(
- prebuilt->index, i, TRUE, NULL);
+ m_prebuilt->index, i - num_v,
+ true, false,
+ &templ->rec_prefix_field_no);
ut_ad(templ->icp_rec_field_no
!= ULINT_UNDEFINED);
@@ -7896,91 +7850,147 @@ ha_innobase::build_template(
we were unable to use an accurate condition
for end_range in the "if" condition above,
and the following assertion would fail.
- ut_ad(!dict_index_is_clust(prebuilt->index)
+ ut_ad(!dict_index_is_clust(m_prebuilt->index)
|| templ->rec_field_no
- < prebuilt->index->n_uniq);
+ < m_prebuilt->index->n_uniq);
*/
}
+
+ if (is_v) {
+ num_v++;
+ }
}
- ut_ad(prebuilt->idx_cond_n_cols > 0);
- ut_ad(prebuilt->idx_cond_n_cols == prebuilt->n_template);
+ ut_ad(m_prebuilt->idx_cond_n_cols > 0);
+ ut_ad(m_prebuilt->idx_cond_n_cols == m_prebuilt->n_template);
+
+ num_v = 0;
/* Include the fields that are not needed in index condition
pushdown. */
- for (i = 0, sql_idx = 0; i < n_stored_fields; i++, sql_idx++) {
-
- while (!table->field[sql_idx]->stored_in_db) {
- sql_idx++;
- }
+ for (ulint i = 0; i < n_fields; i++) {
+ const Field* field = table->field[i];
+ const bool is_v = !field->stored_in_db();
+ if (is_v && skip_virtual) {
+ num_v++;
+ continue;
+ }
- const ibool index_contains
- = dict_index_contains_col_or_prefix(index, i);
+ ibool index_contains
+ = dict_index_contains_col_or_prefix(
+ index, is_v ? num_v : i - num_v, is_v);
if (!build_template_needs_field_in_icp(
- index, prebuilt, index_contains, i)) {
+ index, m_prebuilt, index_contains,
+ is_v ? num_v : i - num_v, is_v)) {
/* Not needed in ICP */
- const Field* field;
-
- if (whole_row) {
- field = table->field[sql_idx];
- } else {
+ if (!whole_row) {
field = build_template_needs_field(
index_contains,
- prebuilt->read_just_key,
+ m_prebuilt->read_just_key,
fetch_all_in_key,
fetch_primary_key_cols,
- index, table, i, sql_idx);
+ index, table, i, num_v);
if (!field) {
+ if (is_v) {
+ num_v++;
+ }
continue;
}
}
- build_template_field(prebuilt,
- clust_index, index,
- table, field, i);
+ ut_d(mysql_row_templ_t* templ =)
+ build_template_field(
+ m_prebuilt, clust_index, index,
+ table, field, i - num_v, num_v);
+ ut_ad(templ->is_virtual == (ulint)is_v);
+
+ if (is_v) {
+ num_v++;
+ }
}
}
- prebuilt->idx_cond = this;
+ m_prebuilt->idx_cond = this;
} else {
+no_icp:
/* No index condition pushdown */
- prebuilt->idx_cond = NULL;
+ m_prebuilt->idx_cond = NULL;
+ ut_ad(num_v == 0);
- for (i = 0, sql_idx = 0; i < n_stored_fields; i++, sql_idx++) {
- const Field* field;
-
- while (!table->field[sql_idx]->stored_in_db) {
- sql_idx++;
- }
+ for (ulint i = 0; i < n_fields; i++) {
+ const Field* field = table->field[i];
+ const bool is_v = !field->stored_in_db();
if (whole_row) {
- field = table->field[sql_idx];
+ if (is_v && skip_virtual) {
+ num_v++;
+ continue;
+ }
+ /* Even if this is whole_row, if the search is
+ on a virtual column, read_just_key is
+ set, and the field is not in this index, we
+ will not try to fill in the value, since it
+ is stored neither in this index nor in the
+ clustered index. */
+ if (is_v
+ && m_prebuilt->read_just_key
+ && !dict_index_contains_col_or_prefix(
+ m_prebuilt->index, num_v, true))
+ {
+ /* Turn off ROW_MYSQL_WHOLE_ROW */
+ m_prebuilt->template_type =
+ ROW_MYSQL_REC_FIELDS;
+ num_v++;
+ continue;
+ }
} else {
+ ibool contain;
+
+ if (!is_v) {
+ contain = dict_index_contains_col_or_prefix(
+ index, i - num_v,
+ false);
+ } else if (skip_virtual
+ || dict_index_is_clust(index)) {
+ num_v++;
+ continue;
+ } else {
+ contain = dict_index_contains_col_or_prefix(
+ index, num_v, true);
+ }
+
field = build_template_needs_field(
- dict_index_contains_col_or_prefix(
- index, i),
- prebuilt->read_just_key,
+ contain,
+ m_prebuilt->read_just_key,
fetch_all_in_key,
fetch_primary_key_cols,
- index, table, i, sql_idx);
+ index, table, i, num_v);
if (!field) {
+ if (is_v) {
+ num_v++;
+ }
continue;
}
}
- build_template_field(prebuilt, clust_index, index,
- table, field, i);
+ ut_d(mysql_row_templ_t* templ =)
+ build_template_field(
+ m_prebuilt, clust_index, index,
+ table, field, i - num_v, num_v);
+ ut_ad(templ->is_virtual == (ulint)is_v);
+ if (is_v) {
+ num_v++;
+ }
}
}
- if (index != clust_index && prebuilt->need_to_access_clustered) {
+ if (index != clust_index && m_prebuilt->need_to_access_clustered) {
/* Change rec_field_no's to correspond to the clustered index
record */
- for (i = 0; i < prebuilt->n_template; i++) {
-
+ for (ulint i = 0; i < m_prebuilt->n_template; i++) {
mysql_row_templ_t* templ
- = &prebuilt->mysql_template[i];
+ = &m_prebuilt->mysql_template[i];
templ->rec_field_no = templ->clust_rec_field_no;
}
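
All of the loops above repeat one bookkeeping pattern: MySQL numbers all fields together, while InnoDB numbers stored and virtual columns separately, so a stored column's InnoDB position is i - num_v and a virtual column's is num_v. A minimal sketch, assuming only a per-field flag:

#include <cstddef>
#include <vector>

static void walk_fields(const std::vector<bool>& is_virtual)
{
	std::size_t num_v = 0;	/* virtual columns seen so far */

	for (std::size_t i = 0; i < is_virtual.size(); i++) {
		if (is_virtual[i]) {
			std::size_t v_pos = num_v++;	/* among virtual cols */
			(void) v_pos;
		} else {
			std::size_t pos = i - num_v;	/* among stored cols */
			(void) pos;
		}
	}
}
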
@@ -7993,8 +8003,8 @@ binlogging. We need to eliminate the non-determinism that will arise in
INSERT ... SELECT type of statements, since MySQL binlog only stores the
min value of the autoinc interval. Once that is fixed we can get rid of
the special lock handling.
-@return DB_SUCCESS if all OK else error code */
-UNIV_INTERN
+@return DB_SUCCESS if all OK else error code */
+
dberr_t
ha_innobase::innobase_lock_autoinc(void)
/*====================================*/
@@ -8007,7 +8017,7 @@ ha_innobase::innobase_lock_autoinc(void)
switch (innobase_autoinc_lock_mode) {
case AUTOINC_NO_LOCKING:
/* Acquire only the AUTOINC mutex. */
- dict_table_autoinc_lock(prebuilt->table);
+ dict_table_autoinc_lock(m_prebuilt->table);
break;
case AUTOINC_NEW_STYLE_LOCKING:
@@ -8016,21 +8026,20 @@ ha_innobase::innobase_lock_autoinc(void)
transaction has already acquired the AUTOINC lock on
behalf of a LOAD FILE or INSERT ... SELECT etc. type of
statement. */
- if (thd_sql_command(user_thd) == SQLCOM_INSERT
- || thd_sql_command(user_thd) == SQLCOM_REPLACE
- || thd_sql_command(user_thd) == SQLCOM_END // RBR event
+ if (thd_sql_command(m_user_thd) == SQLCOM_INSERT
+ || thd_sql_command(m_user_thd) == SQLCOM_REPLACE
+ || thd_sql_command(m_user_thd) == SQLCOM_END // RBR event
) {
- dict_table_t* ib_table = prebuilt->table;
/* Acquire the AUTOINC mutex. */
- dict_table_autoinc_lock(ib_table);
+ dict_table_autoinc_lock(m_prebuilt->table);
/* We need to check that another transaction isn't
already holding the AUTOINC lock on the table. */
- if (ib_table->n_waiting_or_granted_auto_inc_locks) {
+ if (m_prebuilt->table->n_waiting_or_granted_auto_inc_locks) {
/* Release the mutex to avoid deadlocks and
fall back to old style locking. */
- dict_table_autoinc_unlock(ib_table);
+ dict_table_autoinc_unlock(m_prebuilt->table);
} else {
/* Do not fall back to old style locking. */
break;
@@ -8041,12 +8050,12 @@ ha_innobase::innobase_lock_autoinc(void)
case AUTOINC_OLD_STYLE_LOCKING:
DBUG_EXECUTE_IF("die_if_autoinc_old_lock_style_used",
ut_ad(0););
- error = row_lock_table_autoinc_for_mysql(prebuilt);
+ error = row_lock_table_autoinc_for_mysql(m_prebuilt);
if (error == DB_SUCCESS) {
/* Acquire the AUTOINC mutex. */
- dict_table_autoinc_lock(prebuilt->table);
+ dict_table_autoinc_lock(m_prebuilt->table);
}
break;
@@ -8058,33 +8067,10 @@ ha_innobase::innobase_lock_autoinc(void)
}
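
A schematic of the three strategies the AUTOINC_*_LOCKING switch above chooses between; the decision logic is condensed and the names are placeholders, not the InnoDB API:

enum class autoinc_mode { no_locking, new_style, old_style };

/* Returns true when a table-level AUTO-INC lock (held until the end of
   the statement) is needed, false when the lightweight mutex suffices. */
static bool needs_table_autoinc_lock(autoinc_mode mode, bool has_waiters)
{
	switch (mode) {
	case autoinc_mode::no_locking:
		return false;		/* mutex only, maximum concurrency */
	case autoinc_mode::new_style:
		/* Simple INSERTs take only the mutex, but fall back to
		   the table lock when another transaction already holds
		   or waits for it. */
		return has_waiters;
	case autoinc_mode::old_style:
		return true;		/* always the table-level lock */
	}
	return true;
}
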
/********************************************************************//**
-Reset the autoinc value in the table.
-@return DB_SUCCESS if all went well else error code */
-UNIV_INTERN
-dberr_t
-ha_innobase::innobase_reset_autoinc(
-/*================================*/
- ulonglong autoinc) /*!< in: value to store */
-{
- dberr_t error;
-
- error = innobase_lock_autoinc();
-
- if (error == DB_SUCCESS) {
-
- dict_table_autoinc_initialize(prebuilt->table, autoinc);
-
- dict_table_autoinc_unlock(prebuilt->table);
- }
-
- return(error);
-}
-
-/********************************************************************//**
Store the autoinc value in the table. The autoinc value is only set if
it's greater than the existing autoinc value in the table.
-@return DB_SUCCESS if all went well else error code */
-UNIV_INTERN
+@return DB_SUCCESS if all went well else error code */
+
dberr_t
ha_innobase::innobase_set_max_autoinc(
/*==================================*/
@@ -8096,9 +8082,9 @@ ha_innobase::innobase_set_max_autoinc(
if (error == DB_SUCCESS) {
- dict_table_autoinc_update_if_greater(prebuilt->table, auto_inc);
+ dict_table_autoinc_update_if_greater(m_prebuilt->table, auto_inc);
- dict_table_autoinc_unlock(prebuilt->table);
+ dict_table_autoinc_unlock(m_prebuilt->table);
}
return(error);
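
A hedged sketch of the rule behind dict_table_autoinc_update_if_greater(): under the autoinc mutex, the counter only ever moves forward, so concurrent inserts that complete out of order cannot shrink it.

#include <cstdint>

static void autoinc_update_if_greater(std::uint64_t& counter,
				      std::uint64_t seen)
{
	if (seen > counter) {	/* the counter never moves backwards */
		counter = seen;
	}
}
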
@@ -8107,38 +8093,38 @@ ha_innobase::innobase_set_max_autoinc(
/********************************************************************//**
Stores a row in an InnoDB database, to the table specified in this
handle.
-@return error code */
-UNIV_INTERN
+@return error code */
+
int
ha_innobase::write_row(
/*===================*/
uchar* record) /*!< in: a row in MySQL format */
{
dberr_t error;
- int error_result= 0;
- ibool auto_inc_used= FALSE;
#ifdef WITH_WSREP
- ibool auto_inc_inserted= FALSE; /* if NULL was inserted */
+ bool wsrep_auto_inc_inserted= false;
#endif
- ulint sql_command;
- trx_t* trx = thd_to_trx(user_thd);
+ int error_result = 0;
+ bool auto_inc_used = false;
DBUG_ENTER("ha_innobase::write_row");
+ trx_t* trx = thd_to_trx(m_user_thd);
+
+ /* Validation checks before we commence the write_row operation. */
if (high_level_read_only) {
ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
DBUG_RETURN(HA_ERR_TABLE_READONLY);
- } else if (prebuilt->trx != trx) {
- sql_print_error("The transaction object for the table handle "
- "is at %p, but for the current thread it is at "
- "%p",
- (const void*) prebuilt->trx, (const void*) trx);
-
- fputs("InnoDB: Dump of 200 bytes around prebuilt: ", stderr);
- ut_print_buf(stderr, ((const byte*) prebuilt) - 100, 200);
- fputs("\n"
- "InnoDB: Dump of 200 bytes around ha_data: ",
- stderr);
+ } else if (m_prebuilt->trx != trx) {
+
+ ib::error() << "The transaction object for the table handle is"
+ " at " << static_cast<const void*>(m_prebuilt->trx)
+ << ", but for the current thread it is at "
+ << static_cast<const void*>(trx);
+
+ fputs("InnoDB: Dump of 200 bytes around m_prebuilt: ", stderr);
+ ut_print_buf(stderr, ((const byte*) m_prebuilt) - 100, 200);
+ fputs("\nInnoDB: Dump of 200 bytes around ha_data: ", stderr);
ut_print_buf(stderr, ((const byte*) trx) - 100, 200);
putc('\n', stderr);
ut_error;
@@ -8146,143 +8132,27 @@ ha_innobase::write_row(
++trx->will_lock;
}
- ha_statistic_increment(&SSV::ha_write_count);
-
- sql_command = thd_sql_command(user_thd);
-
- if (num_write_row >= 10000
- && (sql_command == SQLCOM_ALTER_TABLE
- || sql_command == SQLCOM_OPTIMIZE
- || sql_command == SQLCOM_CREATE_INDEX
#ifdef WITH_WSREP
- || (sql_command == SQLCOM_LOAD &&
- trx->is_wsrep() && wsrep_load_data_splitting &&
- !thd_test_options(
- user_thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
-#endif /* WITH_WSREP */
- || sql_command == SQLCOM_DROP_INDEX)) {
-#ifdef WITH_WSREP
- if (sql_command == SQLCOM_LOAD && trx->is_wsrep()) {
- WSREP_DEBUG("forced trx split for LOAD: %s",
- wsrep_thd_query(user_thd));
- }
-#endif /* WITH_WSREP */
- /* ALTER TABLE is COMMITted at every 10000 copied rows.
- The IX table lock for the original table has to be re-issued.
- As this method will be called on a temporary table where the
- contents of the original table is being copied to, it is
- a bit tricky to determine the source table. The cursor
- position in the source table need not be adjusted after the
- intermediate COMMIT, since writes by other transactions are
- being blocked by a MySQL table lock TL_WRITE_ALLOW_READ. */
-
- dict_table_t* src_table;
- enum lock_mode mode;
-
- num_write_row = 0;
-
- /* Commit the transaction. This will release the table
- locks, so they have to be acquired again. */
-
- /* Altering an InnoDB table */
- /* Get the source table. */
- src_table = lock_get_src_table(
- prebuilt->trx, prebuilt->table, &mode);
- if (!src_table) {
-no_commit:
- /* Unknown situation: do not commit */
- /*
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ALTER TABLE is holding lock"
- " on %lu tables!\n",
- prebuilt->trx->mysql_n_tables_locked);
- */
- ;
- } else if (src_table == prebuilt->table) {
-#ifdef WITH_WSREP
- if (sql_command == SQLCOM_LOAD && trx->is_wsrep() &&
- wsrep_load_data_splitting &&
- !thd_test_options(user_thd,
- OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
- {
- switch (wsrep_run_wsrep_commit(user_thd, 1))
- {
- case WSREP_TRX_OK:
- break;
- case WSREP_TRX_SIZE_EXCEEDED:
- case WSREP_TRX_CERT_FAIL:
- case WSREP_TRX_ERROR:
- DBUG_RETURN(1);
- }
-
- if (binlog_hton->commit(binlog_hton, user_thd, 1))
- DBUG_RETURN(1);
- wsrep_post_commit(user_thd, TRUE);
- }
-#endif /* WITH_WSREP */
- /* Source table is not in InnoDB format:
- no need to re-acquire locks on it. */
-
- /* Altering to InnoDB format */
- innobase_commit(ht, user_thd, 1);
- /* Note that this transaction is still active. */
- trx_register_for_2pc(prebuilt->trx);
- /* We will need an IX lock on the destination table. */
- prebuilt->sql_stat_start = TRUE;
- } else {
-#ifdef WITH_WSREP
- if (sql_command == SQLCOM_LOAD && trx->is_wsrep() &&
- wsrep_load_data_splitting &&
- !thd_test_options(user_thd,
- OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
- {
- switch (wsrep_run_wsrep_commit(user_thd, 1))
- {
- case WSREP_TRX_OK:
- break;
- case WSREP_TRX_SIZE_EXCEEDED:
- case WSREP_TRX_CERT_FAIL:
- case WSREP_TRX_ERROR:
- DBUG_RETURN(1);
- }
-
- if (binlog_hton->commit(binlog_hton, user_thd, 1))
- DBUG_RETURN(1);
- wsrep_post_commit(user_thd, TRUE);
- }
-#endif /* WITH_WSREP */
- /* Ensure that there are no other table locks than
- LOCK_IX and LOCK_AUTO_INC on the destination table. */
-
- if (!lock_is_table_exclusive(prebuilt->table,
- prebuilt->trx)) {
- goto no_commit;
- }
-
- /* Commit the transaction. This will release the table
- locks, so they have to be acquired again. */
- innobase_commit(ht, user_thd, 1);
- /* Note that this transaction is still active. */
- trx_register_for_2pc(prebuilt->trx);
- /* Re-acquire the table lock on the source table. */
- row_lock_table_for_mysql(prebuilt, src_table, mode);
- /* We will need an IX lock on the destination table. */
- prebuilt->sql_stat_start = TRUE;
- }
+ if (trx->is_wsrep() && wsrep_is_load_multi_commit(m_user_thd))
+ {
+ /* Note that this transaction is still active. */
+ trx_register_for_2pc(m_prebuilt->trx);
+ /* We will need an IX lock on the destination table. */
+ m_prebuilt->sql_stat_start = TRUE;
}
+#endif /* WITH_WSREP */
- num_write_row++;
-
- /* This is the case where the table has an auto-increment column */
+ /* Handling of Auto-Increment Columns. */
if (table->next_number_field && record == table->record[0]) {
/* Reset the error code before calling
innobase_get_auto_increment(). */
- prebuilt->autoinc_error = DB_SUCCESS;
+ m_prebuilt->autoinc_error = DB_SUCCESS;
#ifdef WITH_WSREP
- auto_inc_inserted= (table->next_number_field->val_int() == 0);
+ wsrep_auto_inc_inserted = trx->is_wsrep()
+ && wsrep_drupal_282555_workaround
+ && table->next_number_field->val_int() == 0;
#endif
if ((error_result = update_auto_increment())) {
@@ -8290,13 +8160,13 @@ no_commit:
/* Handle the case where the AUTOINC sub-system
failed during initialization. */
- if (prebuilt->autoinc_error == DB_UNSUPPORTED) {
+ if (m_prebuilt->autoinc_error == DB_UNSUPPORTED) {
error_result = ER_AUTOINC_READ_FAILED;
/* Set the error message to report too. */
my_error(ER_AUTOINC_READ_FAILED, MYF(0));
goto func_exit;
- } else if (prebuilt->autoinc_error != DB_SUCCESS) {
- error = prebuilt->autoinc_error;
+ } else if (m_prebuilt->autoinc_error != DB_SUCCESS) {
+ error = m_prebuilt->autoinc_error;
goto report_error;
}
@@ -8304,11 +8174,13 @@ no_commit:
goto func_exit;
}
- auto_inc_used = TRUE;
+ auto_inc_used = true;
}
- if (prebuilt->mysql_template == NULL
- || prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) {
+ /* Prepare the INSERT graph that will be executed for the actual
+ INSERT (this is a one-time operation). */
+ if (m_prebuilt->mysql_template == NULL
+ || m_prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) {
/* Build the template used in converting quickly between
the two database formats */
@@ -8316,12 +8188,14 @@ no_commit:
build_template(true);
}
- innobase_srv_conc_enter_innodb(prebuilt->trx);
+ innobase_srv_conc_enter_innodb(m_prebuilt);
+
+ /* Execute insert graph that will result in actual insert. */
+ error = row_insert_for_mysql((byte*) record, m_prebuilt);
- error = row_insert_for_mysql((byte*) record, prebuilt);
- DEBUG_SYNC(user_thd, "ib_after_row_insert");
+ DEBUG_SYNC(m_user_thd, "ib_after_row_insert");
- /* Handle duplicate key errors */
+ /* Handling of errors related to auto-increment. */
if (auto_inc_used) {
ulonglong auto_inc;
ulonglong col_max_value;
@@ -8329,7 +8203,7 @@ no_commit:
/* Note the number of rows processed for this statement, used
by get_auto_increment() to determine the number of AUTO-INC
 values to reserve. This is only useful for a multi-value INSERT
- and is a statement level counter.*/
+ and is a statement level counter. */
if (trx->n_autoinc_rows > 0) {
--trx->n_autoinc_rows;
}
@@ -8350,13 +8224,11 @@ no_commit:
must update the autoinc counter if we are performing
those statements. */
- switch (sql_command) {
+ switch (thd_sql_command(m_user_thd)) {
case SQLCOM_LOAD:
- if (trx->duplicates) {
-
- goto set_max_autoinc;
+ if (!trx->duplicates) {
+ break;
}
- break;
case SQLCOM_REPLACE:
case SQLCOM_INSERT_SELECT:
@@ -8369,25 +8241,23 @@ no_commit:
WSREP_DEBUG("DUPKEY error for autoinc\n"
"THD %ld, value %llu, off %llu inc %llu",
- thd_get_thread_id(current_thd),
+ thd_get_thread_id(m_user_thd),
auto_inc,
- prebuilt->autoinc_offset,
- prebuilt->autoinc_increment);
-
- if (trx->is_wsrep() &&
- auto_inc_inserted &&
- wsrep_drupal_282555_workaround &&
- wsrep_thd_retry_counter(user_thd) == 0 &&
- !thd_test_options(user_thd,
- OPTION_NOT_AUTOCOMMIT |
+ m_prebuilt->autoinc_offset,
+ m_prebuilt->autoinc_increment);
+
+ if (wsrep_auto_inc_inserted &&
+ wsrep_thd_retry_counter(m_user_thd) == 0 &&
+ !thd_test_options(m_user_thd,
+ OPTION_NOT_AUTOCOMMIT |
OPTION_BEGIN)) {
WSREP_DEBUG(
"retrying insert: %s",
- wsrep_thd_query(user_thd));
+ wsrep_thd_query(m_user_thd));
error= DB_SUCCESS;
wsrep_thd_set_conflict_state(
- user_thd, MUST_ABORT);
- innobase_srv_conc_exit_innodb(prebuilt->trx);
+ m_user_thd, MUST_ABORT);
+ innobase_srv_conc_exit_innodb(m_prebuilt);
/* jump straight to func exit over
* later wsrep hooks */
goto func_exit;
@@ -8405,9 +8275,9 @@ no_commit:
/* If the actual value inserted is greater than
the upper limit of the interval, then we try and
update the table upper limit. Note: last_value
- will be 0 if get_auto_increment() was not called.*/
+ will be 0 if get_auto_increment() was not called. */
- if (auto_inc >= prebuilt->autoinc_last_value) {
+ if (auto_inc >= m_prebuilt->autoinc_last_value) {
set_max_autoinc:
/* This should filter out the negative
values set explicitly by the user. */
@@ -8416,26 +8286,25 @@ set_max_autoinc:
ulonglong offset;
ulonglong increment;
dberr_t err;
-
#ifdef WITH_WSREP
 /* Applier threads, which process
 ROW events, don't go through server
 level autoinc processing; therefore
- prebuilt autoinc values don't get
+ m_prebuilt autoinc values don't get
 properly assigned. Fetch values from
 the server side. */
if (trx->is_wsrep() &&
- wsrep_thd_exec_mode(user_thd) == REPL_RECV)
+ wsrep_thd_exec_mode(m_user_thd) == REPL_RECV)
{
wsrep_thd_auto_increment_variables(
- user_thd, &offset, &increment);
+ m_user_thd, &offset, &increment);
}
else
{
#endif /* WITH_WSREP */
- ut_a(prebuilt->autoinc_increment > 0);
- offset = prebuilt->autoinc_offset;
- increment = prebuilt->autoinc_increment;
+ ut_a(m_prebuilt->autoinc_increment > 0);
+ offset = m_prebuilt->autoinc_offset;
+ increment = m_prebuilt->autoinc_increment;
#ifdef WITH_WSREP
}
#endif /* WITH_WSREP */
@@ -8458,9 +8327,10 @@ set_max_autoinc:
}
}
- innobase_srv_conc_exit_innodb(prebuilt->trx);
+ innobase_srv_conc_exit_innodb(m_prebuilt);
report_error:
+ /* Cleanup and exit. */
if (error == DB_TABLESPACE_DELETED) {
ib_senderrf(
trx->mysql_thd, IB_LOG_LEVEL_ERROR,
@@ -8468,16 +8338,15 @@ report_error:
table->s->table_name.str);
}
- error_result = convert_error_code_to_mysql(error,
- prebuilt->table->flags,
- user_thd);
+ error_result = convert_error_code_to_mysql(
+ error, m_prebuilt->table->flags, m_user_thd);
#ifdef WITH_WSREP
if (!error_result && trx->is_wsrep()
- && wsrep_thd_exec_mode(user_thd) == LOCAL_STATE
- && !wsrep_consistency_check(user_thd)
- && !wsrep_thd_ignore_table(user_thd)) {
- if (wsrep_append_keys(user_thd, WSREP_KEY_EXCLUSIVE, record,
+ && wsrep_thd_exec_mode(m_user_thd) == LOCAL_STATE
+ && !wsrep_consistency_check(m_user_thd)
+ && !wsrep_thd_ignore_table(m_user_thd)) {
+ if (wsrep_append_keys(m_user_thd, WSREP_KEY_EXCLUSIVE, record,
NULL)) {
DBUG_PRINT("wsrep", ("row key failed"));
error_result = HA_ERR_INTERNAL_ERROR;
@@ -8496,32 +8365,78 @@ func_exit:
DBUG_RETURN(error_result);
}
-/**********************************************************************//**
-Checks which fields have changed in a row and stores information
-of them to an update vector.
-@return DB_SUCCESS or error code */
+/** Fill the update vector's "old_vrow" field for those non-updated,
+but indexed columns. Such columns can still be present in the virtual
+index rec fields even if they are not updated (when some other fields
+are), so they need to be logged.
+@param[in] prebuilt InnoDB prebuilt struct
+@param[in,out] vfield field to be filled
+@param[in] o_len actual column length
+@param[in,out] col column to be filled
+@param[in] old_mysql_row_col MySQL old field ptr
+@param[in] col_pack_len MySQL field col length
+@param[in,out] buf buffer for a converted integer value
+@return used buffer ptr from row_mysql_store_col_in_innobase_format() */
+static
+byte*
+innodb_fill_old_vcol_val(
+ row_prebuilt_t* prebuilt,
+ dfield_t* vfield,
+ ulint o_len,
+ dict_col_t* col,
+ const byte* old_mysql_row_col,
+ ulint col_pack_len,
+ byte* buf)
+{
+ dict_col_copy_type(
+ col, dfield_get_type(vfield));
+ if (o_len != UNIV_SQL_NULL) {
+
+ buf = row_mysql_store_col_in_innobase_format(
+ vfield,
+ buf,
+ TRUE,
+ old_mysql_row_col,
+ col_pack_len,
+ dict_table_is_comp(prebuilt->table));
+ } else {
+ dfield_set_null(vfield);
+ }
+
+ return(buf);
+}
+
+/** Calculate an update vector corresponding to the changes
+between old_row and new_row.
+@param[out] uvect update vector
+@param[in] old_row current row in MySQL format
+@param[in] new_row intended updated row in MySQL format
+@param[in] table MySQL table handle
+@param[in,out] upd_buff buffer to use for converted values
+@param[in] buff_len length of upd_buff
+@param[in,out] prebuilt InnoDB execution context
+@param[out] auto_inc updated AUTO_INCREMENT value, or 0 if none
+@return DB_SUCCESS or error code */
static
dberr_t
calc_row_difference(
-/*================*/
- upd_t* uvect, /*!< in/out: update vector */
- uchar* old_row, /*!< in: old row in MySQL format */
- uchar* new_row, /*!< in: new row in MySQL format */
- TABLE* table, /*!< in: table in MySQL data
- dictionary */
- uchar* upd_buff, /*!< in: buffer to use */
- ulint buff_len, /*!< in: buffer length */
- row_prebuilt_t* prebuilt, /*!< in: InnoDB prebuilt struct */
- THD* thd) /*!< in: user thread */
+ upd_t* uvect,
+ const uchar* old_row,
+ uchar* new_row,
+ TABLE* table,
+ uchar* upd_buff,
+ ulint buff_len,
+ row_prebuilt_t* prebuilt,
+ ib_uint64_t& auto_inc)
{
uchar* original_upd_buff = upd_buff;
Field* field;
enum_field_types field_mysql_type;
- uint n_fields;
ulint o_len;
ulint n_len;
ulint col_pack_len;
const byte* new_mysql_row_col;
+ const byte* old_mysql_row_col;
const byte* o_ptr;
const byte* n_ptr;
byte* buf;
@@ -8530,24 +8445,31 @@ calc_row_difference(
ulint n_changed = 0;
dfield_t dfield;
dict_index_t* clust_index;
- uint sql_idx, innodb_idx= 0;
ibool changes_fts_column = FALSE;
ibool changes_fts_doc_col = FALSE;
- trx_t* trx = thd_to_trx(thd);
+ trx_t* const trx = prebuilt->trx;
doc_id_t doc_id = FTS_NULL_DOC_ID;
+ ulint num_v = 0;
+ const bool skip_virtual = ha_innobase::omits_virtual_cols(*table->s);
ut_ad(!srv_read_only_mode);
- n_fields = table->s->fields;
clust_index = dict_table_get_first_index(prebuilt->table);
+ auto_inc = 0;
/* We use upd_buff to convert changed fields */
buf = (byte*) upd_buff;
- for (sql_idx = 0; sql_idx < n_fields; sql_idx++) {
- field = table->field[sql_idx];
- if (!field->stored_in_db)
- continue;
+ for (uint i = 0; i < table->s->fields; i++) {
+ field = table->field[i];
+ const bool is_virtual = !field->stored_in_db();
+ if (is_virtual && skip_virtual) {
+ num_v++;
+ continue;
+ }
+ dict_col_t* col = is_virtual
+ ? &prebuilt->table->v_cols[num_v].m_col
+ : &prebuilt->table->cols[i - num_v];
o_ptr = (const byte*) old_row + get_field_offset(table, field);
n_ptr = (const byte*) new_row + get_field_offset(table, field);
@@ -8555,6 +8477,7 @@ calc_row_difference(
/* Use new_mysql_row_col and col_pack_len save the values */
new_mysql_row_col = n_ptr;
+ old_mysql_row_col = o_ptr;
col_pack_len = field->pack_length();
o_len = col_pack_len;
@@ -8565,11 +8488,12 @@ calc_row_difference(
field_mysql_type = field->type();
- col_type = prebuilt->table->cols[innodb_idx].mtype;
+ col_type = col->mtype;
switch (col_type) {
case DATA_BLOB:
+ case DATA_GEOMETRY:
o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len);
n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len);
@@ -8610,7 +8534,6 @@ calc_row_difference(
}
}
-
if (field->real_maybe_null()) {
if (field->is_null_in_record(old_row)) {
o_len = UNIV_SQL_NULL;
@@ -8621,6 +8544,64 @@ calc_row_difference(
}
}
+#ifdef UNIV_DEBUG
+ bool online_ord_part = false;
+#endif
+
+ if (is_virtual) {
+ /* If the virtual column is not indexed,
+ /* If the virtual column is not indexed,
+ we shall ignore it for the update. */
+ if (!col->ord_part) {
+ /* Check whether there is a table-rebuilding
+ online ALTER TABLE in progress, and whether this
+ virtual column could be newly indexed and thus
+ materialized; then we will have to log its update.
+ Note that we do not support dropping a virtual
+ column online while adding a new index, nor
+ altering column order online while adding an
+ index, so the virtual column sequence must not
+ change during an online operation. */
+ && row_log_col_is_indexed(clust_index,
+ num_v)) {
+#ifdef UNIV_DEBUG
+ online_ord_part = true;
+#endif
+ } else {
+ num_v++;
+ continue;
+ }
+ }
+
+ if (!uvect->old_vrow) {
+ uvect->old_vrow = dtuple_create_with_vcol(
+ uvect->heap, 0, prebuilt->table->n_v_cols);
+ }
+
+ ulint max_field_len = DICT_MAX_FIELD_LEN_BY_FORMAT(
+ prebuilt->table);
+
+ /* For virtual columns, we only materialize
+ their index values, and the index field length
+ cannot exceed max_field_len. So continue if the
+ first max_field_len bytes match. */
+ if (o_len != UNIV_SQL_NULL
+ && n_len != UNIV_SQL_NULL
+ && o_len >= max_field_len
+ && n_len >= max_field_len
+ && memcmp(o_ptr, n_ptr, max_field_len) == 0) {
+ dfield_t* vfield = dtuple_get_nth_v_field(
+ uvect->old_vrow, num_v);
+ buf = innodb_fill_old_vcol_val(
+ prebuilt, vfield, o_len,
+ col, old_mysql_row_col,
+ col_pack_len, buf);
+ num_v++;
+ continue;
+ }
+ }
+
if (o_len != n_len || (o_len != 0 && o_len != UNIV_SQL_NULL
&& 0 != memcmp(o_ptr, n_ptr, o_len))) {
/* The field has changed */
@@ -8631,10 +8612,18 @@ calc_row_difference(
/* Let us use a dummy dfield to make the conversion
from the MySQL column format to the InnoDB format */
+
+ /* If the length of the new geometry object is 0,
+ the object is an invalid geometry object, and we
+ need to block it. */
+ if (DATA_GEOMETRY_MTYPE(col_type)
+ && o_len != 0 && n_len == 0) {
+ return(DB_CANT_CREATE_GEOMETRY_OBJECT);
+ }
+
if (n_len != UNIV_SQL_NULL) {
- dict_col_copy_type(prebuilt->table->cols +
- innodb_idx,
- dfield_get_type(&dfield));
+ dict_col_copy_type(
+ col, dfield_get_type(&dfield));
buf = row_mysql_store_col_in_innobase_format(
&dfield,
@@ -8645,13 +8634,63 @@ calc_row_difference(
dict_table_is_comp(prebuilt->table));
dfield_copy(&ufield->new_val, &dfield);
} else {
+ dict_col_copy_type(
+ col, dfield_get_type(&ufield->new_val));
dfield_set_null(&ufield->new_val);
}
ufield->exp = NULL;
ufield->orig_len = 0;
- ufield->field_no = dict_col_get_clust_pos(
- &prebuilt->table->cols[innodb_idx], clust_index);
+ if (is_virtual) {
+ dfield_t* vfield = dtuple_get_nth_v_field(
+ uvect->old_vrow, num_v);
+ upd_fld_set_virtual_col(ufield);
+ ufield->field_no = num_v;
+
+ ut_ad(col->ord_part || online_ord_part);
+ ufield->old_v_val = static_cast<dfield_t*>(
+ mem_heap_alloc(
+ uvect->heap,
+ sizeof *ufield->old_v_val));
+
+ if (!field->is_null_in_record(old_row)) {
+ if (n_len == UNIV_SQL_NULL) {
+ dict_col_copy_type(
+ col, dfield_get_type(
+ &dfield));
+ }
+
+ buf = row_mysql_store_col_in_innobase_format(
+ &dfield,
+ (byte*) buf,
+ TRUE,
+ old_mysql_row_col,
+ col_pack_len,
+ dict_table_is_comp(
+ prebuilt->table));
+ dfield_copy(ufield->old_v_val,
+ &dfield);
+ dfield_copy(vfield, &dfield);
+ } else {
+ dict_col_copy_type(
+ col, dfield_get_type(
+ ufield->old_v_val));
+ dfield_set_null(ufield->old_v_val);
+ dfield_set_null(vfield);
+ }
+ num_v++;
+ ut_ad(field != table->found_next_number_field);
+ } else {
+ ufield->field_no = dict_col_get_clust_pos(
+ &prebuilt->table->cols[i - num_v],
+ clust_index);
+ ufield->old_v_val = NULL;
+ if (field != table->found_next_number_field
+ || dfield_is_null(&ufield->new_val)) {
+ } else {
+ auto_inc = field->val_uint();
+ }
+ }
n_changed++;
/* If an FTS indexed column was changed by this
@@ -8663,8 +8702,8 @@ calc_row_difference(
checking only once here. Later we will need to
note which columns have been updated and do
selective processing. */
- if (prebuilt->table->fts != NULL) {
- ulint offset;
+ if (prebuilt->table->fts != NULL && !is_virtual) {
+ ulint offset;
dict_table_t* innodb_table;
innodb_table = prebuilt->table;
@@ -8684,9 +8723,16 @@ calc_row_difference(
innodb_table, ufield);
}
}
+ } else if (is_virtual) {
+ dfield_t* vfield = dtuple_get_nth_v_field(
+ uvect->old_vrow, num_v);
+ buf = innodb_fill_old_vcol_val(
+ prebuilt, vfield, o_len,
+ col, old_mysql_row_col,
+ col_pack_len, buf);
+ ut_ad(col->ord_part || online_ord_part);
+ num_v++;
}
- if (field->stored_in_db)
- innodb_idx++;
}
/* If the update changes a column with an FTS index on it, we
@@ -8694,7 +8740,7 @@ calc_row_difference(
other changes. We piggy back our changes on the normal UPDATE
to reduce processing and IO overhead. */
if (!prebuilt->table->fts) {
- trx->fts_next_doc_id = 0;
+ trx->fts_next_doc_id = 0;
} else if (changes_fts_column || changes_fts_doc_col) {
dict_table_t* innodb_table = prebuilt->table;
@@ -8708,37 +8754,31 @@ calc_row_difference(
Doc ID must also be updated. Otherwise, return
error */
if (changes_fts_column && !changes_fts_doc_col) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: A new Doc ID"
- " must be supplied while updating"
- " FTS indexed columns.\n");
+ ib::warn() << "A new Doc ID must be supplied"
+ " while updating FTS indexed columns.";
return(DB_FTS_INVALID_DOCID);
}
/* Doc ID must monotonically increase */
ut_ad(innodb_table->fts->cache);
if (doc_id < prebuilt->table->fts->cache->next_doc_id) {
- fprintf(stderr,
- "InnoDB: FTS Doc ID must be larger than"
- " " IB_ID_FMT " for table",
- innodb_table->fts->cache->next_doc_id
- - 1);
- ut_print_name(stderr, trx,
- TRUE, innodb_table->name);
- putc('\n', stderr);
+
+ ib::warn() << "FTS Doc ID must be larger than "
+ << innodb_table->fts->cache->next_doc_id
+ - 1 << " for table "
+ << innodb_table->name;
return(DB_FTS_INVALID_DOCID);
} else if ((doc_id
- prebuilt->table->fts->cache->next_doc_id)
>= FTS_DOC_ID_MAX_STEP) {
- fprintf(stderr,
- "InnoDB: Doc ID " UINT64PF " is too"
+
+ ib::warn() << "Doc ID " << doc_id << " is too"
" big. Its difference with largest"
- " Doc ID used " UINT64PF " cannot"
- " exceed or equal to %d\n",
- doc_id,
- prebuilt->table->fts->cache->next_doc_id - 1,
- FTS_DOC_ID_MAX_STEP);
+ " Doc ID used " << prebuilt->table->fts
+ ->cache->next_doc_id - 1
+ << " cannot exceed or equal to "
+ << FTS_DOC_ID_MAX_STEP;
}
@@ -8770,6 +8810,7 @@ calc_row_difference(
ut_a(buf <= (byte*) original_upd_buff + buff_len);
+ ut_ad(uvect->validate());
return(DB_SUCCESS);
}
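
A stripped-down model of the comparison at the heart of this function: a column enters the update vector when its SQL NULL status or its bytes differ; the FTS, virtual-column, and autoinc handling above is layered on top of this core test.

#include <cstddef>
#include <cstring>

static bool col_changed(const unsigned char* o_ptr, std::size_t o_len,
			bool o_null,
			const unsigned char* n_ptr, std::size_t n_len,
			bool n_null)
{
	if (o_null != n_null) {
		return true;		/* NULL <-> non-NULL transition */
	}
	if (o_null) {
		return false;		/* NULL compared with NULL */
	}
	return o_len != n_len || std::memcmp(o_ptr, n_ptr, o_len) != 0;
}
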
@@ -8785,33 +8826,25 @@ wsrep_calc_row_hash(
row_prebuilt_t* prebuilt, /*!< in: InnoDB prebuilt struct */
THD* thd) /*!< in: user thread */
{
- Field* field;
- enum_field_types field_mysql_type;
- uint n_fields;
ulint len;
const byte* ptr;
- ulint col_type;
- uint i;
void *ctx = alloca(my_md5_context_size());
- my_md5_init(ctx);
-
- n_fields = table->s->fields;
+ my_md5_init(ctx);
- for (i = 0; i < n_fields; i++) {
+ for (uint i = 0; i < table->s->fields; i++) {
byte null_byte=0;
byte true_byte=1;
- field = table->field[i];
+ const Field* field = table->field[i];
+ if (!field->stored_in_db()) {
+ continue;
+ }
ptr = (const byte*) row + get_field_offset(table, field);
len = field->pack_length();
- field_mysql_type = field->type();
-
- col_type = prebuilt->table->cols[i].mtype;
-
- switch (col_type) {
+ switch (prebuilt->table->cols[i].mtype) {
case DATA_BLOB:
ptr = row_mysql_read_blob_ref(&len, ptr, len);
@@ -8821,7 +8854,7 @@ wsrep_calc_row_hash(
case DATA_VARCHAR:
case DATA_BINARY:
case DATA_VARMYSQL:
- if (field_mysql_type == MYSQL_TYPE_VARCHAR) {
+ if (field->type() == MYSQL_TYPE_VARCHAR) {
/* This is a >= 5.0.3 type true VARCHAR where
the real payload data length is stored in
1 or 2 bytes */
@@ -8855,28 +8888,31 @@ wsrep_calc_row_hash(
return(0);
}
#endif /* WITH_WSREP */
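wsrep_calc_row_hash() above digests only the stored (non-virtual) columns of a row, so two nodes can identify the same row even when the table has no usable key. A self-contained model of the loop, with FNV-1a standing in for the server's my_md5_* wrappers so the sketch compiles alone; the null/true marker bytes are assumed to prefix each column's contribution, since the hunk declares them but the digest-update calls fall outside the excerpt:

	#include <cstdint>
	#include <cstddef>
	#include <vector>

	static void hash_bytes(uint64_t& h, const void* p, size_t n)
	{
		const unsigned char* b = static_cast<const unsigned char*>(p);
		for (size_t i = 0; i < n; i++) { h ^= b[i]; h *= 1099511628211ULL; }
	}

	struct col_ref { bool stored_in_db; bool is_null; const void* ptr; size_t len; };

	/* Models the column loop: skip virtual columns, fold in a marker
	byte for NULL, otherwise a marker byte plus the column payload. */
	uint64_t row_hash(const std::vector<col_ref>& row)
	{
		uint64_t h = 1469598103934665603ULL;
		for (size_t i = 0; i < row.size(); i++) {
			const col_ref& c = row[i];
			if (!c.stored_in_db) continue;	/* virtual column */
			unsigned char marker = c.is_null ? 0 : 1;
			hash_bytes(h, &marker, 1);
			if (!c.is_null) hash_bytes(h, c.ptr, c.len);
		}
		return h;
	}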
-/**********************************************************************//**
+
+/**
Updates a row given as a parameter to a new value. Note that we are given
whole rows, not just the fields which are updated: this incurs some
CPU overhead when we check which fields are actually updated.
TODO: currently InnoDB does not prevent the 'Halloween problem':
in a searched update a single row can get updated several times
if its index columns are updated!
-@return error number or 0 */
-UNIV_INTERN
+@param[in] old_row Old row contents in MySQL format
+@param[out] new_row Updated row contents in MySQL format
+@return error number or 0 */
+
int
ha_innobase::update_row(
-/*====================*/
- const uchar* old_row, /*!< in: old row in MySQL format */
- uchar* new_row) /*!< in: new row in MySQL format */
+ const uchar* old_row,
+ uchar* new_row)
{
- upd_t* uvect;
+ int err;
+
dberr_t error;
- trx_t* trx = thd_to_trx(user_thd);
+ trx_t* trx = thd_to_trx(m_user_thd);
DBUG_ENTER("ha_innobase::update_row");
- ut_a(prebuilt->trx == trx);
+ ut_a(m_prebuilt->trx == trx);
if (high_level_read_only) {
ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
@@ -8885,124 +8921,113 @@ ha_innobase::update_row(
++trx->will_lock;
}
- if (upd_buf == NULL) {
- ut_ad(upd_buf_size == 0);
+ if (m_upd_buf == NULL) {
+ ut_ad(m_upd_buf_size == 0);
- /* Create a buffer for packing the fields of a record. Why
- table->stored_rec_length did not work here? Obviously,
- because char fields when packed actually became 1 byte
- longer, when we also stored the string length as the first
- byte. */
+	/* Create a buffer for packing the fields of a record. Why
+	did table->reclength not work here? Obviously because, when
+	packed, char fields actually became 1 byte longer when we
+	also stored the string length as the first byte. */
- upd_buf_size = table->s->stored_rec_length +
- table->s->max_key_length + MAX_REF_PARTS * 3;
- upd_buf = (uchar*) my_malloc(upd_buf_size, MYF(MY_WME));
- if (upd_buf == NULL) {
- upd_buf_size = 0;
+ m_upd_buf_size = table->s->reclength + table->s->max_key_length
+ + MAX_REF_PARTS * 3;
+
+ m_upd_buf = reinterpret_cast<uchar*>(
+ my_malloc(//PSI_INSTRUMENT_ME,
+ m_upd_buf_size,
+ MYF(MY_WME)));
+
+ if (m_upd_buf == NULL) {
+ m_upd_buf_size = 0;
DBUG_RETURN(HA_ERR_OUT_OF_MEM);
}
}
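The sizing comment above is worth pinning down: table->reclength alone undercounts because a packed CHAR column gains a leading length byte, so the buffer adds the maximum key length plus a per-ref-part allowance. A one-line sketch of the rule, with MAX_REF_PARTS assumed to be the server's usual limit of 16:

	#include <cstddef>

	static const size_t MAX_REF_PARTS = 16;	/* assumed value */

	/* Mirrors the m_upd_buf_size computation above. */
	size_t upd_buf_size(size_t reclength, size_t max_key_length)
	{
		return reclength + max_key_length + MAX_REF_PARTS * 3;
	}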
- ha_statistic_increment(&SSV::ha_update_count);
-
- if (prebuilt->upd_node) {
- uvect = prebuilt->upd_node->update;
- } else {
- uvect = row_get_prebuilt_update_vector(prebuilt);
- }
+ upd_t* uvect = row_get_prebuilt_update_vector(m_prebuilt);
+ ib_uint64_t autoinc;
/* Build an update vector from the modified fields in the rows
- (uses upd_buf of the handle) */
+ (uses m_upd_buf of the handle) */
- error = calc_row_difference(uvect, (uchar*) old_row, new_row, table,
- upd_buf, upd_buf_size, prebuilt, user_thd);
+ error = calc_row_difference(
+ uvect, old_row, new_row, table, m_upd_buf, m_upd_buf_size,
+ m_prebuilt, autoinc);
if (error != DB_SUCCESS) {
goto func_exit;
}
- /* This is not a delete */
- prebuilt->upd_node->is_delete = FALSE;
-
- ut_a(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
-
- innobase_srv_conc_enter_innodb(trx);
-
- error = row_update_for_mysql((byte*) old_row, prebuilt);
-
- /* We need to do some special AUTOINC handling for the following case:
-
- INSERT INTO t (c1,c2) VALUES(x,y) ON DUPLICATE KEY UPDATE ...
-
- We need to use the AUTOINC counter that was actually used by
- MySQL in the UPDATE statement, which can be different from the
- value used in the INSERT statement.*/
-
- if (error == DB_SUCCESS
- && table->next_number_field
- && new_row == table->record[0]
- && thd_sql_command(user_thd) == SQLCOM_INSERT
- && trx->duplicates) {
-
- ulonglong auto_inc;
- ulonglong col_max_value;
+ if (!uvect->n_fields) {
+ /* This is the same as success, but instructs
+ MySQL that the row is not really updated and it
+ should not increase the count of updated rows.
+			This is a fix for http://bugs.mysql.com/29157 */
+ DBUG_RETURN(HA_ERR_RECORD_IS_THE_SAME);
+ }
- auto_inc = table->next_number_field->val_uint();
+ /* This is not a delete */
+ m_prebuilt->upd_node->is_delete = FALSE;
- /* We need the upper limit of the col type to check for
- whether we update the table autoinc counter or not. */
- col_max_value =
- table->next_number_field->get_max_int_value();
+ innobase_srv_conc_enter_innodb(m_prebuilt);
- if (auto_inc <= col_max_value && auto_inc != 0) {
+ error = row_update_for_mysql(m_prebuilt);
- ulonglong offset;
- ulonglong increment;
+ if (error == DB_SUCCESS && autoinc) {
+ /* A value for an AUTO_INCREMENT column
+ was specified in the UPDATE statement. */
+ ulonglong offset;
+ ulonglong increment;
#ifdef WITH_WSREP
- /* Applier threads which are processing
- ROW events and don't go through server
- level autoinc processing, therefore
- prebuilt autoinc values don't get
- properly assigned. Fetch values from
- server side. */
- if (trx->is_wsrep() &&
- wsrep_thd_exec_mode(user_thd) == REPL_RECV)
- {
- wsrep_thd_auto_increment_variables(
- user_thd, &offset, &increment);
- }
- else
- {
+ /* Applier threads which are processing
+ ROW events and don't go through server
+ level autoinc processing, therefore
+ m_prebuilt autoinc values don't get
+ properly assigned. Fetch values from
+ server side. */
+ if (trx->is_wsrep() &&
+ wsrep_thd_exec_mode(m_user_thd) == REPL_RECV)
+ {
+ wsrep_thd_auto_increment_variables(
+ m_user_thd, &offset, &increment);
+ }
+ else
+ {
#endif /* WITH_WSREP */
- offset = prebuilt->autoinc_offset;
- increment = prebuilt->autoinc_increment;
+ offset = m_prebuilt->autoinc_offset;
+ increment = m_prebuilt->autoinc_increment;
#ifdef WITH_WSREP
- }
+ }
#endif /* WITH_WSREP */
- auto_inc = innobase_next_autoinc(
- auto_inc, 1, increment, offset, col_max_value);
- error = innobase_set_max_autoinc(auto_inc);
+ autoinc = innobase_next_autoinc(
+ autoinc, 1, increment, offset,
+ table->found_next_number_field->get_max_int_value());
+
+ error = innobase_set_max_autoinc(autoinc);
+
+ if (m_prebuilt->table->persistent_autoinc) {
+ /* Update the PAGE_ROOT_AUTO_INC. Yes, we do
+ this even if dict_table_t::autoinc already was
+ greater than autoinc, because we cannot know
+ if any INSERT actually used (and wrote to
+ PAGE_ROOT_AUTO_INC) a value bigger than our
+ autoinc. */
+ btr_write_autoinc(dict_table_get_first_index(
+ m_prebuilt->table),
+ autoinc);
}
}
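innobase_next_autoinc() is what keeps an UPDATE that wrote an explicit AUTO_INCREMENT value from leaving the counter behind: the written value is rounded up to the next point on the auto_increment_offset + k * auto_increment_increment lattice, clamped to the column maximum. A standalone sketch of that intent — an illustration only, not the exact InnoDB arithmetic, which guards overflow more carefully:

	#include <cstdint>

	/* Next usable counter value after `current`, honouring the
	session's offset/increment, capped at the column maximum. */
	uint64_t next_autoinc(uint64_t current, uint64_t need,
			      uint64_t increment, uint64_t offset,
			      uint64_t max_value)
	{
		if (current >= max_value) return max_value;
		uint64_t base  = (current > offset) ? current - offset : 0;
		uint64_t steps = base / increment + need;
		uint64_t next  = offset + steps * increment;
		/* wrap-around or overshoot both clamp to the maximum */
		return (next > max_value || next < current) ? max_value : next;
	}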
- innobase_srv_conc_exit_innodb(trx);
+ innobase_srv_conc_exit_innodb(m_prebuilt);
func_exit:
- int err = convert_error_code_to_mysql(error,
- prebuilt->table->flags, user_thd);
-
- /* If success and no columns were updated. */
- if (err == 0 && uvect->n_fields == 0) {
-
- /* This is the same as success, but instructs
- MySQL that the row is not really updated and it
- should not increase the count of updated rows.
- This is fix for http://bugs.mysql.com/29157 */
- err = HA_ERR_RECORD_IS_THE_SAME;
- } else if (err == HA_FTS_INVALID_DOCID) {
+ if (error == DB_FTS_INVALID_DOCID) {
+ err = HA_FTS_INVALID_DOCID;
my_error(HA_FTS_INVALID_DOCID, MYF(0));
+ } else {
+ err = convert_error_code_to_mysql(
+ error, m_prebuilt->table->flags, m_user_thd);
}
/* Tell InnoDB server that there might be work for
@@ -9012,12 +9037,11 @@ func_exit:
#ifdef WITH_WSREP
if (error == DB_SUCCESS && trx->is_wsrep() &&
- wsrep_thd_exec_mode(user_thd) == LOCAL_STATE &&
- !wsrep_thd_ignore_table(user_thd))
- {
+ wsrep_thd_exec_mode(m_user_thd) == LOCAL_STATE &&
+ !wsrep_thd_ignore_table(m_user_thd)) {
DBUG_PRINT("wsrep", ("update row key"));
- if (wsrep_append_keys(user_thd, WSREP_KEY_EXCLUSIVE, old_row,
+ if (wsrep_append_keys(m_user_thd, WSREP_KEY_EXCLUSIVE, old_row,
new_row)) {
WSREP_DEBUG("WSREP: UPDATE_ROW_KEY FAILED");
DBUG_PRINT("wsrep", ("row key failed"));
@@ -9031,19 +9055,19 @@ func_exit:
/**********************************************************************//**
Deletes a row given as the parameter.
-@return error number or 0 */
-UNIV_INTERN
+@return error number or 0 */
+
int
ha_innobase::delete_row(
/*====================*/
const uchar* record) /*!< in: a row in MySQL format */
{
dberr_t error;
- trx_t* trx = thd_to_trx(user_thd);
+ trx_t* trx = thd_to_trx(m_user_thd);
DBUG_ENTER("ha_innobase::delete_row");
- ut_a(prebuilt->trx == trx);
+ ut_a(m_prebuilt->trx == trx);
if (high_level_read_only) {
ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
@@ -9052,21 +9076,19 @@ ha_innobase::delete_row(
++trx->will_lock;
}
- ha_statistic_increment(&SSV::ha_delete_count);
-
- if (!prebuilt->upd_node) {
- row_get_prebuilt_update_vector(prebuilt);
+ if (!m_prebuilt->upd_node) {
+ row_get_prebuilt_update_vector(m_prebuilt);
}
/* This is a delete */
- prebuilt->upd_node->is_delete = TRUE;
+ m_prebuilt->upd_node->is_delete = TRUE;
- innobase_srv_conc_enter_innodb(trx);
+ innobase_srv_conc_enter_innodb(m_prebuilt);
- error = row_update_for_mysql((byte*) record, prebuilt);
+ error = row_update_for_mysql(m_prebuilt);
- innobase_srv_conc_exit_innodb(trx);
+ innobase_srv_conc_exit_innodb(m_prebuilt);
/* Tell the InnoDB server that there might be work for
utility threads: */
@@ -9074,11 +9096,10 @@ ha_innobase::delete_row(
innobase_active_small();
#ifdef WITH_WSREP
- if (error == DB_SUCCESS && trx->is_wsrep() &&
- wsrep_thd_exec_mode(user_thd) == LOCAL_STATE &&
- !wsrep_thd_ignore_table(user_thd))
- {
- if (wsrep_append_keys(user_thd, WSREP_KEY_EXCLUSIVE, record,
+ if (error == DB_SUCCESS && trx->is_wsrep()
+ && wsrep_thd_exec_mode(m_user_thd) == LOCAL_STATE
+ && !wsrep_thd_ignore_table(m_user_thd)) {
+ if (wsrep_append_keys(m_user_thd, WSREP_KEY_EXCLUSIVE, record,
NULL)) {
DBUG_PRINT("wsrep", ("delete fail"));
DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
@@ -9086,46 +9107,49 @@ ha_innobase::delete_row(
}
#endif /* WITH_WSREP */
DBUG_RETURN(convert_error_code_to_mysql(
- error, prebuilt->table->flags, user_thd));
+ error, m_prebuilt->table->flags, m_user_thd));
+}
+
+/** Delete all rows from the table.
+@return error number or 0 */
+
+int
+ha_innobase::delete_all_rows()
+{
+ DBUG_ENTER("ha_innobase::delete_all_rows");
+ DBUG_RETURN(HA_ERR_WRONG_COMMAND);
}
/**********************************************************************//**
Removes a new lock set on a row, if it was not read optimistically. This can
be called after a row has been read in the processing of an UPDATE or a DELETE
query, if the option innodb_locks_unsafe_for_binlog is set. */
-UNIV_INTERN
+
void
ha_innobase::unlock_row(void)
/*=========================*/
{
DBUG_ENTER("ha_innobase::unlock_row");
- /* Consistent read does not take any locks, thus there is
- nothing to unlock. */
-
- if (prebuilt->select_lock_type == LOCK_NONE) {
+ if (m_prebuilt->select_lock_type == LOCK_NONE) {
DBUG_VOID_RETURN;
}
- /* Ideally, this assert must be in the beginning of the function.
- But there are some calls to this function from the SQL layer when the
- transaction is in state TRX_STATE_NOT_STARTED. The check on
- prebuilt->select_lock_type above gets around this issue. */
- ut_ad(trx_state_eq(prebuilt->trx, TRX_STATE_ACTIVE, true));
+ ut_ad(trx_state_eq(m_prebuilt->trx, TRX_STATE_ACTIVE, true));
- switch (prebuilt->row_read_type) {
+ switch (m_prebuilt->row_read_type) {
case ROW_READ_WITH_LOCKS:
if (!srv_locks_unsafe_for_binlog
- && prebuilt->trx->isolation_level
+ && m_prebuilt->trx->isolation_level
> TRX_ISO_READ_COMMITTED) {
break;
}
/* fall through */
case ROW_READ_TRY_SEMI_CONSISTENT:
- row_unlock_for_mysql(prebuilt, FALSE);
+ row_unlock_for_mysql(m_prebuilt, FALSE);
break;
case ROW_READ_DID_SEMI_CONSISTENT:
- prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
+ m_prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
break;
}
@@ -9133,21 +9157,21 @@ ha_innobase::unlock_row(void)
}
/* See handler.h and row0mysql.h for docs on this function. */
-UNIV_INTERN
+
bool
ha_innobase::was_semi_consistent_read(void)
/*=======================================*/
{
- return(prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT);
+ return(m_prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT);
}
/* See handler.h and row0mysql.h for docs on this function. */
-UNIV_INTERN
+
void
ha_innobase::try_semi_consistent_read(bool yes)
/*===========================================*/
{
- ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
+ ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
/* Row read type is set to semi consistent read if this was
requested by the MySQL and either innodb_locks_unsafe_for_binlog
@@ -9156,22 +9180,26 @@ ha_innobase::try_semi_consistent_read(bool yes)
if (yes
&& (srv_locks_unsafe_for_binlog
- || prebuilt->trx->isolation_level <= TRX_ISO_READ_COMMITTED)) {
- prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
+ || m_prebuilt->trx->isolation_level
+ <= TRX_ISO_READ_COMMITTED)) {
+
+ m_prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
+
} else {
- prebuilt->row_read_type = ROW_READ_WITH_LOCKS;
+ m_prebuilt->row_read_type = ROW_READ_WITH_LOCKS;
}
}
/******************************************************************//**
Initializes a handle to use an index.
-@return 0 or error number */
-UNIV_INTERN
+@return 0 or error number */
+
int
ha_innobase::index_init(
/*====================*/
- uint keynr, /*!< in: key (index) number */
- bool sorted) /*!< in: 1 if result MUST be sorted according to index */
+ uint keynr, /*!< in: key (index) number */
+	bool		sorted)	/*!< in: true if result MUST be sorted
+ according to index */
{
DBUG_ENTER("index_init");
@@ -9180,65 +9208,55 @@ ha_innobase::index_init(
/******************************************************************//**
Currently does nothing.
-@return 0 */
-UNIV_INTERN
+@return 0 */
+
int
ha_innobase::index_end(void)
/*========================*/
{
- int error = 0;
DBUG_ENTER("index_end");
+
active_index = MAX_KEY;
+
in_range_check_pushed_down = FALSE;
- ds_mrr.dsmrr_close();
- DBUG_RETURN(error);
+
+ m_ds_mrr.dsmrr_close();
+
+ DBUG_RETURN(0);
}
/*********************************************************************//**
Converts a search mode flag understood by MySQL to a flag understood
by InnoDB. */
-static inline
-ulint
+page_cur_mode_t
convert_search_mode_to_innobase(
/*============================*/
- enum ha_rkey_function find_flag)
+ ha_rkey_function find_flag)
{
switch (find_flag) {
case HA_READ_KEY_EXACT:
/* this does not require the index to be UNIQUE */
- return(PAGE_CUR_GE);
case HA_READ_KEY_OR_NEXT:
return(PAGE_CUR_GE);
- case HA_READ_KEY_OR_PREV:
- return(PAGE_CUR_LE);
case HA_READ_AFTER_KEY:
return(PAGE_CUR_G);
case HA_READ_BEFORE_KEY:
return(PAGE_CUR_L);
- case HA_READ_PREFIX:
- return(PAGE_CUR_GE);
+ case HA_READ_KEY_OR_PREV:
case HA_READ_PREFIX_LAST:
- return(PAGE_CUR_LE);
case HA_READ_PREFIX_LAST_OR_PREV:
return(PAGE_CUR_LE);
- /* In MySQL-4.0 HA_READ_PREFIX and HA_READ_PREFIX_LAST always
- pass a complete-field prefix of a key value as the search
- tuple. I.e., it is not allowed that the last field would
- just contain n first bytes of the full field value.
- MySQL uses a 'padding' trick to convert LIKE 'abc%'
- type queries so that it can use as a search tuple
- a complete-field-prefix of a key value. Thus, the InnoDB
- search mode PAGE_CUR_LE_OR_EXTENDS is never used.
- TODO: when/if MySQL starts to use also partial-field
- prefixes, we have to deal with stripping of spaces
- and comparison of non-latin1 char type fields in
- innobase_mysql_cmp() to get PAGE_CUR_LE_OR_EXTENDS to
- work correctly. */
case HA_READ_MBR_CONTAIN:
+ return(PAGE_CUR_CONTAIN);
case HA_READ_MBR_INTERSECT:
+ return(PAGE_CUR_INTERSECT);
case HA_READ_MBR_WITHIN:
+ return(PAGE_CUR_WITHIN);
case HA_READ_MBR_DISJOINT:
+ return(PAGE_CUR_DISJOINT);
case HA_READ_MBR_EQUAL:
+ return(PAGE_CUR_MBR_EQUAL);
+ case HA_READ_PREFIX:
return(PAGE_CUR_UNSUPP);
/* do not use "default:" in order to produce a gcc warning:
enumeration value '...' not handled in switch
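Read together with index_read() above, the mapping is two-dimensional: the find flag picks a page cursor mode here and a row match mode there (ROW_SEL_EXACT for HA_READ_KEY_EXACT, ROW_SEL_EXACT_PREFIX for HA_READ_PREFIX_LAST). A condensed sketch of the combined decision, using illustrative enums rather than the real headers:

	enum page_cur_mode_t {
		PAGE_CUR_G, PAGE_CUR_GE, PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_UNSUPP
	};
	enum ha_rkey_function {
		HA_READ_KEY_EXACT, HA_READ_KEY_OR_NEXT, HA_READ_AFTER_KEY,
		HA_READ_BEFORE_KEY, HA_READ_PREFIX_LAST
	};
	enum { ROW_SEL_DEFAULT = 0, ROW_SEL_EXACT = 1, ROW_SEL_EXACT_PREFIX = 2 };

	struct search_plan { page_cur_mode_t mode; int match_mode; };

	search_plan plan_for(ha_rkey_function f)
	{
		search_plan p = { PAGE_CUR_UNSUPP, ROW_SEL_DEFAULT };
		switch (f) {
		case HA_READ_KEY_EXACT:
			p.mode = PAGE_CUR_GE; p.match_mode = ROW_SEL_EXACT; break;
		case HA_READ_KEY_OR_NEXT:
			p.mode = PAGE_CUR_GE; break;
		case HA_READ_AFTER_KEY:
			p.mode = PAGE_CUR_G; break;
		case HA_READ_BEFORE_KEY:
			p.mode = PAGE_CUR_L; break;
		case HA_READ_PREFIX_LAST:
			p.mode = PAGE_CUR_LE; p.match_mode = ROW_SEL_EXACT_PREFIX; break;
		}
		return p;
	}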
@@ -9258,22 +9276,22 @@ the start of a new SQL statement, and what is associated with it.
For each table in the database the MySQL interpreter may have several
table handle instances in use, also in a single SQL query. For each table
-handle instance there is an InnoDB 'prebuilt' struct which contains most
+handle instance there is an InnoDB 'm_prebuilt' struct which contains most
of the InnoDB data associated with this table handle instance.
A) if the user has not explicitly set any MySQL table level locks:
1) MySQL calls ::external_lock to set an 'intention' table level lock on
the table of the handle instance. There we set
-prebuilt->sql_stat_start = TRUE. The flag sql_stat_start should be set
+m_prebuilt->sql_stat_start = TRUE. The flag sql_stat_start should be set
true if we are taking this table handle instance to use in a new SQL
statement issued by the user. We also increment trx->n_mysql_tables_in_use.
- 2) If prebuilt->sql_stat_start == TRUE we 'pre-compile' the MySQL search
-instructions to prebuilt->template of the table handle instance in
+ 2) If m_prebuilt->sql_stat_start == TRUE we 'pre-compile' the MySQL search
+instructions to m_prebuilt->template of the table handle instance in
::index_read. The template is used to save CPU time in large joins.
- 3) In row_search_for_mysql, if prebuilt->sql_stat_start is true, we
+ 3) In row_search_for_mysql, if m_prebuilt->sql_stat_start is true, we
allocate a new consistent read view for the trx if it does not yet have one,
or in the case of a locking read, set an InnoDB 'intention' table level
lock on the table.
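Case A above compresses into a three-step handshake per statement. A purely illustrative timeline, with the real calls named only in comments:

	void statement_lifecycle_sketch()
	{
		/* 1) ::external_lock(): MySQL's 'intention' table-level lock;
		      set m_prebuilt->sql_stat_start = TRUE and increment
		      trx->n_mysql_tables_in_use for this handle instance. */
		/* 2) first ::index_read: sql_stat_start is still TRUE, so
		      build_template() "pre-compiles" the column map into
		      m_prebuilt->template to save CPU in large joins. */
		/* 3) row_search_for_mysql(): with sql_stat_start set,
		      allocate a consistent read view for the trx if it has
		      none, or for a locking read take the InnoDB 'intention'
		      table lock. */
	}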
@@ -9302,8 +9320,8 @@ start of a new SQL statement. */
/**********************************************************************//**
Positions an index cursor to the index specified in the handle. Fetches the
row if any.
-@return 0, HA_ERR_KEY_NOT_FOUND, or error number */
-UNIV_INTERN
+@return 0, HA_ERR_KEY_NOT_FOUND, or error number */
+
int
ha_innobase::index_read(
/*====================*/
@@ -9321,28 +9339,21 @@ ha_innobase::index_read(
uint key_len,/*!< in: key value length */
enum ha_rkey_function find_flag)/*!< in: search flags from my_base.h */
{
- ulint mode;
- dict_index_t* index;
- ulint match_mode = 0;
- int error;
- dberr_t ret;
-
DBUG_ENTER("index_read");
DEBUG_SYNC_C("ha_innobase_index_read_begin");
- ut_a(prebuilt->trx == thd_to_trx(user_thd));
+ ut_a(m_prebuilt->trx == thd_to_trx(m_user_thd));
ut_ad(key_len != 0 || find_flag != HA_READ_KEY_EXACT);
- ha_statistic_increment(&SSV::ha_read_key_count);
+ dict_index_t* index = m_prebuilt->index;
- index = prebuilt->index;
-
- if (UNIV_UNLIKELY(index == NULL) || dict_index_is_corrupted(index)) {
- prebuilt->index_usable = FALSE;
+ if (index == NULL || index->is_corrupted()) {
+ m_prebuilt->index_usable = FALSE;
DBUG_RETURN(HA_ERR_CRASHED);
}
- if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
- DBUG_RETURN(dict_index_is_corrupted(index)
+
+ if (!m_prebuilt->index_usable) {
+ DBUG_RETURN(index->is_corrupted()
? HA_ERR_INDEX_CORRUPT
: HA_ERR_TABLE_DEF_CHANGED);
}
@@ -9351,105 +9362,121 @@ ha_innobase::index_read(
DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
}
+ /* For R-Tree index, we will always place the page lock to
+ pages being searched */
+ if (dict_index_is_spatial(index)) {
+ ++m_prebuilt->trx->will_lock;
+ }
+
	/* Note that the index for which the search template is built is not
-	necessarily prebuilt->index, but can also be the clustered index */
+	necessarily m_prebuilt->index; it can also be the clustered index */
- if (prebuilt->sql_stat_start) {
+ if (m_prebuilt->sql_stat_start) {
build_template(false);
}
- if (key_ptr) {
+ if (key_ptr != NULL) {
/* Convert the search key value to InnoDB format into
- prebuilt->search_tuple */
+ m_prebuilt->search_tuple */
row_sel_convert_mysql_key_to_innobase(
- prebuilt->search_tuple,
- prebuilt->srch_key_val1,
- prebuilt->srch_key_val_len,
+ m_prebuilt->search_tuple,
+ m_prebuilt->srch_key_val1,
+ m_prebuilt->srch_key_val_len,
index,
(byte*) key_ptr,
(ulint) key_len,
- prebuilt->trx);
- DBUG_ASSERT(prebuilt->search_tuple->n_fields > 0);
+ m_prebuilt->trx);
+
+ DBUG_ASSERT(m_prebuilt->search_tuple->n_fields > 0);
} else {
/* We position the cursor to the last or the first entry
in the index */
- dtuple_set_n_fields(prebuilt->search_tuple, 0);
+ dtuple_set_n_fields(m_prebuilt->search_tuple, 0);
}
- mode = convert_search_mode_to_innobase(find_flag);
+ page_cur_mode_t mode = convert_search_mode_to_innobase(find_flag);
- match_mode = 0;
+ ulint match_mode = 0;
if (find_flag == HA_READ_KEY_EXACT) {
match_mode = ROW_SEL_EXACT;
- } else if (find_flag == HA_READ_PREFIX
- || find_flag == HA_READ_PREFIX_LAST) {
+ } else if (find_flag == HA_READ_PREFIX_LAST) {
match_mode = ROW_SEL_EXACT_PREFIX;
}
- last_match_mode = (uint) match_mode;
+ m_last_match_mode = (uint) match_mode;
+
+ dberr_t ret;
if (mode != PAGE_CUR_UNSUPP) {
- innobase_srv_conc_enter_innodb(prebuilt->trx);
+ innobase_srv_conc_enter_innodb(m_prebuilt);
- ret = row_search_for_mysql((byte*) buf, mode, prebuilt,
- match_mode, 0);
+ ret = row_search_mvcc(
+ buf, mode, m_prebuilt, match_mode, 0);
- innobase_srv_conc_exit_innodb(prebuilt->trx);
+ innobase_srv_conc_exit_innodb(m_prebuilt);
} else {
ret = DB_UNSUPPORTED;
}
+ DBUG_EXECUTE_IF("ib_select_query_failure", ret = DB_ERROR;);
+
+ int error;
+
switch (ret) {
case DB_SUCCESS:
error = 0;
table->status = 0;
- if (prebuilt->table->is_system_db) {
+ if (m_prebuilt->table->is_system_db) {
srv_stats.n_system_rows_read.add(
- (size_t) prebuilt->trx->id, 1);
+ thd_get_thread_id(m_prebuilt->trx->mysql_thd), 1);
} else {
srv_stats.n_rows_read.add(
- (size_t) prebuilt->trx->id, 1);
+ thd_get_thread_id(m_prebuilt->trx->mysql_thd), 1);
}
break;
+
case DB_RECORD_NOT_FOUND:
error = HA_ERR_KEY_NOT_FOUND;
table->status = STATUS_NOT_FOUND;
break;
+
case DB_END_OF_INDEX:
error = HA_ERR_KEY_NOT_FOUND;
table->status = STATUS_NOT_FOUND;
break;
- case DB_TABLESPACE_DELETED:
+ case DB_TABLESPACE_DELETED:
ib_senderrf(
- prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
ER_TABLESPACE_DISCARDED,
table->s->table_name.str);
table->status = STATUS_NOT_FOUND;
- error = HA_ERR_NO_SUCH_TABLE;
+ error = HA_ERR_TABLESPACE_MISSING;
break;
+
case DB_TABLESPACE_NOT_FOUND:
ib_senderrf(
- prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
- ER_TABLESPACE_MISSING, MYF(0),
+ m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_MISSING,
table->s->table_name.str);
table->status = STATUS_NOT_FOUND;
- error = HA_ERR_NO_SUCH_TABLE;
+ error = HA_ERR_TABLESPACE_MISSING;
break;
+
default:
error = convert_error_code_to_mysql(
- ret, prebuilt->table->flags, user_thd);
+ ret, m_prebuilt->table->flags, m_user_thd);
table->status = STATUS_NOT_FOUND;
break;
@@ -9461,8 +9488,8 @@ ha_innobase::index_read(
/*******************************************************************//**
The following function works like index_read, but it finds the last
row with the current key value or prefix.
-@return 0, HA_ERR_KEY_NOT_FOUND, or an error code */
-UNIV_INTERN
+@return 0, HA_ERR_KEY_NOT_FOUND, or an error code */
+
int
ha_innobase::index_read_last(
/*=========================*/
@@ -9477,8 +9504,8 @@ ha_innobase::index_read_last(
/********************************************************************//**
Get the index for a handle. Does not change active index.
-@return NULL or index instance. */
-UNIV_INTERN
+@return NULL or index instance. */
+
dict_index_t*
ha_innobase::innobase_get_index(
/*============================*/
@@ -9486,68 +9513,25 @@ ha_innobase::innobase_get_index(
clustered index, even if it was internally
generated by InnoDB */
{
- KEY* key = 0;
- dict_index_t* index = 0;
+ KEY* key = NULL;
+ dict_table_t* ib_table = m_prebuilt->table;
+ dict_index_t* index;
DBUG_ENTER("innobase_get_index");
if (keynr != MAX_KEY && table->s->keys > 0) {
- key = table->key_info + keynr;
-
- index = innobase_index_lookup(share, keynr);
-
- if (index) {
- if (!key || ut_strcmp(index->name, key->name) != 0) {
- fprintf(stderr, "InnoDB: [Error] Index for key no %u"
- " mysql name %s , InnoDB name %s for table %s\n",
- keynr, key ? key->name : "NULL",
- index->name,
- prebuilt->table->name);
-
- for(ulint i=0; i < table->s->keys; i++) {
- index = innobase_index_lookup(share, i);
- key = table->key_info + keynr;
-
- if (index) {
-
- fprintf(stderr, "InnoDB: [Note] Index for key no %u"
- " mysql name %s , InnoDB name %s for table %s\n",
- keynr, key ? key->name : "NULL",
- index->name,
- prebuilt->table->name);
- }
- }
-
- }
-
- ut_a(ut_strcmp(index->name, key->name) == 0);
- } else {
- /* Can't find index with keynr in the translation
- table. Only print message if the index translation
- table exists */
- if (share->idx_trans_tbl.index_mapping) {
- sql_print_warning("InnoDB could not find "
- "index %s key no %u for "
- "table %s through its "
- "index translation table",
- key ? key->name : "NULL",
- keynr,
- prebuilt->table->name);
- }
-
- index = dict_table_get_index_on_name(prebuilt->table,
- key->name);
- }
+ key = &table->key_info[keynr];
+ index = dict_table_get_index_on_name(ib_table, key->name);
} else {
- index = dict_table_get_first_index(prebuilt->table);
+ index = dict_table_get_first_index(ib_table);
}
- if (!index) {
+ if (index == NULL) {
sql_print_error(
- "Innodb could not find key n:o %u with name %s "
- "from dict cache for table %s",
+ "InnoDB could not find key no %u with name %s"
+ " from dict cache for table %s",
keynr, key ? key->name : "NULL",
- prebuilt->table->name);
+ ib_table->name.m_name);
}
DBUG_RETURN(index);
@@ -9555,8 +9539,8 @@ ha_innobase::innobase_get_index(
/********************************************************************//**
Changes the active index of a handle.
-@return 0 or error code */
-UNIV_INTERN
+@return 0 or error code */
+
int
ha_innobase::change_active_index(
/*=============================*/
@@ -9566,46 +9550,52 @@ ha_innobase::change_active_index(
{
DBUG_ENTER("change_active_index");
- ut_ad(user_thd == ha_thd());
- ut_a(prebuilt->trx == thd_to_trx(user_thd));
+ ut_ad(m_user_thd == ha_thd());
+ ut_a(m_prebuilt->trx == thd_to_trx(m_user_thd));
active_index = keynr;
- prebuilt->index = innobase_get_index(keynr);
+ m_prebuilt->index = innobase_get_index(keynr);
- if (UNIV_UNLIKELY(!prebuilt->index)) {
+ if (m_prebuilt->index == NULL) {
sql_print_warning("InnoDB: change_active_index(%u) failed",
keynr);
- prebuilt->index_usable = FALSE;
+ m_prebuilt->index_usable = FALSE;
DBUG_RETURN(1);
}
- prebuilt->index_usable = row_merge_is_index_usable(prebuilt->trx,
- prebuilt->index);
+ m_prebuilt->index_usable = row_merge_is_index_usable(
+ m_prebuilt->trx, m_prebuilt->index);
- if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
- if (dict_index_is_corrupted(prebuilt->index)) {
- char index_name[MAX_FULL_NAME_LEN + 1];
- char table_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(
- index_name, sizeof index_name,
- prebuilt->index->name, TRUE);
+ if (!m_prebuilt->index_usable) {
+ if (m_prebuilt->index->is_corrupted()) {
+ char table_name[MAX_FULL_NAME_LEN + 1];
innobase_format_name(
table_name, sizeof table_name,
- prebuilt->index->table->name, FALSE);
+ m_prebuilt->index->table->name.m_name);
- push_warning_printf(
- user_thd, Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_INDEX_CORRUPT,
- "InnoDB: Index %s for table %s is"
- " marked as corrupted",
- index_name, table_name);
- DBUG_RETURN(HA_ERR_INDEX_CORRUPT);
+ if (m_prebuilt->index->is_primary()) {
+ ut_ad(m_prebuilt->index->table->corrupted);
+ push_warning_printf(
+ m_user_thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_TABLE_CORRUPT,
+ "InnoDB: Table %s is corrupted.",
+ table_name);
+ DBUG_RETURN(ER_TABLE_CORRUPT);
+ } else {
+ push_warning_printf(
+ m_user_thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_ERR_INDEX_CORRUPT,
+ "InnoDB: Index %s for table %s is"
+ " marked as corrupted",
+ m_prebuilt->index->name(),
+ table_name);
+ DBUG_RETURN(HA_ERR_INDEX_CORRUPT);
+ }
} else {
push_warning_printf(
- user_thd, Sql_condition::WARN_LEVEL_WARN,
+ m_user_thd, Sql_condition::WARN_LEVEL_WARN,
HA_ERR_TABLE_DEF_CHANGED,
"InnoDB: insufficient history for index %u",
keynr);
@@ -9614,15 +9604,41 @@ ha_innobase::change_active_index(
/* The caller seems to ignore this. Thus, we must check
this again in row_search_for_mysql(). */
DBUG_RETURN(convert_error_code_to_mysql(DB_MISSING_HISTORY,
- 0, NULL));
+ 0, NULL));
}
- ut_a(prebuilt->search_tuple != 0);
+ ut_a(m_prebuilt->search_tuple != 0);
+
+ /* Initialization of search_tuple is not needed for FT index
+ since FT search returns rank only. In addition engine should
+ be able to retrieve FTS_DOC_ID column value if necessary. */
+ if (m_prebuilt->index->type & DICT_FTS) {
+ for (uint i = 0; i < table->s->fields; i++) {
+ if (m_prebuilt->read_just_key
+ && bitmap_is_set(table->read_set, i)
+ && !strcmp(table->s->field[i]->field_name,
+ FTS_DOC_ID_COL_NAME)) {
+ m_prebuilt->fts_doc_id_in_read_set = true;
+ break;
+ }
+ }
+ } else {
+ dtuple_set_n_fields(m_prebuilt->search_tuple,
+ m_prebuilt->index->n_fields);
- dtuple_set_n_fields(prebuilt->search_tuple, prebuilt->index->n_fields);
+ dict_index_copy_types(
+ m_prebuilt->search_tuple, m_prebuilt->index,
+ m_prebuilt->index->n_fields);
- dict_index_copy_types(prebuilt->search_tuple, prebuilt->index,
- prebuilt->index->n_fields);
+ /* If it's FTS query and FTS_DOC_ID exists FTS_DOC_ID field is
+ always added to read_set. */
+ m_prebuilt->fts_doc_id_in_read_set = m_prebuilt->in_fts_query
+ && m_prebuilt->read_just_key
+ && dict_index_contains_col_or_prefix(
+ m_prebuilt->index,
+ m_prebuilt->table->fts->doc_col,
+ false);
+ }
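The FTS branch above answers one question: for a covering ("read just key") full-text query, does the statement also select FTS_DOC_ID, so the engine must materialize it from the result set? A standalone model of the read_set probe, with illustrative types in place of TABLE/bitmap:

	#include <string>
	#include <vector>

	/* Models the loop over table->s->fields with FTS_DOC_ID_COL_NAME
	assumed to be the literal column name "FTS_DOC_ID". */
	bool fts_doc_id_requested(const std::vector<std::string>& fields,
				  const std::vector<bool>& read_set,
				  bool read_just_key)
	{
		if (!read_just_key) return false;
		for (size_t i = 0; i < fields.size(); i++) {
			if (read_set[i] && fields[i] == "FTS_DOC_ID") {
				return true;
			}
		}
		return false;
	}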
/* MySQL changes the active index for a handle also during some
queries, for example SELECT MAX(a), SUM(a) first retrieves the MAX()
@@ -9635,37 +9651,11 @@ ha_innobase::change_active_index(
DBUG_RETURN(0);
}
-/**********************************************************************//**
-Positions an index cursor to the index specified in keynr. Fetches the
-row if any.
-??? This is only used to read whole keys ???
-@return error number or 0 */
-UNIV_INTERN
-int
-ha_innobase::index_read_idx(
-/*========================*/
- uchar* buf, /*!< in/out: buffer for the returned
- row */
- uint keynr, /*!< in: use this index */
- const uchar* key, /*!< in: key value; if this is NULL
- we position the cursor at the
- start or end of index */
- uint key_len, /*!< in: key value length */
- enum ha_rkey_function find_flag)/*!< in: search flags from my_base.h */
-{
- if (change_active_index(keynr)) {
-
- return(1);
- }
-
- return(index_read(buf, key, key_len, find_flag));
-}
-
/***********************************************************************//**
Reads the next or previous row from a cursor, which must have previously been
positioned using index_read.
-@return 0, HA_ERR_END_OF_FILE, or error number */
-UNIV_INTERN
+@return 0, HA_ERR_END_OF_FILE, or error number */
+
int
ha_innobase::general_fetch(
/*=======================*/
@@ -9675,50 +9665,42 @@ ha_innobase::general_fetch(
uint match_mode) /*!< in: 0, ROW_SEL_EXACT, or
ROW_SEL_EXACT_PREFIX */
{
- dberr_t ret;
- int error;
-
DBUG_ENTER("general_fetch");
- /* If transaction is not startted do not continue, instead return a error code. */
- if(!(prebuilt->sql_stat_start || (prebuilt->trx && prebuilt->trx->state == 1))) {
- DBUG_RETURN(HA_ERR_END_OF_FILE);
- }
+ const trx_t* trx = m_prebuilt->trx;
- ut_a(prebuilt->trx == thd_to_trx(user_thd));
+ ut_ad(trx == thd_to_trx(m_user_thd));
- if (prebuilt->table->is_readable()) {
+ if (m_prebuilt->table->is_readable()) {
+ } else if (m_prebuilt->table->corrupted) {
+ DBUG_RETURN(HA_ERR_CRASHED);
} else {
- if (prebuilt->table->corrupted) {
- DBUG_RETURN(HA_ERR_CRASHED);
- } else {
- FilSpace space(prebuilt->table->space, true);
+ FilSpace space(m_prebuilt->table->space, true);
- if (space()) {
- DBUG_RETURN(HA_ERR_DECRYPTION_FAILED);
- } else {
- DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
- }
- }
+ DBUG_RETURN(space()
+ ? HA_ERR_DECRYPTION_FAILED
+ : HA_ERR_NO_SUCH_TABLE);
}
- innobase_srv_conc_enter_innodb(prebuilt->trx);
+ innobase_srv_conc_enter_innodb(m_prebuilt);
+
+ dberr_t ret = row_search_mvcc(
+ buf, PAGE_CUR_UNSUPP, m_prebuilt, match_mode, direction);
- ret = row_search_for_mysql(
- (byte*) buf, 0, prebuilt, match_mode, direction);
+ innobase_srv_conc_exit_innodb(m_prebuilt);
- innobase_srv_conc_exit_innodb(prebuilt->trx);
+ int error;
switch (ret) {
case DB_SUCCESS:
error = 0;
table->status = 0;
- if (prebuilt->table->is_system_db) {
+ if (m_prebuilt->table->is_system_db) {
srv_stats.n_system_rows_read.add(
- (size_t) prebuilt->trx->id, 1);
+ thd_get_thread_id(trx->mysql_thd), 1);
} else {
srv_stats.n_rows_read.add(
- (size_t) prebuilt->trx->id, 1);
+ thd_get_thread_id(trx->mysql_thd), 1);
}
break;
case DB_RECORD_NOT_FOUND:
@@ -9730,28 +9712,27 @@ ha_innobase::general_fetch(
table->status = STATUS_NOT_FOUND;
break;
case DB_TABLESPACE_DELETED:
-
ib_senderrf(
- prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ trx->mysql_thd, IB_LOG_LEVEL_ERROR,
ER_TABLESPACE_DISCARDED,
table->s->table_name.str);
table->status = STATUS_NOT_FOUND;
- error = HA_ERR_NO_SUCH_TABLE;
+ error = HA_ERR_TABLESPACE_MISSING;
break;
case DB_TABLESPACE_NOT_FOUND:
ib_senderrf(
- prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ trx->mysql_thd, IB_LOG_LEVEL_ERROR,
ER_TABLESPACE_MISSING,
table->s->table_name.str);
table->status = STATUS_NOT_FOUND;
- error = HA_ERR_NO_SUCH_TABLE;
+ error = HA_ERR_TABLESPACE_MISSING;
break;
default:
error = convert_error_code_to_mysql(
- ret, prebuilt->table->flags, user_thd);
+ ret, m_prebuilt->table->flags, m_user_thd);
table->status = STATUS_NOT_FOUND;
break;
@@ -9763,23 +9744,21 @@ ha_innobase::general_fetch(
/***********************************************************************//**
Reads the next row from a cursor, which must have previously been
positioned using index_read.
-@return 0, HA_ERR_END_OF_FILE, or error number */
-UNIV_INTERN
+@return 0, HA_ERR_END_OF_FILE, or error number */
+
int
ha_innobase::index_next(
/*====================*/
uchar* buf) /*!< in/out: buffer for next row in MySQL
format */
{
- ha_statistic_increment(&SSV::ha_read_next_count);
-
return(general_fetch(buf, ROW_SEL_NEXT, 0));
}
/*******************************************************************//**
Reads the next row matching to the key value given as the parameter.
-@return 0, HA_ERR_END_OF_FILE, or error number */
-UNIV_INTERN
+@return 0, HA_ERR_END_OF_FILE, or error number */
+
int
ha_innobase::index_next_same(
/*=========================*/
@@ -9787,42 +9766,35 @@ ha_innobase::index_next_same(
const uchar* key, /*!< in: key value */
uint keylen) /*!< in: key value length */
{
- ha_statistic_increment(&SSV::ha_read_next_count);
-
- return(general_fetch(buf, ROW_SEL_NEXT, last_match_mode));
+ return(general_fetch(buf, ROW_SEL_NEXT, m_last_match_mode));
}
/***********************************************************************//**
Reads the previous row from a cursor, which must have previously been
positioned using index_read.
-@return 0, HA_ERR_END_OF_FILE, or error number */
-UNIV_INTERN
+@return 0, HA_ERR_END_OF_FILE, or error number */
+
int
ha_innobase::index_prev(
/*====================*/
uchar* buf) /*!< in/out: buffer for previous row in MySQL format */
{
- ha_statistic_increment(&SSV::ha_read_prev_count);
-
return(general_fetch(buf, ROW_SEL_PREV, 0));
}
/********************************************************************//**
Positions a cursor on the first record in an index and reads the
corresponding row to buf.
-@return 0, HA_ERR_END_OF_FILE, or error code */
-UNIV_INTERN
+@return 0, HA_ERR_END_OF_FILE, or error code */
+
int
ha_innobase::index_first(
/*=====================*/
uchar* buf) /*!< in/out: buffer for the row */
{
- int error;
-
DBUG_ENTER("index_first");
- ha_statistic_increment(&SSV::ha_read_first_count);
- error = index_read(buf, NULL, 0, HA_READ_AFTER_KEY);
+ int error = index_read(buf, NULL, 0, HA_READ_AFTER_KEY);
/* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
@@ -9836,19 +9808,16 @@ ha_innobase::index_first(
/********************************************************************//**
Positions a cursor on the last record in an index and reads the
corresponding row to buf.
-@return 0, HA_ERR_END_OF_FILE, or error code */
-UNIV_INTERN
+@return 0, HA_ERR_END_OF_FILE, or error code */
+
int
ha_innobase::index_last(
/*====================*/
uchar* buf) /*!< in/out: buffer for the row */
{
- int error;
-
DBUG_ENTER("index_last");
- ha_statistic_increment(&SSV::ha_read_last_count);
- error = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY);
+ int error = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY);
/* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
@@ -9861,22 +9830,22 @@ ha_innobase::index_last(
/****************************************************************//**
Initialize a table scan.
-@return 0 or error number */
-UNIV_INTERN
+@return 0 or error number */
+
int
ha_innobase::rnd_init(
/*==================*/
- bool scan) /*!< in: TRUE if table/index scan FALSE otherwise */
+	bool	scan)	/*!< in: true if table/index scan, false otherwise */
{
- int err;
+ int err;
/* Store the active index value so that we can restore the original
value after a scan */
- if (prebuilt->clust_index_was_generated) {
+ if (m_prebuilt->clust_index_was_generated) {
err = change_active_index(MAX_KEY);
} else {
- err = change_active_index(primary_key);
+ err = change_active_index(m_primary_key);
}
/* Don't use semi-consistent read in random row reads (by position).
@@ -9886,15 +9855,15 @@ ha_innobase::rnd_init(
try_semi_consistent_read(0);
}
- start_of_scan = 1;
+ m_start_of_scan = true;
return(err);
}
/*****************************************************************//**
Ends a table scan.
-@return 0 or error number */
-UNIV_INTERN
+@return 0 or error number */
+
int
ha_innobase::rnd_end(void)
/*======================*/
@@ -9905,8 +9874,8 @@ ha_innobase::rnd_end(void)
/*****************************************************************//**
Reads the next row in a table scan (also used to read the FIRST row
in a table scan).
-@return 0, HA_ERR_END_OF_FILE, or error number */
-UNIV_INTERN
+@return 0, HA_ERR_END_OF_FILE, or error number */
+
int
ha_innobase::rnd_next(
/*==================*/
@@ -9916,16 +9885,15 @@ ha_innobase::rnd_next(
int error;
DBUG_ENTER("rnd_next");
- ha_statistic_increment(&SSV::ha_read_rnd_next_count);
- if (start_of_scan) {
+ if (m_start_of_scan) {
error = index_first(buf);
if (error == HA_ERR_KEY_NOT_FOUND) {
error = HA_ERR_END_OF_FILE;
}
- start_of_scan = 0;
+ m_start_of_scan = false;
} else {
error = general_fetch(buf, ROW_SEL_NEXT, 0);
}
@@ -9935,8 +9903,8 @@ ha_innobase::rnd_next(
/**********************************************************************//**
Fetches a row from the table based on a row reference.
-@return 0, HA_ERR_KEY_NOT_FOUND, or error code */
-UNIV_INTERN
+@return 0, HA_ERR_KEY_NOT_FOUND, or error code */
+
int
ha_innobase::rnd_pos(
/*=================*/
@@ -9946,20 +9914,17 @@ ha_innobase::rnd_pos(
index was internally generated by InnoDB; the
length of data in pos has to be ref_length */
{
- int error;
DBUG_ENTER("rnd_pos");
DBUG_DUMP("key", pos, ref_length);
- ha_statistic_increment(&SSV::ha_read_rnd_count);
-
- ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
+ ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
/* Note that we assume the length of the row reference is fixed
for the table, and it is == ref_length */
- error = index_read(buf, pos, ref_length, HA_READ_KEY_EXACT);
+ int error = index_read(buf, pos, ref_length, HA_READ_KEY_EXACT);
- if (error) {
+ if (error != 0) {
DBUG_PRINT("error", ("Got error: %d", error));
}
@@ -9969,7 +9934,7 @@ ha_innobase::rnd_pos(
/**********************************************************************//**
Initialize FT index scan
@return 0 or error number */
-UNIV_INTERN
+
int
ha_innobase::ft_init()
/*==================*/
@@ -9993,7 +9958,7 @@ ha_innobase::ft_init()
/**********************************************************************//**
Initialize FT index scan
@return FT_INFO structure if successful or NULL */
-UNIV_INTERN
+
FT_INFO*
ha_innobase::ft_init_ext(
/*=====================*/
@@ -10001,47 +9966,48 @@ ha_innobase::ft_init_ext(
uint keynr, /* in: */
String* key) /* in: */
{
- trx_t* trx;
- dict_table_t* ft_table;
- dberr_t error;
- byte* query = (byte*) key->ptr();
- ulint query_len = key->length();
- const CHARSET_INFO* char_set = key->charset();
NEW_FT_INFO* fts_hdl = NULL;
dict_index_t* index;
fts_result_t* result;
char buf_tmp[8192];
ulint buf_tmp_used;
uint num_errors;
+ ulint query_len = key->length();
+ const CHARSET_INFO* char_set = key->charset();
+ const char* query = key->ptr();
if (fts_enable_diag_print) {
- fprintf(stderr, "keynr=%u, '%.*s'\n",
- keynr, (int) key->length(), (byte*) key->ptr());
+ {
+ ib::info out;
+ out << "keynr=" << keynr << ", '";
+ out.write(key->ptr(), key->length());
+ }
if (flags & FT_BOOL) {
- fprintf(stderr, "BOOL search\n");
+ ib::info() << "BOOL search";
} else {
- fprintf(stderr, "NL search\n");
+ ib::info() << "NL search";
}
}
/* FIXME: utf32 and utf16 are not compatible with some
	string function used. So convert them to utf8 before
- proceed. */
+ we proceed. */
if (strcmp(char_set->csname, "utf32") == 0
|| strcmp(char_set->csname, "utf16") == 0) {
+
buf_tmp_used = innobase_convert_string(
buf_tmp, sizeof(buf_tmp) - 1,
&my_charset_utf8_general_ci,
query, query_len, (CHARSET_INFO*) char_set,
&num_errors);
- query = (byte*) buf_tmp;
+ buf_tmp[buf_tmp_used] = 0;
+ query = buf_tmp;
query_len = buf_tmp_used;
- query[query_len] = 0;
}
- trx = prebuilt->trx;
+ trx_t* trx = m_prebuilt->trx;
/* FTS queries are not treated as autocommit non-locking selects.
This is because the FTS implementation can acquire locks behind
@@ -10052,7 +10018,7 @@ ha_innobase::ft_init_ext(
++trx->will_lock;
}
- ft_table = prebuilt->table;
+ dict_table_t* ft_table = m_prebuilt->table;
/* Table does not have an FTS index */
if (!ft_table->fts || ib_vector_is_empty(ft_table->fts->indexes)) {
@@ -10062,19 +10028,20 @@ ha_innobase::ft_init_ext(
/* If tablespace is discarded, we should return here */
if (dict_table_is_discarded(ft_table)) {
- my_error(ER_NO_SUCH_TABLE, MYF(0), table->s->db.str,
+ my_error(ER_TABLESPACE_MISSING, MYF(0), table->s->db.str,
table->s->table_name.str);
return(NULL);
}
if (keynr == NO_SUCH_KEY) {
/* FIXME: Investigate the NO_SUCH_KEY usage */
- index = (dict_index_t*) ib_vector_getp(ft_table->fts->indexes, 0);
+ index = reinterpret_cast<dict_index_t*>
+ (ib_vector_getp(ft_table->fts->indexes, 0));
} else {
index = innobase_get_index(keynr);
}
- if (!index || index->type != DICT_FTS) {
+ if (index == NULL || index->type != DICT_FTS) {
my_error(ER_TABLE_HAS_NO_FT, MYF(0));
return(NULL);
}
@@ -10085,52 +10052,33 @@ ha_innobase::ft_init_ext(
ft_table->fts->added_synced = true;
}
- error = fts_query(trx, index, flags, query, query_len, &result);
+ const byte* q = reinterpret_cast<const byte*>(
+ const_cast<char*>(query));
+
+ // FIXME: support ft_init_ext_with_hints(), pass LIMIT
+ dberr_t error = fts_query(trx, index, flags, q, query_len, &result);
if (error != DB_SUCCESS) {
- my_error(convert_error_code_to_mysql(error, 0, NULL),
- MYF(0));
+ my_error(convert_error_code_to_mysql(error, 0, NULL), MYF(0));
return(NULL);
}
/* Allocate FTS handler, and instantiate it before return */
- fts_hdl = static_cast<NEW_FT_INFO*>(my_malloc(sizeof(NEW_FT_INFO),
- MYF(0)));
+ fts_hdl = reinterpret_cast<NEW_FT_INFO*>(
+ my_malloc(/*PSI_INSTRUMENT_ME,*/ sizeof(NEW_FT_INFO), MYF(0)));
fts_hdl->please = const_cast<_ft_vft*>(&ft_vft_result);
fts_hdl->could_you = const_cast<_ft_vft_ext*>(&ft_vft_ext_result);
- fts_hdl->ft_prebuilt = prebuilt;
+ fts_hdl->ft_prebuilt = m_prebuilt;
fts_hdl->ft_result = result;
- /* FIXME: Re-evluate the condition when Bug 14469540
- is resolved */
- prebuilt->in_fts_query = true;
+ /* FIXME: Re-evaluate the condition when Bug 14469540 is resolved */
+ m_prebuilt->in_fts_query = true;
- return((FT_INFO*) fts_hdl);
+ return(reinterpret_cast<FT_INFO*>(fts_hdl));
}
/*****************************************************************//**
-Copy a cached MySQL row.
-If requested, also avoids overwriting non-read columns.
-@param[out] buf Row in MySQL format.
-@param[in] cached_row Which row to copy.
-@param[in] rec_len Record length. */
-void
-ha_innobase::copy_cached_row(
- uchar* buf,
- const uchar* cached_row,
- uint rec_len)
-{
- if (prebuilt->keep_other_fields_on_keyread) {
- row_sel_copy_cached_fields_for_mysql(buf, cached_row,
- prebuilt);
- } else {
- memcpy(buf, cached_row, rec_len);
- }
-}
-
-
-/*****************************************************************//**
Set up search tuple for a query through FTS_DOC_ID_INDEX on
supplied Doc ID. This is used by MySQL to retrieve the documents
once the search result (Doc IDs) is available */
@@ -10138,7 +10086,7 @@ static
void
innobase_fts_create_doc_id_key(
/*===========================*/
- dtuple_t* tuple, /* in/out: prebuilt->search_tuple */
+ dtuple_t* tuple, /* in/out: m_prebuilt->search_tuple */
const dict_index_t*
index, /* in: index (FTS_DOC_ID_INDEX) */
doc_id_t* doc_id) /* in/out: doc id to search, value
@@ -10158,7 +10106,7 @@ innobase_fts_create_doc_id_key(
dict_field_t* field = dict_index_get_nth_field(index, 0);
ut_a(field->col->mtype == DATA_INT);
ut_ad(sizeof(*doc_id) == field->fixed_len);
- ut_ad(innobase_strcasecmp(index->name, FTS_DOC_ID_INDEX_NAME) == 0);
+ ut_ad(!strcmp(index->name, FTS_DOC_ID_INDEX_NAME));
#endif /* UNIV_DEBUG */
/* Convert to storage byte order */
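The "storage byte order" step that follows relies on InnoDB storing its 8-byte integers big-endian, so the search tuple's single DATA_INT field is filled MSB-first. A standalone model of just the byte order — the real code goes through the mach_write/dfield APIs, not this helper:

	#include <cstdint>

	/* Write doc_id into an 8-byte field, most significant byte first. */
	void write_doc_id_be(unsigned char* field, uint64_t doc_id)
	{
		for (int i = 7; i >= 0; i--) {
			field[i] = static_cast<unsigned char>(doc_id);
			doc_id >>= 8;
		}
	}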
@@ -10177,21 +10125,21 @@ innobase_fts_create_doc_id_key(
/**********************************************************************//**
Fetch next result from the FT result set
@return error code */
-UNIV_INTERN
+
int
ha_innobase::ft_read(
/*=================*/
uchar* buf) /*!< in/out: buf contain result row */
{
- fts_result_t* result;
- int error;
row_prebuilt_t* ft_prebuilt;
- ft_prebuilt = ((NEW_FT_INFO*) ft_handler)->ft_prebuilt;
+ ft_prebuilt = reinterpret_cast<NEW_FT_INFO*>(ft_handler)->ft_prebuilt;
- ut_a(ft_prebuilt == prebuilt);
+ ut_a(ft_prebuilt == m_prebuilt);
+
+ fts_result_t* result;
- result = ((NEW_FT_INFO*) ft_handler)->ft_result;
+ result = reinterpret_cast<NEW_FT_INFO*>(ft_handler)->ft_result;
if (result->current == NULL) {
/* This is the case where the FTS query did not
@@ -10216,25 +10164,34 @@ ha_innobase::ft_read(
next_record:
if (result->current != NULL) {
- dict_index_t* index;
- dtuple_t* tuple = prebuilt->search_tuple;
doc_id_t search_doc_id;
+ dtuple_t* tuple = m_prebuilt->search_tuple;
/* If we only need information from result we can return
without fetching the table row */
if (ft_prebuilt->read_just_key) {
+#ifdef MYSQL_STORE_FTS_DOC_ID
+ if (m_prebuilt->fts_doc_id_in_read_set) {
+ fts_ranking_t* ranking;
+ ranking = rbt_value(fts_ranking_t,
+ result->current);
+ innobase_fts_store_docid(
+ table, ranking->doc_id);
+ }
+#endif
table->status= 0;
return(0);
}
- index = dict_table_get_index_on_name(
- prebuilt->table, FTS_DOC_ID_INDEX_NAME);
+ dict_index_t* index;
+
+ index = m_prebuilt->table->fts_doc_id_index;
/* Must find the index */
- ut_a(index);
+ ut_a(index != NULL);
/* Switch to the FTS doc id index */
- prebuilt->index = index;
+ m_prebuilt->index = index;
fts_ranking_t* ranking = rbt_value(
fts_ranking_t, result->current);
@@ -10246,12 +10203,14 @@ next_record:
tuple. */
innobase_fts_create_doc_id_key(tuple, index, &search_doc_id);
- innobase_srv_conc_enter_innodb(prebuilt->trx);
+ innobase_srv_conc_enter_innodb(m_prebuilt);
dberr_t ret = row_search_for_mysql(
- (byte*) buf, PAGE_CUR_GE, prebuilt, ROW_SEL_EXACT, 0);
+ (byte*) buf, PAGE_CUR_GE, m_prebuilt, ROW_SEL_EXACT, 0);
- innobase_srv_conc_exit_innodb(prebuilt->trx);
+ innobase_srv_conc_exit_innodb(m_prebuilt);
+
+ int error;
switch (ret) {
case DB_SUCCESS:
@@ -10281,26 +10240,26 @@ next_record:
case DB_TABLESPACE_DELETED:
ib_senderrf(
- prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
ER_TABLESPACE_DISCARDED,
table->s->table_name.str);
table->status = STATUS_NOT_FOUND;
- error = HA_ERR_NO_SUCH_TABLE;
+ error = HA_ERR_TABLESPACE_MISSING;
break;
case DB_TABLESPACE_NOT_FOUND:
ib_senderrf(
- prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
ER_TABLESPACE_MISSING,
table->s->table_name.str);
table->status = STATUS_NOT_FOUND;
- error = HA_ERR_NO_SUCH_TABLE;
+ error = HA_ERR_TABLESPACE_MISSING;
break;
default:
error = convert_error_code_to_mysql(
- ret, 0, user_thd);
+ ret, 0, m_user_thd);
table->status = STATUS_NOT_FOUND;
break;
@@ -10313,16 +10272,6 @@ next_record:
}
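Condensing the flow above: ft_read() walks the rank-ordered result tree; for each hit it either returns rank-only data (covering read) or re-fetches the row by an exact PAGE_CUR_GE search on FTS_DOC_ID_INDEX. A schematic per-call step with illustrative types:

	#include <cstdint>
	#include <map>

	typedef uint64_t doc_id_t;

	struct ft_result {	/* rank-ordered hits, doc id -> rank */
		std::map<doc_id_t, double>::iterator current, end;
	};

	int ft_read_sketch(ft_result& r, bool read_just_key)
	{
		if (r.current == r.end) return 137;	/* HA_ERR_END_OF_FILE */
		doc_id_t id = r.current->first;
		++r.current;			/* advance for the next call */
		if (read_just_key) return 0;	/* rank only, no row fetch */
		/* otherwise: build the Doc ID search tuple, switch
		m_prebuilt->index to fts_doc_id_index, and do an exact
		row search (PAGE_CUR_GE, ROW_SEL_EXACT) for doc id `id` */
		(void) id;
		return 0;
	}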
#ifdef WITH_WSREP
-extern dict_index_t*
-wsrep_dict_foreign_find_index(
- dict_table_t* table,
- const char** col_names,
- const char** columns,
- ulint n_cols,
- dict_index_t* types_idx,
- ibool check_charsets,
- ulint check_null);
-
inline
const char*
wsrep_key_type_to_str(wsrep_key_type type)
@@ -10349,64 +10298,58 @@ wsrep_append_foreign_key(
wsrep_key_type key_type) /*!< in: access type of this key
(shared, exclusive, semi...) */
{
- ut_a(trx);
- THD* thd = (THD*)trx->mysql_thd;
- ulint rcode = DB_SUCCESS;
- char cache_key[513] = {'\0'};
- int cache_key_len;
- bool const copy = true;
+ THD* thd = trx->mysql_thd;
- if (!trx->is_wsrep() || wsrep_thd_exec_mode(thd) != LOCAL_STATE)
+ if (!trx->is_wsrep() || wsrep_thd_exec_mode(thd) != LOCAL_STATE) {
return DB_SUCCESS;
+ }
- if (!thd || !foreign ||
- (!foreign->referenced_table && !foreign->foreign_table))
- {
+ if (!foreign ||
+ (!foreign->referenced_table && !foreign->foreign_table)) {
WSREP_INFO("FK: %s missing in: %s",
- (!thd) ? "thread" :
- ((!foreign) ? "constraint" :
- ((!foreign->referenced_table) ?
+ (!foreign ? "constraint" :
+ (!foreign->referenced_table ?
"referenced table" : "foreign table")),
- (thd && wsrep_thd_query(thd)) ?
- wsrep_thd_query(thd) : "void");
+ wsrep_thd_query(thd));
return DB_ERROR;
}
+ ulint rcode = DB_SUCCESS;
+ char cache_key[513] = {'\0'};
+	int	cache_key_len	= 0;
+ bool const copy = true;
+
if ( !((referenced) ?
- foreign->referenced_table : foreign->foreign_table))
- {
+ foreign->referenced_table : foreign->foreign_table)) {
WSREP_DEBUG("pulling %s table into cache",
(referenced) ? "referenced" : "foreign");
mutex_enter(&(dict_sys->mutex));
- if (referenced)
- {
+
+ if (referenced) {
foreign->referenced_table =
dict_table_get_low(
foreign->referenced_table_name_lookup);
- if (foreign->referenced_table)
- {
+ if (foreign->referenced_table) {
foreign->referenced_index =
- wsrep_dict_foreign_find_index(
+ dict_foreign_find_index(
foreign->referenced_table, NULL,
foreign->referenced_col_names,
- foreign->n_fields,
+ foreign->n_fields,
foreign->foreign_index,
TRUE, FALSE);
}
- }
- else
- {
+ } else {
foreign->foreign_table =
dict_table_get_low(
foreign->foreign_table_name_lookup);
- if (foreign->foreign_table)
- {
+
+ if (foreign->foreign_table) {
foreign->foreign_index =
- wsrep_dict_foreign_find_index(
+ dict_foreign_find_index(
foreign->foreign_table, NULL,
foreign->foreign_col_names,
foreign->n_fields,
- foreign->referenced_index,
+ foreign->referenced_index,
TRUE, FALSE);
}
}
@@ -10414,8 +10357,7 @@ wsrep_append_foreign_key(
}
if ( !((referenced) ?
- foreign->referenced_table : foreign->foreign_table))
- {
+ foreign->referenced_table : foreign->foreign_table)) {
WSREP_WARN("FK: %s missing in query: %s",
(!foreign->referenced_table) ?
"referenced table" : "foreign table",
@@ -10423,6 +10365,7 @@ wsrep_append_foreign_key(
wsrep_thd_query(thd) : "void");
return DB_ERROR;
}
+
byte key[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
ulint len = WSREP_MAX_SUPPORTED_KEY_LENGTH;
@@ -10432,12 +10375,14 @@ wsrep_append_foreign_key(
UT_LIST_GET_FIRST(foreign->referenced_table->indexes) :
UT_LIST_GET_FIRST(foreign->foreign_table->indexes);
int i = 0;
+
while (idx != NULL && idx != idx_target) {
if (innobase_strcasecmp (idx->name, innobase_index_reserve_name) != 0) {
i++;
}
idx = UT_LIST_GET_NEXT(indexes, idx);
}
+
ut_a(idx);
key[0] = (char)i;
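What the loop above computes is the ordinal of the target index among the table's indexes, not counting InnoDB's reserved generated clustered index (innobase_index_reserve_name, i.e. GEN_CLUST_INDEX); that ordinal becomes the first byte of the replication key. A standalone model, using plain strcasecmp in place of innobase_strcasecmp:

	#include <strings.h>
	#include <vector>

	/* Ordinal of index_names[target] skipping the reserved name. */
	int wsrep_index_ordinal(const std::vector<const char*>& index_names,
				size_t target, const char* reserved_name)
	{
		int i = 0;
		for (size_t k = 0; k < target; k++) {
			if (strcasecmp(index_names[k], reserved_name) != 0) {
				i++;	/* GEN_CLUST_INDEX does not count */
			}
		}
		return i;	/* becomes key[0] in the hunk above */
	}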
@@ -10447,12 +10392,12 @@ wsrep_append_foreign_key(
if (rcode != DB_SUCCESS) {
WSREP_ERROR(
- "FK key set failed: %lu (%lu %s), index: %s %s, %s",
+ "FK key set failed: " ULINTPF
+ " (" ULINTPF " %s), index: %s %s, %s",
rcode, referenced, wsrep_key_type_to_str(key_type),
- (index && index->name) ? index->name :
- "void index",
- (index && index->table_name) ? index->table_name :
- "void table",
+ index ? index->name() : "void index",
+ (index && index->table) ? index->table->name.m_name :
+ "void table",
wsrep_thd_query(thd));
return DB_ERROR;
}
@@ -10460,10 +10405,11 @@ wsrep_append_foreign_key(
strncpy(cache_key,
(wsrep_protocol_version > 1) ?
((referenced) ?
- foreign->referenced_table->name :
- foreign->foreign_table->name) :
- foreign->foreign_table->name, sizeof(cache_key) - 1);
+ foreign->referenced_table->name.m_name :
+ foreign->foreign_table->name.m_name) :
+ foreign->foreign_table->name.m_name, sizeof(cache_key) - 1);
cache_key_len = strlen(cache_key);
+
#ifdef WSREP_DEBUG_PRINT
ulint j;
fprintf(stderr, "FK parent key, table: %s %s len: %lu ",
@@ -10474,16 +10420,18 @@ wsrep_append_foreign_key(
fprintf(stderr, "\n");
#endif
char *p = strchr(cache_key, '/');
+
if (p) {
*p = '\0';
} else {
WSREP_WARN("unexpected foreign key table %s %s",
- foreign->referenced_table->name,
- foreign->foreign_table->name);
+ foreign->referenced_table->name.m_name,
+ foreign->foreign_table->name.m_name);
}
wsrep_buf_t wkey_part[3];
wsrep_key_t wkey = {wkey_part, 3};
+
if (!wsrep_prepare_key(
(const uchar*)cache_key,
cache_key_len + 1,
@@ -10495,7 +10443,9 @@ wsrep_append_foreign_key(
wsrep_thd_query(thd) : "void");
return DB_ERROR;
}
+
wsrep_t *wsrep= get_wsrep();
+
rcode = (int)wsrep->append_key(
wsrep,
wsrep_ws_handle(thd, trx),
@@ -10503,9 +10453,11 @@ wsrep_append_foreign_key(
1,
key_type,
copy);
+
if (rcode) {
- DBUG_PRINT("wsrep", ("row key failed: %lu", rcode));
- WSREP_ERROR("Appending cascaded fk row key failed: %s, %lu",
+ DBUG_PRINT("wsrep", ("row key failed: " ULINTPF, rcode));
+ WSREP_ERROR("Appending cascaded fk row key failed: %s, "
+ ULINTPF,
(wsrep_thd_query(thd)) ?
wsrep_thd_query(thd) : "void", rcode);
return DB_ERROR;
@@ -10516,7 +10468,7 @@ wsrep_append_foreign_key(
static int
wsrep_append_key(
-/*==================*/
+/*=============*/
THD *thd,
trx_t *trx,
TABLE_SHARE *table_share,
@@ -10541,6 +10493,7 @@ wsrep_append_key(
#endif
wsrep_buf_t wkey_part[3];
wsrep_key_t wkey = {wkey_part, 3};
+
if (!wsrep_prepare_key(
(const uchar*)table_share->table_cache_key.str,
table_share->table_cache_key.length,
@@ -10554,6 +10507,7 @@ wsrep_append_key(
}
wsrep_t *wsrep= get_wsrep();
+
int rcode = (int)wsrep->append_key(
wsrep,
wsrep_ws_handle(thd, trx),
@@ -10568,21 +10522,26 @@ wsrep_append_key(
wsrep_thd_query(thd) : "void", rcode);
DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
}
+
DBUG_RETURN(0);
}
static bool
-referenced_by_foreign_key2(dict_table_t* table,
- dict_index_t* index) {
- ut_ad(table != NULL);
- ut_ad(index != NULL);
+referenced_by_foreign_key2(
+/*=======================*/
+ dict_table_t* table,
+ dict_index_t* index)
+{
+ ut_ad(table != NULL);
+ ut_ad(index != NULL);
+
+ const dict_foreign_set* fks = &table->referenced_set;
- const dict_foreign_set* fks = &table->referenced_set;
- for (dict_foreign_set::const_iterator it = fks->begin();
+ for (dict_foreign_set::const_iterator it = fks->begin();
it != fks->end();
- ++it)
- {
+ ++it) {
dict_foreign_t* foreign = *it;
+
if (foreign->referenced_index != index) {
continue;
}
@@ -10594,7 +10553,7 @@ referenced_by_foreign_key2(dict_table_t* table,
int
ha_innobase::wsrep_append_keys(
-/*==================*/
+/*===========================*/
THD *thd,
wsrep_key_type key_type, /*!< in: access type of this key
(shared, exclusive, semi...) */
@@ -10608,10 +10567,10 @@ ha_innobase::wsrep_append_keys(
trx_t *trx = thd_to_trx(thd);
if (table_share && table_share->tmp_table != NO_TMP_TABLE) {
- WSREP_DEBUG("skipping tmp table DML: THD: %lu tmp: %d SQL: %s",
+ WSREP_DEBUG("skipping tmp table DML: THD: %lu tmp: %d SQL: %s",
thd_get_thread_id(thd),
table_share->tmp_table,
- (wsrep_thd_query(thd)) ?
+ (wsrep_thd_query(thd)) ?
wsrep_thd_query(thd) : "void");
DBUG_RETURN(0);
}
@@ -10628,13 +10587,13 @@ ha_innobase::wsrep_append_keys(
if (!is_null) {
rcode = wsrep_append_key(
- thd, trx, table_share, table, keyval,
+ thd, trx, table_share, table, keyval,
len, key_type);
if (rcode) DBUG_RETURN(rcode);
}
else
{
- WSREP_DEBUG("NULL key skipped (proto 0): %s",
+ WSREP_DEBUG("NULL key skipped (proto 0): %s",
wsrep_thd_query(thd));
}
} else {
@@ -10665,8 +10624,8 @@ ha_innobase::wsrep_append_keys(
keyval1[0] = (char)i;
if (!tab) {
- WSREP_WARN("MySQL-InnoDB key mismatch %s %s",
- table->s->table_name.str,
+ WSREP_WARN("MariaDB-InnoDB key mismatch %s %s",
+ table->s->table_name.str,
key_info->name);
}
/* !hasPK == table with no PK, must append all non-unique keys */
@@ -10676,34 +10635,36 @@ ha_innobase::wsrep_append_keys(
(!tab && referenced_by_foreign_key()))) {
len = wsrep_store_key_val_for_row(
- thd, table, i, key0,
- WSREP_MAX_SUPPORTED_KEY_LENGTH,
+ thd, table, i, key0,
+ WSREP_MAX_SUPPORTED_KEY_LENGTH,
record0, &is_null);
if (!is_null) {
rcode = wsrep_append_key(
- thd, trx, table_share, table,
+ thd, trx, table_share, table,
keyval0, len+1, key_type);
if (rcode) DBUG_RETURN(rcode);
if (key_info->flags & HA_NOSAME ||
key_type == WSREP_KEY_SHARED)
key_appended = true;
- }
- else
- {
- WSREP_DEBUG("NULL key skipped: %s",
+ } else {
+ WSREP_DEBUG("NULL key skipped: %s",
wsrep_thd_query(thd));
}
+
if (record1) {
len = wsrep_store_key_val_for_row(
- thd, table, i, key1,
+ thd, table, i, key1,
WSREP_MAX_SUPPORTED_KEY_LENGTH,
record1, &is_null);
- if (!is_null && memcmp(key0, key1, len)) {
+
+ if (!is_null
+ && memcmp(key0, key1, len)) {
rcode = wsrep_append_key(
- thd, trx, table_share,
- table,
- keyval1, len+1, key_type);
+ thd, trx, table_share,
+ table,
+ keyval1, len+1,
+ key_type);
if (rcode) DBUG_RETURN(rcode);
}
}
@@ -10716,19 +10677,19 @@ ha_innobase::wsrep_append_keys(
uchar digest[16];
int rcode;
- wsrep_calc_row_hash(digest, record0, table, prebuilt, thd);
- if ((rcode = wsrep_append_key(thd, trx, table_share, table,
- (const char*) digest, 16,
+ wsrep_calc_row_hash(digest, record0, table, m_prebuilt, thd);
+ if ((rcode = wsrep_append_key(thd, trx, table_share, table,
+ (const char*) digest, 16,
key_type))) {
DBUG_RETURN(rcode);
}
if (record1) {
wsrep_calc_row_hash(
- digest, record1, table, prebuilt, thd);
- if ((rcode = wsrep_append_key(thd, trx, table_share,
+ digest, record1, table, m_prebuilt, thd);
+ if ((rcode = wsrep_append_key(thd, trx, table_share,
table,
- (const char*) digest,
+ (const char*) digest,
16, key_type))) {
DBUG_RETURN(rcode);
}
@@ -10748,7 +10709,7 @@ is the current 'position' of the handle, because if row ref is actually
the row id internally generated in InnoDB, then 'record' does not contain
it. We just guess that the row id must be for the record where the handle
was positioned the last time. */
-UNIV_INTERN
+
void
ha_innobase::position(
/*==================*/
@@ -10756,9 +10717,9 @@ ha_innobase::position(
{
uint len;
- ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
+ ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
- if (prebuilt->clust_index_was_generated) {
+ if (m_prebuilt->clust_index_was_generated) {
/* No primary key was defined for the table and we
generated the clustered index from row id: the
row reference will be the row id, not any key value
@@ -10766,19 +10727,16 @@ ha_innobase::position(
len = DATA_ROW_ID_LEN;
- memcpy(ref, prebuilt->row_id, len);
+ memcpy(ref, m_prebuilt->row_id, len);
} else {
- len = store_key_val_for_row(primary_key, (char*) ref,
- ref_length, record);
- }
-
- /* We assume that the 'ref' value len is always fixed for the same
- table. */
- if (len != ref_length) {
- sql_print_error("Stored ref len is %lu, but table ref len is "
- "%lu", (ulong) len, (ulong) ref_length);
+ /* Copy primary key as the row reference */
+ KEY* key_info = table->key_info + m_primary_key;
+ key_copy(ref, (uchar*)record, key_info, key_info->key_length);
+ len = key_info->key_length;
}
+
+ ut_ad(len == ref_length);
}
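
A minimal usage sketch (not part of this patch; standard handler API
semantics assumed, saved_ref is a hypothetical buffer): the server keeps
the reference that position() fills in and later re-fetches the same row.

    h->position(record);                       // fill h->ref from record
    memcpy(saved_ref, h->ref, h->ref_length);  // 6-byte row id or PK image
    /* ... later ... */
    h->rnd_pos(record, saved_ref);             // reposition on the same row
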
/*****************************************************************//**
@@ -10804,9 +10762,12 @@ create_table_check_doc_id_col(
ulint unsigned_type;
field = form->field[i];
+ if (!field->stored_in_db()) {
+ continue;
+ }
- col_type = get_innobase_type_from_mysql_type(&unsigned_type,
- field);
+ col_type = get_innobase_type_from_mysql_type(
+ &unsigned_type, field);
col_len = field->pack_length();
@@ -10826,9 +10787,9 @@ create_table_check_doc_id_col(
trx->mysql_thd,
Sql_condition::WARN_LEVEL_WARN,
ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: FTS_DOC_ID column must be "
- "of BIGINT NOT NULL type, and named "
- "in all capitalized characters");
+ "InnoDB: FTS_DOC_ID column must be"
+ " of BIGINT NOT NULL type, and named"
+ " in all capitalized characters");
my_error(ER_WRONG_COLUMN_NAME, MYF(0),
field->field_name);
*doc_id_col = ULINT_UNDEFINED;
@@ -10841,33 +10802,130 @@ create_table_check_doc_id_col(
return(false);
}
-/*****************************************************************//**
-Creates a table definition to an InnoDB database. */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
+
+/** Finds all base columns needed to compute a given generated column.
+This is returned as a bitmap, in field->table->tmp_set.
+Works for both dict_v_col_t and dict_s_col_t columns.
+@param[in] table InnoDB table
+@param[in] field MySQL field
+@param[in,out] col virtual or stored column */
+template <typename T>
+void
+prepare_vcol_for_base_setup(
+/*========================*/
+ const dict_table_t* table,
+ const Field* field,
+ T* col)
+{
+ ut_ad(col->num_base == 0);
+ ut_ad(col->base_col == NULL);
+
+ MY_BITMAP *old_read_set = field->table->read_set;
+ MY_BITMAP *old_vcol_set = field->table->vcol_set;
+
+ field->table->read_set = field->table->vcol_set = &field->table->tmp_set;
+
+ bitmap_clear_all(&field->table->tmp_set);
+ field->vcol_info->expr->walk(
+ &Item::register_field_in_read_map, 1, field->table);
+ col->num_base= bitmap_bits_set(&field->table->tmp_set);
+ if (col->num_base != 0) {
+ col->base_col = static_cast<dict_col_t**>(mem_heap_zalloc(
+ table->heap, col->num_base * sizeof(
+ * col->base_col)));
+ }
+ field->table->read_set= old_read_set;
+ field->table->vcol_set= old_vcol_set;
+}
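
A hedged illustration of what the walk produces (assumed column
definition, not taken from the patch):

    /* For a generated column c INT GENERATED ALWAYS AS (a + b) VIRTUAL,
       Item::register_field_in_read_map sets the bits of a and b in
       tmp_set, so col->num_base becomes 2 and base_col gets two slots. */
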
+
+
+/** Set up base columns for virtual column
+@param[in] table InnoDB table
+@param[in] field MySQL field
+@param[in,out] v_col virtual column */
+void
+innodb_base_col_setup(
+ dict_table_t* table,
+ const Field* field,
+ dict_v_col_t* v_col)
+{
+ int n = 0;
+
+ prepare_vcol_for_base_setup(table, field, v_col);
+
+ for (uint i= 0; i < field->table->s->fields; ++i) {
+ const Field* base_field = field->table->field[i];
+ if (base_field->stored_in_db()
+ && bitmap_is_set(&field->table->tmp_set, i)) {
+ ulint z;
+
+ for (z = 0; z < table->n_cols; z++) {
+ const char* name = dict_table_get_col_name(table, z);
+ if (!innobase_strcasecmp(name,
+ base_field->field_name)) {
+ break;
+ }
+ }
+
+ ut_ad(z != table->n_cols);
+
+ v_col->base_col[n] = dict_table_get_nth_col(table, z);
+ ut_ad(v_col->base_col[n]->ind == z);
+ n++;
+ }
+ }
+ v_col->num_base= n;
+}
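
Continuing the assumed example, the loop above resolves those bits back
to InnoDB columns by name:

    /* For CREATE TABLE t (a INT, c INT, b INT AS (a + c) VIRTUAL),
       innobase_strcasecmp() matches a and c against the dictionary
       column array, so v_col->base_col = { col(a), col(c) } and
       v_col->num_base == 2. */
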
+
+/** Set up base columns for stored column
+@param[in] table InnoDB table
+@param[in] field MySQL field
+@param[in,out] s_col stored column */
+void
+innodb_base_col_setup_for_stored(
+ const dict_table_t* table,
+ const Field* field,
+ dict_s_col_t* s_col)
+{
+ ulint n = 0;
+
+ prepare_vcol_for_base_setup(table, field, s_col);
+
+ for (uint i= 0; i < field->table->s->fields; ++i) {
+ const Field* base_field = field->table->field[i];
+
+ if (base_field->stored_in_db()
+ && bitmap_is_set(&field->table->tmp_set, i)) {
+ ulint z;
+ for (z = 0; z < table->n_cols; z++) {
+ const char* name = dict_table_get_col_name(
+ table, z);
+ if (!innobase_strcasecmp(
+ name, base_field->field_name)) {
+ break;
+ }
+ }
+
+ ut_ad(z != table->n_cols);
+
+ s_col->base_col[n] = dict_table_get_nth_col(table, z);
+ n++;
+
+ if (n == s_col->num_base) {
+ break;
+ }
+ }
+ }
+ s_col->num_base= n;
+}
+
+/** Create a table definition in an InnoDB database.
+@return ER_* level error */
+inline MY_ATTRIBUTE((warn_unused_result))
int
-create_table_def(
-/*=============*/
- trx_t* trx, /*!< in: InnoDB transaction handle */
- const TABLE* form, /*!< in: information on table
- columns and indexes */
- const char* table_name, /*!< in: table name */
- const char* temp_path, /*!< in: if this is a table explicitly
- created by the user with the
- TEMPORARY keyword, then this
- parameter is the dir path where the
- table should be placed if we create
- an .ibd file for it (no .ibd extension
- in the path, though). Otherwise this
- is a zero length-string */
- const char* remote_path, /*!< in: Remote path or zero length-string */
- ulint flags, /*!< in: table flags */
- ulint flags2, /*!< in: table flags2 */
- const ha_table_option_struct*options)
-{
- THD* thd = trx->mysql_thd;
+create_table_info_t::create_table_def()
+{
dict_table_t* table;
- ulint n_cols;
- dberr_t err;
ulint col_type;
ulint col_len;
ulint nulls_allowed;
@@ -10875,106 +10933,111 @@ create_table_def(
ulint binary_type;
ulint long_true_varchar;
ulint charset_no;
- ulint i;
ulint doc_id_col = 0;
ibool has_doc_id_col = FALSE;
mem_heap_t* heap;
+ ulint space_id = 0;
+ ha_table_option_struct *options= m_form->s->option_struct;
+ dberr_t err = DB_SUCCESS;
DBUG_ENTER("create_table_def");
- DBUG_PRINT("enter", ("table_name: %s", table_name));
+ DBUG_PRINT("enter", ("table_name: %s", m_table_name));
- DBUG_ASSERT(thd != NULL);
+ DBUG_ASSERT(m_trx->mysql_thd == m_thd);
+ DBUG_ASSERT(!m_drop_before_rollback);
/* MySQL does the name length check. But we do additional check
on the name length here */
- const size_t table_name_len = strlen(table_name);
+ const size_t table_name_len = strlen(m_table_name);
if (table_name_len > MAX_FULL_NAME_LEN) {
push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
ER_TABLE_NAME,
"InnoDB: Table Name or Database Name is too long");
DBUG_RETURN(ER_TABLE_NAME);
}
- if (table_name[table_name_len - 1] == '/') {
+ if (m_table_name[table_name_len - 1] == '/') {
push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
ER_TABLE_NAME,
"InnoDB: Table name is empty");
DBUG_RETURN(ER_WRONG_TABLE_NAME);
}
- n_cols = form->s->fields;
+ /* Find out the number of virtual columns. */
+ ulint num_v = 0;
+ const bool omit_virtual = ha_innobase::omits_virtual_cols(*m_form->s);
+ const ulint n_cols = omit_virtual
+ ? m_form->s->stored_fields : m_form->s->fields;
+
+ if (!omit_virtual) {
+ for (ulint i = 0; i < n_cols; i++) {
+ num_v += !m_form->field[i]->stored_in_db();
+ }
+ }
/* Check whether there already exists a FTS_DOC_ID column */
- if (create_table_check_doc_id_col(trx, form, &doc_id_col)){
+ if (create_table_check_doc_id_col(m_trx, m_form, &doc_id_col)){
/* Raise error if the Doc ID column is of wrong type or name */
if (doc_id_col == ULINT_UNDEFINED) {
- trx_commit_for_mysql(trx);
err = DB_ERROR;
- goto error_ret;
+error_ret:
+ DBUG_RETURN(convert_error_code_to_mysql(err, m_flags,
+ m_thd));
} else {
has_doc_id_col = TRUE;
}
}
- /* We pass 0 as the space id, and determine at a lower level the space
- id where to store the table */
+ /* For single-table tablespaces, we pass 0 as the space id, and then
+ determine the actual space id when the tablespace is created. */
- if (flags2 & DICT_TF2_FTS) {
- /* Adjust for the FTS hidden field */
- if (!has_doc_id_col) {
- table = dict_mem_table_create(table_name, 0, form->s->stored_fields + 1,
- flags, flags2);
+ /* Adjust the number of columns for the FTS hidden field */
+ const ulint actual_n_cols = n_cols
+ + (m_flags2 & DICT_TF2_FTS && !has_doc_id_col);
- /* Set the hidden doc_id column. */
- table->fts->doc_col = form->s->stored_fields;
- } else {
- table = dict_mem_table_create(table_name, 0, form->s->stored_fields,
- flags, flags2);
- table->fts->doc_col = doc_id_col;
- }
- } else {
- table = dict_mem_table_create(table_name, 0, form->s->stored_fields,
- flags, flags2);
- }
+ table = dict_mem_table_create(m_table_name, space_id,
+ actual_n_cols, num_v, m_flags, m_flags2);
- if (flags2 & DICT_TF2_TEMPORARY) {
- ut_a(strlen(temp_path));
- table->dir_path_of_temp_table =
- mem_heap_strdup(table->heap, temp_path);
+ /* Set the hidden doc_id column. */
+ if (m_flags2 & DICT_TF2_FTS) {
+ table->fts->doc_col = has_doc_id_col
+ ? doc_id_col : n_cols - num_v;
}
- if (DICT_TF_HAS_DATA_DIR(flags)) {
- ut_a(strlen(remote_path));
- table->data_dir_path = mem_heap_strdup(table->heap, remote_path);
+ if (DICT_TF_HAS_DATA_DIR(m_flags)) {
+ ut_a(strlen(m_remote_path));
+
+ table->data_dir_path = mem_heap_strdup(
+ table->heap, m_remote_path);
+
} else {
table->data_dir_path = NULL;
}
+
heap = mem_heap_create(1000);
- for (i = 0; i < n_cols; i++) {
- Field* field = form->field[i];
- if (!field->stored_in_db)
- continue;
+ for (ulint i = 0, j = 0; j < n_cols; i++) {
+ Field* field = m_form->field[i];
- col_type = get_innobase_type_from_mysql_type(&unsigned_type,
- field);
+ col_type = get_innobase_type_from_mysql_type(
+ &unsigned_type, field);
if (!col_type) {
push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
ER_CANT_CREATE_TABLE,
- "Error creating table '%s' with "
- "column '%s'. Please check its "
- "column type and try to re-create "
- "the table with an appropriate "
- "column type.",
- table->name, field->field_name);
+ "Error creating table '%s' with"
+ " column '%s'. Please check its"
+ " column type and try to re-create"
+ " the table with an appropriate"
+ " column type.",
+ table->name.m_name, field->field_name);
goto err_col;
}
@@ -10987,24 +11050,27 @@ create_table_def(
charset_no = (ulint) field->charset()->number;
- if (UNIV_UNLIKELY(charset_no > MAX_CHAR_COLL_NUM)) {
+ DBUG_EXECUTE_IF("simulate_max_char_col",
+ charset_no = MAX_CHAR_COLL_NUM + 1;
+ );
+
+ if (charset_no > MAX_CHAR_COLL_NUM) {
/* in data0type.h we assume that the
number fits in one byte in prtype */
push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
ER_CANT_CREATE_TABLE,
"In InnoDB, charset-collation codes"
" must be below 256."
- " Unsupported code %lu.",
- (ulong) charset_no);
+ " Unsupported code " ULINTPF ".",
+ charset_no);
mem_heap_free(heap);
+ dict_mem_table_free(table);
+
DBUG_RETURN(ER_CANT_CREATE_TABLE);
}
}
- /* we assume in dtype_form_prtype() that this fits in
- two bytes */
- ut_a(static_cast<uint>(field->type()) <= MAX_CHAR_COLL_NUM);
col_len = field->pack_length();
/* The MySQL pack length contains 1 or 2 bytes length field
@@ -11030,31 +11096,136 @@ create_table_def(
err_col:
dict_mem_table_free(table);
mem_heap_free(heap);
- trx_commit_for_mysql(trx);
err = DB_ERROR;
goto error_ret;
}
- dict_mem_table_add_col(table, heap,
- field->field_name,
- col_type,
- dtype_form_prtype(
- (ulint) field->type()
- | nulls_allowed | unsigned_type
- | binary_type | long_true_varchar,
- charset_no),
- col_len);
+ ulint is_virtual = !field->stored_in_db() ? DATA_VIRTUAL : 0;
+
+ if (!is_virtual) {
+ dict_mem_table_add_col(table, heap,
+ field->field_name, col_type,
+ dtype_form_prtype(
+ (ulint) field->type()
+ | nulls_allowed | unsigned_type
+ | binary_type | long_true_varchar,
+ charset_no),
+ col_len);
+ } else if (!omit_virtual) {
+ dict_mem_table_add_v_col(table, heap,
+ field->field_name, col_type,
+ dtype_form_prtype(
+ (ulint) field->type()
+ | nulls_allowed | unsigned_type
+ | binary_type | long_true_varchar
+ | is_virtual,
+ charset_no),
+ col_len, i, 0);
+ }
+
+ if (innobase_is_s_fld(field)) {
+ ut_ad(!is_virtual);
+			/* Add the stored column to table->s_cols. */
+			dict_mem_table_add_s_col(table, 0);
+ }
+
+ if (is_virtual && omit_virtual) {
+ continue;
+ }
+
+ j++;
+ }
+
+ if (num_v) {
+ for (ulint i = 0, j = 0; i < n_cols; i++) {
+ dict_v_col_t* v_col;
+
+ const Field* field = m_form->field[i];
+
+ if (field->stored_in_db()) {
+ continue;
+ }
+
+ v_col = dict_table_get_nth_v_col(table, j);
+
+ j++;
+
+ innodb_base_col_setup(table, field, v_col);
+ }
+ }
+
+	/* Fill the base columns for the stored columns in the list. */
+ if (table->s_cols && table->s_cols->size()) {
+ for (ulint i = 0; i < n_cols; i++) {
+ Field* field = m_form->field[i];
+
+ if (!innobase_is_s_fld(field)) {
+ continue;
+ }
+
+ dict_s_col_list::iterator it;
+ for (it = table->s_cols->begin();
+ it != table->s_cols->end(); ++it) {
+ dict_s_col_t s_col = *it;
+
+ if (s_col.s_pos == i) {
+ innodb_base_col_setup_for_stored(
+ table, field, &s_col);
+ break;
+ }
+ }
+ }
}
/* Add the FTS doc_id hidden column. */
- if (flags2 & DICT_TF2_FTS && !has_doc_id_col) {
+ if (m_flags2 & DICT_TF2_FTS && !has_doc_id_col) {
fts_add_doc_id_column(table, heap);
}
- err = row_create_table_for_mysql(table, trx, false,
- fil_encryption_t(options->encryption),
- options->encryption_key_id);
+	/* For a temporary table we avoid creating entries in the SYSTEM
+	tables. A temporary table only lives as long as the connection or
+	the server, so it never has to be restored after a restart and no
+	dictionary entry is needed. */
+ if (table->is_temporary()) {
+
+ if ((options->encryption == 1
+ && !innodb_encrypt_temporary_tables)
+ || (options->encryption == 2
+ && innodb_encrypt_temporary_tables)) {
+ push_warning_printf(m_thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "Ignoring encryption parameter during "
+ "temporary table creation.");
+ }
+
+ /* Get a new table ID */
+ dict_table_assign_new_id(table, m_trx);
+ table->space = SRV_TMP_SPACE_ID;
+
+	/* Temporary tables are maintained in memory, so
+	can_be_evicted is FALSE. */
+ mem_heap_t* temp_table_heap = mem_heap_create(256);
+
+ dict_table_add_to_cache(table, FALSE, temp_table_heap);
+
+ DBUG_EXECUTE_IF("ib_ddl_crash_during_create2",
+ DBUG_SUICIDE(););
+
+ mem_heap_free(temp_table_heap);
+ } else {
+ if (err == DB_SUCCESS) {
+ err = row_create_table_for_mysql(
+ table, m_trx,
+ fil_encryption_t(options->encryption),
+ uint32_t(options->encryption_key_id));
+ m_drop_before_rollback = (err == DB_SUCCESS);
+ }
+
+ DBUG_EXECUTE_IF("ib_crash_during_create_for_encryption",
+ DBUG_SUICIDE(););
+ }
mem_heap_free(heap);
@@ -11065,8 +11236,8 @@ err_col:
char display_name[FN_REFLEN];
char* buf_end = innobase_convert_identifier(
display_name, sizeof(display_name) - 1,
- table_name, strlen(table_name),
- thd, TRUE);
+ m_table_name, strlen(m_table_name),
+ m_thd);
*buf_end = '\0';
@@ -11075,13 +11246,12 @@ err_col:
: ER_TABLESPACE_EXISTS, MYF(0), display_name);
}
-error_ret:
- DBUG_RETURN(convert_error_code_to_mysql(err, flags, thd));
+ goto error_ret;
}
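
The reworked error handling deserves a note: error_ret is defined inside
the first failure branch, and every later exit jumps backwards into it.
A condensed sketch (hypothetical predicate names, not part of the patch):

    int create_table_info_t::create_table_def()    /* condensed */
    {
        dberr_t err = DB_SUCCESS;
        if (doc_id_col_is_bad()) {                 /* hypothetical */
            err = DB_ERROR;
    error_ret:
            return convert_error_code_to_mysql(err, m_flags, m_thd);
        }
        /* ... build the dict_table_t, add the columns, create the
           table; err is updated along the way ... */
        goto error_ret;    /* every exit funnels through the label;
                              DB_SUCCESS maps to 0 */
    }
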
/*****************************************************************//**
Creates an index in an InnoDB database. */
-static
+inline
int
create_index(
/*=========*/
@@ -11105,15 +11275,30 @@ create_index(
/* Assert that "GEN_CLUST_INDEX" cannot be used as non-primary index */
ut_a(innobase_strcasecmp(key->name, innobase_index_reserve_name) != 0);
- if (key->flags & HA_FULLTEXT) {
+ ind_type = 0;
+ if (key->flags & HA_SPATIAL) {
+ ind_type = DICT_SPATIAL;
+ } else if (key->flags & HA_FULLTEXT) {
+ ind_type = DICT_FTS;
+ }
+
+	if (ind_type != 0) {
index = dict_mem_index_create(table_name, key->name, 0,
- DICT_FTS,
+ ind_type,
key->user_defined_key_parts);
for (ulint i = 0; i < key->user_defined_key_parts; i++) {
- KEY_PART_INFO* key_part = key->key_part + i;
- dict_mem_index_add_field(
- index, key_part->field->field_name, 0);
+ const Field* field = key->key_part[i].field;
+
+ /* We do not support special (Fulltext or Spatial)
+ index on virtual columns */
+ if (!field->stored_in_db()) {
+ ut_ad(0);
+ DBUG_RETURN(HA_ERR_UNSUPPORTED);
+ }
+
+ dict_mem_index_add_field(index, field->field_name, 0);
}
DBUG_RETURN(convert_error_code_to_mysql(
@@ -11133,7 +11318,7 @@ create_index(
ind_type |= DICT_UNIQUE;
}
- field_lengths = (ulint*) my_malloc(
+ field_lengths = (ulint*) my_malloc(//PSI_INSTRUMENT_ME,
key->user_defined_key_parts * sizeof *
field_lengths, MYF(MY_FAE));
@@ -11155,29 +11340,22 @@ create_index(
specified number of first bytes of the column to
the index field.) The flag does not seem to be
properly set by MySQL. Let us fall back on testing
- the length of the key part versus the column. */
-
- Field* field = NULL;
-
- for (ulint j = 0; j < form->s->fields; j++) {
-
- field = form->field[j];
-
- if (0 == innobase_strcasecmp(
- field->field_name,
- key_part->field->field_name)) {
- /* Found the corresponding column */
-
- goto found;
- }
- }
+		the length of the key part versus the column.
+		We first need the table's own column: if the index is on
+		a prefix, key_part->field is not that column (it is a
+		"fake" field forged in open_table_from_share() with length
+		equal to the length of the prefix), so we have to go
+		through form->field. */
+		Field* field = form->field[key_part->field->field_index];
+
+		if (field == NULL) {
+			ut_error;
+		}
+
+ const char* field_name = key_part->field->field_name;
- ut_error;
-found:
col_type = get_innobase_type_from_mysql_type(
&is_unsigned, key_part->field);
- if (DATA_BLOB == col_type
+ if (DATA_LARGE_MTYPE(col_type)
|| (key_part->length < field->pack_length()
&& field->type() != MYSQL_TYPE_VARCHAR)
|| (field->type() == MYSQL_TYPE_VARCHAR
@@ -11193,10 +11371,10 @@ found:
case DATA_DOUBLE:
case DATA_DECIMAL:
sql_print_error(
- "MySQL is trying to create a column "
- "prefix index field, on an "
- "inappropriate data type. Table "
- "name %s, column name %s.",
+ "MariaDB is trying to create a column"
+ " prefix index field, on an"
+ " inappropriate data type. Table"
+ " name %s, column name %s.",
table_name,
key_part->field->field_name);
@@ -11208,8 +11386,11 @@ found:
field_lengths[i] = key_part->length;
- dict_mem_index_add_field(
- index, key_part->field->field_name, prefix_len);
+ if (!key_part->field->stored_in_db()) {
+ index->type |= DICT_VIRTUAL;
+ }
+
+ dict_mem_index_add_field(index, field_name, prefix_len);
}
ut_ad(key->flags & HA_FULLTEXT || !(index->type & DICT_FTS));
@@ -11230,7 +11411,7 @@ found:
/*****************************************************************//**
Creates an index to an InnoDB table when the user has defined no
primary index. */
-static
+inline
int
create_clustered_index_when_no_primary(
/*===================================*/
@@ -11252,14 +11433,13 @@ create_clustered_index_when_no_primary(
return(convert_error_code_to_mysql(error, flags, NULL));
}
-/*****************************************************************//**
-Return a display name for the row format
+/** Return a display name for the row format
+@param[in] row_format Row Format
@return row format name */
-UNIV_INTERN
+static
const char*
get_row_format_name(
-/*================*/
- enum row_type row_format) /*!< in: Row Format */
+ enum row_type row_format)
{
switch (row_format) {
case ROW_TYPE_COMPACT:
@@ -11281,75 +11461,103 @@ get_row_format_name(
return("NOT USED");
}
-/** If file-per-table is missing, issue warning and set ret false */
-#define CHECK_ERROR_ROW_TYPE_NEEDS_FILE_PER_TABLE(use_tablespace)\
- if (!use_tablespace) { \
- push_warning_printf( \
- thd, Sql_condition::WARN_LEVEL_WARN, \
- ER_ILLEGAL_HA_CREATE_OPTION, \
- "InnoDB: ROW_FORMAT=%s requires" \
- " innodb_file_per_table.", \
- get_row_format_name(row_format)); \
- ret = "ROW_FORMAT"; \
+/** Validate DATA DIRECTORY option.
+@return true if valid, false if not. */
+bool
+create_table_info_t::create_option_data_directory_is_valid()
+{
+ bool is_valid = true;
+
+ ut_ad(m_create_info->data_file_name
+ && m_create_info->data_file_name[0] != '\0');
+
+ /* Use DATA DIRECTORY only with file-per-table. */
+ if (!m_allow_file_per_table) {
+ push_warning(
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: DATA DIRECTORY requires"
+ " innodb_file_per_table.");
+ is_valid = false;
}
-/** If file-format is Antelope, issue warning and set ret false */
-#define CHECK_ERROR_ROW_TYPE_NEEDS_GT_ANTELOPE \
- if (srv_file_format < UNIV_FORMAT_B) { \
- push_warning_printf( \
- thd, Sql_condition::WARN_LEVEL_WARN, \
- ER_ILLEGAL_HA_CREATE_OPTION, \
- "InnoDB: ROW_FORMAT=%s requires" \
- " innodb_file_format > Antelope.", \
- get_row_format_name(row_format)); \
- ret = "ROW_FORMAT"; \
+ /* Do not use DATA DIRECTORY with TEMPORARY TABLE. */
+ if (m_create_info->options & HA_LEX_CREATE_TMP_TABLE) {
+ push_warning(
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: DATA DIRECTORY cannot be used"
+ " for TEMPORARY tables.");
+ is_valid = false;
}
+	/* A DATA DIRECTORY mixed with TABLESPACE is checked in
+	create_option_tablespace_is_valid(), so there is no need to
+	repeat that check here. */
-/*****************************************************************//**
-Validates the create options. We may build on this function
-in future. For now, it checks two specifiers:
-KEY_BLOCK_SIZE and ROW_FORMAT
-If innodb_strict_mode is not set then this function is a no-op
-@return NULL if valid, string if not. */
-UNIV_INTERN
+ return(is_valid);
+}
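
Both call sites follow the same guarded pattern, since this function
asserts a non-empty DATA DIRECTORY; a sketch mirroring the code below:

    if (m_create_info->data_file_name
        && m_create_info->data_file_name[0] != '\0'
        && !create_option_data_directory_is_valid()) {
        /* create_options_are_invalid(): report "DATA DIRECTORY";
           parse_table_name(): warn and clear DICT_TF_MASK_DATA_DIR. */
    }
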
+
+/** Validate the create options. Check that the options KEY_BLOCK_SIZE,
+ROW_FORMAT, DATA DIRECTORY, TEMPORARY are compatible with
+each other and other settings. These CREATE OPTIONS are not validated
+here unless innodb_strict_mode is on. With strict mode, this function
+will report each problem it finds using a custom message with error
+code ER_ILLEGAL_HA_CREATE_OPTION, not its built-in message.
+@return NULL if valid, string name of bad option if not. */
const char*
-create_options_are_invalid(
-/*=======================*/
- THD* thd, /*!< in: connection thread. */
- TABLE* form, /*!< in: information on table
- columns and indexes */
- HA_CREATE_INFO* create_info, /*!< in: create info. */
- bool use_tablespace) /*!< in: srv_file_per_table */
+create_table_info_t::create_options_are_invalid()
{
- ibool kbs_specified = FALSE;
- const char* ret = NULL;
- enum row_type row_format = form->s->row_type;
+ bool has_key_block_size = (m_create_info->key_block_size != 0);
- ut_ad(thd != NULL);
+ const char* ret = NULL;
+ enum row_type row_format = m_create_info->row_type;
+ const bool is_temp
+ = m_create_info->options & HA_LEX_CREATE_TMP_TABLE;
+
+ ut_ad(m_thd != NULL);
- /* If innodb_strict_mode is not set don't do any validation. */
- if (!(THDVAR(thd, strict_mode))) {
+ /* If innodb_strict_mode is not set don't do any more validation. */
+ if (!THDVAR(m_thd, strict_mode)) {
return(NULL);
}
- ut_ad(form != NULL);
- ut_ad(create_info != NULL);
+ /* Check if a non-zero KEY_BLOCK_SIZE was specified. */
+ if (has_key_block_size) {
+ if (is_temp) {
+ my_error(ER_UNSUPPORT_COMPRESSED_TEMPORARY_TABLE,
+ MYF(0));
+ return("KEY_BLOCK_SIZE");
+ }
- /* First check if a non-zero KEY_BLOCK_SIZE was specified. */
- if (create_info->key_block_size) {
- kbs_specified = TRUE;
- switch (create_info->key_block_size) {
+ switch (m_create_info->key_block_size) {
ulint kbs_max;
case 1:
case 2:
case 4:
case 8:
case 16:
+ /* The maximum KEY_BLOCK_SIZE (KBS) is
+ UNIV_PAGE_SIZE_MAX. But if UNIV_PAGE_SIZE is
+ smaller than UNIV_PAGE_SIZE_MAX, the maximum
+ KBS is also smaller. */
+ kbs_max = ut_min(
+ 1 << (UNIV_PAGE_SSIZE_MAX - 1),
+ 1 << (PAGE_ZIP_SSIZE_MAX - 1));
+ if (m_create_info->key_block_size > kbs_max) {
+ push_warning_printf(
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: KEY_BLOCK_SIZE=%ld"
+ " cannot be larger than %ld.",
+ m_create_info->key_block_size,
+ kbs_max);
+ ret = "KEY_BLOCK_SIZE";
+ }
+
/* Valid KEY_BLOCK_SIZE, check its dependencies. */
- if (!use_tablespace) {
+ if (!m_allow_file_per_table) {
push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: KEY_BLOCK_SIZE requires"
" innodb_file_per_table.");
@@ -11357,57 +11565,70 @@ create_options_are_invalid(
}
if (srv_file_format < UNIV_FORMAT_B) {
push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: KEY_BLOCK_SIZE requires"
" innodb_file_format > Antelope.");
ret = "KEY_BLOCK_SIZE";
}
-
- /* The maximum KEY_BLOCK_SIZE (KBS) is 16. But if
- UNIV_PAGE_SIZE is smaller than 16k, the maximum
- KBS is also smaller. */
- kbs_max = ut_min(
- 1 << (UNIV_PAGE_SSIZE_MAX - 1),
- 1 << (PAGE_ZIP_SSIZE_MAX - 1));
- if (create_info->key_block_size > kbs_max) {
- push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: KEY_BLOCK_SIZE=%ld"
- " cannot be larger than %ld.",
- create_info->key_block_size,
- kbs_max);
- ret = "KEY_BLOCK_SIZE";
- }
break;
default:
push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: invalid KEY_BLOCK_SIZE = %lu."
+ "InnoDB: invalid KEY_BLOCK_SIZE = %u."
" Valid values are [1, 2, 4, 8, 16]",
- create_info->key_block_size);
+ (uint) m_create_info->key_block_size);
ret = "KEY_BLOCK_SIZE";
break;
}
}
- /* Check for a valid Innodb ROW_FORMAT specifier and
+ /* Check for a valid InnoDB ROW_FORMAT specifier and
other incompatibilities. */
switch (row_format) {
case ROW_TYPE_COMPRESSED:
- CHECK_ERROR_ROW_TYPE_NEEDS_FILE_PER_TABLE(use_tablespace);
- CHECK_ERROR_ROW_TYPE_NEEDS_GT_ANTELOPE;
+ if (is_temp) {
+ my_error(ER_UNSUPPORT_COMPRESSED_TEMPORARY_TABLE,
+ MYF(0));
+ return("ROW_FORMAT");
+ }
+ if (!m_allow_file_per_table) {
+ push_warning_printf(
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: ROW_FORMAT=%s requires"
+ " innodb_file_per_table.",
+ get_row_format_name(row_format));
+ ret = "ROW_FORMAT";
+ }
+ if (srv_file_format < UNIV_FORMAT_B) {
+ push_warning_printf(
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: ROW_FORMAT=%s requires"
+ " innodb_file_format > Antelope.",
+ get_row_format_name(row_format));
+ ret = "ROW_FORMAT";
+ }
break;
case ROW_TYPE_DYNAMIC:
+ if (!is_temp && srv_file_format < UNIV_FORMAT_B) {
+ push_warning_printf(
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: ROW_FORMAT=%s requires"
+ " innodb_file_format > Antelope.",
+ get_row_format_name(row_format));
+ ret = "ROW_FORMAT";
+ }
/* ROW_FORMAT=DYNAMIC also shuns KEY_BLOCK_SIZE */
/* fall through */
case ROW_TYPE_COMPACT:
case ROW_TYPE_REDUNDANT:
- if (kbs_specified) {
+ if (has_key_block_size) {
push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: cannot specify ROW_FORMAT = %s"
" with KEY_BLOCK_SIZE.",
@@ -11421,52 +11642,37 @@ create_options_are_invalid(
case ROW_TYPE_PAGE:
case ROW_TYPE_NOT_USED:
push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION, \
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: invalid ROW_FORMAT specifier.");
ret = "ROW_TYPE";
break;
}
- /* Use DATA DIRECTORY only with file-per-table. */
- if (create_info->data_file_name && !use_tablespace) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: DATA DIRECTORY requires"
- " innodb_file_per_table.");
- ret = "DATA DIRECTORY";
- }
-
- /* Do not use DATA DIRECTORY with TEMPORARY TABLE. */
- if (create_info->data_file_name
- && create_info->options & HA_LEX_CREATE_TMP_TABLE) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: DATA DIRECTORY cannot be used"
- " for TEMPORARY tables.");
+ if (m_create_info->data_file_name
+ && m_create_info->data_file_name[0] != '\0'
+ && !create_option_data_directory_is_valid()) {
ret = "DATA DIRECTORY";
}
/* Do not allow INDEX_DIRECTORY */
- if (create_info->index_file_name) {
+ if (m_create_info->index_file_name) {
push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: INDEX DIRECTORY is not supported");
ret = "INDEX DIRECTORY";
}
- if ((kbs_specified || row_format == ROW_TYPE_COMPRESSED)
- && UNIV_PAGE_SIZE > (1<<14)) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: Cannot create a COMPRESSED table"
- " when innodb_page_size > 16k.");
-
- if (kbs_specified) {
+ /* Don't support compressed table when page size > 16k. */
+ if ((has_key_block_size || row_format == ROW_TYPE_COMPRESSED)
+ && UNIV_PAGE_SIZE > UNIV_PAGE_SIZE_DEF) {
+ push_warning(m_thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: Cannot create a COMPRESSED table"
+ " when innodb_page_size > 16k.");
+
+ if (has_key_block_size) {
ret = "KEY_BLOCK_SIZE";
} else {
ret = "ROW_TYPE";
@@ -11477,30 +11683,187 @@ create_options_are_invalid(
}
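
A worked example of the KEY_BLOCK_SIZE ceiling above (the ssize constants
are assumptions about particular builds, not taken from the patch):

    /* On a 4k-page build, UNIV_PAGE_SSIZE_MAX == 3, so
       kbs_max = min(1 << 2, 1 << 4) = 4: KEY_BLOCK_SIZE=8 enters the
       valid-values switch arm but is then rejected as larger than 4.
       On the default 16k build kbs_max is 16 and all of
       {1, 2, 4, 8, 16} pass this check. */
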
/*****************************************************************//**
+Check engine-specific table options not handled by the SQL parser.
+@return NULL if valid, string if not */
+const char*
+create_table_info_t::check_table_options()
+{
+ enum row_type row_format = m_create_info->row_type;
+ const ha_table_option_struct *options= m_form->s->option_struct;
+
+ switch (options->encryption) {
+ case FIL_ENCRYPTION_OFF:
+ if (options->encryption_key_id != FIL_DEFAULT_ENCRYPTION_KEY) {
+ push_warning(
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_WRONG_CREATE_OPTION,
+ "InnoDB: ENCRYPTED=NO implies"
+ " ENCRYPTION_KEY_ID=1");
+ compile_time_assert(FIL_DEFAULT_ENCRYPTION_KEY == 1);
+ }
+ if (srv_encrypt_tables != 2) {
+ break;
+ }
+ push_warning(
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_WRONG_CREATE_OPTION,
+ "InnoDB: ENCRYPTED=NO cannot be used with"
+ " innodb_encrypt_tables=FORCE");
+ return "ENCRYPTED";
+ case FIL_ENCRYPTION_DEFAULT:
+ if (!srv_encrypt_tables) {
+ break;
+ }
+ /* fall through */
+ case FIL_ENCRYPTION_ON:
+ const uint32_t key_id = uint32_t(options->encryption_key_id);
+ if (!encryption_key_id_exists(key_id)) {
+ push_warning_printf(
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_WRONG_CREATE_OPTION,
+ "InnoDB: ENCRYPTION_KEY_ID %u not available",
+ key_id);
+ return "ENCRYPTION_KEY_ID";
+ }
+
+ /* Currently we do not support encryption for spatial indexes.
+ Do not allow ENCRYPTED=YES if any SPATIAL INDEX exists. */
+ if (options->encryption != FIL_ENCRYPTION_ON) {
+ break;
+ }
+ for (ulint i = 0; i < m_form->s->keys; i++) {
+ if (m_form->key_info[i].flags & HA_SPATIAL) {
+ push_warning(m_thd,
+ Sql_condition::WARN_LEVEL_WARN,
+ HA_ERR_UNSUPPORTED,
+ "InnoDB: ENCRYPTED=YES is not"
+ " supported for SPATIAL INDEX");
+ return "ENCRYPTED";
+ }
+ }
+ }
+
+ if (!m_allow_file_per_table
+ && options->encryption != FIL_ENCRYPTION_DEFAULT) {
+ push_warning(
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_WRONG_CREATE_OPTION,
+ "InnoDB: ENCRYPTED requires innodb_file_per_table");
+ return "ENCRYPTED";
+ }
+
+ /* Check page compression requirements */
+ if (options->page_compressed) {
+
+ if (row_format == ROW_TYPE_COMPRESSED) {
+ push_warning(
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_WRONG_CREATE_OPTION,
+ "InnoDB: PAGE_COMPRESSED table can't have"
+ " ROW_TYPE=COMPRESSED");
+ return "PAGE_COMPRESSED";
+ }
+
+ switch (row_format) {
+ default:
+ break;
+ case ROW_TYPE_DEFAULT:
+ if (m_default_row_format
+ != DEFAULT_ROW_FORMAT_REDUNDANT) {
+ break;
+ }
+ /* fall through */
+ case ROW_TYPE_REDUNDANT:
+ push_warning(
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_WRONG_CREATE_OPTION,
+ "InnoDB: PAGE_COMPRESSED table can't have"
+ " ROW_TYPE=REDUNDANT");
+ return "PAGE_COMPRESSED";
+ }
+
+ if (!m_allow_file_per_table) {
+ push_warning(
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_WRONG_CREATE_OPTION,
+ "InnoDB: PAGE_COMPRESSED requires"
+ " innodb_file_per_table.");
+ return "PAGE_COMPRESSED";
+ }
+
+ if (srv_file_format < UNIV_FORMAT_B) {
+ push_warning(
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_WRONG_CREATE_OPTION,
+ "InnoDB: PAGE_COMPRESSED requires"
+ " innodb_file_format > Antelope.");
+ return "PAGE_COMPRESSED";
+ }
+
+ if (m_create_info->key_block_size) {
+ push_warning(
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_WRONG_CREATE_OPTION,
+ "InnoDB: PAGE_COMPRESSED table can't have"
+ " key_block_size");
+ return "PAGE_COMPRESSED";
+ }
+ }
+
+ /* Check page compression level requirements, some of them are
+ already checked above */
+ if (options->page_compression_level != 0) {
+ if (options->page_compressed == false) {
+ push_warning(
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_WRONG_CREATE_OPTION,
+ "InnoDB: PAGE_COMPRESSION_LEVEL requires"
+ " PAGE_COMPRESSED");
+ return "PAGE_COMPRESSION_LEVEL";
+ }
+
+ if (options->page_compression_level < 1 || options->page_compression_level > 9) {
+ push_warning_printf(
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_WRONG_CREATE_OPTION,
+ "InnoDB: invalid PAGE_COMPRESSION_LEVEL = %lu."
+ " Valid values are [1, 2, 3, 4, 5, 6, 7, 8, 9]",
+ options->page_compression_level);
+ return "PAGE_COMPRESSION_LEVEL";
+ }
+ }
+
+ return NULL;
+}
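
A summary of the decision table above (derived from the checks; the
returned string names the offending option):

    /* ENCRYPTED=NO ENCRYPTION_KEY_ID=2       -> warning only (NO implies 1)
       ENCRYPTED=NO, innodb_encrypt_tables=FORCE  -> "ENCRYPTED"
       ENCRYPTED=YES with a SPATIAL INDEX         -> "ENCRYPTED"
       PAGE_COMPRESSED=1 ROW_FORMAT=COMPRESSED    -> "PAGE_COMPRESSED"
       PAGE_COMPRESSION_LEVEL=5, PAGE_COMPRESSED unset
                                                  -> "PAGE_COMPRESSION_LEVEL" */
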
+
+/*****************************************************************//**
Update create_info. Used in SHOW CREATE TABLE et al. */
-UNIV_INTERN
+
void
ha_innobase::update_create_info(
/*============================*/
HA_CREATE_INFO* create_info) /*!< in/out: create info */
{
if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) {
- ha_innobase::info(HA_STATUS_AUTO);
+ info(HA_STATUS_AUTO);
create_info->auto_increment_value = stats.auto_increment_value;
}
+ if (dict_table_is_temporary(m_prebuilt->table)) {
+ return;
+ }
+
/* Update the DATA DIRECTORY name from SYS_DATAFILES. */
- dict_get_and_save_data_dir_path(prebuilt->table, false);
+ dict_get_and_save_data_dir_path(m_prebuilt->table, false);
- if (prebuilt->table->data_dir_path) {
- create_info->data_file_name = prebuilt->table->data_dir_path;
+ if (m_prebuilt->table->data_dir_path) {
+ create_info->data_file_name = m_prebuilt->table->data_dir_path;
}
}
/*****************************************************************//**
Initialize the table FTS stopword list
@return TRUE if success */
-UNIV_INTERN
ibool
innobase_fts_load_stopword(
/*=======================*/
@@ -11514,29 +11877,16 @@ innobase_fts_load_stopword(
THDVAR(thd, ft_enable_stopword), FALSE));
}
-/*****************************************************************//**
-Parses the table name into normal name and either temp path or remote path
-if needed.
-@return 0 if successful, otherwise, error number */
-UNIV_INTERN
+/** Parse the table name into normal name and remote path if needed.
+@param[in] name Table name (db/table or full path).
+@return 0 if successful, otherwise, error number */
int
-ha_innobase::parse_table_name(
-/*==========================*/
- const char* name, /*!< in/out: table name provided*/
- HA_CREATE_INFO* create_info, /*!< in: more information of the
- created table, contains also the
- create statement string */
- ulint flags, /*!< in: flags*/
- ulint flags2, /*!< in: flags2*/
- char* norm_name, /*!< out: normalized table name */
- char* temp_path, /*!< out: absolute path of table */
- char* remote_path) /*!< out: remote path of table */
-{
- THD* thd = ha_thd();
- bool use_tablespace = flags2 & DICT_TF2_USE_TABLESPACE;
- DBUG_ENTER("ha_innobase::parse_table_name");
+create_table_info_t::parse_table_name(
+ const char* name)
+{
+ DBUG_ENTER("parse_table_name");
-#ifdef __WIN__
+#ifdef _WIN32
/* Names passed in from server are in two formats:
1. <database_name>/<table_name>: for normal table creation
2. full path: for temp table creation, or DATA DIRECTORY.
@@ -11548,9 +11898,9 @@ ha_innobase::parse_table_name(
returns error if it is in full path format, but not creating a temp.
table. Currently InnoDB does not support symbolic link on Windows. */
- if (use_tablespace
+ if (m_innodb_file_per_table
&& !mysqld_embedded
- && !(create_info->options & HA_LEX_CREATE_TMP_TABLE)) {
+ && !(m_create_info->options & HA_LEX_CREATE_TMP_TABLE)) {
if ((name[1] == ':')
|| (name[0] == '\\' && name[1] == '\\')) {
@@ -11560,51 +11910,31 @@ ha_innobase::parse_table_name(
}
#endif
- normalize_table_name(norm_name, name);
- temp_path[0] = '\0';
- remote_path[0] = '\0';
+ m_remote_path[0] = '\0';
- /* A full path is used for TEMPORARY TABLE and DATA DIRECTORY.
- In the case of;
+ /* Make sure DATA DIRECTORY is compatible with other options
+	and set the remote path. In the case of either:
CREATE TEMPORARY TABLE ... DATA DIRECTORY={path} ... ;
- We ignore the DATA DIRECTORY. */
- if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {
- strncpy(temp_path, name, FN_REFLEN - 1);
- }
-
- if (create_info->data_file_name) {
- bool ignore = false;
-
- /* Use DATA DIRECTORY only with file-per-table. */
- if (!use_tablespace) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: DATA DIRECTORY requires"
- " innodb_file_per_table.");
- ignore = true;
- }
-
- /* Do not use DATA DIRECTORY with TEMPORARY TABLE. */
- if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: DATA DIRECTORY cannot be"
- " used for TEMPORARY tables.");
- ignore = true;
- }
-
- if (ignore) {
- my_error(WARN_OPTION_IGNORED, ME_JUST_WARNING,
+ CREATE TABLE ... DATA DIRECTORY={path} TABLESPACE={name}... ;
+ we ignore the DATA DIRECTORY. */
+ if (m_create_info->data_file_name
+ && m_create_info->data_file_name[0] != '\0') {
+ if (!create_option_data_directory_is_valid()) {
+ push_warning_printf(
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
+ WARN_OPTION_IGNORED,
+ ER_DEFAULT(WARN_OPTION_IGNORED),
"DATA DIRECTORY");
+
+ m_flags &= ~DICT_TF_MASK_DATA_DIR;
} else {
- strncpy(remote_path, create_info->data_file_name,
+ strncpy(m_remote_path,
+ m_create_info->data_file_name,
FN_REFLEN - 1);
}
}
- if (create_info->index_file_name) {
+ if (m_create_info->index_file_name) {
my_error(WARN_OPTION_IGNORED, ME_JUST_WARNING,
"INDEX DIRECTORY");
}
@@ -11612,32 +11942,27 @@ ha_innobase::parse_table_name(
DBUG_RETURN(0);
}
-/*****************************************************************//**
-Determines InnoDB table flags.
-@retval true if successful, false if error */
-UNIV_INTERN
-bool
-innobase_table_flags(
-/*=================*/
- const TABLE* form, /*!< in: table */
- const HA_CREATE_INFO* create_info, /*!< in: information
- on table columns and indexes */
- THD* thd, /*!< in: connection */
- bool use_tablespace, /*!< in: whether to create
- outside system tablespace */
- ulint* flags, /*!< out: DICT_TF flags */
- ulint* flags2) /*!< out: DICT_TF2 flags */
+/** Determine InnoDB table flags.
+If strict_mode=OFF, this will adjust the flags to what should be assumed.
+@retval true on success
+@retval false on error */
+bool create_table_info_t::innobase_table_flags()
{
DBUG_ENTER("innobase_table_flags");
const char* fts_doc_id_index_bad = NULL;
- bool zip_allowed = true;
ulint zip_ssize = 0;
- enum row_type row_format;
+ enum row_type row_type;
rec_format_t innodb_row_format =
- get_row_format(innodb_default_row_format);
- bool use_data_dir;
- ha_table_option_struct *options= form->s->option_struct;
+ get_row_format(m_default_row_format);
+ const bool is_temp
+ = m_create_info->options & HA_LEX_CREATE_TMP_TABLE;
+	bool		zip_allowed = !is_temp;
+
+ const ulint zip_ssize_max =
+ ut_min(static_cast<ulint>(UNIV_PAGE_SSIZE_MAX),
+ static_cast<ulint>(PAGE_ZIP_SSIZE_MAX));
/* Cache the value of innodb_file_format, in case it is
modified by another thread while the table is being created. */
@@ -11647,34 +11972,25 @@ innobase_table_flags(
modified by another thread while the table is being created. */
const ulint default_compression_level = page_zip_level;
- *flags = 0;
- *flags2 = 0;
+ ha_table_option_struct *options= m_form->s->option_struct;
+
+ m_flags = 0;
+ m_flags2 = 0;
/* Check if there are any FTS indexes defined on this table. */
- for (uint i = 0; i < form->s->keys; i++) {
- const KEY* key = &form->key_info[i];
+ for (uint i = 0; i < m_form->s->keys; i++) {
+ const KEY* key = &m_form->key_info[i];
if (key->flags & HA_FULLTEXT) {
- *flags2 |= DICT_TF2_FTS;
+ m_flags2 |= DICT_TF2_FTS;
/* We don't support FTS indexes in temporary
tables. */
- if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {
-
+ if (is_temp) {
my_error(ER_INNODB_NO_FT_TEMP_TABLE, MYF(0));
DBUG_RETURN(false);
}
- if (key->flags & HA_USES_PARSER) {
- my_error(ER_INNODB_NO_FT_USES_PARSER, MYF(0));
- DBUG_RETURN(false);
- }
-
- if (key->flags & HA_USES_PARSER) {
- my_error(ER_INNODB_NO_FT_USES_PARSER, MYF(0));
- DBUG_RETURN(false);
- }
-
if (fts_doc_id_index_bad) {
goto index_bad;
}
@@ -11692,7 +12008,7 @@ innobase_table_flags(
fts_doc_id_index_bad = key->name;
}
- if (fts_doc_id_index_bad && (*flags2 & DICT_TF2_FTS)) {
+ if (fts_doc_id_index_bad && (m_flags2 & DICT_TF2_FTS)) {
index_bad:
my_error(ER_INNODB_FT_WRONG_DOCID_INDEX, MYF(0),
fts_doc_id_index_bad);
@@ -11700,61 +12016,66 @@ index_bad:
}
}
- row_format = form->s->row_type;
-
- if (create_info->key_block_size) {
+ if (m_create_info->key_block_size > 0) {
/* The requested compressed page size (key_block_size)
is given in kilobytes. If it is a valid number, store
that value as the number of log2 shifts from 512 in
zip_ssize. Zero means it is not compressed. */
- ulint zssize; /* Zip Shift Size */
- ulint kbsize; /* Key Block Size */
+ ulint zssize; /* Zip Shift Size */
+ ulint kbsize; /* Key Block Size */
for (zssize = kbsize = 1;
- zssize <= ut_min(UNIV_PAGE_SSIZE_MAX,
- PAGE_ZIP_SSIZE_MAX);
+ zssize <= zip_ssize_max;
zssize++, kbsize <<= 1) {
- if (kbsize == create_info->key_block_size) {
+ if (kbsize == m_create_info->key_block_size) {
zip_ssize = zssize;
break;
}
}
/* Make sure compressed row format is allowed. */
- if (!use_tablespace) {
+ if (is_temp) {
push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: KEY_BLOCK_SIZE is ignored"
+ " for TEMPORARY TABLE.");
+ zip_allowed = false;
+ } else if (!m_allow_file_per_table) {
+ push_warning(
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: KEY_BLOCK_SIZE requires"
" innodb_file_per_table.");
- zip_allowed = FALSE;
+ zip_allowed = false;
}
if (file_format_allowed < UNIV_FORMAT_B) {
push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: KEY_BLOCK_SIZE requires"
" innodb_file_format > Antelope.");
- zip_allowed = FALSE;
+ zip_allowed = false;
}
if (!zip_allowed
- || zssize > ut_min(UNIV_PAGE_SSIZE_MAX,
- PAGE_ZIP_SSIZE_MAX)) {
+ || zssize > zip_ssize_max) {
push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: ignoring KEY_BLOCK_SIZE=%lu.",
- create_info->key_block_size);
+ "InnoDB: ignoring KEY_BLOCK_SIZE=%u.",
+ (uint) m_create_info->key_block_size);
}
}
+ row_type = m_create_info->row_type;
+
if (zip_ssize && zip_allowed) {
/* if ROW_FORMAT is set to default,
- automatically change it to COMPRESSED.*/
- if (row_format == ROW_TYPE_DEFAULT) {
- row_format = ROW_TYPE_COMPRESSED;
- } else if (row_format != ROW_TYPE_COMPRESSED) {
+ automatically change it to COMPRESSED. */
+ if (row_type == ROW_TYPE_DEFAULT) {
+ row_type = ROW_TYPE_COMPRESSED;
+ } else if (row_type != ROW_TYPE_COMPRESSED) {
/* ROW_FORMAT other than COMPRESSED
ignores KEY_BLOCK_SIZE. It does not
make sense to reject conflicting
@@ -11762,443 +12083,489 @@ index_bad:
such combinations can be obtained
with ALTER TABLE anyway. */
push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: ignoring KEY_BLOCK_SIZE=%lu"
+ "InnoDB: ignoring KEY_BLOCK_SIZE=%u"
" unless ROW_FORMAT=COMPRESSED.",
- create_info->key_block_size);
- zip_allowed = FALSE;
+ (uint) m_create_info->key_block_size);
+ zip_allowed = false;
}
} else {
- /* zip_ssize == 0 means no KEY_BLOCK_SIZE.*/
- if (row_format == ROW_TYPE_COMPRESSED && zip_allowed) {
+ /* zip_ssize == 0 means no KEY_BLOCK_SIZE. */
+ if (row_type == ROW_TYPE_COMPRESSED && zip_allowed) {
/* ROW_FORMAT=COMPRESSED without KEY_BLOCK_SIZE
implies half the maximum KEY_BLOCK_SIZE(*1k) or
UNIV_PAGE_SIZE, whichever is less. */
- zip_ssize = ut_min(UNIV_PAGE_SSIZE_MAX,
- PAGE_ZIP_SSIZE_MAX) - 1;
+ zip_ssize = zip_ssize_max - 1;
}
}
/* Validate the row format. Correct it if necessary */
- switch (row_format) {
- case ROW_TYPE_DEFAULT:
- break;
+
+ switch (row_type) {
case ROW_TYPE_REDUNDANT:
innodb_row_format = REC_FORMAT_REDUNDANT;
break;
- case ROW_TYPE_DYNAMIC:
- innodb_row_format = REC_FORMAT_DYNAMIC;
+ case ROW_TYPE_COMPACT:
+ innodb_row_format = REC_FORMAT_COMPACT;
break;
case ROW_TYPE_COMPRESSED:
- if (!use_tablespace) {
+ if (is_temp) {
push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: ROW_FORMAT=%s is ignored for"
+ " TEMPORARY TABLE.",
+ get_row_format_name(row_type));
+ } else if (!m_allow_file_per_table) {
+ push_warning_printf(
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: ROW_FORMAT=COMPRESSED requires"
" innodb_file_per_table.");
+
} else if (file_format_allowed == UNIV_FORMAT_A) {
push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: ROW_FORMAT=COMPRESSED requires"
" innodb_file_format > Antelope.");
} else {
innodb_row_format = REC_FORMAT_COMPRESSED;
- break; /* Correct row_format */
+ break;
}
- zip_allowed = FALSE;
+ zip_allowed = false;
/* Set ROW_FORMAT = COMPACT */
/* fall through */
case ROW_TYPE_NOT_USED:
case ROW_TYPE_FIXED:
case ROW_TYPE_PAGE:
push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: assuming ROW_FORMAT=COMPACT.");
+ "InnoDB: assuming ROW_FORMAT=DYNAMIC.");
/* fall through */
- case ROW_TYPE_COMPACT:
- innodb_row_format = REC_FORMAT_COMPACT;
+ case ROW_TYPE_DYNAMIC:
+ innodb_row_format = REC_FORMAT_DYNAMIC;
break;
+ case ROW_TYPE_DEFAULT:
+ ;
}
/* Don't support compressed table when page size > 16k. */
if (zip_allowed && zip_ssize && UNIV_PAGE_SIZE > UNIV_PAGE_SIZE_DEF) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: Cannot create a COMPRESSED table"
- " when innodb_page_size > 16k."
- " Assuming ROW_FORMAT=COMPACT.");
- zip_allowed = FALSE;
+ push_warning(m_thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: Cannot create a COMPRESSED table"
+ " when innodb_page_size > 16k."
+ " Assuming ROW_FORMAT=DYNAMIC.");
+ zip_allowed = false;
}
+ ut_ad(!is_temp || !zip_allowed);
+ ut_ad(!is_temp || innodb_row_format != REC_FORMAT_COMPRESSED);
+
/* Set the table flags */
if (!zip_allowed) {
zip_ssize = 0;
}
- use_data_dir = use_tablespace
- && ((create_info->data_file_name != NULL)
- && !(create_info->options & HA_LEX_CREATE_TMP_TABLE));
-
- /* Set up table dictionary flags */
- dict_tf_set(flags,
- innodb_row_format,
- zip_ssize,
- use_data_dir,
- options->page_compressed,
- options->page_compression_level == 0 ?
- default_compression_level : options->page_compression_level,
- options->atomic_writes);
-
- if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {
- *flags2 |= DICT_TF2_TEMPORARY;
+ if (is_temp) {
+ m_flags2 |= DICT_TF2_TEMPORARY;
+ } else if (m_use_file_per_table) {
+ m_flags2 |= DICT_TF2_USE_FILE_PER_TABLE;
}
- if (use_tablespace) {
- *flags2 |= DICT_TF2_USE_TABLESPACE;
- }
+ /* Set the table flags */
+ dict_tf_set(&m_flags, innodb_row_format, zip_ssize,
+ m_use_data_dir,
+ options->page_compressed,
+ options->page_compression_level == 0 ?
+ default_compression_level : static_cast<ulint>(options->page_compression_level),
+ 0);
/* Set the flags2 when create table or alter tables */
- *flags2 |= DICT_TF2_FTS_AUX_HEX_NAME;
+ m_flags2 |= DICT_TF2_FTS_AUX_HEX_NAME;
DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name",
- *flags2 &= ~DICT_TF2_FTS_AUX_HEX_NAME;);
+ m_flags2 &= ~DICT_TF2_FTS_AUX_HEX_NAME;);
DBUG_RETURN(true);
}
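
Hedged examples of the resolution above (assuming file-per-table is on,
the file format allows Barracuda, and 16k pages):

    /* KEY_BLOCK_SIZE=8, ROW_FORMAT unset -> ROW_FORMAT=COMPRESSED
       ROW_FORMAT=FIXED                   -> warning; DYNAMIC is now
                                             assumed (this patch changes
                                             the fallback from COMPACT)
       TEMPORARY with KEY_BLOCK_SIZE=8    -> warning; compression ignored */
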
-
-/*****************************************************************//**
-Check engine specific table options not handled by SQL-parser.
-@return NULL if valid, string if not */
-UNIV_INTERN
-const char*
-ha_innobase::check_table_options(
- THD *thd, /*!< in: thread handle */
- TABLE* table, /*!< in: information on table
- columns and indexes */
- HA_CREATE_INFO* create_info, /*!< in: more information of the
- created table, contains also the
- create statement string */
- const bool use_tablespace, /*!< in: use file par table */
- const ulint file_format)
+/** Parse MERGE_THRESHOLD value from the string.
+@param[in] thd connection
+@param[in] str string which might include 'MERGE_THRESHOLD='
+@return value parsed. 0 means not found or invalid value. */
+static
+ulint
+innobase_parse_merge_threshold(
+ THD* thd,
+ const char* str)
{
- enum row_type row_format = table->s->row_type;
- ha_table_option_struct *options= table->s->option_struct;
- atomic_writes_t awrites = (atomic_writes_t)options->atomic_writes;
+ static const char* label = "MERGE_THRESHOLD=";
+ static const size_t label_len = strlen(label);
+ const char* pos = str;
- switch (options->encryption) {
- case FIL_ENCRYPTION_OFF:
- if (options->encryption_key_id != FIL_DEFAULT_ENCRYPTION_KEY) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: ENCRYPTED=NO implies"
- " ENCRYPTION_KEY_ID=1");
- compile_time_assert(FIL_DEFAULT_ENCRYPTION_KEY == 1);
- }
- if (srv_encrypt_tables != 2) {
- break;
- }
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: ENCRYPTED=NO cannot be used with"
- " innodb_encrypt_tables=FORCE");
- return "ENCRYPTED";
- case FIL_ENCRYPTION_DEFAULT:
- if (!srv_encrypt_tables) {
- break;
- }
- /* fall through */
- case FIL_ENCRYPTION_ON:
- if (!encryption_key_id_exists(options->encryption_key_id)) {
- push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: ENCRYPTION_KEY_ID %u not available",
- options->encryption_key_id);
- return "ENCRYPTION_KEY_ID";
- }
+ pos = strstr(str, label);
+
+ if (pos == NULL) {
+ return(0);
}
- if (!use_tablespace && options->encryption != FIL_ENCRYPTION_DEFAULT) {
- push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: ENCRYPTED requires"
- " innodb_file_per_table");
- return "ENCRYPTED";
+ pos += label_len;
+
+ lint ret = atoi(pos);
+
+ if (ret > 0 && ret <= 50) {
+ return(static_cast<ulint>(ret));
}
- /* Check page compression requirements */
- if (options->page_compressed) {
+ push_warning_printf(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: Invalid value for MERGE_THRESHOLD in the CREATE TABLE"
+ " statement. The value is ignored.");
- if (row_format == ROW_TYPE_COMPRESSED) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: PAGE_COMPRESSED table can't have"
- " ROW_TYPE=COMPRESSED");
- return "PAGE_COMPRESSED";
- }
+ return(0);
+}
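
A hedged sketch of the accepted inputs (only 1..50 is taken; anything
else falls back to 0):

    innobase_parse_merge_threshold(thd, "MERGE_THRESHOLD=45"); /* -> 45 */
    innobase_parse_merge_threshold(thd, "MERGE_THRESHOLD=95"); /* -> 0, warns */
    innobase_parse_merge_threshold(thd, "ordinary comment");   /* -> 0 */
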
- if (row_format == ROW_TYPE_REDUNDANT) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: PAGE_COMPRESSED table can't have"
- " ROW_TYPE=REDUNDANT");
- return "PAGE_COMPRESSED";
- }
+/** Parse hints for the table and its indexes, and update the information
+in the dictionary.
+@param[in] thd connection
+@param[in,out] table target table
+@param[in] table_share table definition */
+void
+innobase_parse_hint_from_comment(
+ THD* thd,
+ dict_table_t* table,
+ const TABLE_SHARE* table_share)
+{
+ ulint merge_threshold_table;
+ ulint merge_threshold_index[MAX_KEY];
+ bool is_found[MAX_KEY];
+
+ if (table_share->comment.str != NULL) {
+ merge_threshold_table
+ = innobase_parse_merge_threshold(
+ thd, table_share->comment.str);
+ } else {
+ merge_threshold_table = DICT_INDEX_MERGE_THRESHOLD_DEFAULT;
+ }
- if (!use_tablespace) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: PAGE_COMPRESSED requires"
- " innodb_file_per_table.");
- return "PAGE_COMPRESSED";
- }
+ if (merge_threshold_table == 0) {
+ merge_threshold_table = DICT_INDEX_MERGE_THRESHOLD_DEFAULT;
+ }
- if (file_format < UNIV_FORMAT_B) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: PAGE_COMPRESSED requires"
- " innodb_file_format > Antelope.");
- return "PAGE_COMPRESSED";
+ for (uint i = 0; i < table_share->keys; i++) {
+ KEY* key_info = &table_share->key_info[i];
+
+ ut_ad(i < sizeof(merge_threshold_index)
+ / sizeof(merge_threshold_index[0]));
+
+ if (key_info->flags & HA_USES_COMMENT
+ && key_info->comment.str != NULL) {
+ merge_threshold_index[i]
+ = innobase_parse_merge_threshold(
+ thd, key_info->comment.str);
+ } else {
+ merge_threshold_index[i] = merge_threshold_table;
}
- if (create_info->key_block_size) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: PAGE_COMPRESSED table can't have"
- " key_block_size");
- return "PAGE_COMPRESSED";
+ if (merge_threshold_index[i] == 0) {
+ merge_threshold_index[i] = merge_threshold_table;
}
}
- /* Check page compression level requirements, some of them are
- already checked above */
- if (options->page_compression_level != 0) {
- if (options->page_compressed == false) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: PAGE_COMPRESSION_LEVEL requires"
- " PAGE_COMPRESSED");
- return "PAGE_COMPRESSION_LEVEL";
+	/* update the SYS_INDEXES table */
+ if (!dict_table_is_temporary(table)) {
+ for (uint i = 0; i < table_share->keys; i++) {
+ is_found[i] = false;
}
- if (options->page_compression_level < 1 || options->page_compression_level > 9) {
- push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: invalid PAGE_COMPRESSION_LEVEL = %lu."
- " Valid values are [1, 2, 3, 4, 5, 6, 7, 8, 9]",
- options->page_compression_level);
- return "PAGE_COMPRESSION_LEVEL";
+ for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
+ index != NULL;
+ index = UT_LIST_GET_NEXT(indexes, index)) {
+
+ if (dict_index_is_auto_gen_clust(index)) {
+
+ /* GEN_CLUST_INDEX should use
+ merge_threshold_table */
+ dict_index_set_merge_threshold(
+ index, merge_threshold_table);
+ continue;
+ }
+
+ for (uint i = 0; i < table_share->keys; i++) {
+ if (is_found[i]) {
+ continue;
+ }
+
+ KEY* key_info = &table_share->key_info[i];
+
+ if (innobase_strcasecmp(
+ index->name, key_info->name) == 0) {
+
+ dict_index_set_merge_threshold(
+ index,
+ merge_threshold_index[i]);
+ is_found[i] = true;
+ break;
+ }
+ }
}
}
- /* Check atomic writes requirements */
- if (awrites == ATOMIC_WRITES_ON ||
- (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) {
- if (!use_tablespace) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: ATOMIC_WRITES requires"
- " innodb_file_per_table.");
- return "ATOMIC_WRITES";
- }
+ for (uint i = 0; i < table_share->keys; i++) {
+ is_found[i] = false;
}
- return 0;
+ /* update in memory */
+ for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
+ index != NULL;
+ index = UT_LIST_GET_NEXT(indexes, index)) {
+
+ if (dict_index_is_auto_gen_clust(index)) {
+
+ /* GEN_CLUST_INDEX should use merge_threshold_table */
+
+ /* x-lock index is needed to exclude concurrent
+ pessimistic tree operations */
+ rw_lock_x_lock(dict_index_get_lock(index));
+ index->merge_threshold = merge_threshold_table;
+ rw_lock_x_unlock(dict_index_get_lock(index));
+
+ continue;
+ }
+
+ for (uint i = 0; i < table_share->keys; i++) {
+ if (is_found[i]) {
+ continue;
+ }
+
+ KEY* key_info = &table_share->key_info[i];
+
+ if (innobase_strcasecmp(
+ index->name, key_info->name) == 0) {
+
+ /* x-lock index is needed to exclude concurrent
+ pessimistic tree operations */
+ rw_lock_x_lock(dict_index_get_lock(index));
+ index->merge_threshold
+ = merge_threshold_index[i];
+ rw_lock_x_unlock(dict_index_get_lock(index));
+ is_found[i] = true;
+
+ break;
+ }
+ }
+ }
}
-/*****************************************************************//**
-Creates a new table to an InnoDB database.
-@return error number */
-UNIV_INTERN
+/** Set m_use_* flags. */
+void
+create_table_info_t::set_tablespace_type(
+ bool table_being_altered_is_file_per_table)
+{
+	/** Allow file_per_table for this table either because:
+	1) the setting innodb_file_per_table=ON, or
+	2) the table being altered is currently file_per_table */
+ m_allow_file_per_table =
+ m_innodb_file_per_table
+ || table_being_altered_is_file_per_table;
+
+ /* Ignore the current innodb-file-per-table setting if we are
+ creating a temporary table. */
+ m_use_file_per_table =
+ m_allow_file_per_table
+ && !(m_create_info->options & HA_LEX_CREATE_TMP_TABLE);
+
+	/* DATA DIRECTORY requires m_use_file_per_table and cannot be
+	used with TEMPORARY tables. */
+ m_use_data_dir =
+ m_use_file_per_table
+ && (m_create_info->data_file_name != NULL)
+ && (m_create_info->data_file_name[0] != '\0');
+}
+
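set_tablespace_type() reduces to three boolean derivations from two inputs plus the create options. A condensed sketch of the same logic (derive_tablespace_flags is a hypothetical free function, for illustration only):

    /* file-per-table is allowed when either the global setting is ON
       or the table being altered already uses it; it is actually used
       only for non-temporary tables; DATA DIRECTORY additionally
       requires a non-empty data_file_name. */
    struct tablespace_flags {
        bool allow_file_per_table;
        bool use_file_per_table;
        bool use_data_dir;
    };

    static tablespace_flags derive_tablespace_flags(
        bool innodb_file_per_table,
        bool altered_is_file_per_table,
        bool is_temp_table,
        const char* data_file_name)
    {
        tablespace_flags f;
        f.allow_file_per_table = innodb_file_per_table
                                 || altered_is_file_per_table;
        f.use_file_per_table = f.allow_file_per_table && !is_temp_table;
        f.use_data_dir = f.use_file_per_table
                         && data_file_name != NULL
                         && data_file_name[0] != '\0';
        return f;
    }
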
+/** Initialize the create_table_info_t object.
+@return error number */
int
-ha_innobase::create(
-/*================*/
- const char* name, /*!< in: table name */
- TABLE* form, /*!< in: information on table
- columns and indexes */
- HA_CREATE_INFO* create_info) /*!< in: more information of the
- created table, contains also the
- create statement string */
+create_table_info_t::initialize()
{
- int error;
- trx_t* parent_trx;
- trx_t* trx;
- int primary_key_no;
- uint i;
- char norm_name[FN_REFLEN]; /* {database}/{tablename} */
- char temp_path[FN_REFLEN]; /* absolute path of temp frm */
- char remote_path[FN_REFLEN]; /* absolute path of table */
- THD* thd = ha_thd();
- ib_int64_t auto_inc_value;
-
- /* Cache the global variable "srv_file_per_table" to a local
- variable before using it. Note that "srv_file_per_table"
- is not under dict_sys mutex protection, and could be changed
- while creating the table. So we read the current value here
- and make all further decisions based on this. */
- bool use_tablespace = srv_file_per_table;
- const ulint file_format = srv_file_format;
-
- /* Zip Shift Size - log2 - 9 of compressed page size,
- zero for uncompressed */
- ulint flags;
- ulint flags2;
- dict_table_t* innobase_table = NULL;
+ DBUG_ENTER("create_table_info_t::initialize");
- const char* stmt;
- size_t stmt_len;
+ ut_ad(m_thd != NULL);
+ ut_ad(m_create_info != NULL);
- DBUG_ENTER("ha_innobase::create");
+ if (m_form->s->fields > REC_MAX_N_USER_FIELDS) {
+ DBUG_RETURN(HA_ERR_TOO_MANY_FIELDS);
+ }
- DBUG_ASSERT(thd != NULL);
- DBUG_ASSERT(create_info != NULL);
+	/* Check for name conflicts (with reserved names) for
+	any user indices to be created. */
+ if (innobase_index_name_is_reserved(m_thd, m_form->key_info,
+ m_form->s->keys)) {
+ DBUG_RETURN(HA_ERR_WRONG_INDEX);
+ }
- if (form->s->stored_fields > REC_MAX_N_USER_FIELDS) {
- DBUG_RETURN(HA_ERR_TOO_MANY_FIELDS);
- } else if (high_level_read_only) {
- DBUG_RETURN(HA_ERR_TABLE_READONLY);
+ /* Get the transaction associated with the current thd, or create one
+ if not yet created */
+
+ check_trx_exists(m_thd);
+
+ DBUG_RETURN(0);
+}
+
+
+/** Check whether any FULLTEXT or SPATIAL index includes a virtual column.
+@return true (after reporting an error) if so, false otherwise */
+bool
+create_table_info_t::gcols_in_fulltext_or_spatial()
+{
+ for (ulint i = 0; i < m_form->s->keys; i++) {
+ const KEY* key = m_form->key_info + i;
+ if (!(key->flags & (HA_SPATIAL | HA_FULLTEXT))) {
+ continue;
+ }
+ for (ulint j = 0; j < key->user_defined_key_parts; j++) {
+ /* We do not support special (Fulltext or
+ Spatial) index on virtual columns */
+ if (!key->key_part[j].field->stored_in_db()) {
+ my_error(ER_UNSUPPORTED_ACTION_ON_GENERATED_COLUMN, MYF(0));
+ return true;
+ }
+ }
}
+ return false;
+}
+
+
+/** Prepare to create a new table in an InnoDB database.
+@param[in]	name	Table name
+@param[in]	strict	whether to enforce innodb_strict_mode checks
+@return error number */
+int create_table_info_t::prepare_create_table(const char* name, bool strict)
+{
+ DBUG_ENTER("prepare_create_table");
+
+ ut_ad(m_thd != NULL);
+ ut_ad(m_create_info != NULL);
+
+ set_tablespace_type(false);
- /* Create the table definition in InnoDB */
+ normalize_table_name(m_table_name, name);
/* Validate table options not handled by the SQL-parser */
- if(check_table_options(thd, form, create_info, use_tablespace,
- file_format)) {
+ if (check_table_options()) {
DBUG_RETURN(HA_WRONG_CREATE_OPTION);
}
- /* Validate create options if innodb_strict_mode is set. */
- if (create_options_are_invalid(
- thd, form, create_info, use_tablespace)) {
+ /* Validate the create options if innodb_strict_mode is set.
+ Do not use the regular message for ER_ILLEGAL_HA_CREATE_OPTION
+ because InnoDB might actually support the option, but not under
+ the current conditions. The messages revealing the specific
+ problems are reported inside this function. */
+ if (strict && create_options_are_invalid()) {
DBUG_RETURN(HA_WRONG_CREATE_OPTION);
}
- if (!innobase_table_flags(form, create_info,
- thd, use_tablespace,
- &flags, &flags2)) {
- DBUG_RETURN(-1);
+ /* Create the table flags and flags2 */
+ if (!innobase_table_flags()) {
+ DBUG_RETURN(HA_WRONG_CREATE_OPTION);
}
- error = parse_table_name(name, create_info, flags, flags2,
- norm_name, temp_path, remote_path);
- if (error) {
- DBUG_RETURN(error);
+ if (high_level_read_only) {
+ DBUG_RETURN(HA_ERR_TABLE_READONLY);
}
- /* Look for a primary key */
- primary_key_no = (form->s->primary_key != MAX_KEY ?
- (int) form->s->primary_key :
- -1);
+ if (gcols_in_fulltext_or_spatial()) {
+ DBUG_RETURN(HA_ERR_UNSUPPORTED);
+ }
- /* Our function innobase_get_mysql_key_number_for_index assumes
- the primary key is always number 0, if it exists */
- ut_a(primary_key_no == -1 || primary_key_no == 0);
+ for (uint i = 0; i < m_form->s->keys; i++) {
+ const size_t max_field_len
+ = DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(m_flags);
+ const KEY& key = m_form->key_info[i];
- /* Check for name conflicts (with reserved name) for
- any user indices to be created. */
- if (innobase_index_name_is_reserved(thd, form->key_info,
- form->s->keys)) {
- DBUG_RETURN(-1);
- }
+ if (key.algorithm == HA_KEY_ALG_FULLTEXT) {
+ continue;
+ }
- if (row_is_magic_monitor_table(norm_name)) {
- push_warning_printf(thd,
- Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_WRONG_COMMAND,
- "Using the table name %s to enable "
- "diagnostic output is deprecated "
- "and may be removed in future releases. "
- "Use INFORMATION_SCHEMA or "
- "PERFORMANCE_SCHEMA tables or "
- "SET GLOBAL innodb_status_output=ON.",
- dict_remove_db_name(norm_name));
-
- /* Limit innodb monitor access to users with PROCESS privilege.
- See http://bugs.mysql.com/32710 why we chose PROCESS. */
- if (check_global_access(thd, PROCESS_ACL)) {
- DBUG_RETURN(HA_ERR_GENERIC);
+ if (too_big_key_part_length(max_field_len, key)) {
+ DBUG_RETURN(convert_error_code_to_mysql(
+ DB_TOO_BIG_INDEX_COL, m_flags, NULL));
}
}
- /* Get the transaction associated with the current thd, or create one
- if not yet created */
-
- parent_trx = check_trx_exists(thd);
+ DBUG_RETURN(parse_table_name(name));
+}
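
prepare_create_table() ends by rejecting any non-FULLTEXT key part that exceeds the per-row-format field length limit. A worked sketch of that check, assuming the classic limits of 767 bytes (REDUNDANT/COMPACT) and 3072 bytes (row formats with large BLOB prefixes); max_index_field_len and key_part_too_long are hypothetical names, and the real values come from DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG() and too_big_key_part_length():

    /* Assumed classic limits; the authoritative value is derived
       from the table flags. */
    static unsigned max_index_field_len(bool large_prefix)
    {
        return large_prefix ? 3072 : 767;
    }

    static bool key_part_too_long(bool large_prefix,
                                  unsigned key_part_length)
    {
        /* e.g. a 1000-byte prefix fails under COMPACT (767) but
           passes under DYNAMIC with large prefixes (3072); the
           failure surfaces as DB_TOO_BIG_INDEX_COL, converted by
           convert_error_code_to_mysql(). */
        return key_part_length > max_index_field_len(large_prefix);
    }
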
- /* In case MySQL calls this in the middle of a SELECT query, release
- possible adaptive hash latch to avoid deadlocks of threads */
+/** Create the internal InnoDB table definition.
+@param create_fk	whether to add FOREIGN KEY constraints
+@return error number */
+int create_table_info_t::create_table(bool create_fk)
+{
+ int error;
+ int primary_key_no;
+ uint i;
+ dict_table_t* innobase_table = NULL;
- trx_search_latch_release_if_reserved(parent_trx);
+ DBUG_ENTER("create_table");
- trx = innobase_trx_allocate(thd);
+ /* Look for a primary key */
+ primary_key_no = (m_form->s->primary_key != MAX_KEY ?
+ (int) m_form->s->primary_key : -1);
- /* Latch the InnoDB data dictionary exclusively so that no deadlocks
- or lock waits can happen in it during a table create operation.
- Drop table etc. do this latching in row0mysql.cc. */
+ /* Our function innobase_get_mysql_key_number_for_index assumes
+ the primary key is always number 0, if it exists */
+ ut_a(primary_key_no == -1 || primary_key_no == 0);
- row_mysql_lock_data_dictionary(trx);
+ error = create_table_def();
- error = create_table_def(trx, form, norm_name, temp_path,
- remote_path, flags, flags2,
- form->s->option_struct);
if (error) {
- goto cleanup;
+ DBUG_RETURN(error);
}
+ DBUG_ASSERT(m_drop_before_rollback
+ == !(m_flags2 & DICT_TF2_TEMPORARY));
+
/* Create the keys */
- if (form->s->keys == 0 || primary_key_no == -1) {
+ if (m_form->s->keys == 0 || primary_key_no == -1) {
/* Create an index which is used as the clustered index;
order the rows by their row id which is internally generated
by InnoDB */
error = create_clustered_index_when_no_primary(
- trx, flags, norm_name);
+ m_trx, m_flags, m_table_name);
if (error) {
- goto cleanup;
+ DBUG_RETURN(error);
}
}
if (primary_key_no != -1) {
/* In InnoDB the clustered index must always be created
first */
- if ((error = create_index(trx, form, flags, norm_name,
+ if ((error = create_index(m_trx, m_form, m_flags, m_table_name,
(uint) primary_key_no))) {
- goto cleanup;
+ DBUG_RETURN(error);
}
}
/* Create the ancillary tables that are common to all FTS indexes on
this table. */
- if (flags2 & DICT_TF2_FTS) {
- enum fts_doc_id_index_enum ret;
+ if (m_flags2 & DICT_TF2_FTS) {
+ fts_doc_id_index_enum ret;
innobase_table = dict_table_open_on_name(
- norm_name, TRUE, FALSE, DICT_ERR_IGNORE_NONE);
+ m_table_name, TRUE, FALSE, DICT_ERR_IGNORE_NONE);
ut_a(innobase_table);
/* Check whether there already exists FTS_DOC_ID_INDEX */
ret = innobase_fts_check_doc_id_index_in_def(
- form->s->keys, form->key_info);
+ m_form->s->keys, m_form->key_info);
switch (ret) {
case FTS_INCORRECT_DOC_ID_INDEX:
- push_warning_printf(thd,
+ push_warning_printf(m_thd,
Sql_condition::WARN_LEVEL_WARN,
ER_WRONG_NAME_FOR_INDEX,
" InnoDB: Index name %s is reserved"
@@ -12210,7 +12577,7 @@ ha_innobase::create(
" make sure it is of correct"
" type\n",
FTS_DOC_ID_INDEX_NAME,
- innobase_table->name);
+ innobase_table->name.m_name);
if (innobase_table->fts) {
fts_free(innobase_table);
@@ -12219,15 +12586,14 @@ ha_innobase::create(
dict_table_close(innobase_table, TRUE, FALSE);
my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
FTS_DOC_ID_INDEX_NAME);
- error = -1;
- goto cleanup;
+ DBUG_RETURN(-1);
case FTS_EXIST_DOC_ID_INDEX:
case FTS_NOT_EXIST_DOC_ID_INDEX:
break;
}
dberr_t err = fts_create_common_tables(
- trx, innobase_table, norm_name,
+ m_trx, innobase_table, m_table_name,
(ret == FTS_EXIST_DOC_ID_INDEX));
error = convert_error_code_to_mysql(err, 0, NULL);
@@ -12235,24 +12601,24 @@ ha_innobase::create(
dict_table_close(innobase_table, TRUE, FALSE);
if (error) {
- goto cleanup;
+ DBUG_RETURN(error);
}
}
- for (i = 0; i < form->s->keys; i++) {
+ for (i = 0; i < m_form->s->keys; i++) {
if (i != static_cast<uint>(primary_key_no)) {
- if ((error = create_index(trx, form, flags,
- norm_name, i))) {
- goto cleanup;
+ if ((error = create_index(m_trx, m_form, m_flags,
+ m_table_name, i))) {
+ DBUG_RETURN(error);
}
}
}
/* Cache all the FTS indexes on this table in the FTS specific
structure. They are used for FTS indexed column update handling. */
- if (flags2 & DICT_TF2_FTS) {
+ if (m_flags2 & DICT_TF2_FTS) {
fts_t* fts = innobase_table->fts;
ut_a(fts != NULL);
@@ -12260,61 +12626,339 @@ ha_innobase::create(
dict_table_get_all_fts_indexes(innobase_table, fts->indexes);
}
- stmt = innobase_get_stmt(thd, &stmt_len);
-
- if (stmt) {
- dberr_t err = row_table_add_foreign_constraints(
- trx, stmt, stmt_len, norm_name,
- create_info->options & HA_LEX_CREATE_TMP_TABLE);
+ size_t stmt_len;
+ if (const char* stmt = innobase_get_stmt_unsafe(m_thd, &stmt_len)) {
+ dberr_t err = create_fk
+ ? dict_create_foreign_constraints(
+ m_trx, stmt, stmt_len, m_table_name,
+ m_flags2 & DICT_TF2_TEMPORARY)
+ : DB_SUCCESS;
+ if (err == DB_SUCCESS) {
+ /* Check that also referencing constraints are ok */
+ dict_names_t fk_tables;
+ err = dict_load_foreigns(m_table_name, NULL,
+ false, true,
+ DICT_ERR_IGNORE_NONE,
+ fk_tables);
+ while (err == DB_SUCCESS && !fk_tables.empty()) {
+ dict_load_table(fk_tables.front(),
+ DICT_ERR_IGNORE_NONE);
+ fk_tables.pop_front();
+ }
+ }
switch (err) {
-
case DB_PARENT_NO_INDEX:
push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
HA_ERR_CANNOT_ADD_FOREIGN,
"Create table '%s' with foreign key constraint"
" failed. There is no index in the referenced"
" table where the referenced columns appear"
- " as the first columns.\n", norm_name);
+ " as the first columns.\n", m_table_name);
break;
case DB_CHILD_NO_INDEX:
push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
HA_ERR_CANNOT_ADD_FOREIGN,
"Create table '%s' with foreign key constraint"
" failed. There is no index in the referencing"
" table where referencing columns appear"
- " as the first columns.\n", norm_name);
+ " as the first columns.\n", m_table_name);
break;
+ case DB_NO_FK_ON_S_BASE_COL:
+ push_warning_printf(
+ m_thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_ERR_CANNOT_ADD_FOREIGN,
+ "Create table '%s' with foreign key constraint"
+ " failed. Cannot add foreign key constraint"
+				" placed on the base column of a stored"
+				" column.\n",
+ m_table_name);
default:
break;
}
- error = convert_error_code_to_mysql(err, flags, NULL);
-
- if (error) {
- goto cleanup;
+ if (err != DB_SUCCESS) {
+ DBUG_RETURN(convert_error_code_to_mysql(
+ err, m_flags, NULL));
}
}
- innobase_commit_low(trx);
+ innobase_table = dict_table_open_on_name(m_table_name, true, false,
+ DICT_ERR_IGNORE_NONE);
+ ut_ad(innobase_table);
- row_mysql_unlock_data_dictionary(trx);
+ /* In TRUNCATE TABLE, we will merely warn about the maximum
+ row size being too large. */
+ const bool is_acceptable = row_size_is_acceptable(*innobase_table,
+ create_fk);
- /* Flush the log to reduce probability that the .frm files and
- the InnoDB data dictionary get out-of-sync if the user runs
- with innodb_flush_log_at_trx_commit = 0 */
+ dict_table_close(innobase_table, true, false);
- log_buffer_flush_to_disk();
+ if (!is_acceptable) {
+ DBUG_RETURN(convert_error_code_to_mysql(
+ DB_TOO_BIG_RECORD, m_flags, NULL));
+ }
+
+ DBUG_RETURN(0);
+}
+
+bool create_table_info_t::row_size_is_acceptable(
+ const dict_table_t &table, bool strict) const
+{
+ for (dict_index_t *index= dict_table_get_first_index(&table); index;
+ index= dict_table_get_next_index(index))
+ if (!row_size_is_acceptable(*index, strict))
+ return false;
+ return true;
+}
+
+/* FIXME: row size check has some flaws and should be improved */
+dict_index_t::record_size_info_t dict_index_t::record_size_info() const
+{
+ ut_ad(!(type & DICT_FTS));
+
+ /* maximum allowed size of a node pointer record */
+ ulint page_ptr_max;
+ const bool comp= dict_table_is_comp(table);
+ const page_size_t page_size(dict_table_page_size(table));
+ record_size_info_t result;
+
+ if (page_size.is_compressed() &&
+ page_size.physical() < univ_page_size.physical())
+ {
+ /* On a ROW_FORMAT=COMPRESSED page, two records must fit in the
+ uncompressed page modification log. On compressed pages
+ with size.physical() == univ_page_size.physical(),
+ this limit will never be reached. */
+ ut_ad(comp);
+ /* The maximum allowed record size is the size of
+    an empty page, minus a byte for recording the heap
+ number in the page modification log. The maximum
+ allowed node pointer size is half that. */
+ result.max_leaf_size= page_zip_empty_size(n_fields, page_size.physical());
+ if (result.max_leaf_size)
+ {
+ result.max_leaf_size--;
+ }
+ page_ptr_max= result.max_leaf_size / 2;
+ /* On a compressed page, there is a two-byte entry in
+ the dense page directory for every record. But there
+ is no record header. */
+ result.shortest_size= 2;
+ }
+ else
+ {
+ /* The maximum allowed record size is half a B-tree
+    page (16k for 64k page size). No additional sparse
+ page directory entry will be generated for the first
+ few user records. */
+ result.max_leaf_size= (comp || srv_page_size < UNIV_PAGE_SIZE_MAX)
+ ? page_get_free_space_of_empty(comp) / 2
+ : REDUNDANT_REC_MAX_DATA_SIZE;
+
+ page_ptr_max= result.max_leaf_size;
+ /* Each record has a header. */
+ result.shortest_size= comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES;
+ }
+
+ if (comp)
+ {
+ /* Include the "null" flags in the
+ maximum possible record size. */
+ result.shortest_size+= UT_BITS_IN_BYTES(n_nullable);
+ }
+ else
+ {
+ /* For each column, include a 2-byte offset and a
+ "null" flag. The 1-byte format is only used in short
+ records that do not contain externally stored columns.
+ Such records could never exceed the page limit, even
+ when using the 2-byte format. */
+ result.shortest_size+= 2 * n_fields;
+ }
+
+ const ulint max_local_len= table->get_overflow_field_local_len();
+
+ /* Compute the maximum possible record size. */
+ for (unsigned i= 0; i < n_fields; i++)
+ {
+ const dict_field_t &f= fields[i];
+ const dict_col_t &col= *f.col;
+
+ /* In dtuple_convert_big_rec(), variable-length columns
+ that are longer than BTR_EXTERN_LOCAL_STORED_MAX_SIZE
+ may be chosen for external storage.
+
+ Fixed-length columns, and all columns of secondary
+ index records are always stored inline. */
+
+ /* Determine the maximum length of the index field.
+ The field_ext_max_size should be computed as the worst
+ case in rec_get_converted_size_comp() for
+ REC_STATUS_ORDINARY records. */
+
+ size_t field_max_size= dict_col_get_fixed_size(&col, comp);
+ if (field_max_size && f.fixed_len != 0)
+ {
+ /* dict_index_add_col() should guarantee this */
+ ut_ad(!f.prefix_len || f.fixed_len == f.prefix_len);
+ /* Fixed lengths are not encoded
+ in ROW_FORMAT=COMPACT. */
+ goto add_field_size;
+ }
+
+ field_max_size= dict_col_get_max_size(&col);
+
+ if (f.prefix_len)
+ {
+ if (f.prefix_len < field_max_size)
+ {
+ field_max_size= f.prefix_len;
+ }
+
+      /* These conditions were copied from dtuple_convert_big_rec(). */
+ }
+ else if (field_max_size > max_local_len &&
+ field_max_size > BTR_EXTERN_LOCAL_STORED_MAX_SIZE &&
+ DATA_BIG_COL(&col) && dict_index_is_clust(this))
+ {
+
+ /* In the worst case, we have a locally stored
+ column of BTR_EXTERN_LOCAL_STORED_MAX_SIZE bytes.
+ The length can be stored in one byte. If the
+ column were stored externally, the lengths in
+ the clustered index page would be
+ BTR_EXTERN_FIELD_REF_SIZE and 2. */
+ field_max_size= max_local_len;
+ }
+
+ if (comp)
+ {
+ /* Add the extra size for ROW_FORMAT=COMPACT.
+ For ROW_FORMAT=REDUNDANT, these bytes were
+ added to result.shortest_size before this loop. */
+ result.shortest_size+= field_max_size < 256 ? 1 : 2;
+ }
+ add_field_size:
+ result.shortest_size+= field_max_size;
+
+ /* Check the size limit on leaf pages. */
+ if (result.shortest_size >= result.max_leaf_size)
+ {
+ result.set_too_big(i);
+ }
+
+ /* Check the size limit on non-leaf pages. Records
+ stored in non-leaf B-tree pages consist of the unique
+ columns of the record (the key columns of the B-tree)
+ and a node pointer field. When we have processed the
+ unique columns, result.shortest_size equals the size of the
+ node pointer record minus the node pointer column. */
+ if (i + 1 == dict_index_get_n_unique_in_tree(this) &&
+ result.shortest_size + REC_NODE_PTR_SIZE >= page_ptr_max)
+ {
+ result.set_too_big(i);
+ }
+ }
+
+ return result;
+}
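
For orientation: on an uncompressed 16KiB COMPACT page, the leaf-page bound computed above is half the free space of an empty page, which is where the familiar row-size ceiling of roughly 8126 bytes comes from. A back-of-the-envelope sketch, where the ~16252-byte figure is an assumed stand-in for page_get_free_space_of_empty() on a 16KiB page:

    /* Half of an empty page's free space bounds the worst-case
       record size on a leaf page; uncompressed pages reuse the
       same value as the node-pointer bound. */
    static unsigned long max_leaf_record_size(
        unsigned long free_space_of_empty)   /* ~16252 on 16KiB */
    {
        return free_space_of_empty / 2;      /* ~8126 bytes */
    }

A record whose worst-case size (fixed columns at full width, variable columns clamped to their local prefix) reaches this bound makes record_size_info() flag the first offending field.
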
+
+/** Issue a warning that the row is too big. */
+static void ib_warn_row_too_big(THD *thd, const dict_table_t *table)
+{
+ /* FIXME: this row size check should be improved */
+ /* If prefix is true then a 768-byte prefix is stored
+ locally for BLOB fields. Refer to dict_table_get_format() */
+ const bool prefix= (dict_tf_get_format(table->flags) == UNIV_FORMAT_A);
+
+ const ulint free_space=
+ page_get_free_space_of_empty(table->flags & DICT_TF_COMPACT) / 2;
+
+ push_warning_printf(
+ thd, Sql_condition::WARN_LEVEL_WARN, HA_ERR_TO_BIG_ROW,
+ "Row size too large (> " ULINTPF "). Changing some columns to TEXT"
+ " or BLOB %smay help. In current row format, BLOB prefix of"
+ " %d bytes is stored inline.",
+ free_space,
+ prefix ? "or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED " : "",
+ prefix ? DICT_MAX_FIXED_COL_LEN : 0);
+}
+
+bool create_table_info_t::row_size_is_acceptable(
+ const dict_index_t &index, bool strict) const
+{
+ if ((index.type & DICT_FTS) || index.table->is_system_db)
+ {
+    /* Skip the check for system tables: the maximum row size of
+    innodb_table_stats cannot fit on a 4k page. */
+ return true;
+ }
+
+ const bool innodb_strict_mode= THDVAR(m_thd, strict_mode);
+ dict_index_t::record_size_info_t info= index.record_size_info();
+
+ if (info.row_is_too_big())
+ {
+ ut_ad(info.get_overrun_size() != 0);
+ ut_ad(info.max_leaf_size != 0);
+
+ const size_t idx= info.get_first_overrun_field_index();
+ const dict_field_t *field= dict_index_get_nth_field(&index, idx);
+
+ if (innodb_strict_mode || global_system_variables.log_warnings > 2)
+ {
+ ib::error_or_warn(strict && innodb_strict_mode)
+ << "Cannot add field " << field->name << " in table "
+ << index.table->name << " because after adding it, the row size is "
+ << info.get_overrun_size()
+ << " which is greater than maximum allowed size ("
+ << info.max_leaf_size << " bytes) for a record on index leaf page.";
+ }
+
+ if (strict && innodb_strict_mode)
+ return false;
+
+ ib_warn_row_too_big(m_thd, index.table);
+ }
+
+ return true;
+}
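
The decision above therefore rejects a row only when the caller requested strict checking and innodb_strict_mode is ON at the same time; otherwise the oversized row is admitted with a warning. A one-line summary, as a hypothetical predicate:

    /* true  -> fail the DDL (strict path);
       false -> proceed, possibly after ib_warn_row_too_big(). */
    static bool reject_oversized_row(bool row_too_big,
                                     bool caller_strict,
                                     bool innodb_strict_mode)
    {
        return row_too_big && caller_strict && innodb_strict_mode;
    }
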
+
+/** Update the InnoDB data dictionary cache for a newly created table.
+@return error number */
+int
+create_table_info_t::create_table_update_dict()
+{
+ dict_table_t* innobase_table;
+
+ DBUG_ENTER("create_table_update_dict");
innobase_table = dict_table_open_on_name(
- norm_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
+ m_table_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
DBUG_ASSERT(innobase_table != 0);
+ if (innobase_table->fts != NULL) {
+ if (innobase_table->fts_doc_id_index == NULL) {
+ innobase_table->fts_doc_id_index
+ = dict_table_get_index_on_name(
+ innobase_table, FTS_DOC_ID_INDEX_NAME);
+ DBUG_ASSERT(innobase_table->fts_doc_id_index != NULL);
+ } else {
+ DBUG_ASSERT(innobase_table->fts_doc_id_index
+ == dict_table_get_index_on_name(
+ innobase_table,
+ FTS_DOC_ID_INDEX_NAME));
+ }
+ }
+
+ DBUG_ASSERT((innobase_table->fts == NULL)
+ == (innobase_table->fts_doc_id_index == NULL));
- innobase_copy_frm_flags_from_create_info(innobase_table, create_info);
+ innobase_copy_frm_flags_from_create_info(innobase_table, m_create_info);
dict_stats_update(innobase_table, DICT_STATS_EMPTY_TABLE);
@@ -12328,11 +12972,11 @@ ha_innobase::create(
}
/* Load server stopword into FTS cache */
- if (flags2 & DICT_TF2_FTS) {
- if (!innobase_fts_load_stopword(innobase_table, NULL, thd)) {
+ if (m_flags2 & DICT_TF2_FTS) {
+ if (!innobase_fts_load_stopword(innobase_table, NULL, m_thd)) {
dict_table_close(innobase_table, FALSE, FALSE);
srv_active_wake_master_thread();
- trx_free_for_mysql(trx);
+ trx_free_for_mysql(m_trx);
DBUG_RETURN(-1);
}
@@ -12341,100 +12985,209 @@ ha_innobase::create(
mutex_exit(&dict_sys->mutex);
}
- /* Note: We can't call update_thd() as prebuilt will not be
- setup at this stage and so we use thd. */
+ if (const Field* ai = m_form->found_next_number_field) {
+ ut_ad(ai->stored_in_db());
- /* We need to copy the AUTOINC value from the old table if
- this is an ALTER|OPTIMIZE TABLE or CREATE INDEX because CREATE INDEX
- does a table copy too. If query was one of :
+ ib_uint64_t autoinc = m_create_info->auto_increment_value;
- CREATE TABLE ...AUTO_INCREMENT = x; or
- ALTER TABLE...AUTO_INCREMENT = x; or
- OPTIMIZE TABLE t; or
- CREATE INDEX x on t(...);
-
- Find out a table definition from the dictionary and get
- the current value of the auto increment field. Set a new
- value to the auto increment field if the value is greater
- than the maximum value in the column. */
+ if (autoinc == 0) {
+ autoinc = 1;
+ }
- if (((create_info->used_fields & HA_CREATE_USED_AUTO)
- || thd_sql_command(thd) == SQLCOM_ALTER_TABLE
- || thd_sql_command(thd) == SQLCOM_OPTIMIZE
- || thd_sql_command(thd) == SQLCOM_CREATE_INDEX)
- && create_info->auto_increment_value > 0) {
+ dict_table_autoinc_lock(innobase_table);
+ dict_table_autoinc_initialize(innobase_table, autoinc);
- auto_inc_value = create_info->auto_increment_value;
+ if (dict_table_is_temporary(innobase_table)) {
+ /* AUTO_INCREMENT is not persistent for
+ TEMPORARY TABLE. Temporary tables are never
+ evicted. Keep the counter in memory only. */
+ } else {
+ const unsigned col_no = innodb_col_no(ai);
+
+ innobase_table->persistent_autoinc = 1
+ + dict_table_get_nth_col_pos(
+ innobase_table, col_no, NULL);
+
+ /* Persist the "last used" value, which
+ typically is AUTO_INCREMENT - 1.
+ In btr_create(), the value 0 was already written. */
+ if (--autoinc) {
+ btr_write_autoinc(
+ dict_table_get_first_index(
+ innobase_table),
+ autoinc);
+ }
+ }
- dict_table_autoinc_lock(innobase_table);
- dict_table_autoinc_initialize(innobase_table, auto_inc_value);
dict_table_autoinc_unlock(innobase_table);
}
+ innobase_parse_hint_from_comment(m_thd, innobase_table, m_form->s);
+
dict_table_close(innobase_table, FALSE, FALSE);
+ DBUG_RETURN(0);
+}
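
Note the off-by-one convention made explicit above: for a persistent table, InnoDB stores the last used value, i.e. AUTO_INCREMENT - 1, via btr_write_autoinc(), and skips writing 0 because btr_create() already wrote it. A small sketch of the rule (persisted_autoinc is a hypothetical helper):

    /* CREATE TABLE ... AUTO_INCREMENT = n initializes the in-memory
       counter to n (or to 1 when unset); n - 1 is what gets
       persisted. A result of 0 means "nothing to write". */
    static unsigned long long persisted_autoinc(unsigned long long n)
    {
        unsigned long long autoinc = n ? n : 1;
        return autoinc - 1;
    }

    /* persisted_autoinc(100) == 99; persisted_autoinc(0) == 0 */
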
- /* Tell the InnoDB server that there might be work for
- utility threads: */
+/** Allocate a new trx. */
+void
+create_table_info_t::allocate_trx()
+{
+ m_trx = innobase_trx_allocate(m_thd);
- srv_active_wake_master_thread();
+ m_trx->will_lock++;
+ m_trx->ddl = true;
+}
- trx_free_for_mysql(trx);
+/** Create a new table in an InnoDB database.
+@param[in] name Table name, format: "db/table_name".
+@param[in] form Table format; columns and index information.
+@param[in] create_info Create info (including create statement string).
+@param[in] file_per_table whether to create .ibd file
+@param[in,out] trx dictionary transaction, or NULL to create new
+@return 0 if success else error number. */
+inline int
+ha_innobase::create(
+ const char* name,
+ TABLE* form,
+ HA_CREATE_INFO* create_info,
+ bool file_per_table,
+ trx_t* trx)
+{
+ int error;
+ char norm_name[FN_REFLEN]; /* {database}/{tablename} */
+ char remote_path[FN_REFLEN]; /* Absolute path of table */
- DBUG_RETURN(0);
+ DBUG_ENTER("ha_innobase::create");
+
+ create_table_info_t info(ha_thd(),
+ form,
+ create_info,
+ norm_name,
+ remote_path,
+ file_per_table, trx);
-cleanup:
- trx_rollback_for_mysql(trx);
+ if ((error = info.initialize())
+ || (error = info.prepare_create_table(name, !trx))) {
+ if (trx) {
+ trx_rollback_for_mysql(trx);
+ row_mysql_unlock_data_dictionary(trx);
+ }
+ DBUG_RETURN(error);
+ }
+ const bool own_trx = !trx;
+
+ if (own_trx) {
+ info.allocate_trx();
+ trx = info.trx();
+ /* Latch the InnoDB data dictionary exclusively so that no deadlocks
+ or lock waits can happen in it during a table create operation.
+ Drop table etc. do this latching in row0mysql.cc. */
+ row_mysql_lock_data_dictionary(trx);
+ DBUG_ASSERT(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
+ }
+
+ if ((error = info.create_table(own_trx))) {
+ /* Drop the being-created table before rollback,
+ so that rollback can possibly rename back a table
+ that could have been renamed before the failed creation. */
+ if (info.drop_before_rollback()) {
+ trx->error_state = DB_SUCCESS;
+ row_drop_table_for_mysql(info.table_name(),
+ trx, SQLCOM_TRUNCATE, true,
+ false);
+ }
+ trx_rollback_for_mysql(trx);
+ row_mysql_unlock_data_dictionary(trx);
+ if (own_trx) {
+ trx_free_for_mysql(trx);
+ }
+ DBUG_RETURN(error);
+ }
+
+ innobase_commit_low(trx);
row_mysql_unlock_data_dictionary(trx);
- trx_free_for_mysql(trx);
+ if (own_trx) {
+ trx_free_for_mysql(trx);
+ }
+
+ /* Flush the log to reduce probability that the .frm files and
+ the InnoDB data dictionary get out-of-sync if the user runs
+ with innodb_flush_log_at_trx_commit = 0 */
+ log_buffer_flush_to_disk();
+
+ ut_ad(!srv_read_only_mode);
+
+ error = info.create_table_update_dict();
+
+ /* Tell the InnoDB server that there might be work for
+ utility threads: */
+
+ srv_active_wake_master_thread();
DBUG_RETURN(error);
}
+/** Create a new table in an InnoDB database.
+@param[in] name Table name, format: "db/table_name".
+@param[in] form Table format; columns and index information.
+@param[in] create_info Create info (including create statement string).
+@return 0 if success else error number. */
+int
+ha_innobase::create(
+ const char* name,
+ TABLE* form,
+ HA_CREATE_INFO* create_info)
+{
+ return create(name, form, create_info, srv_file_per_table);
+}
+
/*****************************************************************//**
Discards or imports an InnoDB tablespace.
-@return 0 == success, -1 == error */
-UNIV_INTERN
+@return 0 == success, -1 == error */
+
int
ha_innobase::discard_or_import_tablespace(
/*======================================*/
- my_bool discard) /*!< in: TRUE if discard, else import */
+ my_bool discard) /*!< in: TRUE if discard, else import */
{
- dberr_t err;
- dict_table_t* dict_table;
DBUG_ENTER("ha_innobase::discard_or_import_tablespace");
- ut_a(prebuilt->trx);
- ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
- ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
+ ut_a(m_prebuilt->trx != NULL);
+ ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N);
+ ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
if (high_level_read_only) {
DBUG_RETURN(HA_ERR_TABLE_READONLY);
}
- dict_table = prebuilt->table;
+ dict_table_t* dict_table = m_prebuilt->table;
- if (dict_table->space == TRX_SYS_SPACE) {
+ if (dict_table_is_temporary(dict_table)) {
ib_senderrf(
- prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
- ER_TABLE_IN_SYSTEM_TABLESPACE,
- table->s->table_name.str);
+ m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_CANNOT_DISCARD_TEMPORARY_TABLE);
DBUG_RETURN(HA_ERR_TABLE_NEEDS_UPGRADE);
}
- trx_start_if_not_started(prebuilt->trx);
+ if (dict_table->space == srv_sys_space.space_id()) {
+ ib_senderrf(
+ m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLE_IN_SYSTEM_TABLESPACE,
+ dict_table->name.m_name);
- /* In case MySQL calls this in the middle of a SELECT query, release
- possible adaptive hash latch to avoid deadlocks of threads. */
- trx_search_latch_release_if_reserved(prebuilt->trx);
+ DBUG_RETURN(HA_ERR_TABLE_NEEDS_UPGRADE);
+ }
+
+ trx_start_if_not_started(m_prebuilt->trx, true);
/* Obtain an exclusive lock on the table. */
- err = row_mysql_lock_table(
- prebuilt->trx, dict_table, LOCK_X,
+ dberr_t err = row_mysql_lock_table(
+ m_prebuilt->trx, dict_table, LOCK_X,
discard ? "setting table lock for DISCARD TABLESPACE"
: "setting table lock for IMPORT TABLESPACE");
@@ -12449,35 +13202,33 @@ ha_innobase::discard_or_import_tablespace(
if (!dict_table->is_readable()) {
ib_senderrf(
- prebuilt->trx->mysql_thd,
+ m_prebuilt->trx->mysql_thd,
IB_LOG_LEVEL_WARN, ER_TABLESPACE_MISSING,
- table->s->table_name.str);
+ dict_table->name.m_name);
}
err = row_discard_tablespace_for_mysql(
- dict_table->name, prebuilt->trx);
+ dict_table->name.m_name, m_prebuilt->trx);
} else if (dict_table->is_readable()) {
/* Commit the transaction in order to
release the table lock. */
- trx_commit_for_mysql(prebuilt->trx);
+ trx_commit_for_mysql(m_prebuilt->trx);
+ ib::error() << "Unable to import tablespace "
+ << dict_table->name << " because it already"
+ " exists. Please DISCARD the tablespace"
+ " before IMPORT.";
ib_senderrf(
- prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
- ER_TABLESPACE_EXISTS, table->s->table_name.str);
+ m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_EXISTS, dict_table->name.m_name);
DBUG_RETURN(HA_ERR_TABLE_EXIST);
} else {
- err = row_import_for_mysql(dict_table, prebuilt);
+ err = row_import_for_mysql(dict_table, m_prebuilt);
if (err == DB_SUCCESS) {
- if (table->found_next_number_field) {
- dict_table_autoinc_lock(dict_table);
- innobase_initialize_autoinc();
- dict_table_autoinc_unlock(dict_table);
- }
-
info(HA_STATUS_TIME
| HA_STATUS_CONST
| HA_STATUS_VARIABLE
@@ -12488,7 +13239,7 @@ ha_innobase::discard_or_import_tablespace(
}
/* Commit the transaction in order to release the table lock. */
- trx_commit_for_mysql(prebuilt->trx);
+ trx_commit_for_mysql(m_prebuilt->trx);
if (err == DB_SUCCESS && !discard
&& dict_stats_is_persistent_enabled(dict_table)) {
@@ -12505,82 +13256,25 @@ ha_innobase::discard_or_import_tablespace(
ER_ALTER_INFO,
"Error updating stats for table '%s'"
" after table rebuild: %s",
- dict_table->name, ut_strerr(ret));
+ dict_table->name.m_name, ut_strerr(ret));
}
}
DBUG_RETURN(convert_error_code_to_mysql(err, dict_table->flags, NULL));
}
-/*****************************************************************//**
-Deletes all rows of an InnoDB table.
-@return error number */
-UNIV_INTERN
-int
-ha_innobase::truncate()
-/*===================*/
-{
- dberr_t err;
- int error;
-
- DBUG_ENTER("ha_innobase::truncate");
-
- if (high_level_read_only) {
- DBUG_RETURN(HA_ERR_TABLE_READONLY);
- }
-
- /* Get the transaction associated with the current thd, or create one
- if not yet created, and update prebuilt->trx */
-
- update_thd(ha_thd());
-
- if (!trx_is_started(prebuilt->trx)) {
- ++prebuilt->trx->will_lock;
- }
- /* Truncate the table in InnoDB */
-
- err = row_truncate_table_for_mysql(prebuilt->table, prebuilt->trx);
-
- switch (err) {
-
- case DB_TABLESPACE_DELETED:
- case DB_TABLESPACE_NOT_FOUND:
- ib_senderrf(
- prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
- (err == DB_TABLESPACE_DELETED ?
- ER_TABLESPACE_DISCARDED : ER_TABLESPACE_MISSING),
- table->s->table_name.str);
- table->status = STATUS_NOT_FOUND;
- error = HA_ERR_NO_SUCH_TABLE;
- break;
-
- default:
- error = convert_error_code_to_mysql(
- err, prebuilt->table->flags,
- prebuilt->trx->mysql_thd);
- table->status = STATUS_NOT_FOUND;
- break;
- }
- DBUG_RETURN(error);
-}
-
-/*****************************************************************//**
+/**
Drops a table from an InnoDB database. Before calling this function,
MySQL calls innobase_commit to commit the transaction of the current user.
Then the current user cannot have locks set on the table. Drop table
operation inside InnoDB will remove all locks any user has on the table
inside InnoDB.
-@return error number */
-UNIV_INTERN
-int
-ha_innobase::delete_table(
-/*======================*/
- const char* name) /*!< in: table name */
+@param[in] name table name
+@param[in] sqlcom SQLCOM_DROP_DB, SQLCOM_TRUNCATE, ...
+@return error number */
+inline int ha_innobase::delete_table(const char* name, enum_sql_command sqlcom)
{
- ulint name_len;
dberr_t err;
- trx_t* parent_trx;
- trx_t* trx;
THD* thd = ha_thd();
char norm_name[FN_REFLEN];
@@ -12599,24 +13293,35 @@ ha_innobase::delete_table(
extension, in contrast to ::create */
normalize_table_name(norm_name, name);
- if (srv_read_only_mode
- || srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN) {
+ if (high_level_read_only) {
DBUG_RETURN(HA_ERR_TABLE_READONLY);
- } else if (row_is_magic_monitor_table(norm_name)
- && check_global_access(thd, PROCESS_ACL)) {
- DBUG_RETURN(HA_ERR_GENERIC);
}
- parent_trx = check_trx_exists(thd);
+ trx_t* parent_trx = check_trx_exists(thd);
+
+ /* Remove the to-be-dropped table from the list of modified tables
+ by parent_trx. Otherwise we may end up with an orphaned pointer to
+ the table object from parent_trx::mod_tables. This could happen in:
+ SET AUTOCOMMIT=0;
+ CREATE TABLE t (PRIMARY KEY (a)) ENGINE=INNODB SELECT 1 AS a UNION
+ ALL SELECT 1 AS a; */
+ trx_mod_tables_t::const_iterator iter;
- /* In case MySQL calls this in the middle of a SELECT query, release
- possible adaptive hash latch to avoid deadlocks of threads */
+ for (iter = parent_trx->mod_tables.begin();
+ iter != parent_trx->mod_tables.end();
+ ++iter) {
- trx_search_latch_release_if_reserved(parent_trx);
+ dict_table_t* table_to_drop = *iter;
- trx = innobase_trx_allocate(thd);
+ if (strcmp(norm_name, table_to_drop->name.m_name) == 0) {
+ parent_trx->mod_tables.erase(table_to_drop);
+ break;
+ }
+ }
- name_len = strlen(name);
+ trx_t* trx = innobase_trx_allocate(thd);
+
+ ulint name_len = strlen(name);
ut_a(name_len < 1000);
@@ -12627,23 +13332,14 @@ ha_innobase::delete_table(
/* We are doing a DDL operation. */
++trx->will_lock;
- trx->ddl = true;
-
- const int sqlcom = thd_sql_command(thd);
/* Drop the table in InnoDB */
- err = row_drop_table_for_mysql(
- norm_name, trx, sqlcom == SQLCOM_DROP_DB,
- sqlcom == SQLCOM_CREATE_TABLE /* CREATE TABLE ... SELECT */);
+
+ err = row_drop_table_for_mysql(norm_name, trx, sqlcom);
if (err == DB_TABLE_NOT_FOUND
&& innobase_get_lower_case_table_names() == 1) {
- char* is_part = NULL;
-#ifdef __WIN__
- is_part = strstr(norm_name, "#p#");
-#else
- is_part = strstr(norm_name, "#P#");
-#endif /* __WIN__ */
+ char* is_part = is_partition(norm_name);
if (is_part) {
char par_case_name[FN_REFLEN];
@@ -12659,17 +13355,74 @@ ha_innobase::delete_table(
whether there exists table name in
system table whose name is
not being normalized to lower case */
- normalize_table_name_low(
+ normalize_table_name_c_low(
par_case_name, name, FALSE);
#endif
err = row_drop_table_for_mysql(
- par_case_name, trx,
- sqlcom == SQLCOM_DROP_DB,
- sqlcom == SQLCOM_CREATE_TABLE
- /* CREATE TABLE ... SELECT */);
+ par_case_name, trx, sqlcom);
+ }
+ }
+
+ if (err == DB_TABLE_NOT_FOUND) {
+		/* Try to drop all tables that match db/tablename + '#'.
+		Only partitions can have '#' as a non-first character in
+		the table name!
+
+		Temporary table names always start with '#', partitions are
+		the only 'tables' that can have '#' after the first character,
+		and a table name must have length > 0. User tables cannot have
+		'#' since it would be translated to @0023. Therefore this should
+		only match partitions. */
+ uint len = (uint) strlen(norm_name);
+ ulint num_partitions;
+ ut_a(len < FN_REFLEN);
+ norm_name[len] = '#';
+ norm_name[len + 1] = 0;
+ err = row_drop_database_for_mysql(norm_name, trx,
+ &num_partitions);
+ norm_name[len] = 0;
+ table_name_t tbl_name(norm_name);
+ if (num_partitions == 0 && !tbl_name.is_temporary()) {
+ ib::error() << "Table " << tbl_name <<
+ " does not exist in the InnoDB"
+ " internal data dictionary though MariaDB is"
+ " trying to drop it. Have you copied the .frm"
+ " file of the table to the MariaDB database"
+ " directory from another database? "
+ << TROUBLESHOOTING_MSG;
+ }
+ if (num_partitions == 0) {
+ err = DB_TABLE_NOT_FOUND;
}
}
+ if (err == DB_TABLE_NOT_FOUND
+ && innobase_get_lower_case_table_names() == 1) {
+ char* is_part = is_partition(norm_name);
+
+ if (is_part != NULL) {
+ char par_case_name[FN_REFLEN];
+
+#ifndef _WIN32
+ /* Check for the table using lower
+ case name, including the partition
+ separator "P" */
+ strcpy(par_case_name, norm_name);
+ innobase_casedn_str(par_case_name);
+#else
+			/* On the Windows platform, check
+			whether there exists a table name in
+			the system table whose name is
+			not normalized to lower case */
+ create_table_info_t::normalize_table_name_low(
+ par_case_name, name, FALSE);
+#endif /* _WIN32 */
+ err = row_drop_table_for_mysql(
+ par_case_name, trx, sqlcom, true);
+ }
+ }
+
+ ut_ad(!srv_read_only_mode);
/* Flush the log to reduce probability that the .frm files and
the InnoDB data dictionary get out-of-sync if the user runs
with innodb_flush_log_at_trx_commit = 0 */
@@ -12682,151 +13435,61 @@ ha_innobase::delete_table(
DBUG_RETURN(convert_error_code_to_mysql(err, 0, NULL));
}
-/*****************************************************************//**
-Defragment table.
-@return error number */
-UNIV_INTERN
-int
-ha_innobase::defragment_table(
-/*==========================*/
- const char* name, /*!< in: table name */
- const char* index_name, /*!< in: index name */
- bool async) /*!< in: whether to wait until finish */
-{
- char norm_name[FN_REFLEN];
- dict_table_t* table = NULL;
- dict_index_t* index = NULL;
- ibool one_index = (index_name != 0);
- int ret = 0;
- dberr_t err = DB_SUCCESS;
-
- if (!srv_defragment) {
- return ER_FEATURE_DISABLED;
- }
-
- normalize_table_name(norm_name, name);
-
- table = dict_table_open_on_name(norm_name, FALSE,
- FALSE, DICT_ERR_IGNORE_NONE);
-
- for (index = dict_table_get_first_index(table); index;
- index = dict_table_get_next_index(index)) {
-
- if (dict_index_is_corrupted(index)) {
- continue;
- }
-
- if (index->page == FIL_NULL) {
- /* Do not defragment auxiliary tables related
- to FULLTEXT INDEX. */
- ut_ad(index->type & DICT_FTS);
- continue;
- }
-
- if (one_index && strcasecmp(index_name, index->name) != 0) {
- continue;
- }
- if (btr_defragment_find_index(index)) {
- // We borrow this error code. When the same index is
- // already in the defragmentation queue, issue another
- // defragmentation only introduces overhead. We return
- // an error here to let the user know this is not
- // necessary. Note that this will fail a query that's
- // trying to defragment a full table if one of the
- // indicies in that table is already in defragmentation.
- // We choose this behavior so user is aware of this
- // rather than silently defragment other indicies of
- // that table.
- ret = ER_SP_ALREADY_EXISTS;
- break;
- }
-
- os_event_t event = btr_defragment_add_index(index, async, &err);
-
- if (err != DB_SUCCESS) {
- push_warning_printf(
- current_thd,
- Sql_condition::WARN_LEVEL_WARN,
- ER_NO_SUCH_TABLE,
- "Table %s is encrypted but encryption service or"
- " used key_id is not available. "
- " Can't continue checking table.",
- index->table->name);
-
- ret = convert_error_code_to_mysql(err, 0, current_thd);
- break;
- }
-
- if (!async && event) {
- while(os_event_wait_time(event, 1000000)) {
- if (thd_killed(current_thd)) {
- btr_defragment_remove_index(index);
- ret = ER_QUERY_INTERRUPTED;
- break;
- }
- }
- os_event_free(event);
- }
-
- if (ret) {
- break;
- }
-
- if (one_index) {
- one_index = FALSE;
- break;
- }
- }
-
- dict_table_close(table, FALSE, FALSE);
+/** Drop an InnoDB table.
+@param[in] name table name
+@return error number */
+int ha_innobase::delete_table(const char* name)
+{
+ enum_sql_command sqlcom = enum_sql_command(thd_sql_command(ha_thd()));
+ /* SQLCOM_TRUNCATE should be passed via ha_innobase::truncate() only.
- if (ret == 0 && one_index) {
- ret = ER_NO_SUCH_INDEX;
- }
+ On client disconnect, when dropping temporary tables, the
+ previous sqlcom would not be overwritten. In such a case, we
+ will have thd_kill_level() != NOT_KILLED, !m_prebuilt can
+ hold, and sqlcom could be anything, including TRUNCATE.
- return ret;
+ The sqlcom only matters for persistent tables; no persistent
+ metadata or FOREIGN KEY metadata is kept for temporary
+ tables. Therefore, we relax the assertion. If there is a bug
+ that slips through this assertion due to !m_prebuilt, the
+ worst impact should be that on DROP TABLE of a persistent
+ table, FOREIGN KEY constraints will be ignored and their
+ metadata will not be removed. */
+ DBUG_ASSERT(sqlcom != SQLCOM_TRUNCATE
+ || (thd_kill_level(ha_thd()) != THD_IS_NOT_KILLED
+ && (!m_prebuilt
+ || m_prebuilt->table->is_temporary())));
+ return delete_table(name, sqlcom);
}
-/*****************************************************************//**
-Removes all tables in the named database inside InnoDB. */
+/** Remove all tables in the named database inside InnoDB.
+@param[in] hton handlerton from InnoDB
+@param[in]	path	Database path; inside InnoDB the name of the last
+directory in the path is used as the database name.
+For example, in 'mysql/data/test' the database name is 'test'. */
+
static
void
innobase_drop_database(
-/*===================*/
- handlerton* hton, /*!< in: handlerton of Innodb */
- char* path) /*!< in: database path; inside InnoDB the name
- of the last directory in the path is used as
- the database name: for example, in
- 'mysql/data/test' the database name is 'test' */
+ handlerton* hton,
+ char* path)
{
- ulint len = 0;
- trx_t* trx;
- char* ptr;
char* namebuf;
- THD* thd = current_thd;
/* Get the transaction associated with the current thd, or create one
if not yet created */
DBUG_ASSERT(hton == innodb_hton_ptr);
- if (srv_read_only_mode) {
+ if (high_level_read_only) {
return;
}
- /* In the Windows plugin, thd = current_thd is always NULL */
- if (thd) {
- trx_t* parent_trx = check_trx_exists(thd);
-
- /* In case MySQL calls this in the middle of a SELECT
- query, release possible adaptive hash latch to avoid
- deadlocks of threads */
-
- trx_search_latch_release_if_reserved(parent_trx);
- }
+ THD* thd = current_thd;
- ptr = strend(path) - 2;
+ ulint len = 0;
+ char* ptr = strend(path) - 2;
while (ptr >= path && *ptr != '\\' && *ptr != '/') {
ptr--;
@@ -12834,15 +13497,17 @@ innobase_drop_database(
}
ptr++;
- namebuf = (char*) my_malloc((uint) len + 2, MYF(0));
+ namebuf = (char*) my_malloc(/*PSI_INSTRUMENT_ME,*/ (uint) len + 2, MYF(0));
memcpy(namebuf, ptr, len);
namebuf[len] = '/';
namebuf[len + 1] = '\0';
-#ifdef __WIN__
+
+#ifdef _WIN32
innobase_casedn_str(namebuf);
-#endif
- trx = innobase_trx_allocate(thd);
+#endif /* _WIN32 */
+
+ trx_t* trx = innobase_trx_allocate(thd);
/* Either the transaction is already flagged as a locking transaction
or it hasn't been started yet. */
@@ -12852,7 +13517,9 @@ innobase_drop_database(
/* We are doing a DDL operation. */
++trx->will_lock;
- row_drop_database_for_mysql(namebuf, trx);
+ ulint dummy;
+
+ row_drop_database_for_mysql(namebuf, trx, &dummy);
my_free(namebuf);
@@ -12863,27 +13530,33 @@ innobase_drop_database(
log_buffer_flush_to_disk();
innobase_commit_low(trx);
+
trx_free_for_mysql(trx);
}
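
The name handed to row_drop_database_for_mysql() is the last path component plus a trailing '/', so it prefix-matches every 'db/table' entry in the dictionary. A small sketch of that extraction, with db_name_from_path as a hypothetical equivalent of the pointer walk above (the real code also counts the length as it scans):

    #include <cstring>
    #include <string>

    /* "mysql/data/test/" -> "test/" */
    static std::string db_name_from_path(const char* path)
    {
        size_t n = std::strlen(path);
        if (n == 0) {
            return "/";
        }
        const char* p = path + n - 1;
        if (p > path && (*p == '/' || *p == '\\')) {
            --p;                    /* skip the trailing separator */
        }
        const char* last = p;
        while (last > path && last[-1] != '/' && last[-1] != '\\') {
            --last;                 /* back up to the previous one */
        }
        return std::string(last, p + 1) + "/";
    }
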
-/*********************************************************************//**
-Renames an InnoDB table.
+/** Rename an InnoDB table.
+@param[in,out] trx InnoDB data dictionary transaction
+@param[in] from old table name
+@param[in] to new table name
+@param[in] commit whether to commit trx
+@param[in] use_fk whether to parse and enforce FOREIGN KEY constraints
@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
+inline
dberr_t
innobase_rename_table(
-/*==================*/
- THD* thd, /*!< Connection thread handle */
- trx_t* trx, /*!< in: transaction */
- const char* from, /*!< in: old name of the table */
- const char* to) /*!< in: new name of the table */
+ trx_t* trx,
+ const char* from,
+ const char* to,
+ bool commit,
+ bool use_fk)
{
dberr_t error;
char norm_to[FN_REFLEN];
char norm_from[FN_REFLEN];
DBUG_ENTER("innobase_rename_table");
- DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
+ DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX
+ || trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE);
ut_ad(!srv_read_only_mode);
@@ -12892,19 +13565,21 @@ innobase_rename_table(
DEBUG_SYNC_C("innodb_rename_table_ready");
- trx_start_if_not_started(trx);
+ trx_start_if_not_started(trx, true);
+ ut_ad(trx->will_lock > 0);
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations. */
-
- row_mysql_lock_data_dictionary(trx);
+ if (commit) {
+ /* Serialize data dictionary operations with dictionary mutex:
+ no deadlocks can occur then in these operations. */
+ row_mysql_lock_data_dictionary(trx);
+ }
- dict_table_t* table = dict_table_open_on_name(norm_from, TRUE, FALSE,
- DICT_ERR_IGNORE_NONE);
+ dict_table_t* table = dict_table_open_on_name(
+ norm_from, TRUE, FALSE, DICT_ERR_IGNORE_FK_NOKEY);
/* Since DICT_BG_YIELD has sleep for 250 milliseconds,
Convert lock_wait_timeout unit from second to 250 milliseconds */
- long int lock_wait_timeout = thd_lock_wait_timeout(thd) * 4;
+ long int lock_wait_timeout = thd_lock_wait_timeout(trx->mysql_thd) * 4;
if (table != NULL) {
for (dict_index_t* index = dict_table_get_first_index(table);
index != NULL;
@@ -12925,8 +13600,7 @@ innobase_rename_table(
/* FTS sync is in progress. We shall timeout this operation */
if (lock_wait_timeout < 0) {
error = DB_LOCK_WAIT_TIMEOUT;
- row_mysql_unlock_data_dictionary(trx);
- DBUG_RETURN(error);
+ goto func_exit;
}
/* Transaction must be flagged as a locking transaction or it hasn't
@@ -12934,22 +13608,17 @@ innobase_rename_table(
ut_a(trx->will_lock > 0);
- error = row_rename_table_for_mysql(
- norm_from, norm_to, trx, TRUE);
+ error = row_rename_table_for_mysql(norm_from, norm_to, trx, commit,
+ use_fk);
if (error != DB_SUCCESS) {
if (error == DB_TABLE_NOT_FOUND
&& innobase_get_lower_case_table_names() == 1) {
- char* is_part = NULL;
-#ifdef __WIN__
- is_part = strstr(norm_from, "#p#");
-#else
- is_part = strstr(norm_from, "#P#");
-#endif /* __WIN__ */
+ char* is_part = is_partition(norm_from);
if (is_part) {
char par_case_name[FN_REFLEN];
-#ifndef __WIN__
+#ifndef _WIN32
/* Check for the table using lower
case name, including the partition
separator "P" */
@@ -12960,36 +13629,40 @@ innobase_rename_table(
whether there exists table name in
system table whose name is
not being normalized to lower case */
- normalize_table_name_low(
+ create_table_info_t::normalize_table_name_low(
par_case_name, from, FALSE);
-#endif
- trx_start_if_not_started(trx);
+#endif /* _WIN32 */
+ trx_start_if_not_started(trx, true);
error = row_rename_table_for_mysql(
- par_case_name, norm_to, trx, TRUE);
+ par_case_name, norm_to, trx,
+ true, false);
}
}
if (error == DB_SUCCESS) {
-#ifndef __WIN__
- sql_print_warning("Rename partition table %s "
- "succeeds after converting to lower "
- "case. The table may have "
- "been moved from a case "
- "in-sensitive file system.\n",
+#ifndef _WIN32
+ sql_print_warning("Rename partition table %s"
+ " succeeds after converting to lower"
+ " case. The table may have"
+				" been moved from a"
+				" case-insensitive file system.\n",
norm_from);
#else
- sql_print_warning("Rename partition table %s "
- "succeeds after skipping the step to "
- "lower case the table name. "
- "The table may have been "
- "moved from a case sensitive "
- "file system.\n",
+ sql_print_warning("Rename partition table %s"
+ " succeeds after skipping the step to"
+ " lower case the table name."
+ " The table may have been"
+				" moved from a case-sensitive"
+ " file system.\n",
norm_from);
-#endif /* __WIN__ */
+#endif /* _WIN32 */
}
}
- row_mysql_unlock_data_dictionary(trx);
+func_exit:
+ if (commit) {
+ row_mysql_unlock_data_dictionary(trx);
+ }
/* Flush the log to reduce probability that the .frm
files and the InnoDB data dictionary get out-of-sync
@@ -13000,20 +13673,150 @@ innobase_rename_table(
DBUG_RETURN(error);
}
+/** TRUNCATE TABLE
+@return error code
+@retval 0 on success */
+int ha_innobase::truncate()
+{
+ DBUG_ENTER("ha_innobase::truncate");
+
+ if (high_level_read_only) {
+ DBUG_RETURN(HA_ERR_TABLE_READONLY);
+ }
+
+ update_thd();
+
+ if (!srv_safe_truncate) {
+ if (!trx_is_started(m_prebuilt->trx)) {
+ ++m_prebuilt->trx->will_lock;
+ }
+
+ dberr_t err = row_truncate_table_for_mysql(
+ m_prebuilt->table, m_prebuilt->trx);
+
+ int error;
+
+ switch (err) {
+ case DB_TABLESPACE_DELETED:
+ case DB_TABLESPACE_NOT_FOUND:
+ ib_senderrf(
+ m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ err == DB_TABLESPACE_DELETED
+ ? ER_TABLESPACE_DISCARDED
+ : ER_TABLESPACE_MISSING,
+ table->s->table_name.str);
+ error = HA_ERR_TABLESPACE_MISSING;
+ break;
+ default:
+ error = convert_error_code_to_mysql(
+ err, m_prebuilt->table->flags,
+ m_prebuilt->trx->mysql_thd);
+ break;
+ }
+ table->status = STATUS_NOT_FOUND;
+ DBUG_RETURN(error);
+ }
+
+ HA_CREATE_INFO info;
+ mem_heap_t* heap = mem_heap_create(1000);
+ dict_table_t* ib_table = m_prebuilt->table;
+ const time_t update_time = ib_table->update_time;
+ const ulint stored_lock = m_prebuilt->stored_select_lock_type;
+ info.init();
+ update_create_info_from_table(&info, table);
+
+ if (dict_table_is_temporary(ib_table)) {
+ info.options|= HA_LEX_CREATE_TMP_TABLE;
+ } else {
+ dict_get_and_save_data_dir_path(ib_table, false);
+ }
+
+ char* data_file_name = ib_table->data_dir_path;
+
+ if (data_file_name) {
+ info.data_file_name = data_file_name
+ = mem_heap_strdup(heap, data_file_name);
+ }
+
+ const char* temp_name = dict_mem_create_temporary_tablename(
+ heap, ib_table->name.m_name, ib_table->id);
+ const char* name = mem_heap_strdup(heap, ib_table->name.m_name);
+ trx_t* trx = innobase_trx_allocate(m_user_thd);
+
+ ++trx->will_lock;
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+ row_mysql_lock_data_dictionary(trx);
+ int err = convert_error_code_to_mysql(
+ innobase_rename_table(trx, ib_table->name.m_name, temp_name,
+ false, false),
+ ib_table->flags, m_user_thd);
+ if (err) {
+ trx_rollback_for_mysql(trx);
+ row_mysql_unlock_data_dictionary(trx);
+ } else {
+ switch (dict_tf_get_rec_format(ib_table->flags)) {
+ case REC_FORMAT_REDUNDANT:
+ info.row_type = ROW_TYPE_REDUNDANT;
+ break;
+ case REC_FORMAT_COMPACT:
+ info.row_type = ROW_TYPE_COMPACT;
+ break;
+ case REC_FORMAT_COMPRESSED:
+ info.row_type = ROW_TYPE_COMPRESSED;
+ break;
+ case REC_FORMAT_DYNAMIC:
+ info.row_type = ROW_TYPE_DYNAMIC;
+ break;
+ }
+
+ err = create(name, table, &info,
+ ib_table->is_temporary()
+ || dict_table_is_file_per_table(ib_table), trx);
+ }
+
+ trx_free_for_mysql(trx);
+
+ if (!err) {
+ /* Reopen the newly created table, and drop the
+ original table that was renamed to temp_name. */
+
+ row_prebuilt_t* prebuilt = m_prebuilt;
+ uchar* upd_buf = m_upd_buf;
+ ulint upd_buf_size = m_upd_buf_size;
+ /* Mimic ha_innobase::close(). */
+ m_prebuilt = NULL;
+ m_upd_buf = NULL;
+ m_upd_buf_size = 0;
+ err = open(name, 0, 0);
+ if (!err) {
+ m_prebuilt->stored_select_lock_type = stored_lock;
+ m_prebuilt->table->update_time = update_time;
+ row_prebuilt_free(prebuilt, FALSE);
+ delete_table(temp_name, SQLCOM_TRUNCATE);
+ my_free(upd_buf);
+ } else {
+ /* Revert to the old table before truncation. */
+ m_prebuilt = prebuilt;
+ m_upd_buf = upd_buf;
+ m_upd_buf_size = upd_buf_size;
+ }
+ }
+
+ mem_heap_free(heap);
+ DBUG_RETURN(err);
+}
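
With srv_safe_truncate set, TRUNCATE is thus implemented as rename + re-create + drop rather than by row_truncate_table_for_mysql(). A stub model of that sequence, where each stub stands in for the real call named in its comment (assumed behavior, error handling condensed):

    #include <string>

    static bool rename_to_temp(const std::string&) { return true; } // innobase_rename_table()
    static bool recreate(const std::string&) { return true; }       // ha_innobase::create() with the same trx
    static bool reopen(const std::string&) { return true; }         // ha_innobase::open()
    static void drop_temp(const std::string&) {}                    // delete_table(..., SQLCOM_TRUNCATE)

    static bool safe_truncate(const std::string& name,
                              const std::string& temp_name)
    {
        if (!rename_to_temp(temp_name)) return false; /* rollback */
        if (!recreate(name)) return false;  /* original kept under temp_name */
        if (!reopen(name)) return false;    /* revert to the old handle */
        drop_temp(temp_name);               /* old data is removed here */
        return true;
    }

The design preserves the row format, DATA DIRECTORY and AUTO_INCREMENT settings across the re-create, and restores the stored lock type and update_time on the reopened handle, as the function above does.
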
+
/*********************************************************************//**
Renames an InnoDB table.
-@return 0 or error code */
-UNIV_INTERN
+@return 0 or error code */
+
int
ha_innobase::rename_table(
/*======================*/
const char* from, /*!< in: old name of the table */
const char* to) /*!< in: new name of the table */
{
- trx_t* trx;
- dberr_t error;
- trx_t* parent_trx;
- THD* thd = ha_thd();
+ THD* thd = ha_thd();
DBUG_ENTER("ha_innobase::rename_table");
@@ -13022,27 +13825,18 @@ ha_innobase::rename_table(
DBUG_RETURN(HA_ERR_TABLE_READONLY);
}
- /* Get the transaction associated with the current thd, or create one
- if not yet created */
-
- parent_trx = check_trx_exists(thd);
-
- /* In case MySQL calls this in the middle of a SELECT query, release
- possible adaptive hash latch to avoid deadlocks of threads */
-
- trx_search_latch_release_if_reserved(parent_trx);
-
- trx = innobase_trx_allocate(thd);
+ trx_t* trx = innobase_trx_allocate(thd);
/* We are doing a DDL operation. */
++trx->will_lock;
trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
- error = innobase_rename_table(thd, trx, from, to);
+ dberr_t error = innobase_rename_table(trx, from, to, true, true);
DEBUG_SYNC(thd, "after_innobase_rename_table");
innobase_commit_low(trx);
+
trx_free_for_mysql(trx);
if (error == DB_SUCCESS) {
@@ -13058,8 +13852,7 @@ ha_innobase::rename_table(
errstr, sizeof(errstr));
if (ret != DB_SUCCESS) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: %s\n", errstr);
+ ib::error() << errstr;
push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
ER_LOCK_WAIT_TIMEOUT, errstr);
@@ -13093,8 +13886,8 @@ ha_innobase::rename_table(
/*********************************************************************//**
Estimates the number of index records in a range.
-@return estimated number of rows */
-UNIV_INTERN
+@return estimated number of rows */
+
ha_rows
ha_innobase::records_in_range(
/*==========================*/
@@ -13108,21 +13901,16 @@ ha_innobase::records_in_range(
dict_index_t* index;
dtuple_t* range_start;
dtuple_t* range_end;
- ib_int64_t n_rows;
- ulint mode1;
- ulint mode2;
+ int64_t n_rows;
+ page_cur_mode_t mode1;
+ page_cur_mode_t mode2;
mem_heap_t* heap;
DBUG_ENTER("records_in_range");
- ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
-
- prebuilt->trx->op_info = (char*)"estimating records in index range";
+ ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
- /* In case MySQL calls this in the middle of a SELECT query, release
- possible adaptive hash latch to avoid deadlocks of threads */
-
- trx_search_latch_release_if_reserved(prebuilt->trx);
+ m_prebuilt->trx->op_info = "estimating records in index range";
active_index = keynr;
@@ -13133,19 +13921,19 @@ ha_innobase::records_in_range(
	/* There is a possibility of not being able to find the requested
	index due to an inconsistency between the MySQL and InnoDB dictionary
	info. The necessary message should have been printed in
	innobase_get_index() */
- if (dict_table_is_discarded(prebuilt->table)) {
+ if (dict_table_is_discarded(m_prebuilt->table)) {
n_rows = HA_POS_ERROR;
goto func_exit;
}
- if (UNIV_UNLIKELY(!index)) {
+ if (!index) {
n_rows = HA_POS_ERROR;
goto func_exit;
}
- if (dict_index_is_corrupted(index)) {
+ if (index->is_corrupted()) {
n_rows = HA_ERR_INDEX_CORRUPT;
goto func_exit;
}
- if (UNIV_UNLIKELY(!row_merge_is_index_usable(prebuilt->trx, index))) {
+ if (!row_merge_is_index_usable(m_prebuilt->trx, index)) {
n_rows = HA_ERR_TABLE_DEF_CHANGED;
goto func_exit;
}
@@ -13160,41 +13948,47 @@ ha_innobase::records_in_range(
dict_index_copy_types(range_end, index, key->ext_key_parts);
row_sel_convert_mysql_key_to_innobase(
- range_start,
- prebuilt->srch_key_val1,
- prebuilt->srch_key_val_len,
- index,
- (byte*) (min_key ? min_key->key :
- (const uchar*) 0),
- (ulint) (min_key ? min_key->length : 0),
- prebuilt->trx);
+ range_start,
+ m_prebuilt->srch_key_val1,
+ m_prebuilt->srch_key_val_len,
+ index,
+ (byte*) (min_key ? min_key->key : (const uchar*) 0),
+ (ulint) (min_key ? min_key->length : 0),
+ m_prebuilt->trx);
+
DBUG_ASSERT(min_key
? range_start->n_fields > 0
: range_start->n_fields == 0);
row_sel_convert_mysql_key_to_innobase(
- range_end,
- prebuilt->srch_key_val2,
- prebuilt->srch_key_val_len,
- index,
- (byte*) (max_key ? max_key->key :
- (const uchar*) 0),
- (ulint) (max_key ? max_key->length : 0),
- prebuilt->trx);
+ range_end,
+ m_prebuilt->srch_key_val2,
+ m_prebuilt->srch_key_val_len,
+ index,
+ (byte*) (max_key ? max_key->key : (const uchar*) 0),
+ (ulint) (max_key ? max_key->length : 0),
+ m_prebuilt->trx);
+
DBUG_ASSERT(max_key
? range_end->n_fields > 0
: range_end->n_fields == 0);
- mode1 = convert_search_mode_to_innobase(min_key ? min_key->flag :
- HA_READ_KEY_EXACT);
- mode2 = convert_search_mode_to_innobase(max_key ? max_key->flag :
- HA_READ_KEY_EXACT);
+ mode1 = convert_search_mode_to_innobase(
+ min_key ? min_key->flag : HA_READ_KEY_EXACT);
+
+ mode2 = convert_search_mode_to_innobase(
+ max_key ? max_key->flag : HA_READ_KEY_EXACT);
if (mode1 != PAGE_CUR_UNSUPP && mode2 != PAGE_CUR_UNSUPP) {
- n_rows = btr_estimate_n_rows_in_range(index, range_start,
- mode1, range_end,
- mode2);
+ if (dict_index_is_spatial(index)) {
+			/* Only min_key is used in a spatial index. */
+ n_rows = rtr_estimate_n_rows_in_range(
+ index, range_start, mode1);
+ } else {
+ n_rows = btr_estimate_n_rows_in_range(
+ index, range_start, mode1, range_end, mode2);
+ }
} else {
n_rows = HA_POS_ERROR;
@@ -13202,9 +13996,18 @@ ha_innobase::records_in_range(
mem_heap_free(heap);
+ DBUG_EXECUTE_IF(
+ "print_btr_estimate_n_rows_in_range_return_value",
+ push_warning_printf(
+ ha_thd(), Sql_condition::WARN_LEVEL_WARN,
+ ER_NO_DEFAULT,
+ "btr_estimate_n_rows_in_range(): %lld",
+ (longlong) n_rows);
+ );
+
func_exit:
- prebuilt->trx->op_info = (char*)"";
+ m_prebuilt->trx->op_info = (char*)"";
/* The MySQL optimizer seems to believe an estimate of 0 rows is
always accurate and may return the result 'Empty set' based on that.
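
Given the caveat in the preceding comment, here is a minimal standalone model of how such a range estimate is conventionally post-processed: an "unsupported search mode" marker (modelled as -1 here; the real HA_POS_ERROR constant differs) passes through, while a zero estimate is rounded up so the optimizer never treats it as an exact empty result. This is an illustration, not the handler code:

#include <cstdint>
#include <cstdio>

static const int64_t POS_ERROR = -1;	// stand-in for HA_POS_ERROR

static int64_t clamp_range_estimate(int64_t n_rows)
{
	if (n_rows == POS_ERROR) {
		return n_rows;		// unsupported search mode: no estimate
	}
	// A 0-row estimate would be taken as exact; report at least 1.
	return n_rows == 0 ? 1 : n_rows;
}

int main()
{
	std::printf("%lld %lld\n",
		    (long long) clamp_range_estimate(0),
		    (long long) clamp_range_estimate(42));
}
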
@@ -13222,8 +14025,8 @@ func_exit:
/*********************************************************************//**
Gives an UPPER BOUND to the number of rows in a table. This is used in
filesort.cc.
-@return upper bound of rows */
-UNIV_INTERN
+@return upper bound of rows */
+
ha_rows
ha_innobase::estimate_rows_upper_bound()
/*====================================*/
@@ -13231,7 +14034,6 @@ ha_innobase::estimate_rows_upper_bound()
const dict_index_t* index;
ulonglong estimate;
ulonglong local_data_file_length;
- ulint stat_n_leaf_pages;
DBUG_ENTER("estimate_rows_upper_bound");
@@ -13241,16 +14043,11 @@ ha_innobase::estimate_rows_upper_bound()
update_thd(ha_thd());
- prebuilt->trx->op_info = "calculating upper bound for table rows";
-
- /* In case MySQL calls this in the middle of a SELECT query, release
- possible adaptive hash latch to avoid deadlocks of threads */
+ m_prebuilt->trx->op_info = "calculating upper bound for table rows";
- trx_search_latch_release_if_reserved(prebuilt->trx);
+ index = dict_table_get_first_index(m_prebuilt->table);
- index = dict_table_get_first_index(prebuilt->table);
-
- stat_n_leaf_pages = index->stat_n_leaf_pages;
+ ulint stat_n_leaf_pages = index->stat_n_leaf_pages;
ut_a(stat_n_leaf_pages > 0);
@@ -13265,7 +14062,7 @@ ha_innobase::estimate_rows_upper_bound()
estimate = 2 * local_data_file_length
/ dict_index_calc_min_rec_len(index);
- prebuilt->trx->op_info = "";
+ m_prebuilt->trx->op_info = "";
/* Set num_rows less than MERGEBUFF to simulate the case where we do
not have enough space to merge the externally sorted file blocks. */
@@ -13281,8 +14078,8 @@ ha_innobase::estimate_rows_upper_bound()
How many seeks it will take to read through the table. This is to be
comparable to the number returned by records_in_range so that we can
decide if we should scan the table or use keys.
-@return estimated time measured in disk seeks */
-UNIV_INTERN
+@return estimated time measured in disk seeks */
+
double
ha_innobase::scan_time()
/*====================*/
@@ -13296,26 +14093,22 @@ ha_innobase::scan_time()
it we could end up returning uninitialized value to the caller,
which in the worst case could make some query plan go bogus or
issue a Valgrind warning. */
-#if 0
- /* avoid potential lock order violation with dict_table_stats_lock()
- below */
- update_thd(ha_thd());
- trx_search_latch_release_if_reserved(prebuilt->trx);
-#endif
+ if (m_prebuilt == NULL) {
+		/* In case of a derived table, the optimizer will try to
+		fetch statistics for the table even before it has been
+		created or opened. In such cases return a default estimate.
+		TODO: This will be further improved to return some
+		approximate estimate, but that would also need
+		pre-population of the stats structure. As of now the
+		approach is in sync with MyISAM. */
+ return(ulonglong2double(stats.data_file_length) / IO_SIZE + 2);
+ }
ulint stat_clustered_index_size;
-#if 0
- dict_table_stats_lock(prebuilt->table, RW_S_LATCH);
-#endif
-
- ut_a(prebuilt->table->stat_initialized);
+ ut_a(m_prebuilt->table->stat_initialized);
- stat_clustered_index_size = prebuilt->table->stat_clustered_index_size;
-
-#if 0
- dict_table_stats_unlock(prebuilt->table, RW_S_LATCH);
-#endif
+ stat_clustered_index_size =
+ m_prebuilt->table->stat_clustered_index_size;
return((double) stat_clustered_index_size);
}
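
The early-return branch added to scan_time() charges one logical seek per IO_SIZE block of data plus a constant 2, matching the MyISAM-style estimate the comment describes. A worked example of that arithmetic, with IO_SIZE assumed to be 4096 bytes for illustration:

#include <cstdio>

int main()
{
	const double IO_SIZE = 4096.0;				// assumed block size
	const double data_file_length = 16.0 * 1024 * 1024;	// 16 MiB of data

	// stats.data_file_length / IO_SIZE + 2, as in the fallback above.
	const double seeks = data_file_length / IO_SIZE + 2;
	std::printf("estimated scan cost: %.0f seeks\n", seeks);	// 4098
}
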
@@ -13323,8 +14116,8 @@ ha_innobase::scan_time()
/******************************************************************//**
Calculate the time it takes to read a set of ranges through an index
This enables us to optimise reads for clustered indexes.
-@return estimated time measured in disk seeks */
-UNIV_INTERN
+@return estimated time measured in disk seeks */
+
double
ha_innobase::read_time(
/*===================*/
@@ -13333,7 +14126,6 @@ ha_innobase::read_time(
ha_rows rows) /*!< in: estimated number of rows in the ranges */
{
ha_rows total_rows;
- double time_for_scan;
if (index != table->s->primary_key) {
/* Not clustered */
@@ -13343,7 +14135,7 @@ ha_innobase::read_time(
/* Assume that the read time is proportional to the scan time for all
rows + at most one seek per range. */
- time_for_scan = scan_time();
+ double time_for_scan = scan_time();
if ((total_rows = estimate_rows_upper_bound()) < rows) {
@@ -13355,7 +14147,7 @@ ha_innobase::read_time(
/******************************************************************//**
Return the size of the InnoDB memory buffer. */
-UNIV_INTERN
+
longlong
ha_innobase::get_memory_buffer_size() const
/*=======================================*/
@@ -13363,32 +14155,31 @@ ha_innobase::get_memory_buffer_size() const
return(innobase_buffer_pool_size);
}
+/** Update the system variable with the given value of the InnoDB
+buffer pool size.
+@param[in]	buf_pool_size	given value of buffer pool size. */
+void
+innodb_set_buf_pool_size(ulonglong buf_pool_size)
+{
+ innobase_buffer_pool_size = buf_pool_size;
+}
+
/*********************************************************************//**
-Calculates the key number used inside MySQL for an Innobase index. We will
-first check the "index translation table" for a match of the index to get
-the index number. If there does not exist an "index translation table",
-or not able to find the index in the translation table, then we will fall back
-to the traditional way of looping through dict_index_t list to find a
-match. In this case, we have to take into account if we generated a
-default clustered index for the table
+Calculates the key number used inside MySQL for an Innobase index.
@return the key number used inside MySQL */
static
int
innobase_get_mysql_key_number_for_index(
/*====================================*/
- INNOBASE_SHARE* share, /*!< in: share structure for index
- translation table. */
const TABLE* table, /*!< in: table in MySQL data
dictionary */
- dict_table_t* ib_table,/*!< in: table in Innodb data
+ dict_table_t* ib_table,/*!< in: table in InnoDB data
dictionary */
const dict_index_t* index) /*!< in: index */
{
const dict_index_t* ind;
unsigned int i;
- ut_a(index);
-
	/* If the index does not belong to the table object (ib_table),
	search the index->table object instead */
@@ -13401,7 +14192,8 @@ innobase_get_mysql_key_number_for_index(
i++;
}
- if (row_table_got_default_clust_index(index->table)) {
+ if (dict_index_is_clust(index)
+ && dict_index_is_auto_gen_clust(index)) {
ut_a(i > 0);
i--;
}
@@ -13409,27 +14201,8 @@ innobase_get_mysql_key_number_for_index(
return(i);
}
- /* If index translation table exists, we will first check
- the index through index translation table for a match. */
- if (share->idx_trans_tbl.index_mapping) {
- for (i = 0; i < share->idx_trans_tbl.index_count; i++) {
- if (share->idx_trans_tbl.index_mapping[i] == index) {
- return(i);
- }
- }
-
- /* Print an error message if we cannot find the index
- in the "index translation table". */
- if (*index->name != TEMP_INDEX_PREFIX) {
- sql_print_error("Cannot find index %s in InnoDB index "
- "translation table.", index->name);
- }
- }
-
- /* If we do not have an "index translation table", or not able
- to find the index in the translation table, we'll directly find
- matching index with information from mysql TABLE structure and
- InnoDB dict_index_t list */
+ /* Directly find matching index with information from mysql TABLE
+ structure and InnoDB dict_index_t list */
for (i = 0; i < table->s->keys; i++) {
ind = dict_table_get_index_on_name(
ib_table, table->key_info[i].name);
@@ -13447,12 +14220,13 @@ innobase_get_mysql_key_number_for_index(
/* Temp index is internal to InnoDB, that is
not present in the MySQL index list, so no
need to print such mismatch warning. */
- if (*(index->name) != TEMP_INDEX_PREFIX) {
+ if (index->is_committed()) {
sql_print_warning(
- "Find index %s in InnoDB index list "
- "but not its MySQL index number "
- "It could be an InnoDB internal index.",
- index->name);
+ "Found index %s in InnoDB index list"
+ " but not its MariaDB index number."
+ " It could be an InnoDB internal"
+ " index.",
+ index->name());
}
return(-1);
}
@@ -13467,8 +14241,7 @@ innobase_get_mysql_key_number_for_index(
Calculate Record Per Key value. Need to exclude the NULL value if
innodb_stats_method is set to "nulls_ignored"
@return estimated record per key value */
-static
-ha_rows
+rec_per_key_t
innodb_rec_per_key(
/*===============*/
dict_index_t* index, /*!< in: dict_index_t structure */
@@ -13476,18 +14249,25 @@ innodb_rec_per_key(
calculating rec per key */
ha_rows records) /*!< in: estimated total records */
{
- ha_rows rec_per_key;
+ rec_per_key_t rec_per_key;
ib_uint64_t n_diff;
ut_a(index->table->stat_initialized);
ut_ad(i < dict_index_get_n_unique(index));
+ ut_ad(!dict_index_is_spatial(index));
+
+ if (records == 0) {
+ /* "Records per key" is meaningless for empty tables.
+ Return 1.0 because that is most convenient to the Optimizer. */
+ return(1.0);
+ }
n_diff = index->stat_n_diff_key_vals[i];
if (n_diff == 0) {
- rec_per_key = records;
+ rec_per_key = static_cast<rec_per_key_t>(records);
} else if (srv_innodb_stats_method == SRV_STATS_NULLS_IGNORED) {
ib_uint64_t n_null;
ib_uint64_t n_non_null;
@@ -13510,16 +14290,23 @@ innodb_rec_per_key(
consider that the table consists mostly of NULL value.
Set rec_per_key to 1. */
if (n_diff <= n_null) {
- rec_per_key = 1;
+ rec_per_key = 1.0;
} else {
/* Need to exclude rows with NULL values from
rec_per_key calculation */
- rec_per_key = (ha_rows)
- ((records - n_null) / (n_diff - n_null));
+ rec_per_key
+ = static_cast<rec_per_key_t>(records - n_null)
+ / (n_diff - n_null);
}
} else {
DEBUG_SYNC_C("after_checking_for_0");
- rec_per_key = (ha_rows) (records / n_diff);
+ rec_per_key = static_cast<rec_per_key_t>(records) / n_diff;
+ }
+
+ if (rec_per_key < 1.0) {
+ /* Values below 1.0 are meaningless and must be due to the
+ stats being imprecise. */
+ rec_per_key = 1.0;
}
return(rec_per_key);
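
The arithmetic above is easy to check with plain types. The sketch below mirrors innodb_rec_per_key() using ordinary integers and a double result; nulls_ignored selects the SRV_STATS_NULLS_IGNORED branch, and the numbers in main() are invented:

#include <cstdint>
#include <cstdio>

static double rec_per_key_model(uint64_t records, uint64_t n_diff,
				uint64_t n_null, bool nulls_ignored)
{
	if (records == 0) {
		return 1.0;		// meaningless for empty tables
	}
	double rpk;
	if (n_diff == 0) {
		rpk = (double) records;
	} else if (nulls_ignored) {
		if (n_diff <= n_null) {
			rpk = 1.0;	// mostly NULLs: treat as unique
		} else {
			rpk = (double) (records - n_null)
				/ (double) (n_diff - n_null);
		}
	} else {
		rpk = (double) records / (double) n_diff;
	}
	return rpk < 1.0 ? 1.0 : rpk;	// clamp away noise from imprecise stats
}

int main()
{
	// 1000 rows, 100 distinct key values, 40 NULL rows:
	// (1000 - 40) / (100 - 40) = 16 rows per key value.
	std::printf("%.1f\n", rec_per_key_model(1000, 100, 40, true));
}
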
@@ -13529,7 +14316,7 @@ innodb_rec_per_key(
Returns statistics information of the table to the MySQL interpreter,
in various fields of the handle object.
@return HA_ERR_* error code or 0 */
-UNIV_INTERN
+
int
ha_innobase::info_low(
/*==================*/
@@ -13537,12 +14324,14 @@ ha_innobase::info_low(
bool is_analyze)
{
dict_table_t* ib_table;
- ha_rows rec_per_key;
ib_uint64_t n_rows;
+ char path[FN_REFLEN];
os_file_stat_t stat_info;
DBUG_ENTER("info");
+ DEBUG_SYNC_C("ha_innobase_info_low");
+
/* If we are forcing recovery at a high level, we will suppress
statistics calculation on tables, because that may crash the
server if an index is badly corrupted. */
@@ -13553,15 +14342,10 @@ ha_innobase::info_low(
update_thd(ha_thd());
- /* In case MySQL calls this in the middle of a SELECT query, release
- possible adaptive hash latch to avoid deadlocks of threads */
-
- prebuilt->trx->op_info = (char*)"returning various info to MySQL";
-
- trx_search_latch_release_if_reserved(prebuilt->trx);
+ m_prebuilt->trx->op_info = "returning various info to MariaDB";
- ib_table = prebuilt->table;
- DBUG_ASSERT(ib_table->n_ref_count > 0);
+ ib_table = m_prebuilt->table;
+ DBUG_ASSERT(ib_table->get_ref_count() > 0);
if (flag & HA_STATUS_TIME) {
if (is_analyze || innobase_stats_on_metadata) {
@@ -13569,7 +14353,7 @@ ha_innobase::info_low(
dict_stats_upd_option_t opt;
dberr_t ret;
- prebuilt->trx->op_info = "updating table statistics";
+ m_prebuilt->trx->op_info = "updating table statistics";
if (dict_stats_is_persistent_enabled(ib_table)) {
@@ -13588,19 +14372,20 @@ ha_innobase::info_low(
ret = dict_stats_update(ib_table, opt);
if (ret != DB_SUCCESS) {
- prebuilt->trx->op_info = "";
+ m_prebuilt->trx->op_info = "";
DBUG_RETURN(HA_ERR_GENERIC);
}
- prebuilt->trx->op_info =
- "returning various info to MySQL";
+ m_prebuilt->trx->op_info =
+ "returning various info to MariaDB";
}
+
+ stats.update_time = (ulong) ib_table->update_time;
}
if (flag & HA_STATUS_VARIABLE) {
- ulint page_size;
ulint stat_clustered_index_size;
ulint stat_sum_of_other_index_sizes;
@@ -13640,32 +14425,30 @@ ha_innobase::info_low(
n_rows can not be 0 unless the table is empty, set to 1
instead. The original problem of bug#29507 is actually
fixed in the server code. */
- if (thd_sql_command(user_thd) == SQLCOM_TRUNCATE) {
+ if (thd_sql_command(m_user_thd) == SQLCOM_TRUNCATE) {
n_rows = 1;
- /* We need to reset the prebuilt value too, otherwise
+ /* We need to reset the m_prebuilt value too, otherwise
checks for values greater than the last value written
to the table will fail and the autoinc counter will
not be updated. This will force write_row() into
attempting an update of the table's AUTOINC counter. */
- prebuilt->autoinc_last_value = 0;
+ m_prebuilt->autoinc_last_value = 0;
}
- page_size = dict_table_zip_size(ib_table);
- if (page_size == 0) {
- page_size = UNIV_PAGE_SIZE;
- }
+ const page_size_t& page_size
+ = dict_table_page_size(ib_table);
stats.records = (ha_rows) n_rows;
stats.deleted = 0;
stats.data_file_length
= ((ulonglong) stat_clustered_index_size)
- * page_size;
+ * page_size.physical();
stats.index_file_length
= ((ulonglong) stat_sum_of_other_index_sizes)
- * page_size;
+ * page_size.physical();
/* Since fsp_get_available_space_in_free_extents() is
acquiring latches inside InnoDB, we do not call it if we
@@ -13680,19 +14463,19 @@ ha_innobase::info_low(
the ha_statistics' constructor. Also we only
need delete_length to be set when
HA_STATUS_VARIABLE_EXTRA is set */
- } else if (UNIV_UNLIKELY
- (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE)) {
+ } else if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
/* Avoid accessing the tablespace if
innodb_crash_recovery is set to a high value. */
stats.delete_length = 0;
} else {
- ullint avail_space;
+ uintmax_t avail_space;
avail_space = fsp_get_available_space_in_free_extents(
ib_table->space);
- if (avail_space == ULLINT_UNDEFINED) {
+ if (avail_space == UINTMAX_MAX) {
THD* thd;
+ char errbuf[MYSYS_STRERROR_SIZE];
thd = ha_thd();
@@ -13700,13 +14483,15 @@ ha_innobase::info_low(
thd,
Sql_condition::WARN_LEVEL_WARN,
ER_CANT_GET_STAT,
- "InnoDB: Trying to get the free "
- "space for table %s but its "
- "tablespace has been discarded or "
- "the .ibd file is missing. Setting "
- "the free space to zero. "
- "(errno: %M)",
- ib_table->name, errno);
+ "InnoDB: Trying to get the free"
+ " space for table %s but its"
+ " tablespace has been discarded or"
+ " the .ibd file is missing. Setting"
+ " the free space to zero."
+ " (errno: %d - %s)",
+ ib_table->name.m_name, errno,
+ my_strerror(errbuf, sizeof(errbuf),
+ errno));
stats.delete_length = 0;
} else {
@@ -13715,7 +14500,7 @@ ha_innobase::info_low(
}
stats.check_time = 0;
- stats.mrr_length_per_rec= ref_length + 8; // 8 = max(sizeof(void *));
+ stats.mrr_length_per_rec= ref_length + 8; // 8 = max(sizeof(void *));
if (stats.records == 0) {
stats.mean_rec_length = 0;
@@ -13727,12 +14512,11 @@ ha_innobase::info_low(
if (flag & HA_STATUS_CONST) {
ulong i;
- char path[FN_REFLEN];
/* Verify the number of index in InnoDB and MySQL
- matches up. If prebuilt->clust_index_was_generated
+ matches up. If m_prebuilt->clust_index_was_generated
holds, InnoDB defines GEN_CLUST_INDEX internally */
ulint num_innodb_index = UT_LIST_GET_LEN(ib_table->indexes)
- - prebuilt->clust_index_was_generated;
+ - m_prebuilt->clust_index_was_generated;
if (table->s->keys < num_innodb_index) {
/* If there are too many indexes defined
inside InnoDB, ignore those that are being
@@ -13752,7 +14536,7 @@ ha_innobase::info_low(
time frame, dict_index_is_online_ddl()
would not hold and the index would
still not be included in TABLE_SHARE. */
- if (*index->name == TEMP_INDEX_PREFIX) {
+ if (!index->is_committed()) {
num_innodb_index--;
}
}
@@ -13767,7 +14551,7 @@ ha_innobase::info_low(
if (table->s->keys != num_innodb_index) {
ib_table->dict_frm_mismatch = DICT_FRM_INCONSISTENT_KEYS;
- ib_push_frm_error(user_thd, ib_table, table, num_innodb_index, true);
+ ib_push_frm_error(m_user_thd, ib_table, table, num_innodb_index, true);
}
if (!(flag & HA_STATUS_NO_LOCK)) {
@@ -13778,111 +14562,96 @@ ha_innobase::info_low(
for (i = 0; i < table->s->keys; i++) {
ulong j;
- rec_per_key = 1;
- /* We could get index quickly through internal
- index mapping with the index translation table.
- The identity of index (match up index name with
- that of table->key_info[i]) is already verified in
- innobase_get_index(). */
+
dict_index_t* index = innobase_get_index(i);
if (index == NULL) {
ib_table->dict_frm_mismatch = DICT_FRM_INCONSISTENT_KEYS;
- ib_push_frm_error(user_thd, ib_table, table, num_innodb_index, true);
+ ib_push_frm_error(m_user_thd, ib_table, table, num_innodb_index, true);
break;
}
- for (j = 0; j < table->key_info[i].ext_key_parts; j++) {
+ KEY* key = &table->key_info[i];
+
+ for (j = 0; j < key->ext_key_parts; j++) {
+
+ if ((key->flags & HA_FULLTEXT)
+ || (key->flags & HA_SPATIAL)) {
- if (table->key_info[i].flags & HA_FULLTEXT) {
- /* The whole concept has no validity
- for FTS indexes. */
- table->key_info[i].rec_per_key[j] = 1;
+				/* The records-per-key estimate does not
+				apply to FTS or spatial indexes. */
+ /*
+ key->rec_per_key[j] = 1;
+ key->set_records_per_key(j, 1.0);
+ */
continue;
}
if (j + 1 > index->n_uniq) {
sql_print_error(
- "Index %s of %s has %lu columns"
+ "Index %s of %s has %u columns"
" unique inside InnoDB, but "
"MySQL is asking statistics for"
" %lu columns. Have you mixed "
"up .frm files from different "
- "installations? "
- "See " REFMAN
- "innodb-troubleshooting.html\n",
- index->name,
- ib_table->name,
- (unsigned long)
- index->n_uniq, j + 1);
+				"installations? %s",
+ index->name(),
+ ib_table->name.m_name,
+ index->n_uniq, j + 1,
+ TROUBLESHOOTING_MSG);
break;
}
- DBUG_EXECUTE_IF("ib_ha_innodb_stat_not_initialized",
- index->table->stat_initialized = FALSE;);
-
- if (!ib_table->stat_initialized ||
- (index->table != ib_table ||
- !index->table->stat_initialized)) {
- fprintf(stderr,
- "InnoDB: Warning: Index %s points to table %s"
- " and ib_table %s statistics is initialized %d "
- " but index table %s initialized %d "
- " mysql table is %s. Have you mixed "
- "up .frm files from different "
- "installations? "
- "See " REFMAN
- "innodb-troubleshooting.html\n",
- index->name,
- index->table->name,
- ib_table->name,
- ib_table->stat_initialized,
- index->table->name,
- index->table->stat_initialized,
- table->s->table_name.str
- );
-
- /* This is better than
- assert on below function */
- dict_stats_init(index->table);
- }
-
- rec_per_key = innodb_rec_per_key(
- index, j, stats.records);
+ /* innodb_rec_per_key() will use
+ index->stat_n_diff_key_vals[] and the value we
+ pass index->table->stat_n_rows. Both are
+ calculated by ANALYZE and by the background
+ stats gathering thread (which kicks in when too
+ much of the table has been changed). In
+ addition table->stat_n_rows is adjusted with
+ each DML (e.g. ++ on row insert). Those
+ adjustments are not MVCC'ed and not even
+ reversed on rollback. So,
+ index->stat_n_diff_key_vals[] and
+ index->table->stat_n_rows could have been
+ calculated at different time. This is
+ acceptable. */
+
+ ulong rec_per_key_int = static_cast<ulong>(
+ innodb_rec_per_key(index, j,
+ stats.records));
/* Since MySQL seems to favor table scans
too much over index searches, we pretend
index selectivity is 2 times better than
our estimate: */
- rec_per_key = rec_per_key / 2;
+ rec_per_key_int = rec_per_key_int / 2;
- if (rec_per_key == 0) {
- rec_per_key = 1;
+ if (rec_per_key_int == 0) {
+ rec_per_key_int = 1;
}
- table->key_info[i].rec_per_key[j] =
- rec_per_key >= ~(ulong) 0 ? ~(ulong) 0 :
- (ulong) rec_per_key;
+ key->rec_per_key[j] = rec_per_key_int;
}
-
}
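
The final value handed to the server in the loops above is the innodb_rec_per_key() statistic cast to an integer, halved to bias the optimizer toward index use, and floored at 1. A small model of that adjustment:

#include <cstdio>

static unsigned long adjust_rec_per_key(double rec_per_key)
{
	// Pretend selectivity is twice as good as estimated, to counter
	// the optimizer's preference for table scans; never report 0.
	unsigned long v = (unsigned long) rec_per_key / 2;
	return v == 0 ? 1 : v;
}

int main()
{
	std::printf("%lu %lu\n",
		    adjust_rec_per_key(16.0),	// 8
		    adjust_rec_per_key(1.2));	// 1
}
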
if (!(flag & HA_STATUS_NO_LOCK)) {
dict_table_stats_unlock(ib_table, RW_S_LATCH);
}
- my_snprintf(path, sizeof(path), "%s/%s%s",
- mysql_data_home,
- table->s->normalized_path.str,
- reg_ext);
+ snprintf(path, sizeof(path), "%s/%s%s",
+ mysql_data_home, table->s->normalized_path.str,
+ reg_ext);
unpack_filename(path,path);
/* Note that we do not know the access time of the table,
nor the CHECK TABLE time, nor the UPDATE or INSERT time. */
- if (os_file_get_status(path, &stat_info, false) == DB_SUCCESS) {
+ if (os_file_get_status(
+ path, &stat_info, false,
+ srv_read_only_mode) == DB_SUCCESS) {
stats.create_time = (ulong) stat_info.ctime;
}
}
@@ -13890,25 +14659,24 @@ ha_innobase::info_low(
if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
goto func_exit;
- }
- if (flag & HA_STATUS_ERRKEY) {
+ } else if (flag & HA_STATUS_ERRKEY) {
const dict_index_t* err_index;
- ut_a(prebuilt->trx);
- ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
+ ut_a(m_prebuilt->trx);
+ ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N);
- err_index = trx_get_error_info(prebuilt->trx);
+ err_index = trx_get_error_info(m_prebuilt->trx);
if (err_index) {
errkey = innobase_get_mysql_key_number_for_index(
- share, table, ib_table, err_index);
+ table, ib_table, err_index);
} else {
errkey = (unsigned int) (
- (prebuilt->trx->error_key_num
+ (m_prebuilt->trx->error_key_num
== ULINT_UNDEFINED)
? ~0
- : prebuilt->trx->error_key_num);
+ : m_prebuilt->trx->error_key_num);
}
}
@@ -13917,7 +14685,7 @@ ha_innobase::info_low(
}
func_exit:
- prebuilt->trx->op_info = (char*)"";
+ m_prebuilt->trx->op_info = (char*)"";
DBUG_RETURN(0);
}
@@ -13926,31 +14694,29 @@ func_exit:
Returns statistics information of the table to the MySQL interpreter,
in various fields of the handle object.
@return HA_ERR_* error code or 0 */
-UNIV_INTERN
+
int
ha_innobase::info(
/*==============*/
uint flag) /*!< in: what information is requested */
{
- return(this->info_low(flag, false /* not ANALYZE */));
+ return(info_low(flag, false /* not ANALYZE */));
}
-/**********************************************************************//**
+/*
Updates index cardinalities of the table, based on random dives into
each index tree. This does NOT calculate exact statistics on the table.
-@return HA_ADMIN_* error code or HA_ADMIN_OK */
-UNIV_INTERN
+@return HA_ADMIN_* error code or HA_ADMIN_OK */
+
int
ha_innobase::analyze(
/*=================*/
THD* thd, /*!< in: connection thread handle */
HA_CHECK_OPT* check_opt) /*!< in: currently ignored */
{
- int ret;
-
- /* Simply call this->info_low() with all the flags
+ /* Simply call info_low() with all the flags
and request recalculation of the statistics */
- ret = this->info_low(
+ int ret = info_low(
HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE,
true /* this is ANALYZE */);
@@ -13961,17 +14727,132 @@ ha_innobase::analyze(
return(HA_ADMIN_OK);
}
+/*****************************************************************//**
+Defragment table.
+@return error number */
+UNIV_INTERN
+int
+ha_innobase::defragment_table(
+/*==========================*/
+ const char* name, /*!< in: table name */
+ const char* index_name, /*!< in: index name */
+	bool		async)	/*!< in: whether to return without
+				waiting for completion */
+{
+ char norm_name[FN_REFLEN];
+ dict_table_t* table = NULL;
+ dict_index_t* index = NULL;
+ ibool one_index = (index_name != 0);
+ int ret = 0;
+ dberr_t err = DB_SUCCESS;
+
+ if (!srv_defragment) {
+ return ER_FEATURE_DISABLED;
+ }
+
+ normalize_table_name(norm_name, name);
+
+ table = dict_table_open_on_name(norm_name, FALSE,
+ FALSE, DICT_ERR_IGNORE_FK_NOKEY);
+
+ for (index = dict_table_get_first_index(table); index;
+ index = dict_table_get_next_index(index)) {
+
+ if (index->is_corrupted()) {
+ continue;
+ }
+
+ if (dict_index_is_spatial(index)) {
+ /* Do not try to defragment spatial indexes,
+ because doing it properly would require
+ appropriate logic around the SSN (split
+ sequence number). */
+ continue;
+ }
+
+ if (index->page == FIL_NULL) {
+ /* Do not defragment auxiliary tables related
+ to FULLTEXT INDEX. */
+ ut_ad(index->type & DICT_FTS);
+ continue;
+ }
+
+ if (one_index && strcasecmp(index_name, index->name) != 0) {
+ continue;
+ }
+
+ if (btr_defragment_find_index(index)) {
+ // We borrow this error code. When the same index is
+ // already in the defragmentation queue, issue another
+ // defragmentation only introduces overhead. We return
+ // an error here to let the user know this is not
+ // necessary. Note that this will fail a query that's
+ // trying to defragment a full table if one of the
+		// indices in that table is already in defragmentation.
+		// We choose this behavior so the user is aware of this
+		// rather than silently defragmenting other indices of
+		// that table.
+ ret = ER_SP_ALREADY_EXISTS;
+ break;
+ }
+
+ os_event_t event = btr_defragment_add_index(index, async, &err);
+
+ if (err != DB_SUCCESS) {
+ push_warning_printf(
+ current_thd,
+ Sql_condition::WARN_LEVEL_WARN,
+ ER_NO_SUCH_TABLE,
+ "Table %s is encrypted but encryption service or"
+				" used key_id is not available."
+				" Can't continue checking table.",
+ index->table->name.m_name);
+
+ ret = convert_error_code_to_mysql(err, 0, current_thd);
+ break;
+ }
+
+ if (!async && event) {
+			while (os_event_wait_time(event, 1000000)) {
+ if (thd_killed(current_thd)) {
+ btr_defragment_remove_index(index);
+ ret = ER_QUERY_INTERRUPTED;
+ break;
+ }
+ }
+ os_event_destroy(event);
+ }
+
+ if (ret) {
+ break;
+ }
+
+ if (one_index) {
+ one_index = FALSE;
+ break;
+ }
+ }
+
+ dict_table_close(table, FALSE, FALSE);
+
+ if (ret == 0 && one_index) {
+ ret = ER_NO_SUCH_INDEX;
+ }
+
+ return ret;
+}
+
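
When called synchronously, defragment_table() above parks on an event and wakes once per second to check whether the session was killed. The standalone sketch below models that cancellable wait with std::condition_variable in place of InnoDB's os_event; all names are invented:

#include <chrono>
#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <thread>

static std::mutex m;
static std::condition_variable cv;
static bool done = false;
static bool killed = false;

static bool wait_for_defragment()
{
	std::unique_lock<std::mutex> lk(m);
	while (!done) {
		// Wake at least once per second to notice cancellation,
		// mirroring the os_event_wait_time(event, 1000000) loop.
		cv.wait_for(lk, std::chrono::seconds(1));
		if (killed) {
			return false;	// caller removes the index from the queue
		}
	}
	return true;
}

int main()
{
	std::thread worker([] {
		std::this_thread::sleep_for(std::chrono::milliseconds(100));
		std::lock_guard<std::mutex> lk(m);
		done = true;
		cv.notify_one();
	});
	std::printf("completed: %d\n", wait_for_defragment());
	worker.join();
}
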
/**********************************************************************//**
This is mapped to "ALTER TABLE tablename ENGINE=InnoDB", which rebuilds
the table in MySQL. */
-UNIV_INTERN
+
int
ha_innobase::optimize(
/*==================*/
THD* thd, /*!< in: connection thread handle */
HA_CHECK_OPT* check_opt) /*!< in: currently ignored */
{
- /*FTS-FIXME: Since MySQL doesn't support engine-specific commands,
+
+ /* FTS-FIXME: Since MySQL doesn't support engine-specific commands,
we have to hijack some existing command in order to be able to test
the new admin commands added in InnoDB's FTS support. For now, we
use MySQL's OPTIMIZE command, normally mapped to ALTER TABLE in
@@ -13981,10 +14862,10 @@ ha_innobase::optimize(
calls to OPTIMIZE, which is undesirable. */
bool try_alter = true;
- if (srv_defragment) {
+ if (!m_prebuilt->table->is_temporary() && srv_defragment) {
int err;
- err = defragment_table(prebuilt->table->name, NULL, false);
+ err = defragment_table(m_prebuilt->table->name.m_name, NULL, false);
if (err == 0) {
try_alter = false;
@@ -13992,7 +14873,7 @@ ha_innobase::optimize(
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
err,
"InnoDB: Cannot defragment table %s: returned error code %d\n",
- prebuilt->table->name, err);
+ m_prebuilt->table->name.m_name, err);
			if (err == ER_SP_ALREADY_EXISTS) {
try_alter = false;
@@ -14001,10 +14882,10 @@ ha_innobase::optimize(
}
if (innodb_optimize_fulltext_only) {
- if (prebuilt->table->fts && prebuilt->table->fts->cache
- && !dict_table_is_discarded(prebuilt->table)) {
- fts_sync_table(prebuilt->table, false, true, false);
- fts_optimize_table(prebuilt->table);
+ if (m_prebuilt->table->fts && m_prebuilt->table->fts->cache
+ && !dict_table_is_discarded(m_prebuilt->table)) {
+ fts_sync_table(m_prebuilt->table);
+ fts_optimize_table(m_prebuilt->table);
}
try_alter = false;
}
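
ha_innobase::optimize() above is a small decision tree: defragment in place when innodb_defragment is enabled (and the table is not temporary), run the FTS optimizer when innodb_optimize_fulltext_only is set, and otherwise report that the server should fall through to ALTER TABLE ... ENGINE=InnoDB. A standalone model of that control flow, with stubbed-out actions:

#include <cstdio>

static bool defragment_enabled = true;		// models srv_defragment
static bool optimize_fulltext_only = false;	// models innodb_optimize_fulltext_only

static int defragment_table() { return 0; }	// 0 = success
static void fts_optimize() {}

static bool optimize_model(bool is_temporary, bool has_fts)
{
	bool try_alter = true;	// fall back to ALTER TABLE ... ENGINE=InnoDB

	if (!is_temporary && defragment_enabled) {
		if (defragment_table() == 0) {
			try_alter = false;	// defragmented in place
		}
	}
	if (optimize_fulltext_only) {
		if (has_fts) {
			fts_optimize();
		}
		try_alter = false;	// never rebuild in this mode
	}
	return try_alter;
}

int main()
{
	std::printf("rebuild? %d\n", optimize_model(false, false));
}
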
@@ -14016,8 +14897,8 @@ ha_innobase::optimize(
Tries to check that an InnoDB table is not corrupted. If corruption is
noticed, prints to stderr information about it. In case of corruption
may also assert a failure and crash the server.
-@return HA_ADMIN_CORRUPT or HA_ADMIN_OK */
-UNIV_INTERN
+@return HA_ADMIN_CORRUPT or HA_ADMIN_OK */
+
int
ha_innobase::check(
/*===============*/
@@ -14029,22 +14910,21 @@ ha_innobase::check(
ulint n_rows_in_table = ULINT_UNDEFINED;
bool is_ok = true;
ulint old_isolation_level;
- ibool table_corrupted;
+ dberr_t ret;
DBUG_ENTER("ha_innobase::check");
DBUG_ASSERT(thd == ha_thd());
- ut_a(prebuilt->trx);
- ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
- ut_a(prebuilt->trx == thd_to_trx(thd));
+ ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N);
+ ut_a(m_prebuilt->trx == thd_to_trx(thd));
- if (prebuilt->mysql_template == NULL) {
+ if (m_prebuilt->mysql_template == NULL) {
/* Build the template; we will use a dummy template
in index scans done in checking */
build_template(true);
}
- if (dict_table_is_discarded(prebuilt->table)) {
+ if (dict_table_is_discarded(m_prebuilt->table)) {
ib_senderrf(
thd,
@@ -14054,8 +14934,8 @@ ha_innobase::check(
DBUG_RETURN(HA_ADMIN_CORRUPT);
- } else if (prebuilt->table->file_unreadable &&
- fil_space_get(prebuilt->table->space) == NULL) {
+ } else if (!m_prebuilt->table->is_readable() &&
+ !fil_space_get(m_prebuilt->table->space)) {
ib_senderrf(
thd, IB_LOG_LEVEL_ERROR,
@@ -14065,87 +14945,71 @@ ha_innobase::check(
DBUG_RETURN(HA_ADMIN_CORRUPT);
}
- if (prebuilt->table->corrupted) {
- char index_name[MAX_FULL_NAME_LEN + 1];
+ m_prebuilt->trx->op_info = "checking table";
+
+ if (m_prebuilt->table->corrupted) {
/* If some previous operation has marked the table as
corrupted in memory, and has not propagated such to
clustered index, we will do so here */
- index = dict_table_get_first_index(prebuilt->table);
+ index = dict_table_get_first_index(m_prebuilt->table);
- if (!dict_index_is_corrupted(index)) {
- row_mysql_lock_data_dictionary(prebuilt->trx);
- dict_set_corrupted(index, prebuilt->trx, "CHECK TABLE");
- row_mysql_unlock_data_dictionary(prebuilt->trx);
+ if (!index->is_corrupted()) {
+ dict_set_corrupted(
+ index, m_prebuilt->trx, "CHECK TABLE");
}
- innobase_format_name(index_name, sizeof index_name,
- index->name, TRUE);
-
- push_warning_printf(thd,
+ push_warning_printf(m_user_thd,
Sql_condition::WARN_LEVEL_WARN,
HA_ERR_INDEX_CORRUPT,
"InnoDB: Index %s is marked as"
- " corrupted", index_name);
+ " corrupted",
+ index->name());
/* Now that the table is already marked as corrupted,
there is no need to check any index of this table */
- prebuilt->trx->op_info = "";
+ m_prebuilt->trx->op_info = "";
DBUG_RETURN(HA_ADMIN_CORRUPT);
}
- prebuilt->trx->op_info = "checking table";
-
- old_isolation_level = prebuilt->trx->isolation_level;
+ old_isolation_level = m_prebuilt->trx->isolation_level;
/* We must run the index record counts at an isolation level
>= READ COMMITTED, because a dirty read can see a wrong number
	of records in some index; to play it safe, we always use
	REPEATABLE READ here */
+ m_prebuilt->trx->isolation_level = TRX_ISO_REPEATABLE_READ;
- prebuilt->trx->isolation_level = TRX_ISO_REPEATABLE_READ;
-
- /* Check whether the table is already marked as corrupted
- before running the check table */
- table_corrupted = prebuilt->table->corrupted;
-
- /* Reset table->corrupted bit so that check table can proceed to
- do additional check */
- prebuilt->table->corrupted = FALSE;
+ ut_ad(!m_prebuilt->table->corrupted);
- for (index = dict_table_get_first_index(prebuilt->table);
+ for (index = dict_table_get_first_index(m_prebuilt->table);
index != NULL;
index = dict_table_get_next_index(index)) {
- char index_name[MAX_FULL_NAME_LEN + 1];
-
/* If this is an index being created or dropped, skip */
- if (*index->name == TEMP_INDEX_PREFIX) {
+ if (!index->is_committed()) {
continue;
}
- if (!(check_opt->flags & T_QUICK)) {
+ if (!(check_opt->flags & T_QUICK)
+ && !index->is_corrupted()) {
/* Enlarge the fatal lock wait timeout during
CHECK TABLE. */
- os_increment_counter_by_amount(
- server_mutex,
- srv_fatal_semaphore_wait_threshold,
+ my_atomic_addlong(
+ &srv_fatal_semaphore_wait_threshold,
SRV_SEMAPHORE_WAIT_EXTENSION);
- dberr_t err = btr_validate_index(index, prebuilt->trx);
+
+ dberr_t err = btr_validate_index(
+ index, m_prebuilt->trx, false);
/* Restore the fatal lock wait timeout after
CHECK TABLE. */
- os_decrement_counter_by_amount(
- server_mutex,
- srv_fatal_semaphore_wait_threshold,
- SRV_SEMAPHORE_WAIT_EXTENSION);
+ my_atomic_addlong(
+ &srv_fatal_semaphore_wait_threshold,
+ -SRV_SEMAPHORE_WAIT_EXTENSION);
if (err != DB_SUCCESS) {
is_ok = false;
- innobase_format_name(
- index_name, sizeof index_name,
- index->name, TRUE);
-
if (err == DB_DECRYPTION_FAILED) {
push_warning_printf(
thd,
@@ -14154,7 +15018,7 @@ ha_innobase::check(
"Table %s is encrypted but encryption service or"
" used key_id is not available. "
" Can't continue checking table.",
- index->table->name);
+ index->table->name.m_name);
} else {
push_warning_printf(
thd,
@@ -14162,7 +15026,7 @@ ha_innobase::check(
ER_NOT_KEYFILE,
"InnoDB: The B-tree of"
" index %s is corrupted.",
- index_name);
+ index->name());
}
continue;
@@ -14172,131 +15036,117 @@ ha_innobase::check(
/* Instead of invoking change_active_index(), set up
a dummy template for non-locking reads, disabling
access to the clustered index. */
- prebuilt->index = index;
+ m_prebuilt->index = index;
- prebuilt->index_usable = row_merge_is_index_usable(
- prebuilt->trx, prebuilt->index);
+ m_prebuilt->index_usable = row_merge_is_index_usable(
+ m_prebuilt->trx, m_prebuilt->index);
DBUG_EXECUTE_IF(
"dict_set_index_corrupted",
- if (!dict_index_is_clust(index)) {
- prebuilt->index_usable = FALSE;
- row_mysql_lock_data_dictionary(prebuilt->trx);
- dict_set_corrupted(index, prebuilt->trx, "dict_set_index_corrupted");
- row_mysql_unlock_data_dictionary(prebuilt->trx);
+ if (!index->is_primary()) {
+ m_prebuilt->index_usable = FALSE;
+ // row_mysql_lock_data_dictionary(m_prebuilt->trx);
+ dict_set_corrupted(index, m_prebuilt->trx, "dict_set_index_corrupted");
+ // row_mysql_unlock_data_dictionary(m_prebuilt->trx);
});
- if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
- innobase_format_name(
- index_name, sizeof index_name,
- prebuilt->index->name, TRUE);
-
- if (dict_index_is_corrupted(prebuilt->index)) {
+ if (UNIV_UNLIKELY(!m_prebuilt->index_usable)) {
+ if (index->is_corrupted()) {
push_warning_printf(
- user_thd,
+ m_user_thd,
Sql_condition::WARN_LEVEL_WARN,
HA_ERR_INDEX_CORRUPT,
"InnoDB: Index %s is marked as"
" corrupted",
- index_name);
+ index->name());
is_ok = false;
} else {
push_warning_printf(
- thd,
+ m_user_thd,
Sql_condition::WARN_LEVEL_WARN,
HA_ERR_TABLE_DEF_CHANGED,
"InnoDB: Insufficient history for"
" index %s",
- index_name);
+ index->name());
}
continue;
}
- prebuilt->sql_stat_start = TRUE;
- prebuilt->template_type = ROW_MYSQL_DUMMY_TEMPLATE;
- prebuilt->n_template = 0;
- prebuilt->need_to_access_clustered = FALSE;
+ m_prebuilt->sql_stat_start = TRUE;
+ m_prebuilt->template_type = ROW_MYSQL_DUMMY_TEMPLATE;
+ m_prebuilt->n_template = 0;
+ m_prebuilt->need_to_access_clustered = FALSE;
- dtuple_set_n_fields(prebuilt->search_tuple, 0);
+ dtuple_set_n_fields(m_prebuilt->search_tuple, 0);
- prebuilt->select_lock_type = LOCK_NONE;
+ m_prebuilt->select_lock_type = LOCK_NONE;
- if (!row_check_index_for_mysql(prebuilt, index, &n_rows)) {
- innobase_format_name(
- index_name, sizeof index_name,
- index->name, TRUE);
+ /* Scan this index. */
+ if (dict_index_is_spatial(index)) {
+ ret = row_count_rtree_recs(m_prebuilt, &n_rows);
+ } else {
+ ret = row_scan_index_for_mysql(
+ m_prebuilt, index, &n_rows);
+ }
+ DBUG_EXECUTE_IF(
+ "dict_set_index_corrupted",
+ if (!index->is_primary()) {
+ ret = DB_CORRUPTION;
+ });
+
+ if (ret == DB_INTERRUPTED || thd_killed(m_user_thd)) {
+ /* Do not report error since this could happen
+ during shutdown */
+ break;
+ }
+ if (ret != DB_SUCCESS) {
+ /* Assume some kind of corruption. */
push_warning_printf(
thd, Sql_condition::WARN_LEVEL_WARN,
ER_NOT_KEYFILE,
"InnoDB: The B-tree of"
" index %s is corrupted.",
- index_name);
+ index->name());
is_ok = false;
dict_set_corrupted(
- index, prebuilt->trx, "CHECK TABLE-check index");
+ index, m_prebuilt->trx, "CHECK TABLE-check index");
}
- if (thd_kill_level(user_thd)) {
- break;
- }
-
-#if 0
- fprintf(stderr, "%lu entries in index %s\n", n_rows,
- index->name);
-#endif
- if (index == dict_table_get_first_index(prebuilt->table)) {
+ if (index == dict_table_get_first_index(m_prebuilt->table)) {
n_rows_in_table = n_rows;
} else if (!(index->type & DICT_FTS)
&& (n_rows != n_rows_in_table)) {
push_warning_printf(
thd, Sql_condition::WARN_LEVEL_WARN,
ER_NOT_KEYFILE,
- "InnoDB: Index '%-.200s' contains %lu"
- " entries, should be %lu.",
- index->name,
- (ulong) n_rows,
- (ulong) n_rows_in_table);
+ "InnoDB: Index '%-.200s' contains " ULINTPF
+ " entries, should be " ULINTPF ".",
+ index->name(), n_rows, n_rows_in_table);
is_ok = false;
dict_set_corrupted(
- index, prebuilt->trx,
+ index, m_prebuilt->trx,
"CHECK TABLE; Wrong count");
}
}
- if (table_corrupted) {
- /* If some previous operation has marked the table as
- corrupted in memory, and has not propagated such to
- clustered index, we will do so here */
- index = dict_table_get_first_index(prebuilt->table);
-
- if (!dict_index_is_corrupted(index)) {
- dict_set_corrupted(
- index, prebuilt->trx, "CHECK TABLE");
- }
- prebuilt->table->corrupted = TRUE;
- }
-
/* Restore the original isolation level */
- prebuilt->trx->isolation_level = old_isolation_level;
-
+ m_prebuilt->trx->isolation_level = old_isolation_level;
+#ifdef BTR_CUR_HASH_ADAPT
+# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
/* We validate the whole adaptive hash index for all tables
at every CHECK TABLE only when QUICK flag is not present. */
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
if (!(check_opt->flags & T_QUICK) && !btr_search_validate()) {
push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
ER_NOT_KEYFILE,
"InnoDB: The adaptive hash index is corrupted.");
is_ok = false;
}
-#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
-
- prebuilt->trx->op_info = "";
- if (thd_kill_level(user_thd)) {
- my_error(ER_QUERY_INTERRUPTED, MYF(0));
- }
+# endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
+#endif /* BTR_CUR_HASH_ADAPT */
+ m_prebuilt->trx->op_info = "";
DBUG_RETURN(is_ok ? HA_ADMIN_OK : HA_ADMIN_CORRUPT);
}
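
check() above widens srv_fatal_semaphore_wait_threshold by SRV_SEMAPHORE_WAIT_EXTENSION around each btr_validate_index() call and must undo it on every exit path. A standalone RAII sketch of the same paired increment/decrement, using std::atomic in place of my_atomic_addlong; the threshold and extension values here are illustrative:

#include <atomic>
#include <cstdio>

static std::atomic<long> fatal_wait_threshold(600);	// seconds, illustrative

class Semaphore_wait_extension {
public:
	explicit Semaphore_wait_extension(long ext) : m_ext(ext)
	{ fatal_wait_threshold.fetch_add(m_ext); }	// enlarge for the check
	~Semaphore_wait_extension()
	{ fatal_wait_threshold.fetch_sub(m_ext); }	// always restored
private:
	long m_ext;
};

int main()
{
	{
		Semaphore_wait_extension guard(7200);	// around the validation
		std::printf("during: %ld\n", fatal_wait_threshold.load());
	}
	std::printf("after: %ld\n", fatal_wait_threshold.load());
}
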
@@ -14314,7 +15164,7 @@ ha_innobase::update_table_comment(
{
uint length = (uint) strlen(comment);
char* str=0;
- long flen;
+ size_t flen;
std::string fk_str;
/* We do not know if MySQL can call this function before calling
@@ -14327,36 +15177,26 @@ ha_innobase::update_table_comment(
update_thd(ha_thd());
- prebuilt->trx->op_info = (char*)"returning table comment";
-
- /* In case MySQL calls this in the middle of a SELECT query, release
- possible adaptive hash latch to avoid deadlocks of threads */
-
- trx_search_latch_release_if_reserved(prebuilt->trx);
+ m_prebuilt->trx->op_info = "returning table comment";
#define SSTR( x ) reinterpret_cast< std::ostringstream & >( \
( std::ostringstream() << std::dec << x ) ).str()
fk_str.append("InnoDB free: ");
fk_str.append(SSTR(fsp_get_available_space_in_free_extents(
- prebuilt->table->space)));
+ m_prebuilt->table->space)));
fk_str.append(dict_print_info_on_foreign_keys(
- FALSE, prebuilt->trx,
- prebuilt->table));
+ FALSE, m_prebuilt->trx,
+ m_prebuilt->table));
flen = fk_str.length();
- if (flen < 0) {
- flen = 0;
- } else if (length + flen + 3 > 64000) {
+ if (length + flen + 3 > 64000) {
flen = 64000 - 3 - length;
}
-
/* allocate buffer for the full string */
-
str = (char*) my_malloc(length + flen + 3, MYF(0));
-
if (str) {
char* pos = str + length;
if (length) {
@@ -14364,12 +15204,11 @@ ha_innobase::update_table_comment(
*pos++ = ';';
*pos++ = ' ';
}
-
memcpy(pos, fk_str.c_str(), flen);
pos[flen] = 0;
}
- prebuilt->trx->op_info = (char*)"";
+ m_prebuilt->trx->op_info = (char*)"";
return(str ? str : (char*) comment);
}
@@ -14379,14 +15218,12 @@ Gets the foreign key create info for a table stored in InnoDB.
@return own: character string in the form which can be inserted to the
CREATE TABLE statement, MUST be freed with
ha_innobase::free_foreign_key_create_info */
-UNIV_INTERN
+
char*
ha_innobase::get_foreign_key_create_info(void)
/*==========================================*/
{
- char* fk_str = 0;
-
- ut_a(prebuilt != NULL);
+ ut_a(m_prebuilt != NULL);
/* We do not know if MySQL can call this function before calling
external_lock(). To be safe, update the thd of the current table
@@ -14394,23 +15231,24 @@ ha_innobase::get_foreign_key_create_info(void)
update_thd(ha_thd());
- prebuilt->trx->op_info = (char*)"getting info on foreign keys";
-
- /* In case MySQL calls this in the middle of a SELECT query,
- release possible adaptive hash latch to avoid
- deadlocks of threads */
-
- trx_search_latch_release_if_reserved(prebuilt->trx);
+ m_prebuilt->trx->op_info = "getting info on foreign keys";
- /* Output the data to a temporary file */
+ /* Output the data to a temporary string */
std::string str = dict_print_info_on_foreign_keys(
- TRUE, prebuilt->trx,
- prebuilt->table);
+ TRUE, m_prebuilt->trx,
+ m_prebuilt->table);
- prebuilt->trx->op_info = (char*)"";
+ m_prebuilt->trx->op_info = "";
/* Allocate buffer for the string */
- fk_str = (char*) my_malloc(str.length() + 1, MYF(0));
+ char* fk_str = (char*) my_malloc(str.length() + 1, MYF(0));
+
+ /* JAN: TODO: MySQL 5.7
+ fk_str = reinterpret_cast<char*>(
+ my_malloc(PSI_INSTRUMENT_ME, str.length() + 1, MYF(0)));
+ */
+
if (fk_str) {
memcpy(fk_str, str.c_str(), str.length());
@@ -14428,26 +15266,26 @@ static
FOREIGN_KEY_INFO*
get_foreign_key_info(
/*=================*/
- THD* thd, /*!< in: user thread handle */
- dict_foreign_t* foreign) /*!< in: foreign key constraint */
+ THD* thd, /*!< in: user thread handle */
+ dict_foreign_t* foreign)/*!< in: foreign key constraint */
{
FOREIGN_KEY_INFO f_key_info;
FOREIGN_KEY_INFO* pf_key_info;
uint i = 0;
- ulint len;
+ size_t len;
char tmp_buff[NAME_LEN+1];
char name_buff[NAME_LEN+1];
const char* ptr;
LEX_STRING* referenced_key_name;
LEX_STRING* name = NULL;
- if (row_is_mysql_tmp_table_name(foreign->foreign_table_name)) {
+ if (dict_table_t::is_temporary_name(foreign->foreign_table_name)) {
return NULL;
}
ptr = dict_remove_db_name(foreign->id);
- f_key_info.foreign_id = thd_make_lex_string(thd, 0, ptr,
- (uint) strlen(ptr), 1);
+ f_key_info.foreign_id = thd_make_lex_string(
+ thd, 0, ptr, strlen(ptr), 1);
/* Name format: database name, '/', table name, '\0' */
@@ -14459,13 +15297,13 @@ get_foreign_key_info(
len = filename_to_tablename(tmp_buff, name_buff, sizeof(name_buff));
f_key_info.referenced_db = thd_make_lex_string(
- thd, 0, name_buff, static_cast<unsigned int>(len), 1);
+ thd, 0, name_buff, len, 1);
/* Referenced (parent) table name */
ptr = dict_remove_db_name(foreign->referenced_table_name);
len = filename_to_tablename(ptr, name_buff, sizeof(name_buff), 1);
f_key_info.referenced_table = thd_make_lex_string(
- thd, 0, name_buff, static_cast<unsigned int>(len), 1);
+ thd, 0, name_buff, len, 1);
/* Dependent (child) database name */
len = dict_get_db_name_len(foreign->foreign_table_name);
@@ -14475,22 +15313,22 @@ get_foreign_key_info(
len = filename_to_tablename(tmp_buff, name_buff, sizeof(name_buff));
f_key_info.foreign_db = thd_make_lex_string(
- thd, 0, name_buff, static_cast<unsigned int>(len), 1);
+ thd, 0, name_buff, len, 1);
/* Dependent (child) table name */
ptr = dict_remove_db_name(foreign->foreign_table_name);
len = filename_to_tablename(ptr, name_buff, sizeof(name_buff), 1);
f_key_info.foreign_table = thd_make_lex_string(
- thd, 0, name_buff, static_cast<unsigned int>(len), 1);
+ thd, 0, name_buff, len, 1);
do {
ptr = foreign->foreign_col_names[i];
name = thd_make_lex_string(thd, name, ptr,
- (uint) strlen(ptr), 1);
+ strlen(ptr), 1);
f_key_info.foreign_fields.push_back(name);
ptr = foreign->referenced_col_names[i];
name = thd_make_lex_string(thd, name, ptr,
- (uint) strlen(ptr), 1);
+ strlen(ptr), 1);
f_key_info.referenced_fields.push_back(name);
} while (++i < foreign->n_fields);
@@ -14504,6 +15342,7 @@ get_foreign_key_info(
f_key_info.delete_method = FK_OPTION_RESTRICT;
}
+
if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE) {
f_key_info.update_method = FK_OPTION_CASCADE;
} else if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL) {
@@ -14514,12 +15353,40 @@ get_foreign_key_info(
f_key_info.update_method = FK_OPTION_RESTRICT;
}
- if (foreign->referenced_index && foreign->referenced_index->name) {
- referenced_key_name = thd_make_lex_string(thd,
- f_key_info.referenced_key_name,
- foreign->referenced_index->name,
- (uint) strlen(foreign->referenced_index->name),
- 1);
+ /* Load referenced table to update FK referenced key name. */
+ if (foreign->referenced_table == NULL) {
+
+ dict_table_t* ref_table;
+
+ ut_ad(mutex_own(&dict_sys->mutex));
+ ref_table = dict_table_open_on_name(
+ foreign->referenced_table_name_lookup,
+ TRUE, FALSE, DICT_ERR_IGNORE_NONE);
+
+ if (ref_table == NULL) {
+
+ if (!thd_test_options(
+ thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
+ ib::info()
+ << "Foreign Key referenced table "
+ << foreign->referenced_table_name
+ << " not found for foreign table "
+ << foreign->foreign_table_name;
+ }
+ } else {
+
+ dict_table_close(ref_table, TRUE, FALSE);
+ }
+ }
+
+ if (foreign->referenced_index
+ && foreign->referenced_index->name != NULL) {
+ referenced_key_name = thd_make_lex_string(
+ thd,
+ f_key_info.referenced_key_name,
+ foreign->referenced_index->name,
+ strlen(foreign->referenced_index->name),
+ 1);
} else {
referenced_key_name = NULL;
}
@@ -14535,41 +15402,37 @@ get_foreign_key_info(
/*******************************************************************//**
Gets the list of foreign keys in this table.
@return always 0, that is, always succeeds */
-UNIV_INTERN
+
int
ha_innobase::get_foreign_key_list(
/*==============================*/
THD* thd, /*!< in: user thread handle */
List<FOREIGN_KEY_INFO>* f_key_list) /*!< out: foreign key list */
{
- FOREIGN_KEY_INFO* pf_key_info;
- dict_foreign_t* foreign;
-
- ut_a(prebuilt != NULL);
update_thd(ha_thd());
- prebuilt->trx->op_info = "getting list of foreign keys";
+ m_prebuilt->trx->op_info = "getting list of foreign keys";
- trx_search_latch_release_if_reserved(prebuilt->trx);
-
- mutex_enter(&(dict_sys->mutex));
+ mutex_enter(&dict_sys->mutex);
for (dict_foreign_set::iterator it
- = prebuilt->table->foreign_set.begin();
- it != prebuilt->table->foreign_set.end();
+ = m_prebuilt->table->foreign_set.begin();
+ it != m_prebuilt->table->foreign_set.end();
++it) {
- foreign = *it;
+ FOREIGN_KEY_INFO* pf_key_info;
+ dict_foreign_t* foreign = *it;
pf_key_info = get_foreign_key_info(thd, foreign);
- if (pf_key_info) {
+
+ if (pf_key_info != NULL) {
f_key_list->push_back(pf_key_info);
}
}
- mutex_exit(&(dict_sys->mutex));
+ mutex_exit(&dict_sys->mutex);
- prebuilt->trx->op_info = "";
+ m_prebuilt->trx->op_info = "";
return(0);
}
@@ -14577,41 +15440,202 @@ ha_innobase::get_foreign_key_list(
/*******************************************************************//**
Gets the set of foreign keys where this table is the referenced table.
@return always 0, that is, always succeeds */
-UNIV_INTERN
+
int
ha_innobase::get_parent_foreign_key_list(
/*=====================================*/
THD* thd, /*!< in: user thread handle */
List<FOREIGN_KEY_INFO>* f_key_list) /*!< out: foreign key list */
{
- FOREIGN_KEY_INFO* pf_key_info;
- dict_foreign_t* foreign;
-
- ut_a(prebuilt != NULL);
update_thd(ha_thd());
- prebuilt->trx->op_info = "getting list of referencing foreign keys";
+ m_prebuilt->trx->op_info = "getting list of referencing foreign keys";
- trx_search_latch_release_if_reserved(prebuilt->trx);
-
- mutex_enter(&(dict_sys->mutex));
+ mutex_enter(&dict_sys->mutex);
for (dict_foreign_set::iterator it
- = prebuilt->table->referenced_set.begin();
- it != prebuilt->table->referenced_set.end();
+ = m_prebuilt->table->referenced_set.begin();
+ it != m_prebuilt->table->referenced_set.end();
++it) {
- foreign = *it;
+ FOREIGN_KEY_INFO* pf_key_info;
+ dict_foreign_t* foreign = *it;
pf_key_info = get_foreign_key_info(thd, foreign);
- if (pf_key_info) {
+
+ if (pf_key_info != NULL) {
f_key_list->push_back(pf_key_info);
}
}
- mutex_exit(&(dict_sys->mutex));
+ mutex_exit(&dict_sys->mutex);
- prebuilt->trx->op_info = "";
+ m_prebuilt->trx->op_info = "";
+
+ return(0);
+}
+
+/** Table list item structure is used to store only the table
+and its name. It is used by get_cascade_foreign_key_table_list to store
+the intermediate result for fetching the table set. */
+struct table_list_item {
+ /** InnoDB table object */
+ const dict_table_t* table;
+ /** Table name */
+ const char* name;
+};
+
+/** Structure to compare two st_handler_tablename objects using their
+db and tablename. It is used in the ordering of cascade_fk_set.
+It returns true if the first argument precedes the second argument
+and false otherwise. */
+struct tablename_compare {
+
+ bool operator()(const st_handler_tablename lhs,
+ const st_handler_tablename rhs) const
+ {
+ int cmp = strcmp(lhs.db, rhs.db);
+ if (cmp == 0) {
+ cmp = strcmp(lhs.tablename, rhs.tablename);
+ }
+
+ return(cmp < 0);
+ }
+};
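
tablename_compare above defines a strict weak ordering on (db, tablename), which is what lets the cascade_fk_set deduplicate tables. A usage sketch with a plain struct standing in for st_handler_tablename:

#include <cstdio>
#include <cstring>
#include <set>

struct Name { const char* db; const char* tablename; };

struct Name_compare {
	bool operator()(const Name& lhs, const Name& rhs) const
	{
		int cmp = std::strcmp(lhs.db, rhs.db);
		if (cmp == 0) {
			cmp = std::strcmp(lhs.tablename, rhs.tablename);
		}
		return cmp < 0;		// strict weak ordering on (db, table)
	}
};

int main()
{
	std::set<Name, Name_compare> seen;
	seen.insert({"test", "child"});
	// A second insert of the same (db, table) pair is rejected,
	// which is what deduplicates the cascade set.
	bool inserted = seen.insert({"test", "child"}).second;
	std::printf("duplicate inserted: %d\n", inserted);	// 0
}
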
+
+/** Get the table name and database name for the given table.
+@param[in,out] thd user thread handle
+@param[out]	f_key_info	pointer to st_handler_tablename object
+@param[in] foreign foreign key constraint. */
+static
+void
+get_table_name_info(
+ THD* thd,
+ st_handler_tablename* f_key_info,
+ const dict_foreign_t* foreign)
+{
+#define FILENAME_CHARSET_MBMAXLEN 5
+ char tmp_buff[NAME_CHAR_LEN * FILENAME_CHARSET_MBMAXLEN + 1];
+ char name_buff[NAME_CHAR_LEN * FILENAME_CHARSET_MBMAXLEN + 1];
+ const char* ptr;
+
+ size_t len = dict_get_db_name_len(
+ foreign->referenced_table_name_lookup);
+ ut_memcpy(tmp_buff, foreign->referenced_table_name_lookup, len);
+ tmp_buff[len] = 0;
+
+ ut_ad(len < sizeof(tmp_buff));
+
+ len = filename_to_tablename(tmp_buff, name_buff, sizeof(name_buff));
+ f_key_info->db = thd_strmake(thd, name_buff, len);
+
+ ptr = dict_remove_db_name(foreign->referenced_table_name_lookup);
+ len = filename_to_tablename(ptr, name_buff, sizeof(name_buff));
+ f_key_info->tablename = thd_strmake(thd, name_buff, len);
+}
+
+/** Get the list of tables ordered by the dependency on the other tables using
+the 'CASCADE' foreign key constraint.
+@param[in,out] thd user thread handle
+@param[out] fk_table_list set of tables name info for the
+ dependent table
+@retval 0 for success. */
+int
+ha_innobase::get_cascade_foreign_key_table_list(
+ THD* thd,
+ List<st_handler_tablename>* fk_table_list)
+{
+ m_prebuilt->trx->op_info = "getting cascading foreign keys";
+
+ std::list<table_list_item, ut_allocator<table_list_item> > table_list;
+
+ typedef std::set<st_handler_tablename, tablename_compare,
+ ut_allocator<st_handler_tablename> > cascade_fk_set;
+
+ cascade_fk_set fk_set;
+
+ mutex_enter(&dict_sys->mutex);
+
+	/* Initialize the table_list with the m_prebuilt->table name. */
+ struct table_list_item item = {m_prebuilt->table,
+ m_prebuilt->table->name.m_name};
+
+ table_list.push_back(item);
+
+ /* Get the parent table, grand parent table info from the
+ table list by depth-first traversal. */
+ do {
+ const dict_table_t* parent_table;
+ dict_table_t* parent = NULL;
+ std::pair<cascade_fk_set::iterator,bool> ret;
+
+ item = table_list.back();
+ table_list.pop_back();
+ parent_table = item.table;
+
+ if (parent_table == NULL) {
+
+ ut_ad(item.name != NULL);
+
+ parent_table = parent = dict_table_open_on_name(
+ item.name, TRUE, FALSE,
+ DICT_ERR_IGNORE_NONE);
+
+ if (parent_table == NULL) {
+ /* foreign_key_checks is or was probably
+ disabled; ignore the constraint */
+ continue;
+ }
+ }
+
+ for (dict_foreign_set::const_iterator it =
+ parent_table->foreign_set.begin();
+ it != parent_table->foreign_set.end(); ++it) {
+
+ const dict_foreign_t* foreign = *it;
+ st_handler_tablename f1;
+
+ /* Skip the table if there is no
+ cascading operation. */
+ if (0 == (foreign->type
+ & ~(DICT_FOREIGN_ON_DELETE_NO_ACTION
+ | DICT_FOREIGN_ON_UPDATE_NO_ACTION))) {
+ continue;
+ }
+
+ if (foreign->referenced_table_name_lookup != NULL) {
+ get_table_name_info(thd, &f1, foreign);
+ ret = fk_set.insert(f1);
+
+ /* Ignore the table if it is already
+ in the set. */
+ if (!ret.second) {
+ continue;
+ }
+
+ struct table_list_item item1 = {
+ foreign->referenced_table,
+ foreign->referenced_table_name_lookup};
+
+ table_list.push_back(item1);
+
+ st_handler_tablename* fk_table =
+ (st_handler_tablename*) thd_memdup(
+ thd, &f1, sizeof(*fk_table));
+
+ fk_table_list->push_back(fk_table);
+ }
+ }
+
+ if (parent != NULL) {
+ dict_table_close(parent, true, false);
+ }
+
+	} while (!table_list.empty());
+
+ mutex_exit(&dict_sys->mutex);
+
+ m_prebuilt->trx->op_info = "";
return(0);
}
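The loop above is an iterative depth-first search over the foreign-key graph, with fk_set serving as the visited set so that each table is reported at most once. The same shape reduced to a generic string graph, as a sketch with hypothetical types rather than InnoDB API:

#include <map>
#include <set>
#include <string>
#include <vector>

typedef std::map<std::string, std::vector<std::string> > fk_graph;

/* Collect every table reachable from 'root' through CASCADE edges,
depth-first, visiting each table at most once. */
static std::vector<std::string>
cascade_closure(const fk_graph& graph, const std::string& root)
{
	std::vector<std::string> out;
	std::set<std::string> visited;
	std::vector<std::string> stack(1, root);

	while (!stack.empty()) {
		std::string name = stack.back();
		stack.pop_back();

		fk_graph::const_iterator it = graph.find(name);
		if (it == graph.end()) {
			continue;
		}

		for (size_t i = 0; i < it->second.size(); i++) {
			const std::string& child = it->second[i];

			/* Mirrors fk_set.insert(f1).second above:
			skip children already in the set. */
			if (visited.insert(child).second) {
				stack.push_back(child);
				out.push_back(child);
			}
		}
	}

	return out;
}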
@@ -14620,26 +15644,26 @@ ha_innobase::get_parent_foreign_key_list(
Checks if ALTER TABLE may change the storage engine of the table.
Changing storage engines is not allowed for tables for which there
are foreign key constraints (parent or child tables).
-@return TRUE if can switch engines */
-UNIV_INTERN
+@return TRUE if can switch engines */
+
bool
ha_innobase::can_switch_engines(void)
/*=================================*/
{
- bool can_switch;
-
DBUG_ENTER("ha_innobase::can_switch_engines");
+
update_thd();
- prebuilt->trx->op_info =
+ m_prebuilt->trx->op_info =
"determining if there are foreign key constraints";
- row_mysql_freeze_data_dictionary(prebuilt->trx);
- can_switch = prebuilt->table->referenced_set.empty()
- && prebuilt->table->foreign_set.empty();
+ row_mysql_freeze_data_dictionary(m_prebuilt->trx);
+
+ bool can_switch = m_prebuilt->table->referenced_set.empty()
+ && m_prebuilt->table->foreign_set.empty();
- row_mysql_unfreeze_data_dictionary(prebuilt->trx);
- prebuilt->trx->op_info = "";
+ row_mysql_unfreeze_data_dictionary(m_prebuilt->trx);
+ m_prebuilt->trx->op_info = "";
DBUG_RETURN(can_switch);
}
@@ -14649,13 +15673,13 @@ Checks if a table is referenced by a foreign key. The MySQL manual states that
a REPLACE is either equivalent to an INSERT, or DELETE(s) + INSERT. Only a
delete is then allowed internally to resolve a duplicate key conflict in
REPLACE, not an update.
-@return > 0 if referenced by a FOREIGN KEY */
-UNIV_INTERN
+@return > 0 if referenced by a FOREIGN KEY */
+
uint
ha_innobase::referenced_by_foreign_key(void)
/*========================================*/
{
- if (dict_table_is_referenced_by_foreign_key(prebuilt->table)) {
+ if (dict_table_is_referenced_by_foreign_key(m_prebuilt->table)) {
return(1);
}
@@ -14666,21 +15690,21 @@ ha_innobase::referenced_by_foreign_key(void)
/*******************************************************************//**
Frees the foreign key create info for a table stored in InnoDB, if it is
non-NULL. */
-UNIV_INTERN
+
void
ha_innobase::free_foreign_key_create_info(
/*======================================*/
char* str) /*!< in, own: create info string to free */
{
- if (str) {
+ if (str != NULL) {
my_free(str);
}
}
/*******************************************************************//**
Tells something additional to the handler about how to do things.
-@return 0 or error number */
-UNIV_INTERN
+@return 0 or error number */
+
int
ha_innobase::extra(
/*===============*/
@@ -14690,13 +15714,13 @@ ha_innobase::extra(
check_trx_exists(ha_thd());
/* Warning: since it is not sure that MySQL calls external_lock
- before calling this function, the trx field in prebuilt can be
+ before calling this function, the trx field in m_prebuilt can be
obsolete! */
switch (operation) {
case HA_EXTRA_FLUSH:
- if (prebuilt->blob_heap) {
- row_mysql_prebuilt_free_blob_heap(prebuilt);
+ if (m_prebuilt->blob_heap) {
+ row_mysql_prebuilt_free_blob_heap(m_prebuilt);
}
break;
case HA_EXTRA_RESET_STATE:
@@ -14704,16 +15728,16 @@ ha_innobase::extra(
thd_to_trx(ha_thd())->duplicates = 0;
break;
case HA_EXTRA_NO_KEYREAD:
- prebuilt->read_just_key = 0;
+ m_prebuilt->read_just_key = 0;
break;
case HA_EXTRA_KEYREAD:
- prebuilt->read_just_key = 1;
+ m_prebuilt->read_just_key = 1;
break;
case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
- prebuilt->keep_other_fields_on_keyread = 1;
+ m_prebuilt->keep_other_fields_on_keyread = 1;
break;
- /* IMPORTANT: prebuilt->trx can be obsolete in
+ /* IMPORTANT: m_prebuilt->trx can be obsolete in
this method, because it is not sure that MySQL
calls external_lock before this method with the
parameters below. We must not invoke update_thd()
@@ -14731,6 +15755,16 @@ ha_innobase::extra(
case HA_EXTRA_WRITE_CANNOT_REPLACE:
thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_REPLACE;
break;
+ case HA_EXTRA_BEGIN_ALTER_COPY:
+ m_prebuilt->table->skip_alter_undo = 1;
+ break;
+ case HA_EXTRA_END_ALTER_COPY:
+ m_prebuilt->table->skip_alter_undo = 0;
+ break;
+ case HA_EXTRA_FAKE_START_STMT:
+ trx_register_for_2pc(m_prebuilt->trx);
+ m_prebuilt->sql_stat_start = true;
+ break;
default:/* Do nothing */
;
}
@@ -14738,29 +15772,40 @@ ha_innobase::extra(
return(0);
}
-/******************************************************************//**
-*/
-UNIV_INTERN
+/**
+MySQL calls this method at the end of each statement. This method
+exists for readability only: the name ha_innobase::reset() gives no
+clue about what the method does. */
+
int
-ha_innobase::reset()
-/*================*/
+ha_innobase::end_stmt()
{
- if (prebuilt->blob_heap) {
- row_mysql_prebuilt_free_blob_heap(prebuilt);
+ if (m_prebuilt->blob_heap) {
+ row_mysql_prebuilt_free_blob_heap(m_prebuilt);
}
reset_template();
- ds_mrr.dsmrr_close();
+
+ m_ds_mrr.dsmrr_close();
/* TODO: This should really be reset in reset_template() but for now
it's safer to do it explicitly here. */
/* This is a statement level counter. */
- prebuilt->autoinc_last_value = 0;
+ m_prebuilt->autoinc_last_value = 0;
return(0);
}
+/**
+MySQL calls this method at the end of each statement. */
+
+int
+ha_innobase::reset()
+{
+ return(end_stmt());
+}
+
/******************************************************************//**
MySQL calls this function at the start of each SQL statement inside LOCK
TABLES. Inside LOCK TABLES the ::external_lock method does not work to
@@ -14772,42 +15817,36 @@ procedure. To make the execution more deterministic for binlogging, MySQL-5.0
locks all tables involved in a stored procedure with full explicit table
locks (thd_in_lock_tables(thd) holds in store_lock()) before executing the
procedure.
-@return 0 or error code */
-UNIV_INTERN
+@return 0 or error code */
+
int
ha_innobase::start_stmt(
/*====================*/
THD* thd, /*!< in: handle to the user thread */
thr_lock_type lock_type)
{
- trx_t* trx;
+ trx_t* trx = m_prebuilt->trx;
+
DBUG_ENTER("ha_innobase::start_stmt");
update_thd(thd);
- trx = prebuilt->trx;
+ ut_ad(m_prebuilt->table != NULL);
- /* Here we release the search latch and the InnoDB thread FIFO ticket
- if they were reserved. They should have been released already at the
- end of the previous statement, but because inside LOCK TABLES the
- lock count method does not work to mark the end of a SELECT statement,
- that may not be the case. We MUST release the search latch before an
- INSERT, for example. */
-
- trx_search_latch_release_if_reserved(trx);
+ trx = m_prebuilt->trx;
innobase_srv_conc_force_exit_innodb(trx);
/* Reset the AUTOINC statement level counter for multi-row INSERTs. */
trx->n_autoinc_rows = 0;
- prebuilt->sql_stat_start = TRUE;
- prebuilt->hint_need_to_fetch_extra_cols = 0;
+ m_prebuilt->sql_stat_start = TRUE;
+ m_prebuilt->hint_need_to_fetch_extra_cols = 0;
reset_template();
- if (dict_table_is_temporary(prebuilt->table)
- && prebuilt->mysql_has_locked
- && prebuilt->select_lock_type == LOCK_NONE) {
+ if (dict_table_is_temporary(m_prebuilt->table)
+ && m_mysql_has_locked
+ && m_prebuilt->select_lock_type == LOCK_NONE) {
dberr_t error;
switch (thd_sql_command(thd)) {
@@ -14816,12 +15855,12 @@ ha_innobase::start_stmt(
case SQLCOM_DELETE:
case SQLCOM_REPLACE:
init_table_handle_for_HANDLER();
- prebuilt->select_lock_type = LOCK_X;
- prebuilt->stored_select_lock_type = LOCK_X;
- error = row_lock_table_for_mysql(prebuilt, NULL, 1);
+ m_prebuilt->select_lock_type = LOCK_X;
+ m_prebuilt->stored_select_lock_type = LOCK_X;
+ error = row_lock_table(m_prebuilt);
if (error != DB_SUCCESS) {
- int st = convert_error_code_to_mysql(
+ int st = convert_error_code_to_mysql(
error, 0, thd);
DBUG_RETURN(st);
}
@@ -14829,13 +15868,13 @@ ha_innobase::start_stmt(
}
}
- if (!prebuilt->mysql_has_locked) {
+ if (!m_mysql_has_locked) {
/* This handle is for a temporary table created inside
this same LOCK TABLES; since MySQL does NOT call external_lock
in this case, we must use x-row locks inside InnoDB to be
prepared for an update of a row */
- prebuilt->select_lock_type = LOCK_X;
+ m_prebuilt->select_lock_type = LOCK_X;
} else if (trx->isolation_level != TRX_ISO_SERIALIZABLE
&& thd_sql_command(thd) == SQLCOM_SELECT
@@ -14844,18 +15883,19 @@ ha_innobase::start_stmt(
/* For other than temporary tables, we obtain
no lock for consistent read (plain SELECT). */
- prebuilt->select_lock_type = LOCK_NONE;
+ m_prebuilt->select_lock_type = LOCK_NONE;
} else {
/* Not a consistent read: restore the
select_lock_type value. The value of
stored_select_lock_type was decided in:
1) ::store_lock(),
2) ::external_lock(),
- 3) ::init_table_handle_for_HANDLER(), and
- 4) ::transactional_table_lock(). */
+ 3) ::init_table_handle_for_HANDLER(). */
+
+ ut_a(m_prebuilt->stored_select_lock_type != LOCK_NONE_UNSET);
- ut_a(prebuilt->stored_select_lock_type != LOCK_NONE_UNSET);
- prebuilt->select_lock_type = prebuilt->stored_select_lock_type;
+ m_prebuilt->select_lock_type =
+ m_prebuilt->stored_select_lock_type;
}
*trx->detailed_error = 0;
@@ -14871,7 +15911,7 @@ ha_innobase::start_stmt(
/******************************************************************//**
Maps a MySQL trx isolation level code to the InnoDB isolation level code
-@return InnoDB isolation level */
+@return InnoDB isolation level */
static inline
ulint
innobase_map_isolation_level(
@@ -14898,8 +15938,8 @@ the THD in the handle. We will also use this function to communicate
to InnoDB that a new SQL statement has started and that we must store a
savepoint to our transaction handle, so that we are able to roll back
the SQL statement in case of an error.
-@return 0 */
-UNIV_INTERN
+@return 0 */
+
int
ha_innobase::external_lock(
/*=======================*/
@@ -14910,7 +15950,8 @@ ha_innobase::external_lock(
DBUG_PRINT("enter",("lock_type: %d", lock_type));
update_thd(thd);
- trx_t* trx = prebuilt->trx;
+ trx_t* trx = m_prebuilt->trx;
+ ut_ad(m_prebuilt->table);
/* Statement based binlogging does not work in isolation level
READ UNCOMMITTED and READ COMMITTED since the necessary
@@ -14931,11 +15972,13 @@ ha_innobase::external_lock(
#endif /* WITH_WSREP */
/* used by test case */
DBUG_EXECUTE_IF("no_innodb_binlog_errors", skip = true;);
+
if (!skip) {
my_error(ER_BINLOG_STMT_MODE_AND_ROW_ENGINE, MYF(0),
- " InnoDB is limited to row-logging when "
- "transaction isolation level is "
- "READ COMMITTED or READ UNCOMMITTED.");
+ " InnoDB is limited to row-logging when"
+ " transaction isolation level is"
+ " READ COMMITTED or READ UNCOMMITTED.");
+
DBUG_RETURN(HA_ERR_LOGGING_IMPOSSIBLE);
}
}
@@ -14954,8 +15997,7 @@ ha_innobase::external_lock(
|| thd_sql_command(thd) == SQLCOM_DROP_INDEX
|| thd_sql_command(thd) == SQLCOM_DELETE)) {
- if (thd_sql_command(thd) == SQLCOM_CREATE_TABLE)
- {
+ if (thd_sql_command(thd) == SQLCOM_CREATE_TABLE) {
ib_senderrf(thd, IB_LOG_LEVEL_WARN,
ER_READ_ONLY_MODE);
DBUG_RETURN(HA_ERR_TABLE_READONLY);
@@ -14964,22 +16006,29 @@ ha_innobase::external_lock(
ER_READ_ONLY_MODE);
DBUG_RETURN(HA_ERR_TABLE_READONLY);
}
-
}
- prebuilt->sql_stat_start = TRUE;
- prebuilt->hint_need_to_fetch_extra_cols = 0;
+ m_prebuilt->sql_stat_start = TRUE;
+ m_prebuilt->hint_need_to_fetch_extra_cols = 0;
reset_template();
- switch (prebuilt->table->quiesce) {
+ switch (m_prebuilt->table->quiesce) {
case QUIESCE_START:
/* Check for FLUSH TABLE t WITH READ LOCK; */
if (!srv_read_only_mode
&& thd_sql_command(thd) == SQLCOM_FLUSH
&& lock_type == F_RDLCK) {
- row_quiesce_table_start(prebuilt->table, trx);
+ if (dict_table_is_discarded(m_prebuilt->table)) {
+ ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_DISCARDED,
+ table->s->table_name.str);
+
+ DBUG_RETURN(HA_ERR_TABLESPACE_MISSING);
+ }
+
+ row_quiesce_table_start(m_prebuilt->table, trx);
/* Use the transaction instance to track UNLOCK
TABLES. It can be done via START TRANSACTION; too
@@ -14995,7 +16044,7 @@ ha_innobase::external_lock(
if (trx->flush_tables > 0
&& (lock_type == F_UNLCK || trx_is_interrupted(trx))) {
- row_quiesce_table_complete(prebuilt->table, trx);
+ row_quiesce_table_complete(m_prebuilt->table, trx);
ut_a(trx->flush_tables > 0);
--trx->flush_tables;
@@ -15011,8 +16060,8 @@ ha_innobase::external_lock(
/* If this is a SELECT, then it is in UPDATE TABLE ...
or SELECT ... FOR UPDATE */
- prebuilt->select_lock_type = LOCK_X;
- prebuilt->stored_select_lock_type = LOCK_X;
+ m_prebuilt->select_lock_type = LOCK_X;
+ m_prebuilt->stored_select_lock_type = LOCK_X;
}
if (lock_type != F_UNLCK) {
@@ -15023,7 +16072,7 @@ ha_innobase::external_lock(
innobase_register_trx(ht, thd, trx);
if (trx->isolation_level == TRX_ISO_SERIALIZABLE
- && prebuilt->select_lock_type == LOCK_NONE
+ && m_prebuilt->select_lock_type == LOCK_NONE
&& thd_test_options(
thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
@@ -15035,8 +16084,8 @@ ha_innobase::external_lock(
can be serialized also if performed as consistent
reads. */
- prebuilt->select_lock_type = LOCK_S;
- prebuilt->stored_select_lock_type = LOCK_S;
+ m_prebuilt->select_lock_type = LOCK_S;
+ m_prebuilt->stored_select_lock_type = LOCK_S;
}
/* Starting from 4.1.9, no InnoDB table lock is taken in LOCK
@@ -15050,17 +16099,17 @@ ha_innobase::external_lock(
can hold in some cases, e.g., at the start of a stored
procedure call (SQLCOM_CALL). */
- if (prebuilt->select_lock_type != LOCK_NONE) {
+ if (m_prebuilt->select_lock_type != LOCK_NONE) {
if (thd_sql_command(thd) == SQLCOM_LOCK_TABLES
&& THDVAR(thd, table_locks)
&& thd_test_options(thd, OPTION_NOT_AUTOCOMMIT)
&& thd_in_lock_tables(thd)) {
- dberr_t error = row_lock_table_for_mysql(
- prebuilt, NULL, 0);
+ dberr_t error = row_lock_table(m_prebuilt);
if (error != DB_SUCCESS) {
+
DBUG_RETURN(
convert_error_code_to_mysql(
error, 0, thd));
@@ -15071,28 +16120,24 @@ ha_innobase::external_lock(
}
trx->n_mysql_tables_in_use++;
- prebuilt->mysql_has_locked = TRUE;
+ m_mysql_has_locked = true;
if (!trx_is_started(trx)
- && (prebuilt->select_lock_type != LOCK_NONE
- || prebuilt->stored_select_lock_type != LOCK_NONE)) {
+ && (m_prebuilt->select_lock_type != LOCK_NONE
+ || m_prebuilt->stored_select_lock_type != LOCK_NONE)) {
++trx->will_lock;
}
DBUG_RETURN(0);
+ } else {
+ DEBUG_SYNC_C("ha_innobase_end_statement");
}
/* MySQL is releasing a table lock */
trx->n_mysql_tables_in_use--;
- prebuilt->mysql_has_locked = FALSE;
-
- /* Release a possible FIFO ticket and search latch. Since we
- may reserve the trx_sys->mutex, we have to release the search
- system latch first to obey the latching order. */
-
- trx_search_latch_release_if_reserved(trx);
+ m_mysql_has_locked = false;
innobase_srv_conc_force_exit_innodb(trx);
@@ -15102,28 +16147,31 @@ ha_innobase::external_lock(
if (trx->n_mysql_tables_in_use == 0) {
trx->mysql_n_tables_locked = 0;
- prebuilt->used_in_HANDLER = FALSE;
+ m_prebuilt->used_in_HANDLER = FALSE;
if (!thd_test_options(
thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
if (trx_is_started(trx)) {
+
innobase_commit(ht, thd, TRUE);
}
} else if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
- && trx->global_read_view) {
+ && MVCC::is_view_active(trx->read_view)) {
- /* At low transaction isolation levels we let
- each consistent read set its own snapshot */
+ mutex_enter(&trx_sys->mutex);
+
+ trx_sys->mvcc->view_close(trx->read_view, true);
- read_view_close_for_mysql(trx);
+ mutex_exit(&trx_sys->mutex);
}
}
if (!trx_is_started(trx)
- && (prebuilt->select_lock_type != LOCK_NONE
- || prebuilt->stored_select_lock_type != LOCK_NONE)) {
+ && lock_type != F_UNLCK
+ && (m_prebuilt->select_lock_type != LOCK_NONE
+ || m_prebuilt->stored_select_lock_type != LOCK_NONE)) {
++trx->will_lock;
}
@@ -15131,100 +16179,6 @@ ha_innobase::external_lock(
DBUG_RETURN(0);
}
-/******************************************************************//**
-With this function MySQL request a transactional lock to a table when
-user issued query LOCK TABLES..WHERE ENGINE = InnoDB.
-@return error code */
-UNIV_INTERN
-int
-ha_innobase::transactional_table_lock(
-/*==================================*/
- THD* thd, /*!< in: handle to the user thread */
- int lock_type) /*!< in: lock type */
-{
- trx_t* trx;
-
- DBUG_ENTER("ha_innobase::transactional_table_lock");
- DBUG_PRINT("enter",("lock_type: %d", lock_type));
-
- /* We do not know if MySQL can call this function before calling
- external_lock(). To be safe, update the thd of the current table
- handle. */
-
- update_thd(thd);
-
- if (!thd_tablespace_op(thd)) {
-
- if (dict_table_is_discarded(prebuilt->table)) {
-
- ib_senderrf(
- thd, IB_LOG_LEVEL_ERROR,
- ER_TABLESPACE_DISCARDED,
- table->s->table_name.str);
-
- } else if (!prebuilt->table->is_readable()) {
-
- ib_senderrf(
- thd, IB_LOG_LEVEL_ERROR,
- ER_TABLESPACE_MISSING,
- table->s->table_name.str);
- }
-
- DBUG_RETURN(HA_ERR_CRASHED);
- }
-
- trx = prebuilt->trx;
-
- prebuilt->sql_stat_start = TRUE;
- prebuilt->hint_need_to_fetch_extra_cols = 0;
-
- reset_template();
-
- if (lock_type == F_WRLCK) {
- prebuilt->select_lock_type = LOCK_X;
- prebuilt->stored_select_lock_type = LOCK_X;
- } else if (lock_type == F_RDLCK) {
- prebuilt->select_lock_type = LOCK_S;
- prebuilt->stored_select_lock_type = LOCK_S;
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "MySQL is trying to set transactional table lock "
- "with corrupted lock type to table %s, lock type "
- "%d does not exist.",
- table->s->table_name.str, lock_type);
-
- DBUG_RETURN(HA_ERR_CRASHED);
- }
-
- /* MySQL is setting a new transactional table lock */
-
- innobase_register_trx(ht, thd, trx);
-
- if (THDVAR(thd, table_locks) && thd_in_lock_tables(thd)) {
- dberr_t error;
-
- error = row_lock_table_for_mysql(prebuilt, NULL, 0);
-
- if (error != DB_SUCCESS) {
- DBUG_RETURN(
- convert_error_code_to_mysql(
- error, prebuilt->table->flags, thd));
- }
-
- if (thd_test_options(
- thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
-
- /* Store the current undo_no of the transaction
- so that we know where to roll back if we have
- to roll back the next SQL statement */
-
- trx_mark_sql_stat_end(trx);
- }
- }
-
- DBUG_RETURN(0);
-}
-
/************************************************************************//**
Here we export InnoDB status variables to MySQL. */
static
@@ -15232,7 +16186,7 @@ void
innodb_export_status()
/*==================*/
{
- if (innodb_inited) {
+ if (srv_was_started) {
srv_export_innodb_status();
}
}
@@ -15249,7 +16203,6 @@ innodb_show_status(
THD* thd, /*!< in: the MySQL query thread of the caller */
stat_print_fn* stat_print)
{
- trx_t* trx;
static const char truncated_msg[] = "... truncated...\n";
const long MAX_STATUS_SIZE = 1048576;
ulint trx_list_start = ULINT_UNDEFINED;
@@ -15266,9 +16219,9 @@ innodb_show_status(
DBUG_RETURN(0);
}
- trx = check_trx_exists(thd);
+ srv_wake_purge_thread_if_not_active();
- trx_search_latch_release_if_reserved(trx);
+ trx_t* trx = check_trx_exists(thd);
innobase_srv_conc_force_exit_innodb(trx);
@@ -15276,7 +16229,7 @@ innodb_show_status(
bytes of text. */
char* str;
- ssize_t flen, usable_len;
+ ssize_t flen;
mutex_enter(&srv_monitor_file_mutex);
rewind(srv_monitor_file);
@@ -15290,6 +16243,8 @@ innodb_show_status(
flen = 0;
}
+ ssize_t usable_len;
+
if (flen > MAX_STATUS_SIZE) {
usable_len = MAX_STATUS_SIZE;
srv_truncated_status_writes++;
@@ -15300,7 +16255,8 @@ innodb_show_status(
/* allocate buffer for the string, and
read the contents of the temporary file */
- if (!(str = (char*) my_malloc(usable_len + 1, MYF(0)))) {
+ if (!(str = (char*) my_malloc(//PSI_INSTRUMENT_ME,
+ usable_len + 1, MYF(0)))) {
mutex_exit(&srv_monitor_file_mutex);
DBUG_RETURN(1);
}
@@ -15342,161 +16298,337 @@ innodb_show_status(
DBUG_RETURN(ret_val);
}
-/************************************************************************//**
-Implements the SHOW MUTEX STATUS command.
-@return 0 on success. */
-static
-int
-innodb_mutex_show_status(
-/*=====================*/
- handlerton* hton, /*!< in: the innodb handlerton */
- THD* thd, /*!< in: the MySQL query thread of the
- caller */
- stat_print_fn* stat_print) /*!< in: function for printing
- statistics */
-{
- char buf1[IO_SIZE];
- char buf2[IO_SIZE];
- ib_mutex_t* mutex;
- rw_lock_t* lock;
- ulint block_mutex_oswait_count = 0;
- ulint block_lock_oswait_count = 0;
- ib_mutex_t* block_mutex = NULL;
- rw_lock_t* block_lock = NULL;
-#ifdef UNIV_DEBUG
- ulint rw_lock_count= 0;
- ulint rw_lock_count_spin_loop= 0;
- ulint rw_lock_count_spin_rounds= 0;
- ulint rw_lock_count_os_wait= 0;
- ulint rw_lock_count_os_yield= 0;
- ulonglong rw_lock_wait_time= 0;
-#endif /* UNIV_DEBUG */
- uint buf1len;
- uint buf2len;
- uint hton_name_len;
+/** Callback for collecting mutex statistics */
+struct ShowStatus {
+
+ /** For tracking the mutex metrics */
+ struct Value {
+
+ /** Constructor
+ @param[in] name Name of the mutex
+ @param[in] spins Number of spins
+	@param[in]	waits		OS waits so far
+ @param[in] calls Number of calls to enter() */
+ Value(const char* name,
+ ulint spins,
+ uint64_t waits,
+ uint64_t calls)
+ :
+ m_name(name),
+ m_spins(spins),
+ m_waits(waits),
+ m_calls(calls)
+ {
+ /* No op */
+ }
- hton_name_len = (uint) strlen(innobase_hton_name);
+ /** Mutex name */
+ std::string m_name;
- DBUG_ENTER("innodb_mutex_show_status");
- DBUG_ASSERT(hton == innodb_hton_ptr);
+ /** Spins so far */
+ ulint m_spins;
- mutex_enter(&mutex_list_mutex);
+ /** Waits so far */
+ uint64_t m_waits;
- for (mutex = UT_LIST_GET_FIRST(mutex_list); mutex != NULL;
- mutex = UT_LIST_GET_NEXT(list, mutex)) {
- if (mutex->count_os_wait == 0) {
- continue;
+ /** Number of calls so far */
+ uint64_t m_calls;
+ };
+
+ /** Order by m_waits, in descending order. */
+ struct OrderByWaits: public std::binary_function<Value, Value, bool>
+ {
+ /** @return true if rhs < lhs */
+ bool operator()(
+ const Value& lhs,
+ const Value& rhs) const
+ UNIV_NOTHROW
+ {
+ return(rhs.m_waits < lhs.m_waits);
}
+ };
- if (buf_pool_is_block_mutex(mutex)) {
- block_mutex = mutex;
- block_mutex_oswait_count += mutex->count_os_wait;
- continue;
+ typedef std::vector<Value, ut_allocator<Value> > Values;
+
+ /** Collect the individual latch counts */
+ struct GetCount {
+ typedef latch_meta_t::CounterType::Count Count;
+
+ /** Constructor
+ @param[in] name Latch name
+ @param[in,out] values Put the values here */
+ GetCount(
+ const char* name,
+ Values* values)
+ UNIV_NOTHROW
+ :
+ m_name(name),
+ m_values(values)
+ {
+ /* No op */
}
- buf1len= (uint) my_snprintf(buf1, sizeof(buf1), "%s:%lu",
- innobase_basename(mutex->cfile_name),
- (ulong) mutex->cline);
- buf2len= (uint) my_snprintf(buf2, sizeof(buf2), "os_waits=%lu",
- (ulong) mutex->count_os_wait);
+ /** Collect the latch metrics. Ignore entries where the
+ spins and waits are zero.
+ @param[in] count The latch metrics */
+ void operator()(Count* count)
+ UNIV_NOTHROW
+ {
+ if (count->m_spins > 0 || count->m_waits > 0) {
- if (stat_print(thd, innobase_hton_name,
- hton_name_len, buf1, buf1len,
- buf2, buf2len)) {
- mutex_exit(&mutex_list_mutex);
- DBUG_RETURN(1);
+ m_values->push_back(Value(
+ m_name,
+ count->m_spins,
+ count->m_waits,
+ count->m_calls));
+ }
}
+
+ /** The latch name */
+ const char* m_name;
+
+ /** For collecting the active mutex stats. */
+ Values* m_values;
+ };
+
+ /** Constructor */
+ ShowStatus() { }
+
+ /** Callback for collecting the stats
+ @param[in] latch_meta Latch meta data
+ @return always returns true */
+ bool operator()(latch_meta_t& latch_meta)
+ UNIV_NOTHROW
+ {
+ latch_meta_t::CounterType* counter;
+
+ counter = latch_meta.get_counter();
+
+ GetCount get_count(latch_meta.get_name(), &m_values);
+
+ counter->iterate(get_count);
+
+ return(true);
}
- if (block_mutex) {
- buf1len = (uint) my_snprintf(buf1, sizeof buf1,
- "combined %s:%lu",
- innobase_basename(
- block_mutex->cfile_name),
- (ulong) block_mutex->cline);
- buf2len = (uint) my_snprintf(buf2, sizeof buf2,
- "os_waits=%lu",
- (ulong) block_mutex_oswait_count);
+ /** Implements the SHOW MUTEX STATUS command, for mutexes.
+ The table structure is like so: Engine | Mutex Name | Status
+ We store the metrics in the "Status" column as:
+
+	spins=N,waits=N,calls=N
+
+	The user has to parse the data, unfortunately.
+ @param[in,out] hton the innodb handlerton
+ @param[in,out] thd the MySQL query thread of the caller
+ @param[in,out] stat_print function for printing statistics
+ @return true on success. */
+ bool to_string(
+ handlerton* hton,
+ THD* thd,
+ stat_print_fn* stat_print)
+ UNIV_NOTHROW;
+
+ /** For collecting the active mutex stats. */
+ Values m_values;
+};
+
+/** Implements the SHOW MUTEX STATUS command, for mutexes.
+The table structure is like so: Engine | Mutex Name | Status
+We store the metrics in the "Status" column as:
+
+	spins=N,waits=N,calls=N
+
+The user has to parse the data, unfortunately.
+@param[in,out] hton the innodb handlerton
+@param[in,out] thd the MySQL query thread of the caller
+@param[in,out] stat_print function for printing statistics
+@return true on success. */
+bool
+ShowStatus::to_string(
+ handlerton* hton,
+ THD* thd,
+ stat_print_fn* stat_print)
+ UNIV_NOTHROW
+{
+ uint hton_name_len = (uint) strlen(innobase_hton_name);
+
+ std::sort(m_values.begin(), m_values.end(), OrderByWaits());
+
+ Values::iterator end = m_values.end();
+
+ for (Values::iterator it = m_values.begin(); it != end; ++it) {
+
+ int name_len;
+ char name_buf[IO_SIZE];
+
+ name_len = snprintf(
+ name_buf, sizeof(name_buf), "%s", it->m_name.c_str());
+
+ int status_len;
+ char status_buf[IO_SIZE];
+
+ status_len = snprintf(
+ status_buf, sizeof(status_buf),
+ "spins=%lu,waits=%lu,calls=%llu",
+ static_cast<ulong>(it->m_spins),
+			static_cast<ulong>(it->m_waits),
+			static_cast<ulonglong>(it->m_calls));
if (stat_print(thd, innobase_hton_name,
- hton_name_len, buf1, buf1len,
- buf2, buf2len)) {
- mutex_exit(&mutex_list_mutex);
- DBUG_RETURN(1);
+ hton_name_len,
+ name_buf, static_cast<uint>(name_len),
+ status_buf, static_cast<uint>(status_len))) {
+
+ return(false);
}
}
- mutex_exit(&mutex_list_mutex);
+ return(true);
+}
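ShowStatus gathers all rows first, sorts them by wait count, and only then emits them, so the most contended latches are printed first. That collect/sort/emit pattern, condensed into a standalone sketch with a hypothetical row type:

#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

struct latch_row {
	std::string name;
	unsigned long spins;
	unsigned long waits;
};

/* Descending by waits, like ShowStatus::OrderByWaits. */
static bool order_by_waits(const latch_row& a, const latch_row& b)
{
	return b.waits < a.waits;
}

static void emit_rows(std::vector<latch_row>& rows)
{
	std::sort(rows.begin(), rows.end(), order_by_waits);

	for (size_t i = 0; i < rows.size(); i++) {
		std::printf("%s: spins=%lu,waits=%lu\n",
			    rows[i].name.c_str(),
			    rows[i].spins, rows[i].waits);
	}
}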
+
+/** Implements the SHOW MUTEX STATUS command, for mutexes.
+@param[in,out] hton the innodb handlerton
+@param[in,out] thd the MySQL query thread of the caller
+@param[in,out] stat_print function for printing statistics
+@return 0 on success. */
+static
+int
+innodb_show_mutex_status(
+ handlerton* hton,
+ THD* thd,
+ stat_print_fn* stat_print)
+{
+ DBUG_ENTER("innodb_show_mutex_status");
+
+ ShowStatus collector;
+
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
+ mutex_monitor.iterate(collector);
+
+ if (!collector.to_string(hton, thd, stat_print)) {
+ DBUG_RETURN(1);
+ }
+
+ DBUG_RETURN(0);
+}
+
+/** Implements the SHOW MUTEX STATUS command, for rw-locks.
+@param[in,out] hton the innodb handlerton
+@param[in,out] thd the MySQL query thread of the caller
+@param[in,out] stat_print function for printing statistics
+@return 0 on success. */
+static
+int
+innodb_show_rwlock_status(
+ handlerton* hton,
+ THD* thd,
+ stat_print_fn* stat_print)
+{
+ DBUG_ENTER("innodb_show_rwlock_status");
+
+ rw_lock_t* block_rwlock = NULL;
+ ulint block_rwlock_oswait_count = 0;
+ uint hton_name_len = (uint) strlen(innobase_hton_name);
+
+ DBUG_ASSERT(hton == innodb_hton_ptr);
mutex_enter(&rw_lock_list_mutex);
- for (lock = UT_LIST_GET_FIRST(rw_lock_list); lock != NULL;
- lock = UT_LIST_GET_NEXT(list, lock)) {
- if (lock->count_os_wait == 0) {
+ for (rw_lock_t* rw_lock = UT_LIST_GET_FIRST(rw_lock_list);
+ rw_lock != NULL;
+ rw_lock = UT_LIST_GET_NEXT(list, rw_lock)) {
+
+ if (rw_lock->count_os_wait == 0) {
continue;
}
- if (buf_pool_is_block_lock(lock)) {
- block_lock = lock;
- block_lock_oswait_count += lock->count_os_wait;
+ int buf1len;
+ char buf1[IO_SIZE];
+
+ if (rw_lock->is_block_lock) {
+
+ block_rwlock = rw_lock;
+ block_rwlock_oswait_count += rw_lock->count_os_wait;
+
continue;
}
- buf1len = (uint) my_snprintf(
- buf1, sizeof buf1, "%s:%lu",
- innobase_basename(lock->cfile_name),
- static_cast<ulong>(lock->cline));
- buf2len = (uint) my_snprintf(
- buf2, sizeof buf2, "os_waits=%lu",
- static_cast<ulong>(lock->count_os_wait));
+ buf1len = snprintf(
+ buf1, sizeof buf1, "rwlock: %s:%u",
+ innobase_basename(rw_lock->cfile_name),
+ rw_lock->cline);
+
+ int buf2len;
+ char buf2[IO_SIZE];
+
+ buf2len = snprintf(
+ buf2, sizeof buf2, "waits=%u",
+ rw_lock->count_os_wait);
if (stat_print(thd, innobase_hton_name,
- hton_name_len, buf1, buf1len,
- buf2, buf2len)) {
+ hton_name_len,
+ buf1, static_cast<uint>(buf1len),
+ buf2, static_cast<uint>(buf2len))) {
+
mutex_exit(&rw_lock_list_mutex);
+
DBUG_RETURN(1);
}
}
- if (block_lock) {
- buf1len = (uint) my_snprintf(buf1, sizeof buf1,
- "combined %s:%lu",
- innobase_basename(
- block_lock->cfile_name),
- (ulong) block_lock->cline);
- buf2len = (uint) my_snprintf(buf2, sizeof buf2,
- "os_waits=%lu",
- (ulong) block_lock_oswait_count);
+ if (block_rwlock != NULL) {
+
+ int buf1len;
+ char buf1[IO_SIZE];
+
+ buf1len = snprintf(
+ buf1, sizeof buf1, "sum rwlock: %s:%u",
+ innobase_basename(block_rwlock->cfile_name),
+ block_rwlock->cline);
+
+ int buf2len;
+ char buf2[IO_SIZE];
+
+ buf2len = snprintf(
+ buf2, sizeof buf2, "waits=" ULINTPF,
+ block_rwlock_oswait_count);
if (stat_print(thd, innobase_hton_name,
- hton_name_len, buf1, buf1len,
- buf2, buf2len)) {
+ hton_name_len,
+ buf1, static_cast<uint>(buf1len),
+ buf2, static_cast<uint>(buf2len))) {
+
mutex_exit(&rw_lock_list_mutex);
+
DBUG_RETURN(1);
}
}
mutex_exit(&rw_lock_list_mutex);
-#ifdef UNIV_DEBUG
- buf2len = static_cast<uint>(my_snprintf(buf2, sizeof buf2,
- "count=%lu, spin_waits=%lu, spin_rounds=%lu, "
- "os_waits=%lu, os_yields=%lu, os_wait_times=%lu",
- (ulong) rw_lock_count,
- (ulong) rw_lock_count_spin_loop,
- (ulong) rw_lock_count_spin_rounds,
- (ulong) rw_lock_count_os_wait,
- (ulong) rw_lock_count_os_yield,
- (ulong) (rw_lock_wait_time / 1000)));
-
- if (stat_print(thd, innobase_hton_name, hton_name_len,
- STRING_WITH_LEN("rw_lock_mutexes"), buf2, buf2len)) {
- DBUG_RETURN(1);
+ DBUG_RETURN(0);
+}
+
+/** Implements the SHOW MUTEX STATUS command, for both mutexes and rw-locks.
+@param[in,out] hton the innodb handlerton
+@param[in,out] thd the MySQL query thread of the caller
+@param[in,out] stat_print function for printing statistics
+@return 0 on success. */
+static
+int
+innodb_show_latch_status(
+ handlerton* hton,
+ THD* thd,
+ stat_print_fn* stat_print)
+{
+ int ret = innodb_show_mutex_status(hton, thd, stat_print);
+
+ if (ret != 0) {
+ return(ret);
}
-#endif /* UNIV_DEBUG */
- /* Success */
- DBUG_RETURN(0);
+ return(innodb_show_rwlock_status(hton, thd, stat_print));
}
/************************************************************************//**
@@ -15520,8 +16652,7 @@ innobase_show_status(
return(innodb_show_status(hton, thd, stat_print) != 0);
case HA_ENGINE_MUTEX:
- /* Non-zero return value means there was an error. */
- return(innodb_mutex_show_status(hton, thd, stat_print) != 0);
+ return(innodb_show_latch_status(hton, thd, stat_print) != 0);
case HA_ENGINE_LOGS:
/* Not handled */
@@ -15531,128 +16662,54 @@ innobase_show_status(
/* Success */
return(false);
}
+/*********************************************************************//**
+Returns the number of THR_LOCK locks used for one instance of an InnoDB
+table. InnoDB no longer relies on THR_LOCK locks, so 0 is returned.
+Instead of THR_LOCK locks, InnoDB relies on a combination of metadata
+locks (e.g. for LOCK TABLES and DDL) and its own locking subsystem.
+Note that even though this method returns 0, the SQL layer still calls
+the ::store_lock(), ::start_stmt() and ::external_lock() methods for
+InnoDB tables. */
-/************************************************************************//**
-Handling the shared INNOBASE_SHARE structure that is needed to provide table
-locking. Register the table name if it doesn't exist in the hash table. */
-static
-INNOBASE_SHARE*
-get_share(
-/*======*/
- const char* table_name)
-{
- INNOBASE_SHARE* share;
-
- mysql_mutex_lock(&innobase_share_mutex);
-
- ulint fold = ut_fold_string(table_name);
-
- HASH_SEARCH(table_name_hash, innobase_open_tables, fold,
- INNOBASE_SHARE*, share,
- ut_ad(share->use_count > 0),
- !strcmp(share->table_name, table_name));
-
- if (!share) {
-
- uint length = (uint) strlen(table_name);
-
- /* TODO: invoke HASH_MIGRATE if innobase_open_tables
- grows too big */
-
- share = (INNOBASE_SHARE*) my_malloc(sizeof(*share)+length+1,
- MYF(MY_FAE | MY_ZEROFILL));
-
- share->table_name = (char*) memcpy(share + 1,
- table_name, length + 1);
-
- HASH_INSERT(INNOBASE_SHARE, table_name_hash,
- innobase_open_tables, fold, share);
-
- thr_lock_init(&share->lock);
-
- /* Index translation table initialization */
- share->idx_trans_tbl.index_mapping = NULL;
- share->idx_trans_tbl.index_count = 0;
- share->idx_trans_tbl.array_size = 0;
- }
-
- share->use_count++;
- mysql_mutex_unlock(&innobase_share_mutex);
-
- return(share);
-}
-
-/************************************************************************//**
-Free the shared object that was registered with get_share(). */
-static
-void
-free_share(
-/*=======*/
- INNOBASE_SHARE* share) /*!< in/own: table share to free */
+uint
+ha_innobase::lock_count(void) const
+/*===============================*/
{
- mysql_mutex_lock(&innobase_share_mutex);
-
-#ifdef UNIV_DEBUG
- INNOBASE_SHARE* share2;
- ulint fold = ut_fold_string(share->table_name);
-
- HASH_SEARCH(table_name_hash, innobase_open_tables, fold,
- INNOBASE_SHARE*, share2,
- ut_ad(share->use_count > 0),
- !strcmp(share->table_name, share2->table_name));
-
- ut_a(share2 == share);
-#endif /* UNIV_DEBUG */
-
- if (!--share->use_count) {
- ulint fold = ut_fold_string(share->table_name);
-
- HASH_DELETE(INNOBASE_SHARE, table_name_hash,
- innobase_open_tables, fold, share);
- thr_lock_delete(&share->lock);
-
- /* Free any memory from index translation table */
- my_free(share->idx_trans_tbl.index_mapping);
-
- my_free(share);
-
- /* TODO: invoke HASH_MIGRATE if innobase_open_tables
- shrinks too much */
- }
-
- mysql_mutex_unlock(&innobase_share_mutex);
+ return 0;
}
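The minimal contract described above — report zero THR_LOCK locks while still observing the lock type the SQL layer chose — can be sketched with stub types (this is an illustration, not the real handler API):

enum thr_lock_type_stub { TLS_IGNORE = -1, TLS_READ = 1, TLS_WRITE = 4 };

struct no_thr_lock_handler {
	thr_lock_type_stub last_requested;

	/* No THR_LOCK objects are registered with the SQL layer. */
	unsigned lock_count() const { return 0; }

	/* Still called per statement: remember the requested type
	and return 'to' unchanged, since nothing was stored. */
	void** store_lock(void** to, thr_lock_type_stub lock_type)
	{
		if (lock_type != TLS_IGNORE) {
			last_requested = lock_type;
		}
		return to;
	}
};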
/*****************************************************************//**
-Converts a MySQL table lock stored in the 'lock' field of the handle to
-a proper type before storing pointer to the lock into an array of pointers.
+Supposed to convert a MySQL table lock stored in the 'lock' field of the
+handle to a proper type before storing a pointer to the lock into an
+array of pointers.
+In practice, since InnoDB no longer relies on THR_LOCK locks and its
+lock_count() method returns 0, it just informs the storage engine about
+the type of THR_LOCK which the SQL layer would have acquired for this
+specific statement on this specific table.
MySQL also calls this if it wants to reset some table locks to a not-locked
state during the processing of an SQL query. An example is that during a
SELECT the read lock is released early on the 'const' tables where we only
fetch one row. MySQL does not call this when it releases all locks at the
end of an SQL statement.
-@return pointer to the next element in the 'to' array */
-UNIV_INTERN
+@return pointer to the current element in the 'to' array. */
+
THR_LOCK_DATA**
ha_innobase::store_lock(
/*====================*/
THD* thd, /*!< in: user thread handle */
- THR_LOCK_DATA** to, /*!< in: pointer to an array
- of pointers to lock structs;
- pointer to the 'lock' field
- of current handle is stored
- next to this array */
- enum thr_lock_type lock_type) /*!< in: lock type to store in
+ THR_LOCK_DATA** to, /*!< in: pointer to the current
+ element in an array of pointers
+ to lock structs;
+ only used as return value */
+ thr_lock_type lock_type) /*!< in: lock type to store in
'lock'; this may also be
TL_IGNORE */
{
- trx_t* trx;
-
- /* Note that trx in this function is NOT necessarily prebuilt->trx
+ /* Note that trx in this function is NOT necessarily m_prebuilt->trx
because we call update_thd() later, in ::external_lock()! Failure to
understand this caused a serious memory corruption bug in 5.1.11. */
- trx = check_trx_exists(thd);
+ trx_t* trx = check_trx_exists(thd);
/* NOTE: MySQL can call this function with lock 'type' TL_IGNORE!
Be careful to ignore TL_IGNORE if we are going to do something with
@@ -15667,12 +16724,16 @@ ha_innobase::store_lock(
(enum_tx_isolation) thd_tx_isolation(thd));
if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
- && trx->global_read_view) {
+ && MVCC::is_view_active(trx->read_view)) {
/* At low transaction isolation levels we let
each consistent read set its own snapshot */
- read_view_close_for_mysql(trx);
+ mutex_enter(&trx_sys->mutex);
+
+ trx_sys->mvcc->view_close(trx->read_view, true);
+
+ mutex_exit(&trx_sys->mutex);
}
}
@@ -15708,16 +16769,16 @@ ha_innobase::store_lock(
detected in the function. */
dberr_t err = row_quiesce_set_state(
- prebuilt->table, QUIESCE_START, trx);
+ m_prebuilt->table, QUIESCE_START, trx);
ut_a(err == DB_SUCCESS || err == DB_UNSUPPORTED);
if (trx->isolation_level == TRX_ISO_SERIALIZABLE) {
- prebuilt->select_lock_type = LOCK_S;
- prebuilt->stored_select_lock_type = LOCK_S;
+ m_prebuilt->select_lock_type = LOCK_S;
+ m_prebuilt->stored_select_lock_type = LOCK_S;
} else {
- prebuilt->select_lock_type = LOCK_NONE;
- prebuilt->stored_select_lock_type = LOCK_NONE;
+ m_prebuilt->select_lock_type = LOCK_NONE;
+ m_prebuilt->stored_select_lock_type = LOCK_NONE;
}
/* Check for DROP TABLE */
@@ -15725,7 +16786,7 @@ ha_innobase::store_lock(
/* MySQL calls this function in DROP TABLE though this table
handle may belong to another thd that is running a query. Let
- us in that case skip any changes to the prebuilt struct. */
+ us in that case skip any changes to the m_prebuilt struct. */
/* Check for LOCK TABLE t1,...,tn WITH SHARED LOCKS */
} else if ((lock_type == TL_READ && in_lock_tables)
@@ -15756,6 +16817,7 @@ ha_innobase::store_lock(
/* Use consistent read for checksum table */
if (sql_command == SQLCOM_CHECKSUM
+ || (sql_command == SQLCOM_ANALYZE && lock_type == TL_READ)
|| ((srv_locks_unsafe_for_binlog
|| trx->isolation_level <= TRX_ISO_READ_COMMITTED)
&& trx->isolation_level != TRX_ISO_SERIALIZABLE
@@ -15776,11 +16838,11 @@ ha_innobase::store_lock(
MODE in select, then we use consistent read
for select. */
- prebuilt->select_lock_type = LOCK_NONE;
- prebuilt->stored_select_lock_type = LOCK_NONE;
+ m_prebuilt->select_lock_type = LOCK_NONE;
+ m_prebuilt->stored_select_lock_type = LOCK_NONE;
} else {
- prebuilt->select_lock_type = LOCK_S;
- prebuilt->stored_select_lock_type = LOCK_S;
+ m_prebuilt->select_lock_type = LOCK_S;
+ m_prebuilt->stored_select_lock_type = LOCK_S;
}
} else if (lock_type != TL_IGNORE) {
@@ -15788,82 +16850,13 @@ ha_innobase::store_lock(
/* We set possible LOCK_X value in external_lock, not yet
here even if this would be SELECT ... FOR UPDATE */
- prebuilt->select_lock_type = LOCK_NONE;
- prebuilt->stored_select_lock_type = LOCK_NONE;
+ m_prebuilt->select_lock_type = LOCK_NONE;
+ m_prebuilt->stored_select_lock_type = LOCK_NONE;
}
- if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) {
-
- /* Starting from 5.0.7, we weaken also the table locks
- set at the start of a MySQL stored procedure call, just like
- we weaken the locks set at the start of an SQL statement.
- MySQL does set in_lock_tables TRUE there, but in reality
- we do not need table locks to make the execution of a
- single transaction stored procedure call deterministic
- (if it does not use a consistent read). */
-
- if (lock_type == TL_READ
- && sql_command == SQLCOM_LOCK_TABLES) {
- /* We come here if MySQL is processing LOCK TABLES
- ... READ LOCAL. MyISAM under that table lock type
- reads the table as it was at the time the lock was
- granted (new inserts are allowed, but not seen by the
- reader). To get a similar effect on an InnoDB table,
- we must use LOCK TABLES ... READ. We convert the lock
- type here, so that for InnoDB, READ LOCAL is
- equivalent to READ. This will change the InnoDB
- behavior in mysqldump, so that dumps of InnoDB tables
- are consistent with dumps of MyISAM tables. */
-
- lock_type = TL_READ_NO_INSERT;
- }
-
- /* If we are not doing a LOCK TABLE, DISCARD/IMPORT
- TABLESPACE or TRUNCATE TABLE then allow multiple
- writers. Note that ALTER TABLE uses a TL_WRITE_ALLOW_READ
- < TL_WRITE_CONCURRENT_INSERT.
-
- We especially allow multiple writers if MySQL is at the
- start of a stored procedure call (SQLCOM_CALL) or a
- stored function call (MySQL does have in_lock_tables
- TRUE there). */
-
- if ((lock_type >= TL_WRITE_CONCURRENT_INSERT
- && lock_type <= TL_WRITE)
- && !(in_lock_tables
- && sql_command == SQLCOM_LOCK_TABLES)
- && !thd_tablespace_op(thd)
- && sql_command != SQLCOM_TRUNCATE
- && sql_command != SQLCOM_OPTIMIZE
- && sql_command != SQLCOM_CREATE_TABLE) {
-
- lock_type = TL_WRITE_ALLOW_WRITE;
- }
-
- /* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
- MySQL would use the lock TL_READ_NO_INSERT on t2, and that
- would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
- to t2. Convert the lock to a normal read lock to allow
- concurrent inserts to t2.
-
- We especially allow concurrent inserts if MySQL is at the
- start of a stored procedure call (SQLCOM_CALL)
- (MySQL does have thd_in_lock_tables() TRUE there). */
-
- if (lock_type == TL_READ_NO_INSERT
- && sql_command != SQLCOM_LOCK_TABLES) {
-
- lock_type = TL_READ;
- }
-
- lock.type = lock_type;
- }
-
- *to++= &lock;
-
if (!trx_is_started(trx)
- && (prebuilt->select_lock_type != LOCK_NONE
- || prebuilt->stored_select_lock_type != LOCK_NONE)) {
+ && (m_prebuilt->select_lock_type != LOCK_NONE
+ || m_prebuilt->stored_select_lock_type != LOCK_NONE)) {
++trx->will_lock;
}
@@ -15875,8 +16868,8 @@ ha_innobase::store_lock(
Read the next autoinc value. Acquire the relevant locks before reading
the AUTOINC value. If SUCCESS then the table AUTOINC mutex will be locked
on return and all relevant locks acquired.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
+
dberr_t
ha_innobase::innobase_get_autoinc(
/*==============================*/
@@ -15884,28 +16877,28 @@ ha_innobase::innobase_get_autoinc(
{
*value = 0;
- prebuilt->autoinc_error = innobase_lock_autoinc();
+ m_prebuilt->autoinc_error = innobase_lock_autoinc();
- if (prebuilt->autoinc_error == DB_SUCCESS) {
+ if (m_prebuilt->autoinc_error == DB_SUCCESS) {
/* Determine the first value of the interval */
- *value = dict_table_autoinc_read(prebuilt->table);
+ *value = dict_table_autoinc_read(m_prebuilt->table);
/* It should have been initialized during open. */
if (*value == 0) {
- prebuilt->autoinc_error = DB_UNSUPPORTED;
- dict_table_autoinc_unlock(prebuilt->table);
+ m_prebuilt->autoinc_error = DB_UNSUPPORTED;
+ dict_table_autoinc_unlock(m_prebuilt->table);
}
}
- return(prebuilt->autoinc_error);
+ return(m_prebuilt->autoinc_error);
}
/*******************************************************************//**
This function reads the global auto-inc counter. It doesn't use the
AUTOINC lock even if the lock mode is set to TRADITIONAL.
-@return the autoinc value */
-UNIV_INTERN
+@return the autoinc value */
+
ulonglong
ha_innobase::innobase_peek_autoinc(void)
/*====================================*/
@@ -15913,19 +16906,18 @@ ha_innobase::innobase_peek_autoinc(void)
ulonglong auto_inc;
dict_table_t* innodb_table;
- ut_a(prebuilt != NULL);
- ut_a(prebuilt->table != NULL);
+ ut_a(m_prebuilt != NULL);
+ ut_a(m_prebuilt->table != NULL);
- innodb_table = prebuilt->table;
+ innodb_table = m_prebuilt->table;
dict_table_autoinc_lock(innodb_table);
auto_inc = dict_table_autoinc_read(innodb_table);
if (auto_inc == 0) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: AUTOINC next value generation "
- "is disabled for '%s'\n", innodb_table->name);
+ ib::info() << "AUTOINC next value generation is disabled for"
+ " '" << innodb_table->name << "'";
}
dict_table_autoinc_unlock(innodb_table);
@@ -15935,7 +16927,7 @@ ha_innobase::innobase_peek_autoinc(void)
/*********************************************************************//**
Returns the value of the auto-inc counter in *first_value and ~0 on failure. */
-UNIV_INTERN
+
void
ha_innobase::get_auto_increment(
/*============================*/
@@ -15952,7 +16944,7 @@ ha_innobase::get_auto_increment(
dberr_t error;
ulonglong autoinc = 0;
- /* Prepare prebuilt->trx in the table handle */
+ /* Prepare m_prebuilt->trx in the table handle */
update_thd(ha_thd());
error = innobase_get_autoinc(&autoinc);
@@ -15970,7 +16962,7 @@ ha_innobase::get_auto_increment(
called and count down from that as rows are written (see write_row()).
*/
- trx = prebuilt->trx;
+ trx = m_prebuilt->trx;
/* Note: We can't rely on *first_value since some MySQL engines,
in particular the partition engine, don't initialize it to 0 when
@@ -15979,8 +16971,8 @@ ha_innobase::get_auto_increment(
/* We need the upper limit of the col type to check for
whether we update the table autoinc counter or not. */
- ulonglong col_max_value = innobase_get_int_col_max_value(
- table->next_number_field);
+ ulonglong col_max_value =
+ table->next_number_field->get_max_int_value();
/** The following logic is needed to avoid duplicate key error
for autoincrement column.
@@ -15993,7 +16985,7 @@ ha_innobase::get_auto_increment(
(3) It is restricted only for insert operations. */
- if (increment > 1 && thd_sql_command(user_thd) != SQLCOM_ALTER_TABLE
+ if (increment > 1 && thd_sql_command(m_user_thd) != SQLCOM_ALTER_TABLE
&& autoinc < col_max_value) {
ulonglong prev_auto_inc = autoinc;
@@ -16027,19 +17019,19 @@ ha_innobase::get_auto_increment(
set_if_bigger(*first_value, autoinc);
/* Not in the middle of a mult-row INSERT. */
- } else if (prebuilt->autoinc_last_value == 0) {
+ } else if (m_prebuilt->autoinc_last_value == 0) {
set_if_bigger(*first_value, autoinc);
}
- if (*first_value > col_max_value)
- {
- /* Out of range number. Let handler::update_auto_increment()
- take care of this */
- prebuilt->autoinc_last_value = 0;
- dict_table_autoinc_unlock(prebuilt->table);
- *nb_reserved_values= 0;
- return;
- }
+ if (*first_value > col_max_value) {
+ /* Out of range number. Let handler::update_auto_increment()
+ take care of this */
+ m_prebuilt->autoinc_last_value = 0;
+ dict_table_autoinc_unlock(m_prebuilt->table);
+ *nb_reserved_values= 0;
+ return;
+ }
+
*nb_reserved_values = trx->n_autoinc_rows;
/* With old style AUTOINC locking we only update the table's
@@ -16055,69 +17047,35 @@ ha_innobase::get_auto_increment(
current, *nb_reserved_values, increment, offset,
col_max_value);
- prebuilt->autoinc_last_value = next_value;
+ m_prebuilt->autoinc_last_value = next_value;
- if (prebuilt->autoinc_last_value < *first_value) {
+ if (m_prebuilt->autoinc_last_value < *first_value) {
*first_value = (~(ulonglong) 0);
} else {
/* Update the table autoinc variable */
dict_table_autoinc_update_if_greater(
- prebuilt->table, prebuilt->autoinc_last_value);
+ m_prebuilt->table,
+ m_prebuilt->autoinc_last_value);
}
} else {
/* This will force write_row() into attempting an update
of the table's AUTOINC counter. */
- prebuilt->autoinc_last_value = 0;
+ m_prebuilt->autoinc_last_value = 0;
}
/* The increment to be used to increase the AUTOINC value, we use
this in write_row() and update_row() to increase the autoinc counter
for columns that are filled by the user. We need the offset and
the increment. */
- prebuilt->autoinc_offset = offset;
- prebuilt->autoinc_increment = increment;
+ m_prebuilt->autoinc_offset = offset;
+ m_prebuilt->autoinc_increment = increment;
- dict_table_autoinc_unlock(prebuilt->table);
-}
-
-/*******************************************************************//**
-Reset the auto-increment counter to the given value, i.e. the next row
-inserted will get the given value. This is called e.g. after TRUNCATE
-is emulated by doing a 'DELETE FROM t'. HA_ERR_WRONG_COMMAND is
-returned by storage engines that don't support this operation.
-@return 0 or error code */
-UNIV_INTERN
-int
-ha_innobase::reset_auto_increment(
-/*==============================*/
- ulonglong value) /*!< in: new value for table autoinc */
-{
- DBUG_ENTER("ha_innobase::reset_auto_increment");
-
- dberr_t error;
-
- update_thd(ha_thd());
-
- error = row_lock_table_autoinc_for_mysql(prebuilt);
-
- if (error != DB_SUCCESS) {
- DBUG_RETURN(convert_error_code_to_mysql(
- error, prebuilt->table->flags, user_thd));
- }
-
- /* The next value can never be 0. */
- if (value == 0) {
- value = 1;
- }
-
- innobase_reset_autoinc(value);
-
- DBUG_RETURN(0);
+ dict_table_autoinc_unlock(m_prebuilt->table);
}
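The reservation logic above hands MySQL an interval of values: the first value plus nb_reserved_values steps of size increment, shifted by offset and capped at the column maximum. A simplified model of that computation (innobase_next_autoinc() itself is defined elsewhere; this sketch assumes 0 < offset <= increment and ignores some overflow corner cases the real code handles):

#include <stdint.h>

/* Last value of an n-value reservation after 'current' in the
sequence offset + k * increment, saturating at max_value. */
static uint64_t
next_autoinc(uint64_t current, uint64_t n,
	     uint64_t increment, uint64_t offset, uint64_t max_value)
{
	if (increment == 0 || n == 0
	    || current > max_value - n * increment) {
		return max_value;	/* would overflow: saturate */
	}

	/* Smallest k with offset + k * increment > current. */
	uint64_t k = (current >= offset)
		? (current - offset) / increment + 1 : 0;

	uint64_t last = offset + (k + n - 1) * increment;

	return last > max_value ? max_value : last;
}

For example, with current=5, n=3, increment=2 and offset=1 the reserved values are 7, 9, 11 and the sketch returns 11.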
/*******************************************************************//**
See comment in handler.cc */
-UNIV_INTERN
+
bool
ha_innobase::get_error_message(
/*===========================*/
@@ -16137,24 +17095,21 @@ ha_innobase::get_error_message(
return(FALSE);
}
-/*******************************************************************//**
- Retrieves the names of the table and the key for which there was a
- duplicate entry in the case of HA_ERR_FOREIGN_DUPLICATE_KEY.
-
- If any of the names is not available, then this method will return
- false and will not change any of child_table_name or child_key_name.
-
- @param child_table_name[out] Table name
- @param child_table_name_len[in] Table name buffer size
- @param child_key_name[out] Key name
- @param child_key_name_len[in] Key name buffer size
-
- @retval true table and key names were available
- and were written into the corresponding
- out parameters.
- @retval false table and key names were not available,
- the out parameters were not touched.
-*/
+/** Retrieves the names of the table and the key for which there was a
+duplicate entry in the case of HA_ERR_FOREIGN_DUPLICATE_KEY.
+
+If any of the names is not available, then this method will return
+false and will not change any of child_table_name or child_key_name.
+
+@param[out] child_table_name Table name
+@param[in] child_table_name_len Table name buffer size
+@param[out] child_key_name Key name
+@param[in] child_key_name_len Key name buffer size
+
+@retval true table and key names were available and were written into the
+corresponding out parameters.
+@retval false table and key names were not available, the out parameters
+were not touched. */
bool
ha_innobase::get_foreign_dup_key(
/*=============================*/
@@ -16165,10 +17120,10 @@ ha_innobase::get_foreign_dup_key(
{
const dict_index_t* err_index;
- ut_a(prebuilt->trx != NULL);
- ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
+ ut_a(m_prebuilt->trx != NULL);
+ ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N);
- err_index = trx_get_error_info(prebuilt->trx);
+ err_index = trx_get_error_info(m_prebuilt->trx);
if (err_index == NULL) {
return(false);
@@ -16177,20 +17132,24 @@ ha_innobase::get_foreign_dup_key(
/* copy table name (and convert from filename-safe encoding to
system_charset_info) */
- char* p;
- p = strchr(err_index->table->name, '/');
+ char* p = strchr(err_index->table->name.m_name, '/');
+
/* strip ".../" prefix if any */
if (p != NULL) {
p++;
} else {
- p = err_index->table->name;
+ p = err_index->table->name.m_name;
}
- uint len;
+
+ size_t len;
+
len = filename_to_tablename(p, child_table_name, child_table_name_len);
+
child_table_name[len] = '\0';
/* copy index name */
- ut_snprintf(child_key_name, child_key_name_len, "%s", err_index->name);
+ snprintf(child_key_name, child_key_name_len, "%s",
+ err_index->name());
return(true);
}
@@ -16199,8 +17158,8 @@ ha_innobase::get_foreign_dup_key(
Compares two 'refs'. A 'ref' is the (internal) primary key value of the row.
If there is no explicitly declared non-null unique key or a primary key, then
InnoDB internally uses the row id as the primary key.
-@return < 0 if ref1 < ref2, 0 if equal, else > 0 */
-UNIV_INTERN
+@return < 0 if ref1 < ref2, 0 if equal, else > 0 */
+
int
ha_innobase::cmp_ref(
/*=================*/
@@ -16217,7 +17176,7 @@ ha_innobase::cmp_ref(
uint len2;
int result;
- if (prebuilt->clust_index_was_generated) {
+ if (m_prebuilt->clust_index_was_generated) {
/* The 'ref' is an InnoDB row id */
return(memcmp(ref1, ref2, DATA_ROW_ID_LEN));
@@ -16229,7 +17188,7 @@ ha_innobase::cmp_ref(
key_part = table->key_info[table->s->primary_key].key_part;
key_part_end = key_part
- + table->key_info[table->s->primary_key].user_defined_key_parts;
+ + table->key_info[table->s->primary_key].user_defined_key_parts;
for (; key_part != key_part_end; ++key_part) {
field = key_part->field;
@@ -16266,8 +17225,8 @@ ha_innobase::cmp_ref(
/*******************************************************************//**
Ask InnoDB if a query to a table can be cached.
-@return TRUE if query caching of the table is permitted */
-UNIV_INTERN
+@return TRUE if query caching of the table is permitted */
+
my_bool
ha_innobase::register_query_cache_table(
/*====================================*/
@@ -16282,34 +17241,13 @@ ha_innobase::register_query_cache_table(
is permitted */
ulonglong *engine_data) /*!< in/out: data to call_back */
{
- *call_back = innobase_query_caching_of_table_permitted;
*engine_data = 0;
- return(innobase_query_caching_of_table_permitted(thd, table_key,
- key_length,
- engine_data));
-}
-
-/*******************************************************************//**
-Get the bin log name. */
-UNIV_INTERN
-const char*
-ha_innobase::get_mysql_bin_log_name()
-/*=================================*/
-{
- return(trx_sys_mysql_bin_log_name);
-}
-
-/*******************************************************************//**
-Get the bin log offset (or file position). */
-UNIV_INTERN
-ulonglong
-ha_innobase::get_mysql_bin_log_pos()
-/*================================*/
-{
- /* trx... is ib_int64_t, which is a typedef for a 64-bit integer
- (__int64 or longlong) so it's ok to cast it to ulonglong. */
+ *call_back = innobase_query_caching_of_table_permitted;
- return(trx_sys_mysql_bin_log_pos);
+ return(innobase_query_caching_of_table_permitted(
+ thd, table_key,
+ static_cast<uint>(key_length),
+ engine_data));
}
/******************************************************************//**
@@ -16317,8 +17255,7 @@ This function is used to find the storage length in bytes of the first n
characters for prefix indexes using a multibyte character set. The function
finds charset information and returns length of prefix_len characters in the
index field in bytes.
-@return number of bytes occupied by the first n characters */
-UNIV_INTERN
+@return number of bytes occupied by the first n characters */
ulint
innobase_get_at_most_n_mbchars(
/*===========================*/
@@ -16371,12 +17308,13 @@ innobase_get_at_most_n_mbchars(
if (char_length > data_len) {
char_length = data_len;
}
+ } else if (data_len < prefix_len) {
+
+ char_length = data_len;
+
} else {
- if (data_len < prefix_len) {
- char_length = data_len;
- } else {
- char_length = prefix_len;
- }
+
+ char_length = prefix_len;
}
return(char_length);
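For a fixed multibyte encoding, the same character-to-byte clamp can be computed by walking lead bytes directly. A standalone UTF-8 sketch (manual byte walk instead of the server charset API; assumes the input is valid UTF-8):

#include <stddef.h>

/* Bytes occupied by the first n_chars UTF-8 characters of
data[0..data_len), never more than data_len. Continuation
bytes match the bit pattern 10xxxxxx. */
static size_t
utf8_prefix_bytes(const char* data, size_t data_len, size_t n_chars)
{
	size_t i = 0;

	for (size_t c = 0; c < n_chars && i < data_len; c++) {
		i++;	/* lead byte */

		while (i < data_len
		       && (static_cast<unsigned char>(data[i]) & 0xC0)
			  == 0x80) {
			i++;	/* continuation bytes */
		}
	}

	return i;
}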
@@ -16384,7 +17322,7 @@ innobase_get_at_most_n_mbchars(
/*******************************************************************//**
This function is used to prepare an X/Open XA distributed transaction.
-@return 0 or error number */
+@return 0 or error number */
static
int
innobase_xa_prepare(
@@ -16397,36 +17335,18 @@ innobase_xa_prepare(
false - the current SQL statement
ended */
{
- int error = 0;
trx_t* trx = check_trx_exists(thd);
DBUG_ASSERT(hton == innodb_hton_ptr);
- /* we use support_xa value as it was seen at transaction start
- time, not the current session variable value. Any possible changes
- to the session variable take effect only in the next transaction */
- if (!trx->support_xa) {
-
-#ifdef WITH_WSREP
- thd_get_xid(thd, (MYSQL_XID*) &trx->xid);
-#endif // WITH_WSREP
- return(0);
- }
-
- thd_get_xid(thd, (MYSQL_XID*) &trx->xid);
-
- /* Release a possible FIFO ticket and search latch. Since we will
- reserve the trx_sys->mutex, we have to release the search system
- latch first to obey the latching order. */
-
- trx_search_latch_release_if_reserved(trx);
+ thd_get_xid(thd, (MYSQL_XID*) trx->xid);
innobase_srv_conc_force_exit_innodb(trx);
if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
- sql_print_error("Transaction not registered for MySQL 2PC, "
- "but transaction is active");
+ sql_print_error("Transaction not registered for MariaDB 2PC,"
+ " but transaction is active");
}
if (prepare_trx
@@ -16438,8 +17358,6 @@ innobase_xa_prepare(
ut_ad(trx_is_registered_for_2pc(trx));
trx_prepare_for_mysql(trx);
-
- error = 0;
} else {
/* We just mark the SQL statement ended and do not do a
transaction prepare */
@@ -16456,12 +17374,30 @@ innobase_xa_prepare(
trx_mark_sql_stat_end(trx);
}
- return(error);
+ if (thd_sql_command(thd) != SQLCOM_XA_PREPARE
+ && (prepare_trx
+ || !thd_test_options(
+ thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
+
+		/* For mysqlbackup to work, the order of transactions in the binlog
+ and InnoDB must be the same. Consider the situation
+
+ thread1> prepare; write to binlog; ...
+ <context switch>
+ thread2> prepare; write to binlog; commit
+ thread1> ... commit
+
+ The server guarantees that writes to the binary log
+ and commits are in the same order, so we do not have
+ to handle this case. */
+ }
+
+ return(0);
}
/*******************************************************************//**
This function is used to recover X/Open XA distributed transactions.
-@return number of prepared transactions stored in xid_list */
+@return number of prepared transactions stored in xid_list */
static
int
innobase_xa_recover(
@@ -16483,7 +17419,7 @@ innobase_xa_recover(
/*******************************************************************//**
This function is used to commit one X/Open XA distributed transaction
which is in the prepared state
-@return 0 or error number */
+@return 0 or error number */
static
int
innobase_commit_by_xid(
@@ -16491,113 +17427,61 @@ innobase_commit_by_xid(
handlerton* hton,
XID* xid) /*!< in: X/Open XA transaction identification */
{
- trx_t* trx;
-
DBUG_ASSERT(hton == innodb_hton_ptr);
if (high_level_read_only) {
return(XAER_RMFAIL);
}
- trx = trx_get_trx_by_xid(xid);
-
- if (trx) {
+ if (trx_t* trx = trx_get_trx_by_xid(xid)) {
+ /* use cases are: disconnected xa, slave xa, recovery */
innobase_commit_low(trx);
+ ut_ad(trx->mysql_thd == NULL);
+ trx_deregister_from_2pc(trx);
+ ut_ad(!trx->will_lock); /* trx cache requirement */
trx_free_for_background(trx);
+
return(XA_OK);
} else {
return(XAER_NOTA);
}
}
-/*******************************************************************//**
-This function is used to rollback one X/Open XA distributed transaction
+/** This function is used to rollback one X/Open XA distributed transaction
which is in the prepared state
-@return 0 or error number */
-static
-int
-innobase_rollback_by_xid(
-/*=====================*/
- handlerton* hton, /*!< in: InnoDB handlerton */
- XID* xid) /*!< in: X/Open XA transaction
- identification */
-{
- trx_t* trx;
+@param[in] hton InnoDB handlerton
+@param[in] xid X/Open XA transaction identification
+
+@return 0 or error number */
+int innobase_rollback_by_xid(handlerton* hton, XID* xid)
+{
DBUG_ASSERT(hton == innodb_hton_ptr);
if (high_level_read_only) {
return(XAER_RMFAIL);
}
- trx = trx_get_trx_by_xid(xid);
- if (trx) {
- int ret = innobase_rollback_trx(trx);
+ if (trx_t* trx = trx_get_trx_by_xid(xid)) {
+#ifdef WITH_WSREP
+		/* If a wsrep transaction is being rolled back during
+		recovery, we must clear the xid in order to avoid
+		writing serialisation history for a rolled back transaction. */
+ if (wsrep_is_wsrep_xid(trx->xid)) {
+ trx->xid->null();
+ }
+#endif /* WITH_WSREP */
+ int ret = innobase_rollback_trx(trx);
+ trx_deregister_from_2pc(trx);
+ ut_ad(!trx->will_lock);
trx_free_for_background(trx);
+
return(ret);
} else {
return(XAER_NOTA);
}
}
-/*******************************************************************//**
-Create a consistent view for a cursor based on current transaction
-which is created if the corresponding MySQL thread still lacks one.
-This consistent view is then used inside of MySQL when accessing records
-using a cursor.
-@return pointer to cursor view or NULL */
-static
-void*
-innobase_create_cursor_view(
-/*========================*/
- handlerton* hton, /*!< in: innobase hton */
- THD* thd) /*!< in: user thread handle */
-{
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- return(read_cursor_view_create_for_mysql(check_trx_exists(thd)));
-}
-
-/*******************************************************************//**
-Close the given consistent cursor view of a transaction and restore
-global read view to a transaction read view. Transaction is created if the
-corresponding MySQL thread still lacks one. */
-static
-void
-innobase_close_cursor_view(
-/*=======================*/
- handlerton* hton, /*!< in: innobase hton */
- THD* thd, /*!< in: user thread handle */
- void* curview)/*!< in: Consistent read view to be closed */
-{
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- read_cursor_view_close_for_mysql(check_trx_exists(thd),
- (cursor_view_t*) curview);
-}
-
-/*******************************************************************//**
-Set the given consistent cursor view to a transaction which is created
-if the corresponding MySQL thread still lacks one. If the given
-consistent cursor view is NULL global read view of a transaction is
-restored to a transaction read view. */
-static
-void
-innobase_set_cursor_view(
-/*=====================*/
- handlerton* hton, /*!< in: innobase hton */
- THD* thd, /*!< in: user thread handle */
- void* curview)/*!< in: Consistent cursor view to be set */
-{
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- read_cursor_set_for_mysql(check_trx_exists(thd),
- (cursor_view_t*) curview);
-}
-
-/*******************************************************************//**
-*/
-UNIV_INTERN
bool
ha_innobase::check_if_incompatible_data(
/*====================================*/
@@ -16610,7 +17494,7 @@ ha_innobase::check_if_incompatible_data(
param_new = info->option_struct;
param_old = table->s->option_struct;
- innobase_copy_frm_flags_from_create_info(prebuilt->table, info);
+ innobase_copy_frm_flags_from_create_info(m_prebuilt->table, info);
if (table_changes != IS_EQUAL_YES) {
@@ -16618,8 +17502,8 @@ ha_innobase::check_if_incompatible_data(
}
/* Check that auto_increment value was not changed */
- if ((info->used_fields & HA_CREATE_USED_AUTO) &&
- info->auto_increment_value != 0) {
+ if ((info->used_fields & HA_CREATE_USED_AUTO)
+ && info->auto_increment_value != 0) {
return(COMPATIBLE_DATA_NO);
}
@@ -16638,8 +17522,8 @@ ha_innobase::check_if_incompatible_data(
	/* Changes to engine-specific table options request a rebuild of the table. */
if (param_new->page_compressed != param_old->page_compressed ||
- param_new->page_compression_level != param_old->page_compression_level ||
- param_new->atomic_writes != param_old->atomic_writes) {
+ param_new->page_compression_level != param_old->page_compression_level)
+ {
return(COMPATIBLE_DATA_NO);
}
@@ -16662,6 +17546,7 @@ innodb_io_capacity_max_update(
from check function */
{
ulong in_val = *static_cast<const ulong*>(save);
+
if (in_val < srv_io_capacity) {
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
ER_WRONG_ARGUMENTS,
@@ -16672,7 +17557,7 @@ innodb_io_capacity_max_update(
srv_io_capacity = in_val;
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_ARGUMENTS,
+ ER_WRONG_ARGUMENTS,
"Setting innodb_io_capacity to %lu",
srv_io_capacity);
}
@@ -16701,8 +17586,8 @@ innodb_io_capacity_update(
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
ER_WRONG_ARGUMENTS,
"Setting innodb_io_capacity to %lu"
- " higher than innodb_io_capacity_max %lu",
- in_val, srv_max_io_capacity);
+ " higher than innodb_io_capacity_max %lu",
+ in_val, srv_max_io_capacity);
srv_max_io_capacity = in_val * 2;
@@ -16784,7 +17669,7 @@ innodb_max_dirty_pages_pct_lwm_update(
/************************************************************//**
Validate the file format name and return its corresponding id.
-@return valid file format id */
+@return valid file format id */
static
uint
innobase_file_format_name_lookup(
@@ -16851,7 +17736,7 @@ innobase_file_format_validate_and_set(
/*************************************************************//**
Check if it is a valid file format. This function is registered as
a callback with MySQL.
-@return 0 for valid file format */
+@return 0 for valid file format */
static
int
innodb_file_format_name_validate(
@@ -16893,6 +17778,37 @@ innodb_file_format_name_validate(
return(1);
}
+/*************************************************************//**
+Don't allow setting innodb_fast_shutdown=0 if the purge threads have
+already exited.
+@return 0 if innodb_fast_shutdown can be set */
+static
+int
+fast_shutdown_validate(
+/*=============================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to system
+ variable */
+ void* save, /*!< out: immediate result
+ for update function */
+ struct st_mysql_value* value) /*!< in: incoming string */
+{
+ if (check_sysvar_int(thd, var, save, value)) {
+ return(1);
+ }
+
+ uint new_val = *reinterpret_cast<uint*>(save);
+
+ if (srv_fast_shutdown && !new_val
+ && !my_atomic_loadptr_explicit(reinterpret_cast<void**>
+ (&srv_running),
+ MY_MEMORY_ORDER_RELAXED)) {
+ return(1);
+ }
+
+ return(0);
+}
+
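
The rule the validator above enforces can be reduced to a small standalone
sketch (plain C++; names are hypothetical, not InnoDB's API): once a non-zero
innodb_fast_shutdown setting has let the purge threads exit, the value can no
longer be lowered back to 0, because the slow shutdown would need purge work
that can no longer run.

    #include <cassert>

    static bool allow_fast_shutdown_change(unsigned current, unsigned new_val,
                                           bool purge_running)
    {
            /* Reject only the transition to 0 after purge is gone. */
            return !(current && new_val == 0 && !purge_running);
    }

    int main()
    {
            assert(!allow_fast_shutdown_change(1, 0, false)); /* rejected */
            assert(allow_fast_shutdown_change(1, 0, true));   /* purge alive */
            assert(allow_fast_shutdown_change(0, 2, false));  /* raising is fine */
            return 0;
    }
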
/****************************************************************//**
Update the system variable innodb_file_format using the "saved"
value. This function is registered as a callback with MySQL. */
@@ -16913,6 +17829,10 @@ innodb_file_format_name_update(
ut_a(var_ptr != NULL);
ut_a(save != NULL);
+
+ push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_ERR_WRONG_COMMAND, deprecated_file_format);
+
format_name = *static_cast<const char*const*>(save);
if (format_name) {
@@ -16932,7 +17852,7 @@ innodb_file_format_name_update(
/*************************************************************//**
Check if valid argument to innodb_file_format_max. This function
is registered as a callback with MySQL.
-@return 0 for valid file format */
+@return 0 for valid file format */
static
int
innodb_file_format_max_validate(
@@ -16972,9 +17892,9 @@ innodb_file_format_max_validate(
push_warning_printf(thd,
Sql_condition::WARN_LEVEL_WARN,
ER_WRONG_ARGUMENTS,
- "InnoDB: invalid innodb_file_format_max "
- "value; can be any format up to %s "
- "or equivalent id of %d",
+ "InnoDB: invalid innodb_file_format_max"
+ " value; can be any format up to %s"
+ " or equivalent id of %d",
trx_sys_file_format_id_to_name(UNIV_FORMAT_MAX),
UNIV_FORMAT_MAX);
}
@@ -17006,6 +17926,10 @@ innodb_file_format_max_update(
ut_a(save != NULL);
ut_a(var_ptr != NULL);
+
+ push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_ERR_WRONG_COMMAND, deprecated_file_format_max);
+
format_name_in = *static_cast<const char*const*>(save);
if (!format_name_in) {
@@ -17028,13 +17952,29 @@ innodb_file_format_max_update(
/* Update the max format id in the system tablespace. */
if (trx_sys_file_format_max_set(format_id, format_name_out)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " [Info] InnoDB: the file format in the system "
- "tablespace is now set to %s.\n", *format_name_out);
+ ib::info() << "The file format in the system tablespace is now"
+ " set to " << *format_name_out << ".";
}
}
+/** Update innodb_large_prefix.
+@param[in,out] thd MySQL client connection
+@param[out] var_ptr current value
+@param[in] save to-be-assigned value */
+static
+void
+innodb_large_prefix_update(
+ THD* thd,
+ st_mysql_sys_var*,
+ void* var_ptr,
+ const void* save)
+{
+ push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_ERR_WRONG_COMMAND, deprecated_large_prefix);
+
+ *static_cast<my_bool*>(var_ptr) = *static_cast<const my_bool*>(save);
+}
+
/*************************************************************//**
Check whether valid argument given to innobase_*_stopword_table.
This function is registered as a callback with MySQL.
@@ -17078,6 +18018,32 @@ innodb_stopword_table_validate(
return(ret);
}
+/** Update the system variable innodb_buffer_pool_size using the "saved"
+value. This function is registered as a callback with MySQL.
+@param[in] thd thread handle
+@param[in] var pointer to system variable
+@param[out] var_ptr where the formal string goes
+@param[in] save immediate result from check function */
+static
+void
+innodb_buffer_pool_size_update(
+ THD* thd,
+ struct st_mysql_sys_var* var,
+ void* var_ptr,
+ const void* save)
+{
+ longlong in_val = *static_cast<const longlong*>(save);
+
+ snprintf(export_vars.innodb_buffer_pool_resize_status,
+ sizeof(export_vars.innodb_buffer_pool_resize_status),
+ "Requested to resize buffer pool.");
+
+ os_event_set(srv_buf_resize_event);
+
+ ib::info() << export_vars.innodb_buffer_pool_resize_status
+ << " (new size: " << in_val << " bytes)";
+}
+
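
The update callback above only records the request and signals
srv_buf_resize_event; the resize itself happens asynchronously in a background
thread. A minimal sketch of that hand-off, using standard C++ primitives in
place of InnoDB's os_event API (sizes illustrative):

    #include <condition_variable>
    #include <cstdio>
    #include <mutex>
    #include <thread>

    std::mutex m;
    std::condition_variable cv;
    long long requested_size = 0;       /* guarded by m */

    void resize_thread()
    {
            std::unique_lock<std::mutex> lk(m);
            cv.wait(lk, [] { return requested_size != 0; });
            std::printf("resizing buffer pool to %lld bytes\n", requested_size);
    }

    int main()
    {
            std::thread t(resize_thread);
            {
                    std::lock_guard<std::mutex> lk(m);
                    requested_size = 1LL << 30;     /* request 1 GiB */
            }
            cv.notify_one();                        /* analogous to os_event_set() */
            t.join();
            return 0;
    }
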
/** The latest assigned innodb_ft_aux_table name */
static char* innodb_ft_aux_table;
@@ -17111,6 +18077,7 @@ static int innodb_ft_aux_table_validate(THD*, st_mysql_sys_var*,
}
}
+#ifdef BTR_CUR_HASH_ADAPT
/****************************************************************//**
Update the system variable innodb_adaptive_hash_index using the "saved"
value. This function is registered as a callback with MySQL. */
@@ -17130,10 +18097,11 @@ innodb_adaptive_hash_index_update(
if (*(my_bool*) save) {
btr_search_enable();
} else {
- btr_search_disable();
+ btr_search_disable(true);
}
mysql_mutex_lock(&LOCK_global_system_variables);
}
+#endif /* BTR_CUR_HASH_ADAPT */
/****************************************************************//**
Update the system variable innodb_cmp_per_index using the "saved"
@@ -17198,15 +18166,15 @@ innodb_change_buffer_max_size_update(
const void* save) /*!< in: immediate result
from check function */
{
- innobase_change_buffer_max_size = *static_cast<const uint*>(save);
+ srv_change_buffer_max_size = *static_cast<const uint*>(save);
mysql_mutex_unlock(&LOCK_global_system_variables);
- ibuf_max_size_update(innobase_change_buffer_max_size);
+ ibuf_max_size_update(srv_change_buffer_max_size);
mysql_mutex_lock(&LOCK_global_system_variables);
}
#ifdef UNIV_DEBUG
-ulong srv_fil_make_page_dirty_debug = 0;
-ulong srv_saved_page_number_debug = 0;
+static ulong srv_fil_make_page_dirty_debug = 0;
+static ulong srv_saved_page_number_debug = 0;
/****************************************************************//**
Save an InnoDB page number. */
@@ -17224,9 +18192,8 @@ innodb_save_page_no(
{
srv_saved_page_number_debug = *static_cast<const ulong*>(save);
- ib_logf(IB_LOG_LEVEL_INFO,
- "Saving InnoDB page number: %lu",
- srv_saved_page_number_debug);
+ ib::info() << "Saving InnoDB page number: "
+ << srv_saved_page_number_debug;
}
/****************************************************************//**
@@ -17243,35 +18210,49 @@ innodb_make_page_dirty(
const void* save) /*!< in: immediate result
from check function */
{
- mtr_t mtr;
- ulong space_id = *static_cast<const ulong*>(save);
+ mtr_t mtr;
+ ulong space_id = *static_cast<const ulong*>(save);
mysql_mutex_unlock(&LOCK_global_system_variables);
+ fil_space_t* space = fil_space_acquire_silent(space_id);
+
+ if (space == NULL) {
+func_exit_no_space:
+ mysql_mutex_lock(&LOCK_global_system_variables);
+ return;
+ }
+
+ if (srv_saved_page_number_debug >= space->size) {
+func_exit:
+ fil_space_release(space);
+ goto func_exit_no_space;
+ }
- mtr_start(&mtr);
+ mtr.start();
+ mtr.set_named_space(space);
- buf_block_t* block = buf_page_get(
- space_id, 0, srv_saved_page_number_debug, RW_X_LATCH, &mtr);
+ buf_block_t* block = buf_page_get(
+ page_id_t(space_id, srv_saved_page_number_debug),
+ page_size_t(space->flags), RW_X_LATCH, &mtr);
+
+ if (block != NULL) {
+ byte* page = block->frame;
+
+ ib::info() << "Dirtying page: " << page_id_t(
+ page_get_space_id(page), page_get_page_no(page));
- if (block) {
- byte* page = block->frame;
- ib_logf(IB_LOG_LEVEL_INFO,
- "Dirtying page:%lu of space:%lu",
- page_get_page_no(page),
- page_get_space_id(page));
mlog_write_ulint(page + FIL_PAGE_TYPE,
fil_page_get_type(page),
MLOG_2BYTES, &mtr);
}
- mtr_commit(&mtr);
- mysql_mutex_lock(&LOCK_global_system_variables);
+ mtr.commit();
+ goto func_exit;
}
#endif // UNIV_DEBUG
-
/*************************************************************//**
Find the corresponding ibuf_use_t value that indexes into
innobase_change_buffering_values[] array for the input
change buffering option name.
-@return corresponding IBUF_USE_* value for the input variable
+@return corresponding IBUF_USE_* value for the input variable
name, or IBUF_USE_COUNT if not able to find a match */
static
ibuf_use_t
@@ -17280,14 +18261,14 @@ innodb_find_change_buffering_value(
const char* input_name) /*!< in: input change buffering
option name */
{
- ulint use;
+ for (ulint i = 0;
+ i < UT_ARR_SIZE(innobase_change_buffering_values);
+ ++i) {
- for (use = 0; use < UT_ARR_SIZE(innobase_change_buffering_values);
- use++) {
/* found a match */
if (!innobase_strcasecmp(
- input_name, innobase_change_buffering_values[use])) {
- return((ibuf_use_t) use);
+ input_name, innobase_change_buffering_values[i])) {
+ return(static_cast<ibuf_use_t>(i));
}
}
@@ -17298,7 +18279,7 @@ innodb_find_change_buffering_value(
/*************************************************************//**
Check if it is a valid value of innodb_change_buffering. This function is
registered as a callback with MySQL.
-@return 0 for valid innodb_change_buffering */
+@return 0 for valid innodb_change_buffering */
static
int
innodb_change_buffering_validate(
@@ -17371,7 +18352,7 @@ innodb_change_buffering_update(
/*************************************************************//**
Just emit a warning that the usage of the variable is deprecated.
-@return 0 */
+@return 0 */
static
void
innodb_stats_sample_pages_update(
@@ -17384,19 +18365,16 @@ innodb_stats_sample_pages_update(
const void* save) /*!< in: immediate result
from check function */
{
-#define STATS_SAMPLE_PAGES_DEPRECATED_MSG \
- "Using innodb_stats_sample_pages is deprecated and " \
- "the variable may be removed in future releases. " \
- "Please use innodb_stats_transient_sample_pages " \
- "instead."
+
+ const char* STATS_SAMPLE_PAGES_DEPRECATED_MSG =
+ "Using innodb_stats_sample_pages is deprecated and"
+ " the variable may be removed in future releases."
+ " Please use innodb_stats_transient_sample_pages instead.";
push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
HA_ERR_WRONG_COMMAND, STATS_SAMPLE_PAGES_DEPRECATED_MSG);
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: %s\n",
- STATS_SAMPLE_PAGES_DEPRECATED_MSG);
+ ib::warn() << STATS_SAMPLE_PAGES_DEPRECATED_MSG;
srv_stats_transient_sample_pages =
*static_cast<const unsigned long long*>(save);
@@ -17431,11 +18409,15 @@ innodb_monitor_set_option(
		existing monitor counter (status variable),
		perform special processing to remember the
		existing counter value. */
- if (monitor_info->monitor_type
- & MONITOR_EXISTING) {
+ if (monitor_info->monitor_type & MONITOR_EXISTING) {
srv_mon_process_existing_counter(
monitor_id, MONITOR_TURN_ON);
}
+
+ if (MONITOR_IS_ON(MONITOR_LATCHES)) {
+
+ mutex_monitor.enable();
+ }
break;
case MONITOR_TURN_OFF:
@@ -17446,14 +18428,25 @@ innodb_monitor_set_option(
MONITOR_OFF(monitor_id);
MONITOR_SET_OFF(monitor_id);
+
+ if (!MONITOR_IS_ON(MONITOR_LATCHES)) {
+
+ mutex_monitor.disable();
+ }
break;
case MONITOR_RESET_VALUE:
srv_mon_reset(monitor_id);
+
+ if (monitor_id == (MONITOR_LATCHES)) {
+
+ mutex_monitor.reset();
+ }
break;
case MONITOR_RESET_ALL_VALUE:
srv_mon_reset_all(monitor_id);
+ mutex_monitor.reset();
break;
default:
@@ -17526,7 +18519,7 @@ innodb_monitor_update_wildcard(
/*************************************************************//**
Given a configuration variable name, find corresponding monitor counter
and return its monitor ID if found.
-@return monitor ID if found, MONITOR_NO_MATCH if there is no match */
+@return monitor ID if found, MONITOR_NO_MATCH if there is no match */
static
ulint
innodb_monitor_id_by_name_get(
@@ -17557,7 +18550,7 @@ innodb_monitor_id_by_name_get(
/*************************************************************//**
Validate that the passed in monitor name matches at least one
monitor counter name with wildcard compare.
-@return TRUE if at least one monitor name matches */
+@return TRUE if at least one monitor name matches */
static
ibool
innodb_monitor_validate_wildcard_name(
@@ -17576,7 +18569,7 @@ innodb_monitor_validate_wildcard_name(
/*************************************************************//**
Validate the passed in monitor name, find and save the
corresponding monitor name in the function parameter "save".
-@return 0 if monitor name is valid */
+@return 0 if monitor name is valid */
static
int
innodb_monitor_valid_byname(
@@ -17639,7 +18632,7 @@ innodb_monitor_valid_byname(
/*************************************************************//**
Validate passed-in "value" is a valid monitor counter name.
This function is registered as a callback with MySQL.
-@return 0 for valid name */
+@return 0 for valid name */
static
int
innodb_monitor_validate(
@@ -17667,7 +18660,8 @@ innodb_monitor_validate(
by InnoDB, so we can access it in another callback
function innodb_monitor_update() and free it appropriately */
if (name) {
- monitor_name = my_strdup(name, MYF(0));
+ monitor_name = my_strdup(//PSI_INSTRUMENT_ME,
+ name, MYF(0));
} else {
return(1);
}
@@ -17735,14 +18729,14 @@ innodb_monitor_update(
push_warning_printf(
thd, Sql_condition::WARN_LEVEL_WARN,
ER_NO_DEFAULT,
- "Default value is not defined for "
- "this set option. Please specify "
- "correct counter or module name.");
+ "Default value is not defined for"
+ " this set option. Please specify"
+ " correct counter or module name.");
} else {
sql_print_error(
- "Default value is not defined for "
- "this set option. Please specify "
- "correct counter or module name.\n");
+ "Default value is not defined for"
+ " this set option. Please specify"
+ " correct counter or module name.\n");
}
if (var_ptr) {
@@ -17785,7 +18779,7 @@ exit:
been turned on, we will set err_monitor. Print related
information */
if (err_monitor) {
- sql_print_warning("Monitor %s is already enabled.",
+ sql_print_warning("InnoDB: Monitor %s is already enabled.",
srv_mon_get_name((monitor_id_t) err_monitor));
}
@@ -17796,13 +18790,13 @@ exit:
return;
}
-#ifdef __WIN__
+#ifdef _WIN32
/*************************************************************//**
Validate if passed-in "value" is a valid value for
innodb_buffer_pool_filename. On Windows, file names with colon (:)
are not allowed.
-@return 0 for valid name */
+@return 0 for valid name */
static
int
innodb_srv_buf_dump_filename_validate(
@@ -17814,16 +18808,15 @@ innodb_srv_buf_dump_filename_validate(
for update function */
struct st_mysql_value* value) /*!< in: incoming string */
{
- const char* buf_name;
char buff[OS_FILE_MAX_PATH];
- int len= sizeof(buff);
+ int len = sizeof(buff);
ut_a(save != NULL);
ut_a(value != NULL);
- buf_name = value->val_str(value, buff, &len);
+ const char* buf_name = value->val_str(value, buff, &len);
- if (buf_name) {
+ if (buf_name != NULL) {
if (is_filename_allowed(buf_name, len, FALSE)){
*static_cast<const char**>(save) = buf_name;
return(0);
@@ -17831,17 +18824,17 @@ innodb_srv_buf_dump_filename_validate(
push_warning_printf(thd,
Sql_condition::WARN_LEVEL_WARN,
ER_WRONG_ARGUMENTS,
- "InnoDB: innodb_buffer_pool_filename "
- "cannot have colon (:) in the file name.");
+ "InnoDB: innodb_buffer_pool_filename"
+ " cannot have colon (:) in the file name.");
}
}
return(1);
}
-#else /* __WIN__ */
+#else /* _WIN32 */
# define innodb_srv_buf_dump_filename_validate NULL
-#endif /* __WIN__ */
+#endif /* _WIN32 */
#ifdef UNIV_DEBUG
static char* srv_buffer_pool_evict;
@@ -18012,6 +19005,15 @@ innodb_defragment_frequency_update(
srv_defragment_interval = 1000000000ULL / srv_defragment_frequency;
}
+static inline char *my_strtok_r(char *str, const char *delim, char **saveptr)
+{
+#if defined _WIN32
+ return strtok_s(str, delim, saveptr);
+#else
+ return strtok_r(str, delim, saveptr);
+#endif
+}
+
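
A usage sketch for the wrapper above, mirroring how
innodb_enable_monitor_at_startup() walks a delimiter-separated option string
(monitor names illustrative):

    #include <cstdio>
    #include <cstring>

    /* Same portable wrapper as in the patch above. */
    static inline char* my_strtok_r(char* str, const char* delim, char** saveptr)
    {
    #if defined _WIN32
            return strtok_s(str, delim, saveptr);
    #else
            return strtok_r(str, delim, saveptr);
    #endif
    }

    int main()
    {
            /* The string is modified in place as the tokenizer walks it. */
            char list[] = "module_buffer;module_trx;lock_deadlocks";
            char* last;

            for (char* opt = my_strtok_r(list, ";", &last);
                 opt != NULL;
                 opt = my_strtok_r(NULL, ";", &last)) {
                    std::printf("monitor option: %s\n", opt);
            }
            return 0;
    }
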
/****************************************************************//**
Parse and enable InnoDB monitor counters during server startup.
The user can list the monitor counters/groups to be enabled by specifying
@@ -18033,9 +19035,9 @@ innodb_enable_monitor_at_startup(
and/or counter group name, and calling innodb_monitor_update()
if successfully updated. Please note that the "str" would be
changed by strtok_r() as it walks through it. */
- for (char* option = strtok_r(str, sep, &last);
+ for (char* option = my_strtok_r(str, sep, &last);
option;
- option = strtok_r(NULL, sep, &last)) {
+ option = my_strtok_r(NULL, sep, &last)) {
ulint ret;
char* option_name;
@@ -18066,6 +19068,7 @@ show_innodb_vars(
innodb_export_status();
var->type = SHOW_ARRAY;
var->value = (char*) &innodb_status_variables;
+ //var->scope = SHOW_SCOPE_GLOBAL;
return(0);
}
@@ -18076,7 +19079,6 @@ system default primary index name 'GEN_CLUST_INDEX'. If a name
matches, this function pushes a warning message to the client,
and returns true.
@return true if the index name matches the reserved name */
-UNIV_INTERN
bool
innobase_index_name_is_reserved(
/*============================*/
@@ -18097,10 +19099,10 @@ innobase_index_name_is_reserved(
push_warning_printf(thd,
Sql_condition::WARN_LEVEL_WARN,
ER_WRONG_NAME_FOR_INDEX,
- "Cannot Create Index with name "
- "'%s'. The name is reserved "
- "for the system default primary "
- "index.",
+ "Cannot Create Index with name"
+ " '%s'. The name is reserved"
+ " for the system default primary"
+ " index.",
innobase_index_reserve_name);
my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
@@ -18113,89 +19115,74 @@ innobase_index_name_is_reserved(
return(false);
}
-/***********************************************************************
-Retrieve the FTS Relevance Ranking result for doc with doc_id
-of prebuilt->fts_doc_id
+/** Retrieve the FTS Relevance Ranking result for doc with doc_id
+of m_prebuilt->fts_doc_id
+@param[in,out] fts_hdl FTS handler
@return the relevance ranking value */
-UNIV_INTERN
+static
float
innobase_fts_retrieve_ranking(
-/*============================*/
- FT_INFO * fts_hdl) /*!< in: FTS handler */
+ FT_INFO* fts_hdl)
{
- row_prebuilt_t* ft_prebuilt;
fts_result_t* result;
+ row_prebuilt_t* ft_prebuilt;
- result = ((NEW_FT_INFO*) fts_hdl)->ft_result;
+ result = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_result;
- ft_prebuilt = ((NEW_FT_INFO*) fts_hdl)->ft_prebuilt;
+ ft_prebuilt = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_prebuilt;
- if (ft_prebuilt->read_just_key) {
- fts_ranking_t* ranking =
- rbt_value(fts_ranking_t, result->current);
- return(ranking->rank);
- }
+ fts_ranking_t* ranking = rbt_value(fts_ranking_t, result->current);
+ ft_prebuilt->fts_doc_id= ranking->doc_id;
- /* Retrieve the ranking value for doc_id with value of
- prebuilt->fts_doc_id */
- return(fts_retrieve_ranking(result, ft_prebuilt->fts_doc_id));
+ return(ranking->rank);
}
-/***********************************************************************
-Free the memory for the FTS handler */
-UNIV_INTERN
+/** Free the memory for the FTS handler
+@param[in,out] fts_hdl FTS handler */
+static
void
innobase_fts_close_ranking(
-/*=======================*/
- FT_INFO * fts_hdl)
+ FT_INFO* fts_hdl)
{
fts_result_t* result;
- result = ((NEW_FT_INFO*) fts_hdl)->ft_result;
+ result = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_result;
fts_query_free_result(result);
my_free((uchar*) fts_hdl);
-
- return;
}
-/***********************************************************************
-Find and Retrieve the FTS Relevance Ranking result for doc with doc_id
-of prebuilt->fts_doc_id
+/** Find and Retrieve the FTS Relevance Ranking result for doc with doc_id
+of m_prebuilt->fts_doc_id
+@param[in,out] fts_hdl FTS handler
@return the relevance ranking value */
-UNIV_INTERN
+static
float
-innobase_fts_find_ranking(
-/*======================*/
- FT_INFO* fts_hdl, /*!< in: FTS handler */
- uchar* record, /*!< in: Unused */
- uint len) /*!< in: Unused */
+innobase_fts_find_ranking(FT_INFO* fts_hdl, uchar*, uint)
{
- row_prebuilt_t* ft_prebuilt;
fts_result_t* result;
+ row_prebuilt_t* ft_prebuilt;
- ft_prebuilt = ((NEW_FT_INFO*) fts_hdl)->ft_prebuilt;
- result = ((NEW_FT_INFO*) fts_hdl)->ft_result;
+ ft_prebuilt = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_prebuilt;
+ result = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_result;
/* Retrieve the ranking value for doc_id with value of
- prebuilt->fts_doc_id */
+ m_prebuilt->fts_doc_id */
return(fts_retrieve_ranking(result, ft_prebuilt->fts_doc_id));
}
#ifdef UNIV_DEBUG
-static my_bool innodb_purge_run_now = TRUE;
-static my_bool innodb_purge_stop_now = TRUE;
+static my_bool innodb_background_drop_list_empty = TRUE;
static my_bool innodb_log_checkpoint_now = TRUE;
static my_bool innodb_buf_flush_list_now = TRUE;
+static uint innodb_merge_threshold_set_all_debug
+ = DICT_INDEX_MERGE_THRESHOLD_DEFAULT;
-/****************************************************************//**
-Set the purge state to RUN. If purge is disabled then it
-is a no-op. This function is registered as a callback with MySQL. */
+/** Wait for the background drop list to become empty. */
static
void
-purge_run_now_set(
-/*==============*/
+wait_background_drop_list_empty(
THD* thd /*!< in: thread handle */
MY_ATTRIBUTE((unused)),
struct st_mysql_sys_var* var /*!< in: pointer to system
@@ -18207,36 +19194,7 @@ purge_run_now_set(
const void* save) /*!< in: immediate result from
check function */
{
- if (*(my_bool*) save && trx_purge_state() != PURGE_STATE_DISABLED) {
- mysql_mutex_unlock(&LOCK_global_system_variables);
- trx_purge_run();
- mysql_mutex_lock(&LOCK_global_system_variables);
- }
-}
-
-/****************************************************************//**
-Set the purge state to STOP. If purge is disabled then it
-is a no-op. This function is registered as a callback with MySQL. */
-static
-void
-purge_stop_now_set(
-/*===============*/
- THD* thd /*!< in: thread handle */
- MY_ATTRIBUTE((unused)),
- struct st_mysql_sys_var* var /*!< in: pointer to system
- variable */
- MY_ATTRIBUTE((unused)),
- void* var_ptr /*!< out: where the formal
- string goes */
- MY_ATTRIBUTE((unused)),
- const void* save) /*!< in: immediate result from
- check function */
-{
- if (*(my_bool*) save && trx_purge_state() != PURGE_STATE_DISABLED) {
- mysql_mutex_unlock(&LOCK_global_system_variables);
- trx_purge_stop();
- mysql_mutex_lock(&LOCK_global_system_variables);
- }
+ row_wait_for_background_drop_list_empty();
}
/****************************************************************//**
@@ -18259,18 +19217,19 @@ checkpoint_now_set(
if (*(my_bool*) save) {
mysql_mutex_unlock(&LOCK_global_system_variables);
- while (log_sys->last_checkpoint_lsn < log_sys->lsn) {
- log_make_checkpoint_at(LSN_MAX, TRUE);
- fil_flush_file_spaces(FIL_LOG);
+ while (log_sys->last_checkpoint_lsn
+ + SIZE_OF_MLOG_CHECKPOINT
+ + (log_sys->append_on_checkpoint != NULL
+ ? log_sys->append_on_checkpoint->size() : 0)
+ < log_sys->lsn) {
+ log_make_checkpoint();
+ fil_flush_file_spaces(FIL_TYPE_LOG);
}
dberr_t err = fil_write_flushed_lsn(log_sys->lsn);
if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Failed to write flush lsn to the "
- "system tablespace at checkpoint err=%s",
- ut_strerr(err));
+ ib::warn() << "Checkpoint set failed " << err;
}
mysql_mutex_lock(&LOCK_global_system_variables);
@@ -18296,75 +19255,57 @@ buf_flush_list_now_set(
{
if (*(my_bool*) save) {
mysql_mutex_unlock(&LOCK_global_system_variables);
- buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
- buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
+ buf_flush_sync_all_buf_pools();
mysql_mutex_lock(&LOCK_global_system_variables);
}
}
-#endif /* UNIV_DEBUG */
-/***********************************************************************
-@return version of the extended FTS API */
-uint
-innobase_fts_get_version()
-/*======================*/
-{
- /* Currently this doesn't make much sense as returning
- HA_CAN_FULLTEXT_EXT automatically mean this version is supported.
- This supposed to ease future extensions. */
- return(2);
-}
-
-/***********************************************************************
-@return Which part of the extended FTS API is supported */
-ulonglong
-innobase_fts_flags()
-/*================*/
+/** Override the current MERGE_THRESHOLD setting for all indexes in the
+dictionary cache now.
+@param[in] thd thread handle
+@param[in] var pointer to system variable
+@param[out] var_ptr where the formal string goes
+@param[in] save immediate result from check function */
+static
+void
+innodb_merge_threshold_set_all_debug_update(
+ THD* thd,
+ struct st_mysql_sys_var* var,
+ void* var_ptr,
+ const void* save)
{
- return(FTS_ORDERED_RESULT | FTS_DOCID_IN_RESULT);
+ innodb_merge_threshold_set_all_debug
+ = (*static_cast<const uint*>(save));
+ dict_set_merge_threshold_all_debug(
+ innodb_merge_threshold_set_all_debug);
}
+#endif /* UNIV_DEBUG */
-
-/***********************************************************************
-Find and Retrieve the FTS doc_id for the current result row
+/** Find and Retrieve the FTS doc_id for the current result row
+@param[in,out] fts_hdl FTS handler
@return the document ID */
+static
ulonglong
innobase_fts_retrieve_docid(
-/*========================*/
- FT_INFO_EXT * fts_hdl) /*!< in: FTS handler */
+ FT_INFO_EXT* fts_hdl)
{
- row_prebuilt_t* ft_prebuilt;
fts_result_t* result;
+ row_prebuilt_t* ft_prebuilt;
- ft_prebuilt = ((NEW_FT_INFO *)fts_hdl)->ft_prebuilt;
- result = ((NEW_FT_INFO *)fts_hdl)->ft_result;
+ ft_prebuilt = reinterpret_cast<NEW_FT_INFO *>(fts_hdl)->ft_prebuilt;
+ result = reinterpret_cast<NEW_FT_INFO *>(fts_hdl)->ft_result;
if (ft_prebuilt->read_just_key) {
+
fts_ranking_t* ranking =
rbt_value(fts_ranking_t, result->current);
+
return(ranking->doc_id);
}
return(ft_prebuilt->fts_doc_id);
}
-/***********************************************************************
-Find and retrieve the size of the current result
-@return number of matching rows */
-ulonglong
-innobase_fts_count_matches(
-/*=======================*/
- FT_INFO_EXT* fts_hdl) /*!< in: FTS handler */
-{
- NEW_FT_INFO* handle = (NEW_FT_INFO *) fts_hdl;
-
- if (handle->ft_result->rankings_by_id != 0) {
- return rbt_size(handle->ft_result->rankings_by_id);
- } else {
- return(0);
- }
-}
-
/* These variables are never read or changed by InnoDB. They are dummies
needed by the MySQL infrastructure to call
buffer_pool_dump_now(), buffer_pool_load_now() and buffer_pool_load_abort()
@@ -18414,13 +19355,13 @@ void
buffer_pool_load_now(
/*=================*/
THD* thd /*!< in: thread handle */
- __attribute__((unused)),
+ MY_ATTRIBUTE((unused)),
struct st_mysql_sys_var* var /*!< in: pointer to system
variable */
- __attribute__((unused)),
+ MY_ATTRIBUTE((unused)),
void* var_ptr /*!< out: where the formal
string goes */
- __attribute__((unused)),
+ MY_ATTRIBUTE((unused)),
const void* save) /*!< in: immediate result from
check function */
{
@@ -18439,13 +19380,13 @@ void
buffer_pool_load_abort(
/*===================*/
THD* thd /*!< in: thread handle */
- __attribute__((unused)),
+ MY_ATTRIBUTE((unused)),
struct st_mysql_sys_var* var /*!< in: pointer to system
variable */
- __attribute__((unused)),
+ MY_ATTRIBUTE((unused)),
void* var_ptr /*!< out: where the formal
string goes */
- __attribute__((unused)),
+ MY_ATTRIBUTE((unused)),
const void* save) /*!< in: immediate result from
check function */
{
@@ -18456,6 +19397,54 @@ buffer_pool_load_abort(
}
}
+/****************************************************************//**
+Update the system variable innodb_log_write_ahead_size using the "saved"
+value. This function is registered as a callback with MySQL. */
+static
+void
+innodb_log_write_ahead_size_update(
+/*===============================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to
+ system variable */
+ void* var_ptr,/*!< out: where the
+ formal string goes */
+ const void* save) /*!< in: immediate result
+ from check function */
+{
+ ulong val = OS_FILE_LOG_BLOCK_SIZE;
+ ulong in_val = *static_cast<const ulong*>(save);
+
+ while (val < in_val) {
+ val = val * 2;
+ }
+
+ if (val > UNIV_PAGE_SIZE) {
+ val = UNIV_PAGE_SIZE;
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "innodb_log_write_ahead_size cannot"
+ " be set higher than innodb_page_size.");
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "Setting innodb_log_write_ahead_size"
+ " to %lu",
+ UNIV_PAGE_SIZE);
+ } else if (val != in_val) {
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "innodb_log_write_ahead_size should be"
+ " set 2^n value and larger than 512.");
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "Setting innodb_log_write_ahead_size"
+ " to %lu",
+ val);
+ }
+
+ srv_log_write_ahead_size = val;
+}
+
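
The rounding above can be checked in isolation. A minimal sketch, assuming the
512-byte OS_FILE_LOG_BLOCK_SIZE and a 16 KiB page size (constants
illustrative): the requested value is rounded up to the next power of two and
capped at the page size.

    #include <cassert>

    static unsigned long round_write_ahead(unsigned long requested,
                                           unsigned long block_size,
                                           unsigned long page_size)
    {
            unsigned long val = block_size;     /* start at 512 */
            while (val < requested) {
                    val *= 2;                   /* next power of two */
            }
            return val > page_size ? page_size : val;
    }

    int main()
    {
            assert(round_write_ahead(1000,   512, 16384) == 1024);
            assert(round_write_ahead(512,    512, 16384) == 512);
            assert(round_write_ahead(100000, 512, 16384) == 16384);
            return 0;
    }
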
/** Update innodb_status_output or innodb_status_output_locks,
which control InnoDB "status monitor" output to the error log.
@param[out] var current value
@@ -18517,6 +19506,50 @@ innodb_encrypt_tables_update(THD*,st_mysql_sys_var*,void*,const void*save)
mysql_mutex_lock(&LOCK_global_system_variables);
}
+/** Update the innodb_log_checksums parameter.
+@param[in] thd thread handle
+@param[in] var system variable
+@param[out] var_ptr current value
+@param[in] save immediate result from check function */
+static
+void
+innodb_log_checksums_update(
+ THD* thd,
+ struct st_mysql_sys_var* var,
+ void* var_ptr,
+ const void* save)
+{
+ *static_cast<my_bool*>(var_ptr) = innodb_log_checksums_func_update(
+ thd, *static_cast<const my_bool*>(save));
+}
+
+#ifdef UNIV_DEBUG
+static
+void
+innobase_debug_sync_callback(srv_slot_t *slot, const void *value)
+{
+ const char *value_str = *static_cast<const char* const*>(value);
+ size_t len = strlen(value_str) + 1;
+
+	// One allocation for the list node object and its value.
+ void *buf = ut_malloc_nokey(sizeof(srv_slot_t::debug_sync_t) + len);
+ srv_slot_t::debug_sync_t *sync = new(buf) srv_slot_t::debug_sync_t();
+ strcpy(reinterpret_cast<char*>(&sync[1]), value_str);
+
+ rw_lock_x_lock(&slot->debug_sync_lock);
+ UT_LIST_ADD_LAST(slot->debug_sync, sync);
+ rw_lock_x_unlock(&slot->debug_sync_lock);
+}
+static
+void
+innobase_debug_sync_set(THD *thd, st_mysql_sys_var*, void *, const void *value)
+{
+ srv_for_each_thread(SRV_WORKER, innobase_debug_sync_callback, value);
+ srv_for_each_thread(SRV_PURGE, innobase_debug_sync_callback, value);
+}
+#endif
+
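
The callback above packs the list node and its string payload into a single
allocation, copying the string just past the node object. A minimal standalone
sketch of that idiom (plain C++; malloc stands in for ut_malloc_nokey, and the
payload string is illustrative):

    #include <cstdio>
    #include <cstdlib>
    #include <cstring>
    #include <new>

    struct node_t {
            node_t* next;
            node_t() : next(NULL) {}
    };

    int main()
    {
            const char* value = "now WAIT_FOR go";
            size_t len = std::strlen(value) + 1;

            /* One malloc covers the node and the trailing string copy. */
            void* buf = std::malloc(sizeof(node_t) + len);
            node_t* n = new (buf) node_t();     /* placement new */
            std::memcpy(reinterpret_cast<char*>(n + 1), value, len);

            std::printf("payload: %s\n", reinterpret_cast<char*>(n + 1));
            n->~node_t();
            std::free(buf);
            return 0;
    }
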
static SHOW_VAR innodb_status_variables_export[]= {
{"Innodb", (char*) &show_innodb_vars, SHOW_FUNC},
{NullS, NullS, SHOW_LONG}
@@ -18527,7 +19560,10 @@ static struct st_mysql_storage_engine innobase_storage_engine=
#ifdef WITH_WSREP
void
-wsrep_abort_slave_trx(wsrep_seqno_t bf_seqno, wsrep_seqno_t victim_seqno)
+wsrep_abort_slave_trx(
+/*==================*/
+ wsrep_seqno_t bf_seqno,
+ wsrep_seqno_t victim_seqno)
{
WSREP_ERROR("Trx %lld tries to abort slave trx %lld. This could be "
"caused by:\n\t"
@@ -18543,6 +19579,7 @@ This function is used to kill one transaction in BF. */
UNIV_INTERN
int
wsrep_innobase_kill_one_trx(
+/*========================*/
void * const bf_thd_ptr,
const trx_t * const bf_trx,
trx_t *victim_trx,
@@ -18573,9 +19610,9 @@ wsrep_innobase_kill_one_trx(
WSREP_LOG_CONFLICT(bf_thd, thd, TRUE);
- WSREP_DEBUG("BF kill (%lu, seqno: %lld), victim: (%lu) trx: "
- TRX_ID_FMT,
- signal, (long long)bf_seqno,
+ WSREP_DEBUG("BF kill (" ULINTPF ", seqno: " INT64PF
+ "), victim: (%lu) trx: " TRX_ID_FMT,
+ signal, bf_seqno,
thd_get_thread_id(thd),
victim_trx->id);
@@ -18602,7 +19639,7 @@ wsrep_innobase_kill_one_trx(
DBUG_RETURN(0);
}
- if(wsrep_thd_exec_mode(thd) != LOCAL_STATE) {
+ if (wsrep_thd_exec_mode(thd) != LOCAL_STATE) {
WSREP_DEBUG("withdraw for BF trx: " TRX_ID_FMT ", state: %d",
victim_trx->id,
wsrep_thd_get_conflict_state(thd));
@@ -18663,8 +19700,7 @@ wsrep_innobase_kill_one_trx(
WSREP_ERROR(
"cancel commit bad exit: %d "
TRX_ID_FMT,
- rcode,
- victim_trx->id);
+ rcode, victim_trx->id);
/* unable to interrupt, must abort */
/* note: kill_mysql() will block, if we cannot.
* kill the lock holder first.
@@ -18684,10 +19720,12 @@ wsrep_innobase_kill_one_trx(
victim_trx->id);
victim_trx->lock.was_chosen_as_deadlock_victim= TRUE;
+
if (victim_trx->lock.wait_lock) {
WSREP_DEBUG("victim has wait flag: %ld",
thd_get_thread_id(thd));
lock_t* wait_lock = victim_trx->lock.wait_lock;
+
if (wait_lock) {
WSREP_DEBUG("canceling wait lock");
victim_trx->lock.was_chosen_as_deadlock_victim= TRUE;
@@ -18733,7 +19771,7 @@ wsrep_innobase_kill_one_trx(
if (wsrep_aborting_thd_contains(thd)) {
WSREP_WARN("duplicate thd aborter %lu",
- thd_get_thread_id(thd));
+ (ulong) thd_get_thread_id(thd));
} else {
wsrep_aborting_thd_enqueue(thd);
DBUG_PRINT("wsrep",("enqueuing trx abort for %lu",
@@ -18762,13 +19800,14 @@ wsrep_innobase_kill_one_trx(
static
int
wsrep_abort_transaction(
+/*====================*/
handlerton* hton,
THD *bf_thd,
THD *victim_thd,
my_bool signal)
{
DBUG_ENTER("wsrep_innobase_abort_thd");
-
+
trx_t* victim_trx = thd_to_trx(victim_thd);
trx_t* bf_trx = (bf_thd) ? thd_to_trx(bf_thd) : NULL;
@@ -18780,12 +19819,10 @@ wsrep_abort_transaction(
if (victim_trx) {
lock_mutex_enter();
trx_mutex_enter(victim_trx);
- victim_trx->abort_type = TRX_WSREP_ABORT;
int rcode = wsrep_innobase_kill_one_trx(bf_thd, bf_trx,
victim_trx, signal);
- trx_mutex_exit(victim_trx);
lock_mutex_exit();
- victim_trx->abort_type = TRX_SERVER_ABORT;
+ trx_mutex_exit(victim_trx);
wsrep_srv_conc_cancel_wait(victim_trx);
DBUG_RETURN(rcode);
} else {
@@ -18799,30 +19836,44 @@ wsrep_abort_transaction(
DBUG_RETURN(-1);
}
-static int innobase_wsrep_set_checkpoint(handlerton* hton, const XID* xid)
+static
+int
+innobase_wsrep_set_checkpoint(
+/*==========================*/
+ handlerton* hton,
+ const XID* xid)
{
DBUG_ASSERT(hton == innodb_hton_ptr);
- if (wsrep_is_wsrep_xid(xid)) {
- mtr_t mtr;
- mtr_start(&mtr);
- trx_sysf_t* sys_header = trx_sysf_get(&mtr);
- trx_sys_update_wsrep_checkpoint(xid, sys_header, &mtr);
- mtr_commit(&mtr);
- innobase_flush_logs(hton);
- return 0;
- } else {
- return 1;
- }
+
+ if (wsrep_is_wsrep_xid(xid)) {
+ mtr_t mtr;
+ mtr_start(&mtr);
+ trx_sysf_t* sys_header = trx_sysf_get(&mtr);
+ trx_sys_update_wsrep_checkpoint(xid, sys_header, &mtr);
+ mtr_commit(&mtr);
+ innobase_flush_logs(hton, false);
+ return 0;
+ } else {
+ return 1;
+ }
}
-static int innobase_wsrep_get_checkpoint(handlerton* hton, XID* xid)
+static
+int
+innobase_wsrep_get_checkpoint(
+/*==========================*/
+ handlerton* hton,
+ XID* xid)
{
DBUG_ASSERT(hton == innodb_hton_ptr);
trx_sys_read_wsrep_checkpoint(xid);
return 0;
}
-static void wsrep_fake_trx_id(
+static
+void
+wsrep_fake_trx_id(
+/*==============*/
handlerton *hton,
THD *thd) /*!< in: user thread handle */
{
@@ -18836,38 +19887,81 @@ static void wsrep_fake_trx_id(
#endif /* WITH_WSREP */
+/** Update the innodb_use_trim parameter.
+@param[in] thd thread handle
+@param[in] var system variable
+@param[out] var_ptr current value
+@param[in] save immediate result from check function */
+static
+void
+innodb_use_trim_update(
+ THD* thd,
+ struct st_mysql_sys_var* var,
+ void* var_ptr,
+ const void* save)
+{
+ srv_use_trim = *static_cast<const my_bool*>(save);
+
+ push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_ERR_WRONG_COMMAND, deprecated_use_trim);
+}
+
+/** Update the innodb_instrument_semaphores parameter.
+@param[in] thd thread handle
+@param[in] var system variable
+@param[out] var_ptr current value
+@param[in] save immediate result from check function */
+static
+void
+innodb_instrument_semaphores_update(
+ THD* thd,
+ struct st_mysql_sys_var* var,
+ void* var_ptr,
+ const void* save)
+{
+ innodb_instrument_semaphores = *static_cast<const my_bool*>(save);
+
+ push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_ERR_WRONG_COMMAND, deprecated_instrument_semaphores);
+}
+
/* plugin options */
static MYSQL_SYSVAR_ENUM(checksum_algorithm, srv_checksum_algorithm,
PLUGIN_VAR_RQCMDARG,
- "The algorithm InnoDB uses for page checksumming. Possible values are "
- "CRC32 (hardware accelerated if the CPU supports it) "
- "write crc32, allow any of the other checksums to match when reading; "
- "STRICT_CRC32 "
- "write crc32, do not allow other algorithms to match when reading; "
- "INNODB "
- "write a software calculated checksum, allow any other checksums "
- "to match when reading; "
- "STRICT_INNODB "
- "write a software calculated checksum, do not allow other algorithms "
- "to match when reading; "
- "NONE "
- "write a constant magic number, do not do any checksum verification "
- "when reading (same as innodb_checksums=OFF); "
- "STRICT_NONE "
- "write a constant magic number, do not allow values other than that "
- "magic number when reading; "
- "Files updated when this option is set to crc32 or strict_crc32 will "
- "not be readable by MySQL versions older than 5.6.3",
- NULL, NULL, SRV_CHECKSUM_ALGORITHM_INNODB,
+ "The algorithm InnoDB uses for page checksumming. Possible values are"
+ " CRC32 (hardware accelerated if the CPU supports it)"
+ " write crc32, allow any of the other checksums to match when reading;"
+ " STRICT_CRC32"
+ " write crc32, do not allow other algorithms to match when reading;"
+ " INNODB"
+ " write a software calculated checksum, allow any other checksums"
+ " to match when reading;"
+ " STRICT_INNODB"
+ " write a software calculated checksum, do not allow other algorithms"
+ " to match when reading;"
+ " NONE"
+ " write a constant magic number, do not do any checksum verification"
+ " when reading (same as innodb_checksums=OFF);"
+ " STRICT_NONE"
+ " write a constant magic number, do not allow values other than that"
+ " magic number when reading;"
+ " Files updated when this option is set to crc32 or strict_crc32 will"
+ " not be readable by MariaDB versions older than 10.0.4",
+ NULL, NULL, SRV_CHECKSUM_ALGORITHM_CRC32,
&innodb_checksum_algorithm_typelib);
+static MYSQL_SYSVAR_BOOL(log_checksums, innodb_log_checksums,
+ PLUGIN_VAR_RQCMDARG,
+ "Whether to compute and require checksums for InnoDB redo log blocks",
+ NULL, innodb_log_checksums_update, TRUE);
+
static MYSQL_SYSVAR_BOOL(checksums, innobase_use_checksums,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "DEPRECATED. Use innodb_checksum_algorithm=NONE instead of setting "
- "this to OFF. "
- "Enable InnoDB checksums validation (enabled by default). "
- "Disable with --skip-innodb-checksums.",
+ "DEPRECATED. Use innodb_checksum_algorithm=NONE instead of setting"
+ " this to OFF."
+ " Enable InnoDB checksums validation (enabled by default)."
+ " Disable with --skip-innodb-checksums.",
NULL, NULL, TRUE);
static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir,
@@ -18877,28 +19971,29 @@ static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir,
static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Enable InnoDB doublewrite buffer (enabled by default). "
- "Disable with --skip-innodb-doublewrite.",
+ "Enable InnoDB doublewrite buffer (enabled by default)."
+ " Disable with --skip-innodb-doublewrite.",
NULL, NULL, TRUE);
static MYSQL_SYSVAR_BOOL(use_atomic_writes, innobase_use_atomic_writes,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Prevent partial page writes, via atomic writes."
- "The option is used to prevent partial writes in case of a crash/poweroff, "
- "as faster alternative to doublewrite buffer."
- "Currently this option works only "
- "on Linux only with FusionIO device, and directFS filesystem.",
- NULL, NULL, FALSE);
+ "Enable atomic writes, instead of using the doublewrite buffer, for files "
+ "on devices that supports atomic writes. "
+ "To use this option one must use "
+ "file_per_table=1, flush_method=O_DIRECT and use_fallocate=1. "
+ "This option only works on Linux with either FusionIO cards using "
+ "the directFS filesystem or with Shannon cards using any file system.",
+ NULL, NULL, TRUE);
static MYSQL_SYSVAR_BOOL(use_fallocate, innobase_use_fallocate,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Preallocate files fast, using operating system functionality. On POSIX systems, posix_fallocate system call is used.",
+ "Use posix_fallocate() to allocate files. DEPRECATED, has no effect.",
NULL, NULL, FALSE);
static MYSQL_SYSVAR_BOOL(stats_include_delete_marked,
srv_stats_include_delete_marked,
PLUGIN_VAR_OPCMDARG,
- "Scan delete marked records for persistent stat",
+ "Include delete marked records when calculating persistent statistics",
NULL, NULL, FALSE);
static MYSQL_SYSVAR_ULONG(io_capacity, srv_io_capacity,
@@ -18921,15 +20016,11 @@ static MYSQL_SYSVAR_ULONG(idle_flush_pct,
NULL, NULL, 100, 0, 100, 0);
#ifdef UNIV_DEBUG
-static MYSQL_SYSVAR_BOOL(purge_run_now, innodb_purge_run_now,
+static MYSQL_SYSVAR_BOOL(background_drop_list_empty,
+ innodb_background_drop_list_empty,
PLUGIN_VAR_OPCMDARG,
- "Set purge state to RUN",
- NULL, purge_run_now_set, FALSE);
-
-static MYSQL_SYSVAR_BOOL(purge_stop_now, innodb_purge_stop_now,
- PLUGIN_VAR_OPCMDARG,
- "Set purge state to STOP",
- NULL, purge_stop_now_set, FALSE);
+ "Wait for the background drop list to become empty",
+ NULL, wait_background_drop_list_empty, FALSE);
static MYSQL_SYSVAR_BOOL(log_checkpoint_now, innodb_log_checkpoint_now,
PLUGIN_VAR_OPCMDARG,
@@ -18940,6 +20031,14 @@ static MYSQL_SYSVAR_BOOL(buf_flush_list_now, innodb_buf_flush_list_now,
PLUGIN_VAR_OPCMDARG,
"Force dirty page flush now",
NULL, buf_flush_list_now_set, FALSE);
+
+static MYSQL_SYSVAR_UINT(merge_threshold_set_all_debug,
+ innodb_merge_threshold_set_all_debug,
+ PLUGIN_VAR_RQCMDARG,
+ "Override current MERGE_THRESHOLD setting for all indexes at dictionary"
+ " cache by the specified value dynamically, at the time.",
+ NULL, innodb_merge_threshold_set_all_debug_update,
+ DICT_INDEX_MERGE_THRESHOLD_DEFAULT, 1, 50, 0);
#endif /* UNIV_DEBUG */
static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size,
@@ -18952,11 +20051,12 @@ static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size,
static MYSQL_SYSVAR_ULONG(purge_threads, srv_n_purge_threads,
PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
- "Purge threads can be from 1 to 32. Default is 1.",
+ "Purge threads can be from 1 to 32. Default is 4.",
NULL, NULL,
- 1, /* Default setting */
+ 4, /* Default setting */
1, /* Minimum value */
- 32, 0); /* Maximum value */
+ srv_max_purge_threads,/* Maximum value */
+ 0);
static MYSQL_SYSVAR_ULONG(sync_array_size, srv_sync_array_size,
PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
@@ -18966,11 +20066,11 @@ static MYSQL_SYSVAR_ULONG(sync_array_size, srv_sync_array_size,
1, /* Minimum value */
1024, 0); /* Maximum value */
-static MYSQL_SYSVAR_ULONG(fast_shutdown, innobase_fast_shutdown,
+static MYSQL_SYSVAR_UINT(fast_shutdown, srv_fast_shutdown,
PLUGIN_VAR_OPCMDARG,
- "Speeds up the shutdown process of the InnoDB storage engine. Possible "
- "values are 0, 1 (faster) or 2 (fastest - crash-like).",
- NULL, NULL, 1, 0, 2, 0);
+ "Speeds up the shutdown process of the InnoDB storage engine. Possible"
+ " values are 0, 1 (faster) or 2 (fastest - crash-like).",
+ fast_shutdown_validate, NULL, 1, 0, 2, 0);
static MYSQL_SYSVAR_BOOL(file_per_table, srv_file_per_table,
PLUGIN_VAR_NOCMDARG,
@@ -18981,7 +20081,7 @@ static MYSQL_SYSVAR_STR(file_format, innobase_file_format_name,
PLUGIN_VAR_RQCMDARG,
"File format to use for new tables in .ibd files.",
innodb_file_format_name_validate,
- innodb_file_format_name_update, "Antelope");
+ innodb_file_format_name_update, innodb_file_format_default);
/* "innobase_file_format_check" decides whether we would continue
booting the server if the file format stamped on the system
@@ -19002,7 +20102,7 @@ static MYSQL_SYSVAR_STR(file_format_max, innobase_file_format_max,
PLUGIN_VAR_OPCMDARG,
"The highest file format in the tablespace.",
innodb_file_format_max_validate,
- innodb_file_format_max_update, "Antelope");
+ innodb_file_format_max_update, innodb_file_format_max_default);
static MYSQL_SYSVAR_STR(ft_server_stopword_table, innobase_server_stopword_table,
PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_MEMALLOC,
@@ -19037,7 +20137,7 @@ static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method,
static MYSQL_SYSVAR_BOOL(large_prefix, innobase_large_prefix,
PLUGIN_VAR_NOCMDARG,
"Support large index prefix length of REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes.",
- NULL, NULL, FALSE);
+ NULL, innodb_large_prefix_update, TRUE);
static MYSQL_SYSVAR_BOOL(force_load_corrupted, srv_load_corrupted,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
@@ -19046,35 +20146,30 @@ static MYSQL_SYSVAR_BOOL(force_load_corrupted, srv_load_corrupted,
static MYSQL_SYSVAR_BOOL(locks_unsafe_for_binlog, innobase_locks_unsafe_for_binlog,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "DEPRECATED. This option may be removed in future releases. "
- "Please use READ COMMITTED transaction isolation level instead. "
- "Force InnoDB to not use next-key locking, to use only row-level locking.",
+ "DEPRECATED. This option may be removed in future releases."
+ " Please use READ COMMITTED transaction isolation level instead."
+ " Force InnoDB to not use next-key locking, to use only row-level locking.",
NULL, NULL, FALSE);
-#ifdef UNIV_LOG_ARCHIVE
-static MYSQL_SYSVAR_STR(log_arch_dir, innobase_log_arch_dir,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Where full logs should be archived.", NULL, NULL, NULL);
-
-static MYSQL_SYSVAR_BOOL(log_archive, innobase_log_archive,
- PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
- "Set to 1 if you want to have logs archived.", NULL, NULL, FALSE);
-#endif /* UNIV_LOG_ARCHIVE */
-
static MYSQL_SYSVAR_STR(log_group_home_dir, srv_log_group_home_dir,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Path to InnoDB log files.", NULL, NULL, NULL);
+static MYSQL_SYSVAR_ULONG(page_cleaners, srv_n_page_cleaners,
+ PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
+ "Page cleaner threads can be from 1 to 64. Default is 4.",
+ NULL, NULL, 4, 1, 64, 0);
+
static MYSQL_SYSVAR_DOUBLE(max_dirty_pages_pct, srv_max_buf_pool_modified_pct,
PLUGIN_VAR_RQCMDARG,
"Percentage of dirty pages allowed in bufferpool.",
- NULL, innodb_max_dirty_pages_pct_update, 75.0, 0.001, 99.999, 0);
+ NULL, innodb_max_dirty_pages_pct_update, 75.0, 0, 99.999, 0);
static MYSQL_SYSVAR_DOUBLE(max_dirty_pages_pct_lwm,
srv_max_dirty_pages_pct_lwm,
PLUGIN_VAR_RQCMDARG,
"Percentage of dirty pages at which flushing kicks in.",
- NULL, innodb_max_dirty_pages_pct_lwm_update, 0.001, 0.000, 99.999, 0);
+ NULL, innodb_max_dirty_pages_pct_lwm_update, 0, 0, 99.999, 0);
static MYSQL_SYSVAR_DOUBLE(adaptive_flushing_lwm,
srv_adaptive_flushing_lwm,
@@ -19087,6 +20182,11 @@ static MYSQL_SYSVAR_BOOL(adaptive_flushing, srv_adaptive_flushing,
"Attempt flushing dirty pages to avoid IO bursts at checkpoints.",
NULL, NULL, TRUE);
+static MYSQL_SYSVAR_BOOL(flush_sync, srv_flush_sync,
+ PLUGIN_VAR_NOCMDARG,
+ "Allow IO bursts at the checkpoints ignoring io_capacity setting.",
+ NULL, NULL, TRUE);
+
static MYSQL_SYSVAR_ULONG(flushing_avg_loops,
srv_flushing_avg_loops,
PLUGIN_VAR_RQCMDARG,
@@ -19118,8 +20218,8 @@ static MYSQL_SYSVAR_BOOL(status_file, innobase_create_status_file,
static MYSQL_SYSVAR_BOOL(stats_on_metadata, innobase_stats_on_metadata,
PLUGIN_VAR_OPCMDARG,
- "Enable statistics gathering for metadata commands such as "
- "SHOW TABLE STATUS for tables that use transient statistics (off by default)",
+ "Enable statistics gathering for metadata commands such as"
+ " SHOW TABLE STATUS for tables that use transient statistics (off by default)",
NULL, NULL, FALSE);
static MYSQL_SYSVAR_ULONGLONG(stats_sample_pages, srv_stats_transient_sample_pages,
@@ -19130,29 +20230,29 @@ static MYSQL_SYSVAR_ULONGLONG(stats_sample_pages, srv_stats_transient_sample_pag
static MYSQL_SYSVAR_ULONGLONG(stats_transient_sample_pages,
srv_stats_transient_sample_pages,
PLUGIN_VAR_RQCMDARG,
- "The number of leaf index pages to sample when calculating transient "
- "statistics (if persistent statistics are not used, default 8)",
+ "The number of leaf index pages to sample when calculating transient"
+ " statistics (if persistent statistics are not used, default 8)",
NULL, NULL, 8, 1, ~0ULL, 0);
static MYSQL_SYSVAR_BOOL(stats_persistent, srv_stats_persistent,
PLUGIN_VAR_OPCMDARG,
- "InnoDB persistent statistics enabled for all tables unless overridden "
- "at table level",
+ "InnoDB persistent statistics enabled for all tables unless overridden"
+ " at table level",
NULL, NULL, TRUE);
static MYSQL_SYSVAR_BOOL(stats_auto_recalc, srv_stats_auto_recalc,
PLUGIN_VAR_OPCMDARG,
- "InnoDB automatic recalculation of persistent statistics enabled for all "
- "tables unless overridden at table level (automatic recalculation is only "
- "done when InnoDB decides that the table has changed too much and needs a "
- "new statistics)",
+ "InnoDB automatic recalculation of persistent statistics enabled for all"
+ " tables unless overridden at table level (automatic recalculation is only"
+ " done when InnoDB decides that the table has changed too much and needs a"
+ " new statistics)",
NULL, NULL, TRUE);
static MYSQL_SYSVAR_ULONGLONG(stats_persistent_sample_pages,
srv_stats_persistent_sample_pages,
PLUGIN_VAR_RQCMDARG,
- "The number of leaf index pages to sample when calculating persistent "
- "statistics (by ANALYZE, default 20)",
+ "The number of leaf index pages to sample when calculating persistent"
+ " statistics (by ANALYZE, default 20)",
NULL, NULL, 20, 1, ~0ULL, 0);
static MYSQL_SYSVAR_ULONGLONG(stats_modified_counter, srv_stats_modified_counter,
@@ -19165,16 +20265,26 @@ static MYSQL_SYSVAR_BOOL(stats_traditional, srv_stats_sample_traditional,
"Enable traditional statistic calculation based on number of configured pages (default true)",
NULL, NULL, TRUE);
+#ifdef BTR_CUR_HASH_ADAPT
static MYSQL_SYSVAR_BOOL(adaptive_hash_index, btr_search_enabled,
PLUGIN_VAR_OPCMDARG,
- "Enable InnoDB adaptive hash index (enabled by default). "
- "Disable with --skip-innodb-adaptive-hash-index.",
- NULL, innodb_adaptive_hash_index_update, TRUE);
+ "Enable InnoDB adaptive hash index (enabled by default). "
+ " Disable with --skip-innodb-adaptive-hash-index.",
+ NULL, innodb_adaptive_hash_index_update, true);
+
+/** Number of distinct partitions of AHI.
+Each partition is protected by its own latch and so we have parts number
+of latches protecting complete search system. */
+static MYSQL_SYSVAR_ULONG(adaptive_hash_index_parts, btr_ahi_parts,
+ PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
+ "Number of InnoDB Adaptive Hash Index Partitions (default 8)",
+ NULL, NULL, 8, 1, 512, 0);
+#endif /* BTR_CUR_HASH_ADAPT */
static MYSQL_SYSVAR_ULONG(replication_delay, srv_replication_delay,
PLUGIN_VAR_RQCMDARG,
- "Replication thread delay (ms) on the slave server if "
- "innodb_thread_concurrency is reached (0 by default)",
+ "Replication thread delay (ms) on the slave server if"
+ " innodb_thread_concurrency is reached (0 by default)",
NULL, NULL, 0, 0, ~0UL, 0);
static MYSQL_SYSVAR_UINT(compression_level, page_zip_level,
@@ -19192,23 +20302,56 @@ static MYSQL_SYSVAR_BOOL(log_compressed_pages, page_zip_log_pages,
" compression algorithm doesn't change.",
NULL, NULL, TRUE);
-static MYSQL_SYSVAR_LONG(additional_mem_pool_size, innobase_additional_mem_pool_size,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "DEPRECATED. This option may be removed in future releases, "
- "together with the option innodb_use_sys_malloc and with the InnoDB's "
- "internal memory allocator. "
- "Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures.",
- NULL, NULL, 8*1024*1024L, 512*1024L, LONG_MAX, 1024);
+static MYSQL_SYSVAR_BOOL(log_optimize_ddl, innodb_log_optimize_ddl,
+ PLUGIN_VAR_OPCMDARG,
+ "Reduce redo logging when natively creating indexes or rebuilding tables."
+ " Setting this OFF avoids delay due to page flushing and"
+ " allows concurrent backup.",
+ NULL, NULL, TRUE);
-static MYSQL_SYSVAR_ULONG(autoextend_increment, srv_auto_extend_increment,
+static MYSQL_SYSVAR_ULONG(autoextend_increment,
+ sys_tablespace_auto_extend_increment,
PLUGIN_VAR_RQCMDARG,
"Data file autoextend increment in megabytes",
NULL, NULL, 64L, 1L, 1000L, 0);
+/** Validate the requested buffer pool size. Also, reserve the necessary
+memory needed for buffer pool resize.
+@param[in] thd thread handle
+@param[in] var pointer to system variable
+@param[out] save immediate result for update function
+@param[in] value incoming string
+@return 0 on success, 1 on failure.
+*/
+static
+int
+innodb_buffer_pool_size_validate(
+ THD* thd,
+ struct st_mysql_sys_var* var,
+ void* save,
+ struct st_mysql_value* value);
+
+/* If the default value of innodb_buffer_pool_size is increased to be more than
+BUF_POOL_SIZE_THRESHOLD (srv/srv0start.cc), then srv_buf_pool_instances_default
+can be removed and 8 used instead. The problem with the current setup is that
+with 128MiB default buffer pool size and 8 instances by default we would emit
+a warning when no options are specified. */
static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ PLUGIN_VAR_RQCMDARG,
"The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
- NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L);
+ innodb_buffer_pool_size_validate,
+ innodb_buffer_pool_size_update,
+ static_cast<longlong>(srv_buf_pool_def_size),
+ static_cast<longlong>(srv_buf_pool_min_size),
+ LLONG_MAX, 1024*1024L);
+
+static MYSQL_SYSVAR_ULONG(buffer_pool_chunk_size, srv_buf_pool_chunk_unit,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Size of a single memory chunk within each buffer pool instance"
+ " for resizing buffer pool. Online buffer pool resizing happens"
+ " at this granularity. 0 means disable resizing buffer pool.",
+ NULL, NULL,
+ 128 * 1024 * 1024, 1024 * 1024, LONG_MAX, 1024 * 1024);
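[Editor's note: with buffer_pool_size made dynamic above, resizing proceeds in units of innodb_buffer_pool_chunk_size per buffer pool instance, so a requested size is effectively rounded up to a whole number of chunks. A minimal sketch of that adjustment follows; the helper is hypothetical and only mirrors what the server does internally, the names are not from this patch.

	/* Round a requested innodb_buffer_pool_size up to a multiple of
	chunk_size * n_instances (assumption: equivalent to the
	server-side adjustment; illustrative only). */
	static ulonglong
	buf_pool_size_align_sketch(ulonglong requested, ulonglong chunk_size,
				   ulonglong n_instances)
	{
		const ulonglong	unit = chunk_size * n_instances;
		return(((requested + unit - 1) / unit) * unit);
	}
]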
#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
static MYSQL_SYSVAR_ULONG(page_hash_locks, srv_n_page_hash_locks,
@@ -19227,17 +20370,17 @@ static MYSQL_SYSVAR_ENUM(lock_schedule_algorithm, innodb_lock_schedule_algorithm
"The algorithm Innodb uses for deciding which locks to grant next when"
" a lock is released. Possible values are"
" FCFS"
- " grant the locks in First-Come-First-Served order;"
+ " grant the locks in First-Come-First-Served order;"
" VATS"
- " use the Variance-Aware-Transaction-Scheduling algorithm, which"
- " uses an Eldest-Transaction-First heuristic.",
+ " use the Variance-Aware-Transaction-Scheduling algorithm, which"
+ " uses an Eldest-Transaction-First heuristic.",
NULL, NULL, INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS,
&innodb_lock_schedule_algorithm_typelib);
-static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances,
+static MYSQL_SYSVAR_ULONG(buffer_pool_instances, srv_buf_pool_instances,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Number of buffer pool instances, set to higher value on high-end machines to increase scalability",
- NULL, NULL, 0L, 0L, MAX_BUFFER_POOLS, 1L);
+ NULL, NULL, srv_buf_pool_instances_default, 0, MAX_BUFFER_POOLS, 0);
static MYSQL_SYSVAR_STR(buffer_pool_filename, srv_buf_dump_filename,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC,
@@ -19252,12 +20395,12 @@ static MYSQL_SYSVAR_BOOL(buffer_pool_dump_now, innodb_buffer_pool_dump_now,
static MYSQL_SYSVAR_BOOL(buffer_pool_dump_at_shutdown, srv_buffer_pool_dump_at_shutdown,
PLUGIN_VAR_RQCMDARG,
"Dump the buffer pool into a file named @@innodb_buffer_pool_filename",
- NULL, NULL, FALSE);
+ NULL, NULL, TRUE);
static MYSQL_SYSVAR_ULONG(buffer_pool_dump_pct, srv_buf_pool_dump_pct,
PLUGIN_VAR_RQCMDARG,
- "Dump only the hottest N% of each buffer pool, defaults to 100",
- NULL, NULL, 100, 1, 100, 0);
+ "Dump only the hottest N% of each buffer pool, defaults to 25",
+ NULL, NULL, 25, 1, 100, 0);
#ifdef UNIV_DEBUG
static MYSQL_SYSVAR_STR(buffer_pool_evict, srv_buffer_pool_evict,
@@ -19280,7 +20423,7 @@ static MYSQL_SYSVAR_BOOL(buffer_pool_load_abort, innodb_buffer_pool_load_abort,
static MYSQL_SYSVAR_BOOL(buffer_pool_load_at_startup, srv_buffer_pool_load_at_startup,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Load the buffer pool from a file named @@innodb_buffer_pool_filename",
- NULL, NULL, FALSE);
+ NULL, NULL, TRUE);
static MYSQL_SYSVAR_BOOL(defragment, srv_defragment,
PLUGIN_VAR_RQCMDARG,
@@ -19359,10 +20502,17 @@ static MYSQL_SYSVAR_ULONG(concurrency_tickets, srv_n_free_tickets_to_enter,
"Number of times a thread is allowed to enter InnoDB within the same SQL query after it has once got the ticket",
NULL, NULL, 5000L, 1L, ~0UL, 0);
-static MYSQL_SYSVAR_LONG(file_io_threads, innobase_file_io_threads,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY | PLUGIN_VAR_NOSYSVAR,
- "Number of file I/O threads in InnoDB.",
- NULL, NULL, 4, 4, 64, 0);
+static MYSQL_SYSVAR_BOOL(deadlock_detect, innobase_deadlock_detect,
+ PLUGIN_VAR_NOCMDARG,
+ "Enable/disable InnoDB deadlock detector (default ON)."
+ " if set to OFF, deadlock detection is skipped,"
+ " and we rely on innodb_lock_wait_timeout in case of deadlock.",
+ NULL, NULL, TRUE);
+
+static MYSQL_SYSVAR_LONG(fill_factor, innobase_fill_factor,
+ PLUGIN_VAR_RQCMDARG,
+ "Percentage of B-tree page filled during bulk insert",
+ NULL, NULL, 100, 10, 100, 0);
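[Editor's note: as a worked example of this setting, with innodb_fill_factor=80 and the default 16KiB page size, bulk-loaded B-tree pages are filled to roughly 13KiB, leaving about 20% of each page free to absorb later updates without immediate page splits; the default of 100 packs pages as full as the engine allows.]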
static MYSQL_SYSVAR_BOOL(ft_enable_diag_print, fts_enable_diag_print,
PLUGIN_VAR_OPCMDARG,
@@ -19375,7 +20525,7 @@ static MYSQL_SYSVAR_BOOL(disable_sort_file_cache, srv_disable_sort_file_cache,
NULL, NULL, FALSE);
static MYSQL_SYSVAR_STR(ft_aux_table, innodb_ft_aux_table,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_MEMALLOC,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC,
"FTS internal auxiliary table to be checked",
innodb_ft_aux_table_validate, NULL, NULL);
@@ -19404,7 +20554,6 @@ static MYSQL_SYSVAR_ULONG(ft_max_token_size, fts_max_token_size,
"InnoDB Fulltext search maximum token size in characters",
NULL, NULL, FTS_MAX_WORD_LEN_IN_CHAR, 10, FTS_MAX_WORD_LEN_IN_CHAR, 0);
-
static MYSQL_SYSVAR_ULONG(ft_num_word_optimize, fts_num_word_optimize,
PLUGIN_VAR_OPCMDARG,
"InnoDB Fulltext search number of words to optimize for each optimize table call ",
@@ -19456,22 +20605,24 @@ static MYSQL_SYSVAR_LONG(log_buffer_size, innobase_log_buffer_size,
"The size of the buffer which InnoDB uses to write log to the log files on disk.",
NULL, NULL, 16*1024*1024L, 256*1024L, LONG_MAX, 1024);
-static MYSQL_SYSVAR_LONGLONG(log_file_size, innobase_log_file_size,
+static MYSQL_SYSVAR_ULONGLONG(log_file_size, srv_log_file_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Size of each log file in a log group.",
- NULL, NULL, 48*1024*1024L, 1*1024*1024L, LONGLONG_MAX, 1024*1024L);
+ NULL, NULL, 48 << 20, 1 << 20, log_group_max_size, UNIV_PAGE_SIZE_MAX);
+/* OS_FILE_LOG_BLOCK_SIZE would be more appropriate than UNIV_PAGE_SIZE_MAX,
+but fil_space_t is being used for the redo log, and it uses data pages. */
static MYSQL_SYSVAR_ULONG(log_files_in_group, srv_n_log_files,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Number of log files in the log group. InnoDB writes to the files in a circular fashion.",
- NULL, NULL, 2, 2, SRV_N_LOG_FILES_MAX, 0);
+ NULL, NULL, 2, 1, SRV_N_LOG_FILES_MAX, 0);
-/* Note that the default and minimum values are set to 0 to
-detect if the option is passed and print deprecation message */
-static MYSQL_SYSVAR_LONG(mirrored_log_groups, innobase_mirrored_log_groups,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Number of identical copies of log groups we keep for the database. Currently this should be set to 1.",
- NULL, NULL, 0, 0, 10, 0);
+static MYSQL_SYSVAR_ULONG(log_write_ahead_size, srv_log_write_ahead_size,
+ PLUGIN_VAR_RQCMDARG,
+ "Redo log write ahead unit size to avoid read-on-write,"
+ " it should match the OS cache block IO size",
+ NULL, innodb_log_write_ahead_size_update,
+ 8*1024L, OS_FILE_LOG_BLOCK_SIZE, UNIV_PAGE_SIZE_DEF, OS_FILE_LOG_BLOCK_SIZE);
static MYSQL_SYSVAR_UINT(old_blocks_pct, innobase_old_blocks_pct,
PLUGIN_VAR_RQCMDARG,
@@ -19495,17 +20646,16 @@ static MYSQL_SYSVAR_ULONG(sync_spin_loops, srv_n_spin_wait_rounds,
"Count of spin-loop rounds in InnoDB mutexes (30 by default)",
NULL, NULL, 30L, 0L, ~0UL, 0);
-static MYSQL_SYSVAR_ULONG(spin_wait_delay, srv_spin_wait_delay,
+static MYSQL_SYSVAR_UINT(spin_wait_delay, srv_spin_wait_delay,
PLUGIN_VAR_OPCMDARG,
- "Maximum delay between polling for a spin lock (6 by default)",
- NULL, NULL, 6L, 0L, ~0UL, 0);
+ "Maximum delay between polling for a spin lock (4 by default)",
+ NULL, NULL, 4, 0, 6000, 0);
static MYSQL_SYSVAR_ULONG(thread_concurrency, srv_thread_concurrency,
PLUGIN_VAR_RQCMDARG,
"Helps in performance tuning in heavily concurrent environments. Sets the maximum number of threads allowed inside InnoDB. Value 0 will disable the thread throttling.",
NULL, NULL, 0, 0, 1000, 0);
-#ifdef HAVE_ATOMIC_BUILTINS
static MYSQL_SYSVAR_ULONG(
adaptive_max_sleep_delay, srv_adaptive_max_sleep_delay,
PLUGIN_VAR_RQCMDARG,
@@ -19514,7 +20664,6 @@ static MYSQL_SYSVAR_ULONG(
150000, /* Default setting */
0, /* Minimum value */
1000000, 0); /* Maximum value */
-#endif /* HAVE_ATOMIC_BUILTINS */
static MYSQL_SYSVAR_BOOL(prefix_index_cluster_optimization,
srv_prefix_index_cluster_optimization,
@@ -19524,8 +20673,8 @@ static MYSQL_SYSVAR_BOOL(prefix_index_cluster_optimization,
static MYSQL_SYSVAR_ULONG(thread_sleep_delay, srv_thread_sleep_delay,
PLUGIN_VAR_RQCMDARG,
- "Time of innodb thread sleeping before joining InnoDB queue (usec). "
- "Value 0 disable a sleep",
+ "Time of innodb thread sleeping before joining InnoDB queue (usec)."
+ " Value 0 disable a sleep",
NULL, NULL,
10000L,
0L,
@@ -19536,18 +20685,23 @@ static MYSQL_SYSVAR_STR(data_file_path, innobase_data_file_path,
"Path to individual files and their sizes.",
NULL, NULL, NULL);
+static MYSQL_SYSVAR_STR(temp_data_file_path, innobase_temp_data_file_path,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Path to files and their sizes making temp-tablespace.",
+ NULL, NULL, NULL);
+
static MYSQL_SYSVAR_STR(undo_directory, srv_undo_dir,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Directory where undo tablespace files live, this path can be absolute.",
- NULL, NULL, ".");
+ NULL, NULL, NULL);
static MYSQL_SYSVAR_ULONG(undo_tablespaces, srv_undo_tablespaces,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Number of undo tablespaces to use. ",
+ "Number of undo tablespaces to use.",
NULL, NULL,
0L, /* Default setting */
0L, /* Minimum value */
- 126L, 0); /* Maximum value */
+ TRX_SYS_MAX_UNDO_SPACES, 0); /* Maximum value */
static MYSQL_SYSVAR_ULONG(undo_logs, srv_undo_logs,
PLUGIN_VAR_OPCMDARG,
@@ -19557,6 +20711,25 @@ static MYSQL_SYSVAR_ULONG(undo_logs, srv_undo_logs,
1, /* Minimum value */
TRX_SYS_N_RSEGS, 0); /* Maximum value */
+static MYSQL_SYSVAR_ULONGLONG(max_undo_log_size, srv_max_undo_log_size,
+ PLUGIN_VAR_OPCMDARG,
+ "Desired maximum UNDO tablespace size in bytes",
+ NULL, NULL,
+ 10 << 20, 10 << 20,
+ 1ULL << (32 + UNIV_PAGE_SIZE_SHIFT_MAX), 0);
+
+static MYSQL_SYSVAR_ULONG(purge_rseg_truncate_frequency,
+ srv_purge_rseg_truncate_frequency,
+ PLUGIN_VAR_OPCMDARG,
+ "Dictates rate at which UNDO records are purged. Value N means"
+ " purge rollback segment(s) on every Nth iteration of purge invocation",
+ NULL, NULL, 128, 1, 128, 0);
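[Editor's note: reading the help text above, at the default of 128 purge considers truncating rollback segments only on every 128th invocation of the purge batch; setting it to 1 checks on every batch, reclaiming undo space sooner at the cost of more frequent truncation work.]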
+
+static MYSQL_SYSVAR_BOOL(undo_log_truncate, srv_undo_log_truncate,
+ PLUGIN_VAR_OPCMDARG,
+ "Enable or Disable Truncate of UNDO tablespace.",
+ NULL, NULL, FALSE);
+
/* Alias for innodb_undo_logs, this config variable is deprecated. */
static MYSQL_SYSVAR_ULONG(rollback_segments, srv_undo_logs,
PLUGIN_VAR_OPCMDARG,
@@ -19568,11 +20741,10 @@ static MYSQL_SYSVAR_ULONG(rollback_segments, srv_undo_logs,
static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "The AUTOINC lock modes supported by InnoDB: "
- "0 => Old style AUTOINC locking (for backward"
- " compatibility) "
- "1 => New style AUTOINC locking "
- "2 => No AUTOINC locking (unsafe for SBR)",
+ "The AUTOINC lock modes supported by InnoDB:"
+ " 0 => Old style AUTOINC locking (for backward compatibility);"
+ " 1 => New style AUTOINC locking;"
+ " 2 => No AUTOINC locking (unsafe for SBR)",
NULL, NULL,
AUTOINC_NEW_STYLE_LOCKING, /* Default setting */
AUTOINC_OLD_STYLE_LOCKING, /* Minimum value */
@@ -19582,13 +20754,6 @@ static MYSQL_SYSVAR_STR(version, innodb_version_str,
PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY,
"InnoDB version", NULL, NULL, INNODB_VERSION_STR);
-static MYSQL_SYSVAR_BOOL(use_sys_malloc, srv_use_sys_malloc,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "DEPRECATED. This option may be removed in future releases, "
- "together with the InnoDB's internal memory allocator. "
- "Use OS memory allocator instead of InnoDB's internal memory allocator",
- NULL, NULL, TRUE);
-
static MYSQL_SYSVAR_BOOL(use_native_aio, srv_use_native_aio,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
"Use native AIO if supported on this platform.",
@@ -19599,48 +20764,17 @@ static MYSQL_SYSVAR_BOOL(numa_interleave, srv_numa_interleave,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
"Use NUMA interleave memory policy to allocate InnoDB buffer pool.",
NULL, NULL, FALSE);
-#endif // HAVE_LIBNUMA
-
-static MYSQL_SYSVAR_BOOL(api_enable_binlog, ib_binlog_enabled,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Enable binlog for applications direct access InnoDB through InnoDB APIs",
- NULL, NULL, FALSE);
-
-static MYSQL_SYSVAR_BOOL(api_enable_mdl, ib_mdl_enabled,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Enable MDL for applications direct access InnoDB through InnoDB APIs",
- NULL, NULL, FALSE);
-
-static MYSQL_SYSVAR_BOOL(api_disable_rowlock, ib_disable_row_lock,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Disable row lock when direct access InnoDB through InnoDB APIs",
- NULL, NULL, FALSE);
-
-static MYSQL_SYSVAR_ULONG(api_trx_level, ib_trx_level_setting,
- PLUGIN_VAR_OPCMDARG,
- "InnoDB API transaction isolation level",
- NULL, NULL,
- 0, /* Default setting */
- 0, /* Minimum value */
- 3, 0); /* Maximum value */
-
-static MYSQL_SYSVAR_ULONG(api_bk_commit_interval, ib_bk_commit_interval,
- PLUGIN_VAR_OPCMDARG,
- "Background commit interval in seconds",
- NULL, NULL,
- 5, /* Default setting */
- 1, /* Minimum value */
- 1024 * 1024 * 1024, 0); /* Maximum value */
+#endif /* HAVE_LIBNUMA */
static MYSQL_SYSVAR_STR(change_buffering, innobase_change_buffering,
PLUGIN_VAR_RQCMDARG,
- "Buffer changes to reduce random access: "
- "OFF, ON, inserting, deleting, changing, or purging.",
+ "Buffer changes to reduce random access:"
+ " OFF, ON, inserting, deleting, changing, or purging.",
innodb_change_buffering_validate,
innodb_change_buffering_update, "all");
static MYSQL_SYSVAR_UINT(change_buffer_max_size,
- innobase_change_buffer_max_size,
+ srv_change_buffer_max_size,
PLUGIN_VAR_RQCMDARG,
"Maximum on-disk size of change buffer in terms of percentage"
" of the buffer pool.",
@@ -19649,16 +20783,21 @@ static MYSQL_SYSVAR_UINT(change_buffer_max_size,
static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method,
PLUGIN_VAR_RQCMDARG,
- "Specifies how InnoDB index statistics collection code should "
- "treat NULLs. Possible values are NULLS_EQUAL (default), "
- "NULLS_UNEQUAL and NULLS_IGNORED",
+ "Specifies how InnoDB index statistics collection code should"
+ " treat NULLs. Possible values are NULLS_EQUAL (default),"
+ " NULLS_UNEQUAL and NULLS_IGNORED",
NULL, NULL, SRV_STATS_NULLS_EQUAL, &innodb_stats_method_typelib);
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+static MYSQL_SYSVAR_BOOL(change_buffer_dump, ibuf_dump,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "Dump the change buffer at startup.",
+ NULL, NULL, FALSE);
+
static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug,
PLUGIN_VAR_RQCMDARG,
- "Debug flags for InnoDB change buffering (0=none, 2=crash at merge)",
- NULL, NULL, 0, 0, 2, 0);
+ "Debug flags for InnoDB change buffering (0=none, 1=try to buffer)",
+ NULL, NULL, 0, 0, 1, 0);
static MYSQL_SYSVAR_BOOL(disable_background_merge,
srv_ibuf_disable_background_merge,
@@ -19700,10 +20839,11 @@ innobase_disallow_writes_update(
*static_cast<my_bool*>(var_ptr) = val;
ut_a(srv_allow_writes_event);
mysql_mutex_unlock(&LOCK_global_system_variables);
- if (val)
+ if (val) {
os_event_reset(srv_allow_writes_event);
- else
+ } else {
os_event_set(srv_allow_writes_event);
+ }
mysql_mutex_lock(&LOCK_global_system_variables);
}
@@ -19712,6 +20852,7 @@ static MYSQL_SYSVAR_BOOL(disallow_writes, innobase_disallow_writes,
"Tell InnoDB to stop any writes to disk",
NULL, innobase_disallow_writes_update, FALSE);
#endif /* WITH_INNODB_DISALLOW_WRITES */
+
static MYSQL_SYSVAR_BOOL(random_read_ahead, srv_random_read_ahead,
PLUGIN_VAR_NOCMDARG,
"Whether to use read ahead for random access within an extent.",
@@ -19719,8 +20860,8 @@ static MYSQL_SYSVAR_BOOL(random_read_ahead, srv_random_read_ahead,
static MYSQL_SYSVAR_ULONG(read_ahead_threshold, srv_read_ahead_threshold,
PLUGIN_VAR_RQCMDARG,
- "Number of pages that must be accessed sequentially for InnoDB to "
- "trigger a readahead.",
+ "Number of pages that must be accessed sequentially for InnoDB to"
+ " trigger a readahead.",
NULL, NULL, 56, 0, 64, 0);
static MYSQL_SYSVAR_STR(monitor_enable, innobase_enable_monitor_counter,
@@ -19758,7 +20899,7 @@ static MYSQL_SYSVAR_BOOL(status_output_locks, srv_print_innodb_lock_monitor,
static MYSQL_SYSVAR_BOOL(print_all_deadlocks, srv_print_all_deadlocks,
PLUGIN_VAR_OPCMDARG,
- "Print all deadlocks to MySQL error log (off by default)",
+ "Print all deadlocks to MariaDB error log (off by default)",
NULL, NULL, FALSE);
static MYSQL_SYSVAR_ULONG(compression_failure_threshold_pct,
@@ -19779,10 +20920,15 @@ static MYSQL_SYSVAR_BOOL(read_only, srv_read_only_mode,
"Start InnoDB in read only mode (off by default)",
NULL, NULL, FALSE);
+static MYSQL_SYSVAR_BOOL(safe_truncate, srv_safe_truncate,
+ PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
+ "Use backup-safe TRUNCATE TABLE and crash-safe RENAME (incompatible with older MariaDB 10.2; ON by default)",
+ NULL, NULL, TRUE);
+
static MYSQL_SYSVAR_BOOL(cmp_per_index_enabled, srv_cmp_per_index_enabled,
PLUGIN_VAR_OPCMDARG,
- "Enable INFORMATION_SCHEMA.innodb_cmp_per_index, "
- "may have negative impact on performance (off by default)",
+ "Enable INFORMATION_SCHEMA.innodb_cmp_per_index,"
+ " may have negative impact on performance (off by default)",
NULL, innodb_cmp_per_index_update, FALSE);
static MYSQL_SYSVAR_ENUM(default_row_format, innodb_default_row_format,
@@ -19790,7 +20936,7 @@ static MYSQL_SYSVAR_ENUM(default_row_format, innodb_default_row_format,
"The default ROW FORMAT for all innodb tables created without explicit"
" ROW_FORMAT. Possible values are REDUNDANT, COMPACT, and DYNAMIC."
" The ROW_FORMAT value COMPRESSED is not allowed",
- NULL, NULL, DEFAULT_ROW_FORMAT_COMPACT,
+ NULL, NULL, DEFAULT_ROW_FORMAT_DYNAMIC,
&innodb_default_row_format_typelib);
#ifdef UNIV_DEBUG
@@ -19806,9 +20952,9 @@ static MYSQL_SYSVAR_UINT(limit_optimistic_insert_debug,
static MYSQL_SYSVAR_BOOL(trx_purge_view_update_only_debug,
srv_purge_view_update_only_debug, PLUGIN_VAR_NOCMDOPT,
- "Pause actual purging any delete-marked records, but merely update the purge view. "
- "It is to create artificially the situation the purge view have been updated "
- "but the each purges were not done yet.",
+ "Pause actual purging any delete-marked records, but merely update the purge view."
+ " It is to create artificially the situation the purge view have been updated"
+ " but the each purges were not done yet.",
NULL, NULL, FALSE);
static MYSQL_SYSVAR_UINT(data_file_size_debug,
@@ -19826,12 +20972,40 @@ static MYSQL_SYSVAR_ULONG(saved_page_number_debug,
srv_saved_page_number_debug, PLUGIN_VAR_OPCMDARG,
"An InnoDB page number.",
NULL, innodb_save_page_no, 0, 0, UINT_MAX32, 0);
-#endif /* UNIV_DEBUG */
+
+static MYSQL_SYSVAR_BOOL(disable_resize_buffer_pool_debug,
+ buf_disable_resize_buffer_pool_debug, PLUGIN_VAR_NOCMDARG,
+ "Disable resizing buffer pool to make assertion code not expensive.",
+ NULL, NULL, TRUE);
+
+static MYSQL_SYSVAR_BOOL(page_cleaner_disabled_debug,
+ innodb_page_cleaner_disabled_debug,
+ PLUGIN_VAR_OPCMDARG,
+ "Disable page cleaner",
+ NULL, buf_flush_page_cleaner_disabled_debug_update, FALSE);
+
+static MYSQL_SYSVAR_BOOL(sync_debug, srv_sync_debug,
+ PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
+ "Enable the sync debug checks",
+ NULL, NULL, FALSE);
+
+static MYSQL_SYSVAR_BOOL(dict_stats_disabled_debug,
+ innodb_dict_stats_disabled_debug,
+ PLUGIN_VAR_OPCMDARG,
+ "Disable dict_stats thread",
+ NULL, dict_stats_disabled_debug_update, FALSE);
+
+static MYSQL_SYSVAR_BOOL(master_thread_disabled_debug,
+ srv_master_thread_disabled_debug,
+ PLUGIN_VAR_OPCMDARG,
+ "Disable master thread",
+ NULL, srv_master_thread_disabled_debug_update, FALSE);
static MYSQL_SYSVAR_UINT(simulate_comp_failures, srv_simulate_comp_failures,
PLUGIN_VAR_NOCMDARG,
"Simulate compression failures.",
NULL, NULL, 0, 0, 99, 0);
+#endif /* UNIV_DEBUG */
static MYSQL_SYSVAR_BOOL(force_primary_key,
srv_force_primary_key,
@@ -19841,8 +21015,8 @@ static MYSQL_SYSVAR_BOOL(force_primary_key,
static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim,
PLUGIN_VAR_OPCMDARG,
- "Use trim. Default FALSE.",
- NULL, NULL, FALSE);
+ "Deallocate (punch_hole|trim) unused portions of the page compressed page (on by default)",
+ NULL, innodb_use_trim_update, TRUE);
static const char *page_compression_algorithms[]= { "none", "zlib", "lz4", "lzo", "lzma", "bzip2", "snappy", 0 };
static TYPELIB page_compression_algorithms_typelib=
@@ -19862,7 +21036,7 @@ static MYSQL_SYSVAR_ENUM(compression_algorithm, innodb_compression_algorithm,
static MYSQL_SYSVAR_LONG(mtflush_threads, srv_mtflush_threads,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Number of multi-threaded flush threads",
+ "DEPRECATED. Number of multi-threaded flush threads",
NULL, NULL,
MTFLUSH_DEFAULT_WORKER, /* Default setting */
1, /* Minimum setting */
@@ -19871,7 +21045,7 @@ static MYSQL_SYSVAR_LONG(mtflush_threads, srv_mtflush_threads,
static MYSQL_SYSVAR_BOOL(use_mtflush, srv_use_mtflush,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Use multi-threaded flush. Default FALSE.",
+ "DEPRECATED. Use multi-threaded flush. Default FALSE.",
NULL, NULL, FALSE);
static MYSQL_SYSVAR_ULONG(fatal_semaphore_wait_threshold, srv_fatal_semaphore_wait_threshold,
@@ -19988,20 +21162,34 @@ static MYSQL_SYSVAR_BOOL(debug_force_scrubbing,
0,
"Perform extra scrubbing to increase test exposure",
NULL, NULL, FALSE);
+
+char *innobase_debug_sync;
+static MYSQL_SYSVAR_STR(debug_sync, innobase_debug_sync,
+ PLUGIN_VAR_NOCMDARG,
+ "debug_sync for innodb purge threads. "
+ "Use it to set up sync points for all purge threads "
+ "at once. The commands will be applied sequentially at "
+ "the beginning of purging the next undo record.",
+ NULL,
+ innobase_debug_sync_set, NULL);
#endif /* UNIV_DEBUG */
-static MYSQL_SYSVAR_BOOL(instrument_semaphores, srv_instrument_semaphores,
+static MYSQL_SYSVAR_BOOL(instrument_semaphores, innodb_instrument_semaphores,
PLUGIN_VAR_OPCMDARG,
- "Enable semaphore request instrumentation. This could have some effect on performance but allows better"
- " information on long semaphore wait problems. (Default: not enabled)",
- 0, 0, FALSE);
+ "DEPRECATED. This setting has no effect.",
+ NULL, innodb_instrument_semaphores_update, FALSE);
+
+static MYSQL_SYSVAR_BOOL(encrypt_temporary_tables, innodb_encrypt_temporary_tables,
+ PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
+ "Enrypt the temporary table data.",
+ NULL, NULL, false);
+
+#include "ha_xtradb.h"
static struct st_mysql_sys_var* innobase_system_variables[]= {
- MYSQL_SYSVAR(additional_mem_pool_size),
- MYSQL_SYSVAR(api_trx_level),
- MYSQL_SYSVAR(api_bk_commit_interval),
MYSQL_SYSVAR(autoextend_increment),
MYSQL_SYSVAR(buffer_pool_size),
+ MYSQL_SYSVAR(buffer_pool_chunk_size),
MYSQL_SYSVAR(buffer_pool_instances),
MYSQL_SYSVAR(buffer_pool_filename),
MYSQL_SYSVAR(buffer_pool_dump_now),
@@ -20022,21 +21210,19 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(lru_scan_depth),
MYSQL_SYSVAR(flush_neighbors),
MYSQL_SYSVAR(checksum_algorithm),
+ MYSQL_SYSVAR(log_checksums),
MYSQL_SYSVAR(checksums),
MYSQL_SYSVAR(commit_concurrency),
MYSQL_SYSVAR(concurrency_tickets),
MYSQL_SYSVAR(compression_level),
MYSQL_SYSVAR(data_file_path),
+ MYSQL_SYSVAR(temp_data_file_path),
MYSQL_SYSVAR(data_home_dir),
MYSQL_SYSVAR(doublewrite),
+ MYSQL_SYSVAR(stats_include_delete_marked),
MYSQL_SYSVAR(use_atomic_writes),
MYSQL_SYSVAR(use_fallocate),
- MYSQL_SYSVAR(stats_include_delete_marked),
- MYSQL_SYSVAR(api_enable_binlog),
- MYSQL_SYSVAR(api_enable_mdl),
- MYSQL_SYSVAR(api_disable_rowlock),
MYSQL_SYSVAR(fast_shutdown),
- MYSQL_SYSVAR(file_io_threads),
MYSQL_SYSVAR(read_io_threads),
MYSQL_SYSVAR(write_io_threads),
MYSQL_SYSVAR(file_per_table),
@@ -20047,6 +21233,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(flush_log_at_trx_commit),
MYSQL_SYSVAR(flush_method),
MYSQL_SYSVAR(force_recovery),
+ MYSQL_SYSVAR(fill_factor),
MYSQL_SYSVAR(ft_cache_size),
MYSQL_SYSVAR(ft_total_cache_size),
MYSQL_SYSVAR(ft_result_cache_limit),
@@ -20060,24 +21247,23 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(lock_schedule_algorithm),
MYSQL_SYSVAR(locks_unsafe_for_binlog),
MYSQL_SYSVAR(lock_wait_timeout),
-#ifdef UNIV_LOG_ARCHIVE
- MYSQL_SYSVAR(log_arch_dir),
- MYSQL_SYSVAR(log_archive),
-#endif /* UNIV_LOG_ARCHIVE */
+ MYSQL_SYSVAR(deadlock_detect),
MYSQL_SYSVAR(page_size),
MYSQL_SYSVAR(log_buffer_size),
MYSQL_SYSVAR(log_file_size),
MYSQL_SYSVAR(log_files_in_group),
+ MYSQL_SYSVAR(log_write_ahead_size),
MYSQL_SYSVAR(log_group_home_dir),
MYSQL_SYSVAR(log_compressed_pages),
+ MYSQL_SYSVAR(log_optimize_ddl),
MYSQL_SYSVAR(max_dirty_pages_pct),
MYSQL_SYSVAR(max_dirty_pages_pct_lwm),
MYSQL_SYSVAR(adaptive_flushing_lwm),
MYSQL_SYSVAR(adaptive_flushing),
+ MYSQL_SYSVAR(flush_sync),
MYSQL_SYSVAR(flushing_avg_loops),
MYSQL_SYSVAR(max_purge_lag),
MYSQL_SYSVAR(max_purge_lag_delay),
- MYSQL_SYSVAR(mirrored_log_groups),
MYSQL_SYSVAR(old_blocks_pct),
MYSQL_SYSVAR(old_blocks_time),
MYSQL_SYSVAR(open_files),
@@ -20096,7 +21282,10 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(stats_auto_recalc),
MYSQL_SYSVAR(stats_modified_counter),
MYSQL_SYSVAR(stats_traditional),
+#ifdef BTR_CUR_HASH_ADAPT
MYSQL_SYSVAR(adaptive_hash_index),
+ MYSQL_SYSVAR(adaptive_hash_index_parts),
+#endif /* BTR_CUR_HASH_ADAPT */
MYSQL_SYSVAR(stats_method),
MYSQL_SYSVAR(replication_delay),
MYSQL_SYSVAR(status_file),
@@ -20108,21 +21297,20 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(spin_wait_delay),
MYSQL_SYSVAR(table_locks),
MYSQL_SYSVAR(thread_concurrency),
-#ifdef HAVE_ATOMIC_BUILTINS
MYSQL_SYSVAR(adaptive_max_sleep_delay),
-#endif /* HAVE_ATOMIC_BUILTINS */
MYSQL_SYSVAR(prefix_index_cluster_optimization),
MYSQL_SYSVAR(thread_sleep_delay),
+ MYSQL_SYSVAR(tmpdir),
MYSQL_SYSVAR(autoinc_lock_mode),
MYSQL_SYSVAR(version),
- MYSQL_SYSVAR(use_sys_malloc),
MYSQL_SYSVAR(use_native_aio),
#ifdef HAVE_LIBNUMA
MYSQL_SYSVAR(numa_interleave),
-#endif // HAVE_LIBNUMA
+#endif /* HAVE_LIBNUMA */
MYSQL_SYSVAR(change_buffering),
MYSQL_SYSVAR(change_buffer_max_size),
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+ MYSQL_SYSVAR(change_buffer_dump),
MYSQL_SYSVAR(change_buffering_debug),
MYSQL_SYSVAR(disable_background_merge),
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
@@ -20132,8 +21320,10 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(random_read_ahead),
MYSQL_SYSVAR(read_ahead_threshold),
MYSQL_SYSVAR(read_only),
+ MYSQL_SYSVAR(safe_truncate),
MYSQL_SYSVAR(io_capacity),
MYSQL_SYSVAR(io_capacity_max),
+ MYSQL_SYSVAR(page_cleaners),
MYSQL_SYSVAR(idle_flush_pct),
MYSQL_SYSVAR(monitor_enable),
MYSQL_SYSVAR(monitor_disable),
@@ -20142,10 +21332,10 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(purge_threads),
MYSQL_SYSVAR(purge_batch_size),
#ifdef UNIV_DEBUG
- MYSQL_SYSVAR(purge_run_now),
- MYSQL_SYSVAR(purge_stop_now),
+ MYSQL_SYSVAR(background_drop_list_empty),
MYSQL_SYSVAR(log_checkpoint_now),
MYSQL_SYSVAR(buf_flush_list_now),
+ MYSQL_SYSVAR(merge_threshold_set_all_debug),
#endif /* UNIV_DEBUG */
#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
MYSQL_SYSVAR(page_hash_locks),
@@ -20156,6 +21346,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(print_all_deadlocks),
MYSQL_SYSVAR(cmp_per_index_enabled),
MYSQL_SYSVAR(undo_logs),
+ MYSQL_SYSVAR(max_undo_log_size),
+ MYSQL_SYSVAR(purge_rseg_truncate_frequency),
+ MYSQL_SYSVAR(undo_log_truncate),
MYSQL_SYSVAR(rollback_segments),
MYSQL_SYSVAR(undo_directory),
MYSQL_SYSVAR(undo_tablespaces),
@@ -20163,20 +21356,25 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(compression_failure_threshold_pct),
MYSQL_SYSVAR(compression_pad_pct_max),
MYSQL_SYSVAR(default_row_format),
- MYSQL_SYSVAR(simulate_comp_failures),
#ifdef UNIV_DEBUG
+ MYSQL_SYSVAR(simulate_comp_failures),
MYSQL_SYSVAR(trx_rseg_n_slots_debug),
MYSQL_SYSVAR(limit_optimistic_insert_debug),
MYSQL_SYSVAR(trx_purge_view_update_only_debug),
MYSQL_SYSVAR(data_file_size_debug),
MYSQL_SYSVAR(fil_make_page_dirty_debug),
MYSQL_SYSVAR(saved_page_number_debug),
+ MYSQL_SYSVAR(disable_resize_buffer_pool_debug),
+ MYSQL_SYSVAR(page_cleaner_disabled_debug),
+ MYSQL_SYSVAR(dict_stats_disabled_debug),
+ MYSQL_SYSVAR(master_thread_disabled_debug),
+ MYSQL_SYSVAR(sync_debug),
#endif /* UNIV_DEBUG */
- MYSQL_SYSVAR(tmpdir),
MYSQL_SYSVAR(force_primary_key),
MYSQL_SYSVAR(fatal_semaphore_wait_threshold),
/* Table page compression feature */
MYSQL_SYSVAR(use_trim),
+ MYSQL_SYSVAR(compression_default),
MYSQL_SYSVAR(compression_algorithm),
MYSQL_SYSVAR(mtflush_threads),
MYSQL_SYSVAR(use_mtflush),
@@ -20197,9 +21395,17 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(background_scrub_data_check_interval),
#ifdef UNIV_DEBUG
MYSQL_SYSVAR(debug_force_scrubbing),
+ MYSQL_SYSVAR(debug_sync),
#endif
MYSQL_SYSVAR(instrument_semaphores),
MYSQL_SYSVAR(buf_dump_status_frequency),
+ MYSQL_SYSVAR(background_thread),
+ MYSQL_SYSVAR(encrypt_temporary_tables),
+
+ /* XtraDB compatibility system variables */
+#define HA_XTRADB_SYSVARS
+#include "ha_xtradb.h"
+
NULL
};
@@ -20247,6 +21453,7 @@ i_s_innodb_sys_foreign,
i_s_innodb_sys_foreign_cols,
i_s_innodb_sys_tablespaces,
i_s_innodb_sys_datafiles,
+i_s_innodb_sys_virtual,
i_s_innodb_mutexes,
i_s_innodb_sys_semaphore_waits,
i_s_innodb_tablespaces_encryption,
@@ -20271,219 +21478,652 @@ innobase_commit_concurrency_init_default()
= innobase_commit_concurrency;
}
-/** @brief Initialize the default and max value of innodb_undo_logs.
-
-Once InnoDB is running, the default value and the max value of
-innodb_undo_logs must be equal to the available undo logs,
-given by srv_available_undo_logs. */
+/** @brief Adjust some InnoDB startup parameters based on file contents
+or innodb_page_size. */
static
void
-innobase_undo_logs_init_default_max()
-/*=================================*/
+innodb_params_adjust()
{
+ /* The default value and the max value of
+ innodb_undo_logs must be equal to the available undo logs. */
MYSQL_SYSVAR_NAME(undo_logs).max_val
= MYSQL_SYSVAR_NAME(undo_logs).def_val
- = static_cast<unsigned long>(srv_available_undo_logs);
+ = srv_available_undo_logs;
+ MYSQL_SYSVAR_NAME(max_undo_log_size).max_val
+ = 1ULL << (32 + UNIV_PAGE_SIZE_SHIFT);
+ MYSQL_SYSVAR_NAME(max_undo_log_size).min_val
+ = MYSQL_SYSVAR_NAME(max_undo_log_size).def_val
+ = ulonglong(SRV_UNDO_TABLESPACE_SIZE_IN_PAGES)
+ * srv_page_size;
}
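[Editor's note: as a worked example of the adjusted limits, with the default 16KiB page size UNIV_PAGE_SIZE_SHIFT is 14, so the maximum of innodb_max_undo_log_size becomes 1ULL << 46, i.e. 64TiB, while its default and minimum become SRV_UNDO_TABLESPACE_SIZE_IN_PAGES * srv_page_size. These values depend on the page size and available undo logs, which are only known at startup, hence the runtime adjustment.]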
-#ifdef UNIV_COMPILE_TEST_FUNCS
+/****************************************************************************
+ * DS-MRR implementation
+ ***************************************************************************/
-struct innobase_convert_name_test_t {
- char* buf;
- ulint buflen;
- const char* id;
- ulint idlen;
- void* thd;
- ibool file_id;
+/**
+Multi Range Read interface, DS-MRR calls */
+int
+ha_innobase::multi_range_read_init(
+ RANGE_SEQ_IF* seq,
+ void* seq_init_param,
+ uint n_ranges,
+ uint mode,
+ HANDLER_BUFFER* buf)
+{
+ return(m_ds_mrr.dsmrr_init(this, seq, seq_init_param,
+ n_ranges, mode, buf));
+}
- const char* expected;
-};
+int
+ha_innobase::multi_range_read_next(
+ range_id_t* range_info)
+{
+ return(m_ds_mrr.dsmrr_next(range_info));
+}
-void
-test_innobase_convert_name()
-{
- char buf[1024];
- ulint i;
-
- innobase_convert_name_test_t test_input[] = {
- {buf, sizeof(buf), "abcd", 4, NULL, TRUE, "\"abcd\""},
- {buf, 7, "abcd", 4, NULL, TRUE, "\"abcd\""},
- {buf, 6, "abcd", 4, NULL, TRUE, "\"abcd\""},
- {buf, 5, "abcd", 4, NULL, TRUE, "\"abc\""},
- {buf, 4, "abcd", 4, NULL, TRUE, "\"ab\""},
-
- {buf, sizeof(buf), "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""},
- {buf, 9, "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""},
- {buf, 8, "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""},
- {buf, 7, "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""},
- {buf, 6, "ab@0060cd", 9, NULL, TRUE, "\"ab`c\""},
- {buf, 5, "ab@0060cd", 9, NULL, TRUE, "\"ab`\""},
- {buf, 4, "ab@0060cd", 9, NULL, TRUE, "\"ab\""},
-
- {buf, sizeof(buf), "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50#ab\"\"cd\""},
- {buf, 17, "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50#ab\"\"cd\""},
- {buf, 16, "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50#ab\"\"c\""},
- {buf, 15, "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50#ab\"\"\""},
- {buf, 14, "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50#ab\""},
- {buf, 13, "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50#ab\""},
- {buf, 12, "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50#a\""},
- {buf, 11, "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50#\""},
- {buf, 10, "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50\""},
-
- {buf, sizeof(buf), "ab/cd", 5, NULL, TRUE, "\"ab\".\"cd\""},
- {buf, 9, "ab/cd", 5, NULL, TRUE, "\"ab\".\"cd\""},
- {buf, 8, "ab/cd", 5, NULL, TRUE, "\"ab\".\"c\""},
- {buf, 7, "ab/cd", 5, NULL, TRUE, "\"ab\".\"\""},
- {buf, 6, "ab/cd", 5, NULL, TRUE, "\"ab\"."},
- {buf, 5, "ab/cd", 5, NULL, TRUE, "\"ab\"."},
- {buf, 4, "ab/cd", 5, NULL, TRUE, "\"ab\""},
- {buf, 3, "ab/cd", 5, NULL, TRUE, "\"a\""},
- {buf, 2, "ab/cd", 5, NULL, TRUE, "\"\""},
- /* XXX probably "" is a better result in this case
- {buf, 1, "ab/cd", 5, NULL, TRUE, "."},
- */
- {buf, 0, "ab/cd", 5, NULL, TRUE, ""},
- };
+ha_rows
+ha_innobase::multi_range_read_info_const(
+ uint keyno,
+ RANGE_SEQ_IF* seq,
+ void* seq_init_param,
+ uint n_ranges,
+ uint* bufsz,
+ uint* flags,
+ Cost_estimate* cost)
+{
+ /* See comments in ha_myisam::multi_range_read_info_const */
+ m_ds_mrr.init(this, table);
- for (i = 0; i < sizeof(test_input) / sizeof(test_input[0]); i++) {
+ if (m_prebuilt->select_lock_type != LOCK_NONE) {
+ *flags |= HA_MRR_USE_DEFAULT_IMPL;
+ }
- char* end;
- ibool ok = TRUE;
- size_t res_len;
+ ha_rows res= m_ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges,
+ bufsz, flags, cost);
+ return res;
+}
- fprintf(stderr, "TESTING %lu, %s, %lu, %s\n",
- test_input[i].buflen,
- test_input[i].id,
- test_input[i].idlen,
- test_input[i].expected);
+ha_rows
+ha_innobase::multi_range_read_info(
+ uint keyno,
+ uint n_ranges,
+ uint keys,
+ uint key_parts,
+ uint* bufsz,
+ uint* flags,
+ Cost_estimate* cost)
+{
+ m_ds_mrr.init(this, table);
+ ha_rows res= m_ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz,
+ flags, cost);
+ return res;
+}
- end = innobase_convert_name(
- test_input[i].buf,
- test_input[i].buflen,
- test_input[i].id,
- test_input[i].idlen,
- test_input[i].thd,
- test_input[i].file_id);
+int
+ha_innobase::multi_range_read_explain_info(
+ uint mrr_mode,
+ char *str,
+ size_t size)
+{
+ return m_ds_mrr.dsmrr_explain_info(mrr_mode, str, size);
+}
- res_len = (size_t) (end - test_input[i].buf);
+/**
+Index Condition Pushdown interface implementation */
- if (res_len != strlen(test_input[i].expected)) {
+/*************************************************************//**
+InnoDB index push-down condition check
+@return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */
+ICP_RESULT
+innobase_index_cond(
+/*================*/
+ void* file) /*!< in/out: pointer to ha_innobase */
+{
+ return handler_index_cond_check(file);
+}
- fprintf(stderr, "unexpected len of the result: %u, "
- "expected: %u\n", (unsigned) res_len,
- (unsigned) strlen(test_input[i].expected));
- ok = FALSE;
- }
+/** Parse the table file name into table name and database name.
+@param[in] tbl_name InnoDB table name
+@param[out] dbname database name buffer (NAME_LEN + 1 bytes)
+@param[out] tblname table name buffer (NAME_LEN + 1 bytes)
+@param[out] dbnamelen database name length
+@param[out] tblnamelen table name length
+@return true if the table name is parsed properly. */
+static bool table_name_parse(
+ const table_name_t& tbl_name,
+ char* dbname,
+ char* tblname,
+ ulint& dbnamelen,
+ ulint& tblnamelen)
+{
+ dbnamelen = dict_get_db_name_len(tbl_name.m_name);
+ char db_buf[MAX_DATABASE_NAME_LEN + 1];
+ char tbl_buf[MAX_TABLE_NAME_LEN + 1];
- if (memcmp(test_input[i].buf,
- test_input[i].expected,
- strlen(test_input[i].expected)) != 0
- || !ok) {
+ ut_ad(dbnamelen > 0);
+ ut_ad(dbnamelen <= MAX_DATABASE_NAME_LEN);
- fprintf(stderr, "unexpected result: %.*s, "
- "expected: %s\n", (int) res_len,
- test_input[i].buf,
- test_input[i].expected);
- ok = FALSE;
- }
+ memcpy(db_buf, tbl_name.m_name, dbnamelen);
+ db_buf[dbnamelen] = 0;
- if (ok) {
- fprintf(stderr, "OK: res: %.*s\n\n", (int) res_len,
- buf);
- } else {
- fprintf(stderr, "FAILED\n\n");
- return;
- }
+ tblnamelen = strlen(tbl_name.m_name + dbnamelen + 1);
+ memcpy(tbl_buf, tbl_name.m_name + dbnamelen + 1, tblnamelen);
+ tbl_buf[tblnamelen] = 0;
+
+ filename_to_tablename(db_buf, dbname, MAX_DATABASE_NAME_LEN + 1, true);
+
+ if (tblnamelen > TEMP_FILE_PREFIX_LENGTH
+ && !strncmp(tbl_buf, TEMP_FILE_PREFIX, TEMP_FILE_PREFIX_LENGTH)) {
+ return false;
+ }
+
+ if (char *is_part = strchr(tbl_buf, '#')) {
+ *is_part = '\0';
+ tblnamelen = is_part - tbl_buf;
}
+
+ filename_to_tablename(tbl_buf, tblname, MAX_TABLE_NAME_LEN + 1, true);
+ return true;
}
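[Editor's note: a hedged usage sketch of table_name_parse(); the sample name is illustrative, but innodb_acquire_mdl() below calls it exactly this way. An encoded name such as "test/t1#P#p0" decodes to dbname "test" and tblname "t1" with the partition suffix stripped, while names carrying the temporary-file prefix make the function return false.

	char	db[NAME_LEN + 1];
	char	tbl[NAME_LEN + 1];
	ulint	db_len, tbl_len;

	if (table_name_parse(table->name, db, tbl, db_len, tbl_len)) {
		/* db and tbl now hold the decoded,
		partition-stripped names */
	}
]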
-#endif /* UNIV_COMPILE_TEST_FUNCS */
-/****************************************************************************
- * DS-MRR implementation
- ***************************************************************************/
+/** Acquire metadata lock and MariaDB table handle for an InnoDB table.
+@param[in,out] thd thread handle
+@param[in,out] table InnoDB table
+@return MariaDB table handle
+@retval NULL if the table does not exist, is inaccessible or is corrupted.
+static TABLE* innodb_acquire_mdl(THD* thd, dict_table_t* table)
+{
+ char db_buf[NAME_LEN + 1], db_buf1[NAME_LEN + 1];
+ char tbl_buf[NAME_LEN + 1], tbl_buf1[NAME_LEN + 1];
+ ulint db_buf_len, db_buf1_len;
+ ulint tbl_buf_len, tbl_buf1_len;
+
+ if (!table_name_parse(table->name, db_buf, tbl_buf,
+ db_buf_len, tbl_buf_len)) {
+ table->release();
+ return NULL;
+ }
-/**
- * Multi Range Read interface, DS-MRR calls
- */
+ DEBUG_SYNC(thd, "ib_purge_virtual_latch_released");
-int ha_innobase::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
- uint n_ranges, uint mode,
- HANDLER_BUFFER *buf)
-{
- return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf);
-}
+ const table_id_t table_id = table->id;
+retry_mdl:
+ const bool unaccessible = !table->is_readable() || table->corrupted;
+ table->release();
-int ha_innobase::multi_range_read_next(range_id_t *range_info)
-{
- return ds_mrr.dsmrr_next(range_info);
+ if (unaccessible) {
+ return NULL;
+ }
+
+ TABLE* mariadb_table = open_purge_table(thd, db_buf, db_buf_len,
+ tbl_buf, tbl_buf_len);
+ if (!mariadb_table)
+ thd_clear_error(thd);
+
+ DEBUG_SYNC(thd, "ib_purge_virtual_got_no_such_table");
+
+ table = dict_table_open_on_id(table_id, false, DICT_TABLE_OP_NORMAL);
+
+ if (table == NULL) {
+ /* Table is dropped. */
+ goto fail;
+ }
+
+ if (!fil_table_accessible(table)) {
+release_fail:
+ table->release();
+fail:
+ if (mariadb_table) {
+ close_thread_tables(thd);
+ }
+
+ return NULL;
+ }
+
+ if (!table_name_parse(table->name, db_buf1, tbl_buf1,
+ db_buf1_len, tbl_buf1_len)) {
+ goto release_fail;
+ }
+
+ if (!mariadb_table) {
+ } else if (!strcmp(db_buf, db_buf1) && !strcmp(tbl_buf, tbl_buf1)) {
+ return mariadb_table;
+ } else {
+ /* Table is renamed. So release MDL for old name and try
+ to acquire the MDL for new table name. */
+ close_thread_tables(thd);
+ }
+
+ strcpy(tbl_buf, tbl_buf1);
+ strcpy(db_buf, db_buf1);
+ tbl_buf_len = tbl_buf1_len;
+ db_buf_len = db_buf1_len;
+ goto retry_mdl;
}
-ha_rows ha_innobase::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
- void *seq_init_param,
- uint n_ranges, uint *bufsz,
- uint *flags,
- Cost_estimate *cost)
+/** Find or open a table handle for the virtual column template
+@param[in] thd thread handle
+@param[in,out] table InnoDB table whose virtual column template
+ is to be updated
+@return table handle
+@retval NULL if the table is dropped, inaccessible or corrupted
+for the purge thread */
+static TABLE* innodb_find_table_for_vc(THD* thd, dict_table_t* table)
{
- /* See comments in ha_myisam::multi_range_read_info_const */
- ds_mrr.init(this, table);
+ DBUG_EXECUTE_IF(
+ "ib_purge_virtual_mdev_16222_1",
+ DBUG_ASSERT(!debug_sync_set_action(
+ thd,
+ STRING_WITH_LEN("ib_purge_virtual_latch_released "
+ "SIGNAL latch_released "
+ "WAIT_FOR drop_started"))););
+ DBUG_EXECUTE_IF(
+ "ib_purge_virtual_mdev_16222_2",
+ DBUG_ASSERT(!debug_sync_set_action(
+ thd,
+ STRING_WITH_LEN("ib_purge_virtual_got_no_such_table "
+ "SIGNAL got_no_such_table"))););
+
+ if (THDVAR(thd, background_thread)) {
+ /* Purge thread acquires dict_operation_lock while
+ processing undo log record. Release the dict_operation_lock
+ before acquiring MDL on the table. */
+ rw_lock_s_unlock(&dict_operation_lock);
+ return innodb_acquire_mdl(thd, table);
+ } else {
+ if (table->vc_templ->mysql_table_query_id
+ == thd_get_query_id(thd)) {
+ return table->vc_templ->mysql_table;
+ }
+ }
+
+ char db_buf[NAME_LEN + 1];
+ char tbl_buf[NAME_LEN + 1];
+ ulint db_buf_len, tbl_buf_len;
+
+ if (!table_name_parse(table->name, db_buf, tbl_buf,
+ db_buf_len, tbl_buf_len)) {
+ ut_ad(!"invalid table name");
+ return NULL;
+ }
- if (prebuilt->select_lock_type != LOCK_NONE)
- *flags |= HA_MRR_USE_DEFAULT_IMPL;
+ TABLE* mysql_table = find_fk_open_table(thd, db_buf, db_buf_len,
+ tbl_buf, tbl_buf_len);
- ha_rows res= ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges,
- bufsz, flags, cost);
- return res;
+ table->vc_templ->mysql_table = mysql_table;
+ table->vc_templ->mysql_table_query_id = thd_get_query_id(thd);
+ return mysql_table;
}
-ha_rows ha_innobase::multi_range_read_info(uint keyno, uint n_ranges,
- uint keys, uint key_parts,
- uint *bufsz, uint *flags,
- Cost_estimate *cost)
+/** Build the virtual column template for the given table.
+@param[in,out]	table	table whose virtual column
+	template is to be built
+@return MariaDB table handle, or NULL */
+TABLE* innobase_init_vc_templ(dict_table_t* table)
{
- ds_mrr.init(this, table);
- ha_rows res= ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz,
- flags, cost);
- return res;
+ if (table->vc_templ != NULL) {
+ return NULL;
+ }
+ DBUG_ENTER("innobase_init_vc_templ");
+
+ table->vc_templ = UT_NEW_NOKEY(dict_vcol_templ_t());
+
+ TABLE *mysql_table= innodb_find_table_for_vc(current_thd, table);
+
+ ut_ad(mysql_table);
+ if (!mysql_table) {
+ DBUG_RETURN(NULL);
+ }
+
+ mutex_enter(&dict_sys->mutex);
+ innobase_build_v_templ(mysql_table, table, table->vc_templ, NULL, true);
+ mutex_exit(&dict_sys->mutex);
+ DBUG_RETURN(mysql_table);
}
-int ha_innobase::multi_range_read_explain_info(uint mrr_mode, char *str,
- size_t size)
-{
- return ds_mrr.dsmrr_explain_info(mrr_mode, str, size);
+/** Change dbname and table name in table->vc_templ.
+@param[in,out]	table	the table whose virtual column template
+dbname and tbname are to be renamed */
+void
+innobase_rename_vc_templ(
+ dict_table_t* table)
+{
+ char dbname[MAX_DATABASE_NAME_LEN + 1];
+	char	tbname[MAX_TABLE_NAME_LEN + 1];
+ char* name = table->name.m_name;
+ ulint dbnamelen = dict_get_db_name_len(name);
+ ulint tbnamelen = strlen(name) - dbnamelen - 1;
+ char t_dbname[MAX_DATABASE_NAME_LEN + 1];
+ char t_tbname[MAX_TABLE_NAME_LEN + 1];
+
+ strncpy(dbname, name, dbnamelen);
+ dbname[dbnamelen] = 0;
+ strncpy(tbname, name + dbnamelen + 1, tbnamelen);
+	tbname[tbnamelen] = 0;
+
+ /* For partition table, remove the partition name and use the
+ "main" table name to build the template */
+ char* is_part = is_partition(tbname);
+
+ if (is_part != NULL) {
+ *is_part = '\0';
+ tbnamelen = is_part - tbname;
+ }
+
+ dbnamelen = filename_to_tablename(dbname, t_dbname,
+ MAX_DATABASE_NAME_LEN + 1);
+ tbnamelen = filename_to_tablename(tbname, t_tbname,
+ MAX_TABLE_NAME_LEN + 1);
+
+ table->vc_templ->db_name = t_dbname;
+ table->vc_templ->tb_name = t_tbname;
+}
+
+/** Get the updated parent field value from the update vector for the
+given col_no.
+@param[in] foreign foreign key information
+@param[in] update updated parent vector.
+@param[in] col_no base column position of the child table to check
+@return updated field from the parent update vector, else NULL */
+static
+dfield_t*
+innobase_get_field_from_update_vector(
+ dict_foreign_t* foreign,
+ upd_t* update,
+ ulint col_no)
+{
+ dict_table_t* parent_table = foreign->referenced_table;
+ dict_index_t* parent_index = foreign->referenced_index;
+ ulint parent_field_no;
+ ulint parent_col_no;
+ ulint prefix_col_no;
+
+ for (ulint i = 0; i < foreign->n_fields; i++) {
+ if (dict_index_get_nth_col_no(foreign->foreign_index, i)
+ != col_no) {
+ continue;
+ }
+
+ parent_col_no = dict_index_get_nth_col_no(parent_index, i);
+ parent_field_no = dict_table_get_nth_col_pos(
+ parent_table, parent_col_no, &prefix_col_no);
+
+ for (ulint j = 0; j < update->n_fields; j++) {
+ upd_field_t* parent_ufield
+ = &update->fields[j];
+
+ if (parent_ufield->field_no == parent_field_no) {
+ return(&parent_ufield->new_val);
+ }
+ }
+ }
+
+	return(NULL);
}
+
/**
- * Index Condition Pushdown interface implementation
- */
+ Allocate a heap and a record for calculating virtual fields.
+ Used mainly for virtual fields in indexes.
+
+@param[in] thd MariaDB THD
+@param[in] index Index in use
+@param[out] heap Heap that holds temporary row
+@param[in,out] table MariaDB table
+@param[out] record Pointer to allocated MariaDB record
+@param[out] storage Internal storage for blobs etc
+
+@retval false on success
+@retval true on malloc failure, or on failure to open the MariaDB table
+	(for the purge thread).
+*/
-/*************************************************************//**
-InnoDB index push-down condition check
-@return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */
-UNIV_INTERN
-enum icp_result
-innobase_index_cond(
-/*================*/
- void* file) /*!< in/out: pointer to ha_innobase */
-{
- return handler_index_cond_check(file);
+bool innobase_allocate_row_for_vcol(
+ THD * thd,
+ dict_index_t* index,
+ mem_heap_t** heap,
+ TABLE** table,
+ byte** record,
+ VCOL_STORAGE** storage)
+{
+ TABLE *maria_table;
+ String *blob_value_storage;
+ if (!*table)
+ *table= innodb_find_table_for_vc(thd, index->table);
+
+	/* For the purge thread, there is a possibility that the table
+	could have been dropped, corrupted or become inaccessible. */
+ if (!*table)
+ return true;
+ maria_table= *table;
+ if (!*heap && !(*heap= mem_heap_create(srv_page_size)))
+ {
+ *storage= 0;
+ return TRUE;
+ }
+ *record= static_cast<byte*>(mem_heap_alloc(*heap,
+ maria_table->s->reclength));
+ *storage= static_cast<VCOL_STORAGE*>
+ (mem_heap_alloc(*heap, sizeof(**storage)));
+ blob_value_storage= static_cast<String*>
+ (mem_heap_alloc(*heap,
+ maria_table->s->virtual_not_stored_blob_fields *
+ sizeof(String)));
+ if (!*record || !*storage || !blob_value_storage)
+ {
+ *storage= 0;
+ return TRUE;
+ }
+ (*storage)->maria_table= maria_table;
+ (*storage)->innobase_record= *record;
+ (*storage)->maria_record= maria_table->field[0]->record_ptr();
+ (*storage)->blob_value_storage= blob_value_storage;
+
+ maria_table->move_fields(maria_table->field, *record,
+ (*storage)->maria_record);
+ maria_table->remember_blob_values(blob_value_storage);
+
+ return FALSE;
+}
+
+
+/** Free memory allocated by innobase_allocate_row_for_vcol() */
+
+void innobase_free_row_for_vcol(VCOL_STORAGE *storage)
+{
+ TABLE *maria_table= storage->maria_table;
+ maria_table->move_fields(maria_table->field, storage->maria_record,
+ storage->innobase_record);
+ maria_table->restore_blob_values(storage->blob_value_storage);
+}
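[Editor's note: a hedged sketch of how the allocate/free pair above is meant to bracket a virtual-column computation; the caller is hypothetical, and note that innobase_allocate_row_for_vcol() returns true on failure.

	mem_heap_t*	heap = NULL;
	TABLE*		mysql_table = NULL;
	byte*		record = NULL;
	VCOL_STORAGE*	storage = NULL;

	if (!innobase_allocate_row_for_vcol(thd, index, &heap, &mysql_table,
					    &record, &storage)) {
		/* ... compute virtual column values into 'record' ... */
		innobase_free_row_for_vcol(storage);
	}

	if (heap) {
		mem_heap_free(heap);
	}
]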
+
+
+/** Get the computed value by supplying the base column values.
+@param[in,out] row the data row
+@param[in] col virtual column
+@param[in] index index
+@param[in,out] local_heap heap memory for processing large data etc.
+@param[in,out] heap memory heap that copies the actual index row
+@param[in] ifield index field
+@param[in] thd MySQL thread handle
+@param[in,out]	mysql_table	mysql table object
+@param[in,out]	mysql_rec	MariaDB record buffer for the table row
+@param[in] old_table during ALTER TABLE, this is the old table
+ or NULL.
+@param[in] parent_update update vector for the parent row
+@param[in] foreign foreign key information
+@return the field filled with the computed value, or NULL if the value
+is only to be stored in the passed-in "mysql_rec" */
+dfield_t*
+innobase_get_computed_value(
+ dtuple_t* row,
+ const dict_v_col_t* col,
+ const dict_index_t* index,
+ mem_heap_t** local_heap,
+ mem_heap_t* heap,
+ const dict_field_t* ifield,
+ THD* thd,
+ TABLE* mysql_table,
+ byte* mysql_rec,
+ const dict_table_t* old_table,
+ upd_t* parent_update,
+ dict_foreign_t* foreign)
+{
+ byte rec_buf2[REC_VERSION_56_MAX_INDEX_COL_LEN];
+ byte* buf;
+ dfield_t* field;
+ ulint len;
+
+ const page_size_t page_size = (old_table == NULL)
+ ? dict_table_page_size(index->table)
+ : dict_table_page_size(old_table);
+
+ ulint ret = 0;
+
+ ut_ad(index->table->vc_templ);
+ ut_ad(thd != NULL);
+ ut_ad(mysql_table);
+
+ DBUG_ENTER("innobase_get_computed_value");
+ const mysql_row_templ_t*
+ vctempl = index->table->vc_templ->vtempl[
+ index->table->vc_templ->n_col + col->v_pos];
+
+ if (!heap || index->table->vc_templ->rec_len
+ >= REC_VERSION_56_MAX_INDEX_COL_LEN) {
+ if (*local_heap == NULL) {
+ *local_heap = mem_heap_create(UNIV_PAGE_SIZE);
+ }
+
+ buf = static_cast<byte*>(mem_heap_alloc(
+ *local_heap, index->table->vc_templ->rec_len));
+ } else {
+ buf = rec_buf2;
+ }
+
+ for (ulint i = 0; i < col->num_base; i++) {
+ dict_col_t* base_col = col->base_col[i];
+ const dfield_t* row_field = NULL;
+ ulint col_no = base_col->ind;
+ const mysql_row_templ_t* templ
+ = index->table->vc_templ->vtempl[col_no];
+ const byte* data;
+
+ if (parent_update != NULL) {
+ /** Get the updated field from update vector
+ of the parent table. */
+ row_field = innobase_get_field_from_update_vector(
+ foreign, parent_update, col_no);
+ }
+
+ if (row_field == NULL) {
+ row_field = dtuple_get_nth_field(row, col_no);
+ }
+
+ data = static_cast<const byte*>(row_field->data);
+ len = row_field->len;
+
+ if (row_field->ext) {
+ if (*local_heap == NULL) {
+ *local_heap = mem_heap_create(UNIV_PAGE_SIZE);
+ }
+
+ data = btr_copy_externally_stored_field(
+ &len, data, page_size,
+ dfield_get_len(row_field), *local_heap);
+ }
+
+ if (len == UNIV_SQL_NULL) {
+ mysql_rec[templ->mysql_null_byte_offset]
+ |= (byte) templ->mysql_null_bit_mask;
+ memcpy(mysql_rec + templ->mysql_col_offset,
+ static_cast<const byte*>(
+ index->table->vc_templ->default_rec
+ + templ->mysql_col_offset),
+ templ->mysql_col_len);
+ } else {
+
+ row_sel_field_store_in_mysql_format(
+ mysql_rec + templ->mysql_col_offset,
+ templ, index, templ->clust_rec_field_no,
+ (const byte*)data, len);
+
+ if (templ->mysql_null_bit_mask) {
+ /* It is a nullable column with a
+ non-NULL value */
+ mysql_rec[templ->mysql_null_byte_offset]
+ &= ~(byte) templ->mysql_null_bit_mask;
+ }
+ }
+ }
+
+ field = dtuple_get_nth_v_field(row, col->v_pos);
+
+ my_bitmap_map* old_write_set = dbug_tmp_use_all_columns(mysql_table, mysql_table->write_set);
+ my_bitmap_map* old_read_set = dbug_tmp_use_all_columns(mysql_table, mysql_table->read_set);
+ ret = mysql_table->update_virtual_field(mysql_table->field[col->m_col.ind]);
+ dbug_tmp_restore_column_map(mysql_table->read_set, old_read_set);
+ dbug_tmp_restore_column_map(mysql_table->write_set, old_write_set);
+
+ if (ret != 0) {
+		// FIXME: Why is this error message macro-hidden?
+#ifdef INNODB_VIRTUAL_DEBUG
+		ib::warn() << "Computing the virtual column values failed";
+		fputs("InnoDB: Cannot compute value for the following record ",
+ stderr);
+ dtuple_print(stderr, row);
+#endif /* INNODB_VIRTUAL_DEBUG */
+ DBUG_RETURN(NULL);
+ }
+
+ if (vctempl->mysql_null_bit_mask
+ && (mysql_rec[vctempl->mysql_null_byte_offset]
+ & vctempl->mysql_null_bit_mask)) {
+ dfield_set_null(field);
+ field->type.prtype |= DATA_VIRTUAL;
+ DBUG_RETURN(field);
+ }
+
+ row_mysql_store_col_in_innobase_format(
+ field, buf,
+ TRUE, mysql_rec + vctempl->mysql_col_offset,
+ vctempl->mysql_col_len, dict_table_is_comp(index->table));
+ field->type.prtype |= DATA_VIRTUAL;
+
+ ulint max_prefix = col->m_col.max_prefix;
+
+ if (max_prefix && ifield
+ && (ifield->prefix_len == 0
+ || ifield->prefix_len > col->m_col.max_prefix)) {
+ max_prefix = ifield->prefix_len;
+ }
+
+ /* If this is a prefix index, we only need a portion of the field */
+ if (max_prefix) {
+ len = dtype_get_at_most_n_mbchars(
+ col->m_col.prtype,
+ col->m_col.mbminlen, col->m_col.mbmaxlen,
+ max_prefix,
+ field->len,
+ static_cast<char*>(dfield_get_data(field)));
+ dfield_set_len(field, len);
+ }
+
+ if (heap) {
+ dfield_dup(field, heap);
+ }
+
+ DBUG_RETURN(field);
}
+
/** Attempt to push down an index condition.
-* @param[in] keyno MySQL key number
-* @param[in] idx_cond Index condition to be checked
-* @return Part of idx_cond which the handler will not evaluate
-*/
-UNIV_INTERN
+@param[in] keyno MySQL key number
+@param[in] idx_cond Index condition to be checked
+@return Part of idx_cond which the handler will not evaluate */
+
class Item*
ha_innobase::idx_cond_push(
uint keyno,
@@ -20493,6 +22133,12 @@ ha_innobase::idx_cond_push(
DBUG_ASSERT(keyno != MAX_KEY);
DBUG_ASSERT(idx_cond != NULL);
+ /* We can only evaluate the condition if all columns are stored. */
+ dict_index_t* idx = innobase_get_index(keyno);
+ if (idx && dict_index_has_virtual(idx)) {
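+ /* Returning idx_cond in full tells the server that the
+ handler will not evaluate any part of the condition itself. */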
+ DBUG_RETURN(idx_cond);
+ }
+
pushed_idx_cond = idx_cond;
pushed_idx_cond_keyno = keyno;
in_range_check_pushed_down = TRUE;
@@ -20507,10 +22153,9 @@ errmsg-utf8.txt directly as is.
Push a warning message to the client, it is a wrapper around:
void push_warning_printf(
- THD *thd, Sql_condition::enum_warning_level level,
+ THD *thd, Sql_condition::enum_condition_level level,
uint code, const char *format, ...);
*/
-UNIV_INTERN
void
ib_senderrf(
/*========*/
@@ -20519,7 +22164,7 @@ ib_senderrf(
ib_uint32_t code, /*!< MySQL error code */
...) /*!< Args */
{
- va_list args;
+ va_list args;
const char* format = innobase_get_err_msg(code);
/* If the caller wants to push a message to the client then
@@ -20532,25 +22177,21 @@ ib_senderrf(
va_start(args, code);
- myf l=0;
+ myf l;
- switch(level) {
+ switch (level) {
case IB_LOG_LEVEL_INFO:
- l = ME_JUST_INFO;
+ l = ME_JUST_INFO;
break;
case IB_LOG_LEVEL_WARN:
- l = ME_JUST_WARNING;
- break;
- case IB_LOG_LEVEL_ERROR:
- case IB_LOG_LEVEL_FATAL:
- l = 0;
+ l = ME_JUST_WARNING;
break;
default:
l = 0;
break;
}
- my_printv_error(code, format, MYF(l), args);
+ my_printv_error(code, format, MYF(l), args);
va_end(args);
@@ -20567,10 +22208,9 @@ must be: "Some string ... %s".
Push a warning message to the client, it is a wrapper around:
void push_warning_printf(
- THD *thd, Sql_condition::enum_warning_level level,
+ THD *thd, Sql_condition::enum_condition_level level,
uint code, const char *format, ...);
*/
-UNIV_INTERN
void
ib_errf(
/*====*/
@@ -20580,7 +22220,7 @@ ib_errf(
const char* format, /*!< printf format */
...) /*!< Args */
{
- char* str;
+ char* str = NULL;
va_list args;
/* If the caller wants to push a message to the client then
@@ -20591,22 +22231,32 @@ ib_errf(
va_start(args, format);
-#ifdef __WIN__
+#ifdef _WIN32
int size = _vscprintf(format, args) + 1;
- str = static_cast<char*>(malloc(size));
+ if (size > 0) {
+ str = static_cast<char*>(malloc(size));
+ }
+ if (str == NULL) {
+ va_end(args);
+ return; /* Watch for Out-Of-Memory */
+ }
str[size - 1] = 0x0;
vsnprintf(str, size, format, args);
#elif HAVE_VASPRINTF
if (vasprintf(&str, format, args) == -1) {
/* In case of failure use a fixed length string */
str = static_cast<char*>(malloc(BUFSIZ));
- my_vsnprintf(str, BUFSIZ, format, args);
+ vsnprintf(str, BUFSIZ, format, args);
}
#else
/* Use a fixed length string. */
str = static_cast<char*>(malloc(BUFSIZ));
- my_vsnprintf(str, BUFSIZ, format, args);
-#endif /* __WIN__ */
+ if (str == NULL) {
+ va_end(args);
+ return; /* Watch for Out-Of-Memory */
+ }
+ vsnprintf(str, BUFSIZ, format, args);
+#endif /* _WIN32 */
ib_senderrf(thd, level, code, str);
@@ -20614,62 +22264,37 @@ ib_errf(
free(str);
}
-/******************************************************************//**
-Write a message to the MySQL log, prefixed with "InnoDB: " */
-UNIV_INTERN
-void
-ib_logf(
-/*====*/
- ib_log_level_t level, /*!< in: warning level */
- const char* format, /*!< printf format */
- ...) /*!< Args */
-{
- char* str;
- va_list args;
+/* Keep the first 16 characters as-is, since the URL is sometimes used
+as an offset from this. */
+const char* TROUBLESHOOTING_MSG =
+ "Please refer to https://mariadb.com/kb/en/innodb-troubleshooting/"
+ " for how to resolve the issue.";
- va_start(args, format);
+const char* TROUBLESHOOT_DATADICT_MSG =
+ "Please refer to https://mariadb.com/kb/en/innodb-data-dictionary-troubleshooting/"
+ " for how to resolve the issue.";
-#ifdef __WIN__
- int size = _vscprintf(format, args) + 1;
- str = static_cast<char*>(malloc(size));
- str[size - 1] = 0x0;
- vsnprintf(str, size, format, args);
-#elif HAVE_VASPRINTF
- if (vasprintf(&str, format, args) == -1) {
- /* In case of failure use a fixed length string */
- str = static_cast<char*>(malloc(BUFSIZ));
- my_vsnprintf(str, BUFSIZ, format, args);
- }
-#else
- /* Use a fixed length string. */
- str = static_cast<char*>(malloc(BUFSIZ));
- my_vsnprintf(str, BUFSIZ, format, args);
-#endif /* __WIN__ */
+const char* BUG_REPORT_MSG =
+ "Submit a detailed bug report to https://jira.mariadb.org/";
- switch(level) {
- case IB_LOG_LEVEL_INFO:
- sql_print_information("InnoDB: %s", str);
- break;
- case IB_LOG_LEVEL_WARN:
- sql_print_warning("InnoDB: %s", str);
- break;
- case IB_LOG_LEVEL_ERROR:
- sql_print_error("InnoDB: %s", str);
- sd_notifyf(0, "STATUS=InnoDB: Error: %s", str);
- break;
- case IB_LOG_LEVEL_FATAL:
- sql_print_error("InnoDB: %s", str);
- sd_notifyf(0, "STATUS=InnoDB: Fatal: %s", str);
- break;
- }
+const char* FORCE_RECOVERY_MSG =
+ "Please refer to "
+ "https://mariadb.com/kb/en/library/innodb-recovery-modes/"
+ " for information about forcing recovery.";
- va_end(args);
- free(str);
+const char* OPERATING_SYSTEM_ERROR_MSG =
+ "Some operating system error numbers are described at"
+ " https://mariadb.com/kb/en/library/operating-system-error-codes/";
- if (level == IB_LOG_LEVEL_FATAL) {
- ut_error;
- }
-}
+const char* FOREIGN_KEY_CONSTRAINTS_MSG =
+ "Please refer to https://mariadb.com/kb/en/library/foreign-keys/"
+ " for correct foreign key definition.";
+
+const char* SET_TRANSACTION_MSG =
+ "Please refer to https://mariadb.com/kb/en/library/set-transaction/";
+
+const char* INNODB_PARAMETERS_MSG =
+ "Please refer to https://mariadb.com/kb/en/library/innodb-system-variables/";
/**********************************************************************
Converts an identifier from my_charset_filename to UTF-8 charset.
@@ -20685,8 +22310,9 @@ innobase_convert_to_filename_charset(
CHARSET_INFO* cs_to = &my_charset_filename;
CHARSET_INFO* cs_from = system_charset_info;
- return(strconvert( cs_from, from, strlen(from), cs_to, to,
- static_cast<uint>(len), &errors));
+ return(static_cast<uint>(strconvert(
+ cs_from, from, strlen(from),
+ cs_to, to, static_cast<uint>(len), &errors)));
}
/**********************************************************************
@@ -20703,38 +22329,112 @@ innobase_convert_to_system_charset(
CHARSET_INFO* cs1 = &my_charset_filename;
CHARSET_INFO* cs2 = system_charset_info;
- return(strconvert(cs1, from, strlen(from), cs2, to,
- static_cast<uint>(len), errors));
+ return(static_cast<uint>(strconvert(
+ cs1, from, strlen(from),
+ cs2, to, static_cast<uint>(len), errors)));
}
-/**********************************************************************
-Issue a warning that the row is too big. */
-UNIV_INTERN
-void
-ib_warn_row_too_big(const dict_table_t* table)
+/** Validate the requested buffer pool size. Also, reserve the necessary
+memory needed for buffer pool resize.
+@param[in] thd thread handle
+@param[in] var pointer to system variable
+@param[out] save immediate result for update function
+@param[in] value incoming string
+@return 0 on success, 1 on failure.
+*/
+static
+int
+innodb_buffer_pool_size_validate(
+ THD* thd,
+ struct st_mysql_sys_var* var,
+ void* save,
+ struct st_mysql_value* value)
{
- /* If prefix is true then a 768-byte prefix is stored
- locally for BLOB fields. Refer to dict_table_get_format() */
- const bool prefix = (dict_tf_get_format(table->flags)
- == UNIV_FORMAT_A);
+ longlong intbuf;
- const ulint free_space = page_get_free_space_of_empty(
- table->flags & DICT_TF_COMPACT) / 2;
- THD* thd = current_thd;
+ value->val_int(value, &intbuf);
- if (thd == NULL) {
- return;
+ if (!srv_was_started) {
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "Cannot update innodb_buffer_pool_size,"
+ " because InnoDB is not started.");
+ return(1);
}
- push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN, HA_ERR_TO_BIG_ROW,
- "Row size too large (> %lu). Changing some columns to TEXT"
- " or BLOB %smay help. In current row format, BLOB prefix of"
- " %d bytes is stored inline.", free_space
- , prefix ? "or using ROW_FORMAT=DYNAMIC or"
- " ROW_FORMAT=COMPRESSED ": ""
- , prefix ? DICT_MAX_FIXED_COL_LEN : 0);
+#ifdef UNIV_DEBUG
+ if (buf_disable_resize_buffer_pool_debug == TRUE) {
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "Cannot update innodb_buffer_pool_size,"
+ " because innodb_disable_resize_buffer_pool_debug"
+ " is set.");
+ ib::warn() << "Cannot update innodb_buffer_pool_size,"
+ " because innodb_disable_resize_buffer_pool_debug"
+ " is set.";
+ return(1);
+ }
+#endif /* UNIV_DEBUG */
+
+ buf_pool_mutex_enter_all();
+
+ if (srv_buf_pool_old_size != srv_buf_pool_size) {
+ buf_pool_mutex_exit_all();
+ my_printf_error(ER_WRONG_ARGUMENTS,
+ "Another buffer pool resize is already in progress.", MYF(0));
+ return(1);
+ }
+
+ if (srv_buf_pool_instances > 1 && intbuf < BUF_POOL_SIZE_THRESHOLD) {
+ buf_pool_mutex_exit_all();
+
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "Cannot update innodb_buffer_pool_size"
+ " to less than 1GB if"
+ " innodb_buffer_pool_instances > 1.");
+ return(1);
+ }
+
+ ulint requested_buf_pool_size
+ = buf_pool_size_align(static_cast<ulint>(intbuf));
+
+ *static_cast<longlong*>(save) = requested_buf_pool_size;
+
+ if (srv_buf_pool_size == static_cast<ulint>(intbuf)) {
+ buf_pool_mutex_exit_all();
+ /* nothing to do */
+ return(0);
+ }
+
+ if (srv_buf_pool_size == requested_buf_pool_size) {
+ buf_pool_mutex_exit_all();
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "innodb_buffer_pool_size must be at least"
+ " innodb_buffer_pool_chunk_size=%lu",
+ srv_buf_pool_chunk_unit);
+ /* nothing to do */
+ return(0);
+ }
+
+ srv_buf_pool_size = requested_buf_pool_size;
+ buf_pool_mutex_exit_all();
+
+ if (intbuf != static_cast<longlong>(requested_buf_pool_size)) {
+ char buf[64];
+ int len = 64;
+ value->val_str(value, buf, &len);
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_TRUNCATED_WRONG_VALUE,
+ "Truncated incorrect %-.32s value: '%-.128s'",
+ mysql_sysvar_buffer_pool_size.name,
+ value->val_str(value, buf, &len));
+ }
+
+ return(0);
}
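+
+/* Example (hypothetical values): with innodb_buffer_pool_chunk_size=128M
+and innodb_buffer_pool_instances=4,
+
+    SET GLOBAL innodb_buffer_pool_size = 3000000000;
+
+would be rounded up by buf_pool_size_align() to the next multiple of
+128M * 4, and the truncation warning above would report the adjusted
+value. */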
/*************************************************************//**
@@ -20844,16 +22544,6 @@ innodb_encrypt_tables_validate(
return 1;
}
- if (!srv_fil_crypt_rotate_key_age) {
- const char *msg = (encrypt_tables ? "enable" : "disable");
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_UNSUPPORTED,
- "InnoDB: cannot %s encryption, "
- "innodb_encryption_rotate_key_age=0"
- " i.e. key rotation disabled", msg);
- return 1;
- }
-
return 0;
}
@@ -20862,6 +22552,9 @@ static void innodb_remember_check_sysvar_funcs()
/* remember build-in sysvar check functions */
ut_ad((MYSQL_SYSVAR_NAME(checksum_algorithm).flags & 0x1FF) == PLUGIN_VAR_ENUM);
check_sysvar_enum = MYSQL_SYSVAR_NAME(checksum_algorithm).check;
+
+ ut_ad((MYSQL_SYSVAR_NAME(flush_log_at_timeout).flags & 15) == PLUGIN_VAR_INT);
+ check_sysvar_int = MYSQL_SYSVAR_NAME(flush_log_at_timeout).check;
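+
+ /* Note: these saved built-in check functions are presumably reused
+ when validating the deprecated XtraDB compatibility variables
+ (see ha_xtradb.h). */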
}
/********************************************************************//**
@@ -20941,13 +22634,12 @@ ib_push_frm_error(
case DICT_FRM_NO_PK:
sql_print_error("Table %s has a primary key in "
"InnoDB data dictionary, but not "
- "in MySQL!"
+ "in MariaDB!"
" Have you mixed up "
".frm files from different "
"installations? See "
- REFMAN
- "innodb-troubleshooting.html\n",
- ib_table->name);
+ "https://mariadb.com/kb/en/innodb-troubleshooting/\n",
+ ib_table->name.m_name);
if (push_warning) {
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
@@ -20955,20 +22647,20 @@ ib_push_frm_error(
"InnoDB: Table %s has a "
"primary key in InnoDB data "
"dictionary, but not in "
- "MySQL!", ib_table->name);
+ "MariaDB!", ib_table->name.m_name);
}
break;
case DICT_NO_PK_FRM_HAS:
sql_print_error(
"Table %s has no primary key in InnoDB data "
- "dictionary, but has one in MySQL! If you "
- "created the table with a MySQL version < "
+ "dictionary, but has one in MariaDB! If you "
+ "created the table with a MariaDB version < "
"3.23.54 and did not define a primary key, "
"but defined a unique key with all non-NULL "
- "columns, then MySQL internally treats that "
+ "columns, then MariaDB internally treats that "
"key as the primary key. You can fix this "
"error by dump + DROP + CREATE + reimport "
- "of the table.", ib_table->name);
+ "of the table.", ib_table->name.m_name);
if (push_warning) {
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
@@ -20976,32 +22668,31 @@ ib_push_frm_error(
"InnoDB: Table %s has no "
"primary key in InnoDB data "
"dictionary, but has one in "
- "MySQL!",
- ib_table->name);
+ "MariaDB!",
+ ib_table->name.m_name);
}
break;
case DICT_FRM_INCONSISTENT_KEYS:
- sql_print_error("InnoDB: Table %s contains %lu "
+ sql_print_error("InnoDB: Table %s contains " ULINTPF " "
"indexes inside InnoDB, which "
"is different from the number of "
- "indexes %u defined in the MySQL "
+ "indexes %u defined in the MariaDB "
" Have you mixed up "
".frm files from different "
"installations? See "
- REFMAN
- "innodb-troubleshooting.html\n",
- ib_table->name, n_keys,
+ "https://mariadb.com/kb/en/innodb-troubleshooting/\n",
+ ib_table->name.m_name, n_keys,
table->s->keys);
if (push_warning) {
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
ER_NO_SUCH_INDEX,
- "InnoDB: Table %s contains %lu "
+ "InnoDB: Table %s contains " ULINTPF " "
"indexes inside InnoDB, which "
"is different from the number of "
- "indexes %u defined in the MySQL ",
- ib_table->name, n_keys,
+ "indexes %u defined in the MariaDB ",
+ ib_table->name.m_name, n_keys,
table->s->keys);
}
break;
@@ -21009,9 +22700,9 @@ ib_push_frm_error(
case DICT_FRM_CONSISTENT:
default:
sql_print_error("InnoDB: Table %s is consistent "
- "on InnoDB data dictionary and MySQL "
+ "on InnoDB data dictionary and MariaDB "
" FRM file.",
- ib_table->name);
+ ib_table->name.m_name);
ut_error;
break;
}
diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h
index a417828f242..cdbbce51085 100644
--- a/storage/innobase/handler/ha_innodb.h
+++ b/storage/innobase/handler/ha_innodb.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2000, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2018, MariaDB Corporation.
+Copyright (c) 2013, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -17,50 +17,25 @@ this program; if not, write to the Free Software Foundation, Inc.,
*****************************************************************************/
-/*
- This file is based on ha_berkeley.h of MySQL distribution
-
- This file defines the Innodb handler: the interface between MySQL and
- Innodb
-*/
-
-#include "dict0stats.h"
-
#ifdef WITH_WSREP
-#include "../../../wsrep/wsrep_api.h"
+# include <mysql/service_wsrep.h>
+# include "../../../wsrep/wsrep_api.h"
#endif /* WITH_WSREP */
-/* Structure defines translation table between mysql index and innodb
-index structures */
-struct innodb_idx_translate_t {
- ulint index_count; /*!< number of valid index entries
- in the index_mapping array */
- ulint array_size; /*!< array size of index_mapping */
- dict_index_t** index_mapping; /*!< index pointer array directly
- maps to index in Innodb from MySQL
- array index */
-};
+#include "table.h"
+/* The InnoDB handler: the interface between MySQL and InnoDB. */
-/** InnoDB table share */
-typedef struct st_innobase_share {
- THR_LOCK lock; /*!< MySQL lock protecting
- this structure */
- const char* table_name; /*!< InnoDB table name */
- uint use_count; /*!< reference count,
- incremented in get_share()
- and decremented in
- free_share() */
- void* table_name_hash;/*!< hash table chain node */
- innodb_idx_translate_t idx_trans_tbl; /*!< index translation
- table between MySQL and
- Innodb */
-} INNOBASE_SHARE;
-
+/** "GEN_CLUST_INDEX" is the name reserved for InnoDB default
+system clustered index when there is no primary key. */
+extern const char innobase_index_reserve_name[];
/** Prebuilt structures in an InnoDB table handle used within MySQL */
struct row_prebuilt_t;
+/** InnoDB transaction */
+struct trx_t;
+
/** Engine specific table options are defined using this struct */
struct ha_table_option_struct
{
@@ -77,96 +52,103 @@ struct ha_table_option_struct
uint encryption; /*!< DEFAULT, ON, OFF */
ulonglong encryption_key_id; /*!< encryption key id */
};
-
-
+/* JAN: TODO: MySQL 5.7 handler.h */
+struct st_handler_tablename
+{
+ const char *db;
+ const char *tablename;
+};
/** The class defining a handle to an Innodb table */
class ha_innobase: public handler
{
- row_prebuilt_t* prebuilt; /*!< prebuilt struct in InnoDB, used
- to save CPU time with prebuilt data
- structures*/
- THD* user_thd; /*!< the thread handle of the user
- currently using the handle; this is
- set in external_lock function */
- THR_LOCK_DATA lock;
- INNOBASE_SHARE* share; /*!< information for MySQL
- table locking */
-
- uchar* upd_buf; /*!< buffer used in updates */
- ulint upd_buf_size; /*!< the size of upd_buf in bytes */
- Table_flags int_table_flags;
- uint primary_key;
- ulong start_of_scan; /*!< this is set to 1 when we are
- starting a table scan but have not
- yet fetched any row, else 0 */
- uint last_match_mode;/* match mode of the latest search:
- ROW_SEL_EXACT, ROW_SEL_EXACT_PREFIX,
- or undefined */
- uint num_write_row; /*!< number of write_row() calls */
-
- uint store_key_val_for_row(uint keynr, char* buff, uint buff_len,
- const uchar* record);
- inline void update_thd(THD* thd);
- void update_thd();
- int change_active_index(uint keynr);
- int general_fetch(uchar* buf, uint direction, uint match_mode);
- dberr_t innobase_lock_autoinc();
- ulonglong innobase_peek_autoinc();
- dberr_t innobase_set_max_autoinc(ulonglong auto_inc);
- dberr_t innobase_reset_autoinc(ulonglong auto_inc);
- dberr_t innobase_get_autoinc(ulonglong* value);
- void innobase_initialize_autoinc();
- dict_index_t* innobase_get_index(uint keynr);
-
-#ifdef WITH_WSREP
- int wsrep_append_keys(THD *thd, wsrep_key_type key_type,
- const uchar* record0, const uchar* record1);
-#endif
- /* Init values for the class: */
- public:
- ha_innobase(handlerton *hton, TABLE_SHARE *table_arg);
+public:
+ ha_innobase(handlerton* hton, TABLE_SHARE* table_arg);
~ha_innobase();
- /*
- Get the row type from the storage engine. If this method returns
- ROW_TYPE_NOT_USED, the information in HA_CREATE_INFO should be used.
- */
+
+ /** Get the row type from the storage engine. If this method returns
+ ROW_TYPE_NOT_USED, the information in HA_CREATE_INFO should be used. */
enum row_type get_row_type() const;
const char* table_type() const;
+
const char* index_type(uint key_number);
+
const char** bas_ext() const;
+
Table_flags table_flags() const;
+
ulong index_flags(uint idx, uint part, bool all_parts) const;
+
uint max_supported_keys() const;
+
uint max_supported_key_length() const;
+
uint max_supported_key_part_length() const;
+
const key_map* keys_to_use_for_scanning();
+ void column_bitmaps_signal();
+
+ /** Opens a dictionary table object using the table name. For a partition,
+ we need to try alternative lower/upper case names to support moving data
+ files across platforms.
+ @param[in] table_name name of the table/partition
+ @param[in] norm_name normalized name of the table/partition
+ @param[in] is_partition if this is a partition of a table
+ @param[in] ignore_err error to ignore for loading dictionary object
+ @return dictionary table object or NULL if not found */
+ static dict_table_t* open_dict_table(
+ const char* table_name,
+ const char* norm_name,
+ bool is_partition,
+ dict_err_ignore_t ignore_err);
+
int open(const char *name, int mode, uint test_if_locked);
+
handler* clone(const char *name, MEM_ROOT *mem_root);
+
int close(void);
+
double scan_time();
+
double read_time(uint index, uint ranges, ha_rows rows);
+
longlong get_memory_buffer_size() const;
+ int delete_all_rows();
+
int write_row(uchar * buf);
+
int update_row(const uchar * old_data, uchar * new_data);
+
int delete_row(const uchar * buf);
+
bool was_semi_consistent_read();
+
void try_semi_consistent_read(bool yes);
+
void unlock_row();
int index_init(uint index, bool sorted);
+
int index_end();
- int index_read(uchar * buf, const uchar * key,
- uint key_len, enum ha_rkey_function find_flag);
- int index_read_idx(uchar * buf, uint index, const uchar * key,
- uint key_len, enum ha_rkey_function find_flag);
+
+ int index_read(
+ uchar* buf,
+ const uchar* key,
+ uint key_len,
+ ha_rkey_function find_flag);
+
int index_read_last(uchar * buf, const uchar * key, uint key_len);
+
int index_next(uchar * buf);
+
int index_next_same(uchar * buf, const uchar *key, uint keylen);
+
int index_prev(uchar * buf);
+
int index_first(uchar * buf);
+
int index_last(uchar * buf);
/* Copy a cached MySQL row. If requested, also avoids
@@ -174,8 +156,11 @@ class ha_innobase: public handler
void copy_cached_row(uchar *to_rec, const uchar *from_rec,
uint rec_length);
int rnd_init(bool scan);
+
int rnd_end();
+
int rnd_next(uchar *buf);
+
int rnd_pos(uchar * buf, uchar *pos);
int ft_init();
@@ -184,100 +169,145 @@ class ha_innobase: public handler
int ft_read(uchar* buf);
void position(const uchar *record);
+
int info(uint);
+
int analyze(THD* thd,HA_CHECK_OPT* check_opt);
+
int optimize(THD* thd,HA_CHECK_OPT* check_opt);
+
int discard_or_import_tablespace(my_bool discard);
- int extra(enum ha_extra_function operation);
+
+ int extra(ha_extra_function operation);
+
int reset();
+
int external_lock(THD *thd, int lock_type);
- int transactional_table_lock(THD *thd, int lock_type);
+
int start_stmt(THD *thd, thr_lock_type lock_type);
+
void position(uchar *record);
- ha_rows records_in_range(uint inx, key_range *min_key, key_range
- *max_key);
+
+ ha_rows records_in_range(
+ uint inx,
+ key_range* min_key,
+ key_range* max_key);
+
ha_rows estimate_rows_upper_bound();
void update_create_info(HA_CREATE_INFO* create_info);
- int parse_table_name(const char*name,
- HA_CREATE_INFO* create_info,
- ulint flags,
- ulint flags2,
- char* norm_name,
- char* temp_path,
- char* remote_path);
+
+ inline int create(
+ const char* name,
+ TABLE* form,
+ HA_CREATE_INFO* create_info,
+ bool file_per_table,
+ trx_t* trx = NULL);
+
+ int create(
+ const char* name,
+ TABLE* form,
+ HA_CREATE_INFO* create_info);
+
const char* check_table_options(THD *thd, TABLE* table,
HA_CREATE_INFO* create_info, const bool use_tablespace, const ulint file_format);
- int create(const char *name, TABLE *form,
- HA_CREATE_INFO *create_info);
+
+ inline int delete_table(const char* name, enum_sql_command sqlcom);
+
int truncate();
+
int delete_table(const char *name);
+
int rename_table(const char* from, const char* to);
int defragment_table(const char* name, const char* index_name,
bool async);
int check(THD* thd, HA_CHECK_OPT* check_opt);
char* update_table_comment(const char* comment);
+
char* get_foreign_key_create_info();
+
int get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list);
- int get_parent_foreign_key_list(THD *thd,
- List<FOREIGN_KEY_INFO> *f_key_list);
+
+ int get_parent_foreign_key_list(
+ THD* thd,
+ List<FOREIGN_KEY_INFO>* f_key_list);
+ int get_cascade_foreign_key_table_list(
+ THD* thd,
+ List<st_handler_tablename>* fk_table_list);
+
bool can_switch_engines();
+
uint referenced_by_foreign_key();
+
void free_foreign_key_create_info(char* str);
- THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to,
- enum thr_lock_type lock_type);
+
+ uint lock_count(void) const;
+
+ THR_LOCK_DATA** store_lock(
+ THD* thd,
+ THR_LOCK_DATA** to,
+ thr_lock_type lock_type);
+
void init_table_handle_for_HANDLER();
- virtual void get_auto_increment(ulonglong offset, ulonglong increment,
- ulonglong nb_desired_values,
- ulonglong *first_value,
- ulonglong *nb_reserved_values);
+
+ virtual void get_auto_increment(
+ ulonglong offset,
+ ulonglong increment,
+ ulonglong nb_desired_values,
+ ulonglong* first_value,
+ ulonglong* nb_reserved_values);
int reset_auto_increment(ulonglong value);
virtual bool get_error_message(int error, String *buf);
+
virtual bool get_foreign_dup_key(char*, uint, char*, uint);
+
uint8 table_cache_type();
- /*
- ask handler about permission to cache table during query registration
+
+ /**
+ Ask handler about permission to cache table during query registration
*/
- my_bool register_query_cache_table(THD *thd, char *table_key,
- uint key_length,
- qc_engine_callback *call_back,
- ulonglong *engine_data);
- static const char *get_mysql_bin_log_name();
- static ulonglong get_mysql_bin_log_pos();
+ my_bool register_query_cache_table(
+ THD* thd,
+ char* table_key,
+ uint key_length,
+ qc_engine_callback* call_back,
+ ulonglong* engine_data);
+
bool primary_key_is_clustered();
- int cmp_ref(const uchar *ref1, const uchar *ref2);
+
+ int cmp_ref(const uchar* ref1, const uchar* ref2);
+
/** On-line ALTER TABLE interface @see handler0alter.cc @{ */
/** Check if InnoDB supports a particular alter table in-place
- @param altered_table TABLE object for new version of table.
- @param ha_alter_info Structure describing changes to be done
+ @param altered_table TABLE object for new version of table.
+ @param ha_alter_info Structure describing changes to be done
by ALTER TABLE and holding data used during in-place alter.
- @retval HA_ALTER_INPLACE_NOT_SUPPORTED Not supported
- @retval HA_ALTER_INPLACE_NO_LOCK Supported
+ @retval HA_ALTER_INPLACE_NOT_SUPPORTED Not supported
+ @retval HA_ALTER_INPLACE_NO_LOCK Supported
@retval HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE
- Supported, but requires lock
- during main phase and exclusive
- lock during prepare phase.
+ Supported, but requires lock during main phase and
+ exclusive lock during prepare phase.
@retval HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE
- Supported, prepare phase
- requires exclusive lock.
- */
+ Supported, prepare phase requires exclusive lock. */
enum_alter_inplace_result check_if_supported_inplace_alter(
TABLE* altered_table,
Alter_inplace_info* ha_alter_info);
+
/** Allows InnoDB to update internal structures with concurrent
writes blocked (provided that check_if_supported_inplace_alter()
did not return HA_ALTER_INPLACE_NO_LOCK).
This will be invoked before inplace_alter_table().
- @param altered_table TABLE object for new version of table.
- @param ha_alter_info Structure describing changes to be done
+ @param altered_table TABLE object for new version of table.
+ @param ha_alter_info Structure describing changes to be done
by ALTER TABLE and holding data used during in-place alter.
- @retval true Failure
- @retval false Success
+ @retval true Failure
+ @retval false Success
*/
bool prepare_inplace_alter_table(
TABLE* altered_table,
@@ -288,12 +318,12 @@ class ha_innobase: public handler
The level of concurrency allowed during this operation depends
on the return value from check_if_supported_inplace_alter().
- @param altered_table TABLE object for new version of table.
- @param ha_alter_info Structure describing changes to be done
+ @param altered_table TABLE object for new version of table.
+ @param ha_alter_info Structure describing changes to be done
by ALTER TABLE and holding data used during in-place alter.
- @retval true Failure
- @retval false Success
+ @retval true Failure
+ @retval false Success
*/
bool inplace_alter_table(
TABLE* altered_table,
@@ -306,95 +336,165 @@ class ha_innobase: public handler
inplace_alter_table() and thus might be higher than during
prepare_inplace_alter_table(). (E.g concurrent writes were
blocked during prepare, but might not be during commit).
- @param altered_table TABLE object for new version of table.
- @param ha_alter_info Structure describing changes to be done
+ @param altered_table TABLE object for new version of table.
+ @param ha_alter_info Structure describing changes to be done
by ALTER TABLE and holding data used during in-place alter.
- @param commit true => Commit, false => Rollback.
- @retval true Failure
- @retval false Success
+ @param commit true => Commit, false => Rollback.
+ @retval true Failure
+ @retval false Success
*/
bool commit_inplace_alter_table(
TABLE* altered_table,
Alter_inplace_info* ha_alter_info,
bool commit);
/** @} */
- bool check_if_incompatible_data(HA_CREATE_INFO *info,
- uint table_changes);
-private:
- /** Builds a 'template' to the prebuilt struct.
- The template is used in fast retrieval of just those column
- values MySQL needs in its processing.
- @param whole_row true if access is needed to a whole row,
- false if accessing individual fields is enough */
- void build_template(bool whole_row);
- /** Resets a query execution 'template'.
- @see build_template() */
- inline void reset_template();
-
- int info_low(uint, bool);
+ bool check_if_incompatible_data(
+ HA_CREATE_INFO* info,
+ uint table_changes);
-public:
/** @name Multi Range Read interface @{ */
+
/** Initialize multi range read @see DsMrr_impl::dsmrr_init
- * @param seq
- * @param seq_init_param
- * @param n_ranges
- * @param mode
- * @param buf
- */
- int multi_range_read_init(RANGE_SEQ_IF* seq,
- void* seq_init_param,
- uint n_ranges, uint mode,
- HANDLER_BUFFER* buf);
+ @param seq
+ @param seq_init_param
+ @param n_ranges
+ @param mode
+ @param buf */
+ int multi_range_read_init(
+ RANGE_SEQ_IF* seq,
+ void* seq_init_param,
+ uint n_ranges,
+ uint mode,
+ HANDLER_BUFFER* buf);
+
/** Process next multi range read @see DsMrr_impl::dsmrr_next
- * @param range_info
- */
+ @param range_info */
int multi_range_read_next(range_id_t *range_info);
+
/** Initialize multi range read and get information.
- * @see ha_myisam::multi_range_read_info_const
- * @see DsMrr_impl::dsmrr_info_const
- * @param keyno
- * @param seq
- * @param seq_init_param
- * @param n_ranges
- * @param bufsz
- * @param flags
- * @param cost
- */
- ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF* seq,
- void* seq_init_param,
- uint n_ranges, uint* bufsz,
- uint* flags, Cost_estimate* cost);
+ @see ha_myisam::multi_range_read_info_const
+ @see DsMrr_impl::dsmrr_info_const
+ @param keyno
+ @param seq
+ @param seq_init_param
+ @param n_ranges
+ @param bufsz
+ @param flags
+ @param cost */
+ ha_rows multi_range_read_info_const(
+ uint keyno,
+ RANGE_SEQ_IF* seq,
+ void* seq_init_param,
+ uint n_ranges,
+ uint* bufsz,
+ uint* flags,
+ Cost_estimate* cost);
+
/** Initialize multi range read and get information.
- * @see DsMrr_impl::dsmrr_info
- * @param keyno
- * @param seq
- * @param seq_init_param
- * @param n_ranges
- * @param bufsz
- * @param flags
- * @param cost
- */
+ @see DsMrr_impl::dsmrr_info
+ @param keyno
+ @param seq
+ @param seq_init_param
+ @param n_ranges
+ @param bufsz
+ @param flags
+ @param cost */
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
uint key_parts, uint* bufsz, uint* flags,
Cost_estimate* cost);
- int multi_range_read_explain_info(uint mrr_mode, char *str,
- size_t size);
+ int multi_range_read_explain_info(uint mrr_mode,
+ char *str, size_t size);
+
/** Attempt to push down an index condition.
- * @param[in] keyno MySQL key number
- * @param[in] idx_cond Index condition to be checked
- * @return idx_cond if pushed; NULL if not pushed
- */
- class Item* idx_cond_push(uint keyno, class Item* idx_cond);
+ @param[in] keyno MySQL key number
+ @param[in] idx_cond Index condition to be checked
+ @return idx_cond if pushed; NULL if not pushed */
+ Item* idx_cond_push(uint keyno, Item* idx_cond);
+ /* @} */
+
+ /** Check if InnoDB is not storing virtual column metadata for a table.
+ @param s table definition (based on .frm file)
+ @return whether InnoDB will omit virtual column metadata */
+ static bool omits_virtual_cols(const TABLE_SHARE& s)
+ {
+ return s.frm_version < FRM_VER_EXPRESSSIONS && s.virtual_fields;
+ }
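+ /* Such tables predate FRM_VER_EXPRESSSIONS (the constant is
+ spelled this way in the server headers); their virtual columns
+ are handled purely in the SQL layer, so InnoDB stores no
+ metadata for them. */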
+
+protected:
+ /**
+ MySQL calls this method at the end of each statement. This method
+ exists for readability only, called from reset(). The name reset()
+ doesn't give any clue that it is called at the end of a statement. */
+ int end_stmt();
+
+ dberr_t innobase_get_autoinc(ulonglong* value);
+ dberr_t innobase_lock_autoinc();
+ ulonglong innobase_peek_autoinc();
+ dberr_t innobase_set_max_autoinc(ulonglong auto_inc);
+ dberr_t innobase_reset_autoinc(ulonglong auto_inc);
+
+ /** Resets a query execution 'template'.
+ @see build_template() */
+ void reset_template();
+
+ inline void update_thd(THD* thd);
+ void update_thd();
+
+ int general_fetch(uchar* buf, uint direction, uint match_mode);
+ int change_active_index(uint keynr);
+ dict_index_t* innobase_get_index(uint keynr);
+
+#ifdef WITH_WSREP
+ int wsrep_append_keys(THD *thd, wsrep_key_type key_type,
+ const uchar* record0, const uchar* record1);
+#endif
+ /** Builds a 'template' to the prebuilt struct.
+
+ The template is used in fast retrieval of just those column
+ values MySQL needs in its processing.
+ @param whole_row true if access is needed to a whole row,
+ false if accessing individual fields is enough */
+ void build_template(bool whole_row);
+
+ virtual int info_low(uint, bool);
-private:
/** The multi range read session object */
- DsMrr_impl ds_mrr;
- /* @} */
+ DsMrr_impl m_ds_mrr;
+
+ /** Save CPU time with prebuilt/cached data structures */
+ row_prebuilt_t* m_prebuilt;
+
+ /** Thread handle of the user currently using the handler;
+ this is set in external_lock function */
+ THD* m_user_thd;
+
+ /** buffer used in updates */
+ uchar* m_upd_buf;
+
+ /** the size of upd_buf in bytes */
+ ulint m_upd_buf_size;
+
+ /** Flags that specify the handler instance (table) capability. */
+ Table_flags m_int_table_flags;
+
+ /** Index into the server's primary key meta-data table->key_info[] */
+ uint m_primary_key;
+
+ /** this is set to true when we are starting a table scan but have
+ not yet fetched any row, else false */
+ bool m_start_of_scan;
+
+ /** match mode of the latest search: ROW_SEL_EXACT,
+ ROW_SEL_EXACT_PREFIX, or undefined */
+ uint m_last_match_mode;
+
+ /** If MySQL has locked with external_lock() */
+ bool m_mysql_has_locked;
};
+
/* Some accessor functions which the InnoDB plugin needs, but which
can not be added to mysql/plugin.h as part of the public interface;
the definitions are bracketed with #ifdef INNODB_COMPATIBILITY_HOOKS */
@@ -404,79 +504,53 @@ the definitions are bracketed with #ifdef INNODB_COMPATIBILITY_HOOKS */
#endif
LEX_STRING* thd_query_string(MYSQL_THD thd);
+size_t thd_query_safe(MYSQL_THD thd, char *buf, size_t buflen);
extern "C" {
struct charset_info_st *thd_charset(MYSQL_THD thd);
-/**
- Check if a user thread is a replication slave thread
- @param thd user thread
- @retval 0 the user thread is not a replication slave thread
- @retval 1 the user thread is a replication slave thread
-*/
+/** Check if a user thread is a replication slave thread
+@param thd user thread
+@retval 0 the user thread is not a replication slave thread
+@retval 1 the user thread is a replication slave thread */
int thd_slave_thread(const MYSQL_THD thd);
-/**
- Check if a user thread is running a non-transactional update
- @param thd user thread
- @retval 0 the user thread is not running a non-transactional update
- @retval 1 the user thread is running a non-transactional update
-*/
+/** Check if a user thread is running a non-transactional update
+@param thd user thread
+@retval 0 the user thread is not running a non-transactional update
+@retval 1 the user thread is running a non-transactional update */
int thd_non_transactional_update(const MYSQL_THD thd);
-/**
- Get high resolution timestamp for the current query start time.
- The timestamp is not anchored to any specific point in time,
- but can be used for comparison.
-
- @retval timestamp in microseconds precision
+/** Get high resolution timestamp for the current query start time.
+The timestamp is not anchored to any specific point in time,
+but can be used for comparison.
+@param thd user thread
+@retval timestamp in microseconds precision
*/
unsigned long long thd_start_utime(const MYSQL_THD thd);
-/**
- Get the user thread's binary logging format
- @param thd user thread
- @return Value to be used as index into the binlog_format_names array
-*/
+/** Get the user thread's binary logging format
+@param thd user thread
+@return Value to be used as index into the binlog_format_names array */
int thd_binlog_format(const MYSQL_THD thd);
-/**
- Mark transaction to rollback and mark error as fatal to a sub-statement.
- @param thd Thread handle
- @param all TRUE <=> rollback main transaction.
-*/
-void thd_mark_transaction_to_rollback(MYSQL_THD thd, bool all);
-
-/**
- Check if binary logging is filtered for thread's current db.
- @param thd Thread handle
- @retval 1 the query is not filtered, 0 otherwise.
-*/
+/** Check if binary logging is filtered for thread's current db.
+@param thd Thread handle
+@retval 1 the query is not filtered, 0 otherwise. */
bool thd_binlog_filter_ok(const MYSQL_THD thd);
-/**
- Check if the query may generate row changes which
- may end up in the binary.
- @param thd Thread handle
- @return 1 the query may generate row changes, 0 otherwise.
+/** Check if the query may generate row changes which may end up in the binary.
+@param thd Thread handle
+@retval 1 the query may generate row changes, 0 otherwise.
*/
bool thd_sqlcom_can_generate_row_events(const MYSQL_THD thd);
-/**
- Gets information on the durability property requested by
- a thread.
- @param thd Thread handle
- @return a durability property.
-*/
-enum durability_properties thd_get_durability_property(const MYSQL_THD thd);
-
/** Is strict sql_mode set.
-@param thd Thread object
-@return True if sql_mode has strict mode (all or trans), false otherwise.
-*/
-bool thd_is_strict_mode(const MYSQL_THD thd)
-MY_ATTRIBUTE((nonnull));
+@param thd Thread object
+@return True if sql_mode has strict mode (all or trans), false otherwise. */
+bool thd_is_strict_mode(const MYSQL_THD thd);
+
} /* extern "C" */
/** Get the file name and position of the MySQL binlog corresponding to the
@@ -484,13 +558,29 @@ MY_ATTRIBUTE((nonnull));
*/
extern void mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file);
+struct trx_t;
#ifdef WITH_WSREP
-#include <mysql/service_wsrep.h>
+//extern "C" int wsrep_trx_order_before(void *thd1, void *thd2);
+
+extern "C" bool wsrep_thd_is_wsrep_on(THD *thd);
+
+extern "C" void wsrep_thd_set_exec_mode(THD *thd, enum wsrep_exec_mode mode);
+extern "C" void wsrep_thd_set_query_state(
+ THD *thd, enum wsrep_query_state state);
+
+extern "C" void wsrep_thd_set_trx_to_replay(THD *thd, uint64 trx_id);
+
+extern "C" uint32 wsrep_thd_wsrep_rand(THD *thd);
+extern "C" time_t wsrep_thd_query_start(THD *thd);
+extern "C" query_id_t wsrep_thd_query_id(THD *thd);
+extern "C" query_id_t wsrep_thd_wsrep_last_query_id(THD *thd);
+extern "C" void wsrep_thd_set_wsrep_last_query_id(THD *thd, query_id_t id);
#endif
extern const struct _ft_vft ft_vft_result;
-/* Structure Returned by ha_innobase::ft_init_ext() */
+/** Structure Returned by ha_innobase::ft_init_ext() */
typedef struct new_ft_info
{
struct _ft_vft *please;
@@ -499,12 +589,11 @@ typedef struct new_ft_info
fts_result_t* ft_result;
} NEW_FT_INFO;
-/*********************************************************************//**
+/**
Allocates an InnoDB transaction for a MySQL handler object.
-@return InnoDB transaction handle */
+@return InnoDB transaction handle */
trx_t*
innobase_trx_allocate(
-/*==================*/
MYSQL_THD thd); /*!< in: user thread handle */
/*********************************************************************//**
@@ -513,93 +602,203 @@ system default primary index name 'GEN_CLUST_INDEX'. If a name
matches, this function pushes an warning message to the client,
and returns true.
@return true if the index name matches the reserved name */
-UNIV_INTERN
bool
innobase_index_name_is_reserved(
-/*============================*/
THD* thd, /*!< in/out: MySQL connection */
const KEY* key_info, /*!< in: Indexes to be created */
ulint num_of_keys) /*!< in: Number of indexes to
be created. */
MY_ATTRIBUTE((nonnull(1), warn_unused_result));
-/*****************************************************************//**
-#ifdef WITH_WSREP
-extern "C" int wsrep_trx_is_aborting(void *thd_ptr);
-#endif
-Determines InnoDB table flags.
-@retval true if successful, false if error */
-UNIV_INTERN
-bool
-innobase_table_flags(
-/*=================*/
- const TABLE* form, /*!< in: table */
- const HA_CREATE_INFO* create_info, /*!< in: information
- on table columns and indexes */
- THD* thd, /*!< in: connection */
- bool use_tablespace, /*!< in: whether to create
- outside system tablespace */
- ulint* flags, /*!< out: DICT_TF flags */
- ulint* flags2) /*!< out: DICT_TF2 flags */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+/** Parse hint for table and its indexes, and update the information
+in dictionary.
+@param[in] thd Connection thread
+@param[in,out] table Target table
+@param[in] table_share Table definition */
+void
+innobase_parse_hint_from_comment(
+ THD* thd,
+ dict_table_t* table,
+ const TABLE_SHARE* table_share);
-/*****************************************************************//**
-Validates the create options. We may build on this function
-in future. For now, it checks two specifiers:
-KEY_BLOCK_SIZE and ROW_FORMAT
-If innodb_strict_mode is not set then this function is a no-op
-@return NULL if valid, string if not. */
-UNIV_INTERN
-const char*
-create_options_are_invalid(
-/*=======================*/
- THD* thd, /*!< in: connection thread. */
- TABLE* form, /*!< in: information on table
- columns and indexes */
- HA_CREATE_INFO* create_info, /*!< in: create info. */
- bool use_tablespace) /*!< in: srv_file_per_table */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+/** Class for handling create table information. */
+class create_table_info_t
+{
+public:
+ /** Constructor.
+ Used in two ways:
+ - all but file_per_table is used, when creating the table.
+ - all but name/path is used, when validating options and using flags. */
+ create_table_info_t(
+ THD* thd,
+ const TABLE* form,
+ HA_CREATE_INFO* create_info,
+ char* table_name,
+ char* remote_path,
+ bool file_per_table,
+ trx_t* trx = NULL);
+
+ /** Initialize the object. */
+ int initialize();
+
+ /** Set m_tablespace_type. */
+ void set_tablespace_type(bool table_being_altered_is_file_per_table);
+
+ /** Create the internal innodb table.
+ @param create_fk whether to add FOREIGN KEY constraints */
+ int create_table(bool create_fk = true);
+
+ /** Update the internal data dictionary. */
+ int create_table_update_dict();
+
+ /** Validates the create options. Checks that the options
+ KEY_BLOCK_SIZE, ROW_FORMAT, DATA DIRECTORY, TEMPORARY & TABLESPACE
+ are compatible with each other and other settings.
+ These CREATE OPTIONS are not validated here unless innodb_strict_mode
+ is on. With strict mode, this function will report each problem it
+ finds using a custom message with error code
+ ER_ILLEGAL_HA_CREATE_OPTION, not its built-in message.
+ @return NULL if valid, string name of bad option if not. */
+ const char* create_options_are_invalid();
+
+ bool gcols_in_fulltext_or_spatial();
+
+ /** Validates engine specific table options not handled by
+ SQL-parser.
+ @return NULL if valid, string name of bad option if not. */
+ const char* check_table_options();
+
+ /** Validate DATA DIRECTORY option. */
+ bool create_option_data_directory_is_valid();
+
+ /** Validate TABLESPACE option. */
+ bool create_option_tablespace_is_valid();
+
+ /** Prepare to create a table. */
+ int prepare_create_table(const char* name, bool strict = true);
+
+ void allocate_trx();
+
+ /** Checks that every index has a sane size. Depends on strict mode */
+ bool row_size_is_acceptable(const dict_table_t& table,
+ bool strict) const;
+ /** Checks that the given index has a sane size. Depends on strict mode */
+ bool row_size_is_acceptable(const dict_index_t& index,
+ bool strict) const;
+
+ /** Determines InnoDB table flags.
+ If strict_mode=OFF, this will adjust the flags to what should be assumed.
+ @retval true if successful, false if error */
+ bool innobase_table_flags();
+
+ /** Set flags and append '/' to remote path if necessary. */
+ void set_remote_path_flags();
+
+ /** Get table flags. */
+ ulint flags() const
+ { return(m_flags); }
+
+ /** Update table flags. */
+ void flags_set(ulint flags) { m_flags |= flags; }
+
+ /** Get table flags2. */
+ ulint flags2() const
+ { return(m_flags2); }
+
+ /** Get trx. */
+ trx_t* trx() const
+ { return(m_trx); }
+
+ /** Return table name. */
+ const char* table_name() const
+ { return(m_table_name); }
+
+ /** @return whether the table needs to be dropped on rollback */
+ bool drop_before_rollback() const { return m_drop_before_rollback; }
+
+ THD* thd() const
+ { return(m_thd); }
+
+ /** Normalizes a table name string.
+ A normalized name consists of the database name catenated to '/' and
+ table name. An example: test/mytable. On Windows normalization puts
+ both the database name and the table name always to lower case if
+ "set_lower_case" is set to true.
+ @param[in,out] norm_name Buffer to return the normalized name in.
+ @param[in] name Table name string.
+ @param[in] set_lower_case True if we want to set name to lower
+ case. */
+ static void normalize_table_name_low(
+ char* norm_name,
+ const char* name,
+ ibool set_lower_case);
-/*********************************************************************//**
-Retrieve the FTS Relevance Ranking result for doc with doc_id
-of prebuilt->fts_doc_id
-@return the relevance ranking value */
-UNIV_INTERN
-float
-innobase_fts_retrieve_ranking(
-/*==========================*/
- FT_INFO* fts_hdl); /*!< in: FTS handler */
+private:
+ /** Parses the table name into normal name and either temp path or
+ remote path if needed.*/
+ int
+ parse_table_name(
+ const char* name);
-/*********************************************************************//**
-Find and Retrieve the FTS Relevance Ranking result for doc with doc_id
-of prebuilt->fts_doc_id
-@return the relevance ranking value */
-UNIV_INTERN
-float
-innobase_fts_find_ranking(
-/*======================*/
- FT_INFO* fts_hdl, /*!< in: FTS handler */
- uchar* record, /*!< in: Unused */
- uint len); /*!< in: Unused */
-/*********************************************************************//**
-Free the memory for the FTS handler */
-UNIV_INTERN
-void
-innobase_fts_close_ranking(
-/*=======================*/
- FT_INFO* fts_hdl) /*!< in: FTS handler */
- MY_ATTRIBUTE((nonnull));
-/*****************************************************************//**
+ /** Create the internal innodb table definition. */
+ int create_table_def();
+
+ /** Connection thread handle. */
+ THD* m_thd;
+
+ /** InnoDB transaction handle. */
+ trx_t* m_trx;
+
+ /** Information on table columns and indexes. */
+ const TABLE* m_form;
+
+ /** Value of innodb_default_row_format */
+ const ulong m_default_row_format;
+
+ /** Create options. */
+ HA_CREATE_INFO* m_create_info;
+
+ /** Table name */
+ char* m_table_name;
+ /** Whether the table needs to be dropped before rollback */
+ bool m_drop_before_rollback;
+
+ /** Remote path (DATA DIRECTORY) or zero length-string */
+ char* m_remote_path;
+
+ /** Local copy of srv_file_per_table. */
+ bool m_innodb_file_per_table;
+
+ /** Allow file_per_table for this table either because:
+ 1) the setting innodb_file_per_table=on,
+ 2) it was explicitly requested by tablespace=innodb_file_per_table, or
+ 3) the table being altered is currently file_per_table. */
+ bool m_allow_file_per_table;
+
+ /** After all considerations, this shows whether we will actually
+ create a table and tablespace using file-per-table. */
+ bool m_use_file_per_table;
+
+ /** Using DATA DIRECTORY */
+ bool m_use_data_dir;
+
+ /** Table flags */
+ ulint m_flags;
+
+ /** Table flags2 */
+ ulint m_flags2;
+};
+
+/**
Initialize the table FTS stopword list
@return TRUE if success */
-UNIV_INTERN
ibool
innobase_fts_load_stopword(
/*=======================*/
dict_table_t* table, /*!< in: Table has the FTS */
trx_t* trx, /*!< in: transaction */
THD* thd) /*!< in: current thread */
- MY_ATTRIBUTE((nonnull(1,3), warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/** Some defines for innobase_fts_check_doc_id_index() return value */
enum fts_doc_id_index_enum {
@@ -608,14 +807,12 @@ enum fts_doc_id_index_enum {
FTS_NOT_EXIST_DOC_ID_INDEX
};
-/*******************************************************************//**
+/**
Check whether the table has a unique index with FTS_DOC_ID_INDEX_NAME
on the Doc ID column.
@return the status of the FTS_DOC_ID index */
-UNIV_INTERN
-enum fts_doc_id_index_enum
+fts_doc_id_index_enum
innobase_fts_check_doc_id_index(
-/*============================*/
const dict_table_t* table, /*!< in: table definition */
const TABLE* altered_table, /*!< in: MySQL table
that is being altered */
@@ -623,73 +820,137 @@ innobase_fts_check_doc_id_index(
Doc ID */
MY_ATTRIBUTE((warn_unused_result));
-/*******************************************************************//**
+/**
Check whether the table has a unique index with FTS_DOC_ID_INDEX_NAME
on the Doc ID column in MySQL create index definition.
@return FTS_EXIST_DOC_ID_INDEX if there exists the FTS_DOC_ID index,
FTS_INCORRECT_DOC_ID_INDEX if the FTS_DOC_ID index is of wrong format */
-UNIV_INTERN
-enum fts_doc_id_index_enum
+fts_doc_id_index_enum
innobase_fts_check_doc_id_index_in_def(
-/*===================================*/
ulint n_key, /*!< in: Number of keys */
const KEY* key_info) /*!< in: Key definitions */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/***********************************************************************
-@return version of the extended FTS API */
-uint
-innobase_fts_get_version();
-
-/***********************************************************************
-@return Which part of the extended FTS API is supported */
-ulonglong
-innobase_fts_flags();
-
-/***********************************************************************
-Find and Retrieve the FTS doc_id for the current result row
-@return the document ID */
-ulonglong
-innobase_fts_retrieve_docid(
-/*============================*/
- FT_INFO_EXT* fts_hdl); /*!< in: FTS handler */
-
-/***********************************************************************
-Find and retrieve the size of the current result
-@return number of matching rows */
-ulonglong
-innobase_fts_count_matches(
-/*============================*/
- FT_INFO_EXT* fts_hdl); /*!< in: FTS handler */
-
-/** "GEN_CLUST_INDEX" is the name reserved for InnoDB default
-system clustered index when there is no primary key. */
-extern const char innobase_index_reserve_name[];
-
-/*********************************************************************//**
-Copy table flags from MySQL's HA_CREATE_INFO into an InnoDB table object.
-Those flags are stored in .frm file and end up in the MySQL table object,
-but are frequently used inside InnoDB so we keep their copies into the
-InnoDB table object. */
-UNIV_INTERN
-void
-innobase_copy_frm_flags_from_create_info(
-/*=====================================*/
- dict_table_t* innodb_table, /*!< in/out: InnoDB table */
- const HA_CREATE_INFO* create_info); /*!< in: create info */
+ MY_ATTRIBUTE((warn_unused_result));
-/*********************************************************************//**
+/**
Copy table flags from MySQL's TABLE_SHARE into an InnoDB table object.
Those flags are stored in .frm file and end up in the MySQL table object,
but are frequently used inside InnoDB so we keep their copies into the
InnoDB table object. */
-UNIV_INTERN
void
innobase_copy_frm_flags_from_table_share(
-/*=====================================*/
dict_table_t* innodb_table, /*!< in/out: InnoDB table */
const TABLE_SHARE* table_share); /*!< in: table share */
+/** Set up base columns for virtual column
+@param[in] table the InnoDB table
+@param[in] field MySQL field
+@param[in,out] v_col virtual column to be set up */
+void
+innodb_base_col_setup(
+ dict_table_t* table,
+ const Field* field,
+ dict_v_col_t* v_col);
+
+/** Set up base columns for stored column
+@param[in] table InnoDB table
+@param[in] field MySQL field
+@param[in,out] s_col stored column */
+void
+innodb_base_col_setup_for_stored(
+ const dict_table_t* table,
+ const Field* field,
+ dict_s_col_t* s_col);
+
+/** whether this is a stored generated column */
+#define innobase_is_s_fld(field) ((field)->vcol_info && (field)->stored_in_db())
+
+/** Always normalize table name to lower case on Windows */
+#ifdef _WIN32
+#define normalize_table_name(norm_name, name) \
+ create_table_info_t::normalize_table_name_low(norm_name, name, TRUE)
+#else
+#define normalize_table_name(norm_name, name) \
+ create_table_info_t::normalize_table_name_low(norm_name, name, FALSE)
+#endif /* _WIN32 */
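+
+/* For example (hypothetical call): normalize_table_name(buf, "./test/T1")
+yields "test/T1" on POSIX systems and "test/t1" on Windows. */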
+
+/** Converts an InnoDB error code to a MySQL error code.
+Also tells to MySQL about a possible transaction rollback inside InnoDB caused
+by a lock wait timeout or a deadlock.
+@param[in] error InnoDB error code.
+@param[in] flags InnoDB table flags or 0.
+@param[in] thd MySQL thread or NULL.
+@return MySQL error code */
+int
+convert_error_code_to_mysql(
+ dberr_t error,
+ ulint flags,
+ THD* thd);
+
+/** Converts a search mode flag understood by MySQL to a flag understood
+by InnoDB.
+@param[in] find_flag MySQL search mode flag.
+@return InnoDB search mode flag. */
+page_cur_mode_t
+convert_search_mode_to_innobase(
+ enum ha_rkey_function find_flag);
+
+/** Commits a transaction in an InnoDB database.
+@param[in] trx Transaction handle. */
+void
+innobase_commit_low(
+ trx_t* trx);
+
+extern my_bool innobase_stats_on_metadata;
+
+/** Calculate Record Per Key value.
+Need to exclude the NULL value if innodb_stats_method is set to "nulls_ignored"
+@param[in] index InnoDB index.
+@param[in] i The column we are calculating rec per key.
+@param[in] records Estimated total records.
+@return estimated record per key value */
+/* JAN: TODO: MySQL 5.7 */
+typedef float rec_per_key_t;
+rec_per_key_t
+innodb_rec_per_key(
+ dict_index_t* index,
+ ulint i,
+ ha_rows records);
+
+/** Build template for the virtual columns and their base columns
+@param[in] table MySQL TABLE
+@param[in] ib_table InnoDB dict_table_t
+@param[in,out] s_templ InnoDB template structure
+@param[in] add_v new virtual columns added along with
+ add index call
+@param[in] locked true if innobase_share_mutex is held */
+void
+innobase_build_v_templ(
+ const TABLE* table,
+ const dict_table_t* ib_table,
+ dict_vcol_templ_t* s_templ,
+ const dict_add_v_col_t* add_v,
+ bool locked);
+
+/** Callback used by the MySQL server layer to initialize
+the table virtual columns' template
+@param[in] table MySQL TABLE
+@param[in,out] ib_table InnoDB dict_table_t */
+void
+innobase_build_v_templ_callback(
+ const TABLE* table,
+ void* ib_table);
+
+/** Callback function definition, used by the MySQL server layer to initialize
+the table virtual columns' template */
+typedef void (*my_gcolumn_templatecallback_t)(const TABLE*, void*);
+
+/** Convert MySQL column number to dict_table_t::cols[] offset.
+@param[in] field non-virtual column
+@return column number relative to dict_table_t::cols[] */
+unsigned
+innodb_col_no(const Field* field)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
/********************************************************************//**
Helper function to push frm mismatch error to error log and
if needed to sql-layer. */
@@ -702,3 +963,22 @@ ib_push_frm_error(
TABLE* table, /*!< in: MySQL table */
ulint n_keys, /*!< in: InnoDB #keys */
bool push_warning); /*!< in: print warning ? */
+
+/** Check each index part length whether they not exceed the max limit
+@param[in] max_field_len maximum allowed key part length
+@param[in] key MariaDB key definition
+@return true if index column length exceeds limit */
+MY_ATTRIBUTE((warn_unused_result))
+bool too_big_key_part_length(size_t max_field_len, const KEY& key);
+
+/** This function is used to rollback one X/Open XA distributed transaction
+which is in the prepared state
+
+@param[in] hton InnoDB handlerton
+@param[in] xid X/Open XA transaction identification
+
+@return 0 or error number */
+int innobase_rollback_by_xid(handlerton* hton, XID* xid);
+
+/** Free tablespace resources allocated. */
+void innobase_space_shutdown();
diff --git a/storage/innobase/handler/ha_xtradb.h b/storage/innobase/handler/ha_xtradb.h
new file mode 100644
index 00000000000..9e898818a01
--- /dev/null
+++ b/storage/innobase/handler/ha_xtradb.h
@@ -0,0 +1,1009 @@
+/*****************************************************************************
+
+Copyright (c) 2000, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2018, MariaDB Corporation.
+Copyright (c) 2009, Percona Inc.
+
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/** @file ha_xtradb.h */
+
+#ifndef HA_XTRADB_H
+#define HA_XTRADB_H
+
+static
+void
+innodb_print_deprecation(const char* param);
+
+/* XtraDB compatibility system variables. Note that the default and
+minimum values may differ from the originals in order to detect whether
+the user has actually set the parameter or not. */
+
+static my_bool innodb_buffer_pool_populate;
+#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
+static ulong srv_cleaner_max_lru_time;
+static ulong srv_cleaner_max_flush_time;
+static ulong srv_cleaner_flush_chunk_size;
+static ulong srv_cleaner_lru_chunk_size;
+static ulong srv_cleaner_free_list_lwm;
+static my_bool srv_cleaner_eviction_factor;
+#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
+static ulong srv_pass_corrupt_table;
+static ulong srv_empty_free_list_algorithm;
+static ulong innobase_file_io_threads;
+static ulong srv_foreground_preflush;
+static longlong srv_kill_idle_transaction;
+static my_bool srv_fake_changes_locks;
+static my_bool innobase_log_archive;
+static char* innobase_log_arch_dir = NULL;
+static ulong srv_log_arch_expire_sec;
+static ulong innobase_log_block_size;
+static ulong srv_log_checksum_algorithm;
+static ulonglong srv_max_bitmap_file_size;
+static ulonglong srv_max_changed_pages;
+static ulong innobase_mirrored_log_groups;
+#ifdef UNIV_LINUX
+static ulong srv_sched_priority_cleaner;
+#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
+static my_bool srv_cleaner_thread_priority;
+static my_bool srv_io_thread_priority;
+static my_bool srv_master_thread_priority;
+static my_bool srv_purge_thread_priority;
+static ulong srv_sched_priority_io;
+static ulong srv_sched_priority_master;
+static ulong srv_sched_priority_purge;
+#endif /* UNIV_DEBUG || UNIV_PERF_DEBUG */
+#endif /* UNIV_LINUX */
+static ulong srv_cleaner_lsn_age_factor;
+static ulong srv_show_locks_held;
+static ulong srv_show_verbose_locks;
+static my_bool srv_track_changed_pages;
+static my_bool innodb_track_redo_log_now;
+static my_bool srv_use_global_flush_log_at_trx_commit;
+static my_bool srv_use_stacktrace;
+
+
+static const char innodb_deprecated_msg[]= "Using %s is deprecated and the"
+ " parameter may be removed in future releases."
+ " Ignoning the parameter.";
+
+
+#ifdef BTR_CUR_HASH_ADAPT
+/* This is just an alias for innodb_adaptive_hash_index_parts. */
+/** Number of distinct partitions of the adaptive hash index (AHI).
+Each partition is protected by its own latch, so there are that many
+latches protecting the complete search system. */
+static MYSQL_SYSVAR_ULONG(adaptive_hash_index_partitions, btr_ahi_parts,
+ PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
+ "It is an alias for innodb_adaptive_hash_index_parts; "
+ "only exists to allow easier upgrade from earlier XtraDB versions.",
+ NULL, NULL, 8, 1, 512, 0);
+#endif /* BTR_CUR_HASH_ADAPT */
+
+static MYSQL_SYSVAR_BOOL(buffer_pool_populate, innodb_buffer_pool_populate,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, NULL, FALSE);
+
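+/* Most of the deprecated variables in this file have an update callback
+that pushes a deprecation warning to the client when the variable is set
+at runtime; read-only variables are instead reported through
+innodb_check_deprecated() below. */
+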
+#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
+static
+void
+set_cleaner_max_lru_time(THD*thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_cleaner_max_lru_time");
+}
+/* Original default 1000 */
+static MYSQL_SYSVAR_ULONG(cleaner_max_lru_time, srv_cleaner_max_lru_time,
+ PLUGIN_VAR_RQCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, set_cleaner_max_lru_time, 0, 0, ~0UL, 0);
+
+static
+void
+set_cleaner_max_flush_time(THD*thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_cleaner_max_flush_time");
+}
+/* Original default 1000 */
+static MYSQL_SYSVAR_ULONG(cleaner_max_flush_time, srv_cleaner_max_flush_time,
+ PLUGIN_VAR_RQCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_cleaner_max_flush_time, 0, 0, ~0UL, 0);
+
+static
+void
+set_cleaner_flush_chunk_size(THD*thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_cleaner_flush_chunk_size");
+}
+/* Original default 100 */
+static MYSQL_SYSVAR_ULONG(cleaner_flush_chunk_size,
+ srv_cleaner_flush_chunk_size,
+ PLUGIN_VAR_RQCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_cleaner_flush_chunk_size, 0, 0, ~0UL, 0);
+
+static
+void
+set_cleaner_lru_chunk_size(THD*thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_cleaner_lru_chunk_size");
+}
+/* Original default 100 */
+static MYSQL_SYSVAR_ULONG(cleaner_lru_chunk_size,
+ srv_cleaner_lru_chunk_size,
+ PLUGIN_VAR_RQCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_cleaner_lru_chunk_size, 0, 0, ~0UL, 0);
+
+static
+void
+set_cleaner_free_list_lwm(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_cleaner_free_list_lwm");
+}
+/* Original default 10 */
+static MYSQL_SYSVAR_ULONG(cleaner_free_list_lwm, srv_cleaner_free_list_lwm,
+ PLUGIN_VAR_RQCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_cleaner_free_list_lwm, 0, 0, 100, 0);
+
+static
+void
+set_cleaner_eviction_factor(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_cleaner_eviction_factor");
+}
+static MYSQL_SYSVAR_BOOL(cleaner_eviction_factor, srv_cleaner_eviction_factor,
+ PLUGIN_VAR_OPCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_cleaner_eviction_factor, FALSE);
+
+#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
+
+/* Added new default DEPRECATED */
+/** Possible values for system variable "innodb_cleaner_lsn_age_factor". */
+static const char* innodb_cleaner_lsn_age_factor_names[] = {
+ "LEGACY",
+ "HIGH_CHECKPOINT",
+ "DEPRECATED",
+ NullS
+};
+
+/** Enumeration for innodb_cleaner_lsn_age_factor. */
+static TYPELIB innodb_cleaner_lsn_age_factor_typelib = {
+ array_elements(innodb_cleaner_lsn_age_factor_names) - 1,
+ "innodb_cleaner_lsn_age_factor_typelib",
+ innodb_cleaner_lsn_age_factor_names,
+ NULL
+};
+
+/** Alternatives for srv_cleaner_lsn_age_factor, set through
+innodb_cleaner_lsn_age_factor variable */
+enum srv_cleaner_lsn_age_factor_t {
+ SRV_CLEANER_LSN_AGE_FACTOR_LEGACY, /*!< Original Oracle MySQL 5.6
+ formula */
+ SRV_CLEANER_LSN_AGE_FACTOR_HIGH_CHECKPOINT,
+ /*!< Percona Server 5.6 formula
+ that returns lower values than
+ legacy option for low
+ checkpoint ages, and higher
+ values for high ages. This has
+ the effect of stabilizing the
+ checkpoint age higher. */
+ SRV_CLEANER_LSN_AGE_FACTOR_DEPRECATED /*!< Deprecated, do not use */
+};
+
+/** Alternatives for srv_foreground_preflush, set through
+innodb_foreground_preflush variable */
+enum srv_foreground_preflush_t {
+ SRV_FOREGROUND_PREFLUSH_SYNC_PREFLUSH, /*!< Original Oracle MySQL 5.6
+ behavior of performing a sync
+ flush list flush */
+ SRV_FOREGROUND_PREFLUSH_EXP_BACKOFF, /*!< Exponential backoff wait
+ for the page cleaner to flush
+ for us */
+ SRV_FOREGROUND_PREFLUSH_DEPRECATED /*!< Deprecated, do not use */
+};
+
+/** Alternatives for srv_empty_free_list_algorithm, set through
+innodb_empty_free_list_algorithm variable */
+enum srv_empty_free_list_t {
+ SRV_EMPTY_FREE_LIST_LEGACY, /*!< Original Oracle MySQL 5.6
+ algorithm */
+ SRV_EMPTY_FREE_LIST_BACKOFF, /*!< Percona Server 5.6 algorithm that
+ loops in a progressive backoff until a
+ free page is produced by the cleaner
+ thread */
+ SRV_EMPTY_FREE_LIST_DEPRECATED /*!< Deprecated, do not use */
+};
+
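+/* Index of the added "DEPRECATED" value in the
+innodb_checksum_algorithm_names2[] array defined below. */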
+#define SRV_CHECKSUM_ALGORITHM_DEPRECATED 6
+
+static
+void
+set_cleaner_lsn_age_factor(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_cleaner_lsn_age_factor");
+}
+static MYSQL_SYSVAR_ENUM(cleaner_lsn_age_factor,
+ srv_cleaner_lsn_age_factor,
+ PLUGIN_VAR_OPCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_cleaner_lsn_age_factor, SRV_CLEANER_LSN_AGE_FACTOR_DEPRECATED,
+ &innodb_cleaner_lsn_age_factor_typelib);
+
+/* Added new default deprecated, 3 */
+const char *corrupt_table_action_names[]=
+{
+ "assert", /* 0 */
+ "warn", /* 1 */
+ "salvage", /* 2 */
+ "deprecated", /* 3 */
+ NullS
+};
+
+TYPELIB corrupt_table_action_typelib=
+{
+ array_elements(corrupt_table_action_names) - 1, "corrupt_table_action_typelib",
+ corrupt_table_action_names, NULL
+};
+
+static
+void
+set_corrupt_table_action(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_corrupt_table_action");
+}
+static MYSQL_SYSVAR_ENUM(corrupt_table_action, srv_pass_corrupt_table,
+ PLUGIN_VAR_RQCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_corrupt_table_action, 3, &corrupt_table_action_typelib);
+
+/* Added new default DEPRECATED */
+/** Possible values for system variable "innodb_empty_free_list_algorithm". */
+static const char* innodb_empty_free_list_algorithm_names[] = {
+ "LEGACY",
+ "BACKOFF",
+ "DEPRECATED",
+ NullS
+};
+
+/** Enumeration for innodb_empty_free_list_algorithm. */
+static TYPELIB innodb_empty_free_list_algorithm_typelib = {
+ array_elements(innodb_empty_free_list_algorithm_names) - 1,
+ "innodb_empty_free_list_algorithm_typelib",
+ innodb_empty_free_list_algorithm_names,
+ NULL
+};
+
+static
+void
+set_empty_free_list_algorithm(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_empty_free_list_algorithm");
+}
+static MYSQL_SYSVAR_ENUM(empty_free_list_algorithm,
+ srv_empty_free_list_algorithm,
+ PLUGIN_VAR_OPCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_empty_free_list_algorithm, SRV_EMPTY_FREE_LIST_DEPRECATED,
+ &innodb_empty_free_list_algorithm_typelib);
+
+static
+void
+set_fake_changes(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_fake_changes");
+}
+static MYSQL_THDVAR_BOOL(fake_changes, PLUGIN_VAR_OPCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_fake_changes, FALSE);
+
+/* Original default, min 4. */
+static MYSQL_SYSVAR_ULONG(file_io_threads, innobase_file_io_threads,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY | PLUGIN_VAR_NOSYSVAR,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, NULL, 0, 0, 64, 0);
+
+/** Possible values for system variable "innodb_foreground_preflush". */
+static const char* innodb_foreground_preflush_names[] = {
+ "SYNC_PREFLUSH",
+ "EXPONENTIAL_BACKOFF",
+ "DEPRECATED",
+ NullS
+};
+
+/* Enumeration for innodb_foreground_preflush. */
+static TYPELIB innodb_foreground_preflush_typelib = {
+ array_elements(innodb_foreground_preflush_names) - 1,
+ "innodb_foreground_preflush_typelib",
+ innodb_foreground_preflush_names,
+ NULL
+};
+
+static
+void
+set_foreground_preflush(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_foreground_preflush");
+}
+static MYSQL_SYSVAR_ENUM(foreground_preflush, srv_foreground_preflush,
+ PLUGIN_VAR_OPCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_foreground_preflush, SRV_FOREGROUND_PREFLUSH_DEPRECATED,
+ &innodb_foreground_preflush_typelib);
+
+#ifdef EXTENDED_FOR_KILLIDLE
+#define kill_idle_help_text "If non-zero value, the idle session with transaction which is idle over the value in seconds is killed by InnoDB."
+#else
+#define kill_idle_help_text "No effect for this build."
+#endif
+static
+void
+set_kill_idle_transaction(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_kill_idle_transaction");
+}
+static MYSQL_SYSVAR_LONGLONG(kill_idle_transaction, srv_kill_idle_transaction,
+ PLUGIN_VAR_RQCMDARG, kill_idle_help_text,
+ NULL, &set_kill_idle_transaction, 0, 0, LONG_MAX, 0);
+
+static
+void
+set_locking_fake_changes(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_locking_fake_changes");
+}
+/* Original default: TRUE */
+static MYSQL_SYSVAR_BOOL(locking_fake_changes, srv_fake_changes_locks,
+ PLUGIN_VAR_NOCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_locking_fake_changes, FALSE);
+
+static MYSQL_SYSVAR_STR(log_arch_dir, innobase_log_arch_dir,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, NULL, NULL);
+
+static
+void
+set_log_archive(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_log_archive");
+}
+static MYSQL_SYSVAR_BOOL(log_archive, innobase_log_archive,
+ PLUGIN_VAR_OPCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_log_archive, FALSE);
+
+static
+void
+set_log_arch_expire_sec(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_log_arch_expire_sec");
+}
+static MYSQL_SYSVAR_ULONG(log_arch_expire_sec,
+ srv_log_arch_expire_sec, PLUGIN_VAR_OPCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_log_arch_expire_sec, 0, 0, ~0UL, 0);
+
+/* Original default, min 512 */
+static MYSQL_SYSVAR_ULONG(log_block_size, innobase_log_block_size,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, NULL, 0, 0,
+ (1 << UNIV_PAGE_SIZE_SHIFT_MAX), 0);
+
+/* Added new default deprecated */
+/** Possible values for system variables "innodb_checksum_algorithm" and
+"innodb_log_checksum_algorithm". */
+static const char* innodb_checksum_algorithm_names2[] = {
+ "CRC32",
+ "STRICT_CRC32",
+ "INNODB",
+ "STRICT_INNODB",
+ "NONE",
+ "STRICT_NONE",
+ "DEPRECATED",
+ NullS
+};
+
+/** Used to define an enumerate type of the system variables
+innodb_checksum_algorithm and innodb_log_checksum_algorithm. */
+static TYPELIB innodb_checksum_algorithm_typelib2 = {
+ array_elements(innodb_checksum_algorithm_names2) - 1,
+ "innodb_checksum_algorithm_typelib2",
+ innodb_checksum_algorithm_names2,
+ NULL
+};
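+/* Unlike the other deprecated XtraDB variables, this update callback
+still has an effect: it translates the requested log checksum algorithm
+to innodb_log_checksums (NONE to OFF, everything else to ON). */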
+static
+void
+set_log_checksum_algorithm(THD* thd, st_mysql_sys_var*, void*, const void* save)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_log_checksum_algorithm");
+ log_mutex_enter();
+ srv_log_checksum_algorithm = *static_cast<const ulong*>(save);
+ if (srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_NONE) {
+ ib::info() << "Setting innodb_log_checksums = false";
+ innodb_log_checksums = false;
+ log_checksum_algorithm_ptr = log_block_calc_checksum_none;
+ } else {
+ ib::info() << "Setting innodb_log_checksums = true";
+ innodb_log_checksums = true;
+ log_checksum_algorithm_ptr = log_block_calc_checksum_crc32;
+ }
+ log_mutex_exit();
+}
+static MYSQL_SYSVAR_ENUM(log_checksum_algorithm, srv_log_checksum_algorithm,
+ PLUGIN_VAR_RQCMDARG,
+ "Deprecated and translated to innodb_log_checksums (NONE to OFF, "
+ "everything else to ON); only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_log_checksum_algorithm, SRV_CHECKSUM_ALGORITHM_DEPRECATED,
+ &innodb_checksum_algorithm_typelib2);
+
+static
+void
+set_max_bitmap_file_size(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_max_bitmap_file_size");
+}
+/* Original default 100M, min 4K */
+static MYSQL_SYSVAR_ULONGLONG(max_bitmap_file_size, srv_max_bitmap_file_size,
+ PLUGIN_VAR_RQCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_max_bitmap_file_size, 0, 0, ULONGLONG_MAX, 0);
+
+static
+void
+set_max_changed_pages(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_max_changed_pages");
+}
+/* Original default 1000000 */
+static MYSQL_SYSVAR_ULONGLONG(max_changed_pages, srv_max_changed_pages,
+ PLUGIN_VAR_RQCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_max_changed_pages, 0, 0, ~0ULL, 0);
+
+/* Note that the default and minimum values are set to 0 to
+detect whether the option was passed and to print a deprecation message */
+static MYSQL_SYSVAR_ULONG(mirrored_log_groups, innobase_mirrored_log_groups,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, NULL, 0, 0, 10, 0);
+
+#ifdef UNIV_LINUX
+
+static
+void
+set_sched_priority_cleaner(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_sched_priority_cleaner");
+}
+/* Original default 19 */
+static MYSQL_SYSVAR_ULONG(sched_priority_cleaner, srv_sched_priority_cleaner,
+ PLUGIN_VAR_RQCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_sched_priority_cleaner, 0, 0, 39, 0);
+
+#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
+static
+void
+set_priority_cleaner(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_priority_cleaner");
+}
+static MYSQL_SYSVAR_BOOL(priority_cleaner, srv_cleaner_thread_priority,
+ PLUGIN_VAR_OPCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_priority_cleaner, FALSE);
+
+static
+void
+set_priority_io(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_priority_io");
+}
+static MYSQL_SYSVAR_BOOL(priority_io, srv_io_thread_priority,
+ PLUGIN_VAR_OPCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_priority_io, FALSE);
+
+static
+void
+set_priority_master(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_priority_master");
+}
+static MYSQL_SYSVAR_BOOL(priority_master, srv_master_thread_priority,
+ PLUGIN_VAR_OPCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_priority_master, FALSE);
+
+static
+void
+set_priority_purge(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_priority_purge");
+}
+static MYSQL_SYSVAR_BOOL(priority_purge, srv_purge_thread_priority,
+ PLUGIN_VAR_OPCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_priority_purge, FALSE);
+
+static
+void
+set_sched_priority_io(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_sched_priority_io");
+}
+/* Original default 19 */
+static MYSQL_SYSVAR_ULONG(sched_priority_io, srv_sched_priority_io,
+ PLUGIN_VAR_RQCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_sched_priority_io, 0, 0, 39, 0);
+
+static
+void
+set_sched_priority_master(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_sched_priority_master");
+}
+/* Original default 19 */
+static MYSQL_SYSVAR_ULONG(sched_priority_master, srv_sched_priority_master,
+ PLUGIN_VAR_RQCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_sched_priority_master, 0, 0, 39, 0);
+
+static
+void
+set_sched_priority_purge(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_sched_priority_purge");
+}
+/* Original default 19 */
+static MYSQL_SYSVAR_ULONG(sched_priority_purge, srv_sched_priority_purge,
+ PLUGIN_VAR_RQCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_sched_priority_purge, 0, 0, 39, 0);
+#endif /* UNIV_DEBUG || UNIV_PERF_DEBUG */
+#endif /* UNIV_LINUX */
+
+static
+void
+set_show_locks_held(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_show_locks_held");
+}
+/* TODO: Implement */
+static MYSQL_SYSVAR_ULONG(show_locks_held, srv_show_locks_held,
+ PLUGIN_VAR_RQCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_show_locks_held, 0, 0, 1000, 0);
+
+static
+void
+set_show_verbose_locks(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_show_verbose_locks");
+}
+/* TODO: Implement */
+static MYSQL_SYSVAR_ULONG(show_verbose_locks, srv_show_verbose_locks,
+ PLUGIN_VAR_RQCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_show_verbose_locks, 0, 0, 1, 0);
+
+static MYSQL_SYSVAR_BOOL(track_changed_pages, srv_track_changed_pages,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, NULL, FALSE);
+
+static
+void
+set_track_redo_log_now(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_track_redo_log_now");
+}
+static MYSQL_SYSVAR_BOOL(track_redo_log_now,
+ innodb_track_redo_log_now,
+ PLUGIN_VAR_OPCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_track_redo_log_now, FALSE);
+
+static
+void
+set_use_global_flush_log_at_trx_commit(THD* thd, st_mysql_sys_var*, void*, const void*)
+{
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WARN_DEPRECATED_SYNTAX,
+ innodb_deprecated_msg,
+ "innodb_use_global_flush_log_at_trx_commit");
+}
+static MYSQL_SYSVAR_BOOL(use_global_flush_log_at_trx_commit, srv_use_global_flush_log_at_trx_commit,
+ PLUGIN_VAR_NOCMDARG,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, &set_use_global_flush_log_at_trx_commit, FALSE);
+
+static MYSQL_SYSVAR_BOOL(use_stacktrace, srv_use_stacktrace,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "Deprecated and ignored; only exists to allow easier upgrade from "
+ "earlier XtraDB versions.",
+ NULL, NULL, FALSE);
+
+/** Print deprecation message for a given system variable.
+@param[in] param System parameter name */
+static
+void
+innodb_print_deprecation(const char* param)
+{
+ ib::warn() << "Using " << param << " is deprecated and the"
+ " parameter may be removed in future releases."
+ " Ignoning the parameter.";
+}
+
+/** Check whether the user has used an XtraDB extended system variable
+that is not currently supported by InnoDB or is marked as deprecated. */
+static
+void
+innodb_check_deprecated(void)
+{
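+ /* The deprecated variables default to zero or to a special
+ DEPRECATED sentinel value, so any non-default value observed
+ here means that the user set the parameter explicitly. */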
+ if (innodb_buffer_pool_populate) {
+ innodb_print_deprecation("innodb-buffer-pool-populate");
+ }
+
+#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
+ if (srv_cleaner_max_lru_time) {
+ innodb_print_deprecation("innodb-cleaner-max-lru-time");
+ }
+
+ if (srv_cleaner_max_flush_time) {
+ innodb_print_deprecation("innodb-cleaner-max-flush-time");
+ }
+
+ if (srv_cleaner_flush_chunk_size) {
+ innodb_print_deprecation("innodb-cleaner-flush-chunk-size");
+ }
+
+ if (srv_cleaner_lru_chunk_size) {
+ innodb_print_deprecation("innodb-cleaner-lru_chunk_size");
+ }
+ if (srv_cleaner_free_list_lwm) {
+ innodb_print_deprecation("innodb-cleaner-free-list-lwm");
+ }
+
+ if (srv_cleaner_eviction_factor) {
+ innodb_print_deprecation("innodb-cleaner-eviction-factor");
+ }
+
+#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
+
+ if (srv_cleaner_lsn_age_factor != SRV_CLEANER_LSN_AGE_FACTOR_DEPRECATED) {
+ innodb_print_deprecation("innodb-cleaner-lsn-age-factor");
+ }
+
+ if (srv_pass_corrupt_table != 3) {
+ innodb_print_deprecation("innodb-pass-corrupt-table");
+ }
+
+ if (srv_empty_free_list_algorithm != SRV_EMPTY_FREE_LIST_DEPRECATED) {
+ innodb_print_deprecation("innodb-empty-free-list-algorithm");
+ }
+
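+ /* Passing a NULL THD makes THDVAR read the global default
+ value of this session variable. */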
+ if (THDVAR((THD*) NULL, fake_changes)) {
+ innodb_print_deprecation("innodb-fake-changes");
+ }
+
+ if (innobase_file_io_threads) {
+ innodb_print_deprecation("innodb-file-io-threads");
+ }
+
+ if (srv_foreground_preflush != SRV_FOREGROUND_PREFLUSH_DEPRECATED) {
+ innodb_print_deprecation("innodb-foreground-preflush");
+ }
+
+ if (srv_kill_idle_transaction != 0) {
+ innodb_print_deprecation("innodb-kill-idle-transaction");
+ }
+
+ if (srv_fake_changes_locks) {
+ innodb_print_deprecation("innodb-fake-changes-locks");
+ }
+
+ if (innobase_log_arch_dir) {
+ innodb_print_deprecation("innodb-log-arch-dir");
+ }
+
+ if (innobase_log_archive) {
+ innodb_print_deprecation("innodb-log-archive");
+ }
+
+ if (srv_log_arch_expire_sec) {
+ innodb_print_deprecation("innodb-log-arch-expire-sec");
+ }
+
+ if (innobase_log_block_size) {
+ innodb_print_deprecation("innodb-log-block-size");
+ }
+
+ if (srv_log_checksum_algorithm != SRV_CHECKSUM_ALGORITHM_DEPRECATED) {
+ innodb_print_deprecation("innodb-log-checksum-algorithm");
+ if (srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_NONE) {
+ ib::info() << "Setting innodb_log_checksums = false";
+ innodb_log_checksums = false;
+ log_checksum_algorithm_ptr = log_block_calc_checksum_none;
+ } else {
+ ib::info() << "Setting innodb_log_checksums = true";
+ innodb_log_checksums = true;
+ log_checksum_algorithm_ptr = log_block_calc_checksum_crc32;
+ }
+ }
+
+ if (srv_max_changed_pages) {
+ innodb_print_deprecation("innodb-max-changed-pages");
+ }
+
+ if (innobase_mirrored_log_groups) {
+ innodb_print_deprecation("innodb-mirrored-log-groups");
+ }
+
+#ifdef UNIV_LINUX
+ if (srv_sched_priority_cleaner) {
+ innodb_print_deprecation("innodb-sched-priority-cleaner");
+ }
+
+#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
+ if (srv_cleaner_thread_priority) {
+ innodb_print_deprecation("innodb-cleaner-thread-priority");
+ }
+
+ if (srv_io_thread_priority) {
+ innodb_print_deprecation("innodb-io-thread-priority");
+ }
+
+ if (srv_master_thread_priority) {
+ innodb_print_deprecation("inodb-master-thread-priority");
+ }
+
+ if (srv_purge_thread_priority) {
+ innodb_print_deprecation("inodb-purge-thread-priority");
+ }
+
+ if (srv_sched_priority_io) {
+ innodb_print_deprecation("innodb-sched-priority-io");
+ }
+
+ if (srv_sched_priority_master) {
+ innodb_print_deprecation("innodb-sched-priority-master");
+ }
+
+ if (srv_sched_priority_purge) {
+ innodb_print_deprecation("innodb-sched-priority-purge");
+ }
+#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
+#endif /* UNIV_LINUX */
+
+ if (srv_track_changed_pages) {
+ innodb_print_deprecation("innodb-track-changed-pages");
+ }
+
+ if (innodb_track_redo_log_now) {
+ innodb_print_deprecation("innodb-track-redo-log-now");
+ }
+
+ if (srv_use_global_flush_log_at_trx_commit) {
+ innodb_print_deprecation("innodb-use-global-flush-log-at-trx-commit");
+ }
+
+ if (srv_use_stacktrace) {
+ innodb_print_deprecation("innodb-use-stacktrace");
+ }
+
+ if (srv_max_bitmap_file_size) {
+ innodb_print_deprecation("innodb-max-bitmap-file-size");
+ }
+
+ if (srv_show_locks_held) {
+ innodb_print_deprecation("innodb-show-locks-held");
+ }
+
+ if (srv_show_verbose_locks) {
+ innodb_print_deprecation("innodb-show-verbose-locks");
+ }
+}
+
+#endif /* HA_XTRADB_H */
+
+#ifdef HA_XTRADB_SYSVARS
+ /* XtraDB compatibility system variables */
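+ /* These entries are expanded into the plugin's system variable
+ array; the block is compiled only where HA_XTRADB_SYSVARS is
+ defined at the inclusion site. */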
+#ifdef BTR_CUR_HASH_ADAPT
+ MYSQL_SYSVAR(adaptive_hash_index_partitions),
+#endif /* BTR_CUR_HASH_ADAPT */
+ MYSQL_SYSVAR(buffer_pool_populate),
+#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
+ MYSQL_SYSVAR(cleaner_eviction_factor),
+ MYSQL_SYSVAR(cleaner_flush_chunk_size),
+ MYSQL_SYSVAR(cleaner_free_list_lwm),
+ MYSQL_SYSVAR(cleaner_lru_chunk_size),
+ MYSQL_SYSVAR(cleaner_max_lru_time),
+ MYSQL_SYSVAR(cleaner_max_flush_time),
+#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
+ MYSQL_SYSVAR(cleaner_lsn_age_factor),
+ MYSQL_SYSVAR(corrupt_table_action),
+ MYSQL_SYSVAR(empty_free_list_algorithm),
+ MYSQL_SYSVAR(fake_changes),
+ MYSQL_SYSVAR(file_io_threads),
+ MYSQL_SYSVAR(foreground_preflush),
+ MYSQL_SYSVAR(kill_idle_transaction),
+ MYSQL_SYSVAR(locking_fake_changes),
+ MYSQL_SYSVAR(log_arch_dir),
+ MYSQL_SYSVAR(log_archive),
+ MYSQL_SYSVAR(log_arch_expire_sec),
+ MYSQL_SYSVAR(log_block_size),
+ MYSQL_SYSVAR(log_checksum_algorithm),
+ MYSQL_SYSVAR(max_bitmap_file_size),
+ MYSQL_SYSVAR(max_changed_pages),
+ MYSQL_SYSVAR(mirrored_log_groups),
+#ifdef UNIV_LINUX
+ MYSQL_SYSVAR(sched_priority_cleaner),
+#endif
+#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
+#ifdef UNIV_LINUX
+ MYSQL_SYSVAR(priority_cleaner),
+ MYSQL_SYSVAR(priority_io),
+ MYSQL_SYSVAR(priority_master),
+ MYSQL_SYSVAR(priority_purge),
+ MYSQL_SYSVAR(sched_priority_io),
+ MYSQL_SYSVAR(sched_priority_master),
+ MYSQL_SYSVAR(sched_priority_purge),
+#endif /* UNIV_LINUX */
+#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
+ MYSQL_SYSVAR(show_locks_held),
+ MYSQL_SYSVAR(show_verbose_locks),
+ MYSQL_SYSVAR(track_changed_pages),
+ MYSQL_SYSVAR(track_redo_log_now),
+ MYSQL_SYSVAR(use_global_flush_log_at_trx_commit),
+ MYSQL_SYSVAR(use_stacktrace),
+
+#endif /* HA_XTRADB_SYSVARS */
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index 682f09992a4..eee0e43d73f 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2005, 2019, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2019, MariaDB Corporation.
+Copyright (c) 2013, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -22,16 +22,17 @@ this program; if not, write to the Free Software Foundation, Inc.,
Smart ALTER TABLE
*******************************************************/
-#include <my_global.h>
-#include <unireg.h>
-#include <mysqld_error.h>
-#include <log.h>
+/* Include necessary SQL headers */
+#include "univ.i"
#include <debug_sync.h>
-#include <innodb_priv.h>
-#include <sql_alter.h>
+#include <log.h>
+#include <sql_lex.h>
#include <sql_class.h>
#include <sql_table.h>
+#include <mysql/plugin.h>
+/* Include necessary InnoDB headers */
+#include "btr0sea.h"
#include "dict0crea.h"
#include "dict0dict.h"
#include "dict0priv.h"
@@ -41,16 +42,23 @@ Smart ALTER TABLE
#include "rem0types.h"
#include "row0log.h"
#include "row0merge.h"
-#include "srv0srv.h"
#include "trx0trx.h"
#include "trx0roll.h"
-#include "ha_prototypes.h"
#include "handler0alter.h"
#include "srv0mon.h"
#include "fts0priv.h"
+#include "fts0plugin.h"
#include "pars0pars.h"
#include "row0sel.h"
#include "ha_innodb.h"
+#include "ut0stage.h"
+#include "span.h"
+
+using st_::span;
+
+static const char *MSG_UNSUPPORTED_ALTER_ONLINE_ON_VIRTUAL_COLUMN=
+ "INPLACE ADD or DROP of virtual columns cannot be "
+ "combined with other ALTER TABLE actions";
/** Operations for creating secondary indexes (no rebuild needed) */
static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ONLINE_CREATE
@@ -65,13 +73,12 @@ static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ALTER_REBUILD
/* CHANGE_CREATE_OPTION needs to check innobase_need_rebuild() */
| Alter_inplace_info::ALTER_COLUMN_NULLABLE
| Alter_inplace_info::ALTER_COLUMN_NOT_NULLABLE
- | Alter_inplace_info::ALTER_COLUMN_ORDER
- | Alter_inplace_info::DROP_COLUMN
- | Alter_inplace_info::ADD_COLUMN
+ | Alter_inplace_info::ALTER_STORED_COLUMN_ORDER
+ | Alter_inplace_info::DROP_STORED_COLUMN
+ | Alter_inplace_info::ADD_STORED_BASE_COLUMN
| Alter_inplace_info::RECREATE_TABLE
/*
- | Alter_inplace_info::ALTER_COLUMN_TYPE
- | Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH
+ | Alter_inplace_info::ALTER_STORED_COLUMN_TYPE
*/
;
@@ -85,23 +92,206 @@ static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_INPLACE_IGNORE
| Alter_inplace_info::ALTER_PARTITIONED
| Alter_inplace_info::ALTER_COLUMN_COLUMN_FORMAT
| Alter_inplace_info::ALTER_COLUMN_STORAGE_TYPE
- | Alter_inplace_info::ALTER_RENAME;
+ | Alter_inplace_info::ALTER_VIRTUAL_GCOL_EXPR
+ | Alter_inplace_info::ALTER_RENAME
+ | Alter_inplace_info::ALTER_COLUMN_INDEX_LENGTH;
/** Operations on foreign key definitions (changing the schema only) */
static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_FOREIGN_OPERATIONS
= Alter_inplace_info::DROP_FOREIGN_KEY
| Alter_inplace_info::ADD_FOREIGN_KEY;
-/** Operations that InnoDB cares about and can perform without rebuild */
-static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ALTER_NOREBUILD
- = INNOBASE_ONLINE_CREATE
- | INNOBASE_FOREIGN_OPERATIONS
+/** Operations that InnoDB cares about and can perform without validation */
+static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ALTER_NOVALIDATE
+ = INNOBASE_FOREIGN_OPERATIONS
| Alter_inplace_info::DROP_INDEX
| Alter_inplace_info::DROP_UNIQUE_INDEX
- | Alter_inplace_info::ALTER_COLUMN_NAME;
+ | Alter_inplace_info::ALTER_COLUMN_NAME
+ //| Alter_inplace_info::ALTER_INDEX_COMMENT
+ | Alter_inplace_info::DROP_VIRTUAL_COLUMN
+ | Alter_inplace_info::ALTER_VIRTUAL_COLUMN_ORDER;
+
+/** Operations that InnoDB cares about and can perform without rebuild */
+static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ALTER_NOREBUILD
+ = INNOBASE_ALTER_NOVALIDATE
+ | INNOBASE_ONLINE_CREATE
+ | Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH
+ | Alter_inplace_info::ADD_VIRTUAL_COLUMN;
+
+struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx
+{
+ /** Dummy query graph */
+ que_thr_t* thr;
+ /** The prebuilt struct of the creating instance */
+ row_prebuilt_t*& prebuilt;
+ /** InnoDB indexes being created */
+ dict_index_t** add_index;
+ /** MySQL key numbers for the InnoDB indexes that are being created */
+ const ulint* add_key_numbers;
+ /** number of InnoDB indexes being created */
+ ulint num_to_add_index;
+ /** InnoDB indexes being dropped */
+ dict_index_t** drop_index;
+ /** number of InnoDB indexes being dropped */
+ const ulint num_to_drop_index;
+ /** InnoDB indexes being renamed */
+ dict_index_t** rename;
+ /** number of InnoDB indexes being renamed */
+ const ulint num_to_rename;
+ /** InnoDB foreign key constraints being dropped */
+ dict_foreign_t** drop_fk;
+ /** number of InnoDB foreign key constraints being dropped */
+ const ulint num_to_drop_fk;
+ /** InnoDB foreign key constraints being added */
+ dict_foreign_t** add_fk;
+ /** number of InnoDB foreign key constraints being added */
+ const ulint num_to_add_fk;
+ /** whether to create the indexes online */
+ bool online;
+ /** memory heap */
+ mem_heap_t* heap;
+ /** dictionary transaction */
+ trx_t* trx;
+ /** original table (if rebuilt, differs from indexed_table) */
+ dict_table_t* old_table;
+ /** table where the indexes are being created or dropped */
+ dict_table_t* new_table;
+ /** mapping of old column numbers to new ones, or NULL */
+ const ulint* col_map;
+ /** new column names, or NULL if nothing was renamed */
+ const char** col_names;
+ /** added AUTO_INCREMENT column position, or ULINT_UNDEFINED */
+ const ulint add_autoinc;
+ /** default values of ADD COLUMN, or NULL */
+ const dtuple_t* add_cols;
+ /** autoinc sequence to use */
+ ib_sequence_t sequence;
+ /** temporary table name to use for old table when renaming tables */
+ const char* tmp_name;
+ /** whether the order of the clustered index is unchanged */
+ bool skip_pk_sort;
+ /** number of virtual columns to be added */
+ ulint num_to_add_vcol;
+ /** virtual columns to be added */
+ dict_v_col_t* add_vcol;
+ const char** add_vcol_name;
+ /** number of virtual columns to be dropped */
+ ulint num_to_drop_vcol;
+ /** virtual columns to be dropped */
+ dict_v_col_t* drop_vcol;
+ const char** drop_vcol_name;
+ /** ALTER TABLE stage progress recorder */
+ ut_stage_alter_t* m_stage;
+
+ ha_innobase_inplace_ctx(row_prebuilt_t*& prebuilt_arg,
+ dict_index_t** drop_arg,
+ ulint num_to_drop_arg,
+ dict_index_t** rename_arg,
+ ulint num_to_rename_arg,
+ dict_foreign_t** drop_fk_arg,
+ ulint num_to_drop_fk_arg,
+ dict_foreign_t** add_fk_arg,
+ ulint num_to_add_fk_arg,
+ bool online_arg,
+ mem_heap_t* heap_arg,
+ dict_table_t* new_table_arg,
+ const char** col_names_arg,
+ ulint add_autoinc_arg,
+ ulonglong autoinc_col_min_value_arg,
+ ulonglong autoinc_col_max_value_arg,
+ ulint num_to_drop_vcol_arg) :
+ inplace_alter_handler_ctx(),
+ prebuilt (prebuilt_arg),
+ add_index (0), add_key_numbers (0), num_to_add_index (0),
+ drop_index (drop_arg), num_to_drop_index (num_to_drop_arg),
+ rename (rename_arg), num_to_rename (num_to_rename_arg),
+ drop_fk (drop_fk_arg), num_to_drop_fk (num_to_drop_fk_arg),
+ add_fk (add_fk_arg), num_to_add_fk (num_to_add_fk_arg),
+ online (online_arg), heap (heap_arg), trx (0),
+ old_table (prebuilt_arg->table),
+ new_table (new_table_arg),
+ col_map (0), col_names (col_names_arg),
+ add_autoinc (add_autoinc_arg),
+ add_cols (0),
+ sequence(prebuilt->trx->mysql_thd,
+ autoinc_col_min_value_arg, autoinc_col_max_value_arg),
+ tmp_name (0),
+ skip_pk_sort(false),
+ num_to_add_vcol(0),
+ add_vcol(0),
+ add_vcol_name(0),
+ num_to_drop_vcol(0),
+ drop_vcol(0),
+ drop_vcol_name(0),
+ m_stage(NULL)
+ {
+#ifdef UNIV_DEBUG
+ for (ulint i = 0; i < num_to_add_index; i++) {
+ ut_ad(!add_index[i]->to_be_dropped);
+ }
+ for (ulint i = 0; i < num_to_drop_index; i++) {
+ ut_ad(drop_index[i]->to_be_dropped);
+ }
+#endif /* UNIV_DEBUG */
+
+ thr = pars_complete_graph_for_exec(NULL, prebuilt->trx, heap,
+ prebuilt);
+ }
+
+ ~ha_innobase_inplace_ctx()
+ {
+ UT_DELETE(m_stage);
+ mem_heap_free(heap);
+ }
+
+ /** Determine if the table will be rebuilt.
+ @return whether the table will be rebuilt */
+ bool need_rebuild () const { return(old_table != new_table); }
+
+ /** Clear uncommitted added indexes after a failed operation. */
+ void clear_added_indexes()
+ {
+ for (ulint i = 0; i < num_to_add_index; i++) {
+ if (!add_index[i]->is_committed()) {
+ add_index[i]->detach_columns();
+ }
+ }
+ }
+
+ /** Share context between partitions.
+ @param[in] ctx context from another partition of the table */
+ void set_shared_data(const inplace_alter_handler_ctx& ctx)
+ {
+ if (add_autoinc != ULINT_UNDEFINED) {
+ const ha_innobase_inplace_ctx& ha_ctx =
+ static_cast<const ha_innobase_inplace_ctx&>
+ (ctx);
+ /* When adding an AUTO_INCREMENT column to a
+ partitioned InnoDB table, we must share the
+ sequence for all partitions. */
+ ut_ad(ha_ctx.add_autoinc == add_autoinc);
+ ut_ad(ha_ctx.sequence.last());
+ sequence = ha_ctx.sequence;
+ }
+ }
+
+private:
+ // Disable copying
+ ha_innobase_inplace_ctx(const ha_innobase_inplace_ctx&);
+ ha_innobase_inplace_ctx& operator=(const ha_innobase_inplace_ctx&);
+};
+
+/********************************************************************//**
+Get the upper limit of the MySQL integral and floating-point type.
+@return maximum allowed value for the field */
+UNIV_INTERN
+ulonglong
+innobase_get_int_col_max_value(
+/*===========================*/
+ const Field* field); /*!< in: MySQL field */
/* Report an InnoDB error to the client by invoking my_error(). */
-static UNIV_COLD MY_ATTRIBUTE((nonnull))
+static ATTRIBUTE_COLD __attribute__((nonnull))
void
my_error_innodb(
/*============*/
@@ -131,15 +321,12 @@ my_error_innodb(
case DB_OUT_OF_FILE_SPACE:
my_error(ER_RECORD_FILE_FULL, MYF(0), table);
break;
- case DB_TEMP_FILE_WRITE_FAILURE:
- my_error(ER_GET_ERRMSG, MYF(0),
- DB_TEMP_FILE_WRITE_FAILURE,
- ut_strerr(DB_TEMP_FILE_WRITE_FAILURE),
- "InnoDB");
+ case DB_TEMP_FILE_WRITE_FAIL:
+ my_error(ER_TEMP_FILE_WRITE_FAILURE, MYF(0));
break;
case DB_TOO_BIG_INDEX_COL:
my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0),
- DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags));
+ (ulong) DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags));
break;
case DB_TOO_MANY_CONCURRENT_TRXS:
my_error(ER_TOO_MANY_CONCURRENT_TRXS, MYF(0));
@@ -160,8 +347,8 @@ my_error_innodb(
bool comp = !!(flags & DICT_TF_COMPACT);
ulint free_space = page_get_free_space_of_empty(comp) / 2;
- if (free_space >= (comp ? COMPRESSED_REC_MAX_DATA_SIZE :
- REDUNDANT_REC_MAX_DATA_SIZE)) {
+ if (free_space >= ulint(comp ? COMPRESSED_REC_MAX_DATA_SIZE :
+ REDUNDANT_REC_MAX_DATA_SIZE)) {
free_space = (comp ? COMPRESSED_REC_MAX_DATA_SIZE :
REDUNDANT_REC_MAX_DATA_SIZE) - 1;
}
@@ -173,6 +360,9 @@ my_error_innodb(
/* TODO: report the row, as we do for DB_DUPLICATE_KEY */
my_error(ER_INVALID_USE_OF_NULL, MYF(0));
break;
+ case DB_CANT_CREATE_GEOMETRY_OBJECT:
+ my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, MYF(0));
+ break;
case DB_TABLESPACE_EXISTS:
my_error(ER_TABLESPACE_EXISTS, MYF(0), table);
break;
@@ -191,8 +381,8 @@ my_error_innodb(
}
/** Determine if fulltext indexes exist in a given table.
-@param table MySQL table
-@return whether fulltext indexes exist on the table */
+@param table MySQL table
+@return whether fulltext indexes exist on the table */
static
bool
innobase_fulltext_exist(
@@ -208,6 +398,44 @@ innobase_fulltext_exist(
return(false);
}
+/** Determine whether indexed virtual columns exist in a table.
+@param[in] table table definition
+@return whether indexes exist on virtual columns */
+static bool innobase_indexed_virtual_exist(const TABLE* table)
+{
+ const KEY* const end = &table->key_info[table->s->keys];
+
+ for (const KEY* key = table->key_info; key < end; key++) {
+ const KEY_PART_INFO* const key_part_end = key->key_part
+ + key->user_defined_key_parts;
+ for (const KEY_PART_INFO* key_part = key->key_part;
+ key_part < key_part_end; key_part++) {
+ if (!key_part->field->stored_in_db())
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/** Determine if spatial indexes exist in a given table.
+@param table MySQL table
+@return whether spatial indexes exist on the table */
+static
+bool
+innobase_spatial_exist(
+/*===================*/
+ const TABLE* table)
+{
+ for (uint i = 0; i < table->s->keys; i++) {
+ if (table->key_info[i].flags & HA_SPATIAL) {
+ return(true);
+ }
+ }
+
+ return(false);
+}
+
/** Determine if ALTER TABLE needs to rebuild the table.
@param ha_alter_info the DDL operation
@param table metadata before ALTER TABLE
@@ -248,21 +476,119 @@ innobase_need_rebuild(
return(!!(alter_inplace_flags & INNOBASE_ALTER_REBUILD));
}
+/** Check if the virtual columns in the old and new table are in the same
+order, excluding dropped columns. This is needed because when we drop a
+virtual column, ALTER_VIRTUAL_COLUMN_ORDER is also turned on, so we cannot
+tell whether this is a real ORDER change or just a DROP COLUMN
+@param[in] table old TABLE
+@param[in] altered_table new TABLE
+@param[in] ha_alter_info Structure describing changes to be done
+by ALTER TABLE and holding data used during in-place alter.
+@return true if all columns are in order, false otherwise. */
+static
+bool
+check_v_col_in_order(
+ const TABLE* table,
+ const TABLE* altered_table,
+ Alter_inplace_info* ha_alter_info)
+{
+ ulint j = 0;
+
+ /* We do not support adding a new virtual column before
+ an existing virtual column. */
+ if (ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_VIRTUAL_COLUMN) {
+ bool has_new = false;
+
+ List_iterator_fast<Create_field> cf_it(
+ ha_alter_info->alter_info->create_list);
+
+ cf_it.rewind();
+
+ while (const Create_field* new_field = cf_it++) {
+ if (new_field->stored_in_db()) {
+ continue;
+ }
+
+ /* Found a new added virtual column. */
+ if (!new_field->field) {
+ has_new = true;
+ continue;
+ }
+
+ /* If there's any old virtual column
+ after the new added virtual column,
+ order must be changed. */
+ if (has_new) {
+ return(false);
+ }
+ }
+ }
+
+ /* directly return true if ALTER_VIRTUAL_COLUMN_ORDER is not on */
+ if (!(ha_alter_info->handler_flags
+ & Alter_inplace_info::ALTER_VIRTUAL_COLUMN_ORDER)) {
+ return(true);
+ }
+
+ for (ulint i = 0; i < table->s->fields; i++) {
+ Field* field = table->field[i];
+
+ if (field->stored_in_db()) {
+ continue;
+ }
+
+ if (field->flags & FIELD_IS_DROPPED) {
+ continue;
+ }
+
+ /* Now check if the next virtual column in altered table
+ matches this column */
+ while (j < altered_table->s->fields) {
+ Field* new_field = altered_table->s->field[j];
+
+ if (new_field->stored_in_db()) {
+ j++;
+ continue;
+ }
+
+ if (my_strcasecmp(system_charset_info,
+ field->field_name,
+ new_field->field_name) != 0) {
+ /* different column */
+ return(false);
+ } else {
+ j++;
+ break;
+ }
+ }
+
+ if (j > altered_table->s->fields) {
+ /* the new table should not have fewer columns
+ unless they are in the drop list */
+ ut_ad(0);
+ return(false);
+ }
+ }
+
+ return(true);
+}
+
/** Check if InnoDB supports a particular alter table in-place
-@param altered_table TABLE object for new version of table.
-@param ha_alter_info Structure describing changes to be done
+@param altered_table TABLE object for new version of table.
+@param ha_alter_info Structure describing changes to be done
by ALTER TABLE and holding data used during in-place alter.
-@retval HA_ALTER_INPLACE_NOT_SUPPORTED Not supported
-@retval HA_ALTER_INPLACE_NO_LOCK Supported
+@retval HA_ALTER_INPLACE_NOT_SUPPORTED Not supported
+@retval HA_ALTER_INPLACE_NO_LOCK Supported
@retval HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE Supported, but requires
lock during main phase and exclusive lock during prepare phase.
-@retval HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE Supported, prepare phase
+@retval HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE Supported, prepare phase
requires exclusive lock (any transactions that have accessed the table
must commit or roll back first, and no transactions can access the table
while prepare_inplace_alter_table() is executing)
*/
-UNIV_INTERN
+
enum_alter_inplace_result
ha_innobase::check_if_supported_inplace_alter(
/*==========================================*/
@@ -271,18 +597,21 @@ ha_innobase::check_if_supported_inplace_alter(
{
DBUG_ENTER("check_if_supported_inplace_alter");
- if (high_level_read_only) {
- ha_alter_info->unsupported_reason =
- innobase_get_err_msg(ER_READ_ONLY_MODE);
+ /* Before 10.2.2 information about virtual columns was not stored in
+ system tables. We need to do a full alter to rebuild proper 10.2.2+
+ metadata with the information about virtual columns */
+ if (omits_virtual_cols(*table_share)) {
DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- } else if (srv_created_new_raw || srv_force_recovery) {
+ }
+ if (high_level_read_only) {
ha_alter_info->unsupported_reason =
innobase_get_err_msg(ER_READ_ONLY_MODE);
+
DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
}
- if (altered_table->s->stored_fields > REC_MAX_N_USER_FIELDS) {
+ if (altered_table->s->fields > REC_MAX_N_USER_FIELDS) {
/* Deny the inplace ALTER TABLE. MySQL will try to
re-create the table and ha_innobase::create() will
return an error too. This is how we effectively
@@ -293,7 +622,6 @@ ha_innobase::check_if_supported_inplace_alter(
}
update_thd();
- trx_search_latch_release_if_reserved(prebuilt->trx);
if (ha_alter_info->handler_flags
& ~(INNOBASE_INPLACE_IGNORE
@@ -301,18 +629,17 @@ ha_innobase::check_if_supported_inplace_alter(
| INNOBASE_ALTER_REBUILD)) {
if (ha_alter_info->handler_flags
- & (Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH
- | Alter_inplace_info::ALTER_COLUMN_TYPE))
+ & Alter_inplace_info::ALTER_STORED_COLUMN_TYPE) {
ha_alter_info->unsupported_reason = innobase_get_err_msg(
ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_COLUMN_TYPE);
+ }
DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
}
/* Only support online add foreign key constraint when
check_foreigns is turned off */
- if ((ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_FOREIGN_KEY)
- && prebuilt->trx->check_foreigns) {
+ if ((ha_alter_info->handler_flags & Alter_inplace_info::ADD_FOREIGN_KEY)
+ && m_prebuilt->trx->check_foreigns) {
ha_alter_info->unsupported_reason = innobase_get_err_msg(
ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FK_CHECK);
DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
@@ -328,7 +655,7 @@ ha_innobase::check_if_supported_inplace_alter(
NULL to a NOT NULL value. */
if ((ha_alter_info->handler_flags
& Alter_inplace_info::ALTER_COLUMN_NOT_NULLABLE)
- && (ha_alter_info->ignore || !thd_is_strict_mode(user_thd))) {
+ && (ha_alter_info->ignore || !thd_is_strict_mode(m_user_thd))) {
ha_alter_info->unsupported_reason = innobase_get_err_msg(
ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOT_NULL);
DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
@@ -360,14 +687,15 @@ ha_innobase::check_if_supported_inplace_alter(
/* If a column change from NOT NULL to NULL,
and there's a implict pk on this column. the
table should be rebuild. The change should
- only go through the "Copy" method.*/
+ only go through the "Copy" method. */
if ((ha_alter_info->handler_flags
& Alter_inplace_info::ALTER_COLUMN_NULLABLE)) {
- uint primary_key = altered_table->s->primary_key;
+ const uint my_primary_key = altered_table->s->primary_key;
- /* See if MYSQL table has no pk but we do.*/
- if (UNIV_UNLIKELY(primary_key >= MAX_KEY)
- && !row_table_got_default_clust_index(prebuilt->table)) {
+ /* See if MYSQL table has no pk but we do. */
+ if (UNIV_UNLIKELY(my_primary_key >= MAX_KEY)
+ && !dict_index_is_auto_gen_clust(
+ dict_table_get_first_index(m_prebuilt->table))) {
ha_alter_info->unsupported_reason = innobase_get_err_msg(
ER_PRIMARY_CANT_HAVE_NULL);
DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
@@ -379,18 +707,21 @@ ha_innobase::check_if_supported_inplace_alter(
codes for certain types. In some cases the signed/unsigned bit was
generated differently too.
- Online ALTER would change the mtype/unsigned_flag (to what the
+ Inplace ALTER would change the mtype/unsigned_flag (to what the
current code generates) without changing the underlying data
represenation, and it might result in data corruption.
- Don't do online ALTER if mtype/unsigned_flag are wrong.
+ Don't do inplace ALTER if mtype/unsigned_flag are wrong.
*/
for (ulint i = 0, icol= 0; i < table->s->fields; i++) {
const Field* field = table->field[i];
- const dict_col_t* col = dict_table_get_nth_col(prebuilt->table, icol);
+ const dict_col_t* col = dict_table_get_nth_col(m_prebuilt->table, icol);
ulint unsigned_flag;
- if (!field->stored_in_db)
+
+ if (!field->stored_in_db()) {
continue;
+ }
+
icol++;
if (col->mtype != get_innobase_type_from_mysql_type(&unsigned_flag, field)) {
@@ -404,49 +735,61 @@ ha_innobase::check_if_supported_inplace_alter(
}
}
- /* If we have column that has changed from NULL -> NOT NULL
- and column default has changed we need to do additional
- check. */
- if ((ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_COLUMN_NOT_NULLABLE) &&
- (ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_COLUMN_DEFAULT)) {
- Alter_info *alter_info = ha_alter_info->alter_info;
- List_iterator<Create_field> def_it(alter_info->create_list);
- Create_field *def;
- while ((def=def_it++)) {
-
- /* If this is first column definition whose SQL type
- is TIMESTAMP and it is defined as NOT NULL and
- it has either constant default or function default
- we must use "Copy" method. */
- if (is_timestamp_type(def->sql_type)) {
- if ((def->flags & NOT_NULL_FLAG) != 0 && // NOT NULL
- (def->def != NULL || // constant default ?
- def->unireg_check != Field::NONE)) { // function default
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOT_NULL);
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
- break;
- }
- }
- }
-
- ulint n_indexes = UT_LIST_GET_LEN((prebuilt->table)->indexes);
+ ulint n_indexes = UT_LIST_GET_LEN((m_prebuilt->table)->indexes);
/* If InnoDB dictionary and MySQL frm file are not consistent
use "Copy" method. */
- if (prebuilt->table->dict_frm_mismatch) {
+ if (m_prebuilt->table->dict_frm_mismatch) {
ha_alter_info->unsupported_reason = innobase_get_err_msg(
ER_NO_SUCH_INDEX);
- ib_push_frm_error(user_thd, prebuilt->table, altered_table,
+ ib_push_frm_error(m_user_thd, m_prebuilt->table, altered_table,
n_indexes, true);
DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
}
+ bool add_drop_v_cols = false;
+
+ /* If virtual columns are being added or dropped, for now we
+ support these operations through the in-place interface only in
+ combination with the operations listed below */
+
+ if (ha_alter_info->handler_flags
+ & (Alter_inplace_info::ADD_VIRTUAL_COLUMN
+ | Alter_inplace_info::DROP_VIRTUAL_COLUMN
+ | Alter_inplace_info::ALTER_VIRTUAL_COLUMN_ORDER)) {
+ ulonglong flags = ha_alter_info->handler_flags;
+
+ /* TODO: uncomment the flags below, once we start to
+ support them */
+
+ flags &= ~(Alter_inplace_info::ADD_VIRTUAL_COLUMN
+ | Alter_inplace_info::DROP_VIRTUAL_COLUMN
+ | Alter_inplace_info::ALTER_VIRTUAL_COLUMN_ORDER
+ | Alter_inplace_info::ALTER_VIRTUAL_GCOL_EXPR
+ | Alter_inplace_info::ALTER_COLUMN_VCOL
+ /*
+ | Alter_inplace_info::ADD_STORED_BASE_COLUMN
+ | Alter_inplace_info::DROP_STORED_COLUMN
+ | Alter_inplace_info::ALTER_STORED_COLUMN_ORDER
+ | Alter_inplace_info::ADD_UNIQUE_INDEX
+ */
+ | Alter_inplace_info::ADD_INDEX
+ | Alter_inplace_info::DROP_INDEX);
+
+ if (flags != 0
+ || IF_PARTITIONING((altered_table->s->partition_info_str
+ && altered_table->s->partition_info_str_len), 0)
+ || (!check_v_col_in_order(
+ this->table, altered_table, ha_alter_info))) {
+ ha_alter_info->unsupported_reason =
+ MSG_UNSUPPORTED_ALTER_ONLINE_ON_VIRTUAL_COLUMN;
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+ add_drop_v_cols = true;
+ }
+
/* We should be able to do the operation in-place.
See if we can do it online (LOCK=NONE). */
bool online = true;
@@ -459,12 +802,24 @@ ha_innobase::check_if_supported_inplace_alter(
new_key < ha_alter_info->key_info_buffer
+ ha_alter_info->key_count;
new_key++) {
+
+		/* Do not support adding/dropping a virtual column while
+		the table is being rebuilt to add a new FTS_DOC_ID */
+ if ((new_key->flags & HA_FULLTEXT) && add_drop_v_cols
+ && !DICT_TF2_FLAG_IS_SET(m_prebuilt->table,
+ DICT_TF2_FTS_HAS_DOC_ID)) {
+ ha_alter_info->unsupported_reason =
+ MSG_UNSUPPORTED_ALTER_ONLINE_ON_VIRTUAL_COLUMN;
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
for (KEY_PART_INFO* key_part = new_key->key_part;
key_part < new_key->key_part + new_key->user_defined_key_parts;
key_part++) {
const Create_field* new_field;
- DBUG_ASSERT(key_part->fieldnr < altered_table->s->fields);
+ DBUG_ASSERT(key_part->fieldnr
+ < altered_table->s->fields);
cf_it.rewind();
for (uint fieldnr = 0; (new_field = cf_it++);
@@ -478,10 +833,11 @@ ha_innobase::check_if_supported_inplace_alter(
key_part->field = altered_table->field[
key_part->fieldnr];
+
/* In some special cases InnoDB emits "false"
duplicate key errors with NULL key values. Let
us play safe and ensure that we can correctly
- print key values even in such cases .*/
+ print key values even in such cases. */
key_part->null_offset = key_part->field->null_offset();
key_part->null_bit = key_part->field->null_bit;
@@ -496,7 +852,7 @@ ha_innobase::check_if_supported_inplace_alter(
/* We cannot replace a hidden FTS_DOC_ID
with a user-visible FTS_DOC_ID. */
- if (prebuilt->table->fts
+ if (m_prebuilt->table->fts
&& innobase_fulltext_exist(altered_table)
&& !my_strcasecmp(
system_charset_info,
@@ -521,15 +877,32 @@ ha_innobase::check_if_supported_inplace_alter(
ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_AUTOINC);
online = false;
}
+
+ if (!key_part->field->stored_in_db()) {
+			/* Do not support adding an index on a newly
+			added virtual column while a virtual column is
+			also being dropped in the same clause */
+ if (ha_alter_info->handler_flags
+ & Alter_inplace_info::DROP_VIRTUAL_COLUMN) {
+ ha_alter_info->unsupported_reason =
+ MSG_UNSUPPORTED_ALTER_ONLINE_ON_VIRTUAL_COLUMN;
+
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+ ha_alter_info->unsupported_reason =
+ MSG_UNSUPPORTED_ALTER_ONLINE_ON_VIRTUAL_COLUMN;
+ online = false;
+ }
}
}
- DBUG_ASSERT(!prebuilt->table->fts || prebuilt->table->fts->doc_col
- <= table->s->stored_fields);
- DBUG_ASSERT(!prebuilt->table->fts || prebuilt->table->fts->doc_col
- < dict_table_get_n_user_cols(prebuilt->table));
+ DBUG_ASSERT(!m_prebuilt->table->fts || m_prebuilt->table->fts->doc_col
+ <= table->s->fields);
+ DBUG_ASSERT(!m_prebuilt->table->fts || m_prebuilt->table->fts->doc_col
+ < dict_table_get_n_user_cols(m_prebuilt->table));
- if (prebuilt->table->fts
+ if (m_prebuilt->table->fts
&& innobase_fulltext_exist(altered_table)) {
/* FULLTEXT indexes are supposed to remain. */
/* Disallow DROP INDEX FTS_DOC_ID_INDEX */
@@ -566,7 +939,7 @@ ha_innobase::check_if_supported_inplace_alter(
}
}
- prebuilt->trx->will_lock++;
+ m_prebuilt->trx->will_lock++;
if (!online) {
/* We already determined that only a non-locking
@@ -574,25 +947,45 @@ ha_innobase::check_if_supported_inplace_alter(
} else if (((ha_alter_info->handler_flags
& Alter_inplace_info::ADD_PK_INDEX)
|| innobase_need_rebuild(ha_alter_info, table))
- && (innobase_fulltext_exist(altered_table))) {
+ && (innobase_fulltext_exist(altered_table)
+ || innobase_spatial_exist(altered_table)
+ || innobase_indexed_virtual_exist(altered_table))) {
/* Refuse to rebuild the table online, if
- fulltext indexes are to survive the rebuild. */
+	FULLTEXT or SPATIAL indexes or indexed virtual columns
+ are to survive the rebuild. */
online = false;
/* If the table already contains fulltext indexes,
refuse to rebuild the table natively altogether. */
- if (prebuilt->table->fts) {
+ if (m_prebuilt->table->fts) {
ha_alter_info->unsupported_reason = innobase_get_err_msg(
ER_INNODB_FT_LIMIT);
DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
}
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FTS);
+
+ if (innobase_spatial_exist(altered_table)) {
+ ha_alter_info->unsupported_reason =
+ innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_GIS);
+ } else if (!innobase_fulltext_exist(altered_table)) {
+ /* MDEV-14341 FIXME: Remove this limitation. */
+ ha_alter_info->unsupported_reason =
+ "online rebuild with indexed virtual columns";
+ } else {
+ ha_alter_info->unsupported_reason =
+ innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FTS);
+ }
} else if ((ha_alter_info->handler_flags
& Alter_inplace_info::ADD_INDEX)) {
- /* Building a full-text index requires a lock.
- We could do without a lock if the table already contains
- an FTS_DOC_ID column, but in that case we would have
- to apply the modification log to the full-text indexes. */
+ /* ADD FULLTEXT|SPATIAL INDEX requires a lock.
+
+ We could do ADD FULLTEXT INDEX without a lock if the
+ table already contains an FTS_DOC_ID column, but in
+ that case we would have to apply the modification log
+ to the full-text indexes.
+
+ We could also do ADD SPATIAL INDEX by implementing
+ row_log_apply() for it. */
for (uint i = 0; i < ha_alter_info->index_add_count; i++) {
const KEY* key =
@@ -609,9 +1002,109 @@ ha_innobase::check_if_supported_inplace_alter(
online = false;
break;
}
+ if (key->flags & HA_SPATIAL) {
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_GIS);
+ online = false;
+ break;
+ }
}
}
+ /* When changing a NULL column to NOT NULL and specifying a
+ DEFAULT value, ensure that the DEFAULT expression is a constant.
+ Also, in ADD COLUMN, for now we only support a
+ constant DEFAULT expression. */
+ cf_it.rewind();
+ Field **af = altered_table->field;
+
+ while (Create_field* cf = cf_it++) {
+ DBUG_ASSERT(cf->field
+ || (ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_COLUMN));
+
+ if (const Field* f = cf->field) {
+ /* This could be changing an existing column
+ from NULL to NOT NULL. */
+ switch ((*af)->type()) {
+ case MYSQL_TYPE_TIMESTAMP:
+ case MYSQL_TYPE_TIMESTAMP2:
+ /* Inserting NULL into a TIMESTAMP column
+ would cause the DEFAULT value to be
+ replaced. Ensure that the DEFAULT
+ expression is not changing during
+ ALTER TABLE. */
+ if (!f->real_maybe_null()
+ || (*af)->real_maybe_null()) {
+ /* The column was NOT NULL, or it
+ will allow NULL after ALTER TABLE. */
+ goto next_column;
+ }
+
+ if (!(*af)->default_value
+ && (*af)->is_real_null()) {
+ /* No DEFAULT value is
+ specified. We can report
+ errors for any NULL values for
+ the TIMESTAMP.
+
+ FIXME: Allow any DEFAULT
+ expression whose value does
+ not change during ALTER TABLE.
+ This would require a fix in
+ row_merge_read_clustered_index()
+ to try to replace the DEFAULT
+ value before reporting
+ DB_INVALID_NULL. */
+ goto next_column;
+ }
+ break;
+ default:
+ /* For any other data type, NULL
+ values are not converted.
+ (An AUTO_INCREMENT attribute cannot
+ be introduced to a column with
+ ALGORITHM=INPLACE.) */
+ ut_ad((MTYP_TYPENR((*af)->unireg_check)
+ == Field::NEXT_NUMBER)
+ == (MTYP_TYPENR(f->unireg_check)
+ == Field::NEXT_NUMBER));
+ goto next_column;
+ }
+
+ ha_alter_info->unsupported_reason
+ = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOT_NULL);
+ } else if (!(*af)->default_value
+ || !((*af)->default_value->flags
+ & ~(VCOL_SESSION_FUNC | VCOL_TIME_FUNC))) {
+ /* The added NOT NULL column lacks a DEFAULT value,
+ or the DEFAULT is the same for all rows.
+ (Time functions, such as CURRENT_TIMESTAMP(),
+ are evaluated from a timestamp that is assigned
+ at the start of the statement. Session
+ functions, such as USER(), always evaluate the
+ same within a statement.) */
+
+ /* Compute the DEFAULT values of non-constant columns
+ (VCOL_SESSION_FUNC | VCOL_TIME_FUNC). */
+ switch ((*af)->set_default()) {
+ case 0: /* OK */
+ case 3: /* DATETIME to TIME or DATE conversion */
+ goto next_column;
+ case -1: /* OOM, or GEOMETRY type mismatch */
+ case 1: /* A number adjusted to the min/max value */
+ case 2: /* String truncation, or conversion problem */
+ break;
+ }
+ }
+
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+
+next_column:
+ af++;
+ }
+
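/* A standalone sketch (not InnoDB code) of the per-column validation
loop above: each column either passes and we move on to the next one, or
the whole ALTER falls back to a non-in-place algorithm. The result codes
mirror the switch on set_default() above; the column type is a
simplified, hypothetical stand-in. */

#include <cassert>
#include <vector>

struct column {
	bool	has_default;	/* a DEFAULT clause was given */
	int	default_status;	/* 0/3 = usable, -1/1/2 = conversion problem */
};

/* Return true if the in-place ALTER may proceed for all columns. */
inline bool defaults_are_usable(const std::vector<column>& cols)
{
	for (const column& c : cols) {
		if (!c.has_default) {
			continue;	/* nothing to validate */
		}
		switch (c.default_status) {
		case 0:	/* OK */
		case 3:	/* lossless DATETIME to TIME/DATE conversion */
			continue;
		default: /* OOM, truncation, min/max adjustment, ... */
			return false;
		}
	}
	return true;
}

int main()
{
	assert(defaults_are_usable({{false, 0}, {true, 3}}));
	assert(!defaults_are_usable({{true, 2}}));
}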
DBUG_RETURN(online
? HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE
: HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE);
@@ -652,12 +1145,12 @@ innobase_init_foreign(
same MySQL 'database' as the table itself. We store the name
to foreign->id. */
- db_len = dict_get_db_name_len(table->name);
+ db_len = dict_get_db_name_len(table->name.m_name);
foreign->id = static_cast<char*>(mem_heap_alloc(
foreign->heap, db_len + strlen(constraint_name) + 2));
- ut_memcpy(foreign->id, table->name, db_len);
+ ut_memcpy(foreign->id, table->name.m_name, db_len);
foreign->id[db_len] = '/';
strcpy(foreign->id + db_len + 1, constraint_name);
@@ -672,7 +1165,7 @@ innobase_init_foreign(
foreign->foreign_table = table;
foreign->foreign_table_name = mem_heap_strdup(
- foreign->heap, table->name);
+ foreign->heap, table->name.m_name);
dict_mem_foreign_table_name_lookup_set(foreign, TRUE);
foreign->foreign_index = index;
@@ -752,6 +1245,7 @@ innobase_set_foreign_key_option(
ut_ad(!foreign->type);
switch (fk_key->delete_opt) {
+ // JAN: TODO: ? MySQL 5.7 used enum fk_option directly from sql_lex.h
case FK_OPTION_NO_ACTION:
case FK_OPTION_RESTRICT:
case FK_OPTION_SET_DEFAULT:
@@ -785,22 +1279,25 @@ innobase_set_foreign_key_option(
/*******************************************************************//**
Check if a foreign key constraint can make use of an index
that is being created.
-@return useable index, or NULL if none found */
+@param[in] col_names column names
+@param[in] n_cols number of columns
+@param[in] keys index information
+@param[in] add indexes being created
+@return usable index, or NULL if none found */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
const KEY*
innobase_find_equiv_index(
-/*======================*/
const char*const* col_names,
- /*!< in: column names */
- uint n_cols, /*!< in: number of columns */
- const KEY* keys, /*!< in: index information */
- const uint* add, /*!< in: indexes being created */
- uint n_add) /*!< in: number of indexes to create */
+ uint n_cols,
+ const KEY* keys,
+ span<uint> add)
{
- for (uint i = 0; i < n_add; i++) {
- const KEY* key = &keys[add[i]];
+ for (span<uint>::iterator it = add.begin(), end = add.end(); it != end;
+ ++it) {
+ const KEY* key = &keys[*it];
- if (key->user_defined_key_parts < n_cols) {
+ if (key->user_defined_key_parts < n_cols
+ || key->flags & HA_SPATIAL) {
no_match:
continue;
}
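/* A standalone sketch (not InnoDB code) of the span-based interface
used above: the (pointer, count) argument pair becomes a single
non-owning view and the indexed loop becomes iterator-based. C++20
std::span stands in for InnoDB's internal span type. */

#include <algorithm>
#include <cassert>
#include <span>

/* Find the first even element, or -1; the predicate is illustrative. */
inline int first_even(std::span<const int> values)
{
	auto it = std::find_if(values.begin(), values.end(),
			       [](int v) { return v % 2 == 0; });
	return it == values.end() ? -1 : *it;
}

int main()
{
	const int add[] = {3, 5, 8, 9};
	assert(first_even(std::span<const int>(add, 4)) == 8);
}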
@@ -810,6 +1307,12 @@ no_match:
uint32 col_len
= key_part.field->pack_length();
+			/* An index on virtual columns cannot be used
+			for a referential constraint */
+ if (!key_part.field->stored_in_db()) {
+ goto no_match;
+ }
+
/* The MySQL pack length contains 1 or 2 bytes
length field for a true VARCHAR. */
@@ -842,7 +1345,7 @@ no_match:
Find an index whose first fields are the columns in the array
in the same order and is not marked for deletion
@return matching index, NULL if not found */
-static MY_ATTRIBUTE((nonnull(1,2,6), warn_unused_result))
+static MY_ATTRIBUTE((nonnull(1,2,5), warn_unused_result))
dict_index_t*
innobase_find_fk_index(
/*===================*/
@@ -852,10 +1355,8 @@ innobase_find_fk_index(
const char** col_names,
/*!< in: column names, or NULL
to use table->col_names */
- dict_index_t** drop_index,
+ span<dict_index_t*> drop_index,
/*!< in: indexes to be dropped */
- ulint n_drop_index,
- /*!< in: size of drop_index[] */
const char** columns,/*!< in: array of column names */
ulint n_cols) /*!< in: number of columns */
{
@@ -864,49 +1365,108 @@ innobase_find_fk_index(
index = dict_table_get_first_index(table);
while (index != NULL) {
- if (!(index->type & DICT_FTS)
- && dict_foreign_qualify_index(
- table, col_names, columns, n_cols,
- index, NULL, true, 0,
- NULL, NULL, NULL)) {
- for (ulint i = 0; i < n_drop_index; i++) {
- if (index == drop_index[i]) {
- /* Skip to-be-dropped indexes. */
- goto next_rec;
- }
- }
-
- return(index);
+ if (dict_foreign_qualify_index(table, col_names, columns,
+ n_cols, index, NULL, true, 0,
+ NULL, NULL, NULL)
+ && std::find(drop_index.begin(), drop_index.end(), index)
+ == drop_index.end()) {
+ return index;
}
-next_rec:
index = dict_table_get_next_index(index);
}
return(NULL);
}
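/* A standalone sketch (not InnoDB code) of the std::find rewrite in
innobase_find_fk_index() above: the manual skip loop with its goto label
collapses into a single membership test over the to-be-dropped list. The
index type is a hypothetical stand-in for dict_index_t. */

#include <algorithm>
#include <cassert>
#include <span>

struct fake_index {};	/* stand-in for dict_index_t */

inline fake_index* first_not_dropped(
	std::span<fake_index* const>	candidates,
	std::span<fake_index* const>	drop_list)
{
	for (fake_index* idx : candidates) {
		if (std::find(drop_list.begin(), drop_list.end(), idx)
		    == drop_list.end()) {
			return idx;	/* not scheduled for dropping */
		}
	}
	return nullptr;
}

int main()
{
	fake_index a, b;
	fake_index* cand[] = {&a, &b};
	fake_index* drops[] = {&a};
	assert(first_not_dropped(std::span<fake_index* const>(cand, 2),
				 std::span<fake_index* const>(drops, 1))
	       == &b);
}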
-/*************************************************************//**
-Create InnoDB foreign key structure from MySQL alter_info
+/** Check whether the given column is a base column of a stored column.
+@param[in]	col_name	column name
+@param[in]	table		table
+@param[in]	s_cols		list of stored columns
+@return true if the given column is a base column of a stored column,
+else false. */
+static
+bool
+innobase_col_check_fk(
+ const char* col_name,
+ const dict_table_t* table,
+ dict_s_col_list* s_cols)
+{
+ dict_s_col_list::const_iterator it;
+
+ for (it = s_cols->begin(); it != s_cols->end(); ++it) {
+ for (ulint j = it->num_base; j--; ) {
+ if (!strcmp(col_name, dict_table_get_col_name(
+ table, it->base_col[j]->ind))) {
+ return(true);
+ }
+ }
+ }
+
+ return(false);
+}
+
+/** Check whether the foreign key constraint involves a base column of
+any stored column.
+@param[in]	foreign		foreign key constraint information
+@param[in]	table		table to which the foreign key objects
+are to be added
+@param[in]	s_cols		list of stored column information in the table.
+@return true if yes, otherwise false. */
+static
+bool
+innobase_check_fk_stored(
+ const dict_foreign_t* foreign,
+ const dict_table_t* table,
+ dict_s_col_list* s_cols)
+{
+ ulint type = foreign->type;
+
+ type &= ~(DICT_FOREIGN_ON_DELETE_NO_ACTION
+ | DICT_FOREIGN_ON_UPDATE_NO_ACTION);
+
+ if (type == 0 || s_cols == NULL) {
+ return(false);
+ }
+
+ for (ulint i = 0; i < foreign->n_fields; i++) {
+ if (innobase_col_check_fk(
+ foreign->foreign_col_names[i], table, s_cols)) {
+ return(true);
+ }
+ }
+
+ return(false);
+}
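
/* A standalone sketch (not InnoDB code) of the two checks above: a
cascading foreign key action is rejected when any of its columns is a
base column of a stored generated column. Plain strings and vectors
stand in for dict_foreign_t and dict_s_col_list. */

#include <cassert>
#include <string>
#include <vector>

struct stored_col {
	std::vector<std::string>	base_cols;	/* base column names */
};

inline bool fk_touches_stored_base(
	const std::vector<std::string>&	fk_cols,
	const std::vector<stored_col>&	s_cols,
	bool				has_cascading_action)
{
	if (!has_cascading_action) {
		/* Corresponds to masking out the NO ACTION flags above. */
		return false;
	}
	for (const std::string& fk_col : fk_cols) {
		for (const stored_col& s : s_cols) {
			for (const std::string& base : s.base_cols) {
				if (fk_col == base) {
					return true;
				}
			}
		}
	}
	return false;
}

int main()
{
	std::vector<stored_col> s = {{{"a", "b"}}};
	assert(fk_touches_stored_base({"b"}, s, true));
	assert(!fk_touches_stored_base({"b"}, s, false));
}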
+
+/** Create InnoDB foreign key structure from MySQL alter_info
+@param[in] ha_alter_info alter table info
+@param[in] table_share TABLE_SHARE
+@param[in] table table object
+@param[in] col_names column names, or NULL to use
+table->col_names
+@param[in] drop_index indexes to be dropped
+@param[in] n_drop_index size of drop_index
+@param[out] add_fk foreign constraint added
+@param[out] n_add_fk number of foreign constraints
+added
+@param[in] trx user transaction
+@param[in] s_cols list of stored column information
@retval true if successful
@retval false on error (will call my_error()) */
static MY_ATTRIBUTE((nonnull(1,2,3,7,8), warn_unused_result))
bool
innobase_get_foreign_key_info(
-/*==========================*/
Alter_inplace_info*
- ha_alter_info, /*!< in: alter table info */
+ ha_alter_info,
const TABLE_SHARE*
- table_share, /*!< in: the TABLE_SHARE */
- dict_table_t* table, /*!< in: table */
- const char** col_names, /*!< in: column names, or NULL
- to use table->col_names */
- dict_index_t** drop_index, /*!< in: indexes to be dropped */
- ulint n_drop_index, /*!< in: size of drop_index[] */
- dict_foreign_t**add_fk, /*!< out: foreign constraint added */
- ulint* n_add_fk, /*!< out: number of foreign
- constraints added */
- const trx_t* trx) /*!< in: user transaction */
+ table_share,
+ dict_table_t* table,
+ const char** col_names,
+ dict_index_t** drop_index,
+ ulint n_drop_index,
+ dict_foreign_t**add_fk,
+ ulint* n_add_fk,
+ const trx_t* trx,
+ dict_s_col_list*s_cols)
{
Key* key;
Foreign_key* fk_key;
@@ -915,6 +1475,8 @@ innobase_get_foreign_key_info(
ulint num_fk = 0;
Alter_info* alter_info = ha_alter_info->alter_info;
+ DBUG_ENTER("innobase_get_foreign_key_info");
+
*n_add_fk = 0;
List_iterator<Key> key_iterator(alter_info->key_list);
@@ -957,7 +1519,7 @@ innobase_get_foreign_key_info(
index = innobase_find_fk_index(
ha_alter_info,
table, col_names,
- drop_index, n_drop_index,
+ span<dict_index_t*>(drop_index, n_drop_index),
column_names, i);
		/* MySQL would add an index in the creation
@@ -972,8 +1534,8 @@ innobase_get_foreign_key_info(
if (!index && !innobase_find_equiv_index(
column_names, static_cast<uint>(i),
ha_alter_info->key_info_buffer,
- ha_alter_info->index_add_buffer,
- ha_alter_info->index_add_count)) {
+ span<uint>(ha_alter_info->index_add_buffer,
+ ha_alter_info->index_add_count))) {
my_error(
ER_FK_NO_INDEX_CHILD,
MYF(0),
@@ -988,8 +1550,8 @@ innobase_get_foreign_key_info(
add_fk[num_fk] = dict_mem_foreign_create();
-#ifndef __WIN__
- if(fk_key->ref_db.str) {
+#ifndef _WIN32
+ if (fk_key->ref_db.str) {
tablename_to_filename(fk_key->ref_db.str, db_name,
MAX_DATABASE_NAME_LEN);
db_namep = db_name;
@@ -1020,7 +1582,7 @@ innobase_get_foreign_key_info(
mutex_enter(&dict_sys->mutex);
referenced_table_name = dict_get_referenced_table(
- table->name,
+ table->name.m_name,
db_namep,
db_name_len,
tbl_namep,
@@ -1087,7 +1649,7 @@ innobase_get_foreign_key_info(
/* Not possible to add a foreign key without a
referenced column */
mutex_exit(&dict_sys->mutex);
- my_error(ER_CANNOT_ADD_FOREIGN, MYF(0), tbl_namep);
+ my_error(ER_CANNOT_ADD_FOREIGN, MYF(0));
goto err_exit;
}
@@ -1099,9 +1661,9 @@ innobase_get_foreign_key_info(
referenced_column_names, referenced_num_col)) {
mutex_exit(&dict_sys->mutex);
my_error(
- ER_FK_DUP_NAME,
+ ER_DUP_CONSTRAINT_NAME,
MYF(0),
- add_fk[num_fk]->id);
+ "FOREIGN KEY", add_fk[num_fk]->id);
goto err_exit;
}
@@ -1121,12 +1683,21 @@ innobase_get_foreign_key_info(
goto err_exit;
}
+ if (innobase_check_fk_stored(
+ add_fk[num_fk], table, s_cols)) {
+ my_printf_error(
+ HA_ERR_UNSUPPORTED,
+ "Cannot add foreign key on the base column "
+ "of stored column", MYF(0));
+ goto err_exit;
+ }
+
num_fk++;
}
*n_add_fk = num_fk;
- return(true);
+ DBUG_RETURN(true);
err_exit:
for (ulint i = 0; i <= num_fk; i++) {
if (add_fk[i]) {
@@ -1134,7 +1705,7 @@ err_exit:
}
}
- return(false);
+ DBUG_RETURN(false);
}
/*************************************************************//**
@@ -1188,6 +1759,7 @@ innobase_col_to_mysql(
memcpy(dest, data, len);
break;
+ case DATA_GEOMETRY:
case DATA_BLOB:
/* Skip MySQL BLOBs when reporting an erroneous row
during index creation or table rebuild. */
@@ -1227,36 +1799,32 @@ innobase_col_to_mysql(
/*************************************************************//**
Copies an InnoDB record to table->record[0]. */
-UNIV_INTERN
void
innobase_rec_to_mysql(
/*==================*/
struct TABLE* table, /*!< in/out: MySQL table */
const rec_t* rec, /*!< in: record */
const dict_index_t* index, /*!< in: index */
- const ulint* offsets)/*!< in: rec_get_offsets(
+ const offset_t* offsets)/*!< in: rec_get_offsets(
rec, index, ...) */
{
- uint n_fields = table->s->stored_fields;
- uint sql_idx = 0;
+ uint n_fields = table->s->fields;
ut_ad(n_fields == dict_table_get_n_user_cols(index->table)
- !!(DICT_TF2_FLAG_IS_SET(index->table,
DICT_TF2_FTS_HAS_DOC_ID)));
- for (uint i = 0; i < n_fields; i++, sql_idx++) {
- Field* field;
+ for (uint i = 0; i < n_fields; i++) {
+ Field* field = table->field[i];
ulint ipos;
ulint ilen;
const uchar* ifield;
-
- while (!((field= table->field[sql_idx])->stored_in_db))
- sql_idx++;
+ ulint prefix_col;
field->reset();
- ipos = dict_index_get_nth_col_or_prefix_pos(index, i, TRUE,
- NULL);
+ ipos = dict_index_get_nth_col_or_prefix_pos(
+ index, i, true, false, &prefix_col);
if (ipos == ULINT_UNDEFINED
|| rec_offs_nth_extern(offsets, ipos)) {
@@ -1283,8 +1851,9 @@ null_field:
}
/*************************************************************//**
-Copies an InnoDB index entry to table->record[0]. */
-UNIV_INTERN
+Copies an InnoDB index entry to table->record[0].
+This is used in preparation for print_keydup_error() from
+inline add index */
void
innobase_fields_to_mysql(
/*=====================*/
@@ -1292,24 +1861,26 @@ innobase_fields_to_mysql(
const dict_index_t* index, /*!< in: InnoDB index */
const dfield_t* fields) /*!< in: InnoDB index fields */
{
- uint n_fields = table->s->stored_fields;
- uint sql_idx = 0;
+ uint n_fields = table->s->fields;
+ ulint num_v = 0;
ut_ad(n_fields == dict_table_get_n_user_cols(index->table)
+ + dict_table_get_n_v_cols(index->table)
- !!(DICT_TF2_FLAG_IS_SET(index->table,
DICT_TF2_FTS_HAS_DOC_ID)));
- for (uint i = 0; i < n_fields; i++, sql_idx++) {
- Field* field;
+ for (uint i = 0; i < n_fields; i++) {
+ Field* field = table->field[i];
ulint ipos;
-
- while (!((field= table->field[sql_idx])->stored_in_db))
- sql_idx++;
+ ulint prefix_col;
field->reset();
- ipos = dict_index_get_nth_col_or_prefix_pos(index, i, TRUE,
- NULL);
+ const bool is_v = !field->stored_in_db();
+ const ulint col_n = is_v ? num_v++ : i - num_v;
+
+ ipos = dict_index_get_nth_col_or_prefix_pos(
+ index, col_n, true, is_v, &prefix_col);
if (ipos == ULINT_UNDEFINED
|| dfield_is_ext(&fields[ipos])
@@ -1331,8 +1902,9 @@ innobase_fields_to_mysql(
}
/*************************************************************//**
-Copies an InnoDB row to table->record[0]. */
-UNIV_INTERN
+Copies an InnoDB row to table->record[0].
+This is used in preparation for print_keydup_error() from
+row_log_table_apply() */
void
innobase_row_to_mysql(
/*==================*/
@@ -1340,39 +1912,49 @@ innobase_row_to_mysql(
const dict_table_t* itab, /*!< in: InnoDB table */
const dtuple_t* row) /*!< in: InnoDB row */
{
- uint n_fields = table->s->stored_fields;
- uint sql_idx = 0;
+ uint n_fields = table->s->fields;
+ ulint num_v = 0;
/* The InnoDB row may contain an extra FTS_DOC_ID column at the end. */
ut_ad(row->n_fields == dict_table_get_n_cols(itab));
ut_ad(n_fields == row->n_fields - DATA_N_SYS_COLS
+ + dict_table_get_n_v_cols(itab)
- !!(DICT_TF2_FLAG_IS_SET(itab, DICT_TF2_FTS_HAS_DOC_ID)));
- for (uint i = 0; i < n_fields; i++, sql_idx++) {
- Field* field;
- const dfield_t* df = dtuple_get_nth_field(row, i);
-
- while (!((field= table->field[sql_idx])->stored_in_db))
- sql_idx++;
+ for (uint i = 0; i < n_fields; i++) {
+ Field* field = table->field[i];
field->reset();
+ if (!field->stored_in_db()) {
+			/* Virtual columns are not stored in the InnoDB
+			table, so skip them */
+ num_v++;
+ continue;
+ }
+
+ const dfield_t* df = dtuple_get_nth_field(row, i - num_v);
+
if (dfield_is_ext(df) || dfield_is_null(df)) {
field->set_null();
} else {
field->set_notnull();
innobase_col_to_mysql(
- dict_table_get_nth_col(itab, i),
+ dict_table_get_nth_col(itab, i - num_v),
static_cast<const uchar*>(dfield_get_data(df)),
dfield_get_len(df), field);
}
}
+ if (table->vfield) {
+ my_bitmap_map* old_vcol_set = tmp_use_all_columns(table, table->vcol_set);
+ table->update_virtual_fields(table->file, VCOL_UPDATE_FOR_READ);
+ tmp_restore_column_map(table->vcol_set, old_vcol_set);
+ }
}
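
/* A standalone sketch (not InnoDB code) of the i - num_v indexing used
in the three copy routines above: virtual fields are skipped, and the
running count num_v converts a table field number into a stored-column
number. The field descriptor is a simplified stand-in. */

#include <cassert>
#include <vector>

struct field_desc {
	bool	stored;	/* false for virtual (generated) columns */
	int	value;	/* destination slot, for illustration */
};

/* row holds stored values only; fields interleaves virtual and stored. */
inline void copy_stored(const std::vector<int>& row,
			std::vector<field_desc>& fields)
{
	unsigned num_v = 0;
	for (unsigned i = 0; i < fields.size(); i++) {
		if (!fields[i].stored) {
			num_v++;	/* virtual: nothing stored to copy */
			continue;
		}
		fields[i].value = row[i - num_v];
	}
}

int main()
{
	std::vector<field_desc> f = {{true, 0}, {false, 0}, {true, 0}};
	copy_stored({7, 9}, f);
	assert(f[0].value == 7 && f[2].value == 9);
}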
/*************************************************************//**
Resets table->record[0]. */
-UNIV_INTERN
void
innobase_rec_reset(
/*===============*/
@@ -1388,7 +1970,7 @@ innobase_rec_reset(
/*******************************************************************//**
This function checks that index keys are sensible.
-@return 0 or error number */
+@return 0 or error number */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
int
innobase_check_index_keys(
@@ -1425,11 +2007,24 @@ innobase_check_index_keys(
for (index = dict_table_get_first_index(innodb_table);
index; index = dict_table_get_next_index(index)) {
- if (!strcmp(key.name, index->name)) {
+ if (index->is_committed()
+ && !strcmp(key.name, index->name)) {
break;
}
}
+ /* Now we are in a situation where we have "ADD INDEX x"
+ and an index by the same name already exists. We have 4
+ possible cases:
+ 1. No further clauses for an index x are given. Should reject
+ the operation.
+ 2. "DROP INDEX x" is given. Should allow the operation.
+ 3. "RENAME INDEX x TO y" is given. Should allow the operation.
+ 4. "DROP INDEX x, RENAME INDEX x TO y" is given. Should allow
+ the operation, since no name clash occurs. In this particular
+ case MySQL cancels the operation without calling InnoDB
+ methods. */
+
if (index) {
/* If a key by the same name is being created and
dropped, the name clash is OK. */
@@ -1443,7 +2038,9 @@ innobase_check_index_keys(
}
}
- my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0), key.name);
+
+ my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
+ key.name);
return(ER_WRONG_NAME_FOR_INDEX);
}
@@ -1525,31 +2122,30 @@ innobase_create_index_field_def(
const Field* field;
ibool is_unsigned;
ulint col_type;
- ulint innodb_fieldnr=0;
+ ulint num_v = 0;
DBUG_ENTER("innobase_create_index_field_def");
- /* Virtual columns are not stored in InnoDB data dictionary, thus
- if there is virtual columns we need to skip them to find the
- correct field. */
- for(ulint i = 0; i < key_part->fieldnr; i++) {
- const Field* table_field = altered_table->field[i];
- if (!table_field->stored_in_db) {
- continue;
+ field = new_clustered
+ ? altered_table->field[key_part->fieldnr]
+ : key_part->field;
+
+ for (ulint i = 0; i < key_part->fieldnr; i++) {
+ if (!altered_table->field[i]->stored_in_db()) {
+ num_v++;
}
- innodb_fieldnr++;
}
- field = new_clustered ?
- altered_table->field[key_part->fieldnr]
- : key_part->field;
-
- ut_a(field);
+ col_type = get_innobase_type_from_mysql_type(
+ &is_unsigned, field);
- index_field->col_no = innodb_fieldnr;
- col_type = get_innobase_type_from_mysql_type(&is_unsigned, field);
+ if ((index_field->is_v_col = !field->stored_in_db())) {
+ index_field->col_no = num_v;
+ } else {
+ index_field->col_no = key_part->fieldnr - num_v;
+ }
- if (DATA_BLOB == col_type
+ if (DATA_LARGE_MTYPE(col_type)
|| (key_part->length < field->pack_length()
&& field->type() != MYSQL_TYPE_VARCHAR)
|| (field->type() == MYSQL_TYPE_VARCHAR
@@ -1564,73 +2160,110 @@ innobase_create_index_field_def(
DBUG_VOID_RETURN;
}
-/*******************************************************************//**
-Create index definition for key */
+/** Create index definition for key
+@param[in] altered_table MySQL table that is being altered
+@param[in] keys key definitions
+@param[in] key_number MySQL key number
+@param[in] new_clustered true if generating a new clustered
+index on the table
+@param[in] key_clustered true if this is the new clustered index
+@param[out] index index definition
+@param[in] heap heap where memory is allocated */
static MY_ATTRIBUTE((nonnull))
void
innobase_create_index_def(
-/*======================*/
- const TABLE* altered_table, /*!< in: MySQL table that is
- being altered */
- const KEY* keys, /*!< in: key definitions */
- ulint key_number, /*!< in: MySQL key number */
- bool new_clustered, /*!< in: true if generating
- a new clustered index
- on the table */
- bool key_clustered, /*!< in: true if this is
- the new clustered index */
- index_def_t* index, /*!< out: index definition */
- mem_heap_t* heap) /*!< in: heap where memory
- is allocated */
+ const TABLE* altered_table,
+ const KEY* keys,
+ ulint key_number,
+ bool new_clustered,
+ bool key_clustered,
+ index_def_t* index,
+ mem_heap_t* heap)
{
const KEY* key = &keys[key_number];
ulint i;
- ulint len;
ulint n_fields = key->user_defined_key_parts;
- char* index_name;
DBUG_ENTER("innobase_create_index_def");
DBUG_ASSERT(!key_clustered || new_clustered);
index->fields = static_cast<index_field_t*>(
mem_heap_alloc(heap, n_fields * sizeof *index->fields));
- index->ind_type = 0;
+
+ index->parser = NULL;
index->key_number = key_number;
index->n_fields = n_fields;
- len = strlen(key->name) + 1;
- index->name = index_name = static_cast<char*>(
- mem_heap_alloc(heap, len + !new_clustered));
-
- if (!new_clustered) {
- *index_name++ = TEMP_INDEX_PREFIX;
- }
-
- memcpy(index_name, key->name, len);
-
- if (key->flags & HA_NOSAME) {
- index->ind_type |= DICT_UNIQUE;
- }
+ index->name = mem_heap_strdup(heap, key->name);
+ index->rebuild = new_clustered;
if (key_clustered) {
- DBUG_ASSERT(!(key->flags & HA_FULLTEXT));
+ DBUG_ASSERT(!(key->flags & (HA_FULLTEXT | HA_SPATIAL)));
DBUG_ASSERT(key->flags & HA_NOSAME);
- index->ind_type |= DICT_CLUSTERED;
+ index->ind_type = DICT_CLUSTERED | DICT_UNIQUE;
} else if (key->flags & HA_FULLTEXT) {
+ DBUG_ASSERT(!(key->flags & (HA_SPATIAL | HA_NOSAME)));
DBUG_ASSERT(!(key->flags & HA_KEYFLAG_MASK
& ~(HA_FULLTEXT
| HA_PACK_KEY
| HA_BINARY_PACK_KEY)));
+ index->ind_type = DICT_FTS;
+
+		/* Note: key->parser is only the parser name;
+		we need to get the parser from altered_table instead */
+
+ if (key->flags & HA_USES_PARSER) {
+ for (ulint j = 0; j < altered_table->s->keys; j++) {
+ if (ut_strcmp(altered_table->key_info[j].name,
+ key->name) == 0) {
+ ut_ad(altered_table->key_info[j].flags
+ & HA_USES_PARSER);
+
+ plugin_ref parser =
+ altered_table->key_info[j].parser;
+ index->parser =
+ static_cast<st_mysql_ftparser*>(
+ plugin_decl(parser)->info);
+
+ break;
+ }
+ }
+
+ DBUG_EXECUTE_IF("fts_instrument_use_default_parser",
+ index->parser = &fts_default_parser;);
+ ut_ad(index->parser);
+ }
+ } else if (key->flags & HA_SPATIAL) {
DBUG_ASSERT(!(key->flags & HA_NOSAME));
- DBUG_ASSERT(!index->ind_type);
- index->ind_type |= DICT_FTS;
+ index->ind_type = DICT_SPATIAL;
+ ut_ad(n_fields == 1);
+ ulint num_v = 0;
+
+		/* Count the virtual fields that precede this
+		spatially indexed field */
+ for (ulint i = 0; i < key->key_part->fieldnr; i++) {
+ num_v += !altered_table->field[i]->stored_in_db();
+ }
+ index->fields[0].col_no = key->key_part[0].fieldnr - num_v;
+ index->fields[0].prefix_len = 0;
+ index->fields[0].is_v_col = false;
+
+ /* Currently, the spatial index cannot be created
+ on virtual columns. It is blocked in the SQL layer. */
+ DBUG_ASSERT(key->key_part[0].field->stored_in_db());
+ } else {
+ index->ind_type = (key->flags & HA_NOSAME) ? DICT_UNIQUE : 0;
}
- for (i = 0; i < n_fields; i++) {
- innobase_create_index_field_def(
- new_clustered,
- altered_table,
- &key->key_part[i],
- &index->fields[i]);
+ if (!(key->flags & HA_SPATIAL)) {
+ for (i = 0; i < n_fields; i++) {
+ innobase_create_index_field_def(
+ new_clustered, altered_table,
+ &key->key_part[i], &index->fields[i]);
+
+ if (index->fields[i].is_v_col) {
+ index->ind_type |= DICT_VIRTUAL;
+ }
+ }
}
DBUG_VOID_RETURN;
@@ -1648,22 +2281,26 @@ innobase_fts_check_doc_id_col(
const TABLE* altered_table,
/*!< in: MySQL table with
fulltext index */
- ulint* fts_doc_col_no)
+ ulint* fts_doc_col_no,
/*!< out: The column number for
Doc ID, or ULINT_UNDEFINED
if it is of wrong type */
+	ulint*		num_v)	/*!< out: number of virtual columns */
{
*fts_doc_col_no = ULINT_UNDEFINED;
- const uint n_cols = altered_table->s->stored_fields;
- uint sql_idx = 0;
- uint i;
+ const uint n_cols = altered_table->s->fields;
+ ulint i;
+
+ *num_v = 0;
+
+ for (i = 0; i < n_cols; i++) {
+ const Field* field = altered_table->field[i];
+
+ if (!field->stored_in_db()) {
+ (*num_v)++;
+ }
- for (i = 0; i < n_cols; i++, sql_idx++) {
- const Field* field;
- while (!((field= altered_table->field[sql_idx])->
- stored_in_db))
- sql_idx++;
if (my_strcasecmp(system_charset_info,
field->field_name, FTS_DOC_ID_COL_NAME)) {
continue;
@@ -1675,11 +2312,12 @@ innobase_fts_check_doc_id_col(
} else if (field->type() != MYSQL_TYPE_LONGLONG
|| field->pack_length() != 8
|| field->real_maybe_null()
- || !(field->flags & UNSIGNED_FLAG)) {
+ || !(field->flags & UNSIGNED_FLAG)
+ || !field->stored_in_db()) {
my_error(ER_INNODB_FT_WRONG_DOCID_COLUMN, MYF(0),
field->field_name);
} else {
- *fts_doc_col_no = i;
+ *fts_doc_col_no = i - *num_v;
}
return(true);
@@ -1689,6 +2327,9 @@ innobase_fts_check_doc_id_col(
return(false);
}
+	/* Exclude the virtual columns from the count */
+ i -= *num_v;
+
for (; i + DATA_N_SYS_COLS < (uint) table->n_cols; i++) {
const char* name = dict_table_get_col_name(table, i);
@@ -1717,8 +2358,7 @@ innobase_fts_check_doc_id_col(
/*******************************************************************//**
Check whether the table has a unique index with FTS_DOC_ID_INDEX_NAME
on the Doc ID column.
-@return the status of the FTS_DOC_ID index */
-UNIV_INTERN
+@return the status of the FTS_DOC_ID index */
enum fts_doc_id_index_enum
innobase_fts_check_doc_id_index(
/*============================*/
@@ -1787,7 +2427,8 @@ innobase_fts_check_doc_id_index(
if (strcmp(field->name, FTS_DOC_ID_COL_NAME) == 0
&& field->col->mtype == DATA_INT
&& field->col->len == 8
- && field->col->prtype & DATA_NOT_NULL) {
+ && field->col->prtype & DATA_NOT_NULL
+ && !dict_col_is_virtual(field->col)) {
if (fts_doc_col_no) {
*fts_doc_col_no = dict_col_get_no(field->col);
}
@@ -1804,9 +2445,8 @@ innobase_fts_check_doc_id_index(
/*******************************************************************//**
Check whether the table has a unique index with FTS_DOC_ID_INDEX_NAME
on the Doc ID column in MySQL create index definition.
-@return FTS_EXIST_DOC_ID_INDEX if there exists the FTS_DOC_ID index,
+@return FTS_EXIST_DOC_ID_INDEX if there exists the FTS_DOC_ID index,
FTS_INCORRECT_DOC_ID_INDEX if the FTS_DOC_ID index is of wrong format */
-UNIV_INTERN
enum fts_doc_id_index_enum
innobase_fts_check_doc_id_index_in_def(
/*===================================*/
@@ -1852,7 +2492,7 @@ ELSE
ENDIF
-@return key definitions */
+@return key definitions */
static MY_ATTRIBUTE((nonnull, warn_unused_result, malloc))
index_def_t*
innobase_create_key_defs(
@@ -1939,8 +2579,8 @@ innobase_create_key_defs(
index->fields = NULL;
index->n_fields = 0;
index->ind_type = DICT_CLUSTERED;
- index->name = mem_heap_strdup(
- heap, innobase_index_reserve_name);
+ index->name = innobase_index_reserve_name;
+ index->rebuild = true;
index->key_number = ~0;
primary_key_number = ULINT_UNDEFINED;
goto created_clustered;
@@ -1951,7 +2591,7 @@ innobase_create_key_defs(
/* Create the PRIMARY key index definition */
innobase_create_index_def(
altered_table, key_info, primary_key_number,
- TRUE, TRUE, indexdef++, heap);
+ true, true, indexdef++, heap);
created_clustered:
n_add = 1;
@@ -1962,8 +2602,8 @@ created_clustered:
}
/* Copy the index definitions. */
innobase_create_index_def(
- altered_table, key_info, i, TRUE, FALSE,
- indexdef, heap);
+ altered_table, key_info, i, true,
+ false, indexdef, heap);
if (indexdef->ind_type & DICT_FTS) {
n_fts_add++;
@@ -1974,12 +2614,13 @@ created_clustered:
}
if (n_fts_add > 0) {
+ ulint num_v = 0;
+
if (!add_fts_doc_id
&& !innobase_fts_check_doc_id_col(
NULL, altered_table,
- &fts_doc_id_col)) {
- fts_doc_id_col =
- altered_table->s->stored_fields;
+ &fts_doc_id_col, &num_v)) {
+ fts_doc_id_col = altered_table->s->fields - num_v;
add_fts_doc_id = true;
}
@@ -2007,8 +2648,8 @@ created_clustered:
for (ulint i = 0; i < n_add; i++) {
innobase_create_index_def(
- altered_table, key_info, add[i], FALSE, FALSE,
- indexdef, heap);
+ altered_table, key_info, add[i],
+ false, false, indexdef, heap);
if (indexdef->ind_type & DICT_FTS) {
n_fts_add++;
@@ -2028,23 +2669,14 @@ created_clustered:
index->n_fields = 1;
index->fields->col_no = fts_doc_id_col;
index->fields->prefix_len = 0;
+ index->fields->is_v_col = false;
index->ind_type = DICT_UNIQUE;
+ ut_ad(!rebuild
+ || !add_fts_doc_id
+ || fts_doc_id_col <= altered_table->s->fields);
- if (rebuild) {
- index->name = mem_heap_strdup(
- heap, FTS_DOC_ID_INDEX_NAME);
- ut_ad(!add_fts_doc_id
- || fts_doc_id_col == altered_table->s->stored_fields);
- } else {
- char* index_name;
- index->name = index_name = static_cast<char*>(
- mem_heap_alloc(
- heap,
- 1 + sizeof FTS_DOC_ID_INDEX_NAME));
- *index_name++ = TEMP_INDEX_PREFIX;
- memcpy(index_name, FTS_DOC_ID_INDEX_NAME,
- sizeof FTS_DOC_ID_INDEX_NAME);
- }
+ index->name = FTS_DOC_ID_INDEX_NAME;
+ index->rebuild = rebuild;
/* TODO: assign a real MySQL key number for this */
index->key_number = ULINT_UNDEFINED;
@@ -2059,148 +2691,17 @@ created_clustered:
DBUG_RETURN(indexdefs);
}
-/*******************************************************************//**
-Check each index column size, make sure they do not exceed the max limit
-@return true if index column size exceeds limit */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-innobase_check_column_length(
-/*=========================*/
- ulint max_col_len, /*!< in: maximum column length */
- const KEY* key_info) /*!< in: Indexes to be created */
+MY_ATTRIBUTE((warn_unused_result))
+bool too_big_key_part_length(size_t max_field_len, const KEY& key)
{
- for (ulint key_part = 0; key_part < key_info->user_defined_key_parts; key_part++) {
- if (key_info->key_part[key_part].length > max_col_len) {
- return(true);
+ for (ulint i = 0; i < key.user_defined_key_parts; i++) {
+ if (key.key_part[i].length > max_field_len) {
+ return true;
}
}
- return(false);
+ return false;
}
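
/* A standalone usage sketch (not InnoDB code) for the key-part length
check above. The KEY layout here is a minimal stand-in with only the
members the check reads, and 3072 is an illustrative limit, not the
value InnoDB derives from the row format and page size. */

#include <cassert>
#include <cstddef>

struct key_part_stub { unsigned length; };

struct key_stub {
	unsigned		user_defined_key_parts;
	const key_part_stub*	key_part;
};

inline bool too_big(size_t max_field_len, const key_stub& key)
{
	for (unsigned i = 0; i < key.user_defined_key_parts; i++) {
		if (key.key_part[i].length > max_field_len) {
			return true;
		}
	}
	return false;
}

int main()
{
	const key_part_stub parts[] = {{767}, {4000}};
	const key_stub k = {2, parts};
	assert(too_big(3072, k));	/* the 4000-byte part exceeds the cap */
}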
-struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx
-{
- /** Dummy query graph */
- que_thr_t* thr;
- /** reference to the prebuilt struct of the creating instance */
- row_prebuilt_t*&prebuilt;
- /** InnoDB indexes being created */
- dict_index_t** add_index;
- /** MySQL key numbers for the InnoDB indexes that are being created */
- const ulint* add_key_numbers;
- /** number of InnoDB indexes being created */
- ulint num_to_add_index;
- /** InnoDB indexes being dropped */
- dict_index_t** drop_index;
- /** number of InnoDB indexes being dropped */
- const ulint num_to_drop_index;
- /** InnoDB foreign key constraints being dropped */
- dict_foreign_t** drop_fk;
- /** number of InnoDB foreign key constraints being dropped */
- const ulint num_to_drop_fk;
- /** InnoDB foreign key constraints being added */
- dict_foreign_t** add_fk;
- /** number of InnoDB foreign key constraints being dropped */
- const ulint num_to_add_fk;
- /** whether to create the indexes online */
- bool online;
- /** memory heap */
- mem_heap_t* heap;
- /** dictionary transaction */
- trx_t* trx;
- /** original table (if rebuilt, differs from indexed_table) */
- dict_table_t* old_table;
- /** table where the indexes are being created or dropped */
- dict_table_t* new_table;
- /** mapping of old column numbers to new ones, or NULL */
- const ulint* col_map;
- /** new column names, or NULL if nothing was renamed */
- const char** col_names;
- /** added AUTO_INCREMENT column position, or ULINT_UNDEFINED */
- const ulint add_autoinc;
- /** default values of ADD COLUMN, or NULL */
- const dtuple_t* add_cols;
- /** autoinc sequence to use */
- ib_sequence_t sequence;
- /** maximum auto-increment value */
- ulonglong max_autoinc;
- /** temporary table name to use for old table when renaming tables */
- const char* tmp_name;
-
- ha_innobase_inplace_ctx(row_prebuilt_t*& prebuilt_arg,
- dict_index_t** drop_arg,
- ulint num_to_drop_arg,
- dict_foreign_t** drop_fk_arg,
- ulint num_to_drop_fk_arg,
- dict_foreign_t** add_fk_arg,
- ulint num_to_add_fk_arg,
- bool online_arg,
- mem_heap_t* heap_arg,
- dict_table_t* new_table_arg,
- const char** col_names_arg,
- ulint add_autoinc_arg,
- ulonglong autoinc_col_min_value_arg,
- ulonglong autoinc_col_max_value_arg) :
- inplace_alter_handler_ctx(),
- prebuilt (prebuilt_arg),
- add_index (0), add_key_numbers (0), num_to_add_index (0),
- drop_index (drop_arg), num_to_drop_index (num_to_drop_arg),
- drop_fk (drop_fk_arg), num_to_drop_fk (num_to_drop_fk_arg),
- add_fk (add_fk_arg), num_to_add_fk (num_to_add_fk_arg),
- online (online_arg), heap (heap_arg), trx (0),
- old_table (prebuilt_arg->table),
- new_table (new_table_arg),
- col_map (0), col_names (col_names_arg),
- add_autoinc (add_autoinc_arg),
- add_cols (0),
- sequence(prebuilt->trx->mysql_thd,
- autoinc_col_min_value_arg, autoinc_col_max_value_arg),
- max_autoinc (0),
- tmp_name (0)
- {
-#ifdef UNIV_DEBUG
- for (ulint i = 0; i < num_to_add_index; i++) {
- ut_ad(!add_index[i]->to_be_dropped);
- }
- for (ulint i = 0; i < num_to_drop_index; i++) {
- ut_ad(drop_index[i]->to_be_dropped);
- }
-#endif /* UNIV_DEBUG */
-
- thr = pars_complete_graph_for_exec(NULL, prebuilt->trx, heap);
- }
-
- ~ha_innobase_inplace_ctx()
- {
- mem_heap_free(heap);
- }
-
- /** Determine if the table will be rebuilt.
- @return whether the table will be rebuilt */
- bool need_rebuild () const { return(old_table != new_table); }
-
- /** Share context between partitions.
- @param[in] ctx context from another partition of the table */
- void set_shared_data(const inplace_alter_handler_ctx& ctx)
- {
- if (add_autoinc != ULINT_UNDEFINED) {
- const ha_innobase_inplace_ctx& ha_ctx =
- static_cast<const ha_innobase_inplace_ctx&>
- (ctx);
- /* When adding an AUTO_INCREMENT column to a
- partitioned InnoDB table, we must share the
- sequence for all partitions. */
- ut_ad(ha_ctx.add_autoinc == add_autoinc);
- ut_ad(ha_ctx.sequence.last());
- sequence = ha_ctx.sequence;
- }
- }
-
-private:
- // Disable copying
- ha_innobase_inplace_ctx(const ha_innobase_inplace_ctx&);
- ha_innobase_inplace_ctx& operator=(const ha_innobase_inplace_ctx&);
-};
-
/********************************************************************//**
Drop any indexes that we were not able to free previously due to
open table handles. */
@@ -2219,7 +2720,7 @@ online_retry_drop_indexes_low(
may have prebuilt->table pointing to the table. However, these
other threads should be between statements, waiting for the
next statement to execute, or for a meta-data lock. */
- ut_ad(table->n_ref_count >= 1);
+ ut_ad(table->get_ref_count() >= 1);
if (table->drop_aborted) {
row_merge_drop_indexes(trx, table, TRUE);
@@ -2248,12 +2749,10 @@ online_retry_drop_indexes(
trx_free_for_mysql(trx);
}
-#ifdef UNIV_DEBUG
- mutex_enter(&dict_sys->mutex);
- dict_table_check_for_dup_indexes(table, CHECK_ALL_COMPLETE);
- mutex_exit(&dict_sys->mutex);
- ut_a(!table->drop_aborted);
-#endif /* UNIV_DEBUG */
+ ut_d(mutex_enter(&dict_sys->mutex));
+ ut_d(dict_table_check_for_dup_indexes(table, CHECK_ALL_COMPLETE));
+ ut_d(mutex_exit(&dict_sys->mutex));
+ ut_ad(!table->drop_aborted);
}
/********************************************************************//**
@@ -2267,6 +2766,7 @@ online_retry_drop_indexes_with_trx(
trx_t* trx) /*!< in/out: transaction */
{
ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
+
ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
/* Now that the dictionary is being locked, check if we can
@@ -2284,9 +2784,9 @@ online_retry_drop_indexes_with_trx(
}
/** Determines if InnoDB is dropping a foreign key constraint.
-@param foreign the constraint
-@param drop_fk constraints being dropped
-@param n_drop_fk number of constraints that are being dropped
+@param foreign the constraint
+@param drop_fk constraints being dropped
+@param n_drop_fk number of constraints that are being dropped
@return whether the constraint is being dropped */
MY_ATTRIBUTE((pure, nonnull(1), warn_unused_result))
inline
@@ -2307,13 +2807,13 @@ innobase_dropping_foreign(
/** Determines if an InnoDB FOREIGN KEY constraint depends on a
column that is being dropped or modified to NOT NULL.
-@param user_table InnoDB table as it is before the ALTER operation
-@param col_name Name of the column being altered
-@param drop_fk constraints being dropped
-@param n_drop_fk number of constraints that are being dropped
-@param drop true=drop column, false=set NOT NULL
-@retval true Not allowed (will call my_error())
-@retval false Allowed
+@param user_table InnoDB table as it is before the ALTER operation
+@param col_name Name of the column being altered
+@param drop_fk constraints being dropped
+@param n_drop_fk number of constraints that are being dropped
+@param drop true=drop column, false=set NOT NULL
+@retval true Not allowed (will call my_error())
+@retval false Allowed
*/
MY_ATTRIBUTE((pure, nonnull(1,4), warn_unused_result))
static
@@ -2391,7 +2891,7 @@ innobase_check_foreigns_low(
display_name, (sizeof display_name) - 1,
foreign->foreign_table_name,
strlen(foreign->foreign_table_name),
- NULL, TRUE);
+ NULL);
*buf_end = '\0';
my_error(ER_FK_COLUMN_CANNOT_DROP_CHILD,
MYF(0), col_name, foreign->id,
@@ -2406,14 +2906,14 @@ innobase_check_foreigns_low(
/** Determines if an InnoDB FOREIGN KEY constraint depends on a
column that is being dropped or modified to NOT NULL.
-@param ha_alter_info Data used during in-place alter
-@param altered_table MySQL table that is being altered
-@param old_table MySQL table as it is before the ALTER operation
-@param user_table InnoDB table as it is before the ALTER operation
-@param drop_fk constraints being dropped
-@param n_drop_fk number of constraints that are being dropped
-@retval true Not allowed (will call my_error())
-@retval false Allowed
+@param ha_alter_info Data used during in-place alter
+@param altered_table MySQL table that is being altered
+@param old_table MySQL table as it is before the ALTER operation
+@param user_table InnoDB table as it is before the ALTER operation
+@param drop_fk constraints being dropped
+@param n_drop_fk number of constraints that are being dropped
+@retval true Not allowed (will call my_error())
+@retval false Allowed
*/
MY_ATTRIBUTE((pure, nonnull(1,2,3,4), warn_unused_result))
static
@@ -2456,10 +2956,10 @@ innobase_check_foreigns(
/** Convert a default value for ADD COLUMN.
-@param heap Memory heap where allocated
-@param dfield InnoDB data field to copy to
-@param field MySQL value for the column
-@param comp nonzero if in compact format */
+@param heap Memory heap where allocated
+@param dfield InnoDB data field to copy to
+@param field MySQL value for the column
+@param comp nonzero if in compact format */
static MY_ATTRIBUTE((nonnull))
void
innobase_build_col_map_add(
@@ -2478,21 +2978,23 @@ innobase_build_col_map_add(
byte* buf = static_cast<byte*>(mem_heap_alloc(heap, size));
+ const byte* mysql_data = field->ptr;
+
row_mysql_store_col_in_innobase_format(
- dfield, buf, TRUE, field->ptr, size, comp);
+ dfield, buf, true, mysql_data, size, comp);
}
/** Construct the translation table for reordering, dropping or
adding columns.
-@param ha_alter_info Data used during in-place alter
-@param altered_table MySQL table that is being altered
-@param table MySQL table as it is before the ALTER operation
-@param new_table InnoDB table corresponding to MySQL altered_table
-@param old_table InnoDB table corresponding to MYSQL table
-@param add_cols Default values for ADD COLUMN, or NULL if no ADD COLUMN
-@param heap Memory heap where allocated
-@return array of integers, mapping column numbers in the table
+@param ha_alter_info Data used during in-place alter
+@param altered_table MySQL table that is being altered
+@param table MySQL table as it is before the ALTER operation
+@param new_table InnoDB table corresponding to MySQL altered_table
+@param old_table InnoDB table corresponding to MYSQL table
+@param add_cols Default values for ADD COLUMN, or NULL if no ADD COLUMN
+@param heap Memory heap where allocated
+@return array of integers, mapping column numbers in the table
to column numbers in altered_table */
static MY_ATTRIBUTE((nonnull(1,2,3,4,5,7), warn_unused_result))
const ulint*
@@ -2506,64 +3008,91 @@ innobase_build_col_map(
dtuple_t* add_cols,
mem_heap_t* heap)
{
- uint old_i, old_innobase_i;
DBUG_ENTER("innobase_build_col_map");
DBUG_ASSERT(altered_table != table);
DBUG_ASSERT(new_table != old_table);
DBUG_ASSERT(dict_table_get_n_cols(new_table)
- >= altered_table->s->stored_fields + DATA_N_SYS_COLS);
+ + dict_table_get_n_v_cols(new_table)
+ >= altered_table->s->fields + DATA_N_SYS_COLS);
DBUG_ASSERT(dict_table_get_n_cols(old_table)
- >= table->s->stored_fields + DATA_N_SYS_COLS);
+ + dict_table_get_n_v_cols(old_table)
+ >= table->s->fields + DATA_N_SYS_COLS
+ || ha_innobase::omits_virtual_cols(*table->s));
DBUG_ASSERT(!!add_cols == !!(ha_alter_info->handler_flags
& Alter_inplace_info::ADD_COLUMN));
DBUG_ASSERT(!add_cols || dtuple_get_n_fields(add_cols)
== dict_table_get_n_cols(new_table));
+ const uint old_n_v_cols = uint(table->s->fields
+ - table->s->stored_fields);
+ DBUG_ASSERT(old_n_v_cols == old_table->n_v_cols
+ || table->s->frm_version < FRM_VER_EXPRESSSIONS);
+ DBUG_ASSERT(!old_n_v_cols || table->s->virtual_fields);
+
ulint* col_map = static_cast<ulint*>(
- mem_heap_alloc(heap, old_table->n_cols * sizeof *col_map));
+ mem_heap_alloc(
+ heap, (size_t(old_table->n_cols) + old_n_v_cols)
+ * sizeof *col_map));
List_iterator_fast<Create_field> cf_it(
ha_alter_info->alter_info->create_list);
- uint i = 0, sql_idx = 0;
+ uint i = 0;
+ uint num_v = 0;
/* Any dropped columns will map to ULINT_UNDEFINED. */
- for (old_innobase_i = 0;
- old_innobase_i + DATA_N_SYS_COLS < old_table->n_cols;
- old_innobase_i++) {
- col_map[old_innobase_i] = ULINT_UNDEFINED;
+ for (uint old_i = 0; old_i + DATA_N_SYS_COLS < old_table->n_cols;
+ old_i++) {
+ col_map[old_i] = ULINT_UNDEFINED;
+ }
+
+ for (uint old_i = 0; old_i < old_n_v_cols; old_i++) {
+ col_map[old_i + old_table->n_cols] = ULINT_UNDEFINED;
}
+ const bool omits_virtual = ha_innobase::omits_virtual_cols(*table->s);
+
while (const Create_field* new_field = cf_it++) {
- if (!new_field->stored_in_db)
- {
- sql_idx++;
- continue;
- }
- for (old_i = 0, old_innobase_i= 0;
- table->field[old_i];
- old_i++) {
+ bool is_v = !new_field->stored_in_db();
+ ulint num_old_v = 0;
+
+ for (uint old_i = 0; table->field[old_i]; old_i++) {
const Field* field = table->field[old_i];
- if (!table->field[old_i]->stored_in_db)
- continue;
+ if (!field->stored_in_db()) {
+ if (is_v && new_field->field == field) {
+ if (!omits_virtual) {
+ col_map[old_table->n_cols
+ + num_v]
+ = num_old_v;
+ }
+ num_old_v++;
+ goto found_col;
+ }
+ num_old_v++;
+ continue;
+ }
+
if (new_field->field == field) {
- col_map[old_innobase_i] = i;
+ col_map[old_i - num_old_v] = i;
goto found_col;
}
- old_innobase_i++;
}
+ ut_ad(!is_v);
innobase_build_col_map_add(
heap, dtuple_get_nth_field(add_cols, i),
- altered_table->field[sql_idx],
+ altered_table->field[i + num_v],
dict_table_is_comp(new_table));
found_col:
- i++;
- sql_idx++;
+ if (is_v) {
+ num_v++;
+ } else {
+ i++;
+ }
}
- DBUG_ASSERT(i == altered_table->s->stored_fields);
+ DBUG_ASSERT(i == altered_table->s->fields - num_v);
- i = table->s->stored_fields;
+ i = table->s->fields - old_n_v_cols;
/* Add the InnoDB hidden FTS_DOC_ID column, if any. */
if (i + DATA_N_SYS_COLS < old_table->n_cols) {
@@ -2573,17 +3102,21 @@ found_col:
DICT_TF2_FTS_HAS_DOC_ID));
DBUG_ASSERT(i + DATA_N_SYS_COLS + 1 == old_table->n_cols);
DBUG_ASSERT(!strcmp(dict_table_get_col_name(
- old_table, table->s->stored_fields),
+ old_table, i),
FTS_DOC_ID_COL_NAME));
- if (altered_table->s->stored_fields + DATA_N_SYS_COLS
+ if (altered_table->s->fields + DATA_N_SYS_COLS
+ - new_table->n_v_cols
< new_table->n_cols) {
DBUG_ASSERT(DICT_TF2_FLAG_IS_SET(
new_table,
DICT_TF2_FTS_HAS_DOC_ID));
- DBUG_ASSERT(altered_table->s->stored_fields
+ DBUG_ASSERT(altered_table->s->fields
+ DATA_N_SYS_COLS + 1
- == new_table->n_cols);
- col_map[i] = altered_table->s->stored_fields;
+ == static_cast<ulint>(
+ new_table->n_cols
+ + new_table->n_v_cols));
+ col_map[i] = altered_table->s->fields
+ - new_table->n_v_cols;
} else {
DBUG_ASSERT(!DICT_TF2_FLAG_IS_SET(
new_table,
@@ -2607,9 +3140,9 @@ found_col:
/** Drop the newly created FTS-index-related auxiliary tables during the
fast index creation process, before fts_add_index is called
-@param table table that was being rebuilt online
-@param trx transaction
-@return DB_SUCCESS if successful, otherwise last error code
+@param table table that was being rebuilt online
+@param trx transaction
+@return DB_SUCCESS if successful, otherwise last error code
*/
static
dberr_t
@@ -2637,7 +3170,7 @@ innobase_drop_fts_index_table(
return(ret_err);
}
-/** Get the new column names if any columns were renamed
+/** Get the new non-virtual column names if any columns were renamed
@param ha_alter_info Data used during in-place alter
@param altered_table MySQL table that is being altered
@param table MySQL table as it is before the ALTER operation
@@ -2657,7 +3190,7 @@ innobase_get_col_names(
uint i;
DBUG_ENTER("innobase_get_col_names");
- DBUG_ASSERT(user_table->n_def > table->s->fields);
+ DBUG_ASSERT(user_table->n_t_def > table->s->fields);
DBUG_ASSERT(ha_alter_info->handler_flags
& Alter_inplace_info::ALTER_COLUMN_NAME);
@@ -2668,11 +3201,18 @@ innobase_get_col_names(
List_iterator_fast<Create_field> cf_it(
ha_alter_info->alter_info->create_list);
while (const Create_field* new_field = cf_it++) {
+ ulint num_v = 0;
DBUG_ASSERT(i < altered_table->s->fields);
+ if (!new_field->stored_in_db()) {
+ continue;
+ }
+
for (uint old_i = 0; table->field[old_i]; old_i++) {
+ num_v += !table->field[old_i]->stored_in_db();
+
if (new_field->field == table->field[old_i]) {
- cols[old_i] = new_field->field_name;
+ cols[old_i - num_v] = new_field->field_name;
break;
}
}
@@ -2681,7 +3221,7 @@ innobase_get_col_names(
}
/* Copy the internal column names. */
- i = table->s->fields;
+ i = table->s->fields - user_table->n_v_def;
cols[i] = dict_table_get_col_name(user_table, i);
while (++i < user_table->n_def) {
@@ -2691,21 +3231,1065 @@ innobase_get_col_names(
DBUG_RETURN(cols);
}
+/** Check whether the column prefix is increased, decreased, or unchanged.
+@param[in] new_prefix_len new prefix length
+@param[in]	old_prefix_len	old prefix length
+@retval 1 prefix is increased
+@retval 0 prefix is unchanged
+@retval -1 prefix is decreased */
+static inline
+lint
+innobase_pk_col_prefix_compare(
+ ulint new_prefix_len,
+ ulint old_prefix_len)
+{
+ ut_ad(new_prefix_len < COMPRESSED_REC_MAX_DATA_SIZE);
+ ut_ad(old_prefix_len < COMPRESSED_REC_MAX_DATA_SIZE);
+
+ if (new_prefix_len == old_prefix_len) {
+ return(0);
+ }
+
+ if (new_prefix_len == 0) {
+ new_prefix_len = ULINT_MAX;
+ }
+
+ if (old_prefix_len == 0) {
+ old_prefix_len = ULINT_MAX;
+ }
+
+ if (new_prefix_len > old_prefix_len) {
+ return(1);
+ } else {
+ return(-1);
+ }
+}
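
/* A standalone sketch (not InnoDB code) of the comparison above: a
prefix length of 0 means "the whole column", so it is mapped to the
maximum value before comparing. */

#include <cassert>
#include <climits>

inline int prefix_compare(unsigned new_len, unsigned old_len)
{
	if (new_len == old_len) {
		return 0;
	}
	if (new_len == 0) {
		new_len = UINT_MAX;	/* full column */
	}
	if (old_len == 0) {
		old_len = UINT_MAX;	/* full column */
	}
	return new_len > old_len ? 1 : -1;
}

int main()
{
	assert(prefix_compare(10, 10) == 0);
	assert(prefix_compare(0, 10) == 1);	/* prefix removed: longer */
	assert(prefix_compare(10, 0) == -1);	/* prefix added: shorter */
}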
+
+/** Check whether the column exists in the old table.
+@param[in] new_col_no new column no
+@param[in] col_map mapping of old column numbers to new ones
+@param[in] col_map_size the column map size
+@return true if the column exists, otherwise false. */
+static inline
+bool
+innobase_pk_col_is_existing(
+ const ulint new_col_no,
+ const ulint* col_map,
+ const ulint col_map_size)
+{
+ for (ulint i = 0; i < col_map_size; i++) {
+ if (col_map[i] == new_col_no) {
+ return(true);
+ }
+ }
+
+ return(false);
+}
+
+/** Determine whether both indexes have the same set of primary key
+fields arranged in the same order.
+
+Rules when we cannot skip sorting:
+(1) Removing existing PK columns somewhere else than at the end of the PK;
+(2) Adding existing columns to the PK, except at the end of the PK when no
+columns are removed from the PK;
+(3) Changing the order of existing PK columns;
+(4) Decreasing a prefix length is treated like removing an existing PK
+column and follows rule (1); increasing a prefix length is treated like
+adding an existing PK column and follows rule (2).
+@param[in] col_map mapping of old column numbers to new ones
+@param[in] ha_alter_info Data used during in-place alter
+@param[in] old_clust_index index to be compared
+@param[in] new_clust_index index to be compared
+@retval true if both indexes have the same order.
+@retval false otherwise. */
+static MY_ATTRIBUTE((warn_unused_result))
+bool
+innobase_pk_order_preserved(
+ const ulint* col_map,
+ const dict_index_t* old_clust_index,
+ const dict_index_t* new_clust_index)
+{
+ ulint old_n_uniq
+ = dict_index_get_n_ordering_defined_by_user(
+ old_clust_index);
+ ulint new_n_uniq
+ = dict_index_get_n_ordering_defined_by_user(
+ new_clust_index);
+
+ ut_ad(dict_index_is_clust(old_clust_index));
+ ut_ad(dict_index_is_clust(new_clust_index));
+ ut_ad(old_clust_index->table != new_clust_index->table);
+ ut_ad(col_map != NULL);
+
+ if (old_n_uniq == 0) {
+ /* There was no PRIMARY KEY in the table.
+ If there is no PRIMARY KEY after the ALTER either,
+ no sorting is needed. */
+ return(new_n_uniq == old_n_uniq);
+ }
+
+ /* DROP PRIMARY KEY is only allowed in combination with
+ ADD PRIMARY KEY. */
+ ut_ad(new_n_uniq > 0);
+
+	/* The order of the last processed new_clust_index key field,
+	not counting columns added by ADD COLUMN, which are constant. */
+ lint last_field_order = -1;
+ ulint existing_field_count = 0;
+ ulint old_n_cols = dict_table_get_n_cols(old_clust_index->table);
+ for (ulint new_field = 0; new_field < new_n_uniq; new_field++) {
+ ulint new_col_no =
+ new_clust_index->fields[new_field].col->ind;
+
+ /* Check if there is a match in old primary key. */
+ ulint old_field = 0;
+ while (old_field < old_n_uniq) {
+ ulint old_col_no =
+ old_clust_index->fields[old_field].col->ind;
+
+ if (col_map[old_col_no] == new_col_no) {
+ break;
+ }
+
+ old_field++;
+ }
+
+ /* The order of key field in the new primary key.
+ 1. old PK column: idx in old primary key
+ 2. existing column: old_n_uniq + sequence no
+ 3. newly added column: no order */
+ lint new_field_order;
+ const bool old_pk_column = old_field < old_n_uniq;
+
+ if (old_pk_column) {
+ new_field_order = old_field;
+ } else if (innobase_pk_col_is_existing(new_col_no, col_map,
+ old_n_cols)) {
+ new_field_order = old_n_uniq + existing_field_count++;
+ } else {
+ /* Skip newly added column. */
+ continue;
+ }
+
+ if (last_field_order + 1 != new_field_order) {
+ /* Old PK order is not kept, or existing column
+ is not added at the end of old PK. */
+ return(false);
+ }
+
+ last_field_order = new_field_order;
+
+ if (!old_pk_column) {
+ continue;
+ }
+
+ /* Check prefix length change. */
+ const lint prefix_change = innobase_pk_col_prefix_compare(
+ new_clust_index->fields[new_field].prefix_len,
+ old_clust_index->fields[old_field].prefix_len);
+
+ if (prefix_change < 0) {
+ /* If a column's prefix length is decreased, it should
+ be the last old PK column in the new PK.
+ Note: we set last_field_order to -2, so that if there
+ are any old PK columns or existing columns after it in
+ the new PK, the comparison to new_field_order will fail
+ in the next round. */
+ last_field_order = -2;
+ } else if (prefix_change > 0) {
+ /* If a column's prefix length is increased, it should
+ be the last PK column in old PK. */
+ if (old_field != old_n_uniq - 1) {
+ return(false);
+ }
+ }
+ }
+
+ return(true);
+}
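+
+/* Worked example (illustrative only): if the old PK is (a, b) and the
+new PK is (a, b, c) where c is a pre-existing column, the computed
+field orders are 0 and 1 (positions in the old PK) followed by
+old_n_uniq + 0 == 2 for c, so last_field_order advances 0, 1, 2 and
+sorting can be skipped. Reordering the PK to (b, a) instead yields
+orders 1, 0, which fails the last_field_order + 1 check on the first
+field, so a sort is required. */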
+
+/** Update the mtype from DATA_BLOB to DATA_GEOMETRY for a specified
+GIS column of a table. This is used when we want to create a spatial index
+on legacy GIS columns coming from 5.6, where GIS data was stored as
+DATA_BLOB in the InnoDB layer.
+@param[in] table_id table id
+@param[in] col_name column name
+@param[in] trx data dictionary transaction
+@retval true Failure
+@retval false Success */
+static
+bool
+innobase_update_gis_column_type(
+ table_id_t table_id,
+ const char* col_name,
+ trx_t* trx)
+{
+ pars_info_t* info;
+ dberr_t error;
+
+ DBUG_ENTER("innobase_update_gis_column_type");
+
+ DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
+ ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(mutex_own(&dict_sys->mutex));
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
+
+ info = pars_info_create();
+
+ pars_info_add_ull_literal(info, "tableid", table_id);
+ pars_info_add_str_literal(info, "name", col_name);
+ pars_info_add_int4_literal(info, "mtype", DATA_GEOMETRY);
+
+ trx->op_info = "update column type to DATA_GEOMETRY";
+
+ error = que_eval_sql(
+ info,
+ "PROCEDURE UPDATE_SYS_COLUMNS_PROC () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_COLUMNS SET MTYPE=:mtype\n"
+ "WHERE TABLE_ID=:tableid AND NAME=:name;\n"
+ "END;\n",
+ false, trx);
+
+ trx->error_state = DB_SUCCESS;
+ trx->op_info = "";
+
+ DBUG_RETURN(error != DB_SUCCESS);
+}
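+
+/* For reference, the internal SQL above is equivalent to running the
+following statement against the data dictionary (a sketch with the
+literals substituted; the mtype value 14 for DATA_GEOMETRY is an
+assumption based on data0type.h):
+
+	UPDATE SYS_COLUMNS SET MTYPE = 14
+	WHERE TABLE_ID = <table_id> AND NAME = '<col_name>';
+*/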
+
+/** Check if we are creating spatial indexes on GIS columns, which are
+legacy columns from earlier MySQL, such as 5.6. If so, we have to update
+the mtypes of the old GIS columns to DATA_GEOMETRY.
+In 5.6, GIS columns were stored as DATA_BLOB in the InnoDB layer, which
+would cause confusion when running the latest server on older data.
+That is why we need to do the upgrade.
+@param[in] ha_alter_info Data used during in-place alter
+@param[in] table Table on which we want to add indexes
+@param[in] trx Transaction
+@return DB_SUCCESS if the update succeeded or no columns needed updating,
+otherwise DB_ERROR, which means we could not update the mtype for some
+column, and creating a spatial index on it would be dangerous */
+static
+dberr_t
+innobase_check_gis_columns(
+ Alter_inplace_info* ha_alter_info,
+ dict_table_t* table,
+ trx_t* trx)
+{
+ DBUG_ENTER("innobase_check_gis_columns");
+
+ for (uint key_num = 0;
+ key_num < ha_alter_info->index_add_count;
+ key_num++) {
+
+ const KEY& key = ha_alter_info->key_info_buffer[
+ ha_alter_info->index_add_buffer[key_num]];
+
+ if (!(key.flags & HA_SPATIAL)) {
+ continue;
+ }
+
+ ut_ad(key.user_defined_key_parts == 1);
+ const KEY_PART_INFO& key_part = key.key_part[0];
+
+ /* Spatial indexes on virtual columns are not supported */
+ if (!key_part.field->stored_in_db()) {
+ DBUG_RETURN(DB_UNSUPPORTED);
+ }
+
+ ulint col_nr = dict_table_has_column(
+ table,
+ key_part.field->field_name,
+ key_part.fieldnr);
+ ut_ad(col_nr != table->n_def);
+ dict_col_t* col = &table->cols[col_nr];
+
+ if (col->mtype != DATA_BLOB) {
+ ut_ad(DATA_GEOMETRY_MTYPE(col->mtype));
+ continue;
+ }
+
+ const char* col_name = dict_table_get_col_name(
+ table, col_nr);
+
+ if (innobase_update_gis_column_type(
+ table->id, col_name, trx)) {
+
+ DBUG_RETURN(DB_ERROR);
+ } else {
+ col->mtype = DATA_GEOMETRY;
+
+ ib::info() << "Updated mtype of column" << col_name
+ << " in table " << table->name
+ << ", whose id is " << table->id
+ << " to DATA_GEOMETRY";
+ }
+ }
+
+ DBUG_RETURN(DB_SUCCESS);
+}
+
+/** Collect virtual column info for its addition
+@param[in] ha_alter_info Data used during in-place alter
+@param[in] altered_table MySQL table that is being altered to
+@param[in] table MySQL table as it is before the ALTER operation
+@retval true Failure
+@retval false Success */
+static
+bool
+prepare_inplace_add_virtual(
+ Alter_inplace_info* ha_alter_info,
+ const TABLE* altered_table,
+ const TABLE* table)
+{
+ ha_innobase_inplace_ctx* ctx;
+ ulint i = 0;
+ ulint j = 0;
+ const Create_field* new_field;
+
+ ctx = static_cast<ha_innobase_inplace_ctx*>
+ (ha_alter_info->handler_ctx);
+
+ ctx->num_to_add_vcol = altered_table->s->fields
+ + ctx->num_to_drop_vcol - table->s->fields;
+
+ ctx->add_vcol = static_cast<dict_v_col_t*>(
+ mem_heap_zalloc(ctx->heap, ctx->num_to_add_vcol
+ * sizeof *ctx->add_vcol));
+ ctx->add_vcol_name = static_cast<const char**>(
+ mem_heap_alloc(ctx->heap, ctx->num_to_add_vcol
+ * sizeof *ctx->add_vcol_name));
+
+ List_iterator_fast<Create_field> cf_it(
+ ha_alter_info->alter_info->create_list);
+
+ while ((new_field = (cf_it++)) != NULL) {
+ const Field* field = new_field->field;
+ ulint old_i;
+
+ for (old_i = 0; table->field[old_i]; old_i++) {
+ const Field* n_field = table->field[old_i];
+ if (field == n_field) {
+ break;
+ }
+ }
+
+ i++;
+
+ if (table->field[old_i]) {
+ continue;
+ }
+
+ ut_ad(!field);
+
+ ulint col_len;
+ ulint is_unsigned;
+ ulint field_type;
+ ulint charset_no;
+
+ field = altered_table->field[i - 1];
+
+ ulint col_type
+ = get_innobase_type_from_mysql_type(
+ &is_unsigned, field);
+
+ if (field->stored_in_db()) {
+ continue;
+ }
+
+ col_len = field->pack_length();
+ field_type = (ulint) field->type();
+
+ if (!field->real_maybe_null()) {
+ field_type |= DATA_NOT_NULL;
+ }
+
+ if (field->binary()) {
+ field_type |= DATA_BINARY_TYPE;
+ }
+
+ if (is_unsigned) {
+ field_type |= DATA_UNSIGNED;
+ }
+
+ if (dtype_is_string_type(col_type)) {
+ charset_no = (ulint) field->charset()->number;
+
+ DBUG_EXECUTE_IF(
+ "ib_alter_add_virtual_fail",
+ charset_no += MAX_CHAR_COLL_NUM;);
+
+ if (charset_no > MAX_CHAR_COLL_NUM) {
+ my_error(ER_WRONG_KEY_COLUMN, MYF(0), "InnoDB",
+ field->field_name);
+ return(true);
+ }
+ } else {
+ charset_no = 0;
+ }
+
+ if (field->type() == MYSQL_TYPE_VARCHAR) {
+ uint32 length_bytes
+ = static_cast<const Field_varstring*>(
+ field)->length_bytes;
+
+ col_len -= length_bytes;
+
+ if (length_bytes == 2) {
+ field_type |= DATA_LONG_TRUE_VARCHAR;
+ }
+ }
+
+ ctx->add_vcol[j].m_col.prtype = dtype_form_prtype(
+ field_type, charset_no);
+
+ ctx->add_vcol[j].m_col.prtype |= DATA_VIRTUAL;
+
+ ctx->add_vcol[j].m_col.mtype = col_type;
+
+ ctx->add_vcol[j].m_col.len = col_len;
+
+ ctx->add_vcol[j].m_col.ind = i - 1;
+ ctx->add_vcol[j].num_base = 0;
+ ctx->add_vcol_name[j] = field->field_name;
+ ctx->add_vcol[j].base_col = NULL;
+ ctx->add_vcol[j].v_pos = ctx->old_table->n_v_cols
+ - ctx->num_to_drop_vcol + j;
+
+ /* No need to track the list */
+ ctx->add_vcol[j].v_indexes = NULL;
+ innodb_base_col_setup(ctx->old_table, field, &ctx->add_vcol[j]);
+ j++;
+ }
+
+ return(false);
+}
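+
+/* Note on the arithmetic above (illustrative): num_to_add_vcol is
+derived as altered_table fields + dropped virtual columns - old table
+fields. E.g. for a hypothetical 5-field table where one virtual column
+is dropped and two are added in the same ALTER, the altered table has
+5 - 1 + 2 = 6 fields, and 6 + 1 - 5 = 2 columns are to be added. */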
+
+/** Collect virtual column info for its removal
+@param[in] ha_alter_info Data used during in-place alter
+@param[in] altered_table MySQL table that is being altered to
+@param[in] table MySQL table as it is before the ALTER operation
+@retval true Failure
+@retval false Success */
+static
+bool
+prepare_inplace_drop_virtual(
+ Alter_inplace_info* ha_alter_info,
+ const TABLE* altered_table,
+ const TABLE* table)
+{
+ ha_innobase_inplace_ctx* ctx;
+ ulint i = 0;
+ ulint j = 0;
+
+ ctx = static_cast<ha_innobase_inplace_ctx*>
+ (ha_alter_info->handler_ctx);
+
+ ctx->num_to_drop_vcol = 0;
+ for (i = 0; table->field[i]; i++) {
+ const Field* field = table->field[i];
+ if (field->flags & FIELD_IS_DROPPED && !field->stored_in_db()) {
+ ctx->num_to_drop_vcol++;
+ }
+ }
+
+ ctx->drop_vcol = static_cast<dict_v_col_t*>(
+ mem_heap_alloc(ctx->heap, ctx->num_to_drop_vcol
+ * sizeof *ctx->drop_vcol));
+ ctx->drop_vcol_name = static_cast<const char**>(
+ mem_heap_alloc(ctx->heap, ctx->num_to_drop_vcol
+ * sizeof *ctx->drop_vcol_name));
+
+ for (i = 0; table->field[i]; i++) {
+ Field *field = table->field[i];
+ if (!(field->flags & FIELD_IS_DROPPED) || field->stored_in_db()) {
+ continue;
+ }
+
+ ulint col_len;
+ ulint is_unsigned;
+ ulint field_type;
+ ulint charset_no;
+
+ ulint col_type
+ = get_innobase_type_from_mysql_type(
+ &is_unsigned, field);
+
+ col_len = field->pack_length();
+ field_type = (ulint) field->type();
+
+ if (!field->real_maybe_null()) {
+ field_type |= DATA_NOT_NULL;
+ }
+
+ if (field->binary()) {
+ field_type |= DATA_BINARY_TYPE;
+ }
+
+ if (is_unsigned) {
+ field_type |= DATA_UNSIGNED;
+ }
+
+ if (dtype_is_string_type(col_type)) {
+ charset_no = (ulint) field->charset()->number;
+
+ DBUG_EXECUTE_IF(
+ "ib_alter_add_virtual_fail",
+ charset_no += MAX_CHAR_COLL_NUM;);
+
+ if (charset_no > MAX_CHAR_COLL_NUM) {
+ my_error(ER_WRONG_KEY_COLUMN, MYF(0), "InnoDB",
+ field->field_name);
+ return(true);
+ }
+ } else {
+ charset_no = 0;
+ }
+
+ if (field->type() == MYSQL_TYPE_VARCHAR) {
+ uint32 length_bytes
+ = static_cast<const Field_varstring*>(
+ field)->length_bytes;
+
+ col_len -= length_bytes;
+
+ if (length_bytes == 2) {
+ field_type |= DATA_LONG_TRUE_VARCHAR;
+ }
+ }
+
+ ctx->drop_vcol[j].m_col.prtype = dtype_form_prtype(
+ field_type, charset_no);
+
+ ctx->drop_vcol[j].m_col.prtype |= DATA_VIRTUAL;
+
+ ctx->drop_vcol[j].m_col.mtype = col_type;
+
+ ctx->drop_vcol[j].m_col.len = col_len;
+
+ ctx->drop_vcol[j].m_col.ind = i;
+
+ ctx->drop_vcol_name[j] = field->field_name;
+
+ dict_v_col_t* v_col = dict_table_get_nth_v_col_mysql(
+ ctx->old_table, i);
+ ctx->drop_vcol[j].v_pos = v_col->v_pos;
+ j++;
+ }
+
+ return(false);
+}
+
+/** Insert a new record into InnoDB SYS_VIRTUAL
+@param[in] table InnoDB table
+@param[in] pos encoded virtual column position
+@param[in] base_pos base column position
+@param[in] trx transaction
+@return DB_SUCCESS if successful, otherwise error code */
+static
+dberr_t
+innobase_insert_sys_virtual(
+ const dict_table_t* table,
+ ulint pos,
+ ulint base_pos,
+ trx_t* trx)
+{
+ pars_info_t* info = pars_info_create();
+
+ pars_info_add_ull_literal(info, "id", table->id);
+
+ pars_info_add_int4_literal(info, "pos", pos);
+
+ pars_info_add_int4_literal(info, "base_pos", base_pos);
+
+ dberr_t error = que_eval_sql(
+ info,
+ "PROCEDURE P () IS\n"
+ "BEGIN\n"
+ "INSERT INTO SYS_VIRTUAL VALUES"
+ "(:id, :pos, :base_pos);\n"
+ "END;\n",
+ FALSE, trx);
+
+ return(error);
+}
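+
+/* Illustration (hypothetical): a virtual column with two base columns
+produces two SYS_VIRTUAL records, one per base column, all sharing the
+same encoded POS. For base columns at indexes 1 and 3 this function
+would be called twice, yielding rows (table_id, pos, 1) and
+(table_id, pos, 3), as done in the loop in innobase_add_one_virtual()
+below. */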
+
+/** Insert a record for a new virtual column into InnoDB SYS_COLUMNS
+@param[in] table InnoDB table
+@param[in] col_name column name
+@param[in] vcol virtual column
+@param[in] trx transaction
+@return DB_SUCCESS if successful, otherwise error code */
+static
+dberr_t
+innobase_add_one_virtual(
+ const dict_table_t* table,
+ const char* col_name,
+ dict_v_col_t* vcol,
+ trx_t* trx)
+{
+ ulint pos = dict_create_v_col_pos(vcol->v_pos,
+ vcol->m_col.ind);
+ ulint mtype = vcol->m_col.mtype;
+ ulint prtype = vcol->m_col.prtype;
+ ulint len = vcol->m_col.len;
+ pars_info_t* info = pars_info_create();
+
+ pars_info_add_ull_literal(info, "id", table->id);
+
+ pars_info_add_int4_literal(info, "pos", pos);
+
+ pars_info_add_str_literal(info, "name", col_name);
+ pars_info_add_int4_literal(info, "mtype", mtype);
+ pars_info_add_int4_literal(info, "prtype", prtype);
+ pars_info_add_int4_literal(info, "len", len);
+ pars_info_add_int4_literal(info, "prec", vcol->num_base);
+
+ dberr_t error = que_eval_sql(
+ info,
+ "PROCEDURE P () IS\n"
+ "BEGIN\n"
+ "INSERT INTO SYS_COLUMNS VALUES"
+ "(:id, :pos, :name, :mtype, :prtype, :len, :prec);\n"
+ "END;\n",
+ FALSE, trx);
+
+ if (error != DB_SUCCESS) {
+ return(error);
+ }
+
+ for (ulint i = 0; i < vcol->num_base; i++) {
+ error = innobase_insert_sys_virtual(
+ table, pos, vcol->base_col[i]->ind, trx);
+ if (error != DB_SUCCESS) {
+ return(error);
+ }
+ }
+
+ return(error);
+}
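+
+/* Sketch of the position encoding used above (an assumption based on
+the dict_create_v_col_pos() definition in dict0mem.h, where the virtual
+sequence number occupies the upper 16 bits):
+
+	pos = ((v_pos + 1) << 16) + col_ind;
+
+so a virtual column with v_pos 0 and MySQL column index 2 would be
+stored in SYS_COLUMNS with POS = 0x10002, which keeps it distinct from
+any non-virtual column position. */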
+
+/** Update the encoded column count (N_COLS) in InnoDB SYS_TABLES
+@param[in] table InnoDB table
+@param[in] n_col encoded number of columns
+@param[in] trx transaction
+@return DB_SUCCESS if successful, otherwise error code */
+static
+dberr_t
+innobase_update_n_virtual(
+ const dict_table_t* table,
+ ulint n_col,
+ trx_t* trx)
+{
+ dberr_t err = DB_SUCCESS;
+ pars_info_t* info = pars_info_create();
+
+ pars_info_add_int4_literal(info, "num_col", n_col);
+ pars_info_add_ull_literal(info, "id", table->id);
+
+ err = que_eval_sql(
+ info,
+ "PROCEDURE RENUMBER_TABLE_ID_PROC () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_TABLES"
+ " SET N_COLS = :num_col\n"
+ " WHERE ID = :id;\n"
+ "END;\n", FALSE, trx);
+
+ return(err);
+}
+
+/** Update system tables for adding virtual column(s)
+@param[in] ha_alter_info Data used during in-place alter
+@param[in] altered_table MySQL table that is being altered
+@param[in] table MySQL table as it is before the ALTER operation
+@param[in] user_table InnoDB table
+@param[in] trx transaction
+@retval true Failure
+@retval false Success */
+static
+bool
+innobase_add_virtual_try(
+ Alter_inplace_info* ha_alter_info,
+ const TABLE* altered_table,
+ const TABLE* table,
+ const dict_table_t* user_table,
+ trx_t* trx)
+{
+ ha_innobase_inplace_ctx* ctx;
+ dberr_t err = DB_SUCCESS;
+
+ ctx = static_cast<ha_innobase_inplace_ctx*>(
+ ha_alter_info->handler_ctx);
+
+ for (ulint i = 0; i < ctx->num_to_add_vcol; i++) {
+
+ err = innobase_add_one_virtual(
+ user_table, ctx->add_vcol_name[i],
+ &ctx->add_vcol[i], trx);
+
+ if (err != DB_SUCCESS) {
+ my_error(ER_INTERNAL_ERROR, MYF(0),
+ "InnoDB: ADD COLUMN...VIRTUAL");
+ return(true);
+ }
+ }
+
+ ulint n_col = user_table->n_cols;
+ ulint n_v_col = user_table->n_v_cols;
+
+ n_v_col += ctx->num_to_add_vcol;
+
+ n_col -= DATA_N_SYS_COLS;
+
+ n_v_col -= ctx->num_to_drop_vcol;
+
+ ulint new_n = dict_table_encode_n_col(n_col, n_v_col)
+ + ((user_table->flags & DICT_TF_COMPACT) << 31);
+
+ err = innobase_update_n_virtual(user_table, new_n, trx);
+
+ if (err != DB_SUCCESS) {
+ my_error(ER_INTERNAL_ERROR, MYF(0),
+ "InnoDB: ADD COLUMN...VIRTUAL");
+ return(true);
+ }
+
+ return(false);
+}
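+
+/* Sketch of the N_COLS encoding computed above (an assumption based on
+dict_table_encode_n_col() packing the virtual column count into the
+upper halfword):
+
+	new_n = (n_col + (n_v_col << 16))
+		+ ((flags & DICT_TF_COMPACT) << 31);
+
+e.g. 4 stored user columns plus 2 virtual columns in a COMPACT table
+encode as 0x80020004 in SYS_TABLES.N_COLS. */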
+
+/** Update a virtual column's position in InnoDB SYS_COLUMNS
+@param[in] table InnoDB table
+@param[in] old_pos old position
+@param[in] new_pos new position
+@param[in] trx transaction
+@return DB_SUCCESS if successful, otherwise error code */
+static
+dberr_t
+innobase_update_v_pos_sys_columns(
+ const dict_table_t* table,
+ ulint old_pos,
+ ulint new_pos,
+ trx_t* trx)
+{
+ pars_info_t* info = pars_info_create();
+
+ pars_info_add_int4_literal(info, "pos", old_pos);
+ pars_info_add_int4_literal(info, "val", new_pos);
+ pars_info_add_ull_literal(info, "id", table->id);
+
+ dberr_t error = que_eval_sql(
+ info,
+ "PROCEDURE P () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_COLUMNS\n"
+ "SET POS = :val\n"
+ "WHERE POS = :pos\n"
+ "AND TABLE_ID = :id;\n"
+ "END;\n",
+ FALSE, trx);
+
+ return(error);
+}
+
+/** Update a virtual column's position in the InnoDB SYS_VIRTUAL table
+@param[in] table InnoDB table
+@param[in] old_pos old position
+@param[in] new_pos new position
+@param[in] trx transaction
+@return DB_SUCCESS if successful, otherwise error code */
+static
+dberr_t
+innobase_update_v_pos_sys_virtual(
+ const dict_table_t* table,
+ ulint old_pos,
+ ulint new_pos,
+ trx_t* trx)
+{
+ pars_info_t* info = pars_info_create();
+
+ pars_info_add_int4_literal(info, "pos", old_pos);
+ pars_info_add_int4_literal(info, "val", new_pos);
+ pars_info_add_ull_literal(info, "id", table->id);
+
+ dberr_t error = que_eval_sql(
+ info,
+ "PROCEDURE P () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_VIRTUAL\n"
+ "SET POS = :val\n"
+ "WHERE POS = :pos\n"
+ "AND TABLE_ID = :id;\n"
+ "END;\n",
+ FALSE, trx);
+
+ return(error);
+}
+
+/** Update InnoDB system tables on dropping a virtual column
+@param[in] table InnoDB table
+@param[in] col_name name of the column being dropped
+@param[in] drop_col dict_col_t information of the column being dropped
+@param[in] n_prev_dropped number of previously dropped columns in the
+ same ALTER clause
+@param[in] trx transaction
+@return DB_SUCCESS if successful, otherwise error code */
+static
+dberr_t
+innobase_drop_one_virtual_sys_columns(
+ const dict_table_t* table,
+ const char* col_name,
+ dict_col_t* drop_col,
+ ulint n_prev_dropped,
+ trx_t* trx)
+{
+ pars_info_t* info = pars_info_create();
+ pars_info_add_ull_literal(info, "id", table->id);
+
+ pars_info_add_str_literal(info, "name", col_name);
+
+ dberr_t error = que_eval_sql(
+ info,
+ "PROCEDURE P () IS\n"
+ "BEGIN\n"
+ "DELETE FROM SYS_COLUMNS\n"
+ "WHERE TABLE_ID = :id\n"
+ "AND NAME = :name;\n"
+ "END;\n",
+ FALSE, trx);
+
+ if (error != DB_SUCCESS) {
+ return(error);
+ }
+
+ dict_v_col_t* v_col = dict_table_get_nth_v_col_mysql(
+ table, drop_col->ind);
+
+ /* Adjust column positions for all subsequent columns */
+ for (ulint i = v_col->v_pos + 1; i < table->n_v_cols; i++) {
+ dict_v_col_t* t_col = dict_table_get_nth_v_col(table, i);
+ ulint old_p = dict_create_v_col_pos(
+ t_col->v_pos - n_prev_dropped,
+ t_col->m_col.ind - n_prev_dropped);
+ ulint new_p = dict_create_v_col_pos(
+ t_col->v_pos - 1 - n_prev_dropped,
+ t_col->m_col.ind - 1 - n_prev_dropped);
+
+ error = innobase_update_v_pos_sys_columns(
+ table, old_p, new_p, trx);
+ if (error != DB_SUCCESS) {
+ return(error);
+ }
+ error = innobase_update_v_pos_sys_virtual(
+ table, old_p, new_p, trx);
+ if (error != DB_SUCCESS) {
+ return(error);
+ }
+ }
+
+ return(error);
+}
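+
+/* Illustration (hypothetical): dropping the virtual column at v_pos 1
+out of three virtual columns shifts the column at v_pos 2 down by one;
+with n_prev_dropped == 0 its SYS_COLUMNS/SYS_VIRTUAL position is
+rewritten from dict_create_v_col_pos(2, ind) to
+dict_create_v_col_pos(1, ind - 1), i.e. both the virtual sequence
+number and the overall MySQL column index decrease. */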
+
+/** Delete a virtual column's info from InnoDB SYS_VIRTUAL
+@param[in] table InnoDB table
+@param[in] pos position of the virtual column to be deleted
+@param[in] trx transaction
+@return DB_SUCCESS if successful, otherwise error code */
+static
+dberr_t
+innobase_drop_one_virtual_sys_virtual(
+ const dict_table_t* table,
+ ulint pos,
+ trx_t* trx)
+{
+ pars_info_t* info = pars_info_create();
+ pars_info_add_ull_literal(info, "id", table->id);
+
+ pars_info_add_int4_literal(info, "pos", pos);
+
+ dberr_t error = que_eval_sql(
+ info,
+ "PROCEDURE P () IS\n"
+ "BEGIN\n"
+ "DELETE FROM SYS_VIRTUAL\n"
+ "WHERE TABLE_ID = :id\n"
+ "AND POS = :pos;\n"
+ "END;\n",
+ FALSE, trx);
+
+ return(error);
+}
+
+/** Update system tables for dropping virtual column(s)
+@param[in] ha_alter_info Data used during in-place alter
+@param[in] altered_table MySQL table that is being altered
+@param[in] table MySQL table as it is before the ALTER operation
+@param[in] user_table InnoDB table
+@param[in] trx transaction
+@retval true Failure
+@retval false Success */
+static
+bool
+innobase_drop_virtual_try(
+ Alter_inplace_info* ha_alter_info,
+ const TABLE* altered_table,
+ const TABLE* table,
+ const dict_table_t* user_table,
+ trx_t* trx)
+{
+ ha_innobase_inplace_ctx* ctx;
+ dberr_t err = DB_SUCCESS;
+
+ ctx = static_cast<ha_innobase_inplace_ctx*>
+ (ha_alter_info->handler_ctx);
+
+ for (ulint i = 0; i < ctx->num_to_drop_vcol; i++) {
+
+ ulint pos = dict_create_v_col_pos(
+ ctx->drop_vcol[i].v_pos - i,
+ ctx->drop_vcol[i].m_col.ind - i);
+ err = innobase_drop_one_virtual_sys_virtual(
+ user_table, pos, trx);
+
+ if (err != DB_SUCCESS) {
+ my_error(ER_INTERNAL_ERROR, MYF(0),
+ "InnoDB: DROP COLUMN...VIRTUAL");
+ return(true);
+ }
+
+ err = innobase_drop_one_virtual_sys_columns(
+ user_table, ctx->drop_vcol_name[i],
+ &(ctx->drop_vcol[i].m_col), i, trx);
+
+ if (err != DB_SUCCESS) {
+ my_error(ER_INTERNAL_ERROR, MYF(0),
+ "InnoDB: DROP COLUMN...VIRTUAL");
+ return(true);
+ }
+ }
+
+ ulint n_col = user_table->n_cols;
+ ulint n_v_col = user_table->n_v_cols;
+
+ n_v_col -= ctx->num_to_drop_vcol;
+
+ n_col -= DATA_N_SYS_COLS;
+
+ ulint new_n = dict_table_encode_n_col(n_col, n_v_col)
+ + ((user_table->flags & DICT_TF_COMPACT) << 31);
+
+ err = innobase_update_n_virtual(user_table, new_n, trx);
+
+ if (err != DB_SUCCESS) {
+ my_error(ER_INTERNAL_ERROR, MYF(0),
+ "InnoDB: DROP COLUMN...VIRTUAL");
+ }
+
+ return(false);
+}
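+
+/* Note on the "- i" adjustments in the loop above (illustrative): each
+call to innobase_drop_one_virtual_sys_columns() already shifts the
+dictionary positions of all subsequent virtual columns down by one, so
+when the i-th column of the drop list is processed, its recorded v_pos
+and column index must be reduced by the i drops performed before it to
+address the row as it now stands in the dictionary. */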
+
+/** Adjust the create-index column number from the "new table" to the
+"old InnoDB table" while dropping virtual columns, since we do not
+create a separate new table for dropping/adding virtual columns.
+To correctly find the indexed column, we need to find its col_no
+in the "old table", not the "new table".
+@param[in] ha_alter_info Data used during in-place alter
+@param[in] old_table MySQL table as it is before the ALTER operation
+@param[in] num_v_dropped number of virtual column dropped
+@param[in,out] index_def index definition */
+static
+void
+innodb_v_adjust_idx_col(
+ const Alter_inplace_info* ha_alter_info,
+ const TABLE* old_table,
+ ulint num_v_dropped,
+ index_def_t* index_def)
+{
+ List_iterator_fast<Create_field> cf_it(
+ ha_alter_info->alter_info->create_list);
+ for (ulint i = 0; i < index_def->n_fields; i++) {
+#ifdef UNIV_DEBUG
+ bool col_found = false;
+#endif /* UNIV_DEBUG */
+ ulint num_v = 0;
+
+ index_field_t* index_field = &index_def->fields[i];
+
+ /* Only adjust the virtual column col_no, since a
+ non-virtual column's position (in the non-vcol list)
+ will not change unless the table is rebuilt */
+ if (!index_field->is_v_col) {
+ continue;
+ }
+
+ const Field* field = NULL;
+
+ cf_it.rewind();
+
+ /* Find the field in the new table */
+ while (const Create_field* new_field = cf_it++) {
+ if (new_field->stored_in_db()) {
+ continue;
+ }
+
+ field = new_field->field;
+
+ if (num_v == index_field->col_no) {
+ break;
+ }
+ num_v++;
+ }
+
+ if (!field) {
+ /* This means the field is a newly added field;
+ this should have been blocked when we drop a
+ virtual column at the same time. */
+ ut_ad(num_v_dropped > 0);
+ ut_a(0);
+ }
+
+ ut_ad(!field->stored_in_db());
+
+ num_v = 0;
+
+ /* Look for its position in old table */
+ for (uint old_i = 0; old_table->field[old_i]; old_i++) {
+ if (old_table->field[old_i] == field) {
+ /* Found it, adjust its col_no to its position
+ in old table */
+ index_def->fields[i].col_no = num_v;
+ ut_d(col_found = true);
+ break;
+ }
+
+ num_v += !old_table->field[old_i]->stored_in_db();
+ }
+
+ ut_ad(col_found);
+ }
+}
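+
+/* Example (illustrative): if the old table has virtual columns
+(v1, v2, v3) and the ALTER drops v1 while creating an index on v3, the
+index definition sees v3 as col_no 1 (the second remaining virtual
+column in the new table), whereas the index build needs its position
+among the old table's virtual columns, which is 2; the two loops above
+perform exactly that translation. */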
+
/** Update internal structures with concurrent writes blocked,
while preparing ALTER TABLE.
-@param ha_alter_info Data used during in-place alter
-@param altered_table MySQL table that is being altered
-@param old_table MySQL table as it is before the ALTER operation
-@param table_name Table name in MySQL
-@param flags Table and tablespace flags
-@param flags2 Additional table flags
-@param fts_doc_id_col The column number of FTS_DOC_ID
-@param add_fts_doc_id Flag: add column FTS_DOC_ID?
+@param ha_alter_info Data used during in-place alter
+@param altered_table MySQL table that is being altered
+@param old_table MySQL table as it is before the ALTER operation
+@param table_name Table name in MySQL
+@param flags Table and tablespace flags
+@param flags2 Additional table flags
+@param fts_doc_id_col The column number of FTS_DOC_ID
+@param add_fts_doc_id Flag: add column FTS_DOC_ID?
@param add_fts_doc_id_idx Flag: add index FTS_DOC_ID_INDEX (FTS_DOC_ID)?
-@retval true Failure
-@retval false Success
+@retval true Failure
+@retval false Success
*/
static MY_ATTRIBUTE((warn_unused_result, nonnull(1,2,3,4)))
bool
@@ -2729,7 +4313,7 @@ prepare_inplace_alter_table_dict(
ulint new_clustered = 0;
dberr_t error;
ulint num_fts_index;
- uint sql_idx;
+ dict_add_v_col_t* add_v = NULL;
ha_innobase_inplace_ctx*ctx;
DBUG_ENTER("prepare_inplace_alter_table_dict");
@@ -2751,7 +4335,48 @@ prepare_inplace_alter_table_dict(
user_table = ctx->new_table;
- trx_start_if_not_started_xa(ctx->prebuilt->trx);
+ trx_start_if_not_started_xa(ctx->prebuilt->trx, true);
+
+ if (ha_alter_info->handler_flags
+ & Alter_inplace_info::DROP_VIRTUAL_COLUMN) {
+ if (prepare_inplace_drop_virtual(
+ ha_alter_info, altered_table, old_table)) {
+ DBUG_RETURN(true);
+ }
+ }
+
+ if (ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_VIRTUAL_COLUMN) {
+ if (prepare_inplace_add_virtual(
+ ha_alter_info, altered_table, old_table)) {
+ DBUG_RETURN(true);
+ }
+
+ /* Need information for newly added virtual columns
+ for create index */
+
+ if (ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_INDEX) {
+ for (ulint i = 0; i < ctx->num_to_add_vcol; i++) {
+ /* Set mbminmax for newly added column */
+ dict_col_t& col = ctx->add_vcol[i].m_col;
+ ulint mbminlen, mbmaxlen;
+ dtype_get_mblen(col.mtype, col.prtype,
+ &mbminlen, &mbmaxlen);
+ col.mbminlen = mbminlen;
+ col.mbmaxlen = mbmaxlen;
+ }
+ add_v = static_cast<dict_add_v_col_t*>(
+ mem_heap_alloc(ctx->heap, sizeof *add_v));
+ add_v->n_v_col = ctx->num_to_add_vcol;
+ add_v->v_col = ctx->add_vcol;
+ add_v->v_col_name = ctx->add_vcol_name;
+ }
+ }
+
+ /* There should be no order change for virtual columns coming in
+ here */
+ ut_ad(check_v_col_in_order(old_table, altered_table, ha_alter_info));
/* Create a background transaction for the operations on
the data dictionary tables. */
@@ -2772,12 +4397,17 @@ prepare_inplace_alter_table_dict(
index_defs = innobase_create_key_defs(
ctx->heap, ha_alter_info, altered_table, ctx->num_to_add_index,
num_fts_index,
- row_table_got_default_clust_index(ctx->new_table),
+ dict_index_is_auto_gen_clust(dict_table_get_first_index(
+ ctx->new_table)),
fts_doc_id_col, add_fts_doc_id, add_fts_doc_id_idx,
old_table);
new_clustered = DICT_CLUSTERED & index_defs[0].ind_type;
+ create_table_info_t info(ctx->prebuilt->trx->mysql_thd, altered_table,
+ ha_alter_info->create_info, NULL, NULL,
+ srv_file_per_table);
+
if (num_fts_index > 1) {
my_error(ER_INNODB_FT_LIMIT, MYF(0));
goto error_handled;
@@ -2791,11 +4421,13 @@ prepare_inplace_alter_table_dict(
|| !innobase_fulltext_exist(altered_table))) {
/* InnoDB can perform an online operation (LOCK=NONE). */
} else {
+ size_t query_length;
/* This should have been blocked in
check_if_supported_inplace_alter(). */
ut_ad(0);
my_error(ER_NOT_SUPPORTED_YET, MYF(0),
- thd_query_string(ctx->prebuilt->trx->mysql_thd)->str);
+ innobase_get_stmt_unsafe(ctx->prebuilt->trx->mysql_thd,
+ &query_length));
goto error_handled;
}
@@ -2858,18 +4490,29 @@ prepare_inplace_alter_table_dict(
to rebuild the table with a temporary name. */
if (new_clustered) {
- const char* new_table_name
- = dict_mem_create_temporary_tablename(
- ctx->heap,
- ctx->new_table->name,
- ctx->new_table->id);
- ulint n_cols;
+ size_t dblen = ctx->old_table->name.dblen() + 1;
+ size_t tablen = altered_table->s->table_name.length;
+ const char* part = ctx->old_table->name.part();
+ size_t partlen = part ? strlen(part) : 0;
+ char* new_table_name = static_cast<char*>(
+ mem_heap_alloc(ctx->heap,
+ dblen + tablen + partlen + 1));
+ memcpy(new_table_name, ctx->old_table->name.m_name, dblen);
+ memcpy(new_table_name + dblen,
+ altered_table->s->table_name.str, tablen);
+ memcpy(new_table_name + dblen + tablen,
+ part ? part : "", partlen + 1);
+ ulint n_cols = 0;
+ ulint n_v_cols = 0;
dtuple_t* add_cols;
- ulint key_id = FIL_DEFAULT_ENCRYPTION_KEY;
+ ulint space_id = 0;
+ ulint z = 0;
+ uint32_t key_id = FIL_DEFAULT_ENCRYPTION_KEY;
fil_encryption_t mode = FIL_ENCRYPTION_DEFAULT;
- if (fil_space_t* space
- = fil_space_acquire(ctx->prebuilt->table->space)) {
+ if (dict_table_is_discarded(ctx->prebuilt->table)) {
+ } else if (fil_space_t* space
+ = fil_space_acquire(ctx->prebuilt->table->space)) {
if (const fil_space_crypt_t* crypt_data
= space->crypt_data) {
key_id = crypt_data->key_id;
@@ -2888,7 +4531,7 @@ prepare_inplace_alter_table_dict(
if (alt_opt.encryption != opt.encryption
|| alt_opt.encryption_key_id
!= opt.encryption_key_id) {
- key_id = alt_opt.encryption_key_id;
+ key_id = uint32_t(alt_opt.encryption_key_id);
mode = fil_encryption_t(alt_opt.encryption);
}
}
@@ -2899,7 +4542,17 @@ prepare_inplace_alter_table_dict(
goto new_clustered_failed;
}
- n_cols = altered_table->s->stored_fields;
+ for (uint i = 0; i < altered_table->s->fields; i++) {
+ const Field* field = altered_table->field[i];
+
+ if (!field->stored_in_db()) {
+ n_v_cols++;
+ } else {
+ n_cols++;
+ }
+ }
+
+ ut_ad(n_cols + n_v_cols == altered_table->s->fields);
if (add_fts_doc_id) {
n_cols++;
@@ -2921,9 +4574,12 @@ prepare_inplace_alter_table_dict(
goto new_clustered_failed;
}
- /* The initial space id 0 may be overridden later. */
+ /* The initial space id 0 may be overridden later if this
+ table is going to be a file_per_table tablespace. */
ctx->new_table = dict_mem_table_create(
- new_table_name, 0, n_cols, flags, flags2);
+ new_table_name, space_id, n_cols + n_v_cols, n_v_cols,
+ flags, flags2);
+
/* The rebuilt indexed_table will use the renamed
column names. */
ctx->col_names = NULL;
@@ -2934,12 +4590,8 @@ prepare_inplace_alter_table_dict(
user_table->data_dir_path);
}
- sql_idx= 0;
- for (uint i = 0; i < altered_table->s->stored_fields; i++, sql_idx++) {
- const Field* field;
- while (!((field= altered_table->field[sql_idx])->
- stored_in_db))
- sql_idx++;
+ for (uint i = 0; i < altered_table->s->fields; i++) {
+ const Field* field = altered_table->field[i];
ulint is_unsigned;
ulint field_type
= (ulint) field->type();
@@ -2948,6 +4600,7 @@ prepare_inplace_alter_table_dict(
&is_unsigned, field);
ulint charset_no;
ulint col_len;
+ const bool is_virtual = !field->stored_in_db();
/* we assume in dtype_form_prtype() that this
fits in two bytes */
@@ -2971,7 +4624,7 @@ prepare_inplace_alter_table_dict(
if (charset_no > MAX_CHAR_COLL_NUM) {
dict_mem_table_free(
ctx->new_table);
- my_error(ER_WRONG_KEY_COLUMN, MYF(0),
+ my_error(ER_WRONG_KEY_COLUMN, MYF(0), "InnoDB",
field->field_name);
goto new_clustered_failed;
}
@@ -2997,6 +4650,7 @@ prepare_inplace_alter_table_dict(
if (length_bytes == 2) {
field_type |= DATA_LONG_TRUE_VARCHAR;
}
+
}
if (dict_col_name_is_reserved(field->field_name)) {
@@ -3006,24 +4660,53 @@ prepare_inplace_alter_table_dict(
goto new_clustered_failed;
}
- dict_mem_table_add_col(
- ctx->new_table, ctx->heap,
- field->field_name,
- col_type,
- dtype_form_prtype(field_type, charset_no),
- col_len);
+ if (is_virtual) {
+ dict_mem_table_add_v_col(
+ ctx->new_table, ctx->heap,
+ field->field_name,
+ col_type,
+ dtype_form_prtype(
+ field_type, charset_no)
+ | DATA_VIRTUAL,
+ col_len, i, 0);
+ } else {
+ dict_mem_table_add_col(
+ ctx->new_table, ctx->heap,
+ field->field_name,
+ col_type,
+ dtype_form_prtype(
+ field_type, charset_no),
+ col_len);
+ }
+ }
+
+ if (n_v_cols) {
+ for (uint i = 0; i < altered_table->s->fields; i++) {
+ dict_v_col_t* v_col;
+ const Field* field = altered_table->field[i];
+
+ if (field->stored_in_db()) {
+ continue;
+ }
+ v_col = dict_table_get_nth_v_col(
+ ctx->new_table, z);
+ z++;
+ innodb_base_col_setup(
+ ctx->new_table, field, v_col);
+ }
}
if (add_fts_doc_id) {
fts_add_doc_id_column(ctx->new_table, ctx->heap);
ctx->new_table->fts->doc_col = fts_doc_id_col;
- ut_ad(fts_doc_id_col == altered_table->s->stored_fields);
+ ut_ad(fts_doc_id_col
+ == altered_table->s->fields - n_v_cols);
} else if (ctx->new_table->fts) {
ctx->new_table->fts->doc_col = fts_doc_id_col;
}
error = row_create_table_for_mysql(
- ctx->new_table, ctx->trx, false, mode, key_id);
+ ctx->new_table, ctx->trx, mode, key_id);
switch (error) {
dict_table_t* temp_table;
@@ -3035,13 +4718,13 @@ prepare_inplace_alter_table_dict(
the dict_sys->mutex. */
ut_ad(mutex_own(&dict_sys->mutex));
temp_table = dict_table_open_on_name(
- ctx->new_table->name, TRUE, FALSE,
+ ctx->new_table->name.m_name, TRUE, FALSE,
DICT_ERR_IGNORE_NONE);
ut_a(ctx->new_table == temp_table);
/* n_ref_count must be 1, because purge cannot
be executing on this very table as we are
holding dict_operation_lock X-latch. */
- DBUG_ASSERT(ctx->new_table->n_ref_count == 1);
+ DBUG_ASSERT(ctx->new_table->get_ref_count() == 1);
break;
case DB_TABLESPACE_EXISTS:
my_error(ER_TABLESPACE_EXISTS, MYF(0),
@@ -3051,13 +4734,17 @@ prepare_inplace_alter_table_dict(
my_error(HA_ERR_TABLE_EXIST, MYF(0),
altered_table->s->table_name.str);
goto new_clustered_failed;
+ case DB_UNSUPPORTED:
+ my_error(ER_UNSUPPORTED_EXTENSION, MYF(0),
+ ctx->new_table->name.m_name);
+ goto new_clustered_failed;
default:
my_error_innodb(error, table_name, flags);
- new_clustered_failed:
+new_clustered_failed:
DBUG_ASSERT(ctx->trx != ctx->prebuilt->trx);
trx_rollback_to_savepoint(ctx->trx, NULL);
- ut_ad(user_table->n_ref_count == 1);
+ ut_ad(user_table->get_ref_count() == 1);
online_retry_drop_indexes_with_trx(
user_table, ctx->trx);
@@ -3066,9 +4753,10 @@ prepare_inplace_alter_table_dict(
if (ha_alter_info->handler_flags
& Alter_inplace_info::ADD_COLUMN) {
- add_cols = dtuple_create(
+ add_cols = dtuple_create_with_vcol(
ctx->heap,
- dict_table_get_n_cols(ctx->new_table));
+ dict_table_get_n_cols(ctx->new_table),
+ dict_table_get_n_v_cols(ctx->new_table));
dict_table_copy_types(add_cols, ctx->new_table);
} else {
@@ -3085,12 +4773,44 @@ prepare_inplace_alter_table_dict(
DBUG_ASSERT(old_table->s->primary_key
== altered_table->s->primary_key);
+ for (dict_index_t* index
+ = dict_table_get_first_index(user_table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+ if (!index->to_be_dropped && index->is_corrupted()) {
+ my_error(ER_CHECK_NO_SUCH_TABLE, MYF(0));
+ goto error_handled;
+ }
+ }
+
if (!ctx->new_table->fts
&& innobase_fulltext_exist(altered_table)) {
ctx->new_table->fts = fts_create(
ctx->new_table);
ctx->new_table->fts->doc_col = fts_doc_id_col;
}
+
+ /* Check if we need to update mtypes of legacy GIS columns.
+ This check is only needed when we don't have to rebuild
+ the table, since rebuild would update all mtypes for GIS
+ columns */
+ error = innobase_check_gis_columns(
+ ha_alter_info, ctx->new_table, ctx->trx);
+ if (error != DB_SUCCESS) {
+ ut_ad(error == DB_ERROR);
+ error = DB_UNSUPPORTED;
+ goto error_handling;
+ }
}
/* Assign table_id, so that no table id of
@@ -3101,6 +4821,13 @@ prepare_inplace_alter_table_dict(
/* Create the indexes in SYS_INDEXES and load into dictionary. */
for (ulint a = 0; a < ctx->num_to_add_index; a++) {
+ if (index_defs[a].ind_type & DICT_VIRTUAL
+ && ctx->num_to_drop_vcol > 0 && !new_clustered) {
+ innodb_v_adjust_idx_col(ha_alter_info, old_table,
+ ctx->num_to_drop_vcol,
+ &index_defs[a]);
+ }
+
DBUG_EXECUTE_IF(
"create_index_metadata_fail",
if (a + 1 == ctx->num_to_add_index) {
@@ -3109,7 +4836,7 @@ prepare_inplace_alter_table_dict(
goto index_created;
});
ctx->add_index[a] = row_merge_create_index(
- ctx->trx, ctx->new_table, &index_defs[a]);
+ ctx->trx, ctx->new_table, &index_defs[a], add_v);
#ifndef DBUG_OFF
index_created:
#endif
@@ -3121,6 +4848,21 @@ index_created:
goto error_handling;
}
+ /* For ALTER TABLE...FORCE or OPTIMIZE TABLE, we may
+ only issue warnings, because there will be no schema change. */
+ if (!info.row_size_is_acceptable(
+ *ctx->add_index[a],
+ !!(ha_alter_info->handler_flags
+ & ~(INNOBASE_INPLACE_IGNORE
+ | INNOBASE_ALTER_NOVALIDATE
+ | Alter_inplace_info::RECREATE_TABLE)))) {
+ error = DB_TOO_BIG_RECORD;
+ goto error_handling;
+ }
+
+ DBUG_ASSERT(ctx->add_index[a]->is_committed()
+ == !!new_clustered);
+
if (ctx->add_index[a]->type & DICT_FTS) {
DBUG_ASSERT(num_fts_index);
DBUG_ASSERT(!fts_index);
@@ -3150,8 +4892,8 @@ index_created:
rw_lock_x_lock(&ctx->add_index[a]->lock);
bool ok = row_log_allocate(ctx->add_index[a],
- NULL, true, NULL,
- NULL, path);
+ NULL, true, NULL, NULL,
+ path);
rw_lock_x_unlock(&ctx->add_index[a]->lock);
if (!ok) {
@@ -3167,22 +4909,49 @@ index_created:
error = DB_OUT_OF_MEMORY;
goto error_handling;);
- if (new_clustered && ctx->online) {
- /* Allocate a log for online table rebuild. */
- dict_index_t* clust_index = dict_table_get_first_index(
+ if (new_clustered) {
+ dict_index_t* clust_index = dict_table_get_first_index(
user_table);
+ dict_index_t* new_clust_index = dict_table_get_first_index(
+ ctx->new_table);
+ ctx->skip_pk_sort = innobase_pk_order_preserved(
+ ctx->col_map, clust_index, new_clust_index);
+
+ DBUG_EXECUTE_IF("innodb_alter_table_pk_assert_no_sort",
+ DBUG_ASSERT(ctx->skip_pk_sort););
+
+ DBUG_ASSERT(!ctx->new_table->persistent_autoinc);
+ if (const Field* ai = altered_table->found_next_number_field) {
+ const unsigned col_no = innodb_col_no(ai);
+
+ ctx->new_table->persistent_autoinc = 1
+ + dict_table_get_nth_col_pos(
+ ctx->new_table, col_no, NULL);
+
+ /* Initialize the AUTO_INCREMENT sequence
+ to the rebuilt table from the old one. */
+ if (!old_table->found_next_number_field
+ || dict_table_is_discarded(user_table)) {
+ } else if (ib_uint64_t autoinc
+ = btr_read_autoinc(clust_index)) {
+ btr_write_autoinc(new_clust_index, autoinc);
+ }
+ }
- rw_lock_x_lock(&clust_index->lock);
- bool ok = row_log_allocate(
- clust_index, ctx->new_table,
- !(ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_PK_INDEX),
- ctx->add_cols, ctx->col_map, path);
- rw_lock_x_unlock(&clust_index->lock);
+ if (ctx->online) {
+ /* Allocate a log for online table rebuild. */
+ rw_lock_x_lock(&clust_index->lock);
+ bool ok = row_log_allocate(
+ clust_index, ctx->new_table,
+ !(ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_PK_INDEX),
+ ctx->add_cols, ctx->col_map, path);
+ rw_lock_x_unlock(&clust_index->lock);
- if (!ok) {
- error = DB_OUT_OF_MEMORY;
- goto error_handling;
+ if (!ok) {
+ error = DB_OUT_OF_MEMORY;
+ goto error_handling;
+ }
}
}
@@ -3210,11 +4979,17 @@ op_ok:
#endif /* UNIV_DEBUG */
ut_ad(ctx->trx->dict_operation_lock_mode == RW_X_LATCH);
ut_ad(mutex_own(&dict_sys->mutex));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
DICT_TF2_FLAG_SET(ctx->new_table, DICT_TF2_FTS);
+ if (new_clustered) {
+ /* For !new_clustered, this will be set at
+ commit_cache_norebuild(). */
+ ctx->new_table->fts_doc_id_index
+ = dict_table_get_index_on_name(
+ ctx->new_table, FTS_DOC_ID_INDEX_NAME);
+ DBUG_ASSERT(ctx->new_table->fts_doc_id_index != NULL);
+ }
/* This function will commit the transaction and reset
the trx_t::dict_operation flag on success. */
@@ -3235,7 +5010,7 @@ op_ok:
|| ib_vector_size(ctx->new_table->fts->indexes) == 0) {
error = fts_create_common_tables(
ctx->trx, ctx->new_table,
- user_table->name, TRUE);
+ user_table->name.m_name, TRUE);
DBUG_EXECUTE_IF(
"innodb_test_fail_after_fts_common_table",
@@ -3295,6 +5070,9 @@ error_handling:
case DB_DUPLICATE_KEY:
my_error(ER_DUP_KEY, MYF(0), "SYS_INDEXES");
break;
+ case DB_UNSUPPORTED:
+ my_error(ER_TABLE_CANT_HANDLE_SPKEYS, MYF(0), "SYS_COLUMNS");
+ break;
default:
my_error_innodb(error, table_name, user_table->flags);
}
@@ -3317,17 +5095,7 @@ error_handled:
ctx->new_table, ctx->trx);
}
- dict_table_close(ctx->new_table, TRUE, FALSE);
-
-#if defined UNIV_DEBUG || defined UNIV_DDL_DEBUG
- /* Nobody should have initialized the stats of the
- newly created table yet. When this is the case, we
- know that it has not been added for background stats
- gathering. */
- ut_a(!ctx->new_table->stat_initialized);
-#endif /* UNIV_DEBUG || UNIV_DDL_DEBUG */
-
- row_merge_drop_table(ctx->trx, ctx->new_table);
+ dict_table_close_and_drop(ctx->trx, ctx->new_table);
/* Free the log for online table rebuild, if
one was allocated. */
@@ -3351,7 +5119,7 @@ error_handled:
/* n_ref_count must be 1, because purge cannot
be executing on this very table as we are
holding dict_operation_lock X-latch. */
- DBUG_ASSERT(user_table->n_ref_count == 1 || ctx->online);
+ DBUG_ASSERT(user_table->get_ref_count() == 1 || ctx->online);
online_retry_drop_indexes_with_trx(user_table, ctx->trx);
} else {
@@ -3366,7 +5134,7 @@ error_handled:
err_exit:
/* Clear the to_be_dropped flag in the data dictionary cache. */
for (ulint i = 0; i < ctx->num_to_drop_index; i++) {
- DBUG_ASSERT(*ctx->drop_index[i]->name != TEMP_INDEX_PREFIX);
+ DBUG_ASSERT(ctx->drop_index[i]->is_committed());
DBUG_ASSERT(ctx->drop_index[i]->to_be_dropped);
ctx->drop_index[i]->to_be_dropped = 0;
}
@@ -3403,9 +5171,6 @@ innobase_check_foreign_key_index(
ulint n_drop_fk) /*!< in: Number of foreign keys
to drop */
{
- ut_ad(index != NULL);
- ut_ad(indexed_table != NULL);
-
const dict_foreign_set* fks = &indexed_table->referenced_set;
/* Check for all FK references from other tables to the index. */
@@ -3429,8 +5194,8 @@ innobase_check_foreign_key_index(
foreign->referenced_col_names,
foreign->n_fields,
ha_alter_info->key_info_buffer,
- ha_alter_info->index_add_buffer,
- ha_alter_info->index_add_count)) {
+ span<uint>(ha_alter_info->index_add_buffer,
+ ha_alter_info->index_add_count))) {
/* Index cannot be dropped. */
trx->error_info = index;
@@ -3464,8 +5229,8 @@ innobase_check_foreign_key_index(
foreign->foreign_col_names,
foreign->n_fields,
ha_alter_info->key_info_buffer,
- ha_alter_info->index_add_buffer,
- ha_alter_info->index_add_count)) {
+ span<uint>(ha_alter_info->index_add_buffer,
+ ha_alter_info->index_add_count))) {
/* Index cannot be dropped. */
trx->error_info = index;
@@ -3476,19 +5241,75 @@ innobase_check_foreign_key_index(
return(false);
}
+
+/** Fill the stored column information in the s_cols list.
+@param[in] altered_table MySQL table object
+@param[in] table InnoDB table object
+@param[out] s_cols list of stored columns
+@param[out] s_heap heap for storing stored
+column information */
+static
+void
+alter_fill_stored_column(
+ const TABLE* altered_table,
+ dict_table_t* table,
+ dict_s_col_list** s_cols,
+ mem_heap_t** s_heap)
+{
+ ulint n_cols = altered_table->s->fields;
+ ulint stored_col_no = 0;
+
+ for (ulint i = 0; i < n_cols; i++) {
+ Field* field = altered_table->field[i];
+ dict_s_col_t s_col;
+
+ if (field->stored_in_db()) {
+ stored_col_no++;
+ }
+
+ if (!innobase_is_s_fld(field)) {
+ continue;
+ }
+
+ ulint num_base = 0;
+ dict_col_t* col = dict_table_get_nth_col(table,
+ stored_col_no);
+
+ s_col.m_col = col;
+ s_col.s_pos = i;
+
+ if (*s_cols == NULL) {
+ *s_cols = UT_NEW_NOKEY(dict_s_col_list());
+ *s_heap = mem_heap_create(1000);
+ }
+
+ if (num_base != 0) {
+ s_col.base_col = static_cast<dict_col_t**>(mem_heap_zalloc(
+ *s_heap, num_base * sizeof(dict_col_t*)));
+ } else {
+ s_col.base_col = NULL;
+ }
+
+ s_col.num_base = num_base;
+ innodb_base_col_setup_for_stored(table, field, &s_col);
+ (*s_cols)->push_back(s_col);
+ }
+}
+
+
/** Allows InnoDB to update internal structures with concurrent
writes blocked (provided that check_if_supported_inplace_alter()
did not return HA_ALTER_INPLACE_NO_LOCK).
This will be invoked before inplace_alter_table().
-@param altered_table TABLE object for new version of table.
-@param ha_alter_info Structure describing changes to be done
+@param altered_table TABLE object for new version of table.
+@param ha_alter_info Structure describing changes to be done
by ALTER TABLE and holding data used during in-place alter.
-@retval true Failure
-@retval false Success
+@retval true Failure
+@retval false Success
*/
-UNIV_INTERN
+
bool
ha_innobase::prepare_inplace_alter_table(
/*=====================================*/
@@ -3497,16 +5318,16 @@ ha_innobase::prepare_inplace_alter_table(
{
dict_index_t** drop_index; /*!< Index to be dropped */
ulint n_drop_index; /*!< Number of indexes to drop */
+ dict_index_t** rename_index; /*!< Indexes to be renamed */
+ ulint n_rename_index; /*!< Number of indexes to rename */
dict_foreign_t**drop_fk; /*!< Foreign key constraints to drop */
ulint n_drop_fk; /*!< Number of foreign keys to drop */
dict_foreign_t**add_fk = NULL; /*!< Foreign key constraints to drop */
ulint n_add_fk; /*!< Number of foreign keys to drop */
dict_table_t* indexed_table; /*!< Table where indexes are created */
- mem_heap_t* heap;
+ mem_heap_t* heap;
const char** col_names;
int error;
- ulint flags;
- ulint flags2;
ulint max_col_len;
ulint add_autoinc_col_no = ULINT_UNDEFINED;
ulonglong autoinc_col_max_value = 0;
@@ -3514,6 +5335,8 @@ ha_innobase::prepare_inplace_alter_table(
bool add_fts_doc_id = false;
bool add_fts_doc_id_idx = false;
bool add_fts_idx = false;
+ dict_s_col_list*s_cols = NULL;
+ mem_heap_t* s_heap = NULL;
DBUG_ENTER("prepare_inplace_alter_table");
DBUG_ASSERT(!ha_alter_info->handler_ctx);
@@ -3528,7 +5351,7 @@ ha_innobase::prepare_inplace_alter_table(
MONITOR_ATOMIC_INC(MONITOR_PENDING_ALTER_TABLE);
#ifdef UNIV_DEBUG
- for (dict_index_t* index = dict_table_get_first_index(prebuilt->table);
+ for (dict_index_t* index = dict_table_get_first_index(m_prebuilt->table);
index;
index = dict_table_get_next_index(index)) {
ut_ad(!index->to_be_dropped);
@@ -3537,15 +5360,42 @@ ha_innobase::prepare_inplace_alter_table(
ut_d(mutex_enter(&dict_sys->mutex));
ut_d(dict_table_check_for_dup_indexes(
- prebuilt->table, CHECK_ABORTED_OK));
+ m_prebuilt->table, CHECK_ABORTED_OK));
ut_d(mutex_exit(&dict_sys->mutex));
if (!(ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)) {
/* Nothing to do */
- goto func_exit;
+ DBUG_ASSERT(m_prebuilt->trx->dict_operation_lock_mode == 0);
+ DBUG_RETURN(false);
}
- indexed_table = prebuilt->table;
+ indexed_table = m_prebuilt->table;
+
+ /* ALTER TABLE will not implicitly move a table from a single-table
+ tablespace to the system tablespace when innodb_file_per_table=OFF.
+ But it will implicitly move a table from the system tablespace to a
+ single-table tablespace if innodb_file_per_table = ON. */
+
+ create_table_info_t info(m_user_thd,
+ altered_table,
+ ha_alter_info->create_info,
+ NULL,
+ NULL,
+ srv_file_per_table);
+
+ info.set_tablespace_type(indexed_table->space != TRX_SYS_SPACE);
+
+ if (ha_alter_info->handler_flags & Alter_inplace_info::ADD_INDEX) {
+ if (info.gcols_in_fulltext_or_spatial()) {
+ goto err_exit_no_heap;
+ }
+ }
if (indexed_table->is_readable()) {
} else {
@@ -3557,15 +5407,16 @@ ha_innobase::prepare_inplace_alter_table(
if (space()) {
String str;
const char* engine= table_type();
- char buf[MAX_FULL_NAME_LEN];
- ut_format_name(indexed_table->name, TRUE, buf, sizeof(buf));
- push_warning_printf(user_thd, Sql_condition::WARN_LEVEL_WARN,
+ push_warning_printf(
+ m_user_thd,
+ Sql_condition::WARN_LEVEL_WARN,
HA_ERR_DECRYPTION_FAILED,
"Table %s in file %s is encrypted but encryption service or"
" used key_id is not available. "
" Can't continue reading table.",
- buf, space()->chain.start->name);
+ table_share->table_name.str,
+ space()->chain.start->name);
my_error(ER_GET_ERRMSG, MYF(0), HA_ERR_DECRYPTION_FAILED, str.c_ptr(), engine);
DBUG_RETURN(true);
@@ -3575,30 +5426,21 @@ ha_innobase::prepare_inplace_alter_table(
if (indexed_table->corrupted
|| dict_table_get_first_index(indexed_table) == NULL
- || dict_index_is_corrupted(
- dict_table_get_first_index(indexed_table))) {
+ || dict_table_get_first_index(indexed_table)->is_corrupted()) {
/* The clustered index is corrupted. */
my_error(ER_CHECK_NO_SUCH_TABLE, MYF(0));
DBUG_RETURN(true);
- }
+ } else {
+ const char* invalid_opt = info.create_options_are_invalid();
- if (ha_alter_info->handler_flags
- & Alter_inplace_info::CHANGE_CREATE_OPTION) {
/* Check engine specific table options */
- if (const char* invalid_tbopt = check_table_options(
- user_thd, altered_table,
- ha_alter_info->create_info,
- prebuilt->table->space != 0,
- srv_file_format)) {
+ if (const char* invalid_tbopt = info.check_table_options()) {
my_error(ER_ILLEGAL_HA_CREATE_OPTION, MYF(0),
table_type(), invalid_tbopt);
goto err_exit_no_heap;
}
- if (const char* invalid_opt = create_options_are_invalid(
- user_thd, altered_table,
- ha_alter_info->create_info,
- prebuilt->table->space != 0)) {
+ if (invalid_opt) {
my_error(ER_ILLEGAL_HA_CREATE_OPTION, MYF(0),
table_type(), invalid_opt);
goto err_exit_no_heap;
@@ -3607,18 +5449,20 @@ ha_innobase::prepare_inplace_alter_table(
/* Check if any index name is reserved. */
if (innobase_index_name_is_reserved(
- user_thd,
+ m_user_thd,
ha_alter_info->key_info_buffer,
ha_alter_info->key_count)) {
err_exit_no_heap:
- DBUG_ASSERT(prebuilt->trx->dict_operation_lock_mode == 0);
+ DBUG_ASSERT(m_prebuilt->trx->dict_operation_lock_mode == 0);
if (ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) {
- online_retry_drop_indexes(prebuilt->table, user_thd);
+
+ online_retry_drop_indexes(
+ m_prebuilt->table, m_user_thd);
}
DBUG_RETURN(true);
}
- indexed_table = prebuilt->table;
+ indexed_table = m_prebuilt->table;
/* Check that index keys are sensible */
error = innobase_check_index_keys(ha_alter_info, indexed_table);
@@ -3663,16 +5507,18 @@ check_if_ok_to_rename:
}
/* Prohibit renaming a column to an internal column. */
- const char* s = prebuilt->table->col_names;
+ const char* s = m_prebuilt->table->col_names;
unsigned j;
/* Skip user columns.
MySQL should have checked these already.
We want to allow renaming of c1 to c2, c2 to c1. */
for (j = 0; j < table->s->fields; j++) {
- s += strlen(s) + 1;
+ if (table->field[j]->stored_in_db()) {
+ s += strlen(s) + 1;
+ }
}
- for (; j < prebuilt->table->n_def; j++) {
+ for (; j < m_prebuilt->table->n_def; j++) {
if (!my_strcasecmp(
system_charset_info, name, s)) {
my_error(ER_WRONG_COLUMN_NAME, MYF(0),
@@ -3685,22 +5531,18 @@ check_if_ok_to_rename:
}
}
- if (!innobase_table_flags(altered_table,
- ha_alter_info->create_info,
- user_thd,
- srv_file_per_table
- || indexed_table->space != 0,
- &flags, &flags2)) {
+ if (!info.innobase_table_flags()) {
goto err_exit_no_heap;
}
- /* Preserve this flag, because it currenlty can't be changed during
- ALTER TABLE*/
- if (flags2 & DICT_TF2_USE_TABLESPACE) {
- flags |= prebuilt->table->flags & 1U << DICT_TF_POS_DATA_DIR;
+ if (info.flags2() & DICT_TF2_USE_FILE_PER_TABLE) {
+ /* Preserve the DATA DIRECTORY attribute, because it
+ currently cannot be changed during ALTER TABLE. */
+ info.flags_set(m_prebuilt->table->flags
+ & 1U << DICT_TF_POS_DATA_DIR);
}
- max_col_len = DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags);
+ max_col_len = DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(info.flags());
/* Check each index's column length to make sure they do not
exceed limit */
@@ -3720,7 +5562,7 @@ check_if_ok_to_rename:
continue;
}
- if (innobase_check_column_length(max_col_len, key)) {
+ if (too_big_key_part_length(max_col_len, *key)) {
my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0),
max_col_len);
goto err_exit_no_heap;
@@ -3751,11 +5593,11 @@ check_if_ok_to_rename:
/* We need to drop any corrupted fts indexes
before we add a new fts index. */
if (add_fts_idx && index->type & DICT_CORRUPT) {
- ib_errf(user_thd, IB_LOG_LEVEL_ERROR,
+ ib_errf(m_user_thd, IB_LOG_LEVEL_ERROR,
ER_INNODB_INDEX_CORRUPT,
"Fulltext index '%s' is corrupt. "
"you should drop this index first.",
- index->name);
+ index->name());
goto err_exit_no_heap;
}
@@ -3815,8 +5657,8 @@ check_if_ok_to_rename:
dict_foreign_t* foreign;
for (dict_foreign_set::iterator it
- = prebuilt->table->foreign_set.begin();
- it != prebuilt->table->foreign_set.end();
+ = m_prebuilt->table->foreign_set.begin();
+ it != m_prebuilt->table->foreign_set.end();
++it) {
foreign = *it;
@@ -3835,7 +5677,7 @@ check_if_ok_to_rename:
}
my_error(ER_CANT_DROP_FIELD_OR_KEY, MYF(0),
- drop->name);
+ drop->type_name(), drop->name);
goto err_exit;
found_fk:
for (ulint i = n_drop_fk; i--; ) {
@@ -3849,6 +5691,7 @@ dup_fk:
}
DBUG_ASSERT(n_drop_fk > 0);
+
DBUG_ASSERT(n_drop_fk
<= ha_alter_info->alter_info->drop_list.elements);
} else {
@@ -3872,19 +5715,19 @@ dup_fk:
const KEY* key
= ha_alter_info->index_drop_buffer[i];
dict_index_t* index
- = dict_table_get_index_on_name_and_min_id(
+ = dict_table_get_index_on_name(
indexed_table, key->name);
if (!index) {
push_warning_printf(
- user_thd,
+ m_user_thd,
Sql_condition::WARN_LEVEL_WARN,
HA_ERR_WRONG_INDEX,
- "InnoDB could not find key "
- "with name %s", key->name);
+ "InnoDB could not find key"
+ " with name %s", key->name);
} else {
ut_ad(!index->to_be_dropped);
- if (!dict_index_is_clust(index)) {
+ if (!index->is_primary()) {
drop_index[n_drop_index++] = index;
} else {
drop_primary = index;
@@ -3901,8 +5744,8 @@ dup_fk:
&& !DICT_TF2_FLAG_IS_SET(
indexed_table, DICT_TF2_FTS_HAS_DOC_ID)) {
dict_index_t* fts_doc_index
- = dict_table_get_index_on_name(
- indexed_table, FTS_DOC_ID_INDEX_NAME);
+ = indexed_table->fts_doc_id_index;
+ ut_ad(fts_doc_index);
// Add some fault tolerance for non-debug builds.
if (fts_doc_index == NULL) {
@@ -3932,7 +5775,7 @@ check_if_can_drop_indexes:
/* Prevent a race condition between DROP INDEX and
CREATE TABLE adding FOREIGN KEY constraints. */
- row_mysql_lock_data_dictionary(prebuilt->trx);
+ row_mysql_lock_data_dictionary(m_prebuilt->trx);
if (!n_drop_index) {
drop_index = NULL;
@@ -3944,19 +5787,19 @@ check_if_can_drop_indexes:
}
}
- if (prebuilt->trx->check_foreigns) {
+ if (m_prebuilt->trx->check_foreigns) {
for (uint i = 0; i < n_drop_index; i++) {
- dict_index_t* index = drop_index[i];
+ dict_index_t* index = drop_index[i];
if (innobase_check_foreign_key_index(
- ha_alter_info, index,
- indexed_table, col_names,
- prebuilt->trx, drop_fk, n_drop_fk)) {
+ ha_alter_info, index,
+ indexed_table, col_names,
+ m_prebuilt->trx, drop_fk, n_drop_fk)) {
row_mysql_unlock_data_dictionary(
- prebuilt->trx);
- prebuilt->trx->error_info = index;
+ m_prebuilt->trx);
+ m_prebuilt->trx->error_info = index;
print_error(HA_ERR_DROP_INDEX_FK,
- MYF(0));
+ MYF(0));
goto err_exit;
}
}
@@ -3964,17 +5807,17 @@ check_if_can_drop_indexes:
/* If a primary index is dropped, need to check
any depending foreign constraints get affected */
if (drop_primary
- && innobase_check_foreign_key_index(
- ha_alter_info, drop_primary,
- indexed_table, col_names,
- prebuilt->trx, drop_fk, n_drop_fk)) {
- row_mysql_unlock_data_dictionary(prebuilt->trx);
+ && innobase_check_foreign_key_index(
+ ha_alter_info, drop_primary,
+ indexed_table, col_names,
+ m_prebuilt->trx, drop_fk, n_drop_fk)) {
+ row_mysql_unlock_data_dictionary(m_prebuilt->trx);
print_error(HA_ERR_DROP_INDEX_FK, MYF(0));
goto err_exit;
}
}
- row_mysql_unlock_data_dictionary(prebuilt->trx);
+ row_mysql_unlock_data_dictionary(m_prebuilt->trx);
} else {
drop_index = NULL;
}
@@ -3985,23 +5828,25 @@ check_if_can_drop_indexes:
for (dict_index_t* index = dict_table_get_first_index(indexed_table);
index != NULL; index = dict_table_get_next_index(index)) {
- if (!index->to_be_dropped && dict_index_is_corrupted(index)) {
- char index_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(index_name, sizeof index_name,
- index->name, TRUE);
-
- my_error(ER_INDEX_CORRUPT, MYF(0), index_name);
+ if (!index->to_be_dropped && index->is_committed()
+ && index->is_corrupted()) {
+ my_error(ER_INDEX_CORRUPT, MYF(0), index->name());
goto err_exit;
}
}
}
+ n_rename_index = 0;
+ rename_index = NULL;
+
n_add_fk = 0;
if (ha_alter_info->handler_flags
& Alter_inplace_info::ADD_FOREIGN_KEY) {
- ut_ad(!prebuilt->trx->check_foreigns);
+ ut_ad(!m_prebuilt->trx->check_foreigns);
+
+ alter_fill_stored_column(altered_table, m_prebuilt->table,
+ &s_cols, &s_heap);
add_fk = static_cast<dict_foreign_t**>(
mem_heap_zalloc(
@@ -4011,30 +5856,40 @@ check_if_can_drop_indexes:
if (!innobase_get_foreign_key_info(
ha_alter_info, table_share,
- prebuilt->table, col_names,
+ m_prebuilt->table, col_names,
drop_index, n_drop_index,
- add_fk, &n_add_fk, prebuilt->trx)) {
+ add_fk, &n_add_fk, m_prebuilt->trx, s_cols)) {
err_exit:
if (n_drop_index) {
- row_mysql_lock_data_dictionary(prebuilt->trx);
+ row_mysql_lock_data_dictionary(m_prebuilt->trx);
/* Clear the to_be_dropped flags, which might
have been set at this point. */
for (ulint i = 0; i < n_drop_index; i++) {
- DBUG_ASSERT(*drop_index[i]->name
- != TEMP_INDEX_PREFIX);
+ ut_ad(drop_index[i]->is_committed());
drop_index[i]->to_be_dropped = 0;
}
- row_mysql_unlock_data_dictionary(prebuilt->trx);
+ row_mysql_unlock_data_dictionary(
+ m_prebuilt->trx);
}
if (heap) {
mem_heap_free(heap);
}
+ if (s_cols != NULL) {
+ UT_DELETE(s_cols);
+ mem_heap_free(s_heap);
+ }
+
goto err_exit_no_heap;
}
+
+ if (s_cols != NULL) {
+ UT_DELETE(s_cols);
+ mem_heap_free(s_heap);
+ }
}
if (!(ha_alter_info->handler_flags & INNOBASE_ALTER_DATA)
@@ -4045,20 +5900,38 @@ err_exit:
if (heap) {
ha_alter_info->handler_ctx
= new ha_innobase_inplace_ctx(
- prebuilt,
+ m_prebuilt,
drop_index, n_drop_index,
+ rename_index, n_rename_index,
drop_fk, n_drop_fk,
add_fk, n_add_fk,
ha_alter_info->online,
heap, indexed_table,
- col_names, ULINT_UNDEFINED, 0, 0);
+ col_names, ULINT_UNDEFINED, 0, 0, 0);
}
-func_exit:
- DBUG_ASSERT(prebuilt->trx->dict_operation_lock_mode == 0);
+ DBUG_ASSERT(m_prebuilt->trx->dict_operation_lock_mode == 0);
if (ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) {
- online_retry_drop_indexes(prebuilt->table, user_thd);
+
+ online_retry_drop_indexes(
+ m_prebuilt->table, m_user_thd);
+
+ }
+
+ if ((ha_alter_info->handler_flags
+ & Alter_inplace_info::DROP_VIRTUAL_COLUMN)
+ && prepare_inplace_drop_virtual(
+ ha_alter_info, altered_table, table)) {
+ DBUG_RETURN(true);
+ }
+
+ if ((ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_VIRTUAL_COLUMN)
+ && prepare_inplace_add_virtual(
+ ha_alter_info, altered_table, table)) {
+ DBUG_RETURN(true);
}
+
DBUG_RETURN(false);
}
@@ -4067,25 +5940,29 @@ func_exit:
add a Doc ID hidden column and rebuild the primary index */
if (innobase_fulltext_exist(altered_table)) {
ulint doc_col_no;
+ ulint num_v = 0;
if (!innobase_fts_check_doc_id_col(
- prebuilt->table, altered_table, &fts_doc_col_no)) {
- fts_doc_col_no = altered_table->s->stored_fields;
+ m_prebuilt->table,
+ altered_table, &fts_doc_col_no, &num_v)) {
+
+ fts_doc_col_no = altered_table->s->fields - num_v;
add_fts_doc_id = true;
add_fts_doc_id_idx = true;
push_warning_printf(
- user_thd,
+ m_user_thd,
Sql_condition::WARN_LEVEL_WARN,
HA_ERR_WRONG_INDEX,
- "InnoDB rebuilding table to add column "
- FTS_DOC_ID_COL_NAME);
+ "InnoDB rebuilding table to add"
+ " column " FTS_DOC_ID_COL_NAME);
} else if (fts_doc_col_no == ULINT_UNDEFINED) {
goto err_exit;
}
switch (innobase_fts_check_doc_id_index(
- prebuilt->table, altered_table, &doc_col_no)) {
+ m_prebuilt->table, altered_table,
+ &doc_col_no)) {
case FTS_NOT_EXIST_DOC_ID_INDEX:
add_fts_doc_id_idx = true;
break;
@@ -4094,32 +5971,27 @@ func_exit:
FTS_DOC_ID_INDEX_NAME);
goto err_exit;
case FTS_EXIST_DOC_ID_INDEX:
- DBUG_ASSERT(doc_col_no == fts_doc_col_no
- || doc_col_no == ULINT_UNDEFINED
- || (ha_alter_info->handler_flags
- & (Alter_inplace_info::ALTER_COLUMN_ORDER
- | Alter_inplace_info::DROP_COLUMN
- | Alter_inplace_info::ADD_COLUMN)));
+ DBUG_ASSERT(
+ doc_col_no == fts_doc_col_no
+ || doc_col_no == ULINT_UNDEFINED
+ || (ha_alter_info->handler_flags
+ & (Alter_inplace_info::ALTER_STORED_COLUMN_ORDER
+ | Alter_inplace_info::DROP_STORED_COLUMN
+ | Alter_inplace_info::ADD_STORED_BASE_COLUMN)));
}
}
/* See if an AUTO_INCREMENT column was added. */
- uint i = 0, innodb_idx= 0;
+ uint i = 0;
+ ulint num_v = 0;
List_iterator_fast<Create_field> cf_it(
ha_alter_info->alter_info->create_list);
while (const Create_field* new_field = cf_it++) {
const Field* field;
- if (!new_field->stored_in_db) {
- i++;
- continue;
- }
DBUG_ASSERT(i < altered_table->s->fields);
- DBUG_ASSERT(innodb_idx < altered_table->s->stored_fields);
for (uint old_i = 0; table->field[old_i]; old_i++) {
- if (!table->field[old_i]->stored_in_db)
- continue;
if (new_field->field == table->field[old_i]) {
goto found_col;
}
@@ -4143,38 +6015,105 @@ func_exit:
my_error(ER_WRONG_AUTO_KEY, MYF(0));
goto err_exit;
}
- add_autoinc_col_no = innodb_idx;
- autoinc_col_max_value = innobase_get_int_col_max_value(
- field);
+	/* Get the column number in the old table's non-virtual column array */
+ add_autoinc_col_no = i - num_v;
+
+ autoinc_col_max_value = innobase_get_int_col_max_value(field);
}
found_col:
+ num_v += !new_field->stored_in_db();
i++;
- innodb_idx++;
}
DBUG_ASSERT(heap);
- DBUG_ASSERT(user_thd == prebuilt->trx->mysql_thd);
+ DBUG_ASSERT(m_user_thd == m_prebuilt->trx->mysql_thd);
DBUG_ASSERT(!ha_alter_info->handler_ctx);
ha_alter_info->handler_ctx = new ha_innobase_inplace_ctx(
- prebuilt,
+ m_prebuilt,
drop_index, n_drop_index,
+ rename_index, n_rename_index,
drop_fk, n_drop_fk, add_fk, n_add_fk,
ha_alter_info->online,
- heap, prebuilt->table, col_names,
+ heap, m_prebuilt->table, col_names,
add_autoinc_col_no,
ha_alter_info->create_info->auto_increment_value,
- autoinc_col_max_value);
+ autoinc_col_max_value, 0);
DBUG_RETURN(prepare_inplace_alter_table_dict(
ha_alter_info, altered_table, table,
table_share->table_name.str,
- flags, flags2,
+ info.flags(), info.flags2(),
fts_doc_col_no, add_fts_doc_id,
add_fts_doc_id_idx));
}
+/** Check whether a column is part of a virtual index (an index that
+contains a virtual column) in the table
+@param[in]	table	Table containing column
+@param[in]	col	column to be checked
+@return true if this column is indexed together with virtual columns */
+static
+bool
+dict_col_in_v_indexes(
+ dict_table_t* table,
+ dict_col_t* col)
+{
+ for (dict_index_t* index = dict_table_get_next_index(
+ dict_table_get_first_index(table)); index != NULL;
+ index = dict_table_get_next_index(index)) {
+ if (!dict_index_has_virtual(index)) {
+ continue;
+ }
+ for (ulint k = 0; k < index->n_fields; k++) {
+ dict_field_t* field
+ = dict_index_get_nth_field(index, k);
+ if (field->col->ind == col->ind) {
+ return(true);
+ }
+ }
+ }
+
+ return(false);
+}
+
+/* Check whether a column length change in an ALTER operation
+requires the template to be rebuilt.
+@param[in] altered_table TABLE object for new version of table.
+@param[in] ha_alter_info Structure describing changes to be done
+ by ALTER TABLE and holding data used
+ during in-place alter.
+@param[in] table table being altered
+@return TRUE if needs rebuild. */
+static
+bool
+alter_templ_needs_rebuild(
+ TABLE* altered_table,
+ Alter_inplace_info* ha_alter_info,
+ dict_table_t* table)
+{
+ ulint i = 0;
+ List_iterator_fast<Create_field> cf_it(
+ ha_alter_info->alter_info->create_list);
+
+ for (Field** fp = altered_table->field; *fp; fp++, i++) {
+ cf_it.rewind();
+ while (const Create_field* cf = cf_it++) {
+ for (ulint j=0; j < table->n_cols; j++) {
+ dict_col_t* cols
+ = dict_table_get_nth_col(table, j);
+ if (cf->length > cols->len
+ && dict_col_in_v_indexes(table, cols)) {
+ return(true);
+ }
+ }
+ }
+ }
+
+ return(false);
+}
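To illustrate the check above, a minimal sketch (table and column names
are hypothetical) of a length-only change that still forces a template
rebuild, because the altered column is a field of an index that also
contains a virtual column:

	CREATE TABLE t (a VARCHAR(10),
			v VARCHAR(10) AS (UPPER(a)) VIRTUAL,
			KEY vk (v, a));
	-- Enlarging a within the same pack-length class raises
	-- ALTER_COLUMN_EQUAL_PACK_LENGTH; since a belongs to the
	-- virtual index vk, alter_templ_needs_rebuild() returns true.
	ALTER TABLE t MODIFY a VARCHAR(20), ALGORITHM=INPLACE;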
+
/** Get the name of an erroneous key.
 @param[in]	error_key_num	InnoDB number of the erroneous key
@param[in] ha_alter_info changes that were being performed
@@ -4201,35 +6140,38 @@ specified using Alter_inplace_info.
The level of concurrency allowed during this operation depends
on the return value from check_if_supported_inplace_alter().
-@param altered_table TABLE object for new version of table.
-@param ha_alter_info Structure describing changes to be done
+@param altered_table TABLE object for new version of table.
+@param ha_alter_info Structure describing changes to be done
by ALTER TABLE and holding data used during in-place alter.
-@retval true Failure
-@retval false Success
+@retval true Failure
+@retval false Success
*/
-UNIV_INTERN
+
bool
ha_innobase::inplace_alter_table(
/*=============================*/
TABLE* altered_table,
Alter_inplace_info* ha_alter_info)
{
- dberr_t error;
-
+ dberr_t error;
+ dict_add_v_col_t* add_v = NULL;
+ dict_vcol_templ_t* s_templ = NULL;
+ dict_vcol_templ_t* old_templ = NULL;
+ struct TABLE* eval_table = altered_table;
+ bool rebuild_templ = false;
DBUG_ENTER("inplace_alter_table");
DBUG_ASSERT(!srv_read_only_mode);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
- ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!sync_check_iterate(sync_check()));
+ ut_ad(!rw_lock_own_flagged(&dict_operation_lock,
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
- DEBUG_SYNC(user_thd, "innodb_inplace_alter_table_enter");
+ DEBUG_SYNC(m_user_thd, "innodb_inplace_alter_table_enter");
if (!(ha_alter_info->handler_flags & INNOBASE_ALTER_DATA)) {
ok_exit:
- DEBUG_SYNC(user_thd, "innodb_after_inplace_alter_table");
+ DEBUG_SYNC(m_user_thd, "innodb_after_inplace_alter_table");
DBUG_RETURN(false);
}
@@ -4245,13 +6187,76 @@ ok_exit:
DBUG_ASSERT(ctx);
DBUG_ASSERT(ctx->trx);
- DBUG_ASSERT(ctx->prebuilt == prebuilt);
+ DBUG_ASSERT(ctx->prebuilt == m_prebuilt);
+
+ dict_index_t* pk = dict_table_get_first_index(m_prebuilt->table);
+ ut_ad(pk != NULL);
+
+ /* For partitioned tables this could be already allocated from a
+ previous partition invocation. For normal tables this is NULL. */
+ UT_DELETE(ctx->m_stage);
- if (prebuilt->table->file_unreadable
- || dict_table_is_discarded(prebuilt->table)) {
+ ctx->m_stage = UT_NEW_NOKEY(ut_stage_alter_t(pk));
+
+ if (!m_prebuilt->table->is_readable()
+ || dict_table_is_discarded(m_prebuilt->table)) {
goto all_done;
}
+	/* If we are rebuilding the table, or have added virtual
+	columns in the same clause, we will need to build a table
+	template that carries translation information between the
+	MySQL TABLE and the InnoDB table, indicating the virtual
+	columns and their base columns. This is used by the computation
+	callback, so that the data in the base columns can be extracted
+	and sent to the server.
+	If a column length changes and the column is part of a virtual
+	index, then we also need to rebuild the template. */
+ rebuild_templ
+ = ctx->need_rebuild()
+ || ((ha_alter_info->handler_flags
+ & Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH)
+ && alter_templ_needs_rebuild(
+ altered_table, ha_alter_info, ctx->new_table));
+
+ if ((ctx->new_table->n_v_cols > 0) && rebuild_templ) {
+		/* Save the template if it isn't NULL, so that we can
+		restore the original state if the ALTER operation fails. */
+ if (ctx->new_table->vc_templ != NULL && !ctx->need_rebuild()) {
+ old_templ = ctx->new_table->vc_templ;
+ }
+ s_templ = UT_NEW_NOKEY(dict_vcol_templ_t());
+
+ innobase_build_v_templ(
+ altered_table, ctx->new_table, s_templ, NULL, false);
+
+ ctx->new_table->vc_templ = s_templ;
+ } else if (ctx->num_to_add_vcol > 0 && ctx->num_to_drop_vcol == 0) {
+		/* If a DROP of a virtual column is in progress, we
+		disallow in-place ADD INDEX on a newly added virtual
+		column, so this branch need not rebuild the template
+		with add_v. See also the assertion in
+		innodb_v_adjust_idx_col() */
+
+ s_templ = UT_NEW_NOKEY(dict_vcol_templ_t());
+
+ add_v = static_cast<dict_add_v_col_t*>(
+ mem_heap_alloc(ctx->heap, sizeof *add_v));
+ add_v->n_v_col = ctx->num_to_add_vcol;
+ add_v->v_col = ctx->add_vcol;
+ add_v->v_col_name = ctx->add_vcol_name;
+
+ innobase_build_v_templ(
+ altered_table, ctx->new_table, s_templ, add_v, false);
+ old_templ = ctx->new_table->vc_templ;
+ ctx->new_table->vc_templ = s_templ;
+ }
+
+	/* Dropping a virtual column without a rebuild keeps the dict
+	table unchanged; we use the old table to evaluate virtual column
+	values in innobase_get_computed_value(). */
+ if (!ctx->need_rebuild() && ctx->num_to_drop_vcol > 0) {
+ eval_table = table;
+ }
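As a hedged illustration of the branch above (names hypothetical): when
only a virtual column is dropped without a rebuild, the dict table keeps
its old shape, so any virtual column evaluation during the operation is
done against the old TABLE object (eval_table):

	CREATE TABLE t (a INT,
			v1 INT AS (a + 1) VIRTUAL,
			v2 INT AS (a * 2) VIRTUAL);
	-- No rebuild is needed; if innobase_get_computed_value() has to
	-- compute v1 while v2 is being dropped, it uses the old table.
	ALTER TABLE t DROP COLUMN v2, ALGORITHM=INPLACE;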
+
/* Read the clustered index of the table and build
indexes based on this information using temporary
files and merge sort. */
@@ -4259,19 +6264,22 @@ ok_exit:
error = DB_OUT_OF_MEMORY; goto oom;);
error = row_merge_build_indexes(
- prebuilt->trx,
- prebuilt->table, ctx->new_table,
+ m_prebuilt->trx,
+ m_prebuilt->table, ctx->new_table,
ctx->online,
ctx->add_index, ctx->add_key_numbers, ctx->num_to_add_index,
altered_table, ctx->add_cols, ctx->col_map,
- ctx->add_autoinc, ctx->sequence);
+ ctx->add_autoinc, ctx->sequence, ctx->skip_pk_sort,
+ ctx->m_stage, add_v, eval_table);
+
#ifndef DBUG_OFF
oom:
#endif /* !DBUG_OFF */
if (error == DB_SUCCESS && ctx->online && ctx->need_rebuild()) {
DEBUG_SYNC_C("row_log_table_apply1_before");
error = row_log_table_apply(
- ctx->thr, prebuilt->table, altered_table);
+ ctx->thr, m_prebuilt->table, altered_table,
+ ctx->m_stage);
}
/* Init online ddl status variables */
@@ -4279,11 +6287,20 @@ oom:
onlineddl_rowlog_pct_used = 0;
onlineddl_pct_progress = 0;
+ if (s_templ) {
+ ut_ad(ctx->need_rebuild() || ctx->num_to_add_vcol > 0
+ || rebuild_templ);
+ dict_free_vc_templ(s_templ);
+ UT_DELETE(s_templ);
+
+ ctx->new_table->vc_templ = old_templ;
+ }
+
DEBUG_SYNC_C("inplace_after_index_build");
DBUG_EXECUTE_IF("create_index_fail",
error = DB_DUPLICATE_KEY;
- prebuilt->trx->error_key_num = ULINT_UNDEFINED;);
+ m_prebuilt->trx->error_key_num = ULINT_UNDEFINED;);
/* After an error, remove all those index definitions
from the dictionary which were defined. */
@@ -4294,13 +6311,13 @@ oom:
case DB_SUCCESS:
ut_d(mutex_enter(&dict_sys->mutex));
ut_d(dict_table_check_for_dup_indexes(
- prebuilt->table, CHECK_PARTIAL_OK));
+ m_prebuilt->table, CHECK_PARTIAL_OK));
ut_d(mutex_exit(&dict_sys->mutex));
/* prebuilt->table->n_ref_count can be anything here,
given that we hold at most a shared lock on the table. */
goto ok_exit;
case DB_DUPLICATE_KEY:
- if (prebuilt->trx->error_key_num == ULINT_UNDEFINED
+ if (m_prebuilt->trx->error_key_num == ULINT_UNDEFINED
|| ha_alter_info->key_count == 0) {
/* This should be the hidden index on
FTS_DOC_ID, or there is no PRIMARY KEY in the
@@ -4308,23 +6325,23 @@ oom:
reporting a bogus duplicate key error. */
dup_key = NULL;
} else {
- DBUG_ASSERT(prebuilt->trx->error_key_num
+ DBUG_ASSERT(m_prebuilt->trx->error_key_num
< ha_alter_info->key_count);
dup_key = &ha_alter_info->key_info_buffer[
- prebuilt->trx->error_key_num];
+ m_prebuilt->trx->error_key_num];
}
print_keydup_error(altered_table, dup_key, MYF(0));
break;
case DB_ONLINE_LOG_TOO_BIG:
DBUG_ASSERT(ctx->online);
my_error(ER_INNODB_ONLINE_LOG_TOO_BIG, MYF(0),
- get_error_key_name(prebuilt->trx->error_key_num,
- ha_alter_info, prebuilt->table));
+ get_error_key_name(m_prebuilt->trx->error_key_num,
+ ha_alter_info, m_prebuilt->table));
break;
case DB_INDEX_CORRUPT:
my_error(ER_INDEX_CORRUPT, MYF(0),
- get_error_key_name(prebuilt->trx->error_key_num,
- ha_alter_info, prebuilt->table));
+ get_error_key_name(m_prebuilt->trx->error_key_num,
+ ha_alter_info, m_prebuilt->table));
break;
case DB_DECRYPTION_FAILED: {
String str;
@@ -4336,19 +6353,20 @@ oom:
default:
my_error_innodb(error,
table_share->table_name.str,
- prebuilt->table->flags);
+ m_prebuilt->table->flags);
}
/* prebuilt->table->n_ref_count can be anything here, given
that we hold at most a shared lock on the table. */
- prebuilt->trx->error_info = NULL;
+ m_prebuilt->trx->error_info = NULL;
ctx->trx->error_state = DB_SUCCESS;
+ ctx->clear_added_indexes();
DBUG_RETURN(true);
}
/** Free the modification log for online table rebuild.
-@param table table that was being rebuilt online */
+@param table table that was being rebuilt online */
static
void
innobase_online_rebuild_log_free(
@@ -4358,9 +6376,7 @@ innobase_online_rebuild_log_free(
dict_index_t* clust_index = dict_table_get_first_index(table);
ut_ad(mutex_own(&dict_sys->mutex));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
rw_lock_x_lock(&clust_index->lock);
@@ -4377,12 +6393,67 @@ innobase_online_rebuild_log_free(
rw_lock_x_unlock(&clust_index->lock);
}
+/** Check whether the given column number is referenced by a user column
+of any index that is not going to be dropped. System columns always
+count as present.
+@param[in]	table	table
+@param[in]	col_no	column number
+@param[in]	is_v	if this is a virtual column
+@param[in]	only_committed	whether to consider only committed indexes
+@retval true	the column is a system column or is referenced by an index
+@retval false	the column is not referenced by any remaining index */
+static
+bool
+check_col_exists_in_indexes(
+ const dict_table_t* table,
+ ulint col_no,
+ bool is_v,
+ bool only_committed = false)
+{
+ /* This function does not check system columns */
+ if (!is_v && dict_table_get_nth_col(table, col_no)->mtype == DATA_SYS) {
+ return(true);
+ }
+
+ for (const dict_index_t* index = dict_table_get_first_index(table);
+ index;
+ index = dict_table_get_next_index(index)) {
+
+ if (only_committed
+ ? !index->is_committed()
+ : index->to_be_dropped) {
+ continue;
+ }
+
+ for (ulint i = 0; i < index->n_user_defined_cols; i++) {
+ const dict_col_t* idx_col
+ = dict_index_get_nth_col(index, i);
+
+ if (is_v && dict_col_is_virtual(idx_col)) {
+ const dict_v_col_t* v_col = reinterpret_cast<
+ const dict_v_col_t*>(idx_col);
+ if (v_col->v_pos == col_no) {
+ return(true);
+ }
+ }
+
+ if (!is_v && !dict_col_is_virtual(idx_col)
+ && dict_col_get_no(idx_col) == col_no) {
+ return(true);
+ }
+ }
+ }
+
+ return(false);
+}
+
/** Rollback a secondary index creation, drop the indexes with
temporary index prefix
-@param user_table InnoDB table
-@param table the TABLE
-@param locked TRUE=table locked, FALSE=may need to do a lazy drop
-@param trx the transaction
+@param user_table InnoDB table
+@param table the TABLE
+@param locked TRUE=table locked, FALSE=may need to do a lazy drop
+@param trx the transaction
*/
static MY_ATTRIBUTE((nonnull))
void
@@ -4412,11 +6483,11 @@ for inplace_alter_table() and thus might be higher than during
prepare_inplace_alter_table(). (E.g concurrent writes were blocked
during prepare, but might not be during commit).
-@param ha_alter_info Data used during in-place alter.
-@param table the TABLE
-@param prebuilt the prebuilt struct
-@retval true Failure
-@retval false Success
+@param ha_alter_info Data used during in-place alter.
+@param table the TABLE
+@param prebuilt the prebuilt struct
+@retval true Failure
+@retval false Success
*/
inline MY_ATTRIBUTE((nonnull, warn_unused_result))
bool
@@ -4440,6 +6511,7 @@ rollback_inplace_alter_table(
goto func_exit;
}
+ trx_start_for_ddl(ctx->trx, TRX_DICT_OP_INDEX);
row_mysql_lock_data_dictionary(ctx->trx);
if (ctx->need_rebuild()) {
@@ -4451,13 +6523,13 @@ rollback_inplace_alter_table(
if (!ctx->new_table) {
ut_ad(ctx->need_rebuild());
} else if (ctx->need_rebuild()) {
- dberr_t err;
+ dberr_t err= DB_SUCCESS;
ulint flags = ctx->new_table->flags;
		/* Since the FTS index specific auxiliary tables have
		not yet been registered with "table->fts" by fts_add_index(),
		we need to delete them explicitly here */
- if (DICT_TF2_FLAG_IS_SET(ctx->new_table, DICT_TF2_FTS)) {
+ if (dict_table_has_fts_index(ctx->new_table)) {
err = innobase_drop_fts_index_table(
ctx->new_table, ctx->trx);
@@ -4470,18 +6542,7 @@ rollback_inplace_alter_table(
}
}
- /* Drop the table. */
- dict_table_close(ctx->new_table, TRUE, FALSE);
-
-#if defined UNIV_DEBUG || defined UNIV_DDL_DEBUG
- /* Nobody should have initialized the stats of the
- newly created table yet. When this is the case, we
- know that it has not been added for background stats
- gathering. */
- ut_a(!ctx->new_table->stat_initialized);
-#endif /* UNIV_DEBUG || UNIV_DDL_DEBUG */
-
- err = row_merge_drop_table(ctx->trx, ctx->new_table);
+ dict_table_close_and_drop(ctx->trx, ctx->new_table);
switch (err) {
case DB_SUCCESS:
@@ -4496,8 +6557,6 @@ rollback_inplace_alter_table(
& Alter_inplace_info::ADD_PK_INDEX));
DBUG_ASSERT(ctx->new_table == prebuilt->table);
- trx_start_for_ddl(ctx->trx, TRX_DICT_OP_INDEX);
-
innobase_rollback_sec_index(
prebuilt->table, table, FALSE, ctx->trx);
}
@@ -4534,8 +6593,7 @@ func_exit:
commit_inplace_alter_table(). */
for (ulint i = 0; i < ctx->num_to_drop_index; i++) {
dict_index_t* index = ctx->drop_index[i];
- DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
-
+ DBUG_ASSERT(index->is_committed());
index->to_be_dropped = 0;
}
@@ -4543,17 +6601,42 @@ func_exit:
}
}
+	/* Reset dict_col_t::ord_part for those columns that failed to
+	be indexed. We do this by checking every existing column to see
+	whether any current index still references it. */
+ for (ulint i = 0; i < dict_table_get_n_cols(prebuilt->table); i++) {
+ dict_col_t& col = prebuilt->table->cols[i];
+ if (!col.ord_part) {
+ continue;
+ }
+ if (!check_col_exists_in_indexes(prebuilt->table, i, false,
+ true)) {
+ col.ord_part = 0;
+ }
+ }
+
+ for (ulint i = 0; i < dict_table_get_n_v_cols(prebuilt->table); i++) {
+ dict_col_t& col = prebuilt->table->v_cols[i].m_col;
+ if (!col.ord_part) {
+ continue;
+ }
+ if (!check_col_exists_in_indexes(prebuilt->table, i, true,
+ true)) {
+ col.ord_part = 0;
+ }
+ }
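A sketch of what these two loops clean up, under the assumption that an
in-place index build failed part-way (hypothetical names):

	CREATE TABLE t (a INT PRIMARY KEY, b INT);
	-- Suppose this fails, e.g. because of duplicate values in b.
	-- The attempt set b's ord_part; since no committed index still
	-- orders on b, the rollback above resets ord_part to 0.
	ALTER TABLE t ADD UNIQUE INDEX ub (b), ALGORITHM=INPLACE;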
+
trx_commit_for_mysql(prebuilt->trx);
MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE);
DBUG_RETURN(fail);
}
/** Drop a FOREIGN KEY constraint from the data dictionary tables.
-@param trx data dictionary transaction
-@param table_name Table name in MySQL
-@param foreign_id Foreign key constraint identifier
-@retval true Failure
-@retval false Success */
+@param trx data dictionary transaction
+@param table_name Table name in MySQL
+@param foreign_id Foreign key constraint identifier
+@retval true Failure
+@retval false Success */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
bool
innobase_drop_foreign_try(
@@ -4567,9 +6650,7 @@ innobase_drop_foreign_try(
DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
ut_ad(mutex_own(&dict_sys->mutex));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
/* Drop the constraint from the data dictionary. */
static const char sql[] =
@@ -4602,26 +6683,27 @@ innobase_drop_foreign_try(
}
/** Rename a column in the data dictionary tables.
-@param user_table InnoDB table that was being altered
-@param trx data dictionary transaction
-@param table_name Table name in MySQL
-@param nth_col 0-based index of the column
-@param from old column name
-@param to new column name
-@param new_clustered whether the table has been rebuilt
-@retval true Failure
-@retval false Success */
+@param[in] user_table InnoDB table that was being altered
+@param[in] trx data dictionary transaction
+@param[in] table_name Table name in MySQL
+@param[in] nth_col 0-based index of the column
+@param[in] from old column name
+@param[in] to new column name
+@param[in] new_clustered whether the table has been rebuilt
+@param[in] is_virtual whether it is a virtual column
+@retval true Failure
+@retval false Success */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
bool
innobase_rename_column_try(
-/*=======================*/
const dict_table_t* user_table,
trx_t* trx,
const char* table_name,
ulint nth_col,
const char* from,
const char* to,
- bool new_clustered)
+ bool new_clustered,
+ bool is_virtual)
{
pars_info_t* info;
dberr_t error;
@@ -4631,9 +6713,7 @@ innobase_rename_column_try(
DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
ut_ad(mutex_own(&dict_sys->mutex));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
if (new_clustered) {
goto rename_foreign;
@@ -4817,13 +6897,13 @@ rename_foreign:
}
/** Rename columns in the data dictionary tables.
-@param ha_alter_info Data used during in-place alter.
-@param ctx In-place ALTER TABLE context
-@param table the TABLE
-@param trx data dictionary transaction
-@param table_name Table name in MySQL
-@retval true Failure
-@retval false Success */
+@param ha_alter_info Data used during in-place alter.
+@param ctx In-place ALTER TABLE context
+@param table the TABLE
+@param trx data dictionary transaction
+@param table_name Table name in MySQL
+@retval true Failure
+@retval false Success */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
bool
innobase_rename_columns_try(
@@ -4836,24 +6916,34 @@ innobase_rename_columns_try(
{
List_iterator_fast<Create_field> cf_it(
ha_alter_info->alter_info->create_list);
- uint i = 0;
+ uint i = 0;
+ ulint num_v = 0;
DBUG_ASSERT(ha_alter_info->handler_flags
& Alter_inplace_info::ALTER_COLUMN_NAME);
for (Field** fp = table->field; *fp; fp++, i++) {
- if (!((*fp)->flags & FIELD_IS_RENAMED) || !((*fp)->stored_in_db)) {
- continue;
+ const bool is_virtual = !(*fp)->stored_in_db();
+ if (!((*fp)->flags & FIELD_IS_RENAMED)) {
+ goto processed_field;
}
cf_it.rewind();
+
while (Create_field* cf = cf_it++) {
if (cf->field == *fp) {
+ ulint col_n = is_virtual
+ ? dict_create_v_col_pos(
+ num_v, i)
+ : i - num_v;
+
if (innobase_rename_column_try(
- ctx->old_table, trx, table_name, i,
+ ctx->old_table, trx, table_name,
+ col_n,
cf->field->field_name,
cf->field_name,
- ctx->need_rebuild())) {
+ ctx->need_rebuild(),
+ is_virtual)) {
return(true);
}
goto processed_field;
@@ -4862,139 +6952,342 @@ innobase_rename_columns_try(
ut_error;
processed_field:
+ if (is_virtual) {
+ num_v++;
+ }
+
continue;
}
return(false);
}
-/** Rename columns in the data dictionary cache
+/** Enlarge a column in the data dictionary tables.
+@param user_table InnoDB table that was being altered
+@param trx data dictionary transaction
+@param table_name Table name in MySQL
+@param nth_col 0-based index of the column
+@param new_len new column length, in bytes
+@param is_v if it's a virtual column
+@retval true Failure
+@retval false Success */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+bool
+innobase_enlarge_column_try(
+/*========================*/
+ const dict_table_t* user_table,
+ trx_t* trx,
+ const char* table_name,
+ ulint nth_col,
+ ulint new_len,
+ bool is_v)
+{
+ pars_info_t* info;
+ dberr_t error;
+#ifdef UNIV_DEBUG
+ dict_col_t* col;
+#endif /* UNIV_DEBUG */
+ dict_v_col_t* v_col;
+ ulint pos;
+
+ DBUG_ENTER("innobase_enlarge_column_try");
+
+ DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
+ ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(mutex_own(&dict_sys->mutex));
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
+
+ if (is_v) {
+ v_col = dict_table_get_nth_v_col(user_table, nth_col);
+ pos = dict_create_v_col_pos(v_col->v_pos, v_col->m_col.ind);
+#ifdef UNIV_DEBUG
+ col = &v_col->m_col;
+#endif /* UNIV_DEBUG */
+ } else {
+#ifdef UNIV_DEBUG
+ col = dict_table_get_nth_col(user_table, nth_col);
+#endif /* UNIV_DEBUG */
+ pos = nth_col;
+ }
+
+#ifdef UNIV_DEBUG
+ ut_ad(col->len < new_len);
+ switch (col->mtype) {
+ case DATA_MYSQL:
+ /* NOTE: we could allow this when !(prtype & DATA_BINARY_TYPE)
+ and ROW_FORMAT is not REDUNDANT and mbminlen<mbmaxlen.
+ That is, we treat a UTF-8 CHAR(n) column somewhat like
+ a VARCHAR. */
+ ut_error;
+ case DATA_BINARY:
+ case DATA_VARCHAR:
+ case DATA_VARMYSQL:
+ case DATA_DECIMAL:
+ case DATA_BLOB:
+ break;
+ default:
+ ut_error;
+ }
+#endif /* UNIV_DEBUG */
+ info = pars_info_create();
+
+ pars_info_add_ull_literal(info, "tableid", user_table->id);
+ pars_info_add_int4_literal(info, "nth", pos);
+ pars_info_add_int4_literal(info, "new", new_len);
+
+ trx->op_info = "resizing column in SYS_COLUMNS";
+
+ error = que_eval_sql(
+ info,
+ "PROCEDURE RESIZE_SYS_COLUMNS_PROC () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_COLUMNS SET LEN=:new\n"
+ "WHERE TABLE_ID=:tableid AND POS=:nth;\n"
+ "END;\n",
+ FALSE, trx);
+
+ DBUG_EXECUTE_IF("ib_resize_column_error",
+ error = DB_OUT_OF_FILE_SPACE;);
+
+ trx->op_info = "";
+ trx->error_state = DB_SUCCESS;
+
+ if (error != DB_SUCCESS) {
+ my_error_innodb(error, table_name, 0);
+ DBUG_RETURN(true);
+ }
+
+ DBUG_RETURN(false);
+}
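A hedged example of a statement that can reach this function (names
hypothetical): an enlargement that keeps the same pack length, so only
the LEN value in SYS_COLUMNS needs to change:

	CREATE TABLE t (c VARBINARY(100)) ENGINE=InnoDB;
	-- 100 -> 200 bytes still fits a one-byte length prefix, so the
	-- change is classified as IS_EQUAL_PACK_LENGTH and applied
	-- without a rebuild; the procedure above just updates
	-- SYS_COLUMNS.LEN.
	ALTER TABLE t MODIFY c VARBINARY(200), ALGORITHM=INPLACE;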
+
+/** Enlarge columns in the data dictionary tables.
+@param ha_alter_info Data used during in-place alter.
+@param table the TABLE
+@param user_table InnoDB table that was being altered
+@param trx data dictionary transaction
+@param table_name Table name in MySQL
+@retval true Failure
+@retval false Success */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+bool
+innobase_enlarge_columns_try(
+/*=========================*/
+ Alter_inplace_info* ha_alter_info,
+ const TABLE* table,
+ const dict_table_t* user_table,
+ trx_t* trx,
+ const char* table_name)
+{
+ List_iterator_fast<Create_field> cf_it(
+ ha_alter_info->alter_info->create_list);
+ ulint i = 0;
+ ulint num_v = 0;
+
+ for (Field** fp = table->field; *fp; fp++, i++) {
+ const bool is_v = !(*fp)->stored_in_db();
+ ulint idx = is_v ? num_v++ : i - num_v;
+
+ cf_it.rewind();
+ while (Create_field* cf = cf_it++) {
+ if (cf->field == *fp) {
+ if ((*fp)->is_equal(cf)
+ == IS_EQUAL_PACK_LENGTH
+ && innobase_enlarge_column_try(
+ user_table, trx, table_name,
+ idx, static_cast<ulint>(cf->length), is_v)) {
+ return(true);
+ }
+
+ break;
+ }
+ }
+ }
+
+ return(false);
+}
+
+/** Rename or enlarge columns in the data dictionary cache
as part of commit_cache_norebuild().
-@param ha_alter_info Data used during in-place alter.
-@param table the TABLE
-@param user_table InnoDB table that was being altered */
+@param ha_alter_info Data used during in-place alter.
+@param table the TABLE
+@param user_table InnoDB table that was being altered */
static MY_ATTRIBUTE((nonnull))
void
-innobase_rename_columns_cache(
-/*==========================*/
+innobase_rename_or_enlarge_columns_cache(
+/*=====================================*/
Alter_inplace_info* ha_alter_info,
const TABLE* table,
dict_table_t* user_table)
{
if (!(ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_COLUMN_NAME)) {
+ & (Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH
+ | Alter_inplace_info::ALTER_COLUMN_NAME))) {
return;
}
List_iterator_fast<Create_field> cf_it(
ha_alter_info->alter_info->create_list);
- uint i = 0;
+ uint i = 0;
+ ulint num_v = 0;
for (Field** fp = table->field; *fp; fp++, i++) {
- if (!((*fp)->flags & FIELD_IS_RENAMED)) {
- continue;
- }
+ const bool is_virtual = !(*fp)->stored_in_db();
cf_it.rewind();
while (Create_field* cf = cf_it++) {
- if (cf->field == *fp) {
- dict_mem_table_col_rename(user_table, i,
- cf->field->field_name,
- cf->field_name);
- goto processed_field;
+ if (cf->field != *fp) {
+ continue;
+ }
+
+ ulint col_n = is_virtual ? num_v : i - num_v;
+
+ if ((*fp)->is_equal(cf) == IS_EQUAL_PACK_LENGTH) {
+ if (is_virtual) {
+ dict_table_get_nth_v_col(
+ user_table, col_n)->m_col.len
+ = cf->length;
+ } else {
+ dict_table_get_nth_col(
+ user_table, col_n)->len
+ = cf->length;
+ }
+ }
+
+ if ((*fp)->flags & FIELD_IS_RENAMED) {
+ dict_mem_table_col_rename(
+ user_table, col_n,
+ cf->field->field_name,
+ cf->field_name, is_virtual);
}
+
+ break;
}
- ut_error;
-processed_field:
- continue;
+ if (is_virtual) {
+ num_v++;
+ }
}
}
-/** Get the auto-increment value of the table on commit.
-@param[in] ha_alter_info Data used during in-place alter
-@param[in,out] ctx In-place ALTER TABLE context
- return autoinc value in ctx->max_autoinc
-@param altered_table[in] MySQL table that is being altered
-@param old_table[in] MySQL table as it is before the ALTER operation
-retval true Failure
-@retval false Success*/
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
+/** Set the auto-increment value of the table on commit.
+@param ha_alter_info Data used during in-place alter
+@param ctx In-place ALTER TABLE context
+@param altered_table MySQL table that is being altered
+@param old_table MySQL table as it is before the ALTER operation
+@return whether the operation failed (and my_error() was called) */
+static MY_ATTRIBUTE((nonnull))
bool
-commit_get_autoinc(
-/*===============*/
+commit_set_autoinc(
Alter_inplace_info* ha_alter_info,
ha_innobase_inplace_ctx*ctx,
const TABLE* altered_table,
const TABLE* old_table)
{
-
- DBUG_ENTER("commit_get_autoinc");
+ DBUG_ENTER("commit_set_autoinc");
if (!altered_table->found_next_number_field) {
/* There is no AUTO_INCREMENT column in the table
after the ALTER operation. */
- ctx->max_autoinc = 0;
} else if (ctx->add_autoinc != ULINT_UNDEFINED) {
+ ut_ad(ctx->need_rebuild());
/* An AUTO_INCREMENT column was added. Get the last
value from the sequence, which may be based on a
supplied AUTO_INCREMENT value. */
- ctx->max_autoinc = ctx->sequence.last();
+ ib_uint64_t autoinc = ctx->sequence.last();
+ ctx->new_table->autoinc = autoinc;
+ /* Bulk index creation does not update
+ PAGE_ROOT_AUTO_INC, so we must persist the "last used"
+ value here. */
+ btr_write_autoinc(dict_table_get_first_index(ctx->new_table),
+ autoinc - 1, true);
} else if ((ha_alter_info->handler_flags
& Alter_inplace_info::CHANGE_CREATE_OPTION)
&& (ha_alter_info->create_info->used_fields
& HA_CREATE_USED_AUTO)) {
- /* Check if the table is discarded */
- if(dict_table_is_discarded(ctx->old_table)) {
+ if (dict_table_is_discarded(ctx->old_table)) {
+ my_error(ER_TABLESPACE_DISCARDED, MYF(0),
+ old_table->s->table_name.str);
DBUG_RETURN(true);
}
- /* An AUTO_INCREMENT value was supplied, but the table was not
- rebuilt. Get the user-supplied value or the last value from the
- sequence. */
- ib_uint64_t max_value_table;
- dberr_t err;
-
- Field* autoinc_field =
- old_table->found_next_number_field;
- KEY* autoinc_key =
- old_table->key_info + old_table->s->next_number_index;
-
- dict_index_t* index = dict_table_get_index_on_name(
- ctx->old_table, autoinc_key->name);
+ /* An AUTO_INCREMENT value was supplied by the user.
+ It must be persisted to the data file. */
+ const Field* ai = old_table->found_next_number_field;
+ ut_ad(!strcmp(dict_table_get_col_name(ctx->old_table,
+ innodb_col_no(ai)),
+ ai->field_name));
- ctx->max_autoinc =
- ha_alter_info->create_info->auto_increment_value;
-
- dict_table_autoinc_lock(ctx->old_table);
-
- err = row_search_max_autoinc(
- index, autoinc_field->field_name, &max_value_table);
-
- if (err != DB_SUCCESS) {
- ut_ad(0);
- ctx->max_autoinc = 0;
- } else if (ctx->max_autoinc <= max_value_table) {
- ulonglong col_max_value;
- ulonglong offset;
+ ib_uint64_t autoinc
+ = ha_alter_info->create_info->auto_increment_value;
+ if (autoinc == 0) {
+ autoinc = 1;
+ }
- col_max_value = innobase_get_int_col_max_value(
- old_table->found_next_number_field);
+ if (autoinc >= ctx->old_table->autoinc) {
+ /* Persist the predecessor of the
+ AUTO_INCREMENT value as the last used one. */
+ ctx->new_table->autoinc = autoinc--;
+ } else {
+ /* Mimic ALGORITHM=COPY in the following scenario:
+
+ CREATE TABLE t (a SERIAL);
+ INSERT INTO t SET a=100;
+ ALTER TABLE t AUTO_INCREMENT = 1;
+ INSERT INTO t SET a=NULL;
+ SELECT * FROM t;
+
+ By default, ALGORITHM=INPLACE would reset the
+ sequence to 1, while after ALGORITHM=COPY, the
+ last INSERT would use a value larger than 100.
+
+		The only way to know the current maximum counter in
+		the table is to search the index tree and compare. */
+ const dict_col_t* autoinc_col
+ = dict_table_get_nth_col(ctx->old_table,
+ innodb_col_no(ai));
+ dict_index_t* index
+ = dict_table_get_first_index(ctx->old_table);
+ while (index != NULL
+ && index->fields[0].col != autoinc_col) {
+ index = dict_table_get_next_index(index);
+ }
- offset = ctx->prebuilt->autoinc_offset;
- ctx->max_autoinc = innobase_next_autoinc(
- max_value_table, 1, 1, offset,
- col_max_value);
+ ut_ad(index);
+
+ ib_uint64_t max_in_table = index
+ ? row_search_max_autoinc(index)
+ : 0;
+
+ if (autoinc <= max_in_table) {
+ ctx->new_table->autoinc = innobase_next_autoinc(
+ max_in_table, 1,
+ ctx->prebuilt->autoinc_increment,
+ ctx->prebuilt->autoinc_offset,
+ innobase_get_int_col_max_value(ai));
+ /* Persist the maximum value as the
+ last used one. */
+ autoinc = max_in_table;
+ } else {
+ /* Persist the predecessor of the
+ AUTO_INCREMENT value as the last used one. */
+ ctx->new_table->autoinc = autoinc--;
+ }
}
- dict_table_autoinc_unlock(ctx->old_table);
- } else {
- /* An AUTO_INCREMENT value was not specified.
- Read the old counter value from the table. */
- ut_ad(old_table->found_next_number_field);
- dict_table_autoinc_lock(ctx->old_table);
- ctx->max_autoinc = ctx->old_table->autoinc;
- dict_table_autoinc_unlock(ctx->old_table);
+
+ btr_write_autoinc(dict_table_get_first_index(ctx->new_table),
+ autoinc, true);
+ } else if (ctx->need_rebuild()) {
+ /* No AUTO_INCREMENT value was specified.
+ Copy it from the old table. */
+ ctx->new_table->autoinc = ctx->old_table->autoinc;
+ /* The persistent value was already copied in
+ prepare_inplace_alter_table_dict() when ctx->new_table
+ was created. If this was a LOCK=NONE operation, the
+ AUTO_INCREMENT values would be updated during
+ row_log_table_apply(). If this was LOCK!=NONE,
+ the table contents could not possibly have changed
+ between prepare_inplace and commit_inplace. */
}
DBUG_RETURN(false);
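Continuing the scenario from the comment above, the observable effect is
as follows (a sketch; the exact value depends on the session's
auto_increment_increment and auto_increment_offset):

	CREATE TABLE t (a SERIAL);
	INSERT INTO t SET a=100;
	ALTER TABLE t AUTO_INCREMENT = 1, ALGORITHM=INPLACE;
	-- Because the code searches the index for the table maximum and
	-- advances past it, this should insert 101 rather than 1:
	INSERT INTO t SET a=NULL;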
@@ -5002,12 +7295,12 @@ commit_get_autoinc(
/** Add or drop foreign key constraints to the data dictionary tables,
but do not touch the data dictionary cache.
-@param ha_alter_info Data used during in-place alter
-@param ctx In-place ALTER TABLE context
-@param trx Data dictionary transaction
-@param table_name Table name in MySQL
-@retval true Failure
-@retval false Success
+@param ha_alter_info Data used during in-place alter
+@param ctx In-place ALTER TABLE context
+@param trx Data dictionary transaction
+@param table_name Table name in MySQL
+@retval true Failure
+@retval false Success
*/
static MY_ATTRIBUTE((nonnull, warn_unused_result))
bool
@@ -5033,7 +7326,7 @@ innobase_update_foreign_try(
|| fk->foreign_table == ctx->old_table);
dberr_t error = dict_create_add_foreign_id(
- &foreign_id, ctx->old_table->name, fk);
+ &foreign_id, ctx->old_table->name.m_name, fk);
if (error != DB_SUCCESS) {
my_error(ER_TOO_LONG_IDENT, MYF(0),
@@ -5061,7 +7354,7 @@ innobase_update_foreign_try(
names, while the columns in ctx->old_table have not
been renamed yet. */
error = dict_create_add_foreign_to_dictionary(
- (dict_table_t*)ctx->old_table,ctx->old_table->name, fk, trx);
+ ctx->old_table->name.m_name, fk, trx);
DBUG_EXECUTE_IF(
"innodb_test_cannot_add_fk_system",
@@ -5104,6 +7397,8 @@ innobase_update_foreign_cache(
DBUG_ENTER("innobase_update_foreign_cache");
+ ut_ad(mutex_own(&dict_sys->mutex));
+
user_table = ctx->old_table;
/* Discard the added foreign keys, because we will
@@ -5135,17 +7430,23 @@ innobase_update_foreign_cache(
/* Load the old or added foreign keys from the data dictionary
and prevent the table from being evicted from the data
dictionary cache (work around the lack of WL#6049). */
- err = dict_load_foreigns(user_table->name,
+ dict_names_t fk_tables;
+
+ err = dict_load_foreigns(user_table->name.m_name,
ctx->col_names, false, true,
- DICT_ERR_IGNORE_NONE);
+ DICT_ERR_IGNORE_NONE,
+ fk_tables);
if (err == DB_CANNOT_ADD_CONSTRAINT) {
+ fk_tables.clear();
+
		/* It is possible that existing foreign keys were
		loaded with "foreign_key_checks" off,
		so let's retry the loading with charset_check off */
- err = dict_load_foreigns(user_table->name,
+ err = dict_load_foreigns(user_table->name.m_name,
ctx->col_names, false, false,
- DICT_ERR_IGNORE_NONE);
+ DICT_ERR_IGNORE_NONE,
+ fk_tables);
	/* The load with "charset_check" off was successful; warn
	the user that the foreign key was loaded with mis-matched
@@ -5157,24 +7458,44 @@ innobase_update_foreign_cache(
ER_ALTER_INFO,
"Foreign key constraints for table '%s'"
" are loaded with charset check off",
- user_table->name);
+ user_table->name.m_name);
}
}
+ /* For complete loading of foreign keys, all associated tables must
+ also be loaded. */
+ while (err == DB_SUCCESS && !fk_tables.empty()) {
+ dict_table_t* table = dict_load_table(
+ fk_tables.front(), DICT_ERR_IGNORE_NONE);
+
+ if (table == NULL) {
+ err = DB_TABLE_NOT_FOUND;
+ ib::error()
+ << "Failed to load table '"
+ << table_name_t(const_cast<char*>
+ (fk_tables.front()))
+ << "' which has a foreign key constraint with"
+ << " table '" << user_table->name << "'.";
+ break;
+ }
+
+ fk_tables.pop_front();
+ }
+
DBUG_RETURN(err);
}
/** Commit the changes made during prepare_inplace_alter_table()
and inplace_alter_table() inside the data dictionary tables,
when rebuilding the table.
-@param ha_alter_info Data used during in-place alter
-@param ctx In-place ALTER TABLE context
-@param altered_table MySQL table that is being altered
-@param old_table MySQL table as it is before the ALTER operation
-@param trx Data dictionary transaction
-@param table_name Table name in MySQL
-@retval true Failure
-@retval false Success
+@param ha_alter_info Data used during in-place alter
+@param ctx In-place ALTER TABLE context
+@param altered_table MySQL table that is being altered
+@param old_table MySQL table as it is before the ALTER operation
+@param trx Data dictionary transaction
+@param table_name Table name in MySQL
+@retval true Failure
+@retval false Success
*/
inline MY_ATTRIBUTE((nonnull, warn_unused_result))
bool
@@ -5204,10 +7525,9 @@ commit_try_rebuild(
index = dict_table_get_next_index(index)) {
DBUG_ASSERT(dict_index_get_online_status(index)
== ONLINE_INDEX_COMPLETE);
- DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
- if (dict_index_is_corrupted(index)) {
- my_error(ER_INDEX_CORRUPT, MYF(0),
- index->name);
+ DBUG_ASSERT(index->is_committed());
+ if (index->is_corrupted()) {
+ my_error(ER_INDEX_CORRUPT, MYF(0), index->name());
DBUG_RETURN(true);
}
}
@@ -5223,54 +7543,11 @@ commit_try_rebuild(
for (ulint i = 0; i < ctx->num_to_drop_index; i++) {
dict_index_t* index = ctx->drop_index[i];
DBUG_ASSERT(index->table == user_table);
- DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
+ DBUG_ASSERT(index->is_committed());
DBUG_ASSERT(index->to_be_dropped);
index->to_be_dropped = 0;
}
- /* We copied the table. Any indexes that were requested to be
- dropped were not created in the copy of the table. Apply any
- last bit of the rebuild log and then rename the tables. */
-
- if (ctx->online) {
- DEBUG_SYNC_C("row_log_table_apply2_before");
- error = row_log_table_apply(
- ctx->thr, user_table, altered_table);
- ulint err_key = thr_get_trx(ctx->thr)->error_key_num;
-
- switch (error) {
- KEY* dup_key;
- case DB_SUCCESS:
- break;
- case DB_DUPLICATE_KEY:
- if (err_key == ULINT_UNDEFINED) {
- /* This should be the hidden index on
- FTS_DOC_ID. */
- dup_key = NULL;
- } else {
- DBUG_ASSERT(err_key < ha_alter_info->key_count);
- dup_key = &ha_alter_info
- ->key_info_buffer[err_key];
- }
-
- print_keydup_error(altered_table, dup_key, MYF(0));
- DBUG_RETURN(true);
- case DB_ONLINE_LOG_TOO_BIG:
- my_error(ER_INNODB_ONLINE_LOG_TOO_BIG, MYF(0),
- get_error_key_name(err_key, ha_alter_info,
- rebuilt_table));
- DBUG_RETURN(true);
- case DB_INDEX_CORRUPT:
- my_error(ER_INDEX_CORRUPT, MYF(0),
- get_error_key_name(err_key, ha_alter_info,
- rebuilt_table));
- DBUG_RETURN(true);
- default:
- my_error_innodb(error, table_name, user_table->flags);
- DBUG_RETURN(true);
- }
- }
-
if ((ha_alter_info->handler_flags
& Alter_inplace_info::ALTER_COLUMN_NAME)
&& innobase_rename_columns_try(ha_alter_info, ctx, old_table,
@@ -5298,34 +7575,20 @@ commit_try_rebuild(
user_table, rebuilt_table, ctx->tmp_name, trx);
/* We must be still holding a table handle. */
- DBUG_ASSERT(user_table->n_ref_count >= 1);
+ DBUG_ASSERT(user_table->get_ref_count() == 1);
DBUG_EXECUTE_IF("ib_ddl_crash_after_rename", DBUG_SUICIDE(););
DBUG_EXECUTE_IF("ib_rebuild_cannot_rename", error = DB_ERROR;);
- if (user_table->n_ref_count > 1) {
- /* This should only occur when an innodb_memcached
- connection with innodb_api_enable_mdl=off was started
- before commit_inplace_alter_table() locked the data
- dictionary. We must roll back the ALTER TABLE, because
- we cannot drop a table while it is being used. */
-
- /* Normally, n_ref_count must be 1, because purge
- cannot be executing on this very table as we are
- holding dict_operation_lock X-latch. */
-
- error = DB_LOCK_WAIT_TIMEOUT;
- }
-
switch (error) {
case DB_SUCCESS:
DBUG_RETURN(false);
case DB_TABLESPACE_EXISTS:
- ut_a(rebuilt_table->n_ref_count == 1);
+ ut_a(rebuilt_table->get_ref_count() == 1);
my_error(ER_TABLESPACE_EXISTS, MYF(0), ctx->tmp_name);
DBUG_RETURN(true);
case DB_DUPLICATE_KEY:
- ut_a(rebuilt_table->n_ref_count == 1);
+ ut_a(rebuilt_table->get_ref_count() == 1);
my_error(ER_TABLE_EXISTS_ERROR, MYF(0), ctx->tmp_name);
DBUG_RETURN(true);
default:
@@ -5336,7 +7599,7 @@ commit_try_rebuild(
/** Apply the changes made during commit_try_rebuild(),
to the data dictionary cache and the file system.
-@param ctx In-place ALTER TABLE context */
+@param ctx In-place ALTER TABLE context */
inline MY_ATTRIBUTE((nonnull))
void
commit_cache_rebuild(
@@ -5346,91 +7609,77 @@ commit_cache_rebuild(
dberr_t error;
DBUG_ENTER("commit_cache_rebuild");
+ DEBUG_SYNC_C("commit_cache_rebuild");
DBUG_ASSERT(ctx->need_rebuild());
DBUG_ASSERT(dict_table_is_discarded(ctx->old_table)
== dict_table_is_discarded(ctx->new_table));
const char* old_name = mem_heap_strdup(
- ctx->heap, ctx->old_table->name);
+ ctx->heap, ctx->old_table->name.m_name);
/* We already committed and redo logged the renames,
so this must succeed. */
error = dict_table_rename_in_cache(
- ctx->old_table, ctx->tmp_name, FALSE);
+ ctx->old_table, ctx->tmp_name, false);
ut_a(error == DB_SUCCESS);
error = dict_table_rename_in_cache(
- ctx->new_table, old_name, FALSE);
+ ctx->new_table, old_name, false);
ut_a(error == DB_SUCCESS);
DBUG_VOID_RETURN;
}
+/** Set of column numbers */
+typedef std::set<ulint, std::less<ulint>, ut_allocator<ulint> > col_set;
+
/** Collect the column numbers of the columns that belong
to the indexes which are being dropped.
@param[in] ctx In-place ALTER TABLE context
-@param[out] drop_col_list list which will be set, containing columns
- which is part of index being dropped */
+@param[in, out]	drop_col_list	list which will be set, containing columns
+			that are part of an index being dropped
+@param[in, out]	drop_v_col_list	list which will be set, containing
+			virtual columns that are part of an index
+			being dropped */
static
void
get_col_list_to_be_dropped(
- ha_innobase_inplace_ctx* ctx,
- std::set<ulint>& drop_col_list)
+ const ha_innobase_inplace_ctx* ctx,
+ col_set& drop_col_list,
+ col_set& drop_v_col_list)
{
for (ulint index_count = 0; index_count < ctx->num_to_drop_index;
index_count++) {
- dict_index_t* index = ctx->drop_index[index_count];
+ const dict_index_t* index = ctx->drop_index[index_count];
for (ulint col = 0; col < index->n_user_defined_cols; col++) {
- ulint col_no = dict_index_get_nth_col_no(index, col);
- drop_col_list.insert(col_no);
- }
- }
-}
+ const dict_col_t* idx_col
+ = dict_index_get_nth_col(index, col);
-/** For each column, which is part of an index which is not going to be
-dropped, it checks if the column number of the column is same as col_no
-argument passed.
-@param[in] table table object
-@param[in] col_no column number of the column which is to be checked
-@retval true column exists
-@retval false column does not exist. */
-static
-bool
-check_col_exists_in_indexes(
- const dict_table_t* table,
- ulint col_no)
-{
- for (dict_index_t* index = dict_table_get_first_index(table); index;
- index = dict_table_get_next_index(index)) {
+ if (dict_col_is_virtual(idx_col)) {
+ const dict_v_col_t* v_col
+ = reinterpret_cast<
+ const dict_v_col_t*>(idx_col);
+ drop_v_col_list.insert(v_col->v_pos);
- if (index->to_be_dropped) {
- continue;
- }
-
- for (ulint col = 0; col < index->n_user_defined_cols; col++) {
-
- ulint index_col_no = dict_index_get_nth_col_no(
- index, col);
- if (col_no == index_col_no) {
- return(true);
+ } else {
+ ulint col_no = dict_col_get_no(idx_col);
+ drop_col_list.insert(col_no);
}
}
}
-
- return(false);
}
/** Commit the changes made during prepare_inplace_alter_table()
and inplace_alter_table() inside the data dictionary tables,
when not rebuilding the table.
-@param ha_alter_info Data used during in-place alter
-@param ctx In-place ALTER TABLE context
-@param old_table MySQL table as it is before the ALTER operation
-@param trx Data dictionary transaction
-@param table_name Table name in MySQL
-@retval true Failure
-@retval false Success
+@param ha_alter_info Data used during in-place alter
+@param ctx In-place ALTER TABLE context
+@param old_table MySQL table as it is before the ALTER operation
+@param trx Data dictionary transaction
+@param table_name Table name in MySQL
+@retval true Failure
+@retval false Success
*/
inline MY_ATTRIBUTE((nonnull, warn_unused_result))
bool
@@ -5438,6 +7687,7 @@ commit_try_norebuild(
/*=================*/
Alter_inplace_info* ha_alter_info,
ha_innobase_inplace_ctx*ctx,
+ TABLE* altered_table,
const TABLE* old_table,
trx_t* trx,
const char* table_name)
@@ -5449,14 +7699,16 @@ commit_try_norebuild(
& Alter_inplace_info::DROP_FOREIGN_KEY)
|| ctx->num_to_drop_fk > 0);
DBUG_ASSERT(ctx->num_to_drop_fk
- <= ha_alter_info->alter_info->drop_list.elements);
+ <= ha_alter_info->alter_info->drop_list.elements
+ || ctx->num_to_drop_vcol
+ == ha_alter_info->alter_info->drop_list.elements);
for (ulint i = 0; i < ctx->num_to_add_index; i++) {
dict_index_t* index = ctx->add_index[i];
DBUG_ASSERT(dict_index_get_online_status(index)
== ONLINE_INDEX_COMPLETE);
- DBUG_ASSERT(*index->name == TEMP_INDEX_PREFIX);
- if (dict_index_is_corrupted(index)) {
+ DBUG_ASSERT(!index->is_committed());
+ if (index->is_corrupted()) {
/* Report a duplicate key
error for the index that was
flagged corrupted, most likely
@@ -5469,7 +7721,7 @@ commit_try_norebuild(
with a detailed reason once
WL#6379 has been implemented. */
my_error(ER_DUP_UNKNOWN_IN_INDEX,
- MYF(0), index->name + 1);
+ MYF(0), index->name());
DBUG_RETURN(true);
}
}
@@ -5480,17 +7732,27 @@ commit_try_norebuild(
dberr_t error;
- /* We altered the table in place. */
- /* Lose the TEMP_INDEX_PREFIX. */
+ /* We altered the table in place. Mark the indexes as committed. */
for (ulint i = 0; i < ctx->num_to_add_index; i++) {
dict_index_t* index = ctx->add_index[i];
DBUG_ASSERT(dict_index_get_online_status(index)
== ONLINE_INDEX_COMPLETE);
- DBUG_ASSERT(*index->name
- == TEMP_INDEX_PREFIX);
+ DBUG_ASSERT(!index->is_committed());
error = row_merge_rename_index_to_add(
trx, ctx->new_table->id, index->id);
- if (error != DB_SUCCESS) {
+ switch (error) {
+ case DB_SUCCESS:
+ break;
+ case DB_TOO_MANY_CONCURRENT_TRXS:
+			/* If we wrote some undo log here, then the
+			persistent data dictionary for this table would
+			probably be corrupted. This is because a
+			'trigger' on SYS_INDEXES could already have invoked
+			btr_free_if_exists(), which cannot be rolled back. */
+ DBUG_ASSERT(trx->undo_no == 0);
+ my_error(ER_TOO_MANY_CONCURRENT_TRXS, MYF(0));
+ DBUG_RETURN(true);
+ default:
sql_print_error(
"InnoDB: rename index to add: %lu\n",
(ulong) error);
@@ -5502,14 +7764,11 @@ commit_try_norebuild(
}
/* Drop any indexes that were requested to be dropped.
- Rename them to TEMP_INDEX_PREFIX in the data
- dictionary first. We do not bother to rename
- index->name in the dictionary cache, because the index
- is about to be freed after row_merge_drop_indexes_dict(). */
+ Flag them in the data dictionary first. */
for (ulint i = 0; i < ctx->num_to_drop_index; i++) {
dict_index_t* index = ctx->drop_index[i];
- DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
+ DBUG_ASSERT(index->is_committed());
DBUG_ASSERT(index->table == ctx->new_table);
DBUG_ASSERT(index->to_be_dropped);
@@ -5526,20 +7785,44 @@ commit_try_norebuild(
}
}
- if (!(ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_COLUMN_NAME)) {
- DBUG_RETURN(false);
+ if ((ha_alter_info->handler_flags
+ & Alter_inplace_info::ALTER_COLUMN_NAME)
+ && innobase_rename_columns_try(ha_alter_info, ctx, old_table,
+ trx, table_name)) {
+ DBUG_RETURN(true);
+ }
+
+ if ((ha_alter_info->handler_flags
+ & Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH)
+ && innobase_enlarge_columns_try(ha_alter_info, old_table,
+ ctx->old_table, trx, table_name)) {
+ DBUG_RETURN(true);
}
- DBUG_RETURN(innobase_rename_columns_try(ha_alter_info, ctx,
- old_table, trx, table_name));
+ if ((ha_alter_info->handler_flags
+ & Alter_inplace_info::DROP_VIRTUAL_COLUMN)
+ && innobase_drop_virtual_try(
+ ha_alter_info, altered_table, old_table,
+ ctx->old_table, trx)) {
+ DBUG_RETURN(true);
+ }
+
+ if ((ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_VIRTUAL_COLUMN)
+ && innobase_add_virtual_try(
+ ha_alter_info, altered_table, old_table,
+ ctx->old_table, trx)) {
+ DBUG_RETURN(true);
+ }
+
+ DBUG_RETURN(false);
}
/** Commit the changes to the data dictionary cache
after a successful commit_try_norebuild() call.
-@param ctx In-place ALTER TABLE context
-@param table the TABLE before the ALTER
-@param trx Data dictionary transaction object
+@param ctx In-place ALTER TABLE context
+@param table the TABLE before the ALTER
+@param trx Data dictionary transaction object
(will be started and committed)
@return whether all replacements were found for dropped indexes */
inline MY_ATTRIBUTE((nonnull, warn_unused_result))
@@ -5556,26 +7839,36 @@ commit_cache_norebuild(
DBUG_ASSERT(!ctx->need_rebuild());
- std::set<ulint> drop_list;
- std::set<ulint>::const_iterator col_it;
+ col_set drop_list;
+ col_set v_drop_list;
+ col_set::const_iterator col_it;
/* Check if the column, part of an index to be dropped is part of any
other index which is not being dropped. If it so, then set the ord_part
of the column to 0. */
- get_col_list_to_be_dropped(ctx, drop_list);
+ get_col_list_to_be_dropped(ctx, drop_list, v_drop_list);
- for(col_it = drop_list.begin(); col_it != drop_list.end(); ++col_it) {
- if (!check_col_exists_in_indexes(ctx->new_table, *col_it)) {
+ for (col_it = drop_list.begin(); col_it != drop_list.end(); ++col_it) {
+ if (!check_col_exists_in_indexes(ctx->new_table,
+ *col_it, false)) {
ctx->new_table->cols[*col_it].ord_part = 0;
}
}
+ for (col_it = v_drop_list.begin();
+ col_it != v_drop_list.end(); ++col_it) {
+ if (!check_col_exists_in_indexes(ctx->new_table,
+ *col_it, true)) {
+ ctx->new_table->v_cols[*col_it].m_col.ord_part = 0;
+ }
+ }
+
for (ulint i = 0; i < ctx->num_to_add_index; i++) {
dict_index_t* index = ctx->add_index[i];
DBUG_ASSERT(dict_index_get_online_status(index)
== ONLINE_INDEX_COMPLETE);
- DBUG_ASSERT(*index->name == TEMP_INDEX_PREFIX);
- index->name++;
+ DBUG_ASSERT(!index->is_committed());
+ index->set_committed(true);
}
if (ctx->num_to_drop_index) {
@@ -5584,13 +7877,13 @@ commit_cache_norebuild(
(after renaming the indexes), so that in the
event of a crash, crash recovery will drop the
indexes, because it drops all indexes whose
- names start with TEMP_INDEX_PREFIX. Once we
+ names start with TEMP_INDEX_PREFIX_STR. Once we
have started dropping an index tree, there is
no way to roll it back. */
for (ulint i = 0; i < ctx->num_to_drop_index; i++) {
dict_index_t* index = ctx->drop_index[i];
- DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
+ DBUG_ASSERT(index->is_committed());
DBUG_ASSERT(index->table == ctx->new_table);
DBUG_ASSERT(index->to_be_dropped);
@@ -5614,7 +7907,7 @@ commit_cache_norebuild(
for (ulint i = 0; i < ctx->num_to_drop_index; i++) {
dict_index_t* index = ctx->drop_index[i];
- DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
+ DBUG_ASSERT(index->is_committed());
DBUG_ASSERT(index->table == ctx->new_table);
if (index->type & DICT_FTS) {
@@ -5632,17 +7925,25 @@ commit_cache_norebuild(
trx_commit_for_mysql(trx);
}
+ ctx->new_table->fts_doc_id_index
+ = ctx->new_table->fts
+ ? dict_table_get_index_on_name(
+ ctx->new_table, FTS_DOC_ID_INDEX_NAME)
+ : NULL;
+ DBUG_ASSERT((ctx->new_table->fts == NULL)
+ == (ctx->new_table->fts_doc_id_index == NULL));
+
DBUG_RETURN(found);
}
/** Adjust the persistent statistics after non-rebuilding ALTER TABLE.
Remove statistics for dropped indexes, add statistics for created indexes
and rename statistics for renamed indexes.
-@param ha_alter_info Data used during in-place alter
-@param ctx In-place ALTER TABLE context
-@param altered_table MySQL table that is being altered
-@param table_name Table name in MySQL
-@param thd MySQL connection
+@param ha_alter_info Data used during in-place alter
+@param ctx In-place ALTER TABLE context
+@param altered_table MySQL table that is being altered
+@param table_name Table name in MySQL
+@param thd MySQL connection
*/
static
void
@@ -5663,7 +7964,19 @@ alter_stats_norebuild(
DBUG_VOID_RETURN;
}
- /* TODO: This will not drop the (unused) statistics for
+ /* Delete corresponding rows from the stats table. We do this
+ in a separate transaction from trx, because lock waits are not
+ allowed in a data dictionary transaction. (Lock waits are possible
+ on the statistics table, because it is directly accessible by users,
+ not covered by the dict_operation_lock.)
+
+ Because the data dictionary changes were already committed, orphaned
+ rows may be left in the statistics table if the system crashes.
+
+ FIXME: each change to the statistics tables is being committed in a
+ separate transaction, meaning that the operation is not atomic
+
+ FIXME: This will not drop the (unused) statistics for
FTS_DOC_ID_INDEX if it was a hidden index, dropped together
	with the last remaining FULLTEXT index. */
for (i = 0; i < ha_alter_info->index_drop_count; i++) {
@@ -5678,7 +7991,7 @@ alter_stats_norebuild(
char errstr[1024];
if (dict_stats_drop_index(
- ctx->new_table->name, key->name,
+ ctx->new_table->name.m_name, key->name,
errstr, sizeof errstr) != DB_SUCCESS) {
push_warning(thd,
Sql_condition::WARN_LEVEL_WARN,
@@ -5702,9 +8015,9 @@ alter_stats_norebuild(
/** Adjust the persistent statistics after rebuilding ALTER TABLE.
Remove statistics for dropped indexes, add statistics for created indexes
and rename statistics for renamed indexes.
-@param table InnoDB table that was rebuilt by ALTER TABLE
-@param table_name Table name in MySQL
-@param thd MySQL connection
+@param table InnoDB table that was rebuilt by ALTER TABLE
+@param table_name Table name in MySQL
+@param thd MySQL connection
*/
static
void
@@ -5721,17 +8034,30 @@ alter_stats_rebuild(
DBUG_VOID_RETURN;
}
- dberr_t ret;
+#ifndef DBUG_OFF
+ bool file_unreadable_orig = false;
+#endif /* DBUG_OFF */
+
+ DBUG_EXECUTE_IF(
+ "ib_rename_index_fail2",
+ file_unreadable_orig = table->file_unreadable;
+ table->file_unreadable = true;
+ );
+
+ dberr_t ret = dict_stats_update(table, DICT_STATS_RECALC_PERSISTENT);
- ret = dict_stats_update(table, DICT_STATS_RECALC_PERSISTENT);
+ DBUG_EXECUTE_IF(
+ "ib_rename_index_fail2",
+ table->file_unreadable = file_unreadable_orig;
+ );
if (ret != DB_SUCCESS) {
push_warning_printf(
thd,
Sql_condition::WARN_LEVEL_WARN,
ER_ALTER_INFO,
- "Error updating stats for table '%s' "
- "after table rebuild: %s",
+ "Error updating stats for table '%s'"
+ " after table rebuild: %s",
table_name, ut_strerr(ret));
}
@@ -5742,13 +8068,97 @@ alter_stats_rebuild(
# define DBUG_INJECT_CRASH(prefix, count) \
do { \
char buf[32]; \
- ut_snprintf(buf, sizeof buf, prefix "_%u", count); \
+ snprintf(buf, sizeof buf, prefix "_%u", count); \
DBUG_EXECUTE_IF(buf, DBUG_SUICIDE();); \
} while (0)
#else
# define DBUG_INJECT_CRASH(prefix, count)
#endif
+/** Apply the row log for the online table rebuild operation.
+@param[in]	ctx		in-place ALTER TABLE context
+@param[in]	ha_alter_info	alter operation information
+@param[in]	altered_table	MySQL table that is being altered
+@return true on failure, false on success */
+static bool alter_rebuild_apply_log(
+ ha_innobase_inplace_ctx* ctx,
+ Alter_inplace_info* ha_alter_info,
+ TABLE* altered_table)
+{
+ DBUG_ENTER("alter_rebuild_apply_log");
+
+ if (!ctx->online) {
+ DBUG_RETURN(false);
+ }
+
+ /* We copied the table. Any indexes that were requested to be
+ dropped were not created in the copy of the table. Apply any
+ last bit of the rebuild log and then rename the tables. */
+ dict_table_t* user_table = ctx->old_table;
+ dict_table_t* rebuilt_table = ctx->new_table;
+
+ DEBUG_SYNC_C("row_log_table_apply2_before");
+
+ dict_vcol_templ_t* s_templ = NULL;
+
+ if (ctx->new_table->n_v_cols > 0) {
+ s_templ = UT_NEW_NOKEY(
+ dict_vcol_templ_t());
+ s_templ->vtempl = NULL;
+
+ innobase_build_v_templ(altered_table, ctx->new_table, s_templ,
+ NULL, true);
+ ctx->new_table->vc_templ = s_templ;
+ }
+
+ dberr_t error = row_log_table_apply(
+ ctx->thr, user_table, altered_table,
+ static_cast<ha_innobase_inplace_ctx*>(
+ ha_alter_info->handler_ctx)->m_stage);
+
+ if (s_templ) {
+ ut_ad(ctx->need_rebuild());
+ dict_free_vc_templ(s_templ);
+ UT_DELETE(s_templ);
+ ctx->new_table->vc_templ = NULL;
+ }
+
+ ulint err_key = thr_get_trx(ctx->thr)->error_key_num;
+
+ switch (error) {
+ KEY* dup_key;
+ case DB_SUCCESS:
+ break;
+ case DB_DUPLICATE_KEY:
+ if (err_key == ULINT_UNDEFINED) {
+ /* This should be the hidden index on
+ FTS_DOC_ID. */
+ dup_key = NULL;
+ } else {
+ DBUG_ASSERT(err_key < ha_alter_info->key_count);
+ dup_key = &ha_alter_info->key_info_buffer[err_key];
+ }
+
+ print_keydup_error(altered_table, dup_key, MYF(0));
+ DBUG_RETURN(true);
+ case DB_ONLINE_LOG_TOO_BIG:
+ my_error(ER_INNODB_ONLINE_LOG_TOO_BIG, MYF(0),
+ get_error_key_name(err_key, ha_alter_info,
+ rebuilt_table));
+ DBUG_RETURN(true);
+ case DB_INDEX_CORRUPT:
+ my_error(ER_INDEX_CORRUPT, MYF(0),
+ get_error_key_name(err_key, ha_alter_info,
+ rebuilt_table));
+ DBUG_RETURN(true);
+ default:
+ my_error_innodb(error, ctx->old_table->name.m_name,
+ user_table->flags);
+ DBUG_RETURN(true);
+ }
+
+ DBUG_RETURN(false);
+}
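
Condensing the DB_DUPLICATE_KEY branch above into a sketch: error_key_num
indexes into ha_alter_info->key_info_buffer, and the sentinel ULINT_UNDEFINED
marks the implicit FTS_DOC_ID index, which has no MySQL-level KEY object to
name in the report:

	ulint	err_key = thr_get_trx(ctx->thr)->error_key_num;
	KEY*	dup_key = (err_key == ULINT_UNDEFINED)
		? NULL	/* hidden FTS_DOC_ID index: report without a key */
		: &ha_alter_info->key_info_buffer[err_key];

	print_keydup_error(altered_table, dup_key, MYF(0));
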
+
/** Commit or rollback the changes made during
prepare_inplace_alter_table() and inplace_alter_table() inside
the storage engine. Note that the allowed level of concurrency
@@ -5756,14 +8166,14 @@ during this operation will be the same as for
inplace_alter_table() and thus might be higher than during
prepare_inplace_alter_table(). (E.g concurrent writes were
blocked during prepare, but might not be during commit).
-@param altered_table TABLE object for new version of table.
-@param ha_alter_info Structure describing changes to be done
+@param altered_table TABLE object for new version of table.
+@param ha_alter_info Structure describing changes to be done
by ALTER TABLE and holding data used during in-place alter.
-@param commit true => Commit, false => Rollback.
-@retval true Failure
-@retval false Success
+@param commit true => Commit, false => Rollback.
+@retval true Failure
+@retval false Success
*/
-UNIV_INTERN
+
bool
ha_innobase::commit_inplace_alter_table(
/*====================================*/
@@ -5771,31 +8181,39 @@ ha_innobase::commit_inplace_alter_table(
Alter_inplace_info* ha_alter_info,
bool commit)
{
- ha_innobase_inplace_ctx* ctx0
- = static_cast<ha_innobase_inplace_ctx*>
+	ha_innobase_inplace_ctx*	ctx0;
+ struct mtr_buf_copy_t logs;
+
+ ctx0 = static_cast<ha_innobase_inplace_ctx*>
(ha_alter_info->handler_ctx);
+
#ifndef DBUG_OFF
- uint crash_inject_count = 1;
- uint crash_fail_inject_count = 1;
- uint failure_inject_count = 1;
-#endif
+ uint crash_inject_count = 1;
+ uint crash_fail_inject_count = 1;
+ uint failure_inject_count = 1;
+#endif /* DBUG_OFF */
DBUG_ENTER("commit_inplace_alter_table");
DBUG_ASSERT(!srv_read_only_mode);
- DBUG_ASSERT(!ctx0 || ctx0->prebuilt == prebuilt);
- DBUG_ASSERT(!ctx0 || ctx0->old_table == prebuilt->table);
+ DBUG_ASSERT(!ctx0 || ctx0->prebuilt == m_prebuilt);
+ DBUG_ASSERT(!ctx0 || ctx0->old_table == m_prebuilt->table);
DEBUG_SYNC_C("innodb_commit_inplace_alter_table_enter");
DEBUG_SYNC_C("innodb_commit_inplace_alter_table_wait");
+ if (ctx0 != NULL && ctx0->m_stage != NULL) {
+ ctx0->m_stage->begin_phase_end();
+ }
+
if (!commit) {
/* A rollback is being requested. So far we may at
most have created some indexes. If any indexes were to
be dropped, they would actually be dropped in this
method if commit=true. */
- DBUG_RETURN(rollback_inplace_alter_table(
- ha_alter_info, table, prebuilt));
+ const bool ret = rollback_inplace_alter_table(
+ ha_alter_info, table, m_prebuilt);
+ DBUG_RETURN(ret);
}
if (!(ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)) {
@@ -5819,22 +8237,22 @@ ha_innobase::commit_inplace_alter_table(
}
DBUG_ASSERT(ctx0 == ctx_array[0]);
- ut_ad(prebuilt->table == ctx0->old_table);
+ ut_ad(m_prebuilt->table == ctx0->old_table);
ha_alter_info->group_commit_ctx = NULL;
- trx_start_if_not_started_xa(prebuilt->trx);
+ trx_start_if_not_started_xa(m_prebuilt->trx, true);
for (inplace_alter_handler_ctx** pctx = ctx_array; *pctx; pctx++) {
ha_innobase_inplace_ctx* ctx
= static_cast<ha_innobase_inplace_ctx*>(*pctx);
- DBUG_ASSERT(ctx->prebuilt->trx == prebuilt->trx);
+ DBUG_ASSERT(ctx->prebuilt->trx == m_prebuilt->trx);
/* If decryption failed for old table or new table
fail here. */
- if ((ctx->old_table->file_unreadable &&
- fil_space_get(ctx->old_table->space) != NULL)||
- (ctx->new_table->file_unreadable &&
- fil_space_get(ctx->new_table->space) != NULL)) {
+ if ((!ctx->old_table->is_readable()
+ && fil_space_get(ctx->old_table->space))
+ || (!ctx->new_table->is_readable()
+ && fil_space_get(ctx->new_table->space))) {
String str;
const char* engine= table_type();
get_error_message(HA_ERR_DECRYPTION_FAILED, &str);
@@ -5851,7 +8269,7 @@ ha_innobase::commit_inplace_alter_table(
holding InnoDB locks only, not MySQL locks. */
dberr_t error = row_merge_lock_table(
- prebuilt->trx, ctx->old_table, LOCK_X);
+ m_prebuilt->trx, ctx->old_table, LOCK_X);
if (error != DB_SUCCESS) {
my_error_innodb(
@@ -5860,7 +8278,7 @@ ha_innobase::commit_inplace_alter_table(
}
}
- DEBUG_SYNC(user_thd, "innodb_alter_commit_after_lock_table");
+ DEBUG_SYNC(m_user_thd, "innodb_alter_commit_after_lock_table");
const bool new_clustered = ctx0->need_rebuild();
trx_t* trx = ctx0->trx;
@@ -5885,11 +8303,24 @@ ha_innobase::commit_inplace_alter_table(
ut_ad(!ctx->new_table->fts->add_wq);
fts_optimize_remove_table(ctx->new_table);
}
+
+		/* Apply the online log of the table before acquiring
+		data dictionary latches. The alter thread has already
+		acquired MDL_EXCLUSIVE on the table, so no further DDL
+		or DML can touch the altered table. Applying the log
+		here ensures that concurrent DDL, the purge thread and
+		other background threads do not have to wait long for
+		the dict_operation_lock. */
+ if (new_clustered && commit
+ && alter_rebuild_apply_log(
+ ctx, ha_alter_info, altered_table)) {
+ DBUG_RETURN(true);
+ }
}
if (!trx) {
DBUG_ASSERT(!new_clustered);
- trx = innobase_trx_allocate(user_thd);
+ trx = innobase_trx_allocate(m_user_thd);
}
trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
@@ -5897,6 +8328,8 @@ ha_innobase::commit_inplace_alter_table(
or lock waits can happen in it during the data dictionary operation. */
row_mysql_lock_data_dictionary(trx);
+ ut_ad(log_append_on_checkpoint(NULL) == NULL);
+
/* Prevent the background statistics collection from accessing
the tables. */
for (;;) {
@@ -5956,9 +8389,8 @@ ha_innobase::commit_inplace_alter_table(
if (retry_count < 100) {
retry_count++;
} else {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Drop index waiting for background sync"
- " to finish");
+ ib::info() << "Drop index waiting for background sync"
+ " to finish";
retry_count = 0;
}
@@ -5975,17 +8407,13 @@ ha_innobase::commit_inplace_alter_table(
DBUG_ASSERT(new_clustered == ctx->need_rebuild());
- if (commit_get_autoinc(ha_alter_info, ctx, altered_table,
- table)) {
- fail = true;
- my_error(ER_TABLESPACE_DISCARDED, MYF(0),
- table->s->table_name.str);
- goto rollback_trx;
- }
+ fail = commit_set_autoinc(ha_alter_info, ctx, altered_table,
+ table);
- if (ctx->need_rebuild()) {
+ if (fail) {
+ } else if (ctx->need_rebuild()) {
ctx->tmp_name = dict_mem_create_temporary_tablename(
- ctx->heap, ctx->new_table->name,
+ ctx->heap, ctx->new_table->name.m_name,
ctx->new_table->id);
fail = commit_try_rebuild(
@@ -5993,7 +8421,7 @@ ha_innobase::commit_inplace_alter_table(
trx, table_share->table_name.str);
} else {
fail = commit_try_norebuild(
- ha_alter_info, ctx, table, trx,
+ ha_alter_info, ctx, altered_table, table, trx,
table_share->table_name.str);
}
DBUG_INJECT_CRASH("ib_commit_inplace_crash",
@@ -6002,8 +8430,11 @@ ha_innobase::commit_inplace_alter_table(
{
/* Generate a dynamic dbug text. */
char buf[32];
- ut_snprintf(buf, sizeof buf, "ib_commit_inplace_fail_%u",
+
+ snprintf(buf, sizeof buf,
+ "ib_commit_inplace_fail_%u",
failure_inject_count++);
+
DBUG_EXECUTE_IF(buf,
my_error(ER_INTERNAL_ERROR, MYF(0),
"Injected error!");
@@ -6013,8 +8444,6 @@ ha_innobase::commit_inplace_alter_table(
#endif
}
-rollback_trx:
-
/* Commit or roll back the changes to the data dictionary. */
if (fail) {
@@ -6042,7 +8471,7 @@ rollback_trx:
/* Out of memory or a problem will occur
when renaming files. */
fail = true;
- my_error_innodb(error, ctx->old_table->name,
+ my_error_innodb(error, ctx->old_table->name.m_name,
ctx->old_table->flags);
}
DBUG_INJECT_CRASH("ib_commit_inplace_crash",
@@ -6060,10 +8489,30 @@ rollback_trx:
ut_ad(!trx->fts_trx);
if (fail) {
- mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
mtr_commit(&mtr);
trx_rollback_for_mysql(trx);
} else {
+ ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
+ ut_ad(trx->has_logged());
+
+ if (mtr.get_log()->size() > 0) {
+ ut_ad(*mtr.get_log()->front()->begin()
+ == MLOG_FILE_RENAME2);
+
+ /* Append the MLOG_FILE_RENAME2
+ records on checkpoint, as a separate
+ mini-transaction before the one that
+ contains the MLOG_CHECKPOINT marker. */
+ static const byte multi
+ = MLOG_MULTI_REC_END;
+
+ mtr.get_log()->for_each_block(logs);
+ logs.m_buf.push(&multi, sizeof multi);
+
+ log_append_on_checkpoint(&logs.m_buf);
+ }
+
/* The following call commits the
mini-transaction, making the data dictionary
transaction committed at mtr.end_lsn. The
@@ -6071,8 +8520,6 @@ rollback_trx:
log_buffer_flush_to_disk() returns. In the
logical sense the commit in the file-based
data structures happens here. */
- ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
- ut_ad(trx->insert_undo || trx->update_undo);
trx_commit_low(trx, &mtr);
}
@@ -6097,7 +8544,6 @@ rollback_trx:
update the in-memory structures, close some handles, release
temporary files, and (unless we rolled back) update persistent
statistics. */
-
for (inplace_alter_handler_ctx** pctx = ctx_array;
*pctx; pctx++) {
ha_innobase_inplace_ctx* ctx
@@ -6111,20 +8557,10 @@ rollback_trx:
if (fail) {
if (new_clustered) {
- dict_table_close(ctx->new_table,
- TRUE, FALSE);
-
-#if defined UNIV_DEBUG || defined UNIV_DDL_DEBUG
- /* Nobody should have initialized the
- stats of the newly created table
- yet. When this is the case, we know
- that it has not been added for
- background stats gathering. */
- ut_a(!ctx->new_table->stat_initialized);
-#endif /* UNIV_DEBUG || UNIV_DDL_DEBUG */
-
trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
- row_merge_drop_table(trx, ctx->new_table);
+
+ dict_table_close_and_drop(trx, ctx->new_table);
+
trx_commit_for_mysql(trx);
ctx->new_table = NULL;
} else {
@@ -6159,15 +8595,18 @@ rollback_trx:
implemented yet. */
ctx->old_table->to_be_dropped = true;
+ DBUG_PRINT("to_be_dropped",
+ ("table: %s", ctx->old_table->name.m_name));
+
/* Rename the tablespace files. */
commit_cache_rebuild(ctx);
- if (innobase_update_foreign_cache(ctx, user_thd)
+ if (innobase_update_foreign_cache(ctx, m_user_thd)
!= DB_SUCCESS
- && prebuilt->trx->check_foreigns) {
+ && m_prebuilt->trx->check_foreigns) {
foreign_fail:
push_warning_printf(
- user_thd,
+ m_user_thd,
Sql_condition::WARN_LEVEL_WARN,
ER_ALTER_INFO,
"failed to load FOREIGN KEY"
@@ -6175,26 +8614,27 @@ foreign_fail:
}
} else {
bool fk_fail = innobase_update_foreign_cache(
- ctx, user_thd) != DB_SUCCESS;
+ ctx, m_user_thd) != DB_SUCCESS;
if (!commit_cache_norebuild(ctx, table, trx)) {
fk_fail = true;
}
- innobase_rename_columns_cache(ha_alter_info, table,
- ctx->new_table);
- if (fk_fail && prebuilt->trx->check_foreigns) {
+ innobase_rename_or_enlarge_columns_cache(
+ ha_alter_info, table, ctx->new_table);
+ if (fk_fail && m_prebuilt->trx->check_foreigns) {
goto foreign_fail;
}
}
+
+ dict_mem_table_free_foreign_vcol_set(ctx->new_table);
+ dict_mem_table_fill_foreign_vcol_set(ctx->new_table);
+
DBUG_INJECT_CRASH("ib_commit_inplace_crash",
crash_inject_count++);
}
- /* Invalidate the index translation table. In partitioned
- tables, there is one TABLE_SHARE (and also only one TABLE)
- covering all partitions. */
- share->idx_trans_tbl.index_count = 0;
+ log_append_on_checkpoint(NULL);
/* Tell the InnoDB server that there might be work for
utility threads: */
@@ -6248,8 +8688,52 @@ foreign_fail:
}
}
+ if (ctx0->num_to_drop_vcol || ctx0->num_to_add_vcol) {
+ DBUG_ASSERT(ctx0->old_table->get_ref_count() == 1);
+
+ trx_commit_for_mysql(m_prebuilt->trx);
+#ifdef BTR_CUR_HASH_ADAPT
+ if (btr_search_enabled) {
+ btr_search_disable(false);
+ btr_search_enable();
+ }
+#endif /* BTR_CUR_HASH_ADAPT */
+
+ char tb_name[FN_REFLEN];
+ ut_strcpy(tb_name, m_prebuilt->table->name.m_name);
+
+ tb_name[strlen(m_prebuilt->table->name.m_name)] = 0;
+
+ dict_table_close(m_prebuilt->table, true, false);
+ dict_table_remove_from_cache(m_prebuilt->table);
+ m_prebuilt->table = dict_table_open_on_name(
+ tb_name, TRUE, TRUE, DICT_ERR_IGNORE_NONE);
+
+ /* Drop outdated table stats. */
+ char errstr[1024];
+ if (dict_stats_drop_table(
+ m_prebuilt->table->name.m_name,
+ errstr, sizeof(errstr))
+ != DB_SUCCESS) {
+ push_warning_printf(
+ m_user_thd,
+ Sql_condition::WARN_LEVEL_WARN,
+ ER_ALTER_INFO,
+ "Deleting persistent statistics"
+ " for table '%s' in"
+ " InnoDB failed: %s",
+ table->s->table_name.str,
+ errstr);
+ }
+
+ row_mysql_unlock_data_dictionary(trx);
+ trx_free_for_mysql(trx);
+ MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE);
+ DBUG_RETURN(false);
+ }
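
One ordering detail in this virtual-column branch deserves emphasis: the table
name must be copied into a local buffer before the eviction, because
dict_table_remove_from_cache() frees the object that owns name.m_name. The
reopen sequence, reduced to its core:

	char	tb_name[FN_REFLEN];
	ut_strcpy(tb_name, m_prebuilt->table->name.m_name);	/* copy first */

	dict_table_close(m_prebuilt->table, true, false);
	dict_table_remove_from_cache(m_prebuilt->table);	/* frees m_name */

	m_prebuilt->table = dict_table_open_on_name(
		tb_name, TRUE, TRUE, DICT_ERR_IGNORE_NONE);	/* fresh object */
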
+
/* Release the table locks. */
- trx_commit_for_mysql(prebuilt->trx);
+ trx_commit_for_mysql(m_prebuilt->trx);
DBUG_EXECUTE_IF("ib_ddl_crash_after_user_trx_commit", DBUG_SUICIDE(););
@@ -6260,14 +8744,6 @@ foreign_fail:
(*pctx);
DBUG_ASSERT(ctx->need_rebuild() == new_clustered);
- if (altered_table->found_next_number_field) {
- dict_table_t* t = ctx->new_table;
-
- dict_table_autoinc_lock(t);
- dict_table_autoinc_initialize(t, ctx->max_autoinc);
- dict_table_autoinc_unlock(t);
- }
-
/* Publish the created fulltext index, if any.
Note that a fulltext index can be created without
creating the clustered index, if there already exists
@@ -6310,15 +8786,20 @@ foreign_fail:
char errstr[1024];
- DBUG_ASSERT(0 == strcmp(ctx->old_table->name,
+ DBUG_ASSERT(0 == strcmp(ctx->old_table->name.m_name,
ctx->tmp_name));
+ DBUG_EXECUTE_IF(
+ "ib_rename_index_fail3",
+ DBUG_SET("+d,innodb_report_deadlock");
+ );
+
if (dict_stats_drop_table(
- ctx->new_table->name,
+ ctx->new_table->name.m_name,
errstr, sizeof(errstr))
!= DB_SUCCESS) {
push_warning_printf(
- user_thd,
+ m_user_thd,
Sql_condition::WARN_LEVEL_WARN,
ER_ALTER_INFO,
"Deleting persistent statistics"
@@ -6328,10 +8809,19 @@ foreign_fail:
errstr);
}
+ DBUG_EXECUTE_IF(
+ "ib_rename_index_fail3",
+ DBUG_SET("-d,innodb_report_deadlock");
+ );
+
DBUG_EXECUTE_IF("ib_ddl_crash_before_commit",
DBUG_SUICIDE(););
- trx_t* const user_trx = prebuilt->trx;
+ ut_ad(m_prebuilt != ctx->prebuilt
+ || ctx == ctx0);
+ bool update_own_prebuilt =
+ (m_prebuilt == ctx->prebuilt);
+ trx_t* const user_trx = m_prebuilt->trx;
row_prebuilt_free(ctx->prebuilt, TRUE);
@@ -6341,15 +8831,27 @@ foreign_fail:
before this is completed, some orphan tables
with ctx->tmp_name may be recovered. */
trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
- row_merge_drop_table(trx, ctx->old_table);
+ dberr_t error = row_merge_drop_table(trx, ctx->old_table);
+
+ if (error != DB_SUCCESS) {
+				ib::error() << "Inplace alter table " << ctx->old_table->name
+					<< ": dropping the copy of the old table"
+					<< " failed with error " << error
+					<< ". tmp_name " << (ctx->tmp_name ? ctx->tmp_name : "N/A")
+					<< " new_table " << ctx->new_table->name;
+ }
+
trx_commit_for_mysql(trx);
/* Rebuild the prebuilt object. */
ctx->prebuilt = row_create_prebuilt(
ctx->new_table, altered_table->s->reclength);
- trx_start_if_not_started(user_trx);
+ if (update_own_prebuilt) {
+ m_prebuilt = ctx->prebuilt;
+ }
+ trx_start_if_not_started(user_trx, true);
user_trx->will_lock++;
- prebuilt->trx = user_trx;
+ m_prebuilt->trx = user_trx;
}
DBUG_INJECT_CRASH("ib_commit_inplace_crash",
crash_inject_count++);
@@ -6372,7 +8874,7 @@ foreign_fail:
alter_stats_rebuild(
ctx->new_table, table->s->table_name.str,
- user_thd);
+ m_user_thd);
DBUG_INJECT_CRASH("ib_commit_inplace_crash",
crash_inject_count++);
}
@@ -6386,39 +8888,40 @@ foreign_fail:
alter_stats_norebuild(
ha_alter_info, ctx, altered_table,
- table->s->table_name.str, user_thd);
+ table->s->table_name.str, m_user_thd);
DBUG_INJECT_CRASH("ib_commit_inplace_crash",
crash_inject_count++);
}
}
+ innobase_parse_hint_from_comment(
+ m_user_thd, m_prebuilt->table, altered_table->s);
+
/* TODO: Also perform DROP TABLE and DROP INDEX after
the MDL downgrade. */
#ifndef DBUG_OFF
dict_index_t* clust_index = dict_table_get_first_index(
- prebuilt->table);
+ ctx0->prebuilt->table);
DBUG_ASSERT(!clust_index->online_log);
DBUG_ASSERT(dict_index_get_online_status(clust_index)
== ONLINE_INDEX_COMPLETE);
- for (dict_index_t* index = dict_table_get_first_index(
- prebuilt->table);
+ for (dict_index_t* index = clust_index;
index;
index = dict_table_get_next_index(index)) {
DBUG_ASSERT(!index->to_be_dropped);
}
#endif /* DBUG_OFF */
-
MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE);
DBUG_RETURN(false);
}
/**
-@param thd - the session
-@param start_value - the lower bound
-@param max_value - the upper bound (inclusive) */
-UNIV_INTERN
+@param thd the session
+@param start_value the lower bound
+@param max_value the upper bound (inclusive) */
+
ib_sequence_t::ib_sequence_t(
THD* thd,
ulonglong start_value,
@@ -6455,7 +8958,7 @@ ib_sequence_t::ib_sequence_t(
/**
Postfix increment
@return the next value to insert */
-UNIV_INTERN
+
ulonglong
ib_sequence_t::operator++(int) UNIV_NOTHROW
{
diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
index 2729a755570..960bd8113db 100644
--- a/storage/innobase/handler/i_s.cc
+++ b/storage/innobase/handler/i_s.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2014, 2019, MariaDB Corporation.
+Copyright (c) 2014, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,19 +26,14 @@ Modified Dec 29, 2014 Jan Lindström (Added sys_semaphore_waits)
*******************************************************/
#include "univ.i"
+#include <mysql_version.h>
+#include <field.h>
-#include <mysqld_error.h>
#include <sql_acl.h>
+#include <sql_show.h>
+#include <sql_time.h>
-#include <m_ctype.h>
-#include <hash.h>
-#include <myisampack.h>
-#include <mysys_err.h>
-#include <my_sys.h>
#include "i_s.h"
-#include <sql_plugin.h>
-#include <innodb_priv.h>
-
#include "btr0pcur.h"
#include "btr0types.h"
#include "dict0dict.h"
@@ -48,7 +43,6 @@ Modified Dec 29, 2014 Jan Lindström (Added sys_semaphore_waits)
#include "ibuf0ibuf.h"
#include "dict0mem.h"
#include "dict0types.h"
-#include "ha_prototypes.h"
#include "srv0start.h"
#include "trx0i_s.h"
#include "trx0trx.h"
@@ -63,6 +57,7 @@ Modified Dec 29, 2014 Jan Lindström (Added sys_semaphore_waits)
#include "sync0arr.h"
#include "fil0fil.h"
#include "fil0crypt.h"
+#include "dict0crea.h"
/** The latest successfully looked up innodb_fts_aux_table */
UNIV_INTERN table_id_t innodb_ft_aux_table_id;
@@ -75,17 +70,28 @@ struct buf_page_desc_t{
ulint type_value; /*!< Page type or page state */
};
-/** Change buffer B-tree page */
-#define I_S_PAGE_TYPE_IBUF (FIL_PAGE_TYPE_LAST + 1)
-
-/** Any states greater than I_S_PAGE_TYPE_IBUF would be treated as
-unknown. */
-#define I_S_PAGE_TYPE_UNKNOWN (I_S_PAGE_TYPE_IBUF + 1)
-
/** We also define I_S_PAGE_TYPE_INDEX as the Index Page's position
in i_s_page_type[] array */
#define I_S_PAGE_TYPE_INDEX 1
+/** Any unassigned FIL_PAGE_TYPE will be treated as unknown. */
+#define I_S_PAGE_TYPE_UNKNOWN FIL_PAGE_TYPE_UNKNOWN
+
+/** R-tree index page */
+#define I_S_PAGE_TYPE_RTREE (FIL_PAGE_TYPE_LAST + 1)
+
+/** Change buffer B-tree page */
+#define I_S_PAGE_TYPE_IBUF (FIL_PAGE_TYPE_LAST + 2)
+
+#define I_S_PAGE_TYPE_LAST I_S_PAGE_TYPE_IBUF
+
+#define I_S_PAGE_TYPE_BITS 4
+
+/* Check if we can hold all page types */
+#if I_S_PAGE_TYPE_LAST >= 1 << I_S_PAGE_TYPE_BITS
+# error i_s_page_type[] is too large
+#endif
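
The same bound can be demonstrated outside the preprocessor; a minimal
standalone illustration (the EX_ names are invented for this example, and
static_assert replaces the #error used above):

	enum { EX_PAGE_TYPE_LAST = 14, EX_PAGE_TYPE_BITS = 4 };

	/* A 4-bit bitfield holds the values 0..15, so the largest
	page-type code must stay below 1 << 4. */
	static_assert(EX_PAGE_TYPE_LAST < (1 << EX_PAGE_TYPE_BITS),
		      "page type does not fit in the bitfield");

	struct ex_page_info {
		unsigned	page_type:EX_PAGE_TYPE_BITS;
				/* mirrors buf_page_info_t::page_type */
	};
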
+
/** Name string for File Page Types */
static buf_page_desc_t i_s_page_type[] = {
{"ALLOCATED", FIL_PAGE_TYPE_ALLOCATED},
@@ -101,16 +107,13 @@ static buf_page_desc_t i_s_page_type[] = {
{"BLOB", FIL_PAGE_TYPE_BLOB},
{"COMPRESSED_BLOB", FIL_PAGE_TYPE_ZBLOB},
{"COMPRESSED_BLOB2", FIL_PAGE_TYPE_ZBLOB2},
+ {"UNKNOWN", I_S_PAGE_TYPE_UNKNOWN},
+ {"RTREE_INDEX", I_S_PAGE_TYPE_RTREE},
{"IBUF_INDEX", I_S_PAGE_TYPE_IBUF},
{"PAGE COMPRESSED", FIL_PAGE_PAGE_COMPRESSED},
- {"UNKNOWN", I_S_PAGE_TYPE_UNKNOWN}
+ {"PAGE COMPRESSED AND ENCRYPTED", FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED},
};
-/* Check if we can hold all page type in a 4 bit value */
-#if I_S_PAGE_TYPE_UNKNOWN > 1<<4
-# error "i_s_page_type[] is too large"
-#endif
-
/** This structure defines information we will fetch from pages
currently cached in the buffer pool. It will be used to populate
table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE */
@@ -126,8 +129,10 @@ struct buf_page_info_t{
unsigned io_fix:2; /*!< type of pending I/O operation */
 	unsigned	fix_count:19;	/*!< Count of how many times this
 					block is buffer-fixed */
+#ifdef BTR_CUR_HASH_ADAPT
unsigned hashed:1; /*!< Whether hash index has been
built on this page */
+#endif /* BTR_CUR_HASH_ADAPT */
unsigned is_old:1; /*!< TRUE if the block is in the old
blocks in buf_pool->LRU_old */
unsigned freed_page_clock:31; /*!< the value of
@@ -135,7 +140,7 @@ struct buf_page_info_t{
unsigned zip_ssize:PAGE_ZIP_SSIZE_BITS;
/*!< Compressed page size */
unsigned page_state:BUF_PAGE_STATE_BITS; /*!< Page state */
- unsigned page_type:4; /*!< Page type */
+ unsigned page_type:I_S_PAGE_TYPE_BITS; /*!< Page type */
unsigned num_recs:UNIV_PAGE_SIZE_SHIFT_MAX-2;
/*!< Number of records on Page */
unsigned data_size:UNIV_PAGE_SIZE_SHIFT_MAX;
@@ -194,7 +199,7 @@ Common function to fill any of the dynamic tables:
INFORMATION_SCHEMA.innodb_trx
INFORMATION_SCHEMA.innodb_locks
INFORMATION_SCHEMA.innodb_lock_waits
-@return 0 on success */
+@return 0 on success */
static
int
trx_i_s_common_fill_table(
@@ -205,7 +210,7 @@ trx_i_s_common_fill_table(
/*******************************************************************//**
Unbind a dynamic INFORMATION_SCHEMA table.
-@return 0 on success */
+@return 0 on success */
static
int
i_s_common_deinit(
@@ -214,7 +219,7 @@ i_s_common_deinit(
/*******************************************************************//**
Auxiliary function to store time_t value in MYSQL_TYPE_DATETIME
field.
-@return 0 on success */
+@return 0 on success */
static
int
field_store_time_t(
@@ -240,12 +245,15 @@ field_store_time_t(
memset(&my_time, 0, sizeof(my_time));
}
+ /* JAN: TODO: MySQL 5.7
+ return(field->store_time(&my_time, MYSQL_TIMESTAMP_DATETIME));
+ */
return(field->store_time(&my_time));
}
/*******************************************************************//**
Auxiliary function to store char* value in MYSQL_TYPE_STRING field.
-@return 0 on success */
+@return 0 on success */
int
field_store_string(
/*===============*/
@@ -253,65 +261,19 @@ field_store_string(
const char* str) /*!< in: NUL-terminated utf-8 string,
or NULL */
{
- int ret;
-
- if (str != NULL) {
-
- ret = field->store(str, static_cast<uint>(strlen(str)),
- system_charset_info);
- field->set_notnull();
- } else {
-
- ret = 0; /* success */
+ if (!str) {
field->set_null();
- }
-
- return(ret);
-}
-
-/*******************************************************************//**
-Store the name of an index in a MYSQL_TYPE_VARCHAR field.
-Handles the names of incomplete secondary indexes.
-@return 0 on success */
-static
-int
-field_store_index_name(
-/*===================*/
- Field* field, /*!< in/out: target field for
- storage */
- const char* index_name) /*!< in: NUL-terminated utf-8
- index name, possibly starting with
- TEMP_INDEX_PREFIX */
-{
- int ret;
-
- ut_ad(index_name != NULL);
- ut_ad(field->real_type() == MYSQL_TYPE_VARCHAR);
-
- /* Since TEMP_INDEX_PREFIX is not a valid UTF8, we need to convert
- it to something else. */
- if (index_name[0] == TEMP_INDEX_PREFIX) {
- char buf[NAME_LEN + 1];
- buf[0] = '?';
- memcpy(buf + 1, index_name + 1, strlen(index_name));
- ret = field->store(
- buf, static_cast<uint>(strlen(buf)),
- system_charset_info);
- } else {
- ret = field->store(
- index_name, static_cast<uint>(strlen(index_name)),
- system_charset_info);
+ return 0;
}
field->set_notnull();
-
- return(ret);
+ return field->store(str, uint(strlen(str)), system_charset_info);
}
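
Because the rewritten helper sets the NULL flag itself and returns the raw
Field::store() status, callers can chain stores with || and stop at the first
failure. A usage sketch in the style of the fill functions below (the row
fields come from a cached i_s_locks_row_t):

	if (field_store_string(fields[IDX_LOCK_TRX_ID], lock_trx_id)
	    || field_store_string(fields[IDX_LOCK_INDEX],
				  row->lock_index)	/* may be NULL */
	    || schema_table_store_record(thd, table)) {
		return 1;	/* propagate the first failing store */
	}

	return 0;
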
/*******************************************************************//**
Auxiliary function to store ulint value in MYSQL_TYPE_LONGLONG field.
-If the value is ULINT_UNDEFINED then the field it set to NULL.
-@return 0 on success */
+If the value is ULINT_UNDEFINED then the field is set to NULL.
+@return 0 on success */
int
field_store_ulint(
/*==============*/
@@ -322,7 +284,7 @@ field_store_ulint(
if (n != ULINT_UNDEFINED) {
- ret = field->store(static_cast<double>(n));
+ ret = field->store(n, true);
field->set_notnull();
} else {
@@ -333,6 +295,12 @@ field_store_ulint(
return(ret);
}
+#ifdef BTR_CUR_HASH_ADAPT
+# define I_S_AHI 1 /* Include the IS_HASHED column */
+#else
+# define I_S_AHI 0 /* Omit the IS_HASHED column */
+#endif
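
How the I_S_AHI constant keeps the column offsets consistent: when the
IS_HASHED column is compiled out, every later column shifts down by one
position, so the indexes are written as BASE + I_S_AHI rather than hard-coded.
A toy version (the EX_ names are invented for this example):

	#ifdef BTR_CUR_HASH_ADAPT
	# define EX_AHI	1	/* trx_adaptive_hash_latched present */
	#else
	# define EX_AHI	0	/* column omitted; later indexes shift */
	#endif

	enum {
		EX_IDX_LAST_FK_ERROR		= 19,
		/* the AHI column, when present, occupies slot 20 */
		EX_IDX_READ_ONLY		= 20 + EX_AHI,
		EX_IDX_AUTOCOMMIT_NON_LOCKING	= 21 + EX_AHI
	};
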
+
/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_trx */
static ST_FIELD_INFO innodb_trx_fields_info[] =
{
@@ -516,6 +484,7 @@ static ST_FIELD_INFO innodb_trx_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+#ifdef BTR_CUR_HASH_ADAPT
#define IDX_TRX_ADAPTIVE_HASH_LATCHED 20
{STRUCT_FLD(field_name, "trx_adaptive_hash_latched"),
STRUCT_FLD(field_length, 1),
@@ -524,17 +493,9 @@ static ST_FIELD_INFO innodb_trx_fields_info[] =
STRUCT_FLD(field_flags, 0),
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+#endif /* BTR_CUR_HASH_ADAPT */
-#define IDX_TRX_ADAPTIVE_HASH_TIMEOUT 21
- {STRUCT_FLD(field_name, "trx_adaptive_hash_timeout"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TRX_READ_ONLY 22
+#define IDX_TRX_READ_ONLY 20 + I_S_AHI
{STRUCT_FLD(field_name, "trx_is_read_only"),
STRUCT_FLD(field_length, 1),
STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
@@ -543,7 +504,7 @@ static ST_FIELD_INFO innodb_trx_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define IDX_TRX_AUTOCOMMIT_NON_LOCKING 23
+#define IDX_TRX_AUTOCOMMIT_NON_LOCKING 21 + I_S_AHI
{STRUCT_FLD(field_name, "trx_autocommit_non_locking"),
STRUCT_FLD(field_length, 1),
STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
@@ -558,7 +519,7 @@ static ST_FIELD_INFO innodb_trx_fields_info[] =
/*******************************************************************//**
Read data from cache buffer and fill the INFORMATION_SCHEMA.innodb_trx
table with it.
-@return 0 on success */
+@return 0 on success */
static
int
fill_innodb_trx_from_cache(
@@ -590,7 +551,7 @@ fill_innodb_trx_from_cache(
cache, I_S_INNODB_TRX, i);
/* trx_id */
- ut_snprintf(trx_id, sizeof(trx_id), TRX_ID_FMT, row->trx_id);
+ snprintf(trx_id, sizeof(trx_id), TRX_ID_FMT, row->trx_id);
OK(field_store_string(fields[IDX_TRX_ID], trx_id));
/* trx_state */
@@ -623,12 +584,11 @@ fill_innodb_trx_from_cache(
}
/* trx_weight */
- OK(fields[IDX_TRX_WEIGHT]->store((longlong) row->trx_weight,
- true));
+ OK(fields[IDX_TRX_WEIGHT]->store(row->trx_weight, true));
/* trx_mysql_thread_id */
OK(fields[IDX_TRX_MYSQL_THREAD_ID]->store(
- static_cast<double>(row->trx_mysql_thread_id)));
+ row->trx_mysql_thread_id, true));
/* trx_query */
if (row->trx_query) {
@@ -649,31 +609,31 @@ fill_innodb_trx_from_cache(
/* trx_tables_in_use */
OK(fields[IDX_TRX_TABLES_IN_USE]->store(
- (longlong) row->trx_tables_in_use, true));
+ row->trx_tables_in_use, true));
/* trx_tables_locked */
OK(fields[IDX_TRX_TABLES_LOCKED]->store(
- (longlong) row->trx_tables_locked, true));
+ row->trx_tables_locked, true));
/* trx_lock_structs */
OK(fields[IDX_TRX_LOCK_STRUCTS]->store(
- (longlong) row->trx_lock_structs, true));
+ row->trx_lock_structs, true));
/* trx_lock_memory_bytes */
OK(fields[IDX_TRX_LOCK_MEMORY_BYTES]->store(
- (longlong) row->trx_lock_memory_bytes, true));
+ row->trx_lock_memory_bytes, true));
/* trx_rows_locked */
OK(fields[IDX_TRX_ROWS_LOCKED]->store(
- (longlong) row->trx_rows_locked, true));
+ row->trx_rows_locked, true));
/* trx_rows_modified */
OK(fields[IDX_TRX_ROWS_MODIFIED]->store(
- (longlong) row->trx_rows_modified, true));
+ row->trx_rows_modified, true));
/* trx_concurrency_tickets */
OK(fields[IDX_TRX_CONNCURRENCY_TICKETS]->store(
- (longlong) row->trx_concurrency_tickets, true));
+ row->trx_concurrency_tickets, true));
/* trx_isolation_level */
OK(field_store_string(fields[IDX_TRX_ISOLATION_LEVEL],
@@ -681,32 +641,29 @@ fill_innodb_trx_from_cache(
/* trx_unique_checks */
OK(fields[IDX_TRX_UNIQUE_CHECKS]->store(
- static_cast<double>(row->trx_unique_checks)));
+ row->trx_unique_checks, true));
/* trx_foreign_key_checks */
OK(fields[IDX_TRX_FOREIGN_KEY_CHECKS]->store(
- static_cast<double>(row->trx_foreign_key_checks)));
+ row->trx_foreign_key_checks, true));
/* trx_last_foreign_key_error */
OK(field_store_string(fields[IDX_TRX_LAST_FOREIGN_KEY_ERROR],
row->trx_foreign_key_error));
+#ifdef BTR_CUR_HASH_ADAPT
/* trx_adaptive_hash_latched */
- OK(fields[IDX_TRX_ADAPTIVE_HASH_LATCHED]->store(
- static_cast<double>(row->trx_has_search_latch)));
-
- /* trx_adaptive_hash_timeout */
- OK(fields[IDX_TRX_ADAPTIVE_HASH_TIMEOUT]->store(
- (longlong) row->trx_search_latch_timeout, true));
+ OK(fields[IDX_TRX_ADAPTIVE_HASH_LATCHED]->store(0, true));
+#endif /* BTR_CUR_HASH_ADAPT */
/* trx_is_read_only*/
OK(fields[IDX_TRX_READ_ONLY]->store(
- (longlong) row->trx_is_read_only, true));
+ row->trx_is_read_only, true));
/* trx_is_autocommit_non_locking */
OK(fields[IDX_TRX_AUTOCOMMIT_NON_LOCKING]->store(
- (longlong) row->trx_is_autocommit_non_locking,
- true));
+ (longlong) row->trx_is_autocommit_non_locking,
+ true));
OK(schema_table_store_record(thd, table));
}
@@ -716,7 +673,7 @@ fill_innodb_trx_from_cache(
/*******************************************************************//**
Bind the dynamic table INFORMATION_SCHEMA.innodb_trx
-@return 0 on success */
+@return 0 on success */
static
int
innodb_trx_init(
@@ -889,7 +846,7 @@ static ST_FIELD_INFO innodb_locks_fields_info[] =
/*******************************************************************//**
Read data from cache buffer and fill the INFORMATION_SCHEMA.innodb_locks
table with it.
-@return 0 on success */
+@return 0 on success */
static
int
fill_innodb_locks_from_cache(
@@ -928,8 +885,8 @@ fill_innodb_locks_from_cache(
lock_id));
/* lock_trx_id */
- ut_snprintf(lock_trx_id, sizeof(lock_trx_id),
- TRX_ID_FMT, row->lock_trx_id);
+ snprintf(lock_trx_id, sizeof(lock_trx_id),
+ TRX_ID_FMT, row->lock_trx_id);
OK(field_store_string(fields[IDX_LOCK_TRX_ID], lock_trx_id));
/* lock_mode */
@@ -944,18 +901,13 @@ fill_innodb_locks_from_cache(
bufend = innobase_convert_name(buf, sizeof(buf),
row->lock_table,
strlen(row->lock_table),
- thd, TRUE);
+ thd);
OK(fields[IDX_LOCK_TABLE]->store(
- buf, static_cast<uint>(bufend - buf),
- system_charset_info));
+ buf, uint(bufend - buf), system_charset_info));
/* lock_index */
- if (row->lock_index != NULL) {
- OK(field_store_index_name(fields[IDX_LOCK_INDEX],
- row->lock_index));
- } else {
- fields[IDX_LOCK_INDEX]->set_null();
- }
+ OK(field_store_string(fields[IDX_LOCK_INDEX],
+ row->lock_index));
/* lock_space */
OK(field_store_ulint(fields[IDX_LOCK_SPACE],
@@ -981,7 +933,7 @@ fill_innodb_locks_from_cache(
/*******************************************************************//**
Bind the dynamic table INFORMATION_SCHEMA.innodb_locks
-@return 0 on success */
+@return 0 on success */
static
int
innodb_locks_init(
@@ -1094,7 +1046,7 @@ static ST_FIELD_INFO innodb_lock_waits_fields_info[] =
/*******************************************************************//**
Read data from cache buffer and fill the
INFORMATION_SCHEMA.innodb_lock_waits table with it.
-@return 0 on success */
+@return 0 on success */
static
int
fill_innodb_lock_waits_from_cache(
@@ -1129,8 +1081,8 @@ fill_innodb_lock_waits_from_cache(
cache, I_S_INNODB_LOCK_WAITS, i);
/* requesting_trx_id */
- ut_snprintf(requesting_trx_id, sizeof(requesting_trx_id),
- TRX_ID_FMT, row->requested_lock_row->lock_trx_id);
+ snprintf(requesting_trx_id, sizeof(requesting_trx_id),
+ TRX_ID_FMT, row->requested_lock_row->lock_trx_id);
OK(field_store_string(fields[IDX_REQUESTING_TRX_ID],
requesting_trx_id));
@@ -1143,8 +1095,8 @@ fill_innodb_lock_waits_from_cache(
sizeof(requested_lock_id))));
/* blocking_trx_id */
- ut_snprintf(blocking_trx_id, sizeof(blocking_trx_id),
- TRX_ID_FMT, row->blocking_lock_row->lock_trx_id);
+ snprintf(blocking_trx_id, sizeof(blocking_trx_id),
+ TRX_ID_FMT, row->blocking_lock_row->lock_trx_id);
OK(field_store_string(fields[IDX_BLOCKING_TRX_ID],
blocking_trx_id));
@@ -1164,7 +1116,7 @@ fill_innodb_lock_waits_from_cache(
/*******************************************************************//**
Bind the dynamic table INFORMATION_SCHEMA.innodb_lock_waits
-@return 0 on success */
+@return 0 on success */
static
int
innodb_lock_waits_init(
@@ -1237,7 +1189,7 @@ Common function to fill any of the dynamic tables:
INFORMATION_SCHEMA.innodb_trx
INFORMATION_SCHEMA.innodb_locks
INFORMATION_SCHEMA.innodb_lock_waits
-@return 0 on success */
+@return 0 on success */
static
int
trx_i_s_common_fill_table(
@@ -1275,10 +1227,8 @@ trx_i_s_common_fill_table(
if (trx_i_s_cache_is_truncated(cache)) {
- /* XXX show warning to user if possible */
- fprintf(stderr, "Warning: data in %s truncated due to "
- "memory limit of %d bytes\n", table_name,
- TRX_I_S_MEM_LIMIT);
+ ib::warn() << "Data in " << table_name << " truncated due to"
+ " memory limit of " << TRX_I_S_MEM_LIMIT << " bytes";
}
ret = 0;
@@ -1310,14 +1260,11 @@ trx_i_s_common_fill_table(
}
} else {
-
- /* huh! what happened!? */
- fprintf(stderr,
- "InnoDB: trx_i_s_common_fill_table() was "
- "called to fill unknown table: %s.\n"
- "This function only knows how to fill "
- "innodb_trx, innodb_locks and "
- "innodb_lock_waits tables.\n", table_name);
+ ib::error() << "trx_i_s_common_fill_table() was"
+ " called to fill unknown table: " << table_name << "."
+ " This function only knows how to fill"
+ " innodb_trx, innodb_locks and"
+ " innodb_lock_waits tables.";
ret = 1;
}
@@ -1397,7 +1344,7 @@ static ST_FIELD_INFO i_s_cmp_fields_info[] =
/*******************************************************************//**
Fill the dynamic table information_schema.innodb_cmp or
innodb_cmp_reset.
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_cmp_fill_low(
@@ -1454,7 +1401,7 @@ i_s_cmp_fill_low(
/*******************************************************************//**
Fill the dynamic table information_schema.innodb_cmp.
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_cmp_fill(
@@ -1468,7 +1415,7 @@ i_s_cmp_fill(
/*******************************************************************//**
Fill the dynamic table information_schema.innodb_cmp_reset.
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_cmp_reset_fill(
@@ -1482,7 +1429,7 @@ i_s_cmp_reset_fill(
/*******************************************************************//**
Bind the dynamic table information_schema.innodb_cmp.
-@return 0 on success */
+@return 0 on success */
static
int
i_s_cmp_init(
@@ -1500,7 +1447,7 @@ i_s_cmp_init(
/*******************************************************************//**
Bind the dynamic table information_schema.innodb_cmp_reset.
-@return 0 on success */
+@return 0 on success */
static
int
i_s_cmp_reset_init(
@@ -1699,7 +1646,7 @@ static ST_FIELD_INFO i_s_cmp_per_index_fields_info[] =
Fill the dynamic table
information_schema.innodb_cmp_per_index or
information_schema.innodb_cmp_per_index_reset.
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_cmp_per_index_fill_low(
@@ -1736,7 +1683,6 @@ i_s_cmp_per_index_fill_low(
for (iter = snap.begin(), i = 0; iter != snap.end(); iter++, i++) {
- char name[192];
dict_index_t* index = dict_index_find_on_id_low(iter->first);
if (index != NULL) {
@@ -1747,48 +1693,49 @@ i_s_cmp_per_index_fill_low(
db_utf8, sizeof(db_utf8),
table_utf8, sizeof(table_utf8));
- field_store_string(fields[IDX_DATABASE_NAME], db_utf8);
- field_store_string(fields[IDX_TABLE_NAME], table_utf8);
- field_store_index_name(fields[IDX_INDEX_NAME],
- index->name);
+ status = field_store_string(fields[IDX_DATABASE_NAME],
+ db_utf8)
+ || field_store_string(fields[IDX_TABLE_NAME],
+ table_utf8)
+ || field_store_string(fields[IDX_INDEX_NAME],
+ index->name);
} else {
/* index not found */
- ut_snprintf(name, sizeof(name),
- "index_id:" IB_ID_FMT, iter->first);
- field_store_string(fields[IDX_DATABASE_NAME],
- "unknown");
- field_store_string(fields[IDX_TABLE_NAME],
- "unknown");
- field_store_string(fields[IDX_INDEX_NAME],
- name);
+ char name[MY_INT64_NUM_DECIMAL_DIGITS
+ + sizeof "index_id: "];
+ fields[IDX_DATABASE_NAME]->set_null();
+ fields[IDX_TABLE_NAME]->set_null();
+ fields[IDX_INDEX_NAME]->set_notnull();
+ status = fields[IDX_INDEX_NAME]->store(
+ name,
+ uint(snprintf(name, sizeof name,
+ "index_id: " IB_ID_FMT,
+ iter->first)),
+ system_charset_info);
}
- fields[IDX_COMPRESS_OPS]->store(
- static_cast<double>(iter->second.compressed));
-
- fields[IDX_COMPRESS_OPS_OK]->store(
- static_cast<double>(iter->second.compressed_ok));
-
- fields[IDX_COMPRESS_TIME]->store(
- static_cast<double>(iter->second.compressed_usec / 1000000));
-
- fields[IDX_UNCOMPRESS_OPS]->store(
- static_cast<double>(iter->second.decompressed));
-
- fields[IDX_UNCOMPRESS_TIME]->store(
- static_cast<double>(iter->second.decompressed_usec / 1000000));
-
- if (schema_table_store_record(thd, table)) {
+ if (status
+ || fields[IDX_COMPRESS_OPS]->store(
+ iter->second.compressed, true)
+ || fields[IDX_COMPRESS_OPS_OK]->store(
+ iter->second.compressed_ok, true)
+ || fields[IDX_COMPRESS_TIME]->store(
+ iter->second.compressed_usec / 1000000, true)
+ || fields[IDX_UNCOMPRESS_OPS]->store(
+ iter->second.decompressed, true)
+ || fields[IDX_UNCOMPRESS_TIME]->store(
+ iter->second.decompressed_usec / 1000000, true)
+ || schema_table_store_record(thd, table)) {
status = 1;
break;
}
-
/* Release and reacquire the dict mutex to allow other
threads to proceed. This could eventually result in the
contents of INFORMATION_SCHEMA.innodb_cmp_per_index being
inconsistent, but it is an acceptable compromise. */
- if (i % 1000 == 0) {
+ if (i == 1000) {
mutex_exit(&dict_sys->mutex);
+ i = 0;
mutex_enter(&dict_sys->mutex);
}
}
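
The switch from i % 1000 == 0 to i == 1000 with a reset is subtle but
deliberate: the modulo test also fires on the very first iteration (i == 0),
releasing and reacquiring the mutex before any work has been done. In outline:

	for (iter = snap.begin(), i = 0; iter != snap.end(); iter++, i++) {
		/* ... emit one INFORMATION_SCHEMA row ... */

		if (i == 1000) {	/* never on the first pass */
			mutex_exit(&dict_sys->mutex);
			i = 0;		/* restart the yield counter */
			mutex_enter(&dict_sys->mutex);
		}
	}
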
@@ -1804,7 +1751,7 @@ i_s_cmp_per_index_fill_low(
/*******************************************************************//**
Fill the dynamic table information_schema.innodb_cmp_per_index.
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_cmp_per_index_fill(
@@ -1818,7 +1765,7 @@ i_s_cmp_per_index_fill(
/*******************************************************************//**
Fill the dynamic table information_schema.innodb_cmp_per_index_reset.
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_cmp_per_index_reset_fill(
@@ -1832,7 +1779,7 @@ i_s_cmp_per_index_reset_fill(
/*******************************************************************//**
Bind the dynamic table information_schema.innodb_cmp_per_index.
-@return 0 on success */
+@return 0 on success */
static
int
i_s_cmp_per_index_init(
@@ -1850,7 +1797,7 @@ i_s_cmp_per_index_init(
/*******************************************************************//**
Bind the dynamic table information_schema.innodb_cmp_per_index_reset.
-@return 0 on success */
+@return 0 on success */
static
int
i_s_cmp_per_index_reset_init(
@@ -2023,7 +1970,7 @@ static ST_FIELD_INFO i_s_cmpmem_fields_info[] =
/*******************************************************************//**
Fill the dynamic table information_schema.innodb_cmpmem or
innodb_cmpmem_reset.
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_cmpmem_fill_low(
@@ -2047,37 +1994,43 @@ i_s_cmpmem_fill_low(
RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
for (ulint i = 0; i < srv_buf_pool_instances; i++) {
- buf_pool_t* buf_pool;
+ buf_pool_t* buf_pool;
+ ulint zip_free_len_local[BUF_BUDDY_SIZES_MAX + 1];
+ buf_buddy_stat_t buddy_stat_local[BUF_BUDDY_SIZES_MAX + 1];
status = 0;
buf_pool = buf_pool_from_array(i);
+ /* Save buddy stats for buffer pool in local variables. */
buf_pool_mutex_enter(buf_pool);
-
for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
- buf_buddy_stat_t* buddy_stat;
- buddy_stat = &buf_pool->buddy_stat[x];
+ zip_free_len_local[x] = (x < BUF_BUDDY_SIZES) ?
+ UT_LIST_GET_LEN(buf_pool->zip_free[x]) : 0;
- table->field[0]->store(BUF_BUDDY_LOW << x);
- table->field[1]->store(static_cast<double>(i));
- table->field[2]->store(static_cast<double>(
- buddy_stat->used));
- table->field[3]->store(static_cast<double>(
- (x < BUF_BUDDY_SIZES)
- ? UT_LIST_GET_LEN(buf_pool->zip_free[x])
- : 0));
- table->field[4]->store(
- (longlong) buddy_stat->relocated, true);
- table->field[5]->store(
- static_cast<double>(buddy_stat->relocated_usec / 1000000));
+ buddy_stat_local[x] = buf_pool->buddy_stat[x];
if (reset) {
/* This is protected by buf_pool->mutex. */
- buddy_stat->relocated = 0;
- buddy_stat->relocated_usec = 0;
+ buf_pool->buddy_stat[x].relocated = 0;
+ buf_pool->buddy_stat[x].relocated_usec = 0;
}
+ }
+ buf_pool_mutex_exit(buf_pool);
+
+ for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
+ buf_buddy_stat_t* buddy_stat;
+
+ buddy_stat = &buddy_stat_local[x];
+
+ table->field[0]->store(BUF_BUDDY_LOW << x);
+ table->field[1]->store(i, true);
+ table->field[2]->store(buddy_stat->used, true);
+ table->field[3]->store(zip_free_len_local[x], true);
+ table->field[4]->store(buddy_stat->relocated, true);
+ table->field[5]->store(
+ buddy_stat->relocated_usec / 1000000, true);
if (schema_table_store_record(thd, table)) {
status = 1;
@@ -2085,8 +2038,6 @@ i_s_cmpmem_fill_low(
}
}
- buf_pool_mutex_exit(buf_pool);
-
if (status) {
break;
}
@@ -2097,7 +2048,7 @@ i_s_cmpmem_fill_low(
/*******************************************************************//**
Fill the dynamic table information_schema.innodb_cmpmem.
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_cmpmem_fill(
@@ -2111,7 +2062,7 @@ i_s_cmpmem_fill(
/*******************************************************************//**
Fill the dynamic table information_schema.innodb_cmpmem_reset.
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_cmpmem_reset_fill(
@@ -2125,7 +2076,7 @@ i_s_cmpmem_reset_fill(
/*******************************************************************//**
Bind the dynamic table information_schema.innodb_cmpmem.
-@return 0 on success */
+@return 0 on success */
static
int
i_s_cmpmem_init(
@@ -2143,7 +2094,7 @@ i_s_cmpmem_init(
/*******************************************************************//**
Bind the dynamic table information_schema.innodb_cmpmem_reset.
-@return 0 on success */
+@return 0 on success */
static
int
i_s_cmpmem_reset_init(
@@ -2419,7 +2370,7 @@ static ST_FIELD_INFO innodb_metrics_fields_info[] =
/**********************************************************************//**
Fill the information schema metrics table.
-@return 0 on success */
+@return 0 on success */
static
int
i_s_metrics_fill(
@@ -2594,7 +2545,7 @@ i_s_metrics_fill(
} else if (!(monitor_info->monitor_type & MONITOR_NO_AVERAGE)
&& !(monitor_info->monitor_type
& MONITOR_DISPLAY_CURRENT)) {
- if (time_diff) {
+ if (time_diff != 0) {
OK(fields[METRIC_AVG_VALUE_START]->store(
(double) MONITOR_VALUE_SINCE_START(
count) / time_diff));
@@ -2619,7 +2570,7 @@ i_s_metrics_fill(
time_diff = 0;
}
- if (time_diff) {
+ if (time_diff != 0) {
OK(fields[METRIC_AVG_VALUE_RESET]->store(
static_cast<double>(
MONITOR_VALUE(count) / time_diff)));
@@ -2692,7 +2643,7 @@ i_s_metrics_fill(
/*******************************************************************//**
Function to fill information schema metrics tables.
-@return 0 on success */
+@return 0 on success */
static
int
i_s_metrics_fill_table(
@@ -2714,7 +2665,7 @@ i_s_metrics_fill_table(
}
/*******************************************************************//**
Bind the dynamic table INFORMATION_SCHEMA.innodb_metrics
-@return 0 on success */
+@return 0 on success */
static
int
innodb_metrics_init(
@@ -2798,7 +2749,7 @@ static ST_FIELD_INFO i_s_stopword_fields_info[] =
/*******************************************************************//**
Fill the dynamic table information_schema.innodb_ft_default_stopword.
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_stopword_fill(
@@ -2830,7 +2781,7 @@ i_s_stopword_fill(
/*******************************************************************//**
Bind the dynamic table information_schema.innodb_ft_default_stopword.
-@return 0 on success */
+@return 0 on success */
static
int
i_s_stopword_init(
@@ -2914,7 +2865,7 @@ static ST_FIELD_INFO i_s_fts_doc_fields_info[] =
/*******************************************************************//**
Fill the dynamic table INFORMATION_SCHEMA.INNODB_FT_DELETED or
INFORMATION_SCHEMA.INNODB_FT_BEING_DELETED
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_fts_deleted_generic_fill(
@@ -2993,7 +2944,7 @@ i_s_fts_deleted_generic_fill(
/*******************************************************************//**
Fill the dynamic table INFORMATION_SCHEMA.INNODB_FT_DELETED
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_fts_deleted_fill(
@@ -3009,7 +2960,7 @@ i_s_fts_deleted_fill(
/*******************************************************************//**
Bind the dynamic table INFORMATION_SCHEMA.INNODB_FT_DELETED
-@return 0 on success */
+@return 0 on success */
static
int
i_s_fts_deleted_init(
@@ -3076,7 +3027,7 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_deleted =
/*******************************************************************//**
Fill the dynamic table INFORMATION_SCHEMA.INNODB_FT_BEING_DELETED
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_fts_being_deleted_fill(
@@ -3092,7 +3043,7 @@ i_s_fts_being_deleted_fill(
/*******************************************************************//**
Bind the dynamic table INFORMATION_SCHEMA.INNODB_FT_BEING_DELETED
-@return 0 on success */
+@return 0 on success */
static
int
i_s_fts_being_deleted_init(
@@ -3221,7 +3172,7 @@ static ST_FIELD_INFO i_s_fts_index_fields_info[] =
/*******************************************************************//**
Go through the Doc Node and its ilist, fill the dynamic table
INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHED for one FTS index on the table.
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_fts_index_cache_fill_one_index(
@@ -3293,28 +3244,28 @@ i_s_fts_index_cache_fill_one_index(
pos = fts_decode_vlc(&ptr);
OK(field_store_string(
- fields[I_S_FTS_WORD],
- word_str));
+ fields[I_S_FTS_WORD],
+ word_str));
OK(fields[I_S_FTS_FIRST_DOC_ID]->store(
- (longlong) node->first_doc_id,
- true));
+ node->first_doc_id,
+ true));
OK(fields[I_S_FTS_LAST_DOC_ID]->store(
- (longlong) node->last_doc_id,
- true));
+ node->last_doc_id,
+ true));
OK(fields[I_S_FTS_DOC_COUNT]->store(
- static_cast<double>(node->doc_count)));
+ node->doc_count, true));
OK(fields[I_S_FTS_ILIST_DOC_ID]->store(
- (longlong) doc_id, true));
+ doc_id, true));
OK(fields[I_S_FTS_ILIST_DOC_POS]->store(
- static_cast<double>(pos)));
+ pos, true));
OK(schema_table_store_record(
- thd, table));
+ thd, table));
}
++ptr;
@@ -3328,7 +3279,7 @@ i_s_fts_index_cache_fill_one_index(
}
/*******************************************************************//**
Fill the dynamic table INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHED
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_fts_index_cache_fill(
@@ -3393,7 +3344,7 @@ no_fts:
/*******************************************************************//**
Bind the dynamic table INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHE
-@return 0 on success */
+@return 0 on success */
static
int
i_s_fts_index_cache_init(
@@ -3461,7 +3412,7 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_index_cache =
/*******************************************************************//**
Go through a FTS index auxiliary table, fetch its rows and fill
FTS word cache structure.
-@return DB_SUCCESS on success, otherwise error code */
+@return DB_SUCCESS on success, otherwise error code */
static
dberr_t
i_s_fts_index_table_fill_selected(
@@ -3506,8 +3457,8 @@ i_s_fts_index_table_fill_selected(
&fts_table, info,
"DECLARE FUNCTION my_func;\n"
"DECLARE CURSOR c IS"
- " SELECT word, doc_count, first_doc_id, last_doc_id, "
- "ilist\n"
+ " SELECT word, doc_count, first_doc_id, last_doc_id,"
+ " ilist\n"
" FROM $table_name WHERE word >= :word;\n"
"BEGIN\n"
"\n"
@@ -3520,7 +3471,7 @@ i_s_fts_index_table_fill_selected(
"END LOOP;\n"
"CLOSE c;");
- for(;;) {
+ for (;;) {
error = fts_eval_sql(trx, graph);
if (error == DB_SUCCESS) {
@@ -3530,17 +3481,14 @@ i_s_fts_index_table_fill_selected(
} else {
fts_sql_rollback(trx);
- ut_print_timestamp(stderr);
-
if (error == DB_LOCK_WAIT_TIMEOUT) {
- fprintf(stderr, " InnoDB: Warning: "
- "lock wait timeout reading "
- "FTS index. Retrying!\n");
+ ib::warn() << "Lock wait timeout reading"
+ " FTS index. Retrying!";
trx->error_state = DB_SUCCESS;
} else {
- fprintf(stderr, " InnoDB: Error: %d "
- "while reading FTS index.\n", error);
+ ib::error() << "Error occurred while reading"
+ " FTS index: " << ut_strerr(error);
break;
}
}
@@ -3664,28 +3612,26 @@ i_s_fts_index_table_fill_one_fetch(
pos = fts_decode_vlc(&ptr);
OK(field_store_string(
- fields[I_S_FTS_WORD],
- word_str));
+ fields[I_S_FTS_WORD],
+ word_str));
OK(fields[I_S_FTS_FIRST_DOC_ID]->store(
- (longlong) node->first_doc_id,
- true));
+ longlong(node->first_doc_id), true));
OK(fields[I_S_FTS_LAST_DOC_ID]->store(
- (longlong) node->last_doc_id,
- true));
+ longlong(node->last_doc_id), true));
OK(fields[I_S_FTS_DOC_COUNT]->store(
- static_cast<double>(node->doc_count)));
+ node->doc_count, true));
OK(fields[I_S_FTS_ILIST_DOC_ID]->store(
- (longlong) doc_id, true));
+ longlong(doc_id), true));
OK(fields[I_S_FTS_ILIST_DOC_POS]->store(
- static_cast<double>(pos)));
+ pos, true));
OK(schema_table_store_record(
- thd, table));
+ thd, table));
}
++ptr;
@@ -3701,7 +3647,7 @@ i_s_fts_index_table_fill_one_fetch(
/*******************************************************************//**
Go through a FTS index and its auxiliary tables, fetch rows in each table
and fill INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE.
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_fts_index_table_fill_one_index(
@@ -3713,7 +3659,6 @@ i_s_fts_index_table_fill_one_index(
{
ib_vector_t* words;
mem_heap_t* heap;
- fts_string_t word;
CHARSET_INFO* index_charset;
dberr_t error;
int ret = 0;
@@ -3726,17 +3671,17 @@ i_s_fts_index_table_fill_one_index(
words = ib_vector_create(ib_heap_allocator_create(heap),
sizeof(fts_word_t), 256);
- word.f_str = NULL;
- word.f_len = 0;
- word.f_n_char = 0;
-
index_charset = fts_index_get_charset(index);
/* Iterate through each auxiliary table as described in
fts_index_selector */
- for (ulint selected = 0; fts_index_selector[selected].value;
- selected++) {
- bool has_more = false;
+ for (ulint selected = 0; selected < FTS_NUM_AUX_INDEX; selected++) {
+ fts_string_t word;
+ bool has_more = false;
+
+ word.f_str = NULL;
+ word.f_len = 0;
+ word.f_n_char = 0;
do {
/* Fetch from index */
@@ -3759,7 +3704,7 @@ i_s_fts_index_table_fill_one_index(
/* Prepare start point for next fetch */
last_word = static_cast<fts_word_t*>(ib_vector_last(words));
ut_ad(last_word != NULL);
- fts_utf8_string_dup(&word, &last_word->text, heap);
+ fts_string_dup(&word, &last_word->text, heap);
}
/* Fill into tables */
@@ -3781,7 +3726,7 @@ func_exit:
}
/*******************************************************************//**
Fill the dynamic table INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_fts_index_table_fill(
@@ -3818,7 +3763,7 @@ i_s_fts_index_table_fill(
fts_string_t conv_str;
conv_str.f_len = system_charset_info->mbmaxlen
* FTS_MAX_WORD_LEN_IN_CHAR;
- conv_str.f_str = static_cast<byte*>(ut_malloc(conv_str.f_len));
+ conv_str.f_str = static_cast<byte*>(ut_malloc_nokey(conv_str.f_len));
for (index = dict_table_get_first_index(user_table);
index; index = dict_table_get_next_index(index)) {
@@ -3839,7 +3784,7 @@ i_s_fts_index_table_fill(
/*******************************************************************//**
Bind the dynamic table INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE
-@return 0 on success */
+@return 0 on success */
static
int
i_s_fts_index_table_init(
@@ -3938,7 +3883,7 @@ static const char* fts_config_key[] = {
/*******************************************************************//**
Fill the dynamic table INFORMATION_SCHEMA.INNODB_FT_CONFIG
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_fts_config_fill(
@@ -4047,7 +3992,7 @@ no_fts:
/*******************************************************************//**
Bind the dynamic table INFORMATION_SCHEMA.INNODB_FT_CONFIG
-@return 0 on success */
+@return 0 on success */
static
int
i_s_fts_config_init(
@@ -4409,7 +4354,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_stats_fields_info[] =
/*******************************************************************//**
Fill Information Schema table INNODB_BUFFER_POOL_STATS for a particular
buffer pool
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_innodb_stats_fill(
@@ -4429,67 +4374,67 @@ i_s_innodb_stats_fill(
fields = table->field;
OK(fields[IDX_BUF_STATS_POOL_ID]->store(
- static_cast<double>(info->pool_unique_id)));
+ info->pool_unique_id, true));
OK(fields[IDX_BUF_STATS_POOL_SIZE]->store(
- static_cast<double>(info->pool_size)));
+ info->pool_size, true));
OK(fields[IDX_BUF_STATS_LRU_LEN]->store(
- static_cast<double>(info->lru_len)));
+ info->lru_len, true));
OK(fields[IDX_BUF_STATS_OLD_LRU_LEN]->store(
- static_cast<double>(info->old_lru_len)));
+ info->old_lru_len, true));
OK(fields[IDX_BUF_STATS_FREE_BUFFERS]->store(
- static_cast<double>(info->free_list_len)));
+ info->free_list_len, true));
OK(fields[IDX_BUF_STATS_FLUSH_LIST_LEN]->store(
- static_cast<double>(info->flush_list_len)));
+ info->flush_list_len, true));
OK(fields[IDX_BUF_STATS_PENDING_ZIP]->store(
- static_cast<double>(info->n_pend_unzip)));
+ info->n_pend_unzip, true));
OK(fields[IDX_BUF_STATS_PENDING_READ]->store(
- static_cast<double>(info->n_pend_reads)));
+ info->n_pend_reads, true));
OK(fields[IDX_BUF_STATS_FLUSH_LRU]->store(
- static_cast<double>(info->n_pending_flush_lru)));
+ info->n_pending_flush_lru, true));
OK(fields[IDX_BUF_STATS_FLUSH_LIST]->store(
- static_cast<double>(info->n_pending_flush_list)));
+ info->n_pending_flush_list, true));
OK(fields[IDX_BUF_STATS_PAGE_YOUNG]->store(
- static_cast<double>(info->n_pages_made_young)));
+ info->n_pages_made_young, true));
OK(fields[IDX_BUF_STATS_PAGE_NOT_YOUNG]->store(
- static_cast<double>(info->n_pages_not_made_young)));
+ info->n_pages_not_made_young, true));
OK(fields[IDX_BUF_STATS_PAGE_YOUNG_RATE]->store(
- info->page_made_young_rate));
+ info->page_made_young_rate));
OK(fields[IDX_BUF_STATS_PAGE_NOT_YOUNG_RATE]->store(
- info->page_not_made_young_rate));
+ info->page_not_made_young_rate));
OK(fields[IDX_BUF_STATS_PAGE_READ]->store(
- static_cast<double>(info->n_pages_read)));
+ info->n_pages_read, true));
OK(fields[IDX_BUF_STATS_PAGE_CREATED]->store(
- static_cast<double>(info->n_pages_created)));
+ info->n_pages_created, true));
OK(fields[IDX_BUF_STATS_PAGE_WRITTEN]->store(
- static_cast<double>(info->n_pages_written)));
+ info->n_pages_written, true));
OK(fields[IDX_BUF_STATS_GET]->store(
- static_cast<double>(info->n_page_gets)));
+ info->n_page_gets, true));
OK(fields[IDX_BUF_STATS_PAGE_READ_RATE]->store(
- info->pages_read_rate));
+ info->pages_read_rate));
OK(fields[IDX_BUF_STATS_PAGE_CREATE_RATE]->store(
- info->pages_created_rate));
+ info->pages_created_rate));
OK(fields[IDX_BUF_STATS_PAGE_WRITTEN_RATE]->store(
- info->pages_written_rate));
+ info->pages_written_rate));
if (info->n_page_get_delta) {
if (info->page_read_delta <= info->n_page_get_delta) {
@@ -4502,43 +4447,41 @@ i_s_innodb_stats_fill(
}
OK(fields[IDX_BUF_STATS_MADE_YOUNG_PCT]->store(
- static_cast<double>(
- 1000 * info->young_making_delta
- / info->n_page_get_delta)));
+ 1000 * info->young_making_delta
+ / info->n_page_get_delta, true));
OK(fields[IDX_BUF_STATS_NOT_MADE_YOUNG_PCT]->store(
- static_cast<double>(
- 1000 * info->not_young_making_delta
- / info->n_page_get_delta)));
+ 1000 * info->not_young_making_delta
+ / info->n_page_get_delta, true));
} else {
- OK(fields[IDX_BUF_STATS_HIT_RATE]->store(0));
- OK(fields[IDX_BUF_STATS_MADE_YOUNG_PCT]->store(0));
- OK(fields[IDX_BUF_STATS_NOT_MADE_YOUNG_PCT]->store(0));
+ OK(fields[IDX_BUF_STATS_HIT_RATE]->store(0, true));
+ OK(fields[IDX_BUF_STATS_MADE_YOUNG_PCT]->store(0, true));
+ OK(fields[IDX_BUF_STATS_NOT_MADE_YOUNG_PCT]->store(0, true));
}
OK(fields[IDX_BUF_STATS_READ_AHREAD]->store(
- static_cast<double>(info->n_ra_pages_read)));
+ info->n_ra_pages_read, true));
OK(fields[IDX_BUF_STATS_READ_AHEAD_EVICTED]->store(
- static_cast<double>(info->n_ra_pages_evicted)));
+ info->n_ra_pages_evicted, true));
OK(fields[IDX_BUF_STATS_READ_AHEAD_RATE]->store(
- info->pages_readahead_rate));
+ info->pages_readahead_rate));
OK(fields[IDX_BUF_STATS_READ_AHEAD_EVICT_RATE]->store(
- info->pages_evicted_rate));
+ info->pages_evicted_rate));
OK(fields[IDX_BUF_STATS_LRU_IO_SUM]->store(
- static_cast<double>(info->io_sum)));
+ info->io_sum, true));
OK(fields[IDX_BUF_STATS_LRU_IO_CUR]->store(
- static_cast<double>(info->io_cur)));
+ info->io_cur, true));
OK(fields[IDX_BUF_STATS_UNZIP_SUM]->store(
- static_cast<double>(info->unzip_sum)));
+ info->unzip_sum, true));
OK(fields[IDX_BUF_STATS_UNZIP_CUR]->store(
- static_cast<double>(info->unzip_cur)));
+ info->unzip_cur, true));
DBUG_RETURN(schema_table_store_record(thd, table));
}
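/* The Field::store(longlong, bool unsigned_val) overload used above keeps
the full 64-bit precision of these counters; the previous casts to double
could silently round values above 2^53. */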
@@ -4546,7 +4489,7 @@ i_s_innodb_stats_fill(
/*******************************************************************//**
This is the function that loops through each buffer pool and fetch buffer
pool stats to information schema table: I_S_INNODB_BUFFER_POOL_STATS
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_innodb_buffer_stats_fill_table(
@@ -4566,7 +4509,7 @@ i_s_innodb_buffer_stats_fill_table(
DBUG_RETURN(0);
}
- pool_info = (buf_pool_info_t*) mem_zalloc(
+ pool_info = (buf_pool_info_t*) ut_zalloc_nokey(
srv_buf_pool_instances * sizeof *pool_info);
/* Walk through each buffer pool */
@@ -4586,14 +4529,14 @@ i_s_innodb_buffer_stats_fill_table(
}
}
- mem_free(pool_info);
+ ut_free(pool_info);
DBUG_RETURN(status);
}
/*******************************************************************//**
Bind the dynamic table INFORMATION_SCHEMA.INNODB_BUFFER_POOL_STATS.
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_innodb_buffer_pool_stats_init(
@@ -4727,6 +4670,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_page_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+#ifdef BTR_CUR_HASH_ADAPT
#define IDX_BUFFER_PAGE_HASHED 7
{STRUCT_FLD(field_name, "IS_HASHED"),
STRUCT_FLD(field_length, 3),
@@ -4735,8 +4679,9 @@ static ST_FIELD_INFO i_s_innodb_buffer_page_fields_info[] =
STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+#endif /* BTR_CUR_HASH_ADAPT */
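+/* IS_HASHED is only present when the adaptive hash index is compiled in;
+I_S_AHI is presumably defined earlier in this file as 1 when
+BTR_CUR_HASH_ADAPT is defined and 0 otherwise, shifting the indexes of
+the remaining columns accordingly. */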
-#define IDX_BUFFER_PAGE_NEWEST_MOD 8
+#define IDX_BUFFER_PAGE_NEWEST_MOD 7 + I_S_AHI
{STRUCT_FLD(field_name, "NEWEST_MODIFICATION"),
STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
@@ -4745,7 +4690,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_page_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define IDX_BUFFER_PAGE_OLDEST_MOD 9
+#define IDX_BUFFER_PAGE_OLDEST_MOD 8 + I_S_AHI
{STRUCT_FLD(field_name, "OLDEST_MODIFICATION"),
STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
@@ -4754,7 +4699,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_page_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define IDX_BUFFER_PAGE_ACCESS_TIME 10
+#define IDX_BUFFER_PAGE_ACCESS_TIME 9 + I_S_AHI
{STRUCT_FLD(field_name, "ACCESS_TIME"),
STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
@@ -4763,7 +4708,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_page_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define IDX_BUFFER_PAGE_TABLE_NAME 11
+#define IDX_BUFFER_PAGE_TABLE_NAME 10 + I_S_AHI
{STRUCT_FLD(field_name, "TABLE_NAME"),
STRUCT_FLD(field_length, 1024),
STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
@@ -4772,7 +4717,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_page_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define IDX_BUFFER_PAGE_INDEX_NAME 12
+#define IDX_BUFFER_PAGE_INDEX_NAME 11 + I_S_AHI
{STRUCT_FLD(field_name, "INDEX_NAME"),
STRUCT_FLD(field_length, 1024),
STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
@@ -4781,7 +4726,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_page_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define IDX_BUFFER_PAGE_NUM_RECS 13
+#define IDX_BUFFER_PAGE_NUM_RECS 12 + I_S_AHI
{STRUCT_FLD(field_name, "NUMBER_RECORDS"),
STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
@@ -4790,7 +4735,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_page_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define IDX_BUFFER_PAGE_DATA_SIZE 14
+#define IDX_BUFFER_PAGE_DATA_SIZE 13 + I_S_AHI
{STRUCT_FLD(field_name, "DATA_SIZE"),
STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
@@ -4799,7 +4744,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_page_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define IDX_BUFFER_PAGE_ZIP_SIZE 15
+#define IDX_BUFFER_PAGE_ZIP_SIZE 14 + I_S_AHI
{STRUCT_FLD(field_name, "COMPRESSED_SIZE"),
STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
@@ -4808,7 +4753,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_page_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define IDX_BUFFER_PAGE_STATE 16
+#define IDX_BUFFER_PAGE_STATE 15 + I_S_AHI
{STRUCT_FLD(field_name, "PAGE_STATE"),
STRUCT_FLD(field_length, 64),
STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
@@ -4817,7 +4762,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_page_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define IDX_BUFFER_PAGE_IO_FIX 17
+#define IDX_BUFFER_PAGE_IO_FIX 16 + I_S_AHI
{STRUCT_FLD(field_name, "IO_FIX"),
STRUCT_FLD(field_length, 64),
STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
@@ -4826,7 +4771,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_page_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define IDX_BUFFER_PAGE_IS_OLD 18
+#define IDX_BUFFER_PAGE_IS_OLD 17 + I_S_AHI
{STRUCT_FLD(field_name, "IS_OLD"),
STRUCT_FLD(field_length, 3),
STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
@@ -4835,7 +4780,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_page_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define IDX_BUFFER_PAGE_FREE_CLOCK 19
+#define IDX_BUFFER_PAGE_FREE_CLOCK 18 + I_S_AHI
{STRUCT_FLD(field_name, "FREE_PAGE_CLOCK"),
STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
@@ -4850,7 +4795,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_page_fields_info[] =
/*******************************************************************//**
Fill Information Schema table INNODB_BUFFER_PAGE with information
cached in the buf_page_info_t array
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_innodb_buffer_page_fill(
@@ -4896,8 +4841,8 @@ i_s_innodb_buffer_page_fill(
page_info->page_num, true));
OK(field_store_string(
- fields[IDX_BUFFER_PAGE_TYPE],
- i_s_page_type[page_info->page_type].type_str));
+ fields[IDX_BUFFER_PAGE_TYPE],
+ i_s_page_type[page_info->page_type].type_str));
OK(fields[IDX_BUFFER_PAGE_FLUSH_TYPE]->store(
page_info->flush_type, true));
@@ -4905,14 +4850,16 @@ i_s_innodb_buffer_page_fill(
OK(fields[IDX_BUFFER_PAGE_FIX_COUNT]->store(
page_info->fix_count, true));
+#ifdef BTR_CUR_HASH_ADAPT
OK(field_store_string(fields[IDX_BUFFER_PAGE_HASHED],
page_info->hashed ? "YES" : "NO"));
+#endif /* BTR_CUR_HASH_ADAPT */
OK(fields[IDX_BUFFER_PAGE_NEWEST_MOD]->store(
- (longlong) page_info->newest_mod, true));
+ page_info->newest_mod, true));
OK(fields[IDX_BUFFER_PAGE_OLDEST_MOD]->store(
- (longlong) page_info->oldest_mod, true));
+ page_info->oldest_mod, true));
OK(fields[IDX_BUFFER_PAGE_ACCESS_TIME]->store(
page_info->access_time, true));
@@ -4937,7 +4884,7 @@ i_s_innodb_buffer_page_fill(
table_name, sizeof(table_name),
index->table_name,
strlen(index->table_name),
- thd, TRUE);
+ thd);
ret = fields[IDX_BUFFER_PAGE_TABLE_NAME]
->store(table_name,
@@ -4945,10 +4892,10 @@ i_s_innodb_buffer_page_fill(
table_name_end
- table_name),
system_charset_info)
- || field_store_index_name(
- fields
- [IDX_BUFFER_PAGE_INDEX_NAME],
- index->name);
+ || fields[IDX_BUFFER_PAGE_INDEX_NAME]
+ ->store(index->name,
+ uint(strlen(index->name)),
+ system_charset_info);
}
mutex_exit(&dict_sys->mutex);
@@ -4958,6 +4905,8 @@ i_s_innodb_buffer_page_fill(
if (index) {
fields[IDX_BUFFER_PAGE_TABLE_NAME]
->set_notnull();
+ fields[IDX_BUFFER_PAGE_INDEX_NAME]
+ ->set_notnull();
}
}
@@ -5028,7 +4977,7 @@ i_s_innodb_buffer_page_fill(
(page_info->is_old) ? "YES" : "NO"));
OK(fields[IDX_BUFFER_PAGE_FREE_CLOCK]->store(
- page_info->freed_page_clock));
+ page_info->freed_page_clock, true));
OK(schema_table_store_record(thd, table));
}
@@ -5047,14 +4996,14 @@ i_s_innodb_set_page_type(
ulint page_type, /*!< in: page type */
const byte* frame) /*!< in: buffer frame */
{
- if (page_type == FIL_PAGE_INDEX) {
+ if (fil_page_type_is_index(page_type)) {
const page_t* page = (const page_t*) frame;
page_info->index_id = btr_page_get_index_id(page);
- /* FIL_PAGE_INDEX is a bit special, its value
- is defined as 17855, so we cannot use FIL_PAGE_INDEX
- to index into i_s_page_type[] array, its array index
+	/* FIL_PAGE_INDEX and FIL_PAGE_RTREE are a bit special:
+	their values are defined as 17855 and 17854, so we cannot
+	use them directly to index into i_s_page_type[]; their index
in the i_s_page_type[] array is I_S_PAGE_TYPE_INDEX
(1) for index pages or I_S_PAGE_TYPE_IBUF for
change buffer index pages */
@@ -5062,11 +5011,13 @@ i_s_innodb_set_page_type(
== static_cast<index_id_t>(DICT_IBUF_ID_MIN
+ IBUF_SPACE_ID)) {
page_info->page_type = I_S_PAGE_TYPE_IBUF;
+ } else if (page_type == FIL_PAGE_RTREE) {
+ page_info->page_type = I_S_PAGE_TYPE_RTREE;
} else {
page_info->page_type = I_S_PAGE_TYPE_INDEX;
}
- page_info->data_size = (ulint)(page_header_get_field(
+ page_info->data_size = unsigned(page_header_get_field(
page, PAGE_HEAP_TOP) - (page_is_comp(page)
? PAGE_NEW_SUPREMUM_END
: PAGE_OLD_SUPREMUM_END)
@@ -5123,9 +5074,9 @@ i_s_innodb_buffer_page_get_info(
const byte* frame;
ulint page_type;
- page_info->space_id = buf_page_get_space(bpage);
+ page_info->space_id = bpage->id.space();
- page_info->page_num = buf_page_get_page_no(bpage);
+ page_info->page_num = bpage->id.page_no();
page_info->flush_type = bpage->flush_type;
@@ -5160,7 +5111,13 @@ i_s_innodb_buffer_page_get_info(
block = reinterpret_cast<const buf_block_t*>(bpage);
frame = block->frame;
+#ifdef BTR_CUR_HASH_ADAPT
+ /* Note: this may be a false positive, that
+ is, block->index will not always be set to
+ NULL when the last adaptive hash index
+ reference is dropped. */
page_info->hashed = (block->index != NULL);
+#endif /* BTR_CUR_HASH_ADAPT */
} else {
ut_ad(page_info->zip_ssize);
frame = bpage->zip.data;
@@ -5177,7 +5134,7 @@ i_s_innodb_buffer_page_get_info(
/*******************************************************************//**
This is the function that goes through each block of the buffer pool
and fetch information to information schema tables: INNODB_BUFFER_PAGE.
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_innodb_fill_buffer_pool(
@@ -5191,13 +5148,13 @@ i_s_innodb_fill_buffer_pool(
mem_heap_t* heap;
DBUG_ENTER("i_s_innodb_fill_buffer_pool");
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
heap = mem_heap_create(10000);
/* Go through each chunk of buffer pool. Currently, we only
have one single chunk for each buffer pool */
- for (ulint n = 0; n < buf_pool->n_chunks; n++) {
+ for (ulint n = 0;
+ n < ut_min(buf_pool->n_chunks, buf_pool->n_chunks_new); n++) {
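+		/* During a concurrent buffer pool resize n_chunks_new may
+		be smaller than n_chunks; scanning only the minimum avoids
+		touching chunks that are being withdrawn. */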
const buf_block_t* block;
ulint n_blocks;
buf_page_info_t* info_buffer;
@@ -5215,7 +5172,7 @@ i_s_innodb_fill_buffer_pool(
/* we cache maximum MAX_BUF_INFO_CACHED number of
buffer page info */
num_to_process = ut_min(chunk_size,
- MAX_BUF_INFO_CACHED);
+ (ulint)MAX_BUF_INFO_CACHED);
mem_size = num_to_process * sizeof(buf_page_info_t);
@@ -5267,7 +5224,7 @@ i_s_innodb_fill_buffer_pool(
/*******************************************************************//**
Fill page information for pages in InnoDB buffer pool to the
dynamic table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_innodb_buffer_page_fill_table(
@@ -5280,6 +5237,8 @@ i_s_innodb_buffer_page_fill_table(
DBUG_ENTER("i_s_innodb_buffer_page_fill_table");
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+
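+	/* The startup check moved here from i_s_innodb_fill_buffer_pool(),
+	so it now runs once per query instead of once per buffer pool
+	instance. */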
/* deny access to user without PROCESS privilege */
if (check_global_access(thd, PROCESS_ACL)) {
DBUG_RETURN(0);
@@ -5306,7 +5265,7 @@ i_s_innodb_buffer_page_fill_table(
/*******************************************************************//**
Bind the dynamic table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE.
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_innodb_buffer_page_init(
@@ -5439,6 +5398,7 @@ static ST_FIELD_INFO i_s_innodb_buf_page_lru_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+#ifdef BTR_CUR_HASH_ADAPT
#define IDX_BUF_LRU_PAGE_HASHED 7
{STRUCT_FLD(field_name, "IS_HASHED"),
STRUCT_FLD(field_length, 3),
@@ -5447,8 +5407,9 @@ static ST_FIELD_INFO i_s_innodb_buf_page_lru_fields_info[] =
STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+#endif /* BTR_CUR_HASH_ADAPT */
-#define IDX_BUF_LRU_PAGE_NEWEST_MOD 8
+#define IDX_BUF_LRU_PAGE_NEWEST_MOD 7 + I_S_AHI
{STRUCT_FLD(field_name, "NEWEST_MODIFICATION"),
STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
@@ -5457,7 +5418,7 @@ static ST_FIELD_INFO i_s_innodb_buf_page_lru_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define IDX_BUF_LRU_PAGE_OLDEST_MOD 9
+#define IDX_BUF_LRU_PAGE_OLDEST_MOD 8 + I_S_AHI
{STRUCT_FLD(field_name, "OLDEST_MODIFICATION"),
STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
@@ -5466,7 +5427,7 @@ static ST_FIELD_INFO i_s_innodb_buf_page_lru_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define IDX_BUF_LRU_PAGE_ACCESS_TIME 10
+#define IDX_BUF_LRU_PAGE_ACCESS_TIME 9 + I_S_AHI
{STRUCT_FLD(field_name, "ACCESS_TIME"),
STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
@@ -5475,7 +5436,7 @@ static ST_FIELD_INFO i_s_innodb_buf_page_lru_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define IDX_BUF_LRU_PAGE_TABLE_NAME 11
+#define IDX_BUF_LRU_PAGE_TABLE_NAME 10 + I_S_AHI
{STRUCT_FLD(field_name, "TABLE_NAME"),
STRUCT_FLD(field_length, 1024),
STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
@@ -5484,7 +5445,7 @@ static ST_FIELD_INFO i_s_innodb_buf_page_lru_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define IDX_BUF_LRU_PAGE_INDEX_NAME 12
+#define IDX_BUF_LRU_PAGE_INDEX_NAME 11 + I_S_AHI
{STRUCT_FLD(field_name, "INDEX_NAME"),
STRUCT_FLD(field_length, 1024),
STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
@@ -5493,7 +5454,7 @@ static ST_FIELD_INFO i_s_innodb_buf_page_lru_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define IDX_BUF_LRU_PAGE_NUM_RECS 13
+#define IDX_BUF_LRU_PAGE_NUM_RECS 12 + I_S_AHI
{STRUCT_FLD(field_name, "NUMBER_RECORDS"),
STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
@@ -5502,7 +5463,7 @@ static ST_FIELD_INFO i_s_innodb_buf_page_lru_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define IDX_BUF_LRU_PAGE_DATA_SIZE 14
+#define IDX_BUF_LRU_PAGE_DATA_SIZE 13 + I_S_AHI
{STRUCT_FLD(field_name, "DATA_SIZE"),
STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
@@ -5511,7 +5472,7 @@ static ST_FIELD_INFO i_s_innodb_buf_page_lru_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define IDX_BUF_LRU_PAGE_ZIP_SIZE 15
+#define IDX_BUF_LRU_PAGE_ZIP_SIZE 14 + I_S_AHI
{STRUCT_FLD(field_name, "COMPRESSED_SIZE"),
STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
@@ -5520,7 +5481,7 @@ static ST_FIELD_INFO i_s_innodb_buf_page_lru_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define IDX_BUF_LRU_PAGE_STATE 16
+#define IDX_BUF_LRU_PAGE_STATE 15 + I_S_AHI
{STRUCT_FLD(field_name, "COMPRESSED"),
STRUCT_FLD(field_length, 3),
STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
@@ -5529,7 +5490,7 @@ static ST_FIELD_INFO i_s_innodb_buf_page_lru_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define IDX_BUF_LRU_PAGE_IO_FIX 17
+#define IDX_BUF_LRU_PAGE_IO_FIX 16 + I_S_AHI
{STRUCT_FLD(field_name, "IO_FIX"),
STRUCT_FLD(field_length, 64),
STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
@@ -5538,7 +5499,7 @@ static ST_FIELD_INFO i_s_innodb_buf_page_lru_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define IDX_BUF_LRU_PAGE_IS_OLD 18
+#define IDX_BUF_LRU_PAGE_IS_OLD 17 + I_S_AHI
{STRUCT_FLD(field_name, "IS_OLD"),
STRUCT_FLD(field_length, 3),
STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
@@ -5547,7 +5508,7 @@ static ST_FIELD_INFO i_s_innodb_buf_page_lru_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define IDX_BUF_LRU_PAGE_FREE_CLOCK 19
+#define IDX_BUF_LRU_PAGE_FREE_CLOCK 18 + I_S_AHI
{STRUCT_FLD(field_name, "FREE_PAGE_CLOCK"),
STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
@@ -5562,7 +5523,7 @@ static ST_FIELD_INFO i_s_innodb_buf_page_lru_fields_info[] =
/*******************************************************************//**
Fill Information Schema table INNODB_BUFFER_PAGE_LRU with information
cached in the buf_page_info_t array
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_innodb_buf_page_lru_fill(
@@ -5593,6 +5554,7 @@ i_s_innodb_buf_page_lru_fill(
OK(fields[IDX_BUF_LRU_POOL_ID]->store(
page_info->pool_id, true));
+
OK(fields[IDX_BUF_LRU_POS]->store(
page_info->block_id, true));
@@ -5612,8 +5574,10 @@ i_s_innodb_buf_page_lru_fill(
OK(fields[IDX_BUF_LRU_PAGE_FIX_COUNT]->store(
page_info->fix_count, true));
+#ifdef BTR_CUR_HASH_ADAPT
OK(field_store_string(fields[IDX_BUF_LRU_PAGE_HASHED],
page_info->hashed ? "YES" : "NO"));
+#endif /* BTR_CUR_HASH_ADAPT */
OK(fields[IDX_BUF_LRU_PAGE_NEWEST_MOD]->store(
page_info->newest_mod, true));
@@ -5644,7 +5608,7 @@ i_s_innodb_buf_page_lru_fill(
table_name, sizeof(table_name),
index->table_name,
strlen(index->table_name),
- thd, TRUE);
+ thd);
ret = fields[IDX_BUF_LRU_PAGE_TABLE_NAME]
->store(table_name,
@@ -5652,10 +5616,10 @@ i_s_innodb_buf_page_lru_fill(
table_name_end
- table_name),
system_charset_info)
- || field_store_index_name(
- fields
- [IDX_BUF_LRU_PAGE_INDEX_NAME],
- index->name);
+ || fields[IDX_BUF_LRU_PAGE_INDEX_NAME]
+ ->store(index->name,
+ uint(strlen(index->name)),
+ system_charset_info);
}
mutex_exit(&dict_sys->mutex);
@@ -5665,6 +5629,8 @@ i_s_innodb_buf_page_lru_fill(
if (index) {
fields[IDX_BUF_LRU_PAGE_TABLE_NAME]
->set_notnull();
+ fields[IDX_BUF_LRU_PAGE_INDEX_NAME]
+ ->set_notnull();
}
}
@@ -5736,7 +5702,7 @@ i_s_innodb_buf_page_lru_fill(
/*******************************************************************//**
This is the function that goes through buffer pool's LRU list
and fetch information to INFORMATION_SCHEMA.INNODB_BUFFER_PAGE_LRU.
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_innodb_fill_buffer_lru(
@@ -5753,7 +5719,6 @@ i_s_innodb_fill_buffer_lru(
ulint lru_len;
DBUG_ENTER("i_s_innodb_fill_buffer_lru");
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
/* Obtain buf_pool mutex before allocate info_buffer, since
UT_LIST_GET_LEN(buf_pool->LRU) could change */
@@ -5764,6 +5729,10 @@ i_s_innodb_fill_buffer_lru(
/* Print error message if malloc fail */
info_buffer = (buf_page_info_t*) my_malloc(
lru_len * sizeof *info_buffer, MYF(MY_WME));
+ /* JAN: TODO: MySQL 5.7 PSI
+ info_buffer = (buf_page_info_t*) my_malloc(PSI_INSTRUMENT_ME,
+ lru_len * sizeof *info_buffer, MYF(MY_WME));
+ */
if (!info_buffer) {
status = 1;
@@ -5806,7 +5775,7 @@ exit:
/*******************************************************************//**
Fill page information for pages in InnoDB buffer pool to the
dynamic table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE_LRU
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_innodb_buf_page_lru_fill_table(
@@ -5819,6 +5788,8 @@ i_s_innodb_buf_page_lru_fill_table(
DBUG_ENTER("i_s_innodb_buf_page_lru_fill_table");
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+
/* deny access to any users that do not hold PROCESS_ACL */
if (check_global_access(thd, PROCESS_ACL)) {
DBUG_RETURN(0);
@@ -5845,7 +5816,7 @@ i_s_innodb_buf_page_lru_fill_table(
/*******************************************************************//**
Bind the dynamic table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE_LRU.
-@return 0 on success, 1 on failure */
+@return 0 on success, 1 on failure */
static
int
i_s_innodb_buffer_page_lru_init(
@@ -5915,7 +5886,7 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_buffer_page_lru =
/*******************************************************************//**
Unbind a dynamic INFORMATION_SCHEMA table.
-@return 0 on success */
+@return 0 on success */
static
int
i_s_common_deinit(
@@ -6005,13 +5976,22 @@ static ST_FIELD_INFO innodb_sys_tables_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+#define SYS_TABLES_SPACE_TYPE 8
+ {STRUCT_FLD(field_name, "SPACE_TYPE"),
+ STRUCT_FLD(field_length, 10),
+ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
END_OF_ST_FIELD_INFO
};
/**********************************************************************//**
Populate information_schema.innodb_sys_tables table with information
from SYS_TABLES.
-@return 0 on success */
+@return 0 on success */
static
int
i_s_dict_fill_sys_tables(
@@ -6020,31 +6000,39 @@ i_s_dict_fill_sys_tables(
dict_table_t* table, /*!< in: table */
TABLE* table_to_fill) /*!< in/out: fill this table */
{
- Field** fields;
- ulint compact = DICT_TF_GET_COMPACT(table->flags);
- ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(table->flags);
- ulint zip_size = dict_tf_get_zip_size(table->flags);
- const char* file_format;
- const char* row_format;
+ Field** fields;
+ ulint compact = DICT_TF_GET_COMPACT(table->flags);
+ ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(
+ table->flags);
+ const page_size_t& page_size = dict_tf_get_page_size(table->flags);
+ const char* file_format;
+ const char* row_format;
+ const char* space_type;
file_format = trx_sys_file_format_id_to_name(atomic_blobs);
if (!compact) {
row_format = "Redundant";
} else if (!atomic_blobs) {
row_format = "Compact";
- } else if DICT_TF_GET_ZIP_SSIZE(table->flags) {
+ } else if (DICT_TF_GET_ZIP_SSIZE(table->flags)) {
row_format = "Compressed";
} else {
row_format = "Dynamic";
}
+ if (is_system_tablespace(table->space)) {
+ space_type = "System";
+ } else {
+ space_type = "Single";
+ }
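+	/* is_system_tablespace() presumably matches space 0 (and the
+	temporary tablespace); everything else is reported as a
+	file-per-table "Single" space. */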
+
DBUG_ENTER("i_s_dict_fill_sys_tables");
fields = table_to_fill->field;
OK(fields[SYS_TABLES_ID]->store(longlong(table->id), TRUE));
- OK(field_store_string(fields[SYS_TABLES_NAME], table->name));
+ OK(field_store_string(fields[SYS_TABLES_NAME], table->name.m_name));
OK(fields[SYS_TABLES_FLAG]->store(table->flags));
@@ -6057,7 +6045,11 @@ i_s_dict_fill_sys_tables(
OK(field_store_string(fields[SYS_TABLES_ROW_FORMAT], row_format));
OK(fields[SYS_TABLES_ZIP_PAGE_SIZE]->store(
- static_cast<double>(zip_size)));
+ page_size.is_compressed()
+ ? page_size.physical()
+ : 0, true));
+
+ OK(field_store_string(fields[SYS_TABLES_SPACE_TYPE], space_type));
OK(schema_table_store_record(thd, table_to_fill));
@@ -6089,7 +6081,7 @@ i_s_sys_tables_fill_table(
}
heap = mem_heap_create(1000);
- mutex_enter(&(dict_sys->mutex));
+ mutex_enter(&dict_sys->mutex);
mtr_start(&mtr);
rec = dict_startscan_system(&pcur, &mtr, SYS_TABLES);
@@ -6295,17 +6287,20 @@ static ST_FIELD_INFO innodb_sys_tablestats_fields_info[] =
END_OF_ST_FIELD_INFO
};
-/**********************************************************************//**
-Populate information_schema.innodb_sys_tablestats table with information
+/** Populate information_schema.innodb_sys_tablestats table with information
from SYS_TABLES.
-@return 0 on success */
+@param[in]	thd		thread
+@param[in,out] table table
+@param[in] ref_count table reference count
+@param[in,out] table_to_fill fill this table
+@return 0 on success */
static
int
i_s_dict_fill_sys_tablestats(
-/*=========================*/
- THD* thd, /*!< in: thread */
- dict_table_t* table, /*!< in: table */
- TABLE* table_to_fill) /*!< in/out: fill this table */
+ THD* thd,
+ dict_table_t* table,
+ ulint ref_count,
+ TABLE* table_to_fill)
{
Field** fields;
@@ -6315,7 +6310,8 @@ i_s_dict_fill_sys_tablestats(
OK(fields[SYS_TABLESTATS_ID]->store(longlong(table->id), TRUE));
- OK(field_store_string(fields[SYS_TABLESTATS_NAME], table->name));
+ OK(field_store_string(fields[SYS_TABLESTATS_NAME],
+ table->name.m_name));
dict_table_stats_lock(table, RW_S_LATCH);
@@ -6324,35 +6320,34 @@ i_s_dict_fill_sys_tablestats(
"Initialized"));
OK(fields[SYS_TABLESTATS_NROW]->store(table->stat_n_rows,
- TRUE));
+ true));
OK(fields[SYS_TABLESTATS_CLUST_SIZE]->store(
- static_cast<double>(table->stat_clustered_index_size)));
+ table->stat_clustered_index_size, true));
OK(fields[SYS_TABLESTATS_INDEX_SIZE]->store(
- static_cast<double>(table->stat_sum_of_other_index_sizes)));
+ table->stat_sum_of_other_index_sizes, true));
OK(fields[SYS_TABLESTATS_MODIFIED]->store(
- static_cast<double>(table->stat_modified_counter)));
+ table->stat_modified_counter, true));
} else {
OK(field_store_string(fields[SYS_TABLESTATS_INIT],
"Uninitialized"));
- OK(fields[SYS_TABLESTATS_NROW]->store(0, TRUE));
+ OK(fields[SYS_TABLESTATS_NROW]->store(0, true));
- OK(fields[SYS_TABLESTATS_CLUST_SIZE]->store(0));
+ OK(fields[SYS_TABLESTATS_CLUST_SIZE]->store(0, true));
- OK(fields[SYS_TABLESTATS_INDEX_SIZE]->store(0));
+ OK(fields[SYS_TABLESTATS_INDEX_SIZE]->store(0, true));
- OK(fields[SYS_TABLESTATS_MODIFIED]->store(0));
+ OK(fields[SYS_TABLESTATS_MODIFIED]->store(0, true));
}
dict_table_stats_unlock(table, RW_S_LATCH);
- OK(fields[SYS_TABLESTATS_AUTONINC]->store(table->autoinc, TRUE));
+ OK(fields[SYS_TABLESTATS_AUTONINC]->store(table->autoinc, true));
- OK(fields[SYS_TABLESTATS_TABLE_REF_COUNT]->store(
- static_cast<double>(table->n_ref_count)));
+ OK(fields[SYS_TABLESTATS_TABLE_REF_COUNT]->store(ref_count, true));
OK(schema_table_store_record(thd, table_to_fill));
@@ -6402,12 +6397,20 @@ i_s_sys_tables_fill_table_stats(
heap, rec, &table_rec,
DICT_TABLE_LOAD_FROM_CACHE, &mtr);
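+		/* Sample the reference count while dict_sys->mutex is still
+		held; reading it after the mutex is released below would
+		race with concurrent table opens and drops. */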
+ ulint ref_count = table_rec ? table_rec->get_ref_count() : 0;
mutex_exit(&dict_sys->mutex);
- if (!err_msg) {
- i_s_dict_fill_sys_tablestats(thd, table_rec,
+ DBUG_EXECUTE_IF("test_sys_tablestats", {
+ if (strcmp("test/t1", table_rec->name.m_name) == 0 ) {
+ DEBUG_SYNC_C("dict_table_not_protected");
+ }});
+
+ if (table_rec != NULL) {
+ ut_ad(err_msg == NULL);
+ i_s_dict_fill_sys_tablestats(thd, table_rec, ref_count,
tables->table);
} else {
+ ut_ad(err_msg != NULL);
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
ER_CANT_FIND_SYSTEM_REC, "%s",
err_msg);
@@ -6419,6 +6422,7 @@ i_s_sys_tables_fill_table_stats(
/* Get the next record */
rw_lock_s_lock(&dict_operation_lock);
mutex_enter(&dict_sys->mutex);
+
mtr_start(&mtr);
rec = dict_getnext_system(&pcur, &mtr);
}
@@ -6568,6 +6572,15 @@ static ST_FIELD_INFO innodb_sysindex_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+#define SYS_INDEX_MERGE_THRESHOLD 7
+ {STRUCT_FLD(field_name, "MERGE_THRESHOLD"),
+ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, 0),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
END_OF_ST_FIELD_INFO
};
@@ -6591,11 +6604,19 @@ i_s_dict_fill_sys_indexes(
fields = table_to_fill->field;
- OK(field_store_index_name(fields[SYS_INDEX_NAME], index->name));
+ if (*index->name == *TEMP_INDEX_PREFIX_STR) {
+ /* Since TEMP_INDEX_PREFIX_STR is not valid UTF-8, we
+ need to convert it to something else. */
+ *const_cast<char*>(index->name()) = '?';
+ }
- OK(fields[SYS_INDEX_ID]->store(longlong(index->id), TRUE));
+ OK(fields[SYS_INDEX_NAME]->store(index->name,
+ uint(strlen(index->name)),
+ system_charset_info));
- OK(fields[SYS_INDEX_TABLE_ID]->store(longlong(table_id), TRUE));
+ OK(fields[SYS_INDEX_ID]->store(longlong(index->id), true));
+
+ OK(fields[SYS_INDEX_TABLE_ID]->store(longlong(table_id), true));
OK(fields[SYS_INDEX_TYPE]->store(index->type));
@@ -6610,6 +6631,8 @@ i_s_dict_fill_sys_indexes(
OK(fields[SYS_INDEX_SPACE]->store(index->space));
+ OK(fields[SYS_INDEX_MERGE_THRESHOLD]->store(index->merge_threshold));
+
OK(schema_table_store_record(thd, table_to_fill));
DBUG_RETURN(0);
@@ -6827,6 +6850,8 @@ i_s_dict_fill_sys_columns(
const char* col_name, /*!< in: column name */
dict_col_t* column, /*!< in: dict_col_t struct holding
more column information */
+ ulint nth_v_col, /*!< in: virtual column, its
+ sequence number (nth virtual col) */
TABLE* table_to_fill) /*!< in/out: fill this table */
{
Field** fields;
@@ -6835,11 +6860,16 @@ i_s_dict_fill_sys_columns(
fields = table_to_fill->field;
- OK(fields[SYS_COLUMN_TABLE_ID]->store(longlong(table_id), TRUE));
+ OK(fields[SYS_COLUMN_TABLE_ID]->store((longlong) table_id, TRUE));
OK(field_store_string(fields[SYS_COLUMN_NAME], col_name));
- OK(fields[SYS_COLUMN_POSITION]->store(column->ind));
+ if (dict_col_is_virtual(column)) {
+ ulint pos = dict_create_v_col_pos(nth_v_col, column->ind);
+ OK(fields[SYS_COLUMN_POSITION]->store(pos, true));
+ } else {
+ OK(fields[SYS_COLUMN_POSITION]->store(column->ind, true));
+ }
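+	/* For a virtual column, dict_create_v_col_pos() presumably packs
+	the virtual-column sequence number (nth_v_col) into the upper bits
+	of POS together with the position in the original table, matching
+	the encoding used for SYS_COLUMNS.POS on disk. */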
OK(fields[SYS_COLUMN_MTYPE]->store(column->mtype));
@@ -6887,18 +6917,20 @@ i_s_sys_columns_fill_table(
const char* err_msg;
dict_col_t column_rec;
table_id_t table_id;
+ ulint nth_v_col;
/* populate a dict_col_t structure with information from
a SYS_COLUMNS row */
err_msg = dict_process_sys_columns_rec(heap, rec, &column_rec,
- &table_id, &col_name);
+ &table_id, &col_name,
+ &nth_v_col);
mtr_commit(&mtr);
mutex_exit(&dict_sys->mutex);
if (!err_msg) {
i_s_dict_fill_sys_columns(thd, table_id, col_name,
- &column_rec,
+ &column_rec, nth_v_col,
tables->table);
} else {
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
@@ -6990,6 +7022,213 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_columns =
STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
};
+/** SYS_VIRTUAL **************************************************/
+/** Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_VIRTUAL */
+static ST_FIELD_INFO innodb_sys_virtual_fields_info[] =
+{
+#define SYS_VIRTUAL_TABLE_ID 0
+ {STRUCT_FLD(field_name, "TABLE_ID"),
+ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define SYS_VIRTUAL_POS 1
+ {STRUCT_FLD(field_name, "POS"),
+ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define SYS_VIRTUAL_BASE_POS 2
+ {STRUCT_FLD(field_name, "BASE_POS"),
+ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+ END_OF_ST_FIELD_INFO
+};
+
+/** Function to populate the information_schema.innodb_sys_virtual with
+related information.
+@param[in]	thd		thread
+@param[in]	table_id	table ID
+@param[in]	pos		virtual column position
+@param[in]	base_pos	base column position
+@param[in,out]	table_to_fill	fill this table
+@return 0 on success */
+static
+int
+i_s_dict_fill_sys_virtual(
+ THD* thd,
+ table_id_t table_id,
+ ulint pos,
+ ulint base_pos,
+ TABLE* table_to_fill)
+{
+ Field** fields;
+
+ DBUG_ENTER("i_s_dict_fill_sys_virtual");
+
+ fields = table_to_fill->field;
+
+ OK(fields[SYS_VIRTUAL_TABLE_ID]->store(table_id, true));
+
+ OK(fields[SYS_VIRTUAL_POS]->store(pos, true));
+
+ OK(fields[SYS_VIRTUAL_BASE_POS]->store(base_pos, true));
+
+ OK(schema_table_store_record(thd, table_to_fill));
+
+ DBUG_RETURN(0);
+}
+
+/** Function to fill information_schema.innodb_sys_virtual with information
+collected by scanning the SYS_VIRTUAL table.
+@param[in]	thd		thread
+@param[in,out]	tables		tables to fill
+@param[in]	item		condition (not used)
+@return 0 on success */
+static
+int
+i_s_sys_virtual_fill_table(
+ THD* thd,
+ TABLE_LIST* tables,
+ Item* )
+{
+ btr_pcur_t pcur;
+ const rec_t* rec;
+ ulint pos;
+ ulint base_pos;
+ mem_heap_t* heap;
+ mtr_t mtr;
+
+ DBUG_ENTER("i_s_sys_virtual_fill_table");
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+
+ /* deny access to user without PROCESS_ACL privilege */
+ if (check_global_access(thd, PROCESS_ACL)) {
+ DBUG_RETURN(0);
+ }
+
+ heap = mem_heap_create(1000);
+ mutex_enter(&dict_sys->mutex);
+ mtr_start(&mtr);
+
+ rec = dict_startscan_system(&pcur, &mtr, SYS_VIRTUAL);
+
+ while (rec) {
+ const char* err_msg;
+ table_id_t table_id;
+
+ /* populate a dict_col_t structure with information from
+ a SYS_VIRTUAL row */
+ err_msg = dict_process_sys_virtual_rec(heap, rec,
+ &table_id, &pos,
+ &base_pos);
+
+ mtr_commit(&mtr);
+ mutex_exit(&dict_sys->mutex);
+
+ if (!err_msg) {
+ i_s_dict_fill_sys_virtual(thd, table_id, pos, base_pos,
+ tables->table);
+ } else {
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_CANT_FIND_SYSTEM_REC, "%s",
+ err_msg);
+ }
+
+ mem_heap_empty(heap);
+
+ /* Get the next record */
+ mutex_enter(&dict_sys->mutex);
+ mtr_start(&mtr);
+ rec = dict_getnext_system(&pcur, &mtr);
+ }
+
+ mtr_commit(&mtr);
+ mutex_exit(&dict_sys->mutex);
+ mem_heap_free(heap);
+
+ DBUG_RETURN(0);
+}
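+/* As in the other SYS_* scans above, dict_sys->mutex is released and the
+mini-transaction committed before each row is handed to the SQL layer, and
+both are reacquired before dict_getnext_system() fetches the next record. */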
+
+/** Bind the dynamic table INFORMATION_SCHEMA.INNODB_SYS_VIRTUAL.
+@param[in,out]	p	table schema object
+@return 0 on success */
+static
+int
+innodb_sys_virtual_init(
+ void* p)
+{
+ ST_SCHEMA_TABLE* schema;
+
+ DBUG_ENTER("innodb_sys_virtual_init");
+
+ schema = (ST_SCHEMA_TABLE*) p;
+
+ schema->fields_info = innodb_sys_virtual_fields_info;
+ schema->fill_table = i_s_sys_virtual_fill_table;
+
+ DBUG_RETURN(0);
+}
+
+struct st_maria_plugin i_s_innodb_sys_virtual =
+{
+ /* the plugin type (a MYSQL_XXX_PLUGIN value) */
+ /* int */
+ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+
+ /* pointer to type-specific plugin descriptor */
+ /* void* */
+ STRUCT_FLD(info, &i_s_info),
+
+ /* plugin name */
+ /* const char* */
+ STRUCT_FLD(name, "INNODB_SYS_VIRTUAL"),
+
+ /* plugin author (for SHOW PLUGINS) */
+ /* const char* */
+ STRUCT_FLD(author, plugin_author),
+
+ /* general descriptive text (for SHOW PLUGINS) */
+ /* const char* */
+ STRUCT_FLD(descr, "InnoDB SYS_VIRTUAL"),
+
+ /* the plugin license (PLUGIN_LICENSE_XXX) */
+ /* int */
+ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
+
+ /* the function to invoke when plugin is loaded */
+ /* int (*)(void*); */
+ STRUCT_FLD(init, innodb_sys_virtual_init),
+
+ /* the function to invoke when plugin is unloaded */
+ /* int (*)(void*); */
+ STRUCT_FLD(deinit, i_s_common_deinit),
+
+ /* plugin version (for SHOW PLUGINS) */
+ /* unsigned int */
+ STRUCT_FLD(version, INNODB_VERSION_SHORT),
+
+ /* struct st_mysql_show_var* */
+ STRUCT_FLD(status_vars, NULL),
+
+ /* struct st_mysql_sys_var** */
+ STRUCT_FLD(system_vars, NULL),
+
+ /* Maria extension */
+ STRUCT_FLD(version_info, INNODB_VERSION_STR),
+ STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+};
/** SYS_FIELDS ***************************************************/
/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_FIELDS */
static ST_FIELD_INFO innodb_sys_fields_fields_info[] =
@@ -7044,11 +7283,11 @@ i_s_dict_fill_sys_fields(
fields = table_to_fill->field;
- OK(fields[SYS_FIELD_INDEX_ID]->store(longlong(index_id), TRUE));
+ OK(fields[SYS_FIELD_INDEX_ID]->store(index_id, true));
OK(field_store_string(fields[SYS_FIELD_NAME], field->name));
- OK(fields[SYS_FIELD_POS]->store(static_cast<double>(pos)));
+ OK(fields[SYS_FIELD_POS]->store(pos, true));
OK(schema_table_store_record(thd, table_to_fill));
@@ -7496,7 +7735,7 @@ i_s_dict_fill_sys_foreign_cols(
OK(field_store_string(fields[SYS_FOREIGN_COL_REF_NAME], ref_col_name));
- OK(fields[SYS_FOREIGN_COL_POS]->store(static_cast<double>(pos)));
+ OK(fields[SYS_FOREIGN_COL_POS]->store(pos, true));
OK(schema_table_store_record(thd, table_to_fill));
@@ -7709,6 +7948,42 @@ static ST_FIELD_INFO innodb_sys_tablespaces_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+#define SYS_TABLESPACES_SPACE_TYPE 7
+ {STRUCT_FLD(field_name, "SPACE_TYPE"),
+ STRUCT_FLD(field_length, 10),
+ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define SYS_TABLESPACES_FS_BLOCK_SIZE 8
+ {STRUCT_FLD(field_name, "FS_BLOCK_SIZE"),
+ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define SYS_TABLESPACES_FILE_SIZE 9
+ {STRUCT_FLD(field_name, "FILE_SIZE"),
+ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define SYS_TABLESPACES_ALLOC_SIZE 10
+ {STRUCT_FLD(field_name, "ALLOCATED_SIZE"),
+ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
END_OF_ST_FIELD_INFO
};
@@ -7735,12 +8010,14 @@ i_s_dict_fill_sys_tablespaces(
DBUG_ENTER("i_s_dict_fill_sys_tablespaces");
file_format = trx_sys_file_format_id_to_name(atomic_blobs);
- if (!atomic_blobs) {
- row_format = "Compact or Redundant";
- } else if DICT_TF_GET_ZIP_SSIZE(flags) {
+ if (is_system_tablespace(space)) {
+ row_format = "Compact, Redundant or Dynamic";
+ } else if (FSP_FLAGS_GET_ZIP_SSIZE(flags)) {
row_format = "Compressed";
- } else {
+ } else if (atomic_blobs) {
row_format = "Dynamic";
+ } else {
+ row_format = "Compact or Redundant";
}
fields = table_to_fill->field;
@@ -7754,26 +8031,88 @@ i_s_dict_fill_sys_tablespaces(
OK(field_store_string(fields[SYS_TABLESPACES_FILE_FORMAT],
file_format));
- OK(field_store_string(fields[SYS_TABLESPACES_ROW_FORMAT],
- row_format));
+ OK(field_store_string(fields[SYS_TABLESPACES_ROW_FORMAT], row_format));
+
+ OK(field_store_string(fields[SYS_TABLESPACES_SPACE_TYPE],
+ is_system_tablespace(space)
+ ? "System" : "Single"));
ulint cflags = fsp_flags_is_valid(flags, space)
? flags : fsp_flags_convert_from_101(flags);
- if (cflags != ULINT_UNDEFINED) {
- OK(fields[SYS_TABLESPACES_PAGE_SIZE]->store(
- fsp_flags_get_page_size(cflags), true));
-
- OK(fields[SYS_TABLESPACES_ZIP_PAGE_SIZE]->store(
- fsp_flags_get_zip_size(cflags), true));
- } else {
+ if (cflags == ULINT_UNDEFINED) {
fields[SYS_TABLESPACES_PAGE_SIZE]->set_null();
fields[SYS_TABLESPACES_ZIP_PAGE_SIZE]->set_null();
+ fields[SYS_TABLESPACES_FS_BLOCK_SIZE]->set_null();
+ fields[SYS_TABLESPACES_FILE_SIZE]->set_null();
+ fields[SYS_TABLESPACES_ALLOC_SIZE]->set_null();
+ OK(schema_table_store_record(thd, table_to_fill));
+ DBUG_RETURN(0);
}
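+	/* Past this point the tablespace flags are known to be valid;
+	handling the invalid case with an early return above keeps the
+	common path un-nested. */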
+ const page_size_t page_size(cflags);
+
+ OK(fields[SYS_TABLESPACES_PAGE_SIZE]->store(
+ page_size.logical(), true));
+
+ OK(fields[SYS_TABLESPACES_ZIP_PAGE_SIZE]->store(
+ page_size.physical(), true));
+
+ os_file_stat_t stat;
+ os_file_size_t file;
+
+ memset(&file, 0xff, sizeof(file));
+ memset(&stat, 0x0, sizeof(stat));
+
+ if (fil_space_t* s = fil_space_acquire_silent(space)) {
+ const char *filepath = s->chain.start
+ ? s->chain.start->name : NULL;
+ if (!filepath) {
+ goto file_done;
+ }
+
+ file = os_file_get_size(filepath);
+
+ /* Get the file system (or Volume) block size. */
+ switch (dberr_t err = os_file_get_status(filepath, &stat,
+ false, false)) {
+ case DB_FAIL:
+ ib::warn()
+ << "File '" << filepath << "', failed to get "
+ << "stats";
+ break;
+
+ case DB_SUCCESS:
+ case DB_NOT_FOUND:
+ break;
+
+ default:
+ ib::error()
+ << "File '" << filepath << "' "
+ << ut_strerr(err);
+ break;
+ }
+
+file_done:
+ fil_space_release(s);
+ }
+
+ if (file.m_total_size == static_cast<os_offset_t>(~0)) {
+ stat.block_size = 0;
+ file.m_total_size = 0;
+ file.m_alloc_size = 0;
+ }
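+	/* file was memset to all ones, so m_total_size is still ~0 when
+	the size lookup never ran or failed; report zero sizes in that
+	case instead of a huge bogus number. */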
+
+ OK(fields[SYS_TABLESPACES_FS_BLOCK_SIZE]->store(stat.block_size, true));
+
+ OK(fields[SYS_TABLESPACES_FILE_SIZE]->store(file.m_total_size, true));
+
+ OK(fields[SYS_TABLESPACES_ALLOC_SIZE]->store(file.m_alloc_size, true));
+
OK(schema_table_store_record(thd, table_to_fill));
DBUG_RETURN(0);
}
+
/*******************************************************************//**
Function to populate INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES table.
Loop through each record in SYS_TABLESPACES, and extract the column
@@ -7804,9 +8143,10 @@ i_s_sys_tablespaces_fill_table(
mutex_enter(&dict_sys->mutex);
mtr_start(&mtr);
- rec = dict_startscan_system(&pcur, &mtr, SYS_TABLESPACES);
+ for (rec = dict_startscan_system(&pcur, &mtr, SYS_TABLESPACES);
+ rec != NULL;
+ rec = dict_getnext_system(&pcur, &mtr)) {
- while (rec) {
const char* err_msg;
ulint space;
const char* name;
@@ -7834,7 +8174,6 @@ i_s_sys_tablespaces_fill_table(
/* Get the next record */
mutex_enter(&dict_sys->mutex);
mtr_start(&mtr);
- rec = dict_getnext_system(&pcur, &mtr);
}
mtr_commit(&mtr);
@@ -8228,31 +8567,31 @@ i_s_dict_fill_tablespaces_encryption(
goto skip;
}
- OK(fields[TABLESPACES_ENCRYPTION_SPACE]->store(space->id));
+ OK(fields[TABLESPACES_ENCRYPTION_SPACE]->store(space->id, true));
OK(field_store_string(fields[TABLESPACES_ENCRYPTION_NAME],
space->name));
OK(fields[TABLESPACES_ENCRYPTION_ENCRYPTION_SCHEME]->store(
- status.scheme));
+ status.scheme, true));
OK(fields[TABLESPACES_ENCRYPTION_KEYSERVER_REQUESTS]->store(
- status.keyserver_requests));
+ status.keyserver_requests, true));
OK(fields[TABLESPACES_ENCRYPTION_MIN_KEY_VERSION]->store(
- status.min_key_version));
+ status.min_key_version, true));
OK(fields[TABLESPACES_ENCRYPTION_CURRENT_KEY_VERSION]->store(
- status.current_key_version));
+ status.current_key_version, true));
OK(fields[TABLESPACES_ENCRYPTION_CURRENT_KEY_ID]->store(
- status.key_id));
+ status.key_id, true));
OK(fields[TABLESPACES_ENCRYPTION_ROTATING_OR_FLUSHING]->store(
- (status.rotating || status.flushing) ? 1 : 0));
+ status.rotating || status.flushing, true));
if (status.rotating) {
fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_PAGE_NUMBER]->set_notnull();
OK(fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_PAGE_NUMBER]->store(
- status.rotate_next_page_number));
+ status.rotate_next_page_number, true));
fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_MAX_PAGE_NUMBER]->set_notnull();
OK(fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_MAX_PAGE_NUMBER]->store(
- status.rotate_max_page_number));
+ status.rotate_max_page_number, true));
} else {
fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_PAGE_NUMBER]
->set_null();
@@ -8278,12 +8617,6 @@ i_s_tablespaces_encryption_fill_table(
TABLE_LIST* tables, /*!< in/out: tables to fill */
Item* ) /*!< in: condition (not used) */
{
- btr_pcur_t pcur;
- const rec_t* rec;
- mem_heap_t* heap;
- mtr_t mtr;
- bool found_space_0 = false;
-
DBUG_ENTER("i_s_tablespaces_encryption_fill_table");
RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
@@ -8292,68 +8625,24 @@ i_s_tablespaces_encryption_fill_table(
DBUG_RETURN(0);
}
- heap = mem_heap_create(1000);
- mutex_enter(&dict_sys->mutex);
- mtr_start(&mtr);
-
- rec = dict_startscan_system(&pcur, &mtr, SYS_TABLESPACES);
-
- while (rec) {
- const char* err_msg;
- ulint space_id;
- const char* name;
- ulint flags;
-
- /* Extract necessary information from a SYS_TABLESPACES row */
- err_msg = dict_process_sys_tablespaces(
- heap, rec, &space_id, &name, &flags);
-
- mtr_commit(&mtr);
- mutex_exit(&dict_sys->mutex);
-
- if (space_id == 0) {
- found_space_0 = true;
- }
-
- fil_space_t* space = fil_space_acquire_silent(space_id);
-
- if (!err_msg && space) {
- i_s_dict_fill_tablespaces_encryption(
- thd, space, tables->table);
- } else {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_CANT_FIND_SYSTEM_REC, "%s",
- err_msg);
- }
-
- if (space) {
- fil_space_release(space);
+ mutex_enter(&fil_system->mutex);
+
+ for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system->space_list);
+ space; space = UT_LIST_GET_NEXT(space_list, space)) {
+ if (space->purpose == FIL_TYPE_TABLESPACE) {
+ space->n_pending_ops++;
+ mutex_exit(&fil_system->mutex);
+ if (int err = i_s_dict_fill_tablespaces_encryption(
+ thd, space, tables->table)) {
+ fil_space_release(space);
+ DBUG_RETURN(err);
+ }
+ mutex_enter(&fil_system->mutex);
+ space->n_pending_ops--;
}
-
- mem_heap_empty(heap);
-
- /* Get the next record */
- mutex_enter(&dict_sys->mutex);
- mtr_start(&mtr);
- rec = dict_getnext_system(&pcur, &mtr);
- }
-
- mtr_commit(&mtr);
- mutex_exit(&dict_sys->mutex);
- mem_heap_free(heap);
-
- if (found_space_0 == false) {
- /* space 0 does for what ever unknown reason not show up
- * in iteration above, add it manually */
-
- fil_space_t* space = fil_space_acquire_silent(0);
-
- i_s_dict_fill_tablespaces_encryption(
- thd, space, tables->table);
-
- fil_space_release(space);
}
+ mutex_exit(&fil_system->mutex);
DBUG_RETURN(0);
}
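/* Incrementing space->n_pending_ops under fil_system->mutex pins each
tablespace so that it cannot be freed while the mutex is released to fill
the row; fil_space_release() drops the pin on the error path. The
scrubbing table below uses the same pattern. */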
/*******************************************************************//**
@@ -8529,13 +8818,13 @@ i_s_dict_fill_tablespaces_scrubbing(
fil_space_get_scrub_status(space, &status);
- OK(fields[TABLESPACES_SCRUBBING_SPACE]->store(space->id));
+ OK(fields[TABLESPACES_SCRUBBING_SPACE]->store(space->id, true));
OK(field_store_string(fields[TABLESPACES_SCRUBBING_NAME],
space->name));
OK(fields[TABLESPACES_SCRUBBING_COMPRESSED]->store(
- status.compressed ? 1 : 0));
+ status.compressed ? 1 : 0, true));
if (status.last_scrub_completed == 0) {
fields[TABLESPACES_SCRUBBING_LAST_SCRUB_COMPLETED]->set_null();
@@ -8562,11 +8851,11 @@ i_s_dict_fill_tablespaces_scrubbing(
fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_STARTED],
status.current_scrub_started));
OK(fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_ACTIVE_THREADS]
- ->store(status.current_scrub_active_threads));
+ ->store(status.current_scrub_active_threads, true));
OK(fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_PAGE_NUMBER]
- ->store(status.current_scrub_page_number));
+ ->store(status.current_scrub_page_number, true));
OK(fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_MAX_PAGE_NUMBER]
- ->store(status.current_scrub_max_page_number));
+ ->store(status.current_scrub_max_page_number, true));
} else {
for (uint i = 0; i < array_elements(field_numbers); i++) {
fields[field_numbers[i]]->set_null();
@@ -8590,12 +8879,6 @@ i_s_tablespaces_scrubbing_fill_table(
TABLE_LIST* tables, /*!< in/out: tables to fill */
Item* ) /*!< in: condition (not used) */
{
- btr_pcur_t pcur;
- const rec_t* rec;
- mem_heap_t* heap;
- mtr_t mtr;
- bool found_space_0 = false;
-
DBUG_ENTER("i_s_tablespaces_scrubbing_fill_table");
RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
@@ -8604,67 +8887,24 @@ i_s_tablespaces_scrubbing_fill_table(
DBUG_RETURN(0);
}
- heap = mem_heap_create(1000);
- mutex_enter(&dict_sys->mutex);
- mtr_start(&mtr);
-
- rec = dict_startscan_system(&pcur, &mtr, SYS_TABLESPACES);
-
- while (rec) {
- const char* err_msg;
- ulint space_id;
- const char* name;
- ulint flags;
-
- /* Extract necessary information from a SYS_TABLESPACES row */
- err_msg = dict_process_sys_tablespaces(
- heap, rec, &space_id, &name, &flags);
-
- mtr_commit(&mtr);
- mutex_exit(&dict_sys->mutex);
-
- if (space_id == 0) {
- found_space_0 = true;
- }
-
- fil_space_t* space = fil_space_acquire_silent(space_id);
-
- if (!err_msg && space) {
- i_s_dict_fill_tablespaces_scrubbing(
- thd, space, tables->table);
- } else {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_CANT_FIND_SYSTEM_REC, "%s",
- err_msg);
- }
-
- if (space) {
- fil_space_release(space);
+ mutex_enter(&fil_system->mutex);
+
+ for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system->space_list);
+ space; space = UT_LIST_GET_NEXT(space_list, space)) {
+ if (space->purpose == FIL_TYPE_TABLESPACE) {
+ space->n_pending_ops++;
+ mutex_exit(&fil_system->mutex);
+ if (int err = i_s_dict_fill_tablespaces_scrubbing(
+ thd, space, tables->table)) {
+ fil_space_release(space);
+ DBUG_RETURN(err);
+ }
+ mutex_enter(&fil_system->mutex);
+ space->n_pending_ops--;
}
-
- mem_heap_empty(heap);
-
- /* Get the next record */
- mutex_enter(&dict_sys->mutex);
- mtr_start(&mtr);
- rec = dict_getnext_system(&pcur, &mtr);
- }
-
- mtr_commit(&mtr);
- mutex_exit(&dict_sys->mutex);
- mem_heap_free(heap);
-
- if (found_space_0 == false) {
- /* space 0 does for what ever unknown reason not show up
- * in iteration above, add it manually */
- fil_space_t* space = fil_space_acquire_silent(0);
-
- i_s_dict_fill_tablespaces_scrubbing(
- thd, space, tables->table);
-
- fil_space_release(space);
}
+ mutex_exit(&fil_system->mutex);
DBUG_RETURN(0);
}
/*******************************************************************//**
@@ -8790,11 +9030,8 @@ i_s_innodb_mutexes_fill_table(
TABLE_LIST* tables, /*!< in/out: tables to fill */
Item* ) /*!< in: condition (not used) */
{
- ib_mutex_t* mutex;
rw_lock_t* lock;
- ulint block_mutex_oswait_count = 0;
ulint block_lock_oswait_count = 0;
- ib_mutex_t* block_mutex = NULL;
rw_lock_t* block_lock = NULL;
Field** fields = tables->table->field;
@@ -8806,56 +9043,51 @@ i_s_innodb_mutexes_fill_table(
DBUG_RETURN(0);
}
- {
- struct Locking
- {
- Locking() { mutex_enter(&mutex_list_mutex); }
- ~Locking() { mutex_exit(&mutex_list_mutex); }
- } locking;
+ // mutex_enter(&mutex_list_mutex);
- for (mutex = UT_LIST_GET_FIRST(mutex_list); mutex != NULL;
- mutex = UT_LIST_GET_NEXT(list, mutex)) {
- if (mutex->count_os_wait == 0) {
- continue;
- }
-
- if (buf_pool_is_block_mutex(mutex)) {
- block_mutex = mutex;
- block_mutex_oswait_count
- += mutex->count_os_wait;
- continue;
- }
+#ifdef JAN_TODO_FIXME
+ ib_mutex_t* mutex;
+ ulint block_mutex_oswait_count = 0;
+ ib_mutex_t* block_mutex = NULL;
+ for (mutex = UT_LIST_GET_FIRST(os_mutex_list); mutex != NULL;
+ mutex = UT_LIST_GET_NEXT(list, mutex)) {
+ if (mutex->count_os_wait == 0) {
+ continue;
+ }
- OK(field_store_string(fields[MUTEXES_NAME],
- mutex->cmutex_name));
- OK(field_store_string(
- fields[MUTEXES_CREATE_FILE],
- innobase_basename(mutex->cfile_name)));
- OK(field_store_ulint(fields[MUTEXES_CREATE_LINE],
- mutex->cline));
- OK(field_store_ulint(fields[MUTEXES_OS_WAITS],
- mutex->count_os_wait));
- OK(schema_table_store_record(thd, tables->table));
+ if (buf_pool_is_block_mutex(mutex)) {
+ block_mutex = mutex;
+ block_mutex_oswait_count += mutex->count_os_wait;
+ continue;
}
- if (block_mutex) {
- char buf1[IO_SIZE];
+ OK(field_store_string(fields[MUTEXES_NAME], mutex->cmutex_name));
+ OK(field_store_string(fields[MUTEXES_CREATE_FILE],
+ innobase_basename(mutex->cfile_name)));
+		OK(fields[MUTEXES_CREATE_LINE]->store(mutex->cline, true));
+		fields[MUTEXES_CREATE_LINE]->set_notnull();
+		OK(fields[MUTEXES_OS_WAITS]->store(mutex->count_os_wait, true));
+ fields[MUTEXES_OS_WAITS]->set_notnull();
+ OK(schema_table_store_record(thd, tables->table));
+ }
- my_snprintf(buf1, sizeof buf1, "combined %s",
- innobase_basename(block_mutex->cfile_name));
+ if (block_mutex) {
+ char buf1[IO_SIZE];
- OK(field_store_string(fields[MUTEXES_NAME],
- block_mutex->cmutex_name));
- OK(field_store_string(fields[MUTEXES_CREATE_FILE],
- buf1));
- OK(field_store_ulint(fields[MUTEXES_CREATE_LINE],
- block_mutex->cline));
- OK(field_store_ulint(fields[MUTEXES_OS_WAITS],
- block_mutex_oswait_count));
- OK(schema_table_store_record(thd, tables->table));
- }
+ snprintf(buf1, sizeof buf1, "combined %s",
+ innobase_basename(block_mutex->cfile_name));
+
+ OK(field_store_string(fields[MUTEXES_NAME], block_mutex->cmutex_name));
+ OK(field_store_string(fields[MUTEXES_CREATE_FILE], buf1));
+ OK(fields[MUTEXES_CREATE_LINE]->store(block_mutex->cline, true));
+ fields[MUTEXES_CREATE_LINE]->set_notnull();
+		OK(fields[MUTEXES_OS_WAITS]->store(
+			    block_mutex_oswait_count, true));
+		fields[MUTEXES_OS_WAITS]->set_notnull();
+ OK(schema_table_store_record(thd, tables->table));
}
+ mutex_exit(&mutex_list_mutex);
+#endif /* JAN_TODO_FIXME */
+
{
struct Locking
{
@@ -8863,6 +9095,8 @@ i_s_innodb_mutexes_fill_table(
~Locking() { mutex_exit(&rw_lock_list_mutex); }
} locking;
+ char lock_name[sizeof "buf0dump.cc:12345"];
+
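+		/* sizeof "buf0dump.cc:12345" sizes the buffer for the
+		longest expected "file:line" name; snprintf() truncates
+		safely if a longer one ever appears. */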
for (lock = UT_LIST_GET_FIRST(rw_lock_list); lock != NULL;
lock = UT_LIST_GET_NEXT(list, lock)) {
if (lock->count_os_wait == 0) {
@@ -8875,32 +9109,41 @@ i_s_innodb_mutexes_fill_table(
continue;
}
+ const char* basename = innobase_basename(
+ lock->cfile_name);
+
+ snprintf(lock_name, sizeof lock_name, "%s:%u",
+ basename, lock->cline);
+
OK(field_store_string(fields[MUTEXES_NAME],
- lock->lock_name));
- OK(field_store_string(
- fields[MUTEXES_CREATE_FILE],
- innobase_basename(lock->cfile_name)));
- OK(field_store_ulint(fields[MUTEXES_CREATE_LINE],
- lock->cline));
- OK(field_store_ulint(fields[MUTEXES_OS_WAITS],
- lock->count_os_wait));
+ lock_name));
+ OK(field_store_string(fields[MUTEXES_CREATE_FILE],
+ basename));
+ OK(fields[MUTEXES_CREATE_LINE]->store(lock->cline,
+ true));
+ fields[MUTEXES_CREATE_LINE]->set_notnull();
+ OK(fields[MUTEXES_OS_WAITS]->store(lock->count_os_wait,
+ true));
+ fields[MUTEXES_OS_WAITS]->set_notnull();
OK(schema_table_store_record(thd, tables->table));
}
if (block_lock) {
char buf1[IO_SIZE];
- my_snprintf(buf1, sizeof buf1, "combined %s",
- innobase_basename(block_lock->cfile_name));
+ snprintf(buf1, sizeof buf1, "combined %s",
+ innobase_basename(block_lock->cfile_name));
OK(field_store_string(fields[MUTEXES_NAME],
- block_lock->lock_name));
+ "buf_block_t::lock"));
OK(field_store_string(fields[MUTEXES_CREATE_FILE],
buf1));
- OK(field_store_ulint(fields[MUTEXES_CREATE_LINE],
- block_lock->cline));
- OK(field_store_ulint(fields[MUTEXES_OS_WAITS],
- block_lock_oswait_count));
+ OK(fields[MUTEXES_CREATE_LINE]->store(block_lock->cline,
+ true));
+ fields[MUTEXES_CREATE_LINE]->set_notnull();
+ OK(fields[MUTEXES_OS_WAITS]->store(
+ block_lock_oswait_count, true));
+ fields[MUTEXES_OS_WAITS]->set_notnull();
OK(schema_table_store_record(thd, tables->table));
}
}
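Note: the rw-lock branch above sizes its name buffer with sizeof applied to a string literal, which counts the terminating NUL, so the longest expected "file:line" string fits exactly. A minimal standalone sketch of that idiom follows; the basename and line number are made up for illustration, standing in for innobase_basename(lock->cfile_name) and lock->cline.

#include <cstdio>

int main()
{
	/* sizeof "buf0dump.cc:12345" == 18: 17 characters plus the NUL */
	char lock_name[sizeof "buf0dump.cc:12345"];

	/* hypothetical values for illustration only */
	const char* basename = "buf0flu.cc";
	unsigned cline = 1234;

	snprintf(lock_name, sizeof lock_name, "%s:%u", basename, cline);
	puts(lock_name); /* prints "buf0flu.cc:1234" */
}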
diff --git a/storage/innobase/handler/i_s.h b/storage/innobase/handler/i_s.h
index 9dc025fa649..ed4165bdeeb 100644
--- a/storage/innobase/handler/i_s.h
+++ b/storage/innobase/handler/i_s.h
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved.
-Copyrigth (c) 2014, 2019, MariaDB Corporation.
+Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2014, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -61,6 +61,7 @@ extern struct st_maria_plugin i_s_innodb_sys_foreign_cols;
extern struct st_maria_plugin i_s_innodb_sys_tablespaces;
extern struct st_maria_plugin i_s_innodb_sys_datafiles;
extern struct st_maria_plugin i_s_innodb_mutexes;
+extern struct st_maria_plugin i_s_innodb_sys_virtual;
extern struct st_maria_plugin i_s_innodb_tablespaces_encryption;
extern struct st_maria_plugin i_s_innodb_tablespaces_scrubbing;
extern struct st_maria_plugin i_s_innodb_sys_semaphore_waits;
diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc
index 74d73379fbb..32f9b20a1d0 100644
--- a/storage/innobase/ibuf/ibuf0ibuf.cc
+++ b/storage/innobase/ibuf/ibuf0ibuf.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2019, MariaDB Corporation.
+Copyright (c) 2016, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,9 +25,13 @@ Created 7/19/1997 Heikki Tuuri
*******************************************************/
#include "ibuf0ibuf.h"
+#include "sync0sync.h"
+#include "btr0sea.h"
+
+using st_::span;
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
-UNIV_INTERN my_bool srv_ibuf_disable_background_merge;
+my_bool srv_ibuf_disable_background_merge;
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
/** Number of bits describing a single page */
@@ -38,12 +42,6 @@ UNIV_INTERN my_bool srv_ibuf_disable_background_merge;
/** The start address for an insert buffer bitmap page bitmap */
#define IBUF_BITMAP PAGE_DATA
-#ifdef UNIV_NONINL
-#include "ibuf0ibuf.ic"
-#endif
-
-#ifndef UNIV_HOTBACKUP
-
#include "buf0buf.h"
#include "buf0rea.h"
#include "fsp0fsp.h"
@@ -54,14 +52,12 @@ UNIV_INTERN my_bool srv_ibuf_disable_background_merge;
#include "btr0pcur.h"
#include "btr0btr.h"
#include "row0upd.h"
-#include "sync0sync.h"
#include "dict0boot.h"
#include "fut0lst.h"
#include "lock0lock.h"
#include "log0recv.h"
#include "que0que.h"
#include "srv0start.h" /* srv_shutdown_state */
-#include "ha_prototypes.h"
#include "rem0cmp.h"
/* STRUCTURE OF AN INSERT BUFFER RECORD
@@ -190,58 +186,18 @@ level 2 i/o. However, if an OS thread does the i/o handling for itself, i.e.,
it uses synchronous aio, it can access any pages, as long as it obeys the
access order rules. */
-/** Table name for the insert buffer. */
-#define IBUF_TABLE_NAME "SYS_IBUF_TABLE"
-
/** Operations that can currently be buffered. */
-UNIV_INTERN ibuf_use_t ibuf_use = IBUF_USE_ALL;
+ibuf_use_t ibuf_use = IBUF_USE_ALL;
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+/** Dump the change buffer at startup */
+my_bool ibuf_dump;
/** Flag to control insert buffer debugging. */
-UNIV_INTERN uint ibuf_debug;
+uint ibuf_debug;
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
/** The insert buffer control structure */
-UNIV_INTERN ibuf_t* ibuf = NULL;
-
-#ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key;
-UNIV_INTERN mysql_pfs_key_t ibuf_mutex_key;
-UNIV_INTERN mysql_pfs_key_t ibuf_bitmap_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
-/** Number of tablespaces in the ibuf_counts array */
-#define IBUF_COUNT_N_SPACES 4
-/** Number of pages within each tablespace in the ibuf_counts array */
-#define IBUF_COUNT_N_PAGES 130000
-
-/** Buffered entry counts for file pages, used in debugging */
-static ulint ibuf_counts[IBUF_COUNT_N_SPACES][IBUF_COUNT_N_PAGES];
-
-/******************************************************************//**
-Checks that the indexes to ibuf_counts[][] are within limits. */
-UNIV_INLINE
-void
-ibuf_count_check(
-/*=============*/
- ulint space_id, /*!< in: space identifier */
- ulint page_no) /*!< in: page number */
-{
- if (space_id < IBUF_COUNT_N_SPACES && page_no < IBUF_COUNT_N_PAGES) {
- return;
- }
-
- fprintf(stderr,
- "InnoDB: UNIV_IBUF_COUNT_DEBUG limits space_id and page_no\n"
- "InnoDB: and breaks crash recovery.\n"
- "InnoDB: space_id=%lu, should be 0<=space_id<%lu\n"
- "InnoDB: page_no=%lu, should be 0<=page_no<%lu\n",
- (ulint) space_id, (ulint) IBUF_COUNT_N_SPACES,
- (ulint) page_no, (ulint) IBUF_COUNT_N_PAGES);
- ut_error;
-}
-#endif
+ibuf_t* ibuf = NULL;
/** @name Offsets to the per-page bits in the insert buffer bitmap */
/* @{ */
@@ -300,31 +256,31 @@ static ib_mutex_t ibuf_mutex;
static ib_mutex_t ibuf_bitmap_mutex;
/** The area in pages from which contract looks for page numbers for merge */
-#define IBUF_MERGE_AREA 8UL
+const ulint IBUF_MERGE_AREA = 8;
/** Inside the merge area, pages which have at most 1 per this number less
buffered entries compared to the maximum volume that can be buffered for a single
page are merged along with the page whose buffer became full */
-#define IBUF_MERGE_THRESHOLD 4
+const ulint IBUF_MERGE_THRESHOLD = 4;
/** In ibuf_contract at most this number of pages is read to memory in one
batch, in order to merge the entries for them in the insert buffer */
-#define IBUF_MAX_N_PAGES_MERGED IBUF_MERGE_AREA
+const ulint IBUF_MAX_N_PAGES_MERGED = IBUF_MERGE_AREA;
/** If the combined size of the ibuf trees exceeds ibuf->max_size by this
many pages, we start to contract it in connection to inserts there, using
non-synchronous contract */
-#define IBUF_CONTRACT_ON_INSERT_NON_SYNC 0
+const ulint IBUF_CONTRACT_ON_INSERT_NON_SYNC = 0;
/** If the combined size of the ibuf trees exceeds ibuf->max_size by this
many pages, we start to contract it in connection to inserts there, using
synchronous contract */
-#define IBUF_CONTRACT_ON_INSERT_SYNC 5
+const ulint IBUF_CONTRACT_ON_INSERT_SYNC = 5;
/** If the combined size of the ibuf trees exceeds ibuf->max_size by
this many pages, we start to contract it synchronous contract, but do
not insert */
-#define IBUF_CONTRACT_DO_NOT_INSERT 10
+const ulint IBUF_CONTRACT_DO_NOT_INSERT = 10;
/* TODO: how to cope with drop table if there are records in the insert
buffer for the indexes of the table? Is there actually any problem,
@@ -341,8 +297,8 @@ ibuf_enter(
/*=======*/
mtr_t* mtr) /*!< in/out: mini-transaction */
{
- ut_ad(!mtr->inside_ibuf);
- mtr->inside_ibuf = TRUE;
+ ut_ad(!mtr->is_inside_ibuf());
+ mtr->enter_ibuf();
}
/******************************************************************//**
@@ -354,8 +310,8 @@ ibuf_exit(
/*======*/
mtr_t* mtr) /*!< in/out: mini-transaction */
{
- ut_ad(mtr->inside_ibuf);
- mtr->inside_ibuf = FALSE;
+ ut_ad(mtr->is_inside_ibuf());
+ mtr->exit_ibuf();
}
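Note: the two hunks above replace direct access to the mini-transaction's inside_ibuf flag with accessor methods. A minimal sketch of what such an encapsulated flag could look like; the member name m_inside_ibuf is an assumption, and the real mtr_t carries far more state.

class mtr_t {
public:
	bool is_inside_ibuf() const { return m_inside_ibuf; }
	void enter_ibuf() { m_inside_ibuf = true; }
	void exit_ibuf() { m_inside_ibuf = false; }
private:
	/* hypothetical storage for the flag */
	bool m_inside_ibuf = false;
};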
/**************************************************************//**
@@ -374,7 +330,7 @@ ibuf_btr_pcur_commit_specify_mtr(
/******************************************************************//**
Gets the ibuf header page and x-latches it.
-@return insert buffer header page */
+@return insert buffer header page */
static
page_t*
ibuf_header_page_get(
@@ -387,11 +343,11 @@ ibuf_header_page_get(
page_t* page = NULL;
block = buf_page_get(
- IBUF_SPACE_ID, 0, FSP_IBUF_HEADER_PAGE_NO, RW_X_LATCH, mtr);
+ page_id_t(IBUF_SPACE_ID, FSP_IBUF_HEADER_PAGE_NO),
+ univ_page_size, RW_X_LATCH, mtr);
- if (!block->page.encrypted) {
+ if (block) {
buf_block_dbg_add_level(block, SYNC_IBUF_HEADER);
-
page = buf_block_get_frame(block);
}
@@ -399,8 +355,8 @@ ibuf_header_page_get(
}
/******************************************************************//**
-Gets the root page and x-latches it.
-@return insert buffer tree root page */
+Gets the root page and sx-latches it.
+@return insert buffer tree root page */
static
page_t*
ibuf_tree_root_get(
@@ -413,10 +369,12 @@ ibuf_tree_root_get(
ut_ad(ibuf_inside(mtr));
ut_ad(mutex_own(&ibuf_mutex));
- mtr_x_lock(dict_index_get_lock(ibuf->index), mtr);
+ mtr_sx_lock(dict_index_get_lock(ibuf->index), mtr);
+ /* only segment list access is mutually exclusive */
block = buf_page_get(
- IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH, mtr);
+ page_id_t(IBUF_SPACE_ID, FSP_IBUF_TREE_ROOT_PAGE_NO),
+ univ_page_size, RW_SX_LATCH, mtr);
buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE_NEW);
@@ -429,58 +387,24 @@ ibuf_tree_root_get(
return(root);
}
-#ifdef UNIV_IBUF_COUNT_DEBUG
-/******************************************************************//**
-Gets the ibuf count for a given page.
-@return number of entries in the insert buffer currently buffered for
-this page */
-UNIV_INTERN
-ulint
-ibuf_count_get(
-/*===========*/
- ulint space, /*!< in: space id */
- ulint page_no)/*!< in: page number */
-{
- ibuf_count_check(space, page_no);
-
- return(ibuf_counts[space][page_no]);
-}
-
-/******************************************************************//**
-Sets the ibuf count for a given page. */
-static
-void
-ibuf_count_set(
-/*===========*/
- ulint space, /*!< in: space id */
- ulint page_no,/*!< in: page number */
- ulint val) /*!< in: value to set */
-{
- ibuf_count_check(space, page_no);
- ut_a(val < UNIV_PAGE_SIZE);
-
- ibuf_counts[space][page_no] = val;
-}
-#endif
-
/******************************************************************//**
Closes insert buffer and frees the data structures. */
-UNIV_INTERN
void
ibuf_close(void)
/*============*/
{
mutex_free(&ibuf_pessimistic_insert_mutex);
- memset(&ibuf_pessimistic_insert_mutex,
- 0x0, sizeof(ibuf_pessimistic_insert_mutex));
mutex_free(&ibuf_mutex);
- memset(&ibuf_mutex, 0x0, sizeof(ibuf_mutex));
mutex_free(&ibuf_bitmap_mutex);
- memset(&ibuf_bitmap_mutex, 0x0, sizeof(ibuf_mutex));
- mem_free(ibuf);
+ dict_table_t* ibuf_table = ibuf->index->table;
+ rw_lock_free(&ibuf->index->lock);
+ dict_mem_index_free(ibuf->index);
+ dict_mem_table_free(ibuf_table);
+
+ ut_free(ibuf);
ibuf = NULL;
}
@@ -491,15 +415,14 @@ static
void
ibuf_size_update(
/*=============*/
- const page_t* root, /*!< in: ibuf tree root */
- mtr_t* mtr) /*!< in: mtr */
+ const page_t* root) /*!< in: ibuf tree root */
{
ut_ad(mutex_own(&ibuf_mutex));
ibuf->free_list_len = flst_get_len(root + PAGE_HEADER
- + PAGE_BTR_IBUF_FREE_LIST, mtr);
+ + PAGE_BTR_IBUF_FREE_LIST);
- ibuf->height = 1 + btr_page_get_level(root, mtr);
+ ibuf->height = 1 + btr_page_get_level_low(root);
/* the '1 +' is the ibuf header page */
ibuf->size = ibuf->seg_size - (1 + ibuf->free_list_len);
@@ -509,21 +432,17 @@ ibuf_size_update(
Creates the insert buffer data structure at a database startup and initializes
the data structures for the insert buffer.
@return DB_SUCCESS or failure */
-UNIV_INTERN
dberr_t
ibuf_init_at_db_start(void)
/*=======================*/
{
page_t* root;
mtr_t mtr;
- dict_table_t* table;
- mem_heap_t* heap;
- dict_index_t* index;
ulint n_used;
page_t* header_page;
dberr_t error= DB_SUCCESS;
- ibuf = static_cast<ibuf_t*>(mem_zalloc(sizeof(ibuf_t)));
+ ibuf = static_cast<ibuf_t*>(ut_zalloc_nokey(sizeof(ibuf_t)));
/* At startup we initialize ibuf to have a maximum of
CHANGE_BUFFER_DEFAULT_SIZE in terms of percentage of the
@@ -533,21 +452,18 @@ ibuf_init_at_db_start(void)
ibuf->max_size = ((buf_pool_get_curr_size() / UNIV_PAGE_SIZE)
* CHANGE_BUFFER_DEFAULT_SIZE) / 100;
- mutex_create(ibuf_pessimistic_insert_mutex_key,
- &ibuf_pessimistic_insert_mutex,
- SYNC_IBUF_PESS_INSERT_MUTEX);
+ mutex_create(LATCH_ID_IBUF, &ibuf_mutex);
- mutex_create(ibuf_mutex_key,
- &ibuf_mutex, SYNC_IBUF_MUTEX);
+ mutex_create(LATCH_ID_IBUF_BITMAP, &ibuf_bitmap_mutex);
- mutex_create(ibuf_bitmap_mutex_key,
- &ibuf_bitmap_mutex, SYNC_IBUF_BITMAP_MUTEX);
+ mutex_create(LATCH_ID_IBUF_PESSIMISTIC_INSERT,
+ &ibuf_pessimistic_insert_mutex);
mtr_start(&mtr);
- mutex_enter(&ibuf_mutex);
+ mtr_x_lock_space(IBUF_SPACE_ID, &mtr);
- mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, NULL), &mtr);
+ mutex_enter(&ibuf_mutex);
header_page = ibuf_header_page_get(&mtr);
@@ -557,7 +473,6 @@ ibuf_init_at_db_start(void)
fseg_n_reserved_pages(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
&n_used, &mtr);
- ibuf_enter(&mtr);
ut_ad(n_used >= 2);
@@ -567,50 +482,58 @@ ibuf_init_at_db_start(void)
buf_block_t* block;
block = buf_page_get(
- IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO,
- RW_X_LATCH, &mtr);
+ page_id_t(IBUF_SPACE_ID, FSP_IBUF_TREE_ROOT_PAGE_NO),
+ univ_page_size, RW_X_LATCH, &mtr);
+
buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
root = buf_block_get_frame(block);
}
- ibuf_size_update(root, &mtr);
+ ibuf_size_update(root);
mutex_exit(&ibuf_mutex);
ibuf->empty = page_is_empty(root);
- ibuf_mtr_commit(&mtr);
+ mtr.commit();
+
+ ibuf->index = dict_mem_index_create(
+ "innodb_change_buffer", "CLUST_IND",
+ IBUF_SPACE_ID, DICT_CLUSTERED | DICT_IBUF, 1);
+ ibuf->index->id = DICT_IBUF_ID_MIN + IBUF_SPACE_ID;
+ ibuf->index->table = dict_mem_table_create(
+ "innodb_change_buffer", IBUF_SPACE_ID, 1, 0, 0, 0);
+ ibuf->index->n_uniq = REC_MAX_N_FIELDS;
+ rw_lock_create(index_tree_rw_lock_key, &ibuf->index->lock,
+ SYNC_IBUF_INDEX_TREE);
+#ifdef BTR_CUR_ADAPT
+ ibuf->index->search_info = btr_search_info_create(ibuf->index->heap);
+#endif /* BTR_CUR_ADAPT */
+ ibuf->index->page = FSP_IBUF_TREE_ROOT_PAGE_NO;
+ ut_d(ibuf->index->cached = TRUE);
- heap = mem_heap_create(450);
-
- /* Use old-style record format for the insert buffer. */
- table = dict_mem_table_create(IBUF_TABLE_NAME, IBUF_SPACE_ID, 1, 0, 0);
-
- dict_mem_table_add_col(table, heap, "DUMMY_COLUMN", DATA_BINARY, 0, 0);
-
- table->id = DICT_IBUF_ID_MIN + IBUF_SPACE_ID;
-
- dict_table_add_to_cache(table, FALSE, heap);
- mem_heap_free(heap);
-
- index = dict_mem_index_create(
- IBUF_TABLE_NAME, "CLUST_IND",
- IBUF_SPACE_ID, DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, 1);
-
- dict_mem_index_add_field(index, "DUMMY_COLUMN", 0);
-
- index->id = DICT_IBUF_ID_MIN + IBUF_SPACE_ID;
-
- error = dict_index_add_to_cache(table, index,
- FSP_IBUF_TREE_ROOT_PAGE_NO, FALSE);
- ut_a(error == DB_SUCCESS);
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+ if (!ibuf_dump) {
+ return error;
+ }
+ ib::info() << "Dumping the change buffer";
+ ibuf_mtr_start(&mtr);
+ btr_pcur_t pcur;
+ if (DB_SUCCESS == btr_pcur_open_at_index_side(
+ true, ibuf->index, BTR_SEARCH_LEAF, &pcur,
+ true, 0, &mtr)) {
+ while (btr_pcur_move_to_next_user_rec(&pcur, &mtr)) {
+ rec_print_old(stderr, btr_pcur_get_rec(&pcur));
+ }
+ }
+ ibuf_mtr_commit(&mtr);
+ ib::info() << "Dumped the change buffer";
+#endif
- ibuf->index = dict_table_get_first_index(table);
return (error);
}
/*********************************************************************//**
Updates the max_size value for ibuf. */
-UNIV_INTERN
void
ibuf_max_size_update(
/*=================*/
@@ -625,10 +548,8 @@ ibuf_max_size_update(
}
-#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Initializes an ibuf bitmap page. */
-UNIV_INTERN
void
ibuf_bitmap_page_init(
/*==================*/
@@ -637,35 +558,24 @@ ibuf_bitmap_page_init(
{
page_t* page;
ulint byte_offset;
- ulint zip_size = buf_block_get_zip_size(block);
-
- ut_a(ut_is_2pow(zip_size));
page = buf_block_get_frame(block);
fil_page_set_type(page, FIL_PAGE_IBUF_BITMAP);
/* Write all zeros to the bitmap */
- if (!zip_size) {
- byte_offset = UT_BITS_IN_BYTES(UNIV_PAGE_SIZE
- * IBUF_BITS_PER_PAGE);
- } else {
- byte_offset = UT_BITS_IN_BYTES(zip_size * IBUF_BITS_PER_PAGE);
- }
+ byte_offset = UT_BITS_IN_BYTES(block->page.size.physical()
+ * IBUF_BITS_PER_PAGE);
memset(page + IBUF_BITMAP, 0, byte_offset);
/* The remaining area (up to the page trailer) is uninitialized. */
-
-#ifndef UNIV_HOTBACKUP
mlog_write_initial_log_record(page, MLOG_IBUF_BITMAP_INIT, mtr);
-#endif /* !UNIV_HOTBACKUP */
}
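Note: UT_BITS_IN_BYTES rounds a bit count up to whole bytes, so the bitmap initialized above occupies only part of the page. A quick check of the arithmetic, with an assumed macro body and an assumed 16 KiB physical page size:

#include <cstdio>

/* assumed definition; InnoDB's macro rounds bits up to bytes */
#define UT_BITS_IN_BYTES(b) (((b) + 7) / 8)

int main()
{
	const unsigned IBUF_BITS_PER_PAGE = 4;
	const unsigned physical = 16384; /* assumed page size */

	printf("%u bytes\n", UT_BITS_IN_BYTES(physical * IBUF_BITS_PER_PAGE));
	/* 8192 bytes of bitmap describe 16384 pages (256 MiB of data) */
}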
/*********************************************************************//**
Parses a redo log record of an ibuf bitmap page init.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
ibuf_parse_bitmap_init(
/*===================*/
@@ -674,7 +584,8 @@ ibuf_parse_bitmap_init(
buf_block_t* block, /*!< in: block or NULL */
mtr_t* mtr) /*!< in: mtr or NULL */
{
- ut_ad(ptr && end_ptr);
+ ut_ad(ptr != NULL);
+ ut_ad(end_ptr != NULL);
if (block) {
ibuf_bitmap_page_init(block, mtr);
@@ -682,50 +593,52 @@ ibuf_parse_bitmap_init(
return(ptr);
}
-#ifndef UNIV_HOTBACKUP
+
# ifdef UNIV_DEBUG
/** Gets the desired bits for a given page from a bitmap page.
-@param page in: bitmap page
-@param offset in: page whose bits to get
-@param zs in: compressed page size in bytes; 0 for uncompressed pages
-@param bit in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ...
-@param mtr in: mini-transaction holding an x-latch on the bitmap page
-@return value of bits */
-# define ibuf_bitmap_page_get_bits(page, offset, zs, bit, mtr) \
- ibuf_bitmap_page_get_bits_low(page, offset, zs, \
+@param[in] page bitmap page
+@param[in] page_id page id whose bits to get
+@param[in] page_size page size
+@param[in] bit IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ...
+@param[in,out] mtr mini-transaction holding an x-latch on the
+bitmap page
+@return value of bits */
+# define ibuf_bitmap_page_get_bits(page, page_id, page_size, bit, mtr) \
+ ibuf_bitmap_page_get_bits_low(page, page_id, page_size, \
MTR_MEMO_PAGE_X_FIX, mtr, bit)
# else /* UNIV_DEBUG */
/** Gets the desired bits for a given page from a bitmap page.
-@param page in: bitmap page
-@param offset in: page whose bits to get
-@param zs in: compressed page size in bytes; 0 for uncompressed pages
-@param bit in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ...
-@param mtr in: mini-transaction holding an x-latch on the bitmap page
-@return value of bits */
-# define ibuf_bitmap_page_get_bits(page, offset, zs, bit, mtr) \
- ibuf_bitmap_page_get_bits_low(page, offset, zs, bit)
+@param[in] page bitmap page
+@param[in] page_id page id whose bits to get
+@param[in] page_size page size
+@param[in] bit IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ...
+@param[in,out] mtr mini-transaction holding an x-latch on the
+bitmap page
+@return value of bits */
+# define ibuf_bitmap_page_get_bits(page, page_id, page_size, bit, mtr) \
+ ibuf_bitmap_page_get_bits_low(page, page_id, page_size, bit)
# endif /* UNIV_DEBUG */
-/********************************************************************//**
-Gets the desired bits for a given page from a bitmap page.
-@return value of bits */
+/** Gets the desired bits for a given page from a bitmap page.
+@param[in] page bitmap page
+@param[in] page_id page id whose bits to get
+@param[in] page_size page size
+@param[in] latch_type MTR_MEMO_PAGE_X_FIX, MTR_MEMO_BUF_FIX, ...
+@param[in,out] mtr mini-transaction holding latch_type on the
+bitmap page
+@param[in] bit IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ...
+@return value of bits */
UNIV_INLINE
ulint
ibuf_bitmap_page_get_bits_low(
-/*==========================*/
- const page_t* page, /*!< in: bitmap page */
- ulint page_no,/*!< in: page whose bits to get */
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
+ const page_t* page,
+ const page_id_t page_id,
+ const page_size_t& page_size,
#ifdef UNIV_DEBUG
- ulint latch_type,
- /*!< in: MTR_MEMO_PAGE_X_FIX,
- MTR_MEMO_BUF_FIX, ... */
- mtr_t* mtr, /*!< in: mini-transaction holding latch_type
- on the bitmap page */
+ ulint latch_type,
+ mtr_t* mtr,
#endif /* UNIV_DEBUG */
- ulint bit) /*!< in: IBUF_BITMAP_FREE,
- IBUF_BITMAP_BUFFERED, ... */
+ ulint bit)
{
ulint byte_offset;
ulint bit_offset;
@@ -736,16 +649,10 @@ ibuf_bitmap_page_get_bits_low(
#if IBUF_BITS_PER_PAGE % 2
# error "IBUF_BITS_PER_PAGE % 2 != 0"
#endif
- ut_ad(ut_is_2pow(zip_size));
ut_ad(mtr_memo_contains_page(mtr, page, latch_type));
- if (!zip_size) {
- bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE
- + bit;
- } else {
- bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE
- + bit;
- }
+ bit_offset = (page_id.page_no() % page_size.physical())
+ * IBUF_BITS_PER_PAGE + bit;
byte_offset = bit_offset / 8;
bit_offset = bit_offset % 8;
@@ -765,19 +672,22 @@ ibuf_bitmap_page_get_bits_low(
return(value);
}
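Note: the offset arithmetic above packs IBUF_BITS_PER_PAGE bits per tracked page into the bitmap page. A self-contained sketch of the same computation, assuming 4 bits per page and a 16 KiB physical page size:

#include <cstdio>

int main()
{
	const unsigned IBUF_BITS_PER_PAGE = 4;
	const unsigned physical = 16384; /* assumed page size */
	const unsigned page_no = 100;    /* page whose bits we want */
	const unsigned bit = 0;          /* e.g. IBUF_BITMAP_FREE */

	unsigned bit_offset = (page_no % physical) * IBUF_BITS_PER_PAGE + bit;
	unsigned byte_offset = bit_offset / 8;
	bit_offset %= 8;

	printf("byte %u, bit %u\n", byte_offset, bit_offset); /* byte 50, bit 0 */
}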
-/********************************************************************//**
-Sets the desired bit for a given page in a bitmap page. */
+/** Sets the desired bit for a given page in a bitmap page.
+@param[in,out] page bitmap page
+@param[in] page_id page id whose bits to set
+@param[in] page_size page size
+@param[in] bit IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ...
+@param[in] val value to set
+@param[in,out] mtr mtr containing an x-latch to the bitmap page */
static
void
ibuf_bitmap_page_set_bits(
-/*======================*/
- page_t* page, /*!< in: bitmap page */
- ulint page_no,/*!< in: page whose bits to set */
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint bit, /*!< in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... */
- ulint val, /*!< in: value to set */
- mtr_t* mtr) /*!< in: mtr containing an x-latch to the bitmap page */
+ page_t* page,
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ ulint bit,
+ ulint val,
+ mtr_t* mtr)
{
ulint byte_offset;
ulint bit_offset;
@@ -787,20 +697,11 @@ ibuf_bitmap_page_set_bits(
#if IBUF_BITS_PER_PAGE % 2
# error "IBUF_BITS_PER_PAGE % 2 != 0"
#endif
- ut_ad(ut_is_2pow(zip_size));
ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a((bit != IBUF_BITMAP_BUFFERED) || (val != FALSE)
- || (0 == ibuf_count_get(page_get_space_id(page),
- page_no)));
-#endif
- if (!zip_size) {
- bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE
- + bit;
- } else {
- bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE
- + bit;
- }
+ ut_ad(mtr->is_named_space(page_id.space()));
+
+ bit_offset = (page_id.page_no() % page_size.physical())
+ * IBUF_BITS_PER_PAGE + bit;
byte_offset = bit_offset / 8;
bit_offset = bit_offset % 8;
@@ -824,75 +725,70 @@ ibuf_bitmap_page_set_bits(
MLOG_1BYTE, mtr);
}
-/********************************************************************//**
-Calculates the bitmap page number for a given page number.
-@return the bitmap page number where the file page is mapped */
+/** Calculates the bitmap page number for a given page number.
+@param[in] page_id page id
+@param[in] page_size page size
+@return the bitmap page id where the file page is mapped */
UNIV_INLINE
-ulint
+const page_id_t
ibuf_bitmap_page_no_calc(
-/*=====================*/
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint page_no) /*!< in: tablespace page number */
+ const page_id_t page_id,
+ const page_size_t& page_size)
{
- ut_ad(ut_is_2pow(zip_size));
+ ulint bitmap_page_no;
- if (!zip_size) {
- return(FSP_IBUF_BITMAP_OFFSET
- + (page_no & ~(UNIV_PAGE_SIZE - 1)));
- } else {
- return(FSP_IBUF_BITMAP_OFFSET
- + (page_no & ~(zip_size - 1)));
- }
+ bitmap_page_no = FSP_IBUF_BITMAP_OFFSET
+ + (page_id.page_no() & ~(page_size.physical() - 1));
+
+ return(page_id_t(page_id.space(), bitmap_page_no));
}
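Note: because the physical page size is a power of two, the mask ~(page_size.physical() - 1) above rounds the page number down to the start of its bitmap-covered range. A small sketch of the mapping, assuming FSP_IBUF_BITMAP_OFFSET == 1 and a 16 KiB page size:

#include <cstdio>

int main()
{
	const unsigned FSP_IBUF_BITMAP_OFFSET = 1; /* assumed value */
	const unsigned physical = 16384;           /* assumed page size */
	const unsigned pages[] = {0, 5, 16383, 16384, 40000};

	for (unsigned page_no : pages) {
		unsigned bitmap_page_no = FSP_IBUF_BITMAP_OFFSET
			+ (page_no & ~(physical - 1));
		printf("page %5u -> bitmap page %u\n", page_no, bitmap_page_no);
	}
	/* pages 0..16383 share bitmap page 1; 16384..32767 share 16385 */
}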
-/********************************************************************//**
-Gets the ibuf bitmap page where the bits describing a given file page are
+/** Gets the ibuf bitmap page where the bits describing a given file page are
stored.
+@param[in] page_id page id of the file page
+@param[in] page_size page size of the file page
+@param[in] file file name
+@param[in] line line where called
+@param[in,out] mtr mini-transaction
@return bitmap page where the file page is mapped, that is, the bitmap
page containing the descriptor bits for the file page; the bitmap page
is x-latched */
static
page_t*
ibuf_bitmap_get_map_page_func(
-/*==========================*/
- ulint space, /*!< in: space id of the file page */
- ulint page_no,/*!< in: page number of the file page */
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mtr */
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ const char* file,
+ unsigned line,
+ mtr_t* mtr)
{
buf_block_t* block = NULL;
dberr_t err = DB_SUCCESS;
- block = buf_page_get_gen(space, zip_size,
- ibuf_bitmap_page_no_calc(zip_size, page_no),
- RW_X_LATCH, NULL, BUF_GET,
+ block = buf_page_get_gen(ibuf_bitmap_page_no_calc(page_id, page_size),
+ page_size, RW_X_LATCH, NULL, BUF_GET,
file, line, mtr, &err);
if (err != DB_SUCCESS) {
return NULL;
}
+
buf_block_dbg_add_level(block, SYNC_IBUF_BITMAP);
return(buf_block_get_frame(block));
}
-/********************************************************************//**
-Gets the ibuf bitmap page where the bits describing a given file page are
+/** Gets the ibuf bitmap page where the bits describing a given file page are
stored.
+@param[in] page_id page id of the file page
+@param[in] page_size page size of the file page
+@param[in,out] mtr mini-transaction
@return bitmap page where the file page is mapped, that is, the bitmap
page containing the descriptor bits for the file page; the bitmap page
-is x-latched
-@param space in: space id of the file page
-@param page_no in: page number of the file page
-@param zip_size in: compressed page size in bytes; 0 for uncompressed pages
-@param mtr in: mini-transaction */
-#define ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr) \
- ibuf_bitmap_get_map_page_func(space, page_no, zip_size, \
+is x-latched */
+#define ibuf_bitmap_get_map_page(page_id, page_size, mtr) \
+ ibuf_bitmap_get_map_page_func(page_id, page_size, \
__FILE__, __LINE__, mtr)
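Note: the _func/macro pair above forwards the caller's file and line so diagnostics point at the call site rather than the helper. A minimal sketch of this wrapper idiom, with a hypothetical function name:

#include <cstdio>

static void log_call_func(const char* file, unsigned line)
{
	/* a real function would do its actual work here as well */
	printf("called from %s:%u\n", file, line);
}

#define log_call() log_call_func(__FILE__, __LINE__)

int main()
{
	log_call(); /* reports this line, not the helper's */
}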
/************************************************************************//**
@@ -904,8 +800,6 @@ UNIV_INLINE
void
ibuf_set_free_bits_low(
/*===================*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
const buf_block_t* block, /*!< in: index page; free bits are set if
the index is non-clustered and page
level is 0 */
@@ -913,10 +807,10 @@ ibuf_set_free_bits_low(
mtr_t* mtr) /*!< in/out: mtr */
{
page_t* bitmap_page;
- ulint space;
- ulint page_no;
buf_frame_t* frame;
+ ut_ad(mtr->is_named_space(block->page.id.space()));
+
if (!block) {
return;
}
@@ -927,21 +821,16 @@ ibuf_set_free_bits_low(
return;
}
- space = buf_block_get_space(block);
- page_no = buf_block_get_page_no(block);
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
-#ifdef UNIV_IBUF_DEBUG
-# if 0
- fprintf(stderr,
- "Setting space %lu page %lu free bits to %lu should be %lu\n",
- space, page_no, val,
- ibuf_index_page_calc_free(zip_size, block));
-# endif
+ bitmap_page = ibuf_bitmap_get_map_page(block->page.id,
+ block->page.size, mtr);
- ut_a(val <= ibuf_index_page_calc_free(zip_size, block));
+#ifdef UNIV_IBUF_DEBUG
+ ut_a(val <= ibuf_index_page_calc_free(block));
#endif /* UNIV_IBUF_DEBUG */
- ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
- IBUF_BITMAP_FREE, val, mtr);
+
+ ibuf_bitmap_page_set_bits(
+ bitmap_page, block->page.id, block->page.size,
+ IBUF_BITMAP_FREE, val, mtr);
}
/************************************************************************//**
@@ -949,7 +838,6 @@ Sets the free bit of the page in the ibuf bitmap. This is done in a separate
mini-transaction, hence this operation does not restrict further work to only
ibuf bitmap operations, which would result if the latch to the bitmap page
were kept. */
-UNIV_INTERN
void
ibuf_set_free_bits_func(
/*====================*/
@@ -965,9 +853,6 @@ ibuf_set_free_bits_func(
mtr_t mtr;
page_t* page;
page_t* bitmap_page;
- ulint space;
- ulint page_no;
- ulint zip_size;
page = buf_block_get_frame(block);
@@ -977,18 +862,32 @@ ibuf_set_free_bits_func(
}
mtr_start(&mtr);
+ const fil_space_t* space = mtr.set_named_space(block->page.id.space());
- space = buf_block_get_space(block);
- page_no = buf_block_get_page_no(block);
- zip_size = buf_block_get_zip_size(block);
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, &mtr);
+ bitmap_page = ibuf_bitmap_get_map_page(block->page.id,
+ block->page.size, &mtr);
+
+ switch (space->purpose) {
+ case FIL_TYPE_LOG:
+ ut_ad(0);
+ break;
+ case FIL_TYPE_TABLESPACE:
+ /* Avoid logging while fixing up truncate of table. */
+ if (!srv_is_tablespace_truncated(block->page.id.space())) {
+ break;
+ }
+ /* fall through */
+ case FIL_TYPE_TEMPORARY:
+ case FIL_TYPE_IMPORT:
+ mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
+ }
#ifdef UNIV_IBUF_DEBUG
if (max_val != ULINT_UNDEFINED) {
ulint old_val;
old_val = ibuf_bitmap_page_get_bits(
- bitmap_page, page_no, zip_size,
+ bitmap_page, block->page.id,
IBUF_BITMAP_FREE, &mtr);
# if 0
if (old_val != max_val) {
@@ -1004,13 +903,16 @@ ibuf_set_free_bits_func(
# if 0
fprintf(stderr, "Setting page no %lu free bits to %lu should be %lu\n",
page_get_page_no(page), val,
- ibuf_index_page_calc_free(zip_size, block));
+ ibuf_index_page_calc_free(block));
# endif
- ut_a(val <= ibuf_index_page_calc_free(zip_size, block));
+ ut_a(val <= ibuf_index_page_calc_free(block));
#endif /* UNIV_IBUF_DEBUG */
- ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
- IBUF_BITMAP_FREE, val, &mtr);
+
+ ibuf_bitmap_page_set_bits(
+ bitmap_page, block->page.id, block->page.size,
+ IBUF_BITMAP_FREE, val, &mtr);
+
mtr_commit(&mtr);
}
@@ -1023,7 +925,6 @@ buffer bitmap must never exceed the free space on a page. It is safe
to decrement or reset the bits in the bitmap in a mini-transaction
that is committed before the mini-transaction that affects the free
space. */
-UNIV_INTERN
void
ibuf_reset_free_bits(
/*=================*/
@@ -1042,7 +943,6 @@ thread until mtr is committed. NOTE: The free bits in the insert
buffer bitmap must never exceed the free space on a page. It is safe
to set the free bits in the same mini-transaction that updated the
page. */
-UNIV_INTERN
void
ibuf_update_free_bits_low(
/*======================*/
@@ -1058,17 +958,19 @@ ibuf_update_free_bits_low(
ulint after;
ut_a(!buf_block_get_page_zip(block));
+ ut_ad(mtr->is_named_space(block->page.id.space()));
- before = ibuf_index_page_calc_free_bits(0, max_ins_size);
+ before = ibuf_index_page_calc_free_bits(block->page.size.logical(),
+ max_ins_size);
- after = ibuf_index_page_calc_free(0, block);
+ after = ibuf_index_page_calc_free(block);
/* This approach cannot be used on compressed pages, since the
computed value of "before" often does not match the current
state of the bitmap. This is because the free space may
increase or decrease when a compressed page is reorganized. */
if (before != after) {
- ibuf_set_free_bits_low(0, block, after, mtr);
+ ibuf_set_free_bits_low(block, after, mtr);
}
}
@@ -1080,7 +982,6 @@ thread until mtr is committed. NOTE: The free bits in the insert
buffer bitmap must never exceed the free space on a page. It is safe
to set the free bits in the same mini-transaction that updated the
page. */
-UNIV_INTERN
void
ibuf_update_free_bits_zip(
/*======================*/
@@ -1088,25 +989,18 @@ ibuf_update_free_bits_zip(
mtr_t* mtr) /*!< in/out: mtr */
{
page_t* bitmap_page;
- ulint space;
- ulint page_no;
- ulint zip_size;
ulint after;
- space = buf_block_get_space(block);
- page_no = buf_block_get_page_no(block);
- zip_size = buf_block_get_zip_size(block);
-
ut_a(block);
-
buf_frame_t* frame = buf_block_get_frame(block);
+ ut_a(frame);
+ ut_a(page_is_leaf(frame));
+ ut_a(block->page.size.is_compressed());
- ut_a(frame && page_is_leaf(frame));
- ut_a(zip_size);
-
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
+ bitmap_page = ibuf_bitmap_get_map_page(block->page.id,
+ block->page.size, mtr);
- after = ibuf_index_page_calc_free_zip(zip_size, block);
+ after = ibuf_index_page_calc_free_zip(block);
if (after == 0) {
/* We move the page to the front of the buffer pool LRU list:
@@ -1117,8 +1011,9 @@ ibuf_update_free_bits_zip(
buf_page_make_young(&block->page);
}
- ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
- IBUF_BITMAP_FREE, after, mtr);
+ ibuf_bitmap_page_set_bits(
+ bitmap_page, block->page.id, block->page.size,
+ IBUF_BITMAP_FREE, after, mtr);
}
/**********************************************************************//**
@@ -1128,73 +1023,72 @@ virtually prevent any further operations until mtr is committed.
NOTE: The free bits in the insert buffer bitmap must never exceed the
free space on a page. It is safe to set the free bits in the same
mini-transaction that updated the pages. */
-UNIV_INTERN
void
ibuf_update_free_bits_for_two_pages_low(
/*====================================*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
buf_block_t* block1, /*!< in: index page */
buf_block_t* block2, /*!< in: index page */
mtr_t* mtr) /*!< in: mtr */
{
ulint state;
+ ut_ad(mtr->is_named_space(block1->page.id.space()));
+ ut_ad(block1->page.id.space() == block2->page.id.space());
+
/* As we have to x-latch two random bitmap pages, we have to acquire
the bitmap mutex to prevent a deadlock with a similar operation
performed by another OS thread. */
mutex_enter(&ibuf_bitmap_mutex);
- state = ibuf_index_page_calc_free(zip_size, block1);
+ state = ibuf_index_page_calc_free(block1);
- ibuf_set_free_bits_low(zip_size, block1, state, mtr);
+ ibuf_set_free_bits_low(block1, state, mtr);
- state = ibuf_index_page_calc_free(zip_size, block2);
+ state = ibuf_index_page_calc_free(block2);
- ibuf_set_free_bits_low(zip_size, block2, state, mtr);
+ ibuf_set_free_bits_low(block2, state, mtr);
mutex_exit(&ibuf_bitmap_mutex);
}
-/**********************************************************************//**
-Returns TRUE if the page is one of the fixed address ibuf pages.
-@return TRUE if a fixed address ibuf i/o page */
+/** Returns TRUE if the page is one of the fixed address ibuf pages.
+@param[in] page_id page id
+@param[in] page_size page size
+@return TRUE if a fixed address ibuf i/o page */
UNIV_INLINE
ibool
ibuf_fixed_addr_page(
-/*=================*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint page_no)/*!< in: page number */
+ const page_id_t page_id,
+ const page_size_t& page_size)
{
- return((space == IBUF_SPACE_ID && page_no == IBUF_TREE_ROOT_PAGE_NO)
- || ibuf_bitmap_page(zip_size, page_no));
+ return((page_id.space() == IBUF_SPACE_ID
+ && page_id.page_no() == IBUF_TREE_ROOT_PAGE_NO)
+ || ibuf_bitmap_page(page_id, page_size));
}
-/***********************************************************************//**
-Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
-Must not be called when recv_no_ibuf_operations==TRUE.
-@return TRUE if level 2 or level 3 page */
-UNIV_INTERN
+/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
+Must not be called when recv_no_ibuf_operations==true.
+@param[in] page_id page id
+@param[in] page_size page size
+@param[in] x_latch FALSE if relaxed check (avoid latching the
+bitmap page)
+@param[in] file file name
+@param[in] line line where called
+@param[in,out] mtr mtr which will contain an x-latch to the
+bitmap page if the page is not one of the fixed address ibuf pages, or NULL,
+in which case a new transaction is created.
+@return TRUE if level 2 or level 3 page */
ibool
ibuf_page_low(
-/*==========*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- ulint page_no,/*!< in: page number */
+ const page_id_t page_id,
+ const page_size_t& page_size,
#ifdef UNIV_DEBUG
- ibool x_latch,/*!< in: FALSE if relaxed check
- (avoid latching the bitmap page) */
+ ibool x_latch,
#endif /* UNIV_DEBUG */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mtr which will contain an
- x-latch to the bitmap page if the page
- is not one of the fixed address ibuf
- pages, or NULL, in which case a new
- transaction is created. */
+ const char* file,
+ unsigned line,
+ mtr_t* mtr)
{
ibool ret;
mtr_t local_mtr;
@@ -1203,15 +1097,15 @@ ibuf_page_low(
ut_ad(!recv_no_ibuf_operations);
ut_ad(x_latch || mtr == NULL);
- if (ibuf_fixed_addr_page(space, zip_size, page_no)) {
+ if (ibuf_fixed_addr_page(page_id, page_size)) {
return(TRUE);
- } else if (space != IBUF_SPACE_ID) {
+ } else if (page_id.space() != IBUF_SPACE_ID) {
return(FALSE);
}
- ut_ad(fil_space_get_type(IBUF_SPACE_ID) == FIL_TABLESPACE);
+ ut_ad(fil_space_get_type(IBUF_SPACE_ID) == FIL_TYPE_TABLESPACE);
#ifdef UNIV_DEBUG
if (!x_latch) {
@@ -1227,16 +1121,17 @@ ibuf_page_low(
not be modified by any other thread. Nobody should be
calling ibuf_add_free_page() or ibuf_remove_free_page()
while the page is linked to the insert buffer b-tree. */
+ dberr_t err = DB_SUCCESS;
+
+ buf_block_t* block = buf_page_get_gen(
+ ibuf_bitmap_page_no_calc(page_id, page_size),
+ page_size, RW_NO_LATCH, NULL, BUF_GET_NO_LATCH,
+ file, line, &local_mtr, &err);
- bitmap_page = buf_block_get_frame(
- buf_page_get_gen(
- space, zip_size,
- ibuf_bitmap_page_no_calc(zip_size, page_no),
- RW_NO_LATCH, NULL, BUF_GET_NO_LATCH,
- file, line, &local_mtr));
+ bitmap_page = buf_block_get_frame(block);
ret = ibuf_bitmap_page_get_bits_low(
- bitmap_page, page_no, zip_size,
+ bitmap_page, page_id, page_size,
MTR_MEMO_BUF_FIX, &local_mtr, IBUF_BITMAP_IBUF);
mtr_commit(&local_mtr);
@@ -1249,10 +1144,10 @@ ibuf_page_low(
mtr_start(mtr);
}
- bitmap_page = ibuf_bitmap_get_map_page_func(space, page_no, zip_size,
+ bitmap_page = ibuf_bitmap_get_map_page_func(page_id, page_size,
file, line, mtr);
- ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
+ ret = ibuf_bitmap_page_get_bits(bitmap_page, page_id, page_size,
IBUF_BITMAP_IBUF, mtr);
if (mtr == &local_mtr) {
@@ -1270,7 +1165,7 @@ ibuf_page_low(
/********************************************************************//**
Returns the page number field of an ibuf record.
-@return page number */
+@return page number */
static
ulint
ibuf_rec_get_page_no_func(
@@ -1283,8 +1178,9 @@ ibuf_rec_get_page_no_func(
const byte* field;
ulint len;
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
+ ut_ad(mtr_memo_contains_page_flagged(mtr, rec,
+ MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_S_FIX));
ut_ad(ibuf_inside(mtr));
ut_ad(rec_get_n_fields_old(rec) > 2);
@@ -1308,7 +1204,7 @@ ibuf_rec_get_page_no_func(
/********************************************************************//**
Returns the space id field of an ibuf record. For < 4.1.x format records
returns 0.
-@return space id */
+@return space id */
static
ulint
ibuf_rec_get_space_func(
@@ -1321,8 +1217,8 @@ ibuf_rec_get_space_func(
const byte* field;
ulint len;
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
+ ut_ad(mtr_memo_contains_page_flagged(mtr, rec, MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_S_FIX));
ut_ad(ibuf_inside(mtr));
ut_ad(rec_get_n_fields_old(rec) > 2);
@@ -1371,8 +1267,8 @@ ibuf_rec_get_info_func(
ulint info_len_local;
ulint counter_local;
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
+ ut_ad(mtr_memo_contains_page_flagged(mtr, rec, MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_S_FIX));
ut_ad(ibuf_inside(mtr));
fields = rec_get_n_fields_old(rec);
ut_a(fields > IBUF_REC_FIELD_USER);
@@ -1431,7 +1327,7 @@ ibuf_rec_get_info_func(
/****************************************************************//**
Returns the operation type field of an ibuf record.
-@return operation type */
+@return operation type */
static
ibuf_op_t
ibuf_rec_get_op_type_func(
@@ -1443,8 +1339,8 @@ ibuf_rec_get_op_type_func(
{
ulint len;
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
+ ut_ad(mtr_memo_contains_page_flagged(mtr, rec, MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_S_FIX));
ut_ad(ibuf_inside(mtr));
ut_ad(rec_get_n_fields_old(rec) > 2);
@@ -1468,7 +1364,6 @@ Read the first two bytes from a record's fourth field (counter field in new
records; something else in older records).
@return "counter" field, or ULINT_UNDEFINED if for some reason it
can't be read */
-UNIV_INTERN
ulint
ibuf_rec_get_counter(
/*=================*/
@@ -1506,16 +1401,8 @@ ibuf_add_ops(
{
ulint i;
-#ifndef HAVE_ATOMIC_BUILTINS
- ut_ad(mutex_own(&ibuf_mutex));
-#endif /* !HAVE_ATOMIC_BUILTINS */
-
for (i = 0; i < IBUF_OP_COUNT; i++) {
-#ifdef HAVE_ATOMIC_BUILTINS
- os_atomic_increment_ulint(&arr[i], ops[i]);
-#else /* HAVE_ATOMIC_BUILTINS */
- arr[i] += ops[i];
-#endif /* HAVE_ATOMIC_BUILTINS */
+ my_atomic_addlint(&arr[i], ops[i]);
}
}
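Note: the hunk above drops the pre-atomic-builtins fallback and always updates the per-operation counters atomically. A rough equivalent of that pattern using standard C++ atomics; the names and the value of IBUF_OP_COUNT are kept only for flavor and are not taken from the diff.

#include <atomic>
#include <cstdio>

enum { IBUF_OP_COUNT = 3 }; /* assumed: insert, delete-mark, delete */

static std::atomic<unsigned long> arr[IBUF_OP_COUNT];

static void add_ops(const unsigned long* ops)
{
	for (int i = 0; i < IBUF_OP_COUNT; i++) {
		/* relaxed ordering suffices for statistics counters */
		arr[i].fetch_add(ops[i], std::memory_order_relaxed);
	}
}

int main()
{
	const unsigned long batch[IBUF_OP_COUNT] = {2, 1, 0};
	add_ops(batch);
	printf("%lu %lu %lu\n", arr[0].load(), arr[1].load(), arr[2].load());
}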
@@ -1538,8 +1425,8 @@ ibuf_print_ops(
ut_a(UT_ARR_SIZE(op_names) == IBUF_OP_COUNT);
for (i = 0; i < IBUF_OP_COUNT; i++) {
- fprintf(file, "%s %lu%s", op_names[i],
- (ulong) ops[i], (i < (IBUF_OP_COUNT - 1)) ? ", " : "");
+ fprintf(file, "%s " ULINTPF "%s", op_names[i],
+ ops[i], (i < (IBUF_OP_COUNT - 1)) ? ", " : "");
}
putc('\n', file);
@@ -1547,7 +1434,7 @@ ibuf_print_ops(
/********************************************************************//**
Creates a dummy index for inserting a record to a non-clustered index.
-@return dummy index */
+@return dummy index */
static
dict_index_t*
ibuf_dummy_index_create(
@@ -1559,7 +1446,7 @@ ibuf_dummy_index_create(
dict_index_t* index;
table = dict_mem_table_create("IBUF_DUMMY",
- DICT_HDR_SPACE, n,
+ DICT_HDR_SPACE, n, 0,
comp ? DICT_TF_COMPACT : 0, 0);
index = dict_mem_index_create("IBUF_DUMMY", "IBUF_DUMMY",
@@ -1569,6 +1456,7 @@ ibuf_dummy_index_create(
/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
index->cached = TRUE;
+ ut_d(index->is_dummy = true);
return(index);
}
@@ -1651,8 +1539,8 @@ ibuf_build_entry_from_ibuf_rec_func(
ulint comp;
dict_index_t* index;
- ut_ad(mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_S_FIX));
+ ut_ad(mtr_memo_contains_page_flagged(mtr, ibuf_rec, MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_S_FIX));
ut_ad(ibuf_inside(mtr));
data = rec_get_nth_field_old(ibuf_rec, IBUF_REC_FIELD_MARKER, &len);
@@ -1705,7 +1593,7 @@ ibuf_build_entry_from_ibuf_rec_func(
/******************************************************************//**
Get the data size.
-@return size of fields */
+@return size of fields */
UNIV_INLINE
ulint
ibuf_rec_get_size(
@@ -1773,8 +1661,8 @@ ibuf_rec_get_volume_func(
ibuf_op_t op;
ulint info_len;
- ut_ad(mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_S_FIX));
+ ut_ad(mtr_memo_contains_page_flagged(mtr, ibuf_rec, MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_S_FIX));
ut_ad(ibuf_inside(mtr));
ut_ad(rec_get_n_fields_old(ibuf_rec) > 2);
@@ -1829,7 +1717,7 @@ non-clustered index.
NOTE that the original entry must be kept because we copy pointers to
its fields.
-@return own: entry to insert into an ibuf index tree */
+@return own: entry to insert into an ibuf index tree */
static
dtuple_t*
ibuf_entry_build(
@@ -1991,7 +1879,7 @@ ibuf_entry_build(
/*********************************************************************//**
Builds a search tuple used to search buffered inserts for an index page.
This is for >= 4.1.x format records.
-@return own: search tuple */
+@return own: search tuple */
static
dtuple_t*
ibuf_search_tuple_build(
@@ -2044,7 +1932,7 @@ ibuf_search_tuple_build(
/*********************************************************************//**
Checks if there are enough pages in the free list of the ibuf tree that we
dare to start a pessimistic insert to the insert buffer.
-@return TRUE if enough free pages in list */
+@return TRUE if enough free pages in list */
UNIV_INLINE
ibool
ibuf_data_enough_free_for_insert(void)
@@ -2064,7 +1952,7 @@ ibuf_data_enough_free_for_insert(void)
/*********************************************************************//**
Checks if there are enough pages in the free list of the ibuf tree that we
should remove them and free to the file space management.
-@return TRUE if enough free pages in list */
+@return TRUE if enough free pages in list */
UNIV_INLINE
ibool
ibuf_data_too_much_free(void)
@@ -2078,7 +1966,7 @@ ibuf_data_too_much_free(void)
/*********************************************************************//**
Allocates a new page from the ibuf file segment and adds it to the free
list.
-@return TRUE on success, FALSE if no space left */
+@return TRUE on success, FALSE if no space left */
static
ibool
ibuf_add_free_page(void)
@@ -2086,20 +1974,17 @@ ibuf_add_free_page(void)
{
mtr_t mtr;
page_t* header_page;
- ulint flags;
- ulint zip_size;
buf_block_t* block;
page_t* page;
page_t* root;
page_t* bitmap_page;
mtr_start(&mtr);
+ fil_space_t* space = mtr.set_sys_modified();
/* Acquire the fsp latch before the ibuf header, obeying the latching
order */
- mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr);
- zip_size = fsp_flags_get_zip_size(flags);
-
+ mtr_x_lock(&space->latch, &mtr);
header_page = ibuf_header_page_get(&mtr);
/* Allocate a new page: NOTE that if the page has been a part of a
@@ -2144,14 +2029,15 @@ ibuf_add_free_page(void)
/* Set the bit indicating that this page is now an ibuf tree page
(level 2 page) */
- bitmap_page = ibuf_bitmap_get_map_page(
- IBUF_SPACE_ID, buf_block_get_page_no(block), zip_size, &mtr);
+ const page_id_t page_id(IBUF_SPACE_ID, block->page.id.page_no());
+ const page_size_t page_size(space->flags);
+
+ bitmap_page = ibuf_bitmap_get_map_page(page_id, page_size, &mtr);
mutex_exit(&ibuf_mutex);
- ibuf_bitmap_page_set_bits(
- bitmap_page, buf_block_get_page_no(block), zip_size,
- IBUF_BITMAP_IBUF, TRUE, &mtr);
+ ibuf_bitmap_page_set_bits(bitmap_page, page_id, page_size,
+ IBUF_BITMAP_IBUF, TRUE, &mtr);
ibuf_mtr_commit(&mtr);
@@ -2168,8 +2054,6 @@ ibuf_remove_free_page(void)
mtr_t mtr;
mtr_t mtr2;
page_t* header_page;
- ulint flags;
- ulint zip_size;
ulint page_no;
page_t* page;
page_t* root;
@@ -2178,12 +2062,13 @@ ibuf_remove_free_page(void)
log_free_check();
mtr_start(&mtr);
+ fil_space_t* space = mtr.set_sys_modified();
+ const page_size_t page_size(space->flags);
/* Acquire the fsp latch before the ibuf header, obeying the latching
order */
- mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr);
- zip_size = fsp_flags_get_zip_size(flags);
+ mtr_x_lock(&space->latch, &mtr);
header_page = ibuf_header_page_get(&mtr);
/* Prevent pessimistic inserts to insert buffer trees for a while */
@@ -2224,11 +2109,11 @@ ibuf_remove_free_page(void)
page from it. */
fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
- IBUF_SPACE_ID, page_no, &mtr);
+ IBUF_SPACE_ID, page_no, false, &mtr);
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- buf_page_reset_file_page_was_freed(IBUF_SPACE_ID, page_no);
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
+ const page_id_t page_id(IBUF_SPACE_ID, page_no);
+
+ ut_d(buf_page_reset_file_page_was_freed(page_id));
ibuf_enter(&mtr);
@@ -2242,8 +2127,7 @@ ibuf_remove_free_page(void)
{
buf_block_t* block;
- block = buf_page_get(
- IBUF_SPACE_ID, 0, page_no, RW_X_LATCH, &mtr);
+ block = buf_page_get(page_id, univ_page_size, RW_X_LATCH, &mtr);
buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
@@ -2263,17 +2147,16 @@ ibuf_remove_free_page(void)
/* Set the bit indicating that this page is no more an ibuf tree page
(level 2 page) */
- bitmap_page = ibuf_bitmap_get_map_page(
- IBUF_SPACE_ID, page_no, zip_size, &mtr);
+ bitmap_page = ibuf_bitmap_get_map_page(page_id, page_size, &mtr);
mutex_exit(&ibuf_mutex);
ibuf_bitmap_page_set_bits(
- bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, FALSE, &mtr);
+ bitmap_page, page_id, page_size, IBUF_BITMAP_IBUF, FALSE,
+ &mtr);
+
+ ut_d(buf_page_set_file_page_was_freed(page_id));
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- buf_page_set_file_page_was_freed(IBUF_SPACE_ID, page_no);
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
ibuf_mtr_commit(&mtr);
}
@@ -2281,13 +2164,10 @@ ibuf_remove_free_page(void)
Frees excess pages from the ibuf free list. This function is called when an OS
thread calls fsp services to allocate a new file segment, or a new page to a
file segment, and the thread did not own the fsp latch before this call. */
-UNIV_INTERN
void
ibuf_free_excess_pages(void)
/*========================*/
{
- ulint i;
-
if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
return;
}
@@ -2295,7 +2175,7 @@ ibuf_free_excess_pages(void)
/* Free at most a few pages at a time, so that we do not delay the
requested service too much */
- for (i = 0; i < 4; i++) {
+ for (ulint i = 0; i < 4; i++) {
ibool too_much_free;
@@ -2312,11 +2192,11 @@ ibuf_free_excess_pages(void)
}
#ifdef UNIV_DEBUG
-# define ibuf_get_merge_page_nos(contract,rec,mtr,ids,vers,pages,n_stored) \
- ibuf_get_merge_page_nos_func(contract,rec,mtr,ids,vers,pages,n_stored)
+# define ibuf_get_merge_page_nos(contract,rec,mtr,ids,pages,n_stored) \
+ ibuf_get_merge_page_nos_func(contract,rec,mtr,ids,pages,n_stored)
#else /* UNIV_DEBUG */
-# define ibuf_get_merge_page_nos(contract,rec,mtr,ids,vers,pages,n_stored) \
- ibuf_get_merge_page_nos_func(contract,rec,ids,vers,pages,n_stored)
+# define ibuf_get_merge_page_nos(contract,rec,mtr,ids,pages,n_stored) \
+ ibuf_get_merge_page_nos_func(contract,rec,ids,pages,n_stored)
#endif /* UNIV_DEBUG */
/*********************************************************************//**
@@ -2336,9 +2216,6 @@ ibuf_get_merge_page_nos_func(
mtr_t* mtr, /*!< in: mini-transaction holding rec */
#endif /* UNIV_DEBUG */
ulint* space_ids,/*!< in/out: space id's of the pages */
- ib_int64_t* space_versions,/*!< in/out: tablespace version
- timestamps; used to prevent reading in old
- pages after DISCARD + IMPORT tablespace */
ulint* page_nos,/*!< in/out: buffer for at least
IBUF_MAX_N_PAGES_MERGED many page numbers;
the page numbers are in an ascending order */
@@ -2357,13 +2234,14 @@ ibuf_get_merge_page_nos_func(
ulint limit;
ulint n_pages;
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
+ ut_ad(mtr_memo_contains_page_flagged(mtr, rec, MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_S_FIX));
ut_ad(ibuf_inside(mtr));
*n_stored = 0;
- limit = ut_min(IBUF_MAX_N_PAGES_MERGED, buf_pool_get_curr_size() / 4);
+ limit = ut_min(IBUF_MAX_N_PAGES_MERGED,
+ buf_pool_get_curr_size() / 4);
if (page_rec_is_supremum(rec)) {
@@ -2433,16 +2311,23 @@ ibuf_get_merge_page_nos_func(
} else {
rec_page_no = ibuf_rec_get_page_no(mtr, rec);
rec_space_id = ibuf_rec_get_space(mtr, rec);
- /* In the system tablespace, the smallest
+ /* In the system tablespace the smallest
possible secondary index leaf page number is
- bigger than IBUF_TREE_ROOT_PAGE_NO (4). In
- other tablespaces, the clustered index tree is
- created at page 3, which makes page 4 the
- smallest possible secondary index leaf page
- (and that only after DROP INDEX). */
- ut_ad(rec_page_no
- > (ulint) IBUF_TREE_ROOT_PAGE_NO
- - (rec_space_id != 0));
+ bigger than FSP_DICT_HDR_PAGE_NO (7).
+ In all tablespaces, pages 0 and 1 are reserved
+ for the allocation bitmap and the change
+ buffer bitmap. In file-per-table tablespaces,
+ a file segment inode page will be created at
+ page 2 and the clustered index tree is created
+ at page 3. So for file-per-table tablespaces,
+ page 4 is the smallest possible secondary
+ index leaf page. CREATE TABLESPACE also initially
+ uses pages 2 and 3 for the first created table,
+ but that table may be dropped, allowing page 2
+ to be reused for a secondary index leaf page.
+ To keep this assertion simple, just
+ make sure the page is >= 2. */
+ ut_ad(rec_page_no >= FSP_FIRST_INODE_PAGE_NO);
}
#ifdef UNIV_IBUF_DEBUG
@@ -2462,8 +2347,6 @@ ibuf_get_merge_page_nos_func(
/ IBUF_MERGE_THRESHOLD)) {
space_ids[*n_stored] = prev_space_id;
- space_versions[*n_stored]
- = fil_space_get_version(prev_space_id);
page_nos[*n_stored] = prev_page_no;
(*n_stored)++;
@@ -2509,7 +2392,7 @@ ibuf_get_merge_page_nos_func(
/*******************************************************************//**
Get the matching records for space id.
-@return current rec or NULL */
+@return current rec or NULL */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
const rec_t*
ibuf_get_user_rec(
@@ -2541,13 +2424,11 @@ ibuf_get_merge_pages(
ulint limit, /*!< in: max page numbers to read */
ulint* pages, /*!< out: pages read */
ulint* spaces, /*!< out: spaces read */
- ib_int64_t* versions,/*!< out: space versions read */
ulint* n_pages,/*!< out: number of pages read */
mtr_t* mtr) /*!< in: mini transaction */
{
const rec_t* rec;
ulint volume = 0;
- ib_int64_t version = fil_space_get_version(space);
ut_a(space != ULINT_UNDEFINED);
@@ -2562,7 +2443,6 @@ ibuf_get_merge_pages(
if (*n_pages == 0 || pages[*n_pages - 1] != page_no) {
spaces[*n_pages] = space;
pages[*n_pages] = page_no;
- versions[*n_pages] = version;
++*n_pages;
}
@@ -2593,7 +2473,6 @@ ibuf_merge_pages(
ulint sum_sizes;
ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
- ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED];
*n_pages = 0;
@@ -2601,8 +2480,12 @@ ibuf_merge_pages(
/* Open a cursor to a randomly chosen leaf of the tree, at a random
position within the leaf */
+ bool available;
- btr_pcur_open_at_rnd_pos(ibuf->index, BTR_SEARCH_LEAF, &pcur, &mtr);
+ available = btr_pcur_open_at_rnd_pos(ibuf->index, BTR_SEARCH_LEAF,
+ &pcur, &mtr);
+ /* No one should make this index unavailable when server is running */
+ ut_a(available);
ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
@@ -2624,7 +2507,7 @@ ibuf_merge_pages(
sum_sizes = ibuf_get_merge_page_nos(TRUE,
btr_pcur_get_rec(&pcur), &mtr,
- space_ids, space_versions,
+ space_ids,
page_nos, n_pages);
#if 0 /* defined UNIV_IBUF_DEBUG */
fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n",
@@ -2634,7 +2517,7 @@ ibuf_merge_pages(
btr_pcur_close(&pcur);
buf_read_ibuf_merge_pages(
- sync, space_ids, space_versions, page_nos, *n_pages);
+ sync, space_ids, page_nos, *n_pages);
return(sum_sizes + 1);
}
@@ -2643,7 +2526,6 @@ ibuf_merge_pages(
Contracts insert buffer trees by reading pages referring to space_id
to the buffer pool.
@returns number of pages merged.*/
-UNIV_INTERN
ulint
ibuf_merge_space(
/*=============*/
@@ -2672,7 +2554,6 @@ ibuf_merge_space(
ulint sum_sizes = 0;
ulint pages[IBUF_MAX_N_PAGES_MERGED];
ulint spaces[IBUF_MAX_N_PAGES_MERGED];
- ib_int64_t versions[IBUF_MAX_N_PAGES_MERGED];
if (page_is_empty(btr_pcur_get_page(&pcur))) {
/* If a B-tree page is empty, it must be the root page
@@ -2687,12 +2568,10 @@ ibuf_merge_space(
} else {
sum_sizes = ibuf_get_merge_pages(
- &pcur, space, IBUF_MAX_N_PAGES_MERGED,
- &pages[0], &spaces[0], &versions[0], &n_pages,
- &mtr);
- ib_logf(IB_LOG_LEVEL_INFO,"\n Size of pages merged %lu"
- ,sum_sizes);
-
+ &pcur, space, IBUF_MAX_N_PAGES_MERGED,
+ &pages[0], &spaces[0], &n_pages,
+ &mtr);
+ ib::info() << "Size of pages merged " << sum_sizes;
}
ibuf_mtr_commit(&mtr);
@@ -2700,18 +2579,16 @@ ibuf_merge_space(
btr_pcur_close(&pcur);
if (n_pages > 0) {
-
-#ifdef UNIV_DEBUG
ut_ad(n_pages <= UT_ARR_SIZE(pages));
+#ifdef UNIV_DEBUG
for (ulint i = 0; i < n_pages; ++i) {
ut_ad(spaces[i] == space);
- ut_ad(i == 0 || versions[i] == versions[i - 1]);
}
#endif /* UNIV_DEBUG */
buf_read_ibuf_merge_pages(
- true, spaces, versions, pages, n_pages);
+ true, spaces, pages, n_pages);
}
return(n_pages);
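
ibuf_merge_space() schedules a read of every index page that still has buffered changes for the given tablespace; the merge itself happens as a side effect of those reads completing. A hypothetical call site, shown only as a sketch (ibuf_merge_space() and ib::info() exist in this codebase, but this exact invocation is illustrative):

	/* Hypothetical usage sketch: force-merge everything buffered
	for one tablespace and report how many pages were scheduled. */
	ulint	n_merged = ibuf_merge_space(space_id);
	ib::info() << "Scheduled change buffer merge for "
		   << n_merged << " pages";
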
@@ -2724,16 +2601,11 @@ the issued reads to complete
@return a lower limit for the combined size in bytes of entries which
will be merged from ibuf trees to the pages read, 0 if ibuf is
empty */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
+static MY_ATTRIBUTE((warn_unused_result))
ulint
ibuf_merge(
-/*=======*/
- ulint* n_pages, /*!< out: number of pages to
- which merged */
- bool sync) /*!< in: TRUE if the caller
- wants to wait for the issued
- read with the highest
- tablespace address to complete */
+ ulint* n_pages,
+ bool sync)
{
*n_pages = 0;
@@ -2761,10 +2633,7 @@ will be merged from ibuf trees to the pages read, 0 if ibuf is empty */
static
ulint
ibuf_contract(
-/*==========*/
- bool sync) /*!< in: TRUE if the caller wants to wait for the
- issued read with the highest tablespace address
- to complete */
+ bool sync)
{
ulint n_pages;
@@ -2778,15 +2647,9 @@ based on the current size of the change buffer.
@return a lower limit for the combined size in bytes of entries which
will be merged from ibuf trees to the pages read, 0 if ibuf is
empty */
-UNIV_INTERN
ulint
ibuf_merge_in_background(
-/*=====================*/
- bool full) /*!< in: TRUE if the caller wants to
- do a full contract based on PCT_IO(100).
- If FALSE then the size of contract
- batch is determined based on the
- current size of the ibuf tree. */
+ bool full)
{
ulint sum_bytes = 0;
ulint sum_pages = 0;
@@ -2826,8 +2689,6 @@ ibuf_merge_in_background(
}
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-
-
while (sum_pages < n_pages) {
ulint n_bytes;
@@ -2887,7 +2748,7 @@ ibuf_contract_after_insert(
/*********************************************************************//**
Determine if an insert buffer record has been encountered already.
-@return TRUE if a new record, FALSE if possible duplicate */
+@return TRUE if a new record, FALSE if possible duplicate */
static
ibool
ibuf_get_volume_buffered_hash(
@@ -2929,7 +2790,8 @@ ibuf_get_volume_buffered_hash(
#else /* UNIV_DEBUG */
# define ibuf_get_volume_buffered_count(mtr,rec,hash,size,n_recs) \
ibuf_get_volume_buffered_count_func(rec,hash,size,n_recs)
-#endif
+#endif /* UNIV_DEBUG */
+
/*********************************************************************//**
Update the estimate of the number of records on a page, and
get the space taken by merging the buffered record to the index page.
@@ -2953,8 +2815,8 @@ ibuf_get_volume_buffered_count_func(
const byte* types;
ulint n_fields;
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
+ ut_ad(mtr_memo_contains_page_flagged(mtr, rec, MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_S_FIX));
ut_ad(ibuf_inside(mtr));
n_fields = rec_get_n_fields_old(rec);
@@ -3129,7 +2991,7 @@ ibuf_get_volume_buffered(
/* Look at the previous page */
- prev_page_no = btr_page_get_prev(page, mtr);
+ prev_page_no = btr_page_get_prev(page);
if (prev_page_no == FIL_NULL) {
@@ -3140,18 +3002,17 @@ ibuf_get_volume_buffered(
buf_block_t* block;
block = buf_page_get(
- IBUF_SPACE_ID, 0, prev_page_no, RW_X_LATCH,
- mtr);
+ page_id_t(IBUF_SPACE_ID, prev_page_no),
+ univ_page_size, RW_X_LATCH, mtr);
buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
-
prev_page = buf_block_get_frame(block);
ut_ad(page_validate(prev_page, ibuf->index));
}
#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_next(prev_page, mtr) == page_get_page_no(page));
+ ut_a(!memcmp(prev_page + FIL_PAGE_NEXT, page + FIL_PAGE_OFFSET, 4));
#endif /* UNIV_BTR_DEBUG */
rec = page_get_supremum_rec(prev_page);
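
Both FIL_PAGE_NEXT on the previous page and FIL_PAGE_OFFSET on the current page are 4-byte big-endian fields, so the memcmp() form of the assertion above is equivalent to decoding the two values first. A sketch of the decoded form, written against the surrounding declarations and the existing mach_read_from_4() helper:

	/* Decoded equivalent of the memcmp() assertion above (sketch). */
	ut_a(mach_read_from_4(prev_page + FIL_PAGE_NEXT)
	     == mach_read_from_4(page + FIL_PAGE_OFFSET));
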
@@ -3202,7 +3063,7 @@ count_later:
/* Look at the next page */
- next_page_no = btr_page_get_next(page, mtr);
+ next_page_no = btr_page_get_next(page);
if (next_page_no == FIL_NULL) {
@@ -3213,18 +3074,17 @@ count_later:
buf_block_t* block;
block = buf_page_get(
- IBUF_SPACE_ID, 0, next_page_no, RW_X_LATCH,
- mtr);
+ page_id_t(IBUF_SPACE_ID, next_page_no),
+ univ_page_size, RW_X_LATCH, mtr);
buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
-
next_page = buf_block_get_frame(block);
ut_ad(page_validate(next_page, ibuf->index));
}
#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_prev(next_page, mtr) == page_get_page_no(page));
+ ut_a(!memcmp(next_page + FIL_PAGE_PREV, page + FIL_PAGE_OFFSET, 4));
#endif /* UNIV_BTR_DEBUG */
rec = page_get_infimum_rec(next_page);
@@ -3255,7 +3115,6 @@ count_later:
/*********************************************************************//**
Reads the biggest tablespace id from the high end of the insert buffer
tree and updates the counter in fil_system. */
-UNIV_INTERN
void
ibuf_update_max_tablespace_id(void)
/*===============================*/
@@ -3329,8 +3188,8 @@ ibuf_get_entry_counter_low_func(
ulint len;
ut_ad(ibuf_inside(mtr));
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
+ ut_ad(mtr_memo_contains_page_flagged(mtr, rec, MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_S_FIX));
ut_ad(rec_get_n_fields_old(rec) > 2);
field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len);
@@ -3379,12 +3238,12 @@ ibuf_get_entry_counter_low_func(
#else /* UNIV_DEBUG */
# define ibuf_get_entry_counter(space,page_no,rec,mtr,exact_leaf) \
ibuf_get_entry_counter_func(space,page_no,rec,exact_leaf)
-#endif
+#endif /* UNIV_DEBUG */
/****************************************************************//**
Calculate the counter field for an entry based on the current
last record in ibuf for (space, page_no).
-@return the counter field, or ULINT_UNDEFINED
+@return the counter field, or ULINT_UNDEFINED
if we should abort this insertion to ibuf */
static
ulint
@@ -3414,8 +3273,7 @@ ibuf_get_entry_counter_func(
return(ULINT_UNDEFINED);
} else if (!page_rec_is_infimum(rec)) {
return(ibuf_get_entry_counter_low(mtr, rec, space, page_no));
- } else if (only_leaf
- || fil_page_get_prev(page_align(rec)) == FIL_NULL) {
+ } else if (only_leaf || !page_has_prev(page_align(rec))) {
/* The parent node pointer did not contain the
searched for (space, page_no), which means that the
search ended on the correct page regardless of the
@@ -3430,28 +3288,32 @@ ibuf_get_entry_counter_func(
}
}
-/*********************************************************************//**
-Buffer an operation in the insert/delete buffer, instead of doing it
+/** Buffer an operation in the insert/delete buffer, instead of doing it
directly to the disk page, if this is possible.
-@return DB_SUCCESS, DB_STRONG_FAIL or other error */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
+@param[in] mode BTR_MODIFY_PREV or BTR_MODIFY_TREE
+@param[in] op operation type
+@param[in] no_counter TRUE=use 5.0.3 format; FALSE=allow delete
+buffering
+@param[in] entry index entry to insert
+@param[in] entry_size rec_get_converted_size(index, entry)
+@param[in,out] index index where to insert; must not be unique
+or clustered
+@param[in] page_id page id where to insert
+@param[in] page_size page size
+@param[in,out] thr query thread
+@return DB_SUCCESS, DB_STRONG_FAIL or other error */
+static MY_ATTRIBUTE((warn_unused_result))
dberr_t
ibuf_insert_low(
-/*============*/
- ulint mode, /*!< in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */
- ibuf_op_t op, /*!< in: operation type */
- ibool no_counter,
- /*!< in: TRUE=use 5.0.3 format;
- FALSE=allow delete buffering */
- const dtuple_t* entry, /*!< in: index entry to insert */
- ulint entry_size,
- /*!< in: rec_get_converted_size(index, entry) */
- dict_index_t* index, /*!< in: index where to insert; must not be
- unique or clustered */
- ulint space, /*!< in: space id where to insert */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- ulint page_no,/*!< in: page number where to insert */
- que_thr_t* thr) /*!< in: query thread */
+ ulint mode,
+ ibuf_op_t op,
+ ibool no_counter,
+ const dtuple_t* entry,
+ ulint entry_size,
+ dict_index_t* index,
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ que_thr_t* thr)
{
big_rec_t* dummy_big_rec;
btr_pcur_t pcur;
@@ -3459,7 +3321,7 @@ ibuf_insert_low(
dtuple_t* ibuf_entry;
mem_heap_t* offsets_heap = NULL;
mem_heap_t* heap;
- ulint* offsets = NULL;
+ offset_t* offsets = NULL;
ulint buffered;
lint min_n_recs;
rec_t* ins_rec;
@@ -3470,15 +3332,14 @@ ibuf_insert_low(
dberr_t err;
ibool do_merge;
ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
- ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED];
ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
ulint n_stored;
mtr_t mtr;
mtr_t bitmap_mtr;
ut_a(!dict_index_is_clust(index));
+ ut_ad(!dict_index_is_spatial(index));
ut_ad(dtuple_check_typed(entry));
- ut_ad(ut_is_2pow(zip_size));
ut_ad(!no_counter || op == IBUF_OP_INSERT);
ut_a(op < IBUF_OP_COUNT);
@@ -3517,14 +3378,14 @@ ibuf_insert_low(
value just before actually inserting the entry.) */
ibuf_entry = ibuf_entry_build(
- op, index, entry, space, page_no,
+ op, index, entry, page_id.space(), page_id.page_no(),
no_counter ? ULINT_UNDEFINED : 0xFFFF, heap);
/* Open a cursor to the insert buffer tree to calculate if we can add
the new entry to it without exceeding the free space limit for the
page. */
- if (mode == BTR_MODIFY_TREE) {
+ if (BTR_LATCH_MODE_WITHOUT_INTENTION(mode) == BTR_MODIFY_TREE) {
for (;;) {
mutex_enter(&ibuf_pessimistic_insert_mutex);
mutex_enter(&ibuf_mutex);
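
The latch mode passed to ibuf_insert_low() can now carry an intention flag (BTR_LATCH_FOR_INSERT or BTR_LATCH_FOR_DELETE) on top of the base mode, so a plain equality test against BTR_MODIFY_TREE would no longer match. BTR_LATCH_MODE_WITHOUT_INTENTION() masks the intention bits away; its definition in btr0btr.h is essentially the following (a sketch, formatting may differ):

	#define BTR_LATCH_MODE_WITHOUT_INTENTION(latch_mode)	\
		((latch_mode) & ~(BTR_LATCH_FOR_INSERT | BTR_LATCH_FOR_DELETE))
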
@@ -3537,7 +3398,7 @@ ibuf_insert_low(
mutex_exit(&ibuf_mutex);
mutex_exit(&ibuf_pessimistic_insert_mutex);
- if (UNIV_UNLIKELY(!ibuf_add_free_page())) {
+ if (!ibuf_add_free_page()) {
mem_heap_free(heap);
return(DB_STRONG_FAIL);
@@ -3553,14 +3414,15 @@ ibuf_insert_low(
/* Find out the volume of already buffered inserts for the same index
page */
min_n_recs = 0;
- buffered = ibuf_get_volume_buffered(&pcur, space, page_no,
+ buffered = ibuf_get_volume_buffered(&pcur,
+ page_id.space(),
+ page_id.page_no(),
op == IBUF_OP_DELETE
? &min_n_recs
: NULL, &mtr);
if (op == IBUF_OP_DELETE
- && (min_n_recs < 2
- || buf_pool_watch_occurred(space, page_no))) {
+ && (min_n_recs < 2 || buf_pool_watch_occurred(page_id))) {
/* The page could become empty after the record is
deleted, or the page has been read in to the buffer
pool. Refuse to buffer the operation. */
@@ -3579,7 +3441,7 @@ ibuf_insert_low(
until after the IBUF_OP_DELETE has been buffered. */
fail_exit:
- if (mode == BTR_MODIFY_TREE) {
+ if (BTR_LATCH_MODE_WITHOUT_INTENTION(mode) == BTR_MODIFY_TREE) {
mutex_exit(&ibuf_mutex);
mutex_exit(&ibuf_pessimistic_insert_mutex);
}
@@ -3597,18 +3459,17 @@ fail_exit:
which it cannot do until we have buffered the IBUF_OP_DELETE
and done mtr_commit(&mtr) to release the latch. */
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a((buffered == 0) || ibuf_count_get(space, page_no));
-#endif
ibuf_mtr_start(&bitmap_mtr);
+ bitmap_mtr.set_named_space(page_id.space());
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no,
- zip_size, &bitmap_mtr);
+ bitmap_page = ibuf_bitmap_get_map_page(page_id, page_size,
+ &bitmap_mtr);
/* We check if the index page is suitable for buffered entries */
- if (buf_page_peek(space, page_no)
- || lock_rec_expl_exist_on_page(space, page_no)) {
+ if (buf_page_peek(page_id)
+ || lock_rec_expl_exist_on_page(page_id.space(),
+ page_id.page_no())) {
ibuf_mtr_commit(&bitmap_mtr);
goto fail_exit;
@@ -3616,11 +3477,11 @@ fail_exit:
if (op == IBUF_OP_INSERT) {
ulint bits = ibuf_bitmap_page_get_bits(
- bitmap_page, page_no, zip_size, IBUF_BITMAP_FREE,
+ bitmap_page, page_id, page_size, IBUF_BITMAP_FREE,
&bitmap_mtr);
if (buffered + entry_size + page_dir_calc_reserved_space(1)
- > ibuf_index_page_calc_free_from_bits(zip_size, bits)) {
+ > ibuf_index_page_calc_free_from_bits(page_size, bits)) {
/* Release the bitmap page latch early. */
ibuf_mtr_commit(&bitmap_mtr);
@@ -3629,7 +3490,7 @@ fail_exit:
ibuf_get_merge_page_nos(FALSE,
btr_pcur_get_rec(&pcur), &mtr,
- space_ids, space_versions,
+ space_ids,
page_nos, &n_stored);
goto fail_exit;
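
IBUF_BITMAP_FREE stores only two bits per page, so the test above works on a quantized estimate of the index page's free space. A standalone sketch of the convention implemented by ibuf_index_page_calc_free_from_bits(), where one unit is page_size/32 bytes and the value 3 counts as four units:

	#include <cstddef>

	/* Standalone sketch (not InnoDB source code). On a 16384-byte
	page, bits == 3 decodes to 4 * 512 = 2048 bytes of free space. */
	static size_t free_bytes_from_bits(size_t page_size, unsigned bits)
	{
		return (bits == 3 ? 4 : bits) * (page_size / 32);
	}

	/* The buffered insert is refused when the estimate is exceeded,
	mirroring the condition above. */
	static bool entry_fits(size_t buffered, size_t entry_size,
			       size_t dir_reserved, size_t page_size,
			       unsigned bits)
	{
		return buffered + entry_size + dir_reserved
			<= free_bytes_from_bits(page_size, bits);
	}
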
@@ -3641,7 +3502,8 @@ fail_exit:
insert. This can change the insert position, which can
result in the need to abort in some cases. */
ulint counter = ibuf_get_entry_counter(
- space, page_no, btr_pcur_get_rec(&pcur), &mtr,
+ page_id.space(), page_id.page_no(),
+ btr_pcur_get_rec(&pcur), &mtr,
btr_pcur_get_btr_cur(&pcur)->low_match
< IBUF_REC_FIELD_METADATA);
dfield_t* field;
@@ -3662,11 +3524,11 @@ fail_exit:
buffered entries for this index page, if the bit is not set yet */
old_bit_value = ibuf_bitmap_page_get_bits(
- bitmap_page, page_no, zip_size,
+ bitmap_page, page_id, page_size,
IBUF_BITMAP_BUFFERED, &bitmap_mtr);
if (!old_bit_value) {
- ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
+ ibuf_bitmap_page_set_bits(bitmap_page, page_id, page_size,
IBUF_BITMAP_BUFFERED, TRUE,
&bitmap_mtr);
}
@@ -3682,11 +3544,10 @@ fail_exit:
ibuf_entry, &ins_rec,
&dummy_big_rec, 0, thr, &mtr);
block = btr_cur_get_block(cursor);
- ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID);
+ ut_ad(block->page.id.space() == IBUF_SPACE_ID);
/* If this is the root page, update ibuf->empty. */
- if (UNIV_UNLIKELY(buf_block_get_page_no(block)
- == FSP_IBUF_TREE_ROOT_PAGE_NO)) {
+ if (block->page.id.page_no() == FSP_IBUF_TREE_ROOT_PAGE_NO) {
const page_t* root = buf_block_get_frame(block);
ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
@@ -3696,11 +3557,12 @@ fail_exit:
ibuf->empty = page_is_empty(root);
}
} else {
- ut_ad(mode == BTR_MODIFY_TREE);
+ ut_ad(BTR_LATCH_MODE_WITHOUT_INTENTION(mode)
+ == BTR_MODIFY_TREE);
- /* We acquire an x-latch to the root page before the insert,
+ /* We acquire an sx-latch to the root page before the insert,
because a pessimistic insert releases the tree x-latch,
- which would cause the x-latching of the root after that to
+ which would cause the sx-latching of the root after that to
break the latching order. */
root = ibuf_tree_root_get(&mtr);
@@ -3720,12 +3582,12 @@ fail_exit:
}
mutex_exit(&ibuf_pessimistic_insert_mutex);
- ibuf_size_update(root, &mtr);
+ ibuf_size_update(root);
mutex_exit(&ibuf_mutex);
ibuf->empty = page_is_empty(root);
block = btr_cur_get_block(cursor);
- ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID);
+ ut_ad(block->page.id.space() == IBUF_SPACE_ID);
}
if (offsets_heap) {
@@ -3739,24 +3601,13 @@ fail_exit:
}
func_exit:
-#ifdef UNIV_IBUF_COUNT_DEBUG
- if (err == DB_SUCCESS) {
- fprintf(stderr,
- "Incrementing ibuf count of space %lu page %lu\n"
- "from %lu by 1\n", space, page_no,
- ibuf_count_get(space, page_no));
-
- ibuf_count_set(space, page_no,
- ibuf_count_get(space, page_no) + 1);
- }
-#endif
-
ibuf_mtr_commit(&mtr);
btr_pcur_close(&pcur);
mem_heap_free(heap);
- if (err == DB_SUCCESS && mode == BTR_MODIFY_TREE) {
+ if (err == DB_SUCCESS
+ && BTR_LATCH_MODE_WITHOUT_INTENTION(mode) == BTR_MODIFY_TREE) {
ibuf_contract_after_insert(entry_size);
}
@@ -3764,29 +3615,31 @@ func_exit:
#ifdef UNIV_IBUF_DEBUG
ut_a(n_stored <= IBUF_MAX_N_PAGES_MERGED);
#endif
- buf_read_ibuf_merge_pages(false, space_ids, space_versions,
+ buf_read_ibuf_merge_pages(false, space_ids,
page_nos, n_stored);
}
return(err);
}
-/*********************************************************************//**
-Buffer an operation in the insert/delete buffer, instead of doing it
+/** Buffer an operation in the insert/delete buffer, instead of doing it
directly to the disk page, if this is possible. Does not do it if the index
is clustered or unique.
-@return TRUE if success */
-UNIV_INTERN
+@param[in] op operation type
+@param[in] entry index entry to insert
+@param[in,out] index index where to insert
+@param[in] page_id page id where to insert
+@param[in] page_size page size
+@param[in,out] thr query thread
+@return TRUE if success */
ibool
ibuf_insert(
-/*========*/
- ibuf_op_t op, /*!< in: operation type */
- const dtuple_t* entry, /*!< in: index entry to insert */
- dict_index_t* index, /*!< in: index where to insert */
- ulint space, /*!< in: space id where to insert */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- ulint page_no,/*!< in: page number where to insert */
- que_thr_t* thr) /*!< in: query thread */
+ ibuf_op_t op,
+ const dtuple_t* entry,
+ dict_index_t* index,
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ que_thr_t* thr)
{
dberr_t err;
ulint entry_size;
@@ -3796,13 +3649,14 @@ ibuf_insert(
ibuf_use_t use = ibuf_use;
DBUG_ENTER("ibuf_insert");
- DBUG_PRINT("ibuf", ("op: %d, space: %ld, page_no: %ld",
- op, space, page_no));
+ DBUG_PRINT("ibuf", ("op: %d, space: " UINT32PF ", page_no: " UINT32PF,
+ op, page_id.space(), page_id.page_no()));
ut_ad(dtuple_check_typed(entry));
- ut_ad(ut_is_2pow(zip_size));
+ ut_ad(page_id.space() != SRV_TMP_SPACE_ID);
ut_a(!dict_index_is_clust(index));
+ ut_ad(!dict_table_is_temporary(index->table));
no_counter = use <= IBUF_USE_INSERT;
@@ -3872,11 +3726,11 @@ check_watch:
buf_pool_watch_set(space, page_no). */
{
- buf_page_t* bpage;
- buf_pool_t* buf_pool = buf_pool_get(space, page_no);
- bpage = buf_page_get_also_watch(buf_pool, space, page_no);
+ buf_pool_t* buf_pool = buf_pool_get(page_id);
+ buf_page_t* bpage
+ = buf_page_get_also_watch(buf_pool, page_id);
- if (UNIV_LIKELY_NULL(bpage)) {
+ if (bpage != NULL) {
/* A buffer pool watch has been set or the
page has been read into the buffer pool.
Do not buffer the request. If a purge operation
@@ -3899,11 +3753,11 @@ skip_watch:
err = ibuf_insert_low(BTR_MODIFY_PREV, op, no_counter,
entry, entry_size,
- index, space, zip_size, page_no, thr);
+ index, page_id, page_size, thr);
if (err == DB_FAIL) {
- err = ibuf_insert_low(BTR_MODIFY_TREE, op, no_counter,
- entry, entry_size,
- index, space, zip_size, page_no, thr);
+ err = ibuf_insert_low(BTR_MODIFY_TREE | BTR_LATCH_FOR_INSERT,
+ op, no_counter, entry, entry_size,
+ index, page_id, page_size, thr);
}
if (err == DB_SUCCESS) {
@@ -3932,16 +3786,13 @@ ibuf_insert_to_index_page_low(
buf_block_t* block, /*!< in/out: index page where the buffered
entry should be placed */
dict_index_t* index, /*!< in: record descriptor */
- ulint** offsets,/*!< out: offsets on *rec */
+ offset_t** offsets,/*!< out: offsets on *rec */
mem_heap_t* heap, /*!< in/out: memory heap */
mtr_t* mtr, /*!< in/out: mtr */
page_cur_t* page_cur)/*!< in/out: cursor positioned on the record
after which to insert the buffered entry */
{
const page_t* page;
- ulint space;
- ulint page_no;
- ulint zip_size;
const page_t* bitmap_page;
ulint old_bits;
rec_t* rec;
@@ -3973,34 +3824,27 @@ ibuf_insert_to_index_page_low(
page = buf_block_get_frame(block);
- ut_print_timestamp(stderr);
+ ib::error() << "Insert buffer insert fails; page free "
+ << page_get_max_insert_size(page, 1) << ", dtuple size "
+ << rec_get_converted_size(index, entry, 0);
- fprintf(stderr,
- " InnoDB: Error: Insert buffer insert fails;"
- " page free %lu, dtuple size %lu\n",
- (ulong) page_get_max_insert_size(page, 1),
- (ulong) rec_get_converted_size(index, entry, 0));
fputs("InnoDB: Cannot insert index record ", stderr);
dtuple_print(stderr, entry);
fputs("\nInnoDB: The table where this index record belongs\n"
"InnoDB: is now probably corrupt. Please run CHECK TABLE on\n"
"InnoDB: that table.\n", stderr);
- space = page_get_space_id(page);
- zip_size = buf_block_get_zip_size(block);
- page_no = page_get_page_no(page);
+ bitmap_page = ibuf_bitmap_get_map_page(block->page.id,
+ block->page.size, mtr);
+ old_bits = ibuf_bitmap_page_get_bits(
+ bitmap_page, block->page.id, block->page.size,
+ IBUF_BITMAP_FREE, mtr);
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
- old_bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
- IBUF_BITMAP_FREE, mtr);
+ ib::error() << "page " << block->page.id << ", size "
+ << block->page.size.physical() << ", bitmap bits " << old_bits;
- fprintf(stderr,
- "InnoDB: space %lu, page %lu, zip_size %lu, bitmap bits %lu\n",
- (ulong) space, (ulong) page_no,
- (ulong) zip_size, (ulong) old_bits);
+ ib::error() << BUG_REPORT_MSG;
- fputs("InnoDB: Submit a detailed bug report"
- " to https://jira.mariadb.org/\n", stderr);
ut_ad(0);
DBUG_RETURN(NULL);
}
@@ -4022,64 +3866,61 @@ ibuf_insert_to_index_page(
ulint low_match;
page_t* page = buf_block_get_frame(block);
rec_t* rec;
- ulint* offsets;
+ offset_t* offsets;
mem_heap_t* heap;
DBUG_ENTER("ibuf_insert_to_index_page");
- DBUG_PRINT("ibuf", ("page_no: %ld", buf_block_get_page_no(block)));
- DBUG_PRINT("ibuf", ("index name: %s", index->name));
- DBUG_PRINT("ibuf", ("online status: %d",
- dict_index_get_online_status(index)));
+ DBUG_PRINT("ibuf", ("page " UINT32PF ":" UINT32PF,
+ block->page.id.space(),
+ block->page.id.page_no()));
+ ut_ad(!dict_index_is_online_ddl(index));// this is an ibuf_dummy index
ut_ad(ibuf_inside(mtr));
ut_ad(dtuple_check_typed(entry));
- ut_ad(!buf_block_align(page)->index);
+#ifdef BTR_CUR_HASH_ADAPT
+ /* A change buffer merge must occur before users are granted
+ any access to the page. No adaptive hash index entries may
+ point to a freshly read page. */
+ ut_ad(!block->index);
+ assert_block_ahi_empty(block);
+#endif /* BTR_CUR_HASH_ADAPT */
+ ut_ad(mtr->is_named_space(block->page.id.space()));
if (UNIV_UNLIKELY(dict_table_is_comp(index->table)
!= (ibool)!!page_is_comp(page))) {
- fputs("InnoDB: Trying to insert a record from"
- " the insert buffer to an index page\n"
- "InnoDB: but the 'compact' flag does not match!\n",
- stderr);
+ ib::warn() << "Trying to insert a record from the insert"
+ " buffer to an index page but the 'compact' flag does"
+ " not match!";
goto dump;
}
rec = page_rec_get_next(page_get_infimum_rec(page));
if (page_rec_is_supremum(rec)) {
- fputs("InnoDB: Trying to insert a record from"
- " the insert buffer to an index page\n"
- "InnoDB: but the index page is empty!\n",
- stderr);
+ ib::warn() << "Trying to insert a record from the insert"
+ " buffer to an index page but the index page"
+ " is empty!";
goto dump;
}
- if (UNIV_UNLIKELY(rec_get_n_fields(rec, index)
- != dtuple_get_n_fields(entry))) {
- fputs("InnoDB: Trying to insert a record from"
- " the insert buffer to an index page\n"
- "InnoDB: but the number of fields does not match!\n",
- stderr);
+ if (!rec_n_fields_is_sane(index, rec, entry)) {
+ ib::warn() << "Trying to insert a record from the insert"
+ " buffer to an index page but the number of fields"
+ " does not match!";
+ rec_print(stderr, rec, index);
dump:
- buf_page_print(page, 0);
-
dtuple_print(stderr, entry);
ut_ad(0);
- fputs("InnoDB: The table where where"
- " this index record belongs\n"
- "InnoDB: is now probably corrupt."
- " Please run CHECK TABLE on\n"
- "InnoDB: your tables.\n"
- "InnoDB: Submit a detailed bug report to"
- " https://jira.mariadb.org/\n", stderr);
+ ib::warn() << "The table where this index record belongs"
+ " is now probably corrupt. Please run CHECK TABLE on"
+ " your tables. " << BUG_REPORT_MSG;
DBUG_VOID_RETURN;
}
- low_match = page_cur_search(block, index, entry,
- PAGE_CUR_LE, &page_cur);
+ low_match = page_cur_search(block, index, entry, &page_cur);
heap = mem_heap_create(
sizeof(upd_t)
@@ -4097,8 +3938,8 @@ dump:
row_ins_sec_index_entry_by_modify(BTR_MODIFY_LEAF). */
ut_ad(rec_get_deleted_flag(rec, page_is_comp(page)));
- offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED,
- &heap);
+ offsets = rec_get_offsets(rec, index, NULL, true,
+ ULINT_UNDEFINED, &heap);
update = row_upd_build_sec_rec_difference_binary(
rec, index, offsets, entry, heap);
@@ -4137,12 +3978,12 @@ dump:
just write dummy trx_id(0), roll_ptr(0) */
btr_cur_update_in_place_log(BTR_KEEP_SYS_FLAG, rec,
index, update, 0, 0, mtr);
+
DBUG_EXECUTE_IF(
"crash_after_log_ibuf_upd_inplace",
log_buffer_flush_to_disk();
- ib_logf(IB_LOG_LEVEL_INFO,
- "Wrote log record for ibuf update in "
- "place operation");
+ ib::info() << "Wrote log record for ibuf"
+ " update in place operation";
DBUG_SUICIDE();
);
@@ -4210,8 +4051,7 @@ ibuf_set_del_mark(
ut_ad(ibuf_inside(mtr));
ut_ad(dtuple_check_typed(entry));
- low_match = page_cur_search(
- block, index, entry, PAGE_CUR_LE, &page_cur);
+ low_match = page_cur_search(block, index, entry, &page_cur);
if (low_match == dtuple_get_n_fields(entry)) {
rec_t* rec;
@@ -4238,22 +4078,18 @@ ibuf_set_del_mark(
const buf_block_t* block
= page_cur_get_block(&page_cur);
- ut_print_timestamp(stderr);
- fputs(" InnoDB: unable to find a record to delete-mark\n",
- stderr);
+ ib::error() << "Unable to find a record to delete-mark";
fputs("InnoDB: tuple ", stderr);
dtuple_print(stderr, entry);
fputs("\n"
"InnoDB: record ", stderr);
rec_print(stderr, page_cur_get_rec(&page_cur), index);
- fprintf(stderr, "\nspace %u offset %u"
- " (%u records, index id %llu)\n"
- "InnoDB: Submit a detailed bug report"
- " to https://jira.mariadb.org/\n",
- (unsigned) buf_block_get_space(block),
- (unsigned) buf_block_get_page_no(block),
- (unsigned) page_get_n_recs(page),
- (ulonglong) btr_page_get_index_id(page));
+
+ ib::error() << "page " << block->page.id << " ("
+ << page_get_n_recs(page) << " records, index id "
+ << btr_page_get_index_id(page) << ").";
+
+ ib::error() << BUG_REPORT_MSG;
ut_ad(0);
}
}
@@ -4275,9 +4111,9 @@ ibuf_delete(
ut_ad(ibuf_inside(mtr));
ut_ad(dtuple_check_typed(entry));
+ ut_ad(!dict_index_is_spatial(index));
- low_match = page_cur_search(
- block, index, entry, PAGE_CUR_LE, &page_cur);
+ low_match = page_cur_search(block, index, entry, &page_cur);
if (low_match == dtuple_get_n_fields(entry)) {
page_zip_des_t* page_zip= buf_block_get_page_zip(block);
@@ -4287,35 +4123,33 @@ ibuf_delete(
/* TODO: the below should probably be a separate function,
it's a bastardized version of btr_cur_optimistic_delete. */
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
mem_heap_t* heap = NULL;
ulint max_ins_size = 0;
rec_offs_init(offsets_);
offsets = rec_get_offsets(
- rec, index, offsets, ULINT_UNDEFINED, &heap);
+ rec, index, offsets, true, ULINT_UNDEFINED, &heap);
if (page_get_n_recs(page) <= 1
|| !(REC_INFO_DELETED_FLAG
& rec_get_info_bits(rec, page_is_comp(page)))) {
/* Refuse to purge the last record or a
record that has not been marked for deletion. */
- ut_print_timestamp(stderr);
- fputs(" InnoDB: unable to purge a record\n",
- stderr);
+ ib::error() << "Unable to purge a record";
fputs("InnoDB: tuple ", stderr);
dtuple_print(stderr, entry);
fputs("\n"
"InnoDB: record ", stderr);
rec_print_new(stderr, rec, offsets);
- fprintf(stderr, "\nspace %u offset %u"
+ fprintf(stderr, "\nspace " UINT32PF " offset " UINT32PF
" (%u records, index id %llu)\n"
"InnoDB: Submit a detailed bug report"
" to https://jira.mariadb.org/\n",
- (unsigned) buf_block_get_space(block),
- (unsigned) buf_block_get_page_no(block),
+ block->page.id.space(),
+ block->page.id.page_no(),
(unsigned) page_get_n_recs(page),
(ulonglong) btr_page_get_index_id(page));
@@ -4354,7 +4188,7 @@ ibuf_delete(
/*********************************************************************//**
Restores insert buffer tree cursor position
-@return TRUE if the position was restored; FALSE if not */
+@return TRUE if the position was restored; FALSE if not */
static MY_ATTRIBUTE((nonnull))
ibool
ibuf_restore_pos(
@@ -4369,26 +4203,22 @@ ibuf_restore_pos(
position is to be restored */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
- ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_MODIFY_TREE);
+ ut_ad(mode == BTR_MODIFY_LEAF
+ || BTR_LATCH_MODE_WITHOUT_INTENTION(mode) == BTR_MODIFY_TREE);
if (btr_pcur_restore_position(mode, pcur, mtr)) {
return(TRUE);
}
- if (fil_space_get_flags(space) == ULINT_UNDEFINED) {
- /* The tablespace has been dropped. It is possible
- that another thread has deleted the insert buffer
- entry. Do not complain. */
- ibuf_btr_pcur_commit_specify_mtr(pcur, mtr);
- } else {
- fprintf(stderr,
- "InnoDB: ERROR: Submit the output to"
- " https://jira.mariadb.org/\n"
- "InnoDB: ibuf cursor restoration fails!\n"
- "InnoDB: ibuf record inserted to page %lu:%lu\n",
- (ulong) space, (ulong) page_no);
- fflush(stderr);
+ if (fil_space_t* s = fil_space_acquire_silent(space)) {
+ ib::error() << "ibuf cursor restoration fails!"
+ " ibuf record inserted to page "
+ << space << ":" << page_no
+ << " in file " << s->chain.start->name;
+ fil_space_release(s);
+
+ ib::error() << BUG_REPORT_MSG;
rec_print_old(stderr, btr_pcur_get_rec(pcur));
rec_print_old(stderr, pcur->old_rec);
@@ -4396,12 +4226,9 @@ ibuf_restore_pos(
rec_print_old(stderr,
page_rec_get_next(btr_pcur_get_rec(pcur)));
- fflush(stderr);
-
- ibuf_btr_pcur_commit_specify_mtr(pcur, mtr);
- ut_ad(0);
}
+ ibuf_btr_pcur_commit_specify_mtr(pcur, mtr);
return(FALSE);
}
@@ -4409,7 +4236,7 @@ ibuf_restore_pos(
Deletes from ibuf the record on which pcur is positioned. If we have to
resort to a pessimistic delete, this function commits mtr and closes
the cursor.
-@return TRUE if mtr was committed and pcur closed in this operation */
+@return TRUE if mtr was committed and pcur closed in this operation */
static MY_ATTRIBUTE((warn_unused_result))
ibool
ibuf_delete_rec(
@@ -4432,25 +4259,11 @@ ibuf_delete_rec(
ut_ad(ibuf_rec_get_page_no(mtr, btr_pcur_get_rec(pcur)) == page_no);
ut_ad(ibuf_rec_get_space(mtr, btr_pcur_get_rec(pcur)) == space);
-#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
- if (ibuf_debug == 2) {
- /* Inject a fault (crash). We do this before trying
- optimistic delete, because a pessimistic delete in the
- change buffer would require a larger test case. */
-
- /* Flag the buffered record as processed, to avoid
- an assertion failure after crash recovery. */
- btr_cur_set_deleted_flag_for_ibuf(
- btr_pcur_get_rec(pcur), NULL, TRUE, mtr);
- ibuf_mtr_commit(mtr);
- log_write_up_to(LSN_MAX, LOG_WAIT_ALL_GROUPS, TRUE);
- DBUG_SUICIDE();
- }
-#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-
success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur),
0, mtr);
+ const page_id_t page_id(space, page_no);
+
if (success) {
if (page_is_empty(btr_pcur_get_page(pcur))) {
/* If a B-tree page is empty, it must be the root page
@@ -4468,14 +4281,6 @@ ibuf_delete_rec(
ibuf->empty = true;
}
-#ifdef UNIV_IBUF_COUNT_DEBUG
- fprintf(stderr,
- "Decrementing ibuf count of space %lu page %lu\n"
- "from %lu by 1\n", space, page_no,
- ibuf_count_get(space, page_no));
- ibuf_count_set(space, page_no,
- ibuf_count_get(space, page_no) - 1);
-#endif
return(FALSE);
}
@@ -4497,118 +4302,169 @@ ibuf_delete_rec(
mutex_enter(&ibuf_mutex);
if (!ibuf_restore_pos(space, page_no, search_tuple,
- BTR_MODIFY_TREE, pcur, mtr)) {
+ BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
+ pcur, mtr)) {
mutex_exit(&ibuf_mutex);
- ut_ad(mtr->state == MTR_COMMITTED);
+ ut_ad(mtr->has_committed());
goto func_exit;
}
root = ibuf_tree_root_get(mtr);
btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur), 0,
- RB_NONE, mtr);
+ false, mtr);
ut_a(err == DB_SUCCESS);
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1);
-#endif
- ibuf_size_update(root, mtr);
+ ibuf_size_update(root);
mutex_exit(&ibuf_mutex);
ibuf->empty = page_is_empty(root);
ibuf_btr_pcur_commit_specify_mtr(pcur, mtr);
func_exit:
- ut_ad(mtr->state == MTR_COMMITTED);
+ ut_ad(mtr->has_committed());
btr_pcur_close(pcur);
return(TRUE);
}
-/*********************************************************************//**
-When an index page is read from a disk to the buffer pool, this function
+/**
+Delete any buffered entries for a page.
+This prevents an infinite loop on slow shutdown
+in the case where the change buffer bitmap claims that no buffered
+changes exist, while entries exist in the change buffer tree.
+@param page_id page number for which there should be no buffered changes */
+ATTRIBUTE_COLD void ibuf_delete_recs(const page_id_t page_id)
+{
+ ulint dops[IBUF_OP_COUNT];
+ mtr_t mtr;
+ btr_pcur_t pcur;
+ mem_heap_t* heap = mem_heap_create(512);
+ const dtuple_t* tuple = ibuf_search_tuple_build(
+ page_id.space(), page_id.page_no(), heap);
+ memset(dops, 0, sizeof(dops));
+
+loop:
+ ibuf_mtr_start(&mtr);
+ btr_pcur_open(ibuf->index, tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
+ &pcur, &mtr);
+
+ if (!btr_pcur_is_on_user_rec(&pcur)) {
+ ut_ad(btr_pcur_is_after_last_on_page(&pcur));
+ goto func_exit;
+ }
+
+ for (;;) {
+ ut_ad(btr_pcur_is_on_user_rec(&pcur));
+
+ const rec_t* ibuf_rec = btr_pcur_get_rec(&pcur);
+
+ if (ibuf_rec_get_space(&mtr, ibuf_rec)
+ != page_id.space()
+ || ibuf_rec_get_page_no(&mtr, ibuf_rec)
+ != page_id.page_no()) {
+ break;
+ }
+
+ dops[ibuf_rec_get_op_type(&mtr, ibuf_rec)]++;
+
+ /* Delete the record from ibuf */
+ if (ibuf_delete_rec(page_id.space(), page_id.page_no(),
+ &pcur, tuple, &mtr)) {
+ /* Deletion was pessimistic and mtr was committed:
+ we start from the beginning again */
+ ut_ad(mtr.has_committed());
+ goto loop;
+ }
+
+ if (btr_pcur_is_after_last_on_page(&pcur)) {
+ ibuf_mtr_commit(&mtr);
+ btr_pcur_close(&pcur);
+ goto loop;
+ }
+ }
+
+func_exit:
+ ibuf_mtr_commit(&mtr);
+ btr_pcur_close(&pcur);
+
+ ibuf_add_ops(ibuf->n_discarded_ops, dops);
+
+ mem_heap_free(heap);
+}
+
+/** When an index page is read from disk to the buffer pool, this function
applies any buffered operations to the page and deletes the entries from the
insert buffer. If the page is not read, but created in the buffer pool, this
function deletes its buffered entries from the insert buffer; there can
exist entries for such a page if the page belonged to an index which
-subsequently was dropped. */
-UNIV_INTERN
+subsequently was dropped.
+@param[in,out] block if page has been read from disk,
+pointer to the page x-latched, else NULL
+@param[in] page_id page id of the index page
+@param[in] update_ibuf_bitmap normally this is set to TRUE, but
+if we have deleted or are deleting the tablespace, then we naturally do not
+want to update a non-existent bitmap page */
void
ibuf_merge_or_delete_for_page(
-/*==========================*/
- buf_block_t* block, /*!< in: if page has been read from
- disk, pointer to the page x-latched,
- else NULL */
- ulint space_id,/*!< in: space id of the index page */
- ulint page_no,/*!< in: page number of the index page */
- ulint zip_size,/*!< in: compressed page size in bytes,
- or 0 */
- ibool update_ibuf_bitmap)/*!< in: normally this is set
- to TRUE, but if we have deleted or are
- deleting the tablespace, then we
- naturally do not want to update a
- non-existent bitmap page */
+ buf_block_t* block,
+ const page_id_t page_id,
+ const page_size_t* page_size,
+ ibool update_ibuf_bitmap)
{
- mem_heap_t* heap;
btr_pcur_t pcur;
- dtuple_t* search_tuple;
#ifdef UNIV_IBUF_DEBUG
ulint volume = 0;
-#endif
+#endif /* UNIV_IBUF_DEBUG */
page_zip_des_t* page_zip = NULL;
- ibool corruption_noticed = FALSE;
+ bool corruption_noticed = false;
mtr_t mtr;
- fil_space_t* space = NULL;
/* Counts for merged & discarded operations. */
ulint mops[IBUF_OP_COUNT];
ulint dops[IBUF_OP_COUNT];
- ut_ad(!block || buf_block_get_space(block) == space_id);
- ut_ad(!block || buf_block_get_page_no(block) == page_no);
- ut_ad(!block || buf_block_get_zip_size(block) == zip_size);
- ut_ad(!block || buf_block_get_io_fix(block) == BUF_IO_READ);
+ ut_ad(block == NULL || page_id == block->page.id);
+ ut_ad(block == NULL || buf_block_get_io_fix(block) == BUF_IO_READ
+ || recv_recovery_is_on());
if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE
- || trx_sys_hdr_page(space_id, page_no)) {
+ || trx_sys_hdr_page(page_id)
+ || fsp_is_system_temporary(page_id.space())) {
return;
}
- /* We cannot refer to zip_size in the following, because
- zip_size is passed as ULINT_UNDEFINED (it is unknown) when
- buf_read_ibuf_merge_pages() is merging (discarding) changes
- for a dropped tablespace. When block != NULL or
- update_ibuf_bitmap is specified, the zip_size must be known.
- That is why we will repeat the check below, with zip_size in
- place of 0. Passing zip_size as 0 assumes that the
+ /* We cannot refer to page_size in the following, because it is passed
+ as NULL (it is unknown) when buf_read_ibuf_merge_pages() is merging
+ (discarding) changes for a dropped tablespace. When block != NULL or
+ update_ibuf_bitmap is specified, then page_size must be known.
+ That is why we will repeat the check below, with page_size in
+ place of univ_page_size. Passing univ_page_size assumes that the
uncompressed page size always is a power-of-2 multiple of the
compressed page size. */
- if (ibuf_fixed_addr_page(space_id, 0, page_no)
- || fsp_descr_page(0, page_no)) {
+ if (ibuf_fixed_addr_page(page_id, univ_page_size)
+ || fsp_descr_page(page_id, univ_page_size)) {
return;
}
- if (UNIV_LIKELY(update_ibuf_bitmap)) {
- ut_a(ut_is_2pow(zip_size));
+ fil_space_t* space;
- if (ibuf_fixed_addr_page(space_id, zip_size, page_no)
- || fsp_descr_page(zip_size, page_no)) {
+ if (update_ibuf_bitmap) {
+
+ ut_ad(page_size != NULL);
+
+ if (ibuf_fixed_addr_page(page_id, *page_size)
+ || fsp_descr_page(page_id, *page_size)) {
return;
}
- /* If the following returns space, we get the counter
- incremented, and must decrement it when we leave this
- function. When the counter is > 0, that prevents tablespace
- from being dropped. */
-
- space = fil_space_acquire_silent(space_id);
+ space = fil_space_acquire_silent(page_id.space());
if (UNIV_UNLIKELY(!space)) {
- /* Do not try to read the bitmap page from space;
- just delete the ibuf records for the page */
-
+ /* Do not try to read the bitmap page from the
+ non-existent tablespace; just delete the ibuf records */
block = NULL;
update_ibuf_bitmap = FALSE;
} else {
@@ -4618,39 +4474,47 @@ ibuf_merge_or_delete_for_page(
ibuf_mtr_start(&mtr);
bitmap_page = ibuf_bitmap_get_map_page(
- space_id, page_no, zip_size, &mtr);
+ page_id, *page_size, &mtr);
if (bitmap_page &&
fil_page_get_type(bitmap_page) != FIL_PAGE_TYPE_ALLOCATED) {
bitmap_bits = ibuf_bitmap_page_get_bits(
- bitmap_page, page_no, zip_size,
+ bitmap_page, page_id, *page_size,
IBUF_BITMAP_BUFFERED, &mtr);
}
ibuf_mtr_commit(&mtr);
if (!bitmap_bits) {
- /* No inserts buffered for this page */
-
- if (space) {
- fil_space_release(space);
+ /* No changes are buffered for this page. */
+
+ fil_space_release(space);
+ if (UNIV_UNLIKELY(srv_shutdown_state)
+ && !srv_fast_shutdown) {
+ /* Prevent an infinite loop on slow
+ shutdown, in case the bitmap bits are
+ wrongly clear even though buffered
+ changes exist. */
+ ibuf_delete_recs(page_id);
}
-
return;
}
}
- } else if (block
- && (ibuf_fixed_addr_page(space_id, zip_size, page_no)
- || fsp_descr_page(zip_size, page_no))) {
+ } else if (block != NULL
+ && (ibuf_fixed_addr_page(page_id, *page_size)
+ || fsp_descr_page(page_id, *page_size))) {
return;
+ } else {
+ space = NULL;
}
- heap = mem_heap_create(512);
+ mem_heap_t* heap = mem_heap_create(512);
- search_tuple = ibuf_search_tuple_build(space_id, page_no, heap);
+ const dtuple_t* search_tuple = ibuf_search_tuple_build(
+ page_id.space(), page_id.page_no(), heap);
- if (block) {
+ if (block != NULL) {
/* Move the ownership of the x-latch on the page to this OS
thread, so that we can acquire a second x-latch on it. This
is needed for the insert operations to the index page to pass
@@ -4659,54 +4523,20 @@ ibuf_merge_or_delete_for_page(
rw_lock_x_lock_move_ownership(&(block->lock));
page_zip = buf_block_get_page_zip(block);
- if (UNIV_UNLIKELY(fil_page_get_type(block->frame)
- != FIL_PAGE_INDEX)
- || UNIV_UNLIKELY(!page_is_leaf(block->frame))) {
-
- page_t* bitmap_page;
+ if (!fil_page_index_page_check(block->frame)
+ || !page_is_leaf(block->frame)) {
- corruption_noticed = TRUE;
+ corruption_noticed = true;
- ut_print_timestamp(stderr);
-
- ibuf_mtr_start(&mtr);
-
- fputs(" InnoDB: Dump of the ibuf bitmap page:\n",
- stderr);
-
- bitmap_page = ibuf_bitmap_get_map_page(space_id, page_no,
- zip_size, &mtr);
- if (bitmap_page == NULL)
- {
- fputs("InnoDB: cannot retrieve bitmap page\n",
- stderr);
- } else {
- buf_page_print(bitmap_page, 0);
- }
- ibuf_mtr_commit(&mtr);
-
- fputs("\nInnoDB: Dump of the page:\n", stderr);
-
- buf_page_print(block->frame, 0);
-
- fprintf(stderr,
- "InnoDB: Error: corruption in the tablespace."
- " Bitmap shows insert\n"
- "InnoDB: buffer records to page n:o %lu"
- " though the page\n"
- "InnoDB: type is %lu, which is"
- " not an index leaf page!\n"
- "InnoDB: We try to resolve the problem"
- " by skipping the insert buffer\n"
- "InnoDB: merge for this page."
- " Please run CHECK TABLE on your tables\n"
- "InnoDB: to determine if they are corrupt"
- " after this.\n\n"
- "InnoDB: Please submit a detailed bug report"
- " to https://jira.mariadb.org/\n\n",
- (ulong) page_no,
- (ulong)
- fil_page_get_type(block->frame));
+ ib::error() << "Corruption in the tablespace. Bitmap"
+ " shows insert buffer records to page "
+ << page_id << " though the page type is "
+ << fil_page_get_type(block->frame)
+ << ", which is not an index leaf page. We try"
+ " to resolve the problem by skipping the"
+ " insert buffer merge for this page. Please"
+ " run CHECK TABLE on your tables to determine"
+ " if they are corrupt after this.";
ut_ad(0);
}
}
@@ -4723,9 +4553,11 @@ loop:
ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
&pcur, &mtr);
- if (block) {
+ if (block != NULL) {
ibool success;
+ mtr.set_named_space(page_id.space());
+
success = buf_page_get_known_nowait(
RW_X_LATCH, block,
BUF_KEEP_OLD, __FILE__, __LINE__, &mtr);
@@ -4739,11 +4571,12 @@ loop:
the block is io-fixed. Other threads must not try to
latch an io-fixed block. */
buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
+ } else if (update_ibuf_bitmap) {
+ mtr.set_named_space(page_id.space());
}
if (!btr_pcur_is_on_user_rec(&pcur)) {
- ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
-
+ ut_ad(btr_pcur_is_after_last_on_page(&pcur));
goto reset_bit;
}
@@ -4755,10 +4588,10 @@ loop:
rec = btr_pcur_get_rec(&pcur);
/* Check if the entry is for this index page */
- if (ibuf_rec_get_page_no(&mtr, rec) != page_no
- || ibuf_rec_get_space(&mtr, rec) != space_id) {
+ if (ibuf_rec_get_page_no(&mtr, rec) != page_id.page_no()
+ || ibuf_rec_get_space(&mtr, rec) != page_id.space()) {
- if (block) {
+ if (block != NULL) {
page_header_reset_last_insert(
block->frame, page_zip, &mtr);
}
@@ -4766,11 +4599,11 @@ loop:
goto reset_bit;
}
- if (UNIV_UNLIKELY(corruption_noticed)) {
+ if (corruption_noticed) {
fputs("InnoDB: Discarding record\n ", stderr);
rec_print_old(stderr, rec);
fputs("\nInnoDB: from the insert buffer!\n\n", stderr);
- } else if (block && !rec_get_deleted_flag(rec, 0)) {
+ } else if (block != NULL && !rec_get_deleted_flag(rec, 0)) {
/* Now we have at pcur a record which should be
applied on the index page; NOTE that the call below
copies pointers to fields in rec, and we must
@@ -4822,8 +4655,9 @@ loop:
ut_ad(rec == btr_pcur_get_rec(&pcur));
ut_ad(page_rec_is_user_rec(rec));
ut_ad(ibuf_rec_get_page_no(&mtr, rec)
- == page_no);
- ut_ad(ibuf_rec_get_space(&mtr, rec) == space_id);
+ == page_id.page_no());
+ ut_ad(ibuf_rec_get_space(&mtr, rec)
+ == page_id.space());
/* Mark the change buffer record processed,
so that it will not be merged again in case
@@ -4839,6 +4673,7 @@ loop:
ibuf_btr_pcur_commit_specify_mtr(&pcur, &mtr);
ibuf_mtr_start(&mtr);
+ mtr.set_named_space(page_id.space());
success = buf_page_get_known_nowait(
RW_X_LATCH, block,
@@ -4853,12 +4688,13 @@ loop:
buf_block_dbg_add_level(
block, SYNC_IBUF_TREE_NODE);
- if (!ibuf_restore_pos(space_id, page_no,
+ if (!ibuf_restore_pos(page_id.space(),
+ page_id.page_no(),
search_tuple,
BTR_MODIFY_LEAF,
&pcur, &mtr)) {
- ut_ad(mtr.state == MTR_COMMITTED);
+ ut_ad(mtr.has_committed());
mops[op]++;
ibuf_dummy_index_free(dummy_index);
goto loop;
@@ -4877,12 +4713,12 @@ loop:
}
/* Delete the record from ibuf */
- if (ibuf_delete_rec(space_id, page_no, &pcur, search_tuple,
- &mtr)) {
+ if (ibuf_delete_rec(page_id.space(), page_id.page_no(),
+ &pcur, search_tuple, &mtr)) {
/* Deletion was pessimistic and mtr was committed:
we start from the beginning again */
- ut_ad(mtr.state == MTR_COMMITTED);
+ ut_ad(mtr.has_committed());
goto loop;
} else if (btr_pcur_is_after_last_on_page(&pcur)) {
ibuf_mtr_commit(&mtr);
@@ -4893,66 +4729,50 @@ loop:
}
reset_bit:
- if (UNIV_LIKELY(update_ibuf_bitmap)) {
+ if (update_ibuf_bitmap) {
page_t* bitmap_page;
- bitmap_page = ibuf_bitmap_get_map_page(
- space_id, page_no, zip_size, &mtr);
+ bitmap_page = ibuf_bitmap_get_map_page(page_id, *page_size,
+ &mtr);
ibuf_bitmap_page_set_bits(
- bitmap_page, page_no, zip_size,
+ bitmap_page, page_id, *page_size,
IBUF_BITMAP_BUFFERED, FALSE, &mtr);
- if (block) {
+ if (block != NULL) {
ulint old_bits = ibuf_bitmap_page_get_bits(
- bitmap_page, page_no, zip_size,
+ bitmap_page, page_id, *page_size,
IBUF_BITMAP_FREE, &mtr);
- ulint new_bits = ibuf_index_page_calc_free(
- zip_size, block);
+ ulint new_bits = ibuf_index_page_calc_free(block);
if (old_bits != new_bits) {
ibuf_bitmap_page_set_bits(
- bitmap_page, page_no, zip_size,
+ bitmap_page, page_id, *page_size,
IBUF_BITMAP_FREE, new_bits, &mtr);
}
}
}
ibuf_mtr_commit(&mtr);
- btr_pcur_close(&pcur);
- mem_heap_free(heap);
-
-#ifdef HAVE_ATOMIC_BUILTINS
- os_atomic_increment_ulint(&ibuf->n_merges, 1);
- ibuf_add_ops(ibuf->n_merged_ops, mops);
- ibuf_add_ops(ibuf->n_discarded_ops, dops);
-#else /* HAVE_ATOMIC_BUILTINS */
- /* Protect our statistics keeping from race conditions */
- mutex_enter(&ibuf_mutex);
-
- ibuf->n_merges++;
- ibuf_add_ops(ibuf->n_merged_ops, mops);
- ibuf_add_ops(ibuf->n_discarded_ops, dops);
-
- mutex_exit(&ibuf_mutex);
-#endif /* HAVE_ATOMIC_BUILTINS */
if (space) {
fil_space_release(space);
}
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(space, page_no) == 0);
-#endif
+ btr_pcur_close(&pcur);
+ mem_heap_free(heap);
+
+ my_atomic_addlint(&ibuf->n_merges, 1);
+ ibuf_add_ops(ibuf->n_merged_ops, mops);
+ ibuf_add_ops(ibuf->n_discarded_ops, dops);
}
/*********************************************************************//**
Deletes all entries in the insert buffer for a given space id. This is used
-in DISCARD TABLESPACE and IMPORT TABLESPACE.
+in DISCARD TABLESPACE, IMPORT TABLESPACE, and 5.7 TRUNCATE TABLE recovery.
NOTE: this does not update the page free bitmaps in the space. The space will
become CORRUPT when you call this function! */
-UNIV_INTERN
void
ibuf_delete_for_discarded_space(
/*============================*/
@@ -4986,8 +4806,7 @@ loop:
&pcur, &mtr);
if (!btr_pcur_is_on_user_rec(&pcur)) {
- ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
-
+ ut_ad(btr_pcur_is_after_last_on_page(&pcur));
goto leave_loop;
}
@@ -5012,7 +4831,7 @@ loop:
/* Deletion was pessimistic and mtr was committed:
we start from the beginning again */
- ut_ad(mtr.state == MTR_COMMITTED);
+ ut_ad(mtr.has_committed());
goto loop;
}
@@ -5028,22 +4847,14 @@ leave_loop:
ibuf_mtr_commit(&mtr);
btr_pcur_close(&pcur);
-#ifdef HAVE_ATOMIC_BUILTINS
- ibuf_add_ops(ibuf->n_discarded_ops, dops);
-#else /* HAVE_ATOMIC_BUILTINS */
- /* Protect our statistics keeping from race conditions */
- mutex_enter(&ibuf_mutex);
ibuf_add_ops(ibuf->n_discarded_ops, dops);
- mutex_exit(&ibuf_mutex);
-#endif /* HAVE_ATOMIC_BUILTINS */
mem_heap_free(heap);
}
/******************************************************************//**
Looks if the insert buffer is empty.
-@return true if empty */
-UNIV_INTERN
+@return true if empty */
bool
ibuf_is_empty(void)
/*===============*/
@@ -5067,26 +4878,20 @@ ibuf_is_empty(void)
/******************************************************************//**
Prints info of ibuf. */
-UNIV_INTERN
void
ibuf_print(
/*=======*/
FILE* file) /*!< in: file where to print */
{
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ulint i;
- ulint j;
-#endif
-
mutex_enter(&ibuf_mutex);
fprintf(file,
- "Ibuf: size %lu, free list len %lu,"
- " seg size %lu, %lu merges\n",
- (ulong) ibuf->size,
- (ulong) ibuf->free_list_len,
- (ulong) ibuf->seg_size,
- (ulong) ibuf->n_merges);
+ "Ibuf: size " ULINTPF ", free list len " ULINTPF ","
+ " seg size " ULINTPF ", " ULINTPF " merges\n",
+ ibuf->size,
+ ibuf->free_list_len,
+ ibuf->seg_size,
+ ibuf->n_merges);
fputs("merged operations:\n ", file);
ibuf_print_ops(ibuf->n_merged_ops, file);
@@ -5094,62 +4899,48 @@ ibuf_print(
fputs("discarded operations:\n ", file);
ibuf_print_ops(ibuf->n_discarded_ops, file);
-#ifdef UNIV_IBUF_COUNT_DEBUG
- for (i = 0; i < IBUF_COUNT_N_SPACES; i++) {
- for (j = 0; j < IBUF_COUNT_N_PAGES; j++) {
- ulint count = ibuf_count_get(i, j);
-
- if (count > 0) {
- fprintf(stderr,
- "Ibuf count for space/page %lu/%lu"
- " is %lu\n",
- (ulong) i, (ulong) j, (ulong) count);
- }
- }
- }
-#endif /* UNIV_IBUF_COUNT_DEBUG */
-
mutex_exit(&ibuf_mutex);
}
/******************************************************************//**
Checks the insert buffer bitmaps on IMPORT TABLESPACE.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
ibuf_check_bitmap_on_import(
/*========================*/
const trx_t* trx, /*!< in: transaction */
ulint space_id) /*!< in: tablespace identifier */
{
- ulint zip_size;
- ulint page_size;
- ulint size;
ulint page_no;
ut_ad(space_id);
ut_ad(trx->mysql_thd);
- zip_size = fil_space_get_zip_size(space_id);
-
- if (zip_size == ULINT_UNDEFINED) {
+ FilSpace space(space_id);
+ if (!space()) {
return(DB_TABLE_NOT_FOUND);
}
+ const page_size_t page_size(space->flags);
+ /* fil_space_t::size and fil_space_t::free_limit would still be 0
+ at this point. So, we will have to read page 0. */
+ ut_ad(!space->free_limit);
+ ut_ad(!space->size);
+
mtr_t mtr;
- mtr_start(&mtr);
- {
- buf_block_t* sp = buf_page_get(space_id, zip_size, 0,
- RW_S_LATCH, &mtr);
- if (sp) {
- size = mach_read_from_4(
- FSP_HEADER_OFFSET + FSP_FREE_LIMIT
- + sp->frame);
- } else {
- size = 0;
- }
+ ulint size;
+ mtr.start();
+ if (buf_block_t* sp = buf_page_get(page_id_t(space_id, 0), page_size,
+ RW_S_LATCH, &mtr)) {
+ size = std::min(
+ mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT
+ + sp->frame),
+ mach_read_from_4(FSP_HEADER_OFFSET + FSP_SIZE
+ + sp->frame));
+ } else {
+ size = 0;
}
- mtr_commit(&mtr);
+ mtr.commit();
if (size == 0) {
return(DB_TABLE_NOT_FOUND);
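
FSP_SIZE and FSP_FREE_LIMIT are 4-byte big-endian fields in the FSP header of page 0 (the header starts at FIL_PAGE_DATA, byte 38). Taking the minimum makes the bitmap scan below robust against a free limit that exceeds the real tablespace size in a damaged import. A standalone sketch of the read with the header offsets spelled out:

	#include <cstdint>
	#include <cstddef>

	/* Standalone sketch (not InnoDB source code): decode the two
	header fields and take the conservative minimum, as above. */
	static uint32_t be32(const unsigned char* p)
	{
		return uint32_t(p[0]) << 24 | uint32_t(p[1]) << 16
		     | uint32_t(p[2]) << 8 | uint32_t(p[3]);
	}

	static uint32_t fsp_used_pages(const unsigned char* page0)
	{
		const size_t FSP_HEADER_OFFSET = 38;	/* == FIL_PAGE_DATA */
		const size_t FSP_SIZE = 8;	/* offsets within the header */
		const size_t FSP_FREE_LIMIT = 12;
		uint32_t size = be32(page0 + FSP_HEADER_OFFSET + FSP_SIZE);
		uint32_t limit = be32(page0 + FSP_HEADER_OFFSET + FSP_FREE_LIMIT);
		return limit < size ? limit : size;
	}
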
@@ -5157,9 +4948,13 @@ ibuf_check_bitmap_on_import(
mutex_enter(&ibuf_mutex);
- page_size = zip_size ? zip_size : UNIV_PAGE_SIZE;
+ /* The two bitmap pages (allocation bitmap and ibuf bitmap) repeat
+ every page_size pages. For example if page_size is 16 KiB, then the
+ two bitmap pages repeat every 16 KiB * 16384 = 256 MiB. In the loop
+ below page_no is measured in number of pages since the beginning of
+ the space, as usual. */
- for (page_no = 0; page_no < size; page_no += page_size) {
+ for (page_no = 0; page_no < size; page_no += page_size.physical()) {
page_t* bitmap_page;
ulint i;
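
Because one bitmap page describes exactly page_size.physical() pages and sits at offset FSP_IBUF_BITMAP_OFFSET (1) inside each aligned group, the covering bitmap page for any page number follows from plain bit arithmetic; the real helper is ibuf_bitmap_page_no_calc() in ibuf0ibuf.ic. A standalone sketch:

	/* Standalone sketch (not InnoDB source code). `pages_per_group`
	is page_size.physical(), always a power of two. For 16 KiB pages:
	page 40000 -> group start 32768 -> bitmap page 32769. */
	static unsigned long ibuf_bitmap_page_for(unsigned long page_no,
						  unsigned long pages_per_group)
	{
		const unsigned long FSP_IBUF_BITMAP_OFFSET = 1;
		return (page_no & ~(pages_per_group - 1))
			+ FSP_IBUF_BITMAP_OFFSET;
	}
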
@@ -5175,19 +4970,47 @@ ibuf_check_bitmap_on_import(
ibuf_enter(&mtr);
bitmap_page = ibuf_bitmap_get_map_page(
- space_id, page_no, zip_size, &mtr);
+ page_id_t(space_id, page_no), page_size, &mtr);
+
+ if (buf_is_zeroes(span<const byte>(bitmap_page,
+ page_size.physical()))) {
+ /* This means we got an all-zero page instead of
+ the ibuf bitmap page. The subsequent pages should be
+ all-zero as well. */
+#ifdef UNIV_DEBUG
+ for (ulint curr_page = page_no + 1;
+ curr_page < page_size.physical(); curr_page++) {
+
+ buf_block_t* block = buf_page_get(
+ page_id_t(space_id, curr_page),
+ page_size,
+ RW_S_LATCH, &mtr);
+ page_t* page = buf_block_get_frame(block);
+ ut_ad(buf_is_zeroes(span<const byte>(
+ page, page_size.physical())));
+ }
+#endif /* UNIV_DEBUG */
+ ibuf_exit(&mtr);
+ mtr_commit(&mtr);
+ continue;
+ }
if (!bitmap_page) {
mutex_exit(&ibuf_mutex);
return DB_CORRUPTION;
}
- for (i = FSP_IBUF_BITMAP_OFFSET + 1; i < page_size; i++) {
+ for (i = FSP_IBUF_BITMAP_OFFSET + 1;
+ i < page_size.physical();
+ i++) {
+
const ulint offset = page_no + i;
+ const page_id_t cur_page_id(space_id, offset);
+
if (ibuf_bitmap_page_get_bits(
- bitmap_page, offset, zip_size,
- IBUF_BITMAP_IBUF, &mtr)) {
+ bitmap_page, cur_page_id, page_size,
+ IBUF_BITMAP_IBUF, &mtr)) {
mutex_exit(&ibuf_mutex);
ibuf_exit(&mtr);
@@ -5206,7 +5029,7 @@ ibuf_check_bitmap_on_import(
}
if (ibuf_bitmap_page_get_bits(
- bitmap_page, offset, zip_size,
+ bitmap_page, cur_page_id, page_size,
IBUF_BITMAP_BUFFERED, &mtr)) {
ib_errf(trx->mysql_thd,
@@ -5221,7 +5044,7 @@ ibuf_check_bitmap_on_import(
slightly corrupted tables can be
imported and dumped. Clear the bit. */
ibuf_bitmap_page_set_bits(
- bitmap_page, offset, zip_size,
+ bitmap_page, cur_page_id, page_size,
IBUF_BITMAP_BUFFERED, FALSE, &mtr);
}
}
@@ -5233,4 +5056,37 @@ ibuf_check_bitmap_on_import(
mutex_exit(&ibuf_mutex);
return(DB_SUCCESS);
}
-#endif /* !UNIV_HOTBACKUP */
+
+/** Updates free bits and buffered bits for bulk loaded page.
+@param[in] block index page
+@param[in] reset if true, reset the free bits to zero */
+void
+ibuf_set_bitmap_for_bulk_load(
+ buf_block_t* block,
+ bool reset)
+{
+ page_t* bitmap_page;
+ mtr_t mtr;
+ ulint free_val;
+
+ ut_a(page_is_leaf(buf_block_get_frame(block)));
+
+ mtr_start(&mtr);
+ mtr.set_named_space(block->page.id.space());
+
+ bitmap_page = ibuf_bitmap_get_map_page(block->page.id,
+ block->page.size, &mtr);
+
+ free_val = reset ? 0 : ibuf_index_page_calc_free(block);
+ ibuf_bitmap_page_set_bits(
+ bitmap_page, block->page.id, block->page.size,
+ IBUF_BITMAP_FREE, free_val, &mtr);
+
+ ibuf_bitmap_page_set_bits(
+ bitmap_page, block->page.id, block->page.size,
+ IBUF_BITMAP_BUFFERED, FALSE, &mtr);
+
+ mtr_commit(&mtr);
+}
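
A hypothetical call site for the new helper, shown as a sketch (the bulk loader in btr0bulk.cc is the intended caller, but this exact invocation is illustrative): after building a leaf page, the loader publishes the page's remaining free space, or clears the free bits when the page should be treated as full:

	/* Hypothetical usage sketch of ibuf_set_bitmap_for_bulk_load(). */
	ibuf_set_bitmap_for_bulk_load(block, /*reset=*/false);
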
diff --git a/storage/innobase/include/api0api.h b/storage/innobase/include/api0api.h
deleted file mode 100644
index 1d4882d1f04..00000000000
--- a/storage/innobase/include/api0api.h
+++ /dev/null
@@ -1,1312 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/api0api.h
-InnoDB Native API
-
-2008-08-01 Created by Sunny Bains.
-3/20/2011 Jimmy Yang extracted from Embedded InnoDB
-*******************************************************/
-
-#ifndef api0api_h
-#define api0api_h
-
-#include "db0err.h"
-#include <stdio.h>
-
-#ifdef _MSC_VER
-#define strncasecmp _strnicmp
-#define strcasecmp _stricmp
-#endif
-
-#if defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER)
-#define UNIV_NO_IGNORE MY_ATTRIBUTE ((warn_unused_result))
-#else
-#define UNIV_NO_IGNORE
-#endif /* __GNUC__ && __GNUC__ > 2 && !__INTEL_COMPILER */
-
-/* See comment about ib_bool_t as to why the two macros are unsigned long. */
-/** The boolean value of "true" used internally within InnoDB */
-#define IB_TRUE 0x1UL
-/** The boolean value of "false" used internally within InnoDB */
-#define IB_FALSE 0x0UL
-
-/* Basic types used by the InnoDB API. */
-/** All InnoDB error codes are represented by ib_err_t */
-typedef enum dberr_t ib_err_t;
-/** Representation of a byte within InnoDB */
-typedef unsigned char ib_byte_t;
-/** Representation of an unsigned long int within InnoDB */
-typedef unsigned long int ib_ulint_t;
-
-/* We assume C99 support except when using VisualStudio. */
-#if !defined(_MSC_VER)
-#include <stdint.h>
-#endif /* _MSC_VER */
-
-/* Integer types used by the API. Microsft VS defines its own types
-and we use the Microsoft types when building with Visual Studio. */
-#if defined(_MSC_VER)
-/** A signed 8 bit integral type. */
-typedef __int8 ib_i8_t;
-#else
-/** A signed 8 bit integral type. */
-typedef int8_t ib_i8_t;
-#endif
-
-#if defined(_MSC_VER)
-/** An unsigned 8 bit integral type. */
-typedef unsigned __int8 ib_u8_t;
-#else
-/** An unsigned 8 bit integral type. */
-typedef uint8_t ib_u8_t;
-#endif
-
-#if defined(_MSC_VER)
-/** A signed 16 bit integral type. */
-typedef __int16 ib_i16_t;
-#else
-/** A signed 16 bit integral type. */
-typedef int16_t ib_i16_t;
-#endif
-
-#if defined(_MSC_VER)
-/** An unsigned 16 bit integral type. */
-typedef unsigned __int16 ib_u16_t;
-#else
-/** An unsigned 16 bit integral type. */
-typedef uint16_t ib_u16_t;
-#endif
-
-#if defined(_MSC_VER)
-/** A signed 32 bit integral type. */
-typedef __int32 ib_i32_t;
-#else
-/** A signed 32 bit integral type. */
-typedef int32_t ib_i32_t;
-#endif
-
-#if defined(_MSC_VER)
-/** An unsigned 32 bit integral type. */
-typedef unsigned __int32 ib_u32_t;
-#else
-/** An unsigned 32 bit integral type. */
-typedef uint32_t ib_u32_t;
-#endif
-
-#if defined(_MSC_VER)
-/** A signed 64 bit integral type. */
-typedef __int64 ib_i64_t;
-#else
-/** A signed 64 bit integral type. */
-typedef int64_t ib_i64_t;
-#endif
-
-#if defined(_MSC_VER)
-/** An unsigned 64 bit integral type. */
-typedef unsigned __int64 ib_u64_t;
-#else
-/** An unsigned 64 bit integral type. */
-typedef uint64_t ib_u64_t;
-#endif
-
-typedef void* ib_opaque_t;
-typedef ib_opaque_t ib_charset_t;
-typedef ib_ulint_t ib_bool_t;
-typedef ib_u64_t ib_id_u64_t;
-
-/** @enum ib_cfg_type_t Possible types for a configuration variable. */
-typedef enum {
- IB_CFG_IBOOL, /*!< The configuration parameter is
- of type ibool */
-
- /* XXX Can we avoid having different types for ulint and ulong?
- - On Win64 "unsigned long" is 32 bits
- - ulong is always defined as "unsigned long"
- - On Win64 ulint is defined as 64 bit integer
- => On Win64 ulint != ulong.
- If we typecast all ulong and ulint variables to the smaller type
- ulong, then we will cut the range of the ulint variables.
- This is not a problem for most ulint variables because their max
- allowed values do not exceed 2^32-1 (e.g. log_groups is ulint
- but its max allowed value is 10). BUT buffer_pool_size and
- log_file_size allow up to 2^64-1. */
-
- IB_CFG_ULINT, /*!< The configuration parameter is
- of type ulint */
-
- IB_CFG_ULONG, /*!< The configuration parameter is
- of type ulong */
-
- IB_CFG_TEXT, /*!< The configuration parameter is
- of type char* */
-
- IB_CFG_CB /*!< The configuration parameter is
- a callback parameter */
-} ib_cfg_type_t;
-
-/** @enum ib_col_type_t column types that are supported. */
-typedef enum {
- IB_VARCHAR = 1, /*!< Character varying length. The
- column is not padded. */
-
- IB_CHAR = 2, /*!< Fixed length character string. The
- column is padded to the right. */
-
- IB_BINARY = 3, /*!< Fixed length binary, similar to
- IB_CHAR but the column is not padded
- to the right. */
-
- IB_VARBINARY = 4, /*!< Variable length binary */
-
- IB_BLOB = 5, /*!< Binary large object, or
- a TEXT type */
-
- IB_INT = 6, /*!< Integer: can be any size
- from 1 - 8 bytes. If the size is
- 1, 2, 4 and 8 bytes then you can use
- the typed read and write functions. For
- other sizes you will need to use the
- ib_col_get_value() function and do the
- conversion yourself. */
-
- IB_SYS = 8, /*!< System column, this column can
- be one of DATA_TRX_ID, DATA_ROLL_PTR
- or DATA_ROW_ID. */
-
- IB_FLOAT = 9, /*!< C (float) floating point value. */
-
- IB_DOUBLE = 10, /*!< C (double) floating point value. */
-
- IB_DECIMAL = 11, /*!< Decimal stored as an ASCII
- string */
-
- IB_VARCHAR_ANYCHARSET = 12, /*!< Any charset, varying length */
-
- IB_CHAR_ANYCHARSET = 13 /*!< Any charset, fixed length */
-
-} ib_col_type_t;
-
-/** @enum ib_tbl_fmt_t InnoDB table format types */
-typedef enum {
- IB_TBL_REDUNDANT, /*!< Redundant row format, the column
- type and length is stored in the row.*/
-
- IB_TBL_COMPACT, /*!< Compact row format, the column
- type is not stored in the row. The
- length is stored in the row but the
- storage format uses a compact format
- to store the length of the column data
- and the record data storage format
- also uses less storage. */
-
- IB_TBL_DYNAMIC, /*!< Compact row format. BLOB prefixes
- are not stored in the clustered index */
-
- IB_TBL_COMPRESSED /*!< Similar to dynamic format but
- with pages compressed */
-} ib_tbl_fmt_t;
-
-/** @enum ib_col_attr_t InnoDB column attributes */
-typedef enum {
- IB_COL_NONE = 0, /*!< No special attributes. */
-
- IB_COL_NOT_NULL = 1, /*!< Column data can't be NULL. */
-
- IB_COL_UNSIGNED = 2, /*!< Column is IB_INT and unsigned. */
-
- IB_COL_NOT_USED = 4, /*!< Future use, reserved. */
-
- IB_COL_CUSTOM1 = 8, /*!< Custom precision type, this is
- a bit that is ignored by InnoDB and so
- can be set and queried by users. */
-
- IB_COL_CUSTOM2 = 16, /*!< Custom precision type, this is
- a bit that is ignored by InnoDB and so
- can be set and queried by users. */
-
- IB_COL_CUSTOM3 = 32 /*!< Custom precision type, this is
- a bit that is ignored by InnoDB and so
- can be set and queried by users. */
-} ib_col_attr_t;
-
-/* Note: must match lock0types.h */
-/** @enum ib_lck_mode_t InnoDB lock modes. */
-typedef enum {
- IB_LOCK_IS = 0, /*!< Intention shared, an intention
- lock should be used to lock tables */
-
- IB_LOCK_IX, /*!< Intention exclusive, an intention
- lock should be used to lock tables */
-
- IB_LOCK_S, /*!< Shared locks should be used to
- lock rows */
-
- IB_LOCK_X, /*!< Exclusive locks should be used to
- lock rows*/
-
- IB_LOCK_TABLE_X, /*!< exclusive table lock */
-
- IB_LOCK_NONE, /*!< This is used internally to denote
- a consistent read */
-
- IB_LOCK_NUM = IB_LOCK_NONE /*!< number of lock modes */
-} ib_lck_mode_t;
-
-typedef enum {
- IB_CLUSTERED = 1, /*!< clustered index */
- IB_UNIQUE = 2 /*!< unique index */
-} ib_index_type_t;
-
-/** @enum ib_srch_mode_t InnoDB cursor search modes for ib_cursor_moveto().
-Note: Values must match those found in page0cur.h */
-typedef enum {
- IB_CUR_G = 1, /*!< If search key is not found then
- position the cursor on the row that
- is greater than the search key */
-
- IB_CUR_GE = 2, /*!< If the search key not found then
- position the cursor on the row that
- is greater than or equal to the search
- key */
-
- IB_CUR_L = 3, /*!< If search key is not found then
- position the cursor on the row that
- is less than the search key */
-
- IB_CUR_LE = 4 /*!< If search key is not found then
- position the cursor on the row that
- is less than or equal to the search
- key */
-} ib_srch_mode_t;
-
-/** @enum ib_match_mode_t Various match modes used by ib_cursor_moveto() */
-typedef enum {
- IB_CLOSEST_MATCH, /*!< Closest match possible */
-
- IB_EXACT_MATCH, /*!< Search using a complete key
- value */
-
- IB_EXACT_PREFIX /*!< Search using a key prefix which
- must match the rows: the prefix may
- contain an incomplete field (the
- last field in prefix may be just
- a prefix of a fixed length column) */
-} ib_match_mode_t;
-
-/** @struct ib_col_meta_t InnoDB column meta data. */
-typedef struct {
- ib_col_type_t type; /*!< Type of the column */
-
- ib_col_attr_t attr; /*!< Column attributes */
-
- ib_u32_t type_len; /*!< Length of type */
-
- ib_u16_t client_type; /*!< 16 bits of data relevant only to
- the client. InnoDB doesn't care */
-
- ib_charset_t* charset; /*!< Column charset */
-} ib_col_meta_t;
-
-/* Note: Must be in sync with trx0trx.h */
-/** @enum ib_trx_state_t The transaction state can be queried using the
-ib_trx_state() function. The InnoDB deadlock monitor can roll back a
-transaction and users should be prepared for this, especially where there
-is high contention. The way to determine the state of the transaction is to
-query its state and check. */
-typedef enum {
- IB_TRX_NOT_STARTED, /*!< The transaction has not been
- started yet. */
-
- IB_TRX_ACTIVE, /*!< The transaction is currently
- active and needs to be either
- committed or rolled back. */
-
- IB_TRX_COMMITTED_IN_MEMORY, /*!< Not committed to disk yet */
-
- IB_TRX_PREPARED /*!< Support for 2PC/XA */
-} ib_trx_state_t;
-
-/* Note: Must be in sync with trx0trx.h */
-/** @enum ib_trx_level_t Transaction isolation levels */
-typedef enum {
- IB_TRX_READ_UNCOMMITTED = 0, /*!< Dirty read: non-locking SELECTs are
- performed so that we do not look at a
- possible earlier version of a record;
- thus they are not 'consistent' reads
- under this isolation level; otherwise
- like level 2 */
-
- IB_TRX_READ_COMMITTED = 1, /*!< Somewhat Oracle-like isolation,
- except that in range UPDATE and DELETE
- we must block phantom rows with
- next-key locks; SELECT ... FOR UPDATE
- and ... LOCK IN SHARE MODE only lock
- the index records, NOT the gaps before
- them, and thus allow free inserting;
- each consistent read reads its own
- snapshot */
-
- IB_TRX_REPEATABLE_READ = 2, /*!< All consistent reads in the same
- trx read the same snapshot; full
- next-key locking used in locking reads
- to block insertions into gaps */
-
- IB_TRX_SERIALIZABLE = 3 /*!< All plain SELECTs are converted to
- LOCK IN SHARE MODE reads */
-} ib_trx_level_t;
-
-/** Generic InnoDB callback prototype. */
-typedef void (*ib_cb_t)(void);
-
-#define IB_CFG_BINLOG_ENABLED 0x1
-#define IB_CFG_MDL_ENABLED 0x2
-#define IB_CFG_DISABLE_ROWLOCK 0x4
-
-/** The first argument to the InnoDB message logging function. By default
-it's set to stderr. You should treat ib_msg_stream_t as a void*, since
-it will probably change in the future. */
-typedef FILE* ib_msg_stream_t;
-
-/** All log messages are written to this function. It should have the same
-behavior as fprintf(3). */
-typedef int (*ib_msg_log_t)(ib_msg_stream_t, const char*, ...);
-
-/* Note: This is to make it easy for API users to have type
-checking for arguments to our functions. Making it ib_opaque_t
-by itself would cause pointer decay, subverting the compiler's
-type checking. */
-
-/** InnoDB tuple handle. This handle can refer to either a cluster index
-tuple or a secondary index tuple. There are two types of tuples for each
-type of index, making a total of four types of tuple handles. There
-is a tuple for reading the entire row contents and another for searching
-on the index key. */
-typedef struct ib_tuple_t* ib_tpl_t;
-
-/** InnoDB transaction handle, all database operations need to be covered
-by transactions. This handle represents a transaction. The handle can be
-created with ib_trx_begin(), you commit your changes with ib_trx_commit()
-and undo your changes using ib_trx_rollback(). If the InnoDB deadlock
-monitor rolls back the transaction then you need to free the transaction
-using the function ib_trx_release(). You can query the state of an InnoDB
-transaction by calling ib_trx_state(). */
-typedef struct trx_t* ib_trx_t;
-
-/** InnoDB cursor handle */
-typedef struct ib_cursor_t* ib_crsr_t;
-
-/*************************************************************//**
-This function is used to compare two data fields for which the data type
-is such that we must use the client code to compare them.
-
-@param col_meta column meta data
-@param p1 key
-@param p1_len key length
-@param p2 second key
-@param p2_len second key length
-@return 1, 0, -1 if p1 is greater than, equal to, or less than p2 */
-
-typedef int (*ib_client_cmp_t)(
- const ib_col_meta_t* col_meta,
- const ib_byte_t* p1,
- ib_ulint_t p1_len,
- const ib_byte_t* p2,
- ib_ulint_t p2_len);
-
-/* This should be the same as univ.i */
-/** Represents SQL_NULL length */
-#define IB_SQL_NULL 0xFFFFFFFF
-/** The number of system columns in a row. */
-#define IB_N_SYS_COLS 3
-
-/** The maximum length of a text column. */
-#define MAX_TEXT_LEN 4096
-
-/* MySQL uses 3 byte UTF-8 encoding. */
-/** The maximum length of a column name in a table schema. */
-#define IB_MAX_COL_NAME_LEN (64 * 3)
-
-/** The maximum length of a table name (plus database name). */
-#define IB_MAX_TABLE_NAME_LEN (64 * 3) * 2
-
-/*****************************************************************//**
-Start a transaction that's been rolled back. This special function
-exists for the case when InnoDB's deadlock detector has rolled back
-a transaction. While the transaction has been rolled back the handle
-is still valid and can be reused by calling this function. If you
-don't want to reuse the transaction handle then you can free the handle
-by calling ib_trx_release().
-@return innobase txn handle */
-
-ib_err_t
-ib_trx_start(
-/*=========*/
- ib_trx_t ib_trx, /*!< in: transaction to restart */
- ib_trx_level_t ib_trx_level, /*!< in: trx isolation level */
- ib_bool_t read_write, /*!< in: true if read write
- transaction */
- ib_bool_t auto_commit, /*!< in: auto commit after each
- single DML */
- void* thd); /*!< in: THD */
-
-/*****************************************************************//**
-Begin a transaction. This will allocate a new transaction handle and
-put the transaction in the active state.
-@return innobase txn handle */
-
-ib_trx_t
-ib_trx_begin(
-/*=========*/
- ib_trx_level_t ib_trx_level, /*!< in: trx isolation level */
- ib_bool_t read_write, /*!< in: true if read write
- transaction */
- ib_bool_t auto_commit); /*!< in: auto commit after each
- single DML */
-
-/*****************************************************************//**
-Query the transaction's state. This function can be used to check for
-the state of the transaction in case it has been rolled back by the
-InnoDB deadlock detector. Note that when a transaction is selected as
-a victim for rollback, InnoDB will always return an appropriate error
-code indicating this. @see DB_DEADLOCK, @see DB_LOCK_TABLE_FULL and
-@see DB_LOCK_WAIT_TIMEOUT
-@return transaction state */
-
-ib_trx_state_t
-ib_trx_state(
-/*=========*/
- ib_trx_t ib_trx); /*!< in: trx handle */
-
-
-/*****************************************************************//**
-Check if the transaction is read_only */
-ib_u32_t
-ib_trx_read_only(
-/*=============*/
- ib_trx_t ib_trx); /*!< in: trx handle */
-
-/*****************************************************************//**
-Release the resources of the transaction. If the transaction was
-selected as a victim by InnoDB and rolled back then use this function
-to free the transaction handle.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_trx_release(
-/*===========*/
- ib_trx_t ib_trx); /*!< in: trx handle */
-
-/*****************************************************************//**
-Commit a transaction. This function will release the schema latches too.
-It will also free the transaction handle.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_trx_commit(
-/*==========*/
- ib_trx_t ib_trx); /*!< in: trx handle */
-
-/*****************************************************************//**
-Rollback a transaction. This function will release the schema latches too.
-It will also free the transaction handle.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_trx_rollback(
-/*============*/
- ib_trx_t ib_trx); /*!< in: trx handle */
-
-/*****************************************************************//**
-Open an InnoDB table and return a cursor handle to it.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_open_table_using_id(
-/*==========================*/
- ib_id_u64_t table_id, /*!< in: table id of table to open */
- ib_trx_t ib_trx, /*!< in: Current transaction handle
- can be NULL */
- ib_crsr_t* ib_crsr); /*!< out,own: InnoDB cursor */
-
-/*****************************************************************//**
-Open an InnoDB index and return a cursor handle to it.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_open_index_using_id(
-/*==========================*/
- ib_id_u64_t index_id, /*!< in: index id of index to open */
- ib_trx_t ib_trx, /*!< in: Current transaction handle
- can be NULL */
- ib_crsr_t* ib_crsr); /*!< out: InnoDB cursor */
-
-/*****************************************************************//**
-Open an InnoDB secondary index cursor and return a cursor handle to it.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_open_index_using_name(
-/*============================*/
- ib_crsr_t ib_open_crsr, /*!< in: open/active cursor */
- const char* index_name, /*!< in: secondary index name */
- ib_crsr_t* ib_crsr, /*!< out,own: InnoDB index cursor */
- int* idx_type, /*!< out: index is cluster index */
- ib_id_u64_t* idx_id); /*!< out: index id */
-
-/*****************************************************************//**
-Open an InnoDB table by name and return a cursor handle to it.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_open_table(
-/*=================*/
- const char* name, /*!< in: table name */
- ib_trx_t ib_trx, /*!< in: Current transaction handle
- can be NULL */
- ib_crsr_t* ib_crsr); /*!< out,own: InnoDB cursor */
-
-/*****************************************************************//**
-Reset the cursor.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_reset(
-/*============*/
- ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
-
-
-/*****************************************************************//**
-Set a cursor's trx to NULL. */
-
-void
-ib_cursor_clear_trx(
-/*================*/
- ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
-
-/*****************************************************************//**
-Close an InnoDB table and free the cursor.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_close(
-/*============*/
- ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
-
-/*****************************************************************//**
-Close the table, decrement n_ref_count count.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_close_table(
-/*==================*/
- ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
-
-/*****************************************************************//**
-Update the cursor with a new transaction and also reset the cursor.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_new_trx(
-/*==============*/
- ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
- ib_trx_t ib_trx); /*!< in: transaction */
-
-/*****************************************************************//**
-Commit the transaction in a cursor
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_commit_trx(
-/*=================*/
- ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
- ib_trx_t ib_trx); /*!< in: transaction */
-
-/********************************************************************//**
-Open a table using the table name; if found, increment the table ref count.
-@return table instance if found */
-
-void*
-ib_open_table_by_name(
-/*==================*/
- const char* name); /*!< in: table name to lookup */
-
-/*****************************************************************//**
-Insert a row to a table.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_insert_row(
-/*=================*/
- ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor instance */
- const ib_tpl_t ib_tpl); /*!< in: tuple to insert */
-
-/*****************************************************************//**
-Update a row in a table.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_update_row(
-/*=================*/
- ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
- const ib_tpl_t ib_old_tpl, /*!< in: Old tuple in table */
- const ib_tpl_t ib_new_tpl); /*!< in: New tuple to update */
-
-/*****************************************************************//**
-Delete a row in a table.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_delete_row(
-/*=================*/
- ib_crsr_t ib_crsr); /*!< in: cursor instance */
-
-/*****************************************************************//**
-Read current row.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_read_row(
-/*===============*/
- ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
- ib_tpl_t ib_tpl, /*!< out: read cols into this tuple */
- void** row_buf, /*!< in/out: row buffer */
- ib_ulint_t* row_len); /*!< in/out: row buffer len */
-
-/*****************************************************************//**
-Move cursor to the first record in the table.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_first(
-/*============*/
- ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */
-
-/*****************************************************************//**
-Move cursor to the last record in the table.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_last(
-/*===========*/
- ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */
-
-/*****************************************************************//**
-Move cursor to the next record in the table.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_next(
-/*===========*/
- ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */
-
-/*****************************************************************//**
-Search for key.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_moveto(
-/*=============*/
- ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
- ib_tpl_t ib_tpl, /*!< in: Key to search for */
- ib_srch_mode_t ib_srch_mode); /*!< in: search mode */
-
-/*****************************************************************//**
-Set the match mode for ib_cursor_moveto(). */
-
-void
-ib_cursor_set_match_mode(
-/*=====================*/
- ib_crsr_t ib_crsr, /*!< in: Cursor instance */
- ib_match_mode_t match_mode); /*!< in: ib_cursor_moveto match mode */
-
-/*****************************************************************//**
-Set a column of the tuple. Make a copy using the tuple's heap.
-@return DB_SUCCESS or error code */
-
-ib_err_t
-ib_col_set_value(
-/*=============*/
- ib_tpl_t ib_tpl, /*!< in: tuple instance */
- ib_ulint_t col_no, /*!< in: column index in tuple */
- const void* src, /*!< in: data value */
- ib_ulint_t len, /*!< in: data value len */
- ib_bool_t need_cpy); /*!< in: TRUE if the value needs
- to be copied */
-
-
-/*****************************************************************//**
-Get the size of the data available in the column of the tuple.
-@return bytes avail or IB_SQL_NULL */
-
-ib_ulint_t
-ib_col_get_len(
-/*===========*/
- ib_tpl_t ib_tpl, /*!< in: tuple instance */
- ib_ulint_t i); /*!< in: column index in tuple */
-
-/*****************************************************************//**
-Copy a column value from the tuple.
-@return bytes copied or IB_SQL_NULL */
-
-ib_ulint_t
-ib_col_copy_value(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in: tuple instance */
- ib_ulint_t i, /*!< in: column index in tuple */
- void* dst, /*!< out: copied data value */
- ib_ulint_t len); /*!< in: max data value len to copy */
-
-/*************************************************************//**
-Read a signed int 8 bit column from an InnoDB tuple.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_read_i8(
-/*=============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_i8_t* ival); /*!< out: integer value */
-
-/*************************************************************//**
-Read an unsigned int 8 bit column from an InnoDB tuple.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_read_u8(
-/*=============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_u8_t* ival); /*!< out: integer value */
-
-/*************************************************************//**
-Read a signed int 16 bit column from an InnoDB tuple.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_read_i16(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_i16_t* ival); /*!< out: integer value */
-
-/*************************************************************//**
-Read an unsigned int 16 bit column from an InnoDB tuple.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_read_u16(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_u16_t* ival); /*!< out: integer value */
-
-/*************************************************************//**
-Read a signed int 32 bit column from an InnoDB tuple.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_read_i32(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_i32_t* ival); /*!< out: integer value */
-
-/*************************************************************//**
-Read an unsigned int 32 bit column from an InnoDB tuple.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_read_u32(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_u32_t* ival); /*!< out: integer value */
-
-/*************************************************************//**
-Read a signed int 64 bit column from an InnoDB tuple.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_read_i64(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_i64_t* ival); /*!< out: integer value */
-
-/*************************************************************//**
-Read an unsigned int 64 bit column from an InnoDB tuple.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_read_u64(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_u64_t* ival); /*!< out: integer value */
-
-/*****************************************************************//**
-Get a column value pointer from the tuple.
-@return NULL or pointer to buffer */
-
-const void*
-ib_col_get_value(
-/*=============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i); /*!< in: column number */
-
-/*****************************************************************//**
-Get a column type, length and attributes from the tuple.
-@return len of column data */
-
-ib_ulint_t
-ib_col_get_meta(
-/*============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_col_meta_t* ib_col_meta); /*!< out: column meta data */
-
-/*****************************************************************//**
-"Clear" or reset an InnoDB tuple. We free the heap and recreate the tuple.
-@return new tuple, or NULL */
-
-ib_tpl_t
-ib_tuple_clear(
-/*============*/
- ib_tpl_t ib_tpl); /*!< in: InnoDB tuple */
-
-/*****************************************************************//**
-Create a new cluster key search tuple and copy the contents of the
-secondary index key tuple columns that refer to the cluster index record
-to the cluster key. It does a deep copy of the column data.
-@return DB_SUCCESS or error code */
-
-ib_err_t
-ib_tuple_get_cluster_key(
-/*=====================*/
- ib_crsr_t ib_crsr, /*!< in: secondary index cursor */
- ib_tpl_t* ib_dst_tpl, /*!< out,own: destination tuple */
- const ib_tpl_t ib_src_tpl); /*!< in: source tuple */
-
-/*****************************************************************//**
-Copy the contents of source tuple to destination tuple. The tuples
-must be of the same type and belong to the same table/index.
-@return DB_SUCCESS or error code */
-
-ib_err_t
-ib_tuple_copy(
-/*==========*/
- ib_tpl_t ib_dst_tpl, /*!< in: destination tuple */
- const ib_tpl_t ib_src_tpl); /*!< in: source tuple */
-
-/*****************************************************************//**
-Create an InnoDB tuple used for index/table search.
-@return tuple for current index */
-
-ib_tpl_t
-ib_sec_search_tuple_create(
-/*=======================*/
- ib_crsr_t ib_crsr); /*!< in: Cursor instance */
-
-/*****************************************************************//**
-Create an InnoDB tuple used for index/table search.
-@return tuple for current index */
-
-ib_tpl_t
-ib_sec_read_tuple_create(
-/*=====================*/
- ib_crsr_t ib_crsr); /*!< in: Cursor instance */
-
-/*****************************************************************//**
-Create an InnoDB tuple used for table key operations.
-@return tuple for current table */
-
-ib_tpl_t
-ib_clust_search_tuple_create(
-/*=========================*/
- ib_crsr_t ib_crsr); /*!< in: Cursor instance */
-
-/*****************************************************************//**
-Create an InnoDB tuple for table row operations.
-@return tuple for current table */
-
-ib_tpl_t
-ib_clust_read_tuple_create(
-/*=======================*/
- ib_crsr_t ib_crsr); /*!< in: Cursor instance */
-
-/*****************************************************************//**
-Return the number of user columns in the tuple definition.
-@return number of user columns */
-
-ib_ulint_t
-ib_tuple_get_n_user_cols(
-/*=====================*/
- const ib_tpl_t ib_tpl); /*!< in: Tuple for current table */
-
-/*****************************************************************//**
-Return the number of columns in the tuple definition.
-@return number of columns */
-
-ib_ulint_t
-ib_tuple_get_n_cols(
-/*================*/
- const ib_tpl_t ib_tpl); /*!< in: Tuple for current table */
-
-/*****************************************************************//**
-Destroy an InnoDB tuple. */
-
-void
-ib_tuple_delete(
-/*============*/
- ib_tpl_t ib_tpl); /*!< in,own: Tuple instance to delete */
-
-/*****************************************************************//**
-Truncate a table. The cursor handle will be closed and set to NULL
-on success.
-@return DB_SUCCESS or error code */
-
-ib_err_t
-ib_cursor_truncate(
-/*===============*/
- ib_crsr_t* ib_crsr, /*!< in/out: cursor for table
- to truncate */
- ib_id_u64_t* table_id); /*!< out: new table id */
-
-/*****************************************************************//**
-Get a table id.
-@return DB_SUCCESS if found */
-
-ib_err_t
-ib_table_get_id(
-/*============*/
- const char* table_name, /*!< in: table to find */
- ib_id_u64_t* table_id); /*!< out: table id if found */
-
-/*****************************************************************//**
-Get an index id.
-@return DB_SUCCESS if found */
-
-ib_err_t
-ib_index_get_id(
-/*============*/
- const char* table_name, /*!< in: find index for this table */
- const char* index_name, /*!< in: index to find */
- ib_id_u64_t* index_id); /*!< out: index id if found */
-
-/*****************************************************************//**
-Check if cursor is positioned.
-@return IB_TRUE if positioned */
-
-ib_bool_t
-ib_cursor_is_positioned(
-/*====================*/
- const ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */
-
-/*****************************************************************//**
-Checks if the data dictionary is latched in exclusive mode by a
-user transaction.
-@return TRUE if exclusive latch */
-
-ib_bool_t
-ib_schema_lock_is_exclusive(
-/*========================*/
- const ib_trx_t ib_trx); /*!< in: transaction */
-
-/*****************************************************************//**
-Lock an InnoDB cursor/table.
-@return DB_SUCCESS or error code */
-
-ib_err_t
-ib_cursor_lock(
-/*===========*/
- ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
- ib_lck_mode_t ib_lck_mode); /*!< in: InnoDB lock mode */
-
-/*****************************************************************//**
-Lock an InnoDB table using the table id.
-@return DB_SUCCESS or error code */
-
-ib_err_t
-ib_table_lock(
-/*===========*/
- ib_trx_t ib_trx, /*!< in/out: transaction */
- ib_id_u64_t table_id, /*!< in: table id */
- ib_lck_mode_t ib_lck_mode); /*!< in: InnoDB lock mode */
-
-/*****************************************************************//**
-Set the lock mode of the cursor.
-@return DB_SUCCESS or error code */
-
-ib_err_t
-ib_cursor_set_lock_mode(
-/*====================*/
- ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
- ib_lck_mode_t ib_lck_mode); /*!< in: InnoDB lock mode */
-
-/*****************************************************************//**
-Set the "need to access clustered index record" flag. */
-
-void
-ib_cursor_set_cluster_access(
-/*=========================*/
- ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_write_i8(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- ib_i8_t val); /*!< in: value to write */
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_write_i16(
-/*=================*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- ib_i16_t val); /*!< in: value to write */
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_write_i32(
-/*===============*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- ib_i32_t val); /*!< in: value to write */
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_write_i64(
-/*===============*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- ib_i64_t val); /*!< in: value to write */
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_write_u8(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- ib_u8_t val); /*!< in: value to write */
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_write_u16(
-/*===============*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- ib_u16_t val); /*!< in: value to write */
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_write_u32(
-/*=================*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- ib_u32_t val); /*!< in: value to write */
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_write_u64(
-/*===============*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- ib_u64_t val); /*!< in: value to write */
-
-/*****************************************************************//**
-Inform the cursor that it's the start of an SQL statement. */
-
-void
-ib_cursor_stmt_begin(
-/*=================*/
- ib_crsr_t ib_crsr); /*!< in: cursor */
-
-/*****************************************************************//**
-Write a double value to a column.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_write_double(
-/*==================*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- int col_no, /*!< in: column number */
- double val); /*!< in: value to write */
-
-/*************************************************************//**
-Read a double column value from an InnoDB tuple.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_read_double(
-/*=================*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t col_no, /*!< in: column number */
- double* dval); /*!< out: double value */
-
-/*****************************************************************//**
-Write a float value to a column.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_write_float(
-/*=================*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- float val); /*!< in: value to write */
-
-/*************************************************************//**
-Read a float value from an InnoDB tuple.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_read_float(
-/*================*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t col_no, /*!< in: column number */
- float* fval); /*!< out: float value */
-
-/*****************************************************************//**
-Get a column name from the cursor.
-@return name of the column */
-
-const char*
-ib_col_get_name(
-/*============*/
- ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
- ib_ulint_t i); /*!< in: column index in tuple */
-
-/*****************************************************************//**
-Get an index field name from the cursor.
-@return name of the field */
-
-const char*
-ib_get_idx_field_name(
-/*==================*/
- ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
- ib_ulint_t i); /*!< in: column index in tuple */
-
-/*****************************************************************//**
-Truncate a table.
-@return DB_SUCCESS or error code */
-
-ib_err_t
-ib_table_truncate(
-/*==============*/
- const char* table_name, /*!< in: table name */
- ib_id_u64_t* table_id); /*!< out: new table id */
-
-/*****************************************************************//**
-Frees a possible InnoDB trx object associated with the current THD.
-@return DB_SUCCESS or error number */
-
-ib_err_t
-ib_close_thd(
-/*=========*/
- void* thd); /*!< in: handle to the MySQL
- thread of the user whose resources
- should be freed */
-
-/*****************************************************************//**
-Get the generic configuration status.
-@return configuration status */
-
-int
-ib_cfg_get_cfg();
-/*============*/
-
-/*****************************************************************//**
-Increase/decrease the memcached sync count of a table to sync memcached
-DML with SQL DDLs.
-@return DB_SUCCESS or error number */
-ib_err_t
-ib_cursor_set_memcached_sync(
-/*=========================*/
- ib_crsr_t ib_crsr, /*!< in: cursor */
- ib_bool_t flag); /*!< in: true for increasing */
-
-/*****************************************************************//**
-Check whether the table name conforms to our requirements. Currently
-we only do a simple check for the presence of a '/'.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_table_name_check(
-/*================*/
- const char* name); /*!< in: table name to check */
-
-/*****************************************************************//**
-Return isolation configuration set by "innodb_api_trx_level"
-@return trx isolation level*/
-
-ib_trx_state_t
-ib_cfg_trx_level();
-/*==============*/
-
-/*****************************************************************//**
-Return the configured value for the background commit interval (in seconds)
-@return background commit interval (in seconds) */
-
-ib_ulint_t
-ib_cfg_bk_commit_interval();
-/*=======================*/
-
-/*****************************************************************//**
-Get a trx start time.
-@return trx start_time */
-
-ib_u64_t
-ib_trx_get_start_time(
-/*==================*/
- ib_trx_t ib_trx); /*!< in: transaction */
-
-#endif /* api0api_h */
diff --git a/storage/innobase/include/api0misc.h b/storage/innobase/include/api0misc.h
deleted file mode 100644
index 4827b53d249..00000000000
--- a/storage/innobase/include/api0misc.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2008, 2012, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/api0misc.h
-InnoDB Native API
-
-3/20/2011 Jimmy Yang extracted from Embedded InnoDB
-2008 Created by Sunny Bains
-*******************************************************/
-
-#ifndef api0misc_h
-#define api0misc_h
-
-#include "univ.i"
-#include "os0file.h"
-#include "que0que.h"
-#include "trx0trx.h"
-
-/** Whether binlog is enabled for applications using InnoDB APIs */
-extern my_bool ib_binlog_enabled;
-
-/** Whether MySQL MDL is enabled for applications using InnoDB APIs */
-extern my_bool ib_mdl_enabled;
-
-/** Whether InnoDB row lock is disabled for applications using InnoDB APIs */
-extern my_bool ib_disable_row_lock;
-
-/** Configured value for the transaction isolation level */
-extern ulong ib_trx_level_setting;
-
-/** Configured value for the background commit interval (in seconds) */
-extern ulong ib_bk_commit_interval;
-
-/********************************************************************
-Handles user errors and lock waits detected by the database engine.
-@return TRUE if it was a lock wait and we should continue running
-the query thread */
-UNIV_INTERN
-ibool
-ib_handle_errors(
-/*=============*/
- dberr_t* new_err, /*!< out: possible new error
- encountered in lock wait, or if
- no new error, the value of
- trx->error_state at the entry of this
- function */
- trx_t* trx, /*!< in: transaction */
- que_thr_t* thr, /*!< in: query thread */
- trx_savept_t* savept); /*!< in: savepoint or NULL */
-
-/*************************************************************************
-Sets a lock on a table.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-ib_trx_lock_table_with_retry(
-/*=========================*/
- trx_t* trx, /*!< in/out: transaction */
- dict_table_t* table, /*!< in: table to lock */
- enum lock_mode mode); /*!< in: lock mode */
-
-#endif /* api0misc_h */
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
index a5661c23fcb..c5ae633016a 100644
--- a/storage/innobase/include/btr0btr.h
+++ b/storage/innobase/include/btr0btr.h
@@ -2,7 +2,7 @@
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2014, 2015, MariaDB Corporation. All Rights Reserved.
+Copyright (c) 2014, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,15 +28,13 @@ Created 6/2/1994 Heikki Tuuri
#ifndef btr0btr_h
#define btr0btr_h
-#include "univ.i"
-
#include "dict0dict.h"
#include "data0data.h"
+#include "rem0types.h"
#include "page0cur.h"
-#include "mtr0mtr.h"
#include "btr0types.h"
+#include "gis0type.h"
-#ifndef UNIV_HOTBACKUP
/** Maximum record size which can be stored on a page, without using the
special big record storage structure */
#define BTR_PAGE_MAX_REC_SIZE (UNIV_PAGE_SIZE / 2 - 200)
@@ -67,55 +65,116 @@ enum btr_latch_mode {
/** Search the previous record. */
BTR_SEARCH_PREV = 35,
/** Modify the previous record. */
- BTR_MODIFY_PREV = 36
+ BTR_MODIFY_PREV = 36,
+ /** Start searching the entire B-tree. */
+ BTR_SEARCH_TREE = 37,
+ /** Continue searching the entire B-tree. */
+ BTR_CONT_SEARCH_TREE = 38,
+
+ /* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually
+ exclusive. */
+ /** The search tuple will be inserted to the secondary index
+ at the searched position. When the leaf page is not in the
+ buffer pool, try to use the change buffer. */
+ BTR_INSERT = 512,
+
+ /** Try to delete-mark a secondary index leaf page record at
+ the searched position using the change buffer when the page is
+ not in the buffer pool. */
+ BTR_DELETE_MARK = 4096,
+
+ /** Try to purge the record using the change buffer when the
+ secondary index leaf page is not in the buffer pool. */
+ BTR_DELETE = 8192,
+
+ /** The caller is already holding dict_index_t::lock S-latch. */
+ BTR_ALREADY_S_LATCHED = 16384,
+ /** Search and S-latch a leaf page, assuming that the
+ dict_index_t::lock S-latch is being held. */
+ BTR_SEARCH_LEAF_ALREADY_S_LATCHED = BTR_SEARCH_LEAF
+ | BTR_ALREADY_S_LATCHED,
+ /** Search the entire index tree, assuming that the
+ dict_index_t::lock S-latch is being held. */
+ BTR_SEARCH_TREE_ALREADY_S_LATCHED = BTR_SEARCH_TREE
+ | BTR_ALREADY_S_LATCHED,
+ /** Search and X-latch a leaf page, assuming that the
+ dict_index_t::lock S-latch is being held. */
+ BTR_MODIFY_LEAF_ALREADY_S_LATCHED = BTR_MODIFY_LEAF
+ | BTR_ALREADY_S_LATCHED,
+
+ /** Attempt to delete-mark a secondary index record. */
+ BTR_DELETE_MARK_LEAF = BTR_MODIFY_LEAF | BTR_DELETE_MARK,
+ /** Attempt to delete-mark a secondary index record
+ while holding the dict_index_t::lock S-latch. */
+ BTR_DELETE_MARK_LEAF_ALREADY_S_LATCHED = BTR_DELETE_MARK_LEAF
+ | BTR_ALREADY_S_LATCHED,
+ /** Attempt to purge a secondary index record. */
+ BTR_PURGE_LEAF = BTR_MODIFY_LEAF | BTR_DELETE,
+ /** Attempt to purge a secondary index record
+ while holding the dict_index_t::lock S-latch. */
+ BTR_PURGE_LEAF_ALREADY_S_LATCHED = BTR_PURGE_LEAF
+ | BTR_ALREADY_S_LATCHED,
+
+ /** In the case of BTR_MODIFY_TREE, the caller specifies
+ the intention to delete record only. It is used to optimize
+ block->lock range.*/
+ BTR_LATCH_FOR_DELETE = 65536,
+
+ /** Attempt to purge a secondary index record in the tree. */
+ BTR_PURGE_TREE = BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE
};
-/* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually exclusive. */
-
-/** If this is ORed to btr_latch_mode, it means that the search tuple
-will be inserted to the index, at the searched position.
-When the record is not in the buffer pool, try to use the insert buffer. */
-#define BTR_INSERT 512
-
/** This flag ORed to btr_latch_mode says that we do the search in query
optimization */
-#define BTR_ESTIMATE 1024
+#define BTR_ESTIMATE 1024U
/** This flag ORed to BTR_INSERT says that we can ignore possible
UNIQUE definition on secondary indexes when we decide if we can use
the insert buffer to speed up inserts */
-#define BTR_IGNORE_SEC_UNIQUE 2048
-
-/** Try to delete mark the record at the searched position using the
-insert/delete buffer when the record is not in the buffer pool. */
-#define BTR_DELETE_MARK 4096
-
-/** Try to purge the record at the searched position using the insert/delete
-buffer when the record is not in the buffer pool. */
-#define BTR_DELETE 8192
-
-/** In the case of BTR_SEARCH_LEAF or BTR_MODIFY_LEAF, the caller is
-already holding an S latch on the index tree */
-#define BTR_ALREADY_S_LATCHED 16384
-
-#define BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode) \
- ((latch_mode) & ~(BTR_INSERT \
- | BTR_DELETE_MARK \
- | BTR_DELETE \
- | BTR_ESTIMATE \
- | BTR_IGNORE_SEC_UNIQUE \
- | BTR_ALREADY_S_LATCHED))
-#endif /* UNIV_HOTBACKUP */
+#define BTR_IGNORE_SEC_UNIQUE 2048U
+
+/** In the case of BTR_MODIFY_TREE, the caller specifies the intention
+to insert record only. It is used to optimize block->lock range.*/
+#define BTR_LATCH_FOR_INSERT 32768U
+
+/** This flag is for undoing an insert into an rtree. For rtree, we need
+this flag to find the proper record to undo the insert. */
+#define BTR_RTREE_UNDO_INS 131072U
+
+/** In the case of BTR_MODIFY_LEAF, the caller intends to allocate or
+free the pages of externally stored fields. */
+#define BTR_MODIFY_EXTERNAL 262144U
+
+/** Try to delete-mark the record at the searched position when the
+record is in a spatial index */
+#define BTR_RTREE_DELETE_MARK 524288U
+
+#define BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode) \
+ ((latch_mode) & btr_latch_mode(~(BTR_INSERT \
+ | BTR_DELETE_MARK \
+ | BTR_RTREE_UNDO_INS \
+ | BTR_RTREE_DELETE_MARK \
+ | BTR_DELETE \
+ | BTR_ESTIMATE \
+ | BTR_IGNORE_SEC_UNIQUE \
+ | BTR_ALREADY_S_LATCHED \
+ | BTR_LATCH_FOR_INSERT \
+ | BTR_LATCH_FOR_DELETE \
+ | BTR_MODIFY_EXTERNAL)))
+
+#define BTR_LATCH_MODE_WITHOUT_INTENTION(latch_mode) \
+ ((latch_mode) & btr_latch_mode(~(BTR_LATCH_FOR_INSERT \
+ | BTR_LATCH_FOR_DELETE \
+ | BTR_MODIFY_EXTERNAL)))
/**************************************************************//**
Report that an index page is corrupted. */
-UNIV_INTERN
void
btr_corruption_report(
/*==================*/
const buf_block_t* block, /*!< in: corrupted block */
const dict_index_t* index) /*!< in: index tree */
- UNIV_COLD MY_ATTRIBUTE((nonnull));
+ ATTRIBUTE_COLD __attribute__((nonnull));
/** Assert that a B-tree page is not corrupted.
@param block buffer block containing a B-tree page
@@ -127,96 +186,9 @@ btr_corruption_report(
ut_error; \
}
-#ifndef UNIV_HOTBACKUP
-#ifdef UNIV_BLOB_DEBUG
-# include "ut0rbt.h"
-/** An index->blobs entry for keeping track of off-page column references */
-struct btr_blob_dbg_t
-{
- unsigned blob_page_no:32; /*!< first BLOB page number */
- unsigned ref_page_no:32; /*!< referring page number */
- unsigned ref_heap_no:16; /*!< referring heap number */
- unsigned ref_field_no:10; /*!< referring field number */
- unsigned owner:1; /*!< TRUE if BLOB owner */
- unsigned always_owner:1; /*!< TRUE if always
- has been the BLOB owner;
- reset to TRUE on B-tree
- page splits and merges */
- unsigned del:1; /*!< TRUE if currently
- delete-marked */
-};
-
-/**************************************************************//**
-Add a reference to an off-page column to the index->blobs map. */
-UNIV_INTERN
-void
-btr_blob_dbg_add_blob(
-/*==================*/
- const rec_t* rec, /*!< in: clustered index record */
- ulint field_no, /*!< in: number of off-page column */
- ulint page_no, /*!< in: start page of the column */
- dict_index_t* index, /*!< in/out: index tree */
- const char* ctx) /*!< in: context (for logging) */
- MY_ATTRIBUTE((nonnull));
/**************************************************************//**
-Display the references to off-page columns.
-This function is to be called from a debugger,
-for example when a breakpoint on ut_dbg_assertion_failed is hit. */
-UNIV_INTERN
-void
-btr_blob_dbg_print(
-/*===============*/
- const dict_index_t* index) /*!< in: index tree */
- MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Check that there are no references to off-page columns from or to
-the given page. Invoked when freeing or clearing a page.
-@return TRUE when no orphan references exist */
-UNIV_INTERN
-ibool
-btr_blob_dbg_is_empty(
-/*==================*/
- dict_index_t* index, /*!< in: index */
- ulint page_no) /*!< in: page number */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/**************************************************************//**
-Modify the 'deleted' flag of a record. */
-UNIV_INTERN
-void
-btr_blob_dbg_set_deleted_flag(
-/*==========================*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in/out: index */
- const ulint* offsets,/*!< in: rec_get_offs(rec, index) */
- ibool del) /*!< in: TRUE=deleted, FALSE=exists */
- MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Change the ownership of an off-page column. */
-UNIV_INTERN
-void
-btr_blob_dbg_owner(
-/*===============*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in/out: index */
- const ulint* offsets,/*!< in: rec_get_offs(rec, index) */
- ulint i, /*!< in: ith field in rec */
- ibool own) /*!< in: TRUE=owned, FALSE=disowned */
- MY_ATTRIBUTE((nonnull));
-/** Assert that there are no BLOB references to or from the given page. */
-# define btr_blob_dbg_assert_empty(index, page_no) \
- ut_a(btr_blob_dbg_is_empty(index, page_no))
-#else /* UNIV_BLOB_DEBUG */
-# define btr_blob_dbg_add_blob(rec, field_no, page, index, ctx) ((void) 0)
-# define btr_blob_dbg_set_deleted_flag(rec, index, offsets, del)((void) 0)
-# define btr_blob_dbg_owner(rec, index, offsets, i, val) ((void) 0)
-# define btr_blob_dbg_assert_empty(index, page_no) ((void) 0)
-#endif /* UNIV_BLOB_DEBUG */
-
-/**************************************************************//**
-Gets the root node of a tree and x-latches it.
-@return root page, x-latched */
-UNIV_INTERN
+Gets the root node of a tree and sx-latches it for segment access.
+@return root page, sx-latched */
page_t*
btr_root_get(
/*=========*/
@@ -227,150 +199,90 @@ btr_root_get(
/**************************************************************//**
Checks and adjusts the root node of a tree during IMPORT TABLESPACE.
@return error code, or DB_SUCCESS */
-UNIV_INTERN
dberr_t
btr_root_adjust_on_import(
/*======================*/
const dict_index_t* index) /*!< in: index tree */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/**************************************************************//**
Gets the height of the B-tree (the level of the root, when the leaf
level is assumed to be 0). The caller must hold an S or X latch on
the index.
-@return tree height (level of the root) */
-UNIV_INTERN
+@return tree height (level of the root) */
ulint
btr_height_get(
/*===========*/
dict_index_t* index, /*!< in: index tree */
mtr_t* mtr) /*!< in/out: mini-transaction */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**************************************************************//**
-Gets a buffer page and declares its latching order level. */
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Gets a buffer page and declares its latching order level.
+@param[in] page_id page id
+@param[in] mode latch mode
+@param[in] file file name
+@param[in] line line where called
+@param[in] index index tree, may be NULL if it is not an insert buffer
+tree
+@param[in,out] mtr mini-transaction
+@return block */
UNIV_INLINE
buf_block_t*
btr_block_get_func(
-/*===============*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number */
- ulint mode, /*!< in: latch mode */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- dict_index_t* index, /*!< in: index tree, may be NULL
- if it is not an insert buffer tree */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-# ifdef UNIV_SYNC_DEBUG
-/** Gets a buffer page and declares its latching order level.
-@param space tablespace identifier
-@param zip_size compressed page size in bytes or 0 for uncompressed pages
-@param page_no page number
-@param mode latch mode
-@param index index tree, may be NULL if not the insert buffer tree
-@param mtr mini-transaction handle
-@return the block descriptor */
-# define btr_block_get(space,zip_size,page_no,mode,index,mtr) \
- btr_block_get_func(space,zip_size,page_no,mode, \
- __FILE__,__LINE__,index,mtr)
-# else /* UNIV_SYNC_DEBUG */
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ ulint mode,
+ const char* file,
+ unsigned line,
+ dict_index_t* index,
+ mtr_t* mtr);
+
/** Gets a buffer page and declares its latching order level.
-@param space tablespace identifier
-@param zip_size compressed page size in bytes or 0 for uncompressed pages
-@param page_no page number
-@param mode latch mode
-@param idx index tree, may be NULL if not the insert buffer tree
-@param mtr mini-transaction handle
+@param page_id tablespace/page identifier
+@param page_size page size
+@param mode latch mode
+@param index index tree, may be NULL if not the insert buffer tree
+@param mtr mini-transaction handle
@return the block descriptor */
-# define btr_block_get(space,zip_size,page_no,mode,idx,mtr) \
- btr_block_get_func(space,zip_size,page_no,mode, \
- __FILE__,__LINE__,idx,mtr)
-# endif /* UNIV_SYNC_DEBUG */
-/** Gets a buffer page and declares its latching order level.
-@param space tablespace identifier
-@param zip_size compressed page size in bytes or 0 for uncompressed pages
-@param page_no page number
-@param mode latch mode
-@param idx index tree, may be NULL if not the insert buffer tree
-@param mtr mini-transaction handle
-@return the uncompressed page frame */
-UNIV_INLINE
-page_t*
-btr_page_get(
-/*=========*/
- ulint space,
- ulint zip_size,
- ulint root_page_no,
- ulint mode,
- dict_index_t* index,
- mtr_t* mtr)
- MY_ATTRIBUTE((warn_unused_result));
-#endif /* !UNIV_HOTBACKUP */
+# define btr_block_get(page_id, page_size, mode, index, mtr) \
+ btr_block_get_func(page_id, page_size, mode, \
+ __FILE__, __LINE__, (dict_index_t*)index, mtr)
/**************************************************************//**
Gets the index id field of a page.
-@return index id */
+@return index id */
UNIV_INLINE
index_id_t
btr_page_get_index_id(
/*==================*/
const page_t* page) /*!< in: index page */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-#ifndef UNIV_HOTBACKUP
+ MY_ATTRIBUTE((warn_unused_result));
/********************************************************//**
Gets the node level field in an index page.
-@return level, leaf level == 0 */
+@return level, leaf level == 0 */
UNIV_INLINE
ulint
btr_page_get_level_low(
/*===================*/
const page_t* page) /*!< in: index page */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
#define btr_page_get_level(page, mtr) btr_page_get_level_low(page)
-/********************************************************//**
-Gets the next index page number.
-@return next page number */
-UNIV_INLINE
-ulint
-btr_page_get_next(
-/*==============*/
- const page_t* page, /*!< in: index page */
- mtr_t* mtr) /*!< in: mini-transaction handle */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************//**
-Gets the previous index page number.
-@return prev page number */
-UNIV_INLINE
-ulint
-btr_page_get_prev(
-/*==============*/
- const page_t* page, /*!< in: index page */
- mtr_t* mtr) /*!< in: mini-transaction handle */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*************************************************************//**
-Gets pointer to the previous user record in the tree. It is assumed
-that the caller has appropriate latches on the page and its neighbor.
-@return previous user record, NULL if there is none */
-UNIV_INTERN
-rec_t*
-btr_get_prev_user_rec(
-/*==================*/
- rec_t* rec, /*!< in: record on leaf level */
- mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if
- needed, also to the previous page */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*************************************************************//**
-Gets pointer to the next user record in the tree. It is assumed
-that the caller has appropriate latches on the page and its neighbor.
-@return next user record, NULL if there is none */
-UNIV_INTERN
-rec_t*
-btr_get_next_user_rec(
-/*==================*/
- rec_t* rec, /*!< in: record on leaf level */
- mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if
- needed, also to the next page */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** Read FIL_PAGE_NEXT.
+@param page buffer pool page
+@return next page number */
+inline uint32_t btr_page_get_next(const page_t* page)
+{
+ return mach_read_from_4(page + FIL_PAGE_NEXT);
+}
+
+/** Read FIL_PAGE_PREV.
+@param page buffer pool page
+@return previous page number */
+inline uint32_t btr_page_get_prev(const page_t* page)
+{
+ return mach_read_from_4(page + FIL_PAGE_PREV);
+}
+
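The two readers above expose the doubly-linked sibling pointers of a
B-tree level; a sketch of following FIL_PAGE_NEXT across one level, where
fetch_frame() is a hypothetical helper returning the frame of a latched
page:

	uint32_t next = btr_page_get_next(page);
	while (next != FIL_NULL) {
		const page_t* p = fetch_frame(next);	/* hypothetical */
		next = btr_page_get_next(p);
	}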
/**************************************************************//**
Releases the latch on a leaf page and bufferunfixes it. */
UNIV_INLINE
@@ -388,60 +300,91 @@ NOTE: the offsets array must contain all offsets for the record since
we read the last field according to offsets and assume that it contains
the child page number. In other words offsets must have been retrieved
with rec_get_offsets(n_fields=ULINT_UNDEFINED).
-@return child node address */
+@return child node address */
UNIV_INLINE
ulint
btr_node_ptr_get_child_page_no(
/*===========================*/
const rec_t* rec, /*!< in: node pointer record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/************************************************************//**
-Creates the root node for a new index tree.
-@return page number of the created root, FIL_NULL if did not succeed */
-UNIV_INTERN
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Create the root node for a new index tree.
+@param[in] type type of the index
+@param[in] space space where created
+@param[in] page_size page size
+@param[in] index_id index id
+@param[in] index index, or NULL when applying TRUNCATE
+log record during recovery
+@param[in]	btr_redo_create_info	used for applying TRUNCATE log
+record during recovery
+@param[in]	mtr			mini-transaction handle
+@return page number of the created root, FIL_NULL if did not succeed */
ulint
btr_create(
-/*=======*/
- ulint type, /*!< in: type of the index */
- ulint space, /*!< in: space where created */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- index_id_t index_id,/*!< in: index id */
- dict_index_t* index, /*!< in: index */
- mtr_t* mtr) /*!< in: mini-transaction handle */
- MY_ATTRIBUTE((nonnull));
-/************************************************************//**
-Frees a B-tree except the root page, which MUST be freed after this
-by calling btr_free_root. */
-UNIV_INTERN
+ ulint type,
+ ulint space,
+ const page_size_t& page_size,
+ index_id_t index_id,
+ dict_index_t* index,
+ const btr_create_t* btr_redo_create_info,
+ mtr_t* mtr);
+
+/** Free a persistent index tree if it exists.
+@param[in] page_id root page id
+@param[in] page_size page size
+@param[in] index_id PAGE_INDEX_ID contents
+@param[in,out] mtr mini-transaction */
void
-btr_free_but_not_root(
-/*==================*/
- ulint space, /*!< in: space where created */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint root_page_no); /*!< in: root page number */
-/************************************************************//**
-Frees the B-tree root page. Other tree MUST already have been freed. */
-UNIV_INTERN
+btr_free_if_exists(
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ index_id_t index_id,
+ mtr_t* mtr);
+
+/** Free an index tree in a temporary tablespace or during TRUNCATE TABLE.
+@param[in] page_id root page id
+@param[in] page_size page size */
+void
+btr_free(
+ const page_id_t page_id,
+ const page_size_t& page_size);
+
+/** Read the last used AUTO_INCREMENT value from PAGE_ROOT_AUTO_INC.
+@param[in,out] index clustered index
+@return the last used AUTO_INCREMENT value
+@retval 0 on error or if no AUTO_INCREMENT value was used yet */
+ib_uint64_t
+btr_read_autoinc(dict_index_t* index)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** Read the last used AUTO_INCREMENT value from PAGE_ROOT_AUTO_INC,
+or fall back to MAX(auto_increment_column).
+@param[in] table table containing an AUTO_INCREMENT column
+@param[in] col_no index of the AUTO_INCREMENT column
+@return the AUTO_INCREMENT value
+@retval 0 on error or if no AUTO_INCREMENT value was used yet */
+ib_uint64_t
+btr_read_autoinc_with_fallback(const dict_table_t* table, unsigned col_no)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** Write the next available AUTO_INCREMENT value to PAGE_ROOT_AUTO_INC.
+@param[in,out] index clustered index
+@param[in] autoinc the AUTO_INCREMENT value
+@param[in] reset whether to reset the AUTO_INCREMENT
+ to a possibly smaller value than currently
+ exists in the page */
void
-btr_free_root(
-/*==========*/
- ulint space, /*!< in: space where created */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint root_page_no, /*!< in: root page number */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+btr_write_autoinc(dict_index_t* index, ib_uint64_t autoinc, bool reset = false)
MY_ATTRIBUTE((nonnull));
+
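The three declarations above make up the PAGE_ROOT_AUTO_INC persistence
interface; a hedged usage sketch, assuming index points to the clustered
index of a table with an AUTO_INCREMENT column:

	ib_uint64_t last = btr_read_autoinc(index);	/* 0 if never used */
	btr_write_autoinc(index, last + 1);	/* persist a larger value */
	btr_write_autoinc(index, 1, true);	/* reset, e.g. after TRUNCATE */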
/*************************************************************//**
Makes tree one level higher by splitting the root, and inserts
the tuple. It is assumed that mtr contains an x-latch on the tree.
NOTE that the operation of this function must always succeed,
we cannot reverse it: therefore enough free disk space must be
guaranteed to be available before this function is called.
-@return inserted record */
-UNIV_INTERN
+@return inserted record */
rec_t*
btr_root_raise_and_insert(
/*======================*/
@@ -450,13 +393,13 @@ btr_root_raise_and_insert(
on the root page; when the function returns,
the cursor is positioned on the predecessor
of the inserted record */
- ulint** offsets,/*!< out: offsets on inserted record */
+ offset_t** offsets,/*!< out: offsets on inserted record */
mem_heap_t** heap, /*!< in/out: pointer to memory heap
that can be emptied, or NULL */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr) /*!< in: mtr */
- __attribute__((nonnull(2,3,4,7), warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/*************************************************************//**
Reorganizes an index page.
@@ -468,7 +411,6 @@ IBUF_BITMAP_FREE is unaffected by reorganization.
@retval true if the operation was successful
@retval false if it is a compressed page, and recompression failed */
-UNIV_INTERN
bool
btr_page_reorganize_low(
/*====================*/
@@ -482,7 +424,7 @@ btr_page_reorganize_low(
page_cur_t* cursor, /*!< in/out: page cursor */
dict_index_t* index, /*!< in: the index tree of the page */
mtr_t* mtr) /*!< in/out: mini-transaction */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/*************************************************************//**
Reorganizes an index page.
@@ -494,7 +436,6 @@ IBUF_BITMAP_FREE is unaffected by reorganization.
@retval true if the operation was successful
@retval false if it is a compressed page, and recompression failed */
-UNIV_INTERN
bool
btr_page_reorganize(
/*================*/
@@ -502,32 +443,23 @@ btr_page_reorganize(
dict_index_t* index, /*!< in: the index tree of the page */
mtr_t* mtr) /*!< in/out: mini-transaction */
MY_ATTRIBUTE((nonnull));
-/*************************************************************//**
-Decides if the page should be split at the convergence point of
-inserts converging to left.
-@return TRUE if split recommended */
-UNIV_INTERN
-ibool
-btr_page_get_split_rec_to_left(
-/*===========================*/
- btr_cur_t* cursor, /*!< in: cursor at which to insert */
- rec_t** split_rec)/*!< out: if split recommended,
- the first record on upper half page,
- or NULL if tuple should be first */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*************************************************************//**
-Decides if the page should be split at the convergence point of
-inserts converging to right.
-@return TRUE if split recommended */
-UNIV_INTERN
-ibool
-btr_page_get_split_rec_to_right(
-/*============================*/
- btr_cur_t* cursor, /*!< in: cursor at which to insert */
- rec_t** split_rec)/*!< out: if split recommended,
- the first record on upper half page,
- or NULL if tuple should be first */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+/** Decide if the page should be split at the convergence point of inserts
+converging to the left.
+@param[in] cursor insert position
+@return the first record to be moved to the right half page
+@retval NULL if no split is recommended */
+rec_t* btr_page_get_split_rec_to_left(const btr_cur_t* cursor);
+/** Decide if the page should be split at the convergence point of inserts
+converging to the right.
+@param[in] cursor insert position
+@param[out] split_rec if split recommended, the first record
+ on the right half page, or
+ NULL if the to-be-inserted record
+ should be first
+@return whether split is recommended */
+bool
+btr_page_get_split_rec_to_right(const btr_cur_t* cursor, rec_t** split_rec);
+
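The two split-point functions differ in shape: one returns the boundary
record directly, the other reports through an out parameter. A sketch of
how an insert path might consult them, with cursor assumed to be
positioned at the insert point:

	rec_t*	split_rec;
	if (rec_t* first = btr_page_get_split_rec_to_left(cursor)) {
		/* split so that first begins the right half page */
	} else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
		/* split_rec begins the right half page; if it is NULL,
		the record to be inserted becomes the first one there */
	}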
/*************************************************************//**
Splits an index page to halves and inserts the tuple. It is assumed
that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is
@@ -537,7 +469,6 @@ free disk space (2 pages) must be guaranteed to be available before
this function is called.
@return inserted record */
-UNIV_INTERN
rec_t*
btr_page_split_and_insert(
/*======================*/
@@ -545,17 +476,16 @@ btr_page_split_and_insert(
btr_cur_t* cursor, /*!< in: cursor at which to insert; when the
function returns, the cursor is positioned
on the predecessor of the inserted record */
- ulint** offsets,/*!< out: offsets on inserted record */
+ offset_t** offsets,/*!< out: offsets on inserted record */
mem_heap_t** heap, /*!< in/out: pointer to memory heap
that can be emptied, or NULL */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr) /*!< in: mtr */
- __attribute__((nonnull(2,3,4,7), warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/*******************************************************//**
Inserts a data tuple to a tree on a non-leaf level. It is assumed
that mtr holds an x-latch on the tree. */
-UNIV_INTERN
void
btr_insert_on_non_leaf_level_func(
/*==============================*/
@@ -564,44 +494,33 @@ btr_insert_on_non_leaf_level_func(
ulint level, /*!< in: level, must be > 0 */
dtuple_t* tuple, /*!< in: the record to be inserted */
const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mtr */
- MY_ATTRIBUTE((nonnull));
-# define btr_insert_on_non_leaf_level(f,i,l,t,m) \
+ unsigned line, /*!< in: line where called */
+ mtr_t* mtr); /*!< in: mtr */
+#define btr_insert_on_non_leaf_level(f,i,l,t,m) \
btr_insert_on_non_leaf_level_func(f,i,l,t,__FILE__,__LINE__,m)
-#endif /* !UNIV_HOTBACKUP */
-/****************************************************************//**
-Sets a record as the predefined minimum record. */
-UNIV_INTERN
-void
-btr_set_min_rec_mark(
-/*=================*/
- rec_t* rec, /*!< in/out: record */
- mtr_t* mtr) /*!< in: mtr */
- MY_ATTRIBUTE((nonnull));
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Deletes on the upper level the node pointer to a page. */
-UNIV_INTERN
-void
-btr_node_ptr_delete(
-/*================*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: page whose node pointer is deleted */
- mtr_t* mtr) /*!< in: mtr */
+
+/** Sets a record as the predefined minimum record. */
+void btr_set_min_rec_mark(rec_t* rec, mtr_t* mtr) MY_ATTRIBUTE((nonnull));
+
+/** Seek to the parent page of a B-tree page.
+@param[in,out] index b-tree
+@param[in] block child page
+@param[in,out] mtr mini-transaction
+@param[out] cursor cursor pointing to the x-latched parent page */
+void btr_page_get_father(dict_index_t* index, buf_block_t* block, mtr_t* mtr,
+ btr_cur_t* cursor)
MY_ATTRIBUTE((nonnull));
#ifdef UNIV_DEBUG
/************************************************************//**
Checks that the node pointer to a page is appropriate.
-@return TRUE */
-UNIV_INTERN
+@return TRUE */
ibool
btr_check_node_ptr(
/*===============*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: index page */
mtr_t* mtr) /*!< in: mtr */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
#endif /* UNIV_DEBUG */
/*************************************************************//**
Tries to merge the page first to the left immediate brother if such a
@@ -612,8 +531,7 @@ level lifts the records of the page to the father page, thus reducing the
tree height. It is assumed that mtr holds an x-latch on the tree and on the
page. If cursor is on the leaf level, mtr must also hold x-latches to
the brothers, if they exist.
-@return TRUE on success */
-UNIV_INTERN
+@return TRUE on success */
ibool
btr_compress(
/*=========*/
@@ -629,20 +547,16 @@ btr_compress(
Discards a page from a B-tree. This is used to remove the last record from
a B-tree page: the whole page must be removed at the same time. This cannot
be used for the root page, which is allowed to be empty. */
-UNIV_INTERN
void
btr_discard_page(
/*=============*/
btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on
the root page */
- mtr_t* mtr) /*!< in: mtr */
- MY_ATTRIBUTE((nonnull));
-#endif /* !UNIV_HOTBACKUP */
+ mtr_t* mtr); /*!< in: mtr */
/****************************************************************//**
Parses the redo log record for setting an index record as the predefined
minimum record.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
btr_parse_set_min_rec_mark(
/*=======================*/
@@ -654,8 +568,7 @@ btr_parse_set_min_rec_mark(
MY_ATTRIBUTE((nonnull(1,2), warn_unused_result));
/***********************************************************//**
Parses a redo log record of reorganizing a page.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
btr_parse_page_reorganize(
/*======================*/
@@ -665,12 +578,10 @@ btr_parse_page_reorganize(
bool compressed,/*!< in: true if compressed page */
buf_block_t* block, /*!< in: page to be reorganized, or NULL */
mtr_t* mtr) /*!< in: mtr or NULL */
- MY_ATTRIBUTE((nonnull(1,2,3), warn_unused_result));
-#ifndef UNIV_HOTBACKUP
+ MY_ATTRIBUTE((warn_unused_result));
/**************************************************************//**
Gets the number of pages in a B-tree.
-@return number of pages, or ULINT_UNDEFINED if the index is unavailable */
-UNIV_INTERN
+@return number of pages, or ULINT_UNDEFINED if the index is unavailable */
ulint
btr_get_size(
/*=========*/
@@ -678,7 +589,7 @@ btr_get_size(
ulint flag, /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
mtr_t* mtr) /*!< in/out: mini-transaction where index
is s-latched */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/**************************************************************//**
Gets the number of reserved and used pages in a B-tree.
@return number of pages reserved, or ULINT_UNDEFINED if the index
@@ -701,7 +612,6 @@ that the caller has made the reservation for free extents!
@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
(init_mtr == mtr, or the page was not previously freed in mtr)
@retval block (not allocated or initialized) otherwise */
-UNIV_INTERN
buf_block_t*
btr_page_alloc(
/*===========*/
@@ -716,32 +626,39 @@ btr_page_alloc(
mtr_t* init_mtr) /*!< in/out: mini-transaction
for x-latching and initializing
the page */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/**************************************************************//**
-Frees a file page used in an index tree. NOTE: cannot free field external
-storage pages because the page must contain info on its level. */
-UNIV_INTERN
+Creates a new index page (not the root, and also not
+used in page reorganization). @see btr_page_empty(). */
void
-btr_page_free(
-/*==========*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: block to be freed, x-latched */
- mtr_t* mtr) /*!< in: mtr */
- MY_ATTRIBUTE((nonnull));
+btr_page_create(
+/*============*/
+ buf_block_t* block, /*!< in/out: page to be created */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ dict_index_t* index, /*!< in: index */
+ ulint level, /*!< in: the B-tree level of the page */
+ mtr_t* mtr); /*!< in: mtr */
+
+/** Free an index page.
+@param[in,out] index index tree
+@param[in,out] block block to be freed
+@param[in,out] mtr mini-transaction
+@param[in] blob whether this is freeing a BLOB page */
+MY_ATTRIBUTE((nonnull))
+void btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr,
+ bool blob = false);
+
/**************************************************************//**
-Frees a file page used in an index tree. Can be used also to BLOB
-external storage pages, because the page level 0 can be given as an
-argument. */
-UNIV_INTERN
-void
-btr_page_free_low(
-/*==============*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: block to be freed, x-latched */
- ulint level, /*!< in: page level */
- bool blob, /*!< in: blob page */
- mtr_t* mtr) /*!< in: mtr */
- __attribute__((nonnull));
+Gets the root node of a tree and x- or s-latches it.
+@return root page, x- or s-latched */
+buf_block_t*
+btr_root_block_get(
+/*===============*/
+ const dict_index_t* index, /*!< in: index tree */
+ ulint mode, /*!< in: either RW_S_LATCH
+ or RW_X_LATCH */
+ mtr_t* mtr); /*!< in: mtr */
+
/*************************************************************//**
Reorganizes an index page.
@@ -772,7 +689,6 @@ btr_page_reorganize_block(
#ifdef UNIV_BTR_PRINT
/*************************************************************//**
Prints size info of a B-tree. */
-UNIV_INTERN
void
btr_print_size(
/*===========*/
@@ -780,7 +696,6 @@ btr_print_size(
MY_ATTRIBUTE((nonnull));
/**************************************************************//**
Prints directories and other info of all nodes in the index. */
-UNIV_INTERN
void
btr_print_index(
/*============*/
@@ -792,8 +707,7 @@ btr_print_index(
/************************************************************//**
Checks the size and number of fields in a record based on the definition of
the index.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
btr_index_rec_validate(
/*===================*/
@@ -802,39 +716,17 @@ btr_index_rec_validate(
ibool dump_on_error) /*!< in: TRUE if the function
should print hex dump of record
and page on error */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/**************************************************************//**
Checks the consistency of an index tree.
@return DB_SUCCESS if ok, error code if not */
-UNIV_INTERN
dberr_t
btr_validate_index(
/*===============*/
- dict_index_t* index, /*!< in: index */
- const trx_t* trx) /*!< in: transaction or 0 */
- MY_ATTRIBUTE((nonnull(1), warn_unused_result));
-
-#ifdef UNIV_SYNC_DEBUG
-/*************************************************************//**
-Removes a page from the level list of pages.
-@param space in: space where removed
-@param zip_size in: compressed page size in bytes, or 0 for uncompressed
-@param page in/out: page to remove
-@param index in: index tree
-@param mtr in/out: mini-transaction */
-# define btr_level_list_remove(space,zip_size,page,index,mtr) \
- btr_level_list_remove_func(space,zip_size,page,index,mtr)
-#else /* UNIV_SYNC_DEBUG */
-/*************************************************************//**
-Removes a page from the level list of pages.
-@param space in: space where removed
-@param zip_size in: compressed page size in bytes, or 0 for uncompressed
-@param page in/out: page to remove
-@param index in: index tree
-@param mtr in/out: mini-transaction */
-# define btr_level_list_remove(space,zip_size,page,index,mtr) \
- btr_level_list_remove_func(space,zip_size,page,index,mtr)
-#endif /* UNIV_SYNC_DEBUG */
+ dict_index_t* index, /*!< in: index */
+ const trx_t* trx, /*!< in: transaction or 0 */
+ bool lockout)/*!< in: true if X-latch index is intended */
+ MY_ATTRIBUTE((warn_unused_result));
/*************************************************************//**
Removes a page from the level list of pages. */
@@ -843,11 +735,19 @@ void
btr_level_list_remove_func(
/*=======================*/
ulint space, /*!< in: space where removed */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
+ const page_size_t& page_size,/*!< in: page size */
page_t* page, /*!< in/out: page to remove */
dict_index_t* index, /*!< in: index tree */
mtr_t* mtr); /*!< in/out: mini-transaction */
+/*************************************************************//**
+Removes a page from the level list of pages.
+@param space in: space where removed
+@param page_size	in: page size
+@param page in/out: page to remove
+@param index in: index tree
+@param mtr in/out: mini-transaction */
+# define btr_level_list_remove(space,page_size,page,index,mtr)	\
+	btr_level_list_remove_func(space,page_size,page,index,mtr)
/*************************************************************//**
If page is the only on its level, this function moves its records to the
@@ -867,11 +767,8 @@ btr_lift_page_up(
#define BTR_N_LEAF_PAGES 1
#define BTR_TOTAL_SIZE 2
-#endif /* !UNIV_HOTBACKUP */
-#ifndef UNIV_NONINL
#include "btr0btr.ic"
-#endif
/****************************************************************
Global variable controlling if scrubbing should be performed */
diff --git a/storage/innobase/include/btr0btr.ic b/storage/innobase/include/btr0btr.ic
index 4215a1092bc..e5d0e805bd8 100644
--- a/storage/innobase/include/btr0btr.ic
+++ b/storage/innobase/include/btr0btr.ic
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2016, MariaDB Corporation.
+Copyright (c) 2015, 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,7 +25,6 @@ Created 6/2/1994 Heikki Tuuri
*******************************************************/
#include "mach0data.h"
-#ifndef UNIV_HOTBACKUP
#include "mtr0mtr.h"
#include "mtr0log.h"
#include "page0zip.h"
@@ -36,28 +35,31 @@ Created 6/2/1994 Heikki Tuuri
in btr_page_set_level and
btr_page_get_level_low */
-/**************************************************************//**
-Gets a buffer page and declares its latching order level. */
+/** Gets a buffer page and declares its latching order level.
+@param[in] page_id page id
+@param[in] mode latch mode
+@param[in] file file name
+@param[in] line line where called
+@param[in] index index tree, may be NULL if it is not an insert buffer
+tree
+@param[in,out] mtr mini-transaction
+@return block */
UNIV_INLINE
buf_block_t*
btr_block_get_func(
-/*===============*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number */
- ulint mode, /*!< in: latch mode */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- dict_index_t* index, /*!< in: index tree, may be NULL
- if it is not an insert buffer tree */
- mtr_t* mtr) /*!< in/out: mtr */
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ ulint mode,
+ const char* file,
+ unsigned line,
+ dict_index_t* index,
+ mtr_t* mtr)
{
buf_block_t* block;
- dberr_t err;
+	dberr_t		err = DB_SUCCESS;
- block = buf_page_get_gen(space, zip_size, page_no, mode,
- NULL, BUF_GET, file, line, mtr, &err);
+ block = buf_page_get_gen(
+ page_id, page_size, mode, NULL, BUF_GET, file, line, mtr, &err);
if (err == DB_DECRYPTION_FAILED) {
if (index && index->table) {
@@ -99,42 +101,9 @@ btr_page_set_index_id(
}
}
-/** Gets a buffer page and declares its latching order level.
-@param space tablespace identifier
-@param zip_size compressed page size in bytes or 0 for uncompressed pages
-@param page_no page number
-@param mode latch mode
-@param idx index tree, may be NULL if not the insert buffer tree
-@param mtr mini-transaction handle
-@return the uncompressed page frame */
-UNIV_INLINE
-page_t*
-btr_page_get(
-/*=========*/
- ulint space,
- ulint zip_size,
- ulint root_page_no,
- ulint mode,
- dict_index_t* index,
- mtr_t* mtr)
-{
- buf_block_t* block=NULL;
- buf_frame_t* frame=NULL;
-
- block = btr_block_get(space, zip_size, root_page_no, mode, index, mtr);
-
- if (block) {
- frame = buf_block_get_frame(block);
- }
-
- return ((page_t*)frame);
-}
-
-#endif /* !UNIV_HOTBACKUP */
-
/**************************************************************//**
Gets the index id field of a page.
-@return index id */
+@return index id */
UNIV_INLINE
index_id_t
btr_page_get_index_id(
@@ -144,10 +113,9 @@ btr_page_get_index_id(
return(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID));
}
-#ifndef UNIV_HOTBACKUP
/********************************************************//**
Gets the node level field in an index page.
-@return level, leaf level == 0 */
+@return level, leaf level == 0 */
UNIV_INLINE
ulint
btr_page_get_level_low(
@@ -177,7 +145,8 @@ btr_page_set_level(
ulint level, /*!< in: level, leaf level == 0 */
mtr_t* mtr) /*!< in: mini-transaction handle */
{
- ut_ad(page && mtr);
+ ut_ad(page != NULL);
+ ut_ad(mtr != NULL);
ut_ad(level <= BTR_MAX_NODE_LEVEL);
if (page_zip) {
@@ -192,26 +161,6 @@ btr_page_set_level(
}
/********************************************************//**
-Gets the next index page number.
-@return next page number */
-UNIV_INLINE
-ulint
-btr_page_get_next(
-/*==============*/
- const page_t* page, /*!< in: index page */
- mtr_t* mtr MY_ATTRIBUTE((unused)))
- /*!< in: mini-transaction handle */
-{
- ut_ad(page != NULL);
- ut_ad(mtr != NULL);
-#ifndef UNIV_INNOCHECKSUM
- ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_S_FIX));
-#endif /* UNIV_INNOCHECKSUM */
- return(mach_read_from_4(page + FIL_PAGE_NEXT));
-}
-
-/********************************************************//**
Sets the next index page field. */
UNIV_INLINE
void
@@ -235,22 +184,6 @@ btr_page_set_next(
}
/********************************************************//**
-Gets the previous index page number.
-@return prev page number */
-UNIV_INLINE
-ulint
-btr_page_get_prev(
-/*==============*/
- const page_t* page, /*!< in: index page */
- mtr_t* mtr MY_ATTRIBUTE((unused))) /*!< in: mini-transaction handle */
-{
- ut_ad(page != NULL);
- ut_ad(mtr != NULL);
-
- return(mach_read_from_4(page + FIL_PAGE_PREV));
-}
-
-/********************************************************//**
Sets the previous index page field. */
UNIV_INLINE
void
@@ -279,13 +212,13 @@ NOTE: the offsets array must contain all offsets for the record since
we read the last field according to offsets and assume that it contains
the child page number. In other words offsets must have been retrieved
with rec_get_offsets(n_fields=ULINT_UNDEFINED).
-@return child node address */
+@return child node address */
UNIV_INLINE
ulint
btr_node_ptr_get_child_page_no(
/*===========================*/
const rec_t* rec, /*!< in: node pointer record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
{
const byte* field;
ulint len;
@@ -300,15 +233,7 @@ btr_node_ptr_get_child_page_no(
ut_ad(len == 4);
page_no = mach_read_from_4(field);
-
- if (page_no == 0) {
- fprintf(stderr,
- "InnoDB: a nonsensical page number 0"
- " in a node ptr record at offset %lu\n",
- (ulong) page_offset(rec));
- buf_page_print(page_align(rec), 0);
- ut_ad(0);
- }
+ ut_ad(page_no > 1);
return(page_no);
}
@@ -324,12 +249,26 @@ btr_leaf_page_release(
BTR_MODIFY_LEAF */
mtr_t* mtr) /*!< in: mtr */
{
- ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF);
+ ut_ad(latch_mode == BTR_SEARCH_LEAF
+ || latch_mode == BTR_MODIFY_LEAF
+ || latch_mode == BTR_NO_LATCHES);
+
ut_ad(!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY));
- mtr_memo_release(mtr, block,
- latch_mode == BTR_SEARCH_LEAF
- ? MTR_MEMO_PAGE_S_FIX
- : MTR_MEMO_PAGE_X_FIX);
+ ulint mode;
+ switch (latch_mode) {
+ case BTR_SEARCH_LEAF:
+ mode = MTR_MEMO_PAGE_S_FIX;
+ break;
+ case BTR_MODIFY_LEAF:
+ mode = MTR_MEMO_PAGE_X_FIX;
+ break;
+ case BTR_NO_LATCHES:
+ mode = MTR_MEMO_BUF_FIX;
+ break;
+ default:
+ ut_a(0);
+ }
+
+ mtr->memo_release(block, mode);
}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/btr0bulk.h b/storage/innobase/include/btr0bulk.h
new file mode 100644
index 00000000000..3dbc85cad6d
--- /dev/null
+++ b/storage/innobase/include/btr0bulk.h
@@ -0,0 +1,381 @@
+/*****************************************************************************
+
+Copyright (c) 2014, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2019, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/btr0bulk.h
+The B-tree bulk load
+
+Created 03/11/2014 Shaohua Wang
+*************************************************************************/
+
+#ifndef btr0bulk_h
+#define btr0bulk_h
+
+#include "dict0dict.h"
+#include "rem0types.h"
+#include "page0cur.h"
+
+#include <vector>
+
+/** Innodb B-tree index fill factor for bulk load. */
+extern long innobase_fill_factor;
+/** whether to reduce redo logging during ALTER TABLE */
+extern my_bool innodb_log_optimize_ddl;
+
+/*
+The proper function call sequence of PageBulk is as follows:
+-- PageBulk::init
+-- PageBulk::insert
+-- PageBulk::finish
+-- PageBulk::compress(COMPRESSED table only)
+-- PageBulk::pageSplit(COMPRESSED table only)
+-- PageBulk::commit
+*/
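A minimal sketch of that sequence for an uncompressed table; index,
trx_id, page_no, level, observer, and the rec/offsets pair are assumed to
be in scope, and error handling is elided:

	PageBulk page_bulk(index, trx_id, page_no, level, observer);
	if (page_bulk.init() == DB_SUCCESS) {
		page_bulk.insert(rec, offsets);	/* repeated per record */
		page_bulk.finish();
		page_bulk.commit(true);	/* commit(false) on failure */
	}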
+
+class PageBulk
+{
+public:
+ /** Constructor
+ @param[in] index B-tree index
+ @param[in] page_no page number
+ @param[in] level page level
+ @param[in] trx_id transaction id
+ @param[in] observer flush observer */
+ PageBulk(
+ dict_index_t* index,
+ trx_id_t trx_id,
+ ulint page_no,
+ ulint level,
+ FlushObserver* observer)
+ :
+ m_heap(NULL),
+ m_index(index),
+ m_mtr(),
+ m_trx_id(trx_id),
+ m_block(NULL),
+ m_page(NULL),
+ m_page_zip(NULL),
+ m_cur_rec(NULL),
+ m_page_no(page_no),
+ m_level(level),
+ m_is_comp(dict_table_is_comp(index->table)),
+ m_heap_top(NULL),
+ m_rec_no(0),
+ m_free_space(0),
+ m_reserved_space(0),
+#ifdef UNIV_DEBUG
+ m_total_data(0),
+#endif /* UNIV_DEBUG */
+ m_modify_clock(0),
+ m_flush_observer(observer),
+ m_err(DB_SUCCESS)
+ {
+ ut_ad(!dict_index_is_spatial(m_index));
+ ut_ad(!dict_table_is_temporary(m_index->table));
+ }
+
+	/** Destructor */
+ ~PageBulk()
+ {
+ mem_heap_free(m_heap);
+ }
+
+	/** Initialize members, allocate a page if needed, and start the mtr.
+	Note: must be called exactly once, right after the constructor.
+ @return error code */
+ dberr_t init();
+
+ /** Insert a record in the page.
+ @param[in] rec record
+ @param[in] offsets record offsets */
+ void insert(const rec_t* rec, offset_t* offsets);
+
+ /** Mark end of insertion to the page. Scan all records to set page
+ dirs, and set page header members. */
+ void finish();
+
+ /** Commit mtr for a page
+ @param[in] success Flag whether all inserts succeed. */
+ void commit(bool success);
+
+	/** Compress the page if the table is compressed.
+	@return true on successful compression, or if none was needed
+	@return false if compression failed */
+ bool compress();
+
+ /** Check whether the record needs to be stored externally.
+	@param[in]	tuple		data tuple to be stored
+	@param[in]	rec_size	size of the record
+	@return true if the record needs to be stored externally
+	@return false if it can be stored entirely within the page */
+ bool needExt(const dtuple_t* tuple, ulint rec_size);
+
+ /** Store external record
+	@param[in]	big_rec		external record
+ @param[in] offsets record offsets
+ @return error code */
+ dberr_t storeExt(const big_rec_t* big_rec, offset_t* offsets);
+
+ /** Get node pointer
+ @return node pointer */
+ dtuple_t* getNodePtr();
+
+	/** Get the split rec in the page. We split a page in half when
+	compression fails, and the split rec should be copied to the new page.
+ @return split rec */
+ rec_t* getSplitRec();
+
+ /** Copy all records after split rec including itself.
+	@param[in]	split_rec	split rec */
+ void copyIn(rec_t* split_rec);
+
+ /** Remove all records after split rec including itself.
+	@param[in]	split_rec	split rec */
+ void copyOut(rec_t* split_rec);
+
+ /** Set next page
+ @param[in] next_page_no next page no */
+ inline void setNext(ulint next_page_no);
+
+ /** Set previous page
+ @param[in] prev_page_no previous page no */
+ inline void setPrev(ulint prev_page_no);
+
+	/** Release block by committing mtr */
+ inline void release();
+
+ /** Start mtr and latch block */
+ inline dberr_t latch();
+
+ /** Check if required space is available in the page for the rec
+ to be inserted. We check fill factor & padding here.
+	@param[in]	rec_size	required length
+ @return true if space is available */
+ inline bool isSpaceAvailable(ulint rec_size);
+
+ /** Get page no */
+ ulint getPageNo()
+ {
+ return(m_page_no);
+ }
+
+ /** Get page level */
+ ulint getLevel()
+ {
+ return(m_level);
+ }
+
+ /** Get record no */
+ ulint getRecNo()
+ {
+ return(m_rec_no);
+ }
+
+ /** Get page */
+ page_t* getPage()
+ {
+ return(m_page);
+ }
+
+ /** Get page zip */
+ page_zip_des_t* getPageZip()
+ {
+ return(m_page_zip);
+ }
+
+ dberr_t getError()
+ {
+ return(m_err);
+ }
+
+ /* Memory heap for internal allocation */
+ mem_heap_t* m_heap;
+
+private:
+ /** The index B-tree */
+ dict_index_t* m_index;
+
+ /** The mini-transaction */
+ mtr_t m_mtr;
+
+ /** The transaction id */
+ trx_id_t m_trx_id;
+
+ /** The buffer block */
+ buf_block_t* m_block;
+
+ /** The page */
+ page_t* m_page;
+
+ /** The page zip descriptor */
+ page_zip_des_t* m_page_zip;
+
+ /** The current rec, just before the next insert rec */
+ rec_t* m_cur_rec;
+
+ /** The page no */
+ ulint m_page_no;
+
+ /** The page level in B-tree */
+ ulint m_level;
+
+ /** Flag: is page in compact format */
+ const bool m_is_comp;
+
+ /** The heap top in page for next insert */
+ byte* m_heap_top;
+
+ /** User record no */
+ ulint m_rec_no;
+
+ /** The free space left in the page */
+ ulint m_free_space;
+
+ /** The reserved space for fill factor */
+ ulint m_reserved_space;
+
+ /** The padding space for compressed page */
+ ulint m_padding_space;
+
+#ifdef UNIV_DEBUG
+ /** Total data in the page */
+ ulint m_total_data;
+#endif /* UNIV_DEBUG */
+
+ /** The modify clock value of the buffer block
+ when the block is re-pinned */
+ ib_uint64_t m_modify_clock;
+
+ /** Flush observer, or NULL if redo logging is enabled */
+ FlushObserver* m_flush_observer;
+
+ /** Operation result DB_SUCCESS or error code */
+ dberr_t m_err;
+};
+
+typedef std::vector<PageBulk*, ut_allocator<PageBulk*> >
+ page_bulk_vector;
+
+class BtrBulk
+{
+public:
+ /** Constructor
+ @param[in] index B-tree index
+ @param[in] trx transaction
+ @param[in] observer flush observer */
+ BtrBulk(
+ dict_index_t* index,
+ const trx_t* trx,
+ FlushObserver* observer)
+ :
+ m_index(index),
+ m_trx(trx),
+ m_flush_observer(observer)
+ {
+ ut_ad(!dict_index_is_spatial(index));
+#ifdef UNIV_DEBUG
+ if (m_flush_observer)
+ fil_space_inc_redo_skipped_count(m_index->space);
+#endif /* UNIV_DEBUG */
+ }
+
+ /** Destructor */
+ ~BtrBulk()
+ {
+#ifdef UNIV_DEBUG
+ if (m_flush_observer)
+ fil_space_dec_redo_skipped_count(m_index->space);
+#endif /* UNIV_DEBUG */
+ }
+
+ /** Insert a tuple
+ @param[in] tuple tuple to insert.
+ @return error code */
+ dberr_t insert(dtuple_t* tuple)
+ {
+ return(insert(tuple, 0));
+ }
+
+	/** Finish the B-tree bulk load. We commit the last page in each level
+	and copy the last page in the top level to the root page of the index
+	if no error occurs.
+	@param[in]	err	error status of the bulk load so far
+ @return error code */
+ dberr_t finish(dberr_t err);
+
+ /** Release all latches */
+ void release();
+
+ /** Re-latch all latches */
+ void latch();
+
+private:
+ /** Insert a tuple to a page in a level
+ @param[in] tuple tuple to insert
+ @param[in] level B-tree level
+ @return error code */
+ dberr_t insert(dtuple_t* tuple, ulint level);
+
+ /** Split a page
+ @param[in] page_bulk page to split
+ @param[in] next_page_bulk next page
+ @return error code */
+ dberr_t pageSplit(PageBulk* page_bulk,
+ PageBulk* next_page_bulk);
+
+	/** Commit (finish) a page. We set the next/prev page numbers, compress
+	the page if the table is compressed (splitting it if compression
+	fails), insert a node pointer into the father page if needed, and
+	commit the mini-transaction.
+ @param[in] page_bulk page to commit
+ @param[in] next_page_bulk next page
+	@param[in]	insert_father	whether a node pointer needs to be inserted
+ @return error code */
+ dberr_t pageCommit(PageBulk* page_bulk,
+ PageBulk* next_page_bulk,
+ bool insert_father);
+
+ /** Abort a page when an error occurs
+ @param[in] page_bulk page bulk object
+	Note: pageAbort should only be called for a PageBulk object that is
+	no longer in m_page_bulks after pageCommit; PageBulk objects still in
+	m_page_bulks are committed or aborted in finish(). */
+ void pageAbort(PageBulk* page_bulk)
+ {
+ page_bulk->commit(false);
+ }
+
+ /** Log free check */
+ inline void logFreeCheck();
+
+private:
+ /** B-tree index */
+ dict_index_t*const m_index;
+
+ /** Transaction */
+ const trx_t*const m_trx;
+
+ /** Root page level */
+ ulint m_root_level;
+
+ /** Flush observer, or NULL if redo logging is enabled */
+ FlushObserver*const m_flush_observer;
+
+	/** Page cursor vector for all levels */
+ page_bulk_vector m_page_bulks;
+};
+
+#endif
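A hedged sketch of driving a bulk load through the interface above;
next_sorted_tuple() is a hypothetical source of tuples in index order:

	BtrBulk bulk(index, trx, observer);
	dberr_t err = DB_SUCCESS;
	while (dtuple_t* tuple = next_sorted_tuple()) {	/* hypothetical */
		err = bulk.insert(tuple);
		if (err != DB_SUCCESS) break;
	}
	err = bulk.finish(err);	/* commits each level; fills the root page */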
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index 214ae435627..63338579064 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,10 +27,11 @@ Created 10/16/1994 Heikki Tuuri
#ifndef btr0cur_h
#define btr0cur_h
-#include "univ.i"
#include "dict0dict.h"
#include "page0cur.h"
#include "btr0types.h"
+#include "rem0types.h"
+#include "gis0type.h"
/** Mode flags for btr_cur operations; these can be ORed */
enum {
@@ -53,18 +54,21 @@ enum {
BTR_KEEP_IBUF_BITMAP = 32
};
-#ifndef UNIV_HOTBACKUP
+/* btr_cur_latch_leaves() returns latched blocks and savepoints. */
+struct btr_latch_leaves_t {
+ /* left block, target block and right block */
+ buf_block_t* blocks[3];
+ ulint savepoints[3];
+};
+
#include "que0types.h"
#include "row0types.h"
#include "ha0ha.h"
-#define BTR_CUR_ADAPT
-#define BTR_CUR_HASH_ADAPT
-
#ifdef UNIV_DEBUG
/*********************************************************//**
Returns the page cursor component of a tree cursor.
-@return pointer to page cursor component */
+@return pointer to page cursor component */
UNIV_INLINE
page_cur_t*
btr_cur_get_page_cur(
@@ -72,7 +76,7 @@ btr_cur_get_page_cur(
const btr_cur_t* cursor);/*!< in: tree cursor */
/*********************************************************//**
Returns the buffer block on which the tree cursor is positioned.
-@return pointer to buffer block */
+@return pointer to buffer block */
UNIV_INLINE
buf_block_t*
btr_cur_get_block(
@@ -80,7 +84,7 @@ btr_cur_get_block(
const btr_cur_t* cursor);/*!< in: tree cursor */
/*********************************************************//**
Returns the record pointer of a tree cursor.
-@return pointer to record */
+@return pointer to record */
UNIV_INLINE
rec_t*
btr_cur_get_rec(
@@ -93,22 +97,15 @@ btr_cur_get_rec(
#endif /* UNIV_DEBUG */
/*********************************************************//**
Returns the compressed page on which the tree cursor is positioned.
-@return pointer to compressed page, or NULL if the page is not compressed */
+@return pointer to compressed page, or NULL if the page is not compressed */
UNIV_INLINE
page_zip_des_t*
btr_cur_get_page_zip(
/*=================*/
btr_cur_t* cursor);/*!< in: tree cursor */
/*********************************************************//**
-Invalidates a tree cursor by setting record pointer to NULL. */
-UNIV_INLINE
-void
-btr_cur_invalidate(
-/*===============*/
- btr_cur_t* cursor);/*!< in: tree cursor */
-/*********************************************************//**
Returns the page of a tree cursor.
-@return pointer to page */
+@return pointer to page */
UNIV_INLINE
page_t*
btr_cur_get_page(
@@ -116,8 +113,8 @@ btr_cur_get_page(
btr_cur_t* cursor);/*!< in: tree cursor */
/*********************************************************//**
Returns the index of a cursor.
-@param cursor b-tree cursor
-@return index */
+@param cursor b-tree cursor
+@return index */
#define btr_cur_get_index(cursor) ((cursor)->index)
/*********************************************************//**
Positions a tree cursor at a given record. */
@@ -129,6 +126,26 @@ btr_cur_position(
rec_t* rec, /*!< in: record in tree */
buf_block_t* block, /*!< in: buffer block of rec */
btr_cur_t* cursor);/*!< in: cursor */
+
+/** Optimistically latches the leaf page or pages requested.
+@param[in] block guessed buffer block
+@param[in] modify_clock modify clock value
+@param[in,out] latch_mode BTR_SEARCH_LEAF, ...
+@param[in,out] cursor cursor
+@param[in] file file name
+@param[in] line line where called
+@param[in] mtr mini-transaction
+@return true if success */
+bool
+btr_cur_optimistic_latch_leaves(
+ buf_block_t* block,
+ ib_uint64_t modify_clock,
+ ulint* latch_mode,
+ btr_cur_t* cursor,
+ const char* file,
+ unsigned line,
+ mtr_t* mtr);
+
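A sketch of the optimistic re-latch pattern this declaration supports;
block, saved_modify_clock, cursor and mtr are assumed to have been cached
by the caller on an earlier visit to the page:

	ulint latch_mode = BTR_SEARCH_LEAF;
	if (btr_cur_optimistic_latch_leaves(block, saved_modify_clock,
					    &latch_mode, cursor,
					    __FILE__, __LINE__, mtr)) {
		/* the block was not modified in between; cursor is valid */
	} else {
		/* fall back to a full btr_cur_search_to_nth_level() descent */
	}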
/********************************************************************//**
Searches an index tree and positions a tree cursor on a given level.
NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
@@ -136,7 +153,6 @@ to node pointer page number fields on the upper levels of the tree!
Note that if mode is PAGE_CUR_LE, which is used in inserts, then
cursor->up_match and cursor->low_match both will have sensible values.
If mode is PAGE_CUR_GE, then up_match will a have a sensible value. */
-UNIV_INTERN
dberr_t
btr_cur_search_to_nth_level(
/*========================*/
@@ -145,7 +161,7 @@ btr_cur_search_to_nth_level(
const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in
tuple must be set so that it cannot get
compared to the node ptr page number field! */
- ulint mode, /*!< in: PAGE_CUR_L, ...;
+ page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...;
NOTE that if the search is made using a unique
prefix of a record, mode should be PAGE_CUR_LE,
not PAGE_CUR_GE, as the latter may end up on
@@ -165,15 +181,20 @@ btr_cur_search_to_nth_level(
to protect the record! */
btr_cur_t* cursor, /*!< in/out: tree cursor; the cursor page is
s- or x-latched, but see also above! */
- ulint has_search_latch,/*!< in: latch mode the caller
- currently has on btr_search_latch:
+ ulint has_search_latch,
+ /*!< in: latch mode the caller
+ currently has on search system:
RW_S_LATCH, or 0 */
const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr); /*!< in: mtr */
+ unsigned line, /*!< in: line where called */
+ mtr_t* mtr, /*!< in/out: mini-transaction */
+ ib_uint64_t autoinc = 0);
+ /*!< in: PAGE_ROOT_AUTO_INC to be written
+ (0 if none) */
+
/*****************************************************************//**
-Opens a cursor at either end of an index. */
-UNIV_INTERN
+Opens a cursor at either end of an index.
+@return DB_SUCCESS or error code */
dberr_t
btr_cur_open_at_index_side_func(
/*============================*/
@@ -185,22 +206,25 @@ btr_cur_open_at_index_side_func(
ulint level, /*!< in: level to search for
(0=leaf) */
const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
+ unsigned line, /*!< in: line where called */
mtr_t* mtr) /*!< in/out: mini-transaction */
MY_ATTRIBUTE((nonnull));
+
#define btr_cur_open_at_index_side(f,i,l,c,lv,m) \
btr_cur_open_at_index_side_func(f,i,l,c,lv,__FILE__,__LINE__,m)
+
/**********************************************************************//**
-Positions a cursor at a randomly chosen position within a B-tree. */
-UNIV_INTERN
-void
+Positions a cursor at a randomly chosen position within a B-tree.
+@return true if the index is available and we have put the cursor, false
+if the index is unavailable */
+bool
btr_cur_open_at_rnd_pos_func(
/*=========================*/
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_cur_t* cursor, /*!< in/out: B-tree cursor */
const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
+ unsigned line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
#define btr_cur_open_at_rnd_pos(i,l,c,m) \
btr_cur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m)
@@ -210,8 +234,7 @@ It is assumed that mtr holds an x-latch on the page. The operation does
not succeed if there is too little space on the page. If there is just
one record on the page, the insert will always succeed; this is to
prevent trying to split a page with just one record.
-@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
-UNIV_INTERN
+@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
dberr_t
btr_cur_optimistic_insert(
/*======================*/
@@ -220,7 +243,7 @@ btr_cur_optimistic_insert(
specified */
btr_cur_t* cursor, /*!< in: cursor on page after which to insert;
cursor stays valid */
- ulint** offsets,/*!< out: offsets on *rec */
+ offset_t** offsets,/*!< out: offsets on *rec */
mem_heap_t** heap, /*!< in/out: pointer to memory heap */
dtuple_t* entry, /*!< in/out: entry to insert */
rec_t** rec, /*!< out: pointer to inserted record if
@@ -244,8 +267,7 @@ Performs an insert on a page of an index tree. It is assumed that mtr
holds an x-latch on the tree and on the cursor page. If the insert is
made on the leaf level, to avoid deadlocks, mtr must also own x-latches
to brothers of page, if those brothers exist.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
+@return DB_SUCCESS or error number */
dberr_t
btr_cur_pessimistic_insert(
/*=======================*/
@@ -257,7 +279,7 @@ btr_cur_pessimistic_insert(
insertion will certainly succeed */
btr_cur_t* cursor, /*!< in: cursor after which to insert;
cursor stays valid */
- ulint** offsets,/*!< out: offsets on *rec */
+ offset_t** offsets,/*!< out: offsets on *rec */
mem_heap_t** heap, /*!< in/out: pointer to memory heap
that can be emptied */
dtuple_t* entry, /*!< in/out: entry to insert */
@@ -278,13 +300,12 @@ an update-in-place.
@retval false if out of space; IBUF_BITMAP_FREE will be reset
outside mtr if the page was recompressed
-@retval true if enough place;
+@retval true	if enough space;
IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is
a secondary index leaf page. This has to be done either within the
same mini-transaction, or by invoking ibuf_reset_free_bits() before
mtr_commit(mtr). */
-UNIV_INTERN
bool
btr_cur_update_alloc_zip_func(
/*==========================*/
@@ -292,7 +313,7 @@ btr_cur_update_alloc_zip_func(
page_cur_t* cursor, /*!< in/out: B-tree page cursor */
dict_index_t* index, /*!< in: the index corresponding to cursor */
#ifdef UNIV_DEBUG
- ulint* offsets,/*!< in/out: offsets of the cursor record */
+ offset_t* offsets,/*!< in/out: offsets of the cursor record */
#endif /* UNIV_DEBUG */
ulint length, /*!< in: size needed */
bool create, /*!< in: true=delete-and-insert,
@@ -312,7 +333,6 @@ Updates a record when the update causes no size changes in its fields.
@retval DB_SUCCESS on success
@retval DB_ZIP_OVERFLOW if there is not enough space left
on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */
-UNIV_INTERN
dberr_t
btr_cur_update_in_place(
/*====================*/
@@ -320,7 +340,7 @@ btr_cur_update_in_place(
btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
- ulint* offsets,/*!< in/out: offsets on cursor->page_cur.rec */
+ offset_t* offsets,/*!< in/out: offsets on cursor->page_cur.rec */
const upd_t* update, /*!< in: update vector */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
@@ -333,7 +353,6 @@ btr_cur_update_in_place(
MY_ATTRIBUTE((warn_unused_result, nonnull));
/***********************************************************//**
Writes a redo log record of updating a record in-place. */
-UNIV_INTERN
void
btr_cur_update_in_place_log(
/*========================*/
@@ -356,7 +375,6 @@ so that tree compression is recommended.
@retval DB_UNDERFLOW if the page would become too empty
@retval DB_ZIP_OVERFLOW if there is not enough space left
on the compressed page */
-UNIV_INTERN
dberr_t
btr_cur_optimistic_update(
/*======================*/
@@ -364,7 +382,7 @@ btr_cur_optimistic_update(
btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
- ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
+ offset_t** offsets,/*!< out: offsets on cursor->page_cur.rec */
mem_heap_t** heap, /*!< in/out: pointer to NULL or memory heap */
const upd_t* update, /*!< in: update vector; this must also
contain trx id and roll ptr fields */
@@ -382,8 +400,7 @@ Performs an update of a record on a page of a tree. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. If the
update is made on the leaf level, to avoid deadlocks, mtr must also
own x-latches to brothers of page, if those brothers exist.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
dberr_t
btr_cur_pessimistic_update(
/*=======================*/
@@ -392,7 +409,7 @@ btr_cur_pessimistic_update(
btr_cur_t* cursor, /*!< in/out: cursor on the record to update;
cursor may become invalid if *big_rec == NULL
|| !(flags & BTR_KEEP_POS_FLAG) */
- ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
+ offset_t** offsets,/*!< out: offsets on cursor->page_cur.rec */
mem_heap_t** offsets_heap,
/*!< in/out: pointer to memory heap
that can be emptied */
@@ -401,9 +418,10 @@ btr_cur_pessimistic_update(
big_rec and the index tuple */
big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
be stored externally by the caller */
- const upd_t* update, /*!< in: update vector; this is allowed also
- contain trx id and roll ptr fields, but
- the values in update vector have no effect */
+ upd_t* update, /*!< in/out: update vector; this is allowed to
+ also contain trx id and roll ptr fields.
+ Non-updated columns that are moved offpage will
+ be appended to this. */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
que_thr_t* thr, /*!< in: query thread */
@@ -416,22 +434,21 @@ Marks a clustered index record deleted. Writes an undo log record to
undo log on this delete marking. Writes in the trx id field the id
of the deleting transaction, and in the roll ptr field pointer to the
undo log record created.
-@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
dberr_t
btr_cur_del_mark_set_clust_rec(
/*===========================*/
buf_block_t* block, /*!< in/out: buffer block of the record */
rec_t* rec, /*!< in/out: record */
dict_index_t* index, /*!< in: clustered index of the record */
- const ulint* offsets,/*!< in: rec_get_offsets(rec) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec) */
que_thr_t* thr, /*!< in: query thread */
+ const dtuple_t* entry, /*!< in: dtuple for the deleting record */
mtr_t* mtr) /*!< in/out: mini-transaction */
MY_ATTRIBUTE((nonnull, warn_unused_result));
/***********************************************************//**
Sets a secondary index record delete mark to TRUE or FALSE.
-@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
dberr_t
btr_cur_del_mark_set_sec_rec(
/*=========================*/
@@ -447,8 +464,7 @@ that mtr holds an x-latch on the tree and on the cursor page. To avoid
deadlocks, mtr must also own x-latches to brothers of page, if those
brothers exist. NOTE: it is assumed that the caller has reserved enough
free extents so that the compression will always succeed if done!
-@return TRUE if compression occurred */
-UNIV_INTERN
+@return TRUE if compression occurred */
ibool
btr_cur_compress_if_useful(
/*=======================*/
@@ -463,8 +479,7 @@ btr_cur_compress_if_useful(
Removes the record on which the tree cursor is positioned. It is assumed
that the mtr has an x-latch on the page where the cursor is positioned,
but no latch on the whole tree.
-@return TRUE if success, i.e., the page did not become too empty */
-UNIV_INTERN
+@return TRUE if success, i.e., the page did not become too empty */
ibool
btr_cur_optimistic_delete_func(
/*===========================*/
@@ -494,8 +509,7 @@ or if it is the only page on the level. It is assumed that mtr holds
an x-latch on the tree and on the cursor page. To avoid deadlocks,
mtr must also own x-latches to brothers of page, if those brothers
exist.
-@return TRUE if compression occurred */
-UNIV_INTERN
+@return TRUE if compression occurred */
ibool
btr_cur_pessimistic_delete(
/*=======================*/
@@ -513,14 +527,17 @@ btr_cur_pessimistic_delete(
stays valid: it points to successor of
deleted record on function exit */
ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */
- enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
+ bool rollback,/*!< in: performing rollback? */
mtr_t* mtr) /*!< in: mtr */
MY_ATTRIBUTE((nonnull));
-#endif /* !UNIV_HOTBACKUP */
+/** Delete the node pointer in a parent page.
+@param[in,out] parent cursor pointing to parent record
+@param[in,out] mtr mini-transaction */
+void btr_cur_node_ptr_delete(btr_cur_t* parent, mtr_t* mtr)
+ MY_ATTRIBUTE((nonnull));
/***********************************************************//**
Parses a redo log record of updating a record in-place.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
btr_cur_parse_update_in_place(
/*==========================*/
@@ -532,8 +549,7 @@ btr_cur_parse_update_in_place(
/****************************************************************//**
Parses the redo log record for delete marking or unmarking of a clustered
index record.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
btr_cur_parse_del_mark_set_clust_rec(
/*=================================*/
@@ -545,8 +561,7 @@ btr_cur_parse_del_mark_set_clust_rec(
/****************************************************************//**
Parses the redo log record for delete marking or unmarking of a secondary
index record.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
btr_cur_parse_del_mark_set_sec_rec(
/*===============================*/
@@ -554,19 +569,22 @@ btr_cur_parse_del_mark_set_sec_rec(
byte* end_ptr,/*!< in: buffer end */
page_t* page, /*!< in/out: page or NULL */
page_zip_des_t* page_zip);/*!< in/out: compressed page, or NULL */
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Estimates the number of rows in a given index range.
-@return estimated number of rows */
-UNIV_INTERN
-ib_int64_t
+
+/** Estimates the number of rows in a given index range.
+@param[in] index index
+@param[in] tuple1 range start, may also be empty tuple
+@param[in] mode1 search mode for range start
+@param[in] tuple2 range end, may also be empty tuple
+@param[in] mode2 search mode for range end
+@return estimated number of rows */
+int64_t
btr_estimate_n_rows_in_range(
-/*=========================*/
- dict_index_t* index, /*!< in: index */
- const dtuple_t* tuple1, /*!< in: range start, may also be empty tuple */
- ulint mode1, /*!< in: search mode for range start */
- const dtuple_t* tuple2, /*!< in: range end, may also be empty tuple */
- ulint mode2); /*!< in: search mode for range end */
+ dict_index_t* index,
+ const dtuple_t* tuple1,
+ page_cur_mode_t mode1,
+ const dtuple_t* tuple2,
+ page_cur_mode_t mode2);
+
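
A hedged usage sketch of the retyped estimator; the search tuples t1 and t2
are assumed to have been built by the caller:

    /* Estimate the number of rows in [t1, t2], closed at both ends,
    using the now strongly typed page_cur_mode_t search modes. */
    int64_t n_rows = btr_estimate_n_rows_in_range(
            index, t1, PAGE_CUR_GE, t2, PAGE_CUR_LE);
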
/*******************************************************************//**
Estimates the number of different key values in a given index, for
each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).
@@ -575,9 +593,10 @@ The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed
index->stat_n_sample_sizes[].
If innodb_stats_method is nulls_ignored, we also record the number of
non-null values for each prefix and stored the estimates in
-array index->stat_n_non_null_key_vals. */
-UNIV_INTERN
-void
+array index->stat_n_non_null_key_vals.
+@return true if the index is available and we get the estimated numbers,
+false if the index is unavailable. */
+bool
btr_estimate_number_of_different_key_vals(
/*======================================*/
dict_index_t* index); /*!< in: index */
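
Since the function now reports availability, a hedged sketch of the check a
statistics caller is expected to add:

    if (!btr_estimate_number_of_different_key_vals(index)) {
            /* Index unavailable: the stat_n_diff_key_vals[]
            estimates were not updated; keep the old values. */
    }
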
@@ -586,18 +605,16 @@ btr_estimate_number_of_different_key_vals(
@param[in] rec record
@param[in] offsets array returned by rec_get_offsets()
@return externally stored part, in units of a database page */
-
ulint
btr_rec_get_externally_stored_len(
const rec_t* rec,
- const ulint* offsets);
+ const offset_t* offsets);
/*******************************************************************//**
Marks non-updated off-page fields as disowned by this record. The ownership
must be transferred to the updated record which is inserted elsewhere in the
index tree. In purge only the owner of externally stored field is allowed
to free the field. */
-UNIV_INTERN
void
btr_cur_disown_inherited_fields(
/*============================*/
@@ -605,7 +622,7 @@ btr_cur_disown_inherited_fields(
part will be updated, or NULL */
rec_t* rec, /*!< in/out: record in a clustered index */
dict_index_t* index, /*!< in: index of the page */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
const upd_t* update, /*!< in: update vector */
mtr_t* mtr) /*!< in/out: mini-transaction */
MY_ATTRIBUTE((nonnull(2,3,4,5,6)));
@@ -617,7 +634,9 @@ enum blob_op {
/** Store off-page columns for an insert by update */
BTR_STORE_INSERT_UPDATE,
/** Store off-page columns for an update */
- BTR_STORE_UPDATE
+ BTR_STORE_UPDATE,
+ /** Store off-page columns for a freshly inserted record by bulk */
+ BTR_STORE_INSERT_BULK
};
/*******************************************************************//**
@@ -635,32 +654,30 @@ Stores the fields in big_rec_vec to the tablespace and puts pointers to
them in rec. The extern flags in rec will have to be set beforehand.
The fields are stored on pages allocated from leaf node
file segment of the index tree.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-UNIV_INTERN
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
dberr_t
btr_store_big_rec_extern_fields(
/*============================*/
- dict_index_t* index, /*!< in: index of rec; the index tree
- MUST be X-latched */
- buf_block_t* rec_block, /*!< in/out: block containing rec */
- rec_t* rec, /*!< in/out: record */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index);
- the "external storage" flags in offsets
- will not correspond to rec when
- this function returns */
+	btr_pcur_t*	pcur,		/*!< in/out: a persistent cursor. If
+					btr_mtr is restarted, then this can
+					be repositioned. */
+	offset_t*	offsets,	/*!< in/out: rec_get_offsets() on
+					pcur. The "external storage" flags
+					in offsets will correctly correspond
+					to rec when this function returns */
const big_rec_t*big_rec_vec, /*!< in: vector containing fields
to be stored externally */
- mtr_t* btr_mtr, /*!< in: mtr containing the
- latches to the clustered index */
+ mtr_t* btr_mtr, /*!< in/out: mtr containing the
+ latches to the clustered index. can be
+ committed and restarted. */
	enum blob_op	op)	/*!< in: operation code */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
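
A hedged sketch of the new pcur-based calling convention. Because the
function may commit and restart btr_mtr (repositioning pcur), callers must
not hold raw record pointers across the call:

    /* Store the off-page columns of a freshly bulk-inserted record;
    BTR_STORE_INSERT_BULK is the enum value added above. pcur, offsets,
    big_rec and mtr are assumed to be set up by the caller. */
    dberr_t err = btr_store_big_rec_extern_fields(
            &pcur, offsets, big_rec, &mtr, BTR_STORE_INSERT_BULK);
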
/*******************************************************************//**
Frees the space in an externally stored field to the file space
management if the field in data is owned the externally stored field,
in a rollback we may have the additional condition that the field must
not be inherited. */
-UNIV_INTERN
void
btr_free_externally_stored_field(
/*=============================*/
@@ -675,114 +692,140 @@ btr_free_externally_stored_field(
byte* field_ref, /*!< in/out: field reference */
const rec_t* rec, /*!< in: record containing field_ref, for
page_zip_write_blob_ptr(), or NULL */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index),
+ const offset_t* offsets, /*!< in: rec_get_offsets(rec, index),
or NULL */
page_zip_des_t* page_zip, /*!< in: compressed page corresponding
to rec, or NULL if rec == NULL */
ulint i, /*!< in: field number of field_ref;
ignored if rec == NULL */
- enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
- mtr_t* local_mtr); /*!< in: mtr containing the latch to
- data an an X-latch to the index
- tree */
-/*******************************************************************//**
-Copies the prefix of an externally stored field of a record. The
-clustered index record must be protected by a lock or a page latch.
+ bool rollback, /*!< in: performing rollback? */
+ mtr_t* local_mtr); /*!< in: mtr containing the latch */
+/** Copies the prefix of an externally stored field of a record.
+The clustered index record must be protected by a lock or a page latch.
+@param[out] buf the field, or a prefix of it
+@param[in] len length of buf, in bytes
+@param[in] page_size BLOB page size
+@param[in] data 'internally' stored part of the field
+containing also the reference to the external part; must be protected by
+a lock or a page latch
+@param[in] local_len length of data, in bytes
@return the length of the copied field, or 0 if the column was being
or has been deleted */
-UNIV_INTERN
ulint
btr_copy_externally_stored_field_prefix(
-/*====================================*/
- byte* buf, /*!< out: the field, or a prefix of it */
- ulint len, /*!< in: length of buf, in bytes */
- ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
- zero for uncompressed BLOBs */
- const byte* data, /*!< in: 'internally' stored part of the
- field containing also the reference to
- the external part; must be protected by
- a lock or a page latch */
- ulint local_len);/*!< in: length of data, in bytes */
-/*******************************************************************//**
-Copies an externally stored field of a record to mem heap. The
-clustered index record must be protected by a lock or a page latch.
+ byte* buf,
+ ulint len,
+ const page_size_t& page_size,
+ const byte* data,
+ ulint local_len);
+
+/** Copies an externally stored field of a record to mem heap.
+The clustered index record must be protected by a lock or a page latch.
+@param[out] len length of the whole field
+@param[in] data 'internally' stored part of the field
+containing also the reference to the external part; must be protected by
+a lock or a page latch
+@param[in] page_size BLOB page size
+@param[in] local_len length of data
+@param[in,out] heap mem heap
@return the whole field copied to heap */
-UNIV_INTERN
byte*
btr_copy_externally_stored_field(
-/*=============================*/
- ulint* len, /*!< out: length of the whole field */
- const byte* data, /*!< in: 'internally' stored part of the
- field containing also the reference to
- the external part; must be protected by
- a lock or a page latch */
- ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
- zero for uncompressed BLOBs */
- ulint local_len,/*!< in: length of data */
- mem_heap_t* heap); /*!< in: mem heap */
-/*******************************************************************//**
-Copies an externally stored field of a record to mem heap.
-@return the field copied to heap, or NULL if the field is incomplete */
-UNIV_INTERN
+ ulint* len,
+ const byte* data,
+ const page_size_t& page_size,
+ ulint local_len,
+ mem_heap_t* heap);
+
+/** Copies an externally stored field of a record to mem heap.
+@param[in] rec record in a clustered index; must be
+protected by a lock or a page latch
+@param[in] offset array returned by rec_get_offsets()
+@param[in] page_size BLOB page size
+@param[in] no field number
+@param[out] len length of the field
+@param[in,out] heap mem heap
+@return the field copied to heap, or NULL if the field is incomplete */
byte*
btr_rec_copy_externally_stored_field(
-/*=================================*/
- const rec_t* rec, /*!< in: record in a clustered index;
- must be protected by a lock or a page latch */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
- zero for uncompressed BLOBs */
- ulint no, /*!< in: field number */
- ulint* len, /*!< out: length of the field */
- mem_heap_t* heap); /*!< in: mem heap */
-/*******************************************************************//**
-Flags the data tuple fields that are marked as extern storage in the
-update vector. We use this function to remember which fields we must
-mark as extern storage in a record inserted for an update.
-@return number of flagged external columns */
-UNIV_INTERN
-ulint
-btr_push_update_extern_fields(
-/*==========================*/
- dtuple_t* tuple, /*!< in/out: data tuple */
- const upd_t* update, /*!< in: update vector */
- mem_heap_t* heap) /*!< in: memory heap */
- MY_ATTRIBUTE((nonnull));
+ const rec_t* rec,
+ const offset_t* offsets,
+ const page_size_t& page_size,
+ ulint no,
+ ulint* len,
+ mem_heap_t* heap);
+
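
A hedged sketch of fetching one possibly-external column with the new
page_size_t-based signature; rec, offsets, field_no and heap are assumed
valid:

    ulint   len;
    byte*   field = btr_rec_copy_externally_stored_field(
            rec, offsets, page_size, field_no, &len, heap);

    if (field == NULL) {
            /* The column was being or has been deleted. */
    }
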
/***********************************************************//**
Sets a secondary index record's delete mark to the given value. This
function is only used by the insert buffer merge mechanism. */
-UNIV_INTERN
void
btr_cur_set_deleted_flag_for_ibuf(
/*==============================*/
rec_t* rec, /*!< in/out: record */
page_zip_des_t* page_zip, /*!< in/out: compressed page
corresponding to rec, or NULL
- when the tablespace is
- uncompressed */
+ when the tablespace is uncompressed */
ibool val, /*!< in: value to set */
mtr_t* mtr); /*!< in/out: mini-transaction */
+
+/******************************************************//**
+The following function is used to set the deleted bit of a record. */
+UNIV_INLINE
+void
+btr_rec_set_deleted_flag(
+/*=====================*/
+ rec_t* rec, /*!< in/out: physical record */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page (or NULL) */
+ ulint flag); /*!< in: nonzero if delete marked */
+
+/** Latches the leaf page or pages requested.
+@param[in]	block		leaf page where the search converged
+@param[in]	page_id		page id of the leaf
+@param[in]	page_size	page size of the leaf
+@param[in]	latch_mode	BTR_SEARCH_LEAF, ...
+@param[in]	cursor		cursor
+@param[in]	mtr		mini-transaction
+@return blocks and savepoints which were actually latched. */
+btr_latch_leaves_t
+btr_cur_latch_leaves(
+ buf_block_t* block,
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ ulint latch_mode,
+ btr_cur_t* cursor,
+ mtr_t* mtr);
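
A hedged usage sketch; block, page_id and page_size are assumed to describe
the leaf page the search converged on:

    btr_latch_leaves_t      latch_leaves = btr_cur_latch_leaves(
            block, page_id, page_size, BTR_MODIFY_LEAF, cursor, mtr);
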
+
/*######################################################################*/
/** In the pessimistic delete, if the page data size drops below this
limit, merging it to a neighbor is tried */
-#define BTR_CUR_PAGE_COMPRESS_LIMIT (UNIV_PAGE_SIZE / 2)
+#define BTR_CUR_PAGE_COMPRESS_LIMIT(index) \
+ ((UNIV_PAGE_SIZE * (ulint)((index)->merge_threshold)) / 100)
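
As a worked example (assuming the usual merge_threshold default of 50),
this evaluates to UNIV_PAGE_SIZE * 50 / 100, i.e. half a page, matching the
old fixed UNIV_PAGE_SIZE / 2 limit; lowering merge_threshold to 35 on
16 KiB pages would lower the limit to 16384 * 35 / 100 = 5734 bytes.
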
/** A slot in the path array. We store here info on a search path down the
tree. Each slot contains data on a single level of the tree. */
-
-struct btr_path_t{
- ulint nth_rec; /*!< index of the record
- where the page cursor stopped on
- this level (index in alphabetical
- order); value ULINT_UNDEFINED
- denotes array end */
- ulint n_recs; /*!< number of records on the page */
- ulint page_no; /*!< no of the page containing the record */
- ulint page_level; /*!< level of the page, if later we fetch
- the page under page_no and it is no different
- level then we know that the tree has been
- reorganized */
+struct btr_path_t {
+ /* Assume a page like:
+ records: (inf, a, b, c, d, sup)
+ index of the record: 0, 1, 2, 3, 4, 5
+ */
+
+ /** Index of the record where the page cursor stopped on this level
+ (index in alphabetical order). Value ULINT_UNDEFINED denotes array
+ end. In the above example, if the search stopped on record 'c', then
+ nth_rec will be 3. */
+ ulint nth_rec;
+
+ /** Number of the records on the page, not counting inf and sup.
+ In the above example n_recs will be 4. */
+ ulint n_recs;
+
+ /** Number of the page containing the record. */
+ ulint page_no;
+
+	/** Level of the page. If later we fetch the page under page_no
+	and it is on a different level, then we know that the tree has
+	been reorganized. */
+ ulint page_level;
};
#define BTR_PATH_ARRAY_N_SLOTS 250 /*!< size of path array (in slots) */
@@ -859,7 +902,7 @@ struct btr_cur_t {
other search modes; see also the NOTE
in up_match! */
ulint low_bytes; /*!< number of matched bytes to the
- right at the time cursor positioned;
+ left at the time cursor positioned;
only used internally in searches: not
defined after the search */
ulint n_fields; /*!< prefix length used in a hash
@@ -873,8 +916,41 @@ struct btr_cur_t {
rows in range, we store in this array
information of the path through
the tree */
+ rtr_info_t* rtr_info; /*!< rtree search info */
+	btr_cur_t():thr(NULL), rtr_info(NULL) {}	/* default values */
+ /** Zero-initialize all fields */
+ void init()
+ {
+ index = NULL;
+ memset(&page_cur, 0, sizeof page_cur);
+ purge_node = NULL;
+ left_block = NULL;
+ thr = NULL;
+ flag = btr_cur_method(0);
+ tree_height = 0;
+ up_match = 0;
+ up_bytes = 0;
+ low_match = 0;
+ low_bytes = 0;
+ n_fields = 0;
+ n_bytes = 0;
+ fold = 0;
+ path_arr = NULL;
+ rtr_info = NULL;
+ }
};
+
/** If pessimistic delete fails because of lack of file space, there
is still a good chance of success a little later. Try this many
times. */
@@ -907,36 +983,35 @@ stored part. */
significant bit of the byte at smallest address) is set to 1 if this
field does not 'own' the externally stored field; only the owner field
is allowed to free the field in purge! */
-#define BTR_EXTERN_OWNER_FLAG 128
+#define BTR_EXTERN_OWNER_FLAG 128U
/** If the second most significant bit of BTR_EXTERN_LEN (i.e., the
second most significant bit of the byte at smallest address) is 1 then
it means that the externally stored field was inherited from an
earlier version of the row. In rollback we are not allowed to free an
inherited external field. */
-#define BTR_EXTERN_INHERITED_FLAG 64
+#define BTR_EXTERN_INHERITED_FLAG 64U
/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */
extern ulint btr_cur_n_non_sea;
-/** Number of successful adaptive hash index lookups in
-btr_cur_search_to_nth_level(). */
-extern ulint btr_cur_n_sea;
/** Old value of btr_cur_n_non_sea. Copied by
srv_refresh_innodb_monitor_stats(). Referenced by
srv_printf_innodb_monitor(). */
extern ulint btr_cur_n_non_sea_old;
+#ifdef BTR_CUR_HASH_ADAPT
+/** Number of successful adaptive hash index lookups in
+btr_cur_search_to_nth_level(). */
+extern ulint btr_cur_n_sea;
/** Old value of btr_cur_n_sea. Copied by
srv_refresh_innodb_monitor_stats(). Referenced by
srv_printf_innodb_monitor(). */
extern ulint btr_cur_n_sea_old;
-#endif /* !UNIV_HOTBACKUP */
+#endif /* BTR_CUR_HASH_ADAPT */
#ifdef UNIV_DEBUG
/* Flag to limit optimistic insert records */
extern uint btr_cur_limit_optimistic_insert_debug;
#endif /* UNIV_DEBUG */
-#ifndef UNIV_NONINL
#include "btr0cur.ic"
-#endif
#endif
diff --git a/storage/innobase/include/btr0cur.ic b/storage/innobase/include/btr0cur.ic
index c27956f29cf..7cf6c5982fa 100644
--- a/storage/innobase/include/btr0cur.ic
+++ b/storage/innobase/include/btr0cur.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,7 +24,6 @@ The index tree cursor
Created 10/16/1994 Heikki Tuuri
*******************************************************/
-#ifndef UNIV_HOTBACKUP
#include "btr0btr.h"
#ifdef UNIV_DEBUG
@@ -39,7 +39,7 @@ if (btr_cur_limit_optimistic_insert_debug > 1\
#ifdef UNIV_DEBUG
/*********************************************************//**
Returns the page cursor component of a tree cursor.
-@return pointer to page cursor component */
+@return pointer to page cursor component */
UNIV_INLINE
page_cur_t*
btr_cur_get_page_cur(
@@ -51,7 +51,7 @@ btr_cur_get_page_cur(
/*********************************************************//**
Returns the buffer block on which the tree cursor is positioned.
-@return pointer to buffer block */
+@return pointer to buffer block */
UNIV_INLINE
buf_block_t*
btr_cur_get_block(
@@ -63,7 +63,7 @@ btr_cur_get_block(
/*********************************************************//**
Returns the record pointer of a tree cursor.
-@return pointer to record */
+@return pointer to record */
UNIV_INLINE
rec_t*
btr_cur_get_rec(
@@ -76,7 +76,7 @@ btr_cur_get_rec(
/*********************************************************//**
Returns the compressed page on which the tree cursor is positioned.
-@return pointer to compressed page, or NULL if the page is not compressed */
+@return pointer to compressed page, or NULL if the page is not compressed */
UNIV_INLINE
page_zip_des_t*
btr_cur_get_page_zip(
@@ -87,19 +87,8 @@ btr_cur_get_page_zip(
}
/*********************************************************//**
-Invalidates a tree cursor by setting record pointer to NULL. */
-UNIV_INLINE
-void
-btr_cur_invalidate(
-/*===============*/
- btr_cur_t* cursor) /*!< in: tree cursor */
-{
- page_cur_invalidate(&(cursor->page_cur));
-}
-
-/*********************************************************//**
Returns the page of a tree cursor.
-@return pointer to page */
+@return pointer to page */
UNIV_INLINE
page_t*
btr_cur_get_page(
@@ -130,7 +119,7 @@ btr_cur_position(
/*********************************************************************//**
Checks if compressing an index page where a btr cursor is placed makes
sense.
-@return TRUE if compression is recommended */
+@return TRUE if compression is recommended */
UNIV_INLINE
ibool
btr_cur_compress_recommendation(
@@ -140,17 +129,18 @@ btr_cur_compress_recommendation(
{
const page_t* page;
- ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_is_block_fix(
+ mtr, btr_cur_get_block(cursor),
+ MTR_MEMO_PAGE_X_FIX, cursor->index->table));
page = btr_cur_get_page(cursor);
- LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page) * 2,
+ LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page) * 2U,
return(FALSE));
- if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT)
- || ((btr_page_get_next(page, mtr) == FIL_NULL)
- && (btr_page_get_prev(page, mtr) == FIL_NULL))) {
+ if (page_get_data_size(page)
+ < BTR_CUR_PAGE_COMPRESS_LIMIT(cursor->index)
+ || !page_has_siblings(page)) {
/* The page fillfactor has dropped below a predefined
minimum value OR the level in the B-tree contains just
@@ -167,7 +157,7 @@ btr_cur_compress_recommendation(
/*********************************************************************//**
Checks if the record on which the cursor is placed can be deleted without
making tree compression necessary (or, recommended).
-@return TRUE if can be deleted without recommended compression */
+@return TRUE if can be deleted without recommended compression */
UNIV_INLINE
ibool
btr_cur_can_delete_without_compress(
@@ -183,10 +173,9 @@ btr_cur_can_delete_without_compress(
page = btr_cur_get_page(cursor);
- if ((page_get_data_size(page) - rec_size < BTR_CUR_PAGE_COMPRESS_LIMIT)
- || ((btr_page_get_next(page, mtr) == FIL_NULL)
- && (btr_page_get_prev(page, mtr) == FIL_NULL))
- || (page_get_n_recs(page) < 2)) {
+ if (page_get_data_size(page) - rec_size
+ < BTR_CUR_PAGE_COMPRESS_LIMIT(cursor->index)
+ || !page_has_siblings(page) || page_get_n_recs(page) < 2) {
/* The page fillfactor will drop below a predefined
minimum value, OR the level in the B-tree contains just
@@ -211,6 +200,7 @@ btr_blob_op_is_update(
{
switch (op) {
case BTR_STORE_INSERT:
+ case BTR_STORE_INSERT_BULK:
return(FALSE);
case BTR_STORE_INSERT_UPDATE:
case BTR_STORE_UPDATE:
@@ -220,4 +210,21 @@ btr_blob_op_is_update(
ut_ad(0);
return(FALSE);
}
-#endif /* !UNIV_HOTBACKUP */
+
+/******************************************************//**
+The following function is used to set the deleted bit of a record. */
+UNIV_INLINE
+void
+btr_rec_set_deleted_flag(
+/*=====================*/
+ rec_t* rec, /*!< in/out: physical record */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page (or NULL) */
+ ulint flag) /*!< in: nonzero if delete marked */
+{
+ if (page_rec_is_comp(rec)) {
+ rec_set_deleted_flag_new(rec, page_zip, flag);
+ } else {
+ ut_ad(!page_zip);
+ rec_set_deleted_flag_old(rec, flag);
+ }
+}
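
A hedged one-line usage, for contrast with the ibuf-only wrapper declared
earlier:

    btr_rec_set_deleted_flag(rec, page_zip, TRUE);   /* delete-mark */
    btr_rec_set_deleted_flag(rec, page_zip, FALSE);  /* unmark */
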
diff --git a/storage/innobase/include/btr0defragment.h b/storage/innobase/include/btr0defragment.h
index 9f6aba490a8..57f8c2f3811 100644
--- a/storage/innobase/include/btr0defragment.h
+++ b/storage/innobase/include/btr0defragment.h
@@ -20,10 +20,6 @@ this program; if not, write to the Free Software Foundation, Inc.,
#ifndef btr0defragment_h
#define btr0defragment_h
-#include "univ.i"
-
-#ifndef UNIV_HOTBACKUP
-
#include "btr0pcur.h"
/* Max number of pages to consider at once during defragmentation. */
@@ -98,5 +94,4 @@ DECLARE_THREAD(btr_defragment_thread)(void*);
/** Whether btr_defragment_thread is active */
extern bool btr_defragment_thread_active;
-#endif /* !UNIV_HOTBACKUP */
#endif
diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h
index bcb9fb6dba3..b79260e5ab6 100644
--- a/storage/innobase/include/btr0pcur.h
+++ b/storage/innobase/include/btr0pcur.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,30 +27,27 @@ Created 2/23/1996 Heikki Tuuri
#ifndef btr0pcur_h
#define btr0pcur_h
-#include "univ.i"
#include "dict0dict.h"
-#include "data0data.h"
-#include "mtr0mtr.h"
-#include "page0cur.h"
#include "btr0cur.h"
#include "btr0btr.h"
-#include "btr0types.h"
+#include "gis0rtree.h"
/* Relative positions for a stored cursor position */
-#define BTR_PCUR_ON 1
-#define BTR_PCUR_BEFORE 2
-#define BTR_PCUR_AFTER 3
+enum btr_pcur_pos_t {
+ BTR_PCUR_ON = 1,
+ BTR_PCUR_BEFORE = 2,
+ BTR_PCUR_AFTER = 3,
/* Note that if the tree is not empty, btr_pcur_store_position does not
use the following, but only uses the above three alternatives, where the
position is stored relative to a specific record: this makes implementation
of a scroll cursor easier */
-#define BTR_PCUR_BEFORE_FIRST_IN_TREE 4 /* in an empty tree */
-#define BTR_PCUR_AFTER_LAST_IN_TREE 5 /* in an empty tree */
+ BTR_PCUR_BEFORE_FIRST_IN_TREE = 4, /* in an empty tree */
+ BTR_PCUR_AFTER_LAST_IN_TREE = 5 /* in an empty tree */
+};
/**************************************************************//**
Allocates memory for a persistent cursor object and initializes the cursor.
-@return own: persistent cursor */
-UNIV_INTERN
+@return own: persistent cursor */
btr_pcur_t*
btr_pcur_create_for_mysql(void);
/*============================*/
@@ -57,7 +55,6 @@ btr_pcur_create_for_mysql(void);
/**************************************************************//**
Resets a persistent cursor object, freeing ::old_rec_buf if it is
allocated and resetting the other members to their initial values. */
-UNIV_INTERN
void
btr_pcur_reset(
/*===========*/
@@ -65,14 +62,12 @@ btr_pcur_reset(
/**************************************************************//**
Frees the memory for a persistent cursor object. */
-UNIV_INTERN
void
btr_pcur_free_for_mysql(
/*====================*/
btr_pcur_t* cursor); /*!< in, own: persistent cursor */
/**************************************************************//**
Copies the stored position of a pcur to another pcur. */
-UNIV_INTERN
void
btr_pcur_copy_stored_position(
/*==========================*/
@@ -87,17 +82,25 @@ void
btr_pcur_init(
/*==========*/
btr_pcur_t* pcur); /*!< in: persistent cursor */
+
+/** Free old_rec_buf.
+@param[in] pcur Persistent cursor holding old_rec to be freed. */
+UNIV_INLINE
+void
+btr_pcur_free(
+ btr_pcur_t* pcur);
+
/**************************************************************//**
Initializes and opens a persistent cursor to an index tree. It should be
closed with btr_pcur_close. */
UNIV_INLINE
-void
+dberr_t
btr_pcur_open_low(
/*==============*/
dict_index_t* index, /*!< in: index */
ulint level, /*!< in: level in the btree */
const dtuple_t* tuple, /*!< in: tuple on which search done */
- ulint mode, /*!< in: PAGE_CUR_L, ...;
+ page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...;
NOTE that if the search is made using a unique
prefix of a record, mode should be
PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
@@ -106,10 +109,12 @@ btr_pcur_open_low(
ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
+ unsigned line, /*!< in: line where called */
+ ib_uint64_t autoinc,/*!< in: PAGE_ROOT_AUTO_INC to be written
+ (0 if none) */
mtr_t* mtr); /*!< in: mtr */
#define btr_pcur_open(i,t,md,l,c,m) \
- btr_pcur_open_low(i,0,t,md,l,c,__FILE__,__LINE__,m)
+ btr_pcur_open_low(i,0,t,md,l,c,__FILE__,__LINE__,0,m)
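
Since btr_pcur_open_low() now returns a dberr_t (see the btr0pcur.ic hunk
below), a hedged sketch of a macro call site that checks it; only callers
that must write PAGE_ROOT_AUTO_INC bypass the macro and pass a non-zero
autoinc to btr_pcur_open_low() directly:

    dberr_t err = btr_pcur_open(index, tuple, PAGE_CUR_GE,
                                BTR_SEARCH_LEAF, &pcur, &mtr);
    if (err != DB_SUCCESS) {
            /* The search failed; a warning has already been logged. */
    }
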
/**************************************************************//**
Opens a persistent cursor to an index tree without initializing the
cursor. */
@@ -119,7 +124,7 @@ btr_pcur_open_with_no_init_func(
/*============================*/
dict_index_t* index, /*!< in: index */
const dtuple_t* tuple, /*!< in: tuple on which search done */
- ulint mode, /*!< in: PAGE_CUR_L, ...;
+ page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...;
NOTE that if the search is made using a unique
prefix of a record, mode should be
PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
@@ -131,11 +136,12 @@ btr_pcur_open_with_no_init_func(
page, but assume that the caller uses his
btr search latch to protect the record! */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
- ulint has_search_latch,/*!< in: latch mode the caller
- currently has on btr_search_latch:
+ ulint has_search_latch,
+ /*!< in: latch mode the caller
+			currently has on the search system:
RW_S_LATCH, or 0 */
const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
+ unsigned line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
#define btr_pcur_open_with_no_init(ix,t,md,l,cur,has,m) \
btr_pcur_open_with_no_init_func(ix,t,md,l,cur,has,__FILE__,__LINE__,m)
@@ -181,33 +187,34 @@ PAGE_CUR_LE, on the last user record. If no such user record exists, then
in the first case sets the cursor after last in tree, and in the latter case
before first in tree. The latching mode must be BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF. */
-UNIV_INTERN
void
btr_pcur_open_on_user_rec_func(
/*===========================*/
dict_index_t* index, /*!< in: index */
const dtuple_t* tuple, /*!< in: tuple on which search done */
- ulint mode, /*!< in: PAGE_CUR_L, ... */
+ page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ... */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent
cursor */
const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
+ unsigned line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
#define btr_pcur_open_on_user_rec(i,t,md,l,c,m) \
btr_pcur_open_on_user_rec_func(i,t,md,l,c,__FILE__,__LINE__,m)
/**********************************************************************//**
-Positions a cursor at a randomly chosen position within a B-tree. */
+Positions a cursor at a randomly chosen position within a B-tree.
+@return true if the index is available and we have put the cursor, false
+if the index is unavailable */
UNIV_INLINE
-void
+bool
btr_pcur_open_at_rnd_pos_func(
/*==========================*/
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /*!< in/out: B-tree pcur */
const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
+ unsigned line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
#define btr_pcur_open_at_rnd_pos(i,l,c,m) \
btr_pcur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m)
@@ -235,7 +242,6 @@ cursor data structure, or just setting a flag if the cursor is before the
first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the
page where the cursor is positioned must not be empty if the index tree is
not totally empty! */
-UNIV_INTERN
void
btr_pcur_store_position(
/*====================*/
@@ -256,20 +262,19 @@ restores to before first or after the last in the tree.
@return TRUE if the cursor position was stored when it was on a user
record and it can be restored on a user record whose ordering fields
are identical to the ones of the original user record */
-UNIV_INTERN
ibool
btr_pcur_restore_position_func(
/*===========================*/
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /*!< in: detached persistent cursor */
const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
+ unsigned line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
#define btr_pcur_restore_position(l,cur,mtr) \
btr_pcur_restore_position_func(l,cur,__FILE__,__LINE__,mtr)
/*********************************************************//**
Gets the rel_pos field for a cursor whose position has been stored.
-@return BTR_PCUR_ON, ... */
+@return BTR_PCUR_ON, ... */
UNIV_INLINE
ulint
btr_pcur_get_rel_pos(
@@ -286,10 +291,25 @@ btr_pcur_commit_specify_mtr(
/*========================*/
btr_pcur_t* pcur, /*!< in: persistent cursor */
mtr_t* mtr); /*!< in: mtr to commit */
+
+/** Commits the mtr and sets the clustered index pcur and secondary index
+pcur latch mode to BTR_NO_LATCHES, that is, the cursor becomes detached.
+Function btr_pcur_store_position should be used for both cursors before
+calling this, if restoration of the cursors is wanted later.
+@param[in] pcur persistent cursor
+@param[in] sec_pcur secondary index persistent cursor
+@param[in] mtr mtr to commit */
+UNIV_INLINE
+void
+btr_pcurs_commit_specify_mtr(
+ btr_pcur_t* pcur,
+ btr_pcur_t* sec_pcur,
+ mtr_t* mtr);
+
/*********************************************************//**
Moves the persistent cursor to the next record in the tree. If no records are
left, the cursor stays 'after last in tree'.
-@return TRUE if the cursor was not after last in tree */
+@return TRUE if the cursor was not after last in tree */
UNIV_INLINE
ibool
btr_pcur_move_to_next(
@@ -300,8 +320,7 @@ btr_pcur_move_to_next(
/*********************************************************//**
Moves the persistent cursor to the previous record in the tree. If no records
are left, the cursor stays 'before first in tree'.
-@return TRUE if the cursor was not before first in tree */
-UNIV_INTERN
+@return TRUE if the cursor was not before first in tree */
ibool
btr_pcur_move_to_prev(
/*==================*/
@@ -309,17 +328,9 @@ btr_pcur_move_to_prev(
function may release the page latch */
mtr_t* mtr); /*!< in: mtr */
/*********************************************************//**
-Moves the persistent cursor to the last record on the same page. */
-UNIV_INLINE
-void
-btr_pcur_move_to_last_on_page(
-/*==========================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr); /*!< in: mtr */
-/*********************************************************//**
Moves the persistent cursor to the next user record in the tree. If no user
records are left, the cursor ends up 'after last in tree'.
-@return TRUE if the cursor moved forward, ending on a user record */
+@return TRUE if the cursor moved forward, ending on a user record */
UNIV_INLINE
ibool
btr_pcur_move_to_next_user_rec(
@@ -332,34 +343,16 @@ Moves the persistent cursor to the first record on the next page.
Releases the latch on the current page, and buffer-unfixes it.
Note that there must not be modifications on the current page,
as then the x-latch can be released only in mtr_commit. */
-UNIV_INTERN
void
btr_pcur_move_to_next_page(
/*=======================*/
btr_pcur_t* cursor, /*!< in: persistent cursor; must be on the
last record of the current page */
mtr_t* mtr); /*!< in: mtr */
-/*********************************************************//**
-Moves the persistent cursor backward if it is on the first record
-of the page. Releases the latch on the current page, and bufferunfixes
-it. Note that to prevent a possible deadlock, the operation first
-stores the position of the cursor, releases the leaf latch, acquires
-necessary latches and restores the cursor position again before returning.
-The alphabetical position of the cursor is guaranteed to be sensible
-on return, but it may happen that the cursor is not positioned on the
-last record of any page, because the structure of the tree may have
-changed while the cursor had no latches. */
-UNIV_INTERN
-void
-btr_pcur_move_backward_from_page(
-/*=============================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor, must be on the
- first record of the current page */
- mtr_t* mtr); /*!< in: mtr */
#ifdef UNIV_DEBUG
/*********************************************************//**
Returns the btr cursor component of a persistent cursor.
-@return pointer to btr cursor component */
+@return pointer to btr cursor component */
UNIV_INLINE
btr_cur_t*
btr_pcur_get_btr_cur(
@@ -367,7 +360,7 @@ btr_pcur_get_btr_cur(
const btr_pcur_t* cursor); /*!< in: persistent cursor */
/*********************************************************//**
Returns the page cursor component of a persistent cursor.
-@return pointer to page cursor component */
+@return pointer to page cursor component */
UNIV_INLINE
page_cur_t*
btr_pcur_get_page_cur(
@@ -375,7 +368,7 @@ btr_pcur_get_page_cur(
const btr_pcur_t* cursor); /*!< in: persistent cursor */
/*********************************************************//**
Returns the page of a persistent cursor.
-@return pointer to the page */
+@return pointer to the page */
UNIV_INLINE
page_t*
btr_pcur_get_page(
@@ -383,7 +376,7 @@ btr_pcur_get_page(
const btr_pcur_t* cursor);/*!< in: persistent cursor */
/*********************************************************//**
Returns the buffer block of a persistent cursor.
-@return pointer to the block */
+@return pointer to the block */
UNIV_INLINE
buf_block_t*
btr_pcur_get_block(
@@ -391,7 +384,7 @@ btr_pcur_get_block(
const btr_pcur_t* cursor);/*!< in: persistent cursor */
/*********************************************************//**
Returns the record of a persistent cursor.
-@return pointer to the record */
+@return pointer to the record */
UNIV_INLINE
rec_t*
btr_pcur_get_rec(
@@ -493,56 +486,66 @@ enum pcur_pos_t {
selects, updates, and deletes. */
struct btr_pcur_t{
- btr_cur_t btr_cur; /*!< a B-tree cursor */
- ulint latch_mode; /*!< see TODO note below!
- BTR_SEARCH_LEAF, BTR_MODIFY_LEAF,
- BTR_MODIFY_TREE, or BTR_NO_LATCHES,
- depending on the latching state of
- the page and tree where the cursor is
- positioned; BTR_NO_LATCHES means that
- the cursor is not currently positioned:
- we say then that the cursor is
- detached; it can be restored to
- attached if the old position was
- stored in old_rec */
- ulint old_stored; /*!< BTR_PCUR_OLD_STORED
- or BTR_PCUR_OLD_NOT_STORED */
- rec_t* old_rec; /*!< if cursor position is stored,
- contains an initial segment of the
- latest record cursor was positioned
- either on, before, or after */
- ulint old_n_fields; /*!< number of fields in old_rec */
- ulint rel_pos; /*!< BTR_PCUR_ON, BTR_PCUR_BEFORE, or
- BTR_PCUR_AFTER, depending on whether
- cursor was on, before, or after the
- old_rec record */
- buf_block_t* block_when_stored;/* buffer block when the position was
- stored */
- ib_uint64_t modify_clock; /*!< the modify clock value of the
- buffer block when the cursor position
- was stored */
- enum pcur_pos_t pos_state; /*!< btr_pcur_store_position() and
- btr_pcur_restore_position() state. */
- ulint search_mode; /*!< PAGE_CUR_G, ... */
- trx_t* trx_if_known; /*!< the transaction, if we know it;
- otherwise this field is not defined;
- can ONLY BE USED in error prints in
- fatal assertion failures! */
+ /** a B-tree cursor */
+ btr_cur_t btr_cur;
+ /** see TODO note below!
+ BTR_SEARCH_LEAF, BTR_MODIFY_LEAF, BTR_MODIFY_TREE or BTR_NO_LATCHES,
+ depending on the latching state of the page and tree where the cursor
+ is positioned; BTR_NO_LATCHES means that the cursor is not currently
+ positioned:
+ we say then that the cursor is detached; it can be restored to
+ attached if the old position was stored in old_rec */
+ ulint latch_mode;
+ /** true if old_rec is stored */
+ bool old_stored;
+ /** if cursor position is stored, contains an initial segment of the
+ latest record cursor was positioned either on, before or after */
+ rec_t* old_rec;
+ /** number of fields in old_rec */
+ ulint old_n_fields;
+ /** BTR_PCUR_ON, BTR_PCUR_BEFORE, or BTR_PCUR_AFTER, depending on
+ whether cursor was on, before, or after the old_rec record */
+ enum btr_pcur_pos_t rel_pos;
+ /** buffer block when the position was stored */
+ buf_block_t* block_when_stored;
+ /** the modify clock value of the buffer block when the cursor position
+ was stored */
+ ib_uint64_t modify_clock;
+ /** the withdraw clock value of the buffer pool when the cursor
+ position was stored */
+ ulint withdraw_clock;
+ /** btr_pcur_store_position() and btr_pcur_restore_position() state. */
+ enum pcur_pos_t pos_state;
+ /** PAGE_CUR_G, ... */
+ page_cur_mode_t search_mode;
+ /** the transaction, if we know it; otherwise this field is not defined;
+ can ONLY BE USED in error prints in fatal assertion failures! */
+ trx_t* trx_if_known;
/*-----------------------------*/
/* NOTE that the following fields may possess dynamically allocated
memory which should be freed if not needed anymore! */
- byte* old_rec_buf; /*!< NULL, or a dynamically allocated
- buffer for old_rec */
- ulint buf_size; /*!< old_rec_buf size if old_rec_buf
- is not NULL */
-};
+ /** NULL, or a dynamically allocated buffer for old_rec */
+ byte* old_rec_buf;
+ /** old_rec_buf size if old_rec_buf is not NULL */
+ ulint buf_size;
-#define BTR_PCUR_OLD_STORED 908467085
-#define BTR_PCUR_OLD_NOT_STORED 122766467
+ btr_pcur_t() :
+ btr_cur(), latch_mode(0), old_stored(false), old_rec(NULL),
+ old_n_fields(0), rel_pos(btr_pcur_pos_t(0)),
+ block_when_stored(NULL),
+ modify_clock(0), withdraw_clock(0),
+ pos_state(BTR_PCUR_NOT_POSITIONED),
+ search_mode(PAGE_CUR_UNSUPP), trx_if_known(NULL),
+ old_rec_buf(NULL), buf_size(0)
+ {
+ btr_cur.init();
+ }
+
+ /** Return the index of this persistent cursor */
+ dict_index_t* index() const { return(btr_cur.index); }
+};
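
A hedged sketch of what the new constructor and accessor buy at call sites:

    btr_pcur_t      pcur;   /* zero-initialized by the default ctor,
                            including the embedded btr_cur */

    dict_index_t*   idx = pcur.index();     /* == pcur.btr_cur.index;
                                            NULL until the cursor is
                                            opened on an index */
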
-#ifndef UNIV_NONINL
#include "btr0pcur.ic"
-#endif
#endif
diff --git a/storage/innobase/include/btr0pcur.ic b/storage/innobase/include/btr0pcur.ic
index 05ac35c1ebb..8b0da666250 100644
--- a/storage/innobase/include/btr0pcur.ic
+++ b/storage/innobase/include/btr0pcur.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2015, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,7 +27,7 @@ Created 2/23/1996 Heikki Tuuri
/*********************************************************//**
Gets the rel_pos field for a cursor whose position has been stored.
-@return BTR_PCUR_ON, ... */
+@return BTR_PCUR_ON, ... */
UNIV_INLINE
ulint
btr_pcur_get_rel_pos(
@@ -35,7 +36,7 @@ btr_pcur_get_rel_pos(
{
ut_ad(cursor);
ut_ad(cursor->old_rec);
- ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED);
+ ut_ad(cursor->old_stored);
ut_ad(cursor->pos_state == BTR_PCUR_WAS_POSITIONED
|| cursor->pos_state == BTR_PCUR_IS_POSITIONED);
@@ -45,7 +46,7 @@ btr_pcur_get_rel_pos(
#ifdef UNIV_DEBUG
/*********************************************************//**
Returns the btr cursor component of a persistent cursor.
-@return pointer to btr cursor component */
+@return pointer to btr cursor component */
UNIV_INLINE
btr_cur_t*
btr_pcur_get_btr_cur(
@@ -58,7 +59,7 @@ btr_pcur_get_btr_cur(
/*********************************************************//**
Returns the page cursor component of a persistent cursor.
-@return pointer to page cursor component */
+@return pointer to page cursor component */
UNIV_INLINE
page_cur_t*
btr_pcur_get_page_cur(
@@ -70,7 +71,7 @@ btr_pcur_get_page_cur(
/*********************************************************//**
Returns the page of a persistent cursor.
-@return pointer to the page */
+@return pointer to the page */
UNIV_INLINE
page_t*
btr_pcur_get_page(
@@ -84,7 +85,7 @@ btr_pcur_get_page(
/*********************************************************//**
Returns the buffer block of a persistent cursor.
-@return pointer to the block */
+@return pointer to the block */
UNIV_INLINE
buf_block_t*
btr_pcur_get_block(
@@ -98,7 +99,7 @@ btr_pcur_get_block(
/*********************************************************//**
Returns the record of a persistent cursor.
-@return pointer to the record */
+@return pointer to the record */
UNIV_INLINE
rec_t*
btr_pcur_get_rec(
@@ -218,12 +219,8 @@ btr_pcur_is_before_first_in_tree(
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
- if (btr_page_get_prev(btr_pcur_get_page(cursor), mtr) != FIL_NULL) {
-
- return(FALSE);
- }
-
- return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor)));
+ return !page_has_prev(btr_pcur_get_page(cursor))
+ && page_cur_is_before_first(btr_pcur_get_page_cur(cursor));
}
/*********************************************************//**
@@ -239,12 +236,8 @@ btr_pcur_is_after_last_in_tree(
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
- if (btr_page_get_next(btr_pcur_get_page(cursor), mtr) != FIL_NULL) {
-
- return(FALSE);
- }
-
- return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor)));
+ return !page_has_next(btr_pcur_get_page(cursor))
+ && page_cur_is_after_last(btr_pcur_get_page_cur(cursor));
}
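
Both predicates above are rewritten in terms of page_has_prev()/
page_has_next(), which evidently test the page header links directly and so
no longer need the mtr argument that btr_page_get_prev()/btr_page_get_next()
required.
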
/*********************************************************//**
@@ -260,7 +253,7 @@ btr_pcur_move_to_next_on_page(
page_cur_move_to_next(btr_pcur_get_page_cur(cursor));
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+ cursor->old_stored = false;
}
/*********************************************************//**
@@ -276,31 +269,13 @@ btr_pcur_move_to_prev_on_page(
page_cur_move_to_prev(btr_pcur_get_page_cur(cursor));
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-}
-
-/*********************************************************//**
-Moves the persistent cursor to the last record on the same page. */
-UNIV_INLINE
-void
-btr_pcur_move_to_last_on_page(
-/*==========================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr) /*!< in: mtr */
-{
- UT_NOT_USED(mtr);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- page_cur_set_after_last(btr_pcur_get_block(cursor),
- btr_pcur_get_page_cur(cursor));
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+ cursor->old_stored = false;
}
/*********************************************************//**
Moves the persistent cursor to the next user record in the tree. If no user
records are left, the cursor ends up 'after last in tree'.
-@return TRUE if the cursor moved forward, ending on a user record */
+@return TRUE if the cursor moved forward, ending on a user record */
UNIV_INLINE
ibool
btr_pcur_move_to_next_user_rec(
@@ -311,7 +286,7 @@ btr_pcur_move_to_next_user_rec(
{
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+ cursor->old_stored = false;
loop:
if (btr_pcur_is_after_last_on_page(cursor)) {
@@ -336,7 +311,7 @@ loop:
/*********************************************************//**
Moves the persistent cursor to the next record in the tree. If no records are
left, the cursor stays 'after last in tree'.
-@return TRUE if the cursor was not after last in tree */
+@return TRUE if the cursor was not after last in tree */
UNIV_INLINE
ibool
btr_pcur_move_to_next(
@@ -348,7 +323,7 @@ btr_pcur_move_to_next(
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+ cursor->old_stored = false;
if (btr_pcur_is_after_last_on_page(cursor)) {
@@ -388,6 +363,32 @@ btr_pcur_commit_specify_mtr(
pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
}
+/** Commits the mtr and sets the clustered index pcur and secondary index
+pcur latch mode to BTR_NO_LATCHES, that is, the cursor becomes detached.
+Function btr_pcur_store_position should be used for both cursors before
+calling this, if restoration of the cursors is wanted later.
+@param[in] pcur persistent cursor
+@param[in] sec_pcur secondary index persistent cursor
+@param[in] mtr mtr to commit */
+UNIV_INLINE
+void
+btr_pcurs_commit_specify_mtr(
+ btr_pcur_t* pcur,
+ btr_pcur_t* sec_pcur,
+ mtr_t* mtr)
+{
+ ut_ad(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
+ ut_ad(sec_pcur->pos_state == BTR_PCUR_IS_POSITIONED);
+
+ pcur->latch_mode = BTR_NO_LATCHES;
+ sec_pcur->latch_mode = BTR_NO_LATCHES;
+
+ mtr_commit(mtr);
+
+ pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
+ sec_pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
+}
+
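
A hedged usage sketch pairing the new helper with position store and
restore; latch modes and the mtr lifecycle are assumed:

    btr_pcur_store_position(&clust_pcur, &mtr);
    btr_pcur_store_position(&sec_pcur, &mtr);
    btr_pcurs_commit_specify_mtr(&clust_pcur, &sec_pcur, &mtr);

    /* both cursors are now detached (BTR_NO_LATCHES) */

    mtr_start(&mtr);
    btr_pcur_restore_position(BTR_MODIFY_LEAF, &clust_pcur, &mtr);
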
/**************************************************************//**
Sets the old_rec_buf field to NULL. */
UNIV_INLINE
@@ -396,22 +397,34 @@ btr_pcur_init(
/*==========*/
btr_pcur_t* pcur) /*!< in: persistent cursor */
{
- pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
+ pcur->old_stored = false;
pcur->old_rec_buf = NULL;
pcur->old_rec = NULL;
+
+ pcur->btr_cur.rtr_info = NULL;
+}
+
+/** Free old_rec_buf.
+@param[in] pcur Persistent cursor holding old_rec to be freed. */
+UNIV_INLINE
+void
+btr_pcur_free(
+ btr_pcur_t* pcur)
+{
+ ut_free(pcur->old_rec_buf);
}
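
Note that btr_pcur_free() releases only old_rec_buf; it is assumed here
that ut_free() tolerates a NULL pointer, the same idiom the rewritten
btr_pcur_close() below relies on.
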
/**************************************************************//**
Initializes and opens a persistent cursor to an index tree. It should be
closed with btr_pcur_close. */
UNIV_INLINE
-void
+dberr_t
btr_pcur_open_low(
/*==============*/
dict_index_t* index, /*!< in: index */
ulint level, /*!< in: level in the btree */
const dtuple_t* tuple, /*!< in: tuple on which search done */
- ulint mode, /*!< in: PAGE_CUR_L, ...;
+ page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...;
NOTE that if the search is made using a unique
prefix of a record, mode should be
PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
@@ -420,10 +433,13 @@ btr_pcur_open_low(
ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
+ unsigned line, /*!< in: line where called */
+ ib_uint64_t autoinc,/*!< in: PAGE_ROOT_AUTO_INC to be written
+ (0 if none) */
mtr_t* mtr) /*!< in: mtr */
{
btr_cur_t* btr_cursor;
+ dberr_t err = DB_SUCCESS;
/* Initialize the cursor */
@@ -436,11 +452,27 @@ btr_pcur_open_low(
btr_cursor = btr_pcur_get_btr_cur(cursor);
- btr_cur_search_to_nth_level(index, level, tuple, mode, latch_mode,
- btr_cursor, 0, file, line, mtr);
+ ut_ad(!dict_index_is_spatial(index));
+
+ err = btr_cur_search_to_nth_level(
+ index, level, tuple, mode, latch_mode,
+ btr_cursor, 0, file, line, mtr, autoinc);
+
+ if (err != DB_SUCCESS) {
+		ib::warn() << "btr_pcur_open_low: error code: " << err
+ << " level: " << level
+ << " called from file: "
+ << file << " line: " << line
+ << " table: " << index->table->name
+ << " index: " << index->name;
+ }
+
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
cursor->trx_if_known = NULL;
+
+ return(err);
}
/**************************************************************//**
@@ -452,7 +484,7 @@ btr_pcur_open_with_no_init_func(
/*============================*/
dict_index_t* index, /*!< in: index */
const dtuple_t* tuple, /*!< in: tuple on which search done */
- ulint mode, /*!< in: PAGE_CUR_L, ...;
+ page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...;
NOTE that if the search is made using a unique
prefix of a record, mode should be
PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
@@ -464,29 +496,31 @@ btr_pcur_open_with_no_init_func(
page, but assume that the caller uses his
btr search latch to protect the record! */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
- ulint has_search_latch,/*!< in: latch mode the caller
- currently has on btr_search_latch:
+ ulint has_search_latch,
+ /*!< in: latch mode the caller
+			currently has on the search system:
RW_S_LATCH, or 0 */
const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
+ unsigned line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
btr_cur_t* btr_cursor;
dberr_t err = DB_SUCCESS;
- cursor->latch_mode = latch_mode;
+ cursor->latch_mode = BTR_LATCH_MODE_WITHOUT_INTENTION(latch_mode);
cursor->search_mode = mode;
/* Search with the tree cursor */
btr_cursor = btr_pcur_get_btr_cur(cursor);
- err = btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
- btr_cursor, has_search_latch,
- file, line, mtr);
+ err = btr_cur_search_to_nth_level(
+ index, 0, tuple, mode, latch_mode, btr_cursor,
+ has_search_latch, file, line, mtr);
+
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+ cursor->old_stored = false;
cursor->trx_if_known = NULL;
return err;
@@ -518,11 +552,12 @@ btr_pcur_open_at_index_side(
btr_pcur_init(pcur);
}
- err = btr_cur_open_at_index_side(from_left, index, latch_mode,
- btr_pcur_get_btr_cur(pcur), level, mtr);
+ err = btr_cur_open_at_index_side(
+ from_left, index, latch_mode,
+ btr_pcur_get_btr_cur(pcur), level, mtr);
pcur->pos_state = BTR_PCUR_IS_POSITIONED;
- pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
+ pcur->old_stored = false;
pcur->trx_if_known = NULL;
@@ -530,16 +565,18 @@ btr_pcur_open_at_index_side(
}
/**********************************************************************//**
-Positions a cursor at a randomly chosen position within a B-tree. */
+Positions a cursor at a randomly chosen position within a B-tree.
+@return true if the index is available and we have put the cursor, false
+if the index is unavailable */
UNIV_INLINE
-void
+bool
btr_pcur_open_at_rnd_pos_func(
/*==========================*/
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /*!< in/out: B-tree pcur */
const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
+ unsigned line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
/* Initialize the cursor */
@@ -549,13 +586,17 @@ btr_pcur_open_at_rnd_pos_func(
btr_pcur_init(cursor);
- btr_cur_open_at_rnd_pos_func(index, latch_mode,
- btr_pcur_get_btr_cur(cursor),
- file, line, mtr);
+ bool available;
+
+ available = btr_cur_open_at_rnd_pos_func(index, latch_mode,
+ btr_pcur_get_btr_cur(cursor),
+ file, line, mtr);
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+ cursor->old_stored = false;
cursor->trx_if_known = NULL;
+
+ return(available);
}
/**************************************************************//**
@@ -576,18 +617,20 @@ btr_pcur_close(
/*===========*/
btr_pcur_t* cursor) /*!< in: persistent cursor */
{
- if (cursor->old_rec_buf != NULL) {
+ ut_free(cursor->old_rec_buf);
- mem_free(cursor->old_rec_buf);
-
- cursor->old_rec = NULL;
- cursor->old_rec_buf = NULL;
+ if (cursor->btr_cur.rtr_info) {
+ rtr_clean_rtr_info(cursor->btr_cur.rtr_info, true);
+ cursor->btr_cur.rtr_info = NULL;
}
+ cursor->old_rec = NULL;
+ cursor->old_rec_buf = NULL;
cursor->btr_cur.page_cur.rec = NULL;
cursor->btr_cur.page_cur.block = NULL;
+
cursor->old_rec = NULL;
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+ cursor->old_stored = false;
cursor->latch_mode = BTR_NO_LATCHES;
cursor->pos_state = BTR_PCUR_NOT_POSITIONED;
@@ -608,5 +651,5 @@ btr_pcur_move_before_first_on_page(
page_cur_set_before_first(btr_pcur_get_block(cursor),
btr_pcur_get_page_cur(cursor));
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+ cursor->old_stored = false;
}
diff --git a/storage/innobase/include/btr0scrub.h b/storage/innobase/include/btr0scrub.h
index 8029cc91005..feaf61784d0 100644
--- a/storage/innobase/include/btr0scrub.h
+++ b/storage/innobase/include/btr0scrub.h
@@ -3,13 +3,7 @@
#ifndef btr0scrub_h
#define btr0scrub_h
-#include "univ.i"
-
#include "dict0dict.h"
-#include "data0data.h"
-#include "page0cur.h"
-#include "mtr0mtr.h"
-#include "btr0types.h"
/**
* enum describing page allocation status
diff --git a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h
index 06f18c2d23f..645b3689ff6 100644
--- a/storage/innobase/include/btr0sea.h
+++ b/storage/innobase/include/btr0sea.h
@@ -27,67 +27,42 @@ Created 2/17/1996 Heikki Tuuri
#ifndef btr0sea_h
#define btr0sea_h
-#include "univ.i"
-
-#include "rem0rec.h"
#include "dict0dict.h"
-#include "btr0types.h"
-#include "mtr0mtr.h"
+#ifdef BTR_CUR_HASH_ADAPT
#include "ha0ha.h"
-/*****************************************************************//**
-Creates and initializes the adaptive search system at a database start. */
-UNIV_INTERN
+/** Creates and initializes the adaptive search system at a database start.
+@param[in] hash_size hash table size. */
void
-btr_search_sys_create(
-/*==================*/
- ulint hash_size); /*!< in: hash index hash table size */
-/*****************************************************************//**
-Frees the adaptive search system at a database shutdown. */
-UNIV_INTERN
+btr_search_sys_create(ulint hash_size);
+
+/** Resize hash index hash table.
+@param[in] hash_size hash index hash table size */
void
-btr_search_sys_free(void);
-/*=====================*/
+btr_search_sys_resize(ulint hash_size);
-/********************************************************************//**
-Disable the adaptive hash search system and empty the index. */
-UNIV_INTERN
+/** Frees the adaptive search system at a database shutdown. */
void
-btr_search_disable(void);
-/*====================*/
-/********************************************************************//**
-Enable the adaptive hash search system. */
-UNIV_INTERN
+btr_search_sys_free();
+
+/** Disable the adaptive hash search system and empty the index.
+@param need_mutex whether to acquire dict_sys->mutex */
+void
+btr_search_disable(
+ bool need_mutex);
+/** Enable the adaptive hash search system. */
void
-btr_search_enable(void);
-/*====================*/
+btr_search_enable();
-/********************************************************************//**
-Returns search info for an index.
-@return search info; search mutex reserved */
-UNIV_INLINE
-btr_search_t*
-btr_search_get_info(
-/*================*/
- dict_index_t* index) /*!< in: index */
- MY_ATTRIBUTE((nonnull));
-/*****************************************************************//**
-Creates and initializes a search info struct.
-@return own: search info struct */
-UNIV_INTERN
-btr_search_t*
-btr_search_info_create(
-/*===================*/
- mem_heap_t* heap); /*!< in: heap where created */
-/*****************************************************************//**
-Returns the value of ref_count. The value is protected by
-btr_search_latch.
-@return ref_count value. */
-UNIV_INTERN
+/** Returns the value of ref_count. The value is protected by latch.
+@param[in] info search info
+@param[in] index index identifier
+@return ref_count value. */
ulint
btr_search_info_get_ref_count(
-/*==========================*/
- btr_search_t* info); /*!< in: search info. */
+ btr_search_t* info,
+ dict_index_t* index);
+
/*********************************************************************//**
Updates the search info. */
UNIV_INLINE
@@ -96,110 +71,212 @@ btr_search_info_update(
/*===================*/
dict_index_t* index, /*!< in: index of the cursor */
btr_cur_t* cursor);/*!< in: cursor which was just positioned */
-/******************************************************************//**
-Tries to guess the right search position based on the hash search info
+
+/** Tries to guess the right search position based on the hash search info
of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
and the function returns TRUE, then cursor->up_match and cursor->low_match
both have sensible values.
-@return TRUE if succeeded */
-UNIV_INTERN
+@param[in,out] index index
+@param[in,out] info index search info
+@param[in] tuple logical record
+@param[in] mode PAGE_CUR_L, ...
+@param[in] latch_mode BTR_SEARCH_LEAF, ...;
+ NOTE that only if has_search_latch is 0 will we
+ have a latch set on the cursor page; otherwise
+ we assume the caller uses their search latch
+ to protect the record!
+@param[out] cursor tree cursor
+@param[in] has_search_latch
+ latch mode the caller currently has on
+ search system: RW_S/X_LATCH or 0
+@param[in] mtr mini transaction
+@return TRUE if succeeded */
ibool
btr_search_guess_on_hash(
-/*=====================*/
- dict_index_t* index, /*!< in: index */
- btr_search_t* info, /*!< in: index search info */
- const dtuple_t* tuple, /*!< in: logical record */
- ulint mode, /*!< in: PAGE_CUR_L, ... */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
- btr_cur_t* cursor, /*!< out: tree cursor */
- ulint has_search_latch,/*!< in: latch mode the caller
- currently has on btr_search_latch:
- RW_S_LATCH, RW_X_LATCH, or 0 */
- mtr_t* mtr); /*!< in: mtr */
-/********************************************************************//**
-Moves or deletes hash entries for moved records. If new_page is already hashed,
-then the hash index for page, if any, is dropped. If new_page is not hashed,
-and page is hashed, then a new hash index is built to new_page with the same
-parameters as page (this often happens when a page is split). */
-UNIV_INTERN
+ dict_index_t* index,
+ btr_search_t* info,
+ const dtuple_t* tuple,
+ ulint mode,
+ ulint latch_mode,
+ btr_cur_t* cursor,
+ ulint has_search_latch,
+ mtr_t* mtr);
+
+/** Moves or deletes hash entries for moved records. If new_page is already
+hashed, then the hash index for page, if any, is dropped. If new_page is not
+hashed, and page is hashed, then a new hash index is built to new_page with the
+same parameters as page (this often happens when a page is split).
+@param[in,out] new_block records are copied to this page.
+@param[in,out] block index page from which records are copied, and the
+ copied records will be deleted from this page.
+@param[in,out] index record descriptor */
void
btr_search_move_or_delete_hash_entries(
-/*===================================*/
- buf_block_t* new_block, /*!< in: records are copied
- to this page */
- buf_block_t* block, /*!< in: index page from which
- records were copied, and the
- copied records will be deleted
- from this page */
- dict_index_t* index); /*!< in: record descriptor */
-/********************************************************************//**
-Drops a page hash index. */
-UNIV_INTERN
+ buf_block_t* new_block,
+ buf_block_t* block,
+ dict_index_t* index);
+
+/** Drop any adaptive hash index entries that point to an index page.
+@param[in,out] block block containing index page, s- or x-latched, or an
+ index page for which we know that
+ block->buf_fix_count == 0 or it is an index page which
+ has already been removed from the buf_pool->page_hash
+ i.e.: it is in state BUF_BLOCK_REMOVE_HASH */
void
-btr_search_drop_page_hash_index(
-/*============================*/
- buf_block_t* block); /*!< in: block containing index page,
- s- or x-latched, or an index page
- for which we know that
- block->buf_fix_count == 0 */
+btr_search_drop_page_hash_index(buf_block_t* block);
+
/** Drop possible adaptive hash index entries when a page is evicted
-from the buffer pool or freed in a file, or the index is being dropped. */
-UNIV_INTERN
-void
-btr_search_drop_page_hash_when_freed(ulint space, ulint page_no);
-/********************************************************************//**
-Updates the page hash index when a single record is inserted on a page. */
-UNIV_INTERN
+from the buffer pool or freed in a file, or the index is being dropped.
+@param[in] page_id page id */
+void btr_search_drop_page_hash_when_freed(const page_id_t page_id);
+
+/** Updates the page hash index when a single record is inserted on a page.
+@param[in] cursor cursor which was positioned to the place to insert
+ using btr_cur_search_, and the new record has been
+ inserted next to the cursor. */
void
-btr_search_update_hash_node_on_insert(
-/*==================================*/
- btr_cur_t* cursor);/*!< in: cursor which was positioned to the
+btr_search_update_hash_node_on_insert(btr_cur_t* cursor);
+
+/** Updates the page hash index when a single record is inserted on a page.
+@param[in] cursor cursor which was positioned to the
place to insert using btr_cur_search_...,
and the new record has been inserted next
to the cursor */
-/********************************************************************//**
-Updates the page hash index when a single record is inserted on a page. */
-UNIV_INTERN
void
-btr_search_update_hash_on_insert(
-/*=============================*/
- btr_cur_t* cursor);/*!< in: cursor which was positioned to the
- place to insert using btr_cur_search_...,
- and the new record has been inserted next
- to the cursor */
-/********************************************************************//**
-Updates the page hash index when a single record is deleted from a page. */
-UNIV_INTERN
+btr_search_update_hash_on_insert(btr_cur_t* cursor);
+
+/** Updates the page hash index when a single record is deleted from a page.
+@param[in] cursor cursor which was positioned on the record to delete
+ using btr_cur_search_, the record is not yet deleted.*/
void
-btr_search_update_hash_on_delete(
-/*=============================*/
- btr_cur_t* cursor);/*!< in: cursor which was positioned on the
- record to delete using btr_cur_search_...,
- the record is not yet deleted */
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-/********************************************************************//**
-Validates the search system.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-btr_search_validate(void);
-/*======================*/
-#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
+btr_search_update_hash_on_delete(btr_cur_t* cursor);
+
+/** Validates the search system.
+@return true if ok */
+bool
+btr_search_validate();
+
+/** X-Lock the search latch (corresponding to given index)
+@param[in] index index handler */
+UNIV_INLINE
+void
+btr_search_x_lock(const dict_index_t* index);
+
+/** X-Unlock the search latch (corresponding to given index)
+@param[in] index index handler */
+UNIV_INLINE
+void
+btr_search_x_unlock(const dict_index_t* index);
+
+/** Lock all search latches in exclusive mode. */
+UNIV_INLINE
+void
+btr_search_x_lock_all();
+
+/** Unlock all search latches from exclusive mode. */
+UNIV_INLINE
+void
+btr_search_x_unlock_all();
+
+/** S-Lock the search latch (corresponding to given index)
+@param[in] index index handler */
+UNIV_INLINE
+void
+btr_search_s_lock(const dict_index_t* index);
+
+/** S-Unlock the search latch (corresponding to given index)
+@param[in] index index handler */
+UNIV_INLINE
+void
+btr_search_s_unlock(const dict_index_t* index);
+
+/** Lock all search latches in shared mode. */
+UNIV_INLINE
+void
+btr_search_s_lock_all();
+
+#ifdef UNIV_DEBUG
+/** Check if thread owns all the search latches.
+@param[in] mode lock mode to check
+@retval true if owns all of them
+@retval false if does not own some of them */
+UNIV_INLINE
+bool
+btr_search_own_all(ulint mode);
+
+/** Check if thread owns any of the search latches.
+@param[in] mode lock mode to check
+@retval true if owns any of them
+@retval false if owns no search latch */
+UNIV_INLINE
+bool
+btr_search_own_any(ulint mode);
+#endif /* UNIV_DEBUG */
+
+/** Unlock all search latches from shared mode. */
+UNIV_INLINE
+void
+btr_search_s_unlock_all();
+
+/** Get the latch based on index attributes.
+A latch is selected from an array of latches using the pair (index-id, space-id).
+@param[in] index index handler
+@return latch */
+UNIV_INLINE
+rw_lock_t*
+btr_get_search_latch(const dict_index_t* index);
+
+/** Get the hash-table based on index attributes.
+A table is selected from an array of tables using the pair (index-id, space-id).
+@param[in] index index handler
+@return hash table */
+UNIV_INLINE
+hash_table_t*
+btr_get_search_table(const dict_index_t* index);
+#else /* BTR_CUR_HASH_ADAPT */
+# define btr_search_sys_create(size)
+# define btr_search_drop_page_hash_index(block)
+# define btr_search_s_lock(index)
+# define btr_search_s_unlock(index)
+# define btr_search_s_lock_all(index)
+# define btr_search_s_unlock_all(index)
+# define btr_search_x_lock(index)
+# define btr_search_x_unlock(index)
+# define btr_search_info_update(index, cursor)
+# define btr_search_move_or_delete_hash_entries(new_block, block, index)
+# define btr_search_update_hash_on_insert(cursor)
+# define btr_search_update_hash_on_delete(cursor)
+# define btr_search_sys_resize(hash_size)
+#endif /* BTR_CUR_HASH_ADAPT */
+
+#ifdef BTR_CUR_ADAPT
+/** Create and initialize search info.
+@param[in,out] heap heap where created
+@return own: search info struct */
+UNIV_INLINE
+btr_search_t*
+btr_search_info_create(mem_heap_t* heap)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** @return the search info of an index */
+UNIV_INLINE
+btr_search_t*
+btr_search_get_info(dict_index_t* index)
+{
+ return(index->search_info);
+}
+#endif /* BTR_CUR_ADAPT */
/** The search info struct in an index */
struct btr_search_t{
- ulint ref_count; /*!< Number of blocks in this index tree
- that have search index built
- i.e. block->index points to this index.
- Protected by btr_search_latch except
- when during initialization in
- btr_search_info_create(). */
-
/* @{ The following fields are not protected by any latch.
Unfortunately, this means that they must be aligned to
the machine word, i.e., they cannot be turned into bit-fields. */
buf_block_t* root_guess;/*!< the root page frame when it was last time
fetched, or NULL */
+ ulint withdraw_clock; /*!< the withdraw clock value of the buffer
+ pool when root_guess was stored */
+#ifdef BTR_CUR_HASH_ADAPT
ulint hash_analysis; /*!< when this exceeds
BTR_SEARCH_HASH_ANALYSIS, the hash
analysis starts; this is reset if no
@@ -215,6 +292,13 @@ struct btr_search_t{
using the hash index;
the range is 0 .. BTR_SEARCH_BUILD_LIMIT + 5 */
/* @} */
+ ulint ref_count; /*!< Number of blocks in this index tree
+ that have search index built
+ i.e. block->index points to this index.
+ Protected by search latch except
+ when during initialization in
+ btr_search_info_create(). */
+
/*---------------------- @{ */
ulint n_fields; /*!< recommended prefix length for hash search:
number of full fields */
@@ -234,6 +318,7 @@ struct btr_search_t{
far */
ulint n_searches; /*!< number of searches */
#endif /* UNIV_SEARCH_PERF_STAT */
+#endif /* BTR_CUR_HASH_ADAPT */
#ifdef UNIV_DEBUG
ulint magic_n; /*!< magic number @see BTR_SEARCH_MAGIC_N */
/** value of btr_search_t::magic_n, used in assertions */
@@ -241,13 +326,17 @@ struct btr_search_t{
#endif /* UNIV_DEBUG */
};
+#ifdef BTR_CUR_HASH_ADAPT
/** The hash index system */
struct btr_search_sys_t{
- hash_table_t* hash_index; /*!< the adaptive hash index,
+ hash_table_t** hash_tables; /*!< the adaptive hash tables,
mapping dtuple_fold values
to rec_t pointers on index pages */
};
+/** Latches protecting access to adaptive hash index. */
+extern rw_lock_t** btr_search_latches;
+
/** The adaptive hash index */
extern btr_search_sys_t* btr_search_sys;
@@ -275,9 +364,8 @@ the hash index */
over calls from MySQL. If we notice someone waiting for the latch, we
again set this much timeout. This is to reduce contention. */
#define BTR_SEA_TIMEOUT 10000
+#endif /* BTR_CUR_HASH_ADAPT */
-#ifndef UNIV_NONINL
#include "btr0sea.ic"
-#endif
#endif
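
When BTR_CUR_HASH_ADAPT is not defined, the #else branch above turns the
adaptive hash index entry points into empty macros, so call sites need no
conditional compilation of their own. Roughly:

	/* Both lines compile to nothing in a build without
	BTR_CUR_HASH_ADAPT, and to real calls otherwise. */
	btr_search_drop_page_hash_index(block);
	btr_search_update_hash_on_delete(cursor);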
diff --git a/storage/innobase/include/btr0sea.ic b/storage/innobase/include/btr0sea.ic
index 9a512427fa9..4972de16064 100644
--- a/storage/innobase/include/btr0sea.ic
+++ b/storage/innobase/include/btr0sea.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,27 +27,32 @@ Created 2/17/1996 Heikki Tuuri
#include "btr0cur.h"
#include "buf0buf.h"
+/** Create and initialize search info.
+@param[in,out] heap heap where created
+@return own: search info struct */
+UNIV_INLINE
+btr_search_t*
+btr_search_info_create(mem_heap_t* heap)
+{
+ btr_search_t* info = static_cast<btr_search_t*>(
+ mem_heap_zalloc(heap, sizeof(btr_search_t)));
+ ut_d(info->magic_n = BTR_SEARCH_MAGIC_N);
+#ifdef BTR_CUR_HASH_ADAPT
+ info->n_fields = 1;
+ info->left_side = TRUE;
+#endif /* BTR_CUR_HASH_ADAPT */
+ return(info);
+}
+
+#ifdef BTR_CUR_HASH_ADAPT
/*********************************************************************//**
Updates the search info. */
-UNIV_INTERN
void
btr_search_info_update_slow(
/*========================*/
btr_search_t* info, /*!< in/out: search info */
btr_cur_t* cursor);/*!< in: cursor which was just positioned */
-/********************************************************************//**
-Returns search info for an index.
-@return search info; search mutex reserved */
-UNIV_INLINE
-btr_search_t*
-btr_search_get_info(
-/*================*/
- dict_index_t* index) /*!< in: index */
-{
- return(index->search_info);
-}
-
/*********************************************************************//**
Updates the search info. */
UNIV_INLINE
@@ -57,13 +62,14 @@ btr_search_info_update(
dict_index_t* index, /*!< in: index of the cursor */
btr_cur_t* cursor) /*!< in: cursor which was just positioned */
{
- btr_search_t* info;
+ ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_S));
+ ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ if (dict_index_is_spatial(index) || !btr_search_enabled) {
+ return;
+ }
+ btr_search_t* info;
info = btr_search_get_info(index);
info->hash_analysis++;
@@ -80,3 +86,145 @@ btr_search_info_update(
btr_search_info_update_slow(info, cursor);
}
+
+/** X-Lock the search latch (corresponding to given index)
+@param[in] index index handler */
+UNIV_INLINE
+void
+btr_search_x_lock(const dict_index_t* index)
+{
+ rw_lock_x_lock(btr_get_search_latch(index));
+}
+
+/** X-Unlock the search latch (corresponding to given index)
+@param[in] index index handler */
+UNIV_INLINE
+void
+btr_search_x_unlock(const dict_index_t* index)
+{
+ rw_lock_x_unlock(btr_get_search_latch(index));
+}
+
+/** Lock all search latches in exclusive mode. */
+UNIV_INLINE
+void
+btr_search_x_lock_all()
+{
+ for (ulint i = 0; i < btr_ahi_parts; ++i) {
+ rw_lock_x_lock(btr_search_latches[i]);
+ }
+}
+
+/** Unlock all search latches from exclusive mode. */
+UNIV_INLINE
+void
+btr_search_x_unlock_all()
+{
+ for (ulint i = 0; i < btr_ahi_parts; ++i) {
+ rw_lock_x_unlock(btr_search_latches[i]);
+ }
+}
+
+/** S-Lock the search latch (corresponding to given index)
+@param[in] index index handler */
+UNIV_INLINE
+void
+btr_search_s_lock(const dict_index_t* index)
+{
+ rw_lock_s_lock(btr_get_search_latch(index));
+}
+
+/** S-Unlock the search latch (corresponding to given index)
+@param[in] index index handler */
+UNIV_INLINE
+void
+btr_search_s_unlock(const dict_index_t* index)
+{
+ rw_lock_s_unlock(btr_get_search_latch(index));
+}
+
+/** Lock all search latches in shared mode. */
+UNIV_INLINE
+void
+btr_search_s_lock_all()
+{
+ for (ulint i = 0; i < btr_ahi_parts; ++i) {
+ rw_lock_s_lock(btr_search_latches[i]);
+ }
+}
+
+/** Unlock all search latches from shared mode. */
+UNIV_INLINE
+void
+btr_search_s_unlock_all()
+{
+ for (ulint i = 0; i < btr_ahi_parts; ++i) {
+ rw_lock_s_unlock(btr_search_latches[i]);
+ }
+}
+
+#ifdef UNIV_DEBUG
+/** Check if thread owns all the search latches.
+@param[in] mode lock mode to check
+@retval true if owns all of them
+@retval false if does not own some of them */
+UNIV_INLINE
+bool
+btr_search_own_all(ulint mode)
+{
+ for (ulint i = 0; i < btr_ahi_parts; ++i) {
+ if (!rw_lock_own(btr_search_latches[i], mode)) {
+ return(false);
+ }
+ }
+ return(true);
+}
+
+/** Check if thread owns any of the search latches.
+@param[in] mode lock mode to check
+@retval true if owns any of them
+@retval false if owns no search latch */
+UNIV_INLINE
+bool
+btr_search_own_any(ulint mode)
+{
+ for (ulint i = 0; i < btr_ahi_parts; ++i) {
+ if (rw_lock_own(btr_search_latches[i], mode)) {
+ return(true);
+ }
+ }
+ return(false);
+}
+#endif /* UNIV_DEBUG */
+
+/** Get the adaptive hash search index latch for a b-tree.
+@param[in] index b-tree index
+@return latch */
+UNIV_INLINE
+rw_lock_t*
+btr_get_search_latch(const dict_index_t* index)
+{
+ ut_ad(index != NULL);
+
+ ulint ifold = ut_fold_ulint_pair(static_cast<ulint>(index->id),
+ static_cast<ulint>(index->space));
+
+ return(btr_search_latches[ifold % btr_ahi_parts]);
+}
+
+/** Get the hash-table based on index attributes.
+A table is selected from an array of tables using the pair (index-id, space-id).
+@param[in] index index handler
+@return hash table */
+UNIV_INLINE
+hash_table_t*
+btr_get_search_table(const dict_index_t* index)
+{
+ ut_ad(index != NULL);
+
+ ulint ifold = ut_fold_ulint_pair(static_cast<ulint>(index->id),
+ static_cast<ulint>(index->space));
+
+ return(btr_search_sys->hash_tables[ifold % btr_ahi_parts]);
+}
+#endif /* BTR_CUR_HASH_ADAPT */
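
The two accessors above fix the partitioning scheme: the fold of
(index id, space id) modulo btr_ahi_parts selects the partition, so a
given index always maps to the same latch/hash-table pair. Since the
lock-all helpers acquire the latches in ascending array order, concurrent
lock-all callers cannot deadlock against each other. An illustrative
restatement of the selection arithmetic (the constants are made up):

	/* Illustrative only: index id 42 in space 7 always lands in
	the same partition, pairing one latch with one hash table. */
	ulint		fold  = ut_fold_ulint_pair(42, 7);
	ulint		part  = fold % btr_ahi_parts;

	rw_lock_t*	latch = btr_search_latches[part];
	hash_table_t*	table = btr_search_sys->hash_tables[part];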
diff --git a/storage/innobase/include/btr0types.h b/storage/innobase/include/btr0types.h
index 56705932cf1..22e1ef11a68 100644
--- a/storage/innobase/include/btr0types.h
+++ b/storage/innobase/include/btr0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,11 +26,9 @@ Created 2/17/1996 Heikki Tuuri
#ifndef btr0types_h
#define btr0types_h
-#include "univ.i"
-
-#include "rem0types.h"
#include "page0types.h"
-#include "sync0rw.h"
+#include "page0size.h"
+#include "rem0types.h"
/** Persistent cursor */
struct btr_pcur_t;
@@ -39,165 +37,53 @@ struct btr_cur_t;
/** B-tree search information for the adaptive hash index */
struct btr_search_t;
-#ifndef UNIV_HOTBACKUP
-
-/** @brief The latch protecting the adaptive search system
-
-This latch protects the
-(1) hash index;
-(2) columns of a record to which we have a pointer in the hash index;
-
-but does NOT protect:
-
-(3) next record offset field in a record;
-(4) next or previous records on the same page.
-
-Bear in mind (3) and (4) when using the hash index.
-*/
-extern rw_lock_t* btr_search_latch_temp;
-
-#endif /* UNIV_HOTBACKUP */
-
-/** The latch protecting the adaptive search system */
-#define btr_search_latch (*btr_search_latch_temp)
-
-/** Flag: has the search system been enabled?
-Protected by btr_search_latch. */
+#ifdef BTR_CUR_HASH_ADAPT
+/** Is the search system enabled?
+The search system is protected by an array of latches. */
extern char btr_search_enabled;
-#ifdef UNIV_BLOB_DEBUG
-# include "buf0types.h"
-/** An index->blobs entry for keeping track of off-page column references */
-struct btr_blob_dbg_t;
-
-/** Insert to index->blobs a reference to an off-page column.
-@param index the index tree
-@param b the reference
-@param ctx context (for logging) */
-UNIV_INTERN
-void
-btr_blob_dbg_rbt_insert(
-/*====================*/
- dict_index_t* index, /*!< in/out: index tree */
- const btr_blob_dbg_t* b, /*!< in: the reference */
- const char* ctx) /*!< in: context (for logging) */
- MY_ATTRIBUTE((nonnull));
-
-/** Remove from index->blobs a reference to an off-page column.
-@param index the index tree
-@param b the reference
-@param ctx context (for logging) */
-UNIV_INTERN
-void
-btr_blob_dbg_rbt_delete(
-/*====================*/
- dict_index_t* index, /*!< in/out: index tree */
- const btr_blob_dbg_t* b, /*!< in: the reference */
- const char* ctx) /*!< in: context (for logging) */
- MY_ATTRIBUTE((nonnull));
-
-/**************************************************************//**
-Add to index->blobs any references to off-page columns from a record.
-@return number of references added */
-UNIV_INTERN
-ulint
-btr_blob_dbg_add_rec(
-/*=================*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in/out: index */
- const ulint* offsets,/*!< in: offsets */
- const char* ctx) /*!< in: context (for logging) */
- MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Remove from index->blobs any references to off-page columns from a record.
-@return number of references removed */
-UNIV_INTERN
-ulint
-btr_blob_dbg_remove_rec(
-/*====================*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in/out: index */
- const ulint* offsets,/*!< in: offsets */
- const char* ctx) /*!< in: context (for logging) */
- MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Count and add to index->blobs any references to off-page columns
-from records on a page.
-@return number of references added */
-UNIV_INTERN
-ulint
-btr_blob_dbg_add(
-/*=============*/
- const page_t* page, /*!< in: rewritten page */
- dict_index_t* index, /*!< in/out: index */
- const char* ctx) /*!< in: context (for logging) */
- MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Count and remove from index->blobs any references to off-page columns
-from records on a page.
-Used when reorganizing a page, before copying the records.
-@return number of references removed */
-UNIV_INTERN
-ulint
-btr_blob_dbg_remove(
-/*================*/
- const page_t* page, /*!< in: b-tree page */
- dict_index_t* index, /*!< in/out: index */
- const char* ctx) /*!< in: context (for logging) */
- MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Restore in index->blobs any references to off-page columns
-Used when page reorganize fails due to compressed page overflow. */
-UNIV_INTERN
-void
-btr_blob_dbg_restore(
-/*=================*/
- const page_t* npage, /*!< in: page that failed to compress */
- const page_t* page, /*!< in: copy of original page */
- dict_index_t* index, /*!< in/out: index */
- const char* ctx) /*!< in: context (for logging) */
- MY_ATTRIBUTE((nonnull));
-
-/** Operation that processes the BLOB references of an index record
-@param[in] rec record on index page
-@param[in/out] index the index tree of the record
-@param[in] offsets rec_get_offsets(rec,index)
-@param[in] ctx context (for logging)
-@return number of BLOB references processed */
-typedef ulint (*btr_blob_dbg_op_f)
-(const rec_t* rec,dict_index_t* index,const ulint* offsets,const char* ctx);
-
-/**************************************************************//**
-Count and process all references to off-page columns on a page.
-@return number of references processed */
-UNIV_INTERN
-ulint
-btr_blob_dbg_op(
-/*============*/
- const page_t* page, /*!< in: B-tree leaf page */
- const rec_t* rec, /*!< in: record to start from
- (NULL to process the whole page) */
- dict_index_t* index, /*!< in/out: index */
- const char* ctx, /*!< in: context (for logging) */
- const btr_blob_dbg_op_f op) /*!< in: operation on records */
- MY_ATTRIBUTE((nonnull(1,3,4,5)));
-#else /* UNIV_BLOB_DEBUG */
-# define btr_blob_dbg_add_rec(rec, index, offsets, ctx) ((void) 0)
-# define btr_blob_dbg_add(page, index, ctx) ((void) 0)
-# define btr_blob_dbg_remove_rec(rec, index, offsets, ctx) ((void) 0)
-# define btr_blob_dbg_remove(page, index, ctx) ((void) 0)
-# define btr_blob_dbg_restore(npage, page, index, ctx) ((void) 0)
-# define btr_blob_dbg_op(page, rec, index, ctx, op) ((void) 0)
-#endif /* UNIV_BLOB_DEBUG */
+/** Number of adaptive hash index partitions. */
+extern ulong btr_ahi_parts;
+#endif /* BTR_CUR_HASH_ADAPT */
/** The size of a reference to data stored on a different page.
The reference is stored at the end of the prefix of the field
in the index record. */
-#define BTR_EXTERN_FIELD_REF_SIZE 20
-
-/** A BLOB field reference full of zero, for use in assertions and tests.
-Initially, BLOB field references are set to zero, in
-dtuple_convert_big_rec(). */
-extern const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE];
+#define BTR_EXTERN_FIELD_REF_SIZE FIELD_REF_SIZE
+
+/** If the data do not exceed this size, they are stored locally. */
+#define BTR_EXTERN_LOCAL_STORED_MAX_SIZE \
+ (BTR_EXTERN_FIELD_REF_SIZE * 2)
+
+/** The information used for creating a new index tree when
+applying a TRUNCATE log record during recovery */
+struct btr_create_t {
+
+ explicit btr_create_t(const byte* const ptr)
+ :
+ format_flags(),
+ n_fields(),
+ field_len(),
+ fields(ptr),
+ trx_id_pos(ULINT_UNDEFINED)
+ {
+ /* Do nothing */
+ }
+
+ /** Page format */
+ ulint format_flags;
+
+ /** Number of index fields */
+ ulint n_fields;
+
+ /** The length of the encoded meta-data */
+ ulint field_len;
+
+ /** Field meta-data, encoded. */
+ const byte* const fields;
+
+ /** Position of trx-id column. */
+ ulint trx_id_pos;
+};
#endif
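
With the 20-byte external field reference, BTR_EXTERN_LOCAL_STORED_MAX_SIZE
works out to 40 bytes: a field no longer than twice the reference itself is
cheaper to keep inline than to store off-page. A one-line sketch of the
resulting check (field_len is a hypothetical name):

	/* 40 == 2 * BTR_EXTERN_FIELD_REF_SIZE (the 20-byte reference) */
	bool	store_locally = field_len <= BTR_EXTERN_LOCAL_STORED_MAX_SIZE;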
diff --git a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
index be4415e6557..1697c8649c0 100644
--- a/storage/innobase/include/buf0buddy.h
+++ b/storage/innobase/include/buf0buddy.h
@@ -31,7 +31,6 @@ Created December 2006 by Marko Makela
# define UNIV_INLINE
#endif
-#include "univ.i"
#include "buf0types.h"
/**********************************************************************//**
@@ -39,7 +38,7 @@ Allocate a block. The thread calling this function must hold
buf_pool->mutex and must not hold buf_pool->zip_mutex or any
block->mutex. The buf_pool->mutex may be released and reacquired.
This function should only be used for allocating compressed page frames.
-@return allocated block, never NULL */
+@return allocated block, never NULL */
UNIV_INLINE
byte*
buf_buddy_alloc(
@@ -70,8 +69,24 @@ buf_buddy_free(
up to UNIV_PAGE_SIZE */
MY_ATTRIBUTE((nonnull));
-#ifndef UNIV_NONINL
-# include "buf0buddy.ic"
-#endif
+/** Reallocate a block.
+@param[in] buf_pool buffer pool instance
+@param[in] buf block to be reallocated, must be pointed
+to by the buffer pool
+@param[in] size block size, up to UNIV_PAGE_SIZE
+@retval false if failed because of no free blocks. */
+bool
+buf_buddy_realloc(
+ buf_pool_t* buf_pool,
+ void* buf,
+ ulint size);
+
+/** Combine all pairs of free buddies.
+@param[in] buf_pool buffer pool instance */
+void
+buf_buddy_condense_free(
+ buf_pool_t* buf_pool);
+
+#include "buf0buddy.ic"
#endif /* buf0buddy_h */
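
The two new buddy-allocator entry points serve online buffer pool resizing:
compressed-page frames sitting in the to-be-withdrawn area must be
relocated, and condensing free buddies turns scattered small free blocks
into whole reclaimable ones. A hedged sketch of the retry pattern a
withdrawal loop might use (everything but the two new functions is an
assumption here):

	/* Try to move a compressed frame out of the withdraw area.
	buf_buddy_realloc() returns false when no suitable free block
	exists; condensing the free lists may create one. */
	if (!buf_buddy_realloc(buf_pool, bpage->zip.data, size)) {
		buf_buddy_condense_free(buf_pool);
		/* retry on the next pass of the withdrawal loop */
	}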
diff --git a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic
index 4ff19eb5ed2..4afa795e762 100644
--- a/storage/innobase/include/buf0buddy.ic
+++ b/storage/innobase/include/buf0buddy.ic
@@ -30,15 +30,12 @@ Created December 2006 by Marko Makela
#include "buf0buf.h"
#include "buf0buddy.h"
-#include "ut0ut.h"
-#include "sync0sync.h"
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
buf_pool->mutex and must not hold buf_pool->zip_mutex or any block->mutex.
The buf_pool_mutex may be released and reacquired.
-@return allocated block, never NULL */
-UNIV_INTERN
+@return allocated block, never NULL */
void*
buf_buddy_alloc_low(
/*================*/
@@ -54,7 +51,6 @@ buf_buddy_alloc_low(
/**********************************************************************//**
Deallocate a block. */
-UNIV_INTERN
void
buf_buddy_free_low(
/*===============*/
@@ -67,7 +63,7 @@ buf_buddy_free_low(
/**********************************************************************//**
Get the index of buf_pool->zip_free[] for a given block size.
-@return index of buf_pool->zip_free[], or BUF_BUDDY_SIZES */
+@return index of buf_pool->zip_free[], or BUF_BUDDY_SIZES */
UNIV_INLINE
ulint
buf_buddy_get_slot(
@@ -91,7 +87,7 @@ Allocate a block. The thread calling this function must hold
buf_pool->mutex and must not hold buf_pool->zip_mutex or any
block->mutex. The buf_pool->mutex may be released and reacquired.
This function should only be used for allocating compressed page frames.
-@return allocated block, never NULL */
+@return allocated block, never NULL */
UNIV_INLINE
byte*
buf_buddy_alloc(
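
buf_buddy_get_slot(), shown in the hunk above, maps a block size to an
index into buf_pool->zip_free[], where slot i holds free blocks of size
BUF_BUDDY_LOW << i. A standalone sketch of that mapping, assuming the
power-of-two size classes from buf0types.h:

	/* Smallest slot whose size class covers "size"; a block of
	UNIV_PAGE_SIZE maps to BUF_BUDDY_SIZES. */
	ulint
	buddy_slot(ulint size)
	{
		ulint	i;

		for (i = 0; (BUF_BUDDY_LOW << i) < size; i++) {
		}

		return(i);
	}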
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index b6ad0b85b19..d120dc36091 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2018, MariaDB Corporation.
+Copyright (c) 2013, 2020 MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,19 +27,26 @@ Created 11/5/1995 Heikki Tuuri
#ifndef buf0buf_h
#define buf0buf_h
-#include "univ.i"
+/** Magic value to use instead of checksums when they are disabled */
+#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
+
#include "fil0fil.h"
#include "mtr0types.h"
#include "buf0types.h"
+#include "span.h"
#ifndef UNIV_INNOCHECKSUM
#include "hash0hash.h"
#include "ut0byte.h"
#include "page0types.h"
-#ifndef UNIV_HOTBACKUP
#include "ut0rbt.h"
#include "os0proc.h"
#include "log0log.h"
+#include "srv0srv.h"
#include "my_atomic.h"
+#include <ostream>
+
+// Forward declaration
+struct fil_addr_t;
/** @name Modes for buf_page_get_gen */
/* @{ */
@@ -88,17 +95,18 @@ Created 11/5/1995 Heikki Tuuri
extern buf_pool_t* buf_pool_ptr; /*!< The buffer pools
of the database */
-#ifdef UNIV_DEBUG
-extern ibool buf_debug_prints;/*!< If this is set TRUE, the program
- prints info whenever read or flush
- occurs */
-#endif /* UNIV_DEBUG */
-extern ulint srv_buf_pool_instances;
-extern ulint srv_buf_pool_curr_size;
-#else /* !UNIV_HOTBACKUP */
-extern buf_block_t* back_block1; /*!< first block, for --apply-log */
-extern buf_block_t* back_block2; /*!< second block, for page reorganize */
-#endif /* !UNIV_HOTBACKUP */
+
+extern volatile bool buf_pool_withdrawing; /*!< true when withdrawing buffer
+ pool pages might cause page relocation */
+
+extern volatile ulint buf_withdraw_clock; /*!< the clock is incremented
+ every time a pointer to a page may
+ become obsolete */
+
+# ifdef UNIV_DEBUG
+extern my_bool buf_disable_resize_buffer_pool_debug; /*!< if TRUE, resizing
+ buffer pool is not allowed. */
+# endif /* UNIV_DEBUG */
/** @brief States of a control block
@see buf_page_t
@@ -206,8 +214,18 @@ struct buf_pools_list_size_t {
ulint unzip_LRU_bytes; /*!< unzip_LRU size in bytes */
ulint flush_list_bytes; /*!< flush_list size in bytes */
};
+#endif /* !UNIV_INNOCHECKSUM */
-#ifndef UNIV_HOTBACKUP
+/** Print the given page_id_t object.
+@param[in,out] out the output stream
+@param[in] page_id the page_id_t object to be printed
+@return the output stream */
+std::ostream&
+operator<<(
+ std::ostream& out,
+ const page_id_t page_id);
+
+#ifndef UNIV_INNOCHECKSUM
/********************************************************************//**
Acquire mutex on all buffer pool instances */
UNIV_INLINE
@@ -224,8 +242,7 @@ buf_pool_mutex_exit_all(void);
/********************************************************************//**
Creates the buffer pool.
-@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
-UNIV_INTERN
+@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
dberr_t
buf_pool_init(
/*=========*/
@@ -234,42 +251,53 @@ buf_pool_init(
/********************************************************************//**
Frees the buffer pool at shutdown. This must not be invoked before
freeing all mutexes. */
-UNIV_INTERN
void
buf_pool_free(
/*==========*/
ulint n_instances); /*!< in: number of instances to free */
-/********************************************************************//**
-Clears the adaptive hash index on all pages in the buffer pool. */
-UNIV_INTERN
-void
-buf_pool_clear_hash_index(void);
-/*===========================*/
+/** Determines if a block is intended to be withdrawn.
+@param[in] buf_pool buffer pool instance
+@param[in] block pointer to control block
+@retval true if it will be withdrawn */
+bool
+buf_block_will_withdrawn(
+ buf_pool_t* buf_pool,
+ const buf_block_t* block);
+
+/** Determines if a frame is intended to be withdrawn.
+@param[in] buf_pool buffer pool instance
+@param[in] ptr pointer to a frame
+@retval true if it will be withdrawn */
+bool
+buf_frame_will_withdrawn(
+ buf_pool_t* buf_pool,
+ const byte* ptr);
-/********************************************************************//**
-Relocate a buffer control block. Relocates the block on the LRU list
-and in buf_pool->page_hash. Does not relocate bpage->list.
-The caller must take care of relocating bpage->list. */
-UNIV_INTERN
+/** This is the thread for resizing the buffer pool. It waits for an event
+and when woken up performs a resize and sleeps again.
+@return this function does not return; instead it calls os_thread_exit()
+*/
+extern "C"
+os_thread_ret_t
+DECLARE_THREAD(buf_resize_thread)(void*);
+
+#ifdef BTR_CUR_HASH_ADAPT
+/** Clear the adaptive hash index on all pages in the buffer pool. */
void
-buf_relocate(
-/*=========*/
- buf_page_t* bpage, /*!< in/out: control block being relocated;
- buf_page_get_state(bpage) must be
- BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
- buf_page_t* dpage) /*!< in/out: destination control block */
- MY_ATTRIBUTE((nonnull));
+buf_pool_clear_hash_index();
+#endif /* BTR_CUR_HASH_ADAPT */
+
/*********************************************************************//**
Gets the current size of buffer buf_pool in bytes.
-@return size in bytes */
+@return size in bytes */
UNIV_INLINE
ulint
buf_pool_get_curr_size(void);
/*========================*/
/*********************************************************************//**
Gets the current size of buffer buf_pool in frames.
-@return size in pages */
+@return size in pages */
UNIV_INLINE
ulint
buf_pool_get_n_pages(void);
@@ -277,8 +305,7 @@ buf_pool_get_n_pages(void);
/********************************************************************//**
Gets the smallest oldest_modification lsn for any page in the pool. Returns
zero if all modified pages have been flushed to disk.
-@return oldest modification in pool, zero if none */
-UNIV_INTERN
+@return oldest modification in pool, zero if none */
lsn_t
buf_pool_get_oldest_modification(void);
/*==================================*/
@@ -302,8 +329,7 @@ buf_page_free_descriptor(
/********************************************************************//**
Allocates a buffer block.
-@return own: the allocated block, in state BUF_BLOCK_MEMORY */
-UNIV_INTERN
+@return own: the allocated block, in state BUF_BLOCK_MEMORY */
buf_block_t*
buf_block_alloc(
/*============*/
@@ -317,38 +343,36 @@ void
buf_block_free(
/*===========*/
buf_block_t* block); /*!< in, own: block to be freed */
-#endif /* !UNIV_HOTBACKUP */
+
/*********************************************************************//**
Copies contents of a buffer frame to a given buffer.
-@return buf */
+@return buf */
UNIV_INLINE
byte*
buf_frame_copy(
/*===========*/
byte* buf, /*!< in: buffer to copy to */
const buf_frame_t* frame); /*!< in: buffer frame */
-#ifndef UNIV_HOTBACKUP
+
/**************************************************************//**
NOTE! The following macros should be used instead of buf_page_get_gen,
to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed
in LA! */
-#define buf_page_get(SP, ZS, OF, LA, MTR) buf_page_get_gen(\
- SP, ZS, OF, LA, NULL,\
- BUF_GET, __FILE__, __LINE__, MTR)
+#define buf_page_get(ID, SIZE, LA, MTR) \
+ buf_page_get_gen(ID, SIZE, LA, NULL, BUF_GET, __FILE__, __LINE__, MTR, NULL)
/**************************************************************//**
Use these macros to bufferfix a page with no latching. Remember not to
read the contents of the page unless you know it is safe. Do not modify
the contents of the page! We have separated this case, because it is
error-prone programming not to set a latch, and it should be used
with care. */
-#define buf_page_get_with_no_latch(SP, ZS, OF, MTR) buf_page_get_gen(\
- SP, ZS, OF, RW_NO_LATCH, NULL,\
- BUF_GET_NO_LATCH, __FILE__, __LINE__, MTR)
+#define buf_page_get_with_no_latch(ID, SIZE, MTR) \
+ buf_page_get_gen(ID, SIZE, RW_NO_LATCH, NULL, BUF_GET_NO_LATCH, \
+ __FILE__, __LINE__, MTR, NULL)
/********************************************************************//**
This is the general function used to get optimistic access to a database
page.
-@return TRUE if success */
-UNIV_INTERN
+@return TRUE if success */
ibool
buf_page_optimistic_get(
/*====================*/
@@ -356,13 +380,12 @@ buf_page_optimistic_get(
buf_block_t* block, /*!< in: guessed block */
ib_uint64_t modify_clock,/*!< in: modify clock value */
const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
+ unsigned line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mini-transaction */
/********************************************************************//**
This is used to get access to a known database page, when no waiting can be
done.
-@return TRUE if success */
-UNIV_INTERN
+@return TRUE if success */
ibool
buf_page_get_known_nowait(
/*======================*/
@@ -370,102 +393,109 @@ buf_page_get_known_nowait(
buf_block_t* block, /*!< in: the known page */
ulint mode, /*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
+ unsigned line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mini-transaction */
-/*******************************************************************//**
-Given a tablespace id and page number tries to get that page. If the
+/** Given a tablespace id and page number tries to get that page. If the
page is not in the buffer pool it is not loaded and NULL is returned.
-Suitable for using when holding the lock_sys_t::mutex. */
-UNIV_INTERN
+Suitable for using when holding the lock_sys_t::mutex.
+@param[in] page_id page id
+@param[in] file file name
+@param[in] line line where called
+@param[in] mtr mini-transaction
+@return pointer to a page or NULL */
buf_block_t*
buf_page_try_get_func(
-/*==================*/
- ulint space_id,/*!< in: tablespace id */
- ulint page_no,/*!< in: page number */
- ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */
- bool possibly_freed, /*!< in: don't mind if page is freed */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr); /*!< in: mini-transaction */
-
-/** Tries to get a page. If the page is not in the buffer pool it is
-not loaded. Suitable for using when holding the lock_sys_t::mutex.
-@param space_id in: tablespace id
-@param page_no in: page number
-@param mtr in: mini-transaction
-@return the page if in buffer pool, NULL if not */
-#define buf_page_try_get(space_id, page_no, mtr) \
- buf_page_try_get_func(space_id, page_no, RW_S_LATCH, false, \
- __FILE__, __LINE__, mtr);
-
-/********************************************************************//**
-Get read access to a compressed page (usually of type
+ const page_id_t page_id,
+ const char* file,
+ unsigned line,
+ mtr_t* mtr);
+
+/** Tries to get a page.
+If the page is not in the buffer pool it is not loaded. Suitable for using
+when holding the lock_sys_t::mutex.
+@param[in] page_id page identifier
+@param[in] mtr mini-transaction
+@return the page if in buffer pool, NULL if not */
+#define buf_page_try_get(page_id, mtr) \
+ buf_page_try_get_func((page_id), __FILE__, __LINE__, mtr);
+
+/** Get read access to a compressed page (usually of type
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
The page must be released with buf_page_release_zip().
NOTE: the page is not protected by any latch. Mutual exclusion has to
be implemented at a higher level. In other words, all possible
accesses to a given page through this function must be protected by
the same set of mutexes or latches.
-@return pointer to the block, or NULL if not compressed */
-UNIV_INTERN
+@param[in] page_id page id
+@param[in] page_size page size
+@return pointer to the block */
buf_page_t*
buf_page_get_zip(
-/*=============*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size */
- ulint offset);/*!< in: page number */
-/********************************************************************//**
-This is the general function used to get access to a database page.
-@return pointer to the block or NULL */
-UNIV_INTERN
+ const page_id_t page_id,
+ const page_size_t& page_size);
+
+/** This is the general function used to get access to a database page.
+It does page initialization and applies the buffered redo logs.
+@param[in] page_id page id
+@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
+@param[in] guess guessed block or NULL
+@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
+BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH
+@param[in] file file name
+@param[in] line line where called
+@param[in] mtr mini-transaction
+@param[out] err DB_SUCCESS or error code
+@return pointer to the block or NULL */
buf_block_t*
buf_page_get_gen(
-/*=============*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint offset, /*!< in: page number */
- ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
- buf_block_t* guess, /*!< in: guessed block or NULL */
- ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
- BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH or
- BUF_GET_IF_IN_POOL_OR_WATCH */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr, /*!< in: mini-transaction */
- dberr_t* err = NULL); /*!< out: error code */
-/********************************************************************//**
-Initializes a page to the buffer buf_pool. The page is usually not read
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ ulint rw_latch,
+ buf_block_t* guess,
+ ulint mode,
+ const char* file,
+ unsigned line,
+ mtr_t* mtr,
+ dberr_t* err);
+
+/** This is the low level function used to get access to a database page.
+@param[in] page_id page id
+@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
+@param[in] guess guessed block or NULL
+@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
+BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH
+@param[in] file file name
+@param[in] line line where called
+@param[in] mtr mini-transaction
+@param[out] err DB_SUCCESS or error code
+@return pointer to the block or NULL */
+buf_block_t*
+buf_page_get_low(
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ ulint rw_latch,
+ buf_block_t* guess,
+ ulint mode,
+ const char* file,
+ unsigned line,
+ mtr_t* mtr,
+ dberr_t* err);
+
+/** Initializes a page to the buffer buf_pool. The page is usually not read
from a file even if it cannot be found in the buffer buf_pool. This is one
of the functions which perform on a block the state transition NOT_USED =>
FILE_PAGE (the other is buf_page_get_gen).
-@return pointer to the block, page bufferfixed */
-UNIV_INTERN
+@param[in] page_id page id
+@param[in] page_size page size
+@param[in] mtr mini-transaction
+@return pointer to the block, page bufferfixed */
buf_block_t*
buf_page_create(
-/*============*/
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: offset of the page within space in units of
- a page */
- ulint zip_size,/*!< in: compressed page size, or 0 */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-#else /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Inits a page to the buffer buf_pool, for use in mysqlbackup --restore. */
-UNIV_INTERN
-void
-buf_page_init_for_backup_restore(
-/*=============================*/
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: offset of the page within space
- in units of a page */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- buf_block_t* block); /*!< in: block to init */
-#endif /* !UNIV_HOTBACKUP */
-
-#ifndef UNIV_HOTBACKUP
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ mtr_t* mtr);
+
/********************************************************************//**
Releases a compressed-only page acquired with buf_page_get_zip(). */
UNIV_INLINE
@@ -474,12 +504,11 @@ buf_page_release_zip(
/*=================*/
buf_page_t* bpage); /*!< in: buffer block */
/********************************************************************//**
-Decrements the bufferfix count of a buffer control block and releases
-a latch, if specified. */
+Releases a latch, if specified. */
UNIV_INLINE
void
-buf_page_release(
-/*=============*/
+buf_page_release_latch(
+/*=====================*/
buf_block_t* block, /*!< in: buffer block */
ulint rw_latch); /*!< in: RW_S_LATCH, RW_X_LATCH,
RW_NO_LATCH */
@@ -487,68 +516,55 @@ buf_page_release(
Moves a page to the start of the buffer pool LRU list. This high-level
function can be used to prevent an important page from slipping out of
the buffer pool. */
-UNIV_INTERN
void
buf_page_make_young(
/*================*/
buf_page_t* bpage); /*!< in: buffer block of a file page */
-/********************************************************************//**
-Returns TRUE if the page can be found in the buffer pool hash table.
+/** Returns TRUE if the page can be found in the buffer pool hash table.
NOTE that it is possible that the page is not yet read from disk,
though.
+@param[in] page_id page id
+@return TRUE if found in the page hash table */
+inline bool buf_page_peek(const page_id_t page_id);
-@return TRUE if found in the page hash table */
-UNIV_INLINE
-ibool
-buf_page_peek(
-/*==========*/
- ulint space, /*!< in: space id */
- ulint offset);/*!< in: page number */
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
-/********************************************************************//**
-Sets file_page_was_freed TRUE if the page is found in the buffer pool.
+#ifdef UNIV_DEBUG
+
+/** Sets file_page_was_freed TRUE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
reallocated.
-@return control block if found in page hash table, otherwise NULL */
-UNIV_INTERN
-buf_page_t*
-buf_page_set_file_page_was_freed(
-/*=============================*/
- ulint space, /*!< in: space id */
- ulint offset);/*!< in: page number */
-/********************************************************************//**
-Sets file_page_was_freed FALSE if the page is found in the buffer pool.
+@param[in] page_id page id
+@return control block if found in page hash table, otherwise NULL */
+buf_page_t* buf_page_set_file_page_was_freed(const page_id_t page_id);
+
+/** Sets file_page_was_freed FALSE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
reallocated.
-@return control block if found in page hash table, otherwise NULL */
-UNIV_INTERN
-buf_page_t*
-buf_page_reset_file_page_was_freed(
-/*===============================*/
- ulint space, /*!< in: space id */
- ulint offset); /*!< in: page number */
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
+@param[in] page_id page id
+@return control block if found in page hash table, otherwise NULL */
+buf_page_t* buf_page_reset_file_page_was_freed(const page_id_t page_id);
+
+#endif /* UNIV_DEBUG */
/********************************************************************//**
Reads the freed_page_clock of a buffer block.
-@return freed_page_clock */
+@return freed_page_clock */
UNIV_INLINE
-ulint
+unsigned
buf_page_get_freed_page_clock(
/*==========================*/
const buf_page_t* bpage) /*!< in: block */
- MY_ATTRIBUTE((pure));
+ MY_ATTRIBUTE((warn_unused_result));
/********************************************************************//**
Reads the freed_page_clock of a buffer block.
-@return freed_page_clock */
+@return freed_page_clock */
UNIV_INLINE
-ulint
+unsigned
buf_block_get_freed_page_clock(
/*===========================*/
const buf_block_t* block) /*!< in: block */
- MY_ATTRIBUTE((pure));
+ MY_ATTRIBUTE((warn_unused_result));
/********************************************************************//**
Tells if a block is still close enough to the MRU end of the LRU list
@@ -556,7 +572,7 @@ meaning that it is not in danger of getting evicted and also implying
that it has been accessed recently.
Note that this is for heuristics only and does not reserve buffer pool
mutex.
-@return TRUE if block is close to MRU end of LRU */
+@return TRUE if block is close to MRU end of LRU */
UNIV_INLINE
ibool
buf_page_peek_if_young(
@@ -566,7 +582,7 @@ buf_page_peek_if_young(
Recommends a move of a block to the start of the LRU list if there is danger
of dropping from the buffer pool. NOTE: does not reserve the buffer pool
mutex.
-@return TRUE if should be made younger */
+@return TRUE if should be made younger */
UNIV_INLINE
ibool
buf_page_peek_if_too_old(
@@ -575,7 +591,7 @@ buf_page_peek_if_too_old(
/********************************************************************//**
Gets the youngest modification log sequence number for a frame.
Returns zero if not file page or no modification occurred yet.
-@return newest modification to page */
+@return newest modification to page */
UNIV_INLINE
lsn_t
buf_page_get_newest_modification(
@@ -594,7 +610,7 @@ buf_block_modify_clock_inc(
/********************************************************************//**
Returns the value of the modify clock. The caller must have an s-lock
or x-lock on the block.
-@return value */
+@return value */
UNIV_INLINE
ib_uint64_t
buf_block_get_modify_clock(
@@ -606,106 +622,136 @@ UNIV_INLINE
void
buf_block_buf_fix_inc_func(
/*=======================*/
-# ifdef UNIV_SYNC_DEBUG
+# ifdef UNIV_DEBUG
const char* file, /*!< in: file name */
- ulint line, /*!< in: line */
-# endif /* UNIV_SYNC_DEBUG */
+ unsigned line, /*!< in: line */
+# endif /* UNIV_DEBUG */
buf_block_t* block) /*!< in/out: block to bufferfix */
MY_ATTRIBUTE((nonnull));
-/*******************************************************************//**
-Increments the bufferfix count. */
+/** Increments the bufferfix count.
+@param[in,out] bpage block to bufferfix
+@return the count */
UNIV_INLINE
-void
-buf_block_fix(
-/*===========*/
- buf_block_t* block); /*!< in/out: block to bufferfix */
+ulint
+buf_block_fix(buf_page_t* bpage);
-/*******************************************************************//**
-Increments the bufferfix count. */
+/** Increments the bufferfix count.
+@param[in,out] block block to bufferfix
+@return the count */
UNIV_INLINE
-void
-buf_block_unfix(
-/*===========*/
- buf_block_t* block); /*!< in/out: block to bufferfix */
+ulint
+buf_block_fix(buf_block_t* block);
+
+/** Decrements the bufferfix count.
+@param[in,out] bpage block to bufferunfix
+@return the remaining buffer-fix count */
+UNIV_INLINE
+ulint
+buf_block_unfix(buf_page_t* bpage);
+/** Decrements the bufferfix count.
+@param[in,out] block block to bufferunfix
+@return the remaining buffer-fix count */
+UNIV_INLINE
+ulint
+buf_block_unfix(buf_block_t* block);
-# ifdef UNIV_SYNC_DEBUG
+# ifdef UNIV_DEBUG
/** Increments the bufferfix count.
-@param b in/out: block to bufferfix
-@param f in: file name where requested
-@param l in: line number where requested */
-# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b)
-# else /* UNIV_SYNC_DEBUG */
+@param[in,out] b block to bufferfix
+@param[in] f file name where requested
+@param[in] l line number where requested */
+# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b)
+# else /* UNIV_DEBUG */
/** Increments the bufferfix count.
-@param b in/out: block to bufferfix
-@param f in: file name where requested
-@param l in: line number where requested */
-# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b)
-# endif /* UNIV_SYNC_DEBUG */
-#else /* !UNIV_HOTBACKUP */
-# define buf_block_modify_clock_inc(block) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
+@param[in,out] b block to bufferfix
+@param[in] f file name where requested
+@param[in] l line number where requested */
+# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b)
+# endif /* UNIV_DEBUG */
#endif /* !UNIV_INNOCHECKSUM */
+/** Check if a buffer is all zeroes.
+@param[in] buf data to check
+@return whether the buffer is all zeroes */
+bool buf_is_zeroes(st_::span<const byte> buf);
+
/** Checks if the page is in crc32 checksum format.
-@param[in] read_buf database page
-@param[in] checksum_field1 new checksum field
-@param[in] checksum_field2 old checksum field
-@return true if the page is in crc32 checksum format */
+@param[in] read_buf database page
+@param[in] checksum_field1 new checksum field
+@param[in] checksum_field2 old checksum field
+@return true if the page is in crc32 checksum format. */
bool
buf_page_is_checksum_valid_crc32(
- const byte* read_buf,
- ulint checksum_field1,
- ulint checksum_field2)
- MY_ATTRIBUTE((warn_unused_result));
+ const byte* read_buf,
+ ulint checksum_field1,
+ ulint checksum_field2)
+ MY_ATTRIBUTE((nonnull(1), warn_unused_result));
/** Checks if the page is in innodb checksum format.
@param[in] read_buf database page
@param[in] checksum_field1 new checksum field
@param[in] checksum_field2 old checksum field
-@return true if the page is in innodb checksum format */
+@return true if the page is in innodb checksum format. */
bool
buf_page_is_checksum_valid_innodb(
- const byte* read_buf,
- ulint checksum_field1,
- ulint checksum_field2)
- MY_ATTRIBUTE((warn_unused_result));
+ const byte* read_buf,
+ ulint checksum_field1,
+ ulint checksum_field2)
+ MY_ATTRIBUTE((nonnull(1), warn_unused_result));
/** Checks if the page is in none checksum format.
@param[in] read_buf database page
@param[in] checksum_field1 new checksum field
@param[in] checksum_field2 old checksum field
-@return true if the page is in none checksum format */
+@return true if the page is in none checksum format. */
bool
buf_page_is_checksum_valid_none(
- const byte* read_buf,
- ulint checksum_field1,
- ulint checksum_field2)
- MY_ATTRIBUTE((warn_unused_result));
+ const byte* read_buf,
+ ulint checksum_field1,
+ ulint checksum_field2)
+ MY_ATTRIBUTE((nonnull(1), warn_unused_result));
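For the crc32 variant, both stored fields must match a CRC computed over the page with the checksum slots and the LSN excluded. A standalone sketch under stated assumptions: zlib's crc32() stands in for InnoDB's ut_crc32(), and the classic field offsets (page number at byte 4, header range ending at byte 26, page body from byte 38, 8 trailing bytes for the old checksum and low LSN) are hard-coded here; buf_calc_page_crc32() in buf0checksum.cc is the authoritative version:

    #include <zlib.h>      // crc32(); stand-in for InnoDB's ut_crc32()
    #include <cstddef>
    #include <cstdint>

    typedef unsigned char byte;

    // Assumed classic offsets; the real constants live in fil0fil.h.
    static const std::size_t PAGE_NO_OFFSET  = 4;   // FIL_PAGE_OFFSET
    static const std::size_t HEADER_CRC_END  = 26;  // end of header CRC range
    static const std::size_t PAGE_DATA_START = 38;  // FIL_PAGE_DATA
    static const std::size_t TRAILER_SIZE    = 8;   // old checksum + LSN bytes

    // CRC over the page, skipping the checksum slots and LSN fields.
    inline uint32_t page_crc32_sketch(const byte* page, std::size_t page_size)
    {
        uint32_t c1 = uint32_t(crc32(0, page + PAGE_NO_OFFSET,
                                     uInt(HEADER_CRC_END - PAGE_NO_OFFSET)));
        uint32_t c2 = uint32_t(crc32(0, page + PAGE_DATA_START,
                                     uInt(page_size - PAGE_DATA_START
                                          - TRAILER_SIZE)));
        return c1 ^ c2;
    }

    // Both stored fields must carry the same computed value.
    inline bool is_checksum_valid_crc32_sketch(const byte* page,
                                               std::size_t page_size,
                                               uint32_t field1,
                                               uint32_t field2)
    {
        const uint32_t crc = page_crc32_sketch(page, page_size);
        return field1 == crc && field2 == crc;
    }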
/** Check if a page is corrupt.
-@param[in] check_lsn true if LSN should be checked
-@param[in] read_buf Page to be checked
-@param[in] zip_size compressed size or 0
-@param[in] space Pointer to tablespace
-@return true if corrupted, false if not */
-UNIV_INTERN
+@param[in] check_lsn whether the LSN should be checked
+@param[in] read_buf database page
+@param[in] page_size page size
+@param[in] space tablespace
+@return whether the page is corrupted */
bool
buf_page_is_corrupted(
bool check_lsn,
const byte* read_buf,
- ulint zip_size,
+ const page_size_t& page_size,
#ifndef UNIV_INNOCHECKSUM
- const fil_space_t* space)
+ const fil_space_t* space = NULL)
#else
const void* space = NULL)
#endif
MY_ATTRIBUTE((warn_unused_result));
-#ifndef UNIV_INNOCHECKSUM
+inline void *aligned_malloc(size_t size, size_t align)
+{
+#ifdef _MSC_VER
+ return _aligned_malloc(size, align);
+#else
+ void *result;
+ if (posix_memalign(&result, align, size))
+ result= NULL;
+ return result;
+#endif
+}
-#ifndef UNIV_HOTBACKUP
+inline void aligned_free(void *ptr)
+{
+#ifdef _MSC_VER
+ _aligned_free(ptr);
+#else
+ free(ptr);
+#endif
+}
+
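On Windows a buffer from _aligned_malloc() must be released with _aligned_free(), which is why the two helpers come as a pair. A short standalone usage example; the 16 KiB size and 4096-byte alignment are illustrative, and the helper bodies are repeated so the sketch builds on its own:

    #include <cstdlib>
    #ifdef _MSC_VER
    # include <malloc.h>
    #endif

    // Copies of the helpers above, so this sketch is self-contained.
    inline void* aligned_malloc(size_t size, size_t align)
    {
    #ifdef _MSC_VER
        return _aligned_malloc(size, align);
    #else
        void* result;
        if (posix_memalign(&result, align, size))
            result = NULL;
        return result;
    #endif
    }

    inline void aligned_free(void* ptr)
    {
    #ifdef _MSC_VER
        _aligned_free(ptr);
    #else
        free(ptr);
    #endif
    }

    int main()
    {
        // 4096-byte alignment, e.g. for O_DIRECT-style page I/O.
        void* frame = aligned_malloc(16384, 4096);
        if (!frame)
            return 1;
        // ... fill or read one 16 KiB page through 'frame' ...
        aligned_free(frame);  // matches the allocator on every platform
        return 0;
    }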
+#ifndef UNIV_INNOCHECKSUM
/**********************************************************************//**
Gets the space id, page offset, and byte offset within page of a
pointer pointing to a buffer frame containing a file page. */
@@ -719,19 +765,18 @@ buf_ptr_get_fsp_addr(
/**********************************************************************//**
Gets the hash value of a block. This can be used in searches in the
lock hash table.
-@return lock hash value */
+@return lock hash value */
UNIV_INLINE
-ulint
+unsigned
buf_block_get_lock_hash_val(
/*========================*/
const buf_block_t* block) /*!< in: block */
- MY_ATTRIBUTE((pure));
+ MY_ATTRIBUTE((warn_unused_result));
#ifdef UNIV_DEBUG
/*********************************************************************//**
Finds a block in the buffer pool that points to a
given compressed page.
-@return buffer block pointing to the compressed page, or NULL */
-UNIV_INTERN
+@return buffer block pointing to the compressed page, or NULL */
buf_block_t*
buf_pool_contains_zip(
/*==================*/
@@ -752,8 +797,7 @@ buf_frame_align(
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/*********************************************************************//**
Validates the buffer pool data structure.
-@return TRUE */
-UNIV_INTERN
+@return TRUE */
ibool
buf_validate(void);
/*==============*/
@@ -761,49 +805,43 @@ buf_validate(void);
#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/*********************************************************************//**
Prints info of the buffer pool data structure. */
-UNIV_INTERN
void
buf_print(void);
/*============*/
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
/** Dump a page to stderr.
@param[in] read_buf database page
-@param[in] zip_size compressed page size, or 0 for uncompressed */
+@param[in] page_size page size */
UNIV_INTERN
void
-buf_page_print(const byte* read_buf, ulint zip_size)
- UNIV_COLD MY_ATTRIBUTE((nonnull));
+buf_page_print(const byte* read_buf, const page_size_t& page_size)
+ ATTRIBUTE_COLD __attribute__((nonnull));
/********************************************************************//**
Decompress a block.
-@return TRUE if successful */
-UNIV_INTERN
+@return TRUE if successful */
ibool
buf_zip_decompress(
/*===============*/
buf_block_t* block, /*!< in/out: block */
ibool check); /*!< in: TRUE=verify the page checksum */
-#ifndef UNIV_HOTBACKUP
+
#ifdef UNIV_DEBUG
/*********************************************************************//**
Returns the number of latched pages in the buffer pool.
-@return number of latched pages */
-UNIV_INTERN
+@return number of latched pages */
ulint
buf_get_latched_pages_number(void);
/*==============================*/
#endif /* UNIV_DEBUG */
/*********************************************************************//**
Returns the number of pending buf pool read ios.
-@return number of pending read I/O operations */
-UNIV_INTERN
+@return number of pending read I/O operations */
ulint
buf_get_n_pending_read_ios(void);
/*============================*/
/*********************************************************************//**
Prints info of the buffer i/o. */
-UNIV_INTERN
void
buf_print_io(
/*=========*/
@@ -812,7 +850,6 @@ buf_print_io(
Collect buffer pool stats information for a buffer pool. Also
record aggregated stats if there are more than one buffer pool
in the server */
-UNIV_INTERN
void
buf_stats_get_pool_info(
/*====================*/
@@ -820,39 +857,22 @@ buf_stats_get_pool_info(
ulint pool_id, /*!< in: buffer pool ID */
buf_pool_info_t* all_pool_info); /*!< in/out: buffer pool info
to fill */
-/*********************************************************************//**
-Returns the ratio in percents of modified pages in the buffer pool /
+/** Return the ratio in percent of modified pages in the buffer pool /
database pages in the buffer pool.
-@return modified page percentage ratio */
-UNIV_INTERN
+@return modified page percentage ratio */
double
buf_get_modified_ratio_pct(void);
-/*============================*/
-/**********************************************************************//**
-Refreshes the statistics used to print per-second averages. */
-UNIV_INTERN
-void
-buf_refresh_io_stats(
-/*=================*/
- buf_pool_t* buf_pool); /*!< buffer pool instance */
-/**********************************************************************//**
-Refreshes the statistics used to print per-second averages. */
-UNIV_INTERN
+/** Refresh the statistics used to print per-second averages. */
void
buf_refresh_io_stats_all(void);
-/*=================*/
-/*********************************************************************//**
-Asserts that all file pages in the buffer are in a replaceable state.
-@return TRUE */
-UNIV_INTERN
+/** Assert that all file pages in the buffer are in a replaceable state.
+@return TRUE */
ibool
buf_all_freed(void);
-/*===============*/
/*********************************************************************//**
Checks that there currently are no pending i/o-operations for the buffer
pool.
-@return number of pending i/o operations */
-UNIV_INTERN
+@return number of pending i/o operations */
ulint
buf_pool_check_no_pending_io(void);
/*==============================*/
@@ -860,17 +880,15 @@ buf_pool_check_no_pending_io(void);
Invalidates the file pages in the buffer pool when an archive recovery is
completed. All the file pages buffered must be in a replaceable state when
this function is called: not latched and not modified. */
-UNIV_INTERN
void
buf_pool_invalidate(void);
/*=====================*/
-#endif /* !UNIV_HOTBACKUP */
/*========================================================================
--------------------------- LOWER LEVEL ROUTINES -------------------------
=========================================================================*/
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
/*********************************************************************//**
Adds latch level info for the rw-lock protecting the buffer frame. This
should be called in the debug version after a successful latching of a
@@ -881,13 +899,13 @@ buf_block_dbg_add_level(
/*====================*/
buf_block_t* block, /*!< in: buffer page
where we have acquired latch */
- ulint level); /*!< in: latching order level */
-#else /* UNIV_SYNC_DEBUG */
+ latch_level_t level); /*!< in: latching order level */
+#else /* UNIV_DEBUG */
# define buf_block_dbg_add_level(block, level) /* nothing */
-#endif /* UNIV_SYNC_DEBUG */
+#endif /* UNIV_DEBUG */
/*********************************************************************//**
Gets the state of a block.
-@return state */
+@return state */
UNIV_INLINE
enum buf_page_state
buf_page_get_state(
@@ -905,13 +923,13 @@ buf_get_state_name(
block */
/*********************************************************************//**
Gets the state of a block.
-@return state */
+@return state */
UNIV_INLINE
enum buf_page_state
buf_block_get_state(
/*================*/
const buf_block_t* block) /*!< in: pointer to the control block */
- MY_ATTRIBUTE((pure));
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************************//**
Sets the state of a block. */
UNIV_INLINE
@@ -930,43 +948,43 @@ buf_block_set_state(
enum buf_page_state state); /*!< in: state */
/*********************************************************************//**
Determines if a block is mapped to a tablespace.
-@return TRUE if mapped */
+@return TRUE if mapped */
UNIV_INLINE
ibool
buf_page_in_file(
/*=============*/
const buf_page_t* bpage) /*!< in: pointer to control block */
- MY_ATTRIBUTE((pure));
-#ifndef UNIV_HOTBACKUP
+ MY_ATTRIBUTE((warn_unused_result));
+
/*********************************************************************//**
Determines if a block should be on unzip_LRU list.
-@return TRUE if block belongs to unzip_LRU */
+@return TRUE if block belongs to unzip_LRU */
UNIV_INLINE
ibool
buf_page_belongs_to_unzip_LRU(
/*==========================*/
const buf_page_t* bpage) /*!< in: pointer to control block */
- MY_ATTRIBUTE((pure));
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************************//**
Gets the mutex of a block.
-@return pointer to mutex protecting bpage */
+@return pointer to mutex protecting bpage */
UNIV_INLINE
-ib_mutex_t*
+BPageMutex*
buf_page_get_mutex(
/*===============*/
const buf_page_t* bpage) /*!< in: pointer to control block */
- MY_ATTRIBUTE((pure));
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************************//**
Get the flush type of a page.
-@return flush type */
+@return flush type */
UNIV_INLINE
buf_flush_t
buf_page_get_flush_type(
/*====================*/
const buf_page_t* bpage) /*!< in: buffer page */
- MY_ATTRIBUTE((pure));
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************************//**
Set the flush type of a page. */
UNIV_INLINE
@@ -975,33 +993,34 @@ buf_page_set_flush_type(
/*====================*/
buf_page_t* bpage, /*!< in: buffer page */
buf_flush_t flush_type); /*!< in: flush type */
-/*********************************************************************//**
-Map a block to a file page. */
+
+/** Map a block to a file page.
+@param[in,out] block pointer to control block
+@param[in] page_id page id */
UNIV_INLINE
void
buf_block_set_file_page(
-/*====================*/
- buf_block_t* block, /*!< in/out: pointer to control block */
- ulint space, /*!< in: tablespace id */
- ulint page_no);/*!< in: page number */
+ buf_block_t* block,
+ const page_id_t page_id);
+
/*********************************************************************//**
Gets the io_fix state of a block.
-@return io_fix state */
+@return io_fix state */
UNIV_INLINE
enum buf_io_fix
buf_page_get_io_fix(
/*================*/
const buf_page_t* bpage) /*!< in: pointer to the control block */
- MY_ATTRIBUTE((pure));
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************************//**
Gets the io_fix state of a block.
-@return io_fix state */
+@return io_fix state */
UNIV_INLINE
enum buf_io_fix
buf_block_get_io_fix(
/*================*/
const buf_block_t* block) /*!< in: pointer to the control block */
- MY_ATTRIBUTE((pure));
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************************//**
Sets the io_fix state of a block. */
UNIV_INLINE
@@ -1047,17 +1066,17 @@ ibool
buf_page_can_relocate(
/*==================*/
const buf_page_t* bpage) /*!< control block being relocated */
- MY_ATTRIBUTE((pure));
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************************//**
Determine if a block has been flagged old.
-@return TRUE if old */
+@return TRUE if old */
UNIV_INLINE
ibool
buf_page_is_old(
/*============*/
const buf_page_t* bpage) /*!< in: control block */
- MY_ATTRIBUTE((pure));
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************************//**
Flag a block old. */
UNIV_INLINE
@@ -1065,16 +1084,16 @@ void
buf_page_set_old(
/*=============*/
buf_page_t* bpage, /*!< in/out: control block */
- ibool old); /*!< in: old */
+ bool old); /*!< in: old */
/*********************************************************************//**
Determine the time of first access of a block in the buffer pool.
-@return ut_time_ms() at the time of first access, 0 if not accessed */
+@return ut_time_ms() at the time of first access, 0 if not accessed */
UNIV_INLINE
unsigned
buf_page_is_accessed(
/*=================*/
const buf_page_t* bpage) /*!< in: control block */
- MY_ATTRIBUTE((nonnull, pure));
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************************//**
Flag a block accessed. */
UNIV_INLINE
@@ -1086,130 +1105,65 @@ buf_page_set_accessed(
/*********************************************************************//**
Gets the buf_block_t handle of a buffered file block if an uncompressed
page frame exists, or NULL. Note: even though bpage is not declared a
-const we don't update its value. It is safe to make this pure.
-@return control block, or NULL */
+const we don't update its value.
+@return control block, or NULL */
UNIV_INLINE
buf_block_t*
buf_page_get_block(
/*===============*/
buf_page_t* bpage) /*!< in: control block, or NULL */
- MY_ATTRIBUTE((pure));
-#endif /* !UNIV_HOTBACKUP */
+ MY_ATTRIBUTE((warn_unused_result));
+
#ifdef UNIV_DEBUG
/*********************************************************************//**
Gets a pointer to the memory frame of a block.
-@return pointer to the frame */
+@return pointer to the frame */
UNIV_INLINE
buf_frame_t*
buf_block_get_frame(
/*================*/
const buf_block_t* block) /*!< in: pointer to the control block */
- MY_ATTRIBUTE((pure));
+ MY_ATTRIBUTE((warn_unused_result));
#else /* UNIV_DEBUG */
# define buf_block_get_frame(block) (block)->frame
#endif /* UNIV_DEBUG */
-/*********************************************************************//**
-Gets the space id of a block.
-@return space id */
-UNIV_INLINE
-ulint
-buf_page_get_space(
-/*===============*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
- MY_ATTRIBUTE((pure));
-/*********************************************************************//**
-Gets the space id of a block.
-@return space id */
-UNIV_INLINE
-ulint
-buf_block_get_space(
-/*================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
- MY_ATTRIBUTE((pure));
-/*********************************************************************//**
-Gets the page number of a block.
-@return page number */
-UNIV_INLINE
-ulint
-buf_page_get_page_no(
-/*=================*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
- MY_ATTRIBUTE((pure));
-/*********************************************************************//**
-Gets the page number of a block.
-@return page number */
-UNIV_INLINE
-ulint
-buf_block_get_page_no(
-/*==================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
- MY_ATTRIBUTE((pure));
-/*********************************************************************//**
-Gets the compressed page size of a block.
-@return compressed page size, or 0 */
-UNIV_INLINE
-ulint
-buf_page_get_zip_size(
-/*==================*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
- MY_ATTRIBUTE((pure));
-/*********************************************************************//**
-Gets the compressed page size of a block.
-@return compressed page size, or 0 */
-UNIV_INLINE
-ulint
-buf_block_get_zip_size(
-/*===================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
- MY_ATTRIBUTE((pure));
+
/*********************************************************************//**
Gets the compressed page descriptor corresponding to an uncompressed page
if applicable. */
#define buf_block_get_page_zip(block) \
((block)->page.zip.data ? &(block)->page.zip : NULL)
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Gets the block to whose frame the pointer is pointing to.
-@return pointer to block, never NULL */
-UNIV_INTERN
+
+#ifdef BTR_CUR_HASH_ADAPT
+/** Get a buffer block from an adaptive hash index pointer.
+This function does not return if the block is not identified.
+@param[in] ptr pointer to within a page frame
+@return pointer to block, never NULL */
buf_block_t*
-buf_block_align(
-/*============*/
- const byte* ptr); /*!< in: pointer to a frame */
+buf_block_from_ahi(const byte* ptr);
+#endif /* BTR_CUR_HASH_ADAPT */
+
/********************************************************************//**
Find out if a pointer belongs to a buf_block_t. It can be a pointer to
the buf_block_t itself or a member of it
-@return TRUE if ptr belongs to a buf_block_t struct */
-UNIV_INTERN
+@return TRUE if ptr belongs to a buf_block_t struct */
ibool
buf_pointer_is_block_field(
/*=======================*/
const void* ptr); /*!< in: pointer not
dereferenced */
/** Find out if a pointer corresponds to a buf_block_t::mutex.
-@param m in: mutex candidate
-@return TRUE if m is a buf_block_t::mutex */
+@param m in: mutex candidate
+@return TRUE if m is a buf_block_t::mutex */
#define buf_pool_is_block_mutex(m) \
buf_pointer_is_block_field((const void*)(m))
/** Find out if a pointer corresponds to a buf_block_t::lock.
-@param l in: rw-lock candidate
-@return TRUE if l is a buf_block_t::lock */
+@param l in: rw-lock candidate
+@return TRUE if l is a buf_block_t::lock */
#define buf_pool_is_block_lock(l) \
buf_pointer_is_block_field((const void*)(l))
-#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
-/*********************************************************************//**
-Gets the compressed page descriptor corresponding to an uncompressed page
-if applicable.
-@return compressed page descriptor, or NULL */
-UNIV_INLINE
-const page_zip_des_t*
-buf_frame_get_page_zip(
-/*===================*/
- const byte* ptr); /*!< in: pointer to the page */
-#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
-/********************************************************************//**
-Function which inits a page for read to the buffer buf_pool. If the page is
+/** Initialize a page for read to the buffer buf_pool. If the page is
(1) already in buf_pool, or
(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
(3) if the space is deleted or being deleted,
@@ -1217,24 +1171,25 @@ then this function does nothing.
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
on the buffer frame. The io-handler must take care that the flag is cleared
and the lock released later.
-@return pointer to the block or NULL */
-UNIV_INTERN
+@param[out] err DB_SUCCESS or DB_TABLESPACE_DELETED
+@param[in] mode BUF_READ_IBUF_PAGES_ONLY, ...
+@param[in] page_id page id
+@param[in] unzip whether the uncompressed page is
+ requested (for ROW_FORMAT=COMPRESSED)
+@return pointer to the block
+@retval NULL in case of an error */
buf_page_t*
buf_page_init_for_read(
-/*===================*/
- dberr_t* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
- ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size, or 0 */
- ibool unzip, /*!< in: TRUE=request uncompressed page */
- ib_int64_t tablespace_version,/*!< in: prevents reading from a wrong
- version of the tablespace in case we have done
- DISCARD + IMPORT */
- ulint offset);/*!< in: page number */
+ dberr_t* err,
+ ulint mode,
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ bool unzip);
+
/** Complete a read or write request of a file page to or from the buffer pool.
-@param[in,out] bpage Page to complete
-@param[in] evict whether or not to evict the page
- from LRU list.
+@param[in,out] bpage page to complete
+@param[in] dblwr whether the doublewrite buffer was used (on write)
+@param[in] evict whether or not to evict the page from LRU list
@return whether the operation succeeded
@retval DB_SUCCESS always when writing, or if a read page was OK
@retval DB_PAGE_CORRUPTED if the checksum fails on a page read
@@ -1243,29 +1198,18 @@ buf_page_init_for_read(
not match */
UNIV_INTERN
dberr_t
-buf_page_io_complete(buf_page_t* bpage, bool evict = false)
+buf_page_io_complete(buf_page_t* bpage, bool dblwr = false, bool evict = false)
MY_ATTRIBUTE((nonnull));
/********************************************************************//**
-Calculates a folded value of a file page address to use in the page hash
-table.
-@return the folded value */
-UNIV_INLINE
-ulint
-buf_page_address_fold(
-/*==================*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: offset of the page within space */
- MY_ATTRIBUTE((const));
-/********************************************************************//**
Calculates the index of a buffer pool to the buf_pool[] array.
-@return the position of the buffer pool in buf_pool[] */
+@return the position of the buffer pool in buf_pool[] */
UNIV_INLINE
-ulint
+unsigned
buf_pool_index(
/*===========*/
const buf_pool_t* buf_pool) /*!< in: buffer pool */
- MY_ATTRIBUTE((nonnull, const));
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************************//**
Returns the buffer pool instance given a page instance
@return buf_pool */
@@ -1282,15 +1226,12 @@ buf_pool_t*
buf_pool_from_block(
/*================*/
const buf_block_t* block); /*!< in: block */
-/******************************************************************//**
-Returns the buffer pool instance given space and offset of page
+
+/** Returns the buffer pool instance given a page id.
+@param[in] page_id page id
@return buffer pool */
-UNIV_INLINE
-buf_pool_t*
-buf_pool_get(
-/*==========*/
- ulint space, /*!< in: space id */
- ulint offset);/*!< in: offset of the page within space */
+inline buf_pool_t* buf_pool_get(const page_id_t page_id);
+
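With multiple instances, a page's identity alone decides which pool owns it. A standalone sketch of that mapping, folding (space id, page number) and reducing modulo the instance count; the exact fold is page_id_t::fold(), which may differ from this illustration:

    #include <cstddef>
    #include <cstdint>

    // Illustrative fold of (space id, page number), in the spirit of
    // page_id_t::fold(); the real definition may differ.
    inline std::size_t page_fold_sketch(uint32_t space_id, uint32_t page_no)
    {
        return (std::size_t(space_id) << 20) + space_id + page_no;
    }

    // Choose the owning buffer pool instance; n_instances is assumed > 0.
    inline std::size_t buf_pool_index_sketch(uint32_t space_id,
                                             uint32_t page_no,
                                             std::size_t n_instances)
    {
        return page_fold_sketch(space_id, page_no) % n_instances;
    }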
/******************************************************************//**
Returns the buffer pool instance given its array index
@return buffer pool */
@@ -1300,71 +1241,64 @@ buf_pool_from_array(
/*================*/
ulint index); /*!< in: array index to get
buffer pool instance from */
-/******************************************************************//**
-Returns the control block of a file page, NULL if not found.
-@return block, NULL if not found */
+
+/** Returns the control block of a file page, NULL if not found.
+@param[in] buf_pool buffer pool instance
+@param[in] page_id page id
+@return block, NULL if not found */
UNIV_INLINE
buf_page_t*
buf_page_hash_get_low(
-/*==================*/
- buf_pool_t* buf_pool,/*!< buffer pool instance */
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: offset of the page within space */
- ulint fold); /*!< in: buf_page_address_fold(space, offset) */
-/******************************************************************//**
-Returns the control block of a file page, NULL if not found.
+ buf_pool_t* buf_pool,
+ const page_id_t page_id);
+
+/** Returns the control block of a file page, NULL if not found.
If the block is found and lock is not NULL then the appropriate
page_hash lock is acquired in the specified lock mode. Otherwise,
mode value is ignored. It is up to the caller to release the
lock. If the block is found and the lock is NULL then the page_hash
lock is released by this function.
-@return block, NULL if not found, or watch sentinel (if watch is true) */
+@param[in] buf_pool buffer pool instance
+@param[in] page_id page id
+@param[in,out] lock lock of the page hash acquired if bpage is
+found, NULL otherwise. If NULL is passed then the hash_lock is released by
+this function.
+@param[in] lock_mode RW_LOCK_X or RW_LOCK_S. Ignored if
+lock == NULL
+@param[in] watch if true, return watch sentinel also.
+@return pointer to the bpage (or the watch sentinel if watch is true),
+or NULL; if NULL, lock is also NULL. */
UNIV_INLINE
buf_page_t*
buf_page_hash_get_locked(
-/*=====================*/
- /*!< out: pointer to the bpage,
- or NULL; if NULL, hash_lock
- is also NULL. */
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: page number */
- rw_lock_t** lock, /*!< in/out: lock of the page
- hash acquired if bpage is
- found. NULL otherwise. If NULL
- is passed then the hash_lock
- is released by this function */
- ulint lock_mode, /*!< in: RW_LOCK_EX or
- RW_LOCK_SHARED. Ignored if
- lock == NULL */
- bool watch = false); /*!< in: if true, return watch
- sentinel also. */
-/******************************************************************//**
-Returns the control block of a file page, NULL if not found.
+ buf_pool_t* buf_pool,
+ const page_id_t page_id,
+ rw_lock_t** lock,
+ ulint lock_mode,
+ bool watch = false);
+
+/** Returns the control block of a file page, NULL if not found.
If the block is found and lock is not NULL then the appropriate
page_hash lock is acquired in the specified lock mode. Otherwise,
mode value is ignored. It is up to the caller to release the
lock. If the block is found and the lock is NULL then the page_hash
lock is released by this function.
-@return block, NULL if not found */
+@param[in] buf_pool buffer pool instance
+@param[in] page_id page id
+@param[in,out] lock lock of the page hash acquired if bpage is
+found, NULL otherwise. If NULL is passed then the hash_lock is released by
+this function.
+@param[in] lock_mode RW_LOCK_X or RW_LOCK_S. Ignored if
+lock == NULL
+@return pointer to the block or NULL; if NULL, lock is also NULL. */
UNIV_INLINE
buf_block_t*
buf_block_hash_get_locked(
-/*=====================*/
- /*!< out: pointer to the bpage,
- or NULL; if NULL, hash_lock
- is also NULL. */
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: page number */
- rw_lock_t** lock, /*!< in/out: lock of the page
- hash acquired if bpage is
- found. NULL otherwise. If NULL
- is passed then the hash_lock
- is released by this function */
- ulint lock_mode); /*!< in: RW_LOCK_EX or
- RW_LOCK_SHARED. Ignored if
- lock == NULL */
+ buf_pool_t* buf_pool,
+ const page_id_t page_id,
+ rw_lock_t** lock,
+ ulint lock_mode);
+
/* There are four different ways we can try to get a bpage or block
from the page hash:
1) Caller already holds the appropriate page hash lock: in the case call
@@ -1372,75 +1306,47 @@ buf_page_hash_get_low() function.
2) Caller wants to hold page hash lock in x-mode
3) Caller wants to hold page hash lock in s-mode
4) Caller doesn't want to hold page hash lock */
-#define buf_page_hash_get_s_locked(b, s, o, l) \
- buf_page_hash_get_locked(b, s, o, l, RW_LOCK_SHARED)
-#define buf_page_hash_get_x_locked(b, s, o, l) \
- buf_page_hash_get_locked(b, s, o, l, RW_LOCK_EX)
-#define buf_page_hash_get(b, s, o) \
- buf_page_hash_get_locked(b, s, o, NULL, 0)
-#define buf_page_get_also_watch(b, s, o) \
- buf_page_hash_get_locked(b, s, o, NULL, 0, true)
-
-#define buf_block_hash_get_s_locked(b, s, o, l) \
- buf_block_hash_get_locked(b, s, o, l, RW_LOCK_SHARED)
-#define buf_block_hash_get_x_locked(b, s, o, l) \
- buf_block_hash_get_locked(b, s, o, l, RW_LOCK_EX)
-#define buf_block_hash_get(b, s, o) \
- buf_block_hash_get_locked(b, s, o, NULL, 0)
-
-/*********************************************************************//**
-Gets the current length of the free list of buffer blocks.
-@return length of the free list */
-UNIV_INTERN
-ulint
-buf_get_free_list_len(void);
-/*=======================*/
+#define buf_page_hash_get_s_locked(b, page_id, l) \
+ buf_page_hash_get_locked(b, page_id, l, RW_LOCK_S)
+#define buf_page_hash_get_x_locked(b, page_id, l) \
+ buf_page_hash_get_locked(b, page_id, l, RW_LOCK_X)
+#define buf_page_hash_get(b, page_id) \
+ buf_page_hash_get_locked(b, page_id, NULL, 0)
+#define buf_page_get_also_watch(b, page_id) \
+ buf_page_hash_get_locked(b, page_id, NULL, 0, true)
+
+#define buf_block_hash_get_s_locked(b, page_id, l) \
+ buf_block_hash_get_locked(b, page_id, l, RW_LOCK_S)
+#define buf_block_hash_get_x_locked(b, page_id, l) \
+ buf_block_hash_get_locked(b, page_id, l, RW_LOCK_X)
+#define buf_block_hash_get(b, page_id) \
+ buf_block_hash_get_locked(b, page_id, NULL, 0)
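These macros derive the protecting latch from page_id.fold(), i.e. the page hash is latch-sharded. A standalone sketch of the sharding idea with hypothetical names (N_HASH_LOCKS, hash_lock_get_sketch), using std::shared_mutex in place of rw_lock_t:

    #include <cstddef>
    #include <shared_mutex>

    static const std::size_t N_HASH_LOCKS = 16;  // assumed power of two
    static std::shared_mutex hash_locks[N_HASH_LOCKS];

    // Map a fold value to the latch guarding its hash cell.
    inline std::shared_mutex& hash_lock_get_sketch(std::size_t fold)
    {
        return hash_locks[fold & (N_HASH_LOCKS - 1)];
    }

    // S-latched lookup, analogous to buf_page_hash_get_s_locked().
    template <typename Lookup>
    auto locked_lookup(std::size_t fold, Lookup lookup) -> decltype(lookup())
    {
        std::shared_lock<std::shared_mutex> guard(hash_lock_get_sketch(fold));
        return lookup();  // the hash-table probe runs under the latch
    }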
/********************************************************************//**
Determine if a block is a sentinel for a buffer pool watch.
-@return TRUE if a sentinel for a buffer pool watch, FALSE if not */
-UNIV_INTERN
+@return TRUE if a sentinel for a buffer pool watch, FALSE if not */
ibool
buf_pool_watch_is_sentinel(
/*=======================*/
- buf_pool_t* buf_pool, /*!< buffer pool instance */
+ const buf_pool_t* buf_pool, /*!< buffer pool instance */
const buf_page_t* bpage) /*!< in: block */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/****************************************************************//**
-Add watch for the given page to be read in. Caller must have the buffer pool
-@return NULL if watch set, block if the page is in the buffer pool */
-UNIV_INTERN
-buf_page_t*
-buf_pool_watch_set(
-/*===============*/
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: page number */
- ulint fold) /*!< in: buf_page_address_fold(space, offset) */
- MY_ATTRIBUTE((warn_unused_result));
-/****************************************************************//**
-Stop watching if the page has been read in.
-buf_pool_watch_set(space,offset) must have returned NULL before. */
-UNIV_INTERN
-void
-buf_pool_watch_unset(
-/*=================*/
- ulint space, /*!< in: space id */
- ulint offset);/*!< in: page number */
-/****************************************************************//**
-Check if the page has been read in.
+
+/** Stop watching if the page has been read in.
+buf_pool_watch_set(space,offset) must have returned NULL before.
+@param[in] page_id page id */
+void buf_pool_watch_unset(const page_id_t page_id);
+
+/** Check if the page has been read in.
This may only be called after buf_pool_watch_set(space,offset)
has returned NULL and before invoking buf_pool_watch_unset(space,offset).
-@return FALSE if the given page was not read in, TRUE if it was */
-UNIV_INTERN
-ibool
-buf_pool_watch_occurred(
-/*====================*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: page number */
- MY_ATTRIBUTE((warn_unused_result));
+@param[in] page_id page id
+@return FALSE if the given page was not read in, TRUE if it was */
+bool buf_pool_watch_occurred(const page_id_t page_id)
+MY_ATTRIBUTE((warn_unused_result));
+
/********************************************************************//**
Get total buffer pool statistics. */
-UNIV_INTERN
void
buf_get_total_list_len(
/*===================*/
@@ -1449,7 +1355,6 @@ buf_get_total_list_len(
ulint* flush_list_len);/*!< out: length of all flush lists */
/********************************************************************//**
Get total list size in bytes from all buffer pools. */
-UNIV_INTERN
void
buf_get_total_list_size_in_bytes(
/*=============================*/
@@ -1457,7 +1362,6 @@ buf_get_total_list_size_in_bytes(
in all buffer pools */
/********************************************************************//**
Get total buffer pool statistics. */
-UNIV_INTERN
void
buf_get_total_stat(
/*===============*/
@@ -1473,17 +1377,33 @@ buf_get_nth_chunk_block(
ulint n, /*!< in: nth chunk in the buffer pool */
ulint* chunk_size); /*!< in: chunk size */
-/********************************************************************//**
-Calculate the checksum of a page from compressed table and update the page. */
-UNIV_INTERN
+/** Check whether a page stored earlier may no longer be in the buffer
+pool, e.g. because blocks were withdrawn while resizing the pool.
+@param[in]	withdraw_clock	withdraw clock value observed when the page was stored
+@retval true if the page might have been relocated */
+UNIV_INLINE
+bool
+buf_pool_is_obsolete(
+ ulint withdraw_clock);
+
+/** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit,
+if needed.
+@param[in] size size in bytes
+@return aligned size */
+UNIV_INLINE
+ulint
+buf_pool_size_align(
+ ulint size);
+
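The alignment itself is plain round-up arithmetic; a one-function sketch, assuming the unit (derived from srv_buf_pool_chunk_unit and the instance count in the real function) is non-zero:

    #include <cstddef>

    // Round size up to a multiple of unit; unit is assumed non-zero.
    inline std::size_t buf_pool_size_align_sketch(std::size_t size,
                                                  std::size_t unit)
    {
        return ((size + unit - 1) / unit) * unit;
    }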
+/** Calculate the checksum of a page from compressed table and update the
+page.
+@param[in,out] page page to update
+@param[in] size compressed page size
+@param[in] lsn LSN to stamp on the page */
void
buf_flush_update_zip_checksum(
-/*==========================*/
- buf_frame_t* page, /*!< in/out: Page to update */
- ulint zip_size, /*!< in: Compressed page size */
- lsn_t lsn); /*!< in: Lsn to stamp on the page */
-
-#endif /* !UNIV_HOTBACKUP */
+ buf_frame_t* page,
+ ulint size,
+ lsn_t lsn);
/** Encryption and page_compression hook that is called just before
a page is written to disk.
@@ -1542,7 +1462,8 @@ for compressed and uncompressed frames */
/** Number of bits used for buffer page states. */
#define BUF_PAGE_STATE_BITS 3
-struct buf_page_t{
+class buf_page_t {
+public:
/** @name General fields
None of these bit-fields must be modified without holding
buf_page_get_mutex() [buf_block_t::mutex or
@@ -1551,41 +1472,25 @@ struct buf_page_t{
by buf_pool->mutex. */
/* @{ */
- ib_uint32_t space; /*!< tablespace id; also protected
- by buf_pool->mutex. */
- ib_uint32_t offset; /*!< page number; also protected
- by buf_pool->mutex. */
+ /** Page id. Protected by buf_pool mutex. */
+ page_id_t id;
buf_page_t* hash; /*!< node used in chaining to
buf_pool->page_hash or
buf_pool->zip_hash */
- /** count of how manyfold this block is currently bufferfixed */
-#ifdef PAGE_ATOMIC_REF_COUNT
- ib_uint32_t buf_fix_count;
- /** type of pending I/O operation; also protected by
- buf_pool->mutex for writes only @see enum buf_io_fix */
- byte io_fix;
+ /** Page size. Protected by buf_pool mutex. */
+ page_size_t size;
- byte state;
-#else
- unsigned buf_fix_count:19;
+	/** Count of how many times this block is currently bufferfixed. */
+ int32 buf_fix_count;
/** type of pending I/O operation; also protected by
- buf_pool->mutex for writes only @see enum buf_io_fix */
- unsigned io_fix:2;
+ buf_pool->mutex for writes only */
+ buf_io_fix io_fix;
- /*!< state of the control block; also protected by buf_pool->mutex.
- State transitions from BUF_BLOCK_READY_FOR_USE to BUF_BLOCK_MEMORY
- need not be protected by buf_page_get_mutex(). @see enum buf_page_state.
- State changes that are relevant to page_hash are additionally protected
- by the appropriate page_hash mutex i.e.: if a page is in page_hash or
- is being added to/removed from page_hash then the corresponding changes
- must also be protected by page_hash mutex. */
- unsigned state:BUF_PAGE_STATE_BITS;
+ /** Block state. @see buf_page_in_file */
+ buf_page_state state;
-#endif /* PAGE_ATOMIC_REF_COUNT */
-
-#ifndef UNIV_HOTBACKUP
unsigned flush_type:2; /*!< if this block is currently being
flushed to disk, this tells the
flush_type.
@@ -1596,7 +1501,6 @@ struct buf_page_t{
# error "MAX_BUFFER_POOLS > 64; redefine buf_pool_index:6"
# endif
/* @} */
-#endif /* !UNIV_HOTBACKUP */
page_zip_des_t zip; /*!< compressed page; zip.data
(but not the data it points to) is
also protected by buf_pool->mutex;
@@ -1609,8 +1513,6 @@ struct buf_page_t{
if written again we check is TRIM
operation needed. */
- bool encrypted; /*!< page is still encrypted */
-
ulint real_size; /*!< Real size of the page
Normal pages == UNIV_PAGE_SIZE
page compressed pages, payload
@@ -1620,7 +1522,6 @@ struct buf_page_t{
buf_tmp_buffer_t* slot; /*!< Slot for temporary memory
used for encryption/compression
or NULL */
-#ifndef UNIV_HOTBACKUP
#ifdef UNIV_DEBUG
ibool in_page_hash; /*!< TRUE if in buf_pool->page_hash */
ibool in_zip_hash; /*!< TRUE if in buf_pool->zip_hash */
@@ -1638,7 +1539,7 @@ struct buf_page_t{
in one of the following lists in
buf_pool:
- - BUF_BLOCK_NOT_USED: free
+ - BUF_BLOCK_NOT_USED: free, withdraw
- BUF_BLOCK_FILE_PAGE: flush_list
- BUF_BLOCK_ZIP_DIRTY: flush_list
- BUF_BLOCK_ZIP_PAGE: zip_clean
@@ -1674,6 +1575,9 @@ struct buf_page_t{
should hold: in_free_list
== (state == BUF_BLOCK_NOT_USED) */
#endif /* UNIV_DEBUG */
+
+ FlushObserver* flush_observer; /*!< flush observer */
+
lsn_t newest_modification;
/*!< log sequence number of
the youngest modification to
@@ -1721,15 +1625,13 @@ struct buf_page_t{
0 if the block was never accessed
in the buffer pool. Protected by
block mutex */
-
-# if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
+# ifdef UNIV_DEBUG
ibool file_page_was_freed;
/*!< this is set to TRUE when
fsp frees a page in buffer pool;
protected by buf_pool->zip_mutex
or buf_block_t::mutex. */
-# endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
+# endif /* UNIV_DEBUG */
};
/** The buffer control block structure */
@@ -1747,7 +1649,8 @@ struct buf_block_t{
is of size UNIV_PAGE_SIZE, and
aligned to an address divisible by
UNIV_PAGE_SIZE */
-#ifndef UNIV_HOTBACKUP
+ BPageLock lock; /*!< read-write lock of the buffer
+ frame */
UT_LIST_NODE_T(buf_block_t) unzip_LRU;
/*!< node of the decompressed LRU list;
a block is in the unzip_LRU list
@@ -1757,15 +1660,8 @@ struct buf_block_t{
ibool in_unzip_LRU_list;/*!< TRUE if the page is in the
decompressed LRU list;
used in debugging */
+ ibool in_withdraw_list;
#endif /* UNIV_DEBUG */
- ib_mutex_t mutex; /*!< mutex protecting this block:
- state (also protected by the buffer
- pool mutex), io_fix, buf_fix_count,
- and accessed; we introduce this new
- mutex in InnoDB-5.1 to relieve
- contention on the buffer pool mutex */
- rw_lock_t lock; /*!< read-write lock of the buffer
- frame */
unsigned lock_hash_val:32;/*!< hashed value of the page address
in the record lock hash table;
protected by buf_block_t::lock
@@ -1773,15 +1669,6 @@ struct buf_block_t{
in buf_page_get_gen(),
buf_page_init_for_read()
and buf_page_create()) */
- ibool check_index_page_at_flush;
- /*!< TRUE if we know that this is
- an index page, and want the database
- to check its consistency before flush;
- note that there may be pages in the
- buffer pool which are index pages,
- but this flag is not set because
- we do not keep track of all pages;
- NOT protected by any mutex */
/* @} */
/** @name Optimistic search field */
/* @{ */
@@ -1798,41 +1685,78 @@ struct buf_block_t{
bufferfixed, or (2) the thread has an
x-latch on the block */
/* @} */
+#ifdef BTR_CUR_HASH_ADAPT
/** @name Hash search fields (unprotected)
NOTE that these fields are NOT protected by any semaphore! */
/* @{ */
ulint n_hash_helps; /*!< counter which controls building
of a new hash index for the page */
- ulint n_fields; /*!< recommended prefix length for hash
+ volatile ulint n_bytes; /*!< recommended prefix length for hash
+ search: number of bytes in
+ an incomplete last field */
+ volatile ulint n_fields; /*!< recommended prefix length for hash
search: number of full fields */
- ulint n_bytes; /*!< recommended prefix: number of bytes
- in an incomplete field */
- ibool left_side; /*!< TRUE or FALSE, depending on
+ volatile bool left_side; /*!< true or false, depending on
whether the leftmost record of several
records with the same prefix should be
indexed in the hash index */
/* @} */
/** @name Hash search fields
- These 5 fields may only be modified when we have
- an x-latch on btr_search_latch AND
- - we are holding an s-latch or x-latch on buf_block_t::lock or
- - we know that buf_block_t::buf_fix_count == 0.
+ These 5 fields may only be modified when:
+ we are holding the appropriate x-latch in btr_search_latches[], and
+ one of the following holds:
+ (1) the block state is BUF_BLOCK_FILE_PAGE, and
+ we are holding an s-latch or x-latch on buf_block_t::lock, or
+ (2) buf_block_t::buf_fix_count == 0, or
+ (3) the block state is BUF_BLOCK_REMOVE_HASH.
An exception to this is when we init or create a page
in the buffer pool in buf0buf.cc.
- Another exception is that assigning block->index = NULL
- is allowed whenever holding an x-latch on btr_search_latch. */
+ Another exception for buf_pool_clear_hash_index() is that
+ assigning block->index = NULL (and block->n_pointers = 0)
+ is allowed whenever btr_search_own_all(RW_LOCK_X).
+
+ Another exception is that ha_insert_for_fold_func() may
+ decrement n_pointers without holding the appropriate latch
+ in btr_search_latches[]. Thus, n_pointers must be
+ protected by atomic memory access.
+
+ This implies that the fields may be read without race
+ condition whenever any of the following hold:
+ - the btr_search_latches[] s-latch or x-latch is being held, or
+ - the block state is not BUF_BLOCK_FILE_PAGE or BUF_BLOCK_REMOVE_HASH,
+ and holding some latch prevents the state from changing to that.
+
+ Some use of assert_block_ahi_empty() or assert_block_ahi_valid()
+ is prone to race conditions while buf_pool_clear_hash_index() is
+ executing (the adaptive hash index is being disabled). Such use
+ is explicitly commented. */
/* @{ */
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
ulint n_pointers; /*!< used in debugging: the number of
pointers in the adaptive hash index
- pointing to this frame */
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+ pointing to this frame;
+ protected by atomic memory access
+ or btr_search_own_all(). */
+# define assert_block_ahi_empty(block) \
+ ut_a(my_atomic_addlint(&(block)->n_pointers, 0) == 0)
+# define assert_block_ahi_empty_on_init(block) do { \
+ UNIV_MEM_VALID(&(block)->n_pointers, sizeof (block)->n_pointers); \
+ assert_block_ahi_empty(block); \
+} while (0)
+# define assert_block_ahi_valid(block) \
+ ut_a((block)->index \
+ || my_atomic_loadlint(&(block)->n_pointers) == 0)
+# else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+# define assert_block_ahi_empty(block) /* nothing */
+# define assert_block_ahi_empty_on_init(block) /* nothing */
+# define assert_block_ahi_valid(block) /* nothing */
+# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
unsigned curr_n_fields:10;/*!< prefix length for hash indexing:
number of full fields */
unsigned curr_n_bytes:15;/*!< number of bytes in hash
@@ -1848,7 +1772,15 @@ struct buf_block_t{
have been hash collisions,
record deletions, etc. */
/* @} */
-# ifdef UNIV_SYNC_DEBUG
+#else /* BTR_CUR_HASH_ADAPT */
+# define assert_block_ahi_empty(block) /* nothing */
+# define assert_block_ahi_empty_on_init(block) /* nothing */
+# define assert_block_ahi_valid(block) /* nothing */
+#endif /* BTR_CUR_HASH_ADAPT */
+ bool skip_flush_check;
+				/*!< Skip the check in buf_dblwr_check_block
+				during bulk load; protected by lock. */
+# ifdef UNIV_DEBUG
/** @name Debug fields */
/* @{ */
rw_lock_t debug_latch; /*!< in the debug version, each thread
@@ -1857,17 +1789,22 @@ struct buf_block_t{
debug utilities in sync0rw */
/* @} */
# endif
-#endif /* !UNIV_HOTBACKUP */
+ BPageMutex mutex; /*!< mutex protecting this block:
+ state (also protected by the buffer
+ pool mutex), io_fix, buf_fix_count,
+ and accessed; we introduce this new
+ mutex in InnoDB-5.1 to relieve
+ contention on the buffer pool mutex */
};
/** Check if a buf_block_t object is in a valid state
-@param block buffer block
-@return TRUE if valid */
+@param block buffer block
+@return TRUE if valid */
#define buf_block_state_valid(block) \
(buf_block_get_state(block) >= BUF_BLOCK_NOT_USED \
&& (buf_block_get_state(block) <= BUF_BLOCK_REMOVE_HASH))
-#ifndef UNIV_HOTBACKUP
+
/**********************************************************************//**
Compute the hash fold value for blocks in buf_pool->zip_hash. */
/* @{ */
@@ -1898,7 +1835,7 @@ public:
virtual ~HazardPointer() {}
/** Get current value */
- buf_page_t* get()
+ buf_page_t* get() const
{
ut_ad(mutex_own(m_mutex));
return(m_hp);
@@ -2082,15 +2019,14 @@ struct buf_pool_t{
/** @name General fields */
/* @{ */
- ib_mutex_t mutex; /*!< Buffer pool mutex of this
+ BufPoolMutex mutex; /*!< Buffer pool mutex of this
instance */
- ib_mutex_t zip_mutex; /*!< Zip mutex of this buffer
+ BufPoolZipMutex zip_mutex; /*!< Zip mutex of this buffer
pool instance, protects compressed
only pages (of type buf_page_t, not
buf_block_t */
ulint instance_no; /*!< Array index of this buffer
pool instance */
- ulint old_pool_size; /*!< Old pool size in bytes */
ulint curr_pool_size; /*!< Current pool size in bytes */
ulint LRU_old_ratio; /*!< Reserve this much of the buffer
pool for "old" blocks */
@@ -2101,9 +2037,19 @@ struct buf_pool_t{
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ulint mutex_exit_forbidden; /*!< Forbid release mutex */
#endif
- ulint n_chunks; /*!< number of buffer pool chunks */
+ ut_allocator<unsigned char> allocator; /*!< Allocator used for
+					allocating memory for the "chunks"
+ member. */
+ volatile ulint n_chunks; /*!< number of buffer pool chunks */
+ volatile ulint n_chunks_new; /*!< new number of buffer pool chunks */
buf_chunk_t* chunks; /*!< buffer pool chunks */
+ buf_chunk_t* chunks_old; /*!< old buffer pool chunks to be freed
+ after resizing buffer pool */
ulint curr_size; /*!< current pool size in pages */
+ ulint old_size; /*!< previous pool size in pages */
+ ulint read_ahead_area;/*!< size in pages of the area which
+ the read-ahead algorithms read if
+ invoked */
hash_table_t* page_hash; /*!< hash table of buf_page_t or
buf_block_t file pages,
buf_page_in_file() == TRUE,
@@ -2115,6 +2061,8 @@ struct buf_pool_t{
page_hash mutex. Lookups can happen
while holding the buf_pool->mutex or
the relevant page_hash mutex. */
+ hash_table_t* page_hash_old; /*!< old pointer to page_hash to be
+ freed after resizing buffer pool */
hash_table_t* zip_hash; /*!< hash table of buf_block_t blocks
whose frames are allocated to the
zip buddy system,
@@ -2138,7 +2086,7 @@ struct buf_pool_t{
/* @{ */
- ib_mutex_t flush_list_mutex;/*!< mutex protecting the
+ FlushListMutex flush_list_mutex;/*!< mutex protecting the
flush list access. This mutex
protects flush_list, flush_rbt
and bpage::list pointers when
@@ -2179,7 +2127,7 @@ struct buf_pool_t{
recovery and is set to NULL
once the recovery is over.
Protected by flush_list_mutex */
- ulint freed_page_clock;/*!< a sequence number used
+ unsigned freed_page_clock;/*!< a sequence number used
to count the number of buffer
blocks removed from the end of
the LRU list; NOTE that this
@@ -2207,6 +2155,15 @@ struct buf_pool_t{
/*!< base node of the free
block list */
+ UT_LIST_BASE_NODE_T(buf_page_t) withdraw;
+				/*!< base node of the withdraw
+				block list. It is only used while
+				shrinking the buffer pool; blocks in
+				this list will be removed rather
+				than reused */
+
+ ulint withdraw_target;/*!< target length of withdraw
+ block list, when withdrawing */
+
/** "hazard pointer" used during scan of LRU while doing
LRU list batch. Protected by buf_pool::mutex */
LRUHp lru_hp;
@@ -2221,6 +2178,7 @@ struct buf_pool_t{
UT_LIST_BASE_NODE_T(buf_page_t) LRU;
/*!< base node of the LRU list */
+
buf_page_t* LRU_old; /*!< pointer to the about
LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
oldest blocks in the LRU list;
@@ -2268,6 +2226,15 @@ struct buf_pool_t{
/* @} */
};
+/** Print the given buf_pool_t object.
+@param[in,out] out the output stream
+@param[in] buf_pool the buf_pool_t object to be printed
+@return the output stream */
+std::ostream&
+operator<<(
+ std::ostream& out,
+ const buf_pool_t& buf_pool);
+
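Declaring operator<< lets diagnostic code stream a pool description into logs or monitor output. A standalone sketch of the pattern over a stub struct; the real field set and format are decided by the definition in buf0buf.cc:

    #include <cstddef>
    #include <iostream>

    // Stub carrying two of the printable fields, illustrative only.
    struct pool_stub { std::size_t curr_size; std::size_t n_chunks; };

    std::ostream& operator<<(std::ostream& out, const pool_stub& p)
    {
        return out << "[buffer pool: curr_size=" << p.curr_size
                   << " pages, n_chunks=" << p.n_chunks << "]";
    }

    int main()
    {
        pool_stub p = {8192, 8};
        std::cout << p << '\n';  // "[buffer pool: curr_size=8192 pages, n_chunks=8]"
    }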
/** @name Accessors for buf_pool->mutex.
Use these instead of accessing buf_pool->mutex directly. */
/* @{ */
@@ -2275,77 +2242,79 @@ Use these instead of accessing buf_pool->mutex directly. */
/** Test if a buffer pool mutex is owned. */
#define buf_pool_mutex_own(b) mutex_own(&b->mutex)
/** Acquire a buffer pool mutex. */
-#define buf_pool_mutex_enter(b) do { \
- ut_ad(!mutex_own(&b->zip_mutex)); \
- mutex_enter(&b->mutex); \
+#define buf_pool_mutex_enter(b) do { \
+ ut_ad(!(b)->zip_mutex.is_owned()); \
+ mutex_enter(&(b)->mutex); \
} while (0)
/** Test if flush list mutex is owned. */
-#define buf_flush_list_mutex_own(b) mutex_own(&b->flush_list_mutex)
+#define buf_flush_list_mutex_own(b) mutex_own(&(b)->flush_list_mutex)
/** Acquire the flush list mutex. */
-#define buf_flush_list_mutex_enter(b) do { \
- mutex_enter(&b->flush_list_mutex); \
+#define buf_flush_list_mutex_enter(b) do { \
+ mutex_enter(&(b)->flush_list_mutex); \
} while (0)
/** Release the flush list mutex. */
-# define buf_flush_list_mutex_exit(b) do { \
- mutex_exit(&b->flush_list_mutex); \
+# define buf_flush_list_mutex_exit(b) do { \
+ mutex_exit(&(b)->flush_list_mutex); \
} while (0)
+
/** Test if block->mutex is owned. */
-#define buf_block_mutex_own(b) mutex_own(&(b)->mutex)
+#define buf_page_mutex_own(b) (b)->mutex.is_owned()
/** Acquire the block->mutex. */
-#define buf_block_mutex_enter(b) do { \
+#define buf_page_mutex_enter(b) do { \
mutex_enter(&(b)->mutex); \
} while (0)
/** Release the trx->mutex. */
-#define buf_block_mutex_exit(b) do { \
- mutex_exit(&(b)->mutex); \
+#define buf_page_mutex_exit(b) do { \
+ (b)->mutex.exit(); \
} while (0)
/** Get appropriate page_hash_lock. */
-# define buf_page_hash_lock_get(b, f) \
- hash_get_lock(b->page_hash, f)
+# define buf_page_hash_lock_get(buf_pool, page_id) \
+ hash_get_lock((buf_pool)->page_hash, (page_id).fold())
+
+/** If not appropriate page_hash_lock, relock until appropriate. */
+# define buf_page_hash_lock_s_confirm(hash_lock, buf_pool, page_id)\
+ hash_lock_s_confirm(hash_lock, (buf_pool)->page_hash, (page_id).fold())
-#ifdef UNIV_SYNC_DEBUG
+# define buf_page_hash_lock_x_confirm(hash_lock, buf_pool, page_id)\
+ hash_lock_x_confirm(hash_lock, (buf_pool)->page_hash, (page_id).fold())
+
+#ifdef UNIV_DEBUG
/** Test if page_hash lock is held in s-mode. */
-# define buf_page_hash_lock_held_s(b, p) \
- rw_lock_own(buf_page_hash_lock_get(b, \
- buf_page_address_fold(p->space, \
- p->offset)), \
- RW_LOCK_SHARED)
+# define buf_page_hash_lock_held_s(buf_pool, bpage) \
+ rw_lock_own(buf_page_hash_lock_get((buf_pool), (bpage)->id), RW_LOCK_S)
/** Test if page_hash lock is held in x-mode. */
-# define buf_page_hash_lock_held_x(b, p) \
- rw_lock_own(buf_page_hash_lock_get(b, \
- buf_page_address_fold(p->space, \
- p->offset)), \
- RW_LOCK_EX)
+# define buf_page_hash_lock_held_x(buf_pool, bpage) \
+ rw_lock_own(buf_page_hash_lock_get((buf_pool), (bpage)->id), RW_LOCK_X)
/** Test if page_hash lock is held in x or s-mode. */
-# define buf_page_hash_lock_held_s_or_x(b, p) \
- (buf_page_hash_lock_held_s(b, p) \
- || buf_page_hash_lock_held_x(b, p))
+# define buf_page_hash_lock_held_s_or_x(buf_pool, bpage)\
+ (buf_page_hash_lock_held_s((buf_pool), (bpage)) \
+ || buf_page_hash_lock_held_x((buf_pool), (bpage)))
-# define buf_block_hash_lock_held_s(b, p) \
- buf_page_hash_lock_held_s(b, &(p->page))
+# define buf_block_hash_lock_held_s(buf_pool, block) \
+ buf_page_hash_lock_held_s((buf_pool), &(block)->page)
-# define buf_block_hash_lock_held_x(b, p) \
- buf_page_hash_lock_held_x(b, &(p->page))
+# define buf_block_hash_lock_held_x(buf_pool, block) \
+ buf_page_hash_lock_held_x((buf_pool), &(block)->page)
-# define buf_block_hash_lock_held_s_or_x(b, p) \
- buf_page_hash_lock_held_s_or_x(b, &(p->page))
-#else /* UNIV_SYNC_DEBUG */
+# define buf_block_hash_lock_held_s_or_x(buf_pool, block) \
+ buf_page_hash_lock_held_s_or_x((buf_pool), &(block)->page)
+#else /* UNIV_DEBUG */
# define buf_page_hash_lock_held_s(b, p) (TRUE)
# define buf_page_hash_lock_held_x(b, p) (TRUE)
# define buf_page_hash_lock_held_s_or_x(b, p) (TRUE)
# define buf_block_hash_lock_held_s(b, p) (TRUE)
# define buf_block_hash_lock_held_x(b, p) (TRUE)
# define buf_block_hash_lock_held_s_or_x(b, p) (TRUE)
-#endif /* UNIV_SYNC_DEBUG */
+#endif /* UNIV_DEBUG */
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/** Forbid the release of the buffer pool mutex. */
@@ -2372,7 +2341,6 @@ Use these instead of accessing buf_pool->mutex directly. */
/** Release the buffer pool mutex. */
# define buf_pool_mutex_exit(b) mutex_exit(&b->mutex)
#endif
-#endif /* !UNIV_HOTBACKUP */
/* @} */
/**********************************************************************
@@ -2426,6 +2394,11 @@ struct CheckInLRUList {
{
ut_a(elem->in_LRU_list);
}
+
+ static void validate(const buf_pool_t* buf_pool)
+ {
+ ut_list_validate(buf_pool->LRU, CheckInLRUList());
+ }
};
/** Functor to validate the LRU list. */
@@ -2434,6 +2407,11 @@ struct CheckInFreeList {
{
ut_a(elem->in_free_list);
}
+
+ static void validate(const buf_pool_t* buf_pool)
+ {
+ ut_list_validate(buf_pool->free, CheckInFreeList());
+ }
};
struct CheckUnzipLRUAndLRUList {
@@ -2442,12 +2420,17 @@ struct CheckUnzipLRUAndLRUList {
ut_a(elem->page.in_LRU_list);
ut_a(elem->in_unzip_LRU_list);
}
+
+ static void validate(const buf_pool_t* buf_pool)
+ {
+ ut_list_validate(buf_pool->unzip_LRU,
+ CheckUnzipLRUAndLRUList());
+ }
};
#endif /* UNIV_DEBUG || defined UNIV_BUF_DEBUG */
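Each debug functor above now bundles a static validate() that walks its list, so callers need not repeat the ut_list_validate() plumbing. A standalone sketch of the functor-walker pattern (list_validate_sketch is a stand-in for InnoDB's ut_list_validate):

    #include <cassert>
    #include <vector>

    struct elem { bool in_LRU_list; };

    // Minimal analogue of ut_list_validate(): apply the functor to each node.
    template <typename List, typename Functor>
    void list_validate_sketch(const List& list, Functor check)
    {
        for (typename List::const_iterator it = list.begin();
             it != list.end(); ++it) {
            check(&*it);
        }
    }

    struct CheckInLRUListSketch {
        void operator()(const elem* e) const { assert(e->in_LRU_list); }
    };

    int main()
    {
        std::vector<elem> lru(3, elem{true});
        list_validate_sketch(lru, CheckInLRUListSketch());  // all asserts pass
    }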
-#ifndef UNIV_NONINL
#include "buf0buf.ic"
-#endif
-#endif /*! UNIV_INNOCHECKSUM */
+#endif /* !UNIV_INNOCHECKSUM */
+
#endif
diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
index 9901ca9224c..e1c8986c2ed 100644
--- a/storage/innobase/include/buf0buf.ic
+++ b/storage/innobase/include/buf0buf.ic
@@ -1,8 +1,8 @@
/*****************************************************************************
-Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
-Copyright (c) 2014, 2015, MariaDB Corporation.
+Copyright (c) 2014, 2019, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -32,18 +32,26 @@ Created 11/5/1995 Heikki Tuuri
*******************************************************/
#include "mtr0mtr.h"
-#ifndef UNIV_HOTBACKUP
#include "buf0flu.h"
#include "buf0lru.h"
#include "buf0rea.h"
+#include "fsp0types.h"
/** A chunk of buffers. The buffer pool is allocated in chunks. */
struct buf_chunk_t{
- ulint mem_size; /*!< allocated size of the chunk */
ulint size; /*!< size of frames[] and blocks[] */
- void* mem; /*!< pointer to the memory area which
+ unsigned char* mem; /*!< pointer to the memory area which
was allocated for the frames */
+ ut_new_pfx_t mem_pfx; /*!< Auxiliary structure, describing
+ "mem". It is filled by the allocator's
+ alloc method and later passed to the
+ deallocate method. */
buf_block_t* blocks; /*!< array of buffer control blocks */
+
+ /** Get the size of 'mem' in bytes. */
+ size_t mem_size() const {
+ return(mem_pfx.m_size);
+ }
};
/*********************************************************************//**
@@ -59,14 +67,14 @@ buf_pool_get_curr_size(void)
/********************************************************************//**
Calculates the index of a buffer pool to the buf_pool[] array.
-@return the position of the buffer pool in buf_pool[] */
+@return the position of the buffer pool in buf_pool[] */
UNIV_INLINE
-ulint
+unsigned
buf_pool_index(
/*===========*/
const buf_pool_t* buf_pool) /*!< in: buffer pool */
{
- ulint i = buf_pool - buf_pool_ptr;
+ unsigned i = unsigned(buf_pool - buf_pool_ptr);
ut_ad(i < MAX_BUFFER_POOLS);
ut_ad(i < srv_buf_pool_instances);
return(i);
@@ -81,10 +89,8 @@ buf_pool_from_bpage(
/*================*/
const buf_page_t* bpage) /*!< in: buffer pool page */
{
- ulint i;
- i = bpage->buf_pool_index;
- ut_ad(i < srv_buf_pool_instances);
- return(&buf_pool_ptr[i]);
+ ut_ad(bpage->buf_pool_index < srv_buf_pool_instances);
+ return(&buf_pool_ptr[bpage->buf_pool_index]);
}
/******************************************************************//**
@@ -107,14 +113,24 @@ ulint
buf_pool_get_n_pages(void)
/*======================*/
{
- return(buf_pool_get_curr_size() / UNIV_PAGE_SIZE);
+ if (!buf_pool_ptr)
+ return buf_pool_get_curr_size() >> srv_page_size_shift;
+
+ ulint chunk_size= 0;
+ for (uint i= 0; i < srv_buf_pool_instances; i++)
+ {
+ buf_pool_t* buf_pool = buf_pool_from_array(i);
+ for (uint j= 0; j < buf_pool->n_chunks; j++)
+ chunk_size+= buf_pool->chunks[j].size;
+ }
+ return chunk_size;
}
/********************************************************************//**
Reads the freed_page_clock of a buffer block.
-@return freed_page_clock */
+@return freed_page_clock */
UNIV_INLINE
-ulint
+unsigned
buf_page_get_freed_page_clock(
/*==========================*/
const buf_page_t* bpage) /*!< in: block */
@@ -125,9 +141,9 @@ buf_page_get_freed_page_clock(
/********************************************************************//**
Reads the freed_page_clock of a buffer block.
-@return freed_page_clock */
+@return freed_page_clock */
UNIV_INLINE
-ulint
+unsigned
buf_block_get_freed_page_clock(
/*===========================*/
const buf_block_t* block) /*!< in: block */
@@ -141,7 +157,7 @@ meaning that it is not in danger of getting evicted and also implying
that it has been accessed recently.
Note that this is for heuristics only and does not reserve buffer pool
mutex.
-@return TRUE if block is close to MRU end of LRU */
+@return TRUE if block is close to MRU end of LRU */
UNIV_INLINE
ibool
buf_page_peek_if_young(
@@ -152,7 +168,7 @@ buf_page_peek_if_young(
/* FIXME: bpage->freed_page_clock is 31 bits */
return((buf_pool->freed_page_clock & ((1UL << 31) - 1))
- < ((ulint) bpage->freed_page_clock
+ < (bpage->freed_page_clock
+ (buf_pool->curr_size
* (BUF_LRU_OLD_RATIO_DIV - buf_pool->LRU_old_ratio)
/ (BUF_LRU_OLD_RATIO_DIV * 4))));
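
The comparison above treats a page as young while fewer than curr_size * (BUF_LRU_OLD_RATIO_DIV - LRU_old_ratio) / (BUF_LRU_OLD_RATIO_DIV * 4) pages have been freed since its stamp. A worked example; the pool size and ratio below are assumptions for illustration, not values taken from a running server:

#include <cstdint>
#include <cstdio>

int main()
{
	// illustrative numbers; BUF_LRU_OLD_RATIO_DIV is 1024 in InnoDB
	const uint64_t curr_size = 1000000;	// pages in the pool
	const uint64_t div = 1024, old_ratio = 379;
	// a page still counts as "young" while fewer than this many pages
	// were freed after its freed_page_clock was stamped
	uint64_t window = curr_size * (div - old_ratio) / (div * 4);
	std::printf("young window: %llu pages\n",
		    (unsigned long long) window);
	return 0;
}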
@@ -162,7 +178,7 @@ buf_page_peek_if_young(
Recommends a move of a block to the start of the LRU list if there is danger
of dropping from the buffer pool. NOTE: does not reserve the buffer pool
mutex.
-@return TRUE if should be made younger */
+@return TRUE if should be made younger */
UNIV_INLINE
ibool
buf_page_peek_if_too_old(
@@ -179,6 +195,12 @@ buf_page_peek_if_too_old(
} else if (buf_LRU_old_threshold_ms && bpage->old) {
unsigned access_time = buf_page_is_accessed(bpage);
+ /* It is possible that the below comparison returns an
+ unexpected result. 2^32 milliseconds pass in about 50 days,
+ so if the difference between ut_time_ms() and access_time
+ is e.g. 50 days + 15 ms, then the below will behave as if
+ it is 15 ms. This is known and fixing it would require to
+ increase buf_page_t::access_time from 32 to 64 bits. */
if (access_time > 0
&& ((ib_uint32_t) (ut_time_ms() - access_time))
>= buf_LRU_old_threshold_ms) {
@@ -191,18 +213,17 @@ buf_page_peek_if_too_old(
return(!buf_page_peek_if_young(bpage));
}
}
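
The wraparound described in the new comment is plain 32-bit modular arithmetic, demonstrated below:

#include <cstdint>
#include <cstdio>

int main()
{
	// 2^32 ms is about 49.7 days; with 32-bit unsigned arithmetic the
	// age "now - access_time" aliases modulo 2^32, exactly as the
	// comment above warns
	uint32_t access_time = 1000;			// ms of first access
	uint64_t now = access_time + (1ULL << 32) + 15;	// ~50 days later
	uint32_t age = (uint32_t) now - access_time;	// wraps to 15
	std::printf("apparent age: %u ms\n", age);	// prints 15
	return 0;
}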
-#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Gets the state of a block.
-@return state */
+@return state */
UNIV_INLINE
enum buf_page_state
buf_page_get_state(
/*===============*/
const buf_page_t* bpage) /*!< in: pointer to the control block */
{
- enum buf_page_state state = (enum buf_page_state) bpage->state;
+ enum buf_page_state state = bpage->state;
#ifdef UNIV_DEBUG
switch (state) {
@@ -224,7 +245,7 @@ buf_page_get_state(
}
/*********************************************************************//**
Gets the state of a block.
-@return state */
+@return state */
UNIV_INLINE
enum buf_page_state
buf_block_get_state(
@@ -303,7 +324,8 @@ buf_page_set_state(
break;
case BUF_BLOCK_FILE_PAGE:
if (!(state == BUF_BLOCK_NOT_USED
- || state == BUF_BLOCK_REMOVE_HASH)) {
+ || state == BUF_BLOCK_REMOVE_HASH
+ || state == BUF_BLOCK_FILE_PAGE)) {
const char *old_state_name = buf_get_state_name((buf_block_t*)bpage);
bpage->state = state;
@@ -314,10 +336,11 @@ buf_page_set_state(
old_state_name,
state,
buf_get_state_name((buf_block_t*)bpage));
+ ut_a(state == BUF_BLOCK_NOT_USED
+ || state == BUF_BLOCK_REMOVE_HASH
+ || state == BUF_BLOCK_FILE_PAGE);
}
- ut_a(state == BUF_BLOCK_NOT_USED
- || state == BUF_BLOCK_REMOVE_HASH);
break;
case BUF_BLOCK_REMOVE_HASH:
ut_a(state == BUF_BLOCK_MEMORY);
@@ -341,7 +364,7 @@ buf_block_set_state(
/*********************************************************************//**
Determines if a block is mapped to a tablespace.
-@return TRUE if mapped */
+@return TRUE if mapped */
UNIV_INLINE
ibool
buf_page_in_file(
@@ -366,10 +389,9 @@ buf_page_in_file(
return(FALSE);
}
-#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Determines if a block should be on unzip_LRU list.
-@return TRUE if block belongs to unzip_LRU */
+@return TRUE if block belongs to unzip_LRU */
UNIV_INLINE
ibool
buf_page_belongs_to_unzip_LRU(
@@ -384,23 +406,22 @@ buf_page_belongs_to_unzip_LRU(
/*********************************************************************//**
Gets the mutex of a block.
-@return pointer to mutex protecting bpage */
+@return pointer to mutex protecting bpage */
UNIV_INLINE
-ib_mutex_t*
+BPageMutex*
buf_page_get_mutex(
/*===============*/
const buf_page_t* bpage) /*!< in: pointer to control block */
{
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_POOL_WATCH:
ut_error;
return(NULL);
case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY: {
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
-
+ case BUF_BLOCK_ZIP_DIRTY:
return(&buf_pool->zip_mutex);
- }
default:
return(&((buf_block_t*) bpage)->mutex);
}
@@ -408,7 +429,7 @@ buf_page_get_mutex(
/*********************************************************************//**
Get the flush type of a page.
-@return flush type */
+@return flush type */
UNIV_INLINE
buf_flush_t
buf_page_get_flush_type(
@@ -443,24 +464,22 @@ buf_page_set_flush_type(
ut_ad(buf_page_get_flush_type(bpage) == flush_type);
}
-/*********************************************************************//**
-Map a block to a file page. */
+/** Map a block to a file page.
+@param[in,out] block pointer to control block
+@param[in] page_id page id */
UNIV_INLINE
void
buf_block_set_file_page(
-/*====================*/
- buf_block_t* block, /*!< in/out: pointer to control block */
- ulint space, /*!< in: tablespace id */
- ulint page_no)/*!< in: page number */
+ buf_block_t* block,
+ const page_id_t page_id)
{
buf_block_set_state(block, BUF_BLOCK_FILE_PAGE);
- block->page.space = static_cast<ib_uint32_t>(space);
- block->page.offset = static_cast<ib_uint32_t>(page_no);
+ block->page.id = page_id;
}
/*********************************************************************//**
Gets the io_fix state of a block.
-@return io_fix state */
+@return io_fix state */
UNIV_INLINE
enum buf_io_fix
buf_page_get_io_fix(
@@ -469,7 +488,8 @@ buf_page_get_io_fix(
{
ut_ad(bpage != NULL);
- enum buf_io_fix io_fix = (enum buf_io_fix) bpage->io_fix;
+ enum buf_io_fix io_fix = bpage->io_fix;
+
#ifdef UNIV_DEBUG
switch (io_fix) {
case BUF_IO_NONE:
@@ -485,7 +505,7 @@ buf_page_get_io_fix(
/*********************************************************************//**
Gets the io_fix state of a block.
-@return io_fix state */
+@return io_fix state */
UNIV_INLINE
enum buf_io_fix
buf_block_get_io_fix(
@@ -507,7 +527,7 @@ buf_page_set_io_fix(
#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
ut_ad(buf_pool_mutex_own(buf_pool));
-#endif
+#endif /* UNIV_DEBUG */
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
bpage->io_fix = io_fix;
@@ -544,7 +564,7 @@ buf_page_set_sticky(
#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
ut_ad(buf_pool_mutex_own(buf_pool));
-#endif
+#endif /* UNIV_DEBUG */
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
@@ -562,7 +582,7 @@ buf_page_unset_sticky(
#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
ut_ad(buf_pool_mutex_own(buf_pool));
-#endif
+#endif /* UNIV_DEBUG */
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_PIN);
@@ -581,7 +601,7 @@ buf_page_can_relocate(
#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
ut_ad(buf_pool_mutex_own(buf_pool));
-#endif
+#endif /* UNIV_DEBUG */
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
ut_ad(buf_page_in_file(bpage));
ut_ad(bpage->in_LRU_list);
@@ -592,7 +612,7 @@ buf_page_can_relocate(
/*********************************************************************//**
Determine if a block has been flagged old.
-@return TRUE if old */
+@return TRUE if old */
UNIV_INLINE
ibool
buf_page_is_old(
@@ -602,7 +622,7 @@ buf_page_is_old(
#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
ut_ad(buf_pool_mutex_own(buf_pool));
-#endif
+#endif /* UNIV_DEBUG */
ut_ad(buf_page_in_file(bpage));
return(bpage->old);
@@ -615,7 +635,7 @@ void
buf_page_set_old(
/*=============*/
buf_page_t* bpage, /*!< in/out: control block */
- ibool old) /*!< in: old */
+ bool old) /*!< in: old */
{
#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
@@ -646,7 +666,7 @@ buf_page_set_old(
/*********************************************************************//**
Determine the time of first access of a block in the buffer pool.
-@return ut_time_ms() at the time of first access, 0 if not accessed */
+@return ut_time_ms() at the time of first access, 0 if not accessed */
UNIV_INLINE
unsigned
buf_page_is_accessed(
@@ -683,7 +703,7 @@ buf_page_set_accessed(
/*********************************************************************//**
Gets the buf_block_t handle of a buffered file block if an uncompressed
page frame exists, or NULL.
-@return control block, or NULL */
+@return control block, or NULL */
UNIV_INLINE
buf_block_t*
buf_page_get_block(
@@ -700,12 +720,11 @@ buf_page_get_block(
return(NULL);
}
-#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_DEBUG
/*********************************************************************//**
Gets a pointer to the memory frame of a block.
-@return pointer to the frame */
+@return pointer to the frame */
UNIV_INLINE
buf_frame_t*
buf_block_get_frame(
@@ -721,15 +740,10 @@ buf_block_get_frame(
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
case BUF_BLOCK_NOT_USED:
- if (block->page.encrypted) {
- goto ok;
- }
ut_error;
break;
case BUF_BLOCK_FILE_PAGE:
-# ifndef UNIV_HOTBACKUP
ut_a(block->page.buf_fix_count > 0);
-# endif /* !UNIV_HOTBACKUP */
/* fall through */
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_MEMORY:
@@ -742,50 +756,6 @@ ok:
}
#endif /* UNIV_DEBUG */
-/*********************************************************************//**
-Gets the space id of a block.
-@return space id */
-UNIV_INLINE
-ulint
-buf_page_get_space(
-/*===============*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
-{
- ut_ad(bpage);
- ut_a(buf_page_in_file(bpage));
-
- return(bpage->space);
-}
-
-/*********************************************************************//**
-Gets the space id of a block.
-@return space id */
-UNIV_INLINE
-ulint
-buf_block_get_space(
-/*================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
-{
- ut_ad(block);
- ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-
- return(block->page.space);
-}
-
-/*********************************************************************//**
-Gets the page number of a block.
-@return page number */
-UNIV_INLINE
-ulint
-buf_page_get_page_no(
-/*=================*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
-{
- ut_ad(bpage);
- ut_a(buf_page_in_file(bpage));
-
- return(bpage->offset);
-}
/***********************************************************************
FIXME_FTS Gets the frame the pointer is pointing to. */
UNIV_INLINE
@@ -804,64 +774,6 @@ buf_frame_align(
return(frame);
}
-/*********************************************************************//**
-Gets the page number of a block.
-@return page number */
-UNIV_INLINE
-ulint
-buf_block_get_page_no(
-/*==================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
-{
- ut_ad(block);
- ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-
- return(block->page.offset);
-}
-
-/*********************************************************************//**
-Gets the compressed page size of a block.
-@return compressed page size, or 0 */
-UNIV_INLINE
-ulint
-buf_page_get_zip_size(
-/*==================*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
-{
- return(bpage->zip.ssize
- ? (UNIV_ZIP_SIZE_MIN >> 1) << bpage->zip.ssize : 0);
-}
-
-/*********************************************************************//**
-Gets the compressed page size of a block.
-@return compressed page size, or 0 */
-UNIV_INLINE
-ulint
-buf_block_get_zip_size(
-/*===================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
-{
- return(block->page.zip.ssize
- ? (UNIV_ZIP_SIZE_MIN >> 1) << block->page.zip.ssize : 0);
-}
-
-#ifndef UNIV_HOTBACKUP
-#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
-/*********************************************************************//**
-Gets the compressed page descriptor corresponding to an uncompressed page
-if applicable.
-@return compressed page descriptor, or NULL */
-UNIV_INLINE
-const page_zip_des_t*
-buf_frame_get_page_zip(
-/*===================*/
- const byte* ptr) /*!< in: pointer to the page */
-{
- return(buf_block_get_page_zip(buf_block_align(ptr)));
-}
-#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
-
/**********************************************************************//**
Gets the space id, page offset, and byte offset within page of a
pointer pointing to a buffer frame containing a file page. */
@@ -881,23 +793,21 @@ buf_ptr_get_fsp_addr(
addr->boffset = ut_align_offset(ptr, UNIV_PAGE_SIZE);
}
-#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Gets the hash value of the page the pointer is pointing to. This can be used
in searches in the lock hash table.
-@return lock hash value */
+@return lock hash value */
UNIV_INLINE
-ulint
+unsigned
buf_block_get_lock_hash_val(
/*========================*/
const buf_block_t* block) /*!< in: block */
{
ut_ad(block);
ut_ad(buf_page_in_file(&block->page));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_EXCLUSIVE)
- || rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_X)
+ || rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_S));
+
return(block->lock_hash_val);
}
@@ -912,8 +822,8 @@ buf_page_alloc_descriptor(void)
{
buf_page_t* bpage;
- bpage = (buf_page_t*) ut_malloc(sizeof *bpage);
- ut_d(memset(bpage, 0, sizeof *bpage));
+ bpage = (buf_page_t*) ut_zalloc_nokey(sizeof *bpage);
+ ut_ad(bpage);
UNIV_MEM_ALLOC(bpage, sizeof *bpage);
return(bpage);
@@ -942,21 +852,20 @@ buf_block_free(
buf_pool_mutex_enter(buf_pool);
- mutex_enter(&block->mutex);
+ buf_page_mutex_enter(block);
ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
buf_LRU_block_free_non_file_page(block);
- mutex_exit(&block->mutex);
+ buf_page_mutex_exit(block);
buf_pool_mutex_exit(buf_pool);
}
-#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Copies contents of a buffer frame to a given buffer.
-@return buf */
+@return buf */
UNIV_INLINE
byte*
buf_frame_copy(
@@ -971,25 +880,10 @@ buf_frame_copy(
return(buf);
}
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Calculates a folded value of a file page address to use in the page hash
-table.
-@return the folded value */
-UNIV_INLINE
-ulint
-buf_page_address_fold(
-/*==================*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: offset of the page within space */
-{
- return((space << 20) + space + offset);
-}
-
/********************************************************************//**
Gets the youngest modification log sequence number for a frame.
Returns zero if not file page or no modification occurred yet.
-@return newest modification to page */
+@return newest modification to page */
UNIV_INLINE
lsn_t
buf_page_get_newest_modification(
@@ -998,7 +892,7 @@ buf_page_get_newest_modification(
page frame */
{
lsn_t lsn;
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ BPageMutex* block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex);
@@ -1023,13 +917,18 @@ buf_block_modify_clock_inc(
/*=======================*/
buf_block_t* block) /*!< in: block */
{
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*) block);
- ut_ad((buf_pool_mutex_own(buf_pool)
- && (block->page.buf_fix_count == 0))
- || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
-#endif /* UNIV_SYNC_DEBUG */
+ /* No latch is acquired for the shared temporary tablespace. */
+ if (!fsp_is_system_temporary(block->page.id.space())) {
+ ut_ad((buf_pool_mutex_own(buf_pool)
+ && (block->page.buf_fix_count == 0))
+ || rw_lock_own_flagged(&block->lock,
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX));
+ }
+#endif /* UNIV_DEBUG */
+ assert_block_ahi_valid(block);
block->modify_clock++;
}
@@ -1037,38 +936,66 @@ buf_block_modify_clock_inc(
/********************************************************************//**
Returns the value of the modify clock. The caller must have an s-lock
or x-lock on the block.
-@return value */
+@return value */
UNIV_INLINE
ib_uint64_t
buf_block_get_modify_clock(
/*=======================*/
buf_block_t* block) /*!< in: block */
{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
- || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
-#endif /* UNIV_SYNC_DEBUG */
+#ifdef UNIV_DEBUG
+ /* No latch is acquired for the shared temporary tablespace. */
+ if (!fsp_is_system_temporary(block->page.id.space())) {
+ ut_ad(rw_lock_own(&(block->lock), RW_LOCK_S)
+ || rw_lock_own(&(block->lock), RW_LOCK_X)
+ || rw_lock_own(&(block->lock), RW_LOCK_SX));
+ }
+#endif /* UNIV_DEBUG */
return(block->modify_clock);
}
-/*******************************************************************//**
-Increments the bufferfix count. */
+/** Increments the bufferfix count.
+@param[in,out] bpage block to bufferfix
+@return the incremented buffer-fix count */
UNIV_INLINE
-void
-buf_block_fix(
-/*===========*/
- buf_block_t* block) /*!< in/out: block to bufferfix */
+ulint
+buf_block_fix(buf_page_t* bpage)
{
-#ifdef PAGE_ATOMIC_REF_COUNT
- os_atomic_increment_uint32(&block->page.buf_fix_count, 1);
-#else
- ib_mutex_t* block_mutex = buf_page_get_mutex(&block->page);
+ return uint32(my_atomic_add32_explicit(
+ &bpage->buf_fix_count, 1,
+ MY_MEMORY_ORDER_RELAXED)) + 1;
+}
- mutex_enter(block_mutex);
- ++block->page.buf_fix_count;
- mutex_exit(block_mutex);
-#endif /* PAGE_ATOMIC_REF_COUNT */
+/** Increments the bufferfix count.
+@param[in,out] block block to bufferfix
+@return the incremented buffer-fix count */
+UNIV_INLINE
+ulint
+buf_block_fix(buf_block_t* block)
+{
+ return buf_block_fix(&block->page);
+}
+
+/** Get the bufferfix count.
+@param[in] bpage block whose buffer-fix count to read
+@return the buffer-fix count */
+UNIV_INLINE
+ulint
+buf_block_get_fix(buf_page_t* bpage)
+{
+ return my_atomic_load32_explicit(&bpage->buf_fix_count,
+ MY_MEMORY_ORDER_RELAXED);
+}
+
+/** Get the bufferfix count.
+@param[in] block block whose buffer-fix count to read
+@return the buffer-fix count */
+UNIV_INLINE
+ulint
+buf_block_get_fix(buf_block_t* block)
+{
+ return buf_block_get_fix(&block->page);
}
/*******************************************************************//**
@@ -1077,47 +1004,48 @@ UNIV_INLINE
void
buf_block_buf_fix_inc_func(
/*=======================*/
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
const char* file, /*!< in: file name */
- ulint line, /*!< in: line */
-#endif /* UNIV_SYNC_DEBUG */
+ unsigned line, /*!< in: line */
+#endif /* UNIV_DEBUG */
buf_block_t* block) /*!< in/out: block to bufferfix */
{
-#ifdef UNIV_SYNC_DEBUG
- ibool ret;
-
- ret = rw_lock_s_lock_nowait(&(block->debug_latch), file, line);
- ut_a(ret);
-#endif /* UNIV_SYNC_DEBUG */
-
-#ifdef PAGE_ATOMIC_REF_COUNT
- os_atomic_increment_uint32(&block->page.buf_fix_count, 1);
-#else
- ut_ad(mutex_own(&block->mutex));
+#ifdef UNIV_DEBUG
+ /* No debug latch is acquired if block belongs to system temporary.
+ Debug latch is not of much help if access to block is single
+ threaded. */
+ if (!fsp_is_system_temporary(block->page.id.space())) {
+ ibool ret;
+ ret = rw_lock_s_lock_nowait(&block->debug_latch, file, line);
+ ut_a(ret);
+ }
+#endif /* UNIV_DEBUG */
- ++block->page.buf_fix_count;
-#endif /* PAGE_ATOMIC_REF_COUNT */
+ buf_block_fix(block);
}
-/*******************************************************************//**
-Decrements the bufferfix count. */
+/** Decrements the bufferfix count.
+@param[in,out] bpage block to bufferunfix
+@return the remaining buffer-fix count */
UNIV_INLINE
-void
-buf_block_unfix(
-/*============*/
- buf_block_t* block) /*!< in/out: block to bufferunfix */
+ulint
+buf_block_unfix(buf_page_t* bpage)
{
- ut_ad(block->page.buf_fix_count > 0);
-
-#ifdef PAGE_ATOMIC_REF_COUNT
- os_atomic_decrement_uint32(&block->page.buf_fix_count, 1);
-#else
- ib_mutex_t* block_mutex = buf_page_get_mutex(&block->page);
+ uint32 count = uint32(my_atomic_add32_explicit(
+ &bpage->buf_fix_count,
+ -1, MY_MEMORY_ORDER_RELAXED));
+ ut_ad(count != 0);
+ return count - 1;
+}
- mutex_enter(block_mutex);
- --block->page.buf_fix_count;
- mutex_exit(block_mutex);
-#endif /* PAGE_ATOMIC_REF_COUNT */
+/** Decrements the bufferfix count.
+@param[in,out] block block to bufferunfix
+@return the remaining buffer-fix count */
+UNIV_INLINE
+ulint
+buf_block_unfix(buf_block_t* block)
+{
+ return buf_block_unfix(&block->page);
}
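
The mutex-versus-atomic #ifdef dance is gone: both fix and unfix are now single relaxed atomic read-modify-writes. A self-contained equivalent, using std::atomic in place of my_atomic_* (an assumption made here so the sketch compiles anywhere):

#include <atomic>
#include <cstdint>
#include <cassert>

static std::atomic<uint32_t> buf_fix_count{0};

// relaxed ordering suffices because the count only pins the block;
// it is not used to publish other data
static uint32_t fix()
{
	return buf_fix_count.fetch_add(1, std::memory_order_relaxed) + 1;
}

static uint32_t unfix()
{
	uint32_t old = buf_fix_count.fetch_sub(1, std::memory_order_relaxed);
	assert(old != 0);	// mirrors the ut_ad(count != 0) above
	return old - 1;
}

int main()
{
	fix();
	fix();
	unfix();
	return unfix() == 0 ? 0 : 1;
}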
/*******************************************************************//**
@@ -1128,39 +1056,31 @@ buf_block_buf_fix_dec(
/*==================*/
buf_block_t* block) /*!< in/out: block to bufferunfix */
{
- ut_ad(block->page.buf_fix_count > 0);
-
-#ifdef PAGE_ATOMIC_REF_COUNT
- os_atomic_decrement_uint32(&block->page.buf_fix_count, 1);
-#else
- mutex_enter(&block->mutex);
- --block->page.buf_fix_count;
- mutex_exit(&block->mutex);
-#endif /* PAGE_ATOMIC_REF_COUNT */
+ buf_block_unfix(block);
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_s_unlock(&block->debug_latch);
-#endif
+#ifdef UNIV_DEBUG
+ /* No debug latch is acquired if block belongs to system temporary.
+ Debug latch is not of much help if access to block is single
+ threaded. */
+ if (!fsp_is_system_temporary(block->page.id.space())) {
+ rw_lock_s_unlock(&block->debug_latch);
+ }
+#endif /* UNIV_DEBUG */
}
-/******************************************************************//**
-Returns the buffer pool instance given space and offset of page
+/** Returns the buffer pool instance given a page id.
+@param[in] page_id page id
@return buffer pool */
-UNIV_INLINE
-buf_pool_t*
-buf_pool_get(
-/*==========*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: offset of the page within space */
-{
- ulint fold;
- ulint index;
- ulint ignored_offset;
-
- ignored_offset = offset >> 6; /* 2log of BUF_READ_AHEAD_AREA (64)*/
- fold = buf_page_address_fold(space, ignored_offset);
- index = fold % srv_buf_pool_instances;
- return(&buf_pool_ptr[index]);
+inline buf_pool_t* buf_pool_get(const page_id_t page_id)
+{
+ /* 2log of BUF_READ_AHEAD_AREA (64) */
+ ulint ignored_page_no = page_id.page_no() >> 6;
+
+ page_id_t id(page_id.space(), ignored_page_no);
+
+ ulint i = id.fold() % srv_buf_pool_instances;
+
+ return(&buf_pool_ptr[i]);
}
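
buf_pool_get() still shards by the 64-page read-ahead area: the low six bits of the page number are dropped before folding, so every page of one area hashes to the same instance. A standalone model; page_id_m is a stand-in whose fold() mirrors the buf_page_address_fold() formula removed earlier in this patch:

#include <cstdint>

struct page_id_m {
	uint32_t space, page_no;
	uint64_t fold() const {
		return (uint64_t(space) << 20) + space + page_no;
	}
};

// pages of one 64-page read-ahead area land in the same instance
unsigned pool_index(page_id_m id, unsigned n_instances)
{
	page_id_m masked = { id.space, id.page_no >> 6 };
	return unsigned(masked.fold() % n_instances);
}

int main()
{
	// pages 100 and 127 share read-ahead area 1 (pages 64..127)
	return pool_index({5, 100}, 8) == pool_index({5, 127}, 8) ? 0 : 1;
}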
/******************************************************************//**
@@ -1178,103 +1098,98 @@ buf_pool_from_array(
return(&buf_pool_ptr[index]);
}
-/******************************************************************//**
-Returns the control block of a file page, NULL if not found.
-@return block, NULL if not found */
+/** Returns the control block of a file page, NULL if not found.
+@param[in] buf_pool buffer pool instance
+@param[in] page_id page id
+@return block, NULL if not found */
UNIV_INLINE
buf_page_t*
buf_page_hash_get_low(
-/*==================*/
- buf_pool_t* buf_pool,/*!< buffer pool instance */
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: offset of the page within space */
- ulint fold) /*!< in: buf_page_address_fold(space, offset) */
+ buf_pool_t* buf_pool,
+ const page_id_t page_id)
{
buf_page_t* bpage;
-#ifdef UNIV_SYNC_DEBUG
- ulint hash_fold;
+#ifdef UNIV_DEBUG
rw_lock_t* hash_lock;
- hash_fold = buf_page_address_fold(space, offset);
- ut_ad(hash_fold == fold);
-
- hash_lock = hash_get_lock(buf_pool->page_hash, fold);
- ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX)
- || rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+ hash_lock = hash_get_lock(buf_pool->page_hash, page_id.fold());
+ ut_ad(rw_lock_own(hash_lock, RW_LOCK_X)
+ || rw_lock_own(hash_lock, RW_LOCK_S));
+#endif /* UNIV_DEBUG */
/* Look for the page in the hash table */
- HASH_SEARCH(hash, buf_pool->page_hash, fold, buf_page_t*, bpage,
+ HASH_SEARCH(hash, buf_pool->page_hash, page_id.fold(), buf_page_t*,
+ bpage,
ut_ad(bpage->in_page_hash && !bpage->in_zip_hash
&& buf_page_in_file(bpage)),
- bpage->space == space && bpage->offset == offset);
+ page_id == bpage->id);
if (bpage) {
ut_a(buf_page_in_file(bpage));
ut_ad(bpage->in_page_hash);
ut_ad(!bpage->in_zip_hash);
+ ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
}
return(bpage);
}
-/******************************************************************//**
-Returns the control block of a file page, NULL if not found.
+/** Returns the control block of a file page, NULL if not found.
If the block is found and lock is not NULL then the appropriate
page_hash lock is acquired in the specified lock mode. Otherwise,
mode value is ignored. It is up to the caller to release the
lock. If the block is found and the lock is NULL then the page_hash
lock is released by this function.
-@return block, NULL if not found, or watch sentinel (if watch is true) */
+@param[in] buf_pool buffer pool instance
+@param[in] page_id page id
+@param[in,out] lock lock of the page hash acquired if bpage is
+found, NULL otherwise. If NULL is passed then the hash_lock is released by
+this function.
+@param[in] lock_mode RW_LOCK_X or RW_LOCK_S. Ignored if
+lock == NULL
+@param[in] watch if true, return watch sentinel also.
+@return pointer to the bpage or NULL; if NULL, lock is also NULL or
+a watch sentinel. */
UNIV_INLINE
buf_page_t*
buf_page_hash_get_locked(
-/*=====================*/
- /*!< out: pointer to the bpage,
- or NULL; if NULL, hash_lock
- is also NULL. */
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: page number */
- rw_lock_t** lock, /*!< in/out: lock of the page
- hash acquired if bpage is
- found. NULL otherwise. If NULL
- is passed then the hash_lock
- is released by this function */
- ulint lock_mode, /*!< in: RW_LOCK_EX or
- RW_LOCK_SHARED. Ignored if
- lock == NULL */
- bool watch) /*!< in: if true, return watch
- sentinel also. */
+ buf_pool_t* buf_pool,
+ const page_id_t page_id,
+ rw_lock_t** lock,
+ ulint lock_mode,
+ bool watch)
{
buf_page_t* bpage = NULL;
- ulint fold;
rw_lock_t* hash_lock;
- ulint mode = RW_LOCK_SHARED;
+ ulint mode = RW_LOCK_S;
if (lock != NULL) {
*lock = NULL;
- ut_ad(lock_mode == RW_LOCK_EX
- || lock_mode == RW_LOCK_SHARED);
+ ut_ad(lock_mode == RW_LOCK_X
+ || lock_mode == RW_LOCK_S);
mode = lock_mode;
}
- fold = buf_page_address_fold(space, offset);
- hash_lock = hash_get_lock(buf_pool->page_hash, fold);
+ hash_lock = hash_get_lock(buf_pool->page_hash, page_id.fold());
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)
- && !rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X)
+ && !rw_lock_own(hash_lock, RW_LOCK_S));
- if (mode == RW_LOCK_SHARED) {
+ if (mode == RW_LOCK_S) {
rw_lock_s_lock(hash_lock);
+
+ /* If not own buf_pool_mutex, page_hash can be changed. */
+ hash_lock = hash_lock_s_confirm(
+ hash_lock, buf_pool->page_hash, page_id.fold());
} else {
rw_lock_x_lock(hash_lock);
+ /* If not own buf_pool_mutex, page_hash can be changed. */
+ hash_lock = hash_lock_x_confirm(
+ hash_lock, buf_pool->page_hash, page_id.fold());
}
- bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
+ bpage = buf_page_hash_get_low(buf_pool, page_id);
if (!bpage || buf_pool_watch_is_sentinel(buf_pool, bpage)) {
if (!watch) {
@@ -1284,8 +1199,7 @@ buf_page_hash_get_locked(
}
ut_ad(buf_page_in_file(bpage));
- ut_ad(offset == bpage->offset);
- ut_ad(space == bpage->space);
+ ut_ad(page_id == bpage->id);
if (lock == NULL) {
/* The caller wants us to release the page_hash lock */
@@ -1297,7 +1211,7 @@ buf_page_hash_get_locked(
}
unlock_and_exit:
- if (mode == RW_LOCK_SHARED) {
+ if (mode == RW_LOCK_S) {
rw_lock_s_unlock(hash_lock);
} else {
rw_lock_x_unlock(hash_lock);
@@ -1306,52 +1220,46 @@ exit:
return(bpage);
}
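
The new hash_lock_s_confirm/hash_lock_x_confirm calls close a race: page_hash may be resized while the caller blocks on the old lock, so after acquiring it the caller must re-check which lock now covers the fold. The pattern, modeled on a toy resizable table (all types here are illustrative, not the InnoDB hash API):

#include <mutex>
#include <cstdint>

struct page_hash_m {
	std::mutex	locks[4];
	uint64_t	n_cells = 256;	// a concurrent resize may change this
	std::mutex* get_lock(uint64_t fold) {
		return &locks[(fold % n_cells) % 4];
	}
};

// confirm step: if the table moved while we blocked, chase the lock
// that currently covers this fold until it stabilizes
std::mutex* lock_s_confirm(std::mutex* lk, page_hash_m* h, uint64_t fold)
{
	for (;;) {
		std::mutex* latest = h->get_lock(fold);
		if (latest == lk) return lk;
		lk->unlock();
		latest->lock();
		lk = latest;
	}
}

int main()
{
	page_hash_m h;
	std::mutex* lk = h.get_lock(42);
	lk->lock();
	lk = lock_s_confirm(lk, &h, 42);	// no resize: same lock back
	lk->unlock();
	return 0;
}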
-/******************************************************************//**
-Returns the control block of a file page, NULL if not found.
+/** Returns the control block of a file page, NULL if not found.
If the block is found and lock is not NULL then the appropriate
page_hash lock is acquired in the specified lock mode. Otherwise,
mode value is ignored. It is up to the caller to release the
lock. If the block is found and the lock is NULL then the page_hash
lock is released by this function.
-@return block, NULL if not found */
+@param[in] buf_pool buffer pool instance
+@param[in] page_id page id
+@param[in,out] lock lock of the page hash acquired if bpage is
+found, NULL otherwise. If NULL is passed then the hash_lock is released by
+this function.
+@param[in] lock_mode RW_LOCK_X or RW_LOCK_S. Ignored if
+lock == NULL
+@return pointer to the block or NULL; if NULL, lock is also NULL. */
UNIV_INLINE
buf_block_t*
buf_block_hash_get_locked(
-/*=====================*/
- /*!< out: pointer to the bpage,
- or NULL; if NULL, hash_lock
- is also NULL. */
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: page number */
- rw_lock_t** lock, /*!< in/out: lock of the page
- hash acquired if bpage is
- found. NULL otherwise. If NULL
- is passed then the hash_lock
- is released by this function */
- ulint lock_mode) /*!< in: RW_LOCK_EX or
- RW_LOCK_SHARED. Ignored if
- lock == NULL */
+ buf_pool_t* buf_pool,
+ const page_id_t page_id,
+ rw_lock_t** lock,
+ ulint lock_mode)
{
buf_page_t* bpage = buf_page_hash_get_locked(buf_pool,
- space,
- offset,
+ page_id,
lock,
lock_mode);
buf_block_t* block = buf_page_get_block(bpage);
- if (block) {
+ if (block != NULL) {
+
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-#ifdef UNIV_SYNC_DEBUG
ut_ad(!lock || rw_lock_own(*lock, lock_mode));
-#endif /* UNIV_SYNC_DEBUG */
+
return(block);
} else if (bpage) {
/* It is not a block. Just a bpage */
ut_ad(buf_page_in_file(bpage));
if (lock) {
- if (lock_mode == RW_LOCK_SHARED) {
+ if (lock_mode == RW_LOCK_S) {
rw_lock_s_unlock(*lock);
} else {
rw_lock_x_unlock(*lock);
@@ -1366,23 +1274,16 @@ buf_block_hash_get_locked(
return(NULL);
}
-/********************************************************************//**
-Returns TRUE if the page can be found in the buffer pool hash table.
-
+/** Returns TRUE if the page can be found in the buffer pool hash table.
NOTE that it is possible that the page is not yet read from disk,
though.
-
-@return TRUE if found in the page hash table */
-UNIV_INLINE
-ibool
-buf_page_peek(
-/*==========*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: page number */
+@param[in] page_id page id
+@return true if found in the page hash table */
+inline bool buf_page_peek(const page_id_t page_id)
{
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
+ buf_pool_t* buf_pool = buf_pool_get(page_id);
- return(buf_page_hash_get(buf_pool, space, offset) != NULL);
+ return(buf_page_hash_get(buf_pool, page_id) != NULL);
}
/********************************************************************//**
@@ -1393,19 +1294,26 @@ buf_page_release_zip(
/*=================*/
buf_page_t* bpage) /*!< in: buffer block */
{
- buf_block_t* block;
-
- block = (buf_block_t*) bpage;
+ ut_ad(bpage);
+ ut_a(bpage->buf_fix_count > 0);
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_FILE_PAGE:
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_s_unlock(&block->debug_latch);
-#endif /* UNUV_SYNC_DEBUG */
+#ifdef UNIV_DEBUG
+ {
+ /* No debug latch is acquired if block belongs to system
+ temporary. Debug latch is not of much help if access to block
+ is single threaded. */
+ buf_block_t* block = reinterpret_cast<buf_block_t*>(bpage);
+ if (!fsp_is_system_temporary(block->page.id.space())) {
+ rw_lock_s_unlock(&block->debug_latch);
+ }
+ }
+#endif /* UNIV_DEBUG */
/* Fall through */
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
- buf_block_unfix(block);
+ buf_block_unfix(reinterpret_cast<buf_block_t*>(bpage));
return;
case BUF_BLOCK_POOL_WATCH:
@@ -1420,31 +1328,34 @@ buf_page_release_zip(
}
/********************************************************************//**
-Decrements the bufferfix count of a buffer control block and releases
-a latch, if specified. */
+Releases a latch, if specified. */
UNIV_INLINE
void
-buf_page_release(
-/*=============*/
+buf_page_release_latch(
+/*===================*/
buf_block_t* block, /*!< in: buffer block */
ulint rw_latch) /*!< in: RW_S_LATCH, RW_X_LATCH,
RW_NO_LATCH */
{
- ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+#ifdef UNIV_DEBUG
+ /* No debug latch is acquired if block belongs to system
+ temporary. Debug latch is not of much help if access to block
+ is single threaded. */
+ if (!fsp_is_system_temporary(block->page.id.space())) {
+ rw_lock_s_unlock(&block->debug_latch);
+ }
+#endif /* UNIV_DEBUG */
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_s_unlock(&(block->debug_latch));
-#endif
if (rw_latch == RW_S_LATCH) {
- rw_lock_s_unlock(&(block->lock));
+ rw_lock_s_unlock(&block->lock);
+ } else if (rw_latch == RW_SX_LATCH) {
+ rw_lock_sx_unlock(&block->lock);
} else if (rw_latch == RW_X_LATCH) {
- rw_lock_x_unlock(&(block->lock));
+ rw_lock_x_unlock(&block->lock);
}
-
- buf_block_unfix(block);
}
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
/*********************************************************************//**
Adds latch level info for the rw-lock protecting the buffer frame. This
should be called in the debug version after a successful latching of a
@@ -1455,12 +1366,12 @@ buf_block_dbg_add_level(
/*====================*/
buf_block_t* block, /*!< in: buffer page
where we have acquired latch */
- ulint level) /*!< in: latching order level */
+ latch_level_t level) /*!< in: latching order level */
{
- sync_thread_add_level(&block->lock, level, FALSE);
+ sync_check_lock(&block->lock, level);
}
-#endif /* UNIV_SYNC_DEBUG */
+#endif /* UNIV_DEBUG */
/********************************************************************//**
Acquire mutex on all buffer pool instances. */
UNIV_INLINE
@@ -1468,12 +1379,9 @@ void
buf_pool_mutex_enter_all(void)
/*==========================*/
{
- ulint i;
-
- for (i = 0; i < srv_buf_pool_instances; i++) {
- buf_pool_t* buf_pool;
+ for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
+ buf_pool_t* buf_pool = buf_pool_from_array(i);
- buf_pool = buf_pool_from_array(i);
buf_pool_mutex_enter(buf_pool);
}
}
@@ -1531,4 +1439,33 @@ buf_page_get_frame(
}
}
-#endif /* !UNIV_HOTBACKUP */
+/** Check whether a page stored earlier may no longer be in the buffer pool.
+@param[in]	withdraw_clock	withdraw clock value saved when the page was stored
+@retval true if the page might have been relocated */
+UNIV_INLINE
+bool
+buf_pool_is_obsolete(
+ ulint withdraw_clock)
+{
+ return(buf_pool_withdrawing
+ || buf_withdraw_clock != withdraw_clock);
+}
+
+/** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit,
+if needed.
+@param[in] size size in bytes
+@return aligned size */
+UNIV_INLINE
+ulint
+buf_pool_size_align(
+ ulint size)
+{
+	const ib_uint64_t	m = ((ib_uint64_t) srv_buf_pool_instances)
+		* srv_buf_pool_chunk_unit;
+ size = ut_max(size, srv_buf_pool_min_size);
+
+ if (size % m == 0) {
+ return(size);
+ } else {
+ return (ulint)((size / m + 1) * m);
+ }
+}
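
buf_pool_size_align() is round-up-to-a-multiple with a floor. A self-contained version with a worked example (the MiB figures are illustrative):

#include <cstdint>
#include <cstdio>

// round size up to a multiple of m = instances * chunk_unit,
// after clamping it to the minimum pool size
uint64_t size_align(uint64_t size, uint64_t instances, uint64_t chunk_unit,
		    uint64_t min_size)
{
	const uint64_t m = instances * chunk_unit;
	if (size < min_size) size = min_size;
	return size % m == 0 ? size : (size / m + 1) * m;
}

int main()
{
	// e.g. 4 instances x 128 MiB chunk unit: 600 MiB is padded to 1024
	std::printf("%llu MiB\n", (unsigned long long)
		    size_align(600, 4, 128, 5));	// prints 1024 MiB
	return 0;
}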
diff --git a/storage/innobase/include/buf0checksum.h b/storage/innobase/include/buf0checksum.h
index 8a87c4815ea..ce39e290ac7 100644
--- a/storage/innobase/include/buf0checksum.h
+++ b/storage/innobase/include/buf0checksum.h
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,57 +27,53 @@ Created Aug 11, 2011 Vasil Dimov
#ifndef buf0checksum_h
#define buf0checksum_h
-#include "univ.i"
-
#include "buf0types.h"
-/** Magic value to use instead of checksums when they are disabled */
-#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
-
-/********************************************************************//**
-Calculates a page CRC32 which is stored to the page when it is written
+#ifdef INNODB_BUG_ENDIAN_CRC32
+/** Calculate the CRC32 checksum of a page. The value is stored to the page
+when it is written to a file and also checked for a match when reading from
+the file. Note that we must be careful to calculate the same value on all
+architectures.
+@param[in] page buffer page (srv_page_size bytes)
+@param[in] bug_endian whether to use big endian byteorder
+when converting byte strings to integers, for bug-compatibility with
+big-endian architecture running MySQL 5.6, MariaDB 10.0 or MariaDB 10.1
+@return CRC-32C */
+uint32_t buf_calc_page_crc32(const byte* page, bool bug_endian = false);
+#else
+/** Calculate the CRC32 checksum of a page. The value is stored to the page
+when it is written to a file and also checked for a match when reading from
+the file. Note that we must be careful to calculate the same value on all
+architectures.
+@param[in] page buffer page (srv_page_size bytes)
+@return CRC-32C */
+uint32_t buf_calc_page_crc32(const byte* page);
+#endif
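
Both declarations compute CRC-32C (the Castagnoli polynomial). For reference, a bitwise implementation with the standard check value; the real code uses hardware or table-driven variants, so this is only a readable model:

#include <cstdint>
#include <cstddef>
#include <cstring>

// reflected CRC-32C, polynomial 0x82F63B78, init and final xor 0xFFFFFFFF
uint32_t crc32c(const unsigned char* p, std::size_t n)
{
	uint32_t crc = 0xFFFFFFFFU;
	while (n--) {
		crc ^= *p++;
		for (int k = 0; k < 8; k++)
			crc = (crc >> 1) ^ (0x82F63B78U & (0U - (crc & 1U)));
	}
	return ~crc;
}

int main()
{
	const char* check = "123456789";
	// 0xE3069283 is the published CRC-32C check value for this input
	return crc32c((const unsigned char*) check, std::strlen(check))
		== 0xE3069283U ? 0 : 1;
}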
+
+/** Calculate a checksum which is stored to the page when it is written
to a file. Note that we must be careful to calculate the same value on
32-bit and 64-bit architectures.
-@return checksum */
-UNIV_INTERN
-ib_uint32_t
-buf_calc_page_crc32(
-/*================*/
- const byte* page); /*!< in: buffer page */
-
-/********************************************************************//**
-Calculates a page checksum which is stored to the page when it is written
-to a file. Note that we must be careful to calculate the same value on
-32-bit and 64-bit architectures.
-@return checksum */
-UNIV_INTERN
-ulint
-buf_calc_page_new_checksum(
-/*=======================*/
- const byte* page); /*!< in: buffer page */
-
-/********************************************************************//**
-In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
-looked at the first few bytes of the page. This calculates that old
-checksum.
+@param[in] page file page (srv_page_size bytes)
+@return checksum */
+uint32_t
+buf_calc_page_new_checksum(const byte* page);
+
+/** In MySQL before 4.0.14 or 4.1.1 there was an InnoDB bug that
+the checksum only looked at the first few bytes of the page.
+This calculates that old checksum.
NOTE: we must first store the new formula checksum to
FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
because this takes that field as an input!
-@return checksum */
-UNIV_INTERN
-ulint
-buf_calc_page_old_checksum(
-/*=======================*/
- const byte* page); /*!< in: buffer page */
-
-/********************************************************************//**
-Return a printable string describing the checksum algorithm.
-@return algorithm name */
-UNIV_INTERN
+@param[in] page file page (srv_page_size bytes)
+@return checksum */
+uint32_t
+buf_calc_page_old_checksum(const byte* page);
+
+/** Return a printable string describing the checksum algorithm.
+@param[in] algo algorithm
+@return algorithm name */
const char*
-buf_checksum_algorithm_name(
-/*========================*/
- srv_checksum_algorithm_t algo); /*!< in: algorithm */
+buf_checksum_algorithm_name(srv_checksum_algorithm_t algo);
extern ulong srv_checksum_algorithm;
diff --git a/storage/innobase/include/buf0dblwr.h b/storage/innobase/include/buf0dblwr.h
index 3d51c1e1061..07ffd626956 100644
--- a/storage/innobase/include/buf0dblwr.h
+++ b/storage/innobase/include/buf0dblwr.h
@@ -27,13 +27,11 @@ Created 2011/12/19 Inaam Rana
#ifndef buf0dblwr_h
#define buf0dblwr_h
-#include "univ.i"
#include "ut0byte.h"
#include "log0log.h"
+#include "buf0types.h"
#include "log0recv.h"
-#ifndef UNIV_HOTBACKUP
-
/** Doublewrite system */
extern buf_dblwr_t* buf_dblwr;
/** Set to TRUE when the doublewrite buffer is being created */
@@ -44,39 +42,35 @@ is not present in the TRX_SYS page.
@return whether the operation succeeded
@retval true if the doublewrite buffer exists or was created
@retval false if the creation failed (too small first data file) */
-UNIV_INTERN
+MY_ATTRIBUTE((warn_unused_result))
bool
-buf_dblwr_create()
- MY_ATTRIBUTE((warn_unused_result));
+buf_dblwr_create();
-/****************************************************************//**
-At a database startup initializes the doublewrite buffer memory structure if
+/**
+At database startup initializes the doublewrite buffer memory structure if
we already have a doublewrite buffer created in the data files. If we are
upgrading to an InnoDB version which supports multiple tablespaces, then this
function performs the necessary update operations. If we are in a crash
-recovery, this function loads the pages from double write buffer into memory. */
-void
+recovery, this function loads the pages from the doublewrite buffer into memory.
+@param[in] file File handle
+@param[in] path Path name of file
+@return DB_SUCCESS or error code */
+dberr_t
buf_dblwr_init_or_load_pages(
-/*=========================*/
pfs_os_file_t file,
- char* path,
- bool load_corrupt_pages);
+ const char* path);
-/****************************************************************//**
-Process the double write buffer pages. */
+/** Process and remove the double write buffer pages for all tablespaces. */
void
-buf_dblwr_process(void);
-/*===================*/
+buf_dblwr_process();
/****************************************************************//**
frees doublewrite buffer. */
-UNIV_INTERN
void
-buf_dblwr_free(void);
-/*================*/
+buf_dblwr_free();
+
/********************************************************************//**
Updates the doublewrite buffer when an IO request is completed. */
-UNIV_INTERN
void
buf_dblwr_update(
/*=============*/
@@ -86,7 +80,6 @@ buf_dblwr_update(
Determines if a page number is located inside the doublewrite buffer.
@return TRUE if the location is inside the two blocks of the
doublewrite buffer */
-UNIV_INTERN
ibool
buf_dblwr_page_inside(
/*==================*/
@@ -95,21 +88,26 @@ buf_dblwr_page_inside(
Posts a buffer page for writing. If the doublewrite memory buffer is
full, calls buf_dblwr_flush_buffered_writes and waits for free
space to appear. */
-UNIV_INTERN
void
buf_dblwr_add_to_batch(
/*====================*/
buf_page_t* bpage); /*!< in: buffer block to write */
+
+/********************************************************************//**
+Flush to the data files a batch of writes that have already been
+written to the dblwr buffer on disk. */
+void
+buf_dblwr_sync_datafiles();
+
/********************************************************************//**
Flushes possible buffered writes from the doublewrite memory buffer to disk,
and also wakes up the aio thread if simulated aio is used. It is very
important to call this function after a batch of writes has been posted,
and also when we may have to wait for a page latch! Otherwise a deadlock
of threads can occur. */
-UNIV_INTERN
void
-buf_dblwr_flush_buffered_writes(void);
-/*=================================*/
+buf_dblwr_flush_buffered_writes();
+
/********************************************************************//**
Writes a page to the doublewrite buffer on disk, sync it, then write
the page to the datafile and sync the datafile. This function is used
@@ -118,7 +116,6 @@ flushes in the doublewrite buffer are in use we wait here for one to
become free. We are guaranteed that a slot will become free because any
thread that is using a slot must also release the slot before leaving
this function. */
-UNIV_INTERN
void
buf_dblwr_write_single_page(
/*========================*/
@@ -161,7 +158,4 @@ struct buf_dblwr_t{
cached to write_buf */
};
-
-#endif /* UNIV_HOTBACKUP */
-
#endif
diff --git a/storage/innobase/include/buf0dump.h b/storage/innobase/include/buf0dump.h
index 32db2a0eb02..8a7ef95ef9c 100644
--- a/storage/innobase/include/buf0dump.h
+++ b/storage/innobase/include/buf0dump.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2014, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -33,7 +33,6 @@ Wakes up the buffer pool dump/load thread and instructs it to start
a dump. This function is called by MySQL code via buffer_pool_dump_now()
and it should return immediately because the whole MySQL is frozen during
its execution. */
-UNIV_INTERN
void
buf_dump_start();
/*============*/
@@ -43,7 +42,6 @@ Wakes up the buffer pool dump/load thread and instructs it to start
a load. This function is called by MySQL code via buffer_pool_load_now()
and it should return immediately because the whole MySQL is frozen during
its execution. */
-UNIV_INTERN
void
buf_load_start();
/*============*/
@@ -52,7 +50,6 @@ buf_load_start();
Aborts a currently running buffer pool load. This function is called by
MySQL code via buffer_pool_load_abort() and it should return immediately
because the whole MySQL is frozen during its execution. */
-UNIV_INTERN
void
buf_load_abort();
/*============*/
@@ -62,7 +59,7 @@ This is the main thread for buffer pool dump/load. It waits for an
event and when waked up either performs a dump or load and sleeps
again.
@return this function does not return, it calls os_thread_exit() */
-extern "C" UNIV_INTERN
+extern "C"
os_thread_ret_t
DECLARE_THREAD(buf_dump_thread)(
/*============================*/
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
index ef01d31524b..c7f5d410099 100644
--- a/storage/innobase/include/buf0flu.h
+++ b/storage/innobase/include/buf0flu.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2014, 2016, MariaDB Corporation
+Copyright (c) 2014, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,19 +27,25 @@ Created 11/5/1995 Heikki Tuuri
#ifndef buf0flu_h
#define buf0flu_h
-#include "univ.i"
#include "ut0byte.h"
#include "log0log.h"
-#ifndef UNIV_HOTBACKUP
-#include "mtr0types.h"
#include "buf0types.h"
/** Flag indicating if the page_cleaner is in active state. */
extern bool buf_page_cleaner_is_active;
+#ifdef UNIV_DEBUG
+
+/** Value of MySQL global variable used to disable page cleaner. */
+extern my_bool innodb_page_cleaner_disabled_debug;
+
+#endif /* UNIV_DEBUG */
+
/** Event to synchronise with the flushing. */
extern os_event_t buf_flush_event;
+class ut_stage_alter_t;
+
/** Handled page counters for a single flush */
struct flush_counters_t {
ulint flushed; /*!< number of dirty pages flushed */
@@ -50,7 +56,6 @@ struct flush_counters_t {
/********************************************************************//**
Remove a block from the flush list of modified blocks. */
-UNIV_INTERN
void
buf_flush_remove(
/*=============*/
@@ -59,31 +64,27 @@ buf_flush_remove(
Relocates a buffer control block on the flush_list.
Note that it is assumed that the contents of bpage has already been
copied to dpage. */
-UNIV_INTERN
void
buf_flush_relocate_on_flush_list(
/*=============================*/
buf_page_t* bpage, /*!< in/out: control block being moved */
buf_page_t* dpage); /*!< in/out: destination block */
-/********************************************************************//**
-Updates the flush system data structures when a write is completed. */
-UNIV_INTERN
-void
-buf_flush_write_complete(
-/*=====================*/
- buf_page_t* bpage); /*!< in: pointer to the block in question */
-#endif /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Initializes a page for writing to the tablespace. */
-UNIV_INTERN
+/** Update the flush system data structures when a write is completed.
+@param[in,out] bpage flushed page
+@param[in] dblwr whether the doublewrite buffer was used */
+void buf_flush_write_complete(buf_page_t* bpage, bool dblwr);
+/** Initialize a page for writing to the tablespace.
+@param[in] block buffer block; NULL if bypassing the buffer pool
+@param[in,out] page page frame
+@param[in,out] page_zip_ compressed page, or NULL if uncompressed
+@param[in] newest_lsn newest modification LSN to the page */
void
buf_flush_init_for_writing(
-/*=======================*/
- byte* page, /*!< in/out: page */
- void* page_zip_, /*!< in/out: compressed page, or NULL */
- lsn_t newest_lsn); /*!< in: newest modification lsn
- to the page */
-#ifndef UNIV_HOTBACKUP
+ const buf_block_t* block,
+ byte* page,
+ void* page_zip_,
+ lsn_t newest_lsn);
+
# if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
/********************************************************************//**
Writes a flushable page asynchronously from the buffer pool to a file.
@@ -91,36 +92,53 @@ NOTE: buf_pool->mutex and block->mutex must be held upon entering this
function, and they will be released by this function after flushing.
This is loosely based on buf_flush_batch() and buf_flush_page().
@return TRUE if the page was flushed and the mutexes released */
-UNIV_INTERN
ibool
buf_flush_page_try(
/*===============*/
buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
buf_block_t* block) /*!< in/out: buffer control block */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
# endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-/*******************************************************************//**
-This utility flushes dirty blocks from the end of the flush list of
-all buffer pool instances.
+/** Do flushing batch of a given type.
+NOTE: The calling thread is not allowed to own any latches on pages!
+@param[in,out] buf_pool buffer pool instance
+@param[in] type flush type
+@param[in] min_n wished minimum number of blocks flushed
+(it is not guaranteed that the actual number is that big, though)
+@param[in] lsn_limit in the case BUF_FLUSH_LIST all blocks whose
+oldest_modification is smaller than this should be flushed (if their number
+does not exceed min_n), otherwise ignored
+@param[out] n the number of pages which were processed is
+passed back to caller. Ignored if NULL
+@retval true if a batch was queued successfully.
+@retval false if another batch of same type was already running. */
+bool
+buf_flush_do_batch(
+ buf_pool_t* buf_pool,
+ buf_flush_t type,
+ ulint min_n,
+ lsn_t lsn_limit,
+ flush_counters_t* n);
+
+/** This utility flushes dirty blocks from the end of the flush list of all
+buffer pool instances.
NOTE: The calling thread is not allowed to own any latches on pages!
+@param[in] min_n wished minimum number of blocks flushed (it is
+not guaranteed that the actual number is that big, though)
+@param[in] lsn_limit in the case BUF_FLUSH_LIST all blocks whose
+oldest_modification is smaller than this should be flushed (if their number
+does not exceed min_n), otherwise ignored
+@param[out] n_processed the number of pages which were processed is
+passed back to caller. Ignored if NULL.
@return true if a batch was queued successfully for each buffer pool
instance. false if another batch of same type was already running in
at least one of the buffer pool instance */
-UNIV_INTERN
bool
-buf_flush_list(
-/*===========*/
- ulint min_n, /*!< in: wished minimum mumber of blocks
- flushed (it is not guaranteed that the
- actual number is that big, though) */
- lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all
- blocks whose oldest_modification is
- smaller than this should be flushed
- (if their number does not exceed
- min_n), otherwise ignored */
- ulint* n_processed); /*!< out: the number of pages
- which were processed is passed
- back to caller. Ignored if NULL */
+buf_flush_lists(
+ ulint min_n,
+ lsn_t lsn_limit,
+ ulint* n_processed);
+
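
The min_n/lsn_limit contract documented above, expressed as a standalone selection rule over an oldest-first flush list (dirty_page_m and pick_for_flush are illustrative names, not the InnoDB API):

#include <cstdint>
#include <cstddef>
#include <vector>

struct dirty_page_m { uint64_t oldest_modification; };

// walk the flush list from its oldest end; stop at lsn_limit or once
// min_n pages have been picked, assuming the list is ordered by
// oldest_modification as the flush list is
std::size_t pick_for_flush(const std::vector<dirty_page_m>& oldest_first,
			   std::size_t min_n, uint64_t lsn_limit)
{
	std::size_t n = 0;
	for (const dirty_page_m& p : oldest_first) {
		if (n >= min_n || p.oldest_modification >= lsn_limit)
			break;
		++n;	// this page would be flushed
	}
	return n;
}

int main()
{
	std::vector<dirty_page_m> list = { {10}, {20}, {30}, {40} };
	return pick_for_flush(list, 10, 35) == 3 ? 0 : 1;
}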
/******************************************************************//**
This function picks up a single page from the tail of the LRU
list, flushes it (if it is dirty), removes it from page_hash and LRU
@@ -128,21 +146,27 @@ list and puts it on the free list. It is called from user threads when
they are unable to find a replaceable page at the tail of the LRU
list i.e.: when the background LRU flushing in the page_cleaner thread
is not fast enough to keep pace with the workload.
-@return TRUE if success. */
-UNIV_INTERN
-ibool
+@return true if success. */
+bool
buf_flush_single_page_from_LRU(
/*===========================*/
buf_pool_t* buf_pool); /*!< in/out: buffer pool instance */
/******************************************************************//**
Waits until a flush batch of the given type ends */
-UNIV_INTERN
void
buf_flush_wait_batch_end(
/*=====================*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
buf_flush_t type); /*!< in: BUF_FLUSH_LRU
or BUF_FLUSH_LIST */
+/**
+Waits until a flush batch of the given lsn ends
+@param[in] new_oldest target oldest_modified_lsn to wait for */
+
+void
+buf_flush_wait_flushed(
+ lsn_t new_oldest);
+
/********************************************************************//**
This function should be called at a mini-transaction commit, if a page was
modified in it. Puts the block to the list of modified blocks, if it not
@@ -151,8 +175,13 @@ UNIV_INLINE
void
buf_flush_note_modification(
/*========================*/
- buf_block_t* block, /*!< in: block which is modified */
- mtr_t* mtr); /*!< in: mtr */
+ buf_block_t* block, /*!< in: block which is modified */
+ lsn_t start_lsn, /*!< in: start lsn of the first mtr in a
+ set of mtr's */
+ lsn_t end_lsn, /*!< in: end lsn of the last mtr in the
+ set of mtr's */
+ FlushObserver* observer); /*!< in: flush observer */
+
/********************************************************************//**
This function should be called when recovery has modified a buffer page. */
UNIV_INLINE
@@ -167,46 +196,59 @@ buf_flush_recv_note_modification(
/********************************************************************//**
Returns TRUE if the file page block is immediately suitable for replacement,
i.e., transition FILE_PAGE => NOT_USED allowed.
-@return TRUE if can replace immediately */
-UNIV_INTERN
+@return TRUE if can replace immediately */
ibool
buf_flush_ready_for_replace(
/*========================*/
buf_page_t* bpage); /*!< in: buffer control block, must be
buf_page_in_file(bpage) and in the LRU list */
+
+#ifdef UNIV_DEBUG
+/** Disables page cleaner threads (coordinator and workers).
+It's used by: SET GLOBAL innodb_page_cleaner_disabled_debug = 1 (0).
+@param[in] thd thread handle
+@param[in] var pointer to system variable
+@param[out] var_ptr where the formal string goes
+@param[in] save immediate result from check function */
+void
+buf_flush_page_cleaner_disabled_debug_update(
+ THD* thd,
+ struct st_mysql_sys_var* var,
+ void* var_ptr,
+ const void* save);
+#endif /* UNIV_DEBUG */
+
/******************************************************************//**
page_cleaner thread tasked with flushing dirty pages from the buffer
-pools. As of now we'll have only one instance of this thread.
+pools. As of now we'll have only one coordinator thread.
+@return a dummy parameter */
+extern "C"
+os_thread_ret_t
+DECLARE_THREAD(buf_flush_page_cleaner_coordinator)(
+/*===============================================*/
+ void* arg); /*!< in: a dummy parameter required by
+ os_thread_create */
+/******************************************************************//**
+Worker thread of page_cleaner.
@return a dummy parameter */
-extern "C" UNIV_INTERN
+extern "C"
os_thread_ret_t
-DECLARE_THREAD(buf_flush_page_cleaner_thread)(
+DECLARE_THREAD(buf_flush_page_cleaner_worker)(
/*==========================================*/
void* arg); /*!< in: a dummy parameter required by
os_thread_create */
-/*********************************************************************//**
-Clears up tail of the LRU lists:
-* Put replaceable pages at the tail of LRU to the free list
-* Flush dirty pages at the tail of LRU to the disk
-The depth to which we scan each buffer pool is controlled by dynamic
-config parameter innodb_LRU_scan_depth.
-@return total pages flushed */
-UNIV_INTERN
-ulint
-buf_flush_LRU_tail(void);
-/*====================*/
-/*********************************************************************//**
-Wait for any possible LRU flushes that are in progress to end. */
-UNIV_INTERN
+/** Initialize page_cleaner. */
+void
+buf_flush_page_cleaner_init(void);
+
+/** Wait for any possible LRU flushes that are in progress to end. */
void
buf_flush_wait_LRU_batch_end(void);
-/*==============================*/
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/******************************************************************//**
Validates the flush list.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
buf_flush_validate(
/*===============*/
@@ -217,14 +259,12 @@ buf_flush_validate(
Initialize the red-black tree to speed up insertions into the flush_list
during recovery process. Should be called at the start of recovery
process before any page has been read/written. */
-UNIV_INTERN
void
buf_flush_init_flush_rbt(void);
/*==========================*/
/********************************************************************//**
Frees up the red-black tree. */
-UNIV_INTERN
void
buf_flush_free_flush_rbt(void);
/*==========================*/
@@ -235,10 +275,9 @@ NOTE: in simulated aio we must call
os_aio_simulated_wake_handler_threads after we have posted a batch of
writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be
held upon entering this function, and they will be released by this
-function if it returns true.
-@return TRUE if the page was flushed */
-UNIV_INTERN
-bool
+function.
+@return TRUE if the page was flushed */
+ibool
buf_flush_page(
/*===========*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
@@ -247,8 +286,7 @@ buf_flush_page(
bool sync); /*!< in: true if sync IO request */
/********************************************************************//**
Returns true if the block is modified and ready for flushing.
-@return true if can flush immediately */
-UNIV_INTERN
+@return true if can flush immediately */
bool
buf_flush_ready_for_flush(
/*======================*/
@@ -257,28 +295,112 @@ buf_flush_ready_for_flush(
buf_flush_t flush_type)/*!< in: type of flush */
MY_ATTRIBUTE((warn_unused_result));
-#ifdef UNIV_DEBUG
/******************************************************************//**
Check if there are any dirty pages that belong to a space id in the flush
list in a particular buffer pool.
-@return number of dirty pages present in a single buffer pool */
-UNIV_INTERN
+@return number of dirty pages present in a single buffer pool */
ulint
buf_pool_get_dirty_pages_count(
/*===========================*/
buf_pool_t* buf_pool, /*!< in: buffer pool */
- ulint id); /*!< in: space id to check */
-/******************************************************************//**
-Check if there are any dirty pages that belong to a space id in the flush list.
-@return count of dirty pages present in all the buffer pools */
-UNIV_INTERN
-ulint
-buf_flush_get_dirty_pages_count(
-/*============================*/
- ulint id); /*!< in: space id to check */
-#endif /* UNIV_DEBUG */
+ ulint id, /*!< in: space id to check */
+ FlushObserver* observer); /*!< in: flush observer to check */
+
+/*******************************************************************//**
+Synchronously flush dirty blocks from the end of the flush list of all buffer
+pool instances.
+NOTE: The calling thread is not allowed to own any latches on pages! */
+void
+buf_flush_sync_all_buf_pools(void);
+/*==============================*/
+
+/** Request IO burst and wake page_cleaner up.
+@param[in] lsn_limit upper limit of LSN to be flushed */
+void
+buf_flush_request_force(
+ lsn_t lsn_limit);
+
+/** We use FlushObserver to track flushing of non-redo logged pages in bulk
+create index (BtrBulk.cc). Since we disable redo logging during an index
+build, we need to make sure that all dirty pages modified by the index
+build are flushed to disk before any redo logged operations go to the index. */
+
+class FlushObserver {
+public:
+ /** Constructor
+ @param[in] space_id table space id
+ @param[in] trx trx instance
+ @param[in] stage performance schema accounting object,
+ used by ALTER TABLE. It is passed to log_preflush_pool_modified_pages()
+ for accounting. */
+ FlushObserver(ulint space_id, trx_t* trx, ut_stage_alter_t* stage);
+
+	/** Destructor */
+ ~FlushObserver();
+
+	/** Check whether pages have been flushed and removed from the flush list
+ in a buffer pool instance.
+ @param[in] instance_no buffer pool instance no
+ @return true if the pages were removed from the flush list */
+ bool is_complete(ulint instance_no)
+ {
+ return(m_flushed->at(instance_no) == m_removed->at(instance_no)
+ || m_interrupted);
+ }
+
+ /** @return whether to flush only some pages of the tablespace */
+ bool is_partial_flush() const { return m_stage != NULL; }
+
+ /** @return whether the operation was interrupted */
+ bool is_interrupted() const { return m_interrupted; }
+
+	/** Interrupt the observer so that it stops waiting. */
+ void interrupted()
+ {
+ m_interrupted = true;
+ }
-#endif /* !UNIV_HOTBACKUP */
+ /** Check whether the operation has been interrupted */
+ void check_interrupted();
+
+ /** Flush dirty pages. */
+ void flush();
+ /** Notify observer of flushing a page
+ @param[in] buf_pool buffer pool instance
+ @param[in] bpage buffer page to flush */
+ void notify_flush(
+ buf_pool_t* buf_pool,
+ buf_page_t* bpage);
+
+ /** Notify observer of removing a page from flush list
+ @param[in] buf_pool buffer pool instance
+ @param[in] bpage buffer page flushed */
+ void notify_remove(
+ buf_pool_t* buf_pool,
+ buf_page_t* bpage);
+private:
+ /** Table space id */
+ const ulint m_space_id;
+
+ /** Trx instance */
+ const trx_t* const m_trx;
+
+ /** Performance schema accounting object, used by ALTER TABLE.
+ If not NULL, then stage->begin_phase_flush() will be called initially,
+ specifying the number of pages to be attempted to be flushed and
+ subsequently, stage->inc() will be called for each page we attempt to
+ flush. */
+ ut_stage_alter_t* m_stage;
+
+	/* Number of flush requests sent, per buffer pool instance */
+ std::vector<ulint>* m_flushed;
+
+	/* Number of flush requests finished, i.e. pages removed from the
+	flush list, per buffer pool instance */
+ std::vector<ulint>* m_removed;
+
+ /* True if the operation was interrupted. */
+ bool m_interrupted;
+};
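
To make the lifecycle concrete, here is a sketch of how a bulk index build might drive a FlushObserver; the UT_NEW_NOKEY allocator and the trx_set_flush_observer() hook are assumed from the surrounding codebase:

	FlushObserver*	observer = UT_NEW_NOKEY(
		FlushObserver(space_id, trx, NULL /* no ALTER TABLE stage */));
	trx_set_flush_observer(trx, observer);	/* assumed hook on trx_t */

	/* ... build the index with redo logging disabled ... */

	observer->flush();			/* write out all observed pages */
	if (observer->is_interrupted()) {
		/* the DDL was killed; treat the build as failed */
	}
	UT_DELETE(observer);
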
/******************************************************************//**
Start a buffer flush batch for LRU or flush list */
@@ -331,8 +453,6 @@ buf_flush_batch(
counts */
-#ifndef UNIV_NONINL
#include "buf0flu.ic"
-#endif
#endif
diff --git a/storage/innobase/include/buf0flu.ic b/storage/innobase/include/buf0flu.ic
index dd049daa726..8d06a53c547 100644
--- a/storage/innobase/include/buf0flu.ic
+++ b/storage/innobase/include/buf0flu.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,25 +23,24 @@ The database buffer pool flush algorithm
Created 11/5/1995 Heikki Tuuri
*******************************************************/
-#ifndef UNIV_HOTBACKUP
#include "buf0buf.h"
#include "mtr0mtr.h"
#include "srv0srv.h"
+#include "fsp0types.h"
/********************************************************************//**
Inserts a modified block into the flush list. */
-UNIV_INTERN
void
buf_flush_insert_into_flush_list(
/*=============================*/
buf_pool_t* buf_pool, /*!< buffer pool instance */
buf_block_t* block, /*!< in/out: block which is modified */
lsn_t lsn); /*!< in: oldest modification */
+
/********************************************************************//**
Inserts a modified block into the flush list in the right sorted position.
This function is used by recovery, because there the modifications do not
necessarily come in the order of lsn's. */
-UNIV_INTERN
void
buf_flush_insert_sorted_into_flush_list(
/*====================================*/
@@ -57,40 +56,49 @@ UNIV_INLINE
void
buf_flush_note_modification(
/*========================*/
- buf_block_t* block, /*!< in: block which is modified */
- mtr_t* mtr) /*!< in: mtr */
+ buf_block_t* block, /*!< in: block which is modified */
+ lsn_t start_lsn, /*!< in: start lsn of the mtr that
+ modified this block */
+ lsn_t end_lsn, /*!< in: end lsn of the mtr that
+ modified this block */
+ FlushObserver* observer) /*!< in: flush observer */
{
- buf_pool_t* buf_pool = buf_pool_from_block(block);
-
- ut_ad(!srv_read_only_mode);
- ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- ut_ad(block->page.buf_fix_count > 0);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+#ifdef UNIV_DEBUG
+ {
+ /* Allow write to proceed to shared temporary tablespace
+ in read-only mode. */
+ ut_ad(!srv_read_only_mode
+ || fsp_is_system_temporary(block->page.id.space()));
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+ ut_ad(block->page.buf_fix_count > 0);
+
+ buf_pool_t* buf_pool = buf_pool_from_block(block);
+
+ ut_ad(!buf_pool_mutex_own(buf_pool));
+ ut_ad(!buf_flush_list_mutex_own(buf_pool));
+ }
+#endif /* UNIV_DEBUG */
- ut_ad(!buf_pool_mutex_own(buf_pool));
- ut_ad(!buf_flush_list_mutex_own(buf_pool));
- ut_ad(!mtr->made_dirty || log_flush_order_mutex_own());
+ mutex_enter(&block->mutex);
- ut_ad(mtr->start_lsn != 0);
- ut_ad(mtr->modifications);
+ ut_ad(block->page.newest_modification <= end_lsn);
+ block->page.newest_modification = end_lsn;
- mutex_enter(&block->mutex);
- ut_ad(block->page.newest_modification <= mtr->end_lsn);
+ /* Don't allow to set flush observer from non-null to null,
+ or from one observer to another. */
+ ut_ad(block->page.flush_observer == NULL
+ || block->page.flush_observer == observer);
+ block->page.flush_observer = observer;
- block->page.newest_modification = mtr->end_lsn;
+ if (block->page.oldest_modification == 0) {
+ buf_pool_t* buf_pool = buf_pool_from_block(block);
- if (!block->page.oldest_modification) {
- ut_a(mtr->made_dirty);
- ut_ad(log_flush_order_mutex_own());
- buf_flush_insert_into_flush_list(
- buf_pool, block, mtr->start_lsn);
+ buf_flush_insert_into_flush_list(buf_pool, block, start_lsn);
} else {
- ut_ad(block->page.oldest_modification <= mtr->start_lsn);
+ ut_ad(block->page.oldest_modification <= start_lsn);
}
- mutex_exit(&block->mutex);
+ buf_page_mutex_exit(block);
srv_stats.buf_pool_write_requests.inc();
}
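
The new signature decouples this function from mtr_t: the commit path now passes the redo log range and the flush observer explicitly. A sketch of the calling pattern at mini-transaction commit; the iteration helper and the lsn accessors are hypothetical:

	/* For every block dirtied by the mtr, stamp it with the redo range. */
	for (buf_block_t* block : dirtied_blocks(mtr)) {	/* hypothetical */
		buf_flush_note_modification(block,
					    mtr.start_lsn(),	/* assumed accessor */
					    mtr.end_lsn(),	/* assumed accessor */
					    mtr.get_flush_observer());
	}
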
@@ -107,33 +115,35 @@ buf_flush_recv_note_modification(
lsn_t end_lsn) /*!< in: end lsn of the last mtr in the
set of mtr's */
{
- buf_pool_t* buf_pool = buf_pool_from_block(block);
+#ifdef UNIV_DEBUG
+ {
+ ut_ad(!srv_read_only_mode);
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+ ut_ad(block->page.buf_fix_count > 0);
- ut_ad(!srv_read_only_mode);
- ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- ut_ad(block->page.buf_fix_count > 0);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ buf_pool_t* buf_pool = buf_pool_from_block(block);
- ut_ad(!buf_pool_mutex_own(buf_pool));
- ut_ad(!buf_flush_list_mutex_own(buf_pool));
- ut_ad(log_flush_order_mutex_own());
+ ut_ad(!buf_pool_mutex_own(buf_pool));
+ ut_ad(!buf_flush_list_mutex_own(buf_pool));
- ut_ad(start_lsn != 0);
- ut_ad(block->page.newest_modification <= end_lsn);
+ ut_ad(start_lsn != 0);
+ ut_ad(block->page.newest_modification <= end_lsn);
+ }
+#endif /* UNIV_DEBUG */
+
+ buf_page_mutex_enter(block);
- mutex_enter(&block->mutex);
block->page.newest_modification = end_lsn;
if (!block->page.oldest_modification) {
+ buf_pool_t* buf_pool = buf_pool_from_block(block);
+
buf_flush_insert_sorted_into_flush_list(
buf_pool, block, start_lsn);
} else {
ut_ad(block->page.oldest_modification <= start_lsn);
}
- mutex_exit(&block->mutex);
+ buf_page_mutex_exit(block);
}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
index 09100b35e68..1efbb1f03ef 100644
--- a/storage/innobase/include/buf0lru.h
+++ b/storage/innobase/include/buf0lru.h
@@ -27,21 +27,17 @@ Created 11/5/1995 Heikki Tuuri
#ifndef buf0lru_h
#define buf0lru_h
-#include "univ.i"
-#ifndef UNIV_HOTBACKUP
#include "ut0byte.h"
#include "buf0types.h"
// Forward declaration
struct trx_t;
-struct dict_table_t;
/******************************************************************//**
Returns TRUE if less than 25 % of the buffer pool is available. This can be
used in heuristics to prevent huge transactions eating up the whole buffer
pool for their locks.
-@return TRUE if less than 25 % of buffer pool left */
-UNIV_INTERN
+@return TRUE if less than 25 % of buffer pool left */
ibool
buf_LRU_buf_pool_running_out(void);
/*==============================*/
@@ -53,22 +49,28 @@ These are low-level functions
/** Minimum LRU list length for which the LRU_old pointer is defined */
#define BUF_LRU_OLD_MIN_LEN 512 /* 8 megabytes of 16k pages */
+#ifdef BTR_CUR_HASH_ADAPT
+struct dict_table_t;
/** Try to drop the adaptive hash index for a tablespace.
@param[in,out] table table
@return whether anything was dropped */
-UNIV_INTERN bool buf_LRU_drop_page_hash_for_tablespace(dict_table_t* table)
+bool buf_LRU_drop_page_hash_for_tablespace(dict_table_t* table)
MY_ATTRIBUTE((warn_unused_result,nonnull));
+#else
+# define buf_LRU_drop_page_hash_for_tablespace(table)
+#endif /* BTR_CUR_HASH_ADAPT */
/** Empty the flush list for all pages belonging to a tablespace.
@param[in] id tablespace identifier
-@param[in] trx transaction, for checking for user interrupt;
- or NULL if nothing is to be written */
-UNIV_INTERN void buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx);
+@param[in,out] observer flush observer,
+ or NULL if nothing is to be written
+@param[in] first first page to be flushed or evicted */
+void buf_LRU_flush_or_remove_pages(ulint id, FlushObserver* observer,
+ ulint first = 0);
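
The observer parameter folds the old trx argument into the FlushObserver mechanism. Two typical call shapes, sketched:

	buf_LRU_flush_or_remove_pages(space_id, NULL);		/* evict only,
						nothing written (e.g. DROP TABLE) */
	buf_LRU_flush_or_remove_pages(space_id, observer);	/* flush the pages
						tracked by a FlushObserver */
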
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/********************************************************************//**
Insert a compressed block into buf_pool->zip_clean in the LRU order. */
-UNIV_INTERN
void
buf_LRU_insert_zip_clean(
/*=====================*/
@@ -86,7 +88,6 @@ accessible via bpage.
The caller must hold buf_pool->mutex and must not hold any
buf_page_get_mutex() when calling this function.
@return true if freed, false otherwise. */
-UNIV_INTERN
bool
buf_LRU_free_page(
/*==============*/
@@ -96,21 +97,19 @@ buf_LRU_free_page(
MY_ATTRIBUTE((nonnull));
/******************************************************************//**
Try to free a replaceable block.
-@return TRUE if found and freed */
-UNIV_INTERN
-ibool
+@return true if found and freed */
+bool
buf_LRU_scan_and_free_block(
/*========================*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- ibool scan_all) /*!< in: scan whole LRU list
- if TRUE, otherwise scan only
+ bool scan_all) /*!< in: scan whole LRU list
+ if true, otherwise scan only
'old' blocks. */
MY_ATTRIBUTE((nonnull,warn_unused_result));
/******************************************************************//**
Returns a free block from the buf_pool. The block is taken off the
free list. If it is empty, returns NULL.
-@return a free control block, or NULL if the buf_block->free list is empty */
-UNIV_INTERN
+@return a free control block, or NULL if the buf_block->free list is empty */
buf_block_t*
buf_LRU_get_free_only(
/*==================*/
@@ -138,8 +137,7 @@ we put it to free list to be used.
* scan LRU list even if buf_pool->try_LRU_scan is not set
* iteration > 1:
* same as iteration 1 but sleep 10ms
-@return the free control block, in state BUF_BLOCK_READY_FOR_USE */
-UNIV_INTERN
+@return the free control block, in state BUF_BLOCK_READY_FOR_USE */
buf_block_t*
buf_LRU_get_free_block(
/*===================*/
@@ -148,25 +146,21 @@ buf_LRU_get_free_block(
/******************************************************************//**
Determines if the unzip_LRU list should be used for evicting a victim
instead of the general LRU list.
-@return TRUE if should use unzip_LRU */
-UNIV_INTERN
+@return TRUE if should use unzip_LRU */
ibool
buf_LRU_evict_from_unzip_LRU(
/*=========================*/
buf_pool_t* buf_pool);
/******************************************************************//**
Puts a block back to the free list. */
-UNIV_INTERN
void
buf_LRU_block_free_non_file_page(
/*=============================*/
buf_block_t* block); /*!< in: block, must not contain a file page */
/******************************************************************//**
-Adds a block to the LRU list. Please make sure that the zip_size is
-already set into the page zip when invoking the function, so that we
-can get correct zip_size from the buffer page when adding a block
-into LRU */
-UNIV_INTERN
+Adds a block to the LRU list. Please make sure that the page_size is
+already set when invoking the function, so that we can get the correct
+page_size from the buffer page when adding a block to the LRU list */
void
buf_LRU_add_block(
/*==============*/
@@ -177,7 +171,6 @@ buf_LRU_add_block(
the start regardless of this parameter */
/******************************************************************//**
Adds a block to the LRU list of decompressed zip pages. */
-UNIV_INTERN
void
buf_unzip_LRU_add_block(
/*====================*/
@@ -186,22 +179,13 @@ buf_unzip_LRU_add_block(
of the list, else put to the start */
/******************************************************************//**
Moves a block to the start of the LRU list. */
-UNIV_INTERN
void
buf_LRU_make_block_young(
/*=====================*/
buf_page_t* bpage); /*!< in: control block */
-/******************************************************************//**
-Moves a block to the end of the LRU list. */
-UNIV_INTERN
-void
-buf_LRU_make_block_old(
-/*===================*/
- buf_page_t* bpage); /*!< in: control block */
/**********************************************************************//**
Updates buf_pool->LRU_old_ratio.
-@return updated old_pct */
-UNIV_INTERN
+@return updated old_pct */
uint
buf_LRU_old_ratio_update(
/*=====================*/
@@ -213,25 +197,20 @@ buf_LRU_old_ratio_update(
/********************************************************************//**
Update the historical stats that we are collecting for LRU eviction
policy at the end of each interval. */
-UNIV_INTERN
void
buf_LRU_stat_update(void);
/*=====================*/
-/******************************************************************//**
-Remove one page from LRU list and put it to free list */
-UNIV_INTERN
-void
-buf_LRU_free_one_page(
-/*==================*/
- buf_page_t* bpage) /*!< in/out: block, must contain a file page and
- be in a state where it can be freed; there
- may or may not be a hash index to the page */
+/** Remove one page from LRU list and put it to free list.
+@param[in,out] bpage block, must contain a file page and be in
+ a freeable state; there may or may not be a
+ hash index to the page
+@param[in] old_page_id page number before bpage->id was invalidated */
+void buf_LRU_free_one_page(buf_page_t* bpage, page_id_t old_page_id)
MY_ATTRIBUTE((nonnull));
/******************************************************************//**
Adjust LRU hazard pointers if needed. */
-
void
buf_LRU_adjust_hp(
/*==============*/
@@ -241,8 +220,7 @@ buf_LRU_adjust_hp(
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/**********************************************************************//**
Validates the LRU list.
-@return TRUE */
-UNIV_INTERN
+@return TRUE */
ibool
buf_LRU_validate(void);
/*==================*/
@@ -250,7 +228,6 @@ buf_LRU_validate(void);
#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/**********************************************************************//**
Prints the LRU list. */
-UNIV_INTERN
void
buf_LRU_print(void);
/*===============*/
@@ -308,10 +285,4 @@ Increments the I/O counter in buf_LRU_stat_cur. */
Increments the page_zip_decompress() counter in buf_LRU_stat_cur. */
#define buf_LRU_stat_inc_unzip() buf_LRU_stat_cur.unzip++
-#ifndef UNIV_NONINL
-#include "buf0lru.ic"
-#endif
-
-#endif /* !UNIV_HOTBACKUP */
-
#endif
diff --git a/storage/innobase/include/buf0lru.ic b/storage/innobase/include/buf0lru.ic
deleted file mode 100644
index b39fc06e96c..00000000000
--- a/storage/innobase/include/buf0lru.ic
+++ /dev/null
@@ -1,25 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/buf0lru.ic
-The database buffer replacement algorithm
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
diff --git a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h
index 5ca9ea478e0..e590d818334 100644
--- a/storage/innobase/include/buf0rea.h
+++ b/storage/innobase/include/buf0rea.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2015, 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
@@ -27,45 +27,39 @@ Created 11/5/1995 Heikki Tuuri
#ifndef buf0rea_h
#define buf0rea_h
-#include "univ.i"
-#include "buf0types.h"
+#include "buf0buf.h"
-/********************************************************************//**
-High-level function which reads a page asynchronously from a file to the
+/** High-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
released by the i/o-handler thread.
-
-@param[in] space space_id
-@param[in] zip_size compressed page size in bytes, or 0
-@param[in] offset page number
-@return DB_SUCCESS if page has been read and is not corrupted,
+@param[in] page_id page id
+@param[in] page_size page size
+@retval DB_SUCCESS if the page was read and is not corrupted,
@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
after decryption normal page checksum does not match.
@retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */
-UNIV_INTERN
dberr_t
buf_read_page(
- ulint space,
- ulint zip_size,
- ulint offset);
+ const page_id_t page_id,
+ const page_size_t& page_size);
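
The old (space, zip_size, offset) triple becomes a page_id_t plus a page_size_t. A sketch of a call site, assuming the univ_page_size default object used elsewhere in this codebase:

	const page_id_t	page_id(space_id, page_no);
	dberr_t		err = buf_read_page(page_id, univ_page_size);
	if (err == DB_TABLESPACE_DELETED) {
		/* the .ibd file vanished while we were reading */
	}
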
/********************************************************************//**
High-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
released by the i/o-handler thread.
-
-@param[in] space Tablespace id
-@param[in] offset Page number */
-UNIV_INTERN
+@param[in] page_id page id
+@param[in] page_size page size
+@param[in] sync true if synchronous aio is desired */
void
-buf_read_page_async(
- ulint space,
- ulint offset);
-/********************************************************************//**
-Applies a random read-ahead in buf_pool if there are at least a threshold
+buf_read_page_background(
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ bool sync);
+
+/** Applies a random read-ahead in buf_pool if there are at least a threshold
value of accessed pages from the random read-ahead area. Does not read any
page, not even the one at the position (space, offset), if the read-ahead
mechanism is not activated. NOTE 1: the calling thread may own latches on
@@ -74,23 +68,20 @@ end up waiting for these latches! NOTE 2: the calling thread must want
access to the page given: this rule is set to prevent unintended read-aheads
performed by ibuf routines, a situation which could result in a deadlock if
the OS does not support asynchronous i/o.
+@param[in] page_id page id of a page which the current thread
+wants to access
+@param[in] page_size page size
+@param[in] inside_ibuf TRUE if we are inside ibuf routine
@return number of page read requests issued; NOTE that if we read ibuf
pages, it may happen that the page at the given page number does not
-get read even if we return a positive value!
-@return number of page read requests issued */
-UNIV_INTERN
+get read even if we return a positive value! */
ulint
buf_read_ahead_random(
-/*==================*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes,
- or 0 */
- ulint offset, /*!< in: page number of a page which
- the current thread wants to access */
- ibool inside_ibuf); /*!< in: TRUE if we are inside ibuf
- routine */
-/********************************************************************//**
-Applies linear read-ahead if in the buf_pool the page is a border page of
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ ibool inside_ibuf);
+
+/** Applies linear read-ahead if in the buf_pool the page is a border page of
a linear read-ahead area and all the pages in the area have been accessed.
Does not read any page if the read-ahead mechanism is not activated. Note
that the algorithm looks at the 'natural' adjacent successor and
@@ -112,20 +103,20 @@ latches!
NOTE 3: the calling thread must want access to the page given: this rule is
set to prevent unintended read-aheads performed by ibuf routines, a situation
which could result in a deadlock if the OS does not support asynchronous io.
-@return number of page read requests issued */
-UNIV_INTERN
+@param[in] page_id page id; see NOTE 3 above
+@param[in] page_size page size
+@param[in] inside_ibuf TRUE if we are inside ibuf routine
+@return number of page read requests issued */
ulint
buf_read_ahead_linear(
-/*==================*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes, or 0 */
- ulint offset, /*!< in: page number; see NOTE 3 above */
- ibool inside_ibuf); /*!< in: TRUE if we are inside ibuf routine */
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ ibool inside_ibuf);
+
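
Both read-ahead entry points now take the same page_id_t/page_size_t pair. A sketch of how a fetch path might trigger linear read-ahead after resolving a page:

	/* The return value is informational only; the function itself
	decides whether the read-ahead area qualifies. */
	ulint	n_issued = buf_read_ahead_linear(page_id, page_size,
						 /*inside_ibuf=*/FALSE);
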
/********************************************************************//**
Issues read requests for pages which the ibuf module wants to read in, in
order to contract the insert buffer tree. Technically, this function is like
a read-ahead function. */
-UNIV_INTERN
void
buf_read_ibuf_merge_pages(
/*======================*/
@@ -135,53 +126,37 @@ buf_read_ibuf_merge_pages(
to get read in, before this
function returns */
const ulint* space_ids, /*!< in: array of space ids */
- const ib_int64_t* space_versions,/*!< in: the spaces must have
- this version number
- (timestamp), otherwise we
- discard the read; we use this
- to cancel reads if DISCARD +
- IMPORT may have changed the
- tablespace size */
const ulint* page_nos, /*!< in: array of page numbers
to read, with the highest page
number the last in the
array */
ulint n_stored); /*!< in: number of elements
in the arrays */
-/********************************************************************//**
-Issues read requests for pages which recovery wants to read in. */
-UNIV_INTERN
+
+/** Issues read requests for pages which recovery wants to read in.
+@param[in]	sync		true if the caller wants this function to wait
+for the highest address page to get read in before this function returns
+@param[in] space_id tablespace id
+@param[in] page_nos array of page numbers to read, with the
+highest page number the last in the array
+@param[in] n_stored number of page numbers in the array */
+
void
buf_read_recv_pages(
-/*================*/
- ibool sync, /*!< in: TRUE if the caller
- wants this function to wait
- for the highest address page
- to get read in, before this
- function returns */
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in
- bytes, or 0 */
- const ulint* page_nos, /*!< in: array of page numbers
- to read, with the highest page
- number the last in the
- array */
- ulint n_stored); /*!< in: number of page numbers
- in the array */
+ bool sync,
+ ulint space_id,
+ const ulint* page_nos,
+ ulint n_stored);
/** The size in pages of the area which the read-ahead algorithms read if
invoked */
-#define BUF_READ_AHEAD_AREA(b) \
- ut_min(64, ut_2_power_up((b)->curr_size / 32))
+#define BUF_READ_AHEAD_AREA(b) ((b)->read_ahead_area)
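
The macro no longer recomputes the area on every use; it reads a field that would be precomputed when the pool is sized. A sketch of that precomputation, reusing the old formula from the removed macro body:

	buf_pool->read_ahead_area = ut_min(
		ulint(64), ut_2_power_up(buf_pool->curr_size / 32));
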
/** @name Modes used in read-ahead @{ */
/** read only pages belonging to the insert buffer tree */
#define BUF_READ_IBUF_PAGES_ONLY 131
/** read any page */
#define BUF_READ_ANY_PAGE 132
-/** read any page, but ignore (return an error) if a page does not exist
-instead of crashing like BUF_READ_ANY_PAGE does */
-#define BUF_READ_IGNORE_NONEXISTENT_PAGES 1024
/* @} */
#endif
diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h
index 8a7b5c6d9cb..27ffee03d4c 100644
--- a/storage/innobase/include/buf0types.h
+++ b/storage/innobase/include/buf0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,12 +26,11 @@ Created 11/17/1995 Heikki Tuuri
#ifndef buf0types_h
#define buf0types_h
-#if defined(INNODB_PAGE_ATOMIC_REF_COUNT) && defined(HAVE_ATOMIC_BUILTINS)
-#define PAGE_ATOMIC_REF_COUNT
-#endif /* INNODB_PAGE_ATOMIC_REF_COUNT && HAVE_ATOMIC_BUILTINS */
+#include "os0event.h"
+#include "ut0ut.h"
/** Buffer page (uncompressed or compressed) */
-struct buf_page_t;
+class buf_page_t;
/** Buffer block for which an uncompressed page exists */
struct buf_block_t;
/** Buffer pool chunk comprising buf_block_t */
@@ -44,6 +43,8 @@ struct buf_pool_stat_t;
struct buf_buddy_stat_t;
/** Doublewrite memory struct */
struct buf_dblwr_t;
+/** Flush observer for bulk create index */
+class FlushObserver;
/** A buffer frame. @see page_t */
typedef byte buf_frame_t;
@@ -85,6 +86,24 @@ enum srv_checksum_algorithm_t {
when reading */
};
+inline
+bool
+is_checksum_strict(srv_checksum_algorithm_t algo)
+{
+ return(algo == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32
+ || algo == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB
+ || algo == SRV_CHECKSUM_ALGORITHM_STRICT_NONE);
+}
+
+inline
+bool
+is_checksum_strict(ulint algo)
+{
+ return(algo == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32
+ || algo == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB
+ || algo == SRV_CHECKSUM_ALGORITHM_STRICT_NONE);
+}
+
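
The two overloads exist because callers hold the configured algorithm either as the enum or as the raw integer read from the system variable. A trivial usage sketch:

	srv_checksum_algorithm_t algo = SRV_CHECKSUM_ALGORITHM_STRICT_CRC32;
	ut_ad(is_checksum_strict(algo));		/* enum overload */
	ut_ad(is_checksum_strict(ulint(algo)));		/* ulint overload */
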
/** Parameters of binary buddy system for compressed pages (buf0buddy.h) */
/* @{ */
/** Zip shift value for the smallest page size */
@@ -106,4 +125,93 @@ this must be equal to UNIV_PAGE_SIZE */
#define BUF_BUDDY_HIGH (BUF_BUDDY_LOW << BUF_BUDDY_SIZES)
/* @} */
+/** Page identifier. */
+class page_id_t {
+public:
+
+ /** Constructor from (space, page_no).
+ @param[in] space tablespace id
+ @param[in] page_no page number */
+ page_id_t(ulint space, ulint page_no)
+	: m_space(uint32_t(space)), m_page_no(uint32_t(page_no))
+ {
+ ut_ad(space <= 0xFFFFFFFFU);
+ ut_ad(page_no <= 0xFFFFFFFFU);
+ }
+
+ bool operator==(const page_id_t& rhs) const
+ {
+ return m_space == rhs.m_space && m_page_no == rhs.m_page_no;
+ }
+ bool operator!=(const page_id_t& rhs) const { return !(*this == rhs); }
+
+ bool operator<(const page_id_t& rhs) const
+ {
+ if (m_space == rhs.m_space) {
+ return m_page_no < rhs.m_page_no;
+ }
+
+ return m_space < rhs.m_space;
+ }
+
+ /** Retrieve the tablespace id.
+ @return tablespace id */
+ uint32_t space() const { return m_space; }
+
+ /** Retrieve the page number.
+ @return page number */
+ uint32_t page_no() const { return m_page_no; }
+
+ /** Retrieve the fold value.
+ @return fold value */
+ ulint fold() const { return (m_space << 20) + m_space + m_page_no; }
+
+ /** Reset the page number only.
+ @param[in] page_no page number */
+ void set_page_no(ulint page_no)
+ {
+ m_page_no = uint32_t(page_no);
+
+ ut_ad(page_no <= 0xFFFFFFFFU);
+ }
+
+	/** Set the space id and page number to ULINT32_UNDEFINED,
+	marking the page id as corrupted */
+ void set_corrupt_id()
+ {
+ m_space = m_page_no = ULINT32_UNDEFINED;
+ }
+
+private:
+
+ /** Tablespace id. */
+ uint32_t m_space;
+
+ /** Page number. */
+ uint32_t m_page_no;
+
+ /** Declare the overloaded global operator<< as a friend of this
+ class. Refer to the global declaration for further details. Print
+ the given page_id_t object.
+ @param[in,out] out the output stream
+ @param[in] page_id the page_id_t object to be printed
+ @return the output stream */
+ friend
+ std::ostream&
+ operator<<(
+ std::ostream& out,
+ const page_id_t page_id);
+};
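
A short sketch of the value semantics: ordering is by (space, page_no), and fold() yields the hash value that page-hash style lookups would key on:

	page_id_t	a(1, 2);
	page_id_t	b(1, 3);
	ut_ad(a != b && a < b);
	const ulint	hash_value = a.fold();	/* (space << 20) + space + page_no */
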
+
+#ifndef UNIV_INNOCHECKSUM
+
+#include "ut0mutex.h"
+#include "sync0rw.h"
+
+typedef ib_bpmutex_t BPageMutex;
+typedef ib_mutex_t BufPoolMutex;
+typedef ib_mutex_t FlushListMutex;
+typedef BPageMutex BufPoolZipMutex;
+typedef rw_lock_t BPageLock;
+#endif /* !UNIV_INNOCHECKSUM */
+
#endif /* buf0types.h */
diff --git a/storage/innobase/include/data0data.h b/storage/innobase/include/data0data.h
index 2cd9f61baf4..fdf1a14feee 100644
--- a/storage/innobase/include/data0data.h
+++ b/storage/innobase/include/data0data.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, 2020 MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,40 +27,25 @@ Created 5/30/1994 Heikki Tuuri
#ifndef data0data_h
#define data0data_h
-#include "univ.i"
-
#include "data0types.h"
#include "data0type.h"
#include "mem0mem.h"
#include "dict0types.h"
+#include "btr0types.h"
+#include <vector>
+
+#include <ostream>
/** Storage for overflow data in a big record, that is, a clustered
index record which needs external storage of data fields */
struct big_rec_t;
+struct upd_t;
+
+/** Dummy variable to catch access to uninitialized fields. In the
+debug version, dtuple_create() will make all fields of dtuple_t point
+to data_error. */
+ut_d(extern byte data_error);
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Gets pointer to the type struct of SQL data field.
-@return pointer to the type struct */
-UNIV_INLINE
-dtype_t*
-dfield_get_type(
-/*============*/
- const dfield_t* field) /*!< in: SQL data field */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Gets pointer to the data in a field.
-@return pointer to data */
-UNIV_INLINE
-void*
-dfield_get_data(
-/*============*/
- const dfield_t* field) /*!< in: field */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#else /* UNIV_DEBUG */
-# define dfield_get_type(field) (&(field)->type)
-# define dfield_get_data(field) ((field)->data)
-#endif /* UNIV_DEBUG */
/*********************************************************************//**
Sets the type struct of SQL data field. */
UNIV_INLINE
@@ -67,17 +53,8 @@ void
dfield_set_type(
/*============*/
dfield_t* field, /*!< in: SQL data field */
- const dtype_t* type) /*!< in: pointer to data type struct */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Gets length of field data.
-@return length of data; UNIV_SQL_NULL if SQL null data */
-UNIV_INLINE
-ulint
-dfield_get_len(
-/*===========*/
- const dfield_t* field) /*!< in: field */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ const dtype_t* type); /*!< in: pointer to data type struct */
+
/*********************************************************************//**
Sets length in a field. */
UNIV_INLINE
@@ -87,32 +64,23 @@ dfield_set_len(
dfield_t* field, /*!< in: field */
ulint len) /*!< in: length or UNIV_SQL_NULL */
MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Determines if a field is SQL NULL
-@return nonzero if SQL null data */
-UNIV_INLINE
-ulint
-dfield_is_null(
-/*===========*/
- const dfield_t* field) /*!< in: field */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Determines if a field is externally stored
-@return nonzero if externally stored */
+
+/** Gets spatial status for "external storage"
+@param[in,out] field field */
UNIV_INLINE
-ulint
-dfield_is_ext(
-/*==========*/
- const dfield_t* field) /*!< in: field */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Sets the "external storage" flag */
+spatial_status_t
+dfield_get_spatial_status(
+ const dfield_t* field);
+
+/** Sets spatial status for "external storage"
+@param[in,out] field field
+@param[in] spatial_status spatial status */
UNIV_INLINE
void
-dfield_set_ext(
-/*===========*/
- dfield_t* field) /*!< in/out: field */
- MY_ATTRIBUTE((nonnull));
+dfield_set_spatial_status(
+ dfield_t* field,
+ spatial_status_t spatial_status);
+
/*********************************************************************//**
Sets pointer to the data and length in a field. */
UNIV_INLINE
@@ -124,6 +92,15 @@ dfield_set_data(
ulint len) /*!< in: length or UNIV_SQL_NULL */
MY_ATTRIBUTE((nonnull(1)));
/*********************************************************************//**
+Writes an MBR (minimum bounding rectangle) into the data of a field. */
+UNIV_INLINE
+void
+dfield_write_mbr(
+/*=============*/
+ dfield_t* field, /*!< in: field */
+ const double* mbr) /*!< in: data */
+ MY_ATTRIBUTE((nonnull(1)));
+/*********************************************************************//**
Sets a data field to SQL NULL. */
UNIV_INLINE
void
@@ -146,9 +123,9 @@ UNIV_INLINE
void
dfield_copy_data(
/*=============*/
- dfield_t* field1, /*!< out: field to copy to */
- const dfield_t* field2) /*!< in: field to copy from */
- MY_ATTRIBUTE((nonnull));
+ dfield_t* field1, /*!< out: field to copy to */
+ const dfield_t* field2); /*!< in: field to copy from */
+
/*********************************************************************//**
Copies a data field to another. */
UNIV_INLINE
@@ -167,12 +144,12 @@ dfield_dup(
dfield_t* field, /*!< in/out: data field */
mem_heap_t* heap) /*!< in: memory heap where allocated */
MY_ATTRIBUTE((nonnull));
-#ifndef UNIV_HOTBACKUP
+
/*********************************************************************//**
Tests if two data fields are equal.
If len==0, tests the data length and content for equality.
If len>0, tests the first len bytes of the content for equality.
-@return TRUE if both fields are NULL or if they are equal */
+@return TRUE if both fields are NULL or if they are equal */
UNIV_INLINE
ibool
dfield_datas_are_binary_equal(
@@ -184,7 +161,7 @@ dfield_datas_are_binary_equal(
MY_ATTRIBUTE((nonnull, warn_unused_result));
/*********************************************************************//**
Tests if dfield data length and content is equal to the given.
-@return TRUE if equal */
+@return TRUE if equal */
UNIV_INLINE
ibool
dfield_data_is_binary_equal(
@@ -192,33 +169,11 @@ dfield_data_is_binary_equal(
const dfield_t* field, /*!< in: field */
ulint len, /*!< in: data length or UNIV_SQL_NULL */
const byte* data) /*!< in: data */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#endif /* !UNIV_HOTBACKUP */
-/*********************************************************************//**
-Gets number of fields in a data tuple.
-@return number of fields */
-UNIV_INLINE
-ulint
-dtuple_get_n_fields(
-/*================*/
- const dtuple_t* tuple) /*!< in: tuple */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Gets nth field of a tuple.
-@return nth field */
-UNIV_INLINE
-dfield_t*
-dtuple_get_nth_field(
-/*=================*/
- const dtuple_t* tuple, /*!< in: tuple */
- ulint n); /*!< in: index of field */
-#else /* UNIV_DEBUG */
-# define dtuple_get_nth_field(tuple, n) ((tuple)->fields + (n))
-#endif /* UNIV_DEBUG */
+ MY_ATTRIBUTE((nonnull(1), warn_unused_result));
+
/*********************************************************************//**
Gets info bits in a data tuple.
-@return info bits */
+@return info bits */
UNIV_INLINE
ulint
dtuple_get_info_bits(
@@ -236,7 +191,7 @@ dtuple_set_info_bits(
MY_ATTRIBUTE((nonnull));
/*********************************************************************//**
Gets number of fields used in record comparisons.
-@return number of fields used in comparisons in rem0cmp.* */
+@return number of fields used in comparisons in rem0cmp.* */
UNIV_INLINE
ulint
dtuple_get_n_fields_cmp(
@@ -259,25 +214,28 @@ creating a new dtuple_t object */
#define DTUPLE_EST_ALLOC(n_fields) \
(sizeof(dtuple_t) + (n_fields) * sizeof(dfield_t))
-/**********************************************************//**
-Creates a data tuple from an already allocated chunk of memory.
+/** Creates a data tuple from an already allocated chunk of memory.
The size of the chunk must be at least DTUPLE_EST_ALLOC(n_fields).
The default value for number of fields used in record comparisons
for this tuple is n_fields.
-@return created tuple (inside buf) */
+@param[in,out] buf buffer to use
+@param[in] buf_size buffer size
+@param[in]	n_fields	number of fields
+@param[in] n_v_fields number of fields on virtual columns
+@return created tuple (inside buf) */
UNIV_INLINE
dtuple_t*
dtuple_create_from_mem(
-/*===================*/
- void* buf, /*!< in, out: buffer to use */
- ulint buf_size, /*!< in: buffer size */
- ulint n_fields) /*!< in: number of fields */
+ void* buf,
+ ulint buf_size,
+ ulint n_fields,
+ ulint n_v_fields)
MY_ATTRIBUTE((nonnull, warn_unused_result));
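
A sketch of the stack-allocated usage this function enables, with no virtual columns (n_v_fields = 0); DTUPLE_EST_ALLOC() is defined above:

	byte		buf[DTUPLE_EST_ALLOC(2)];
	dtuple_t*	tuple = dtuple_create_from_mem(buf, sizeof buf, 2, 0);
	dfield_set_data(dtuple_get_nth_field(tuple, 0), "id", 2);
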
/**********************************************************//**
Creates a data tuple to a memory heap. The default value for number
of fields used in record comparisons for this tuple is n_fields.
-@return own: created tuple */
+@return own: created tuple */
UNIV_INLINE
dtuple_t*
dtuple_create(
@@ -288,20 +246,48 @@ dtuple_create(
ulint n_fields)/*!< in: number of fields */
MY_ATTRIBUTE((nonnull, malloc));
+/** Initialize the virtual field data in a dtuple_t
+@param[in,out] vrow dtuple contains the virtual fields */
+UNIV_INLINE void dtuple_init_v_fld(dtuple_t* vrow);
+
+/** Duplicate the virtual field data in a dtuple_t
+@param[in,out]	vrow	dtuple containing the virtual fields
+@param[in] heap heap memory to use */
+UNIV_INLINE void dtuple_dup_v_fld(dtuple_t* vrow, mem_heap_t* heap);
+
+/** Creates a data tuple with possible virtual columns to a memory heap.
+@param[in] heap memory heap where the tuple is created
+@param[in] n_fields number of fields
+@param[in] n_v_fields number of fields on virtual col
+@return own: created tuple */
+UNIV_INLINE
+dtuple_t*
+dtuple_create_with_vcol(
+ mem_heap_t* heap,
+ ulint n_fields,
+ ulint n_v_fields);
+
/*********************************************************************//**
Sets number of fields used in a tuple. Normally this is set in
dtuple_create, but if you want later to set it smaller, you can use this. */
-UNIV_INTERN
void
dtuple_set_n_fields(
/*================*/
dtuple_t* tuple, /*!< in: tuple */
ulint n_fields) /*!< in: number of fields */
MY_ATTRIBUTE((nonnull));
+/** Copies a data tuple's virtual fields to another. This is a shallow copy.
+@param[in,out] d_tuple destination tuple
+@param[in] s_tuple source tuple */
+UNIV_INLINE
+void
+dtuple_copy_v_fields(
+ dtuple_t* d_tuple,
+ const dtuple_t* s_tuple);
/*********************************************************************//**
Copies a data tuple to another. This is a shallow copy; if a deep copy
is desired, dfield_dup() will have to be invoked on each field.
-@return own: copy of tuple */
+@return own: copy of tuple */
UNIV_INLINE
dtuple_t*
dtuple_copy(
@@ -313,7 +299,7 @@ dtuple_copy(
/**********************************************************//**
The following function returns the sum of data lengths of a tuple. The space
occupied by the field structs or the tuple struct is not counted.
-@return sum of data lens */
+@return sum of data lens */
UNIV_INLINE
ulint
dtuple_get_data_size(
@@ -323,37 +309,37 @@ dtuple_get_data_size(
MY_ATTRIBUTE((nonnull));
/*********************************************************************//**
Computes the number of externally stored fields in a data tuple.
-@return number of fields */
+@return number of fields */
UNIV_INLINE
ulint
dtuple_get_n_ext(
/*=============*/
const dtuple_t* tuple) /*!< in: tuple */
MY_ATTRIBUTE((nonnull));
-/************************************************************//**
-Compare two data tuples, respecting the collation of character fields.
-@return 1, 0 , -1 if tuple1 is greater, equal, less, respectively,
-than tuple2 */
-UNIV_INTERN
+/** Compare two data tuples.
+@param[in] tuple1 first data tuple
+@param[in] tuple2 second data tuple
+@return positive, 0 or negative if tuple1 is greater than, equal to or
+less than tuple2, respectively */
int
dtuple_coll_cmp(
-/*============*/
- const dtuple_t* tuple1, /*!< in: tuple 1 */
- const dtuple_t* tuple2) /*!< in: tuple 2 */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/************************************************************//**
-Folds a prefix given as the number of fields of a tuple.
-@return the folded value */
+ const dtuple_t* tuple1,
+ const dtuple_t* tuple2)
+ MY_ATTRIBUTE((warn_unused_result));
+/** Fold a prefix given as the number of fields of a tuple.
+@param[in] tuple index record
+@param[in] n_fields number of complete fields to fold
+@param[in] n_bytes number of bytes to fold in the last field
+@param[in] index_id index tree ID
+@return the folded value */
UNIV_INLINE
ulint
dtuple_fold(
-/*========*/
- const dtuple_t* tuple, /*!< in: the tuple */
- ulint n_fields,/*!< in: number of complete fields to fold */
- ulint n_bytes,/*!< in: number of bytes to fold in an
- incomplete last field */
- index_id_t tree_id)/*!< in: index tree id */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ const dtuple_t* tuple,
+ ulint n_fields,
+ ulint n_bytes,
+ index_id_t tree_id)
+ MY_ATTRIBUTE((warn_unused_result));
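
For example, folding a two-field prefix with no partial bytes of a third field (tuple and tree_id assumed in scope):

	const ulint	fold = dtuple_fold(tuple, /*n_fields=*/2,
					   /*n_bytes=*/0, tree_id);
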
/*******************************************************************//**
Sets types of fields binary in a tuple. */
UNIV_INLINE
@@ -365,7 +351,7 @@ dtuple_set_types_binary(
MY_ATTRIBUTE((nonnull));
/**********************************************************************//**
Checks if a dtuple contains an SQL null value.
-@return TRUE if some field is SQL null */
+@return TRUE if some field is SQL null */
UNIV_INLINE
ibool
dtuple_contains_null(
@@ -374,8 +360,7 @@ dtuple_contains_null(
MY_ATTRIBUTE((nonnull, warn_unused_result));
/**********************************************************//**
Checks that a data field is typed. Asserts an error if not.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
dfield_check_typed(
/*===============*/
@@ -383,28 +368,17 @@ dfield_check_typed(
MY_ATTRIBUTE((nonnull, warn_unused_result));
/**********************************************************//**
Checks that a data tuple is typed. Asserts an error if not.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
dtuple_check_typed(
/*===============*/
const dtuple_t* tuple) /*!< in: tuple */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************//**
-Checks that a data tuple is typed.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dtuple_check_typed_no_assert(
-/*=========================*/
- const dtuple_t* tuple) /*!< in: tuple */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
#ifdef UNIV_DEBUG
/**********************************************************//**
Validates the consistency of a tuple which must be complete, i.e,
all fields must have been set.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
dtuple_validate(
/*============*/
@@ -413,7 +387,6 @@ dtuple_validate(
#endif /* UNIV_DEBUG */
/*************************************************************//**
Pretty prints a dfield value according to its data type. */
-UNIV_INTERN
void
dfield_print(
/*=========*/
@@ -422,7 +395,6 @@ dfield_print(
/*************************************************************//**
Pretty prints a dfield value according to its data type. Also the hex string
is printed if a string contains non-printable characters. */
-UNIV_INTERN
void
dfield_print_also_hex(
/*==================*/
@@ -430,13 +402,41 @@ dfield_print_also_hex(
MY_ATTRIBUTE((nonnull));
/**********************************************************//**
The following function prints the contents of a tuple. */
-UNIV_INTERN
void
dtuple_print(
/*=========*/
FILE* f, /*!< in: output stream */
const dtuple_t* tuple) /*!< in: tuple */
MY_ATTRIBUTE((nonnull));
+
+/** Print the contents of a tuple.
+@param[out] o output stream
+@param[in] field array of data fields
+@param[in] n number of data fields */
+void
+dfield_print(
+ std::ostream& o,
+ const dfield_t* field,
+ ulint n);
+/** Print the contents of a tuple.
+@param[out] o output stream
+@param[in] tuple data tuple */
+void
+dtuple_print(
+ std::ostream& o,
+ const dtuple_t* tuple);
+
+/** Print the contents of a tuple.
+@param[out] o output stream
+@param[in] tuple data tuple */
+inline
+std::ostream&
+operator<<(std::ostream& o, const dtuple_t& tuple)
+{
+ dtuple_print(o, &tuple);
+ return(o);
+}
+
/**************************************************************//**
Moves parts of long fields in entry to the big record vector so that
the size of tuple drops below the maximum record size allowed in the
@@ -445,20 +445,19 @@ to determine uniquely the insertion place of the tuple in the index.
@return own: created big record vector, NULL if we are not able to
shorten the entry enough, i.e., if there are too many fixed-length or
short fields in entry or the index is clustered */
-UNIV_INTERN
big_rec_t*
dtuple_convert_big_rec(
/*===================*/
dict_index_t* index, /*!< in: index */
+ upd_t* upd, /*!< in/out: update vector */
dtuple_t* entry, /*!< in/out: index entry */
ulint* n_ext) /*!< in/out: number of
externally stored columns */
- MY_ATTRIBUTE((nonnull, malloc, warn_unused_result));
+ MY_ATTRIBUTE((malloc, warn_unused_result));
/**************************************************************//**
Puts back to entry the data stored in vector. Note that to ensure the
fields in entry can accommodate the data, vector must have been created
from entry with dtuple_convert_big_rec. */
-UNIV_INTERN
void
dtuple_convert_back_big_rec(
/*========================*/
@@ -483,8 +482,16 @@ dtuple_big_rec_free(
struct dfield_t{
void* data; /*!< pointer to data */
unsigned ext:1; /*!< TRUE=externally stored, FALSE=local */
- unsigned len:32; /*!< data length; UNIV_SQL_NULL if SQL null */
+ unsigned spatial_status:2;
+ /*!< spatial status of externally stored field
+ in undo log for purge */
+ unsigned len; /*!< data length; UNIV_SQL_NULL if SQL null */
dtype_t type; /*!< type of data */
+
+ /** Create a deep copy of this object.
+ @param[in,out] heap memory heap in which the clone will be created
+ @return the cloned object */
+ dfield_t* clone(mem_heap_t* heap) const;
};
/** Structure for an SQL data tuple of fields (logical record) */
@@ -502,9 +509,8 @@ struct dtuple_t {
default value in dtuple creation is
the same value as n_fields */
dfield_t* fields; /*!< fields */
- UT_LIST_NODE_T(dtuple_t) tuple_list;
- /*!< data tuples can be linked into a
- list using this field */
+ ulint n_v_fields; /*!< number of virtual fields */
+ dfield_t* v_fields; /*!< fields on virtual column */
#ifdef UNIV_DEBUG
ulint magic_n; /*!< magic number, used in
debug assertions */
@@ -513,8 +519,82 @@ struct dtuple_t {
#endif /* UNIV_DEBUG */
};
+inline ulint dtuple_get_n_fields(const dtuple_t* tuple)
+{ return tuple->n_fields; }
+inline dtype_t* dfield_get_type(dfield_t* field) { return &field->type; }
+inline const dtype_t* dfield_get_type(const dfield_t* field)
+{ return &field->type; }
+inline void* dfield_get_data(dfield_t* field)
+{
+ ut_ad(field->len == UNIV_SQL_NULL || field->data != &data_error);
+ return field->data;
+}
+inline const void* dfield_get_data(const dfield_t* field)
+{
+ ut_ad(field->len == UNIV_SQL_NULL || field->data != &data_error);
+ return field->data;
+}
+inline ulint dfield_get_len(const dfield_t* field) { return field->len; }
+inline bool dfield_is_null(const dfield_t* field)
+{ return field->len == UNIV_SQL_NULL; }
+/** @return whether a column is to be stored off-page */
+inline bool dfield_is_ext(const dfield_t* field)
+{
+ ut_ad(!field->ext || field->len >= BTR_EXTERN_FIELD_REF_SIZE);
+ return static_cast<bool>(field->ext);
+}
+/** Set the "external storage" flag */
+inline void dfield_set_ext(dfield_t* field) { field->ext = 1; }
+
+/** Gets number of virtual fields in a data tuple.
+@param[in] tuple dtuple to check
+@return number of fields */
+inline ulint
+dtuple_get_n_v_fields(const dtuple_t* tuple) { return tuple->n_v_fields; }
+
+inline const dfield_t* dtuple_get_nth_field(const dtuple_t* tuple, ulint n)
+{
+ ut_ad(n < tuple->n_fields);
+ return &tuple->fields[n];
+}
+inline dfield_t* dtuple_get_nth_field(dtuple_t* tuple, ulint n)
+{
+ ut_ad(n < tuple->n_fields);
+ return &tuple->fields[n];
+}
+
+/** Get a virtual column in a table row or an extended clustered index record.
+@param[in] tuple tuple
+@param[in]	n	the nth virtual field to get
+@return nth virtual field */
+inline const dfield_t* dtuple_get_nth_v_field(const dtuple_t* tuple, ulint n)
+{
+ ut_ad(n < tuple->n_v_fields);
+ return &tuple->v_fields[n];
+}
+/** Get a virtual column in a table row or an extended clustered index record.
+@param[in] tuple tuple
+@param[in]	n	the nth virtual field to get
+@return nth virtual field */
+inline dfield_t* dtuple_get_nth_v_field(dtuple_t* tuple, ulint n)
+{
+ ut_ad(n < tuple->n_v_fields);
+ return &tuple->v_fields[n];
+}
+
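
A sketch tying the accessors together: SQL NULL and off-page storage are independent properties of a dfield_t (tuple assumed in scope):

	dfield_t*	f = dtuple_get_nth_field(tuple, 0);
	if (!dfield_is_null(f) && !dfield_is_ext(f)) {
		const void*	data = dfield_get_data(f);
		ulint		len = dfield_get_len(f);
		/* data points at len bytes stored locally in the record */
	}
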
/** A slot for a field in a big rec vector */
struct big_rec_field_t {
+
+ /** Constructor.
+ @param[in] field_no_ the field number
+ @param[in] len_ the data length
+ @param[in] data_ the data */
+ big_rec_field_t(ulint field_no_, ulint len_, const void* data_)
+ : field_no(field_no_),
+ len(len_),
+ data(data_)
+ {}
+
ulint field_no; /*!< field number in record */
ulint len; /*!< stored data length, in bytes */
const void* data; /*!< stored data */
@@ -525,12 +605,38 @@ clustered index record which needs external storage of data fields */
struct big_rec_t {
mem_heap_t* heap; /*!< memory heap from which
allocated */
+ const ulint capacity; /*!< fields array size */
ulint n_fields; /*!< number of stored fields */
big_rec_field_t*fields; /*!< stored fields */
+
+ /** Constructor.
+ @param[in] max the capacity of the array of fields. */
+ explicit big_rec_t(const ulint max)
+ : heap(0),
+ capacity(max),
+ n_fields(0),
+ fields(0)
+ {}
+
+ /** Append one big_rec_field_t object to the end of array of fields */
+ void append(const big_rec_field_t& field)
+ {
+ ut_ad(n_fields < capacity);
+ fields[n_fields] = field;
+ n_fields++;
+ }
+
+ /** Allocate a big_rec_t object in the given memory heap, and for
+ storing n_fld number of fields.
+ @param[in] heap memory heap in which this object is allocated
+ @param[in] n_fld maximum number of fields that can be stored in
+ this object
+ @return the allocated object */
+ static big_rec_t* alloc(
+ mem_heap_t* heap,
+ ulint n_fld);
};
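
A sketch of the intended call pattern, mirroring what dtuple_convert_big_rec() would do (heap, n_fields, field_no, len and data assumed in scope):

	big_rec_t*	vec = big_rec_t::alloc(heap, n_fields);
	vec->append(big_rec_field_t(field_no, len, data));
	ut_ad(vec->n_fields <= vec->capacity);
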
-#ifndef UNIV_NONINL
#include "data0data.ic"
-#endif
#endif
diff --git a/storage/innobase/include/data0data.ic b/storage/innobase/include/data0data.ic
index 21f534d1be7..295c786a583 100644
--- a/storage/innobase/include/data0data.ic
+++ b/storage/innobase/include/data0data.ic
@@ -24,28 +24,8 @@ SQL data field and tuple
Created 5/30/1994 Heikki Tuuri
*************************************************************************/
-#include "mem0mem.h"
#include "ut0rnd.h"
-#ifdef UNIV_DEBUG
-/** Dummy variable to catch access to uninitialized fields. In the
-debug version, dtuple_create() will make all fields of dtuple_t point
-to data_error. */
-extern byte data_error;
-
-/*********************************************************************//**
-Gets pointer to the type struct of SQL data field.
-@return pointer to the type struct */
-UNIV_INLINE
-dtype_t*
-dfield_get_type(
-/*============*/
- const dfield_t* field) /*!< in: SQL data field */
-{
- return((dtype_t*) &(field->type));
-}
-#endif /* UNIV_DEBUG */
-
/*********************************************************************//**
Sets the type struct of SQL data field. */
UNIV_INLINE
@@ -61,38 +41,6 @@ dfield_set_type(
field->type = *type;
}
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Gets pointer to the data in a field.
-@return pointer to data */
-UNIV_INLINE
-void*
-dfield_get_data(
-/*============*/
- const dfield_t* field) /*!< in: field */
-{
- ut_ad((field->len == UNIV_SQL_NULL)
- || (field->data != &data_error));
-
- return((void*) field->data);
-}
-#endif /* UNIV_DEBUG */
-
-/*********************************************************************//**
-Gets length of field data.
-@return length of data; UNIV_SQL_NULL if SQL null data */
-UNIV_INLINE
-ulint
-dfield_get_len(
-/*===========*/
- const dfield_t* field) /*!< in: field */
-{
- ut_ad((field->len == UNIV_SQL_NULL)
- || (field->data != &data_error));
-
- return(field->len);
-}
-
/*********************************************************************//**
Sets length in a field. */
UNIV_INLINE
@@ -107,42 +55,35 @@ dfield_set_len(
#endif /* UNIV_VALGRIND_DEBUG */
field->ext = 0;
- field->len = len;
+ field->len = static_cast<unsigned int>(len);
}
-/*********************************************************************//**
-Determines if a field is SQL NULL
-@return nonzero if SQL null data */
+/** Gets spatial status for "external storage"
+@param[in]	field	field */
UNIV_INLINE
-ulint
-dfield_is_null(
-/*===========*/
- const dfield_t* field) /*!< in: field */
+spatial_status_t
+dfield_get_spatial_status(
+ const dfield_t* field)
{
- return(field->len == UNIV_SQL_NULL);
-}
+ ut_ad(field);
+ ut_ad(dfield_is_ext(field));
-/*********************************************************************//**
-Determines if a field is externally stored
-@return nonzero if externally stored */
-UNIV_INLINE
-ulint
-dfield_is_ext(
-/*==========*/
- const dfield_t* field) /*!< in: field */
-{
- return(field->ext);
+ return(static_cast<spatial_status_t>(field->spatial_status));
}
-/*********************************************************************//**
-Sets the "external storage" flag */
+/** Sets spatial status for "external storage"
+@param[in,out] field field
+@param[in] spatial_status spatial status */
UNIV_INLINE
void
-dfield_set_ext(
-/*===========*/
- dfield_t* field) /*!< in/out: field */
+dfield_set_spatial_status(
+ dfield_t* field,
+ spatial_status_t spatial_status)
{
- field->ext = 1;
+ ut_ad(field);
+ ut_ad(dfield_is_ext(field));
+
+ field->spatial_status = spatial_status;
}
/*********************************************************************//**
@@ -160,7 +101,29 @@ dfield_set_data(
#endif /* UNIV_VALGRIND_DEBUG */
field->data = (void*) data;
field->ext = 0;
- field->len = len;
+ field->len = static_cast<unsigned int>(len);
+}
+
+/*********************************************************************//**
+Writes a minimum bounding rectangle (MBR) into a field. */
+UNIV_INLINE
+void
+dfield_write_mbr(
+/*=============*/
+	dfield_t*	field,	/*!< in/out: field */
+	const double*	mbr)	/*!< in: MBR coordinates */
+{
+#ifdef UNIV_VALGRIND_DEBUG
+	UNIV_MEM_ASSERT_RW(mbr, SPDIMS * 2 * sizeof *mbr);
+#endif /* UNIV_VALGRIND_DEBUG */
+ field->ext = 0;
+
+ for (unsigned i = 0; i < SPDIMS * 2; i++) {
+ mach_double_write(static_cast<byte*>(field->data)
+ + i * sizeof(double), mbr[i]);
+ }
+
+ field->len = DATA_MBR_LEN;
}
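A usage sketch, assuming SPDIMS == 2 so that DATA_MBR_LEN covers four doubles: the caller must point the field at a large-enough buffer before dfield_write_mbr() serializes into it.

	byte		mbr_buf[DATA_MBR_LEN];
	double		mbr[SPDIMS * 2] = {0.0, 1.0, 0.0, 1.0};
	dfield_t	field;	/* xmin, xmax, ymin, ymax above */

	dfield_set_data(&field, mbr_buf, DATA_MBR_LEN);
	dfield_write_mbr(&field, mbr);	/* stores the doubles, sets len */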
/*********************************************************************//**
@@ -189,6 +152,7 @@ dfield_copy_data(
field1->data = field2->data;
field1->len = field2->len;
field1->ext = field2->ext;
+ field1->spatial_status = field2->spatial_status;
}
/*********************************************************************//**
@@ -218,12 +182,11 @@ dfield_dup(
}
}
-#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Tests if two data fields are equal.
If len==0, tests the data length and content for equality.
If len>0, tests the first len bytes of the content for equality.
-@return TRUE if both fields are NULL or if they are equal */
+@return TRUE if both fields are NULL or if they are equal */
UNIV_INLINE
ibool
dfield_datas_are_binary_equal(
@@ -250,7 +213,7 @@ dfield_datas_are_binary_equal(
/*********************************************************************//**
Tests if dfield data length and content is equal to the given.
-@return TRUE if equal */
+@return TRUE if equal */
UNIV_INLINE
ibool
dfield_data_is_binary_equal(
@@ -260,14 +223,13 @@ dfield_data_is_binary_equal(
const byte* data) /*!< in: data */
{
return(len == dfield_get_len(field)
- && (len == UNIV_SQL_NULL
+ && (!len || len == UNIV_SQL_NULL
|| !memcmp(dfield_get_data(field), data, len)));
}
-#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Gets info bits in a data tuple.
-@return info bits */
+@return info bits */
UNIV_INLINE
ulint
dtuple_get_info_bits(
@@ -291,7 +253,7 @@ dtuple_set_info_bits(
/*********************************************************************//**
Gets number of fields used in record comparisons.
-@return number of fields used in comparisons in rem0cmp.* */
+@return number of fields used in comparisons in rem0cmp.* */
UNIV_INLINE
ulint
dtuple_get_n_fields_cmp(
@@ -315,60 +277,39 @@ dtuple_set_n_fields_cmp(
tuple->n_fields_cmp = n_fields_cmp;
}
-/*********************************************************************//**
-Gets number of fields in a data tuple.
-@return number of fields */
-UNIV_INLINE
-ulint
-dtuple_get_n_fields(
-/*================*/
- const dtuple_t* tuple) /*!< in: tuple */
-{
- return(tuple->n_fields);
-}
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Gets nth field of a tuple.
-@return nth field */
-UNIV_INLINE
-dfield_t*
-dtuple_get_nth_field(
-/*=================*/
- const dtuple_t* tuple, /*!< in: tuple */
- ulint n) /*!< in: index of field */
-{
- ut_ad(tuple);
- ut_ad(n < tuple->n_fields);
-
- return((dfield_t*) tuple->fields + n);
-}
-#endif /* UNIV_DEBUG */
-
-/**********************************************************//**
-Creates a data tuple from an already allocated chunk of memory.
+/** Creates a data tuple from an already allocated chunk of memory.
The size of the chunk must be at least DTUPLE_EST_ALLOC(n_fields).
The default value for number of fields used in record comparisons
for this tuple is n_fields.
-@return created tuple (inside buf) */
+@param[in,out] buf buffer to use
+@param[in] buf_size buffer size
+@param[in]	n_fields	number of fields
+@param[in]	n_v_fields	number of virtual-column fields
+@return created tuple (inside buf) */
UNIV_INLINE
dtuple_t*
dtuple_create_from_mem(
-/*===================*/
- void* buf, /*!< in, out: buffer to use */
- ulint buf_size, /*!< in: buffer size */
- ulint n_fields) /*!< in: number of fields */
+ void* buf,
+ ulint buf_size,
+ ulint n_fields,
+ ulint n_v_fields)
{
dtuple_t* tuple;
+ ulint n_t_fields = n_fields + n_v_fields;
- ut_ad(buf != NULL);
- ut_a(buf_size >= DTUPLE_EST_ALLOC(n_fields));
+ ut_a(buf_size >= DTUPLE_EST_ALLOC(n_t_fields));
tuple = (dtuple_t*) buf;
tuple->info_bits = 0;
tuple->n_fields = n_fields;
+ tuple->n_v_fields = n_v_fields;
tuple->n_fields_cmp = n_fields;
tuple->fields = (dfield_t*) &tuple[1];
+ if (n_v_fields > 0) {
+ tuple->v_fields = &tuple->fields[n_fields];
+ } else {
+ tuple->v_fields = NULL;
+ }
#ifdef UNIV_DEBUG
tuple->magic_n = DATA_TUPLE_MAGIC_N;
@@ -376,26 +317,58 @@ dtuple_create_from_mem(
{ /* In the debug version, initialize fields to an error value */
ulint i;
- for (i = 0; i < n_fields; i++) {
+ for (i = 0; i < n_t_fields; i++) {
dfield_t* field;
- field = dtuple_get_nth_field(tuple, i);
+ if (i >= n_fields) {
+ field = dtuple_get_nth_v_field(
+ tuple, i - n_fields);
+ } else {
+ field = dtuple_get_nth_field(tuple, i);
+ }
dfield_set_len(field, UNIV_SQL_NULL);
field->data = &data_error;
dfield_get_type(field)->mtype = DATA_ERROR;
+ dfield_get_type(field)->prtype = DATA_ERROR;
}
}
#endif
- UNIV_MEM_ASSERT_W(tuple->fields, n_fields * sizeof *tuple->fields);
- UNIV_MEM_INVALID(tuple->fields, n_fields * sizeof *tuple->fields);
+ UNIV_MEM_ASSERT_W(tuple->fields, n_t_fields * sizeof *tuple->fields);
+ UNIV_MEM_INVALID(tuple->fields, n_t_fields * sizeof *tuple->fields);
return(tuple);
}
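A sketch of the contract stated above, with illustrative field counts: the backing buffer must be at least DTUPLE_EST_ALLOC() of the combined ordinary and virtual field count.

	/* Sketch only: a 3-field tuple with no virtual columns,
	carved out of a stack buffer. */
	byte		buf[DTUPLE_EST_ALLOC(3)];
	dtuple_t*	tuple = dtuple_create_from_mem(buf, sizeof buf, 3, 0);

	ut_ad(dtuple_get_n_fields(tuple) == 3);
	ut_ad(dtuple_get_n_v_fields(tuple) == 0);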
+/** Duplicate the virtual field data in a dtuple_t
+@param[in,out] vrow dtuple contains the virtual fields
+@param[in,out] heap heap memory to use */
+UNIV_INLINE
+void
+dtuple_dup_v_fld(dtuple_t* vrow, mem_heap_t* heap)
+{
+ for (ulint i = 0; i < vrow->n_v_fields; i++) {
+ dfield_t* dfield = dtuple_get_nth_v_field(vrow, i);
+ dfield_dup(dfield, heap);
+ }
+}
+
+/** Initialize the virtual field data in a dtuple_t
+@param[in,out] vrow dtuple contains the virtual fields */
+UNIV_INLINE
+void
+dtuple_init_v_fld(dtuple_t* vrow)
+{
+ for (ulint i = 0; i < vrow->n_v_fields; i++) {
+ dfield_t* dfield = dtuple_get_nth_v_field(vrow, i);
+ dfield_get_type(dfield)->mtype = DATA_MISSING;
+ dfield_set_len(dfield, UNIV_SQL_NULL);
+ }
+}
+
/**********************************************************//**
Creates a data tuple to a memory heap. The default value for number
of fields used in record comparisons for this tuple is n_fields.
-@return own: created tuple */
+@return own: created tuple */
UNIV_INLINE
dtuple_t*
dtuple_create(
@@ -405,24 +378,58 @@ dtuple_create(
bytes will be allocated from this heap */
ulint n_fields) /*!< in: number of fields */
{
+ return(dtuple_create_with_vcol(heap, n_fields, 0));
+}
+
+/** Creates a data tuple with virtual columns to a memory heap.
+@param[in] heap memory heap where the tuple is created
+@param[in] n_fields number of fields
+@param[in]	n_v_fields	number of virtual-column fields
+@return own: created tuple */
+UNIV_INLINE
+dtuple_t*
+dtuple_create_with_vcol(
+ mem_heap_t* heap,
+ ulint n_fields,
+ ulint n_v_fields)
+{
void* buf;
ulint buf_size;
dtuple_t* tuple;
ut_ad(heap);
- buf_size = DTUPLE_EST_ALLOC(n_fields);
+ buf_size = DTUPLE_EST_ALLOC(n_fields + n_v_fields);
buf = mem_heap_alloc(heap, buf_size);
- tuple = dtuple_create_from_mem(buf, buf_size, n_fields);
+ tuple = dtuple_create_from_mem(buf, buf_size, n_fields, n_v_fields);
return(tuple);
}
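A hedged sketch combining this constructor with dtuple_init_v_fld() above; the field counts are illustrative only:

	/* Sketch only: two ordinary fields plus one virtual column,
	with the virtual side marked "not yet computed". */
	mem_heap_t*	heap = mem_heap_create(256);
	dtuple_t*	row = dtuple_create_with_vcol(heap, 2, 1);

	dtuple_init_v_fld(row);	/* v-field mtype becomes DATA_MISSING */
	/* ... fill the ordinary fields, then compute the virtual one ... */
	mem_heap_free(heap);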
+/** Copies a data tuple's virtual fields to another. This is a shallow copy.
+@param[in,out] d_tuple destination tuple
+@param[in] s_tuple source tuple */
+UNIV_INLINE
+void
+dtuple_copy_v_fields(
+ dtuple_t* d_tuple,
+ const dtuple_t* s_tuple)
+{
+ ulint n_v_fields = dtuple_get_n_v_fields(d_tuple);
+ ut_ad(n_v_fields == dtuple_get_n_v_fields(s_tuple));
+
+ for (ulint i = 0; i < n_v_fields; i++) {
+ dfield_copy(dtuple_get_nth_v_field(d_tuple, i),
+ dtuple_get_nth_v_field(s_tuple, i));
+ }
+}
+
/*********************************************************************//**
Copies a data tuple to another. This is a shallow copy; if a deep copy
is desired, dfield_dup() will have to be invoked on each field.
-@return own: copy of tuple */
+@return own: copy of tuple */
UNIV_INLINE
dtuple_t*
dtuple_copy(
@@ -432,7 +439,9 @@ dtuple_copy(
where the tuple is created */
{
ulint n_fields = dtuple_get_n_fields(tuple);
- dtuple_t* new_tuple = dtuple_create(heap, n_fields);
+ ulint n_v_fields = dtuple_get_n_v_fields(tuple);
+ dtuple_t* new_tuple = dtuple_create_with_vcol(
+ heap, n_fields, n_v_fields);
ulint i;
for (i = 0; i < n_fields; i++) {
@@ -440,6 +449,11 @@ dtuple_copy(
dtuple_get_nth_field(tuple, i));
}
+ for (i = 0; i < n_v_fields; i++) {
+ dfield_copy(dtuple_get_nth_v_field(new_tuple, i),
+ dtuple_get_nth_v_field(tuple, i));
+ }
+
return(new_tuple);
}
@@ -447,7 +461,7 @@ dtuple_copy(
The following function returns the sum of data lengths of a tuple. The space
occupied by the field structs or the tuple struct is not counted. Neither
is possible space in externally stored parts of the field.
-@return sum of data lengths */
+@return sum of data lengths */
UNIV_INLINE
ulint
dtuple_get_data_size(
@@ -483,7 +497,7 @@ dtuple_get_data_size(
/*********************************************************************//**
Computes the number of externally stored fields in a data tuple.
-@return number of externally stored fields */
+@return number of externally stored fields */
UNIV_INLINE
ulint
dtuple_get_n_ext(
@@ -522,18 +536,19 @@ dtuple_set_types_binary(
}
}
-/************************************************************//**
-Folds a prefix given as the number of fields of a tuple.
-@return the folded value */
+/** Fold a prefix given as the number of fields of a tuple.
+@param[in] tuple index record
+@param[in] n_fields number of complete fields to fold
+@param[in] n_bytes number of bytes to fold in the last field
+@param[in] index_id index tree ID
+@return the folded value */
UNIV_INLINE
ulint
dtuple_fold(
-/*========*/
- const dtuple_t* tuple, /*!< in: the tuple */
- ulint n_fields,/*!< in: number of complete fields to fold */
- ulint n_bytes,/*!< in: number of bytes to fold in an
- incomplete last field */
- index_id_t tree_id)/*!< in: index tree id */
+ const dtuple_t* tuple,
+ ulint n_fields,
+ ulint n_bytes,
+ index_id_t tree_id)
{
const dfield_t* field;
ulint i;
@@ -592,7 +607,7 @@ data_write_sql_null(
/**********************************************************************//**
Checks if a dtuple contains an SQL null value.
-@return TRUE if some field is SQL null */
+@return TRUE if some field is SQL null */
UNIV_INLINE
ibool
dtuple_contains_null(
diff --git a/storage/innobase/include/data0type.h b/storage/innobase/include/data0type.h
index 11ff98ca19e..f641af8a6c1 100644
--- a/storage/innobase/include/data0type.h
+++ b/storage/innobase/include/data0type.h
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, MariaDB Corporation.
+Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -30,19 +30,15 @@ Created 1/16/1996 Heikki Tuuri
#include "univ.i"
extern ulint data_mysql_default_charset_coll;
-#define DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL 8
#define DATA_MYSQL_BINARY_CHARSET_COLL 63
/* SQL data type struct */
struct dtype_t;
-/* SQL Like operator comparison types */
+/** SQL Like operator comparison types */
enum ib_like_t {
- IB_LIKE_EXACT, /* e.g. STRING */
- IB_LIKE_PREFIX, /* e.g., STRING% */
- IB_LIKE_SUFFIX, /* e.g., %STRING */
- IB_LIKE_SUBSTR, /* e.g., %STRING% */
- IB_LIKE_REGEXP /* Future */
+ IB_LIKE_EXACT, /**< e.g. STRING */
+ IB_LIKE_PREFIX /**< e.g., STRING% */
};
/*-------------------------------------------*/
@@ -80,8 +76,19 @@ binary strings */
DATA_VARMYSQL for all character sets, and the
charset-collation for tables created with it
can also be latin1_swedish_ci */
+
+/* DATA_GEOMETRY includes all standard geometry datatypes as described in
+the OGC standard (point, line_string, polygon, multi_point, multi_polygon,
+multi_line_string, geometry_collection, geometry).
+Currently, geometry data is stored in the standard Well-Known Binary (WKB)
+format (http://www.opengeospatial.org/standards/sfa).
+We use BLOB as the underlying datatype. */
+#define DATA_GEOMETRY 14 /* geometry datatype of variable length */
#define DATA_MTYPE_MAX 63 /* dtype_store_for_order_and_null_size()
requires the values are <= 63 */
+
+#define DATA_MTYPE_CURRENT_MIN DATA_VARCHAR /* minimum value of mtype */
+#define DATA_MTYPE_CURRENT_MAX DATA_GEOMETRY /* maximum value of mtype */
/*-------------------------------------------*/
/* The 'PRECISE TYPE' of a column */
/*
@@ -131,7 +138,7 @@ columns, and for them the precise type is usually not used at all.
for InnoDB's own system tables */
#define DATA_ERROR 111 /* another relic from pre-MySQL time */
-#define DATA_MYSQL_TYPE_MASK 255 /* AND with this mask to extract the MySQL
+#define DATA_MYSQL_TYPE_MASK 255U/* AND with this mask to extract the MySQL
type from the precise type */
#define DATA_MYSQL_TRUE_VARCHAR 15 /* MySQL type code for the >= 5.0.3
format true VARCHAR */
@@ -152,14 +159,14 @@ be less than 256 */
#define DATA_FTS_DOC_ID 3 /* Used as FTS DOC ID column */
-#define DATA_SYS_PRTYPE_MASK 0xF /* mask to extract the above from prtype */
+#define DATA_SYS_PRTYPE_MASK 0xFU /* mask to extract the above from prtype */
/* Flags ORed to the precise data type */
-#define DATA_NOT_NULL 256 /* this is ORed to the precise type when
+#define DATA_NOT_NULL 256U /* this is ORed to the precise type when
the column is declared as NOT NULL */
-#define DATA_UNSIGNED 512 /* this id ORed to the precise type when
+#define DATA_UNSIGNED	512U	/* this is ORed to the precise type when
we have an unsigned integer type */
-#define DATA_BINARY_TYPE 1024 /* if the data type is a binary character
+#define DATA_BINARY_TYPE 1024U /* if the data type is a binary character
string, this is ORed to the precise type:
this only holds for tables created with
>= MySQL-4.0.14 */
@@ -167,10 +174,18 @@ be less than 256 */
In earlier versions this was set for some
BLOB columns.
*/
-#define DATA_LONG_TRUE_VARCHAR 4096 /* this is ORed to the precise data
+#define DATA_GIS_MBR 2048U /* Used as GIS MBR column */
+#define DATA_MBR_LEN	(SPDIMS * 2 * sizeof(double))	/* GIS MBR length */
+
+#define DATA_LONG_TRUE_VARCHAR 4096U /* this is ORed to the precise data
type when the column is true VARCHAR where
MySQL uses 2 bytes to store the data len;
for shorter VARCHARs MySQL uses only 1 byte */
+#define DATA_VIRTUAL 8192U /* Virtual column */
+
+/** Check whether locking is disabled (never). */
+#define dict_table_is_locking_disabled(table) false
+
/*-------------------------------------------*/
/* This many bytes we need to store the type information affecting the
@@ -184,16 +199,33 @@ store the charset-collation number; one byte is left unused, though */
/* Maximum multi-byte character length in bytes, plus 1 */
#define DATA_MBMAX 8
+/* For checking if mtype is GEOMETRY datatype */
+#define DATA_GEOMETRY_MTYPE(mtype) ((mtype) == DATA_GEOMETRY)
+
+/* For checking if mtype is BLOB or GEOMETRY, since we use BLOB as
+the underlying datatype of GEOMETRY data. */
+#define DATA_LARGE_MTYPE(mtype) ((mtype) == DATA_BLOB \
+ || (mtype) == DATA_GEOMETRY)
+
+/* For checking if data type is big length data type. */
+#define DATA_BIG_LEN_MTYPE(len, mtype) ((len) > 255 || DATA_LARGE_MTYPE(mtype))
+
+/* For checking if the column is a big length column. */
+#define DATA_BIG_COL(col) DATA_BIG_LEN_MTYPE((col)->len, (col)->mtype)
+
+/* For checking if data type is large binary data type. */
+#define DATA_LARGE_BINARY(mtype,prtype) ((mtype) == DATA_GEOMETRY || \
+ ((mtype) == DATA_BLOB && !((prtype) & DATA_BINARY_TYPE)))
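How the new classification macros compose, as a sketch with illustrative arguments:

	ut_ad(DATA_GEOMETRY_MTYPE(DATA_GEOMETRY));
	ut_ad(DATA_LARGE_MTYPE(DATA_BLOB));	/* BLOB and GEOMETRY are "large" */
	ut_ad(DATA_BIG_LEN_MTYPE(300, DATA_VARCHAR));	/* len > 255 qualifies too */
	ut_ad(DATA_LARGE_BINARY(DATA_BLOB, 0));	/* TEXT: BLOB without DATA_BINARY_TYPE */
	ut_ad(!DATA_LARGE_BINARY(DATA_BLOB, DATA_BINARY_TYPE));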
+
/* We now support 15 bits (up to 32767) collation number */
#define MAX_CHAR_COLL_NUM 32767
/* Mask to get the Charset Collation number (0x7fff) */
#define CHAR_COLL_MASK MAX_CHAR_COLL_NUM
-#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Gets the MySQL type code from a dtype.
-@return MySQL type code; this is NOT an InnoDB type code! */
+@return MySQL type code; this is NOT an InnoDB type code! */
UNIV_INLINE
ulint
dtype_get_mysql_type(
@@ -203,8 +235,7 @@ dtype_get_mysql_type(
Determine how many bytes the first n characters of the given string occupy.
If the string is shorter than n characters, returns the number of bytes
the characters in the string occupy.
-@return length of the prefix, in bytes */
-UNIV_INTERN
+@return length of the prefix, in bytes */
ulint
dtype_get_at_most_n_mbchars(
/*========================*/
@@ -219,12 +250,10 @@ dtype_get_at_most_n_mbchars(
ulint data_len, /*!< in: length of str (in bytes) */
const char* str); /*!< in: the string whose prefix
length is being determined */
-#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Checks if a data main type is a string type. Also a BLOB is considered a
string type.
-@return TRUE if string type */
-UNIV_INTERN
+@return TRUE if string type */
ibool
dtype_is_string_type(
/*=================*/
@@ -233,8 +262,7 @@ dtype_is_string_type(
Checks if a type is a binary string type. Note that for tables created with
< 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For
those DATA_BLOB columns this function currently returns FALSE.
-@return TRUE if binary string type */
-UNIV_INTERN
+@return TRUE if binary string type */
ibool
dtype_is_binary_string_type(
/*========================*/
@@ -245,8 +273,7 @@ Checks if a type is a non-binary string type. That is, dtype_is_string_type is
TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created
with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column.
For those DATA_BLOB columns this function currently returns TRUE.
-@return TRUE if non-binary string type */
-UNIV_INTERN
+@return TRUE if non-binary string type */
ibool
dtype_is_non_binary_string_type(
/*============================*/
@@ -272,7 +299,7 @@ dtype_copy(
const dtype_t* type2); /*!< in: type struct to copy from */
/*********************************************************************//**
Gets the SQL main data type.
-@return SQL main data type */
+@return SQL main data type */
UNIV_INLINE
ulint
dtype_get_mtype(
@@ -280,13 +307,13 @@ dtype_get_mtype(
const dtype_t* type); /*!< in: data type */
/*********************************************************************//**
Gets the precise data type.
-@return precise data type */
+@return precise data type */
UNIV_INLINE
ulint
dtype_get_prtype(
/*=============*/
const dtype_t* type); /*!< in: data type */
-#ifndef UNIV_HOTBACKUP
+
/*********************************************************************//**
Compute the mbminlen and mbmaxlen members of a data type structure. */
UNIV_INLINE
@@ -301,43 +328,46 @@ dtype_get_mblen(
multi-byte character */
/*********************************************************************//**
Gets the MySQL charset-collation code for MySQL string types.
-@return MySQL charset-collation code */
+@return MySQL charset-collation code */
UNIV_INLINE
ulint
dtype_get_charset_coll(
/*===================*/
ulint prtype);/*!< in: precise data type */
-/*********************************************************************//**
-Forms a precise type from the < 4.1.2 format precise type plus the
+/** Form a precise type from the < 4.1.2 format precise type plus the
charset-collation code.
+@param[in] old_prtype MySQL type code and the flags
+ DATA_BINARY_TYPE etc.
+@param[in] charset_coll character-set collation code
@return precise type, including the charset-collation code */
-UNIV_INTERN
-ulint
-dtype_form_prtype(
-/*==============*/
- ulint old_prtype, /*!< in: the MySQL type code and the flags
- DATA_BINARY_TYPE etc. */
- ulint charset_coll); /*!< in: MySQL charset-collation code */
+UNIV_INLINE
+uint32_t
+dtype_form_prtype(ulint old_prtype, ulint charset_coll)
+{
+ ut_ad(old_prtype < 256 * 256);
+ ut_ad(charset_coll <= MAX_CHAR_COLL_NUM);
+ return(uint32_t(old_prtype + (charset_coll << 16)));
+}
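A round-trip sketch for the now-inline dtype_form_prtype(); collation number 45 (utf8mb4_general_ci in MySQL) is chosen purely for illustration:

	ulint		coll = 45;
	uint32_t	prtype = dtype_form_prtype(
		DATA_MYSQL_TRUE_VARCHAR | DATA_NOT_NULL, coll);

	ut_ad(dtype_get_charset_coll(prtype) == coll);
	ut_ad(prtype & DATA_NOT_NULL);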
+
/*********************************************************************//**
Determines if a MySQL string type is a subset of UTF-8. This function
may return false negatives, in case further character-set collation
codes are introduced in MySQL later.
-@return TRUE if a subset of UTF-8 */
+@return TRUE if a subset of UTF-8 */
UNIV_INLINE
ibool
dtype_is_utf8(
/*==========*/
ulint prtype);/*!< in: precise data type */
-#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Gets the type length.
-@return fixed length of the type, in bytes, or 0 if variable-length */
+@return fixed length of the type, in bytes, or 0 if variable-length */
UNIV_INLINE
ulint
dtype_get_len(
/*==========*/
const dtype_t* type); /*!< in: data type */
-#ifndef UNIV_HOTBACKUP
+
/*********************************************************************//**
Gets the minimum length of a character, in bytes.
@return minimum length of a char, in bytes, or 0 if this is not a
@@ -356,19 +386,9 @@ ulint
dtype_get_mbmaxlen(
/*===============*/
const dtype_t* type); /*!< in: type */
-/*********************************************************************//**
-Gets the padding character code for the type.
-@return padding character code, or ULINT_UNDEFINED if no padding specified */
-UNIV_INLINE
-ulint
-dtype_get_pad_char(
-/*===============*/
- ulint mtype, /*!< in: main type */
- ulint prtype); /*!< in: precise type */
-#endif /* !UNIV_HOTBACKUP */
/***********************************************************************//**
Returns the size of a fixed size data type, 0 if not a fixed size type.
-@return fixed size, or 0 */
+@return fixed size, or 0 */
UNIV_INLINE
ulint
dtype_get_fixed_size_low(
@@ -381,10 +401,10 @@ dtype_get_fixed_size_low(
ulint mbmaxlen, /*!< in: maximum length of a
multibyte character, in bytes */
ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
-#ifndef UNIV_HOTBACKUP
+
/***********************************************************************//**
Returns the minimum size of a data type.
-@return minimum size */
+@return minimum size */
UNIV_INLINE
ulint
dtype_get_min_size_low(
@@ -397,25 +417,24 @@ dtype_get_min_size_low(
/***********************************************************************//**
Returns the maximum size of a data type. Note: types in system tables may be
incomplete and return incorrect information.
-@return maximum size */
+@return maximum size */
UNIV_INLINE
ulint
dtype_get_max_size_low(
/*===================*/
ulint mtype, /*!< in: main type */
ulint len); /*!< in: length */
-#endif /* !UNIV_HOTBACKUP */
/***********************************************************************//**
Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type.
For fixed length types it is the fixed length of the type, otherwise 0.
-@return SQL null storage size in ROW_FORMAT=REDUNDANT */
+@return SQL null storage size in ROW_FORMAT=REDUNDANT */
UNIV_INLINE
ulint
dtype_get_sql_null_size(
/*====================*/
const dtype_t* type, /*!< in: type */
ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
-#ifndef UNIV_HOTBACKUP
+
/**********************************************************************//**
Reads to a type the stored information which determines its alphabetical
ordering and the storage size of an SQL NULL value. */
@@ -463,23 +482,20 @@ dtype_sql_name(
char* name, /*!< out: SQL name */
unsigned name_sz);/*!< in: size of the name buffer */
-#endif /* !UNIV_HOTBACKUP */
-
/*********************************************************************//**
Validates a data type structure.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
dtype_validate(
/*===========*/
const dtype_t* type); /*!< in: type struct to validate */
-/*********************************************************************//**
-Prints a data type structure. */
-UNIV_INTERN
+#ifdef UNIV_DEBUG
+/** Print a data type structure.
+@param[in] type data type */
void
dtype_print(
-/*========*/
- const dtype_t* type); /*!< in: type */
+ const dtype_t* type);
+#endif /* UNIV_DEBUG */
/* Structure for an SQL data type.
If you add fields to this structure, be sure to initialize them everywhere.
@@ -509,16 +525,12 @@ struct dtype_t{
string data (in addition to
the string, MySQL uses 1 or 2
bytes to store the string length) */
-#ifndef UNIV_HOTBACKUP
unsigned mbminlen:3; /*!< minimum length of a character,
in bytes */
unsigned mbmaxlen:3; /*!< maximum length of a character,
in bytes */
-#endif /* !UNIV_HOTBACKUP */
};
-#ifndef UNIV_NONINL
#include "data0type.ic"
-#endif
#endif
diff --git a/storage/innobase/include/data0type.ic b/storage/innobase/include/data0type.ic
index b06a58cd3ab..1956016c58b 100644
--- a/storage/innobase/include/data0type.ic
+++ b/storage/innobase/include/data0type.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
@@ -24,15 +24,12 @@ Data types
Created 1/16/1996 Heikki Tuuri
*******************************************************/
-#include <string.h> /* strlen() */
-
#include "mach0data.h"
-#ifndef UNIV_HOTBACKUP
-# include "ha_prototypes.h"
+#include "ha_prototypes.h"
/*********************************************************************//**
Gets the MySQL charset-collation code for MySQL string types.
-@return MySQL charset-collation code */
+@return MySQL charset-collation code */
UNIV_INLINE
ulint
dtype_get_charset_coll(
@@ -46,7 +43,7 @@ dtype_get_charset_coll(
Determines if a MySQL string type is a subset of UTF-8. This function
may return false negatives, in case further character-set collation
codes are introduced in MySQL later.
-@return TRUE if a subset of UTF-8 */
+@return TRUE if a subset of UTF-8 */
UNIV_INLINE
ibool
dtype_is_utf8(
@@ -69,7 +66,7 @@ dtype_is_utf8(
/*********************************************************************//**
Gets the MySQL type code from a dtype.
-@return MySQL type code; this is NOT an InnoDB type code! */
+@return MySQL type code; this is NOT an InnoDB type code! */
UNIV_INLINE
ulint
dtype_get_mysql_type(
@@ -120,9 +117,6 @@ dtype_set_mblen(
ut_ad(dtype_validate(type));
}
-#else /* !UNIV_HOTBACKUP */
-# define dtype_set_mblen(type) (void) 0
-#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Sets a data type structure. */
@@ -138,9 +132,9 @@ dtype_set(
ut_ad(type);
ut_ad(mtype <= DATA_MTYPE_MAX);
- type->mtype = mtype;
- type->prtype = prtype;
- type->len = len;
+ type->mtype = unsigned(mtype);
+ type->prtype = unsigned(prtype);
+ type->len = unsigned(len);
dtype_set_mblen(type);
}
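A sketch of the setter/getter pair in use, describing a NOT NULL VARCHAR(100); the server-side charset machinery behind dtype_set_mblen() is assumed available:

	dtype_t	type;

	dtype_set(&type, DATA_VARCHAR, DATA_NOT_NULL, 100);
	ut_ad(dtype_get_mtype(&type) == DATA_VARCHAR);
	ut_ad(dtype_get_prtype(&type) & DATA_NOT_NULL);
	ut_ad(dtype_get_len(&type) == 100);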
@@ -161,7 +155,7 @@ dtype_copy(
/*********************************************************************//**
Gets the SQL main data type.
-@return SQL main data type */
+@return SQL main data type */
UNIV_INLINE
ulint
dtype_get_mtype(
@@ -175,7 +169,7 @@ dtype_get_mtype(
/*********************************************************************//**
Gets the precise data type.
-@return precise data type */
+@return precise data type */
UNIV_INLINE
ulint
dtype_get_prtype(
@@ -189,7 +183,7 @@ dtype_get_prtype(
/*********************************************************************//**
Gets the type length.
-@return fixed length of the type, in bytes, or 0 if variable-length */
+@return fixed length of the type, in bytes, or 0 if variable-length */
UNIV_INLINE
ulint
dtype_get_len(
@@ -201,7 +195,6 @@ dtype_get_len(
return(type->len);
}
-#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Gets the minimum length of a character, in bytes.
@return minimum length of a char, in bytes, or 0 if this is not a
@@ -227,45 +220,6 @@ dtype_get_mbmaxlen(
return type->mbmaxlen;
}
-/*********************************************************************//**
-Gets the padding character code for a type.
-@return padding character code, or ULINT_UNDEFINED if no padding specified */
-UNIV_INLINE
-ulint
-dtype_get_pad_char(
-/*===============*/
- ulint mtype, /*!< in: main type */
- ulint prtype) /*!< in: precise type */
-{
- switch (mtype) {
- case DATA_FIXBINARY:
- case DATA_BINARY:
- if (dtype_get_charset_coll(prtype)
- == DATA_MYSQL_BINARY_CHARSET_COLL) {
- /* Starting from 5.0.18, do not pad
- VARBINARY or BINARY columns. */
- return(ULINT_UNDEFINED);
- }
- /* Fall through */
- case DATA_CHAR:
- case DATA_VARCHAR:
- case DATA_MYSQL:
- case DATA_VARMYSQL:
- /* Space is the padding character for all char and binary
- strings, and starting from 5.0.3, also for TEXT strings. */
-
- return(0x20);
- case DATA_BLOB:
- if (!(prtype & DATA_BINARY_TYPE)) {
- return(0x20);
- }
- /* Fall through */
- default:
- /* No padding specified */
- return(ULINT_UNDEFINED);
- }
-}
-
/**********************************************************************//**
Stores for a type the information which determines its alphabetical ordering
and the storage size of an SQL NULL value. This is the >= 4.1.x storage
@@ -288,7 +242,7 @@ dtype_new_store_for_order_and_null_size(
ut_ad(type);
ut_ad(type->mtype >= DATA_VARCHAR);
- ut_ad(type->mtype <= DATA_MYSQL);
+ ut_ad(type->mtype <= DATA_MTYPE_MAX);
buf[0] = (byte)(type->mtype & 0xFFUL);
@@ -411,76 +365,79 @@ dtype_sql_name(
#define APPEND_UNSIGNED() \
do { \
if (prtype & DATA_UNSIGNED) { \
- ut_snprintf(name + strlen(name), \
+ snprintf(name + strlen(name), \
name_sz - strlen(name), \
" UNSIGNED"); \
} \
} while (0)
- ut_snprintf(name, name_sz, "UNKNOWN");
+ snprintf(name, name_sz, "UNKNOWN");
switch (mtype) {
case DATA_INT:
switch (len) {
case 1:
- ut_snprintf(name, name_sz, "TINYINT");
+ snprintf(name, name_sz, "TINYINT");
break;
case 2:
- ut_snprintf(name, name_sz, "SMALLINT");
+ snprintf(name, name_sz, "SMALLINT");
break;
case 3:
- ut_snprintf(name, name_sz, "MEDIUMINT");
+ snprintf(name, name_sz, "MEDIUMINT");
break;
case 4:
- ut_snprintf(name, name_sz, "INT");
+ snprintf(name, name_sz, "INT");
break;
case 8:
- ut_snprintf(name, name_sz, "BIGINT");
+ snprintf(name, name_sz, "BIGINT");
break;
}
APPEND_UNSIGNED();
break;
case DATA_FLOAT:
- ut_snprintf(name, name_sz, "FLOAT");
+ snprintf(name, name_sz, "FLOAT");
APPEND_UNSIGNED();
break;
case DATA_DOUBLE:
- ut_snprintf(name, name_sz, "DOUBLE");
+ snprintf(name, name_sz, "DOUBLE");
APPEND_UNSIGNED();
break;
case DATA_FIXBINARY:
- ut_snprintf(name, name_sz, "BINARY(%u)", len);
+ snprintf(name, name_sz, "BINARY(%u)", len);
break;
case DATA_CHAR:
case DATA_MYSQL:
- ut_snprintf(name, name_sz, "CHAR(%u)", len);
+ snprintf(name, name_sz, "CHAR(%u)", len);
break;
case DATA_VARCHAR:
case DATA_VARMYSQL:
- ut_snprintf(name, name_sz, "VARCHAR(%u)", len);
+ snprintf(name, name_sz, "VARCHAR(%u)", len);
break;
case DATA_BINARY:
- ut_snprintf(name, name_sz, "VARBINARY(%u)", len);
+ snprintf(name, name_sz, "VARBINARY(%u)", len);
+ break;
+ case DATA_GEOMETRY:
+ snprintf(name, name_sz, "GEOMETRY");
break;
case DATA_BLOB:
switch (len) {
case 9:
- ut_snprintf(name, name_sz, "TINYBLOB");
+ snprintf(name, name_sz, "TINYBLOB");
break;
case 10:
- ut_snprintf(name, name_sz, "BLOB");
+ snprintf(name, name_sz, "BLOB");
break;
case 11:
- ut_snprintf(name, name_sz, "MEDIUMBLOB");
+ snprintf(name, name_sz, "MEDIUMBLOB");
break;
case 12:
- ut_snprintf(name, name_sz, "LONGBLOB");
+ snprintf(name, name_sz, "LONGBLOB");
break;
}
}
if (prtype & DATA_NOT_NULL) {
- ut_snprintf(name + strlen(name),
+ snprintf(name + strlen(name),
name_sz - strlen(name),
" NOT NULL");
}
@@ -488,11 +445,9 @@ dtype_sql_name(
return(name);
}
-#endif /* !UNIV_HOTBACKUP */
-
/***********************************************************************//**
Returns the size of a fixed size data type, 0 if not a fixed size type.
-@return fixed size, or 0 */
+@return fixed size, or 0 */
UNIV_INLINE
ulint
dtype_get_fixed_size_low(
@@ -532,7 +487,6 @@ dtype_get_fixed_size_low(
case DATA_DOUBLE:
return(len);
case DATA_MYSQL:
-#ifndef UNIV_HOTBACKUP
if (prtype & DATA_BINARY_TYPE) {
return(len);
} else if (!comp) {
@@ -552,15 +506,13 @@ dtype_get_fixed_size_low(
return(len);
}
}
-#else /* !UNIV_HOTBACKUP */
- return(len);
-#endif /* !UNIV_HOTBACKUP */
/* Treat as variable-length. */
/* fall through */
case DATA_VARCHAR:
case DATA_BINARY:
case DATA_DECIMAL:
case DATA_VARMYSQL:
+ case DATA_GEOMETRY:
case DATA_BLOB:
return(0);
default:
@@ -570,10 +522,9 @@ dtype_get_fixed_size_low(
return(0);
}
-#ifndef UNIV_HOTBACKUP
/***********************************************************************//**
Returns the minimum size of a data type.
-@return minimum size */
+@return minimum size */
UNIV_INLINE
ulint
dtype_get_min_size_low(
@@ -627,6 +578,7 @@ dtype_get_min_size_low(
case DATA_BINARY:
case DATA_DECIMAL:
case DATA_VARMYSQL:
+ case DATA_GEOMETRY:
case DATA_BLOB:
return(0);
default:
@@ -639,7 +591,7 @@ dtype_get_min_size_low(
/***********************************************************************//**
Returns the maximum size of a data type. Note: types in system tables may be
incomplete and return incorrect information.
-@return maximum size */
+@return maximum size */
UNIV_INLINE
ulint
dtype_get_max_size_low(
@@ -660,6 +612,7 @@ dtype_get_max_size_low(
case DATA_DECIMAL:
case DATA_VARMYSQL:
return(len);
+ case DATA_GEOMETRY:
case DATA_BLOB:
break;
default:
@@ -668,12 +621,11 @@ dtype_get_max_size_low(
return(ULINT_MAX);
}
-#endif /* !UNIV_HOTBACKUP */
/***********************************************************************//**
Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type.
For fixed length types it is the fixed length of the type, otherwise 0.
-@return SQL null storage size in ROW_FORMAT=REDUNDANT */
+@return SQL null storage size in ROW_FORMAT=REDUNDANT */
UNIV_INLINE
ulint
dtype_get_sql_null_size(
@@ -681,11 +633,6 @@ dtype_get_sql_null_size(
const dtype_t* type, /*!< in: type */
ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
{
-#ifndef UNIV_HOTBACKUP
return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len,
type->mbminlen, type->mbmaxlen, comp));
-#else /* !UNIV_HOTBACKUP */
- return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len,
- 0, 0, 0));
-#endif /* !UNIV_HOTBACKUP */
}
diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h
index 09eafc0e9d8..f70a65890c9 100644
--- a/storage/innobase/include/db0err.h
+++ b/storage/innobase/include/db0err.h
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, MariaDB Corporation.
+Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2015, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,14 +27,16 @@ Created 5/24/1996 Heikki Tuuri
#ifndef db0err_h
#define db0err_h
+/* Do not include univ.i because univ.i includes this. */
enum dberr_t {
+ DB_SUCCESS,
+
DB_SUCCESS_LOCKED_REC = 9, /*!< like DB_SUCCESS, but a new
explicit record lock was created */
- DB_SUCCESS = 10,
/* The following are error codes */
- DB_ERROR,
+ DB_ERROR = 11,
DB_INTERRUPTED,
DB_OUT_OF_MEMORY,
DB_OUT_OF_FILE_SPACE,
@@ -42,7 +44,6 @@ enum dberr_t {
DB_DEADLOCK,
DB_ROLLBACK,
DB_DUPLICATE_KEY,
- DB_QUE_THR_SUSPENDED,
DB_MISSING_HISTORY, /*!< required history data has been
deleted due to lack of space in
rollback segment */
@@ -64,7 +65,8 @@ enum dberr_t {
which is referenced */
DB_CANNOT_ADD_CONSTRAINT, /*!< adding a foreign key constraint
to a table failed */
- DB_CORRUPTION, /*!< data structure corruption noticed */
+ DB_CORRUPTION, /*!< data structure corruption
+ noticed */
DB_CANNOT_DROP_CONSTRAINT, /*!< dropping a foreign key constraint
from a table failed */
DB_NO_SAVEPOINT, /*!< no savepoint exists with the given
@@ -124,20 +126,43 @@ enum dberr_t {
DB_ONLINE_LOG_TOO_BIG, /*!< Modification log grew too big
during online index creation */
- DB_IO_ERROR, /*!< Generic IO error */
DB_IDENTIFIER_TOO_LONG, /*!< Identifier name too long */
DB_FTS_EXCEED_RESULT_CACHE_LIMIT, /*!< FTS query memory
exceeds result cache limit */
- DB_TEMP_FILE_WRITE_FAILURE, /*!< Temp file write failure */
+ DB_TEMP_FILE_WRITE_FAIL, /*!< Temp file write failure */
+ DB_CANT_CREATE_GEOMETRY_OBJECT, /*!< Cannot create specified Geometry
+ data object */
+ DB_CANNOT_OPEN_FILE, /*!< Cannot open a file */
DB_FTS_TOO_MANY_WORDS_IN_PHRASE,
/*< Too many words in a phrase */
- DB_TOO_BIG_FOR_REDO, /* Record length greater than 10%
- of redo log */
+
+ DB_TABLESPACE_TRUNCATED, /*!< tablespace was truncated */
+
DB_DECRYPTION_FAILED, /* Tablespace encrypted and
decrypt operation failed because
of missing key management plugin,
or missing or incorrect key or
	incorrect AES method or algorithm. */
+
+ DB_IO_ERROR = 100, /*!< Generic IO error */
+
+ DB_IO_PARTIAL_FAILED, /*!< Partial IO request failed */
+
+ DB_FORCED_ABORT, /*!< Transaction was forced to rollback
+ by a higher priority transaction */
+
+ DB_TABLE_CORRUPT, /*!< Table/clustered index is
+ corrupted */
+
+ DB_COMPUTE_VALUE_FAILED, /*!< Compute generated value failed */
+
+	DB_NO_FK_ON_S_BASE_COL,		/*!< Cannot add a foreign key
+					constraint on the base column
+					of a stored column */
+
+ DB_IO_NO_PUNCH_HOLE, /*!< Punch hole not supported by
+ file system. */
+
DB_PAGE_CORRUPTED, /* Page read from tablespace is
corrupted. */
/* The following are partial failure codes */
@@ -148,21 +173,7 @@ enum dberr_t {
DB_ZIP_OVERFLOW,
DB_RECORD_NOT_FOUND = 1500,
DB_END_OF_INDEX,
- DB_DICT_CHANGED, /*!< Some part of table dictionary has
- changed. Such as index dropped or
- foreign key dropped */
-
-
- /* The following are API only error codes. */
- DB_DATA_MISMATCH = 2000, /*!< Column update or read failed
- because the types mismatch */
-
- DB_SCHEMA_NOT_LOCKED, /*!< If an API function expects the
- schema to be locked in exclusive mode
- and if it's not then that API function
- will return this error code */
-
- DB_NOT_FOUND /*!< Generic error code for "Not found"
+ DB_NOT_FOUND, /*!< Generic error code for "Not found"
type of errors */
};
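With the renumbered enum, the calling convention around dberr_t is unchanged. A sketch (not part of this patch) of mapping it to a success/failure decision:

	static bool
	is_success(dberr_t err)
	{
		switch (err) {
		case DB_SUCCESS:
		case DB_SUCCESS_LOCKED_REC:	/* success plus a new lock */
			return(true);
		default:
			/* DB_DUPLICATE_KEY, DB_DEADLOCK, DB_IO_ERROR, ... */
			return(false);
		}
	}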
diff --git a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h
index 186c8b50b8d..3baefdd1132 100644
--- a/storage/innobase/include/dict0boot.h
+++ b/storage/innobase/include/dict0boot.h
@@ -26,54 +26,52 @@ Created 4/18/1996 Heikki Tuuri
#ifndef dict0boot_h
#define dict0boot_h
-#include "univ.i"
-
#include "mtr0mtr.h"
#include "mtr0log.h"
#include "ut0byte.h"
#include "buf0buf.h"
-#include "fsp0fsp.h"
#include "dict0dict.h"
typedef byte dict_hdr_t;
/**********************************************************************//**
Gets a pointer to the dictionary header and x-latches its page.
-@return pointer to the dictionary header, page x-latched */
-UNIV_INTERN
+@return pointer to the dictionary header, page x-latched */
dict_hdr_t*
dict_hdr_get(
/*=========*/
mtr_t* mtr); /*!< in: mtr */
/**********************************************************************//**
Returns a new table, index, or space id. */
-UNIV_INTERN
void
dict_hdr_get_new_id(
/*================*/
- table_id_t* table_id, /*!< out: table id
- (not assigned if NULL) */
- index_id_t* index_id, /*!< out: index id
- (not assigned if NULL) */
- ulint* space_id); /*!< out: space id
- (not assigned if NULL) */
+ table_id_t* table_id, /*!< out: table id
+ (not assigned if NULL) */
+ index_id_t* index_id, /*!< out: index id
+ (not assigned if NULL) */
+ ulint* space_id, /*!< out: space id
+ (not assigned if NULL) */
+ const dict_table_t* table, /*!< in: table */
+ bool disable_redo); /*!< in: if true and table
+ object is NULL
+					then disable redo logging */
/**********************************************************************//**
Writes the current value of the row id counter to the dictionary header file
page. */
-UNIV_INTERN
void
dict_hdr_flush_row_id(void);
/*=======================*/
/**********************************************************************//**
Returns a new row id.
-@return the new id */
+@return the new id */
UNIV_INLINE
row_id_t
dict_sys_get_new_row_id(void);
/*=========================*/
/**********************************************************************//**
Reads a row id from a record or other 6-byte stored form.
-@return row id */
+@return row id */
UNIV_INLINE
row_id_t
dict_sys_read_row_id(
@@ -91,7 +89,6 @@ dict_sys_write_row_id(
Initializes the data dictionary memory structures when the database is
started. This function is also called when the data dictionary is created.
@return DB_SUCCESS or error code. */
-UNIV_INTERN
dberr_t
dict_boot(void)
/*===========*/
@@ -100,7 +97,6 @@ dict_boot(void)
/*****************************************************************//**
Creates and initializes the data dictionary at the server bootstrap.
@return DB_SUCCESS or error code. */
-UNIV_INTERN
dberr_t
dict_create(void)
/*=============*/
@@ -221,7 +217,8 @@ enum dict_col_sys_indexes_enum {
DICT_COL__SYS_INDEXES__TYPE = 4,
DICT_COL__SYS_INDEXES__SPACE = 5,
DICT_COL__SYS_INDEXES__PAGE_NO = 6,
- DICT_NUM_COLS__SYS_INDEXES = 7
+ DICT_COL__SYS_INDEXES__MERGE_THRESHOLD = 7,
+ DICT_NUM_COLS__SYS_INDEXES = 8
};
/* The field numbers in the SYS_INDEXES clustered index */
enum dict_fld_sys_indexes_enum {
@@ -234,7 +231,8 @@ enum dict_fld_sys_indexes_enum {
DICT_FLD__SYS_INDEXES__TYPE = 6,
DICT_FLD__SYS_INDEXES__SPACE = 7,
DICT_FLD__SYS_INDEXES__PAGE_NO = 8,
- DICT_NUM_FIELDS__SYS_INDEXES = 9
+ DICT_FLD__SYS_INDEXES__MERGE_THRESHOLD = 9,
+ DICT_NUM_FIELDS__SYS_INDEXES = 10
};
/* The columns in SYS_FIELDS */
enum dict_col_sys_fields_enum {
@@ -325,6 +323,23 @@ enum dict_fld_sys_datafiles_enum {
DICT_NUM_FIELDS__SYS_DATAFILES = 4
};
+/* The columns in SYS_VIRTUAL */
+enum dict_col_sys_virtual_enum {
+ DICT_COL__SYS_VIRTUAL__TABLE_ID = 0,
+ DICT_COL__SYS_VIRTUAL__POS = 1,
+ DICT_COL__SYS_VIRTUAL__BASE_POS = 2,
+ DICT_NUM_COLS__SYS_VIRTUAL = 3
+};
+/* The field numbers in the SYS_VIRTUAL clustered index */
+enum dict_fld_sys_virtual_enum {
+ DICT_FLD__SYS_VIRTUAL__TABLE_ID = 0,
+ DICT_FLD__SYS_VIRTUAL__POS = 1,
+ DICT_FLD__SYS_VIRTUAL__BASE_POS = 2,
+ DICT_FLD__SYS_VIRTUAL__DB_TRX_ID = 3,
+ DICT_FLD__SYS_VIRTUAL__DB_ROLL_PTR = 4,
+ DICT_NUM_FIELDS__SYS_VIRTUAL = 5
+};
+
/* A number of the columns above occur in multiple tables. These are the
lengths of those fields. */
#define DICT_FLD_LEN_SPACE 4
@@ -335,8 +350,6 @@ two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is
updated */
#define DICT_HDR_ROW_ID_WRITE_MARGIN 256
-#ifndef UNIV_NONINL
#include "dict0boot.ic"
-#endif
#endif
diff --git a/storage/innobase/include/dict0boot.ic b/storage/innobase/include/dict0boot.ic
index 2f12e7a5d49..c3862b5c76a 100644
--- a/storage/innobase/include/dict0boot.ic
+++ b/storage/innobase/include/dict0boot.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,7 +25,7 @@ Created 4/18/1996 Heikki Tuuri
/**********************************************************************//**
Returns a new row id.
-@return the new id */
+@return the new id */
UNIV_INLINE
row_id_t
dict_sys_get_new_row_id(void)
@@ -33,7 +33,7 @@ dict_sys_get_new_row_id(void)
{
row_id_t id;
- mutex_enter(&(dict_sys->mutex));
+ mutex_enter(&dict_sys->mutex);
id = dict_sys->row_id;
@@ -44,14 +44,14 @@ dict_sys_get_new_row_id(void)
dict_sys->row_id++;
- mutex_exit(&(dict_sys->mutex));
+ mutex_exit(&dict_sys->mutex);
return(id);
}
/**********************************************************************//**
Reads a row id from a record or other 6-byte stored form.
-@return row id */
+@return row id */
UNIV_INLINE
row_id_t
dict_sys_read_row_id(
diff --git a/storage/innobase/include/dict0crea.h b/storage/innobase/include/dict0crea.h
index 2a92c523e35..359d9f556e5 100644
--- a/storage/innobase/include/dict0crea.h
+++ b/storage/innobase/include/dict0crea.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,8 +27,6 @@ Created 1/8/1996 Heikki Tuuri
#ifndef dict0crea_h
#define dict0crea_h
-#include "univ.i"
-#include "dict0types.h"
#include "dict0dict.h"
#include "que0types.h"
#include "row0types.h"
@@ -36,95 +35,134 @@ Created 1/8/1996 Heikki Tuuri
/*********************************************************************//**
Creates a table create graph.
-@return own: table create node */
-UNIV_INTERN
+@return own: table create node */
tab_node_t*
tab_create_graph_create(
/*====================*/
- dict_table_t* table, /*!< in: table to create, built as a memory data
- structure */
- mem_heap_t* heap, /*!< in: heap where created */
- bool commit, /*!< in: true if the commit node should be
- added to the query graph */
- fil_encryption_t mode, /*!< in: encryption mode */
- ulint key_id);/*!< in: encryption key_id */
-/*********************************************************************//**
-Creates an index create graph.
-@return own: index create node */
-UNIV_INTERN
+ dict_table_t* table, /*!< in: table to create, built as
+ a memory data structure */
+ mem_heap_t* heap, /*!< in: heap where created */
+ fil_encryption_t mode, /*!< in: encryption mode */
+ uint32_t key_id); /*!< in: encryption key_id */
+
+/** Creates an index create graph.
+@param[in] index index to create, built as a memory data structure
+@param[in,out] heap heap where created
+@param[in] add_v new virtual columns added in the same clause with
+ add index
+@return own: index create node */
ind_node_t*
ind_create_graph_create(
-/*====================*/
- dict_index_t* index, /*!< in: index to create, built as a memory data
- structure */
- mem_heap_t* heap, /*!< in: heap where created */
- bool commit);/*!< in: true if the commit node should be
- added to the query graph */
+ dict_index_t* index,
+ mem_heap_t* heap,
+ const dict_add_v_col_t* add_v);
+
/***********************************************************//**
Creates a table. This is a high-level function used in SQL execution graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
dict_create_table_step(
/*===================*/
- que_thr_t* thr); /*!< in: query thread */
+ que_thr_t* thr); /*!< in: query thread */
+
+/** Assign a new table ID and put it into the table cache and the transaction.
+@param[in,out] table Table that needs an ID
+@param[in,out] trx Transaction */
+void
+dict_table_assign_new_id(
+ dict_table_t* table,
+ trx_t* trx);
+
/***********************************************************//**
Creates an index. This is a high-level function used in SQL execution
graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
dict_create_index_step(
/*===================*/
- que_thr_t* thr); /*!< in: query thread */
+ que_thr_t* thr); /*!< in: query thread */
+
+/***************************************************************//**
+Builds an index definition but does not update SYS_TABLES. */
+void
+dict_build_index_def(
+/*=================*/
+ const dict_table_t* table, /*!< in: table */
+ dict_index_t* index, /*!< in/out: index */
+ trx_t* trx); /*!< in/out: InnoDB transaction
+ handle */
+/***************************************************************//**
+Creates an index tree for the index if it is not a member of a cluster.
+Don't update SYSTEM TABLES.
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+dberr_t
+dict_create_index_tree(
+/*===================*/
+ dict_index_t* index, /*!< in/out: index */
+ const trx_t* trx); /*!< in: InnoDB transaction handle */
+
/*******************************************************************//**
-Truncates the index tree associated with a row in SYS_INDEXES table.
+Recreate the index tree associated with a row in SYS_INDEXES table.
@return new root page number, or FIL_NULL on failure */
-UNIV_INTERN
ulint
-dict_truncate_index_tree(
-/*=====================*/
- dict_table_t* table, /*!< in: the table the index belongs to */
- ulint space, /*!< in: 0=truncate,
- nonzero=create the index tree in the
- given tablespace */
- btr_pcur_t* pcur, /*!< in/out: persistent cursor pointing to
- record in the clustered index of
- SYS_INDEXES table. The cursor may be
- repositioned in this call. */
- mtr_t* mtr); /*!< in: mtr having the latch
- on the record page. The mtr may be
- committed and restarted in this call. */
+dict_recreate_index_tree(
+/*======================*/
+ const dict_table_t* table, /*!< in: the table the index
+ belongs to */
+ btr_pcur_t* pcur, /*!< in/out: persistent cursor pointing
+ to record in the clustered index of
+ SYS_INDEXES table. The cursor may be
+ repositioned in this call. */
+ mtr_t* mtr); /*!< in: mtr having the latch
+ on the record page. The mtr may be
+ committed and restarted in this call. */
+
+/** Drop the index tree associated with a row in SYS_INDEXES table.
+@param[in,out] rec SYS_INDEXES record
+@param[in,out] pcur persistent cursor on rec
+@param[in,out] trx dictionary transaction
+@param[in,out] mtr mini-transaction
+@return whether freeing the B-tree was attempted */
+bool dict_drop_index_tree(rec_t* rec, btr_pcur_t* pcur, trx_t* trx, mtr_t* mtr)
+ MY_ATTRIBUTE((nonnull));
+
+/***************************************************************//**
+Creates an index tree for the index if it is not a member of a cluster.
+Don't update SYSTEM TABLES.
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+dberr_t
+dict_create_index_tree_in_mem(
+/*==========================*/
+ dict_index_t* index, /*!< in/out: index */
+ const trx_t* trx); /*!< in: InnoDB transaction handle */
+
/*******************************************************************//**
-Drops the index tree associated with a row in SYS_INDEXES table. */
-UNIV_INTERN
+Truncates the index tree without updating the system tables.
+@return DB_SUCCESS or error */
+dberr_t
+dict_truncate_index_tree_in_mem(
+/*============================*/
+ dict_index_t* index); /*!< in/out: index */
+
+/*******************************************************************//**
+Drops the index tree without updating the SYS_INDEXES table. */
void
-dict_drop_index_tree(
-/*=================*/
- rec_t* rec, /*!< in/out: record in the clustered index
- of SYS_INDEXES table */
- mtr_t* mtr); /*!< in: mtr having the latch on the record page */
+dict_drop_index_tree_in_mem(
+/*========================*/
+ const dict_index_t* index, /*!< in: index */
+ ulint page_no);/*!< in: index page-no */
+
/****************************************************************//**
Creates the foreign key constraints system tables inside InnoDB
at server bootstrap or server start if they are not found or are
not of the right form.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
dberr_t
dict_create_or_check_foreign_constraint_tables(void);
/*================================================*/
/********************************************************************//**
-Construct foreign key constraint defintion from data dictionary information.
-*/
-UNIV_INTERN
-char*
-dict_foreign_def_get(
-/*=================*/
- dict_foreign_t* foreign,/*!< in: foreign */
- trx_t* trx); /*!< in: trx */
-
-/********************************************************************//**
Generate a foreign key constraint name when it was not named by the user.
A generated constraint has a name of the format dbname/tablename_ibfk_NUMBER,
where the numbers start from 1, and are given locally for this table, that is,
@@ -133,11 +171,10 @@ UNIV_INLINE
dberr_t
dict_create_add_foreign_id(
/*=======================*/
- ulint* id_nr, /*!< in/out: number to use in id generation;
- incremented if used */
- const char* name, /*!< in: table name */
- dict_foreign_t* foreign)/*!< in/out: foreign key */
- MY_ATTRIBUTE((nonnull));
+ ulint* id_nr, /*!< in/out: number to use in id
+ generation; incremented if used */
+ const char* name, /*!< in: table name */
+ dict_foreign_t* foreign); /*!< in/out: foreign key */
/** Adds the given set of foreign key objects to the dictionary tables
in the database. This function does not modify the dictionary cache. The
@@ -149,7 +186,6 @@ the dictionary tables
local_fk_set belong to
@param[in,out] trx transaction
@return error code or DB_SUCCESS */
-UNIV_INTERN
dberr_t
dict_create_add_foreigns_to_dictionary(
/*===================================*/
@@ -157,38 +193,57 @@ dict_create_add_foreigns_to_dictionary(
const dict_table_t* table,
trx_t* trx)
MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** Check if a foreign constraint is on columns serving as base columns
+of any stored column. This is to prevent creating SET NULL or CASCADE
+constraints on such columns.
+@param[in] local_fk_set set of foreign key objects, to be added to
+the dictionary tables
+@param[in] table table to which the foreign key objects in
+local_fk_set belong to
+@return true if so, otherwise false */
+bool
+dict_foreigns_has_s_base_col(
+ const dict_foreign_set& local_fk_set,
+ const dict_table_t* table);
+
/****************************************************************//**
Creates the tablespaces and datafiles system tables inside InnoDB
at server bootstrap or server start if they are not found or are
not of the right form.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
dberr_t
dict_create_or_check_sys_tablespace(void);
/*=====================================*/
-/********************************************************************//**
-Add a single tablespace definition to the data dictionary tables in the
-database.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+/** Creates the virtual column system tables inside InnoDB
+at server bootstrap or server start if they are not found or are
+not of the right form.
+@return DB_SUCCESS or error code */
dberr_t
-dict_create_add_tablespace_to_dictionary(
-/*=====================================*/
- ulint space, /*!< in: tablespace id */
- const char* name, /*!< in: tablespace name */
- ulint flags, /*!< in: tablespace flags */
- const char* path, /*!< in: tablespace path */
- trx_t* trx, /*!< in: transaction */
- bool commit); /*!< in: if true then commit the
- transaction */
+dict_create_or_check_sys_virtual();
+
+/** Put a tablespace definition into the data dictionary,
+replacing what was there previously.
+@param[in]	space_id	Tablespace id
+@param[in] name Tablespace name
+@param[in] flags Tablespace flags
+@param[in] path Tablespace path
+@param[in] trx Transaction
+@return error code or DB_SUCCESS */
+dberr_t
+dict_replace_tablespace_in_dictionary(
+ ulint space_id,
+ const char* name,
+ ulint flags,
+ const char* path,
+ trx_t* trx);
+
/********************************************************************//**
Add a foreign key definition to the data dictionary tables.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
dberr_t
dict_create_add_foreign_to_dictionary(
/*==================================*/
- dict_table_t* table, /*!< in: table */
const char* name, /*!< in: table name */
const dict_foreign_t* foreign,/*!< in: foreign key */
trx_t* trx) /*!< in/out: dictionary transaction */
@@ -206,68 +261,103 @@ dict_foreign_def_get(
/* Table create node structure */
struct tab_node_t{
- que_common_t common; /*!< node type: QUE_NODE_TABLE_CREATE */
- dict_table_t* table; /*!< table to create, built as a memory data
- structure with dict_mem_... functions */
- ins_node_t* tab_def; /* child node which does the insert of
- the table definition; the row to be inserted
- is built by the parent node */
- ins_node_t* col_def; /* child node which does the inserts of
- the column definitions; the row to be inserted
- is built by the parent node */
- commit_node_t* commit_node;
- /* child node which performs a commit after
- a successful table creation */
+ que_common_t common; /*!< node type: QUE_NODE_TABLE_CREATE */
+ dict_table_t* table; /*!< table to create, built as a
+ memory data structure with
+ dict_mem_... functions */
+ ins_node_t* tab_def; /*!< child node which does the insert of
+ the table definition; the row to be
+ inserted is built by the parent node */
+ ins_node_t* col_def; /*!< child node which does the inserts
+ of the column definitions; the row to
+ be inserted is built by the parent
+ node */
+ ins_node_t* v_col_def; /*!< child node which does the inserts
+ of the sys_virtual row definitions;
+ the row to be inserted is built by
+ the parent node */
/*----------------------*/
/* Local storage for this graph node */
- ulint state; /*!< node execution state */
- ulint col_no; /*!< next column definition to insert */
- ulint key_id; /*!< encryption key_id */
+ ulint state; /*!< node execution state */
+ ulint col_no; /*!< next column definition to insert */
+ uint key_id; /*!< encryption key_id */
fil_encryption_t mode; /*!< encryption mode */
- mem_heap_t* heap; /*!< memory heap used as auxiliary storage */
+ ulint base_col_no; /*!< next base column to insert */
+ mem_heap_t* heap; /*!< memory heap used as auxiliary
+ storage */
};
/* Table create node states */
#define TABLE_BUILD_TABLE_DEF 1
#define TABLE_BUILD_COL_DEF 2
-#define TABLE_COMMIT_WORK 3
+#define TABLE_BUILD_V_COL_DEF 3
#define TABLE_ADD_TO_CACHE 4
#define TABLE_COMPLETED 5
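
The defines above replace TABLE_COMMIT_WORK with a virtual-column step. A sketch of the implied progression, as a plain stepper; the real graph node advances inside que_* execution, so this is a model, not the server's code:

    /* Model of the create-table state progression implied by the defines. */
    enum tab_state {
        BUILD_TABLE_DEF = 1,
        BUILD_COL_DEF   = 2,
        BUILD_V_COL_DEF = 3,    /* new: insert SYS_VIRTUAL rows */
        ADD_TO_CACHE    = 4,
        COMPLETED       = 5
    };

    static enum tab_state tab_next(enum tab_state s)
    {
        return s < COMPLETED ? (enum tab_state)(s + 1) : COMPLETED;
    }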
/* Index create node struct */
struct ind_node_t{
- que_common_t common; /*!< node type: QUE_NODE_INDEX_CREATE */
- dict_index_t* index; /*!< index to create, built as a memory data
- structure with dict_mem_... functions */
- ins_node_t* ind_def; /* child node which does the insert of
- the index definition; the row to be inserted
- is built by the parent node */
- ins_node_t* field_def; /* child node which does the inserts of
- the field definitions; the row to be inserted
- is built by the parent node */
- commit_node_t* commit_node;
- /* child node which performs a commit after
- a successful index creation */
+ que_common_t common; /*!< node type: QUE_NODE_INDEX_CREATE */
+ dict_index_t* index; /*!< index to create, built as a
+ memory data structure with
+ dict_mem_... functions */
+ ins_node_t* ind_def; /*!< child node which does the insert of
+ the index definition; the row to be
+ inserted is built by the parent node */
+ ins_node_t* field_def; /*!< child node which does the inserts
+ of the field definitions; the row to
+ be inserted is built by the parent
+ node */
/*----------------------*/
/* Local storage for this graph node */
- ulint state; /*!< node execution state */
- ulint page_no;/* root page number of the index */
- dict_table_t* table; /*!< table which owns the index */
- dtuple_t* ind_row;/* index definition row built */
- ulint field_no;/* next field definition to insert */
- mem_heap_t* heap; /*!< memory heap used as auxiliary storage */
+ ulint state; /*!< node execution state */
+	ulint		page_no;	/*!< root page number of the index */
+	dict_table_t*	table;		/*!< table which owns the index */
+	dtuple_t*	ind_row;	/*!< index definition row built */
+	ulint		field_no;	/*!< next field definition to insert */
+ mem_heap_t* heap; /*!< memory heap used as auxiliary
+ storage */
+ const dict_add_v_col_t*
+			add_v;		/*!< new virtual columns that are being
+					added along with an ADD INDEX call */
};
+/** Compose a column number for a virtual column, stored in the "POS" field
+of Sys_columns. The column number includes both its virtual column sequence
+(the "nth" virtual column) and its actual column position in the original table.
+@param[in] v_pos virtual column sequence
+@param[in] col_pos column position in original table definition
+@return composed column position number */
+UNIV_INLINE
+ulint
+dict_create_v_col_pos(
+ ulint v_pos,
+ ulint col_pos);
+
+/** Get the column number for a virtual column (the column position in
+original table), stored in the "POS" field of Sys_columns
+@param[in] pos virtual column position
+@return column position in original table */
+UNIV_INLINE
+ulint
+dict_get_v_col_mysql_pos(
+ ulint pos);
+
+/** Get a virtual column sequence (the "nth" virtual column) for a
+virtual column, stored in the "POS" field of Sys_columns
+@param[in] pos virtual column position
+@return virtual column sequence */
+UNIV_INLINE
+ulint
+dict_get_v_col_pos(
+ ulint pos);
+
/* Index create node states */
#define INDEX_BUILD_INDEX_DEF 1
#define INDEX_BUILD_FIELD_DEF 2
#define INDEX_CREATE_INDEX_TREE 3
-#define INDEX_COMMIT_WORK 4
-#define INDEX_ADD_TO_CACHE 5
+#define INDEX_ADD_TO_CACHE 4
-#ifndef UNIV_NONINL
#include "dict0crea.ic"
-#endif
#endif
diff --git a/storage/innobase/include/dict0crea.ic b/storage/innobase/include/dict0crea.ic
index 59d6fb9d80c..5641206d313 100644
--- a/storage/innobase/include/dict0crea.ic
+++ b/storage/innobase/include/dict0crea.ic
@@ -24,20 +24,9 @@ Database object creation
Created 1/8/1996 Heikki Tuuri
*******************************************************/
-#include "mem0mem.h"
-
-/*********************************************************************//**
-Checks if a table name contains the string "/#sql" which denotes temporary
-tables in MySQL.
-@return true if temporary table */
-UNIV_INTERN
-bool
-row_is_mysql_tmp_table_name(
-/*========================*/
- const char* name) MY_ATTRIBUTE((warn_unused_result));
- /*!< in: table name in the form
- 'database/tablename' */
+#include "ha_prototypes.h"
+#include "mem0mem.h"
/********************************************************************//**
Generate a foreign key constraint name when it was not named by the user.
@@ -53,6 +42,8 @@ dict_create_add_foreign_id(
const char* name, /*!< in: table name */
dict_foreign_t* foreign)/*!< in/out: foreign key */
{
+ DBUG_ENTER("dict_create_add_foreign_id");
+
if (foreign->id == NULL) {
/* Generate a new constraint id */
ulint namelen = strlen(name);
@@ -60,7 +51,7 @@ dict_create_add_foreign_id(
mem_heap_alloc(foreign->heap,
namelen + 20));
- if (row_is_mysql_tmp_table_name(name)) {
+ if (dict_table_t::is_temporary_name(name)) {
/* no overflow if number < 1e13 */
sprintf(id, "%s_ibfk_%lu", name,
@@ -89,12 +80,57 @@ dict_create_add_foreign_id(
if (innobase_check_identifier_length(
strchr(id,'/') + 1)) {
- return(DB_IDENTIFIER_TOO_LONG);
+ DBUG_RETURN(DB_IDENTIFIER_TOO_LONG);
}
}
foreign->id = id;
+
+ DBUG_PRINT("dict_create_add_foreign_id",
+ ("generated foreign id: %s", id));
}
- return(DB_SUCCESS);
+
+ DBUG_RETURN(DB_SUCCESS);
+}
+
+/** Compose a column number for a virtual column, stored in the "POS" field
+of Sys_columns. The column number includes both its virtual column sequence
+(the "nth" virtual column) and its actual column position in the original table.
+@param[in] v_pos virtual column sequence
+@param[in] col_pos column position in original table definition
+@return composed column position number */
+UNIV_INLINE
+ulint
+dict_create_v_col_pos(
+ ulint v_pos,
+ ulint col_pos)
+{
+ ut_ad(v_pos <= REC_MAX_N_FIELDS);
+ ut_ad(col_pos <= REC_MAX_N_FIELDS);
+
+ return(((v_pos + 1) << 16) + col_pos);
}
+/** Get the column number for a virtual column (the column position in
+original table), stored in the "POS" field of Sys_columns
+@param[in] pos virtual column position
+@return column position in original table */
+UNIV_INLINE
+ulint
+dict_get_v_col_mysql_pos(
+ ulint pos)
+{
+ return(pos & 0xFFFF);
+}
+
+/** Get a virtual column sequence (the "nth" virtual column) for a
+virtual column, stored in the "POS" field of Sys_columns
+@param[in] pos virtual column position
+@return virtual column sequence */
+UNIV_INLINE
+ulint
+dict_get_v_col_pos(
+ ulint pos)
+{
+ return((pos >> 16) - 1);
+}
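
A standalone round-trip check of the POS packing implemented above, assuming only the 16-bit split visible in the three functions:

    #include <cassert>

    typedef unsigned long ulint;    /* stand-in for the InnoDB typedef */

    static ulint v_col_pos(ulint v_pos, ulint col_pos)  /* encode, as above */
    { return ((v_pos + 1) << 16) + col_pos; }

    static ulint v_col_mysql_pos(ulint pos)     /* low 16 bits: table position */
    { return pos & 0xFFFF; }

    static ulint v_col_seq(ulint pos)           /* high bits: nth virtual column */
    { return (pos >> 16) - 1; }

    int main()
    {
        ulint pos = v_col_pos(2, 5);    /* 3rd virtual column, table position 5 */
        assert(v_col_seq(pos) == 2);
        assert(v_col_mysql_pos(pos) == 5);
        return 0;
    }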
diff --git a/storage/innobase/include/dict0defrag_bg.h b/storage/innobase/include/dict0defrag_bg.h
new file mode 100644
index 00000000000..ae017932b9f
--- /dev/null
+++ b/storage/innobase/include/dict0defrag_bg.h
@@ -0,0 +1,108 @@
+/*****************************************************************************
+
+Copyright (c) 2016, 2018, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0defrag_bg.h
+Code used for background table and index
+defragmentation
+
+Created 25/08/2016 Jan Lindström
+*******************************************************/
+
+#ifndef dict0defrag_bg_h
+#define dict0defrag_bg_h
+
+#include "os0event.h"
+#include "os0thread.h"
+#include "dict0types.h"
+
+/** Indices whose defrag stats need to be saved to persistent storage. */
+struct defrag_pool_item_t {
+ table_id_t table_id;
+ index_id_t index_id;
+};
+
+/** Allocator type, used by std::vector */
+typedef ut_allocator<defrag_pool_item_t>
+ defrag_pool_allocator_t;
+
+/** The multitude of tables to be defragmented: an STL vector */
+typedef std::vector<defrag_pool_item_t, defrag_pool_allocator_t>
+ defrag_pool_t;
+
+/** Pool where we store information on which tables are to be processed
+by background defragmentation. */
+extern defrag_pool_t defrag_pool;
+
+/*****************************************************************//**
+Initialize the defrag pool, called once during thread initialization. */
+void
+dict_defrag_pool_init(void);
+/*========================*/
+
+/*****************************************************************//**
+Free the resources occupied by the defrag pool, called once during
+thread de-initialization. */
+void
+dict_defrag_pool_deinit(void);
+/*==========================*/
+
+/*****************************************************************//**
+Add an index in a table to the defrag pool, which is processed by the
+background stats gathering thread. Only the table id and index id are
+added to the list, so the table can be closed after being enqueued and
+it will be opened when needed. If the table or index does not exist later
+(has been DROPped), then it will be removed from the pool and skipped. */
+void
+dict_stats_defrag_pool_add(
+/*=======================*/
+	const dict_index_t*	index);	/*!< in: index to add */
+
+/*****************************************************************//**
+Delete a given index from the auto defrag pool. */
+void
+dict_stats_defrag_pool_del(
+/*=======================*/
+	const dict_table_t*	table,	/*!< in: if given, remove
+ all entries for the table */
+ const dict_index_t* index); /*!< in: index to remove */
+
+/*****************************************************************//**
+Get the first index that has been added for updating persistent defrag
+stats and eventually save its stats. */
+void
+dict_defrag_process_entries_from_defrag_pool();
+/*===========================================*/
+
+/*********************************************************************//**
+Save defragmentation result.
+@return DB_SUCCESS or error code */
+dberr_t
+dict_stats_save_defrag_summary(
+/*============================*/
+ dict_index_t* index) /*!< in: index */
+ MY_ATTRIBUTE((warn_unused_result));
+
+/*********************************************************************//**
+Save defragmentation stats for a given index.
+@return DB_SUCCESS or error code */
+dberr_t
+dict_stats_save_defrag_stats(
+/*============================*/
+ dict_index_t* index); /*!< in: index */
+#endif /* dict0defrag_bg_h */
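
A hedged standalone model of the pool this header declares: only ids are stored, so a table can be closed after being enqueued and is reopened by id when its entry is processed. The names below are illustrative, not the declared entry points:

    #include <cstdint>
    #include <vector>

    struct defrag_item { uint64_t table_id; uint64_t index_id; };

    static std::vector<defrag_item> pool;   /* models defrag_pool above */

    static void pool_add(uint64_t table_id, uint64_t index_id)
    {
        for (const defrag_item& it : pool)
            if (it.table_id == table_id && it.index_id == index_id)
                return;     /* already queued */
        pool.push_back({table_id, index_id});
    }

    static bool pool_get(defrag_item* out)
    {
        if (pool.empty())
            return false;
        *out = pool.front();    /* the caller reopens the table by id; if it
                                   was dropped meanwhile, the entry is skipped */
        pool.erase(pool.begin());
        return true;
    }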
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
index 93ca3692141..565ea77374d 100644
--- a/storage/innobase/include/dict0dict.h
+++ b/storage/innobase/include/dict0dict.h
@@ -1,8 +1,8 @@
/*****************************************************************************
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2018, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2013, 2018, MariaDB Corporation.
+Copyright (c) 2013, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,42 +28,19 @@ Created 1/8/1996 Heikki Tuuri
#ifndef dict0dict_h
#define dict0dict_h
-#include "univ.i"
-#include "db0err.h"
-#include "dict0types.h"
-#include "dict0mem.h"
-#include "data0type.h"
#include "data0data.h"
-#include "mem0mem.h"
-#include "rem0types.h"
-#include "ut0mem.h"
-#include "ut0lst.h"
-#include "hash0hash.h"
-#include "ut0rnd.h"
-#include "ut0byte.h"
-#include "trx0types.h"
-#include "row0types.h"
+#include "dict0mem.h"
#include "fsp0fsp.h"
+#include <deque>
#include "dict0pagecompress.h"
extern bool innodb_table_stats_not_found;
extern bool innodb_index_stats_not_found;
-#ifndef UNIV_HOTBACKUP
-# include "sync0sync.h"
-# include "sync0rw.h"
-/******************************************************************//**
-Makes all characters in a NUL-terminated UTF-8 string lower case. */
-UNIV_INTERN
-void
-dict_casedn_str(
-/*============*/
- char* a) /*!< in/out: string to put in lower case */
- MY_ATTRIBUTE((nonnull));
+#include "sync0rw.h"
/********************************************************************//**
Get the database name length in a table name.
-@return database name length */
-UNIV_INTERN
+@return database name length */
ulint
dict_get_db_name_len(
/*=================*/
@@ -75,7 +52,6 @@ Open a table from its database and table name, this is currently used by
foreign constraint parser to get the referenced table.
@return complete table name with database and table name, allocated from
heap memory passed in */
-UNIV_INTERN
char*
dict_get_referenced_table(
/*======================*/
@@ -88,7 +64,6 @@ dict_get_referenced_table(
mem_heap_t* heap); /*!< in: heap memory */
/*********************************************************************//**
Frees a foreign key struct. */
-
void
dict_foreign_free(
/*==============*/
@@ -98,7 +73,6 @@ Finds the highest [number] for foreign key constraints of the table. Looks
only at the >= 4.0.18-format id's, which are of the form
databasename/tablename_ibfk_[number].
@return highest number, 0 if table has no new format foreign key constraints */
-UNIV_INTERN
ulint
dict_table_get_highest_foreign_id(
/*==============================*/
@@ -106,8 +80,7 @@ dict_table_get_highest_foreign_id(
memory cache */
/********************************************************************//**
Return the end of table name where we have removed dbname and '/'.
-@return table name */
-UNIV_INTERN
+@return table name */
const char*
dict_remove_db_name(
/*================*/
@@ -130,29 +103,22 @@ enum dict_table_op_t {
/**********************************************************************//**
Returns a table object based on table id.
-@return table, NULL if does not exist */
-UNIV_INTERN
+@return table, NULL if does not exist */
dict_table_t*
dict_table_open_on_id(
/*==================*/
table_id_t table_id, /*!< in: table id */
ibool dict_locked, /*!< in: TRUE=data dictionary locked */
dict_table_op_t table_op) /*!< in: operation to perform */
- __attribute__((warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/**********************************************************************//**
Returns a table object based on table id.
@return table, NULL if does not exist */
-UNIV_INTERN
-dict_table_t*
-dict_table_open_on_index_id(
-/*==================*/
- table_id_t table_id, /*!< in: table id */
- bool dict_locked) /*!< in: TRUE=data dictionary locked */
+dict_table_t* dict_table_open_on_index_id(index_id_t index_id)
__attribute__((warn_unused_result));
/********************************************************************//**
Decrements the count of open handles to a table. */
-UNIV_INTERN
void
dict_table_close(
/*=============*/
@@ -162,22 +128,22 @@ dict_table_close(
indexes after an aborted online
index creation */
MY_ATTRIBUTE((nonnull));
+/*********************************************************************//**
+Closes the only open handle to a table and drops the table while ensuring
+that dict_sys->mutex is held the whole time. This assures that the table
+is not evicted after the close when the count of open handles goes to zero.
+Because dict_sys->mutex is held, we do not need to call
+dict_table_prevent_eviction(). */
+void
+dict_table_close_and_drop(
+/*======================*/
+ trx_t* trx, /*!< in: data dictionary transaction */
+ dict_table_t* table); /*!< in/out: table */
/**********************************************************************//**
Inits the data dictionary module. */
-UNIV_INTERN
void
dict_init(void);
-/*===========*/
-/********************************************************************//**
-Gets the space id of every table of the data dictionary and makes a linear
-list and a hash table of them to the data dictionary cache. This function
-can be called at database startup if we did not need to do a crash recovery.
-In crash recovery we must scan the space id's from the .ibd files in MySQL
-database directories. */
-UNIV_INTERN
-void
-dict_load_space_id_list(void);
-/*=========================*/
+
/*********************************************************************//**
Gets the minimum number of bytes per character.
@return minimum multi-byte char size, in bytes */
@@ -203,8 +169,8 @@ void
dict_col_copy_type(
/*===============*/
const dict_col_t* col, /*!< in: column */
- dtype_t* type) /*!< out: data type */
- MY_ATTRIBUTE((nonnull));
+ dtype_t* type); /*!< out: data type */
+
/**********************************************************************//**
Determine bytes of column prefix to be stored in the undo log. Please
note if the table format is UNIV_FORMAT_A (< UNIV_FORMAT_B), no prefix
@@ -218,11 +184,22 @@ dict_max_field_len_store_undo(
const dict_col_t* col) /*!< in: column which index prefix
is based on */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-#endif /* !UNIV_HOTBACKUP */
+
+/** Determine the maximum bytes of a virtual column that need to be stored
+in the undo log.
+@param[in] table dict_table_t for the table
+@param[in] col_no virtual column number
+@return maximum bytes of virtual column to be stored in the undo log */
+UNIV_INLINE
+ulint
+dict_max_v_field_len_store_undo(
+ dict_table_t* table,
+ ulint col_no);
+
#ifdef UNIV_DEBUG
/*********************************************************************//**
Assert that a column and a data type match.
-@return TRUE */
+@return TRUE */
UNIV_INLINE
ibool
dict_col_type_assert_equal(
@@ -231,10 +208,10 @@ dict_col_type_assert_equal(
const dtype_t* type) /*!< in: data type */
MY_ATTRIBUTE((nonnull, warn_unused_result));
#endif /* UNIV_DEBUG */
-#ifndef UNIV_HOTBACKUP
+
/***********************************************************************//**
Returns the minimum size of the column.
-@return minimum size */
+@return minimum size */
UNIV_INLINE
ulint
dict_col_get_min_size(
@@ -243,7 +220,7 @@ dict_col_get_min_size(
MY_ATTRIBUTE((nonnull, warn_unused_result));
/***********************************************************************//**
Returns the maximum size of the column.
-@return maximum size */
+@return maximum size */
UNIV_INLINE
ulint
dict_col_get_max_size(
@@ -252,7 +229,7 @@ dict_col_get_max_size(
MY_ATTRIBUTE((nonnull, warn_unused_result));
/***********************************************************************//**
Returns the size of a fixed size column, 0 if not a fixed size column.
-@return fixed size, or 0 */
+@return fixed size, or 0 */
UNIV_INLINE
ulint
dict_col_get_fixed_size(
@@ -263,7 +240,7 @@ dict_col_get_fixed_size(
/***********************************************************************//**
Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column.
For fixed length types it is the fixed length of the type, otherwise 0.
-@return SQL null storage size in ROW_FORMAT=REDUNDANT */
+@return SQL null storage size in ROW_FORMAT=REDUNDANT */
UNIV_INLINE
ulint
dict_col_get_sql_null_size(
@@ -273,7 +250,7 @@ dict_col_get_sql_null_size(
MY_ATTRIBUTE((nonnull, warn_unused_result));
/*********************************************************************//**
Gets the column number.
-@return col->ind, table column position (starting from 0) */
+@return col->ind, table column position (starting from 0) */
UNIV_INLINE
ulint
dict_col_get_no(
@@ -289,11 +266,22 @@ dict_col_get_clust_pos(
const dict_col_t* col, /*!< in: table column */
const dict_index_t* clust_index) /*!< in: clustered index */
MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** Gets the column position in the given index.
+@param[in] col table column
+@param[in] index index to be searched for column
+@return position of column in the given index. */
+UNIV_INLINE
+ulint
+dict_col_get_index_pos(
+ const dict_col_t* col,
+ const dict_index_t* index)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
/****************************************************************//**
If the given column name is reserved for InnoDB system columns, return
TRUE.
-@return TRUE if name is reserved */
-UNIV_INTERN
+@return TRUE if name is reserved */
ibool
dict_col_name_is_reserved(
/*======================*/
@@ -301,89 +289,83 @@ dict_col_name_is_reserved(
MY_ATTRIBUTE((nonnull, warn_unused_result));
/********************************************************************//**
Acquire the autoinc lock. */
-UNIV_INTERN
void
dict_table_autoinc_lock(
/*====================*/
dict_table_t* table) /*!< in/out: table */
MY_ATTRIBUTE((nonnull));
-/********************************************************************//**
-Unconditionally set the autoinc counter. */
-UNIV_INTERN
+/** Unconditionally set the AUTO_INCREMENT counter.
+@param[in,out] table table or partition
+@param[in] value next available AUTO_INCREMENT value */
+MY_ATTRIBUTE((nonnull))
+UNIV_INLINE
void
-dict_table_autoinc_initialize(
-/*==========================*/
- dict_table_t* table, /*!< in/out: table */
- ib_uint64_t value) /*!< in: next value to assign to a row */
- MY_ATTRIBUTE((nonnull));
+dict_table_autoinc_initialize(dict_table_t* table, ib_uint64_t value)
+{
+ ut_ad(dict_table_autoinc_own(table));
+ table->autoinc = value;
+}
-/** Store autoinc value when the table is evicted.
-@param[in] table table evicted */
-UNIV_INTERN
-void
-dict_table_autoinc_store(
- const dict_table_t* table);
+/** Get the next AUTO_INCREMENT counter value.
+@param[in] table table or partition
+@return the next AUTO_INCREMENT counter value
+@retval 0 if AUTO_INCREMENT is not yet initialized */
+MY_ATTRIBUTE((nonnull, warn_unused_result))
+UNIV_INLINE
+ib_uint64_t
+dict_table_autoinc_read(const dict_table_t* table)
+{
+ ut_ad(dict_table_autoinc_own(table));
+ return(table->autoinc);
+}
-/** Restore autoinc value when the table is loaded.
-@param[in] table table loaded */
-UNIV_INTERN
-void
-dict_table_autoinc_restore(
- dict_table_t* table);
+/** Update the AUTO_INCREMENT sequence if the value supplied is greater
+than the current value.
+@param[in,out] table table or partition
+@param[in] value AUTO_INCREMENT value that was assigned to a row
+@return whether the AUTO_INCREMENT sequence was updated */
+MY_ATTRIBUTE((nonnull))
+UNIV_INLINE
+bool
+dict_table_autoinc_update_if_greater(dict_table_t* table, ib_uint64_t value)
+{
+ ut_ad(dict_table_autoinc_own(table));
-/********************************************************************//**
-Reads the next autoinc value (== autoinc counter value), 0 if not yet
-initialized.
-@return value for a new row, or 0 */
-UNIV_INTERN
-ib_uint64_t
-dict_table_autoinc_read(
-/*====================*/
- const dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Updates the autoinc counter if the value supplied is greater than the
-current value. */
-UNIV_INTERN
-void
-dict_table_autoinc_update_if_greater(
-/*=================================*/
+ if (value > table->autoinc) {
+
+ table->autoinc = value;
+ return(true);
+ }
+
+ return(false);
+}
- dict_table_t* table, /*!< in/out: table */
- ib_uint64_t value) /*!< in: value which was assigned to a row */
- MY_ATTRIBUTE((nonnull));
/********************************************************************//**
Release the autoinc lock. */
-UNIV_INTERN
void
dict_table_autoinc_unlock(
/*======================*/
dict_table_t* table) /*!< in/out: table */
MY_ATTRIBUTE((nonnull));
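
Taken together, these inline functions define a latch-protected counter protocol: lock, read or initialize, bump if greater, unlock. A standalone model of that sequence, with a std::mutex standing in for the per-table autoinc latch:

    #include <cstdint>
    #include <mutex>

    struct table_model {
        std::mutex autoinc_mutex;   /* stands in for dict_table_autoinc_lock() */
        uint64_t   autoinc = 0;     /* 0 = not yet initialized */
    };

    static uint64_t reserve_next(table_model& t)
    {
        std::lock_guard<std::mutex> latch(t.autoinc_mutex);
        uint64_t value = t.autoinc;     /* dict_table_autoinc_read() */
        if (value == 0)
            value = 1;                  /* dict_table_autoinc_initialize() */
        if (value + 1 > t.autoinc)      /* ..._update_if_greater(): monotonic */
            t.autoinc = value + 1;
        return value;
    }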
-#endif /* !UNIV_HOTBACKUP */
/**********************************************************************//**
Adds system columns to a table object. */
-UNIV_INTERN
void
dict_table_add_system_columns(
/*==========================*/
dict_table_t* table, /*!< in/out: table */
mem_heap_t* heap) /*!< in: temporary heap */
MY_ATTRIBUTE((nonnull));
-#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Adds a table object to the dictionary cache. */
-UNIV_INTERN
void
dict_table_add_to_cache(
/*====================*/
dict_table_t* table, /*!< in: table */
- ibool can_be_evicted, /*!< in: TRUE if can be evicted*/
+	bool		can_be_evicted,	/*!< in: whether it can be evicted */
mem_heap_t* heap) /*!< in: temporary heap */
MY_ATTRIBUTE((nonnull));
/**********************************************************************//**
Removes a table object from the dictionary cache. */
-UNIV_INTERN
void
dict_table_remove_from_cache(
/*=========================*/
@@ -391,40 +373,43 @@ dict_table_remove_from_cache(
MY_ATTRIBUTE((nonnull));
/**********************************************************************//**
Removes a table object from the dictionary cache. */
-UNIV_INTERN
void
dict_table_remove_from_cache_low(
/*=============================*/
dict_table_t* table, /*!< in, own: table */
- ibool lru_evict); /*!< in: TRUE if table being evicted
+ ibool lru_evict) /*!< in: TRUE if table being evicted
to make room in the table LRU list */
+ MY_ATTRIBUTE((nonnull));
/**********************************************************************//**
Renames a table object.
-@return TRUE if success */
-UNIV_INTERN
+@return TRUE if success */
dberr_t
dict_table_rename_in_cache(
/*=======================*/
dict_table_t* table, /*!< in/out: table */
const char* new_name, /*!< in: new name */
- ibool rename_also_foreigns)
+ bool rename_also_foreigns,
/*!< in: in ALTER TABLE we want
to preserve the original table name
in constraints which reference it */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************************//**
-Removes an index from the dictionary cache. */
-UNIV_INTERN
+ bool replace_new_file = false)
+ /*!< in: whether to replace the
+ file with the new name
+ (as part of rolling back TRUNCATE) */
+ MY_ATTRIBUTE((nonnull));
+
+/** Removes an index from the dictionary cache.
+@param[in,out] table table whose index to remove
+@param[in,out] index index to remove, this object is destroyed and must not
+be accessed by the caller afterwards */
void
dict_index_remove_from_cache(
-/*=========================*/
- dict_table_t* table, /*!< in/out: table */
- dict_index_t* index) /*!< in, own: index */
- MY_ATTRIBUTE((nonnull));
+ dict_table_t* table,
+ dict_index_t* index);
+
/**********************************************************************//**
Change the id of a table object in the dictionary cache. This is used in
DISCARD TABLESPACE. */
-UNIV_INTERN
void
dict_table_change_id_in_cache(
/*==========================*/
@@ -433,7 +418,6 @@ dict_table_change_id_in_cache(
MY_ATTRIBUTE((nonnull));
/**********************************************************************//**
Removes a foreign constraint struct from the dictionary cache. */
-UNIV_INTERN
void
dict_foreign_remove_from_cache(
/*===========================*/
@@ -444,8 +428,7 @@ Adds a foreign key constraint object to the dictionary cache. May free
the object if there already is an object with the same identifier in.
At least one of foreign table or referenced table must already be in
the dictionary cache!
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
dberr_t
dict_foreign_add_to_cache(
/*======================*/
@@ -462,8 +445,7 @@ dict_foreign_add_to_cache(
MY_ATTRIBUTE((nonnull(1), warn_unused_result));
/*********************************************************************//**
Checks if a table is referenced by foreign keys.
-@return TRUE if table is referenced by a foreign key */
-UNIV_INTERN
+@return TRUE if table is referenced by a foreign key */
ibool
dict_table_is_referenced_by_foreign_key(
/*====================================*/
@@ -473,7 +455,6 @@ dict_table_is_referenced_by_foreign_key(
Replace the index passed in with another equivalent index in the
foreign key lists of the table.
@return whether all replacements were found */
-UNIV_INTERN
bool
dict_foreign_replace_index(
/*=======================*/
@@ -483,51 +464,38 @@ dict_foreign_replace_index(
to use table->col_names */
const dict_index_t* index) /*!< in: index to be replaced */
MY_ATTRIBUTE((nonnull(1,3), warn_unused_result));
-/**********************************************************************//**
-Determines whether a string starts with the specified keyword.
-@return TRUE if str starts with keyword */
-UNIV_INTERN
-ibool
-dict_str_starts_with_keyword(
-/*=========================*/
- THD* thd, /*!< in: MySQL thread handle */
- const char* str, /*!< in: string to scan for keyword */
- const char* keyword) /*!< in: keyword to look for */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Scans a table create SQL string and adds to the data dictionary
+/** Scans a table create SQL string and adds to the data dictionary
the foreign key constraints declared in the string. This function
should be called after the indexes for a table have been created.
Each foreign key constraint must be accompanied with indexes in
both participating tables. The indexes are allowed to contain more
fields than mentioned in the constraint.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+
+@param[in] trx transaction
+@param[in] sql_string table create statement where
+ foreign keys are declared like:
+ FOREIGN KEY (a, b) REFERENCES table2(c, d),
+ table2 can be written also with the database
+ name before it: test.table2; the default
+				database is the database of the parameter name
+@param[in] sql_length length of sql_string
+@param[in] name table full name in normalized form
+@param[in] reject_fks if TRUE, fail with error code
+ DB_CANNOT_ADD_CONSTRAINT if any
+ foreign keys are found.
+@return error code or DB_SUCCESS */
dberr_t
dict_create_foreign_constraints(
-/*============================*/
- trx_t* trx, /*!< in: transaction */
- const char* sql_string, /*!< in: table create statement where
- foreign keys are declared like:
- FOREIGN KEY (a, b) REFERENCES
- table2(c, d), table2 can be written
- also with the database
- name before it: test.table2; the
- default database id the database of
- parameter name */
- size_t sql_length, /*!< in: length of sql_string */
- const char* name, /*!< in: table full name in the
- normalized form
- database_name/table_name */
- ibool reject_fks) /*!< in: if TRUE, fail with error
- code DB_CANNOT_ADD_CONSTRAINT if
- any foreign keys are found. */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ trx_t* trx,
+ const char* sql_string,
+ size_t sql_length,
+ const char* name,
+ ibool reject_fks)
+ MY_ATTRIBUTE((warn_unused_result));
/**********************************************************************//**
Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement.
@return DB_SUCCESS or DB_CANNOT_DROP_CONSTRAINT if syntax error or the
constraint id does not match */
-UNIV_INTERN
dberr_t
dict_foreign_parse_drop_constraints(
/*================================*/
@@ -545,27 +513,40 @@ Returns a table object and increments its open handle count.
NOTE! This is a high-level function to be used mainly from outside the
'dict' directory. Inside this directory dict_table_get_low
is usually the appropriate function.
-@return table, NULL if does not exist */
-UNIV_INTERN
+@param[in] table_name Table name
+@param[in] dict_locked TRUE=data dictionary locked
+@param[in] try_drop TRUE=try to drop any orphan indexes after
+ an aborted online index creation
+@param[in] ignore_err error to be ignored when loading the table
+@return table, NULL if does not exist */
dict_table_t*
dict_table_open_on_name(
-/*====================*/
- const char* table_name, /*!< in: table name */
- ibool dict_locked, /*!< in: TRUE=data dictionary locked */
- ibool try_drop, /*!< in: TRUE=try to drop any orphan
- indexes after an aborted online
- index creation */
- dict_err_ignore_t
- ignore_err) /*!< in: error to be ignored when
- loading the table */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ const char* table_name,
+ ibool dict_locked,
+ ibool try_drop,
+ dict_err_ignore_t ignore_err)
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Outcome of dict_foreign_find_index() or dict_foreign_qualify_index() */
+enum fkerr_t
+{
+ /** A backing index was found for a FOREIGN KEY constraint */
+ FK_SUCCESS = 0,
+ /** There is no index that covers the columns in the constraint. */
+ FK_INDEX_NOT_FOUND,
+	/** The matching index is a prefix index, not a full-column index. */
+ FK_IS_PREFIX_INDEX,
+ /** A condition of SET NULL conflicts with a NOT NULL column. */
+ FK_COL_NOT_NULL,
+ /** The column types do not match */
+ FK_COLS_NOT_EQUAL
+};
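
A sketch of how a caller can turn the enum above into a diagnostic; the enum values mirror the declaration, the message strings are illustrative, not the server's actual error texts:

    enum fkerr_t {      /* mirrors the declaration above */
        FK_SUCCESS = 0,
        FK_INDEX_NOT_FOUND,
        FK_IS_PREFIX_INDEX,
        FK_COL_NOT_NULL,
        FK_COLS_NOT_EQUAL
    };

    static const char* fk_strerror(fkerr_t err)
    {
        switch (err) {
        case FK_SUCCESS:         return "ok";
        case FK_INDEX_NOT_FOUND: return "no index covers the constraint columns";
        case FK_IS_PREFIX_INDEX: return "candidate index is a prefix index";
        case FK_COL_NOT_NULL:    return "SET NULL conflicts with a NOT NULL column";
        case FK_COLS_NOT_EQUAL:  return "column types do not match";
        }
        return "unknown";
    }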
/*********************************************************************//**
Tries to find an index whose first fields are the columns in the array,
in the same order and is not marked for deletion and is not the same
as types_idx.
-@return matching index, NULL if not found */
-UNIV_INTERN
+@return matching index, NULL if not found */
dict_index_t*
dict_foreign_find_index(
/*====================*/
@@ -587,12 +568,12 @@ dict_foreign_find_index(
/*!< in: nonzero if none of
the columns must be declared
NOT NULL */
- ulint* error, /*!< out: error code */
- ulint* err_col_no,
+ fkerr_t* error = NULL, /*!< out: error code */
+ ulint* err_col_no = NULL,
/*!< out: column number where
error happened */
- dict_index_t** err_index)
- /*!< out: index where error
+ dict_index_t** err_index = NULL)
+ /*!< out: index where error
happened */
MY_ATTRIBUTE((nonnull(1,3), warn_unused_result));
@@ -600,24 +581,36 @@ dict_foreign_find_index(
Returns a column's name.
@return column name. NOTE: not guaranteed to stay valid if table is
modified in any way (columns added, etc.). */
-UNIV_INTERN
const char*
dict_table_get_col_name(
/*====================*/
const dict_table_t* table, /*!< in: table */
ulint col_nr) /*!< in: column number */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************************//**
-Prints a table data. */
-UNIV_INTERN
-void
-dict_table_print(
-/*=============*/
- dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull));
+
+/** Returns a virtual column's name.
+@param[in] table table object
+@param[in]	col_nr	virtual column number (nth virtual column)
+@return column name. */
+const char*
+dict_table_get_v_col_name(
+ const dict_table_t* table,
+ ulint col_nr);
+
+/** Check if the table has a given column.
+@param[in] table table object
+@param[in] col_name column name
+@param[in]	col_nr	guessed column number; 0 by default
+@return column number if the table has the specified column,
+otherwise table->n_def */
+ulint
+dict_table_has_column(
+ const dict_table_t* table,
+ const char* col_name,
+ ulint col_nr = 0);
+
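A standalone sketch of the guessed-position lookup the default argument above suggests: try the hinted slot first, then fall back to a linear scan, returning n_def when the column is absent (as the comment documents). The body is illustrative, not the engine's implementation:

    #include <cstring>

    static unsigned find_column(const char* const* names, unsigned n_def,
                                const char* col_name, unsigned guess)
    {
        if (guess < n_def && !strcmp(names[guess], col_name))
            return guess;   /* hint was right: O(1) */
        for (unsigned i = 0; i < n_def; i++)
            if (!strcmp(names[i], col_name))
                return i;
        return n_def;       /* not found */
    }
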
/**********************************************************************//**
Outputs info on foreign keys of a table. */
-UNIV_INTERN
std::string
dict_print_info_on_foreign_keys(
/*============================*/
@@ -627,32 +620,22 @@ dict_print_info_on_foreign_keys(
of SHOW TABLE STATUS */
trx_t* trx, /*!< in: transaction */
dict_table_t* table); /*!< in: table */
+
/**********************************************************************//**
Outputs info on a foreign key of a table in a format suitable for
CREATE TABLE. */
-UNIV_INTERN
std::string
dict_print_info_on_foreign_key_in_create_format(
/*============================================*/
trx_t* trx, /*!< in: transaction */
dict_foreign_t* foreign, /*!< in: foreign key constraint */
ibool add_newline); /*!< in: whether to add a newline */
-/********************************************************************//**
-Displays the names of the index and the table. */
-UNIV_INTERN
-void
-dict_index_name_print(
-/*==================*/
- FILE* file, /*!< in: output stream */
- const trx_t* trx, /*!< in: transaction */
- const dict_index_t* index) /*!< in: index to print */
- MY_ATTRIBUTE((nonnull(1,3)));
+
/*********************************************************************//**
Tries to find an index whose first fields are the columns in the array,
in the same order and is not marked for deletion and is not the same
as types_idx.
-@return matching index, NULL if not found */
-UNIV_INTERN
+@return matching index, NULL if not found */
bool
dict_foreign_qualify_index(
/*====================*/
@@ -675,7 +658,7 @@ dict_foreign_qualify_index(
/*!< in: nonzero if none of
the columns must be declared
NOT NULL */
- ulint* error, /*!< out: error code */
+ fkerr_t* error, /*!< out: error code */
ulint* err_col_no,
/*!< out: column number where
error happened */
@@ -686,7 +669,7 @@ dict_foreign_qualify_index(
#ifdef UNIV_DEBUG
/********************************************************************//**
Gets the first index on the table (the clustered index).
-@return index, NULL if none exists */
+@return index, NULL if none exists */
UNIV_INLINE
dict_index_t*
dict_table_get_first_index(
@@ -695,7 +678,7 @@ dict_table_get_first_index(
MY_ATTRIBUTE((nonnull, warn_unused_result));
/********************************************************************//**
Gets the last index on the table.
-@return index, NULL if none exists */
+@return index, NULL if none exists */
UNIV_INLINE
dict_index_t*
dict_table_get_last_index(
@@ -704,7 +687,7 @@ dict_table_get_last_index(
MY_ATTRIBUTE((nonnull, warn_unused_result));
/********************************************************************//**
Gets the next index on the table.
-@return index, NULL if none left */
+@return index, NULL if none left */
UNIV_INLINE
dict_index_t*
dict_table_get_next_index(
@@ -716,11 +699,10 @@ dict_table_get_next_index(
# define dict_table_get_last_index(table) UT_LIST_GET_LAST((table)->indexes)
# define dict_table_get_next_index(index) UT_LIST_GET_NEXT(indexes, index)
#endif /* UNIV_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
/* Skip corrupted index */
#define dict_table_skip_corrupt_index(index) \
- while (index && dict_index_is_corrupted(index)) { \
+ while (index && index->is_corrupted()) { \
index = dict_table_get_next_index(index); \
}
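
A minimal model of the loop the macro expands to, with a simplified singly linked index list in place of UT_LIST and a plain flag in place of index->is_corrupted():

    struct index_model {
        struct index_model* next;       /* dict_table_get_next_index() */
        bool                corrupted;  /* index->is_corrupted() */
    };

    static struct index_model* first_usable(struct index_model* index)
    {
        while (index && index->corrupted)   /* skip_corrupt_index */
            index = index->next;
        return index;
    }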
@@ -733,90 +715,119 @@ do { \
/********************************************************************//**
Check whether the index is the clustered index.
-@return nonzero for clustered index, zero for other indexes */
+@return nonzero for clustered index, zero for other indexes */
UNIV_INLINE
ulint
dict_index_is_clust(
/*================*/
const dict_index_t* index) /*!< in: index */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Check if an index is an auto-generated clustered index.
+@param[in]	index	index
+@return true if the index is an auto-generated clustered index. */
+UNIV_INLINE
+bool
+dict_index_is_auto_gen_clust(
+ const dict_index_t* index);
+
/********************************************************************//**
Check whether the index is unique.
-@return nonzero for unique index, zero for other indexes */
+@return nonzero for unique index, zero for other indexes */
UNIV_INLINE
ulint
dict_index_is_unique(
/*=================*/
const dict_index_t* index) /*!< in: index */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
+/********************************************************************//**
+Check whether the index is a spatial index.
+@return nonzero for a spatial index, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_spatial(
+/*==================*/
+ const dict_index_t* index) /*!< in: index */
+ MY_ATTRIBUTE((warn_unused_result));
+
+#define dict_index_has_virtual(index) (index)->has_virtual()
+
/********************************************************************//**
Check whether the index is the insert buffer tree.
-@return nonzero for insert buffer, zero for other indexes */
+@return nonzero for insert buffer, zero for other indexes */
UNIV_INLINE
ulint
dict_index_is_ibuf(
/*===============*/
const dict_index_t* index) /*!< in: index */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/********************************************************************//**
Check whether the index is a secondary index or the insert buffer tree.
-@return nonzero for insert buffer, zero for other indexes */
+@return nonzero for insert buffer, zero for other indexes */
UNIV_INLINE
ulint
dict_index_is_sec_or_ibuf(
/*======================*/
const dict_index_t* index) /*!< in: index */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
-/************************************************************************
-Gets the all the FTS indexes for the table. NOTE: must not be called for
-tables which do not have an FTS-index. */
-UNIV_INTERN
+/** Get all the FTS indexes on a table.
+@param[in] table table
+@param[out] indexes all FTS indexes on this table
+@return number of FTS indexes */
ulint
dict_table_get_all_fts_indexes(
-/*===========================*/
- /* out: number of indexes collected */
- dict_table_t* table, /* in: table */
- ib_vector_t* indexes)/* out: vector for collecting FTS indexes */
- MY_ATTRIBUTE((nonnull));
+ const dict_table_t* table,
+ ib_vector_t* indexes);
+
/********************************************************************//**
-Gets the number of user-defined columns in a table in the dictionary
-cache.
-@return number of user-defined (e.g., not ROW_ID) columns of a table */
+Gets the number of user-defined non-virtual columns in a table in the
+dictionary cache.
+@return number of user-defined (e.g., not ROW_ID) non-virtual
+columns of a table */
UNIV_INLINE
ulint
dict_table_get_n_user_cols(
/*=======================*/
const dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/********************************************************************//**
-Gets the number of system columns in a table in the dictionary cache.
-@return number of system (e.g., ROW_ID) columns of a table */
-UNIV_INLINE
-ulint
-dict_table_get_n_sys_cols(
-/*======================*/
- const dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/********************************************************************//**
-Gets the number of all columns (also system) in a table in the dictionary
-cache.
-@return number of columns of a table */
+Gets the number of all non-virtual columns (also system) in a table
+in the dictionary cache.
+@return number of columns of a table */
UNIV_INLINE
ulint
dict_table_get_n_cols(
/*==================*/
const dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Gets the number of virtual columns in a table in the dictionary cache.
+@param[in] table the table to check
+@return number of virtual columns of a table */
+UNIV_INLINE
+ulint
+dict_table_get_n_v_cols(
+ const dict_table_t* table);
+
+/** Check if a table has indexed virtual columns
+@param[in] table the table to check
+@return true if the table has indexed virtual columns */
+UNIV_INLINE
+bool
+dict_table_has_indexed_v_cols(
+ const dict_table_t* table);
+
/********************************************************************//**
Gets the approximately estimated number of rows in the table.
-@return estimated number of rows */
+@return estimated number of rows */
UNIV_INLINE
ib_uint64_t
dict_table_get_n_rows(
/*==================*/
const dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/********************************************************************//**
Increment the number of rows in the table by one.
Notice that this operation is not protected by any latch, the number is
@@ -837,10 +848,20 @@ dict_table_n_rows_dec(
/*==================*/
dict_table_t* table) /*!< in/out: table */
MY_ATTRIBUTE((nonnull));
+
+/** Get nth virtual column
+@param[in] table target table
+@param[in] col_nr column number in MySQL Table definition
+@return dict_v_col_t ptr */
+dict_v_col_t*
+dict_table_get_nth_v_col_mysql(
+ const dict_table_t* table,
+ ulint col_nr);
+
#ifdef UNIV_DEBUG
/********************************************************************//**
Gets the nth column of a table.
-@return pointer to column object */
+@return pointer to column object */
UNIV_INLINE
dict_col_t*
dict_table_get_nth_col(
@@ -848,9 +869,18 @@ dict_table_get_nth_col(
const dict_table_t* table, /*!< in: table */
ulint pos) /*!< in: position of column */
MY_ATTRIBUTE((nonnull, warn_unused_result));
+/** Gets the nth virtual column of a table.
+@param[in] table table
+@param[in] pos position of virtual column
+@return pointer to virtual column object */
+UNIV_INLINE
+dict_v_col_t*
+dict_table_get_nth_v_col(
+ const dict_table_t* table,
+ ulint pos);
/********************************************************************//**
Gets the given system column of a table.
-@return pointer to column object */
+@return pointer to column object */
UNIV_INLINE
dict_col_t*
dict_table_get_sys_col(
@@ -859,14 +889,15 @@ dict_table_get_sys_col(
ulint sys) /*!< in: DATA_ROW_ID, ... */
MY_ATTRIBUTE((nonnull, warn_unused_result));
#else /* UNIV_DEBUG */
-#define dict_table_get_nth_col(table, pos) \
-((table)->cols + (pos))
-#define dict_table_get_sys_col(table, sys) \
-((table)->cols + (table)->n_cols + (sys) - DATA_N_SYS_COLS)
+#define dict_table_get_nth_col(table, pos) (&(table)->cols[pos])
+#define dict_table_get_sys_col(table, sys) \
+ &(table)->cols[(table)->n_cols + (sys) - DATA_N_SYS_COLS]
+/* Get the nth virtual column */
+#define dict_table_get_nth_v_col(table, pos) (&(table)->v_cols[pos])
#endif /* UNIV_DEBUG */
/********************************************************************//**
Gets the given system column number of a table.
-@return column number */
+@return column number */
UNIV_INLINE
ulint
dict_table_get_sys_col_no(
@@ -874,29 +905,29 @@ dict_table_get_sys_col_no(
const dict_table_t* table, /*!< in: table */
ulint sys) /*!< in: DATA_ROW_ID, ... */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-#ifndef UNIV_HOTBACKUP
+
/********************************************************************//**
Returns the minimum data size of an index record.
-@return minimum data size in bytes */
+@return minimum data size in bytes */
UNIV_INLINE
ulint
dict_index_get_min_size(
/*====================*/
const dict_index_t* index) /*!< in: index */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
Check whether the table uses the compact page format.
-@return TRUE if table uses the compact page format */
+@return TRUE if table uses the compact page format */
UNIV_INLINE
ibool
dict_table_is_comp(
/*===============*/
const dict_table_t* table) /*!< in: table */
MY_ATTRIBUTE((nonnull, warn_unused_result));
+
/********************************************************************//**
Determine the file format of a table.
-@return file format version */
+@return file format version */
UNIV_INLINE
ulint
dict_table_get_format(
@@ -905,68 +936,76 @@ dict_table_get_format(
MY_ATTRIBUTE((nonnull, warn_unused_result));
/********************************************************************//**
Determine the file format from a dict_table_t::flags.
-@return file format version */
+@return file format version */
UNIV_INLINE
ulint
dict_tf_get_format(
/*===============*/
ulint flags) /*!< in: dict_table_t::flags */
MY_ATTRIBUTE((warn_unused_result));
-/********************************************************************//**
-Set the various values in a dict_table_t::flags pointer. */
+
+/** Set the various values in a dict_table_t::flags pointer.
+@param[in,out]	flags		pointer to a 4-byte table flags value
+@param[in]	format		file format
+@param[in]	zip_ssize	zip shift size
+@param[in]	use_data_dir	table uses DATA DIRECTORY
+@param[in]	page_compressed	table uses page compression
+@param[in]	page_compression_level	page compression level
+@param[in]	not_used	reserved for future use */
UNIV_INLINE
void
dict_tf_set(
-/*========*/
- ulint* flags, /*!< in/out: table */
- rec_format_t format, /*!< in: file format */
- ulint zip_ssize, /*!< in: zip shift size */
- bool remote_path, /*!< in: table uses DATA DIRECTORY
- */
- bool page_compressed,/*!< in: table uses page compressed
- pages */
- ulint page_compression_level, /*!< in: table page compression
- level */
- ulint atomic_writes) /*!< in: table atomic
- writes option value*/
- __attribute__((nonnull));
-/********************************************************************//**
-Convert a 32 bit integer table flags to the 32 bit integer that is
-written into the tablespace header at the offset FSP_SPACE_FLAGS and is
-also stored in the fil_space_t::flags field. The following chart shows
-the translation of the low order bit. Other bits are the same.
+ ulint* flags,
+ rec_format_t format,
+ ulint zip_ssize,
+ bool use_data_dir,
+ bool page_compressed,
+ ulint page_compression_level,
+ ulint not_used);
+
+/** Convert a 32 bit integer table flags to the 32 bit FSP Flags.
+Fsp Flags are written into the tablespace header at the offset
+FSP_SPACE_FLAGS and are also stored in the fil_space_t::flags field.
+The following chart shows the translation of the low order bit.
+Other bits are the same.
========================= Low order bit ==========================
| REDUNDANT | COMPACT | COMPRESSED | DYNAMIC
dict_table_t::flags | 0 | 1 | 1 | 1
fil_space_t::flags | 0 | 0 | 1 | 1
==================================================================
-@return tablespace flags (fil_space_t::flags) */
+@param[in] table_flags dict_table_t::flags
+@return tablespace flags (fil_space_t::flags) */
UNIV_INLINE
ulint
-dict_tf_to_fsp_flags(
-/*=================*/
- ulint flags) /*!< in: dict_table_t::flags */
+dict_tf_to_fsp_flags(ulint table_flags)
MY_ATTRIBUTE((const));
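
The chart above determines only the low-order bit; a sketch of just that translation, assuming a "post-Antelope" predicate (COMPRESSED or DYNAMIC) that the real function derives from other flag bits not shown here:

    typedef unsigned long ulint;

    static ulint fsp_low_bit(ulint table_flags, bool post_antelope)
    {
        ulint not_redundant = table_flags & 1;  /* 0=REDUNDANT, 1=others */
        /* COMPACT maps back to 0; COMPRESSED/DYNAMIC keep the bit set */
        return (not_redundant && post_antelope) ? 1 : 0;
    }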
-/********************************************************************//**
-Extract the compressed page size from table flags.
-@return compressed page size, or 0 if not compressed */
+
+/** Extract the page size from table flags.
+@param[in]	flags	dict_table_t::flags
+@return the page size */
UNIV_INLINE
+const page_size_t
+dict_tf_get_page_size(
+ ulint flags)
+MY_ATTRIBUTE((const));
+
+/** Determine the extent size (in pages) for the given table
+@param[in] table the table whose extent size is being
+ calculated.
+@return extent size in pages (256, 128 or 64) */
ulint
-dict_tf_get_zip_size(
-/*=================*/
- ulint flags) /*!< in: flags */
- __attribute__((const));
+dict_table_extent_size(
+ const dict_table_t* table);
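
Consistent with the 256/128/64 values in the comment above, assuming one extent spans 1 MiB at these page sizes; a one-line sketch:

    static unsigned extent_size_in_pages(unsigned page_size_bytes)
    {
        /* 4 KiB -> 256, 8 KiB -> 128, 16 KiB -> 64 */
        return (1u << 20) / page_size_bytes;
    }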
-/********************************************************************//**
-Check whether the table uses the compressed compact page format.
-@return compressed page size, or 0 if not compressed */
+/** Get the table page size.
+@param[in] table table
+@return the page size */
UNIV_INLINE
-ulint
-dict_table_zip_size(
-/*================*/
- const dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#ifndef UNIV_HOTBACKUP
+const page_size_t
+dict_table_page_size(
+ const dict_table_t* table)
+ MY_ATTRIBUTE((warn_unused_result));
+
/*********************************************************************//**
Obtain exclusive locks on all index trees of the table. This is to prevent
accessing index trees while InnoDB is updating internal metadata for
@@ -988,8 +1027,7 @@ dict_table_x_unlock_indexes(
/********************************************************************//**
Checks if a column is in the ordering columns of the clustered index of a
table. Column prefixes are treated like whole columns.
-@return TRUE if the column, or its prefix, is in the clustered key */
-UNIV_INTERN
+@return TRUE if the column, or its prefix, is in the clustered key */
ibool
dict_table_col_in_clustered_key(
/*============================*/
@@ -1005,11 +1043,21 @@ dict_table_has_fts_index(
/*=====================*/
dict_table_t* table) /*!< in: table */
MY_ATTRIBUTE((nonnull, warn_unused_result));
+/** Copies types of virtual columns contained in table to tuple and sets all
+fields of the tuple to the SQL NULL value. This function should
+be called right after dtuple_create().
+@param[in,out]	tuple	data tuple
+@param[in]	table	table */
+void
+dict_table_copy_v_types(
+ dtuple_t* tuple,
+ const dict_table_t* table);
+
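A hedged usage sketch of the call order prescribed above; dtuple_create_with_vcol() and the surrounding variables are assumptions for illustration:

	/* Illustrative only: create a tuple with room for virtual
	columns, then copy the virtual-column types and set those
	fields to SQL NULL. */
	dtuple_t*	tuple = dtuple_create_with_vcol(
		heap,
		dict_table_get_n_cols(table),
		dict_table_get_n_v_cols(table));
	dict_table_copy_v_types(tuple, table);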
/*******************************************************************//**
Copies types of columns contained in table to tuple and sets all
fields of the tuple to the SQL NULL value. This function should
be called right after dtuple_create(). */
-UNIV_INTERN
void
dict_table_copy_types(
/*==================*/
@@ -1020,8 +1068,7 @@ dict_table_copy_types(
Looks for an index with the given id. NOTE that we do not reserve
the dictionary mutex: this function is for emergency purposes like
printing info of a corrupt database page!
-@return index or NULL if not found from cache */
-UNIV_INTERN
+@return index or NULL if not found from cache */
dict_index_t*
dict_index_find_on_id_low(
/*======================*/
@@ -1032,41 +1079,39 @@ Make room in the table cache by evicting an unused table. The unused table
should not be part of FK relationship and currently not used in any user
transaction. There is no guarantee that it will remove a table.
@return number of tables evicted. */
-UNIV_INTERN
ulint
dict_make_room_in_cache(
/*====================*/
ulint max_tables, /*!< in: max tables allowed in cache */
ulint pct_check); /*!< in: max percent to check */
-/**********************************************************************//**
-Adds an index to the dictionary cache.
-@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
-UNIV_INTERN
+
+#define BIG_ROW_SIZE 1024
+
+/** Clears the virtual column's index list before the index is freed.
+@param[in]	index	index being freed */
+void
+dict_index_remove_from_v_col_list(
+ dict_index_t* index);
+
+/** Adds an index to the dictionary cache, possibly indexing a newly
+added column.
+@param[in,out] table table on which the index is
+@param[in,out] index index; NOTE! The index memory
+ object is freed in this function!
+@param[in] page_no root page number of the index
+@param[in] add_v virtual columns being added along with ADD INDEX
+@return DB_SUCCESS, or DB_CORRUPTION */
dberr_t
dict_index_add_to_cache(
-/*====================*/
- dict_table_t* table, /*!< in: table on which the index is */
- dict_index_t* index, /*!< in, own: index; NOTE! The index memory
- object is freed in this function! */
- ulint page_no,/*!< in: root page number of the index */
- ibool strict) /*!< in: TRUE=refuse to create the index
- if records could be too big to fit in
- an B-tree page */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************************//**
-Removes an index from the dictionary cache. */
-UNIV_INTERN
-void
-dict_index_remove_from_cache(
-/*=========================*/
- dict_table_t* table, /*!< in/out: table */
- dict_index_t* index) /*!< in, own: index */
- MY_ATTRIBUTE((nonnull));
-#endif /* !UNIV_HOTBACKUP */
+ dict_table_t* table,
+ dict_index_t*& index,
+ ulint page_no,
+ const dict_add_v_col_t* add_v = NULL)
+ MY_ATTRIBUTE((warn_unused_result));
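Because the index is now passed by reference and, per the NOTE above, the passed-in memory object is freed inside the call, a caller sketch might look as follows (the exact failure semantics are an assumption based on the signature):

	/* Illustrative only: after the call, `index` must only be used
	through the updated reference; the original object is gone. */
	dberr_t	err = dict_index_add_to_cache(table, index, page_no);
	if (err != DB_SUCCESS) {
		return(err);	/* the index object was freed by the callee */
	}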
/********************************************************************//**
Gets the number of fields in the internal representation of an index,
including fields added by the dictionary system.
-@return number of fields */
+@return number of fields */
UNIV_INLINE
ulint
dict_index_get_n_fields(
@@ -1080,7 +1125,7 @@ Gets the number of fields in the internal representation of an index
that uniquely determine the position of an index entry in the index, if
we do not take multiversioning into account: in the B-tree use the value
returned by dict_index_get_n_unique_in_tree.
-@return number of fields */
+@return number of fields */
UNIV_INLINE
ulint
dict_index_get_n_unique(
@@ -1092,7 +1137,7 @@ dict_index_get_n_unique(
Gets the number of fields in the internal representation of an index
which uniquely determine the position of an index entry in the index, if
we also take multiversioning into account.
-@return number of fields */
+@return number of fields */
UNIV_INLINE
ulint
dict_index_get_n_unique_in_tree(
@@ -1100,12 +1145,28 @@ dict_index_get_n_unique_in_tree(
const dict_index_t* index) /*!< in: an internal representation
of index (in the dictionary cache) */
MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** The number of fields in a non-leaf page of a spatial index, except
+the page number field. */
+#define DICT_INDEX_SPATIAL_NODEPTR_SIZE 1
+/**
+Gets the number of fields on nonleaf page level in the internal representation
+of an index which uniquely determine the position of an index entry in the
+index, if we also take multiversioning into account. Note that this
+does not include the page number field.
+@param[in] index index
+@return number of fields */
+UNIV_INLINE
+ulint
+dict_index_get_n_unique_in_tree_nonleaf(
+ const dict_index_t* index)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
/********************************************************************//**
Gets the number of user-defined ordering fields in the index. In the internal
representation we add the row id to the ordering fields to make all indexes
unique, but this function returns the number of fields the user defined
in the index as ordering fields.
-@return number of fields */
+@return number of fields */
UNIV_INLINE
ulint
dict_index_get_n_ordering_defined_by_user(
@@ -1116,7 +1177,7 @@ dict_index_get_n_ordering_defined_by_user(
#ifdef UNIV_DEBUG
/********************************************************************//**
Gets the nth field of an index.
-@return pointer to field object */
+@return pointer to field object */
UNIV_INLINE
dict_field_t*
dict_index_get_nth_field(
@@ -1129,7 +1190,7 @@ dict_index_get_nth_field(
#endif /* UNIV_DEBUG */
/********************************************************************//**
Gets pointer to the nth column in an index.
-@return column */
+@return column */
UNIV_INLINE
const dict_col_t*
dict_index_get_nth_col(
@@ -1139,7 +1200,7 @@ dict_index_get_nth_col(
MY_ATTRIBUTE((nonnull, warn_unused_result));
/********************************************************************//**
Gets the column number of the nth field in an index.
-@return column number */
+@return column number */
UNIV_INLINE
ulint
dict_index_get_nth_col_no(
@@ -1158,32 +1219,41 @@ dict_index_get_nth_col_pos(
const dict_index_t* index, /*!< in: index */
ulint n, /*!< in: column number */
ulint* prefix_col_pos) /*!< out: col num if prefix */
- __attribute__((nonnull(1), warn_unused_result));
-/********************************************************************//**
-Looks for column n in an index.
+ MY_ATTRIBUTE((nonnull(1), warn_unused_result));
+
+/** Looks for column n in an index.
+@param[in] index index
+@param[in] n column number
+@param[in] inc_prefix true=consider column prefixes too
+@param[in]	is_virtual	true=the column is virtual
@return position in internal representation of the index;
ULINT_UNDEFINED if not contained */
-UNIV_INTERN
ulint
dict_index_get_nth_col_or_prefix_pos(
-/*=================================*/
const dict_index_t* index, /*!< in: index */
ulint n, /*!< in: column number */
- ibool inc_prefix, /*!< in: TRUE=consider
+ bool inc_prefix, /*!< in: TRUE=consider
column prefixes too */
- ulint* prefix_col_pos) /*!< out: col num if prefix */
+	bool			is_virtual,	/*!< in: whether it is a
+						virtual column */
+	ulint*			prefix_col_pos)	/*!< out: col num if prefix */
+ __attribute__((warn_unused_result));
- __attribute__((nonnull(1), warn_unused_result));
/********************************************************************//**
Returns TRUE if the index contains a column or a prefix of that column.
-@return TRUE if contains the column or its prefix */
-UNIV_INTERN
+@param[in] index index
+@param[in] n column number
+@param[in] is_virtual whether it is a virtual col
+@return TRUE if contains the column or its prefix */
ibool
dict_index_contains_col_or_prefix(
/*==============================*/
const dict_index_t* index, /*!< in: index */
- ulint n) /*!< in: column number */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ ulint n, /*!< in: column number */
+ bool is_virtual)
+ /*!< in: whether it is a virtual col */
+ MY_ATTRIBUTE((warn_unused_result));
/********************************************************************//**
Looks for a matching field in an index. The column has to be the same. The
column in index must be complete, or must contain a prefix longer than the
@@ -1191,7 +1261,6 @@ column in index2. That is, we must be able to construct the prefix in index2
from the prefix in index.
@return position in internal representation of the index;
ULINT_UNDEFINED if not contained */
-UNIV_INTERN
ulint
dict_index_get_nth_field_pos(
/*=========================*/
@@ -1201,17 +1270,17 @@ dict_index_get_nth_field_pos(
MY_ATTRIBUTE((nonnull, warn_unused_result));
/********************************************************************//**
Looks for column n position in the clustered index.
-@return position in internal representation of the clustered index */
-UNIV_INTERN
+@return position in internal representation of the clustered index */
ulint
dict_table_get_nth_col_pos(
/*=======================*/
const dict_table_t* table, /*!< in: table */
- ulint n) /*!< in: column number */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ ulint n, /*!< in: column number */
+ ulint* prefix_col_pos) /*!< out: col num if prefix */
+ MY_ATTRIBUTE((nonnull(1), warn_unused_result));
/********************************************************************//**
Returns the position of a system column in an index.
-@return position, ULINT_UNDEFINED if not contained */
+@return position, ULINT_UNDEFINED if not contained */
UNIV_INLINE
ulint
dict_index_get_sys_col_pos(
@@ -1221,7 +1290,6 @@ dict_index_get_sys_col_pos(
MY_ATTRIBUTE((nonnull, warn_unused_result));
/*******************************************************************//**
Adds a column to index. */
-UNIV_INTERN
void
dict_index_add_col(
/*===============*/
@@ -1230,10 +1298,9 @@ dict_index_add_col(
dict_col_t* col, /*!< in: column */
ulint prefix_len) /*!< in: column prefix length */
MY_ATTRIBUTE((nonnull));
-#ifndef UNIV_HOTBACKUP
+
/*******************************************************************//**
Copies types of fields contained in index to tuple. */
-UNIV_INTERN
void
dict_index_copy_types(
/*==================*/
@@ -1242,22 +1309,20 @@ dict_index_copy_types(
ulint n_fields) /*!< in: number of
field types to copy */
MY_ATTRIBUTE((nonnull));
-#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Gets the field column.
-@return field->col, pointer to the table column */
+@return field->col, pointer to the table column */
UNIV_INLINE
const dict_col_t*
dict_field_get_col(
/*===============*/
const dict_field_t* field) /*!< in: index field */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-#ifndef UNIV_HOTBACKUP
+
/**********************************************************************//**
Returns an index object if it is found in the dictionary cache.
Assumes that dict_sys->mutex is already being held.
-@return index, NULL if not found */
-UNIV_INTERN
+@return index, NULL if not found */
dict_index_t*
dict_index_get_if_in_cache_low(
/*===========================*/
@@ -1266,8 +1331,7 @@ dict_index_get_if_in_cache_low(
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/**********************************************************************//**
Returns an index object if it is found in the dictionary cache.
-@return index, NULL if not found */
-UNIV_INTERN
+@return index, NULL if not found */
dict_index_t*
dict_index_get_if_in_cache(
/*=======================*/
@@ -1278,8 +1342,7 @@ dict_index_get_if_in_cache(
/**********************************************************************//**
Checks that a tuple has n_fields_cmp value in a sensible range, so that
no comparison can occur with the page number field in a node pointer.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
dict_index_check_search_tuple(
/*==========================*/
@@ -1297,7 +1360,6 @@ enum check_name {
};
/**********************************************************************//**
Check for duplicate index entries in a table [using the index name] */
-UNIV_INTERN
void
dict_table_check_for_dup_indexes(
/*=============================*/
@@ -1309,8 +1371,7 @@ dict_table_check_for_dup_indexes(
#endif /* UNIV_DEBUG */
/**********************************************************************//**
Builds a node pointer out of a physical record and a page number.
-@return own: node pointer */
-UNIV_INTERN
+@return own: node pointer */
dtuple_t*
dict_index_build_node_ptr(
/*======================*/
@@ -1327,8 +1388,7 @@ dict_index_build_node_ptr(
/**********************************************************************//**
Copies an initial segment of a physical record, long enough to specify an
index entry uniquely.
-@return pointer to the prefix record */
-UNIV_INTERN
+@return pointer to the prefix record */
rec_t*
dict_index_copy_rec_order_prefix(
/*=============================*/
@@ -1340,21 +1400,34 @@ dict_index_copy_rec_order_prefix(
copied prefix, or NULL */
ulint* buf_size)/*!< in/out: buffer size */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************************//**
-Builds a typed data tuple out of a physical record.
-@return own: data tuple */
-UNIV_INTERN
+/** Convert a physical record into a search tuple.
+@param[in] rec index record (not necessarily in an index page)
+@param[in] index index
+@param[in] leaf whether rec is in a leaf page
+@param[in] n_fields number of data fields
+@param[in,out] heap memory heap for allocation
+@return own: data tuple */
dtuple_t*
-dict_index_build_data_tuple(
-/*========================*/
- dict_index_t* index, /*!< in: index */
- rec_t* rec, /*!< in: record for which to build data tuple */
- ulint n_fields,/*!< in: number of data fields */
- mem_heap_t* heap) /*!< in: memory heap where tuple created */
+dict_index_build_data_tuple_func(
+ const rec_t* rec,
+ const dict_index_t* index,
+#ifdef UNIV_DEBUG
+ bool leaf,
+#endif /* UNIV_DEBUG */
+ ulint n_fields,
+ mem_heap_t* heap)
MY_ATTRIBUTE((nonnull, warn_unused_result));
+#ifdef UNIV_DEBUG
+# define dict_index_build_data_tuple(rec, index, leaf, n_fields, heap) \
+ dict_index_build_data_tuple_func(rec, index, leaf, n_fields, heap)
+#else /* UNIV_DEBUG */
+# define dict_index_build_data_tuple(rec, index, leaf, n_fields, heap) \
+ dict_index_build_data_tuple_func(rec, index, n_fields, heap)
+#endif /* UNIV_DEBUG */
+
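The macro pair above compiles the debug-only `leaf` argument out of release builds, so call sites always pass it; a sketch with assumed variable names:

	/* Illustrative only: `leaf` reaches
	dict_index_build_data_tuple_func() only when UNIV_DEBUG is
	defined; the release-mode macro drops it. */
	dtuple_t*	entry = dict_index_build_data_tuple(
		rec, index, true, n_fields, heap);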
/*********************************************************************//**
Gets the space id of the root of the index tree.
-@return space id */
+@return space id */
UNIV_INLINE
ulint
dict_index_get_space(
@@ -1372,7 +1445,7 @@ dict_index_set_space(
MY_ATTRIBUTE((nonnull));
/*********************************************************************//**
Gets the page number of the root of the index tree.
-@return page number */
+@return page number */
UNIV_INLINE
ulint
dict_index_get_page(
@@ -1381,7 +1454,7 @@ dict_index_get_page(
MY_ATTRIBUTE((nonnull, warn_unused_result));
/*********************************************************************//**
Gets the read-write lock of the index tree.
-@return read-write lock */
+@return read-write lock */
UNIV_INLINE
rw_lock_t*
dict_index_get_lock(
@@ -1392,7 +1465,7 @@ dict_index_get_lock(
Returns free space reserved for future updates of records. This is
relevant only in the case of many consecutive inserts, as updates
which make the records bigger might fragment the index.
-@return number of free bytes on page, reserved for updates */
+@return number of free bytes on page, reserved for updates */
UNIV_INLINE
ulint
dict_index_get_space_reserve(void);
@@ -1433,25 +1506,20 @@ dict_index_is_online_ddl(
MY_ATTRIBUTE((nonnull, warn_unused_result));
/*********************************************************************//**
Calculates the minimum record length in an index. */
-UNIV_INTERN
ulint
dict_index_calc_min_rec_len(
/*========================*/
const dict_index_t* index) /*!< in: index */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Reserves the dictionary system mutex for MySQL. */
-UNIV_INTERN
+/** Reserve the dictionary system mutex. */
void
-dict_mutex_enter_for_mysql_func(const char * file, ulint line);
-/*============================*/
+dict_mutex_enter_for_mysql_func(const char *file, unsigned line);
#define dict_mutex_enter_for_mysql() \
dict_mutex_enter_for_mysql_func(__FILE__, __LINE__)
/********************************************************************//**
Releases the dictionary system mutex for MySQL. */
-UNIV_INTERN
void
dict_mutex_exit_for_mysql(void);
/*===========================*/
@@ -1462,7 +1530,6 @@ or from a thread that has not shared the table object with other threads.
@param[in,out] table table whose stats latch to create
@param[in] enabled if false then the latch is disabled
and dict_table_stats_lock()/unlock() become noop on this table. */
-
void
dict_table_stats_latch_create(
dict_table_t* table,
@@ -1472,33 +1539,29 @@ dict_table_stats_latch_create(
This function is only called from either single threaded environment
or from a thread that has not shared the table object with other threads.
@param[in,out] table table whose stats latch to destroy */
-
void
dict_table_stats_latch_destroy(
dict_table_t* table);
-/**********************************************************************//**
-Lock the appropriate latch to protect a given table's statistics.
-table->id is used to pick the corresponding latch from a global array of
-latches. */
-UNIV_INTERN
+/** Lock the appropriate latch to protect a given table's statistics.
+@param[in] table table whose stats to lock
+@param[in] latch_mode RW_S_LATCH or RW_X_LATCH */
void
dict_table_stats_lock(
-/*==================*/
- dict_table_t* table, /*!< in: table */
- ulint latch_mode); /*!< in: RW_S_LATCH or RW_X_LATCH */
-/**********************************************************************//**
-Unlock the latch that has been locked by dict_table_stats_lock() */
-UNIV_INTERN
+ dict_table_t* table,
+ ulint latch_mode);
+
+/** Unlock the latch that has been locked by dict_table_stats_lock().
+@param[in] table table whose stats to unlock
+@param[in] latch_mode RW_S_LATCH or RW_X_LATCH */
void
dict_table_stats_unlock(
-/*====================*/
- dict_table_t* table, /*!< in: table */
- ulint latch_mode); /*!< in: RW_S_LATCH or RW_X_LATCH */
+ dict_table_t* table,
+ ulint latch_mode);
+
/********************************************************************//**
Checks if the database name in two table names is the same.
-@return TRUE if same db name */
-UNIV_INTERN
+@return TRUE if same db name */
ibool
dict_tables_have_same_db(
/*=====================*/
@@ -1507,46 +1570,27 @@ dict_tables_have_same_db(
const char* name2) /*!< in: table name in the form
dbname '/' tablename */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Removes an index from the cache */
-UNIV_INTERN
-void
-dict_index_remove_from_cache(
-/*=========================*/
- dict_table_t* table, /*!< in/out: table */
- dict_index_t* index) /*!< in, own: index */
- MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
-Get index by name
-@return index, NULL if does not exist */
-UNIV_INTERN
-dict_index_t*
-dict_table_get_index_on_name(
-/*=========================*/
- dict_table_t* table, /*!< in: table */
- const char* name) /*!< in: name of the index to find */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************************//**
-Looks for an index with the given id given a table instance.
-@return index or NULL */
-UNIV_INTERN
-dict_index_t*
-dict_table_find_index_on_id(
-/*========================*/
- const dict_table_t* table, /*!< in: table instance */
- index_id_t id) /*!< in: index id */
- __attribute__((nonnull, warn_unused_result));
-/**********************************************************************//**
-In case there is more than one index with the same name return the index
-with the min(id).
-@return index, NULL if does not exist */
-UNIV_INTERN
+
+/** Get an index by name.
+@param[in]	table	the table in which to look for the index
+@param[in]	name	the index name to look for
+@return index, or NULL if it does not exist */
dict_index_t*
-dict_table_get_index_on_name_and_min_id(
-/*====================================*/
- dict_table_t* table, /*!< in: table */
- const char* name) /*!< in: name of the index to find */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+dict_table_get_index_on_name(dict_table_t* table, const char* name)
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Get an index by name.
+@param[in]	table	the table in which to look for the index
+@param[in]	name	the index name to look for
+@return index, or NULL if it does not exist */
+inline
+const dict_index_t*
+dict_table_get_index_on_name(const dict_table_t* table, const char* name)
+{
+ return dict_table_get_index_on_name(const_cast<dict_table_t*>(table),
+ name);
+}
+
/***************************************************************
Check whether a column exists in an FTS index. */
UNIV_INLINE
@@ -1556,27 +1600,39 @@ dict_table_is_fts_column(
/* out: ULINT_UNDEFINED if no match else
the offset within the vector */
ib_vector_t* indexes,/* in: vector containing only FTS indexes */
- ulint col_no) /* in: col number to search for */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+	ulint		col_no,		/* in: col number to search for */
+	bool		is_virtual)	/*!< in: whether it is a virtual column */
+ MY_ATTRIBUTE((warn_unused_result));
/**********************************************************************//**
-Move a table to the non LRU end of the LRU list. */
-UNIV_INTERN
+Prevent table eviction by moving the table from the LRU list to the
+non-LRU list if it is not already there. */
+UNIV_INLINE
void
-dict_table_move_from_lru_to_non_lru(
-/*================================*/
- dict_table_t* table) /*!< in: table to move from LRU to non-LRU */
+dict_table_prevent_eviction(
+/*========================*/
+ dict_table_t* table) /*!< in: table to prevent eviction */
MY_ATTRIBUTE((nonnull));
+
/**********************************************************************//**
-Move a table to the LRU list from the non-LRU list. */
-UNIV_INTERN
+Move a table to the non LRU end of the LRU list. */
void
-dict_table_move_from_non_lru_to_lru(
+dict_table_move_from_lru_to_non_lru(
/*================================*/
- dict_table_t* table) /*!< in: table to move from non-LRU to LRU */
+ dict_table_t* table) /*!< in: table to move from LRU to non-LRU */
MY_ATTRIBUTE((nonnull));
+
+/** Looks for an index with the given id given a table instance.
+@param[in] table table instance
+@param[in] id index id
+@return index or NULL */
+dict_index_t*
+dict_table_find_index_on_id(
+ const dict_table_t* table,
+ index_id_t id)
+ MY_ATTRIBUTE((nonnull(1)));
+
/**********************************************************************//**
Move to the most recently used segment of the LRU list. */
-UNIV_INTERN
void
dict_move_to_mru(
/*=============*/
@@ -1590,19 +1646,18 @@ constraint */
/* Buffers for storing detailed information about the latest foreign key
and unique key errors */
-extern FILE* dict_foreign_err_file;
-extern ib_mutex_t dict_foreign_err_mutex; /* mutex protecting the buffers */
+extern FILE* dict_foreign_err_file;
+extern ib_mutex_t dict_foreign_err_mutex; /* mutex protecting the
+ foreign key error messages */
/** the dictionary system */
extern dict_sys_t* dict_sys;
/** the data dictionary rw-latch protecting dict_sys */
extern rw_lock_t dict_operation_lock;
-typedef std::map<table_id_t, ib_uint64_t> autoinc_map_t;
-
/* Dictionary system struct */
struct dict_sys_t{
- ib_mutex_t mutex; /*!< mutex protecting the data
+ DictSysMutex mutex; /*!< mutex protecting the data
dictionary; protects also the
disk-based dictionary system tables;
this mutex serializes CREATE TABLE
@@ -1623,6 +1678,7 @@ struct dict_sys_t{
dict_table_t* sys_columns; /*!< SYS_COLUMNS table */
dict_table_t* sys_indexes; /*!< SYS_INDEXES table */
dict_table_t* sys_fields; /*!< SYS_FIELDS table */
+ dict_table_t* sys_virtual; /*!< SYS_VIRTUAL table */
/*=============================*/
UT_LIST_BASE_NODE_T(dict_table_t)
@@ -1631,22 +1687,18 @@ struct dict_sys_t{
UT_LIST_BASE_NODE_T(dict_table_t)
table_non_LRU; /*!< List of tables that can't be
evicted from the cache */
- autoinc_map_t* autoinc_map; /*!< Map to store table id and autoinc
- when table is evicted */
};
-#endif /* !UNIV_HOTBACKUP */
/** dummy index for ROW_FORMAT=REDUNDANT supremum and infimum records */
extern dict_index_t* dict_ind_redundant;
-/** dummy index for ROW_FORMAT=COMPACT supremum and infimum records */
-extern dict_index_t* dict_ind_compact;
-/**********************************************************************//**
-Inits dict_ind_redundant and dict_ind_compact. */
-UNIV_INTERN
+/** Initialize dict_ind_redundant. */
void
-dict_ind_init(void);
-/*===============*/
+dict_ind_init();
+
+/** Free dict_ind_redundant. */
+void
+dict_ind_free();
/* Auxiliary structs for checking a table definition @{ */
@@ -1663,7 +1715,7 @@ struct dict_col_meta_t {
};
/* This struct is used for checking whether a given table exists and
-whether it has a predefined schema (number of columns and columns names
+whether it has a predefined schema (number of columns and column names
and types) */
struct dict_table_schema_t {
const char* table_name; /* the name of the table whose
@@ -1691,7 +1743,6 @@ types. The order of the columns does not matter.
The caller must own the dictionary mutex.
dict_table_schema_check() @{
@return DB_SUCCESS if the table exists and contains the necessary columns */
-UNIV_INTERN
dberr_t
dict_table_schema_check(
/*====================*/
@@ -1710,7 +1761,6 @@ Converts a database and table name from filesystem encoding
(e.g. d@i1b/a@q1b@1Kc, same format as used in dict_table_t::name) in two
strings in UTF8 encoding (e.g. dцb and aюbØc). The output buffers must be
at least MAX_DB_UTF8_LEN and MAX_TABLE_UTF8_LEN bytes. */
-UNIV_INTERN
void
dict_fs2utf8(
/*=========*/
@@ -1722,16 +1772,19 @@ dict_fs2utf8(
size_t table_utf8_size)/*!< in: table_utf8 size */
MY_ATTRIBUTE((nonnull));
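A hedged usage sketch of the conversion described above; the buffer sizing follows the comment, while the table name member access is an assumption:

	/* Illustrative only: convert a filesystem-encoded
	"db@xyz/t@abc" name into two UTF-8 strings. */
	char	db_utf8[MAX_DB_UTF8_LEN];
	char	table_utf8[MAX_TABLE_UTF8_LEN];
	dict_fs2utf8(table->name.m_name,
		     db_utf8, sizeof db_utf8,
		     table_utf8, sizeof table_utf8);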
+/** Resize the hash tables based on the current buffer pool size. */
+void
+dict_resize();
+
/**********************************************************************//**
Closes the data dictionary module. */
-UNIV_INTERN
void
dict_close(void);
/*============*/
-#ifndef UNIV_HOTBACKUP
+
/**********************************************************************//**
Check whether the table is corrupted.
-@return nonzero for corrupted table, zero for valid tables */
+@return nonzero for corrupted table, zero for valid tables */
UNIV_INLINE
ulint
dict_table_is_corrupted(
@@ -1740,52 +1793,34 @@ dict_table_is_corrupted(
MY_ATTRIBUTE((nonnull, warn_unused_result));
/**********************************************************************//**
-Check whether the index is corrupted.
-@return nonzero for corrupted index, zero for valid indexes */
-UNIV_INLINE
-ulint
-dict_index_is_corrupted(
-/*====================*/
- const dict_index_t* index) /*!< in: index */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-#endif /* !UNIV_HOTBACKUP */
-/**********************************************************************//**
Flags an index and table corrupted both in the data dictionary cache
and in the system table SYS_INDEXES. */
-UNIV_INTERN
void
dict_set_corrupted(
/*===============*/
dict_index_t* index, /*!< in/out: index */
trx_t* trx, /*!< in/out: transaction */
const char* ctx) /*!< in: context */
- UNIV_COLD MY_ATTRIBUTE((nonnull));
+ ATTRIBUTE_COLD __attribute__((nonnull));
-/**********************************************************************//**
-Flags an index corrupted in the data dictionary cache only. This
+/** Flags an index corrupted in the data dictionary cache only. This
is used mostly to mark a corrupted index when index's own dictionary
-is corrupted, and we force to load such index for repair purpose */
-UNIV_INTERN
+is corrupted, and we force the index to be loaded for repair purposes.
+@param[in,out] index index that is corrupted */
void
dict_set_corrupted_index_cache_only(
-/*================================*/
- dict_index_t* index, /*!< in/out: index */
- dict_table_t* table) /*!< in/out: table */
- MY_ATTRIBUTE((nonnull));
+ dict_index_t* index);
/**********************************************************************//**
Flags a table with specified space_id corrupted in the table dictionary
cache.
@return TRUE if successful */
-UNIV_INTERN
ibool
dict_set_corrupted_by_space(
/*========================*/
ulint space_id); /*!< in: space ID */
-/**********************************************************************//**
-Flags a table with specified space_id encrypted in the data dictionary
+/** Flag a table with the specified space_id as encrypted in the data dictionary
cache
@param[in] space_id Tablespace id */
UNIV_INTERN
@@ -1793,41 +1828,56 @@ void
dict_set_encrypted_by_space(
ulint space_id);
-/********************************************************************//**
-Validate the table flags.
-@return true if valid. */
+/** Sets merge_threshold in the SYS_INDEXES table.
+@param[in,out] index index
+@param[in] merge_threshold value to set */
+void
+dict_index_set_merge_threshold(
+ dict_index_t* index,
+ ulint merge_threshold);
+
+#ifdef UNIV_DEBUG
+/** Sets merge_threshold for all indexes in dictionary cache for debug.
+@param[in] merge_threshold_all value to set for all indexes */
+void
+dict_set_merge_threshold_all_debug(
+ uint merge_threshold_all);
+#endif /* UNIV_DEBUG */
+
+/** Validate the table flags.
+@param[in] flags Table flags
+@return true if valid. */
UNIV_INLINE
bool
dict_tf_is_valid(
-/*=============*/
- ulint flags) /*!< in: table flags */
- MY_ATTRIBUTE((warn_unused_result));
+ ulint flags);
-/********************************************************************//**
-Check if the tablespace for the table has been discarded.
-@return true if the tablespace has been discarded. */
+/** Validate both table flags and table flags2 and make sure they
+are compatible.
+@param[in] flags Table flags
+@param[in] flags2 Table flags2
+@return true if valid. */
UNIV_INLINE
bool
-dict_table_is_discarded(
-/*====================*/
- const dict_table_t* table) /*!< in: table to check */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+dict_tf2_is_valid(
+ ulint flags,
+ ulint flags2);
/********************************************************************//**
-Check if it is a temporary table.
-@return true if temporary table flag is set. */
+Check if the tablespace for the table has been discarded.
+@return true if the tablespace has been discarded. */
UNIV_INLINE
bool
-dict_table_is_temporary(
+dict_table_is_discarded(
/*====================*/
const dict_table_t* table) /*!< in: table to check */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
+
+#define dict_table_is_temporary(table) (table)->is_temporary()
-#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
This function should be called whenever a page is successfully
compressed. Updates the compression padding information. */
-UNIV_INTERN
void
dict_index_zip_success(
/*===================*/
@@ -1836,7 +1886,6 @@ dict_index_zip_success(
/*********************************************************************//**
This function should be called whenever a page compression attempt
fails. Updates the compression padding information. */
-UNIV_INTERN
void
dict_index_zip_failure(
/*===================*/
@@ -1845,7 +1894,6 @@ dict_index_zip_failure(
/*********************************************************************//**
Return the optimal page size, for which page will likely compress.
@return page size beyond which page may not compress*/
-UNIV_INTERN
ulint
dict_index_zip_pad_optimal_page_size(
/*=================================*/
@@ -1855,22 +1903,35 @@ dict_index_zip_pad_optimal_page_size(
/*************************************************************//**
Convert table flag to row format string.
@return row format name */
-UNIV_INTERN
const char*
dict_tf_to_row_format_string(
/*=========================*/
ulint table_flag); /*!< in: row format setting */
-/*****************************************************************//**
-Get index by first field of the index
-@return index which is having first field matches
-with the field present in field_index position of table */
+
+#define dict_col_is_virtual(col) (col)->is_virtual()
+
+/** Encode the number of columns and the number of virtual columns in one
+4-byte value. This is possible because the number of columns in
+InnoDB is limited to 1017.
+@param[in]	n_col	number of non-virtual columns
+@param[in]	n_v_col	number of virtual columns
+@return encoded value */
UNIV_INLINE
-dict_index_t*
-dict_table_get_index_on_first_col(
-/*==============================*/
- const dict_table_t* table, /*!< in: table */
- ulint col_index); /*!< in: position of column
- in table */
+ulint
+dict_table_encode_n_col(
+ ulint n_col,
+ ulint n_v_col);
+
+/** Decode the number of virtual and non-virtual columns from one
+4-byte value.
+@param[in]	encoded	encoded value
+@param[in,out]	n_col	number of non-virtual columns
+@param[in,out]	n_v_col	number of virtual columns */
+UNIV_INLINE
+void
+dict_table_decode_n_col(
+ ulint encoded,
+ ulint* n_col,
+ ulint* n_v_col);
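Since InnoDB allows at most 1017 columns, both counts fit in 16 bits; a plausible packing, given as an assumption because the .ic implementation is not part of this hunk:

	/* Illustrative only: low 16 bits hold the non-virtual count,
	the next 16 bits hold the virtual count. */
	inline ulint
	sketch_encode_n_col(ulint n_col, ulint n_v_col)
	{
		return(n_col | (n_v_col << 16));
	}

	inline void
	sketch_decode_n_col(ulint encoded, ulint* n_col, ulint* n_v_col)
	{
		*n_col = encoded & 0xFFFF;
		*n_v_col = encoded >> 16;
	}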
/** Calculate the used memory occupied by the data dictionary
table and index objects.
@@ -1879,11 +1940,35 @@ UNIV_INTERN
ulint
dict_sys_get_size();
-#endif /* !UNIV_HOTBACKUP */
+/** Look for any dictionary objects that are found in the given tablespace.
+@param[in]	space_id	Tablespace ID to search for.
+@return true if the tablespace contains no dictionary objects. */
+bool
+dict_space_is_empty(
+ ulint space_id);
+
+/** Find the space_id for the given name in sys_tablespaces.
+@param[in] name Tablespace name to search for.
+@return the tablespace ID. */
+ulint
+dict_space_get_id(
+ const char* name);
+
+/** Free the virtual column template
+@param[in,out] vc_templ virtual column template */
+UNIV_INLINE
+void
+dict_free_vc_templ(
+ dict_vcol_templ_t* vc_templ);
+/** Check whether the table has a virtual index.
+@param[in]	table	InnoDB table
+@return true if the table has a virtual index, false otherwise. */
+UNIV_INLINE
+bool
+dict_table_have_virtual_index(
+ dict_table_t* table);
-#ifndef UNIV_NONINL
#include "dict0dict.ic"
-#endif
#endif
diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic
index bd1d529f753..bb77bb7e6e6 100644
--- a/storage/innobase/include/dict0dict.ic
+++ b/storage/innobase/include/dict0dict.ic
@@ -24,13 +24,7 @@ Data dictionary system
Created 1/8/1996 Heikki Tuuri
***********************************************************************/
-#include "data0type.h"
-#ifndef UNIV_HOTBACKUP
-#include "dict0load.h"
-#include "rem0types.h"
-#include "fsp0fsp.h"
-#include "srv0srv.h"
-#include "sync0rw.h" /* RW_S_LATCH */
+#include "fsp0sysspace.h"
/*********************************************************************//**
Gets the minimum number of bytes per character.
@@ -72,12 +66,11 @@ dict_col_copy_type(
type->mbminlen = col->mbminlen;
type->mbmaxlen = col->mbmaxlen;
}
-#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_DEBUG
/*********************************************************************//**
Assert that a column and a data type match.
-@return TRUE */
+@return TRUE */
UNIV_INLINE
ibool
dict_col_type_assert_equal(
@@ -88,19 +81,16 @@ dict_col_type_assert_equal(
ut_ad(col->mtype == type->mtype);
ut_ad(col->prtype == type->prtype);
//ut_ad(col->len == type->len);
-# ifndef UNIV_HOTBACKUP
ut_ad(col->mbminlen == type->mbminlen);
ut_ad(col->mbmaxlen == type->mbmaxlen);
-# endif /* !UNIV_HOTBACKUP */
return(TRUE);
}
#endif /* UNIV_DEBUG */
-#ifndef UNIV_HOTBACKUP
/***********************************************************************//**
Returns the minimum size of the column.
-@return minimum size */
+@return minimum size */
UNIV_INLINE
ulint
dict_col_get_min_size(
@@ -112,7 +102,7 @@ dict_col_get_min_size(
}
/***********************************************************************//**
Returns the maximum size of the column.
-@return maximum size */
+@return maximum size */
UNIV_INLINE
ulint
dict_col_get_max_size(
@@ -121,10 +111,9 @@ dict_col_get_max_size(
{
return(dtype_get_max_size_low(col->mtype, col->len));
}
-#endif /* !UNIV_HOTBACKUP */
/***********************************************************************//**
Returns the size of a fixed size column, 0 if not a fixed size column.
-@return fixed size, or 0 */
+@return fixed size, or 0 */
UNIV_INLINE
ulint
dict_col_get_fixed_size(
@@ -138,7 +127,7 @@ dict_col_get_fixed_size(
/***********************************************************************//**
Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column.
For fixed length types it is the fixed length of the type, otherwise 0.
-@return SQL null storage size in ROW_FORMAT=REDUNDANT */
+@return SQL null storage size in ROW_FORMAT=REDUNDANT */
UNIV_INLINE
ulint
dict_col_get_sql_null_size(
@@ -151,7 +140,7 @@ dict_col_get_sql_null_size(
/*********************************************************************//**
Gets the column number.
-@return col->ind, table column position (starting from 0) */
+@return col->ind, table column position (starting from 0) */
UNIV_INLINE
ulint
dict_col_get_no(
@@ -185,11 +174,33 @@ dict_col_get_clust_pos(
return(ULINT_UNDEFINED);
}
-#ifndef UNIV_HOTBACKUP
+/** Gets the column position in the given index.
+@param[in] col table column
+@param[in] index index to be searched for column
+@return position of column in the given index. */
+UNIV_INLINE
+ulint
+dict_col_get_index_pos(
+ const dict_col_t* col,
+ const dict_index_t* index)
+{
+ ulint i;
+
+ for (i = 0; i < index->n_def; i++) {
+ const dict_field_t* field = &index->fields[i];
+
+ if (!field->prefix_len && field->col == col) {
+ return(i);
+ }
+ }
+
+ return(ULINT_UNDEFINED);
+}
+
#ifdef UNIV_DEBUG
/********************************************************************//**
Gets the first index on the table (the clustered index).
-@return index, NULL if none exists */
+@return index, NULL if none exists */
UNIV_INLINE
dict_index_t*
dict_table_get_first_index(
@@ -203,7 +214,7 @@ dict_table_get_first_index(
/********************************************************************//**
Gets the last index on the table.
-@return index, NULL if none exists */
+@return index, NULL if none exists */
UNIV_INLINE
dict_index_t*
dict_table_get_last_index(
@@ -217,7 +228,7 @@ dict_table_get_last_index(
/********************************************************************//**
Gets the next index on the table.
-@return index, NULL if none left */
+@return index, NULL if none left */
UNIV_INLINE
dict_index_t*
dict_table_get_next_index(
@@ -228,11 +239,10 @@ dict_table_get_next_index(
return(UT_LIST_GET_NEXT(indexes, (dict_index_t*) index));
}
#endif /* UNIV_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
Check whether the index is the clustered index.
-@return nonzero for clustered index, zero for other indexes */
+@return nonzero for clustered index, zero for other indexes */
UNIV_INLINE
ulint
dict_index_is_clust(
@@ -242,9 +252,22 @@ dict_index_is_clust(
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
return(index->type & DICT_CLUSTERED);
}
+
+/** Check if the index is an auto-generated clustered index.
+@param[in]	index	index
+@return true if the index is an auto-generated clustered index. */
+UNIV_INLINE
+bool
+dict_index_is_auto_gen_clust(
+ const dict_index_t* index)
+{
+ return(index->type == DICT_CLUSTERED);
+}
+
/********************************************************************//**
Check whether the index is unique.
-@return nonzero for unique index, zero for other indexes */
+@return nonzero for unique index, zero for other indexes */
UNIV_INLINE
ulint
dict_index_is_unique(
@@ -256,53 +279,49 @@ dict_index_is_unique(
}
/********************************************************************//**
-Check whether the index is the insert buffer tree.
-@return nonzero for insert buffer, zero for other indexes */
+Check whether the index is a spatial index.
+@return nonzero for a spatial index, zero for other indexes */
UNIV_INLINE
ulint
-dict_index_is_ibuf(
-/*===============*/
+dict_index_is_spatial(
+/*==================*/
const dict_index_t* index) /*!< in: index */
{
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- return(index->type & DICT_IBUF);
+ return(index->type & DICT_SPATIAL);
}
/********************************************************************//**
-Check whether the index is an universal index tree.
-@return nonzero for universal tree, zero for other indexes */
+Check whether the index is the insert buffer tree.
+@return nonzero for insert buffer, zero for other indexes */
UNIV_INLINE
ulint
-dict_index_is_univ(
+dict_index_is_ibuf(
/*===============*/
const dict_index_t* index) /*!< in: index */
{
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- return(index->type & DICT_UNIVERSAL);
+ return(index->type & DICT_IBUF);
}
/********************************************************************//**
Check whether the index is a secondary index or the insert buffer tree.
-@return nonzero for insert buffer, zero for other indexes */
+@return nonzero for insert buffer, zero for other indexes */
UNIV_INLINE
ulint
dict_index_is_sec_or_ibuf(
/*======================*/
const dict_index_t* index) /*!< in: index */
{
- ulint type;
-
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- type = index->type;
-
- return(!(type & DICT_CLUSTERED) || (type & DICT_IBUF));
+ return((index->type & (DICT_CLUSTERED | DICT_IBUF)) != DICT_CLUSTERED);
}
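The single mask test above is equivalent to the removed two-clause form; a truth-table sketch over the two relevant type bits:

	/* Illustrative only:
	CLUSTERED  IBUF   type & (CLUSTERED|IBUF)   sec_or_ibuf?
	    0       0     0                         yes (secondary)
	    0       1     IBUF                      yes (ibuf)
	    1       0     CLUSTERED                 no  (clustered)
	    1       1     CLUSTERED|IBUF            yes            */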
/********************************************************************//**
-Gets the number of user-defined columns in a table in the dictionary
-cache.
-@return number of user-defined (e.g., not ROW_ID) columns of a table */
+Gets the number of user-defined non-virtual columns in a table in the
+dictionary cache.
+@return number of user-defined (e.g., not ROW_ID) non-virtual
+columns of a table */
UNIV_INLINE
ulint
dict_table_get_n_user_cols(
@@ -310,44 +329,59 @@ dict_table_get_n_user_cols(
const dict_table_t* table) /*!< in: table */
{
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
return(table->n_cols - DATA_N_SYS_COLS);
}
/********************************************************************//**
-Gets the number of system columns in a table in the dictionary cache.
-@return number of system (e.g., ROW_ID) columns of a table */
+Gets the number of all non-virtual columns (also system) in a table
+in the dictionary cache.
+@return number of non-virtual columns of a table */
UNIV_INLINE
ulint
-dict_table_get_n_sys_cols(
-/*======================*/
- const dict_table_t* table MY_ATTRIBUTE((unused))) /*!< in: table */
+dict_table_get_n_cols(
+/*==================*/
+ const dict_table_t* table) /*!< in: table */
{
- ut_ad(table);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
- ut_ad(table->cached);
-
- return(DATA_N_SYS_COLS);
+ return(table->n_cols);
}
-/********************************************************************//**
-Gets the number of all columns (also system) in a table in the dictionary
-cache.
-@return number of columns of a table */
+/** Gets the number of virtual columns in a table in the dictionary cache.
+@param[in] table the table to check
+@return number of virtual columns of a table */
UNIV_INLINE
ulint
-dict_table_get_n_cols(
-/*==================*/
- const dict_table_t* table) /*!< in: table */
+dict_table_get_n_v_cols(
+ const dict_table_t* table)
{
+ ut_ad(table);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
- return(table->n_cols);
+ return(table->n_v_cols);
+}
+
+/** Check if a table has indexed virtual columns.
+@param[in]	table	the table to check
+@return true if the table has indexed virtual columns */
+UNIV_INLINE
+bool
+dict_table_has_indexed_v_cols(
+ const dict_table_t* table)
+{
+
+ for (ulint i = 0; i < table->n_v_cols; i++) {
+ const dict_v_col_t* col = dict_table_get_nth_v_col(table, i);
+ if (col->m_col.ord_part) {
+ return(true);
+ }
+ }
+
+ return(false);
}
/********************************************************************//**
Gets the approximately estimated number of rows in the table.
-@return estimated number of rows */
+@return estimated number of rows */
UNIV_INLINE
ib_uint64_t
dict_table_get_n_rows(
@@ -398,7 +432,7 @@ dict_table_n_rows_dec(
#ifdef UNIV_DEBUG
/********************************************************************//**
Gets the nth column of a table.
-@return pointer to column object */
+@return pointer to column object */
UNIV_INLINE
dict_col_t*
dict_table_get_nth_col(
@@ -412,9 +446,26 @@ dict_table_get_nth_col(
return((dict_col_t*) (table->cols) + pos);
}
+/** Gets the nth virtual column of a table.
+@param[in] table table
+@param[in] pos position of virtual column
+@return pointer to virtual column object */
+UNIV_INLINE
+dict_v_col_t*
+dict_table_get_nth_v_col(
+ const dict_table_t* table,
+ ulint pos)
+{
+ ut_ad(table);
+ ut_ad(pos < table->n_v_def);
+ ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+ return(static_cast<dict_v_col_t*>(table->v_cols) + pos);
+}
+
/********************************************************************//**
Gets the given system column of a table.
-@return pointer to column object */
+@return pointer to column object */
UNIV_INLINE
dict_col_t*
dict_table_get_sys_col(
@@ -424,12 +475,11 @@ dict_table_get_sys_col(
{
dict_col_t* col;
- ut_ad(table);
ut_ad(sys < DATA_N_SYS_COLS);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
col = dict_table_get_nth_col(table, table->n_cols
- - DATA_N_SYS_COLS + sys);
+ + (sys - DATA_N_SYS_COLS));
ut_ad(col->mtype == DATA_SYS);
ut_ad(col->prtype == (sys | DATA_NOT_NULL));
@@ -439,7 +489,7 @@ dict_table_get_sys_col(
/********************************************************************//**
Gets the given system column number of a table.
-@return column number */
+@return column number */
UNIV_INLINE
ulint
dict_table_get_sys_col_no(
@@ -449,13 +499,12 @@ dict_table_get_sys_col_no(
{
ut_ad(sys < DATA_N_SYS_COLS);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- return(table->n_cols - DATA_N_SYS_COLS + sys);
+ return table->n_cols + (sys - DATA_N_SYS_COLS);
}
/********************************************************************//**
Check whether the table uses the compact page format.
-@return TRUE if table uses the compact page format */
+@return TRUE if table uses the compact page format */
UNIV_INLINE
ibool
dict_table_is_comp(
@@ -483,267 +532,98 @@ dict_table_has_fts_index(
return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS));
}
-/********************************************************************//**
-Validate the table flags.
-@return true if valid. */
-UNIV_INLINE
+/** Validate the flags for tables that are not ROW_FORMAT=REDUNDANT.
+@param[in] flags table flags
+@return whether the flags are valid */
+inline
bool
-dict_tf_is_valid(
-/*=============*/
- ulint flags) /*!< in: table flags */
+dict_tf_is_valid_not_redundant(ulint flags)
{
- ulint compact = DICT_TF_GET_COMPACT(flags);
+ const bool atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(flags);
+
ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags);
- ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(flags);
- ulint unused = DICT_TF_GET_UNUSED(flags);
- ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(flags);
- ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags);
- ulint data_dir = DICT_TF_HAS_DATA_DIR(flags);
- ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(flags);
-
- /* Make sure there are no bits that we do not know about. */
- if (unused != 0) {
- fprintf(stderr,
- "InnoDB: Error: table unused flags are " ULINTPF
- " in the data dictionary and are corrupted\n"
- "InnoDB: Error: data dictionary flags are\n"
- "InnoDB: compact " ULINTPF " atomic_blobs " ULINTPF
- "\nInnoDB: unused " ULINTPF " data_dir " ULINTPF
- " zip_ssize " ULINTPF
- "\nInnoDB: page_compression " ULINTPF
- " page_compression_level " ULINTPF
- "\nInnoDB: atomic_writes " ULINTPF "\n",
- unused,
- compact, atomic_blobs, unused, data_dir, zip_ssize,
- page_compression, page_compression_level, atomic_writes
- );
+ if (!zip_ssize) {
+ /* Not ROW_FORMAT=COMPRESSED */
+ } else if (!atomic_blobs) {
+ /* ROW_FORMAT=COMPRESSED implies ROW_FORMAT=DYNAMIC
+ for the uncompressed page format */
return(false);
-
- } else if (atomic_blobs) {
- /* Barracuda row formats COMPRESSED and DYNAMIC build on
- the page structure introduced for the COMPACT row format
- by allowing keys in secondary indexes to be made from
- data stored off-page in the clustered index. */
-
- if (!compact) {
- fprintf(stderr,
- "InnoDB: Error: table compact flags are "
- ULINTPF
- " in the data dictionary and are corrupted\n"
- "InnoDB: Error: data dictionary flags are\n"
- "InnoDB: compact " ULINTPF
- " atomic_blobs " ULINTPF "\n"
- "InnoDB: unused " ULINTPF
- " data_dir " ULINTPF " zip_ssize " ULINTPF
- "\nInnoDB: page_compression " ULINTPF
- " page_compression_level " ULINTPF
- "\nInnoDB: atomic_writes " ULINTPF "\n",
- compact, compact, atomic_blobs, unused, data_dir, zip_ssize,
- page_compression, page_compression_level, atomic_writes
- );
- return(false);
- }
-
- } else if (zip_ssize) {
-
- /* Antelope does not support COMPRESSED row format. */
- fprintf(stderr,
- "InnoDB: Error: table flags are " ULINTPF
- " in the data dictionary and are corrupted\n"
- "InnoDB: Error: data dictionary flags are\n"
- "InnoDB: compact " ULINTPF " atomic_blobs " ULINTPF
- "\nInnoDB: unused " ULINTPF " data_dir " ULINTPF
- " zip_ssize " ULINTPF
- "\nInnoDB: page_compression " ULINTPF
- " page_compression_level " ULINTPF
- "\nInnoDB: atomic_writes " ULINTPF "\n",
- flags, compact, atomic_blobs, unused, data_dir, zip_ssize,
- page_compression, page_compression_level, atomic_writes
- );
+ } else if (zip_ssize > PAGE_ZIP_SSIZE_MAX
+ || zip_ssize > UNIV_PAGE_SIZE_SHIFT
+ || UNIV_PAGE_SIZE_SHIFT > UNIV_ZIP_SIZE_SHIFT_MAX) {
+ /* KEY_BLOCK_SIZE is out of bounds, or
+ ROW_FORMAT=COMPRESSED is not supported with this
+ innodb_page_size (only up to 16KiB) */
return(false);
}
- if (zip_ssize) {
-
- /* COMPRESSED row format must have compact and atomic_blobs
- bits set and validate the number is within allowed range. */
-
- if (!compact
- || !atomic_blobs
- || zip_ssize > PAGE_ZIP_SSIZE_MAX) {
-
- fprintf(stderr,
- "InnoDB: Error: table compact flags are "
- ULINTPF
- " in the data dictionary and are corrupted\n"
- "InnoDB: Error: data dictionary flags are\n"
- "InnoDB: compact " ULINTPF
- " atomic_blobs " ULINTPF "\n"
- "InnoDB: unused " ULINTPF
- " data_dir " ULINTPF " zip_ssize " ULINTPF
- "\nInnoDB: page_compression " ULINTPF
- " page_compression_level " ULINTPF
- "\nInnoDB: atomic_writes " ULINTPF "\n",
- flags,
- compact, atomic_blobs, unused, data_dir, zip_ssize,
- page_compression, page_compression_level, atomic_writes
-
- );
- return(false);
- }
- }
-
- if (page_compression || page_compression_level) {
- /* Page compression format must have compact and
- atomic_blobs and page_compression_level requires
- page_compression */
- if (!compact
- || !page_compression
- || !atomic_blobs) {
-
- fprintf(stderr,
- "InnoDB: Error: table flags are " ULINTPF
- " in the data dictionary and are corrupted\n"
- "InnoDB: Error: data dictionary flags are\n"
- "InnoDB: compact " ULINTPF
- " atomic_blobs " ULINTPF "\n"
- "InnoDB: unused " ULINTPF
- " data_dir " ULINTPF " zip_ssize " ULINTPF
- "\nInnoDB: page_compression " ULINTPF
- " page_compression_level " ULINTPF
- "\nInnoDB: atomic_writes " ULINTPF "\n",
- flags, compact, atomic_blobs, unused, data_dir, zip_ssize,
- page_compression, page_compression_level, atomic_writes
- );
- return(false);
- }
- }
-
- if (atomic_writes) {
-
- if(atomic_writes > ATOMIC_WRITES_OFF) {
-
- fprintf(stderr,
- "InnoDB: Error: table flags are " ULINTPF
- " in the data dictionary and are corrupted\n"
- "InnoDB: Error: data dictionary flags are\n"
- "InnoDB: compact " ULINTPF
- " atomic_blobs " ULINTPF "\n"
- "InnoDB: unused " ULINTPF
- " data_dir " ULINTPF " zip_ssize " ULINTPF
- "\nInnoDB: page_compression " ULINTPF
- " page_compression_level " ULINTPF
- "\nInnoDB: atomic_writes " ULINTPF "\n",
- flags, compact, atomic_blobs, unused, data_dir, zip_ssize,
- page_compression, page_compression_level, atomic_writes
- );
- return(false);
- }
+ switch (DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags)) {
+ case 0:
+ /* PAGE_COMPRESSION_LEVEL=0 should imply PAGE_COMPRESSED=NO */
+ return(!DICT_TF_GET_PAGE_COMPRESSION(flags));
+ case 1: case 2: case 3: case 4: case 5: case 6: case 7: case 8: case 9:
+ /* PAGE_COMPRESSION_LEVEL requires
+ ROW_FORMAT=COMPACT or ROW_FORMAT=DYNAMIC
+ (not ROW_FORMAT=COMPRESSED or ROW_FORMAT=REDUNDANT)
+ and PAGE_COMPRESSED=YES */
+ return(!zip_ssize && DICT_TF_GET_PAGE_COMPRESSION(flags));
+ default:
+ /* Invalid PAGE_COMPRESSION_LEVEL value */
+ return(false);
}
-
- /* CREATE TABLE ... DATA DIRECTORY is supported for any row format,
- so the DATA_DIR flag is compatible with all other table flags. */
-
- return(true);
}
-/********************************************************************//**
-Validate a SYS_TABLES TYPE field and return it.
-@return Same as input after validating it as a SYS_TABLES TYPE field.
-If there is an error, return ULINT_UNDEFINED. */
+/** Validate the table flags.
+@param[in] flags Table flags
+@return true if valid. */
UNIV_INLINE
-ulint
-dict_sys_tables_type_validate(
-/*==========================*/
- ulint type, /*!< in: SYS_TABLES.TYPE */
- ulint n_cols) /*!< in: SYS_TABLES.N_COLS */
-{
- ulint low_order_bit = DICT_TF_GET_COMPACT(type);
- ulint redundant = !(n_cols & DICT_N_COLS_COMPACT);
- ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(type);
- ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(type);
- ulint unused = DICT_TF_GET_UNUSED(type);
- ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(type);
- ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(type);
- ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(type);
-
- /* The low order bit of SYS_TABLES.TYPE is always set to 1.
- If the format is UNIV_FORMAT_B or higher, this field is the same
- as dict_table_t::flags. Zero is not allowed here. */
- if (!low_order_bit) {
- return(ULINT_UNDEFINED);
- }
-
- if (redundant) {
- if (zip_ssize || atomic_blobs) {
- return(ULINT_UNDEFINED);
- }
- }
-
- /* Make sure there are no bits that we do not know about. */
- if (unused) {
- return(ULINT_UNDEFINED);
- }
-
- if (atomic_blobs) {
- /* Barracuda row formats COMPRESSED and DYNAMIC build on
- the page structure introduced for the COMPACT row format
- by allowing keys in secondary indexes to be made from
- data stored off-page in the clustered index.
-
- The DICT_N_COLS_COMPACT flag should be in N_COLS,
- but we already know that. */
- } else if (zip_ssize) {
- /* Antelope does not support COMPRESSED format. */
- return(ULINT_UNDEFINED);
- }
-
- if (zip_ssize) {
- /* COMPRESSED row format must have low_order_bit and
- atomic_blobs bits set and the DICT_N_COLS_COMPACT flag
- should be in N_COLS, but we already know about the
- low_order_bit and DICT_N_COLS_COMPACT flags. */
- if (!atomic_blobs) {
- return(ULINT_UNDEFINED);
- }
-
- /* Validate that the number is within allowed range. */
- if (zip_ssize > PAGE_ZIP_SSIZE_MAX) {
- return(ULINT_UNDEFINED);
- }
+bool
+dict_tf_is_valid(
+ ulint flags)
+{
+ ut_ad(flags < 1U << DICT_TF_BITS);
+ /* The DATA_DIRECTORY flag can be assigned fully independently
+ of all other persistent table flags. */
+ flags &= ~DICT_TF_MASK_DATA_DIR;
+ if (!(flags & 1)) {
+ /* Only ROW_FORMAT=REDUNDANT has 0 in the least significant
+ bit. For ROW_FORMAT=REDUNDANT, only the DATA_DIR flag
+ (which we cleared above) can be set. If any other flags
+ are set, the flags are invalid. */
+ return(flags == 0);
}
- /* There is nothing to validate for the data_dir field.
- CREATE TABLE ... DATA DIRECTORY is supported for any row
- format, so the DATA_DIR flag is compatible with any other
- table flags. However, it is not used with TEMPORARY tables.*/
-
- if (page_compression || page_compression_level) {
- /* page compressed row format must have low_order_bit and
- atomic_blobs bits set and the DICT_N_COLS_COMPACT flag
- should be in N_COLS, but we already know about the
- low_order_bit and DICT_N_COLS_COMPACT flags. */
+ return(dict_tf_is_valid_not_redundant(flags));
+}
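A brief validation sketch built on the dict_tf_set() definition later in this file; the chosen flag values are illustrative:

	/* Illustrative only: a PAGE_COMPRESSED=YES, LEVEL=6 DYNAMIC
	table passes; a nonzero level without the page-compression bit
	would be rejected by the switch above. */
	ulint	flags;
	dict_tf_set(&flags, REC_FORMAT_DYNAMIC, 0, false, true, 6, 0);
	ut_ad(dict_tf_is_valid(flags));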
- if (!atomic_blobs || !page_compression) {
- return(ULINT_UNDEFINED);
- }
+/** Validate both table flags and table flags2 and make sure they
+are compatible.
+@param[in] flags Table flags
+@param[in] flags2 Table flags2
+@return true if valid. */
+UNIV_INLINE
+bool
+dict_tf2_is_valid(
+ ulint flags,
+ ulint flags2)
+{
+ if (!dict_tf_is_valid(flags)) {
+ return(false);
}
- /* Validate that the atomic writes number is within allowed range. */
- if (atomic_writes > ATOMIC_WRITES_OFF) {
- return(ULINT_UNDEFINED);
+ if ((flags2 & DICT_TF2_UNUSED_BIT_MASK) != 0) {
+ return(false);
}
- /* Return the validated SYS_TABLES.TYPE. */
- return(type);
+ return(true);
}
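/* Illustrative sketch, not part of the patch: the shape of the new flag
   validation above. Bit positions are derived from the widths elsewhere
   in this diff (COMPACT = 1 bit, ZIP_SSIZE = 4 bits, ATOMIC_BLOBS = 1 bit,
   so DATA_DIR sits at bit 6); dict_tf_is_valid_not_redundant() is stubbed
   out here. */
#include <cassert>

static const unsigned DATA_DIR_MASK = 1U << 6;

static bool tf_is_valid_not_redundant(unsigned) { return true; } /* stub */

static bool tf_is_valid(unsigned flags)
{
	flags &= ~DATA_DIR_MASK;	/* DATA_DIR is fully independent */
	if (!(flags & 1)) {
		/* LSB 0 means ROW_FORMAT=REDUNDANT: no other bit may remain */
		return flags == 0;
	}
	return tf_is_valid_not_redundant(flags);
}

int main()
{
	assert(tf_is_valid(0));			/* REDUNDANT */
	assert(tf_is_valid(DATA_DIR_MASK));	/* REDUNDANT + DATA DIRECTORY */
	assert(!tf_is_valid(1U << 1));		/* stray ZIP_SSIZE bit, LSB 0 */
}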
/********************************************************************//**
Determine the file format from dict_table_t::flags
The low order bit will be zero for REDUNDANT and 1 for COMPACT. For any
other row_format, file_format is > 0 and DICT_TF_COMPACT will also be set.
-@return file format version */
+@return file format version */
UNIV_INLINE
rec_format_t
dict_tf_get_rec_format(
@@ -769,7 +649,7 @@ dict_tf_get_rec_format(
/********************************************************************//**
Determine the file format from a dict_table_t::flags.
-@return file format version */
+@return file format version */
UNIV_INLINE
ulint
dict_tf_get_format(
@@ -785,7 +665,7 @@ dict_tf_get_format(
/********************************************************************//**
Determine the file format of a table.
-@return file format version */
+@return file format version */
UNIV_INLINE
ulint
dict_table_get_format(
@@ -797,42 +677,45 @@ dict_table_get_format(
return(dict_tf_get_format(table->flags));
}
-/********************************************************************//**
-Set the file format and zip size in a dict_table_t::flags. If zip size
-is not needed, it should be 0. */
+/** Set the various values in a dict_table_t::flags pointer.
+@param[in,out]	flags	Pointer to a 4-byte table flags value
+@param[in]	format	File Format
+@param[in]	zip_ssize	Zip Shift Size
+@param[in]	use_data_dir	Table uses DATA DIRECTORY
+@param[in]	page_compressed	Table uses page compression
+@param[in]	page_compression_level	Page compression level
+@param[in]	not_used	For future use */
UNIV_INLINE
void
dict_tf_set(
/*========*/
- ulint* flags, /*!< in/out: table flags */
- rec_format_t format, /*!< in: file format */
- ulint zip_ssize, /*!< in: zip shift size */
- bool use_data_dir, /*!< in: table uses DATA DIRECTORY
- */
- bool page_compressed,/*!< in: table uses page compressed
- pages */
- ulint page_compression_level, /*!< in: table page compression
- level */
- ulint atomic_writes) /*!< in: table atomic writes setup */
-{
- atomic_writes_t awrites = (atomic_writes_t)atomic_writes;
+ ulint* flags,
+ rec_format_t format,
+ ulint zip_ssize,
+ bool use_data_dir,
+ bool page_compressed,
+ ulint page_compression_level,
+ ulint not_used)
+{
+ *flags = use_data_dir ? 1 << DICT_TF_POS_DATA_DIR : 0;
switch (format) {
case REC_FORMAT_REDUNDANT:
- *flags = 0;
ut_ad(zip_ssize == 0);
- break;
+ /* no other options are allowed */
+ ut_ad(!page_compressed);
+ return;
case REC_FORMAT_COMPACT:
- *flags = DICT_TF_COMPACT;
+ *flags |= DICT_TF_COMPACT;
ut_ad(zip_ssize == 0);
break;
case REC_FORMAT_COMPRESSED:
- *flags = DICT_TF_COMPACT
+ *flags |= DICT_TF_COMPACT
| (1 << DICT_TF_POS_ATOMIC_BLOBS)
| (zip_ssize << DICT_TF_POS_ZIP_SSIZE);
break;
case REC_FORMAT_DYNAMIC:
- *flags = DICT_TF_COMPACT
+ *flags |= DICT_TF_COMPACT
| (1 << DICT_TF_POS_ATOMIC_BLOBS);
ut_ad(zip_ssize == 0);
break;
@@ -840,43 +723,34 @@ dict_tf_set(
if (page_compressed) {
*flags |= (1 << DICT_TF_POS_ATOMIC_BLOBS)
- | (1 << DICT_TF_POS_PAGE_COMPRESSION)
+ | (1 << DICT_TF_POS_PAGE_COMPRESSION)
| (page_compression_level << DICT_TF_POS_PAGE_COMPRESSION_LEVEL);
ut_ad(zip_ssize == 0);
ut_ad(dict_tf_get_page_compression(*flags) == TRUE);
ut_ad(dict_tf_get_page_compression_level(*flags) == page_compression_level);
}
-
- *flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES);
- ut_a(dict_tf_get_atomic_writes(*flags) == awrites);
-
- if (use_data_dir) {
- *flags |= (1 << DICT_TF_POS_DATA_DIR);
- }
}
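/* Illustrative sketch, not part of the patch: how the rewritten
   dict_tf_set() composes flags for ROW_FORMAT=COMPRESSED. Positions as in
   the sketch above; zip_ssize == 4 corresponds to KEY_BLOCK_SIZE=8,
   assuming UNIV_ZIP_SIZE_MIN == 1024 as in stock InnoDB. */
#include <cassert>

enum { POS_ZIP_SSIZE = 1, POS_ATOMIC_BLOBS = 5, POS_DATA_DIR = 6 };

static unsigned tf_set_compressed(unsigned zip_ssize, bool use_data_dir)
{
	/* the DATA_DIR bit is now seeded first, then OR-ed with the
	   row-format bits instead of being assigned at the end */
	unsigned flags = use_data_dir ? 1U << POS_DATA_DIR : 0;
	flags |= 1U				/* DICT_TF_COMPACT */
		| (1U << POS_ATOMIC_BLOBS)
		| (zip_ssize << POS_ZIP_SSIZE);
	return flags;
}

int main()
{
	unsigned f = tf_set_compressed(4, true);
	assert(f & 1U);				/* COMPACT bit */
	assert((f >> POS_ZIP_SSIZE & 0xFU) == 4);
	assert(f & (1U << POS_DATA_DIR));
}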
-/********************************************************************//**
-Convert a 32 bit integer table flags to the 32 bit integer that is
-written into the tablespace header at the offset FSP_SPACE_FLAGS and is
-also stored in the fil_space_t::flags field. The following chart shows
-the translation of the low order bit. Other bits are the same.
+/** Convert a 32 bit integer table flags to the 32 bit FSP Flags.
+Fsp Flags are written into the tablespace header at the offset
+FSP_SPACE_FLAGS and are also stored in the fil_space_t::flags field.
+The following chart shows the translation of the low order bit.
+Other bits are the same.
========================= Low order bit ==========================
| REDUNDANT | COMPACT | COMPRESSED | DYNAMIC
dict_table_t::flags | 0 | 1 | 1 | 1
fil_space_t::flags | 0 | 0 | 1 | 1
==================================================================
-@return tablespace flags (fil_space_t::flags) */
+@param[in] table_flags dict_table_t::flags
+@return tablespace flags (fil_space_t::flags) */
UNIV_INLINE
ulint
-dict_tf_to_fsp_flags(
-/*=================*/
- ulint table_flags) /*!< in: dict_table_t::flags */
+dict_tf_to_fsp_flags(ulint table_flags)
{
ulint fsp_flags;
ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(
table_flags);
- ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags);
ut_ad((DICT_TF_GET_PAGE_COMPRESSION(table_flags) == 0)
== (page_compression_level == 0));
@@ -903,50 +777,12 @@ dict_tf_to_fsp_flags(
fsp_flags |= 1U << FSP_FLAGS_MEM_DATA_DIR;
}
- fsp_flags |= atomic_writes << FSP_FLAGS_MEM_ATOMIC_WRITES;
fsp_flags |= page_compression_level << FSP_FLAGS_MEM_COMPRESSION_LEVEL;
return(fsp_flags);
}
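/* Illustrative sketch, not part of the patch: the low-order-bit row of
   the chart above. In fil_space_t::flags, bit 0 ("post-Antelope") is set
   exactly when ATOMIC_BLOBS is set in the table flags, which is why
   COMPACT maps to 0 while COMPRESSED and DYNAMIC map to 1. */
#include <cassert>

enum { POS_ZIP_SSIZE = 1, POS_ATOMIC_BLOBS = 5 };

static unsigned fsp_low_bit(unsigned table_flags)
{
	return table_flags >> POS_ATOMIC_BLOBS & 1U;
}

int main()
{
	assert(fsp_low_bit(0) == 0);				/* REDUNDANT */
	assert(fsp_low_bit(1) == 0);				/* COMPACT */
	assert(fsp_low_bit(1 | 1U << POS_ATOMIC_BLOBS) == 1);	/* DYNAMIC */
	assert(fsp_low_bit(1 | 1U << POS_ATOMIC_BLOBS
			   | 4U << POS_ZIP_SSIZE) == 1);	/* COMPRESSED */
}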
/********************************************************************//**
-Convert a 32 bit integer from SYS_TABLES.TYPE to dict_table_t::flags
-The following chart shows the translation of the low order bit.
-Other bits are the same.
-========================= Low order bit ==========================
- | REDUNDANT | COMPACT | COMPRESSED and DYNAMIC
-SYS_TABLES.TYPE | 1 | 1 | 1
-dict_table_t::flags | 0 | 1 | 1
-==================================================================
-@return ulint containing SYS_TABLES.TYPE */
-UNIV_INLINE
-ulint
-dict_sys_tables_type_to_tf(
-/*=======================*/
- ulint type, /*!< in: SYS_TABLES.TYPE field */
- ulint n_cols) /*!< in: SYS_TABLES.N_COLS field */
-{
- ulint flags;
- ulint redundant = !(n_cols & DICT_N_COLS_COMPACT);
-
- /* Adjust bit zero. */
- flags = redundant ? 0 : 1;
-
- /* ZIP_SSIZE, ATOMIC_BLOBS, DATA_DIR, PAGE_COMPRESSION,
- PAGE_COMPRESSION_LEVEL, ATOMIC_WRITES are the same. */
- flags |= type & (DICT_TF_MASK_ZIP_SSIZE
- | DICT_TF_MASK_ATOMIC_BLOBS
- | DICT_TF_MASK_DATA_DIR
- | DICT_TF_MASK_PAGE_COMPRESSION
- | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL
- | DICT_TF_MASK_ATOMIC_WRITES
-
- );
-
- return(flags);
-}
-
-/********************************************************************//**
Convert a 32 bit integer table flags to the 32bit integer that is written
to a SYS_TABLES.TYPE field. The following chart shows the translation of
the low order bit. Other bits are the same.
@@ -955,7 +791,7 @@ the low order bit. Other bits are the same.
dict_table_t::flags | 0 | 1 | 1
SYS_TABLES.TYPE | 1 | 1 | 1
==================================================================
-@return ulint containing SYS_TABLES.TYPE */
+@return ulint containing SYS_TABLES.TYPE */
UNIV_INLINE
ulint
dict_tf_to_sys_tables_type(
@@ -970,52 +806,52 @@ dict_tf_to_sys_tables_type(
type = 1;
/* ZIP_SSIZE, ATOMIC_BLOBS, DATA_DIR, PAGE_COMPRESSION,
- PAGE_COMPRESSION_LEVEL, ATOMIC_WRITES are the same. */
+ PAGE_COMPRESSION_LEVEL are the same. */
type |= flags & (DICT_TF_MASK_ZIP_SSIZE
| DICT_TF_MASK_ATOMIC_BLOBS
| DICT_TF_MASK_DATA_DIR
| DICT_TF_MASK_PAGE_COMPRESSION
- | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL
- | DICT_TF_MASK_ATOMIC_WRITES);
+ | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL);
return(type);
}
-/********************************************************************//**
-Extract the compressed page size from dict_table_t::flags.
-These flags are in memory, so assert that they are valid.
-@return compressed page size, or 0 if not compressed */
+/** Extract the page size info from table flags.
+@param[in]	flags	table flags
+@return a structure containing the compressed and uncompressed
+page sizes and a boolean indicating if the page is compressed. */
UNIV_INLINE
-ulint
-dict_tf_get_zip_size(
-/*=================*/
- ulint flags) /*!< in: flags */
+const page_size_t
+dict_tf_get_page_size(
+ ulint flags)
{
- ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags);
- ulint zip_size = (zip_ssize
- ? (UNIV_ZIP_SIZE_MIN >> 1) << zip_ssize
- : 0);
+ const ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags);
+
+ if (zip_ssize == 0) {
+ return(univ_page_size);
+ }
+
+ const ulint zip_size = (UNIV_ZIP_SIZE_MIN >> 1) << zip_ssize;
ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
- return(zip_size);
+ return(page_size_t(zip_size, univ_page_size.logical(), true));
}
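/* Illustrative sketch, not part of the patch: the zip_ssize to
   compressed-page-size mapping used by dict_tf_get_page_size() above.
   Assumes UNIV_ZIP_SIZE_MIN == 1024, as in stock InnoDB. */
#include <cassert>

static unsigned zip_size_from_ssize(unsigned zip_ssize)
{
	return zip_ssize ? (1024U >> 1) << zip_ssize : 0;
}

int main()
{
	assert(zip_size_from_ssize(0) == 0);	/* uncompressed: univ_page_size */
	assert(zip_size_from_ssize(1) == 1024);	/* KEY_BLOCK_SIZE=1 */
	assert(zip_size_from_ssize(4) == 8192);	/* KEY_BLOCK_SIZE=8 */
}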
-/********************************************************************//**
-Check whether the table uses the compressed compact page format.
-@return compressed page size, or 0 if not compressed */
+/** Get the table page size.
+@param[in] table table
+@return a structure containing the compressed and uncompressed
+page sizes and a boolean indicating if the page is compressed */
UNIV_INLINE
-ulint
-dict_table_zip_size(
-/*================*/
- const dict_table_t* table) /*!< in: table */
+const page_size_t
+dict_table_page_size(
+ const dict_table_t* table)
{
- ut_ad(table);
+ ut_ad(table != NULL);
- return(dict_tf_get_zip_size(table->flags));
+ return(dict_tf_get_page_size(table->flags));
}
-#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Obtain exclusive locks on all index trees of the table. This is to prevent
accessing index trees while InnoDB is updating internal metadata for
@@ -1077,12 +913,11 @@ dict_table_x_unlock_indexes(
rw_lock_x_unlock(dict_index_get_lock(index));
}
}
-#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
Gets the number of fields in the internal representation of an index,
including fields added by the dictionary system.
-@return number of fields */
+@return number of fields */
UNIV_INLINE
ulint
dict_index_get_n_fields(
@@ -1100,7 +935,7 @@ Gets the number of fields in the internal representation of an index
that uniquely determine the position of an index entry in the index, if
we do not take multiversioning into account: in the B-tree use the value
returned by dict_index_get_n_unique_in_tree.
-@return number of fields */
+@return number of fields */
UNIV_INLINE
ulint
dict_index_get_n_unique(
@@ -1117,7 +952,7 @@ dict_index_get_n_unique(
Gets the number of fields in the internal representation of an index
which uniquely determine the position of an index entry in the index, if
we also take multiversioning into account.
-@return number of fields */
+@return number of fields */
UNIV_INLINE
ulint
dict_index_get_n_unique_in_tree(
@@ -1136,12 +971,37 @@ dict_index_get_n_unique_in_tree(
return(dict_index_get_n_fields(index));
}
+/**
+Gets the number of fields on the non-leaf page level in the internal
+representation of an index which uniquely determine the position of an
+index entry in the index, if we also take multiversioning into account.
+Note that this does not include the page number field.
+@param[in] index index
+@return number of fields */
+UNIV_INLINE
+ulint
+dict_index_get_n_unique_in_tree_nonleaf(
+ const dict_index_t* index)
+{
+ ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+ ut_ad(index->cached);
+
+ if (dict_index_is_spatial(index)) {
+		/* For a spatial index, a non-leaf page has only
+		2 fields (mbr + page_no). Excluding the page number
+		field, one field remains. */
+ return(DICT_INDEX_SPATIAL_NODEPTR_SIZE);
+ } else {
+ return(dict_index_get_n_unique_in_tree(index));
+ }
+}
+
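/* Illustrative sketch, not part of the patch: the non-leaf unique-field
   rule above. For spatial indexes the node pointer carries only the MBR
   (DICT_INDEX_SPATIAL_NODEPTR_SIZE == 1, assumed here); everything else
   falls back to dict_index_get_n_unique_in_tree(). */
#include <cassert>

static unsigned n_unique_in_tree_nonleaf(bool is_spatial, unsigned n_unique)
{
	return is_spatial ? 1U : n_unique;
}

int main()
{
	assert(n_unique_in_tree_nonleaf(true, 3) == 1);
	assert(n_unique_in_tree_nonleaf(false, 3) == 3);
}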
/********************************************************************//**
Gets the number of user-defined ordering fields in the index. In the internal
representation of clustered indexes we add the row id to the ordering fields
to make a clustered index unique, but this function returns the number of
fields the user defined in the index as ordering fields.
-@return number of fields */
+@return number of fields */
UNIV_INLINE
ulint
dict_index_get_n_ordering_defined_by_user(
@@ -1155,7 +1015,7 @@ dict_index_get_n_ordering_defined_by_user(
#ifdef UNIV_DEBUG
/********************************************************************//**
Gets the nth field of an index.
-@return pointer to field object */
+@return pointer to field object */
UNIV_INLINE
dict_field_t*
dict_index_get_nth_field(
@@ -1172,7 +1032,7 @@ dict_index_get_nth_field(
/********************************************************************//**
Returns the position of a system column in an index.
-@return position, ULINT_UNDEFINED if not contained */
+@return position, ULINT_UNDEFINED if not contained */
UNIV_INLINE
ulint
dict_index_get_sys_col_pos(
@@ -1181,7 +1041,7 @@ dict_index_get_sys_col_pos(
ulint type) /*!< in: DATA_ROW_ID, ... */
{
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- ut_ad(!dict_index_is_univ(index));
+ ut_ad(!dict_index_is_ibuf(index));
if (dict_index_is_clust(index)) {
@@ -1191,13 +1051,12 @@ dict_index_get_sys_col_pos(
}
return(dict_index_get_nth_col_pos(
- index, dict_table_get_sys_col_no(index->table, type),
- NULL));
+ index, dict_table_get_sys_col_no(index->table, type), NULL));
}
/*********************************************************************//**
Gets the field column.
-@return field->col, pointer to the table column */
+@return field->col, pointer to the table column */
UNIV_INLINE
const dict_col_t*
dict_field_get_col(
@@ -1209,7 +1068,7 @@ dict_field_get_col(
/********************************************************************//**
Gets pointer to the nth column in an index.
-@return column */
+@return column */
UNIV_INLINE
const dict_col_t*
dict_index_get_nth_col(
@@ -1222,7 +1081,7 @@ dict_index_get_nth_col(
/********************************************************************//**
Gets the column number the nth field in an index.
-@return column number */
+@return column number */
UNIV_INLINE
ulint
dict_index_get_nth_col_no(
@@ -1245,14 +1104,13 @@ dict_index_get_nth_col_pos(
ulint n, /*!< in: column number */
ulint* prefix_col_pos) /*!< out: col num if prefix */
{
- return(dict_index_get_nth_col_or_prefix_pos(index, n, FALSE,
+ return(dict_index_get_nth_col_or_prefix_pos(index, n, false, false,
prefix_col_pos));
}
-#ifndef UNIV_HOTBACKUP
/********************************************************************//**
Returns the minimum data size of an index record.
-@return minimum data size in bytes */
+@return minimum data size in bytes */
UNIV_INLINE
ulint
dict_index_get_min_size(
@@ -1272,7 +1130,7 @@ dict_index_get_min_size(
/*********************************************************************//**
Gets the space id of the root of the index tree.
-@return space id */
+@return space id */
UNIV_INLINE
ulint
dict_index_get_space(
@@ -1297,12 +1155,12 @@ dict_index_set_space(
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- index->space = space;
+ index->space = unsigned(space);
}
/*********************************************************************//**
Gets the page number of the root of the index tree.
-@return page number */
+@return page number */
UNIV_INLINE
ulint
dict_index_get_page(
@@ -1316,7 +1174,7 @@ dict_index_get_page(
/*********************************************************************//**
Gets the read-write lock of the index tree.
-@return read-write lock */
+@return read-write lock */
UNIV_INLINE
rw_lock_t*
dict_index_get_lock(
@@ -1332,7 +1190,7 @@ dict_index_get_lock(
Returns free space reserved for future updates of records. This is
relevant only in the case of many consecutive inserts, as updates
which make the records bigger might fragment the index.
-@return number of free bytes on page, reserved for updates */
+@return number of free bytes on page, reserved for updates */
UNIV_INLINE
ulint
dict_index_get_space_reserve(void)
@@ -1384,9 +1242,8 @@ dict_index_set_online_status(
enum online_index_status status) /*!< in: status */
{
ut_ad(!(index->type & DICT_FTS));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_X));
+
#ifdef UNIV_DEBUG
switch (dict_index_get_online_status(index)) {
case ONLINE_INDEX_COMPLETE:
@@ -1446,7 +1303,8 @@ ulint
dict_table_is_fts_column(
/*=====================*/
ib_vector_t* indexes,/*!< in: vector containing only FTS indexes */
- ulint col_no) /*!< in: col number to search for */
+ ulint col_no, /*!< in: col number to search for */
+ bool is_virtual) /*!< in: whether it is a virtual column */
{
ulint i;
@@ -1456,7 +1314,8 @@ dict_table_is_fts_column(
index = (dict_index_t*) ib_vector_getp(indexes, i);
- if (dict_index_contains_col_or_prefix(index, col_no)) {
+ if (dict_index_contains_col_or_prefix(
+ index, col_no, is_virtual)) {
return(i);
}
@@ -1490,37 +1349,70 @@ dict_max_field_len_store_undo(
return(prefix_len);
}
-/********************************************************************//**
-Check whether the table is corrupted.
-@return nonzero for corrupted table, zero for valid tables */
+/** Determine the maximum number of bytes of a virtual column that need
+to be stored in the undo log.
+@param[in] table dict_table_t for the table
+@param[in] col_no virtual column number
+@return maximum bytes of virtual column to be stored in the undo log */
UNIV_INLINE
ulint
-dict_table_is_corrupted(
-/*====================*/
- const dict_table_t* table) /*!< in: table */
+dict_max_v_field_len_store_undo(
+ dict_table_t* table,
+ ulint col_no)
+{
+ const dict_col_t* col
+ = &dict_table_get_nth_v_col(table, col_no)->m_col;
+ ulint max_log_len;
+
+	/* This calculation conforms to the non-virtual column
+	maximum log length calculation:
+	1) for UNIV_FORMAT_A, up to REC_ANTELOPE_MAX_INDEX_COL_LEN
+	2) for UNIV_FORMAT_B, up to col->max_prefix or
+	REC_VERSION_56_MAX_INDEX_COL_LEN, whichever is less */
+ if (dict_table_get_format(table) >= UNIV_FORMAT_B) {
+ if (DATA_BIG_COL(col) && col->max_prefix > 0) {
+ max_log_len = col->max_prefix;
+ } else {
+ max_log_len = DICT_MAX_FIELD_LEN_BY_FORMAT(table);
+ }
+ } else {
+ max_log_len = REC_ANTELOPE_MAX_INDEX_COL_LEN;
+ }
+
+ return(max_log_len);
+}
+
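/* Illustrative sketch, not part of the patch: the branch structure of
   dict_max_v_field_len_store_undo() above, with the stock InnoDB caps
   (REC_ANTELOPE_MAX_INDEX_COL_LEN == 768,
   REC_VERSION_56_MAX_INDEX_COL_LEN == 3072) assumed as plain numbers. */
#include <cassert>

static unsigned max_v_log_len(bool barracuda, bool big_col,
			      unsigned max_prefix)
{
	if (!barracuda) {
		return 768;	/* UNIV_FORMAT_A (Antelope) cap */
	}
	/* a big column with an index prefix logs at most the prefix */
	return (big_col && max_prefix > 0) ? max_prefix : 3072;
}

int main()
{
	assert(max_v_log_len(false, true, 500) == 768);
	assert(max_v_log_len(true, true, 500) == 500);
	assert(max_v_log_len(true, false, 0) == 3072);
}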
+/**********************************************************************//**
+Prevent table eviction by moving a table to the non-LRU list from the
+LRU list if it is not already there. */
+UNIV_INLINE
+void
+dict_table_prevent_eviction(
+/*========================*/
+ dict_table_t* table) /*!< in: table to prevent eviction */
{
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
- return(table->corrupted);
+ ut_ad(mutex_own(&dict_sys->mutex));
+ if (table->can_be_evicted) {
+ dict_table_move_from_lru_to_non_lru(table);
+ }
}
/********************************************************************//**
-Check whether the index is corrupted.
-@return nonzero for corrupted index, zero for valid indexes */
+Check whether the table is corrupted.
+@return nonzero for corrupted table, zero for valid tables */
UNIV_INLINE
ulint
-dict_index_is_corrupted(
+dict_table_is_corrupted(
/*====================*/
- const dict_index_t* index) /*!< in: index */
+ const dict_table_t* table) /*!< in: table */
{
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return((index->type & DICT_CORRUPT)
- || (index->table && index->table->corrupted));
+ ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+ return(table->corrupted);
}
/********************************************************************//**
Check if the tablespace for the table has been discarded.
-@return true if the tablespace has been discarded. */
+@return true if the tablespace has been discarded. */
UNIV_INLINE
bool
dict_table_is_discarded(
@@ -1530,43 +1422,122 @@ dict_table_is_discarded(
return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_DISCARDED));
}
-/********************************************************************//**
-Check if it is a temporary table.
-@return true if temporary table flag is set. */
+/** Check if the table is in a file-per-table tablespace.
+This test does not use table flags2 since some REDUNDANT tables in the
+system tablespace may have garbage in the MIX_LEN field where flags2 is
+stored. These garbage MIX_LEN fields were written before v3.23.52.
+A patch was added to v3.23.52 which initializes the MIX_LEN field to 0.
+Since file-per-table tablespaces were added in 4.1, any SYS_TABLES
+record with a non-zero space ID will have a reliable MIX_LEN field.
+However, this test does not use flags2 from SYS_TABLES.MIX_LEN. Instead,
+assume that if the tablespace is not a predefined system tablespace,
+then it must be file-per-table.
+Also, during ALTER TABLE, the DICT_TF2_USE_FILE_PER_TABLE flag may not be
+set on one of the file-per-table tablespaces.
+This test cannot be done on a table in the process of being created
+because the space_id will be zero until the tablespace is created.
+@param[in] table An existing open table to check
+@return true if this table was created as a file-per-table tablespace. */
UNIV_INLINE
bool
-dict_table_is_temporary(
-/*====================*/
+dict_table_is_file_per_table(
const dict_table_t* table) /*!< in: table to check */
{
- return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY));
+ return !is_system_tablespace(table->space);
}
-/**********************************************************************//**
-Get index by first field of the index
-@return index which is having first field matches
-with the field present in field_index position of table */
+/** Acquire the table handle. */
+inline
+void
+dict_table_t::acquire()
+{
+ ut_ad(mutex_own(&dict_sys->mutex));
+ my_atomic_add32_explicit(&n_ref_count, 1, MY_MEMORY_ORDER_RELAXED);
+}
+
+/** Release the table handle.
+@return whether the last handle was released */
+inline
+bool
+dict_table_t::release()
+{
+ int32 n = my_atomic_add32_explicit(
+ &n_ref_count, -1, MY_MEMORY_ORDER_RELAXED);
+ ut_ad(n > 0);
+ return n == 1;
+}
+
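/* Illustrative sketch, not part of the patch: the reference-counting
   contract of dict_table_t::acquire()/release() above, using std::atomic
   in place of my_atomic_add32_explicit(), which likewise returns the
   value held before the addition. */
#include <atomic>
#include <cassert>

struct table_handle {
	std::atomic<int> n_ref_count{0};

	void acquire()
	{
		n_ref_count.fetch_add(1, std::memory_order_relaxed);
	}

	/* @return whether the last handle was released */
	bool release()
	{
		int n = n_ref_count.fetch_sub(1, std::memory_order_relaxed);
		assert(n > 0);
		return n == 1;	/* old value 1: table is now unreferenced */
	}
};

int main()
{
	table_handle t;
	t.acquire();
	t.acquire();
	assert(!t.release());
	assert(t.release());	/* second release drops the last handle */
}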
+/** Encode the number of columns and number of virtual columns in a
+4-byte value. We can do this because the number of columns in
+InnoDB is limited to 1017.
+@param[in]	n_col	number of non-virtual columns
+@param[in]	n_v_col	number of virtual columns
+@return encoded value */
UNIV_INLINE
-dict_index_t*
-dict_table_get_index_on_first_col(
-/*==============================*/
- const dict_table_t* table, /*!< in: table */
- ulint col_index) /*!< in: position of column
- in table */
+ulint
+dict_table_encode_n_col(
+ ulint n_col,
+ ulint n_v_col)
{
- ut_ad(col_index < table->n_cols);
+	return(n_col + (n_v_col << 16));
+}
- dict_col_t* column = dict_table_get_nth_col(table, col_index);
+/** Decode the number of virtual and non-virtual columns from one
+4-byte value.
+@param[in]	encoded	encoded value
+@param[in,out]	n_col	number of non-virtual columns
+@param[in,out]	n_v_col	number of virtual columns */
+UNIV_INLINE
+void
+dict_table_decode_n_col(
+ ulint encoded,
+ ulint* n_col,
+ ulint* n_v_col)
+{
- for (dict_index_t* index = dict_table_get_first_index(table);
- index != NULL; index = dict_table_get_next_index(index)) {
+ ulint num = encoded & ~DICT_N_COLS_COMPACT;
+ *n_v_col = num >> 16;
+ *n_col = num & 0xFFFF;
+}
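/* Illustrative sketch, not part of the patch: round-tripping the column
   count encoding above. DICT_N_COLS_COMPACT is assumed to be the high
   bit (0x80000000) that marks the compact format in SYS_TABLES.N_COLS. */
#include <cassert>

static const unsigned long N_COLS_COMPACT = 0x80000000UL;

static unsigned long encode_n_col(unsigned long n_col, unsigned long n_v_col)
{
	return n_col + (n_v_col << 16);
}

static void decode_n_col(unsigned long encoded,
			 unsigned long* n_col, unsigned long* n_v_col)
{
	unsigned long num = encoded & ~N_COLS_COMPACT;
	*n_v_col = num >> 16;
	*n_col = num & 0xFFFF;
}

int main()
{
	unsigned long c, v;
	/* 10 regular + 2 virtual columns, stored with the compact bit */
	decode_n_col(encode_n_col(10, 2) | N_COLS_COMPACT, &c, &v);
	assert(c == 10 && v == 2);
}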
- if (index->fields[0].col == column) {
- return(index);
+/** Free the virtual column template
+@param[in,out] vc_templ virtual column template */
+void
+dict_free_vc_templ(
+ dict_vcol_templ_t* vc_templ)
+{
+ UT_DELETE_ARRAY(vc_templ->default_rec);
+ vc_templ->default_rec = NULL;
+
+ if (vc_templ->vtempl != NULL) {
+ ut_ad(vc_templ->n_v_col > 0);
+ for (ulint i = 0; i < vc_templ->n_col
+ + vc_templ->n_v_col; i++) {
+ if (vc_templ->vtempl[i] != NULL) {
+ ut_free(vc_templ->vtempl[i]);
+ }
}
+ ut_free(vc_templ->vtempl);
+ vc_templ->vtempl = NULL;
}
- ut_error;
- return(0);
}
-#endif /* !UNIV_HOTBACKUP */
+/** Check whether the table has a virtual index.
+@param[in]	table	InnoDB table
+@return true if the table has a virtual index, false otherwise. */
+UNIV_INLINE
+bool
+dict_table_have_virtual_index(
+ dict_table_t* table)
+{
+ for (ulint col_no = 0; col_no < dict_table_get_n_v_cols(table);
+ col_no++) {
+ const dict_v_col_t* col
+ = dict_table_get_nth_v_col(table, col_no);
+
+ if (col->m_col.ord_part) {
+ return(true);
+ }
+ }
+
+ return(false);
+}
diff --git a/storage/innobase/include/dict0load.h b/storage/innobase/include/dict0load.h
index 1d68fcc58f3..b288c0b337a 100644
--- a/storage/innobase/include/dict0load.h
+++ b/storage/innobase/include/dict0load.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,13 +28,17 @@ Created 4/24/1996 Heikki Tuuri
#ifndef dict0load_h
#define dict0load_h
-#include "univ.i"
#include "dict0types.h"
#include "trx0types.h"
#include "ut0byte.h"
#include "mem0mem.h"
#include "btr0types.h"
+#include <deque>
+
+/** A stack of table names related through foreign key constraints */
+typedef std::deque<const char*, ut_allocator<const char*> > dict_names_t;
+
/** enum that defines all system table IDs. @see SYSTEM_TABLE_NAME[] */
enum dict_system_id_t {
SYS_TABLES = 0,
@@ -44,6 +49,7 @@ enum dict_system_id_t {
SYS_FOREIGN_COLS,
SYS_TABLESPACES,
SYS_DATAFILES,
+ SYS_VIRTUAL,
/* This must be last item. Defines the number of system tables. */
SYS_NUM_SYSTEM_TABLES
@@ -58,156 +64,54 @@ enum dict_table_info_t {
is in the cache, if so, return it */
};
-/** Check type for dict_check_tablespaces_and_store_max_id() */
-enum dict_check_t {
- /** No user tablespaces have been opened
- (no crash recovery, no transactions recovered). */
- DICT_CHECK_NONE_LOADED = 0,
- /** Some user tablespaces may have been opened
- (no crash recovery; recovered table locks for transactions). */
- DICT_CHECK_SOME_LOADED,
- /** All user tablespaces have been opened (crash recovery). */
- DICT_CHECK_ALL_LOADED
-};
+/** Check each tablespace found in the data dictionary.
+Look at each table defined in SYS_TABLES that has a space_id > 0.
+If the tablespace is not yet in the fil_system cache, look up the
+tablespace in SYS_DATAFILES to ensure the correct path.
-/********************************************************************//**
-In a crash recovery we already have all the tablespace objects created.
-This function compares the space id information in the InnoDB data dictionary
-to what we already read with fil_load_single_table_tablespaces().
+In a crash recovery we already have some tablespace objects created from
+processing the REDO log. Any other tablespace in SYS_TABLESPACES not
+previously used in recovery will be opened here. We will compare the
+space_id information in the data dictionary to what we find in the
+tablespace file. In addition, more validation will be done if recovery
+was needed and force_recovery is not set.
-In a normal startup, we create the tablespace objects for every table in
-InnoDB's data dictionary, if the corresponding .ibd file exists.
We also scan the biggest space id, and store it to fil_system. */
-UNIV_INTERN
-void
-dict_check_tablespaces_and_store_max_id(
-/*====================================*/
- dict_check_t dict_check); /*!< in: how to check */
+void dict_check_tablespaces_and_store_max_id();
+
/********************************************************************//**
Finds the first table name in the given database.
@return own: table name, NULL if does not exist; the caller must free
the memory in the string! */
-UNIV_INTERN
char*
dict_get_first_table_name_in_db(
/*============================*/
const char* name); /*!< in: database name which ends to '/' */
-/********************************************************************//**
-Loads a table definition from a SYS_TABLES record to dict_table_t.
-Does not load any columns or indexes.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_load_table_low(
-/*================*/
- const char* name, /*!< in: table name */
- const rec_t* rec, /*!< in: SYS_TABLES record */
- dict_table_t** table); /*!< out,own: table, or NULL */
-/********************************************************************//**
-Loads a table column definition from a SYS_COLUMNS record to
-dict_table_t.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_load_column_low(
-/*=================*/
- dict_table_t* table, /*!< in/out: table, could be NULL
- if we just populate a dict_column_t
- struct with information from
- a SYS_COLUMNS record */
- mem_heap_t* heap, /*!< in/out: memory heap
- for temporary storage */
- dict_col_t* column, /*!< out: dict_column_t to fill,
- or NULL if table != NULL */
- table_id_t* table_id, /*!< out: table id */
- const char** col_name, /*!< out: column name */
- const rec_t* rec); /*!< in: SYS_COLUMNS record */
-/********************************************************************//**
-Loads an index definition from a SYS_INDEXES record to dict_index_t.
-If allocate=TRUE, we will create a dict_index_t structure and fill it
-accordingly. If allocated=FALSE, the dict_index_t will be supplied by
-the caller and filled with information read from the record. @return
-error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_load_index_low(
-/*================*/
- byte* table_id, /*!< in/out: table id (8 bytes),
- an "in" value if allocate=TRUE
- and "out" when allocate=FALSE */
- const char* table_name, /*!< in: table name */
- mem_heap_t* heap, /*!< in/out: temporary memory heap */
- const rec_t* rec, /*!< in: SYS_INDEXES record */
- ibool allocate, /*!< in: TRUE=allocate *index,
- FALSE=fill in a pre-allocated
- *index */
- dict_index_t** index); /*!< out,own: index, or NULL */
-/********************************************************************//**
-Loads an index field definition from a SYS_FIELDS record to
-dict_index_t.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_load_field_low(
-/*================*/
- byte* index_id, /*!< in/out: index id (8 bytes)
- an "in" value if index != NULL
- and "out" if index == NULL */
- dict_index_t* index, /*!< in/out: index, could be NULL
- if we just populate a dict_field_t
- struct with information from
- a SYS_FIELDS record */
- dict_field_t* sys_field, /*!< out: dict_field_t to be
- filled */
- ulint* pos, /*!< out: Field position */
- byte* last_index_id, /*!< in: last index id */
- mem_heap_t* heap, /*!< in/out: memory heap
- for temporary storage */
- const rec_t* rec); /*!< in: SYS_FIELDS record */
-/********************************************************************//**
-Using the table->heap, copy the null-terminated filepath into
-table->data_dir_path and put a null byte before the extension.
-This allows SHOW CREATE TABLE to return the correct DATA DIRECTORY path.
-Make this data directory path only if it has not yet been saved. */
-UNIV_INTERN
-void
-dict_save_data_dir_path(
-/*====================*/
- dict_table_t* table, /*!< in/out: table */
- char* filepath); /*!< in: filepath of tablespace */
-/*****************************************************************//**
-Make sure the data_file_name is saved in dict_table_t if needed. Try to
-read it from the file dictionary first, then from SYS_DATAFILES. */
-UNIV_INTERN
+/** Make sure the data_file_name is saved in dict_table_t if needed.
+Try to read it from the fil_system first, then from SYS_DATAFILES.
+@param[in] table Table object
+@param[in] dict_mutex_own true if dict_sys->mutex is owned already */
void
dict_get_and_save_data_dir_path(
-/*============================*/
- dict_table_t* table, /*!< in/out: table */
- bool dict_mutex_own); /*!< in: true if dict_sys->mutex
- is owned already */
-/********************************************************************//**
-Loads a table definition and also all its index definitions, and also
+ dict_table_t* table,
+ bool dict_mutex_own);
+
+/** Loads a table definition and all its index definitions, as well as
the cluster definition if the table is a member of a cluster. Also loads
all foreign key constraints where the foreign key is in the table or where
a foreign key references columns in this table.
+@param[in] name Table name in the dbname/tablename format
+@param[in]	ignore_err	Error to be ignored when loading the
+				table and its index definitions
@return table, NULL if does not exist; if the table is stored in an
-.ibd file, but the file does not exist, then we set the
-ibd_file_missing flag TRUE in the table object we return */
-UNIV_INTERN
-dict_table_t*
-dict_load_table(
-/*============*/
- const char* name, /*!< in: table name in the
- databasename/tablename format */
- ibool cached, /*!< in: TRUE=add to cache, FALSE=do not */
- dict_err_ignore_t ignore_err);
- /*!< in: error to be ignored when loading
- table and its indexes' definition */
+.ibd file, but the file does not exist, then we set the file_unreadable
+flag in the table object we return. */
+dict_table_t* dict_load_table(const char* name, dict_err_ignore_t ignore_err);
+
/***********************************************************************//**
Loads a table object based on the table id.
-@return table; NULL if table does not exist */
-UNIV_INTERN
+@return table; NULL if table does not exist */
dict_table_t*
dict_load_table_on_id(
/*==================*/
@@ -218,7 +122,6 @@ dict_load_table_on_id(
This function is called when the database is booted.
Loads system table index definitions except for the clustered index which
is added to the dictionary cache at booting before calling this function. */
-UNIV_INTERN
void
dict_load_sys_table(
/*================*/
@@ -226,11 +129,13 @@ dict_load_sys_table(
/***********************************************************************//**
Loads foreign key constraints where the table is either the foreign key
holder or where the table is referenced by a foreign key. Adds these
-constraints to the data dictionary. Note that we know that the dictionary
-cache already contains all constraints where the other relevant table is
-already in the dictionary cache.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+constraints to the data dictionary.
+
+The foreign key constraint is loaded only if the referenced table is also
+in the dictionary cache. If the referenced table is not in dictionary
+cache, then it is added to the output parameter (fk_tables).
+
+@return DB_SUCCESS or error code */
dberr_t
dict_load_foreigns(
/*===============*/
@@ -242,20 +147,16 @@ dict_load_foreigns(
chained by FK */
bool check_charsets, /*!< in: whether to check
charset compatibility */
- dict_err_ignore_t ignore_err) /*!< in: error to be ignored */
+ dict_err_ignore_t ignore_err, /*!< in: error to be ignored */
+ dict_names_t& fk_tables) /*!< out: stack of table names
+ which must be loaded
+ subsequently to load all the
+ foreign key constraints. */
MY_ATTRIBUTE((nonnull(1), warn_unused_result));
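/* Illustrative sketch, not part of the patch: draining the fk_tables
   stack that dict_load_foreigns() now fills with referenced tables that
   were not yet in the dictionary cache. The table name and the loader
   stub are placeholders, not real InnoDB calls. */
#include <deque>
#include <cstdio>

typedef std::deque<const char*> names_t;	/* stands in for dict_names_t */

static void load_table_stub(const char* name, names_t& fk_tables)
{
	std::printf("loading %s\n", name);
	/* a real loader would push any newly discovered, still
	   uncached referenced tables onto fk_tables here */
	(void) fk_tables;
}

int main()
{
	names_t fk_tables;
	fk_tables.push_back("db/child");
	/* loop until every transitively referenced table is loaded */
	while (!fk_tables.empty()) {
		const char* name = fk_tables.front();
		fk_tables.pop_front();
		load_table_stub(name, fk_tables);
	}
}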
-/********************************************************************//**
-Prints to the standard output information on all tables found in the data
-dictionary system table. */
-UNIV_INTERN
-void
-dict_print(void);
-/*============*/
/********************************************************************//**
This function opens a system table, and return the first record.
-@return first record of the system table */
-UNIV_INTERN
+@return first record of the system table */
const rec_t*
dict_startscan_system(
/*==================*/
@@ -265,8 +166,7 @@ dict_startscan_system(
dict_system_id_t system_id); /*!< in: which system table to open */
/********************************************************************//**
This function get the next system table record as we scan the table.
-@return the record if found, NULL if end of scan. */
-UNIV_INTERN
+@return the record if found, NULL if end of scan. */
const rec_t*
dict_getnext_system(
/*================*/
@@ -275,10 +175,8 @@ dict_getnext_system(
mtr_t* mtr); /*!< in: the mini-transaction */
/********************************************************************//**
This function processes one SYS_TABLES record and populate the dict_table_t
-struct for the table. Extracted out of dict_print() to be used by
-both monitor table output and information schema innodb_sys_tables output.
+struct for the table.
@return error message, or NULL on success */
-UNIV_INTERN
const char*
dict_process_sys_tables_rec_and_mtr_commit(
/*=======================================*/
@@ -296,7 +194,6 @@ This function parses a SYS_INDEXES record and populate a dict_index_t
structure with the information from the record. For detail information
about SYS_INDEXES fields, please refer to dict_boot() function.
@return error message, or NULL on success */
-UNIV_INTERN
const char*
dict_process_sys_indexes_rec(
/*=========================*/
@@ -309,7 +206,6 @@ dict_process_sys_indexes_rec(
This function parses a SYS_COLUMNS record and populate a dict_column_t
structure with the information from the record.
@return error message, or NULL on success */
-UNIV_INTERN
const char*
dict_process_sys_columns_rec(
/*=========================*/
@@ -317,12 +213,29 @@ dict_process_sys_columns_rec(
const rec_t* rec, /*!< in: current SYS_COLUMNS rec */
dict_col_t* column, /*!< out: dict_col_t to be filled */
table_id_t* table_id, /*!< out: table id */
- const char** col_name); /*!< out: column name */
+ const char** col_name, /*!< out: column name */
+	ulint*		nth_v_col);	/*!< out: if virtual col, this
+					records its sequence number */
+
+/** This function parses a SYS_VIRTUAL record and extracts virtual column
+information.
+@param[in,out]	heap	heap memory
+@param[in]	rec	current SYS_VIRTUAL rec
+@param[in,out] table_id table id
+@param[in,out] pos virtual column position
+@param[in,out] base_pos base column position
+@return error message, or NULL on success */
+const char*
+dict_process_sys_virtual_rec(
+ mem_heap_t* heap,
+ const rec_t* rec,
+ table_id_t* table_id,
+ ulint* pos,
+ ulint* base_pos);
/********************************************************************//**
This function parses a SYS_FIELDS record and populate a dict_field_t
structure with the information from the record.
@return error message, or NULL on success */
-UNIV_INTERN
const char*
dict_process_sys_fields_rec(
/*========================*/
@@ -338,7 +251,6 @@ This function parses a SYS_FOREIGN record and populate a dict_foreign_t
structure with the information from the record. For detail information
about SYS_FOREIGN fields, please refer to dict_load_foreign() function
@return error message, or NULL on success */
-UNIV_INTERN
const char*
dict_process_sys_foreign_rec(
/*=========================*/
@@ -350,7 +262,6 @@ dict_process_sys_foreign_rec(
This function parses a SYS_FOREIGN_COLS record and extract necessary
information from the record and return to caller.
@return error message, or NULL on success */
-UNIV_INTERN
const char*
dict_process_sys_foreign_col_rec(
/*=============================*/
@@ -365,7 +276,6 @@ dict_process_sys_foreign_col_rec(
This function parses a SYS_TABLESPACES record, extracts necessary
information from the record and returns to caller.
@return error message, or NULL on success */
-UNIV_INTERN
const char*
dict_process_sys_tablespaces(
/*=========================*/
@@ -378,7 +288,6 @@ dict_process_sys_tablespaces(
This function parses a SYS_DATAFILES record, extracts necessary
information from the record and returns to caller.
@return error message, or NULL on success */
-UNIV_INTERN
const char*
dict_process_sys_datafiles(
/*=======================*/
@@ -386,43 +295,28 @@ dict_process_sys_datafiles(
const rec_t* rec, /*!< in: current SYS_DATAFILES rec */
	ulint*		space,		/*!< out: space id */
const char** path); /*!< out: datafile path */
-/********************************************************************//**
-Get the filepath for a spaceid from SYS_DATAFILES. This function provides
-a temporary heap which is used for the table lookup, but not for the path.
-The caller must free the memory for the path returned. This function can
-return NULL if the space ID is not found in SYS_DATAFILES, then the caller
-will assume that the ibd file is in the normal datadir.
-@return own: A copy of the first datafile found in SYS_DATAFILES.PATH for
-the given space ID. NULL if space ID is zero or not found. */
-UNIV_INTERN
-char*
-dict_get_first_path(
-/*================*/
- ulint space, /*!< in: space id */
- const char* name); /*!< in: tablespace name */
-/********************************************************************//**
-Update the record for space_id in SYS_TABLESPACES to this filepath.
-@return DB_SUCCESS if OK, dberr_t if the insert failed */
-UNIV_INTERN
+
+/** Update the record for space_id in SYS_TABLESPACES to this filepath.
+@param[in] space_id Tablespace ID
+@param[in] filepath Tablespace filepath
+@return DB_SUCCESS if OK, dberr_t if the insert failed */
dberr_t
dict_update_filepath(
-/*=================*/
- ulint space_id, /*!< in: space id */
- const char* filepath); /*!< in: filepath */
-/********************************************************************//**
-Insert records into SYS_TABLESPACES and SYS_DATAFILES.
-@return DB_SUCCESS if OK, dberr_t if the insert failed */
-UNIV_INTERN
-dberr_t
-dict_insert_tablespace_and_filepath(
-/*================================*/
- ulint space, /*!< in: space id */
-	const char*	name,		/*!< in: tablespace name */
- const char* filepath, /*!< in: filepath */
- ulint fsp_flags); /*!< in: tablespace flags */
+ ulint space_id,
+ const char* filepath);
-#ifndef UNIV_NONINL
-#include "dict0load.ic"
-#endif
+/** Replace records in SYS_TABLESPACES and SYS_DATAFILES associated with
+the given space_id using an independent transaction.
+@param[in] space_id Tablespace ID
+@param[in] name Tablespace name
+@param[in] filepath First filepath
+@param[in] fsp_flags Tablespace flags
+@return DB_SUCCESS if OK, dberr_t if the insert failed */
+dberr_t
+dict_replace_tablespace_and_filepath(
+ ulint space_id,
+ const char* name,
+ const char* filepath,
+ ulint fsp_flags);
#endif
diff --git a/storage/innobase/include/dict0load.ic b/storage/innobase/include/dict0load.ic
deleted file mode 100644
index a0b0feb56dd..00000000000
--- a/storage/innobase/include/dict0load.ic
+++ /dev/null
@@ -1,26 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0load.ic
-Loads to the memory cache database object definitions
-from dictionary tables
-
-Created 4/24/1996 Heikki Tuuri
-*******************************************************/
-
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
index 505d4925324..ed4bf073061 100644
--- a/storage/innobase/include/dict0mem.h
+++ b/storage/innobase/include/dict0mem.h
@@ -28,25 +28,22 @@ Created 1/8/1996 Heikki Tuuri
#ifndef dict0mem_h
#define dict0mem_h
-#include "univ.i"
-#include "dict0types.h"
#include "data0type.h"
#include "mem0mem.h"
#include "row0types.h"
#include "rem0types.h"
#include "btr0types.h"
-#ifndef UNIV_HOTBACKUP
-# include "lock0types.h"
-# include "que0types.h"
-# include "sync0rw.h"
-#endif /* !UNIV_HOTBACKUP */
+#include "lock0types.h"
+#include "que0types.h"
+#include "sync0rw.h"
#include "ut0mem.h"
-#include "ut0lst.h"
#include "ut0rnd.h"
#include "ut0byte.h"
#include "hash0hash.h"
#include "trx0types.h"
#include "fts0fts.h"
+#include "buf0buf.h"
+#include "gis0type.h"
#include "os0once.h"
#include "fil0fil.h"
#include <my_crypt.h>
@@ -62,17 +59,20 @@ struct ib_rbt_t;
/** Type flags of an index: OR'ing of the flags is allowed to define a
combination of types */
/* @{ */
-#define DICT_CLUSTERED 1 /*!< clustered index */
+#define DICT_CLUSTERED 1 /*!< clustered index; for other than
+ auto-generated clustered indexes,
+ also DICT_UNIQUE will be set */
#define DICT_UNIQUE 2 /*!< unique index */
-#define DICT_UNIVERSAL 4 /*!< index which can contain records from any
- other index */
#define DICT_IBUF 8 /*!< insert buffer tree */
#define DICT_CORRUPT 16 /*!< bit to store the corrupted flag
in SYS_INDEXES.TYPE */
#define DICT_FTS 32 /* FTS index; can't be combined with the
other flags */
+#define DICT_SPATIAL 64 /* SPATIAL index; can't be combined with the
+ other flags */
+#define DICT_VIRTUAL 128 /* Index on Virtual column */
-#define DICT_IT_BITS 6 /*!< number of bits used for
+#define DICT_IT_BITS 8 /*!< number of bits used for
SYS_INDEXES.TYPE */
/* @} */
@@ -115,14 +115,17 @@ the Compact page format is used, i.e ROW_FORMAT != REDUNDANT */
/** Width of the COMPACT flag */
#define DICT_TF_WIDTH_COMPACT 1
+
/** Width of the ZIP_SSIZE flag */
#define DICT_TF_WIDTH_ZIP_SSIZE 4
+
/** Width of the ATOMIC_BLOBS flag. The Antelope file formats broke up
BLOB and TEXT fields, storing the first 768 bytes in the clustered index.
-Brracuda row formats store the whole blob or text field off-page atomically.
+Barracuda row formats store the whole blob or text field off-page atomically.
Secondary indexes are created from this external data using row_ext_t
to cache the BLOB prefixes. */
#define DICT_TF_WIDTH_ATOMIC_BLOBS 1
+
/** If a table is created with the MYSQL option DATA DIRECTORY and
innodb-file-per-table, an older engine will not be able to find that table.
This flag prevents older engines from attempting to open the table and
@@ -136,30 +139,18 @@ Width of the page compression flag
#define DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL 4
/**
-Width of the page encryption flag
-*/
-#define DICT_TF_WIDTH_PAGE_ENCRYPTION 1
-#define DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY 8
-
-/**
Width of atomic writes flag
DEFAULT=0, ON = 1, OFF = 2
*/
#define DICT_TF_WIDTH_ATOMIC_WRITES 2
/** Width of all the currently known table flags */
-#define DICT_TF_BITS (DICT_TF_WIDTH_COMPACT \
- + DICT_TF_WIDTH_ZIP_SSIZE \
- + DICT_TF_WIDTH_ATOMIC_BLOBS \
- + DICT_TF_WIDTH_DATA_DIR \
- + DICT_TF_WIDTH_PAGE_COMPRESSION \
- + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL \
- + DICT_TF_WIDTH_ATOMIC_WRITES \
- + DICT_TF_WIDTH_PAGE_ENCRYPTION \
- + DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY)
-
-/** A mask of all the known/used bits in table flags */
-#define DICT_TF_BIT_MASK (~(~0U << DICT_TF_BITS))
+#define DICT_TF_BITS (DICT_TF_WIDTH_COMPACT \
+ + DICT_TF_WIDTH_ZIP_SSIZE \
+ + DICT_TF_WIDTH_ATOMIC_BLOBS \
+ + DICT_TF_WIDTH_DATA_DIR \
+ + DICT_TF_WIDTH_PAGE_COMPRESSION \
+ + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL)
/** Zero relative shift position of the COMPACT field */
#define DICT_TF_POS_COMPACT 0
@@ -173,22 +164,16 @@ DEFAULT=0, ON = 1, OFF = 2
#define DICT_TF_POS_DATA_DIR (DICT_TF_POS_ATOMIC_BLOBS \
+ DICT_TF_WIDTH_ATOMIC_BLOBS)
/** Zero relative shift position of the PAGE_COMPRESSION field */
-#define DICT_TF_POS_PAGE_COMPRESSION (DICT_TF_POS_DATA_DIR \
- + DICT_TF_WIDTH_DATA_DIR)
+#define DICT_TF_POS_PAGE_COMPRESSION (DICT_TF_POS_DATA_DIR \
+ + DICT_TF_WIDTH_DATA_DIR)
/** Zero relative shift position of the PAGE_COMPRESSION_LEVEL field */
#define DICT_TF_POS_PAGE_COMPRESSION_LEVEL (DICT_TF_POS_PAGE_COMPRESSION \
+ DICT_TF_WIDTH_PAGE_COMPRESSION)
/** Zero relative shift position of the ATOMIC_WRITES field */
#define DICT_TF_POS_ATOMIC_WRITES (DICT_TF_POS_PAGE_COMPRESSION_LEVEL \
+ DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL)
-/** Zero relative shift position of the PAGE_ENCRYPTION field */
-#define DICT_TF_POS_PAGE_ENCRYPTION (DICT_TF_POS_ATOMIC_WRITES \
- + DICT_TF_WIDTH_ATOMIC_WRITES)
-/** Zero relative shift position of the PAGE_ENCRYPTION_KEY field */
-#define DICT_TF_POS_PAGE_ENCRYPTION_KEY (DICT_TF_POS_PAGE_ENCRYPTION \
- + DICT_TF_WIDTH_PAGE_ENCRYPTION)
-#define DICT_TF_POS_UNUSED (DICT_TF_POS_PAGE_ENCRYPTION_KEY \
- + DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY)
+#define DICT_TF_POS_UNUSED (DICT_TF_POS_ATOMIC_WRITES \
+ + DICT_TF_WIDTH_ATOMIC_WRITES)
/** Bit mask of the COMPACT field */
#define DICT_TF_MASK_COMPACT \
@@ -218,14 +203,6 @@ DEFAULT=0, ON = 1, OFF = 2
#define DICT_TF_MASK_ATOMIC_WRITES \
((~(~0U << DICT_TF_WIDTH_ATOMIC_WRITES)) \
<< DICT_TF_POS_ATOMIC_WRITES)
-/** Bit mask of the PAGE_ENCRYPTION field */
-#define DICT_TF_MASK_PAGE_ENCRYPTION \
- ((~(~0U << DICT_TF_WIDTH_PAGE_ENCRYPTION)) \
- << DICT_TF_POS_PAGE_ENCRYPTION)
-/** Bit mask of the PAGE_ENCRYPTION_KEY field */
-#define DICT_TF_MASK_PAGE_ENCRYPTION_KEY \
- ((~(~0U << DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY)) \
- << DICT_TF_POS_PAGE_ENCRYPTION_KEY)
/** Return the value of the COMPACT field */
#define DICT_TF_GET_COMPACT(flags) \
@@ -239,7 +216,7 @@ DEFAULT=0, ON = 1, OFF = 2
#define DICT_TF_HAS_ATOMIC_BLOBS(flags) \
((flags & DICT_TF_MASK_ATOMIC_BLOBS) \
>> DICT_TF_POS_ATOMIC_BLOBS)
-/** Return the value of the ATOMIC_BLOBS field */
+/** Return the value of the DATA_DIR field */
#define DICT_TF_HAS_DATA_DIR(flags) \
((flags & DICT_TF_MASK_DATA_DIR) \
>> DICT_TF_POS_DATA_DIR)
@@ -255,18 +232,7 @@ DEFAULT=0, ON = 1, OFF = 2
#define DICT_TF_GET_ATOMIC_WRITES(flags) \
((flags & DICT_TF_MASK_ATOMIC_WRITES) \
>> DICT_TF_POS_ATOMIC_WRITES)
-/** Return the contents of the PAGE_ENCRYPTION field */
-#define DICT_TF_GET_PAGE_ENCRYPTION(flags) \
- ((flags & DICT_TF_MASK_PAGE_ENCRYPTION) \
- >> DICT_TF_POS_PAGE_ENCRYPTION)
-/** Return the contents of the PAGE_ENCRYPTION KEY field */
-#define DICT_TF_GET_PAGE_ENCRYPTION_KEY(flags) \
- ((flags & DICT_TF_MASK_PAGE_ENCRYPTION_KEY) \
- >> DICT_TF_POS_PAGE_ENCRYPTION_KEY)
-
-/** Return the contents of the UNUSED bits */
-#define DICT_TF_GET_UNUSED(flags) \
- (flags >> DICT_TF_POS_UNUSED)
+
/* @} */
/** @brief Table Flags set number 2.
@@ -279,36 +245,42 @@ for unknown bits in order to protect backward incompatibility. */
/* @{ */
/** Total number of bits in table->flags2. */
#define DICT_TF2_BITS 7
-#define DICT_TF2_BIT_MASK ~(~0U << DICT_TF2_BITS)
+#define DICT_TF2_UNUSED_BIT_MASK (~0U << DICT_TF2_BITS)
+#define DICT_TF2_BIT_MASK ~DICT_TF2_UNUSED_BIT_MASK
/** TEMPORARY; TRUE for tables from CREATE TEMPORARY TABLE. */
-#define DICT_TF2_TEMPORARY 1
+#define DICT_TF2_TEMPORARY 1U
+
/** The table has an internal defined DOC ID column */
-#define DICT_TF2_FTS_HAS_DOC_ID 2
+#define DICT_TF2_FTS_HAS_DOC_ID 2U
+
/** The table has an FTS index */
-#define DICT_TF2_FTS 4
+#define DICT_TF2_FTS 4U
+
/** Need to add Doc ID column for FTS index build.
This is a transient bit for index build */
-#define DICT_TF2_FTS_ADD_DOC_ID 8
+#define DICT_TF2_FTS_ADD_DOC_ID 8U
+
/** This bit is used during table creation to indicate that it will
use its own tablespace instead of the system tablespace. */
-#define DICT_TF2_USE_TABLESPACE 16
+#define DICT_TF2_USE_FILE_PER_TABLE 16U
/** Set when we discard/detach the tablespace */
-#define DICT_TF2_DISCARDED 32
+#define DICT_TF2_DISCARDED 32U
/** This bit is set if all aux table names (both common tables and
index tables) of a FTS table are in HEX format. */
-#define DICT_TF2_FTS_AUX_HEX_NAME 64
+#define DICT_TF2_FTS_AUX_HEX_NAME 64U
+
/* @} */
-#define DICT_TF2_FLAG_SET(table, flag) \
+#define DICT_TF2_FLAG_SET(table, flag) \
(table->flags2 |= (flag))
-#define DICT_TF2_FLAG_IS_SET(table, flag) \
+#define DICT_TF2_FLAG_IS_SET(table, flag) \
(table->flags2 & (flag))
-#define DICT_TF2_FLAG_UNSET(table, flag) \
+#define DICT_TF2_FLAG_UNSET(table, flag) \
(table->flags2 &= ~(flag))
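/* Illustrative sketch, not part of the patch: the flags2 masks and the
   SET/IS_SET/UNSET macros above, applied to a minimal stand-in struct.
   DICT_TF2_UNUSED_BIT_MASK rejects any bit at position DICT_TF2_BITS or
   higher, which is what the new dict_tf2_is_valid() checks. */
#include <cassert>

struct tbl { unsigned flags2; };

#define TF2_FLAG_SET(t, f)	((t)->flags2 |= (f))
#define TF2_FLAG_IS_SET(t, f)	((t)->flags2 & (f))
#define TF2_FLAG_UNSET(t, f)	((t)->flags2 &= ~(f))

enum { TF2_TEMPORARY = 1U, TF2_DISCARDED = 32U, TF2_BITS = 7 };

int main()
{
	tbl t = { 0 };
	TF2_FLAG_SET(&t, TF2_DISCARDED);
	assert(TF2_FLAG_IS_SET(&t, TF2_DISCARDED));
	TF2_FLAG_UNSET(&t, TF2_DISCARDED);
	assert(!TF2_FLAG_IS_SET(&t, TF2_TEMPORARY));
	/* bit 8 lies in the unused mask and would fail validation */
	assert((1U << 8) & (~0U << TF2_BITS));
}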
/** Tables could be chained together with Foreign key constraint. When
@@ -325,19 +297,21 @@ result in recursive cascading calls. This defines the maximum number of
such cascading deletes/updates allowed. When exceeded, the delete from
parent table will fail, and user has to drop excessive foreign constraint
before proceeds. */
-#define FK_MAX_CASCADE_DEL 255
+#define FK_MAX_CASCADE_DEL 15
/**********************************************************************//**
Creates a table memory object.
-@return own: table object */
-UNIV_INTERN
+@return own: table object */
dict_table_t*
dict_mem_table_create(
/*==================*/
const char* name, /*!< in: table name */
ulint space, /*!< in: space where the clustered index
of the table is placed */
- ulint n_cols, /*!< in: number of columns */
+ ulint n_cols, /*!< in: total number of columns
+ including virtual and non-virtual
+ columns */
+ ulint n_v_cols, /*!< in: number of virtual columns */
ulint flags, /*!< in: table flags */
ulint flags2); /*!< in: table flags2 */
/**********************************************************************//**
@@ -350,14 +324,12 @@ dict_mem_table_is_system(
char *name); /*!< in: table name */
/****************************************************************//**
Free a table memory object. */
-UNIV_INTERN
void
dict_mem_table_free(
/*================*/
dict_table_t* table); /*!< in: table */
/**********************************************************************//**
Adds a column definition to a table. */
-UNIV_INTERN
void
dict_mem_table_add_col(
/*===================*/
@@ -368,21 +340,53 @@ dict_mem_table_add_col(
ulint prtype, /*!< in: precise type */
ulint len) /*!< in: precision */
MY_ATTRIBUTE((nonnull(1)));
+/** Adds a virtual column definition to a table.
+@param[in,out] table table
+@param[in] heap temporary memory heap, or NULL. It is
+ used to store name when we have not finished
+ adding all columns. When all columns are
+ added, the whole name will copy to memory from
+ table->heap
+@param[in] name column name
+@param[in] mtype main datatype
+@param[in] prtype precise type
+@param[in] len length
+@param[in] pos position in a table
+@param[in] num_base number of base columns
+@return the virtual column definition */
+dict_v_col_t*
+dict_mem_table_add_v_col(
+ dict_table_t* table,
+ mem_heap_t* heap,
+ const char* name,
+ ulint mtype,
+ ulint prtype,
+ ulint len,
+ ulint pos,
+ ulint num_base);
+
+/** Adds a stored column definition to a table.
+@param[in] table table
+@param[in] num_base number of base columns. */
+void
+dict_mem_table_add_s_col(
+ dict_table_t* table,
+ ulint num_base);
+
/**********************************************************************//**
Renames a column of a table in the data dictionary cache. */
-UNIV_INTERN
void
dict_mem_table_col_rename(
/*======================*/
dict_table_t* table, /*!< in/out: table */
- unsigned nth_col,/*!< in: column index */
+ ulint nth_col,/*!< in: column index */
const char* from, /*!< in: old column name */
- const char* to) /*!< in: new column name */
- MY_ATTRIBUTE((nonnull));
+ const char* to, /*!< in: new column name */
+ bool is_virtual);
+ /*!< in: if this is a virtual column */
/**********************************************************************//**
This function populates a dict_col_t memory structure with
supplied information. */
-UNIV_INTERN
void
dict_mem_fill_column_struct(
/*========================*/
@@ -411,8 +415,7 @@ dict_mem_fill_index_struct(
ulint n_fields); /*!< in: number of fields */
/**********************************************************************//**
Creates an index memory object.
-@return own: index object */
-UNIV_INTERN
+@return own: index object */
dict_index_t*
dict_mem_index_create(
/*==================*/
@@ -428,7 +431,6 @@ dict_mem_index_create(
Adds a field definition to an index. NOTE: does not take a copy
of the column name if the field is a column. The memory occupied
by the column name may be released only after publishing the index. */
-UNIV_INTERN
void
dict_mem_index_add_field(
/*=====================*/
@@ -439,15 +441,13 @@ dict_mem_index_add_field(
INDEX (textcol(25)) */
/**********************************************************************//**
Frees an index memory object. */
-UNIV_INTERN
void
dict_mem_index_free(
/*================*/
dict_index_t* index); /*!< in: index */
/**********************************************************************//**
Creates and initializes a foreign constraint memory object.
-@return own: foreign constraint struct */
-UNIV_INTERN
+@return own: foreign constraint struct */
dict_foreign_t*
dict_mem_foreign_create(void);
/*=========================*/
@@ -457,7 +457,6 @@ Sets the foreign_table_name_lookup pointer based on the value of
lower_case_table_names. If that is 0 or 1, foreign_table_name_lookup
will point to foreign_table_name. If 2, then another string is
allocated from the heap and set to lower case. */
-UNIV_INTERN
void
dict_mem_foreign_table_name_lookup_set(
/*===================================*/
@@ -469,26 +468,38 @@ Sets the referenced_table_name_lookup pointer based on the value of
lower_case_table_names. If that is 0 or 1, referenced_table_name_lookup
will point to referenced_table_name. If 2, then another string is
allocated from the heap and set to lower case. */
-UNIV_INTERN
void
dict_mem_referenced_table_name_lookup_set(
/*======================================*/
dict_foreign_t* foreign, /*!< in/out: foreign struct */
ibool do_alloc); /*!< in: is an alloc needed */
-/** Create a temporary tablename like "#sql-ibtid-inc where
- tid = the Table ID
- inc = a randomly initialized number that is incremented for each file
-The table ID is a 64 bit integer, can use up to 20 digits, and is
-initialized at bootstrap. The second number is 32 bits, can use up to 10
-digits, and is initialized at startup to a randomly distributed number.
-It is hoped that the combination of these two numbers will provide a
-reasonably unique temporary file name.
+/** Fill the set of dependent virtual columns. A virtual column is
+dependent when
+1) an FK is present on a base column of the virtual column, or
+2) an FK is present on a column that is part of a virtual index.
+@param[in,out]	foreign	foreign key information. */
+void
+dict_mem_foreign_fill_vcol_set(
+ dict_foreign_t* foreign);
+
+/** Fill virtual columns set in each fk constraint present in the table.
+@param[in,out] table innodb table object. */
+void
+dict_mem_table_fill_foreign_vcol_set(
+ dict_table_t* table);
+
+/** Free the vcol_set from all foreign key constraint on the table.
+@param[in,out] table innodb table object. */
+void
+dict_mem_table_free_foreign_vcol_set(
+ dict_table_t* table);
+
+/** Create a temporary tablename like "#sql-ibNNN".
@param[in] heap A memory heap
@param[in] dbtab Table name in the form database/table name
@param[in] id Table id
@return A unique temporary tablename suitable for InnoDB use */
-UNIV_INTERN
char*
dict_mem_create_temporary_tablename(
mem_heap_t* heap,
@@ -496,10 +507,90 @@ dict_mem_create_temporary_tablename(
table_id_t id);
/** Initialize dict memory variables */
-
void
dict_mem_init(void);
+/** SQL identifier name wrapper for pretty-printing */
+class id_name_t
+{
+public:
+ /** Default constructor */
+ id_name_t()
+ : m_name()
+ {}
+ /** Constructor
+ @param[in] name identifier to assign */
+ explicit id_name_t(
+ const char* name)
+ : m_name(name)
+ {}
+
+ /** Assignment operator
+ @param[in] name identifier to assign */
+ id_name_t& operator=(
+ const char* name)
+ {
+ m_name = name;
+ return(*this);
+ }
+
+ /** Implicit type conversion
+ @return the name */
+ operator const char*() const
+ {
+ return(m_name);
+ }
+
+ /** Explicit type conversion
+ @return the name */
+ const char* operator()() const
+ {
+ return(m_name);
+ }
+
+private:
+ /** The name in internal representation */
+ const char* m_name;
+};
+
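/* Editor's sketch, not part of the patch: minimal id_name_t usage,
assuming the usual InnoDB headers (ut_ad from ut0dbg.h). The wrapper is
non-owning, so the wrapped C string must outlive it. */
inline void id_name_t_usage_sketch()
{
	id_name_t	name("PRIMARY");	/* wrap an identifier */
	const char*	s = name;		/* implicit conversion */
	ut_ad(s == name());			/* operator() returns the same pointer */
	name = "idx_col1";			/* rebind to another identifier */
}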
+/** Table name wrapper for pretty-printing */
+struct table_name_t
+{
+ /** The name in internal representation */
+ char* m_name;
+
+ /** Default constructor */
+ table_name_t() {}
+ /** Constructor */
+ table_name_t(char* name) : m_name(name) {}
+
+ /** @return the end of the schema name */
+ const char* dbend() const
+ {
+ const char* sep = strchr(m_name, '/');
+ ut_ad(sep);
+ return sep;
+ }
+
+ /** @return the length of the schema name, in bytes */
+ size_t dblen() const { return dbend() - m_name; }
+
+ /** Determine the filename-safe encoded table name.
+ @return the filename-safe encoded table name */
+ const char* basename() const { return dbend() + 1; }
+
+ /** The start of the table basename suffix for partitioned tables */
+ static const char part_suffix[4];
+
+ /** Determine the partition or subpartition name suffix.
+ @return the partition name
+ @retval NULL if the table is not partitioned */
+ const char* part() const { return strstr(basename(), part_suffix); }
+
+ /** @return whether this is a temporary or intermediate table name */
+ inline bool is_temporary() const;
+};
+
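/* Editor's sketch, not part of the patch: parsing an internal
"db/table" name with table_name_t. It assumes part_suffix is defined as
"#P#" elsewhere in the source, and strcmp/ut_ad from the usual headers. */
inline void table_name_t_usage_sketch()
{
	char		buf[] = "test/t1#P#p0";	/* schema "test", partition "p0" */
	table_name_t	name(buf);

	ut_ad(name.dblen() == 4);		/* strlen("test") */
	ut_ad(!strcmp(name.basename(), "t1#P#p0"));
	ut_ad(name.part() != NULL);		/* points at "#P#p0" */
}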
/** Data structure for a column in a table */
struct dict_col_t{
/*----------------------*/
@@ -542,8 +633,76 @@ struct dict_col_t{
unsigned max_prefix:12; /*!< maximum index prefix length on
this column. Our current max limit is
3072 for Barracuda table */
+
+ /** @return whether this is a virtual column */
+ bool is_virtual() const { return prtype & DATA_VIRTUAL; }
+
+ /** Detach the column from an index.
+ @param[in] index index to be detached from */
+ inline void detach(const dict_index_t& index);
+};
+
+/** Index information kept in the index list of a virtual column
+structure. The index id and the virtual column's position in the index
+will be logged. There can be multiple entries for a given index, each
+with a different position. */
+struct dict_v_idx_t {
+ /** active index on the column */
+ dict_index_t* index;
+
+ /** position in this index */
+ ulint nth_field;
+};
+
+/** Index list to put in dict_v_col_t */
+typedef std::list<dict_v_idx_t, ut_allocator<dict_v_idx_t> > dict_v_idx_list;
+
+/** Data structure for a virtual column in a table */
+struct dict_v_col_t{
+ /** column structure */
+ dict_col_t m_col;
+
+	/** array of base column pointers */
+	dict_col_t**	base_col;
+
+	/** number of base columns */
+	ulint		num_base;
+
+ /** column pos in table */
+ ulint v_pos;
+
+ /** Virtual index list, and column position in the index,
+ the allocated memory is not from table->heap */
+ dict_v_idx_list* v_indexes;
+
+};
+
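/* Editor's sketch, not part of the patch: walking the base columns a
virtual column is computed from, given a fully constructed dict_v_col_t. */
inline void dict_v_col_usage_sketch(const dict_v_col_t* v_col)
{
	for (ulint i = 0; i < v_col->num_base; i++) {
		const dict_col_t*	base = v_col->base_col[i];
		/* base->mtype and base->prtype describe the
		materialized column that the value depends on */
		(void) base;
	}
}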
+/** Data structure for newly added virtual column in a table */
+struct dict_add_v_col_t{
+	/** number of new virtual columns */
+ ulint n_v_col;
+
+ /** column structures */
+ const dict_v_col_t* v_col;
+
+ /** new col names */
+ const char** v_col_name;
+};
+
+/** Data structure for a stored column in a table. */
+struct dict_s_col_t {
+ /** Stored column ptr */
+ dict_col_t* m_col;
+ /** array of base col ptr */
+ dict_col_t** base_col;
+ /** number of base columns */
+ ulint num_base;
+ /** column pos in table */
+ ulint s_pos;
};
+/** list to put stored column for create_table_info_t */
+typedef std::list<dict_s_col_t, ut_allocator<dict_s_col_t> > dict_s_col_list;
+
/** @brief DICT_ANTELOPE_MAX_INDEX_COL_LEN is measured in bytes and
is the maximum indexed column length (or indexed prefix length) in
ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT. Also, in any format,
@@ -574,6 +733,7 @@ be REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes */
/** Defines the maximum fixed length column size */
#define DICT_MAX_FIXED_COL_LEN DICT_ANTELOPE_MAX_INDEX_COL_LEN
+
#ifdef WITH_WSREP
#define WSREP_MAX_SUPPORTED_KEY_LENGTH 3500
#endif /* WITH_WSREP */
@@ -581,7 +741,7 @@ be REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes */
/** Data structure for a field in an index */
struct dict_field_t{
dict_col_t* col; /*!< pointer to the table column */
- const char* name; /*!< name of the column */
+ id_name_t name; /*!< name of the column */
unsigned prefix_len:12; /*!< 0 or the length of the column
prefix in bytes in a MySQL index of
type, e.g., INDEX (textcol(25));
@@ -636,12 +796,11 @@ extern ulong zip_failure_threshold_pct;
compression failures */
extern ulong zip_pad_max;
-/** Data structure to hold information about how much space in
+/** Data structure to hold information about how much space in
an uncompressed page should be left as padding to avoid compression
failures. This estimate is based on a self-adapting heuristic. */
struct zip_pad_info_t {
- os_fast_mutex_t*
- mutex; /*!< mutex protecting the info */
+ SysMutex* mutex; /*!< mutex protecting the info */
ulint pad; /*!< number of bytes used as pad */
ulint success;/*!< successful compression ops during
current round */
@@ -658,22 +817,29 @@ struct zip_pad_info_t {
a certain index.*/
#define STAT_DEFRAG_DATA_SIZE_N_SAMPLE 10
+/** "GEN_CLUST_INDEX" is the name reserved for InnoDB default
+system clustered index when there is no primary key. */
+const char innobase_index_reserve_name[] = "GEN_CLUST_INDEX";
+
/** Data structure for an index. Most fields will be
initialized to 0, NULL or FALSE in dict_mem_index_create(). */
struct dict_index_t{
index_id_t id; /*!< id of the index */
mem_heap_t* heap; /*!< memory heap */
- const char* name; /*!< index name */
+ id_name_t name; /*!< index name */
const char* table_name;/*!< table name */
dict_table_t* table; /*!< back pointer to table */
-#ifndef UNIV_HOTBACKUP
unsigned space:32;
/*!< space where the index tree is placed */
unsigned page:32;/*!< index tree root page number */
-#endif /* !UNIV_HOTBACKUP */
+ unsigned merge_threshold:6;
+				/*!< In a pessimistic delete, if the page
+				data size drops below this limit (in percent),
+				we try to merge the page into a neighbor */
+# define DICT_INDEX_MERGE_THRESHOLD_DEFAULT 50
unsigned type:DICT_IT_BITS;
/*!< index type (DICT_CLUSTERED, DICT_UNIQUE,
- DICT_UNIVERSAL, DICT_IBUF, DICT_CORRUPT) */
+ DICT_IBUF, DICT_CORRUPT) */
#define MAX_KEY_LENGTH_BITS 12
unsigned trx_id_offset:MAX_KEY_LENGTH_BITS;
/*!< position of the trx id column
@@ -687,6 +853,17 @@ struct dict_index_t{
/*!< number of columns the user defined to
be in the index: in the internal
representation we add more columns */
+ unsigned nulls_equal:1;
+ /*!< if true, SQL NULL == SQL NULL */
+#ifdef BTR_CUR_HASH_ADAPT
+#ifdef MYSQL_INDEX_DISABLE_AHI
+ unsigned disable_ahi:1;
+ /*!< whether to disable the
+ adaptive hash index.
+ Maybe this could be disabled for
+ temporary tables? */
+#endif
+#endif /* BTR_CUR_HASH_ADAPT */
unsigned n_uniq:10;/*!< number of fields from the beginning
which are enough to determine an index
entry uniquely */
@@ -705,14 +882,33 @@ struct dict_index_t{
by dict_operation_lock and
dict_sys->mutex. Other changes are
protected by index->lock. */
+ unsigned uncommitted:1;
+ /*!< a flag that is set for secondary indexes
+ that have not been committed to the
+ data dictionary yet */
+
+#ifdef UNIV_DEBUG
+ /** whether this is a dummy index object */
+ bool is_dummy;
+ uint32_t magic_n;/*!< magic number */
+/** Value of dict_index_t::magic_n */
+# define DICT_INDEX_MAGIC_N 76789786
+#endif
dict_field_t* fields; /*!< array of field descriptions */
+ st_mysql_ftparser*
+ parser; /*!< fulltext parser plugin */
+ bool has_new_v_col;
+ /*!< whether it has a newly added virtual
+ column in ALTER */
bool index_fts_syncing;/*!< Whether the fts index is
- still syncing in the background */
-#ifndef UNIV_HOTBACKUP
+ still syncing in the background;
+ FIXME: remove this and use MDL */
UT_LIST_NODE_T(dict_index_t)
indexes;/*!< list of indexes of the table */
+#ifdef BTR_CUR_ADAPT
btr_search_t* search_info;
/*!< info used in optimistic searches */
+#endif /* BTR_CUR_ADAPT */
row_log_t* online_log;
/*!< the log of modifications
during online index creation;
@@ -768,31 +964,148 @@ struct dict_index_t{
/* in which slot the next sample should be
saved. */
/* @} */
- rw_lock_t lock; /*!< read-write lock protecting the
- upper levels of the index tree */
+ rtr_ssn_t rtr_ssn;/*!< Node sequence number for RTree */
+ rtr_info_track_t*
+ rtr_track;/*!< tracking all R-Tree search cursors */
trx_id_t trx_id; /*!< id of the transaction that created this
index, or 0 if the index existed
when InnoDB was started up */
zip_pad_info_t zip_pad;/*!< Information about state of
compression failures and successes */
-#endif /* !UNIV_HOTBACKUP */
-#ifdef UNIV_BLOB_DEBUG
- ib_mutex_t blobs_mutex;
- /*!< mutex protecting blobs */
- ib_rbt_t* blobs; /*!< map of (page_no,heap_no,field_no)
- to first_blob_page_no; protected by
- blobs_mutex; @see btr_blob_dbg_t */
-#endif /* UNIV_BLOB_DEBUG */
+ rw_lock_t lock; /*!< read-write lock protecting the
+ upper levels of the index tree */
- bool is_readable() const;
+ /** Determine if the index has been committed to the
+ data dictionary.
+ @return whether the index definition has been committed */
+ bool is_committed() const
+ {
+ ut_ad(!uncommitted || !(type & DICT_CLUSTERED));
+ return(UNIV_LIKELY(!uncommitted));
+ }
-#ifdef UNIV_DEBUG
- ulint magic_n;/*!< magic number */
-/** Value of dict_index_t::magic_n */
-# define DICT_INDEX_MAGIC_N 76789786
-#endif
+ /** Flag an index committed or uncommitted.
+ @param[in] committed whether the index is committed */
+ void set_committed(bool committed)
+ {
+ ut_ad(!to_be_dropped);
+ ut_ad(committed || !(type & DICT_CLUSTERED));
+ uncommitted = !committed;
+ }
+
+ /** @return whether this index is readable
+ @retval true normally
+ @retval false if this is a single-table tablespace
+ and the .ibd file is missing, or a
+ page cannot be read or decrypted */
+ inline bool is_readable() const;
+
+ /** @return whether the index is the primary key index
+ (not the clustered index of the change buffer) */
+ bool is_primary() const
+ {
+ return DICT_CLUSTERED == (type & (DICT_CLUSTERED | DICT_IBUF));
+ }
+
+ /** @return whether the index includes virtual columns */
+ bool has_virtual() const { return type & DICT_VIRTUAL; }
+
+ /** @return whether the index is corrupted */
+ inline bool is_corrupted() const;
+
+ /** Detach the columns from the index that is to be freed. */
+ void detach_columns()
+ {
+ if (has_virtual()) {
+ for (unsigned i = 0; i < n_fields; i++) {
+ fields[i].col->detach(*this);
+ }
+
+ n_fields = 0;
+ }
+ }
+
+ /** This ad-hoc class is used by record_size_info only. */
+ class record_size_info_t {
+ public:
+ record_size_info_t()
+ : max_leaf_size(0), shortest_size(0), too_big(false),
+ first_overrun_field_index(SIZE_T_MAX), overrun_size(0)
+ {
+ }
+
+ /** Mark row potentially too big for page and set up first
+ overflow field index. */
+ void set_too_big(size_t field_index)
+ {
+ ut_ad(field_index != SIZE_T_MAX);
+
+ too_big = true;
+ if (first_overrun_field_index > field_index) {
+ first_overrun_field_index = field_index;
+ overrun_size = shortest_size;
+ }
+ }
+
+ /** @return overrun field index or SIZE_T_MAX if nothing
+ overflowed*/
+ size_t get_first_overrun_field_index() const
+ {
+ ut_ad(row_is_too_big());
+ ut_ad(first_overrun_field_index != SIZE_T_MAX);
+ return first_overrun_field_index;
+ }
+
+ size_t get_overrun_size() const
+ {
+ ut_ad(row_is_too_big());
+ return overrun_size;
+ }
+
+ bool row_is_too_big() const { return too_big; }
+
+		size_t max_leaf_size; /** Biggest row size this index can
+				      produce */
+		size_t shortest_size; /** shortest size, because it counts
+				      everything as stored in overflow pages */
+
+ private:
+		bool too_big; /** True when the maximum row size this
+			      index can produce is bigger than the maximum
+			      row size the given page can hold. */
+		size_t first_overrun_field_index; /** Index of the field whose
+						  addition made the row exceed
+						  the maximum allowed size;
+						  useful for reporting back to
+						  the user. */
+		size_t overrun_size; /** Row size at the point of overrun */
+ };
+
+	/** Returns the maximum possible record size for this index, the
+	shortest size (counting everything as stored in overflow pages),
+	and the index of the field which made index records too big to
+	fit on a page. */
+ inline record_size_info_t record_size_info() const;
};
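/* Editor's sketch, not part of the patch: how record_size_info_t
reports an oversized row. The field index and size are invented. */
inline void record_size_info_usage_sketch()
{
	dict_index_t::record_size_info_t	info;

	info.shortest_size = 8100;	/* pretend this is the running total */
	info.set_too_big(3);		/* field 3 pushed it over the limit */

	ut_ad(info.row_is_too_big());
	ut_ad(info.get_first_overrun_field_index() == 3);
	ut_ad(info.get_overrun_size() == 8100);
}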
+/** Detach a column from an index.
+@param[in] index index to be detached from */
+inline void dict_col_t::detach(const dict_index_t& index)
+{
+ if (!is_virtual()) {
+ return;
+ }
+
+ if (dict_v_idx_list* v_indexes = reinterpret_cast<const dict_v_col_t*>
+ (this)->v_indexes) {
+ for (dict_v_idx_list::iterator i = v_indexes->begin();
+ i != v_indexes->end(); i++) {
+ if (i->index == &index) {
+ v_indexes->erase(i);
+ return;
+ }
+ }
+ }
+}
+
/** The status of online index creation */
enum online_index_status {
/** the index is complete and ready for access */
@@ -813,6 +1126,11 @@ enum online_index_status {
ONLINE_INDEX_ABORTED_DROPPED
};
+/** Set to store the virtual columns which are affected by Foreign
+key constraint. */
+typedef std::set<dict_v_col_t*, std::less<dict_v_col_t*>,
+ ut_allocator<dict_v_col_t*> > dict_vcol_set;
+
/** Data structure for a foreign key constraint; an example:
FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D). Most fields will be
initialized to 0, NULL or FALSE in dict_mem_foreign_create(). */
@@ -848,6 +1166,9 @@ struct dict_foreign_t{
does not generate new indexes
implicitly */
dict_index_t* referenced_index;/*!< referenced index */
+
+ dict_vcol_set* v_cols; /*!< set of virtual columns affected
+ by foreign key constraint. */
};
std::ostream&
@@ -896,6 +1217,24 @@ struct dict_foreign_with_index {
const dict_index_t* m_index;
};
+#ifdef WITH_WSREP
+/** A function object to find a foreign key with the given index as the
+foreign index. Return the foreign key with matching criteria or NULL */
+struct dict_foreign_with_foreign_index {
+
+ dict_foreign_with_foreign_index(const dict_index_t* index)
+ : m_index(index)
+ {}
+
+ bool operator()(const dict_foreign_t* foreign) const
+ {
+ return(foreign->foreign_index == m_index);
+ }
+
+ const dict_index_t* m_index;
+};
+#endif
+
/* A function object to check if the foreign constraint is between different
tables. Returns true if foreign key constraint is between different tables,
false otherwise. */
@@ -933,7 +1272,10 @@ struct dict_foreign_matches_id {
const char* m_id;
};
-typedef std::set<dict_foreign_t*, dict_foreign_compare> dict_foreign_set;
+typedef std::set<
+ dict_foreign_t*,
+ dict_foreign_compare,
+ ut_allocator<dict_foreign_t*> > dict_foreign_set;
std::ostream&
operator<< (std::ostream& out, const dict_foreign_set& fk_set);
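/* Editor's sketch, not part of the patch: the function objects above
are meant for std::find_if over a dict_foreign_set (requires
<algorithm>). */
inline dict_foreign_t* dict_foreign_find_by_index_sketch(
	const dict_foreign_set&	fk_set,
	const dict_index_t*	index)
{
	dict_foreign_set::const_iterator it = std::find_if(
		fk_set.begin(), fk_set.end(), dict_foreign_with_index(index));

	return(it == fk_set.end() ? NULL : *it);
}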
@@ -977,6 +1319,10 @@ dict_foreign_free(
/*==============*/
dict_foreign_t* foreign) /*!< in, own: foreign key struct */
{
+ if (foreign->v_cols != NULL) {
+ UT_DELETE(foreign->v_cols);
+ }
+
mem_heap_free(foreign->heap);
}
@@ -1003,18 +1349,74 @@ struct dict_foreign_set_free {
/** The flags for ON_UPDATE and ON_DELETE can be ORed; the default is that
a foreign key constraint is enforced, therefore RESTRICT just means no flag */
/* @{ */
-#define DICT_FOREIGN_ON_DELETE_CASCADE 1 /*!< ON DELETE CASCADE */
-#define DICT_FOREIGN_ON_DELETE_SET_NULL 2 /*!< ON UPDATE SET NULL */
-#define DICT_FOREIGN_ON_UPDATE_CASCADE 4 /*!< ON DELETE CASCADE */
-#define DICT_FOREIGN_ON_UPDATE_SET_NULL 8 /*!< ON UPDATE SET NULL */
-#define DICT_FOREIGN_ON_DELETE_NO_ACTION 16 /*!< ON DELETE NO ACTION */
-#define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32 /*!< ON UPDATE NO ACTION */
+#define DICT_FOREIGN_ON_DELETE_CASCADE 1U /*!< ON DELETE CASCADE */
+#define DICT_FOREIGN_ON_DELETE_SET_NULL	2U	/*!< ON DELETE SET NULL */
+#define DICT_FOREIGN_ON_UPDATE_CASCADE	4U	/*!< ON UPDATE CASCADE */
+#define DICT_FOREIGN_ON_UPDATE_SET_NULL 8U /*!< ON UPDATE SET NULL */
+#define DICT_FOREIGN_ON_DELETE_NO_ACTION 16U /*!< ON DELETE NO ACTION */
+#define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32U /*!< ON UPDATE NO ACTION */
/* @} */
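/* Editor's sketch, not part of the patch: the ON DELETE / ON UPDATE
flags are ORed into the constraint's type word (dict_foreign_t::type);
RESTRICT is simply the absence of any flag. */
inline void dict_foreign_type_usage_sketch(dict_foreign_t* foreign)
{
	foreign->type = DICT_FOREIGN_ON_DELETE_CASCADE
		| DICT_FOREIGN_ON_UPDATE_NO_ACTION;

	if (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE) {
		/* deleting a parent row will cascade to child rows */
	}
}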
-/* This flag is for sync SQL DDL and memcached DML.
-if table->memcached_sync_count == DICT_TABLE_IN_DDL means there's DDL running on
-the table, DML from memcached will be blocked. */
-#define DICT_TABLE_IN_DDL -1
+/** Display an identifier.
+@param[in,out] s output stream
+@param[in] id_name SQL identifier (other than table name)
+@return the output stream */
+std::ostream&
+operator<<(
+ std::ostream& s,
+ const id_name_t& id_name);
+
+/** Display a table name.
+@param[in,out] s output stream
+@param[in] table_name table name
+@return the output stream */
+std::ostream&
+operator<<(
+ std::ostream& s,
+ const table_name_t& table_name);
+
+/** List of locks that different transactions have acquired on a table. This
+list has a list node that is embedded in a nested union/structure. We have to
+generate a specific template for it. */
+
+typedef ut_list_base<lock_t, ut_list_node<lock_t> lock_table_t::*>
+ table_lock_list_t;
+
+/** mysql template structure defined in row0mysql.cc */
+struct mysql_row_templ_t;
+
+/** Structure defines template related to virtual columns and
+their base columns */
+struct dict_vcol_templ_t {
+ /** number of regular columns */
+ ulint n_col;
+
+ /** number of virtual columns */
+ ulint n_v_col;
+
+ /** array of templates for virtual col and their base columns */
+ mysql_row_templ_t** vtempl;
+
+ /** table's database name */
+ std::string db_name;
+
+ /** table name */
+ std::string tb_name;
+
+ /** MySQL record length */
+ ulint rec_len;
+
+ /** default column value if any */
+ byte* default_rec;
+
+ /** cached MySQL TABLE object */
+ TABLE* mysql_table;
+
+ /** when mysql_table was cached */
+ uint64_t mysql_table_query_id;
+
+ dict_vcol_templ_t() : vtempl(0), mysql_table_query_id(~0ULL) {}
+};
/** These are used when MySQL FRM and InnoDB data dictionary are
in inconsistent state. */
@@ -1030,61 +1432,165 @@ typedef enum {
/** Data structure for a database table. Most fields will be
initialized to 0, NULL or FALSE in dict_mem_table_create(). */
-struct dict_table_t{
+struct dict_table_t {
+ /** Get reference count.
+ @return current value of n_ref_count */
+ inline int32 get_ref_count()
+ {
+ return my_atomic_load32_explicit(&n_ref_count,
+ MY_MEMORY_ORDER_RELAXED);
+ }
+
+ /** Acquire the table handle. */
+ inline void acquire();
+
+ /** Release the table handle.
+ @return whether the last handle was released */
+ inline bool release();
+
+ /** @return whether this is a temporary table */
+ bool is_temporary() const
+ {
+ return flags2 & DICT_TF2_TEMPORARY;
+ }
+
+ /** @return whether this table is readable
+ @retval true normally
+ @retval false if this is a single-table tablespace
+ and the .ibd file is missing, or a
+ page cannot be read or decrypted */
+ bool is_readable() const
+ {
+ return(UNIV_LIKELY(!file_unreadable));
+ }
+
+ /** Check if a table name contains the string "/#sql"
+ which denotes temporary or intermediate tables in MariaDB. */
+ static bool is_temporary_name(const char* name)
+ {
+ return strstr(name, "/" TEMP_FILE_PREFIX) != NULL;
+ }
+
+ /** For overflow fields returns potential max length stored inline */
+ size_t get_overflow_field_local_len() const;
+
+ /** Id of the table. */
+ table_id_t id;
+ /** Hash chain node. */
+ hash_node_t id_hash;
+ /** Table name. */
+ table_name_t name;
+ /** Hash chain node. */
+ hash_node_t name_hash;
+
+ /** Memory heap */
+ mem_heap_t* heap;
+
+ /** NULL or the directory path specified by DATA DIRECTORY. */
+ char* data_dir_path;
+
+ /** Space where the clustered index of the table is placed. */
+ uint32_t space;
+
+ /** Stores information about:
+ 1 row format (redundant or compact),
+ 2 compressed page size (zip shift size),
+ 3 whether using atomic blobs,
+ 4 whether the table has been created with the option DATA DIRECTORY.
+ Use DICT_TF_GET_COMPACT(), DICT_TF_GET_ZIP_SSIZE(),
+ DICT_TF_HAS_ATOMIC_BLOBS() and DICT_TF_HAS_DATA_DIR() to parse this
+ flag. */
+ unsigned flags:DICT_TF_BITS;
+
+ /** Stores information about:
+ 1 whether the table has been created using CREATE TEMPORARY TABLE,
+ 2 whether the table has an internally defined DOC ID column,
+ 3 whether the table has a FTS index,
+ 4 whether DOC ID column need to be added to the FTS index,
+	5 whether the table is being created in its own tablespace,
+	6 whether the table has been DISCARDed,
+	7 whether the aux FTS table names are in hex.
+ Use DICT_TF2_FLAG_IS_SET() to parse this flag. */
+ unsigned flags2:DICT_TF2_BITS;
+
+ /** TRUE if the table is an intermediate table during copy alter
+ operation or a partition/subpartition which is required for copying
+ data and skip the undo log for insertion of row in the table.
+ This variable will be set and unset during extra(), or during the
+ process of altering partitions */
+ unsigned skip_alter_undo:1;
+
+	/** Whether this is in a single-table tablespace and the .ibd
+	file is missing, or page decryption failed and a page is corrupted. */
+ unsigned file_unreadable:1;
+
+ /** TRUE if the table object has been added to the dictionary cache. */
+ unsigned cached:1;
+
+ /** TRUE if the table is to be dropped, but not yet actually dropped
+	(it could be in the background drop list). It is turned on at the beginning
+ of row_drop_table_for_mysql() and turned off just before we start to
+ update system tables for the drop. It is protected by
+ dict_operation_lock. */
+ unsigned to_be_dropped:1;
+
+ /** Number of non-virtual columns defined so far. */
+ unsigned n_def:10;
+
+ /** Number of non-virtual columns. */
+ unsigned n_cols:10;
+
+	/** Total number of columns (including virtual and non-virtual) */
+ unsigned n_t_cols:10;
+
+ /** Number of total columns defined so far. */
+ unsigned n_t_def:10;
+
+ /** Number of virtual columns defined so far. */
+ unsigned n_v_def:10;
+
+ /** Number of virtual columns. */
+ unsigned n_v_cols:10;
+
+ /** 1 + the position of autoinc counter field in clustered
+ index, or 0 if there is no persistent AUTO_INCREMENT column in
+ the table. */
+ unsigned persistent_autoinc:10;
+
+	/** TRUE if the table is neither an InnoDB system table nor
+	involved in any FK relationship, i.e. it can be evicted. */
+ unsigned can_be_evicted:1;
+
+ /** TRUE if table is corrupted. */
+ unsigned corrupted:1;
+
+ /** TRUE if some indexes should be dropped after ONLINE_INDEX_ABORTED
+ or ONLINE_INDEX_ABORTED_DROPPED. */
+ unsigned drop_aborted:1;
+
+ /** Array of column descriptions. */
+ dict_col_t* cols;
+
+ /** Array of virtual column descriptions. */
+ dict_v_col_t* v_cols;
+
+	/** List of stored column descriptions. It is used only for foreign key
+	checks during CREATE TABLE and copy ALTER operations.
+	During a copy ALTER, the s_cols list is filled during the create table
+	operation and needs to be preserved until the rename table operation.
+	That is why s_cols is part of dict_table_t. */
+ dict_s_col_list* s_cols;
+
+ /** Column names packed in a character string
+ "name1\0name2\0...nameN\0". Until the string contains n_cols, it will
+ be allocated from a temporary heap. The final string will be allocated
+ from table->heap. */
+ const char* col_names;
+
+ /** Virtual column names */
+ const char* v_col_names;
- table_id_t id; /*!< id of the table */
- hash_node_t id_hash; /*!< hash chain node */
- mem_heap_t* heap; /*!< memory heap */
- char* name; /*!< table name */
- hash_node_t name_hash; /*!< hash chain node */
- const char* dir_path_of_temp_table;/*!< NULL or the directory path
- where a TEMPORARY table that was explicitly
- created by a user should be placed if
- innodb_file_per_table is defined in my.cnf;
- in Unix this is usually /tmp/..., in Windows
- temp\... */
- char* data_dir_path; /*!< NULL or the directory path
- specified by DATA DIRECTORY */
- unsigned space:32;
- /*!< space where the clustered index of the
- table is placed */
- unsigned flags:DICT_TF_BITS; /*!< DICT_TF_... */
- unsigned flags2:DICT_TF2_BITS; /*!< DICT_TF2_... */
- unsigned file_unreadable:1;
- /*!< true if this is in a single-table
- tablespace and the .ibd file is missing or
- page decryption failed and page is corrupted; then
- we must return in ha_innodb.cc an error if the
- user tries to query such an orphaned table */
- unsigned cached:1;/*!< TRUE if the table object has been added
- to the dictionary cache */
- unsigned to_be_dropped:1;
- /*!< TRUE if the table is to be dropped, but
- not yet actually dropped (could in the bk
- drop list); It is turned on at the beginning
- of row_drop_table_for_mysql() and turned off
- just before we start to update system tables
- for the drop. It is protected by
- dict_operation_lock */
- unsigned n_def:10;/*!< number of columns defined so far */
- unsigned n_cols:10;/*!< number of columns */
- unsigned can_be_evicted:1;
- /*!< TRUE if it's not an InnoDB system table
- or a table that has no FK relationships */
- unsigned corrupted:1;
- /*!< TRUE if table is corrupted */
- unsigned drop_aborted:1;
- /*!< TRUE if some indexes should be dropped
- after ONLINE_INDEX_ABORTED
- or ONLINE_INDEX_ABORTED_DROPPED */
- dict_col_t* cols; /*!< array of column descriptions */
- const char* col_names;
- /*!< Column names packed in a character string
- "name1\0name2\0...nameN\0". Until
- the string contains n_cols, it will be
- allocated from a temporary heap. The final
- string will be allocated from table->heap. */
bool is_system_db;
/*!< True if the table belongs to a system
database (mysql, information_schema or
@@ -1093,294 +1599,275 @@ struct dict_table_t{
/*!< !DICT_FRM_CONSISTENT==0 if data
dictionary information and
MySQL FRM information mismatch. */
-#ifndef UNIV_HOTBACKUP
- UT_LIST_BASE_NODE_T(dict_index_t)
- indexes; /*!< list of indexes of the table */
-
- dict_foreign_set foreign_set;
- /*!< set of foreign key constraints
- in the table; these refer to columns
- in other tables */
-
- dict_foreign_set referenced_set;
- /*!< list of foreign key constraints
- which refer to this table */
-
- UT_LIST_NODE_T(dict_table_t)
- table_LRU; /*!< node of the LRU list of tables */
- unsigned fk_max_recusive_level:8;
- /*!< maximum recursive level we support when
- loading tables chained together with FK
- constraints. If exceeds this level, we will
- stop loading child table into memory along with
- its parent table */
- ulint n_foreign_key_checks_running;
- /*!< count of how many foreign key check
- operations are currently being performed
- on the table: we cannot drop the table while
- there are foreign key checks running on
- it! */
- trx_id_t def_trx_id;
- /*!< transaction id that last touched
- the table definition, either when
- loading the definition or CREATE
- TABLE, or ALTER TABLE (prepare,
- commit, and rollback phases) */
- trx_id_t query_cache_inv_trx_id;
- /*!< transactions whose trx id is
- smaller than this number are not
- allowed to store to the MySQL query
- cache or retrieve from it; when a trx
- with undo logs commits, it sets this
- to the value of the trx id counter for
- the tables it had an IX lock on */
-#ifdef UNIV_DEBUG
- /*----------------------*/
- ibool does_not_fit_in_memory;
- /*!< this field is used to specify in
- simulations tables which are so big
- that disk should be accessed: disk
- access is simulated by putting the
- thread to sleep for a while; NOTE that
- this flag is not stored to the data
- dictionary on disk, and the database
- will forget about value TRUE if it has
- to reload the table definition from
- disk */
-#endif /* UNIV_DEBUG */
- /*----------------------*/
- unsigned big_rows:1;
- /*!< flag: TRUE if the maximum length of
- a single row exceeds BIG_ROW_SIZE;
- initialized in dict_table_add_to_cache() */
- /** Statistics for query optimization */
- /* @{ */
-
- volatile os_once::state_t stats_latch_created;
- /*!< Creation state of 'stats_latch'. */
-
- rw_lock_t* stats_latch; /*!< this latch protects:
- dict_table_t::stat_initialized
- dict_table_t::stat_n_rows (*)
- dict_table_t::stat_clustered_index_size
- dict_table_t::stat_sum_of_other_index_sizes
- dict_table_t::stat_modified_counter (*)
- dict_table_t::indexes*::stat_n_diff_key_vals[]
- dict_table_t::indexes*::stat_index_size
- dict_table_t::indexes*::stat_n_leaf_pages
- (*) those are not always protected for
- performance reasons */
- unsigned stat_initialized:1; /*!< TRUE if statistics have
- been calculated the first time
- after database startup or table creation */
-#define DICT_TABLE_IN_USED -1
- lint memcached_sync_count;
- /*!< count of how many handles are opened
- to this table from memcached; DDL on the
- table is NOT allowed until this count
- goes to zero. If it's -1, means there's DDL
- on the table, DML from memcached will be
- blocked. */
- time_t stats_last_recalc;
- /*!< Timestamp of last recalc of the stats */
- ib_uint32_t stat_persistent;
- /*!< The two bits below are set in the
- ::stat_persistent member and have the following
- meaning:
- 1. _ON=0, _OFF=0, no explicit persistent stats
- setting for this table, the value of the global
- srv_stats_persistent is used to determine
- whether the table has persistent stats enabled
- or not
- 2. _ON=0, _OFF=1, persistent stats are
- explicitly disabled for this table, regardless
- of the value of the global srv_stats_persistent
- 3. _ON=1, _OFF=0, persistent stats are
- explicitly enabled for this table, regardless
- of the value of the global srv_stats_persistent
- 4. _ON=1, _OFF=1, not allowed, we assert if
- this ever happens. */
-#define DICT_STATS_PERSISTENT_ON (1 << 1)
-#define DICT_STATS_PERSISTENT_OFF (1 << 2)
- ib_uint32_t stats_auto_recalc;
- /*!< The two bits below are set in the
- ::stats_auto_recalc member and have
- the following meaning:
- 1. _ON=0, _OFF=0, no explicit auto recalc
- setting for this table, the value of the global
- srv_stats_persistent_auto_recalc is used to
- determine whether the table has auto recalc
- enabled or not
- 2. _ON=0, _OFF=1, auto recalc is explicitly
- disabled for this table, regardless of the
- value of the global
- srv_stats_persistent_auto_recalc
- 3. _ON=1, _OFF=0, auto recalc is explicitly
- enabled for this table, regardless of the
- value of the global
- srv_stats_persistent_auto_recalc
- 4. _ON=1, _OFF=1, not allowed, we assert if
- this ever happens. */
-#define DICT_STATS_AUTO_RECALC_ON (1 << 1)
-#define DICT_STATS_AUTO_RECALC_OFF (1 << 2)
- ulint stats_sample_pages;
- /*!< the number of pages to sample for this
- table during persistent stats estimation;
- if this is 0, then the value of the global
- srv_stats_persistent_sample_pages will be
- used instead. */
- ib_uint64_t stat_n_rows;
- /*!< approximate number of rows in the table;
- we periodically calculate new estimates */
- ulint stat_clustered_index_size;
- /*!< approximate clustered index size in
- database pages */
- ulint stat_sum_of_other_index_sizes;
- /*!< other indexes in database pages */
- ib_uint64_t stat_modified_counter;
- /*!< when a row is inserted, updated,
- or deleted,
- we add 1 to this number; we calculate new
- estimates for the stat_... values for the
- table and the indexes when about 1 / 16 of
- table has been modified;
- also when the estimate operation is
- called for MySQL SHOW TABLE STATUS; the
- counter is reset to zero at statistics
- calculation; this counter is not protected by
- any latch, because this is only used for
- heuristics */
-
-#define BG_STAT_IN_PROGRESS ((byte)(1 << 0))
- /*!< BG_STAT_IN_PROGRESS is set in
- stats_bg_flag when the background
- stats code is working on this table. The DROP
- TABLE code waits for this to be cleared
- before proceeding. */
-#define BG_STAT_SHOULD_QUIT ((byte)(1 << 1))
- /*!< BG_STAT_SHOULD_QUIT is set in
- stats_bg_flag when DROP TABLE starts
- waiting on BG_STAT_IN_PROGRESS to be cleared,
- the background stats thread will detect this
- and will eventually quit sooner */
-#define BG_SCRUB_IN_PROGRESS ((byte)(1 << 2))
+ /** The FTS_DOC_ID_INDEX, or NULL if no fulltext indexes exist */
+ dict_index_t* fts_doc_id_index;
+
+ /** List of indexes of the table. */
+ UT_LIST_BASE_NODE_T(dict_index_t) indexes;
+
+ /** List of foreign key constraints in the table. These refer to
+ columns in other tables. */
+ UT_LIST_BASE_NODE_T(dict_foreign_t) foreign_list;
+
+ /** List of foreign key constraints which refer to this table. */
+ UT_LIST_BASE_NODE_T(dict_foreign_t) referenced_list;
+
+ /** Node of the LRU list of tables. */
+ UT_LIST_NODE_T(dict_table_t) table_LRU;
+
+ /** Maximum recursive level we support when loading tables chained
+	together with FK constraints. If this level is exceeded, we stop
+	loading the child table into memory along with its parent table. */
+ unsigned fk_max_recusive_level:8;
+
+ /** Count of how many foreign key check operations are currently being
+ performed on the table. We cannot drop the table while there are
+ foreign key checks running on it. */
+ ulint n_foreign_key_checks_running;
+
+ /** Transactions whose view low limit is greater than this number are
+ not allowed to store to the MySQL query cache or retrieve from it.
+ When a trx with undo logs commits, it sets this to the value of the
+ transaction id. */
+ trx_id_t query_cache_inv_trx_id;
+
+ /** Transaction id that last touched the table definition. Either when
+ loading the definition or CREATE TABLE, or ALTER TABLE (prepare,
+ commit, and rollback phases). */
+ trx_id_t def_trx_id;
+
+	/** Set of foreign key constraints in the table; these refer to
+	columns in other tables. */
+	dict_foreign_set				foreign_set;
+
+	/** Set of foreign key constraints which refer to this table. */
+	dict_foreign_set				referenced_set;
+
+ /** Statistics for query optimization. @{ */
+
+ /** Creation state of 'stats_latch'. */
+ volatile os_once::state_t stats_latch_created;
+
+ /** This latch protects:
+ dict_table_t::stat_initialized,
+ dict_table_t::stat_n_rows (*),
+ dict_table_t::stat_clustered_index_size,
+ dict_table_t::stat_sum_of_other_index_sizes,
+ dict_table_t::stat_modified_counter (*),
+ dict_table_t::indexes*::stat_n_diff_key_vals[],
+ dict_table_t::indexes*::stat_index_size,
+ dict_table_t::indexes*::stat_n_leaf_pages.
+ (*) Those are not always protected for
+ performance reasons. */
+ rw_lock_t* stats_latch;
+
+ /** TRUE if statistics have been calculated the first time after
+ database startup or table creation. */
+ unsigned stat_initialized:1;
+
+ /** Timestamp of last recalc of the stats. */
+ time_t stats_last_recalc;
+
+ /** The two bits below are set in the 'stat_persistent' member. They
+ have the following meaning:
+ 1. _ON=0, _OFF=0, no explicit persistent stats setting for this table,
+ the value of the global srv_stats_persistent is used to determine
+ whether the table has persistent stats enabled or not
+ 2. _ON=0, _OFF=1, persistent stats are explicitly disabled for this
+ table, regardless of the value of the global srv_stats_persistent
+ 3. _ON=1, _OFF=0, persistent stats are explicitly enabled for this
+ table, regardless of the value of the global srv_stats_persistent
+ 4. _ON=1, _OFF=1, not allowed, we assert if this ever happens. */
+ #define DICT_STATS_PERSISTENT_ON (1 << 1)
+ #define DICT_STATS_PERSISTENT_OFF (1 << 2)
+
+ /** Indicates whether the table uses persistent stats or not. See
+ DICT_STATS_PERSISTENT_ON and DICT_STATS_PERSISTENT_OFF. */
+ ib_uint32_t stat_persistent;
+
+ /** The two bits below are set in the 'stats_auto_recalc' member. They
+ have the following meaning:
+ 1. _ON=0, _OFF=0, no explicit auto recalc setting for this table, the
+ value of the global srv_stats_persistent_auto_recalc is used to
+ determine whether the table has auto recalc enabled or not
+ 2. _ON=0, _OFF=1, auto recalc is explicitly disabled for this table,
+ regardless of the value of the global srv_stats_persistent_auto_recalc
+ 3. _ON=1, _OFF=0, auto recalc is explicitly enabled for this table,
+ regardless of the value of the global srv_stats_persistent_auto_recalc
+ 4. _ON=1, _OFF=1, not allowed, we assert if this ever happens. */
+ #define DICT_STATS_AUTO_RECALC_ON (1 << 1)
+ #define DICT_STATS_AUTO_RECALC_OFF (1 << 2)
+
+ /** Indicates whether the table uses automatic recalc for persistent
+ stats or not. See DICT_STATS_AUTO_RECALC_ON and
+ DICT_STATS_AUTO_RECALC_OFF. */
+ ib_uint32_t stats_auto_recalc;
+
+ /** The number of pages to sample for this table during persistent
+ stats estimation. If this is 0, then the value of the global
+ srv_stats_persistent_sample_pages will be used instead. */
+ ulint stats_sample_pages;
+
+ /** Approximate number of rows in the table. We periodically calculate
+ new estimates. */
+ ib_uint64_t stat_n_rows;
+
+ /** Approximate clustered index size in database pages. */
+ ulint stat_clustered_index_size;
+
+ /** Approximate size of other indexes in database pages. */
+ ulint stat_sum_of_other_index_sizes;
+
+ /** How many rows are modified since last stats recalc. When a row is
+ inserted, updated, or deleted, we add 1 to this number; we calculate
+ new estimates for the table and the indexes if the table has changed
+ too much, see dict_stats_update_if_needed(). The counter is reset
+ to zero at statistics calculation. This counter is not protected by
+ any latch, because this is only used for heuristics. */
+ ib_uint64_t stat_modified_counter;
+
+ /** Background stats thread is not working on this table. */
+ #define BG_STAT_NONE 0
+
+ /** Set in 'stats_bg_flag' when the background stats code is working
+ on this table. The DROP TABLE code waits for this to be cleared before
+ proceeding. */
+ #define BG_STAT_IN_PROGRESS (1 << 0)
+
+ /** Set in 'stats_bg_flag' when DROP TABLE starts waiting on
+ BG_STAT_IN_PROGRESS to be cleared. The background stats thread will
+ detect this and will eventually quit sooner. */
+ #define BG_STAT_SHOULD_QUIT (1 << 1)
+
+ #define BG_SCRUB_IN_PROGRESS ((byte)(1 << 2))
/*!< BG_SCRUB_IN_PROGRESS is set in
stats_bg_flag when the background
scrub code is working on this table. The DROP
TABLE code waits for this to be cleared
before proceeding. */
-#define BG_IN_PROGRESS (BG_STAT_IN_PROGRESS | BG_SCRUB_IN_PROGRESS)
+	#define BG_IN_PROGRESS (BG_STAT_IN_PROGRESS | BG_SCRUB_IN_PROGRESS)
+
+ /** The state of the background stats thread wrt this table.
+ See BG_STAT_NONE, BG_STAT_IN_PROGRESS and BG_STAT_SHOULD_QUIT.
+ Writes are covered by dict_sys->mutex. Dirty reads are possible. */
+ byte stats_bg_flag;
- byte stats_bg_flag;
- /*!< see BG_STAT_* above.
- Writes are covered by dict_sys->mutex.
- Dirty reads are possible. */
bool stats_error_printed;
				/*!< Has a persistent stats error already
				been printed for this table? */
- /* @} */
- /*----------------------*/
- /**!< The following fields are used by the
- AUTOINC code. The actual collection of
- tables locked during AUTOINC read/write is
- kept in trx_t. In order to quickly determine
- whether a transaction has locked the AUTOINC
- lock we keep a pointer to the transaction
- here in the autoinc_trx variable. This is to
- avoid acquiring the lock_sys_t::mutex and
- scanning the vector in trx_t.
-
- When an AUTOINC lock has to wait, the
- corresponding lock instance is created on
- the trx lock heap rather than use the
- pre-allocated instance in autoinc_lock below.*/
- /* @{ */
- lock_t* autoinc_lock;
- /*!< a buffer for an AUTOINC lock
- for this table: we allocate the memory here
- so that individual transactions can get it
- and release it without a need to allocate
- space from the lock heap of the trx:
- otherwise the lock heap would grow rapidly
- if we do a large insert from a select */
- ib_mutex_t* autoinc_mutex;
- /*!< mutex protecting the autoincrement
- counter */
+ /* @} */
+
+ /** AUTOINC related members. @{ */
+
+ /* The actual collection of tables locked during AUTOINC read/write is
+ kept in trx_t. In order to quickly determine whether a transaction has
+ locked the AUTOINC lock we keep a pointer to the transaction here in
+ the 'autoinc_trx' member. This is to avoid acquiring the
+ lock_sys_t::mutex and scanning the vector in trx_t.
+ When an AUTOINC lock has to wait, the corresponding lock instance is
+	created on the trx lock heap rather than using the pre-allocated instance
+ in autoinc_lock below. */
+
+ /** A buffer for an AUTOINC lock for this table. We allocate the
+ memory here so that individual transactions can get it and release it
+ without a need to allocate space from the lock heap of the trx:
+ otherwise the lock heap would grow rapidly if we do a large insert
+ from a select. */
+ lock_t* autoinc_lock;
/** Creation state of autoinc_mutex member */
- volatile os_once::state_t
- autoinc_mutex_created;
-
- ib_uint64_t autoinc;/*!< autoinc counter value to give to the
- next inserted row */
- ulong n_waiting_or_granted_auto_inc_locks;
- /*!< This counter is used to track the number
- of granted and pending autoinc locks on this
- table. This value is set after acquiring the
- lock_sys_t::mutex but we peek the contents to
- determine whether other transactions have
- acquired the AUTOINC lock or not. Of course
- only one transaction can be granted the
- lock but there can be multiple waiters. */
- const trx_t* autoinc_trx;
- /*!< The transaction that currently holds the
- the AUTOINC lock on this table.
- Protected by lock_sys->mutex. */
- fts_t* fts; /* FTS specific state variables */
- /* @} */
- /*----------------------*/
+ volatile os_once::state_t autoinc_mutex_created;
- ib_quiesce_t quiesce;/*!< Quiescing states, protected by the
- dict_index_t::lock. ie. we can only change
- the state if we acquire all the latches
- (dict_index_t::lock) in X mode of this table's
- indexes. */
+ /** Mutex protecting the autoincrement counter. */
+ ib_mutex_t* autoinc_mutex;
- /*----------------------*/
- ulint n_rec_locks;
- /*!< Count of the number of record locks on
- this table. We use this to determine whether
- we can evict the table from the dictionary
- cache. It is protected by lock_sys->mutex. */
- ulint n_ref_count;
- /*!< count of how many handles are opened
- to this table; dropping of the table is
- NOT allowed until this count gets to zero;
- MySQL does NOT itself check the number of
- open handles at drop */
- UT_LIST_BASE_NODE_T(lock_t)
- locks; /*!< list of locks on the table; protected
- by lock_sys->mutex */
-#endif /* !UNIV_HOTBACKUP */
-
- /* Returns true if this is a single-table tablespace
- and the .ibd file is missing or page decryption failed
- and/or page is corrupted.
- @return true if table is readable
- @retval false if table is not readable */
- inline bool is_readable() const
- {
- return(UNIV_LIKELY(!file_unreadable));
- }
+ /** Autoinc counter value to give to the next inserted row. */
+ ib_uint64_t autoinc;
+
+ /** This counter is used to track the number of granted and pending
+ autoinc locks on this table. This value is set after acquiring the
+ lock_sys_t::mutex but we peek the contents to determine whether other
+ transactions have acquired the AUTOINC lock or not. Of course only one
+ transaction can be granted the lock but there can be multiple
+ waiters. */
+ ulong n_waiting_or_granted_auto_inc_locks;
+
+	/** The transaction that currently holds the AUTOINC lock on this
+ table. Protected by lock_sys->mutex. */
+ const trx_t* autoinc_trx;
+
+ /* @} */
+
+ /** FTS specific state variables. */
+ fts_t* fts;
+
+	/** Quiescing states, protected by dict_index_t::lock; i.e. we can
+	only change the state if we acquire all the latches
+	(dict_index_t::lock) of this table's indexes in X mode. */
+ ib_quiesce_t quiesce;
+
+ /** Count of the number of record locks on this table. We use this to
+ determine whether we can evict the table from the dictionary cache.
+ It is protected by lock_sys->mutex. */
+ ulint n_rec_locks;
+
+private:
+ /** Count of how many handles are opened to this table. Dropping of the
+ table is NOT allowed until this count gets to zero. MySQL does NOT
+ itself check the number of open handles at DROP. */
+ int32 n_ref_count;
+
+public:
+ /** List of locks on the table. Protected by lock_sys->mutex. */
+ table_lock_list_t locks;
+
+ /** Timestamp of the last modification of this table. */
+ time_t update_time;
#ifdef UNIV_DEBUG
- ulint magic_n;/*!< magic number */
-/** Value of dict_table_t::magic_n */
-# define DICT_TABLE_MAGIC_N 76333786
+ /** Value of 'magic_n'. */
+ #define DICT_TABLE_MAGIC_N 76333786
+
+ /** Magic number. */
+ ulint magic_n;
#endif /* UNIV_DEBUG */
+	/** mysql_row_templ_t for base columns used to compute the virtual
+ columns */
+ dict_vcol_templ_t* vc_templ;
};
-/* Returns true if this is a single-table tablespace
-and the .ibd file is missing or page decryption failed
-and/or page is corrupted.
-@return true if table is readable
-@retval false if table is not readable */
+inline bool table_name_t::is_temporary() const
+{
+ return dict_table_t::is_temporary_name(m_name);
+}
+
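/* Editor's sketch, not part of the patch: temporary and intermediate
table names are recognized by the "/#sql" infix that TEMP_FILE_PREFIX
expands to. */
inline void temporary_name_usage_sketch()
{
	ut_ad(dict_table_t::is_temporary_name("test/#sql-ib123"));
	ut_ad(!dict_table_t::is_temporary_name("test/t1"));
}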
inline bool dict_index_t::is_readable() const
{
return(UNIV_LIKELY(!table->file_unreadable));
}
+inline bool dict_index_t::is_corrupted() const
+{
+ return UNIV_UNLIKELY(online_status >= ONLINE_INDEX_ABORTED
+ || (type & DICT_CORRUPT)
+ || (table && table->corrupted));
+}
+
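/* Editor's sketch, not part of the patch: a typical guard that
combines the accessors above before touching an index B-tree. */
inline bool dict_index_usable_sketch(const dict_index_t* index)
{
	return(index->is_committed()
	       && index->is_readable()
	       && !index->is_corrupted());
}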
+/*******************************************************************//**
+Initialise the table lock list. */
+void
+lock_table_lock_list_init(
+/*======================*/
+ table_lock_list_t* locks); /*!< List to initialise */
+
/** A function object to add the foreign key constraint to the referenced set
of the referenced table, if it exists in the dictionary cache. */
struct dict_foreign_add_to_referenced_table {
@@ -1406,24 +1893,10 @@ dict_table_autoinc_destroy(
if (table->autoinc_mutex_created == os_once::DONE
&& table->autoinc_mutex != NULL) {
mutex_free(table->autoinc_mutex);
- delete table->autoinc_mutex;
+ UT_DELETE(table->autoinc_mutex);
}
}
-/** Allocate and init the autoinc latch of a given table.
-This function must not be called concurrently on the same table object.
-@param[in,out] table_void table whose autoinc latch to create */
-void
-dict_table_autoinc_alloc(
- void* table_void);
-
-/** Allocate and init the zip_pad_mutex of a given index.
-This function must not be called concurrently on the same index object.
-@param[in,out] index_void index whose zip_pad_mutex to create */
-void
-dict_index_zip_pad_alloc(
- void* index_void);
-
/** Request for lazy creation of the autoinc latch of a given table.
This function is only called from either single threaded environment
or from a thread that has not shared the table object with other threads.
@@ -1433,13 +1906,8 @@ void
dict_table_autoinc_create_lazy(
dict_table_t* table)
{
-#ifdef HAVE_ATOMIC_BUILTINS
table->autoinc_mutex = NULL;
table->autoinc_mutex_created = os_once::NEVER_DONE;
-#else /* HAVE_ATOMIC_BUILTINS */
- dict_table_autoinc_alloc(table);
- table->autoinc_mutex_created = os_once::DONE;
-#endif /* HAVE_ATOMIC_BUILTINS */
}
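/* Editor's sketch, not part of the patch: how the lazily created latch
is materialized on first use. dict_table_autoinc_alloc_cb is a
hypothetical callback name; the real allocator lives in dict0dict.cc,
and os_once::do_or_wait_for_done runs it exactly once. */
inline void dict_table_autoinc_enter_sketch(dict_table_t* table)
{
	extern void dict_table_autoinc_alloc_cb(void*);

	os_once::do_or_wait_for_done(
		&table->autoinc_mutex_created,
		dict_table_autoinc_alloc_cb, table);

	mutex_enter(table->autoinc_mutex);
}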
/** Request a lazy creation of dict_index_t::zip_pad::mutex.
@@ -1451,13 +1919,8 @@ void
dict_index_zip_pad_mutex_create_lazy(
dict_index_t* index)
{
-#ifdef HAVE_ATOMIC_BUILTINS
index->zip_pad.mutex = NULL;
index->zip_pad.mutex_created = os_once::NEVER_DONE;
-#else /* HAVE_ATOMIC_BUILTINS */
- dict_index_zip_pad_alloc(index);
- index->zip_pad.mutex_created = os_once::DONE;
-#endif /* HAVE_ATOMIC_BUILTINS */
}
/** Destroy the zip_pad_mutex of the given index.
@@ -1471,8 +1934,8 @@ dict_index_zip_pad_mutex_destroy(
{
if (index->zip_pad.mutex_created == os_once::DONE
&& index->zip_pad.mutex != NULL) {
- os_fast_mutex_free(index->zip_pad.mutex);
- delete index->zip_pad.mutex;
+ mutex_free(index->zip_pad.mutex);
+ UT_DELETE(index->zip_pad.mutex);
}
}
@@ -1483,7 +1946,7 @@ void
dict_index_zip_pad_unlock(
dict_index_t* index)
{
- os_fast_mutex_unlock(index->zip_pad.mutex);
+ mutex_exit(index->zip_pad.mutex);
}
#ifdef UNIV_DEBUG
@@ -1499,8 +1962,47 @@ dict_table_autoinc_own(
}
#endif /* UNIV_DEBUG */
-#ifndef UNIV_NONINL
+/** Check whether the column is used in a spatial index or a regular index.
+@param[in] col column to check
+@return spatial status */
+inline
+spatial_status_t
+dict_col_get_spatial_status(
+ const dict_col_t* col)
+{
+ spatial_status_t spatial_status = SPATIAL_NONE;
+
+ /* Column is not a part of any index. */
+ if (!col->ord_part) {
+ return(spatial_status);
+ }
+
+ if (DATA_GEOMETRY_MTYPE(col->mtype)) {
+ if (col->max_prefix == 0) {
+ spatial_status = SPATIAL_ONLY;
+ } else {
+ /* Any regular index on a geometry column
+ should have a prefix. */
+ spatial_status = SPATIAL_MIXED;
+ }
+ }
+
+ return(spatial_status);
+}
+
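/* Editor's sketch, not part of the patch: interpreting the spatial
status computed above. */
inline bool dict_col_in_spatial_index_sketch(const dict_col_t* col)
{
	switch (dict_col_get_spatial_status(col)) {
	case SPATIAL_ONLY:
	case SPATIAL_MIXED:
		return(true);	/* at least one spatial index uses the column */
	default:
		return(false);
	}
}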
+/** Clear defragmentation summary. */
+inline void dict_stats_empty_defrag_summary(dict_index_t* index)
+{
+ index->stat_defrag_n_pages_freed = 0;
+}
+
+/** Clear defragmentation related index stats. */
+inline void dict_stats_empty_defrag_stats(dict_index_t* index)
+{
+ index->stat_defrag_modified_counter = 0;
+ index->stat_defrag_n_page_split = 0;
+}
+
#include "dict0mem.ic"
-#endif
-#endif
+#endif /* dict0mem_h */
diff --git a/storage/innobase/include/dict0mem.ic b/storage/innobase/include/dict0mem.ic
index 9acdd6a45cc..d63f15ef7f3 100644
--- a/storage/innobase/include/dict0mem.ic
+++ b/storage/innobase/include/dict0mem.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -59,16 +60,19 @@ dict_mem_fill_index_struct(
/* Assign a ulint to a 4-bit-mapped field.
Only the low-order 4 bits are assigned. */
- index->type = type;
-#ifndef UNIV_HOTBACKUP
+ index->type = unsigned(type);
index->space = (unsigned int) space;
index->page = FIL_NULL;
-#endif /* !UNIV_HOTBACKUP */
+ index->merge_threshold = DICT_INDEX_MERGE_THRESHOLD_DEFAULT;
index->table_name = table_name;
index->n_fields = (unsigned int) n_fields;
/* The '1 +' above prevents allocation
of an empty mem block */
-#ifdef UNIV_DEBUG
- index->magic_n = DICT_INDEX_MAGIC_N;
-#endif /* UNIV_DEBUG */
+ index->nulls_equal = false;
+#ifdef BTR_CUR_HASH_ADAPT
+#ifdef MYSQL_INDEX_DISABLE_AHI
+ index->disable_ahi = false;
+#endif
+#endif /* BTR_CUR_HASH_ADAPT */
+ ut_d(index->magic_n = DICT_INDEX_MAGIC_N);
}
diff --git a/storage/innobase/include/dict0pagecompress.h b/storage/innobase/include/dict0pagecompress.h
index 9c0c551d9e1..dfa6f2a244d 100644
--- a/storage/innobase/include/dict0pagecompress.h
+++ b/storage/innobase/include/dict0pagecompress.h
@@ -56,28 +56,6 @@ dict_table_page_compression_level(
const dict_table_t* table) /*!< in: table */
__attribute__((const));
-/********************************************************************//**
-Extract the atomic writes flag from table flags.
-@return true if atomic writes are used, false if not used */
-UNIV_INLINE
-atomic_writes_t
-dict_tf_get_atomic_writes(
-/*======================*/
- ulint flags) /*!< in: flags */
- __attribute__((const));
-
-/********************************************************************//**
-Check whether the table uses the atomic writes.
-@return true if atomic writes is used, false if not */
-UNIV_INLINE
-atomic_writes_t
-dict_table_get_atomic_writes(
-/*=========================*/
- const dict_table_t* table); /*!< in: table */
-
-
-#ifndef UNIV_NONINL
#include "dict0pagecompress.ic"
-#endif
#endif
diff --git a/storage/innobase/include/dict0pagecompress.ic b/storage/innobase/include/dict0pagecompress.ic
index 6311668ee8a..c959f9cada2 100644
--- a/storage/innobase/include/dict0pagecompress.ic
+++ b/storage/innobase/include/dict0pagecompress.ic
@@ -79,27 +79,3 @@ dict_table_is_page_compressed(
{
return (dict_tf_get_page_compression(table->flags));
}
-
-/********************************************************************//**
-Extract the atomic writes flag from table flags.
-@return enumerated value of atomic writes */
-UNIV_INLINE
-atomic_writes_t
-dict_tf_get_atomic_writes(
-/*======================*/
- ulint flags) /*!< in: flags */
-{
- return((atomic_writes_t)DICT_TF_GET_ATOMIC_WRITES(flags));
-}
-
-/********************************************************************//**
-Check whether the table uses the atomic writes.
-@return enumerated value of atomic writes */
-UNIV_INLINE
-atomic_writes_t
-dict_table_get_atomic_writes(
-/*=========================*/
- const dict_table_t* table) /*!< in: table */
-{
- return ((atomic_writes_t)dict_tf_get_atomic_writes(table->flags));
-}
diff --git a/storage/innobase/include/dict0priv.h b/storage/innobase/include/dict0priv.h
index 3ebd7599703..e56848d1954 100644
--- a/storage/innobase/include/dict0priv.h
+++ b/storage/innobase/include/dict0priv.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -29,7 +29,7 @@ Created Fri 2 Jul 2010 13:30:38 EST - Sunny Bains
/**********************************************************************//**
Gets a table; loads it to the dictionary cache if necessary. A low-level
function. Note: Not to be called from outside dict0*c functions.
-@return table, NULL if not found */
+@return table, NULL if not found */
UNIV_INLINE
dict_table_t*
dict_table_get_low(
@@ -38,7 +38,7 @@ dict_table_get_low(
/**********************************************************************//**
Checks if a table is in the dictionary cache.
-@return table, NULL if not found */
+@return table, NULL if not found */
UNIV_INLINE
dict_table_t*
dict_table_check_if_in_cache_low(
@@ -47,7 +47,7 @@ dict_table_check_if_in_cache_low(
/**********************************************************************//**
Returns a table object based on table id.
-@return table, NULL if does not exist */
+@return table, NULL if does not exist */
UNIV_INLINE
dict_table_t*
dict_table_open_on_id_low(
@@ -57,8 +57,6 @@ dict_table_open_on_id_low(
when loading the table */
ibool open_only_if_in_cache);
-#ifndef UNIV_NONINL
#include "dict0priv.ic"
-#endif
#endif /* dict0priv.h */
diff --git a/storage/innobase/include/dict0priv.ic b/storage/innobase/include/dict0priv.ic
index e9b111c3271..7b584c7e1cb 100644
--- a/storage/innobase/include/dict0priv.ic
+++ b/storage/innobase/include/dict0priv.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2010, 2014, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,12 +26,11 @@ Created Wed 13 Oct 2010 16:10:14 EST Sunny Bains
#include "dict0dict.h"
#include "dict0load.h"
#include "dict0priv.h"
-#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Gets a table; loads it to the dictionary cache if necessary. A low-level
function.
-@return table, NULL if not found */
+@return table, NULL if not found */
UNIV_INLINE
dict_table_t*
dict_table_get_low(
@@ -41,24 +40,22 @@ dict_table_get_low(
dict_table_t* table;
ut_ad(table_name);
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
table = dict_table_check_if_in_cache_low(table_name);
if (table && table->corrupted) {
- fprintf(stderr, "InnoDB: table");
- ut_print_name(stderr, NULL, TRUE, table->name);
+ ib::error error;
+ error << "Table " << table->name << "is corrupted";
if (srv_load_corrupted) {
- fputs(" is corrupted, but"
- " innodb_force_load_corrupted is set\n", stderr);
+ error << ", but innodb_force_load_corrupted is set";
} else {
- fputs(" is corrupted\n", stderr);
return(NULL);
}
}
if (table == NULL) {
- table = dict_load_table(table_name, TRUE, DICT_ERR_IGNORE_NONE);
+ table = dict_load_table(table_name, DICT_ERR_IGNORE_NONE);
}
ut_ad(!table || table->cached);
@@ -68,7 +65,7 @@ dict_table_get_low(
/**********************************************************************//**
Returns a table object based on table id.
-@return table, NULL if does not exist */
+@return table, NULL if does not exist */
UNIV_INLINE
dict_table_t*
dict_table_open_on_id_low(
@@ -81,7 +78,7 @@ dict_table_open_on_id_low(
dict_table_t* table;
ulint fold;
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
/* Look for the table name in the hash table */
fold = ut_fold_ull(table_id);
@@ -102,7 +99,7 @@ dict_table_open_on_id_low(
/**********************************************************************//**
Checks if a table is in the dictionary cache.
-@return table, NULL if not found */
+@return table, NULL if not found */
UNIV_INLINE
dict_table_t*
dict_table_check_if_in_cache_low(
@@ -112,15 +109,18 @@ dict_table_check_if_in_cache_low(
dict_table_t* table;
ulint table_fold;
+ DBUG_ENTER("dict_table_check_if_in_cache_low");
+ DBUG_PRINT("dict_table_check_if_in_cache_low",
+ ("table: '%s'", table_name));
+
ut_ad(table_name);
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
/* Look for the table name in the hash table */
table_fold = ut_fold_string(table_name);
HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold,
dict_table_t*, table, ut_ad(table->cached),
- !strcmp(table->name, table_name));
- return(table);
+ !strcmp(table->name.m_name, table_name));
+ DBUG_RETURN(table);
}
-#endif /*! UNIV_HOTBACKUP */
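For illustration, a minimal standalone sketch of the fold-and-chain lookup that dict_table_check_if_in_cache_low() performs through ut_fold_string() and HASH_SEARCH above: fold the table name to pick a hash cell, then walk that cell's chain comparing full names. All names and the fold function below are hypothetical, not InnoDB symbols.

// Hypothetical stand-in for the fold-and-chain lookup pattern.
#include <cstring>
#include <cstdio>

struct table_t {
    const char* name;
    table_t*    name_hash;  // next element in the same hash chain
};

static unsigned long fold_string(const char* s) {
    unsigned long fold = 0;
    while (*s) fold = (fold << 5) + fold + (unsigned char)*s++;  // djb2-like fold
    return fold;
}

static const int N_CELLS = 8;
static table_t* cells[N_CELLS];  // zero-initialized chains

static table_t* lookup(const char* name) {
    // Fold selects the cell; the chain walk confirms the exact name.
    for (table_t* t = cells[fold_string(name) % N_CELLS]; t; t = t->name_hash)
        if (!strcmp(t->name, name)) return t;
    return nullptr;
}

int main() {
    static table_t t1 = {"test/t1", nullptr};
    cells[fold_string(t1.name) % N_CELLS] = &t1;
    std::printf("%s\n", lookup("test/t1") ? "hit" : "miss");
}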
diff --git a/storage/innobase/include/dict0stats.h b/storage/innobase/include/dict0stats.h
index 4eda752c46f..98956412ae2 100644
--- a/storage/innobase/include/dict0stats.h
+++ b/storage/innobase/include/dict0stats.h
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 2009, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2009, 2018, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,12 +27,12 @@ Created Jan 06, 2010 Vasil Dimov
#ifndef dict0stats_h
#define dict0stats_h
-#include "univ.i"
-
-#include "db0err.h"
#include "dict0types.h"
#include "trx0types.h"
+#define TABLE_STATS_NAME "mysql/innodb_table_stats"
+#define INDEX_STATS_NAME "mysql/innodb_index_stats"
+
enum dict_stats_upd_option_t {
DICT_STATS_RECALC_PERSISTENT,/* (re) calculate the
statistics using a precise and slow
@@ -55,18 +56,6 @@ enum dict_stats_upd_option_t {
};
/*********************************************************************//**
-Calculates new estimates for table and index statistics. This function
-is relatively quick and is used to calculate transient statistics that
-are not saved on disk.
-This was the only way to calculate statistics before the
-Persistent Statistics feature was introduced. */
-UNIV_INTERN
-void
-dict_stats_update_transient(
-/*========================*/
- dict_table_t* table); /*!< in/out: table */
-
-/*********************************************************************//**
Set the persistent statistics flag for a given table. This is set only
in the in-memory table object and is not saved on disk. It will be read
from the .frm file upon first open from MySQL after a server restart. */
@@ -79,14 +68,10 @@ dict_stats_set_persistent(
ibool ps_off) /*!< in: persistent stats explicitly disabled */
MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Check whether persistent statistics is enabled for a given table.
-@return TRUE if enabled, FALSE otherwise */
+/** @return whether persistent statistics is enabled for a given table */
UNIV_INLINE
-ibool
-dict_stats_is_persistent_enabled(
-/*=============================*/
- const dict_table_t* table) /*!< in: table */
+bool
+dict_stats_is_persistent_enabled(const dict_table_t* table)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/*********************************************************************//**
@@ -102,14 +87,11 @@ dict_stats_auto_recalc_set(
ibool auto_recalc_on, /*!< in: explicitly enabled */
ibool auto_recalc_off); /*!< in: explicitly disabled */
-/*********************************************************************//**
-Check whether auto recalc is enabled for a given table.
-@return TRUE if enabled, FALSE otherwise */
+/** @return whether auto recalc is enabled for a given table */
UNIV_INLINE
-ibool
-dict_stats_auto_recalc_is_enabled(
-/*==============================*/
- const dict_table_t* table); /*!< in: table */
+bool
+dict_stats_auto_recalc_is_enabled(const dict_table_t* table)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
/*********************************************************************//**
Initialize table's stats for the first time when opening a table. */
@@ -129,11 +111,26 @@ dict_stats_deinit(
dict_table_t* table) /*!< in/out: table */
MY_ATTRIBUTE((nonnull));
+#ifdef WITH_WSREP
+/** Update the table modification counter and if necessary,
+schedule new estimates for table and index statistics to be calculated.
+@param[in,out] table persistent or temporary table
+@param[in] thd current session */
+void dict_stats_update_if_needed(dict_table_t* table, THD* thd)
+ MY_ATTRIBUTE((nonnull(1)));
+#else
+/** Update the table modification counter and if necessary,
+schedule new estimates for table and index statistics to be calculated.
+@param[in,out] table persistent or temporary table */
+void dict_stats_update_if_needed_func(dict_table_t* table)
+ MY_ATTRIBUTE((nonnull));
+# define dict_stats_update_if_needed(t,thd) dict_stats_update_if_needed_func(t)
+#endif
+
/*********************************************************************//**
Calculates new estimates for table and index statistics. The statistics
are used in query optimization.
@return DB_* error code or DB_SUCCESS */
-UNIV_INTERN
dberr_t
dict_stats_update(
/*==============*/
@@ -148,7 +145,6 @@ Removes the information for a particular index's stats from the persistent
storage if it exists and if there is data stored for this index.
This function creates its own trx and commits it.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
dict_stats_drop_index(
/*==================*/
@@ -163,7 +159,6 @@ Removes the statistics for a table and all of its indexes from the
persistent storage if it exists and if there is data stored for the table.
This function creates its own transaction and commits it.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
dict_stats_drop_table(
/*==================*/
@@ -174,7 +169,6 @@ dict_stats_drop_table(
/*********************************************************************//**
Fetches or calculates new estimates for index statistics. */
-UNIV_INTERN
void
dict_stats_update_for_index(
/*========================*/
@@ -185,7 +179,6 @@ dict_stats_update_for_index(
Renames a table in InnoDB persistent stats storage.
This function creates its own transaction and commits it.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
dict_stats_rename_table(
/*====================*/
@@ -195,41 +188,43 @@ dict_stats_rename_table(
is returned */
size_t errstr_sz); /*!< in: errstr size */
-/*********************************************************************//**
-Save defragmentation result.
+/** Save an individual index's statistic into the persistent statistics
+storage.
+@param[in] index index to be updated
+@param[in] last_update timestamp of the stat
+@param[in] stat_name name of the stat
+@param[in] stat_value value of the stat
+@param[in] sample_size number of pages sampled, or NULL
+@param[in] stat_description description of the stat
+@param[in,out] trx in case of NULL the function will
+allocate and free the trx object. If it is not NULL then it will be
+rolled back only in the case of error, but not freed.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
-dict_stats_save_defrag_summary(
- dict_index_t* index); /*!< in: index */
-
-/*********************************************************************//**
-Save defragmentation stats for a given index.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+dict_stats_save_index_stat(
+ dict_index_t* index,
+ time_t last_update,
+ const char* stat_name,
+ ib_uint64_t stat_value,
+ ib_uint64_t* sample_size,
+ const char* stat_description,
+ trx_t* trx);
+
+/** Report an error if updating table statistics failed because
+the .ibd file is missing, table decryption failed, or the table is corrupted.
+@param[in,out] table Table
+@param[in] defragment true if the statistics are for defragmentation
+@retval DB_DECRYPTION_FAILED if decryption of the table failed
+@retval DB_TABLESPACE_DELETED if .ibd file is missing
+@retval DB_CORRUPTION if table is marked as corrupted */
dberr_t
-dict_stats_save_defrag_stats(
- dict_index_t* index); /*!< in: index */
-
-/**********************************************************************//**
-Clear defragmentation summary. */
-UNIV_INTERN
-void
-dict_stats_empty_defrag_summary(
-/*==================*/
- dict_index_t* index); /*!< in: index to clear defragmentation stats */
-
-/**********************************************************************//**
-Clear defragmentation related index stats. */
-UNIV_INTERN
-void
-dict_stats_empty_defrag_stats(
-/*==================*/
- dict_index_t* index); /*!< in: index to clear defragmentation stats */
-
+dict_stats_report_error(dict_table_t* table, bool defragment = false)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
-#ifndef UNIV_NONINL
#include "dict0stats.ic"
-#endif
+
+#ifdef UNIV_ENABLE_UNIT_TEST_DICT_STATS
+void test_dict_stats_all();
+#endif /* UNIV_ENABLE_UNIT_TEST_DICT_STATS */
#endif /* dict0stats_h */
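The WITH_WSREP block above keeps one call-site spelling for dict_stats_update_if_needed() whether or not the extra THD argument exists. A hedged sketch of that conditional-parameter pattern, with made-up names (WITH_FEATURE, ctx_t) standing in for the real symbols:

// Sketch only: the macro swallows the extra argument when the feature is off.
#include <cstdio>

struct ctx_t { int id; };

#ifdef WITH_FEATURE
void update_if_needed(int* counter, ctx_t* ctx)
{
    ++*counter;
    if (ctx) std::printf("ctx %d\n", ctx->id);
}
#else
void update_if_needed_func(int* counter) { ++*counter; }
// Call sites pass both arguments; the second is dropped at preprocessing time.
# define update_if_needed(c, ctx) update_if_needed_func(c)
#endif

int main() {
    int n = 0;
    ctx_t ctx = {1};
    update_if_needed(&n, &ctx);  // compiles identically either way
    std::printf("%d\n", n);
}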
diff --git a/storage/innobase/include/dict0stats.ic b/storage/innobase/include/dict0stats.ic
index 94010007760..31065d15c45 100644
--- a/storage/innobase/include/dict0stats.ic
+++ b/storage/innobase/include/dict0stats.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 2012, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,10 +24,8 @@ Code used for calculating and manipulating table statistics.
Created Jan 23, 2012 Vasil Dimov
*******************************************************/
-#include "univ.i"
-#include "dict0dict.h" /* dict_table_stats_lock() */
-#include "dict0types.h" /* dict_table_t */
-#include "srv0srv.h" /* srv_stats_persistent, srv_stats_auto_recalc */
+#include "dict0dict.h"
+#include "srv0srv.h"
/*********************************************************************//**
Set the persistent statistics flag for a given table. This is set only
@@ -61,14 +60,10 @@ dict_stats_set_persistent(
table->stat_persistent = stat_persistent;
}
-/*********************************************************************//**
-Check whether persistent statistics is enabled for a given table.
-@return TRUE if enabled, FALSE otherwise */
+/** @return whether persistent statistics is enabled for a given table */
UNIV_INLINE
-ibool
-dict_stats_is_persistent_enabled(
-/*=============================*/
- const dict_table_t* table) /*!< in: table */
+bool
+dict_stats_is_persistent_enabled(const dict_table_t* table)
{
/* Because of the nature of this check (non-locking) it is possible
that a table becomes:
@@ -83,16 +78,16 @@ dict_stats_is_persistent_enabled(
protect the ::stat_persistent with dict_table_stats_lock() like the
other ::stat_ members which would be too big performance penalty,
especially when this function is called from
- row_update_statistics_if_needed(). */
+ dict_stats_update_if_needed(). */
/* we rely on this read to be atomic */
ib_uint32_t stat_persistent = table->stat_persistent;
if (stat_persistent & DICT_STATS_PERSISTENT_ON) {
ut_ad(!(stat_persistent & DICT_STATS_PERSISTENT_OFF));
- return(TRUE);
+ return(true);
} else if (stat_persistent & DICT_STATS_PERSISTENT_OFF) {
- return(FALSE);
+ return(false);
} else {
return(srv_stats_persistent);
}
@@ -127,23 +122,19 @@ dict_stats_auto_recalc_set(
table->stats_auto_recalc = stats_auto_recalc;
}
-/*********************************************************************//**
-Check whether auto recalc is enabled for a given table.
-@return TRUE if enabled, FALSE otherwise */
+/** @return whether auto recalc is enabled for a given table */
UNIV_INLINE
-ibool
-dict_stats_auto_recalc_is_enabled(
-/*==============================*/
- const dict_table_t* table) /*!< in: table */
+bool
+dict_stats_auto_recalc_is_enabled(const dict_table_t* table)
{
/* we rely on this read to be atomic */
ib_uint32_t stats_auto_recalc = table->stats_auto_recalc;
if (stats_auto_recalc & DICT_STATS_AUTO_RECALC_ON) {
ut_ad(!(stats_auto_recalc & DICT_STATS_AUTO_RECALC_OFF));
- return(TRUE);
+ return(true);
} else if (stats_auto_recalc & DICT_STATS_AUTO_RECALC_OFF) {
- return(FALSE);
+ return(false);
} else {
return(srv_stats_auto_recalc);
}
@@ -185,7 +176,7 @@ dict_stats_deinit(
{
ut_ad(mutex_own(&dict_sys->mutex));
- ut_a(table->n_ref_count == 0);
+ ut_a(table->get_ref_count() == 0);
dict_table_stats_lock(table, RW_X_LATCH);
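The stat_persistent and stats_auto_recalc checks above share a two-bit tri-state: an ON bit, an OFF bit, and neither set meaning "use the server-wide default". A minimal sketch of that encoding, assuming illustrative constant names rather than the InnoDB DICT_STATS_* symbols:

// Sketch of the ON/OFF/default tri-state read without locking.
#include <cstdint>
#include <cstdio>

static const uint32_t STATE_ON  = 1U << 0;
static const uint32_t STATE_OFF = 1U << 1;
static bool server_default = true;  // stands in for srv_stats_persistent

static bool is_enabled(uint32_t state) {
    if (state & STATE_ON)  return true;   // explicitly enabled; OFF must not also be set
    if (state & STATE_OFF) return false;  // explicitly disabled
    return server_default;               // neither bit set: fall back to the global
}

int main() {
    std::printf("%d %d %d\n",
                is_enabled(STATE_ON), is_enabled(STATE_OFF), is_enabled(0));
}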
diff --git a/storage/innobase/include/dict0stats_bg.h b/storage/innobase/include/dict0stats_bg.h
index a14c183c667..9c611640fdf 100644
--- a/storage/innobase/include/dict0stats_bg.h
+++ b/storage/innobase/include/dict0stats_bg.h
@@ -27,59 +27,31 @@ Created Apr 26, 2012 Vasil Dimov
#ifndef dict0stats_bg_h
#define dict0stats_bg_h
-#include "univ.i"
-
-#include "dict0types.h" /* dict_table_t, table_id_t */
-#include "os0sync.h" /* os_event_t */
-#include "os0thread.h" /* DECLARE_THREAD */
+#include "dict0types.h"
+#include "os0event.h"
+#include "os0thread.h"
/** Event to wake up dict_stats_thread on dict_stats_recalc_pool_add()
or shutdown. Not protected by any mutex. */
extern os_event_t dict_stats_event;
-/*****************************************************************//**
-Add a table to the recalc pool, which is processed by the
-background stats gathering thread. Only the table id is added to the
-list, so the table can be closed after being enqueued and it will be
-opened when needed. If the table does not exist later (has been DROPped),
-then it will be removed from the pool and skipped. */
-UNIV_INTERN
-void
-dict_stats_recalc_pool_add(
-/*=======================*/
- const dict_table_t* table); /*!< in: table to add */
+#ifdef HAVE_PSI_INTERFACE
+extern mysql_pfs_key_t dict_stats_recalc_pool_mutex_key;
+#endif /* HAVE_PSI_INTERFACE */
+
+#ifdef UNIV_DEBUG
+/** Value of MySQL global used to disable dict_stats thread. */
+extern my_bool innodb_dict_stats_disabled_debug;
+#endif /* UNIV_DEBUG */
/*****************************************************************//**
Delete a given table from the auto recalc pool. */
-UNIV_INTERN
void
dict_stats_recalc_pool_del(
/*=======================*/
const dict_table_t* table); /*!< in: table to remove */
-/*****************************************************************//**
-Add an index in a table to the defrag pool, which is processed by the
-background stats gathering thread. Only the table id and index id are
-added to the list, so the table can be closed after being enqueued and
-it will be opened when needed. If the table or index does not exist later
-(has been DROPped), then it will be removed from the pool and skipped. */
-UNIV_INTERN
-void
-dict_stats_defrag_pool_add(
-/*=======================*/
- const dict_index_t* index); /*!< in: table to add */
-
-/*****************************************************************//**
-Delete a given index from the auto defrag pool. */
-UNIV_INTERN
-void
-dict_stats_defrag_pool_del(
-/*=======================*/
- const dict_table_t* table, /*!<in: if given, remove
- all entries for the table */
- const dict_index_t* index); /*!< in: index to remove */
-
/** Yield the data dictionary latch when waiting
for the background thread to stop accessing a table.
@param trx transaction holding the data dictionary locks */
@@ -98,7 +70,17 @@ bool
dict_stats_stop_bg(
/*===============*/
dict_table_t* table) /*!< in/out: table */
- MY_ATTRIBUTE((warn_unused_result));
+{
+ ut_ad(!srv_read_only_mode);
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ if (!(table->stats_bg_flag & BG_STAT_IN_PROGRESS)) {
+ return(true);
+ }
+
+ table->stats_bg_flag |= BG_STAT_SHOULD_QUIT;
+ return(false);
+}
/*****************************************************************//**
Wait until background stats thread has stopped using the specified table.
@@ -109,7 +91,6 @@ The background stats thread is guaranteed not to start using the specified
table after this function returns and before the caller unlocks the data
dictionary because it sets the BG_STAT_IN_PROGRESS bit in table->stats_bg_flag
under dict_sys->mutex. */
-UNIV_INTERN
void
dict_stats_wait_bg_to_stop_using_table(
/*===================================*/
@@ -119,7 +100,6 @@ dict_stats_wait_bg_to_stop_using_table(
/*****************************************************************//**
Initialize global variables needed for the operation of dict_stats_thread().
Must be called before dict_stats_thread() is started. */
-UNIV_INTERN
void
dict_stats_thread_init();
/*====================*/
@@ -127,17 +107,31 @@ dict_stats_thread_init();
/*****************************************************************//**
Free resources allocated by dict_stats_thread_init(), must be called
after dict_stats_thread() has exited. */
-UNIV_INTERN
void
dict_stats_thread_deinit();
/*======================*/
+#ifdef UNIV_DEBUG
+/** Disables dict stats thread. It's used by:
+ SET GLOBAL innodb_dict_stats_disabled_debug = 1 (0).
+@param[in] thd thread handle
+@param[in] var pointer to system variable
+@param[out] var_ptr where the formal string goes
+@param[in] save immediate result from check function */
+void
+dict_stats_disabled_debug_update(
+ THD* thd,
+ struct st_mysql_sys_var* var,
+ void* var_ptr,
+ const void* save);
+#endif /* UNIV_DEBUG */
+
/*****************************************************************//**
This is the thread for background stats gathering. It pops tables from
the auto recalc list and processes them, eventually recalculating their
statistics.
@return this function does not return, it calls os_thread_exit() */
-extern "C" UNIV_INTERN
+extern "C"
os_thread_ret_t
DECLARE_THREAD(dict_stats_thread)(
/*==============================*/
@@ -148,8 +142,4 @@ DECLARE_THREAD(dict_stats_thread)(
void
dict_stats_shutdown();
-# ifndef UNIV_NONINL
-# include "dict0stats_bg.ic"
-# endif
-
#endif /* dict0stats_bg_h */
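dict_stats_stop_bg(), now inlined above, implements a cooperative-stop handshake: if the background thread is mid-scan, set a quit flag and report "not stopped yet" so the caller can wait and retry. A self-contained sketch of the protocol, with the flag values and names made up for illustration:

// Sketch of the stop handshake between DDL caller and stats thread.
#include <cstdio>

enum { BG_IN_PROGRESS = 1, BG_SHOULD_QUIT = 2 };

struct table_t { unsigned bg_flag; };

// Returns true when no background work is active on the table.
static bool stop_bg(table_t* t) {
    if (!(t->bg_flag & BG_IN_PROGRESS)) return true;
    t->bg_flag |= BG_SHOULD_QUIT;  // ask the stats thread to stop at its next check
    return false;
}

int main() {
    table_t t = {BG_IN_PROGRESS};
    std::printf("%d\n", stop_bg(&t));  // 0: thread still running, quit requested
    t.bg_flag = 0;                     // thread noticed the flag and finished
    std::printf("%d\n", stop_bg(&t));  // 1: safe to proceed
}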
diff --git a/storage/innobase/include/dict0stats_bg.ic b/storage/innobase/include/dict0stats_bg.ic
deleted file mode 100644
index 7e0d596afa9..00000000000
--- a/storage/innobase/include/dict0stats_bg.ic
+++ /dev/null
@@ -1,45 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2012, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0stats_bg.ic
-Code used for background table and index stats gathering.
-
-Created Feb 8, 2013 Marko Makela
-*******************************************************/
-
-/*****************************************************************//**
-Request the background collection of statistics to stop for a table.
-@retval true when no background process is active
-@retval false when it is not safe to modify the table definition */
-UNIV_INLINE
-bool
-dict_stats_stop_bg(
-/*===============*/
- dict_table_t* table) /*!< in/out: table */
-{
- ut_ad(!srv_read_only_mode);
- ut_ad(mutex_own(&dict_sys->mutex));
-
- if (!(table->stats_bg_flag & BG_STAT_IN_PROGRESS)) {
- return(true);
- }
-
- table->stats_bg_flag |= BG_STAT_SHOULD_QUIT;
- return(false);
-}
diff --git a/storage/innobase/include/dict0types.h b/storage/innobase/include/dict0types.h
index 25facfbaf08..bea08f398de 100644
--- a/storage/innobase/include/dict0types.h
+++ b/storage/innobase/include/dict0types.h
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,15 +27,19 @@ Created 1/8/1996 Heikki Tuuri
#ifndef dict0types_h
#define dict0types_h
+#include <ut0mutex.h>
+
struct dict_sys_t;
struct dict_col_t;
struct dict_field_t;
struct dict_index_t;
struct dict_table_t;
struct dict_foreign_t;
+struct dict_v_col_t;
struct ind_node_t;
struct tab_node_t;
+struct dict_add_v_col_t;
/* Space id and page no where the dictionary header resides */
#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */
@@ -55,18 +59,21 @@ Note: please define the IGNORE_ERR_* as bits, so their value can
be or-ed together */
enum dict_err_ignore_t {
DICT_ERR_IGNORE_NONE = 0, /*!< no error to ignore */
- DICT_ERR_IGNORE_INDEX_ROOT = 1, /*!< ignore error if index root
- page is FIL_NULL or incorrect value */
- DICT_ERR_IGNORE_CORRUPT = 2, /*!< skip corrupted indexes */
- DICT_ERR_IGNORE_FK_NOKEY = 4, /*!< ignore error if any foreign
+ DICT_ERR_IGNORE_FK_NOKEY = 1, /*!< ignore error if any foreign
key is missing */
+ DICT_ERR_IGNORE_INDEX_ROOT = 2, /*!< ignore error if index root
+ page is FIL_NULL or incorrect value */
+ DICT_ERR_IGNORE_CORRUPT = 4, /*!< skip corrupted indexes */
DICT_ERR_IGNORE_RECOVER_LOCK = 8,
/*!< Used when recovering table locks
for resurrected transactions.
Silently load a missing
tablespace, and do not load
incomplete index definitions. */
- DICT_ERR_IGNORE_ALL = 0xFFFF /*!< ignore all errors */
+ /** ignore all errors above */
+ DICT_ERR_IGNORE_ALL = 15,
+ /** prepare to drop the table; do not attempt to load tablespace */
+ DICT_ERR_IGNORE_DROP = 31
};
/** Quiescing states for flushing tables to disk. */
@@ -76,24 +83,50 @@ enum ib_quiesce_t {
QUIESCE_COMPLETE /*!< All done */
};
-/** Enum values for atomic_writes table option */
-typedef enum {
- ATOMIC_WRITES_DEFAULT = 0,
- ATOMIC_WRITES_ON = 1,
- ATOMIC_WRITES_OFF = 2
-} atomic_writes_t;
+#ifndef UNIV_INNOCHECKSUM
+typedef ib_mutex_t DictSysMutex;
+#endif /* !UNIV_INNOCHECKSUM */
/** Prefix for tmp tables, adopted from sql/table.h */
-#define tmp_file_prefix "#sql"
-#define tmp_file_prefix_length 4
-#define TEMP_FILE_PREFIX_INNODB "#sql-ib"
+#define TEMP_FILE_PREFIX "#sql"
+#define TEMP_FILE_PREFIX_LENGTH 4
+#define TEMP_FILE_PREFIX_INNODB "#sql-ib"
#define TEMP_TABLE_PREFIX "#sql"
#define TEMP_TABLE_PATH_PREFIX "/" TEMP_TABLE_PREFIX
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+/** Dump the change buffer at startup */
+extern my_bool ibuf_dump;
/** Flag to control insert buffer debugging. */
extern uint ibuf_debug;
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+/** Shift for spatial status */
+#define SPATIAL_STATUS_SHIFT 12
+
+/** Mask to encode/decode spatial status. */
+#define SPATIAL_STATUS_MASK (3U << SPATIAL_STATUS_SHIFT)
+
+#if SPATIAL_STATUS_MASK < REC_VERSION_56_MAX_INDEX_COL_LEN
+# error SPATIAL_STATUS_MASK < REC_VERSION_56_MAX_INDEX_COL_LEN
+#endif
+
+/** Whether a column is used in a spatial index or a regular index.
+Note: the spatial status is part of the persistent undo log,
+so we must not modify the values in MySQL 5.7 */
+enum spatial_status_t {
+ /** Unknown status (undo format in 5.7.9) */
+ SPATIAL_UNKNOWN = 0,
+
+ /** Not used in gis index. */
+ SPATIAL_NONE = 1,
+
+ /** Used in both spatial index and regular index. */
+ SPATIAL_MIXED = 2,
+
+ /** Only used in spatial index. */
+ SPATIAL_ONLY = 3
+};
+
#endif
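SPATIAL_STATUS_SHIFT and SPATIAL_STATUS_MASK above pack the 2-bit spatial_status_t into the high bits of a field whose low bits carry a column prefix length. A sketch of that encode/decode, where the field layout is assumed for illustration only:

// Sketch of the shift/mask packing declared in dict0types.h above.
#include <cstdio>

static const unsigned SHIFT = 12;          // stands in for SPATIAL_STATUS_SHIFT
static const unsigned MASK  = 3U << SHIFT; // stands in for SPATIAL_STATUS_MASK

static unsigned encode(unsigned prefix_len, unsigned spatial_status) {
    return (prefix_len & ~MASK) | (spatial_status << SHIFT);
}
static unsigned get_status(unsigned field) { return (field & MASK) >> SHIFT; }
static unsigned get_len(unsigned field)    { return field & ~MASK; }

int main() {
    unsigned f = encode(767, 3);  // 3 = SPATIAL_ONLY in the enum above
    std::printf("status=%u len=%u\n", get_status(f), get_len(f));
}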
diff --git a/storage/innobase/include/dyn0buf.h b/storage/innobase/include/dyn0buf.h
new file mode 100644
index 00000000000..311f6518943
--- /dev/null
+++ b/storage/innobase/include/dyn0buf.h
@@ -0,0 +1,511 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2019, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dyn0buf.h
+The dynamically allocated buffer implementation
+
+Created 2013-03-16 Sunny Bains
+*******************************************************/
+
+#ifndef dyn0buf_h
+#define dyn0buf_h
+
+#include "mem0mem.h"
+#include "dyn0types.h"
+#include "intrusive_list.h"
+
+
+/** Class that manages dynamic buffers. It uses an intrusive list of
+dyn_buf_t::block_t instances. We don't use STL containers in
+order to avoid the overhead of heap calls. Using a custom memory
+allocator doesn't solve the problem either because we have to get
+the memory from somewhere. We can't use the block_t::m_data as the
+backend for the custom allocator because we would like the data in
+the blocks to be contiguous. */
+template <size_t SIZE = DYN_ARRAY_DATA_SIZE>
+class dyn_buf_t {
+public:
+
+ class block_t : public intrusive::list_node<> {
+ public:
+
+ block_t()
+ {
+ ut_ad(MAX_DATA_SIZE <= (2 << 15));
+ init();
+ }
+
+ ~block_t() { }
+
+ /**
+ Gets the number of used bytes in a block.
+ @return number of bytes used */
+ ulint used() const
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ return(static_cast<ulint>(m_used & ~DYN_BLOCK_FULL_FLAG));
+ }
+
+ /**
+ Gets pointer to the start of data.
+ @return pointer to data */
+ byte* start()
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ return(m_data);
+ }
+
+ /**
+ @return start of data - non const version */
+ byte* begin()
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ return(m_data);
+ }
+
+ /**
+ @return end of used data - non const version */
+ byte* end()
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ return(begin() + m_used);
+ }
+
+ /**
+ @return start of data - const version */
+ const byte* begin() const
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ return(m_data);
+ }
+
+ /**
+ @return end of used data - const version */
+ const byte* end() const
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ return(begin() + m_used);
+ }
+
+ private:
+ /**
+ @return pointer to start of reserved space */
+ template <typename Type>
+ Type push(ib_uint32_t size)
+ {
+ Type ptr = reinterpret_cast<Type>(end());
+
+ m_used += size;
+ ut_ad(m_used <= static_cast<ib_uint32_t>(MAX_DATA_SIZE));
+
+ return(ptr);
+ }
+
+ /**
+ Grow the stack. */
+ void close(const byte* ptr)
+ {
+ /* Check that it is within bounds */
+ ut_ad(ptr >= begin());
+ ut_ad(ptr <= begin() + m_buf_end);
+
+ /* We have done the boundary check above */
+ m_used = static_cast<ib_uint32_t>(ptr - begin());
+
+ ut_ad(m_used <= MAX_DATA_SIZE);
+ ut_d(m_buf_end = 0);
+ }
+
+ /**
+ Initialise the block */
+ void init()
+ {
+ m_used = 0;
+ ut_d(m_buf_end = 0);
+ ut_d(m_magic_n = DYN_BLOCK_MAGIC_N);
+ }
+ private:
+#ifdef UNIV_DEBUG
+ /** If opened then this is the buffer end offset, else 0 */
+ ulint m_buf_end;
+
+ /** Magic number (DYN_BLOCK_MAGIC_N) */
+ ulint m_magic_n;
+#endif /* UNIV_DEBUG */
+
+ /** SIZE - sizeof(intrusive::list_node<>) + sizeof(m_used) */
+ enum {
+ MAX_DATA_SIZE = SIZE
+ - sizeof(intrusive::list_node<>)
+ + sizeof(ib_uint32_t)
+ };
+
+ /** Storage */
+ byte m_data[MAX_DATA_SIZE];
+
+ /** number of data bytes used in this block;
+ DYN_BLOCK_FULL_FLAG is set when the block becomes full */
+ ib_uint32_t m_used;
+
+ friend class dyn_buf_t;
+ };
+
+ typedef intrusive::list<block_t> list_t;
+
+ enum { MAX_DATA_SIZE = block_t::MAX_DATA_SIZE};
+
+ /** Default constructor */
+ dyn_buf_t()
+ :
+ m_heap(),
+ m_size()
+ {
+ push_back(&m_first_block);
+ }
+
+ /** Destructor */
+ ~dyn_buf_t()
+ {
+ erase();
+ }
+
+ /** Reset the buffer vector */
+ void erase()
+ {
+ if (m_heap != NULL) {
+ mem_heap_free(m_heap);
+ m_heap = NULL;
+
+ /* Initialise the list and add the first block. */
+ m_list.clear();
+ m_list.push_back(m_first_block);
+ } else {
+ m_first_block.init();
+ ut_ad(m_list.size() == 1);
+ }
+
+ m_size = 0;
+ }
+
+ /**
+ Makes room on top and returns a pointer to a buffer in it. After
+ copying the elements, the caller must close the buffer using close().
+ @param size in bytes of the buffer; MUST be <= MAX_DATA_SIZE!
+ @return pointer to the buffer */
+ byte* open(ulint size)
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ ut_ad(size > 0);
+ ut_ad(size <= MAX_DATA_SIZE);
+
+ block_t* block;
+
+ block = has_space(size) ? back() : add_block();
+
+ ut_ad(block->m_used <= MAX_DATA_SIZE);
+ ut_d(block->m_buf_end = block->m_used + size);
+
+ return(block->end());
+ }
+
+ /**
+ Closes the buffer returned by open.
+ @param ptr end of used space */
+ void close(const byte* ptr)
+ {
+ ut_ad(!m_list.empty());
+ block_t* block = back();
+
+ m_size -= block->used();
+
+ block->close(ptr);
+
+ m_size += block->used();
+ }
+
+ /**
+ Makes room on top and returns a pointer to the added element.
+ The caller must copy the element to the pointer returned.
+ @param size in bytes of the element
+ @return pointer to the element */
+ template <typename Type>
+ Type push(ib_uint32_t size)
+ {
+ ut_ad(size > 0);
+ ut_ad(size <= MAX_DATA_SIZE);
+
+ block_t* block;
+
+ block = has_space(size) ? back() : add_block();
+
+ m_size += size;
+
+ /* See ISO C++03 14.2/4 for why "template" is required. */
+
+ return(block->template push<Type>(size));
+ }
+
+ /**
+ Pushes n bytes.
+ @param ptr bytes to write
+ @param len string length */
+ void push(const byte* ptr, ib_uint32_t len)
+ {
+ while (len > 0) {
+ ib_uint32_t n_copied;
+
+ if (len >= MAX_DATA_SIZE) {
+ n_copied = MAX_DATA_SIZE;
+ } else {
+ n_copied = len;
+ }
+
+ ::memmove(push<byte*>(n_copied), ptr, n_copied);
+
+ ptr += n_copied;
+ len -= n_copied;
+ }
+ }
+
+ /**
+ Returns a pointer to an element in the buffer. const version.
+ @param pos position of element in bytes from start
+ @return pointer to element */
+ template <typename Type>
+ const Type at(ulint pos) const
+ {
+ block_t* block = const_cast<block_t*>(
+ const_cast<dyn_buf_t*>(this)->find(pos));
+
+ return(reinterpret_cast<Type>(block->begin() + pos));
+ }
+
+ /**
+ Returns a pointer to an element in the buffer. non const version.
+ @param pos position of element in bytes from start
+ @return pointer to element */
+ template <typename Type>
+ Type at(ulint pos)
+ {
+ block_t* block = const_cast<block_t*>(find(pos));
+
+ return(reinterpret_cast<Type>(block->begin() + pos));
+ }
+
+ /**
+ Returns the size of the total stored data.
+ @return data size in bytes */
+ ulint size() const
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+#ifdef UNIV_DEBUG
+ ulint total_size = 0;
+
+ for (typename list_t::iterator it = m_list.begin(),
+ end = m_list.end();
+ it != end; ++it) {
+ total_size += it->used();
+ }
+
+ ut_ad(total_size == m_size);
+#endif /* UNIV_DEBUG */
+ return(m_size);
+ }
+
+ /**
+ Iterate over each block and call the functor.
+ @return false if iteration was terminated. */
+ template <typename Functor>
+ bool for_each_block(Functor& functor) const
+ {
+ for (typename list_t::iterator it = m_list.begin(),
+ end = m_list.end();
+ it != end; ++it) {
+
+ if (!functor(&*it)) {
+ return false;
+ }
+ }
+
+ return(true);
+ }
+
+ /**
+ Iterate over all the blocks in reverse and call the functor.
+ @return false if iteration was terminated. */
+ template <typename Functor>
+ bool for_each_block_in_reverse(Functor& functor) const
+ {
+ for (typename list_t::reverse_iterator it = m_list.rbegin(),
+ end = m_list.rend();
+ it != end; ++it) {
+
+ if (!functor(&*it)) {
+ return false;
+ }
+ }
+
+ return(true);
+ }
+
+ /**
+ Iterate over all the blocks in reverse and call the functor.
+ @return false if iteration was terminated. */
+ template <typename Functor>
+ bool for_each_block_in_reverse(const Functor& functor) const
+ {
+ for (typename list_t::reverse_iterator it = m_list.rbegin(),
+ end = m_list.rend();
+ it != end; ++it) {
+
+ if (!functor(&*it)) {
+ return false;
+ }
+ }
+
+ return(true);
+ }
+
+ /**
+ @return the first block */
+ block_t* front()
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ return &m_list.front();
+ }
+
+ /**
+ @return true if m_first_block block was not filled fully */
+ bool is_small() const
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ return(m_heap == NULL);
+ }
+
+private:
+ // Disable copying
+ dyn_buf_t(const dyn_buf_t&);
+ dyn_buf_t& operator=(const dyn_buf_t&);
+
+ /**
+ Add the block to the end of the list*/
+ void push_back(block_t* block)
+ {
+ block->init();
+ m_list.push_back(*block);
+ }
+
+ /** @return the last block in the list */
+ block_t* back()
+ {
+ return &m_list.back();
+ }
+
+ /**
+ @return true if request can be fulfilled */
+ bool has_space(ulint size) const
+ {
+ return(back()->m_used + size <= MAX_DATA_SIZE);
+ }
+
+ /**
+ @return true if request can be fulfilled */
+ bool has_space(ulint size)
+ {
+ return(back()->m_used + size <= MAX_DATA_SIZE);
+ }
+
+ /** Find the block that contains the pos.
+ @param pos absolute offset, it is updated to make it relative
+ to the block
+ @return the block containing the pos. */
+ block_t* find(ulint& pos)
+ {
+ ut_ad(!m_list.empty());
+
+ for (typename list_t::iterator it = m_list.begin(),
+ end = m_list.end();
+ it != end; ++it) {
+
+ if (pos < it->used()) {
+ ut_ad(it->used() >= pos);
+
+ return &*it;
+ }
+
+ pos -= it->used();
+ }
+
+ return NULL;
+ }
+
+ /**
+ Allocate and add a new block to m_list */
+ block_t* add_block()
+ {
+ block_t* block;
+
+ if (m_heap == NULL) {
+ m_heap = mem_heap_create(sizeof(*block));
+ }
+
+ block = reinterpret_cast<block_t*>(
+ mem_heap_alloc(m_heap, sizeof(*block)));
+
+ push_back(block);
+
+ return(block);
+ }
+
+private:
+ /** Heap to use for memory allocation */
+ mem_heap_t* m_heap;
+
+ /** Allocated blocks */
+ list_t m_list;
+
+ /** Total size used by all blocks */
+ ulint m_size;
+
+ /** The default block, should always be the first element. This
+ is for backwards compatibility and to avoid an extra heap allocation
+ for small REDO log records */
+ block_t m_first_block;
+};
+
+typedef dyn_buf_t<DYN_ARRAY_DATA_SIZE> mtr_buf_t;
+
+/** mtr_buf_t copier */
+struct mtr_buf_copy_t {
+ /** The copied buffer */
+ mtr_buf_t m_buf;
+
+ /** Append a block to the redo log buffer.
+ @return whether the appending should continue (always true here) */
+ bool operator()(const mtr_buf_t::block_t* block)
+ {
+ byte* buf = m_buf.open(block->used());
+ memcpy(buf, block->begin(), block->used());
+ m_buf.close(buf + block->used());
+ return(true);
+ }
+};
+
+#endif /* dyn0buf_h */
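A usage sketch of the open()/close() protocol and functor iteration defined in dyn0buf.h above. This fragment assumes the InnoDB build environment (the header, byte, ut_ad, and the mem heap it allocates from), so it is illustrative rather than a standalone program:

#include "dyn0buf.h"

// open() reserves space in the tail block; close() commits only the
// actually-written prefix; push() reserves and copies in one step.
mtr_buf_t buf;

byte* p = buf.open(8);          // reserve up to 8 bytes
p[0] = 0x42;                    // write fewer bytes than reserved
buf.close(p + 1);               // commit exactly 1 byte

const byte rec[3] = {1, 2, 3};
buf.push(rec, 3);               // copies 3 bytes, adding a block if the tail is full

mtr_buf_copy_t copier;          // the functor above appends each block to m_buf
buf.for_each_block(copier);     // visit blocks in order
ut_ad(copier.m_buf.size() == buf.size());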
diff --git a/storage/innobase/include/dyn0dyn.h b/storage/innobase/include/dyn0dyn.h
deleted file mode 100644
index 2fbfb838b59..00000000000
--- a/storage/innobase/include/dyn0dyn.h
+++ /dev/null
@@ -1,199 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dyn0dyn.h
-The dynamically allocated array
-
-Created 2/5/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dyn0dyn_h
-#define dyn0dyn_h
-
-#include "univ.i"
-#include "ut0lst.h"
-#include "mem0mem.h"
-
-/** A block in a dynamically allocated array */
-struct dyn_block_t;
-/** Dynamically allocated array */
-typedef dyn_block_t dyn_array_t;
-
-/** This is the initial 'payload' size of a dynamic array;
-this must be > MLOG_BUF_MARGIN + 30! */
-#define DYN_ARRAY_DATA_SIZE 512
-
-/*********************************************************************//**
-Initializes a dynamic array.
-@return initialized dyn array */
-UNIV_INLINE
-dyn_array_t*
-dyn_array_create(
-/*=============*/
- dyn_array_t* arr) /*!< in/out memory buffer of
- size sizeof(dyn_array_t) */
- MY_ATTRIBUTE((nonnull));
-/************************************************************//**
-Frees a dynamic array. */
-UNIV_INLINE
-void
-dyn_array_free(
-/*===========*/
- dyn_array_t* arr) /*!< in,own: dyn array */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Makes room on top of a dyn array and returns a pointer to a buffer in it.
-After copying the elements, the caller must close the buffer using
-dyn_array_close.
-@return pointer to the buffer */
-UNIV_INLINE
-byte*
-dyn_array_open(
-/*===========*/
- dyn_array_t* arr, /*!< in: dynamic array */
- ulint size) /*!< in: size in bytes of the buffer; MUST be
- smaller than DYN_ARRAY_DATA_SIZE! */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Closes the buffer returned by dyn_array_open. */
-UNIV_INLINE
-void
-dyn_array_close(
-/*============*/
- dyn_array_t* arr, /*!< in: dynamic array */
- const byte* ptr) /*!< in: end of used space */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Makes room on top of a dyn array and returns a pointer to
-the added element. The caller must copy the element to
-the pointer returned.
-@return pointer to the element */
-UNIV_INLINE
-void*
-dyn_array_push(
-/*===========*/
- dyn_array_t* arr, /*!< in/out: dynamic array */
- ulint size) /*!< in: size in bytes of the element */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/************************************************************//**
-Returns pointer to an element in dyn array.
-@return pointer to element */
-UNIV_INLINE
-void*
-dyn_array_get_element(
-/*==================*/
- const dyn_array_t* arr, /*!< in: dyn array */
- ulint pos) /*!< in: position of element
- in bytes from array start */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/************************************************************//**
-Returns the size of stored data in a dyn array.
-@return data size in bytes */
-UNIV_INLINE
-ulint
-dyn_array_get_data_size(
-/*====================*/
- const dyn_array_t* arr) /*!< in: dyn array */
- MY_ATTRIBUTE((nonnull, warn_unused_result, pure));
-/************************************************************//**
-Gets the first block in a dyn array.
-@param arr dyn array
-@return first block */
-#define dyn_array_get_first_block(arr) (arr)
-/************************************************************//**
-Gets the last block in a dyn array.
-@param arr dyn array
-@return last block */
-#define dyn_array_get_last_block(arr) \
- ((arr)->heap ? UT_LIST_GET_LAST((arr)->base) : (arr))
-/********************************************************************//**
-Gets the next block in a dyn array.
-@param arr dyn array
-@param block dyn array block
-@return pointer to next, NULL if end of list */
-#define dyn_array_get_next_block(arr, block) \
- ((arr)->heap ? UT_LIST_GET_NEXT(list, block) : NULL)
-/********************************************************************//**
-Gets the previous block in a dyn array.
-@param arr dyn array
-@param block dyn array block
-@return pointer to previous, NULL if end of list */
-#define dyn_array_get_prev_block(arr, block) \
- ((arr)->heap ? UT_LIST_GET_PREV(list, block) : NULL)
-/********************************************************************//**
-Gets the number of used bytes in a dyn array block.
-@return number of bytes used */
-UNIV_INLINE
-ulint
-dyn_block_get_used(
-/*===============*/
- const dyn_block_t* block) /*!< in: dyn array block */
- MY_ATTRIBUTE((nonnull, warn_unused_result, pure));
-/********************************************************************//**
-Gets pointer to the start of data in a dyn array block.
-@return pointer to data */
-UNIV_INLINE
-byte*
-dyn_block_get_data(
-/*===============*/
- const dyn_block_t* block) /*!< in: dyn array block */
- MY_ATTRIBUTE((nonnull, warn_unused_result, pure));
-/********************************************************//**
-Pushes n bytes to a dyn array. */
-UNIV_INLINE
-void
-dyn_push_string(
-/*============*/
- dyn_array_t* arr, /*!< in/out: dyn array */
- const byte* str, /*!< in: string to write */
- ulint len) /*!< in: string length */
- MY_ATTRIBUTE((nonnull));
-
-/*#################################################################*/
-
-/** @brief A block in a dynamically allocated array.
-NOTE! Do not access the fields of the struct directly: the definition
-appears here only for the compiler to know its size! */
-struct dyn_block_t{
- mem_heap_t* heap; /*!< in the first block this is != NULL
- if dynamic allocation has been needed */
- ulint used; /*!< number of data bytes used in this block;
- DYN_BLOCK_FULL_FLAG is set when the block
- becomes full */
- byte data[DYN_ARRAY_DATA_SIZE];
- /*!< storage for array elements */
- UT_LIST_BASE_NODE_T(dyn_block_t) base;
- /*!< linear list of dyn blocks: this node is
- used only in the first block */
- UT_LIST_NODE_T(dyn_block_t) list;
- /*!< linear list node: used in all blocks */
-#ifdef UNIV_DEBUG
- ulint buf_end;/*!< only in the debug version: if dyn
- array is opened, this is the buffer
- end offset, else this is 0 */
- ulint magic_n;/*!< magic number (DYN_BLOCK_MAGIC_N) */
-#endif
-};
-
-
-#ifndef UNIV_NONINL
-#include "dyn0dyn.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/dyn0dyn.ic b/storage/innobase/include/dyn0dyn.ic
deleted file mode 100644
index 6f2fbc4a153..00000000000
--- a/storage/innobase/include/dyn0dyn.ic
+++ /dev/null
@@ -1,298 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dyn0dyn.ic
-The dynamically allocated array
-
-Created 2/5/1996 Heikki Tuuri
-*******************************************************/
-
-/** Value of dyn_block_t::magic_n */
-#define DYN_BLOCK_MAGIC_N 375767
-/** Flag for dyn_block_t::used that indicates a full block */
-#define DYN_BLOCK_FULL_FLAG 0x1000000UL
-
-/************************************************************//**
-Adds a new block to a dyn array.
-@return created block */
-UNIV_INTERN
-dyn_block_t*
-dyn_array_add_block(
-/*================*/
- dyn_array_t* arr) /*!< in/out: dyn array */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/********************************************************************//**
-Gets the number of used bytes in a dyn array block.
-@return number of bytes used */
-UNIV_INLINE
-ulint
-dyn_block_get_used(
-/*===============*/
- const dyn_block_t* block) /*!< in: dyn array block */
-{
- return((block->used) & ~DYN_BLOCK_FULL_FLAG);
-}
-
-/********************************************************************//**
-Gets pointer to the start of data in a dyn array block.
-@return pointer to data */
-UNIV_INLINE
-byte*
-dyn_block_get_data(
-/*===============*/
- const dyn_block_t* block) /*!< in: dyn array block */
-{
- ut_ad(block);
-
- return(const_cast<byte*>(block->data));
-}
-
-/*********************************************************************//**
-Initializes a dynamic array.
-@return initialized dyn array */
-UNIV_INLINE
-dyn_array_t*
-dyn_array_create(
-/*=============*/
- dyn_array_t* arr) /*!< in/out: memory buffer of
- size sizeof(dyn_array_t) */
-{
-#if DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG
-# error "DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG"
-#endif
-
- arr->heap = NULL;
- arr->used = 0;
-
- ut_d(arr->buf_end = 0);
- ut_d(arr->magic_n = DYN_BLOCK_MAGIC_N);
-
- return(arr);
-}
-
-/************************************************************//**
-Frees a dynamic array. */
-UNIV_INLINE
-void
-dyn_array_free(
-/*===========*/
- dyn_array_t* arr) /*!< in: dyn array */
-{
- if (arr->heap != NULL) {
- mem_heap_free(arr->heap);
- }
-
- ut_d(arr->magic_n = 0);
-}
-
-/*********************************************************************//**
-Makes room on top of a dyn array and returns a pointer to the added element.
-The caller must copy the element to the pointer returned.
-@return pointer to the element */
-UNIV_INLINE
-void*
-dyn_array_push(
-/*===========*/
- dyn_array_t* arr, /*!< in/out: dynamic array */
- ulint size) /*!< in: size in bytes of the element */
-{
- dyn_block_t* block;
- ulint used;
-
- ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
- ut_ad(size <= DYN_ARRAY_DATA_SIZE);
- ut_ad(size);
-
- block = arr;
-
- if (block->used + size > DYN_ARRAY_DATA_SIZE) {
- /* Get the last array block */
-
- block = dyn_array_get_last_block(arr);
-
- if (block->used + size > DYN_ARRAY_DATA_SIZE) {
- block = dyn_array_add_block(arr);
- }
- }
-
- used = block->used;
-
- block->used = used + size;
- ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
-
- return(block->data + used);
-}
-
-/*********************************************************************//**
-Makes room on top of a dyn array and returns a pointer to a buffer in it.
-After copying the elements, the caller must close the buffer using
-dyn_array_close.
-@return pointer to the buffer */
-UNIV_INLINE
-byte*
-dyn_array_open(
-/*===========*/
- dyn_array_t* arr, /*!< in: dynamic array */
- ulint size) /*!< in: size in bytes of the buffer; MUST be
- smaller than DYN_ARRAY_DATA_SIZE! */
-{
- dyn_block_t* block;
-
- ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
- ut_ad(size <= DYN_ARRAY_DATA_SIZE);
- ut_ad(size);
-
- block = arr;
-
- if (block->used + size > DYN_ARRAY_DATA_SIZE) {
- /* Get the last array block */
-
- block = dyn_array_get_last_block(arr);
-
- if (block->used + size > DYN_ARRAY_DATA_SIZE) {
- block = dyn_array_add_block(arr);
- ut_a(size <= DYN_ARRAY_DATA_SIZE);
- }
- }
-
- ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
- ut_ad(arr->buf_end == 0);
- ut_d(arr->buf_end = block->used + size);
-
- return(block->data + block->used);
-}
-
-/*********************************************************************//**
-Closes the buffer returned by dyn_array_open. */
-UNIV_INLINE
-void
-dyn_array_close(
-/*============*/
- dyn_array_t* arr, /*!< in/out: dynamic array */
- const byte* ptr) /*!< in: end of used space */
-{
- dyn_block_t* block;
-
- ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
-
- block = dyn_array_get_last_block(arr);
-
- ut_ad(arr->buf_end + block->data >= ptr);
-
- block->used = ptr - block->data;
-
- ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
-
- ut_d(arr->buf_end = 0);
-}
-
-/************************************************************//**
-Returns pointer to an element in dyn array.
-@return pointer to element */
-UNIV_INLINE
-void*
-dyn_array_get_element(
-/*==================*/
- const dyn_array_t* arr, /*!< in: dyn array */
- ulint pos) /*!< in: position of element
- in bytes from array start */
-{
- const dyn_block_t* block;
-
- ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
-
- /* Get the first array block */
- block = dyn_array_get_first_block(arr);
-
- if (arr->heap != NULL) {
- for (;;) {
- ulint used = dyn_block_get_used(block);
-
- if (pos < used) {
- break;
- }
-
- pos -= used;
- block = UT_LIST_GET_NEXT(list, block);
- ut_ad(block);
- }
- }
-
- ut_ad(block);
- ut_ad(dyn_block_get_used(block) >= pos);
-
- return(const_cast<byte*>(block->data) + pos);
-}
-
-/************************************************************//**
-Returns the size of stored data in a dyn array.
-@return data size in bytes */
-UNIV_INLINE
-ulint
-dyn_array_get_data_size(
-/*====================*/
- const dyn_array_t* arr) /*!< in: dyn array */
-{
- const dyn_block_t* block;
- ulint sum = 0;
-
- ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
-
- if (arr->heap == NULL) {
-
- return(arr->used);
- }
-
- /* Get the first array block */
- block = dyn_array_get_first_block(arr);
-
- while (block != NULL) {
- sum += dyn_block_get_used(block);
- block = dyn_array_get_next_block(arr, block);
- }
-
- return(sum);
-}
-
-/********************************************************//**
-Pushes n bytes to a dyn array. */
-UNIV_INLINE
-void
-dyn_push_string(
-/*============*/
- dyn_array_t* arr, /*!< in/out: dyn array */
- const byte* str, /*!< in: string to write */
- ulint len) /*!< in: string length */
-{
- ulint n_copied;
-
- while (len > 0) {
- if (len > DYN_ARRAY_DATA_SIZE) {
- n_copied = DYN_ARRAY_DATA_SIZE;
- } else {
- n_copied = len;
- }
-
- memcpy(dyn_array_push(arr, n_copied), str, n_copied);
-
- str += n_copied;
- len -= n_copied;
- }
-}
diff --git a/storage/innobase/include/row0quiesce.ic b/storage/innobase/include/dyn0types.h
index cf0a5a1164e..06d837081a1 100644
--- a/storage/innobase/include/row0quiesce.ic
+++ b/storage/innobase/include/dyn0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -17,10 +17,23 @@ this program; if not, write to the Free Software Foundation, Inc.,
*****************************************************************************/
/**************************************************//**
-@file include/row0quiesce.ic
+@file include/dyn0types.h
+The dynamically allocated buffer types and constants
-Quiesce a tablespace.
-
-Created 2012-02-08 Sunny Bains
+Created 2013-03-16 Sunny Bains
*******************************************************/
+#ifndef dyn0types_h
+#define dyn0types_h
+
+/** Value of dyn_block_t::magic_n */
+#define DYN_BLOCK_MAGIC_N 375767
+
+/** This is the initial 'payload' size of a dynamic array;
+this must be > MLOG_BUF_MARGIN + 30! */
+#define DYN_ARRAY_DATA_SIZE 512
+
+/** Flag for dyn_block_t::used that indicates a full block */
+#define DYN_BLOCK_FULL_FLAG 0x1000000UL
+
+#endif /* dyn0types_h */
diff --git a/storage/innobase/include/eval0eval.h b/storage/innobase/include/eval0eval.h
index 4301b491ad2..ebd40924a49 100644
--- a/storage/innobase/include/eval0eval.h
+++ b/storage/innobase/include/eval0eval.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,7 +27,6 @@ Created 12/29/1997 Heikki Tuuri
#ifndef eval0eval_h
#define eval0eval_h
-#include "univ.i"
#include "que0types.h"
#include "pars0sym.h"
#include "pars0pars.h"
@@ -36,7 +35,6 @@ Created 12/29/1997 Heikki Tuuri
Free the buffer from global dynamic memory for a value of a que_node,
if it has been allocated in the above function. The freeing for pushed
column values is done in sel_col_prefetch_buf_free. */
-UNIV_INTERN
void
eval_node_free_val_buf(
/*===================*/
@@ -65,7 +63,7 @@ eval_node_set_int_val(
lint val); /*!< in: value to set */
/*****************************************************************//**
Gets an integer value from an expression node.
-@return integer value */
+@return integer value */
UNIV_INLINE
lint
eval_node_get_int_val(
@@ -91,7 +89,7 @@ eval_node_copy_val(
que_node_t* node2); /*!< in: node to copy from */
/*****************************************************************//**
Gets an iboolean value from a query node.
-@return iboolean value */
+@return iboolean value */
UNIV_INLINE
ibool
eval_node_get_ibool_val(
@@ -99,16 +97,13 @@ eval_node_get_ibool_val(
que_node_t* node); /*!< in: query graph node */
/*****************************************************************//**
Evaluates a comparison node.
-@return the result of the comparison */
-UNIV_INTERN
+@return the result of the comparison */
ibool
eval_cmp(
/*=====*/
func_node_t* cmp_node); /*!< in: comparison node */
-#ifndef UNIV_NONINL
#include "eval0eval.ic"
-#endif
#endif
diff --git a/storage/innobase/include/eval0eval.ic b/storage/innobase/include/eval0eval.ic
index 47d70d59322..ae0887408b0 100644
--- a/storage/innobase/include/eval0eval.ic
+++ b/storage/innobase/include/eval0eval.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -30,7 +30,6 @@ Created 12/29/1997 Heikki Tuuri
/*****************************************************************//**
Evaluates a function node. */
-UNIV_INTERN
void
eval_func(
/*======*/
@@ -41,8 +40,7 @@ NOTE that this memory must be explicitly freed when the query graph is
freed. If the node already has allocated buffer, that buffer is freed
here. NOTE that this is the only function where dynamic memory should be
allocated for a query node val field.
-@return pointer to allocated buffer */
-UNIV_INTERN
+@return pointer to allocated buffer */
byte*
eval_node_alloc_val_buf(
/*====================*/
@@ -54,7 +52,7 @@ eval_node_alloc_val_buf(
/*****************************************************************//**
Allocates a new buffer if needed.
-@return pointer to buffer */
+@return pointer to buffer */
UNIV_INLINE
byte*
eval_node_ensure_val_buf(
@@ -145,7 +143,7 @@ eval_node_set_int_val(
/*****************************************************************//**
Gets an integer non-SQL null value from an expression node.
-@return integer value */
+@return integer value */
UNIV_INLINE
lint
eval_node_get_int_val(
@@ -165,7 +163,7 @@ eval_node_get_int_val(
/*****************************************************************//**
Gets an iboolean value from a query node.
-@return iboolean value */
+@return iboolean value */
UNIV_INLINE
ibool
eval_node_get_ibool_val(
diff --git a/storage/innobase/include/eval0proc.h b/storage/innobase/include/eval0proc.h
index 6dfeff1cc89..71700bb5933 100644
--- a/storage/innobase/include/eval0proc.h
+++ b/storage/innobase/include/eval0proc.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1998, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1998, 2016, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,14 +26,13 @@ Created 1/20/1998 Heikki Tuuri
#ifndef eval0proc_h
#define eval0proc_h
-#include "univ.i"
#include "que0types.h"
#include "pars0sym.h"
#include "pars0pars.h"
/**********************************************************************//**
Performs an execution step of a procedure node.
-@return query thread to run next or NULL */
+@return query thread to run next or NULL */
UNIV_INLINE
que_thr_t*
proc_step(
@@ -41,39 +40,35 @@ proc_step(
que_thr_t* thr); /*!< in: query thread */
/**********************************************************************//**
Performs an execution step of an if-statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
if_step(
/*====*/
que_thr_t* thr); /*!< in: query thread */
/**********************************************************************//**
Performs an execution step of a while-statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
while_step(
/*=======*/
que_thr_t* thr); /*!< in: query thread */
/**********************************************************************//**
Performs an execution step of a for-loop node.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
for_step(
/*=====*/
que_thr_t* thr); /*!< in: query thread */
/**********************************************************************//**
Performs an execution step of an assignment statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
assign_step(
/*========*/
que_thr_t* thr); /*!< in: query thread */
/**********************************************************************//**
Performs an execution step of a procedure call node.
-@return query thread to run next or NULL */
+@return query thread to run next or NULL */
UNIV_INLINE
que_thr_t*
proc_eval_step(
@@ -81,24 +76,19 @@ proc_eval_step(
que_thr_t* thr); /*!< in: query thread */
/**********************************************************************//**
Performs an execution step of an exit statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
exit_step(
/*======*/
que_thr_t* thr); /*!< in: query thread */
/**********************************************************************//**
Performs an execution step of a return-statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
return_step(
/*========*/
que_thr_t* thr); /*!< in: query thread */
-
-#ifndef UNIV_NONINL
#include "eval0proc.ic"
-#endif
#endif
diff --git a/storage/innobase/include/eval0proc.ic b/storage/innobase/include/eval0proc.ic
index c3ba8a97d3f..b0c5f75b94e 100644
--- a/storage/innobase/include/eval0proc.ic
+++ b/storage/innobase/include/eval0proc.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1998, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1998, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -29,7 +29,7 @@ Created 1/20/1998 Heikki Tuuri
/**********************************************************************//**
Performs an execution step of a procedure node.
-@return query thread to run next or NULL */
+@return query thread to run next or NULL */
UNIV_INLINE
que_thr_t*
proc_step(
@@ -64,7 +64,7 @@ proc_step(
/**********************************************************************//**
Performs an execution step of a procedure call node.
-@return query thread to run next or NULL */
+@return query thread to run next or NULL */
UNIV_INLINE
que_thr_t*
proc_eval_step(
diff --git a/storage/innobase/include/fil0crypt.h b/storage/innobase/include/fil0crypt.h
index 2b4f924247c..3c56315ee9a 100644
--- a/storage/innobase/include/fil0crypt.h
+++ b/storage/innobase/include/fil0crypt.h
@@ -26,7 +26,11 @@ Created 04/01/2015 Jan Lindström
#ifndef fil0crypt_h
#define fil0crypt_h
-#include "os0sync.h"
+#ifndef UNIV_INNOCHECKSUM
+
+#include "os0event.h"
+#include "my_crypt.h"
+#endif /* !UNIV_INNOCHECKSUM */
/**
* Magic pattern in start of crypt data on page 0
@@ -71,12 +75,6 @@ struct key_struct
/** is encryption enabled */
extern ulong srv_encrypt_tables;
-#ifndef UNIV_INNOCHECKSUM
-#ifdef UNIV_PFS_MUTEX
-extern mysql_pfs_key_t fil_crypt_data_mutex_key;
-#endif
-#endif /* !UNIV_INNOCHECKSUM */
-
/** Mutex helper for crypt_data->scheme
@param[in,out] scheme encryption scheme
@param[in] exit should we exit or enter the mutex? */
@@ -126,8 +124,7 @@ struct fil_space_crypt_t : st_encryption_scheme
{
key_id = new_key_id;
my_random_bytes(iv, sizeof(iv));
- mutex_create(fil_crypt_data_mutex_key,
- &mutex, SYNC_NO_ORDER_CHECK);
+ mutex_create(LATCH_ID_FIL_CRYPT_DATA_MUTEX, &mutex);
locker = crypt_data_scheme_locker;
type = new_type;
@@ -183,10 +180,17 @@ struct fil_space_crypt_t : st_encryption_scheme
return (encryption == FIL_ENCRYPTION_OFF);
}
+ /** Fill crypt data information to the given page.
+ It should be called during ibd file creation.
+ @param[in] flags tablespace flags
+ @param[in,out] page first page of the tablespace */
+ void fill_page0(ulint flags, byte* page);
+
/** Write crypt data to a page (0)
- @param[in,out] page0 Page 0 where to write
- @param[in,out] mtr Minitransaction */
- void write_page0(byte* page0, mtr_t* mtr);
+ @param[in] space tablespace
+ @param[in,out] page0 first page of the tablespace
+ @param[in,out] mtr mini-transaction */
+ void write_page0(const fil_space_t* space, byte* page0, mtr_t* mtr);
uint min_key_version; // min key version for this space
ulint page0_offset; // byte offset on page 0 for crypt data
@@ -211,7 +215,7 @@ struct fil_space_crypt_status_t {
uint min_key_version; /*!< min key version */
uint current_key_version;/*!< current key version */
uint keyserver_requests;/*!< no of key requests to key server */
- ulint key_id; /*!< current key_id */
+ uint key_id; /*!< current key_id */
bool rotating; /*!< is key rotation ongoing */
bool flushing; /*!< is flush at end of rotation ongoing */
ulint rotate_next_page_number; /*!< next page if key rotating */
@@ -251,7 +255,7 @@ UNIV_INTERN
void
fil_space_crypt_cleanup();
-/******************************************************************
+/**
Create a fil_space_crypt_t object
@param[in] encrypt_mode FIL_ENCRYPTION_DEFAULT or
FIL_ENCRYPTION_ON or
@@ -276,21 +280,17 @@ fil_space_merge_crypt_data(
fil_space_crypt_t* dst,
const fil_space_crypt_t* src);
-/******************************************************************
-Read crypt data from a page (0)
-@param[in] space space_id
-@param[in] page Page 0
-@param[in] offset Offset to crypt data
-@return crypt data from page 0 or NULL. */
+/** Initialize encryption parameters from a tablespace header page.
+@param[in] page_size page size of the tablespace
+@param[in] page first page of the tablespace
+@return crypt data from page 0
+@retval NULL if not present or not valid */
UNIV_INTERN
fil_space_crypt_t*
-fil_space_read_crypt_data(
- ulint space,
- const byte* page,
- ulint offset)
- MY_ATTRIBUTE((warn_unused_result));
+fil_space_read_crypt_data(const page_size_t& page_size, const byte* page)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************
+/**
Free a crypt data object
@param[in,out] crypt_data crypt data to be freed */
UNIV_INTERN
@@ -314,30 +314,28 @@ fil_parse_write_crypt_data(
dberr_t* err)
MY_ATTRIBUTE((warn_unused_result));
-/******************************************************************
-Encrypt a buffer
+/** Encrypt a buffer.
@param[in,out] crypt_data Crypt data
@param[in] space space_id
@param[in] offset Page offset
@param[in] lsn Log sequence number
@param[in] src_frame Page to encrypt
-@param[in] zip_size Compressed size or 0
+@param[in] page_size Page size
@param[in,out] dst_frame Output buffer
@return encrypted buffer or NULL */
-UNIV_INTERN
byte*
fil_encrypt_buf(
- fil_space_crypt_t* crypt_data,
- ulint space,
- ulint offset,
- lsn_t lsn,
- const byte* src_frame,
- ulint zip_size,
- byte* dst_frame)
+ fil_space_crypt_t* crypt_data,
+ ulint space,
+ ulint offset,
+ lsn_t lsn,
+ const byte* src_frame,
+ const page_size_t& page_size,
+ byte* dst_frame)
MY_ATTRIBUTE((warn_unused_result));
-/******************************************************************
-Encrypt a page
+/**
+Encrypt a page.
@param[in] space Tablespace
@param[in] offset Page offset
@@ -355,8 +353,8 @@ fil_space_encrypt(
byte* dst_frame)
MY_ATTRIBUTE((warn_unused_result));
-/******************************************************************
-Decrypt a page
+/**
+Decrypt a page.
@param[in,out] crypt_data crypt_data
@param[in] tmp_frame Temporary buffer
@param[in] page_size Page size
@@ -368,7 +366,7 @@ bool
fil_space_decrypt(
fil_space_crypt_t* crypt_data,
byte* tmp_frame,
- ulint page_size,
+ const page_size_t& page_size,
byte* src_frame,
dberr_t* err);
@@ -376,9 +374,7 @@ fil_space_decrypt(
Decrypt a page
@param[in] space Tablespace
@param[in] tmp_frame Temporary buffer used for decrypting
-@param[in] page_size Page size
@param[in,out] src_frame Page to decrypt
-@param[out] decrypted true if page was decrypted
@return decrypted page, or the original unencrypted page if decryption is
not needed. */
UNIV_INTERN
@@ -386,42 +382,22 @@ byte*
fil_space_decrypt(
const fil_space_t* space,
byte* tmp_frame,
- byte* src_frame,
- bool* decrypted)
+ byte* src_frame)
MY_ATTRIBUTE((warn_unused_result));
/******************************************************************
Calculate post encryption checksum
-@param[in] zip_size zip_size or 0
+@param[in] page_size page size
@param[in] dst_frame Block where checksum is calculated
@return page checksum or BUF_NO_CHECKSUM_MAGIC */
UNIV_INTERN
-ulint
+uint32_t
fil_crypt_calculate_checksum(
- ulint zip_size,
- const byte* dst_frame)
+ const page_size_t& page_size,
+ const byte* dst_frame)
MY_ATTRIBUTE((warn_unused_result));
-#endif /* UNIV_INNOCHECKSUM */
-
-/*********************************************************************
-Verify that post encryption checksum match calculated checksum.
-This function should be called only if tablespace contains crypt_data
-metadata (this is strong indication that tablespace is encrypted).
-Function also verifies that traditional checksum does not match
-calculated checksum as if it does page could be valid unencrypted,
-encrypted, or corrupted.
-
-@param[in] page Page to verify
-@param[in] zip_size zip size
-@return whether the encrypted page is OK */
-UNIV_INTERN
-bool fil_space_verify_crypt_checksum(const byte* page, ulint zip_size)
- MY_ATTRIBUTE((warn_unused_result));
-
-#ifndef UNIV_INNOCHECKSUM
-
/*********************************************************************
Adjust thread count for key rotation
@param[in] new_cnt Number of threads to be used */
@@ -493,7 +469,7 @@ void
fil_crypt_total_stat(
fil_crypt_stat_t *stat);
-/*********************************************************************
+/**
Get scrub status for a space (used by information_schema)
@param[in] space Tablespace
@@ -502,12 +478,25 @@ return 0 if data found */
UNIV_INTERN
void
fil_space_get_scrub_status(
- const fil_space_t* space,
- struct fil_space_scrub_status_t* status);
+ const fil_space_t* space,
+ fil_space_scrub_status_t* status);
-#ifndef UNIV_NONINL
#include "fil0crypt.ic"
-#endif
-
#endif /* !UNIV_INNOCHECKSUM */
+
+/**
+Verify that the post-encryption checksum matches the calculated checksum.
+This function should be called only if the tablespace contains crypt_data
+metadata (a strong indication that the tablespace is encrypted).
+The function also verifies that the traditional checksum does NOT match
+the calculated one; if it did, the page could equally be a valid
+unencrypted, encrypted, or corrupted page.
+
+@param[in,out] page page frame (checksum is temporarily modified)
+@param[in] page_size page size
+@return true if page is encrypted AND OK, false otherwise */
+bool
+fil_space_verify_crypt_checksum(const byte* page, const page_size_t& page_size)
+ MY_ATTRIBUTE((warn_unused_result));
+
#endif /* fil0crypt_h */
diff --git a/storage/innobase/include/fil0crypt.ic b/storage/innobase/include/fil0crypt.ic
index 0c3b0114487..cc59b394368 100644
--- a/storage/innobase/include/fil0crypt.ic
+++ b/storage/innobase/include/fil0crypt.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2015, MariaDB Corporation.
+Copyright (c) 2015, 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -17,7 +17,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
*****************************************************************************/
/**************************************************//**
-@file include/fil0fil.h
+@file include/fil0crypt.ic
The low-level file system encryption support functions
Created 04/01/2015 Jan Lindström
@@ -34,3 +34,48 @@ fil_page_is_encrypted(
{
return(mach_read_from_4(buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) != 0);
}
+
+/*******************************************************************//**
+Get current encryption mode from crypt_data.
+@return string representation */
+UNIV_INLINE
+const char *
+fil_crypt_get_mode(
+/*===============*/
+ const fil_space_crypt_t* crypt_data)
+{
+ switch (crypt_data->encryption) {
+ case FIL_ENCRYPTION_DEFAULT:
+ return("Default tablespace encryption mode");
+ case FIL_ENCRYPTION_ON:
+ return("Tablespace encrypted");
+ case FIL_ENCRYPTION_OFF:
+ return("Tablespace not encrypted");
+ }
+
+ ut_error;
+ return ("NULL");
+}
+
+/*******************************************************************//**
+Get current encryption type from crypt_data.
+@return string representation */
+UNIV_INLINE
+const char *
+fil_crypt_get_type(
+ const fil_space_crypt_t* crypt_data)
+{
+ ut_ad(crypt_data != NULL);
+ switch (crypt_data->type) {
+ case CRYPT_SCHEME_UNENCRYPTED:
+ return("scheme unencrypted");
+ break;
+ case CRYPT_SCHEME_1:
+ return("scheme encrypted");
+ break;
+ default:
+ ut_error;
+ }
+
+ return ("NULL");
+}
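A brief usage sketch for the two helpers added above (e.g. from an information_schema dump); both return pointers to static strings, so no memory management is needed:

    #include <cstdio>

    void report_encryption(const fil_space_crypt_t* crypt_data)
    {
        if (crypt_data == NULL) {
            return;  // page 0 carried no crypt metadata
        }
        printf("mode: %s, type: %s\n",
               fil_crypt_get_mode(crypt_data),
               fil_crypt_get_type(crypt_data));
    }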
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index 4bd394ff94d..9c722944665 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2019, MariaDB Corporation.
+Copyright (c) 2013, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,56 +26,299 @@ Created 10/25/1995 Heikki Tuuri
#ifndef fil0fil_h
#define fil0fil_h
-#include "univ.i"
#ifndef UNIV_INNOCHECKSUM
+#include "log0recv.h"
#include "dict0types.h"
-#include "ut0byte.h"
-#include "os0file.h"
-#include "hash0hash.h"
-#ifndef UNIV_HOTBACKUP
-#include "sync0rw.h"
+#include "page0size.h"
#include "ibuf0types.h"
-#include "log0log.h"
-#endif /* !UNIV_HOTBACKUP */
+#include "intrusive_list.h"
#include <list>
+struct unflushed_spaces_tag_t;
+struct rotation_list_tag_t;
+
// Forward declaration
+extern ibool srv_use_doublewrite_buf;
+extern struct buf_dblwr_t* buf_dblwr;
+class page_id_t;
struct trx_t;
+class truncate_t;
+
+typedef std::list<char*, ut_allocator<char*> > space_name_list_t;
+
+/** Structure containing encryption specification */
+struct fil_space_crypt_t;
-typedef std::list<const char*> space_name_list_t;
+/** File types */
+enum fil_type_t {
+ /** temporary tablespace (temporary undo log or tables) */
+ FIL_TYPE_TEMPORARY,
+ /** a tablespace that is being imported (no logging until finished) */
+ FIL_TYPE_IMPORT,
+ /** persistent tablespace (for system, undo log or tables) */
+ FIL_TYPE_TABLESPACE,
+ /** redo log covering changes to files of FIL_TYPE_TABLESPACE */
+ FIL_TYPE_LOG
+};
+
+/** Check if fil_type is any of FIL_TYPE_TEMPORARY, FIL_TYPE_IMPORT
+or FIL_TYPE_TABLESPACE.
+@param[in] type variable of type fil_type_t
+@return true if any of FIL_TYPE_TEMPORARY, FIL_TYPE_IMPORT
+or FIL_TYPE_TABLESPACE */
+inline
+bool
+fil_type_is_data(
+ fil_type_t type)
+{
+ return(type == FIL_TYPE_TEMPORARY
+ || type == FIL_TYPE_IMPORT
+ || type == FIL_TYPE_TABLESPACE);
+}
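An illustrative (hypothetical) caller, showing how fil_type_is_data() keeps the redo-log space out of a data-only code path:

    void visit_space(const fil_space_t* space)
    {
        if (fil_type_is_data(space->purpose)) {
            // FIL_TYPE_TEMPORARY, FIL_TYPE_IMPORT or FIL_TYPE_TABLESPACE:
            // ... operate on data pages ...
        } else {
            // FIL_TYPE_LOG is handled by the log subsystem instead.
        }
    }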
+
+struct fil_node_t;
+
+/** Tablespace or log data space */
+struct fil_space_t : intrusive::list_node<unflushed_spaces_tag_t>,
+ intrusive::list_node<rotation_list_tag_t>
+{
+ ulint id; /*!< space id */
+ hash_node_t hash; /*!< hash chain node */
+ char* name; /*!< Tablespace name */
+ hash_node_t name_hash;/*!< hash chain the name_hash table */
+ lsn_t max_lsn;
+ /*!< LSN of the most recent
+ fil_names_write_if_was_clean().
+ Reset to 0 by fil_names_clear().
+ Protected by log_sys->mutex.
+ If and only if this is nonzero, the
+ tablespace will be in named_spaces. */
+ /** Log sequence number of the latest MLOG_INDEX_LOAD record
+ that was found while parsing the redo log */
+ lsn_t enable_lsn;
+ bool stop_new_ops;
+ /*!< we set this true when we start
+ deleting a single-table tablespace.
+ When this is set, the following new
+ operations are not allowed:
+ * read IO request
+ * ibuf merge
+ * file flush
+ Note that we can still possibly have
+ new write operations because we don't
+ check this flag when doing flush
+ batches. */
+ /** whether undo tablespace truncation is in progress */
+ bool is_being_truncated;
+#ifdef UNIV_DEBUG
+ ulint redo_skipped_count;
+ /*!< reference count for operations that want
+ to skip redo log in the file space in order
+ to make modify_check() pass. */
+#endif
+ fil_type_t purpose;/*!< purpose */
+ UT_LIST_BASE_NODE_T(fil_node_t) chain;
+ /*!< base node for the file chain */
+ ulint size; /*!< tablespace file size in pages;
+ 0 if not known yet */
+ ulint size_in_header;
+ /* FSP_SIZE in the tablespace header;
+ 0 if not known yet */
+ ulint free_len;
+ /*!< length of the FSP_FREE list */
+ ulint free_limit;
+ /*!< contents of FSP_FREE_LIMIT */
+ ulint recv_size;
+ /*!< recovered tablespace size in pages;
+ 0 if no size change was read from the redo log,
+ or if the size change was implemented */
+ ulint flags; /*!< FSP_SPACE_FLAGS and FSP_FLAGS_MEM_ flags;
+ see fsp0types.h,
+ fsp_flags_is_valid(),
+ page_size_t(ulint) (constructor) */
+ ulint n_reserved_extents;
+ /*!< number of reserved free extents for
+ ongoing operations like B-tree page split */
+ ulint n_pending_flushes; /*!< this is positive when flushing
+ the tablespace to disk; dropping of the
+ tablespace is forbidden if this is positive */
+ /** Number of pending buffer pool operations accessing the tablespace
+ without holding a table lock or dict_operation_lock S-latch
+ that would prevent the table (and tablespace) from being
+ dropped. An example is change buffer merge.
+ The tablespace cannot be dropped while this is nonzero,
+ or while fil_node_t::n_pending is nonzero.
+ Protected by fil_system->mutex. */
+ ulint n_pending_ops;
+ /** Number of pending block read or write operations
+ (when a write is imminent or a read has recently completed).
+ The tablespace object cannot be freed while this is nonzero,
+ but it can be detached from fil_system.
+ Note that fil_node_t::n_pending tracks actual pending I/O requests.
+ Protected by fil_system->mutex. */
+ ulint n_pending_ios;
+ rw_lock_t latch; /*!< latch protecting the file space storage
+ allocation */
+ UT_LIST_NODE_T(fil_space_t) named_spaces;
+ /*!< list of spaces for which MLOG_FILE_NAME
+ records have been issued */
+ /** Checks whether this tablespace is in the list of unflushed tablespaces.
+ @return true if in the list */
+ bool is_in_unflushed_spaces() const;
+ UT_LIST_NODE_T(fil_space_t) space_list;
+ /*!< list of all spaces */
+ /** Checks whether this tablespace needs key rotation.
+ @return true if in the rotation list */
+ bool is_in_rotation_list() const;
+
+ /** MariaDB encryption data */
+ fil_space_crypt_t* crypt_data;
+
+ /** True if the device this filespace is on supports atomic writes */
+ bool atomic_write_supported;
+
+ /** Release the reserved free extents.
+ @param[in] n_reserved number of reserved extents */
+ void release_free_extents(ulint n_reserved);
+
+ /** True if file system storing this tablespace supports
+ punch hole */
+ bool punch_hole;
+
+ ulint magic_n;/*!< FIL_SPACE_MAGIC_N */
+
+ /** @return whether the tablespace is about to be dropped */
+ bool is_stopping() const { return stop_new_ops; }
+
+ /** @return whether doublewrite buffering is needed */
+ bool use_doublewrite() const
+ {
+ return !atomic_write_supported
+ && srv_use_doublewrite_buf && buf_dblwr;
+ }
+
+ /** Append a file to the chain of files of a space.
+ @param[in] name file name of a file that is not open
+ @param[in] handle file handle, or OS_FILE_CLOSED
+ @param[in] size file size in entire database pages
+ @param[in] is_raw whether this is a raw device
+ @param[in] atomic_write true if atomic write could be enabled
+ @param[in] max_pages maximum number of pages in file,
+ or ULINT_MAX for unlimited
+ @return file object */
+ fil_node_t* add(const char* name, pfs_os_file_t handle,
+ ulint size, bool is_raw, bool atomic_write,
+ ulint max_pages = ULINT_MAX);
+#ifdef UNIV_DEBUG
+ /** Assert that the mini-transaction is compatible with
+ updating an allocation bitmap page.
+ @param[in] mtr mini-transaction */
+ void modify_check(const mtr_t& mtr) const;
+#endif /* UNIV_DEBUG */
+};
+
+/** Value of fil_space_t::magic_n */
+#define FIL_SPACE_MAGIC_N 89472
+
+/** File node of a tablespace or the log data space */
+struct fil_node_t {
+ /** tablespace containing this file */
+ fil_space_t* space;
+ /** file name; protected by fil_system->mutex and log_sys->mutex. */
+ char* name;
+ /** file handle (valid if is_open) */
+ pfs_os_file_t handle;
+ /** whether the file actually is a raw device or disk partition */
+ bool is_raw_disk;
+ /** size of the file in database pages (0 if not known yet);
+ the possible last incomplete megabyte may be ignored
+ if space->id == 0 */
+ ulint size;
+ /** initial size of the file in database pages;
+ FIL_IBD_FILE_INITIAL_SIZE by default */
+ ulint init_size;
+ /** maximum size of the file in database pages (0 if unlimited) */
+ ulint max_size;
+ /** count of pending i/o's; is_open must be true if nonzero */
+ ulint n_pending;
+ /** count of pending flushes; is_open must be true if nonzero */
+ ulint n_pending_flushes;
+ /** whether the file is currently being extended */
+ bool being_extended;
+ /** whether this file had writes after last fsync() */
+ bool needs_flush;
+ /** link to other files in this tablespace */
+ UT_LIST_NODE_T(fil_node_t) chain;
+ /** link to the fil_system->LRU list (keeping track of open files) */
+ UT_LIST_NODE_T(fil_node_t) LRU;
+
+ /** whether this file could use atomic write (data file) */
+ bool atomic_write;
+
+ /** Filesystem block size */
+ ulint block_size;
+
+ /** FIL_NODE_MAGIC_N */
+ ulint magic_n;
+
+ /** @return whether this file is open */
+ bool is_open() const
+ {
+ return(handle != OS_FILE_CLOSED);
+ }
+
+ /** Read the first page of a data file.
+ @param[in] first whether this is the very first read
+ @return whether the page was found valid */
+ bool read_page0(bool first);
+};
+
+/** Value of fil_node_t::magic_n */
+#define FIL_NODE_MAGIC_N 89389
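A small sketch (the caller would hold fil_system->mutex) of walking a space's file chain with the new fil_node_t::is_open() accessor:

    ulint count_open_files(const fil_space_t* space)
    {
        ulint n = 0;
        for (const fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
             node != NULL;
             node = UT_LIST_GET_NEXT(chain, node)) {
            if (node->is_open()) {
                n++;
            }
        }
        return n;
    }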
+
+/** Common InnoDB file extensions */
+enum ib_extention {
+ NO_EXT = 0,
+ IBD = 1,
+ ISL = 2,
+ CFG = 3
+};
+extern const char* dot_ext[];
+#define DOT_IBD dot_ext[IBD]
+#define DOT_ISL dot_ext[ISL]
+#define DOT_CFG dot_ext[CFG]
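For illustration, assuming the conventional contents of dot_ext[] defined in fil0fil.cc ({"", ".ibd", ".isl", ".cfg"}), a hypothetical helper composing a data file name from the mapping above:

    #include <cstdio>
    #include <cstdlib>
    #include <cstring>

    char* make_file_name(const char* tablename)
    {
        const size_t len = strlen(tablename) + strlen(DOT_IBD) + 1;
        char* path = static_cast<char*>(malloc(len));
        if (path != NULL) {
            snprintf(path, len, "%s%s", tablename, DOT_IBD);  // "db/t1.ibd"
        }
        return path;  // caller must free()
    }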
/** When mysqld is run, the default directory "." is the mysqld datadir,
but in the MySQL Embedded Server Library and mysqlbackup it is not the default
directory, and we must set the base file path explicitly */
extern const char* fil_path_to_mysql_datadir;
-/** Initial size of a single-table tablespace in pages */
-#define FIL_IBD_FILE_INITIAL_SIZE 4
-
-/** 'null' (undefined) page offset in the context of file spaces */
-#define FIL_NULL ULINT32_UNDEFINED
-
/* Space address data type; this is intended to be used when
addresses accurate to a byte are stored in file pages. If the page part
of the address is FIL_NULL, the address is considered undefined. */
typedef byte fil_faddr_t; /*!< 'type' definition in C: an address
stored in a file page is a string of bytes */
-
#endif /* !UNIV_INNOCHECKSUM */
+/** Initial size of a single-table tablespace in pages */
+#define FIL_IBD_FILE_INITIAL_SIZE 4
+
+/** 'null' (undefined) page offset in the context of file spaces */
+#define FIL_NULL ULINT32_UNDEFINED
+
+
#define FIL_ADDR_PAGE 0 /* first in address is the page offset */
#define FIL_ADDR_BYTE 4 /* then comes 2-byte byte offset within page*/
-
#define FIL_ADDR_SIZE 6 /* address size is 6 bytes */
#ifndef UNIV_INNOCHECKSUM
/** File space address */
-struct fil_addr_t{
+struct fil_addr_t {
ulint page; /*!< page number within a space */
ulint boffset; /*!< byte offset within the page */
};
@@ -134,11 +377,17 @@ extern fil_addr_t fil_addr_null;
used to encrypt the page + 32-bit checksum
or 64 bits of zero if no encryption
*/
-#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /*!< starting from 4.1.x this
- contains the space id of the page */
+
+/** This overloads FIL_PAGE_FILE_FLUSH_LSN for RTREE Split Sequence Number */
+#define FIL_RTREE_SPLIT_SEQ_NUM FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
+
+/** starting from 4.1.x this contains the space id of the page */
+#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34
+
#define FIL_PAGE_SPACE_ID FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID
-#define FIL_PAGE_DATA 38 /*!< start of the data on the page */
+#define FIL_PAGE_DATA 38U /*!< start of the data on the page */
+
/* Following are used when page compression is used */
#define FIL_PAGE_COMPRESSED_SIZE 2 /*!< Number of bytes used to store
actual payload data size on
@@ -160,6 +409,7 @@ extern fil_addr_t fil_addr_null;
then encrypted */
#define FIL_PAGE_PAGE_COMPRESSED 34354 /*!< page compressed page */
#define FIL_PAGE_INDEX 17855 /*!< B-tree node */
+#define FIL_PAGE_RTREE 17854 /*!< R-tree node */
#define FIL_PAGE_UNDO_LOG 2 /*!< Undo log page */
#define FIL_PAGE_INODE 3 /*!< Index node */
#define FIL_PAGE_IBUF_FREE_LIST 4 /*!< Insert buffer free list */
@@ -173,17 +423,29 @@ extern fil_addr_t fil_addr_null;
#define FIL_PAGE_TYPE_BLOB 10 /*!< Uncompressed BLOB page */
#define FIL_PAGE_TYPE_ZBLOB 11 /*!< First compressed BLOB page */
#define FIL_PAGE_TYPE_ZBLOB2 12 /*!< Subsequent compressed BLOB page */
-#define FIL_PAGE_TYPE_LAST FIL_PAGE_TYPE_ZBLOB2
+#define FIL_PAGE_TYPE_UNKNOWN 13 /*!< In old tablespaces, garbage
+ in FIL_PAGE_TYPE is replaced with this
+ value when flushing pages. */
+
+/* File page types introduced in MySQL 5.7, not supported in MariaDB */
+//#define FIL_PAGE_COMPRESSED 14
+//#define FIL_PAGE_ENCRYPTED 15
+//#define FIL_PAGE_COMPRESSED_AND_ENCRYPTED 16
+//#define FIL_PAGE_ENCRYPTED_RTREE 17
+
+/** Used by i_s.cc to index into the text description. */
+#define FIL_PAGE_TYPE_LAST FIL_PAGE_TYPE_UNKNOWN
/*!< Last page type */
/* @} */
-/** Space types @{ */
-#define FIL_TABLESPACE 501 /*!< tablespace */
-#define FIL_LOG 502 /*!< redo log */
-/* @} */
+/** Check whether the page type is an index (B-tree or R-tree) type */
+#define fil_page_type_is_index(page_type) \
+ (page_type == FIL_PAGE_INDEX || page_type == FIL_PAGE_RTREE)
-/** Structure containing encryption specification */
-struct fil_space_crypt_t;
+/** Check whether the page is an index page (either a regular B-tree index
+or an R-tree index) */
+#define fil_page_index_page_check(page) \
+ fil_page_type_is_index(fil_page_get_type(page))
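A hedged sketch of what these macros expand to: fil_page_get_type() reads the 16-bit FIL_PAGE_TYPE field of the page header, so the check is a plain comparison against the two index page types:

    inline bool page_is_index_page(const byte* page)
    {
        const ulint type = fil_page_get_type(page);
        return type == FIL_PAGE_INDEX    // ordinary B-tree node
            || type == FIL_PAGE_RTREE;   // spatial R-tree node
    }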
/** Enum values for encryption table option */
enum fil_encryption_t {
@@ -195,6 +457,8 @@ enum fil_encryption_t {
FIL_ENCRYPTION_OFF
};
+#ifndef UNIV_INNOCHECKSUM
+
/** The number of fsyncs done to the log */
extern ulint fil_n_log_flushes;
@@ -203,180 +467,30 @@ extern ulint fil_n_pending_log_flushes;
/** Number of pending tablespace flushes */
extern ulint fil_n_pending_tablespace_flushes;
-/** Number of files currently open */
-extern ulint fil_n_file_opened;
-
-#ifndef UNIV_INNOCHECKSUM
-
-struct fil_space_t;
-
-struct fsp_open_info {
- ibool success; /*!< Has the tablespace been opened? */
- const char* check_msg; /*!< fil_check_first_page() message */
- ibool valid; /*!< Is the tablespace valid? */
- pfs_os_file_t file; /*!< File handle */
- char* filepath; /*!< File path to open */
- ulint id; /*!< Space ID */
- ulint flags; /*!< Tablespace flags */
- ulint encryption_error; /*!< if an encryption error occurs */
-#ifdef UNIV_LOG_ARCHIVE
- ulint arch_log_no; /*!< latest archived log file number */
-#endif /* UNIV_LOG_ARCHIVE */
- fil_space_crypt_t* crypt_data; /*!< crypt data */
- dict_table_t* table; /*!< table */
-};
-
-/** File node of a tablespace or the log data space */
-struct fil_node_t {
- fil_space_t* space; /*!< backpointer to the space where this node
- belongs */
- char* name; /*!< path to the file */
- ibool open; /*!< TRUE if file open */
- pfs_os_file_t handle; /*!< OS handle to the file, if file open */
- os_event_t sync_event;/*!< Condition event to group and
- serialize calls to fsync;
- os_event_set() and os_event_reset()
- are protected by fil_system_t::mutex */
- ibool is_raw_disk;/*!< TRUE if the 'file' is actually a raw
- device or a raw disk partition */
- ulint size; /*!< size of the file in database pages, 0 if
- not known yet; the possible last incomplete
- megabyte may be ignored if space == 0 */
- ulint n_pending;
- /*!< count of pending i/o's on this file;
- closing of the file is not allowed if
- this is > 0 */
- ulint n_pending_flushes;
- /*!< count of pending flushes on this file;
- closing of the file is not allowed if
- this is > 0 */
- ibool being_extended;
- /*!< TRUE if the node is currently
- being extended. */
- ib_int64_t modification_counter;/*!< when we write to the file we
- increment this by one */
- ib_int64_t flush_counter;/*!< up to what
- modification_counter value we have
- flushed the modifications to disk */
- ulint file_block_size;/*!< file system block size */
- UT_LIST_NODE_T(fil_node_t) chain;
- /*!< link field for the file chain */
- UT_LIST_NODE_T(fil_node_t) LRU;
- /*!< link field for the LRU list */
- ulint magic_n;/*!< FIL_NODE_MAGIC_N */
-};
-
-/** Value of fil_node_t::magic_n */
-#define FIL_NODE_MAGIC_N 89389
-
-/** Tablespace or log data space: let us call them by a common name space */
-struct fil_space_t {
- char* name; /*!< space name = the path to the first file in
- it */
- hash_node_t name_hash;/*!< hash chain the name_hash table */
- ulint id; /*!< space id */
- hash_node_t hash; /*!< hash chain node */
- ib_int64_t tablespace_version;
- /*!< in DISCARD/IMPORT this timestamp
- is used to check if we should ignore
- an insert buffer merge request for a
- page because it actually was for the
- previous incarnation of the space */
- bool stop_new_ops;
- /*!< we set this TRUE when we start
- deleting a single-table tablespace.
- When this is set following new ops
- are not allowed:
- * read IO request
- * ibuf merge
- * file flush
- Note that we can still possibly have
- new write operations because we don't
- check this flag when doing flush
- batches. */
- ulint purpose;/*!< FIL_TABLESPACE, FIL_LOG, or
- FIL_ARCH_LOG */
- UT_LIST_BASE_NODE_T(fil_node_t) chain;
- /*!< base node for the file chain */
- ulint size; /*!< space size in pages; 0 if a single-table
- tablespace whose size we do not know yet;
- last incomplete megabytes in data files may be
- ignored if space == 0 */
- ulint recv_size;
- /*!< recovered tablespace size in pages;
- 0 if no size change was read from the redo log,
- or if the size change was implemented */
- ulint flags; /*!< FSP_SPACE_FLAGS and FSP_FLAGS_MEM_ flags;
- see fsp0fsp.h,
- fsp_flags_is_valid(),
- fsp_flags_get_zip_size() */
- ulint n_reserved_extents;
- /*!< number of reserved free extents for
- ongoing operations like B-tree page split */
- ulint n_pending_flushes; /*!< this is positive when flushing
- the tablespace to disk; dropping of the
- tablespace is forbidden if this is positive */
- /** Number of pending buffer pool operations accessing the tablespace
- without holding a table lock or dict_operation_lock S-latch
- that would prevent the table (and tablespace) from being
- dropped. An example is change buffer merge.
- The tablespace cannot be dropped while this is nonzero,
- or while fil_node_t::n_pending is nonzero.
- Protected by fil_system->mutex. */
- ulint n_pending_ops;
- /** Number of pending block read or write operations
- (when a write is imminent or a read has recently completed).
- The tablespace object cannot be freed while this is nonzero,
- but it can be detached from fil_system.
- Note that fil_node_t::n_pending tracks actual pending I/O requests.
- Protected by fil_system->mutex. */
- ulint n_pending_ios;
-#ifndef UNIV_HOTBACKUP
- rw_lock_t latch; /*!< latch protecting the file space storage
- allocation */
-#endif /* !UNIV_HOTBACKUP */
- UT_LIST_NODE_T(fil_space_t) unflushed_spaces;
- /*!< list of spaces with at least one unflushed
- file we have written to */
- bool is_in_unflushed_spaces;
- /*!< true if this space is currently in
- unflushed_spaces */
- fil_space_crypt_t* crypt_data;
- /*!< tablespace crypt data or NULL */
- ulint file_block_size;
- /*!< file system block size */
-
- UT_LIST_NODE_T(fil_space_t) space_list;
- /*!< list of all spaces */
-
- /*!< Protected by fil_system */
- UT_LIST_NODE_T(fil_space_t) rotation_list;
- /*!< list of spaces needing
- key rotation */
-
- bool is_in_rotation_list;
- /*!< true if this space is
- currently in key rotation list */
-
- ulint magic_n;/*!< FIL_SPACE_MAGIC_N */
-
- /** @return whether the tablespace is about to be dropped or truncated */
- bool is_stopping() const
- {
- return stop_new_ops;
- }
-};
-
-/** Value of fil_space_t::magic_n */
-#define FIL_SPACE_MAGIC_N 89472
+/** Look up a tablespace.
+The caller should hold an InnoDB table lock or an MDL that prevents
+the tablespace from being dropped during the operation,
+or the caller should be in single-threaded crash recovery mode
+(no user connections that could drop tablespaces).
+If this is not the case, fil_space_acquire() and fil_space_release()
+should be used instead.
+@param[in] id tablespace ID
+@return tablespace, or NULL if not found */
+fil_space_t*
+fil_space_get(
+ ulint id)
+ MY_ATTRIBUTE((warn_unused_result));
/** The tablespace memory cache; also the totality of logs (the log
data space) is stored here; below we talk about tablespaces, but also
the ib_logfiles form a 'space' and it is handled here */
struct fil_system_t {
-#ifndef UNIV_HOTBACKUP
- ib_mutex_t mutex; /*!< The mutex protecting the cache */
-#endif /* !UNIV_HOTBACKUP */
+ fil_system_t()
+ : n_open(0), max_assigned_id(0), space_id_reuse_warned(false)
+ {
+ }
+
+ ib_mutex_t mutex; /*!< The mutex protecting the cache */
hash_table_t* spaces; /*!< The hash table of spaces in the
system; they are hashed on the space
id */
@@ -393,148 +507,147 @@ struct fil_system_t {
not put to this list: they are opened
after the startup, and kept open until
shutdown */
- UT_LIST_BASE_NODE_T(fil_space_t) unflushed_spaces;
- /*!< base node for the list of those
+ intrusive::list<fil_space_t, unflushed_spaces_tag_t> unflushed_spaces;
+ /*!< list of those
tablespaces whose files contain
unflushed writes; those spaces have
at least one file node where
- modification_counter > flush_counter */
+ needs_flush == true */
ulint n_open; /*!< number of files currently open */
ulint max_n_open; /*!< n_open is not allowed to exceed
this */
- ib_int64_t modification_counter;/*!< when we write to a file we
- increment this by one */
ulint max_assigned_id;/*!< maximum space id in the existing
tables, or assigned during the time
mysqld has been up; at an InnoDB
startup we scan the data dictionary
and set here the maximum of the
space id's of the tables there */
- ib_int64_t tablespace_version;
- /*!< a counter which is incremented for
- every space object memory creation;
- every space mem object gets a
- 'timestamp' from this; in DISCARD/
- IMPORT this is used to check if we
- should ignore an insert buffer merge
- request */
UT_LIST_BASE_NODE_T(fil_space_t) space_list;
/*!< list of all file spaces */
-
- UT_LIST_BASE_NODE_T(fil_space_t) rotation_list;
+ UT_LIST_BASE_NODE_T(fil_space_t) named_spaces;
+ /*!< list of all file spaces
+ for which a MLOG_FILE_NAME
+ record has been written since
+ the latest redo log checkpoint.
+ Protected only by log_sys->mutex. */
+ intrusive::list<fil_space_t, rotation_list_tag_t> rotation_list;
/*!< list of all file spaces needing
key rotation.*/
- ibool space_id_reuse_warned;
+ bool space_id_reuse_warned;
/*!< true if fil_space_create()
has issued a warning about
potential space_id reuse */
+
+ /** Trigger a call to fil_node_t::read_page0()
+ @param[in] id tablespace identifier
+ @return tablespace
+ @retval NULL if the tablespace does not exist or cannot be read */
+ fil_space_t* read_page0(ulint id);
+
+ /** Return the next fil_space_t from key rotation list.
+ Once started, the caller must keep calling this until it returns NULL.
+ fil_space_acquire() and fil_space_release() are invoked here which
+ blocks a concurrent operation from dropping the tablespace.
+ @param[in] prev_space Previous tablespace or NULL to start
+ from beginning of fil_system->rotation
+ list
+ @param[in] remove whether to remove the previous
+ tablespace from the rotation list
+ @param[in] key_version key version of the key state thread
+ @return pointer to the next fil_space_t.
+ @retval NULL if this was the last */
+ fil_space_t* keyrotate_next(
+ fil_space_t* prev_space,
+ bool remove,
+ uint key_version);
};
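A usage sketch for keyrotate_next(): each returned tablespace has been acquired on the caller's behalf, and passing it back as prev_space releases it, so the loop must run until NULL is returned, as the comment requires:

    void rotate_all_spaces(uint key_version)
    {
        fil_space_t* space = fil_system->keyrotate_next(NULL, false,
                                                        key_version);
        while (space != NULL) {
            // ... re-encrypt the pages of 'space' with the new key ...
            space = fil_system->keyrotate_next(space, false, key_version);
        }
    }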
/** The tablespace memory cache. This variable is NULL before the module is
initialized. */
extern fil_system_t* fil_system;
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Returns the version number of a tablespace, -1 if not found.
-@return version number, -1 if the tablespace does not exist in the
-memory cache */
-UNIV_INTERN
-ib_int64_t
-fil_space_get_version(
-/*==================*/
- ulint id); /*!< in: space id */
-/*******************************************************************//**
-Returns the latch of a file space.
-@return latch protecting storage allocation */
-UNIV_INTERN
+#include "fil0crypt.h"
+
+/** Returns the latch of a file space.
+@param[in] id space id
+@param[out] flags tablespace flags
+@return latch protecting storage allocation */
rw_lock_t*
fil_space_get_latch(
-/*================*/
- ulint id, /*!< in: space id */
- ulint* zip_size);/*!< out: compressed page size, or
- 0 for uncompressed tablespaces */
-/*******************************************************************//**
-Returns the type of a file space.
-@return FIL_TABLESPACE or FIL_LOG */
-UNIV_INTERN
-ulint
-fil_space_get_type(
-/*===============*/
- ulint id); /*!< in: space id */
-
-#endif /* !UNIV_HOTBACKUP */
-/*******************************************************************//**
-Appends a new file to the chain of files of a space. File must be closed.
-@return pointer to the file name, or NULL on error */
-UNIV_INTERN
-char*
-fil_node_create(
-/*============*/
- const char* name, /*!< in: file name (file must be closed) */
- ulint size, /*!< in: file size in database blocks, rounded
- downwards to an integer */
- ulint id, /*!< in: space id where to append */
- ibool is_raw) /*!< in: TRUE if a raw device or
- a raw disk partition */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ ulint id,
+ ulint* flags);
-#ifdef UNIV_LOG_ARCHIVE
-/****************************************************************//**
-Drops files from the start of a file space, so that its size is cut by
-the amount given. */
-UNIV_INTERN
+/** Gets the type of a file space.
+@param[in] id tablespace identifier
+@return file type */
+fil_type_t
+fil_space_get_type(
+ ulint id);
+
+/** Note that a tablespace has been imported.
+It is initially marked as FIL_TYPE_IMPORT so that no logging is
+done during the import process when the space ID is stamped to each page.
+Now we change it to FIL_TYPE_TABLESPACE to start redo and undo logging.
+NOTE: temporary tablespaces are never imported.
+@param[in] id tablespace identifier */
void
-fil_space_truncate_start(
-/*=====================*/
- ulint id, /*!< in: space id */
- ulint trunc_len); /*!< in: truncate by this much; it is an error
- if this does not equal to the combined size of
- some initial files in the space */
-#endif /* UNIV_LOG_ARCHIVE */
-/*******************************************************************//**
-Creates a space memory object and puts it to the 'fil system' hash table.
-If there is an error, prints an error message to the .err log.
-@param[in] name Space name
-@param[in] id Space id
-@param[in] flags Tablespace flags
-@param[in] purpose FIL_TABLESPACE or FIL_LOG if log
-@param[in] crypt_data Encryption information
-@param[in] create_table True if this is create table
-@param[in] mode Encryption mode
-@return TRUE if success */
-UNIV_INTERN
-bool
+fil_space_set_imported(
+ ulint id);
+
+/** Create a space memory object and put it to the fil_system hash table.
+Error messages are issued to the server log.
+@param[in] name tablespace name
+@param[in] id tablespace identifier
+@param[in] flags tablespace flags
+@param[in] purpose tablespace purpose
+@param[in,out] crypt_data encryption information
+@param[in] mode encryption mode
+@return pointer to created tablespace, to be filled in with fil_space_t::add()
+@retval NULL on failure (such as when the same tablespace exists) */
+fil_space_t*
fil_space_create(
const char* name,
ulint id,
ulint flags,
- ulint purpose,
+ fil_type_t purpose,
fil_space_crypt_t* crypt_data,
- bool create_table,
- fil_encryption_t mode = FIL_ENCRYPTION_DEFAULT);
+ fil_encryption_t mode = FIL_ENCRYPTION_DEFAULT)
+ MY_ATTRIBUTE((warn_unused_result));
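A hedged sketch of the two-step creation protocol implied by the comment above (real call sites also manage dictionary state and error reporting); 'path', 'space_id' and 'flags' are assumed inputs:

    bool create_space_with_file(const char* path, ulint space_id, ulint flags)
    {
        fil_space_t* space = fil_space_create(
            "test/t1", space_id, flags, FIL_TYPE_TABLESPACE,
            NULL /* no encryption metadata */);
        if (space == NULL) {
            return false;  // e.g. the tablespace already exists
        }
        // The file stays closed until first use (handle = OS_FILE_CLOSED).
        return space->add(path, OS_FILE_CLOSED, FIL_IBD_FILE_INITIAL_SIZE,
                          false /* not a raw device */,
                          true /* try atomic writes */) != NULL;
    }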
/*******************************************************************//**
Assigns a new space id for a new single-table tablespace. This works simply by
incrementing the global counter. If 4 billion id's is not enough, we may need
to recycle id's.
-@return TRUE if assigned, FALSE if not */
-UNIV_INTERN
-ibool
+@return true if assigned, false if not */
+bool
fil_assign_new_space_id(
/*====================*/
ulint* space_id); /*!< in/out: space id */
-/*******************************************************************//**
-Returns the path from the first fil_node_t found for the space ID sent.
+
+/** Frees a space object from the tablespace memory cache.
+Closes the files in the chain but does not delete them.
+There must not be any pending i/o's or flushes on the files.
+@param[in] id tablespace identifier
+@param[in] x_latched whether the caller holds X-mode space->latch
+@return true if success */
+bool
+fil_space_free(
+ ulint id,
+ bool x_latched);
+
+/** Returns the path from the first fil_node_t found with this space ID.
The caller is responsible for freeing the memory allocated here for the
value returned.
-@return a copy of fil_node_t::path, NULL if space is zero or not found. */
-UNIV_INTERN
+@param[in] id Tablespace ID
+@return own: A copy of fil_node_t::path, NULL if space ID is zero
+or not found. */
char*
fil_space_get_first_path(
-/*=====================*/
- ulint id); /*!< in: space id */
+ ulint id);
+
/** Set the recovered size of a tablespace in pages.
@param id tablespace ID
@param size recovered size in pages */
@@ -544,8 +657,7 @@ fil_space_set_recv_size(ulint id, ulint size);
/*******************************************************************//**
Returns the size of the space in pages. The tablespace must be cached in the
memory cache.
-@return space size, 0 if space not found */
-UNIV_INTERN
+@return space size, 0 if space not found */
ulint
fil_space_get_size(
/*===============*/
@@ -553,34 +665,37 @@ fil_space_get_size(
/*******************************************************************//**
Returns the flags of the space. The tablespace must be cached
in the memory cache.
-@return flags, ULINT_UNDEFINED if space not found */
-UNIV_INTERN
+@return flags, ULINT_UNDEFINED if space not found */
ulint
fil_space_get_flags(
/*================*/
ulint id); /*!< in: space id */
-/*******************************************************************//**
-Returns the compressed page size of the space, or 0 if the space
-is not compressed. The tablespace must be cached in the memory cache.
-@return compressed page size, ULINT_UNDEFINED if space not found */
-UNIV_INTERN
-ulint
-fil_space_get_zip_size(
-/*===================*/
- ulint id); /*!< in: space id */
-/*******************************************************************//**
-Checks if the pair space, page_no refers to an existing page in a tablespace
-file space. The tablespace must be cached in the memory cache.
-@return TRUE if the address is meaningful */
-UNIV_INTERN
-ibool
-fil_check_adress_in_tablespace(
-/*===========================*/
- ulint id, /*!< in: space id */
- ulint page_no);/*!< in: page number */
+
+/** Open each fil_node_t of a named fil_space_t if not already open.
+@param[in] name Tablespace name
+@return true if all file nodes are opened. */
+bool
+fil_space_open(
+ const char* name);
+
+/** Close each fil_node_t of a named fil_space_t if open.
+@param[in] name Tablespace name */
+void
+fil_space_close(
+ const char* name);
+
+/** Returns the page size of the space and whether it is compressed or not.
+The tablespace must be cached in the memory cache.
+@param[in] id space id
+@param[out] found true if tablespace was found
+@return page size */
+const page_size_t
+fil_space_get_page_size(
+ ulint id,
+ bool* found);
+
/****************************************************************//**
Initializes the tablespace memory cache. */
-UNIV_INTERN
void
fil_init(
/*=====*/
@@ -588,7 +703,6 @@ fil_init(
ulint max_n_open); /*!< in: max number of open files */
/*******************************************************************//**
Initializes the tablespace memory cache. */
-UNIV_INTERN
void
fil_close(void);
/*===========*/
@@ -598,21 +712,18 @@ database server shutdown. This should be called at a server startup after the
space objects for the log and the system tablespace have been created. The
purpose of this operation is to make sure we never run out of file descriptors
if we need to read from the insert buffer or to write to the log. */
-UNIV_INTERN
void
fil_open_log_and_system_tablespace_files(void);
/*==========================================*/
/*******************************************************************//**
Closes all open files. There must not be any pending i/o's or not flushed
modifications in the files. */
-UNIV_INTERN
void
fil_close_all_files(void);
/*=====================*/
/*******************************************************************//**
Closes the redo log files. There must not be any pending i/o's or not
flushed modifications in the files. */
-UNIV_INTERN
void
fil_close_log_files(
/*================*/
@@ -620,14 +731,11 @@ fil_close_log_files(
/*******************************************************************//**
Sets the max tablespace id counter if the given number is bigger than the
previous value. */
-UNIV_INTERN
void
fil_set_max_space_id_if_bigger(
/*===========================*/
ulint max_id);/*!< in: maximum known id */
-#ifndef UNIV_HOTBACKUP
-
/** Write the flushed LSN to the page header of the first page in the
system tablespace.
@param[in] lsn flushed LSN
@@ -635,7 +743,7 @@ system tablespace.
dberr_t
fil_write_flushed_lsn(
lsn_t lsn)
- MY_ATTRIBUTE((warn_unused_result));
+MY_ATTRIBUTE((warn_unused_result));
/** Acquire a tablespace when it could be dropped concurrently.
Used by background threads that do not necessarily hold proper locks
@@ -653,15 +761,13 @@ fil_space_acquire_low(ulint id, bool silent)
Used by background threads that do not necessarily hold proper locks
for concurrency control.
@param[in] id tablespace ID
-@param[in] for_io whether to look up the tablespace while performing I/O
- (possibly executing TRUNCATE)
@return the tablespace
@retval NULL if missing or being deleted or truncated */
inline
fil_space_t*
fil_space_acquire(ulint id)
{
- return(fil_space_acquire_low(id, false));
+ return (fil_space_acquire_low(id, false));
}
/** Acquire a tablespace that may not exist.
@@ -674,12 +780,11 @@ inline
fil_space_t*
fil_space_acquire_silent(ulint id)
{
- return(fil_space_acquire_low(id, true));
+ return (fil_space_acquire_low(id, true));
}
/** Release a tablespace acquired with fil_space_acquire().
@param[in,out] space tablespace to release */
-UNIV_INTERN
void
fil_space_release(fil_space_t* space);
@@ -688,13 +793,11 @@ when it could be dropped concurrently.
@param[in] id tablespace ID
@return the tablespace
@retval NULL if missing */
-UNIV_INTERN
fil_space_t*
fil_space_acquire_for_io(ulint id);
/** Release a tablespace acquired with fil_space_acquire_for_io().
@param[in,out] space tablespace to release */
-UNIV_INTERN
void
fil_space_release_for_io(fil_space_t* space);
@@ -706,7 +809,6 @@ blocks a concurrent operation from dropping the tablespace.
If NULL, use the first fil_space_t on fil_system->space_list.
@return pointer to the next fil_space_t.
@retval NULL if this was the last */
-UNIV_INTERN
fil_space_t*
fil_space_next(
fil_space_t* prev_space)
@@ -716,14 +818,15 @@ fil_space_next(
Once started, the caller must keep calling this until it returns NULL.
fil_space_acquire() and fil_space_release() are invoked here which
blocks a concurrent operation from dropping the tablespace.
-@param[in,out] prev_space Pointer to the previous fil_space_t.
+@param[in] prev_space Previous tablespace or NULL to start
+ from beginning of fil_system->rotation list
+@param[in] remove Whether to remove the previous tablespace from
+ the rotation list
If NULL, use the first fil_space_t on fil_system->space_list.
@return pointer to the next fil_space_t.
@retval NULL if this was the last*/
-UNIV_INTERN
fil_space_t*
-fil_space_keyrotate_next(
- fil_space_t* prev_space)
+fil_space_keyrotate_next(fil_space_t* prev_space, bool remove)
MY_ATTRIBUTE((warn_unused_result));
/** Wrapper with reference-counting for a fil_space_t. */
@@ -737,7 +840,7 @@ public:
/** Constructor: Look up the tablespace and increment the
reference count if found.
@param[in] space_id tablespace ID
- @param[in] silent whether not to print any errors */
+ @param[in] silent whether not to display errors */
explicit FilSpace(ulint space_id, bool silent = false)
: m_space(fil_space_acquire_low(space_id, silent)) {}
@@ -769,6 +872,13 @@ public:
return(m_space);
}
+ /** Member accessor
+ @return the wrapped object */
+ const fil_space_t* operator->() const
+ {
+ return(m_space);
+ }
+
/** Explicit type conversion
@return the wrapped object */
const fil_space_t* operator()() const
@@ -781,68 +891,65 @@ private:
fil_space_t* m_space;
};
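A usage sketch: FilSpace ties fil_space_acquire_low()/fil_space_release() to a scope, so early returns cannot leak the reference; the operator-> added above gives direct member access:

    bool space_is_encrypted(ulint space_id)
    {
        FilSpace space(space_id, true /* silent */);
        if (space() == NULL) {
            return false;  // missing or being dropped
        }
        return space->crypt_data != NULL;
    }   // destructor releases the tablespace here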
-/** Reads the flushed lsn, arch no, space_id and tablespace flag fields from
-the first page of a first data file at database startup.
-@param[in] data_file open data file
-@param[in] one_read_only true if first datafile is already
- read
-@param[out] flags FSP_SPACE_FLAGS
-@param[out] space_id tablepspace ID
-@param[out] min_arch_log_no min of archived log numbers in
- data files
-@param[out] max_arch_log_no max of archived log numbers in
- data files
-@param[out] flushed_lsn flushed lsn value
-@param[out] crypt_data encryption crypt data
-@param[in] check_first_page true if first page contents
- should be checked
-@retval NULL on success, or if innodb_force_recovery is set
-@return pointer to an error message string */
-UNIV_INTERN
-const char*
-fil_read_first_page(
- pfs_os_file_t data_file,
- ibool one_read_already,
- ulint* flags,
- ulint* space_id,
-#ifdef UNIV_LOG_ARCHIVE
- ulint* min_arch_log_no,
- ulint* max_arch_log_no,
-#endif /* UNIV_LOG_ARCHIVE */
- lsn_t* flushed_lsn,
- fil_space_crypt_t** crypt_data,
- bool check_first_page=true)
+/********************************************************//**
+Creates the database directory for a table if it does not exist yet. */
+void
+fil_create_directory_for_tablename(
+/*===============================*/
+ const char* name); /*!< in: name in the standard
+ 'databasename/tablename' format */
+/** Write redo log for renaming a file.
+@param[in] space_id tablespace id
+@param[in] old_name tablespace file name
+@param[in] new_name tablespace file name after renaming */
+void
+fil_name_write_rename(
+ ulint space_id,
+ const char* old_name,
+ const char* new_name);
+/********************************************************//**
+Recreates table indexes by applying
+TRUNCATE log record during recovery.
+@return DB_SUCCESS or error code */
+dberr_t
+fil_recreate_table(
+/*===============*/
+ ulint space_id, /*!< in: space id */
+ ulint format_flags, /*!< in: page format */
+ ulint flags, /*!< in: tablespace flags */
+ const char* name, /*!< in: table name */
+ truncate_t& truncate); /*!< in/out: The information of
+ TRUNCATE log record */
+/********************************************************//**
+Recreates the tablespace and table indexes by applying
+TRUNCATE log record during recovery.
+@return DB_SUCCESS or error code */
+dberr_t
+fil_recreate_tablespace(
+/*====================*/
+ ulint space_id, /*!< in: space id */
+ ulint format_flags, /*!< in: page format */
+ ulint flags, /*!< in: tablespace flags */
+ const char* name, /*!< in: table name */
+ truncate_t& truncate, /*!< in/out: The information of
+ TRUNCATE log record */
+ lsn_t recv_lsn); /*!< in: the end LSN of
+ the log record */
+/** Replay a file rename operation if possible.
+@param[in] space_id tablespace identifier
+@param[in] first_page_no first page number in the file
+@param[in] name old file name
+@param[in] new_name new file name
+@return whether the operation was successfully applied
+(the name did not exist, or new_name did not exist and
+name was successfully renamed to new_name) */
+bool
+fil_op_replay_rename(
+ ulint space_id,
+ ulint first_page_no,
+ const char* name,
+ const char* new_name)
MY_ATTRIBUTE((warn_unused_result));
-#endif /* !UNIV_HOTBACKUP */
-
-/*******************************************************************//**
-Parses the body of a log record written about an .ibd file operation. That is,
-the log record part after the standard (type, space id, page no) header of the
-log record.
-
-If desired, also replays the delete or rename operation if the .ibd file
-exists and the space id in it matches. Replays the create operation if a file
-at that path does not exist yet. If the database directory for the file to be
-created does not exist, then we create the directory, too.
-
-Note that mysqlbackup --apply-log sets fil_path_to_mysql_datadir to point to
-the datadir that we should use in replaying the file operations.
-@return end of log record, or NULL if the record was not completely
-contained between ptr and end_ptr */
-UNIV_INTERN
-byte*
-fil_op_log_parse_or_replay(
-/*=======================*/
- byte* ptr, /*!< in: buffer containing the log record body,
- or an initial segment of it, if the record does
- not fir completely between ptr and end_ptr */
- byte* end_ptr, /*!< in: buffer end */
- ulint type, /*!< in: the type of this log record */
- ulint space_id, /*!< in: the space id of the tablespace in
- question, or 0 if the log record should
- only be parsed but not replayed */
- ulint log_flags); /*!< in: redo log flags
- (stored in the page number parameter) */
/** Determine whether a table can be accessed in operations that are
not (necessarily) protected by meta-data locks.
@@ -852,27 +959,66 @@ but only by InnoDB table locks, which may be broken by
lock_remove_all_on_table().)
@param[in] table persistent table to be checked
@return whether the table is accessible */
-UNIV_INTERN bool fil_table_accessible(const dict_table_t* table)
+bool fil_table_accessible(const dict_table_t* table)
MY_ATTRIBUTE((warn_unused_result, nonnull));
/** Delete a tablespace and associated .ibd file.
@param[in] id tablespace identifier
-@param[in] drop_ahi whether to drop the adaptive hash index
+@param[in] if_exists whether to ignore missing tablespace
@return DB_SUCCESS or error */
-UNIV_INTERN
+dberr_t fil_delete_tablespace(ulint id, bool if_exists= false);
+
+/** Prepare to truncate an undo tablespace.
+@param[in] space_id undo tablespace id
+@return the tablespace
+@retval NULL if the tablespace does not exist */
+fil_space_t* fil_truncate_prepare(ulint space_id);
+
+/** Write log about an undo tablespace truncate operation. */
+void fil_truncate_log(fil_space_t* space, ulint size, mtr_t* mtr)
+ MY_ATTRIBUTE((nonnull));
+
+/** Truncate the tablespace to needed size.
+@param[in] space_id id of tablespace to truncate
+@param[in] size_in_pages truncate size.
+@return true if truncate was successful. */
+bool
+fil_truncate_tablespace(
+ ulint space_id,
+ ulint size_in_pages);
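One plausible ordering of the three new undo-truncation entry points, inferred from the declarations alone; the actual call sites in the engine may differ, and space_id/size_in_pages are hypothetical:

	/* Sketch only: a possible undo-truncation sequence. */
	if (fil_space_t* space = fil_truncate_prepare(space_id)) {
		mtr_t	mtr;
		mtr.start();
		fil_truncate_log(space, size_in_pages, &mtr);
		mtr.commit();

		if (!fil_truncate_tablespace(space_id, size_in_pages)) {
			/* report the failure */
		}
	}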
+
+/*******************************************************************//**
+Prepare for truncating a single-table tablespace. The tablespace
+must be cached in the memory cache.
+1) Check pending operations on a tablespace;
+2) Remove all insert buffer entries for the tablespace;
+@return DB_SUCCESS or error */
dberr_t
-fil_delete_tablespace(ulint id, bool drop_ahi = false);
+fil_prepare_for_truncate(
+/*=====================*/
+ ulint id); /*!< in: space id */
+
+/** Reinitialize the original tablespace header with the same space id
+for a single-table tablespace
+@param[in] table table belongs to the tablespace
+@param[in] size size in blocks
+@param[in] trx Transaction covering truncate */
+void
+fil_reinit_space_header_for_table(
+ dict_table_t* table,
+ ulint size,
+ trx_t* trx);
+
/*******************************************************************//**
Closes a single-table tablespace. The tablespace must be cached in the
memory cache. Free all pages used by the tablespace.
-@return DB_SUCCESS or error */
-UNIV_INTERN
+@return DB_SUCCESS or error */
dberr_t
fil_close_tablespace(
/*=================*/
trx_t* trx, /*!< in/out: Transaction covering the close */
ulint id); /*!< in: space id */
-#ifndef UNIV_HOTBACKUP
+
/*******************************************************************//**
Discards a single-table tablespace. The tablespace must be cached in the
memory cache. Discarding is like deleting a tablespace, but
@@ -885,15 +1031,13 @@ memory cache. Discarding is like deleting a tablespace, but
3. When the user does IMPORT TABLESPACE, the tablespace will have the
same id as it originally had.
- 4. Free all the pages in use by the tablespace if rename=TRUE.
-@return DB_SUCCESS or error */
-UNIV_INTERN
+ 4. Free all the pages in use by the tablespace if rename=true.
+@return DB_SUCCESS or error */
dberr_t
fil_discard_tablespace(
/*===================*/
ulint id) /*!< in: space id */
MY_ATTRIBUTE((warn_unused_result));
-#endif /* !UNIV_HOTBACKUP */
/** Test if a tablespace file can be renamed to a new filepath by checking
that the old filepath exists and the new filepath does not exist.
@@ -901,113 +1045,67 @@ if that the old filepath exists and the new filepath does not exist.
@param[in] old_path old filepath
@param[in] new_path new filepath
@param[in] is_discarded whether the tablespace is discarded
+@param[in] replace_new whether to ignore the existence of new_path
@return innodb error code */
dberr_t
fil_rename_tablespace_check(
ulint space_id,
const char* old_path,
const char* new_path,
- bool is_discarded);
+ bool is_discarded,
+ bool replace_new = false);
-/*******************************************************************//**
-Renames a single-table tablespace. The tablespace must be cached in the
-tablespace memory cache.
-@return TRUE if success */
-UNIV_INTERN
-ibool
+/** Rename a single-table tablespace.
+The tablespace must exist in the memory cache.
+@param[in] id tablespace identifier
+@param[in] old_path old file name
+@param[in] new_name new table name in the
+databasename/tablename format
+@param[in] new_path_in new file name,
+or NULL if it is located in the normal data directory
+@return true if success */
+bool
fil_rename_tablespace(
-/*==================*/
- const char* old_name_in, /*!< in: old table name in the
- standard databasename/tablename
- format of InnoDB, or NULL if we
- do the rename based on the space
- id only */
- ulint id, /*!< in: space id */
- const char* new_name, /*!< in: new table name in the
- standard databasename/tablename
- format of InnoDB */
- const char* new_path); /*!< in: new full datafile path
- if the tablespace is remotely
- located, or NULL if it is located
- in the normal data directory. */
+ ulint id,
+ const char* old_path,
+ const char* new_name,
+ const char* new_path_in);
/*******************************************************************//**
-Allocates a file name for a single-table tablespace. The string must be freed
-by caller with mem_free().
-@return own: file name */
-UNIV_INTERN
-char*
-fil_make_ibd_name(
-/*==============*/
- const char* name, /*!< in: table name or a dir path */
- bool is_full_path); /*!< in: TRUE if it is a dir path */
-/*******************************************************************//**
-Allocates a file name for a tablespace ISL file (InnoDB Symbolic Link).
-The string must be freed by caller with mem_free().
-@return own: file name */
-UNIV_INTERN
-char*
-fil_make_isl_name(
-/*==============*/
- const char* name); /*!< in: table name */
-/*******************************************************************//**
-Creates a new InnoDB Symbolic Link (ISL) file. It is always created
-under the 'datadir' of MySQL. The datadir is the directory of a
-running mysqld program. We can refer to it by simply using the path '.'.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fil_create_link_file(
-/*=================*/
- const char* tablename, /*!< in: tablename */
- const char* filepath); /*!< in: pathname of tablespace */
-/*******************************************************************//**
-Deletes an InnoDB Symbolic Link (ISL) file. */
-UNIV_INTERN
-void
-fil_delete_link_file(
-/*==================*/
- const char* tablename); /*!< in: name of table */
-/*******************************************************************//**
-Reads an InnoDB Symbolic Link (ISL) file.
-It is always created under the 'datadir' of MySQL. The name is of the
-form {databasename}/{tablename}. and the isl file is expected to be in a
-'{databasename}' directory called '{tablename}.isl'. The caller must free
-the memory of the null-terminated path returned if it is not null.
-@return own: filepath found in link file, NULL if not found. */
-UNIV_INTERN
+Allocates and builds a file name from a path, a table or tablespace name
+and a suffix. The string must be freed by caller with ut_free().
+@param[in] path NULL or the directory path or the full path and filename.
+@param[in] name NULL if path is full, or Table/Tablespace name
+@param[in] suffix NULL or the file extension to use.
+@return own: file name */
char*
-fil_read_link_file(
-/*===============*/
- const char* name); /*!< in: tablespace name */
-
-#include "fil0crypt.h"
-
-/*******************************************************************//**
-Creates a new single-table tablespace to a database directory of MySQL.
-Database directories are under the 'datadir' of MySQL. The datadir is the
-directory of a running mysqld program. We can refer to it by simply the
-path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
-dir of the mysqld server.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+fil_make_filepath(
+ const char* path,
+ const char* name,
+ ib_extention suffix,
+ bool strip_name);
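A hedged example of the new helper; IBD is assumed to be one of the ib_extention values defined elsewhere in this header, and the table name is hypothetical:

	/* Sketch: build "<datadir>/db/t1.ibd". */
	char*	path = fil_make_filepath(NULL, "db/t1", IBD, false);

	if (path != NULL) {
		/* ... open or validate the file ... */
		ut_free(path);	/* the caller frees, per the comment above */
	}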
+
+/** Create a tablespace file.
+@param[in] space_id Tablespace ID
+@param[in] name Tablespace name in dbname/tablename format.
+@param[in] path Path and filename of the datafile to create.
+@param[in] flags Tablespace flags
+@param[in] size Initial size of the tablespace file in pages,
+must be >= FIL_IBD_FILE_INITIAL_SIZE
+@param[in] mode MariaDB encryption mode
+@param[in] key_id MariaDB encryption key_id
+@return DB_SUCCESS or error code */
dberr_t
-fil_create_new_single_table_tablespace(
-/*===================================*/
- ulint space_id, /*!< in: space id */
- const char* tablename, /*!< in: the table name in the usual
- databasename/tablename format
- of InnoDB */
- const char* dir_path, /*!< in: NULL or a dir path */
- ulint flags, /*!< in: tablespace flags */
- ulint flags2, /*!< in: table flags2 */
- ulint size, /*!< in: the initial size of the
- tablespace file in pages,
- must be >= FIL_IBD_FILE_INITIAL_SIZE */
- fil_encryption_t mode, /*!< in: encryption mode */
- ulint key_id) /*!< in: encryption key_id */
+fil_ibd_create(
+ ulint space_id,
+ const char* name,
+ const char* path,
+ ulint flags,
+ ulint size,
+ fil_encryption_t mode,
+ uint32_t key_id)
MY_ATTRIBUTE((nonnull(2), warn_unused_result));
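A sketch of a call to the renamed function; FIL_ENCRYPTION_DEFAULT and FIL_DEFAULT_ENCRYPTION_KEY are assumed to come from fil0crypt.h, and space_id/flags are hypothetical:

	/* Sketch: create a file-per-table tablespace in the datadir. */
	dberr_t	err = fil_ibd_create(
		space_id, "db/t1", NULL, flags,
		FIL_IBD_FILE_INITIAL_SIZE,
		FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);

	if (err != DB_SUCCESS) {
		/* handle the error */
	}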
-#ifndef UNIV_HOTBACKUP
+
/** Try to adjust FSP_SPACE_FLAGS if they differ from the expectations.
(Typically when upgrading from MariaDB 10.1.0..10.1.20.)
@param[in] space_id tablespace ID
@@ -1030,113 +1128,96 @@ If the validate boolean is set, we read the first page of the file and
check that the space id in the file is what we expect. We assume that
this function runs much faster if no check is made, since accessing the
file inode probably is much faster (the OS caches them) than accessing
-the first page of the file. This boolean may be initially FALSE, but if
+the first page of the file. This boolean may be initially false, but if
a remote tablespace is found it will be changed to true.
If the fix_dict boolean is set, then it is safe to use an internal SQL
statement to update the dictionary tables if they are incorrect.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+@param[in] validate true if we should validate the tablespace
+@param[in] fix_dict true if the dictionary is available to be fixed
+@param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_TEMPORARY
+@param[in] id tablespace ID
+@param[in] flags expected FSP_SPACE_FLAGS
+@param[in] space_name tablespace name of the datafile
+If file-per-table, it is the table name in the databasename/tablename format
+@param[in] path_in expected filepath, usually read from dictionary
+@return DB_SUCCESS or error code */
dberr_t
-fil_open_single_table_tablespace(
-/*=============================*/
- bool validate, /*!< in: Do we validate tablespace? */
- bool fix_dict, /*!< in: Can we fix the dictionary? */
- ulint id, /*!< in: space id */
- ulint flags, /*!< in: expected FSP_SPACE_FLAGS */
- const char* tablename, /*!< in: table name in the
- databasename/tablename format */
- const char* filepath) /*!< in: tablespace filepath */
- __attribute__((nonnull(5), warn_unused_result));
-
-#endif /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-At the server startup, if we need crash recovery, scans the database
-directories under the MySQL datadir, looking for .ibd files. Those files are
-single-table tablespaces. We need to know the space id in each of them so that
-we know into which file we should look to check the contents of a page stored
-in the doublewrite buffer, also to know where to apply log records where the
-space id is != 0.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-dberr_t
-fil_load_single_table_tablespaces(void);
-/*===================================*/
-/*******************************************************************//**
-Returns TRUE if a single-table tablespace does not exist in the memory cache,
-or is being deleted there.
-@return TRUE if does not exist or is being deleted */
-UNIV_INTERN
-ibool
-fil_tablespace_deleted_or_being_deleted_in_mem(
-/*===========================================*/
- ulint id, /*!< in: space id */
- ib_int64_t version);/*!< in: tablespace_version should be this; if
- you pass -1 as the value of this, then this
- parameter is ignored */
-/*******************************************************************//**
-Returns TRUE if a single-table tablespace exists in the memory cache.
-@return TRUE if exists */
-UNIV_INTERN
-ibool
-fil_tablespace_exists_in_mem(
-/*=========================*/
- ulint id); /*!< in: space id */
-#ifndef UNIV_HOTBACKUP
-/** Check if a matching tablespace exists in the InnoDB tablespace memory
-cache. Note that if we have not done a crash recovery at the database startup,
-there may be many tablespaces which are not yet in the memory cache.
-@return whether a matching tablespace exists in the memory cache */
-UNIV_INTERN
+fil_ibd_open(
+ bool validate,
+ bool fix_dict,
+ fil_type_t purpose,
+ ulint id,
+ ulint flags,
+ const char* tablename,
+ const char* path_in)
+ MY_ATTRIBUTE((warn_unused_result));
+
+enum fil_load_status {
+ /** The tablespace file(s) were found and valid. */
+ FIL_LOAD_OK,
+ /** The name no longer matches space_id */
+ FIL_LOAD_ID_CHANGED,
+ /** The file(s) were not found */
+ FIL_LOAD_NOT_FOUND,
+ /** The file(s) were not valid */
+ FIL_LOAD_INVALID
+};
+
+/** Open a single-file tablespace and add it to the InnoDB data structures.
+@param[in] space_id tablespace ID
+@param[in] filename path/to/databasename/tablename.ibd
+@param[out] space the tablespace, or NULL on error
+@return status of the operation */
+enum fil_load_status
+fil_ibd_load(
+ ulint space_id,
+ const char* filename,
+ fil_space_t*& space)
+ MY_ATTRIBUTE((warn_unused_result));
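As a sketch, a recovery-time directory scan might consume the new status codes like this (space_id and filename are hypothetical):

	fil_space_t*	space;

	switch (fil_ibd_load(space_id, filename, space)) {
	case FIL_LOAD_OK:
		break;
	case FIL_LOAD_ID_CHANGED:
		/* the name no longer matches space_id; skip the file */
		break;
	case FIL_LOAD_NOT_FOUND:
	case FIL_LOAD_INVALID:
		/* report and continue with the next file */
		break;
	}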
+
+
+/***********************************************************************//**
+A fault-tolerant function that tries to read the next file name in the
+directory. We retry 100 times if os_file_readdir_next_file() returns -1. The
+idea is to read as much good data as we can and jump over bad data.
+@return 0 if ok, -1 if error even after the retries, 1 if at the end
+of the directory */
+int
+fil_file_readdir_next_file(
+/*=======================*/
+ dberr_t* err, /*!< out: this is set to DB_ERROR if an error
+ was encountered, otherwise not changed */
+ const char* dirname,/*!< in: directory name or path */
+ os_file_dir_t dir, /*!< in: directory stream */
+ os_file_stat_t* info); /*!< in/out: buffer where the
+ info is returned */
+/** Determine if a matching tablespace exists in the InnoDB tablespace
+memory cache. Note that if we have not done a crash recovery at the database
+startup, there may be many tablespaces which are not yet in the memory cache.
+@param[in] id Tablespace ID
+@param[in] name Tablespace name used in fil_space_create().
+@param[in] table_flags table flags
+@return true if a matching tablespace exists in the memory cache */
bool
fil_space_for_table_exists_in_mem(
-/*==============================*/
- ulint id, /*!< in: space id */
- const char* name, /*!< in: table name in the standard
- 'databasename/tablename' format */
- bool print_error_if_does_not_exist,
- /*!< in: print detailed error
- information to the .err log if a
- matching tablespace is not found from
- memory */
- bool adjust_space, /*!< in: whether to adjust space id
- when find table space mismatch */
- mem_heap_t* heap, /*!< in: heap memory */
- table_id_t table_id, /*!< in: table id */
- ulint table_flags); /*!< in: table flags */
-#else /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Extends all tablespaces to the size stored in the space header. During the
-mysqlbackup --apply-log phase we extended the spaces on-demand so that log
-records could be appllied, but that may have left spaces still too small
-compared to the size stored in the space header. */
-UNIV_INTERN
-void
-fil_extend_tablespaces_to_stored_len(void);
-/*======================================*/
-#endif /* !UNIV_HOTBACKUP */
-/**********************************************************************//**
-Tries to extend a data file so that it would accommodate the number of pages
-given. The tablespace must be cached in the memory cache. If the space is big
-enough already, does nothing.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_extend_space_to_desired_size(
-/*=============================*/
- ulint* actual_size, /*!< out: size of the space after extension;
- if we ran out of disk space this may be lower
- than the desired size */
- ulint space_id, /*!< in: space id */
- ulint size_after_extend);/*!< in: desired size in pages after the
- extension; if the current space size is bigger
- than this already, the function does nothing */
+ ulint id,
+ const char* name,
+ ulint table_flags);
+
+/** Try to extend a tablespace if it is smaller than the specified size.
+@param[in,out] space tablespace
+@param[in] size desired size in pages
+@return whether the tablespace is at least as big as requested */
+bool
+fil_space_extend(
+ fil_space_t* space,
+ ulint size);
/*******************************************************************//**
Tries to reserve free extents in a file space.
-@return TRUE if succeed */
-UNIV_INTERN
-ibool
+@return true if succeed */
+bool
fil_space_reserve_free_extents(
/*===========================*/
ulint id, /*!< in: space id */
@@ -1144,7 +1225,6 @@ fil_space_reserve_free_extents(
ulint n_to_reserve); /*!< in: how many one wants to reserve */
/*******************************************************************//**
Releases free extents in a file space. */
-UNIV_INTERN
void
fil_space_release_free_extents(
/*===========================*/
@@ -1153,56 +1233,47 @@ fil_space_release_free_extents(
/*******************************************************************//**
Gets the number of reserved extents. If the database is silent, this number
should be zero. */
-UNIV_INTERN
ulint
fil_space_get_n_reserved_extents(
/*=============================*/
ulint id); /*!< in: space id */
-/********************************************************************//**
-Reads or writes data. This operation is asynchronous (aio).
-@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
-i/o on a tablespace which does not exist */
-UNIV_INTERN
+
+/** Reads or writes data. This operation could be asynchronous (aio).
+
+@param[in] type IO context
+@param[in] sync true if synchronous aio is desired
+@param[in] page_id page id
+@param[in] page_size page size
+@param[in] byte_offset remainder of offset in bytes; in aio this
+ must be divisible by the OS block size
+@param[in] len how many bytes to read or write; this must
+ not cross a file boundary; in aio this must
+ be a block size multiple
+@param[in,out] buf buffer where to store read data or from where
+ to write; in aio this must be appropriately
+ aligned
+@param[in] message message for aio handler if non-sync aio
+ used, else ignored
+@param[in] ignore_missing_space true=ignore missing space during read
+@return DB_SUCCESS, DB_TABLESPACE_DELETED or DB_TABLESPACE_TRUNCATED
+if we are trying to do i/o on a tablespace which does not exist */
dberr_t
fil_io(
-/*===*/
- ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE,
- ORed to OS_FILE_LOG, if a log i/o
- and ORed to OS_AIO_SIMULATED_WAKE_LATER
- if simulated aio and we want to post a
- batch of i/os; NOTE that a simulated batch
- may introduce hidden chances of deadlocks,
- because i/os are not actually handled until
- all have been posted: use with great
- caution! */
- bool sync, /*!< in: true if synchronous aio is desired */
- ulint space_id, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint block_offset, /*!< in: offset in number of blocks */
- ulint byte_offset, /*!< in: remainder of offset in bytes; in
- aio this must be divisible by the OS block
- size */
- ulint len, /*!< in: how many bytes to read or write; this
- must not cross a file boundary; in aio this
- must be a block size multiple */
- void* buf, /*!< in/out: buffer where to store read data
- or from where to write; in aio this must be
- appropriately aligned */
- void* message, /*!< in: message for aio handler if non-sync
- aio used, else ignored */
- ulint* write_size) /*!< in/out: Actual write size initialized
- after fist successfull trim
- operation for this page and if
- initialized we do not trim again if
- actual page size does not decrease. */
- __attribute__((nonnull(8)));
+ const IORequest& type,
+ bool sync,
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ ulint byte_offset,
+ ulint len,
+ void* buf,
+ void* message,
+ bool ignore_missing_space = false);
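A hedged sketch of a synchronous read through the new signature; IORequestRead, page_id_t and univ_page_size are assumed to come from the surrounding headers, and space_id/page_no/aligned_buf are hypothetical (the buffer must be suitably aligned, per the comment above):

	/* Sketch: synchronous read of one page. */
	dberr_t	err = fil_io(
		IORequestRead, true,
		page_id_t(space_id, page_no), univ_page_size,
		0, srv_page_size, aligned_buf, NULL);

	ut_a(err == DB_SUCCESS);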
+
/**********************************************************************//**
Waits for an aio operation to complete. This function is used to write the
handler for completed requests. The aio array of pending requests is divided
into segments (see os0file.cc for more info). The thread specifies which
segment it wants to wait for. */
-UNIV_INTERN
void
fil_aio_wait(
/*=========*/
@@ -1211,7 +1282,6 @@ fil_aio_wait(
/**********************************************************************//**
Flushes to disk possible writes cached by the OS. If the space does not exist
or is being dropped, does not do anything. */
-UNIV_INTERN
void
fil_flush(
/*======*/
@@ -1219,100 +1289,106 @@ fil_flush(
log files or a tablespace of the database) */
/** Flush a tablespace.
@param[in,out] space tablespace to flush */
-UNIV_INTERN
void
fil_flush(fil_space_t* space);
/** Flush to disk the writes in file spaces of the given type
possibly cached by the OS.
@param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_LOG */
-UNIV_INTERN
void
-fil_flush_file_spaces(ulint purpose);
+fil_flush_file_spaces(
+ fil_type_t purpose);
/******************************************************************//**
Checks the consistency of the tablespace cache.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
+@return true if ok */
+bool
fil_validate(void);
/*==============*/
/********************************************************************//**
-Returns TRUE if file address is undefined.
-@return TRUE if undefined */
-UNIV_INTERN
-ibool
+Returns true if file address is undefined.
+@return true if undefined */
+bool
fil_addr_is_null(
/*=============*/
fil_addr_t addr); /*!< in: address */
/********************************************************************//**
Get the predecessor of a file page.
-@return FIL_PAGE_PREV */
-UNIV_INTERN
+@return FIL_PAGE_PREV */
ulint
fil_page_get_prev(
/*==============*/
const byte* page); /*!< in: file page */
/********************************************************************//**
Get the successor of a file page.
-@return FIL_PAGE_NEXT */
-UNIV_INTERN
+@return FIL_PAGE_NEXT */
ulint
fil_page_get_next(
/*==============*/
const byte* page); /*!< in: file page */
/*********************************************************************//**
Sets the file page type. */
-UNIV_INTERN
void
fil_page_set_type(
/*==============*/
byte* page, /*!< in/out: file page */
ulint type); /*!< in: type */
-/*********************************************************************//**
-Gets the file page type.
-@return type; NOTE that if the type has not been written to page, the
-return value not defined */
-UNIV_INTERN
-ulint
-fil_page_get_type(
-/*==============*/
- const byte* page); /*!< in: file page */
-/*******************************************************************//**
-Returns TRUE if a single-table tablespace is being deleted.
-@return TRUE if being deleted */
-UNIV_INTERN
-ibool
-fil_tablespace_is_being_deleted(
-/*============================*/
- ulint id); /*!< in: space id */
+#ifdef UNIV_DEBUG
+/** Increase redo skipped of a tablespace.
+@param[in] id space id */
+void
+fil_space_inc_redo_skipped_count(
+ ulint id);
+
+/** Decrease redo skipped of a tablespace.
+@param[in] id space id */
+void
+fil_space_dec_redo_skipped_count(
+ ulint id);
+#endif
/********************************************************************//**
Delete the tablespace file and any related files like .cfg.
This should not be called for temporary tables. */
-UNIV_INTERN
void
fil_delete_file(
/*============*/
const char* path); /*!< in: filepath of the ibd tablespace */
-/*******************************************************************//**
-Checks if a single-table tablespace for a given table name exists in the
-tablespace memory cache.
-@return space id, ULINT_UNDEFINED if not found */
-UNIV_INTERN
+/********************************************************************//**
+Looks for a pre-existing fil_space_t with the given tablespace ID
+and, if found, returns the name and filepath in newly allocated
+buffers that the caller must free.
+@param[in] space_id The tablespace ID to search for.
+@param[out] name Name of the tablespace found.
+@param[out] filepath The filepath of the first datafile for the
+tablespace found.
+@return true if tablespace is found, false if not. */
+bool
+fil_space_read_name_and_filepath(
+ ulint space_id,
+ char** name,
+ char** filepath);
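A usage sketch; freeing the two out-buffers with ut_free() is an assumption, matching the other allocators in this header:

	char*	name;
	char*	filepath;

	if (fil_space_read_name_and_filepath(space_id, &name, &filepath)) {
		/* ... use name and filepath ... */
		ut_free(name);
		ut_free(filepath);
	}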
+
+/** Convert a file name to a tablespace name.
+@param[in] filename directory/databasename/tablename.ibd
+@return database/tablename string, to be freed with ut_free() */
+char*
+fil_path_to_space_name(
+ const char* filename);
+
+/** Returns the space ID based on the tablespace name.
+The tablespace must be found in the tablespace memory cache.
+This call is made from outside this module, so the fil_system mutex is not owned.
+@param[in] tablespace Tablespace name
+@return space ID if tablespace found, ULINT_UNDEFINED if not found. */
ulint
-fil_get_space_id_for_table(
-/*=======================*/
- const char* name); /*!< in: table name in the standard
- 'databasename/tablename' format */
+fil_space_get_id_by_name(
+ const char* tablespace);
/**
Iterate over all the spaces in the space list and fetch the
tablespace names. It will return a copy of the name that must be
freed by the caller using: delete[].
@return DB_SUCCESS if all OK. */
-UNIV_INTERN
dberr_t
fil_get_space_names(
/*================*/
@@ -1326,43 +1402,19 @@ fil_get_space_names(
@param[in] tmp_name temporary table name
@param[in,out] mtr mini-transaction
@return innodb error code */
-UNIV_INTERN
dberr_t
fil_mtr_rename_log(
const dict_table_t* old_table,
const dict_table_t* new_table,
const char* tmp_name,
mtr_t* mtr)
- MY_ATTRIBUTE((nonnull));
-
-/*******************************************************************//**
-Finds the given page_no of the given space id from the double write buffer,
-and copies it to the corresponding .ibd file.
-@return true if copy was successful, or false. */
-bool
-fil_user_tablespace_restore_page(
-/*==============================*/
- fsp_open_info* fsp, /* in: contains space id and .ibd
- file information */
- ulint page_no); /* in: page_no to obtain from double
- write buffer */
-
-/*******************************************************************//**
-Returns a pointer to the file_space_t that is in the memory cache
-associated with a space id.
-@return file_space_t pointer, NULL if space not found */
-fil_space_t*
-fil_space_get(
-/*==========*/
- ulint id); /*!< in: space id */
-#endif /* !UNIV_INNOCHECKSUM */
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Acquire the fil_system mutex. */
#define fil_system_enter() mutex_enter(&fil_system->mutex)
/** Release the fil_system mutex. */
#define fil_system_exit() mutex_exit(&fil_system->mutex)
-#ifndef UNIV_INNOCHECKSUM
/*******************************************************************//**
Returns the table space by a given id, NULL if not found. */
fil_space_t*
@@ -1370,6 +1422,92 @@ fil_space_get_by_id(
/*================*/
ulint id); /*!< in: space id */
+/** Look up a tablespace.
+@param[in] name tablespace name
+@return tablespace
+@retval NULL if not found */
+fil_space_t*
+fil_space_get_by_name(const char* name);
+
+/*******************************************************************//**
+Note that a non-predefined persistent tablespace has been modified
+by redo log.
+@param[in,out] space tablespace */
+void
+fil_names_dirty(
+ fil_space_t* space);
+
+/** Write MLOG_FILE_NAME records when a non-predefined persistent
+tablespace was modified for the first time since the latest
+fil_names_clear().
+@param[in,out] space tablespace
+@param[in,out] mtr mini-transaction */
+void
+fil_names_dirty_and_write(
+ fil_space_t* space,
+ mtr_t* mtr);
+
+/** Write MLOG_FILE_NAME records if a persistent tablespace was modified
+for the first time since the latest fil_names_clear().
+@param[in,out] space tablespace
+@param[in,out] mtr mini-transaction
+@return whether any MLOG_FILE_NAME record was written */
+inline MY_ATTRIBUTE((warn_unused_result))
+bool
+fil_names_write_if_was_clean(
+ fil_space_t* space,
+ mtr_t* mtr)
+{
+ ut_ad(log_mutex_own());
+
+ if (space == NULL) {
+ return(false);
+ }
+
+ const bool was_clean = space->max_lsn == 0;
+ ut_ad(space->max_lsn <= log_sys->lsn);
+ space->max_lsn = log_sys->lsn;
+
+ if (was_clean) {
+ fil_names_dirty_and_write(space, mtr);
+ }
+
+ return(was_clean);
+}
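A usage sketch; per the assertion in the function body, the caller must already hold log_sys->mutex, and the space and open mini-transaction are assumed to be in scope:

	if (fil_names_write_if_was_clean(space, &mtr)) {
		/* first modification since the latest fil_names_clear():
		an MLOG_FILE_NAME record was just written */
	}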
+
+/** During crash recovery, open a tablespace if it had not been opened
+yet, to get valid size and flags.
+@param[in,out] space tablespace */
+inline void fil_space_open_if_needed(fil_space_t* space)
+{
+ ut_d(extern volatile bool recv_recovery_on);
+ ut_ad(recv_recovery_on);
+
+ if (space->size == 0) {
+ /* Initially, size and flags will be set to 0,
+ until the files are opened for the first time.
+ fil_space_get_size() will open the file
+ and adjust the size and flags. */
+ ut_d(ulint size =) fil_space_get_size(space->id);
+ ut_ad(size == space->size);
+ }
+}
+
+/** On a log checkpoint, reset fil_names_dirty_and_write() flags
+and write out MLOG_FILE_NAME and MLOG_CHECKPOINT if needed.
+@param[in] lsn checkpoint LSN
+@param[in] do_write whether to always write MLOG_CHECKPOINT
+@return whether anything was written to the redo log
+@retval false if no flags were set and nothing written
+@retval true if anything was written to the redo log */
+bool
+fil_names_clear(
+ lsn_t lsn,
+ bool do_write);
+
+#ifdef UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH
+void test_make_filepath();
+#endif /* UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH */
+
/** Determine the block size of the data file.
@param[in] space tablespace
@param[in] offset page number
@@ -1377,12 +1515,8 @@ fil_space_get_by_id(
UNIV_INTERN
ulint
fil_space_get_block_size(const fil_space_t* space, unsigned offset);
-#endif /* UNIV_INNOCHECKSUM */
-#ifndef UNIV_INNOCHECKSUM
-#ifndef UNIV_NONINL
#include "fil0fil.ic"
-#endif
-#endif
+#endif /* UNIV_INNOCHECKSUM */
#endif /* fil0fil_h */
diff --git a/storage/innobase/include/fil0fil.ic b/storage/innobase/include/fil0fil.ic
index e9e681ffbef..1d1aaab61f5 100644
--- a/storage/innobase/include/fil0fil.ic
+++ b/storage/innobase/include/fil0fil.ic
@@ -35,12 +35,14 @@ fil_get_page_type_name(
ulint page_type) /*!< in: FIL_PAGE_TYPE */
{
switch(page_type) {
+ case FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED:
+ return "PAGE_COMPRESSED_ENRYPTED";
case FIL_PAGE_PAGE_COMPRESSED:
return "PAGE_COMPRESSED";
- case FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED:
- return "PAGE_COMPRESSED_ENCRYPTED";
case FIL_PAGE_INDEX:
return "INDEX";
+ case FIL_PAGE_RTREE:
+ return "RTREE";
case FIL_PAGE_UNDO_LOG:
return "UNDO LOG";
case FIL_PAGE_INODE:
@@ -65,22 +67,11 @@ fil_get_page_type_name(
return "ZBLOB";
case FIL_PAGE_TYPE_ZBLOB2:
return "ZBLOB2";
+ case FIL_PAGE_TYPE_UNKNOWN:
+ return "OLD UNKOWN PAGE TYPE";
+ default:
+ return "PAGE TYPE CORRUPTED";
}
-
- return "PAGE TYPE CORRUPTED";
-}
-
-/****************************************************************//**
-Get block size from fil node
-@return block size*/
-UNIV_INLINE
-ulint
-fil_node_get_block_size(
-/*====================*/
- fil_node_t* node) /*!< in: Node where to get block
- size */
-{
- return (node->file_block_size);
}
/****************************************************************//**
@@ -98,6 +89,7 @@ fil_page_type_validate(
if (!((page_type == FIL_PAGE_PAGE_COMPRESSED ||
page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED ||
page_type == FIL_PAGE_INDEX ||
+ page_type == FIL_PAGE_RTREE ||
page_type == FIL_PAGE_UNDO_LOG ||
page_type == FIL_PAGE_INODE ||
page_type == FIL_PAGE_IBUF_FREE_LIST ||
@@ -109,31 +101,23 @@ fil_page_type_validate(
page_type == FIL_PAGE_TYPE_XDES ||
page_type == FIL_PAGE_TYPE_BLOB ||
page_type == FIL_PAGE_TYPE_ZBLOB ||
- page_type == FIL_PAGE_TYPE_ZBLOB2))) {
+ page_type == FIL_PAGE_TYPE_ZBLOB2 ||
+ page_type == FIL_PAGE_TYPE_UNKNOWN))) {
- ulint key_version = mach_read_from_4(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
- bool page_compressed = (page_type == FIL_PAGE_PAGE_COMPRESSED);
- bool page_compressed_encrypted = (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED);
ulint space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
ulint offset = mach_read_from_4(page + FIL_PAGE_OFFSET);
- ib_uint64_t lsn = mach_read_from_8(page + FIL_PAGE_LSN);
- ulint compressed_len = mach_read_from_2(page + FIL_PAGE_DATA);
fil_system_enter();
fil_space_t* rspace = fil_space_get_by_id(space);
fil_system_exit();
/* Dump out the page info */
- fprintf(stderr, "InnoDB: Page " ULINTPF ":" ULINTPF
- " name %s page_type " ULINTPF " page_type_name %s\n"
- "InnoDB: key_version " ULINTPF
- " page_compressed %d page_compressed_encrypted %d lsn "
- LSN_PF " compressed_len " ULINTPF "\n",
- space, offset, rspace->name, page_type,
- fil_get_page_type_name(page_type),
- key_version,
- page_compressed, page_compressed_encrypted,
- lsn, compressed_len);
- ut_error;
+ ib::fatal() << "Page " << space << ":" << offset
+ << " name " << (rspace ? rspace->name : "???")
+ << " page_type " << page_type
+ << " key_version "
+ << mach_read_from_4(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION)
+ << " lsn " << mach_read_from_8(page + FIL_PAGE_LSN)
+ << " compressed_len " << mach_read_from_2(page + FIL_PAGE_DATA);
return false;
}
diff --git a/storage/innobase/include/fil0pagecompress.h b/storage/innobase/include/fil0pagecompress.h
index c2f94247d2a..545e05da769 100644
--- a/storage/innobase/include/fil0pagecompress.h
+++ b/storage/innobase/include/fil0pagecompress.h
@@ -38,8 +38,8 @@ Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com
@param[in] encrypted whether the page will be subsequently encrypted
@return actual length of compressed page
@retval 0 if the page was not compressed */
-UNIV_INTERN ulint fil_page_compress(const byte* buf, byte* out_buf, ulint level,
- ulint block_size, bool encrypted)
+ulint fil_page_compress(const byte* buf, byte* out_buf, ulint level,
+ ulint block_size, bool encrypted)
MY_ATTRIBUTE((nonnull, warn_unused_result));
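A sketch of the compress-before-write pattern implied by the declaration above; src_page, comp_buf and block_size are hypothetical, and the compression level shown is just an example value:

	/* A return value of 0 means the page stays uncompressed. */
	ulint	len = fil_page_compress(src_page, comp_buf,
					6 /* compression level */,
					block_size, false);
	if (len == 0) {
		/* write src_page as-is */
	}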
/** Decompress a page that may be subject to page_compressed compression.
@@ -48,32 +48,6 @@ UNIV_INTERN ulint fil_page_compress(const byte* buf, byte* out_buf, ulint level,
@return size of the compressed data
@retval 0 if decompression failed
@retval srv_page_size if the page was not compressed */
-UNIV_INTERN ulint fil_page_decompress(byte* tmp_buf, byte* buf)
+ulint fil_page_decompress(byte* tmp_buf, byte* buf)
MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/****************************************************************//**
-Get block size from fil node
-@return block size*/
-UNIV_INLINE
-ulint
-fil_node_get_block_size(
- fil_node_t* node); /*!< in: Node where to get block
- size */
-/*******************************************************************//**
-Find out wheather the page is page compressed
-@return true if page is page compressed*/
-UNIV_INLINE
-ibool
-fil_page_is_compressed(
-/*===================*/
- byte* buf); /*!< in: page */
-
-/*******************************************************************//**
-Find out wheather the page is page compressed
-@return true if page is page compressed*/
-UNIV_INLINE
-ibool
-fil_page_is_compressed_encrypted(
-/*=============================*/
- byte* buf); /*!< in: page */
#endif
diff --git a/storage/innobase/include/fsp0file.h b/storage/innobase/include/fsp0file.h
new file mode 100644
index 00000000000..3e5fa1369b0
--- /dev/null
+++ b/storage/innobase/include/fsp0file.h
@@ -0,0 +1,580 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/fsp0file.h
+Tablespace data file implementation.
+
+Created 2013-7-26 by Kevin Lewis
+*******************************************************/
+
+#ifndef fsp0file_h
+#define fsp0file_h
+
+#include "mem0mem.h"
+#include "os0file.h"
+#include "fil0fil.h"
+
+/** Types of raw partitions in innodb_data_file_path */
+enum device_t {
+ SRV_NOT_RAW = 0, /*!< Not a raw partition */
+ SRV_NEW_RAW, /*!< A 'newraw' partition, only to be
+ initialized */
+ SRV_OLD_RAW /*!< An initialized raw partition */
+};
+
+/** Data file control information. */
+class Datafile {
+
+ friend class Tablespace;
+ friend class SysTablespace;
+
+public:
+
+ Datafile()
+ :
+ m_name(),
+ m_filepath(),
+ m_filename(),
+ m_handle(),
+ m_open_flags(OS_FILE_OPEN),
+ m_size(),
+ m_order(),
+ m_type(SRV_NOT_RAW),
+ m_space_id(ULINT_UNDEFINED),
+ m_flags(),
+ m_exists(),
+ m_is_valid(),
+ m_first_page_buf(),
+ m_first_page(),
+ m_last_os_error(),
+ m_file_info()
+ {
+ /* No op */
+ }
+
+ Datafile(const char* name, ulint flags, ulint size, ulint order)
+ :
+ m_name(mem_strdup(name)),
+ m_filepath(),
+ m_filename(),
+ m_handle(),
+ m_open_flags(OS_FILE_OPEN),
+ m_size(size),
+ m_order(order),
+ m_type(SRV_NOT_RAW),
+ m_space_id(ULINT_UNDEFINED),
+ m_flags(flags),
+ m_exists(),
+ m_is_valid(),
+ m_first_page_buf(),
+ m_first_page(),
+ m_last_os_error(),
+ m_file_info()
+ {
+ ut_ad(m_name != NULL);
+ /* No op */
+ }
+
+ Datafile(const Datafile& file)
+ :
+ m_handle(file.m_handle),
+ m_open_flags(file.m_open_flags),
+ m_size(file.m_size),
+ m_order(file.m_order),
+ m_type(file.m_type),
+ m_space_id(file.m_space_id),
+ m_flags(file.m_flags),
+ m_exists(file.m_exists),
+ m_is_valid(file.m_is_valid),
+ m_first_page_buf(),
+ m_first_page(),
+ m_last_os_error(),
+ m_file_info()
+ {
+ m_name = mem_strdup(file.m_name);
+ ut_ad(m_name != NULL);
+
+ if (file.m_filepath != NULL) {
+ m_filepath = mem_strdup(file.m_filepath);
+ ut_a(m_filepath != NULL);
+ set_filename();
+ } else {
+ m_filepath = NULL;
+ m_filename = NULL;
+ }
+ }
+
+ virtual ~Datafile()
+ {
+ shutdown();
+ }
+
+ Datafile& operator=(const Datafile& file)
+ {
+ ut_a(this != &file);
+
+ ut_ad(m_name == NULL);
+ m_name = mem_strdup(file.m_name);
+ ut_a(m_name != NULL);
+
+ m_size = file.m_size;
+ m_order = file.m_order;
+ m_type = file.m_type;
+
+ ut_a(m_handle == OS_FILE_CLOSED);
+ m_handle = file.m_handle;
+
+ m_exists = file.m_exists;
+ m_is_valid = file.m_is_valid;
+ m_open_flags = file.m_open_flags;
+ m_space_id = file.m_space_id;
+ m_flags = file.m_flags;
+ m_last_os_error = 0;
+
+ if (m_filepath != NULL) {
+ ut_free(m_filepath);
+ m_filepath = NULL;
+ m_filename = NULL;
+ }
+
+ if (file.m_filepath != NULL) {
+ m_filepath = mem_strdup(file.m_filepath);
+ ut_a(m_filepath != NULL);
+ set_filename();
+ }
+
+ /* Do not make a copy of the first page,
+ it should be reread if needed */
+ m_first_page_buf = NULL;
+ m_first_page = NULL;
+
+ return(*this);
+ }
+
+ /** Initialize the name and flags of this datafile.
+ @param[in] name tablespace name, will be copied
+ @param[in] flags tablespace flags */
+ void init(const char* name, ulint flags);
+
+ /** Release the resources. */
+ virtual void shutdown();
+
+ /** Open a data file in read-only mode to check if it exists
+ so that it can be validated.
+ @param[in] strict whether to issue error messages
+ @return DB_SUCCESS or error code */
+ virtual dberr_t open_read_only(bool strict);
+
+ /** Open a data file in read-write mode during start-up so that
+ doublewrite pages can be restored and then it can be validated.
+ @param[in] read_only_mode if true, then readonly mode checks
+ are enforced.
+ @return DB_SUCCESS or error code */
+ virtual dberr_t open_read_write(bool read_only_mode)
+ MY_ATTRIBUTE((warn_unused_result));
+
+ /** Initialize OS specific file info. */
+ void init_file_info();
+
+ /** Close a data file.
+ @return DB_SUCCESS or error code */
+ dberr_t close();
+
+ /** Make a full filepath from a directory path and a filename.
+ Prepend the dirpath to filename using the extension given.
+ If dirpath is NULL, prepend the default datadir to filepath.
+ Store the result in m_filepath.
+ @param[in] dirpath directory path
+ @param[in] filename filename or filepath
+ @param[in] ext filename extension */
+ void make_filepath(
+ const char* dirpath,
+ const char* filename,
+ ib_extention ext);
+
+ /** Set the filepath by duplicating the filepath sent in */
+ void set_filepath(const char* filepath);
+
+ /** Allocate and set the datafile or tablespace name in m_name.
+ If a name is provided, use it; else extract a file-per-table
+ tablespace name from m_filepath. The value of m_name
+ will be freed in the destructor.
+ @param[in] name Tablespace Name if known, NULL if not */
+ void set_name(const char* name);
+
+ /** Validates the datafile and checks that it conforms with
+ the expected space ID and flags. The file should exist and be
+ successfully opened in order for this function to validate it.
+ @param[in] space_id The expected tablespace ID.
+ @param[in] flags The expected tablespace flags.
+ @retval DB_SUCCESS if tablespace is valid, DB_ERROR if not.
+ m_is_valid is also set true on success, else false. */
+ dberr_t validate_to_dd(ulint space_id, ulint flags)
+ MY_ATTRIBUTE((warn_unused_result));
+
+ /** Validates this datafile for the purpose of recovery.
+ The file should exist and be successfully opened. We initially
+ open it in read-only mode because we just want to read the SpaceID.
+ However, if the first page is corrupt and needs to be restored
+ from the doublewrite buffer, we will reopen it in write mode and
+ try to restore that page.
+ @retval DB_SUCCESS if tablespace is valid, DB_ERROR if not.
+ m_is_valid is also set true on success, else false. */
+ dberr_t validate_for_recovery()
+ MY_ATTRIBUTE((warn_unused_result));
+
+ /** Checks the consistency of the first page of a datafile when the
+ tablespace is opened. This occurs before the fil_space_t is created
+ so the Space ID found here must not already be open.
+ m_is_valid is set true on success, else false.
+ @param[out] flush_lsn contents of FIL_PAGE_FILE_FLUSH_LSN
+ @retval DB_SUCCESS if the datafile is valid
+ @retval DB_CORRUPTION if the datafile is not readable
+ @retval DB_TABLESPACE_EXISTS if there is a duplicate space_id */
+ dberr_t validate_first_page(lsn_t* flush_lsn)
+ MY_ATTRIBUTE((warn_unused_result));
+
+ /** Get Datafile::m_name.
+ @return m_name */
+ const char* name() const
+ {
+ return(m_name);
+ }
+
+ /** Get Datafile::m_filepath.
+ @return m_filepath */
+ const char* filepath() const
+ {
+ return(m_filepath);
+ }
+
+ /** Get Datafile::m_handle.
+ @return m_handle */
+ pfs_os_file_t handle() const
+ {
+ return(m_handle);
+ }
+
+ /** Get Datafile::m_order.
+ @return m_order */
+ ulint order() const
+ {
+ return(m_order);
+ }
+
+ /** Get Datafile::m_space_id.
+ @return m_space_id */
+ ulint space_id() const
+ {
+ return(m_space_id);
+ }
+
+ /** Get Datafile::m_flags.
+ @return m_flags */
+ ulint flags() const
+ {
+ return(m_flags);
+ }
+
+ /**
+ @return true if m_handle is open, false if not */
+ bool is_open() const
+ {
+ return(m_handle != OS_FILE_CLOSED);
+ }
+
+ /** Get Datafile::m_is_valid.
+ @return m_is_valid */
+ bool is_valid() const
+ {
+ return(m_is_valid);
+ }
+
+ /** Get the last OS error reported
+ @return m_last_os_error */
+ ulint last_os_error() const
+ {
+ return(m_last_os_error);
+ }
+
+ /** Check whether the file is empty.
+ @return true if file is empty */
+ bool is_empty_file() const
+ {
+#ifdef _WIN32
+ os_offset_t offset =
+ (os_offset_t) m_file_info.nFileSizeLow
+ | ((os_offset_t) m_file_info.nFileSizeHigh << 32);
+
+ return (offset == 0);
+#else
+ return (m_file_info.st_size == 0);
+#endif
+ }
+
+ /** Check if the file exist.
+ @return true if file exists. */
+ bool exists() const { return m_exists; }
+
+ /** Test if the filepath provided looks the same as this filepath
+ by string comparison. If they are two different paths to the same
+ file, same_as() will be used to show that after the files are opened.
+ @param[in] other filepath to compare with
+ @retval true if it is the same filename by char comparison
+ @retval false if it looks different */
+ bool same_filepath_as(const char* other) const;
+
+ /** Test if another opened datafile is the same file as this object.
+ @param[in] other Datafile to compare with
+ @return true if it is the same file, else false */
+ bool same_as(const Datafile& other) const;
+
+ /** Get access to the first data page.
+ It is valid after open_read_only() succeeded.
+ @return the first data page */
+ const byte* get_first_page() const { return(m_first_page); }
+
+private:
+ /** Free the filepath buffer. */
+ void free_filepath();
+
+ /** Set the filename pointer to the start of the file name
+ in the filepath. */
+ void set_filename()
+ {
+ if (m_filepath == NULL) {
+ return;
+ }
+
+ char* last_slash = strrchr(m_filepath, OS_PATH_SEPARATOR);
+
+ m_filename = last_slash ? last_slash + 1 : m_filepath;
+ }
+
+ /** Create/open a data file.
+ @param[in] read_only_mode if true, then readonly mode checks
+ are enforced.
+ @return DB_SUCCESS or error code */
+ dberr_t open_or_create(bool read_only_mode)
+ MY_ATTRIBUTE((warn_unused_result));
+
+ /** Reads a few significant fields from the first page of the
+ datafile, which must already be open.
+ @param[in] read_only_mode if true, then readonly mode checks
+ are enforced.
+ @return DB_SUCCESS or DB_IO_ERROR if page cannot be read */
+ dberr_t read_first_page(bool read_only_mode)
+ MY_ATTRIBUTE((warn_unused_result));
+
+ /** Free the first page from memory when it is no longer needed. */
+ void free_first_page();
+
+ /** Set the Datafile::m_open_flags.
+ @param open_flags The Open flags to set. */
+ void set_open_flags(os_file_create_t open_flags)
+ {
+ m_open_flags = open_flags;
+ };
+
+ /** Determine if this datafile is on a Raw Device
+ @return true if it is a RAW device. */
+ bool is_raw_device()
+ {
+ return(m_type != SRV_NOT_RAW);
+ }
+
+ /* DATA MEMBERS */
+
+ /** Datafile name at the tablespace location.
+ This is either the basename of the file if an absolute path
+ was entered, or it is the relative path to the datadir or
+ Tablespace::m_path. */
+ char* m_name;
+
+protected:
+ /** Physical file path with base name and extension */
+ char* m_filepath;
+
+private:
+ /** Determine the space id of the given file descriptor by reading
+ a few pages from the beginning of the .ibd file.
+ @return DB_SUCCESS if space id was successfully identified,
+ else DB_ERROR. */
+ dberr_t find_space_id();
+
+ /** Restore the first page of the tablespace from
+ the double write buffer.
+ @return whether the operation failed */
+ bool restore_from_doublewrite();
+
+ /** Points into m_filepath to the file name with extension */
+ char* m_filename;
+
+ /** Open file handle */
+ pfs_os_file_t m_handle;
+
+ /** Flags to use for opening the data file */
+ os_file_create_t m_open_flags;
+
+ /** size in database pages */
+ ulint m_size;
+
+ /** ordinal position of this datafile in the tablespace */
+ ulint m_order;
+
+ /** The type of the data file */
+ device_t m_type;
+
+ /** Tablespace ID. Contained in the datafile header.
+ If this is a system tablespace, FSP_SPACE_ID is only valid
+ in the first datafile. */
+ ulint m_space_id;
+
+ /** Tablespace flags. Contained in the datafile header.
+ If this is a system tablespace, FSP_SPACE_FLAGS are only valid
+ in the first datafile. */
+ ulint m_flags;
+
+ /** true if file already existed on startup */
+ bool m_exists;
+
+ /** true if the tablespace is valid */
+ bool m_is_valid;
+
+ /** Buffer to hold first page */
+ byte* m_first_page_buf;
+
+ /** Pointer to the first page held in the buffer above */
+ byte* m_first_page;
+
+protected:
+ /** Last OS error received so it can be reported if needed. */
+ ulint m_last_os_error;
+
+public:
+ /** Use the following to determine the uniqueness of this datafile. */
+#ifdef _WIN32
+ /* Use fields dwVolumeSerialNumber, nFileIndexLow, nFileIndexHigh. */
+ BY_HANDLE_FILE_INFORMATION m_file_info;
+#else
+ /* Use field st_ino. */
+ struct stat m_file_info;
+#endif /* _WIN32 */
+};
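A minimal sketch of the open-and-validate flow suggested by the class interface; the IBD suffix constant is assumed as above, and space_id/flags are hypothetical:

	Datafile	df;
	df.init("db/t1", flags);
	df.make_filepath(NULL, "db/t1", IBD);

	if (df.open_read_only(/* strict */ true) == DB_SUCCESS
	    && df.validate_to_dd(space_id, flags) == DB_SUCCESS) {
		/* df.space_id() and df.flags() now mirror the header */
	}

	df.close();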
+
+
+/** Data file control information. */
+class RemoteDatafile : public Datafile
+{
+private:
+ /** Link filename (full path) */
+ char* m_link_filepath;
+
+public:
+
+ RemoteDatafile()
+ :
+ m_link_filepath()
+ {
+ /* No op - base constructor is called. */
+ }
+
+ RemoteDatafile(const char* name, ulint size, ulint order)
+ :
+ m_link_filepath()
+ {
+ /* No op - base constructor is called. */
+ }
+
+ ~RemoteDatafile()
+ {
+ shutdown();
+ }
+
+ /** Release the resources. */
+ void shutdown();
+
+ /** Get the link filepath.
+ @return m_link_filepath */
+ const char* link_filepath() const
+ {
+ return(m_link_filepath);
+ }
+
+ /** Set the link filepath. Use default datadir, the base name of
+ the path provided without its suffix, plus DOT_ISL.
+ @param[in] path filepath which contains a basename to use.
+ If NULL, use m_name as the basename. */
+ void set_link_filepath(const char* path);
+
+ /** Create a link filename based on the contents of m_name,
+ open that file, and read the contents into m_filepath.
+ @retval DB_SUCCESS if remote linked tablespace file is opened and read.
+ @retval DB_CANNOT_OPEN_FILE if the link file does not exist. */
+ dberr_t open_link_file();
+
+ /** Delete an InnoDB Symbolic Link (ISL) file. */
+ void delete_link_file(void);
+
+ /** Open a handle to the file linked to in an InnoDB Symbolic Link file
+ in read-only mode so that it can be validated.
+ @param[in] strict whether to issue error messages
+ @return DB_SUCCESS or error code */
+ dberr_t open_read_only(bool strict);
+
+ /** Opens a handle to the file linked to in an InnoDB Symbolic Link
+ file in read-write mode so that it can be restored from doublewrite
+ and validated.
+ @param[in] read_only_mode If true, then readonly mode checks
+ are enforced.
+ @return DB_SUCCESS or error code */
+ dberr_t open_read_write(bool read_only_mode)
+ MY_ATTRIBUTE((warn_unused_result));
+
+ /******************************************************************
+ Global Static Functions; Cannot refer to data members.
+ ******************************************************************/
+
+ /** Creates a new InnoDB Symbolic Link (ISL) file. It is always
+ created under the 'datadir' of MySQL. The datadir is the directory
+ of a running mysqld program. We can refer to it by simply using
+ the path ".".
+ @param[in] name tablespace name
+ @param[in] filepath remote filepath of tablespace datafile
+ @return DB_SUCCESS or error code */
+ static dberr_t create_link_file(
+ const char* name,
+ const char* filepath);
+
+ /** Delete an InnoDB Symbolic Link (ISL) file by name.
+ @param[in] name tablespace name */
+ static void delete_link_file(const char* name);
+
+ /** Read an InnoDB Symbolic Link (ISL) file by name.
+ It is always created under the datadir of MySQL.
+ For file-per-table tablespaces, the isl file is expected to be
+ in a 'database' directory and called 'tablename.isl'.
+ The caller must free the memory returned if it is not null.
+ @param[in] link_filepath filepath of the ISL file
+ @return Filepath of the IBD file read from the ISL file */
+ static char* read_link_file(
+ const char* link_filepath);
+};
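A sketch of DATA DIRECTORY handling through the static ISL helpers; the paths are hypothetical, and ut_free() is assumed as the deallocator for the returned filepath:

	dberr_t	err = RemoteDatafile::create_link_file(
		"db/t1", "/mnt/fast/db/t1.ibd");

	if (err == DB_SUCCESS) {
		char*	ibd_path = RemoteDatafile::read_link_file(
			"./db/t1.isl");
		if (ibd_path != NULL) {
			/* ... open the remote datafile ... */
			ut_free(ibd_path);
		}
	}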
+#endif /* fsp0file_h */
diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h
index 90939e02db1..a4e3b84b55e 100644
--- a/storage/innobase/include/fsp0fsp.h
+++ b/storage/innobase/include/fsp0fsp.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation. All Rights Reserved.
+Copyright (c) 2013, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,194 +27,26 @@ Created 12/18/1995 Heikki Tuuri
#ifndef fsp0fsp_h
#define fsp0fsp_h
-#include "univ.i"
-
#include "fsp0types.h"
#ifndef UNIV_INNOCHECKSUM
-#include "mtr0mtr.h"
#include "fut0lst.h"
-#include "ut0byte.h"
+#include "mtr0mtr.h"
#include "page0types.h"
+#include "rem0types.h"
+#include "ut0byte.h"
#endif /* !UNIV_INNOCHECKSUM */
-/* @defgroup fsp_flags InnoDB Tablespace Flag Constants @{ */
-
-/** Width of the POST_ANTELOPE flag */
-#define FSP_FLAGS_WIDTH_POST_ANTELOPE 1
-/** Number of flag bits used to indicate the tablespace zip page size */
-#define FSP_FLAGS_WIDTH_ZIP_SSIZE 4
-/** Width of the ATOMIC_BLOBS flag. The ability to break up a long
-column into an in-record prefix and an externally stored part is available
-to the two Barracuda row formats COMPRESSED and DYNAMIC. */
-#define FSP_FLAGS_WIDTH_ATOMIC_BLOBS 1
-/** Number of flag bits used to indicate the tablespace page size */
-#define FSP_FLAGS_WIDTH_PAGE_SSIZE 4
-/** Number of reserved bits */
-#define FSP_FLAGS_WIDTH_RESERVED 6
-/** Number of flag bits used to indicate the page compression */
-#define FSP_FLAGS_WIDTH_PAGE_COMPRESSION 1
-
-/** Width of all the currently known persistent tablespace flags */
-#define FSP_FLAGS_WIDTH (FSP_FLAGS_WIDTH_POST_ANTELOPE \
- + FSP_FLAGS_WIDTH_ZIP_SSIZE \
- + FSP_FLAGS_WIDTH_ATOMIC_BLOBS \
- + FSP_FLAGS_WIDTH_PAGE_SSIZE \
- + FSP_FLAGS_WIDTH_RESERVED \
- + FSP_FLAGS_WIDTH_PAGE_COMPRESSION)
-
-/** A mask of all the known/used bits in FSP_SPACE_FLAGS */
-#define FSP_FLAGS_MASK (~(~0U << FSP_FLAGS_WIDTH))
-
-/* FSP_SPACE_FLAGS position and name in MySQL 5.6/MariaDB 10.0 or older
-and MariaDB 10.1.20 or older MariaDB 10.1 and in MariaDB 10.1.21
-or newer.
-MySQL 5.6 MariaDB 10.1.x MariaDB 10.1.21
-====================================================================
-Below flags in same offset
-====================================================================
-0: POST_ANTELOPE 0:POST_ANTELOPE 0: POST_ANTELOPE
-1..4: ZIP_SSIZE(0..5) 1..4:ZIP_SSIZE(0..5) 1..4: ZIP_SSIZE(0..5)
-(NOTE: bit 4 is always 0)
-5: ATOMIC_BLOBS 5:ATOMIC_BLOBS 5: ATOMIC_BLOBS
-=====================================================================
-Below note the order difference:
-=====================================================================
-6..9: PAGE_SSIZE(3..7) 6: COMPRESSION 6..9: PAGE_SSIZE(3..7)
-10: DATA_DIR 7..10: COMP_LEVEL(0..9) 10: RESERVED (5.6 DATA_DIR)
-=====================================================================
-The flags below were in incorrect position in MariaDB 10.1,
-or have been introduced in MySQL 5.7 or 8.0:
-=====================================================================
-11: UNUSED 11..12:ATOMIC_WRITES 11: RESERVED (5.7 SHARED)
- 12: RESERVED (5.7 TEMPORARY)
- 13..15:PAGE_SSIZE(3..7) 13: RESERVED (5.7 ENCRYPTION)
- 14: RESERVED (8.0 SDI)
- 15: RESERVED
- 16: PAGE_SSIZE_msb(0) 16: COMPRESSION
- 17: DATA_DIR 17: UNUSED
- 18: UNUSED
-=====================================================================
-The flags below only exist in fil_space_t::flags, not in FSP_SPACE_FLAGS:
-=====================================================================
- 25: DATA_DIR
- 26..27: ATOMIC_WRITES
- 28..31: COMPRESSION_LEVEL
-*/
-
-/** A mask of the memory-only flags in fil_space_t::flags */
-#define FSP_FLAGS_MEM_MASK (~0U << FSP_FLAGS_MEM_DATA_DIR)
-
-/** Zero relative shift position of the DATA_DIR flag */
-#define FSP_FLAGS_MEM_DATA_DIR 25
-/** Zero relative shift position of the ATOMIC_WRITES field */
-#define FSP_FLAGS_MEM_ATOMIC_WRITES 26
-/** Zero relative shift position of the COMPRESSION_LEVEL field */
-#define FSP_FLAGS_MEM_COMPRESSION_LEVEL 28
-
-/** Zero relative shift position of the POST_ANTELOPE field */
-#define FSP_FLAGS_POS_POST_ANTELOPE 0
-/** Zero relative shift position of the ZIP_SSIZE field */
-#define FSP_FLAGS_POS_ZIP_SSIZE (FSP_FLAGS_POS_POST_ANTELOPE \
- + FSP_FLAGS_WIDTH_POST_ANTELOPE)
-/** Zero relative shift position of the ATOMIC_BLOBS field */
-#define FSP_FLAGS_POS_ATOMIC_BLOBS (FSP_FLAGS_POS_ZIP_SSIZE \
- + FSP_FLAGS_WIDTH_ZIP_SSIZE)
-/** Zero relative shift position of the start of the PAGE_SSIZE bits */
-#define FSP_FLAGS_POS_PAGE_SSIZE (FSP_FLAGS_POS_ATOMIC_BLOBS \
- + FSP_FLAGS_WIDTH_ATOMIC_BLOBS)
-/** Zero relative shift position of the start of the RESERVED bits
-these are only used in MySQL 5.7 and used for compatibility. */
-#define FSP_FLAGS_POS_RESERVED (FSP_FLAGS_POS_PAGE_SSIZE \
- + FSP_FLAGS_WIDTH_PAGE_SSIZE)
-/** Zero relative shift position of the PAGE_COMPRESSION field */
-#define FSP_FLAGS_POS_PAGE_COMPRESSION (FSP_FLAGS_POS_RESERVED \
- + FSP_FLAGS_WIDTH_RESERVED)
-
-/** Bit mask of the POST_ANTELOPE field */
-#define FSP_FLAGS_MASK_POST_ANTELOPE \
- ((~(~0U << FSP_FLAGS_WIDTH_POST_ANTELOPE)) \
- << FSP_FLAGS_POS_POST_ANTELOPE)
-/** Bit mask of the ZIP_SSIZE field */
-#define FSP_FLAGS_MASK_ZIP_SSIZE \
- ((~(~0U << FSP_FLAGS_WIDTH_ZIP_SSIZE)) \
- << FSP_FLAGS_POS_ZIP_SSIZE)
-/** Bit mask of the ATOMIC_BLOBS field */
-#define FSP_FLAGS_MASK_ATOMIC_BLOBS \
- ((~(~0U << FSP_FLAGS_WIDTH_ATOMIC_BLOBS)) \
- << FSP_FLAGS_POS_ATOMIC_BLOBS)
-/** Bit mask of the PAGE_SSIZE field */
-#define FSP_FLAGS_MASK_PAGE_SSIZE \
- ((~(~0U << FSP_FLAGS_WIDTH_PAGE_SSIZE)) \
- << FSP_FLAGS_POS_PAGE_SSIZE)
-/** Bit mask of the RESERVED1 field */
-#define FSP_FLAGS_MASK_RESERVED \
- ((~(~0U << FSP_FLAGS_WIDTH_RESERVED)) \
- << FSP_FLAGS_POS_RESERVED)
-/** Bit mask of the PAGE_COMPRESSION field */
-#define FSP_FLAGS_MASK_PAGE_COMPRESSION \
- ((~(~0U << FSP_FLAGS_WIDTH_PAGE_COMPRESSION)) \
- << FSP_FLAGS_POS_PAGE_COMPRESSION)
-
-/** Bit mask of the in-memory ATOMIC_WRITES field */
-#define FSP_FLAGS_MASK_MEM_ATOMIC_WRITES \
- (3U << FSP_FLAGS_MEM_ATOMIC_WRITES)
-
-/** Bit mask of the in-memory COMPRESSION_LEVEL field */
-#define FSP_FLAGS_MASK_MEM_COMPRESSION_LEVEL \
- (15U << FSP_FLAGS_MEM_COMPRESSION_LEVEL)
-
-/** Return the value of the POST_ANTELOPE field */
-#define FSP_FLAGS_GET_POST_ANTELOPE(flags) \
- ((flags & FSP_FLAGS_MASK_POST_ANTELOPE) \
- >> FSP_FLAGS_POS_POST_ANTELOPE)
-/** Return the value of the ZIP_SSIZE field */
-#define FSP_FLAGS_GET_ZIP_SSIZE(flags) \
- ((flags & FSP_FLAGS_MASK_ZIP_SSIZE) \
- >> FSP_FLAGS_POS_ZIP_SSIZE)
-/** Return the value of the ATOMIC_BLOBS field */
-#define FSP_FLAGS_HAS_ATOMIC_BLOBS(flags) \
- ((flags & FSP_FLAGS_MASK_ATOMIC_BLOBS) \
- >> FSP_FLAGS_POS_ATOMIC_BLOBS)
-/** Return the value of the PAGE_SSIZE field */
-#define FSP_FLAGS_GET_PAGE_SSIZE(flags) \
- ((flags & FSP_FLAGS_MASK_PAGE_SSIZE) \
- >> FSP_FLAGS_POS_PAGE_SSIZE)
-/** @return the RESERVED flags */
-#define FSP_FLAGS_GET_RESERVED(flags) \
- ((flags & FSP_FLAGS_MASK_RESERVED) \
- >> FSP_FLAGS_POS_RESERVED)
-/** @return the PAGE_COMPRESSION flag */
-#define FSP_FLAGS_HAS_PAGE_COMPRESSION(flags) \
- ((flags & FSP_FLAGS_MASK_PAGE_COMPRESSION) \
- >> FSP_FLAGS_POS_PAGE_COMPRESSION)
-
-/** Return the contents of the UNUSED bits */
-#define FSP_FLAGS_GET_UNUSED(flags) \
- (flags >> FSP_FLAGS_POS_UNUSED)
-
/** @return the PAGE_SSIZE flags for the current innodb_page_size */
#define FSP_FLAGS_PAGE_SSIZE() \
((UNIV_PAGE_SIZE == UNIV_PAGE_SIZE_ORIG) ? \
0 : (UNIV_PAGE_SIZE_SHIFT - UNIV_ZIP_SIZE_SHIFT_MIN + 1) \
<< FSP_FLAGS_POS_PAGE_SSIZE)
-/** @return the value of the DATA_DIR field */
-#define FSP_FLAGS_HAS_DATA_DIR(flags) \
- (flags & 1U << FSP_FLAGS_MEM_DATA_DIR)
-/** @return the COMPRESSION_LEVEL field */
-#define FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags) \
- ((flags & FSP_FLAGS_MASK_MEM_COMPRESSION_LEVEL) \
- >> FSP_FLAGS_MEM_COMPRESSION_LEVEL)
-/** @return the ATOMIC_WRITES field */
-#define FSP_FLAGS_GET_ATOMIC_WRITES(flags) \
- ((flags & FSP_FLAGS_MASK_MEM_ATOMIC_WRITES) \
- >> FSP_FLAGS_MEM_ATOMIC_WRITES)
-
-/* Compatibility macros for MariaDB 10.1.20 or older 10.1 see
-table above. */
+/* @defgroup Compatibility macros for MariaDB 10.1.0 through 10.1.20;
+see the table in fsp0types.h @{ */
/** Zero relative shift position of the PAGE_COMPRESSION field */
#define FSP_FLAGS_POS_PAGE_COMPRESSION_MARIADB101 \
(FSP_FLAGS_POS_ATOMIC_BLOBS \
@@ -289,7 +121,7 @@ descriptor page, but used only in the first. */
< 64 pages, this number is 64, i.e.,
we have initialized the space
about the first extent, but have not
- physically allocted those pages to the
+ physically allocated those pages to the
file */
#define FSP_SPACE_FLAGS 16 /* fsp_space_t.flags, similar to
dict_table_t::flags */
@@ -322,7 +154,6 @@ descriptor page, but used only in the first. */
FSP_FREE_LIMIT at a time */
/* @} */
-
/* @defgroup File Segment Inode Constants (moved from fsp0fsp.c) @{ */
/* FILE SEGMENT INODE
@@ -370,9 +201,8 @@ typedef byte fseg_inode_t;
(16 + 3 * FLST_BASE_NODE_SIZE \
+ FSEG_FRAG_ARR_N_SLOTS * FSEG_FRAG_SLOT_SIZE)
-#define FSP_SEG_INODES_PER_PAGE(zip_size) \
- (((zip_size ? zip_size : UNIV_PAGE_SIZE) \
- - FSEG_ARR_OFFSET - 10) / FSEG_INODE_SIZE)
+#define FSP_SEG_INODES_PER_PAGE(page_size) \
+ ((page_size.physical() - FSEG_ARR_OFFSET - 10) / FSEG_INODE_SIZE)
/* Number of segment inodes which fit on a
single page */
@@ -457,62 +287,104 @@ the extent are free and which contain old tuple version to clean. */
/** Offset of the descriptor array on a descriptor page */
#define XDES_ARR_OFFSET (FSP_HEADER_OFFSET + FSP_HEADER_SIZE)
+#ifndef UNIV_INNOCHECKSUM
/* @} */
-#ifndef UNIV_INNOCHECKSUM
/**********************************************************************//**
Initializes the file space system. */
-UNIV_INTERN
void
fsp_init(void);
/*==========*/
+
/**********************************************************************//**
Gets the size of the system tablespace from the tablespace header. If
we do not have an auto-extending data file, this should be equal to
the size of the data files. If there is an auto-extending data file,
this can be smaller.
-@return size in pages */
-UNIV_INTERN
+@return size in pages */
ulint
fsp_header_get_tablespace_size(void);
/*================================*/
-/**********************************************************************//**
-Reads the file space size stored in the header page.
-@return tablespace size stored in the space header */
-UNIV_INTERN
+
+/** Calculate the number of pages to extend a datafile.
+We extend single-table tablespaces first one extent at a time,
+but 4 at a time for bigger tablespaces. It is not enough to extend always
+by one extent, because we need to add at least one extent to FSP_FREE.
+A single extent descriptor page will track many extents. And the extent
+that uses its extent descriptor page is put onto the FSP_FREE_FRAG list.
+Extents that do not use their extent descriptor page are added to FSP_FREE.
+The physical page size is used to determine how many extents are tracked
+on one extent descriptor page. See xdes_calc_descriptor_page().
+@param[in] page_size page_size of the datafile
+@param[in] size current number of pages in the datafile
+@return number of pages to extend the file. */
ulint
-fsp_get_size_low(
-/*=============*/
- page_t* page); /*!< in: header page (page 0 in the tablespace) */
+fsp_get_pages_to_extend_ibd(
+ const page_size_t& page_size,
+ ulint size);
+
+/** Calculate the number of physical pages in an extent for this file.
+@param[in] page_size page_size of the datafile
+@return number of pages in an extent for this file. */
+UNIV_INLINE
+ulint
+fsp_get_extent_size_in_pages(const page_size_t& page_size)
+{
+ return(FSP_EXTENT_SIZE * UNIV_PAGE_SIZE / page_size.physical());
+}
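
A worked example of this arithmetic (the numbers are illustrative, not from this patch):

	/* With the default innodb_page_size=16k, FSP_EXTENT_SIZE is
	1048576 / 16384 = 64 pages.  For a ROW_FORMAT=COMPRESSED file
	whose physical page size is 8 KiB:
		fsp_get_extent_size_in_pages() = 64 * 16384 / 8192 = 128
	physical pages, i.e. an extent still covers 1 MiB of the file. */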
+
/**********************************************************************//**
Reads the space id from the first page of a tablespace.
-@return space id, ULINT UNDEFINED if error */
-UNIV_INTERN
+@return space id, ULINT_UNDEFINED if error */
ulint
fsp_header_get_space_id(
/*====================*/
const page_t* page); /*!< in: first page of a tablespace */
-/**********************************************************************//**
-Reads the space flags from the first page of a tablespace.
-@return flags */
-UNIV_INTERN
+
+/** Read a tablespace header field.
+@param[in] page first page of a tablespace
+@param[in] field the header field
+@return the contents of the header field */
+inline
ulint
-fsp_header_get_flags(
-/*=================*/
- const page_t* page); /*!< in: first page of a tablespace */
-/**********************************************************************//**
-Reads the compressed page size from the first page of a tablespace.
-@return compressed page size in bytes, or 0 if uncompressed */
-UNIV_INTERN
+fsp_header_get_field(const page_t* page, ulint field)
+{
+ return(mach_read_from_4(FSP_HEADER_OFFSET + field + page));
+}
+
+/** Read the flags from the tablespace header page.
+@param[in] page first page of a tablespace
+@return the contents of FSP_SPACE_FLAGS */
+inline
ulint
-fsp_header_get_zip_size(
-/*====================*/
- const page_t* page); /*!< in: first page of a tablespace */
+fsp_header_get_flags(const page_t* page)
+{
+ return(fsp_header_get_field(page, FSP_SPACE_FLAGS));
+}
+
+/** Get the byte offset of encryption information in page 0.
+@param[in] ps page size
+@return byte offset relative to FSP_HEADER_OFFSET */
+inline MY_ATTRIBUTE((pure, warn_unused_result))
+ulint
+fsp_header_get_encryption_offset(const page_size_t& ps)
+{
+ return XDES_ARR_OFFSET + XDES_SIZE * ps.physical() / FSP_EXTENT_SIZE;
+}
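
As a usage sketch, the inline accessors above can be combined to inspect a buffered copy of page 0; the ib::info() logger is an assumption borrowed from the rest of the codebase:

	void
	dump_fsp_header(const page_t* page)
	{
		ulint	space_id = fsp_header_get_space_id(page);
		ulint	flags = fsp_header_get_flags(page);

		ib::info() << "space " << space_id << " flags " << flags;
	}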
+
+/** Check the encryption key from the first page of a tablespace.
+@param[in] fsp_flags tablespace flags
+@param[in] page first page of a tablespace
+@return true if success */
+bool
+fsp_header_check_encryption_key(
+ ulint fsp_flags,
+ page_t* page);
+
/**********************************************************************//**
Writes the space id and flags to a tablespace header. The flags contain
row type, physical/compressed page size, and logical/uncompressed page
size of the tablespace. */
-UNIV_INTERN
void
fsp_header_init_fields(
/*===================*/
@@ -524,28 +396,25 @@ fsp_header_init_fields(
@param[in] space_id space id
@param[in] size current size in blocks
@param[in,out] mtr mini-transaction */
-UNIV_INTERN
void
fsp_header_init(ulint space_id, ulint size, mtr_t* mtr);
/**********************************************************************//**
Increases the space size field of a space. */
-UNIV_INTERN
void
fsp_header_inc_size(
/*================*/
- ulint space, /*!< in: space id */
+ ulint space_id, /*!< in: space id */
ulint size_inc, /*!< in: size increment in pages */
mtr_t* mtr); /*!< in/out: mini-transaction */
/**********************************************************************//**
Creates a new segment.
@return the block where the segment header is placed, x-latched, NULL
if could not create segment because of lack of space */
-UNIV_INTERN
buf_block_t*
fseg_create(
/*========*/
- ulint space, /*!< in: space id */
+ ulint space_id,/*!< in: space id */
ulint page, /*!< in: page where the segment header is placed: if
this is != 0, the page must belong to another segment,
if this is 0, a new page will be allocated and it
@@ -557,11 +426,10 @@ fseg_create(
Creates a new segment.
@return the block where the segment header is placed, x-latched, NULL
if could not create segment because of lack of space */
-UNIV_INTERN
buf_block_t*
fseg_create_general(
/*================*/
- ulint space, /*!< in: space id */
+ ulint space_id,/*!< in: space id */
ulint page, /*!< in: page where the segment header is placed: if
this is != 0, the page must belong to another segment,
if this is 0, a new page will be allocated and it
@@ -578,8 +446,7 @@ fseg_create_general(
/**********************************************************************//**
Calculates the number of pages reserved by a segment, and how many pages are
currently used.
-@return number of reserved pages */
-UNIV_INTERN
+@return number of reserved pages */
ulint
fseg_n_reserved_pages(
/*==================*/
@@ -590,15 +457,15 @@ fseg_n_reserved_pages(
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize
file space fragmentation.
-@param[in/out] seg_header segment header
-@param[in] hint hint of which page would be desirable
-@param[in] direction if the new page is needed because
+@param[in,out] seg_header segment header
+@param[in] hint hint of which page would be desirable
+@param[in] direction if the new page is needed because
of an index page split, and records are
inserted there in order, into which
direction they go alphabetically: FSP_DOWN,
FSP_UP, FSP_NO_DIR
-@param[in/out] mtr mini-transaction
-@return X-latched block, or NULL if no page could be allocated */
+@param[in,out] mtr mini-transaction
+@return X-latched block, or NULL if no page could be allocated */
#define fseg_alloc_free_page(seg_header, hint, direction, mtr) \
fseg_alloc_free_page_general(seg_header, hint, direction, \
FALSE, mtr, mtr)
@@ -610,7 +477,6 @@ fragmentation.
@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
(init_mtr == mtr, or the page was not previously freed in mtr)
@retval block (not allocated or initialized) otherwise */
-UNIV_INTERN
buf_block_t*
fseg_alloc_free_page_general(
/*=========================*/
@@ -633,8 +499,8 @@ fseg_alloc_free_page_general(
If init_mtr!=mtr, but the page is already
latched in mtr, do not initialize the page. */
MY_ATTRIBUTE((warn_unused_result, nonnull));
-/**********************************************************************//**
-Reserves free pages from a tablespace. All mini-transactions which may
+
+/** Reserves free pages from a tablespace. All mini-transactions which may
use several pages from the tablespace should call this function beforehand
and reserve enough free extents so that they certainly will be able
to do their operation, like a B-tree page split, fully. Reservations
@@ -653,131 +519,198 @@ The purpose is to avoid dead end where the database is full but the
user cannot free any space because these freeing operations temporarily
reserve some space.
-Single-table tablespaces whose size is < 32 pages are a special case. In this
-function we would liberally reserve several 64 page extents for every page
-split or merge in a B-tree. But we do not want to waste disk space if the table
-only occupies < 32 pages. That is why we apply different rules in that special
-case, just ensuring that there are 3 free pages available.
-@return TRUE if we were able to make the reservation */
-UNIV_INTERN
-ibool
+Single-table tablespaces whose size is < FSP_EXTENT_SIZE pages are a special
+case. In this function we would liberally reserve several extents for
+every page split or merge in a B-tree. But we do not want to waste disk space
+if the table only occupies < FSP_EXTENT_SIZE pages. That is why we apply
+different rules in that special case, just ensuring that there are n_pages
+free pages available.
+
+@param[out] n_reserved number of extents actually reserved; if we
+ return true and the tablespace size is <
+ FSP_EXTENT_SIZE pages, then this can be 0,
+ otherwise it is n_ext
+@param[in] space_id tablespace identifier
+@param[in] n_ext number of extents to reserve
+@param[in]	alloc_type	page reservation type (FSP_BLOB, etc.)
+@param[in,out]	mtr		the mini-transaction
+@param[in] n_pages for small tablespaces (tablespace size is
+ less than FSP_EXTENT_SIZE), number of free
+ pages to reserve.
+@return true if we were able to make the reservation */
+bool
fsp_reserve_free_extents(
-/*=====================*/
- ulint* n_reserved,/*!< out: number of extents actually reserved; if we
- return TRUE and the tablespace size is < 64 pages,
- then this can be 0, otherwise it is n_ext */
- ulint space, /*!< in: space id */
- ulint n_ext, /*!< in: number of extents to reserve */
- ulint alloc_type,/*!< in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
- mtr_t* mtr); /*!< in: mini-transaction */
-/**********************************************************************//**
-This function should be used to get information on how much we still
-will be able to insert new data to the database without running out the
-tablespace. Only free extents are taken into account and we also subtract
-the safety margin required by the above function fsp_reserve_free_extents.
-@return available space in kB */
-UNIV_INTERN
-ullint
+ ulint* n_reserved,
+ ulint space_id,
+ ulint n_ext,
+ fsp_reserve_t alloc_type,
+ mtr_t* mtr,
+ ulint n_pages = 2);
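
A hedged sketch of the intended call pattern; the FSP_NORMAL enumerator and fil_space_release_free_extents() are assumed from the surrounding codebase:

	ulint	n_reserved = 0;

	if (fsp_reserve_free_extents(&n_reserved, space_id, 1,
				     FSP_NORMAL, mtr)) {
		/* ... perform the B-tree page split ... */

		/* give back whatever was actually reserved */
		fil_space_release_free_extents(space_id, n_reserved);
	}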
+
+/** Calculate how many KiB of new data we will be able to insert to the
+tablespace without running out of space.
+@param[in] space_id tablespace ID
+@return available space in KiB
+@retval UINTMAX_MAX if unknown */
+uintmax_t
fsp_get_available_space_in_free_extents(
-/*====================================*/
- ulint space); /*!< in: space id */
+ ulint space_id);
+
+/** Calculate how many KiB of new data we will be able to insert to the
+tablespace without running out of space. Start with a space object that has
+been acquired by the caller who holds it for the calculation,
+@param[in] space tablespace object from fil_space_acquire()
+@return available space in KiB */
+uintmax_t
+fsp_get_available_space_in_free_extents(
+ const fil_space_t* space);
+
/**********************************************************************//**
Frees a single page of a segment. */
-UNIV_INTERN
void
-fseg_free_page(
-/*===========*/
+fseg_free_page_func(
fseg_header_t* seg_header, /*!< in: segment header */
- ulint space, /*!< in: space id */
+ ulint space_id, /*!< in: space id */
ulint page, /*!< in: page offset */
+#ifdef BTR_CUR_HASH_ADAPT
+ bool ahi, /*!< in: whether we may need to drop
+ the adaptive hash index */
+#endif /* BTR_CUR_HASH_ADAPT */
mtr_t* mtr); /*!< in/out: mini-transaction */
-/**********************************************************************//**
-Checks if a single page of a segment is free.
-@return true if free */
-UNIV_INTERN
+#ifdef BTR_CUR_HASH_ADAPT
+# define fseg_free_page(header, space_id, page, ahi, mtr) \
+ fseg_free_page_func(header, space_id, page, ahi, mtr)
+#else /* BTR_CUR_HASH_ADAPT */
+# define fseg_free_page(header, space_id, page, ahi, mtr) \
+ fseg_free_page_func(header, space_id, page, mtr)
+#endif /* BTR_CUR_HASH_ADAPT */
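
This macro pair keeps call sites free of #ifdef: every caller passes the adaptive hash index flag, and builds without BTR_CUR_HASH_ADAPT simply drop it. A hypothetical call site:

	/* compiles identically with or without BTR_CUR_HASH_ADAPT */
	fseg_free_page(seg_header, space_id, page_no, /* ahi */ true, mtr);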
+/** Determine whether a page is free.
+@param[in,out] space tablespace
+@param[in] page page number
+@return whether the page is marked as free */
bool
-fseg_page_is_free(
-/*==============*/
- fseg_header_t* seg_header, /*!< in: segment header */
- ulint space, /*!< in: space id */
- ulint page) /*!< in: page offset */
+fseg_page_is_free(fil_space_t* space, unsigned page)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/**********************************************************************//**
Frees part of a segment. This function can be used to free a segment
by repeatedly calling this function in different mini-transactions.
Doing the freeing in a single mini-transaction might result in
too big a mini-transaction.
-@return TRUE if freeing completed */
-UNIV_INTERN
+@return TRUE if freeing completed */
ibool
-fseg_free_step(
-/*===========*/
+fseg_free_step_func(
fseg_header_t* header, /*!< in, own: segment header; NOTE: if the header
resides on the first page of the frag list
of the segment, this pointer becomes obsolete
after the last freeing step */
- mtr_t* mtr); /*!< in/out: mini-transaction */
+#ifdef BTR_CUR_HASH_ADAPT
+ bool ahi, /*!< in: whether we may need to drop
+ the adaptive hash index */
+#endif /* BTR_CUR_HASH_ADAPT */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ MY_ATTRIBUTE((warn_unused_result));
+#ifdef BTR_CUR_HASH_ADAPT
+# define fseg_free_step(header, ahi, mtr) fseg_free_step_func(header, ahi, mtr)
+#else /* BTR_CUR_HASH_ADAPT */
+# define fseg_free_step(header, ahi, mtr) fseg_free_step_func(header, mtr)
+#endif /* BTR_CUR_HASH_ADAPT */
/**********************************************************************//**
Frees part of a segment. Differs from fseg_free_step because this function
leaves the header page unfreed.
-@return TRUE if freeing completed, except the header page */
-UNIV_INTERN
+@return TRUE if freeing completed, except the header page */
ibool
-fseg_free_step_not_header(
-/*======================*/
+fseg_free_step_not_header_func(
fseg_header_t* header, /*!< in: segment header which must reside on
the first fragment page of the segment */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/***********************************************************************//**
-Checks if a page address is an extent descriptor page address.
-@return TRUE if a descriptor page */
+#ifdef BTR_CUR_HASH_ADAPT
+ bool ahi, /*!< in: whether we may need to drop
+ the adaptive hash index */
+#endif /* BTR_CUR_HASH_ADAPT */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ MY_ATTRIBUTE((warn_unused_result));
+#ifdef BTR_CUR_HASH_ADAPT
+# define fseg_free_step_not_header(header, ahi, mtr) \
+ fseg_free_step_not_header_func(header, ahi, mtr)
+#else /* BTR_CUR_HASH_ADAPT */
+# define fseg_free_step_not_header(header, ahi, mtr) \
+ fseg_free_step_not_header_func(header, mtr)
+#endif /* BTR_CUR_HASH_ADAPT */
+
+/** Reset the page type.
+Data files created before MySQL 5.1.48 may contain garbage in FIL_PAGE_TYPE.
+In MySQL 3.23.53, only undo log pages and index pages were tagged.
+Any other pages were written with uninitialized bytes in FIL_PAGE_TYPE.
+@param[in] block block with invalid FIL_PAGE_TYPE
+@param[in] type expected page type
+@param[in,out] mtr mini-transaction */
+ATTRIBUTE_COLD
+void fil_block_reset_type(const buf_block_t& block, ulint type, mtr_t* mtr);
+
+/** Get the file page type.
+@param[in] page file page
+@return page type */
+inline uint16_t fil_page_get_type(const byte* page)
+{
+ return mach_read_from_2(page + FIL_PAGE_TYPE);
+}
+
+/** Check (and if needed, reset) the page type.
+Data files created before MySQL 5.1.48 may contain
+garbage in the FIL_PAGE_TYPE field.
+In MySQL 3.23.53, only undo log pages and index pages were tagged.
+Any other pages were written with uninitialized bytes in FIL_PAGE_TYPE.
+@param[in]	block	block with possibly invalid FIL_PAGE_TYPE
+@param[in] type expected page type
+@param[in,out] mtr mini-transaction */
+inline void
+fil_block_check_type(
+ const buf_block_t& block,
+ ulint type,
+ mtr_t* mtr)
+{
+ if (UNIV_UNLIKELY(type != fil_page_get_type(block.frame))) {
+ fil_block_reset_type(block, type, mtr);
+ }
+}
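
For example, a caller that has just read a block it expects to be a B-tree page could repair a stale type written by a pre-5.1.48 data file like this (FIL_PAGE_INDEX is assumed from fil0fil.h):

	/* calls fil_block_reset_type() only when the stored type differs */
	fil_block_check_type(*block, FIL_PAGE_INDEX, mtr);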
+
+/** Checks if a page address is an extent descriptor page address.
+@param[in] page_id page id
+@param[in] page_size page size
+@return TRUE if a descriptor page */
UNIV_INLINE
ibool
fsp_descr_page(
-/*===========*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint page_no);/*!< in: page number */
-/***********************************************************//**
-Parses a redo log record of a file page init.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-fsp_parse_init_file_page(
-/*=====================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr, /*!< in: buffer end */
- buf_block_t* block); /*!< in: block or NULL */
-/*******************************************************************//**
-Validates the file space system and its segments.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-fsp_validate(
-/*=========*/
- ulint space); /*!< in: space id */
-/*******************************************************************//**
-Prints info of a file space. */
-UNIV_INTERN
-void
-fsp_print(
-/*======*/
- ulint space); /*!< in: space id */
+ const page_id_t page_id,
+ const page_size_t& page_size);
+
+/** Initialize a file page whose prior contents should be ignored.
+@param[in,out] block buffer pool block */
+void fsp_apply_init_file_page(buf_block_t* block);
+
+/** Initialize a file page.
+@param[in] space tablespace
+@param[in,out] block file page
+@param[in,out] mtr mini-transaction */
+inline void fsp_init_file_page(
#ifdef UNIV_DEBUG
-/*******************************************************************//**
-Validates a segment.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-fseg_validate(
-/*==========*/
- fseg_header_t* header, /*!< in: segment header */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-#endif /* UNIV_DEBUG */
+ const fil_space_t* space,
+#endif
+ buf_block_t* block, mtr_t* mtr)
+{
+ ut_d(space->modify_check(*mtr));
+ ut_ad(space->id == block->page.id.space());
+ fsp_apply_init_file_page(block);
+ mlog_write_initial_log_record(block->frame, MLOG_INIT_FILE_PAGE2, mtr);
+}
+
+#ifndef UNIV_DEBUG
+# define fsp_init_file_page(space, block, mtr) fsp_init_file_page(block, mtr)
+#endif
+
#ifdef UNIV_BTR_PRINT
/*******************************************************************//**
Writes info of a segment. */
-UNIV_INTERN
void
fseg_print(
/*=======*/
@@ -785,69 +718,6 @@ fseg_print(
mtr_t* mtr); /*!< in/out: mini-transaction */
#endif /* UNIV_BTR_PRINT */
-/** Validate the tablespace flags, which are stored in the
-tablespace header at offset FSP_SPACE_FLAGS.
-@param[in] flags the contents of FSP_SPACE_FLAGS
-@param[in] is_ibd whether this is an .ibd file (not system tablespace)
-@return whether the flags are correct (not in the buggy 10.1) format */
-MY_ATTRIBUTE((warn_unused_result, const))
-UNIV_INLINE
-bool
-fsp_flags_is_valid(ulint flags, bool is_ibd)
-{
- DBUG_EXECUTE_IF("fsp_flags_is_valid_failure",
- return(false););
- if (flags == 0) {
- return(true);
- }
- if (flags & ~FSP_FLAGS_MASK) {
- return(false);
- }
- if ((flags & (FSP_FLAGS_MASK_POST_ANTELOPE | FSP_FLAGS_MASK_ATOMIC_BLOBS))
- == FSP_FLAGS_MASK_ATOMIC_BLOBS) {
- /* If the "atomic blobs" flag (indicating
- ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED) flag
- is set, then the "post Antelope" (ROW_FORMAT!=REDUNDANT) flag
- must also be set. */
- return(false);
- }
- /* Bits 10..14 should be 0b0000d where d is the DATA_DIR flag
- of MySQL 5.6 and MariaDB 10.0, which we ignore.
- In the buggy FSP_SPACE_FLAGS written by MariaDB 10.1.0 to 10.1.20,
- bits 10..14 would be nonzero 0bsssaa where sss is
- nonzero PAGE_SSIZE (3, 4, 6, or 7)
- and aa is ATOMIC_WRITES (not 0b11). */
- if (FSP_FLAGS_GET_RESERVED(flags) & ~1U) {
- return(false);
- }
-
- const ulint ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags);
- if (ssize == 1 || ssize == 2 || ssize == 5 || ssize & 8) {
- /* the page_size is not between 4k and 64k;
- 16k should be encoded as 0, not 5 */
- return(false);
- }
- const ulint zssize = FSP_FLAGS_GET_ZIP_SSIZE(flags);
- if (zssize == 0) {
- /* not ROW_FORMAT=COMPRESSED */
- } else if (zssize > (ssize ? ssize : 5)) {
- /* invalid KEY_BLOCK_SIZE */
- return(false);
- } else if (~flags & (FSP_FLAGS_MASK_POST_ANTELOPE
- | FSP_FLAGS_MASK_ATOMIC_BLOBS)) {
- /* both these flags should be set for
- ROW_FORMAT=COMPRESSED */
- return(false);
- }
-
- /* The flags do look valid. But, avoid misinterpreting
- buggy MariaDB 10.1 format flags for
- PAGE_COMPRESSED=1 PAGE_COMPRESSION_LEVEL={0,2,3}
- as valid-looking PAGE_SSIZE if this is known to be
- an .ibd file and we are using the default innodb_page_size=16k. */
- return(ssize == 0 || !is_ibd || srv_page_size != UNIV_PAGE_SIZE_ORIG);
-}
-
/** Convert FSP_SPACE_FLAGS from the buggy MariaDB 10.1.0..10.1.20 format.
@param[in] flags the contents of FSP_SPACE_FLAGS
@return the flags corrected from the buggy MariaDB 10.1 format
@@ -978,29 +848,19 @@ fsp_flags_match(ulint expected, ulint actual)
return(actual == expected);
}
-/********************************************************************//**
-Determine if the tablespace is compressed from dict_table_t::flags.
-@return TRUE if compressed, FALSE if not compressed */
-UNIV_INLINE
-ibool
-fsp_flags_is_compressed(
-/*====================*/
- ulint flags); /*!< in: tablespace flags */
-
-/********************************************************************//**
-Calculates the descriptor index within a descriptor page.
-@return descriptor index */
+/** Calculates the descriptor index within a descriptor page.
+@param[in] page_size page size
+@param[in] offset page offset
+@return descriptor index */
UNIV_INLINE
ulint
xdes_calc_descriptor_index(
-/*=======================*/
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint offset); /*!< in: page offset */
+ const page_size_t& page_size,
+ ulint offset);
/**********************************************************************//**
Gets a descriptor bit of a page.
-@return TRUE if free */
+@return TRUE if free */
UNIV_INLINE
ibool
xdes_get_bit(
@@ -1010,68 +870,18 @@ xdes_get_bit(
ulint offset);/*!< in: page offset within extent:
0 ... FSP_EXTENT_SIZE - 1 */
-/********************************************************************//**
-Calculates the page where the descriptor of a page resides.
-@return descriptor page offset */
+/** Calculates the page where the descriptor of a page resides.
+@param[in] page_size page size
+@param[in] offset page offset
+@return descriptor page offset */
UNIV_INLINE
ulint
xdes_calc_descriptor_page(
-/*======================*/
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint offset); /*!< in: page offset */
+ const page_size_t& page_size,
+ ulint offset);
-#endif /* !UNIV_INNOCHECKSUM */
+#endif /* !UNIV_INNOCHECKSUM */
-/********************************************************************//**
-Extract the zip size from tablespace flags. A tablespace has only one
-physical page size whether that page is compressed or not.
-@return compressed page size of the file-per-table tablespace in bytes,
-or zero if the table is not compressed. */
-UNIV_INLINE
-ulint
-fsp_flags_get_zip_size(
-/*====================*/
- ulint flags); /*!< in: tablespace flags */
-/********************************************************************//**
-Extract the page size from tablespace flags.
-@return page size of the tablespace in bytes */
-UNIV_INLINE
-ulint
-fsp_flags_get_page_size(
-/*====================*/
- ulint flags); /*!< in: tablespace flags */
-
-/*********************************************************************
-Compute offset after xdes where crypt data can be stored
-@param[in] zip_size Compressed size or 0
-@return offset */
-UNIV_INTERN
-ulint
-fsp_header_get_crypt_offset(
- const ulint zip_size)
- MY_ATTRIBUTE((warn_unused_result));
-
-#define fsp_page_is_free(space,page,mtr) \
- fsp_page_is_free_func(space,page,mtr, __FILE__, __LINE__)
-
-#ifndef UNIV_INNOCHECKSUM
-/**********************************************************************//**
-Checks if a single page is free.
-@return true if free */
-UNIV_INTERN
-bool
-fsp_page_is_free_func(
-/*==============*/
- ulint space, /*!< in: space id */
- ulint page, /*!< in: page offset */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- const char *file,
- ulint line);
-#endif
-
-#ifndef UNIV_NONINL
#include "fsp0fsp.ic"
-#endif
#endif
diff --git a/storage/innobase/include/fsp0fsp.ic b/storage/innobase/include/fsp0fsp.ic
index a59f000180d..d0f7fba4047 100644
--- a/storage/innobase/include/fsp0fsp.ic
+++ b/storage/innobase/include/fsp0fsp.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2013, 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
@@ -26,115 +26,38 @@ Created 12/18/1995 Heikki Tuuri
#ifndef UNIV_INNOCHECKSUM
-/***********************************************************************//**
-Checks if a page address is an extent descriptor page address.
-@return TRUE if a descriptor page */
+/** Checks if a page address is an extent descriptor page address.
+@param[in] page_id page id
+@param[in] page_size page size
+@return TRUE if a descriptor page */
UNIV_INLINE
ibool
fsp_descr_page(
-/*===========*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint page_no)/*!< in: page number */
+ const page_id_t page_id,
+ const page_size_t& page_size)
{
- ut_ad(ut_is_2pow(zip_size));
-
- if (!zip_size) {
- return((page_no & (UNIV_PAGE_SIZE - 1)) == FSP_XDES_OFFSET);
- }
-
- return((page_no & (zip_size - 1)) == FSP_XDES_OFFSET);
-}
-
-/********************************************************************//**
-Determine if the tablespace is compressed from dict_table_t::flags.
-@return TRUE if compressed, FALSE if not compressed */
-UNIV_INLINE
-ibool
-fsp_flags_is_compressed(
-/*====================*/
- ulint flags) /*!< in: tablespace flags */
-{
- return(FSP_FLAGS_GET_ZIP_SSIZE(flags) != 0);
-}
-
-#endif /* !UNIV_INNOCHECKSUM */
-
-/********************************************************************//**
-Extract the zip size from tablespace flags.
-@return compressed page size of the file-per-table tablespace in bytes,
-or zero if the table is not compressed. */
-UNIV_INLINE
-ulint
-fsp_flags_get_zip_size(
-/*===================*/
- ulint flags) /*!< in: tablespace flags */
-{
- ulint zip_size = 0;
- ulint ssize = FSP_FLAGS_GET_ZIP_SSIZE(flags);
-
- /* Convert from a 'log2 minus 9' to a page size in bytes. */
- if (ssize) {
- zip_size = ((UNIV_ZIP_SIZE_MIN >> 1) << ssize);
-
- ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
- }
-
- return(zip_size);
+ return((page_id.page_no() & (page_size.physical() - 1))
+ == FSP_XDES_OFFSET);
}
-/********************************************************************//**
-Extract the page size from tablespace flags.
-@return page size of the tablespace in bytes */
-UNIV_INLINE
-ulint
-fsp_flags_get_page_size(
-/*====================*/
- ulint flags) /*!< in: tablespace flags */
-{
- ulint page_size = 0;
- ulint ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags);
-
- /* Convert from a 'log2 minus 9' to a page size in bytes. */
- if (UNIV_UNLIKELY(ssize)) {
- page_size = ((UNIV_ZIP_SIZE_MIN >> 1) << ssize);
-
- ut_ad(page_size <= UNIV_PAGE_SIZE_MAX);
- } else {
- /* If the page size was not stored, then it is the
- original 16k. */
- page_size = UNIV_PAGE_SIZE_ORIG;
- }
-
- return(page_size);
-}
-
-#ifndef UNIV_INNOCHECKSUM
-/********************************************************************//**
-Calculates the descriptor index within a descriptor page.
-@return descriptor index */
+/** Calculates the descriptor index within a descriptor page.
+@param[in] page_size page size
+@param[in] offset page offset
+@return descriptor index */
UNIV_INLINE
ulint
xdes_calc_descriptor_index(
-/*=======================*/
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint offset) /*!< in: page offset */
+ const page_size_t& page_size,
+ ulint offset)
{
- ut_ad(ut_is_2pow(zip_size));
-
- if (zip_size == 0) {
- return(ut_2pow_remainder(offset, UNIV_PAGE_SIZE)
- / FSP_EXTENT_SIZE);
- } else {
- return(ut_2pow_remainder(offset, zip_size) / FSP_EXTENT_SIZE);
- }
+ return(ut_2pow_remainder(offset, page_size.physical())
+ / FSP_EXTENT_SIZE);
}
-#endif /* !UNIV_INNOCHECKSUM */
+#endif /* !UNIV_INNOCHECKSUM */
/**********************************************************************//**
Gets a descriptor bit of a page.
-@return TRUE if free */
+@return TRUE if free */
UNIV_INLINE
ibool
xdes_get_bit(
@@ -159,16 +82,15 @@ xdes_get_bit(
}
#ifndef UNIV_INNOCHECKSUM
-/********************************************************************//**
-Calculates the page where the descriptor of a page resides.
-@return descriptor page offset */
+/** Calculates the page where the descriptor of a page resides.
+@param[in] page_size page size
+@param[in] offset page offset
+@return descriptor page offset */
UNIV_INLINE
ulint
xdes_calc_descriptor_page(
-/*======================*/
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint offset) /*!< in: page offset */
+ const page_size_t& page_size,
+ ulint offset)
{
#ifndef DOXYGEN /* Doxygen gets confused by these */
# if UNIV_PAGE_SIZE_MAX <= XDES_ARR_OFFSET \
@@ -190,16 +112,13 @@ xdes_calc_descriptor_page(
+ (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE)
* XDES_SIZE);
- ut_ad(ut_is_2pow(zip_size));
-
- if (zip_size == 0) {
- return(ut_2pow_round(offset, UNIV_PAGE_SIZE));
- } else {
- ut_ad(zip_size > XDES_ARR_OFFSET
- + (zip_size / FSP_EXTENT_SIZE) * XDES_SIZE);
- return(ut_2pow_round(offset, zip_size));
+#ifdef UNIV_DEBUG
+ if (page_size.is_compressed()) {
+ ut_a(page_size.physical() > XDES_ARR_OFFSET
+ + (page_size.physical() / FSP_EXTENT_SIZE) * XDES_SIZE);
}
-}
+#endif /* UNIV_DEBUG */
+ return(ut_2pow_round(offset, page_size.physical()));
+}
#endif /* !UNIV_INNOCHECKSUM */
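
A worked example for the two xdes helpers, assuming 16 KiB physical pages and FSP_EXTENT_SIZE = 64:

	/* For page offset 40000:
		xdes_calc_descriptor_page()  = ut_2pow_round(40000, 16384)
					     = 32768
		xdes_calc_descriptor_index() = (40000 % 16384) / 64
					     = 7232 / 64 = 113
	so the descriptor of page 40000 is slot 113 on the descriptor
	page at offset 32768. */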
-
diff --git a/storage/innobase/include/fsp0pagecompress.h b/storage/innobase/include/fsp0pagecompress.h
index 0dfbc258e05..fc0b907dfa7 100644
--- a/storage/innobase/include/fsp0pagecompress.h
+++ b/storage/innobase/include/fsp0pagecompress.h
@@ -59,17 +59,6 @@ fsp_flags_get_page_compression_level(
/*=================================*/
ulint flags); /*!< in: tablespace flags */
-/********************************************************************//**
-Determine the tablespace is using atomic writes from dict_table_t::flags.
-@return true if atomic writes is used, false if not */
-UNIV_INLINE
-atomic_writes_t
-fsp_flags_get_atomic_writes(
-/*========================*/
- ulint flags); /*!< in: tablespace flags */
-
-#ifndef UNIV_NONINL
#include "fsp0pagecompress.ic"
-#endif
#endif
diff --git a/storage/innobase/include/fsp0pagecompress.ic b/storage/innobase/include/fsp0pagecompress.ic
index 159b0526b8c..590a609c309 100644
--- a/storage/innobase/include/fsp0pagecompress.ic
+++ b/storage/innobase/include/fsp0pagecompress.ic
@@ -37,26 +37,15 @@ fsp_flags_get_page_compression_level(
return(FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags));
}
-/********************************************************************//**
-Determine the tablespace is using atomic writes from dict_table_t::flags.
-@return true if atomic writes is used, false if not */
-UNIV_INLINE
-atomic_writes_t
-fsp_flags_get_atomic_writes(
-/*========================*/
- ulint flags) /*!< in: tablespace flags */
-{
- return((atomic_writes_t)FSP_FLAGS_GET_ATOMIC_WRITES(flags));
-}
/*******************************************************************//**
Find out whether the page is page compressed
@return true if page is page compressed, false if not */
UNIV_INLINE
-ibool
+bool
fil_page_is_compressed(
/*===================*/
- byte* buf) /*!< in: page */
+ const byte* buf) /*!< in: page */
{
return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED);
}
@@ -65,10 +54,10 @@ fil_page_is_compressed(
Find out whether the page is page compressed
@return true if page is page compressed, false if not */
UNIV_INLINE
-ibool
+bool
fil_page_is_compressed_encrypted(
/*=============================*/
- byte* buf) /*!< in: page */
+ const byte* buf) /*!< in: page */
{
return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED);
}
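
A small sketch combining the two predicates on a raw page buffer; the helper name is hypothetical:

	/* true for plain and for encrypted page_compressed pages */
	bool
	page_is_page_compressed_any(const byte* page)
	{
		return(fil_page_is_compressed(page)
		       || fil_page_is_compressed_encrypted(page));
	}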
diff --git a/storage/innobase/include/fsp0space.h b/storage/innobase/include/fsp0space.h
new file mode 100644
index 00000000000..5bd70e4f80d
--- /dev/null
+++ b/storage/innobase/include/fsp0space.h
@@ -0,0 +1,242 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/fsp0space.h
+Shared tablespace interface
+
+Created 2013-7-26 by Kevin Lewis
+*******************************************************/
+
+#ifndef fsp0space_h
+#define fsp0space_h
+
+#include "fsp0file.h"
+#include "fsp0fsp.h"
+#include "fsp0types.h"
+
+#include <vector>
+
+/** Data structure that contains the information about shared tablespaces.
+Currently this can be the system tablespace or a temporary table tablespace */
+class Tablespace {
+
+public:
+ typedef std::vector<Datafile, ut_allocator<Datafile> > files_t;
+
+ /** Data file information - each Datafile can be accessed globally */
+ files_t m_files;
+ /** Data file iterator */
+ typedef files_t::iterator iterator;
+ /** Data file iterator */
+ typedef files_t::const_iterator const_iterator;
+
+ Tablespace()
+ :
+ m_files(),
+ m_name(),
+ m_space_id(ULINT_UNDEFINED),
+ m_path(),
+ m_flags(),
+ m_ignore_read_only(false)
+ {
+ /* No op */
+ }
+
+ virtual ~Tablespace()
+ {
+ shutdown();
+ ut_ad(m_files.empty());
+ ut_ad(m_space_id == ULINT_UNDEFINED);
+ }
+
+ // Disable copying
+ Tablespace(const Tablespace&);
+ Tablespace& operator=(const Tablespace&);
+
+ /** Data file iterator */
+ const_iterator begin() const { return m_files.begin(); }
+ /** Data file iterator */
+ const_iterator end() const { return m_files.end(); }
+ /** Data file iterator */
+ iterator begin() { return m_files.begin(); }
+ /** Data file iterator */
+ iterator end() { return m_files.end(); }
+
+ void set_name(const char* name) { m_name = name; }
+ const char* name() const { return m_name; }
+
+ /** Set tablespace path and filename members.
+ @param[in] path where tablespace file(s) resides
+ @param[in] len length of the file path */
+ void set_path(const char* path, size_t len)
+ {
+ ut_ad(m_path == NULL);
+ m_path = mem_strdupl(path, len);
+ ut_ad(m_path != NULL);
+
+ os_normalize_path(m_path);
+ }
+
+ /** Set tablespace path and filename members.
+ @param[in] path where tablespace file(s) resides */
+ void set_path(const char* path)
+ {
+ set_path(path, strlen(path));
+ }
+
+ /** Get tablespace path
+ @return tablespace path */
+ const char* path() const
+ {
+ return(m_path);
+ }
+
+ /** Set the space id of the tablespace
+ @param[in] space_id tablespace ID to set */
+ void set_space_id(ulint space_id)
+ {
+ ut_ad(m_space_id == ULINT_UNDEFINED);
+ m_space_id = space_id;
+ }
+
+ /** Get the space id of the tablespace
+ @return m_space_id space id of the tablespace */
+ ulint space_id() const
+ {
+ return(m_space_id);
+ }
+
+ /** Set the tablespace flags
+ @param[in] fsp_flags tablespace flags */
+ void set_flags(ulint fsp_flags)
+ {
+ ut_ad(fsp_flags_is_valid(fsp_flags, false));
+ m_flags = fsp_flags;
+ }
+
+ /** Get the tablespace flags
+ @return m_flags tablespace flags */
+ ulint flags() const
+ {
+ return(m_flags);
+ }
+
+ /** Get the tablespace encryption mode
+ @return m_mode tablespace encryption mode */
+ fil_encryption_t encryption_mode() const
+ {
+ return (m_mode);
+ }
+
+ /** Get the tablespace encryption key_id
+ @return m_key_id tablespace encryption key_id */
+ uint32_t key_id() const
+ {
+ return (m_key_id);
+ }
+
+ /** Set Ignore Read Only Status for tablespace.
+ @param[in] read_only_status read only status indicator */
+ void set_ignore_read_only(bool read_only_status)
+ {
+ m_ignore_read_only = read_only_status;
+ }
+
+ /** Free the memory allocated by the Tablespace object */
+ void shutdown();
+
+ /** @return the sum of the file sizes of each Datafile */
+ ulint get_sum_of_sizes() const
+ {
+ ulint sum = 0;
+
+ for (const_iterator it = begin(); it != end(); ++it) {
+ sum += it->m_size;
+ }
+
+ return(sum);
+ }
+
+ /** Open or Create the data files if they do not exist.
+ @param[in] is_temp whether this is a temporary tablespace
+ @return DB_SUCCESS or error code */
+ dberr_t open_or_create(bool is_temp)
+ MY_ATTRIBUTE((warn_unused_result));
+
+ /** Delete all the data files. */
+ void delete_files();
+
+ /** Check if two tablespaces have common data file names.
+ @param[in] other_space Tablespace to check against this.
+ @return true if they have the same data filenames and paths */
+ bool intersection(const Tablespace* other_space);
+
+ /** Use the ADD DATAFILE path to create a Datafile object and add
+ it to the front of m_files. Parse the datafile path into a path
+ and a basename with extension 'ibd'. This datafile_path provided
+ may be an absolute or relative path, but it must end with the
+ extension .ibd and have a basename of at least 1 byte.
+
+ Set tablespace m_path member and add a Datafile with the filename.
+	@param[in]	datafile_path	full path of the tablespace file.
+	@return DB_SUCCESS or error code */
+ dberr_t add_datafile(
+ const char* datafile_path);
+
+	/** Return a pointer to the first Datafile for this Tablespace.
+	@return pointer to the first Datafile for this Tablespace */
+ Datafile* first_datafile()
+ {
+ ut_a(!m_files.empty());
+ return(&m_files.front());
+ }
+private:
+	/** Check whether a filename exists in the data files.
+ @param[in] filename Name to lookup in the data files.
+ @return true if the filename exists in the data files */
+ bool find(const char* filename) const;
+
+ /** Note that the data file was found.
+ @param[in] file data file object */
+ void file_found(Datafile& file);
+
+ /* DATA MEMBERS */
+
+ /** Name of the tablespace. */
+ const char* m_name;
+
+ /** Tablespace ID */
+ ulint m_space_id;
+
+ /** Path where tablespace files will reside, not including a filename.*/
+ char* m_path;
+
+ /** Tablespace flags */
+ ulint m_flags;
+
+ /** Encryption mode and key_id */
+ fil_encryption_t m_mode;
+ uint32_t m_key_id;
+
+protected:
+ /** Ignore server read only configuration for this tablespace. */
+ bool m_ignore_read_only;
+};
+
+#endif /* fsp0space_h */
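
A minimal usage sketch of this interface; the name, ID, flags and path below are illustrative only, and real callers live in the server startup code:

	Tablespace	ts;

	ts.set_name("example_space");
	ts.set_space_id(1000);	/* arbitrary example ID */
	ts.set_flags(0);
	/* add_datafile() parses the path; it must end in .ibd */
	if (ts.add_datafile("./test/t1.ibd") == DB_SUCCESS
	    && ts.open_or_create(/* is_temp */ false) == DB_SUCCESS) {
		/* ... use ts.first_datafile() ... */
	}
	ts.shutdown();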
diff --git a/storage/innobase/include/fsp0sysspace.h b/storage/innobase/include/fsp0sysspace.h
new file mode 100644
index 00000000000..da19547fc36
--- /dev/null
+++ b/storage/innobase/include/fsp0sysspace.h
@@ -0,0 +1,298 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/fsp0sysspace.h
+Multi file, shared, system tablespace implementation.
+
+Created 2013-7-26 by Kevin Lewis
+*******************************************************/
+
+#ifndef fsp0sysspace_h
+#define fsp0sysspace_h
+
+#include "fsp0space.h"
+
+/** If the last data file is auto-extended, we add this many pages to it
+at a time. We have to make this public because it is a config variable. */
+extern ulong sys_tablespace_auto_extend_increment;
+
+#ifdef UNIV_DEBUG
+/** Control whether extra debug checks are done for the temporary tablespace.
+Default = true, which disables such checks.
+This variable is not exposed to the end user, but is kept as a variable so
+that a developer can enable it during debugging. */
+extern bool srv_skip_temp_table_checks_debug;
+#endif /* UNIV_DEBUG */
+
+/** Data structure that contains the information about shared tablespaces.
+Currently this can be the system tablespace or a temporary table tablespace */
+class SysTablespace : public Tablespace
+{
+public:
+
+ SysTablespace()
+ :
+ m_auto_extend_last_file(),
+ m_last_file_size_max(),
+ m_created_new_raw(),
+ m_is_tablespace_full(false),
+ m_sanity_checks_done(false)
+ {
+ /* No op */
+ }
+
+ ~SysTablespace()
+ {
+ shutdown();
+ }
+
+ /** Set tablespace full status
+ @param[in] is_full true if full */
+ void set_tablespace_full_status(bool is_full)
+ {
+ m_is_tablespace_full = is_full;
+ }
+
+ /** Get tablespace full status
+	@return true if the tablespace is full */
+ bool get_tablespace_full_status()
+ {
+ return(m_is_tablespace_full);
+ }
+
+ /** Set sanity check status
+ @param[in] status true if sanity checks are done */
+ void set_sanity_check_status(bool status)
+ {
+ m_sanity_checks_done = status;
+ }
+
+ /** Get sanity check status
+ @return true if sanity checks are done */
+ bool get_sanity_check_status()
+ {
+ return(m_sanity_checks_done);
+ }
+
+ /** Parse the input params and populate member variables.
+ @param filepath path to data files
+ @param supports_raw true if it supports raw devices
+	@return true on successful parse */
+ bool parse_params(const char* filepath, bool supports_raw);
+
+ /** Check the data file specification.
+ @param[out] create_new_db true if a new database
+ is to be created
+	@param[in]	min_expected_tablespace_size	expected tablespace
+ size in bytes
+ @return DB_SUCCESS if all OK else error code */
+ dberr_t check_file_spec(
+ bool* create_new_db,
+ ulint min_expected_tablespace_size);
+
+ /** Free the memory allocated by parse() */
+ void shutdown();
+
+ /** Normalize the file size, convert to extents. */
+ void normalize();
+
+ /**
+ @return true if a new raw device was created. */
+ bool created_new_raw() const
+ {
+ return(m_created_new_raw);
+ }
+
+ /**
+ @return auto_extend value setting */
+ ulint can_auto_extend_last_file() const
+ {
+ return(m_auto_extend_last_file);
+ }
+
+ /** Set the last file size.
+ @param[in] size the size to set */
+ void set_last_file_size(ulint size)
+ {
+ ut_ad(!m_files.empty());
+ m_files.back().m_size = size;
+ }
+
+ /** Get the size of the last data file in the tablespace
+ @return the size of the last data file in the array */
+ ulint last_file_size() const
+ {
+ ut_ad(!m_files.empty());
+ return(m_files.back().m_size);
+ }
+
+ /**
+ @return the autoextend increment in pages. */
+ ulint get_autoextend_increment() const
+ {
+ return(sys_tablespace_auto_extend_increment
+ * ((1024 * 1024) / UNIV_PAGE_SIZE));
+ }
+
+ /**
+ @return next increment size */
+ ulint get_increment() const;
+
+ /** Open or create the data files
+ @param[in] is_temp whether this is a temporary tablespace
+ @param[in] create_new_db whether we are creating a new database
+ @param[out] sum_new_sizes sum of sizes of the new files added
+ @param[out] flush_lsn FIL_PAGE_FILE_FLUSH_LSN of first file
+ @return DB_SUCCESS or error code */
+ dberr_t open_or_create(
+ bool is_temp,
+ bool create_new_db,
+ ulint* sum_new_sizes,
+ lsn_t* flush_lsn)
+ MY_ATTRIBUTE((warn_unused_result));
+
+private:
+ /** Check the tablespace header for this tablespace.
+ @param[out] flushed_lsn the value of FIL_PAGE_FILE_FLUSH_LSN
+ @return DB_SUCCESS or error code */
+ dberr_t read_lsn_and_check_flags(lsn_t* flushed_lsn);
+
+ /**
+ @return true if the last file size is valid. */
+ bool is_valid_size() const
+ {
+ return(m_last_file_size_max >= last_file_size());
+ }
+
+ /**
+ @return true if configured to use raw devices */
+ bool has_raw_device();
+
+ /** Note that the data file was not found.
+ @param[in] file data file object
+	@param[out]	create_new_db	true if a new instance is to be created
+	@return DB_SUCCESS or error code */
+ dberr_t file_not_found(Datafile& file, bool* create_new_db);
+
+ /** Note that the data file was found.
+ @param[in,out] file data file object
+	@return true if a new instance is to be created */
+ bool file_found(Datafile& file);
+
+ /** Create a data file.
+ @param[in,out] file data file object
+ @return DB_SUCCESS or error code */
+ dberr_t create(Datafile& file);
+
+ /** Create a data file.
+ @param[in,out] file data file object
+ @return DB_SUCCESS or error code */
+ dberr_t create_file(Datafile& file);
+
+ /** Open a data file.
+ @param[in,out] file data file object
+ @return DB_SUCCESS or error code */
+ dberr_t open_file(Datafile& file);
+
+ /** Set the size of the file.
+ @param[in,out] file data file object
+ @return DB_SUCCESS or error code */
+ dberr_t set_size(Datafile& file);
+
+	/** Convert a numeric string that optionally ends in G or M to a
+	number of megabytes.
+ @param[in] ptr string with a quantity in bytes
+ @param[out] megs the number in megabytes
+ @return next character in string */
+ static char* parse_units(char* ptr, ulint* megs);
+
+private:
+ enum file_status_t {
+ FILE_STATUS_VOID = 0, /** status not set */
+ FILE_STATUS_RW_PERMISSION_ERROR,/** permission error */
+ FILE_STATUS_READ_WRITE_ERROR, /** not readable/writable */
+ FILE_STATUS_NOT_REGULAR_FILE_ERROR /** not a regular file */
+ };
+
+ /** Verify the size of the physical file
+ @param[in] file data file object
+ @return DB_SUCCESS if OK else error code. */
+ dberr_t check_size(Datafile& file);
+
+ /** Check if a file can be opened in the correct mode.
+ @param[in,out] file data file object
+ @param[out] reason exact reason if file_status check failed.
+ @return DB_SUCCESS or error code. */
+ dberr_t check_file_status(
+ const Datafile& file,
+ file_status_t& reason);
+
+ /* DATA MEMBERS */
+
+ /** if true, then we auto-extend the last data file */
+ bool m_auto_extend_last_file;
+
+ /** if != 0, this tells the max size auto-extending may increase the
+ last data file size */
+ ulint m_last_file_size_max;
+
+ /** If the following is true we do not allow
+ inserts etc. This protects the user from forgetting
+ the 'newraw' keyword to my.cnf */
+ bool m_created_new_raw;
+
+ /** Tablespace full status */
+ bool m_is_tablespace_full;
+
+ /** if false, then sanity checks are still pending */
+ bool m_sanity_checks_done;
+};
+
+/* GLOBAL OBJECTS */
+
+/** The control info of the system tablespace. */
+extern SysTablespace srv_sys_space;
+
+/** The control info of a temporary table shared tablespace. */
+extern SysTablespace srv_tmp_space;
+
+/** Check if the space_id is for a system-tablespace (shared + temp).
+@param[in] id Space ID to check
+@return true if id is a system tablespace, false if not. */
+UNIV_INLINE
+bool
+is_system_tablespace(ulint id)
+{
+ return(id == TRX_SYS_SPACE || id == SRV_TMP_SPACE_ID);
+}
+
+/** Check if predefined shared tablespace.
+@return true if predefined shared tablespace */
+UNIV_INLINE
+bool
+is_predefined_tablespace(
+ ulint id)
+{
+ ut_ad(srv_sys_space.space_id() == TRX_SYS_SPACE);
+ ut_ad(TRX_SYS_SPACE == 0);
+ return(id == TRX_SYS_SPACE
+ || id == SRV_TMP_SPACE_ID
+ || srv_is_undo_tablespace(id));
+}
+#endif /* fsp0sysspace_h */
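As a quick, illustrative check of how the reserved IDs above behave (assert-style sketch only, not part of the header; the ordinary space id 17 is made up):

/* Illustrative only: TRX_SYS_SPACE is 0 and SRV_TMP_SPACE_ID is the
reserved id 0xFFFFFFFE, so both predicates accept them. */
static void sys_space_id_example()
{
	ut_ad(is_system_tablespace(TRX_SYS_SPACE));
	ut_ad(is_system_tablespace(SRV_TMP_SPACE_ID));
	ut_ad(is_predefined_tablespace(TRX_SYS_SPACE));
	ut_ad(!is_system_tablespace(17));	/* an ordinary .ibd space id */
}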
diff --git a/storage/innobase/include/fsp0types.h b/storage/innobase/include/fsp0types.h
index 88bd629289a..c0150262242 100644
--- a/storage/innobase/include/fsp0types.h
+++ b/storage/innobase/include/fsp0types.h
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2014, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -29,7 +29,14 @@ Created May 26, 2009 Vasil Dimov
#include "univ.i"
-#include "fil0fil.h" /* for FIL_PAGE_DATA */
+#ifndef UNIV_INNOCHECKSUM
+
+/** The fil_space_t::id of the redo log. All persistent tablespaces
+have a smaller fil_space_t::id. */
+#define SRV_LOG_SPACE_FIRST_ID 0xFFFFFFF0U
+/** The fil_space_t::id of the innodb_temporary tablespace. */
+#define SRV_TMP_SPACE_ID 0xFFFFFFFEU
+
#include "ut0byte.h"
/** @name Flags for inserting records in order
@@ -43,6 +50,7 @@ fseg_alloc_free_page) */
#define FSP_NO_DIR ((byte)113) /*!< no order */
/* @} */
+#endif /* !UNIV_INNOCHECKSUM */
/** File space extent size in pages
page size | file space extent size
----------+-----------------------
@@ -52,23 +60,23 @@ page size | file space extent size
32 KiB | 64 pages = 2 MiB
64 KiB | 64 pages = 4 MiB
*/
-/** File space extent size (one megabyte if default two or four if not) in pages */
-#define FSP_EXTENT_SIZE ((UNIV_PAGE_SIZE <= (16384) ? \
- (1048576U / UNIV_PAGE_SIZE) : \
+#define FSP_EXTENT_SIZE ((UNIV_PAGE_SIZE <= (16384) ? \
+ (1048576 / UNIV_PAGE_SIZE) : \
((UNIV_PAGE_SIZE <= (32768)) ? \
- (2097152U / UNIV_PAGE_SIZE) : \
- (4194304U / UNIV_PAGE_SIZE))))
+ (2097152 / UNIV_PAGE_SIZE) : \
+ (4194304 / UNIV_PAGE_SIZE))))
-/** File space extent size (four megabytes) in pages for MAX page size */
-#define FSP_EXTENT_SIZE_MAX (4194304U / UNIV_PAGE_SIZE_MAX)
+/** File space extent size (four megabytes) in pages for MAX page size */
+#define FSP_EXTENT_SIZE_MAX (4194304 / UNIV_PAGE_SIZE_MAX)
/** File space extent size (one megabyte) in pages for MIN page size */
-#define FSP_EXTENT_SIZE_MIN (1048576U / UNIV_PAGE_SIZE_MIN)
+#define FSP_EXTENT_SIZE_MIN (1048576 / UNIV_PAGE_SIZE_MIN)
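To make the extent-size table above concrete, the macro arithmetic can be spot-checked at compile time; these checks are illustrative only and not part of the header.

/* Illustrative compile-time checks: with 16 KiB pages one extent is
1 MiB = 64 pages, and larger pages keep the 64-page extent. */
static_assert(1048576 / 16384 == 64, "16 KiB pages: 64-page extent");
static_assert(2097152 / 32768 == 64, "32 KiB pages: 64-page extent");
static_assert(4194304 / 65536 == 64, "64 KiB pages: 64-page extent");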
/** On a page of any file segment, data may be put starting from this
offset */
#define FSEG_PAGE_DATA FIL_PAGE_DATA
+#ifndef UNIV_INNOCHECKSUM
/** @name File segment header
The file segment header points to the inode describing the file segment. */
/* @{ */
@@ -83,11 +91,63 @@ typedef byte fseg_header_t;
header, in bytes */
/* @} */
-/** Flags for fsp_reserve_free_extents @{ */
-#define FSP_NORMAL 1000000
-#define FSP_UNDO 2000000
-#define FSP_CLEANING 3000000
-/* @} */
+#ifdef UNIV_DEBUG
+
+struct mtr_t;
+
+/** A wrapper class to print the file segment header information. */
+class fseg_header
+{
+public:
+ /** Constructor of fseg_header.
+ @param[in] header the underlying file segment header object
+ @param[in] mtr the mini-transaction. No redo log is
+ generated; only latches are checked within
+ the mini-transaction */
+ fseg_header(
+ const fseg_header_t* header,
+ mtr_t* mtr)
+ :
+ m_header(header),
+ m_mtr(mtr)
+ {}
+
+ /** Print the file segment header to the given output stream.
+ @param[in,out] out the output stream into which the object
+ is printed.
+ @return the output stream into which the object was printed. */
+ std::ostream&
+ to_stream(std::ostream& out) const;
+private:
+ /** The underlying file segment header */
+ const fseg_header_t* m_header;
+
+ /** The mini transaction, which is used mainly to check whether
+ appropriate latches have been taken by the calling thread. */
+ mtr_t* m_mtr;
+};
+
+/** Overload the global output operator to print a file segment header.
+@param[in,out] out the output stream into which the object is printed
+@param[in] header the file segment header to be printed
+@return the output stream */
+inline
+std::ostream&
+operator<<(
+ std::ostream& out,
+ const fseg_header& header)
+{
+ return(header.to_stream(out));
+}
+#endif /* UNIV_DEBUG */
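Usage of the debug wrapper is a one-liner; a hedged sketch follows (the helper function and its arguments are hypothetical, and <iostream> is assumed to be available):

#ifdef UNIV_DEBUG
/* Hypothetical debug helper: stream a file segment header that the
caller has already located under the latches checked via mtr. */
static void print_fseg(const fseg_header_t* header, mtr_t* mtr)
{
	std::cerr << fseg_header(header, mtr) << std::endl;
}
#endif /* UNIV_DEBUG */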
+
+/** Flags for fsp_reserve_free_extents */
+enum fsp_reserve_t {
+ FSP_NORMAL, /* reservation during normal B-tree operations */
+ FSP_UNDO, /* reservation done for undo logging */
+ FSP_CLEANING, /* reservation done during purge operations */
+ FSP_BLOB /* reservation being done for BLOB insertion */
+};
/* Number of pages described in a single descriptor page: currently each page
description takes less than 1 byte; a descriptor page is repeated every
@@ -128,4 +188,247 @@ every XDES_DESCRIBED_PER_PAGE pages in every tablespace. */
/*--------------------------------------*/
/* @} */
+/** Check if tablespace is system temporary.
+@param[in] space_id tablespace ID to check
+@return true if tablespace is system temporary. */
+inline
+bool
+fsp_is_system_temporary(ulint space_id)
+{
+ return(space_id == SRV_TMP_SPACE_ID);
+}
+
+#ifdef UNIV_DEBUG
+/** Skip some of the sanity checks that are time-consuming even in debug mode
+and can affect frequent verification runs that are done to ensure stability of
+the product.
+@return true if check should be skipped for given space. */
+bool
+fsp_skip_sanity_check(
+ ulint space_id);
+#endif /* UNIV_DEBUG */
+
+#endif /* !UNIV_INNOCHECKSUM */
+
+/* @defgroup fsp_flags InnoDB Tablespace Flag Constants @{ */
+
+/** Width of the POST_ANTELOPE flag */
+#define FSP_FLAGS_WIDTH_POST_ANTELOPE 1
+/** Number of flag bits used to indicate the tablespace zip page size */
+#define FSP_FLAGS_WIDTH_ZIP_SSIZE 4
+/** Width of the ATOMIC_BLOBS flag. The ability to break up a long
+column into an in-record prefix and an externally stored part is available
+to the two Barracuda row formats COMPRESSED and DYNAMIC. */
+#define FSP_FLAGS_WIDTH_ATOMIC_BLOBS 1
+/** Number of flag bits used to indicate the tablespace page size */
+#define FSP_FLAGS_WIDTH_PAGE_SSIZE 4
+/** Number of reserved bits */
+#define FSP_FLAGS_WIDTH_RESERVED 6
+/** Number of flag bits used to indicate the page compression */
+#define FSP_FLAGS_WIDTH_PAGE_COMPRESSION 1
+
+/** Width of all the currently known persistent tablespace flags */
+#define FSP_FLAGS_WIDTH (FSP_FLAGS_WIDTH_POST_ANTELOPE \
+ + FSP_FLAGS_WIDTH_ZIP_SSIZE \
+ + FSP_FLAGS_WIDTH_ATOMIC_BLOBS \
+ + FSP_FLAGS_WIDTH_PAGE_SSIZE \
+ + FSP_FLAGS_WIDTH_RESERVED \
+ + FSP_FLAGS_WIDTH_PAGE_COMPRESSION)
+
+/** A mask of all the known/used bits in FSP_SPACE_FLAGS */
+#define FSP_FLAGS_MASK (~(~0U << FSP_FLAGS_WIDTH))
+
+/* FSP_SPACE_FLAGS position and name in MySQL 5.6/MariaDB 10.0 or older,
+in MariaDB 10.1.20 or older, and in MariaDB 10.1.21 or newer.
+MySQL 5.6 MariaDB 10.1.x MariaDB 10.1.21
+====================================================================
+Below flags in same offset
+====================================================================
+0: POST_ANTELOPE 0:POST_ANTELOPE 0: POST_ANTELOPE
+1..4: ZIP_SSIZE(0..5) 1..4:ZIP_SSIZE(0..5) 1..4: ZIP_SSIZE(0..5)
+(NOTE: bit 4 is always 0)
+5: ATOMIC_BLOBS 5:ATOMIC_BLOBS 5: ATOMIC_BLOBS
+=====================================================================
+Below note the order difference:
+=====================================================================
+6..9: PAGE_SSIZE(3..7) 6: COMPRESSION 6..9: PAGE_SSIZE(3..7)
+10: DATA_DIR 7..10: COMP_LEVEL(0..9) 10: RESERVED (5.6 DATA_DIR)
+=====================================================================
+The flags below were in an incorrect position in MariaDB 10.1,
+or have been introduced in MySQL 5.7 or 8.0:
+=====================================================================
+11: UNUSED 11..12:ATOMIC_WRITES 11: RESERVED (5.7 SHARED)
+ 12: RESERVED (5.7 TEMPORARY)
+ 13..15:PAGE_SSIZE(3..7) 13: RESERVED (5.7 ENCRYPTION)
+ 14: RESERVED (8.0 SDI)
+ 15: RESERVED
+ 16: PAGE_SSIZE_msb(0) 16: COMPRESSION
+ 17: DATA_DIR 17: UNUSED
+ 18: UNUSED
+=====================================================================
+The flags below only exist in fil_space_t::flags, not in FSP_SPACE_FLAGS:
+=====================================================================
+ 27: DATA_DIR
+ 28..31: COMPRESSION_LEVEL
+*/
+
+/** A mask of the memory-only flags in fil_space_t::flags */
+#define FSP_FLAGS_MEM_MASK (~0U << FSP_FLAGS_MEM_DATA_DIR)
+
+/** Zero relative shift position of the DATA_DIR flag */
+#define FSP_FLAGS_MEM_DATA_DIR 27
+/** Zero relative shift position of the COMPRESSION_LEVEL field */
+#define FSP_FLAGS_MEM_COMPRESSION_LEVEL 28
+
+/** Zero relative shift position of the POST_ANTELOPE field */
+#define FSP_FLAGS_POS_POST_ANTELOPE 0
+/** Zero relative shift position of the ZIP_SSIZE field */
+#define FSP_FLAGS_POS_ZIP_SSIZE (FSP_FLAGS_POS_POST_ANTELOPE \
+ + FSP_FLAGS_WIDTH_POST_ANTELOPE)
+/** Zero relative shift position of the ATOMIC_BLOBS field */
+#define FSP_FLAGS_POS_ATOMIC_BLOBS (FSP_FLAGS_POS_ZIP_SSIZE \
+ + FSP_FLAGS_WIDTH_ZIP_SSIZE)
+/** Zero relative shift position of the start of the PAGE_SSIZE bits */
+#define FSP_FLAGS_POS_PAGE_SSIZE (FSP_FLAGS_POS_ATOMIC_BLOBS \
+ + FSP_FLAGS_WIDTH_ATOMIC_BLOBS)
+/** Zero relative shift position of the start of the RESERVED bits;
+these are only used in MySQL 5.7 and kept for compatibility. */
+#define FSP_FLAGS_POS_RESERVED (FSP_FLAGS_POS_PAGE_SSIZE \
+ + FSP_FLAGS_WIDTH_PAGE_SSIZE)
+/** Zero relative shift position of the PAGE_COMPRESSION field */
+#define FSP_FLAGS_POS_PAGE_COMPRESSION (FSP_FLAGS_POS_RESERVED \
+ + FSP_FLAGS_WIDTH_RESERVED)
+
+/** Bit mask of the POST_ANTELOPE field */
+#define FSP_FLAGS_MASK_POST_ANTELOPE \
+ ((~(~0U << FSP_FLAGS_WIDTH_POST_ANTELOPE)) \
+ << FSP_FLAGS_POS_POST_ANTELOPE)
+/** Bit mask of the ZIP_SSIZE field */
+#define FSP_FLAGS_MASK_ZIP_SSIZE \
+ ((~(~0U << FSP_FLAGS_WIDTH_ZIP_SSIZE)) \
+ << FSP_FLAGS_POS_ZIP_SSIZE)
+/** Bit mask of the ATOMIC_BLOBS field */
+#define FSP_FLAGS_MASK_ATOMIC_BLOBS \
+ ((~(~0U << FSP_FLAGS_WIDTH_ATOMIC_BLOBS)) \
+ << FSP_FLAGS_POS_ATOMIC_BLOBS)
+/** Bit mask of the PAGE_SSIZE field */
+#define FSP_FLAGS_MASK_PAGE_SSIZE \
+ ((~(~0U << FSP_FLAGS_WIDTH_PAGE_SSIZE)) \
+ << FSP_FLAGS_POS_PAGE_SSIZE)
+/** Bit mask of the RESERVED1 field */
+#define FSP_FLAGS_MASK_RESERVED \
+ ((~(~0U << FSP_FLAGS_WIDTH_RESERVED)) \
+ << FSP_FLAGS_POS_RESERVED)
+/** Bit mask of the PAGE_COMPRESSION field */
+#define FSP_FLAGS_MASK_PAGE_COMPRESSION \
+ ((~(~0U << FSP_FLAGS_WIDTH_PAGE_COMPRESSION)) \
+ << FSP_FLAGS_POS_PAGE_COMPRESSION)
+
+/** Bit mask of the in-memory COMPRESSION_LEVEL field */
+#define FSP_FLAGS_MASK_MEM_COMPRESSION_LEVEL \
+ (15U << FSP_FLAGS_MEM_COMPRESSION_LEVEL)
+
+/** Return the value of the POST_ANTELOPE field */
+#define FSP_FLAGS_GET_POST_ANTELOPE(flags) \
+ ((flags & FSP_FLAGS_MASK_POST_ANTELOPE) \
+ >> FSP_FLAGS_POS_POST_ANTELOPE)
+/** Return the value of the ZIP_SSIZE field */
+#define FSP_FLAGS_GET_ZIP_SSIZE(flags) \
+ ((flags & FSP_FLAGS_MASK_ZIP_SSIZE) \
+ >> FSP_FLAGS_POS_ZIP_SSIZE)
+/** Return the value of the ATOMIC_BLOBS field */
+#define FSP_FLAGS_HAS_ATOMIC_BLOBS(flags) \
+ ((flags & FSP_FLAGS_MASK_ATOMIC_BLOBS) \
+ >> FSP_FLAGS_POS_ATOMIC_BLOBS)
+/** Return the value of the PAGE_SSIZE field */
+#define FSP_FLAGS_GET_PAGE_SSIZE(flags) \
+ ((flags & FSP_FLAGS_MASK_PAGE_SSIZE) \
+ >> FSP_FLAGS_POS_PAGE_SSIZE)
+/** @return the RESERVED flags */
+#define FSP_FLAGS_GET_RESERVED(flags) \
+ ((flags & FSP_FLAGS_MASK_RESERVED) \
+ >> FSP_FLAGS_POS_RESERVED)
+/** @return the PAGE_COMPRESSION flag */
+#define FSP_FLAGS_HAS_PAGE_COMPRESSION(flags) \
+ ((flags & FSP_FLAGS_MASK_PAGE_COMPRESSION) \
+ >> FSP_FLAGS_POS_PAGE_COMPRESSION)
+
+/** Return the contents of the UNUSED bits */
+#define FSP_FLAGS_GET_UNUSED(flags) \
+ (flags >> FSP_FLAGS_POS_UNUSED)
+
+/** @return the value of the DATA_DIR field */
+#define FSP_FLAGS_HAS_DATA_DIR(flags) \
+ (flags & 1U << FSP_FLAGS_MEM_DATA_DIR)
+/** @return the COMPRESSION_LEVEL field */
+#define FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags) \
+ ((flags & FSP_FLAGS_MASK_MEM_COMPRESSION_LEVEL) \
+ >> FSP_FLAGS_MEM_COMPRESSION_LEVEL)
+
+/* @} */
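As a worked example of the bit layout (illustrative values only): a ROW_FORMAT=COMPRESSED table with KEY_BLOCK_SIZE=8 has zip ssize 4, and on the default 16 KiB page size PAGE_SSIZE is encoded as 0, so the flags and getters behave as sketched below.

/* Worked example (illustrative): compose and decode flags for
ROW_FORMAT=COMPRESSED with KEY_BLOCK_SIZE=8 on the default 16 KiB
page size (PAGE_SSIZE encoded as 0). */
static void fsp_flags_example()
{
	const ulint flags = (1U << FSP_FLAGS_POS_POST_ANTELOPE)
		| (4U << FSP_FLAGS_POS_ZIP_SSIZE)	/* 512 << 4 = 8 KiB */
		| (1U << FSP_FLAGS_POS_ATOMIC_BLOBS);	/* == 0x29 */

	ut_ad(FSP_FLAGS_GET_POST_ANTELOPE(flags) == 1);
	ut_ad(FSP_FLAGS_GET_ZIP_SSIZE(flags) == 4);
	ut_ad(FSP_FLAGS_GET_PAGE_SSIZE(flags) == 0);	/* 16 KiB default */
}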
+
+/** Validate the tablespace flags, which are stored in the
+tablespace header at offset FSP_SPACE_FLAGS.
+@param[in] flags the contents of FSP_SPACE_FLAGS
+@param[in] is_ibd whether this is an .ibd file (not system tablespace)
+@return whether the flags are correct (not in the buggy 10.1 format) */
+MY_ATTRIBUTE((warn_unused_result, const))
+UNIV_INLINE
+bool
+fsp_flags_is_valid(ulint flags, bool is_ibd)
+{
+ DBUG_EXECUTE_IF("fsp_flags_is_valid_failure",
+ return(false););
+ if (flags == 0) {
+ return(true);
+ }
+ if (flags & ~FSP_FLAGS_MASK) {
+ return(false);
+ }
+ if ((flags & (FSP_FLAGS_MASK_POST_ANTELOPE | FSP_FLAGS_MASK_ATOMIC_BLOBS))
+ == FSP_FLAGS_MASK_ATOMIC_BLOBS) {
+ /* If the "atomic blobs" flag (indicating
+ ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED) flag
+ is set, then the "post Antelope" (ROW_FORMAT!=REDUNDANT) flag
+ must also be set. */
+ return(false);
+ }
+ /* Bits 10..14 should be 0b0000d where d is the DATA_DIR flag
+ of MySQL 5.6 and MariaDB 10.0, which we ignore.
+ In the buggy FSP_SPACE_FLAGS written by MariaDB 10.1.0 to 10.1.20,
+ bits 10..14 would be nonzero 0bsssaa where sss is
+ nonzero PAGE_SSIZE (3, 4, 6, or 7)
+ and aa is ATOMIC_WRITES (not 0b11). */
+ if (FSP_FLAGS_GET_RESERVED(flags) & ~1U) {
+ return(false);
+ }
+
+ const ulint ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags);
+ if (ssize == 1 || ssize == 2 || ssize == 5 || ssize & 8) {
+ /* the page_size is not between 4k and 64k;
+ 16k should be encoded as 0, not 5 */
+ return(false);
+ }
+ const ulint zssize = FSP_FLAGS_GET_ZIP_SSIZE(flags);
+ if (zssize == 0) {
+ /* not ROW_FORMAT=COMPRESSED */
+ } else if (zssize > (ssize ? ssize : 5)) {
+ /* invalid KEY_BLOCK_SIZE */
+ return(false);
+ } else if (~flags & (FSP_FLAGS_MASK_POST_ANTELOPE
+ | FSP_FLAGS_MASK_ATOMIC_BLOBS)) {
+ /* both these flags should be set for
+ ROW_FORMAT=COMPRESSED */
+ return(false);
+ }
+
+ /* The flags do look valid. But, avoid misinterpreting
+ buggy MariaDB 10.1 format flags for
+ PAGE_COMPRESSED=1 PAGE_COMPRESSION_LEVEL={0,2,3}
+ as valid-looking PAGE_SSIZE if this is known to be
+ an .ibd file and we are using the default innodb_page_size=16k. */
+ return(ssize == 0 || !is_ibd || srv_page_size != UNIV_PAGE_SIZE_ORIG);
+}
+
#endif /* fsp0types_h */
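A quick sanity sketch against the validator above (illustrative asserts only; 0x29 is the COMPRESSED example composed earlier):

/* Illustrative checks of fsp_flags_is_valid(). */
static void fsp_flags_is_valid_example()
{
	ut_ad(fsp_flags_is_valid(0, false));	/* REDUNDANT/COMPACT */
	ut_ad(fsp_flags_is_valid(0x29, true));	/* COMPRESSED, 8 KiB zip */
	ut_ad(!fsp_flags_is_valid(0x20, true));	/* ATOMIC_BLOBS without
						POST_ANTELOPE is rejected */
}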
diff --git a/storage/innobase/include/fts0ast.h b/storage/innobase/include/fts0ast.h
index b7d467e0082..bad040fdcda 100644
--- a/storage/innobase/include/fts0ast.h
+++ b/storage/innobase/include/fts0ast.h
@@ -28,9 +28,14 @@ Created 2007/03/16/03 Sunny Bains
#define INNOBASE_FST0AST_H
#include "mem0mem.h"
-#include "ha_prototypes.h"
-#define exit(x) abort()
+#ifdef UNIV_PFS_MEMORY
+
+#define malloc(A) ut_malloc_nokey(A)
+#define free(A) ut_free(A)
+#define realloc(P, A) ut_realloc(P, A)
+
+#endif /* UNIV_PFS_MEMORY */
/* The type of AST Node */
enum fts_ast_type_t {
@@ -38,6 +43,10 @@ enum fts_ast_type_t {
FTS_AST_NUMB, /*!< Number */
FTS_AST_TERM, /*!< Term (or word) */
FTS_AST_TEXT, /*!< Text string */
+ FTS_AST_PARSER_PHRASE_LIST, /*!< Phrase for plugin parser.
+ The difference from the text type
+ is that we tokenize the text into
+ a term list */
FTS_AST_LIST, /*!< Expression list */
FTS_AST_SUBEXP_LIST /*!< Sub-Expression list */
};
@@ -142,9 +151,8 @@ fts_ast_term_set_wildcard(
fts_ast_node_t* node); /*!< in: term to change */
/********************************************************************
Set the proximity attribute of a text node. */
-
void
-fts_ast_term_set_distance(
+fts_ast_text_set_distance(
/*======================*/
fts_ast_node_t* node, /*!< in/out: text node */
ulint distance); /*!< in: the text proximity
@@ -152,7 +160,6 @@ fts_ast_term_set_distance(
/********************************************************************//**
Free a fts_ast_node_t instance.
@return next node to free */
-UNIV_INTERN
fts_ast_node_t*
fts_ast_free_node(
/*==============*/
@@ -173,14 +180,6 @@ fts_ast_node_print(
/*===============*/
fts_ast_node_t* node); /*!< in: ast node to print */
/********************************************************************
-For tracking node allocations, in case there is an during parsing.*/
-extern
-void
-fts_ast_state_add_node(
-/*===================*/
- fts_ast_state_t*state, /*!< in: ast state instance */
- fts_ast_node_t* node); /*!< in: node to add to state */
-/********************************************************************
Free node and expr allocations.*/
extern
void
@@ -188,10 +187,16 @@ fts_ast_state_free(
/*===============*/
fts_ast_state_t*state); /*!< in: state instance
to free */
+/** Check only union operation involved in the node
+@param[in] node ast node to check
+@return true if the node contains only union else false. */
+bool
+fts_ast_node_check_union(
+ fts_ast_node_t* node);
+
/******************************************************************//**
Traverse the AST - in-order traversal.
@return DB_SUCCESS if all went well */
-UNIV_INTERN
dberr_t
fts_ast_visit(
/*==========*/
@@ -204,22 +209,8 @@ fts_ast_visit(
operator, currently we only
ignore FTS_IGNORE operator */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*****************************************************************//**
-Process (nested) sub-expression, create a new result set to store the
-sub-expression result by processing nodes under current sub-expression
-list. Merge the sub-expression result with that of parent expression list.
-@return DB_SUCCESS if all went well */
-UNIV_INTERN
-dberr_t
-fts_ast_visit_sub_exp(
-/*==================*/
- fts_ast_node_t* node, /*!< in: instance to traverse*/
- fts_ast_callback visitor, /*!< in: callback */
- void* arg) /*!< in: callback arg */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
/********************************************************************
Create a lex instance.*/
-UNIV_INTERN
fts_lexer_t*
fts_lexer_create(
/*=============*/
@@ -229,7 +220,6 @@ fts_lexer_create(
MY_ATTRIBUTE((nonnull, malloc, warn_unused_result));
/********************************************************************
Free an fts_lexer_t instance.*/
-UNIV_INTERN
void
fts_lexer_free(
/*===========*/
@@ -243,7 +233,6 @@ has one more byte than len
@param[in] str pointer to string
@param[in] len length of the string
@return ast string with NUL-terminator */
-UNIV_INTERN
fts_ast_string_t*
fts_ast_string_create(
const byte* str,
@@ -252,7 +241,6 @@ fts_ast_string_create(
/**
Free an ast string instance
@param[in,out] ast_str string to free */
-UNIV_INTERN
void
fts_ast_string_free(
fts_ast_string_t* ast_str);
@@ -262,20 +250,11 @@ Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul
@param[in] str string to translate
@param[in] base the base
@return translated number */
-UNIV_INTERN
ulint
fts_ast_string_to_ul(
const fts_ast_string_t* ast_str,
int base);
-/**
-Print the ast string
-@param[in] str string to print */
-UNIV_INTERN
-void
-fts_ast_string_print(
- const fts_ast_string_t* ast_str);
-
/* String of length len.
We always store the string of length len with a terminating '\0',
regardless of whether there is any 0x00 in the string itself */
@@ -319,6 +298,9 @@ struct fts_ast_node_t {
already processed */
/** current transaction */
const trx_t* trx;
+ /* Used by plugin parser */
+ fts_ast_node_t* up_node; /*!< Direct up node */
+ bool go_up; /*!< Flag if go one level up */
};
/* To track state during parsing */
@@ -332,12 +314,34 @@ struct fts_ast_state_t {
fts_lexer_t* lexer; /*!< Lexer callback + arg */
CHARSET_INFO* charset; /*!< charset used for
tokenization */
+ /* Used by plugin parser */
+ fts_ast_node_t* cur_node; /*!< Current node into which
+ we add new node */
+ int depth; /*!< Depth of parsing state */
};
+/******************************************************************//**
+Create an AST term node, making a copy of ptr, for the plugin parser
+@return node */
+extern
+fts_ast_node_t*
+fts_ast_create_node_term_for_parser(
+/*================================*/
+ void* arg, /*!< in: ast state */
+ const char* ptr, /*!< in: term string */
+ const ulint len); /*!< in: term string length */
+
+/******************************************************************//**
+Create an AST phrase list node for plugin parser
+@return node */
+extern
+fts_ast_node_t*
+fts_ast_create_node_phrase_list(
+/*============================*/
+ void* arg); /*!< in: ast state */
+
#ifdef UNIV_DEBUG
const char*
-fts_ast_oper_name_get(fts_ast_oper_t oper);
-const char*
fts_ast_node_type_get(fts_ast_type_t type);
#endif /* UNIV_DEBUG */
diff --git a/storage/innobase/include/fts0blex.h b/storage/innobase/include/fts0blex.h
index 10dc314d5d4..b16e7f2c495 100644
--- a/storage/innobase/include/fts0blex.h
+++ b/storage/innobase/include/fts0blex.h
@@ -694,7 +694,7 @@ extern int yylex (yyscan_t yyscanner);
#undef yyTABLES_NAME
#endif
-#line 73 "fts0blex.l"
+#line 74 "fts0blex.l"
#line 701 "../include/fts0blex.h"
diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h
index 3beddd68722..e8a91b0ef55 100644
--- a/storage/innobase/include/fts0fts.h
+++ b/storage/innobase/include/fts0fts.h
@@ -27,12 +27,8 @@ Created 2011/09/02 Sunny Bains
#ifndef fts0fts_h
#define fts0fts_h
-#include "univ.i"
-
#include "data0type.h"
#include "data0types.h"
-#include "dict0types.h"
-#include "hash0hash.h"
#include "mem0mem.h"
#include "rem0types.h"
#include "row0types.h"
@@ -42,6 +38,7 @@ Created 2011/09/02 Sunny Bains
#include "ut0wqueue.h"
#include "que0types.h"
#include "ft_global.h"
+#include "mysql/plugin_ftparser.h"
/** "NULL" value of a document id. */
#define FTS_NULL_DOC_ID 0
@@ -66,7 +63,7 @@ optimize using a 4 byte Doc ID for FIC merge sort to reduce sort size */
#define MAX_DOC_ID_OPT_VAL 1073741824
/** Document id type. */
-typedef ib_uint64_t doc_id_t;
+typedef ib_id_t doc_id_t;
/** doc_id_t printf format */
#define FTS_DOC_ID_FORMAT IB_ID_FMT
@@ -86,12 +83,16 @@ those defined in mysql file ft_global.h */
#define FTS_BOOL 1
#define FTS_SORTED 2
#define FTS_EXPAND 4
-#define FTS_PROXIMITY 8
-#define FTS_PHRASE 16
-#define FTS_OPT_RANKING 32
+#define FTS_NO_RANKING 8
+#define FTS_PROXIMITY 16
+#define FTS_PHRASE 32
+#define FTS_OPT_RANKING 64
#define FTS_INDEX_TABLE_IND_NAME "FTS_INDEX_TABLE_IND"
+/** The number of FTS index partitions for a fulltext index */
+#define FTS_NUM_AUX_INDEX 6
+
/** Threshold where our optimize thread automatically kicks in */
#define FTS_OPTIMIZE_THRESHOLD 10000000
@@ -99,6 +100,30 @@ those defined in mysql file ft_global.h */
should not exceed FTS_DOC_ID_MAX_STEP */
#define FTS_DOC_ID_MAX_STEP 65535
+/** Maximum possible Fulltext word length in bytes (assuming mbmaxlen=4) */
+#define FTS_MAX_WORD_LEN (HA_FT_MAXCHARLEN * 4)
+
+/** Maximum possible Fulltext word length (in characters) */
+#define FTS_MAX_WORD_LEN_IN_CHAR HA_FT_MAXCHARLEN
+
+/** Number of columns in FTS AUX Tables */
+#define FTS_DELETED_TABLE_NUM_COLS 1
+#define FTS_CONFIG_TABLE_NUM_COLS 2
+#define FTS_AUX_INDEX_TABLE_NUM_COLS 5
+
+/** DELETED_TABLE(doc_id BIGINT UNSIGNED) */
+#define FTS_DELETED_TABLE_COL_LEN 8
+/** CONFIG_TABLE(key CHAR(50), value CHAR(200)) */
+#define FTS_CONFIG_TABLE_KEY_COL_LEN 50
+#define FTS_CONFIG_TABLE_VALUE_COL_LEN 200
+
+#define FTS_INDEX_FIRST_DOC_ID_LEN 8
+#define FTS_INDEX_LAST_DOC_ID_LEN 8
+#define FTS_INDEX_DOC_COUNT_LEN 4
+/* BLOB COLUMN, 0 means VARIABLE SIZE */
+#define FTS_INDEX_ILIST_LEN 0
+
+
/** Variable specifying the FTS parallel sort degree */
extern ulong fts_sort_pll_degree;
@@ -280,39 +305,52 @@ struct fts_table_t {
};
/** The state of the FTS sub system. */
-struct fts_t {
- /*!< mutex protecting bg_threads* and
- fts_add_wq. */
- ib_mutex_t bg_threads_mutex;
-
- /* Whether the ADDED table record sync-ed after
- crash recovery; protected by bg_threads mutex */
+class fts_t {
+public:
+ /** fts_t constructor.
+ @param[in] table table with FTS indexes
+ @param[in,out] heap memory heap where 'this' is stored */
+ fts_t(
+ const dict_table_t* table,
+ mem_heap_t* heap);
+
+ /** fts_t destructor. */
+ ~fts_t();
+
+ /** Mutex protecting bg_threads* and fts_add_wq. */
+ ib_mutex_t bg_threads_mutex;
+
+ /** Whether the ADDED table record sync-ed after
+ crash recovery; protected by bg_threads_mutex */
unsigned added_synced:1;
- /* Whether the table hold dict_sys->mutex;
- protected by bg_threads mutex */
+ /** Whether the table holds dict_sys->mutex;
+ protected by bg_threads_mutex */
unsigned dict_locked:1;
- ib_wqueue_t* add_wq; /*!< Work queue for scheduling jobs
- for the FTS 'Add' thread, or NULL
- if the thread has not yet been
- created. Each work item is a
- fts_trx_doc_ids_t*. */
+ /** Number of background threads accessing this table. */
+ ulint bg_threads;
+
+ /** Work queue for scheduling jobs for the FTS 'Add' thread, or NULL
+ if the thread has not yet been created. Each work item is a
+ fts_trx_doc_ids_t*. */
+ ib_wqueue_t* add_wq;
- fts_cache_t* cache; /*!< FTS memory buffer for this table,
- or NULL if the table has no FTS
- index. */
+ /** FTS memory buffer for this table, or NULL if the table has no FTS
+ index. */
+ fts_cache_t* cache;
- ulint doc_col; /*!< FTS doc id hidden column number
- in the CLUSTERED index. */
+ /** FTS doc id hidden column number in the CLUSTERED index. */
+ ulint doc_col;
- ib_vector_t* indexes; /*!< Vector of FTS indexes, this is
- mainly for caching purposes. */
+ /** Vector of FTS indexes, this is mainly for caching purposes. */
+ ib_vector_t* indexes;
- /* Whether the table was added to fts_optimize_wq();
+ /** Whether the table exists in fts_optimize_wq;
protected by fts_optimize_wq mutex */
bool in_queue;
- mem_heap_t* fts_heap; /*!< heap for fts_t allocation */
+ /** Heap for fts_t allocation. */
+ mem_heap_t* fts_heap;
};
struct fts_stopword_t;
@@ -344,12 +382,6 @@ extern ulong fts_min_token_size;
need a sync to free some memory */
extern bool fts_need_sync;
-/** Maximum possible Fulltext word length in bytes (assuming mbmaxlen=4) */
-#define FTS_MAX_WORD_LEN (HA_FT_MAXCHARLEN * 4)
-
-/** Maximum possible Fulltext word length (in characters) */
-#define FTS_MAX_WORD_LEN_IN_CHAR HA_FT_MAXCHARLEN
-
#define fts_que_graph_free(graph) \
do { \
mutex_enter(&dict_sys->mutex); \
@@ -359,7 +391,6 @@ do { \
/******************************************************************//**
Create a FTS cache. */
-UNIV_INTERN
fts_cache_t*
fts_cache_create(
/*=============*/
@@ -368,7 +399,6 @@ fts_cache_create(
/******************************************************************//**
Create a FTS index cache.
@return Index Cache */
-UNIV_INTERN
fts_index_cache_t*
fts_cache_index_cache_create(
/*=========================*/
@@ -379,18 +409,15 @@ fts_cache_index_cache_create(
Get the next available document id. This function creates a new
transaction to generate the document id.
@return DB_SUCCESS if OK */
-UNIV_INTERN
dberr_t
fts_get_next_doc_id(
/*================*/
const dict_table_t* table, /*!< in: table */
- doc_id_t* doc_id) /*!< out: new document id */
- MY_ATTRIBUTE((nonnull));
+ doc_id_t* doc_id);/*!< out: new document id */
/*********************************************************************//**
Update the next and last Doc ID in the CONFIG table to be the input
"doc_id" value (+ 1). We would do so after each FTS index build or
table truncate */
-UNIV_INTERN
void
fts_update_next_doc_id(
/*===================*/
@@ -402,7 +429,6 @@ fts_update_next_doc_id(
/******************************************************************//**
Create a new fts_doc_ids_t.
@return new fts_doc_ids_t. */
-UNIV_INTERN
fts_doc_ids_t*
fts_doc_ids_create(void);
/*=====================*/
@@ -415,7 +441,6 @@ inline void fts_doc_ids_free(fts_doc_ids_t* doc_ids)
/******************************************************************//**
Notify the FTS system about an operation on an FTS-indexed table. */
-UNIV_INTERN
void
fts_trx_add_op(
/*===========*/
@@ -423,13 +448,11 @@ fts_trx_add_op(
dict_table_t* table, /*!< in: table */
doc_id_t doc_id, /*!< in: doc id */
fts_row_state state, /*!< in: state of the row */
- ib_vector_t* fts_indexes) /*!< in: FTS indexes affected
+ ib_vector_t* fts_indexes); /*!< in: FTS indexes affected
(NULL=all) */
- MY_ATTRIBUTE((nonnull(1,2)));
/******************************************************************//**
Free an FTS trx. */
-UNIV_INTERN
void
fts_trx_free(
/*=========*/
@@ -440,7 +463,6 @@ Creates the common ancillary tables needed for supporting an FTS index
on the given table. row_mysql_lock_data_dictionary must have been
called before this.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_create_common_tables(
/*=====================*/
@@ -450,25 +472,23 @@ fts_create_common_tables(
index */
const char* name, /*!< in: table name */
bool skip_doc_id_index) /*!< in: Skip index on doc id */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************************//**
Wrapper function of fts_create_index_tables_low(), create auxiliary
tables for an FTS index
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_create_index_tables(
/*====================*/
trx_t* trx, /*!< in: transaction handle */
const dict_index_t* index) /*!< in: the FTS index
instance */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************************//**
Creates the column specific ancillary tables needed for supporting an
FTS index on the given table. row_mysql_lock_data_dictionary must have
been called before this.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_create_index_tables_low(
/*========================*/
@@ -478,62 +498,57 @@ fts_create_index_tables_low(
instance */
const char* table_name, /*!< in: the table name */
table_id_t table_id) /*!< in: the table id */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************************//**
Add the FTS document id hidden column. */
-UNIV_INTERN
void
fts_add_doc_id_column(
/*==================*/
dict_table_t* table, /*!< in/out: Table with FTS index */
- mem_heap_t* heap) /*!< in: temporary memory heap, or NULL */
- MY_ATTRIBUTE((nonnull(1)));
+ mem_heap_t* heap); /*!< in: temporary memory heap, or NULL */
/*********************************************************************//**
Drops the ancillary tables needed for supporting an FTS index on the
given table. row_mysql_lock_data_dictionary must have been called before
this.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_drop_tables(
/*============*/
trx_t* trx, /*!< in: transaction */
- dict_table_t* table) /*!< in: table has the FTS
+ dict_table_t* table); /*!< in: table has the FTS
index */
- MY_ATTRIBUTE((nonnull));
/******************************************************************//**
The given transaction is about to be committed; do whatever is necessary
from the FTS system's POV.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_commit(
/*=======*/
trx_t* trx) /*!< in: transaction */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/*******************************************************************//**
-FTS Query entry point.
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** FTS Query entry point.
+@param[in] trx transaction
+@param[in] index fts index to search
+@param[in] flags FTS search mode
+@param[in] query_str FTS query
+@param[in] query_len FTS query string len in bytes
+@param[in,out] result result doc ids
@return DB_SUCCESS if successful otherwise error code */
-UNIV_INTERN
dberr_t
fts_query(
-/*======*/
- trx_t* trx, /*!< in: transaction */
- dict_index_t* index, /*!< in: FTS index to search */
- uint flags, /*!< in: FTS search mode */
- const byte* query, /*!< in: FTS query */
- ulint query_len, /*!< in: FTS query string len
- in bytes */
- fts_result_t** result) /*!< out: query result, to be
- freed by the caller.*/
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ trx_t* trx,
+ dict_index_t* index,
+ uint flags,
+ const byte* query_str,
+ ulint query_len,
+ fts_result_t** result)
+ MY_ATTRIBUTE((warn_unused_result));
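A hedged usage sketch of the reworked entry point; the transaction, index, wrapper function and error handling here are hypothetical:

/* Hypothetical caller: boolean-mode query, then sort and free the
result. 'trx' and 'index' are assumed to be valid. */
static void fts_query_example(trx_t* trx, dict_index_t* index)
{
	fts_result_t*	result = NULL;
	const byte*	q = reinterpret_cast<const byte*>("database");

	dberr_t	err = fts_query(trx, index, FTS_BOOL, q, 8, &result);

	if (err == DB_SUCCESS && result != NULL) {
		fts_query_sort_result_on_rank(result);
		fts_query_free_result(result);
	}
}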
/******************************************************************//**
Retrieve the FTS Relevance Ranking result for doc with doc_id
@return the relevance ranking value. */
-UNIV_INTERN
float
fts_retrieve_ranking(
/*=================*/
@@ -543,7 +558,6 @@ fts_retrieve_ranking(
/******************************************************************//**
FTS Query sort result, returned by fts_query() on fts_ranking_t::rank. */
-UNIV_INTERN
void
fts_query_sort_result_on_rank(
/*==========================*/
@@ -552,7 +566,6 @@ fts_query_sort_result_on_rank(
/******************************************************************//**
FTS Query free result, returned by fts_query(). */
-UNIV_INTERN
void
fts_query_free_result(
/*==================*/
@@ -561,7 +574,6 @@ fts_query_free_result(
/******************************************************************//**
Extract the doc id from the FTS hidden column. */
-UNIV_INTERN
doc_id_t
fts_get_doc_id_from_row(
/*====================*/
@@ -569,30 +581,39 @@ fts_get_doc_id_from_row(
dtuple_t* row); /*!< in: row whose FTS doc id we
want to extract.*/
-/******************************************************************//**
-Extract the doc id from the FTS hidden column. */
-UNIV_INTERN
+/** Extract the doc id from the record that belongs to index.
+@param[in] table table
+@param[in] rec record contains FTS_DOC_ID
+@param[in] index index of rec
+@param[in] heap heap memory
+@return doc id that was extracted from rec */
doc_id_t
fts_get_doc_id_from_rec(
-/*====================*/
- dict_table_t* table, /*!< in: table */
- const rec_t* rec, /*!< in: rec */
- mem_heap_t* heap); /*!< in: heap */
-
-/******************************************************************//**
-Update the query graph with a new document id.
-@return Doc ID used */
-UNIV_INTERN
+ dict_table_t* table,
+ const rec_t* rec,
+ const dict_index_t* index,
+ mem_heap_t* heap);
+
+/** Add new fts doc id to the update vector.
+@param[in] table the table that contains the FTS index.
+@param[in,out] ufield the fts doc id field in the update vector.
+ No new memory is allocated for this in this
+ function.
+@param[in,out] next_doc_id the fts doc id that has been added to the
+ update vector. If 0, a new fts doc id is
+ automatically generated. The memory provided
+ for this argument will be used by the update
+ vector. Ensure that the life time of this
+ memory matches that of the update vector.
+@return the fts doc id used in the update vector */
doc_id_t
fts_update_doc_id(
-/*==============*/
- dict_table_t* table, /*!< in: table */
- upd_field_t* ufield, /*!< out: update node */
- doc_id_t* next_doc_id); /*!< out: buffer for writing */
+ dict_table_t* table,
+ upd_field_t* ufield,
+ doc_id_t* next_doc_id);
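A sketch of the documented lifetime contract (all local names hypothetical): passing 0 requests a generated doc id, and next_doc_id must outlive the update vector that borrows it.

/* Hypothetical caller: next_doc_id lives at least as long as the
update vector, per the contract documented above. */
static void fts_doc_id_example(dict_table_t* table, upd_field_t* ufield)
{
	doc_id_t	next_doc_id = 0;	/* 0: generate a new id */

	doc_id_t used = fts_update_doc_id(table, ufield, &next_doc_id);
	/* 'used' is the id that ended up in the update vector. */
	(void) used;
}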
/******************************************************************//**
FTS initialize. */
-UNIV_INTERN
void
fts_startup(void);
/*==============*/
@@ -600,7 +621,6 @@ fts_startup(void);
/******************************************************************//**
Create an instance of fts_t.
@return instance of fts_t */
-UNIV_INTERN
fts_t*
fts_create(
/*=======*/
@@ -609,7 +629,6 @@ fts_create(
/**********************************************************************//**
Free the FTS resources. */
-UNIV_INTERN
void
fts_free(
/*=====*/
@@ -619,16 +638,13 @@ fts_free(
/*********************************************************************//**
Run OPTIMIZE on the given table.
@return DB_SUCCESS if all OK */
-UNIV_INTERN
dberr_t
fts_optimize_table(
/*===============*/
- dict_table_t* table) /*!< in: table to optimiza */
- MY_ATTRIBUTE((nonnull));
+ dict_table_t* table); /*!< in: table to optimize */
/**********************************************************************//**
Startup the optimize thread and create the work queue. */
-UNIV_INTERN
void
fts_optimize_init(void);
/*====================*/
@@ -636,13 +652,12 @@ fts_optimize_init(void);
/****************************************************************//**
Drops index ancillary tables for a FTS index
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_drop_index_tables(
/*==================*/
trx_t* trx, /*!< in: transaction */
dict_index_t* index) /*!< in: Index to drop */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/** Add the table to add to the OPTIMIZER's list.
@param[in] table table to add */
@@ -653,78 +668,53 @@ fts_optimize_add_table(
/******************************************************************//**
Remove the table from the OPTIMIZER's list. We do wait for
acknowledgement from the consumer of the message. */
-UNIV_INTERN
void
fts_optimize_remove_table(
/*======================*/
dict_table_t* table); /*!< in: table to remove */
+/** Shutdown fts optimize thread. */
+void
+fts_optimize_shutdown();
+
/** Send sync fts cache for the table.
@param[in] table table to sync */
-UNIV_INTERN
void
fts_optimize_request_sync_table(
dict_table_t* table);
/**********************************************************************//**
-Signal the optimize thread to prepare for shutdown. */
-UNIV_INTERN
-void
-fts_optimize_start_shutdown(void);
-/*==============================*/
-
-/**********************************************************************//**
-Inform optimize to clean up. */
-UNIV_INTERN
-void
-fts_optimize_end(void);
-/*===================*/
-
-/**********************************************************************//**
Take a FTS savepoint. */
-UNIV_INTERN
void
fts_savepoint_take(
/*===============*/
trx_t* trx, /*!< in: transaction */
fts_trx_t* fts_trx, /*!< in: fts transaction */
- const char* name) /*!< in: savepoint name */
- MY_ATTRIBUTE((nonnull));
+ const char* name); /*!< in: savepoint name */
+
/**********************************************************************//**
Refresh last statement savepoint. */
-UNIV_INTERN
void
fts_savepoint_laststmt_refresh(
/*===========================*/
- trx_t* trx) /*!< in: transaction */
- MY_ATTRIBUTE((nonnull));
+ trx_t* trx); /*!< in: transaction */
+
/**********************************************************************//**
Release the savepoint data identified by name. */
-UNIV_INTERN
void
fts_savepoint_release(
/*==================*/
trx_t* trx, /*!< in: transaction */
const char* name); /*!< in: savepoint name */
-/**********************************************************************//**
-Free the FTS cache. */
-UNIV_INTERN
-void
-fts_cache_destroy(
-/*==============*/
- fts_cache_t* cache); /*!< in: cache*/
-
/** Clear cache.
@param[in,out] cache fts cache */
-UNIV_INTERN
void
fts_cache_clear(
fts_cache_t* cache);
/*********************************************************************//**
Initialize things in cache. */
-UNIV_INTERN
void
fts_cache_init(
/*===========*/
@@ -732,7 +722,6 @@ fts_cache_init(
/*********************************************************************//**
Rollback to and including savepoint identified by name. */
-UNIV_INTERN
void
fts_savepoint_rollback(
/*===================*/
@@ -741,7 +730,6 @@ fts_savepoint_rollback(
/*********************************************************************//**
Rollback to and including savepoint identified by name. */
-UNIV_INTERN
void
fts_savepoint_rollback_last_stmt(
/*=============================*/
@@ -750,51 +738,20 @@ fts_savepoint_rollback_last_stmt(
/***********************************************************************//**
Drop all orphaned FTS auxiliary tables, those that don't have a parent
table or FTS index defined on them. */
-UNIV_INTERN
void
fts_drop_orphaned_tables(void);
/*==========================*/
-/* Get parent table name if it's a fts aux table
-@param[in] aux_table_name aux table name
-@param[in] aux_table_len aux table length
-@return parent table name, or NULL */
-char*
-fts_get_parent_table_name(
- const char* aux_table_name,
- ulint aux_table_len);
-
-/******************************************************************//**
-Since we do a horizontal split on the index table, we need to drop
-all the split tables.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_drop_index_split_tables(
-/*========================*/
- trx_t* trx, /*!< in: transaction */
- dict_index_t* index) /*!< in: fts instance */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
/** Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
@param[in,out] table fts table
-@param[in] unlock_cache whether unlock cache when write node
-@param[in] wait whether wait for existing sync to finish
-@param[in] has_dict whether has dict operation lock
+@param[in] wait whether to wait for existing sync to finish
@return DB_SUCCESS on success, error code on failure. */
-UNIV_INTERN
-dberr_t
-fts_sync_table(
- dict_table_t* table,
- bool unlock_cache,
- bool wait,
- bool has_dict);
+dberr_t fts_sync_table(dict_table_t* table, bool wait = true);
/****************************************************************//**
Free the query graph but check whether dict_sys->mutex is already
held */
-UNIV_INTERN
void
fts_que_graph_free_check_lock(
/*==========================*/
@@ -804,7 +761,6 @@ fts_que_graph_free_check_lock(
/****************************************************************//**
Create an FTS index cache. */
-UNIV_INTERN
CHARSET_INFO*
fts_index_get_charset(
/*==================*/
@@ -813,12 +769,20 @@ fts_index_get_charset(
/*********************************************************************//**
Get the initial Doc ID by consulting the CONFIG table
@return initial Doc ID */
-UNIV_INTERN
doc_id_t
fts_init_doc_id(
/*============*/
const dict_table_t* table); /*!< in: table */
+/** Get the parent table name if it's an FTS aux table
+@param[in] aux_table_name aux table name
+@param[in] aux_table_len aux table length
+@return parent table name, or NULL */
+char*
+fts_get_parent_table_name(
+ const char* aux_table_name,
+ ulint aux_table_len);
+
/******************************************************************//**
compare two character string according to their charset. */
extern
@@ -864,15 +828,31 @@ innobase_mysql_fts_get_token(
const byte* start, /*!< in: start of text */
const byte* end, /*!< in: one character past
end of text */
- fts_string_t* token, /*!< out: token's text */
- ulint* offset); /*!< out: offset to token,
- measured as characters from
- 'start' */
+ fts_string_t* token); /*!< out: token's text */
+
+/*************************************************************//**
+Get token char size by charset
+@return the number of token char size */
+ulint
+fts_get_token_size(
+/*===============*/
+ const CHARSET_INFO* cs, /*!< in: Character set */
+ const char* token, /*!< in: token */
+ ulint len); /*!< in: token length */
+
+/*************************************************************//**
+FULLTEXT tokenizer internal in MYSQL_FTPARSER_SIMPLE_MODE
+@return 0 if tokenized successfully */
+int
+fts_tokenize_document_internal(
+/*===========================*/
+ MYSQL_FTPARSER_PARAM* param, /*!< in: parser parameter */
+ const char* doc, /*!< in: document to tokenize */
+ int len); /*!< in: document length */
/*********************************************************************//**
Fetch COUNT(*) from specified table.
@return the number of rows in the table */
-UNIV_INTERN
ulint
fts_get_rows_count(
/*===============*/
@@ -881,7 +861,6 @@ fts_get_rows_count(
/*************************************************************//**
Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists
@return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */
-UNIV_INTERN
doc_id_t
fts_get_max_doc_id(
/*===============*/
@@ -891,7 +870,6 @@ fts_get_max_doc_id(
Check whether user supplied stopword table exists and is of
the right format.
@return the stopword column charset if qualifies */
-UNIV_INTERN
CHARSET_INFO*
fts_valid_stopword_table(
/*=====================*/
@@ -900,7 +878,6 @@ fts_valid_stopword_table(
/****************************************************************//**
This function loads specified stopword into FTS cache
@return TRUE if success */
-UNIV_INTERN
ibool
fts_load_stopword(
/*==============*/
@@ -917,18 +894,8 @@ fts_load_stopword(
reload of FTS table */
/****************************************************************//**
-Create the vector of fts_get_doc_t instances.
-@return vector of fts_get_doc_t instances */
-UNIV_INTERN
-ib_vector_t*
-fts_get_docs_create(
-/*================*/
- fts_cache_t* cache); /*!< in: fts cache */
-
-/****************************************************************//**
Read the rows from the FTS index
@return DB_SUCCESS if OK */
-UNIV_INTERN
dberr_t
fts_table_fetch_doc_ids(
/*====================*/
@@ -942,7 +909,6 @@ used. There are documents that have not yet sync-ed to auxiliary
tables since the last abnormal server shutdown; we will need to bring
such documents into the FTS cache before any further operations
@return TRUE if all OK */
-UNIV_INTERN
ibool
fts_init_index(
/*===========*/
@@ -951,7 +917,6 @@ fts_init_index(
have cache lock */
/*******************************************************************//**
Add a newly create index in FTS cache */
-UNIV_INTERN
void
fts_add_index(
/*==========*/
@@ -961,19 +926,16 @@ fts_add_index(
/*******************************************************************//**
Drop auxiliary tables related to an FTS index
@return DB_SUCCESS or error number */
-UNIV_INTERN
dberr_t
fts_drop_index(
/*===========*/
dict_table_t* table, /*!< in: Table where indexes are dropped */
dict_index_t* index, /*!< in: Index to be dropped */
- trx_t* trx) /*!< in: Transaction for the drop */
- MY_ATTRIBUTE((nonnull));
+ trx_t* trx); /*!< in: Transaction for the drop */
/****************************************************************//**
Rename auxiliary tables for all fts index for a table
@return DB_SUCCESS or error code */
-
dberr_t
fts_rename_aux_tables(
/*==================*/
@@ -985,10 +947,42 @@ fts_rename_aux_tables(
Check indexes in the fts->indexes is also present in index cache and
table->indexes list
@return TRUE if all indexes match */
-UNIV_INTERN
ibool
fts_check_cached_index(
/*===================*/
dict_table_t* table); /*!< in: Table where indexes are dropped */
-#endif /*!< fts0fts.h */
+/** Check if all the auxiliary tables associated with an FTS index are in
+a consistent state. For now, consistency is checked only by ensuring
+index->page_no != FIL_NULL.
+@param[out] base_table table that hosts the FTS index
+@param[in,out] trx transaction handle */
+void
+fts_check_corrupt(
+ dict_table_t* base_table,
+ trx_t* trx);
+
+/** Fetch the document from the tuple, tokenize the text data and
+insert the text data into the FTS auxiliary table and
+its cache. The tuple fields do not contain any information
+about externally stored fields; the tuple contains data directly
+converted from MySQL.
+@param[in] ftt FTS transaction table
+@param[in] doc_id doc id
+@param[in] tuple tuple from where data can be retrieved
+ and tuple should be arranged in table
+ schema order. */
+void
+fts_add_doc_from_tuple(
+ fts_trx_table_t*ftt,
+ doc_id_t doc_id,
+ const dtuple_t* tuple);
+
+/** Create an FTS trx.
+@param[in,out] trx InnoDB Transaction
+@return FTS transaction. */
+fts_trx_t*
+fts_trx_create(
+ trx_t* trx);
+
+#endif /*!< fts0fts.h */
diff --git a/storage/innobase/include/fts0opt.h b/storage/innobase/include/fts0opt.h
index 29bb14e2f64..c527ad8e528 100644
--- a/storage/innobase/include/fts0opt.h
+++ b/storage/innobase/include/fts0opt.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2001, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2001, 2014, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -30,7 +30,6 @@ extern ib_wqueue_t* fts_optimize_wq;
/********************************************************************
Callback function to fetch the rows in an FTS INDEX record. */
-UNIV_INTERN
ibool
fts_optimize_index_fetch_node(
/*==========================*/
diff --git a/storage/innobase/include/fts0plugin.h b/storage/innobase/include/fts0plugin.h
new file mode 100644
index 00000000000..18ec2d6dc00
--- /dev/null
+++ b/storage/innobase/include/fts0plugin.h
@@ -0,0 +1,50 @@
+/*****************************************************************************
+
+Copyright (c) 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fts0plugin.h
+Full text search plugin header file
+
+Created 2013/06/04 Shaohua Wang
+***********************************************************************/
+
+#ifndef INNOBASE_FTS0PLUGIN_H
+#define INNOBASE_FTS0PLUGIN_H
+
+#include "univ.i"
+
+extern struct st_mysql_ftparser fts_default_parser;
+
+struct fts_ast_state_t;
+
+#define PARSER_INIT(parser, arg) if (parser->init) { parser->init(arg); }
+#define PARSER_DEINIT(parser, arg) if (parser->deinit) { parser->deinit(arg); }
+
+/******************************************************************//**
+Parse an FTS query with a plugin parser.
+@return 0 if parsed successfully, non-zero otherwise. */
+int
+fts_parse_by_parser(
+/*================*/
+ ibool mode, /*!< in: query boolean mode */
+ uchar* query, /*!< in: query string */
+ ulint len, /*!< in: query string length */
+ st_mysql_ftparser* parse, /*!< in: fts plugin parser */
+ fts_ast_state_t* state); /*!< in: query parser state */
+
+#endif /* INNOBASE_FTS0PLUGIN_H */
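A hedged sketch of how the PARSER_INIT/PARSER_DEINIT guards are meant to be used; the wrapper function is hypothetical:

/* Hypothetical call site: the init/deinit hooks in st_mysql_ftparser
are optional, so each call is guarded by the macros above. */
static void parse_with(st_mysql_ftparser* parser, MYSQL_FTPARSER_PARAM* param)
{
	PARSER_INIT(parser, param);
	/* hand the query to fts_parse_by_parser() here */
	PARSER_DEINIT(parser, param);
}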
diff --git a/storage/innobase/include/fts0priv.h b/storage/innobase/include/fts0priv.h
index 1622eaa07ae..dd724aa12d4 100644
--- a/storage/innobase/include/fts0priv.h
+++ b/storage/innobase/include/fts0priv.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2011, 2018, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2019, MariaDB Corporation.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -115,7 +115,6 @@ component.
/******************************************************************//**
Parse an SQL string. %s is replaced with the table's id.
@return query graph */
-UNIV_INTERN
que_t*
fts_parse_sql(
/*==========*/
@@ -126,7 +125,6 @@ fts_parse_sql(
/******************************************************************//**
Evaluate a parsed SQL statement
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_eval_sql(
/*=========*/
@@ -138,7 +136,6 @@ fts_eval_sql(
@param[in] fts_table metadata on fulltext-indexed table
@param[out] table_name a name up to MAX_FULL_NAME_LEN
@param[in] dict_locked whether dict_sys->mutex is being held */
-UNIV_INTERN
void fts_get_table_name(const fts_table_t* fts_table, char* table_name,
bool dict_locked = false)
MY_ATTRIBUTE((nonnull));
@@ -157,7 +154,6 @@ Two indexed columns named "subject" and "content":
"$sel0, $sel1",
info/ids: sel0 -> "subject", sel1 -> "content",
@return heap-allocated WHERE string */
-UNIV_INTERN
const char*
fts_get_select_columns_str(
/*=======================*/
@@ -177,7 +173,6 @@ ID */
Fetch document (= a single row's indexed text) with the given
document id.
@return: DB_SUCCESS if fetch is successful, else error */
-UNIV_INTERN
dberr_t
fts_doc_fetch_by_doc_id(
/*====================*/
@@ -197,7 +192,6 @@ fts_doc_fetch_by_doc_id(
Callback function for fetch that stores the text of an FTS document,
converting each column to UTF-16.
@return always FALSE */
-UNIV_INTERN
ibool
fts_query_expansion_fetch_doc(
/*==========================*/
@@ -207,7 +201,6 @@ fts_query_expansion_fetch_doc(
/********************************************************************
Write out a single word's data as new entry/entries in the INDEX table.
@return DB_SUCCESS if all OK. */
-UNIV_INTERN
dberr_t
fts_write_node(
/*===========*/
@@ -217,34 +210,22 @@ fts_write_node(
fts_string_t* word, /*!< in: word in UTF-8 */
fts_node_t* node) /*!< in: node columns */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*******************************************************************//**
-Tokenize a document. */
-UNIV_INTERN
-void
-fts_tokenize_document(
-/*==================*/
- fts_doc_t* doc, /*!< in/out: document to
- tokenize */
- fts_doc_t* result) /*!< out: if provided, save
- result tokens here */
- MY_ATTRIBUTE((nonnull(1)));
-/*******************************************************************//**
-Continue to tokenize a document. */
-UNIV_INTERN
-void
-fts_tokenize_document_next(
-/*=======================*/
- fts_doc_t* doc, /*!< in/out: document to
- tokenize */
- ulint add_pos, /*!< in: add this position to all
- tokens from this tokenization */
- fts_doc_t* result) /*!< out: if provided, save
- result tokens here */
- MY_ATTRIBUTE((nonnull(1)));
+/** Check if a fts token is a stopword or less than fts_min_token_size
+or greater than fts_max_token_size.
+@param[in] token token string
+@param[in] stopwords stopwords rb tree
+@param[in] cs token charset
+@retval true if it is not stopword and length in range
+@retval false if it is stopword or length not in range */
+bool
+fts_check_token(
+ const fts_string_t* token,
+ const ib_rbt_t* stopwords,
+ const CHARSET_INFO* cs);
+
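A short, hypothetical filter sketch showing where fts_check_token() fits in a tokenizer loop:

/* Hypothetical helper: true only if the token is not a stopword and
its length is within [fts_min_token_size, fts_max_token_size]. */
static bool fts_token_wanted(
	const fts_string_t*	token,
	const ib_rbt_t*		stopwords,
	const CHARSET_INFO*	cs)
{
	return(fts_check_token(token, stopwords, cs));
}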
/******************************************************************//**
Initialize a document. */
-UNIV_INTERN
void
fts_doc_init(
/*=========*/
@@ -255,7 +236,6 @@ fts_doc_init(
Do a binary search for a doc id in the array
@return +ve index if found -ve index where it should be
inserted if not found */
-UNIV_INTERN
int
fts_bsearch(
/*========*/
@@ -266,7 +246,6 @@ fts_bsearch(
MY_ATTRIBUTE((nonnull, warn_unused_result));
/******************************************************************//**
Free document. */
-UNIV_INTERN
void
fts_doc_free(
/*=========*/
@@ -274,7 +253,6 @@ fts_doc_free(
MY_ATTRIBUTE((nonnull));
/******************************************************************//**
Free an fts_optimizer_word_t instance. */
-UNIV_INTERN
void
fts_word_free(
/*==========*/
@@ -283,7 +261,6 @@ fts_word_free(
/******************************************************************//**
Read the rows from the FTS index
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_index_fetch_nodes(
/*==================*/
@@ -295,17 +272,6 @@ fts_index_fetch_nodes(
fts_fetch_t* fetch) /*!< in: fetch callback.*/
MY_ATTRIBUTE((nonnull));
/******************************************************************//**
-Create a fts_optimizer_word_t instance.
-@return new instance */
-UNIV_INTERN
-fts_word_t*
-fts_word_init(
-/*==========*/
- fts_word_t* word, /*!< in: word to initialize */
- byte* utf8, /*!< in: UTF-8 string */
- ulint len) /*!< in: length of string in bytes */
- MY_ATTRIBUTE((nonnull));
-/******************************************************************//**
Compare two fts_trx_table_t instances, we actually compare the
table id's here.
@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
@@ -329,7 +295,6 @@ fts_trx_table_id_cmp(
/******************************************************************//**
Commit a transaction.
@return DB_SUCCESS if all OK */
-UNIV_INTERN
dberr_t
fts_sql_commit(
/*===========*/
@@ -338,7 +303,6 @@ fts_sql_commit(
/******************************************************************//**
Rollback a transaction.
@return DB_SUCCESS if all OK */
-UNIV_INTERN
dberr_t
fts_sql_rollback(
/*=============*/
@@ -348,7 +312,6 @@ fts_sql_rollback(
Parse an SQL string. %s is replaced with the table's id. Don't acquire
the dict mutex
@return query graph */
-UNIV_INTERN
que_t*
fts_parse_sql_no_dict_lock(
/*=======================*/
@@ -360,7 +323,6 @@ fts_parse_sql_no_dict_lock(
Get value from config table. The caller must ensure that enough
space is allocated for value to hold the column contents
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_config_get_value(
/*=================*/
@@ -376,7 +338,6 @@ Get value specific to an FTS index from the config table. The caller
must ensure that enough space is allocated for value to hold the
column contents.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_config_get_index_value(
/*=======================*/
@@ -390,7 +351,6 @@ fts_config_get_index_value(
/******************************************************************//**
Set the value in the config table for name.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_config_set_value(
/*=================*/
@@ -404,7 +364,6 @@ fts_config_set_value(
/****************************************************************//**
Set an ulint value in the config table.
@return DB_SUCCESS if all OK else error code */
-UNIV_INTERN
dberr_t
fts_config_set_ulint(
/*=================*/
@@ -416,7 +375,6 @@ fts_config_set_ulint(
/******************************************************************//**
Set the value specific to an FTS index in the config table.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_config_set_index_value(
/*=======================*/
@@ -427,36 +385,11 @@ fts_config_set_index_value(
 fts_string_t* value) /*!< in: value to write */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Increment the value in the config table for column name.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_config_increment_value(
-/*=======================*/
- trx_t* trx, /*!< transaction */
- fts_table_t* fts_table, /*!< in: the indexed FTS table */
- const char* name, /*!< in: increment config value
- for this parameter name */
- ulint delta) /*!< in: increment by this much */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Increment the per index value in the config table for column name.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_config_increment_index_value(
-/*=============================*/
- trx_t* trx, /*!< transaction */
- dict_index_t* index, /*!< in: FTS index */
- const char* name, /*!< in: increment config value
- for this parameter name */
- ulint delta) /*!< in: increment by this much */
- MY_ATTRIBUTE((nonnull));
+
+#ifdef FTS_OPTIMIZE_DEBUG
/******************************************************************//**
Get an ulint value from the config table.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_config_get_index_ulint(
/*=======================*/
@@ -465,10 +398,11 @@ fts_config_get_index_ulint(
const char* name, /*!< in: param name */
ulint* int_value) /*!< out: value */
MY_ATTRIBUTE((nonnull, warn_unused_result));
+#endif /* FTS_OPTIMIZE_DEBUG */
+
/******************************************************************//**
Set an ulint value in the config table.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_config_set_index_ulint(
/*=======================*/
@@ -480,7 +414,6 @@ fts_config_set_index_ulint(
/******************************************************************//**
Get an ulint value from the config table.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
fts_config_get_ulint(
/*=================*/
@@ -492,7 +425,6 @@ fts_config_get_ulint(
/******************************************************************//**
Search cache for word.
@return the word node vector if found else NULL */
-UNIV_INTERN
const ib_vector_t*
fts_cache_find_word(
/*================*/
@@ -501,43 +433,18 @@ fts_cache_find_word(
const fts_string_t*
text) /*!< in: word to search for */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Check cache for deleted doc id.
-@return TRUE if deleted */
-UNIV_INTERN
-ibool
-fts_cache_is_deleted_doc_id(
-/*========================*/
- const fts_cache_t*
- cache, /*!< in: cache ito search */
- doc_id_t doc_id) /*!< in: doc id to search for */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+
/******************************************************************//**
Append deleted doc ids to vector and sort the vector. */
-UNIV_INTERN
void
fts_cache_append_deleted_doc_ids(
/*=============================*/
const fts_cache_t*
cache, /*!< in: cache to use */
ib_vector_t* vector); /*!< in: append to this vector */
-#ifdef FTS_DOC_STATS_DEBUG
-/******************************************************************//**
-Get the total number of words in the FTS for a particular FTS index.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_get_total_word_count(
-/*=====================*/
- trx_t* trx, /*!< in: transaction */
- dict_index_t* index, /*!< in: for this index */
- ulint* total) /*!< out: total words */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#endif
/******************************************************************//**
Search the index specific cache for a particular FTS index.
@return the index specific cache else NULL */
-UNIV_INTERN
fts_index_cache_t*
fts_find_index_cache(
/*================*/
@@ -549,7 +456,7 @@ fts_find_index_cache(
/******************************************************************//**
Write the table id to the given buffer (including final NUL). Buffer must be
at least FTS_AUX_MIN_TABLE_ID_LENGTH bytes long.
-@return number of bytes written */
+@return number of bytes written */
UNIV_INLINE
int
fts_write_object_id(
@@ -573,7 +480,6 @@ fts_read_object_id(
/******************************************************************//**
Get the table id.
@return number of bytes written */
-UNIV_INTERN
int
fts_get_table_id(
/*=============*/
@@ -587,11 +493,10 @@ fts_get_table_id(
@param[in] fts_table metadata on fulltext-indexed table
@return the prefix, must be freed with ut_free() */
-UNIV_INTERN char* fts_get_table_name_prefix(const fts_table_t* fts_table)
+char* fts_get_table_name_prefix(const fts_table_t* fts_table)
MY_ATTRIBUTE((nonnull, malloc, warn_unused_result));
/******************************************************************//**
Add node positions. */
-UNIV_INTERN
void
fts_cache_node_add_positions(
/*=========================*/
@@ -604,16 +509,13 @@ fts_cache_node_add_positions(
/******************************************************************//**
Create the config table name for retrieving index specific value.
@return index config parameter name */
-UNIV_INTERN
char*
fts_config_create_index_param_name(
/*===============================*/
- const char* param, /*!< in: base name of param */
- const dict_index_t* index) /*!< in: index for config */
+ const char* param, /*!< in: base name of param */
+ const dict_index_t* index) /*!< in: index for config */
MY_ATTRIBUTE((nonnull, malloc, warn_unused_result));
-#ifndef UNIV_NONINL
#include "fts0priv.ic"
-#endif
#endif /* INNOBASE_FTS0PRIV_H */
diff --git a/storage/innobase/include/fts0priv.ic b/storage/innobase/include/fts0priv.ic
index 6d52edc75a1..ed737e520d6 100644
--- a/storage/innobase/include/fts0priv.ic
+++ b/storage/innobase/include/fts0priv.ic
@@ -26,7 +26,7 @@ Created 2011/11/12 Sunny Bains
/******************************************************************//**
Write the table id to the given buffer (including final NUL). Buffer must be
at least FTS_AUX_MIN_TABLE_ID_LENGTH bytes long.
-@return number of bytes written */
+@return number of bytes written */
UNIV_INLINE
int
fts_write_object_id(
@@ -46,36 +46,31 @@ fts_write_object_id(
 /* Use this to construct the old (5.6.14 and 5.7.3) Windows
 ambiguous aux table names */
DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name",
- return(sprintf(str, "%016llu", id)););
+ return(sprintf(str, "%016llu", (ulonglong) id)););
#else /* _WIN32 */
 /* Use this to construct the old (5.6.14 and 5.7.3) Windows
 ambiguous aux table names */
DBUG_EXECUTE_IF("innodb_test_wrong_windows_fts_aux_table_name",
- return(sprintf(str, "%016" PRIu64, id)););
+ return(sprintf(str, "%016llu", (ulonglong) id)););
DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name",
- return(sprintf(str, UINT64PFx, id)););
+ return(sprintf(str, "%016llx", (ulonglong) id)););
#endif /* _WIN32 */
/* As above, but this is only for those tables failing to rename. */
if (!hex_format) {
-#ifdef _WIN32
- // FIXME: Use ut_snprintf(), so does following one.
- return(sprintf(str, "%016llu", id));
-#else /* _WIN32 */
- return(sprintf(str, "%016" PRIu64, id));
-#endif /* _WIN32 */
+ return(sprintf(str, "%016llu", (ulonglong) id));
}
- return(sprintf(str, UINT64PFx, id));
+ return(sprintf(str, "%016llx", (ulonglong) id));
}
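To make the unified format strings above concrete, a standalone sketch (not part of the patch) of what the two branches print for a small id such as 123:

    #include <cstdio>

    int main()
    {
            char str[21];   /* up to 20 decimal digits + NUL */

            /* decimal branch (!hex_format) */
            sprintf(str, "%016llu", (unsigned long long) 123);
            printf("%s\n", str);    /* prints 0000000000000123 */

            /* hex branch, as used for the renamed aux table names */
            sprintf(str, "%016llx", (unsigned long long) 123);
            printf("%s\n", str);    /* prints 000000000000007b */
            return 0;
    }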
/******************************************************************//**
Read the table id from the string generated by fts_write_object_id().
-@return TRUE if parse successful */
+@return TRUE if parse successful */
UNIV_INLINE
ibool
fts_read_object_id(
@@ -91,7 +86,7 @@ fts_read_object_id(
/******************************************************************//**
Compare two fts_trx_table_t instances.
-@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
+@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
UNIV_INLINE
int
fts_trx_table_cmp(
@@ -99,8 +94,11 @@ fts_trx_table_cmp(
const void* p1, /*!< in: id1 */
const void* p2) /*!< in: id2 */
{
- const dict_table_t* table1 = (*(const fts_trx_table_t**) p1)->table;
- const dict_table_t* table2 = (*(const fts_trx_table_t**) p2)->table;
+ const dict_table_t* table1
+ = (*static_cast<const fts_trx_table_t* const*>(p1))->table;
+
+ const dict_table_t* table2
+ = (*static_cast<const fts_trx_table_t* const*>(p2))->table;
return((table1->id > table2->id)
? 1
@@ -119,8 +117,9 @@ fts_trx_table_id_cmp(
const void* p1, /*!< in: id1 */
const void* p2) /*!< in: id2 */
{
- const ullint* table_id = (const ullint*) p1;
- const dict_table_t* table2 = (*(const fts_trx_table_t**) p2)->table;
+ const uintmax_t* table_id = static_cast<const uintmax_t*>(p1);
+ const dict_table_t* table2
+ = (*static_cast<const fts_trx_table_t* const*>(p2))->table;
return((*table_id > table2->id)
? 1
diff --git a/storage/innobase/include/fts0tlex.h b/storage/innobase/include/fts0tlex.h
index 038cbb8858d..89655ca13d4 100644
--- a/storage/innobase/include/fts0tlex.h
+++ b/storage/innobase/include/fts0tlex.h
@@ -694,7 +694,7 @@ extern int yylex (yyscan_t yyscanner);
#undef yyTABLES_NAME
#endif
-#line 68 "fts0tlex.l"
+#line 69 "fts0tlex.l"
#line 701 "../include/fts0tlex.h"
diff --git a/storage/innobase/include/fts0tokenize.h b/storage/innobase/include/fts0tokenize.h
new file mode 100644
index 00000000000..2c4b2418ecb
--- /dev/null
+++ b/storage/innobase/include/fts0tokenize.h
@@ -0,0 +1,188 @@
+/*****************************************************************************
+
+Copyright (c) 2014, 2015, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fts0tokenize.h
+Full Text Search plugin tokenizer, adapted from MyISAM
+
+Created 2014/11/17 Shaohua Wang
+***********************************************************************/
+
+#include "ft_global.h"
+#include "mysql/plugin_ftparser.h"
+#include "m_ctype.h"
+
+/* Macros and structs below are from ftdefs.h in MyISAM */
+/** Check whether a character is a true word character */
+#define true_word_char(c, ch) ((c) & (_MY_U | _MY_L | _MY_NMR) || (ch) == '_')
+
+/** Check whether a character is a misc word character */
+#define misc_word_char(X) 0
+
+/** Boolean search syntax */
+static const char* fts_boolean_syntax = DEFAULT_FTB_SYNTAX;
+
+#define FTB_YES (fts_boolean_syntax[0])
+#define FTB_EGAL (fts_boolean_syntax[1])
+#define FTB_NO (fts_boolean_syntax[2])
+#define FTB_INC (fts_boolean_syntax[3])
+#define FTB_DEC (fts_boolean_syntax[4])
+#define FTB_LBR (fts_boolean_syntax[5])
+#define FTB_RBR (fts_boolean_syntax[6])
+#define FTB_NEG (fts_boolean_syntax[7])
+#define FTB_TRUNC (fts_boolean_syntax[8])
+#define FTB_LQUOT (fts_boolean_syntax[10])
+#define FTB_RQUOT (fts_boolean_syntax[11])
+
+/** FTS query token */
+typedef struct st_ft_word {
+ uchar* pos; /*!< word start pointer */
+ uint len; /*!< word len */
+ double weight; /*!< word weight, unused in innodb */
+} FT_WORD;
+
+/** Tokenizer for ngram, based on ft_get_word() in MyISAM's ft_parser.c.
+Differences: a. code format changed; b. stopword processing removed.
+@param[in] cs charset
+@param[in,out] start doc start pointer
+@param[in,out] end doc end pointer
+@param[in,out] word token
+@param[in,out] info token info
+@retval 0 eof
+@retval 1 word found
+@retval 2 left bracket
+@retval 3 right bracket
+@retval 4 stopword found */
+inline
+uchar
+fts_get_word(
+ const CHARSET_INFO* cs,
+ uchar** start,
+ uchar* end,
+ FT_WORD* word,
+ MYSQL_FTPARSER_BOOLEAN_INFO*
+ info)
+{
+ uchar* doc = *start;
+ int ctype;
+ uint mwc;
+ uint length;
+ int mbl;
+
+ info->yesno = (FTB_YES == ' ') ? 1 : (info->quot != 0);
+ info->weight_adjust = info->wasign = 0;
+ info->type = FT_TOKEN_EOF;
+
+ while (doc < end) {
+ for (; doc < end;
+ doc += (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1))) {
+ mbl = cs->cset->ctype(cs, &ctype, doc, end);
+
+ if (true_word_char(ctype, *doc)) {
+ break;
+ }
+
+ if (*doc == FTB_RQUOT && info->quot) {
+ *start = doc + 1;
+ info->type = FT_TOKEN_RIGHT_PAREN;
+
+ return(info->type);
+ }
+
+ if (!info->quot) {
+ if (*doc == FTB_LBR
+ || *doc == FTB_RBR
+ || *doc == FTB_LQUOT) {
+ /* param->prev=' '; */
+ *start = doc + 1;
+ if (*doc == FTB_LQUOT) {
+ info->quot = (char*)1;
+ }
+
+ info->type = (*doc == FTB_RBR ?
+ FT_TOKEN_RIGHT_PAREN :
+ FT_TOKEN_LEFT_PAREN);
+
+ return(info->type);
+ }
+
+ if (info->prev == ' ') {
+ if (*doc == FTB_YES) {
+ info->yesno = +1;
+ continue;
+ } else if (*doc == FTB_EGAL) {
+ info->yesno = 0;
+ continue;
+ } else if (*doc == FTB_NO) {
+ info->yesno = -1;
+ continue;
+ } else if (*doc == FTB_INC) {
+ info->weight_adjust++;
+ continue;
+ } else if (*doc == FTB_DEC) {
+ info->weight_adjust--;
+ continue;
+ } else if (*doc == FTB_NEG) {
+ info->wasign = !info->wasign;
+ continue;
+ }
+ }
+ }
+
+ info->prev = *doc;
+ info->yesno = (FTB_YES == ' ') ? 1 : (info->quot != 0);
+ info->weight_adjust = info->wasign = 0;
+ }
+
+ mwc = length = 0;
+ for (word->pos = doc;
+ doc < end;
+ length++, doc += (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1))) {
+ mbl = cs->cset->ctype(cs, &ctype, doc, end);
+
+ if (true_word_char(ctype, *doc)) {
+ mwc = 0;
+ } else if (!misc_word_char(*doc) || mwc) {
+ break;
+ } else {
+ mwc++;
+ }
+ }
+
+ /* Be sure *prev is true_word_char. */
+ info->prev = 'A';
+ word->len = (uint)(doc-word->pos) - mwc;
+
+ if ((info->trunc = (doc < end && *doc == FTB_TRUNC))) {
+ doc++;
+ }
+
+ /* We don't check stopword here. */
+ *start = doc;
+ info->type = FT_TOKEN_WORD;
+
+ return(info->type);
+ }
+
+ if (info->quot) {
+ *start = doc;
+ info->type = FT_TOKEN_RIGHT_PAREN;
+ }
+
+ return(info->type);
+}
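A hedged usage sketch (not from the patch) of driving fts_get_word() over a boolean query; the loop follows the retval table above, and walk_query with its simplistic phrase handling is an illustrative assumption:

    #include <cstring>
    /* assumes fts0tokenize.h (this header) is already included */

    static void
    walk_query(const CHARSET_INFO* cs, uchar* doc, uint len)
    {
            uchar*                          start = doc;
            uchar*                          end = doc + len;
            FT_WORD                         word;
            MYSQL_FTPARSER_BOOLEAN_INFO     info;

            memset(&info, 0, sizeof(info));
            info.prev = ' ';

            for (;;) {
                    switch (fts_get_word(cs, &start, end, &word, &info)) {
                    case FT_TOKEN_WORD:
                            /* word.pos/word.len delimit the token;
                            info.yesno carries any +/- modifier */
                            break;
                    case FT_TOKEN_RIGHT_PAREN:
                            info.quot = NULL;  /* simplistic: close any
                                               open phrase */
                            break;
                    case FT_TOKEN_LEFT_PAREN:
                            break;
                    default:                   /* FT_TOKEN_EOF */
                            return;
                    }
            }
    }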
diff --git a/storage/innobase/include/fts0types.h b/storage/innobase/include/fts0types.h
index 3cb01a92df0..a08a60b9e95 100644
--- a/storage/innobase/include/fts0types.h
+++ b/storage/innobase/include/fts0types.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2019, MariaDB Corporation.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,16 +27,16 @@ Created 2007-03-27 Sunny Bains
#ifndef INNOBASE_FTS0TYPES_H
#define INNOBASE_FTS0TYPES_H
+#include "fts0fts.h"
+#include "fut0fut.h"
+#include "pars0pars.h"
#include "que0types.h"
#include "ut0byte.h"
-#include "fut0fut.h"
#include "ut0rbt.h"
-#include "fts0fts.h"
/** Types used within FTS. */
struct fts_que_t;
struct fts_node_t;
-struct fts_utf8_str_t;
/** Callbacks used within FTS. */
typedef pars_user_func_cb_t fts_sql_callback;
@@ -274,6 +274,10 @@ struct fts_doc_t {
same lifespan, most notably
the vector of token positions */
CHARSET_INFO* charset; /*!< Document's charset info */
+
+ st_mysql_ftparser* parser; /*!< fts plugin parser */
+
+ ib_rbt_t* stopwords; /*!< Stopwords */
};
/** A token and its positions within a document. */
@@ -289,33 +293,6 @@ struct fts_token_t {
extern const fts_index_selector_t fts_index_selector[];
/******************************************************************//**
-Compare two UTF-8 strings. */
-UNIV_INLINE
-int
-fts_utf8_string_cmp(
-/*================*/
- /*!< out:
- < 0 if n1 < n2,
- 0 if n1 == n2,
- > 0 if n1 > n2 */
- const void* p1, /*!< in: key */
- const void* p2); /*!< in: node */
-
-/******************************************************************//**
-Compare two UTF-8 strings, and return match (0) if
-passed in "key" value equals or is the prefix of the "node" value. */
-UNIV_INLINE
-int
-fts_utf8_string_cmp_prefix(
-/*=======================*/
- /*!< out:
- < 0 if n1 < n2,
- 0 if n1 == n2,
- > 0 if n1 > n2 */
- const void* p1, /*!< in: key */
- const void* p2); /*!< in: node */
-
-/******************************************************************//**
Compare two fts_trx_row_t instances doc_ids. */
UNIV_INLINE
int
@@ -365,11 +342,11 @@ fts_decode_vlc(
incremented by the number of bytes decoded */
/******************************************************************//**
-Duplicate an UTF-8 string. */
+Duplicate a string. */
UNIV_INLINE
void
-fts_utf8_string_dup(
-/*================*/
+fts_string_dup(
+/*===========*/
/*!< out:
< 0 if n1 < n2,
0 if n1 == n2,
@@ -401,43 +378,6 @@ fts_encode_int(
enough space */
/******************************************************************//**
-Decode a UTF-8 character.
-
-http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf:
-
- Scalar Value 1st Byte 2nd Byte 3rd Byte 4th Byte
-00000000 0xxxxxxx 0xxxxxxx
-00000yyy yyxxxxxx 110yyyyy 10xxxxxx
-zzzzyyyy yyxxxxxx 1110zzzz 10yyyyyy 10xxxxxx
-000uuuzz zzzzyyyy yyxxxxxx 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx
-
-This function decodes UTF-8 sequences up to 6 bytes (31 bits).
-
-On error *ptr will point to the first byte that was not correctly
-decoded. This will hopefully help in resyncing the input. */
-UNIV_INLINE
-ulint
-fts_utf8_decode(
-/*============*/
- /*!< out: UTF8_ERROR if *ptr
- did not point to a valid
- UTF-8 sequence, or the
- Unicode code point. */
- const byte** ptr); /*!< in/out: pointer to
- UTF-8 string. The
- pointer is advanced to
- the start of the next
- character. */
-
-/******************************************************************//**
-Lowercase an UTF-8 string. */
-UNIV_INLINE
-void
-fts_utf8_tolower(
-/*=============*/
- fts_string_t* str); /*!< in: string */
-
-/******************************************************************//**
Get the selected FTS aux INDEX suffix. */
UNIV_INLINE
const char*
@@ -445,38 +385,19 @@ fts_get_suffix(
/*===========*/
ulint selected); /*!< in: selected index */
-/********************************************************************
-Get the number of index selectors. */
-UNIV_INLINE
-ulint
-fts_get_n_selectors(void);
-/*=====================*/
-
-/******************************************************************//**
-Select the FTS auxiliary index for the given string.
+/** Select the FTS auxiliary index for the given character.
+@param[in] cs charset
+@param[in] str string
+@param[in] len string length in bytes
@return the index to use for the string */
UNIV_INLINE
ulint
fts_select_index(
-/*=============*/
- const CHARSET_INFO* cs, /*!< Charset */
- const byte* str, /*!< in: word string */
- ulint len); /*!< in: string length */
-
-/********************************************************************
-Select the next FTS auxiliary index for the given character.
-@return the next index to use for character */
-UNIV_INLINE
-ulint
-fts_select_next_index(
-/*==================*/
- const CHARSET_INFO* cs, /*!< Charset */
- const byte* str, /*!< in: string */
- ulint len); /*!< in: string length */
+ const CHARSET_INFO* cs,
+ const byte* str,
+ ulint len);
-#ifndef UNIV_NONINL
#include "fts0types.ic"
#include "fts0vlc.ic"
-#endif
#endif /* INNOBASE_FTS0TYPES_H */
diff --git a/storage/innobase/include/fts0types.ic b/storage/innobase/include/fts0types.ic
index 5e29cf6d8c5..e388d6257f6 100644
--- a/storage/innobase/include/fts0types.ic
+++ b/storage/innobase/include/fts0types.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,23 +27,13 @@ Created 2007-03-27 Sunny Bains
#ifndef INNOBASE_FTS0TYPES_IC
#define INNOBASE_FTS0TYPES_IC
-#include <ctype.h>
-
-#include "rem0cmp.h"
-#include "ha_prototypes.h"
-
-extern const ulint UTF8_ERROR;
-
-/* Determine if a UTF-8 continuation byte is valid. */
-#define fts_utf8_is_valid(b) (((b) & 0xC0) == 0x80)
-
/******************************************************************//**
-Duplicate an UTF-8 string.
+Duplicate a string.
@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
UNIV_INLINE
void
-fts_utf8_string_dup(
-/*================*/
+fts_string_dup(
+/*===========*/
fts_string_t* dst, /*!< in: dup to here */
const fts_string_t* src, /*!< in: src string */
mem_heap_t* heap) /*!< in: heap to use */
@@ -103,183 +94,6 @@ fts_update_doc_id_cmp(
return((int)(up1->doc_id - up2->doc_id));
}
-
-/******************************************************************//**
-Lowercase an UTF-8 string. */
-UNIV_INLINE
-void
-fts_utf8_tolower(
-/*=============*/
- fts_string_t* str) /*!< in: string */
-{
- innobase_casedn_str((char*) str->f_str);
-}
-
-/******************************************************************//**
-Compare two UTF-8 strings.
-@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
-UNIV_INLINE
-int
-fts_utf8_string_cmp(
-/*================*/
- const void* p1, /*!< in: key */
- const void* p2) /*!< in: node */
-{
- const fts_string_t* s1 = (const fts_string_t*) p1;
- const fts_string_t* s2 = (const fts_string_t*) p2;
-
- return(cmp_data_data_slow_varchar(
- s1->f_str, s1->f_len, s2->f_str, s2->f_len));
-}
-
-/******************************************************************//**
-Compare two UTF-8 strings, and return match (0) if
-passed in "key" value equals or is the prefix of the "node" value.
-@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
-UNIV_INLINE
-int
-fts_utf8_string_cmp_prefix(
-/*=======================*/
- const void* p1, /*!< in: key */
- const void* p2) /*!< in: node */
-{
- int result;
- ulint len;
-
- const fts_string_t* s1 = (const fts_string_t*) p1;
- const fts_string_t* s2 = (const fts_string_t*) p2;
-
- len = ut_min(s1->f_len, s2->f_len);
-
- result = cmp_data_data_slow_varchar(s1->f_str, len, s2->f_str, len);
-
- if (result) {
- return(result);
- }
-
- if (s1->f_len > s2->f_len) {
- return(1);
- }
-
- return(0);
-}
-
-/******************************************************************//**
-Decode a UTF-8 character.
-
-http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf:
-
- Scalar Value 1st Byte 2nd Byte 3rd Byte 4th Byte
-00000000 0xxxxxxx 0xxxxxxx
-00000yyy yyxxxxxx 110yyyyy 10xxxxxx
-zzzzyyyy yyxxxxxx 1110zzzz 10yyyyyy 10xxxxxx
-000uuuzz zzzzyyyy yyxxxxxx 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx
-
-This function decodes UTF-8 sequences up to 6 bytes (31 bits).
-
-On error *ptr will point to the first byte that was not correctly
-decoded. This will hopefully help in resyncing the input.
-@return UTF8_ERROR if *ptr did not point to a valid
-UTF-8 sequence, or the Unicode code point. */
-UNIV_INLINE
-ulint
-fts_utf8_decode(
-/*============*/
- const byte** ptr) /*!< in/out: pointer to
- UTF-8 string. The
- pointer is advanced to
- the start of the next
- character. */
-{
- const byte* p = *ptr;
- ulint ch = *p++;
-#ifdef UNIV_DEBUG
- ulint min_ch;
-#endif /* UNIV_DEBUG */
-
- if (UNIV_LIKELY(ch < 0x80)) {
- /* 0xxxxxxx */
- } else if (UNIV_UNLIKELY(ch < 0xC0)) {
- /* A continuation byte cannot start a code. */
- goto err_exit;
- } else if (ch < 0xE0) {
- /* 110yyyyy 10xxxxxx */
- ch &= 0x1F;
- ut_d(min_ch = 0x80);
- goto get1;
- } else if (ch < 0xF0) {
- /* 1110zzzz 10yyyyyy 10xxxxxx */
- ch &= 0x0F;
- ut_d(min_ch = 0x800);
- goto get2;
- } else if (ch < 0xF8) {
- /* 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx */
- ch &= 0x07;
- ut_d(min_ch = 0x10000);
- goto get3;
- } else if (ch < 0xFC) {
- /* 111110tt 10uuuuuu 10zzzzzz 10yyyyyy 10xxxxxx */
- ch &= 0x03;
- ut_d(min_ch = 0x200000);
- goto get4;
- } else if (ch < 0xFE) {
- /* 1111110s 10tttttt 10uuuuuu 10zzzzzz 10yyyyyy 10xxxxxx */
- ut_d(min_ch = 0x4000000);
- if (!fts_utf8_is_valid(*p)) {
- goto err_exit;
- }
- ch <<= 6;
- ch |= (*p++) & 0x3F;
-get4:
- if (!fts_utf8_is_valid(*p)) {
- goto err_exit;
- }
- ch <<= 6;
- ch |= (*p++) & 0x3F;
-get3:
- if (!fts_utf8_is_valid(*p)) {
- goto err_exit;
- }
- ch <<= 6;
- ch |= (*p++) & 0x3F;
-get2:
- if (!fts_utf8_is_valid(*p)) {
- goto err_exit;
- }
- ch <<= 6;
- ch |= (*p++) & 0x3F;
-get1:
- if (!fts_utf8_is_valid(*p)) {
- goto err_exit;
- }
- ch <<= 6;
- ch |= (*p++) & 0x3F;
-
- /* The following is needed in the 6-byte case
- when ulint is wider than 32 bits. */
- ch &= 0xFFFFFFFF;
-
- /* The code positions U+D800 to U+DFFF (UTF-16 surrogate pairs)
- and U+FFFE and U+FFFF cannot occur in valid UTF-8. */
-
- if ( (ch >= 0xD800 && ch <= 0xDFFF)
-#ifdef UNIV_DEBUG
- || ch < min_ch
-#endif /* UNIV_DEBUG */
- || ch == 0xFFFE || ch == 0xFFFF) {
-
- ch = UTF8_ERROR;
- }
- } else {
-err_exit:
- ch = UTF8_ERROR;
- }
-
- *ptr = p;
-
- return(ch);
-}
-
/******************************************************************//**
Get the first character's code position for FTS index partition */
extern
@@ -290,16 +104,38 @@ innobase_strnxfrm(
const uchar* p2, /*!< in: string */
const ulint len2); /*!< in: string length */
-/******************************************************************//**
-Select the FTS auxiliary index for the given character.
-@return the index to use for the string */
+/** Check if the FTS index charset is CJK
+@param[in] cs charset
+@retval true if the charset is CJK
+@retval false if not */
+inline bool fts_is_charset_cjk(const CHARSET_INFO* cs)
+{
+ switch (cs->number) {
+ case 24: /* my_charset_gb2312_chinese_ci */
+ case 28: /* my_charset_gbk_chinese_ci */
+ case 1: /* my_charset_big5_chinese_ci */
+ case 12: /* my_charset_ujis_japanese_ci */
+ case 13: /* my_charset_sjis_japanese_ci */
+ case 95: /* my_charset_cp932_japanese_ci */
+ case 97: /* my_charset_eucjpms_japanese_ci */
+ case 19: /* my_charset_euckr_korean_ci */
+ return true;
+ default:
+ return false;
+ }
+}
+
+/** Select the FTS auxiliary index for the given character by range.
+@param[in] cs charset
+@param[in] str string
+@param[in] len string length
+@return the index to use for the string */
UNIV_INLINE
ulint
-fts_select_index(
-/*=============*/
- const CHARSET_INFO* cs, /*!< in: Charset */
- const byte* str, /*!< in: string */
- ulint len) /*!< in: string length */
+fts_select_index_by_range(
+ const CHARSET_INFO* cs,
+ const byte* str,
+ ulint len)
{
ulint selected = 0;
ulint value = innobase_strnxfrm(cs, str, len);
@@ -323,37 +159,64 @@ fts_select_index(
return(selected - 1);
}
-/******************************************************************//**
-Select the next FTS auxiliary index for the given character.
-@return the next index to use for character */
+/** Select the FTS auxiliary index for the given character by hash.
+@param[in] cs charset
+@param[in] str string
+@param[in] len string length
+@return the index to use for the string */
UNIV_INLINE
ulint
-fts_select_next_index(
-/*==================*/
- const CHARSET_INFO* cs, /*!< in: Charset */
- const byte* str, /*!< in: string */
- ulint len) /*!< in: string length */
+fts_select_index_by_hash(
+ const CHARSET_INFO* cs,
+ const byte* str,
+ ulint len)
{
- ulint selected = 0;
- ulint value = innobase_strnxfrm(cs, str, len);
+ int char_len;
+ ulong nr1 = 1;
+ ulong nr2 = 4;
- while (fts_index_selector[selected].value != 0) {
+ ut_ad(!(str == NULL && len > 0));
- if (fts_index_selector[selected].value == value) {
+ if (str == NULL || len == 0) {
+ return 0;
+ }
- return(selected + 1);
+ /* Get the first char */
+ /* JAN: TODO: MySQL 5.7 had
+ char_len = my_mbcharlen_ptr(cs, reinterpret_cast<const char*>(str),
+ reinterpret_cast<const char*>(str + len));
+ */
+ char_len = cs->cset->charlen(cs, str, str+len);
- } else if (fts_index_selector[selected].value > value) {
+ ut_ad(static_cast<ulint>(char_len) <= len);
- return(selected);
- }
+ /* Get collation hash code */
+ cs->coll->hash_sort(cs, str, char_len, &nr1, &nr2);
- ++selected;
- }
+ return(nr1 % FTS_NUM_AUX_INDEX);
+}
- ut_ad(selected > 0);
+/** Select the FTS auxiliary index for the given character.
+@param[in] cs charset
+@param[in] str string
+@param[in] len string length in bytes
+@return the index to use for the string */
+UNIV_INLINE
+ulint
+fts_select_index(
+ const CHARSET_INFO* cs,
+ const byte* str,
+ ulint len)
+{
+ ulint selected;
- return((ulint) selected);
+ if (fts_is_charset_cjk(cs)) {
+ selected = fts_select_index_by_hash(cs, str, len);
+ } else {
+ selected = fts_select_index_by_range(cs, str, len);
+ }
+
+ return(selected);
}
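A hedged sketch (not in the patch) of how the dispatch above is typically consumed together with fts_get_suffix(); the helper name is hypothetical:

    static const char*
    aux_suffix_for_word(const CHARSET_INFO* cs, const fts_string_t* word)
    {
            /* CJK charsets hash the first character; all others use the
            collation-weight ranges in fts_index_selector[]. */
            ulint selected = fts_select_index(cs, word->f_str, word->f_len);

            return(fts_get_suffix(selected));
    }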
/******************************************************************//**
@@ -367,22 +230,4 @@ fts_get_suffix(
return(fts_index_selector[selected].suffix);
}
-/******************************************************************//**
-Get the number of index selectors.
-@return The number of selectors */
-UNIV_INLINE
-ulint
-fts_get_n_selectors(void)
-/*=====================*/
-{
- ulint i = 0;
-
- // FIXME: This is a hack
- while (fts_index_selector[i].value != 0) {
- ++i;
- }
-
- return(i);
-}
-
#endif /* INNOBASE_FTS0TYPES_IC */
diff --git a/storage/innobase/include/fut0fut.h b/storage/innobase/include/fut0fut.h
index 4f7b9af2e5e..3c3f118bd68 100644
--- a/storage/innobase/include/fut0fut.h
+++ b/storage/innobase/include/fut0fut.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,29 +27,29 @@ Created 12/13/1995 Heikki Tuuri
#ifndef fut0fut_h
#define fut0fut_h
-#include "univ.i"
-
#include "fil0fil.h"
#include "mtr0mtr.h"
-/********************************************************************//**
-Gets a pointer to a file address and latches the page.
-@return pointer to a byte in a frame; the file page in the frame is
+/** Gets a pointer to a file address and latches the page.
+@param[in] space space id
+@param[in] page_size page size
+@param[in] addr file address
+@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_SX_LATCH
+@param[out] ptr_block file page
+@param[in,out] mtr mini-transaction
+@return pointer to a byte in (*ptr_block)->frame; the *ptr_block is
bufferfixed and latched */
UNIV_INLINE
byte*
fut_get_ptr(
-/*========*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- fil_addr_t addr, /*!< in: file address */
- ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */
- mtr_t* mtr); /*!< in: mtr handle */
-
-#ifndef UNIV_NONINL
-#include "fut0fut.ic"
-#endif
+ ulint space,
+ const page_size_t& page_size,
+ fil_addr_t addr,
+ rw_lock_type_t rw_latch,
+ mtr_t* mtr,
+ buf_block_t** ptr_block = NULL)
+ MY_ATTRIBUTE((warn_unused_result));
-#endif
+#include "fut0fut.ic"
+#endif /* fut0fut_h */
diff --git a/storage/innobase/include/fut0fut.ic b/storage/innobase/include/fut0fut.ic
index fd46f2d7f3d..bba84d0d80e 100644
--- a/storage/innobase/include/fut0fut.ic
+++ b/storage/innobase/include/fut0fut.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,31 +26,43 @@ Created 12/13/1995 Heikki Tuuri
#include "sync0rw.h"
#include "buf0buf.h"
-/********************************************************************//**
-Gets a pointer to a file address and latches the page.
-@return pointer to a byte in a frame; the file page in the frame is
+/** Gets a pointer to a file address and latches the page.
+@param[in] space space id
+@param[in] page_size page size
+@param[in] addr file address
+@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_SX_LATCH
+@param[in,out] mtr mini-transaction
+@param[out] ptr_block file page
+@return pointer to a byte in (*ptr_block)->frame; the *ptr_block is
bufferfixed and latched */
UNIV_INLINE
byte*
fut_get_ptr(
-/*========*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- fil_addr_t addr, /*!< in: file address */
- ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */
- mtr_t* mtr) /*!< in: mtr handle */
+ ulint space,
+ const page_size_t& page_size,
+ fil_addr_t addr,
+ rw_lock_type_t rw_latch,
+ mtr_t* mtr,
+ buf_block_t** ptr_block)
{
buf_block_t* block;
- byte* ptr;
+ byte* ptr = NULL;
ut_ad(addr.boffset < UNIV_PAGE_SIZE);
- ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
+ ut_ad((rw_latch == RW_S_LATCH)
+ || (rw_latch == RW_X_LATCH)
+ || (rw_latch == RW_SX_LATCH));
+
+ block = buf_page_get(page_id_t(space, addr.page), page_size,
+ rw_latch, mtr);
- block = buf_page_get(space, zip_size, addr.page, rw_latch, mtr);
ptr = buf_block_get_frame(block) + addr.boffset;
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
+ if (ptr_block != NULL) {
+ *ptr_block = block;
+ }
+
return(ptr);
}
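A hedged caller-side sketch of the new signature (not from the patch); read_byte_at is hypothetical, and the block returned through ptr_block stays buffer-fixed and latched until the mini-transaction commits:

    static byte
    read_byte_at(
            ulint                   space,
            const page_size_t&      page_size,
            fil_addr_t              addr,
            mtr_t*                  mtr)
    {
            buf_block_t*    block;
            byte*           ptr = fut_get_ptr(space, page_size, addr,
                                              RW_S_LATCH, mtr, &block);

            /* block is buffer-fixed and S-latched here */
            return(*ptr);
    }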
diff --git a/storage/innobase/include/fut0lst.h b/storage/innobase/include/fut0lst.h
index f8a99bf1bd4..187b673d2fd 100644
--- a/storage/innobase/include/fut0lst.h
+++ b/storage/innobase/include/fut0lst.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,7 +26,7 @@ Created 11/28/1995 Heikki Tuuri
#ifndef fut0lst_h
#define fut0lst_h
-#include "univ.i"
+#ifndef UNIV_INNOCHECKSUM
#include "fil0fil.h"
#include "mtr0mtr.h"
@@ -41,11 +41,12 @@ typedef byte flst_node_t;
/* The physical size of a list base node in bytes */
#define FLST_BASE_NODE_SIZE (4 + 2 * FIL_ADDR_SIZE)
+#endif /* !UNIV_INNOCHECKSUM */
/* The physical size of a list node in bytes */
#define FLST_NODE_SIZE (2 * FIL_ADDR_SIZE)
-#ifndef UNIV_HOTBACKUP
+#ifndef UNIV_INNOCHECKSUM
/********************************************************************//**
Initializes a list base node. */
UNIV_INLINE
@@ -56,7 +57,6 @@ flst_init(
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************************//**
Adds a node as the last node in a list. */
-UNIV_INTERN
void
flst_add_last(
/*==========*/
@@ -65,7 +65,6 @@ flst_add_last(
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************************//**
Adds a node as the first node in a list. */
-UNIV_INTERN
void
flst_add_first(
/*===========*/
@@ -73,46 +72,23 @@ flst_add_first(
flst_node_t* node, /*!< in: node to add */
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************************//**
-Inserts a node after another in a list. */
-UNIV_INTERN
-void
-flst_insert_after(
-/*==============*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node1, /*!< in: node to insert after */
- flst_node_t* node2, /*!< in: node to add */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Inserts a node before another in a list. */
-UNIV_INTERN
-void
-flst_insert_before(
-/*===============*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node2, /*!< in: node to insert */
- flst_node_t* node3, /*!< in: node to insert before */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
Removes a node. */
-UNIV_INTERN
void
flst_remove(
/*========*/
flst_base_node_t* base, /*!< in: pointer to base node of list */
flst_node_t* node2, /*!< in: node to remove */
mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Gets list length.
-@return length */
+/** Get the length of a list.
+@param[in] base base node
+@return length */
UNIV_INLINE
ulint
flst_get_len(
-/*=========*/
- const flst_base_node_t* base, /*!< in: pointer to base node */
- mtr_t* mtr); /*!< in: mini-transaction handle */
+ const flst_base_node_t* base);
/********************************************************************//**
Gets list first node address.
-@return file address */
+@return file address */
UNIV_INLINE
fil_addr_t
flst_get_first(
@@ -121,7 +97,7 @@ flst_get_first(
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************************//**
Gets list last node address.
-@return file address */
+@return file address */
UNIV_INLINE
fil_addr_t
flst_get_last(
@@ -130,7 +106,7 @@ flst_get_last(
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************************//**
Gets list next node address.
-@return file address */
+@return file address */
UNIV_INLINE
fil_addr_t
flst_get_next_addr(
@@ -139,7 +115,7 @@ flst_get_next_addr(
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************************//**
Gets list prev node address.
-@return file address */
+@return file address */
UNIV_INLINE
fil_addr_t
flst_get_prev_addr(
@@ -157,7 +133,7 @@ flst_write_addr(
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************************//**
Reads a file address.
-@return file address */
+@return file address */
UNIV_INLINE
fil_addr_t
flst_read_addr(
@@ -166,27 +142,15 @@ flst_read_addr(
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************************//**
Validates a file-based list.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
flst_validate(
/*==========*/
const flst_base_node_t* base, /*!< in: pointer to base node of list */
mtr_t* mtr1); /*!< in: mtr */
-/********************************************************************//**
-Prints info of a file-based list. */
-UNIV_INTERN
-void
-flst_print(
-/*=======*/
- const flst_base_node_t* base, /*!< in: pointer to base node of list */
- mtr_t* mtr); /*!< in: mtr */
-
-#ifndef UNIV_NONINL
#include "fut0lst.ic"
-#endif
-#endif /* !UNIV_HOTBACKUP */
+#endif /* !UNIV_INNOCHECKSUM */
#endif
diff --git a/storage/innobase/include/fut0lst.ic b/storage/innobase/include/fut0lst.ic
index ba617ff0db5..fae7fa078bf 100644
--- a/storage/innobase/include/fut0lst.ic
+++ b/storage/innobase/include/fut0lst.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -54,7 +54,9 @@ flst_write_addr(
mtr_t* mtr) /*!< in: mini-transaction handle */
{
ut_ad(faddr && mtr);
- ut_ad(mtr_memo_contains_page(mtr, faddr, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page_flagged(mtr, faddr,
+ MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX));
ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA);
ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA);
@@ -65,7 +67,7 @@ flst_write_addr(
/********************************************************************//**
Reads a file address.
-@return file address */
+@return file address */
UNIV_INLINE
fil_addr_t
flst_read_addr(
@@ -94,29 +96,29 @@ flst_init(
flst_base_node_t* base, /*!< in: pointer to base node */
mtr_t* mtr) /*!< in: mini-transaction handle */
{
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page_flagged(mtr, base,
+ MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX));
mlog_write_ulint(base + FLST_LEN, 0, MLOG_4BYTES, mtr);
flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr);
flst_write_addr(base + FLST_LAST, fil_addr_null, mtr);
}
-/********************************************************************//**
-Gets list length.
-@return length */
+/** Get the length of a list.
+@param[in] base base node
+@return length */
UNIV_INLINE
ulint
flst_get_len(
-/*=========*/
- const flst_base_node_t* base, /*!< in: pointer to base node */
- mtr_t* mtr) /*!< in: mini-transaction handle */
+ const flst_base_node_t* base)
{
- return(mtr_read_ulint(base + FLST_LEN, MLOG_4BYTES, mtr));
+ return(mach_read_from_4(base + FLST_LEN));
}
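The mtr parameter could be dropped because mach_read_from_4() is a plain big-endian read of the 4-byte FLST_LEN field; a standalone sketch of the equivalent decoding (illustration only, assuming InnoDB's most-significant-byte-first on-disk format):

    #include <cstdint>

    static inline uint32_t
    be_read_4(const unsigned char* b)
    {
            return((uint32_t(b[0]) << 24) | (uint32_t(b[1]) << 16)
                   | (uint32_t(b[2]) << 8) | uint32_t(b[3]));
    }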
/********************************************************************//**
Gets list first node address.
-@return file address */
+@return file address */
UNIV_INLINE
fil_addr_t
flst_get_first(
@@ -129,7 +131,7 @@ flst_get_first(
/********************************************************************//**
Gets list last node address.
-@return file address */
+@return file address */
UNIV_INLINE
fil_addr_t
flst_get_last(
@@ -142,7 +144,7 @@ flst_get_last(
/********************************************************************//**
Gets list next node address.
-@return file address */
+@return file address */
UNIV_INLINE
fil_addr_t
flst_get_next_addr(
@@ -155,7 +157,7 @@ flst_get_next_addr(
/********************************************************************//**
Gets list prev node address.
-@return file address */
+@return file address */
UNIV_INLINE
fil_addr_t
flst_get_prev_addr(
diff --git a/storage/innobase/include/gis0geo.h b/storage/innobase/include/gis0geo.h
new file mode 100644
index 00000000000..dea6d63f4e0
--- /dev/null
+++ b/storage/innobase/include/gis0geo.h
@@ -0,0 +1,150 @@
+/*****************************************************************************
+Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
+Copyright (c) 2019, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software Foundation,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+*****************************************************************************/
+
+/**************************************************//**
+@file gis0geo.h
+The R-tree definitions adapted from MyISAM
+*******************************************************/
+
+#ifndef _gis0geo_h
+#define _gis0geo_h
+
+#include "my_global.h"
+#include "string.h"
+
+#define SPTYPE HA_KEYTYPE_DOUBLE
+#define SPLEN 8
+
+/* Since the mbr could be a point or a linestring, whose area is 0,
+we define this weight macro for calculating the area increase
+when we need to enlarge such an mbr. */
+#define LINE_MBR_WEIGHTS 0.001
+
+/* Types of "well-known binary representation" (wkb) format. */
+enum wkbType
+{
+ wkbPoint = 1,
+ wkbLineString = 2,
+ wkbPolygon = 3,
+ wkbMultiPoint = 4,
+ wkbMultiLineString = 5,
+ wkbMultiPolygon = 6,
+ wkbGeometryCollection = 7
+};
+
+/* Byte order of "well-known binary representation" (wkb) format. */
+enum wkbByteOrder
+{
+ wkbXDR = 0, /* Big Endian */
+ wkbNDR = 1 /* Little Endian */
+};
+
+/*************************************************************//**
+Calculate minimal bounding rectangle (mbr) of the spatial object
+stored in "well-known binary representation" (wkb) format.
+@return 0 if ok */
+int
+rtree_mbr_from_wkb(
+/*===============*/
+ const uchar* wkb, /*!< in: pointer to wkb. */
+ uint size, /*!< in: size of wkb. */
+ uint n_dims, /*!< in: dimensions. */
+ double* mbr); /*!< in/out: mbr. */
+
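For orientation, a standalone sketch (not the real parser) of the 5-byte WKB header that rtree_mbr_from_wkb() starts from: one byte-order byte followed by a 4-byte type from the wkbType enum above. The byte-swapping branch of the real code is omitted, so this assumes the stored order matches the host:

    #include <cstdint>
    #include <cstring>

    struct wkb_header {
            uint8_t         byte_order;     /* wkbXDR or wkbNDR */
            uint32_t        type;           /* a wkbType value */
    };

    static bool
    read_wkb_header(const unsigned char* wkb, size_t size, wkb_header* h)
    {
            if (size < 5) {
                    return(false);
            }

            h->byte_order = wkb[0];
            /* assumes the stored order matches the host order */
            memcpy(&h->type, wkb + 1, 4);

            return(h->type >= wkbPoint && h->type <= wkbGeometryCollection);
    }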
+/* Rtree split node structure. */
+struct rtr_split_node_t
+{
+ double square; /* square of the mbr.*/
+ int n_node; /* which group it is in. */
+ uchar* key; /* key. */
+ double* coords; /* mbr. */
+};
+
+/*************************************************************//**
+Inline function for reserving coords */
+inline
+static
+double*
+reserve_coords(double **d_buffer, /*!< in/out: buffer. */
+ int n_dim) /*!< in: dimensions. */
+/*===========*/
+{
+ double *coords = *d_buffer;
+ (*d_buffer) += n_dim * 2;
+ return coords;
+}
+
+/*************************************************************//**
+Split rtree nodes.
+Return which group the first rec is in. */
+int
+split_rtree_node(
+/*=============*/
+ rtr_split_node_t* node, /*!< in: split nodes.*/
+ int n_entries, /*!< in: number of entries.*/
+ int all_size, /*!< in: total size of keys.*/
+ int key_size, /*!< in: size of a key.*/
+ int min_size, /*!< in: minimal group size.*/
+ int size1, /*!< in: size of group.*/
+ int size2, /*!< in: initial group sizes */
+ double** d_buffer, /*!< in/out: buffer.*/
+ int n_dim, /*!< in: dimensions. */
+ uchar* first_rec); /*!< in: the first rec. */
+
+/*************************************************************//**
+Compares two keys a and b depending on nextflag.
+nextflag can contain these flags:
+ MBR_INTERSECT(a,b) a overlaps b
+ MBR_CONTAIN(a,b) a contains b
+ MBR_DISJOINT(a,b) a and b are disjoint
+ MBR_WITHIN(a,b) a within b
+ MBR_EQUAL(a,b) All coordinates of MBRs are equal
+ MBR_DATA(a,b) Data reference is the same
+Returns 0 on success. */
+int
+rtree_key_cmp(
+/*==========*/
+ page_cur_mode_t mode, /*!< in: compare method. */
+ const uchar* b, /*!< in: first key. */
+ int b_len, /*!< in: first key len. */
+ const uchar* a, /*!< in: second key. */
+ int a_len); /*!< in: second key len. */
+
+/*************************************************************//**
+Calculates MBR_AREA(a+b) - MBR_AREA(a)
+Note: when 'a' and 'b' objects are far from each other,
+the area increase can be really big, so this function
+can return 'inf' as a result. */
+double
+rtree_area_increase(
+ const uchar* a, /*!< in: first mbr. */
+ const uchar* b, /*!< in: second mbr. */
+ int a_len, /*!< in: mbr length. */
+ double* ab_area); /*!< out: increased area. */
+
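A simplified standalone sketch of the quantity rtree_area_increase() computes, for one 2-D MBR laid out as [xmin, xmax, ymin, ymax]; the real function generalizes over dimensions and, per the LINE_MBR_WEIGHTS comment above, special-cases degenerate rectangles:

    #include <algorithm>

    static double
    area_increase_2d(const double a[4], const double b[4])
    {
            double  a_area = (a[1] - a[0]) * (a[3] - a[2]);
            double  xmin = std::min(a[0], b[0]);
            double  xmax = std::max(a[1], b[1]);
            double  ymin = std::min(a[2], b[2]);
            double  ymax = std::max(a[3], b[3]);

            /* area of the enlarged mbr minus the original area */
            return((xmax - xmin) * (ymax - ymin) - a_area);
    }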
+/** Calculates overlapping area
+@param[in] a mbr a
+@param[in] b mbr b
+@param[in] mbr_len mbr length
+@return overlapping area */
+double
+rtree_area_overlapping(
+ const uchar* a,
+ const uchar* b,
+ int mbr_len);
+#endif
diff --git a/storage/innobase/include/gis0rtree.h b/storage/innobase/include/gis0rtree.h
new file mode 100644
index 00000000000..ffb6beb922b
--- /dev/null
+++ b/storage/innobase/include/gis0rtree.h
@@ -0,0 +1,537 @@
+/*****************************************************************************
+
+Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2018, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/gis0rtree.h
+R-tree header file
+
+Created 2013/03/27 Jimmy Yang and Allen Lai
+***********************************************************************/
+
+#ifndef gis0rtree_h
+#define gis0rtree_h
+
+#include "btr0cur.h"
+#include "rem0types.h"
+
+/* Whether MBR 'a' contains 'b' */
+#define MBR_CONTAIN_CMP(a, b) \
+ ((((b)->xmin >= (a)->xmin) && ((b)->xmax <= (a)->xmax) \
+ && ((b)->ymin >= (a)->ymin) && ((b)->ymax <= (a)->ymax)))
+
+/* Whether MBR 'a' equals to 'b' */
+#define MBR_EQUAL_CMP(a, b) \
+ ((((b)->xmin == (a)->xmin) && ((b)->xmax == (a)->xmax)) \
+ && (((b)->ymin == (a)->ymin) && ((b)->ymax == (a)->ymax)))
+
+/* Whether MBR 'a' intersects 'b' */
+#define MBR_INTERSECT_CMP(a, b) \
+ ((((b)->xmin <= (a)->xmax) || ((b)->xmax >= (a)->xmin)) \
+ && (((b)->ymin <= (a)->ymax) || ((b)->ymax >= (a)->ymin)))
+
+/* Whether MBR 'a' and 'b' disjoint */
+#define MBR_DISJOINT_CMP(a, b) (!MBR_INTERSECT_CMP(a, b))
+
+/* Whether MBR 'a' within 'b' */
+#define MBR_WITHIN_CMP(a, b) \
+ ((((b)->xmin <= (a)->xmin) && ((b)->xmax >= (a)->xmax)) \
+ && (((b)->ymin <= (a)->ymin) && ((b)->ymax >= (a)->ymax)))
+
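A minimal sketch of using these macros, assuming rtr_mbr_t carries the double fields xmin/xmax/ymin/ymax (declared elsewhere in the GIS headers); the wrapper is illustrative only:

    static bool
    mbr_contains(const rtr_mbr_t* a, const rtr_mbr_t* b)
    {
            /* true when a covers b on both axes */
            return(MBR_CONTAIN_CMP(a, b));
    }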
+/* Define it for rtree search mode checking. */
+#define RTREE_SEARCH_MODE(mode) \
+ (((mode) >= PAGE_CUR_CONTAIN) && ((mode <= PAGE_CUR_RTREE_GET_FATHER)))
+
+/* Geometry data header */
+#define GEO_DATA_HEADER_SIZE 4
+/**********************************************************************//**
+Builds a Rtree node pointer out of a physical record and a page number.
+@return own: node pointer */
+dtuple_t*
+rtr_index_build_node_ptr(
+/*=====================*/
+ const dict_index_t* index, /*!< in: index */
+ const rtr_mbr_t* mbr, /*!< in: mbr of lower page */
+ const rec_t* rec, /*!< in: record for which to build node
+ pointer */
+ ulint page_no,/*!< in: page number to put in node
+ pointer */
+ mem_heap_t* heap, /*!< in: memory heap where pointer
+ created */
+ ulint level); /*!< in: level of rec in tree:
+ 0 means leaf level */
+
+/*************************************************************//**
+Splits an R-tree index page to halves and inserts the tuple. It is assumed
+that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is
+released within this function! NOTE that the operation of this
+function must always succeed, we cannot reverse it: therefore enough
+free disk space (2 pages) must be guaranteed to be available before
+this function is called.
+@return inserted record */
+rec_t*
+rtr_page_split_and_insert(
+/*======================*/
+ ulint flags, /*!< in: undo logging and locking flags */
+ btr_cur_t* cursor, /*!< in/out: cursor at which to insert; when the
+ function returns, the cursor is positioned
+ on the predecessor of the inserted record */
+ offset_t** offsets,/*!< out: offsets on inserted record */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
+ const dtuple_t* tuple, /*!< in: tuple to insert */
+ ulint n_ext, /*!< in: number of externally stored columns */
+ mtr_t* mtr); /*!< in: mtr */
+
+/**************************************************************//**
+Sets the child node mbr in a node pointer. */
+UNIV_INLINE
+void
+rtr_page_cal_mbr(
+/*=============*/
+ const dict_index_t* index, /*!< in: index */
+ const buf_block_t* block, /*!< in: buffer block */
+ rtr_mbr_t* mbr, /*!< out: MBR encapsulates the page */
+ mem_heap_t* heap); /*!< in: heap for the memory
+ allocation */
+/*************************************************************//**
+Find the next matching record. This function will first exhaust
+the copied record listed in the rtr_info->matches vector before
+moving to next page
+@return true if a next qualified record is found, false otherwise
+(when exhausted) */
+bool
+rtr_pcur_move_to_next(
+/*==================*/
+ const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in
+ tuple must be set so that it cannot get
+ compared to the node ptr page number field! */
+ page_cur_mode_t mode, /*!< in: cursor search mode */
+ btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the
+ function may release the page latch */
+ ulint cur_level,
+ /*!< in: current level */
+ mtr_t* mtr); /*!< in: mtr */
+
+/****************************************************************//**
+Searches for the right position in an rtree for a page cursor. */
+bool
+rtr_cur_search_with_match(
+/*======================*/
+ const buf_block_t* block, /*!< in: buffer block */
+ dict_index_t* index, /*!< in: index descriptor */
+ const dtuple_t* tuple, /*!< in: data tuple */
+ page_cur_mode_t mode, /*!< in: PAGE_CUR_L,
+ PAGE_CUR_LE, PAGE_CUR_G, or
+ PAGE_CUR_GE */
+ page_cur_t* cursor, /*!< in/out: page cursor */
+ rtr_info_t* rtr_info);/*!< in/out: search stack */
+
+/****************************************************************//**
+Calculate the area increased for a new record
+@return area increased */
+double
+rtr_rec_cal_increase(
+/*=================*/
+ const dtuple_t* dtuple, /*!< in: data tuple to insert, which
+ cause area increase */
+ const rec_t* rec, /*!< in: physical record which differs from
+ dtuple in some of the common fields, or which
+ has an equal number or more fields than
+ dtuple */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
+ double* area); /*!< out: increased area */
+
+/****************************************************************//**
+Following the right link to find the proper block for insert.
+@return DB_SUCCESS or error code */
+dberr_t
+rtr_ins_enlarge_mbr(
+/*=================*/
+ btr_cur_t* cursor, /*!< in: btr cursor */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr); /*!< in: mtr */
+
+/********************************************************************//**
+Get the father node of a node at the given level in the R-tree. */
+void
+rtr_get_father_node(
+/*================*/
+ dict_index_t* index, /*!< in: index */
+ ulint level, /*!< in: the tree level of search */
+ const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in
+ tuple must be set so that it cannot get
+ compared to the node ptr page number field! */
+ btr_cur_t* sea_cur,/*!< in: search cursor */
+ btr_cur_t* cursor, /*!< in/out: tree cursor; the cursor page is
+ s- or x-latched */
+ ulint page_no,/*!< in: current page no */
+ mtr_t* mtr); /*!< in: mtr */
+
+/**************************************************************//**
+Push a nonleaf index node onto the search path */
+UNIV_INLINE
+void
+rtr_non_leaf_stack_push(
+/*====================*/
+ rtr_node_path_t* path, /*!< in/out: search path */
+ ulint pageno, /*!< in: pageno to insert */
+ node_seq_t seq_no, /*!< in: Node sequence num */
+ ulint level, /*!< in: index level */
+ ulint child_no, /*!< in: child page no */
+ btr_pcur_t* cursor, /*!< in: position cursor */
+ double mbr_inc); /*!< in: MBR needs to be
+ enlarged */
+
+/**************************************************************//**
+Push a nonleaf index node onto the search path for insertion */
+void
+rtr_non_leaf_insert_stack_push(
+/*===========================*/
+ dict_index_t* index, /*!< in: index descriptor */
+ rtr_node_path_t* path, /*!< in/out: search path */
+ ulint level, /*!< in: index level */
+ const buf_block_t* block, /*!< in: block of the page */
+ const rec_t* rec, /*!< in: positioned record */
+ double mbr_inc); /*!< in: MBR needs to be
+ enlarged */
+
+/*****************************************************************//**
+Allocates a new Split Sequence Number.
+@return new SSN id */
+UNIV_INLINE
+node_seq_t
+rtr_get_new_ssn_id(
+/*===============*/
+	dict_index_t*	index);	/*!< in/out: the index struct */
+
+/*****************************************************************//**
+Get the current Split Sequence Number.
+@return current SSN id */
+UNIV_INLINE
+node_seq_t
+rtr_get_current_ssn_id(
+/*===================*/
+	dict_index_t*	index);	/*!< in: the index struct */
+
+/********************************************************************//**
+Create an R-tree search info structure */
+rtr_info_t*
+rtr_create_rtr_info(
+/******************/
+ bool need_prdt, /*!< in: Whether predicate lock is
+ needed */
+ bool init_matches, /*!< in: Whether to initiate the
+ "matches" structure for collecting
+ matched leaf records */
+ btr_cur_t* cursor, /*!< in: tree search cursor */
+ dict_index_t* index); /*!< in: index struct */
+
+/********************************************************************//**
+Update a btr_cur_t with rtr_info */
+void
+rtr_info_update_btr(
+/******************/
+ btr_cur_t* cursor, /*!< in/out: tree cursor */
+ rtr_info_t* rtr_info); /*!< in: rtr_info to set to the
+ cursor */
+
+/********************************************************************//**
+Initialize an rtr_info structure */
+void
+rtr_init_rtr_info(
+/****************/
+	rtr_info_t*	rtr_info,	/*!< in/out: rtr_info to
+					initialize */
+ bool need_prdt, /*!< in: Whether predicate lock is
+ needed */
+ btr_cur_t* cursor, /*!< in: tree search cursor */
+ dict_index_t* index, /*!< in: index structure */
+ bool reinit); /*!< in: Whether this is a reinit */
+
+/**************************************************************//**
+Clean up an R-tree cursor */
+void
+rtr_clean_rtr_info(
+/*===============*/
+ rtr_info_t* rtr_info, /*!< in: RTree search info */
+ bool free_all); /*!< in: need to free rtr_info itself */
+
+/****************************************************************//**
+Get the bounding box content from an index record. */
+void
+rtr_get_mbr_from_rec(
+/*=================*/
+	const rec_t*	rec,	/*!< in: physical record */
+	const offset_t*	offsets,/*!< in: offsets array */
+	rtr_mbr_t*	mbr);	/*!< out: MBR */
+
+/****************************************************************//**
+Get the bounding box content from an MBR data tuple */
+void
+rtr_get_mbr_from_tuple(
+/*===================*/
+ const dtuple_t* dtuple, /*!< in: data tuple */
+ rtr_mbr* mbr); /*!< out: mbr to fill */
+
+/* Get the rtree page father.
+@param[in]	index	rtree index
+@param[in]	block	child page in the index
+@param[in]	mtr	mtr
+@param[in]	sea_cur	search cursor, contains information
+			about parent nodes in search
+@param[out]	cursor	cursor on node pointer record,
+			its page x-latched */
+void
+rtr_page_get_father(
+ dict_index_t* index,
+ buf_block_t* block,
+ mtr_t* mtr,
+ btr_cur_t* sea_cur,
+ btr_cur_t* cursor);
+
+/************************************************************//**
+Returns the father block of a page. It is assumed that mtr holds
+an X or SX latch on the tree.
+@return rec_get_offsets() of the node pointer record */
+offset_t*
+rtr_page_get_father_block(
+/*======================*/
+ offset_t* offsets,/*!< in: work area for the return value */
+ mem_heap_t* heap, /*!< in: memory heap to use */
+ dict_index_t* index, /*!< in: b-tree index */
+ buf_block_t* block, /*!< in: child page in the index */
+ mtr_t* mtr, /*!< in: mtr */
+ btr_cur_t* sea_cur,/*!< in: search cursor, contains information
+ about parent nodes in search */
+ btr_cur_t* cursor);/*!< out: cursor on node pointer record,
+ its page x-latched */
+/**************************************************************//**
+Store the parent path cursors.
+@return number of cursors stored */
+ulint
+rtr_store_parent_path(
+/*==================*/
+ const buf_block_t* block, /*!< in: block of the page */
+ btr_cur_t* btr_cur,/*!< in/out: persistent cursor */
+ ulint latch_mode,
+ /*!< in: latch_mode */
+ ulint level, /*!< in: index level */
+ mtr_t* mtr); /*!< in: mtr */
+
+/**************************************************************//**
+Initializes and opens a persistent cursor to an index tree. It should be
+closed with btr_pcur_close. */
+void
+rtr_pcur_open_low(
+/*==============*/
+ dict_index_t* index, /*!< in: index */
+ ulint level, /*!< in: level in the btree */
+ const dtuple_t* tuple, /*!< in: tuple on which search done */
+ page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...;
+ NOTE that if the search is made using a unique
+ prefix of a record, mode should be
+ PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
+ may end up on the previous page from the
+ record! */
+ ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
+ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
+ const char* file, /*!< in: file name */
+ unsigned line, /*!< in: line where called */
+ mtr_t* mtr); /*!< in: mtr */
+
+#define rtr_pcur_open(i,t,md,l,c,m) \
+ rtr_pcur_open_low(i,0,t,md,l,c,__FILE__,__LINE__,m)
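+
+/* Editorial note: the macro fixes level = 0 and records the caller's
+__FILE__/__LINE__ for latch diagnostics. A hypothetical caller (sketch
+only, not from the original sources):
+
+	btr_pcur_t	pcur;
+	mtr_t		mtr;
+
+	mtr_start(&mtr);
+	rtr_pcur_open(index, tuple, PAGE_CUR_RTREE_LOCATE,
+		      BTR_SEARCH_LEAF, &pcur, &mtr);
+	...
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+*/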
+
+struct btr_cur_t;
+
+/*********************************************************//**
+Returns the R-Tree node stored in the parent search path
+@return pointer to R-Tree cursor component */
+UNIV_INLINE
+node_visit_t*
+rtr_get_parent_node(
+/*================*/
+ btr_cur_t* btr_cur, /*!< in: persistent cursor */
+ ulint level, /*!< in: index level of buffer page */
+ ulint is_insert); /*!< in: whether it is insert */
+
+/*********************************************************//**
+Returns the R-Tree cursor stored in the parent search path
+@return pointer to R-Tree cursor component */
+UNIV_INLINE
+btr_pcur_t*
+rtr_get_parent_cursor(
+/*==================*/
+ btr_cur_t* btr_cur, /*!< in: persistent cursor */
+ ulint level, /*!< in: index level of buffer page */
+ ulint is_insert); /*!< in: whether insert operation */
+
+/*************************************************************//**
+Copy records from a page to new_block of an rtree. */
+void
+rtr_page_copy_rec_list_end_no_locks(
+/*================================*/
+ buf_block_t* new_block, /*!< in: index page to copy to */
+ buf_block_t* block, /*!< in: index page of rec */
+ rec_t* rec, /*!< in: record on page */
+ dict_index_t* index, /*!< in: record descriptor */
+ mem_heap_t* heap, /*!< in/out: heap memory */
+ rtr_rec_move_t* rec_move, /*!< in: recording records moved */
+ ulint max_move, /*!< in: num of rec to move */
+	ulint*		num_moved,	/*!< out: num of rec moved */
+ mtr_t* mtr); /*!< in: mtr */
+
+/*************************************************************//**
+Copy records up to a specified record from a page to new_block of an rtree. */
+void
+rtr_page_copy_rec_list_start_no_locks(
+/*==================================*/
+ buf_block_t* new_block, /*!< in: index page to copy to */
+ buf_block_t* block, /*!< in: index page of rec */
+ rec_t* rec, /*!< in: record on page */
+ dict_index_t* index, /*!< in: record descriptor */
+ mem_heap_t* heap, /*!< in/out: heap memory */
+ rtr_rec_move_t* rec_move, /*!< in: recording records moved */
+ ulint max_move, /*!< in: num of rec to move */
+	ulint*		num_moved,	/*!< out: num of rec moved */
+ mtr_t* mtr); /*!< in: mtr */
+
+/****************************************************************//**
+Merge two MBRs and update the MBR that the cursor is on. */
+dberr_t
+rtr_merge_and_update_mbr(
+/*=====================*/
+ btr_cur_t* cursor, /*!< in/out: cursor */
+ btr_cur_t* cursor2, /*!< in: the other cursor */
+ offset_t* offsets, /*!< in: rec offsets */
+ offset_t* offsets2, /*!< in: rec offsets */
+ page_t* child_page, /*!< in: the child page. */
+ buf_block_t* merge_block, /*!< in: page to merge */
+	buf_block_t*	block,		/*!< in: page to be merged */
+ dict_index_t* index, /*!< in: index */
+ mtr_t* mtr); /*!< in: mtr */
+
+/*************************************************************//**
+Deletes on the upper level the node pointer to a page. */
+void
+rtr_node_ptr_delete(
+/*================*/
+ dict_index_t* index, /*!< in: index tree */
+ btr_cur_t* sea_cur,/*!< in: search cursor, contains information
+ about parent nodes in search */
+ buf_block_t* block, /*!< in: page whose node pointer is deleted */
+ mtr_t* mtr); /*!< in: mtr */
+
+/****************************************************************//**
+Check whether two MBRs are identical or need to be merged. */
+bool
+rtr_merge_mbr_changed(
+/*==================*/
+ btr_cur_t* cursor, /*!< in: cursor */
+ btr_cur_t* cursor2, /*!< in: the other cursor */
+ offset_t* offsets, /*!< in: rec offsets */
+ offset_t* offsets2, /*!< in: rec offsets */
+ rtr_mbr_t* new_mbr, /*!< out: MBR to update */
+ buf_block_t* merge_block, /*!< in: page to merge */
+	buf_block_t*	block,		/*!< in: page to be merged */
+ dict_index_t* index); /*!< in: index */
+
+
+/**************************************************************//**
+Update the mbr field of a spatial index row.
+@return true if successful */
+bool
+rtr_update_mbr_field(
+/*=================*/
+	btr_cur_t*	cursor,		/*!< in: cursor positioned on rec. */
+	offset_t*	offsets,	/*!< in: offsets on rec. */
+	btr_cur_t*	cursor2,	/*!< in/out: cursor positioned on rec
+					that should be deleted.
+					This cursor is for btr_compress to
+					delete the merged page's father rec.*/
+ page_t* child_page, /*!< in: child page. */
+ rtr_mbr_t* new_mbr, /*!< in: the new mbr. */
+ rec_t* new_rec, /*!< in: rec to use */
+ mtr_t* mtr); /*!< in: mtr */
+
+/**************************************************************//**
+Check whether an R-tree page is a child of a parent page
+@return true if there is child/parent relationship */
+bool
+rtr_check_same_block(
+/*=================*/
+ dict_index_t* index, /*!< in: index tree */
+ btr_cur_t* cur, /*!< in/out: position at the parent entry
+ pointing to the child if successful */
+ buf_block_t* parentb,/*!< in: parent page to check */
+ buf_block_t* childb, /*!< in: child Page */
+ mem_heap_t* heap); /*!< in: memory heap */
+
+/*********************************************************************//**
+Write an MBR to a data buffer. */
+UNIV_INLINE
+void
+rtr_write_mbr(
+/*==========*/
+	byte*		data,	/*!< out: data buffer */
+	const rtr_mbr_t* mbr);	/*!< in: MBR to write */
+
+/*********************************************************************//**
+Read an MBR from a data buffer. */
+UNIV_INLINE
+void
+rtr_read_mbr(
+/*==========*/
+	const byte*	data,	/*!< in: data buffer */
+	rtr_mbr_t*	mbr);	/*!< out: MBR */
+
+/**************************************************************//**
+Check whether a page being discarded is in any ongoing search path */
+void
+rtr_check_discard_page(
+/*===================*/
+ dict_index_t* index, /*!< in: index */
+ btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on
+ the root page */
+ buf_block_t* block); /*!< in: block of page to be discarded */
+
+/********************************************************************//**
+Reinitialize an R-tree search info */
+UNIV_INLINE
+void
+rtr_info_reinit_in_cursor(
+/************************/
+ btr_cur_t* cursor, /*!< in/out: tree cursor */
+ dict_index_t* index, /*!< in: index struct */
+ bool need_prdt); /*!< in: Whether predicate lock is
+ needed */
+
+/** Estimates the number of rows in a given area.
+@param[in] index index
+@param[in] tuple range tuple containing mbr, may also be empty tuple
+@param[in] mode search mode
+@return estimated number of rows */
+int64_t
+rtr_estimate_n_rows_in_range(
+ dict_index_t* index,
+ const dtuple_t* tuple,
+ page_cur_mode_t mode);
+
+#include "gis0rtree.ic"
+#endif /* gis0rtree.h */
diff --git a/storage/innobase/include/gis0rtree.ic b/storage/innobase/include/gis0rtree.ic
new file mode 100644
index 00000000000..525acb7ecf0
--- /dev/null
+++ b/storage/innobase/include/gis0rtree.ic
@@ -0,0 +1,276 @@
+/*****************************************************************************
+
+Copyright (c) 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/gis0rtree.ic
+R-tree Inline code
+
+Created 2013/03/27 Jimmy Yang and Allen Lai
+***********************************************************************/
+
+/**************************************************************//**
+Calculate the MBR that encloses all the records on a page. */
+UNIV_INLINE
+void
+rtr_page_cal_mbr(
+/*=============*/
+ const dict_index_t* index, /*!< in: index */
+ const buf_block_t* block, /*!< in: buffer block */
+ rtr_mbr_t* rtr_mbr,/*!< out: MBR encapsulates the page */
+ mem_heap_t* heap) /*!< in: heap for the memory
+ allocation */
+{
+ page_t* page;
+ rec_t* rec;
+ byte* field;
+ ulint len;
+ offset_t* offsets = NULL;
+ double bmin, bmax;
+ double* amin;
+ double* amax;
+ ulint inc = 0;
+ double* mbr;
+
+ rtr_mbr->xmin = DBL_MAX;
+ rtr_mbr->ymin = DBL_MAX;
+ rtr_mbr->xmax = -DBL_MAX;
+ rtr_mbr->ymax = -DBL_MAX;
+
+ mbr = reinterpret_cast<double*>(rtr_mbr);
+
+ page = buf_block_get_frame(block);
+
+ rec = page_rec_get_next(page_get_infimum_rec(page));
+ offsets = rec_get_offsets(rec, index, offsets, page_is_leaf(page),
+ ULINT_UNDEFINED, &heap);
+
+ do {
+ /* The mbr address is in the first field. */
+ field = rec_get_nth_field(rec, offsets, 0, &len);
+
+ ut_ad(len == DATA_MBR_LEN);
+ inc = 0;
+ for (unsigned i = 0; i < SPDIMS; i++) {
+ bmin = mach_double_read(field + inc);
+ bmax = mach_double_read(field + inc + sizeof(double));
+
+ amin = mbr + i * SPDIMS;
+ amax = mbr + i * SPDIMS + 1;
+
+ if (*amin > bmin)
+ *amin = bmin;
+ if (*amax < bmax)
+ *amax = bmax;
+
+ inc += 2 * sizeof(double);
+ }
+
+ rec = page_rec_get_next(rec);
+
+ if (rec == NULL) {
+ break;
+ }
+ } while (!page_rec_is_supremum(rec));
+}
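+
+/* Editorial layout note: the reinterpret_cast above relies on rtr_mbr_t
+being SPDIMS * 2 contiguous doubles in {xmin, xmax, ymin, ymax} order,
+so for dimension i the running minimum is mbr[i * SPDIMS] and the
+maximum is mbr[i * SPDIMS + 1]:
+
+	i == 0:	mbr[0] = xmin, mbr[1] = xmax
+	i == 1:	mbr[2] = ymin, mbr[3] = ymax
+*/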
+
+/**************************************************************//**
+Push a non-leaf index node onto the search path */
+UNIV_INLINE
+void
+rtr_non_leaf_stack_push(
+/*====================*/
+ rtr_node_path_t* path, /*!< in/out: search path */
+ ulint pageno, /*!< in: pageno to insert */
+ node_seq_t seq_no, /*!< in: Node sequence num */
+ ulint level, /*!< in: index page level */
+ ulint child_no, /*!< in: child page no */
+ btr_pcur_t* cursor, /*!< in: position cursor */
+ double mbr_inc) /*!< in: MBR needs to be
+ enlarged */
+{
+ node_visit_t insert_val;
+
+ insert_val.page_no = pageno;
+ insert_val.seq_no = seq_no;
+ insert_val.level = level;
+ insert_val.child_no = child_no;
+ insert_val.cursor = cursor;
+ insert_val.mbr_inc = mbr_inc;
+
+ path->push_back(insert_val);
+
+#ifdef RTR_SEARCH_DIAGNOSTIC
+ fprintf(stderr, "INNODB_RTR: Push page %d, level %d, seq %d"
+ " to search stack \n",
+ static_cast<int>(pageno), static_cast<int>(level),
+ static_cast<int>(seq_no));
+#endif /* RTR_SEARCH_DIAGNOSTIC */
+}
+
+/*****************************************************************//**
+Allocates a new Split Sequence Number.
+@return new SSN id */
+UNIV_INLINE
+node_seq_t
+rtr_get_new_ssn_id(
+/*===============*/
+ dict_index_t* index) /*!< in/out: the index struct */
+{
+ node_seq_t ssn;
+
+ mutex_enter(&(index->rtr_ssn.mutex));
+ ssn = ++index->rtr_ssn.seq_no;
+ mutex_exit(&(index->rtr_ssn.mutex));
+
+ return(ssn);
+}
+/*****************************************************************//**
+Get the current Split Sequence Number.
+@return current SSN id */
+UNIV_INLINE
+node_seq_t
+rtr_get_current_ssn_id(
+/*===================*/
+ dict_index_t* index) /*!< in: index struct */
+{
+ node_seq_t ssn;
+
+ mutex_enter(&(index->rtr_ssn.mutex));
+ ssn = index->rtr_ssn.seq_no;
+ mutex_exit(&(index->rtr_ssn.mutex));
+
+ return(ssn);
+}
+
+/*********************************************************************//**
+Write an MBR to a data buffer. */
+UNIV_INLINE
+void
+rtr_write_mbr(
+/*==========*/
+	byte*		data,	/*!< out: data buffer */
+	const rtr_mbr_t* mbr)	/*!< in: MBR to write */
+{
+ const double* my_mbr = reinterpret_cast<const double*>(mbr);
+
+ for (unsigned i = 0; i < SPDIMS * 2; i++) {
+ mach_double_write(data + i * sizeof(double), my_mbr[i]);
+ }
+}
+
+/*********************************************************************//**
+Read an MBR from a data buffer. */
+UNIV_INLINE
+void
+rtr_read_mbr(
+/*==========*/
+ const byte* data, /*!< in: data */
+ rtr_mbr_t* mbr) /*!< out: MBR */
+{
+ for (unsigned i = 0; i < SPDIMS * 2; i++) {
+ (reinterpret_cast<double*>(mbr))[i] = mach_double_read(
+ data
+ + i * sizeof(double));
+ }
+}
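+
+/* Editorial round-trip sketch (hypothetical, not from the original
+sources): the two helpers are symmetric over DATA_MBR_LEN bytes,
+assuming DATA_MBR_LEN == SPDIMS * 2 * sizeof(double):
+
+	byte		buf[DATA_MBR_LEN];
+	rtr_mbr_t	mbr = {0.0, 1.0, 0.0, 1.0};  // xmin, xmax, ymin, ymax
+
+	rtr_write_mbr(buf, &mbr);	// machine-independent encoding
+	rtr_read_mbr(buf, &mbr);	// decodes the same four doubles
+*/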
+
+/*********************************************************//**
+Returns the R-Tree node stored in the parent search path
+@return pointer to R-Tree cursor component in the parent path,
+NULL if the parent path is empty or the level is beyond the number of
+items it contains */
+UNIV_INLINE
+node_visit_t*
+rtr_get_parent_node(
+/*================*/
+ btr_cur_t* btr_cur, /*!< in: persistent cursor */
+ ulint level, /*!< in: index level of buffer page */
+ ulint is_insert) /*!< in: whether it is insert */
+{
+ ulint num;
+ ulint tree_height = btr_cur->tree_height;
+ node_visit_t* found_node = NULL;
+
+ if (level >= tree_height) {
+ return(NULL);
+ }
+
+ mutex_enter(&btr_cur->rtr_info->rtr_path_mutex);
+
+ num = btr_cur->rtr_info->parent_path->size();
+
+ if (!num) {
+ mutex_exit(&btr_cur->rtr_info->rtr_path_mutex);
+ return(NULL);
+ }
+
+ if (is_insert) {
+ ulint idx = tree_height - level - 1;
+ ut_ad(idx < num);
+
+ found_node = &(*btr_cur->rtr_info->parent_path)[idx];
+ } else {
+ node_visit_t* node;
+
+ while (num > 0) {
+ node = &(*btr_cur->rtr_info->parent_path)[num - 1];
+
+ if (node->level == level) {
+ found_node = node;
+ break;
+ }
+ num--;
+ }
+ }
+
+ mutex_exit(&btr_cur->rtr_info->rtr_path_mutex);
+
+ return(found_node);
+}
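+
+/* Editorial note on the indexing above: on the insert path parent_path
+is stacked from the root down, so a node at tree level `level` lives at
+parent_path[tree_height - level - 1], e.g. with tree_height == 3:
+
+	parent_path[0] -> level 2 (root)
+	parent_path[1] -> level 1
+	parent_path[2] -> level 0 (leaf)
+
+On the search path the vector is scanned backwards for the first entry
+whose level matches, since a search may stack several candidate nodes
+per level. */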
+
+/*********************************************************//**
+Returns the R-Tree cursor stored in the parent search path
+@return pointer to R-Tree cursor component */
+UNIV_INLINE
+btr_pcur_t*
+rtr_get_parent_cursor(
+/*==================*/
+ btr_cur_t* btr_cur, /*!< in: persistent cursor */
+ ulint level, /*!< in: index level of buffer page */
+ ulint is_insert) /*!< in: whether insert operation */
+{
+ node_visit_t* found_node = rtr_get_parent_node(
+ btr_cur, level, is_insert);
+
+ return((found_node) ? found_node->cursor : NULL);
+}
+
+/********************************************************************//**
+Reinitialize an R-tree search info in btr_cur_t */
+UNIV_INLINE
+void
+rtr_info_reinit_in_cursor(
+/************************/
+ btr_cur_t* cursor, /*!< in/out: tree cursor */
+ dict_index_t* index, /*!< in: index struct */
+ bool need_prdt) /*!< in: Whether predicate lock is
+ needed */
+{
+ rtr_clean_rtr_info(cursor->rtr_info, false);
+ rtr_init_rtr_info(cursor->rtr_info, need_prdt, cursor, index, true);
+}
diff --git a/storage/innobase/include/gis0type.h b/storage/innobase/include/gis0type.h
new file mode 100644
index 00000000000..ee350ea56ce
--- /dev/null
+++ b/storage/innobase/include/gis0type.h
@@ -0,0 +1,159 @@
+/*****************************************************************************
+
+Copyright (c) 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/gis0type.h
+R-tree header file
+
+Created 2013/03/27 Jimmy Yang
+***********************************************************************/
+
+#ifndef gis0type_h
+#define gis0type_h
+
+#include "buf0buf.h"
+#include "data0type.h"
+#include "data0types.h"
+#include "dict0types.h"
+#include "ut0vec.h"
+#include "gis0geo.h"
+
+#include <vector>
+#include <list>
+
+/* Node Sequence Number. Only updated when page splits */
+typedef ib_uint32_t node_seq_t;
+
+/* R-tree internal non-leaf nodes to be searched, from root to leaf */
+typedef struct node_visit {
+ ulint page_no; /*!< the page number */
+	node_seq_t	seq_no;		/*!< the SSN (split sequence number) */
+ ulint level; /*!< the page's index level */
+	ulint		child_no;	/*!< child page number, when
+					recording the parent path */
+	btr_pcur_t*	cursor;		/*!< cursor structure, if positioned.
+					FIXME: there is no need to use the
+					whole btr_pcur_t, just the position
+					related members */
+ double mbr_inc; /*!< whether this node needs to be
+ enlarged for insertion */
+} node_visit_t;
+
+typedef std::vector<node_visit_t, ut_allocator<node_visit_t> > rtr_node_path_t;
+
+typedef struct rtr_rec {
+ rec_t* r_rec; /*!< matched record */
+	bool	locked;			/*!< whether the record is locked */
+} rtr_rec_t;
+
+typedef std::vector<rtr_rec_t, ut_allocator<rtr_rec_t> > rtr_rec_vector;
+
+/* Structure for matched records on the leaf page */
+typedef struct matched_rec {
+	byte*		bufp;		/*!< aligned buffer pointer */
+ byte rec_buf[UNIV_PAGE_SIZE_MAX * 2];
+ /*!< buffer used to copy matching rec */
+ buf_block_t block; /*!< the shadow buffer block */
+ ulint used; /*!< memory used */
+ rtr_rec_vector* matched_recs; /*!< vector holding the matching rec */
+ ib_mutex_t rtr_match_mutex;/*!< mutex protect the match_recs
+ vector */
+	bool		valid;		/*!< whether the results in
+					matched_recs are valid (the page
+					was not dropped) */
+	bool		locked;		/*!< whether these recs are locked */
+} matched_rec_t;
+
+/* In-memory representation of a minimum bounding rectangle */
+typedef struct rtr_mbr {
+ double xmin; /*!< minimum on x */
+ double xmax; /*!< maximum on x */
+ double ymin; /*!< minimum on y */
+ double ymax; /*!< maximum on y */
+} rtr_mbr_t;
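+
+/* Editorial sketch (hypothetical helper, not part of the original
+sources): a typical predicate over two such MBRs, e.g. containment of
+`a` within `b`:
+
+	inline bool mbr_within(const rtr_mbr_t& a, const rtr_mbr_t& b)
+	{
+		return(a.xmin >= b.xmin && a.xmax <= b.xmax
+		       && a.ymin >= b.ymin && a.ymax <= b.ymax);
+	}
+*/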
+
+/* Maximum index level for an R-tree; this is consistent with BTR_MAX_LEVELS */
+#define RTR_MAX_LEVELS 100
+
+/* Number of pages we latch at leaf level when there is a possible tree
+modification (split, shrink): we always latch the left, current
+and right pages */
+#define RTR_LEAF_LATCH_NUM 3
+
+/** Vectors holding the matching internal pages/nodes and leaf records */
+typedef struct rtr_info{
+ rtr_node_path_t*path; /*!< vector holding matching pages */
+ rtr_node_path_t*parent_path;
+ /*!< vector holding parent pages during
+ search */
+ matched_rec_t* matches;/*!< struct holding matching leaf records */
+ ib_mutex_t rtr_path_mutex;
+ /*!< mutex protect the "path" vector */
+ buf_block_t* tree_blocks[RTR_MAX_LEVELS + RTR_LEAF_LATCH_NUM];
+					/*!< tracking pages that would be
+					locked at leaf level, to be freed
+					later */
+ ulint tree_savepoints[RTR_MAX_LEVELS + RTR_LEAF_LATCH_NUM];
+ /*!< savepoint used to release latches/blocks
+ on each level and leaf level */
+ rtr_mbr_t mbr; /*!< the search MBR */
+ que_thr_t* thr; /*!< the search thread */
+ mem_heap_t* heap; /*!< memory heap */
+ btr_cur_t* cursor; /*!< cursor used for search */
+ dict_index_t* index; /*!< index it is searching */
+	bool		need_prdt_lock;
+					/*!< whether we will need a predicate
+					lock on the tree */
+	bool		need_page_lock;
+					/*!< whether we will need a predicate
+					page lock on the tree */
+	bool		allocated;/*!< whether this structure is
+					heap-allocated or on the stack */
+ bool mbr_adj;/*!< whether mbr will need to be enlarged
+ for an insertion operation */
+ bool fd_del; /*!< found deleted row */
+ const dtuple_t* search_tuple;
+ /*!< search tuple being used */
+ page_cur_mode_t search_mode;
+ /*!< current search mode */
+} rtr_info_t;
+
+typedef std::list<rtr_info_t*, ut_allocator<rtr_info_t*> > rtr_info_active;
+
+/* Tracking structure for all ongoing searches on an index */
+typedef struct rtr_info_track {
+ rtr_info_active* rtr_active; /*!< Active search info */
+ ib_mutex_t rtr_active_mutex;
+ /*!< mutex to protect
+ rtr_active */
+} rtr_info_track_t;
+
+/* Node sequence number and the mutex that protects it. */
+typedef struct rtree_ssn {
+ ib_mutex_t mutex; /*!< mutex protect the seq num */
+ node_seq_t seq_no; /*!< the SSN (node sequence number) */
+} rtr_ssn_t;
+
+/* This records the movement of records between pages; used for the
+corresponding lock movement */
+typedef struct rtr_rec_move {
+ rec_t* old_rec; /*!< record being moved in old page */
+ rec_t* new_rec; /*!< new record location */
+	bool		moved;		/*!< whether the locks were moved too */
+} rtr_rec_move_t;
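+
+/* Editorial usage sketch (hypothetical, not from the original sources):
+callers of rtr_page_copy_rec_list_end_no_locks() prepare one
+rtr_rec_move_t per record to be copied, then migrate the locks of the
+entries flagged as moved:
+
+	rtr_rec_move_t*	rec_move = static_cast<rtr_rec_move_t*>(
+		mem_heap_alloc(heap, sizeof *rec_move * max_move));
+	ulint		num_moved = 0;
+
+	rtr_page_copy_rec_list_end_no_locks(new_block, block, rec, index,
+					    heap, rec_move, max_move,
+					    &num_moved, mtr);
+	// for each i < num_moved with rec_move[i].moved set, move the
+	// record locks from rec_move[i].old_rec to rec_move[i].new_rec
+*/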
+#endif /* gis0type.h */
diff --git a/storage/innobase/include/ha0ha.h b/storage/innobase/include/ha0ha.h
index 879f1269e24..1944309c8ec 100644
--- a/storage/innobase/include/ha0ha.h
+++ b/storage/innobase/include/ha0ha.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,13 +27,12 @@ Created 8/18/1994 Heikki Tuuri
#ifndef ha0ha_h
#define ha0ha_h
-#include "univ.i"
-
#include "hash0hash.h"
#include "page0types.h"
#include "buf0types.h"
#include "rem0types.h"
+#ifdef BTR_CUR_HASH_ADAPT
/*************************************************************//**
Looks for an element in a hash table.
@return pointer to the data of the first hash table node in chain
@@ -47,7 +47,6 @@ ha_search_and_get_data(
Looks for an element when we know the pointer to the data and updates
the pointer to data if found.
@return TRUE if found */
-UNIV_INTERN
ibool
ha_search_and_update_if_found_func(
/*===============================*/
@@ -62,77 +61,67 @@ ha_search_and_update_if_found_func(
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
/** Looks for an element when we know the pointer to the data and
updates the pointer to data if found.
-@param table in/out: hash table
-@param fold in: folded value of the searched data
-@param data in: pointer to the data
-@param new_block in: block containing new_data
-@param new_data in: new pointer to the data */
+@param table in/out: hash table
+@param fold in: folded value of the searched data
+@param data in: pointer to the data
+@param new_block in: block containing new_data
+@param new_data in: new pointer to the data */
# define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \
ha_search_and_update_if_found_func(table,fold,data,new_block,new_data)
#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
/** Looks for an element when we know the pointer to the data and
updates the pointer to data if found.
-@param table in/out: hash table
-@param fold in: folded value of the searched data
-@param data in: pointer to the data
-@param new_block ignored: block containing new_data
-@param new_data in: new pointer to the data */
+@param table in/out: hash table
+@param fold in: folded value of the searched data
+@param data in: pointer to the data
+@param new_block ignored: block containing new_data
+@param new_data in: new pointer to the data */
# define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \
ha_search_and_update_if_found_func(table,fold,data,new_data)
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+#endif /* BTR_CUR_HASH_ADAPT */
+
/*************************************************************//**
Creates a hash table with at least n array cells. The actual number
of cells is chosen to be a prime number slightly bigger than n.
-@return own: created table */
-UNIV_INTERN
+@return own: created table */
hash_table_t*
-ha_create_func(
-/*===========*/
- ulint n, /*!< in: number of array cells */
-#ifdef UNIV_SYNC_DEBUG
- ulint mutex_level, /*!< in: level of the mutexes in the latching
- order: this is used in the debug version */
-#endif /* UNIV_SYNC_DEBUG */
- ulint n_mutexes, /*!< in: number of mutexes to protect the
+ib_create(
+/*======*/
+ ulint n, /*!< in: number of array cells */
+ latch_id_t id, /*!< in: latch ID */
+ ulint n_mutexes,/*!< in: number of mutexes to protect the
hash table: must be a power of 2, or 0 */
- ulint type); /*!< in: type of datastructure for which
+	ulint	type);	/*!< in: type of data structure for which
the memory heap is going to be used e.g.:
MEM_HEAP_FOR_BTR_SEARCH or
MEM_HEAP_FOR_PAGE_HASH */
-#ifdef UNIV_SYNC_DEBUG
-/** Creates a hash table.
-@return own: created table
-@param n_c in: number of array cells. The actual number of cells is
-chosen to be a slightly bigger prime number.
-@param level in: level of the mutexes in the latching order
-@param n_m in: number of mutexes to protect the hash table;
- must be a power of 2, or 0 */
-# define ib_create(n_c,n_m,type,level) ha_create_func(n_c,level,n_m,type)
-#else /* UNIV_SYNC_DEBUG */
-/** Creates a hash table.
-@return own: created table
-@param n_c in: number of array cells. The actual number of cells is
-chosen to be a slightly bigger prime number.
-@param level in: level of the mutexes in the latching order
-@param n_m in: number of mutexes to protect the hash table;
- must be a power of 2, or 0 */
-# define ib_create(n_c,n_m,type,level) ha_create_func(n_c,n_m,type)
-#endif /* UNIV_SYNC_DEBUG */
+
+/** Recreate a hash table with at least n array cells. The actual number
+of cells is chosen to be a prime number slightly bigger than n.
+The new cells are all cleared. The heaps are recreated.
+The sync objects are reused.
+@param[in,out]	table	hash table to be resized (to be freed later)
+@param[in] n number of array cells
+@return resized new table */
+hash_table_t*
+ib_recreate(
+ hash_table_t* table,
+ ulint n);
/*************************************************************//**
Empties a hash table and frees the memory heaps. */
-UNIV_INTERN
void
ha_clear(
/*=====*/
hash_table_t* table); /*!< in, own: hash table */
+#ifdef BTR_CUR_HASH_ADAPT
/*************************************************************//**
Inserts an entry into a hash table. If an entry with the same fold number
is found, its node is updated to point to the new data, and no new node
is inserted.
-@return TRUE if succeed, FALSE if no more memory could be allocated */
-UNIV_INTERN
+@return TRUE if succeed, FALSE if no more memory could be allocated */
ibool
ha_insert_for_fold_func(
/*====================*/
@@ -151,11 +140,11 @@ ha_insert_for_fold_func(
Inserts an entry into a hash table. If an entry with the same fold number
is found, its node is updated to point to the new data, and no new node
is inserted.
-@return TRUE if succeed, FALSE if no more memory could be allocated
-@param t in: hash table
-@param f in: folded value of data
-@param b in: buffer block containing the data
-@param d in: data, must not be NULL */
+@return TRUE if succeed, FALSE if no more memory could be allocated
+@param t in: hash table
+@param f in: folded value of data
+@param b in: buffer block containing the data
+@param d in: data, must not be NULL */
# define ha_insert_for_fold(t,f,b,d) do { \
ha_insert_for_fold_func(t,f,b,d); \
MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_ADDED); \
@@ -165,11 +154,11 @@ is inserted.
Inserts an entry into a hash table. If an entry with the same fold number
is found, its node is updated to point to the new data, and no new node
is inserted.
-@return TRUE if succeed, FALSE if no more memory could be allocated
-@param t in: hash table
-@param f in: folded value of data
-@param b ignored: buffer block containing the data
-@param d in: data, must not be NULL */
+@return TRUE if succeed, FALSE if no more memory could be allocated
+@param t in: hash table
+@param f in: folded value of data
+@param b ignored: buffer block containing the data
+@param d in: data, must not be NULL */
# define ha_insert_for_fold(t,f,b,d) do { \
ha_insert_for_fold_func(t,f,d); \
MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_ADDED); \
@@ -179,7 +168,7 @@ is inserted.
/*********************************************************//**
Looks for an element when we know the pointer to the data and deletes
it from the hash table if found.
-@return TRUE if found */
+@return TRUE if found */
UNIV_INLINE
ibool
ha_search_and_delete_if_found(
@@ -187,11 +176,10 @@ ha_search_and_delete_if_found(
hash_table_t* table, /*!< in: hash table */
ulint fold, /*!< in: folded value of the searched data */
const rec_t* data); /*!< in: pointer to the data */
-#ifndef UNIV_HOTBACKUP
+
/*****************************************************************//**
Removes from the chain determined by fold all nodes whose data pointer
points to the page given. */
-UNIV_INTERN
void
ha_remove_all_nodes_to_page(
/*========================*/
@@ -201,8 +189,7 @@ ha_remove_all_nodes_to_page(
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
/*************************************************************//**
Validates a given range of the cells in hash table.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
ha_validate(
/*========*/
@@ -210,27 +197,19 @@ ha_validate(
ulint start_index, /*!< in: start index */
ulint end_index); /*!< in: end index */
#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
-/*************************************************************//**
-Prints info of a hash table. */
-UNIV_INTERN
-void
-ha_print_info(
-/*==========*/
- FILE* file, /*!< in: file where to print */
- hash_table_t* table); /*!< in: hash table */
-#endif /* !UNIV_HOTBACKUP */
/** The hash table external chain node */
struct ha_node_t {
+ ulint fold; /*!< fold value for the data */
ha_node_t* next; /*!< next chain node or NULL if none */
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
buf_block_t* block; /*!< buffer block containing the data, or NULL */
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- const rec_t* data; /*!< pointer to the data */
- ulint fold; /*!< fold value for the data */
+ const rec_t* data; /*!< pointer to the data */
};
+#endif /* BTR_CUR_HASH_ADAPT */
-#ifdef UNIV_DEBUG
+#if defined UNIV_DEBUG && defined BTR_CUR_HASH_ADAPT
/********************************************************************//**
Assert that the synchronization object in a hash operation involving
possible change in the hash table is held.
@@ -257,9 +236,6 @@ hash_assert_can_search(
#define hash_assert_can_search(t, f)
#endif /* UNIV_DEBUG */
-
-#ifndef UNIV_NONINL
#include "ha0ha.ic"
-#endif
#endif
diff --git a/storage/innobase/include/ha0ha.ic b/storage/innobase/include/ha0ha.ic
index b09a24dced3..0612ef1bb25 100644
--- a/storage/innobase/include/ha0ha.ic
+++ b/storage/innobase/include/ha0ha.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,22 +24,14 @@ The hash table with external chains
Created 8/18/1994 Heikki Tuuri
*************************************************************************/
+#ifdef BTR_CUR_HASH_ADAPT
#include "ut0rnd.h"
#include "mem0mem.h"
#include "btr0types.h"
-/***********************************************************//**
-Deletes a hash node. */
-UNIV_INTERN
-void
-ha_delete_hash_node(
-/*================*/
- hash_table_t* table, /*!< in: hash table */
- ha_node_t* del_node); /*!< in: node to be deleted */
-
/******************************************************************//**
Gets a hash node data.
-@return pointer to the data */
+@return pointer to the data */
UNIV_INLINE
const rec_t*
ha_node_get_data(
@@ -68,33 +61,33 @@ ha_node_set_data_func(
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
/** Sets hash node data.
-@param n in: hash chain node
-@param b in: buffer block containing the data
-@param d in: pointer to the data */
+@param n in: hash chain node
+@param b in: buffer block containing the data
+@param d in: pointer to the data */
# define ha_node_set_data(n,b,d) ha_node_set_data_func(n,b,d)
#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
/** Sets hash node data.
-@param n in: hash chain node
-@param b in: buffer block containing the data
-@param d in: pointer to the data */
+@param n in: hash chain node
+@param b in: buffer block containing the data
+@param d in: pointer to the data */
# define ha_node_set_data(n,b,d) ha_node_set_data_func(n,d)
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
/******************************************************************//**
Gets the next node in a hash chain.
-@return next node, NULL if none */
+@return next node, NULL if none */
UNIV_INLINE
ha_node_t*
ha_chain_get_next(
/*==============*/
- ha_node_t* node) /*!< in: hash chain node */
+ const ha_node_t* node) /*!< in: hash chain node */
{
return(node->next);
}
/******************************************************************//**
Gets the first node in a hash chain.
-@return first node, NULL if none */
+@return first node, NULL if none */
UNIV_INLINE
ha_node_t*
ha_chain_get_first(
@@ -122,9 +115,9 @@ hash_assert_can_modify(
if (table->type == HASH_TABLE_SYNC_MUTEX) {
ut_ad(mutex_own(hash_get_mutex(table, fold)));
} else if (table->type == HASH_TABLE_SYNC_RW_LOCK) {
-# ifdef UNIV_SYNC_DEBUG
+# ifdef UNIV_DEBUG
rw_lock_t* lock = hash_get_lock(table, fold);
- ut_ad(rw_lock_own(lock, RW_LOCK_EX));
+ ut_ad(rw_lock_own(lock, RW_LOCK_X));
# endif
} else {
ut_ad(table->type == HASH_TABLE_SYNC_NONE);
@@ -145,11 +138,8 @@ hash_assert_can_search(
if (table->type == HASH_TABLE_SYNC_MUTEX) {
ut_ad(mutex_own(hash_get_mutex(table, fold)));
} else if (table->type == HASH_TABLE_SYNC_RW_LOCK) {
-# ifdef UNIV_SYNC_DEBUG
- rw_lock_t* lock = hash_get_lock(table, fold);
- ut_ad(rw_lock_own(lock, RW_LOCK_EX)
- || rw_lock_own(lock, RW_LOCK_SHARED));
-# endif
+ ut_ad(rw_lock_own_flagged(hash_get_lock(table, fold),
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
} else {
ut_ad(table->type == HASH_TABLE_SYNC_NONE);
}
@@ -167,20 +157,17 @@ ha_search_and_get_data(
hash_table_t* table, /*!< in: hash table */
ulint fold) /*!< in: folded value of the searched data */
{
- ha_node_t* node;
-
hash_assert_can_search(table, fold);
ut_ad(btr_search_enabled);
- node = ha_chain_get_first(table, fold);
+ for (const ha_node_t* node = ha_chain_get_first(table, fold);
+ node != NULL;
+ node = ha_chain_get_next(node)) {
- while (node) {
if (node->fold == fold) {
return(node->data);
}
-
- node = ha_chain_get_next(node);
}
return(NULL);
@@ -188,7 +175,7 @@ ha_search_and_get_data(
/*********************************************************//**
Looks for an element when we know the pointer to the data.
-@return pointer to the hash table node, NULL if not found in the table */
+@return pointer to the hash table node, NULL if not found in the table */
UNIV_INLINE
ha_node_t*
ha_search_with_data(
@@ -217,10 +204,18 @@ ha_search_with_data(
return(NULL);
}
+/***********************************************************//**
+Deletes a hash node. */
+void
+ha_delete_hash_node(
+/*================*/
+ hash_table_t* table, /*!< in: hash table */
+ ha_node_t* del_node); /*!< in: node to be deleted */
+
/*********************************************************//**
Looks for an element when we know the pointer to the data, and deletes
it from the hash table, if found.
-@return TRUE if found */
+@return TRUE if found */
UNIV_INLINE
ibool
ha_search_and_delete_if_found(
@@ -244,3 +239,4 @@ ha_search_and_delete_if_found(
return(FALSE);
}
+#endif /* BTR_CUR_HASH_ADAPT */
diff --git a/storage/innobase/include/ha0storage.h b/storage/innobase/include/ha0storage.h
index d4e680a10a3..db23ddc66ed 100644
--- a/storage/innobase/include/ha0storage.h
+++ b/storage/innobase/include/ha0storage.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -44,7 +44,7 @@ struct ha_storage_t;
/*******************************************************************//**
Creates a hash storage. If any of the parameters is 0, then a default
value is used.
-@return own: hash storage */
+@return own: hash storage */
UNIV_INLINE
ha_storage_t*
ha_storage_create(
@@ -62,8 +62,7 @@ data_len bytes need to be allocated) and the size of storage is going to
become more than "memlim" then "data" is not added and NULL is returned.
To disable this behavior "memlim" can be set to 0, which stands for
"no limit".
-@return pointer to the copy */
-UNIV_INTERN
+@return pointer to the copy */
const void*
ha_storage_put_memlim(
/*==================*/
@@ -74,10 +73,10 @@ ha_storage_put_memlim(
/*******************************************************************//**
Same as ha_storage_put_memlim() but without memory limit.
-@param storage in/out: hash storage
-@param data in: data to store
-@param data_len in: data length
-@return pointer to the copy of the string */
+@param storage in/out: hash storage
+@param data in: data to store
+@param data_len in: data length
+@return pointer to the copy of the string */
#define ha_storage_put(storage, data, data_len) \
ha_storage_put_memlim((storage), (data), (data_len), 0)
@@ -85,9 +84,9 @@ Same as ha_storage_put_memlim() but without memory limit.
Copies string into the storage and returns a pointer to the copy. If the
same string is already present, then pointer to it is returned.
Strings are considered to be equal if strcmp(str1, str2) == 0.
-@param storage in/out: hash storage
-@param str in: string to put
-@return pointer to the copy of the string */
+@param storage in/out: hash storage
+@param str in: string to put
+@return pointer to the copy of the string */
#define ha_storage_put_str(storage, str) \
((const char*) ha_storage_put((storage), (str), strlen(str) + 1))
@@ -96,10 +95,10 @@ Copies string into the storage and returns a pointer to the copy obeying
a memory limit.
If the same string is already present, then pointer to it is returned.
Strings are considered to be equal if strcmp(str1, str2) == 0.
-@param storage in/out: hash storage
-@param str in: string to put
-@param memlim in: memory limit to obey
-@return pointer to the copy of the string */
+@param storage in/out: hash storage
+@param str in: string to put
+@param memlim in: memory limit to obey
+@return pointer to the copy of the string */
#define ha_storage_put_str_memlim(storage, str, memlim) \
((const char*) ha_storage_put_memlim((storage), (str), \
strlen(str) + 1, (memlim)))
@@ -126,15 +125,13 @@ ha_storage_free(
/*******************************************************************//**
Gets the size of the memory used by a storage.
-@return bytes used */
+@return bytes used */
UNIV_INLINE
ulint
ha_storage_get_size(
/*================*/
const ha_storage_t* storage); /*!< in: hash storage */
-#ifndef UNIV_NONINL
#include "ha0storage.ic"
-#endif
#endif /* ha0storage_h */
diff --git a/storage/innobase/include/ha0storage.ic b/storage/innobase/include/ha0storage.ic
index 03df7be5e73..8cc487faf47 100644
--- a/storage/innobase/include/ha0storage.ic
+++ b/storage/innobase/include/ha0storage.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,8 +25,6 @@ its own storage, avoiding duplicates.
Created September 24, 2007 Vasil Dimov
*******************************************************/
-#include "univ.i"
-#include "ha0storage.h"
#include "hash0hash.h"
#include "mem0mem.h"
@@ -48,7 +46,7 @@ struct ha_storage_node_t {
/*******************************************************************//**
Creates a hash storage. If any of the parameters is 0, then a default
value is used.
-@return own: hash storage */
+@return own: hash storage */
UNIV_INLINE
ha_storage_t*
ha_storage_create(
@@ -127,7 +125,7 @@ ha_storage_free(
/*******************************************************************//**
Gets the size of the memory used by a storage.
-@return bytes used */
+@return bytes used */
UNIV_INLINE
ulint
ha_storage_get_size(
diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h
index bcf8a893695..693dcd15163 100644
--- a/storage/innobase/include/ha_prototypes.h
+++ b/storage/innobase/include/ha_prototypes.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -19,46 +20,27 @@ this program; if not, write to the Free Software Foundation, Inc.,
/*******************************************************************//**
@file include/ha_prototypes.h
Prototypes for global functions in ha_innodb.cc that are called by
-InnoDB C code
+InnoDB C code.
-Created 5/11/2006 Osku Salerma
+NOTE: This header is intended to insulate InnoDB from SQL names and functions.
+Do not include any headers other than univ.i into this unless they are very
+simple headers.
************************************************************************/
#ifndef HA_INNODB_PROTOTYPES_H
#define HA_INNODB_PROTOTYPES_H
-#include "my_dbug.h"
-#include "mysqld_error.h"
-#include "my_compare.h"
-#include "my_sys.h"
-#include "m_string.h"
-#include "debug_sync.h"
-#include "my_base.h"
+#include "univ.i"
-#include "trx0types.h"
-#include "m_ctype.h" /* CHARSET_INFO */
+#ifndef UNIV_INNOCHECKSUM
-// Forward declarations
-class Field;
-struct fts_string_t;
+/* Forward declarations */
+class THD;
-/*********************************************************************//**
-Wrapper around MySQL's copy_and_convert function.
-@return number of bytes copied to 'to' */
-UNIV_INTERN
-ulint
-innobase_convert_string(
-/*====================*/
- void* to, /*!< out: converted string */
- ulint to_length, /*!< in: number of bytes reserved
- for the converted string */
- CHARSET_INFO* to_cs, /*!< in: character set to convert to */
- const void* from, /*!< in: string to convert */
- ulint from_length, /*!< in: number of bytes to convert */
- CHARSET_INFO* from_cs, /*!< in: character set to convert
- from */
- uint* errors); /*!< out: number of errors encountered
- during the conversion */
+// JAN: TODO missing features:
+#undef MYSQL_FT_INIT_EXT
+#undef MYSQL_PFS
+#undef MYSQL_STORE_FTS_DOC_ID
/*******************************************************************//**
Formats the raw data in "data" (in InnoDB on-disk format) that is of
@@ -68,8 +50,7 @@ Not more than "buf_size" bytes are written to "buf".
The result is always NUL-terminated (provided buf_size > 0) and the
number of bytes that were written to "buf" is returned (including the
terminating NUL).
-@return number of bytes that were written */
-UNIV_INTERN
+@return number of bytes that were written */
ulint
innobase_raw_format(
/*================*/
@@ -83,35 +64,50 @@ innobase_raw_format(
/*****************************************************************//**
Invalidates the MySQL query cache for the table. */
-UNIV_INTERN
void
innobase_invalidate_query_cache(
/*============================*/
trx_t* trx, /*!< in: transaction which
modifies the table */
const char* full_name, /*!< in: concatenation of
- database name, null char NUL,
+ database name, path separator,
table name, null char NUL;
NOTE that in Windows this is
always in LOWER CASE! */
ulint full_name_len); /*!< in: full name length where
also the null chars count */
+/** Quote a standard SQL identifier like tablespace, index or column name.
+@param[in] file output stream
+@param[in] trx InnoDB transaction, or NULL
+@param[in] id identifier to quote */
+void
+innobase_quote_identifier(
+ FILE* file,
+ trx_t* trx,
+ const char* id);
+
+/** Quote an standard SQL identifier like tablespace, index or column name.
+Return the string as an std:string object.
+@param[in] trx InnoDB transaction, or NULL
+@param[in] id identifier to quote
+@return a std::string with id properly quoted. */
+std::string
+innobase_quote_identifier(
+ trx_t* trx,
+ const char* id);
+
/*****************************************************************//**
-Convert a table or index name to the MySQL system_charset_info (UTF-8)
-and quote it if needed.
-@return pointer to the end of buf */
-UNIV_INTERN
+Convert a table name to the MySQL system_charset_info (UTF-8).
+@return pointer to the end of buf */
char*
innobase_convert_name(
/*==================*/
char* buf, /*!< out: buffer for converted identifier */
ulint buflen, /*!< in: length of buf, in bytes */
- const char* id, /*!< in: identifier to convert */
+ const char* id, /*!< in: table name to convert */
ulint idlen, /*!< in: length of id, in bytes */
- THD* thd, /*!< in: MySQL connection thread, or NULL */
- ibool table_id);/*!< in: TRUE=id is a table or database name;
- FALSE=id is an index name */
+ THD* thd); /*!< in: MySQL connection thread, or NULL */
/******************************************************************//**
Returns true if the thread is the replication thread on the slave
@@ -119,8 +115,7 @@ server. Used in srv_conc_enter_innodb() to determine if the thread
should be allowed to enter InnoDB - the replication thread is treated
differently than other threads. Also used in
srv_conc_force_exit_innodb().
-@return true if thd is the replication thread */
-UNIV_INTERN
+@return true if thd is the replication thread */
ibool
thd_is_replication_slave_thread(
/*============================*/
@@ -131,8 +126,7 @@ Returns true if the transaction this thread is processing has edited
non-transactional tables. Used by the deadlock detector when deciding
which transaction to rollback in case of a deadlock - we try to avoid
rolling back transactions that have edited non-transactional tables.
-@return true if non-transactional tables have been edited */
-UNIV_INTERN
+@return true if non-transactional tables have been edited */
ibool
thd_has_edited_nontrans_tables(
/*===========================*/
@@ -147,7 +141,6 @@ unsigned long long thd_query_start_micro(const MYSQL_THD thd);
/*************************************************************//**
Prints info of a THD object (== user session thread) to the given file. */
-UNIV_INTERN
void
innobase_mysql_print_thd(
/*=====================*/
@@ -156,24 +149,6 @@ innobase_mysql_print_thd(
uint max_query_len); /*!< in: max query length to print, or 0 to
use the default max length */
-/*************************************************************//**
-InnoDB uses this function to compare two data fields for which the data type
-is such that we must use MySQL code to compare them.
-@return 1, 0, -1, if a is greater, equal, less than b, respectively */
-UNIV_INTERN
-int
-innobase_mysql_cmp(
-/*===============*/
- int mysql_type, /*!< in: MySQL type */
- uint charset_number, /*!< in: number of the charset */
- const unsigned char* a, /*!< in: data field */
- unsigned int a_length, /*!< in: data field length,
- not UNIV_SQL_NULL */
- const unsigned char* b, /*!< in: data field */
- unsigned int b_length) /*!< in: data field length,
- not UNIV_SQL_NULL */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
/*****************************************************************//**
Log code calls this whenever log has been written and/or flushed up
to a new position. We use this to notify upper layer of a new commit
@@ -181,30 +156,24 @@ checkpoint when necessary.*/
UNIV_INTERN
void
innobase_mysql_log_notify(
-/*===============*/
+/*======================*/
ib_uint64_t write_lsn, /*!< in: LSN written to log file */
ib_uint64_t flush_lsn); /*!< in: LSN flushed to disk */
-/**************************************************************//**
-Converts a MySQL type to an InnoDB type. Note that this function returns
+/** Converts a MySQL type to an InnoDB type. Note that this function returns
the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'.
-@return DATA_BINARY, DATA_VARCHAR, ... */
-UNIV_INTERN
+@param[out] unsigned_flag DATA_UNSIGNED if an 'unsigned type';
+at least ENUM and SET, and unsigned integer types are 'unsigned types'
+@param[in] f MySQL Field
+@return DATA_BINARY, DATA_VARCHAR, ... */
ulint
get_innobase_type_from_mysql_type(
-/*==============================*/
- ulint* unsigned_flag, /*!< out: DATA_UNSIGNED if an
- 'unsigned type';
- at least ENUM and SET,
- and unsigned integer
- types are 'unsigned types' */
- const void* field) /*!< in: MySQL Field */
- MY_ATTRIBUTE((nonnull));
+ ulint* unsigned_flag,
+ const void* field);
/******************************************************************//**
Get the variable length bounds of the given character set. */
-UNIV_INTERN
void
innobase_get_cset_width(
/*====================*/
@@ -214,38 +183,23 @@ innobase_get_cset_width(
/******************************************************************//**
Compares NUL-terminated UTF-8 strings case insensitively.
-@return 0 if a=b, <0 if a<b, >1 if a>b */
-UNIV_INTERN
+@return 0 if a=b, <0 if a<b, >0 if a>b */
int
innobase_strcasecmp(
/*================*/
const char* a, /*!< in: first string to compare */
const char* b); /*!< in: second string to compare */
-/******************************************************************//**
-Compares NUL-terminated UTF-8 strings case insensitively. The
-second string contains wildcards.
-@return 0 if a match is found, 1 if not */
-UNIV_INTERN
-int
-innobase_wildcasecmp(
-/*=================*/
- const char* a, /*!< in: string to compare */
- const char* b); /*!< in: wildcard string to compare */
-
-/******************************************************************//**
-Strip dir name from a full path name and return only its file name.
+/** Strip dir name from a full path name and return only the file name
+@param[in] path_name full path name
@return file name or "null" if no file name */
-UNIV_INTERN
const char*
innobase_basename(
-/*==============*/
- const char* path_name); /*!< in: full path name */
+ const char* path_name);
/******************************************************************//**
Returns true if the thread is executing a SELECT statement.
-@return true if thd is executing SELECT */
-UNIV_INTERN
+@return true if thd is executing SELECT */
ibool
thd_is_select(
/*==========*/
@@ -253,29 +207,26 @@ thd_is_select(
/******************************************************************//**
Converts an identifier to a table name. */
-UNIV_INTERN
void
innobase_convert_from_table_id(
/*===========================*/
- struct charset_info_st* cs, /*!< in: the 'from' character set */
- char* to, /*!< out: converted identifier */
- const char* from, /*!< in: identifier to convert */
- ulint len); /*!< in: length of 'to', in bytes; should
- be at least 5 * strlen(to) + 1 */
+ CHARSET_INFO* cs, /*!< in: the 'from' character set */
+ char* to, /*!< out: converted identifier */
+ const char* from, /*!< in: identifier to convert */
+ ulint len); /*!< in: length of 'to', in bytes; should
+ be at least 5 * strlen(to) + 1 */
/******************************************************************//**
Converts an identifier to UTF-8. */
-UNIV_INTERN
void
innobase_convert_from_id(
/*=====================*/
- struct charset_info_st* cs, /*!< in: the 'from' character set */
- char* to, /*!< out: converted identifier */
- const char* from, /*!< in: identifier to convert */
- ulint len); /*!< in: length of 'to', in bytes;
- should be at least 3 * strlen(to) + 1 */
+ CHARSET_INFO* cs, /*!< in: the 'from' character set */
+ char* to, /*!< out: converted identifier */
+ const char* from, /*!< in: identifier to convert */
+ ulint len); /*!< in: length of 'to', in bytes;
+					should be at least 3 * strlen(from) + 1 */
/******************************************************************//**
Makes all characters in a NUL-terminated UTF-8 string lower case. */
-UNIV_INTERN
void
innobase_casedn_str(
/*================*/
@@ -295,29 +246,41 @@ int wsrep_innobase_mysql_sort(int mysql_type, uint charset_number,
/**********************************************************************//**
Determines the connection character set.
-@return connection character set */
-UNIV_INTERN
-struct charset_info_st*
+@return connection character set */
+CHARSET_INFO*
innobase_get_charset(
/*=================*/
THD* thd); /*!< in: MySQL thread handle */
-/**********************************************************************//**
-Determines the current SQL statement.
-@return SQL statement string */
-UNIV_INTERN
+
+/** Determines the current SQL statement.
+Thread unsafe, can only be called from the thread owning the THD.
+@param[in] thd MySQL thread handle
+@param[out] length Length of the SQL statement
+@return SQL statement string */
const char*
-innobase_get_stmt(
-/*==============*/
- THD* thd, /*!< in: MySQL thread handle */
- size_t* length) /*!< out: length of the SQL statement */
- MY_ATTRIBUTE((nonnull));
+innobase_get_stmt_unsafe(
+ THD* thd,
+ size_t* length);
+
+/** Determines the current SQL statement.
+Thread safe, can be called from any thread as the string is copied
+into the provided buffer.
+@param[in] thd MySQL thread handle
+@param[out] buf Buffer containing SQL statement
+@param[in] buflen Length of provided buffer
+@return Length of the SQL statement */
+size_t
+innobase_get_stmt_safe(
+ THD* thd,
+ char* buf,
+ size_t buflen);
+
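A sketch contrasting the two variants (editorial; the buffer size is arbitrary):

	/* Owning thread only: borrows a pointer into the THD. */
	size_t		len;
	const char*	sql = innobase_get_stmt_unsafe(thd, &len);

	/* Any thread: copies the statement into a private buffer. */
	char		buf[1024];
	size_t		n = innobase_get_stmt_safe(thd, buf, sizeof buf);
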
/******************************************************************//**
This function is used to find the storage length in bytes of the first n
characters for prefix indexes using a multibyte character set. The function
finds charset information and returns length of prefix_len characters in the
index field in bytes.
-@return number of bytes occupied by the first n characters */
-UNIV_INTERN
+@return number of bytes occupied by the first n characters */
ulint
innobase_get_at_most_n_mbchars(
/*===========================*/
@@ -328,26 +291,6 @@ innobase_get_at_most_n_mbchars(
ulint data_len, /*!< in: length of the string in bytes */
const char* str); /*!< in: character string */
-/*************************************************************//**
-InnoDB index push-down condition check
-@return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */
-UNIV_INTERN
-enum icp_result
-innobase_index_cond(
-/*================*/
- void* file) /*!< in/out: pointer to ha_innobase */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Returns true if the thread supports XA,
-global value of innodb_supports_xa if thd is NULL.
-@return true if thd supports XA */
-UNIV_INTERN
-ibool
-thd_supports_xa(
-/*============*/
- THD* thd); /*!< in: thread handle, or NULL to query
- the global innodb_supports_xa */
-
/** Get status of innodb_tmpdir.
@param[in] thd thread handle, or NULL to query
the global innodb_tmpdir.
@@ -359,8 +302,7 @@ thd_innodb_tmpdir(
/******************************************************************//**
Returns the lock wait timeout for the current connection.
-@return the lock wait timeout, in seconds */
-UNIV_INTERN
+@return the lock wait timeout, in seconds */
ulong
thd_lock_wait_timeout(
/*==================*/
@@ -368,19 +310,25 @@ thd_lock_wait_timeout(
the global innodb_lock_wait_timeout */
/******************************************************************//**
Add up the time waited for the lock for the current query. */
-UNIV_INTERN
void
thd_set_lock_wait_time(
/*===================*/
THD* thd, /*!< in/out: thread handle */
ulint value); /*!< in: time waited for the lock */
/**********************************************************************//**
Get the current setting of the table_cache_size global parameter. We do
a dirty read because for one there is no synchronization object and
secondly there is little harm in doing so even if we get a torn read.
-@return SQL statement string */
-UNIV_INTERN
+@return value of table_cache_size */
ulint
innobase_get_table_cache_size(void);
/*===============================*/
@@ -390,39 +338,13 @@ Get the current setting of the lower_case_table_names global parameter from
mysqld.cc. We do a dirty read because for one there is no synchronization
object and secondly there is little harm in doing so even if we get a torn
read.
-@return value of lower_case_table_names */
-UNIV_INTERN
+@return value of lower_case_table_names */
ulint
innobase_get_lower_case_table_names(void);
/*=====================================*/
-/*****************************************************************//**
-Frees a possible InnoDB trx object associated with the current THD.
-@return 0 or error number */
-UNIV_INTERN
-int
-innobase_close_thd(
-/*===============*/
- THD* thd); /*!< in: MySQL thread handle for
- which to close the connection */
-/*************************************************************//**
-Get the next token from the given string and store it in *token. */
-UNIV_INTERN
-ulint
-innobase_mysql_fts_get_token(
-/*=========================*/
- CHARSET_INFO* charset, /*!< in: Character set */
- const byte* start, /*!< in: start of text */
- const byte* end, /*!< in: one character past end of
- text */
- fts_string_t* token, /*!< out: token's text */
- ulint* offset); /*!< out: offset to token,
- measured as characters from
- 'start' */
-
/******************************************************************//**
compare two character string case insensitively according to their charset. */
-UNIV_INTERN
int
innobase_fts_text_case_cmp(
/*=======================*/
@@ -430,20 +352,10 @@ innobase_fts_text_case_cmp(
const void* p1, /*!< in: key */
const void* p2); /*!< in: node */
-/****************************************************************//**
-Get FTS field charset info from the field's prtype
-@return charset info */
-UNIV_INTERN
-CHARSET_INFO*
-innobase_get_fts_charset(
-/*=====================*/
- int mysql_type, /*!< in: MySQL type */
- uint charset_number);/*!< in: number of the charset */
/******************************************************************//**
Returns true if transaction should be flagged as read-only.
-@return true if the thd is marked as read-only */
-UNIV_INTERN
-ibool
+@return true if the thd is marked as read-only */
+bool
thd_trx_is_read_only(
/*=================*/
THD* thd); /*!< in/out: thread handle */
@@ -451,28 +363,22 @@ thd_trx_is_read_only(
/******************************************************************//**
Check if the transaction is an auto-commit transaction. TRUE also
implies that it is a SELECT (read-only) transaction.
-@return true if the transaction is an auto commit read-only transaction. */
-UNIV_INTERN
+@return true if the transaction is an auto commit read-only transaction. */
ibool
thd_trx_is_auto_commit(
/*===================*/
THD* thd); /*!< in: thread handle, or NULL */
/*****************************************************************//**
-A wrapper function of innobase_convert_name(), convert a table or
-index name to the MySQL system_charset_info (UTF-8) and quote it if needed.
-@return pointer to the end of buf */
-UNIV_INTERN
+A wrapper function of innobase_convert_name(), converting a table name
+to the MySQL system_charset_info (UTF-8) and quoting it if needed. */
void
innobase_format_name(
/*==================*/
- char* buf, /*!< out: buffer for converted
- identifier */
- ulint buflen, /*!< in: length of buf, in bytes */
- const char* name, /*!< in: index or table name
- to format */
- ibool is_index_name) /*!< in: index name */
- MY_ATTRIBUTE((nonnull));
+ char* buf, /*!< out: buffer for converted identifier */
+ ulint buflen, /*!< in: length of buf, in bytes */
+ const char* name); /*!< in: table name to format */
/** Corresponds to Sql_condition::enum_warning_level. */
enum ib_log_level_t {
@@ -493,7 +399,6 @@ void push_warning_printf(
THD *thd, Sql_condition::enum_warning_level level,
uint code, const char *format, ...);
*/
-UNIV_INTERN
void
ib_errf(
/*====*/
@@ -514,7 +419,6 @@ void push_warning_printf(
THD *thd, Sql_condition::enum_warning_level level,
uint code, const char *format, ...);
*/
-UNIV_INTERN
void
ib_senderrf(
/*========*/
@@ -523,22 +427,18 @@ ib_senderrf(
ib_uint32_t code, /*!< MySQL error code */
...); /*!< Args */
-/******************************************************************//**
-Write a message to the MySQL log, prefixed with "InnoDB: ".
-Wrapper around sql_print_information() */
-UNIV_INTERN
-void
-ib_logf(
-/*====*/
- ib_log_level_t level, /*!< in: warning level */
- const char* format, /*!< printf format */
- ...) /*!< Args */
- MY_ATTRIBUTE((format(printf, 2, 3)));
+extern const char* TROUBLESHOOTING_MSG;
+extern const char* TROUBLESHOOT_DATADICT_MSG;
+extern const char* BUG_REPORT_MSG;
+extern const char* FORCE_RECOVERY_MSG;
+extern const char* OPERATING_SYSTEM_ERROR_MSG;
+extern const char* FOREIGN_KEY_CONSTRAINTS_MSG;
+extern const char* SET_TRANSACTION_MSG;
+extern const char* INNODB_PARAMETERS_MSG;
/******************************************************************//**
Returns the NUL terminated value of glob_hostname.
-@return pointer to glob_hostname. */
-UNIV_INTERN
+@return pointer to glob_hostname. */
const char*
server_get_hostname();
/*=================*/
@@ -546,7 +446,6 @@ server_get_hostname();
/******************************************************************//**
Get the error message format string.
@return the format string or 0 if not found. */
-UNIV_INTERN
const char*
innobase_get_err_msg(
/*=================*/
@@ -569,8 +468,7 @@ values we want to reserve for multi-value inserts e.g.,
innobase_next_autoinc() will be called with increment set to 3 where
autoinc_lock_mode != TRADITIONAL because we want to reserve 3 values for
the multi-value INSERT above.
-@return the next value */
-UNIV_INTERN
+@return the next value */
ulonglong
innobase_next_autoinc(
/*==================*/
@@ -581,16 +479,6 @@ innobase_next_autoinc(
ulonglong max_value) /*!< in: max value for type */
MY_ATTRIBUTE((pure, warn_unused_result));
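A worked instance of the reservation described above (editorial sketch; parameter names are illustrative, values hypothetical):

	/* Reserve 3 values for INSERT INTO T VALUES (), (), (); with
	auto_increment_increment = 1, auto_increment_offset = 1 and the
	counter currently at 100: */
	ulonglong	next = innobase_next_autoinc(
		100,		/* current counter value */
		3,		/* values needed by the statement */
		1,		/* auto_increment_increment */
		1,		/* auto_increment_offset */
		ULONGLONG_MAX);	/* max value for the column type */
	/* Under these assumptions the call covers 101..103, so a
	concurrent insert continues from 104. */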
-/********************************************************************//**
-Get the upper limit of the MySQL integral and floating-point type.
-@return maximum allowed value for the field */
-UNIV_INTERN
-ulonglong
-innobase_get_int_col_max_value(
-/*===========================*/
- const Field* field) /*!< in: MySQL field */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-
/**********************************************************************
Converts an identifier from my_charset_filename to UTF-8 charset. */
uint
@@ -600,12 +488,10 @@ innobase_convert_to_system_charset(
const char* from, /* in: identifier to convert */
ulint len, /* in: length of 'to', in bytes */
uint* errors); /* out: error return */
-
/**********************************************************************
Check if the length of the identifier exceeds the maximum allowed.
The input to this function is an identifier in charset my_charset_filename.
return true when length of identifier is too long. */
-UNIV_INTERN
my_bool
innobase_check_identifier_length(
/*=============================*/
@@ -617,9 +503,9 @@ Converts an identifier from my_charset_filename to UTF-8 charset. */
uint
innobase_convert_to_system_charset(
/*===============================*/
- char* to, /* out: converted identifier */
- const char* from, /* in: identifier to convert */
- ulint len, /* in: length of 'to', in bytes */
+ char* to, /* out: converted identifier */
+ const char* from, /* in: identifier to convert */
+ ulint len, /* in: length of 'to', in bytes */
uint* errors); /* out: error return */
/**********************************************************************
@@ -627,9 +513,9 @@ Converts an identifier from my_charset_filename to UTF-8 charset. */
uint
innobase_convert_to_filename_charset(
/*=================================*/
- char* to, /* out: converted identifier */
- const char* from, /* in: identifier to convert */
- ulint len); /* in: length of 'to', in bytes */
+ char* to, /* out: converted identifier */
+ const char* from, /* in: identifier to convert */
+ ulint len); /* in: length of 'to', in bytes */
/********************************************************************//**
Helper function to push warnings from InnoDB internals to SQL-layer. */
@@ -657,12 +543,60 @@ database name catenated to '/' and table name. An example:
test/mytable. On Windows normalization puts both the database name and the
table name always to lower case if "set_lower_case" is set to TRUE. */
void
-normalize_table_name_low(
-/*=====================*/
+normalize_table_name_c_low(
+/*=======================*/
char* norm_name, /*!< out: normalized name as a
null-terminated string */
const char* name, /*!< in: table name string */
ibool set_lower_case); /*!< in: TRUE if we want to set
name to lower case */
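An editorial example of the normalization (buffer size illustrative):

	char	norm_name[512];
	normalize_table_name_c_low(norm_name, "./test/MyTable", TRUE);
	/* norm_name is now "test/mytable"; with set_lower_case == FALSE
	the case would be preserved as "test/MyTable". */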
+#include <my_compare.h>
+
+/*************************************************************//**
+InnoDB index push-down condition check defined in ha_innodb.cc
+@return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */
+ICP_RESULT
+innobase_index_cond(
+/*================*/
+ void* file) /*!< in/out: pointer to ha_innobase */
+ MY_ATTRIBUTE((warn_unused_result));
+
+#include <dur_prop.h>
+
+/******************************************************************//**
+Gets information on the durability property requested by the thread.
+Used when writing either a prepare or commit record to the log
+buffer.
+@return the durability property. */
+enum durability_properties
+thd_requested_durability(
+/*=====================*/
+ const THD* thd) /*!< in: thread handle */
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Update the system variable with the given value of the InnoDB
+buffer pool size.
+@param[in] buf_pool_size given value of buffer pool size.*/
+void
+innodb_set_buf_pool_size(ulonglong buf_pool_size);
+
+/** Create a MYSQL_THD for a background thread and mark it as such.
+@param name thread info for SHOW PROCESSLIST
+@return new MYSQL_THD */
+MYSQL_THD
+innobase_create_background_thd(const char* name);
+
+/** Destroy a background purge thread THD.
+@param[in] thd MYSQL_THD to destroy */
+void
+innobase_destroy_background_thd(MYSQL_THD);
+
+/** Close opened tables, free memory, delete items for a MYSQL_THD.
+@param[in] thd MYSQL_THD to reset */
+void
+innobase_reset_background_thd(MYSQL_THD);
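+
The intended lifecycle of these three calls, sketched from the declarations above (editorial):

	MYSQL_THD	thd = innobase_create_background_thd("innodb purge");

	/* ... perform background work on behalf of thd ... */

	innobase_reset_background_thd(thd);	/* between units of work */
	innobase_destroy_background_thd(thd);	/* at thread exit */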
+#endif /* !UNIV_INNOCHECKSUM */
#endif /* HA_INNODB_PROTOTYPES_H */
diff --git a/storage/innobase/include/handler0alter.h b/storage/innobase/include/handler0alter.h
index ca71edb42fc..2fdcedc5bb6 100644
--- a/storage/innobase/include/handler0alter.h
+++ b/storage/innobase/include/handler0alter.h
@@ -22,22 +22,22 @@ this program; if not, write to the Free Software Foundation, Inc.,
Smart ALTER TABLE
*******************************************************/
+#include "rem0types.h"
+
/*************************************************************//**
Copies an InnoDB record to table->record[0]. */
-UNIV_INTERN
void
innobase_rec_to_mysql(
/*==================*/
struct TABLE* table, /*!< in/out: MySQL table */
const rec_t* rec, /*!< in: record */
const dict_index_t* index, /*!< in: index */
- const ulint* offsets)/*!< in: rec_get_offsets(
+ const offset_t* offsets)/*!< in: rec_get_offsets(
rec, index, ...) */
MY_ATTRIBUTE((nonnull));
/*************************************************************//**
Copies an InnoDB index entry to table->record[0]. */
-UNIV_INTERN
void
innobase_fields_to_mysql(
/*=====================*/
@@ -48,7 +48,6 @@ innobase_fields_to_mysql(
/*************************************************************//**
Copies an InnoDB row to table->record[0]. */
-UNIV_INTERN
void
innobase_row_to_mysql(
/*==================*/
@@ -59,7 +58,6 @@ innobase_row_to_mysql(
/*************************************************************//**
Resets table->record[0]. */
-UNIV_INTERN
void
innobase_rec_reset(
/*===============*/
@@ -71,13 +69,12 @@ auto_increment_increment and auto_increment_offset variables. */
struct ib_sequence_t {
/**
- @param thd - the session
- @param start_value - the lower bound
- @param max_value - the upper bound (inclusive) */
+ @param thd the session
+ @param start_value the lower bound
+ @param max_value the upper bound (inclusive) */
ib_sequence_t(THD* thd, ulonglong start_value, ulonglong max_value);
- /**
- Postfix increment
+ /** Postfix increment
@return the value to insert */
ulonglong operator++(int) UNIV_NOTHROW;
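A short usage sketch (editorial; the bounds are arbitrary):

	ib_sequence_t	seq(thd, 1 /* lower bound */, 1000 /* upper bound */);

	ulonglong	v1 = seq++;	/* first value to insert */
	ulonglong	v2 = seq++;	/* one increment step later */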
diff --git a/storage/innobase/include/hash0hash.h b/storage/innobase/include/hash0hash.h
index aff679c5301..4f55b051d80 100644
--- a/storage/innobase/include/hash0hash.h
+++ b/storage/innobase/include/hash0hash.h
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,12 +27,8 @@ Created 5/20/1997 Heikki Tuuri
#ifndef hash0hash_h
#define hash0hash_h
-#include "univ.i"
#include "mem0mem.h"
-#ifndef UNIV_HOTBACKUP
-# include "sync0sync.h"
-# include "sync0rw.h"
-#endif /* !UNIV_HOTBACKUP */
+#include "sync0rw.h"
struct hash_table_t;
struct hash_cell_t;
@@ -56,65 +53,46 @@ enum hash_table_sync_t {
/*************************************************************//**
Creates a hash table with >= n array cells. The actual number
of cells is chosen to be a prime number slightly bigger than n.
-@return own: created table */
-UNIV_INTERN
+@return own: created table */
hash_table_t*
hash_create(
/*========*/
ulint n); /*!< in: number of array cells */
-#ifndef UNIV_HOTBACKUP
+
/*************************************************************//**
Creates a sync object array to protect a hash table.
::sync_obj can be mutexes or rw_locks depending on the type of
hash table. */
-UNIV_INTERN
void
-hash_create_sync_obj_func(
-/*======================*/
+hash_create_sync_obj(
+/*=================*/
hash_table_t* table, /*!< in: hash table */
- enum hash_table_sync_t type, /*!< in: HASH_TABLE_SYNC_MUTEX
+ hash_table_sync_t type, /*!< in: HASH_TABLE_SYNC_MUTEX
or HASH_TABLE_SYNC_RW_LOCK */
-#ifdef UNIV_SYNC_DEBUG
- ulint sync_level,/*!< in: latching order level
- of the mutexes: used in the
- debug version */
-#endif /* UNIV_SYNC_DEBUG */
+ latch_id_t id, /*!< in: mutex/rw_lock ID */
ulint n_sync_obj);/*!< in: number of sync objects,
must be a power of 2 */
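Taken together, a hedged creation sketch (the latch id is a placeholder, not a name from this patch):

	hash_table_t*	table = hash_create(4096);	/* >= 4096 cells */
	hash_create_sync_obj(table, HASH_TABLE_SYNC_RW_LOCK,
			     latch_id /* placeholder */, 8 /* power of 2 */);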
-#ifdef UNIV_SYNC_DEBUG
-# define hash_create_sync_obj(t, s, n, level) \
- hash_create_sync_obj_func(t, s, level, n)
-#else /* UNIV_SYNC_DEBUG */
-# define hash_create_sync_obj(t, s, n, level) \
- hash_create_sync_obj_func(t, s, n)
-#endif /* UNIV_SYNC_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
/*************************************************************//**
Frees a hash table. */
-UNIV_INTERN
void
hash_table_free(
/*============*/
hash_table_t* table); /*!< in, own: hash table */
/**************************************************************//**
Calculates the hash value from a folded value.
-@return hashed value */
+@return hashed value */
UNIV_INLINE
ulint
hash_calc_hash(
/*===========*/
ulint fold, /*!< in: folded value */
hash_table_t* table); /*!< in: hash table */
-#ifndef UNIV_HOTBACKUP
/********************************************************************//**
Assert that the mutex for the table is held */
-# define HASH_ASSERT_OWN(TABLE, FOLD) \
+#define HASH_ASSERT_OWN(TABLE, FOLD) \
ut_ad((TABLE)->type != HASH_TABLE_SYNC_MUTEX \
|| (mutex_own(hash_get_mutex((TABLE), FOLD))));
-#else /* !UNIV_HOTBACKUP */
-# define HASH_ASSERT_OWN(TABLE, FOLD)
-#endif /* !UNIV_HOTBACKUP */
/*******************************************************************//**
Inserts a struct to a hash table. */
@@ -144,7 +122,6 @@ do {\
}\
} while (0)
-#ifdef WITH_WSREP
/*******************************************************************//**
Inserts a struct to the head of hash table. */
@@ -170,7 +147,6 @@ do { \
cell3333->node = DATA; \
} \
} while (0)
-#endif /*WITH_WSREP */
#ifdef UNIV_HASH_DEBUG
# define HASH_ASSERT_VALID(DATA) ut_a((void*) (DATA) != (void*) -1)
# define HASH_INVALIDATE(DATA, NAME) *(void**) (&DATA->NAME) = (void*) -1
@@ -208,6 +184,18 @@ do {\
HASH_INVALIDATE(DATA, NAME);\
} while (0)
+#define HASH_REPLACE(TYPE, NAME, TABLE, FOLD, DATA_OLD, DATA_NEW) \
+ do { \
+ (DATA_NEW)->NAME = (DATA_OLD)->NAME; \
+ \
+ hash_cell_t& cell3333 \
+ = TABLE->array[hash_calc_hash(FOLD, TABLE)]; \
+ TYPE** struct3333 = (TYPE**)&cell3333.node; \
+ while (*struct3333 != DATA_OLD) { \
+ struct3333 = &((*struct3333)->NAME); \
+ } \
+ *struct3333 = DATA_NEW; \
+ } while (0)
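HASH_REPLACE splices DATA_NEW into the chain where DATA_OLD sat, without re-hashing. A usage sketch in the style of the buffer pool's page hash (editorial; member and variable names illustrative):

	/* new_bpage first inherits old_bpage's chain link, then the
	pointer that referenced old_bpage is redirected to new_bpage. */
	HASH_REPLACE(buf_page_t, hash, page_hash, fold, old_bpage, new_bpage);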
/*******************************************************************//**
Gets the first struct in a hash chain, NULL if none. */
@@ -268,7 +256,7 @@ do { \
/************************************************************//**
Gets the nth cell in a hash table.
-@return pointer to cell */
+@return pointer to cell */
UNIV_INLINE
hash_cell_t*
hash_get_nth_cell(
@@ -286,7 +274,7 @@ hash_table_clear(
/*************************************************************//**
Returns the number of cells in a hash table.
-@return number of cells */
+@return number of cells */
UNIV_INLINE
ulint
hash_get_n_cells(
@@ -352,7 +340,6 @@ do {\
mem_heap_free_top(hash_get_heap(TABLE, fold111), sizeof(TYPE));\
} while (0)
-#ifndef UNIV_HOTBACKUP
/****************************************************************//**
Move all hash table entries from OLD_TABLE to NEW_TABLE. */
@@ -364,10 +351,12 @@ do {\
cell_count2222 = hash_get_n_cells(OLD_TABLE);\
\
for (i2222 = 0; i2222 < cell_count2222; i2222++) {\
- NODE_TYPE* node2222 = HASH_GET_FIRST((OLD_TABLE), i2222);\
+ NODE_TYPE* node2222 = static_cast<NODE_TYPE*>(\
+ HASH_GET_FIRST((OLD_TABLE), i2222));\
\
while (node2222) {\
- NODE_TYPE* next2222 = node2222->PTR_NAME;\
+ NODE_TYPE* next2222 = static_cast<NODE_TYPE*>(\
+ node2222->PTR_NAME);\
ulint fold2222 = FOLD_FUNC(node2222);\
\
HASH_INSERT(NODE_TYPE, PTR_NAME, (NEW_TABLE),\
@@ -380,7 +369,7 @@ do {\
/************************************************************//**
Gets the sync object index for a fold value in a hash table.
-@return index */
+@return index */
UNIV_INLINE
ulint
hash_get_sync_obj_index(
@@ -389,7 +378,7 @@ hash_get_sync_obj_index(
ulint fold); /*!< in: fold */
/************************************************************//**
Gets the nth heap in a hash table.
-@return mem heap */
+@return mem heap */
UNIV_INLINE
mem_heap_t*
hash_get_nth_heap(
@@ -398,7 +387,7 @@ hash_get_nth_heap(
ulint i); /*!< in: index of the heap */
/************************************************************//**
Gets the heap for a fold value in a hash table.
-@return mem heap */
+@return mem heap */
UNIV_INLINE
mem_heap_t*
hash_get_heap(
@@ -407,7 +396,7 @@ hash_get_heap(
ulint fold); /*!< in: fold */
/************************************************************//**
Gets the nth mutex in a hash table.
-@return mutex */
+@return mutex */
UNIV_INLINE
ib_mutex_t*
hash_get_nth_mutex(
@@ -416,7 +405,7 @@ hash_get_nth_mutex(
ulint i); /*!< in: index of the mutex */
/************************************************************//**
Gets the nth rw_lock in a hash table.
-@return rw_lock */
+@return rw_lock */
UNIV_INLINE
rw_lock_t*
hash_get_nth_lock(
@@ -425,7 +414,7 @@ hash_get_nth_lock(
ulint i); /*!< in: index of the rw_lock */
/************************************************************//**
Gets the mutex for a fold value in a hash table.
-@return mutex */
+@return mutex */
UNIV_INLINE
ib_mutex_t*
hash_get_mutex(
@@ -434,123 +423,60 @@ hash_get_mutex(
ulint fold); /*!< in: fold */
/************************************************************//**
Gets the rw_lock for a fold value in a hash table.
-@return rw_lock */
+@return rw_lock */
UNIV_INLINE
rw_lock_t*
hash_get_lock(
/*==========*/
hash_table_t* table, /*!< in: hash table */
ulint fold); /*!< in: fold */
-/************************************************************//**
-Reserves the mutex for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_mutex_enter(
-/*=============*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: fold */
-/************************************************************//**
-Releases the mutex for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_mutex_exit(
-/*============*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: fold */
-/************************************************************//**
-Reserves all the mutexes of a hash table, in an ascending order. */
-UNIV_INTERN
-void
-hash_mutex_enter_all(
-/*=================*/
- hash_table_t* table); /*!< in: hash table */
-/************************************************************//**
-Releases all the mutexes of a hash table. */
-UNIV_INTERN
-void
-hash_mutex_exit_all(
-/*================*/
- hash_table_t* table); /*!< in: hash table */
-/************************************************************//**
-Releases all but the passed in mutex of a hash table. */
-UNIV_INTERN
-void
-hash_mutex_exit_all_but(
-/*====================*/
- hash_table_t* table, /*!< in: hash table */
- ib_mutex_t* keep_mutex); /*!< in: mutex to keep */
-/************************************************************//**
-s-lock a lock for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_lock_s(
-/*========*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: fold */
-/************************************************************//**
-x-lock a lock for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_lock_x(
-/*========*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: fold */
-/************************************************************//**
-unlock an s-lock for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_unlock_s(
-/*==========*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: fold */
-/************************************************************//**
-unlock x-lock for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_unlock_x(
-/*==========*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: fold */
+/** Confirm that the held S-latch is the rw_lock protecting the given fold
+value; if not, release it and S-latch the appropriate one, repeating until
+the held latch matches.
+@param[in] hash_lock latched rw_lock to be confirmed
+@param[in] table hash table
+@param[in] fold fold value
+@return latched rw_lock */
+UNIV_INLINE
+rw_lock_t*
+hash_lock_s_confirm(
+ rw_lock_t* hash_lock,
+ hash_table_t* table,
+ ulint fold);
+
+/** Confirm that the held X-latch is the rw_lock protecting the given fold
+value; if not, release it and X-latch the appropriate one, repeating until
+the held latch matches.
+@param[in] hash_lock latched rw_lock to be confirmed
+@param[in] table hash table
+@param[in] fold fold value
+@return latched rw_lock */
+UNIV_INLINE
+rw_lock_t*
+hash_lock_x_confirm(
+ rw_lock_t* hash_lock,
+ hash_table_t* table,
+ ulint fold);
+
/************************************************************//**
Reserves all the locks of a hash table, in an ascending order. */
-UNIV_INTERN
void
hash_lock_x_all(
/*============*/
hash_table_t* table); /*!< in: hash table */
/************************************************************//**
Releases all the locks of a hash table, in an ascending order. */
-UNIV_INTERN
void
hash_unlock_x_all(
/*==============*/
hash_table_t* table); /*!< in: hash table */
/************************************************************//**
Releases all but passed in lock of a hash table, */
-UNIV_INTERN
void
hash_unlock_x_all_but(
/*==================*/
hash_table_t* table, /*!< in: hash table */
rw_lock_t* keep_lock); /*!< in: lock to keep */
-#else /* !UNIV_HOTBACKUP */
-# define hash_get_heap(table, fold) ((table)->heap)
-# define hash_mutex_enter(table, fold) ((void) 0)
-# define hash_mutex_exit(table, fold) ((void) 0)
-# define hash_mutex_enter_all(table) ((void) 0)
-# define hash_mutex_exit_all(table) ((void) 0)
-# define hash_mutex_exit_all_but(t, m) ((void) 0)
-# define hash_lock_s(t, f) ((void) 0)
-# define hash_lock_x(t, f) ((void) 0)
-# define hash_unlock_s(t, f) ((void) 0)
-# define hash_unlock_x(t, f) ((void) 0)
-# define hash_lock_x_all(t) ((void) 0)
-# define hash_unlock_x_all(t) ((void) 0)
-# define hash_unlock_x_all_but(t, l) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
struct hash_cell_t{
void* node; /*!< hash chain node, NULL if none */
};
@@ -558,16 +484,16 @@ struct hash_cell_t{
/* The hash table structure */
struct hash_table_t {
enum hash_table_sync_t type; /*<! type of hash_table. */
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-# ifndef UNIV_HOTBACKUP
+#ifdef BTR_CUR_HASH_ADAPT
+# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
ibool adaptive;/* TRUE if this is the hash
table of the adaptive hash
index */
-# endif /* !UNIV_HOTBACKUP */
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+#endif /* BTR_CUR_HASH_ADAPT */
ulint n_cells;/* number of cells in the hash table */
hash_cell_t* array; /*!< pointer to cell array */
-#ifndef UNIV_HOTBACKUP
+
ulint n_sync_obj;/* if sync_objs != NULL, then
the number of either the number
of mutexes or the number of
@@ -587,7 +513,6 @@ struct hash_table_t {
can be allocated from these memory
heaps; there are then n_mutexes
many of these heaps */
-#endif /* !UNIV_HOTBACKUP */
mem_heap_t* heap;
#ifdef UNIV_DEBUG
ulint magic_n;
@@ -595,8 +520,6 @@ struct hash_table_t {
#endif /* UNIV_DEBUG */
};
-#ifndef UNIV_NONINL
#include "hash0hash.ic"
-#endif
#endif
diff --git a/storage/innobase/include/hash0hash.ic b/storage/innobase/include/hash0hash.ic
index f32c33e5da7..d6dd104572f 100644
--- a/storage/innobase/include/hash0hash.ic
+++ b/storage/innobase/include/hash0hash.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2015, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,7 +27,7 @@ Created 5/20/1997 Heikki Tuuri
/************************************************************//**
Gets the nth cell in a hash table.
-@return pointer to cell */
+@return pointer to cell */
UNIV_INLINE
hash_cell_t*
hash_get_nth_cell(
@@ -58,7 +58,7 @@ hash_table_clear(
/*************************************************************//**
Returns the number of cells in a hash table.
-@return number of cells */
+@return number of cells */
UNIV_INLINE
ulint
hash_get_n_cells(
@@ -72,7 +72,7 @@ hash_get_n_cells(
/**************************************************************//**
Calculates the hash value from a folded value.
-@return hashed value */
+@return hashed value */
UNIV_INLINE
ulint
hash_calc_hash(
@@ -85,10 +85,9 @@ hash_calc_hash(
return(ut_hash_ulint(fold, table->n_cells));
}
-#ifndef UNIV_HOTBACKUP
/************************************************************//**
Gets the sync object index for a fold value in a hash table.
-@return index */
+@return index */
UNIV_INLINE
ulint
hash_get_sync_obj_index(
@@ -106,7 +105,7 @@ hash_get_sync_obj_index(
/************************************************************//**
Gets the nth heap in a hash table.
-@return mem heap */
+@return mem heap */
UNIV_INLINE
mem_heap_t*
hash_get_nth_heap(
@@ -124,7 +123,7 @@ hash_get_nth_heap(
/************************************************************//**
Gets the heap for a fold value in a hash table.
-@return mem heap */
+@return mem heap */
UNIV_INLINE
mem_heap_t*
hash_get_heap(
@@ -148,7 +147,7 @@ hash_get_heap(
/************************************************************//**
Gets the nth mutex in a hash table.
-@return mutex */
+@return mutex */
UNIV_INLINE
ib_mutex_t*
hash_get_nth_mutex(
@@ -166,7 +165,7 @@ hash_get_nth_mutex(
/************************************************************//**
Gets the mutex for a fold value in a hash table.
-@return mutex */
+@return mutex */
UNIV_INLINE
ib_mutex_t*
hash_get_mutex(
@@ -186,7 +185,7 @@ hash_get_mutex(
/************************************************************//**
Gets the nth rw_lock in a hash table.
-@return rw_lock */
+@return rw_lock */
UNIV_INLINE
rw_lock_t*
hash_get_nth_lock(
@@ -204,7 +203,7 @@ hash_get_nth_lock(
/************************************************************//**
Gets the rw_lock for a fold value in a hash table.
-@return rw_lock */
+@return rw_lock */
UNIV_INLINE
rw_lock_t*
hash_get_lock(
@@ -222,4 +221,57 @@ hash_get_lock(
return(hash_get_nth_lock(table, i));
}
-#endif /* !UNIV_HOTBACKUP */
+
+/** Confirm that the held S-latch is the rw_lock protecting the given fold
+value; if not, release it and S-latch the appropriate one, repeating until
+the held latch matches.
+@param[in] hash_lock latched rw_lock to be confirmed
+@param[in] table hash table
+@param[in] fold fold value
+@return latched rw_lock */
+UNIV_INLINE
+rw_lock_t*
+hash_lock_s_confirm(
+ rw_lock_t* hash_lock,
+ hash_table_t* table,
+ ulint fold)
+{
+ ut_ad(rw_lock_own(hash_lock, RW_LOCK_S));
+
+ rw_lock_t* hash_lock_tmp = hash_get_lock(table, fold);
+
+ while (hash_lock_tmp != hash_lock) {
+ rw_lock_s_unlock(hash_lock);
+ hash_lock = hash_lock_tmp;
+ rw_lock_s_lock(hash_lock);
+ hash_lock_tmp = hash_get_lock(table, fold);
+ }
+
+ return(hash_lock);
+}
+
+/** Confirm that the held X-latch is the rw_lock protecting the given fold
+value; if not, release it and X-latch the appropriate one, repeating until
+the held latch matches.
+@param[in] hash_lock latched rw_lock to be confirmed
+@param[in] table hash table
+@param[in] fold fold value
+@return latched rw_lock */
+UNIV_INLINE
+rw_lock_t*
+hash_lock_x_confirm(
+ rw_lock_t* hash_lock,
+ hash_table_t* table,
+ ulint fold)
+{
+ ut_ad(rw_lock_own(hash_lock, RW_LOCK_X));
+
+ rw_lock_t* hash_lock_tmp = hash_get_lock(table, fold);
+
+ while (hash_lock_tmp != hash_lock) {
+ rw_lock_x_unlock(hash_lock);
+ hash_lock = hash_lock_tmp;
+ rw_lock_x_lock(hash_lock);
+ hash_lock_tmp = hash_get_lock(table, fold);
+ }
+
+ return(hash_lock);
+}
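The intended calling pattern, sketched (editorial; the adaptive hash index is the main consumer of these helpers):

	/* Latch the rw_lock believed to cover "fold", then confirm,
	re-latching until the held lock is the appropriate one. */
	rw_lock_t*	latch = hash_get_lock(table, fold);
	rw_lock_s_lock(latch);
	latch = hash_lock_s_confirm(latch, table, fold);
	/* ... read the chain under the confirmed latch ... */
	rw_lock_s_unlock(latch);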
diff --git a/storage/innobase/include/ib0mutex.h b/storage/innobase/include/ib0mutex.h
new file mode 100644
index 00000000000..a7289777e00
--- /dev/null
+++ b/storage/innobase/include/ib0mutex.h
@@ -0,0 +1,818 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ib0mutex.h
+Policy based mutexes.
+
+Created 2013-03-26 Sunny Bains.
+***********************************************************************/
+
+#ifndef UNIV_INNOCHECKSUM
+
+#ifndef ib0mutex_h
+#define ib0mutex_h
+
+#include "ut0ut.h"
+#include "ut0rnd.h"
+#include "os0event.h"
+#include "sync0arr.h"
+
+/** OS mutex for tracking lock/unlock for debugging */
+template <template <typename> class Policy = NoPolicy>
+struct OSTrackMutex {
+
+ typedef Policy<OSTrackMutex> MutexPolicy;
+
+ explicit OSTrackMutex(bool destroy_mutex_at_exit = true)
+ UNIV_NOTHROW
+ {
+ ut_d(m_freed = true);
+ ut_d(m_locked = false);
+ ut_d(m_destroy_at_exit = destroy_mutex_at_exit);
+ }
+
+ ~OSTrackMutex() UNIV_NOTHROW
+ {
+ ut_ad(!m_destroy_at_exit || !m_locked);
+ }
+
+ /** Initialise the mutex.
+ @param[in] id Mutex ID
+ @param[in] filename File where mutex was created
+ @param[in] line Line in filename */
+ void init(
+ latch_id_t id,
+ const char* filename,
+ uint32_t line)
+ UNIV_NOTHROW
+ {
+ ut_ad(m_freed);
+ ut_ad(!m_locked);
+
+ m_mutex.init();
+
+ ut_d(m_freed = false);
+ }
+
+ /** Destroy the mutex */
+ void destroy() UNIV_NOTHROW
+ {
+ ut_ad(!m_locked);
+ ut_ad(!m_freed);
+
+ m_mutex.destroy();
+
+ ut_d(m_freed = true);
+ }
+
+ /** Release the mutex. */
+ void exit() UNIV_NOTHROW
+ {
+ ut_ad(m_locked);
+ ut_d(m_locked = false);
+ ut_ad(!m_freed);
+
+ m_mutex.exit();
+ }
+
+ /** Acquire the mutex.
+ @param[in] max_spins max number of spins
+ @param[in] max_delay max delay per spin
+ @param[in] filename from where called
+ @param[in] line within filename */
+ void enter(
+ uint32_t max_spins,
+ uint32_t max_delay,
+ const char* filename,
+ uint32_t line)
+ UNIV_NOTHROW
+ {
+ ut_ad(!m_freed);
+
+ m_mutex.enter();
+
+ ut_ad(!m_locked);
+ ut_d(m_locked = true);
+ }
+
+ /** @return true if locking succeeded */
+ bool try_lock() UNIV_NOTHROW
+ {
+ ut_ad(!m_freed);
+
+ bool locked = m_mutex.try_lock();
+
+ if (locked) {
+ ut_ad(!m_locked);
+ ut_d(m_locked = locked);
+ }
+
+ return(locked);
+ }
+
+ /** @return non-const version of the policy */
+ MutexPolicy& policy()
+ UNIV_NOTHROW
+ {
+ return(m_policy);
+ }
+
+ /** @return the const version of the policy */
+ const MutexPolicy& policy() const
+ UNIV_NOTHROW
+ {
+ return(m_policy);
+ }
+
+private:
+#ifdef UNIV_DEBUG
+	/** true if the mutex has not been initialized */
+ bool m_freed;
+
+ /** true if the mutex has been locked. */
+ bool m_locked;
+
+	/** Whether to destroy the mutex at exit */
+ bool m_destroy_at_exit;
+#endif /* UNIV_DEBUG */
+
+ /** OS Mutex instance */
+ OSMutex m_mutex;
+
+ /** Policy data */
+ MutexPolicy m_policy;
+};
+
+
+#ifdef HAVE_IB_LINUX_FUTEX
+
+#include <linux/futex.h>
+#include <sys/syscall.h>
+
+/** Mutex implementation that used the Linux futex. */
+template <template <typename> class Policy = NoPolicy>
+struct TTASFutexMutex {
+
+ typedef Policy<TTASFutexMutex> MutexPolicy;
+
+ TTASFutexMutex() UNIV_NOTHROW
+ :
+ m_lock_word(MUTEX_STATE_UNLOCKED)
+ {
+ /* Check that lock_word is aligned. */
+ ut_ad(!((ulint) &m_lock_word % sizeof(ulint)));
+ }
+
+ ~TTASFutexMutex()
+ {
+ ut_a(m_lock_word == MUTEX_STATE_UNLOCKED);
+ }
+
+ /** Called when the mutex is "created". Note: Not from the constructor
+ but when the mutex is initialised.
+ @param[in] id Mutex ID
+ @param[in] filename File where mutex was created
+ @param[in] line Line in filename */
+ void init(
+ latch_id_t id,
+ const char* filename,
+ uint32_t line)
+ UNIV_NOTHROW
+ {
+ ut_a(m_lock_word == MUTEX_STATE_UNLOCKED);
+ }
+
+ /** Destroy the mutex. */
+ void destroy() UNIV_NOTHROW
+ {
+ /* The destructor can be called at shutdown. */
+ ut_a(m_lock_word == MUTEX_STATE_UNLOCKED);
+ }
+
+ /** Acquire the mutex.
+ @param[in] max_spins max number of spins
+ @param[in] max_delay max delay per spin
+ @param[in] filename from where called
+ @param[in] line within filename */
+ void enter(
+ uint32_t max_spins,
+ uint32_t max_delay,
+ const char* filename,
+ uint32_t line) UNIV_NOTHROW
+ {
+ uint32_t n_spins, n_waits;
+
+ for (n_spins= 0; n_spins < max_spins; n_spins++) {
+ if (try_lock()) {
+ m_policy.add(n_spins, 0);
+ return;
+ }
+
+ ut_delay(max_delay);
+ }
+
+ for (n_waits= 0;; n_waits++) {
+ if (my_atomic_fas32_explicit(&m_lock_word,
+ MUTEX_STATE_WAITERS,
+ MY_MEMORY_ORDER_ACQUIRE)
+ == MUTEX_STATE_UNLOCKED) {
+ break;
+ }
+
+ syscall(SYS_futex, &m_lock_word,
+ FUTEX_WAIT_PRIVATE, MUTEX_STATE_WAITERS,
+ 0, 0, 0);
+ }
+
+ m_policy.add(n_spins, n_waits);
+ }
+
+ /** Release the mutex. */
+ void exit() UNIV_NOTHROW
+ {
+ if (my_atomic_fas32_explicit(&m_lock_word,
+ MUTEX_STATE_UNLOCKED,
+ MY_MEMORY_ORDER_RELEASE)
+ == MUTEX_STATE_WAITERS) {
+ syscall(SYS_futex, &m_lock_word, FUTEX_WAKE_PRIVATE,
+ 1, 0, 0, 0);
+ }
+ }
+
+ /** Try and lock the mutex.
+ @return true if successful */
+ bool try_lock() UNIV_NOTHROW
+ {
+ int32 oldval = MUTEX_STATE_UNLOCKED;
+ return(my_atomic_cas32_strong_explicit(&m_lock_word, &oldval,
+ MUTEX_STATE_LOCKED,
+ MY_MEMORY_ORDER_ACQUIRE,
+ MY_MEMORY_ORDER_RELAXED));
+ }
+
+ /** @return non-const version of the policy */
+ MutexPolicy& policy() UNIV_NOTHROW
+ {
+ return(m_policy);
+ }
+
+ /** @return const version of the policy */
+ const MutexPolicy& policy() const UNIV_NOTHROW
+ {
+ return(m_policy);
+ }
+private:
+ /** Policy data */
+ MutexPolicy m_policy;
+
+ /** lock_word is the target of the atomic test-and-set instruction
+ when atomic operations are enabled. */
+ int32 m_lock_word;
+};
+
+#endif /* HAVE_IB_LINUX_FUTEX */
+
+template <template <typename> class Policy = NoPolicy>
+struct TTASMutex {
+
+ typedef Policy<TTASMutex> MutexPolicy;
+
+ TTASMutex() UNIV_NOTHROW
+ :
+ m_lock_word(MUTEX_STATE_UNLOCKED)
+ {
+ /* Check that lock_word is aligned. */
+ ut_ad(!((ulint) &m_lock_word % sizeof(ulint)));
+ }
+
+ ~TTASMutex()
+ {
+ ut_ad(m_lock_word == MUTEX_STATE_UNLOCKED);
+ }
+
+ /** Called when the mutex is "created". Note: Not from the constructor
+ but when the mutex is initialised.
+ @param[in] id Mutex ID
+ @param[in] filename File where mutex was created
+ @param[in] line Line in filename */
+ void init(
+ latch_id_t id,
+ const char* filename,
+ uint32_t line)
+ UNIV_NOTHROW
+ {
+ ut_ad(m_lock_word == MUTEX_STATE_UNLOCKED);
+ }
+
+ /** Destroy the mutex. */
+ void destroy() UNIV_NOTHROW
+ {
+ /* The destructor can be called at shutdown. */
+ ut_ad(m_lock_word == MUTEX_STATE_UNLOCKED);
+ }
+
+ /** Try and lock the mutex.
+ @return true on success */
+ bool try_lock() UNIV_NOTHROW
+ {
+ int32 oldval = MUTEX_STATE_UNLOCKED;
+ return(my_atomic_cas32_strong_explicit(&m_lock_word, &oldval,
+ MUTEX_STATE_LOCKED,
+ MY_MEMORY_ORDER_ACQUIRE,
+ MY_MEMORY_ORDER_RELAXED));
+ }
+
+ /** Release the mutex. */
+ void exit() UNIV_NOTHROW
+ {
+ ut_ad(m_lock_word == MUTEX_STATE_LOCKED);
+ my_atomic_store32_explicit(&m_lock_word, MUTEX_STATE_UNLOCKED,
+ MY_MEMORY_ORDER_RELEASE);
+ }
+
+ /** Acquire the mutex.
+ @param max_spins max number of spins
+ @param max_delay max delay per spin
+ @param filename from where called
+ @param line within filename */
+ void enter(
+ uint32_t max_spins,
+ uint32_t max_delay,
+ const char* filename,
+ uint32_t line) UNIV_NOTHROW
+ {
+ const uint32_t step = max_spins;
+ uint32_t n_spins = 0;
+
+ while (!try_lock()) {
+ ut_delay(max_delay);
+ if (++n_spins == max_spins) {
+ os_thread_yield();
+ max_spins+= step;
+ }
+ }
+
+ m_policy.add(n_spins, 0);
+ }
+
+ /** @return non-const version of the policy */
+ MutexPolicy& policy() UNIV_NOTHROW
+ {
+ return(m_policy);
+ }
+
+ /** @return const version of the policy */
+ const MutexPolicy& policy() const UNIV_NOTHROW
+ {
+ return(m_policy);
+ }
+
+private:
+ // Disable copying
+ TTASMutex(const TTASMutex&);
+ TTASMutex& operator=(const TTASMutex&);
+
+ /** Policy data */
+ MutexPolicy m_policy;
+
+ /** lock_word is the target of the atomic test-and-set instruction
+ when atomic operations are enabled. */
+ int32 m_lock_word;
+};
+
+template <template <typename> class Policy = NoPolicy>
+struct TTASEventMutex {
+
+ typedef Policy<TTASEventMutex> MutexPolicy;
+
+ TTASEventMutex()
+ UNIV_NOTHROW
+ :
+ m_lock_word(MUTEX_STATE_UNLOCKED),
+ m_event()
+ {
+ /* Check that lock_word is aligned. */
+ ut_ad(!((ulint) &m_lock_word % sizeof(ulint)));
+ }
+
+ ~TTASEventMutex()
+ UNIV_NOTHROW
+ {
+ ut_ad(m_lock_word == MUTEX_STATE_UNLOCKED);
+ }
+
+ /** Called when the mutex is "created". Note: Not from the constructor
+ but when the mutex is initialised.
+ @param[in] id Mutex ID
+ @param[in] filename File where mutex was created
+ @param[in] line Line in filename */
+ void init(
+ latch_id_t id,
+ const char* filename,
+ uint32_t line)
+ UNIV_NOTHROW
+ {
+ ut_a(m_event == 0);
+ ut_a(m_lock_word == MUTEX_STATE_UNLOCKED);
+
+ m_event = os_event_create(sync_latch_get_name(id));
+ }
+
+	/** This is the real destructor. This mutex can be created in BSS and
+	its destructor will be called on exit(). We can't call
+ os_event_destroy() at that stage. */
+ void destroy()
+ UNIV_NOTHROW
+ {
+ ut_ad(m_lock_word == MUTEX_STATE_UNLOCKED);
+
+ /* We have to free the event before InnoDB shuts down. */
+ os_event_destroy(m_event);
+ m_event = 0;
+ }
+
+ /** Try and lock the mutex. Note: POSIX returns 0 on success.
+ @return true on success */
+ bool try_lock()
+ UNIV_NOTHROW
+ {
+ int32 oldval = MUTEX_STATE_UNLOCKED;
+ return(my_atomic_cas32_strong_explicit(&m_lock_word, &oldval,
+ MUTEX_STATE_LOCKED,
+ MY_MEMORY_ORDER_ACQUIRE,
+ MY_MEMORY_ORDER_RELAXED));
+ }
+
+ /** Release the mutex. */
+ void exit()
+ UNIV_NOTHROW
+ {
+ if (my_atomic_fas32_explicit(&m_lock_word,
+ MUTEX_STATE_UNLOCKED,
+ MY_MEMORY_ORDER_RELEASE)
+ == MUTEX_STATE_WAITERS) {
+ os_event_set(m_event);
+ sync_array_object_signalled();
+ }
+ }
+
+ /** Acquire the mutex.
+ @param[in] max_spins max number of spins
+ @param[in] max_delay max delay per spin
+ @param[in] filename from where called
+ @param[in] line within filename */
+ void enter(
+ uint32_t max_spins,
+ uint32_t max_delay,
+ const char* filename,
+ uint32_t line)
+ UNIV_NOTHROW
+ {
+ uint32_t n_spins = 0;
+ uint32_t n_waits = 0;
+ const uint32_t step = max_spins;
+
+ while (!try_lock()) {
+ if (n_spins++ == max_spins) {
+ max_spins += step;
+ n_waits++;
+ os_thread_yield();
+
+ sync_cell_t* cell;
+ sync_array_t *sync_arr = sync_array_get_and_reserve_cell(
+ this,
+ (m_policy.get_id() == LATCH_ID_BUF_BLOCK_MUTEX
+ || m_policy.get_id() == LATCH_ID_BUF_POOL_ZIP)
+ ? SYNC_BUF_BLOCK
+ : SYNC_MUTEX,
+ filename, line, &cell);
+
+ int32 oldval = MUTEX_STATE_LOCKED;
+ my_atomic_cas32_strong_explicit(&m_lock_word, &oldval,
+ MUTEX_STATE_WAITERS,
+ MY_MEMORY_ORDER_RELAXED,
+ MY_MEMORY_ORDER_RELAXED);
+
+ if (oldval == MUTEX_STATE_UNLOCKED) {
+ sync_array_free_cell(sync_arr, cell);
+ } else {
+ sync_array_wait_event(sync_arr, cell);
+ }
+ } else {
+ ut_delay(max_delay);
+ }
+ }
+
+ m_policy.add(n_spins, n_waits);
+ }
+
+ /** @return the lock state. */
+ int32 state() const
+ UNIV_NOTHROW
+ {
+ return(my_atomic_load32_explicit(const_cast<int32*>
+ (&m_lock_word),
+ MY_MEMORY_ORDER_RELAXED));
+ }
+
+	/** The event on which the mutex waits in sync0arr.cc
+	@return event instance */
+ os_event_t event()
+ UNIV_NOTHROW
+ {
+ return(m_event);
+ }
+
+ /** @return non-const version of the policy */
+ MutexPolicy& policy()
+ UNIV_NOTHROW
+ {
+ return(m_policy);
+ }
+
+ /** @return const version of the policy */
+ const MutexPolicy& policy() const
+ UNIV_NOTHROW
+ {
+ return(m_policy);
+ }
+
+private:
+ /** Disable copying */
+ TTASEventMutex(const TTASEventMutex&);
+ TTASEventMutex& operator=(const TTASEventMutex&);
+
+ /** lock_word is the target of the atomic test-and-set instruction
+ when atomic operations are enabled. */
+ int32 m_lock_word;
+
+ /** Used by sync0arr.cc for the wait queue */
+ os_event_t m_event;
+
+ /** Policy data */
+ MutexPolicy m_policy;
+};
+
+/** Mutex interface for all policy mutexes. This class handles the interfacing
+with the Performance Schema instrumentation. */
+template <typename MutexImpl>
+struct PolicyMutex
+{
+ typedef MutexImpl MutexType;
+ typedef typename MutexImpl::MutexPolicy Policy;
+
+ PolicyMutex() UNIV_NOTHROW : m_impl()
+ {
+#ifdef UNIV_PFS_MUTEX
+ m_ptr = 0;
+#endif /* UNIV_PFS_MUTEX */
+ }
+
+ ~PolicyMutex() { }
+
+ /** @return non-const version of the policy */
+ Policy& policy() UNIV_NOTHROW
+ {
+ return(m_impl.policy());
+ }
+
+ /** @return const version of the policy */
+ const Policy& policy() const UNIV_NOTHROW
+ {
+ return(m_impl.policy());
+ }
+
+ /** Release the mutex. */
+ void exit() UNIV_NOTHROW
+ {
+#ifdef UNIV_PFS_MUTEX
+ pfs_exit();
+#endif /* UNIV_PFS_MUTEX */
+
+ policy().release(m_impl);
+
+ m_impl.exit();
+ }
+
+ /** Acquire the mutex.
+ @param n_spins max number of spins
+ @param n_delay max delay per spin
+ @param name filename where locked
+ @param line line number where locked */
+ void enter(
+ uint32_t n_spins,
+ uint32_t n_delay,
+ const char* name,
+ uint32_t line) UNIV_NOTHROW
+ {
+#ifdef UNIV_PFS_MUTEX
+ /* Note: locker is really an alias for state. That's why
+ it has to be in the same scope during pfs_end(). */
+
+ PSI_mutex_locker_state state;
+ PSI_mutex_locker* locker;
+
+ locker = pfs_begin_lock(&state, name, line);
+#endif /* UNIV_PFS_MUTEX */
+
+ policy().enter(m_impl, name, line);
+
+ m_impl.enter(n_spins, n_delay, name, line);
+
+ policy().locked(m_impl, name, line);
+#ifdef UNIV_PFS_MUTEX
+ pfs_end(locker, 0);
+#endif /* UNIV_PFS_MUTEX */
+ }
+
+	/** Try and lock the mutex; return 0 on success and 1 otherwise.
+ @param name filename where locked
+ @param line line number where locked */
+ int trylock(const char* name, uint32_t line) UNIV_NOTHROW
+ {
+#ifdef UNIV_PFS_MUTEX
+ /* Note: locker is really an alias for state. That's why
+ it has to be in the same scope during pfs_end(). */
+
+ PSI_mutex_locker_state state;
+ PSI_mutex_locker* locker;
+
+ locker = pfs_begin_trylock(&state, name, line);
+#endif /* UNIV_PFS_MUTEX */
+
+		/* There is a subtlety here: we check the mutex ordering
+		after locking. This is done only to avoid registering and
+		then deregistering the latch if the trylock was
+		unsuccessful. */
+
+ int ret = m_impl.try_lock() ? 0 : 1;
+
+ if (ret == 0) {
+
+ policy().enter(m_impl, name, line);
+
+ policy().locked(m_impl, name, line);
+ }
+
+#ifdef UNIV_PFS_MUTEX
+ pfs_end(locker, 0);
+#endif /* UNIV_PFS_MUTEX */
+
+ return(ret);
+ }
+
+#ifdef UNIV_DEBUG
+ /** @return true if the thread owns the mutex. */
+ bool is_owned() const UNIV_NOTHROW
+ {
+ return(policy().is_owned());
+ }
+#endif /* UNIV_DEBUG */
+
+ /**
+ Initialise the mutex.
+
+ @param[in] id Mutex ID
+ @param[in] filename file where created
+ @param[in] line line number in file where created */
+ void init(
+ latch_id_t id,
+ const char* filename,
+ uint32_t line)
+ UNIV_NOTHROW
+ {
+#ifdef UNIV_PFS_MUTEX
+ pfs_add(sync_latch_get_pfs_key(id));
+#endif /* UNIV_PFS_MUTEX */
+
+ m_impl.init(id, filename, line);
+ policy().init(m_impl, id, filename, line);
+ }
+
+ /** Free resources (if any) */
+ void destroy() UNIV_NOTHROW
+ {
+#ifdef UNIV_PFS_MUTEX
+ pfs_del();
+#endif /* UNIV_PFS_MUTEX */
+ m_impl.destroy();
+ policy().destroy();
+ }
+
+ /** Required for os_event_t */
+ operator sys_mutex_t*() UNIV_NOTHROW
+ {
+ return(m_impl.operator sys_mutex_t*());
+ }
+
+#ifdef UNIV_PFS_MUTEX
+ /** Performance schema monitoring - register mutex with PFS.
+
+ Note: This is public only because we want to get around an issue
+ with registering a subset of buffer pool pages with PFS when
+ PFS_GROUP_BUFFER_SYNC is defined. Therefore this has to then
+ be called by external code (see buf0buf.cc).
+
+ @param key - Performance Schema key. */
+ void pfs_add(mysql_pfs_key_t key) UNIV_NOTHROW
+ {
+ ut_ad(m_ptr == 0);
+ m_ptr = PSI_MUTEX_CALL(init_mutex)(key, this);
+ }
+
+private:
+
+ /** Performance schema monitoring.
+ @param state - PFS locker state
+ @param name - file name where locked
+ @param line - line number in file where locked */
+ PSI_mutex_locker* pfs_begin_lock(
+ PSI_mutex_locker_state* state,
+ const char* name,
+ uint32_t line) UNIV_NOTHROW
+ {
+ if (m_ptr != 0) {
+ return(PSI_MUTEX_CALL(start_mutex_wait)(
+ state, m_ptr,
+ PSI_MUTEX_LOCK, name, (uint) line));
+ }
+
+ return(0);
+ }
+
+ /** Performance schema monitoring.
+ @param state - PFS locker state
+ @param name - file name where locked
+ @param line - line number in file where locked */
+ PSI_mutex_locker* pfs_begin_trylock(
+ PSI_mutex_locker_state* state,
+ const char* name,
+ uint32_t line) UNIV_NOTHROW
+ {
+ if (m_ptr != 0) {
+ return(PSI_MUTEX_CALL(start_mutex_wait)(
+ state, m_ptr,
+ PSI_MUTEX_TRYLOCK, name, (uint) line));
+ }
+
+ return(0);
+ }
+
+ /** Performance schema monitoring
+ @param locker - PFS identifier
+ @param ret - 0 for success and 1 for failure */
+ void pfs_end(PSI_mutex_locker* locker, int ret) UNIV_NOTHROW
+ {
+ if (locker != 0) {
+ PSI_MUTEX_CALL(end_mutex_wait)(locker, ret);
+ }
+ }
+
+ /** Performance schema monitoring - register mutex release */
+ void pfs_exit()
+ {
+ if (m_ptr != 0) {
+ PSI_MUTEX_CALL(unlock_mutex)(m_ptr);
+ }
+ }
+
+ /** Performance schema monitoring - deregister */
+ void pfs_del()
+ {
+ if (m_ptr != 0) {
+ PSI_MUTEX_CALL(destroy_mutex)(m_ptr);
+ m_ptr = 0;
+ }
+ }
+#endif /* UNIV_PFS_MUTEX */
+
+private:
+ /** The mutex implementation */
+ MutexImpl m_impl;
+
+#ifdef UNIV_PFS_MUTEX
+ /** The performance schema instrumentation hook. */
+ PSI_mutex* m_ptr;
+#endif /* UNIV_PFS_MUTEX */
+
+};
+
+#endif /* ib0mutex_h */
+
+#endif /* !UNIV_INNOCHECKSUM */
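For orientation, how these templates usually compose (a hedged sketch; the concrete typedef, the latch id, and the spin parameters live in other sync0 headers and are assumptions here):

	typedef PolicyMutex<TTASEventMutex<GenericPolicy> >	ib_mutex_t;

	ib_mutex_t	m;
	m.init(latch_id /* placeholder */, __FILE__, __LINE__);
	m.enter(srv_n_spin_wait_rounds, srv_spin_wait_delay,
		__FILE__, __LINE__);
	/* ... critical section ... */
	m.exit();
	m.destroy();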
diff --git a/storage/innobase/include/ibuf0ibuf.h b/storage/innobase/include/ibuf0ibuf.h
index bfd1d75d8c3..a69b63ee16b 100644
--- a/storage/innobase/include/ibuf0ibuf.h
+++ b/storage/innobase/include/ibuf0ibuf.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, MariaDB Corporation.
+Copyright (c) 2016, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,14 +27,10 @@ Created 7/19/1997 Heikki Tuuri
#ifndef ibuf0ibuf_h
#define ibuf0ibuf_h
-#include "univ.i"
-
#include "mtr0mtr.h"
#include "dict0mem.h"
#include "fsp0fsp.h"
-
-#ifndef UNIV_HOTBACKUP
-# include "ibuf0types.h"
+#include "ibuf0types.h"
/** Default value for maximum on-disk size of change buffer in terms
of percentage of the buffer pool. */
@@ -94,13 +90,11 @@ free bits could momentarily be set too high. */
/******************************************************************//**
Creates the insert buffer data structure at a database startup.
@return DB_SUCCESS or failure */
-UNIV_INTERN
dberr_t
ibuf_init_at_db_start(void);
/*=======================*/
/*********************************************************************//**
Updates the max_size value for ibuf. */
-UNIV_INTERN
void
ibuf_max_size_update(
/*=================*/
@@ -109,7 +103,6 @@ ibuf_max_size_update(
/*********************************************************************//**
Reads the biggest tablespace id from the high end of the insert buffer
tree and updates the counter in fil_system. */
-UNIV_INTERN
void
ibuf_update_max_tablespace_id(void);
/*===============================*/
@@ -131,7 +124,6 @@ ibuf_mtr_commit(
MY_ATTRIBUTE((nonnull));
/*********************************************************************//**
Initializes an ibuf bitmap page. */
-UNIV_INTERN
void
ibuf_bitmap_page_init(
/*==================*/
@@ -146,7 +138,6 @@ buffer bitmap must never exceed the free space on a page. It is safe
to decrement or reset the bits in the bitmap in a mini-transaction
that is committed before the mini-transaction that affects the free
space. */
-UNIV_INTERN
void
ibuf_reset_free_bits(
/*=================*/
@@ -189,7 +180,6 @@ thread until mtr is committed. NOTE: The free bits in the insert
buffer bitmap must never exceed the free space on a page. It is safe
to set the free bits in the same mini-transaction that updated the
page. */
-UNIV_INTERN
void
ibuf_update_free_bits_low(
/*======================*/
@@ -208,7 +198,6 @@ thread until mtr is committed. NOTE: The free bits in the insert
buffer bitmap must never exceed the free space on a page. It is safe
to set the free bits in the same mini-transaction that updated the
page. */
-UNIV_INTERN
void
ibuf_update_free_bits_zip(
/*======================*/
@@ -221,12 +210,9 @@ virtually prevent any further operations until mtr is committed.
NOTE: The free bits in the insert buffer bitmap must never exceed the
free space on a page. It is safe to set the free bits in the same
mini-transaction that updated the pages. */
-UNIV_INTERN
void
ibuf_update_free_bits_for_two_pages_low(
/*====================================*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
buf_block_t* block1, /*!< in: index page */
buf_block_t* block2, /*!< in: index page */
mtr_t* mtr); /*!< in: mtr */
@@ -254,114 +240,124 @@ ibool
ibuf_inside(
/*========*/
const mtr_t* mtr) /*!< in: mini-transaction */
- MY_ATTRIBUTE((nonnull, pure));
-/***********************************************************************//**
-Checks if a page address is an ibuf bitmap page (level 3 page) address.
-@return TRUE if a bitmap page */
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Checks if a page address is an ibuf bitmap page (level 3 page) address.
+@param[in] page_id page id
+@param[in] page_size page size
+@return TRUE if a bitmap page */
UNIV_INLINE
ibool
ibuf_bitmap_page(
-/*=============*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint page_no);/*!< in: page number */
-/***********************************************************************//**
-Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
-Must not be called when recv_no_ibuf_operations==TRUE.
-@return TRUE if level 2 or level 3 page */
-UNIV_INTERN
+ const page_id_t page_id,
+ const page_size_t& page_size);
+
+/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
+Must not be called when recv_no_ibuf_operations==true.
+@param[in] page_id page id
+@param[in] page_size page size
+@param[in] x_latch FALSE if relaxed check (avoid latching the
+bitmap page)
+@param[in] file file name
+@param[in] line line where called
+@param[in,out] mtr mtr which will contain an x-latch to the
+bitmap page if the page is not one of the fixed address ibuf pages, or NULL,
+in which case a new mini-transaction is created.
+@return TRUE if level 2 or level 3 page */
ibool
ibuf_page_low(
-/*==========*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- ulint page_no,/*!< in: page number */
+ const page_id_t page_id,
+ const page_size_t& page_size,
#ifdef UNIV_DEBUG
- ibool x_latch,/*!< in: FALSE if relaxed check
- (avoid latching the bitmap page) */
+ ibool x_latch,
#endif /* UNIV_DEBUG */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mtr which will contain an
- x-latch to the bitmap page if the page
- is not one of the fixed address ibuf
- pages, or NULL, in which case a new
- transaction is created. */
+ const char* file,
+ unsigned line,
+ mtr_t* mtr)
MY_ATTRIBUTE((warn_unused_result));
+
#ifdef UNIV_DEBUG
-/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of
-pages. Must not be called when recv_no_ibuf_operations==TRUE.
-@param space tablespace identifier
-@param zip_size compressed page size in bytes, or 0
-@param page_no page number
-@param mtr mini-transaction or NULL
+
+/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
+Must not be called when recv_no_ibuf_operations==true.
+@param[in] page_id tablespace/page identifier
+@param[in] page_size page size
+@param[in,out] mtr mini-transaction or NULL
@return TRUE if level 2 or level 3 page */
-# define ibuf_page(space, zip_size, page_no, mtr) \
- ibuf_page_low(space, zip_size, page_no, TRUE, __FILE__, __LINE__, mtr)
+# define ibuf_page(page_id, page_size, mtr) \
+ ibuf_page_low(page_id, page_size, TRUE, __FILE__, __LINE__, mtr)
+
#else /* UNIV_DEBUG */
-/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of
-pages. Must not be called when recv_no_ibuf_operations==TRUE.
-@param space tablespace identifier
-@param zip_size compressed page size in bytes, or 0
-@param page_no page number
-@param mtr mini-transaction or NULL
+
+/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
+Must not be called when recv_no_ibuf_operations==true.
+@param[in] page_id tablespace/page identifier
+@param[in] page_size page size
+@param[in,out] mtr mini-transaction or NULL
@return TRUE if level 2 or level 3 page */
-# define ibuf_page(space, zip_size, page_no, mtr) \
- ibuf_page_low(space, zip_size, page_no, __FILE__, __LINE__, mtr)
+# define ibuf_page(page_id, page_size, mtr) \
+ ibuf_page_low(page_id, page_size, __FILE__, __LINE__, mtr)
+
#endif /* UNIV_DEBUG */
/***********************************************************************//**
Frees excess pages from the ibuf free list. This function is called when an OS
thread calls fsp services to allocate a new file segment, or a new page to a
file segment, and the thread did not own the fsp latch before this call. */
-UNIV_INTERN
void
ibuf_free_excess_pages(void);
/*========================*/
-/*********************************************************************//**
-Buffer an operation in the insert/delete buffer, instead of doing it
+
+/** Buffer an operation in the insert/delete buffer, instead of doing it
directly to the disk page, if this is possible. Does not do it if the index
is clustered or unique.
-@return TRUE if success */
-UNIV_INTERN
+@param[in] op operation type
+@param[in] entry index entry to insert
+@param[in,out] index index where to insert
+@param[in] page_id page id where to insert
+@param[in] page_size page size
+@param[in,out] thr query thread
+@return TRUE if success */
ibool
ibuf_insert(
-/*========*/
- ibuf_op_t op, /*!< in: operation type */
- const dtuple_t* entry, /*!< in: index entry to insert */
- dict_index_t* index, /*!< in: index where to insert */
- ulint space, /*!< in: space id where to insert */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- ulint page_no,/*!< in: page number where to insert */
- que_thr_t* thr); /*!< in: query thread */
-/*********************************************************************//**
-When an index page is read from a disk to the buffer pool, this function
+ ibuf_op_t op,
+ const dtuple_t* entry,
+ dict_index_t* index,
+ const page_id_t page_id,
+ const page_size_t& page_size,
+ que_thr_t* thr);
+
+/**
+Delete any buffered entries for a page.
+This prevents an infinite loop on slow shutdown
+in the case where the change buffer bitmap claims that no buffered
+changes exist, while entries exist in the change buffer tree.
+@param page_id page number for which there should be no buffered changes */
+ATTRIBUTE_COLD void ibuf_delete_recs(const page_id_t page_id);
+
+/** When an index page is read from a disk to the buffer pool, this function
applies any buffered operations to the page and deletes the entries from the
insert buffer. If the page is not read, but created in the buffer pool, this
function deletes its buffered entries from the insert buffer; there can
exist entries for such a page if the page belonged to an index which
-subsequently was dropped. */
-UNIV_INTERN
+subsequently was dropped.
+@param[in,out] block if page has been read from disk,
+pointer to the page x-latched, else NULL
+@param[in] page_id page id of the index page
+@param[in] update_ibuf_bitmap normally this is set to TRUE, but
+if we have deleted or are deleting the tablespace, then we naturally do not
+want to update a non-existent bitmap page */
void
ibuf_merge_or_delete_for_page(
-/*==========================*/
- buf_block_t* block, /*!< in: if page has been read from
- disk, pointer to the page x-latched,
- else NULL */
- ulint space, /*!< in: space id of the index page */
- ulint page_no,/*!< in: page number of the index page */
- ulint zip_size,/*!< in: compressed page size in bytes,
- or 0 */
- ibool update_ibuf_bitmap);/*!< in: normally this is set
- to TRUE, but if we have deleted or are
- deleting the tablespace, then we
- naturally do not want to update a
- non-existent bitmap page */
+ buf_block_t* block,
+ const page_id_t page_id,
+ const page_size_t* page_size,
+ ibool update_ibuf_bitmap);
+
/*********************************************************************//**
Deletes all entries in the insert buffer for a given space id. This is used
in DISCARD TABLESPACE and IMPORT TABLESPACE.
NOTE: this does not update the page free bitmaps in the space. The space will
become CORRUPT when you call this function! */
-UNIV_INTERN
void
ibuf_delete_for_discarded_space(
/*============================*/
@@ -373,29 +369,21 @@ based on the current size of the change buffer.
@return a lower limit for the combined size in bytes of entries which
will be merged from ibuf trees to the pages read, 0 if ibuf is
empty */
-UNIV_INTERN
ulint
ibuf_merge_in_background(
- bool full); /*!< in: TRUE if the caller wants to
- do a full contract based on PCT_IO(100).
- If FALSE then the size of contract
- batch is determined based on the
- current size of the ibuf tree. */
+ bool full);
/** Contracts insert buffer trees by reading pages referring to space_id
to the buffer pool.
@returns number of pages merged.*/
-UNIV_INTERN
ulint
ibuf_merge_space(
/*=============*/
ulint space); /*!< in: space id */
-#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Parses a redo log record of an ibuf bitmap page init.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
ibuf_parse_bitmap_init(
/*===================*/
@@ -403,29 +391,15 @@ ibuf_parse_bitmap_init(
byte* end_ptr,/*!< in: buffer end */
buf_block_t* block, /*!< in: block or NULL */
mtr_t* mtr); /*!< in: mtr or NULL */
-#ifndef UNIV_HOTBACKUP
-#ifdef UNIV_IBUF_COUNT_DEBUG
-/******************************************************************//**
-Gets the ibuf count for a given page.
-@return number of entries in the insert buffer currently buffered for
-this page */
-UNIV_INTERN
-ulint
-ibuf_count_get(
-/*===========*/
- ulint space, /*!< in: space id */
- ulint page_no);/*!< in: page number */
-#endif
+
/******************************************************************//**
Looks if the insert buffer is empty.
-@return true if empty */
-UNIV_INTERN
+@return true if empty */
bool
ibuf_is_empty(void);
/*===============*/
/******************************************************************//**
Prints info of ibuf. */
-UNIV_INTERN
void
ibuf_print(
/*=======*/
@@ -433,15 +407,13 @@ ibuf_print(
/********************************************************************
Read the first two bytes from a record's fourth field (counter field in new
records; something else in older records).
-@return "counter" field, or ULINT_UNDEFINED if for some reason it can't be read */
-UNIV_INTERN
+@return "counter" field, or ULINT_UNDEFINED if for some reason it can't be read */
ulint
ibuf_rec_get_counter(
/*=================*/
const rec_t* rec); /*!< in: ibuf record */
/******************************************************************//**
Closes insert buffer and frees the data structures. */
-UNIV_INTERN
void
ibuf_close(void);
/*============*/
@@ -449,7 +421,6 @@ ibuf_close(void);
/******************************************************************//**
Checks the insert buffer bitmaps on IMPORT TABLESPACE.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
ibuf_check_bitmap_on_import(
/*========================*/
@@ -457,21 +428,25 @@ ibuf_check_bitmap_on_import(
ulint space_id) /*!< in: tablespace identifier */
MY_ATTRIBUTE((nonnull, warn_unused_result));
+/** Updates free bits and buffered bits for a bulk-loaded page.
+@param[in]	block	index page
+@param[in]	reset	whether to reset the free bits */
+void
+ibuf_set_bitmap_for_bulk_load(
+ buf_block_t* block,
+ bool reset);
+
#define IBUF_HEADER_PAGE_NO FSP_IBUF_HEADER_PAGE_NO
#define IBUF_TREE_ROOT_PAGE_NO FSP_IBUF_TREE_ROOT_PAGE_NO
-#endif /* !UNIV_HOTBACKUP */
-
/* The ibuf header page currently contains only the file segment header
for the file segment from which the pages for the ibuf tree are allocated */
#define IBUF_HEADER PAGE_DATA
#define IBUF_TREE_SEG_HEADER 0 /* fseg header for ibuf tree */
/* The insert buffer tree itself is always located in space 0. */
-#define IBUF_SPACE_ID 0
+#define IBUF_SPACE_ID static_cast<ulint>(0)
-#ifndef UNIV_NONINL
#include "ibuf0ibuf.ic"
-#endif
#endif
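The header above replaces the old (space, zip_size, page_no) argument triples with a page_id_t plus page_size_t pair throughout the change-buffer API. A minimal caller sketch under the new convention, assuming the page_id_t(space, page_no) constructor and the global univ_page_size object introduced elsewhere in this patch series, with space_id and page_no as placeholder values:

	/* Hedged sketch only: probe whether a page is a change-buffer
	page under the new calling convention. ibuf_page() accepts a
	NULL mtr ("mini-transaction or NULL" above). */
	const page_id_t	page_id(space_id, page_no);

	if (ibuf_page(page_id, univ_page_size, NULL)) {
		/* Level 2/3 ibuf page: operations on it are never
		buffered; callers take the ibuf code path instead. */
	}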
diff --git a/storage/innobase/include/ibuf0ibuf.ic b/storage/innobase/include/ibuf0ibuf.ic
index d26a8c60a04..1b19d5450b7 100644
--- a/storage/innobase/include/ibuf0ibuf.ic
+++ b/storage/innobase/include/ibuf0ibuf.ic
@@ -25,7 +25,7 @@ Created 7/19/1997 Heikki Tuuri
#include "page0page.h"
#include "page0zip.h"
-#ifndef UNIV_HOTBACKUP
+#include "fsp0types.h"
#include "buf0lru.h"
/** An index page must contain at least UNIV_PAGE_SIZE /
@@ -43,7 +43,7 @@ ibuf_mtr_start(
mtr_t* mtr) /*!< out: mini-transaction */
{
mtr_start(mtr);
- mtr->inside_ibuf = TRUE;
+ mtr->enter_ibuf();
}
/***************************************************************//**
Commits an insert buffer mini-transaction. */
@@ -53,8 +53,9 @@ ibuf_mtr_commit(
/*============*/
mtr_t* mtr) /*!< in/out: mini-transaction */
{
- ut_ad(mtr->inside_ibuf);
- ut_d(mtr->inside_ibuf = FALSE);
+ ut_ad(mtr->is_inside_ibuf());
+ ut_d(mtr->exit_ibuf());
+
mtr_commit(mtr);
}
@@ -93,7 +94,6 @@ Sets the free bit of the page in the ibuf bitmap. This is done in a separate
mini-transaction, hence this operation does not restrict further work to only
ibuf bitmap operations, which would result if the latch to the bitmap page
were kept. */
-UNIV_INTERN
void
ibuf_set_free_bits_func(
/*====================*/
@@ -127,6 +127,7 @@ ibuf_should_try(
return(ibuf_use != IBUF_USE_NONE
&& ibuf->max_size != 0
&& !dict_index_is_clust(index)
+ && !dict_index_is_spatial(index)
&& index->table->quiesce == QUIESCE_NONE
&& (ignore_sec_unique || !dict_index_is_unique(index))
&& srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE);
@@ -145,54 +146,39 @@ ibuf_inside(
/*========*/
const mtr_t* mtr) /*!< in: mini-transaction */
{
- return(mtr->inside_ibuf);
+ return(mtr->is_inside_ibuf());
}
-/***********************************************************************//**
-Checks if a page address is an ibuf bitmap page address.
-@return TRUE if a bitmap page */
+/** Checks if a page address is an ibuf bitmap page (level 3 page) address.
+@param[in] page_id page id
+@param[in] page_size page size
+@return TRUE if a bitmap page */
UNIV_INLINE
ibool
ibuf_bitmap_page(
-/*=============*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint page_no)/*!< in: page number */
+ const page_id_t page_id,
+ const page_size_t& page_size)
{
- ut_ad(ut_is_2pow(zip_size));
-
- if (!zip_size) {
- return((page_no & (UNIV_PAGE_SIZE - 1))
- == FSP_IBUF_BITMAP_OFFSET);
- }
-
- return((page_no & (zip_size - 1)) == FSP_IBUF_BITMAP_OFFSET);
+ return((page_id.page_no() & (page_size.physical() - 1))
+ == FSP_IBUF_BITMAP_OFFSET);
}
-/*********************************************************************//**
-Translates the free space on a page to a value in the ibuf bitmap.
-@return value for ibuf bitmap bits */
+/** Translates the free space on a page to a value in the ibuf bitmap.
+@param[in] page_size page size in bytes
+@param[in] max_ins_size maximum insert size after reorganize for
+the page
+@return value for ibuf bitmap bits */
UNIV_INLINE
ulint
ibuf_index_page_calc_free_bits(
-/*===========================*/
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint max_ins_size) /*!< in: maximum insert size after reorganize
- for the page */
+ ulint page_size,
+ ulint max_ins_size)
{
ulint n;
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);
- ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
+ ut_ad(ut_is_2pow(page_size));
+ ut_ad(page_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);
- if (zip_size) {
- n = max_ins_size
- / (zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
- } else {
- n = max_ins_size
- / (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
- }
+ n = max_ins_size / (page_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
if (n == 3) {
n = 2;
@@ -205,54 +191,43 @@ ibuf_index_page_calc_free_bits(
return(n);
}
-/*********************************************************************//**
-Translates the ibuf free bits to the free space on a page in bytes.
-@return maximum insert size after reorganize for the page */
+/** Translates the ibuf free bits to the free space on a page in bytes.
+@param[in]	page_size	page size
+@param[in] bits value for ibuf bitmap bits
+@return maximum insert size after reorganize for the page */
UNIV_INLINE
ulint
ibuf_index_page_calc_free_from_bits(
-/*================================*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint bits) /*!< in: value for ibuf bitmap bits */
+ const page_size_t& page_size,
+ ulint bits)
{
ut_ad(bits < 4);
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);
- ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
-
- if (zip_size) {
- if (bits == 3) {
- return(4 * zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
- }
-
- return(bits * zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
- }
+ ut_ad(!page_size.is_compressed()
+ || page_size.physical() > IBUF_PAGE_SIZE_PER_FREE_SPACE);
if (bits == 3) {
- return(4 * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+ return(4 * page_size.physical()
+ / IBUF_PAGE_SIZE_PER_FREE_SPACE);
}
- return(bits * (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE));
+ return(bits * (page_size.physical()
+ / IBUF_PAGE_SIZE_PER_FREE_SPACE));
}
/*********************************************************************//**
Translates the free space on a compressed page to a value in the ibuf bitmap.
-@return value for ibuf bitmap bits */
+@return value for ibuf bitmap bits */
UNIV_INLINE
ulint
ibuf_index_page_calc_free_zip(
/*==========================*/
- ulint zip_size,
- /*!< in: compressed page size in bytes */
const buf_block_t* block) /*!< in: buffer block */
{
ulint max_ins_size;
const page_zip_des_t* page_zip;
lint zip_max_ins;
- ut_ad(zip_size == buf_block_get_zip_size(block));
- ut_ad(zip_size);
+ ut_ad(block->page.size.is_compressed());
/* Consider the maximum insert size on the uncompressed page
without reorganizing the page. We must not assume anything
@@ -275,31 +250,29 @@ ibuf_index_page_calc_free_zip(
max_ins_size = (ulint) zip_max_ins;
}
- return(ibuf_index_page_calc_free_bits(zip_size, max_ins_size));
+ return(ibuf_index_page_calc_free_bits(block->page.size.physical(),
+ max_ins_size));
}
/*********************************************************************//**
Translates the free space on a page to a value in the ibuf bitmap.
-@return value for ibuf bitmap bits */
+@return value for ibuf bitmap bits */
UNIV_INLINE
ulint
ibuf_index_page_calc_free(
/*======================*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
const buf_block_t* block) /*!< in: buffer block */
{
- ut_ad(zip_size == buf_block_get_zip_size(block));
-
- if (!zip_size) {
+ if (!block->page.size.is_compressed()) {
ulint max_ins_size;
max_ins_size = page_get_max_insert_size_after_reorganize(
buf_block_get_frame(block), 1);
- return(ibuf_index_page_calc_free_bits(0, max_ins_size));
+ return(ibuf_index_page_calc_free_bits(
+ block->page.size.physical(), max_ins_size));
} else {
- return(ibuf_index_page_calc_free_zip(zip_size, block));
+ return(ibuf_index_page_calc_free_zip(block));
}
}
@@ -335,21 +308,22 @@ ibuf_update_free_bits_if_full(
ulint before;
ulint after;
- ut_ad(!buf_block_get_page_zip(block));
+ ut_ad(buf_block_get_page_zip(block) == NULL);
- before = ibuf_index_page_calc_free_bits(0, max_ins_size);
+ before = ibuf_index_page_calc_free_bits(
+ block->page.size.physical(), max_ins_size);
if (max_ins_size >= increase) {
#if ULINT32_UNDEFINED <= UNIV_PAGE_SIZE_MAX
# error "ULINT32_UNDEFINED <= UNIV_PAGE_SIZE_MAX"
#endif
- after = ibuf_index_page_calc_free_bits(0, max_ins_size
- - increase);
+ after = ibuf_index_page_calc_free_bits(
+ block->page.size.physical(), max_ins_size - increase);
#ifdef UNIV_IBUF_DEBUG
- ut_a(after <= ibuf_index_page_calc_free(0, block));
+ ut_a(after <= ibuf_index_page_calc_free(block));
#endif
} else {
- after = ibuf_index_page_calc_free(0, block);
+ after = ibuf_index_page_calc_free(block);
}
if (after == 0) {
@@ -365,4 +339,3 @@ ibuf_update_free_bits_if_full(
ibuf_set_free_bits(block, after, before);
}
}
-#endif /* !UNIV_HOTBACKUP */
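The rewritten helpers ibuf_index_page_calc_free_bits() and ibuf_index_page_calc_free_from_bits() above encode per-page free space into the two bitmap bits. A self-contained sketch of that arithmetic, assuming IBUF_PAGE_SIZE_PER_FREE_SPACE == 32 (the divisor used in this file) and a 16KiB physical page, so one unit is 16384/32 = 512 bytes:

	/* Hedged sketch of the 2-bit free-space code. The value 3 is
	reserved to mean "at least 4 units", which is why the encoder
	above demotes an exact n == 3 to 2. */
	static ulint encode_free_bits(ulint page_size, ulint max_ins_size)
	{
		ulint	n = max_ins_size / (page_size / 32);

		if (n == 3) {
			n = 2;
		}
		if (n > 3) {
			n = 3;
		}
		return(n);
	}

	/* encode_free_bits(16384, 1300) == 2: at least 2 * 512 bytes free.
	encode_free_bits(16384, 9000) == 3: at least 4 * 512 bytes free.
	Decoding mirrors this: bits == 3 yields 4 * 512, else bits * 512. */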
diff --git a/storage/innobase/include/lock0iter.h b/storage/innobase/include/lock0iter.h
index e63ed54bbd0..a7e613959ae 100644
--- a/storage/innobase/include/lock0iter.h
+++ b/storage/innobase/include/lock0iter.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,7 +26,6 @@ Created July 16, 2007 Vasil Dimov
#ifndef lock0iter_h
#define lock0iter_h
-#include "univ.i"
#include "lock0types.h"
struct lock_queue_iterator_t {
@@ -46,7 +45,6 @@ record is stored. It can be undefined (ULINT_UNDEFINED) in two cases:
bit_no is calculated in this function by using
lock_rec_find_set_bit(). There is exactly one bit set in the bitmap
of a wait lock. */
-UNIV_INTERN
void
lock_queue_iterator_reset(
/*======================*/
@@ -59,8 +57,7 @@ lock_queue_iterator_reset(
Gets the previous lock in the lock queue, returns NULL if there are no
more locks (i.e. the current lock is the first one). The iterator is
receded (if not-NULL is returned).
-@return previous lock or NULL */
-
+@return previous lock or NULL */
const lock_t*
lock_queue_iterator_get_prev(
/*=========================*/
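A hedged usage sketch for the iterator declared above, which walks a record-lock queue from a given lock towards older locks. The caller is assumed to hold lock_sys->mutex; wait_lock is a hypothetical waiting record lock, and lock_has_to_wait() comes from lock0lock.h below:

	lock_queue_iterator_t	iter;
	const lock_t*		prev;

	/* ULINT_UNDEFINED lets the iterator derive bit_no itself
	via lock_rec_find_set_bit(), as described above. */
	lock_queue_iterator_reset(&iter, wait_lock, ULINT_UNDEFINED);

	while ((prev = lock_queue_iterator_get_prev(&iter)) != NULL) {
		if (lock_has_to_wait(wait_lock, prev)) {
			/* found an older lock that wait_lock
			must wait for */
		}
	}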
diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
index d1ad4c403d4..6b30bc5ae1b 100644
--- a/storage/innobase/include/lock0lock.h
+++ b/storage/innobase/include/lock0lock.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2020, MariaDB Corporation.
+Copyright (c) 2017, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,55 +27,61 @@ Created 5/7/1996 Heikki Tuuri
#ifndef lock0lock_h
#define lock0lock_h
-#include "univ.i"
#include "buf0types.h"
#include "trx0types.h"
#include "mtr0types.h"
#include "rem0types.h"
-#include "dict0types.h"
#include "que0types.h"
#include "lock0types.h"
-#include "read0types.h"
#include "hash0hash.h"
#include "srv0srv.h"
#include "ut0vec.h"
-
-#ifdef UNIV_DEBUG
-extern ibool lock_print_waits;
-#endif /* UNIV_DEBUG */
+#include "gis0rtree.h"
+#include "lock0prdt.h"
/** Alternatives for innodb_lock_schedule_algorithm, which can be changed by
setting innodb_lock_schedule_algorithm. */
enum innodb_lock_schedule_algorithm_t {
- INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS, /*!< First Come First Served */
- INNODB_LOCK_SCHEDULE_ALGORITHM_VATS /*!< Variance-Aware-Transaction-Scheduling */
+	/** First Come First Served */
+	INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS,
+	/** Variance-Aware Transaction Scheduling */
+	INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
};
extern ulong innodb_lock_schedule_algorithm;
+// Forward declaration
+class ReadView;
+
+/** The value of innodb_deadlock_detect */
+extern my_bool innobase_deadlock_detect;
+
/*********************************************************************//**
Gets the size of a lock struct.
-@return size in bytes */
-UNIV_INTERN
+@return size in bytes */
ulint
lock_get_size(void);
/*===============*/
/*********************************************************************//**
Creates the lock system at database start. */
-UNIV_INTERN
void
lock_sys_create(
/*============*/
ulint n_cells); /*!< in: number of slots in lock hash table */
+/** Resize the lock hash table.
+@param[in] n_cells number of slots in lock hash table */
+void
+lock_sys_resize(
+ ulint n_cells);
+
/*********************************************************************//**
Closes the lock system at database shutdown. */
-UNIV_INTERN
void
lock_sys_close(void);
/*================*/
/*********************************************************************//**
Gets the heap_no of the smallest user record on a page.
-@return heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */
+@return heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */
UNIV_INLINE
ulint
lock_get_min_heap_no(
@@ -86,7 +92,6 @@ Updates the lock table when we have reorganized a page. NOTE: we copy
also the locks set on the infimum of the page; the infimum may carry
locks if an update of a record is occurring on the page, and its locks
were temporarily stored on the infimum. */
-UNIV_INTERN
void
lock_move_reorganize_page(
/*======================*/
@@ -97,7 +102,6 @@ lock_move_reorganize_page(
/*************************************************************//**
Moves the explicit locks on user records to another page if a record
list end is moved to another page. */
-UNIV_INTERN
void
lock_move_rec_list_end(
/*===================*/
@@ -108,7 +112,6 @@ lock_move_rec_list_end(
/*************************************************************//**
Moves the explicit locks on user records to another page if a record
list start is moved to another page. */
-UNIV_INTERN
void
lock_move_rec_list_start(
/*=====================*/
@@ -124,7 +127,6 @@ lock_move_rec_list_start(
were copied */
/*************************************************************//**
Updates the lock table when a page is split to the right. */
-UNIV_INTERN
void
lock_update_split_right(
/*====================*/
@@ -132,7 +134,6 @@ lock_update_split_right(
const buf_block_t* left_block); /*!< in: left page */
/*************************************************************//**
Updates the lock table when a page is merged to the right. */
-UNIV_INTERN
void
lock_update_merge_right(
/*====================*/
@@ -152,7 +153,6 @@ root page, even though they do not make sense on other than leaf
pages: the reason is that in a pessimistic update the infimum record
of the root page will act as a dummy carrier of the locks of the record
to be updated. */
-UNIV_INTERN
void
lock_update_root_raise(
/*===================*/
@@ -161,7 +161,6 @@ lock_update_root_raise(
/*************************************************************//**
Updates the lock table when a page is copied to another and the original page
is removed from the chain of leaf pages, except if page is the root! */
-UNIV_INTERN
void
lock_update_copy_and_discard(
/*=========================*/
@@ -171,7 +170,6 @@ lock_update_copy_and_discard(
NOT the root! */
/*************************************************************//**
Updates the lock table when a page is split to the left. */
-UNIV_INTERN
void
lock_update_split_left(
/*===================*/
@@ -179,7 +177,6 @@ lock_update_split_left(
const buf_block_t* left_block); /*!< in: left page */
/*************************************************************//**
Updates the lock table when a page is merged to the left. */
-UNIV_INTERN
void
lock_update_merge_left(
/*===================*/
@@ -191,7 +188,7 @@ lock_update_merge_left(
const buf_block_t* right_block); /*!< in: merged index page
which will be discarded */
/*************************************************************//**
-Updates the lock table when a page is splited and merged to
+Updates the lock table when a page is split and merged to
two pages. */
UNIV_INTERN
void
@@ -203,7 +200,6 @@ lock_update_split_and_merge(
/*************************************************************//**
Resets the original locks on heir and replaces them with gap type locks
inherited from rec. */
-UNIV_INTERN
void
lock_rec_reset_and_inherit_gap_locks(
/*=================================*/
@@ -219,7 +215,6 @@ lock_rec_reset_and_inherit_gap_locks(
donating record */
/*************************************************************//**
Updates the lock table when a page is discarded. */
-UNIV_INTERN
void
lock_update_discard(
/*================*/
@@ -231,7 +226,6 @@ lock_update_discard(
which will be discarded */
/*************************************************************//**
Updates the lock table when a new user record is inserted. */
-UNIV_INTERN
void
lock_update_insert(
/*===============*/
@@ -239,7 +233,6 @@ lock_update_insert(
const rec_t* rec); /*!< in: the inserted record */
/*************************************************************//**
Updates the lock table when a record is removed. */
-UNIV_INTERN
void
lock_update_delete(
/*===============*/
@@ -252,7 +245,6 @@ updated and the size of the record changes in the update. The record
is in such an update moved, perhaps to another page. The infimum record
acts as a dummy carrier record, taking care of lock releases while the
actual record is being moved. */
-UNIV_INTERN
void
lock_rec_store_on_page_infimum(
/*===========================*/
@@ -265,7 +257,6 @@ lock_rec_store_on_page_infimum(
/*********************************************************************//**
Restores the state of explicit lock requests on a single record, where the
state was stored on the infimum of the page. */
-UNIV_INTERN
void
lock_rec_restore_from_page_infimum(
/*===============================*/
@@ -279,8 +270,7 @@ lock_rec_restore_from_page_infimum(
the infimum */
/*********************************************************************//**
Determines if there are explicit record locks on a page.
-@return an explicit record lock on the page, or NULL if there are none */
-UNIV_INTERN
+@return an explicit record lock on the page, or NULL if there are none */
lock_t*
lock_rec_expl_exist_on_page(
/*========================*/
@@ -293,8 +283,7 @@ a record. If they do, first tests if the query thread should anyway
be suspended for some reason; if not, then puts the transaction and
the query thread to the lock wait state and inserts a waiting request
for a gap x-lock to the lock queue.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
dberr_t
lock_rec_insert_check_and_lock(
/*===========================*/
@@ -309,7 +298,8 @@ lock_rec_insert_check_and_lock(
inserted record maybe should inherit
LOCK_GAP type locks from the successor
record */
- MY_ATTRIBUTE((nonnull(2,3,4,6,7), warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
+
/*********************************************************************//**
Checks if locks of other transactions prevent an immediate modify (update,
delete mark, or delete unmark) of a clustered index record. If they do,
@@ -317,8 +307,7 @@ first tests if the query thread should anyway be suspended for some
reason; if not, then puts the transaction and the query thread to the
lock wait state and inserts a waiting request for a record x-lock to the
lock queue.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
dberr_t
lock_clust_rec_modify_check_and_lock(
/*=================================*/
@@ -328,14 +317,13 @@ lock_clust_rec_modify_check_and_lock(
const rec_t* rec, /*!< in: record which should be
modified */
dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
que_thr_t* thr) /*!< in: query thread */
- MY_ATTRIBUTE((warn_unused_result, nonnull));
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************************//**
Checks if locks of other transactions prevent an immediate modify
(delete mark or delete unmark) of a secondary index record.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
dberr_t
lock_sec_rec_modify_check_and_lock(
/*===============================*/
@@ -351,13 +339,11 @@ lock_sec_rec_modify_check_and_lock(
que_thr_t* thr, /*!< in: query thread
(can be NULL if BTR_NO_LOCKING_FLAG) */
mtr_t* mtr) /*!< in/out: mini-transaction */
- MY_ATTRIBUTE((warn_unused_result, nonnull(2,3,4,6)));
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************************//**
Like lock_clust_rec_read_check_and_lock(), but reads a
secondary index record.
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
-or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
+@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, or DB_DEADLOCK */
dberr_t
lock_sec_rec_read_check_and_lock(
/*=============================*/
@@ -369,8 +355,8 @@ lock_sec_rec_read_check_and_lock(
be read or passed over by a
read cursor */
dict_index_t* index, /*!< in: secondary index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- enum lock_mode mode, /*!< in: mode of the lock which
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
+ lock_mode mode, /*!< in: mode of the lock which
the read cursor should set on
records: LOCK_S or LOCK_X; the
latter is possible in
@@ -385,9 +371,7 @@ if the query thread should anyway be suspended for some reason; if not, then
puts the transaction and the query thread to the lock wait state and inserts a
waiting request for a record lock to the lock queue. Sets the requested mode
lock on the record.
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
-or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
+@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, or DB_DEADLOCK */
dberr_t
lock_clust_rec_read_check_and_lock(
/*===============================*/
@@ -399,8 +383,8 @@ lock_clust_rec_read_check_and_lock(
be read or passed over by a
read cursor */
dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- enum lock_mode mode, /*!< in: mode of the lock which
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
+ lock_mode mode, /*!< in: mode of the lock which
the read cursor should set on
records: LOCK_S or LOCK_X; the
latter is possible in
@@ -417,8 +401,7 @@ waiting request for a record lock to the lock queue. Sets the requested mode
lock on the record. This is an alternative version of
lock_clust_rec_read_check_and_lock() that does not require the parameter
"offsets".
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
dberr_t
lock_clust_rec_read_check_and_lock_alt(
/*===================================*/
@@ -430,7 +413,7 @@ lock_clust_rec_read_check_and_lock_alt(
be read or passed over by a
read cursor */
dict_index_t* index, /*!< in: clustered index */
- enum lock_mode mode, /*!< in: mode of the lock which
+ lock_mode mode, /*!< in: mode of the lock which
the read cursor should set on
records: LOCK_S or LOCK_X; the
latter is possible in
@@ -438,20 +421,19 @@ lock_clust_rec_read_check_and_lock_alt(
ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
LOCK_REC_NOT_GAP */
que_thr_t* thr) /*!< in: query thread */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************************//**
Checks that a record is seen in a consistent read.
@return true if sees, or false if an earlier version of the record
should be retrieved */
-UNIV_INTERN
bool
lock_clust_rec_cons_read_sees(
/*==========================*/
const rec_t* rec, /*!< in: user record which should be read or
passed over by a read cursor */
dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- read_view_t* view); /*!< in: consistent read view */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
+ ReadView* view); /*!< in: consistent read view */
/*********************************************************************//**
Checks that a non-clustered index record is seen in a consistent read.
@@ -462,20 +444,19 @@ record.
@return true if certainly sees, or false if an earlier version of the
clustered index record might be needed */
-UNIV_INTERN
bool
lock_sec_rec_cons_read_sees(
/*========================*/
const rec_t* rec, /*!< in: user record which
should be read or passed over
by a read cursor */
- const read_view_t* view) /*!< in: consistent read view */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ const dict_index_t* index, /*!< in: index */
+ const ReadView* view) /*!< in: consistent read view */
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************************//**
Locks the specified database table in the mode given. If the lock cannot
be granted immediately, the query thread is put to wait.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
dberr_t
lock_table(
/*=======*/
@@ -483,22 +464,33 @@ lock_table(
does nothing */
dict_table_t* table, /*!< in/out: database table
in dictionary cache */
- enum lock_mode mode, /*!< in: lock mode */
+ lock_mode mode, /*!< in: lock mode */
que_thr_t* thr) /*!< in: query thread */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************************//**
Creates a table IX lock object for a resurrected transaction. */
-UNIV_INTERN
void
lock_table_ix_resurrect(
/*====================*/
dict_table_t* table, /*!< in/out: table */
trx_t* trx); /*!< in/out: transaction */
+
+/** Sets a lock on a table based on the given mode.
+@param[in] table table to lock
+@param[in,out] trx transaction
+@param[in] mode LOCK_X or LOCK_S
+@return error code or DB_SUCCESS. */
+dberr_t
+lock_table_for_trx(
+ dict_table_t* table,
+ trx_t* trx,
+ enum lock_mode mode)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
/*************************************************************//**
Removes a granted record lock of a transaction from the queue and grants
locks to other transactions waiting in the queue if they now are entitled
to a lock. */
-UNIV_INTERN
void
lock_rec_unlock(
/*============*/
@@ -506,34 +498,29 @@ lock_rec_unlock(
set a record lock */
const buf_block_t* block, /*!< in: buffer block containing rec */
const rec_t* rec, /*!< in: record */
- enum lock_mode lock_mode);/*!< in: LOCK_S or LOCK_X */
-/*********************************************************************//**
-Releases a transaction's locks, and releases possible other transactions
-waiting because of these locks. Change the state of the transaction to
-TRX_STATE_COMMITTED_IN_MEMORY. */
-UNIV_INTERN
-void
-lock_trx_release_locks(
-/*===================*/
- trx_t* trx); /*!< in/out: transaction */
+ lock_mode lock_mode);/*!< in: LOCK_S or LOCK_X */
+
+/** Release the explicit locks of a committing transaction,
+and release any other transactions that were waiting because of these locks. */
+void lock_trx_release_locks(trx_t* trx);
+
/*********************************************************************//**
-Removes locks on a table to be dropped or truncated.
+Removes locks on a table to be dropped or discarded.
If remove_also_table_sx_locks is TRUE then table-level S and X locks are
also removed in addition to other table-level and record-level locks.
No lock, that is going to be removed, is allowed to be a wait lock. */
-UNIV_INTERN
void
lock_remove_all_on_table(
/*=====================*/
dict_table_t* table, /*!< in: table to be dropped
- or truncated */
+ or discarded */
ibool remove_also_table_sx_locks);/*!< in: also removes
table S and X locks */
/*********************************************************************//**
Calculates the fold value of a page file address: used in inserting or
searching for a lock in the hash table.
-@return folded value */
+@return folded value */
UNIV_INLINE
ulint
lock_rec_fold(
@@ -544,20 +531,27 @@ lock_rec_fold(
/*********************************************************************//**
Calculates the hash value of a page file address: used in inserting or
searching for a lock in the hash table.
-@return hashed value */
+@return hashed value */
UNIV_INLINE
-ulint
+unsigned
lock_rec_hash(
/*==========*/
ulint space, /*!< in: space */
ulint page_no);/*!< in: page number */
+/*************************************************************//**
+Get the lock hash table */
+UNIV_INLINE
+hash_table_t*
+lock_hash_get(
+/*==========*/
+ ulint mode); /*!< in: lock mode */
+
/**********************************************************************//**
Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED,
if none found.
@return bit index == heap number of the record, or ULINT_UNDEFINED if
none found */
-UNIV_INTERN
ulint
lock_rec_find_set_bit(
/*==================*/
@@ -565,36 +559,8 @@ lock_rec_find_set_bit(
bit set */
/*********************************************************************//**
-Gets the source table of an ALTER TABLE transaction. The table must be
-covered by an IX or IS table lock.
-@return the source table of transaction, if it is covered by an IX or
-IS table lock; dest if there is no source table, and NULL if the
-transaction is locking more than two tables or an inconsistency is
-found */
-UNIV_INTERN
-dict_table_t*
-lock_get_src_table(
-/*===============*/
- trx_t* trx, /*!< in: transaction */
- dict_table_t* dest, /*!< in: destination of ALTER TABLE */
- enum lock_mode* mode); /*!< out: lock mode of the source table */
-/*********************************************************************//**
-Determine if the given table is exclusively "owned" by the given
-transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC
-on the table.
-@return TRUE if table is only locked by trx, with LOCK_IX, and
-possibly LOCK_AUTO_INC */
-UNIV_INTERN
-ibool
-lock_is_table_exclusive(
-/*====================*/
- const dict_table_t* table, /*!< in: table */
- const trx_t* trx) /*!< in: transaction */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
Checks if a lock request lock1 has to wait for request lock2.
-@return TRUE if lock1 has to wait for lock2 to be removed */
-UNIV_INTERN
+@return TRUE if lock1 has to wait for lock2 to be removed */
ibool
lock_has_to_wait(
/*=============*/
@@ -605,32 +571,36 @@ lock_has_to_wait(
locks are record locks */
/*********************************************************************//**
Reports that a transaction id is insensible, i.e., in the future. */
-UNIV_INTERN
void
lock_report_trx_id_insanity(
/*========================*/
trx_id_t trx_id, /*!< in: trx id */
const rec_t* rec, /*!< in: user record */
dict_index_t* index, /*!< in: index */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
- trx_id_t max_trx_id) /*!< in: trx_sys_get_max_trx_id() */
- MY_ATTRIBUTE((nonnull));
+ const offset_t* offsets, /*!< in: rec_get_offsets(rec, index) */
+ trx_id_t max_trx_id); /*!< in: trx_sys_get_max_trx_id() */
/*********************************************************************//**
Prints info of locks for all transactions.
@return FALSE if not able to obtain lock mutex and exits without
printing info */
-UNIV_INTERN
ibool
lock_print_info_summary(
/*====================*/
FILE* file, /*!< in: file where to print */
ibool nowait) /*!< in: whether to wait for the lock mutex */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Prints transaction lock wait and MVCC state.
+@param[in,out] file file where to print
+@param[in] trx transaction
+@param[in] now current time */
+void
+lock_trx_print_wait_and_mvcc_state(FILE* file, const trx_t* trx, time_t now);
+
/*********************************************************************//**
Prints info of locks for each transaction. This function assumes that the
caller holds the lock mutex and more importantly it will release the lock
mutex on behalf of the caller. (This should be fixed in the future). */
-UNIV_INTERN
void
lock_print_info_all_transactions(
/*=============================*/
@@ -640,18 +610,25 @@ Return approximate number of record locks (bits set in the bitmap) for
this transaction. Since delete-marked records may be removed, the
record count will not be precise.
The caller must be holding lock_sys->mutex. */
-UNIV_INTERN
ulint
lock_number_of_rows_locked(
/*=======================*/
const trx_lock_t* trx_lock) /*!< in: transaction locks */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
+
+/*********************************************************************//**
+Return the number of table locks for a transaction.
+The caller must be holding lock_sys->mutex. */
+ulint
+lock_number_of_tables_locked(
+/*=========================*/
+ const trx_lock_t* trx_lock) /*!< in: transaction locks */
+ MY_ATTRIBUTE((warn_unused_result));
/*******************************************************************//**
Gets the type of a lock. Non-inline version for using outside of the
lock module.
-@return LOCK_TABLE or LOCK_REC */
-UNIV_INTERN
+@return LOCK_TABLE or LOCK_REC */
ulint
lock_get_type(
/*==========*/
@@ -669,8 +646,7 @@ lock_get_trx(
/*******************************************************************//**
Gets the id of the transaction owning a lock.
-@return transaction id */
-UNIV_INTERN
+@return transaction id */
trx_id_t
lock_get_trx_id(
/*============*/
@@ -679,8 +655,7 @@ lock_get_trx_id(
/*******************************************************************//**
Gets the mode of a lock in a human readable string.
The string should not be free()'d or modified.
-@return lock mode */
-UNIV_INTERN
+@return lock mode */
const char*
lock_get_mode_str(
/*==============*/
@@ -689,8 +664,7 @@ lock_get_mode_str(
/*******************************************************************//**
Gets the type of a lock in a human readable string.
The string should not be free()'d or modified.
-@return lock type */
-UNIV_INTERN
+@return lock type */
const char*
lock_get_type_str(
/*==============*/
@@ -698,27 +672,22 @@ lock_get_type_str(
/*******************************************************************//**
Gets the id of the table on which the lock is.
-@return id of the table */
-UNIV_INTERN
+@return id of the table */
table_id_t
lock_get_table_id(
/*==============*/
const lock_t* lock); /*!< in: lock */
-/*******************************************************************//**
-Gets the name of the table on which the lock is.
-The string should not be free()'d or modified.
-@return name of the table */
-UNIV_INTERN
-const char*
+/** Determine which table a lock is associated with.
+@param[in] lock the lock
+@return name of the table */
+const table_name_t&
lock_get_table_name(
-/*================*/
- const lock_t* lock); /*!< in: lock */
+ const lock_t* lock);
/*******************************************************************//**
For a record lock, gets the index on which the lock is.
-@return index */
-UNIV_INTERN
+@return index */
const dict_index_t*
lock_rec_get_index(
/*===============*/
@@ -727,8 +696,7 @@ lock_rec_get_index(
/*******************************************************************//**
For a record lock, gets the name of the index on which the lock is.
The string should not be free()'d or modified.
-@return name of the index */
-UNIV_INTERN
+@return name of the index */
const char*
lock_rec_get_index_name(
/*====================*/
@@ -736,8 +704,7 @@ lock_rec_get_index_name(
/*******************************************************************//**
For a record lock, gets the tablespace number on which the lock is.
-@return tablespace number */
-UNIV_INTERN
+@return tablespace number */
ulint
lock_rec_get_space_id(
/*==================*/
@@ -745,17 +712,15 @@ lock_rec_get_space_id(
/*******************************************************************//**
For a record lock, gets the page number on which the lock is.
-@return page number */
-UNIV_INTERN
+@return page number */
ulint
lock_rec_get_page_no(
/*=================*/
const lock_t* lock); /*!< in: lock */
/*******************************************************************//**
Check if there are any locks (table or rec) against table.
-@return TRUE if locks exist */
-UNIV_INTERN
-ibool
+@return TRUE if locks exist */
+bool
lock_table_has_locks(
/*=================*/
const dict_table_t* table); /*!< in: check if there are any locks
@@ -764,8 +729,8 @@ lock_table_has_locks(
/*********************************************************************//**
A thread which wakes up threads whose lock wait may have lasted too long.
-@return a dummy parameter */
-extern "C" UNIV_INTERN
+@return a dummy parameter */
+extern "C"
os_thread_ret_t
DECLARE_THREAD(lock_wait_timeout_thread)(
/*=====================================*/
@@ -775,7 +740,6 @@ DECLARE_THREAD(lock_wait_timeout_thread)(
/********************************************************************//**
Releases a user OS thread waiting for a lock to be released, if the
thread is already suspended. */
-UNIV_INTERN
void
lock_wait_release_thread_if_suspended(
/*==================================*/
@@ -788,7 +752,6 @@ occurs during the wait trx->error_state associated with thr is
!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
are possible errors. DB_DEADLOCK is returned if selective deadlock
resolution chose this transaction as a victim. */
-UNIV_INTERN
void
lock_wait_suspend_thread(
/*=====================*/
@@ -798,7 +761,6 @@ lock_wait_suspend_thread(
Unlocks AUTO_INC type locks that were possibly reserved by a trx. This
function should be called at the end of an SQL statement, by the
connection thread that owns the transaction (trx->mysql_thd). */
-UNIV_INTERN
void
lock_unlock_table_autoinc(
/*======================*/
@@ -808,39 +770,45 @@ Check whether the transaction has already been rolled back because it
was selected as a deadlock victim, or, if it has to wait, cancel
the wait lock.
@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */
-UNIV_INTERN
dberr_t
lock_trx_handle_wait(
/*=================*/
- trx_t* trx) /*!< in/out: trx lock state */
- MY_ATTRIBUTE((nonnull));
+ trx_t* trx); /*!< in/out: trx lock state */
/*********************************************************************//**
Get the number of locks on a table.
@return number of locks */
-UNIV_INTERN
ulint
lock_table_get_n_locks(
/*===================*/
- const dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull));
+ const dict_table_t* table); /*!< in: table */
+/*******************************************************************//**
+Initialise the trx lock list. */
+void
+lock_trx_lock_list_init(
+/*====================*/
+ trx_lock_list_t* lock_list); /*!< List to initialise */
+
+/*******************************************************************//**
+Set the lock system timeout event. */
+void
+lock_set_timeout_event();
+/*====================*/
#ifdef UNIV_DEBUG
/*********************************************************************//**
Checks that a transaction id is sensible, i.e., not in the future.
-@return true if ok */
-UNIV_INTERN
+@return true if ok */
bool
lock_check_trx_id_sanity(
/*=====================*/
trx_id_t trx_id, /*!< in: trx id */
const rec_t* rec, /*!< in: user record */
dict_index_t* index, /*!< in: index */
- const ulint* offsets) /*!< in: rec_get_offsets(rec, index) */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ const offset_t* offsets) /*!< in: rec_get_offsets(rec, index) */
+ MY_ATTRIBUTE((warn_unused_result));
/*******************************************************************//**
Check if the transaction holds any locks on the sys tables
or its records.
-@return the strongest lock found on any sys table or 0 for none */
-UNIV_INTERN
+@return the strongest lock found on any sys table or 0 for none */
const lock_t*
lock_trx_has_sys_table_locks(
/*=========================*/
@@ -849,8 +817,7 @@ lock_trx_has_sys_table_locks(
/*******************************************************************//**
Check if the transaction holds an exclusive lock on a record.
-@return whether the locks are held */
-UNIV_INTERN
+@return whether the locks are held */
bool
lock_trx_has_rec_x_lock(
/*====================*/
@@ -858,76 +825,34 @@ lock_trx_has_rec_x_lock(
const dict_table_t* table, /*!< in: table to check */
const buf_block_t* block, /*!< in: buffer block of the record */
ulint heap_no)/*!< in: record heap number */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
#endif /* UNIV_DEBUG */
-/** Lock modes and types */
-/* @{ */
-#define LOCK_MODE_MASK 0xFUL /*!< mask used to extract mode from the
- type_mode field in a lock */
-/** Lock types */
-/* @{ */
-#define LOCK_TABLE 16 /*!< table lock */
-#define LOCK_REC 32 /*!< record lock */
-#define LOCK_TYPE_MASK 0xF0UL /*!< mask used to extract lock type from the
- type_mode field in a lock */
-#if LOCK_MODE_MASK & LOCK_TYPE_MASK
-# error "LOCK_MODE_MASK & LOCK_TYPE_MASK"
-#endif
-
-#define LOCK_WAIT 256 /*!< Waiting lock flag; when set, it
- means that the lock has not yet been
- granted, it is just waiting for its
- turn in the wait queue */
-/* Precise modes */
-#define LOCK_ORDINARY 0 /*!< this flag denotes an ordinary
- next-key lock in contrast to LOCK_GAP
- or LOCK_REC_NOT_GAP */
-#define LOCK_GAP 512 /*!< when this bit is set, it means that the
- lock holds only on the gap before the record;
- for instance, an x-lock on the gap does not
- give permission to modify the record on which
- the bit is set; locks of this type are created
- when records are removed from the index chain
- of records */
-#define LOCK_REC_NOT_GAP 1024 /*!< this bit means that the lock is only on
- the index record and does NOT block inserts
- to the gap before the index record; this is
- used in the case when we retrieve a record
- with a unique key, and is also used in
- locking plain SELECTs (not part of UPDATE
- or DELETE) when the user has set the READ
- COMMITTED isolation level */
-#define LOCK_INSERT_INTENTION 2048 /*!< this bit is set when we place a waiting
- gap type record lock request in order to let
- an insert of an index record to wait until
- there are no conflicting locks by other
- transactions on the gap; note that this flag
- remains set when the waiting lock is granted,
- or if the lock is inherited to a neighboring
- record */
-
-#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_MODE_MASK
-# error
-#endif
-#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_TYPE_MASK
-# error
-#endif
-/* @} */
-
/** Lock operation struct */
struct lock_op_t{
dict_table_t* table; /*!< table to be locked */
- enum lock_mode mode; /*!< lock mode */
+ lock_mode mode; /*!< lock mode */
};
+typedef ib_mutex_t LockMutex;
+
/** The lock system struct */
struct lock_sys_t{
- ib_mutex_t mutex; /*!< Mutex protecting the
+ char pad1[CACHE_LINE_SIZE]; /*!< padding to prevent other
+ memory update hotspots from
+ residing on the same memory
+ cache line */
+ LockMutex mutex; /*!< Mutex protecting the
locks */
hash_table_t* rec_hash; /*!< hash table of the record
locks */
- ib_mutex_t wait_mutex; /*!< Mutex protecting the
+ hash_table_t* prdt_hash; /*!< hash table of the predicate
+ lock */
+ hash_table_t* prdt_page_hash; /*!< hash table of the page
+ lock */
+
+ char pad2[CACHE_LINE_SIZE]; /*!< Padding */
+ LockMutex wait_mutex; /*!< Mutex protecting the
next two fields */
srv_slot_t* waiting_threads; /*!< Array of user threads
suspended while waiting for
@@ -960,14 +885,125 @@ struct lock_sys_t{
is running */
};
+/*********************************************************************//**
+Creates a new record lock and inserts it to the lock queue. Does NOT check
+for deadlocks or lock compatibility!
+@return created lock */
+UNIV_INLINE
+lock_t*
+lock_rec_create(
+/*============*/
+#ifdef WITH_WSREP
+ lock_t* c_lock, /*!< conflicting lock */
+ que_thr_t* thr, /*!< thread owning trx */
+#endif
+ ulint type_mode,/*!< in: lock mode and wait
+ flag, type is ignored and
+ replaced by LOCK_REC */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ ulint heap_no,/*!< in: heap number of the record */
+ dict_index_t* index, /*!< in: index of record */
+ trx_t* trx, /*!< in,out: transaction */
+ bool caller_owns_trx_mutex);
+ /*!< in: true if caller owns
+ trx mutex */
+
+/*************************************************************//**
+Removes a record lock request, waiting or granted, from the queue. */
+void
+lock_rec_discard(
+/*=============*/
+ lock_t* in_lock); /*!< in: record lock object: all
+ record locks which are contained
+ in this lock object are removed */
+
+/** Create a new record lock and inserts it to the lock queue,
+without checking for deadlocks or conflicts.
+@param[in] type_mode lock mode and wait flag; type will be replaced
+ with LOCK_REC
+@param[in] space tablespace id
+@param[in] page_no index page number
+@param[in] page R-tree index page, or NULL
+@param[in] heap_no record heap number in the index page
+@param[in] index the index tree
+@param[in,out] trx transaction
+@param[in] holds_trx_mutex whether the caller holds trx->mutex
+@return created lock */
+lock_t*
+lock_rec_create_low(
+#ifdef WITH_WSREP
+ lock_t* c_lock, /*!< conflicting lock */
+ que_thr_t* thr, /*!< thread owning trx */
+#endif
+ ulint type_mode,
+ ulint space,
+ ulint page_no,
+ const page_t* page,
+ ulint heap_no,
+ dict_index_t* index,
+ trx_t* trx,
+ bool holds_trx_mutex);
+/** Enqueue a waiting request for a lock which cannot be granted immediately.
+Check for deadlocks.
+@param[in] type_mode the requested lock mode (LOCK_S or LOCK_X)
+ possibly ORed with LOCK_GAP or
+ LOCK_REC_NOT_GAP, ORed with
+ LOCK_INSERT_INTENTION if this
+ waiting lock request is set
+ when performing an insert of
+ an index record
+@param[in] block leaf page in the index
+@param[in] heap_no record heap number in the block
+@param[in] index index tree
+@param[in,out] thr query thread
+@param[in] prdt minimum bounding box (spatial index)
+@retval DB_LOCK_WAIT if the waiting lock was enqueued
+@retval DB_DEADLOCK if this transaction was chosen as the victim
+@retval DB_SUCCESS_LOCKED_REC if the other transaction was chosen as a victim
+ (or it happened to commit) */
+dberr_t
+lock_rec_enqueue_waiting(
+#ifdef WITH_WSREP
+ lock_t* c_lock, /*!< conflicting lock */
+#endif
+ ulint type_mode,
+ const buf_block_t* block,
+ ulint heap_no,
+ dict_index_t* index,
+ que_thr_t* thr,
+ lock_prdt_t* prdt);
+/*************************************************************//**
+Moves the explicit locks on user records to another page when the start
+of a record list is moved to another page. */
+void
+lock_rtr_move_rec_list(
+/*===================*/
+ const buf_block_t* new_block, /*!< in: index page to
+ move to */
+ const buf_block_t* block, /*!< in: index page */
+ rtr_rec_move_t* rec_move, /*!< in: recording records
+ moved */
+ ulint num_move); /*!< in: number of records to move */
+
+/*************************************************************//**
+Removes record lock objects set on an index page which is discarded. This
+function does not move locks, or check for waiting locks, therefore the
+lock bitmaps must already be reset when this function is called. */
+void
+lock_rec_free_all_from_discard_page(
+/*================================*/
+ const buf_block_t* block); /*!< in: page to be discarded */
+
/** The lock system */
extern lock_sys_t* lock_sys;
/** Test if lock_sys->mutex can be acquired without waiting. */
-#define lock_mutex_enter_nowait() mutex_enter_nowait(&lock_sys->mutex)
+#define lock_mutex_enter_nowait() \
+ (lock_sys->mutex.trylock(__FILE__, __LINE__))
/** Test if lock_sys->mutex is owned. */
-#define lock_mutex_own() mutex_own(&lock_sys->mutex)
+#define lock_mutex_own() (lock_sys->mutex.is_owned())
/** Acquire the lock_sys->mutex. */
#define lock_mutex_enter() do { \
@@ -976,11 +1012,11 @@ extern lock_sys_t* lock_sys;
/** Release the lock_sys->mutex. */
#define lock_mutex_exit() do { \
- mutex_exit(&lock_sys->mutex); \
+ lock_sys->mutex.exit(); \
} while (0)
/** Test if lock_sys->wait_mutex is owned. */
-#define lock_wait_mutex_own() mutex_own(&lock_sys->wait_mutex)
+#define lock_wait_mutex_own() (lock_sys->wait_mutex.is_owned())
/** Acquire the lock_sys->wait_mutex. */
#define lock_wait_mutex_enter() do { \
@@ -989,7 +1025,7 @@ extern lock_sys_t* lock_sys;
/** Release the lock_sys->wait_mutex. */
#define lock_wait_mutex_exit() do { \
- mutex_exit(&lock_sys->wait_mutex); \
+ lock_sys->wait_mutex.exit(); \
} while (0)
#ifdef WITH_WSREP
@@ -1010,8 +1046,7 @@ lock_get_info(
const lock_t*);
#endif /* WITH_WSREP */
-#ifndef UNIV_NONINL
+
#include "lock0lock.ic"
-#endif
#endif
diff --git a/storage/innobase/include/lock0lock.ic b/storage/innobase/include/lock0lock.ic
index bce04e582d9..0ed933cba78 100644
--- a/storage/innobase/include/lock0lock.ic
+++ b/storage/innobase/include/lock0lock.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,25 +24,14 @@ The transaction lock system
Created 5/7/1996 Heikki Tuuri
*******************************************************/
-#include "sync0sync.h"
-#include "srv0srv.h"
#include "dict0dict.h"
-#include "row0row.h"
-#include "trx0sys.h"
-#include "trx0trx.h"
#include "buf0buf.h"
#include "page0page.h"
-#include "page0cur.h"
-#include "row0vers.h"
-#include "que0que.h"
-#include "btr0cur.h"
-#include "read0read.h"
-#include "log0recv.h"
/*********************************************************************//**
Calculates the fold value of a page file address: used in inserting or
searching for a lock in the hash table.
-@return folded value */
+@return folded value */
UNIV_INLINE
ulint
lock_rec_fold(
@@ -55,21 +45,21 @@ lock_rec_fold(
/*********************************************************************//**
Calculates the hash value of a page file address: used in inserting or
searching for a lock in the hash table.
-@return hashed value */
+@return hashed value */
UNIV_INLINE
-ulint
+unsigned
lock_rec_hash(
/*==========*/
ulint space, /*!< in: space */
ulint page_no)/*!< in: page number */
{
- return(hash_calc_hash(lock_rec_fold(space, page_no),
- lock_sys->rec_hash));
+ return(unsigned(hash_calc_hash(lock_rec_fold(space, page_no),
+ lock_sys->rec_hash)));
}
/*********************************************************************//**
Gets the heap_no of the smallest user record on a page.
-@return heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */
+@return heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */
UNIV_INLINE
ulint
lock_get_min_heap_no(
@@ -90,3 +80,55 @@ lock_get_min_heap_no(
FALSE)));
}
}
+
+/*************************************************************//**
+Get the lock hash table */
+UNIV_INLINE
+hash_table_t*
+lock_hash_get(
+/*==========*/
+ ulint mode) /*!< in: lock mode */
+{
+ if (mode & LOCK_PREDICATE) {
+ return(lock_sys->prdt_hash);
+ } else if (mode & LOCK_PRDT_PAGE) {
+ return(lock_sys->prdt_page_hash);
+ } else {
+ return(lock_sys->rec_hash);
+ }
+}
+
+/*********************************************************************//**
+Creates a new record lock and inserts it into the lock queue. Does NOT check
+for deadlocks or lock compatibility!
+@return created lock */
+UNIV_INLINE
+lock_t*
+lock_rec_create(
+/*============*/
+#ifdef WITH_WSREP
+ lock_t* c_lock, /*!< conflicting lock */
+ que_thr_t* thr, /*!< thread owning trx */
+#endif
+ ulint type_mode,/*!< in: lock mode and wait
+ flag, type is ignored and
+ replaced by LOCK_REC */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ ulint heap_no,/*!< in: heap number of the record */
+ dict_index_t* index, /*!< in: index of record */
+ trx_t* trx, /*!< in,out: transaction */
+ bool caller_owns_trx_mutex)
+ /*!< in: true if caller owns
+ trx mutex */
+{
+ btr_assert_not_corrupted(block, index);
+ return lock_rec_create_low(
+#ifdef WITH_WSREP
+ c_lock, thr,
+#endif
+ type_mode,
+ block->page.id.space(), block->page.id.page_no(),
+ block->frame, heap_no,
+ index, trx, caller_owns_trx_mutex);
+}
diff --git a/storage/innobase/include/lock0prdt.h b/storage/innobase/include/lock0prdt.h
new file mode 100644
index 00000000000..6a7b88eba1f
--- /dev/null
+++ b/storage/innobase/include/lock0prdt.h
@@ -0,0 +1,217 @@
+/*****************************************************************************
+
+Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/lock0prdt.h
+The predicate lock system
+
+Created 9/7/2013 Jimmy Yang
+*******************************************************/
+#ifndef lock0prdt_h
+#define lock0prdt_h
+
+#include "lock0lock.h"
+
+/* Predicate lock data */
+typedef struct lock_prdt {
+ void* data; /* Predicate data */
+ uint16 op; /* Predicate operator */
+} lock_prdt_t;
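
lock_prdt_t keeps the predicate payload behind a void pointer: for R-tree
locks, data points at a minimum bounding rectangle and op names the predicate
operator. A hypothetical sketch of attaching a bounding box to such a
predicate; the mbr and prdt types are stand-ins for illustration, not the
real rtr_mbr_t handling done by lock_init_prdt_from_mbr():

    struct mbr  { double xmin, xmax, ymin, ymax; }; /* stand-in for rtr_mbr_t */
    struct prdt { void* data; unsigned short op; }; /* mirrors lock_prdt_t */

    /* Fill a predicate from a bounding box (illustrative only). */
    static void prdt_from_mbr(prdt* p, mbr* m)
    {
        p->data = m; /* predicate payload: the MBR */
        p->op   = 0; /* predicate operator, unused in this sketch */
    }

    int main()
    {
        mbr  box = { 0.0, 1.0, 0.0, 1.0 };
        prdt p;
        prdt_from_mbr(&p, &box);
        return p.op; /* 0 */
    }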
+
+/*********************************************************************//**
+Acquire a predicate lock on a block
+@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
+dberr_t
+lock_prdt_lock(
+/*===========*/
+ buf_block_t* block, /*!< in/out: buffer block of rec */
+ lock_prdt_t* prdt, /*!< in: Predicate for the lock */
+ dict_index_t* index, /*!< in: secondary index */
+ enum lock_mode mode, /*!< in: mode of the lock which
+ the read cursor should set on
+ records: LOCK_S or LOCK_X; the
+ latter is possible in
+ SELECT FOR UPDATE */
+ ulint type_mode,
+ /*!< in: LOCK_PREDICATE or LOCK_PRDT_PAGE */
+ que_thr_t* thr, /*!< in: query thread
+ (can be NULL if BTR_NO_LOCKING_FLAG) */
+ mtr_t* mtr); /*!< in/out: mini-transaction */
+
+/*********************************************************************//**
+Acquire a "Page" lock on a block
+@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
+dberr_t
+lock_place_prdt_page_lock(
+/*======================*/
+ ulint space, /*!< in: space for the page to lock */
+ ulint pageno, /*!< in: page number */
+ dict_index_t* index, /*!< in: secondary index */
+ que_thr_t* thr); /*!< in: query thread */
+
+/*********************************************************************//**
+Initiate a predicate lock from an MBR */
+void
+lock_init_prdt_from_mbr(
+/*====================*/
+ lock_prdt_t* prdt, /*!< in/out: predicate to be initialized */
+ rtr_mbr_t* mbr, /*!< in: Minimum Bounding Rectangle */
+ ulint mode, /*!< in: Search mode */
+ mem_heap_t* heap); /*!< in: heap for allocating memory */
+
+/*********************************************************************//**
+Get predicate lock's minimum bounding box
+@return the minimum bounding box */
+lock_prdt_t*
+lock_get_prdt_from_lock(
+/*====================*/
+ const lock_t* lock); /*!< in: the lock */
+
+/*********************************************************************//**
+Checks if a predicate lock request for a new lock has to wait for
+request lock2.
+@return true if new lock has to wait for lock2 to be removed */
+bool
+lock_prdt_has_to_wait(
+/*==================*/
+ const trx_t* trx, /*!< in: trx of new lock */
+ ulint type_mode,/*!< in: precise mode of the new lock
+ to set: LOCK_S or LOCK_X, possibly
+ ORed to LOCK_PREDICATE or LOCK_PRDT_PAGE,
+ LOCK_INSERT_INTENTION */
+ lock_prdt_t* prdt, /*!< in: lock predicate to check */
+ const lock_t* lock2); /*!< in: another record lock; NOTE that
+ it is assumed that this has a lock bit
+ set on the same record as in the new
+ lock we are setting */
+
+/**************************************************************//**
+Update predicate lock when page splits */
+void
+lock_prdt_update_split(
+/*===================*/
+ buf_block_t* block, /*!< in/out: page to be split */
+ buf_block_t* new_block, /*!< in/out: the new half page */
+ lock_prdt_t* prdt, /*!< in: MBR on the old page */
+ lock_prdt_t* new_prdt, /*!< in: MBR on the new page */
+ ulint space, /*!< in: space id */
+ ulint page_no); /*!< in: page number */
+
+/**************************************************************//**
+Adjust locks from an ancestor page of an R-tree on the appropriate level. */
+void
+lock_prdt_update_parent(
+/*====================*/
+ buf_block_t* left_block, /*!< in/out: page to be split */
+ buf_block_t* right_block, /*!< in/out: the new half page */
+ lock_prdt_t* left_prdt, /*!< in: MBR on the old page */
+ lock_prdt_t* right_prdt, /*!< in: MBR on the new page */
+ lock_prdt_t* parent_prdt, /*!< in: original parent MBR */
+ ulint space, /*!< in: space id */
+ ulint page_no); /*!< in: page number */
+
+/*********************************************************************//**
+Checks if locks of other transactions prevent an immediate insert of
+a predicate record.
+@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
+dberr_t
+lock_prdt_insert_check_and_lock(
+/*============================*/
+ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is
+ set, does nothing */
+ const rec_t* rec, /*!< in: record after which to insert */
+ buf_block_t* block, /*!< in/out: buffer block of rec */
+ dict_index_t* index, /*!< in: index */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr, /*!< in/out: mini-transaction */
+ lock_prdt_t* prdt); /*!< in: Minimum Bounding Rectangle */
+
+/*********************************************************************//**
+Append a predicate to the lock */
+void
+lock_prdt_set_prdt(
+/*===============*/
+ lock_t* lock, /*!< in: lock */
+ const lock_prdt_t* prdt); /*!< in: Predicate */
+
+#if 0
+
+/*********************************************************************//**
+Checks if a predicate lock request for a new lock has to wait for
+request lock2.
+@return true if new lock has to wait for lock2 to be removed */
+UNIV_INLINE
+bool
+lock_prdt_has_to_wait(
+/*==================*/
+ const trx_t* trx, /*!< in: trx of new lock */
+ ulint type_mode,/*!< in: precise mode of the new lock
+ to set: LOCK_S or LOCK_X, possibly
+ ORed to LOCK_PREDICATE or LOCK_PRDT_PAGE,
+ LOCK_INSERT_INTENTION */
+ lock_prdt_t* prdt, /*!< in: lock predicate to check */
+ const lock_t* lock2); /*!< in: another record lock; NOTE that
+ it is assumed that this has a lock bit
+ set on the same record as in the new
+ lock we are setting */
+
+/*********************************************************************//**
+Get predicate lock's minimum bounding box
+@return the minimum bounding box */
+UNIV_INLINE
+rtr_mbr_t*
+prdt_get_mbr_from_prdt(
+/*===================*/
+ const lock_prdt_t* prdt); /*!< in: the lock predicate */
+
+
+#endif
+/*************************************************************//**
+Moves the locks of a record to another record and resets the lock bits of
+the donating record. */
+void
+lock_prdt_rec_move(
+/*===============*/
+ const buf_block_t* receiver, /*!< in: buffer block containing
+ the receiving record */
+ const buf_block_t* donator); /*!< in: buffer block containing
+ the donating record */
+
+/** Check whether there is an R-tree page lock on a buffer page
+@param[in] trx trx to test the lock
+@param[in] space space id for the page
+@param[in] page_no page number
+@return true if there is none */
+bool
+lock_test_prdt_page_lock(
+/*=====================*/
+ const trx_t* trx,
+ ulint space,
+ ulint page_no);
+
+/** Removes predicate lock objects set on an index page which is discarded.
+@param[in] block page to be discarded
+@param[in] lock_hash lock hash */
+void
+lock_prdt_page_free_from_discard(
+/*=============================*/
+ const buf_block_t* block,
+ hash_table_t* lock_hash);
+
+#endif
diff --git a/storage/innobase/include/lock0priv.h b/storage/innobase/include/lock0priv.h
index ef502be9044..5e00e1bb6f1 100644
--- a/storage/innobase/include/lock0priv.h
+++ b/storage/innobase/include/lock0priv.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2016, MariaDB Corporation
+Copyright (c) 2015, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -34,64 +34,408 @@ those functions in lock/ */
#error Do not include lock0priv.h outside of the lock/ module
#endif
-#include "univ.i"
-#include "dict0types.h"
#include "hash0hash.h"
-#include "trx0types.h"
-#include "ut0lst.h"
-
-/** A table lock */
-struct lock_table_t {
- dict_table_t* table; /*!< database table in dictionary
- cache */
- UT_LIST_NODE_T(lock_t)
- locks; /*!< list of locks on the same
- table */
+#include "rem0types.h"
+#include "trx0trx.h"
+
+#ifndef UINT32_MAX
+#define UINT32_MAX (4294967295U)
+#endif
+
+/** Print the table lock into the given output stream
+@param[in,out] out the output stream
+@return the given output stream. */
+inline
+std::ostream& lock_table_t::print(std::ostream& out) const
+{
+ out << "[lock_table_t: name=" << table->name << "]";
+ return(out);
+}
+
+/** The global output operator is overloaded to conveniently
+print the lock_table_t object into the given output stream.
+@param[in,out] out the output stream
+@param[in] lock the table lock
+@return the given output stream */
+inline
+std::ostream&
+operator<<(std::ostream& out, const lock_table_t& lock)
+{
+ return(lock.print(out));
+}
+
+/** Convert the member 'type_mode' into a human readable string.
+@return human readable string */
+inline
+std::string
+ib_lock_t::type_mode_string() const
+{
+ std::ostringstream sout;
+ sout << type_string();
+ sout << " | " << lock_mode_string(mode());
+
+ if (is_record_not_gap()) {
+ sout << " | LOCK_REC_NOT_GAP";
+ }
+
+ if (is_waiting()) {
+ sout << " | LOCK_WAIT";
+ }
+
+ if (is_gap()) {
+ sout << " | LOCK_GAP";
+ }
+
+ if (is_insert_intention()) {
+ sout << " | LOCK_INSERT_INTENTION";
+ }
+ return(sout.str());
+}
+
+inline
+std::ostream&
+ib_lock_t::print(std::ostream& out) const
+{
+ out << "[lock_t: type_mode=" << type_mode << "("
+ << type_mode_string() << ")";
+
+ if (is_record_lock()) {
+ out << un_member.rec_lock;
+ } else {
+ out << un_member.tab_lock;
+ }
+
+ out << "]";
+ return(out);
+}
+
+inline
+std::ostream&
+operator<<(std::ostream& out, const ib_lock_t& lock)
+{
+ return(lock.print(out));
+}
+
+#ifdef UNIV_DEBUG
+extern ibool lock_print_waits;
+#endif /* UNIV_DEBUG */
+
+/** Restricts the length of search we will do in the waits-for
+graph of transactions */
+static const ulint LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK = 1000000;
+
+/** Restricts the search depth we will do in the waits-for graph of
+transactions */
+static const ulint LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK = 200;
+
+/** When releasing transaction locks, this specifies how often we release
+the lock mutex for a moment, to also give other threads access to it */
+static const ulint LOCK_RELEASE_INTERVAL = 1000;
+
+/* Safety margin when creating a new record lock: this many extra records
+can be inserted into the page without the need to create a lock with a bigger
+bitmap */
+
+static const ulint LOCK_PAGE_BITMAP_MARGIN = 64;
+
+/* An explicit record lock affects both the record and the gap before it.
+An implicit x-lock does not affect the gap, it only locks the index
+record from read or update.
+
+If a transaction has modified or inserted an index record, then
+it owns an implicit x-lock on the record. On a secondary index record,
+a transaction has an implicit x-lock also if it has modified the
+clustered index record, the max trx id of the page where the secondary
+index record resides is >= trx id of the transaction (or database recovery
+is running), and there are no explicit non-gap lock requests on the
+secondary index record.
+
+This complicated definition for a secondary index comes from the
+implementation: we want to be able to determine if a secondary index
+record has an implicit x-lock, just by looking at the present clustered
+index record, not at the historical versions of the record. The
+complicated definition can be explained to the user as follows: there is
+nondeterminism in the access path when a query is answered; we may,
+or may not, access the clustered index record and thus may, or may not,
+bump into an x-lock set there.
+
+Different transactions can have conflicting locks set on the gap at the
+same time. The locks on the gap are purely inhibitive: an insert cannot
+be made, or a select cursor may have to wait if a different transaction
+has a conflicting lock on the gap. An x-lock on the gap does not give
+the right to insert into the gap.
+
+An explicit lock can be placed on a user record or the supremum record of
+a page. The locks on the supremum record are always thought to be of the gap
+type, though the gap bit is not set. When we perform an update of a record
+where the size of the record changes, we may temporarily store its explicit
+locks on the infimum record of the page, though the infimum otherwise never
+carries locks.
+
+A waiting record lock can also be of the gap type. A waiting lock request
+can be granted when there is no conflicting mode lock request by another
+transaction ahead of it in the explicit lock queue.
+
+In version 4.0.5 we added yet another explicit lock type: LOCK_REC_NOT_GAP.
+It only locks the record it is placed on, not the gap before the record.
+This lock type is necessary to emulate an Oracle-like READ COMMITTED isolation
+level.
+
+-------------------------------------------------------------------------
+RULE 1: If there is an implicit x-lock on a record, and there are non-gap
+-------
+lock requests waiting in the queue, then the transaction holding the implicit
+x-lock also has an explicit non-gap record x-lock. Therefore, as locks are
+released, we can grant locks to waiting lock requests purely by looking at
+the explicit lock requests in the queue.
+
+RULE 3: Different transactions cannot have conflicting granted non-gap locks
+-------
+on a record at the same time. However, they can have conflicting granted gap
+locks.
+RULE 4: If there is a waiting lock request in a queue, no lock request,
+-------
+gap or not, can be inserted ahead of it in the queue. In record deletes
+and page splits new gap type locks can be created by the database manager
+for a transaction, and without rule 4, the waits-for graph of transactions
+might become cyclic without the database noticing it, as the deadlock check
+is only performed when a transaction itself requests a lock!
+-------------------------------------------------------------------------
+
+An insert is allowed to a gap if there are no explicit lock requests by
+other transactions on the next record. It does not matter if these lock
+requests are granted or waiting, gap bit set or not, with the exception
+that a gap type request set by another transaction to wait for
+its turn to do an insert is ignored. On the other hand, an
+implicit x-lock by another transaction does not prevent an insert, which
+allows for more concurrency when using an Oracle-style sequence number
+generator for the primary key with many transactions doing inserts
+concurrently.
+
+A modify of a record is allowed if the transaction has an x-lock on the
+record, or if other transactions do not have any non-gap lock requests on the
+record.
+
+A read of a single user record with a cursor is allowed if the transaction
+has a non-gap explicit, or an implicit lock on the record, or if the other
+transactions have no x-lock requests on the record. At a page supremum a
+read is always allowed.
+
+In summary, an implicit lock is seen as a granted x-lock only on the
+record, not on the gap. An explicit lock with no gap bit set is a lock
+both on the record and the gap. If the gap bit is set, the lock is only
+on the gap. Different transactions cannot own conflicting locks on the
+record at the same time, but they may own conflicting locks on the gap.
+Granted locks on a record give an access right to the record, but gap type
+locks just inhibit operations.
+
+NOTE: Finding out if some transaction has an implicit x-lock on a secondary
+index record can be cumbersome. We may have to look at previous versions of
+the corresponding clustered index record to find out if a delete marked
+secondary index record was delete marked by an active transaction, not by
+a committed one.
+
+FACT A: If a transaction has inserted a row, it can delete it any time
+without the need to wait for locks.
+
+PROOF: The transaction has an implicit x-lock on every index record inserted
+for the row, and can thus modify each record without the need to wait. Q.E.D.
+
+FACT B: If a transaction has read some result set with a cursor, it can read
+it again, and retrieve the same result set, if it has not modified the
+result set in the meantime. Hence, there is no phantom problem. If the
+biggest record, in the alphabetical order, touched by the cursor is removed,
+a lock wait may occur, otherwise not.
+
+PROOF: When a read cursor proceeds, it sets an s-lock on each user record
+it passes, and a gap type s-lock on each page supremum. The cursor must
+wait until it has these locks granted. Then no other transaction can
+have a granted x-lock on any of the user records, and therefore cannot
+modify the user records. Neither can any other transaction insert into
+the gaps which were passed over by the cursor. Page splits and merges,
+and removal of obsolete versions of records do not affect this, because
+when a user record or a page supremum is removed, the next record inherits
+its locks as gap type locks, and therefore blocks inserts to the same gap.
+Also, if a page supremum is inserted, it inherits its locks from the successor
+record. When the cursor is positioned again at the start of the result set,
+the records it will touch on its course are either records it touched
+during the last pass or new inserted page supremums. It can immediately
+access all these records, and when it arrives at the biggest record, it
+notices that the result set is complete. If the biggest record was removed,
+a lock wait can occur because the next record only inherits a gap type lock,
+and a wait may be needed. Q.E.D. */
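
To make the summary above concrete, here is a small self-contained sketch
(not InnoDB code) that classifies the precise mode of a lock request from its
gap-related bits, using the flag values this patch defines in lock0types.h
(LOCK_ORDINARY = 0, LOCK_GAP = 512, LOCK_REC_NOT_GAP = 1024):

    #include <cstdio>

    static const unsigned GAP         = 512;  /* lock on the gap only */
    static const unsigned REC_NOT_GAP = 1024; /* lock on the record only */

    static const char* precise_mode(unsigned type_mode)
    {
        if (type_mode & GAP)         return "gap lock";
        if (type_mode & REC_NOT_GAP) return "record-only lock";
        return "next-key lock (record + gap)"; /* LOCK_ORDINARY == 0 */
    }

    int main()
    {
        std::printf("%s\n", precise_mode(0));    /* next-key */
        std::printf("%s\n", precise_mode(512));  /* gap only */
        std::printf("%s\n", precise_mode(1024)); /* record only */
    }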
+
+/* If an index record should be changed or a new one inserted, we must check
+the lock on the record or the next. When a read cursor starts reading,
+we will set a record level s-lock on each record it passes, except on the
+initial record on which the cursor is positioned before we start to fetch
+records. Our index tree search has the convention that the B-tree
+cursor is positioned BEFORE the first possibly matching record in
+the search. Optimizations are possible here: if the record is searched
+on an equality condition to a unique key, we could actually set a special
+lock on the record, a lock which would not prevent any insert before
+this record. In next-key locking, an x-lock set on a record also
+prevents inserts just before that record.
+ There are special infimum and supremum records on each page.
+A supremum record can be locked by a read cursor. This record cannot be
+updated, but the lock prevents insertion of a user record at the end of
+the page.
+ Next key locks will prevent the phantom problem where new rows
+could appear to SELECT result sets after the select operation has been
+performed. Prevention of phantoms ensures the serializability of
+transactions.
+ What should we check if an insert of a new record is wanted?
+Only the lock on the next record on the same page, because also the
+supremum record can carry a lock. An s-lock prevents insertion, but
+what about an x-lock? If it was set by a searched update, then there
+is implicitly an s-lock, too, and the insert should be prevented.
+What if our transaction owns an x-lock to the next record, but there is
+a waiting s-lock request on the next record? If this s-lock was placed
+by a read cursor moving in the ascending order in the index, we cannot
+do the insert immediately, because when we finally commit our transaction,
+the read cursor should see also the new inserted record. So we should
+move the read cursor backward from the next record for it to pass over
+the new inserted record. This move backward may be too cumbersome to
+implement. If we in this situation just enqueue a second x-lock request
+for our transaction on the next record, then the deadlock mechanism
+notices a deadlock between our transaction and the s-lock request
+transaction. This seems to be an ok solution.
+ We could have the convention that granted explicit record locks
+lock the corresponding records from changing, and also lock the gaps
+before them from inserting. A waiting explicit lock request locks the gap
+before from inserting. Implicit record x-locks, which we derive from the
+transaction id in the clustered index record, only lock the record itself
+from modification, not the gap before it from inserting.
+ How should we store update locks? If the search is done by a unique
+key, we could just modify the record trx id. Otherwise, we could put a record
+x-lock on the record. If the update changes ordering fields of the
+clustered index record, the inserted new record needs no record lock in
+lock table, the trx id is enough. The same holds for a secondary index
+record. Searched delete is similar to update.
+
+PROBLEM:
+What about waiting lock requests? If a transaction is waiting to make an
+update to a record which another modified, how does the other transaction
+know to send the end-lock-wait signal to the waiting transaction? If we have
+the convention that a transaction may wait for just one lock at a time, how
+do we preserve it if lock wait ends?
+
+PROBLEM:
+Checking the trx id label of a secondary index record. In the case of a
+modification, not an insert, is this necessary? A secondary index record
+is modified only by setting or resetting its deleted flag. A secondary index
+record contains fields to uniquely determine the corresponding clustered
+index record. A secondary index record is therefore only modified if we
+also modify the clustered index record, and the trx id checking is done
+on the clustered index record, before we come to modify the secondary index
+record. So, in the case of delete marking or unmarking a secondary index
+record, we do not have to care about trx ids, only the locks in the lock
+table must be checked. In the case of a select from a secondary index, the
+trx id is relevant, and in this case we may have to search the clustered
+index record.
+
+PROBLEM: How to update record locks when page is split or merged, or
+--------------------------------------------------------------------
+a record is deleted or updated?
+If the size of fields in a record changes, we perform the update by
+a delete followed by an insert. How can we retain the locks set or
+waiting on the record? Because a record lock is indexed in the bitmap
+by the heap number of the record, when we remove the record from the
+record list, it is possible still to keep the lock bits. If the page
+is reorganized, we could make a table of old and new heap numbers,
+and permute the bitmaps in the locks accordingly. We can add to the
+table a row telling where the updated record ended. If the update does
+not require a reorganization of the page, we can simply move the lock
+bits for the updated record to the position determined by its new heap
+number (we may have to allocate a new lock, if we run out of the bitmap
+in the old one).
+ A more complicated case is the one where the reinsertion of the
+updated record is done pessimistically, because the structure of the
+tree may change.
+
+PROBLEM: If a supremum record is removed in a page merge, or a record
+---------------------------------------------------------------------
+removed in a purge, what to do to the waiting lock requests? In a split to
+the right, we just move the lock requests to the new supremum. If a record
+is removed, we could move the waiting lock request to its inheritor, the
+next record in the index. But, the next record may already have lock
+requests on its own queue. A new deadlock check should be made then. Maybe
+it is easier just to release the waiting transactions. They can then enqueue
+new lock requests on appropriate records.
+
+PROBLEM: When a record is inserted, what locks should it inherit from the
+-------------------------------------------------------------------------
+upper neighbor? An insert of a new supremum record in a page split is
+always possible, but an insert of a new user record requires that the upper
+neighbor does not have any lock requests by other transactions, granted or
+waiting, in its lock queue. Solution: We can copy the locks as gap type
+locks, so that also the waiting locks are transformed to granted gap type
+locks on the inserted record. */
+
+/* LOCK COMPATIBILITY MATRIX
+ * IS IX S X AI
+ * IS + + + - +
+ * IX + + - - +
+ * S + - + - -
+ * X - - - - -
+ * AI + + - - -
+ *
+ * Note that for rows, InnoDB only acquires S or X locks.
+ * For tables, InnoDB normally acquires IS or IX locks.
+ * S or X table locks are only acquired for LOCK TABLES.
+ * Auto-increment (AI) locks are needed because of
+ * statement-level MySQL binlog.
+ * See also lock_mode_compatible().
+ */
+static const byte lock_compatibility_matrix[5][5] = {
+ /** IS IX S X AI */
+ /* IS */ { TRUE, TRUE, TRUE, FALSE, TRUE},
+ /* IX */ { TRUE, TRUE, FALSE, FALSE, TRUE},
+ /* S */ { TRUE, FALSE, TRUE, FALSE, FALSE},
+ /* X */ { FALSE, FALSE, FALSE, FALSE, FALSE},
+ /* AI */ { TRUE, TRUE, FALSE, FALSE, FALSE}
};
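
As a quick sanity check of the matrix, a standalone sketch that performs the
same table lookup lock_mode_compatible() does (plain C++, outside the InnoDB
build):

    #include <cstdio>

    enum mode { IS, IX, S, X, AI };

    static const bool compat[5][5] = {
        /*        IS     IX     S      X      AI   */
        /* IS */ {true,  true,  true,  false, true },
        /* IX */ {true,  true,  false, false, true },
        /* S  */ {true,  false, true,  false, false},
        /* X  */ {false, false, false, false, false},
        /* AI */ {true,  true,  false, false, false}
    };

    int main()
    {
        std::printf("IX vs IX: %d\n", compat[IX][IX]); /* 1: both may proceed */
        std::printf("IX vs S : %d\n", compat[IX][S]);  /* 0: S blocks IX */
        std::printf("AI vs AI: %d\n", compat[AI][AI]); /* 0: AI is exclusive */
    }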
-/** Record lock for a page */
-struct lock_rec_t {
- ulint space; /*!< space id */
- ulint page_no; /*!< page number */
- ulint n_bits; /*!< number of bits in the lock
- bitmap; NOTE: the lock bitmap is
- placed immediately after the
- lock struct */
+/* STRONGER-OR-EQUAL RELATION (mode1=row, mode2=column)
+ * IS IX S X AI
+ * IS + - - - -
+ * IX + + - - -
+ * S + - + - -
+ * X + + + + +
+ * AI - - - - +
+ * See lock_mode_stronger_or_eq().
+ */
+static const byte lock_strength_matrix[5][5] = {
+ /** IS IX S X AI */
+ /* IS */ { TRUE, FALSE, FALSE, FALSE, FALSE},
+ /* IX */ { TRUE, TRUE, FALSE, FALSE, FALSE},
+ /* S */ { TRUE, FALSE, TRUE, FALSE, FALSE},
+ /* X */ { TRUE, TRUE, TRUE, TRUE, TRUE},
+ /* AI */ { FALSE, FALSE, FALSE, FALSE, TRUE}
};
-/** Lock struct; protected by lock_sys->mutex */
-struct lock_t {
- trx_t* trx; /*!< transaction owning the
- lock */
- UT_LIST_NODE_T(lock_t)
- trx_locks; /*!< list of the locks of the
- transaction */
- ulint type_mode; /*!< lock type, mode, LOCK_GAP or
- LOCK_REC_NOT_GAP,
- LOCK_INSERT_INTENTION,
- wait flag, ORed */
- hash_node_t hash; /*!< hash chain node for a record
- lock */
- dict_index_t* index; /*!< index for a record lock */
-
- /** time(NULL) of the lock request creation.
- Used for computing wait_time and diagnostics only.
- Note: bogus durations may be reported
- when the system time is adjusted! */
- time_t requested_time;
- /** Cumulated wait time in seconds.
- Note: may be bogus when the system time is adjusted! */
- ulint wait_time;
-
- union {
- lock_table_t tab_lock;/*!< table lock */
- lock_rec_t rec_lock;/*!< record lock */
- } un_member; /*!< lock details */
+/** Maximum depth of the DFS stack. */
+static const ulint MAX_STACK_SIZE = 4096;
+
+#define PRDT_HEAPNO PAGE_HEAP_NO_INFIMUM
+/** Record locking request status */
+enum lock_rec_req_status {
+ /** Failed to acquire a lock */
+ LOCK_REC_FAIL,
+ /** Succeeded in acquiring a lock (implicit or already acquired) */
+ LOCK_REC_SUCCESS,
+ /** Explicitly created a new lock */
+ LOCK_REC_SUCCESS_CREATED
};
+#ifdef UNIV_DEBUG
+/** The count of the types of locks. */
+static const ulint lock_types = UT_ARR_SIZE(lock_compatibility_matrix);
+#endif /* UNIV_DEBUG */
+
/*********************************************************************//**
Gets the type of a lock.
-@return LOCK_TABLE or LOCK_REC */
+@return LOCK_TABLE or LOCK_REC */
UNIV_INLINE
ulint
lock_get_type_low(
@@ -100,8 +444,7 @@ lock_get_type_low(
/*********************************************************************//**
Gets the previous record lock set on a record.
-@return previous lock on the same record, NULL if none exists */
-UNIV_INTERN
+@return previous lock on the same record, NULL if none exists */
const lock_t*
lock_rec_get_prev(
/*==============*/
@@ -111,7 +454,6 @@ lock_rec_get_prev(
/*********************************************************************//**
Cancels a waiting lock request and releases possible other transactions
waiting behind it. */
-UNIV_INTERN
void
lock_cancel_waiting_and_release(
/*============================*/
@@ -120,18 +462,229 @@ lock_cancel_waiting_and_release(
/*********************************************************************//**
Checks if some transaction has an implicit x-lock on a record in a clustered
index.
-@return transaction id of the transaction which has the x-lock, or 0 */
+@return transaction id of the transaction which has the x-lock, or 0 */
UNIV_INLINE
trx_id_t
lock_clust_rec_some_has_impl(
/*=========================*/
const rec_t* rec, /*!< in: user record */
const dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ const offset_t* offsets)/*!< in: rec_get_offsets(rec, index) */
+ MY_ATTRIBUTE((warn_unused_result));
+
+/*********************************************************************//**
+Gets the first or next record lock on a page.
+@return next lock, NULL if none exists */
+UNIV_INLINE
+const lock_t*
+lock_rec_get_next_on_page_const(
+/*============================*/
+ const lock_t* lock); /*!< in: a record lock */
+
+/*********************************************************************//**
+Gets the nth bit of a record lock.
+@return TRUE if the bit is set; FALSE if it is not set or i == ULINT_UNDEFINED */
+UNIV_INLINE
+ibool
+lock_rec_get_nth_bit(
+/*=================*/
+ const lock_t* lock, /*!< in: record lock */
+ ulint i); /*!< in: index of the bit */
+
+/*********************************************************************//**
+Gets the number of bits in a record lock bitmap.
+@return number of bits */
+UNIV_INLINE
+ulint
+lock_rec_get_n_bits(
+/*================*/
+ const lock_t* lock); /*!< in: record lock */
+
+/**********************************************************************//**
+Sets the nth bit of a record lock to TRUE. */
+UNIV_INLINE
+void
+lock_rec_set_nth_bit(
+/*=================*/
+ lock_t* lock, /*!< in: record lock */
+ ulint i); /*!< in: index of the bit */
+
+/** Reset the nth bit of a record lock.
+@param[in,out] lock record lock
+@param[in] i index of the bit that will be reset
+@return previous value of the bit */
+inline byte lock_rec_reset_nth_bit(lock_t* lock, ulint i)
+{
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
+ ut_ad(i < lock->un_member.rec_lock.n_bits);
+
+ byte* b = reinterpret_cast<byte*>(&lock[1]) + (i >> 3);
+ byte mask = byte(1U << (i & 7));
+ byte bit = *b & mask;
+ *b &= ~mask;
+
+ if (bit != 0) {
+ ut_ad(lock->trx->lock.n_rec_locks > 0);
+ --lock->trx->lock.n_rec_locks;
+ }
+
+ return(bit);
+}
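
The byte/mask arithmetic above works because the bitmap sits directly behind
the lock_t allocation: &lock[1] is the first bitmap byte, and bit i lives in
byte i >> 3 at bit position i & 7. A standalone sketch of the same set/test
arithmetic over a plain buffer:

    #include <cstdio>

    typedef unsigned char byte;

    static void set_bit(byte* bitmap, unsigned i)
    { bitmap[i >> 3] |= byte(1U << (i & 7)); }

    static bool test_bit(const byte* bitmap, unsigned i)
    { return bitmap[i >> 3] & byte(1U << (i & 7)); }

    int main()
    {
        byte bitmap[8] = {0};   /* room for 64 heap numbers */
        set_bit(bitmap, 13);    /* "lock" the record at heap_no 13 */
        std::printf("%d %d\n", test_bit(bitmap, 13), test_bit(bitmap, 14));
    }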
+
+/*********************************************************************//**
+Gets the first or next record lock on a page.
+@return next lock, NULL if none exists */
+UNIV_INLINE
+lock_t*
+lock_rec_get_next_on_page(
+/*======================*/
+ lock_t* lock); /*!< in: a record lock */
+/*********************************************************************//**
+Gets the first record lock on a page, where the page is identified by its
+file address.
+@return first lock, NULL if none exists */
+UNIV_INLINE
+lock_t*
+lock_rec_get_first_on_page_addr(
+/*============================*/
+ hash_table_t* lock_hash, /*!< in: lock hash table */
+ ulint space, /*!< in: space */
+ ulint page_no); /*!< in: page number */
+
+/*********************************************************************//**
+Gets the first record lock on a page, where the page is identified by a
+pointer to it.
+@return first lock, NULL if none exists */
+UNIV_INLINE
+lock_t*
+lock_rec_get_first_on_page(
+/*=======================*/
+ hash_table_t* lock_hash, /*!< in: lock hash table */
+ const buf_block_t* block); /*!< in: buffer block */
+
+
+/*********************************************************************//**
+Gets the next explicit lock request on a record.
+@return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */
+UNIV_INLINE
+lock_t*
+lock_rec_get_next(
+/*==============*/
+ ulint heap_no,/*!< in: heap number of the record */
+ lock_t* lock); /*!< in: lock */
+
+/*********************************************************************//**
+Gets the next explicit lock request on a record.
+@return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */
+UNIV_INLINE
+const lock_t*
+lock_rec_get_next_const(
+/*====================*/
+ ulint heap_no,/*!< in: heap number of the record */
+ const lock_t* lock); /*!< in: lock */
+
+/*********************************************************************//**
+Gets the first explicit lock request on a record.
+@return first lock, NULL if none exists */
+UNIV_INLINE
+lock_t*
+lock_rec_get_first(
+/*===============*/
+ hash_table_t* hash, /*!< in: hash chain the lock is on */
+ const buf_block_t* block, /*!< in: block containing the record */
+ ulint heap_no);/*!< in: heap number of the record */
+
+/*********************************************************************//**
+Gets the mode of a lock.
+@return mode */
+UNIV_INLINE
+enum lock_mode
+lock_get_mode(
+/*==========*/
+ const lock_t* lock); /*!< in: lock */
+
+/*********************************************************************//**
+Calculates if lock mode 1 is compatible with lock mode 2.
+@return nonzero if mode1 compatible with mode2 */
+UNIV_INLINE
+ulint
+lock_mode_compatible(
+/*=================*/
+ enum lock_mode mode1, /*!< in: lock mode */
+ enum lock_mode mode2); /*!< in: lock mode */
+
+/*********************************************************************//**
+Calculates if lock mode 1 is stronger or equal to lock mode 2.
+@return nonzero if mode1 stronger or equal to mode2 */
+UNIV_INLINE
+ulint
+lock_mode_stronger_or_eq(
+/*=====================*/
+ enum lock_mode mode1, /*!< in: lock mode */
+ enum lock_mode mode2); /*!< in: lock mode */
+
+/*********************************************************************//**
+Gets the wait flag of a lock.
+@return LOCK_WAIT if waiting, 0 if not */
+UNIV_INLINE
+ulint
+lock_get_wait(
+/*==========*/
+ const lock_t* lock); /*!< in: lock */
+
+/*********************************************************************//**
+Looks for a record lock struct of a suitable type set by the same trx on the
+same page. This can be used to save space when a new record lock should be
+set on a page: no new struct is needed if a suitable old one is found.
+@return lock or NULL */
+UNIV_INLINE
+lock_t*
+lock_rec_find_similar_on_page(
+/*==========================*/
+ ulint type_mode, /*!< in: lock type_mode field */
+ ulint heap_no, /*!< in: heap number of the record */
+ lock_t* lock, /*!< in: lock_rec_get_first_on_page() */
+ const trx_t* trx); /*!< in: transaction */
+
+/*********************************************************************//**
+Checks if a transaction has the specified table lock, or stronger. This
+function should only be called by the thread that owns the transaction.
+@return lock or NULL */
+UNIV_INLINE
+const lock_t*
+lock_table_has(
+/*===========*/
+ const trx_t* trx, /*!< in: transaction */
+ const dict_table_t* table, /*!< in: table */
+ enum lock_mode mode); /*!< in: lock mode */
+
+/** Set the wait status of a lock.
+@param[in,out] lock lock that will be waited for
+@param[in,out] trx transaction that will wait for the lock */
+inline void lock_set_lock_and_trx_wait(lock_t* lock, trx_t* trx)
+{
+ ut_ad(lock);
+ ut_ad(lock->trx == trx);
+ ut_ad(trx->lock.wait_lock == NULL);
+ ut_ad(lock_mutex_own());
+ ut_ad(trx_mutex_own(trx));
+
+ trx->lock.wait_lock = lock;
+ lock->type_mode |= LOCK_WAIT;
+}
+
+/** Reset the wait status of a lock.
+@param[in,out] lock lock that was possibly being waited for */
+inline void lock_reset_lock_and_trx_wait(lock_t* lock)
+{
+ ut_ad(lock_get_wait(lock));
+ ut_ad(lock_mutex_own());
+ ut_ad(lock->trx->lock.wait_lock == NULL
+ || lock->trx->lock.wait_lock == lock);
+ lock->trx->lock.wait_lock = NULL;
+ lock->type_mode &= ~LOCK_WAIT;
+}
-#ifndef UNIV_NONINL
#include "lock0priv.ic"
-#endif
#endif /* lock0priv_h */
diff --git a/storage/innobase/include/lock0priv.ic b/storage/innobase/include/lock0priv.ic
index 02c4ff093c8..7062e3f7082 100644
--- a/storage/innobase/include/lock0priv.ic
+++ b/storage/innobase/include/lock0priv.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -32,9 +33,11 @@ methods but they are used only in that file. */
#error Do not include lock0priv.ic outside of the lock/ module
#endif
+#include "row0row.h"
+
/*********************************************************************//**
Gets the type of a lock.
-@return LOCK_TABLE or LOCK_REC */
+@return LOCK_TABLE or LOCK_REC */
UNIV_INLINE
ulint
lock_get_type_low(
@@ -49,14 +52,14 @@ lock_get_type_low(
/*********************************************************************//**
Checks if some transaction has an implicit x-lock on a record in a clustered
index.
-@return transaction id of the transaction which has the x-lock, or 0 */
+@return transaction id of the transaction which has the x-lock, or 0 */
UNIV_INLINE
trx_id_t
lock_clust_rec_some_has_impl(
/*=========================*/
const rec_t* rec, /*!< in: user record */
const dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets)/*!< in: rec_get_offsets(rec, index) */
{
ut_ad(dict_index_is_clust(index));
ut_ad(page_rec_is_user_rec(rec));
@@ -64,4 +67,355 @@ lock_clust_rec_some_has_impl(
return(row_get_rec_trx_id(rec, index, offsets));
}
+/*********************************************************************//**
+Gets the number of bits in a record lock bitmap.
+@return number of bits */
+UNIV_INLINE
+ulint
+lock_rec_get_n_bits(
+/*================*/
+ const lock_t* lock) /*!< in: record lock */
+{
+ return(lock->un_member.rec_lock.n_bits);
+}
+
+/**********************************************************************//**
+Sets the nth bit of a record lock to TRUE. */
+UNIV_INLINE
+void
+lock_rec_set_nth_bit(
+/*=================*/
+ lock_t* lock, /*!< in: record lock */
+ ulint i) /*!< in: index of the bit */
+{
+ ulint byte_index;
+ ulint bit_index;
+
+ ut_ad(lock);
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
+ ut_ad(i < lock->un_member.rec_lock.n_bits);
+
+ byte_index = i / 8;
+ bit_index = i % 8;
+
+ ((byte*) &lock[1])[byte_index] |= 1 << bit_index;
+
+ ++lock->trx->lock.n_rec_locks;
+}
+
+/*********************************************************************//**
+Gets the first or next record lock on a page.
+@return next lock, NULL if none exists */
+UNIV_INLINE
+lock_t*
+lock_rec_get_next_on_page(
+/*======================*/
+ lock_t* lock) /*!< in: a record lock */
+{
+ return((lock_t*) lock_rec_get_next_on_page_const(lock));
+}
+
+/*********************************************************************//**
+Gets the first record lock on a page, where the page is identified by its
+file address.
+@return first lock, NULL if none exists */
+UNIV_INLINE
+lock_t*
+lock_rec_get_first_on_page_addr(
+/*============================*/
+ hash_table_t* lock_hash, /*!< in: lock hash table */
+ ulint space, /*!< in: space */
+ ulint page_no) /*!< in: page number */
+{
+ ut_ad(lock_mutex_own());
+
+ for (lock_t* lock = static_cast<lock_t*>(
+ HASH_GET_FIRST(lock_hash,
+ lock_rec_hash(space, page_no)));
+ lock != NULL;
+ lock = static_cast<lock_t*>(HASH_GET_NEXT(hash, lock))) {
+
+ if (lock->un_member.rec_lock.space == space
+ && lock->un_member.rec_lock.page_no == page_no) {
+
+ return(lock);
+ }
+ }
+
+ return(NULL);
+}
+
+/*********************************************************************//**
+Gets the first record lock on a page, where the page is identified by a
+pointer to it.
+@return first lock, NULL if none exists */
+UNIV_INLINE
+lock_t*
+lock_rec_get_first_on_page(
+/*=======================*/
+ hash_table_t* lock_hash, /*!< in: lock hash table */
+ const buf_block_t* block) /*!< in: buffer block */
+{
+ ut_ad(lock_mutex_own());
+
+ ulint space = block->page.id.space();
+ ulint page_no = block->page.id.page_no();
+ ulint hash = buf_block_get_lock_hash_val(block);
+
+ for (lock_t* lock = static_cast<lock_t*>(
+ HASH_GET_FIRST(lock_hash, hash));
+ lock != NULL;
+ lock = static_cast<lock_t*>(HASH_GET_NEXT(hash, lock))) {
+
+ if (lock->un_member.rec_lock.space == space
+ && lock->un_member.rec_lock.page_no == page_no) {
+
+ return(lock);
+ }
+ }
+
+ return(NULL);
+}
+
+/*********************************************************************//**
+Gets the next explicit lock request on a record.
+@return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */
+UNIV_INLINE
+lock_t*
+lock_rec_get_next(
+/*==============*/
+ ulint heap_no,/*!< in: heap number of the record */
+ lock_t* lock) /*!< in: lock */
+{
+ ut_ad(lock_mutex_own());
+
+ do {
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
+ lock = lock_rec_get_next_on_page(lock);
+ } while (lock && !lock_rec_get_nth_bit(lock, heap_no));
+
+ return(lock);
+}
+
+/*********************************************************************//**
+Gets the next explicit lock request on a record.
+@return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */
+UNIV_INLINE
+const lock_t*
+lock_rec_get_next_const(
+/*====================*/
+ ulint heap_no,/*!< in: heap number of the record */
+ const lock_t* lock) /*!< in: lock */
+{
+ return(lock_rec_get_next(heap_no, (lock_t*) lock));
+}
+
+/*********************************************************************//**
+Gets the first explicit lock request on a record.
+@return first lock, NULL if none exists */
+UNIV_INLINE
+lock_t*
+lock_rec_get_first(
+/*===============*/
+ hash_table_t* hash, /*!< in: hash chain the lock is on */
+ const buf_block_t* block, /*!< in: block containing the record */
+ ulint heap_no)/*!< in: heap number of the record */
+{
+ ut_ad(lock_mutex_own());
+
+ for (lock_t* lock = lock_rec_get_first_on_page(hash, block); lock;
+ lock = lock_rec_get_next_on_page(lock)) {
+ if (lock_rec_get_nth_bit(lock, heap_no)) {
+ return(lock);
+ }
+ }
+
+ return(NULL);
+}
+
+/*********************************************************************//**
+Gets the nth bit of a record lock.
+@return TRUE if the bit is set; FALSE if it is not set or i == ULINT_UNDEFINED */
+UNIV_INLINE
+ibool
+lock_rec_get_nth_bit(
+/*=================*/
+ const lock_t* lock, /*!< in: record lock */
+ ulint i) /*!< in: index of the bit */
+{
+ const byte* b;
+
+ ut_ad(lock);
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
+
+ if (i >= lock->un_member.rec_lock.n_bits) {
+
+ return(FALSE);
+ }
+
+ b = ((const byte*) &lock[1]) + (i / 8);
+
+ return(1 & *b >> (i % 8));
+}
+
+/*********************************************************************//**
+Gets the first or next record lock on a page.
+@return next lock, NULL if none exists */
+UNIV_INLINE
+const lock_t*
+lock_rec_get_next_on_page_const(
+/*============================*/
+ const lock_t* lock) /*!< in: a record lock */
+{
+ ut_ad(lock_mutex_own());
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
+
+ ulint space = lock->un_member.rec_lock.space;
+ ulint page_no = lock->un_member.rec_lock.page_no;
+
+ while ((lock = static_cast<const lock_t*>(HASH_GET_NEXT(hash, lock)))
+ != NULL) {
+
+ if (lock->un_member.rec_lock.space == space
+ && lock->un_member.rec_lock.page_no == page_no) {
+
+ return(lock);
+ }
+ }
+
+ return(NULL);
+}
+
+/*********************************************************************//**
+Gets the mode of a lock.
+@return mode */
+UNIV_INLINE
+enum lock_mode
+lock_get_mode(
+/*==========*/
+ const lock_t* lock) /*!< in: lock */
+{
+ ut_ad(lock);
+
+ return(static_cast<enum lock_mode>(lock->type_mode & LOCK_MODE_MASK));
+}
+
+/*********************************************************************//**
+Calculates if lock mode 1 is compatible with lock mode 2.
+@return nonzero if mode1 compatible with mode2 */
+UNIV_INLINE
+ulint
+lock_mode_compatible(
+/*=================*/
+ enum lock_mode mode1, /*!< in: lock mode */
+ enum lock_mode mode2) /*!< in: lock mode */
+{
+ ut_ad((ulint) mode1 < lock_types);
+ ut_ad((ulint) mode2 < lock_types);
+
+ return(lock_compatibility_matrix[mode1][mode2]);
+}
+
+/*********************************************************************//**
+Calculates if lock mode 1 is stronger or equal to lock mode 2.
+@return nonzero if mode1 stronger or equal to mode2 */
+UNIV_INLINE
+ulint
+lock_mode_stronger_or_eq(
+/*=====================*/
+ enum lock_mode mode1, /*!< in: lock mode */
+ enum lock_mode mode2) /*!< in: lock mode */
+{
+ ut_ad((ulint) mode1 < lock_types);
+ ut_ad((ulint) mode2 < lock_types);
+
+ return(lock_strength_matrix[mode1][mode2]);
+}
+
+/*********************************************************************//**
+Gets the wait flag of a lock.
+@return LOCK_WAIT if waiting, 0 if not */
+UNIV_INLINE
+ulint
+lock_get_wait(
+/*==========*/
+ const lock_t* lock) /*!< in: lock */
+{
+ ut_ad(lock);
+
+ return(lock->type_mode & LOCK_WAIT);
+}
+
+/*********************************************************************//**
+Looks for a record lock struct of a suitable type set by the same trx on the
+same page. This can be used to save space when a new record lock should be
+set on a page: no new struct is needed if a suitable old one is found.
+@return lock or NULL */
+UNIV_INLINE
+lock_t*
+lock_rec_find_similar_on_page(
+/*==========================*/
+ ulint type_mode, /*!< in: lock type_mode field */
+ ulint heap_no, /*!< in: heap number of the record */
+ lock_t* lock, /*!< in: lock_rec_get_first_on_page() */
+ const trx_t* trx) /*!< in: transaction */
+{
+ ut_ad(lock_mutex_own());
+
+ for (/* No op */;
+ lock != NULL;
+ lock = lock_rec_get_next_on_page(lock)) {
+
+ if (lock->trx == trx
+ && lock->type_mode == type_mode
+ && lock_rec_get_n_bits(lock) > heap_no) {
+
+ return(lock);
+ }
+ }
+
+ return(NULL);
+}
+
+/*********************************************************************//**
+Checks if a transaction has the specified table lock, or stronger. This
+function should only be called by the thread that owns the transaction.
+@return lock or NULL */
+UNIV_INLINE
+const lock_t*
+lock_table_has(
+/*===========*/
+ const trx_t* trx, /*!< in: transaction */
+ const dict_table_t* table, /*!< in: table */
+ lock_mode in_mode)/*!< in: lock mode */
+{
+ /* Look for stronger locks the same trx already has on the table */
+
+ for (lock_list::const_iterator it = trx->lock.table_locks.begin(),
+ end = trx->lock.table_locks.end(); it != end; ++it) {
+
+ const lock_t* lock = *it;
+
+ if (lock == NULL) {
+ continue;
+ }
+
+ lock_mode mode = lock_get_mode(lock);
+
+ ut_ad(trx == lock->trx);
+ ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
+ ut_ad(lock->un_member.tab_lock.table != NULL);
+
+ if (table == lock->un_member.tab_lock.table
+ && lock_mode_stronger_or_eq(mode, in_mode)) {
+
+ ut_ad(!lock_get_wait(lock));
+
+ return(lock);
+ }
+ }
+
+ return(NULL);
+}
+
/* vim: set filetype=c: */
diff --git a/storage/innobase/include/lock0types.h b/storage/innobase/include/lock0types.h
index 90c6f6cea4c..1ae319e6b79 100644
--- a/storage/innobase/include/lock0types.h
+++ b/storage/innobase/include/lock0types.h
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,12 +24,17 @@ The transaction lock system global types
Created 5/7/1996 Heikki Tuuri
*******************************************************/
+#include "dict0types.h"
+#include "ut0lst.h"
+
#ifndef lock0types_h
#define lock0types_h
#define lock_t ib_lock_t
+
struct lock_t;
struct lock_sys_t;
+struct lock_table_t;
/* Basic lock modes */
enum lock_mode {
@@ -43,5 +49,225 @@ enum lock_mode {
LOCK_NONE_UNSET = 255
};
+/** Convert the given enum value into a string.
+@param[in] mode the lock mode
+@return human-readable string for the given enum value */
+inline
+const char* lock_mode_string(enum lock_mode mode)
+{
+ switch (mode) {
+ case LOCK_IS:
+ return("LOCK_IS");
+ case LOCK_IX:
+ return("LOCK_IX");
+ case LOCK_S:
+ return("LOCK_S");
+ case LOCK_X:
+ return("LOCK_X");
+ case LOCK_AUTO_INC:
+ return("LOCK_AUTO_INC");
+ case LOCK_NONE:
+ return("LOCK_NONE");
+ case LOCK_NONE_UNSET:
+ return("LOCK_NONE_UNSET");
+ default:
+ ut_error;
+ }
+}
+
+/** A table lock */
+struct lock_table_t {
+ dict_table_t* table; /*!< database table in dictionary
+ cache */
+ UT_LIST_NODE_T(ib_lock_t)
+ locks; /*!< list of locks on the same
+ table */
+ /** Print the table lock into the given output stream
+ @param[in,out] out the output stream
+ @return the given output stream. */
+ std::ostream& print(std::ostream& out) const;
+};
+
+/** Record lock for a page */
+struct lock_rec_t {
+ ib_uint32_t space; /*!< space id */
+ ib_uint32_t page_no; /*!< page number */
+ ib_uint32_t n_bits; /*!< number of bits in the lock
+ bitmap; NOTE: the lock bitmap is
+ placed immediately after the
+ lock struct */
+
+ /** Print the record lock into the given output stream
+ @param[in,out] out the output stream
+ @return the given output stream. */
+ std::ostream& print(std::ostream& out) const;
+};
+
+/** Print the record lock into the given output stream
+@param[in,out] out the output stream
+@return the given output stream. */
+inline
+std::ostream& lock_rec_t::print(std::ostream& out) const
+{
+ out << "[lock_rec_t: space=" << space << ", page_no=" << page_no
+ << ", n_bits=" << n_bits << "]";
+ return(out);
+}
+
+inline
+std::ostream&
+operator<<(std::ostream& out, const lock_rec_t& lock)
+{
+ return(lock.print(out));
+}
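
With print() and the stream operator above, a record lock can be written directly to any std::ostream; a trivial usage sketch (assumes <iostream> is available):

/* Usage sketch: dump a record lock to the error stream. */
void dump_rec_lock(const lock_rec_t& rec_lock)
{
	std::cerr << rec_lock << '\n';
	/* prints e.g. [lock_rec_t: space=5, page_no=3, n_bits=72] */
}
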
+#define LOCK_MODE_MASK 0xFUL /*!< mask used to extract mode from the
+ type_mode field in a lock */
+/** Lock types */
+/* @{ */
+#define LOCK_TABLE 16U /*!< table lock */
+#define LOCK_REC 32U /*!< record lock */
+#define LOCK_TYPE_MASK 0xF0UL /*!< mask used to extract lock type from the
+ type_mode field in a lock */
+#if LOCK_MODE_MASK & LOCK_TYPE_MASK
+# error "LOCK_MODE_MASK & LOCK_TYPE_MASK"
#endif
+
+#define LOCK_WAIT 256U /*!< Waiting lock flag; when set, it
+ means that the lock has not yet been
+ granted, it is just waiting for its
+ turn in the wait queue */
+/* Precise modes */
+#define LOCK_ORDINARY 0 /*!< this flag denotes an ordinary
+ next-key lock in contrast to LOCK_GAP
+ or LOCK_REC_NOT_GAP */
+#define LOCK_GAP 512U /*!< when this bit is set, it means that the
+ lock holds only on the gap before the record;
+ for instance, an x-lock on the gap does not
+ give permission to modify the record on which
+ the bit is set; locks of this type are created
+ when records are removed from the index chain
+ of records */
+#define LOCK_REC_NOT_GAP 1024U /*!< this bit means that the lock is only on
+ the index record and does NOT block inserts
+ to the gap before the index record; this is
+ used in the case when we retrieve a record
+ with a unique key, and is also used in
+ locking plain SELECTs (not part of UPDATE
+ or DELETE) when the user has set the READ
+ COMMITTED isolation level */
+#define LOCK_INSERT_INTENTION 2048U/*!< this bit is set when we place a waiting
+ gap type record lock request in order to let
+ an insert of an index record wait until
+ there are no conflicting locks by other
+ transactions on the gap; note that this flag
+ remains set when the waiting lock is granted,
+ or if the lock is inherited to a neighboring
+ record */
+#define LOCK_PREDICATE 8192U /*!< Predicate lock */
+#define LOCK_PRDT_PAGE 16384U /*!< Page lock */
+
+
+#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION|LOCK_PREDICATE|LOCK_PRDT_PAGE)&LOCK_MODE_MASK
+# error
+#endif
+#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION|LOCK_PREDICATE|LOCK_PRDT_PAGE)&LOCK_TYPE_MASK
+# error
+#endif
+/* @} */
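
The masks and flags above partition the 32-bit type_mode word: the mode sits in the low nibble, the lock type in the next nibble, and the higher bits carry flags. A minimal sketch of composing and decomposing such a word, using only the definitions above:

/* Sketch: build a type_mode for a waiting gap X-lock on a record,
   then take it apart again with the masks defined above. */
ulint type_mode = LOCK_REC | LOCK_X | LOCK_GAP | LOCK_WAIT;

ulint mode = type_mode & LOCK_MODE_MASK;          /* == LOCK_X */
ulint type = type_mode & LOCK_TYPE_MASK;          /* == LOCK_REC */
bool  is_waiting = (type_mode & LOCK_WAIT) != 0;  /* true */
bool  is_gap     = (type_mode & LOCK_GAP) != 0;   /* true */
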
+
+/** Lock struct; protected by lock_sys->mutex */
+struct ib_lock_t
+{
+ trx_t* trx; /*!< transaction owning the
+ lock */
+ UT_LIST_NODE_T(ib_lock_t)
+ trx_locks; /*!< list of the locks of the
+ transaction */
+
+ dict_index_t* index; /*!< index for a record lock */
+
+ ib_lock_t* hash; /*!< hash chain node for a record
+ lock. The link node in a singly linked
+ list, used during hashing. */
+
+ /** time(NULL) of the lock request creation.
+ Used for computing wait_time and diagnostics only.
+ Note: bogus durations may be reported
+ when the system time is adjusted! */
+ time_t requested_time;
+ /** Cumulated wait time in seconds.
+ Note: may be bogus when the system time is adjusted! */
+ ulint wait_time;
+
+ union {
+ lock_table_t tab_lock;/*!< table lock */
+ lock_rec_t rec_lock;/*!< record lock */
+ } un_member; /*!< lock details */
+
+ ib_uint32_t type_mode; /*!< lock type, mode, LOCK_GAP or
+ LOCK_REC_NOT_GAP,
+ LOCK_INSERT_INTENTION,
+ wait flag, ORed */
+
+ /** Determine if the lock object is a record lock.
+ @return true if record lock, false otherwise. */
+ bool is_record_lock() const
+ {
+ return(type() == LOCK_REC);
+ }
+
+ bool is_waiting() const
+ {
+ return(type_mode & LOCK_WAIT);
+ }
+
+ bool is_gap() const
+ {
+ return(type_mode & LOCK_GAP);
+ }
+
+ bool is_record_not_gap() const
+ {
+ return(type_mode & LOCK_REC_NOT_GAP);
+ }
+
+ bool is_insert_intention() const
+ {
+ return(type_mode & LOCK_INSERT_INTENTION);
+ }
+
+ ulint type() const {
+ return(type_mode & LOCK_TYPE_MASK);
+ }
+
+ enum lock_mode mode() const
+ {
+ return(static_cast<enum lock_mode>(type_mode & LOCK_MODE_MASK));
+ }
+
+ /** Print the lock object into the given output stream.
+ @param[in,out] out the output stream
+ @return the given output stream. */
+ std::ostream& print(std::ostream& out) const;
+
+ /** Convert the member 'type_mode' into a human readable string.
+ @return human readable string */
+ std::string type_mode_string() const;
+
+ const char* type_string() const
+ {
+ switch (type_mode & LOCK_TYPE_MASK) {
+ case LOCK_REC:
+ return("LOCK_REC");
+ case LOCK_TABLE:
+ return("LOCK_TABLE");
+ default:
+ ut_error;
+ }
+ }
+};
+
+typedef UT_LIST_BASE_NODE_T(ib_lock_t) trx_lock_list_t;
+
+#endif /* lock0types_h */
diff --git a/storage/innobase/include/log0crypt.h b/storage/innobase/include/log0crypt.h
index 97ceac7b8a7..7a14b022e66 100644
--- a/storage/innobase/include/log0crypt.h
+++ b/storage/innobase/include/log0crypt.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (C) 2013, 2015, Google Inc. All Rights Reserved.
-Copyright (C) 2014, 2018, MariaDB Corporation.
+Copyright (C) 2014, 2017, MariaDB Corporation. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -22,28 +22,25 @@ Innodb log encrypt/decrypt
Created 11/25/2013 Minli Zhu
Modified Jan Lindström jan.lindstrom@mariadb.com
+MDEV-11782: Rewritten for MariaDB 10.2 by Marko Mäkelä, MariaDB Corporation.
*******************************************************/
#ifndef log0crypt_h
#define log0crypt_h
-#include "univ.i"
-#include "ut0byte.h"
-#include "my_crypt.h"
-#include "os0file.h"
+#include "log0log.h"
-typedef int Crypt_result;
-
-/* If true, enable redo log encryption. */
+/** innodb_encrypt_log: whether to encrypt the redo log */
extern my_bool srv_encrypt_log;
-/***********************************************************************
-Set next checkpoint's key version to latest one, and generate new key */
+/** Initialize the redo log encryption key and random parameters
+when creating a new redo log.
+The random parameters will be persisted in the log checkpoint pages.
+@see log_crypt_write_checkpoint_buf()
+@see log_crypt_read_checkpoint_buf()
+@return whether the operation succeeded */
UNIV_INTERN
-void
-log_crypt_set_ver_and_key(
-/*======================*/
- ib_uint64_t next_checkpoint_no);/*!< in: next checkpoint no */
-
+bool
+log_crypt_init();
/*********************************************************************//**
Writes the crypto (version, msg and iv) info, which has been used for
@@ -55,120 +52,70 @@ log_crypt_write_checkpoint_buf(
/*===========================*/
byte* buf); /*!< in/out: checkpoint buffer */
-/*********************************************************************//**
-Read the crypto (version, msg and iv) info, which has been used for
-log blocks with lsn <= this checkpoint's lsn, from a log header's
-checkpoint buf. */
+/** Read the MariaDB 10.1 checkpoint crypto (version, msg and iv) info.
+@param[in] buf checkpoint buffer
+@return whether the operation was successful */
UNIV_INTERN
bool
-log_crypt_read_checkpoint_buf(
-/*===========================*/
- const byte* buf); /*!< in: checkpoint buffer */
+log_crypt_101_read_checkpoint(const byte* buf);
-/********************************************************
-Encrypt one or more log block before it is flushed to disk */
-UNIV_INTERN
-void
-log_encrypt_before_write(
-/*=====================*/
- ib_uint64_t next_checkpoint_no, /*!< in: log group to be flushed */
- byte* block, /*!< in/out: pointer to a log block */
- lsn_t lsn, /*!< in: log sequence number of
- the start of the buffer */
- const ulint size); /*!< in: size of log blocks */
-
-/********************************************************
-Decrypt a specified log segment after they are read from a log file to a buffer.
-*/
-UNIV_INTERN
-void
-log_decrypt_after_read(
-/*===================*/
- byte* frame, /*!< in/out: log segment */
- lsn_t lsn, /*!< in: log sequence number of the start
- of the buffer */
- const ulint size); /*!< in: log segment size */
-
-/* Error codes for crypt info */
-typedef enum {
- LOG_UNENCRYPTED = 0,
- LOG_CRYPT_KEY_NOT_FOUND = 1,
- LOG_DECRYPT_MAYBE_FAILED = 2
-} log_crypt_err_t;
-
-/********************************************************
-Check is the checkpoint information encrypted. This check
-is based on fact has log group crypt info and based
-on this crypt info was the key version different from
-unencrypted key version. There is no realible way to
-distinguish encrypted log block from corrupted log block,
-but if log block corruption is found this function is
-used to find out if log block is maybe encrypted but
-encryption key, key management plugin or encryption
-algorithm does not match.
-@return TRUE, if log block may be encrypted */
+/** Decrypt a MariaDB 10.1 redo log block.
+@param[in,out] buf log block
+@return whether the decryption was successful */
UNIV_INTERN
-ibool
-log_crypt_block_maybe_encrypted(
-/*============================*/
- const byte* log_block, /*!< in: log block */
- log_crypt_err_t* err_info); /*!< out: error info */
-
-/********************************************************
-Print crypt error message to error log */
+bool
+log_crypt_101_read_block(byte* buf);
+
+/** Read the checkpoint crypto (version, msg and iv) info.
+@param[in] buf checkpoint buffer
+@return whether the operation was successful */
UNIV_INTERN
-void
-log_crypt_print_error(
-/*==================*/
- log_crypt_err_t err_info); /*!< out: error info */
+bool
+log_crypt_read_checkpoint_buf(const byte* buf);
-/*********************************************************************//**
-Print checkpoint no from log block and all encryption keys from
-checkpoints if they are present. Used for problem analysis. */
+/** Encrypt or decrypt log blocks.
+@param[in,out] buf log blocks to encrypt or decrypt
+@param[in] lsn log sequence number of the start of the buffer
+@param[in] size size of the buffer, in bytes
+@param[in] decrypt whether to decrypt instead of encrypting */
+UNIV_INTERN
void
-log_crypt_print_checkpoint_keys(
-/*============================*/
- const byte* log_block);
+log_crypt(byte* buf, lsn_t lsn, ulint size, bool decrypt = false);
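
Since one routine now serves both directions, an encrypt/decrypt round trip over the same buffer is symmetric; a minimal usage sketch (the buffer contents and lsn are assumptions for illustration, and <string.h> is assumed for memset):

/* Sketch: encrypt one log block in place, then decrypt it back. */
byte buf[OS_FILE_LOG_BLOCK_SIZE];
memset(buf, 0, sizeof buf);				/* placeholder contents */

log_crypt(buf, LOG_START_LSN, sizeof buf);		/* encrypt (default) */
log_crypt(buf, LOG_START_LSN, sizeof buf, true);	/* decrypt back */
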
-/** Encrypt temporary log block.
-@param[in] src_block block to encrypt or decrypt
+/** Encrypt or decrypt a temporary file block.
+@param[in] src block to encrypt or decrypt
@param[in] size size of the block
-@param[out] dst_block destination block
+@param[out] dst destination block
@param[in] offs offset to block
-@param[in] space_id tablespace id
-@return true if successfull, false in case of failure
-*/
+@param[in] encrypt true=encrypt; false=decrypt
+@return whether the operation succeeded */
UNIV_INTERN
bool
log_tmp_block_encrypt(
- const byte* src_block,
- ulint size,
- byte* dst_block,
- os_offset_t offs,
- ulint space_id)
- MY_ATTRIBUTE((warn_unused_result));
-
-/** Decrypt temporary log block.
-@param[in] src_block block to encrypt or decrypt
+ const byte* src,
+ ulint size,
+ byte* dst,
+ uint64_t offs,
+ bool encrypt = true)
+ MY_ATTRIBUTE((warn_unused_result, nonnull));
+
+/** Decrypt a temporary file block.
+@param[in] src block to decrypt
@param[in] size size of the block
-@param[out] dst_block destination block
+@param[out] dst destination block
@param[in] offs offset to block
-@param[in] space_id tablespace id
-@return true if successfull, false in case of failure
-*/
-UNIV_INTERN
+@return whether the operation succeeded */
+inline
bool
log_tmp_block_decrypt(
- const byte* src_block,
- ulint size,
- byte* dst_block,
- os_offset_t offs,
- ulint space_id)
- MY_ATTRIBUTE((warn_unused_result));
-
-/** Find out is temporary log files encrypted.
-@return true if temporary log file should be encrypted, false if not */
-UNIV_INTERN
-bool
-log_tmp_is_encrypted() MY_ATTRIBUTE((warn_unused_result));
+ const byte* src,
+ ulint size,
+ byte* dst,
+ uint64_t offs)
+{
+ return(log_tmp_block_encrypt(src, size, dst, offs, false));
+}
+
+/** @return whether temporary files are encrypted */
+inline bool log_tmp_is_encrypted() { return srv_encrypt_log; }
#endif // log0crypt.h
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
index a30bf563d11..0fd983a1a10 100644
--- a/storage/innobase/include/log0log.h
+++ b/storage/innobase/include/log0log.h
@@ -1,8 +1,8 @@
/*****************************************************************************
-Copyright (c) 1995, 2013, Oracle and/or its affiliates. All rights reserved.
+Copyright (c) 1995, 2017, Oracle and/or its affiliates. All rights reserved.
Copyright (c) 2009, Google Inc.
-Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -34,77 +34,45 @@ Created 12/9/1995 Heikki Tuuri
#ifndef log0log_h
#define log0log_h
-#include "univ.i"
-#include "ut0byte.h"
-#include "ut0lst.h"
-#ifndef UNIV_HOTBACKUP
-#include "sync0sync.h"
+#include "dyn0buf.h"
#include "sync0rw.h"
-#endif /* !UNIV_HOTBACKUP */
-#include "log0crypt.h"
+#include "log0types.h"
+#include "os0event.h"
+#include "os0file.h"
-#define LSN_MAX IB_UINT64_MAX
-
-#define LSN_PF UINT64PF
+#ifndef UINT32_MAX
+#define UINT32_MAX (4294967295U)
+#endif
-/** Redo log buffer */
-struct log_t;
/** Redo log group */
struct log_group_t;
-#ifdef UNIV_DEBUG
-/** Flag: write to log file? */
-extern ibool log_do_write;
-/** Flag: enable debug output when writing to the log? */
-extern ibool log_debug_writes;
-#else /* UNIV_DEBUG */
-/** Write to log */
-# define log_do_write TRUE
-#endif /* UNIV_DEBUG */
-
-/** Wait modes for log_write_up_to @{ */
-#define LOG_NO_WAIT 91
-#define LOG_WAIT_ONE_GROUP 92
-#define LOG_WAIT_ALL_GROUPS 93
-/* @} */
-/** Maximum number of log groups in log_group_t::checkpoint_buf */
-#define LOG_MAX_N_GROUPS 32
+/** Magic value to use instead of log checksums when they are disabled */
+#define LOG_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
-/*******************************************************************//**
-Calculates where in log files we find a specified lsn.
-@return log file number */
-UNIV_INTERN
-ulint
-log_calc_where_lsn_is(
-/*==================*/
- ib_int64_t* log_file_offset, /*!< out: offset in that file
- (including the header) */
- ib_uint64_t first_header_lsn, /*!< in: first log file start
- lsn */
- ib_uint64_t lsn, /*!< in: lsn whose position to
- determine */
- ulint n_log_files, /*!< in: total number of log
- files */
- ib_int64_t log_file_size); /*!< in: log file size
- (including the header) */
-#ifndef UNIV_HOTBACKUP
-/************************************************************//**
-Writes to the log the string given. The log must be released with
-log_release.
-@return end lsn of the log record, zero if did not succeed */
+/* Margin of free space that must remain in the smallest log group before a
+new query step which modifies the database is started */
+
+#define LOG_CHECKPOINT_FREE_PER_THREAD (4 * UNIV_PAGE_SIZE)
+#define LOG_CHECKPOINT_EXTRA_FREE (8 * UNIV_PAGE_SIZE)
+
+typedef ulint (*log_checksum_func_t)(const byte* log_block);
+
+/** Pointer to the log checksum calculation function. Protected with
+log_sys->mutex. */
+extern log_checksum_func_t log_checksum_algorithm_ptr;
+
+/** Append a string to the log.
+@param[in] str string
+@param[in] len string length
+@param[out] start_lsn start LSN of the log record
+@return end lsn of the log record, or zero on failure */
UNIV_INLINE
lsn_t
log_reserve_and_write_fast(
-/*=======================*/
- const void* str, /*!< in: string */
- ulint len, /*!< in: string length */
- lsn_t* start_lsn);/*!< out: start lsn of the log record */
-/***********************************************************************//**
-Releases the log mutex. */
-UNIV_INLINE
-void
-log_release(void);
-/*=============*/
+ const void* str,
+ ulint len,
+ lsn_t* start_lsn);
/***********************************************************************//**
Checks if there is need for a log buffer flush or a new checkpoint, and does
this if yes. Any database operation should call this when it has modified
@@ -114,34 +82,45 @@ UNIV_INLINE
void
log_free_check(void);
/*================*/
-/************************************************************//**
-Opens the log for log_write_low. The log must be closed with log_close and
-released with log_release.
-@return start lsn of the log record */
-UNIV_INTERN
+
+/** Extends the log buffer.
+@param[in] len requested minimum size in bytes */
+void
+log_buffer_extend(
+ ulint len);
+
+/** Check the margin so that the redo log written since the last checkpoint
+is not overwritten. If the pending log write is estimated to exceed
+log_group_capacity, wait until enough of a checkpoint has been completed.
+@param[in] len length of the data to be written */
+
+void
+log_margin_checkpoint_age(
+ ulint len);
+
+/** Open the log for log_write_low. The log must be closed with log_close.
+@param[in] len length of the data to be written
+@return start lsn of the log record */
lsn_t
log_reserve_and_open(
-/*=================*/
- ulint len); /*!< in: length of data to be catenated */
+ ulint len);
/************************************************************//**
Writes to the log the string given. It is assumed that the caller holds the
log mutex. */
-UNIV_INTERN
void
log_write_low(
/*==========*/
- byte* str, /*!< in: string */
- ulint str_len); /*!< in: string length */
+ const byte* str, /*!< in: string */
+ ulint str_len); /*!< in: string length */
/************************************************************//**
Closes the log.
-@return lsn */
-UNIV_INTERN
+@return lsn */
lsn_t
log_close(void);
/*===========*/
/************************************************************//**
Gets the current lsn.
-@return current lsn */
+@return current lsn */
UNIV_INLINE
lsn_t
log_get_lsn(void);
@@ -163,7 +142,7 @@ log_get_flush_lsn(void);
/****************************************************************
Gets the log group capacity. It is OK to read the value without
holding log_sys->mutex because it is constant.
-@return log group capacity */
+@return log group capacity */
UNIV_INLINE
lsn_t
log_get_capacity(void);
@@ -171,37 +150,31 @@ log_get_capacity(void);
/****************************************************************
Get log_sys::max_modified_age_async. It is OK to read the value without
holding log_sys::mutex because it is constant.
-@return max_modified_age_async */
+@return max_modified_age_async */
UNIV_INLINE
lsn_t
log_get_max_modified_age_async(void);
/*================================*/
-/******************************************************//**
-Initializes the log. */
-UNIV_INTERN
+/** Initializes the redo logging subsystem. */
void
-log_init(void);
-/*==========*/
-/******************************************************************//**
-Inits a log group to the log system. */
-UNIV_INTERN
+log_sys_init();
+
+/** Initialize the redo log.
+@param[in] n_files number of files */
void
-log_group_init(
-/*===========*/
- ulint id, /*!< in: group id */
- ulint n_files, /*!< in: number of log files */
- lsn_t file_size, /*!< in: log file size in bytes */
- ulint space_id, /*!< in: space id of the file space
- which contains the log files of this
- group */
- ulint archive_space_id); /*!< in: space id of the file space
- which contains some archived log
- files for this group; currently, only
- for the first log group this is
- used */
+log_init(ulint n_files);
+/** Calculate the recommended highest values for lsn - last_checkpoint_lsn
+and lsn - buf_get_oldest_modification().
+@param[in] file_size requested innodb_log_file_size
+@retval true on success
+@retval false if the smallest log group is too small to
+accommodate the number of OS threads in the database server */
+bool
+log_set_capacity(ulonglong file_size)
+ MY_ATTRIBUTE((warn_unused_result));
+
/******************************************************//**
Completes an i/o to a log file. */
-UNIV_INTERN
void
log_io_complete(
/*============*/
@@ -211,227 +184,88 @@ This function is called, e.g., when a transaction wants to commit. It checks
that the log has been written to the log file up to the last log entry written
by the transaction. If there is a flush running, it waits and checks if the
flush flushed enough. If not, starts a new flush. */
-UNIV_INTERN
void
log_write_up_to(
/*============*/
lsn_t lsn, /*!< in: log sequence number up to which
the log should be written, LSN_MAX if not specified */
- ulint wait, /*!< in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
- or LOG_WAIT_ALL_GROUPS */
- ibool flush_to_disk);
- /*!< in: TRUE if we want the written log
+ bool flush_to_disk);
+ /*!< in: true if we want the written log
also to be flushed to disk */
-/****************************************************************//**
-Does a syncronous flush of the log buffer to disk. */
-UNIV_INTERN
+/** Write to the log file up to the last log entry.
+@param[in] sync whether the written log should
+also be flushed to disk */
void
-log_buffer_flush_to_disk(void);
-/*==========================*/
+log_buffer_flush_to_disk(
+ bool sync = true);
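
The sync parameter defaulting to true preserves the old synchronous behaviour; a minimal sketch of the two call shapes:

/* Sketch: make everything in the log buffer durable (write + flush),
   versus initiating the write without waiting for the flush. */
log_buffer_flush_to_disk();		/* sync = true: write and flush */
log_buffer_flush_to_disk(false);	/* write only; flush happens later */
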
/****************************************************************//**
This functions writes the log buffer to the log file and if 'flush'
is set it forces a flush of the log file as well. This is meant to be
called from background master thread only as it does not wait for
the write (+ possible flush) to finish. */
-UNIV_INTERN
void
log_buffer_sync_in_background(
/*==========================*/
- ibool flush); /*<! in: flush the logs to disk */
-/******************************************************//**
-Makes a checkpoint. Note that this function does not flush dirty
+ bool flush); /*!< in: flush the logs to disk */
+/** Make a checkpoint. Note that this function does not flush dirty
 blocks from the buffer pool: it only checks what the lsn of the oldest
 modification in the pool is, and writes information about that lsn in
-log files. Use log_make_checkpoint_at to flush also the pool.
-@return TRUE if success, FALSE if a checkpoint write was already running */
-UNIV_INTERN
-ibool
-log_checkpoint(
-/*===========*/
- ibool sync, /*!< in: TRUE if synchronous operation is
- desired */
- ibool write_always); /*!< in: the function normally checks if the
- the new checkpoint would have a greater
- lsn than the previous one: if not, then no
- physical write is done; by setting this
- parameter TRUE, a physical write will always be
- made to log files */
-/****************************************************************//**
-Makes a checkpoint at a given lsn or later. */
-UNIV_INTERN
-void
-log_make_checkpoint_at(
-/*===================*/
- lsn_t lsn, /*!< in: make a checkpoint at this or a
- later lsn, if LSN_MAX, makes
- a checkpoint at the latest lsn */
- ibool write_always); /*!< in: the function normally checks if
- the new checkpoint would have a
- greater lsn than the previous one: if
- not, then no physical write is done;
- by setting this parameter TRUE, a
- physical write will always be made to
- log files */
+log files. Use log_make_checkpoint() to also flush the pool.
+@param[in] sync whether to wait for the write to complete
+@return true if success, false if a checkpoint write was already running */
+bool log_checkpoint(bool sync);
+
+/** Make a checkpoint */
+void log_make_checkpoint();
+
/****************************************************************//**
Makes a checkpoint at the latest lsn and writes it to first page of each
data file in the database, so that we know that the file spaces contain
all modifications up to that lsn. This can only be called at database
shutdown. This function also writes all log in log files to the log archive. */
-UNIV_INTERN
void
logs_empty_and_mark_files_at_shutdown(void);
/*=======================================*/
-/******************************************************//**
-Reads a checkpoint info from a log group header to log_sys->checkpoint_buf. */
-UNIV_INTERN
-void
-log_group_read_checkpoint_info(
-/*===========================*/
- log_group_t* group, /*!< in: log group */
- ulint field); /*!< in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
-/*******************************************************************//**
-Gets info from a checkpoint about a log group. */
-UNIV_INTERN
+/** Read a log group header page to log_sys->checkpoint_buf.
+@param[in] group log group
+@param[in] header 0 or LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
void
-log_checkpoint_get_nth_group_info(
-/*==============================*/
- const byte* buf, /*!< in: buffer containing checkpoint info */
- ulint n, /*!< in: nth slot */
- ulint* file_no,/*!< out: archived file number */
- ulint* offset);/*!< out: archived file offset */
-/******************************************************//**
-Writes checkpoint info to groups. */
-UNIV_INTERN
+log_group_header_read(
+ const log_group_t* group,
+ ulint header);
+/** Write checkpoint info to the log header and invoke log_mutex_exit().
+@param[in] sync whether to wait for the write to complete
+@param[in] end_lsn start LSN of the MLOG_CHECKPOINT mini-transaction */
void
-log_groups_write_checkpoint_info(void);
-/*==================================*/
-/********************************************************************//**
-Starts an archiving operation.
-@return TRUE if succeed, FALSE if an archiving operation was already running */
-UNIV_INTERN
-ibool
-log_archive_do(
-/*===========*/
- ibool sync, /*!< in: TRUE if synchronous operation is desired */
- ulint* n_bytes);/*!< out: archive log buffer size, 0 if nothing to
- archive */
-/****************************************************************//**
-Writes the log contents to the archive up to the lsn when this function was
-called, and stops the archiving. When archiving is started again, the archived
-log file numbers start from a number one higher, so that the archiving will
-not write again to the archived log files which exist when this function
-returns.
-@return DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_stop(void);
-/*==================*/
-/****************************************************************//**
-Starts again archiving which has been stopped.
-@return DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_start(void);
-/*===================*/
-/****************************************************************//**
-Stop archiving the log so that a gap may occur in the archived log files.
-@return DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_noarchivelog(void);
-/*==========================*/
-/****************************************************************//**
-Start archiving the log so that a gap may occur in the archived log files.
-@return DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_archivelog(void);
-/*========================*/
-/******************************************************//**
-Generates an archived log file name. */
-UNIV_INTERN
-void
-log_archived_file_name_gen(
-/*=======================*/
- char* buf, /*!< in: buffer where to write */
- ulint id, /*!< in: group id */
- ulint file_no);/*!< in: file number */
-#else /* !UNIV_HOTBACKUP */
-/******************************************************//**
-Writes info to a buffer of a log group when log files are created in
-backup restoration. */
-UNIV_INTERN
-void
-log_reset_first_header_and_checkpoint(
-/*==================================*/
- byte* hdr_buf,/*!< in: buffer which will be written to the
- start of the first log file */
- ib_uint64_t start); /*!< in: lsn of the start of the first log file;
- we pretend that there is a checkpoint at
- start + LOG_BLOCK_HDR_SIZE */
-#endif /* !UNIV_HOTBACKUP */
-/********************************************************************//**
+log_write_checkpoint_info(bool sync, lsn_t end_lsn);
+
+/** Set extra data to be written to the redo log during checkpoint.
+@param[in] buf data to be appended on checkpoint, or NULL
+@return pointer to previous data to be appended on checkpoint */
+mtr_buf_t*
+log_append_on_checkpoint(
+ mtr_buf_t* buf);
+/**
Checks that there is enough free space in the log to start a new query step.
Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
function may only be called if the calling thread owns no synchronization
objects! */
-UNIV_INTERN
void
log_check_margins(void);
-/*===================*/
-#ifndef UNIV_HOTBACKUP
-/******************************************************//**
-Reads a specified log segment to a buffer. */
-UNIV_INTERN
-void
-log_group_read_log_seg(
-/*===================*/
- ulint type, /*!< in: LOG_ARCHIVE or LOG_RECOVER */
- byte* buf, /*!< in: buffer where to read */
- log_group_t* group, /*!< in: log group */
- lsn_t start_lsn, /*!< in: read area start */
- lsn_t end_lsn); /*!< in: read area end */
-/******************************************************//**
-Writes a buffer to a log file group. */
-UNIV_INTERN
-void
-log_group_write_buf(
-/*================*/
- log_group_t* group, /*!< in: log group */
- byte* buf, /*!< in: buffer */
- ulint len, /*!< in: buffer len; must be divisible
- by OS_FILE_LOG_BLOCK_SIZE */
- lsn_t start_lsn, /*!< in: start lsn of the buffer; must
- be divisible by
- OS_FILE_LOG_BLOCK_SIZE */
- ulint new_data_offset);/*!< in: start offset of new data in
- buf: this parameter is used to decide
- if we have to write a new log file
- header */
+
/********************************************************//**
Sets the field values in group to correspond to a given lsn. For this function
to work, the values must already be correctly initialized to correspond to
some lsn, for instance, a checkpoint lsn. */
-UNIV_INTERN
void
log_group_set_fields(
/*=================*/
log_group_t* group, /*!< in/out: group */
lsn_t lsn); /*!< in: lsn for which the values should be
set */
-/******************************************************//**
-Calculates the data capacity of a log group, when the log file headers are not
-included.
-@return capacity in bytes */
-UNIV_INTERN
-lsn_t
-log_group_get_capacity(
-/*===================*/
- const log_group_t* group); /*!< in: log group */
-#endif /* !UNIV_HOTBACKUP */
/************************************************************//**
Gets a log block flush bit.
-@return TRUE if this block was the first to be written in a log flush */
+@return TRUE if this block was the first to be written in a log flush */
UNIV_INLINE
ibool
log_block_get_flush_bit(
@@ -439,7 +273,7 @@ log_block_get_flush_bit(
const byte* log_block); /*!< in: log block */
/************************************************************//**
Gets a log block number stored in the header.
-@return log block number stored in the block header */
+@return log block number stored in the block header */
UNIV_INLINE
ulint
log_block_get_hdr_no(
@@ -447,7 +281,7 @@ log_block_get_hdr_no(
const byte* log_block); /*!< in: log block */
/************************************************************//**
Gets a log block data length.
-@return log block data length measured as a byte offset from the block start */
+@return log block data length measured as a byte offset from the block start */
UNIV_INLINE
ulint
log_block_get_data_len(
@@ -463,15 +297,31 @@ log_block_set_data_len(
ulint len); /*!< in: data length */
/************************************************************//**
Calculates the checksum for a log block.
-@return checksum */
+@return checksum */
UNIV_INLINE
ulint
log_block_calc_checksum(
/*====================*/
const byte* block); /*!< in: log block */
+
+/** Calculates the checksum for a log block using the CRC32 algorithm.
+@param[in] block log block
+@return checksum */
+UNIV_INLINE
+ulint
+log_block_calc_checksum_crc32(
+ const byte* block);
+
+/** Calculates the checksum for a log block using the "no-op" algorithm.
+@param[in] block the redo log block
+@return the calculated checksum value */
+UNIV_INLINE
+ulint
+log_block_calc_checksum_none(const byte* block);
+
/************************************************************//**
Gets a log block checksum field value.
-@return checksum */
+@return checksum */
UNIV_INLINE
ulint
log_block_get_checksum(
@@ -504,7 +354,7 @@ log_block_set_first_rec_group(
ulint offset); /*!< in: offset, 0 if none */
/************************************************************//**
Gets a log block checkpoint number field (4 lowest bytes).
-@return checkpoint no (4 lowest bytes) */
+@return checkpoint no (4 lowest bytes) */
UNIV_INLINE
ulint
log_block_get_checkpoint_no(
@@ -519,17 +369,8 @@ log_block_init(
byte* log_block, /*!< in: pointer to the log buffer */
lsn_t lsn); /*!< in: lsn within the log block */
/************************************************************//**
-Initializes a log block in the log buffer in the old, < 3.23.52 format, where
-there was no checksum yet. */
-UNIV_INLINE
-void
-log_block_init_in_old_format(
-/*=========================*/
- byte* log_block, /*!< in: pointer to the log buffer */
- lsn_t lsn); /*!< in: lsn within the log block */
-/************************************************************//**
Converts a lsn to a log block number.
-@return log block number, it is > 0 and <= 1G */
+@return log block number, it is > 0 and <= 1G */
UNIV_INLINE
ulint
log_block_convert_lsn_to_no(
@@ -537,60 +378,41 @@ log_block_convert_lsn_to_no(
lsn_t lsn); /*!< in: lsn of a byte within the block */
/******************************************************//**
Prints info of the log. */
-UNIV_INTERN
void
log_print(
/*======*/
FILE* file); /*!< in: file where to print */
/******************************************************//**
Peeks the current lsn.
-@return TRUE if success, FALSE if could not get the log system mutex */
-UNIV_INTERN
+@return TRUE if success, FALSE if could not get the log system mutex */
ibool
log_peek_lsn(
/*=========*/
lsn_t* lsn); /*!< out: if returns TRUE, current lsn is here */
/**********************************************************************//**
Refreshes the statistics used to print per-second averages. */
-UNIV_INTERN
void
log_refresh_stats(void);
/*===================*/
/********************************************************//**
Closes all log groups. */
-UNIV_INTERN
void
log_group_close_all(void);
/*=====================*/
-/********************************************************//**
-Shutdown the log system but do not release all the memory. */
-UNIV_INTERN
+/** Shut down the redo log subsystem. */
void
-log_shutdown(void);
-/*==============*/
-/********************************************************//**
-Free the log system data structures. */
-UNIV_INTERN
-void
-log_mem_free(void);
-/*==============*/
+log_shutdown();
-extern log_t* log_sys;
+/** Whether to generate and require checksums on the redo log pages */
+extern my_bool innodb_log_checksums;
/* Values used as flags */
#define LOG_FLUSH 7652559
#define LOG_CHECKPOINT 78656949
-#ifdef UNIV_LOG_ARCHIVE
-# define LOG_ARCHIVE 11122331
-#endif /* UNIV_LOG_ARCHIVE */
-#define LOG_RECOVER 98887331
/* The counting of lsn's starts from this value: this must be non-zero */
#define LOG_START_LSN ((lsn_t) (16 * OS_FILE_LOG_BLOCK_SIZE))
-#define LOG_BUFFER_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE)
-#define LOG_ARCHIVE_BUF_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE / 4)
-
/* Offsets of a log block header */
#define LOG_BLOCK_HDR_NO 0 /* block number which must be > 0 and
is allowed to wrap around at 2G; the
@@ -630,92 +452,72 @@ extern log_t* log_sys;
.._HDR_NO */
#define LOG_BLOCK_TRL_SIZE 4 /* trailer size in bytes */
-/* Offsets for a checkpoint field */
+/** Offsets inside the checkpoint pages (redo log format version 1) @{ */
+/** Checkpoint number */
#define LOG_CHECKPOINT_NO 0
+/** Log sequence number up to which all changes have been flushed */
#define LOG_CHECKPOINT_LSN 8
-#define LOG_CHECKPOINT_OFFSET_LOW32 16
-#define LOG_CHECKPOINT_LOG_BUF_SIZE 20
-#define LOG_CHECKPOINT_ARCHIVED_LSN 24
-#define LOG_CHECKPOINT_GROUP_ARRAY 32
-
-/* For each value smaller than LOG_MAX_N_GROUPS the following 8 bytes: */
-
-#define LOG_CHECKPOINT_ARCHIVED_FILE_NO 0
-#define LOG_CHECKPOINT_ARCHIVED_OFFSET 4
-
-#define LOG_CHECKPOINT_ARRAY_END (LOG_CHECKPOINT_GROUP_ARRAY\
- + LOG_MAX_N_GROUPS * 8)
-#define LOG_CHECKPOINT_CHECKSUM_1 LOG_CHECKPOINT_ARRAY_END
-#define LOG_CHECKPOINT_CHECKSUM_2 (4 + LOG_CHECKPOINT_ARRAY_END)
-#if 0
-#define LOG_CHECKPOINT_FSP_FREE_LIMIT (8 + LOG_CHECKPOINT_ARRAY_END)
- /*!< Not used (0);
- This used to contain the
- current fsp free limit in
- tablespace 0, in units of one
- megabyte.
-
- This information might have been used
- since mysqlbackup version 0.35 but
- before 1.41 to decide if unused ends of
- non-auto-extending data files
- in space 0 can be truncated.
-
- This information was made obsolete
- by mysqlbackup --compress. */
-#define LOG_CHECKPOINT_FSP_MAGIC_N (12 + LOG_CHECKPOINT_ARRAY_END)
- /*!< Not used (0);
- This magic number tells if the
- checkpoint contains the above field:
- the field was added to
- InnoDB-3.23.50 and
- removed from MySQL 5.6 */
-#define LOG_CHECKPOINT_FSP_MAGIC_N_VAL 1441231243
- /*!< if LOG_CHECKPOINT_FSP_MAGIC_N
- contains this value, then
- LOG_CHECKPOINT_FSP_FREE_LIMIT
- is valid */
-#endif
-#define LOG_CHECKPOINT_OFFSET_HIGH32 (16 + LOG_CHECKPOINT_ARRAY_END)
-#define LOG_CRYPT_VER (20 + LOG_CHECKPOINT_ARRAY_END)
-
-#define LOG_CRYPT_MAX_ENTRIES (5)
-#define LOG_CRYPT_ENTRY_SIZE (4 + 4 + 2 * MY_AES_BLOCK_SIZE)
-#define LOG_CRYPT_SIZE (1 + 1 + \
- (LOG_CRYPT_MAX_ENTRIES * \
- LOG_CRYPT_ENTRY_SIZE))
-
-#define LOG_CHECKPOINT_SIZE (20 + LOG_CHECKPOINT_ARRAY_END + \
- LOG_CRYPT_SIZE)
-
-/* Offsets of a log file header */
-#define LOG_GROUP_ID 0 /* log group number */
-#define LOG_FILE_START_LSN 4 /* lsn of the start of data in this
- log file */
-#define LOG_FILE_NO 12 /* 4-byte archived log file number;
- this field is only defined in an
- archived log file */
-#define LOG_FILE_WAS_CREATED_BY_HOT_BACKUP 16
- /* a 32-byte field which contains
- the string 'ibbackup' and the
- creation time if the log file was
- created by mysqlbackup --restore;
- when mysqld is first time started
- on the restored database, it can
- print helpful info for the user */
-#define LOG_FILE_ARCH_COMPLETED OS_FILE_LOG_BLOCK_SIZE
- /* this 4-byte field is TRUE when
- the writing of an archived log file
- has been completed; this field is
- only defined in an archived log file */
-#define LOG_FILE_END_LSN (OS_FILE_LOG_BLOCK_SIZE + 4)
- /* lsn where the archived log file
- at least extends: actually the
- archived log file may extend to a
- later lsn, as long as it is within the
- same log block as this lsn; this field
- is defined only when an archived log
- file has been completely written */
+/** Byte offset of the log record corresponding to LOG_CHECKPOINT_LSN */
+#define LOG_CHECKPOINT_OFFSET 16
+/** log_sys_t::buf_size at the time of the checkpoint (not used) */
+#define LOG_CHECKPOINT_LOG_BUF_SIZE 24
+/** MariaDB 10.2.5 encrypted redo log encryption key version (32 bits)*/
+#define LOG_CHECKPOINT_CRYPT_KEY 32
+/** MariaDB 10.2.5 encrypted redo log random nonce (32 bits) */
+#define LOG_CHECKPOINT_CRYPT_NONCE 36
+/** MariaDB 10.2.5 encrypted redo log random message (MY_AES_BLOCK_SIZE) */
+#define LOG_CHECKPOINT_CRYPT_MESSAGE 40
+/** start LSN of the MLOG_CHECKPOINT mini-transaction corresponding
+to this checkpoint, or 0 if the information has not been written */
+#define LOG_CHECKPOINT_END_LSN (OS_FILE_LOG_BLOCK_SIZE - 16)
+
+/* @} */
+
+/** Offsets of a log file header */
+/* @{ */
+/** Log file header format identifier (32-bit unsigned big-endian integer).
+This used to be called LOG_GROUP_ID and always written as 0,
+because InnoDB never supported more than one copy of the redo log. */
+#define LOG_HEADER_FORMAT 0
+/** Redo log subformat (originally 0). In format version 0, the
+LOG_FILE_START_LSN started here, 4 bytes earlier than LOG_HEADER_START_LSN,
+the field that LOG_FILE_START_LSN was renamed to.
+Subformat 1 is for the fully redo-logged TRUNCATE
+(no MLOG_TRUNCATE records or extra log checkpoints or log files) */
+#define LOG_HEADER_SUBFORMAT 4
+/** LSN of the start of data in this log file (with format version 1;
+in format version 0, it was called LOG_FILE_START_LSN and at offset 4). */
+#define LOG_HEADER_START_LSN 8
+/** A null-terminated string which will contain either the string 'ibbackup'
+and the creation time if the log file was created by mysqlbackup --restore,
+or the MySQL version that created the redo log file. */
+#define LOG_HEADER_CREATOR 16
+/** End of the log file creator field. */
+#define LOG_HEADER_CREATOR_END (LOG_HEADER_CREATOR + 32)
+/** Contents of the LOG_HEADER_CREATOR field */
+#define LOG_HEADER_CREATOR_CURRENT \
+ "MariaDB " \
+ IB_TO_STR(MYSQL_VERSION_MAJOR) "." \
+ IB_TO_STR(MYSQL_VERSION_MINOR) "." \
+ IB_TO_STR(MYSQL_VERSION_PATCH)
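
IB_TO_STR stringizes the version macros, so the creator field is a single compile-time string literal; an illustration with assumed version numbers:

/* Illustration only: with MYSQL_VERSION_MAJOR=10, MINOR=2, PATCH=10,
   the macro expands to "MariaDB " "10" "." "2" "." "10", which the
   compiler concatenates into "MariaDB 10.2.10". */
const char* creator = LOG_HEADER_CREATOR_CURRENT;
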
+
+/** The redo log format identifier corresponding to the current format version.
+Stored in LOG_HEADER_FORMAT.
+To prevent crash-downgrade to earlier 10.2 due to the inability to
+roll back a retroactively introduced TRX_UNDO_RENAME_TABLE undo log record,
+MariaDB 10.2.18 and later will use the 10.3 format, but LOG_HEADER_SUBFORMAT
+1 instead of 0. MariaDB 10.3 will use subformat 0 (5.7-style TRUNCATE) or 2
+(MDEV-13564 backup-friendly TRUNCATE). */
+#define LOG_HEADER_FORMAT_10_3 103
+/** The old MariaDB 10.2.2..10.2.17 log format */
+#define LOG_HEADER_FORMAT_10_2 1
+/** Future MariaDB 10.4 log format */
+#define LOG_HEADER_FORMAT_10_4 104
+/** Encrypted MariaDB redo log */
+#define LOG_HEADER_FORMAT_ENCRYPTED (1U<<31)
+
+/* @} */
+
#define LOG_CHECKPOINT_1 OS_FILE_LOG_BLOCK_SIZE
/* first checkpoint field in the log
header; we write alternately to the
@@ -727,74 +529,83 @@ extern log_t* log_sys;
header */
#define LOG_FILE_HDR_SIZE (4 * OS_FILE_LOG_BLOCK_SIZE)
-#define LOG_GROUP_OK 301
-#define LOG_GROUP_CORRUPTED 302
+/* As long as fil_io() is used to handle log I/O, the log group maximum size
+is limited by (maximum page number) * (minimum page size). The page number
+type is uint32_t. Remove this limitation if page numbers are no longer used
+for log file I/O. */
+static const ulonglong log_group_max_size =
+ ((ulonglong(UINT32_MAX) + 1) * UNIV_PAGE_SIZE_MIN - 1);
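
Worked through with the smallest supported page size (assuming UNIV_PAGE_SIZE_MIN is 4096 bytes), the limit lands one byte short of 16 TiB:

/* (UINT32_MAX + 1) * UNIV_PAGE_SIZE_MIN - 1
   = 4294967296 * 4096 - 1
   = 17592186044415 bytes, i.e. 16 TiB - 1
   (assuming 4096-byte minimum pages). */
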
+
+/** The state of a log group */
+enum log_group_state_t {
+ /** No corruption detected */
+ LOG_GROUP_OK,
+ /** Corrupted */
+ LOG_GROUP_CORRUPTED
+};
+
+typedef ib_mutex_t LogSysMutex;
+typedef ib_mutex_t FlushOrderMutex;
/** Log group consists of a number of log files, each of the same size; a log
-group is implemented as a space in the sense of the module fil0fil. */
+group is implemented as a space in the sense of the module fil0fil.
+Currently, this is only protected by log_sys->mutex. However, in the case
+of log_write_up_to(), we will access some members only under the protection
+of log_sys->write_mutex, which should make no difference for now. */
struct log_group_t{
- /* The following fields are protected by log_sys->mutex */
- ulint id; /*!< log group id */
- ulint n_files; /*!< number of files in the group */
- lsn_t file_size; /*!< individual log file size in bytes,
- including the log file header */
- ulint space_id; /*!< file space which implements the log
- group */
- ulint state; /*!< LOG_GROUP_OK or
- LOG_GROUP_CORRUPTED */
- lsn_t lsn; /*!< lsn used to fix coordinates within
- the log group */
- lsn_t lsn_offset; /*!< the offset of the above lsn */
- ulint n_pending_writes;/*!< number of currently pending flush
- writes for this log group */
- byte** file_header_bufs_ptr;/*!< unaligned buffers */
- byte** file_header_bufs;/*!< buffers for each file
- header in the group */
-#ifdef UNIV_LOG_ARCHIVE
- /*-----------------------------*/
- byte** archive_file_header_bufs_ptr;/*!< unaligned buffers */
- byte** archive_file_header_bufs;/*!< buffers for each file
- header in the group */
- ulint archive_space_id;/*!< file space which
- implements the log group
- archive */
- ulint archived_file_no;/*!< file number corresponding to
- log_sys->archived_lsn */
- ulint archived_offset;/*!< file offset corresponding to
- log_sys->archived_lsn, 0 if we have
- not yet written to the archive file
- number archived_file_no */
- ulint next_archived_file_no;/*!< during an archive write,
- until the write is completed, we
- store the next value for
- archived_file_no here: the write
- completion function then sets the new
- value to ..._file_no */
- ulint next_archived_offset; /*!< like the preceding field */
-#endif /* UNIV_LOG_ARCHIVE */
- /*-----------------------------*/
- lsn_t scanned_lsn; /*!< used only in recovery: recovery scan
- succeeded up to this lsn in this log
- group */
- byte* checkpoint_buf_ptr;/*!< unaligned checkpoint header */
- byte* checkpoint_buf; /*!< checkpoint header is written from
- this buffer to the group */
- UT_LIST_NODE_T(log_group_t)
- log_groups; /*!< list of log groups */
+ /** number of files in the group */
+ ulint n_files;
+ /** format of the redo log: e.g., LOG_HEADER_FORMAT_10_3 */
+ uint32_t format;
+ /** redo log subformat: 0 with separately logged TRUNCATE,
+ 1 with fully redo-logged TRUNCATE */
+ uint32_t subformat;
+ /** individual log file size in bytes, including the header */
+ lsn_t file_size;
+ /** corruption status */
+ log_group_state_t state;
+ /** lsn used to fix coordinates within the log group */
+ lsn_t lsn;
+ /** the byte offset of the above lsn */
+ lsn_t lsn_offset;
+
+ /** used only in recovery: recovery scan succeeded up to this
+ lsn in this log group */
+ lsn_t scanned_lsn;
+ /** unaligned checkpoint header */
+ byte* checkpoint_buf_ptr;
+ /** buffer for writing a checkpoint header */
+ byte* checkpoint_buf;
+
+ /** @return whether the redo log is encrypted */
+ bool is_encrypted() const
+ {
+ return((format & LOG_HEADER_FORMAT_ENCRYPTED) != 0);
+ }
+
+ /** @return capacity in bytes */
+ inline lsn_t capacity() const
+ {
+ return((file_size - LOG_FILE_HDR_SIZE) * n_files);
+ }
};
/** Redo log buffer */
struct log_t{
- byte pad[64]; /*!< padding to prevent other memory
+ char pad1[CACHE_LINE_SIZE];
+ /*!< Padding to prevent other memory
update hotspots from residing on the
same memory cache line */
lsn_t lsn; /*!< log sequence number */
ulint buf_free; /*!< first free offset within the log
- buffer */
-#ifndef UNIV_HOTBACKUP
- ib_mutex_t mutex; /*!< mutex protecting the log */
-
- ib_mutex_t log_flush_order_mutex;/*!< mutex to serialize access to
+ buffer in use */
+
+ char pad2[CACHE_LINE_SIZE];/*!< Padding */
+ LogSysMutex mutex; /*!< mutex protecting the log */
+ char pad3[CACHE_LINE_SIZE]; /*!< Padding */
+ LogSysMutex write_mutex; /*!< mutex protecting writing to log
+ file and accessing to log_group_t */
+ char pad4[CACHE_LINE_SIZE];/*!< Padding */
+ FlushOrderMutex log_flush_order_mutex;/*!< mutex to serialize access to
the flush list when we are putting
dirty blocks in the list. The idea
behind this mutex is to be able
@@ -802,23 +613,24 @@ struct log_t{
mtr_commit and still ensure that
insertions in the flush_list happen
in the LSN order. */
-#endif /* !UNIV_HOTBACKUP */
- byte* buf_ptr; /* unaligned log buffer */
- byte* buf; /*!< log buffer */
- ulint buf_size; /*!< log buffer size in bytes */
+ byte* buf_ptr; /*!< unaligned log buffer, which should
+ be twice the size of buf_size */
+ byte* buf; /*!< log buffer currently in use;
+ this could point to either the first
+ half of the aligned(buf_ptr) or the
+ second half in turn, so that log
+ writes/flushes to disk do not block
+ concurrent mtrs which write
+ log to this buffer */
+ bool first_in_use; /*!< true if buf points to the first
+ half of the aligned(buf_ptr), false
+ if the second half */
+ ulint buf_size; /*!< size of each log buffer half, in bytes */
ulint max_buf_free; /*!< recommended maximum value of
- buf_free, after which the buffer is
- flushed */
- #ifdef UNIV_LOG_DEBUG
- ulint old_buf_free; /*!< value of buf free when log was
- last time opened; only in the debug
- version */
- ib_uint64_t old_lsn; /*!< value of lsn when log was
- last time opened; only in the
- debug version */
-#endif /* UNIV_LOG_DEBUG */
- ibool check_flush_or_checkpoint;
- /*!< this is set to TRUE when there may
+ buf_free for the buffer in use, after
+ which the buffer is flushed */
+ bool check_flush_or_checkpoint;
+ /*!< this is set when there may
be need to flush the log buffer, or
preflush buffer pool pages, or make
a checkpoint; this MUST be TRUE when
@@ -826,10 +638,9 @@ struct log_t{
max_checkpoint_age; this flag is
peeked at by log_free_check(), which
does not reserve the log mutex */
- UT_LIST_BASE_NODE_T(log_group_t)
- log_groups; /*!< log groups */
+ /** the redo log */
+ log_group_t log;
-#ifndef UNIV_HOTBACKUP
/** The fields involved in the log buffer flush @{ */
ulint buf_next_to_write;/*!< first offset in the log buffer
@@ -839,61 +650,17 @@ struct log_t{
later; this is advanced when a flush
operation is completed to all the log
groups */
- volatile bool is_extending; /*!< this is set to true during extend
- the log buffer size */
- lsn_t written_to_some_lsn;
- /*!< first log sequence number not yet
- written to any log group; for this to
- be advanced, it is enough that the
- write i/o has been completed for any
- one log group */
- lsn_t written_to_all_lsn;
- /*!< first log sequence number not yet
- written to some log group; for this to
- be advanced, it is enough that the
- write i/o has been completed for all
- log groups.
- Note that since InnoDB currently
- has only one log group therefore
- this value is redundant. Also it
- is possible that this value
- falls behind the
- flushed_to_disk_lsn transiently.
- It is appropriate to use either
- flushed_to_disk_lsn or
- write_lsn which are always
- up-to-date and accurate. */
- lsn_t write_lsn; /*!< end lsn for the current running
- write */
- ulint write_end_offset;/*!< the data in buffer has
- been written up to this offset
- when the current write ends:
- this field will then be copied
- to buf_next_to_write */
+ lsn_t write_lsn; /*!< last written lsn */
lsn_t current_flush_lsn;/*!< end lsn for the current running
write + flush operation */
lsn_t flushed_to_disk_lsn;
/*!< how far we have written the log
AND flushed to disk */
- ulint n_pending_writes;/*!< number of currently
- pending flushes or writes */
- /* NOTE on the 'flush' in names of the fields below: starting from
- 4.0.14, we separate the write of the log file and the actual fsync()
- or other method to flush it to disk. The names below should really
- be 'flush_or_write'! */
- os_event_t no_flush_event; /*!< this event is in the reset state
- when a flush or a write is running;
- os_event_set() and os_event_reset()
- are protected by log_sys_t::mutex */
- ibool one_flushed; /*!< during a flush, this is
- first FALSE and becomes TRUE
- when one log group has been
- written or flushed */
- os_event_t one_flushed_event;/*!< this event is reset when the
- flush or write has not yet completed
- for any log group; e.g., this means
- that a transaction has been committed
- when this is set;
+ ulint n_pending_flushes;/*!< number of currently
+ pending flushes; protected by
+ log_sys_t::mutex */
+ os_event_t flush_event; /*!< this event is in the reset state
+ when a flush is running;
os_event_set() and os_event_reset()
are protected by log_sys_t::mutex */
ulint n_log_ios; /*!< number of log i/os initiated thus
@@ -937,6 +704,13 @@ struct log_t{
/*!< latest checkpoint lsn */
lsn_t next_checkpoint_lsn;
/*!< next checkpoint lsn */
+ mtr_buf_t* append_on_checkpoint;
+ /*!< extra redo log records to write
+ during a checkpoint, or NULL if none.
+ The pointer is protected by
+ log_sys->mutex, and the data must
+ remain constant as long as this
+ pointer is not NULL. */
ulint n_pending_checkpoint_writes;
/*!< number of currently pending
checkpoint writes */
@@ -944,51 +718,23 @@ struct log_t{
checkpoint write is running; a thread
should wait for this without owning
the log mutex */
-#endif /* !UNIV_HOTBACKUP */
byte* checkpoint_buf_ptr;/* unaligned checkpoint header */
byte* checkpoint_buf; /*!< checkpoint header is read to this
buffer */
/* @} */
-#ifdef UNIV_LOG_ARCHIVE
- /** Fields involved in archiving @{ */
- ulint archiving_state;/*!< LOG_ARCH_ON, LOG_ARCH_STOPPING
- LOG_ARCH_STOPPED, LOG_ARCH_OFF */
- lsn_t archived_lsn; /*!< archiving has advanced to this
- lsn */
- lsn_t max_archived_lsn_age_async;
- /*!< recommended maximum age of
- archived_lsn, before we start
- asynchronous copying to the archive */
- lsn_t max_archived_lsn_age;
- /*!< maximum allowed age for
- archived_lsn */
- lsn_t next_archived_lsn;/*!< during an archive write,
- until the write is completed, we
- store the next value for
- archived_lsn here: the write
- completion function then sets the new
- value to archived_lsn */
- ulint archiving_phase;/*!< LOG_ARCHIVE_READ or
- LOG_ARCHIVE_WRITE */
- ulint n_pending_archive_ios;
- /*!< number of currently pending reads
- or writes in archiving */
- rw_lock_t archive_lock; /*!< this latch is x-locked when an
- archive write is running; a thread
- should wait for this without owning
- the log mutex */
- ulint archive_buf_size;/*!< size of archive_buf */
- byte* archive_buf; /*!< log segment is written to the
- archive from this buffer */
- os_event_t archiving_on; /*!< if archiving has been stopped;
- os_event_set() and os_event_reset()
- are protected by log_sys_t::mutex */
- /* @} */
-#endif /* UNIV_LOG_ARCHIVE */
+
+ /** @return whether the redo log is encrypted */
+ bool is_encrypted() const
+ {
+ return(log.is_encrypted());
+ }
};
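
The buf/buf_ptr comments above describe a double buffer: writes to disk drain one half while mtr commits keep appending to the other. A simplified sketch of the switch, not the actual function, which also carries over the incomplete last block and handles alignment:

/* Simplified sketch of switching the active log buffer half.
   Assumes buf_ptr holds 2 * buf_size bytes; alignment and the
   copy of the incomplete last block are omitted. */
static void log_buffer_switch_sketch(log_t* log)
{
	byte* base = log->buf_ptr;	/* ignoring alignment */

	log->buf = log->first_in_use ? base + log->buf_size : base;
	log->first_in_use = !log->first_in_use;
	log->buf_free = 0;	/* start appending into the new half */
}
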
+/** Redo log system */
+extern log_t* log_sys;
+
/** Test if flush order mutex is owned. */
-#define log_flush_order_mutex_own() \
+#define log_flush_order_mutex_own() \
mutex_own(&log_sys->log_flush_order_mutex)
/** Acquire the flush order mutex. */
@@ -1000,21 +746,53 @@ struct log_t{
mutex_exit(&log_sys->log_flush_order_mutex); \
} while (0)
-#ifdef UNIV_LOG_ARCHIVE
-/** Archiving state @{ */
-#define LOG_ARCH_ON 71
-#define LOG_ARCH_STOPPING 72
-#define LOG_ARCH_STOPPING2 73
-#define LOG_ARCH_STOPPED 74
-#define LOG_ARCH_OFF 75
-/* @} */
-#endif /* UNIV_LOG_ARCHIVE */
+/** Test if log sys mutex is owned. */
+#define log_mutex_own() mutex_own(&log_sys->mutex)
+
+/** Test if log sys write mutex is owned. */
+#define log_write_mutex_own() mutex_own(&log_sys->write_mutex)
+
+/** Acquire the log sys mutex. */
+#define log_mutex_enter() mutex_enter(&log_sys->mutex)
+
+/** Acquire the log sys write mutex. */
+#define log_write_mutex_enter() mutex_enter(&log_sys->write_mutex)
+
+/** Acquire all the log sys mutexes. */
+#define log_mutex_enter_all() do { \
+ mutex_enter(&log_sys->write_mutex); \
+ mutex_enter(&log_sys->mutex); \
+} while (0)
+
+/** Release the log sys mutex. */
+#define log_mutex_exit() mutex_exit(&log_sys->mutex)
+
+/** Release the log sys write mutex.*/
+#define log_write_mutex_exit() mutex_exit(&log_sys->write_mutex)
+
+/** Release all the log sys mutexes. */
+#define log_mutex_exit_all() do { \
+ mutex_exit(&log_sys->mutex); \
+ mutex_exit(&log_sys->write_mutex); \
+} while (0)
+
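The paired enter_all/exit_all macros above encode a fixed acquisition order: write_mutex is always taken before mutex, and the two are released in the reverse order, so two threads that need both latches can never deadlock against each other. A minimal stand-alone sketch of the same discipline, with std::mutex stand-ins for the two log_sys latches (the names below are illustrative, not from this patch):

    #include <mutex>

    std::mutex write_mutex; // stands in for log_sys->write_mutex
    std::mutex log_mutex;   // stands in for log_sys->mutex

    // Mirror of log_mutex_enter_all(): always take write_mutex first.
    void log_mutex_enter_all_sketch()
    {
        write_mutex.lock();
        log_mutex.lock();
    }

    // Mirror of log_mutex_exit_all(): release in the opposite order.
    void log_mutex_exit_all_sketch()
    {
        log_mutex.unlock();
        write_mutex.unlock();
    }
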
+/** Calculate the offset of an lsn within a log group.
+@param[in] lsn log sequence number
+@param[in] group log group
+@return offset within the log group */
+lsn_t
+log_group_calc_lsn_offset(
+ lsn_t lsn,
+ const log_group_t* group);
/* log scrubbing speed, in bytes/sec */
extern ulonglong innodb_scrub_log_speed;
-#ifndef UNIV_NONINL
+/** Event to wake up log_scrub_thread */
+extern os_event_t log_scrub_event;
+/** Whether log_scrub_thread is active */
+extern bool log_scrub_thread_active;
+
#include "log0log.ic"
-#endif
#endif
diff --git a/storage/innobase/include/log0log.ic b/storage/innobase/include/log0log.ic
index 7bcd7f13085..36caaedfaa2 100644
--- a/storage/innobase/include/log0log.ic
+++ b/storage/innobase/include/log0log.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,29 +24,18 @@ Database log
Created 12/9/1995 Heikki Tuuri
*******************************************************/
-#include "os0file.h"
#include "mach0data.h"
-#include "mtr0mtr.h"
#include "srv0mon.h"
+#include "srv0srv.h"
+#include "ut0crc32.h"
-#ifdef UNIV_LOG_DEBUG
-/******************************************************//**
-Checks by parsing that the catenated log segment for a single mtr is
-consistent. */
-UNIV_INTERN
-ibool
-log_check_log_recs(
-/*===============*/
- const byte* buf, /*!< in: pointer to the start of
- the log segment in the
- log_sys->buf log buffer */
- ulint len, /*!< in: segment length in bytes */
- ib_uint64_t buf_start_lsn); /*!< in: buffer start lsn */
-#endif /* UNIV_LOG_DEBUG */
+#ifdef UNIV_LOG_LSN_DEBUG
+#include "mtr0types.h"
+#endif /* UNIV_LOG_LSN_DEBUG */
/************************************************************//**
Gets a log block flush bit.
-@return TRUE if this block was the first to be written in a log flush */
+@return TRUE if this block was the first to be written in a log flush */
UNIV_INLINE
ibool
log_block_get_flush_bit(
@@ -85,7 +75,7 @@ log_block_set_flush_bit(
/************************************************************//**
Gets a log block number stored in the header.
-@return log block number stored in the block header */
+@return log block number stored in the block header */
UNIV_INLINE
ulint
log_block_get_hdr_no(
@@ -115,7 +105,7 @@ log_block_set_hdr_no(
/************************************************************//**
Gets a log block data length.
-@return log block data length measured as a byte offset from the block start */
+@return log block data length measured as a byte offset from the block start */
UNIV_INLINE
ulint
log_block_get_data_len(
@@ -164,7 +154,7 @@ log_block_set_first_rec_group(
/************************************************************//**
Gets a log block checkpoint number field (4 lowest bytes).
-@return checkpoint no (4 lowest bytes) */
+@return checkpoint no (4 lowest bytes) */
UNIV_INLINE
ulint
log_block_get_checkpoint_no(
@@ -188,25 +178,38 @@ log_block_set_checkpoint_no(
/************************************************************//**
Converts a lsn to a log block number.
-@return log block number, it is > 0 and <= 1G */
+@return log block number, it is > 0 and <= 1G */
UNIV_INLINE
ulint
log_block_convert_lsn_to_no(
/*========================*/
lsn_t lsn) /*!< in: lsn of a byte within the block */
{
- return(((ulint) (lsn / OS_FILE_LOG_BLOCK_SIZE) & 0x3FFFFFFFUL) + 1);
+ return(((ulint) (lsn / OS_FILE_LOG_BLOCK_SIZE) &
+ DBUG_EVALUATE_IF("innodb_small_log_block_no_limit",
+ 0xFUL, 0x3FFFFFFFUL)) + 1);
}
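To make the conversion above concrete: OS_FILE_LOG_BLOCK_SIZE is 512 bytes, so LSN 1024 falls in block ((1024 / 512) & 0x3FFFFFFF) + 1 = 3, and the 30-bit mask wraps the numbering after 1G blocks. A small self-checking restatement with plain integer types, ignoring the debug-only DBUG_EVALUATE_IF hook:

    #include <cassert>
    #include <cstdint>

    // Plain-types restatement of log_block_convert_lsn_to_no().
    static unsigned long block_no_of_lsn(uint64_t lsn)
    {
        const uint64_t block_size = 512; // OS_FILE_LOG_BLOCK_SIZE
        return ((unsigned long)(lsn / block_size) & 0x3FFFFFFFUL) + 1;
    }

    int main()
    {
        assert(block_no_of_lsn(0)    == 1); // block numbers start at 1
        assert(block_no_of_lsn(511)  == 1);
        assert(block_no_of_lsn(512)  == 2);
        assert(block_no_of_lsn(1024) == 3);
        // The 30-bit mask makes the numbering wrap after 1G blocks.
        assert(block_no_of_lsn(512ULL << 30) == 1);
    }
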
/************************************************************//**
Calculates the checksum for a log block.
-@return checksum */
+@return checksum */
UNIV_INLINE
ulint
log_block_calc_checksum(
/*====================*/
const byte* block) /*!< in: log block */
{
+ return(log_checksum_algorithm_ptr(block));
+}
+
+/** Calculate the checksum for a log block using the pre-5.7.9 algorithm.
+@param[in] block log block
+@return checksum */
+UNIV_INLINE
+ulint
+log_block_calc_checksum_format_0(
+ const byte* block)
+{
ulint sum;
ulint sh;
ulint i;
@@ -228,9 +231,31 @@ log_block_calc_checksum(
return(sum);
}
+/** Calculate the checksum for a log block using the MySQL 5.7 algorithm.
+@param[in] block log block
+@return checksum */
+UNIV_INLINE
+ulint
+log_block_calc_checksum_crc32(
+ const byte* block)
+{
+ return(ut_crc32(block, OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE));
+}
+
+/** Calculate the checksum for a log block using the "no-op" algorithm.
+@param[in] block log block
+@return checksum */
+UNIV_INLINE
+ulint
+log_block_calc_checksum_none(
+ const byte* block)
+{
+ return(LOG_NO_CHECKSUM_MAGIC);
+}
+
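log_block_calc_checksum() above reduces to one indirect call through log_checksum_algorithm_ptr, with the three UNIV_INLINE functions as the selectable targets. A hedged sketch of that dispatch shape; the algorithm bodies here are elided placeholders, and the 0xDEADBEEF value is an assumption about LOG_NO_CHECKSUM_MAGIC:

    #include <cstdint>

    typedef uint32_t (*log_checksum_func_t)(const unsigned char* block);

    // Stand-ins for the real algorithms; the pre-5.7.9 rolling sum and
    // ut_crc32 are elided, only the dispatch shape is shown.
    static uint32_t checksum_format_0(const unsigned char*) { return 0; }
    static uint32_t checksum_crc32(const unsigned char*)    { return 0; }
    static uint32_t checksum_none(const unsigned char*)
    {
        return 0xDEADBEEFUL; // assumed LOG_NO_CHECKSUM_MAGIC value
    }

    // Selected once from the innodb_log_checksum_algorithm setting.
    static log_checksum_func_t log_checksum_algorithm_ptr = checksum_crc32;

    static uint32_t log_block_calc_checksum_sketch(const unsigned char* block)
    {
        return log_checksum_algorithm_ptr(block); // one indirect call per block
    }
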
/************************************************************//**
Gets a log block checksum field value.
-@return checksum */
+@return checksum */
UNIV_INLINE
ulint
log_block_get_checksum(
@@ -266,8 +291,6 @@ log_block_init(
{
ulint no;
- ut_ad(mutex_own(&(log_sys->mutex)));
-
no = log_block_convert_lsn_to_no(lsn);
log_block_set_hdr_no(log_block, no);
@@ -276,56 +299,44 @@ log_block_init(
log_block_set_first_rec_group(log_block, 0);
}
-/************************************************************//**
-Initializes a log block in the log buffer in the old format, where there
-was no checksum yet. */
-UNIV_INLINE
-void
-log_block_init_in_old_format(
-/*=========================*/
- byte* log_block, /*!< in: pointer to the log buffer */
- lsn_t lsn) /*!< in: lsn within the log block */
-{
- ulint no;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- no = log_block_convert_lsn_to_no(lsn);
-
- log_block_set_hdr_no(log_block, no);
- mach_write_to_4(log_block + OS_FILE_LOG_BLOCK_SIZE
- - LOG_BLOCK_CHECKSUM, no);
- log_block_set_data_len(log_block, LOG_BLOCK_HDR_SIZE);
- log_block_set_first_rec_group(log_block, 0);
-}
-
-#ifndef UNIV_HOTBACKUP
-/************************************************************//**
-Writes to the log the string given. The log must be released with
-log_release.
-@return end lsn of the log record, zero if did not succeed */
+/** Append a string to the log.
+@param[in] str string
+@param[in] len string length
+@param[out] start_lsn start LSN of the log record
+@return end LSN of the log record, zero if it did not succeed */
UNIV_INLINE
lsn_t
log_reserve_and_write_fast(
-/*=======================*/
- const void* str, /*!< in: string */
- ulint len, /*!< in: string length */
- lsn_t* start_lsn)/*!< out: start lsn of the log record */
+ const void* str,
+ ulint len,
+ lsn_t* start_lsn)
{
- ulint data_len;
-#ifdef UNIV_LOG_LSN_DEBUG
- /* length of the LSN pseudo-record */
- ulint lsn_len;
-#endif /* UNIV_LOG_LSN_DEBUG */
+ ut_ad(log_mutex_own());
+ ut_ad(len > 0);
- mutex_enter(&log_sys->mutex);
#ifdef UNIV_LOG_LSN_DEBUG
- lsn_len = 1
+ /* Append a MLOG_LSN record after mtr_commit(), except when
+ the last bytes could be a MLOG_CHECKPOINT marker. We have special
+ handling when the log consists of only a single MLOG_CHECKPOINT
+ record since the latest checkpoint, and appending the
+ MLOG_LSN would ruin that.
+
+ Note that a longer redo log record could happen to end in what
+ looks like MLOG_CHECKPOINT, and we could be omitting MLOG_LSN
+ without reason. This is OK, because writing the MLOG_LSN is
+ just a 'best effort', aimed at finding log corruption due to
+ bugs in the redo log writing logic. */
+ const ulint lsn_len
+ = len >= SIZE_OF_MLOG_CHECKPOINT
+ && MLOG_CHECKPOINT == static_cast<const char*>(str)[
+ len - SIZE_OF_MLOG_CHECKPOINT]
+ ? 0
+ : 1
+ + mach_get_compressed_size(log_sys->lsn >> 32)
+ + mach_get_compressed_size(log_sys->lsn & 0xFFFFFFFFUL);
#endif /* UNIV_LOG_LSN_DEBUG */
- data_len = len
+ const ulint data_len = len
#ifdef UNIV_LOG_LSN_DEBUG
+ lsn_len
#endif /* UNIV_LOG_LSN_DEBUG */
@@ -336,39 +347,37 @@ log_reserve_and_write_fast(
/* The string does not fit within the current log block
or the log block would become full */
- mutex_exit(&log_sys->mutex);
-
return(0);
}
*start_lsn = log_sys->lsn;
#ifdef UNIV_LOG_LSN_DEBUG
- {
+ if (lsn_len) {
/* Write the LSN pseudo-record. */
byte* b = &log_sys->buf[log_sys->buf_free];
+
*b++ = MLOG_LSN | (MLOG_SINGLE_REC_FLAG & *(const byte*) str);
+
/* Write the LSN in two parts,
as a pseudo page number and space id. */
b += mach_write_compressed(b, log_sys->lsn >> 32);
b += mach_write_compressed(b, log_sys->lsn & 0xFFFFFFFFUL);
ut_a(b - lsn_len == &log_sys->buf[log_sys->buf_free]);
- memcpy(b, str, len);
+ ::memcpy(b, str, len);
+
len += lsn_len;
- }
-#else /* UNIV_LOG_LSN_DEBUG */
- memcpy(log_sys->buf + log_sys->buf_free, str, len);
+ } else
#endif /* UNIV_LOG_LSN_DEBUG */
+ memcpy(log_sys->buf + log_sys->buf_free, str, len);
+
+ log_block_set_data_len(
+ reinterpret_cast<byte*>(ut_align_down(
+ log_sys->buf + log_sys->buf_free,
+ OS_FILE_LOG_BLOCK_SIZE)),
+ data_len);
- log_block_set_data_len((byte*) ut_align_down(log_sys->buf
- + log_sys->buf_free,
- OS_FILE_LOG_BLOCK_SIZE),
- data_len);
-#ifdef UNIV_LOG_DEBUG
- log_sys->old_buf_free = log_sys->buf_free;
- log_sys->old_lsn = log_sys->lsn;
-#endif
log_sys->buf_free += len;
ut_ad(log_sys->buf_free <= log_sys->buf_size);
@@ -378,27 +387,12 @@ log_reserve_and_write_fast(
MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
log_sys->lsn - log_sys->last_checkpoint_lsn);
-#ifdef UNIV_LOG_DEBUG
- log_check_log_recs(log_sys->buf + log_sys->old_buf_free,
- log_sys->buf_free - log_sys->old_buf_free,
- log_sys->old_lsn);
-#endif
return(log_sys->lsn);
}
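In outline, log_reserve_and_write_fast() is the mtr-commit fast path: it now asserts that the caller already holds the log mutex, refuses (returns 0) whenever the record would fill or overflow the current 512-byte block, and otherwise memcpy()s into the log buffer, updates the block's data-length field, and advances buf_free and lsn together. A self-contained sketch of that shape; the struct layout and sizes are illustrative assumptions, not the real log_t:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    enum { BLOCK_SIZE = 512, TRL = 4 }; // illustrative sizes

    struct log_buf {
        unsigned char buf[BLOCK_SIZE * 8];
        size_t   buf_free; // next free byte offset
        uint64_t lsn;      // LSN corresponding to buf_free
    };

    // Return end LSN on success, 0 if the caller must take the slow path.
    static uint64_t reserve_and_write_fast(log_buf& lg, const void* str,
                                           size_t len)
    {
        const size_t data_len = len + lg.buf_free % BLOCK_SIZE;
        if (data_len >= BLOCK_SIZE - TRL) {
            return 0; // record would not fit in the current block
        }
        memcpy(lg.buf + lg.buf_free, str, len);
        // The real code also stores data_len into the block header here.
        lg.buf_free += len;
        lg.lsn += len;
        return lg.lsn;
    }
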
-/***********************************************************************//**
-Releases the log mutex. */
-UNIV_INLINE
-void
-log_release(void)
-/*=============*/
-{
- mutex_exit(&(log_sys->mutex));
-}
-
/************************************************************//**
Gets the current lsn.
-@return current lsn */
+@return current lsn */
UNIV_INLINE
lsn_t
log_get_lsn(void)
@@ -406,11 +400,11 @@ log_get_lsn(void)
{
lsn_t lsn;
- mutex_enter(&(log_sys->mutex));
+ log_mutex_enter();
lsn = log_sys->lsn;
- mutex_exit(&(log_sys->mutex));
+ log_mutex_exit();
return(lsn);
}
@@ -421,15 +415,14 @@ Gets the last lsn that is fully flushed to disk.
UNIV_INLINE
ib_uint64_t
log_get_flush_lsn(void)
-/*=============*/
{
ib_uint64_t lsn;
- mutex_enter(&(log_sys->mutex));
+ log_mutex_enter();
lsn = log_sys->flushed_to_disk_lsn;
- mutex_exit(&(log_sys->mutex));
+ log_mutex_exit();
return(lsn);
}
@@ -440,7 +433,7 @@ Gets the current lsn with a trylock
UNIV_INLINE
lsn_t
log_get_lsn_nowait(void)
-/*=============*/
+/*====================*/
{
lsn_t lsn=0;
@@ -457,7 +450,7 @@ log_get_lsn_nowait(void)
/****************************************************************
Gets the log group capacity. It is OK to read the value without
holding log_sys->mutex because it is constant.
-@return log group capacity */
+@return log group capacity */
UNIV_INLINE
lsn_t
log_get_capacity(void)
@@ -469,7 +462,7 @@ log_get_capacity(void)
/****************************************************************
Get log_sys::max_modified_age_async. It is OK to read the value without
holding log_sys::mutex because it is constant.
-@return max_modified_age_async */
+@return max_modified_age_async */
UNIV_INLINE
lsn_t
log_get_max_modified_age_async(void)
@@ -488,14 +481,27 @@ void
log_free_check(void)
/*================*/
{
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(sync_thread_levels_empty_except_dict());
-#endif /* UNIV_SYNC_DEBUG */
+ /* During row_log_table_apply(), this function will be called while we
+ are holding some latches. This is OK, as long as we are not holding
+ any latches on buffer blocks. */
+
+#ifdef UNIV_DEBUG
+ static const latch_level_t latches[] = {
+ SYNC_DICT, /* dict_sys->mutex during
+ commit_try_rebuild() */
+ SYNC_DICT_OPERATION, /* dict_operation_lock X-latch during
+ commit_try_rebuild() */
+ SYNC_FTS_CACHE, /* fts_cache_t::lock */
+ SYNC_INDEX_TREE /* index->lock */
+ };
+#endif /* UNIV_DEBUG */
+
+ ut_ad(!sync_check_iterate(
+ sync_allowed_latches(latches,
+ latches + UT_ARR_SIZE(latches))));
if (log_sys->check_flush_or_checkpoint) {
log_check_margins();
}
}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h
index f030881edc9..b91312e81e2 100644
--- a/storage/innobase/include/log0recv.h
+++ b/storage/innobase/include/log0recv.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,287 +27,147 @@ Created 9/20/1997 Heikki Tuuri
#ifndef log0recv_h
#define log0recv_h
-#include "univ.i"
#include "ut0byte.h"
#include "buf0types.h"
#include "hash0hash.h"
#include "log0log.h"
+#include "mtr0types.h"
+
#include <list>
+#include <vector>
-#ifdef UNIV_HOTBACKUP
-extern ibool recv_replay_file_ops;
-
-/*******************************************************************//**
-Reads the checkpoint info needed in hot backup.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-recv_read_checkpoint_info_for_backup(
-/*=================================*/
- const byte* hdr, /*!< in: buffer containing the log group
- header */
- lsn_t* lsn, /*!< out: checkpoint lsn */
- lsn_t* offset, /*!< out: checkpoint offset in the log group */
- lsn_t* cp_no, /*!< out: checkpoint number */
- lsn_t* first_header_lsn)
- /*!< out: lsn of of the start of the
- first log file */
- MY_ATTRIBUTE((nonnull));
-/*******************************************************************//**
-Scans the log segment and n_bytes_scanned is set to the length of valid
-log scanned. */
-UNIV_INTERN
-void
-recv_scan_log_seg_for_backup(
-/*=========================*/
- byte* buf, /*!< in: buffer containing log data */
- ulint buf_len, /*!< in: data length in that buffer */
- lsn_t* scanned_lsn, /*!< in/out: lsn of buffer start,
- we return scanned lsn */
- ulint* scanned_checkpoint_no,
- /*!< in/out: 4 lowest bytes of the
- highest scanned checkpoint number so
- far */
- ulint* n_bytes_scanned);/*!< out: how much we were able to
- scan, smaller than buf_len if log
- data ended here */
-#endif /* UNIV_HOTBACKUP */
-/*******************************************************************//**
-Returns TRUE if recovery is currently running.
-@return recv_recovery_on */
-UNIV_INLINE
-ibool
-recv_recovery_is_on(void);
-/*=====================*/
-#ifdef UNIV_LOG_ARCHIVE
-/*******************************************************************//**
-Returns TRUE if recovery from backup is currently running.
-@return recv_recovery_from_backup_on */
-UNIV_INLINE
-ibool
-recv_recovery_from_backup_is_on(void);
-/*=================================*/
-#endif /* UNIV_LOG_ARCHIVE */
-/************************************************************************//**
-Applies the hashed log records to the page, if the page lsn is less than the
-lsn of a log record. This can be called when a buffer page has just been
-read in, or also for a page already in the buffer pool. */
-UNIV_INTERN
-void
-recv_recover_page_func(
-/*===================*/
-#ifndef UNIV_HOTBACKUP
- ibool just_read_in,
- /*!< in: TRUE if the i/o handler calls
- this for a freshly read page */
-#endif /* !UNIV_HOTBACKUP */
- buf_block_t* block); /*!< in/out: buffer block */
-#ifndef UNIV_HOTBACKUP
-/** Wrapper for recv_recover_page_func().
-Applies the hashed log records to the page, if the page lsn is less than the
-lsn of a log record. This can be called when a buffer page has just been
-read in, or also for a page already in the buffer pool.
-@param jri in: TRUE if just read in (the i/o handler calls this for
-a freshly read page)
-@param block in/out: the buffer block
-*/
-# define recv_recover_page(jri, block) recv_recover_page_func(jri, block)
-#else /* !UNIV_HOTBACKUP */
-/** Wrapper for recv_recover_page_func().
-Applies the hashed log records to the page, if the page lsn is less than the
-lsn of a log record. This can be called when a buffer page has just been
-read in, or also for a page already in the buffer pool.
-@param jri in: TRUE if just read in (the i/o handler calls this for
-a freshly read page)
-@param block in/out: the buffer block
-*/
-# define recv_recover_page(jri, block) recv_recover_page_func(block)
-#endif /* !UNIV_HOTBACKUP */
-
-/** Recovers from a checkpoint. When this function returns, the database is able
-to start processing of new user transactions, but the function
-recv_recovery_from_checkpoint_finish should be called later to complete
-the recovery and free the resources used in it.
-@param[in] type LOG_CHECKPOINT or LOG_ARCHIVE
-@param[in] limit_lsn recover up to this lsn if possible
-@param[in] flushed_lsn flushed lsn from first data file
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-recv_recovery_from_checkpoint_start_func(
-#ifdef UNIV_LOG_ARCHIVE
- ulint type,
- lsn_t limit_lsn,
-#endif /* UNIV_LOG_ARCHIVE */
- lsn_t flushed_lsn)
- MY_ATTRIBUTE((warn_unused_result));
-
-#ifdef UNIV_LOG_ARCHIVE
-/** Wrapper for recv_recovery_from_checkpoint_start_func().
-Recovers from a checkpoint. When this function returns, the database is able
-to start processing of new user transactions, but the function
-recv_recovery_from_checkpoint_finish should be called later to complete
-the recovery and free the resources used in it.
-@param type in: LOG_CHECKPOINT or LOG_ARCHIVE
-@param lim in: recover up to this log sequence number if possible
-@param lsn in: flushed log sequence number from first data file
-@return error code or DB_SUCCESS */
-# define recv_recovery_from_checkpoint_start(type,lim,lsn) \
- recv_recovery_from_checkpoint_start_func(type,lim,lsn)
-#else /* UNIV_LOG_ARCHIVE */
-/** Wrapper for recv_recovery_from_checkpoint_start_func().
-Recovers from a checkpoint. When this function returns, the database is able
-to start processing of new user transactions, but the function
-recv_recovery_from_checkpoint_finish should be called later to complete
-the recovery and free the resources used in it.
-@param type ignored: LOG_CHECKPOINT or LOG_ARCHIVE
-@param lim ignored: recover up to this log sequence number if possible
-@param lsn in: flushed log sequence number from first data file
-@return error code or DB_SUCCESS */
-# define recv_recovery_from_checkpoint_start(type,lim,lsn) \
- recv_recovery_from_checkpoint_start_func(lsn)
-#endif /* UNIV_LOG_ARCHIVE */
+/** Is recv_writer_thread active? */
+extern bool recv_writer_thread_active;
-/********************************************************//**
-Completes recovery from a checkpoint. */
-UNIV_INTERN
+/** @return whether recovery is currently running. */
+#define recv_recovery_is_on() UNIV_UNLIKELY(recv_recovery_on)
+
+/** Find the latest checkpoint in the log header.
+@param[out] max_field LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2
+@return error code or DB_SUCCESS */
+dberr_t
+recv_find_max_checkpoint(ulint* max_field)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
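recv_find_max_checkpoint() chooses between the two checkpoint slots kept in the redo log header: recovery reads both, skips any slot that fails validation, and starts from the one with the larger checkpoint number. A hedged sketch of that selection; the two-slot layout is modelled on the 5.7-era header, and the field names are stand-ins:

    #include <cstdint>

    struct checkpoint_slot {
        uint64_t checkpoint_no; // monotonically increasing counter
        bool     valid;         // checksum verified
    };

    // Return the index (0 or 1) of the newest valid slot, or -1 if
    // neither slot validates (the caller treats that as an error).
    static int find_max_checkpoint(const checkpoint_slot slot[2])
    {
        int      max_field = -1;
        uint64_t max_no    = 0;
        for (int i = 0; i < 2; i++) {
            if (slot[i].valid && slot[i].checkpoint_no >= max_no) {
                max_no    = slot[i].checkpoint_no;
                max_field = i;
            }
        }
        return max_field;
    }
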
+/** Reduces recv_sys->n_addrs for the corrupted page.
+This function should be called when srv_force_recovery > 0.
+@param[in] page_id page id of the corrupted page */
+void recv_recover_corrupt_page(page_id_t page_id);
+
+/** Apply any buffered redo log to a page that was just read from a data file.
+@param[in,out] bpage buffer pool page */
+ATTRIBUTE_COLD void recv_recover_page(buf_page_t* bpage);
+
+/** Start recovering from a redo log checkpoint.
+@see recv_recovery_from_checkpoint_finish
+@param[in] flush_lsn FIL_PAGE_FILE_FLUSH_LSN
+of first system tablespace page
+@return error code or DB_SUCCESS */
+dberr_t
+recv_recovery_from_checkpoint_start(
+ lsn_t flush_lsn);
+/** Complete recovery from a checkpoint. */
void
recv_recovery_from_checkpoint_finish(void);
-/*======================================*/
/********************************************************//**
Initiates the rollback of active transactions. */
-UNIV_INTERN
void
recv_recovery_rollback_active(void);
/*===============================*/
-/*******************************************************//**
-Scans log from a buffer and stores new log data to the parsing buffer.
-Parses and hashes the log records if new data found. Unless
-UNIV_HOTBACKUP is defined, this function will apply log records
-automatically when the hash table becomes full.
-@return TRUE if limit_lsn has been reached, or not able to scan any
-more in this log group */
-UNIV_INTERN
-ibool
-recv_scan_log_recs(
-/*===============*/
- ulint available_memory,/*!< in: we let the hash table of recs
- to grow to this size, at the maximum */
- ibool store_to_hash, /*!< in: TRUE if the records should be
- stored to the hash table; this is set
- to FALSE if just debug checking is
- needed */
- const byte* buf, /*!< in: buffer containing a log
- segment or garbage */
- ulint len, /*!< in: buffer length */
- lsn_t start_lsn, /*!< in: buffer start lsn */
- lsn_t* contiguous_lsn, /*!< in/out: it is known that all log
- groups contain contiguous log data up
- to this lsn */
- lsn_t* group_scanned_lsn);/*!< out: scanning succeeded up to
- this lsn */
-/******************************************************//**
-Resets the logs. The contents of log files will be lost! */
-UNIV_INTERN
-void
-recv_reset_logs(
-/*============*/
-#ifdef UNIV_LOG_ARCHIVE
- ulint arch_log_no, /*!< in: next archived log file number */
- ibool new_logs_created,/*!< in: TRUE if resetting logs
- is done at the log creation;
- FALSE if it is done after
- archive recovery */
-#endif /* UNIV_LOG_ARCHIVE */
- lsn_t lsn); /*!< in: reset to this lsn
- rounded up to be divisible by
- OS_FILE_LOG_BLOCK_SIZE, after
- which we add
- LOG_BLOCK_HDR_SIZE */
-#ifdef UNIV_HOTBACKUP
-/******************************************************//**
-Creates new log files after a backup has been restored. */
-UNIV_INTERN
-void
-recv_reset_log_files_for_backup(
-/*============================*/
- const char* log_dir, /*!< in: log file directory path */
- ulint n_log_files, /*!< in: number of log files */
- lsn_t log_file_size, /*!< in: log file size */
- lsn_t lsn); /*!< in: new start lsn, must be
- divisible by OS_FILE_LOG_BLOCK_SIZE */
-#endif /* UNIV_HOTBACKUP */
-/********************************************************//**
-Creates the recovery system. */
-UNIV_INTERN
-void
-recv_sys_create(void);
-/*=================*/
-/**********************************************************//**
-Release recovery system mutexes. */
-UNIV_INTERN
+/** Clean up after recv_sys_init() */
void
-recv_sys_close(void);
-/*================*/
-/********************************************************//**
-Frees the recovery system memory. */
-UNIV_INTERN
+recv_sys_close();
+/** Initialize the redo log recovery subsystem. */
void
-recv_sys_mem_free(void);
-/*===================*/
+recv_sys_init();
/********************************************************//**
-Inits the recovery system for a recovery operation. */
-UNIV_INTERN
+Frees the recovery system. */
void
-recv_sys_init(
-/*==========*/
- ulint available_memory); /*!< in: available memory in bytes */
-#ifndef UNIV_HOTBACKUP
+recv_sys_debug_free(void);
+/*=====================*/
+
+/** Read a log segment to a buffer.
+@param[out] buf buffer
+@param[in] group redo log files
+@param[in,out] start_lsn in: read area start; out: the last valid lsn read
+@param[in] end_lsn read area end
+@return false if an invalid block (e.g. checksum mismatch, perhaps an
+incompletely written block) was encountered, true otherwise */
+bool
+log_group_read_log_seg(
+ byte* buf,
+ const log_group_t* group,
+ lsn_t* start_lsn,
+ lsn_t end_lsn);
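The return value documented above is the interesting change: log_group_read_log_seg() stops at the first block that fails validation (for example a checksum mismatch from a torn write) and leaves *start_lsn at the LSN where scanning ended. A sketch of that contract over an in-memory buffer, with the per-block verification reduced to a placeholder:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    enum { LOG_BLOCK = 512 };

    // Placeholder: the real check validates the block checksum and the
    // header block number derived from the expected LSN.
    static bool block_valid(const unsigned char* /*block*/) { return true; }

    // Copy whole blocks covering [*start_lsn, end_lsn) from 'log' to 'out'.
    // On an invalid block, stop, leave *start_lsn at the first LSN that
    // failed to validate, and return false, mirroring the contract above.
    static bool read_log_seg(unsigned char* out, const unsigned char* log,
                             uint64_t* start_lsn, uint64_t end_lsn)
    {
        const uint64_t base = *start_lsn;
        for (uint64_t lsn = base; lsn < end_lsn; lsn += LOG_BLOCK) {
            const unsigned char* block = log + (size_t)(lsn - base);
            if (!block_valid(block)) {
                *start_lsn = lsn;
                return false;
            }
            memcpy(out + (size_t)(lsn - base), block, LOG_BLOCK);
        }
        *start_lsn = end_lsn;
        return true;
    }
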
+
/********************************************************//**
Reset the state of the recovery system variables. */
-UNIV_INTERN
void
recv_sys_var_init(void);
/*===================*/
-#endif /* !UNIV_HOTBACKUP */
+
/** Apply the hash table of stored log records to persistent data pages.
@param[in] last_batch whether the change buffer merge will be
performed as part of the operation */
-UNIV_INTERN
void
recv_apply_hashed_log_recs(bool last_batch);
-#ifdef UNIV_HOTBACKUP
-/*******************************************************************//**
-Applies log records in the hash table to a backup. */
-UNIV_INTERN
-void
-recv_apply_log_recs_for_backup(void);
-/*================================*/
-#endif
-#ifdef UNIV_LOG_ARCHIVE
-/********************************************************//**
-Recovers from archived log files, and also from log files, if they exist.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-ulint
-recv_recovery_from_archive_start(
-/*=============================*/
- lsn_t min_flushed_lsn,/*!< in: min flushed lsn field from the
- data files */
- lsn_t limit_lsn, /*!< in: recover up to this lsn if
- possible */
- ulint first_log_no); /*!< in: number of the first archived
- log file to use in the recovery; the
- file will be searched from
- INNOBASE_LOG_ARCH_DIR specified in
- server config file */
-/********************************************************//**
-Completes recovery from archive. */
-UNIV_INTERN
-void
-recv_recovery_from_archive_finish(void);
-/*===================================*/
-#endif /* UNIV_LOG_ARCHIVE */
+
+/** Whether to store redo log records to the hash table */
+enum store_t {
+ /** Do not store redo log records. */
+ STORE_NO,
+ /** Store redo log records. */
+ STORE_YES,
+ /** Store redo log records if the tablespace exists. */
+ STORE_IF_EXISTS
+};
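recv_parse_log_recs() consults this mode for every parsed record. A small sketch of how the three values could gate buffering during the scan; the tablespace-existence predicate is a stand-in, not the real fil_space lookup:

    #include <cstdint>

    enum store_t { STORE_NO, STORE_YES, STORE_IF_EXISTS };

    // Stand-in for a tablespace-existence check during the scan.
    static bool tablespace_exists(uint32_t space_id) { return space_id != 0; }

    // Decide whether a parsed record for space_id is buffered for apply.
    static bool should_store(store_t store, uint32_t space_id)
    {
        switch (store) {
        case STORE_NO:        return false;
        case STORE_YES:       return true;
        case STORE_IF_EXISTS: return tablespace_exists(space_id);
        }
        return false;
    }
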
+
+
+/** Adds data from a new log block to the parsing buffer of recv_sys if
+recv_sys->parse_start_lsn is non-zero.
+@param[in] log_block log block to add
+@param[in] scanned_lsn LSN up to which data was found
+ in this log block
+@return true if more data added */
+bool recv_sys_add_to_parsing_buf(const byte* log_block, lsn_t scanned_lsn);
+
+/** Parse log records from a buffer, and optionally store them in a
+hash table where they await merging to the file pages.
+@param[in] checkpoint_lsn the LSN of the latest checkpoint
+@param[in] store whether to store page operations
+@param[in] available_memory memory to read the redo logs
+@param[in] apply whether to apply the records
+@return whether the MLOG_CHECKPOINT record was seen for the first time,
+or corruption was noticed */
+bool recv_parse_log_recs(
+ lsn_t checkpoint_lsn,
+ store_t* store,
+ ulint available_memory,
+ bool apply);
+
+/** Moves the parsing buffer data left to the buffer start */
+void recv_sys_justify_left_parsing_buf();
+
+/** Report optimized DDL operation (without redo log),
+corresponding to MLOG_INDEX_LOAD.
+@param[in] space_id tablespace identifier */
+extern void (*log_optimized_ddl_op)(ulint space_id);
+
+/** Report backup-unfriendly TRUNCATE operation (with separate log file),
+corresponding to MLOG_TRUNCATE. */
+extern void (*log_truncate)();
+
+/** Report an operation to create, delete, or rename a file during backup.
+@param[in] space_id tablespace identifier
+@param[in] flags tablespace flags (NULL if not create)
+@param[in] name file name (not NUL-terminated)
+@param[in] len length of name, in bytes
+@param[in] new_name new file name (NULL if not rename)
+@param[in] new_len length of new_name, in bytes (0 if NULL) */
+extern void (*log_file_op)(ulint space_id, const byte* flags,
+ const byte* name, ulint len,
+ const byte* new_name, ulint new_len);
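These three function pointers are hooks for an external redo-log consumer such as a backup tool: it installs handlers before scanning, and the parser calls them when it meets MLOG_INDEX_LOAD, MLOG_TRUNCATE, or file-level records. A sketch of what wiring them up might look like; the handler bodies and behaviour are illustrative assumptions:

    // Assumed handler matching the log_optimized_ddl_op extern above.
    static void backup_optimized_ddl_op(unsigned long space_id)
    {
        // e.g. remember that space_id must be re-copied in full,
        // because its index build wrote no redo log
        (void) space_id;
    }

    // Assumed handler matching the log_truncate extern above.
    static void backup_truncate()
    {
        // e.g. fail the backup: MLOG_TRUNCATE cannot be replayed
        // from the redo log alone
    }

    // Before starting the redo scan, the tool would install its handlers:
    //   log_optimized_ddl_op = backup_optimized_ddl_op;
    //   log_truncate         = backup_truncate;
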
/** Block of log record data */
struct recv_data_t{
@@ -319,7 +179,7 @@ struct recv_data_t{
/** Stored log record struct */
struct recv_t{
- byte type; /*!< log record type */
+ mlog_id_t type; /*!< log record type */
ulint len; /*!< log record body length in bytes */
recv_data_t* data; /*!< chain of blocks containing the log record
body */
@@ -335,52 +195,40 @@ struct recv_t{
rec_list;/*!< list of log records for this page */
};
-/** States of recv_addr_t */
-enum recv_addr_state {
- /** not yet processed */
- RECV_NOT_PROCESSED,
- /** page is being read */
- RECV_BEING_READ,
- /** log records are being applied on the page */
- RECV_BEING_PROCESSED,
- /** log records have been applied on the page, or they have
- been discarded because the tablespace does not exist */
- RECV_PROCESSED
-};
-
-/** Hashed page file address struct */
-struct recv_addr_t{
- enum recv_addr_state state;
- /*!< recovery state of the page */
- unsigned space:32;/*!< space id */
- unsigned page_no:32;/*!< page number */
- UT_LIST_BASE_NODE_T(recv_t)
- rec_list;/*!< list of log records for this page */
- hash_node_t addr_hash;/*!< hash node in the hash bucket chain */
-};
-
struct recv_dblwr_t {
- void add(byte* page);
+ /** Add a page frame to the doublewrite recovery buffer. */
+ void add(byte* page) {
+ pages.push_back(page);
+ }
- byte* find_page(ulint space_id, ulint page_no);
+ /** Find a doublewrite copy of a page.
+ @param[in] space_id tablespace identifier
+ @param[in] page_no page number
+ @return page frame
+ @retval NULL if no page was found */
+ const byte* find_page(ulint space_id, ulint page_no);
- std::list<byte *> pages; /* Pages from double write buffer */
+ typedef std::list<byte*, ut_allocator<byte*> > list;
- void operator() () {
- pages.clear();
- }
+ /** Recovered doublewrite buffer page frames */
+ list pages;
};
/** Recovery system data structure */
struct recv_sys_t{
-#ifndef UNIV_HOTBACKUP
ib_mutex_t mutex; /*!< mutex protecting the fields apply_log_recs,
n_addrs, and the state field in each recv_addr
struct */
ib_mutex_t writer_mutex;/*!< mutex coordinating
flushing between recv_writer_thread and
the recovery thread. */
-#endif /* !UNIV_HOTBACKUP */
+ os_event_t flush_start;/*!< event to activate
+ page cleaner threads */
+ os_event_t flush_end;/*!< event to signal that the page
+ cleaner has finished the request */
+ buf_flush_t flush_type;/*!< type of the flush request.
+ BUF_FLUSH_LRU: flush end of LRU, keeping free blocks.
+ BUF_FLUSH_LIST: flush all blocks. */
ibool apply_log_recs;
/*!< this is TRUE when log rec application to
pages is allowed; this flag tells the
@@ -389,16 +237,6 @@ struct recv_sys_t{
ibool apply_batch_on;
/*!< this is TRUE when a log rec application
batch is running */
- lsn_t lsn; /*!< log sequence number */
- ulint last_log_buf_size;
- /*!< size of the log buffer when the database
- last time wrote to the log */
- byte* last_block;
- /*!< possible incomplete last recovered log
- block */
- byte* last_block_buf_start;
- /*!< the nonaligned start address of the
- preceding buffer */
byte* buf; /*!< buffer for parsing log records */
ulint len; /*!< amount of data in buf */
lsn_t parse_start_lsn;
@@ -418,28 +256,39 @@ struct recv_sys_t{
lsn_t recovered_lsn;
/*!< the log records have been parsed up to
this lsn */
- lsn_t limit_lsn;/*!< recovery should be made at most
- up to this lsn */
- ibool found_corrupt_log;
- /*!< this is set to TRUE if we during log
- scan find a corrupt log block, or a corrupt
- log record, or there is a log parsing
- buffer overflow */
+ bool found_corrupt_log;
+ /*!< set when finding a corrupt log
+ block or record, or there is a log
+ parsing buffer overflow */
+ bool found_corrupt_fs;
+ /*!< set when an inconsistency with
+ the file system contents is detected
+ during log scan or apply */
+ lsn_t mlog_checkpoint_lsn;
+ /*!< the LSN of a MLOG_CHECKPOINT
+ record, or 0 if none was parsed */
/** the time when progress was last reported */
time_t progress_time;
-#ifdef UNIV_LOG_ARCHIVE
- log_group_t* archive_group;
- /*!< in archive recovery: the log group whose
- archive is read */
-#endif /* !UNIV_LOG_ARCHIVE */
mem_heap_t* heap; /*!< memory heap of log records and file
addresses*/
hash_table_t* addr_hash;/*!< hash table of file addresses of pages */
ulint n_addrs;/*!< number of not processed hashed file
addresses in the hash table */
+ /** Undo tablespaces for which truncate has been logged
+ (indexed by id - srv_undo_space_id_start) */
+ struct trunc {
+ /** log sequence number of MLOG_FILE_CREATE2, or 0 if none */
+ lsn_t lsn;
+ /** truncated size of the tablespace, or 0 if not truncated */
+ unsigned pages;
+ } truncated_undo_spaces[127];
+
recv_dblwr_t dblwr;
+ /** LSN most recently added to the hash table of log records. */
+ lsn_t last_stored_lsn;
+
/** Determine whether redo log recovery progress should be reported.
@param[in] time the current time
@return whether progress should be reported
@@ -461,7 +310,7 @@ extern recv_sys_t* recv_sys;
/** TRUE when applying redo log records during crash recovery; FALSE
otherwise. Note that this is FALSE while a background thread is
rolling back incomplete transactions. */
-extern ibool recv_recovery_on;
+extern volatile bool recv_recovery_on;
/** If the following is TRUE, the buffer pool file pages must be invalidated
after recovery and no ibuf operations are allowed; this becomes TRUE if
the log record hash table becomes too full, and log records must be merged
@@ -471,25 +320,19 @@ buffer pool before the pages have been recovered to the up-to-date state.
TRUE means that recovery is running and no operations on the log files
are allowed yet: the variable name is misleading. */
-extern ibool recv_no_ibuf_operations;
+extern bool recv_no_ibuf_operations;
/** TRUE when recv_init_crash_recovery() has been called. */
-extern ibool recv_needed_recovery;
+extern bool recv_needed_recovery;
#ifdef UNIV_DEBUG
/** TRUE if writing to the redo log (mtr_commit) is forbidden.
Protected by log_sys->mutex. */
-extern ibool recv_no_log_write;
+extern bool recv_no_log_write;
#endif /* UNIV_DEBUG */
/** TRUE if buf_page_is_corrupted() should check if the log sequence
number (FIL_PAGE_LSN) is in the future. Initially FALSE, and set by
-recv_recovery_from_checkpoint_start_func(). */
-extern ibool recv_lsn_checks_on;
-#ifdef UNIV_HOTBACKUP
-/** TRUE when the redo log is being backed up */
-extern ibool recv_is_making_a_backup;
-#endif /* UNIV_HOTBACKUP */
-/** Maximum page number encountered in the redo log */
-extern ulint recv_max_parsed_page_no;
+recv_recovery_from_checkpoint_start(). */
+extern bool recv_lsn_checks_on;
/** Size of the parsing buffer; it must accommodate RECV_SCAN_SIZE many
times! */
@@ -499,26 +342,22 @@ times! */
roll-forward */
#define RECV_SCAN_SIZE (4 * UNIV_PAGE_SIZE)
-/** This many frames must be left free in the buffer pool when we scan
-the log and store the scanned log records in the buffer pool: we will
-use these free frames to read in pages when we start applying the
-log records to the database. */
-extern ulint recv_n_pool_free_frames;
-
-/******************************************************//**
-Checks the 4-byte checksum to the trailer checksum field of a log
-block. We also accept a log block in the old format before
-InnoDB-3.23.52 where the checksum field contains the log block number.
-@return TRUE if ok, or if the log block may be in the format of InnoDB
-version predating 3.23.52 */
-ibool
-log_block_checksum_is_ok_or_old_format(
-/*===================================*/
- const byte* block, /*!< in: pointer to a log block */
- bool print_err); /*!< in print error ? */
-
-#ifndef UNIV_NONINL
-#include "log0recv.ic"
-#endif
+/** Low-level function for the recovery system to create a page
+from buffered page-initialization redo log records.
+@param[in] page_id page to be created using redo logs
+@return block containing the created page, or NULL if the page
+could not be created */
+buf_block_t* recv_recovery_create_page_low(const page_id_t page_id);
+
+/** Have the recovery system create a page from buffered
+page-initialization redo log records.
+@param[in] page_id page to be created using redo logs
+@return block containing the initialized page, or NULL */
+inline buf_block_t* recv_recovery_create_page(const page_id_t page_id)
+{
+ if (UNIV_LIKELY(!recv_recovery_on))
+ return NULL;
+
+ return recv_recovery_create_page_low(page_id);
+}
#endif
diff --git a/storage/innobase/include/log0recv.ic b/storage/innobase/include/log0recv.ic
deleted file mode 100644
index 332d656255f..00000000000
--- a/storage/innobase/include/log0recv.ic
+++ /dev/null
@@ -1,53 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/log0recv.ic
-Recovery
-
-Created 9/20/1997 Heikki Tuuri
-*******************************************************/
-
-#include "univ.i"
-
-/*******************************************************************//**
-Returns TRUE if recovery is currently running.
-@return recv_recovery_on */
-UNIV_INLINE
-ibool
-recv_recovery_is_on(void)
-/*=====================*/
-{
- return(recv_recovery_on);
-}
-
-#ifdef UNIV_LOG_ARCHIVE
-/** TRUE when applying redo log records from an archived log file */
-extern ibool recv_recovery_from_backup_on;
-
-/*******************************************************************//**
-Returns TRUE if recovery from backup is currently running.
-@return recv_recovery_from_backup_on */
-UNIV_INLINE
-ibool
-recv_recovery_from_backup_is_on(void)
-/*=================================*/
-{
- return(recv_recovery_from_backup_on);
-}
-#endif /* UNIV_LOG_ARCHIVE */
diff --git a/storage/innobase/include/row0import.ic b/storage/innobase/include/log0types.h
index 75b45e7710a..56faa7467cf 100644
--- a/storage/innobase/include/row0import.ic
+++ b/storage/innobase/include/log0types.h
@@ -1,6 +1,12 @@
/*****************************************************************************
-Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -17,9 +23,28 @@ this program; if not, write to the Free Software Foundation, Inc.,
*****************************************************************************/
/**************************************************//**
-@file include/row0import.ic
-
-Import tablespace inline functions.
+@file include/log0types.h
+Log types
-Created 2012-02-08 Sunny Bains
+Created 2013-03-15 Sunny Bains
*******************************************************/
+
+#ifndef log0types_h
+#define log0types_h
+
+#include "univ.i"
+
+/* Type used for all log sequence number storage and arithmetic */
+typedef ib_uint64_t lsn_t;
+
+#define LSN_MAX IB_UINT64_MAX
+
+#define LSN_PF UINT64PF
+
+/** The redo log manager */
+struct RedoLog;
+
+/** The recovery implementation */
+struct redo_recover_t;
+
+#endif /* log0types_h */
diff --git a/storage/innobase/include/mach0data.h b/storage/innobase/include/mach0data.h
index 24eee4a0071..8141c8a91e0 100644
--- a/storage/innobase/include/mach0data.h
+++ b/storage/innobase/include/mach0data.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,10 +28,11 @@ Created 11/28/1995 Heikki Tuuri
#ifndef mach0data_h
#define mach0data_h
+#include "univ.i"
+
#ifndef UNIV_INNOCHECKSUM
-#include "univ.i"
-#include "ut0byte.h"
+#include "mtr0types.h"
/* The data and all fields are always stored in a database file
in the same format: ascii, big-endian, ... .
@@ -45,15 +47,14 @@ mach_write_to_1(
/*============*/
byte* b, /*!< in: pointer to byte where to store */
ulint n); /*!< in: ulint integer to be stored, >= 0, < 256 */
-/********************************************************//**
-The following function is used to fetch data from one byte.
-@return ulint integer, >= 0, < 256 */
+/** The following function is used to fetch data from one byte.
+@param[in] b pointer to a byte to read
+@return ulint integer, >= 0, < 256 */
UNIV_INLINE
-ulint
+uint8_t
mach_read_from_1(
-/*=============*/
- const byte* b) /*!< in: pointer to byte */
- MY_ATTRIBUTE((nonnull, pure));
+ const byte* b)
+ MY_ATTRIBUTE((warn_unused_result));
/*******************************************************//**
The following function is used to store data in two consecutive
bytes. We store the most significant byte to the lower address. */
@@ -63,22 +64,23 @@ mach_write_to_2(
/*============*/
byte* b, /*!< in: pointer to two bytes where to store */
ulint n); /*!< in: ulint integer to be stored, >= 0, < 64k */
-/********************************************************//**
-The following function is used to fetch data from two consecutive
+#endif /* !UNIV_INNOCHECKSUM */
+/** The following function is used to fetch data from 2 consecutive
bytes. The most significant byte is at the lowest address.
-@return ulint integer, >= 0, < 64k */
+@param[in] b pointer to 2 bytes to read
+@return 2-byte integer, >= 0, < 64k */
UNIV_INLINE
-ulint
+uint16_t
mach_read_from_2(
-/*=============*/
- const byte* b) /*!< in: pointer to two bytes */
- MY_ATTRIBUTE((nonnull, pure));
+ const byte* b)
+ MY_ATTRIBUTE((warn_unused_result));
+#ifndef UNIV_INNOCHECKSUM
/********************************************************//**
The following function is used to convert a 16-bit data item
to the canonical format, for fast bytewise equality test
against memory.
-@return 16-bit integer in canonical format */
+@return 16-bit integer in canonical format */
UNIV_INLINE
uint16
mach_encode_2(
@@ -89,7 +91,7 @@ mach_encode_2(
The following function is used to convert a 16-bit data item
from the canonical format, for fast bytewise equality test
against memory.
-@return integer in machine-dependent format */
+@return integer in machine-dependent format */
UNIV_INLINE
ulint
mach_decode_2(
@@ -105,16 +107,15 @@ mach_write_to_3(
/*============*/
byte* b, /*!< in: pointer to 3 bytes where to store */
ulint n); /*!< in: ulint integer to be stored */
-/********************************************************//**
-The following function is used to fetch data from 3 consecutive
+/** The following function is used to fetch data from 3 consecutive
bytes. The most significant byte is at the lowest address.
-@return ulint integer */
+@param[in] b pointer to 3 bytes to read
+@return 32 bit integer */
UNIV_INLINE
-ulint
+uint32_t
mach_read_from_3(
-/*=============*/
- const byte* b) /*!< in: pointer to 3 bytes */
- MY_ATTRIBUTE((nonnull, pure));
+ const byte* b)
+ MY_ATTRIBUTE((warn_unused_result));
/*******************************************************//**
The following function is used to store data in four consecutive
bytes. We store the most significant byte to the lowest address. */
@@ -124,19 +125,18 @@ mach_write_to_4(
/*============*/
byte* b, /*!< in: pointer to four bytes where to store */
ulint n); /*!< in: ulint integer to be stored */
-/********************************************************//**
-The following function is used to fetch data from 4 consecutive
+/** The following function is used to fetch data from 4 consecutive
bytes. The most significant byte is at the lowest address.
-@return ulint integer */
+@param[in] b pointer to 4 bytes to read
+@return 32 bit integer */
UNIV_INLINE
-ulint
+uint32_t
mach_read_from_4(
-/*=============*/
- const byte* b) /*!< in: pointer to four bytes */
- MY_ATTRIBUTE((nonnull, pure));
+ const byte* b)
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************//**
Writes a ulint in a compressed form (1..5 bytes).
-@return stored size in bytes */
+@return stored size in bytes */
UNIV_INLINE
ulint
mach_write_compressed(
@@ -145,22 +145,21 @@ mach_write_compressed(
ulint n); /*!< in: ulint integer to be stored */
/*********************************************************//**
Returns the size of an ulint when written in the compressed form.
-@return compressed size in bytes */
+@return compressed size in bytes */
UNIV_INLINE
ulint
mach_get_compressed_size(
/*=====================*/
ulint n) /*!< in: ulint integer to be stored */
MY_ATTRIBUTE((const));
-/*********************************************************//**
-Reads a ulint in a compressed form.
-@return read integer */
-UNIV_INLINE
-ulint
-mach_read_compressed(
-/*=================*/
- const byte* b) /*!< in: pointer to memory from where to read */
- MY_ATTRIBUTE((nonnull, pure));
+/** Read a 32-bit integer in a compressed form.
+@param[in,out] b pointer to memory where to read;
+advanced by the number of bytes consumed
+@return unsigned value */
+UNIV_INLINE
+ib_uint32_t
+mach_read_next_compressed(
+ const byte** b);
/*******************************************************//**
The following function is used to store data in 6 consecutive
bytes. We store the most significant byte to the lowest address. */
@@ -173,13 +172,13 @@ mach_write_to_6(
/********************************************************//**
The following function is used to fetch data from 6 consecutive
bytes. The most significant byte is at the lowest address.
-@return 48-bit integer */
+@return 48-bit integer */
UNIV_INLINE
ib_uint64_t
mach_read_from_6(
/*=============*/
const byte* b) /*!< in: pointer to 6 bytes */
- MY_ATTRIBUTE((nonnull, pure));
+ MY_ATTRIBUTE((warn_unused_result));
/*******************************************************//**
The following function is used to store data in 7 consecutive
bytes. We store the most significant byte to the lowest address. */
@@ -192,13 +191,13 @@ mach_write_to_7(
/********************************************************//**
The following function is used to fetch data from 7 consecutive
bytes. The most significant byte is at the lowest address.
-@return 56-bit integer */
+@return 56-bit integer */
UNIV_INLINE
ib_uint64_t
mach_read_from_7(
/*=============*/
const byte* b) /*!< in: pointer to 7 bytes */
- MY_ATTRIBUTE((nonnull, pure));
+ MY_ATTRIBUTE((warn_unused_result));
/*******************************************************//**
The following function is used to store data in 8 consecutive
bytes. We store the most significant byte to the lowest address. */
@@ -211,97 +210,77 @@ mach_write_to_8(
/********************************************************//**
The following function is used to fetch data from 8 consecutive
bytes. The most significant byte is at the lowest address.
-@return 64-bit integer */
+@return 64-bit integer */
UNIV_INLINE
ib_uint64_t
mach_read_from_8(
/*=============*/
const byte* b) /*!< in: pointer to 8 bytes */
- MY_ATTRIBUTE((nonnull, pure));
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************//**
Writes a 64-bit integer in a compressed form (5..9 bytes).
-@return size in bytes */
+@return size in bytes */
UNIV_INLINE
ulint
-mach_ull_write_compressed(
+mach_u64_write_compressed(
/*======================*/
byte* b, /*!< in: pointer to memory where to store */
ib_uint64_t n); /*!< in: 64-bit integer to be stored */
-/*********************************************************//**
-Returns the size of a 64-bit integer when written in the compressed form.
-@return compressed size in bytes */
-UNIV_INLINE
-ulint
-mach_ull_get_compressed_size(
-/*=========================*/
- ib_uint64_t n); /*!< in: 64-bit integer to be stored */
-/*********************************************************//**
-Reads a 64-bit integer in a compressed form.
-@return the value read */
+/** Read a 64-bit integer in a compressed form.
+@param[in,out] b pointer to memory where to read;
+advanced by the number of bytes consumed
+@return unsigned value */
UNIV_INLINE
ib_uint64_t
-mach_ull_read_compressed(
-/*=====================*/
- const byte* b) /*!< in: pointer to memory from where to read */
- MY_ATTRIBUTE((nonnull, pure));
+mach_u64_read_next_compressed(
+ const byte** b);
/*********************************************************//**
Writes a 64-bit integer in a compressed form (1..11 bytes).
-@return size in bytes */
+@return size in bytes */
UNIV_INLINE
ulint
-mach_ull_write_much_compressed(
+mach_u64_write_much_compressed(
/*===========================*/
byte* b, /*!< in: pointer to memory where to store */
ib_uint64_t n); /*!< in: 64-bit integer to be stored */
/*********************************************************//**
-Returns the size of a 64-bit integer when written in the compressed form.
-@return compressed size in bytes */
-UNIV_INLINE
-ulint
-mach_ull_get_much_compressed_size(
-/*==============================*/
- ib_uint64_t n) /*!< in: 64-bit integer to be stored */
- MY_ATTRIBUTE((const));
-/*********************************************************//**
Reads a 64-bit integer in a compressed form.
-@return the value read */
+@return the value read */
UNIV_INLINE
ib_uint64_t
-mach_ull_read_much_compressed(
+mach_u64_read_much_compressed(
/*==========================*/
const byte* b) /*!< in: pointer to memory from where to read */
- MY_ATTRIBUTE((nonnull, pure));
-/*********************************************************//**
-Reads a ulint in a compressed form if the log record fully contains it.
-@return pointer to end of the stored field, NULL if not complete */
-UNIV_INTERN
-byte*
+ MY_ATTRIBUTE((warn_unused_result));
+/** Read a 32-bit integer in a compressed form.
+@param[in,out] ptr pointer to memory where to read;
+advanced by the number of bytes consumed, or set NULL if out of space
+@param[in] end_ptr end of the buffer
+@return unsigned value */
+ib_uint32_t
mach_parse_compressed(
-/*==================*/
- byte* ptr, /*!< in: pointer to buffer from where to read */
- byte* end_ptr,/*!< in: pointer to end of the buffer */
- ulint* val); /*!< out: read value */
-/*********************************************************//**
-Reads a 64-bit integer in a compressed form
-if the log record fully contains it.
-@return pointer to end of the stored field, NULL if not complete */
+ const byte** ptr,
+ const byte* end_ptr);
+/** Read a 64-bit integer in a compressed form.
+@param[in,out] ptr pointer to memory where to read;
+advanced by the number of bytes consumed, or set NULL if out of space
+@param[in] end_ptr end of the buffer
+@return unsigned value */
UNIV_INLINE
-byte*
-mach_ull_parse_compressed(
-/*======================*/
- byte* ptr, /*!< in: pointer to buffer from where to read */
- byte* end_ptr,/*!< in: pointer to end of the buffer */
- ib_uint64_t* val); /*!< out: read value */
-#ifndef UNIV_HOTBACKUP
+ib_uint64_t
+mach_u64_parse_compressed(
+ const byte** ptr,
+ const byte* end_ptr);
+
/*********************************************************//**
Reads a double. It is stored in a little-endian format.
-@return double read */
+@return double read */
UNIV_INLINE
double
mach_double_read(
/*=============*/
const byte* b) /*!< in: pointer to memory from where to read */
- MY_ATTRIBUTE((nonnull, pure));
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************//**
Writes a double. It is stored in a little-endian format. */
UNIV_INLINE
@@ -312,13 +291,13 @@ mach_double_write(
double d); /*!< in: double */
/*********************************************************//**
Reads a float. It is stored in a little-endian format.
-@return float read */
+@return float read */
UNIV_INLINE
float
mach_float_read(
/*============*/
const byte* b) /*!< in: pointer to memory from where to read */
- MY_ATTRIBUTE((nonnull, pure));
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************//**
Writes a float. It is stored in a little-endian format. */
UNIV_INLINE
@@ -329,14 +308,14 @@ mach_float_write(
float d); /*!< in: float */
/*********************************************************//**
Reads a ulint stored in the little-endian format.
-@return unsigned long int */
+@return unsigned long int */
UNIV_INLINE
ulint
mach_read_from_n_little_endian(
/*===========================*/
const byte* buf, /*!< in: from where to read */
ulint buf_size) /*!< in: from how many bytes to read */
- MY_ATTRIBUTE((nonnull, pure));
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************//**
Writes a ulint in the little-endian format. */
UNIV_INLINE
@@ -348,13 +327,13 @@ mach_write_to_n_little_endian(
ulint n); /*!< in: unsigned long int to write */
/*********************************************************//**
Reads a ulint stored in the little-endian format.
-@return unsigned long int */
+@return unsigned long int */
UNIV_INLINE
ulint
mach_read_from_2_little_endian(
/*===========================*/
const byte* buf) /*!< in: from where to read */
- MY_ATTRIBUTE((nonnull, pure));
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************//**
Writes a ulint in the little-endian format. */
UNIV_INLINE
@@ -366,7 +345,7 @@ mach_write_to_2_little_endian(
/*********************************************************//**
Convert integral type from storage byte order (big endian) to
host byte order.
-@return integer value */
+@return integer value */
UNIV_INLINE
ib_uint64_t
mach_read_int_type(
@@ -374,17 +353,6 @@ mach_read_int_type(
const byte* src, /*!< in: where to read from */
ulint len, /*!< in: length of src */
ibool unsigned_type); /*!< in: signed or unsigned flag */
-/***********************************************************//**
-Convert integral type from host byte order to (big-endian) storage
-byte order. */
-UNIV_INLINE
-void
-mach_write_int_type(
-/*================*/
- byte* dest, /*!< in: where to write*/
- const byte* src, /*!< in: where to read from */
- ulint len, /*!< in: length of src */
- bool usign); /*!< in: signed or unsigned flag */
/*************************************************************
Convert a ulonglong integer from host byte order to (big-endian)
@@ -398,21 +366,19 @@ mach_write_ulonglong(
ulint len, /*!< in: length of dest */
bool usign); /*!< in: signed or unsigned flag */
-/********************************************************//**
-Reads 1 - 4 bytes from a file page buffered in the buffer pool.
-@return value read */
+#endif /* !UNIV_INNOCHECKSUM */
+
+/** Read 1 to 4 bytes from a file page buffered in the buffer pool.
+@param[in] ptr pointer where to read
+@param[in] type MLOG_1BYTE, MLOG_2BYTES, or MLOG_4BYTES
+@return value read */
UNIV_INLINE
ulint
mach_read_ulint(
-/*============*/
- const byte* ptr, /*!< in: pointer from where to read */
- ulint type); /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+ const byte* ptr,
+ mlog_id_t type)
+ MY_ATTRIBUTE((warn_unused_result));
-#endif /* !UNIV_HOTBACKUP */
-#endif /* !UNIV_INNOCHECKSUM */
-
-#ifndef UNIV_NONINL
#include "mach0data.ic"
-#endif
#endif
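The mach_*_compressed family referenced throughout this header stores a 32-bit value in 1..5 bytes, using the run of leading one-bits in the first byte to encode the length. A self-contained encode/decode pair in that classic InnoDB layout, offered as a sketch to read alongside mach0data.ic rather than a drop-in replacement:

    #include <cassert>
    #include <cstdint>

    // Write n in 1..5 bytes; return the number of bytes used.
    static int write_compressed(unsigned char* b, uint32_t n)
    {
        if (n < 0x80) {               // 0xxxxxxx
            b[0] = (unsigned char) n;
            return 1;
        } else if (n < 0x4000) {      // 10xxxxxx + 1 byte
            b[0] = (unsigned char)(0x80 | (n >> 8));
            b[1] = (unsigned char) n;
            return 2;
        } else if (n < 0x200000) {    // 110xxxxx + 2 bytes
            b[0] = (unsigned char)(0xC0 | (n >> 16));
            b[1] = (unsigned char)(n >> 8);
            b[2] = (unsigned char) n;
            return 3;
        } else if (n < 0x10000000) {  // 1110xxxx + 3 bytes
            b[0] = (unsigned char)(0xE0 | (n >> 24));
            b[1] = (unsigned char)(n >> 16);
            b[2] = (unsigned char)(n >> 8);
            b[3] = (unsigned char) n;
            return 4;
        }
        b[0] = 0xF0;                  // 11110000 + full 4 bytes
        b[1] = (unsigned char)(n >> 24);
        b[2] = (unsigned char)(n >> 16);
        b[3] = (unsigned char)(n >> 8);
        b[4] = (unsigned char) n;
        return 5;
    }

    // Counterpart of mach_read_next_compressed(): advance *b past the value.
    static uint32_t read_next_compressed(const unsigned char** b)
    {
        const unsigned char* p = *b;
        uint32_t n;
        if (p[0] < 0x80) {
            n = p[0];                                          *b += 1;
        } else if (p[0] < 0xC0) {
            n = (uint32_t(p[0] & 0x3F) << 8) | p[1];           *b += 2;
        } else if (p[0] < 0xE0) {
            n = (uint32_t(p[0] & 0x1F) << 16)
              | (uint32_t(p[1]) << 8) | p[2];                  *b += 3;
        } else if (p[0] < 0xF0) {
            n = (uint32_t(p[0] & 0x0F) << 24)
              | (uint32_t(p[1]) << 16)
              | (uint32_t(p[2]) << 8) | p[3];                  *b += 4;
        } else {
            n = (uint32_t(p[1]) << 24) | (uint32_t(p[2]) << 16)
              | (uint32_t(p[3]) << 8) | p[4];                  *b += 5;
        }
        return n;
    }

    int main()
    {
        unsigned char buf[5];
        const uint32_t samples[] = { 0, 127, 128, 16383, 16384, 0xFFFFFFFF };
        for (uint32_t n : samples) {
            write_compressed(buf, n);
            const unsigned char* p = buf;
            assert(read_next_compressed(&p) == n); // round-trips exactly
        }
    }
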
diff --git a/storage/innobase/include/mach0data.ic b/storage/innobase/include/mach0data.ic
index c4837596f05..408044292a5 100644
--- a/storage/innobase/include/mach0data.ic
+++ b/storage/innobase/include/mach0data.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
@@ -27,7 +27,7 @@ Created 11/28/1995 Heikki Tuuri
#ifndef UNIV_INNOCHECKSUM
-#include "ut0mem.h"
+#include "mtr0types.h"
/*******************************************************//**
The following function is used to store data in one byte. */
@@ -38,24 +38,12 @@ mach_write_to_1(
byte* b, /*!< in: pointer to byte where to store */
ulint n) /*!< in: ulint integer to be stored, >= 0, < 256 */
{
- ut_ad(b);
ut_ad((n & ~0xFFUL) == 0);
b[0] = (byte) n;
}
-#endif /* !UNIV_INNOCHECKSUM */
-/********************************************************//**
-The following function is used to fetch data from one byte.
-@return ulint integer, >= 0, < 256 */
-UNIV_INLINE
-ulint
-mach_read_from_1(
-/*=============*/
- const byte* b) /*!< in: pointer to byte */
-{
- return((ulint)(b[0]));
-}
+#endif /* !UNIV_INNOCHECKSUM */
/*******************************************************//**
The following function is used to store data in two consecutive
@@ -67,24 +55,33 @@ mach_write_to_2(
byte* b, /*!< in: pointer to two bytes where to store */
ulint n) /*!< in: ulint integer to be stored */
{
- ut_ad(b);
ut_ad((n & ~0xFFFFUL) == 0);
b[0] = (byte)(n >> 8);
b[1] = (byte)(n);
}
-/********************************************************//**
-The following function is used to fetch data from 2 consecutive
+/** The following function is used to fetch data from one byte.
+@param[in] b pointer to a byte to read
+@return ulint integer, >= 0, < 256 */
+UNIV_INLINE
+uint8_t
+mach_read_from_1(
+ const byte* b)
+{
+ return(uint8_t(*b));
+}
+
+/** The following function is used to fetch data from 2 consecutive
bytes. The most significant byte is at the lowest address.
-@return ulint integer */
+@param[in] b pointer to 2 bytes to read
+@return 2-byte integer, >= 0, < 64k */
UNIV_INLINE
-ulint
+uint16_t
mach_read_from_2(
-/*=============*/
- const byte* b) /*!< in: pointer to 2 bytes */
+ const byte* b)
{
- return(((ulint)(b[0]) << 8) | (ulint)(b[1]));
+ return(uint16_t(uint16_t(b[0]) << 8 | b[1]));
}
#ifndef UNIV_INNOCHECKSUM
@@ -93,7 +90,7 @@ mach_read_from_2(
The following function is used to convert a 16-bit data item
to the canonical format, for fast bytewise equality test
against memory.
-@return 16-bit integer in canonical format */
+@return 16-bit integer in canonical format */
UNIV_INLINE
uint16
mach_encode_2(
@@ -109,7 +106,7 @@ mach_encode_2(
The following function is used to convert a 16-bit data item
from the canonical format, for fast bytewise equality test
against memory.
-@return integer in machine-dependent format */
+@return integer in machine-dependent format */
UNIV_INLINE
ulint
mach_decode_2(
@@ -130,7 +127,6 @@ mach_write_to_3(
byte* b, /*!< in: pointer to 3 bytes where to store */
ulint n) /*!< in: ulint integer to be stored */
{
- ut_ad(b);
ut_ad((n & ~0xFFFFFFUL) == 0);
b[0] = (byte)(n >> 16);
@@ -138,22 +134,20 @@ mach_write_to_3(
b[2] = (byte)(n);
}
-/********************************************************//**
-The following function is used to fetch data from 3 consecutive
+/** The following function is used to fetch data from 3 consecutive
bytes. The most significant byte is at the lowest address.
-@return ulint integer */
+@param[in] b pointer to 3 bytes to read
+@return uint32_t integer */
UNIV_INLINE
-ulint
+uint32_t
mach_read_from_3(
-/*=============*/
- const byte* b) /*!< in: pointer to 3 bytes */
+ const byte* b)
{
- return( ((ulint)(b[0]) << 16)
- | ((ulint)(b[1]) << 8)
- | (ulint)(b[2])
+ return( (static_cast<uint32_t>(b[0]) << 16)
+ | (static_cast<uint32_t>(b[1]) << 8)
+ | static_cast<uint32_t>(b[2])
);
}
-
#endif /* !UNIV_INNOCHECKSUM */
/*******************************************************//**
@@ -166,28 +160,25 @@ mach_write_to_4(
byte* b, /*!< in: pointer to four bytes where to store */
ulint n) /*!< in: ulint integer to be stored */
{
- ut_ad(b);
-
b[0] = (byte)(n >> 24);
b[1] = (byte)(n >> 16);
b[2] = (byte)(n >> 8);
b[3] = (byte) n;
}
-/********************************************************//**
-The following function is used to fetch data from 4 consecutive
+/** The following function is used to fetch data from 4 consecutive
bytes. The most significant byte is at the lowest address.
-@return ulint integer */
+@param[in] b pointer to 4 bytes to read
+@return 32 bit integer */
UNIV_INLINE
-ulint
+uint32_t
mach_read_from_4(
-/*=============*/
- const byte* b) /*!< in: pointer to four bytes */
+ const byte* b)
{
- return( ((ulint)(b[0]) << 24)
- | ((ulint)(b[1]) << 16)
- | ((ulint)(b[2]) << 8)
- | (ulint)(b[3])
+ return( (static_cast<uint32_t>(b[0]) << 24)
+ | (static_cast<uint32_t>(b[1]) << 16)
+ | (static_cast<uint32_t>(b[2]) << 8)
+ | static_cast<uint32_t>(b[3])
);
}
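
A note on the pattern used throughout these hunks: the reads reassemble the value with shifts instead of casting the buffer pointer, so they work on any host byte order and tolerate unaligned buffers. A minimal standalone sketch (plain C++; be_read_4 is a hypothetical stand-in for mach_read_from_4):

    #include <cassert>
    #include <cstdint>

    // Reassemble a big-endian 32-bit value with shifts; the most
    // significant byte sits at the lowest address.
    static uint32_t be_read_4(const unsigned char* b)
    {
        return (static_cast<uint32_t>(b[0]) << 24)
             | (static_cast<uint32_t>(b[1]) << 16)
             | (static_cast<uint32_t>(b[2]) << 8)
             | static_cast<uint32_t>(b[3]);
    }

    int main()
    {
        const unsigned char buf[4] = {0x12, 0x34, 0x56, 0x78};
        assert(be_read_4(buf) == 0x12345678U);
        return 0;
    }
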
@@ -200,7 +191,7 @@ the byte. If the most significant bit is zero, it means 1-byte storage,
else if the 2nd bit is 0, it means 2-byte storage, else if 3rd is 0,
it means 3-byte storage, else if 4th is 0, it means 4-byte storage,
else the storage is 5-byte.
-@return compressed size in bytes */
+@return compressed size in bytes */
UNIV_INLINE
ulint
mach_write_compressed(
@@ -208,22 +199,25 @@ mach_write_compressed(
byte* b, /*!< in: pointer to memory where to store */
ulint n) /*!< in: ulint integer (< 2^32) to be stored */
{
- ut_ad(b);
-
- if (n < 0x80UL) {
+ if (n < 0x80) {
+ /* 0nnnnnnn (7 bits) */
mach_write_to_1(b, n);
return(1);
- } else if (n < 0x4000UL) {
- mach_write_to_2(b, n | 0x8000UL);
+ } else if (n < 0x4000) {
+ /* 10nnnnnn nnnnnnnn (14 bits) */
+ mach_write_to_2(b, n | 0x8000);
return(2);
- } else if (n < 0x200000UL) {
- mach_write_to_3(b, n | 0xC00000UL);
+ } else if (n < 0x200000) {
+ /* 110nnnnn nnnnnnnn nnnnnnnn (21 bits) */
+ mach_write_to_3(b, n | 0xC00000);
return(3);
- } else if (n < 0x10000000UL) {
- mach_write_to_4(b, n | 0xE0000000UL);
+ } else if (n < 0x10000000) {
+ /* 1110nnnn nnnnnnnn nnnnnnnn nnnnnnnn (28 bits) */
+ mach_write_to_4(b, n | 0xE0000000);
return(4);
} else {
- mach_write_to_1(b, 0xF0UL);
+ /* 11110000 nnnnnnnn nnnnnnnn nnnnnnnn nnnnnnnn (32 bits) */
+ mach_write_to_1(b, 0xF0);
mach_write_to_4(b + 1, n);
return(5);
}
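
The new comments spell out the encoding these branches implement: the count of leading 1-bits in the first byte announces the total width of the field, from 1 byte for values below 0x80 up to 5 bytes for a full 32-bit value. A self-contained sketch of the same scheme (plain C++; enc_compressed is a hypothetical name, and explicit shifts replace the mach_write_to_* helpers):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Encode n into 1..5 bytes; the count of leading 1-bits in the
    // first byte tells the reader how many bytes follow.
    static size_t enc_compressed(unsigned char* b, uint32_t n)
    {
        if (n < 0x80) {                         /* 0nnnnnnn */
            b[0] = static_cast<unsigned char>(n);
            return 1;
        } else if (n < 0x4000) {                /* 10nnnnnn nnnnnnnn */
            b[0] = static_cast<unsigned char>(0x80 | (n >> 8));
            b[1] = static_cast<unsigned char>(n);
            return 2;
        } else if (n < 0x200000) {              /* 110nnnnn + 2 bytes */
            b[0] = static_cast<unsigned char>(0xC0 | (n >> 16));
            b[1] = static_cast<unsigned char>(n >> 8);
            b[2] = static_cast<unsigned char>(n);
            return 3;
        } else if (n < 0x10000000) {            /* 1110nnnn + 3 bytes */
            b[0] = static_cast<unsigned char>(0xE0 | (n >> 24));
            b[1] = static_cast<unsigned char>(n >> 16);
            b[2] = static_cast<unsigned char>(n >> 8);
            b[3] = static_cast<unsigned char>(n);
            return 4;
        }
        b[0] = 0xF0;                            /* 11110000 + 4 bytes */
        b[1] = static_cast<unsigned char>(n >> 24);
        b[2] = static_cast<unsigned char>(n >> 16);
        b[3] = static_cast<unsigned char>(n >> 8);
        b[4] = static_cast<unsigned char>(n);
        return 5;
    }

    int main()
    {
        unsigned char buf[5];
        assert(enc_compressed(buf, 0x7F) == 1 && buf[0] == 0x7F);
        assert(enc_compressed(buf, 0x80) == 2
               && buf[0] == 0x80 && buf[1] == 0x80);
        assert(enc_compressed(buf, 0xFFFFFFFFU) == 5 && buf[0] == 0xF0);
        return 0;
    }
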
@@ -231,51 +225,106 @@ mach_write_compressed(
/*********************************************************//**
Returns the size of a ulint when written in the compressed form.
-@return compressed size in bytes */
+@return compressed size in bytes */
UNIV_INLINE
ulint
mach_get_compressed_size(
/*=====================*/
ulint n) /*!< in: ulint integer (< 2^32) to be stored */
{
- if (n < 0x80UL) {
+ if (n < 0x80) {
+ /* 0nnnnnnn (7 bits) */
return(1);
- } else if (n < 0x4000UL) {
+ } else if (n < 0x4000) {
+ /* 10nnnnnn nnnnnnnn (14 bits) */
return(2);
- } else if (n < 0x200000UL) {
+ } else if (n < 0x200000) {
+ /* 110nnnnn nnnnnnnn nnnnnnnn (21 bits) */
return(3);
- } else if (n < 0x10000000UL) {
+ } else if (n < 0x10000000) {
+ /* 1110nnnn nnnnnnnn nnnnnnnn nnnnnnnn (28 bits) */
return(4);
} else {
+ /* 11110000 nnnnnnnn nnnnnnnn nnnnnnnn nnnnnnnn (32 bits) */
return(5);
}
}
/*********************************************************//**
Reads a ulint in a compressed form.
-@return read integer (< 2^32) */
+@return read integer (< 2^32) */
UNIV_INLINE
ulint
mach_read_compressed(
/*=================*/
const byte* b) /*!< in: pointer to memory from where to read */
{
- ulint flag;
-
- flag = mach_read_from_1(b);
+ ulint val;
+
+ val = mach_read_from_1(b);
+
+ if (val < 0x80) {
+ /* 0nnnnnnn (7 bits) */
+ } else if (val < 0xC0) {
+ /* 10nnnnnn nnnnnnnn (14 bits) */
+ val = mach_read_from_2(b) & 0x3FFF;
+ ut_ad(val > 0x7F);
+ } else if (val < 0xE0) {
+ /* 110nnnnn nnnnnnnn nnnnnnnn (21 bits) */
+ val = mach_read_from_3(b) & 0x1FFFFF;
+ ut_ad(val > 0x3FFF);
+ } else if (val < 0xF0) {
+ /* 1110nnnn nnnnnnnn nnnnnnnn nnnnnnnn (28 bits) */
+ val = mach_read_from_4(b) & 0xFFFFFFF;
+ ut_ad(val > 0x1FFFFF);
+ } else {
+ /* 11110000 nnnnnnnn nnnnnnnn nnnnnnnn nnnnnnnn (32 bits) */
+ ut_ad(val == 0xF0);
+ val = mach_read_from_4(b + 1);
+ ut_ad(val > 0xFFFFFFF);
+ }
- if (flag < 0x80UL) {
- return(flag);
- } else if (flag < 0xC0UL) {
- return(mach_read_from_2(b) & 0x7FFFUL);
- } else if (flag < 0xE0UL) {
- return(mach_read_from_3(b) & 0x3FFFFFUL);
- } else if (flag < 0xF0UL) {
- return(mach_read_from_4(b) & 0x1FFFFFFFUL);
+ return(val);
+}
+
+/** Read a 32-bit integer in a compressed form.
+@param[in,out] b pointer to memory where to read;
+advanced by the number of bytes consumed
+@return unsigned value */
+UNIV_INLINE
+ib_uint32_t
+mach_read_next_compressed(
+ const byte** b)
+{
+ ulint val = mach_read_from_1(*b);
+
+ if (val < 0x80) {
+ /* 0nnnnnnn (7 bits) */
+ ++*b;
+ } else if (val < 0xC0) {
+ /* 10nnnnnn nnnnnnnn (14 bits) */
+ val = mach_read_from_2(*b) & 0x3FFF;
+ ut_ad(val > 0x7F);
+ *b += 2;
+ } else if (val < 0xE0) {
+ /* 110nnnnn nnnnnnnn nnnnnnnn (21 bits) */
+ val = mach_read_from_3(*b) & 0x1FFFFF;
+ ut_ad(val > 0x3FFF);
+ *b += 3;
+ } else if (val < 0xF0) {
+ /* 1110nnnn nnnnnnnn nnnnnnnn nnnnnnnn (28 bits) */
+ val = mach_read_from_4(*b) & 0xFFFFFFF;
+ ut_ad(val > 0x1FFFFF);
+ *b += 4;
} else {
- ut_ad(flag == 0xF0UL);
- return(mach_read_from_4(b + 1));
+ /* 11110000 nnnnnnnn nnnnnnnn nnnnnnnn nnnnnnnn (32 bits) */
+ ut_ad(val == 0xF0);
+ val = mach_read_from_4(*b + 1);
+ ut_ad(val > 0xFFFFFFF);
+ *b += 5;
}
+
+ return(static_cast<ib_uint32_t>(val));
}
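
The matching decoder, sketched standalone below, also shows why the ut_ad() lower-bound assertions above hold: each width is only used for values too large for the next shorter width, so a canonical encoder never produces, say, a 2-byte field whose value fits in 7 bits (dec_compressed is a hypothetical name, plain C++):

    #include <cassert>
    #include <cstdint>

    // Decode the 1..5-byte compressed format and advance the cursor by
    // the number of bytes consumed, mirroring mach_read_next_compressed.
    static uint32_t dec_compressed(const unsigned char** b)
    {
        const unsigned char* q = *b;
        uint32_t v = q[0];
        if (v < 0x80) {                         /* 0nnnnnnn */
            *b += 1;
        } else if (v < 0xC0) {                  /* 10nnnnnn nnnnnnnn */
            v = ((v & 0x3F) << 8) | q[1];
            *b += 2;
        } else if (v < 0xE0) {                  /* 110nnnnn + 2 bytes */
            v = ((v & 0x1F) << 16)
              | (static_cast<uint32_t>(q[1]) << 8) | q[2];
            *b += 3;
        } else if (v < 0xF0) {                  /* 1110nnnn + 3 bytes */
            v = ((v & 0x0F) << 24)
              | (static_cast<uint32_t>(q[1]) << 16)
              | (static_cast<uint32_t>(q[2]) << 8) | q[3];
            *b += 4;
        } else {                                /* 11110000 + 4 bytes */
            v = (static_cast<uint32_t>(q[1]) << 24)
              | (static_cast<uint32_t>(q[2]) << 16)
              | (static_cast<uint32_t>(q[3]) << 8) | q[4];
            *b += 5;
        }
        return v;
    }

    int main()
    {
        const unsigned char buf[] = {0x7F,                  /* 0x7F */
                                     0x80, 0x80,            /* 0x80 */
                                     0xF0, 0x10, 0, 0, 0};  /* 0x10000000 */
        const unsigned char* p = buf;
        assert(dec_compressed(&p) == 0x7F);
        assert(dec_compressed(&p) == 0x80);
        assert(dec_compressed(&p) == 0x10000000);
        assert(p == buf + sizeof(buf));
        return 0;
    }
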
/*******************************************************//**
@@ -288,8 +337,6 @@ mach_write_to_8(
void* b, /*!< in: pointer to 8 bytes where to store */
ib_uint64_t n) /*!< in: 64-bit integer to be stored */
{
- ut_ad(b);
-
mach_write_to_4(static_cast<byte*>(b), (ulint) (n >> 32));
mach_write_to_4(static_cast<byte*>(b) + 4, (ulint) n);
}
@@ -299,19 +346,20 @@ mach_write_to_8(
/********************************************************//**
The following function is used to fetch data from 8 consecutive
bytes. The most significant byte is at the lowest address.
-@return 64-bit integer */
+@return 64-bit integer */
UNIV_INLINE
ib_uint64_t
mach_read_from_8(
/*=============*/
const byte* b) /*!< in: pointer to 8 bytes */
{
- ib_uint64_t ull;
+ ib_uint64_t u64;
- ull = ((ib_uint64_t) mach_read_from_4(b)) << 32;
- ull |= (ib_uint64_t) mach_read_from_4(b + 4);
+ u64 = mach_read_from_4(b);
+ u64 <<= 32;
+ u64 |= mach_read_from_4(b + 4);
- return(ull);
+ return(u64);
}
#ifndef UNIV_INNOCHECKSUM
@@ -326,8 +374,6 @@ mach_write_to_7(
byte* b, /*!< in: pointer to 7 bytes where to store */
ib_uint64_t n) /*!< in: 56-bit integer */
{
- ut_ad(b);
-
mach_write_to_3(b, (ulint) (n >> 32));
mach_write_to_4(b + 3, (ulint) n);
}
@@ -335,7 +381,7 @@ mach_write_to_7(
/********************************************************//**
The following function is used to fetch data from 7 consecutive
bytes. The most significant byte is at the lowest address.
-@return 56-bit integer */
+@return 56-bit integer */
UNIV_INLINE
ib_uint64_t
mach_read_from_7(
@@ -355,8 +401,6 @@ mach_write_to_6(
byte* b, /*!< in: pointer to 6 bytes where to store */
ib_uint64_t n) /*!< in: 48-bit integer */
{
- ut_ad(b);
-
mach_write_to_2(b, (ulint) (n >> 32));
mach_write_to_4(b + 2, (ulint) n);
}
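
mach_write_to_6 (and mach_write_to_7 above it) compose the existing fixed-width writers instead of open-coding six or seven shifts: the value is split into a big-endian high part and a 4-byte low part. A standalone illustration of the 48-bit case (plain C++, hypothetical names):

    #include <cassert>
    #include <cstdint>

    static void be_write_2(unsigned char* b, uint16_t n)
    {
        b[0] = static_cast<unsigned char>(n >> 8);
        b[1] = static_cast<unsigned char>(n);
    }

    static void be_write_4(unsigned char* b, uint32_t n)
    {
        b[0] = static_cast<unsigned char>(n >> 24);
        b[1] = static_cast<unsigned char>(n >> 16);
        b[2] = static_cast<unsigned char>(n >> 8);
        b[3] = static_cast<unsigned char>(n);
    }

    // 48-bit write = 2-byte high part + 4-byte low part, both
    // big-endian, mirroring the composition in mach_write_to_6.
    static void be_write_6(unsigned char* b, uint64_t n)
    {
        be_write_2(b, static_cast<uint16_t>(n >> 32));
        be_write_4(b + 2, static_cast<uint32_t>(n));
    }

    int main()
    {
        unsigned char buf[6];
        be_write_6(buf, 0x123456789ABCULL);
        const unsigned char want[6] = {0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC};
        for (int i = 0; i < 6; i++) {
            assert(buf[i] == want[i]);
        }
        return 0;
    }
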
@@ -364,7 +408,7 @@ mach_write_to_6(
/********************************************************//**
The following function is used to fetch data from 6 consecutive
bytes. The most significant byte is at the lowest address.
-@return 48-bit integer */
+@return 48-bit integer */
UNIV_INLINE
ib_uint64_t
mach_read_from_6(
@@ -376,72 +420,50 @@ mach_read_from_6(
/*********************************************************//**
Writes a 64-bit integer in a compressed form (5..9 bytes).
-@return size in bytes */
+@return size in bytes */
UNIV_INLINE
ulint
-mach_ull_write_compressed(
+mach_u64_write_compressed(
/*======================*/
byte* b, /*!< in: pointer to memory where to store */
ib_uint64_t n) /*!< in: 64-bit integer to be stored */
{
- ulint size;
-
- ut_ad(b);
-
- size = mach_write_compressed(b, (ulint) (n >> 32));
+ ulint size = mach_write_compressed(b, (ulint) (n >> 32));
mach_write_to_4(b + size, (ulint) n);
return(size + 4);
}
-/*********************************************************//**
-Returns the size of a 64-bit integer when written in the compressed form.
-@return compressed size in bytes */
-UNIV_INLINE
-ulint
-mach_ull_get_compressed_size(
-/*=========================*/
- ib_uint64_t n) /*!< in: 64-bit integer to be stored */
-{
- return(4 + mach_get_compressed_size((ulint) (n >> 32)));
-}
-
-/*********************************************************//**
-Reads a 64-bit integer in a compressed form.
-@return the value read */
+/** Read a 64-bit integer in a compressed form.
+@param[in,out] b pointer to memory where to read;
+advanced by the number of bytes consumed
+@return unsigned value */
UNIV_INLINE
ib_uint64_t
-mach_ull_read_compressed(
-/*=====================*/
- const byte* b) /*!< in: pointer to memory from where to read */
+mach_u64_read_next_compressed(
+ const byte** b)
{
- ib_uint64_t n;
- ulint size;
-
- n = (ib_uint64_t) mach_read_compressed(b);
-
- size = mach_get_compressed_size((ulint) n);
-
- n <<= 32;
- n |= (ib_uint64_t) mach_read_from_4(b + size);
+ ib_uint64_t val;
- return(n);
+ val = mach_read_next_compressed(b);
+ val <<= 32;
+ val |= mach_read_from_4(*b);
+ *b += 4;
+ return(val);
}
/*********************************************************//**
Writes a 64-bit integer in a compressed form (1..11 bytes).
-@return size in bytes */
+@return size in bytes */
UNIV_INLINE
ulint
-mach_ull_write_much_compressed(
+mach_u64_write_much_compressed(
/*===========================*/
byte* b, /*!< in: pointer to memory where to store */
ib_uint64_t n) /*!< in: 64-bit integer to be stored */
{
ulint size;
- ut_ad(b);
-
if (!(n >> 32)) {
return(mach_write_compressed(b, (ulint) n));
}
@@ -455,92 +477,110 @@ mach_ull_write_much_compressed(
}
/*********************************************************//**
-Returns the size of a 64-bit integer when written in the compressed form.
-@return compressed size in bytes */
-UNIV_INLINE
-ulint
-mach_ull_get_much_compressed_size(
-/*==============================*/
- ib_uint64_t n) /*!< in: 64-bit integer to be stored */
-{
- if (!(n >> 32)) {
- return(mach_get_compressed_size((ulint) n));
- }
-
- return(1 + mach_get_compressed_size((ulint) (n >> 32))
- + mach_get_compressed_size((ulint) n & ULINT32_MASK));
-}
-
-/*********************************************************//**
Reads a 64-bit integer in a compressed form.
-@return the value read */
+@return the value read */
UNIV_INLINE
ib_uint64_t
-mach_ull_read_much_compressed(
+mach_u64_read_much_compressed(
/*==========================*/
const byte* b) /*!< in: pointer to memory from where to read */
{
ib_uint64_t n;
- ulint size;
- if (*b != (byte)0xFF) {
- n = 0;
- size = 0;
- } else {
- n = (ib_uint64_t) mach_read_compressed(b + 1);
-
- size = 1 + mach_get_compressed_size((ulint) n);
- n <<= 32;
+ if (*b != 0xFF) {
+ return(mach_read_compressed(b));
}
- n |= mach_read_compressed(b + size);
+ b++;
+ n = mach_read_next_compressed(&b);
+ n <<= 32;
+ n |= mach_read_compressed(b);
return(n);
}
-/*********************************************************//**
-Reads a 64-bit integer in a compressed form
-if the log record fully contains it.
-@return pointer to end of the stored field, NULL if not complete */
+/** Read a 64-bit integer in a compressed form.
+@param[in,out] b pointer to memory where to read;
+advanced by the number of bytes consumed
+@return unsigned value */
UNIV_INLINE
-byte*
-mach_ull_parse_compressed(
-/*======================*/
- byte* ptr, /* in: pointer to buffer from where to read */
- byte* end_ptr,/* in: pointer to end of the buffer */
- ib_uint64_t* val) /* out: read value */
-{
- ulint size;
-
- ut_ad(ptr);
- ut_ad(end_ptr);
- ut_ad(val);
-
- if (end_ptr < ptr + 5) {
-
- return(NULL);
+ib_uint64_t
+mach_read_next_much_compressed(
+ const byte** b)
+{
+ ib_uint64_t val = mach_read_from_1(*b);
+
+ if (val < 0x80) {
+ /* 0nnnnnnn (7 bits) */
+ ++*b;
+ } else if (val < 0xC0) {
+ /* 10nnnnnn nnnnnnnn (14 bits) */
+ val = mach_read_from_2(*b) & 0x3FFF;
+ ut_ad(val > 0x7F);
+ *b += 2;
+ } else if (val < 0xE0) {
+ /* 110nnnnn nnnnnnnn nnnnnnnn (21 bits) */
+ val = mach_read_from_3(*b) & 0x1FFFFF;
+ ut_ad(val > 0x3FFF);
+ *b += 3;
+ } else if (val < 0xF0) {
+ /* 1110nnnn nnnnnnnn nnnnnnnn nnnnnnnn (28 bits) */
+ val = mach_read_from_4(*b) & 0xFFFFFFF;
+ ut_ad(val > 0x1FFFFF);
+ *b += 4;
+ } else if (val == 0xF0) {
+ /* 11110000 nnnnnnnn nnnnnnnn nnnnnnnn nnnnnnnn (32 bits) */
+ val = mach_read_from_4(*b + 1);
+ ut_ad(val > 0xFFFFFFF);
+ *b += 5;
+ } else {
+ /* 11111111 followed by up to 64 bits */
+ ut_ad(val == 0xFF);
+ ++*b;
+ val = mach_read_next_compressed(b);
+ ut_ad(val > 0);
+ val <<= 32;
+ val |= mach_read_next_compressed(b);
}
- *val = mach_read_compressed(ptr);
+ return(val);
+}
- size = mach_get_compressed_size((ulint) *val);
+/** Read a 64-bit integer in a compressed form.
+@param[in,out] ptr pointer to memory where to read;
+advanced by the number of bytes consumed, or set NULL if out of space
+@param[in] end_ptr end of the buffer
+@return unsigned value */
+UNIV_INLINE
+ib_uint64_t
+mach_u64_parse_compressed(
+ const byte** ptr,
+ const byte* end_ptr)
+{
+ ib_uint64_t val = 0;
- ptr += size;
+ if (end_ptr < *ptr + 5) {
+ *ptr = NULL;
+ return(val);
+ }
- if (end_ptr < ptr + 4) {
+ val = mach_read_next_compressed(ptr);
- return(NULL);
+ if (end_ptr < *ptr + 4) {
+ *ptr = NULL;
+ return(val);
}
- *val <<= 32;
- *val |= mach_read_from_4(ptr);
+ val <<= 32;
+ val |= mach_read_from_4(*ptr);
+ *ptr += 4;
- return(ptr + 4);
+ return(val);
}
-#ifndef UNIV_HOTBACKUP
+
/*********************************************************//**
Reads a double. It is stored in a little-endian format.
-@return double read */
+@return double read */
UNIV_INLINE
double
mach_double_read(
@@ -589,7 +629,7 @@ mach_double_write(
/*********************************************************//**
Reads a float. It is stored in a little-endian format.
-@return float read */
+@return float read */
UNIV_INLINE
float
mach_float_read(
@@ -638,7 +678,7 @@ mach_float_write(
/*********************************************************//**
Reads a ulint stored in the little-endian format.
-@return unsigned long int */
+@return unsigned long int */
UNIV_INLINE
ulint
mach_read_from_n_little_endian(
@@ -702,7 +742,7 @@ mach_write_to_n_little_endian(
/*********************************************************//**
Reads a ulint stored in the little-endian format.
-@return unsigned long int */
+@return unsigned long int */
UNIV_INLINE
ulint
mach_read_from_2_little_endian(
@@ -734,7 +774,7 @@ mach_write_to_2_little_endian(
/*********************************************************//**
Convert integral type from storage byte order (big endian) to
host byte order.
-@return integer value */
+@return integer value */
UNIV_INLINE
ib_uint64_t
mach_read_int_type(
@@ -745,8 +785,8 @@ mach_read_int_type(
{
/* XXX this can be optimized on big-endian machines */
- ullint ret;
- uint i;
+ uintmax_t ret;
+ uint i;
if (unsigned_type || (src[0] & 0x80)) {
@@ -799,29 +839,6 @@ mach_swap_byte_order(
}
/*************************************************************
-Convert integral type from host byte order to (big-endian) storage
-byte order. */
-UNIV_INLINE
-void
-mach_write_int_type(
-/*================*/
- byte* dest, /*!< in: where to write */
- const byte* src, /*!< in: where to read from */
- ulint len, /*!< in: length of src */
- bool usign) /*!< in: signed or unsigned flag */
-{
-#ifdef WORDS_BIGENDIAN
- memcpy(dest, src, len);
-#else
- mach_swap_byte_order(dest, src, len);
-#endif /* WORDS_BIGENDIAN */
-
- if (!usign) {
- *dest ^= 0x80;
- }
-}
-
-/*************************************************************
Convert a ulonglong integer from host byte order to (big-endian)
storage byte order. */
UNIV_INLINE
@@ -847,32 +864,30 @@ mach_write_ulonglong(
*dest ^= 0x80;
}
}
-#endif /* !UNIV_HOTBACKUP */
+
#endif /* !UNIV_INNOCHECKSUM */
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Reads 1 - 4 bytes from a file page buffered in the buffer pool.
-@return value read */
+/** Read 1 to 4 bytes from a file page buffered in the buffer pool.
+@param[in] ptr pointer where to read
+@param[in] type MLOG_1BYTE, MLOG_2BYTES, or MLOG_4BYTES
+@return value read */
UNIV_INLINE
ulint
mach_read_ulint(
-/*============*/
- const byte* ptr, /*!< in: pointer from where to read */
- ulint type) /*!< in: 1,2 or 4 bytes */
+ const byte* ptr,
+ mlog_id_t type)
{
switch (type) {
- case 1:
+ case MLOG_1BYTE:
return(mach_read_from_1(ptr));
- case 2:
+ case MLOG_2BYTES:
return(mach_read_from_2(ptr));
- case 4:
+ case MLOG_4BYTES:
return(mach_read_from_4(ptr));
default:
- ut_error;
+ break;
}
+ ut_error;
return(0);
}
-
-#endif /* !UNIV_HOTBACKUP */
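
The change to mach_read_ulint just above is worth pausing on: the type argument narrows from a bare ulint (1, 2 or 4) to the mlog_id_t enumeration, so misuse fails to compile instead of hitting ut_error at runtime, and ut_error moves below the switch so that every enumerator returns explicitly. A standalone sketch of the same dispatch shape (hypothetical enum and helper names, plain C++):

    #include <cassert>
    #include <cstdint>

    // Field widths as an enum, so the compiler rejects a stray integer.
    enum mlog_kind { K_1BYTE = 1, K_2BYTES = 2, K_4BYTES = 4 };

    static uint32_t read_n(const unsigned char* p, mlog_kind k)
    {
        switch (k) {
        case K_1BYTE:
            return p[0];
        case K_2BYTES:
            return (static_cast<uint32_t>(p[0]) << 8) | p[1];
        case K_4BYTES:
            return (static_cast<uint32_t>(p[0]) << 24)
                 | (static_cast<uint32_t>(p[1]) << 16)
                 | (static_cast<uint32_t>(p[2]) << 8) | p[3];
        }
        assert(!"invalid field width");     // unreachable, like ut_error
        return 0;
    }

    int main()
    {
        const unsigned char b[4] = {0xDE, 0xAD, 0xBE, 0xEF};
        assert(read_n(b, K_1BYTE) == 0xDE);
        assert(read_n(b, K_2BYTES) == 0xDEAD);
        assert(read_n(b, K_4BYTES) == 0xDEADBEEFU);
        return 0;
    }
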
diff --git a/storage/innobase/include/mem0dbg.h b/storage/innobase/include/mem0dbg.h
deleted file mode 100644
index 448e5f80f5d..00000000000
--- a/storage/innobase/include/mem0dbg.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2010, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/mem0dbg.h
-The memory management: the debug code. This is not a compilation module,
-but is included in mem0mem.* !
-
-Created 6/9/1994 Heikki Tuuri
-*******************************************************/
-
-/* In the debug version each allocated field is surrounded with
-check fields whose sizes are given below */
-
-#ifdef UNIV_MEM_DEBUG
-# ifndef UNIV_HOTBACKUP
-/* The mutex which protects in the debug version the hash table
-containing the list of live memory heaps, and also the global
-variables in mem0dbg.cc. */
-extern ib_mutex_t mem_hash_mutex;
-# endif /* !UNIV_HOTBACKUP */
-
-#define MEM_FIELD_HEADER_SIZE ut_calc_align(2 * sizeof(ulint),\
- UNIV_MEM_ALIGNMENT)
-#define MEM_FIELD_TRAILER_SIZE sizeof(ulint)
-#else
-#define MEM_FIELD_HEADER_SIZE 0
-#endif
-
-
-/* Space needed when allocating for a user a field of
-length N. The space is allocated only in multiples of
-UNIV_MEM_ALIGNMENT. In the debug version there are also
-check fields at the both ends of the field. */
-#ifdef UNIV_MEM_DEBUG
-#define MEM_SPACE_NEEDED(N) ut_calc_align((N) + MEM_FIELD_HEADER_SIZE\
- + MEM_FIELD_TRAILER_SIZE, UNIV_MEM_ALIGNMENT)
-#else
-#define MEM_SPACE_NEEDED(N) ut_calc_align((N), UNIV_MEM_ALIGNMENT)
-#endif
-
-#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG
-/***************************************************************//**
-Checks a memory heap for consistency and prints the contents if requested.
-Outputs the sum of sizes of buffers given to the user (only in
-the debug version), the physical size of the heap and the number of
-blocks in the heap. In case of error returns 0 as sizes and number
-of blocks. */
-UNIV_INTERN
-void
-mem_heap_validate_or_print(
-/*=======================*/
- mem_heap_t* heap, /*!< in: memory heap */
- byte* top, /*!< in: calculate and validate only until
- this top pointer in the heap is reached,
- if this pointer is NULL, ignored */
- ibool print, /*!< in: if TRUE, prints the contents
- of the heap; works only in
- the debug version */
- ibool* error, /*!< out: TRUE if error */
- ulint* us_size,/*!< out: allocated memory
- (for the user) in the heap,
- if a NULL pointer is passed as this
- argument, it is ignored; in the
- non-debug version this is always -1 */
- ulint* ph_size,/*!< out: physical size of the heap,
- if a NULL pointer is passed as this
- argument, it is ignored */
- ulint* n_blocks); /*!< out: number of blocks in the heap,
- if a NULL pointer is passed as this
- argument, it is ignored */
-/**************************************************************//**
-Validates the contents of a memory heap.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-mem_heap_validate(
-/*==============*/
- mem_heap_t* heap); /*!< in: memory heap */
-#endif /* UNIV_MEM_DEBUG || UNIV_DEBUG */
-#ifdef UNIV_DEBUG
-/**************************************************************//**
-Checks that an object is a memory heap (or a block of it)
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-mem_heap_check(
-/*===========*/
- mem_heap_t* heap); /*!< in: memory heap */
-#endif /* UNIV_DEBUG */
-#ifdef UNIV_MEM_DEBUG
-/*****************************************************************//**
-TRUE if no memory is currently allocated.
-@return TRUE if no heaps exist */
-UNIV_INTERN
-ibool
-mem_all_freed(void);
-/*===============*/
-/*****************************************************************//**
-Validates the dynamic memory
-@return TRUE if error */
-UNIV_INTERN
-ibool
-mem_validate_no_assert(void);
-/*=========================*/
-/************************************************************//**
-Validates the dynamic memory
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-mem_validate(void);
-/*===============*/
-#endif /* UNIV_MEM_DEBUG */
-/************************************************************//**
-Tries to find neighboring memory allocation blocks and dumps to stderr
-the neighborhood of a given pointer. */
-UNIV_INTERN
-void
-mem_analyze_corruption(
-/*===================*/
- void* ptr); /*!< in: pointer to place of possible corruption */
-/*****************************************************************//**
-Prints information of dynamic memory usage and currently allocated memory
-heaps or buffers. Can only be used in the debug version. */
-UNIV_INTERN
-void
-mem_print_info(void);
-/*================*/
-/*****************************************************************//**
-Prints information of dynamic memory usage and currently allocated memory
-heaps or buffers since the last ..._print_info or..._print_new_info. */
-UNIV_INTERN
-void
-mem_print_new_info(void);
-/*====================*/
diff --git a/storage/innobase/include/mem0dbg.ic b/storage/innobase/include/mem0dbg.ic
deleted file mode 100644
index 69bb5fd8d90..00000000000
--- a/storage/innobase/include/mem0dbg.ic
+++ /dev/null
@@ -1,109 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2010, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/mem0dbg.ic
-The memory management: the debug code. This is not an independent
-compilation module but is included in mem0mem.*.
-
-Created 6/8/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifdef UNIV_MEM_DEBUG
-extern ulint mem_current_allocated_memory;
-
-/******************************************************************//**
-Initializes an allocated memory field in the debug version. */
-UNIV_INTERN
-void
-mem_field_init(
-/*===========*/
- byte* buf, /*!< in: memory field */
- ulint n); /*!< in: how many bytes the user requested */
-/******************************************************************//**
-Erases an allocated memory field in the debug version. */
-UNIV_INTERN
-void
-mem_field_erase(
-/*============*/
- byte* buf, /*!< in: memory field */
- ulint n); /*!< in: how many bytes the user requested */
-/***************************************************************//**
-Initializes a buffer to a random combination of hex BA and BE.
-Used to initialize allocated memory. */
-UNIV_INTERN
-void
-mem_init_buf(
-/*=========*/
- byte* buf, /*!< in: pointer to buffer */
- ulint n); /*!< in: length of buffer */
-/***************************************************************//**
-Initializes a buffer to a random combination of hex DE and AD.
-Used to erase freed memory. */
-UNIV_INTERN
-void
-mem_erase_buf(
-/*==========*/
- byte* buf, /*!< in: pointer to buffer */
- ulint n); /*!< in: length of buffer */
-/***************************************************************//**
-Inserts a created memory heap to the hash table of
-current allocated memory heaps.
-Initializes the hash table when first called. */
-UNIV_INTERN
-void
-mem_hash_insert(
-/*============*/
- mem_heap_t* heap, /*!< in: the created heap */
- const char* file_name, /*!< in: file name of creation */
- ulint line); /*!< in: line where created */
-/***************************************************************//**
-Removes a memory heap (which is going to be freed by the caller)
-from the list of live memory heaps. Returns the size of the heap
-in terms of how much memory in bytes was allocated for the user of
-the heap (not the total space occupied by the heap).
-Also validates the heap.
-NOTE: This function does not free the storage occupied by the
-heap itself, only the node in the list of heaps. */
-UNIV_INTERN
-void
-mem_hash_remove(
-/*============*/
- mem_heap_t* heap, /*!< in: the heap to be freed */
- const char* file_name, /*!< in: file name of freeing */
- ulint line); /*!< in: line where freed */
-
-
-void
-mem_field_header_set_len(byte* field, ulint len);
-
-ulint
-mem_field_header_get_len(byte* field);
-
-void
-mem_field_header_set_check(byte* field, ulint check);
-
-ulint
-mem_field_header_get_check(byte* field);
-
-void
-mem_field_trailer_set_check(byte* field, ulint check);
-
-ulint
-mem_field_trailer_get_check(byte* field);
-#endif /* UNIV_MEM_DEBUG */
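
For context on what is being dropped: these hooks poisoned freshly allocated memory with a mix of the bytes 0xBA/0xBE and erased freed memory with 0xDE/0xAD, making stale pointers easy to spot in a debugger; the replacement relies on the UNIV_MEM_ALLOC/UNIV_MEM_FREE annotations and the REDZONE_SIZE padding added in mem0mem. A minimal stand-in for the poisoning idea (hypothetical names; the strict alternation here is illustrative, the original used a random mix):

    #include <cassert>
    #include <cstddef>

    static void poison_new(unsigned char* buf, size_t n)
    {
        for (size_t i = 0; i < n; i++) {
            buf[i] = (i & 1) ? 0xBE : 0xBA;     // fresh memory: BA BE ...
        }
    }

    static void poison_freed(unsigned char* buf, size_t n)
    {
        for (size_t i = 0; i < n; i++) {
            buf[i] = (i & 1) ? 0xAD : 0xDE;     // freed memory: DE AD ...
        }
    }

    int main()
    {
        unsigned char b[4];
        poison_new(b, sizeof b);
        assert(b[0] == 0xBA && b[1] == 0xBE);
        poison_freed(b, sizeof b);
        assert(b[0] == 0xDE && b[1] == 0xAD);
        return 0;
    }
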
diff --git a/storage/innobase/include/mem0mem.h b/storage/innobase/include/mem0mem.h
index b01a0fdee73..0cd15ebb261 100644
--- a/storage/innobase/include/mem0mem.h
+++ b/storage/innobase/include/mem0mem.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,26 +27,22 @@ Created 6/9/1994 Heikki Tuuri
#ifndef mem0mem_h
#define mem0mem_h
-#include "univ.i"
#include "ut0mem.h"
-#include "ut0byte.h"
#include "ut0rnd.h"
-#ifndef UNIV_HOTBACKUP
-# include "sync0sync.h"
-#endif /* UNIV_HOTBACKUP */
-#include "ut0lst.h"
#include "mach0data.h"
+#include <memory>
+
/* -------------------- MEMORY HEAPS ----------------------------- */
-/* A block of a memory heap consists of the info structure
+/** A block of a memory heap consists of the info structure
followed by an area of memory */
typedef struct mem_block_info_t mem_block_t;
-/* A memory heap is a nonempty linear list of memory blocks */
+/** A memory heap is a nonempty linear list of memory blocks */
typedef mem_block_t mem_heap_t;
-/* Types of allocation for memory heaps: DYNAMIC means allocation from the
+/** Types of allocation for memory heaps: DYNAMIC means allocation from the
dynamic memory pool of the C compiler, BUFFER means allocation from the
buffer pool; the latter method is used for very big heaps */
@@ -59,13 +56,13 @@ buffer pool; the latter method is used for very big heaps */
allocation functions can return
NULL. */
-/* Different type of heaps in terms of which datastructure is using them */
+/** Different type of heaps in terms of which datastructure is using them */
#define MEM_HEAP_FOR_BTR_SEARCH (MEM_HEAP_BTR_SEARCH | MEM_HEAP_BUFFER)
#define MEM_HEAP_FOR_PAGE_HASH (MEM_HEAP_DYNAMIC)
#define MEM_HEAP_FOR_RECV_SYS (MEM_HEAP_BUFFER)
#define MEM_HEAP_FOR_LOCK_HEAP (MEM_HEAP_BUFFER)
-/* The following start size is used for the first block in the memory heap if
+/** The following start size is used for the first block in the memory heap if
the size is not specified, i.e., 0 is given as the parameter in the call of
create. The standard size is the maximum (payload) size of the blocks used for
allocations of small buffers. */
@@ -74,147 +71,128 @@ allocations of small buffers. */
#define MEM_BLOCK_STANDARD_SIZE \
(UNIV_PAGE_SIZE >= 16384 ? 8000 : MEM_MAX_ALLOC_IN_BUF)
-/* If a memory heap is allowed to grow into the buffer pool, the following
+/** If a memory heap is allowed to grow into the buffer pool, the following
is the maximum size for a single allocated buffer: */
-#define MEM_MAX_ALLOC_IN_BUF (UNIV_PAGE_SIZE - 200)
+#define MEM_MAX_ALLOC_IN_BUF (UNIV_PAGE_SIZE - 200 + REDZONE_SIZE)
-/******************************************************************//**
-Initializes the memory system. */
-UNIV_INTERN
-void
-mem_init(
-/*=====*/
- ulint size); /*!< in: common pool size in bytes */
-/******************************************************************//**
-Closes the memory system. */
-UNIV_INTERN
-void
-mem_close(void);
-/*===========*/
+/** Space needed when allocating for a user a field of length N.
+The space is allocated only in multiples of UNIV_MEM_ALIGNMENT. */
+#define MEM_SPACE_NEEDED(N) UT_CALC_ALIGN((N), UNIV_MEM_ALIGNMENT)
#ifdef UNIV_DEBUG
-/**************************************************************//**
-Use this macro instead of the corresponding function! Macro for memory
-heap creation. */
-
-# define mem_heap_create(N) mem_heap_create_func( \
- (N), __FILE__, __LINE__, MEM_HEAP_DYNAMIC)
-/**************************************************************//**
-Use this macro instead of the corresponding function! Macro for memory
-heap creation. */
+/** Macro for memory heap creation.
+@param[in] size Desired start block size. */
+# define mem_heap_create(size) \
+ mem_heap_create_func((size), __FILE__, __LINE__, MEM_HEAP_DYNAMIC)
-# define mem_heap_create_typed(N, T) mem_heap_create_func( \
- (N), __FILE__, __LINE__, (T))
+/** Macro for memory heap creation.
+@param[in] size Desired start block size.
+@param[in] type Heap type */
+# define mem_heap_create_typed(size, type) \
+ mem_heap_create_func((size), __FILE__, __LINE__, (type))
#else /* UNIV_DEBUG */
-/**************************************************************//**
-Use this macro instead of the corresponding function! Macro for memory
-heap creation. */
+/** Macro for memory heap creation.
+@param[in] size Desired start block size. */
+# define mem_heap_create(size) mem_heap_create_func((size), MEM_HEAP_DYNAMIC)
-# define mem_heap_create(N) mem_heap_create_func( \
- (N), MEM_HEAP_DYNAMIC)
-/**************************************************************//**
-Use this macro instead of the corresponding function! Macro for memory
-heap creation. */
-
-# define mem_heap_create_typed(N, T) mem_heap_create_func( \
- (N), (T))
+/** Macro for memory heap creation.
+@param[in] size Desired start block size.
+@param[in] type Heap type */
+# define mem_heap_create_typed(size, type) \
+ mem_heap_create_func((size), (type))
#endif /* UNIV_DEBUG */
-/**************************************************************//**
-Use this macro instead of the corresponding function! Macro for memory
-heap freeing. */
-#define mem_heap_free(heap) mem_heap_free_func(\
- (heap), __FILE__, __LINE__)
-/*****************************************************************//**
-NOTE: Use the corresponding macros instead of this function. Creates a
-memory heap. For debugging purposes, takes also the file name and line as
-arguments.
+/** Creates a memory heap.
+NOTE: Use the corresponding macros instead of this function.
+A single user buffer of 'size' will fit in the block.
+0 creates a default size block.
+@param[in] size Desired start block size.
+@param[in] file_name File name where created
+@param[in] line Line where created
+@param[in] type Heap type
@return own: memory heap, NULL if did not succeed (only possible for
MEM_HEAP_BTR_SEARCH type heaps) */
UNIV_INLINE
mem_heap_t*
mem_heap_create_func(
-/*=================*/
- ulint n, /*!< in: desired start block size,
- this means that a single user buffer
- of size n will fit in the block,
- 0 creates a default size block */
+ ulint size,
#ifdef UNIV_DEBUG
- const char* file_name, /*!< in: file name where created */
- ulint line, /*!< in: line where created */
+ const char* file_name,
+ unsigned line,
#endif /* UNIV_DEBUG */
- ulint type); /*!< in: heap type */
-/*****************************************************************//**
-NOTE: Use the corresponding macro instead of this function. Frees the space
-occupied by a memory heap. In the debug version erases the heap memory
-blocks. */
+ ulint type);
+
+/** Frees the space occupied by a memory heap.
+NOTE: Use the corresponding macro instead of this function.
+@param[in] heap Heap to be freed */
UNIV_INLINE
void
-mem_heap_free_func(
-/*===============*/
- mem_heap_t* heap, /*!< in, own: heap to be freed */
- const char* file_name, /*!< in: file name where freed */
- ulint line); /*!< in: line where freed */
-/***************************************************************//**
-Allocates and zero-fills n bytes of memory from a memory heap.
-@return allocated, zero-filled storage */
+mem_heap_free(
+ mem_heap_t* heap);
+
+/** Allocates and zero-fills n bytes of memory from a memory heap.
+@param[in] heap memory heap
+@param[in] n number of bytes; if the heap is allowed to grow into
+the buffer pool, this must be <= MEM_MAX_ALLOC_IN_BUF
+@return allocated, zero-filled storage */
UNIV_INLINE
void*
mem_heap_zalloc(
-/*============*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n); /*!< in: number of bytes; if the heap is allowed
- to grow into the buffer pool, this must be
- <= MEM_MAX_ALLOC_IN_BUF */
-/***************************************************************//**
-Allocates n bytes of memory from a memory heap.
+ mem_heap_t* heap,
+ ulint n);
+
+/** Allocates n bytes of memory from a memory heap.
+@param[in] heap memory heap
+@param[in] n number of bytes; if the heap is allowed to grow into
+the buffer pool, this must be <= MEM_MAX_ALLOC_IN_BUF
@return allocated storage, NULL if did not succeed (only possible for
MEM_HEAP_BTR_SEARCH type heaps) */
UNIV_INLINE
void*
mem_heap_alloc(
-/*===========*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n); /*!< in: number of bytes; if the heap is allowed
- to grow into the buffer pool, this must be
- <= MEM_MAX_ALLOC_IN_BUF */
-/*****************************************************************//**
-Returns a pointer to the heap top.
-@return pointer to the heap top */
+ mem_heap_t* heap,
+ ulint n);
+
+/** Returns a pointer to the heap top.
+@param[in] heap memory heap
+@return pointer to the heap top */
UNIV_INLINE
byte*
mem_heap_get_heap_top(
-/*==================*/
- mem_heap_t* heap); /*!< in: memory heap */
-/*****************************************************************//**
-Frees the space in a memory heap exceeding the pointer given. The
-pointer must have been acquired from mem_heap_get_heap_top. The first
-memory block of the heap is not freed. */
+ mem_heap_t* heap);
+
+/** Frees the space in a memory heap exceeding the pointer given.
+The pointer must have been acquired from mem_heap_get_heap_top.
+The first memory block of the heap is not freed.
+@param[in] heap heap from which to free
+@param[in] old_top pointer to old top of heap */
UNIV_INLINE
void
mem_heap_free_heap_top(
-/*===================*/
- mem_heap_t* heap, /*!< in: heap from which to free */
- byte* old_top);/*!< in: pointer to old top of heap */
-/*****************************************************************//**
-Empties a memory heap. The first memory block of the heap is not freed. */
+ mem_heap_t* heap,
+ byte* old_top);
+
+/** Empties a memory heap.
+The first memory block of the heap is not freed.
+@param[in] heap heap to empty */
UNIV_INLINE
void
mem_heap_empty(
-/*===========*/
- mem_heap_t* heap); /*!< in: heap to empty */
-/*****************************************************************//**
-Returns a pointer to the topmost element in a memory heap.
+ mem_heap_t* heap);
+
+/** Returns a pointer to the topmost element in a memory heap.
The size of the element must be given.
-@return pointer to the topmost element */
+@param[in] heap memory heap
+@param[in] n size of the topmost element
+@return pointer to the topmost element */
UNIV_INLINE
void*
mem_heap_get_top(
-/*=============*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n); /*!< in: size of the topmost element */
+ mem_heap_t* heap,
+ ulint n);
+
/*****************************************************************//**
Frees the topmost element in a memory heap.
The size of the element must be given. */
@@ -231,58 +209,10 @@ ulint
mem_heap_get_size(
/*==============*/
mem_heap_t* heap); /*!< in: heap */
-/**************************************************************//**
-Use this macro instead of the corresponding function!
-Macro for memory buffer allocation */
-
-#define mem_zalloc(N) memset(mem_alloc(N), 0, (N))
-
-#ifdef UNIV_DEBUG
-#define mem_alloc(N) mem_alloc_func((N), __FILE__, __LINE__, NULL)
-#define mem_alloc2(N,S) mem_alloc_func((N), __FILE__, __LINE__, (S))
-#else /* UNIV_DEBUG */
-#define mem_alloc(N) mem_alloc_func((N), NULL)
-#define mem_alloc2(N,S) mem_alloc_func((N), (S))
-#endif /* UNIV_DEBUG */
-
-/***************************************************************//**
-NOTE: Use the corresponding macro instead of this function.
-Allocates a single buffer of memory from the dynamic memory of
-the C compiler. Is like malloc of C. The buffer must be freed
-with mem_free.
-@return own: free storage */
-UNIV_INLINE
-void*
-mem_alloc_func(
-/*===========*/
- ulint n, /*!< in: requested size in bytes */
-#ifdef UNIV_DEBUG
- const char* file_name, /*!< in: file name where created */
- ulint line, /*!< in: line where created */
-#endif /* UNIV_DEBUG */
- ulint* size); /*!< out: allocated size in bytes,
- or NULL */
-
-/**************************************************************//**
-Use this macro instead of the corresponding function!
-Macro for memory buffer freeing */
-
-#define mem_free(PTR) mem_free_func((PTR), __FILE__, __LINE__)
-/***************************************************************//**
-NOTE: Use the corresponding macro instead of this function.
-Frees a single buffer of storage from
-the dynamic memory of C compiler. Similar to free of C. */
-UNIV_INLINE
-void
-mem_free_func(
-/*==========*/
- void* ptr, /*!< in, own: buffer to be freed */
- const char* file_name, /*!< in: file name where created */
- ulint line); /*!< in: line where created */
/**********************************************************************//**
Duplicates a NUL-terminated string.
-@return own: a copy of the string, must be deallocated with mem_free */
+@return own: a copy of the string, must be deallocated with ut_free */
UNIV_INLINE
char*
mem_strdup(
@@ -290,7 +220,7 @@ mem_strdup(
const char* str); /*!< in: string to be copied */
/**********************************************************************//**
Makes a NUL-terminated copy of a nonterminated string.
-@return own: a copy of the string, must be deallocated with mem_free */
+@return own: a copy of the string, must be deallocated with ut_free */
UNIV_INLINE
char*
mem_strdupl(
@@ -298,19 +228,34 @@ mem_strdupl(
const char* str, /*!< in: string to be copied */
ulint len); /*!< in: length of str, in bytes */
-/**********************************************************************//**
-Duplicates a NUL-terminated string, allocated from a memory heap.
-@return own: a copy of the string */
-UNIV_INTERN
+/** Duplicate a block of data, allocated from a memory heap.
+@param[in] heap memory heap where string is allocated
+@param[in] data block of data to be copied
+@param[in] len length of data, in bytes
+@return own: a copy of data */
+inline
+void*
+mem_heap_dup(mem_heap_t* heap, const void* data, size_t len)
+{
+ ut_ad(data || !len);
+ return UNIV_LIKELY(data != NULL)
+ ? memcpy(mem_heap_alloc(heap, len), data, len)
+ : NULL;
+}
+
+/** Duplicate a NUL-terminated string, allocated from a memory heap.
+@param[in] heap memory heap where string is allocated
+@param[in] str string to be copied
+@return own: a copy of the string */
char*
mem_heap_strdup(
-/*============*/
- mem_heap_t* heap, /*!< in: memory heap where string is allocated */
- const char* str); /*!< in: string to be copied */
+ mem_heap_t* heap,
+ const char* str);
+
/**********************************************************************//**
Makes a NUL-terminated copy of a nonterminated string,
allocated from a memory heap.
-@return own: a copy of the string */
+@return own: a copy of the string */
UNIV_INLINE
char*
mem_heap_strdupl(
@@ -321,8 +266,7 @@ mem_heap_strdupl(
/**********************************************************************//**
Concatenate two strings and return the result, using a memory heap.
-@return own: the result */
-UNIV_INTERN
+@return own: the result */
char*
mem_heap_strcat(
/*============*/
@@ -330,24 +274,12 @@ mem_heap_strcat(
const char* s1, /*!< in: string 1 */
const char* s2); /*!< in: string 2 */
-/**********************************************************************//**
-Duplicate a block of data, allocated from a memory heap.
-@return own: a copy of the data */
-UNIV_INTERN
-void*
-mem_heap_dup(
-/*=========*/
- mem_heap_t* heap, /*!< in: memory heap where copy is allocated */
- const void* data, /*!< in: data to be copied */
- ulint len); /*!< in: length of data, in bytes */
-
/****************************************************************//**
A simple sprintf replacement that dynamically allocates the space for the
formatted string from the given heap. This supports a very limited set of
the printf syntax: types 's' and 'u' and length modifier 'l' (which is
required for the 'u' type).
-@return heap-allocated formatted string */
-UNIV_INTERN
+@return heap-allocated formatted string */
char*
mem_heap_printf(
/*============*/
@@ -355,24 +287,23 @@ mem_heap_printf(
const char* format, /*!< in: format string */
...) MY_ATTRIBUTE ((format (printf, 2, 3)));
-#ifdef MEM_PERIODIC_CHECK
-/******************************************************************//**
-Goes through the list of all allocated mem blocks, checks their magic
-numbers, and reports possible corruption. */
-UNIV_INTERN
+#ifdef UNIV_DEBUG
+/** Validates the contents of a memory heap.
+Asserts that the memory heap is consistent
+@param[in] heap Memory heap to validate */
void
-mem_validate_all_blocks(void);
-/*=========================*/
-#endif
+mem_heap_validate(
+ const mem_heap_t* heap);
+
+#endif /* UNIV_DEBUG */
/*#######################################################################*/
/** The info structure stored at the beginning of a heap block */
struct mem_block_info_t {
- ulint magic_n;/* magic number for debugging */
#ifdef UNIV_DEBUG
char file_name[8];/* file name where the mem heap was created */
- ulint line; /*!< line number where the mem heap was created */
+ unsigned line; /*!< line number where the mem heap was created */
#endif /* UNIV_DEBUG */
UT_LIST_BASE_NODE_T(mem_block_t) base; /* In the first block in the
the list this is the base node of the list of blocks;
@@ -391,7 +322,7 @@ struct mem_block_info_t {
user data in the block */
ulint start; /*!< the value of the struct field 'free' at the
creation of the block */
-#ifndef UNIV_HOTBACKUP
+
void* free_block;
/* if the MEM_HEAP_BTR_SEARCH bit is set in type,
and this is the heap root, this can contain an
@@ -402,24 +333,11 @@ struct mem_block_info_t {
/* if this block has been allocated from the buffer
pool, this contains the buf_block_t handle;
otherwise, this is NULL */
-#endif /* !UNIV_HOTBACKUP */
-#ifdef MEM_PERIODIC_CHECK
- UT_LIST_NODE_T(mem_block_t) mem_block_list;
- /* List of all mem blocks allocated; protected
- by the mem_comm_pool mutex */
-#endif
};
-#define MEM_BLOCK_MAGIC_N 764741555
-#define MEM_FREED_BLOCK_MAGIC_N 547711122
-
/* Header size for a memory heap block */
-#define MEM_BLOCK_HEADER_SIZE ut_calc_align(sizeof(mem_block_info_t),\
- UNIV_MEM_ALIGNMENT)
-#include "mem0dbg.h"
+#define MEM_BLOCK_HEADER_SIZE UT_CALC_ALIGN(sizeof(mem_block_info_t),\
+ UNIV_MEM_ALIGNMENT)
-#ifndef UNIV_NONINL
#include "mem0mem.ic"
#endif
-
-#endif
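
Stepping back from the header: mem_heap_t is a region (arena) allocator, which is why there is no per-allocation free. Callers bump-allocate out of the last block, chain a new block when it fills up, and release everything with one mem_heap_free or mem_heap_empty. A self-contained sketch of that core mechanism (plain C++, hypothetical names; it omits InnoDB's buffer-pool-backed blocks, the free_block cache, and the red zones):

    #include <cassert>
    #include <cstddef>
    #include <cstdlib>
    #include <cstring>

    struct arena_block {
        arena_block*    prev;   // previously filled block, if any
        size_t          len;    // usable payload bytes
        size_t          free;   // bump offset of the next allocation
    };

    // The payload lives immediately after the header, as in mem_block_t.
    static unsigned char* payload(arena_block* b)
    {
        return reinterpret_cast<unsigned char*>(b + 1);
    }

    static arena_block* block_new(arena_block* prev, size_t len)
    {
        arena_block* b = static_cast<arena_block*>(
            std::malloc(sizeof(arena_block) + len));
        b->prev = prev;
        b->len = len;
        b->free = 0;
        return b;
    }

    struct arena { arena_block* last; };

    static void* arena_alloc(arena* a, size_t n)
    {
        n = (n + 7) & ~size_t(7);           // align, like MEM_SPACE_NEEDED
        if (a->last->free + n > a->last->len) {
            size_t len = a->last->len * 2 > n ? a->last->len * 2 : n;
            a->last = block_new(a->last, len);  // chain a bigger block
        }
        void* p = payload(a->last) + a->last->free;
        a->last->free += n;
        return p;
    }

    static void arena_free_all(arena* a)    // the whole heap at once
    {
        for (arena_block* b = a->last; b != nullptr; ) {
            arena_block* prev = b->prev;
            std::free(b);
            b = prev;
        }
        a->last = nullptr;
    }

    int main()
    {
        arena a = { block_new(nullptr, 64) };
        char* s = static_cast<char*>(arena_alloc(&a, 6));
        std::memcpy(s, "hello", 6);
        assert(arena_alloc(&a, 200) != nullptr);  // forces a second block
        arena_free_all(&a);
        return 0;
    }
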
diff --git a/storage/innobase/include/mem0mem.ic b/storage/innobase/include/mem0mem.ic
index 3010bbf5c02..9c7ae8c28f8 100644
--- a/storage/innobase/include/mem0mem.ic
+++ b/storage/innobase/include/mem0mem.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,11 +24,6 @@ The memory management
Created 6/8/1994 Heikki Tuuri
*************************************************************************/
-#include "mem0dbg.ic"
-#ifndef UNIV_HOTBACKUP
-# include "mem0pool.h"
-#endif /* !UNIV_HOTBACKUP */
-
#ifdef UNIV_DEBUG
# define mem_heap_create_block(heap, n, type, file_name, line) \
mem_heap_create_block_func(heap, n, file_name, line, type)
@@ -43,7 +39,6 @@ Created 6/8/1994 Heikki Tuuri
Creates a memory heap block where data can be allocated.
@return own: memory heap block, NULL if did not succeed (only possible
for MEM_HEAP_BTR_SEARCH type heaps) */
-UNIV_INTERN
mem_block_t*
mem_heap_create_block_func(
/*=======================*/
@@ -52,37 +47,36 @@ mem_heap_create_block_func(
ulint n, /*!< in: number of bytes needed for user data */
#ifdef UNIV_DEBUG
const char* file_name,/*!< in: file name where created */
- ulint line, /*!< in: line where created */
+ unsigned line, /*!< in: line where created */
#endif /* UNIV_DEBUG */
ulint type); /*!< in: type of heap: MEM_HEAP_DYNAMIC or
MEM_HEAP_BUFFER */
+
/******************************************************************//**
Frees a block from a memory heap. */
-UNIV_INTERN
void
mem_heap_block_free(
/*================*/
mem_heap_t* heap, /*!< in: heap */
mem_block_t* block); /*!< in: block to free */
-#ifndef UNIV_HOTBACKUP
+
/******************************************************************//**
Frees the free_block field from a memory heap. */
-UNIV_INTERN
void
mem_heap_free_block_free(
/*=====================*/
mem_heap_t* heap); /*!< in: heap */
-#endif /* !UNIV_HOTBACKUP */
+
/***************************************************************//**
Adds a new block to a memory heap.
+@param[in] heap memory heap
+@param[in] n number of bytes needed
@return created block, NULL if did not succeed (only possible for
MEM_HEAP_BTR_SEARCH type heaps) */
-UNIV_INTERN
mem_block_t*
mem_heap_add_block(
-/*===============*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n); /*!< in: number of bytes user needs */
+ mem_heap_t* heap,
+ ulint n);
UNIV_INLINE
void
@@ -150,44 +144,42 @@ mem_block_get_start(mem_block_t* block)
return(block->start);
}
-/***************************************************************//**
-Allocates and zero-fills n bytes of memory from a memory heap.
-@return allocated, zero-filled storage */
+/** Allocates and zero-fills n bytes of memory from a memory heap.
+@param[in] heap memory heap
+@param[in] n number of bytes; if the heap is allowed to grow into
+the buffer pool, this must be <= MEM_MAX_ALLOC_IN_BUF
+@return allocated, zero-filled storage */
UNIV_INLINE
void*
mem_heap_zalloc(
-/*============*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n) /*!< in: number of bytes; if the heap is allowed
- to grow into the buffer pool, this must be
- <= MEM_MAX_ALLOC_IN_BUF */
+ mem_heap_t* heap,
+ ulint n)
{
ut_ad(heap);
ut_ad(!(heap->type & MEM_HEAP_BTR_SEARCH));
return(memset(mem_heap_alloc(heap, n), 0, n));
}
-/***************************************************************//**
-Allocates n bytes of memory from a memory heap.
+/** Allocates n bytes of memory from a memory heap.
+@param[in] heap memory heap
+@param[in] n number of bytes; if the heap is allowed to grow into
+the buffer pool, this must be <= MEM_MAX_ALLOC_IN_BUF
@return allocated storage, NULL if did not succeed (only possible for
MEM_HEAP_BTR_SEARCH type heaps) */
UNIV_INLINE
void*
mem_heap_alloc(
-/*===========*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n) /*!< in: number of bytes; if the heap is allowed
- to grow into the buffer pool, this must be
- <= MEM_MAX_ALLOC_IN_BUF */
+ mem_heap_t* heap,
+ ulint n)
{
mem_block_t* block;
- void* buf;
+ byte* buf;
ulint free;
- ut_ad(mem_heap_check(heap));
-
block = UT_LIST_GET_LAST(heap->base);
+ n += REDZONE_SIZE;
+
ut_ad(!(block->type & MEM_HEAP_BUFFER) || (n <= MEM_MAX_ALLOC_IN_BUF));
/* Check if there is enough space in block. If not, create a new
@@ -210,36 +202,22 @@ mem_heap_alloc(
mem_block_set_free(block, free + MEM_SPACE_NEEDED(n));
-#ifdef UNIV_MEM_DEBUG
- UNIV_MEM_ALLOC(buf,
- n + MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE);
-
- /* In the debug version write debugging info to the field */
- mem_field_init((byte*) buf, n);
-
- /* Advance buf to point at the storage which will be given to the
- caller */
- buf = (byte*) buf + MEM_FIELD_HEADER_SIZE;
-
-#endif
- UNIV_MEM_ALLOC(buf, n);
+ buf = buf + REDZONE_SIZE;
+ UNIV_MEM_ALLOC(buf, n - REDZONE_SIZE);
return(buf);
}
-/*****************************************************************//**
-Returns a pointer to the heap top.
-@return pointer to the heap top */
+/** Returns a pointer to the heap top.
+@param[in] heap memory heap
+@return pointer to the heap top */
UNIV_INLINE
byte*
mem_heap_get_heap_top(
-/*==================*/
- mem_heap_t* heap) /*!< in: memory heap */
+ mem_heap_t* heap)
{
mem_block_t* block;
byte* buf;
- ut_ad(mem_heap_check(heap));
-
block = UT_LIST_GET_LAST(heap->base);
buf = (byte*) block + mem_block_get_free(block);
@@ -247,37 +225,21 @@ mem_heap_get_heap_top(
return(buf);
}
-/*****************************************************************//**
-Frees the space in a memory heap exceeding the pointer given. The
-pointer must have been acquired from mem_heap_get_heap_top. The first
-memory block of the heap is not freed. */
+/** Frees the space in a memory heap exceeding the pointer given.
+The pointer must have been acquired from mem_heap_get_heap_top.
+The first memory block of the heap is not freed.
+@param[in] heap heap from which to free
+@param[in] old_top pointer to old top of heap */
UNIV_INLINE
void
mem_heap_free_heap_top(
-/*===================*/
- mem_heap_t* heap, /*!< in: heap from which to free */
- byte* old_top)/*!< in: pointer to old top of heap */
+ mem_heap_t* heap,
+ byte* old_top)
{
mem_block_t* block;
mem_block_t* prev_block;
-#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG
- ibool error;
- ulint total_size;
- ulint size;
- ut_ad(mem_heap_check(heap));
-
- /* Validate the heap and get its total allocated size */
- mem_heap_validate_or_print(heap, NULL, FALSE, &error, &total_size,
- NULL, NULL);
- ut_a(!error);
-
- /* Get the size below top pointer */
- mem_heap_validate_or_print(heap, old_top, FALSE, &error, &size, NULL,
- NULL);
- ut_a(!error);
-
-#endif
+ ut_d(mem_heap_validate(heap));
block = UT_LIST_GET_LAST(heap->base);
@@ -305,17 +267,7 @@ mem_heap_free_heap_top(
mem_block_set_free(block, old_top - (byte*) block);
ut_ad(mem_block_get_start(block) <= mem_block_get_free(block));
-#if defined UNIV_MEM_DEBUG
- UNIV_MEM_ALLOC(old_top, (byte*)block + block->len - old_top);
- /* In the debug version erase block from top up */
- mem_erase_buf(old_top, (byte*) block + block->len - old_top);
-
- /* Update allocated memory count */
- mutex_enter(&mem_hash_mutex);
- mem_current_allocated_memory -= (total_size - size);
- mutex_exit(&mem_hash_mutex);
-#endif /* UNIV_MEM_DEBUG */
- UNIV_MEM_FREE(old_top, (byte*)block + block->len - old_top);
+ UNIV_MEM_FREE(old_top, (byte*) block + block->len - old_top);
/* If free == start, we may free the block if it is not the first
one */
@@ -326,54 +278,39 @@ mem_heap_free_heap_top(
}
}
-/*****************************************************************//**
-Empties a memory heap. The first memory block of the heap is not freed. */
+/** Empties a memory heap.
+The first memory block of the heap is not freed.
+@param[in] heap heap to empty */
UNIV_INLINE
void
mem_heap_empty(
-/*===========*/
- mem_heap_t* heap) /*!< in: heap to empty */
+ mem_heap_t* heap)
{
mem_heap_free_heap_top(heap, (byte*) heap + mem_block_get_start(heap));
-#ifndef UNIV_HOTBACKUP
+
if (heap->free_block) {
mem_heap_free_block_free(heap);
}
-#endif /* !UNIV_HOTBACKUP */
}
-/*****************************************************************//**
-Returns a pointer to the topmost element in a memory heap. The size of the
-element must be given.
-@return pointer to the topmost element */
+/** Returns a pointer to the topmost element in a memory heap.
+The size of the element must be given.
+@param[in] heap memory heap
+@param[in] n size of the topmost element
+@return pointer to the topmost element */
UNIV_INLINE
void*
mem_heap_get_top(
-/*=============*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n) /*!< in: size of the topmost element */
+ mem_heap_t* heap,
+ ulint n)
{
mem_block_t* block;
byte* buf;
- ut_ad(mem_heap_check(heap));
-
block = UT_LIST_GET_LAST(heap->base);
buf = (byte*) block + mem_block_get_free(block) - MEM_SPACE_NEEDED(n);
-#ifdef UNIV_MEM_DEBUG
- ut_ad(mem_block_get_start(block) <= (ulint) (buf - (byte*) block));
-
- /* In the debug version, advance buf to point at the storage which
- was given to the caller in the allocation*/
-
- buf += MEM_FIELD_HEADER_SIZE;
-
- /* Check that the field lengths agree */
- ut_ad(n == mem_field_header_get_len(buf));
-#endif
-
return((void*) buf);
}
@@ -389,21 +326,13 @@ mem_heap_free_top(
{
mem_block_t* block;
- ut_ad(mem_heap_check(heap));
+ n += REDZONE_SIZE;
block = UT_LIST_GET_LAST(heap->base);
/* Subtract the free field of block */
mem_block_set_free(block, mem_block_get_free(block)
- MEM_SPACE_NEEDED(n));
-#ifdef UNIV_MEM_DEBUG
-
- ut_ad(mem_block_get_start(block) <= mem_block_get_free(block));
-
- UNIV_MEM_ALLOC((byte*) block + mem_block_get_free(block), n);
- /* In the debug version check the consistency, and erase field */
- mem_field_erase((byte*) block + mem_block_get_free(block), n);
-#endif
/* If free == start, we may free the block if it is not the first
one */
@@ -416,86 +345,67 @@ mem_heap_free_top(
}
}
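
mem_heap_get_top() and mem_heap_free_top() treat the heap's most recent allocation like a stack top; the caller must pass the same size it allocated with. A sketch (InnoDB environment assumed):

	void*	p = mem_heap_alloc(heap, 64);
	/* ... */
	void*	top = mem_heap_get_top(heap, 64);	/* peek at the topmost element */
	mem_heap_free_top(heap, 64);	/* pop it; REDZONE_SIZE is re-added
					internally, as above */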
-/*****************************************************************//**
-NOTE: Use the corresponding macros instead of this function. Creates a
-memory heap. For debugging purposes, takes also the file name and line as
-argument.
+/** Creates a memory heap.
+NOTE: Use the corresponding macros instead of this function.
+A single user buffer of 'size' will fit in the block.
+0 creates a default size block.
+@param[in] size Desired start block size.
+@param[in] file_name File name where created
+@param[in] line Line where created
+@param[in] type Heap type
@return own: memory heap, NULL if did not succeed (only possible for
MEM_HEAP_BTR_SEARCH type heaps) */
UNIV_INLINE
mem_heap_t*
mem_heap_create_func(
-/*=================*/
- ulint n, /*!< in: desired start block size,
- this means that a single user buffer
- of size n will fit in the block,
- 0 creates a default size block */
+ ulint size,
#ifdef UNIV_DEBUG
- const char* file_name, /*!< in: file name where created */
- ulint line, /*!< in: line where created */
+ const char* file_name,
+ unsigned line,
#endif /* UNIV_DEBUG */
- ulint type) /*!< in: heap type */
+ ulint type)
{
mem_block_t* block;
- if (!n) {
- n = MEM_BLOCK_START_SIZE;
+ if (!size) {
+ size = MEM_BLOCK_START_SIZE;
}
- block = mem_heap_create_block(NULL, n, type, file_name, line);
+ block = mem_heap_create_block(NULL, size, type, file_name, line);
if (block == NULL) {
return(NULL);
}
- UT_LIST_INIT(block->base);
-
- /* Add the created block itself as the first block in the list */
- UT_LIST_ADD_FIRST(list, block->base, block);
+ /* The first block should not be in buffer pool,
+ because it might be relocated to resize buffer pool. */
+ ut_ad(block->buf_block == NULL);
-#ifdef UNIV_MEM_DEBUG
+ UT_LIST_INIT(block->base, &mem_block_t::list);
- mem_hash_insert(block, file_name, line);
-
-#endif
+ /* Add the created block itself as the first block in the list */
+ UT_LIST_ADD_FIRST(block->base, block);
return(block);
}
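
As the rewritten comment says, callers reach this through the mem_heap_create() macro rather than mem_heap_create_func() itself; the new assertion pins down that the first block never comes from the buffer pool, since a buffer-pool resize could relocate it. The typical lifecycle, sketched:

	mem_heap_t*	heap = mem_heap_create(0);	/* 0 = default-size first block */
	void*		p = mem_heap_alloc(heap, 64);
	/* ... */
	mem_heap_free(heap);	/* now a plain function, see below */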
-/*****************************************************************//**
-NOTE: Use the corresponding macro instead of this function. Frees the space
-occupied by a memory heap. In the debug version erases the heap memory
-blocks. */
+/** Frees the space occupied by a memory heap.
+NOTE: Use the corresponding macro instead of this function.
+@param[in] heap Heap to be freed */
UNIV_INLINE
void
-mem_heap_free_func(
-/*===============*/
- mem_heap_t* heap, /*!< in, own: heap to be freed */
- const char* file_name MY_ATTRIBUTE((unused)),
- /*!< in: file name where freed */
- ulint line MY_ATTRIBUTE((unused)))
+mem_heap_free(
+ mem_heap_t* heap)
{
mem_block_t* block;
mem_block_t* prev_block;
- ut_ad(mem_heap_check(heap));
-
block = UT_LIST_GET_LAST(heap->base);
-#ifdef UNIV_MEM_DEBUG
-
- /* In the debug version remove the heap from the hash table of heaps
- and check its consistency */
-
- mem_hash_remove(heap, file_name, line);
-
-#endif
-#ifndef UNIV_HOTBACKUP
if (heap->free_block) {
mem_heap_free_block_free(heap);
}
-#endif /* !UNIV_HOTBACKUP */
while (block != NULL) {
/* Store the contents of info before freeing current block
@@ -509,73 +419,6 @@ mem_heap_free_func(
}
}
-/***************************************************************//**
-NOTE: Use the corresponding macro instead of this function.
-Allocates a single buffer of memory from the dynamic memory of
-the C compiler. Is like malloc of C. The buffer must be freed
-with mem_free.
-@return own: free storage */
-UNIV_INLINE
-void*
-mem_alloc_func(
-/*===========*/
- ulint n, /*!< in: desired number of bytes */
-#ifdef UNIV_DEBUG
- const char* file_name, /*!< in: file name where created */
- ulint line, /*!< in: line where created */
-#endif /* UNIV_DEBUG */
- ulint* size) /*!< out: allocated size in bytes,
- or NULL */
-{
- mem_heap_t* heap;
- void* buf;
-
- heap = mem_heap_create_at(n, file_name, line);
-
- /* Note that as we created the first block in the heap big enough
- for the buffer requested by the caller, the buffer will be in the
- first block and thus we can calculate the pointer to the heap from
- the pointer to the buffer when we free the memory buffer. */
-
- if (size) {
- /* Adjust the allocation to the actual size of the
- memory block. */
- ulint m = mem_block_get_len(heap)
- - mem_block_get_free(heap);
-#ifdef UNIV_MEM_DEBUG
- m -= MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE;
-#endif /* UNIV_MEM_DEBUG */
- ut_ad(m >= n);
- n = m;
- *size = m;
- }
-
- buf = mem_heap_alloc(heap, n);
-
- ut_a((byte*) heap == (byte*) buf - MEM_BLOCK_HEADER_SIZE
- - MEM_FIELD_HEADER_SIZE);
- return(buf);
-}
-
-/***************************************************************//**
-NOTE: Use the corresponding macro instead of this function. Frees a single
-buffer of storage from the dynamic memory of the C compiler. Similar to the
-free of C. */
-UNIV_INLINE
-void
-mem_free_func(
-/*==========*/
- void* ptr, /*!< in, own: buffer to be freed */
- const char* file_name, /*!< in: file name where created */
- ulint line) /*!< in: line where created */
-{
- mem_heap_t* heap;
-
- heap = (mem_heap_t*)((byte*) ptr - MEM_BLOCK_HEADER_SIZE
- - MEM_FIELD_HEADER_SIZE);
- mem_heap_free_func(heap, file_name, line);
-}
-
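
With mem_alloc_func()/mem_free_func() removed, their single-buffer role passes to the ut_ allocator, which is exactly what the mem_strdup()/mem_strdupl() hunks below switch to. The replacement pattern, sketched:

	char*	p = static_cast<char*>(ut_malloc_nokey(len));	/* was mem_alloc(len) */
	/* ... */
	ut_free(p);	/* was mem_free(p) */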
/*****************************************************************//**
Returns the space in bytes occupied by a memory heap. */
UNIV_INLINE
@@ -584,24 +427,18 @@ mem_heap_get_size(
/*==============*/
mem_heap_t* heap) /*!< in: heap */
{
- ulint size = 0;
-
- ut_ad(mem_heap_check(heap));
+ ulint size = heap->total_size;
- size = heap->total_size;
-
-#ifndef UNIV_HOTBACKUP
if (heap->free_block) {
size += UNIV_PAGE_SIZE;
}
-#endif /* !UNIV_HOTBACKUP */
return(size);
}
/**********************************************************************//**
Duplicates a NUL-terminated string.
-@return own: a copy of the string, must be deallocated with mem_free */
+@return own: a copy of the string, must be deallocated with ut_free */
UNIV_INLINE
char*
mem_strdup(
@@ -609,12 +446,12 @@ mem_strdup(
const char* str) /*!< in: string to be copied */
{
ulint len = strlen(str) + 1;
- return((char*) memcpy(mem_alloc(len), str, len));
+ return(static_cast<char*>(memcpy(ut_malloc_nokey(len), str, len)));
}
/**********************************************************************//**
Makes a NUL-terminated copy of a nonterminated string.
-@return own: a copy of the string, must be deallocated with mem_free */
+@return own: a copy of the string, must be deallocated with ut_free */
UNIV_INLINE
char*
mem_strdupl(
@@ -622,15 +459,15 @@ mem_strdupl(
const char* str, /*!< in: string to be copied */
ulint len) /*!< in: length of str, in bytes */
{
- char* s = (char*) mem_alloc(len + 1);
+ char* s = static_cast<char*>(ut_malloc_nokey(len + 1));
s[len] = 0;
- return((char*) memcpy(s, str, len));
+ return(static_cast<char*>(memcpy(s, str, len)));
}
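
A short usage sketch of mem_strdupl() under the updated contract (deallocation now goes through ut_free()):

	const char*	src = "hello world";
	char*		word = mem_strdupl(src, 5);	/* "hello", NUL-terminated */
	/* ... */
	ut_free(word);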
/**********************************************************************//**
Makes a NUL-terminated copy of a nonterminated string,
allocated from a memory heap.
-@return own: a copy of the string */
+@return own: a copy of the string */
UNIV_INLINE
char*
mem_heap_strdupl(
diff --git a/storage/innobase/include/mem0pool.h b/storage/innobase/include/mem0pool.h
deleted file mode 100644
index d6fb23f74ad..00000000000
--- a/storage/innobase/include/mem0pool.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/mem0pool.h
-The lowest-level memory management
-
-Created 6/9/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef mem0pool_h
-#define mem0pool_h
-
-#include "univ.i"
-#include "os0file.h"
-#include "ut0lst.h"
-
-/** Memory pool */
-struct mem_pool_t;
-
-/** The common memory pool */
-extern mem_pool_t* mem_comm_pool;
-
-/** Memory area header */
-struct mem_area_t{
- ulint size_and_free; /*!< memory area size is obtained by
- anding with ~MEM_AREA_FREE; area in
- a free list if ANDing with
- MEM_AREA_FREE results in nonzero */
- UT_LIST_NODE_T(mem_area_t)
- free_list; /*!< free list node */
-};
-
-/** Each memory area takes this many extra bytes for control information */
-#define MEM_AREA_EXTRA_SIZE (ut_calc_align(sizeof(struct mem_area_t),\
- UNIV_MEM_ALIGNMENT))
-
-/********************************************************************//**
-Creates a memory pool.
-@return memory pool */
-UNIV_INTERN
-mem_pool_t*
-mem_pool_create(
-/*============*/
- ulint size); /*!< in: pool size in bytes */
-/********************************************************************//**
-Frees a memory pool. */
-UNIV_INTERN
-void
-mem_pool_free(
-/*==========*/
- mem_pool_t* pool); /*!< in, own: memory pool */
-/********************************************************************//**
-Allocates memory from a pool. NOTE: This low-level function should only be
-used in mem0mem.*!
-@return own: allocated memory buffer */
-UNIV_INTERN
-void*
-mem_area_alloc(
-/*===========*/
- ulint* psize, /*!< in: requested size in bytes; for optimum
- space usage, the size should be a power of 2
- minus MEM_AREA_EXTRA_SIZE;
- out: allocated size in bytes (greater than
- or equal to the requested size) */
- mem_pool_t* pool); /*!< in: memory pool */
-/********************************************************************//**
-Frees memory to a pool. */
-UNIV_INTERN
-void
-mem_area_free(
-/*==========*/
- void* ptr, /*!< in, own: pointer to allocated memory
- buffer */
- mem_pool_t* pool); /*!< in: memory pool */
-/********************************************************************//**
-Returns the amount of reserved memory.
-@return reserved memory in bytes */
-UNIV_INTERN
-ulint
-mem_pool_get_reserved(
-/*==================*/
- mem_pool_t* pool); /*!< in: memory pool */
-/********************************************************************//**
-Validates a memory pool.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-mem_pool_validate(
-/*==============*/
- mem_pool_t* pool); /*!< in: memory pool */
-/********************************************************************//**
-Prints info of a memory pool. */
-UNIV_INTERN
-void
-mem_pool_print_info(
-/*================*/
- FILE* outfile,/*!< in: output file to write to */
- mem_pool_t* pool); /*!< in: memory pool */
-
-
-#ifndef UNIV_NONINL
-#include "mem0pool.ic"
-#endif
-
-#endif
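
For reference, the deleted pool packed an area's size and free flag into one word; following the header comment above (and assuming MEM_AREA_FREE was the low bit, as in the removed implementation), the decoding was:

	ulint	size = area->size_and_free & ~MEM_AREA_FREE;		/* area size */
	ibool	is_free = (area->size_and_free & MEM_AREA_FREE) != 0;	/* on a free list? */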
diff --git a/storage/innobase/include/mem0pool.ic b/storage/innobase/include/mem0pool.ic
deleted file mode 100644
index d99daf59e13..00000000000
--- a/storage/innobase/include/mem0pool.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/mem0pool.ic
-The lowest-level memory management
-
-Created 6/8/1994 Heikki Tuuri
-*************************************************************************/
diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h
index 929f68122af..eaf2fad9e7f 100644
--- a/storage/innobase/include/mtr0log.h
+++ b/storage/innobase/include/mtr0log.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,26 +26,26 @@ Created 12/7/1995 Heikki Tuuri
#ifndef mtr0log_h
#define mtr0log_h
-#include "univ.i"
#include "mtr0mtr.h"
-#include "dict0types.h"
+#include "dyn0buf.h"
+
+// Forward declaration
+struct dict_index_t;
-#ifndef UNIV_HOTBACKUP
/********************************************************//**
Writes 1, 2 or 4 bytes to a file page. Writes the corresponding log
record to the mini-transaction log if mtr is not NULL. */
-UNIV_INTERN
void
mlog_write_ulint(
/*=============*/
- byte* ptr, /*!< in: pointer where to write */
- ulint val, /*!< in: value to write */
- byte type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
- mtr_t* mtr); /*!< in: mini-transaction handle */
+ byte* ptr, /*!< in: pointer where to write */
+ ulint val, /*!< in: value to write */
+ mlog_id_t type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+
/********************************************************//**
Writes 8 bytes to a file page. Writes the corresponding log
record to the mini-transaction log, only if mtr is not NULL */
-UNIV_INTERN
void
mlog_write_ull(
/*===========*/
@@ -55,7 +55,6 @@ mlog_write_ull(
/********************************************************//**
Writes a string to a file page buffered in the buffer pool. Writes the
corresponding log record to the mini-transaction log. */
-UNIV_INTERN
void
mlog_write_string(
/*==============*/
@@ -66,7 +65,6 @@ mlog_write_string(
/********************************************************//**
Logs a write of a string to a file page buffered in the buffer pool.
Writes the corresponding log record to the mini-transaction log. */
-UNIV_INTERN
void
mlog_log_string(
/*============*/
@@ -76,40 +74,34 @@ mlog_log_string(
/********************************************************//**
Writes initial part of a log record consisting of one-byte item
type and four-byte space and page numbers. */
-UNIV_INTERN
void
mlog_write_initial_log_record(
/*==========================*/
const byte* ptr, /*!< in: pointer to (inside) a buffer
frame holding the file page where
modification is made */
- byte type, /*!< in: log item type: MLOG_1BYTE, ... */
+ mlog_id_t type, /*!< in: log item type: MLOG_1BYTE, ... */
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************//**
-Writes a log record about an .ibd file create/delete/rename.
-@return new value of log_ptr */
+Catenates 1 - 4 bytes to the mtr log. The value is not compressed. */
UNIV_INLINE
-byte*
-mlog_write_initial_log_record_for_file_op(
-/*======================================*/
- ulint type, /*!< in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or
- MLOG_FILE_RENAME */
- ulint space_id,/*!< in: space id, if applicable */
- ulint page_no,/*!< in: page number (not relevant currently) */
- byte* log_ptr,/*!< in: pointer to mtr log which has been opened */
- mtr_t* mtr); /*!< in: mtr */
+void
+mlog_catenate_ulint(
+/*================*/
+ mtr_buf_t* dyn_buf, /*!< in/out: buffer to write */
+ ulint val, /*!< in: value to write */
+ mlog_id_t type); /*!< in: type of value to write */
/********************************************************//**
Catenates 1 - 4 bytes to the mtr log. */
UNIV_INLINE
void
mlog_catenate_ulint(
/*================*/
- mtr_t* mtr, /*!< in: mtr */
- ulint val, /*!< in: value to write */
- ulint type); /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+ mtr_t* mtr, /*!< in: mtr */
+ ulint val, /*!< in: value to write */
+ mlog_id_t type); /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
/********************************************************//**
Catenates n bytes to the mtr log. */
-UNIV_INTERN
void
mlog_catenate_string(
/*=================*/
@@ -122,8 +114,8 @@ UNIV_INLINE
void
mlog_catenate_ulint_compressed(
/*===========================*/
- mtr_t* mtr, /*!< in: mtr */
- ulint val); /*!< in: value to write */
+ mtr_t* mtr, /*!< in: mtr */
+ ulint val); /*!< in: value to write */
/********************************************************//**
Catenates a compressed 64-bit integer to mlog. */
UNIV_INLINE
@@ -134,27 +126,45 @@ mlog_catenate_ull_compressed(
ib_uint64_t val); /*!< in: value to write */
/********************************************************//**
Opens a buffer to mlog. It must be closed with mlog_close.
-@return buffer, NULL if log mode MTR_LOG_NONE */
+@return buffer, NULL if log mode MTR_LOG_NONE */
UNIV_INLINE
byte*
mlog_open(
/*======*/
- mtr_t* mtr, /*!< in: mtr */
- ulint size); /*!< in: buffer size in bytes; MUST be
- smaller than DYN_ARRAY_DATA_SIZE! */
+ mtr_t* mtr, /*!< in: mtr */
+ ulint size); /*!< in: buffer size in bytes; MUST be
+ smaller than DYN_ARRAY_DATA_SIZE! */
/********************************************************//**
Closes a buffer opened to mlog. */
UNIV_INLINE
void
mlog_close(
/*=======*/
- mtr_t* mtr, /*!< in: mtr */
- byte* ptr); /*!< in: buffer space from ptr up was not used */
+ mtr_t* mtr, /*!< in: mtr */
+ byte* ptr); /*!< in: buffer space from ptr up was
+ not used */
+
+/** Writes a log record about an operation.
+@param[in] type redo log record type
+@param[in] space_id tablespace identifier
+@param[in] page_no page number
+@param[in,out] log_ptr current end of mini-transaction log
+@param[in,out] mtr mini-transaction
+@return end of mini-transaction log */
+UNIV_INLINE
+byte*
+mlog_write_initial_log_record_low(
+ mlog_id_t type,
+ ulint space_id,
+ ulint page_no,
+ byte* log_ptr,
+ mtr_t* mtr);
+
/********************************************************//**
Writes the initial part of a log record (3..11 bytes).
If the implementation of this function is changed, all
size parameters to mlog_open() should be adjusted accordingly!
-@return new value of log_ptr */
+@return new value of log_ptr */
UNIV_INLINE
byte*
mlog_write_initial_log_record_fast(
@@ -162,42 +172,36 @@ mlog_write_initial_log_record_fast(
const byte* ptr, /*!< in: pointer to (inside) a buffer
frame holding the file page where
modification is made */
- byte type, /*!< in: log item type: MLOG_1BYTE, ... */
+ mlog_id_t type, /*!< in: log item type: MLOG_1BYTE, ... */
byte* log_ptr,/*!< in: pointer to mtr log which has
been opened */
mtr_t* mtr); /*!< in: mtr */
-#else /* !UNIV_HOTBACKUP */
-# define mlog_write_initial_log_record(ptr,type,mtr) ((void) 0)
-# define mlog_write_initial_log_record_fast(ptr,type,log_ptr,mtr) ((byte*) 0)
-#endif /* !UNIV_HOTBACKUP */
/********************************************************//**
Parses an initial log record written by mlog_write_initial_log_record.
-@return parsed record end, NULL if not a complete record */
-UNIV_INTERN
+@return parsed record end, NULL if not a complete record */
byte*
mlog_parse_initial_log_record(
/*==========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- byte* type, /*!< out: log record type: MLOG_1BYTE, ... */
- ulint* space, /*!< out: space id */
- ulint* page_no);/*!< out: page number */
+ const byte* ptr, /*!< in: buffer */
+ const byte* end_ptr,/*!< in: buffer end */
+ mlog_id_t* type, /*!< out: log record type: MLOG_1BYTE, ... */
+ ulint* space, /*!< out: space id */
+ ulint* page_no);/*!< out: page number */
/********************************************************//**
Parses a log record written by mlog_write_ulint or mlog_write_ull.
-@return parsed record end, NULL if not a complete record */
-UNIV_INTERN
+@return parsed record end, NULL if not a complete record */
byte*
mlog_parse_nbytes(
/*==============*/
- ulint type, /*!< in: log record type: MLOG_1BYTE, ... */
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- byte* page, /*!< in: page where to apply the log record, or NULL */
- void* page_zip);/*!< in/out: compressed page, or NULL */
+ mlog_id_t type, /*!< in: log record type: MLOG_1BYTE, ... */
+ const byte* ptr, /*!< in: buffer */
+ const byte* end_ptr,/*!< in: buffer end */
+ byte* page, /*!< in: page where to apply the log record,
+ or NULL */
+ void* page_zip);/*!< in/out: compressed page, or NULL */
/********************************************************//**
Parses a log record written by mlog_write_string.
-@return parsed record end, NULL if not a complete record */
-UNIV_INTERN
+@return parsed record end, NULL if not a complete record */
byte*
mlog_parse_string(
/*==============*/
@@ -206,30 +210,26 @@ mlog_parse_string(
byte* page, /*!< in: page where to apply the log record, or NULL */
void* page_zip);/*!< in/out: compressed page, or NULL */
-#ifndef UNIV_HOTBACKUP
/********************************************************//**
Opens a buffer for mlog, writes the initial log record and,
if needed, the field lengths of an index. Reserves space
for further log entries. The log entry must be closed with
mtr_close().
-@return buffer, NULL if log mode MTR_LOG_NONE */
-UNIV_INTERN
+@return buffer, NULL if log mode MTR_LOG_NONE */
byte*
mlog_open_and_write_index(
/*======================*/
mtr_t* mtr, /*!< in: mtr */
const byte* rec, /*!< in: index record or page */
const dict_index_t* index, /*!< in: record descriptor */
- byte type, /*!< in: log item type */
+ mlog_id_t type, /*!< in: log item type */
ulint size); /*!< in: requested buffer size in bytes
(if 0, calls mlog_close() and
returns NULL) */
-#endif /* !UNIV_HOTBACKUP */
/********************************************************//**
Parses a log record written by mlog_open_and_write_index.
-@return parsed record end, NULL if not a complete record */
-UNIV_INTERN
+@return parsed record end, NULL if not a complete record */
byte*
mlog_parse_index(
/*=============*/
@@ -238,14 +238,10 @@ mlog_parse_index(
ibool comp, /*!< in: TRUE=compact record format */
dict_index_t** index); /*!< out, own: dummy index */
-#ifndef UNIV_HOTBACKUP
-/* Insert, update, and maybe other functions may use this value to define an
+/** Insert, update, and maybe other functions may use this value to define an
extra mlog buffer size for variable size data */
#define MLOG_BUF_MARGIN 256
-#endif /* !UNIV_HOTBACKUP */
-#ifndef UNIV_NONINL
#include "mtr0log.ic"
-#endif
-#endif
+#endif /* mtr0log_h */
diff --git a/storage/innobase/include/mtr0log.ic b/storage/innobase/include/mtr0log.ic
index c239140785f..5cfc08622d5 100644
--- a/storage/innobase/include/mtr0log.ic
+++ b/storage/innobase/include/mtr0log.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,36 +24,31 @@ Mini-transaction logging routines
Created 12/7/1995 Heikki Tuuri
*******************************************************/
-#include "mach0data.h"
-#include "ut0lst.h"
-#include "buf0buf.h"
#include "buf0dblwr.h"
#include "fsp0types.h"
-#include "trx0sys.h"
+#include "mach0data.h"
+#include "trx0types.h"
/********************************************************//**
Opens a buffer to mlog. It must be closed with mlog_close.
-@return buffer, NULL if log mode MTR_LOG_NONE */
+@return buffer, NULL if log mode MTR_LOG_NONE or MTR_LOG_NO_REDO */
UNIV_INLINE
byte*
mlog_open(
/*======*/
mtr_t* mtr, /*!< in: mtr */
ulint size) /*!< in: buffer size in bytes; MUST be
- smaller than DYN_ARRAY_DATA_SIZE! */
+ smaller than mtr_t::buf_t::MAX_DATA_SIZE! */
{
- dyn_array_t* mlog;
-
- mtr->modifications = TRUE;
+ mtr->set_modified();
- if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
+ if (mtr_get_log_mode(mtr) == MTR_LOG_NONE
+ || mtr_get_log_mode(mtr) == MTR_LOG_NO_REDO) {
return(NULL);
}
- mlog = &(mtr->log);
-
- return(dyn_array_open(mlog, size));
+ return(mtr->get_log()->open(size));
}
/********************************************************//**
@@ -64,61 +60,64 @@ mlog_close(
mtr_t* mtr, /*!< in: mtr */
byte* ptr) /*!< in: buffer space from ptr up was not used */
{
- dyn_array_t* mlog;
-
ut_ad(mtr_get_log_mode(mtr) != MTR_LOG_NONE);
+ ut_ad(mtr_get_log_mode(mtr) != MTR_LOG_NO_REDO);
- mlog = &(mtr->log);
-
- dyn_array_close(mlog, ptr);
+ mtr->get_log()->close(ptr);
}
-#ifndef UNIV_HOTBACKUP
/********************************************************//**
Catenates 1 - 4 bytes to the mtr log. The value is not compressed. */
UNIV_INLINE
void
mlog_catenate_ulint(
/*================*/
- mtr_t* mtr, /*!< in: mtr */
- ulint val, /*!< in: value to write */
- ulint type) /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+ mtr_buf_t* mtr_buf, /*!< in/out: buffer to write */
+ ulint val, /*!< in: value to write */
+ mlog_id_t type) /*!< in: type of value to write */
{
- dyn_array_t* mlog;
- byte* ptr;
+ compile_time_assert(MLOG_1BYTE == 1);
+ compile_time_assert(MLOG_2BYTES == 2);
+ compile_time_assert(MLOG_4BYTES == 4);
+ compile_time_assert(MLOG_8BYTES == 8);
- if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
+ byte* ptr = mtr_buf->push<byte*>(type);
- return;
- }
-
- mlog = &(mtr->log);
-
-#if MLOG_1BYTE != 1
-# error "MLOG_1BYTE != 1"
-#endif
-#if MLOG_2BYTES != 2
-# error "MLOG_2BYTES != 2"
-#endif
-#if MLOG_4BYTES != 4
-# error "MLOG_4BYTES != 4"
-#endif
-#if MLOG_8BYTES != 8
-# error "MLOG_8BYTES != 8"
-#endif
- ptr = (byte*) dyn_array_push(mlog, type);
-
- if (type == MLOG_4BYTES) {
+ switch (type) {
+ case MLOG_4BYTES:
mach_write_to_4(ptr, val);
- } else if (type == MLOG_2BYTES) {
+ break;
+ case MLOG_2BYTES:
mach_write_to_2(ptr, val);
- } else {
- ut_ad(type == MLOG_1BYTE);
+ break;
+ case MLOG_1BYTE:
mach_write_to_1(ptr, val);
+ break;
+ default:
+ ut_error;
}
}
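
The compile_time_asserts make the dispatch trick explicit: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES and MLOG_8BYTES carry numeric values equal to their payload lengths, so push<byte*>(type) reserves exactly `type` bytes, which the switch then fills. Sketch:

	byte*	ptr = mtr_buf->push<byte*>(MLOG_4BYTES);	/* reserves 4 bytes */
	mach_write_to_4(ptr, val);				/* fills them in */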
/********************************************************//**
+Catenates 1 - 4 bytes to the mtr log. The value is not compressed. */
+UNIV_INLINE
+void
+mlog_catenate_ulint(
+/*================*/
+ mtr_t* mtr, /*!< in/out: mtr */
+ ulint val, /*!< in: value to write */
+ mlog_id_t type) /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+{
+ if (mtr_get_log_mode(mtr) == MTR_LOG_NONE
+ || mtr_get_log_mode(mtr) == MTR_LOG_NO_REDO) {
+
+ return;
+ }
+
+ mlog_catenate_ulint(mtr->get_log(), val, type);
+}
+
+/********************************************************//**
Catenates a compressed ulint to mlog. */
UNIV_INLINE
void
@@ -161,16 +160,52 @@ mlog_catenate_ull_compressed(
return;
}
- log_ptr += mach_ull_write_compressed(log_ptr, val);
+ log_ptr += mach_u64_write_compressed(log_ptr, val);
mlog_close(mtr, log_ptr);
}
+/** Writes a log record about an operation.
+@param[in] type redo log record type
+@param[in] space_id tablespace identifier
+@param[in] page_no page number
+@param[in,out] log_ptr current end of mini-transaction log
+@param[in,out] mtr mini-transaction
+@return end of mini-transaction log */
+UNIV_INLINE
+byte*
+mlog_write_initial_log_record_low(
+ mlog_id_t type,
+ ulint space_id,
+ ulint page_no,
+ byte* log_ptr,
+ mtr_t* mtr)
+{
+ ut_ad(type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(type));
+ ut_ad(type == MLOG_FILE_NAME
+ || type == MLOG_FILE_DELETE
+ || type == MLOG_FILE_CREATE2
+ || type == MLOG_FILE_RENAME2
+ || type == MLOG_INDEX_LOAD
+ || type == MLOG_TRUNCATE
+ || type == MLOG_FILE_WRITE_CRYPT_DATA
+ || mtr->is_named_space(space_id));
+
+ mach_write_to_1(log_ptr, type);
+ log_ptr++;
+
+ log_ptr += mach_write_compressed(log_ptr, space_id);
+ log_ptr += mach_write_compressed(log_ptr, page_no);
+
+ mtr->added_rec();
+ return(log_ptr);
+}
+
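
The "3..11 bytes" bound quoted in the next comment follows from this layout, assuming mach_write_compressed() emits between 1 and 5 bytes per 32-bit value:

	const ulint	type_len = 1;			/* one byte for mlog_id_t */
	const ulint	min_hdr = type_len + 1 + 1;	/* = 3: both ids fit one byte */
	const ulint	max_hdr = type_len + 5 + 5;	/* = 11: worst-case compressed ids */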
/********************************************************//**
Writes the initial part of a log record (3..11 bytes).
If the implementation of this function is changed, all
size parameters to mlog_open() should be adjusted accordingly!
-@return new value of log_ptr */
+@return new value of log_ptr */
UNIV_INLINE
byte*
mlog_write_initial_log_record_fast(
@@ -178,21 +213,17 @@ mlog_write_initial_log_record_fast(
const byte* ptr, /*!< in: pointer to (inside) a buffer
frame holding the file page where
modification is made */
- byte type, /*!< in: log item type: MLOG_1BYTE, ... */
+ mlog_id_t type, /*!< in: log item type: MLOG_1BYTE, ... */
byte* log_ptr,/*!< in: pointer to mtr log which has
been opened */
- mtr_t* mtr) /*!< in: mtr */
+ mtr_t* mtr) /*!< in/out: mtr */
{
-#ifdef UNIV_DEBUG
- buf_block_t* block;
-#endif
const byte* page;
ulint space;
ulint offset;
- ut_ad(mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_X_FIX));
- ut_ad(type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(type));
- ut_ad(ptr && log_ptr);
+ ut_ad(log_ptr);
+ ut_d(mtr->memo_modify_page(ptr));
page = (const byte*) ut_align_down(ptr, UNIV_PAGE_SIZE);
space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
@@ -202,75 +233,16 @@ mlog_write_initial_log_record_fast(
the doublewrite buffer is located in pages
FSP_EXTENT_SIZE, ..., 3 * FSP_EXTENT_SIZE - 1 in the
system tablespace */
+
if (space == TRX_SYS_SPACE
&& offset >= FSP_EXTENT_SIZE && offset < 3 * FSP_EXTENT_SIZE) {
- if (buf_dblwr_being_created) {
- /* Do nothing: we only come to this branch in an
- InnoDB database creation. We do not redo log
- anything for the doublewrite buffer pages. */
- return(log_ptr);
- } else {
- fprintf(stderr,
- "Error: trying to redo log a record of type "
- "%d on page %lu of space %lu in the "
- "doublewrite buffer, continuing anyway.\n"
- "Please post a bug report to "
- "https://jira.mariadb.org/\n",
- type, offset, space);
- ut_ad(0);
- }
- }
-
- mach_write_to_1(log_ptr, type);
- log_ptr++;
- log_ptr += mach_write_compressed(log_ptr, space);
- log_ptr += mach_write_compressed(log_ptr, offset);
-
- mtr->n_log_recs++;
-
-#ifdef UNIV_LOG_DEBUG
- fprintf(stderr,
- "Adding to mtr log record type %lu space %lu page no %lu\n",
- (ulong) type, space, offset);
-#endif
-
-#ifdef UNIV_DEBUG
- /* We now assume that all x-latched pages have been modified! */
- block = (buf_block_t*) buf_block_align(ptr);
-
- if (!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY)) {
-
- mtr_memo_push(mtr, block, MTR_MEMO_MODIFY);
+ ut_ad(buf_dblwr_being_created);
+ /* Do nothing: we only come to this branch in an
+ InnoDB database creation. We do not redo log
+ anything for the doublewrite buffer pages. */
+ return(log_ptr);
}
-#endif
- return(log_ptr);
-}
-
-/********************************************************//**
-Writes a log record about an .ibd file create/delete/rename.
-@return new value of log_ptr */
-UNIV_INLINE
-byte*
-mlog_write_initial_log_record_for_file_op(
-/*======================================*/
- ulint type, /*!< in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or
- MLOG_FILE_RENAME */
- ulint space_id,/*!< in: space id, if applicable */
- ulint page_no,/*!< in: page number (not relevant currently) */
- byte* log_ptr,/*!< in: pointer to mtr log which has been opened */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(log_ptr);
- mach_write_to_1(log_ptr, type);
- log_ptr++;
-
- /* We write dummy space id and page number */
- log_ptr += mach_write_compressed(log_ptr, space_id);
- log_ptr += mach_write_compressed(log_ptr, page_no);
-
- mtr->n_log_recs++;
-
- return(log_ptr);
+ return(mlog_write_initial_log_record_low(type, space, offset,
+ log_ptr, mtr));
}
-#endif /* !UNIV_HOTBACKUP */
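
Putting the pieces together, a redo writer under the new interfaces follows the open / initial-record / close protocol. A hedged sketch with an illustrative two-byte payload (real record types define their own payload layouts):

	byte*	log_ptr = mlog_open(mtr, 11 + 2);	/* 11-byte header max + payload */

	if (log_ptr != NULL) {	/* NULL under MTR_LOG_NONE or MTR_LOG_NO_REDO */
		log_ptr = mlog_write_initial_log_record_fast(
			ptr, MLOG_2BYTES, log_ptr, mtr);
		mach_write_to_2(log_ptr, val);
		mlog_close(mtr, log_ptr + 2);
	}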
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
index 6c0ee2c7ac7..30d2e937f1f 100644
--- a/storage/innobase/include/mtr0mtr.h
+++ b/storage/innobase/include/mtr0mtr.h
@@ -1,8 +1,8 @@
/*****************************************************************************
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2013, 2016, MariaDB Corporation
+Copyright (c) 2013, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,414 +28,469 @@ Created 11/26/1995 Heikki Tuuri
#ifndef mtr0mtr_h
#define mtr0mtr_h
-#include "univ.i"
-#include "mem0mem.h"
-#include "dyn0dyn.h"
-#include "buf0types.h"
-#include "sync0rw.h"
-#include "ut0byte.h"
+#include "log0types.h"
#include "mtr0types.h"
-#include "page0types.h"
-
-/* Logging modes for a mini-transaction */
-#define MTR_LOG_ALL 21 /* default mode: log all operations
- modifying disk-based data */
-#define MTR_LOG_NONE 22 /* log no operations */
-#define MTR_LOG_NO_REDO 23 /* Don't generate REDO */
-/*#define MTR_LOG_SPACE 23 */ /* log only operations modifying
- file space page allocation data
- (operations in fsp0fsp.* ) */
-#define MTR_LOG_SHORT_INSERTS 24 /* inserts are logged in a shorter
- form */
-
-/* Types for the mlock objects to store in the mtr memo; NOTE that the
-first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
-#define MTR_MEMO_PAGE_S_FIX RW_S_LATCH
-#define MTR_MEMO_PAGE_X_FIX RW_X_LATCH
-#define MTR_MEMO_BUF_FIX RW_NO_LATCH
-#ifdef UNIV_DEBUG
-# define MTR_MEMO_MODIFY 54
-#endif /* UNIV_DEBUG */
-#define MTR_MEMO_S_LOCK 55
-#define MTR_MEMO_X_LOCK 56
-
-/** @name Log item types
-The log items are declared 'byte' so that the compiler can warn if val
-and type parameters are switched in a call to mlog_write_ulint. NOTE!
-For 1 - 8 bytes, the flag value must give the length also! @{ */
-#define MLOG_SINGLE_REC_FLAG 128 /*!< if the mtr contains only
- one log record for one page,
- i.e., write_initial_log_record
- has been called only once,
- this flag is ORed to the type
- of that first log record */
-#define MLOG_1BYTE (1) /*!< one byte is written */
-#define MLOG_2BYTES (2) /*!< 2 bytes ... */
-#define MLOG_4BYTES (4) /*!< 4 bytes ... */
-#define MLOG_8BYTES (8) /*!< 8 bytes ... */
-#define MLOG_REC_INSERT ((byte)9) /*!< record insert */
-#define MLOG_REC_CLUST_DELETE_MARK ((byte)10) /*!< mark clustered index record
- deleted */
-#define MLOG_REC_SEC_DELETE_MARK ((byte)11) /*!< mark secondary index record
- deleted */
-#define MLOG_REC_UPDATE_IN_PLACE ((byte)13) /*!< update of a record,
- preserves record field sizes */
-#define MLOG_REC_DELETE ((byte)14) /*!< delete a record from a
- page */
-#define MLOG_LIST_END_DELETE ((byte)15) /*!< delete record list end on
- index page */
-#define MLOG_LIST_START_DELETE ((byte)16) /*!< delete record list start on
- index page */
-#define MLOG_LIST_END_COPY_CREATED ((byte)17) /*!< copy record list end to a
- new created index page */
-#define MLOG_PAGE_REORGANIZE ((byte)18) /*!< reorganize an
- index page in
- ROW_FORMAT=REDUNDANT */
-#define MLOG_PAGE_CREATE ((byte)19) /*!< create an index page */
-#define MLOG_UNDO_INSERT ((byte)20) /*!< insert entry in an undo
- log */
-#define MLOG_UNDO_ERASE_END ((byte)21) /*!< erase an undo log
- page end */
-#define MLOG_UNDO_INIT ((byte)22) /*!< initialize a page in an
- undo log */
-#define MLOG_UNDO_HDR_DISCARD ((byte)23) /*!< discard an update undo log
- header */
-#define MLOG_UNDO_HDR_REUSE ((byte)24) /*!< reuse an insert undo log
- header */
-#define MLOG_UNDO_HDR_CREATE ((byte)25) /*!< create an undo
- log header */
-#define MLOG_REC_MIN_MARK ((byte)26) /*!< mark an index
- record as the
- predefined minimum
- record */
-#define MLOG_IBUF_BITMAP_INIT ((byte)27) /*!< initialize an
- ibuf bitmap page */
-/*#define MLOG_FULL_PAGE ((byte)28) full contents of a page */
-#ifdef UNIV_LOG_LSN_DEBUG
-# define MLOG_LSN ((byte)28) /* current LSN */
-#endif
-#define MLOG_INIT_FILE_PAGE ((byte)29) /*!< this means that a
- file page is taken
- into use and the prior
- contents of the page
- should be ignored: in
- recovery we must not
- trust the lsn values
- stored to the file
- page */
-#define MLOG_WRITE_STRING ((byte)30) /*!< write a string to
- a page */
-#define MLOG_MULTI_REC_END ((byte)31) /*!< if a single mtr writes
- several log records,
- this log record ends the
- sequence of these records */
-#define MLOG_DUMMY_RECORD ((byte)32) /*!< dummy log record used to
- pad a log block full */
-#define MLOG_FILE_CREATE ((byte)33) /*!< log record about an .ibd
- file creation */
-#define MLOG_FILE_RENAME ((byte)34) /*!< log record about an .ibd
- file rename */
-#define MLOG_FILE_DELETE ((byte)35) /*!< log record about an .ibd
- file deletion */
-#define MLOG_COMP_REC_MIN_MARK ((byte)36) /*!< mark a compact
- index record as the
- predefined minimum
- record */
-#define MLOG_COMP_PAGE_CREATE ((byte)37) /*!< create a compact
- index page */
-#define MLOG_COMP_REC_INSERT ((byte)38) /*!< compact record insert */
-#define MLOG_COMP_REC_CLUST_DELETE_MARK ((byte)39)
- /*!< mark compact
- clustered index record
- deleted */
-#define MLOG_COMP_REC_SEC_DELETE_MARK ((byte)40)/*!< mark compact
- secondary index record
- deleted; this log
- record type is
- redundant, as
- MLOG_REC_SEC_DELETE_MARK
- is independent of the
- record format. */
-#define MLOG_COMP_REC_UPDATE_IN_PLACE ((byte)41)/*!< update of a
- compact record,
- preserves record field
- sizes */
-#define MLOG_COMP_REC_DELETE ((byte)42) /*!< delete a compact record
- from a page */
-#define MLOG_COMP_LIST_END_DELETE ((byte)43) /*!< delete compact record list
- end on index page */
-#define MLOG_COMP_LIST_START_DELETE ((byte)44) /*!< delete compact record list
- start on index page */
-#define MLOG_COMP_LIST_END_COPY_CREATED ((byte)45)
- /*!< copy compact
- record list end to a
- new created index
- page */
-#define MLOG_COMP_PAGE_REORGANIZE ((byte)46) /*!< reorganize an index page */
-#define MLOG_FILE_CREATE2 ((byte)47) /*!< log record about creating
- an .ibd file, with format */
-#define MLOG_ZIP_WRITE_NODE_PTR ((byte)48) /*!< write the node pointer of
- a record on a compressed
- non-leaf B-tree page */
-#define MLOG_ZIP_WRITE_BLOB_PTR ((byte)49) /*!< write the BLOB pointer
- of an externally stored column
- on a compressed page */
-#define MLOG_ZIP_WRITE_HEADER ((byte)50) /*!< write to compressed page
- header */
-#define MLOG_ZIP_PAGE_COMPRESS ((byte)51) /*!< compress an index page */
-#define MLOG_ZIP_PAGE_COMPRESS_NO_DATA ((byte)52)/*!< compress an index page
- without logging it's image */
-					without logging its image */
- page */
-#define MLOG_BIGGEST_TYPE ((byte)53) /*!< biggest value (used in
- assertions) */
-
-#define MLOG_FILE_WRITE_CRYPT_DATA ((byte)100) /*!< log record for
- writing/updating crypt data of
- a tablespace */
-
-#define EXTRA_CHECK_MLOG_NUMBER(x) \
- ((x) == MLOG_FILE_WRITE_CRYPT_DATA)
-
-/* @} */
-
-/** @name Flags for MLOG_FILE operations
-(stored in the page number parameter, called log_flags in the
-functions). The page number parameter was originally written as 0. @{ */
-#define MLOG_FILE_FLAG_TEMP 1 /*!< identifies TEMPORARY TABLE in
- MLOG_FILE_CREATE, MLOG_FILE_CREATE2 */
-/* @} */
-
-/* included here because it needs MLOG_LSN defined */
-#include "log0log.h"
-
-/***************************************************************//**
-Starts a mini-transaction. */
-UNIV_INLINE
-void
-mtr_start(
-/*======*/
- mtr_t* mtr) /*!< out: mini-transaction */
- MY_ATTRIBUTE((nonnull));
-/***************************************************************//**
-Commits a mini-transaction. */
-UNIV_INTERN
-void
-mtr_commit(
-/*=======*/
- mtr_t* mtr) /*!< in/out: mini-transaction */
- MY_ATTRIBUTE((nonnull));
-/**********************************************************//**
-Sets and returns a savepoint in mtr.
+#include "buf0types.h"
+#include "dyn0buf.h"
+
+/** Start a mini-transaction. */
+#define mtr_start(m) (m)->start()
+
+/** Commit a mini-transaction. */
+#define mtr_commit(m) (m)->commit()
+
+/** Set and return a savepoint in mtr.
@return savepoint */
-UNIV_INLINE
-ulint
-mtr_set_savepoint(
-/*==============*/
- mtr_t* mtr); /*!< in: mtr */
-#ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Releases the (index tree) s-latch stored in an mtr memo after a
-savepoint. */
-UNIV_INLINE
-void
-mtr_release_s_latch_at_savepoint(
-/*=============================*/
- mtr_t* mtr, /*!< in: mtr */
- ulint savepoint, /*!< in: savepoint */
- rw_lock_t* lock); /*!< in: latch to release */
-#else /* !UNIV_HOTBACKUP */
-# define mtr_release_s_latch_at_savepoint(mtr,savepoint,lock) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************//**
-Releases a buf_page stored in an mtr memo after a
+#define mtr_set_savepoint(m) (m)->get_savepoint()
+
+/** Release the (index tree) s-latch stored in an mtr memo after a
savepoint. */
-UNIV_INTERN
-void
-mtr_release_buf_page_at_savepoint(
-/*=============================*/
- mtr_t* mtr, /*!< in: mtr */
- ulint savepoint, /*!< in: savepoint */
- buf_block_t* block); /*!< in: block to release */
-
-/***************************************************************//**
-Gets the logging mode of a mini-transaction.
+#define mtr_release_s_latch_at_savepoint(m, s, l) \
+ (m)->release_s_latch_at_savepoint((s), (l))
+
+/** Get the logging mode of a mini-transaction.
@return logging mode: MTR_LOG_NONE, ... */
-UNIV_INLINE
-ulint
-mtr_get_log_mode(
-/*=============*/
- mtr_t* mtr); /*!< in: mtr */
-/***************************************************************//**
-Changes the logging mode of a mini-transaction.
+#define mtr_get_log_mode(m) (m)->get_log_mode()
+
+/** Change the logging mode of a mini-transaction.
@return old mode */
-UNIV_INLINE
-ulint
-mtr_set_log_mode(
-/*=============*/
- mtr_t* mtr, /*!< in: mtr */
- ulint mode); /*!< in: logging mode: MTR_LOG_NONE, ... */
-/********************************************************//**
-Reads 1 - 4 bytes from a file page buffered in the buffer pool.
+#define mtr_set_log_mode(m, d) (m)->set_log_mode((d))
+
+/** Read 1 - 4 bytes from a file page buffered in the buffer pool.
@return value read */
-UNIV_INTERN
-ulint
-mtr_read_ulint(
-/*===========*/
- const byte* ptr, /*!< in: pointer from where to read */
- ulint type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-This macro locks an rw-lock in s-mode. */
-#define mtr_s_lock(B, MTR) mtr_s_lock_func((B), __FILE__, __LINE__,\
- (MTR))
-/*********************************************************************//**
-This macro locks an rw-lock in x-mode. */
-#define mtr_x_lock(B, MTR) mtr_x_lock_func((B), __FILE__, __LINE__,\
- (MTR))
-/*********************************************************************//**
-NOTE! Use the macro above!
-Locks a lock in s-mode. */
-UNIV_INLINE
-void
-mtr_s_lock_func(
-/*============*/
- rw_lock_t* lock, /*!< in: rw-lock */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line number */
- mtr_t* mtr); /*!< in: mtr */
-/*********************************************************************//**
-NOTE! Use the macro above!
-Locks a lock in x-mode. */
-UNIV_INLINE
-void
-mtr_x_lock_func(
-/*============*/
- rw_lock_t* lock, /*!< in: rw-lock */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line number */
- mtr_t* mtr); /*!< in: mtr */
-#endif /* !UNIV_HOTBACKUP */
-
-/***************************************************//**
-Releases an object in the memo stack.
+#define mtr_read_ulint(p, t, m) (m)->read_ulint((p), (t))
+
+/** Release an object in the memo stack.
@return true if released */
-UNIV_INTERN
-bool
-mtr_memo_release(
-/*=============*/
- mtr_t* mtr, /*!< in/out: mini-transaction */
- void* object, /*!< in: object */
- ulint type) /*!< in: object type: MTR_MEMO_S_LOCK, ... */
- MY_ATTRIBUTE((nonnull));
+#define mtr_memo_release(m, o, t) \
+ (m)->memo_release((o), (t))
+
#ifdef UNIV_DEBUG
-# ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Checks if memo contains the given item.
+
+/** Check if memo contains the given item. */
+#define mtr_is_block_fix(m, o, t, table) mtr_memo_contains(m, o, t)
+
+/** Check if memo contains the given page. */
+#define mtr_is_page_fix(m, p, t, table) mtr_memo_contains_page(m, p, t)
+
+/** Check if memo contains the given item.
@return TRUE if contains */
-UNIV_INLINE
-bool
-mtr_memo_contains(
-/*==============*/
- mtr_t* mtr, /*!< in: mtr */
- const void* object, /*!< in: object to search */
- ulint type) /*!< in: type of object */
- MY_ATTRIBUTE((warn_unused_result, nonnull));
-
-/**********************************************************//**
-Checks if memo contains the given page.
+#define mtr_memo_contains(m, o, t) \
+ (m)->memo_contains((m)->get_memo(), (o), (t))
+
+/** Check if memo contains the given page.
@return TRUE if contains */
-UNIV_INTERN
-ibool
-mtr_memo_contains_page(
-/*===================*/
- mtr_t* mtr, /*!< in: mtr */
- const byte* ptr, /*!< in: pointer to buffer frame */
- ulint type); /*!< in: type of object */
-/*********************************************************//**
-Prints info of an mtr handle. */
-UNIV_INTERN
-void
-mtr_print(
-/*======*/
- mtr_t* mtr); /*!< in: mtr */
-# else /* !UNIV_HOTBACKUP */
-# define mtr_memo_contains(mtr, object, type) TRUE
-# define mtr_memo_contains_page(mtr, ptr, type) TRUE
-# endif /* !UNIV_HOTBACKUP */
+#define mtr_memo_contains_page(m, p, t) \
+ (m)->memo_contains_page_flagged((p), (t))
#endif /* UNIV_DEBUG */
-/*######################################################################*/
-#define MTR_BUF_MEMO_SIZE 200 /* number of slots in memo */
+/** Print info of an mtr handle. */
+#define mtr_print(m) (m)->print()
-/***************************************************************//**
-Returns the log object of a mini-transaction buffer.
+/** Return the log object of a mini-transaction buffer.
@return log */
-UNIV_INLINE
-dyn_array_t*
-mtr_get_log(
-/*========*/
- mtr_t* mtr); /*!< in: mini-transaction */
-/***************************************************//**
-Pushes an object to an mtr memo stack. */
-UNIV_INLINE
+#define mtr_get_log(m) (m)->get_log()
+
+/** Push an object to an mtr memo stack. */
+#define mtr_memo_push(m, o, t) (m)->memo_push(o, t)
+
+/** Lock an rw-lock in s-mode. */
+#define mtr_s_lock(l, m) (m)->s_lock((l), __FILE__, __LINE__)
+
+/** Lock an rw-lock in x-mode. */
+#define mtr_x_lock(l, m) (m)->x_lock((l), __FILE__, __LINE__)
+
+/** Lock a tablespace in x-mode. */
+#define mtr_x_lock_space(s, m) (m)->x_lock_space((s), __FILE__, __LINE__)
+
+/** Lock an rw-lock in sx-mode. */
+#define mtr_sx_lock(l, m) (m)->sx_lock((l), __FILE__, __LINE__)
+
+#define mtr_memo_contains_flagged(m, p, l) \
+ (m)->memo_contains_flagged((p), (l))
+
+#define mtr_memo_contains_page_flagged(m, p, l) \
+ (m)->memo_contains_page_flagged((p), (l))
+
+#define mtr_release_block_at_savepoint(m, s, b) \
+ (m)->release_block_at_savepoint((s), (b))
+
+#define mtr_block_sx_latch_at_savepoint(m, s, b) \
+ (m)->sx_latch_at_savepoint((s), (b))
+
+#define mtr_block_x_latch_at_savepoint(m, s, b) \
+ (m)->x_latch_at_savepoint((s), (b))
+
+/** Check if a mini-transaction is dirtying a clean page.
+@param b block being x-fixed
+@return true if the mtr is dirtying a clean page. */
+#define mtr_block_dirtied(b) mtr_t::is_block_dirtied((b))
+
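
The old free functions survive only as these forwarding macros, so the two calls below are equivalent after this change (sketch; `latch` stands for any rw_lock_t):

	mtr_s_lock(&latch, &mtr);			/* macro form, supplies file/line */
	mtr.s_lock(&latch, __FILE__, __LINE__);		/* what it expands to */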
+/** Forward declaration of a tablespace object */
+struct fil_space_t;
+
+/** Append records to the system-wide redo log buffer.
+@param[in] log redo log records */
void
-mtr_memo_push(
-/*==========*/
- mtr_t* mtr, /*!< in: mtr */
- void* object, /*!< in: object */
- ulint type); /*!< in: object type: MTR_MEMO_S_LOCK, ... */
+mtr_write_log(
+ const mtr_buf_t* log);
/** Mini-transaction memo stack slot. */
-struct mtr_memo_slot_t{
- ulint type; /*!< type of the stored object (MTR_MEMO_S_LOCK, ...) */
- void* object; /*!< pointer to the object */
+struct mtr_memo_slot_t {
+ /** pointer to the object */
+ void* object;
+
+ /** type of the stored object (MTR_MEMO_S_LOCK, ...) */
+ ulint type;
};
-/* Mini-transaction handle and buffer */
-struct mtr_t{
+/** Mini-transaction handle and buffer */
+struct mtr_t {
+ mtr_t() : m_state(MTR_STATE_INIT) {}
+
+ /** Release the free extents that was reserved using
+ fsp_reserve_free_extents(). This is equivalent to calling
+ fil_space_release_free_extents(). This is intended for use
+ with index pages.
+ @param[in] n_reserved number of reserved extents */
+ void release_free_extents(ulint n_reserved);
+
+ /** Start a mini-transaction. */
+ void start();
+
+ /** Commit the mini-transaction. */
+ void commit();
+
+ /** Commit a mini-transaction that did not modify any pages,
+ but generated some redo log on a higher level, such as
+ MLOG_FILE_NAME records and a MLOG_CHECKPOINT marker.
+ The caller must invoke log_mutex_enter() and log_mutex_exit().
+ This is to be used at log_checkpoint().
+ @param[in] checkpoint_lsn the LSN of the log checkpoint
+ @param[in] write_mlog_checkpoint Write MLOG_CHECKPOINT marker
+ if it is enabled. */
+ void commit_checkpoint(
+ lsn_t checkpoint_lsn,
+ bool write_mlog_checkpoint);
+
+	/** Return the current size of the memo stack; this value
+	serves as a savepoint.
+	@return savepoint */
+ ulint get_savepoint() const {ut_ad(is_active()); return m_memo.size();}
+
+ /** Release the (index tree) s-latch stored in an mtr memo after a
+ savepoint.
+ @param savepoint value returned by @see set_savepoint.
+ @param lock latch to release */
+ inline void release_s_latch_at_savepoint(
+ ulint savepoint,
+ rw_lock_t* lock);
+
+ /** Release the block in an mtr memo after a savepoint. */
+ inline void release_block_at_savepoint(
+ ulint savepoint,
+ buf_block_t* block);
+
+ /** SX-latch a not yet latched block after a savepoint. */
+ inline void sx_latch_at_savepoint(ulint savepoint, buf_block_t* block);
+
+ /** X-latch a not yet latched block after a savepoint. */
+ inline void x_latch_at_savepoint(ulint savepoint, buf_block_t* block);
+
+ /** Get the logging mode.
+ @return logging mode */
+ inline mtr_log_t get_log_mode() const
+ MY_ATTRIBUTE((warn_unused_result));
+
+ /** Change the logging mode.
+ @param mode logging mode
+ @return old mode */
+ inline mtr_log_t set_log_mode(mtr_log_t mode);
+
+ /** Note that the mini-transaction is modifying the system tablespace
+ (for example, for the change buffer or for undo logs)
+ @return the system tablespace */
+ fil_space_t* set_sys_modified()
+ {
+ if (!m_sys_space) {
+ lookup_sys_space();
+ }
+ return m_sys_space;
+ }
+
+ /** Copy the tablespaces associated with the mini-transaction
+ (needed for generating MLOG_FILE_NAME records)
+ @param[in] mtr mini-transaction that may modify
+ the same set of tablespaces as this one */
+ void set_spaces(const mtr_t& mtr)
+ {
+ ut_ad(!m_user_space_id);
+ ut_ad(!m_user_space);
+ ut_ad(!m_undo_space);
+ ut_ad(!m_sys_space);
+
+ ut_d(m_user_space_id = mtr.m_user_space_id);
+ m_user_space = mtr.m_user_space;
+ m_undo_space = mtr.m_undo_space;
+ m_sys_space = mtr.m_sys_space;
+ }
+
+ /** Set the tablespace associated with the mini-transaction
+ (needed for generating a MLOG_FILE_NAME record)
+ @param[in] space_id user or system tablespace ID
+ @return the tablespace */
+ fil_space_t* set_named_space(ulint space_id)
+ {
+ ut_ad(!m_user_space_id);
+ ut_d(m_user_space_id = space_id);
+ if (!space_id) {
+ return(set_sys_modified());
+ } else {
+ lookup_user_space(space_id);
+ return m_user_space;
+ }
+ }
+
+ /** Set the tablespace associated with the mini-transaction
+ (needed for generating a MLOG_FILE_NAME record)
+ @param[in] space user or system tablespace */
+ void set_named_space(fil_space_t* space);
+
#ifdef UNIV_DEBUG
- ulint state; /*!< MTR_ACTIVE, MTR_COMMITTING, MTR_COMMITTED */
-#endif
- dyn_array_t memo; /*!< memo stack for locks etc. */
- dyn_array_t log; /*!< mini-transaction log */
- unsigned inside_ibuf:1;
- /*!< TRUE if inside ibuf changes */
- unsigned modifications:1;
- /*!< TRUE if the mini-transaction
- modified buffer pool pages */
- unsigned made_dirty:1;
- /*!< TRUE if mtr has made at least
- one buffer pool page dirty */
- ulint n_log_recs;
- /* count of how many page initial log records
- have been written to the mtr log */
- ulint n_freed_pages;
- /* number of pages that have been freed in
- this mini-transaction */
- ulint log_mode; /* specifies which operations should be
- logged; default value MTR_LOG_ALL */
- lsn_t start_lsn;/* start lsn of the possible log entry for
- this mtr */
- lsn_t end_lsn;/* end lsn of the possible log entry for
- this mtr */
+ /** Check the tablespace associated with the mini-transaction
+ (needed for generating a MLOG_FILE_NAME record)
+ @param[in] space tablespace
+ @return whether the mini-transaction is associated with the space */
+ bool is_named_space(ulint space) const;
+#endif /* UNIV_DEBUG */
+
+ /** Read 1 - 4 bytes from a file page buffered in the buffer pool.
+ @param ptr pointer from where to read
+	@param type	MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES
+ @return value read */
+ inline ulint read_ulint(const byte* ptr, mlog_id_t type) const
+ MY_ATTRIBUTE((warn_unused_result));
+
+ /** Locks a rw-latch in S mode.
+ NOTE: use mtr_s_lock().
+ @param lock rw-lock
+ @param file file name from where called
+ @param line line number in file */
+ inline void s_lock(rw_lock_t* lock, const char* file, unsigned line);
+
+ /** Locks a rw-latch in X mode.
+ NOTE: use mtr_x_lock().
+ @param lock rw-lock
+ @param file file name from where called
+ @param line line number in file */
+ inline void x_lock(rw_lock_t* lock, const char* file, unsigned line);
+
+ /** Locks a rw-latch in X mode.
+ NOTE: use mtr_sx_lock().
+ @param lock rw-lock
+ @param file file name from where called
+ @param line line number in file */
+ inline void sx_lock(rw_lock_t* lock, const char* file, unsigned line);
+
+ /** Acquire a tablespace X-latch.
+ NOTE: use mtr_x_lock_space().
+ @param[in] space_id tablespace ID
+ @param[in] file file name from where called
+ @param[in] line line number in file
+ @return the tablespace object (never NULL) */
+ fil_space_t* x_lock_space(
+ ulint space_id,
+ const char* file,
+ unsigned line);
+
+ /** Release an object in the memo stack.
+ @param object object
+ @param type object type: MTR_MEMO_S_LOCK, ...
+	@return true if the lock was released */
+ bool memo_release(const void* object, ulint type);
+ /** Release a page latch.
+ @param[in] ptr pointer to within a page frame
+ @param[in] type object type: MTR_MEMO_PAGE_X_FIX, ... */
+ void release_page(const void* ptr, mtr_memo_type_t type);
+
+ /** Note that the mini-transaction has modified data. */
+ void set_modified() { m_modifications = true; }
+
+	/** Set the state to not-modified. This is only used during
+	redo log apply, to avoid logging the changes. */
+ void discard_modifications() { m_modifications = false; }
+
+ /** Get the LSN of commit().
+ @return the commit LSN
+ @retval 0 if the transaction only modified temporary tablespaces */
+ lsn_t commit_lsn() const
+ {
+ ut_ad(has_committed());
+ return(m_commit_lsn);
+ }
+
+ /** Note that we are inside the change buffer code. */
+ void enter_ibuf() { m_inside_ibuf = true; }
+
+ /** Note that we have exited from the change buffer code. */
+ void exit_ibuf() { m_inside_ibuf = false; }
+
+ /** @return true if we are inside the change buffer code */
+ bool is_inside_ibuf() const { return m_inside_ibuf; }
+
+	/** @return true if the mini-transaction is active */
+ bool is_active() const { return m_state == MTR_STATE_ACTIVE; }
+
+ /** Get flush observer
+ @return flush observer */
+ FlushObserver* get_flush_observer() const { return m_flush_observer; }
+
+ /** Set flush observer
+ @param[in] observer flush observer */
+ void set_flush_observer(FlushObserver* observer)
+ {
+ ut_ad(observer == NULL || m_log_mode == MTR_LOG_NO_REDO);
+ m_flush_observer = observer;
+ }
+
#ifdef UNIV_DEBUG
- ulint magic_n;
+ /** Check if memo contains the given item.
+ @param memo memo stack
+	@param object	object to search
+ @param type type of object
+ @return true if contains */
+ static bool memo_contains(
+ const mtr_buf_t* memo,
+ const void* object,
+ ulint type)
+ MY_ATTRIBUTE((warn_unused_result));
+
+ /** Check if memo contains the given item.
+	@param ptr	object to search
+	@param flags	specify types of object (can be ORed) of
+ MTR_MEMO_PAGE_S_FIX ... values
+ @return true if contains */
+ bool memo_contains_flagged(const void* ptr, ulint flags) const;
+
+ /** Check if memo contains the given page.
+ @param[in] ptr pointer to within buffer frame
+ @param[in] flags specify types of object with OR of
+ MTR_MEMO_PAGE_S_FIX... values
+ @return the block
+ @retval NULL if not found */
+ buf_block_t* memo_contains_page_flagged(
+ const byte* ptr,
+ ulint flags) const;
+
+ /** Mark the given latched page as modified.
+ @param[in] ptr pointer to within buffer frame */
+ void memo_modify_page(const byte* ptr);
+
+ /** Print info of an mtr handle. */
+ void print() const;
+
+ /** @return true if the mini-transaction has committed */
+ bool has_committed() const { return m_state == MTR_STATE_COMMITTED; }
+
+ /** @return true if mini-transaction contains modifications. */
+ bool has_modifications() const { return m_modifications; }
+
+ /** @return the memo stack */
+ const mtr_buf_t* get_memo() const { return &m_memo; }
+
+ /** @return the memo stack */
+ mtr_buf_t* get_memo() { return &m_memo; }
#endif /* UNIV_DEBUG */
-};
+	/** @return true if the mini-transaction has dirtied a clean
+	buffer pool page */
+ bool is_dirty() const { return m_made_dirty; }
+
+ /** Note that a record has been added to the log */
+ void added_rec() { ++m_n_log_recs; }
+
+ /** Get the buffered redo log of this mini-transaction.
+ @return redo log */
+ const mtr_buf_t* get_log() const { return &m_log; }
+
+ /** Get the buffered redo log of this mini-transaction.
+ @return redo log */
+ mtr_buf_t* get_log() { return &m_log; }
+
+ /** Push an object to an mtr memo stack.
+ @param object object
+ @param type object type: MTR_MEMO_S_LOCK, ... */
+ inline void memo_push(void* object, mtr_memo_type_t type);
+
+ /** Check if this mini-transaction is dirtying a clean page.
+ @param block block being x-fixed
+ @return true if the mtr is dirtying a clean page. */
+ static inline bool is_block_dirtied(const buf_block_t* block)
+ MY_ATTRIBUTE((warn_unused_result));
+
+private:
+ /** Look up the system tablespace. */
+ void lookup_sys_space();
+ /** Look up the user tablespace.
+ @param[in] space_id tablespace ID */
+ void lookup_user_space(ulint space_id);
+
+ /** Prepare to write the mini-transaction log to the redo log buffer.
+ @return number of bytes to write in finish_write() */
+ inline ulint prepare_write();
+
+ /** Append the redo log records to the redo log buffer.
+ @param[in] len number of bytes to write
+ @return start_lsn */
+ inline lsn_t finish_write(ulint len);
+
+ /** Release the resources */
+ inline void release_resources();
+
+ /** memo stack for locks etc. */
+ mtr_buf_t m_memo;
+
+ /** mini-transaction log */
+ mtr_buf_t m_log;
+
+ /** true if mtr has made at least one buffer pool page dirty */
+ bool m_made_dirty;
+
+ /** true if inside ibuf changes */
+ bool m_inside_ibuf;
+
+ /** true if the mini-transaction modified buffer pool pages */
+ bool m_modifications;
+
+ /** Count of how many page initial log records have been
+ written to the mtr log */
+ ib_uint32_t m_n_log_recs;
+
+ /** specifies which operations should be logged; default
+ value MTR_LOG_ALL */
+ mtr_log_t m_log_mode;
#ifdef UNIV_DEBUG
-# define MTR_MAGIC_N 54551
+ /** Persistent user tablespace associated with the
+ mini-transaction, or 0 (TRX_SYS_SPACE) if none yet */
+ ulint m_user_space_id;
#endif /* UNIV_DEBUG */
+ /** User tablespace that is being modified by the mini-transaction */
+ fil_space_t* m_user_space;
+ /** Undo tablespace that is being modified by the mini-transaction */
+ fil_space_t* m_undo_space;
+	/** System tablespace, if it is being modified by the
+	mini-transaction */
+ fil_space_t* m_sys_space;
-#define MTR_ACTIVE 12231
-#define MTR_COMMITTING 56456
-#define MTR_COMMITTED 34676
+	/** State of the mini-transaction */
+ mtr_state_t m_state;
+
+ /** Flush Observer */
+ FlushObserver* m_flush_observer;
+
+ /** LSN at commit time */
+ lsn_t m_commit_lsn;
+};
-#ifndef UNIV_NONINL
#include "mtr0mtr.ic"
-#endif
-#endif
+#endif /* mtr0mtr_h */
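A minimal usage sketch of the rewritten class may help here. This is hedged: start() and commit() are members declared elsewhere in this header but not shown in this hunk, and the caller below is hypothetical.

/* Hedged sketch, not part of the patch: associate a tablespace with
a mini-transaction before modifying its pages, so that commit() can
emit the MLOG_FILE_NAME record. */
void modify_space_page(ulint space_id)	/* hypothetical caller */
{
	mtr_t	mtr;
	mtr.start();
	fil_space_t*	space = mtr.set_named_space(space_id);
	ut_ad(space != NULL);
	/* ... latch and modify pages of `space` under the mtr ... */
	mtr.commit();
}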
diff --git a/storage/innobase/include/mtr0mtr.ic b/storage/innobase/include/mtr0mtr.ic
index 5763c054a8f..7175ede0d6a 100644
--- a/storage/innobase/include/mtr0mtr.ic
+++ b/storage/innobase/include/mtr0mtr.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,274 +24,257 @@ Mini-transaction buffer
Created 11/26/1995 Heikki Tuuri
*******************************************************/
-#ifndef UNIV_HOTBACKUP
-# include "sync0sync.h"
-# include "sync0rw.h"
-#endif /* !UNIV_HOTBACKUP */
-#include "mach0data.h"
-
-/***************************************************//**
-Checks if a mini-transaction is dirtying a clean page.
-@return TRUE if the mtr is dirtying a clean page. */
-UNIV_INTERN
-ibool
-mtr_block_dirtied(
-/*==============*/
- const buf_block_t* block) /*!< in: block being x-fixed */
- MY_ATTRIBUTE((nonnull,warn_unused_result));
-
-/***************************************************************//**
-Starts a mini-transaction. */
-UNIV_INLINE
-void
-mtr_start(
-/*======*/
- mtr_t* mtr) /*!< out: mini-transaction */
-{
- UNIV_MEM_INVALID(mtr, sizeof *mtr);
-
- dyn_array_create(&(mtr->memo));
- dyn_array_create(&(mtr->log));
+#include "buf0buf.h"
- mtr->log_mode = MTR_LOG_ALL;
- mtr->inside_ibuf = FALSE;
- mtr->modifications = FALSE;
- mtr->made_dirty = FALSE;
- mtr->n_log_recs = 0;
- mtr->n_freed_pages = 0;
+/** Check if a mini-transaction is dirtying a clean page.
+@return true if the mtr is dirtying a clean page. */
+bool
+mtr_t::is_block_dirtied(const buf_block_t* block)
+{
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+ ut_ad(block->page.buf_fix_count > 0);
- ut_d(mtr->state = MTR_ACTIVE);
- ut_d(mtr->magic_n = MTR_MAGIC_N);
+ /* It is OK to read oldest_modification because no
+ other thread can be performing a write of it and it
+ is only during write that the value is reset to 0. */
+ return(block->page.oldest_modification == 0);
}
-/***************************************************//**
+/**
Pushes an object to an mtr memo stack. */
-UNIV_INLINE
void
-mtr_memo_push(
-/*==========*/
- mtr_t* mtr, /*!< in: mtr */
- void* object, /*!< in: object */
- ulint type) /*!< in: object type: MTR_MEMO_S_LOCK, ... */
+mtr_t::memo_push(void* object, mtr_memo_type_t type)
{
- dyn_array_t* memo;
- mtr_memo_slot_t* slot;
-
- ut_ad(object);
+ ut_ad(is_active());
+ ut_ad(object != NULL);
ut_ad(type >= MTR_MEMO_PAGE_S_FIX);
- ut_ad(type <= MTR_MEMO_X_LOCK);
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
+ ut_ad(type <= MTR_MEMO_SX_LOCK);
+ ut_ad(ut_is_2pow(type));
/* If this mtr has x-fixed a clean page then we set
the made_dirty flag. This tells us if we need to
grab log_flush_order_mutex at mtr_commit so that we
can insert the dirtied page to the flush list. */
- if (type == MTR_MEMO_PAGE_X_FIX && !mtr->made_dirty) {
- mtr->made_dirty =
- mtr_block_dirtied((const buf_block_t*) object);
- }
- memo = &(mtr->memo);
+ if ((type == MTR_MEMO_PAGE_X_FIX || type == MTR_MEMO_PAGE_SX_FIX)
+ && !m_made_dirty) {
- slot = (mtr_memo_slot_t*) dyn_array_push(memo, sizeof *slot);
+ m_made_dirty = is_block_dirtied(
+ reinterpret_cast<const buf_block_t*>(object));
+ }
+
+ mtr_memo_slot_t* slot = m_memo.push<mtr_memo_slot_t*>(sizeof(*slot));
- slot->object = object;
slot->type = type;
+ slot->object = object;
}
-/**********************************************************//**
-Sets and returns a savepoint in mtr.
-@return savepoint */
-UNIV_INLINE
-ulint
-mtr_set_savepoint(
-/*==============*/
- mtr_t* mtr) /*!< in: mtr */
+/**
+Releases the (index tree) s-latch stored in an mtr memo after a
+savepoint. */
+void
+mtr_t::release_s_latch_at_savepoint(
+ ulint savepoint,
+ rw_lock_t* lock)
{
- dyn_array_t* memo;
+ ut_ad(is_active());
+ ut_ad(m_memo.size() > savepoint);
+
+ mtr_memo_slot_t* slot = m_memo.at<mtr_memo_slot_t*>(savepoint);
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
+ ut_ad(slot->object == lock);
+ ut_ad(slot->type == MTR_MEMO_S_LOCK);
- memo = &(mtr->memo);
+ rw_lock_s_unlock(lock);
- return(dyn_array_get_data_size(memo));
+ slot->object = NULL;
}
-#ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Releases the (index tree) s-latch stored in an mtr memo after a
-savepoint. */
-UNIV_INLINE
+/**
+SX-latches the not yet latched block after a savepoint. */
+
void
-mtr_release_s_latch_at_savepoint(
-/*=============================*/
- mtr_t* mtr, /*!< in: mtr */
- ulint savepoint, /*!< in: savepoint */
- rw_lock_t* lock) /*!< in: latch to release */
+mtr_t::sx_latch_at_savepoint(
+ ulint savepoint,
+ buf_block_t* block)
{
- mtr_memo_slot_t* slot;
- dyn_array_t* memo;
+ ut_ad(is_active());
+ ut_ad(m_memo.size() > savepoint);
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
+ ut_ad(!memo_contains_flagged(
+ block,
+ MTR_MEMO_PAGE_S_FIX
+ | MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX));
- memo = &(mtr->memo);
+ mtr_memo_slot_t* slot = m_memo.at<mtr_memo_slot_t*>(savepoint);
- ut_ad(dyn_array_get_data_size(memo) > savepoint);
+ ut_ad(slot->object == block);
- slot = (mtr_memo_slot_t*) dyn_array_get_element(memo, savepoint);
+ /* == RW_NO_LATCH */
+ ut_a(slot->type == MTR_MEMO_BUF_FIX);
- ut_ad(slot->object == lock);
- ut_ad(slot->type == MTR_MEMO_S_LOCK);
+ rw_lock_sx_lock(&block->lock);
- rw_lock_s_unlock(lock);
+ if (!m_made_dirty) {
+ m_made_dirty = is_block_dirtied(block);
+ }
- slot->object = NULL;
+ slot->type = MTR_MEMO_PAGE_SX_FIX;
}
-# ifdef UNIV_DEBUG
-/**********************************************************//**
-Checks if memo contains the given item.
-@return TRUE if contains */
-UNIV_INLINE
-bool
-mtr_memo_contains(
-/*==============*/
- mtr_t* mtr, /*!< in: mtr */
- const void* object, /*!< in: object to search */
- ulint type) /*!< in: type of object */
+/**
+X-latches the not yet latched block after a savepoint. */
+
+void
+mtr_t::x_latch_at_savepoint(
+ ulint savepoint,
+ buf_block_t* block)
{
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE || mtr->state == MTR_COMMITTING);
-
- for (const dyn_block_t* block = dyn_array_get_last_block(&mtr->memo);
- block;
- block = dyn_array_get_prev_block(&mtr->memo, block)) {
- const mtr_memo_slot_t* start
- = reinterpret_cast<mtr_memo_slot_t*>(
- dyn_block_get_data(block));
- mtr_memo_slot_t* slot
- = reinterpret_cast<mtr_memo_slot_t*>(
- dyn_block_get_data(block)
- + dyn_block_get_used(block));
-
- ut_ad(!(dyn_block_get_used(block) % sizeof(mtr_memo_slot_t)));
-
- while (slot-- != start) {
- if (object == slot->object && type == slot->type) {
- return(true);
- }
- }
+ ut_ad(is_active());
+ ut_ad(m_memo.size() > savepoint);
+
+ ut_ad(!memo_contains_flagged(
+ block,
+ MTR_MEMO_PAGE_S_FIX
+ | MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX));
+
+ mtr_memo_slot_t* slot = m_memo.at<mtr_memo_slot_t*>(savepoint);
+
+ ut_ad(slot->object == block);
+
+ /* == RW_NO_LATCH */
+ ut_a(slot->type == MTR_MEMO_BUF_FIX);
+
+ rw_lock_x_lock(&block->lock);
+
+ if (!m_made_dirty) {
+ m_made_dirty = is_block_dirtied(block);
}
- return(false);
+ slot->type = MTR_MEMO_PAGE_X_FIX;
}
-# endif /* UNIV_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
-
-/***************************************************************//**
-Returns the log object of a mini-transaction buffer.
-@return log */
-UNIV_INLINE
-dyn_array_t*
-mtr_get_log(
-/*========*/
- mtr_t* mtr) /*!< in: mini-transaction */
+
+/**
+Releases the block in an mtr memo after a savepoint. */
+
+void
+mtr_t::release_block_at_savepoint(
+ ulint savepoint,
+ buf_block_t* block)
{
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
+ ut_ad(is_active());
+
+ mtr_memo_slot_t* slot = m_memo.at<mtr_memo_slot_t*>(savepoint);
+
+ ut_a(slot->object == block);
+
+	buf_block_unfix(block);
+
+ buf_page_release_latch(block, slot->type);
- return(&(mtr->log));
+ slot->object = NULL;
}
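A hedged sketch of the savepoint pattern these members support; get_savepoint() is assumed to return m_memo.size() as in the upstream API, and the caller (which must already have buffer-fixed the block) is hypothetical.

/* Hedged sketch: record a savepoint, buffer-fix the block with
RW_NO_LATCH semantics, then upgrade the memo slot to an exclusive
page latch only once the page actually needs to be modified. */
void latch_later(mtr_t& mtr, buf_block_t* block)
{
	ulint	savepoint = mtr.get_savepoint();	/* assumed API */
	mtr.memo_push(block, MTR_MEMO_BUF_FIX);
	/* ... decide whether the page must be modified ... */
	mtr.x_latch_at_savepoint(savepoint, block);
}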
-/***************************************************************//**
+/**
Gets the logging mode of a mini-transaction.
@return logging mode: MTR_LOG_NONE, ... */
-UNIV_INLINE
-ulint
-mtr_get_log_mode(
-/*=============*/
- mtr_t* mtr) /*!< in: mtr */
+
+mtr_log_t
+mtr_t::get_log_mode() const
{
- ut_ad(mtr);
- ut_ad(mtr->log_mode >= MTR_LOG_ALL);
- ut_ad(mtr->log_mode <= MTR_LOG_SHORT_INSERTS);
+ ut_ad(m_log_mode >= MTR_LOG_ALL);
+ ut_ad(m_log_mode <= MTR_LOG_SHORT_INSERTS);
- return(mtr->log_mode);
+ return m_log_mode;
}
-/***************************************************************//**
+/**
Changes the logging mode of a mini-transaction.
@return old mode */
-UNIV_INLINE
-ulint
-mtr_set_log_mode(
-/*=============*/
- mtr_t* mtr, /*!< in: mtr */
- ulint mode) /*!< in: logging mode: MTR_LOG_NONE, ... */
-{
- ulint old_mode;
- ut_ad(mtr);
+mtr_log_t
+mtr_t::set_log_mode(mtr_log_t mode)
+{
ut_ad(mode >= MTR_LOG_ALL);
ut_ad(mode <= MTR_LOG_SHORT_INSERTS);
- old_mode = mtr->log_mode;
-
- if ((mode == MTR_LOG_SHORT_INSERTS) && (old_mode == MTR_LOG_NONE)) {
- /* Do nothing */
- } else {
- mtr->log_mode = mode;
+ const mtr_log_t old_mode = m_log_mode;
+
+ switch (old_mode) {
+ case MTR_LOG_NO_REDO:
+ /* Once this mode is set, it must not be changed. */
+ ut_ad(mode == MTR_LOG_NO_REDO || mode == MTR_LOG_NONE);
+ return(old_mode);
+ case MTR_LOG_NONE:
+ if (mode == old_mode || mode == MTR_LOG_SHORT_INSERTS) {
+ /* Keep MTR_LOG_NONE. */
+ return(old_mode);
+ }
+ /* fall through */
+ case MTR_LOG_SHORT_INSERTS:
+ ut_ad(mode == MTR_LOG_ALL);
+ /* fall through */
+ case MTR_LOG_ALL:
+ /* MTR_LOG_NO_REDO can only be set before generating
+ any redo log records. */
+ ut_ad(mode != MTR_LOG_NO_REDO || m_n_log_recs == 0);
+ m_log_mode = mode;
+ return(old_mode);
}
- ut_ad(old_mode >= MTR_LOG_ALL);
- ut_ad(old_mode <= MTR_LOG_SHORT_INSERTS);
-
+ ut_ad(0);
return(old_mode);
}
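A hedged illustration of the transition rules encoded in the switch above; the function wrapper is hypothetical.

/* Hedged sketch, not part of the patch: MTR_LOG_NO_REDO may be set
only before any redo record is generated, and is then sticky. */
void no_redo_example()
{
	mtr_t	mtr;
	mtr.start();
	ut_ad(mtr.get_log_mode() == MTR_LOG_ALL);
	const mtr_log_t	old_mode = mtr.set_log_mode(MTR_LOG_NO_REDO);
	ut_ad(old_mode == MTR_LOG_ALL);
	/* From here on only MTR_LOG_NO_REDO or MTR_LOG_NONE may be
	requested; a debug build asserts on anything else. */
	mtr.commit();
}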
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
+/**
Locks a lock in s-mode. */
-UNIV_INLINE
+
void
-mtr_s_lock_func(
-/*============*/
- rw_lock_t* lock, /*!< in: rw-lock */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line number */
- mtr_t* mtr) /*!< in: mtr */
+mtr_t::s_lock(rw_lock_t* lock, const char* file, unsigned line)
{
- ut_ad(mtr);
- ut_ad(lock);
-
rw_lock_s_lock_inline(lock, 0, file, line);
- mtr_memo_push(mtr, lock, MTR_MEMO_S_LOCK);
+ memo_push(lock, MTR_MEMO_S_LOCK);
}
-/*********************************************************************//**
+/**
Locks a lock in x-mode. */
-UNIV_INLINE
+
void
-mtr_x_lock_func(
-/*============*/
- rw_lock_t* lock, /*!< in: rw-lock */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line number */
- mtr_t* mtr) /*!< in: mtr */
+mtr_t::x_lock(rw_lock_t* lock, const char* file, unsigned line)
{
- ut_ad(mtr);
- ut_ad(lock);
-
rw_lock_x_lock_inline(lock, 0, file, line);
- mtr_memo_push(mtr, lock, MTR_MEMO_X_LOCK);
+ memo_push(lock, MTR_MEMO_X_LOCK);
+}
+
+/**
+Locks a lock in sx-mode. */
+
+void
+mtr_t::sx_lock(rw_lock_t* lock, const char* file, unsigned line)
+{
+ rw_lock_sx_lock_inline(lock, 0, file, line);
+
+ memo_push(lock, MTR_MEMO_SX_LOCK);
+}
+
+/**
+Reads 1 - 4 bytes from a file page buffered in the buffer pool.
+@return value read */
+
+ulint
+mtr_t::read_ulint(const byte* ptr, mlog_id_t type) const
+{
+ ut_ad(is_active());
+
+ ut_ad(memo_contains_page_flagged(
+ ptr,
+ MTR_MEMO_PAGE_S_FIX
+ | MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX));
+
+ return(mach_read_ulint(ptr, type));
}
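For instance (hedged sketch; FIL_PAGE_OFFSET is the standard page-number field offset from fil0fil.h, and the helper below is hypothetical):

/* Hedged sketch: read the 4-byte page number of a page frame that
this mtr has already latched. */
ulint page_no_of(const mtr_t& mtr, const byte* frame)
{
	return mtr.read_ulint(frame + FIL_PAGE_OFFSET, MLOG_4BYTES);
}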
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/mtr0types.h b/storage/innobase/include/mtr0types.h
index 715e9d8b578..985ad7b81ea 100644
--- a/storage/innobase/include/mtr0types.h
+++ b/storage/innobase/include/mtr0types.h
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,6 +27,250 @@ Created 11/26/1995 Heikki Tuuri
#ifndef mtr0types_h
#define mtr0types_h
+#ifndef UNIV_INNOCHECKSUM
+#include "sync0rw.h"
+#endif /* !UNIV_INNOCHECKSUM */
+
struct mtr_t;
-#endif
+/** Logging modes for a mini-transaction */
+enum mtr_log_t {
+ /** Default mode: log all operations modifying disk-based data */
+ MTR_LOG_ALL = 0,
+
+	/** Log no operations; dirty pages are not added to the flush list.
+ Set when applying log in crash recovery or when a modification of a
+ ROW_FORMAT=COMPRESSED page is attempted. */
+ MTR_LOG_NONE,
+
+ /** Don't generate REDO log but add dirty pages to flush list */
+ MTR_LOG_NO_REDO,
+
+ /** Inserts are logged in a shorter form */
+ MTR_LOG_SHORT_INSERTS
+};
+
+/** @name Log item types
+The log items are declared 'byte' so that the compiler can warn if val
+and type parameters are switched in a call to mlog_write_ulint. NOTE!
+For 1 - 8 bytes, the flag value must give the length also! @{ */
+enum mlog_id_t {
+ /** if the mtr contains only one log record for one page,
+ i.e., write_initial_log_record has been called only once,
+ this flag is ORed to the type of that first log record */
+ MLOG_SINGLE_REC_FLAG = 128,
+
+ /** one byte is written */
+ MLOG_1BYTE = 1,
+
+ /** 2 bytes ... */
+ MLOG_2BYTES = 2,
+
+ /** 4 bytes ... */
+ MLOG_4BYTES = 4,
+
+ /** 8 bytes ... */
+ MLOG_8BYTES = 8,
+
+ /** Record insert */
+ MLOG_REC_INSERT = 9,
+
+ /** Mark clustered index record deleted */
+ MLOG_REC_CLUST_DELETE_MARK = 10,
+
+ /** Mark secondary index record deleted */
+ MLOG_REC_SEC_DELETE_MARK = 11,
+
+ /** update of a record, preserves record field sizes */
+ MLOG_REC_UPDATE_IN_PLACE = 13,
+
+	/** Delete a record from a page */
+ MLOG_REC_DELETE = 14,
+
+ /** Delete record list end on index page */
+ MLOG_LIST_END_DELETE = 15,
+
+ /** Delete record list start on index page */
+ MLOG_LIST_START_DELETE = 16,
+
+ /** Copy record list end to a new created index page */
+ MLOG_LIST_END_COPY_CREATED = 17,
+
+ /** Reorganize an index page in ROW_FORMAT=REDUNDANT */
+ MLOG_PAGE_REORGANIZE = 18,
+
+ /** Create an index page */
+ MLOG_PAGE_CREATE = 19,
+
+ /** Insert entry in an undo log */
+ MLOG_UNDO_INSERT = 20,
+
+ /** erase an undo log page end */
+ MLOG_UNDO_ERASE_END = 21,
+
+ /** initialize a page in an undo log */
+ MLOG_UNDO_INIT = 22,
+
+ /** reuse an insert undo log header */
+ MLOG_UNDO_HDR_REUSE = 24,
+
+ /** create an undo log header */
+ MLOG_UNDO_HDR_CREATE = 25,
+
+ /** mark an index record as the predefined minimum record */
+ MLOG_REC_MIN_MARK = 26,
+
+ /** initialize an ibuf bitmap page */
+ MLOG_IBUF_BITMAP_INIT = 27,
+
+#ifdef UNIV_LOG_LSN_DEBUG
+ /** Current LSN */
+ MLOG_LSN = 28,
+#endif /* UNIV_LOG_LSN_DEBUG */
+
+ /** write a string to a page */
+ MLOG_WRITE_STRING = 30,
+
+ /** If a single mtr writes several log records, this log
+ record ends the sequence of these records */
+ MLOG_MULTI_REC_END = 31,
+
+ /** dummy log record used to pad a log block full */
+ MLOG_DUMMY_RECORD = 32,
+
+ /** log record about an .ibd file creation */
+ //MLOG_FILE_CREATE = 33,
+
+ /** rename databasename/tablename (no .ibd file name suffix) */
+ //MLOG_FILE_RENAME = 34,
+
+ /** delete a tablespace file that starts with (space_id,page_no) */
+ MLOG_FILE_DELETE = 35,
+
+ /** mark a compact index record as the predefined minimum record */
+ MLOG_COMP_REC_MIN_MARK = 36,
+
+ /** create a compact index page */
+ MLOG_COMP_PAGE_CREATE = 37,
+
+ /** compact record insert */
+ MLOG_COMP_REC_INSERT = 38,
+
+ /** mark compact clustered index record deleted */
+ MLOG_COMP_REC_CLUST_DELETE_MARK = 39,
+
+ /** update of a compact record, preserves record field sizes */
+ MLOG_COMP_REC_UPDATE_IN_PLACE = 41,
+
+ /** delete a compact record from a page */
+ MLOG_COMP_REC_DELETE = 42,
+
+ /** delete compact record list end on index page */
+ MLOG_COMP_LIST_END_DELETE = 43,
+
+	/** delete compact record list start on index page */
+ MLOG_COMP_LIST_START_DELETE = 44,
+
+ /** copy compact record list end to a new created index page */
+ MLOG_COMP_LIST_END_COPY_CREATED = 45,
+
+ /** reorganize an index page */
+ MLOG_COMP_PAGE_REORGANIZE = 46,
+
+ /** log record about creating an .ibd file, with format */
+ MLOG_FILE_CREATE2 = 47,
+
+ /** write the node pointer of a record on a compressed
+ non-leaf B-tree page */
+ MLOG_ZIP_WRITE_NODE_PTR = 48,
+
+ /** write the BLOB pointer of an externally stored column
+ on a compressed page */
+ MLOG_ZIP_WRITE_BLOB_PTR = 49,
+
+ /** write to compressed page header */
+ MLOG_ZIP_WRITE_HEADER = 50,
+
+ /** compress an index page */
+ MLOG_ZIP_PAGE_COMPRESS = 51,
+
+	/** compress an index page without logging its image */
+ MLOG_ZIP_PAGE_COMPRESS_NO_DATA = 52,
+
+ /** reorganize a compressed page */
+ MLOG_ZIP_PAGE_REORGANIZE = 53,
+
+ /** rename a tablespace file that starts with (space_id,page_no) */
+ MLOG_FILE_RENAME2 = 54,
+
+ /** note the first use of a tablespace file since checkpoint */
+ MLOG_FILE_NAME = 55,
+
+ /** note that all buffered log was written since a checkpoint */
+ MLOG_CHECKPOINT = 56,
+
+ /** Create a R-Tree index page */
+ MLOG_PAGE_CREATE_RTREE = 57,
+
+ /** create a R-tree compact page */
+ MLOG_COMP_PAGE_CREATE_RTREE = 58,
+
+ /** initialize a file page */
+ MLOG_INIT_FILE_PAGE2 = 59,
+
+ /** Table is being truncated. (Marked only for file-per-table) */
+ MLOG_TRUNCATE = 60,
+
+ /** notify that an index tree is being loaded without writing
+ redo log about individual pages */
+ MLOG_INDEX_LOAD = 61,
+
+ /** biggest value (used in assertions) */
+ MLOG_BIGGEST_TYPE = MLOG_INDEX_LOAD,
+
+ /** log record for writing/updating crypt data of
+ a tablespace */
+ MLOG_FILE_WRITE_CRYPT_DATA = 100,
+};
+
+/* @} */
+
+#define EXTRA_CHECK_MLOG_NUMBER(x) \
+ ((x) == MLOG_FILE_WRITE_CRYPT_DATA)
+
+/** Size of a MLOG_CHECKPOINT record in bytes.
+The record consists of a MLOG_CHECKPOINT byte followed by
+mach_write_to_8(checkpoint_lsn). */
+#define SIZE_OF_MLOG_CHECKPOINT 9
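The nine bytes break down as one type byte plus the 8-byte LSN. A hedged illustration follows; the function name and buffer are hypothetical, while mach_write_to_8() is the existing helper from mach0data.h.

void write_mlog_checkpoint(byte* buf, lsn_t checkpoint_lsn)
{
	buf[0] = MLOG_CHECKPOINT;		  /* 1 type byte */
	mach_write_to_8(buf + 1, checkpoint_lsn); /* 8-byte LSN */
	/* buf now holds SIZE_OF_MLOG_CHECKPOINT == 9 bytes */
}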
+
+#ifndef UNIV_INNOCHECKSUM
+/** Types for the mlock objects to store in the mtr memo; NOTE that the
+first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
+enum mtr_memo_type_t {
+ MTR_MEMO_PAGE_S_FIX = RW_S_LATCH,
+
+ MTR_MEMO_PAGE_X_FIX = RW_X_LATCH,
+
+ MTR_MEMO_PAGE_SX_FIX = RW_SX_LATCH,
+
+ MTR_MEMO_BUF_FIX = RW_NO_LATCH,
+
+#ifdef UNIV_DEBUG
+ MTR_MEMO_MODIFY = 16,
+#endif /* UNIV_DEBUG */
+
+ MTR_MEMO_S_LOCK = RW_S_LATCH << 5,
+
+ MTR_MEMO_X_LOCK = RW_X_LATCH << 5,
+
+ MTR_MEMO_SX_LOCK = RW_SX_LATCH << 5
+};
+#endif /* !UNIV_INNOCHECKSUM */
+
+enum mtr_state_t {
+ MTR_STATE_INIT = 0,
+ MTR_STATE_ACTIVE,
+ MTR_STATE_COMMITTED
+};
+
+#endif /* mtr0types_h */
diff --git a/storage/innobase/include/os0api.h b/storage/innobase/include/os0api.h
new file mode 100644
index 00000000000..6f42d968c8e
--- /dev/null
+++ b/storage/innobase/include/os0api.h
@@ -0,0 +1,75 @@
+/***********************************************************************
+
+Copyright (c) 2017, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+***********************************************************************/
+
+/**************************************************//**
+@file os0api.h
+The interface to the helper functions.
+These functions are used in os0file.h, where
+including the full header is not feasible, and are
+implemented in buf0buf.cc and fil0fil.cc.
+*******************************************************/
+
+#ifndef OS_API_H
+#define OS_API_H 1
+
+/** Page control block */
+class buf_page_t;
+
+/** File Node */
+struct fil_node_t;
+
+/**
+Check whether a punch hole should be used to deallocate the unused
+portion of the page.
+@param[in] bpage Page control block
+@return true if punch hole should be used, false if not */
+bool
+buf_page_should_punch_hole(
+ const buf_page_t* bpage)
+ MY_ATTRIBUTE((warn_unused_result));
+
+/**
+Calculate the length of trim (punch_hole) operation.
+@param[in] bpage Page control block
+@param[in] write_length Write length
+@return length of the trim or zero. */
+ulint
+buf_page_get_trim_length(
+ const buf_page_t* bpage,
+ ulint write_length)
+ MY_ATTRIBUTE((warn_unused_result));
+
+/**
+Check whether punch hole should be used for this file node.
+@param[in]	node	File node
+@return true if punch hole should be tried, false if not. */
+bool
+fil_node_should_punch_hole(
+ const fil_node_t* node)
+ MY_ATTRIBUTE((warn_unused_result));
+
+/**
+Set the punch hole setting of the given file node.
+@param[in]	node	File node
+@param[in] val value to be set. */
+void
+fil_space_set_punch_hole(
+ fil_node_t* node,
+ bool val);
+
+#endif /* OS_API_H */
diff --git a/storage/innobase/include/os0event.h b/storage/innobase/include/os0event.h
new file mode 100644
index 00000000000..55b9d054021
--- /dev/null
+++ b/storage/innobase/include/os0event.h
@@ -0,0 +1,135 @@
+/*****************************************************************************
+Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0event.h
+The interface to the operating system condition variables
+
+Created 2012-09-23 Sunny Bains (split from os0sync.h)
+*******************************************************/
+
+#ifndef os0event_h
+#define os0event_h
+
+#include "univ.i"
+
+// Forward declaration.
+struct os_event;
+typedef struct os_event* os_event_t;
+
+/** Denotes an infinite delay for os_event_wait_time() */
+#define OS_SYNC_INFINITE_TIME ULINT_UNDEFINED
+
+/** Return value of os_event_wait_time() when the time is exceeded */
+#define OS_SYNC_TIME_EXCEEDED 1
+
+/**
+Creates an event semaphore, i.e., a semaphore which may just have two states:
+signaled and nonsignaled. The created event is manual reset: it must be reset
+explicitly by calling os_event_reset().
+@return the event handle */
+os_event_t
+os_event_create(
+/*============*/
+ const char* name); /*!< in: the name of the event, if NULL
+ the event is created without a name */
+
+/**
+Sets an event semaphore to the signaled state: lets waiting threads
+proceed. */
+void
+os_event_set(
+/*=========*/
+ os_event_t event); /*!< in/out: event to set */
+
+/**
+Check if the event is set.
+@return true if set */
+bool
+os_event_is_set(
+/*============*/
+	const os_event_t	event);	/*!< in: event to check */
+
+/**
+Resets an event semaphore to the nonsignaled state. Waiting threads will
+block waiting for the event.
+The return value should be passed to os_event_wait_low() if it is desired
+that this thread should not wait in case of an intervening call to
+os_event_set() between this os_event_reset() and the
+os_event_wait_low() call. See comments for os_event_wait_low(). */
+int64_t
+os_event_reset(
+/*===========*/
+ os_event_t event); /*!< in/out: event to reset */
+
+/**
+Frees an event object. */
+void
+os_event_destroy(
+/*=============*/
+ os_event_t& event); /*!< in/own: event to free */
+
+/**
+Waits for an event object until it is in the signaled state.
+
+Typically, if the event has been signalled after the os_event_reset()
+we'll return immediately because event->is_set == TRUE.
+There are, however, situations (e.g.: sync_array code) where we may
+lose this information. For example:
+
+thread A calls os_event_reset()
+thread B calls os_event_set() [event->is_set == TRUE]
+thread C calls os_event_reset() [event->is_set == FALSE]
+thread A calls os_event_wait() [infinite wait!]
+thread C calls os_event_wait() [infinite wait!]
+
+Where such a scenario is possible, to avoid infinite wait, the
+value returned by os_event_reset() should be passed in as
+reset_sig_count. */
+void
+os_event_wait_low(
+/*==============*/
+ os_event_t event, /*!< in/out: event to wait */
+ int64_t reset_sig_count);/*!< in: zero or the value
+ returned by previous call of
+ os_event_reset(). */
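A hedged sketch of the reset_sig_count protocol described above; the wrapper function and event name are hypothetical.

/* Hedged sketch: capture the signal count at reset time so that a
set() racing between our reset() and wait() is not lost. */
void wait_without_lost_wakeup()
{
	os_event_t	ev = os_event_create("example_event");
	const int64_t	sig_count = os_event_reset(ev);
	/* another thread may call os_event_set(ev) at any point here */
	os_event_wait_low(ev, sig_count); /* wakes even if the set()
					  raced with our reset() */
	os_event_destroy(ev);
}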
+
+/** Blocking infinite wait on an event, until signalled.
+@param e - event to wait on. */
+#define os_event_wait(e) os_event_wait_low((e), 0)
+
+/**
+Waits for an event object until it is in the signaled state or
+a timeout is exceeded. In Unix the timeout is always infinite.
+@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
+ulint
+os_event_wait_time_low(
+/*===================*/
+ os_event_t event, /*!< in/out: event to wait */
+ ulint time_in_usec, /*!< in: timeout in
+ microseconds, or
+ OS_SYNC_INFINITE_TIME */
+ int64_t reset_sig_count); /*!< in: zero or the value
+ returned by previous call of
+ os_event_reset(). */
+
+/** Blocking timed wait on an event.
+@param e - event to wait on.
+@param t - timeout in microseconds */
+#define os_event_wait_time(e, t) os_event_wait_time_low((e), (t), 0)
+
+#endif /* !os0event_h */
diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
index 434b209d3ab..e85bf74201a 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innobase/include/os0file.h
@@ -2,7 +2,7 @@
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
-Copyright (c) 2013, 2017, MariaDB Corporation.
+Copyright (c) 2013, 2019, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
@@ -36,60 +36,75 @@ Created 10/21/1995 Heikki Tuuri
#ifndef os0file_h
#define os0file_h
-#include "univ.i"
+#include "page0size.h"
+#include "os0api.h"
-#ifndef __WIN__
+#ifndef _WIN32
#include <dirent.h>
#include <sys/stat.h>
#include <time.h>
-#endif
+#endif /* !_WIN32 */
/** File node of a tablespace or the log data space */
struct fil_node_t;
+struct fil_space_t;
+
+extern bool os_has_said_disk_full;
+extern my_bool srv_use_trim;
+
+/** File offset in bytes */
+typedef ib_uint64_t os_offset_t;
-extern ibool os_has_said_disk_full;
-/** Flag: enable debug printout for asynchronous i/o */
-extern ibool os_aio_print_debug;
+#ifdef _WIN32
-#ifdef __WIN__
+typedef HANDLE os_file_dir_t; /*!< directory stream */
/** We define always WIN_ASYNC_IO, and check at run-time whether
- the OS actually supports it: Win 95 does not, NT does. */
-#define WIN_ASYNC_IO
+the OS actually supports it: Win 95 does not, NT does. */
+# define WIN_ASYNC_IO
/** Use unbuffered I/O */
-#define UNIV_NON_BUFFERED_IO
-
-#endif
+# define UNIV_NON_BUFFERED_IO
-/** File offset in bytes */
-typedef ib_uint64_t os_offset_t;
-#ifdef __WIN__
/** File handle */
-# define os_file_t HANDLE
+typedef HANDLE os_file_t;
+
/** Convert a C file descriptor to a native file handle
-@param fd file descriptor
-@return native file handle */
+@param fd file descriptor
+@return native file handle */
# define OS_FILE_FROM_FD(fd) (HANDLE) _get_osfhandle(fd)
-#else
+
+#else /* _WIN32 */
+
+typedef DIR* os_file_dir_t; /*!< directory stream */
+
/** File handle */
typedef int os_file_t;
+
/** Convert a C file descriptor to a native file handle
-@param fd file descriptor
-@return native file handle */
+@param fd file descriptor
+@return native file handle */
# define OS_FILE_FROM_FD(fd) fd
-#endif
+
+#endif /* _WIN32 */
+
+static const os_file_t OS_FILE_CLOSED = os_file_t(~0);
/** File descriptor with optional PERFORMANCE_SCHEMA instrumentation */
struct pfs_os_file_t
{
+ /** Default constructor */
+ pfs_os_file_t(os_file_t file = OS_FILE_CLOSED) : m_file(file)
+#ifdef UNIV_PFS_IO
+ , m_psi(NULL)
+#endif
+ {}
+
/** The wrapped file handle */
os_file_t m_file;
#ifdef UNIV_PFS_IO
/** PERFORMANCE_SCHEMA descriptor */
struct PSI_file *m_psi;
- /** Default constructor */
- pfs_os_file_t() : m_file(), m_psi(NULL) {}
#endif
/** Implicit type conversion.
@return the wrapped file handle */
@@ -99,9 +114,6 @@ struct pfs_os_file_t
void operator=(os_file_t file) { m_file = file; }
};
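The default-constructed handle and the implicit conversion keep call sites uniform whether or not PFS instrumentation is compiled in. A hedged sketch (the conversion operator is documented above but its body is outside this hunk):

/* Hedged sketch, not part of the patch: */
void handle_conversion_example()
{
	pfs_os_file_t	handle;		/* defaults to OS_FILE_CLOSED */
	os_file_t	raw = handle;	/* implicit conversion operator */
	handle = raw;			/* operator=(os_file_t) */
}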
-/** Umask for creating files */
-extern ulint os_innodb_umask;
-
/** The next value should be smaller or equal to the smallest sector size used
on any disk. A log block is required to be a portion of disk which is written
so that if the start and the end of a block get written to disk, then the
@@ -109,7 +121,7 @@ whole block gets written. This should be true even in most cases of a crash:
if this fails for a log block, then it is equivalent to a media failure in the
log. */
-#define OS_FILE_LOG_BLOCK_SIZE 512
+#define OS_FILE_LOG_BLOCK_SIZE 512U
/** Options for os_file_create_func @{ */
enum os_file_create_t {
@@ -134,93 +146,579 @@ enum os_file_create_t {
ON_ERROR_NO_EXIT is set */
};
-#define OS_FILE_READ_ONLY 333
-#define OS_FILE_READ_WRITE 444
-#define OS_FILE_READ_ALLOW_DELETE 555 /* for mysqlbackup */
+static const ulint OS_FILE_READ_ONLY = 333;
+static const ulint OS_FILE_READ_WRITE = 444;
+
+/** Used by MySQLBackup */
+static const ulint OS_FILE_READ_ALLOW_DELETE = 555;
/* Options for file_create */
-#define OS_FILE_AIO 61
-#define OS_FILE_NORMAL 62
+static const ulint OS_FILE_AIO = 61;
+static const ulint OS_FILE_NORMAL = 62;
/* @} */
/** Types for file create @{ */
-#define OS_DATA_FILE 100
-#define OS_LOG_FILE 101
+static const ulint OS_DATA_FILE = 100;
+static const ulint OS_LOG_FILE = 101;
+static const ulint OS_DATA_TEMP_FILE = 102;
/* @} */
/** Error codes from os_file_get_last_error @{ */
-#define OS_FILE_NAME_TOO_LONG 36
-#define OS_FILE_NOT_FOUND 71
-#define OS_FILE_DISK_FULL 72
-#define OS_FILE_ALREADY_EXISTS 73
-#define OS_FILE_PATH_ERROR 74
-#define OS_FILE_AIO_RESOURCES_RESERVED 75 /* wait for OS aio resources
- to become available again */
-#define OS_FILE_SHARING_VIOLATION 76
-#define OS_FILE_ERROR_NOT_SPECIFIED 77
-#define OS_FILE_INSUFFICIENT_RESOURCE 78
-#define OS_FILE_AIO_INTERRUPTED 79
-#define OS_FILE_OPERATION_ABORTED 80
-#define OS_FILE_ACCESS_VIOLATION 81
-#define OS_FILE_OPERATION_NOT_SUPPORTED 125
-#define OS_FILE_ERROR_MAX 200
+static const ulint OS_FILE_NAME_TOO_LONG = 36;
+static const ulint OS_FILE_NOT_FOUND = 71;
+static const ulint OS_FILE_DISK_FULL = 72;
+static const ulint OS_FILE_ALREADY_EXISTS = 73;
+static const ulint OS_FILE_PATH_ERROR = 74;
+
+/** wait for OS aio resources to become available again */
+static const ulint OS_FILE_AIO_RESOURCES_RESERVED = 75;
+
+static const ulint OS_FILE_SHARING_VIOLATION = 76;
+static const ulint OS_FILE_ERROR_NOT_SPECIFIED = 77;
+static const ulint OS_FILE_INSUFFICIENT_RESOURCE = 78;
+static const ulint OS_FILE_AIO_INTERRUPTED = 79;
+static const ulint OS_FILE_OPERATION_ABORTED = 80;
+static const ulint OS_FILE_ACCESS_VIOLATION = 81;
+static const ulint OS_FILE_OPERATION_NOT_SUPPORTED = 125;
+static const ulint OS_FILE_ERROR_MAX = 200;
/* @} */
-/** Types for aio operations @{ */
-#define OS_FILE_READ 10
-#define OS_FILE_WRITE 11
+/** Types for AIO operations @{ */
-#define OS_FILE_LOG 256 /* This can be ORed to type */
-/* @} */
+/** No transformations during read/write, write as is. */
+#define IORequestRead IORequest(IORequest::READ)
+#define IORequestWrite IORequest(IORequest::WRITE)
+#define IORequestLogRead IORequest(IORequest::LOG | IORequest::READ)
+#define IORequestLogWrite IORequest(IORequest::LOG | IORequest::WRITE)
-#define OS_AIO_N_PENDING_IOS_PER_THREAD 32 /*!< Win NT does not allow more
- than 64 */
-/** Modes for aio operations @{ */
-#define OS_AIO_NORMAL 21 /*!< Normal asynchronous i/o not for ibuf
- pages or ibuf bitmap pages */
-#define OS_AIO_IBUF 22 /*!< Asynchronous i/o for ibuf pages or ibuf
- bitmap pages */
-#define OS_AIO_LOG 23 /*!< Asynchronous i/o for the log */
-#define OS_AIO_SYNC 24 /*!< Asynchronous i/o where the calling thread
- will itself wait for the i/o to complete,
- doing also the job of the i/o-handler thread;
- can be used for any pages, ibuf or non-ibuf.
- This is used to save CPU time, as we can do
- with fewer thread switches. Plain synchronous
- i/o is not as good, because it must serialize
- the file seek and read or write, causing a
- bottleneck for parallelism. */
-
-#define OS_AIO_SIMULATED_WAKE_LATER 512 /*!< This can be ORed to mode
- in the call of os_aio(...),
- if the caller wants to post several i/o
- requests in a batch, and only after that
- wake the i/o-handler thread; this has
- effect only in simulated aio */
+
+/**
+The IO Context that is passed down to the low level IO code */
+class IORequest {
+public:
+	/** Flags passed in the request; they can be ORed together. */
+ enum {
+ READ = 1,
+ WRITE = 2,
+
+ /** Double write buffer recovery. */
+ DBLWR_RECOVER = 4,
+
+	/** Enumerations below can be ORed with READ/WRITE above */
+
+ /** Data file */
+ DATA_FILE = 8,
+
+	/** Log file request */
+ LOG = 16,
+
+ /** Disable partial read warnings */
+ DISABLE_PARTIAL_IO_WARNINGS = 32,
+
+	/** Do not wake i/o-handler threads, but the caller will do
+ the waking explicitly later, in this way the caller can post
+ several requests in a batch; NOTE that the batch must not be
+ so big that it exhausts the slots in AIO arrays! NOTE that
+ a simulated batch may introduce hidden chances of deadlocks,
+ because I/Os are not actually handled until all
+ have been posted: use with great caution! */
+ DO_NOT_WAKE = 64,
+
+ /** Ignore failed reads of non-existent pages */
+ IGNORE_MISSING = 128,
+
+	/** Use punch hole if available */
+ PUNCH_HOLE = 256,
+ };
+
+ /** Default constructor */
+ IORequest()
+ :
+ m_bpage(NULL),
+ m_fil_node(NULL),
+ m_type(READ)
+ {
+ /* No op */
+ }
+
+ /**
+ @param[in] type Request type, can be a value that is
+ ORed from the above enum */
+ explicit IORequest(ulint type)
+ :
+ m_bpage(NULL),
+ m_fil_node(NULL),
+ m_type(static_cast<uint16_t>(type))
+ {
+ if (!is_punch_hole_supported() || !srv_use_trim) {
+ clear_punch_hole();
+ }
+ }
+
+ /**
+ @param[in] type Request type, can be a value that is
+ ORed from the above enum
+ @param[in] bpage Page to be written */
+ IORequest(ulint type, buf_page_t* bpage)
+ :
+ m_bpage(bpage),
+ m_fil_node(NULL),
+ m_type(static_cast<uint16_t>(type))
+ {
+ if (bpage && buf_page_should_punch_hole(bpage)) {
+ set_punch_hole();
+ }
+
+ if (!is_punch_hole_supported() || !srv_use_trim) {
+ clear_punch_hole();
+ }
+ }
+
+ /** Destructor */
+ ~IORequest() { }
+
+ /** @return true if ignore missing flag is set */
+ static bool ignore_missing(ulint type)
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ return((type & IGNORE_MISSING) == IGNORE_MISSING);
+ }
+
+ /** @return true if it is a read request */
+ bool is_read() const
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ return((m_type & READ) == READ);
+ }
+
+ /** @return true if it is a write request */
+ bool is_write() const
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ return((m_type & WRITE) == WRITE);
+ }
+
+ /** @return true if it is a redo log write */
+ bool is_log() const
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ return((m_type & LOG) == LOG);
+ }
+
+ /** @return true if the simulated AIO thread should be woken up */
+ bool is_wake() const
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ return((m_type & DO_NOT_WAKE) == 0);
+ }
+
+ /** Clear the punch hole flag */
+ void clear_punch_hole()
+ {
+ m_type &= ~PUNCH_HOLE;
+ }
+
+ /** @return true if partial read warning disabled */
+ bool is_partial_io_warning_disabled() const
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ return((m_type & DISABLE_PARTIAL_IO_WARNINGS)
+ == DISABLE_PARTIAL_IO_WARNINGS);
+ }
+
+ /** Disable partial read warnings */
+ void disable_partial_io_warnings()
+ {
+ m_type |= DISABLE_PARTIAL_IO_WARNINGS;
+ }
+
+ /** @return true if missing files should be ignored */
+ bool ignore_missing() const
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ return(ignore_missing(m_type));
+ }
+
+ /** @return true if punch hole should be used */
+ bool punch_hole() const
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ return((m_type & PUNCH_HOLE) == PUNCH_HOLE);
+ }
+
+ /** @return true if the read should be validated */
+ bool validate() const
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ return(is_read() ^ is_write());
+ }
+
+ /** Set the punch hole flag */
+ void set_punch_hole()
+ {
+ if (is_punch_hole_supported() && srv_use_trim) {
+ m_type |= PUNCH_HOLE;
+ }
+ }
+
+ /** Clear the do not wake flag */
+ void clear_do_not_wake()
+ {
+ m_type &= ~DO_NOT_WAKE;
+ }
+
+ /** Set the pointer to file node for IO
+ @param[in] node File node */
+ void set_fil_node(fil_node_t* node)
+ {
+ if (!srv_use_trim ||
+ (node && !fil_node_should_punch_hole(node))) {
+ clear_punch_hole();
+ }
+
+ m_fil_node = node;
+ }
+
+ /** Compare two requests
+	@return true if they are equal */
+ bool operator==(const IORequest& rhs) const
+ {
+ return(m_type == rhs.m_type);
+ }
+
+ /** Note that the IO is for double write recovery. */
+ void dblwr_recover()
+ {
+ m_type |= DBLWR_RECOVER;
+ }
+
+ /** @return true if the request is from the dblwr recovery */
+ bool is_dblwr_recover() const
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ return((m_type & DBLWR_RECOVER) == DBLWR_RECOVER);
+ }
+
+ /** @return true if punch hole is supported */
+ static bool is_punch_hole_supported()
+ {
+
+ /* In this debugging mode, we act as if punch hole is supported,
+ and then skip any calls to actually punch a hole here.
+ In this way, Transparent Page Compression is still being tested. */
+ DBUG_EXECUTE_IF("ignore_punch_hole",
+ return(true);
+ );
+
+#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) || defined(_WIN32)
+ return(true);
+#else
+ return(false);
+#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || _WIN32 */
+ }
+
+ ulint get_trim_length(ulint write_length) const
+ {
+ return (m_bpage ?
+ buf_page_get_trim_length(m_bpage, write_length)
+ : 0);
+ }
+
+ bool should_punch_hole() const {
+ return (m_fil_node ?
+ fil_node_should_punch_hole(m_fil_node)
+ : false);
+ }
+
+ void space_no_punch_hole() const {
+ if (m_fil_node) {
+ fil_space_set_punch_hole(m_fil_node, false);
+ }
+ }
+
+ /** Free storage space associated with a section of the file.
+ @param[in] fh Open file handle
+ @param[in] off Starting offset (SEEK_SET)
+ @param[in] len Size of the hole
+ @return DB_SUCCESS or error code */
+ dberr_t punch_hole(os_file_t fh, os_offset_t off, ulint len);
+
+private:
+ /** Page to be written on write operation. */
+ buf_page_t* m_bpage;
+
+ /** File node */
+ fil_node_t* m_fil_node;
+
+ /** Request type bit flags */
+ uint16_t m_type;
+};
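A hedged sketch of how callers compose these flags; the helper is hypothetical and the page pointer is assumed to come from the buffer pool.

/* Hedged sketch, not part of the patch: build a batched write
request; the constructor clears PUNCH_HOLE automatically when punch
hole is unsupported or srv_use_trim is off. */
IORequest make_batched_write(buf_page_t* bpage)
{
	IORequest	request(IORequest::WRITE | IORequest::DO_NOT_WAKE,
				bpage);
	ut_ad(request.is_write() && !request.is_wake());
	return(request);
}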
+
/* @} */
-#define OS_WIN31 1 /*!< Microsoft Windows 3.x */
-#define OS_WIN95 2 /*!< Microsoft Windows 95 */
-#define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */
-#define OS_WIN2000 4 /*!< Microsoft Windows 2000 */
-#define OS_WINXP 5 /*!< Microsoft Windows XP
- or Windows Server 2003 */
-#define OS_WINVISTA 6 /*!< Microsoft Windows Vista
- or Windows Server 2008 */
-#define OS_WIN7 7 /*!< Microsoft Windows 7
- or Windows Server 2008 R2 */
+/** Sparse file size information. */
+struct os_file_size_t {
+ /** Total size of file in bytes */
+ os_offset_t m_total_size;
+
+ /** If it is a sparse file then this is the number of bytes
+ actually allocated for the file. */
+ os_offset_t m_alloc_size;
+};
+
+/** Win NT does not allow more than 64 */
+static const ulint OS_AIO_N_PENDING_IOS_PER_THREAD = 32;
+/** Modes for aio operations @{ */
+/** Normal asynchronous i/o not for ibuf pages or ibuf bitmap pages */
+static const ulint OS_AIO_NORMAL = 21;
+
+/** Asynchronous i/o for ibuf pages or ibuf bitmap pages */
+static const ulint OS_AIO_IBUF = 22;
+
+/** Asynchronous i/o for the log */
+static const ulint OS_AIO_LOG = 23;
+
+/** Asynchronous i/o where the calling thread will itself wait for
+the i/o to complete, doing also the job of the i/o-handler thread;
+can be used for any pages, ibuf or non-ibuf. This is used to save
+CPU time, as we can do with fewer thread switches. Plain synchronous
+I/O is not as good, because it must serialize the file seek and read
+or write, causing a bottleneck for parallelism. */
+static const ulint OS_AIO_SYNC = 24;
+/* @} */
extern ulint os_n_file_reads;
extern ulint os_n_file_writes;
extern ulint os_n_fsyncs;
+/* File types for directory entry data type */
+
+enum os_file_type_t {
+ OS_FILE_TYPE_UNKNOWN = 0,
+ OS_FILE_TYPE_FILE, /* regular file */
+ OS_FILE_TYPE_DIR, /* directory */
+ OS_FILE_TYPE_LINK, /* symbolic link */
+ OS_FILE_TYPE_BLOCK /* block device */
+};
+
+/* Maximum path string length in bytes when referring to tables in the
+'./databasename/tablename.ibd' path format; we can allocate at least 2 buffers
+of this size from the thread stack; that is why this should not be made much
+bigger than 4000 bytes. The maximum path length used by any storage engine
+in the server must be at least this big. */
+
+/* MySQL 5.7 my_global.h */
+#ifndef FN_REFLEN_SE
+#define FN_REFLEN_SE 4000
+#endif
+
+#define OS_FILE_MAX_PATH 4000
+#if (FN_REFLEN_SE < OS_FILE_MAX_PATH)
+# error "(FN_REFLEN_SE < OS_FILE_MAX_PATH)"
+#endif
+
+/** Struct used in fetching information of a file in a directory */
+struct os_file_stat_t {
+ char name[OS_FILE_MAX_PATH]; /*!< path to a file */
+ os_file_type_t type; /*!< file type */
+ os_offset_t size; /*!< file size in bytes */
+ os_offset_t alloc_size; /*!< Allocated size for
+ sparse files in bytes */
+ size_t block_size; /*!< Block size to use for IO
+ in bytes*/
+ time_t ctime; /*!< creation time */
+ time_t mtime; /*!< modification time */
+ time_t atime; /*!< access time */
+ bool rw_perm; /*!< true if can be opened
+ in read-write mode. Only valid
+ if type == OS_FILE_TYPE_FILE */
+};
+
+/** Create a temporary file. This function is like tmpfile(3), but
+the temporary file is created in the given path. If the path
+is null, the file is created in the directory given by the MySQL server
+configuration parameter (--tmpdir).
+@param[in] path location for creating temporary file
+@return temporary file handle, or NULL on error */
+FILE*
+os_file_create_tmpfile(
+ const char* path);
+
+/** The os_file_opendir() function opens a directory stream corresponding to the
+directory named by the dirname argument. The directory stream is positioned
+at the first entry. In both Unix and Windows we automatically skip the '.'
+and '..' items at the start of the directory listing.
+
+@param[in] dirname directory name; it must not contain a trailing
+ '\' or '/'
+@param[in] is_fatal true if we should treat an error as a fatal
+ error; if we try to open symlinks then we do
+ not wish a fatal error if it happens not to be
+ a directory
+@return directory stream, NULL if error */
+os_file_dir_t
+os_file_opendir(
+ const char* dirname,
+ bool is_fatal);
+
+/**
+Closes a directory stream.
+@param[in] dir directory stream
+@return 0 if success, -1 if failure */
+int
+os_file_closedir(
+ os_file_dir_t dir);
+
+/** This function returns information about the next file in the directory. We jump
+over the '.' and '..' entries in the directory.
+@param[in] dirname directory name or path
+@param[in] dir directory stream
+@param[out] info buffer where the info is returned
+@return 0 if ok, -1 if error, 1 if at the end of the directory */
+int
+os_file_readdir_next_file(
+ const char* dirname,
+ os_file_dir_t dir,
+ os_file_stat_t* info);
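Together, the opendir/readdir/closedir trio supports the usual scan loop. A hedged sketch (the wrapper function is hypothetical):

/* Hedged sketch: enumerate the entries of a directory. */
void scan_directory(const char* dirname)
{
	os_file_dir_t	dir = os_file_opendir(dirname, false);
	if (dir == NULL) {
		return;
	}
	os_file_stat_t	info;
	while (os_file_readdir_next_file(dirname, dir, &info) == 0) {
		/* info.name, info.type and info.size describe the entry */
	}
	os_file_closedir(dir);
}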
+
+/**
+This function attempts to create a directory named pathname. The new directory
+gets default permissions. On Unix, the permissions are (0770 & ~umask). If the
+directory exists already, nothing is done and the call succeeds, unless the
+fail_if_exists argument is true.
+
+@param[in] pathname directory name as null-terminated string
+@param[in] fail_if_exists if true, pre-existing directory is treated
+ as an error.
+@return true if call succeeds, false on error */
+bool
+os_file_create_directory(
+ const char* pathname,
+ bool fail_if_exists);
+
+/** NOTE! Use the corresponding macro os_file_create_simple(), not directly
+this function!
+A simple function to open or create a file.
+@param[in] name name of the file or path as a null-terminated
+ string
+@param[in] create_mode create mode
+@param[in] access_type OS_FILE_READ_ONLY or OS_FILE_READ_WRITE
+@param[in] read_only if true read only mode checks are enforced
+@param[out]	success		true if succeeded, false on error
+@return own: handle to the file, not defined if error, error number
+ can be retrieved with os_file_get_last_error */
+pfs_os_file_t
+os_file_create_simple_func(
+ const char* name,
+ ulint create_mode,
+ ulint access_type,
+ bool read_only,
+ bool* success);
+
+/** NOTE! Use the corresponding macro
+os_file_create_simple_no_error_handling(), not directly this function!
+A simple function to open or create a file.
+@param[in] name name of the file or path as a null-terminated string
+@param[in] create_mode create mode
+@param[in] access_type OS_FILE_READ_ONLY, OS_FILE_READ_WRITE, or
+ OS_FILE_READ_ALLOW_DELETE; the last option
+ is used by a backup program reading the file
+@param[in] read_only if true read only mode checks are enforced
+@param[out] success true if succeeded
+@return own: handle to the file, not defined if error, error number
+ can be retrieved with os_file_get_last_error */
+pfs_os_file_t
+os_file_create_simple_no_error_handling_func(
+ const char* name,
+ ulint create_mode,
+ ulint access_type,
+ bool read_only,
+ bool* success)
+ MY_ATTRIBUTE((warn_unused_result));
+
+#ifdef _WIN32
+#define os_file_set_nocache(fd, file_name, operation_name) do{}while(0)
+#else
+/** Tries to disable OS caching on an opened file descriptor.
+@param[in] fd file descriptor to alter
+@param[in] file_name file name, used in the diagnostic message
+@param[in]	operation_name	"open" or "create"; used in the diagnostic
+ message */
+void
+os_file_set_nocache(
+/*================*/
+ int fd, /*!< in: file descriptor to alter */
+ const char* file_name,
+ const char* operation_name);
+#endif
+
+/** NOTE! Use the corresponding macro os_file_create(), not directly
+this function!
+Opens an existing file or creates a new.
+@param[in] name name of the file or path as a null-terminated
+ string
+@param[in] create_mode create mode
+@param[in] purpose OS_FILE_AIO, if asynchronous, non-buffered I/O
+ is desired, OS_FILE_NORMAL, if any normal file;
+ NOTE that it also depends on type, os_aio_..
+ and srv_.. variables whether we really use
+ async I/O or unbuffered I/O: look in the
+ function source code for the exact rules
+@param[in] type OS_DATA_FILE or OS_LOG_FILE
+@param[in] read_only if true read only mode checks are enforced
+@param[out]	success		true if succeeded
+@return own: handle to the file, not defined if error, error number
+ can be retrieved with os_file_get_last_error */
+pfs_os_file_t
+os_file_create_func(
+ const char* name,
+ ulint create_mode,
+ ulint purpose,
+ ulint type,
+ bool read_only,
+ bool* success)
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Deletes a file. The file has to be closed before calling this.
+@param[in] name file path as a null-terminated string
+@return true if success */
+bool
+os_file_delete_func(const char* name);
+
+/** Deletes a file if it exists. The file has to be closed before calling this.
+@param[in] name file path as a null-terminated string
+@param[out] exist indicate if file pre-exist
+@return true if success */
+bool
+os_file_delete_if_exists_func(const char* name, bool* exist);
+
+/** NOTE! Use the corresponding macro os_file_rename(), not directly
+this function!
+Renames a file (can also move it to another directory). It is safest that the
+file is closed before calling this function.
+@param[in] oldpath old file path as a null-terminated string
+@param[in] newpath new file path
+@return true if success */
+bool
+os_file_rename_func(const char* oldpath, const char* newpath);
+
+/** NOTE! Use the corresponding macro os_file_close(), not directly this
+function!
+Closes a file handle. In case of error, error number can be retrieved with
+os_file_get_last_error.
+@param[in] file own: handle to a file
+@return true if success */
+bool
+os_file_close_func(os_file_t file);
+
#ifdef UNIV_PFS_IO
+
/* Keys to register InnoDB I/O with performance schema */
-extern mysql_pfs_key_t innodb_file_data_key;
-extern mysql_pfs_key_t innodb_file_log_key;
-extern mysql_pfs_key_t innodb_file_temp_key;
+extern mysql_pfs_key_t innodb_data_file_key;
+extern mysql_pfs_key_t innodb_log_file_key;
+extern mysql_pfs_key_t innodb_temp_file_key;
/* Following four macros are instrumentations to register
various file I/O operations with performance schema.
@@ -237,7 +735,7 @@ are used to register file deletion operations*/
do { \
locker = PSI_FILE_CALL(get_thread_file_name_locker)( \
state, key, op, name, &locker); \
- if (locker != NULL) { \
+ if (locker != NULL) { \
PSI_FILE_CALL(start_file_open_wait)( \
locker, src_file, src_line); \
} \
@@ -245,10 +743,9 @@ do { \
# define register_pfs_file_open_end(locker, file, result) \
do { \
- if (locker != NULL) { \
- file.m_psi = PSI_FILE_CALL( \
- end_file_open_wait)( \
- locker, result); \
+ if (locker != NULL) { \
+ file.m_psi = PSI_FILE_CALL(end_file_open_wait)( \
+ locker, result); \
} \
} while (0)
@@ -269,7 +766,7 @@ do { \
do { \
locker = PSI_FILE_CALL(get_thread_file_name_locker)( \
state, key, op, name, &locker); \
- if (UNIV_LIKELY(locker != NULL)) { \
+ if (locker != NULL) { \
PSI_FILE_CALL(start_file_close_wait)( \
locker, src_file, src_line); \
} \
@@ -277,7 +774,7 @@ do { \
# define register_pfs_file_close_end(locker, result) \
do { \
- if (UNIV_LIKELY(locker != NULL)) { \
+ if (locker != NULL) { \
PSI_FILE_CALL(end_file_close_wait)( \
locker, result); \
} \
@@ -286,9 +783,9 @@ do { \
# define register_pfs_file_io_begin(state, locker, file, count, op, \
src_file, src_line) \
do { \
- locker = PSI_FILE_CALL(get_thread_file_stream_locker)( \
+ locker = PSI_FILE_CALL(get_thread_file_stream_locker)( \
state, file.m_psi, op); \
- if (locker != NULL) { \
+ if (locker != NULL) { \
PSI_FILE_CALL(start_file_wait)( \
locker, count, src_file, src_line); \
} \
@@ -296,11 +793,10 @@ do { \
# define register_pfs_file_io_end(locker, count) \
do { \
- if (locker != NULL) { \
+ if (locker != NULL) { \
PSI_FILE_CALL(end_file_wait)(locker, count); \
} \
} while (0)
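+
+/* Illustrative sketch of how a wrapper in os0file.ic consumes the io
+begin/end pair above (not the actual definition).  PSI_FILE_READ and
+PSI_file_locker_state come from the Performance Schema headers;
+IORequest::READ and the m_file member of pfs_os_file_t are assumed from
+their definitions earlier in this header. */
+UNIV_INLINE
+dberr_t
+example_instrumented_read(
+	pfs_os_file_t	file,
+	void*		buf,
+	os_offset_t	offset,
+	ulint		n,
+	const char*	src_file,
+	uint		src_line)
+{
+	PSI_file_locker_state	state;
+	struct PSI_file_locker*	locker = NULL;
+
+	register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ,
+				   src_file, src_line);
+
+	IORequest	request(IORequest::READ);
+	dberr_t		err = os_file_read_func(request, file.m_file,
+						buf, offset, n);
+
+	register_pfs_file_io_end(locker, n);
+
+	return(err);
+}
+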
-#endif /* UNIV_PFS_IO */
/* Following macros/functions are file I/O APIs that would be performance
schema instrumented if "UNIV_PFS_IO" is defined. They would point to
@@ -320,48 +816,48 @@ os_file_write_int_fd
The wrapper functions have the prefix of "innodb_". */
-#ifdef UNIV_PFS_IO
-# define os_file_create(key, name, create, purpose, type, success, atomic_writes) \
+# define os_file_create(key, name, create, purpose, type, read_only, \
+ success) \
pfs_os_file_create_func(key, name, create, purpose, type, \
- success, atomic_writes, __FILE__, __LINE__)
+ read_only, success, __FILE__, __LINE__)
-# define os_file_create_simple(key, name, create, access, success) \
+# define os_file_create_simple(key, name, create, access, \
+ read_only, success) \
pfs_os_file_create_simple_func(key, name, create, access, \
- success, __FILE__, __LINE__)
+ read_only, success, __FILE__, __LINE__)
# define os_file_create_simple_no_error_handling( \
- key, name, create_mode, access, success, atomic_writes) \
+ key, name, create_mode, access, read_only, success) \
pfs_os_file_create_simple_no_error_handling_func( \
- key, name, create_mode, access, success, atomic_writes, __FILE__, __LINE__)
+ key, name, create_mode, access, \
+ read_only, success, __FILE__, __LINE__)
# define os_file_close(file) \
pfs_os_file_close_func(file, __FILE__, __LINE__)
-# define os_aio(type, is_log, mode, name, file, buf, offset, \
- n, page_size, message1, message2, write_size) \
- pfs_os_aio_func(type, is_log, mode, name, file, buf, offset, \
- n, page_size, message1, message2, write_size, \
- __FILE__, __LINE__)
-
+# define os_aio(type, mode, name, file, buf, offset, \
+ n, read_only, message1, message2) \
+ pfs_os_aio_func(type, mode, name, file, buf, offset, \
+ n, read_only, message1, message2, \
+ __FILE__, __LINE__)
-# define os_file_read(file, buf, offset, n) \
- pfs_os_file_read_func(file, buf, offset, n, __FILE__, __LINE__)
+# define os_file_read(type, file, buf, offset, n) \
+ pfs_os_file_read_func(type, file, buf, offset, n, __FILE__, __LINE__)
-# define os_file_read_no_error_handling(file, buf, offset, n) \
- pfs_os_file_read_no_error_handling_func(file, buf, offset, n, \
- __FILE__, __LINE__)
+# define os_file_read_no_error_handling(type, file, buf, offset, n, o) \
+ pfs_os_file_read_no_error_handling_func( \
+ type, file, buf, offset, n, o, __FILE__, __LINE__)
-# define os_file_read_no_error_handling_int_fd( \
- file, buf, offset, n) \
+# define os_file_read_no_error_handling_int_fd(type, file, buf, offset, n) \
pfs_os_file_read_no_error_handling_int_fd_func( \
- file, buf, offset, n, __FILE__, __LINE__)
+ type, file, buf, offset, n, __FILE__, __LINE__)
-# define os_file_write(name, file, buf, offset, n) \
- pfs_os_file_write_func(name, file, buf, offset, \
+# define os_file_write(type, name, file, buf, offset, n) \
+ pfs_os_file_write_func(type, name, file, buf, offset, \
n, __FILE__, __LINE__)
-# define os_file_write_int_fd(name, file, buf, offset, n) \
- pfs_os_file_write_int_fd_func(name, file, buf, offset, \
+# define os_file_write_int_fd(type, name, file, buf, offset, n) \
+ pfs_os_file_write_int_fd_func(type, name, file, buf, offset, \
n, __FILE__, __LINE__)
# define os_file_flush(file) \
@@ -373,698 +869,515 @@ The wrapper functions have the prefix of "innodb_". */
# define os_file_delete(key, name) \
pfs_os_file_delete_func(key, name, __FILE__, __LINE__)
-# define os_file_delete_if_exists(key, name) \
- pfs_os_file_delete_if_exists_func(key, name, __FILE__, __LINE__)
-#else /* UNIV_PFS_IO */
-
-/* If UNIV_PFS_IO is not defined, these I/O APIs point
-to original un-instrumented file I/O APIs */
-# define os_file_create(key, name, create, purpose, type, success, atomic_writes) \
- os_file_create_func(name, create, purpose, type, success, atomic_writes)
-
-# define os_file_create_simple(key, name, create_mode, access, success) \
- os_file_create_simple_func(name, create_mode, access, success)
-
-# define os_file_create_simple_no_error_handling( \
- key, name, create_mode, access, success, atomic_writes) \
- os_file_create_simple_no_error_handling_func( \
- name, create_mode, access, success, atomic_writes)
-
-# define os_file_close(file) \
- os_file_close_func(file)
-
-# define os_aio(type, is_log, mode, name, file, buf, offset, n, page_size, message1, \
- message2, write_size) \
- os_aio_func(type, is_log, mode, name, file, buf, offset, n, \
- page_size, message1, message2, write_size)
-
-# define os_file_read(file, buf, offset, n) \
- os_file_read_func(file, buf, offset, n)
-
-# define os_file_read_no_error_handling(file, buf, offset, n) \
- os_file_read_no_error_handling_func(file, buf, offset, n)
-# define os_file_read_no_error_handling_int_fd( \
- file, buf, offset, n) \
- os_file_read_no_error_handling_func(OS_FILE_FROM_FD(file), buf, offset, n)
-
-# define os_file_write_int_fd(name, file, buf, offset, n) \
- os_file_write_func(name, OS_FILE_FROM_FD(file), buf, offset, n)
-# define os_file_write(name, file, buf, offset, n) \
- os_file_write_func(name, file, buf, offset, n)
-
-
-# define os_file_flush(file) os_file_flush_func(file)
-
-# define os_file_rename(key, oldpath, newpath) \
- os_file_rename_func(oldpath, newpath)
-
-# define os_file_delete(key, name) os_file_delete_func(name)
-
-# define os_file_delete_if_exists(key, name) \
- os_file_delete_if_exists_func(name)
-
-#endif /* UNIV_PFS_IO */
-
-/* File types for directory entry data type */
-
-enum os_file_type_t {
- OS_FILE_TYPE_UNKNOWN = 0,
- OS_FILE_TYPE_FILE, /* regular file
- (or a character/block device) */
- OS_FILE_TYPE_DIR, /* directory */
- OS_FILE_TYPE_LINK /* symbolic link */
-};
-
-/* Maximum path string length in bytes when referring to tables with in the
-'./databasename/tablename.ibd' path format; we can allocate at least 2 buffers
-of this size from the thread stack; that is why this should not be made much
-bigger than 4000 bytes */
-#define OS_FILE_MAX_PATH 4000
-
-/** Struct used in fetching information of a file in a directory */
-struct os_file_stat_t {
- char name[OS_FILE_MAX_PATH]; /*!< path to a file */
- os_file_type_t type; /*!< file type */
- ib_int64_t size; /*!< file size */
- time_t ctime; /*!< creation time */
- time_t mtime; /*!< modification time */
- time_t atime; /*!< access time */
- bool rw_perm; /*!< true if can be opened
- in read-write mode. Only valid
- if type == OS_FILE_TYPE_FILE */
-};
-
-#ifdef __WIN__
-typedef HANDLE os_file_dir_t; /*!< directory stream */
-#else
-typedef DIR* os_file_dir_t; /*!< directory stream */
-#endif
-
-#ifdef __WIN__
-/***********************************************************************//**
-Gets the operating system version. Currently works only on Windows.
-@return OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000, OS_WINXP, OS_WINVISTA,
-OS_WIN7. */
-UNIV_INTERN
-ulint
-os_get_os_version(void);
-/*===================*/
-#endif /* __WIN__ */
-#ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Creates the seek mutexes used in positioned reads and writes. */
-UNIV_INTERN
-void
-os_io_init_simple(void);
-/*===================*/
-
-
-/** Create a temporary file. This function is like tmpfile(3), but
-the temporary file is created in the given parameter path. If the path
-is null then it will create the file in the mysql server configuration
-parameter (--tmpdir).
-@param[in] path location for creating temporary file
-@return temporary file handle, or NULL on error */
-UNIV_INTERN
-FILE*
-os_file_create_tmpfile(
- const char* path);
-
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************************//**
-The os_file_opendir() function opens a directory stream corresponding to the
-directory named by the dirname argument. The directory stream is positioned
-at the first entry. In both Unix and Windows we automatically skip the '.'
-and '..' items at the start of the directory listing.
-@return directory stream, NULL if error */
-UNIV_INTERN
-os_file_dir_t
-os_file_opendir(
-/*============*/
- const char* dirname, /*!< in: directory name; it must not
- contain a trailing '\' or '/' */
- ibool error_is_fatal);/*!< in: TRUE if we should treat an
- error as a fatal error; if we try to
- open symlinks then we do not wish a
- fatal error if it happens not to be
- a directory */
-/***********************************************************************//**
-Closes a directory stream.
-@return 0 if success, -1 if failure */
-UNIV_INTERN
-int
-os_file_closedir(
-/*=============*/
- os_file_dir_t dir); /*!< in: directory stream */
-/***********************************************************************//**
-This function returns information of the next file in the directory. We jump
-over the '.' and '..' entries in the directory.
-@return 0 if ok, -1 if error, 1 if at the end of the directory */
-UNIV_INTERN
-int
-os_file_readdir_next_file(
-/*======================*/
- const char* dirname,/*!< in: directory name or path */
- os_file_dir_t dir, /*!< in: directory stream */
- os_file_stat_t* info); /*!< in/out: buffer where the info is returned */
-/*****************************************************************//**
-This function attempts to create a directory named pathname. The new directory
-gets default permissions. On Unix, the permissions are (0770 & ~umask). If the
-directory exists already, nothing is done and the call succeeds, unless the
-fail_if_exists arguments is true.
-@return TRUE if call succeeds, FALSE on error */
-UNIV_INTERN
-ibool
-os_file_create_directory(
-/*=====================*/
- const char* pathname, /*!< in: directory name as
- null-terminated string */
- ibool fail_if_exists);/*!< in: if TRUE, pre-existing directory
- is treated as an error. */
-/****************************************************************//**
-NOTE! Use the corresponding macro os_file_create_simple(), not directly
-this function!
-A simple function to open or create a file.
-@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INTERN
-os_file_t
-os_file_create_simple_func(
-/*=======================*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: create mode */
- ulint access_type,/*!< in: OS_FILE_READ_ONLY or
- OS_FILE_READ_WRITE */
- ibool* success);/*!< out: TRUE if succeed, FALSE if error */
-/****************************************************************//**
-NOTE! Use the corresponding macro
-os_file_create_simple_no_error_handling(), not directly this function!
-A simple function to open or create a file.
-@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INTERN
-pfs_os_file_t
-os_file_create_simple_no_error_handling_func(
-/*=========================================*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: create mode */
- ulint access_type,/*!< in: OS_FILE_READ_ONLY,
- OS_FILE_READ_WRITE, or
- OS_FILE_READ_ALLOW_DELETE; the last option is
- used by a backup program reading the file */
- ibool* success,/*!< out: TRUE if succeed, FALSE if error */
- ulint atomic_writes)/*!< in: atomic writes table option
- value */
- __attribute__((nonnull, warn_unused_result));
-/****************************************************************//**
-Tries to disable OS caching on an opened file descriptor. */
-UNIV_INTERN
-void
-os_file_set_nocache(
-/*================*/
- os_file_t fd, /*!< in: file descriptor to alter */
- const char* file_name, /*!< in: file name, used in the
- diagnostic message */
- const char* operation_name);/*!< in: "open" or "create"; used in the
- diagnostic message */
-/****************************************************************//**
-NOTE! Use the corresponding macro os_file_create(), not directly
-this function!
-Opens an existing file or creates a new.
-@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INTERN
-pfs_os_file_t
-os_file_create_func(
-/*================*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: create mode */
- ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous,
- non-buffered i/o is desired,
- OS_FILE_NORMAL, if any normal file;
- NOTE that it also depends on type, os_aio_..
- and srv_.. variables whether we really use
- async i/o or unbuffered i/o: look in the
- function source code for the exact rules */
- ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
- ibool* success,/*!< out: TRUE if succeed, FALSE if error */
- ulint atomic_writes)/*!< in: atomic writes table option
- value */
- __attribute__((nonnull, warn_unused_result));
-/***********************************************************************//**
-Deletes a file. The file has to be closed before calling this.
-@return TRUE if success */
-UNIV_INTERN
-bool
-os_file_delete_func(
-/*================*/
- const char* name); /*!< in: file path as a null-terminated
- string */
-
-/***********************************************************************//**
-Deletes a file if it exists. The file has to be closed before calling this.
-@return TRUE if success */
-UNIV_INTERN
-bool
-os_file_delete_if_exists_func(
-/*==========================*/
- const char* name); /*!< in: file path as a null-terminated
- string */
-/***********************************************************************//**
-NOTE! Use the corresponding macro os_file_rename(), not directly
-this function!
-Renames a file (can also move it to another directory). It is safest that the
-file is closed before calling this function.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_rename_func(
-/*================*/
- const char* oldpath, /*!< in: old file path as a
- null-terminated string */
- const char* newpath); /*!< in: new file path */
-/***********************************************************************//**
-NOTE! Use the corresponding macro os_file_close(), not directly this
-function!
-Closes a file handle. In case of error, error number can be retrieved with
-os_file_get_last_error.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_close_func(
-/*===============*/
- os_file_t file); /*!< in, own: handle to a file */
+# define os_file_delete_if_exists(key, name, exist) \
+ pfs_os_file_delete_if_exists_func(key, name, exist, __FILE__, __LINE__)
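+
+/* Illustrative sketch: because the macro above expands at the call site,
+__FILE__ and __LINE__ identify the caller, and Performance Schema
+attributes the wait to that location.  The file name is made up. */
+static bool
+example_delete_stray_file(void)
+{
+	bool	exist;
+
+	return(os_file_delete_if_exists(innodb_data_file_key,
+					"ib_example.tmp", &exist));
+}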
-#ifdef UNIV_PFS_IO
-/****************************************************************//**
-NOTE! Please use the corresponding macro os_file_create_simple(),
+/** NOTE! Please use the corresponding macro os_file_create_simple(),
not directly this function!
A performance schema instrumented wrapper function for
os_file_create_simple() which opens or creates a file.
+@param[in] key Performance Schema Key
+@param[in] name name of the file or path as a null-terminated
+ string
+@param[in] create_mode create mode
+@param[in] access_type OS_FILE_READ_ONLY or OS_FILE_READ_WRITE
+@param[in] read_only if true read only mode checks are enforced
+@param[out] success true if succeeded
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
+ can be retrieved with os_file_get_last_error */
UNIV_INLINE
pfs_os_file_t
pfs_os_file_create_simple_func(
-/*===========================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema Key */
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: create mode */
- ulint access_type,/*!< in: OS_FILE_READ_ONLY or
- OS_FILE_READ_WRITE */
- ibool* success,/*!< out: TRUE if succeed, FALSE if error */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/****************************************************************//**
-NOTE! Please use the corresponding macro
+ mysql_pfs_key_t key,
+ const char* name,
+ ulint create_mode,
+ ulint access_type,
+ bool read_only,
+ bool* success,
+ const char* src_file,
+ uint src_line)
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** NOTE! Please use the corresponding macro
os_file_create_simple_no_error_handling(), not directly this function!
A performance schema instrumented wrapper function for
os_file_create_simple_no_error_handling(). Add instrumentation to
monitor file creation/open.
+@param[in] key Performance Schema Key
+@param[in] name name of the file or path as a null-terminated
+ string
+@param[in] create_mode create mode
+@param[in] access_type OS_FILE_READ_ONLY, OS_FILE_READ_WRITE, or
+ OS_FILE_READ_ALLOW_DELETE; the last option is
+ used by a backup program reading the file
+@param[in] read_only if true read only mode checks are enforced
+@param[out] success true if succeeded
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
+ can be retrieved with os_file_get_last_error */
UNIV_INLINE
pfs_os_file_t
pfs_os_file_create_simple_no_error_handling_func(
-/*=============================================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema Key */
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode, /*!< in: file create mode */
- ulint access_type,/*!< in: OS_FILE_READ_ONLY,
- OS_FILE_READ_WRITE, or
- OS_FILE_READ_ALLOW_DELETE; the last option is
- used by a backup program reading the file */
- ibool* success,/*!< out: TRUE if succeed, FALSE if error */
- ulint atomic_writes,/*!< in: atomic writes table option
- value */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/****************************************************************//**
-NOTE! Please use the corresponding macro os_file_create(), not directly
+ mysql_pfs_key_t key,
+ const char* name,
+ ulint create_mode,
+ ulint access_type,
+ bool read_only,
+ bool* success,
+ const char* src_file,
+ uint src_line)
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** NOTE! Please use the corresponding macro os_file_create(), not directly
this function!
A performance schema wrapper function for os_file_create().
Add instrumentation to monitor file creation/open.
+@param[in] key Performance Schema Key
+@param[in] name name of the file or path as a null-terminated
+ string
+@param[in] create_mode create mode
+@param[in] purpose OS_FILE_AIO, if asynchronous, non-buffered I/O
+ is desired, OS_FILE_NORMAL, if any normal file;
+ NOTE that it also depends on type, os_aio_..
+ and srv_.. variables whether we really use
+ async I/O or unbuffered I/O: look in the
+ function source code for the exact rules
+@param[in] read_only if true read only mode checks are enforced
+@param[out] success true if succeeded
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
+ can be retrieved with os_file_get_last_error */
UNIV_INLINE
pfs_os_file_t
pfs_os_file_create_func(
-/*====================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema Key */
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: file create mode */
- ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous,
- non-buffered i/o is desired,
- OS_FILE_NORMAL, if any normal file;
- NOTE that it also depends on type, os_aio_..
- and srv_.. variables whether we really use
- async i/o or unbuffered i/o: look in the
- function source code for the exact rules */
- ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
- ibool* success,/*!< out: TRUE if succeed, FALSE if error */
- ulint atomic_writes,/*!< in: atomic writes table option
- value*/
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ mysql_pfs_key_t key,
+ const char* name,
+ ulint create_mode,
+ ulint purpose,
+ ulint type,
+ bool read_only,
+ bool* success,
+ const char* src_file,
+ uint src_line)
+ MY_ATTRIBUTE((warn_unused_result));
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_close(), not directly
+/** NOTE! Please use the corresponding macro os_file_close(), not directly
this function!
A performance schema instrumented wrapper function for os_file_close().
-@return TRUE if success */
+@param[in] file handle to a file
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
+@return true if success */
UNIV_INLINE
-ibool
+bool
pfs_os_file_close_func(
-/*===================*/
- pfs_os_file_t file, /*!< in, own: handle to a file */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line);/*!< in: line where the func invoked */
-/*******************************************************************//**
-NOTE! Please use the corresponding macro os_file_read(), not directly
+ pfs_os_file_t file,
+ const char* src_file,
+ uint src_line);
+
+/** NOTE! Please use the corresponding macro os_file_read(), not directly
this function!
This is the performance schema instrumented wrapper function for
os_file_read() which requests a synchronous read operation.
-@return TRUE if request was successful, FALSE if fail */
+@param[in] type IO request context
+@param[in] file Open file handle
+@param[out] buf buffer where to read
+@param[in] offset file offset where to read
+@param[in] n number of bytes to read
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
+@return DB_SUCCESS if request was successful */
UNIV_INLINE
-ibool
+dberr_t
pfs_os_file_read_func(
-/*==================*/
- pfs_os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
- os_offset_t offset, /*!< in: file offset where to read */
- ulint n, /*!< in: number of bytes to read */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line);/*!< in: line where the func invoked */
-
-/*******************************************************************//**
-NOTE! Please use the corresponding macro os_file_read_no_error_handling(),
+ const IORequest& type,
+ pfs_os_file_t file,
+ void* buf,
+ os_offset_t offset,
+ ulint n,
+ const char* src_file,
+ uint src_line);
+
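+/* Illustrative sketch: a synchronous read through the os_file_read()
+macro defined above; IORequest and its READ flag are assumed from their
+definitions earlier in this header. */
+static dberr_t
+example_read_page(
+	pfs_os_file_t	file,
+	void*		buf,
+	os_offset_t	offset,
+	ulint		size)
+{
+	IORequest	request(IORequest::READ);
+
+	/* Blocks until size bytes at offset have been read into buf. */
+	return(os_file_read(request, file, buf, offset, size));
+}
+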
+/** NOTE! Please use the corresponding macro os_file_read_no_error_handling(),
not directly this function!
This is the performance schema instrumented wrapper function for
os_file_read_no_error_handling_func() which requests a synchronous
read operation.
-@return TRUE if request was successful, FALSE if fail */
+@param[in] type IO request context
+@param[in] file Open file handle
+@param[out] buf buffer where to read
+@param[in] offset file offset where to read
+@param[in] n number of bytes to read
+@param[out] o number of bytes actually read
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
+@return DB_SUCCESS if request was successful */
UNIV_INLINE
-ibool
+dberr_t
pfs_os_file_read_no_error_handling_func(
-/*====================================*/
- pfs_os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
- os_offset_t offset, /*!< in: file offset where to read */
- ulint n, /*!< in: number of bytes to read */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line);/*!< in: line where the func invoked */
-
-/*******************************************************************//**
-NOTE! Please use the corresponding macro os_aio(), not directly this
+ const IORequest& type,
+ pfs_os_file_t file,
+ void* buf,
+ os_offset_t offset,
+ ulint n,
+ ulint* o,
+ const char* src_file,
+ uint src_line);
+
+/** NOTE! Please use the corresponding macro os_aio(), not directly this
function!
Performance schema wrapper function of os_aio() which requests
-an asynchronous i/o operation.
-@return TRUE if request was queued successfully, FALSE if fail */
+an asynchronous I/O operation.
+@param[in,out] type IO request context
+@param[in] mode IO mode
+@param[in] name Name of the file or path as NUL terminated
+ string
+@param[in] file Open file handle
+@param[in,out]	buf		buffer where to read or from which to write
+@param[in]	offset		file offset where to read or write
+@param[in]	n		number of bytes to read or write
+@param[in] read_only if true read only mode checks are enforced
+@param[in,out] m1 Message for the AIO handler, (can be used to
+ identify a completed AIO operation); ignored
+ if mode is OS_AIO_SYNC
+@param[in,out] m2 message for the AIO handler (can be used to
+ identify a completed AIO operation); ignored
+ if mode is OS_AIO_SYNC
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
+@return DB_SUCCESS if request was queued successfully, or an error code */
UNIV_INLINE
-ibool
+dberr_t
pfs_os_aio_func(
-/*============*/
- ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
- ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
- ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- pfs_os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read or from which
- to write */
- os_offset_t offset, /*!< in: file offset where to read or write */
- ulint n, /*!< in: number of bytes to read or write */
- ulint page_size, /*!< in: page size in bytes */
- fil_node_t* message1,/*!< in: message for the aio handler
- (can be used to identify a completed
- aio operation); ignored if mode is
- OS_AIO_SYNC */
- void* message2,/*!< in: message for the aio handler
- (can be used to identify a completed
- aio operation); ignored if mode is
- OS_AIO_SYNC */
- ulint* write_size,/*!< in/out: Actual write size initialized
- after fist successfull trim
- operation for this page and if
- initialized we do not trim again if
- actual page size does not decrease. */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line);/*!< in: line where the func invoked */
-/*******************************************************************//**
-NOTE! Please use the corresponding macro os_file_write(), not directly
+ IORequest& type,
+ ulint mode,
+ const char* name,
+ pfs_os_file_t file,
+ void* buf,
+ os_offset_t offset,
+ ulint n,
+ bool read_only,
+ fil_node_t* m1,
+ void* m2,
+ const char* src_file,
+ uint src_line);
+
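+/* Illustrative sketch: posting an asynchronous read that an I/O handler
+thread later reaps via os_aio_handler().  OS_AIO_NORMAL and
+IORequest::READ are assumed from their definitions elsewhere in this
+header; the name argument is whatever path the caller opened. */
+static dberr_t
+example_post_async_read(
+	const char*	name,
+	pfs_os_file_t	file,
+	void*		buf,
+	os_offset_t	offset,
+	ulint		n,
+	fil_node_t*	node)
+{
+	IORequest	request(IORequest::READ);
+
+	/* node and the NULL message identify the request on completion;
+	both are ignored for OS_AIO_SYNC. */
+	return(os_aio(request, OS_AIO_NORMAL, name, file,
+		      buf, offset, n, false, node, NULL));
+}
+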
+/** NOTE! Please use the corresponding macro os_file_write(), not directly
this function!
This is the performance schema instrumented wrapper function for
os_file_write() which requests a synchronous write operation.
-@return TRUE if request was successful, FALSE if fail */
+@param[in] type IO request context
+@param[in] name Name of the file or path as NUL terminated
+ string
+@param[in] file Open file handle
+@param[in]	buf		buffer from which to write
+@param[in]	offset		file offset where to write
+@param[in]	n		number of bytes to write
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
+@return DB_SUCCESS if request was successful */
UNIV_INLINE
-ibool
+dberr_t
pfs_os_file_write_func(
-/*===================*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- pfs_os_file_t file, /*!< in: handle to a file */
- const void* buf, /*!< in: buffer from which to write */
- os_offset_t offset, /*!< in: file offset where to write */
- ulint n, /*!< in: number of bytes to write */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line);/*!< in: line where the func invoked */
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_flush(), not directly
+ const IORequest& type,
+ const char* name,
+ pfs_os_file_t file,
+ const void* buf,
+ os_offset_t offset,
+ ulint n,
+ const char* src_file,
+ uint src_line);
+
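+/* Illustrative sketch: a synchronous write mirroring the read example
+above; IORequest::WRITE is assumed from its definition earlier in this
+header. */
+static dberr_t
+example_write_page(
+	const char*	name,
+	pfs_os_file_t	file,
+	const void*	buf,
+	os_offset_t	offset,
+	ulint		size)
+{
+	IORequest	request(IORequest::WRITE);
+
+	return(os_file_write(request, name, file, buf, offset, size));
+}
+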
+/** NOTE! Please use the corresponding macro os_file_flush(), not directly
this function!
This is the performance schema instrumented wrapper function for
os_file_flush() which flushes the write buffers of a given file to the disk.
+@param[in] file Open file handle
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
@return true if success */
UNIV_INLINE
-ibool
+bool
pfs_os_file_flush_func(
-/*===================*/
- pfs_os_file_t file, /*!< in, own: handle to a file */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line);/*!< in: line where the func invoked */
+ pfs_os_file_t file,
+ const char* src_file,
+ uint src_line);
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_rename(), not directly
+/** NOTE! Please use the corresponding macro os_file_rename(), not directly
this function!
This is the performance schema instrumented wrapper function for
os_file_rename()
-@return TRUE if success */
+@param[in] key Performance Schema Key
+@param[in] oldpath old file path as a null-terminated string
+@param[in] newpath new file path
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
+@return true if success */
UNIV_INLINE
-ibool
+bool
pfs_os_file_rename_func(
-/*====================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema Key */
- const char* oldpath,/*!< in: old file path as a null-terminated
- string */
- const char* newpath,/*!< in: new file path */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line);/*!< in: line where the func invoked */
+ mysql_pfs_key_t key,
+ const char* oldpath,
+ const char* newpath,
+ const char* src_file,
+ uint src_line);
-/***********************************************************************//**
+/**
NOTE! Please use the corresponding macro os_file_delete(), not directly
this function!
This is the performance schema instrumented wrapper function for
os_file_delete()
-@return TRUE if success */
+@param[in] key Performance Schema Key
+@param[in] name old file path as a null-terminated string
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
+@return true if success */
UNIV_INLINE
bool
pfs_os_file_delete_func(
-/*====================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema Key */
- const char* name, /*!< in: old file path as a null-terminated
- string */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line);/*!< in: line where the func invoked */
+ mysql_pfs_key_t key,
+ const char* name,
+ const char* src_file,
+ uint src_line);
-/***********************************************************************//**
+/**
NOTE! Please use the corresponding macro os_file_delete_if_exists(), not
directly this function!
This is the performance schema instrumented wrapper function for
os_file_delete_if_exists()
-@return TRUE if success */
+@param[in] key Performance Schema Key
+@param[in] name old file path as a null-terminated string
+@param[out]	exist		whether the file pre-existed
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
+@return true if success */
UNIV_INLINE
bool
pfs_os_file_delete_if_exists_func(
-/*==============================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema Key */
- const char* name, /*!< in: old file path as a null-terminated
- string */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line);/*!< in: line where the func invoked */
+ mysql_pfs_key_t key,
+ const char* name,
+ bool* exist,
+ const char* src_file,
+ uint src_line);
+
+#else /* UNIV_PFS_IO */
+
+/* If UNIV_PFS_IO is not defined, these I/O APIs point
+to original un-instrumented file I/O APIs */
+# define os_file_create(key, name, create, purpose, type, read_only, \
+ success) \
+ os_file_create_func(name, create, purpose, type, read_only, \
+ success)
+
+# define os_file_create_simple(key, name, create_mode, access, \
+ read_only, success) \
+ os_file_create_simple_func(name, create_mode, access, \
+ read_only, success)
+
+# define os_file_create_simple_no_error_handling( \
+ key, name, create_mode, access, read_only, success) \
+ os_file_create_simple_no_error_handling_func( \
+ name, create_mode, access, read_only, success)
+
+# define os_file_close(file) os_file_close_func(file)
+
+# define os_aio(type, mode, name, file, buf, offset, \
+ n, read_only, message1, message2) \
+ os_aio_func(type, mode, name, file, buf, offset, \
+ n, read_only, message1, message2)
+
+# define os_file_read(type, file, buf, offset, n) \
+ os_file_read_func(type, file, buf, offset, n)
+
+# define os_file_read_no_error_handling(type, file, buf, offset, n, o) \
+ os_file_read_no_error_handling_func(type, file, buf, offset, n, o)
+# define os_file_read_no_error_handling_int_fd(type, file, buf, offset, n) \
+ os_file_read_no_error_handling_func(type, OS_FILE_FROM_FD(file), buf, offset, n, NULL)
+
+# define os_file_write(type, name, file, buf, offset, n) \
+ os_file_write_func(type, name, file, buf, offset, n)
+# define os_file_write_int_fd(type, name, file, buf, offset, n) \
+ os_file_write_func(type, name, OS_FILE_FROM_FD(file), buf, offset, n)
+
+# define os_file_flush(file) os_file_flush_func(file)
+
+# define os_file_rename(key, oldpath, newpath) \
+ os_file_rename_func(oldpath, newpath)
+
+# define os_file_delete(key, name) os_file_delete_func(name)
+
+# define os_file_delete_if_exists(key, name, exist) \
+ os_file_delete_if_exists_func(name, exist)
+
#endif /* UNIV_PFS_IO */
-#ifdef UNIV_HOTBACKUP
-/***********************************************************************//**
-Closes a file handle.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_close_no_error_handling(
-/*============================*/
- os_file_t file); /*!< in, own: handle to a file */
-#endif /* UNIV_HOTBACKUP */
-/***********************************************************************//**
-Gets a file size.
-@return file size, or (os_offset_t) -1 on failure */
-UNIV_INTERN
+/** Gets a file size.
+@param[in]	filename	name of the file
+@return	file size if OK; on failure m_total_size is set to ~0 and
+	m_alloc_size to the errno */
+os_file_size_t
+os_file_get_size(
+ const char* filename)
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Gets a file size.
+@param[in] file handle to a file
+@return file size, or (os_offset_t) -1 on failure */
os_offset_t
os_file_get_size(
-/*=============*/
- pfs_os_file_t file) /*!< in: handle to a file */
+ os_file_t file)
MY_ATTRIBUTE((warn_unused_result));
-/** Set the size of a newly created file.
+
+/** Extend a file.
+
+On Windows, extending a file allocates blocks for the file,
+unless the file is sparse.
+
+On Unix, we will extend the file with ftruncate(), if
+file needs to be sparse. Otherwise posix_fallocate() is used
+when available, and if not, binary zeroes are added to the end
+of file.
+
@param[in] name file name
@param[in] file file handle
@param[in] size desired file size
@param[in] sparse whether to create a sparse file (no preallocating)
@return whether the operation succeeded */
-UNIV_INTERN
bool
os_file_set_size(
const char* name,
- pfs_os_file_t file,
+ os_file_t file,
os_offset_t size,
bool is_sparse = false)
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/***********************************************************************//**
-Truncates a file at its current position.
-@return TRUE if success */
-UNIV_INTERN
-ibool
+ MY_ATTRIBUTE((warn_unused_result));
+
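+/* Illustrative sketch of the Unix strategy described above; the real
+implementation lives in os0file.cc and also handles Windows and error
+reporting.  HAVE_POSIX_FALLOCATE is assumed from the build configuration,
+and for simplicity the sketch assumes a newly created, empty file. */
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+
+static bool
+example_extend(int fd, os_offset_t size, bool is_sparse)
+{
+	if (is_sparse) {
+		return(ftruncate(fd, size) == 0);
+	}
+#ifdef HAVE_POSIX_FALLOCATE
+	if (posix_fallocate(fd, 0, size) == 0) {
+		return(true);
+	}
+#endif
+	/* Fall back to appending binary zeroes up to the target size. */
+	char	zeroes[4096];
+	memset(zeroes, 0, sizeof zeroes);
+
+	for (os_offset_t ofs = 0; ofs < size; ) {
+		ulint	chunk = size - ofs < (os_offset_t) sizeof zeroes
+			? (ulint) (size - ofs) : (ulint) sizeof zeroes;
+
+		if (pwrite(fd, zeroes, chunk, ofs) != (ssize_t) chunk) {
+			return(false);
+		}
+
+		ofs += chunk;
+	}
+
+	return(true);
+}
+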
+/** Truncates a file at its current position.
+@param[in,out]	file		file to be truncated
+@return true if success */
+bool
os_file_set_eof(
-/*============*/
	FILE*	file);
-/***********************************************************************//**
-NOTE! Use the corresponding macro os_file_flush(), not directly this function!
+
+/** Truncate a file to a specified size in bytes.
+@param[in] pathname file path
+@param[in] file file to be truncated
+@param[in] size size preserved in bytes
+@param[in] allow_shrink whether to allow the file to become smaller
+@return true if success */
+bool
+os_file_truncate(
+ const char* pathname,
+ os_file_t file,
+ os_offset_t size,
+ bool allow_shrink = false);
+
+/** NOTE! Use the corresponding macro os_file_flush(), not directly this
+function!
Flushes the write buffers of a given file to the disk.
-@return TRUE if success */
-UNIV_INTERN
-ibool
+@param[in] file handle to a file
+@return true if success */
+bool
os_file_flush_func(
-/*===============*/
- os_file_t file); /*!< in, own: handle to a file */
-/***********************************************************************//**
-Retrieves the last error number if an error occurs in a file io function.
+ os_file_t file);
+
+/** Retrieves the last error number if an error occurs in a file io function.
The number should be retrieved before any other OS calls (because they may
overwrite the error number). If the number is not known to this program,
the OS error number + 100 is returned.
-@return error number, or OS error number + 100 */
-UNIV_INTERN
+@param[in] report true if we want an error message printed
+ for all errors
+@return error number, or OS error number + 100 */
ulint
os_file_get_last_error(
-/*===================*/
- bool report_all_errors); /*!< in: TRUE if we want an error message
- printed of all errors */
-/*******************************************************************//**
-NOTE! Use the corresponding macro os_file_read(), not directly this function!
+ bool report);
+
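+/* Illustrative sketch of the "+ 100" convention above: error numbers the
+module recognizes stay below 100, so a value >= 100 is an unmapped OS
+errno offset by 100 (an assumption based on the description above). */
+static ulint
+example_raw_os_errno(ulint err)
+{
+	return(err >= 100 ? err - 100 : 0);
+}
+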
+/** NOTE! Use the corresponding macro os_file_read(), not directly this
+function!
Requests a synchronous read operation.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
-ibool
+@param[in] type IO request context
+@param[in] file Open file handle
+@param[out] buf buffer where to read
+@param[in] offset file offset where to read
+@param[in] n number of bytes to read
+@return DB_SUCCESS if request was successful */
+dberr_t
os_file_read_func(
-/*==============*/
- os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
- os_offset_t offset, /*!< in: file offset where to read */
- ulint n); /*!< in: number of bytes to read */
-/*******************************************************************//**
-Rewind file to its start, read at most size - 1 bytes from it to str, and
+ const IORequest& type,
+ os_file_t file,
+ void* buf,
+ os_offset_t offset,
+ ulint n)
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Rewind file to its start, read at most size - 1 bytes from it to str, and
NUL-terminate str. All errors are silently ignored. This function is
-mostly meant to be used with temporary files. */
-UNIV_INTERN
+mostly meant to be used with temporary files.
+@param[in,out] file file to read from
+@param[in,out] str buffer where to read
+@param[in] size size of buffer */
void
os_file_read_string(
-/*================*/
- FILE* file, /*!< in: file to read from */
- char* str, /*!< in: buffer where to read */
- ulint size); /*!< in: size of buffer */
-/*******************************************************************//**
-NOTE! Use the corresponding macro os_file_read_no_error_handling(),
+ FILE* file,
+ char* str,
+ ulint size);
+
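+/* Illustrative sketch: capturing the contents of a temporary file, per
+the description above.  The buffer size is arbitrary. */
+static void
+example_read_tmpfile(FILE* f)
+{
+	char	buf[256];
+
+	os_file_read_string(f, buf, sizeof buf);
+	/* buf now holds at most 255 bytes, NUL-terminated. */
+}
+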
+/** NOTE! Use the corresponding macro os_file_read_no_error_handling(),
not directly this function!
Requests a synchronous positioned read operation. This function does not do
any error handling. In case of error it returns an error code.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
-ibool
+@param[in] type IO request context
+@param[in] file Open file handle
+@param[out] buf buffer where to read
+@param[in] offset file offset where to read
+@param[in] n number of bytes to read
+@param[out] o number of bytes actually read
+@return DB_SUCCESS or error code */
+dberr_t
os_file_read_no_error_handling_func(
-/*================================*/
- os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
- os_offset_t offset, /*!< in: file offset where to read */
- ulint n); /*!< in: number of bytes to read */
+ const IORequest& type,
+ os_file_t file,
+ void* buf,
+ os_offset_t offset,
+ ulint n,
+ ulint* o)
+ MY_ATTRIBUTE((warn_unused_result));
-/*******************************************************************//**
-NOTE! Use the corresponding macro os_file_write(), not directly this
+/** NOTE! Use the corresponding macro os_file_write(), not directly this
function!
Requests a synchronous write operation.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
-ibool
+@param[in] type IO request context
+@param[in] file Open file handle
+@param[in]	buf		buffer from which to write
+@param[in]	offset		file offset where to write
+@param[in]	n		number of bytes to write
+@return DB_SUCCESS if request was successful */
+dberr_t
os_file_write_func(
-/*===============*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- os_file_t file, /*!< in: handle to a file */
- const void* buf, /*!< in: buffer from which to write */
- os_offset_t offset, /*!< in: file offset where to write */
- ulint n); /*!< in: number of bytes to write */
+ const IORequest& type,
+ const char* name,
+ os_file_t file,
+ const void* buf,
+ os_offset_t offset,
+ ulint n)
+ MY_ATTRIBUTE((warn_unused_result));
-/*******************************************************************//**
-Check the existence and type of the given file.
-@return TRUE if call succeeded */
-UNIV_INTERN
-ibool
+/** Check the existence and type of the given file.
+@param[in] path pathname of the file
+@param[out] exists true if file exists
+@param[out] type type of the file (if it exists)
+@return true if call succeeded */
+bool
os_file_status(
-/*===========*/
- const char* path, /*!< in: pathname of the file */
- ibool* exists, /*!< out: TRUE if file exists */
- os_file_type_t* type); /*!< out: type of the file (if it exists) */
-/****************************************************************//**
-The function os_file_dirname returns a directory component of a
-null-terminated pathname string. In the usual case, dirname returns
-the string up to, but not including, the final '/', and basename
-is the component following the final '/'. Trailing '/' characters
-are not counted as part of the pathname.
-
-If path does not contain a slash, dirname returns the string ".".
-
-Concatenating the string returned by dirname, a "/", and the basename
-yields a complete pathname.
-
-The return value is a copy of the directory component of the pathname.
-The copy is allocated from heap. It is the caller responsibility
-to free it after it is no longer needed.
-
-The following list of examples (taken from SUSv2) shows the strings
-returned by dirname and basename for different paths:
-
- path dirname basename
- "/usr/lib" "/usr" "lib"
- "/usr/" "/" "usr"
- "usr" "." "usr"
- "/" "/" "/"
- "." "." "."
- ".." "." ".."
-
-@return own: directory component of the pathname */
-UNIV_INTERN
-char*
-os_file_dirname(
-/*============*/
- const char* path); /*!< in: pathname */
-/****************************************************************//**
-This function returns a new path name after replacing the basename
+ const char* path,
+ bool* exists,
+ os_file_type_t* type);
+
+/** This function returns a new path name after replacing the basename
in an old path with a new basename. The old_path is a full path
name including the extension. The tablename is in the normal
form "databasename/tablename". The new base name is found after
@@ -1073,35 +1386,15 @@ the forward slash. Both input strings are null terminated.
This function allocates memory to be returned. It is the caller's
responsibility to free the return value after it is no longer needed.
-@return own: new full pathname */
-UNIV_INTERN
+@param[in] old_path pathname
+@param[in] new_name new file name
+@return own: new full pathname */
char*
os_file_make_new_pathname(
-/*======================*/
- const char* old_path, /*!< in: pathname */
- const char* new_name); /*!< in: new file name */
-/****************************************************************//**
-This function returns a remote path name by combining a data directory
-path provided in a DATA DIRECTORY clause with the tablename which is
-in the form 'database/tablename'. It strips the file basename (which
-is the tablename) found after the last directory in the path provided.
-The full filepath created will include the database name as a directory
-under the path provided. The filename is the tablename with the '.ibd'
-extension. All input and output strings are null-terminated.
-
-This function allocates memory to be returned. It is the callers
-responsibility to free the return value after it is no longer needed.
+ const char* old_path,
+ const char* new_name);
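+
+/* Illustrative sketch of the expected result (paths made up); the caller
+must free the returned string, ut_free() being the assumed deallocator. */
+static void
+example_new_pathname(void)
+{
+	/* "/data/db1/t1.ibd" + "db1/t2" -> "/data/db1/t2.ibd" */
+	char*	path = os_file_make_new_pathname(
+		"/data/db1/t1.ibd", "db1/t2");
+
+	ut_free(path);
+}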
-@return own: A full pathname; data_dir_path/databasename/tablename.ibd */
-UNIV_INTERN
-char*
-os_file_make_remote_pathname(
-/*=========================*/
- const char* data_dir_path, /*!< in: pathname */
- const char* tablename, /*!< in: tablename */
- const char* extention); /*!< in: file extention; ibd,cfg*/
-/****************************************************************//**
-This function reduces a null-terminated full remote path name into
+/** This function reduces a null-terminated full remote path name into
the path that is sent by MySQL for DATA DIRECTORY clause. It replaces
the 'databasename/tablename.ibd' found at the end of the path with just
'tablename'.
@@ -1111,268 +1404,266 @@ is allocated. The caller should allocate memory for the path sent in.
This function manipulates that path in place.
If the path format is not as expected, just return. The result is used
-to inform a SHOW CREATE TABLE command. */
-UNIV_INTERN
+to inform a SHOW CREATE TABLE command.
+@param[in,out] data_dir_path Full path/data_dir_path */
void
os_file_make_data_dir_path(
-/*========================*/
- char* data_dir_path); /*!< in/out: full path/data_dir_path */
-/****************************************************************//**
-Creates all missing subdirectories along the given path.
-@return TRUE if call succeeded FALSE otherwise */
-UNIV_INTERN
-ibool
+ char* data_dir_path);
+
+/** Create all missing subdirectories along the given path.
+@return DB_SUCCESS if OK, otherwise error code. */
+dberr_t
os_file_create_subdirs_if_needed(
-/*=============================*/
- const char* path); /*!< in: path name */
-/***********************************************************************
-Initializes the asynchronous io system. Creates one array each for ibuf
+ const char* path);
+
+#ifdef UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR
+/* Test the function os_file_get_parent_dir. */
+void
+unit_test_os_file_get_parent_dir();
+#endif /* UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR */
+
+/** Initializes the asynchronous io system. Creates one array each for ibuf
and log i/o. Also creates one array each for read and write where each
array is divided logically into n_read_segs and n_write_segs
respectively. The caller must create an i/o handler thread for each
segment in these arrays. This function also creates the sync array.
-No i/o handler thread needs to be created for that */
-UNIV_INTERN
-ibool
+No i/o handler thread needs to be created for that.
+@param[in]	n_read_segs	number of reader threads
+@param[in]	n_write_segs	number of writer threads
+@param[in]	n_slots_sync	number of slots in the sync aio array
+@return true if success */
+bool
os_aio_init(
-/*========*/
- ulint n_per_seg, /*<! in: maximum number of pending aio
- operations allowed per segment */
- ulint n_read_segs, /*<! in: number of reader threads */
- ulint n_write_segs, /*<! in: number of writer threads */
- ulint n_slots_sync); /*<! in: number of slots in the sync aio
- array */
-/***********************************************************************
+ ulint n_read_segs,
+ ulint n_write_segs,
+ ulint n_slots_sync);
+
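+/* Illustrative sketch: starting the AIO subsystem with made-up segment
+counts; real values come from srv_n_read_io_threads and friends. */
+static bool
+example_start_aio(void)
+{
+	return(os_aio_init(4 /* read segments */,
+			   4 /* write segments */,
+			   100 /* sync slots */));
+}
+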
+/**
Frees the asynchronous io system. */
-UNIV_INTERN
void
-os_aio_free(void);
-/*=============*/
+os_aio_free();
-/*******************************************************************//**
+/**
NOTE! Use the corresponding macro os_aio(), not directly this function!
Requests an asynchronous i/o operation.
-@return TRUE if request was queued successfully, FALSE if fail */
-UNIV_INTERN
-ibool
+@param[in,out] type IO request context
+@param[in] mode IO mode
+@param[in] name Name of the file or path as NUL terminated
+ string
+@param[in] file Open file handle
+@param[in,out]	buf		buffer where to read or from which to write
+@param[in]	offset		file offset where to read or write
+@param[in]	n		number of bytes to read or write
+@param[in] read_only if true read only mode checks are enforced
+@param[in,out] m1 Message for the AIO handler, (can be used to
+ identify a completed AIO operation); ignored
+ if mode is OS_AIO_SYNC
+@param[in,out] m2 message for the AIO handler (can be used to
+ identify a completed AIO operation); ignored
+ if mode is OS_AIO_SYNC
+@return DB_SUCCESS or error code */
+dberr_t
os_aio_func(
-/*========*/
- ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
- ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
- ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed
- to OS_AIO_SIMULATED_WAKE_LATER: the
- last flag advises this function not to wake
- i/o-handler threads, but the caller will
- do the waking explicitly later, in this
- way the caller can post several requests in
- a batch; NOTE that the batch must not be
- so big that it exhausts the slots in aio
- arrays! NOTE that a simulated batch
- may introduce hidden chances of deadlocks,
- because i/os are not actually handled until
- all have been posted: use with great
- caution! */
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- pfs_os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read or from which
- to write */
- os_offset_t offset, /*!< in: file offset where to read or write */
- ulint n, /*!< in: number of bytes to read or write */
- ulint page_size, /*!< in: page size in bytes */
- fil_node_t* message1,/*!< in: message for the aio handler
- (can be used to identify a completed
- aio operation); ignored if mode is
- OS_AIO_SYNC */
- void* message2,/*!< in: message for the aio handler
- (can be used to identify a completed
- aio operation); ignored if mode is
- OS_AIO_SYNC */
- ulint* write_size);/*!< in/out: Actual write size initialized
- after fist successfull trim
- operation for this page and if
- initialized we do not trim again if
- actual page size does not decrease. */
-/************************************************************************//**
-Wakes up all async i/o threads so that they know to exit themselves in
+ IORequest& type,
+ ulint mode,
+ const char* name,
+ pfs_os_file_t file,
+ void* buf,
+ os_offset_t offset,
+ ulint n,
+ bool read_only,
+ fil_node_t* m1,
+ void* m2);
+
+/** Wakes up all async i/o threads so that they know to exit themselves in
shutdown. */
-UNIV_INTERN
void
-os_aio_wake_all_threads_at_shutdown(void);
-/*=====================================*/
-/************************************************************************//**
-Waits until there are no pending writes in os_aio_write_array. There can
+os_aio_wake_all_threads_at_shutdown();
+
+/** Waits until there are no pending writes in os_aio_write_array. There can
be other, synchronous, pending writes. */
-UNIV_INTERN
void
-os_aio_wait_until_no_pending_writes(void);
-/*=====================================*/
-/**********************************************************************//**
-Wakes up simulated aio i/o-handler threads if they have something to do. */
-UNIV_INTERN
+os_aio_wait_until_no_pending_writes();
+
+/** Wakes up simulated aio i/o-handler threads if they have something to do. */
void
-os_aio_simulated_wake_handler_threads(void);
-/*=======================================*/
+os_aio_simulated_wake_handler_threads();
+
#ifdef _WIN32
-/**********************************************************************//**
-This function can be called if one wants to post a batch of reads and
+/** This function can be called if one wants to post a batch of reads and
prefers an i/o-handler thread to handle them all at once later. You must
call os_aio_simulated_wake_handler_threads later to ensure the threads
are not left sleeping! */
-UNIV_INTERN
void
os_aio_simulated_put_read_threads_to_sleep();
#else /* _WIN32 */
# define os_aio_simulated_put_read_threads_to_sleep()
#endif /* _WIN32 */
-#ifdef WIN_ASYNC_IO
-/**********************************************************************//**
-This function is only used in Windows asynchronous i/o.
+/** This is the generic AIO handler interface function.
Waits for an aio operation to complete. This function is used to wait
-for completed requests. The aio array of pending requests is divided
+for completed requests. The AIO array of pending requests is divided
into segments. The thread specifies which segment or slot it wants to wait
for. NOTE: this function will also take care of freeing the aio slot,
therefore no other thread is allowed to do the freeing!
-@return TRUE if the aio operation succeeded */
-UNIV_INTERN
-ibool
-os_aio_windows_handle(
-/*==================*/
- ulint segment, /*!< in: the number of the segment in the aio
- arrays to wait for; segment 0 is the ibuf
- i/o thread, segment 1 the log i/o thread,
- then follow the non-ibuf read threads, and as
- the last are the non-ibuf write threads; if
- this is ULINT_UNDEFINED, then it means that
- sync aio is used, and this parameter is
- ignored */
- ulint pos, /*!< this parameter is used only in sync aio:
- wait for the aio slot at this position */
- fil_node_t**message1, /*!< out: the messages passed with the aio
- request; note that also in the case where
- the aio operation failed, these output
- parameters are valid and can be used to
- restart the operation, for example */
- void** message2,
- ulint* type); /*!< out: OS_FILE_WRITE or ..._READ */
-#endif
-
-/**********************************************************************//**
-Does simulated aio. This function should be called by an i/o-handler
-thread.
-@return TRUE if the aio operation succeeded */
-UNIV_INTERN
-ibool
-os_aio_simulated_handle(
-/*====================*/
- ulint segment, /*!< in: the number of the segment in the aio
- arrays to wait for; segment 0 is the ibuf
- i/o thread, segment 1 the log i/o thread,
- then follow the non-ibuf read threads, and as
- the last are the non-ibuf write threads */
- fil_node_t**message1, /*!< out: the messages passed with the aio
- request; note that also in the case where
- the aio operation failed, these output
- parameters are valid and can be used to
- restart the operation, for example */
- void** message2,
- ulint* type); /*!< out: OS_FILE_WRITE or ..._READ */
-/**********************************************************************//**
-Validates the consistency of the aio system.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-os_aio_validate(void);
-/*=================*/
-/**********************************************************************//**
-Prints info of the aio arrays. */
-UNIV_INTERN
+@param[in] segment the number of the segment in the aio arrays to
+ wait for; segment 0 is the ibuf I/O thread,
+ segment 1 the log I/O thread, then follow the
+ non-ibuf read threads, and as the last are the
+ non-ibuf write threads; if this is
+ ULINT_UNDEFINED, then it means that sync AIO
+ is used, and this parameter is ignored
+@param[out] m1 the messages passed with the AIO request;
+ note that also in the case where the AIO
+ operation failed, these output parameters
+ are valid and can be used to restart the
+ operation, for example
+@param[out] m2 callback message
+@param[out] type OS_FILE_WRITE or ..._READ
+@return DB_SUCCESS or error code */
+dberr_t
+os_aio_handler(
+ ulint segment,
+ fil_node_t** m1,
+ void** m2,
+ IORequest* type);
+
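A note on the consolidated interface above: the segment layout (ibuf, log,
then non-ibuf read and write threads) implies one handler thread draining
each segment. A minimal consumer-loop sketch, assuming only the
os_aio_handler() declaration shown here; in the real server the completion
is dispatched through the fil layer:

	/* Hedged sketch: one I/O handler thread draining a segment.
	os_aio_handler() blocks until a request in this segment completes
	and frees the AIO slot itself, per the comment above. */
	void
	io_handler_loop(ulint segment)
	{
		for (;;) {
			fil_node_t*	m1;
			void*		m2;
			IORequest	type;

			dberr_t	err = os_aio_handler(
				segment, &m1, &m2, &type);

			ut_a(err == DB_SUCCESS);

			/* ... hand (m1, m2, type) to the completion
			routine for this request ... */
		}
	}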
+/** Prints info of the aio arrays.
+@param[in,out]	file		file where to print */
void
-os_aio_print(
-/*=========*/
- FILE* file); /*!< in: file where to print */
-/**********************************************************************//**
-Refreshes the statistics used to print per-second averages. */
-UNIV_INTERN
+os_aio_print(FILE* file);
+
+/** Refreshes the statistics used to print per-second averages. */
void
-os_aio_refresh_stats(void);
-/*======================*/
+os_aio_refresh_stats();
-#ifdef UNIV_DEBUG
-/**********************************************************************//**
-Checks that all slots in the system have been freed, that is, there are
+/** Checks that all slots in the system have been freed, that is, there are
no pending io operations. */
-UNIV_INTERN
-ibool
-os_aio_all_slots_free(void);
-/*=======================*/
+bool
+os_aio_all_slots_free();
+
+#ifdef UNIV_DEBUG
+
+/** Prints all pending IO
+@param[in] file file where to print */
+void
+os_aio_print_pending_io(FILE* file);
+
#endif /* UNIV_DEBUG */
-/*******************************************************************//**
-This function returns information about the specified file
-@return DB_SUCCESS if all OK */
-UNIV_INTERN
+/** This function returns information about the specified file
+@param[in] path pathname of the file
+@param[out]	stat_info	information of a file in a directory
+@param[in] check_rw_perm for testing whether the file can be opened
+ in RW mode
+@param[in] read_only if true read only mode checks are enforced
+@return DB_SUCCESS if all OK */
dberr_t
os_file_get_status(
-/*===============*/
- const char* path, /*!< in: pathname of the file */
- os_file_stat_t* stat_info, /*!< information of a file in a
- directory */
- bool check_rw_perm); /*!< in: for testing whether the
- file can be opened in RW mode */
-
-#if !defined(UNIV_HOTBACKUP)
-/** Create a temporary file in the location specified by the parameter
-path. If the path is null, then it will be created in tmpdir.
+ const char* path,
+ os_file_stat_t* stat_info,
+ bool check_rw_perm,
+ bool read_only);
+
+/** Creates a temporary file in the location specified by the parameter
+path. If the path is NULL, then it will be created in the --tmpdir location.
+This function is defined in ha_innodb.cc.
@param[in] path location for creating temporary file
@return temporary file descriptor, or < 0 on error */
-UNIV_INTERN
int
innobase_mysql_tmpfile(
const char* path);
-#endif /* !UNIV_HOTBACKUP */
+/** Set the file create umask
+@param[in] umask The umask to use for file creation. */
+void
+os_file_set_umask(ulint umask);
-#if defined(LINUX_NATIVE_AIO)
-/**************************************************************************
-This function is only used in Linux native asynchronous i/o.
-Waits for an aio operation to complete. This function is used to wait the
-for completed requests. The aio array of pending requests is divided
-into segments. The thread specifies which segment or slot it wants to wait
-for. NOTE: this function will also take care of freeing the aio slot,
-therefore no other thread is allowed to do the freeing!
-@return TRUE if the IO was successful */
-UNIV_INTERN
-ibool
-os_aio_linux_handle(
-/*================*/
- ulint global_seg, /*!< in: segment number in the aio array
- to wait for; segment 0 is the ibuf
- i/o thread, segment 1 is log i/o thread,
- then follow the non-ibuf read threads,
- and the last are the non-ibuf write
- threads. */
- fil_node_t**message1, /*!< out: the messages passed with the */
- void** message2, /*!< aio request; note that in case the
- aio operation failed, these output
- parameters are valid and can be used to
- restart the operation. */
- ulint* type); /*!< out: OS_FILE_WRITE or ..._READ */
-#endif /* LINUX_NATIVE_AIO */
-
-/****************************************************************//**
-Does error handling when a file operation fails.
-@return TRUE if we should retry the operation */
-ibool
-os_file_handle_error_no_exit(
-/*=========================*/
- const char* name, /*!< in: name of a file or NULL */
- const char* operation, /*!< in: operation */
- ibool on_error_silent,/*!< in: if TRUE then don't print
- any message to the log. */
- const char* file, /*!< in: file name */
- const ulint line); /*!< in: line */
+#ifdef _WIN32
+
+/**
+Make a file sparse on Windows.
+
+@param[in] file file handle
+@param[in] is_sparse if true, make file sparse,
+ otherwise "unsparse" the file
+@return true on success, false on error */
+bool os_file_set_sparse_win32(os_file_t file, bool is_sparse = true);
+
+/**
+Changes file size on Windows
+
+If the file is extended, the bytes between the old and new EOF are
+zeroed.
+
+If the file is sparse, a "virtual" block is added at the end of the
+allocated area.
+
+If the file is normal, the file system allocates storage.
+
+@param[in] pathname file path
+@param[in] file file handle
+@param[in] size size to preserve in bytes
+@return true if success */
+bool
+os_file_change_size_win32(
+ const char* pathname,
+ os_file_t file,
+ os_offset_t size);
+
+#endif /*_WIN32 */
+
+/** Check if the file system supports sparse files.
+
+Warning: On POSIX systems we try to punch a hole from offset 0 to
+the system configured page size. This should only be called on an empty
+file.
+
+@param[in] fh File handle for the file - if opened
+@return true if the file system supports sparse files */
+bool
+os_is_sparse_file_supported(
+ os_file_t fh)
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Free storage space associated with a section of the file.
+@param[in] fh Open file handle
+@param[in] off Starting offset (SEEK_SET)
+@param[in] len Size of the hole
+@return DB_SUCCESS or error code */
+dberr_t
+os_file_punch_hole(
+ os_file_t fh,
+ os_offset_t off,
+ os_offset_t len)
+ MY_ATTRIBUTE((warn_unused_result));
+
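On Linux the usual primitive behind hole punching is fallocate(2). A
hedged, Linux-only sketch of the kind of call os_file_punch_hole() wraps;
the error-code mapping here is an assumption, and the real function also
has Windows and fallback paths:

	#include <fcntl.h>	/* fallocate(), FALLOC_FL_* (Linux) */
	#include <errno.h>

	/* Hedged sketch: free [off, off + len) without changing the
	visible file size. */
	static dberr_t
	punch_hole_posix(int fd, os_offset_t off, os_offset_t len)
	{
	#ifdef FALLOC_FL_PUNCH_HOLE
		if (!fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			       off, len)) {
			return(DB_SUCCESS);
		}

		/* EOPNOTSUPP: this file system cannot punch holes. */
		return(errno == EOPNOTSUPP
		       ? DB_IO_NO_PUNCH_HOLE : DB_IO_ERROR);
	#else
		return(DB_IO_NO_PUNCH_HOLE);
	#endif
	}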
+/** Normalizes a directory path for the current OS:
+On Windows, we convert '/' to '\', else we convert '\' to '/'.
+@param[in,out] str A null-terminated directory and file path */
+void os_normalize_path(char* str);
+
+/** Determine if a path is an absolute path or not.
+@param[in]	path		OS directory or file path to evaluate
+@retval true if an absolute path
+@retval false if a relative path */
+UNIV_INLINE
+bool
+is_absolute_path(
+ const char* path)
+{
+ if (path[0] == OS_PATH_SEPARATOR) {
+ return(true);
+ }
+
+#ifdef _WIN32
+ if (path[1] == ':' && path[2] == OS_PATH_SEPARATOR) {
+ return(true);
+ }
+#endif /* _WIN32 */
+
+ return(false);
+}
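Purely illustrative checks of the inline above, on POSIX where
OS_PATH_SEPARATOR is '/' (the Windows branch additionally accepts
drive-letter paths such as "C:\data"):

	ut_ad(is_absolute_path("/var/lib/mysql/ibdata1"));
	ut_ad(!is_absolute_path("data/ibdata1"));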
/***********************************************************************//**
Try to get number of bytes per sector from file system.
@@ -1384,8 +1675,6 @@ os_file_get_block_size(
os_file_t file, /*!< in: handle to a file */
const char* name); /*!< in: file name */
-#ifndef UNIV_NONINL
#include "os0file.ic"
-#endif
-#endif
+#endif /* os0file_h */
diff --git a/storage/innobase/include/os0file.ic b/storage/innobase/include/os0file.ic
index f5fe35dd9c9..f363bd5135a 100644
--- a/storage/innobase/include/os0file.ic
+++ b/storage/innobase/include/os0file.ic
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2010, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation.
+Copyright (c) 2013, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,90 +24,96 @@ The interface to the operating system file io
Created 2/20/2010 Jimmy Yang
*******************************************************/
-#include "univ.i"
-
#ifdef UNIV_PFS_IO
-/****************************************************************//**
-NOTE! Please use the corresponding macro os_file_create_simple(),
+/** NOTE! Please use the corresponding macro os_file_create_simple(),
not directly this function!
A performance schema instrumented wrapper function for
os_file_create_simple() which opens or creates a file.
+@param[in] key Performance Schema Key
+@param[in] name name of the file or path as a null-terminated
+ string
+@param[in] create_mode create mode
+@param[in] access_type OS_FILE_READ_ONLY or OS_FILE_READ_WRITE
+@param[in] read_only if true read only mode checks are enforced
+@param[out] success true if succeeded
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
@return own: handle to the file, not defined if error, error number
can be retrieved with os_file_get_last_error */
UNIV_INLINE
pfs_os_file_t
pfs_os_file_create_simple_func(
-/*===========================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema Key */
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: create mode */
- ulint access_type,/*!< in: OS_FILE_READ_ONLY or
- OS_FILE_READ_WRITE */
- ibool* success,/*!< out: TRUE if succeed, FALSE if error */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
+ mysql_pfs_key_t key,
+ const char* name,
+ ulint create_mode,
+ ulint access_type,
+ bool read_only,
+ bool* success,
+ const char* src_file,
+ uint src_line)
{
- pfs_os_file_t file;
- struct PSI_file_locker* locker = NULL;
PSI_file_locker_state state;
+ struct PSI_file_locker* locker = NULL;
/* register a file open or creation depending on "create_mode" */
- register_pfs_file_open_begin(&state, locker, key,
- ((create_mode == OS_FILE_CREATE)
- ? PSI_FILE_CREATE
- : PSI_FILE_OPEN),
- name, src_file, src_line);
+ register_pfs_file_open_begin(
+ &state, locker, key,
+ (create_mode == OS_FILE_CREATE)
+ ? PSI_FILE_CREATE : PSI_FILE_OPEN,
+ name, src_file, src_line);
- file = os_file_create_simple_func(name, create_mode,
- access_type, success);
+ pfs_os_file_t file = os_file_create_simple_func(
+ name, create_mode, access_type, read_only, success);
/* Register psi value for the file */
register_pfs_file_open_end(locker, file,
- (*success == TRUE ? success : 0));
+ (*success == TRUE ? success : 0));
return(file);
}
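Every pfs_os_file_* wrapper in this file follows the same begin/call/end
shape, and each is reached through a macro that records the caller's
location. A hedged sketch of that macro indirection; the real definitions
live in os0file.h:

	/* When UNIV_PFS_IO is defined, the public macro forwards to the
	instrumented wrapper with __FILE__/__LINE__ so that Performance
	Schema can attribute the file I/O wait to the calling site. */
	# define os_file_create_simple(key, name, create_mode, access,	\
				       read_only, success)		\
		pfs_os_file_create_simple_func(key, name, create_mode,	\
					       access, read_only,	\
					       success,			\
					       __FILE__, __LINE__)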
-/****************************************************************//**
-NOTE! Please use the corresponding macro
+/** NOTE! Please use the corresponding macro
os_file_create_simple_no_error_handling(), not directly this function!
A performance schema instrumented wrapper function for
os_file_create_simple_no_error_handling(). Add instrumentation to
monitor file creation/open.
+@param[in] key Performance Schema Key
+@param[in] name name of the file or path as a null-terminated
+ string
+@param[in] create_mode create mode
+@param[in] access_type OS_FILE_READ_ONLY, OS_FILE_READ_WRITE, or
+ OS_FILE_READ_ALLOW_DELETE; the last option is
+ used by a backup program reading the file
+@param[in] read_only if true read only mode checks are enforced
+@param[out] success true if succeeded
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
@return own: handle to the file, not defined if error, error number
can be retrieved with os_file_get_last_error */
UNIV_INLINE
pfs_os_file_t
pfs_os_file_create_simple_no_error_handling_func(
-/*=============================================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema Key */
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode, /*!< in: file create mode */
- ulint access_type,/*!< in: OS_FILE_READ_ONLY,
- OS_FILE_READ_WRITE, or
- OS_FILE_READ_ALLOW_DELETE; the last option is
- used by a backup program reading the file */
- ibool* success,/*!< out: TRUE if succeed, FALSE if error */
- ulint atomic_writes,/*!< in: atomic writes table option
- value */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
+ mysql_pfs_key_t key,
+ const char* name,
+ ulint create_mode,
+ ulint access_type,
+ bool read_only,
+ bool* success,
+ const char* src_file,
+ uint src_line)
{
- pfs_os_file_t file;
- struct PSI_file_locker* locker = NULL;
PSI_file_locker_state state;
+ struct PSI_file_locker* locker = NULL;
/* register a file open or creation depending on "create_mode" */
- register_pfs_file_open_begin(&state, locker, key,
- ((create_mode == OS_FILE_CREATE)
- ? PSI_FILE_CREATE
- : PSI_FILE_OPEN),
- name, src_file, src_line);
+ register_pfs_file_open_begin(
+ &state, locker, key,
+ create_mode == OS_FILE_CREATE
+ ? PSI_FILE_CREATE : PSI_FILE_OPEN,
+ name, src_file, src_line);
- file = os_file_create_simple_no_error_handling_func(
- name, create_mode, access_type, success, atomic_writes);
+ pfs_os_file_t file = os_file_create_simple_no_error_handling_func(
+ name, create_mode, access_type, read_only, success);
register_pfs_file_open_end(locker, file,
(*success == TRUE ? success : 0));
@@ -115,196 +121,217 @@ pfs_os_file_create_simple_no_error_handling_func(
return(file);
}
-/****************************************************************//**
-NOTE! Please use the corresponding macro os_file_create(), not directly
+/** NOTE! Please use the corresponding macro os_file_create(), not directly
this function!
A performance schema wrapper function for os_file_create().
Add instrumentation to monitor file creation/open.
+@param[in] key Performance Schema Key
+@param[in] name name of the file or path as a null-terminated
+ string
+@param[in] create_mode create mode
+@param[in] purpose OS_FILE_AIO, if asynchronous, non-buffered I/O
+ is desired, OS_FILE_NORMAL, if any normal file;
+ NOTE that it also depends on type, os_aio_..
+			and srv_.. variables whether we really use
+ async I/O or unbuffered I/O: look in the
+ function source code for the exact rules
+@param[in] read_only if true read only mode checks are enforced
+@param[out] success true if succeeded
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
@return own: handle to the file, not defined if error, error number
can be retrieved with os_file_get_last_error */
UNIV_INLINE
pfs_os_file_t
pfs_os_file_create_func(
-/*====================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema Key */
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: file create mode */
- ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous,
- non-buffered i/o is desired,
- OS_FILE_NORMAL, if any normal file;
- NOTE that it also depends on type, os_aio_..
- and srv_.. variables whether we really use
- async i/o or unbuffered i/o: look in the
- function source code for the exact rules */
- ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
- ibool* success,/*!< out: TRUE if succeed, FALSE if error */
- ulint atomic_writes, /*!< in: atomic writes table option
- value */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
+ mysql_pfs_key_t key,
+ const char* name,
+ ulint create_mode,
+ ulint purpose,
+ ulint type,
+ bool read_only,
+ bool* success,
+ const char* src_file,
+ uint src_line)
{
- pfs_os_file_t file;
- struct PSI_file_locker* locker = NULL;
PSI_file_locker_state state;
+ struct PSI_file_locker* locker = NULL;
/* register a file open or creation depending on "create_mode" */
- register_pfs_file_open_begin(&state, locker, key,
- ((create_mode == OS_FILE_CREATE)
- ? PSI_FILE_CREATE
- : PSI_FILE_OPEN),
- name, src_file, src_line);
+ register_pfs_file_open_begin(
+ &state, locker, key,
+ create_mode == OS_FILE_CREATE
+ ? PSI_FILE_CREATE : PSI_FILE_OPEN,
+ name, src_file, src_line);
- file = os_file_create_func(name, create_mode, purpose, type, success, atomic_writes);
+ pfs_os_file_t file = os_file_create_func(
+ name, create_mode, purpose, type, read_only, success);
register_pfs_file_open_end(locker, file,
(*success == TRUE ? success : 0));
return(file);
}
-
-/***********************************************************************//**
+/**
NOTE! Please use the corresponding macro os_file_close(), not directly
this function!
A performance schema instrumented wrapper function for os_file_close().
-@return TRUE if success */
+@param[in] file handle to a file
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
+@return true if success */
UNIV_INLINE
-ibool
+bool
pfs_os_file_close_func(
-/*===================*/
- pfs_os_file_t file, /*!< in, own: handle to a file */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
+ pfs_os_file_t file,
+ const char* src_file,
+ uint src_line)
{
- ibool result;
- struct PSI_file_locker* locker = NULL;
PSI_file_locker_state state;
+ struct PSI_file_locker* locker = NULL;
/* register the file close */
- register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_CLOSE,
- src_file, src_line);
+ register_pfs_file_io_begin(
+ &state, locker, file, 0, PSI_FILE_CLOSE, src_file, src_line);
- result = os_file_close_func(file);
+ bool result = os_file_close_func(file);
register_pfs_file_io_end(locker, 0);
return(result);
}
-/*******************************************************************//**
-NOTE! Please use the corresponding macro os_aio(), not directly this
+/** NOTE! Please use the corresponding macro os_aio(), not directly this
function!
-Performance schema instrumented wrapper function of os_aio() which
-requests an asynchronous i/o operation.
-@return TRUE if request was queued successfully, FALSE if fail */
+Performance schema wrapper function of os_aio() which requests
+an asynchronous i/o operation.
+@param[in,out]	type	IO request context
+@param[in] mode IO mode
+@param[in] name Name of the file or path as NUL terminated
+ string
+@param[in] file Open file handle
+@param[out] buf buffer where to read
+@param[in] offset file offset where to read
+@param[in] n number of bytes to read
+@param[in] read_only if true read only mode checks are enforced
+@param[in,out] m1 Message for the AIO handler, (can be used to
+ identify a completed AIO operation); ignored
+ if mode is OS_AIO_SYNC
+@param[in,out] m2 message for the AIO handler (can be used to
+ identify a completed AIO operation); ignored
+ if mode is OS_AIO_SYNC
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
+@return DB_SUCCESS if the request was queued successfully, or an error code */
UNIV_INLINE
-ibool
+dberr_t
pfs_os_aio_func(
-/*============*/
- ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
- ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
- ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- pfs_os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read or from which
- to write */
- os_offset_t offset, /*!< in: file offset where to read or write */
- ulint n, /*!< in: number of bytes to read or write */
- ulint page_size, /*!< in: page size in bytes */
- fil_node_t* message1,/*!< in: message for the aio handler
- (can be used to identify a completed
- aio operation); ignored if mode is
- OS_AIO_SYNC */
- void* message2,/*!< in: message for the aio handler
- (can be used to identify a completed
- aio operation); ignored if mode is
- OS_AIO_SYNC */
- ulint* write_size,/*!< in/out: Actual write size initialized
- after fist successfull trim
- operation for this page and if
- initialized we do not trim again if
- actual page size does not decrease. */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
+ IORequest& type,
+ ulint mode,
+ const char* name,
+ pfs_os_file_t file,
+ void* buf,
+ os_offset_t offset,
+ ulint n,
+ bool read_only,
+ fil_node_t* m1,
+ void* m2,
+ const char* src_file,
+ uint src_line)
{
- ibool result;
- struct PSI_file_locker* locker = NULL;
PSI_file_locker_state state;
+ struct PSI_file_locker* locker = NULL;
+
+ ut_ad(type.validate());
/* Register the read or write I/O depending on "type" */
- register_pfs_file_io_begin(&state, locker, file, n,
- (type == OS_FILE_WRITE)
- ? PSI_FILE_WRITE
- : PSI_FILE_READ,
- src_file, src_line);
+ register_pfs_file_io_begin(
+ &state, locker, file, n,
+ type.is_write() ? PSI_FILE_WRITE : PSI_FILE_READ,
+ src_file, src_line);
- result = os_aio_func(type, is_log, mode, name, file, buf, offset,
- n, page_size, message1, message2, write_size);
+ dberr_t result = os_aio_func(
+ type, mode, name, file, buf, offset, n, read_only, m1, m2);
register_pfs_file_io_end(locker, n);
return(result);
}
-/*******************************************************************//**
-NOTE! Please use the corresponding macro os_file_read(), not directly
+/** NOTE! Please use the corresponding macro os_file_read(), not directly
this function!
This is the performance schema instrumented wrapper function for
os_file_read() which requests a synchronous read operation.
-@return TRUE if request was successful, FALSE if fail */
+@param[in] type IO request context
+@param[in] file Open file handle
+@param[out] buf buffer where to read
+@param[in] offset file offset where to read
+@param[in] n number of bytes to read
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
+@return DB_SUCCESS if request was successful */
UNIV_INLINE
-ibool
+dberr_t
pfs_os_file_read_func(
-/*==================*/
- pfs_os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
- os_offset_t offset, /*!< in: file offset where to read */
- ulint n, /*!< in: number of bytes to read */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
+ const IORequest& type,
+ pfs_os_file_t file,
+ void* buf,
+ os_offset_t offset,
+ ulint n,
+ const char* src_file,
+ uint src_line)
{
- ibool result;
- struct PSI_file_locker* locker = NULL;
PSI_file_locker_state state;
+ struct PSI_file_locker* locker = NULL;
+
+ ut_ad(type.validate());
- register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ,
- src_file, src_line);
+ register_pfs_file_io_begin(
+ &state, locker, file, n, PSI_FILE_READ, src_file, src_line);
- result = os_file_read_func(file, buf, offset, n);
+ dberr_t result;
+
+ result = os_file_read_func(type, file, buf, offset, n);
register_pfs_file_io_end(locker, n);
return(result);
}
-/*******************************************************************//**
-NOTE! Please use the corresponding macro
-os_file_read_no_error_handling(), not directly this function!
+/** NOTE! Please use the corresponding macro os_file_read_no_error_handling(),
+not directly this function!
This is the performance schema instrumented wrapper function for
-os_file_read_no_error_handling() which requests a synchronous
-positioned read operation. This function does not do any error
-handling. In case of error it returns FALSE.
-@return TRUE if request was successful, FALSE if fail */
+os_file_read_no_error_handling_func() which requests a synchronous
+read operation.
+@param[in] type IO request context
+@param[in] file Open file handle
+@param[out] buf buffer where to read
+@param[in] offset file offset where to read
+@param[in] n number of bytes to read
+@param[out] o number of bytes actually read
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
+@return DB_SUCCESS if request was successful */
UNIV_INLINE
-ibool
+dberr_t
pfs_os_file_read_no_error_handling_func(
-/*====================================*/
- pfs_os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
- os_offset_t offset, /*!< in: file offset where to read */
- ulint n, /*!< in: number of bytes to read */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
+ const IORequest& type,
+ pfs_os_file_t file,
+ void* buf,
+ os_offset_t offset,
+ ulint n,
+ ulint* o,
+ const char* src_file,
+ uint src_line)
{
- ibool result;
- struct PSI_file_locker* locker = NULL;
PSI_file_locker_state state;
+ struct PSI_file_locker* locker = NULL;
- register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ,
- src_file, src_line);
+ register_pfs_file_io_begin(
+ &state, locker, file, n, PSI_FILE_READ, src_file, src_line);
- result = os_file_read_no_error_handling_func(file, buf, offset, n);
+ dberr_t result = os_file_read_no_error_handling_func(
+ type, file, buf, offset, n, o);
register_pfs_file_io_end(locker, n);
@@ -312,223 +339,258 @@ pfs_os_file_read_no_error_handling_func(
}
/** NOTE! Please use the corresponding macro
-os_file_read_no_error_handling_int_fd(), not directly this function!
-This is the performance schema instrumented wrapper function for
-os_file_read_no_error_handling_int_fd_func() which requests a
-synchronous read operation.
-@return TRUE if request was successful, FALSE if fail */
+os_file_read_no_error_handling_int_fd() to request
+a synchronous read operation.
+@param[in] type read request
+@param[in] file file handle
+@param[out] buf buffer where to read
+@param[in] offset file offset where to read
+@param[in] n number of bytes to read
+@param[in] src_file caller file name
+@param[in] src_line caller line number
+@return error code
+@retval DB_SUCCESS if the operation succeeded */
UNIV_INLINE
-ibool
+dberr_t
pfs_os_file_read_no_error_handling_int_fd_func(
- int file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
- os_offset_t offset, /*!< in: file offset where to read */
- ulint n, /*!< in: number of bytes to read */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
+ const IORequest& type,
+ int file,
+ void* buf,
+ os_offset_t offset,
+ ulint n,
+ const char* src_file,
+ uint src_line)
{
PSI_file_locker_state state;
- struct PSI_file_locker* locker;
- locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)(
- &state, file, PSI_FILE_READ);
+ PSI_file_locker* locker = PSI_FILE_CALL(
+ get_thread_file_descriptor_locker)(
+ &state, file, PSI_FILE_READ);
if (locker != NULL) {
PSI_FILE_CALL(start_file_wait)(
locker, n,
__FILE__, __LINE__);
}
- ibool result = os_file_read_no_error_handling_func(
- OS_FILE_FROM_FD(file), buf, offset, n);
+
+ dberr_t err = os_file_read_no_error_handling_func(
+ type, OS_FILE_FROM_FD(file), buf, offset, n, NULL);
if (locker != NULL) {
PSI_FILE_CALL(end_file_wait)(locker, n);
}
- return(result);
+ return err;
}
-/*******************************************************************//**
-NOTE! Please use the corresponding macro os_file_write(), not directly
+/** NOTE! Please use the corresponding macro os_file_write(), not directly
this function!
This is the performance schema instrumented wrapper function for
os_file_write() which requests a synchronous write operation.
-@return TRUE if request was successful, FALSE if fail */
+@param[in] type IO request context
+@param[in] name Name of the file or path as NUL terminated
+ string
+@param[in] file Open file handle
+@param[in]	buf	buffer from which to write
+@param[in]	offset	file offset where to write
+@param[in]	n	number of bytes to write
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
+@return error code
+@retval DB_SUCCESS if the request was successfully fulfilled */
UNIV_INLINE
-ibool
+dberr_t
pfs_os_file_write_func(
-/*===================*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- pfs_os_file_t file, /*!< in: handle to a file */
- const void* buf, /*!< in: buffer from which to write */
- os_offset_t offset, /*!< in: file offset where to write */
- ulint n, /*!< in: number of bytes to write */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
+ const IORequest& type,
+ const char* name,
+ pfs_os_file_t file,
+ const void* buf,
+ os_offset_t offset,
+ ulint n,
+ const char* src_file,
+ uint src_line)
{
- ibool result;
- struct PSI_file_locker* locker = NULL;
PSI_file_locker_state state;
+ struct PSI_file_locker* locker = NULL;
- register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_WRITE,
- src_file, src_line);
+ register_pfs_file_io_begin(
+ &state, locker, file, n, PSI_FILE_WRITE, src_file, src_line);
- result = os_file_write_func(name, file, buf, offset, n);
+ dberr_t result;
+
+ result = os_file_write_func(type, name, file, buf, offset, n);
register_pfs_file_io_end(locker, n);
return(result);
}
-/** NOTE! Please use the corresponding macro os_file_write(), not
-directly this function!
+/** NOTE! Please use the corresponding macro os_file_write_int_fd(),
+not directly this function!
This is the performance schema instrumented wrapper function for
-os_file_write() which requests a synchronous write operation.
-@return TRUE if request was successful, FALSE if fail */
+os_file_write_int_fd() which requests a synchronous write operation.
+@param[in] type write request
+@param[in] name file name
+@param[in] file file handle
+@param[in] buf buffer to write
+@param[in] offset file offset
+@param[in] n number of bytes
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
+@return error code
+@retval DB_SUCCESS if the operation succeeded */
UNIV_INLINE
-ibool
+dberr_t
pfs_os_file_write_int_fd_func(
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- int file, /*!< in: handle to a file */
- const void* buf, /*!< in: buffer from which to write */
- os_offset_t offset, /*!< in: file offset where to write */
- ulint n, /*!< in: number of bytes to write */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
+ const IORequest& type,
+ const char* name,
+ int file,
+ const void* buf,
+ os_offset_t offset,
+ ulint n,
+ const char* src_file,
+ uint src_line)
{
- PSI_file_locker_state state;
- struct PSI_file_locker* locker = NULL;
+ PSI_file_locker_state state;
+ struct PSI_file_locker* locker;
- locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)(
- &state, file, PSI_FILE_WRITE);
- if (locker != NULL) {
+ locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)(
+ &state, file, PSI_FILE_WRITE);
+ if (locker != NULL) {
PSI_FILE_CALL(start_file_wait)(
- locker, n,
- __FILE__, __LINE__);
- }
- ibool result = os_file_write_func(
- name, OS_FILE_FROM_FD(file), buf, offset, n);
+ locker, n,
+ __FILE__, __LINE__);
+ }
+
+ dberr_t err = os_file_write_func(
+ type, name, OS_FILE_FROM_FD(file), buf, offset, n);
if (locker != NULL) {
PSI_FILE_CALL(end_file_wait)(locker, n);
}
- return(result);
+ return err;
}
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_flush(), not directly
+/** NOTE! Please use the corresponding macro os_file_flush(), not directly
this function!
This is the performance schema instrumented wrapper function for
os_file_flush() which flushes the write buffers of a given file to the disk.
+@param[in] file Open file handle
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
@return true if success */
UNIV_INLINE
-ibool
+bool
pfs_os_file_flush_func(
-/*===================*/
- pfs_os_file_t file, /*!< in, own: handle to a file */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
+ pfs_os_file_t file,
+ const char* src_file,
+ uint src_line)
{
- ibool result;
- struct PSI_file_locker* locker = NULL;
PSI_file_locker_state state;
+ struct PSI_file_locker* locker = NULL;
+
+ register_pfs_file_io_begin(
+ &state, locker, file, 0, PSI_FILE_SYNC, src_file, src_line);
- register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_SYNC,
- src_file, src_line);
- result = os_file_flush_func(file);
+ bool result = os_file_flush_func(file);
register_pfs_file_io_end(locker, 0);
return(result);
}
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_rename(), not directly
+/** NOTE! Please use the corresponding macro os_file_rename(), not directly
this function!
This is the performance schema instrumented wrapper function for
os_file_rename()
-@return TRUE if success */
+@param[in] key Performance Schema Key
+@param[in] oldpath old file path as a null-terminated string
+@param[in] newpath new file path
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
+@return true if success */
UNIV_INLINE
-ibool
+bool
pfs_os_file_rename_func(
-/*====================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema Key */
- const char* oldpath,/*!< in: old file path as a null-terminated
- string */
- const char* newpath,/*!< in: new file path */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
+ mysql_pfs_key_t key,
+ const char* oldpath,
+ const char* newpath,
+ const char* src_file,
+ uint src_line)
{
- ibool result;
- struct PSI_file_locker* locker = NULL;
PSI_file_locker_state state;
+ struct PSI_file_locker* locker = NULL;
- register_pfs_file_rename_begin(&state, locker, key, PSI_FILE_RENAME, newpath,
- src_file, src_line);
+ register_pfs_file_open_begin(
+ &state, locker, key, PSI_FILE_RENAME, newpath,
+ src_file, src_line);
- result = os_file_rename_func(oldpath, newpath);
+ bool result = os_file_rename_func(oldpath, newpath);
register_pfs_file_rename_end(locker, 0);
return(result);
}
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_delete(), not directly
+/** NOTE! Please use the corresponding macro os_file_delete(), not directly
this function!
This is the performance schema instrumented wrapper function for
os_file_delete()
-@return TRUE if success */
+@param[in] key Performance Schema Key
+@param[in]	name	file path as a null-terminated string
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
+@return true if success */
UNIV_INLINE
bool
pfs_os_file_delete_func(
-/*====================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema Key */
- const char* name, /*!< in: file path as a null-terminated
- string */
- const char* src_file, /*!< in: file name where func invoked */
- ulint src_line) /*!< in: line where the func invoked */
+ mysql_pfs_key_t key,
+ const char* name,
+ const char* src_file,
+ uint src_line)
{
- bool result;
- struct PSI_file_locker* locker = NULL;
PSI_file_locker_state state;
+ struct PSI_file_locker* locker = NULL;
- register_pfs_file_close_begin(&state, locker, key, PSI_FILE_DELETE,
- name, src_file, src_line);
+ register_pfs_file_close_begin(
+ &state, locker, key, PSI_FILE_DELETE, name, src_file, src_line);
- result = os_file_delete_func(name);
+ bool result = os_file_delete_func(name);
register_pfs_file_close_end(locker, 0);
return(result);
}
-/***********************************************************************//**
+/**
NOTE! Please use the corresponding macro os_file_delete_if_exists(), not
directly this function!
This is the performance schema instrumented wrapper function for
os_file_delete_if_exists()
-@return TRUE if success */
+@param[in] key Performance Schema Key
+@param[in]	name	file path as a null-terminated string
+@param[out]	exist	whether the file pre-existed
+@param[in] src_file file name where func invoked
+@param[in] src_line line where the func invoked
+@return true if success */
UNIV_INLINE
bool
pfs_os_file_delete_if_exists_func(
-/*==============================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema Key */
- const char* name, /*!< in: file path as a null-terminated
- string */
- const char* src_file, /*!< in: file name where func invoked */
- ulint src_line) /*!< in: line where the func invoked */
+ mysql_pfs_key_t key,
+ const char* name,
+ bool* exist,
+ const char* src_file,
+ uint src_line)
{
- bool result;
- struct PSI_file_locker* locker = NULL;
PSI_file_locker_state state;
+ struct PSI_file_locker* locker = NULL;
- register_pfs_file_close_begin(&state, locker, key, PSI_FILE_DELETE,
- name, src_file, src_line);
+ register_pfs_file_close_begin(
+ &state, locker, key, PSI_FILE_DELETE, name, src_file, src_line);
- result = os_file_delete_if_exists_func(name);
+ bool result = os_file_delete_if_exists_func(name, exist);
register_pfs_file_close_end(locker, 0);
diff --git a/storage/innobase/include/os0once.h b/storage/innobase/include/os0once.h
index aaf2db11742..ad72370eefa 100644
--- a/storage/innobase/include/os0once.h
+++ b/storage/innobase/include/os0once.h
@@ -28,8 +28,6 @@ Created Feb 20, 2014 Vasil Dimov
#define os0once_h
#include "univ.i"
-
-#include "os0sync.h"
#include "ut0ut.h"
/** Execute a given function exactly once in a multi-threaded environment
@@ -67,7 +65,6 @@ public:
/** Finished execution. */
static const state_t DONE = 2;
-#ifdef HAVE_ATOMIC_BUILTINS
/** Call a given function or wait its execution to complete if it is
already called by another thread.
@param[in,out] state control variable
@@ -80,22 +77,19 @@ public:
void (*do_func)(void*),
void* do_func_arg)
{
- /* Avoid calling os_compare_and_swap_uint32() in the most
- common case. */
+ int32 oldval = NEVER_DONE;
+
+ /* Avoid calling my_atomic_cas32() in the most common case. */
if (*state == DONE) {
return;
}
- if (os_compare_and_swap_uint32(state,
- NEVER_DONE, IN_PROGRESS)) {
+ if (my_atomic_cas32((int32*) state, &oldval, IN_PROGRESS)) {
/* We are the first. Call the function. */
do_func(do_func_arg);
- const bool swapped = os_compare_and_swap_uint32(
- state, IN_PROGRESS, DONE);
-
- ut_a(swapped);
+ my_atomic_store32((int32*) state, DONE);
} else {
/* The state is not NEVER_DONE, so either it is
IN_PROGRESS (somebody is calling the function right
@@ -119,7 +113,6 @@ public:
}
}
}
-#endif /* HAVE_ATOMIC_BUILTINS */
};
#endif /* os0once_h */
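The CAS-based once-protocol above (NEVER_DONE -> IN_PROGRESS -> DONE) is
compact enough to restate. A hedged C++11 sketch of the same state
machine, with std::atomic standing in for
my_atomic_cas32()/my_atomic_store32(); the waiter branch in the real class
also yields between polls:

	#include <atomic>

	enum { NEVER_DONE = 0, IN_PROGRESS = 1, DONE = 2 };

	void
	do_or_wait_for_done(std::atomic<int>* state,
			    void (*do_func)(void*), void* do_func_arg)
	{
		if (state->load() == DONE) {
			return;	/* fast path, no CAS in the common case */
		}

		int	expected = NEVER_DONE;

		if (state->compare_exchange_strong(expected, IN_PROGRESS)) {
			do_func(do_func_arg);	/* we won the race */
			state->store(DONE);
		} else {
			while (state->load() != DONE) {
				/* spin (and, in practice, yield) until
				the winning thread finishes */
			}
		}
	}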
diff --git a/storage/innobase/include/os0proc.h b/storage/innobase/include/os0proc.h
index 3103bddb972..9b0b3cbf628 100644
--- a/storage/innobase/include/os0proc.h
+++ b/storage/innobase/include/os0proc.h
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -37,41 +38,28 @@ Created 9/30/1995 Heikki Tuuri
typedef void* os_process_t;
typedef unsigned long int os_process_id_t;
-extern ibool os_use_large_pages;
-/* Large page size. This may be a boot-time option on some platforms */
-extern ulint os_large_page_size;
-
-/****************************************************************//**
-Converts the current process id to a number. It is not guaranteed that the
-number is unique. In Linux returns the 'process number' of the current
-thread. That number is the same as one sees in 'top', for example. In Linux
-the thread id is not the same as one sees in 'top'.
-@return process id as a number */
-UNIV_INTERN
+/** The total amount of memory currently allocated from the operating
+system with os_mem_alloc_large(). */
+extern ulint os_total_large_mem_allocated;
+
+/** Converts the current process id to a number.
+@return process id as a number */
ulint
os_proc_get_number(void);
-/*====================*/
-/****************************************************************//**
-Allocates large pages memory.
-@return allocated memory */
-UNIV_INTERN
+
+/** Allocates large pages memory.
+@param[in,out] n Number of bytes to allocate
+@return allocated memory */
void*
os_mem_alloc_large(
-/*===============*/
- ulint* n); /*!< in/out: number of bytes */
-/****************************************************************//**
-Frees large pages memory. */
-UNIV_INTERN
+ ulint* n);
+
+/** Frees large pages memory.
+@param[in] ptr pointer returned by os_mem_alloc_large()
+@param[in] size size returned by os_mem_alloc_large() */
void
os_mem_free_large(
-/*==============*/
- void *ptr, /*!< in: pointer returned by
- os_mem_alloc_large() */
- ulint size); /*!< in: size returned by
- os_mem_alloc_large() */
-
-#ifndef UNIV_NONINL
-#include "os0proc.ic"
-#endif
+ void *ptr,
+ ulint size);
#endif
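A hedged sketch of what a large-page allocator of this shape typically
does on Linux; this is not the real implementation (which historically
used shmget() with SHM_HUGETLB), and the rounding of *n up to the page
size is elided:

	#include <sys/mman.h>

	/* Try huge pages first, fall back to a normal anonymous mapping;
	the real os_mem_alloc_large() also adds to
	os_total_large_mem_allocated on success. */
	void*
	alloc_large(ulint* n)
	{
	#ifdef MAP_HUGETLB
		void*	ptr = mmap(NULL, *n, PROT_READ | PROT_WRITE,
				   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
				   -1, 0);
		if (ptr != MAP_FAILED) {
			return(ptr);
		}
	#endif
		void*	p = mmap(NULL, *n, PROT_READ | PROT_WRITE,
				 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		return(p == MAP_FAILED ? NULL : p);
	}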
diff --git a/storage/innobase/include/os0proc.ic b/storage/innobase/include/os0proc.ic
deleted file mode 100644
index 4726eb56f6d..00000000000
--- a/storage/innobase/include/os0proc.ic
+++ /dev/null
@@ -1,27 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/os0proc.ic
-The interface to the operating system
-process control primitives
-
-Created 9/30/1995 Heikki Tuuri
-*******************************************************/
-
-
diff --git a/storage/innobase/include/os0sync.h b/storage/innobase/include/os0sync.h
deleted file mode 100644
index c4185e814ca..00000000000
--- a/storage/innobase/include/os0sync.h
+++ /dev/null
@@ -1,948 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-Copyright (c) 2017, MariaDB Corporation.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/os0sync.h
-The interface to the operating system
-synchronization primitives.
-
-Created 9/6/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef os0sync_h
-#define os0sync_h
-
-#include "univ.i"
-#include "ut0lst.h"
-
-/** CPU cache line size */
-#ifdef __powerpc__
-#define CACHE_LINE_SIZE 128
-#else
-#define CACHE_LINE_SIZE 64
-#endif
-
-#ifdef HAVE_WINDOWS_ATOMICS
-typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates
- on LONG variable */
-#elif defined(HAVE_ATOMIC_BUILTINS) && !defined(HAVE_ATOMIC_BUILTINS_BYTE)
-typedef ulint lock_word_t;
-#else
-
-#define IB_LOCK_WORD_IS_BYTE
-
-typedef byte lock_word_t;
-
-#endif /* HAVE_WINDOWS_ATOMICS */
-
-#ifdef __WIN__
-/** Native event (slow)*/
-typedef HANDLE os_native_event_t;
-/** Native mutex */
-typedef CRITICAL_SECTION fast_mutex_t;
-/** Native condition variable. */
-typedef CONDITION_VARIABLE os_cond_t;
-#else
-/** Native mutex */
-typedef pthread_mutex_t fast_mutex_t;
-/** Native condition variable */
-typedef pthread_cond_t os_cond_t;
-#endif
-
-/** Structure that includes Performance Schema Probe pfs_psi
-in the os_fast_mutex structure if UNIV_PFS_MUTEX is defined */
-struct os_fast_mutex_t {
- fast_mutex_t mutex; /*!< os_fast_mutex */
-#ifdef UNIV_PFS_MUTEX
- struct PSI_mutex* pfs_psi;/*!< The performance schema
- instrumentation hook */
-#endif
-};
-
-/** Operating system event handle */
-typedef struct os_event* os_event_t;
-
-/** An asynchronous signal sent between threads */
-struct os_event {
-#ifdef __WIN__
- HANDLE handle; /*!< kernel event object, slow,
- used on older Windows */
-#endif
- os_fast_mutex_t os_mutex; /*!< this mutex protects the next
- fields */
- ibool is_set; /*!< this is TRUE when the event is
- in the signaled state, i.e., a thread
- does not stop if it tries to wait for
- this event */
- ib_int64_t signal_count; /*!< this is incremented each time
- the event becomes signaled */
- os_cond_t cond_var; /*!< condition variable is used in
- waiting for the event */
- UT_LIST_NODE_T(os_event_t) os_event_list;
- /*!< list of all created events */
-};
-
-/** Denotes an infinite delay for os_event_wait_time() */
-#define OS_SYNC_INFINITE_TIME ULINT_UNDEFINED
-
-/** Return value of os_event_wait_time() when the time is exceeded */
-#define OS_SYNC_TIME_EXCEEDED 1
-
-/** Operating system mutex handle */
-typedef struct os_mutex_t* os_ib_mutex_t;
-
-/** Mutex protecting counts and the event and OS 'slow' mutex lists */
-extern os_ib_mutex_t os_sync_mutex;
-
-/** This is incremented by 1 in os_thread_create and decremented by 1 in
-os_thread_exit */
-extern ulint os_thread_count;
-
-extern ulint os_event_count;
-extern ulint os_mutex_count;
-extern ulint os_fast_mutex_count;
-
-/*********************************************************//**
-Initializes global event and OS 'slow' mutex lists. */
-UNIV_INTERN
-void
-os_sync_init(void);
-/*==============*/
-/*********************************************************//**
-Frees created events and OS 'slow' mutexes. */
-UNIV_INTERN
-void
-os_sync_free(void);
-/*==============*/
-/*********************************************************//**
-Creates an event semaphore, i.e., a semaphore which may just have two states:
-signaled and nonsignaled. The created event is manual reset: it must be reset
-explicitly by calling sync_os_reset_event.
-@return the event handle */
-UNIV_INTERN
-os_event_t
-os_event_create(void);
-/*==================*/
-/**********************************************************//**
-Sets an event semaphore to the signaled state: lets waiting threads
-proceed. */
-UNIV_INTERN
-void
-os_event_set(
-/*=========*/
- os_event_t event); /*!< in: event to set */
-/**********************************************************//**
-Resets an event semaphore to the nonsignaled state. Waiting threads will
-stop to wait for the event.
-The return value should be passed to os_even_wait_low() if it is desired
-that this thread should not wait in case of an intervening call to
-os_event_set() between this os_event_reset() and the
-os_event_wait_low() call. See comments for os_event_wait_low(). */
-UNIV_INTERN
-ib_int64_t
-os_event_reset(
-/*===========*/
- os_event_t event); /*!< in: event to reset */
-/**********************************************************//**
-Frees an event object. */
-UNIV_INTERN
-void
-os_event_free(
-/*==========*/
- os_event_t event); /*!< in: event to free */
-
-/**********************************************************//**
-Waits for an event object until it is in the signaled state.
-
-Typically, if the event has been signalled after the os_event_reset()
-we'll return immediately because event->is_set == TRUE.
-There are, however, situations (e.g.: sync_array code) where we may
-lose this information. For example:
-
-thread A calls os_event_reset()
-thread B calls os_event_set() [event->is_set == TRUE]
-thread C calls os_event_reset() [event->is_set == FALSE]
-thread A calls os_event_wait() [infinite wait!]
-thread C calls os_event_wait() [infinite wait!]
-
-Where such a scenario is possible, to avoid infinite wait, the
-value returned by os_event_reset() should be passed in as
-reset_sig_count. */
-UNIV_INTERN
-void
-os_event_wait_low(
-/*==============*/
- os_event_t event, /*!< in: event to wait */
- ib_int64_t reset_sig_count);/*!< in: zero or the value
- returned by previous call of
- os_event_reset(). */
-
-#define os_event_wait(event) os_event_wait_low(event, 0)
-#define os_event_wait_time(event, t) os_event_wait_time_low(event, t, 0)
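The lost-wakeup scenario spelled out above is avoided by threading the
signal count from the reset through to the wait. A hedged usage sketch of
the protocol the deleted comment describes; condition_we_are_waiting_for()
is a hypothetical predicate:

	/* Capture the count at reset time, re-check the sleep condition,
	then wait with that count: an os_event_set() that lands in
	between cannot be lost. */
	ib_int64_t	sig_count = os_event_reset(event);

	if (!condition_we_are_waiting_for()) {	/* hypothetical */
		os_event_wait_low(event, sig_count);
	}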
-
-/**********************************************************//**
-Waits for an event object until it is in the signaled state or
-a timeout is exceeded. In Unix the timeout is always infinite.
-@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
-UNIV_INTERN
-ulint
-os_event_wait_time_low(
-/*===================*/
- os_event_t event, /*!< in: event to wait */
- ulint time_in_usec, /*!< in: timeout in
- microseconds, or
- OS_SYNC_INFINITE_TIME */
- ib_int64_t reset_sig_count); /*!< in: zero or the value
- returned by previous call of
- os_event_reset(). */
-/*********************************************************//**
-Creates an operating system mutex semaphore. Because these are slow, the
-mutex semaphore of InnoDB itself (ib_mutex_t) should be used where possible.
-@return the mutex handle */
-UNIV_INTERN
-os_ib_mutex_t
-os_mutex_create(void);
-/*=================*/
-/**********************************************************//**
-Acquires ownership of a mutex semaphore. */
-UNIV_INTERN
-void
-os_mutex_enter(
-/*===========*/
- os_ib_mutex_t mutex); /*!< in: mutex to acquire */
-/**********************************************************//**
-Releases ownership of a mutex. */
-UNIV_INTERN
-void
-os_mutex_exit(
-/*==========*/
- os_ib_mutex_t mutex); /*!< in: mutex to release */
-/**********************************************************//**
-Frees an mutex object. */
-UNIV_INTERN
-void
-os_mutex_free(
-/*==========*/
- os_ib_mutex_t mutex); /*!< in: mutex to free */
-/**********************************************************//**
-Acquires ownership of a fast mutex. Currently in Windows this is the same
-as os_fast_mutex_lock!
-@return 0 if success, != 0 if was reserved by another thread */
-UNIV_INLINE
-ulint
-os_fast_mutex_trylock(
-/*==================*/
- os_fast_mutex_t* fast_mutex); /*!< in: mutex to acquire */
-
-/**********************************************************************
-Following os_fast_ mutex APIs would be performance schema instrumented:
-
-os_fast_mutex_init
-os_fast_mutex_lock
-os_fast_mutex_unlock
-os_fast_mutex_free
-
-These mutex APIs will point to corresponding wrapper functions that contain
-the performance schema instrumentation.
-
-NOTE! The following macro should be used in mutex operation, not the
-corresponding function. */
-
-#ifdef UNIV_PFS_MUTEX
-# define os_fast_mutex_init(K, M) \
- pfs_os_fast_mutex_init(K, M)
-
-# define os_fast_mutex_lock(M) \
- pfs_os_fast_mutex_lock(M, __FILE__, __LINE__)
-
-# define os_fast_mutex_unlock(M) pfs_os_fast_mutex_unlock(M)
-
-# define os_fast_mutex_free(M) pfs_os_fast_mutex_free(M)
-
-/*********************************************************//**
-NOTE! Please use the corresponding macro os_fast_mutex_init(), not directly
-this function!
-A wrapper function for os_fast_mutex_init_func(). Initializes an operating
-system fast mutex semaphore. */
-UNIV_INLINE
-void
-pfs_os_fast_mutex_init(
-/*===================*/
- PSI_mutex_key key, /*!< in: Performance Schema
- key */
- os_fast_mutex_t* fast_mutex); /*!< out: fast mutex */
-/**********************************************************//**
-NOTE! Please use the corresponding macro os_fast_mutex_free(), not directly
-this function!
-Wrapper function for pfs_os_fast_mutex_free(). Also destroys the performance
-schema probes when freeing the mutex */
-UNIV_INLINE
-void
-pfs_os_fast_mutex_free(
-/*===================*/
- os_fast_mutex_t* fast_mutex); /*!< in/out: mutex to free */
-/**********************************************************//**
-NOTE! Please use the corresponding macro os_fast_mutex_lock, not directly
-this function!
-Wrapper function of os_fast_mutex_lock. Acquires ownership of a fast mutex. */
-UNIV_INLINE
-void
-pfs_os_fast_mutex_lock(
-/*===================*/
- os_fast_mutex_t* fast_mutex, /*!< in/out: mutex to acquire */
- const char* file_name, /*!< in: file name where
- locked */
- ulint line); /*!< in: line where locked */
-/**********************************************************//**
-NOTE! Please use the corresponding macro os_fast_mutex_unlock, not directly
-this function!
-Wrapper function of os_fast_mutex_unlock. Releases ownership of a fast mutex. */
-UNIV_INLINE
-void
-pfs_os_fast_mutex_unlock(
-/*=====================*/
- os_fast_mutex_t* fast_mutex); /*!< in/out: mutex to release */
-
-#else /* UNIV_PFS_MUTEX */
-
-# define os_fast_mutex_init(K, M) \
- os_fast_mutex_init_func(&((os_fast_mutex_t*)(M))->mutex)
-
-# define os_fast_mutex_lock(M) \
- os_fast_mutex_lock_func(&((os_fast_mutex_t*)(M))->mutex)
-
-# define os_fast_mutex_unlock(M) \
- os_fast_mutex_unlock_func(&((os_fast_mutex_t*)(M))->mutex)
-
-# define os_fast_mutex_free(M) \
- os_fast_mutex_free_func(&((os_fast_mutex_t*)(M))->mutex)
-#endif /* UNIV_PFS_MUTEX */
-
-/**********************************************************//**
-Acquires ownership of a fast mutex. Implies a full memory barrier even on
-platforms such as PowerPC where this is not normally required.
-@return 0 if success, != 0 if was reserved by another thread */
-UNIV_INLINE
-ulint
-os_fast_mutex_trylock_full_barrier(
-/*==================*/
- os_fast_mutex_t* fast_mutex); /*!< in: mutex to acquire */
-/**********************************************************//**
-Releases ownership of a fast mutex. */
-UNIV_INTERN
-void
-os_fast_mutex_unlock_func(
-/*======================*/
- fast_mutex_t* fast_mutex); /*!< in: mutex to release */
-/**********************************************************//**
-Releases ownership of a fast mutex. Implies a full memory barrier even on
-platforms such as PowerPC where this is not normally required. */
-UNIV_INTERN
-void
-os_fast_mutex_unlock_full_barrier(
-/*=================*/
- os_fast_mutex_t* fast_mutex); /*!< in: mutex to release */
-/*********************************************************//**
-Initializes an operating system fast mutex semaphore. */
-UNIV_INTERN
-void
-os_fast_mutex_init_func(
-/*====================*/
- fast_mutex_t* fast_mutex); /*!< in: fast mutex */
-/**********************************************************//**
-Acquires ownership of a fast mutex. */
-UNIV_INTERN
-void
-os_fast_mutex_lock_func(
-/*====================*/
- fast_mutex_t* fast_mutex); /*!< in: mutex to acquire */
-/**********************************************************//**
-Frees an mutex object. */
-UNIV_INTERN
-void
-os_fast_mutex_free_func(
-/*====================*/
- fast_mutex_t* fast_mutex); /*!< in: mutex to free */
-
-/**********************************************************//**
-Atomic compare-and-swap and increment for InnoDB. */
-
-#if defined(HAVE_IB_GCC_ATOMIC_BUILTINS)
-
-# define HAVE_ATOMIC_BUILTINS
-
-# ifdef HAVE_IB_GCC_ATOMIC_BUILTINS_BYTE
-# define HAVE_ATOMIC_BUILTINS_BYTE
-# endif
-
-# ifdef HAVE_IB_GCC_ATOMIC_BUILTINS_64
-# define HAVE_ATOMIC_BUILTINS_64
-# endif
-
-/**********************************************************//**
-Returns true if swapped, ptr is pointer to target, old_val is value to
-compare to, new_val is the value to swap in. */
-
-# define os_compare_and_swap(ptr, old_val, new_val) \
- __sync_bool_compare_and_swap(ptr, old_val, new_val)
-
-# define os_compare_and_swap_ulint(ptr, old_val, new_val) \
- os_compare_and_swap(ptr, old_val, new_val)
-
-# define os_compare_and_swap_lint(ptr, old_val, new_val) \
- os_compare_and_swap(ptr, old_val, new_val)
-
-# define os_compare_and_swap_uint32(ptr, old_val, new_val) \
- os_compare_and_swap(ptr, old_val, new_val)
-
-# ifdef HAVE_IB_ATOMIC_PTHREAD_T_GCC
-# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
- os_compare_and_swap(ptr, old_val, new_val)
-# define INNODB_RW_LOCKS_USE_ATOMICS
-# define IB_ATOMICS_STARTUP_MSG \
- "Mutexes and rw_locks use GCC atomic builtins"
-# else /* HAVE_IB_ATOMIC_PTHREAD_T_GCC */
-# define IB_ATOMICS_STARTUP_MSG \
- "Mutexes use GCC atomic builtins, rw_locks do not"
-# endif /* HAVE_IB_ATOMIC_PTHREAD_T_GCC */
-
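These CAS macros wrap __sync_bool_compare_and_swap(), which returns true only
when the swap actually took place; a caller that loses the race must reload
and retry. A minimal sketch of the usual retry loop (the bounded counter is
hypothetical, not part of this header):

/* Hedged sketch: saturating increment built on the same GCC builtin
that os_compare_and_swap() expands to above. */
static unsigned long
bounded_increment(volatile unsigned long* ptr, unsigned long max)
{
	for (;;) {
		unsigned long old_val = *ptr;
		if (old_val >= max) {
			return(old_val);	/* saturated: no change */
		}
		/* true only if *ptr still held old_val */
		if (__sync_bool_compare_and_swap(ptr, old_val,
						 old_val + 1)) {
			return(old_val + 1);
		}
		/* lost the race: reload and retry */
	}
}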
-/**********************************************************//**
-Returns the resulting value, ptr is pointer to target, amount is the
-amount of increment. */
-
-# define os_atomic_increment(ptr, amount) \
- __sync_add_and_fetch(ptr, amount)
-
-# define os_atomic_increment_lint(ptr, amount) \
- os_atomic_increment(ptr, amount)
-
-# define os_atomic_increment_uint32(ptr, amount) \
- os_atomic_increment(ptr, amount)
-
-# define os_atomic_increment_ulint(ptr, amount) \
- os_atomic_increment(ptr, amount)
-
-# define os_atomic_increment_uint64(ptr, amount) \
- os_atomic_increment(ptr, amount)
-
-/* Returns the resulting value, ptr is pointer to target, amount is the
-amount to decrement. */
-
-# define os_atomic_decrement(ptr, amount) \
- __sync_sub_and_fetch(ptr, amount)
-
-# define os_atomic_decrement_uint32(ptr, amount) \
- os_atomic_decrement(ptr, amount)
-
-# define os_atomic_decrement_lint(ptr, amount) \
- os_atomic_decrement(ptr, amount)
-
-# define os_atomic_decrement_ulint(ptr, amount) \
- os_atomic_decrement(ptr, amount)
-
-# define os_atomic_decrement_uint64(ptr, amount) \
- os_atomic_decrement(ptr, amount)
-
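Note that __sync_add_and_fetch()/__sync_sub_and_fetch() return the value
after the operation (unlike the fetch-and-op builtins), so the result can be
tested directly. A hedged sketch (the reference-count helper is illustrative,
not InnoDB code):

/* Illustrative only: free an object when a count maintained with the
same builtin as os_atomic_decrement() reaches zero. */
static void
release_ref(volatile unsigned long* n_refs,
	    void (*free_fn)(void*), void* obj)
{
	if (__sync_sub_and_fetch(n_refs, 1) == 0) {
		free_fn(obj);	/* last reference dropped */
	}
}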
-# if defined(HAVE_ATOMIC_BUILTINS)
-
-/** Do an atomic test and set.
-@param[in,out] ptr Memory location to set to non-zero
-@return the previous value */
-inline
-lock_word_t
-os_atomic_test_and_set(volatile lock_word_t* ptr)
-{
- return(__sync_lock_test_and_set(ptr, 1));
-}
-
-/** Do an atomic release.
-@param[in,out]	ptr		Memory location to write to */
-inline
-void
-os_atomic_clear(volatile lock_word_t* ptr)
-{
- __sync_lock_release(ptr);
-}
-
-# elif defined(HAVE_IB_GCC_ATOMIC_TEST_AND_SET)
-
-/** Do an atomic test-and-set.
-@param[in,out] ptr Memory location to set to non-zero
-@return the previous value */
-inline
-lock_word_t
-os_atomic_test_and_set(volatile lock_word_t* ptr)
-{
- return(__atomic_test_and_set(ptr, __ATOMIC_ACQUIRE));
-}
-
-/** Do an atomic clear.
-@param[in,out] ptr Memory location to set to zero */
-inline
-void
-os_atomic_clear(volatile lock_word_t* ptr)
-{
- __atomic_clear(ptr, __ATOMIC_RELEASE);
-}
-
-# else
-
-# error "Unsupported platform"
-
-# endif /* HAVE_IB_GCC_ATOMIC_TEST_AND_SET */
-
-#if defined(__powerpc__) || defined(__aarch64__)
-/*
- os_atomic_test_and_set_byte_release() should imply a release barrier before
- setting, and a full barrier after. But __sync_lock_test_and_set() is only
-  documented as an acquire barrier. So on PowerPC and ARM64 we need to add
-  the full barrier explicitly. */
-# define os_atomic_test_and_set_byte_release(ptr, new_val) \
- do { __sync_lock_release(ptr); \
- __sync_synchronize(); } while (0)
-#else
-/*
- On x86, __sync_lock_test_and_set() happens to be full barrier, due to
- LOCK prefix.
-*/
-# define os_atomic_test_and_set_byte_release(ptr, new_val) \
- __sync_lock_test_and_set(ptr, (byte) new_val)
-#endif
-/*
- os_atomic_test_and_set_byte_acquire() is a full memory barrier on x86. But
-  in general, just an acquire barrier should be sufficient. */
-# define os_atomic_test_and_set_byte_acquire(ptr, new_val) \
- __sync_lock_test_and_set(ptr, (byte) new_val)
-
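os_atomic_test_and_set() (acquire semantics) and the release forms above are
the two halves of a test-and-set spinlock. A minimal busy-wait sketch under
this GCC branch (the lock type is a hypothetical stand-in for lock_word_t):

typedef long lock_word_like_t;	/* stand-in for lock_word_t */

static void
spin_lock(volatile lock_word_like_t* lw)
{
	/* __sync_lock_test_and_set() is an acquire barrier: the
	critical section cannot be reordered above it */
	while (__sync_lock_test_and_set(lw, 1)) {
		/* spin until the previous value was 0 (lock free) */
	}
}

static void
spin_unlock(volatile lock_word_like_t* lw)
{
	/* release: prior writes become visible before the word clears */
	__sync_lock_release(lw);
}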
-#elif defined(HAVE_IB_SOLARIS_ATOMICS)
-
-# define HAVE_ATOMIC_BUILTINS
-# define HAVE_ATOMIC_BUILTINS_BYTE
-# define HAVE_ATOMIC_BUILTINS_64
-
-/* If not compiling with GCC or GCC doesn't support the atomic
-intrinsics and running on Solaris >= 10 use Solaris atomics */
-
-# include <atomic.h>
-
-/**********************************************************//**
-Returns true if swapped, ptr is pointer to target, old_val is value to
-compare to, new_val is the value to swap in. */
-
-# define os_compare_and_swap_uint32(ptr, old_val, new_val) \
- (atomic_cas_32(ptr, old_val, new_val) == old_val)
-
-# define os_compare_and_swap_ulint(ptr, old_val, new_val) \
- (atomic_cas_ulong(ptr, old_val, new_val) == old_val)
-
-# define os_compare_and_swap_lint(ptr, old_val, new_val) \
- ((lint) atomic_cas_ulong((ulong_t*) ptr, old_val, new_val) == old_val)
-
-# ifdef HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS
-# if SIZEOF_PTHREAD_T == 4
-# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
- ((pthread_t) atomic_cas_32(ptr, old_val, new_val) == old_val)
-# elif SIZEOF_PTHREAD_T == 8
-# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
- ((pthread_t) atomic_cas_64(ptr, old_val, new_val) == old_val)
-# else
-# error "SIZEOF_PTHREAD_T != 4 or 8"
-# endif /* SIZEOF_PTHREAD_T CHECK */
-# define INNODB_RW_LOCKS_USE_ATOMICS
-# define IB_ATOMICS_STARTUP_MSG \
- "Mutexes and rw_locks use Solaris atomic functions"
-# else /* HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS */
-# define IB_ATOMICS_STARTUP_MSG \
- "Mutexes use Solaris atomic functions, rw_locks do not"
-# endif /* HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS */
-
-/**********************************************************//**
-Returns the resulting value, ptr is pointer to target, amount is the
-amount of increment. */
-
-# define os_atomic_increment_uint32(ptr, amount) \
- atomic_add_32_nv(ptr, amount)
-
-# define os_atomic_increment_ulint(ptr, amount) \
- atomic_add_long_nv(ptr, amount)
-
-# define os_atomic_increment_lint(ptr, amount) \
- os_atomic_increment_ulint((ulong_t*) ptr, amount)
-
-# define os_atomic_increment_uint64(ptr, amount) \
- atomic_add_64_nv(ptr, amount)
-
-/* Returns the resulting value, ptr is pointer to target, amount is the
-amount to decrement. */
-
-# define os_atomic_decrement_uint32(ptr, amount) \
- os_atomic_increment_uint32(ptr, -(amount))
-
-# define os_atomic_decrement_lint(ptr, amount) \
- os_atomic_increment_ulint((ulong_t*) ptr, -(amount))
-
-# define os_atomic_decrement_ulint(ptr, amount) \
- os_atomic_increment_ulint(ptr, -(amount))
-
-# define os_atomic_decrement_uint64(ptr, amount) \
- os_atomic_increment_uint64(ptr, -(amount))
-
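Solaris provides no separate atomic subtraction, which is why every decrement
above is written as an increment by a negative amount; the delta parameter of
atomic_add_long_nv() is signed, so this is well defined. A hedged
Solaris-only sketch:

#include <atomic.h>	/* Solaris >= 10 */

/* Illustrative only: increment and decrement share one primitive;
atomic_add_long_nv() returns the new value. */
static ulong_t
adjust_counter(volatile ulong_t* counter, long delta)
{
	return(atomic_add_long_nv(counter, delta));
}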
-# ifdef IB_LOCK_WORD_IS_BYTE
-
-/** Do an atomic xchg and set to non-zero.
-@param[in,out] ptr Memory location to set to non-zero
-@return the previous value */
-inline
-lock_word_t
-os_atomic_test_and_set(volatile lock_word_t* ptr)
-{
- return(atomic_swap_uchar(ptr, 1));
-}
-
-/** Do an atomic xchg and set to zero.
-@param[in,out] ptr Memory location to set to zero
-@return the previous value */
-inline
-lock_word_t
-os_atomic_clear(volatile lock_word_t* ptr)
-{
- return(atomic_swap_uchar(ptr, 0));
-}
-
-# else
-
-/** Do an atomic xchg and set to non-zero.
-@param[in,out] ptr Memory location to set to non-zero
-@return the previous value */
-inline
-lock_word_t
-os_atomic_test_and_set(volatile lock_word_t* ptr)
-{
- return(atomic_swap_ulong(ptr, 1));
-}
-
-/** Do an atomic xchg and set to zero.
-@param[in,out] ptr Memory location to set to zero
-@return the previous value */
-inline
-lock_word_t
-os_atomic_clear(volatile lock_word_t* ptr)
-{
- return(atomic_swap_ulong(ptr, 0));
-}
-
-# endif /* IB_LOCK_WORD_IS_BYTE */
-
-# define os_atomic_test_and_set_byte_acquire(ptr, new_val) \
- atomic_swap_uchar(ptr, new_val)
-
-# define os_atomic_test_and_set_byte_release(ptr, new_val) \
- atomic_swap_uchar(ptr, new_val)
-
-#elif defined(HAVE_WINDOWS_ATOMICS)
-
-# define HAVE_ATOMIC_BUILTINS
-# define HAVE_ATOMIC_BUILTINS_BYTE
-# define HAVE_ATOMIC_BUILTINS_64
-
-/**********************************************************//**
-Atomic compare and exchange of signed integers (both 32 and 64 bit).
-@return value found before the exchange.
-If it is not equal to old_value the exchange did not happen. */
-UNIV_INLINE
-lint
-win_cmp_and_xchg_lint(
-/*==================*/
- volatile lint* ptr, /*!< in/out: source/destination */
- lint new_val, /*!< in: exchange value */
- lint old_val); /*!< in: value to compare to */
-
-/**********************************************************//**
-Atomic addition of signed integers.
-@return Initial value of the variable pointed to by ptr */
-UNIV_INLINE
-lint
-win_xchg_and_add(
-/*=============*/
- volatile lint* ptr, /*!< in/out: address of destination */
- lint val); /*!< in: number to be added */
-
-/**********************************************************//**
-Atomic compare and exchange of unsigned integers.
-@return value found before the exchange.
-If it is not equal to old_value the exchange did not happen. */
-UNIV_INLINE
-ulint
-win_cmp_and_xchg_ulint(
-/*===================*/
- volatile ulint* ptr, /*!< in/out: source/destination */
- ulint new_val, /*!< in: exchange value */
- ulint old_val); /*!< in: value to compare to */
-
-/**********************************************************//**
-Atomic compare and exchange of 32-bit unsigned integers.
-@return value found before the exchange.
-If it is not equal to old_value the exchange did not happen. */
-UNIV_INLINE
-DWORD
-win_cmp_and_xchg_dword(
-/*===================*/
- volatile DWORD* ptr, /*!< in/out: source/destination */
- DWORD new_val, /*!< in: exchange value */
- DWORD old_val); /*!< in: value to compare to */
-
-/**********************************************************//**
-Returns true if swapped, ptr is pointer to target, old_val is value to
-compare to, new_val is the value to swap in. */
-
-# define os_compare_and_swap_uint32(ptr, old_val, new_val) \
- (InterlockedCompareExchange(reinterpret_cast<volatile long*>(ptr), \
- new_val, old_val) == old_val)
-
-# define os_compare_and_swap_ulint(ptr, old_val, new_val) \
- (win_cmp_and_xchg_ulint(ptr, new_val, old_val) == old_val)
-
-# define os_compare_and_swap_lint(ptr, old_val, new_val) \
- (win_cmp_and_xchg_lint(ptr, new_val, old_val) == old_val)
-
-/* Windows thread objects can always be passed to Windows atomic functions */
-# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
- (win_cmp_and_xchg_dword(ptr, new_val, old_val) == old_val)
-
-# define INNODB_RW_LOCKS_USE_ATOMICS
-# define IB_ATOMICS_STARTUP_MSG \
- "Mutexes and rw_locks use Windows interlocked functions"
-
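Note the argument order: the os_compare_and_swap_*() macros take
(ptr, old_val, new_val), while the Interlocked API and the
win_cmp_and_xchg_*() helpers put the exchange value before the comparand.
A hedged sketch of the same boolean contract (the helper name is
illustrative):

#include <windows.h>

/* Illustrative only: InterlockedCompareExchange(dest, exchange,
comparand) returns the initial value of *dest; the swap happened iff
that initial value equals the comparand. */
static int
cas_long(volatile LONG* ptr, LONG old_val, LONG new_val)
{
	return(InterlockedCompareExchange(ptr, new_val, old_val)
	       == old_val);
}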
-/**********************************************************//**
-Returns the resulting value, ptr is pointer to target, amount is the
-amount of increment. */
-
-# define os_atomic_increment_lint(ptr, amount) \
- (win_xchg_and_add(ptr, amount) + amount)
-
-# define os_atomic_increment_uint32(ptr, amount) \
- ((ulint) InterlockedExchangeAdd((long*) ptr, amount))
-
-# define os_atomic_increment_ulint(ptr, amount) \
- ((ulint) (win_xchg_and_add((lint*) ptr, (lint) amount) + amount))
-
-# define os_atomic_increment_uint64(ptr, amount) \
- ((ib_uint64_t) (InterlockedExchangeAdd64( \
- (ib_int64_t*) ptr, \
- (ib_int64_t) amount) + amount))
-
-/**********************************************************//**
-Returns the resulting value, ptr is pointer to target, amount is the
-amount to decrement. There is no atomic subtract function on Windows */
-
-# define os_atomic_decrement_uint32(ptr, amount) \
- ((ulint) InterlockedExchangeAdd((long*) ptr, (-amount)))
-
-# define os_atomic_decrement_lint(ptr, amount) \
- (win_xchg_and_add(ptr, -(lint) amount) - amount)
-
-# define os_atomic_decrement_ulint(ptr, amount) \
- ((ulint) (win_xchg_and_add((lint*) ptr, -(lint) amount) - amount))
-
-# define os_atomic_decrement_uint64(ptr, amount) \
- ((ib_uint64_t) (InterlockedExchangeAdd64( \
- (ib_int64_t*) ptr, \
- -(ib_int64_t) amount) - amount))
-
-/** Do an atomic test and set.
-InterlockedExchange() operates on a LONG, whose previous contents are overwritten.
-@param[in,out] ptr Memory location to set to non-zero
-@return the previous value */
-inline
-lock_word_t
-os_atomic_test_and_set(volatile lock_word_t* ptr)
-{
- return(InterlockedExchange(ptr, 1));
-}
-
-/** Do an atomic release.
-InterlockedExchange() operates on a LONG, whose previous contents are overwritten.
-@param[in,out] ptr Memory location to set to zero
-@return the previous value */
-inline
-lock_word_t
-os_atomic_clear(volatile lock_word_t* ptr)
-{
- return(InterlockedExchange(ptr, 0));
-}
-
-# define os_atomic_lock_release_byte(ptr) \
- (void) InterlockedExchange(ptr, 0)
-
-#else
-# define IB_ATOMICS_STARTUP_MSG \
- "Mutexes and rw_locks use InnoDB's own implementation"
-#endif
-#ifdef HAVE_ATOMIC_BUILTINS
-#define os_atomic_inc_ulint(m,v,d) os_atomic_increment_ulint(v, d)
-#define os_atomic_dec_ulint(m,v,d) os_atomic_decrement_ulint(v, d)
-#else
-#define os_atomic_inc_ulint(m,v,d) os_atomic_inc_ulint_func(m, v, d)
-#define os_atomic_dec_ulint(m,v,d) os_atomic_dec_ulint_func(m, v, d)
-#endif /* HAVE_ATOMIC_BUILTINS */
-
-/**********************************************************//**
-The following macros update the specified counter atomically
-if HAVE_ATOMIC_BUILTINS is defined. Otherwise, they use the mutex
-passed in for synchronization */
-#ifdef HAVE_ATOMIC_BUILTINS
-#define os_increment_counter_by_amount(mutex, counter, amount) \
- (void) os_atomic_increment_ulint(&counter, amount)
-
-#define os_decrement_counter_by_amount(mutex, counter, amount) \
- (void) os_atomic_increment_ulint(&counter, (-((lint) amount)))
-#else
-#define os_increment_counter_by_amount(mutex, counter, amount) \
- do { \
- mutex_enter(&(mutex)); \
- (counter) += (amount); \
- mutex_exit(&(mutex)); \
- } while (0)
-
-#define os_decrement_counter_by_amount(mutex, counter, amount) \
- do { \
- ut_a(counter >= amount); \
- mutex_enter(&(mutex)); \
- (counter) -= (amount); \
- mutex_exit(&(mutex)); \
- } while (0)
-#endif /* HAVE_ATOMIC_BUILTINS */
-
-#define os_inc_counter(mutex, counter) \
- os_increment_counter_by_amount(mutex, counter, 1)
-
-#define os_dec_counter(mutex, counter) \
- do { \
- os_decrement_counter_by_amount(mutex, counter, 1);\
-	} while (0)
-
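Call sites look identical in both builds; only the expansion differs. A
hedged usage sketch (stats_mutex and n_rows_read are hypothetical names):

/* Illustrative only. With HAVE_ATOMIC_BUILTINS the mutex argument is
ignored and the update is a single atomic add; otherwise the counter
is adjusted under the mutex as in the fallback branch above. */
static void
count_rows_read(ulint n)
{
	os_increment_counter_by_amount(stats_mutex, n_rows_read, n);
}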
-/** barrier definitions for memory ordering */
-#if defined(HAVE_IB_GCC_ATOMIC_THREAD_FENCE)
-# define HAVE_MEMORY_BARRIER
-# define os_rmb __atomic_thread_fence(__ATOMIC_ACQUIRE)
-# define os_wmb __atomic_thread_fence(__ATOMIC_RELEASE)
-# define os_mb __atomic_thread_fence(__ATOMIC_SEQ_CST)
-
-# define IB_MEMORY_BARRIER_STARTUP_MSG \
- "GCC builtin __atomic_thread_fence() is used for memory barrier"
-
-#elif defined(HAVE_IB_GCC_SYNC_SYNCHRONISE)
-# define HAVE_MEMORY_BARRIER
-# define os_rmb __sync_synchronize()
-# define os_wmb __sync_synchronize()
-# define os_mb __sync_synchronize()
-# define IB_MEMORY_BARRIER_STARTUP_MSG \
- "GCC builtin __sync_synchronize() is used for memory barrier"
-
-#elif defined(HAVE_IB_MACHINE_BARRIER_SOLARIS)
-# define HAVE_MEMORY_BARRIER
-# include <mbarrier.h>
-# define os_rmb __machine_r_barrier()
-# define os_wmb __machine_w_barrier()
-# define os_mb __machine_rw_barrier()
-# define IB_MEMORY_BARRIER_STARTUP_MSG \
- "Solaris memory ordering functions are used for memory barrier"
-
-#elif defined(HAVE_WINDOWS_MM_FENCE)
-# define HAVE_MEMORY_BARRIER
-# include <intrin.h>
-# define os_rmb _mm_lfence()
-# define os_wmb _mm_sfence()
-# define os_mb _mm_mfence()
-# define IB_MEMORY_BARRIER_STARTUP_MSG \
- "_mm_lfence() and _mm_sfence() are used for memory barrier"
-
-#else
-# define os_rmb do { } while(0)
-# define os_wmb do { } while(0)
-# define os_mb do { } while(0)
-# define IB_MEMORY_BARRIER_STARTUP_MSG \
- "Memory barrier is not used"
-#endif
-
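The canonical use of these fences is flag-based publication: the writer
separates payload and flag with os_wmb, the reader separates flag and payload
with os_rmb. A hedged sketch using the __atomic fences from the first branch
(the variables are illustrative):

static int		payload;
static volatile int	ready;

static void
publish(int v)
{
	payload = v;
	__atomic_thread_fence(__ATOMIC_RELEASE);	/* os_wmb */
	ready = 1;
}

static int
consume(void)
{
	while (!ready) {}				/* spin */
	__atomic_thread_fence(__ATOMIC_ACQUIRE);	/* os_rmb */
	return(payload);
}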
-
-/** Simple counter aligned to CACHE_LINE_SIZE
-@tparam Type the integer type of the counter
-@tparam atomic whether to use atomic memory access */
-template <typename Type = ulint, bool atomic = false>
-struct MY_ALIGNED(CACHE_LINE_SIZE) simple_counter
-{
- /** Increment the counter */
- Type inc() { return add(1); }
- /** Decrement the counter */
- Type dec() { return sub(1); }
-
- /** Add to the counter
- @param[in] i amount to be added
- @return the value of the counter after adding */
- Type add(Type i)
- {
- compile_time_assert(!atomic || sizeof(Type) == sizeof(ulint));
- if (atomic) {
- /* GCC would perform a type check in this code
- also in case the template is instantiated with
- simple_counter<Type=not_ulint, atomic=false>.
- On Solaris, os_atomic_increment_ulint() maps
- to atomic_add_long_nv(), which expects the
- parameter to be correctly typed. */
- return os_atomic_increment_ulint(
- reinterpret_cast<ulint*>(&m_counter), i);
- } else {
- return m_counter += i;
- }
- }
- /** Subtract from the counter
- @param[in] i amount to be subtracted
-	@return the value of the counter after subtracting */
- Type sub(Type i)
- {
- compile_time_assert(!atomic || sizeof(Type) == sizeof(ulint));
- if (atomic) {
-			return os_atomic_decrement_ulint(
-				reinterpret_cast<ulint*>(&m_counter), i);
- } else {
- return m_counter -= i;
- }
- }
-
- /** @return the value of the counter (non-atomic access)! */
- operator Type() const { return m_counter; }
-
-private:
- /** The counter */
- Type m_counter;
-};
-
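Usage is uniform for either instantiation; only the atomic variant requires
Type to have the size of ulint, which the compile_time_assert() enforces, and
only reads go through the implicit conversion, which is documented as
non-atomic. A hedged usage sketch (the counter names are hypothetical):

/* Illustrative only. */
simple_counter<ulint, true>	n_page_reads;	/* shared, atomic updates */
simple_counter<ulint>		n_local_hits;	/* single writer, plain += */

void  on_page_read()   { n_page_reads.inc(); }
ulint read_snapshot()  { return(n_page_reads); }  /* non-atomic read */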
-#ifndef UNIV_NONINL
-#include "os0sync.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/os0sync.ic b/storage/innobase/include/os0sync.ic
deleted file mode 100644
index f0eba9b440f..00000000000
--- a/storage/innobase/include/os0sync.ic
+++ /dev/null
@@ -1,266 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/os0sync.ic
-The interface to the operating system synchronization primitives.
-
-Created 9/6/1995 Heikki Tuuri
-*******************************************************/
-
-#ifdef __WIN__
-#include <winbase.h>
-#endif
-
-/**********************************************************//**
-Acquires ownership of a fast mutex.
-@return 0 if success, != 0 if was reserved by another thread */
-UNIV_INLINE
-ulint
-os_fast_mutex_trylock(
-/*==================*/
- os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */
-{
- fast_mutex_t* mutex = &fast_mutex->mutex;
-
-#ifdef __WIN__
- return(!TryEnterCriticalSection(mutex));
-#else
- /* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock
- so that it returns 0 on success. In the operating system
- libraries, HP-UX-10.20 follows the old Posix 1003.4a Draft 4 and
- returns 1 on success (but MySQL remaps that to 0), while Linux,
- FreeBSD, Solaris, AIX, Tru64 Unix, HP-UX-11.0 return 0 on success. */
-
- return((ulint) pthread_mutex_trylock(mutex));
-#endif
-}
-
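Thanks to the remapping described above, a zero return uniformly means the
mutex was acquired. A hedged usage sketch (purge_sys_mutex and
do_optional_work() are hypothetical):

/* Illustrative only: do optional work without blocking. */
static void
try_optional_work(os_fast_mutex_t* m)
{
	if (os_fast_mutex_trylock(m) == 0) {
		do_optional_work();
		os_fast_mutex_unlock(m);
	}
	/* else: contended, skip this round */
}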
-#ifdef UNIV_PFS_MUTEX
-/*********************************************************//**
-NOTE! Please use the corresponding macro os_fast_mutex_init(), not directly
-this function!
-A wrapper function for os_fast_mutex_init_func(). Initializes an operating
-system fast mutex semaphore. */
-UNIV_INLINE
-void
-pfs_os_fast_mutex_init(
-/*===================*/
- PSI_mutex_key key, /*!< in: Performance Schema
- key */
- os_fast_mutex_t* fast_mutex) /*!< out: fast mutex */
-{
-#ifdef HAVE_PSI_MUTEX_INTERFACE
- fast_mutex->pfs_psi = PSI_MUTEX_CALL(init_mutex)(key, &fast_mutex->mutex);
-#else
- fast_mutex->pfs_psi = NULL;
-#endif
-
- os_fast_mutex_init_func(&fast_mutex->mutex);
-}
-/******************************************************************//**
-NOTE! Please use the corresponding macro os_fast_mutex_free(), not directly
-this function!
-Wrapper function for os_fast_mutex_free_func(). Also destroys the performance
-schema probes when freeing the mutex */
-UNIV_INLINE
-void
-pfs_os_fast_mutex_free(
-/*===================*/
- os_fast_mutex_t* fast_mutex) /*!< in/out: mutex */
-{
-#ifdef HAVE_PSI_MUTEX_INTERFACE
- if (fast_mutex->pfs_psi != NULL)
- PSI_MUTEX_CALL(destroy_mutex)(fast_mutex->pfs_psi);
-#endif
- fast_mutex->pfs_psi = NULL;
-
- os_fast_mutex_free_func(&fast_mutex->mutex);
-}
-/**********************************************************//**
-NOTE! Please use the corresponding macro os_fast_mutex_lock, not directly
-this function!
-Wrapper function of os_fast_mutex_lock_func. Acquires ownership of a fast
-mutex. */
-UNIV_INLINE
-void
-pfs_os_fast_mutex_lock(
-/*===================*/
- os_fast_mutex_t* fast_mutex, /*!< in/out: mutex to acquire */
- const char* file_name, /*!< in: file name where
- locked */
- ulint line) /*!< in: line where locked */
-{
-#ifdef HAVE_PSI_MUTEX_INTERFACE
- if (fast_mutex->pfs_psi != NULL)
- {
- PSI_mutex_locker* locker;
- PSI_mutex_locker_state state;
-
- locker = PSI_MUTEX_CALL(start_mutex_wait)(
- &state, fast_mutex->pfs_psi,
- PSI_MUTEX_LOCK, file_name,
- static_cast<uint>(line));
-
- os_fast_mutex_lock_func(&fast_mutex->mutex);
-
- if (locker != NULL)
- PSI_MUTEX_CALL(end_mutex_wait)(locker, 0);
- }
- else
-#endif
- {
- os_fast_mutex_lock_func(&fast_mutex->mutex);
- }
-
- return;
-}
-/**********************************************************//**
-NOTE! Please use the corresponding macro os_fast_mutex_unlock, not directly
-this function!
-Wrapper function of os_fast_mutex_unlock_func. Releases ownership of a
-fast mutex. */
-UNIV_INLINE
-void
-pfs_os_fast_mutex_unlock(
-/*=====================*/
- os_fast_mutex_t* fast_mutex) /*!< in/out: mutex to release */
-{
-#ifdef HAVE_PSI_MUTEX_INTERFACE
- if (fast_mutex->pfs_psi != NULL)
- PSI_MUTEX_CALL(unlock_mutex)(fast_mutex->pfs_psi);
-#endif
-
- os_fast_mutex_unlock_func(&fast_mutex->mutex);
-}
-#endif /* UNIV_PFS_MUTEX */
-
-#ifdef HAVE_WINDOWS_ATOMICS
-
-/* Use inline functions to make 64 and 32 bit versions of the Windows atomic
-functions, so that typecasts are evaluated at compile time. Take advantage of
-the fact that lint is either __int64 or long int, and that the Windows atomic
-functions work on __int64 and LONG */
-
-/**********************************************************//**
-Atomic compare and exchange of signed integers (both 32 and 64 bit).
-@return value found before the exchange.
-If it is not equal to old_value the exchange did not happen. */
-UNIV_INLINE
-lint
-win_cmp_and_xchg_lint(
-/*==================*/
- volatile lint* ptr, /*!< in/out: source/destination */
- lint new_val, /*!< in: exchange value */
- lint old_val) /*!< in: value to compare to */
-{
-# ifdef _WIN64
- return(InterlockedCompareExchange64(ptr, new_val, old_val));
-# else
- return(InterlockedCompareExchange(ptr, new_val, old_val));
-# endif
-}
-
-/**********************************************************//**
-Atomic addition of signed integers.
-@return Initial value of the variable pointed to by ptr */
-UNIV_INLINE
-lint
-win_xchg_and_add(
-/*=============*/
- volatile lint* ptr, /*!< in/out: address of destination */
- lint val) /*!< in: number to be added */
-{
-#ifdef _WIN64
- return(InterlockedExchangeAdd64(ptr, val));
-#else
- return(InterlockedExchangeAdd(ptr, val));
-#endif
-}
-
-/**********************************************************//**
-Atomic compare and exchange of unsigned integers.
-@return value found before the exchange.
-If it is not equal to old_value the exchange did not happen. */
-UNIV_INLINE
-ulint
-win_cmp_and_xchg_ulint(
-/*===================*/
- volatile ulint* ptr, /*!< in/out: source/destination */
- ulint new_val, /*!< in: exchange value */
- ulint old_val) /*!< in: value to compare to */
-{
- return((ulint) win_cmp_and_xchg_lint(
- (volatile lint*) ptr,
- (lint) new_val,
- (lint) old_val));
-}
-
-/**********************************************************//**
-Atomic compare and exchange of 32-bit unsigned integers.
-@return value found before the exchange.
-If it is not equal to old_value the exchange did not happen. */
-UNIV_INLINE
-DWORD
-win_cmp_and_xchg_dword(
-/*===================*/
- volatile DWORD* ptr, /*!< in/out: source/destination */
- DWORD new_val, /*!< in: exchange value */
- DWORD old_val) /*!< in: value to compare to */
-{
- ut_ad(sizeof(DWORD) == sizeof(LONG)); /* We assume this. */
- return(InterlockedCompareExchange(
- (volatile LONG*) ptr,
- (LONG) new_val,
- (LONG) old_val));
-}
-
-#endif /* HAVE_WINDOWS_ATOMICS */
-
-
-/**********************************************************//**
-Acquires ownership of a fast mutex. Implies a full memory barrier even on
-platforms such as PowerPC where this is not normally required.
-@return 0 if success, != 0 if was reserved by another thread */
-UNIV_INLINE
-ulint
-os_fast_mutex_trylock_full_barrier(
-/*===============================*/
- os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */
-{
-#ifdef __WIN__
- if (TryEnterCriticalSection(&fast_mutex->mutex)) {
-
- return(0);
- } else {
-
- return(1);
- }
-#else
- /* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock
- so that it returns 0 on success. In the operating system
- libraries, HP-UX-10.20 follows the old Posix 1003.4a Draft 4 and
- returns 1 on success (but MySQL remaps that to 0), while Linux,
- FreeBSD, Solaris, AIX, Tru64 Unix, HP-UX-11.0 return 0 on success. */
-
-#ifdef __powerpc__
- os_mb;
-#endif
- return((ulint) pthread_mutex_trylock(&fast_mutex->mutex));
-#endif
-}
diff --git a/storage/innobase/include/os0thread.h b/storage/innobase/include/os0thread.h
index dd129edef2c..0218dea97bd 100644
--- a/storage/innobase/include/os0thread.h
+++ b/storage/innobase/include/os0thread.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -41,8 +42,8 @@ can wait inside InnoDB */
#define OS_THREAD_PRIORITY_NORMAL 2
#define OS_THREAD_PRIORITY_ABOVE_NORMAL 3
-#ifdef __WIN__
-typedef void* os_thread_t;
+#ifdef _WIN32
+typedef DWORD os_thread_t;
typedef DWORD os_thread_id_t; /*!< In Windows the thread id
is an unsigned long int */
extern "C" {
@@ -62,7 +63,7 @@ don't access the arguments and don't return any value, we should be safe. */
#else
typedef pthread_t os_thread_t;
-typedef os_thread_t os_thread_id_t; /*!< In Unix we use the thread
+typedef pthread_t os_thread_id_t; /*!< In Unix we use the thread
handle itself as the id of
the thread */
extern "C" { typedef void* (*os_thread_func_t)(void*); }
@@ -71,7 +72,7 @@ extern "C" { typedef void* (*os_thread_func_t)(void*); }
#define DECLARE_THREAD(func) func
#define os_thread_create(f,a,i) os_thread_create_func(f, a, i)
-#endif /* __WIN__ */
+#endif /* _WIN32 */
/* Define a function pointer type to use in a typecast */
typedef void* (*os_posix_f_t) (void*);
@@ -79,12 +80,14 @@ typedef void* (*os_posix_f_t) (void*);
#ifdef HAVE_PSI_INTERFACE
/* Define for performance schema registration key */
typedef unsigned int mysql_pfs_key_t;
-#endif
+#endif /* HAVE_PSI_INTERFACE */
+
+/** Number of threads active. */
+extern ulint os_thread_count;
/***************************************************************//**
Compares two thread ids for equality.
-@return TRUE if equal */
-UNIV_INTERN
+@return TRUE if equal */
ibool
os_thread_eq(
/*=========*/
@@ -93,20 +96,18 @@ os_thread_eq(
/****************************************************************//**
Converts an OS thread id to a ulint. It is NOT guaranteed that the ulint is
unique for the thread though!
-@return thread identifier as a number */
-UNIV_INTERN
+@return thread identifier as a number */
ulint
os_thread_pf(
/*=========*/
os_thread_id_t a); /*!< in: OS thread identifier */
/****************************************************************//**
Creates a new thread of execution. The execution starts from
-the function given. The start function takes a void* parameter
-and returns a ulint.
+the function given.
NOTE: We count the number of threads in os_thread_exit(). A created
-thread should always use that to exit and not use return() to exit.
-@return handle to the thread */
-UNIV_INTERN
+thread should always use that to exit so that the thread count will be
+decremented.
+We do not return an error code; if thread creation fails, we crash here. */
os_thread_t
os_thread_create_func(
/*==================*/
@@ -120,46 +121,32 @@ os_thread_create_func(
/** Waits until the specified thread completes and joins it.
Its return value is ignored.
@param[in,out] thread thread to join */
-UNIV_INTERN
void
os_thread_join(
- os_thread_t thread);
+ os_thread_id_t thread);
+
+/** Exits the current thread.
+@param[in] detach if true, the thread will be detached right before
+exiting. If false, another thread is responsible for joining this thread */
+ATTRIBUTE_NORETURN ATTRIBUTE_COLD
+void os_thread_exit(bool detach = true);
-/*****************************************************************//**
-Exits the current thread. */
-UNIV_INTERN
-void
-os_thread_exit(
-/*===========*/
- void* exit_value, /*!< in: exit value; in Windows this void*
- is cast as a DWORD */
- bool detach = true) /*!< in: if true, the thread will be detached
- right before exiting. If false, another thread
- is responsible for joining this thread. */
- UNIV_COLD MY_ATTRIBUTE((noreturn));
/*****************************************************************//**
Returns the thread identifier of current thread.
-@return current thread identifier */
-UNIV_INTERN
+@return current thread identifier */
os_thread_id_t
os_thread_get_curr_id(void);
/*========================*/
/*****************************************************************//**
Advises the os to give up remainder of the thread's time slice. */
-UNIV_INTERN
void
os_thread_yield(void);
/*=================*/
/*****************************************************************//**
The thread sleeps at least the time given in microseconds. */
-UNIV_INTERN
void
os_thread_sleep(
/*============*/
ulint tm); /*!< in: time in microseconds */
-#ifndef UNIV_NONINL
-#include "os0thread.ic"
-#endif
-
#endif
diff --git a/storage/innobase/include/os0thread.ic b/storage/innobase/include/os0thread.ic
deleted file mode 100644
index 15f9b277fa1..00000000000
--- a/storage/innobase/include/os0thread.ic
+++ /dev/null
@@ -1,25 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/os0thread.ic
-The interface to the operating system
-process and thread control primitives
-
-Created 9/8/1995 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/page0cur.h b/storage/innobase/include/page0cur.h
index aec7c63563c..d91f1bd7ba7 100644
--- a/storage/innobase/include/page0cur.h
+++ b/storage/innobase/include/page0cur.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,36 +27,18 @@ Created 10/4/1994 Heikki Tuuri
#ifndef page0cur_h
#define page0cur_h
-#include "univ.i"
-
#include "buf0types.h"
#include "page0page.h"
+#include "rem0types.h"
#include "rem0rec.h"
#include "data0data.h"
#include "mtr0mtr.h"
-
-
-#define PAGE_CUR_ADAPT
-
-/* Page cursor search modes; the values must be in this order! */
-
-#define PAGE_CUR_UNSUPP 0
-#define PAGE_CUR_G 1
-#define PAGE_CUR_GE 2
-#define PAGE_CUR_L 3
-#define PAGE_CUR_LE 4
-/*#define PAGE_CUR_LE_OR_EXTENDS 5*/ /* This is a search mode used in
- "column LIKE 'abc%' ORDER BY column DESC";
- we have to find strings which are <= 'abc' or
- which extend it */
-#ifdef UNIV_SEARCH_DEBUG
-# define PAGE_CUR_DBG 6 /* As PAGE_CUR_LE, but skips search shortcut */
-#endif /* UNIV_SEARCH_DEBUG */
+#include "gis0type.h"
#ifdef UNIV_DEBUG
/*********************************************************//**
Gets pointer to the page frame where the cursor is positioned.
-@return page */
+@return page */
UNIV_INLINE
page_t*
page_cur_get_page(
@@ -63,7 +46,7 @@ page_cur_get_page(
page_cur_t* cur); /*!< in: page cursor */
/*********************************************************//**
Gets pointer to the buffer block where the cursor is positioned.
-@return page */
+@return page */
UNIV_INLINE
buf_block_t*
page_cur_get_block(
@@ -71,7 +54,7 @@ page_cur_get_block(
page_cur_t* cur); /*!< in: page cursor */
/*********************************************************//**
Gets pointer to the page frame where the cursor is positioned.
-@return page */
+@return page */
UNIV_INLINE
page_zip_des_t*
page_cur_get_page_zip(
@@ -79,7 +62,7 @@ page_cur_get_page_zip(
page_cur_t* cur); /*!< in: page cursor */
/*********************************************************//**
Gets the record where the cursor is positioned.
-@return record */
+@return record */
UNIV_INLINE
rec_t*
page_cur_get_rec(
@@ -111,7 +94,7 @@ page_cur_set_after_last(
page_cur_t* cur); /*!< in: cursor */
/*********************************************************//**
Returns TRUE if the cursor is before first user record on page.
-@return TRUE if at start */
+@return TRUE if at start */
UNIV_INLINE
ibool
page_cur_is_before_first(
@@ -119,7 +102,7 @@ page_cur_is_before_first(
const page_cur_t* cur); /*!< in: cursor */
/*********************************************************//**
Returns TRUE if the cursor is after last user record.
-@return TRUE if at end */
+@return TRUE if at end */
UNIV_INLINE
ibool
page_cur_is_after_last(
@@ -136,13 +119,6 @@ page_cur_position(
the record */
page_cur_t* cur); /*!< out: page cursor */
/**********************************************************//**
-Invalidates a page cursor by setting the record pointer NULL. */
-UNIV_INLINE
-void
-page_cur_invalidate(
-/*================*/
- page_cur_t* cur); /*!< out: page cursor */
-/**********************************************************//**
Moves the cursor to the next record on page. */
UNIV_INLINE
void
@@ -156,7 +132,7 @@ void
page_cur_move_to_prev(
/*==================*/
page_cur_t* cur); /*!< in/out: cursor; not before first */
-#ifndef UNIV_HOTBACKUP
+
/***********************************************************//**
Inserts a record next to page cursor. Returns pointer to inserted record if
succeed, i.e., enough space available, NULL otherwise. The cursor stays at
@@ -168,7 +144,7 @@ if this is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit().
-@return pointer to record if succeed, NULL otherwise */
+@return pointer to record if succeed, NULL otherwise */
UNIV_INLINE
rec_t*
page_cur_tuple_insert(
@@ -176,12 +152,14 @@ page_cur_tuple_insert(
page_cur_t* cursor, /*!< in/out: a page cursor */
const dtuple_t* tuple, /*!< in: pointer to a data tuple */
dict_index_t* index, /*!< in: record descriptor */
- ulint** offsets,/*!< out: offsets on *rec */
+ offset_t** offsets,/*!< out: offsets on *rec */
mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
+ mtr_t* mtr, /*!< in: mini-transaction handle, or NULL */
+ bool use_cache = false)
+ /*!< in: if true, then use record cache to
+ hold the tuple converted record. */
MY_ATTRIBUTE((nonnull(1,2,3,4,5), warn_unused_result));
-#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
Inserts a record next to page cursor. Returns pointer to inserted record if
succeed, i.e., enough space available, NULL otherwise. The cursor stays at
@@ -193,7 +171,7 @@ if this is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit().
-@return pointer to record if succeed, NULL otherwise */
+@return pointer to record if succeed, NULL otherwise */
UNIV_INLINE
rec_t*
page_cur_rec_insert(
@@ -201,14 +179,13 @@ page_cur_rec_insert(
page_cur_t* cursor, /*!< in/out: a page cursor */
const rec_t* rec, /*!< in: record to insert */
dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
+ offset_t* offsets,/*!< in/out: rec_get_offsets(rec, index) */
mtr_t* mtr); /*!< in: mini-transaction handle, or NULL */
/***********************************************************//**
Inserts a record next to page cursor on an uncompressed page.
Returns pointer to inserted record if succeed, i.e., enough
space available, NULL otherwise. The cursor stays at the same position.
-@return pointer to record if succeed, NULL otherwise */
-UNIV_INTERN
+@return pointer to record if succeed, NULL otherwise */
rec_t*
page_cur_insert_rec_low(
/*====================*/
@@ -216,9 +193,10 @@ page_cur_insert_rec_low(
which the new record is inserted */
dict_index_t* index, /*!< in: record descriptor */
const rec_t* rec, /*!< in: pointer to a physical record */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
+ offset_t* offsets,/*!< in/out: rec_get_offsets(rec, index) */
mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
MY_ATTRIBUTE((nonnull(1,2,3,4), warn_unused_result));
+
/***********************************************************//**
Inserts a record next to page cursor on a compressed and uncompressed
page. Returns pointer to inserted record if succeed, i.e.,
@@ -230,15 +208,14 @@ if this is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit().
-@return pointer to record if succeed, NULL otherwise */
-UNIV_INTERN
+@return pointer to record if succeed, NULL otherwise */
rec_t*
page_cur_insert_rec_zip(
/*====================*/
page_cur_t* cursor, /*!< in/out: page cursor */
dict_index_t* index, /*!< in: record descriptor */
const rec_t* rec, /*!< in: pointer to a physical record */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
+ offset_t* offsets,/*!< in/out: rec_get_offsets(rec, index) */
mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
MY_ATTRIBUTE((nonnull(1,2,3,4), warn_unused_result));
/*************************************************************//**
@@ -249,7 +226,6 @@ IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
if this is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit(). */
-UNIV_INTERN
void
page_copy_rec_list_end_to_created_page(
/*===================================*/
@@ -260,84 +236,128 @@ page_copy_rec_list_end_to_created_page(
/***********************************************************//**
Deletes a record at the page cursor. The cursor is moved to the
next record after the deleted one. */
-UNIV_INTERN
void
page_cur_delete_rec(
/*================*/
page_cur_t* cursor, /*!< in/out: a page cursor */
const dict_index_t* index, /*!< in: record descriptor */
- const ulint* offsets,/*!< in: rec_get_offsets(
+ const offset_t* offsets,/*!< in: rec_get_offsets(
cursor->rec, index) */
mtr_t* mtr); /*!< in: mini-transaction handle */
-#ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Searches the right position for a page cursor.
-@return number of matched fields on the left */
+
+/** Search the right position for a page cursor.
+@param[in] block buffer block
+@param[in] index index tree
+@param[in] tuple data tuple
+@param[in] mode PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, or PAGE_CUR_GE
+@param[out] cursor page cursor
+@return number of matched fields on the left */
UNIV_INLINE
ulint
page_cur_search(
-/*============*/
- const buf_block_t* block, /*!< in: buffer block */
- const dict_index_t* index, /*!< in: record descriptor */
- const dtuple_t* tuple, /*!< in: data tuple */
- ulint mode, /*!< in: PAGE_CUR_L,
- PAGE_CUR_LE, PAGE_CUR_G, or
- PAGE_CUR_GE */
- page_cur_t* cursor);/*!< out: page cursor */
+ const buf_block_t* block,
+ const dict_index_t* index,
+ const dtuple_t* tuple,
+ page_cur_mode_t mode,
+ page_cur_t* cursor);
+
+/** Search the right position for a page cursor.
+@param[in] block buffer block
+@param[in] index index tree
+@param[in] tuple data tuple
+@param[out] cursor page cursor
+@return number of matched fields on the left */
+UNIV_INLINE
+ulint
+page_cur_search(
+ const buf_block_t* block,
+ const dict_index_t* index,
+ const dtuple_t* tuple,
+ page_cur_t* cursor);
+
/****************************************************************//**
Searches the right position for a page cursor. */
-UNIV_INTERN
void
page_cur_search_with_match(
/*=======================*/
const buf_block_t* block, /*!< in: buffer block */
const dict_index_t* index, /*!< in: record descriptor */
const dtuple_t* tuple, /*!< in: data tuple */
- ulint mode, /*!< in: PAGE_CUR_L,
+ page_cur_mode_t mode, /*!< in: PAGE_CUR_L,
PAGE_CUR_LE, PAGE_CUR_G, or
PAGE_CUR_GE */
ulint* iup_matched_fields,
/*!< in/out: already matched
fields in upper limit record */
- ulint* iup_matched_bytes,
- /*!< in/out: already matched
- bytes in a field not yet
- completely matched */
ulint* ilow_matched_fields,
/*!< in/out: already matched
fields in lower limit record */
+ page_cur_t* cursor, /*!< out: page cursor */
+ rtr_info_t* rtr_info);/*!< in/out: rtree search stack */
+#ifdef BTR_CUR_HASH_ADAPT
+/** Search the right position for a page cursor.
+@param[in] block buffer block
+@param[in] index index tree
+@param[in] tuple key to be searched for
+@param[in] mode search mode
+@param[in,out] iup_matched_fields already matched fields in the
+upper limit record
+@param[in,out] iup_matched_bytes already matched bytes in the
+first partially matched field in the upper limit record
+@param[in,out] ilow_matched_fields already matched fields in the
+lower limit record
+@param[in,out] ilow_matched_bytes already matched bytes in the
+first partially matched field in the lower limit record
+@param[out] cursor page cursor */
+void
+page_cur_search_with_match_bytes(
+ const buf_block_t* block,
+ const dict_index_t* index,
+ const dtuple_t* tuple,
+ page_cur_mode_t mode,
+ ulint* iup_matched_fields,
+ ulint* iup_matched_bytes,
+ ulint* ilow_matched_fields,
ulint* ilow_matched_bytes,
- /*!< in/out: already matched
- bytes in a field not yet
- completely matched */
- page_cur_t* cursor);/*!< out: page cursor */
+ page_cur_t* cursor);
+#endif /* BTR_CUR_HASH_ADAPT */
/***********************************************************//**
Positions a page cursor on a randomly chosen user record on a page. If there
are no user records, sets the cursor on the infimum record. */
-UNIV_INTERN
void
page_cur_open_on_rnd_user_rec(
/*==========================*/
buf_block_t* block, /*!< in: page */
page_cur_t* cursor);/*!< out: page cursor */
-#endif /* !UNIV_HOTBACKUP */
+/** Write a redo log record of inserting a record into an index page.
+@param[in] insert_rec inserted record
+@param[in] rec_size rec_get_size(insert_rec)
+@param[in] cursor_rec predecessor of insert_rec
+@param[in,out] index index tree
+@param[in,out] mtr mini-transaction */
+void
+page_cur_insert_rec_write_log(
+ const rec_t* insert_rec,
+ ulint rec_size,
+ const rec_t* cursor_rec,
+ dict_index_t* index,
+ mtr_t* mtr)
+ MY_ATTRIBUTE((nonnull));
/***********************************************************//**
Parses a log record of a record insert on a page.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
page_cur_parse_insert_rec(
/*======================*/
ibool is_short,/*!< in: TRUE if short inserts */
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
+ const byte* ptr, /*!< in: buffer */
+ const byte* end_ptr,/*!< in: buffer end */
buf_block_t* block, /*!< in: page or NULL */
dict_index_t* index, /*!< in: record descriptor */
mtr_t* mtr); /*!< in: mtr or NULL */
/**********************************************************//**
Parses a log record of copying a record list end to a new created page.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
page_parse_copy_rec_list_to_created_page(
/*=====================================*/
@@ -348,8 +368,7 @@ page_parse_copy_rec_list_to_created_page(
mtr_t* mtr); /*!< in: mtr or NULL */
/***********************************************************//**
Parses log record of a record delete on a page.
-@return pointer to record end or NULL */
-UNIV_INTERN
+@return pointer to record end or NULL */
byte*
page_cur_parse_delete_rec(
/*======================*/
@@ -361,8 +380,7 @@ page_cur_parse_delete_rec(
/*******************************************************//**
Removes the record from a leaf page. This function does not log
any changes. It is used by the IMPORT tablespace functions.
-@return true if success, i.e., the page did not become too empty */
-UNIV_INTERN
+@return true if success, i.e., the page did not become too empty */
bool
page_delete_rec(
/*============*/
@@ -371,17 +389,17 @@ page_delete_rec(
page_cur_t* pcur, /*!< in/out: page cursor on record
to delete */
page_zip_des_t* page_zip,/*!< in: compressed page descriptor */
- const ulint* offsets);/*!< in: offsets for record */
+ const offset_t* offsets);/*!< in: offsets for record */
/** Index page cursor */
struct page_cur_t{
- byte* rec; /*!< pointer to a record on page */
+ const dict_index_t* index;
+ rec_t* rec; /*!< pointer to a record on page */
+ offset_t* offsets;
buf_block_t* block; /*!< pointer to the block containing rec */
};
-#ifndef UNIV_NONINL
#include "page0cur.ic"
-#endif
#endif
diff --git a/storage/innobase/include/page0cur.ic b/storage/innobase/include/page0cur.ic
index 9d0e8408c54..982bda83c32 100644
--- a/storage/innobase/include/page0cur.ic
+++ b/storage/innobase/include/page0cur.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2015, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
@@ -32,7 +32,7 @@ Created 10/4/1994 Heikki Tuuri
/*********************************************************//**
Gets pointer to the page frame where the cursor is positioned.
-@return page */
+@return page */
UNIV_INLINE
page_t*
page_cur_get_page(
@@ -50,7 +50,7 @@ page_cur_get_page(
/*********************************************************//**
Gets pointer to the buffer block where the cursor is positioned.
-@return page */
+@return page */
UNIV_INLINE
buf_block_t*
page_cur_get_block(
@@ -68,7 +68,7 @@ page_cur_get_block(
/*********************************************************//**
Gets pointer to the page frame where the cursor is positioned.
-@return page */
+@return page */
UNIV_INLINE
page_zip_des_t*
page_cur_get_page_zip(
@@ -80,7 +80,7 @@ page_cur_get_page_zip(
/*********************************************************//**
Gets the record where the cursor is positioned.
-@return record */
+@return record */
UNIV_INLINE
rec_t*
page_cur_get_rec(
@@ -127,7 +127,7 @@ page_cur_set_after_last(
/*********************************************************//**
Returns TRUE if the cursor is before first user record on page.
-@return TRUE if at start */
+@return TRUE if at start */
UNIV_INLINE
ibool
page_cur_is_before_first(
@@ -141,7 +141,7 @@ page_cur_is_before_first(
/*********************************************************//**
Returns TRUE if the cursor is after last user record.
-@return TRUE if at end */
+@return TRUE if at end */
UNIV_INLINE
ibool
page_cur_is_after_last(
@@ -172,20 +172,6 @@ page_cur_position(
}
/**********************************************************//**
-Invalidates a page cursor by setting the record pointer NULL. */
-UNIV_INLINE
-void
-page_cur_invalidate(
-/*================*/
- page_cur_t* cur) /*!< out: page cursor */
-{
- ut_ad(cur);
-
- cur->rec = NULL;
- cur->block = NULL;
-}
-
-/**********************************************************//**
Moves the cursor to the next record on page. */
UNIV_INLINE
void
@@ -211,36 +197,47 @@ page_cur_move_to_prev(
cur->rec = page_rec_get_prev(cur->rec);
}
-#ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Searches the right position for a page cursor.
-@return number of matched fields on the left */
+/** Search the right position for a page cursor.
+@param[in] block buffer block
+@param[in] index index tree
+@param[in] tuple data tuple
+@param[in] mode PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, or PAGE_CUR_GE
+@param[out] cursor page cursor
+@return number of matched fields on the left */
UNIV_INLINE
ulint
page_cur_search(
-/*============*/
- const buf_block_t* block, /*!< in: buffer block */
- const dict_index_t* index, /*!< in: record descriptor */
- const dtuple_t* tuple, /*!< in: data tuple */
- ulint mode, /*!< in: PAGE_CUR_L,
- PAGE_CUR_LE, PAGE_CUR_G, or
- PAGE_CUR_GE */
- page_cur_t* cursor) /*!< out: page cursor */
+ const buf_block_t* block,
+ const dict_index_t* index,
+ const dtuple_t* tuple,
+ page_cur_mode_t mode,
+ page_cur_t* cursor)
{
- ulint low_matched_fields = 0;
- ulint low_matched_bytes = 0;
- ulint up_matched_fields = 0;
- ulint up_matched_bytes = 0;
+ ulint low_match = 0;
+ ulint up_match = 0;
ut_ad(dtuple_check_typed(tuple));
page_cur_search_with_match(block, index, tuple, mode,
- &up_matched_fields,
- &up_matched_bytes,
- &low_matched_fields,
- &low_matched_bytes,
- cursor);
- return(low_matched_fields);
+ &up_match, &low_match, cursor, NULL);
+ return(low_match);
+}
+
+/** Search the right position for a page cursor.
+@param[in] block buffer block
+@param[in] index index tree
+@param[in] tuple data tuple
+@param[out] cursor page cursor
+@return number of matched fields on the left */
+UNIV_INLINE
+ulint
+page_cur_search(
+ const buf_block_t* block,
+ const dict_index_t* index,
+ const dtuple_t* tuple,
+ page_cur_t* cursor)
+{
+ return(page_cur_search(block, index, tuple, PAGE_CUR_LE, cursor));
}
/***********************************************************//**
@@ -254,7 +251,7 @@ if this is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit().
-@return pointer to record if succeed, NULL otherwise */
+@return pointer to record if succeed, NULL otherwise */
UNIV_INLINE
rec_t*
page_cur_tuple_insert(
@@ -262,14 +259,16 @@ page_cur_tuple_insert(
page_cur_t* cursor, /*!< in/out: a page cursor */
const dtuple_t* tuple, /*!< in: pointer to a data tuple */
dict_index_t* index, /*!< in: record descriptor */
- ulint** offsets,/*!< out: offsets on *rec */
+ offset_t** offsets,/*!< out: offsets on *rec */
mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
+ mtr_t* mtr, /*!< in: mini-transaction handle, or NULL */
+ bool use_cache)
+ /*!< in: if true, then use record cache to
+ hold the tuple converted record. */
{
- ulint size
- = rec_get_converted_size(index, tuple, n_ext);
rec_t* rec;
+ ulint size = rec_get_converted_size(index, tuple, n_ext);
if (!*heap) {
*heap = mem_heap_create(size
@@ -280,8 +279,10 @@ page_cur_tuple_insert(
rec = rec_convert_dtuple_to_rec((byte*) mem_heap_alloc(*heap, size),
index, tuple, n_ext);
- *offsets = rec_get_offsets(
- rec, index, *offsets, ULINT_UNDEFINED, heap);
+
+ *offsets = rec_get_offsets(rec, index, *offsets,
+ page_is_leaf(cursor->block->frame),
+ ULINT_UNDEFINED, heap);
if (buf_block_get_page_zip(cursor->block)) {
rec = page_cur_insert_rec_zip(
@@ -294,7 +295,6 @@ page_cur_tuple_insert(
ut_ad(!rec || !cmp_dtuple_rec(tuple, rec, *offsets));
return(rec);
}
-#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
Inserts a record next to page cursor. Returns pointer to inserted record if
@@ -307,7 +307,7 @@ if this is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit().
-@return pointer to record if succeed, NULL otherwise */
+@return pointer to record if succeed, NULL otherwise */
UNIV_INLINE
rec_t*
page_cur_rec_insert(
@@ -315,7 +315,7 @@ page_cur_rec_insert(
page_cur_t* cursor, /*!< in/out: a page cursor */
const rec_t* rec, /*!< in: record to insert */
dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
+ offset_t* offsets,/*!< in/out: rec_get_offsets(rec, index) */
mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
{
if (buf_block_get_page_zip(cursor->block)) {
diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h
index 64ae31905b4..54edf034ac6 100644
--- a/storage/innobase/include/page0page.h
+++ b/storage/innobase/include/page0page.h
@@ -1,7 +1,6 @@
/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2018, MariaDB Corporation.
+Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,16 +26,17 @@ Created 2/2/1994 Heikki Tuuri
#ifndef page0page_h
#define page0page_h
-#include "univ.i"
-
#include "page0types.h"
#ifndef UNIV_INNOCHECKSUM
#include "fil0fil.h"
#include "buf0buf.h"
#include "data0data.h"
#include "dict0dict.h"
+#include "rem0types.h"
#include "rem0rec.h"
+#endif /* !UNIV_INNOCHECKSUM */
#include "fsp0fsp.h"
+#ifndef UNIV_INNOCHECKSUM
#include "mtr0mtr.h"
#ifdef UNIV_MATERIALIZE
@@ -44,14 +44,13 @@ Created 2/2/1994 Heikki Tuuri
#define UNIV_INLINE
#endif
-#endif /* !UNIV_INNOCHECKSUM */
-
/* PAGE HEADER
===========
Index page header starts at the first offset left free by the FIL-module */
typedef byte page_header_t;
+#endif /* !UNIV_INNOCHECKSUM */
#define PAGE_HEADER FSEG_PAGE_DATA /* index page header starts at this
offset */
@@ -69,10 +68,12 @@ typedef byte page_header_t;
#define PAGE_N_DIRECTION 14 /* number of consecutive inserts to the same
direction */
#define PAGE_N_RECS 16 /* number of user records on the page */
-#define PAGE_MAX_TRX_ID 18 /* highest id of a trx which may have modified
- a record on the page; trx_id_t; defined only
- in secondary indexes and in the insert buffer
- tree */
+/** The largest DB_TRX_ID that may have modified a record on the page;
+Defined only in secondary index leaf pages and in change buffer leaf pages.
+Otherwise written as 0. @see PAGE_ROOT_AUTO_INC */
+#define PAGE_MAX_TRX_ID 18
+/** The AUTO_INCREMENT value (on persistent clustered index root pages). */
+#define PAGE_ROOT_AUTO_INC PAGE_MAX_TRX_ID
#define PAGE_HEADER_PRIV_END 26 /* end of private data structure of the page
header which are set in a page create */
/*----*/
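PAGE_MAX_TRX_ID and PAGE_ROOT_AUTO_INC name the same 8-byte big-endian field
at offset 18 of the page header; which meaning applies depends on the page
type. A hedged sketch of reading it with the existing mach_read_from_8()
accessor (the helper name is hypothetical):

/* Illustrative only: returns the stored transaction id or, on a
persistent clustered index root page, the AUTO_INCREMENT value. */
inline ib_uint64_t
read_page_max_trx_id(const page_t* page)
{
	return(mach_read_from_8(page + PAGE_HEADER + PAGE_MAX_TRX_ID));
}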
@@ -82,6 +83,7 @@ typedef byte page_header_t;
#define PAGE_INDEX_ID 28 /* index id where the page belongs.
This field should not be written to after
page creation. */
+
#define PAGE_BTR_SEG_LEAF 36 /* file segment header for the leaf pages in
a B-tree: defined only on the root page of a
B-tree, but not in the root of an ibuf tree */
@@ -121,8 +123,6 @@ typedef byte page_header_t;
a new-style compact page */
/*-----------------------------*/
-#ifndef UNIV_INNOCHECKSUM
-
/* Heap numbers */
#define PAGE_HEAP_NO_INFIMUM 0 /* page infimum */
#define PAGE_HEAP_NO_SUPREMUM 1 /* page supremum */
@@ -138,6 +138,8 @@ typedef byte page_header_t;
#define PAGE_SAME_PAGE 4
#define PAGE_NO_DIRECTION 5
+#ifndef UNIV_INNOCHECKSUM
+
/* PAGE DIRECTORY
==============
*/
@@ -164,25 +166,196 @@ directory. */
#define PAGE_DIR_SLOT_MIN_N_OWNED 4
extern my_bool srv_immediate_scrub_data_uncompressed;
+#endif /* !UNIV_INNOCHECKSUM */
-/************************************************************//**
-Gets the start of a page.
-@return start of the page */
-UNIV_INLINE
+/** Get the start of a page frame.
+@param[in] ptr pointer within a page frame
+@return start of the page frame */
+MY_ATTRIBUTE((const))
+inline
page_t*
-page_align(
-/*=======*/
- const void* ptr) /*!< in: pointer to page frame */
- MY_ATTRIBUTE((const));
-/************************************************************//**
-Gets the offset within a page.
-@return offset from the start of the page */
-UNIV_INLINE
+page_align(const void* ptr)
+{
+ return(static_cast<page_t*>(ut_align_down(ptr, srv_page_size)));
+}
+
+/** Gets the byte offset within a page frame.
+@param[in] ptr pointer within a page frame
+@return offset from the start of the page */
+MY_ATTRIBUTE((const))
+inline
ulint
-page_offset(
-/*========*/
- const void* ptr) /*!< in: pointer to page frame */
- MY_ATTRIBUTE((const));
+page_offset(const void* ptr)
+{
+ return(ut_align_offset(ptr, srv_page_size));
+}
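
Both helpers rely on srv_page_size being a power of two, so they reduce to
mask operations. A standalone sketch under that assumption (default 16KiB
pages); align_down() and align_offset() stand in for ut_align_down() and
ut_align_offset():

#include <cassert>
#include <cstdint>
#include <cstdlib>

static const uintptr_t page_size = 16384;  /* assumed default srv_page_size */

static unsigned char* align_down(const unsigned char* ptr)
{
        return (unsigned char*) (uintptr_t(ptr) & ~(page_size - 1));
}

static uintptr_t align_offset(const unsigned char* ptr)
{
        return uintptr_t(ptr) & (page_size - 1);
}

int main()
{
        unsigned char* frame = static_cast<unsigned char*>(
                std::aligned_alloc(page_size, page_size));
        const unsigned char* rec = frame + 99;
        assert(align_down(rec) == frame);       /* page_align(rec) */
        assert(align_offset(rec) == 99);        /* page_offset(rec) */
        /* invariant: rec == page_align(rec) + page_offset(rec) */
        std::free(frame);
}
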
+
+/** Determine whether an index page is not in ROW_FORMAT=REDUNDANT.
+@param[in] page index page
+@return nonzero if ROW_FORMAT is one of COMPACT, DYNAMIC, COMPRESSED
+@retval 0 if ROW_FORMAT=REDUNDANT */
+inline
+byte
+page_is_comp(const page_t* page)
+{
+ ut_ad(!ut_align_offset(page, UNIV_ZIP_SIZE_MIN));
+ return(page[PAGE_HEADER + PAGE_N_HEAP] & 0x80);
+}
+
+/** Determine whether an index page is empty.
+@param[in] page index page
+@return whether the page is empty (PAGE_N_RECS = 0) */
+inline
+bool
+page_is_empty(const page_t* page)
+{
+ ut_ad(!ut_align_offset(page, UNIV_ZIP_SIZE_MIN));
+ return !*reinterpret_cast<const uint16_t*>(PAGE_HEADER + PAGE_N_RECS
+ + page);
+}
+
+/** Determine whether an index page contains garbage.
+@param[in] page index page
+@return whether the page contains garbage (PAGE_GARBAGE is not 0) */
+inline
+bool
+page_has_garbage(const page_t* page)
+{
+ ut_ad(!ut_align_offset(page, UNIV_ZIP_SIZE_MIN));
+ return *reinterpret_cast<const uint16_t*>(PAGE_HEADER + PAGE_GARBAGE
+ + page);
+}
+
+/** Determine whether a B-tree or R-tree index page is a leaf page.
+@param[in] page index page
+@return true if the page is a leaf (PAGE_LEVEL = 0) */
+inline
+bool
+page_is_leaf(const page_t* page)
+{
+ ut_ad(!ut_align_offset(page, UNIV_ZIP_SIZE_MIN));
+ return !*reinterpret_cast<const uint16_t*>(PAGE_HEADER + PAGE_LEVEL
+ + page);
+}
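
The four predicates above each reduce to a one- or two-byte test on the page
header. A standalone sketch, assuming PAGE_HEADER == 38 (the size of the FIL
page header), which also shows why the uint16_t loads in page_is_empty() and
page_is_leaf() need no byte swapping:

#include <cassert>
#include <cstdint>

/* offsets as defined earlier in this header; PAGE_HEADER == 38 assumed */
enum { PAGE_HEADER = 38, PAGE_N_HEAP = 4, PAGE_N_RECS = 16, PAGE_LEVEL = 26 };

int main()
{
        uint8_t page[16384] = {0};
        page[PAGE_HEADER + PAGE_N_HEAP] = 0x80;  /* a compact-format page */
        /* page_is_comp(): only the high bit of PAGE_N_HEAP matters */
        assert(page[PAGE_HEADER + PAGE_N_HEAP] & 0x80);
        /* PAGE_N_RECS and PAGE_LEVEL are big-endian 16-bit fields, but a
           zero test is byte-order independent, so a raw uint16_t load is
           safe for page_is_empty() and page_is_leaf() */
        assert(!(page[PAGE_HEADER + PAGE_N_RECS]
                 | page[PAGE_HEADER + PAGE_N_RECS + 1]));
        assert(!(page[PAGE_HEADER + PAGE_LEVEL]
                 | page[PAGE_HEADER + PAGE_LEVEL + 1]));
}
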
+
+#ifndef UNIV_INNOCHECKSUM
+/** Determine whether an index page record is not in ROW_FORMAT=REDUNDANT.
+@param[in] rec record in an index page frame (not a copy)
+@return nonzero if ROW_FORMAT is one of COMPACT, DYNAMIC, COMPRESSED
+@retval 0 if ROW_FORMAT=REDUNDANT */
+inline
+byte
+page_rec_is_comp(const byte* rec)
+{
+ return(page_is_comp(page_align(rec)));
+}
+
+/** Determine the offset of the infimum record on the page.
+@param[in] page index page
+@return offset of the infimum record in record list, relative from page */
+inline
+unsigned
+page_get_infimum_offset(const page_t* page)
+{
+ ut_ad(!page_offset(page));
+ return page_is_comp(page) ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM;
+}
+
+/** Determine the offset of the supremum record on the page.
+@param[in] page index page
+@return offset of the supremum record in record list, relative from page */
+inline
+unsigned
+page_get_supremum_offset(const page_t* page)
+{
+ ut_ad(!page_offset(page));
+ return page_is_comp(page) ? PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM;
+}
+
+/** Determine whether an index page record is a user record.
+@param[in] offset record offset in the page
+@retval true if a user record
+@retval false if the infimum or supremum pseudo-record */
+inline
+bool
+page_rec_is_user_rec_low(ulint offset)
+{
+ compile_time_assert(PAGE_OLD_INFIMUM >= PAGE_NEW_INFIMUM);
+ compile_time_assert(PAGE_OLD_SUPREMUM >= PAGE_NEW_SUPREMUM);
+ compile_time_assert(PAGE_NEW_INFIMUM < PAGE_OLD_SUPREMUM);
+ compile_time_assert(PAGE_OLD_INFIMUM < PAGE_NEW_SUPREMUM);
+ compile_time_assert(PAGE_NEW_SUPREMUM < PAGE_OLD_SUPREMUM_END);
+ compile_time_assert(PAGE_OLD_SUPREMUM < PAGE_NEW_SUPREMUM_END);
+ ut_ad(offset >= PAGE_NEW_INFIMUM);
+ ut_ad(offset <= srv_page_size - PAGE_EMPTY_DIR_START);
+
+ return(offset != PAGE_NEW_SUPREMUM
+ && offset != PAGE_NEW_INFIMUM
+ && offset != PAGE_OLD_INFIMUM
+ && offset != PAGE_OLD_SUPREMUM);
+}
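
With the layout constants of this source tree (PAGE_DATA == 94,
REC_N_OLD_EXTRA_BYTES == 6, REC_N_NEW_EXTRA_BYTES == 5 -- quoted here as
assumptions), the four pseudo-record offsets evaluate to fixed, pairwise
distinct values; the compile-time asserts above pin down their ordering, and
the four equality tests rely on nothing more. A standalone sketch using those
derived values:

#include <cassert>

enum {
        PAGE_NEW_INFIMUM  =  99,
        PAGE_OLD_INFIMUM  = 101,
        PAGE_NEW_SUPREMUM = 112,
        PAGE_OLD_SUPREMUM = 116
};

static bool is_user_rec(unsigned offset)
{
        return offset != PAGE_NEW_SUPREMUM && offset != PAGE_NEW_INFIMUM
            && offset != PAGE_OLD_INFIMUM && offset != PAGE_OLD_SUPREMUM;
}

int main()
{
        assert(!is_user_rec(PAGE_NEW_INFIMUM));
        assert(!is_user_rec(PAGE_OLD_SUPREMUM));
        assert(is_user_rec(130));   /* any other in-page record offset */
}
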
+
+/** Determine if a record is the supremum record on an index page.
+@param[in] offset record offset in an index page
+@return true if the supremum record */
+inline
+bool
+page_rec_is_supremum_low(ulint offset)
+{
+ ut_ad(offset >= PAGE_NEW_INFIMUM);
+ ut_ad(offset <= srv_page_size - PAGE_EMPTY_DIR_START);
+ return(offset == PAGE_NEW_SUPREMUM || offset == PAGE_OLD_SUPREMUM);
+}
+
+/** Determine if a record is the infimum record on an index page.
+@param[in] offset record offset in an index page
+@return true if the infimum record */
+inline
+bool
+page_rec_is_infimum_low(ulint offset)
+{
+ ut_ad(offset >= PAGE_NEW_INFIMUM);
+ ut_ad(offset <= srv_page_size - PAGE_EMPTY_DIR_START);
+ return(offset == PAGE_NEW_INFIMUM || offset == PAGE_OLD_INFIMUM);
+}
+
+/** Determine whether a B-tree or R-tree index record is in a leaf page.
+@param[in] rec index record in an index page
+@return true if the record is in a leaf page */
+inline
+bool
+page_rec_is_leaf(const page_t* rec)
+{
+ const page_t* page = page_align(rec);
+ ut_ad(ulint(rec - page) >= page_get_infimum_offset(page));
+ bool leaf = page_is_leaf(page);
+ ut_ad(!page_rec_is_comp(rec)
+ || !page_rec_is_user_rec_low(ulint(rec - page))
+ || leaf == !rec_get_node_ptr_flag(rec));
+ return leaf;
+}
+
+/** Determine whether an index page record is a user record.
+@param[in] rec record in an index page
+@return true if a user record */
+inline
+bool
+page_rec_is_user_rec(const rec_t* rec);
+
+/** Determine whether an index page record is the supremum record.
+@param[in] rec record in an index page
+@return true if the supremum record */
+inline
+bool
+page_rec_is_supremum(const rec_t* rec);
+
+/** Determine whether an index page record is the infimum record.
+@param[in] rec record in an index page
+@return true if the infimum record */
+inline
+bool
+page_rec_is_infimum(const rec_t* rec);
+
/*************************************************************//**
Returns the max trx id field value. */
UNIV_INLINE
@@ -192,7 +365,6 @@ page_get_max_trx_id(
const page_t* page); /*!< in: page */
/*************************************************************//**
Sets the max trx id field value. */
-UNIV_INTERN
void
page_set_max_trx_id(
/*================*/
@@ -212,14 +384,63 @@ page_update_max_trx_id(
uncompressed part will be updated, or NULL */
trx_id_t trx_id, /*!< in: transaction id */
mtr_t* mtr); /*!< in/out: mini-transaction */
+
+/** Persist the AUTO_INCREMENT value on a clustered index root page.
+@param[in,out] block clustered index root page
+@param[in] index clustered index
+@param[in] autoinc next available AUTO_INCREMENT value
+@param[in,out] mtr mini-transaction
+@param[in] reset whether to reset the AUTO_INCREMENT
+ to a possibly smaller value than currently
+ exists in the page */
+void
+page_set_autoinc(
+ buf_block_t* block,
+ const dict_index_t* index MY_ATTRIBUTE((unused)),
+ ib_uint64_t autoinc,
+ mtr_t* mtr,
+ bool reset)
+ MY_ATTRIBUTE((nonnull));
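
The reset flag exists because the persisted counter normally only moves
forward; only operations that may legitimately lower it (e.g. an explicit
AUTO_INCREMENT change) pass reset == true. A stand-in sketch of that contract
only -- the actual function also handles redo logging and the compressed-page
path:

#include <cassert>
#include <cstdint>

static uint64_t persisted = 100;   /* toy stand-in for the page field */

static void set_autoinc(uint64_t autoinc, bool reset)
{
        if (reset || autoinc > persisted) persisted = autoinc;
}

int main()
{
        set_autoinc(50, false); assert(persisted == 100); /* never shrinks */
        set_autoinc(50, true);  assert(persisted == 50);  /* explicit reset */
}
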
+
+/** Read the AUTO_INCREMENT value from a clustered index root page.
+@param[in] page clustered index root page
+@return the persisted AUTO_INCREMENT value */
+MY_ATTRIBUTE((nonnull, warn_unused_result))
+UNIV_INLINE
+ib_uint64_t
+page_get_autoinc(const page_t* page);
+
+/*************************************************************//**
+Returns the RTREE SPLIT SEQUENCE NUMBER (FIL_RTREE_SPLIT_SEQ_NUM).
+@return SPLIT SEQUENCE NUMBER */
+UNIV_INLINE
+node_seq_t
+page_get_ssn_id(
+/*============*/
+ const page_t* page); /*!< in: page */
+/*************************************************************//**
+Sets the RTREE SPLIT SEQUENCE NUMBER field value */
+UNIV_INLINE
+void
+page_set_ssn_id(
+/*============*/
+ buf_block_t* block, /*!< in/out: page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose
+ uncompressed part will be updated, or NULL */
+ node_seq_t ssn_id, /*!< in: split sequence id */
+ mtr_t* mtr); /*!< in/out: mini-transaction */
+
+#endif /* !UNIV_INNOCHECKSUM */
/*************************************************************//**
Reads the given header field. */
UNIV_INLINE
-ulint
+uint16_t
page_header_get_field(
/*==================*/
const page_t* page, /*!< in: page */
ulint field); /*!< in: PAGE_N_DIR_SLOTS, ... */
+
+#ifndef UNIV_INNOCHECKSUM
/*************************************************************//**
Sets the given header field. */
UNIV_INLINE
@@ -233,14 +454,14 @@ page_header_set_field(
ulint val); /*!< in: value */
/*************************************************************//**
Returns the offset stored in the given header field.
-@return offset from the start of the page, or 0 */
+@return offset from the start of the page, or 0 */
UNIV_INLINE
-ulint
+uint16_t
page_header_get_offs(
/*=================*/
const page_t* page, /*!< in: page */
ulint field) /*!< in: PAGE_FREE, ... */
- MY_ATTRIBUTE((nonnull, pure));
+ MY_ATTRIBUTE((warn_unused_result));
/*************************************************************//**
Returns the pointer stored in the given header field, or NULL. */
@@ -258,7 +479,7 @@ page_header_set_ptr(
uncompressed part will be updated, or NULL */
ulint field, /*!< in/out: PAGE_FREE, ... */
const byte* ptr); /*!< in: pointer or NULL*/
-#ifndef UNIV_HOTBACKUP
+
/*************************************************************//**
Resets the last insert info field in the page header. Writes to mlog
about this operation. */
@@ -270,31 +491,13 @@ page_header_reset_last_insert(
page_zip_des_t* page_zip,/*!< in/out: compressed page whose
uncompressed part will be updated, or NULL */
mtr_t* mtr); /*!< in: mtr */
-#endif /* !UNIV_HOTBACKUP */
-/************************************************************//**
-Gets the offset of the first record on the page.
-@return offset of the first record in record list, relative from page */
-UNIV_INLINE
-ulint
-page_get_infimum_offset(
-/*====================*/
- const page_t* page); /*!< in: page which must have record(s) */
-/************************************************************//**
-Gets the offset of the last record on the page.
-@return offset of the last record in record list, relative from page */
-UNIV_INLINE
-ulint
-page_get_supremum_offset(
-/*=====================*/
- const page_t* page); /*!< in: page which must have record(s) */
#define page_get_infimum_rec(page) ((page) + page_get_infimum_offset(page))
#define page_get_supremum_rec(page) ((page) + page_get_supremum_offset(page))
/************************************************************//**
Returns the nth record of the record list.
This is the inverse function of page_rec_get_n_recs_before().
-@return nth record */
-UNIV_INTERN
+@return nth record */
const rec_t*
page_rec_get_nth_const(
/*===================*/
@@ -304,7 +507,7 @@ page_rec_get_nth_const(
/************************************************************//**
Returns the nth record of the record list.
This is the inverse function of page_rec_get_n_recs_before().
-@return nth record */
+@return nth record */
UNIV_INLINE
rec_t*
page_rec_get_nth(
@@ -313,12 +516,11 @@ page_rec_get_nth(
ulint nth) /*!< in: nth record */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-#ifndef UNIV_HOTBACKUP
/************************************************************//**
Returns the middle record of the records on the page. If there is an
even number of records in the list, returns the first record of the
upper half-list.
-@return middle record */
+@return middle record */
UNIV_INLINE
rec_t*
page_get_middle_rec(
@@ -326,75 +528,47 @@ page_get_middle_rec(
page_t* page) /*!< in: page */
MY_ATTRIBUTE((nonnull, warn_unused_result));
/*************************************************************//**
-Compares a data tuple to a physical record. Differs from the function
-cmp_dtuple_rec_with_match in the way that the record must reside on an
-index page, and also page infimum and supremum records can be given in
-the parameter rec. These are considered as the negative infinity and
-the positive infinity in the alphabetical order.
-@return 1, 0, -1, if dtuple is greater, equal, less than rec,
-respectively, when only the common first fields are compared */
-UNIV_INLINE
-int
-page_cmp_dtuple_rec_with_match(
-/*===========================*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record on a page; may also
- be page infimum or supremum, in which case
- matched-parameter values below are not
- affected */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint* matched_fields, /*!< in/out: number of already completely
- matched fields; when function returns
- contains the value for current comparison */
- ulint* matched_bytes); /*!< in/out: number of already matched
- bytes within the first field not completely
- matched; when function returns contains the
- value for current comparison */
-#endif /* !UNIV_HOTBACKUP */
-#endif /* !UNIV_INNOCHECKSUM */
-/*************************************************************//**
Gets the page number.
-@return page number */
+@return page number */
UNIV_INLINE
ulint
page_get_page_no(
/*=============*/
const page_t* page); /*!< in: page */
-#ifndef UNIV_INNOCHECKSUM
+
/*************************************************************//**
Gets the tablespace identifier.
-@return space id */
+@return space id */
UNIV_INLINE
ulint
page_get_space_id(
/*==============*/
const page_t* page); /*!< in: page */
-#endif /* !UNIV_INNOCHECKSUM */
+
/*************************************************************//**
Gets the number of user records on page (the infimum and supremum records
are not user records).
-@return number of user records */
+@return number of user records */
UNIV_INLINE
-ulint
+uint16_t
page_get_n_recs(
/*============*/
const page_t* page); /*!< in: index page */
-#ifndef UNIV_INNOCHECKSUM
+
/***************************************************************//**
Returns the number of records before the given record in chain.
The number includes infimum and supremum records.
This is the inverse function of page_rec_get_nth().
-@return number of records */
-UNIV_INTERN
+@return number of records */
ulint
page_rec_get_n_recs_before(
/*=======================*/
const rec_t* rec); /*!< in: the physical record */
/*************************************************************//**
Gets the number of records in the heap.
-@return number of user records */
+@return number of user records */
UNIV_INLINE
-ulint
+uint16_t
page_dir_get_n_heap(
/*================*/
const page_t* page); /*!< in: index page */
@@ -413,9 +587,9 @@ page_dir_set_n_heap(
ulint n_heap);/*!< in: number of records */
/*************************************************************//**
Gets the number of dir slots in directory.
-@return number of slots */
+@return number of slots */
UNIV_INLINE
-ulint
+uint16_t
page_dir_get_n_slots(
/*=================*/
const page_t* page); /*!< in: index page */
@@ -432,7 +606,7 @@ page_dir_set_n_slots(
#ifdef UNIV_DEBUG
/*************************************************************//**
Gets pointer to nth directory slot.
-@return pointer to dir slot */
+@return pointer to dir slot */
UNIV_INLINE
page_dir_slot_t*
page_dir_get_nth_slot(
@@ -440,13 +614,13 @@ page_dir_get_nth_slot(
const page_t* page, /*!< in: index page */
ulint n); /*!< in: position */
#else /* UNIV_DEBUG */
-# define page_dir_get_nth_slot(page, n) \
- ((page) + UNIV_PAGE_SIZE - PAGE_DIR \
- - (n + 1) * PAGE_DIR_SLOT_SIZE)
+# define page_dir_get_nth_slot(page, n) \
+ ((page) + (srv_page_size - PAGE_DIR \
+ - (n + 1) * PAGE_DIR_SLOT_SIZE))
#endif /* UNIV_DEBUG */
/**************************************************************//**
Used to check the consistency of a record on a page.
-@return TRUE if succeed */
+@return TRUE on success */
UNIV_INLINE
ibool
page_rec_check(
@@ -454,7 +628,7 @@ page_rec_check(
const rec_t* rec); /*!< in: record */
/***************************************************************//**
Gets the record pointed to by a directory slot.
-@return pointer to record */
+@return pointer to record */
UNIV_INLINE
const rec_t*
page_dir_slot_get_rec(
@@ -470,7 +644,7 @@ page_dir_slot_set_rec(
rec_t* rec); /*!< in: record on the page */
/***************************************************************//**
Gets the number of records owned by a directory slot.
-@return number of records */
+@return number of records */
UNIV_INLINE
ulint
page_dir_slot_get_n_owned(
@@ -497,69 +671,54 @@ page_dir_calc_reserved_space(
ulint n_recs); /*!< in: number of records */
/***************************************************************//**
Looks for the directory slot which owns the given record.
-@return the directory slot number */
-UNIV_INTERN
+@return the directory slot number */
ulint
page_dir_find_owner_slot(
/*=====================*/
const rec_t* rec); /*!< in: the physical record */
-/************************************************************//**
-Determine whether the page is in new-style compact format.
-@return nonzero if the page is in compact format, zero if it is in
-old-style format */
-UNIV_INLINE
-ulint
-page_is_comp(
-/*=========*/
- const page_t* page); /*!< in: index page */
-/************************************************************//**
-TRUE if the record is on a page in compact format.
-@return nonzero if in compact format */
-UNIV_INLINE
-ulint
-page_rec_is_comp(
-/*=============*/
- const rec_t* rec); /*!< in: record */
+
/***************************************************************//**
Returns the heap number of a record.
-@return heap number */
+@return heap number */
UNIV_INLINE
ulint
page_rec_get_heap_no(
/*=================*/
const rec_t* rec); /*!< in: the physical record */
-#endif /* !UNIV_INNOCHECKSUM */
-/************************************************************//**
-Determine whether the page is a B-tree leaf.
-@return true if the page is a B-tree leaf (PAGE_LEVEL = 0) */
-UNIV_INLINE
-bool
-page_is_leaf(
-/*=========*/
- const page_t* page) /*!< in: page */
- MY_ATTRIBUTE((nonnull, pure));
-#ifndef UNIV_INNOCHECKSUM
-/************************************************************//**
-Determine whether the page is empty.
-@return true if the page is empty (PAGE_N_RECS = 0) */
-UNIV_INLINE
-bool
-page_is_empty(
-/*==========*/
- const page_t* page) /*!< in: page */
- MY_ATTRIBUTE((nonnull, pure));
-/************************************************************//**
-Determine whether the page contains garbage.
-@return true if the page contains garbage (PAGE_GARBAGE is not 0) */
-UNIV_INLINE
-bool
-page_has_garbage(
-/*=============*/
- const page_t* page) /*!< in: page */
- MY_ATTRIBUTE((nonnull, pure));
+
+/** Determine whether a page has any siblings.
+@param[in] page page frame
+@return true if the page has any siblings */
+inline bool page_has_siblings(const page_t* page)
+{
+ compile_time_assert(!(FIL_PAGE_PREV % 8));
+ compile_time_assert(FIL_PAGE_NEXT == FIL_PAGE_PREV + 4);
+ compile_time_assert(FIL_NULL == 0xffffffff);
+ return *reinterpret_cast<const uint64_t*>(page + FIL_PAGE_PREV)
+ != ~uint64_t(0);
+}
+
+/** Determine whether a page has a predecessor.
+@param[in] page page frame
+@return true if the page has a predecessor */
+inline bool page_has_prev(const page_t* page)
+{
+ return *reinterpret_cast<const uint32_t*>(page + FIL_PAGE_PREV)
+ != FIL_NULL;
+}
+
+/** Determine whether a page has a successor.
+@param[in] page page frame
+@return true if the page has a successor */
+inline bool page_has_next(const page_t* page)
+{
+ return *reinterpret_cast<const uint32_t*>(page + FIL_PAGE_NEXT)
+ != FIL_NULL;
+}
+
/************************************************************//**
Gets the pointer to the next record on the page.
-@return pointer to next record */
+@return pointer to next record */
UNIV_INLINE
const rec_t*
page_rec_get_next_low(
@@ -568,7 +727,7 @@ page_rec_get_next_low(
ulint comp); /*!< in: nonzero=compact page layout */
/************************************************************//**
Gets the pointer to the next record on the page.
-@return pointer to next record */
+@return pointer to next record */
UNIV_INLINE
rec_t*
page_rec_get_next(
@@ -576,7 +735,7 @@ page_rec_get_next(
rec_t* rec); /*!< in: pointer to record */
/************************************************************//**
Gets the pointer to the next record on the page.
-@return pointer to next record */
+@return pointer to next record */
UNIV_INLINE
const rec_t*
page_rec_get_next_const(
@@ -586,7 +745,7 @@ page_rec_get_next_const(
Gets the pointer to the next non delete-marked record on the page.
If all subsequent records are delete-marked, then this function
will return the supremum record.
-@return pointer to next non delete-marked record or pointer to supremum */
+@return pointer to next non delete-marked record or pointer to supremum */
UNIV_INLINE
const rec_t*
page_rec_get_next_non_del_marked(
@@ -604,7 +763,7 @@ page_rec_set_next(
must not be page infimum */
/************************************************************//**
Gets the pointer to the previous record.
-@return pointer to previous record */
+@return pointer to previous record */
UNIV_INLINE
const rec_t*
page_rec_get_prev_const(
@@ -613,94 +772,87 @@ page_rec_get_prev_const(
infimum */
/************************************************************//**
Gets the pointer to the previous record.
-@return pointer to previous record */
+@return pointer to previous record */
UNIV_INLINE
rec_t*
page_rec_get_prev(
/*==============*/
rec_t* rec); /*!< in: pointer to record,
must not be page infimum */
+
/************************************************************//**
-TRUE if the record is a user record on the page.
-@return TRUE if a user record */
-UNIV_INLINE
-ibool
-page_rec_is_user_rec_low(
-/*=====================*/
- ulint offset) /*!< in: record offset on page */
- MY_ATTRIBUTE((const));
-/************************************************************//**
-TRUE if the record is the supremum record on a page.
-@return TRUE if the supremum record */
+true if the record is the first user record on a page.
+@return true if the first user record */
UNIV_INLINE
-ibool
-page_rec_is_supremum_low(
-/*=====================*/
- ulint offset) /*!< in: record offset on page */
- MY_ATTRIBUTE((const));
+bool
+page_rec_is_first(
+/*==============*/
+ const rec_t* rec, /*!< in: record */
+ const page_t* page) /*!< in: page */
+ MY_ATTRIBUTE((warn_unused_result));
+
/************************************************************//**
-TRUE if the record is the infimum record on a page.
-@return TRUE if the infimum record */
+true if the record is the second user record on a page.
+@return true if the second user record */
UNIV_INLINE
-ibool
-page_rec_is_infimum_low(
-/*====================*/
- ulint offset) /*!< in: record offset on page */
- MY_ATTRIBUTE((const));
+bool
+page_rec_is_second(
+/*===============*/
+ const rec_t* rec, /*!< in: record */
+ const page_t* page) /*!< in: page */
+ MY_ATTRIBUTE((warn_unused_result));
/************************************************************//**
-TRUE if the record is a user record on the page.
-@return TRUE if a user record */
+true if the record is the last user record on a page.
+@return true if the last user record */
UNIV_INLINE
-ibool
-page_rec_is_user_rec(
-/*=================*/
- const rec_t* rec) /*!< in: record */
- MY_ATTRIBUTE((const));
+bool
+page_rec_is_last(
+/*=============*/
+ const rec_t* rec, /*!< in: record */
+ const page_t* page) /*!< in: page */
+ MY_ATTRIBUTE((warn_unused_result));
+
/************************************************************//**
-TRUE if the record is the supremum record on a page.
-@return TRUE if the supremum record */
+true if the distance between the records (measured as the number of times
+we have to move to the next record) is at most the specified value
+@param[in] left_rec left record
+@param[in] right_rec right record
+@param[in] val maximum allowed distance
+@return true if the distance is at most val */
UNIV_INLINE
-ibool
-page_rec_is_supremum(
-/*=================*/
- const rec_t* rec) /*!< in: record */
- MY_ATTRIBUTE((const));
+bool
+page_rec_distance_is_at_most(
+/*=========================*/
+ const rec_t* left_rec,
+ const rec_t* right_rec,
+ ulint val)
+ MY_ATTRIBUTE((warn_unused_result));
/************************************************************//**
-TRUE if the record is the infimum record on a page.
-@return TRUE if the infimum record */
+true if the record is the second last user record on a page.
+@return true if the second last user record */
UNIV_INLINE
-ibool
-page_rec_is_infimum(
-/*================*/
- const rec_t* rec) /*!< in: record */
- MY_ATTRIBUTE((const));
+bool
+page_rec_is_second_last(
+/*====================*/
+ const rec_t* rec, /*!< in: record */
+ const page_t* page) /*!< in: page */
+ MY_ATTRIBUTE((warn_unused_result));
+
/***************************************************************//**
Looks for the record which owns the given record.
-@return the owner record */
+@return the owner record */
UNIV_INLINE
rec_t*
page_rec_find_owner_rec(
/*====================*/
rec_t* rec); /*!< in: the physical record */
-#ifndef UNIV_HOTBACKUP
-/***********************************************************************//**
-Write a 32-bit field in a data dictionary record. */
-UNIV_INLINE
-void
-page_rec_write_field(
-/*=================*/
- rec_t* rec, /*!< in/out: record to update */
- ulint i, /*!< in: index of the field to update */
- ulint val, /*!< in: value to write */
- mtr_t* mtr) /*!< in/out: mini-transaction */
- MY_ATTRIBUTE((nonnull));
-#endif /* !UNIV_HOTBACKUP */
+
/************************************************************//**
Returns the maximum combined size of records which can be inserted on top
of record heap.
-@return maximum combined size for inserted records */
+@return maximum combined size for inserted records */
UNIV_INLINE
ulint
page_get_max_insert_size(
@@ -710,7 +862,7 @@ page_get_max_insert_size(
/************************************************************//**
Returns the maximum combined size of records which can be inserted on top
of record heap if page is first reorganized.
-@return maximum combined size for inserted records */
+@return maximum combined size for inserted records */
UNIV_INLINE
ulint
page_get_max_insert_size_after_reorganize(
@@ -719,7 +871,7 @@ page_get_max_insert_size_after_reorganize(
ulint n_recs);/*!< in: number of records */
/*************************************************************//**
Calculates free space if a page is emptied.
-@return free space */
+@return free space */
UNIV_INLINE
ulint
page_get_free_space_of_empty(
@@ -729,7 +881,7 @@ page_get_free_space_of_empty(
/**********************************************************//**
Returns the base extra size of a physical record. This is the
size of the fixed header, independent of the record size.
-@return REC_N_NEW_EXTRA_BYTES or REC_N_OLD_EXTRA_BYTES */
+@return REC_N_NEW_EXTRA_BYTES or REC_N_OLD_EXTRA_BYTES */
UNIV_INLINE
ulint
page_rec_get_base_extra_size(
@@ -738,9 +890,9 @@ page_rec_get_base_extra_size(
/************************************************************//**
Returns the sum of the sizes of the records in the record list
excluding the infimum and supremum records.
-@return data in bytes */
+@return data in bytes */
UNIV_INLINE
-ulint
+uint16_t
page_get_data_size(
/*===============*/
const page_t* page); /*!< in: index page */
@@ -760,8 +912,7 @@ page_mem_alloc_free(
ulint need); /*!< in: number of bytes allocated */
/************************************************************//**
Allocates a block of memory from the heap of an index page.
-@return pointer to start of allocated buffer, or NULL if allocation fails */
-UNIV_INTERN
+@return pointer to start of allocated buffer, or NULL if allocation fails */
byte*
page_mem_alloc_heap(
/*================*/
@@ -785,36 +936,42 @@ page_mem_free(
rec_t* rec, /*!< in: pointer to the (origin of)
record */
const dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets);/*!< in: array returned by
+ const offset_t* offsets);/*!< in: array returned by
rec_get_offsets() */
/**********************************************************//**
Create an uncompressed B-tree index page.
-@return pointer to the page */
-UNIV_INTERN
+@return pointer to the page */
page_t*
page_create(
/*========*/
buf_block_t* block, /*!< in: a buffer block where the
page is created */
mtr_t* mtr, /*!< in: mini-transaction handle */
- ulint comp); /*!< in: nonzero=compact page format */
+ ulint comp, /*!< in: nonzero=compact page format */
+ bool is_rtree); /*!< in: if creating R-tree page */
/**********************************************************//**
Create a compressed B-tree index page.
-@return pointer to the page */
-UNIV_INTERN
+@return pointer to the page */
page_t*
page_create_zip(
/*============*/
- buf_block_t* block, /*!< in/out: a buffer frame where the
- page is created */
- dict_index_t* index, /*!< in: the index of the page */
- ulint level, /*!< in: the B-tree level of the page */
- trx_id_t max_trx_id, /*!< in: PAGE_MAX_TRX_ID */
- mtr_t* mtr) /*!< in/out: mini-transaction */
- MY_ATTRIBUTE((nonnull));
+ buf_block_t* block, /*!< in/out: a buffer frame
+ where the page is created */
+ dict_index_t* index, /*!< in: the index of the
+ page, or NULL when applying
+ TRUNCATE log
+ record during recovery */
+ ulint level, /*!< in: the B-tree level of
+ the page */
+ trx_id_t max_trx_id, /*!< in: PAGE_MAX_TRX_ID */
+ const redo_page_compress_t* page_comp_info,
+ /*!< in: used for applying
+ TRUNCATE log
+ record during recovery */
+ mtr_t* mtr); /*!< in/out: mini-transaction
+ handle */
/**********************************************************//**
Empty a previously created B-tree index page. */
-UNIV_INTERN
void
page_create_empty(
/*==============*/
@@ -830,7 +987,6 @@ IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
if new_block is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit(). */
-UNIV_INTERN
void
page_copy_rec_list_end_no_locks(
/*============================*/
@@ -851,7 +1007,6 @@ or by invoking ibuf_reset_free_bits() before mtr_commit().
@return pointer to the original successor of the infimum record on
new_page, or NULL on zip overflow (new_block will be decompressed) */
-UNIV_INTERN
rec_t*
page_copy_rec_list_end(
/*===================*/
@@ -873,7 +1028,6 @@ or by invoking ibuf_reset_free_bits() before mtr_commit().
@return pointer to the original predecessor of the supremum record on
new_page, or NULL on zip overflow (new_block will be decompressed) */
-UNIV_INTERN
rec_t*
page_copy_rec_list_start(
/*=====================*/
@@ -886,7 +1040,6 @@ page_copy_rec_list_start(
/*************************************************************//**
Deletes records from a page from a given record onward, including that record.
The infimum and supremum records are not deleted. */
-UNIV_INTERN
void
page_delete_rec_list_end(
/*=====================*/
@@ -903,7 +1056,6 @@ page_delete_rec_list_end(
/*************************************************************//**
Deletes records from page, up to the given record, NOT including
that record. Infimum and supremum records are not deleted. */
-UNIV_INTERN
void
page_delete_rec_list_start(
/*=======================*/
@@ -923,7 +1075,6 @@ or by invoking ibuf_reset_free_bits() before mtr_commit().
@return TRUE on success; FALSE on compression failure (new_block will
be decompressed) */
-UNIV_INTERN
ibool
page_move_rec_list_end(
/*===================*/
@@ -942,8 +1093,7 @@ if new_block is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit().
-@return TRUE on success; FALSE on compression failure */
-UNIV_INTERN
+@return TRUE on success; FALSE on compression failure */
ibool
page_move_rec_list_start(
/*=====================*/
@@ -955,7 +1105,6 @@ page_move_rec_list_start(
MY_ATTRIBUTE((nonnull(1, 2, 4, 5)));
/****************************************************************//**
Splits a directory slot which owns too many records. */
-UNIV_INTERN
void
page_dir_split_slot(
/*================*/
@@ -969,7 +1118,6 @@ Tries to balance the given directory slot with too few records
with the upper neighbor, so that there are at least the minimum number
of records owned by the slot; this may result in the merging of
two slots. */
-UNIV_INTERN
void
page_dir_balance_slot(
/*==================*/
@@ -979,12 +1127,11 @@ page_dir_balance_slot(
MY_ATTRIBUTE((nonnull(1)));
/**********************************************************//**
Parses a log record of a record list end or start deletion.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
page_parse_delete_rec_list(
/*=======================*/
- byte type, /*!< in: MLOG_LIST_END_DELETE,
+ mlog_id_t type, /*!< in: MLOG_LIST_END_DELETE,
MLOG_LIST_START_DELETE,
MLOG_COMP_LIST_END_DELETE or
MLOG_COMP_LIST_START_DELETE */
@@ -993,33 +1140,28 @@ page_parse_delete_rec_list(
buf_block_t* block, /*!< in/out: buffer block or NULL */
dict_index_t* index, /*!< in: record descriptor */
mtr_t* mtr); /*!< in: mtr or NULL */
-/***********************************************************//**
-Parses a redo log record of creating a page.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
+/** Parses a redo log record of creating a page.
+@param[in,out] block buffer block, or NULL
+@param[in] comp nonzero=compact page format
+@param[in] is_rtree whether it is an R-tree page */
+void
page_parse_create(
-/*==============*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- ulint comp, /*!< in: nonzero=compact page format */
- buf_block_t* block, /*!< in: block or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
-#ifndef UNIV_HOTBACKUP
+ buf_block_t* block,
+ ulint comp,
+ bool is_rtree);
+
/************************************************************//**
Prints record contents including the data relevant only in
the index page context. */
-UNIV_INTERN
void
page_rec_print(
/*===========*/
const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: record descriptor */
+ const offset_t* offsets);/*!< in: record descriptor */
# ifdef UNIV_BTR_PRINT
/***************************************************************//**
This is used to print the contents of the directory for
debugging purposes. */
-UNIV_INTERN
void
page_dir_print(
/*===========*/
@@ -1028,7 +1170,6 @@ page_dir_print(
/***************************************************************//**
This is used to print the contents of the page record list for
debugging purposes. */
-UNIV_INTERN
void
page_print_list(
/*============*/
@@ -1037,7 +1178,6 @@ page_print_list(
ulint pr_n); /*!< in: print n first and n last entries */
/***************************************************************//**
Prints the info in a page header. */
-UNIV_INTERN
void
page_header_print(
/*==============*/
@@ -1045,7 +1185,6 @@ page_header_print(
/***************************************************************//**
This is used to print the contents of the page for
debugging purposes. */
-UNIV_INTERN
void
page_print(
/*=======*/
@@ -1056,33 +1195,31 @@ page_print(
ulint rn); /*!< in: print rn first and last records
in directory */
# endif /* UNIV_BTR_PRINT */
-#endif /* !UNIV_HOTBACKUP */
/***************************************************************//**
The following is used to validate a record on a page. This function
differs from rec_validate as it can also check the n_owned field and
the heap_no field.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
page_rec_validate(
/*==============*/
const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets);/*!< in: array returned by rec_get_offsets() */
+#ifdef UNIV_DEBUG
/***************************************************************//**
Checks that the first directory slot points to the infimum record and
the last to the supremum. This function is intended to track if the
bug fixed in 4.0.14 has caused corruption to users' databases. */
-UNIV_INTERN
void
page_check_dir(
/*===========*/
const page_t* page); /*!< in: index page */
+#endif /* UNIV_DEBUG */
/***************************************************************//**
This function checks the consistency of an index page when we do not
know the index. This is also resilient so that this should never crash
even if the page is total garbage.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
page_simple_validate_old(
/*=====================*/
@@ -1091,16 +1228,14 @@ page_simple_validate_old(
This function checks the consistency of an index page when we do not
know the index. This is also resilient so that this should never crash
even if the page is total garbage.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
page_simple_validate_new(
/*=====================*/
const page_t* page); /*!< in: index page in ROW_FORMAT!=REDUNDANT */
/***************************************************************//**
This function checks the consistency of an index page.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
page_validate(
/*==========*/
@@ -1109,8 +1244,7 @@ page_validate(
the page record type definition */
/***************************************************************//**
Looks in the page record list for a record with the given heap number.
-@return record, NULL if not found */
-
+@return record, NULL if not found */
const rec_t*
page_find_rec_with_heap_no(
/*=======================*/
@@ -1120,7 +1254,6 @@ page_find_rec_with_heap_no(
@param[in] page index tree leaf page
@return the last record, not delete-marked
@retval infimum record if all records are delete-marked */
-
const rec_t*
page_find_rec_max_not_deleted(
const page_t* page);
@@ -1132,9 +1265,7 @@ page_find_rec_max_not_deleted(
#endif /* !UNIV_INNOCHECKSUM */
-#ifndef UNIV_NONINL
#include "page0page.ic"
-#endif
#endif
diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic
index 638ff609e67..75bfa56e2a6 100644
--- a/storage/innobase/include/page0page.ic
+++ b/storage/innobase/include/page0page.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2016, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,15 +24,15 @@ Index page routines
Created 2/2/1994 Heikki Tuuri
*******************************************************/
-#ifndef UNIV_INNOCHECKSUM
+#ifndef page0page_ic
+#define page0page_ic
+#ifndef UNIV_INNOCHECKSUM
#include "mach0data.h"
#ifdef UNIV_DEBUG
# include "log0recv.h"
#endif /* UNIV_DEBUG */
-#ifndef UNIV_HOTBACKUP
-# include "rem0cmp.h"
-#endif /* !UNIV_HOTBACKUP */
+#include "rem0cmp.h"
#include "mtr0log.h"
#include "page0zip.h"
@@ -40,31 +41,6 @@ Created 2/2/1994 Heikki Tuuri
#define UNIV_INLINE
#endif
-#endif /* !UNIV_INNOCHECKSUM */
-/************************************************************//**
-Gets the start of a page.
-@return start of the page */
-UNIV_INLINE
-page_t*
-page_align(
-/*=======*/
- const void* ptr) /*!< in: pointer to page frame */
-{
- return((page_t*) ut_align_down(ptr, UNIV_PAGE_SIZE));
-}
-
-#ifndef UNIV_INNOCHECKSUM
-/************************************************************//**
-Gets the offset within a page.
-@return offset from the start of the page */
-UNIV_INLINE
-ulint
-page_offset(
-/*========*/
- const void* ptr) /*!< in: pointer to page frame */
-{
- return(ut_align_offset(ptr, UNIV_PAGE_SIZE));
-}
/*************************************************************//**
Returns the max trx id field value. */
UNIV_INLINE
@@ -108,11 +84,69 @@ page_update_max_trx_id(
}
}
+/** Read the AUTO_INCREMENT value from a clustered index root page.
+@param[in] page clustered index root page
+@return the persisted AUTO_INCREMENT value */
+UNIV_INLINE
+ib_uint64_t
+page_get_autoinc(const page_t* page)
+{
+ ut_ad(fil_page_index_page_check(page));
+ ut_ad(!page_has_siblings(page));
+ return(mach_read_from_8(PAGE_HEADER + PAGE_ROOT_AUTO_INC + page));
+}
+
+/*************************************************************//**
+Returns the RTREE SPLIT SEQUENCE NUMBER (FIL_RTREE_SPLIT_SEQ_NUM).
+@return SPLIT SEQUENCE NUMBER */
+UNIV_INLINE
+node_seq_t
+page_get_ssn_id(
+/*============*/
+ const page_t* page) /*!< in: page */
+{
+ ut_ad(page);
+
+ return(static_cast<node_seq_t>(
+ mach_read_from_8(page + FIL_RTREE_SPLIT_SEQ_NUM)));
+}
+
+/*************************************************************//**
+Sets the RTREE SPLIT SEQUENCE NUMBER field value */
+UNIV_INLINE
+void
+page_set_ssn_id(
+/*============*/
+ buf_block_t* block, /*!< in/out: page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose
+ uncompressed part will be updated, or NULL */
+ node_seq_t ssn_id, /*!< in: split sequence id */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ page_t* page = buf_block_get_frame(block);
+
+ ut_ad(!mtr || mtr_memo_contains_flagged(mtr, block,
+ MTR_MEMO_PAGE_SX_FIX
+ | MTR_MEMO_PAGE_X_FIX));
+
+ if (page_zip) {
+ mach_write_to_8(page + FIL_RTREE_SPLIT_SEQ_NUM, ssn_id);
+ page_zip_write_header(page_zip,
+ page + FIL_RTREE_SPLIT_SEQ_NUM,
+ 8, mtr);
+ } else if (mtr) {
+ mlog_write_ull(page + FIL_RTREE_SPLIT_SEQ_NUM, ssn_id, mtr);
+ } else {
+ mach_write_to_8(page + FIL_RTREE_SPLIT_SEQ_NUM, ssn_id);
+ }
+}
+
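page_set_ssn_id() follows the three-way write dispatch that recurs throughout
this file: a compressed page must mirror the header into the zip image, an
ordinary write goes through the mini-transaction redo log, and the bare write
is the unlogged fallback. A stand-in sketch of that control flow only -- none
of the types or calls below are the real InnoDB API:

#include <cstdio>
#include <cstdint>

struct page_zip_des_t {};
struct mtr_t {};

static void set_field(uint8_t* field, uint64_t val,
                      page_zip_des_t* page_zip, mtr_t* mtr)
{
        (void) field; (void) val;
        if (page_zip) {
                /* mach_write_to_8() + page_zip_write_header(): keep the
                   uncompressed frame and the zip image in sync */
                std::puts("zip header write");
        } else if (mtr) {
                /* mlog_write_ull(): the write is redo-logged */
                std::puts("redo-logged write");
        } else {
                /* mach_write_to_8() alone: unlogged, e.g. while the
                   change is already covered by the log */
                std::puts("plain write");
        }
}

int main()
{
        uint8_t field[8] = {0};
        mtr_t mtr;
        set_field(field, 1, nullptr, &mtr);  /* prints "redo-logged write" */
}
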
#endif /* !UNIV_INNOCHECKSUM */
+
/*************************************************************//**
Reads the given header field. */
UNIV_INLINE
-ulint
+uint16_t
page_header_get_field(
/*==================*/
const page_t* page, /*!< in: page */
@@ -139,8 +173,11 @@ page_header_set_field(
{
ut_ad(page);
ut_ad(field <= PAGE_N_RECS);
- ut_ad(field == PAGE_N_HEAP || val < UNIV_PAGE_SIZE);
- ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < UNIV_PAGE_SIZE);
+#if 0 /* FIXME: MDEV-19344 hits this */
+ ut_ad(field != PAGE_N_RECS || val);
+#endif
+ ut_ad(field == PAGE_N_HEAP || val < srv_page_size);
+ ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < srv_page_size);
mach_write_to_2(page + PAGE_HEADER + field, val);
if (page_zip) {
@@ -151,21 +188,19 @@ page_header_set_field(
/*************************************************************//**
Returns the offset stored in the given header field.
-@return offset from the start of the page, or 0 */
+@return offset from the start of the page, or 0 */
UNIV_INLINE
-ulint
+uint16_t
page_header_get_offs(
/*=================*/
const page_t* page, /*!< in: page */
ulint field) /*!< in: PAGE_FREE, ... */
{
- ulint offs;
-
ut_ad((field == PAGE_FREE)
|| (field == PAGE_LAST_INSERT)
|| (field == PAGE_HEAP_TOP));
- offs = page_header_get_field(page, field);
+ uint16_t offs = page_header_get_field(page, field);
ut_ad((field != PAGE_HEAP_TOP) || offs);
@@ -194,7 +229,7 @@ page_header_set_ptr(
if (ptr == NULL) {
offs = 0;
} else {
- offs = ptr - page;
+ offs = ulint(ptr - page);
}
ut_ad((field != PAGE_HEAP_TOP) || offs);
@@ -202,7 +237,6 @@ page_header_set_ptr(
page_header_set_field(page, page_zip, field, offs);
}
-#ifndef UNIV_HOTBACKUP
/*************************************************************//**
Resets the last insert info field in the page header. Writes to mlog
about this operation. */
@@ -215,7 +249,8 @@ page_header_reset_last_insert(
uncompressed part will be updated, or NULL */
mtr_t* mtr) /*!< in: mtr */
{
- ut_ad(page && mtr);
+ ut_ad(page != NULL);
+ ut_ad(mtr != NULL);
if (page_zip) {
mach_write_to_2(page + (PAGE_HEADER + PAGE_LAST_INSERT), 0);
@@ -227,26 +262,10 @@ page_header_reset_last_insert(
MLOG_2BYTES, mtr);
}
}
-#endif /* !UNIV_HOTBACKUP */
-
-#endif /* !UNIV_INNOCHECKSUM */
-
-#ifndef UNIV_INNOCHECKSUM
-/************************************************************//**
-TRUE if the record is on a page in compact format.
-@return nonzero if in compact format */
-UNIV_INLINE
-ulint
-page_rec_is_comp(
-/*=============*/
- const rec_t* rec) /*!< in: record */
-{
- return(page_is_comp(page_align(rec)));
-}
/***************************************************************//**
Returns the heap number of a record.
-@return heap number */
+@return heap number */
UNIV_INLINE
ulint
page_rec_get_heap_no(
@@ -260,195 +279,126 @@ page_rec_get_heap_no(
}
}
-#endif /* !UNIV_INNOCHECKSUM */
-/************************************************************//**
-Determine whether the page is a B-tree leaf.
-@return true if the page is a B-tree leaf (PAGE_LEVEL = 0) */
-UNIV_INLINE
+/** Determine whether an index page record is a user record.
+@param[in] rec record in an index page
+@return true if a user record */
+inline
bool
-page_is_leaf(
-/*=========*/
- const page_t* page) /*!< in: page */
+page_rec_is_user_rec(const rec_t* rec)
{
- return(!*(const uint16*) (page + (PAGE_HEADER + PAGE_LEVEL)));
+ ut_ad(page_rec_check(rec));
+ return(page_rec_is_user_rec_low(page_offset(rec)));
}
-#ifndef UNIV_INNOCHECKSUM
-/************************************************************//**
-Determine whether the page is empty.
-@return true if the page is empty (PAGE_N_RECS = 0) */
-UNIV_INLINE
+/** Determine whether an index page record is the supremum record.
+@param[in] rec record in an index page
+@return true if the supremum record */
+inline
bool
-page_is_empty(
-/*==========*/
- const page_t* page) /*!< in: page */
+page_rec_is_supremum(const rec_t* rec)
{
- return(!*(const uint16*) (page + (PAGE_HEADER + PAGE_N_RECS)));
+ ut_ad(page_rec_check(rec));
+ return(page_rec_is_supremum_low(page_offset(rec)));
}
-/************************************************************//**
-Determine whether the page contains garbage.
-@return true if the page contains garbage (PAGE_GARBAGE is not 0) */
-UNIV_INLINE
+/** Determine whether an index page record is the infimum record.
+@param[in] rec record in an index page
+@return true if the infimum record */
+inline
bool
-page_has_garbage(
-/*=============*/
- const page_t* page) /*!< in: page */
+page_rec_is_infimum(const rec_t* rec)
{
- return(!!*(const uint16*) (page + (PAGE_HEADER + PAGE_GARBAGE)));
+ ut_ad(page_rec_check(rec));
+ return(page_rec_is_infimum_low(page_offset(rec)));
}
/************************************************************//**
-Gets the offset of the first record on the page.
-@return offset of the first record in record list, relative from page */
+true if the record is the first user record on a page.
+@return true if the first user record */
UNIV_INLINE
-ulint
-page_get_infimum_offset(
-/*====================*/
- const page_t* page) /*!< in: page which must have record(s) */
+bool
+page_rec_is_first(
+/*==============*/
+ const rec_t* rec, /*!< in: record */
+ const page_t* page) /*!< in: page */
{
- ut_ad(page);
- ut_ad(!page_offset(page));
+ ut_ad(page_get_n_recs(page) > 0);
- if (page_is_comp(page)) {
- return(PAGE_NEW_INFIMUM);
- } else {
- return(PAGE_OLD_INFIMUM);
- }
+ return(page_rec_get_next_const(page_get_infimum_rec(page)) == rec);
}
/************************************************************//**
-Gets the offset of the last record on the page.
-@return offset of the last record in record list, relative from page */
+true if the record is the second user record on a page.
+@return true if the second user record */
UNIV_INLINE
-ulint
-page_get_supremum_offset(
-/*=====================*/
- const page_t* page) /*!< in: page which must have record(s) */
+bool
+page_rec_is_second(
+/*===============*/
+ const rec_t* rec, /*!< in: record */
+ const page_t* page) /*!< in: page */
{
- ut_ad(page);
- ut_ad(!page_offset(page));
+ ut_ad(page_get_n_recs(page) > 1);
- if (page_is_comp(page)) {
- return(PAGE_NEW_SUPREMUM);
- } else {
- return(PAGE_OLD_SUPREMUM);
- }
+ return(page_rec_get_next_const(
+ page_rec_get_next_const(page_get_infimum_rec(page))) == rec);
}
/************************************************************//**
-TRUE if the record is a user record on the page.
-@return TRUE if a user record */
+true if the record is the last user record on a page.
+@return true if the last user record */
UNIV_INLINE
-ibool
-page_rec_is_user_rec_low(
-/*=====================*/
- ulint offset) /*!< in: record offset on page */
+bool
+page_rec_is_last(
+/*=============*/
+ const rec_t* rec, /*!< in: record */
+ const page_t* page) /*!< in: page */
{
- ut_ad(offset >= PAGE_NEW_INFIMUM);
-#if PAGE_OLD_INFIMUM < PAGE_NEW_INFIMUM
-# error "PAGE_OLD_INFIMUM < PAGE_NEW_INFIMUM"
-#endif
-#if PAGE_OLD_SUPREMUM < PAGE_NEW_SUPREMUM
-# error "PAGE_OLD_SUPREMUM < PAGE_NEW_SUPREMUM"
-#endif
-#if PAGE_NEW_INFIMUM > PAGE_OLD_SUPREMUM
-# error "PAGE_NEW_INFIMUM > PAGE_OLD_SUPREMUM"
-#endif
-#if PAGE_OLD_INFIMUM > PAGE_NEW_SUPREMUM
-# error "PAGE_OLD_INFIMUM > PAGE_NEW_SUPREMUM"
-#endif
-#if PAGE_NEW_SUPREMUM > PAGE_OLD_SUPREMUM_END
-# error "PAGE_NEW_SUPREMUM > PAGE_OLD_SUPREMUM_END"
-#endif
-#if PAGE_OLD_SUPREMUM > PAGE_NEW_SUPREMUM_END
-# error "PAGE_OLD_SUPREMUM > PAGE_NEW_SUPREMUM_END"
-#endif
- ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
+ ut_ad(page_get_n_recs(page) > 0);
- return(offset != PAGE_NEW_SUPREMUM
- && offset != PAGE_NEW_INFIMUM
- && offset != PAGE_OLD_INFIMUM
- && offset != PAGE_OLD_SUPREMUM);
+ return(page_rec_get_next_const(rec) == page_get_supremum_rec(page));
}
/************************************************************//**
-TRUE if the record is the supremum record on a page.
-@return TRUE if the supremum record */
+true if the distance between the records (measured as the number of times
+we have to move to the next record) is at most the specified value */
UNIV_INLINE
-ibool
-page_rec_is_supremum_low(
-/*=====================*/
- ulint offset) /*!< in: record offset on page */
+bool
+page_rec_distance_is_at_most(
+/*=========================*/
+ const rec_t* left_rec,
+ const rec_t* right_rec,
+ ulint val)
{
- ut_ad(offset >= PAGE_NEW_INFIMUM);
- ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
-
- return(offset == PAGE_NEW_SUPREMUM
- || offset == PAGE_OLD_SUPREMUM);
+ for (ulint i = 0; i <= val; i++) {
+ if (left_rec == right_rec) {
+ return (true);
+ }
+ left_rec = page_rec_get_next_const(left_rec);
+ }
+ return (false);
}
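
The loop above runs val + 1 iterations, so val == 0 asks whether the two
records are the same one, and the cost is linear in val. A standalone
analogue of the same walk on a plain singly linked list:

#include <cassert>

struct rec { const rec* next; };

static bool distance_is_at_most(const rec* left, const rec* right,
                                unsigned val)
{
        for (unsigned i = 0; i <= val; i++) {
                if (left == right) return true;
                left = left->next;
        }
        return false;
}

int main()
{
        rec c{nullptr}, b{&c}, a{&b};        /* chain: a -> b -> c */
        assert(distance_is_at_most(&a, &a, 0));
        assert(distance_is_at_most(&a, &c, 2));
        assert(!distance_is_at_most(&a, &c, 1));
}
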
/************************************************************//**
-TRUE if the record is the infimum record on a page.
-@return TRUE if the infimum record */
+true if the record is the second last user record on a page.
+@return true if the second last user record */
UNIV_INLINE
-ibool
-page_rec_is_infimum_low(
+bool
+page_rec_is_second_last(
/*====================*/
- ulint offset) /*!< in: record offset on page */
-{
- ut_ad(offset >= PAGE_NEW_INFIMUM);
- ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
-
- return(offset == PAGE_NEW_INFIMUM || offset == PAGE_OLD_INFIMUM);
-}
-
-/************************************************************//**
-TRUE if the record is a user record on the page.
-@return TRUE if a user record */
-UNIV_INLINE
-ibool
-page_rec_is_user_rec(
-/*=================*/
- const rec_t* rec) /*!< in: record */
-{
- ut_ad(page_rec_check(rec));
-
- return(page_rec_is_user_rec_low(page_offset(rec)));
-}
-
-/************************************************************//**
-TRUE if the record is the supremum record on a page.
-@return TRUE if the supremum record */
-UNIV_INLINE
-ibool
-page_rec_is_supremum(
-/*=================*/
- const rec_t* rec) /*!< in: record */
-{
- ut_ad(page_rec_check(rec));
-
- return(page_rec_is_supremum_low(page_offset(rec)));
-}
-
-/************************************************************//**
-TRUE if the record is the infimum record on a page.
-@return TRUE if the infimum record */
-UNIV_INLINE
-ibool
-page_rec_is_infimum(
-/*================*/
- const rec_t* rec) /*!< in: record */
+ const rec_t* rec, /*!< in: record */
+ const page_t* page) /*!< in: page */
{
- ut_ad(page_rec_check(rec));
+ ut_ad(page_get_n_recs(page) > 1);
+ ut_ad(!page_rec_is_last(rec, page));
- return(page_rec_is_infimum_low(page_offset(rec)));
+ return(page_rec_get_next_const(
+ page_rec_get_next_const(rec)) == page_get_supremum_rec(page));
}
/************************************************************//**
Returns the nth record of the record list.
This is the inverse function of page_rec_get_n_recs_before().
-@return nth record */
+@return nth record */
UNIV_INLINE
rec_t*
page_rec_get_nth(
@@ -459,78 +409,28 @@ page_rec_get_nth(
return((rec_t*) page_rec_get_nth_const(page, nth));
}
-#ifndef UNIV_HOTBACKUP
/************************************************************//**
Returns the middle record of the records on the page. If there is an
even number of records in the list, returns the first record of the
upper half-list.
-@return middle record */
+@return middle record */
UNIV_INLINE
rec_t*
page_get_middle_rec(
/*================*/
page_t* page) /*!< in: page */
{
- ulint middle = (page_get_n_recs(page) + PAGE_HEAP_NO_USER_LOW) / 2;
+ ulint middle = (ulint(page_get_n_recs(page))
+ + PAGE_HEAP_NO_USER_LOW) / 2;
return(page_rec_get_nth(page, middle));
}
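
A worked example of the arithmetic, assuming PAGE_HEAP_NO_USER_LOW == 2 (the
infimum and supremum occupy heap numbers 0 and 1): with 4 user records,
middle = (4 + 2) / 2 = 3, and counting the infimum as record 0, record 3 is
the third user record -- the first record of the upper half-list:

#include <cassert>

int main()
{
        const unsigned PAGE_HEAP_NO_USER_LOW = 2;  /* infimum + supremum */
        for (unsigned n_recs = 1; n_recs <= 6; n_recs++) {
                unsigned middle = (n_recs + PAGE_HEAP_NO_USER_LOW) / 2;
                /* even n_recs: first record of the upper half;
                   odd n_recs: the true middle record */
                assert(middle >= 1 && middle <= n_recs);
        }
}
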
-/*************************************************************//**
-Compares a data tuple to a physical record. Differs from the function
-cmp_dtuple_rec_with_match in the way that the record must reside on an
-index page, and also page infimum and supremum records can be given in
-the parameter rec. These are considered as the negative infinity and
-the positive infinity in the alphabetical order.
-@return 1, 0, -1, if dtuple is greater, equal, less than rec,
-respectively, when only the common first fields are compared */
-UNIV_INLINE
-int
-page_cmp_dtuple_rec_with_match(
-/*===========================*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record on a page; may also
- be page infimum or supremum, in which case
- matched-parameter values below are not
- affected */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint* matched_fields, /*!< in/out: number of already completely
- matched fields; when function returns
- contains the value for current comparison */
- ulint* matched_bytes) /*!< in/out: number of already matched
- bytes within the first field not completely
- matched; when function returns contains the
- value for current comparison */
-{
- ulint rec_offset;
-
- ut_ad(dtuple_check_typed(dtuple));
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- ut_ad(!rec_offs_comp(offsets) == !page_rec_is_comp(rec));
-
- rec_offset = page_offset(rec);
-
- if (rec_offset == PAGE_NEW_INFIMUM
- || rec_offset == PAGE_OLD_INFIMUM) {
-
- return(1);
-
- } else if (rec_offset == PAGE_NEW_SUPREMUM
- || rec_offset == PAGE_OLD_SUPREMUM) {
-
- return(-1);
- }
-
- return(cmp_dtuple_rec_with_match(dtuple, rec, offsets,
- matched_fields,
- matched_bytes));
-}
-#endif /* !UNIV_HOTBACKUP */
-
#endif /* !UNIV_INNOCHECKSUM */
+
/*************************************************************//**
Gets the page number.
-@return page number */
+@return page number */
UNIV_INLINE
ulint
page_get_page_no(
@@ -544,7 +444,7 @@ page_get_page_no(
#ifndef UNIV_INNOCHECKSUM
/*************************************************************//**
Gets the tablespace identifier.
-@return space id */
+@return space id */
UNIV_INLINE
ulint
page_get_space_id(
@@ -556,12 +456,13 @@ page_get_space_id(
}
#endif /* !UNIV_INNOCHECKSUM */
+
/*************************************************************//**
Gets the number of user records on page (infimum and supremum records
are not user records).
-@return number of user records */
+@return number of user records */
UNIV_INLINE
-ulint
+uint16_t
page_get_n_recs(
/*============*/
const page_t* page) /*!< in: index page */
@@ -572,9 +473,9 @@ page_get_n_recs(
#ifndef UNIV_INNOCHECKSUM
/*************************************************************//**
Gets the number of dir slots in directory.
-@return number of slots */
+@return number of slots */
UNIV_INLINE
-ulint
+uint16_t
page_dir_get_n_slots(
/*=================*/
const page_t* page) /*!< in: index page */
@@ -597,9 +498,9 @@ page_dir_set_n_slots(
/*************************************************************//**
Gets the number of records in the heap.
-@return number of user records */
+@return number of user records */
UNIV_INLINE
-ulint
+uint16_t
page_dir_get_n_heap(
/*================*/
const page_t* page) /*!< in: index page */
@@ -622,7 +523,7 @@ page_dir_set_n_heap(
ulint n_heap) /*!< in: number of records */
{
ut_ad(n_heap < 0x8000);
- ut_ad(!page_zip || n_heap
+ ut_ad(!page_zip || uint16_t(n_heap)
== (page_header_get_field(page, PAGE_N_HEAP) & 0x7fff) + 1);
page_header_set_field(page, page_zip, PAGE_N_HEAP, n_heap
@@ -633,7 +534,7 @@ page_dir_set_n_heap(
#ifdef UNIV_DEBUG
/*************************************************************//**
Gets pointer to nth directory slot.
-@return pointer to dir slot */
+@return pointer to dir slot */
UNIV_INLINE
page_dir_slot_t*
page_dir_get_nth_slot(
@@ -644,14 +545,14 @@ page_dir_get_nth_slot(
ut_ad(page_dir_get_n_slots(page) > n);
return((page_dir_slot_t*)
- page + UNIV_PAGE_SIZE - PAGE_DIR
+ page + srv_page_size - PAGE_DIR
- (n + 1) * PAGE_DIR_SLOT_SIZE);
}
#endif /* UNIV_DEBUG */
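
The address arithmetic above reflects the on-page layout: the sparse directory grows downward from the end of the page, just below the PAGE_DIR trailer, two bytes per slot. A minimal sketch of the same computation, with the trailer and slot sizes written out as assumed stand-in constants:

    #include <cassert>
    #include <cstddef>

    int main() {
        const size_t srv_page_size = 16384;   // assumed page size
        const size_t PAGE_DIR = 8;            // assumed trailer size
        const size_t PAGE_DIR_SLOT_SIZE = 2;

        unsigned char page[16384] = {};
        // Slot n sits (n + 1) slots below the trailer: slot 0 is the
        // highest-addressed slot, and larger n moves toward the records.
        auto nth_slot = [&](size_t n) {
            return page + srv_page_size - PAGE_DIR
                   - (n + 1) * PAGE_DIR_SLOT_SIZE;
        };
        assert(nth_slot(0) == page + 16384 - 8 - 2);
        assert(nth_slot(1) == nth_slot(0) - 2);
        return 0;
    }
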
/**************************************************************//**
Used to check the consistency of a record on a page.
-@return TRUE if succeed */
+@return TRUE if succeed */
UNIV_INLINE
ibool
page_rec_check(
@@ -670,7 +571,7 @@ page_rec_check(
/***************************************************************//**
Gets the record pointed to by a directory slot.
-@return pointer to record */
+@return pointer to record */
UNIV_INLINE
const rec_t*
page_dir_slot_get_rec(
@@ -696,7 +597,7 @@ page_dir_slot_set_rec(
/***************************************************************//**
Gets the number of records owned by a directory slot.
-@return number of records */
+@return number of records */
UNIV_INLINE
ulint
page_dir_slot_get_n_owned(
@@ -746,7 +647,7 @@ page_dir_calc_reserved_space(
/************************************************************//**
Gets the pointer to the next record on the page.
-@return pointer to next record */
+@return pointer to next record */
UNIV_INLINE
const rec_t*
page_rec_get_next_low(
@@ -763,7 +664,7 @@ page_rec_get_next_low(
offs = rec_get_next_offs(rec, comp);
- if (offs >= UNIV_PAGE_SIZE) {
+ if (offs >= srv_page_size) {
fprintf(stderr,
"InnoDB: Next record offset is nonsensical %lu"
" in record at offset %lu\n"
@@ -772,20 +673,23 @@ page_rec_get_next_low(
(void*) rec,
(ulong) page_get_space_id(page),
(ulong) page_get_page_no(page));
- buf_page_print(page, 0);
-
ut_error;
} else if (offs == 0) {
return(NULL);
}
+ ut_ad(page_rec_is_infimum(rec)
+ || (!page_is_leaf(page) && !page_has_prev(page))
+ || !(rec_get_info_bits(page + offs, comp)
+ & REC_INFO_MIN_REC_FLAG));
+
return(page + offs);
}
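
The two-byte offsets validated above are what chain every record on a page into a singly linked list from the infimum toward the supremum; iteration is nothing more than repeated offset lookups inside one page frame. A simplified model of that walk, with raw offsets standing in for rec_get_next_offs():

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
        // Toy page: each "record" stores the 2-byte offset of its
        // successor at its own offset; 0 terminates the list, matching
        // the offs == 0 case above.
        uint8_t page[4096] = {};
        auto link = [&](uint16_t at, uint16_t next) {
            std::memcpy(page + at, &next, 2);
        };
        link(100, 300);
        link(300, 700);
        link(700, 0);

        int count = 0;
        for (uint16_t offs = 100; offs != 0;) {
            ++count;
            std::memcpy(&offs, page + offs, 2);
        }
        assert(count == 3);
        return 0;
    }
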
/************************************************************//**
Gets the pointer to the next record on the page.
-@return pointer to next record */
+@return pointer to next record */
UNIV_INLINE
rec_t*
page_rec_get_next(
@@ -797,7 +701,7 @@ page_rec_get_next(
/************************************************************//**
Gets the pointer to the next record on the page.
-@return pointer to next record */
+@return pointer to next record */
UNIV_INLINE
const rec_t*
page_rec_get_next_const(
@@ -811,7 +715,7 @@ page_rec_get_next_const(
Gets the pointer to the next non delete-marked record on the page.
If all subsequent records are delete-marked, then this function
will return the supremum record.
-@return pointer to next non delete-marked record or pointer to supremum */
+@return pointer to next non delete-marked record or pointer to supremum */
UNIV_INLINE
const rec_t*
page_rec_get_next_non_del_marked(
@@ -862,7 +766,7 @@ page_rec_set_next(
/************************************************************//**
Gets the pointer to the previous record.
-@return pointer to previous record */
+@return pointer to previous record */
UNIV_INLINE
const rec_t*
page_rec_get_prev_const(
@@ -909,7 +813,7 @@ page_rec_get_prev_const(
/************************************************************//**
Gets the pointer to the previous record.
-@return pointer to previous record */
+@return pointer to previous record */
UNIV_INLINE
rec_t*
page_rec_get_prev(
@@ -922,7 +826,7 @@ page_rec_get_prev(
/***************************************************************//**
Looks for the record which owns the given record.
-@return the owner record */
+@return the owner record */
UNIV_INLINE
rec_t*
page_rec_find_owner_rec(
@@ -947,54 +851,36 @@ page_rec_find_owner_rec(
/**********************************************************//**
Returns the base extra size of a physical record. This is the
size of the fixed header, independent of the record size.
-@return REC_N_NEW_EXTRA_BYTES or REC_N_OLD_EXTRA_BYTES */
+@return REC_N_NEW_EXTRA_BYTES or REC_N_OLD_EXTRA_BYTES */
UNIV_INLINE
ulint
page_rec_get_base_extra_size(
/*=========================*/
const rec_t* rec) /*!< in: physical record */
{
-#if REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES
-# error "REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES"
-#endif
+ compile_time_assert(REC_N_NEW_EXTRA_BYTES + 1
+ == REC_N_OLD_EXTRA_BYTES);
return(REC_N_NEW_EXTRA_BYTES + (ulint) !page_rec_is_comp(rec));
}
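
The compile_time_assert() introduced above improves on the old preprocessor check because a static assertion can see values the preprocessor cannot, such as enum constants. A generic sketch of the same idea using standard static_assert and demo constants (the real REC_N_*_EXTRA_BYTES values live in InnoDB's record headers):

    #include <cstddef>

    enum { NEW_EXTRA_BYTES = 5, OLD_EXTRA_BYTES = 6 };  // demo values

    // The +1 relationship lets the accessor avoid a branch: add one
    // byte exactly when the record is in old-style (non-compact) format.
    static_assert(NEW_EXTRA_BYTES + 1 == OLD_EXTRA_BYTES,
                  "old-style records carry one extra header byte");

    static size_t base_extra_size(bool is_comp) {
        return NEW_EXTRA_BYTES + (is_comp ? 0 : 1);
    }

    int main() { return base_extra_size(true) == 5 ? 0 : 1; }
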
-#endif /* !UNIV_INNOCHECKSUM */
-
-/************************************************************//**
-Determine whether the page is in new-style compact format.
-@return nonzero if the page is in compact format, zero if it is in
-old-style format */
-UNIV_INLINE
-ulint
-page_is_comp(
-/*=========*/
- const page_t* page) /*!< in: index page */
-{
- return(page_header_get_field(page, PAGE_N_HEAP) & 0x8000);
-}
+#endif /* UNIV_INNOCHECKSUM */
/************************************************************//**
Returns the sum of the sizes of the records in the record list, excluding
the infimum and supremum records.
-@return data in bytes */
+@return data in bytes */
UNIV_INLINE
-ulint
+uint16_t
page_get_data_size(
/*===============*/
const page_t* page) /*!< in: index page */
{
- ulint ret;
-
- ret = (ulint)(page_header_get_field(page, PAGE_HEAP_TOP)
- - (page_is_comp(page)
- ? PAGE_NEW_SUPREMUM_END
- : PAGE_OLD_SUPREMUM_END)
- - page_header_get_field(page, PAGE_GARBAGE));
-
- ut_ad(ret < UNIV_PAGE_SIZE);
-
+ uint16_t ret = page_header_get_field(page, PAGE_HEAP_TOP)
+ - (page_is_comp(page)
+ ? PAGE_NEW_SUPREMUM_END
+ : PAGE_OLD_SUPREMUM_END)
+ - page_header_get_field(page, PAGE_GARBAGE);
+ ut_ad(ret < srv_page_size);
return(ret);
}
@@ -1034,7 +920,7 @@ page_mem_alloc_free(
/*************************************************************//**
Calculates free space if a page is emptied.
-@return free space */
+@return free space */
UNIV_INLINE
ulint
page_get_free_space_of_empty(
@@ -1042,41 +928,18 @@ page_get_free_space_of_empty(
ulint comp) /*!< in: nonzero=compact page layout */
{
if (comp) {
- return((ulint)(UNIV_PAGE_SIZE
+ return((ulint)(srv_page_size
- PAGE_NEW_SUPREMUM_END
- PAGE_DIR
- 2 * PAGE_DIR_SLOT_SIZE));
}
- return((ulint)(UNIV_PAGE_SIZE
+ return((ulint)(srv_page_size
- PAGE_OLD_SUPREMUM_END
- PAGE_DIR
- 2 * PAGE_DIR_SLOT_SIZE));
}
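
Put differently, the free space of an empty page is the page size minus the fixed overhead: the header and system records (everything up to *_SUPREMUM_END), the PAGE_DIR trailer, and the two directory slots that permanently own the infimum and supremum. A worked sketch with assumed stand-in constants:

    #include <cassert>
    #include <cstddef>

    // supremum_end stands in for PAGE_NEW_SUPREMUM_END or
    // PAGE_OLD_SUPREMUM_END; the compact-format value is the smaller
    // one because compact system records are shorter.
    static size_t free_space_of_empty(size_t page_size, size_t supremum_end) {
        const size_t PAGE_DIR = 8;            // assumed trailer size
        const size_t PAGE_DIR_SLOT_SIZE = 2;
        return page_size - supremum_end - PAGE_DIR - 2 * PAGE_DIR_SLOT_SIZE;
    }

    int main() {
        // 16KiB page, hypothetical 120-byte header + system records:
        assert(free_space_of_empty(16384, 120) == 16252);
        return 0;
    }
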
-#ifndef UNIV_HOTBACKUP
-/***********************************************************************//**
-Write a 32-bit field in a data dictionary record. */
-UNIV_INLINE
-void
-page_rec_write_field(
-/*=================*/
- rec_t* rec, /*!< in/out: record to update */
- ulint i, /*!< in: index of the field to update */
- ulint val, /*!< in: value to write */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- byte* data;
- ulint len;
-
- data = rec_get_nth_field_old(rec, i, &len);
-
- ut_ad(len == 4);
-
- mlog_write_ulint(data, val, MLOG_4BYTES, mtr);
-}
-#endif /* !UNIV_HOTBACKUP */
-
/************************************************************//**
Each user record on a page, and also each deleted user record in the heap,
takes its size plus the fraction of the dir cell size /
@@ -1084,7 +947,7 @@ PAGE_DIR_SLOT_MIN_N_OWNED bytes for it. If the sum of these exceeds the
value of page_get_free_space_of_empty, the insert is impossible, otherwise
it is allowed. This function returns the maximum combined size of records
which can be inserted on top of the record heap.
-@return maximum combined size for inserted records */
+@return maximum combined size for inserted records */
UNIV_INLINE
ulint
page_get_max_insert_size(
@@ -1126,7 +989,7 @@ page_get_max_insert_size(
/************************************************************//**
Returns the maximum combined size of records which can be inserted on top
of the record heap if a page is first reorganized.
-@return maximum combined size for inserted records */
+@return maximum combined size for inserted records */
UNIV_INLINE
ulint
page_get_max_insert_size_after_reorganize(
@@ -1162,7 +1025,7 @@ page_mem_free(
rec_t* rec, /*!< in: pointer to the
(origin of) record */
const dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets) /*!< in: array returned by
+ const offset_t* offsets) /*!< in: array returned by
rec_get_offsets() */
{
rec_t* free;
@@ -1171,11 +1034,9 @@ page_mem_free(
ut_ad(rec_offs_validate(rec, index, offsets));
free = page_header_get_ptr(page, PAGE_FREE);
- bool scrub = srv_immediate_scrub_data_uncompressed;
- if (scrub) {
+ if (srv_immediate_scrub_data_uncompressed) {
/* scrub record */
- uint size = rec_offs_data_size(offsets);
- memset(rec, 0, size);
+ memset(rec, 0, rec_offs_data_size(offsets));
}
page_rec_set_next(rec, free);
@@ -1190,7 +1051,7 @@ page_mem_free(
page_zip_dir_delete(page_zip, rec, index, offsets, free);
} else {
page_header_set_field(page, page_zip, PAGE_N_RECS,
- page_get_n_recs(page) - 1);
+ ulint(page_get_n_recs(page)) - 1);
}
}
@@ -1200,3 +1061,5 @@ page_mem_free(
#undef UNIV_INLINE
#define UNIV_INLINE UNIV_INLINE_ORIGINAL
#endif
+
+#endif
diff --git a/storage/innobase/include/page0size.h b/storage/innobase/include/page0size.h
index 0b3fa905cf5..74fcfb106ea 100644
--- a/storage/innobase/include/page0size.h
+++ b/storage/innobase/include/page0size.h
@@ -27,7 +27,6 @@ Created Nov 14, 2013 Vasil Dimov
#ifndef page0size_t
#define page0size_t
-#include "univ.i"
#include "fsp0types.h"
#define FIELD_REF_SIZE 20
@@ -35,7 +34,7 @@ Created Nov 14, 2013 Vasil Dimov
/** A BLOB field reference full of zero, for use in assertions and
tests. Initially, BLOB field references are set to zero, in
dtuple_convert_big_rec(). */
-extern const byte field_ref_zero[FIELD_REF_SIZE];
+extern const byte field_ref_zero[UNIV_PAGE_SIZE_MAX];
#define PAGE_SIZE_T_SIZE_BITS 17
@@ -176,7 +175,6 @@ private:
unsigned m_is_compressed:1;
};
-#ifndef UNIV_INNOCHECKSUM
/* Overloading the global output operator to conveniently print an object
of type page_size_t.
@param[in,out] out the output stream
@@ -193,7 +191,6 @@ operator<<(
<< ", compressed=" << obj.is_compressed() << "]";
return(out);
}
-#endif
extern page_size_t univ_page_size;
diff --git a/storage/innobase/include/page0types.h b/storage/innobase/include/page0types.h
index 3f7a653f8f8..4debd639fa4 100644
--- a/storage/innobase/include/page0types.h
+++ b/storage/innobase/include/page0types.h
@@ -26,18 +26,17 @@ Created 2/2/1994 Heikki Tuuri
#ifndef page0types_h
#define page0types_h
-#include <map>
-
-#include "univ.i"
#include "dict0types.h"
#include "mtr0types.h"
-#include "sync0types.h"
-#include "os0thread.h"
+#include "rem0types.h"
+
+#include <map>
/** Eliminates a name collision on HP-UX */
#define page_t ib_page_t
/** Type of the index page */
typedef byte page_t;
+#ifndef UNIV_INNOCHECKSUM
/** Index page cursor */
struct page_cur_t;
@@ -61,6 +60,42 @@ ssize, which is the number of shifts from 512. */
# error "PAGE_ZIP_SSIZE_MAX >= (1 << PAGE_ZIP_SSIZE_BITS)"
#endif
+/* Page cursor search modes; the values must be in this order! */
+enum page_cur_mode_t {
+ PAGE_CUR_UNSUPP = 0,
+ PAGE_CUR_G = 1,
+ PAGE_CUR_GE = 2,
+ PAGE_CUR_L = 3,
+ PAGE_CUR_LE = 4,
+
+/* PAGE_CUR_LE_OR_EXTENDS = 5,*/ /* This is a search mode used in
+ "column LIKE 'abc%' ORDER BY column DESC";
+ we have to find strings which are <= 'abc' or
+ which extend it */
+
+/* These search modes are for searching the R-tree index. */
+ PAGE_CUR_CONTAIN = 7,
+ PAGE_CUR_INTERSECT = 8,
+ PAGE_CUR_WITHIN = 9,
+ PAGE_CUR_DISJOINT = 10,
+ PAGE_CUR_MBR_EQUAL = 11,
+ PAGE_CUR_RTREE_INSERT = 12,
+ PAGE_CUR_RTREE_LOCATE = 13,
+ PAGE_CUR_RTREE_GET_FATHER = 14
+};
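
The ordering demanded by the comment above matters because callers derive one mode from another arithmetically, relying on the G/GE and L/LE pairs being adjacent. A hedged sketch of such a mapping; the helper below is illustrative only, not an InnoDB function:

    #include <cassert>

    enum cur_mode_demo { CUR_G = 1, CUR_GE = 2, CUR_L = 3, CUR_LE = 4 };

    // Pick a cursor mode from an SQL-style range predicate. This is
    // the kind of mapping the "must be in this order" comment protects.
    static cur_mode_demo mode_for(bool lower_bound, bool inclusive) {
        if (lower_bound)
            return inclusive ? CUR_GE : CUR_G;
        return inclusive ? CUR_LE : CUR_L;
    }

    int main() {
        assert(mode_for(true, true)   == CUR_GE);  // col >= x
        assert(mode_for(false, false) == CUR_L);   // col <  x
        return 0;
    }
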
+
+
+/** The information used for compressing a page when applying
+TRUNCATE log record during recovery */
+struct redo_page_compress_t {
+ ulint type; /*!< index type */
+ index_id_t index_id; /*!< index id */
+ ulint n_fields; /*!< number of index fields */
+ ulint field_len; /*!< the length of index field */
+ const byte* fields; /*!< index field information */
+ ulint trx_id_pos; /*!< position of trx-id column. */
+};
+
/** Compressed page descriptor */
struct page_zip_des_t
{
@@ -108,21 +143,21 @@ struct page_zip_stat_t {
};
/** Compression statistics types */
-typedef std::map<index_id_t, page_zip_stat_t> page_zip_stat_per_index_t;
+typedef std::map<
+ index_id_t,
+ page_zip_stat_t,
+ std::less<index_id_t>,
+ ut_allocator<std::pair<const index_id_t, page_zip_stat_t> > >
+ page_zip_stat_per_index_t;
/** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
-extern page_zip_stat_t page_zip_stat[PAGE_ZIP_SSIZE_MAX];
+extern page_zip_stat_t page_zip_stat[PAGE_ZIP_SSIZE_MAX];
/** Statistics on compression, indexed by dict_index_t::id */
-extern page_zip_stat_per_index_t page_zip_stat_per_index;
-extern ib_mutex_t page_zip_stat_per_index_mutex;
-#ifdef HAVE_PSI_INTERFACE
-extern mysql_pfs_key_t page_zip_stat_per_index_mutex_key;
-#endif /* HAVE_PSI_INTERFACE */
+extern page_zip_stat_per_index_t page_zip_stat_per_index;
/**********************************************************************//**
Write the "deleted" flag of a record on a compressed page. The flag must
already have been written on the uncompressed page. */
-UNIV_INTERN
void
page_zip_rec_set_deleted(
/*=====================*/
@@ -134,7 +169,6 @@ page_zip_rec_set_deleted(
/**********************************************************************//**
Write the "owned" flag of a record on a compressed page. The n_owned field
must already have been written on the uncompressed page. */
-UNIV_INTERN
void
page_zip_rec_set_owned(
/*===================*/
@@ -145,20 +179,18 @@ page_zip_rec_set_owned(
/**********************************************************************//**
Shift the dense page directory when a record is deleted. */
-UNIV_INTERN
void
page_zip_dir_delete(
/*================*/
page_zip_des_t* page_zip,/*!< in/out: compressed page */
byte* rec, /*!< in: deleted record */
dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets,/*!< in: rec_get_offsets(rec) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec) */
const byte* free) /*!< in: previous start of the free list */
MY_ATTRIBUTE((nonnull(1,2,3,4)));
/**********************************************************************//**
Add a slot to the dense page directory. */
-UNIV_INTERN
void
page_zip_dir_add_slot(
/*==================*/
@@ -166,4 +198,5 @@ page_zip_dir_add_slot(
ulint is_clustered) /*!< in: nonzero for clustered index,
zero for others */
MY_ATTRIBUTE((nonnull));
+#endif /* !UNIV_INNOCHECKSUM */
#endif
diff --git a/storage/innobase/include/page0zip.h b/storage/innobase/include/page0zip.h
index f45dd412ffc..01b51ea2a0b 100644
--- a/storage/innobase/include/page0zip.h
+++ b/storage/innobase/include/page0zip.h
@@ -1,9 +1,8 @@
-
/*****************************************************************************
Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2013, 2016, MariaDB Corporation
+Copyright (c) 2017, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -34,36 +33,62 @@ Created June 2005 by Marko Makela
# define UNIV_INLINE
#endif
+#ifdef UNIV_INNOCHECKSUM
+#include "buf0buf.h"
+#include "ut0crc32.h"
+#include "buf0checksum.h"
+#include "mach0data.h"
+#include "zlib.h"
+#endif /* UNIV_INNOCHECKSUM */
+
+#ifndef UNIV_INNOCHECKSUM
+#include "mtr0types.h"
#include "page0types.h"
+#endif /* !UNIV_INNOCHECKSUM */
+
#include "buf0types.h"
+#include "rem0types.h"
+
#ifndef UNIV_INNOCHECKSUM
-#include "mtr0types.h"
#include "dict0types.h"
#include "srv0srv.h"
#include "trx0types.h"
#include "mem0mem.h"
-#endif /* !UNIV_INNOCHECKSUM */
/* Compression level to be used by zlib. Settable by user. */
extern uint page_zip_level;
/* Default compression level. */
#define DEFAULT_COMPRESSION_LEVEL 6
+/** Start offset of the area that will be compressed */
+#define PAGE_ZIP_START PAGE_NEW_SUPREMUM_END
+/** Size of a compressed page directory entry */
+#define PAGE_ZIP_DIR_SLOT_SIZE 2
+/** Predefine the sum of DIR_SLOT, TRX_ID & ROLL_PTR */
+#define PAGE_ZIP_CLUST_LEAF_SLOT_SIZE \
+ (PAGE_ZIP_DIR_SLOT_SIZE \
+ + DATA_TRX_ID_LEN \
+ + DATA_ROLL_PTR_LEN)
+/** Mask of record offsets */
+#define PAGE_ZIP_DIR_SLOT_MASK 0x3fffU
+/** 'owned' flag */
+#define PAGE_ZIP_DIR_SLOT_OWNED 0x4000U
+/** 'deleted' flag */
+#define PAGE_ZIP_DIR_SLOT_DEL 0x8000U
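
Together these three masks pack one 16-bit dense-directory entry: the low 14 bits hold the record offset, bit 14 the 'owned' flag, and bit 15 the 'deleted' flag. A small standalone round-trip using the same mask values:

    #include <cassert>
    #include <cstdint>

    int main() {
        const uint16_t SLOT_MASK  = 0x3fffU;  // record offset (14 bits)
        const uint16_t SLOT_OWNED = 0x4000U;  // owns a directory slot
        const uint16_t SLOT_DEL   = 0x8000U;  // delete-marked

        // Encode: a deleted, slot-owning record at page offset 0x1234.
        uint16_t entry = (0x1234 & SLOT_MASK) | SLOT_OWNED | SLOT_DEL;

        // Decode: offset and flags come back out independently.
        assert((entry & SLOT_MASK) == 0x1234);
        assert(entry & SLOT_OWNED);
        assert(entry & SLOT_DEL);
        return 0;
    }
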
/* Whether or not to log compressed page images to avoid possible
compression algorithm changes in zlib. */
extern my_bool page_zip_log_pages;
-#ifndef UNIV_INNOCHECKSUM
/**********************************************************************//**
Determine the size of a compressed page in bytes.
-@return size in bytes */
+@return size in bytes */
UNIV_INLINE
ulint
page_zip_get_size(
/*==============*/
const page_zip_des_t* page_zip) /*!< in: compressed page */
- MY_ATTRIBUTE((nonnull, pure));
+ MY_ATTRIBUTE((warn_unused_result));
/**********************************************************************//**
Set the size of a compressed page in bytes. */
UNIV_INLINE
@@ -73,32 +98,40 @@ page_zip_set_size(
page_zip_des_t* page_zip, /*!< in/out: compressed page */
ulint size); /*!< in: size in bytes */
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Determine if a record is so big that it needs to be stored externally.
-@return FALSE if the entire record can be stored locally on the page */
+/** Determine if a record is so big that it needs to be stored externally.
+@param[in] rec_size length of the record in bytes
+@param[in] comp nonzero=compact format
+@param[in] n_fields number of fields in the record; ignored if
+tablespace is not compressed
+@param[in] page_size page size
+@return FALSE if the entire record can be stored locally on the page */
UNIV_INLINE
ibool
page_zip_rec_needs_ext(
-/*===================*/
- ulint rec_size, /*!< in: length of the record in bytes */
- ulint comp, /*!< in: nonzero=compact format */
- ulint n_fields, /*!< in: number of fields in the record;
- ignored if zip_size == 0 */
- ulint zip_size) /*!< in: compressed page size in bytes, or 0 */
- MY_ATTRIBUTE((const));
+ ulint rec_size,
+ ulint comp,
+ ulint n_fields,
+ const page_size_t& page_size)
+ MY_ATTRIBUTE((warn_unused_result));
/**********************************************************************//**
Determine the guaranteed free space on an empty page.
-@return minimum payload size on the page */
-UNIV_INTERN
+@return minimum payload size on the page */
ulint
page_zip_empty_size(
/*================*/
ulint n_fields, /*!< in: number of columns in the index */
ulint zip_size) /*!< in: compressed page size in bytes */
MY_ATTRIBUTE((const));
-#endif /* !UNIV_HOTBACKUP */
+
+/** Check whether a tuple is too big for a compressed table
+@param[in] index dict index object
+@param[in] entry entry for the index
+@return true if it's too big, otherwise false */
+bool
+page_zip_is_too_big(
+ const dict_index_t* index,
+ const dtuple_t* entry);
/**********************************************************************//**
Initialize a compressed page descriptor. */
@@ -111,36 +144,54 @@ page_zip_des_init(
/**********************************************************************//**
Configure the zlib allocator to use the given memory heap. */
-UNIV_INTERN
void
page_zip_set_alloc(
/*===============*/
void* stream, /*!< in/out: zlib stream */
mem_heap_t* heap); /*!< in: memory heap to use */
-#endif /* !UNIV_INNOCHECKSUM */
/**********************************************************************//**
Compress a page.
@return TRUE on success, FALSE on failure; page_zip will be left
intact on failure. */
-UNIV_INTERN
ibool
page_zip_compress(
/*==============*/
- page_zip_des_t* page_zip,/*!< in: size; out: data, n_blobs,
- m_start, m_end, m_nonempty */
- const page_t* page, /*!< in: uncompressed page */
- dict_index_t* index, /*!< in: index of the B-tree node */
- ulint level, /*!< in: compression level */
- mtr_t* mtr) /*!< in: mini-transaction, or NULL */
- MY_ATTRIBUTE((nonnull(1,2,3)));
+ page_zip_des_t* page_zip, /*!< in: size; out: data,
+ n_blobs, m_start, m_end,
+ m_nonempty */
+ const page_t* page, /*!< in: uncompressed page */
+ dict_index_t* index, /*!< in: index of the B-tree
+ node */
+ ulint level, /*!< in: compression level */
+ const redo_page_compress_t* page_comp_info,
+ /*!< in: used for applying
+ TRUNCATE log
+ record during recovery */
+ mtr_t* mtr); /*!< in/out: mini-transaction,
+ or NULL */
+
+/**********************************************************************//**
+Write the index information for the compressed page.
+@return used size of buf */
+ulint
+page_zip_fields_encode(
+/*===================*/
+ ulint n, /*!< in: number of fields
+ to compress */
+ const dict_index_t* index, /*!< in: index comprising
+ at least n fields */
+ ulint trx_id_pos,
+ /*!< in: position of the trx_id column
+ in the index, or ULINT_UNDEFINED if
+ this is a non-leaf page */
+ byte* buf); /*!< out: buffer of (n + 1) * 2 bytes */
/**********************************************************************//**
Decompress a page. This function should tolerate errors on the compressed
page. Instead of letting assertions fail, it will return FALSE if an
inconsistency is detected.
-@return TRUE on success, FALSE on failure */
-UNIV_INTERN
+@return TRUE on success, FALSE on failure */
ibool
page_zip_decompress(
/*================*/
@@ -153,11 +204,10 @@ page_zip_decompress(
after page creation */
MY_ATTRIBUTE((nonnull(1,2)));
-#ifndef UNIV_INNOCHECKSUM
#ifdef UNIV_DEBUG
/**********************************************************************//**
Validate a compressed page descriptor.
-@return TRUE if ok */
+@return TRUE if ok */
UNIV_INLINE
ibool
page_zip_simple_validate(
@@ -165,13 +215,11 @@ page_zip_simple_validate(
const page_zip_des_t* page_zip); /*!< in: compressed page
descriptor */
#endif /* UNIV_DEBUG */
-#endif /* !UNIV_INNOCHECKSUM */
#ifdef UNIV_ZIP_DEBUG
/**********************************************************************//**
Check that the compressed and decompressed pages match.
-@return TRUE if valid, FALSE if not */
-UNIV_INTERN
+@return TRUE if valid, FALSE if not */
ibool
page_zip_validate_low(
/*==================*/
@@ -183,7 +231,6 @@ page_zip_validate_low(
MY_ATTRIBUTE((nonnull(1,2)));
/**********************************************************************//**
Check that the compressed and decompressed pages match. */
-UNIV_INTERN
ibool
page_zip_validate(
/*==============*/
@@ -193,7 +240,6 @@ page_zip_validate(
MY_ATTRIBUTE((nonnull(1,2)));
#endif /* UNIV_ZIP_DEBUG */
-#ifndef UNIV_INNOCHECKSUM
/**********************************************************************//**
Determine how big record can be inserted without recompressing the page.
@return a positive number indicating the maximum size of a record
@@ -204,11 +250,11 @@ page_zip_max_ins_size(
/*==================*/
const page_zip_des_t* page_zip,/*!< in: compressed page */
ibool is_clust)/*!< in: TRUE if clustered index */
- MY_ATTRIBUTE((nonnull, pure));
+ MY_ATTRIBUTE((warn_unused_result));
/**********************************************************************//**
Determine if enough space is available in the modification log.
-@return TRUE if page_zip_write_rec() will succeed */
+@return TRUE if page_zip_write_rec() will succeed */
UNIV_INLINE
ibool
page_zip_available(
@@ -218,7 +264,7 @@ page_zip_available(
ulint length, /*!< in: combined size of the record */
ulint create) /*!< in: nonzero=add the record to
the heap */
- MY_ATTRIBUTE((nonnull, pure));
+ MY_ATTRIBUTE((warn_unused_result));
/**********************************************************************//**
Write data to the uncompressed header portion of a page. The data must
@@ -236,21 +282,19 @@ page_zip_write_header(
/**********************************************************************//**
Write an entire record on the compressed page. The data must already
have been written to the uncompressed page. */
-UNIV_INTERN
void
page_zip_write_rec(
/*===============*/
page_zip_des_t* page_zip,/*!< in/out: compressed page */
const byte* rec, /*!< in: record being written */
dict_index_t* index, /*!< in: the index the record belongs to */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
ulint create) /*!< in: nonzero=insert, zero=update */
MY_ATTRIBUTE((nonnull));
/***********************************************************//**
Parses a log record of writing a BLOB pointer of a record.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
page_zip_parse_write_blob_ptr(
/*==========================*/
@@ -262,7 +306,6 @@ page_zip_parse_write_blob_ptr(
/**********************************************************************//**
Write a BLOB pointer of a record on the leaf page of a clustered index.
The information must already have been updated on the uncompressed page. */
-UNIV_INTERN
void
page_zip_write_blob_ptr(
/*====================*/
@@ -270,16 +313,14 @@ page_zip_write_blob_ptr(
const byte* rec, /*!< in/out: record whose data is being
written */
dict_index_t* index, /*!< in: index of the page */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
ulint n, /*!< in: column index */
- mtr_t* mtr) /*!< in: mini-transaction handle,
+ mtr_t* mtr); /*!< in: mini-transaction handle,
or NULL if no logging is needed */
- MY_ATTRIBUTE((nonnull(1,2,3,4)));
/***********************************************************//**
Parses a log record of writing the node pointer of a record.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
page_zip_parse_write_node_ptr(
/*==========================*/
@@ -290,7 +331,6 @@ page_zip_parse_write_node_ptr(
/**********************************************************************//**
Write the node pointer of a record on a non-leaf compressed page. */
-UNIV_INTERN
void
page_zip_write_node_ptr(
/*====================*/
@@ -298,18 +338,16 @@ page_zip_write_node_ptr(
byte* rec, /*!< in/out: record */
ulint size, /*!< in: data size of rec */
ulint ptr, /*!< in: node pointer */
- mtr_t* mtr) /*!< in: mini-transaction, or NULL */
- MY_ATTRIBUTE((nonnull(1,2)));
+ mtr_t* mtr); /*!< in: mini-transaction, or NULL */
/**********************************************************************//**
Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */
-UNIV_INTERN
void
page_zip_write_trx_id_and_roll_ptr(
/*===============================*/
page_zip_des_t* page_zip,/*!< in/out: compressed page */
byte* rec, /*!< in/out: record */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
ulint trx_id_col,/*!< in: column number of TRX_ID in rec */
trx_id_t trx_id, /*!< in: transaction identifier */
roll_ptr_t roll_ptr)/*!< in: roll_ptr */
@@ -318,7 +356,6 @@ page_zip_write_trx_id_and_roll_ptr(
/**********************************************************************//**
Write the "deleted" flag of a record on a compressed page. The flag must
already have been written on the uncompressed page. */
-UNIV_INTERN
void
page_zip_rec_set_deleted(
/*=====================*/
@@ -330,7 +367,6 @@ page_zip_rec_set_deleted(
/**********************************************************************//**
Write the "owned" flag of a record on a compressed page. The n_owned field
must already have been written on the uncompressed page. */
-UNIV_INTERN
void
page_zip_rec_set_owned(
/*===================*/
@@ -341,7 +377,6 @@ page_zip_rec_set_owned(
/**********************************************************************//**
Insert a record to the dense page directory. */
-UNIV_INTERN
void
page_zip_dir_insert(
/*================*/
@@ -354,21 +389,19 @@ page_zip_dir_insert(
/**********************************************************************//**
Shift the dense page directory and the array of BLOB pointers
when a record is deleted. */
-UNIV_INTERN
void
page_zip_dir_delete(
/*================*/
page_zip_des_t* page_zip, /*!< in/out: compressed page */
byte* rec, /*!< in: deleted record */
const dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets, /*!< in: rec_get_offsets(rec) */
+ const offset_t* offsets, /*!< in: rec_get_offsets(rec) */
const byte* free) /*!< in: previous start of
the free list */
MY_ATTRIBUTE((nonnull(1,2,3,4)));
/**********************************************************************//**
Add a slot to the dense page directory. */
-UNIV_INTERN
void
page_zip_dir_add_slot(
/*==================*/
@@ -379,8 +412,7 @@ page_zip_dir_add_slot(
/***********************************************************//**
Parses a log record of writing to the header of a page.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
page_zip_parse_write_header(
/*========================*/
@@ -416,7 +448,6 @@ bits in the same mini-transaction in such a way that the modification
will be redo-logged.
@return TRUE on success, FALSE on failure; page_zip will be left
intact on failure, but page will be overwritten. */
-UNIV_INTERN
ibool
page_zip_reorganize(
/*================*/
@@ -427,15 +458,12 @@ page_zip_reorganize(
dict_index_t* index, /*!< in: index of the B-tree node */
mtr_t* mtr) /*!< in: mini-transaction */
MY_ATTRIBUTE((nonnull));
-#endif /* !UNIV_INNOCHECKSUM */
-#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Copy the records of a page byte for byte. Do not copy the page header
or trailer, except those B-tree header fields that are directly
related to the storage of records. Also copy PAGE_MAX_TRX_ID.
NOTE: The caller must update the lock table and the adaptive hash index. */
-UNIV_INTERN
void
page_zip_copy_recs(
/*===============*/
@@ -446,45 +474,40 @@ page_zip_copy_recs(
const page_zip_des_t* src_zip, /*!< in: compressed page */
const page_t* src, /*!< in: page */
dict_index_t* index, /*!< in: index of the B-tree */
- mtr_t* mtr) /*!< in: mini-transaction */
- MY_ATTRIBUTE((nonnull));
-#endif /* !UNIV_HOTBACKUP */
+ mtr_t* mtr); /*!< in: mini-transaction */
-/**********************************************************************//**
-Parses a log record of compressing an index page.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_zip_parse_compress(
-/*====================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< out: uncompressed page */
- page_zip_des_t* page_zip)/*!< out: compressed page */
- MY_ATTRIBUTE((nonnull(1,2)));
+/** Parse and optionally apply MLOG_ZIP_PAGE_COMPRESS.
+@param[in] ptr log record
+@param[in] end_ptr end of log
+@param[in,out] block ROW_FORMAT=COMPRESSED block, or NULL for parsing only
+@return end of log record
+@retval NULL if the log record is incomplete */
+byte* page_zip_parse_compress(const byte* ptr, const byte* end_ptr,
+ buf_block_t* block);
-/**********************************************************************//**
-Calculate the compressed page checksum.
-@return page checksum */
-UNIV_INTERN
-ulint
+#endif /* !UNIV_INNOCHECKSUM */
+
+/** Calculate the compressed page checksum.
+@param[in] data compressed page
+@param[in] size size of compressed page
+@param[in] algo algorithm to use
+@return page checksum */
+uint32_t
page_zip_calc_checksum(
-/*===================*/
- const void* data, /*!< in: compressed page */
- ulint size, /*!< in: size of compressed page */
- srv_checksum_algorithm_t algo) /*!< in: algorithm to use */
- MY_ATTRIBUTE((nonnull));
+ const void* data,
+ ulint size,
+ srv_checksum_algorithm_t algo
+#ifdef INNODB_BUG_ENDIAN_CRC32
+ /** for crc32, use the big-endian bug-compatible crc32 variant */
+ , bool use_legacy_big_endian = false
+#endif
+);
-/**********************************************************************//**
-Verify a compressed page's checksum.
-@return TRUE if the stored checksum is valid according to the value of
-innodb_checksum_algorithm */
-UNIV_INTERN
-ibool
-page_zip_verify_checksum(
-/*=====================*/
- const void* data, /*!< in: compressed page */
- ulint size); /*!< in: size of compressed page */
+/** Validate the checksum on a ROW_FORMAT=COMPRESSED page.
+@param data ROW_FORMAT=COMPRESSED page
+@param size size of the page, in bytes
+@return whether the stored checksum matches innodb_checksum_algorithm */
+bool page_zip_verify_checksum(const byte *data, size_t size);
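
For intuition, a page checksum of this kind is a CRC computed over (most of) the page image, stored in the page, and recomputed on read. A hedged sketch using zlib's crc32(); the byte ranges and storage location below are chosen for the demo and deliberately ignore InnoDB's actual field layout and algorithm selection:

    #include <zlib.h>
    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
        unsigned char page[1024] = {};
        std::memset(page + 64, 0xAB, 512);  // pretend compressed payload

        // Store a CRC-32 of the body in the first four bytes
        // (placement is illustrative, not InnoDB's layout).
        uint32_t c = uint32_t(crc32(0L, page + 4, sizeof(page) - 4));
        std::memcpy(page, &c, 4);

        // Verification recomputes and compares, which is the essence
        // of page_zip_verify_checksum().
        uint32_t stored;
        std::memcpy(&stored, page, 4);
        assert(stored == uint32_t(crc32(0L, page + 4, sizeof(page) - 4)));
        return 0;
    }
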
#ifndef UNIV_INNOCHECKSUM
/**********************************************************************//**
@@ -499,7 +522,7 @@ page_zip_compress_write_log_no_data(
mtr_t* mtr); /*!< in: mtr */
/**********************************************************************//**
Parses a log record of compressing an index page without the data.
-@return end of log record or NULL */
+@return end of log record or NULL */
UNIV_INLINE
byte*
page_zip_parse_compress_no_data(
@@ -519,37 +542,12 @@ void
page_zip_reset_stat_per_index();
/*===========================*/
-#endif /* !UNIV_INNOCHECKSUM */
-
-#ifndef UNIV_HOTBACKUP
-/** Check if a pointer to an uncompressed page matches a compressed page.
-When we IMPORT a tablespace the blocks and accompanying frames are allocated
-from outside the buffer pool.
-@param ptr pointer to an uncompressed page frame
-@param page_zip compressed page descriptor
-@return TRUE if ptr and page_zip refer to the same block */
-# define PAGE_ZIP_MATCH(ptr, page_zip) \
- (((page_zip)->m_external \
- && (page_align(ptr) + UNIV_PAGE_SIZE == (page_zip)->data)) \
- || buf_frame_get_page_zip(ptr) == (page_zip))
-#else /* !UNIV_HOTBACKUP */
-/** Check if a pointer to an uncompressed page matches a compressed page.
-@param ptr pointer to an uncompressed page frame
-@param page_zip compressed page descriptor
-@return TRUE if ptr and page_zip refer to the same block */
-# define PAGE_ZIP_MATCH(ptr, page_zip) \
- (page_align(ptr) + UNIV_PAGE_SIZE == (page_zip)->data)
-#endif /* !UNIV_HOTBACKUP */
-
#ifdef UNIV_MATERIALIZE
# undef UNIV_INLINE
# define UNIV_INLINE UNIV_INLINE_ORIGINAL
#endif
-#ifndef UNIV_INNOCHECKSUM
-#ifndef UNIV_NONINL
-# include "page0zip.ic"
-#endif
+#include "page0zip.ic"
#endif /* !UNIV_INNOCHECKSUM */
#endif /* page0zip_h */
diff --git a/storage/innobase/include/page0zip.ic b/storage/innobase/include/page0zip.ic
index 45979e4d084..5345aa19dd5 100644
--- a/storage/innobase/include/page0zip.ic
+++ b/storage/innobase/include/page0zip.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
Copyright (c) 2017, MariaDB Corporation.
@@ -33,6 +33,7 @@ Created June 2005 by Marko Makela
#include "page0zip.h"
#include "mtr0log.h"
#include "page0page.h"
+#include "srv0srv.h"
/* The format of compressed pages is as follows.
@@ -101,20 +102,9 @@ In summary, the compressed page looks like this:
- deleted records (free list) in link order
*/
-/** Start offset of the area that will be compressed */
-#define PAGE_ZIP_START PAGE_NEW_SUPREMUM_END
-/** Size of a compressed page directory entry */
-#define PAGE_ZIP_DIR_SLOT_SIZE 2
-/** Mask of record offsets */
-#define PAGE_ZIP_DIR_SLOT_MASK 0x3fff
-/** 'owned' flag */
-#define PAGE_ZIP_DIR_SLOT_OWNED 0x4000
-/** 'deleted' flag */
-#define PAGE_ZIP_DIR_SLOT_DEL 0x8000
-
/**********************************************************************//**
Determine the size of a compressed page in bytes.
-@return size in bytes */
+@return size in bytes */
UNIV_INLINE
ulint
page_zip_get_size(
@@ -144,11 +134,11 @@ page_zip_set_size(
ulint size) /*!< in: size in bytes */
{
if (size) {
- int ssize;
+ unsigned ssize;
ut_ad(ut_is_2pow(size));
- for (ssize = 1; size > (ulint) (512 << ssize); ssize++) {
+ for (ssize = 1; size > (512U << ssize); ssize++) {
}
page_zip->ssize = ssize;
@@ -159,24 +149,27 @@ page_zip_set_size(
ut_ad(page_zip_get_size(page_zip) == size);
}
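
The loop above converts a power-of-two byte count into the 'ssize' encoding: the number of doublings from 512 bytes (so 1024 maps to 1, 8192 to 4, 16384 to 5). The same loop, extracted and checked standalone:

    #include <cassert>

    static unsigned ssize_for(unsigned size) {
        unsigned ssize;
        // Count shifts until 512 << ssize reaches the given size;
        // size is assumed to be a power of two, as ut_is_2pow asserts.
        for (ssize = 1; size > (512U << ssize); ssize++) {
        }
        return ssize;
    }

    int main() {
        assert(ssize_for(1024)  == 1);
        assert(ssize_for(8192)  == 4);
        assert(ssize_for(16384) == 5);
        return 0;
    }
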
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Determine if a record is so big that it needs to be stored externally.
-@return FALSE if the entire record can be stored locally on the page */
+/** Determine if a record is so big that it needs to be stored externally.
+@param[in] rec_size length of the record in bytes
+@param[in] comp nonzero=compact format
+@param[in] n_fields number of fields in the record; ignored if
+tablespace is not compressed
+@param[in] page_size page size
+@return FALSE if the entire record can be stored locally on the page */
UNIV_INLINE
ibool
page_zip_rec_needs_ext(
-/*===================*/
- ulint rec_size, /*!< in: length of the record in bytes */
- ulint comp, /*!< in: nonzero=compact format */
- ulint n_fields, /*!< in: number of fields in the record;
- ignored if zip_size == 0 */
- ulint zip_size) /*!< in: compressed page size in bytes, or 0 */
+ ulint rec_size,
+ ulint comp,
+ ulint n_fields,
+ const page_size_t& page_size)
{
+ /* FIXME: the row size check in this function seems to be the most
+ correct one. Put it in a separate function and use it in more
+ places of InnoDB. */
+
ut_ad(rec_size
> ulint(comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES));
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(comp || !zip_size);
+ ut_ad(comp || !page_size.is_compressed());
#if UNIV_PAGE_SIZE_MAX > COMPRESSED_REC_MAX_DATA_SIZE
if (comp ? rec_size >= COMPRESSED_REC_MAX_DATA_SIZE :
@@ -185,7 +178,7 @@ page_zip_rec_needs_ext(
}
#endif
- if (zip_size) {
+ if (page_size.is_compressed()) {
ut_ad(comp);
/* On a compressed page, there is a two-byte entry in
the dense page directory for every record. But there
@@ -194,18 +187,17 @@ page_zip_rec_needs_ext(
the encoded heap number. Check also the available space
on the uncompressed page. */
return(rec_size - (REC_N_NEW_EXTRA_BYTES - 2 - 1)
- >= page_zip_empty_size(n_fields, zip_size)
+ >= page_zip_empty_size(n_fields, page_size.physical())
|| rec_size >= page_get_free_space_of_empty(TRUE) / 2);
}
return(rec_size >= page_get_free_space_of_empty(comp) / 2);
}
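
So the decision above reduces to two ceilings: on any page a record must fit in half the free space of an empty page, and on a compressed page it must additionally leave room in the dense directory and modification log, which page_zip_empty_size() accounts for. A simplified model of just the uncompressed half-page rule, with a hypothetical free-space figure:

    #include <cassert>

    // Stand-in for the final check above: a record at or beyond half
    // of an empty page's free space is stored externally.
    static bool needs_ext(unsigned rec_size, unsigned empty_free_space) {
        return rec_size >= empty_free_space / 2;
    }

    int main() {
        const unsigned free_space = 16000;    // hypothetical
        assert(!needs_ext(100, free_space));  // stays local
        assert(needs_ext(8000, free_space));  // goes external
        return 0;
    }
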
-#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_DEBUG
/**********************************************************************//**
Validate a compressed page descriptor.
-@return TRUE if ok */
+@return TRUE if ok */
UNIV_INLINE
ibool
page_zip_simple_validate(
@@ -282,14 +274,14 @@ page_zip_max_ins_size(
trailer_len += PAGE_ZIP_DIR_SLOT_SIZE;
- return((lint) page_zip_get_size(page_zip)
- - trailer_len - page_zip->m_end
- - (REC_N_NEW_EXTRA_BYTES - 2));
+ return(lint(page_zip_get_size(page_zip)
+ - trailer_len - page_zip->m_end
+ - (REC_N_NEW_EXTRA_BYTES - 2)));
}
/**********************************************************************//**
Determine if enough space is available in the modification log.
-@return TRUE if enough space is available */
+@return TRUE if enough space is available */
UNIV_INLINE
ibool
page_zip_available(
@@ -339,7 +331,6 @@ page_zip_des_init(
/**********************************************************************//**
Write a log record of writing to the uncompressed header portion of a page. */
-UNIV_INTERN
void
page_zip_write_header_log(
/*======================*/
@@ -364,7 +355,6 @@ page_zip_write_header(
{
ulint pos;
- ut_ad(PAGE_ZIP_MATCH(str, page_zip));
ut_ad(page_zip_simple_validate(page_zip));
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
@@ -378,9 +368,7 @@ page_zip_write_header(
/* ut_ad(page_zip_validate(page_zip, str - pos)); */
if (mtr) {
-#ifndef UNIV_HOTBACKUP
page_zip_write_header_log(str, length, mtr);
-#endif /* !UNIV_HOTBACKUP */
}
}
@@ -406,7 +394,7 @@ page_zip_compress_write_log_no_data(
/**********************************************************************//**
Parses a log record of compressing an index page without the data.
-@return end of log record or NULL */
+@return end of log record or NULL */
UNIV_INLINE
byte*
page_zip_parse_compress_no_data(
@@ -429,7 +417,7 @@ page_zip_parse_compress_no_data(
was successful. Crash in this case. */
if (page
- && !page_zip_compress(page_zip, page, index, level, NULL)) {
+ && !page_zip_compress(page_zip, page, index, level, NULL, NULL)) {
ut_error;
}
diff --git a/storage/innobase/include/pars0grm.h b/storage/innobase/include/pars0grm.h
index d3228f7c0f9..58d424abfdc 100644
--- a/storage/innobase/include/pars0grm.h
+++ b/storage/innobase/include/pars0grm.h
@@ -1,14 +1,14 @@
-/* A Bison parser, made by GNU Bison 2.3. */
+/* A Bison parser, made by GNU Bison 3.4.2. */
-/* Skeleton interface for Bison's Yacc-like parsers in C
+/* Bison interface for Yacc-like parsers in C
- Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
- Free Software Foundation, Inc.
+ Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2019 Free Software Foundation,
+ Inc.
- This program is free software; you can redistribute it and/or modify
+ This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,9 +16,7 @@
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor,
- Boston, MA 02110-1335 USA. */
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
/* As a special exception, you may create a larger work that contains
part or all of the Bison parser skeleton and distribute that work
@@ -33,229 +31,115 @@
This special exception was added by the Free Software Foundation in
version 2.2 of Bison. */
-/* Tokens. */
+/* Undocumented macros, especially those whose name start with YY_,
+ are private implementation details. Do not rely on them. */
+
+#ifndef YY_YY_PARS0GRM_TAB_H_INCLUDED
+# define YY_YY_PARS0GRM_TAB_H_INCLUDED
+/* Debug traces. */
+#ifndef YYDEBUG
+# define YYDEBUG 0
+#endif
+#if YYDEBUG
+extern int yydebug;
+#endif
+
+/* Token type. */
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
- /* Put the tokens into the symbol table, so that GDB and other debuggers
- know about them. */
- enum yytokentype {
- PARS_INT_LIT = 258,
- PARS_FLOAT_LIT = 259,
- PARS_STR_LIT = 260,
- PARS_FIXBINARY_LIT = 261,
- PARS_BLOB_LIT = 262,
- PARS_NULL_LIT = 263,
- PARS_ID_TOKEN = 264,
- PARS_AND_TOKEN = 265,
- PARS_OR_TOKEN = 266,
- PARS_NOT_TOKEN = 267,
- PARS_GE_TOKEN = 268,
- PARS_LE_TOKEN = 269,
- PARS_NE_TOKEN = 270,
- PARS_PROCEDURE_TOKEN = 271,
- PARS_IN_TOKEN = 272,
- PARS_OUT_TOKEN = 273,
- PARS_BINARY_TOKEN = 274,
- PARS_BLOB_TOKEN = 275,
- PARS_INT_TOKEN = 276,
- PARS_INTEGER_TOKEN = 277,
- PARS_FLOAT_TOKEN = 278,
- PARS_CHAR_TOKEN = 279,
- PARS_IS_TOKEN = 280,
- PARS_BEGIN_TOKEN = 281,
- PARS_END_TOKEN = 282,
- PARS_IF_TOKEN = 283,
- PARS_THEN_TOKEN = 284,
- PARS_ELSE_TOKEN = 285,
- PARS_ELSIF_TOKEN = 286,
- PARS_LOOP_TOKEN = 287,
- PARS_WHILE_TOKEN = 288,
- PARS_RETURN_TOKEN = 289,
- PARS_SELECT_TOKEN = 290,
- PARS_SUM_TOKEN = 291,
- PARS_COUNT_TOKEN = 292,
- PARS_DISTINCT_TOKEN = 293,
- PARS_FROM_TOKEN = 294,
- PARS_WHERE_TOKEN = 295,
- PARS_FOR_TOKEN = 296,
- PARS_DDOT_TOKEN = 297,
- PARS_READ_TOKEN = 298,
- PARS_ORDER_TOKEN = 299,
- PARS_BY_TOKEN = 300,
- PARS_ASC_TOKEN = 301,
- PARS_DESC_TOKEN = 302,
- PARS_INSERT_TOKEN = 303,
- PARS_INTO_TOKEN = 304,
- PARS_VALUES_TOKEN = 305,
- PARS_UPDATE_TOKEN = 306,
- PARS_SET_TOKEN = 307,
- PARS_DELETE_TOKEN = 308,
- PARS_CURRENT_TOKEN = 309,
- PARS_OF_TOKEN = 310,
- PARS_CREATE_TOKEN = 311,
- PARS_TABLE_TOKEN = 312,
- PARS_INDEX_TOKEN = 313,
- PARS_UNIQUE_TOKEN = 314,
- PARS_CLUSTERED_TOKEN = 315,
- PARS_DOES_NOT_FIT_IN_MEM_TOKEN = 316,
- PARS_ON_TOKEN = 317,
- PARS_ASSIGN_TOKEN = 318,
- PARS_DECLARE_TOKEN = 319,
- PARS_CURSOR_TOKEN = 320,
- PARS_SQL_TOKEN = 321,
- PARS_OPEN_TOKEN = 322,
- PARS_FETCH_TOKEN = 323,
- PARS_CLOSE_TOKEN = 324,
- PARS_NOTFOUND_TOKEN = 325,
- PARS_TO_CHAR_TOKEN = 326,
- PARS_TO_NUMBER_TOKEN = 327,
- PARS_TO_BINARY_TOKEN = 328,
- PARS_BINARY_TO_NUMBER_TOKEN = 329,
- PARS_SUBSTR_TOKEN = 330,
- PARS_REPLSTR_TOKEN = 331,
- PARS_CONCAT_TOKEN = 332,
- PARS_INSTR_TOKEN = 333,
- PARS_LENGTH_TOKEN = 334,
- PARS_SYSDATE_TOKEN = 335,
- PARS_PRINTF_TOKEN = 336,
- PARS_ASSERT_TOKEN = 337,
- PARS_RND_TOKEN = 338,
- PARS_RND_STR_TOKEN = 339,
- PARS_ROW_PRINTF_TOKEN = 340,
- PARS_COMMIT_TOKEN = 341,
- PARS_ROLLBACK_TOKEN = 342,
- PARS_WORK_TOKEN = 343,
- PARS_UNSIGNED_TOKEN = 344,
- PARS_EXIT_TOKEN = 345,
- PARS_FUNCTION_TOKEN = 346,
- PARS_LOCK_TOKEN = 347,
- PARS_SHARE_TOKEN = 348,
- PARS_MODE_TOKEN = 349,
- PARS_LIKE_TOKEN = 350,
- PARS_LIKE_TOKEN_EXACT = 351,
- PARS_LIKE_TOKEN_PREFIX = 352,
- PARS_LIKE_TOKEN_SUFFIX = 353,
- PARS_LIKE_TOKEN_SUBSTR = 354,
- PARS_TABLE_NAME_TOKEN = 355,
- PARS_COMPACT_TOKEN = 356,
- PARS_BLOCK_SIZE_TOKEN = 357,
- PARS_BIGINT_TOKEN = 358,
- NEG = 359
- };
+ enum yytokentype
+ {
+ PARS_INT_LIT = 258,
+ PARS_FLOAT_LIT = 259,
+ PARS_STR_LIT = 260,
+ PARS_NULL_LIT = 261,
+ PARS_ID_TOKEN = 262,
+ PARS_AND_TOKEN = 263,
+ PARS_OR_TOKEN = 264,
+ PARS_NOT_TOKEN = 265,
+ PARS_GE_TOKEN = 266,
+ PARS_LE_TOKEN = 267,
+ PARS_NE_TOKEN = 268,
+ PARS_PROCEDURE_TOKEN = 269,
+ PARS_IN_TOKEN = 270,
+ PARS_INT_TOKEN = 271,
+ PARS_CHAR_TOKEN = 272,
+ PARS_IS_TOKEN = 273,
+ PARS_BEGIN_TOKEN = 274,
+ PARS_END_TOKEN = 275,
+ PARS_IF_TOKEN = 276,
+ PARS_THEN_TOKEN = 277,
+ PARS_ELSE_TOKEN = 278,
+ PARS_ELSIF_TOKEN = 279,
+ PARS_LOOP_TOKEN = 280,
+ PARS_WHILE_TOKEN = 281,
+ PARS_RETURN_TOKEN = 282,
+ PARS_SELECT_TOKEN = 283,
+ PARS_COUNT_TOKEN = 284,
+ PARS_FROM_TOKEN = 285,
+ PARS_WHERE_TOKEN = 286,
+ PARS_FOR_TOKEN = 287,
+ PARS_DDOT_TOKEN = 288,
+ PARS_ORDER_TOKEN = 289,
+ PARS_BY_TOKEN = 290,
+ PARS_ASC_TOKEN = 291,
+ PARS_DESC_TOKEN = 292,
+ PARS_INSERT_TOKEN = 293,
+ PARS_INTO_TOKEN = 294,
+ PARS_VALUES_TOKEN = 295,
+ PARS_UPDATE_TOKEN = 296,
+ PARS_SET_TOKEN = 297,
+ PARS_DELETE_TOKEN = 298,
+ PARS_CURRENT_TOKEN = 299,
+ PARS_OF_TOKEN = 300,
+ PARS_CREATE_TOKEN = 301,
+ PARS_TABLE_TOKEN = 302,
+ PARS_INDEX_TOKEN = 303,
+ PARS_UNIQUE_TOKEN = 304,
+ PARS_CLUSTERED_TOKEN = 305,
+ PARS_ON_TOKEN = 306,
+ PARS_ASSIGN_TOKEN = 307,
+ PARS_DECLARE_TOKEN = 308,
+ PARS_CURSOR_TOKEN = 309,
+ PARS_SQL_TOKEN = 310,
+ PARS_OPEN_TOKEN = 311,
+ PARS_FETCH_TOKEN = 312,
+ PARS_CLOSE_TOKEN = 313,
+ PARS_NOTFOUND_TOKEN = 314,
+ PARS_TO_BINARY_TOKEN = 315,
+ PARS_SUBSTR_TOKEN = 316,
+ PARS_CONCAT_TOKEN = 317,
+ PARS_INSTR_TOKEN = 318,
+ PARS_LENGTH_TOKEN = 319,
+ PARS_COMMIT_TOKEN = 320,
+ PARS_ROLLBACK_TOKEN = 321,
+ PARS_WORK_TOKEN = 322,
+ PARS_EXIT_TOKEN = 323,
+ PARS_FUNCTION_TOKEN = 324,
+ PARS_LOCK_TOKEN = 325,
+ PARS_SHARE_TOKEN = 326,
+ PARS_MODE_TOKEN = 327,
+ PARS_LIKE_TOKEN = 328,
+ PARS_LIKE_TOKEN_EXACT = 329,
+ PARS_LIKE_TOKEN_PREFIX = 330,
+ PARS_LIKE_TOKEN_SUFFIX = 331,
+ PARS_LIKE_TOKEN_SUBSTR = 332,
+ PARS_TABLE_NAME_TOKEN = 333,
+ PARS_BIGINT_TOKEN = 334,
+ NEG = 335
+ };
#endif
-/* Tokens. */
-#define PARS_INT_LIT 258
-#define PARS_FLOAT_LIT 259
-#define PARS_STR_LIT 260
-#define PARS_FIXBINARY_LIT 261
-#define PARS_BLOB_LIT 262
-#define PARS_NULL_LIT 263
-#define PARS_ID_TOKEN 264
-#define PARS_AND_TOKEN 265
-#define PARS_OR_TOKEN 266
-#define PARS_NOT_TOKEN 267
-#define PARS_GE_TOKEN 268
-#define PARS_LE_TOKEN 269
-#define PARS_NE_TOKEN 270
-#define PARS_PROCEDURE_TOKEN 271
-#define PARS_IN_TOKEN 272
-#define PARS_OUT_TOKEN 273
-#define PARS_BINARY_TOKEN 274
-#define PARS_BLOB_TOKEN 275
-#define PARS_INT_TOKEN 276
-#define PARS_INTEGER_TOKEN 277
-#define PARS_FLOAT_TOKEN 278
-#define PARS_CHAR_TOKEN 279
-#define PARS_IS_TOKEN 280
-#define PARS_BEGIN_TOKEN 281
-#define PARS_END_TOKEN 282
-#define PARS_IF_TOKEN 283
-#define PARS_THEN_TOKEN 284
-#define PARS_ELSE_TOKEN 285
-#define PARS_ELSIF_TOKEN 286
-#define PARS_LOOP_TOKEN 287
-#define PARS_WHILE_TOKEN 288
-#define PARS_RETURN_TOKEN 289
-#define PARS_SELECT_TOKEN 290
-#define PARS_SUM_TOKEN 291
-#define PARS_COUNT_TOKEN 292
-#define PARS_DISTINCT_TOKEN 293
-#define PARS_FROM_TOKEN 294
-#define PARS_WHERE_TOKEN 295
-#define PARS_FOR_TOKEN 296
-#define PARS_DDOT_TOKEN 297
-#define PARS_READ_TOKEN 298
-#define PARS_ORDER_TOKEN 299
-#define PARS_BY_TOKEN 300
-#define PARS_ASC_TOKEN 301
-#define PARS_DESC_TOKEN 302
-#define PARS_INSERT_TOKEN 303
-#define PARS_INTO_TOKEN 304
-#define PARS_VALUES_TOKEN 305
-#define PARS_UPDATE_TOKEN 306
-#define PARS_SET_TOKEN 307
-#define PARS_DELETE_TOKEN 308
-#define PARS_CURRENT_TOKEN 309
-#define PARS_OF_TOKEN 310
-#define PARS_CREATE_TOKEN 311
-#define PARS_TABLE_TOKEN 312
-#define PARS_INDEX_TOKEN 313
-#define PARS_UNIQUE_TOKEN 314
-#define PARS_CLUSTERED_TOKEN 315
-#define PARS_DOES_NOT_FIT_IN_MEM_TOKEN 316
-#define PARS_ON_TOKEN 317
-#define PARS_ASSIGN_TOKEN 318
-#define PARS_DECLARE_TOKEN 319
-#define PARS_CURSOR_TOKEN 320
-#define PARS_SQL_TOKEN 321
-#define PARS_OPEN_TOKEN 322
-#define PARS_FETCH_TOKEN 323
-#define PARS_CLOSE_TOKEN 324
-#define PARS_NOTFOUND_TOKEN 325
-#define PARS_TO_CHAR_TOKEN 326
-#define PARS_TO_NUMBER_TOKEN 327
-#define PARS_TO_BINARY_TOKEN 328
-#define PARS_BINARY_TO_NUMBER_TOKEN 329
-#define PARS_SUBSTR_TOKEN 330
-#define PARS_REPLSTR_TOKEN 331
-#define PARS_CONCAT_TOKEN 332
-#define PARS_INSTR_TOKEN 333
-#define PARS_LENGTH_TOKEN 334
-#define PARS_SYSDATE_TOKEN 335
-#define PARS_PRINTF_TOKEN 336
-#define PARS_ASSERT_TOKEN 337
-#define PARS_RND_TOKEN 338
-#define PARS_RND_STR_TOKEN 339
-#define PARS_ROW_PRINTF_TOKEN 340
-#define PARS_COMMIT_TOKEN 341
-#define PARS_ROLLBACK_TOKEN 342
-#define PARS_WORK_TOKEN 343
-#define PARS_UNSIGNED_TOKEN 344
-#define PARS_EXIT_TOKEN 345
-#define PARS_FUNCTION_TOKEN 346
-#define PARS_LOCK_TOKEN 347
-#define PARS_SHARE_TOKEN 348
-#define PARS_MODE_TOKEN 349
-#define PARS_LIKE_TOKEN 350
-#define PARS_LIKE_TOKEN_EXACT 351
-#define PARS_LIKE_TOKEN_PREFIX 352
-#define PARS_LIKE_TOKEN_SUFFIX 353
-#define PARS_LIKE_TOKEN_SUBSTR 354
-#define PARS_TABLE_NAME_TOKEN 355
-#define PARS_COMPACT_TOKEN 356
-#define PARS_BLOCK_SIZE_TOKEN 357
-#define PARS_BIGINT_TOKEN 358
-#define NEG 359
-
-
-
+/* Value type. */
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
typedef int YYSTYPE;
-# define yystype YYSTYPE /* obsolescent; will be withdrawn */
-# define YYSTYPE_IS_DECLARED 1
# define YYSTYPE_IS_TRIVIAL 1
+# define YYSTYPE_IS_DECLARED 1
#endif
+
extern YYSTYPE yylval;
+int yyparse (void);
+
+#endif /* !YY_YY_PARS0GRM_TAB_H_INCLUDED */
diff --git a/storage/innobase/include/pars0opt.h b/storage/innobase/include/pars0opt.h
index ce3e4410ee7..07a726eac68 100644
--- a/storage/innobase/include/pars0opt.h
+++ b/storage/innobase/include/pars0opt.h
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,18 +27,14 @@ Created 12/21/1997 Heikki Tuuri
#ifndef pars0opt_h
#define pars0opt_h
-#include "univ.i"
#include "que0types.h"
-#include "usr0types.h"
#include "pars0sym.h"
-#include "dict0types.h"
#include "row0sel.h"
/*******************************************************************//**
Optimizes a select. Decides which indexes to use for the tables. The tables
are accessed in the order that they were written in the FROM part of the
select statement. */
-UNIV_INTERN
void
opt_search_plan(
/*============*/
@@ -49,7 +46,6 @@ already exist in the list. If the column is already in the list, puts a value
indirection to point to the occurrence in the column list, except if the
column occurrence we are looking at is in the column list, in which case
nothing is done. */
-UNIV_INTERN
void
opt_find_all_cols(
/*==============*/
@@ -60,16 +56,13 @@ opt_find_all_cols(
to add new found columns */
plan_t* plan, /*!< in: plan or NULL */
que_node_t* exp); /*!< in: expression or condition */
+#ifdef UNIV_SQL_DEBUG
/********************************************************************//**
Prints info of a query plan. */
-UNIV_INTERN
void
opt_print_query_plan(
/*=================*/
sel_node_t* sel_node); /*!< in: select node */
-
-#ifndef UNIV_NONINL
-#include "pars0opt.ic"
-#endif
+#endif /* UNIV_SQL_DEBUG */
#endif
diff --git a/storage/innobase/include/pars0opt.ic b/storage/innobase/include/pars0opt.ic
deleted file mode 100644
index 99075c81d39..00000000000
--- a/storage/innobase/include/pars0opt.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/pars0opt.ic
-Simple SQL optimizer
-
-Created 12/21/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/pars0pars.h b/storage/innobase/include/pars0pars.h
index 1032862b20d..f54c50e5b85 100644
--- a/storage/innobase/include/pars0pars.h
+++ b/storage/innobase/include/pars0pars.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2019, MariaDB Corporation.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,13 +27,12 @@ Created 11/19/1996 Heikki Tuuri
#ifndef pars0pars_h
#define pars0pars_h
-#include "univ.i"
#include "que0types.h"
-#include "usr0types.h"
#include "pars0types.h"
#include "row0types.h"
#include "trx0types.h"
#include "ut0vec.h"
+#include "row0mysql.h"
/** Type of the user functions. The first argument is always InnoDB-supplied
and varies in type, while 'user_arg' is a user-supplied argument. The
@@ -45,39 +44,19 @@ typedef ibool (*pars_user_func_cb_t)(void* arg, void* user_arg);
information */
extern int yydebug;
-#ifdef UNIV_SQL_DEBUG
-/** If the following is set TRUE, the lexer will print the SQL string
-as it tokenizes it */
-extern ibool pars_print_lexed;
-#endif /* UNIV_SQL_DEBUG */
-
/* Global variable used while parsing a single procedure or query : the code is
NOT re-entrant */
extern sym_tab_t* pars_sym_tab_global;
-extern pars_res_word_t pars_to_char_token;
-extern pars_res_word_t pars_to_number_token;
extern pars_res_word_t pars_to_binary_token;
-extern pars_res_word_t pars_binary_to_number_token;
extern pars_res_word_t pars_substr_token;
-extern pars_res_word_t pars_replstr_token;
extern pars_res_word_t pars_concat_token;
extern pars_res_word_t pars_length_token;
extern pars_res_word_t pars_instr_token;
-extern pars_res_word_t pars_sysdate_token;
-extern pars_res_word_t pars_printf_token;
-extern pars_res_word_t pars_assert_token;
-extern pars_res_word_t pars_rnd_token;
-extern pars_res_word_t pars_rnd_str_token;
extern pars_res_word_t pars_count_token;
-extern pars_res_word_t pars_sum_token;
-extern pars_res_word_t pars_distinct_token;
-extern pars_res_word_t pars_binary_token;
-extern pars_res_word_t pars_blob_token;
extern pars_res_word_t pars_int_token;
extern pars_res_word_t pars_bigint_token;
extern pars_res_word_t pars_char_token;
-extern pars_res_word_t pars_float_token;
extern pars_res_word_t pars_update_token;
extern pars_res_word_t pars_asc_token;
extern pars_res_word_t pars_desc_token;
@@ -99,8 +78,7 @@ yyparse(void);
/*************************************************************//**
Parses an SQL string returning the query graph.
-@return own: the query graph */
-UNIV_INTERN
+@return own: the query graph */
que_t*
pars_sql(
/*=====*/
@@ -109,7 +87,6 @@ pars_sql(
/*************************************************************//**
Retrieves characters to the lexical analyzer.
@return number of characters copied or 0 on EOF */
-UNIV_INTERN
size_t
pars_get_lex_chars(
/*===============*/
@@ -118,15 +95,13 @@ pars_get_lex_chars(
in the buffer */
/*************************************************************//**
Called by yyparse on error. */
-UNIV_INTERN
void
yyerror(
/*====*/
const char* s); /*!< in: error message string */
/*********************************************************************//**
Parses a variable declaration.
-@return own: symbol table node of type SYM_VAR */
-UNIV_INTERN
+@return own: symbol table node of type SYM_VAR */
sym_node_t*
pars_variable_declaration(
/*======================*/
@@ -135,8 +110,7 @@ pars_variable_declaration(
pars_res_word_t* type); /*!< in: pointer to a type token */
/*********************************************************************//**
Parses a function expression.
-@return own: function node in a query tree */
-UNIV_INTERN
+@return own: function node in a query tree */
func_node_t*
pars_func(
/*======*/
@@ -145,8 +119,7 @@ pars_func(
/*************************************************************************
Rebind a LIKE search string. NOTE: We ignore any '%' characters embedded
within the search string.
-@return own: function node in a query tree */
-UNIV_INTERN
+@return own: function node in a query tree */
int
pars_like_rebind(
/*=============*/
@@ -155,8 +128,7 @@ pars_like_rebind(
ulint len); /* in: length of literal to (re) bind*/
/*********************************************************************//**
Parses an operator expression.
-@return own: function node in a query tree */
-UNIV_INTERN
+@return own: function node in a query tree */
func_node_t*
pars_op(
/*====*/
@@ -166,8 +138,7 @@ pars_op(
operator */
/*********************************************************************//**
Parses an ORDER BY clause. Only ordering by a single column is supported.
-@return own: order-by node in a query tree */
-UNIV_INTERN
+@return own: order-by node in a query tree */
order_node_t*
pars_order_by(
/*==========*/
@@ -176,8 +147,7 @@ pars_order_by(
/*********************************************************************//**
Parses a select list; creates a query graph node for the whole SELECT
statement.
-@return own: select node in a query tree */
-UNIV_INTERN
+@return own: select node in a query tree */
sel_node_t*
pars_select_list(
/*=============*/
@@ -185,8 +155,7 @@ pars_select_list(
sym_node_t* into_list); /*!< in: variables list or NULL */
/*********************************************************************//**
Parses a cursor declaration.
-@return sym_node */
-UNIV_INTERN
+@return sym_node */
que_node_t*
pars_cursor_declaration(
/*====================*/
@@ -195,8 +164,7 @@ pars_cursor_declaration(
sel_node_t* select_node); /*!< in: select node */
/*********************************************************************//**
Parses a function declaration.
-@return sym_node */
-UNIV_INTERN
+@return sym_node */
que_node_t*
pars_function_declaration(
/*======================*/
@@ -204,8 +172,7 @@ pars_function_declaration(
table */
/*********************************************************************//**
Parses a select statement.
-@return own: select node in a query tree */
-UNIV_INTERN
+@return own: select node in a query tree */
sel_node_t*
pars_select_statement(
/*==================*/
@@ -219,8 +186,7 @@ pars_select_statement(
order_node_t* order_by); /*!< in: NULL or an order-by node */
/*********************************************************************//**
Parses a column assignment in an update.
-@return column assignment node */
-UNIV_INTERN
+@return column assignment node */
col_assign_node_t*
pars_column_assignment(
/*===================*/
@@ -228,8 +194,7 @@ pars_column_assignment(
que_node_t* exp); /*!< in: value to assign */
/*********************************************************************//**
Parses a delete or update statement start.
-@return own: update node in a query tree */
-UNIV_INTERN
+@return own: update node in a query tree */
upd_node_t*
pars_update_statement_start(
/*========================*/
@@ -239,8 +204,7 @@ pars_update_statement_start(
if delete */
/*********************************************************************//**
Parses an update or delete statement.
-@return own: update node in a query tree */
-UNIV_INTERN
+@return own: update node in a query tree */
upd_node_t*
pars_update_statement(
/*==================*/
@@ -250,8 +214,7 @@ pars_update_statement(
que_node_t* search_cond); /*!< in: search condition or NULL */
/*********************************************************************//**
Parses an insert statement.
-@return own: update node in a query tree */
-UNIV_INTERN
+@return own: update node in a query tree */
ins_node_t*
pars_insert_statement(
/*==================*/
@@ -259,21 +222,8 @@ pars_insert_statement(
que_node_t* values_list, /*!< in: value expression list or NULL */
sel_node_t* select); /*!< in: select condition or NULL */
/*********************************************************************//**
-Parses a procedure parameter declaration.
-@return own: symbol table node of type SYM_VAR */
-UNIV_INTERN
-sym_node_t*
-pars_parameter_declaration(
-/*=======================*/
- sym_node_t* node, /*!< in: symbol table node allocated for the
- id of the parameter */
- ulint param_type,
- /*!< in: PARS_INPUT or PARS_OUTPUT */
- pars_res_word_t* type); /*!< in: pointer to a type token */
-/*********************************************************************//**
Parses an elsif element.
-@return elsif node */
-UNIV_INTERN
+@return elsif node */
elsif_node_t*
pars_elsif_element(
/*===============*/
@@ -281,8 +231,7 @@ pars_elsif_element(
que_node_t* stat_list); /*!< in: statement list */
/*********************************************************************//**
Parses an if-statement.
-@return if-statement node */
-UNIV_INTERN
+@return if-statement node */
if_node_t*
pars_if_statement(
/*==============*/
@@ -291,8 +240,7 @@ pars_if_statement(
que_node_t* else_part); /*!< in: else-part statement list */
/*********************************************************************//**
Parses a for-loop-statement.
-@return for-statement node */
-UNIV_INTERN
+@return for-statement node */
for_node_t*
pars_for_statement(
/*===============*/
@@ -302,8 +250,7 @@ pars_for_statement(
que_node_t* stat_list); /*!< in: statement list */
/*********************************************************************//**
Parses a while-statement.
-@return while-statement node */
-UNIV_INTERN
+@return while-statement node */
while_node_t*
pars_while_statement(
/*=================*/
@@ -311,22 +258,19 @@ pars_while_statement(
que_node_t* stat_list); /*!< in: statement list */
/*********************************************************************//**
Parses an exit statement.
-@return exit statement node */
-UNIV_INTERN
+@return exit statement node */
exit_node_t*
pars_exit_statement(void);
/*=====================*/
/*********************************************************************//**
Parses a return-statement.
-@return return-statement node */
-UNIV_INTERN
+@return return-statement node */
return_node_t*
pars_return_statement(void);
/*=======================*/
/*********************************************************************//**
Parses a procedure call.
-@return function node */
-UNIV_INTERN
+@return function node */
func_node_t*
pars_procedure_call(
/*================*/
@@ -334,8 +278,7 @@ pars_procedure_call(
que_node_t* args); /*!< in: argument list */
/*********************************************************************//**
Parses an assignment statement.
-@return assignment statement node */
-UNIV_INTERN
+@return assignment statement node */
assign_node_t*
pars_assignment_statement(
/*======================*/
@@ -344,8 +287,7 @@ pars_assignment_statement(
/*********************************************************************//**
Parses a fetch statement. into_list or user_func (but not both) must be
non-NULL.
-@return fetch statement node */
-UNIV_INTERN
+@return fetch statement node */
fetch_node_t*
pars_fetch_statement(
/*=================*/
@@ -354,8 +296,7 @@ pars_fetch_statement(
sym_node_t* user_func); /*!< in: user function name, or NULL */
/*********************************************************************//**
Parses an open or close cursor statement.
-@return fetch statement node */
-UNIV_INTERN
+@return fetch statement node */
open_node_t*
pars_open_statement(
/*================*/
@@ -364,30 +305,26 @@ pars_open_statement(
sym_node_t* cursor); /*!< in: cursor node */
/*********************************************************************//**
Parses a row_printf-statement.
-@return row_printf-statement node */
-UNIV_INTERN
+@return row_printf-statement node */
row_printf_node_t*
pars_row_printf_statement(
/*======================*/
sel_node_t* sel_node); /*!< in: select node */
/*********************************************************************//**
Parses a commit statement.
-@return own: commit node struct */
-UNIV_INTERN
+@return own: commit node struct */
commit_node_t*
pars_commit_statement(void);
/*=======================*/
/*********************************************************************//**
Parses a rollback statement.
-@return own: rollback node struct */
-UNIV_INTERN
+@return own: rollback node struct */
roll_node_t*
pars_rollback_statement(void);
/*=========================*/
/*********************************************************************//**
Parses a column definition at a table creation.
-@return column sym table node */
-UNIV_INTERN
+@return column sym table node */
sym_node_t*
pars_column_def(
/*============*/
@@ -396,37 +333,20 @@ pars_column_def(
pars_res_word_t* type, /*!< in: data type */
sym_node_t* len, /*!< in: length of column, or
NULL */
- void* is_unsigned, /*!< in: if not NULL, column
- is of type UNSIGNED. */
void* is_not_null); /*!< in: if not NULL, column
is of type NOT NULL. */
/*********************************************************************//**
Parses a table creation operation.
-@return table create subgraph */
-UNIV_INTERN
+@return table create subgraph */
tab_node_t*
pars_create_table(
/*==============*/
sym_node_t* table_sym, /*!< in: table name node in the symbol
table */
- sym_node_t* column_defs, /*!< in: list of column names */
- sym_node_t* compact, /* in: non-NULL if COMPACT table. */
- sym_node_t* block_size, /* in: block size (can be NULL) */
- void* not_fit_in_memory);
- /*!< in: a non-NULL pointer means that
- this is a table which in simulations
- should be simulated as not fitting
- in memory; thread is put to sleep
- to simulate disk accesses; NOTE that
- this flag is not stored to the data
- dictionary on disk, and the database
- will forget about non-NULL value if
- it has to reload the table definition
- from disk */
+ sym_node_t* column_defs); /*!< in: list of column names */
/*********************************************************************//**
Parses an index creation operation.
-@return index create subgraph */
-UNIV_INTERN
+@return index create subgraph */
ind_node_t*
pars_create_index(
/*==============*/
@@ -439,14 +359,12 @@ pars_create_index(
sym_node_t* column_list); /*!< in: list of column names */
/*********************************************************************//**
Parses a procedure definition.
-@return query fork node */
-UNIV_INTERN
+@return query fork node */
que_fork_t*
pars_procedure_definition(
/*======================*/
sym_node_t* sym_node, /*!< in: procedure id node in the symbol
table */
- sym_node_t* param_list, /*!< in: parameter declaration list */
que_node_t* stat_list); /*!< in: statement list */
/*************************************************************//**
@@ -454,38 +372,37 @@ Parses a stored procedure call, when this is not within another stored
procedure, that is, the client issues a procedure call directly.
In MySQL/InnoDB, stored InnoDB procedures are invoked via the
parsed procedure tree, not via InnoDB SQL, so this function is not used.
-@return query graph */
-UNIV_INTERN
+@return query graph */
que_fork_t*
pars_stored_procedure_call(
/*=======================*/
sym_node_t* sym_node); /*!< in: stored procedure name */
-/******************************************************************//**
-Completes a query graph by adding query thread and fork nodes
+/** Completes a query graph by adding query thread and fork nodes
above it and prepares the graph for running. The fork created is of
type QUE_FORK_MYSQL_INTERFACE.
-@return query thread node to run */
-UNIV_INTERN
+@param[in] node root node for an incomplete query
+ graph, or NULL for dummy graph
+@param[in] trx transaction handle
+@param[in] heap memory heap from which allocated
+@param[in] prebuilt row prebuilt structure
+@return query thread node to run */
que_thr_t*
pars_complete_graph_for_exec(
-/*=========================*/
- que_node_t* node, /*!< in: root node for an incomplete
- query graph, or NULL for dummy graph */
- trx_t* trx, /*!< in: transaction handle */
- mem_heap_t* heap) /*!< in: memory heap from which allocated */
+ que_node_t* node,
+ trx_t* trx,
+ mem_heap_t* heap,
+ row_prebuilt_t* prebuilt)
MY_ATTRIBUTE((nonnull(2,3), warn_unused_result));
/****************************************************************//**
Create parser info struct.
-@return own: info struct */
-UNIV_INTERN
+@return own: info struct */
pars_info_t*
pars_info_create(void);
/*==================*/
/****************************************************************//**
Free info struct and everything it contains. */
-UNIV_INTERN
void
pars_info_free(
/*===========*/
@@ -493,7 +410,6 @@ pars_info_free(
/****************************************************************//**
Add bound literal. */
-UNIV_INTERN
void
pars_info_add_literal(
/*==================*/
@@ -508,7 +424,6 @@ pars_info_add_literal(
/****************************************************************//**
Equivalent to pars_info_add_literal(info, name, str, strlen(str),
DATA_VARCHAR, DATA_ENGLISH). */
-UNIV_INTERN
void
pars_info_add_str_literal(
/*======================*/
@@ -518,7 +433,6 @@ pars_info_add_str_literal(
/********************************************************************
If the literal value already exists then it rebinds otherwise it
creates a new entry.*/
-UNIV_INTERN
void
pars_info_bind_literal(
/*===================*/
@@ -531,7 +445,6 @@ pars_info_bind_literal(
/********************************************************************
If the literal value already exists then it rebinds otherwise it
creates a new entry.*/
-UNIV_INTERN
void
pars_info_bind_varchar_literal(
/*===========================*/
@@ -548,7 +461,6 @@ pars_info_add_literal(info, name, buf, 4, DATA_INT, 0);
except that the buffer is dynamically allocated from the info struct's
heap. */
-UNIV_INTERN
void
pars_info_bind_int4_literal(
/*=======================*/
@@ -558,7 +470,6 @@ pars_info_bind_int4_literal(
/********************************************************************
If the literal value already exists then it rebinds otherwise it
creates a new entry. */
-UNIV_INTERN
void
pars_info_bind_int8_literal(
/*=======================*/
@@ -567,7 +478,6 @@ pars_info_bind_int8_literal(
const ib_uint64_t* val); /*!< in: value */
/****************************************************************//**
Add user function. */
-UNIV_INTERN
void
pars_info_bind_function(
/*===================*/
@@ -577,7 +487,6 @@ pars_info_bind_function(
void* arg); /*!< in: user-supplied argument */
/****************************************************************//**
Add bound id. */
-UNIV_INTERN
void
pars_info_bind_id(
/*=============*/
@@ -594,7 +503,6 @@ pars_info_add_literal(info, name, buf, 4, DATA_INT, 0);
except that the buffer is dynamically allocated from the info struct's
heap. */
-UNIV_INTERN
void
pars_info_add_int4_literal(
/*=======================*/
@@ -611,7 +519,6 @@ pars_info_add_literal(info, name, buf, 8, DATA_FIXBINARY, 0);
except that the buffer is dynamically allocated from the info struct's
heap. */
-UNIV_INTERN
void
pars_info_add_ull_literal(
/*======================*/
@@ -622,7 +529,6 @@ pars_info_add_ull_literal(
/****************************************************************//**
If the literal value already exists then it rebinds otherwise it
creates a new entry. */
-UNIV_INTERN
void
pars_info_bind_ull_literal(
/*=======================*/
@@ -633,7 +539,6 @@ pars_info_bind_ull_literal(
/****************************************************************//**
Add bound id. */
-UNIV_INTERN
void
pars_info_add_id(
/*=============*/
@@ -643,8 +548,7 @@ pars_info_add_id(
/****************************************************************//**
Get bound literal with the given name.
-@return bound literal, or NULL if not found */
-UNIV_INTERN
+@return bound literal, or NULL if not found */
pars_bound_lit_t*
pars_info_get_bound_lit(
/*====================*/
@@ -653,8 +557,7 @@ pars_info_get_bound_lit(
/****************************************************************//**
Get bound id with the given name.
-@return bound id, or NULL if not found */
-UNIV_INTERN
+@return bound id, or NULL if not found */
pars_bound_id_t*
pars_info_get_bound_id(
/*===================*/
@@ -663,7 +566,6 @@ pars_info_get_bound_id(
/******************************************************************//**
Release any resources used by the lexer. */
-UNIV_INTERN
void
pars_lexer_close(void);
/*==================*/
@@ -740,7 +642,6 @@ struct proc_node_t{
que_common_t common; /*!< type: QUE_NODE_PROC */
sym_node_t* proc_id; /*!< procedure name symbol in the symbol
table of this same procedure */
- sym_node_t* param_list; /*!< input and output parameters */
que_node_t* stat_list; /*!< statement list */
sym_tab_t* sym_tab; /*!< symbol table of this procedure */
};
@@ -815,13 +716,9 @@ struct col_assign_node_t{
#define PARS_FUNC_LOGICAL 2 /*!< AND, OR, NOT */
#define PARS_FUNC_CMP 3 /*!< comparison operators */
#define PARS_FUNC_PREDEFINED 4 /*!< TO_NUMBER, SUBSTR, ... */
-#define PARS_FUNC_AGGREGATE 5 /*!< COUNT, DISTINCT, SUM */
+#define PARS_FUNC_AGGREGATE 5 /*!< COUNT */
#define PARS_FUNC_OTHER 6 /*!< these are not real functions,
e.g., := */
/* @} */
-#ifndef UNIV_NONINL
-#include "pars0pars.ic"
-#endif
-
#endif
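
A note on the pars_complete_graph_for_exec() change above: the new trailing row_prebuilt_t* is threaded down to the query thread (see the prebuilt field added to que_thr_t in que0que.h later in this patch). A hedged fragment of the calling pattern inside the server; node, trx, heap and prebuilt are assumed to come from the surrounding row-operation code:

	que_thr_t*	thr = pars_complete_graph_for_exec(
		node,		/* e.g. an insert node, or NULL for the
				dummy graph used by standalone calls */
		trx,		/* transaction handle */
		heap,		/* heap the graph was allocated from */
		prebuilt);	/* new in this patch; may be NULL, as in
				purge and other internal callers */
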
diff --git a/storage/innobase/include/pars0pars.ic b/storage/innobase/include/pars0pars.ic
deleted file mode 100644
index f788f4479bf..00000000000
--- a/storage/innobase/include/pars0pars.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/pars0pars.ic
-SQL parser
-
-Created 11/19/1996 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/pars0sym.h b/storage/innobase/include/pars0sym.h
index d423f84ab57..59f6cc315de 100644
--- a/storage/innobase/include/pars0sym.h
+++ b/storage/innobase/include/pars0sym.h
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,17 +27,13 @@ Created 12/15/1997 Heikki Tuuri
#ifndef pars0sym_h
#define pars0sym_h
-#include "univ.i"
#include "que0types.h"
-#include "usr0types.h"
-#include "dict0types.h"
#include "pars0types.h"
#include "row0types.h"
/******************************************************************//**
Creates a symbol table for a single stored procedure or query.
-@return own: symbol table */
-UNIV_INTERN
+@return own: symbol table */
sym_tab_t*
sym_tab_create(
/*===========*/
@@ -45,15 +42,13 @@ sym_tab_create(
Frees the memory allocated dynamically AFTER parsing phase for variables
etc. in the symbol table. Does not free the mem heap where the table was
originally created. Also frees explicit SQL cursor definitions. */
-UNIV_INTERN
void
sym_tab_free_private(
/*=================*/
sym_tab_t* sym_tab); /*!< in, own: symbol table */
/******************************************************************//**
Adds an integer literal to a symbol table.
-@return symbol table node */
-UNIV_INTERN
+@return symbol table node */
sym_node_t*
sym_tab_add_int_lit(
/*================*/
@@ -61,8 +56,7 @@ sym_tab_add_int_lit(
ulint val); /*!< in: integer value */
/******************************************************************//**
Adds a string literal to a symbol table.
-@return symbol table node */
-UNIV_INTERN
+@return symbol table node */
sym_node_t*
sym_tab_add_str_lit(
/*================*/
@@ -72,8 +66,7 @@ sym_tab_add_str_lit(
ulint len); /*!< in: string length */
/******************************************************************//**
Add a bound literal to a symbol table.
-@return symbol table node */
-UNIV_INTERN
+@return symbol table node */
sym_node_t*
sym_tab_add_bound_lit(
/*==================*/
@@ -82,7 +75,6 @@ sym_tab_add_bound_lit(
ulint* lit_type); /*!< out: type of literal (PARS_*_LIT) */
/**********************************************************************
Rebind literal to a node in the symbol table. */
-
sym_node_t*
sym_tab_rebind_lit(
/*===============*/
@@ -92,16 +84,14 @@ sym_tab_rebind_lit(
ulint length); /* in: length of data */
/******************************************************************//**
Adds an SQL null literal to a symbol table.
-@return symbol table node */
-UNIV_INTERN
+@return symbol table node */
sym_node_t*
sym_tab_add_null_lit(
/*=================*/
sym_tab_t* sym_tab); /*!< in: symbol table */
/******************************************************************//**
Adds an identifier to a symbol table.
-@return symbol table node */
-UNIV_INTERN
+@return symbol table node */
sym_node_t*
sym_tab_add_id(
/*===========*/
@@ -111,8 +101,7 @@ sym_tab_add_id(
/******************************************************************//**
Add a bound identifier to a symbol table.
-@return symbol table node */
-UNIV_INTERN
+@return symbol table node */
sym_node_t*
sym_tab_add_bound_id(
/*===========*/
@@ -235,7 +224,7 @@ struct sym_tab_t{
/*!< SQL string to parse */
size_t string_len;
/*!< SQL string length */
- int next_char_pos;
+ size_t next_char_pos;
/*!< position of the next character in
sql_string to give to the lexical
analyzer */
@@ -251,8 +240,4 @@ struct sym_tab_t{
allocate space */
};
-#ifndef UNIV_NONINL
-#include "pars0sym.ic"
-#endif
-
#endif
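
The int -> size_t change to next_char_pos above is not cosmetic: the position is compared against string_len, which is already a size_t, and a signed position silently converts to unsigned in such comparisons. A standalone illustration of the failure mode the wider type rules out (variable names are illustrative):

#include <cstdio>

int main()
{
	std::size_t	string_len    = 10;
	int		next_char_pos = -1;	/* the old, signed type */

	/* The usual arithmetic conversions turn -1 into a huge unsigned
	value, so this "input exhausted" test succeeds for a position
	that is logically before the start of the buffer. */
	if (next_char_pos >= string_len) {
		std::printf("lexer would stop here, wrongly\n");
	}
	return 0;
}
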
diff --git a/storage/innobase/include/pars0sym.ic b/storage/innobase/include/pars0sym.ic
deleted file mode 100644
index 6401a525cc5..00000000000
--- a/storage/innobase/include/pars0sym.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/pars0sym.ic
-SQL parser symbol table
-
-Created 12/15/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/que0que.h b/storage/innobase/include/que0que.h
index 4ec24f4081b..8489551e64d 100644
--- a/storage/innobase/include/que0que.h
+++ b/storage/innobase/include/que0que.h
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,28 +27,20 @@ Created 5/27/1996 Heikki Tuuri
#ifndef que0que_h
#define que0que_h
-#include "univ.i"
#include "data0data.h"
-#include "dict0types.h"
#include "trx0trx.h"
#include "trx0roll.h"
#include "srv0srv.h"
-#include "usr0types.h"
#include "que0types.h"
#include "row0types.h"
#include "pars0types.h"
-/* If the following flag is set TRUE, the module will print trace info
-of SQL execution in the UNIV_SQL_DEBUG version */
-extern ibool que_trace_on;
-
/** Mutex protecting the query threads. */
extern ib_mutex_t que_thr_mutex;
/***********************************************************************//**
Creates a query graph fork node.
-@return own: fork node */
-UNIV_INTERN
+@return own: fork node */
que_fork_t*
que_fork_create(
/*============*/
@@ -79,26 +72,25 @@ que_node_set_parent(
/*================*/
que_node_t* node, /*!< in: graph node */
que_node_t* parent);/*!< in: parent */
-/***********************************************************************//**
-Creates a query graph thread node.
-@return own: query thread node */
-UNIV_INTERN
+/** Creates a query graph thread node.
+@param[in] parent parent node, i.e., a fork node
+@param[in] heap memory heap where created
+@param[in] prebuilt row prebuilt structure
+@return own: query thread node */
que_thr_t*
que_thr_create(
-/*===========*/
- que_fork_t* parent, /*!< in: parent node, i.e., a fork node */
- mem_heap_t* heap); /*!< in: memory heap where created */
+ que_fork_t* parent,
+ mem_heap_t* heap,
+ row_prebuilt_t* prebuilt);
/**********************************************************************//**
Frees a query graph, but not the heap where it was created. Does not free
explicit cursor declarations, they are freed in que_graph_free. */
-UNIV_INTERN
void
que_graph_free_recursive(
/*=====================*/
que_node_t* node); /*!< in: query graph node */
/**********************************************************************//**
Frees a query graph. */
-UNIV_INTERN
void
que_graph_free(
/*===========*/
@@ -111,8 +103,7 @@ que_graph_free(
Stops a query thread if graph or trx is in a state requiring it. The
conditions are tested in the order (1) graph, (2) trx. The lock_sys_t::mutex
has to be reserved.
-@return TRUE if stopped */
-UNIV_INTERN
+@return TRUE if stopped */
ibool
que_thr_stop(
/*=========*/
@@ -120,7 +111,6 @@ que_thr_stop(
/**********************************************************************//**
Moves a thread from another state to the QUE_THR_RUNNING state. Increments
the n_active_thrs counters of the query graph and transaction. */
-UNIV_INTERN
void
que_thr_move_to_run_state_for_mysql(
/*================================*/
@@ -129,7 +119,6 @@ que_thr_move_to_run_state_for_mysql(
/**********************************************************************//**
A patch for MySQL used to 'stop' a dummy query thread used in MySQL
select, when there is no error or lock wait. */
-UNIV_INTERN
void
que_thr_stop_for_mysql_no_error(
/*============================*/
@@ -140,14 +129,12 @@ A patch for MySQL used to 'stop' a dummy query thread used in MySQL. The
query thread is stopped and made inactive, except in the case where
it was put to the lock wait state in lock0lock.cc, but the lock has already
been granted or the transaction chosen as a victim in deadlock resolution. */
-UNIV_INTERN
void
que_thr_stop_for_mysql(
/*===================*/
que_thr_t* thr); /*!< in: query thread */
/**********************************************************************//**
Run a query thread. Handles lock waits. */
-UNIV_INTERN
void
que_run_threads(
/*============*/
@@ -157,8 +144,7 @@ Moves a suspended query thread to the QUE_THR_RUNNING state and release
a worker thread to execute it. This function should be used to end
the wait state of a query thread waiting for a lock or a stored procedure
completion.
-@return query thread instance of thread to wakeup or NULL */
-UNIV_INTERN
+@return query thread instance of thread to wakeup or NULL */
que_thr_t*
que_thr_end_lock_wait(
/*==================*/
@@ -172,7 +158,6 @@ is returned.
@return a query thread of the graph moved to QUE_THR_RUNNING state, or
NULL; the query thread should be executed by que_run_threads by the
caller */
-UNIV_INTERN
que_thr_t*
que_fork_start_command(
/*===================*/
@@ -200,7 +185,7 @@ UNIV_INLINE
ulint
que_node_get_type(
/*==============*/
- que_node_t* node); /*!< in: graph node */
+ const que_node_t* node); /*!< in: graph node */
/***********************************************************************//**
Gets pointer to the value data type field of a graph node. */
UNIV_INLINE
@@ -217,7 +202,7 @@ que_node_get_val(
que_node_t* node); /*!< in: graph node */
/***********************************************************************//**
Gets the value buffer size of a graph node.
-@return val buffer size, not defined if val.data == NULL in node */
+@return val buffer size, not defined if val.data == NULL in node */
UNIV_INLINE
ulint
que_node_get_val_buf_size(
@@ -240,7 +225,7 @@ que_node_get_next(
que_node_t* node); /*!< in: node in a list */
/*********************************************************************//**
Gets the parent node of a query graph node.
-@return parent node or NULL */
+@return parent node or NULL */
UNIV_INLINE
que_node_t*
que_node_get_parent(
@@ -249,15 +234,14 @@ que_node_get_parent(
/****************************************************************//**
Get the first containing loop node (e.g. while_node_t or for_node_t) for the
given node, or NULL if the node is not within a loop.
-@return containing loop node, or NULL. */
-UNIV_INTERN
+@return containing loop node, or NULL. */
que_node_t*
que_node_get_containing_loop_node(
/*==============================*/
que_node_t* node); /*!< in: node */
/*********************************************************************//**
Catenates a query graph node to a list of them, which may be empty.
-@return one-way list of nodes */
+@return one-way list of nodes */
UNIV_INLINE
que_node_t*
que_node_list_add_last(
@@ -274,7 +258,7 @@ que_node_list_get_last(
que_node_t* node_list); /* in: node list, or NULL */
/*********************************************************************//**
Gets a query graph node list length.
-@return length, for NULL list 0 */
+@return length, for NULL list 0 */
UNIV_INLINE
ulint
que_node_list_get_len(
@@ -293,7 +277,7 @@ que_thr_peek_stop(
que_thr_t* thr); /*!< in: query thread */
/***********************************************************************//**
Returns TRUE if the query graph is for a SELECT statement.
-@return TRUE if a select */
+@return TRUE if a select */
UNIV_INLINE
ibool
que_graph_is_select(
@@ -301,15 +285,13 @@ que_graph_is_select(
que_t* graph); /*!< in: graph */
/**********************************************************************//**
Prints info of an SQL query graph node. */
-UNIV_INTERN
void
que_node_print_info(
/*================*/
que_node_t* node); /*!< in: query graph node */
/*********************************************************************//**
Evaluate the given SQL
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
dberr_t
que_eval_sql(
/*=========*/
@@ -325,26 +307,38 @@ Round robin scheduler.
@return a query thread of the graph moved to QUE_THR_RUNNING state, or
NULL; the query thread should be executed by que_run_threads by the
caller */
-UNIV_INTERN
que_thr_t*
que_fork_scheduler_round_robin(
/*===========================*/
que_fork_t* fork, /*!< in: a query fork */
que_thr_t* thr); /*!< in: current pos */
-/*********************************************************************//**
-Initialise the query sub-system. */
-UNIV_INTERN
-void
-que_init(void);
-/*==========*/
+/** Query thread states */
+enum que_thr_state_t {
+ QUE_THR_RUNNING,
+ QUE_THR_PROCEDURE_WAIT,
+ /** in selects this means that the thread is at the end of its
+ result set (or start, in case of a scroll cursor); in other
+ statements, this means the thread has done its task */
+ QUE_THR_COMPLETED,
+ QUE_THR_COMMAND_WAIT,
+ QUE_THR_LOCK_WAIT,
+ QUE_THR_SUSPENDED
+};
-/*********************************************************************//**
-Close the query sub-system. */
-UNIV_INTERN
-void
-que_close(void);
-/*===========*/
+/** Query thread lock states */
+enum que_thr_lock_t {
+ QUE_THR_LOCK_NOLOCK,
+ QUE_THR_LOCK_ROW,
+ QUE_THR_LOCK_TABLE
+};
+
+/** From where the cursor position is counted */
+enum que_cur_t {
+ QUE_CUR_NOT_DEFINED,
+ QUE_CUR_START,
+ QUE_CUR_END
+};
/* Query graph query thread node: the fields are protected by the
trx_t::mutex with the exceptions named below */
@@ -355,7 +349,7 @@ struct que_thr_t{
corruption */
que_node_t* child; /*!< graph child node */
que_t* graph; /*!< graph where this node belongs */
- ulint state; /*!< state of the query thread */
+ que_thr_state_t state; /*!< state of the query thread */
ibool is_active; /*!< TRUE if the thread has been set
to the run state in
que_thr_move_to_run_state, but not
@@ -389,6 +383,11 @@ struct que_thr_t{
ulint fk_cascade_depth; /*!< maximum cascading call depth
supported for foreign key constraint
related delete/updates */
+ row_prebuilt_t* prebuilt; /*!< prebuilt structure processed by
+ the query thread */
+
+ /** a slot of srv_sys.sys_threads, for DEBUG_SYNC in purge thread */
+ ut_d(srv_slot_t* thread_slot;)
};
#define QUE_THR_MAGIC_N 8476583
@@ -464,64 +463,6 @@ struct que_fork_t{
/* Flag which is ORed to control structure statement node types */
#define QUE_NODE_CONTROL_STAT 1024
-/* Query graph node types */
-#define QUE_NODE_LOCK 1
-#define QUE_NODE_INSERT 2
-#define QUE_NODE_UPDATE 4
-#define QUE_NODE_CURSOR 5
-#define QUE_NODE_SELECT 6
-#define QUE_NODE_AGGREGATE 7
-#define QUE_NODE_FORK 8
-#define QUE_NODE_THR 9
-#define QUE_NODE_UNDO 10
-#define QUE_NODE_COMMIT 11
-#define QUE_NODE_ROLLBACK 12
-#define QUE_NODE_PURGE 13
-#define QUE_NODE_CREATE_TABLE 14
-#define QUE_NODE_CREATE_INDEX 15
-#define QUE_NODE_SYMBOL 16
-#define QUE_NODE_RES_WORD 17
-#define QUE_NODE_FUNC 18
-#define QUE_NODE_ORDER 19
-#define QUE_NODE_PROC (20 + QUE_NODE_CONTROL_STAT)
-#define QUE_NODE_IF (21 + QUE_NODE_CONTROL_STAT)
-#define QUE_NODE_WHILE (22 + QUE_NODE_CONTROL_STAT)
-#define QUE_NODE_ASSIGNMENT 23
-#define QUE_NODE_FETCH 24
-#define QUE_NODE_OPEN 25
-#define QUE_NODE_COL_ASSIGNMENT 26
-#define QUE_NODE_FOR (27 + QUE_NODE_CONTROL_STAT)
-#define QUE_NODE_RETURN 28
-#define QUE_NODE_ROW_PRINTF 29
-#define QUE_NODE_ELSIF 30
-#define QUE_NODE_CALL 31
-#define QUE_NODE_EXIT 32
-
-/* Query thread states */
-#define QUE_THR_RUNNING 1
-#define QUE_THR_PROCEDURE_WAIT 2
-#define QUE_THR_COMPLETED 3 /* in selects this means that the
- thread is at the end of its result set
- (or start, in case of a scroll cursor);
- in other statements, this means the
- thread has done its task */
-#define QUE_THR_COMMAND_WAIT 4
-#define QUE_THR_LOCK_WAIT 5
-#define QUE_THR_SUSPENDED 7
-#define QUE_THR_ERROR 8
-
-/* Query thread lock states */
-#define QUE_THR_LOCK_NOLOCK 0
-#define QUE_THR_LOCK_ROW 1
-#define QUE_THR_LOCK_TABLE 2
-
-/* From where the cursor position is counted */
-#define QUE_CUR_NOT_DEFINED 1
-#define QUE_CUR_START 2
-#define QUE_CUR_END 3
-
-#ifndef UNIV_NONINL
#include "que0que.ic"
-#endif
#endif
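
que_eval_sql() above is the entry point through which InnoDB modules run internal SQL. Its parameter list falls outside the hunk shown, so the following is a hedged sketch assuming the conventional four-argument form (info, SQL string, reserve-dict-mutex flag, trx) used by its callers; table_id is illustrative:

	pars_info_t*	info = pars_info_create();

	/* Bind ":id" by name; the call below consumes the info struct. */
	pars_info_add_ull_literal(info, "id", table_id);

	dberr_t	err = que_eval_sql(
		info,
		"PROCEDURE P () IS\n"
		"BEGIN\n"
		"DELETE FROM SYS_INDEXES WHERE TABLE_ID = :id;\n"
		"END;\n",
		FALSE,	/* assumed: do not reserve dict_sys->mutex */
		trx);
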
diff --git a/storage/innobase/include/que0que.ic b/storage/innobase/include/que0que.ic
index 1775467781a..5b775820df7 100644
--- a/storage/innobase/include/que0que.ic
+++ b/storage/innobase/include/que0que.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,8 +23,6 @@ Query graph
Created 5/27/1996 Heikki Tuuri
*******************************************************/
-#include "usr0sess.h"
-
/***********************************************************************//**
Gets the trx of a query thread. */
UNIV_INLINE
@@ -84,11 +82,9 @@ UNIV_INLINE
ulint
que_node_get_type(
/*==============*/
- que_node_t* node) /*!< in: graph node */
+ const que_node_t* node) /*!< in: graph node */
{
- ut_ad(node);
-
- return(((que_common_t*) node)->type);
+ return(reinterpret_cast<const que_common_t*>(node)->type);
}
/***********************************************************************//**
@@ -106,7 +102,7 @@ que_node_get_val(
/***********************************************************************//**
Gets the value buffer size of a graph node.
-@return val buffer size, not defined if val.data == NULL in node */
+@return val buffer size, not defined if val.data == NULL in node */
UNIV_INLINE
ulint
que_node_get_val_buf_size(
@@ -161,7 +157,7 @@ que_node_get_data_type(
/*********************************************************************//**
Catenates a query graph node to a list of them, which may be empty.
-@return one-way list of nodes */
+@return one-way list of nodes */
UNIV_INLINE
que_node_t*
que_node_list_add_last(
@@ -216,7 +212,7 @@ que_node_list_get_last(
}
/*********************************************************************//**
Gets the next list node in a list of query graph nodes.
-@return next node in a list of nodes */
+@return next node in a list of nodes */
UNIV_INLINE
que_node_t*
que_node_get_next(
@@ -228,7 +224,7 @@ que_node_get_next(
/*********************************************************************//**
Gets a query graph node list length.
-@return length, for NULL list 0 */
+@return length, for NULL list 0 */
UNIV_INLINE
ulint
que_node_list_get_len(
@@ -251,7 +247,7 @@ que_node_list_get_len(
/*********************************************************************//**
Gets the parent node of a query graph node.
-@return parent node or NULL */
+@return parent node or NULL */
UNIV_INLINE
que_node_t*
que_node_get_parent(
@@ -292,7 +288,7 @@ que_thr_peek_stop(
/***********************************************************************//**
Returns TRUE if the query graph is for a SELECT statement.
-@return TRUE if a select */
+@return TRUE if a select */
UNIV_INLINE
ibool
que_graph_is_select(
@@ -307,3 +303,4 @@ que_graph_is_select(
return(FALSE);
}
+
diff --git a/storage/innobase/include/que0types.h b/storage/innobase/include/que0types.h
index 025cf818ab3..d9005095d3c 100644
--- a/storage/innobase/include/que0types.h
+++ b/storage/innobase/include/que0types.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,7 +28,6 @@ Created 5/27/1996 Heikki Tuuri
#define que0types_h
#include "data0data.h"
-#include "dict0types.h"
/* Pseudotype for all graph nodes */
typedef void que_node_t;
@@ -37,6 +37,39 @@ typedef struct que_fork_t que_t;
struct que_thr_t;
+/* Query graph node types */
+#define QUE_NODE_LOCK 1
+#define QUE_NODE_INSERT 2
+#define QUE_NODE_UPDATE 4
+#define QUE_NODE_CURSOR 5
+#define QUE_NODE_SELECT 6
+#define QUE_NODE_AGGREGATE 7
+#define QUE_NODE_FORK 8
+#define QUE_NODE_THR 9
+#define QUE_NODE_UNDO 10
+#define QUE_NODE_COMMIT 11
+#define QUE_NODE_ROLLBACK 12
+#define QUE_NODE_PURGE 13
+#define QUE_NODE_CREATE_TABLE 14
+#define QUE_NODE_CREATE_INDEX 15
+#define QUE_NODE_SYMBOL 16
+#define QUE_NODE_RES_WORD 17
+#define QUE_NODE_FUNC 18
+#define QUE_NODE_ORDER 19
+#define QUE_NODE_PROC (20 + QUE_NODE_CONTROL_STAT)
+#define QUE_NODE_IF (21 + QUE_NODE_CONTROL_STAT)
+#define QUE_NODE_WHILE (22 + QUE_NODE_CONTROL_STAT)
+#define QUE_NODE_ASSIGNMENT 23
+#define QUE_NODE_FETCH 24
+#define QUE_NODE_OPEN 25
+#define QUE_NODE_COL_ASSIGNMENT 26
+#define QUE_NODE_FOR (27 + QUE_NODE_CONTROL_STAT)
+#define QUE_NODE_RETURN 28
+#define QUE_NODE_ROW_PRINTF 29
+#define QUE_NODE_ELSIF 30
+#define QUE_NODE_CALL 31
+#define QUE_NODE_EXIT 32
+
/* Common struct at the beginning of each query graph node; the name of this
substruct must be 'common' */
@@ -52,6 +85,12 @@ struct que_common_t{
symbol node or a function node, then we
have to free the data field in val
explicitly */
+
+ /** Constructor */
+ que_common_t(ulint type, que_node_t* parent) :
+ type(type), parent(parent), brother(NULL),
+ val(), val_buf_size(0)
+ {}
};
#endif
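
The new que_common_t constructor gives node types a uniform way to initialize their common header (empty val, no siblings). A hypothetical node illustrating the intended use; demo_node_t is not part of this patch:

	struct demo_node_t {
		que_common_t	common;

		demo_node_t(que_node_t* parent)
			: common(QUE_NODE_EXIT, parent) {}
	};
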
diff --git a/storage/innobase/include/read0read.h b/storage/innobase/include/read0read.h
index fb30719b652..359db1d8c39 100644
--- a/storage/innobase/include/read0read.h
+++ b/storage/innobase/include/read0read.h
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,168 +27,98 @@ Created 2/16/1997 Heikki Tuuri
#ifndef read0read_h
#define read0read_h
-#include "univ.i"
-
-
-#include "ut0byte.h"
-#include "ut0lst.h"
-#include "trx0trx.h"
#include "read0types.h"
-/*********************************************************************//**
-Opens a read view where exactly the transactions serialized before this
-point in time are seen in the view.
-@return own: read view struct */
-UNIV_INTERN
-read_view_t*
-read_view_open_now(
-/*===============*/
- trx_id_t cr_trx_id, /*!< in: trx_id of creating
- transaction, or 0 used in purge */
- mem_heap_t* heap); /*!< in: memory heap from which
- allocated */
-/*********************************************************************//**
-Makes a copy of the oldest existing read view, or opens a new. The view
-must be closed with ..._close.
-@return own: read view struct */
-UNIV_INTERN
-read_view_t*
-read_view_purge_open(
-/*=================*/
- mem_heap_t* heap); /*!< in: memory heap from which
- allocated */
-/*********************************************************************//**
-Remove a read view from the trx_sys->view_list. */
-UNIV_INLINE
-void
-read_view_remove(
-/*=============*/
- read_view_t* view, /*!< in: read view, can be 0 */
- bool own_mutex); /*!< in: true if caller owns the
- trx_sys_t::mutex */
-/*********************************************************************//**
-Closes a consistent read view for MySQL. This function is called at an SQL
-statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
-UNIV_INTERN
-void
-read_view_close_for_mysql(
-/*======================*/
- trx_t* trx); /*!< in: trx which has a read view */
-/*********************************************************************//**
-Checks if a read view sees the specified transaction.
-@return true if sees */
-UNIV_INLINE
-bool
-read_view_sees_trx_id(
-/*==================*/
- const read_view_t* view, /*!< in: read view */
- trx_id_t trx_id) /*!< in: trx id */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Prints a read view to stderr. */
-UNIV_INTERN
-void
-read_view_print(
-/*============*/
- const read_view_t* view); /*!< in: read view */
-/*********************************************************************//**
-Create a consistent cursor view for mysql to be used in cursors. In this
-consistent read view modifications done by the creating transaction or future
-transactions are not visible. */
-UNIV_INTERN
-cursor_view_t*
-read_cursor_view_create_for_mysql(
-/*==============================*/
- trx_t* cr_trx);/*!< in: trx where cursor view is created */
-/*********************************************************************//**
-Close a given consistent cursor view for mysql and restore global read view
-back to a transaction read view. */
-UNIV_INTERN
-void
-read_cursor_view_close_for_mysql(
-/*=============================*/
- trx_t* trx, /*!< in: trx */
- cursor_view_t* curview); /*!< in: cursor view to be closed */
-/*********************************************************************//**
-This function sets a given consistent cursor view to a transaction
-read view if given consistent cursor view is not NULL. Otherwise, function
-restores a global read view to a transaction read view. */
-UNIV_INTERN
-void
-read_cursor_set_for_mysql(
-/*======================*/
- trx_t* trx, /*!< in: transaction where cursor is set */
- cursor_view_t* curview);/*!< in: consistent cursor view to be set */
-
-/** Read view lists the trx ids of those transactions for which a consistent
-read should not see the modifications to the database. */
-
-struct read_view_t{
- ulint type; /*!< VIEW_NORMAL, VIEW_HIGH_GRANULARITY */
- undo_no_t undo_no;/*!< 0 or if type is
- VIEW_HIGH_GRANULARITY
- transaction undo_no when this high-granularity
- consistent read view was created */
- trx_id_t low_limit_no;
- /*!< The view does not need to see the undo
- logs for transactions whose transaction number
- is strictly smaller (<) than this value: they
- can be removed in purge if not needed by other
- views */
- trx_id_t low_limit_id;
- /*!< The read should not see any transaction
- with trx id >= this value. In other words,
- this is the "high water mark". */
- trx_id_t up_limit_id;
- /*!< The read should see all trx ids which
- are strictly smaller (<) than this value.
- In other words,
- this is the "low water mark". */
- ulint n_trx_ids;
- /*!< Number of cells in the trx_ids array */
- trx_id_t* trx_ids;/*!< Additional trx ids which the read should
- not see: typically, these are the read-write
- active transactions at the time when the read
- is serialized, except the reading transaction
- itself; the trx ids in this array are in a
- descending order. These trx_ids should be
- between the "low" and "high" water marks,
- that is, up_limit_id and low_limit_id. */
- trx_id_t creator_trx_id;
- /*!< trx id of creating transaction, or
- 0 used in purge */
- UT_LIST_NODE_T(read_view_t) view_list;
- /*!< List of read views in trx_sys */
+#include <algorithm>
+
+/** The MVCC read view manager */
+class MVCC {
+public:
+ /** Constructor
+ @param size Number of views to pre-allocate */
+ explicit MVCC(ulint size);
+
+ /** Destructor.
+ Free all the views in the m_free list */
+ ~MVCC();
+
+ /**
+ Allocate and create a view.
+ @param view view owned by this class created for the
+ caller. Must be freed by calling close()
+ @param trx transaction creating the view */
+ void view_open(ReadView*& view, trx_t* trx);
+
+ /**
+ Close a view created by view_open().
+ @param view view allocated by view_open()
+ @param own_mutex whether the caller owns trx_sys_t::mutex */
+ void view_close(ReadView*& view, bool own_mutex);
+
+ /**
+ Release a view that is inactive but not closed. Caller must own
+ the trx_sys_t::mutex.
+ @param view View to release */
+ void view_release(ReadView*& view);
+
+ /** Clones the oldest view and stores it in view. No need to
+ call view_close(). The caller owns the view that is passed in.
+ It will also move the closed views from the m_views list to the
+ m_free list. This function is called by Purge to create its view.
+ @param view Preallocated view, owned by the caller */
+ void clone_oldest_view(ReadView* view);
+
+ /**
+ @return the number of active views */
+ ulint size() const;
+
+ /**
+ @return true if the view is active and valid */
+ static bool is_view_active(ReadView* view)
+ {
+ ut_a(view != reinterpret_cast<ReadView*>(0x1));
+
+ return(view != NULL && !(intptr_t(view) & 0x1));
+ }
+
+ /**
+ Set the view creator transaction id. Note: This should be set only
+ for views created by RW transactions. */
+ static void set_view_creator_trx_id(ReadView* view, trx_id_t id);
+
+private:
+
+ /**
+ Validates a read view list. */
+ bool validate() const;
+
+ /**
+ Find a free view from the active list; if none is found, allocate
+ a new view. This function will also attempt to move delete-marked
+ views from the active list to the freed list.
+ @return a view to use */
+ inline ReadView* get_view();
+
+ /**
+ Get the oldest view in the system. It will also move delete-marked
+ read views from the views list to the freed list.
+ @return oldest view if found or NULL */
+ inline ReadView* get_oldest_view() const;
+
+private:
+ // Prevent copying
+ MVCC(const MVCC&);
+ MVCC& operator=(const MVCC&);
+
+private:
+ typedef UT_LIST_BASE_NODE_T(ReadView) view_list_t;
+
+ /** Free views ready for reuse. */
+ view_list_t m_free;
+
+ /** Active and closed views; the closed views will have the
+ creator trx id set to TRX_ID_MAX */
+ view_list_t m_views;
};
-/** Read view types @{ */
-#define VIEW_NORMAL 1 /*!< Normal consistent read view
- where transaction does not see changes
- made by active transactions except
- creating transaction. */
-#define VIEW_HIGH_GRANULARITY 2 /*!< High-granularity read view where
- transaction does not see changes
- made by active transactions and own
- changes after a point in time when this
- read view was created. */
-/* @} */
-
-/** Implement InnoDB framework to support consistent read views in
-cursors. This struct holds both heap where consistent read view
-is allocated and pointer to a read view. */
-
-struct cursor_view_t{
- mem_heap_t* heap;
- /*!< Memory heap for the cursor view */
- read_view_t* read_view;
- /*!< Consistent read view of the cursor*/
- ulint n_mysql_tables_in_use;
- /*!< number of Innobase tables used in the
- processing of this cursor */
-};
-
-#ifndef UNIV_NONINL
-#include "read0read.ic"
-#endif
-
-#endif
+#endif /* read0read_h */
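
Taken together, the free functions removed above become methods on one manager. A hedged sketch of the new lifecycle, assuming an MVCC instance reachable from trx_sys and a caller holding a trx_t:

	ReadView*	view = NULL;

	/* Allocate (or reuse) a view and snapshot the active trx ids. */
	mvcc->view_open(view, trx);

	/* ... consistent reads test visibility against 'view' ... */

	/* Hand the view back; 'false' says we do not already hold
	trx_sys_t::mutex, so view_close() takes it as needed (by
	analogy with the removed read_view_remove()). */
	mvcc->view_close(view, false);
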
diff --git a/storage/innobase/include/read0read.ic b/storage/innobase/include/read0read.ic
deleted file mode 100644
index ed2b2dae388..00000000000
--- a/storage/innobase/include/read0read.ic
+++ /dev/null
@@ -1,148 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/read0read.ic
-Cursor read
-
-Created 2/16/1997 Heikki Tuuri
-*******************************************************/
-
-#include "trx0sys.h"
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Validates a read view object. */
-static
-bool
-read_view_validate(
-/*===============*/
- const read_view_t* view) /*!< in: view to validate */
-{
- ut_ad(mutex_own(&trx_sys->mutex));
-
- /* Check that the view->trx_ids array is in descending order. */
- for (ulint i = 1; i < view->n_trx_ids; ++i) {
-
- ut_a(view->trx_ids[i] < view->trx_ids[i - 1]);
- }
-
- return(true);
-}
-
-/** Functor to validate the view list. */
-struct ViewCheck {
-
- ViewCheck() : m_prev_view(0) { }
-
- void operator()(const read_view_t* view)
- {
- ut_a(m_prev_view == NULL
- || m_prev_view->low_limit_no >= view->low_limit_no);
-
- m_prev_view = view;
- }
-
- const read_view_t* m_prev_view;
-};
-
-/*********************************************************************//**
-Validates a read view list. */
-static
-bool
-read_view_list_validate(void)
-/*=========================*/
-{
- ut_ad(mutex_own(&trx_sys->mutex));
-
- ut_list_map(trx_sys->view_list, &read_view_t::view_list, ViewCheck());
-
- return(true);
-}
-#endif /* UNIV_DEBUG */
-
-/*********************************************************************//**
-Checks if a read view sees the specified transaction.
-@return true if sees */
-UNIV_INLINE
-bool
-read_view_sees_trx_id(
-/*==================*/
- const read_view_t* view, /*!< in: read view */
- trx_id_t trx_id) /*!< in: trx id */
-{
- if (trx_id < view->up_limit_id) {
-
- return(true);
- } else if (trx_id >= view->low_limit_id) {
-
- return(false);
- } else {
- ulint lower = 0;
- ulint upper = view->n_trx_ids - 1;
-
- ut_a(view->n_trx_ids > 0);
-
- do {
- ulint mid = (lower + upper) >> 1;
- trx_id_t mid_id = view->trx_ids[mid];
-
- if (mid_id == trx_id) {
- return(FALSE);
- } else if (mid_id < trx_id) {
- if (mid > 0) {
- upper = mid - 1;
- } else {
- break;
- }
- } else {
- lower = mid + 1;
- }
- } while (lower <= upper);
- }
-
- return(true);
-}
-
-/*********************************************************************//**
-Remove a read view from the trx_sys->view_list. */
-UNIV_INLINE
-void
-read_view_remove(
-/*=============*/
- read_view_t* view, /*!< in: read view, can be 0 */
- bool own_mutex) /*!< in: true if caller owns the
- trx_sys_t::mutex */
-{
- if (view != 0) {
- if (!own_mutex) {
- mutex_enter(&trx_sys->mutex);
- }
-
- ut_ad(read_view_validate(view));
-
- UT_LIST_REMOVE(view_list, trx_sys->view_list, view);
-
- ut_ad(read_view_list_validate());
-
- if (!own_mutex) {
- mutex_exit(&trx_sys->mutex);
- }
- }
-}
-
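
One subtlety leaves with this file: read_view_sees_trx_id() kept trx_ids in descending order, so its hand-rolled binary search is inverted relative to the usual ascending case; the new ReadView (read0types.h, below) stores ascending ids and calls std::binary_search directly. A standalone restatement of the old visibility rule with the descending order made explicit:

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <functional>

typedef unsigned long long trx_id_u;	/* stand-in for trx_id_t */

/* Visibility rule of the removed read_view_t. 'ids' holds the
transactions active at view creation, in DESCENDING order. */
static bool sees(trx_id_u up_limit, trx_id_u low_limit,
		 const trx_id_u* ids, std::size_t n, trx_id_u id)
{
	if (id < up_limit)   return true;   /* committed before the view */
	if (id >= low_limit) return false;  /* started after the view */
	/* In the window: invisible iff still active at creation.
	Descending array, hence the std::greater<> ordering. */
	return !std::binary_search(ids, ids + n, id,
				   std::greater<trx_id_u>());
}

int main()
{
	const trx_id_u ids[] = { 17, 12 };	/* active at view open */
	assert(sees(10, 20, ids, 2, 9));	/* below up_limit_id */
	assert(!sees(10, 20, ids, 2, 12));	/* active: not seen */
	assert(sees(10, 20, ids, 2, 15));	/* committed in window */
	assert(!sees(10, 20, ids, 2, 25));	/* >= low_limit_id */
	return 0;
}
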
diff --git a/storage/innobase/include/read0types.h b/storage/innobase/include/read0types.h
index 1ad501d931d..520b0324310 100644
--- a/storage/innobase/include/read0types.h
+++ b/storage/innobase/include/read0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,7 +26,305 @@ Created 2/16/1997 Heikki Tuuri
#ifndef read0types_h
#define read0types_h
-struct read_view_t;
-struct cursor_view_t;
+#include "dict0mem.h"
+#include "trx0types.h"
+#include <algorithm>
+
+// Friend declaration
+class MVCC;
+
+/** Read view lists the trx ids of those transactions for which a consistent
+read should not see the modifications to the database. */
+
+class ReadView {
+ /** This is similar to a std::vector but it is not a drop-in
+ replacement. It is specific to ReadView. */
+ class ids_t {
+ typedef trx_ids_t::value_type value_type;
+
+ /**
+ Constructor */
+ ids_t() : m_ptr(), m_size(), m_reserved() { }
+
+ /**
+ Destructor */
+ ~ids_t() { UT_DELETE_ARRAY(m_ptr); }
+
+ /**
+ Try to increase the size of the array. Old elements are
+ copied across. It is a no-op if n is less than the current size.
+
+ @param n Make space for n elements */
+ void reserve(ulint n);
+
+ /**
+ Resize the array, sets the current element count.
+ @param n new size of the array, in elements */
+ void resize(ulint n)
+ {
+ ut_ad(n <= capacity());
+
+ m_size = n;
+ }
+
+ /**
+ Reset the size to 0 */
+ void clear() { resize(0); }
+
+ /**
+ @return the capacity of the array in elements */
+ ulint capacity() const { return(m_reserved); }
+
+ /**
+ Copy and overwrite the current array contents
+
+ @param start Source array
+ @param end Pointer to end of array */
+ void assign(const value_type* start, const value_type* end);
+
+ /**
+ Insert the value in the correct slot, preserving the order.
+ Doesn't check for duplicates. */
+ void insert(value_type value);
+
+ /**
+ @return the value of the first element in the array */
+ value_type front() const
+ {
+ ut_ad(!empty());
+
+ return(m_ptr[0]);
+ }
+
+ /**
+ @return the value of the last element in the array */
+ value_type back() const
+ {
+ ut_ad(!empty());
+
+ return(m_ptr[m_size - 1]);
+ }
+
+ /**
+ Append a value to the array.
+ @param value the value to append */
+ void push_back(value_type value);
+
+ /**
+ @return a pointer to the start of the array */
+ trx_id_t* data() { return(m_ptr); }
+
+ /**
+ @return a const pointer to the start of the array */
+ const trx_id_t* data() const { return(m_ptr); }
+
+ /**
+ @return the number of elements in the array */
+ ulint size() const { return(m_size); }
+
+ /**
+ @return true if size() == 0 */
+ bool empty() const { return(size() == 0); }
+
+ private:
+ // Prevent copying
+ ids_t(const ids_t&);
+ ids_t& operator=(const ids_t&);
+
+ private:
+ /** Memory for the array */
+ value_type* m_ptr;
+
+ /** Number of active elements in the array */
+ ulint m_size;
+
+ /** Size of m_ptr in elements */
+ ulint m_reserved;
+
+ friend class ReadView;
+ };
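    /* Illustration of the ordered insert that ids_t::insert() documents
    above, written against the trx_ids_t vector type; the real ids_t
    manages its own buffer through reserve()/resize() instead. The
    helper name is hypothetical, not part of the header. */
    static void sorted_insert(trx_ids_t& v, trx_ids_t::value_type value)
    {
        /* std::lower_bound keeps the array ascending; duplicates are
        not checked, matching the documented contract. */
        v.insert(std::lower_bound(v.begin(), v.end(), value), value);
    }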
+public:
+ ReadView();
+ ~ReadView();
+ /** Check whether transaction id is valid.
+ @param[in] id transaction id to check
+ @param[in] name table name */
+ static void check_trx_id_sanity(
+ trx_id_t id,
+ const table_name_t& name);
+
+ /** Check whether the changes by id are visible.
+ @param[in] id transaction id to check against the view
+ @param[in] name table name
+ @return whether the view sees the modifications of id. */
+ bool changes_visible(
+ trx_id_t id,
+ const table_name_t& name) const
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ ut_ad(id > 0);
+
+ if (id < m_up_limit_id || id == m_creator_trx_id) {
+
+ return(true);
+ }
+
+ check_trx_id_sanity(id, name);
+
+ if (id >= m_low_limit_id) {
+
+ return(false);
+
+ } else if (m_ids.empty()) {
+
+ return(true);
+ }
+
+ const ids_t::value_type* p = m_ids.data();
+
+ return(!std::binary_search(p, p + m_ids.size(), id));
+ }
+
+ /**
+ @param id transaction to check
+ @return true if view sees transaction id */
+ bool sees(trx_id_t id) const
+ {
+ return(id < m_up_limit_id);
+ }
+
+ /**
+ Mark the view as closed */
+ void close()
+ {
+ ut_ad(m_creator_trx_id != TRX_ID_MAX);
+ m_creator_trx_id = TRX_ID_MAX;
+ }
+
+ /**
+ @return true if the view is closed */
+ bool is_closed() const
+ {
+ return(m_closed);
+ }
+
+ /**
+ Write the limits to the file.
+ @param file file to write to */
+ void print_limits(FILE* file) const
+ {
+ fprintf(file,
+ "Trx read view will not see trx with"
+ " id >= " TRX_ID_FMT ", sees < " TRX_ID_FMT "\n",
+ m_low_limit_id, m_up_limit_id);
+ }
+
+ /**
+ @return the low limit no */
+ trx_id_t low_limit_no() const
+ {
+ return(m_low_limit_no);
+ }
+
+ /**
+ @return the low limit id */
+ trx_id_t low_limit_id() const
+ {
+ return(m_low_limit_id);
+ }
+
+ /**
+ @return true if there are no transaction ids in the snapshot */
+ bool empty() const
+ {
+ return(m_ids.empty());
+ }
+
+#ifdef UNIV_DEBUG
+ /**
+ @param rhs view to compare with
+ @return true if this view is less than or equal to rhs */
+ bool le(const ReadView* rhs) const
+ {
+ return(m_low_limit_no <= rhs->m_low_limit_no);
+ }
+
+ trx_id_t up_limit_id() const
+ {
+ return(m_up_limit_id);
+ }
+#endif /* UNIV_DEBUG */
+private:
+ /**
+ Copy the transaction ids from the source vector */
+ inline void copy_trx_ids(const trx_ids_t& trx_ids);
+
+ /**
+ Opens a read view where exactly the transactions serialized before this
+ point in time are seen in the view.
+ @param id Creator transaction id */
+ inline void prepare(trx_id_t id);
+
+ /**
+ Complete the read view creation */
+ inline void complete();
+
+ /**
+ Copy state from another view. Must call copy_complete() to finish.
+ @param other view to copy from */
+ inline void copy_prepare(const ReadView& other);
+
+ /**
+ Complete the copy, insert the creator transaction id into the
+ m_ids too and adjust the m_up_limit_id, if required */
+ inline void copy_complete();
+
+ /**
+ Set the creator transaction id, existing id must be 0 */
+ void creator_trx_id(trx_id_t id)
+ {
+ ut_ad(m_creator_trx_id == 0);
+ m_creator_trx_id = id;
+ }
+
+ friend class MVCC;
+
+private:
+ // Disable copying
+ ReadView(const ReadView&);
+ ReadView& operator=(const ReadView&);
+
+private:
+ /** The read should not see any transaction with trx id >= this
+ value. In other words, this is the "high water mark". */
+ trx_id_t m_low_limit_id;
+
+ /** The read should see all trx ids which are strictly
+ smaller (<) than this value. In other words, this is the
+ "low water mark". */
+ trx_id_t m_up_limit_id;
+
+ /** trx id of creating transaction, set to TRX_ID_MAX for free
+ views. */
+ trx_id_t m_creator_trx_id;
+
+ /** Set of RW transactions that were active when this snapshot
+ was taken */
+ ids_t m_ids;
+
+ /** The view does not need to see the undo logs for transactions
+ whose transaction number is strictly smaller (<) than this value:
+ they can be removed in purge if not needed by other views */
+ trx_id_t m_low_limit_no;
+
+ /** AC-NL-RO (auto-commit non-locking read-only) transaction view
+ that has been "closed". */
+ bool m_closed;
+
+ typedef UT_LIST_NODE_T(ReadView) node_t;
+
+ byte pad1[64 - sizeof(node_t)];
+
+ /** List of read views in trx_sys */
+ node_t m_view_list;
+};
#endif
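The net effect of the new header: visibility is decided by two watermarks plus an ascending id array, so changes_visible() can call std::binary_search directly instead of the hand-rolled descending search that read0read.ic used. A condensed model of the decision, with the table-name sanity check and the InnoDB types stripped out for illustration (view_model and its members are stand-ins, not the real class):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    typedef uint64_t trx_id_t;

    struct view_model {
        trx_id_t up_limit_id;      /* sees every id strictly below this */
        trx_id_t low_limit_id;     /* sees no id at or above this */
        trx_id_t creator_trx_id;   /* own changes are always visible */
        std::vector<trx_id_t> ids; /* active RW trx ids, ascending */

        bool changes_visible(trx_id_t id) const
        {
            if (id < up_limit_id || id == creator_trx_id) {
                return true;
            }
            if (id >= low_limit_id) {
                return false;
            }
            return ids.empty()
                || !std::binary_search(ids.begin(), ids.end(), id);
        }
    };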
diff --git a/storage/innobase/include/rem0cmp.h b/storage/innobase/include/rem0cmp.h
index 828e1175e6e..af1b145b0d9 100644
--- a/storage/innobase/include/rem0cmp.h
+++ b/storage/innobase/include/rem0cmp.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,16 +27,14 @@ Created 7/1/1994 Heikki Tuuri
#ifndef rem0cmp_h
#define rem0cmp_h
-#include "univ.i"
#include "data0data.h"
#include "data0type.h"
-#include "dict0dict.h"
-#include "rem0rec.h"
+#include "rem0types.h"
+#include "page0types.h"
/*************************************************************//**
Returns TRUE if two columns are equal for comparison purposes.
-@return TRUE if the columns are considered equal in comparisons */
-UNIV_INTERN
+@return TRUE if the columns are considered equal in comparisons */
ibool
cmp_cols_are_equal(
/*===============*/
@@ -43,259 +42,195 @@ cmp_cols_are_equal(
const dict_col_t* col2, /*!< in: column 2 */
ibool check_charsets);
/*!< in: whether to check charsets */
-/*************************************************************//**
-This function is used to compare two data fields for which we know the
-data type.
-@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */
-UNIV_INLINE
+/** Compare two data fields.
+@param[in] mtype main type
+@param[in] prtype precise type
+@param[in] data1 data field
+@param[in] len1 length of data1 in bytes, or UNIV_SQL_NULL
+@param[in] data2 data field
+@param[in] len2 length of data2 in bytes, or UNIV_SQL_NULL
+@return the comparison result of data1 and data2
+@retval 0 if data1 is equal to data2
+@retval negative if data1 is less than data2
+@retval positive if data1 is greater than data2 */
int
cmp_data_data(
-/*==========*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type */
- const byte* data1, /*!< in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /*!< in: data field length or UNIV_SQL_NULL */
- const byte* data2, /*!< in: data field (== a pointer to a memory
- buffer) */
- ulint len2); /*!< in: data field length or UNIV_SQL_NULL */
-/*************************************************************//**
-This function is used to compare two data fields for which we know the
-data type.
-@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */
-UNIV_INTERN
-int
-cmp_data_data_slow(
-/*===============*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type */
- const byte* data1, /*!< in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /*!< in: data field length or UNIV_SQL_NULL */
- const byte* data2, /*!< in: data field (== a pointer to a memory
- buffer) */
- ulint len2); /*!< in: data field length or UNIV_SQL_NULL */
+ ulint mtype,
+ ulint prtype,
+ const byte* data1,
+ ulint len1,
+ const byte* data2,
+ ulint len2)
+ MY_ATTRIBUTE((warn_unused_result));
-/*****************************************************************
-This function is used to compare two data fields for which we know the
-data type to be VARCHAR.
-@return 1, 0, -1, if lhs is greater, equal, less than rhs, respectively */
-UNIV_INTERN
-int
-cmp_data_data_slow_varchar(
-/*=======================*/
- const byte* lhs, /* in: data field (== a pointer to a memory
- buffer) */
- ulint lhs_len,/* in: data field length or UNIV_SQL_NULL */
- const byte* rhs, /* in: data field (== a pointer to a memory
- buffer) */
- ulint rhs_len);/* in: data field length or UNIV_SQL_NULL */
-/*****************************************************************
-This function is used to compare two varchar/char fields. The comparison
-is for the LIKE operator.
-@return 1, 0, -1, if lhs is greater, equal, less than rhs, respectively */
-UNIV_INTERN
-int
-cmp_data_data_slow_like_prefix(
-/*===========================*/
- const byte* data1, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /* in: data field length or UNIV_SQL_NULL */
- const byte* data2, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len2); /* in: data field length or UNIV_SQL_NULL */
-/*****************************************************************
-This function is used to compare two varchar/char fields. The comparison
-is for the LIKE operator.
-@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */
-UNIV_INTERN
-int
-cmp_data_data_slow_like_suffix(
-/*===========================*/
- const byte* data1, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /* in: data field length or UNIV_SQL_NULL */
- const byte* data2, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len2); /* in: data field length or UNIV_SQL_NULL */
-/*****************************************************************
-This function is used to compare two varchar/char fields. The comparison
-is for the LIKE operator.
-@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */
-UNIV_INTERN
-int
-cmp_data_data_slow_like_substr(
-/*===========================*/
- const byte* data1, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /* in: data field length or UNIV_SQL_NULL */
- const byte* data2, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len2); /* in: data field length or UNIV_SQL_NULL */
-/*************************************************************//**
-This function is used to compare two dfields where at least the first
-has its data type field set.
-@return 1, 0, -1, if dfield1 is greater, equal, less than dfield2,
-respectively */
+/** Compare two data fields.
+@param[in] dfield1 data field; must have type field set
+@param[in] dfield2 data field
+@return the comparison result of dfield1 and dfield2
+@retval 0 if dfield1 is equal to dfield2
+@retval negative if dfield1 is less than dfield2
+@retval positive if dfield1 is greater than dfield2 */
UNIV_INLINE
int
cmp_dfield_dfield(
/*==============*/
const dfield_t* dfield1,/*!< in: data field; must have type field set */
const dfield_t* dfield2);/*!< in: data field */
-/*************************************************************//**
-This function is used to compare a data tuple to a physical record.
-Only dtuple->n_fields_cmp first fields are taken into account for
-the data tuple! If we denote by n = n_fields_cmp, then rec must
-have either m >= n fields, or it must differ from dtuple in some of
-the m fields rec has. If rec has an externally stored field we do not
-compare it but return with value 0 if such a comparison should be
-made.
-@return 1, 0, -1, if dtuple is greater, equal, less than rec,
-respectively, when only the common first fields are compared, or until
-the first externally stored field in rec */
-UNIV_INTERN
+
+
+/** Compare a GIS data tuple to a physical record.
+@param[in] dtuple data tuple
+@param[in] rec R-tree record
+@param[in] offsets rec_get_offsets(rec)
+@param[in] mode compare mode
+@retval negative if dtuple is less than rec */
+int
+cmp_dtuple_rec_with_gis(
+/*====================*/
+ const dtuple_t* dtuple,
+ const rec_t* rec,
+ const offset_t* offsets,
+ page_cur_mode_t mode)
+ MY_ATTRIBUTE((nonnull));
+
+/** Compare a GIS data tuple to a physical record in rtree non-leaf node.
+We need to check the page number field, since we don't store pk field in
+rtree non-leaf node.
+@param[in] dtuple data tuple
+@param[in] rec R-tree record
+@param[in] offsets rec_get_offsets(rec)
+@retval negative if dtuple is less than rec */
+int
+cmp_dtuple_rec_with_gis_internal(
+ const dtuple_t* dtuple,
+ const rec_t* rec,
+ const offset_t* offsets);
+
+/** Compare a data tuple to a physical record.
+@param[in] dtuple data tuple
+@param[in] rec B-tree record
+@param[in] offsets rec_get_offsets(rec)
+@param[in] n_cmp number of fields to compare
+@param[in,out] matched_fields number of completely matched fields
+@return the comparison result of dtuple and rec
+@retval 0 if dtuple is equal to rec
+@retval negative if dtuple is less than rec
+@retval positive if dtuple is greater than rec */
int
cmp_dtuple_rec_with_match_low(
-/*==========================*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record which differs from
- dtuple in some of the common fields, or which
- has an equal number or more fields than
- dtuple */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n_cmp, /*!< in: number of fields to compare */
- ulint* matched_fields,
- /*!< in/out: number of already completely
- matched fields; when function returns,
- contains the value for current comparison */
- ulint* matched_bytes)
- /*!< in/out: number of already matched
- bytes within the first field not completely
- matched; when function returns, contains the
- value for current comparison */
+ const dtuple_t* dtuple,
+ const rec_t* rec,
+ const offset_t* offsets,
+ ulint n_cmp,
+ ulint* matched_fields)
MY_ATTRIBUTE((nonnull));
-#define cmp_dtuple_rec_with_match(tuple,rec,offsets,fields,bytes) \
+#define cmp_dtuple_rec_with_match(tuple,rec,offsets,fields) \
cmp_dtuple_rec_with_match_low( \
- tuple,rec,offsets,dtuple_get_n_fields_cmp(tuple),fields,bytes)
-/**************************************************************//**
-Compares a data tuple to a physical record.
+ tuple,rec,offsets,dtuple_get_n_fields_cmp(tuple),fields)
+/** Compare a data tuple to a physical record.
+@param[in] dtuple data tuple
+@param[in] rec B-tree or R-tree index record
+@param[in] index index tree
+@param[in] offsets rec_get_offsets(rec)
+@param[in,out] matched_fields number of completely matched fields
+@param[in,out] matched_bytes number of matched bytes in the first
+field that is not matched
+@return the comparison result of dtuple and rec
+@retval 0 if dtuple is equal to rec
+@retval negative if dtuple is less than rec
+@retval positive if dtuple is greater than rec */
+int
+cmp_dtuple_rec_with_match_bytes(
+ const dtuple_t* dtuple,
+ const rec_t* rec,
+ const dict_index_t* index,
+ const offset_t* offsets,
+ ulint* matched_fields,
+ ulint* matched_bytes)
+ MY_ATTRIBUTE((warn_unused_result));
+/** Compare a data tuple to a physical record.
@see cmp_dtuple_rec_with_match
-@return 1, 0, -1, if dtuple is greater, equal, less than rec, respectively */
-UNIV_INTERN
+@param[in] dtuple data tuple
+@param[in] rec B-tree record
+@param[in] offsets rec_get_offsets(rec)
+@return the comparison result of dtuple and rec
+@retval 0 if dtuple is equal to rec
+@retval negative if dtuple is less than rec
+@retval positive if dtuple is greater than rec */
int
cmp_dtuple_rec(
-/*===========*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const dtuple_t* dtuple,
+ const rec_t* rec,
+ const offset_t* offsets);
/**************************************************************//**
Checks if a dtuple is a prefix of a record. The last field in dtuple
is allowed to be a prefix of the corresponding field in the record.
-@return TRUE if prefix */
-UNIV_INTERN
+@return TRUE if prefix */
ibool
cmp_dtuple_is_prefix_of_rec(
/*========================*/
const dtuple_t* dtuple, /*!< in: data tuple */
const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-/*************************************************************//**
-Compare two physical records that contain the same number of columns,
+ const offset_t* offsets);/*!< in: array returned by rec_get_offsets() */
+/** Compare two physical records that contain the same number of columns,
none of which are stored externally.
-@retval 1 if rec1 (including non-ordering columns) is greater than rec2
-@retval -1 if rec1 (including non-ordering columns) is less than rec2
+@retval positive if rec1 (including non-ordering columns) is greater than rec2
+@retval negative if rec1 (including non-ordering columns) is less than rec2
@retval 0 if rec1 is a duplicate of rec2 */
-UNIV_INTERN
int
cmp_rec_rec_simple(
/*===============*/
const rec_t* rec1, /*!< in: physical record */
const rec_t* rec2, /*!< in: physical record */
- const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */
- const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) */
+ const offset_t* offsets1,/*!< in: rec_get_offsets(rec1, ...) */
+ const offset_t* offsets2,/*!< in: rec_get_offsets(rec2, ...) */
const dict_index_t* index, /*!< in: data dictionary index */
struct TABLE* table) /*!< in: MySQL table, for reporting
duplicate key value if applicable,
or NULL */
MY_ATTRIBUTE((nonnull(1,2,3,4), warn_unused_result));
-/*************************************************************//**
-This function is used to compare two physical records. Only the common
-first fields are compared, and if an externally stored field is
-encountered, then 0 is returned.
-@return 1, 0, -1 if rec1 is greater, equal, less, respectively */
-UNIV_INTERN
-int
-cmp_rec_rec_with_match(
-/*===================*/
- const rec_t* rec1, /*!< in: physical record */
- const rec_t* rec2, /*!< in: physical record */
- const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */
- const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */
- dict_index_t* index, /*!< in: data dictionary index */
- ibool nulls_unequal,
- /* in: TRUE if this is for index statistics
- cardinality estimation, and innodb_stats_method
- is "nulls_unequal" or "nulls_ignored" */
- ulint* matched_fields, /*!< in/out: number of already completely
- matched fields; when the function returns,
- contains the value the for current
- comparison */
- ulint* matched_bytes);/*!< in/out: number of already matched
- bytes within the first field not completely
- matched; when the function returns, contains
- the value for the current comparison */
-/*************************************************************//**
-This function is used to compare two physical records. Only the common
-first fields are compared.
-@return 1, 0 , -1 if rec1 is greater, equal, less, respectively, than
-rec2; only the common first fields are compared */
-UNIV_INLINE
+
+/** Compare two B-tree or R-tree records.
+Only the common first fields are compared, and externally stored fields
+are treated as equal.
+@param[in] rec1 record (possibly not on an index page)
+@param[in] rec2 B-tree or R-tree record in an index page
+@param[in] offsets1 rec_get_offsets(rec1, index)
+@param[in] offsets2 rec_get_offsets(rec2, index)
+@param[in] nulls_unequal true if this is for index cardinality
+ statistics estimation with
+ innodb_stats_method=nulls_unequal
+ or innodb_stats_method=nulls_ignored
+@param[out] matched_fields number of completely matched fields
+@retval 0 if rec1 is equal to rec2
+@retval negative if rec1 is less than rec2
+@retval positive if rec1 is greater than rec2 */
int
cmp_rec_rec(
-/*========*/
- const rec_t* rec1, /*!< in: physical record */
- const rec_t* rec2, /*!< in: physical record */
- const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */
- const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */
- dict_index_t* index); /*!< in: data dictionary index */
+ const rec_t* rec1,
+ const rec_t* rec2,
+ const offset_t* offsets1,
+ const offset_t* offsets2,
+ const dict_index_t* index,
+ bool nulls_unequal = false,
+ ulint* matched_fields = NULL)
+ MY_ATTRIBUTE((nonnull(1,2,3,4,5)));
-/*****************************************************************
-This function is used to compare two dfields where at least the first
-has its data type field set. */
-UNIV_INTERN
-int
-cmp_dfield_dfield_like_prefix(
-/*==========================*/
- /* out: 1, 0, -1, if dfield1 is greater, equal,
- less than dfield2, respectively */
- dfield_t* dfield1,/* in: data field; must have type field set */
- dfield_t* dfield2);/* in: data field */
-/*****************************************************************
-This function is used to compare two dfields where at least the first
-has its data type field set. */
+/** Compare two data fields.
+@param[in] dfield1 data field
+@param[in] dfield2 data field
+@return the comparison result of dfield1 and dfield2
+@retval 0 if dfield1 is equal to dfield2, or dfield2 is a prefix of dfield1
+@retval negative if dfield1 is less than dfield2
+@retval positive if dfield1 is greater than dfield2 */
UNIV_INLINE
int
-cmp_dfield_dfield_like_substr(
-/*==========================*/
- /* out: 1, 0, -1, if dfield1 is greater, equal,
- less than dfield2, respectively */
- dfield_t* dfield1,/* in: data field; must have type field set */
- dfield_t* dfield2);/* in: data field */
-/*****************************************************************
-This function is used to compare two dfields where at least the first
-has its data type field set. */
-UNIV_INLINE
-int
-cmp_dfield_dfield_like_suffix(
-/*==========================*/
- /* out: 1, 0, -1, if dfield1 is greater, equal,
- less than dfield2, respectively */
- dfield_t* dfield1,/* in: data field; must have type field set */
- dfield_t* dfield2);/* in: data field */
+cmp_dfield_dfield_like_prefix(
+ const dfield_t* dfield1,
+ const dfield_t* dfield2);
-#ifndef UNIV_NONINL
#include "rem0cmp.ic"
-#endif
#endif
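A recurring theme in this header's rewrite is the relaxed return contract: the comparators now promise only the sign of the result ("negative/0/positive") rather than exactly 1, 0 or -1, and cmp_rec_rec() absorbs the old cmp_rec_rec_with_match() behind defaulted parameters. A hedged sketch of what the sign-based contract means for callers; cmp() here is a stand-in for any of the functions above:

    #include <cstddef>
    #include <cstring>

    /* Stand-in comparator with the same sign-based contract. */
    static int cmp(const void* a, const void* b, size_t len)
    {
        return memcmp(a, b, len);
    }

    static bool is_less(const void* a, const void* b, size_t len)
    {
        /* Correct under the new contract: test the sign.
        Wrong: comparing the result against exactly -1 or 1,
        which is no longer guaranteed. */
        return cmp(a, b, len) < 0;
    }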
diff --git a/storage/innobase/include/rem0cmp.ic b/storage/innobase/include/rem0cmp.ic
index 9dee05a768c..4230543615a 100644
--- a/storage/innobase/include/rem0cmp.ic
+++ b/storage/innobase/include/rem0cmp.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,91 +24,21 @@ Comparison services for records
Created 7/1/1994 Heikki Tuuri
************************************************************************/
-/*************************************************************//**
-This function is used to compare two data fields for which we know the
-data type.
-@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */
-UNIV_INLINE
-int
-cmp_data_data(
-/*==========*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type */
- const byte* data1, /*!< in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /*!< in: data field length or UNIV_SQL_NULL */
- const byte* data2, /*!< in: data field (== a pointer to a memory
- buffer) */
- ulint len2) /*!< in: data field length or UNIV_SQL_NULL */
-{
- return(cmp_data_data_slow(mtype, prtype, data1, len1, data2, len2));
-}
+#include <mysql_com.h>
+#include <my_sys.h>
-/*****************************************************************
-This function is used to compare two (CHAR) data fields for the LIKE
-operator. */
-UNIV_INLINE
-int
-cmp_data_data_like_prefix(
-/*======================*/
- /* out: 1, 0, -1, if data1 is greater, equal,
- less than data2, respectively */
- byte* data1, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /* in: data field length or UNIV_SQL_NULL */
- byte* data2, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len2) /* in: data field length or UNIV_SQL_NULL */
-{
- return(cmp_data_data_slow_like_prefix(data1, len1, data2, len2));
-}
-/*****************************************************************
-This function is used to compare two (CHAR) data fields for the LIKE
-operator. */
-UNIV_INLINE
-int
-cmp_data_data_like_suffix(
-/*======================*/
- /* out: 1, 0, -1, if data1 is greater, equal,
- less than data2, respectively */
- byte* data1, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /* in: data field length or UNIV_SQL_NULL */
- byte* data2, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len2) /* in: data field length or UNIV_SQL_NULL */
-{
- return(cmp_data_data_slow_like_suffix(data1, len1, data2, len2));
-}
-/*****************************************************************
-This function is used to compare two (CHAR) data fields for the LIKE
-operator. */
-UNIV_INLINE
-int
-cmp_data_data_like_substr(
-/*======================*/
- /* out: 1, 0, -1, if data1 is greater, equal,
- less than data2, respectively */
- byte* data1, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /* in: data field length or UNIV_SQL_NULL */
- byte* data2, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len2) /* in: data field length or UNIV_SQL_NULL */
-{
- return(cmp_data_data_slow_like_substr(data1, len1, data2, len2));
-}
-/*************************************************************//**
-This function is used to compare two dfields where at least the first
-has its data type field set.
-@return 1, 0, -1, if dfield1 is greater, equal, less than dfield2,
-respectively */
+/** Compare two data fields.
+@param[in] dfield1 data field; must have type field set
+@param[in] dfield2 data field
+@return the comparison result of dfield1 and dfield2
+@retval 0 if dfield1 is equal to dfield2
+@retval negative if dfield1 is less than dfield2
+@retval positive if dfield1 is greater than dfield2 */
UNIV_INLINE
int
cmp_dfield_dfield(
-/*==============*/
- const dfield_t* dfield1,/*!< in: data field; must have type field set */
- const dfield_t* dfield2)/*!< in: data field */
+ const dfield_t* dfield1,
+ const dfield_t* dfield2)
{
const dtype_t* type;
@@ -122,65 +53,56 @@ cmp_dfield_dfield(
dfield_get_len(dfield2)));
}
-/*****************************************************************
-This function is used to compare two dfields where at least the first
-has its data type field set. */
+/** Compare two data fields.
+@param[in] dfield1 data field
+@param[in] dfield2 data field
+@return the comparison result of dfield1 and dfield2
+@retval 0 if dfield1 is equal to dfield2, or dfield2 is a prefix of dfield1
+@retval negative if dfield1 is less than dfield2
+@retval positive if dfield1 is greater than dfield2 */
UNIV_INLINE
int
-cmp_dfield_dfield_like_suffix(
-/*==========================*/
- /* out: 1, 0, -1, if dfield1 is greater, equal,
- less than dfield2, respectively */
- dfield_t* dfield1,/* in: data field; must have type field set */
- dfield_t* dfield2)/* in: data field */
+cmp_dfield_dfield_like_prefix(
+ const dfield_t* dfield1,
+ const dfield_t* dfield2)
{
- ut_ad(dfield_check_typed(dfield1));
+ const dtype_t* type;
- return(cmp_data_data_like_suffix(
- (byte*) dfield_get_data(dfield1),
- dfield_get_len(dfield1),
- (byte*) dfield_get_data(dfield2),
- dfield_get_len(dfield2)));
-}
-
-/*****************************************************************
-This function is used to compare two dfields where at least the first
-has its data type field set. */
-UNIV_INLINE
-int
-cmp_dfield_dfield_like_substr(
-/*==========================*/
- /* out: 1, 0, -1, if dfield1 is greater, equal,
- less than dfield2, respectively */
- dfield_t* dfield1,/* in: data field; must have type field set */
- dfield_t* dfield2)/* in: data field */
-{
ut_ad(dfield_check_typed(dfield1));
+ ut_ad(dfield_check_typed(dfield2));
- return(cmp_data_data_like_substr(
- (byte*) dfield_get_data(dfield1),
- dfield_get_len(dfield1),
- (byte*) dfield_get_data(dfield2),
- dfield_get_len(dfield2)));
-}
-/*************************************************************//**
-This function is used to compare two physical records. Only the common
-first fields are compared.
-@return 1, 0 , -1 if rec1 is greater, equal, less, respectively, than
-rec2; only the common first fields are compared */
-UNIV_INLINE
-int
-cmp_rec_rec(
-/*========*/
- const rec_t* rec1, /*!< in: physical record */
- const rec_t* rec2, /*!< in: physical record */
- const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */
- const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */
- dict_index_t* index) /*!< in: data dictionary index */
-{
- ulint match_f = 0;
- ulint match_b = 0;
+ type = dfield_get_type(dfield1);
- return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index,
- FALSE, &match_f, &match_b));
+#ifdef UNIV_DEBUG
+ switch (type->prtype & DATA_MYSQL_TYPE_MASK) {
+ case MYSQL_TYPE_BIT:
+ case MYSQL_TYPE_STRING:
+ case MYSQL_TYPE_VAR_STRING:
+ case MYSQL_TYPE_TINY_BLOB:
+ case MYSQL_TYPE_MEDIUM_BLOB:
+ case MYSQL_TYPE_BLOB:
+ case MYSQL_TYPE_LONG_BLOB:
+ case MYSQL_TYPE_VARCHAR:
+ break;
+ default:
+ ut_error;
+ }
+#endif /* UNIV_DEBUG */
+
+ uint cs_num = (uint) dtype_get_charset_coll(type->prtype);
+
+ if (CHARSET_INFO* cs = get_charset(cs_num, MYF(MY_WME))) {
+ return(cs->coll->strnncoll(
+ cs,
+ static_cast<const uchar*>(
+ dfield_get_data(dfield1)),
+ dfield_get_len(dfield1),
+ static_cast<const uchar*>(
+ dfield_get_data(dfield2)),
+ dfield_get_len(dfield2),
+ 1));
+ }
+
+ ib::fatal() << "Unable to find charset-collation " << cs_num;
+ return(0);
}
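The inline body above resolves the field's collation and delegates to that collation's strnncoll() with the prefix flag set. For readers without the MySQL collation API at hand, a byte-wise analogue of the prefix semantics (collations ignored; the helper name is hypothetical):

    #include <cstddef>
    #include <cstring>

    /* Byte-wise analogue of a prefix comparison: returns 0 when rhs
    is a prefix of lhs (or equal to it), otherwise the sign of the
    first difference. Real collations can reorder, pad and contract,
    so this models only the contract, not the algorithm. */
    static int cmp_like_prefix(const unsigned char* lhs, size_t lhs_len,
                               const unsigned char* rhs, size_t rhs_len)
    {
        size_t n = lhs_len < rhs_len ? lhs_len : rhs_len;
        if (int diff = std::memcmp(lhs, rhs, n)) {
            return diff;
        }
        return lhs_len < rhs_len ? -1 : 0;
    }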
diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h
index 4d10e98f45b..72ee96b2887 100644
--- a/storage/innobase/include/rem0rec.h
+++ b/storage/innobase/include/rem0rec.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,12 +28,14 @@ Created 5/30/1994 Heikki Tuuri
#define rem0rec_h
#ifndef UNIV_INNOCHECKSUM
-#include "univ.i"
#include "data0data.h"
#include "rem0types.h"
#include "mtr0types.h"
#include "page0types.h"
-#endif /* !UNIV_INNOCHECKSUM */
+#include "trx0types.h"
+#endif /* !UNIV_INNOCHECKSUM */
+#include <ostream>
+#include <sstream>
/* Info bit denoting the predefined minimum record: this bit is set
if and only if the record is the first user record on a non-leaf
@@ -68,62 +71,105 @@ The status is stored in the low-order bits. */
/* Length of a B-tree node pointer, in bytes */
#define REC_NODE_PTR_SIZE 4
+#ifndef UNIV_INNOCHECKSUM
/** SQL null flag in a 1-byte offset of ROW_FORMAT=REDUNDANT records */
-#define REC_1BYTE_SQL_NULL_MASK 0x80UL
+static const offset_t REC_1BYTE_SQL_NULL_MASK= 0x80;
/** SQL null flag in a 2-byte offset of ROW_FORMAT=REDUNDANT records */
-#define REC_2BYTE_SQL_NULL_MASK 0x8000UL
+static const offset_t REC_2BYTE_SQL_NULL_MASK= 0x8000;
/** In a 2-byte offset of ROW_FORMAT=REDUNDANT records, the second most
significant bit denotes that the tail of a field is stored off-page. */
-#define REC_2BYTE_EXTERN_MASK 0x4000UL
+static const offset_t REC_2BYTE_EXTERN_MASK= 0x4000;
+
+static const size_t RECORD_OFFSET= 2;
+static const size_t INDEX_OFFSET=
+ RECORD_OFFSET + sizeof(rec_t *) / sizeof(offset_t);
-#ifdef UNIV_DEBUG
-/* Length of the rec_get_offsets() header */
-# define REC_OFFS_HEADER_SIZE 4
-#else /* UNIV_DEBUG */
/* Length of the rec_get_offsets() header */
-# define REC_OFFS_HEADER_SIZE 2
+static const size_t REC_OFFS_HEADER_SIZE=
+#ifdef UNIV_DEBUG
+ sizeof(rec_t *) / sizeof(offset_t) +
+ sizeof(dict_index_t *) / sizeof(offset_t) +
#endif /* UNIV_DEBUG */
+ 2;
/* Number of elements that should be initially allocated for the
offsets[] array, first passed to rec_get_offsets() */
-#define REC_OFFS_NORMAL_SIZE 100
-#define REC_OFFS_SMALL_SIZE 10
+static const size_t REC_OFFS_NORMAL_SIZE= 300;
+static const size_t REC_OFFS_SMALL_SIZE= 18;
+static const size_t REC_OFFS_SEC_INDEX_SIZE=
+ /* PK max key parts */ 16 + /* sec idx max key parts */ 16 +
+ /* child page number for non-leaf pages */ 1;
+
+/* An offset consists of two parts: the 2 upper bits are the type
+and all other bits are the value */
+
+enum field_type_t
+{
+ /** normal field */
+ STORED_IN_RECORD= 0 << 14,
+ /** this field is stored off-page */
+ STORED_OFFPAGE= 1 << 14,
+ /** just an SQL NULL */
+ SQL_NULL= 2 << 14
+};
+
+/** without 2 upper bits */
+static const offset_t DATA_MASK= 0x3fff;
+/** 2 upper bits */
+static const offset_t TYPE_MASK= ~DATA_MASK;
+inline field_type_t get_type(offset_t n)
+{
+ return static_cast<field_type_t>(n & TYPE_MASK);
+}
+inline void set_type(offset_t &n, field_type_t type)
+{
+ n= (n & DATA_MASK) | static_cast<offset_t>(type);
+}
+inline offset_t get_value(offset_t n) { return n & DATA_MASK; }
+inline offset_t combine(offset_t value, field_type_t type)
+{
+ return get_value(value) | static_cast<offset_t>(type);
+}
+
+/** Compact flag ORed to the extra size returned by rec_offs_base()[0] */
+static const offset_t REC_OFFS_COMPACT= 1 << 15;
+/** External flag in offsets returned by rec_offs_base()[0] */
+static const offset_t REC_OFFS_EXTERNAL= 1 << 14;
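    /* Worked example of the 14+2 bit packing declared above; the value
    0x0123 is arbitrary and the helper function is hypothetical, shown
    only to illustrate how the masks and accessors compose. */
    static inline void offset_packing_example()
    {
        offset_t off = combine(0x0123, STORED_OFFPAGE);

        ut_ad(get_value(off) == 0x0123);        /* low 14 bits */
        ut_ad(get_type(off) == STORED_OFFPAGE); /* upper 2 bits */

        set_type(off, SQL_NULL);                /* retype, keep value */
        ut_ad(get_value(off) == 0x0123);
        ut_ad(get_type(off) == SQL_NULL);
    }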
-#ifndef UNIV_INNOCHECKSUM
/******************************************************//**
The following function is used to get the pointer of the next chained record
on the same page.
-@return pointer to the next chained record, or NULL if none */
+@return pointer to the next chained record, or NULL if none */
UNIV_INLINE
const rec_t*
rec_get_next_ptr_const(
/*===================*/
const rec_t* rec, /*!< in: physical record */
ulint comp) /*!< in: nonzero=compact page format */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
The following function is used to get the pointer of the next chained record
on the same page.
-@return pointer to the next chained record, or NULL if none */
+@return pointer to the next chained record, or NULL if none */
UNIV_INLINE
rec_t*
rec_get_next_ptr(
/*=============*/
rec_t* rec, /*!< in: physical record */
ulint comp) /*!< in: nonzero=compact page format */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
The following function is used to get the offset of the
next chained record on the same page.
-@return the page offset of the next chained record, or 0 if none */
+@return the page offset of the next chained record, or 0 if none */
UNIV_INLINE
ulint
rec_get_next_offs(
/*==============*/
const rec_t* rec, /*!< in: physical record */
ulint comp) /*!< in: nonzero=compact page format */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
The following function is used to set the next record offset field
of an old-style record. */
@@ -147,34 +193,49 @@ rec_set_next_offs_new(
/******************************************************//**
The following function is used to get the number of fields
in an old-style record.
-@return number of data fields */
+@return number of data fields */
UNIV_INLINE
ulint
rec_get_n_fields_old(
/*=================*/
const rec_t* rec) /*!< in: physical record */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
The following function is used to get the number of fields
in a record.
-@return number of data fields */
+@return number of data fields */
UNIV_INLINE
ulint
rec_get_n_fields(
/*=============*/
const rec_t* rec, /*!< in: physical record */
const dict_index_t* index) /*!< in: record descriptor */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Confirm that the number of fields in the entry is sane
+by comparing it with another record on the same page
+@param[in] index index
+@param[in] rec record on the same page
+@param[in] entry index entry
+@return true if n_fields is sane */
+UNIV_INLINE
+bool
+rec_n_fields_is_sane(
+ dict_index_t* index,
+ const rec_t* rec,
+ const dtuple_t* entry)
+ MY_ATTRIBUTE((warn_unused_result));
+
/******************************************************//**
The following function is used to get the number of records owned by the
previous directory record.
-@return number of owned records */
+@return number of owned records */
UNIV_INLINE
ulint
rec_get_n_owned_old(
/*================*/
const rec_t* rec) /*!< in: old-style physical record */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
The following function is used to set the number of owned records. */
UNIV_INLINE
@@ -187,13 +248,13 @@ rec_set_n_owned_old(
/******************************************************//**
The following function is used to get the number of records owned by the
previous directory record.
-@return number of owned records */
+@return number of owned records */
UNIV_INLINE
ulint
rec_get_n_owned_new(
/*================*/
const rec_t* rec) /*!< in: new-style physical record */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
The following function is used to set the number of owned records. */
UNIV_INLINE
@@ -207,14 +268,14 @@ rec_set_n_owned_new(
/******************************************************//**
The following function is used to retrieve the info bits of
a record.
-@return info bits */
+@return info bits */
UNIV_INLINE
ulint
rec_get_info_bits(
/*==============*/
const rec_t* rec, /*!< in: physical record */
ulint comp) /*!< in: nonzero=compact page format */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
The following function is used to set the info bits of a record. */
UNIV_INLINE
@@ -235,13 +296,13 @@ rec_set_info_bits_new(
MY_ATTRIBUTE((nonnull));
/******************************************************//**
The following function retrieves the status bits of a new-style record.
-@return status bits */
+@return status bits */
UNIV_INLINE
ulint
rec_get_status(
/*===========*/
const rec_t* rec) /*!< in: physical record */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
The following function is used to set the status bits of a new-style record. */
@@ -256,14 +317,14 @@ rec_set_status(
/******************************************************//**
The following function is used to retrieve the info and status
bits of a record. (Only compact records have status bits.)
-@return info bits */
+@return info bits */
UNIV_INLINE
ulint
rec_get_info_and_status_bits(
/*=========================*/
const rec_t* rec, /*!< in: physical record */
ulint comp) /*!< in: nonzero=compact page format */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
The following function is used to set the info and status
bits of a record. (Only compact records have status bits.) */
@@ -277,14 +338,14 @@ rec_set_info_and_status_bits(
/******************************************************//**
The following function tells if record is delete marked.
-@return nonzero if delete marked */
+@return nonzero if delete marked */
UNIV_INLINE
ulint
rec_get_deleted_flag(
/*=================*/
const rec_t* rec, /*!< in: physical record */
ulint comp) /*!< in: nonzero=compact page format */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
The following function is used to set the deleted bit. */
UNIV_INLINE
@@ -306,23 +367,23 @@ rec_set_deleted_flag_new(
MY_ATTRIBUTE((nonnull(1)));
/******************************************************//**
The following function tells if a new-style record is a node pointer.
-@return TRUE if node pointer */
+@return TRUE if node pointer */
UNIV_INLINE
ibool
rec_get_node_ptr_flag(
/*==================*/
const rec_t* rec) /*!< in: physical record */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
The following function is used to get the order number
of an old-style record in the heap of the index page.
-@return heap order number */
+@return heap order number */
UNIV_INLINE
ulint
rec_get_heap_no_old(
/*================*/
const rec_t* rec) /*!< in: physical record */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
The following function is used to set the heap number
field in an old-style record. */
@@ -336,13 +397,13 @@ rec_set_heap_no_old(
/******************************************************//**
The following function is used to get the order number
of a new-style record in the heap of the index page.
-@return heap order number */
+@return heap order number */
UNIV_INLINE
ulint
rec_get_heap_no_new(
/*================*/
const rec_t* rec) /*!< in: physical record */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
The following function is used to set the heap number
field in a new-style record. */
@@ -356,13 +417,13 @@ rec_set_heap_no_new(
/******************************************************//**
The following function is used to test whether the data offsets
in the record are stored in one-byte or two-byte format.
-@return TRUE if 1-byte form */
+@return TRUE if 1-byte form */
UNIV_INLINE
ibool
rec_get_1byte_offs_flag(
/*====================*/
const rec_t* rec) /*!< in: physical record */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
The following function is used to set the 1-byte offsets flag. */
@@ -378,14 +439,14 @@ rec_set_1byte_offs_flag(
Returns the offset of nth field end if the record is stored in the 1-byte
offsets form. If the field is SQL null, the flag is ORed in the returned
value.
-@return offset of the start of the field, SQL null flag ORed */
+@return offset of the start of the field, SQL null flag ORed */
UNIV_INLINE
-ulint
+uint8_t
rec_1_get_field_end_info(
/*=====================*/
const rec_t* rec, /*!< in: record */
ulint n) /*!< in: field index */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
Returns the offset of nth field end if the record is stored in the 2-byte
@@ -394,12 +455,12 @@ value.
@return offset of the start of the field, SQL null flag and extern
storage flag ORed */
UNIV_INLINE
-ulint
+offset_t
rec_2_get_field_end_info(
/*=====================*/
const rec_t* rec, /*!< in: record */
ulint n) /*!< in: field index */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
Returns nonzero if the field is stored off-page.
@@ -411,13 +472,12 @@ rec_2_is_field_extern(
/*==================*/
const rec_t* rec, /*!< in: record */
ulint n) /*!< in: field index */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
Determine how many of the first n columns in a compact
physical record are stored externally.
-@return number of externally stored columns */
-UNIV_INTERN
+@return number of externally stored columns */
ulint
rec_get_n_extern_new(
/*=================*/
@@ -426,46 +486,47 @@ rec_get_n_extern_new(
ulint n) /*!< in: number of columns to scan */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************//**
-The following function determines the offsets to each field
-in the record. It can reuse a previously allocated array.
-@return the new offsets */
-UNIV_INTERN
-ulint*
+/** Determine the offsets to each field in an index record.
+@param[in] rec physical record
+@param[in] index the index that the record belongs to
+@param[in,out] offsets array comprising offsets[0] allocated elements,
+ or an array from rec_get_offsets(), or NULL
+@param[in] leaf whether this is a leaf-page record
+@param[in] n_fields maximum number of offsets to compute
+ (ULINT_UNDEFINED to compute all offsets)
+@param[in,out] heap memory heap
+@return the new offsets */
+offset_t*
rec_get_offsets_func(
-/*=================*/
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets,/*!< in/out: array consisting of
- offsets[0] allocated elements,
- or an array from rec_get_offsets(),
- or NULL */
- ulint n_fields,/*!< in: maximum number of
- initialized fields
- (ULINT_UNDEFINED if all fields) */
+ const rec_t* rec,
+ const dict_index_t* index,
+ offset_t* offsets,
+#ifdef UNIV_DEBUG
+ bool leaf,
+#endif /* UNIV_DEBUG */
+ ulint n_fields,
#ifdef UNIV_DEBUG
const char* file, /*!< in: file name where called */
- ulint line, /*!< in: line number where called */
+ unsigned line, /*!< in: line number where called */
#endif /* UNIV_DEBUG */
mem_heap_t** heap) /*!< in/out: memory heap */
#ifdef UNIV_DEBUG
- MY_ATTRIBUTE((nonnull(1,2,5,7),warn_unused_result));
+ MY_ATTRIBUTE((nonnull(1,2,6,8),warn_unused_result));
#else /* UNIV_DEBUG */
MY_ATTRIBUTE((nonnull(1,2,5),warn_unused_result));
#endif /* UNIV_DEBUG */
#ifdef UNIV_DEBUG
-# define rec_get_offsets(rec,index,offsets,n,heap) \
- rec_get_offsets_func(rec,index,offsets,n,__FILE__,__LINE__,heap)
+# define rec_get_offsets(rec, index, offsets, leaf, n, heap) \
+ rec_get_offsets_func(rec,index,offsets,leaf,n,__FILE__,__LINE__,heap)
#else /* UNIV_DEBUG */
-# define rec_get_offsets(rec, index, offsets, n, heap) \
+# define rec_get_offsets(rec, index, offsets, leaf, n, heap) \
rec_get_offsets_func(rec, index, offsets, n, heap)
#endif /* UNIV_DEBUG */
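    /* Typical call pattern after this change, sketched; rec and index
    are assumed to be in scope, and the new "leaf" argument is consumed
    only in debug builds even though every caller now passes it. */
    static void offsets_usage_sketch(const rec_t* rec,
                                     const dict_index_t* index)
    {
        mem_heap_t* heap = NULL;
        offset_t    offsets_[REC_OFFS_NORMAL_SIZE];
        offset_t*   offsets = offsets_;
        rec_offs_init(offsets_);

        offsets = rec_get_offsets(rec, index, offsets, true /* leaf */,
                                  ULINT_UNDEFINED, &heap);
        /* ... read fields through the rec_offs_* accessors ... */
        if (heap != NULL) {
            mem_heap_free(heap);
        }
    }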
/******************************************************//**
The following function determines the offsets to each field
in the record. It can reuse a previously allocated array. */
-UNIV_INTERN
void
rec_get_offsets_reverse(
/*====================*/
@@ -476,20 +537,20 @@ rec_get_offsets_reverse(
const dict_index_t* index, /*!< in: record descriptor */
ulint node_ptr,/*!< in: nonzero=node pointer,
0=leaf node */
- ulint* offsets)/*!< in/out: array consisting of
+ offset_t* offsets)/*!< in/out: array consisting of
offsets[0] allocated elements */
MY_ATTRIBUTE((nonnull));
#ifdef UNIV_DEBUG
/************************************************************//**
Validates offsets returned by rec_get_offsets().
-@return TRUE if valid */
+@return TRUE if valid */
UNIV_INLINE
ibool
rec_offs_validate(
/*==============*/
const rec_t* rec, /*!< in: record or NULL */
const dict_index_t* index, /*!< in: record descriptor or NULL */
- const ulint* offsets)/*!< in: array returned by
+ const offset_t* offsets)/*!< in: array returned by
rec_get_offsets() */
MY_ATTRIBUTE((nonnull(3), warn_unused_result));
/************************************************************//**
@@ -501,7 +562,7 @@ rec_offs_make_valid(
/*================*/
const rec_t* rec, /*!< in: record */
const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets)/*!< in: array returned by
+ offset_t* offsets)/*!< in: array returned by
rec_get_offsets() */
MY_ATTRIBUTE((nonnull));
#else
@@ -511,8 +572,7 @@ rec_offs_make_valid(
/************************************************************//**
The following function is used to get the offset to the nth
data field in an old-style record.
-@return offset to the field */
-UNIV_INTERN
+@return offset to the field */
ulint
rec_get_nth_field_offs_old(
/*=======================*/
@@ -527,23 +587,23 @@ rec_get_nth_field_offs_old(
Gets the physical size of an old-style field.
Also an SQL null may have a field of size > 0,
if the data type is of a fixed size.
-@return field size in bytes */
+@return field size in bytes */
UNIV_INLINE
ulint
rec_get_nth_field_size(
/*===================*/
const rec_t* rec, /*!< in: record */
ulint n) /*!< in: index of the field */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/************************************************************//**
The following function is used to get an offset to the nth
data field in a record.
-@return offset from the origin of rec */
+@return offset from the origin of rec */
UNIV_INLINE
-ulint
+offset_t
rec_get_nth_field_offs(
/*===================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
ulint n, /*!< in: index of the field */
ulint* len) /*!< out: length of the field; UNIV_SQL_NULL
if SQL null */
@@ -553,73 +613,81 @@ rec_get_nth_field_offs(
/******************************************************//**
Determine if the offsets are for a record in the new
compact format.
-@return nonzero if compact format */
+@return nonzero if compact format */
UNIV_INLINE
ulint
rec_offs_comp(
/*==========*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
Determine if the offsets are for a record containing
externally stored columns.
-@return nonzero if externally stored */
+@return nonzero if externally stored */
UNIV_INLINE
ulint
rec_offs_any_extern(
/*================*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
Determine if the offsets are for a record containing null BLOB pointers.
-@return first field containing a null BLOB pointer, or NULL if none found */
+@return first field containing a null BLOB pointer, or NULL if none found */
UNIV_INLINE
const byte*
rec_offs_any_null_extern(
/*=====================*/
const rec_t* rec, /*!< in: record */
- const ulint* offsets) /*!< in: rec_get_offsets(rec) */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ const offset_t* offsets) /*!< in: rec_get_offsets(rec) */
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
Returns nonzero if the extern bit is set in nth field of rec.
-@return nonzero if externally stored */
+@return nonzero if externally stored */
UNIV_INLINE
ulint
rec_offs_nth_extern(
/*================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
ulint n) /*!< in: nth field */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Mark the nth field as externally stored.
+@param[in] offsets array returned by rec_get_offsets()
+@param[in] n nth field */
+void
+rec_offs_make_nth_extern(
+ offset_t* offsets,
+ const ulint n);
/******************************************************//**
Returns nonzero if the SQL NULL bit is set in nth field of rec.
-@return nonzero if SQL NULL */
+@return nonzero if SQL NULL */
UNIV_INLINE
ulint
rec_offs_nth_sql_null(
/*==================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
ulint n) /*!< in: nth field */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
Gets the physical size of a field.
-@return length of field */
+@return length of field */
UNIV_INLINE
ulint
rec_offs_nth_size(
/*==============*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
ulint n) /*!< in: nth field */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
Returns the number of extern bits set in a record.
-@return number of externally stored fields */
+@return number of externally stored fields */
UNIV_INLINE
ulint
rec_offs_n_extern(
/*==============*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
+ MY_ATTRIBUTE((warn_unused_result));
/***********************************************************//**
This is used to modify the value of an already existing field in a record.
The previous value must have exactly the same size as the new value. If len
@@ -631,7 +699,7 @@ void
rec_set_nth_field(
/*==============*/
rec_t* rec, /*!< in: record */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
ulint n, /*!< in: index number of the field */
const void* data, /*!< in: pointer to the data if not SQL null */
ulint len) /*!< in: length of the data or UNIV_SQL_NULL.
@@ -645,23 +713,23 @@ The following function returns the data size of an old-style physical
record, that is the sum of field lengths. SQL null fields
are counted as length 0 fields. The value returned by the function
is the distance from record origin to record end in bytes.
-@return size */
+@return size */
UNIV_INLINE
ulint
rec_get_data_size_old(
/*==================*/
const rec_t* rec) /*!< in: physical record */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/**********************************************************//**
The following function returns the number of allocated elements
for an array of offsets.
-@return number of elements */
+@return number of elements */
UNIV_INLINE
ulint
rec_offs_get_n_alloc(
/*=================*/
- const ulint* offsets)/*!< in: array for rec_get_offsets() */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ const offset_t* offsets)/*!< in: array for rec_get_offsets() */
+ MY_ATTRIBUTE((warn_unused_result));
/**********************************************************//**
The following function sets the number of allocated elements
for an array of offsets. */
@@ -669,7 +737,7 @@ UNIV_INLINE
void
rec_offs_set_n_alloc(
/*=================*/
- ulint* offsets, /*!< out: array for rec_get_offsets(),
+ offset_t* offsets, /*!< out: array for rec_get_offsets(),
must be allocated */
ulint n_alloc) /*!< in: number of elements */
MY_ATTRIBUTE((nonnull));
@@ -677,126 +745,124 @@ rec_offs_set_n_alloc(
rec_offs_set_n_alloc(offsets, (sizeof offsets) / sizeof *offsets)
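
For orientation, rec_offs_set_n_alloc() is normally reached through the
rec_offs_init() macro above, with the offsets array living on the caller's
stack. A minimal sketch of the usual calling pattern follows; the
rec_get_offsets() argument list here is an assumption based on this
header's conventions, not a quotation of its declaration.

/* Sketch: typical stack allocation and use of an offsets array. */
static void
sketch_scan_fields(const rec_t* rec, const dict_index_t* index)
{
	offset_t	offsets_[REC_OFFS_NORMAL_SIZE];
	offset_t*	offsets	= offsets_;
	mem_heap_t*	heap	= NULL;

	rec_offs_init(offsets_);
	offsets = rec_get_offsets(rec, index, offsets,
				  true /* leaf (assumed flag) */,
				  ULINT_UNDEFINED, &heap);

	for (ulint i = 0; i < rec_offs_n_fields(offsets); i++) {
		ulint		len;
		const byte*	field
			= rec_get_nth_field(rec, offsets, i, &len);
		/* len is UNIV_SQL_NULL for SQL NULL fields */
		(void) field;
	}

	if (heap != NULL) {
		mem_heap_free(heap);	/* only set for very wide records */
	}
}
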
/**********************************************************//**
The following function returns the number of fields in a record.
-@return number of fields */
+@return number of fields */
UNIV_INLINE
ulint
rec_offs_n_fields(
/*==============*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
+ MY_ATTRIBUTE((warn_unused_result));
/**********************************************************//**
The following function returns the data size of a physical
record, that is the sum of field lengths. SQL null fields
are counted as length 0 fields. The value returned by the function
is the distance from record origin to record end in bytes.
-@return size */
+@return size */
UNIV_INLINE
ulint
rec_offs_data_size(
/*===============*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
+ MY_ATTRIBUTE((warn_unused_result));
/**********************************************************//**
Returns the total size of record minus data size of record.
The value returned by the function is the distance from record
start to record origin in bytes.
-@return size */
+@return size */
UNIV_INLINE
ulint
rec_offs_extra_size(
/*================*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
+ MY_ATTRIBUTE((warn_unused_result));
/**********************************************************//**
Returns the total size of a physical record.
-@return size */
+@return size */
UNIV_INLINE
ulint
rec_offs_size(
/*==========*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
+ MY_ATTRIBUTE((warn_unused_result));
#ifdef UNIV_DEBUG
/**********************************************************//**
Returns a pointer to the start of the record.
-@return pointer to start */
+@return pointer to start */
UNIV_INLINE
byte*
rec_get_start(
/*==========*/
const rec_t* rec, /*!< in: pointer to record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
+ MY_ATTRIBUTE((warn_unused_result));
/**********************************************************//**
Returns a pointer to the end of the record.
-@return pointer to end */
+@return pointer to end */
UNIV_INLINE
byte*
rec_get_end(
/*========*/
const rec_t* rec, /*!< in: pointer to record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
+ MY_ATTRIBUTE((warn_unused_result));
#else /* UNIV_DEBUG */
# define rec_get_start(rec, offsets) ((rec) - rec_offs_extra_size(offsets))
# define rec_get_end(rec, offsets) ((rec) + rec_offs_data_size(offsets))
#endif /* UNIV_DEBUG */
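
A record pointer addresses the record origin: the header ("extra" bytes)
grows downwards from it and the data bytes grow upwards. The accessors
above therefore satisfy the following identities, restated here as
illustrative assertions:

/* Illustrative invariants of the record layout: */
ut_ad(rec_get_start(rec, offsets)
      == rec - rec_offs_extra_size(offsets));
ut_ad(rec_get_end(rec, offsets)
      == rec + rec_offs_data_size(offsets));
ut_ad(rec_offs_size(offsets)
      == rec_offs_extra_size(offsets) + rec_offs_data_size(offsets));
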
-/***************************************************************//**
-Copies a physical record to a buffer.
-@return pointer to the origin of the copy */
+
+/** Copy a physical record to a buffer.
+@param[in] buf buffer
+@param[in] rec physical record
+@param[in] offsets array returned by rec_get_offsets()
+@return pointer to the origin of the copy */
UNIV_INLINE
rec_t*
rec_copy(
-/*=====*/
- void* buf, /*!< in: buffer */
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((nonnull));
-#ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Determines the size of a data tuple prefix in a temporary file.
-@return total size */
-UNIV_INTERN
+ void* buf,
+ const rec_t* rec,
+ const offset_t* offsets);
+
+/** Determine the size of a data tuple prefix in a temporary file.
+@param[in] index clustered or secondary index
+@param[in] fields data fields
+@param[in] n_fields number of data fields
+@param[out] extra record header size
+@return total size, in bytes */
ulint
rec_get_converted_size_temp(
-/*========================*/
- const dict_index_t* index, /*!< in: record descriptor */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields,/*!< in: number of data fields */
- ulint* extra) /*!< out: extra size */
- MY_ATTRIBUTE((warn_unused_result, nonnull));
+ const dict_index_t* index,
+ const dfield_t* fields,
+ ulint n_fields,
+ ulint* extra)
+ MY_ATTRIBUTE((warn_unused_result, nonnull(1,2)));
/******************************************************//**
Determine the offset to each field in temporary file.
@see rec_convert_dtuple_to_temp() */
-UNIV_INTERN
void
rec_init_offsets_temp(
/*==================*/
const rec_t* rec, /*!< in: temporary file record */
const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets)/*!< in/out: array of offsets;
+ offset_t* offsets)/*!< in/out: array of offsets;
in: n=rec_offs_n_fields(offsets) */
MY_ATTRIBUTE((nonnull));
/*********************************************************//**
Builds a temporary file record out of a data tuple.
@see rec_init_offsets_temp() */
-UNIV_INTERN
void
rec_convert_dtuple_to_temp(
/*=======================*/
rec_t* rec, /*!< out: record */
const dict_index_t* index, /*!< in: record descriptor */
const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields) /*!< in: number of fields */
- MY_ATTRIBUTE((nonnull));
+ ulint n_fields); /*!< in: number of fields */
/**************************************************************//**
Copies the first n fields of a physical record to a new physical record in
a buffer.
-@return own: copied record */
-UNIV_INTERN
+@return own: copied record */
rec_t*
rec_copy_prefix_to_buf(
/*===================*/
@@ -809,28 +875,26 @@ rec_copy_prefix_to_buf(
or NULL */
ulint* buf_size) /*!< in/out: buffer size */
MY_ATTRIBUTE((nonnull));
-/************************************************************//**
-Folds a prefix of a physical record to a ulint.
-@return the folded value */
+/** Fold a prefix of a physical record.
+@param[in] rec index record
+@param[in] offsets return value of rec_get_offsets()
+@param[in] n_fields number of complete fields to fold
+@param[in] n_bytes number of bytes to fold in the last field
+@param[in] tree_id index tree ID
+@return the folded value */
UNIV_INLINE
ulint
rec_fold(
-/*=====*/
- const rec_t* rec, /*!< in: the physical record */
- const ulint* offsets, /*!< in: array returned by
- rec_get_offsets() */
- ulint n_fields, /*!< in: number of complete
- fields to fold */
- ulint n_bytes, /*!< in: number of bytes to fold
- in an incomplete last field */
- index_id_t tree_id) /*!< in: index tree id */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-#endif /* !UNIV_HOTBACKUP */
+ const rec_t* rec,
+ const offset_t* offsets,
+ ulint n_fields,
+ ulint n_bytes,
+ index_id_t tree_id)
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************//**
Builds a physical record out of a data tuple and
stores it into the given buffer.
-@return pointer to the origin of physical record */
-UNIV_INTERN
+@return pointer to the origin of physical record */
rec_t*
rec_convert_dtuple_to_rec(
/*======================*/
@@ -840,11 +904,11 @@ rec_convert_dtuple_to_rec(
const dtuple_t* dtuple, /*!< in: data tuple */
ulint n_ext) /*!< in: number of
externally stored columns */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/**********************************************************//**
Returns the extra size of an old-style physical record if we know its
data size and number of fields.
-@return extra size */
+@return extra size */
UNIV_INLINE
ulint
rec_get_converted_extra_size(
@@ -855,8 +919,7 @@ rec_get_converted_extra_size(
MY_ATTRIBUTE((const));
/**********************************************************//**
Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
-@return total size */
-UNIV_INTERN
+@return total size */
ulint
rec_get_converted_size_comp_prefix(
/*===============================*/
@@ -867,8 +930,7 @@ rec_get_converted_size_comp_prefix(
MY_ATTRIBUTE((warn_unused_result, nonnull(1,2)));
/**********************************************************//**
Determines the size of a data tuple in ROW_FORMAT=COMPACT.
-@return total size */
-UNIV_INTERN
+@return total size */
ulint
rec_get_converted_size_comp(
/*========================*/
@@ -884,7 +946,7 @@ rec_get_converted_size_comp(
/**********************************************************//**
The following function returns the size of a data tuple when converted to
a physical record.
-@return size */
+@return size */
UNIV_INLINE
ulint
rec_get_converted_size(
@@ -893,66 +955,68 @@ rec_get_converted_size(
const dtuple_t* dtuple, /*!< in: data tuple */
ulint n_ext) /*!< in: number of externally stored columns */
MY_ATTRIBUTE((warn_unused_result, nonnull));
-#ifndef UNIV_HOTBACKUP
-/**************************************************************//**
-Copies the first n fields of a physical record to a data tuple.
-The fields are copied to the memory heap. */
-UNIV_INTERN
+/** Copy the first n fields of a (copy of a) physical record to a data tuple.
+The fields are copied into the memory heap.
+@param[out] tuple data tuple
+@param[in] rec index record, or a copy thereof
+@param[in] index record descriptor
+@param[in] is_leaf whether rec is a leaf page record
+@param[in] n_fields number of fields to copy
+@param[in,out] heap memory heap */
void
-rec_copy_prefix_to_dtuple(
-/*======================*/
- dtuple_t* tuple, /*!< out: data tuple */
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint n_fields, /*!< in: number of fields
- to copy */
- mem_heap_t* heap) /*!< in: memory heap */
+rec_copy_prefix_to_dtuple_func(
+ dtuple_t* tuple,
+ const rec_t* rec,
+ const dict_index_t* index,
+#ifdef UNIV_DEBUG
+ bool is_leaf,
+#endif /* UNIV_DEBUG */
+ ulint n_fields,
+ mem_heap_t* heap)
MY_ATTRIBUTE((nonnull));
-#endif /* !UNIV_HOTBACKUP */
+#ifdef UNIV_DEBUG
+# define rec_copy_prefix_to_dtuple(tuple,rec,index,leaf,n_fields,heap) \
+ rec_copy_prefix_to_dtuple_func(tuple,rec,index,leaf,n_fields,heap)
+#else /* UNIV_DEBUG */
+# define rec_copy_prefix_to_dtuple(tuple,rec,index,leaf,n_fields,heap) \
+ rec_copy_prefix_to_dtuple_func(tuple,rec,index,n_fields,heap)
+#endif /* UNIV_DEBUG */
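
The _func/macro pair above follows the usual InnoDB idiom for arguments
that exist only in debug builds: callers pass the extra is_leaf flag
unconditionally, and in release builds the macro simply drops it. A
hypothetical call site:

/* The same call compiles in both build types; `leaf` reaches
   the function only under UNIV_DEBUG. */
rec_copy_prefix_to_dtuple(tuple, rec, index,
			  true /* leaf */, n_fields, heap);
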
/***************************************************************//**
Validates the consistency of a physical record.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
rec_validate(
/*=========*/
const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
MY_ATTRIBUTE((nonnull));
/***************************************************************//**
Prints an old-style physical record. */
-UNIV_INTERN
void
rec_print_old(
/*==========*/
FILE* file, /*!< in: file where to print */
const rec_t* rec) /*!< in: physical record */
MY_ATTRIBUTE((nonnull));
-#ifndef UNIV_HOTBACKUP
/***************************************************************//**
-Prints a physical record in ROW_FORMAT=COMPACT. Ignores the
-record header. */
-UNIV_INTERN
+Prints a spatial index record. */
void
-rec_print_comp(
-/*===========*/
+rec_print_mbr_rec(
+/*=============*/
FILE* file, /*!< in: file where to print */
const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
MY_ATTRIBUTE((nonnull));
/***************************************************************//**
Prints a physical record. */
-UNIV_INTERN
void
rec_print_new(
/*==========*/
FILE* file, /*!< in: file where to print */
const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
MY_ATTRIBUTE((nonnull));
/***************************************************************//**
Prints a physical record. */
-UNIV_INTERN
void
rec_print(
/*======*/
@@ -961,19 +1025,128 @@ rec_print(
const dict_index_t* index) /*!< in: record descriptor */
MY_ATTRIBUTE((nonnull));
+/** Pretty-print a record.
+@param[in,out] o output stream
+@param[in] rec physical record
+@param[in] info rec_get_info_bits(rec)
+@param[in] offsets rec_get_offsets(rec) */
+void
+rec_print(
+ std::ostream& o,
+ const rec_t* rec,
+ ulint info,
+ const offset_t* offsets);
+
+/** Wrapper for pretty-printing a record */
+struct rec_index_print
+{
+ /** Constructor */
+ rec_index_print(const rec_t* rec, const dict_index_t* index) :
+ m_rec(rec), m_index(index)
+ {}
+
+ /** Record */
+ const rec_t* m_rec;
+ /** Index */
+ const dict_index_t* m_index;
+};
+
+/** Display a record.
+@param[in,out] o output stream
+@param[in] r record to display
+@return the output stream */
+std::ostream&
+operator<<(std::ostream& o, const rec_index_print& r);
+
+/** Wrapper for pretty-printing a record */
+struct rec_offsets_print
+{
+ /** Constructor */
+ rec_offsets_print(const rec_t* rec, const offset_t* offsets) :
+ m_rec(rec), m_offsets(offsets)
+ {}
+
+ /** Record */
+ const rec_t* m_rec;
+ /** Offsets to each field */
+ const offset_t* m_offsets;
+};
+
+/** Display a record.
+@param[in,out] o output stream
+@param[in] r record to display
+@return the output stream */
+std::ostream&
+operator<<(std::ostream& o, const rec_offsets_print& r);
+
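Because the wrappers only bundle their constructor arguments, a record
can be dumped into any std::ostream. A hypothetical diagnostic (rec,
offsets and index assumed in scope; <iostream> included):

/* Hypothetical usage; any std::ostream-compatible sink works. */
std::cerr << rec_index_print(rec, index) << '\n';
std::cerr << rec_offsets_print(rec, offsets) << '\n';
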
+# ifndef DBUG_OFF
+/** Pretty-printer of records and tuples */
+class rec_printer : public std::ostringstream {
+public:
+ /** Construct a pretty-printed record.
+ @param rec record with header
+ @param offsets rec_get_offsets(rec, ...) */
+ rec_printer(const rec_t* rec, const offset_t* offsets)
+ :
+ std::ostringstream ()
+ {
+ rec_print(*this, rec,
+ rec_get_info_bits(rec, rec_offs_comp(offsets)),
+ offsets);
+ }
+
+ /** Construct a pretty-printed record.
+ @param rec record, possibly lacking header
+ @param info rec_get_info_bits(rec)
+ @param offsets rec_get_offsets(rec, ...) */
+ rec_printer(const rec_t* rec, ulint info, const offset_t* offsets)
+ :
+ std::ostringstream ()
+ {
+ rec_print(*this, rec, info, offsets);
+ }
+
+ /** Construct a pretty-printed tuple.
+ @param tuple data tuple */
+ rec_printer(const dtuple_t* tuple)
+ :
+ std::ostringstream ()
+ {
+ dtuple_print(*this, tuple);
+ }
+
+ /** Construct a pretty-printed tuple.
+ @param field array of data tuple fields
+ @param n number of fields */
+ rec_printer(const dfield_t* field, ulint n)
+ :
+ std::ostringstream ()
+ {
+ dfield_print(*this, field, n);
+ }
+
+ /** Destructor */
+ virtual ~rec_printer() {}
+
+private:
+ /** Copy constructor */
+ rec_printer(const rec_printer& other);
+ /** Assignment operator */
+ rec_printer& operator=(const rec_printer& other);
+};
+# endif /* !DBUG_OFF */
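
Deriving rec_printer from std::ostringstream means the whole dump is
materialized as an in-memory string, which suits printf-style sinks
such as DBUG_PRINT. A sketch:

/* Sketch: one-shot pretty-printing into a DBUG trace. */
DBUG_PRINT("ib_rec",
	   ("insert %s", rec_printer(rec, offsets).str().c_str()));
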
+
# ifdef UNIV_DEBUG
-/************************************************************//**
-Reads the DB_TRX_ID of a clustered index record.
-@return the value of DB_TRX_ID */
-UNIV_INTERN
+/** Read the DB_TRX_ID of a clustered index record.
+@param[in] rec clustered index record
+@param[in] index clustered index
+@return the value of DB_TRX_ID */
trx_id_t
rec_get_trx_id(
-/*===========*/
- const rec_t* rec, /*!< in: record */
- const dict_index_t* index) /*!< in: clustered index */
+ const rec_t* rec,
+ const dict_index_t* index)
MY_ATTRIBUTE((nonnull, warn_unused_result));
# endif /* UNIV_DEBUG */
-#endif /* UNIV_HOTBACKUP */
/* Maximum lengths for the data in a physical record if the offsets
are given in one byte (resp. two byte) format. */
@@ -983,12 +1156,12 @@ are given in one byte (resp. two byte) format. */
/* The data size of a record must not be larger than this on
REDUNDANT row format because we reserve the two uppermost bits in a
two byte offset for special purposes */
-#define REDUNDANT_REC_MAX_DATA_SIZE (16383)
+#define REDUNDANT_REC_MAX_DATA_SIZE (16383)
/* The data size of a record must be smaller than this on
COMPRESSED row format because we reserve the two uppermost bits in a
two byte offset for special purposes */
-#define COMPRESSED_REC_MAX_DATA_SIZE (16384)
+#define COMPRESSED_REC_MAX_DATA_SIZE (16384)
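
The arithmetic behind both limits: a two-byte offset has 16 bits, and
reserving the two uppermost bits for the SQL NULL and extern flags leaves
14 bits, i.e. 2^14 = 16384 distinct values (0 to 16383). REDUNDANT records
may therefore use the largest representable offset, 16383, while
COMPRESSED records must stay strictly below 16384.
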
#ifdef WITH_WSREP
int wsrep_rec_get_foreign_key(
@@ -999,9 +1172,8 @@ int wsrep_rec_get_foreign_key(
dict_index_t* index_ref, /* in: index for referenced table */
ibool new_protocol); /* in: protocol > 1 */
#endif /* WITH_WSREP */
-#ifndef UNIV_NONINL
+
#include "rem0rec.ic"
-#endif
#endif /* !UNIV_INNOCHECKSUM */
-#endif
+#endif /* rem0rec_h */
diff --git a/storage/innobase/include/rem0rec.ic b/storage/innobase/include/rem0rec.ic
index e0d0682ff60..eae1c52a2a5 100644
--- a/storage/innobase/include/rem0rec.ic
+++ b/storage/innobase/include/rem0rec.ic
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2019, MariaDB Corporation.
+Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,17 +27,9 @@ Created 5/30/1994 Heikki Tuuri
#include "mach0data.h"
#include "ut0byte.h"
#include "dict0dict.h"
+#include "dict0boot.h"
#include "btr0types.h"
-/* Compact flag ORed to the extra size returned by rec_get_offsets() */
-#define REC_OFFS_COMPACT ((ulint) 1 << 31)
-/* SQL NULL flag in offsets returned by rec_get_offsets() */
-#define REC_OFFS_SQL_NULL ((ulint) 1 << 31)
-/* External flag in offsets returned by rec_get_offsets() */
-#define REC_OFFS_EXTERNAL ((ulint) 1 << 30)
-/* Mask for offsets returned by rec_get_offsets() */
-#define REC_OFFS_MASK (REC_OFFS_EXTERNAL - 1)
-
/* Offsets of the bit-fields in an old-style record. NOTE! In the table the
most significant bytes and bits are written below less significant.
@@ -137,7 +129,6 @@ and the shift needed to obtain each bit-field of the record. */
/***********************************************************//**
Sets the value of the ith field SQL null bit of an old-style record. */
-UNIV_INTERN
void
rec_set_nth_field_null_bit(
/*=======================*/
@@ -147,7 +138,6 @@ rec_set_nth_field_null_bit(
/***********************************************************//**
Sets an old-style record field to SQL null.
The physical size of the field is not changed. */
-UNIV_INTERN
void
rec_set_nth_field_sql_null(
/*=======================*/
@@ -239,7 +229,7 @@ rec_set_bit_field_2(
/******************************************************//**
The following function is used to get the pointer of the next chained record
on the same page.
-@return pointer to the next chained record, or NULL if none */
+@return pointer to the next chained record, or NULL if none */
UNIV_INLINE
const rec_t*
rec_get_next_ptr_const(
@@ -295,7 +285,7 @@ rec_get_next_ptr_const(
/******************************************************//**
The following function is used to get the pointer of the next chained record
on the same page.
-@return pointer to the next chained record, or NULL if none */
+@return pointer to the next chained record, or NULL if none */
UNIV_INLINE
rec_t*
rec_get_next_ptr(
@@ -309,7 +299,7 @@ rec_get_next_ptr(
/******************************************************//**
The following function is used to get the offset of the next chained record
on the same page.
-@return the page offset of the next chained record, or 0 if none */
+@return the page offset of the next chained record, or 0 if none */
UNIV_INLINE
ulint
rec_get_next_offs(
@@ -412,7 +402,7 @@ rec_set_next_offs_new(
/******************************************************//**
The following function is used to get the number of fields
in an old-style record.
-@return number of data fields */
+@return number of data fields */
UNIV_INLINE
ulint
rec_get_n_fields_old(
@@ -452,7 +442,7 @@ rec_set_n_fields_old(
/******************************************************//**
The following function retrieves the status bits of a new-style record.
-@return status bits */
+@return status bits */
UNIV_INLINE
ulint
rec_get_status(
@@ -473,7 +463,7 @@ rec_get_status(
/******************************************************//**
The following function is used to get the number of fields
in a record.
-@return number of data fields */
+@return number of data fields */
UNIV_INLINE
ulint
rec_get_n_fields(
@@ -502,10 +492,32 @@ rec_get_n_fields(
}
}
+/** Check whether the number of fields in the entry is sane
+by comparing it with another record on the same page.
+@param[in] index index
+@param[in] rec record on the same page
+@param[in] entry index entry
+@return true if n_fields is sane */
+UNIV_INLINE
+bool
+rec_n_fields_is_sane(
+ dict_index_t* index,
+ const rec_t* rec,
+ const dtuple_t* entry)
+{
+ return(rec_get_n_fields(rec, index)
+ == dtuple_get_n_fields(entry)
+ /* a record from an older SYS_INDEXES table (one that
+ lacks the merge_threshold column) is acceptable. */
+ || (index->table->id == DICT_INDEXES_ID
+ && rec_get_n_fields(rec, index)
+ == dtuple_get_n_fields(entry) - 1));
+}
+
/******************************************************//**
The following function is used to get the number of records owned by the
previous directory record.
-@return number of owned records */
+@return number of owned records */
UNIV_INLINE
ulint
rec_get_n_owned_old(
@@ -532,7 +544,7 @@ rec_set_n_owned_old(
/******************************************************//**
The following function is used to get the number of records owned by the
previous directory record.
-@return number of owned records */
+@return number of owned records */
UNIV_INLINE
ulint
rec_get_n_owned_new(
@@ -560,9 +572,22 @@ rec_set_n_owned_new(
}
}
+#ifdef UNIV_DEBUG
+/** Check if the info bits are valid.
+@param[in] bits info bits to check
+@return true if valid */
+inline
+bool
+rec_info_bits_valid(
+ ulint bits)
+{
+ return(0 == (bits & ~(REC_INFO_DELETED_FLAG | REC_INFO_MIN_REC_FLAG)));
+}
+#endif /* UNIV_DEBUG */
+
/******************************************************//**
The following function is used to retrieve the info bits of a record.
-@return info bits */
+@return info bits */
UNIV_INLINE
ulint
rec_get_info_bits(
@@ -570,9 +595,11 @@ rec_get_info_bits(
const rec_t* rec, /*!< in: physical record */
ulint comp) /*!< in: nonzero=compact page format */
{
- return(rec_get_bit_field_1(
- rec, comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
- REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT));
+ const ulint val = rec_get_bit_field_1(
+ rec, comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
+ REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
+ ut_ad(rec_info_bits_valid(val));
+ return(val);
}
/******************************************************//**
@@ -584,6 +611,7 @@ rec_set_info_bits_old(
rec_t* rec, /*!< in: old-style physical record */
ulint bits) /*!< in: info bits */
{
+ ut_ad(rec_info_bits_valid(bits));
rec_set_bit_field_1(rec, bits, REC_OLD_INFO_BITS,
REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
}
@@ -596,6 +624,7 @@ rec_set_info_bits_new(
rec_t* rec, /*!< in/out: new-style physical record */
ulint bits) /*!< in: info bits */
{
+ ut_ad(rec_info_bits_valid(bits));
rec_set_bit_field_1(rec, bits, REC_NEW_INFO_BITS,
REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
}
@@ -616,7 +645,7 @@ rec_set_status(
/******************************************************//**
The following function is used to retrieve the info and status
bits of a record. (Only compact records have status bits.)
-@return info bits */
+@return info bits */
UNIV_INLINE
ulint
rec_get_info_and_status_bits(
@@ -657,7 +686,7 @@ rec_set_info_and_status_bits(
/******************************************************//**
The following function tells if record is delete marked.
-@return nonzero if delete marked */
+@return nonzero if delete marked */
UNIV_INLINE
ulint
rec_get_deleted_flag(
@@ -727,7 +756,7 @@ rec_set_deleted_flag_new(
/******************************************************//**
The following function tells if a new-style record is a node pointer.
-@return TRUE if node pointer */
+@return TRUE if node pointer */
UNIV_INLINE
ibool
rec_get_node_ptr_flag(
@@ -740,7 +769,7 @@ rec_get_node_ptr_flag(
/******************************************************//**
The following function is used to get the order number
of an old-style record in the heap of the index page.
-@return heap order number */
+@return heap order number */
UNIV_INLINE
ulint
rec_get_heap_no_old(
@@ -768,7 +797,7 @@ rec_set_heap_no_old(
/******************************************************//**
The following function is used to get the order number
of a new-style record in the heap of the index page.
-@return heap order number */
+@return heap order number */
UNIV_INLINE
ulint
rec_get_heap_no_new(
@@ -796,7 +825,7 @@ rec_set_heap_no_new(
/******************************************************//**
The following function is used to test whether the data offsets in the record
are stored in one-byte or two-byte format.
-@return TRUE if 1-byte form */
+@return TRUE if 1-byte form */
UNIV_INLINE
ibool
rec_get_1byte_offs_flag(
@@ -833,9 +862,9 @@ rec_set_1byte_offs_flag(
Returns the offset of nth field end if the record is stored in the 1-byte
offsets form. If the field is SQL null, the flag is ORed in the returned
value.
-@return offset of the start of the field, SQL null flag ORed */
+@return offset of the start of the field, SQL null flag ORed */
UNIV_INLINE
-ulint
+uint8_t
rec_1_get_field_end_info(
/*=====================*/
const rec_t* rec, /*!< in: record */
@@ -854,7 +883,7 @@ value.
@return offset of the start of the field, SQL null flag and extern
storage flag ORed */
UNIV_INLINE
-ulint
+offset_t
rec_2_get_field_end_info(
/*=====================*/
const rec_t* rec, /*!< in: record */
@@ -888,12 +917,12 @@ the fields. */
/**********************************************************//**
The following function returns the number of allocated elements
for an array of offsets.
-@return number of elements */
+@return number of elements */
UNIV_INLINE
ulint
rec_offs_get_n_alloc(
/*=================*/
- const ulint* offsets)/*!< in: array for rec_get_offsets() */
+ const offset_t* offsets)/*!< in: array for rec_get_offsets() */
{
ulint n_alloc;
ut_ad(offsets);
@@ -910,23 +939,23 @@ UNIV_INLINE
void
rec_offs_set_n_alloc(
/*=================*/
- ulint* offsets, /*!< out: array for rec_get_offsets(),
+ offset_t* offsets, /*!< out: array for rec_get_offsets(),
must be allocated */
ulint n_alloc) /*!< in: number of elements */
{
ut_ad(n_alloc > REC_OFFS_HEADER_SIZE);
UNIV_MEM_ALLOC(offsets, n_alloc * sizeof *offsets);
- offsets[0] = n_alloc;
+ offsets[0] = static_cast<offset_t>(n_alloc);
}
/**********************************************************//**
The following function returns the number of fields in a record.
-@return number of fields */
+@return number of fields */
UNIV_INLINE
ulint
rec_offs_n_fields(
/*==============*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
{
ulint n_fields;
ut_ad(offsets);
@@ -940,29 +969,29 @@ rec_offs_n_fields(
/************************************************************//**
Validates offsets returned by rec_get_offsets().
-@return TRUE if valid */
+@return TRUE if valid */
UNIV_INLINE
ibool
rec_offs_validate(
/*==============*/
const rec_t* rec, /*!< in: record or NULL */
const dict_index_t* index, /*!< in: record descriptor or NULL */
- const ulint* offsets)/*!< in: array returned by
+ const offset_t* offsets)/*!< in: array returned by
rec_get_offsets() */
{
ulint i = rec_offs_n_fields(offsets);
ulint last = ULINT_MAX;
- ulint comp = *rec_offs_base(offsets) & REC_OFFS_COMPACT;
+ bool comp = rec_offs_base(offsets)[0] & REC_OFFS_COMPACT;
if (rec) {
- ut_ad((ulint) rec == offsets[2]);
+ ut_ad(!memcmp(&rec, &offsets[RECORD_OFFSET], sizeof(rec)));
if (!comp) {
ut_a(rec_get_n_fields_old(rec) >= i);
}
}
if (index) {
ulint max_n_fields;
- ut_ad((ulint) index == offsets[3]);
+ ut_ad(!memcmp(&index, &offsets[INDEX_OFFSET], sizeof(index)));
max_n_fields = ut_max(
dict_index_get_n_fields(index),
dict_index_get_n_unique_in_tree(index) + 1);
@@ -987,7 +1016,7 @@ rec_offs_validate(
ut_a(!index->n_def || i <= max_n_fields);
}
while (i--) {
- ulint curr = rec_offs_base(offsets)[1 + i] & REC_OFFS_MASK;
+ offset_t curr = get_value(rec_offs_base(offsets)[1 + i]);
ut_a(curr <= last);
last = curr;
}
@@ -1003,63 +1032,55 @@ rec_offs_make_valid(
/*================*/
const rec_t* rec, /*!< in: record */
const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets)/*!< in: array returned by
+ offset_t* offsets)/*!< in: array returned by
rec_get_offsets() */
{
ut_ad(rec);
ut_ad(index);
ut_ad(offsets);
ut_ad(rec_get_n_fields(rec, index) >= rec_offs_n_fields(offsets));
- offsets[2] = (ulint) rec;
- offsets[3] = (ulint) index;
+ memcpy(&offsets[RECORD_OFFSET], &rec, sizeof(rec));
+ memcpy(&offsets[INDEX_OFFSET], &index, sizeof(index));
}
#endif /* UNIV_DEBUG */
/************************************************************//**
The following function is used to get an offset to the nth
data field in a record.
-@return offset from the origin of rec */
+@return offset from the origin of rec */
UNIV_INLINE
-ulint
+offset_t
rec_get_nth_field_offs(
/*===================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
ulint n, /*!< in: index of the field */
ulint* len) /*!< out: length of the field; UNIV_SQL_NULL
if SQL null */
{
- ulint offs;
- ulint length;
ut_ad(n < rec_offs_n_fields(offsets));
- if (n == 0) {
- offs = 0;
- } else {
- offs = rec_offs_base(offsets)[n] & REC_OFFS_MASK;
- }
-
- length = rec_offs_base(offsets)[1 + n];
+ offset_t offs = n == 0 ? 0 : get_value(rec_offs_base(offsets)[n]);
+ offset_t next_offs = rec_offs_base(offsets)[1 + n];
- if (length & REC_OFFS_SQL_NULL) {
- length = UNIV_SQL_NULL;
+ if (get_type(next_offs) == SQL_NULL) {
+ *len = UNIV_SQL_NULL;
} else {
- length &= REC_OFFS_MASK;
- length -= offs;
+ *len = get_value(next_offs) - offs;
}
- *len = length;
return(offs);
}
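
The get_type() and get_value() helpers used above belong to the new tagged
encoding of offset_t entries, which replaces the old REC_OFFS_SQL_NULL and
REC_OFFS_EXTERNAL bit flags. Their definitions lie outside this excerpt,
so the following is a reconstruction under stated assumptions: only the
names STORED_OFFPAGE and SQL_NULL are visible in this diff, and the exact
bit positions are assumed.

/* Sketch of the tagged-offset encoding assumed by this file:
   the top two bits of an offset_t carry the field type and the
   low 14 bits carry the end offset (values are assumptions). */
enum field_type_t {
	STORED_IN_RECORD = 0 << 14,	/* ordinary field */
	STORED_OFFPAGE	 = 1 << 14,	/* externally stored (BLOB) */
	SQL_NULL	 = 2 << 14	/* SQL NULL, no data bytes */
};

static const offset_t TYPE_MASK  = offset_t(3U << 14);
static const offset_t VALUE_MASK = offset_t((1U << 14) - 1);

inline field_type_t get_type(offset_t n)
{
	return static_cast<field_type_t>(n & TYPE_MASK);
}

inline offset_t get_value(offset_t n)
{
	return offset_t(n & VALUE_MASK);
}

inline offset_t combine(offset_t value, field_type_t type)
{
	return offset_t(get_value(value) | type);
}

The 14-bit value range, 0 to 16383, is exactly the
REDUNDANT_REC_MAX_DATA_SIZE limit defined in rem0rec.h.
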
/******************************************************//**
Determine if the offsets are for a record in the new
compact format.
-@return nonzero if compact format */
+@return nonzero if compact format */
UNIV_INLINE
ulint
rec_offs_comp(
/*==========*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
{
ut_ad(rec_offs_validate(NULL, NULL, offsets));
return(*rec_offs_base(offsets) & REC_OFFS_COMPACT);
@@ -1068,12 +1089,12 @@ rec_offs_comp(
/******************************************************//**
Determine if the offsets are for a record containing
externally stored columns.
-@return nonzero if externally stored */
+@return nonzero if externally stored */
UNIV_INLINE
ulint
rec_offs_any_extern(
/*================*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
{
ut_ad(rec_offs_validate(NULL, NULL, offsets));
return(*rec_offs_base(offsets) & REC_OFFS_EXTERNAL);
@@ -1081,13 +1102,13 @@ rec_offs_any_extern(
/******************************************************//**
Determine if the offsets are for a record containing null BLOB pointers.
-@return first field containing a null BLOB pointer, or NULL if none found */
+@return first field containing a null BLOB pointer, or NULL if none found */
UNIV_INLINE
const byte*
rec_offs_any_null_extern(
/*=====================*/
const rec_t* rec, /*!< in: record */
- const ulint* offsets) /*!< in: rec_get_offsets(rec) */
+ const offset_t* offsets) /*!< in: rec_get_offsets(rec) */
{
ulint i;
ut_ad(rec_offs_validate(rec, NULL, offsets));
@@ -1117,61 +1138,61 @@ rec_offs_any_null_extern(
/******************************************************//**
Returns nonzero if the extern bit is set in nth field of rec.
-@return nonzero if externally stored */
+@return nonzero if externally stored */
UNIV_INLINE
ulint
rec_offs_nth_extern(
/*================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
ulint n) /*!< in: nth field */
{
ut_ad(rec_offs_validate(NULL, NULL, offsets));
ut_ad(n < rec_offs_n_fields(offsets));
- return(rec_offs_base(offsets)[1 + n] & REC_OFFS_EXTERNAL);
+ return get_type(rec_offs_base(offsets)[1 + n]) == STORED_OFFPAGE;
}
/******************************************************//**
Returns nonzero if the SQL NULL bit is set in nth field of rec.
-@return nonzero if SQL NULL */
+@return nonzero if SQL NULL */
UNIV_INLINE
ulint
rec_offs_nth_sql_null(
/*==================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
ulint n) /*!< in: nth field */
{
ut_ad(rec_offs_validate(NULL, NULL, offsets));
ut_ad(n < rec_offs_n_fields(offsets));
- return(rec_offs_base(offsets)[1 + n] & REC_OFFS_SQL_NULL);
+ return get_type(rec_offs_base(offsets)[1 + n]) == SQL_NULL;
}
/******************************************************//**
Gets the physical size of a field.
-@return length of field */
+@return length of field */
UNIV_INLINE
ulint
rec_offs_nth_size(
/*==============*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
ulint n) /*!< in: nth field */
{
ut_ad(rec_offs_validate(NULL, NULL, offsets));
ut_ad(n < rec_offs_n_fields(offsets));
if (!n) {
- return(rec_offs_base(offsets)[1 + n] & REC_OFFS_MASK);
+ return get_value(rec_offs_base(offsets)[1 + n]);
}
- return((rec_offs_base(offsets)[1 + n] - rec_offs_base(offsets)[n])
- & REC_OFFS_MASK);
+ return get_value((rec_offs_base(offsets)[1 + n]))
+ - get_value(rec_offs_base(offsets)[n]);
}
/******************************************************//**
Returns the number of extern bits set in a record.
-@return number of externally stored fields */
+@return number of externally stored fields */
UNIV_INLINE
ulint
rec_offs_n_extern(
/*==============*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
{
ulint n = 0;
@@ -1194,7 +1215,7 @@ offsets form. If the field is SQL null, the flag is ORed in the returned
value. This function and the 2-byte counterpart are defined here because the
C-compiler was not able to sum negative and positive constant offsets, and
warned of constant arithmetic overflow within the compiler.
-@return offset of the start of the PREVIOUS field, SQL null flag ORed */
+@return offset of the start of the PREVIOUS field, SQL null flag ORed */
UNIV_INLINE
ulint
rec_1_get_prev_field_end_info(
@@ -1212,7 +1233,7 @@ rec_1_get_prev_field_end_info(
Returns the offset of n - 1th field end if the record is stored in the 2-byte
offsets form. If the field is SQL null, the flag is ORed in the returned
value.
-@return offset of the start of the PREVIOUS field, SQL null flag ORed */
+@return offset of the start of the PREVIOUS field, SQL null flag ORed */
UNIV_INLINE
ulint
rec_2_get_prev_field_end_info(
@@ -1263,7 +1284,7 @@ rec_2_set_field_end_info(
/******************************************************//**
Returns the offset of nth field start if the record is stored in the 1-byte
offsets form.
-@return offset of the start of the field */
+@return offset of the start of the field */
UNIV_INLINE
ulint
rec_1_get_field_start_offs(
@@ -1286,7 +1307,7 @@ rec_1_get_field_start_offs(
/******************************************************//**
Returns the offset of nth field start if the record is stored in the 2-byte
offsets form.
-@return offset of the start of the field */
+@return offset of the start of the field */
UNIV_INLINE
ulint
rec_2_get_field_start_offs(
@@ -1311,7 +1332,7 @@ The following function is used to read the offset of the start of a data field
in the record. The start of an SQL null field is the end offset of the
previous non-null field, or 0, if none exists. If n is the number of the last
field + 1, then the end offset of the last field is returned.
-@return offset of the start of the field */
+@return offset of the start of the field */
UNIV_INLINE
ulint
rec_get_field_start_offs(
@@ -1339,7 +1360,7 @@ rec_get_field_start_offs(
Gets the physical size of an old-style field.
Also an SQL null may have a field of size > 0,
if the data type is of a fixed size.
-@return field size in bytes */
+@return field size in bytes */
UNIV_INLINE
ulint
rec_get_nth_field_size(
@@ -1369,7 +1390,7 @@ void
rec_set_nth_field(
/*==============*/
rec_t* rec, /*!< in: record */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
ulint n, /*!< in: index number of the field */
const void* data, /*!< in: pointer to the data
if not SQL null */
@@ -1406,7 +1427,7 @@ The following function returns the data size of an old-style physical
record, that is the sum of field lengths. SQL null fields
are counted as length 0 fields. The value returned by the function
is the distance from record origin to record end in bytes.
-@return size */
+@return size */
UNIV_INLINE
ulint
rec_get_data_size_old(
@@ -1424,16 +1445,16 @@ UNIV_INLINE
void
rec_offs_set_n_fields(
/*==================*/
- ulint* offsets, /*!< in/out: array returned by
+ offset_t* offsets, /*!< in/out: array returned by
rec_get_offsets() */
- ulint n_fields) /*!< in: number of fields */
+ ulint n_fields) /*!< in: number of fields */
{
ut_ad(offsets);
ut_ad(n_fields > 0);
ut_ad(n_fields <= REC_MAX_N_FIELDS);
ut_ad(n_fields + REC_OFFS_HEADER_SIZE
<= rec_offs_get_n_alloc(offsets));
- offsets[1] = n_fields;
+ offsets[1] = static_cast<offset_t>(n_fields);
}
/**********************************************************//**
@@ -1441,18 +1462,17 @@ The following function returns the data size of a physical
record, that is the sum of field lengths. SQL null fields
are counted as length 0 fields. The value returned by the function
is the distance from record origin to record end in bytes.
-@return size */
+@return size */
UNIV_INLINE
ulint
rec_offs_data_size(
/*===============*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
{
ulint size;
ut_ad(rec_offs_validate(NULL, NULL, offsets));
- size = rec_offs_base(offsets)[rec_offs_n_fields(offsets)]
- & REC_OFFS_MASK;
+ size = get_value(rec_offs_base(offsets)[rec_offs_n_fields(offsets)]);
ut_ad(size < UNIV_PAGE_SIZE);
return(size);
}
@@ -1461,12 +1481,12 @@ rec_offs_data_size(
Returns the total size of record minus data size of record. The value
returned by the function is the distance from record start to record origin
in bytes.
-@return size */
+@return size */
UNIV_INLINE
ulint
rec_offs_extra_size(
/*================*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
{
ulint size;
ut_ad(rec_offs_validate(NULL, NULL, offsets));
@@ -1477,12 +1497,12 @@ rec_offs_extra_size(
/**********************************************************//**
Returns the total size of a physical record.
-@return size */
+@return size */
UNIV_INLINE
ulint
rec_offs_size(
/*==========*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
{
return(rec_offs_data_size(offsets) + rec_offs_extra_size(offsets));
}
@@ -1490,13 +1510,13 @@ rec_offs_size(
#ifdef UNIV_DEBUG
/**********************************************************//**
Returns a pointer to the end of the record.
-@return pointer to end */
+@return pointer to end */
UNIV_INLINE
byte*
rec_get_end(
/*========*/
const rec_t* rec, /*!< in: pointer to record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
{
ut_ad(rec_offs_validate(rec, NULL, offsets));
return(const_cast<rec_t*>(rec + rec_offs_data_size(offsets)));
@@ -1504,29 +1524,30 @@ rec_get_end(
/**********************************************************//**
Returns a pointer to the start of the record.
-@return pointer to start */
+@return pointer to start */
UNIV_INLINE
byte*
rec_get_start(
/*==========*/
const rec_t* rec, /*!< in: pointer to record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
{
ut_ad(rec_offs_validate(rec, NULL, offsets));
return(const_cast<rec_t*>(rec - rec_offs_extra_size(offsets)));
}
#endif /* UNIV_DEBUG */
-/***************************************************************//**
-Copies a physical record to a buffer.
-@return pointer to the origin of the copy */
+/** Copy a physical record to a buffer.
+@param[in] buf buffer
+@param[in] rec physical record
+@param[in] offsets array returned by rec_get_offsets()
+@return pointer to the origin of the copy */
UNIV_INLINE
rec_t*
rec_copy(
-/*=====*/
- void* buf, /*!< in: buffer */
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ void* buf,
+ const rec_t* rec,
+ const offset_t* offsets)
{
ulint extra_len;
ulint data_len;
@@ -1547,7 +1568,7 @@ rec_copy(
/**********************************************************//**
Returns the extra size of an old-style physical record if we know its
data size and number of fields.
-@return extra size */
+@return extra size */
UNIV_INLINE
ulint
rec_get_converted_extra_size(
@@ -1567,7 +1588,7 @@ rec_get_converted_extra_size(
/**********************************************************//**
The following function returns the size of a data tuple when converted to
a physical record.
-@return size */
+@return size */
UNIV_INLINE
ulint
rec_get_converted_size(
@@ -1581,12 +1602,19 @@ rec_get_converted_size(
ut_ad(dtuple_check_typed(dtuple));
- ut_ad(dict_index_is_univ(index)
+ ut_ad(dict_index_is_ibuf(index)
+
|| dtuple_get_n_fields(dtuple)
- == (((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK)
- == REC_STATUS_NODE_PTR)
- ? dict_index_get_n_unique_in_tree(index) + 1
- : dict_index_get_n_fields(index)));
+ == (((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK)
+ == REC_STATUS_NODE_PTR)
+ ? dict_index_get_n_unique_in_tree_nonleaf(index) + 1
+ : dict_index_get_n_fields(index))
+
+ /* a record from an older SYS_INDEXES table (one that
+ lacks the merge_threshold column) is acceptable. */
+ || (index->table->id == DICT_INDEXES_ID
+ && dtuple_get_n_fields(dtuple)
+ == dict_index_get_n_fields(index) - 1));
if (dict_table_is_comp(index->table)) {
return(rec_get_converted_size_comp(index,
@@ -1598,6 +1626,11 @@ rec_get_converted_size(
data_size = dtuple_get_data_size(dtuple, 0);
+ /* If the primary key is being updated, the new record inherits
+ externally stored fields from the delete-marked old record.
+ In that case, n_ext may be less than
+ dtuple_get_n_ext(dtuple). */
+ ut_ad(n_ext <= dtuple_get_n_ext(dtuple));
extra_size = rec_get_converted_extra_size(
data_size, dtuple_get_n_fields(dtuple), n_ext);
@@ -1611,7 +1644,7 @@ rec_get_converted_size(
support multiple page sizes. At that time, we will need
to consider the node pointer on these universal btrees. */
- if (dict_index_is_univ(index)) {
+ if (dict_index_is_ibuf(index)) {
/* This is for the insert buffer B-tree.
All fields in the leaf tuple ascend to the
parent node plus the child page pointer. */
@@ -1639,23 +1672,21 @@ rec_get_converted_size(
return(data_size + extra_size);
}
-#ifndef UNIV_HOTBACKUP
-/************************************************************//**
-Folds a prefix of a physical record to a ulint. Folds only existing fields,
-that is, checks that we do not run out of the record.
-@return the folded value */
+/** Fold a prefix of a physical record.
+@param[in] rec index record
+@param[in] offsets return value of rec_get_offsets()
+@param[in] n_fields number of complete fields to fold
+@param[in] n_bytes number of bytes to fold in the last field
+@param[in] tree_id index tree ID
+@return the folded value */
UNIV_INLINE
ulint
rec_fold(
-/*=====*/
- const rec_t* rec, /*!< in: the physical record */
- const ulint* offsets, /*!< in: array returned by
- rec_get_offsets() */
- ulint n_fields, /*!< in: number of complete
- fields to fold */
- ulint n_bytes, /*!< in: number of bytes to fold
- in an incomplete last field */
- index_id_t tree_id) /*!< in: index tree id */
+ const rec_t* rec,
+ const offset_t* offsets,
+ ulint n_fields,
+ ulint n_bytes,
+ index_id_t tree_id)
{
ulint i;
const byte* data;
@@ -1665,7 +1696,7 @@ rec_fold(
ut_ad(rec_offs_validate(rec, NULL, offsets));
ut_ad(rec_validate(rec, offsets));
- ut_ad(n_fields + n_bytes > 0);
+ ut_ad(n_fields > 0 || n_bytes > 0);
n_fields_rec = rec_offs_n_fields(offsets);
ut_ad(n_fields <= n_fields_rec);
@@ -1705,4 +1736,3 @@ rec_fold(
return(fold);
}
-#endif /* !UNIV_HOTBACKUP */
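
The hunks above show only rec_fold()'s assertions and its final return;
the elided middle hashes the index tree ID and then each complete field
prefix. A conceptual reconstruction (ut_fold_ull(), ut_fold_ulint_pair()
and ut_fold_binary() are InnoDB's standard folding primitives; the exact
sequencing here is an assumption):

/* Conceptual shape of the elided fold loop. */
ulint fold = ut_fold_ull(tree_id);

for (ulint i = 0; i < n_fields; i++) {
	ulint		len;
	const byte*	data = rec_get_nth_field(rec, offsets, i, &len);

	if (len != UNIV_SQL_NULL) {
		fold = ut_fold_ulint_pair(fold, ut_fold_binary(data, len));
	}
}
/* ...then up to n_bytes of the next, incomplete field... */
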
diff --git a/storage/innobase/include/rem0types.h b/storage/innobase/include/rem0types.h
index cc59bd91076..4482517fc4e 100644
--- a/storage/innobase/include/rem0types.h
+++ b/storage/innobase/include/rem0types.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -29,6 +30,9 @@ Created 5/30/1994 Heikki Tuuri
/* We define the physical record simply as an array of bytes */
typedef byte rec_t;
+/** This type represents a field offset in a rec_t* */
+typedef unsigned short int offset_t;
+
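On 64-bit builds this typedef shrinks each offsets element from 8 bytes
(ulint) to 2, and 16 bits are still sufficient: record data sizes are
bounded by 16384 (see the REDUNDANT/COMPRESSED limits in rem0rec.h), so
14 value bits plus a 2-bit type tag fit exactly. A hypothetical
compile-time guard one might add:

/* Hypothetical guard (not part of the original header): the
   tagged encoding needs offset_t to be exactly 16 bits wide. */
static_assert(sizeof(offset_t) == 2, "offset_t must be 16 bits");
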
/* Maximum values for various fields (for non-blob tuples) */
#define REC_MAX_N_FIELDS (1024 - 1)
#define REC_MAX_HEAP_NO (2 * 8192 - 1)
diff --git a/storage/innobase/include/row0ext.h b/storage/innobase/include/row0ext.h
index 02f46867da6..11a6bfa4667 100644
--- a/storage/innobase/include/row0ext.h
+++ b/storage/innobase/include/row0ext.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,16 +26,15 @@ Created September 2006 Marko Makela
#ifndef row0ext_h
#define row0ext_h
-#include "univ.i"
-#include "row0types.h"
#include "data0types.h"
#include "mem0mem.h"
#include "dict0types.h"
+#include "page0size.h"
+#include "row0types.h"
/********************************************************************//**
Creates a cache of column prefixes of externally stored columns.
-@return own: column prefix cache */
-UNIV_INTERN
+@return own: column prefix cache */
row_ext_t*
row_ext_create(
/*===========*/
@@ -92,11 +91,12 @@ struct row_ext_t{
REC_ANTELOPE_MAX_INDEX_COL_LEN or
REC_VERSION_56_MAX_INDEX_COL_LEN depending
on row format */
+ page_size_t page_size;
+ /*!< page size of the externally stored
+ columns */
ulint len[1]; /*!< prefix lengths; 0 if not cached */
};
-#ifndef UNIV_NONINL
#include "row0ext.ic"
-#endif
#endif
diff --git a/storage/innobase/include/row0ftsort.h b/storage/innobase/include/row0ftsort.h
index 7b99eb3773b..73f815604d8 100644
--- a/storage/innobase/include/row0ftsort.h
+++ b/storage/innobase/include/row0ftsort.h
@@ -27,14 +27,12 @@ Created 10/13/2010 Jimmy Yang
#ifndef row0ftsort_h
#define row0ftsort_h
-#include "univ.i"
#include "data0data.h"
-#include "dict0types.h"
-#include "row0mysql.h"
#include "fts0fts.h"
-#include "fts0types.h"
#include "fts0priv.h"
+#include "rem0types.h"
#include "row0merge.h"
+#include "btr0bulk.h"
/** This structure defines the information that the scan thread will fetch
and put into the linked list for the parallel tokenization/sort threads
@@ -53,7 +51,6 @@ struct fts_doc_item {
tokenization threads and sort threads. */
typedef UT_LIST_BASE_NODE_T(fts_doc_item_t) fts_doc_list_t;
-#define FTS_NUM_AUX_INDEX 6
#define FTS_PLL_MERGE 1
/** Sort information passed to each individual parallel sort thread */
@@ -98,6 +95,15 @@ struct fts_psort_t {
ib_mutex_t mutex; /*!< mutex for fts_doc_list */
};
+/** Row fts token for plugin parser */
+struct row_fts_token_t {
+ fts_string_t* text; /*!< token */
+ UT_LIST_NODE_T(row_fts_token_t)
+ token_list; /*!< next token link */
+};
+
+typedef UT_LIST_BASE_NODE_T(row_fts_token_t) fts_token_list_t;
+
/** Structure stores information from string tokenization operation */
struct fts_tokenize_ctx {
ulint processed_len; /*!< processed string length */
@@ -111,13 +117,16 @@ struct fts_tokenize_ctx {
ib_rbt_t* cached_stopword;/*!< in: stopword list */
dfield_t sort_field[FTS_NUM_FIELDS_SORT];
/*!< in: sort field */
+ fts_token_list_t fts_token_list;
fts_tokenize_ctx() :
processed_len(0), init_pos(0), buf_used(0),
- rows_added(), cached_stopword(NULL), sort_field()
+ rows_added(), cached_stopword(NULL), sort_field(),
+ fts_token_list()
{
memset(rows_added, 0, sizeof rows_added);
memset(sort_field, 0, sizeof sort_field);
+ UT_LIST_INIT(fts_token_list, &row_fts_token_t::token_list);
}
};
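
A plugin-parser token is linked onto fts_token_list and later drained in
order; roughly as sketched below (the mem_heap_alloc()-based allocation
and the surrounding variables are assumptions, only the struct fields and
the UT_LIST registration come from this diff):

/* Sketch: queueing tokens produced by a plugin parser. */
fts_tokenize_ctx_t	ctx;	/* fts_token_list set up by the ctor */

row_fts_token_t*	tok = static_cast<row_fts_token_t*>(
	mem_heap_alloc(heap, sizeof *tok));
tok->text = str;		/* a tokenized fts_string_t* */
UT_LIST_ADD_LAST(ctx.fts_token_list, tok);

while (row_fts_token_t* t = UT_LIST_GET_FIRST(ctx.fts_token_list)) {
	UT_LIST_REMOVE(ctx.fts_token_list, t);
	/* ...convert t->text into sort fields here... */
}
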
@@ -126,13 +135,16 @@ typedef struct fts_tokenize_ctx fts_tokenize_ctx_t;
/** Structure stores information needed for the insertion phase of FTS
parallel sort. */
struct fts_psort_insert {
- trx_t* trx; /*!< Transaction used for insertion */
- que_t** ins_graph; /*!< insert graph */
- fts_table_t fts_table; /*!< auxiliary table */
CHARSET_INFO* charset; /*!< charset info */
mem_heap_t* heap; /*!< heap */
ibool opt_doc_id_size;/*!< Whether to use smaller (4 bytes)
integer for Doc ID */
+ BtrBulk* btr_bulk; /*!< Bulk load instance */
+ dtuple_t* tuple; /*!< Tuple to insert */
+
+#ifdef UNIV_DEBUG
+ ulint aux_index_id; /*!< Auxiliary index id */
+#endif
};
typedef struct fts_psort_insert fts_psort_insert_t;
@@ -166,7 +178,6 @@ tokenized doc string. The index has three "fields":
3) Word's position in original 'doc'.
@return dict_index_t structure for the fts sort index */
-UNIV_INTERN
dict_index_t*
row_merge_create_fts_sort_index(
/*============================*/
@@ -183,7 +194,6 @@ row_merge_create_fts_sort_index(
/********************************************************************//**
Initialize FTS parallel sort structures.
@return TRUE if all successful */
-UNIV_INTERN
ibool
row_fts_psort_info_init(
/*====================*/
@@ -204,7 +214,6 @@ row_fts_psort_info_init(
/********************************************************************//**
Clean up and deallocate FTS parallel sort structures, and close
temporary merge sort files */
-UNIV_INTERN
void
row_fts_psort_info_destroy(
/*=======================*/
@@ -212,60 +221,26 @@ row_fts_psort_info_destroy(
fts_psort_t* merge_info); /*!< parallel merge info */
/********************************************************************//**
Free up merge buffers when merge sort is done */
-UNIV_INTERN
void
row_fts_free_pll_merge_buf(
/*=======================*/
fts_psort_t* psort_info); /*!< in: parallel sort info */
/*********************************************************************//**
-Function performs parallel tokenization of the incoming doc strings.
-@return OS_THREAD_DUMMY_RETURN */
-UNIV_INTERN
-os_thread_ret_t
-fts_parallel_tokenization(
-/*======================*/
- void* arg); /*!< in: psort_info for the thread */
-/*********************************************************************//**
Start the parallel tokenization and parallel merge sort */
-UNIV_INTERN
void
row_fts_start_psort(
/*================*/
fts_psort_t* psort_info); /*!< in: parallel sort info */
/*********************************************************************//**
-Function performs the merge and insertion of the sorted records.
-@return OS_THREAD_DUMMY_RETURN */
-UNIV_INTERN
-os_thread_ret_t
-fts_parallel_merge(
-/*===============*/
- void* arg); /*!< in: parallel merge info */
-/*********************************************************************//**
Kick off the parallel merge and insert thread */
-UNIV_INTERN
void
row_fts_start_parallel_merge(
/*=========================*/
fts_psort_t* merge_info); /*!< in: parallel sort info */
/********************************************************************//**
-Read sorted FTS data files and insert data tuples to auxillary tables.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-void
-row_fts_insert_tuple(
-/*=================*/
- fts_psort_insert_t*
- ins_ctx, /*!< in: insert context */
- fts_tokenizer_word_t* word, /*!< in: last processed
- tokenized word */
- ib_vector_t* positions, /*!< in: word position */
- doc_id_t* in_doc_id, /*!< in: last item doc id */
- dtuple_t* dtuple); /*!< in: entry to insert */
-/********************************************************************//**
Propagate a newly added record up one level in the selection tree
@return parent where this value propagated to */
-UNIV_INTERN
int
row_merge_fts_sel_propagate(
/*========================*/
@@ -273,13 +248,12 @@ row_merge_fts_sel_propagate(
int* sel_tree, /*!< in: selection tree */
ulint level, /*!< in: selection tree level */
const mrec_t** mrec, /*!< in: sort record */
- ulint** offsets, /*<! in: record offsets */
+ offset_t** offsets, /*!< in: record offsets */
dict_index_t* index); /*!< in: FTS index */
/********************************************************************//**
Read sorted file containing index data tuples and insert these data
tuples to the index
@return DB_SUCCESS or error number */
-UNIV_INTERN
dberr_t
row_fts_merge_insert(
/*=================*/
diff --git a/storage/innobase/include/row0import.h b/storage/innobase/include/row0import.h
index 4f9f372ffa1..b553f169c91 100644
--- a/storage/innobase/include/row0import.h
+++ b/storage/innobase/include/row0import.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,8 +27,6 @@ Created 2012-02-08 by Sunny Bains
#ifndef row0import_h
#define row0import_h
-#include "univ.i"
-#include "db0err.h"
#include "dict0types.h"
// Forward declarations
@@ -38,8 +37,7 @@ struct row_prebuilt_t;
/*****************************************************************//**
Imports a tablespace. The space id in the .ibd file must match the space id
of the table in the data dictionary.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
dberr_t
row_import_for_mysql(
/*=================*/
@@ -51,7 +49,6 @@ row_import_for_mysql(
/*****************************************************************//**
Update the DICT_TF2_DISCARDED flag in SYS_TABLES.
@return DB_SUCCESS or error code. */
-UNIV_INTERN
dberr_t
row_import_update_discarded_flag(
/*=============================*/
@@ -66,26 +63,13 @@ row_import_update_discarded_flag(
dict_sys_t::mutex. */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*****************************************************************//**
-Update the (space, root page) of a table's indexes from the values
-in the data dictionary.
+/** Update the root page numbers and tablespace ID of a table.
+@param[in,out] trx dictionary transaction
+@param[in,out] table persistent table
+@param[in] reset whether to reset the fields to FIL_NULL
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
-row_import_update_index_root(
-/*=========================*/
- trx_t* trx, /*!< in/out: transaction that
- covers the update */
- const dict_table_t* table, /*!< in: Table for which we want
- to set the root page_no */
- bool reset, /*!< in: if true then set to
- FIL_NUL */
- bool dict_locked) /*!< in: Set to true if the
- caller already owns the
- dict_sys_t:: mutex. */
+row_import_update_index_root(trx_t* trx, dict_table_t* table, bool reset)
MY_ATTRIBUTE((nonnull, warn_unused_result));
-#ifndef UNIV_NONINL
-#include "row0import.ic"
-#endif
#endif /* row0import_h */
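For illustration, a minimal caller sketch of the simplified row_import_update_index_root(): with the dict_locked parameter gone, the caller is now assumed to acquire the dictionary mutex itself. mutex_enter()/mutex_exit() and dict_sys are the usual InnoDB names; the error handling below is a placeholder, not part of the patch.

	mutex_enter(&dict_sys->mutex);	/* replaces dict_locked = true */
	dberr_t	err = row_import_update_index_root(
		trx, table, false/* reset */);
	mutex_exit(&dict_sys->mutex);
	if (err != DB_SUCCESS) {
		/* report the error; the transaction is not committed here */
	}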
diff --git a/storage/innobase/include/row0ins.h b/storage/innobase/include/row0ins.h
index 54e7fa6d1fb..27fe442f6ff 100644
--- a/storage/innobase/include/row0ins.h
+++ b/storage/innobase/include/row0ins.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, 2020 MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,12 +27,11 @@ Created 4/20/1996 Heikki Tuuri
#ifndef row0ins_h
#define row0ins_h
-#include "univ.i"
#include "data0data.h"
#include "que0types.h"
-#include "dict0types.h"
#include "trx0types.h"
#include "row0types.h"
+#include <vector>
/***************************************************************//**
Checks if foreign key constraint fails for an index entry. Sets shared locks
@@ -39,7 +39,6 @@ which lock either the success or the failure of the constraint. NOTE that
the caller must have a shared latch on dict_foreign_key_check_lock.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_NO_REFERENCED_ROW, or
DB_ROW_IS_REFERENCED */
-UNIV_INTERN
dberr_t
row_ins_check_foreign_constraint(
/*=============================*/
@@ -56,8 +55,7 @@ row_ins_check_foreign_constraint(
MY_ATTRIBUTE((nonnull, warn_unused_result));
/*********************************************************************//**
Creates an insert node struct.
-@return own: insert node struct */
-UNIV_INTERN
+@return own: insert node struct */
ins_node_t*
ins_node_create(
/*============*/
@@ -68,7 +66,6 @@ ins_node_create(
Sets a new row to insert for an INS_DIRECT node. This function is only used
if we have constructed the row separately, which is a rare case; this
function is quite slow. */
-UNIV_INTERN
void
ins_node_set_new_row(
/*=================*/
@@ -85,7 +82,6 @@ the delete marked record.
@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG)
@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed
@return error code */
-UNIV_INTERN
dberr_t
row_ins_clust_index_entry_low(
/*==========================*/
@@ -98,7 +94,8 @@ row_ins_clust_index_entry_low(
dtuple_t* entry, /*!< in/out: index entry to insert */
ulint n_ext, /*!< in: number of externally stored columns */
que_thr_t* thr) /*!< in: query thread or NULL */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
+
/***************************************************************//**
Tries to insert an entry into a secondary index. If a record with exactly the
same fields is found, the other record is necessarily marked deleted.
@@ -107,7 +104,6 @@ It is then unmarked. Otherwise, the entry is just inserted to the index.
@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG)
@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed
@return error code */
-UNIV_INTERN
dberr_t
row_ins_sec_index_entry_low(
/*========================*/
@@ -123,40 +119,14 @@ row_ins_sec_index_entry_low(
trx_id_t trx_id, /*!< in: PAGE_MAX_TRX_ID during
row_log_table_apply(), or 0 */
que_thr_t* thr) /*!< in: query thread */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/***************************************************************//**
-Tries to insert the externally stored fields (off-page columns)
-of a clustered index entry.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-UNIV_INTERN
-dberr_t
-row_ins_index_entry_big_rec_func(
-/*=============================*/
- const dtuple_t* entry, /*!< in/out: index entry to insert */
- const big_rec_t* big_rec,/*!< in: externally stored fields */
- ulint* offsets,/*!< in/out: rec offsets */
- mem_heap_t** heap, /*!< in/out: memory heap */
- dict_index_t* index, /*!< in: index */
- const char* file, /*!< in: file name of caller */
-#ifndef DBUG_OFF
- const void* thd, /*!< in: connection, or NULL */
-#endif /* DBUG_OFF */
- ulint line) /*!< in: line number of caller */
- MY_ATTRIBUTE((nonnull(1,2,3,4,5,6), warn_unused_result));
-#ifdef DBUG_OFF
-# define row_ins_index_entry_big_rec(e,big,ofs,heap,index,thd,file,line) \
- row_ins_index_entry_big_rec_func(e,big,ofs,heap,index,file,line)
-#else /* DBUG_OFF */
-# define row_ins_index_entry_big_rec(e,big,ofs,heap,index,thd,file,line) \
- row_ins_index_entry_big_rec_func(e,big,ofs,heap,index,file,thd,line)
-#endif /* DBUG_OFF */
+ MY_ATTRIBUTE((warn_unused_result));
+
/***************************************************************//**
Inserts an entry into a clustered index. Tries first optimistic,
then pessimistic descent down the tree. If the entry matches enough
to a delete marked record, performs the insert by updating or delete
unmarking the delete marked record.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
dberr_t
row_ins_clust_index_entry(
/*======================*/
@@ -164,26 +134,24 @@ row_ins_clust_index_entry(
dtuple_t* entry, /*!< in/out: index entry to insert */
que_thr_t* thr, /*!< in: query thread */
ulint n_ext) /*!< in: number of externally stored columns */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/***************************************************************//**
Inserts an entry into a secondary index. Tries first optimistic,
then pessimistic descent down the tree. If the entry matches enough
to a delete marked record, performs the insert by updating or delete
unmarking the delete marked record.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
dberr_t
row_ins_sec_index_entry(
/*====================*/
dict_index_t* index, /*!< in: secondary index */
dtuple_t* entry, /*!< in/out: index entry to insert */
que_thr_t* thr) /*!< in: query thread */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/***********************************************************//**
Inserts a row to a table. This is a high-level function used in
SQL execution graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
row_ins_step(
/*=========*/
@@ -192,7 +160,10 @@ row_ins_step(
/* Insert node structure */
struct ins_node_t{
- que_common_t common; /*!< node type: QUE_NODE_INSERT */
+ ins_node_t() : common(QUE_NODE_INSERT, NULL), entry(entry_list.end())
+ {
+ }
+ que_common_t common; /*!< node type: QUE_NODE_INSERT */
ulint ins_type;/* INS_VALUES, INS_SEARCHED, or INS_DIRECT */
dtuple_t* row; /*!< row to insert */
dict_table_t* table; /*!< table where to insert */
@@ -202,15 +173,17 @@ struct ins_node_t{
ulint state; /*!< node execution state */
dict_index_t* index; /*!< NULL, or the next index where the index
entry should be inserted */
- dtuple_t* entry; /*!< NULL, or entry to insert in the index;
+ std::vector<dtuple_t*>
+ entry_list;/* list of entries, one for each index */
+ std::vector<dtuple_t*>::iterator
+	entry;	/*!< entry_list.end(), or the entry to
	insert in the index; after a successful insert,
	this should be reset to entry_list.end() */
- UT_LIST_BASE_NODE_T(dtuple_t)
- entry_list;/* list of entries, one for each index */
- byte* row_id_buf;/* buffer for the row id sys field in row */
+ /** buffer for the system columns */
+ byte sys_buf[DATA_ROW_ID_LEN
+ + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN];
trx_id_t trx_id; /*!< trx id or the last trx which executed the
node */
- byte* trx_id_buf;/* buffer for the trx id sys field in row */
mem_heap_t* entry_sys_heap;
/* memory heap used as auxiliary storage;
entry_list and sys fields are stored here;
@@ -233,8 +206,4 @@ struct ins_node_t{
#define INS_NODE_INSERT_ENTRIES 3 /* index entries should be built and
inserted */
-#ifndef UNIV_NONINL
-#include "row0ins.ic"
-#endif
-
#endif
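A sketch of what the container change in ins_node_t means for callers (illustrative only; the loop body is a placeholder): the UT_LIST of index entries becomes a std::vector, and the "no current entry" state changes from a NULL pointer to the end iterator that the new default constructor establishes.

	for (std::vector<dtuple_t*>::iterator it = node->entry_list.begin();
	     it != node->entry_list.end(); ++it) {
		dtuple_t*	entry = *it;	/* entry for one index */
		/* ... build and insert the entry ... */
	}

	/* before: node->entry == NULL meant "no current entry";
	after: node->entry == node->entry_list.end() */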
diff --git a/storage/innobase/include/row0ins.ic b/storage/innobase/include/row0ins.ic
deleted file mode 100644
index f1f0a449726..00000000000
--- a/storage/innobase/include/row0ins.ic
+++ /dev/null
@@ -1,26 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0ins.ic
-Insert into a table
-
-Created 4/20/1996 Heikki Tuuri
-*******************************************************/
-
-
diff --git a/storage/innobase/include/row0log.h b/storage/innobase/include/row0log.h
index 6d91e25e6bd..6f8860a7f8c 100644
--- a/storage/innobase/include/row0log.h
+++ b/storage/innobase/include/row0log.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,14 +27,14 @@ Created 2011-05-26 Marko Makela
#ifndef row0log_h
#define row0log_h
-#include "univ.i"
+#include "que0types.h"
#include "mtr0types.h"
#include "row0types.h"
#include "rem0types.h"
#include "data0types.h"
-#include "dict0types.h"
#include "trx0types.h"
-#include "que0types.h"
+
+class ut_stage_alter_t;
extern ulint onlineddl_rowlog_rows;
extern ulint onlineddl_rowlog_pct_used;
@@ -43,7 +44,6 @@ extern ulint onlineddl_pct_progress;
Allocate the row log for an index and flag the index
for online creation.
@retval true if success, false if not */
-UNIV_INTERN
bool
row_log_allocate(
/*=============*/
@@ -62,7 +62,6 @@ row_log_allocate(
/******************************************************//**
Free the row log for an index that was being created online. */
-UNIV_INTERN
void
row_log_free(
/*=========*/
@@ -81,8 +80,8 @@ row_log_abort_sec(
/******************************************************//**
Try to log an operation to a secondary index that is
(or was) being created.
-@retval true if the operation was logged or can be ignored
-@retval false if online index creation is not taking place */
+@retval true if the operation was logged or can be ignored
+@retval false if online index creation is not taking place */
UNIV_INLINE
bool
row_log_online_op_try(
@@ -94,7 +93,6 @@ row_log_online_op_try(
MY_ATTRIBUTE((nonnull, warn_unused_result));
/******************************************************//**
Logs an operation to a secondary index that is (or was) being created. */
-UNIV_INTERN
void
row_log_online_op(
/*==============*/
@@ -102,12 +100,11 @@ row_log_online_op(
const dtuple_t* tuple, /*!< in: index tuple */
trx_id_t trx_id) /*!< in: transaction ID for insert,
or 0 for delete */
- UNIV_COLD MY_ATTRIBUTE((nonnull));
+ ATTRIBUTE_COLD __attribute__((nonnull));
/******************************************************//**
Gets the error status of the online index rebuild log.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
row_log_table_get_error(
/*====================*/
@@ -115,10 +112,19 @@ row_log_table_get_error(
that is being rebuilt online */
MY_ATTRIBUTE((nonnull, warn_unused_result));
+/** Check whether a virtual column is indexed in the new table being
+created during alter table
+@param[in]	index		clustered index
+@param[in] v_no virtual column number
+@return true if it is indexed, else false */
+bool
+row_log_col_is_indexed(
+ const dict_index_t* index,
+ ulint v_no);
+
/******************************************************//**
Logs a delete operation to a table that is being rebuilt.
This will be merged in row_log_table_apply_delete(). */
-UNIV_INTERN
void
row_log_table_delete(
/*=================*/
@@ -126,15 +132,14 @@ row_log_table_delete(
page X-latched */
dict_index_t* index, /*!< in/out: clustered index, S-latched
or X-latched */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec,index) */
const byte* sys) /*!< in: DB_TRX_ID,DB_ROLL_PTR that should
be logged, or NULL to use those in rec */
- UNIV_COLD MY_ATTRIBUTE((nonnull(1,2,3)));
+ ATTRIBUTE_COLD __attribute__((nonnull(1,2,3)));
/******************************************************//**
Logs an update operation to a table that is being rebuilt.
This will be merged in row_log_table_apply_update(). */
-UNIV_INTERN
void
row_log_table_update(
/*=================*/
@@ -142,17 +147,15 @@ row_log_table_update(
page X-latched */
dict_index_t* index, /*!< in/out: clustered index, S-latched
or X-latched */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
- const dtuple_t* old_pk) /*!< in: row_log_table_get_pk()
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec,index) */
+ const dtuple_t* old_pk);/*!< in: row_log_table_get_pk()
before the update */
- UNIV_COLD MY_ATTRIBUTE((nonnull(1,2,3)));
/******************************************************//**
Constructs the old PRIMARY KEY and DB_TRX_ID,DB_ROLL_PTR
of a table that is being rebuilt.
@return tuple of PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR in the rebuilt table,
or NULL if the PRIMARY KEY definition does not change */
-UNIV_INTERN
const dtuple_t*
row_log_table_get_pk(
/*=================*/
@@ -160,17 +163,16 @@ row_log_table_get_pk(
page X-latched */
dict_index_t* index, /*!< in/out: clustered index, S-latched
or X-latched */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index),
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec,index),
or NULL */
byte* sys, /*!< out: DB_TRX_ID,DB_ROLL_PTR for
row_log_table_delete(), or NULL */
mem_heap_t** heap) /*!< in/out: memory heap where allocated */
- UNIV_COLD MY_ATTRIBUTE((nonnull(1,2,5), warn_unused_result));
+ ATTRIBUTE_COLD __attribute__((nonnull(1,2,5), warn_unused_result));
/******************************************************//**
Logs an insert to a table that is being rebuilt.
This will be merged in row_log_table_apply_insert(). */
-UNIV_INTERN
void
row_log_table_insert(
/*=================*/
@@ -178,67 +180,78 @@ row_log_table_insert(
page X-latched */
dict_index_t* index, /*!< in/out: clustered index, S-latched
or X-latched */
- const ulint* offsets)/*!< in: rec_get_offsets(rec,index) */
- UNIV_COLD MY_ATTRIBUTE((nonnull));
+ const offset_t* offsets);/*!< in: rec_get_offsets(rec,index) */
/******************************************************//**
Notes that a BLOB is being freed during online ALTER TABLE. */
-UNIV_INTERN
void
row_log_table_blob_free(
/*====================*/
dict_index_t* index, /*!< in/out: clustered index, X-latched */
ulint page_no)/*!< in: starting page number of the BLOB */
- UNIV_COLD MY_ATTRIBUTE((nonnull));
+ ATTRIBUTE_COLD __attribute__((nonnull));
/******************************************************//**
Notes that a BLOB is being allocated during online ALTER TABLE. */
-UNIV_INTERN
void
row_log_table_blob_alloc(
/*=====================*/
dict_index_t* index, /*!< in/out: clustered index, X-latched */
ulint page_no)/*!< in: starting page number of the BLOB */
- UNIV_COLD MY_ATTRIBUTE((nonnull));
-/******************************************************//**
-Apply the row_log_table log to a table upon completing rebuild.
+ ATTRIBUTE_COLD __attribute__((nonnull));
+
+/** Apply the row_log_table log to a table upon completing rebuild.
+@param[in] thr query graph
+@param[in] old_table old table
+@param[in,out] table MySQL table (for reporting duplicates)
+@param[in,out] stage performance schema accounting object, used by
+ALTER TABLE. stage->begin_phase_log_table() will be called initially and then
+stage->inc() will be called for each block of log that is applied.
@return DB_SUCCESS, or error code on failure */
-UNIV_INTERN
dberr_t
row_log_table_apply(
-/*================*/
- que_thr_t* thr, /*!< in: query graph */
- dict_table_t* old_table,
- /*!< in: old table */
- struct TABLE* table) /*!< in/out: MySQL table
- (for reporting duplicates) */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ que_thr_t* thr,
+ dict_table_t* old_table,
+ struct TABLE* table,
+ ut_stage_alter_t* stage)
+ MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
Get the latest transaction ID that has invoked row_log_online_op()
during online creation.
@return latest transaction ID, or 0 if nothing was logged */
-UNIV_INTERN
trx_id_t
row_log_get_max_trx(
/*================*/
dict_index_t* index) /*!< in: index, must be locked */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************//**
-Merge the row log to the index upon completing index creation.
+/** Apply the row log to the index upon completing index creation.
+@param[in] trx transaction (for checking if the operation was
+interrupted)
+@param[in,out] index secondary index
+@param[in,out] table MySQL table (for reporting duplicates)
+@param[in,out] stage performance schema accounting object, used by
+ALTER TABLE. stage->begin_phase_log_index() will be called initially and then
+stage->inc() will be called for each block of log that is applied.
@return DB_SUCCESS, or error code on failure */
-UNIV_INTERN
dberr_t
row_log_apply(
-/*==========*/
- trx_t* trx, /*!< in: transaction (for checking if
- the operation was interrupted) */
- dict_index_t* index, /*!< in/out: secondary index */
- struct TABLE* table) /*!< in/out: MySQL table
- (for reporting duplicates) */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ const trx_t* trx,
+ dict_index_t* index,
+ struct TABLE* table,
+ ut_stage_alter_t* stage)
+ MY_ATTRIBUTE((warn_unused_result));
+
+#ifdef HAVE_PSI_STAGE_INTERFACE
+/** Estimate how much work is to be done by the log apply phase
+of an ALTER TABLE for this index.
+@param[in] index index whose log to assess
+@return work to be done by log-apply in abstract units
+*/
+ulint
+row_log_estimate_work(
+ const dict_index_t* index);
+#endif /* HAVE_PSI_STAGE_INTERFACE */
-#ifndef UNIV_NONINL
#include "row0log.ic"
-#endif
#endif /* row0log.h */
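A hedged sketch of threading the new stage argument through row_log_apply(); everything except the declared signature and the begin_phase_log_index()/inc() behavior documented above is assumed context from the caller:

	/* stage comes from the ALTER TABLE execution context */
	dberr_t	err = row_log_apply(trx, index, altered_table, stage);
	/* per the comment above, row_log_apply() calls
	stage->begin_phase_log_index() first, then stage->inc()
	for each block of log applied */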
diff --git a/storage/innobase/include/row0log.ic b/storage/innobase/include/row0log.ic
index 53f99da15fd..ba7eb7b025c 100644
--- a/storage/innobase/include/row0log.ic
+++ b/storage/innobase/include/row0log.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2015, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -33,9 +33,7 @@ row_log_abort_sec(
/*===============*/
dict_index_t* index) /*!< in/out: index (x-latched) */
{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_X));
ut_ad(!dict_index_is_clust(index));
dict_index_set_online_status(index, ONLINE_INDEX_ABORTED);
@@ -45,8 +43,8 @@ row_log_abort_sec(
/******************************************************//**
Try to log an operation to a secondary index that is
(or was) being created.
-@retval true if the operation was logged or can be ignored
-@retval false if online index creation is not taking place */
+@retval true if the operation was logged or can be ignored
+@retval false if online index creation is not taking place */
UNIV_INLINE
bool
row_log_online_op_try(
@@ -56,10 +54,10 @@ row_log_online_op_try(
trx_id_t trx_id) /*!< in: transaction ID for insert,
or 0 for delete */
{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_SHARED)
- || rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+
+ ut_ad(rw_lock_own_flagged(
+ dict_index_get_lock(index),
+ RW_LOCK_FLAG_S | RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX));
switch (dict_index_get_online_status(index)) {
case ONLINE_INDEX_COMPLETE:
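The same consolidation, shown standalone: the old pair of rw_lock_own() checks existed only under UNIV_SYNC_DEBUG, while the replacement is a single debug assertion whose flag mask also admits the newer SX latch mode (lock stands for any rw_lock_t* the caller holds):

	/* before:
	#ifdef UNIV_SYNC_DEBUG
		ut_ad(rw_lock_own(lock, RW_LOCK_SHARED)
		      || rw_lock_own(lock, RW_LOCK_EX));
	#endif
	*/
	ut_ad(rw_lock_own_flagged(
		lock, RW_LOCK_FLAG_S | RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX));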
diff --git a/storage/innobase/include/row0merge.h b/storage/innobase/include/row0merge.h
index a3738f8fb02..8e7ca5de046 100644
--- a/storage/innobase/include/row0merge.h
+++ b/storage/innobase/include/row0merge.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2017, MariaDB Corporation.
+Copyright (c) 2015, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,19 +27,19 @@ Created 13/06/2005 Jan Lindstrom
#ifndef row0merge_h
#define row0merge_h
-#include "univ.i"
-#include "data0data.h"
-#include "dict0types.h"
-#include "trx0types.h"
#include "que0types.h"
+#include "trx0types.h"
#include "mtr0mtr.h"
#include "rem0types.h"
#include "rem0rec.h"
-#include "read0types.h"
#include "btr0types.h"
#include "row0mysql.h"
#include "lock0types.h"
#include "srv0srv.h"
+#include "ut0stage.h"
+
+/* Reserve free space from every block for key_version */
+#define ROW_MERGE_RESERVE_SIZE 4
/* Cluster index read task is mandatory */
#define COST_READ_CLUSTERED_INDEX 1.0
@@ -108,17 +108,21 @@ struct index_field_t {
ulint col_no; /*!< column offset */
ulint prefix_len; /*!< column prefix length, or 0
if indexing the whole column */
+ bool is_v_col; /*!< whether this is a virtual column */
};
/** Definition of an index being created */
struct index_def_t {
const char* name; /*!< index name */
+ bool rebuild; /*!< whether the table is rebuilt */
ulint ind_type; /*!< 0, DICT_UNIQUE,
or DICT_CLUSTERED */
ulint key_number; /*!< MySQL key number,
or ULINT_UNDEFINED if none */
ulint n_fields; /*!< number of fields in index */
index_field_t* fields; /*!< field definitions */
+ st_mysql_ftparser*
+ parser; /*!< fulltext parser plugin */
};
/** Structure for reporting duplicate records. */
@@ -134,40 +138,39 @@ struct row_merge_dup_t {
/*************************************************************//**
Report a duplicate key. */
-UNIV_INTERN
void
row_merge_dup_report(
/*=================*/
row_merge_dup_t* dup, /*!< in/out: for reporting duplicates */
const dfield_t* entry) /*!< in: duplicate index entry */
MY_ATTRIBUTE((nonnull));
+
/*********************************************************************//**
Sets an exclusive lock on a table, for the duration of creating indexes.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
dberr_t
row_merge_lock_table(
/*=================*/
trx_t* trx, /*!< in/out: transaction */
dict_table_t* table, /*!< in: table to lock */
enum lock_mode mode) /*!< in: LOCK_X or LOCK_S */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((nonnull(1,2), warn_unused_result));
+
/*********************************************************************//**
Drop indexes that were created before an error occurred.
The data dictionary must have been locked exclusively by the caller,
because the transaction will not be committed. */
-UNIV_INTERN
void
row_merge_drop_indexes_dict(
/*========================*/
trx_t* trx, /*!< in/out: dictionary transaction */
table_id_t table_id)/*!< in: table identifier */
MY_ATTRIBUTE((nonnull));
+
/*********************************************************************//**
Drop those indexes which were created before an error occurred.
The data dictionary must have been locked exclusively by the caller,
because the transaction will not be committed. */
-UNIV_INTERN
void
row_merge_drop_indexes(
/*===================*/
@@ -176,18 +179,17 @@ row_merge_drop_indexes(
ibool locked) /*!< in: TRUE=table locked,
FALSE=may need to do a lazy drop */
MY_ATTRIBUTE((nonnull));
+
/*********************************************************************//**
Drop all partially created indexes during crash recovery. */
-UNIV_INTERN
void
row_merge_drop_temp_indexes(void);
/*=============================*/
/** Create temporary merge files in the given path, and if
UNIV_PFS_IO defined, register the file descriptor with Performance Schema.
-@param[in] path location for creating temporary merge files.
+@param[in] path location for creating temporary merge files, or NULL
@return File descriptor */
-UNIV_INTERN
int
row_merge_file_create_low(
const char* path)
@@ -195,29 +197,16 @@ row_merge_file_create_low(
/*********************************************************************//**
Destroy a merge file. And de-register the file from Performance Schema
if UNIV_PFS_IO is defined. */
-UNIV_INTERN
void
row_merge_file_destroy_low(
/*=======================*/
int fd); /*!< in: merge file descriptor */
/*********************************************************************//**
-Provide a new pathname for a table that is being renamed if it belongs to
-a file-per-table tablespace. The caller is responsible for freeing the
-memory allocated for the return value.
-@return new pathname of tablespace file, or NULL if space = 0 */
-UNIV_INTERN
-char*
-row_make_new_pathname(
-/*==================*/
- dict_table_t* table, /*!< in: table to be renamed */
- const char* new_name); /*!< in: new name */
-/*********************************************************************//**
Rename the tables in the data dictionary. The data dictionary must
have been locked exclusively by the caller, because the transaction
will not be committed.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
dberr_t
row_merge_rename_tables_dict(
/*=========================*/
@@ -233,96 +222,122 @@ row_merge_rename_tables_dict(
Rename an index in the dictionary that was created. The data
dictionary must have been locked exclusively by the caller, because
the transaction will not be committed.
-@return DB_SUCCESS if all OK */
-UNIV_INTERN
+@return DB_SUCCESS if all OK */
dberr_t
row_merge_rename_index_to_add(
/*==========================*/
trx_t* trx, /*!< in/out: transaction */
table_id_t table_id, /*!< in: table identifier */
index_id_t index_id) /*!< in: index identifier */
- MY_ATTRIBUTE((nonnull));
+ MY_ATTRIBUTE((nonnull(1), warn_unused_result));
+
/*********************************************************************//**
Rename an index in the dictionary that is to be dropped. The data
dictionary must have been locked exclusively by the caller, because
the transaction will not be committed.
-@return DB_SUCCESS if all OK */
-UNIV_INTERN
+@return DB_SUCCESS if all OK */
dberr_t
row_merge_rename_index_to_drop(
/*===========================*/
trx_t* trx, /*!< in/out: transaction */
table_id_t table_id, /*!< in: table identifier */
index_id_t index_id) /*!< in: index identifier */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Create the index and load in to the dictionary.
-@return index, or NULL on error */
-UNIV_INTERN
+ MY_ATTRIBUTE((nonnull(1), warn_unused_result));
+
+/** Create the index and load in to the dictionary.
+@param[in,out] trx trx (sets error_state)
+@param[in,out] table the index is on this table
+@param[in] index_def the index definition
+@param[in]	add_v		new virtual columns added along with
+				the ADD INDEX call
+@return index, or NULL on error */
dict_index_t*
row_merge_create_index(
-/*===================*/
- trx_t* trx, /*!< in/out: trx (sets error_state) */
- dict_table_t* table, /*!< in: the index is on this table */
- const index_def_t* index_def); /*!< in: the index definition */
+ trx_t* trx,
+ dict_table_t* table,
+ const index_def_t* index_def,
+ const dict_add_v_col_t* add_v)
+ MY_ATTRIBUTE((warn_unused_result));
+
/*********************************************************************//**
Check if a transaction can use an index.
-@return TRUE if index can be used by the transaction else FALSE */
-UNIV_INTERN
-ibool
+@return whether the index can be used by the transaction */
+bool
row_merge_is_index_usable(
/*======================*/
const trx_t* trx, /*!< in: transaction */
- const dict_index_t* index); /*!< in: index to check */
+ const dict_index_t* index) /*!< in: index to check */
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
/*********************************************************************//**
Drop a table. The caller must have ensured that the background stats
thread is not processing the table. This can be done by calling
dict_stats_wait_bg_to_stop_using_table() after locking the dictionary and
before calling this function.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
dberr_t
row_merge_drop_table(
/*=================*/
trx_t* trx, /*!< in: transaction */
dict_table_t* table) /*!< in: table instance to drop */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Build indexes on a table by reading a clustered index,
-creating a temporary file containing index entries, merge sorting
-these index entries and inserting sorted index entries to indexes.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** Write an MLOG_INDEX_LOAD record to indicate in the redo-log
+that redo-logging of individual index pages was disabled, and
+the flushing of such pages to the data files was completed.
+@param[in] index an index tree on which redo logging was disabled */
+void row_merge_write_redo(const dict_index_t* index);
+
+/** Build indexes on a table by reading a clustered index, creating a temporary
+file containing index entries, merge sorting these index entries and inserting
+sorted index entries to indexes.
+@param[in] trx transaction
+@param[in] old_table table where rows are read from
+@param[in] new_table table where indexes are created; identical to
+old_table unless creating a PRIMARY KEY
+@param[in] online true if creating indexes online
+@param[in] indexes indexes to be created
+@param[in] key_numbers MySQL key numbers
+@param[in] n_indexes size of indexes[]
+@param[in,out] table MySQL table, for reporting erroneous key value
+if applicable
+@param[in] add_cols default values of added columns, or NULL
+@param[in] col_map mapping of old column numbers to new ones, or
+NULL if old_table == new_table
+@param[in] add_autoinc number of added AUTO_INCREMENT columns, or
+ULINT_UNDEFINED if none is added
+@param[in,out] sequence autoinc sequence
+@param[in] skip_pk_sort whether the new PRIMARY KEY will follow
+existing order
+@param[in,out] stage performance schema accounting object, used by
+ALTER TABLE. stage->begin_phase_read_pk() will be called at the beginning of
+this function and it will be passed to other functions for further accounting.
+@param[in] add_v new virtual columns added along with indexes
+@param[in] eval_table mysql table used to evaluate virtual column
+ value, see innobase_get_computed_value().
+@return DB_SUCCESS or error code */
dberr_t
row_merge_build_indexes(
-/*====================*/
- trx_t* trx, /*!< in: transaction */
- dict_table_t* old_table, /*!< in: table where rows are
- read from */
- dict_table_t* new_table, /*!< in: table where indexes are
- created; identical to old_table
- unless creating a PRIMARY KEY */
- bool online, /*!< in: true if creating indexes
- online */
- dict_index_t** indexes, /*!< in: indexes to be created */
- const ulint* key_numbers, /*!< in: MySQL key numbers */
- ulint n_indexes, /*!< in: size of indexes[] */
- struct TABLE* table, /*!< in/out: MySQL table, for
- reporting erroneous key value
- if applicable */
- const dtuple_t* add_cols, /*!< in: default values of
- added columns, or NULL */
- const ulint* col_map, /*!< in: mapping of old column
- numbers to new ones, or NULL
- if old_table == new_table */
- ulint add_autoinc, /*!< in: number of added
- AUTO_INCREMENT column, or
- ULINT_UNDEFINED if none is added */
- ib_sequence_t& sequence) /*!< in/out: autoinc sequence */
- MY_ATTRIBUTE((nonnull(1,2,3,5,6,8), warn_unused_result));
+ trx_t* trx,
+ dict_table_t* old_table,
+ dict_table_t* new_table,
+ bool online,
+ dict_index_t** indexes,
+ const ulint* key_numbers,
+ ulint n_indexes,
+ struct TABLE* table,
+ const dtuple_t* add_cols,
+ const ulint* col_map,
+ ulint add_autoinc,
+ ib_sequence_t& sequence,
+ bool skip_pk_sort,
+ ut_stage_alter_t* stage,
+ const dict_add_v_col_t* add_v,
+ struct TABLE* eval_table)
+ MY_ATTRIBUTE((warn_unused_result));
+
/********************************************************************//**
Write a buffer to a block. */
-UNIV_INTERN
void
row_merge_buf_write(
/*================*/
@@ -330,9 +345,9 @@ row_merge_buf_write(
const merge_file_t* of, /*!< in: output file */
row_merge_block_t* block) /*!< out: buffer for writing to file */
MY_ATTRIBUTE((nonnull));
+
/********************************************************************//**
Sort a buffer. */
-UNIV_INTERN
void
row_merge_buf_sort(
/*===============*/
@@ -340,9 +355,12 @@ row_merge_buf_sort(
row_merge_dup_t* dup) /*!< in/out: reporter of duplicates
(NULL if non-unique index) */
MY_ATTRIBUTE((nonnull(1)));
+
/********************************************************************//**
Write a merge block to the file system.
-@return TRUE if request was successful, FALSE if fail */
+@return whether the request was completed successfully
+@retval false on error
+@retval true on success */
UNIV_INTERN
bool
row_merge_write(
@@ -354,10 +372,10 @@ row_merge_write(
void* crypt_buf, /*!< in: crypt buf or NULL */
ulint space) /*!< in: space id */
MY_ATTRIBUTE((warn_unused_result));
+
/********************************************************************//**
Empty a sort buffer.
@return sort buffer */
-UNIV_INTERN
row_merge_buf_t*
row_merge_buf_empty(
/*================*/
@@ -366,64 +384,72 @@ row_merge_buf_empty(
/** Create a merge file in the given location.
@param[out] merge_file merge file structure
-@param[in] path location for creating temporary file
+@param[in] path location for creating temporary file, or NULL
@return file descriptor, or -1 on failure */
-UNIV_INTERN
int
row_merge_file_create(
merge_file_t* merge_file,
- const char* path);
-
-/*********************************************************************//**
-Merge disk files.
+ const char* path)
+ MY_ATTRIBUTE((warn_unused_result, nonnull(1)));
+
+/** Merge disk files.
+@param[in] trx transaction
+@param[in] dup descriptor of index being created
+@param[in,out] file file containing index entries
+@param[in,out] block 3 buffers
+@param[in,out] tmpfd temporary file handle
+@param[in]	update_progress	true if we should update the progress status
+@param[in]	pct_progress	total progress percent until now
+@param[in]	pct_cost	current progress percent
+@param[in] crypt_block crypt buf or NULL
+@param[in] space space_id
+@param[in,out] stage performance schema accounting object, used by
+ALTER TABLE. If not NULL, stage->begin_phase_sort() will be called initially
+and then stage->inc() will be called for each record processed.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
row_merge_sort(
/*===========*/
- trx_t* trx, /*!< in: transaction */
- const row_merge_dup_t* dup, /*!< in: descriptor of
- index being created */
- merge_file_t* file, /*!< in/out: file containing
- index entries */
- row_merge_block_t* block, /*!< in/out: 3 buffers */
- int* tmpfd, /*!< in/out: temporary file handle */
- const bool update_progress, /*!< in: update progress status variable or not */
- const float pct_progress, /*!< in: total progress percent until now */
- const float pct_cost, /*!< in: current progress percent */
- row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */
- ulint space) /*!< in: space id */
+ trx_t* trx,
+ const row_merge_dup_t* dup,
+ merge_file_t* file,
+ row_merge_block_t* block,
+ int* tmpfd,
+ const bool update_progress,
+ const double pct_progress,
+ const double pct_cost,
+ row_merge_block_t* crypt_block,
+ ulint space,
+ ut_stage_alter_t* stage = NULL)
MY_ATTRIBUTE((warn_unused_result));
/*********************************************************************//**
Allocate a sort buffer.
@return own: sort buffer */
-UNIV_INTERN
row_merge_buf_t*
row_merge_buf_create(
/*=================*/
dict_index_t* index) /*!< in: secondary index */
MY_ATTRIBUTE((warn_unused_result, nonnull, malloc));
+
/*********************************************************************//**
Deallocate a sort buffer. */
-UNIV_INTERN
void
row_merge_buf_free(
/*===============*/
row_merge_buf_t* buf) /*!< in,own: sort buffer to be freed */
MY_ATTRIBUTE((nonnull));
+
/*********************************************************************//**
Destroy a merge file. */
-UNIV_INTERN
void
row_merge_file_destroy(
/*===================*/
merge_file_t* merge_file) /*!< in/out: merge file structure */
MY_ATTRIBUTE((nonnull));
-/********************************************************************//**
-Read a merge block from the file system.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
+
+/** Read a merge block from the file system.
+@return whether the request was completed successfully */
bool
row_merge_read(
/*===========*/
@@ -439,7 +465,6 @@ row_merge_read(
/********************************************************************//**
Read a merge record.
@return pointer to next record, or NULL on I/O error or end of list */
-UNIV_INTERN
const byte*
row_merge_read_rec(
/*===============*/
@@ -452,9 +477,8 @@ row_merge_read_rec(
const mrec_t** mrec, /*!< out: pointer to merge record,
or NULL on end of list
(non-NULL on I/O error) */
- ulint* offsets,/*!< out: offsets of mrec */
+ offset_t* offsets,/*!< out: offsets of mrec */
row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */
ulint space) /*!< in: space id */
MY_ATTRIBUTE((warn_unused_result));
-
#endif /* row0merge.h */
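Because the new stage parameter of row_merge_sort() defaults to NULL, existing call sites keep compiling unchanged; only the progress arguments widened from float to double. An illustrative call, with placeholder variables for the merge context:

	dberr_t	err = row_merge_sort(trx, &dup, &file, block, &tmpfd,
				     true,		/* update_progress */
				     pct_progress,	/* double, was float */
				     pct_cost,		/* double, was float */
				     crypt_block, space);
	/* equivalent to passing stage = NULL explicitly */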
diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h
index 2069caf7f3f..60f53221c9a 100644
--- a/storage/innobase/include/row0mysql.h
+++ b/storage/innobase/include/row0mysql.h
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 2000, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2019, MariaDB Corporation.
+Copyright (c) 2000, 2017, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,18 +28,17 @@ Created 9/17/2000 Heikki Tuuri
#ifndef row0mysql_h
#define row0mysql_h
-#include "univ.i"
-#include "data0data.h"
#include "que0types.h"
-#include "dict0types.h"
#include "trx0types.h"
#include "row0types.h"
-#include "btr0pcur.h"
-#include "trx0types.h"
-#include "fil0crypt.h"
+#include "btr0types.h"
+#include "lock0types.h"
+#include "fil0fil.h"
+#include "fts0fts.h"
+#include "gis0type.h"
-// Forward declaration
-struct SysIndexCallback;
+#include "sql_list.h"
+#include "sql_cmd.h"
extern ibool row_rollback_on_timeout;
@@ -47,7 +46,6 @@ struct row_prebuilt_t;
/*******************************************************************//**
Frees the blob heap in prebuilt when no longer needed. */
-UNIV_INTERN
void
row_mysql_prebuilt_free_blob_heap(
/*==============================*/
@@ -58,7 +56,6 @@ Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row
format.
@return pointer to the data, we skip the 1 or 2 bytes at the start
that are used to store the len */
-UNIV_INTERN
byte*
row_mysql_store_true_var_len(
/*=========================*/
@@ -70,7 +67,6 @@ Reads a >= 5.0.3 format true VARCHAR length, in the MySQL row format, and
returns a pointer to the data.
@return pointer to the data, we skip the 1 or 2 bytes at the start
that are used to store the len */
-UNIV_INTERN
const byte*
row_mysql_read_true_varchar(
/*========================*/
@@ -80,7 +76,6 @@ row_mysql_read_true_varchar(
or 2 bytes */
/*******************************************************************//**
Stores a reference to a BLOB in the MySQL format. */
-UNIV_INTERN
void
row_mysql_store_blob_ref(
/*=====================*/
@@ -97,8 +92,7 @@ row_mysql_store_blob_ref(
header! */
/*******************************************************************//**
Reads a reference to a BLOB in the MySQL format.
-@return pointer to BLOB data */
-UNIV_INTERN
+@return pointer to BLOB data */
const byte*
row_mysql_read_blob_ref(
/*====================*/
@@ -107,9 +101,24 @@ row_mysql_read_blob_ref(
MySQL format */
ulint col_len); /*!< in: BLOB reference length
(not BLOB length) */
+/*******************************************************************//**
+Converts InnoDB geometry data format to MySQL data format. */
+void
+row_mysql_store_geometry(
+/*=====================*/
+ byte* dest, /*!< in/out: where to store */
+ ulint dest_len, /*!< in: dest buffer size: determines into
+ how many bytes the geometry length is stored,
+ the space for the length may vary from 1
+ to 4 bytes */
+ const byte* src, /*!< in: geometry data; if the value to store
+ is SQL NULL this should be NULL pointer */
+ ulint src_len); /*!< in: geometry length; if the value to store
+ is SQL NULL this should be 0; remember
+ also to set the NULL bit in the MySQL record
+ header! */
/**************************************************************//**
Pad a column with spaces. */
-UNIV_INTERN
void
row_mysql_pad_col(
/*==============*/
@@ -122,8 +131,7 @@ row_mysql_pad_col(
Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format.
The counterpart of this function is row_sel_field_store_in_mysql_format() in
row0sel.cc.
-@return up to which byte we used buf in the conversion */
-UNIV_INTERN
+@return up to which byte we used buf in the conversion */
byte*
row_mysql_store_col_in_innobase_format(
/*===================================*/
@@ -159,7 +167,6 @@ row_mysql_store_col_in_innobase_format(
Handles user errors and lock waits detected by the database engine.
@return true if it was a lock wait and we should continue running the
query thread */
-UNIV_INTERN
bool
row_mysql_handle_errors(
/*====================*/
@@ -172,8 +179,7 @@ row_mysql_handle_errors(
MY_ATTRIBUTE((nonnull(1,2)));
/********************************************************************//**
Create a prebuilt struct for a MySQL table handle.
-@return own: a prebuilt struct */
-UNIV_INTERN
+@return own: a prebuilt struct */
row_prebuilt_t*
row_create_prebuilt(
/*================*/
@@ -182,7 +188,6 @@ row_create_prebuilt(
the MySQL format */
/********************************************************************//**
Free a prebuilt struct for a MySQL table handle. */
-UNIV_INTERN
void
row_prebuilt_free(
/*==============*/
@@ -191,57 +196,45 @@ row_prebuilt_free(
/*********************************************************************//**
Updates the transaction pointers in query graphs stored in the prebuilt
struct. */
-UNIV_INTERN
void
row_update_prebuilt_trx(
/*====================*/
row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct
in MySQL handle */
trx_t* trx); /*!< in: transaction handle */
+
/*********************************************************************//**
Sets an AUTO_INC type lock on the table mentioned in prebuilt. The
AUTO_INC lock gives exclusive access to the auto-inc counter of the
table. The lock is reserved only for the duration of an SQL statement.
It is not compatible with another AUTO_INC or exclusive lock on the
table.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
dberr_t
row_lock_table_autoinc_for_mysql(
/*=============================*/
row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in the MySQL
table handle */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Sets a table lock on the table mentioned in prebuilt.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+
+/** Lock a table.
+@param[in,out] prebuilt table handle
+@return error code or DB_SUCCESS */
dberr_t
-row_lock_table_for_mysql(
-/*=====================*/
- row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in the MySQL
- table handle */
- dict_table_t* table, /*!< in: table to lock, or NULL
- if prebuilt->table should be
- locked as
- prebuilt->select_lock_type */
- ulint mode) /*!< in: lock mode of table
- (ignored if table==NULL) */
- MY_ATTRIBUTE((nonnull(1)));
-/*********************************************************************//**
-Does an insert for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+row_lock_table(row_prebuilt_t* prebuilt);
+
+/** Does an insert for MySQL.
+@param[in] mysql_rec row in the MySQL format
+@param[in,out] prebuilt prebuilt struct in MySQL handle
+@return error code or DB_SUCCESS*/
dberr_t
row_insert_for_mysql(
-/*=================*/
- byte* mysql_rec, /*!< in: row in the MySQL format */
- row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
- handle */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ const byte* mysql_rec,
+ row_prebuilt_t* prebuilt)
+ MY_ATTRIBUTE((warn_unused_result));
+
/*********************************************************************//**
Builds a dummy query graph used in selects. */
-UNIV_INTERN
void
row_prebuild_sel_graph(
/*===================*/
@@ -251,104 +244,71 @@ row_prebuild_sel_graph(
Gets pointer to a prebuilt update vector used in updates. If the update
graph has not yet been built in the prebuilt struct, then this function
first builds it.
-@return prebuilt update vector */
-UNIV_INTERN
+@return prebuilt update vector */
upd_t*
row_get_prebuilt_update_vector(
/*===========================*/
row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL
handle */
-/*********************************************************************//**
-Checks if a table is such that we automatically created a clustered
-index on it (on row id).
-@return TRUE if the clustered index was generated automatically */
-UNIV_INTERN
-ibool
-row_table_got_default_clust_index(
-/*==============================*/
- const dict_table_t* table); /*!< in: table */
-/*********************************************************************//**
-Does an update or delete of a row for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+/** Does an update or delete of a row for MySQL.
+@param[in,out] prebuilt prebuilt struct in MySQL handle
+@return error code or DB_SUCCESS */
dberr_t
-row_update_for_mysql(
-/*=================*/
- byte* mysql_rec, /*!< in: the row to be updated, in
- the MySQL format */
- row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
- handle */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-This can only be used when srv_locks_unsafe_for_binlog is TRUE or this
+row_update_for_mysql(row_prebuilt_t* prebuilt)
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** This can only be used when srv_locks_unsafe_for_binlog is TRUE or this
session is using a READ COMMITTED or READ UNCOMMITTED isolation level.
Before calling this function row_search_for_mysql() must have
initialized prebuilt->new_rec_locks to store the information which new
record locks really were set. This function removes a newly set
clustered index record lock under prebuilt->pcur or
prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that
-releases the latest clustered index record lock we set. */
-UNIV_INTERN
+releases the latest clustered index record lock we set.
+@param[in,out] prebuilt prebuilt struct in MySQL handle
+@param[in] has_latches_on_recs TRUE if called so that we have the
+ latches on the records under pcur
+ and clust_pcur, and we do not need
+ to reposition the cursors. */
void
row_unlock_for_mysql(
-/*=================*/
- row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct in MySQL
- handle */
- ibool has_latches_on_recs)/*!< in: TRUE if called
- so that we have the latches on
- the records under pcur and
- clust_pcur, and we do not need
- to reposition the cursors. */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Checks if a table name contains the string "/#sql" which denotes temporary
-tables in MySQL.
-@return true if temporary table */
-UNIV_INTERN
-bool
-row_is_mysql_tmp_table_name(
-/*========================*/
- const char* name) MY_ATTRIBUTE((warn_unused_result));
- /*!< in: table name in the form
- 'database/tablename' */
+ row_prebuilt_t* prebuilt,
+ ibool has_latches_on_recs);
/*********************************************************************//**
Creates an query graph node of 'update' type to be used in the MySQL
interface.
-@return own: update node */
-UNIV_INTERN
+@return own: update node */
upd_node_t*
row_create_update_node_for_mysql(
/*=============================*/
dict_table_t* table, /*!< in: table to update */
mem_heap_t* heap); /*!< in: mem heap from which allocated */
+
/**********************************************************************//**
Does a cascaded delete or set null in a foreign key operation.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
dberr_t
row_update_cascade_for_mysql(
/*=========================*/
- que_thr_t* thr, /*!< in: query thread */
- upd_node_t* node, /*!< in: update node used in the cascade
- or set null operation */
- dict_table_t* table) /*!< in: table where we do the operation */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ que_thr_t* thr, /*!< in: query thread */
+ upd_node_t* node, /*!< in: update node used in the cascade
+ or set null operation */
+ dict_table_t* table) /*!< in: table where we do the operation */
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
/*********************************************************************//**
Locks the data dictionary exclusively for performing a table create or other
data dictionary modification operation. */
-UNIV_INTERN
void
row_mysql_lock_data_dictionary_func(
/*================================*/
trx_t* trx, /*!< in/out: transaction */
const char* file, /*!< in: file name */
- ulint line); /*!< in: line number */
+ unsigned line); /*!< in: line number */
#define row_mysql_lock_data_dictionary(trx) \
row_mysql_lock_data_dictionary_func(trx, __FILE__, __LINE__)
/*********************************************************************//**
Unlocks the data dictionary exclusive lock. */
-UNIV_INTERN
void
row_mysql_unlock_data_dictionary(
/*=============================*/
@@ -356,31 +316,24 @@ row_mysql_unlock_data_dictionary(
/*********************************************************************//**
Locks the data dictionary in shared mode from modifications, for performing
foreign key check, rollback, or other operation invisible to MySQL. */
-UNIV_INTERN
void
row_mysql_freeze_data_dictionary_func(
/*==================================*/
trx_t* trx, /*!< in/out: transaction */
const char* file, /*!< in: file name */
- ulint line); /*!< in: line number */
+ unsigned line); /*!< in: line number */
#define row_mysql_freeze_data_dictionary(trx) \
row_mysql_freeze_data_dictionary_func(trx, __FILE__, __LINE__)
/*********************************************************************//**
Unlocks the data dictionary shared lock. */
-UNIV_INTERN
void
row_mysql_unfreeze_data_dictionary(
/*===============================*/
trx_t* trx); /*!< in/out: transaction */
/*********************************************************************//**
-Creates a table for MySQL. If the name of the table ends in
-one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
-"innodb_table_monitor", then this will also start the printing of monitor
-output by the master thread. If the table name ends in "innodb_mem_validate",
-InnoDB will try to invoke mem_validate(). On failure the transaction will
-be rolled back.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+Creates a table for MySQL. On failure the transaction will be rolled back
+and the 'table' object will be freed.
+@return error code or DB_SUCCESS */
dberr_t
row_create_table_for_mysql(
/*=======================*/
@@ -388,16 +341,14 @@ row_create_table_for_mysql(
(will be freed, or on DB_SUCCESS
added to the data dictionary cache) */
trx_t* trx, /*!< in/out: transaction */
- bool commit, /*!< in: if true, commit the transaction */
fil_encryption_t mode, /*!< in: encryption mode */
- ulint key_id) /*!< in: encryption key_id */
- __attribute__((nonnull, warn_unused_result));
+ uint32_t key_id) /*!< in: encryption key_id */
+ MY_ATTRIBUTE((warn_unused_result));
+
/*********************************************************************//**
-Does an index creation operation for MySQL. TODO: currently failure
-to create an index results in dropping the whole table! This is no problem
-currently as all indexes must be created at the same time as the table.
-@return error number or DB_SUCCESS */
-UNIV_INTERN
+Create an index when creating a table.
+On failure, the caller must drop the table!
+@return error number or DB_SUCCESS */
dberr_t
row_create_index_for_mysql(
/*=======================*/
@@ -410,54 +361,30 @@ row_create_index_for_mysql(
index columns, which are
then checked for not being too
large. */
- MY_ATTRIBUTE((nonnull(1,2), warn_unused_result));
-/*********************************************************************//**
-Scans a table create SQL string and adds to the data dictionary
-the foreign key constraints declared in the string. This function
-should be called after the indexes for a table have been created.
-Each foreign key constraint must be accompanied with indexes in
-bot participating tables. The indexes are allowed to contain more
-fields than mentioned in the constraint.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_table_add_foreign_constraints(
-/*==============================*/
- trx_t* trx, /*!< in: transaction */
- const char* sql_string, /*!< in: table create statement where
- foreign keys are declared like:
- FOREIGN KEY (a, b) REFERENCES table2(c, d),
- table2 can be written also with the
- database name before it: test.table2 */
- size_t sql_length, /*!< in: length of sql_string */
- const char* name, /*!< in: table full name in the
- normalized form
- database_name/table_name */
- ibool reject_fks) /*!< in: if TRUE, fail with error
- code DB_CANNOT_ADD_CONSTRAINT if
- any foreign keys are found. */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************************//**
The master thread in srv0srv.cc calls this regularly to drop tables which
we must drop in background after queries to them have ended. Such lazy
dropping of tables is needed in ALTER TABLE on Unix.
-@return how many tables dropped + remaining tables in list */
-UNIV_INTERN
+@return how many tables dropped + remaining tables in list */
ulint
row_drop_tables_for_mysql_in_background(void);
/*=========================================*/
/*********************************************************************//**
Get the background drop list length. NOTE: the caller must own the kernel
mutex!
-@return how many tables in list */
-UNIV_INTERN
+@return how many tables in list */
ulint
row_get_background_drop_list_len_low(void);
/*======================================*/
+
+/** Drop garbage tables during recovery. */
+void
+row_mysql_drop_garbage_tables();
+
/*********************************************************************//**
Sets an exclusive lock on a table.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
dberr_t
row_mysql_lock_table(
/*=================*/
@@ -467,51 +394,34 @@ row_mysql_lock_table(
const char* op_info) /*!< in: string for trx->op_info */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Truncates a table for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_truncate_table_for_mysql(
-/*=========================*/
- dict_table_t* table, /*!< in: table handle */
- trx_t* trx) /*!< in: transaction handle */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Drops a table for MySQL. If the name of the dropped table ends in
-one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
-"innodb_table_monitor", then this will also stop the printing of monitor
-output by the master thread. If the data dictionary was not already locked
-by the transaction, the transaction will be committed. Otherwise, the
-data dictionary will remain locked.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+/** Drop a table.
+If the data dictionary was not already locked by the transaction,
+the transaction will be committed. Otherwise, the data dictionary
+will remain locked.
+@param[in] name Table name
+@param[in,out] trx Transaction handle
+@param[in] sqlcom type of SQL operation
+@param[in]	create_failed	true=create table failed
+				because e.g. foreign key column
+				type mismatch
+@param[in] nonatomic Whether it is permitted to release
+ and reacquire dict_operation_lock
+@return error code */
dberr_t
row_drop_table_for_mysql(
-/*=====================*/
- const char* name, /*!< in: table name */
- trx_t* trx, /*!< in: dictionary transaction handle */
- bool drop_db,/*!< in: true=dropping whole database */
- bool create_failed,/*!<in: TRUE=create table failed
- because e.g. foreign key column
- type mismatch. */
- bool nonatomic = true)
- /*!< in: whether it is permitted
- to release and reacquire dict_operation_lock */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Drop all temporary tables during crash recovery. */
-UNIV_INTERN
-void
-row_mysql_drop_temp_tables(void);
-/*============================*/
+ const char* name,
+ trx_t* trx,
+ enum_sql_command sqlcom,
+ bool create_failed = false,
+ bool nonatomic = true);
+
+/** Drop a table after failed CREATE TABLE. */
+dberr_t row_drop_table_after_create_fail(const char* name, trx_t* trx);
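A minimal sketch of a plausible implementation of this helper, assuming it merely forwards to row_drop_table_for_mysql() above with create_failed = true; both the forwarding and the SQLCOM_DROP_TABLE value are assumptions, not shown in this patch:

	/* Sketch only: the forwarding pattern and the choice of
	SQLCOM_DROP_TABLE are assumptions, not part of this diff. */
	dberr_t
	row_drop_table_after_create_fail(
		const char*	name,
		trx_t*		trx)
	{
		return(row_drop_table_for_mysql(name, trx, SQLCOM_DROP_TABLE,
						/* create_failed */ true));
	}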
/*********************************************************************//**
Discards the tablespace of a table which stored in an .ibd file. Discarding
means that this function deletes the .ibd file and assigns a new table id for
-the table. Also the flag table->ibd_file_missing is set TRUE.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+the table. Also the file_unreadable flag is set.
+@return error code or DB_SUCCESS */
dberr_t
row_discard_tablespace_for_mysql(
/*=============================*/
@@ -521,71 +431,62 @@ row_discard_tablespace_for_mysql(
/*****************************************************************//**
Imports a tablespace. The space id in the .ibd file must match the space id
of the table in the data dictionary.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
dberr_t
row_import_tablespace_for_mysql(
/*============================*/
dict_table_t* table, /*!< in/out: table */
row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Drops a database for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+
+/** Drop a database for MySQL.
+@param[in] name database name which ends at '/'
+@param[in] trx transaction handle
+@param[out] found number of dropped tables/partitions
+@return error code or DB_SUCCESS */
dberr_t
row_drop_database_for_mysql(
-/*========================*/
- const char* name, /*!< in: database name which ends to '/' */
- trx_t* trx) /*!< in: transaction handle */
- MY_ATTRIBUTE((nonnull));
+ const char* name,
+ trx_t* trx,
+ ulint* found);
+
/*********************************************************************//**
Renames a table for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
dberr_t
row_rename_table_for_mysql(
/*=======================*/
const char* old_name, /*!< in: old table name */
const char* new_name, /*!< in: new table name */
trx_t* trx, /*!< in/out: transaction */
- bool commit) /*!< in: whether to commit trx */
+ bool commit, /*!< in: whether to commit trx */
+ bool use_fk) /*!< in: whether to parse and enforce
+ FOREIGN KEY constraints */
MY_ATTRIBUTE((nonnull, warn_unused_result));
+
/*********************************************************************//**
-Checks that the index contains entries in an ascending order, unique
-constraint is not broken, and calculates the number of index entries
+Scans an index for either COUNT(*) or CHECK TABLE.
+For CHECK TABLE, checks that the index contains entries in ascending order,
+that the unique constraint is not broken, and calculates the number of index entries
in the read view of the current transaction.
-@return true if ok */
-UNIV_INTERN
-bool
-row_check_index_for_mysql(
-/*======================*/
+@return DB_SUCCESS or other error */
+dberr_t
+row_scan_index_for_mysql(
+/*=====================*/
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct
in MySQL handle */
const dict_index_t* index, /*!< in: index */
ulint* n_rows) /*!< out: number of entries
seen in the consistent read */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Determines if a table is a magic monitor table.
-@return true if monitor table */
-UNIV_INTERN
-bool
-row_is_magic_monitor_table(
-/*=======================*/
- const char* table_name) /*!< in: name of the table, in the
- form database/table_name */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************************//**
Initialize this module */
-UNIV_INTERN
void
row_mysql_init(void);
/*================*/
/*********************************************************************//**
Close this module */
-UNIV_INTERN
void
row_mysql_close(void);
/*=================*/
@@ -644,6 +545,7 @@ struct mysql_row_templ_t {
ulint is_unsigned; /*!< if a column type is an integer
type and this field is != 0, then
it is an unsigned integer type */
+ ulint is_virtual; /*!< if a column is a virtual column */
};
#define MYSQL_FETCH_CACHE_SIZE 8
@@ -669,10 +571,6 @@ struct row_prebuilt_t {
an SQL statement: we may have to set
an intention lock on the table,
create a consistent read view etc. */
- unsigned mysql_has_locked:1;/*!< this is set TRUE when MySQL
- calls external_lock on this handle
- with a lock flag, and set FALSE when
- with the F_UNLOCK flag */
unsigned clust_index_was_generated:1;
/*!< if the user did not define a
primary key in MySQL, then Innobase
@@ -712,8 +610,9 @@ struct row_prebuilt_t {
is set but we later optimize out the
clustered index lookup */
unsigned templ_contains_blob:1;/*!< TRUE if the template contains
- a column with DATA_BLOB ==
- get_innobase_type_from_mysql_type();
+					a column for which DATA_LARGE_MTYPE(
+					get_innobase_type_from_mysql_type())
+					holds;
not to be confused with InnoDB
externally stored columns
(VARCHAR can be off-page too) */
@@ -750,9 +649,9 @@ struct row_prebuilt_t {
trx_id or n_indexes mismatch. */
que_fork_t* upd_graph; /*!< Innobase SQL query graph used
in updates or deletes */
- btr_pcur_t pcur; /*!< persistent cursor used in selects
+ btr_pcur_t* pcur; /*!< persistent cursor used in selects
and updates */
- btr_pcur_t clust_pcur; /*!< persistent cursor used in
+ btr_pcur_t* clust_pcur; /*!< persistent cursor used in
some selects and updates */
que_fork_t* sel_graph; /*!< dummy query graph used in
selects */
@@ -845,6 +744,8 @@ struct row_prebuilt_t {
mem_heap_t* old_vers_heap; /*!< memory heap where a previous
version is built in consistent read */
bool in_fts_query; /*!< Whether we are in a FTS query */
+	bool		fts_doc_id_in_read_set; /*!< true if the table has an
+					externally defined FTS_DOC_ID column. */
/*----------------------*/
ulonglong autoinc_last_value;
/*!< last value of AUTO-INC interval */
@@ -868,12 +769,14 @@ struct row_prebuilt_t {
ulint idx_cond_n_cols;/*!< Number of fields in idx_cond_cols.
0 if and only if idx_cond == NULL. */
/*----------------------*/
+
+ /*----------------------*/
+ rtr_info_t* rtr_info; /*!< R-tree Search Info */
+ /*----------------------*/
+
ulint magic_n2; /*!< this should be the same as
magic_n */
- /*----------------------*/
- unsigned innodb_api:1; /*!< whether this is a InnoDB API
- query */
- const rec_t* innodb_api_rec; /*!< InnoDB API search result */
+
byte* srch_key_val1; /*!< buffer used in converting
search key values from MySQL format
to InnoDB format.*/
@@ -881,7 +784,14 @@ struct row_prebuilt_t {
search key values from MySQL format
to InnoDB format.*/
uint srch_key_val_len; /*!< Size of search key */
+ /** Disable prefetch. */
+ bool m_no_prefetch;
+
+ /** Return materialized key for secondary index scan */
+ bool m_read_virtual_key;
+ /** The MySQL table object */
+ TABLE* m_mysql_table;
};
/** Callback for row_mysql_sys_index_iterate() */
@@ -889,11 +799,90 @@ struct SysIndexCallback {
virtual ~SysIndexCallback() { }
/** Callback method
- @param mtr - current mini transaction
- @param pcur - persistent cursor. */
+ @param mtr current mini transaction
+ @param pcur persistent cursor. */
virtual void operator()(mtr_t* mtr, btr_pcur_t* pcur) throw() = 0;
};
+
+/** Storage for calculating virtual columns */
+
+class String;
+struct VCOL_STORAGE
+{
+ TABLE *maria_table;
+ byte *innobase_record;
+ byte *maria_record;
+ String *blob_value_storage;
+};
+
+/**
+ Allocate a heap and record for calculating virtual fields
+ Used mainly for virtual fields in indexes
+
+@param[in] thd MariaDB THD
+@param[in] index Index in use
+@param[out] heap Heap that holds temporary row
+@param[in,out]	table		MariaDB table
+@param[out]	record		Pointer to the allocated MariaDB record
+@param[out]	storage		Internal storage for blobs etc.
+
+@return FALSE ok
+@return TRUE malloc failure
+*/
+
+bool innobase_allocate_row_for_vcol(
+ THD * thd,
+ dict_index_t* index,
+ mem_heap_t** heap,
+ TABLE** table,
+ byte** record,
+ VCOL_STORAGE** storage);
+
+/** Free memory allocated by innobase_allocate_row_for_vcol() */
+void innobase_free_row_for_vcol(VCOL_STORAGE *storage);
+
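As a usage sketch (assuming the surrounding InnoDB/MariaDB headers; error handling trimmed), the allocate/free pair above is meant to bracket a virtual-column computation:

	/* Sketch only: 'thd' and 'index' are assumed valid; releasing the
	heap via mem_heap_free() is an assumption about the contract. */
	mem_heap_t*	heap = NULL;
	TABLE*		table = NULL;
	byte*		record = NULL;
	VCOL_STORAGE*	storage = NULL;

	if (innobase_allocate_row_for_vcol(thd, index, &heap,
					   &table, &record, &storage)) {
		return(DB_OUT_OF_MEMORY);	/* TRUE means malloc failure */
	}

	/* ... compute virtual columns, e.g. via
	innobase_get_computed_value(), using 'table' and 'record' ... */

	innobase_free_row_for_vcol(storage);
	mem_heap_free(heap);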
+/** Get the computed value by supplying the base column values.
+@param[in,out] row the data row
+@param[in] col virtual column
+@param[in] index index on the virtual column
+@param[in,out] local_heap heap memory for processing large data etc.
+@param[in,out] heap memory heap that copies the actual index row
+@param[in] ifield index field
+@param[in] thd MySQL thread handle
+@param[in,out]	mysql_table	MariaDB table object
+@param[in,out]	mysql_rec	MariaDB record buffer
+@param[in] old_table during ALTER TABLE, this is the old table
+ or NULL.
+@param[in] parent_update update vector for the parent row
+@param[in] foreign foreign key information
+@return the field filled with computed value */
+dfield_t*
+innobase_get_computed_value(
+ dtuple_t* row,
+ const dict_v_col_t* col,
+ const dict_index_t* index,
+ mem_heap_t** local_heap,
+ mem_heap_t* heap,
+ const dict_field_t* ifield,
+ THD* thd,
+ TABLE* mysql_table,
+ byte* mysql_rec,
+ const dict_table_t* old_table,
+ upd_t* parent_update,
+ dict_foreign_t* foreign);
+
+/** Build the virtual column template for a table.
+@param[in,out]	table	the table whose virtual column
+			template is to be built */
+TABLE* innobase_init_vc_templ(dict_table_t* table);
+
+/** Change dbname and table name in table->vc_templ.
+@param[in,out]	table	the table whose virtual column template's
+dbname and tbname are to be renamed. */
+void
+innobase_rename_vc_templ(
+ dict_table_t* table);
+
#define ROW_PREBUILT_FETCH_MAGIC_N 465765687
#define ROW_MYSQL_WHOLE_ROW 0
@@ -911,8 +900,10 @@ struct SysIndexCallback {
#define ROW_READ_TRY_SEMI_CONSISTENT 1
#define ROW_READ_DID_SEMI_CONSISTENT 2
-#ifndef UNIV_NONINL
-#include "row0mysql.ic"
-#endif
+#ifdef UNIV_DEBUG
+/** Wait for the background drop list to become empty. */
+void
+row_wait_for_background_drop_list_empty();
+#endif /* UNIV_DEBUG */
#endif /* row0mysql.h */
diff --git a/storage/innobase/include/row0mysql.ic b/storage/innobase/include/row0mysql.ic
deleted file mode 100644
index 554b16e668e..00000000000
--- a/storage/innobase/include/row0mysql.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2001, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0mysql.ic
-MySQL interface for Innobase
-
-Created 1/23/2001 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/row0purge.h b/storage/innobase/include/row0purge.h
index 87c2083c965..c4ddff4243c 100644
--- a/storage/innobase/include/row0purge.h
+++ b/storage/innobase/include/row0purge.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,30 +27,15 @@ Created 3/14/1997 Heikki Tuuri
#ifndef row0purge_h
#define row0purge_h
-#include "univ.i"
-#include "data0data.h"
+#include "que0types.h"
#include "btr0types.h"
#include "btr0pcur.h"
-#include "dict0types.h"
#include "trx0types.h"
-#include "que0types.h"
#include "row0types.h"
-#include "row0purge.h"
#include "ut0vec.h"
+#include "row0mysql.h"
-/********************************************************************//**
-Creates a purge node to a query graph.
-@return own: purge node */
-UNIV_INTERN
-purge_node_t*
-row_purge_node_create(
-/*==================*/
- que_thr_t* parent, /*!< in: parent node, i.e., a
- thr node */
- mem_heap_t* heap) /*!< in: memory heap where created */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/***********************************************************//**
-Determines if it is possible to remove a secondary index entry.
+/** Determines if it is possible to remove a secondary index entry.
Removal is possible if the secondary index entry does not refer to any
not delete marked version of a clustered index record where DB_TRX_ID
is newer than the purge view.
@@ -62,20 +48,31 @@ inserts a record that the secondary index entry would refer to.
However, in that case, the user transaction would also re-insert the
secondary index entry after purge has removed it and released the leaf
page latch.
-@return true if the secondary index record can be purged */
-UNIV_INTERN
+@param[in,out] node row purge node
+@param[in] index secondary index
+@param[in] entry secondary index entry
+@param[in,out] sec_pcur secondary index cursor or NULL
+ if it is called for purge buffering
+ operation.
+@param[in,out] sec_mtr mini-transaction which holds
+ secondary index entry or NULL if it is
+ called for purge buffering operation.
+@param[in] is_tree true=pessimistic purge,
+ false=optimistic (leaf-page only)
+@return true if the secondary index record can be purged */
bool
row_purge_poss_sec(
-/*===============*/
- purge_node_t* node, /*!< in/out: row purge node */
- dict_index_t* index, /*!< in: secondary index */
- const dtuple_t* entry) /*!< in: secondary index entry */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ purge_node_t* node,
+ dict_index_t* index,
+ const dtuple_t* entry,
+ btr_pcur_t* sec_pcur=NULL,
+ mtr_t* sec_mtr=NULL,
+ bool is_tree=false);
+
/***************************************************************
Does the purge operation for a single undo log record. This is a high-level
function used in an SQL execution graph.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
row_purge_step(
/*===========*/
@@ -91,10 +88,17 @@ struct purge_node_t{
roll_ptr_t roll_ptr;/* roll pointer to undo log record */
ib_vector_t* undo_recs;/*!< Undo recs to purge */
- undo_no_t undo_no;/* undo number of the record */
+ undo_no_t undo_no;/*!< undo number of the record */
- ulint rec_type;/* undo log record type: TRX_UNDO_INSERT_REC,
+ ulint rec_type;/*!< undo log record type: TRX_UNDO_INSERT_REC,
... */
+private:
+ /** latest unavailable table ID (do not bother looking up again) */
+ table_id_t unavailable_table_id;
+ /** the latest modification of the table definition identified by
+ unavailable_table_id, or TRX_ID_MAX */
+ trx_id_t def_trx_id;
+public:
dict_table_t* table; /*!< table where purge is done */
ulint cmpl_info;/* compiler analysis info of an update */
@@ -111,13 +115,33 @@ struct purge_node_t{
mem_heap_t* heap; /*!< memory heap used as auxiliary storage for
row; this must be emptied after a successful
purge of a row */
- ibool found_clust;/* TRUE if the clustered index record
+ ibool found_clust;/*!< whether the clustered index record
determined by ref was found in the clustered
index, and we were able to position pcur on
it */
btr_pcur_t pcur; /*!< persistent cursor used in searching the
clustered index record */
- ibool done; /* Debug flag */
+#ifdef UNIV_DEBUG
+ /** whether the operation is in progress */
+ bool in_progress;
+#endif
+ trx_id_t trx_id; /*!< trx id for this purging record */
+
+ /** Virtual column information about opening of MariaDB table.
+	It is reset after processing each undo log record. */
+ purge_vcol_info_t vcol_info;
+
+ /** Constructor */
+ explicit purge_node_t(que_thr_t* parent) :
+ common(QUE_NODE_PURGE, parent),
+ undo_recs(NULL),
+ unavailable_table_id(0),
+ heap(mem_heap_create(256)),
+#ifdef UNIV_DEBUG
+ in_progress(false),
+#endif
+ vcol_info()
+ {}
#ifdef UNIV_DEBUG
/***********************************************************//**
@@ -127,12 +151,59 @@ struct purge_node_t{
each other if the found_clust flag is set.
@return true if the persistent cursor is consistent with
the ref member.*/
- bool validate_pcur();
+ bool validate_pcur();
#endif
-};
-#ifndef UNIV_NONINL
-#include "row0purge.ic"
-#endif
+	/** Whether purge failed to open the MariaDB table for virtual column
+ computation.
+ @return true if the table failed to open. */
+ bool vcol_op_failed() const { return !vcol_info.validate(); }
+
+ /** Determine if a table should be skipped in purge.
+	@param[in]	id	table identifier
+ @return whether to skip the table lookup and processing */
+ bool is_skipped(table_id_t id) const
+ {
+ return id == unavailable_table_id && trx_id <= def_trx_id;
+ }
+
+ /** Remember that a table should be skipped in purge.
+ @param[in] id table identifier
+ @param[in] limit last transaction for which to skip */
+ void skip(table_id_t id, trx_id_t limit)
+ {
+ DBUG_ASSERT(limit >= trx_id || !srv_safe_truncate);
+ unavailable_table_id = id;
+ def_trx_id = limit;
+ }
+
+ /** Start processing an undo log record. */
+ void start()
+ {
+ ut_ad(in_progress);
+ DBUG_ASSERT(common.type == QUE_NODE_PURGE);
+
+ table = NULL;
+ row = NULL;
+ ref = NULL;
+ index = NULL;
+ update = NULL;
+ found_clust = FALSE;
+ rec_type = ULINT_UNDEFINED;
+ cmpl_info = ULINT_UNDEFINED;
+ }
+
+	/** Reset the state at the end
+ @return the query graph parent */
+ que_node_t* end()
+ {
+ DBUG_ASSERT(common.type == QUE_NODE_PURGE);
+ undo_recs = NULL;
+ ut_d(in_progress = false);
+ vcol_info.reset();
+ mem_heap_empty(heap);
+ return common.parent;
+ }
+};
#endif
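The new skip()/is_skipped() members above implement a one-entry negative cache: the last table ID that failed to open is remembered together with the newest table-definition transaction ID up to which the lookup may be skipped. A self-contained sketch of the same idea, using plain integer types instead of the InnoDB typedefs:

	#include <cassert>
	#include <cstdint>

	// One-entry negative cache mirroring purge_node_t::skip()/is_skipped().
	struct skip_cache {
		uint64_t unavailable_table_id = 0; // last table that failed to open
		uint64_t def_trx_id = 0;           // newest DDL trx for which to skip
		uint64_t trx_id = 0;               // trx id of the current undo record

		bool is_skipped(uint64_t id) const {
			return id == unavailable_table_id && trx_id <= def_trx_id;
		}
		void skip(uint64_t id, uint64_t limit) {
			unavailable_table_id = id;
			def_trx_id = limit;
		}
	};

	int main() {
		skip_cache c;
		c.trx_id = 10;
		c.skip(42, 15);            // table 42 unavailable up to trx 15
		assert(c.is_skipped(42));  // 10 <= 15: skip the lookup
		c.trx_id = 20;
		assert(!c.is_skipped(42)); // newer record: retry the lookup
	}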
diff --git a/storage/innobase/include/row0purge.ic b/storage/innobase/include/row0purge.ic
deleted file mode 100644
index c86822458e9..00000000000
--- a/storage/innobase/include/row0purge.ic
+++ /dev/null
@@ -1,25 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-
-/**************************************************//**
-@file include/row0purge.ic
-Purge obsolete records
-
-Created 3/14/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/row0quiesce.h b/storage/innobase/include/row0quiesce.h
index 00ee62a309c..b05b7666b0b 100644
--- a/storage/innobase/include/row0quiesce.h
+++ b/storage/innobase/include/row0quiesce.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,7 +28,6 @@ Created 2012-02-08 by Sunny Bains
#ifndef row0quiesce_h
#define row0quiesce_h
-#include "univ.i"
#include "dict0types.h"
struct trx_t;
@@ -37,7 +37,6 @@ struct trx_t;
/*********************************************************************//**
Quiesce the tablespace that the table resides in. */
-UNIV_INTERN
void
row_quiesce_table_start(
/*====================*/
@@ -48,7 +47,6 @@ row_quiesce_table_start(
/*********************************************************************//**
Set a table's quiesce state.
@return DB_SUCCESS or errro code. */
-UNIV_INTERN
dberr_t
row_quiesce_set_state(
/*==================*/
@@ -59,7 +57,6 @@ row_quiesce_set_state(
/*********************************************************************//**
Cleanup after table quiesce. */
-UNIV_INTERN
void
row_quiesce_table_complete(
/*=======================*/
@@ -67,8 +64,4 @@ row_quiesce_table_complete(
trx_t* trx) /*!< in/out: transaction/session */
MY_ATTRIBUTE((nonnull));
-#ifndef UNIV_NONINL
-#include "row0quiesce.ic"
-#endif
-
#endif /* row0quiesce_h */
diff --git a/storage/innobase/include/row0row.h b/storage/innobase/include/row0row.h
index 286d2eea208..b7030e91098 100644
--- a/storage/innobase/include/row0row.h
+++ b/storage/innobase/include/row0row.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2016, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,57 +27,59 @@ Created 4/20/1996 Heikki Tuuri
#ifndef row0row_h
#define row0row_h
-#include "univ.i"
-#include "data0data.h"
-#include "dict0types.h"
-#include "trx0types.h"
#include "que0types.h"
+#include "ibuf0ibuf.h"
+#include "trx0types.h"
#include "mtr0mtr.h"
#include "rem0types.h"
-#include "read0types.h"
#include "row0types.h"
#include "btr0types.h"
/*********************************************************************//**
Gets the offset of the DB_TRX_ID field, in bytes relative to the origin of
a clustered index record.
-@return offset of DATA_TRX_ID */
+@return offset of DATA_TRX_ID */
UNIV_INLINE
ulint
row_get_trx_id_offset(
/*==================*/
const dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: record offsets */
+ const offset_t* offsets)/*!< in: record offsets */
MY_ATTRIBUTE((nonnull, warn_unused_result));
/*********************************************************************//**
Reads the trx id field from a clustered index record.
-@return value of the field */
+@return value of the field */
UNIV_INLINE
trx_id_t
row_get_rec_trx_id(
/*===============*/
const rec_t* rec, /*!< in: record */
const dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets)/*!< in: rec_get_offsets(rec, index) */
MY_ATTRIBUTE((nonnull, warn_unused_result));
/*********************************************************************//**
Reads the roll pointer field from a clustered index record.
-@return value of the field */
+@return value of the field */
UNIV_INLINE
roll_ptr_t
row_get_rec_roll_ptr(
/*=================*/
const rec_t* rec, /*!< in: record */
const dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets)/*!< in: rec_get_offsets(rec, index) */
MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/* Flags for row build type. */
+#define ROW_BUILD_NORMAL 0 /*!< build index row */
+#define ROW_BUILD_FOR_PURGE 1 /*!< build row for purge. */
+#define ROW_BUILD_FOR_UNDO 2 /*!< build row for undo. */
+#define ROW_BUILD_FOR_INSERT 3 /*!< build row for insert. */
/*****************************************************************//**
When an insert or purge to a table is performed, this function builds
the entry to be inserted into or purged from an index on the table.
@return index entry which should be inserted or purged
@retval NULL if the externally stored columns in the clustered index record
are unavailable and ext != NULL, or row is missing some needed columns. */
-UNIV_INTERN
dtuple_t*
row_build_index_entry_low(
/*======================*/
@@ -85,9 +88,12 @@ row_build_index_entry_low(
const row_ext_t* ext, /*!< in: externally stored column
prefixes, or NULL */
dict_index_t* index, /*!< in: index on the table */
- mem_heap_t* heap) /*!< in: memory heap from which
+ mem_heap_t* heap, /*!< in: memory heap from which
the memory for the index entry
is allocated */
+ ulint flag) /*!< in: ROW_BUILD_NORMAL,
+ ROW_BUILD_FOR_PURGE
+ or ROW_BUILD_FOR_UNDO */
MY_ATTRIBUTE((warn_unused_result, nonnull(1,3,4)));
/*****************************************************************//**
When an insert or purge to a table is performed, this function builds
@@ -111,8 +117,7 @@ row_build_index_entry(
/*******************************************************************//**
An inverse function to row_build_index_entry. Builds a row from a
record in a clustered index.
-@return own: row built; see the NOTE below! */
-UNIV_INTERN
+@return own: row built; see the NOTE below! */
dtuple_t*
row_build(
/*======*/
@@ -133,7 +138,7 @@ row_build(
this record must be at least
s-latched and the latch held
as long as the row dtuple is used! */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index)
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec,index)
or NULL, in which case this function
will invoke rec_get_offsets() */
const dict_table_t* col_table,
@@ -153,46 +158,75 @@ row_build(
row_ext_t** ext, /*!< out, own: cache of
externally stored column
prefixes, or NULL */
- mem_heap_t* heap) /*!< in: memory heap from which
+ mem_heap_t* heap); /*!< in: memory heap from which
the memory needed is allocated */
- MY_ATTRIBUTE((nonnull(2,3,9)));
+
+/** An inverse function to row_build_index_entry. Builds a row from a
+record in a clustered index, with possible indexing on ongoing
+addition of new virtual columns.
+@param[in] type ROW_COPY_POINTERS or ROW_COPY_DATA;
+@param[in] index clustered index
+@param[in] rec record in the clustered index
+@param[in] offsets rec_get_offsets(rec,index) or NULL
+@param[in] col_table table, to check which
+ externally stored columns
+ occur in the ordering columns
+ of an index, or NULL if
+ index->table should be
+ consulted instead
+@param[in] add_cols default values of added columns, or NULL
+@param[in] add_v new virtual columns added
+ along with new indexes
+@param[in] col_map mapping of old column
+ numbers to new ones, or NULL
+@param[in] ext cache of externally stored column
+ prefixes, or NULL
+@param[in] heap memory heap from which
+ the memory needed is allocated
+@return own: row built */
+dtuple_t*
+row_build_w_add_vcol(
+ ulint type,
+ const dict_index_t* index,
+ const rec_t* rec,
+ const offset_t* offsets,
+ const dict_table_t* col_table,
+ const dtuple_t* add_cols,
+ const dict_add_v_col_t* add_v,
+ const ulint* col_map,
+ row_ext_t** ext,
+ mem_heap_t* heap);
+
/*******************************************************************//**
Converts an index record to a typed data tuple.
@return index entry built; does not set info_bits, and the data fields
in the entry will point directly to rec */
-UNIV_INTERN
dtuple_t*
row_rec_to_index_entry_low(
/*=======================*/
const rec_t* rec, /*!< in: record in the index */
const dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- ulint* n_ext, /*!< out: number of externally
- stored columns */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
mem_heap_t* heap) /*!< in: memory heap from which
the memory needed is allocated */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/*******************************************************************//**
Converts an index record to a typed data tuple. NOTE that externally
stored (often big) fields are NOT copied to heap.
-@return own: index entry built */
-UNIV_INTERN
+@return own: index entry built */
dtuple_t*
row_rec_to_index_entry(
/*===================*/
const rec_t* rec, /*!< in: record in the index */
const dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in/out: rec_get_offsets(rec) */
- ulint* n_ext, /*!< out: number of externally
- stored columns */
+ const offset_t* offsets,/*!< in/out: rec_get_offsets(rec) */
mem_heap_t* heap) /*!< in: memory heap from which
the memory needed is allocated */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
search the clustered index record.
-@return own: row reference built; see the NOTE below! */
-UNIV_INTERN
+@return own: row reference built; see the NOTE below! */
dtuple_t*
row_build_row_ref(
/*==============*/
@@ -210,11 +244,10 @@ row_build_row_ref(
as long as the row reference is used! */
mem_heap_t* heap) /*!< in: memory heap from which the memory
needed is allocated */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
search the clustered index record. */
-UNIV_INTERN
void
row_build_row_ref_in_tuple(
/*=======================*/
@@ -229,7 +262,7 @@ row_build_row_ref_in_tuple(
held as long as the row
reference is used! */
const dict_index_t* index, /*!< in: secondary index */
- ulint* offsets,/*!< in: rec_get_offsets(rec, index)
+ offset_t* offsets,/*!< in: rec_get_offsets(rec, index)
or NULL */
trx_t* trx) /*!< in: transaction or NULL */
MY_ATTRIBUTE((nonnull(1,2,3)));
@@ -248,12 +281,11 @@ row_build_row_ref_fast(
const rec_t* rec, /*!< in: record in the index; must be
preserved while ref is used, as we do
not copy field values to heap */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets);/*!< in: array returned by rec_get_offsets() */
/***************************************************************//**
Searches the clustered index record for a row, if we have the row
reference.
-@return TRUE if found */
-UNIV_INTERN
+@return TRUE if found */
ibool
row_search_on_row_ref(
/*==================*/
@@ -267,8 +299,7 @@ row_search_on_row_ref(
/*********************************************************************//**
Fetches the clustered index record for a secondary index record. The latches
on the secondary index record are preserved.
-@return record or NULL, if no record found */
-UNIV_INTERN
+@return record or NULL, if no record found */
rec_t*
row_get_clust_rec(
/*==============*/
@@ -279,6 +310,22 @@ row_get_clust_rec(
mtr_t* mtr) /*!< in: mtr */
MY_ATTRIBUTE((nonnull, warn_unused_result));
+/** Parse an integer from column data, which may be of type
+DATA_INT, DATA_FLOAT or DATA_DOUBLE. If the value is negative
+and the type is not unsigned, the value is reset to 0
+@param[in] data data to read
+@param[in] len length of data
+@param[in] mtype mtype of data
+@param[in] unsigned_type if the data is unsigned
+@return the integer value from the data */
+inline
+ib_uint64_t
+row_parse_int(
+ const byte* data,
+ ulint len,
+ ulint mtype,
+ bool unsigned_type);
+
/** Result of row_search_index_entry */
enum row_search_result {
ROW_FOUND = 0, /*!< the record was found */
@@ -294,8 +341,7 @@ enum row_search_result {
/***************************************************************//**
Searches an index record.
-@return whether the record was found or buffered */
-UNIV_INTERN
+@return whether the record was found or buffered */
enum row_search_result
row_search_index_entry(
/*===================*/
@@ -322,8 +368,7 @@ Not more than "buf_size" bytes are written to "buf".
The result is always NUL-terminated (provided buf_size is positive) and the
number of bytes that were written to "buf" is returned (including the
terminating NUL).
-@return number of bytes that were written */
-UNIV_INTERN
+@return number of bytes that were written */
ulint
row_raw_format(
/*===========*/
@@ -336,8 +381,34 @@ row_raw_format(
in bytes */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-#ifndef UNIV_NONINL
+/** Prepare to start a mini-transaction to modify an index.
+@param[in,out] mtr mini-transaction
+@param[in,out] index possibly secondary index
+@param[in] pessimistic whether this is a pessimistic operation */
+inline
+void
+row_mtr_start(mtr_t* mtr, dict_index_t* index, bool pessimistic)
+{
+ mtr->start();
+
+ switch (index->space) {
+ case IBUF_SPACE_ID:
+ if (pessimistic
+ && !(index->type & (DICT_UNIQUE | DICT_SPATIAL))) {
+ ibuf_free_excess_pages();
+ }
+ break;
+ case SRV_TMP_SPACE_ID:
+ mtr->set_log_mode(MTR_LOG_NO_REDO);
+ break;
+ default:
+ mtr->set_named_space(index->space);
+ break;
+ }
+
+ log_free_check();
+}
+
#include "row0row.ic"
-#endif
#endif
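A hedged usage sketch for the new row_mtr_start() helper (assuming a valid secondary index and the InnoDB source tree); it bundles the per-call-site boilerplate of starting the mini-transaction, handling the change buffer and temporary tablespace cases, and checking the redo log margin:

	/* Sketch only: an optimistic (leaf-page only) index operation. */
	mtr_t	mtr;

	row_mtr_start(&mtr, index, /* pessimistic */ false);

	/* ... modify the index pages ... */

	mtr.commit();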
diff --git a/storage/innobase/include/row0row.ic b/storage/innobase/include/row0row.ic
index d40e02dc48f..3c5ed282709 100644
--- a/storage/innobase/include/row0row.ic
+++ b/storage/innobase/include/row0row.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -30,13 +30,13 @@ Created 4/20/1996 Heikki Tuuri
/*********************************************************************//**
Gets the offset of the DB_TRX_ID field, in bytes relative to the origin of
a clustered index record.
-@return offset of DATA_TRX_ID */
+@return offset of DATA_TRX_ID */
UNIV_INLINE
ulint
row_get_trx_id_offset(
/*==================*/
const dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: record offsets */
+ const offset_t* offsets)/*!< in: record offsets */
{
ulint pos;
ulint offset;
@@ -56,14 +56,14 @@ row_get_trx_id_offset(
/*********************************************************************//**
Reads the trx id field from a clustered index record.
-@return value of the field */
+@return value of the field */
UNIV_INLINE
trx_id_t
row_get_rec_trx_id(
/*===============*/
const rec_t* rec, /*!< in: record */
const dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets)/*!< in: rec_get_offsets(rec, index) */
{
ulint offset;
@@ -81,14 +81,14 @@ row_get_rec_trx_id(
/*********************************************************************//**
Reads the roll pointer field from a clustered index record.
-@return value of the field */
+@return value of the field */
UNIV_INLINE
roll_ptr_t
row_get_rec_roll_ptr(
/*=================*/
const rec_t* rec, /*!< in: record */
const dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets)/*!< in: rec_get_offsets(rec, index) */
{
ulint offset;
@@ -126,7 +126,8 @@ row_build_index_entry(
dtuple_t* entry;
ut_ad(dtuple_check_typed(row));
- entry = row_build_index_entry_low(row, ext, index, heap);
+ entry = row_build_index_entry_low(row, ext, index, heap,
+ ROW_BUILD_NORMAL);
ut_ad(!entry || dtuple_check_typed(entry));
return(entry);
}
@@ -146,7 +147,7 @@ row_build_row_ref_fast(
const rec_t* rec, /*!< in: record in the index; must be
preserved while ref is used, as we do
not copy field values to heap */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
{
dfield_t* dfield;
const byte* field;
@@ -172,3 +173,52 @@ row_build_row_ref_fast(
}
}
}
+
+/** Parse an integer from column data, which may be of type
+DATA_INT, DATA_FLOAT or DATA_DOUBLE. If the value is negative
+and the type is not unsigned, the value is reset to 0
+@param[in] data data to read
+@param[in] len length of data
+@param[in] mtype mtype of data
+@param[in] unsigned_type if the data is unsigned
+@return the integer value from the data */
+ib_uint64_t
+row_parse_int(
+ const byte* data,
+ ulint len,
+ ulint mtype,
+ bool unsigned_type)
+{
+ ib_uint64_t value = 0;
+
+ switch (mtype) {
+ case DATA_INT:
+
+ ut_a(len <= sizeof value);
+ value = mach_read_int_type(data, len, unsigned_type);
+ break;
+
+ case DATA_FLOAT:
+
+ ut_a(len == sizeof(float));
+ value = static_cast<ib_uint64_t>(mach_float_read(data));
+ break;
+
+ case DATA_DOUBLE:
+
+ ut_a(len == sizeof(double));
+ value = static_cast<ib_uint64_t>(mach_double_read(data));
+ break;
+
+ default:
+ ut_error;
+
+ }
+
+ if (!unsigned_type && static_cast<int64_t>(value) < 0) {
+ value = 0;
+ }
+
+ return(value);
+}
+
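The clamping step at the end of row_parse_int() matters for AUTO_INCREMENT handling: a negative signed value is read back as 0, while the same bit pattern in an unsigned column is kept. A self-contained illustration of that rule (plain C++, no InnoDB types):

	#include <cassert>
	#include <cstdint>

	// Mirrors the final clamping step of row_parse_int(): negative
	// signed values are reset to 0.
	uint64_t clamp_parsed_int(uint64_t value, bool unsigned_type) {
		if (!unsigned_type && static_cast<int64_t>(value) < 0) {
			value = 0;
		}
		return value;
	}

	int main() {
		assert(clamp_parsed_int(static_cast<uint64_t>(-5), false) == 0);
		assert(clamp_parsed_int(static_cast<uint64_t>(-5), true) != 0);
		assert(clamp_parsed_int(7, false) == 7);
	}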
diff --git a/storage/innobase/include/row0sel.h b/storage/innobase/include/row0sel.h
index 7ca30815b53..d9c08243a91 100644
--- a/storage/innobase/include/row0sel.h
+++ b/storage/innobase/include/row0sel.h
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2017, Oracle and/or its affiliates.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,22 +27,19 @@ Created 12/19/1997 Heikki Tuuri
#ifndef row0sel_h
#define row0sel_h
-#include "univ.i"
#include "data0data.h"
#include "que0types.h"
-#include "dict0types.h"
#include "trx0types.h"
+#include "read0types.h"
#include "row0types.h"
#include "que0types.h"
#include "pars0sym.h"
#include "btr0pcur.h"
-#include "read0read.h"
#include "row0mysql.h"
/*********************************************************************//**
Creates a select node struct.
-@return own: select node struct */
-UNIV_INTERN
+@return own: select node struct */
sel_node_t*
sel_node_create(
/*============*/
@@ -49,7 +47,6 @@ sel_node_create(
/*********************************************************************//**
Frees the memory private to a select node when a query graph is freed,
does not free the heap where the node was originally created. */
-UNIV_INTERN
void
sel_node_free_private(
/*==================*/
@@ -57,14 +54,13 @@ sel_node_free_private(
/*********************************************************************//**
Frees a prefetch buffer for a column, including the dynamically allocated
memory for data stored there. */
-UNIV_INTERN
void
sel_col_prefetch_buf_free(
/*======================*/
sel_buf_t* prefetch_buf); /*!< in, own: prefetch buffer */
/*********************************************************************//**
Gets the plan node for the nth table in a join.
-@return plan node */
+@return plan node */
UNIV_INLINE
plan_t*
sel_node_get_nth_plan(
@@ -74,15 +70,14 @@ sel_node_get_nth_plan(
/**********************************************************************//**
Performs a select step. This is a high-level function used in SQL execution
graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
row_sel_step(
/*=========*/
que_thr_t* thr); /*!< in: query thread */
/**********************************************************************//**
Performs an execution step of an open or close cursor statement node.
-@return query thread to run next or NULL */
+@return query thread to run next or NULL */
UNIV_INLINE
que_thr_t*
open_step(
@@ -90,36 +85,37 @@ open_step(
que_thr_t* thr); /*!< in: query thread */
/**********************************************************************//**
Performs a fetch for a cursor.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
fetch_step(
/*=======*/
que_thr_t* thr); /*!< in: query thread */
-/****************************************************************//**
-Sample callback function for fetch that prints each row.
-@return always returns non-NULL */
-UNIV_INTERN
-void*
-row_fetch_print(
-/*============*/
- void* row, /*!< in: sel_node_t* */
- void* user_arg); /*!< in: not used */
/***********************************************************//**
Prints a row in a select result.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
row_printf_step(
/*============*/
que_thr_t* thr); /*!< in: query thread */
+
+/** Copy used fields from cached row.
+Copy cache record field by field, don't touch fields that
+are not covered by current key.
+@param[out] buf Where to copy the MySQL row.
+@param[in] cached_rec What to copy (in MySQL row format).
+@param[in] prebuilt prebuilt struct. */
+void
+row_sel_copy_cached_fields_for_mysql(
+ byte* buf,
+ const byte* cached_rec,
+ row_prebuilt_t* prebuilt);
+
/****************************************************************//**
Converts a key value stored in MySQL format to an Innobase dtuple. The last
field of the key value may be just a prefix of a fixed length field: hence
the parameter key_len. But currently we do not allow search keys where the
last field is only a prefix of the full key field len and print a warning if
such appears. */
-UNIV_INTERN
void
row_sel_convert_mysql_key_to_innobase(
/*==================================*/
@@ -139,21 +135,72 @@ row_sel_convert_mysql_key_to_innobase(
const byte* key_ptr, /*!< in: MySQL key value */
ulint key_len, /*!< in: MySQL key value length */
trx_t* trx); /*!< in: transaction */
-/********************************************************************//**
-Searches for rows in the database. This is used in the interface to
+
+
+/** Searches for rows in the database. This is used in the interface to
MySQL. This function opens a cursor, and also implements fetch next
and fetch prev. NOTE that if we do a search with a full key value
from a unique index (ROW_SEL_EXACT), then we will not store the cursor
position and fetch next or fetch prev must not be tried to the cursor!
+
+@param[out] buf buffer for the fetched row in MySQL format
+@param[in]	mode	search mode PAGE_CUR_L, ...
+@param[in,out]	prebuilt	prebuilt struct for the table handler;
+			this contains the info of search_tuple,
+			index; if the search tuple contains 0 fields then
+			we position the cursor at the start or the end of
+			the index, depending on 'mode'
+@param[in]	match_mode	0 or ROW_SEL_EXACT or ROW_SEL_EXACT_PREFIX
+@param[in]	direction	0 or ROW_SEL_NEXT or ROW_SEL_PREV;
+			Note: if this is != 0, then prebuilt must have a
+			pcur with a stored position! When opening a
+			cursor, 'direction' should be 0.
@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK,
-DB_LOCK_TABLE_FULL, or DB_TOO_BIG_RECORD */
-UNIV_INTERN
+DB_LOCK_TABLE_FULL, DB_CORRUPTION, or DB_TOO_BIG_RECORD */
+UNIV_INLINE
dberr_t
row_search_for_mysql(
+ byte* buf,
+ page_cur_mode_t mode,
+ row_prebuilt_t* prebuilt,
+ ulint match_mode,
+ ulint direction)
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Searches for rows in the database using cursor.
+The function is mainly used for tables that are shared across connections,
+so it employs techniques that help re-construct the rows that the
+transaction is supposed to see.
+It also has optimizations such as pre-fetching rows and using the adaptive
+hash index (AHI).
+
+@param[out] buf buffer for the fetched row in MySQL format
+@param[in]	mode	search mode PAGE_CUR_L, ...
+@param[in,out]	prebuilt	prebuilt struct for the table handler;
+			this contains the info of search_tuple,
+			index; if the search tuple contains 0 fields then
+			we position the cursor at the start or the end of
+			the index, depending on 'mode'
+@param[in]	match_mode	0 or ROW_SEL_EXACT or ROW_SEL_EXACT_PREFIX
+@param[in]	direction	0 or ROW_SEL_NEXT or ROW_SEL_PREV;
+			Note: if this is != 0, then prebuilt must have a
+			pcur with a stored position! When opening a
+			cursor, 'direction' should be 0.
+@return DB_SUCCESS or error code */
+dberr_t
+row_search_mvcc(
+ byte* buf,
+ page_cur_mode_t mode,
+ row_prebuilt_t* prebuilt,
+ ulint match_mode,
+ ulint direction)
+ MY_ATTRIBUTE((warn_unused_result));
+
+/********************************************************************//**
+Count rows in an R-Tree leaf level.
+@return DB_SUCCESS if successful */
+dberr_t
+row_count_rtree_recs(
/*=================*/
- byte* buf, /*!< in/out: buffer for the fetched
- row in the MySQL format */
- ulint mode, /*!< in: search mode PAGE_CUR_L, ... */
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct for the
table handle; this contains the info
of search_tuple, index; if search
@@ -161,35 +208,15 @@ row_search_for_mysql(
position the cursor at the start or
the end of the index, depending on
'mode' */
- ulint match_mode, /*!< in: 0 or ROW_SEL_EXACT or
- ROW_SEL_EXACT_PREFIX */
- ulint direction) /*!< in: 0 or ROW_SEL_NEXT or
- ROW_SEL_PREV; NOTE: if this is != 0,
- then prebuilt must have a pcur
- with stored position! In opening of a
- cursor 'direction' should be 0. */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*******************************************************************//**
-Checks if MySQL at the moment is allowed for this table to retrieve a
-consistent read result, or store it to the query cache.
-@return TRUE if storing or retrieving from the query cache is permitted */
-UNIV_INTERN
-ibool
-row_search_check_if_query_cache_permitted(
-/*======================================*/
- trx_t* trx, /*!< in: transaction object */
- const char* norm_name); /*!< in: concatenation of database name,
- '/' char, table name */
-/*******************************************************************//**
-Read the max AUTOINC value from an index.
-@return DB_SUCCESS if all OK else error code */
-UNIV_INTERN
-dberr_t
-row_search_max_autoinc(
-/*===================*/
- dict_index_t* index, /*!< in: index to search */
- const char* col_name, /*!< in: autoinc column name */
- ib_uint64_t* value) /*!< out: AUTOINC value read */
+ ulint* n_rows); /*!< out: number of entries
+ seen in the consistent read */
+
+/** Read the max AUTOINC value from an index.
+@param[in] index index starting with an AUTO_INCREMENT column
+@return the largest AUTO_INCREMENT value
+@retval 0 if no records were found */
+ib_uint64_t
+row_search_max_autoinc(dict_index_t* index)
MY_ATTRIBUTE((nonnull, warn_unused_result));
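row_search_max_autoinc() now returns the value directly instead of a dberr_t plus an out-parameter. A hedged call-site sketch (assuming 'index' starts with the AUTO_INCREMENT column, as required above):

	/* Sketch only: read the current maximum AUTO_INCREMENT value. */
	ib_uint64_t	autoinc = row_search_max_autoinc(index);

	if (autoinc == 0) {
		/* No records were found: start from the initial value. */
	}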
/** A structure for caching column values for prefetched rows */
@@ -245,7 +272,7 @@ struct plan_t{
for each field in the search
tuple */
dtuple_t* tuple; /*!< search tuple */
- ulint mode; /*!< search mode: PAGE_CUR_G, ... */
+ page_cur_mode_t mode; /*!< search mode: PAGE_CUR_G, ... */
ulint n_exact_match; /*!< number of first fields in
the search tuple which must be
exactly matched */
@@ -324,7 +351,7 @@ struct sel_node_t{
containing the search plan and the
search data structures */
que_node_t* search_cond; /*!< search condition */
- read_view_t* read_view; /*!< if the query is a non-locking
+ ReadView* read_view; /*!< if the query is a non-locking
consistent read, its read view is
placed here, otherwise NULL */
ibool consistent_read;/*!< TRUE if the select is a consistent,
@@ -372,9 +399,7 @@ struct fetch_node_t{
further rows and the cursor is
modified so (cursor % NOTFOUND) is
true. If it returns not-NULL,
- continue normally. See
- row_fetch_print() for an example
- (and a useful debugging tool). */
+ continue normally. */
};
/** Open or close cursor operation type */
@@ -414,8 +439,45 @@ enum row_sel_match_mode {
of a fixed length column) */
};
-#ifndef UNIV_NONINL
+#ifdef UNIV_DEBUG
+/** Convert a non-SQL-NULL field from Innobase format to MySQL format. */
+# define row_sel_field_store_in_mysql_format(dest,templ,idx,field,src,len) \
+ row_sel_field_store_in_mysql_format_func(dest,templ,idx,field,src,len)
+#else /* UNIV_DEBUG */
+/** Convert a non-SQL-NULL field from Innobase format to MySQL format. */
+# define row_sel_field_store_in_mysql_format(dest,templ,idx,field,src,len) \
+ row_sel_field_store_in_mysql_format_func(dest,templ,src,len)
+#endif /* UNIV_DEBUG */
+
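The macro above passes the index and field number only in debug builds, so release builds do not carry parameters that are used solely in assertions. The same pattern in a self-contained form (NDEBUG standing in for the absence of UNIV_DEBUG):

	#include <cassert>
	#include <cstddef>
	#include <cstdio>

	/* Debug-only parameter pattern: the extra 'field_no' argument
	exists only when assertions are enabled. */
	#ifndef NDEBUG
	# define store_field(dest, size, src, field_no) \
		store_field_func(dest, size, src, field_no)
	static void store_field_func(char* dest, size_t size,
				     const char* src, int field_no)
	{
		assert(field_no >= 0);	/* debug-only sanity check */
		snprintf(dest, size, "%s", src);
	}
	#else
	# define store_field(dest, size, src, field_no) \
		store_field_func(dest, size, src)
	static void store_field_func(char* dest, size_t size, const char* src)
	{
		snprintf(dest, size, "%s", src);
	}
	#endif

	int main()
	{
		char	buf[16];
		store_field(buf, sizeof buf, "abc", 0);
		return(0);
	}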
+/**************************************************************//**
+Stores a non-SQL-NULL field in the MySQL format. The counterpart of this
+function is row_mysql_store_col_in_innobase_format() in row0mysql.cc. */
+
+void
+row_sel_field_store_in_mysql_format_func(
+/*=====================================*/
+ byte* dest, /*!< in/out: buffer where to store; NOTE
+ that BLOBs are not in themselves
+ stored here: the caller must allocate
+ and copy the BLOB into buffer before,
+ and pass the pointer to the BLOB in
+ 'data' */
+ const mysql_row_templ_t* templ,
+ /*!< in: MySQL column template.
+ Its following fields are referenced:
+ type, is_unsigned, mysql_col_len,
+ mbminlen, mbmaxlen */
+#ifdef UNIV_DEBUG
+ const dict_index_t* index,
+ /*!< in: InnoDB index */
+ ulint field_no,
+ /*!< in: templ->rec_field_no or
+ templ->clust_rec_field_no or
+ templ->icp_rec_field_no */
+#endif /* UNIV_DEBUG */
+ const byte* data, /*!< in: data to store */
+ ulint len); /*!< in: length of the data */
+
#include "row0sel.ic"
-#endif
#endif
diff --git a/storage/innobase/include/row0sel.ic b/storage/innobase/include/row0sel.ic
index 1585a41521e..7880605ca8f 100644
--- a/storage/innobase/include/row0sel.ic
+++ b/storage/innobase/include/row0sel.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,7 +27,7 @@ Created 12/19/1997 Heikki Tuuri
/*********************************************************************//**
Gets the plan node for the nth table in a join.
-@return plan node */
+@return plan node */
UNIV_INLINE
plan_t*
sel_node_get_nth_plan(
@@ -55,7 +55,7 @@ sel_node_reset_cursor(
/**********************************************************************//**
Performs an execution step of an open or close cursor statement node.
-@return query thread to run next or NULL */
+@return query thread to run next or NULL */
UNIV_INLINE
que_thr_t*
open_step(
@@ -103,3 +103,36 @@ open_step(
return(thr);
}
+
+
+/** Searches for rows in the database. This is used in the interface to
+MySQL. This function opens a cursor, and also implements fetch next
+and fetch prev. NOTE that if we do a search with a full key value
+from a unique index (ROW_SEL_EXACT), then we will not store the cursor
+position and fetch next or fetch prev must not be tried to the cursor!
+
+@param[out] buf buffer for the fetched row in MySQL format
+@param[in]	mode	search mode PAGE_CUR_L, ...
+@param[in,out]	prebuilt	prebuilt struct for the table handler;
+			this contains the info of search_tuple,
+			index; if the search tuple contains 0 fields then
+			we position the cursor at the start or the end of
+			the index, depending on 'mode'
+@param[in]	match_mode	0 or ROW_SEL_EXACT or ROW_SEL_EXACT_PREFIX
+@param[in]	direction	0 or ROW_SEL_NEXT or ROW_SEL_PREV;
+			Note: if this is != 0, then prebuilt must have a
+			pcur with a stored position! When opening a
+			cursor, 'direction' should be 0.
+@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK,
+DB_LOCK_TABLE_FULL, DB_CORRUPTION, or DB_TOO_BIG_RECORD */
+UNIV_INLINE
+dberr_t
+row_search_for_mysql(
+ byte* buf,
+ page_cur_mode_t mode,
+ row_prebuilt_t* prebuilt,
+ ulint match_mode,
+ ulint direction)
+{
+ return(row_search_mvcc(buf, mode, prebuilt, match_mode, direction));
+}
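Because the new row_search_for_mysql() is a plain inline forwarder to row_search_mvcc(), existing handler-layer fetch loops keep working unchanged. A hedged sketch of such a loop (assuming 'buf' and 'prebuilt' come from the handler layer and the search tuple is set up):

	/* Sketch only: open a cursor, then fetch forward until exhausted. */
	dberr_t	err = row_search_for_mysql(buf, PAGE_CUR_GE, prebuilt,
					   ROW_SEL_EXACT_PREFIX, 0 /* open */);

	while (err == DB_SUCCESS) {
		/* ... consume the row in 'buf' ... */
		err = row_search_for_mysql(buf, PAGE_CUR_GE, prebuilt,
					   ROW_SEL_EXACT_PREFIX, ROW_SEL_NEXT);
	}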
diff --git a/storage/innobase/include/row0trunc.h b/storage/innobase/include/row0trunc.h
new file mode 100644
index 00000000000..bd890fe7b73
--- /dev/null
+++ b/storage/innobase/include/row0trunc.h
@@ -0,0 +1,428 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0trunc.h
+TRUNCATE implementation
+
+Created 2013-04-25 Krunal Bauskar
+*******************************************************/
+
+#ifndef row0trunc_h
+#define row0trunc_h
+
+#include "row0mysql.h"
+#include "dict0boot.h"
+#include "fil0fil.h"
+#include "srv0start.h"
+
+#include <vector>
+
+/** The information of TRUNCATE log record.
+This class handles the recovery stage of TRUNCATE table. */
+class truncate_t {
+
+public:
+ /**
+ Constructor
+
+ @param old_table_id old table id assigned to table before truncate
+ @param new_table_id new table id that will be assigned to table
+ after truncate
+ @param dir_path directory path */
+ truncate_t(
+ table_id_t old_table_id,
+ table_id_t new_table_id,
+ const char* dir_path);
+
+ /**
+ Constructor
+
+	@param log_file_name	log file to parse during recovery, to populate
+				information about the table to truncate */
+ truncate_t(const char* log_file_name);
+
+ /**
+	Constructor
+
+	@param space_id		space in which the table resides
+	@param name		table name
+	@param tablespace_flags	tablespace flags used for re-creating
+				the tablespace
+	@param log_flags	page format flag
+	@param recv_lsn		lsn of the redo log record. */
+ truncate_t(
+ ulint space_id,
+ const char* name,
+ ulint tablespace_flags,
+ ulint log_flags,
+ lsn_t recv_lsn);
+
+ /** Destructor */
+ ~truncate_t();
+
+ /** The index information of MLOG_FILE_TRUNCATE redo record */
+ struct index_t {
+
+ /* Default copy constructor and destructor should be OK. */
+
+ index_t();
+
+ /**
+ Set the truncate log values for a compressed table.
+ @return DB_CORRUPTION or error code */
+ dberr_t set(const dict_index_t* index);
+
+ typedef std::vector<byte, ut_allocator<byte> > fields_t;
+
+ /** Index id */
+ index_id_t m_id;
+
+ /** Index type */
+ ulint m_type;
+
+ /** Root Page Number */
+ ulint m_root_page_no;
+
+ /** New Root Page Number.
+ Note: This field is not persisted to TRUNCATE log but used
+ during truncate table fix-up for updating SYS_XXXX tables. */
+ ulint m_new_root_page_no;
+
+ /** Number of index fields */
+ ulint m_n_fields;
+
+ /** DATA_TRX_ID column position. */
+ ulint m_trx_id_pos;
+
+		/** Compressed table field meta data, encoded by
+		page_zip_fields_encode. Empty for non-compressed tables.
+		Should be NUL terminated. */
+ fields_t m_fields;
+ };
+
+ /**
+ @return the directory path, can be NULL */
+ const char* get_dir_path() const
+ {
+ return(m_dir_path);
+ }
+
+ /**
+ Register index information
+
+ @param index index information logged as part of truncate log. */
+ void add(index_t& index)
+ {
+ m_indexes.push_back(index);
+ }
+
+ /**
+	Add a table to truncate after recovery.
+
+	@param ptr	table information needed to complete the truncate
+			of the table. */
+ static void add(truncate_t* ptr)
+ {
+ s_tables.push_back(ptr);
+ }
+
+ /**
+ Clear registered index vector */
+ void clear()
+ {
+ m_indexes.clear();
+ }
+
+ /**
+ @return old table id of the table to truncate */
+ table_id_t old_table_id() const
+ {
+ return(m_old_table_id);
+ }
+
+ /**
+ @return new table id of the table to truncate */
+ table_id_t new_table_id() const
+ {
+ return(m_new_table_id);
+ }
+
+ /**
+ Update root page number in SYS_XXXX tables.
+
+ @param trx transaction object
+ @param table_id table id for which information needs to
+ be updated.
+ @param reserve_dict_mutex if TRUE, acquire/release
+ dict_sys->mutex around call to pars_sql.
+ @param mark_index_corrupted if true, then mark index corrupted
+ @return DB_SUCCESS or error code */
+ dberr_t update_root_page_no(
+ trx_t* trx,
+ table_id_t table_id,
+ ibool reserve_dict_mutex,
+ bool mark_index_corrupted) const;
+
+ /** Create an index for a table.
+ @param[in] table_name table name, for which to create
+ the index
+ @param[in] space_id space id where we have to
+ create the index
+ @param[in] page_size page size of the .ibd file
+	@param[in]	index_type	type of the index to create
+	@param[in]	index_id	id of the index to create
+ @param[in] btr_redo_create_info control info for ::btr_create()
+ @param[in,out] mtr mini-transaction covering the
+ create index
+ @return root page no or FIL_NULL on failure */
+ ulint create_index(
+ const char* table_name,
+ ulint space_id,
+ const page_size_t& page_size,
+ ulint index_type,
+ index_id_t index_id,
+ const btr_create_t& btr_redo_create_info,
+ mtr_t* mtr) const;
+
+ /** Create the indexes for a table
+ @param[in] table_name table name, for which to create the
+ indexes
+ @param[in] space_id space id where we have to create the
+ indexes
+ @param[in] page_size page size of the .ibd file
+ @param[in] flags tablespace flags
+ @param[in] format_flags page format flags
+ @return DB_SUCCESS or error code. */
+ dberr_t create_indexes(
+ const char* table_name,
+ ulint space_id,
+ const page_size_t& page_size,
+ ulint flags,
+ ulint format_flags);
+
+	/** Check if an index has been modified since the TRUNCATE log
+	snapshot was recorded.
+	@param space_id		space_id where the table/indexes reside
+	@param root_page_no	root page number of the index
+	@return true if modified else false */
+ bool is_index_modified_since_logged(
+ ulint space_id,
+ ulint root_page_no) const;
+
+	/** Drop the indexes of a table.
+	@param space_id	space_id where the table/indexes reside */
+ void drop_indexes(ulint space_id) const;
+
+ /**
+ Parses log record during recovery
+ @param start_ptr buffer containing log body to parse
+ @param end_ptr buffer end
+
+ @return DB_SUCCESS or error code */
+ dberr_t parse(
+ byte* start_ptr,
+ const byte* end_ptr);
+
+ /** Parse MLOG_TRUNCATE log record from REDO log file during recovery.
+ @param[in,out] start_ptr buffer containing log body to parse
+ @param[in] end_ptr buffer end
+ @param[in] space_id tablespace identifier
+	@return pointer to the position parsed up to, or NULL */
+ static byte* parse_redo_entry(
+ byte* start_ptr,
+ const byte* end_ptr,
+ ulint space_id);
+
+ /**
+ Write a log record for truncating a single-table tablespace.
+
+ @param start_ptr buffer to write log record
+ @param end_ptr buffer end
+ @param space_id space id
+ @param tablename the table name in the usual
+ databasename/tablename format of InnoDB
+ @param flags tablespace flags
+ @param format_flags page format
+	@param lsn		lsn while logging
+	@return DB_SUCCESS or error code */
+ dberr_t write(
+ byte* start_ptr,
+ byte* end_ptr,
+ ulint space_id,
+ const char* tablename,
+ ulint flags,
+ ulint format_flags,
+ lsn_t lsn) const;
+
+ /**
+ @return number of indexes parsed from the truncate log record */
+ size_t indexes() const;
+
+ /**
+ Truncate a single-table tablespace. The tablespace must be cached
+ in the memory cache.
+
+ Note: This is defined in fil0fil.cc because it needs to access some
+ types that are local to that file.
+
+ @param space_id space id
+ @param dir_path directory path
+ @param tablename the table name in the usual
+ databasename/tablename format of InnoDB
+ @param flags tablespace flags
+ @param default_size if true, truncate to default size if tablespace
+ is being newly re-initialized.
+ @return DB_SUCCESS or error */
+ static dberr_t truncate(
+ ulint space_id,
+ const char* dir_path,
+ const char* tablename,
+ ulint flags,
+ bool default_size);
+
+ /**
+ Fix the table truncate by applying information parsed from TRUNCATE log.
+ Fix-up includes re-creating table (drop and re-create indexes)
+ @return error code or DB_SUCCESS */
+ static dberr_t fixup_tables_in_system_tablespace();
+
+ /**
+ Fix the table truncate by applying information parsed from TRUNCATE log.
+ Fix-up includes re-creating tablespace.
+ @return error code or DB_SUCCESS */
+ static dberr_t fixup_tables_in_non_system_tablespace();
+
+ /**
+ Check whether a tablespace was truncated during recovery
+ @param space_id tablespace id to check
+ @return true if the tablespace was truncated */
+ static bool is_tablespace_truncated(ulint space_id);
+
+	/** Was the tablespace truncated (on a crash before a checkpoint).
+	If the MLOG_TRUNCATE redo record is still available, then the
+	tablespace was truncated and a checkpoint has not yet happened.
+ @param[in] space_id tablespace id to check.
+ @return true if tablespace was truncated. */
+ static bool was_tablespace_truncated(ulint space_id);
+
+ /** Get the lsn associated with space.
+ @param[in] space_id tablespace id to check.
+ @return associated lsn. */
+ static lsn_t get_truncated_tablespace_init_lsn(ulint space_id);
+
+private:
+ typedef std::vector<index_t, ut_allocator<index_t> > indexes_t;
+
+ /** Space ID of tablespace */
+ ulint m_space_id;
+
+ /** ID of table that is being truncated. */
+ table_id_t m_old_table_id;
+
+ /** New ID that will be assigned to table on truncation. */
+ table_id_t m_new_table_id;
+
+ /** Data dir path of tablespace */
+ char* m_dir_path;
+
+ /** Table name */
+ char* m_tablename;
+
+ /** Tablespace Flags */
+ ulint m_tablespace_flags;
+
+ /** Format flags (log flags; stored in page-no field of header) */
+ ulint m_format_flags;
+
+ /** Index meta-data */
+ indexes_t m_indexes;
+
+ /** LSN of TRUNCATE log record. */
+ lsn_t m_log_lsn;
+
+ /** Log file name. */
+ char* m_log_file_name;
+
+ /** Encryption information of the table */
+ fil_encryption_t m_encryption;
+ uint32_t m_key_id;
+
+ /** Vector of tables to truncate. */
+ typedef std::vector<truncate_t*, ut_allocator<truncate_t*> >
+ tables_t;
+
+ /** Information about tables to truncate post recovery */
+ static tables_t s_tables;
+
+	/** Information about truncated tables: the case when the truncate
+	is complete but a checkpoint has not yet happened. */
+ typedef std::map<ulint, lsn_t> truncated_tables_t;
+ static truncated_tables_t s_truncated_tables;
+
+public:
+	/** If true, table fix-up is active, so when creating an index the
+	information is taken from the parsed truncate log record instead
+	of from dict_index_t. */
+ static bool s_fix_up_active;
+};
+
+/**
+Parse truncate log file. */
+class TruncateLogParser {
+
+public:
+
+ /**
+	Scan and parse truncate log files.
+
+	@param dir_path	directory path to scan for truncate log files
+ @return DB_SUCCESS or error code. */
+ static dberr_t scan_and_parse(
+ const char* dir_path);
+
+private:
+ typedef std::vector<char*, ut_allocator<char*> >
+ trunc_log_files_t;
+
+private:
+ /**
+	Scan the given directory path for truncate log files.
+
+	@param dir_path		directory path to scan for truncate log files
+	@param log_files	cache holding the truncate log file names found
+ @return DB_SUCCESS or error code. */
+ static dberr_t scan(
+ const char* dir_path,
+ trunc_log_files_t& log_files);
+
+ /**
+	Parse the log file and populate the table-to-truncate information.
+	(The table's truncate information is added to the central vector
+	that the truncate fix-up routine then uses to complete the truncate
+	action for the table.)
+
+ @param log_file_name log file to parse
+ @return DB_SUCCESS or error code. */
+ static dberr_t parse(
+ const char* log_file_name);
+};
+
+/** MySQL 5.7 TRUNCATE TABLE.
+@param table table being truncated
+@param trx transaction covering the truncate
+@return error code or DB_SUCCESS */
+dberr_t row_truncate_table_for_mysql(dict_table_t* table, trx_t* trx);
+#endif /* row0trunc_h */
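The header above declares the whole recovery pipeline for interrupted
TRUNCATE operations. A hedged sketch of how the declared entry points are
meant to compose at startup (the call site and error handling here are
illustrative, not the actual srv0start code):

	/* Apply pending TRUNCATE fix-ups after redo recovery. */
	dberr_t
	apply_pending_truncates(const char* log_dir)
	{
		/* Each parsed log file yields a truncate_t that is
		registered via truncate_t::add(). */
		dberr_t	err = TruncateLogParser::scan_and_parse(log_dir);
		if (err != DB_SUCCESS) {
			return(err);
		}

		/* Re-create the tables and indexes recorded in the logs:
		first those in the system tablespace, then the rest. */
		err = truncate_t::fixup_tables_in_system_tablespace();
		if (err == DB_SUCCESS) {
			err = truncate_t::fixup_tables_in_non_system_tablespace();
		}

		return(err);
	}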
diff --git a/storage/innobase/include/row0types.h b/storage/innobase/include/row0types.h
index cb0d280e78d..5f1e46c6a4d 100644
--- a/storage/innobase/include/row0types.h
+++ b/storage/innobase/include/row0types.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -52,4 +53,98 @@ struct row_log_t;
/* MySQL data types */
struct TABLE;
+/** Purge virtual column node information. */
+struct purge_vcol_info_t
+{
+private:
+ /** Is there a possible need to evaluate virtual columns? */
+ bool requested;
+ /** Do we have to evaluate virtual columns (using mariadb_table)? */
+ bool used;
+
+ /** True if it is used for the first time. */
+ bool first_use;
+
+ /** MariaDB table opened for virtual column computation. */
+ TABLE* mariadb_table;
+
+public:
+ /** Default constructor */
+ purge_vcol_info_t() :
+ requested(false), used(false), first_use(false),
+ mariadb_table(NULL)
+ {}
+ /** Reset the state. */
+ void reset()
+ {
+ requested = false;
+ used = false;
+ first_use = false;
+ mariadb_table = NULL;
+ }
+
+	/** Validate the virtual column information.
+	@return true if the MariaDB table was opened successfully,
+	or if no attempt is made to evaluate virtual columns. */
+ bool validate() const { return !used || mariadb_table; }
+
+ /** @return the table handle for evaluating virtual columns */
+ TABLE* table() const { return mariadb_table; }
+
+ /** Set the table handle for evaluating virtual columns.
+ @param[in] table table handle */
+ void set_table(TABLE* table)
+ {
+ ut_ad(!table || is_first_fetch());
+ mariadb_table = table;
+ }
+
+ /** Note that virtual column information may be needed. */
+ void set_requested()
+ {
+ ut_ad(!used);
+ ut_ad(!first_use);
+ ut_ad(!mariadb_table);
+ requested = true;
+ }
+
+ /** @return whether the virtual column information may be needed */
+ bool is_requested() const { return requested; }
+
+ /** Note that the virtual column information is needed. */
+ void set_used()
+ {
+ ut_ad(requested);
+
+ if (first_use) {
+ first_use = false;
+ ut_ad(used);
+ return;
+ }
+
+ if (!used) {
+ first_use = used = true;
+ }
+ }
+
+ /** @return whether the virtual column information is needed */
+ bool is_used() const
+ {
+ ut_ad(!first_use || used);
+ ut_ad(!used || requested);
+ ut_ad(used || !mariadb_table);
+ return used;
+ }
+
+	/** Check whether the MariaDB table is being fetched for the
+	first time.
+	@return true on the first attempt to open the MariaDB table. */
+ bool is_first_fetch() const
+ {
+ ut_ad(!first_use || used);
+ ut_ad(!used || requested);
+ ut_ad(used || !mariadb_table);
+ return first_use;
+ }
+};
+
#endif
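The requested/used/first_use flags above encode a small protocol: the purge
thread declares up front that virtual columns may be needed, opens the
MariaDB table lazily on first actual use, and retries later if the open
failed. A hedged sketch of the intended call pattern (open_purge_table() is
a hypothetical stand-in for whatever actually opens the table):

	void
	purge_step(purge_vcol_info_t& vcol_info)
	{
		vcol_info.set_requested();	/* vcols may be needed */

		/* ... later, when a virtual column value is required: */
		vcol_info.set_used();		/* first call sets first_use */
		if (vcol_info.is_first_fetch()) {
			/* Open at most once; NULL on failure is allowed
			and is caught by validate() below. */
			vcol_info.set_table(open_purge_table());
			vcol_info.set_used();	/* clears first_use */
		}

		if (!vcol_info.validate()) {
			/* Open failed: the caller must retry the purge. */
			return;
		}

		TABLE*	t = vcol_info.table();	/* handle for evaluation */
		(void) t;
	}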
diff --git a/storage/innobase/include/row0uins.h b/storage/innobase/include/row0uins.h
index b36f75c28f4..a98779697f7 100644
--- a/storage/innobase/include/row0uins.h
+++ b/storage/innobase/include/row0uins.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,9 +27,7 @@ Created 2/25/1997 Heikki Tuuri
#ifndef row0uins_h
#define row0uins_h
-#include "univ.i"
#include "data0data.h"
-#include "dict0types.h"
#include "trx0types.h"
#include "que0types.h"
#include "row0types.h"
@@ -40,15 +39,12 @@ the same clustered index unique key did not have any record, even delete
marked, at the time of the insert. InnoDB is eager in a rollback:
if it figures out that an index record will be removed in the purge
anyway, it will remove it in the rollback.
-@return DB_SUCCESS */
-UNIV_INTERN
+@return DB_SUCCESS */
dberr_t
row_undo_ins(
/*=========*/
- undo_node_t* node) /*!< in: row undo node */
+ undo_node_t* node, /*!< in: row undo node */
+ que_thr_t* thr) /*!< in: query thread */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-#ifndef UNIV_NONINL
-#include "row0uins.ic"
-#endif
#endif
diff --git a/storage/innobase/include/row0uins.ic b/storage/innobase/include/row0uins.ic
deleted file mode 100644
index a9cd2f81159..00000000000
--- a/storage/innobase/include/row0uins.ic
+++ /dev/null
@@ -1,25 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0uins.ic
-Fresh insert undo
-
-Created 2/25/1997 Heikki Tuuri
-*******************************************************/
-
diff --git a/storage/innobase/include/row0umod.h b/storage/innobase/include/row0umod.h
index cfaa25ff528..5032e10351b 100644
--- a/storage/innobase/include/row0umod.h
+++ b/storage/innobase/include/row0umod.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,9 +27,7 @@ Created 2/27/1997 Heikki Tuuri
#ifndef row0umod_h
#define row0umod_h
-#include "univ.i"
#include "data0data.h"
-#include "dict0types.h"
#include "trx0types.h"
#include "que0types.h"
#include "row0types.h"
@@ -36,17 +35,12 @@ Created 2/27/1997 Heikki Tuuri
/***********************************************************//**
Undoes a modify operation on a row of a table.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
dberr_t
row_undo_mod(
/*=========*/
undo_node_t* node, /*!< in: row undo node */
que_thr_t* thr) /*!< in: query thread */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-#ifndef UNIV_NONINL
-#include "row0umod.ic"
-#endif
+ MY_ATTRIBUTE((warn_unused_result));
#endif
diff --git a/storage/innobase/include/row0umod.ic b/storage/innobase/include/row0umod.ic
deleted file mode 100644
index 0b2a59d2095..00000000000
--- a/storage/innobase/include/row0umod.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0umod.ic
-Undo modify of a row
-
-Created 2/27/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/row0undo.h b/storage/innobase/include/row0undo.h
index b0e57c9f611..a461b96b919 100644
--- a/storage/innobase/include/row0undo.h
+++ b/storage/innobase/include/row0undo.h
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,20 +27,15 @@ Created 1/8/1997 Heikki Tuuri
#ifndef row0undo_h
#define row0undo_h
-#include "univ.i"
-#include "mtr0mtr.h"
#include "trx0sys.h"
#include "btr0types.h"
#include "btr0pcur.h"
-#include "dict0types.h"
-#include "trx0types.h"
#include "que0types.h"
#include "row0types.h"
/********************************************************************//**
Creates a row undo node to a query graph.
-@return own: undo node */
-UNIV_INTERN
+@return own: undo node */
undo_node_t*
row_undo_node_create(
/*=================*/
@@ -51,18 +47,17 @@ Looks for the clustered index record when node has the row reference.
The pcur in node is used in the search. If found, stores the row to node,
and stores the position of pcur, and detaches it. The pcur must be closed
by the caller in any case.
-@return TRUE if found; NOTE the node->pcur must be closed by the
+@return true if found; NOTE the node->pcur must be closed by the
caller, regardless of the return value */
-UNIV_INTERN
-ibool
+bool
row_undo_search_clust_to_pcur(
/*==========================*/
- undo_node_t* node); /*!< in: row undo node */
+ undo_node_t* node) /*!< in/out: row undo node */
+ MY_ATTRIBUTE((warn_unused_result));
/***********************************************************//**
Undoes a row operation in a table. This is a high-level function used
in SQL execution graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
row_undo_step(
/*==========*/
@@ -127,9 +122,4 @@ struct undo_node_t{
on a row */
};
-
-#ifndef UNIV_NONINL
-#include "row0undo.ic"
-#endif
-
#endif
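The new bool return plus warn_unused_result makes the contract of
row_undo_search_clust_to_pcur() enforceable at the call site. A hedged
sketch of the caller obligation (illustrative, not the actual row0undo.cc
code):

	bool
	fetch_clust_row_for_undo(undo_node_t* node)
	{
		const bool	found = row_undo_search_clust_to_pcur(node);

		/* ... use node->row while the cursor is positioned ... */

		/* The pcur must be closed regardless of the result, as
		the comment above requires. */
		btr_pcur_close(&node->pcur);

		return(found);
	}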
diff --git a/storage/innobase/include/row0undo.ic b/storage/innobase/include/row0undo.ic
deleted file mode 100644
index f28893e402d..00000000000
--- a/storage/innobase/include/row0undo.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0undo.ic
-Row undo
-
-Created 1/8/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/row0upd.h b/storage/innobase/include/row0upd.h
index 5caedc0ba7a..cca86590f74 100644
--- a/storage/innobase/include/row0upd.h
+++ b/storage/innobase/include/row0upd.h
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, MariaDB Corporation.
+Copyright (c) 1996, 2018, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,22 +27,18 @@ Created 12/27/1996 Heikki Tuuri
#ifndef row0upd_h
#define row0upd_h
-#include "univ.i"
#include "data0data.h"
+#include "rem0types.h"
#include "row0types.h"
#include "btr0types.h"
-#include "dict0types.h"
#include "trx0types.h"
-
-#ifndef UNIV_HOTBACKUP
-# include "btr0pcur.h"
-# include "que0types.h"
-# include "pars0types.h"
-#endif /* !UNIV_HOTBACKUP */
+#include "btr0pcur.h"
+#include "que0types.h"
+#include "pars0types.h"
/*********************************************************************//**
Creates an update vector object.
-@return own: update vector object */
+@return own: update vector object */
UNIV_INLINE
upd_t*
upd_create(
@@ -52,7 +48,7 @@ upd_create(
/*********************************************************************//**
Returns the number of fields in the update vector == number of columns
to be updated by an update vector.
-@return number of fields */
+@return number of fields */
UNIV_INLINE
ulint
upd_get_n_fields(
@@ -61,7 +57,7 @@ upd_get_n_fields(
#ifdef UNIV_DEBUG
/*********************************************************************//**
Returns the nth field of an update vector.
-@return update vector field */
+@return update vector field */
UNIV_INLINE
upd_field_t*
upd_get_nth_field(
@@ -71,7 +67,7 @@ upd_get_nth_field(
#else
# define upd_get_nth_field(update, n) ((update)->fields + (n))
#endif
-#ifndef UNIV_HOTBACKUP
+
/*********************************************************************//**
Sets an index field number to be updated by an update vector field. */
UNIV_INLINE
@@ -81,23 +77,33 @@ upd_field_set_field_no(
upd_field_t* upd_field, /*!< in: update vector field */
ulint field_no, /*!< in: field number in a clustered
index */
- dict_index_t* index, /*!< in: index */
- trx_t* trx); /*!< in: transaction */
+ dict_index_t* index);
+
+/** Set the field number of an update vector field, marking the field
+as updated.
+@param[in,out] upd_field update vector field
+@param[in] field_no virtual column sequence num
+@param[in] index index */
+UNIV_INLINE
+void
+upd_field_set_v_field_no(
+ upd_field_t* upd_field,
+ ulint field_no,
+ dict_index_t* index);
/*********************************************************************//**
Returns a field of an update vector by field_no.
-@return update vector field, or NULL */
+@return update vector field, or NULL */
UNIV_INLINE
const upd_field_t*
upd_get_field_by_field_no(
/*======================*/
const upd_t* update, /*!< in: update vector */
- ulint no) /*!< in: field_no */
- MY_ATTRIBUTE((nonnull, pure));
+ ulint no, /*!< in: field_no */
+ bool is_virtual) /*!< in: if it is a virtual column */
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************************//**
Writes into the redo log the values of trx id and roll ptr and enough info
to determine their positions within a clustered index record.
-@return new pointer to mlog */
-UNIV_INTERN
+@return new pointer to mlog */
byte*
row_upd_write_sys_vals_to_log(
/*==========================*/
@@ -118,12 +124,11 @@ row_upd_rec_sys_fields(
page_zip_des_t* page_zip,/*!< in/out: compressed page whose
uncompressed part will be updated, or NULL */
dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
const trx_t* trx, /*!< in: transaction */
roll_ptr_t roll_ptr);/*!< in: DB_ROLL_PTR to the undo log */
/*********************************************************************//**
Sets the trx id or roll ptr field of a clustered index entry. */
-UNIV_INTERN
void
row_upd_index_entry_sys_field(
/*==========================*/
@@ -136,15 +141,13 @@ row_upd_index_entry_sys_field(
ib_uint64_t val); /*!< in: value to write */
/*********************************************************************//**
Creates an update node for a query graph.
-@return own: update node */
-UNIV_INTERN
+@return own: update node */
upd_node_t*
upd_node_create(
/*============*/
mem_heap_t* heap); /*!< in: mem heap where created */
/***********************************************************//**
Writes to the redo log the new values of the fields occurring in the index. */
-UNIV_INTERN
void
row_upd_index_write_log(
/*====================*/
@@ -159,79 +162,82 @@ Returns TRUE if row update changes size of some field in index or if some
field to be updated is stored externally in rec or update.
@return TRUE if the update changes the size of some field in index or
the field is external in rec or update */
-UNIV_INTERN
ibool
row_upd_changes_field_size_or_external(
/*===================================*/
dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
const upd_t* update);/*!< in: update vector */
/***********************************************************//**
Returns true if row update contains disowned external fields.
@return true if the update contains disowned external fields. */
-UNIV_INTERN
bool
row_upd_changes_disowned_external(
/*==============================*/
const upd_t* update) /*!< in: update vector */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
Replaces the new column values stored in the update vector to the
record given. No field size changes are allowed. This function is
usually invoked on a clustered index. The only use case for a
secondary index is row_ins_sec_index_entry_by_modify() or its
counterpart in ibuf_insert_to_index_page(). */
-UNIV_INTERN
void
row_upd_rec_in_place(
/*=================*/
rec_t* rec, /*!< in/out: record where replaced */
dict_index_t* index, /*!< in: the index the record belongs to */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
const upd_t* update, /*!< in: update vector */
page_zip_des_t* page_zip);/*!< in: compressed page with enough space
available, or NULL */
-#ifndef UNIV_HOTBACKUP
+
/***************************************************************//**
Builds an update vector from those fields which in a secondary index entry
differ from a record that has the equal ordering fields. NOTE: we compare
the fields as binary strings!
-@return own: update vector of differing fields */
-UNIV_INTERN
+@return own: update vector of differing fields */
upd_t*
row_upd_build_sec_rec_difference_binary(
/*====================================*/
const rec_t* rec, /*!< in: secondary index record */
dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
const dtuple_t* entry, /*!< in: entry to insert */
mem_heap_t* heap) /*!< in: memory heap from which allocated */
MY_ATTRIBUTE((warn_unused_result, nonnull));
-/***************************************************************//**
-Builds an update vector from those fields, excluding the roll ptr and
+/** Builds an update vector from those fields, excluding the roll ptr and
trx id fields, which in an index entry differ from a record that has
the equal ordering fields. NOTE: we compare the fields as binary strings!
+@param[in] index clustered index
+@param[in] entry clustered index entry to insert
+@param[in] rec clustered index record
+@param[in] offsets rec_get_offsets(rec,index), or NULL
+@param[in] no_sys skip the system columns
+ DB_TRX_ID and DB_ROLL_PTR
+@param[in] trx transaction (for diagnostics),
+ or NULL
+@param[in] heap memory heap from which allocated
+@param[in,out]	mysql_table	NULL, or the MySQL table object when
+				a user thread invokes DML
+@param[out] error error number in case of failure
@return own: update vector of differing fields, excluding roll ptr and
trx id */
-UNIV_INTERN
-const upd_t*
+upd_t*
row_upd_build_difference_binary(
-/*============================*/
- dict_index_t* index, /*!< in: clustered index */
- const dtuple_t* entry, /*!< in: entry to insert */
- const rec_t* rec, /*!< in: clustered index record */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index), or NULL */
- bool no_sys, /*!< in: skip the system columns
- DB_TRX_ID and DB_ROLL_PTR */
- trx_t* trx, /*!< in: transaction (for diagnostics),
- or NULL */
- mem_heap_t* heap) /*!< in: memory heap from which allocated */
- MY_ATTRIBUTE((nonnull(1,2,3,7), warn_unused_result));
+ dict_index_t* index,
+ const dtuple_t* entry,
+ const rec_t* rec,
+ const offset_t* offsets,
+ bool no_sys,
+ trx_t* trx,
+ mem_heap_t* heap,
+ TABLE* mysql_table,
+ dberr_t* error)
+ MY_ATTRIBUTE((nonnull(1,2,3,7,9), warn_unused_result));
/***********************************************************//**
Replaces the new column values stored in the update vector to the index entry
given. */
-UNIV_INTERN
void
row_upd_index_replace_new_col_vals_index_pos(
/*=========================================*/
@@ -254,7 +260,6 @@ row_upd_index_replace_new_col_vals_index_pos(
/***********************************************************//**
Replaces the new column values stored in the update vector to the index entry
given. */
-UNIV_INTERN
void
row_upd_index_replace_new_col_vals(
/*===============================*/
@@ -272,7 +277,6 @@ row_upd_index_replace_new_col_vals(
MY_ATTRIBUTE((nonnull));
/***********************************************************//**
Replaces the new column values stored in the update vector. */
-UNIV_INTERN
void
row_upd_replace(
/*============*/
@@ -287,6 +291,23 @@ row_upd_replace(
const upd_t* update, /*!< in: an update vector built for the
clustered index */
mem_heap_t* heap); /*!< in: memory heap */
+/** Replaces the virtual column values stored in a dtuple with those of
+an update vector.
+@param[in,out] row dtuple whose column to be updated
+@param[in] table table
+@param[in] update an update vector built for the clustered index
+@param[in] upd_new update to new or old value
+@param[in,out]	undo_row	undo row (if it needs to be updated)
+@param[in] ptr remaining part in update undo log */
+void
+row_upd_replace_vcol(
+ dtuple_t* row,
+ const dict_table_t* table,
+ const upd_t* update,
+ bool upd_new,
+ dtuple_t* undo_row,
+ const byte* ptr);
+
/***********************************************************//**
Checks if an update vector changes an ordering field of an index record.
@@ -294,7 +315,6 @@ This function is fast if the update vector is short or the number of ordering
fields in the index is small. Otherwise, this can be quadratic.
NOTE: we compare the fields as binary strings!
@return TRUE if update vector changes an ordering field in the index record */
-UNIV_INTERN
ibool
row_upd_changes_ord_field_binary_func(
/*==================================*/
@@ -309,21 +329,22 @@ row_upd_changes_ord_field_binary_func(
row and the data values in update are not
known when this function is called, e.g., at
compile time */
- const row_ext_t*ext) /*!< NULL, or prefixes of the externally
+ const row_ext_t*ext, /*!< NULL, or prefixes of the externally
stored columns in the old row */
+ ulint flag) /*!< in: ROW_BUILD_NORMAL,
+ ROW_BUILD_FOR_PURGE or ROW_BUILD_FOR_UNDO */
MY_ATTRIBUTE((nonnull(1,2), warn_unused_result));
#ifdef UNIV_DEBUG
# define row_upd_changes_ord_field_binary(index,update,thr,row,ext) \
- row_upd_changes_ord_field_binary_func(index,update,thr,row,ext)
+ row_upd_changes_ord_field_binary_func(index,update,thr,row,ext,0)
#else /* UNIV_DEBUG */
# define row_upd_changes_ord_field_binary(index,update,thr,row,ext) \
- row_upd_changes_ord_field_binary_func(index,update,row,ext)
+ row_upd_changes_ord_field_binary_func(index,update,row,ext,0)
#endif /* UNIV_DEBUG */
/***********************************************************//**
Checks if an FTS indexed column is affected by an UPDATE.
@return offset within fts_t::indexes if FTS indexed column updated else
ULINT_UNDEFINED */
-UNIV_INTERN
ulint
row_upd_changes_fts_column(
/*=======================*/
@@ -332,7 +353,6 @@ row_upd_changes_fts_column(
/***********************************************************//**
Checks if an FTS Doc ID column is affected by an UPDATE.
@return whether Doc ID column is affected */
-UNIV_INTERN
bool
row_upd_changes_doc_id(
/*===================*/
@@ -346,7 +366,6 @@ fields in the index is small. Otherwise, this can be quadratic.
NOTE: we compare the fields as binary strings!
@return TRUE if update vector may change an ordering field in an index
record */
-UNIV_INTERN
ibool
row_upd_changes_some_index_ord_field_binary(
/*========================================*/
@@ -355,47 +374,42 @@ row_upd_changes_some_index_ord_field_binary(
/***********************************************************//**
Updates a row in a table. This is a high-level function used
in SQL execution graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
row_upd_step(
/*=========*/
que_thr_t* thr); /*!< in: query thread */
-#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Parses the log data of system field values.
-@return log data end or NULL */
-UNIV_INTERN
+@return log data end or NULL */
byte*
row_upd_parse_sys_vals(
/*===================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
+ const byte* ptr, /*!< in: buffer */
+ const byte* end_ptr,/*!< in: buffer end */
ulint* pos, /*!< out: TRX_ID position in record */
trx_id_t* trx_id, /*!< out: trx id */
roll_ptr_t* roll_ptr);/*!< out: roll ptr */
/*********************************************************************//**
Updates the trx id and roll ptr field in a clustered index record in database
recovery. */
-UNIV_INTERN
void
row_upd_rec_sys_fields_in_recovery(
/*===============================*/
rec_t* rec, /*!< in/out: record */
page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
ulint pos, /*!< in: TRX_ID position in rec */
trx_id_t trx_id, /*!< in: transaction id */
roll_ptr_t roll_ptr);/*!< in: roll ptr of the undo log record */
/*********************************************************************//**
Parses the log data written by row_upd_index_write_log.
-@return log data end or NULL */
-UNIV_INTERN
+@return log data end or NULL */
byte*
row_upd_index_parse(
/*================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
+ const byte* ptr, /*!< in: buffer */
+ const byte* end_ptr,/*!< in: buffer end */
mem_heap_t* heap, /*!< in: memory heap where update vector is
built */
upd_t** update_out);/*!< out: update vector */
@@ -407,8 +421,9 @@ struct upd_field_t{
the clustered index, but in updating
a secondary index record in btr0cur.cc
this is the position in the secondary
- index */
-#ifndef UNIV_HOTBACKUP
+ index. If this field is a virtual
+ column, then field_no represents
+ the nth virtual column in the table */
unsigned orig_len:16; /*!< original length of the locally
stored part of an externally stored
column, or 0 */
@@ -416,19 +431,64 @@ struct upd_field_t{
value: it refers to column values and
constants in the symbol table of the
query graph */
-#endif /* !UNIV_HOTBACKUP */
dfield_t new_val; /*!< new value for the column */
+ dfield_t* old_v_val; /*!< old value for the virtual column */
};
+
+/* Check whether an update field is on a virtual column. */
+#define upd_fld_is_virtual_col(upd_fld) \
+ (((upd_fld)->new_val.type.prtype & DATA_VIRTUAL) == DATA_VIRTUAL)
+
+/* Set the DATA_VIRTUAL bit on an update field to mark it as a virtual column. */
+#define upd_fld_set_virtual_col(upd_fld) \
+ ((upd_fld)->new_val.type.prtype |= DATA_VIRTUAL)
+
/* Update vector structure */
struct upd_t{
+ mem_heap_t* heap; /*!< heap from which memory allocated */
ulint info_bits; /*!< new value of info bits to record;
default is 0 */
+ dtuple_t* old_vrow; /*!< pointer to old row, used for
+ virtual column update now */
ulint n_fields; /*!< number of update fields */
upd_field_t* fields; /*!< array of update fields */
+
+	/** Append an update field to the end of the array.
+	@param[in]	field	an update field */
+ void append(const upd_field_t& field)
+ {
+ fields[n_fields++] = field;
+ }
+
+ /** Determine if the given field_no is modified.
+ @return true if modified, false otherwise. */
+ bool is_modified(const ulint field_no) const
+ {
+ for (ulint i = 0; i < n_fields; ++i) {
+ if (field_no == fields[i].field_no) {
+ return(true);
+ }
+ }
+ return(false);
+ }
+
+#ifdef UNIV_DEBUG
+ bool validate() const
+ {
+ for (ulint i = 0; i < n_fields; ++i) {
+ dfield_t* field = &fields[i].new_val;
+ if (dfield_is_ext(field)) {
+ ut_ad(dfield_get_len(field)
+ >= BTR_EXTERN_FIELD_REF_SIZE);
+ }
+ }
+ return(true);
+ }
+#endif // UNIV_DEBUG
+
};
-#ifndef UNIV_HOTBACKUP
/* Update node structure which also implements the delete operation
of a row */
@@ -447,8 +507,9 @@ struct upd_node_t{
upd_node_t* cascade_node;/* NULL or an update node template which
is used to implement ON DELETE/UPDATE CASCADE
or ... SET NULL for foreign keys */
- mem_heap_t* cascade_heap;/* NULL or a mem heap where the cascade
- node is created */
+ mem_heap_t* cascade_heap;
+ /*!< NULL or a mem heap where cascade
+ node is created.*/
sel_node_t* select; /*!< query graph subtree implementing a base
table cursor: the rows returned will be
updated */
@@ -496,6 +557,7 @@ struct upd_node_t{
que_node_t* col_assign_list;
/* column assignment list */
ulint magic_n;
+
};
#define UPD_NODE_MAGIC_N 1579975
@@ -511,11 +573,6 @@ struct upd_node_t{
#define UPD_NODE_INSERT_CLUSTERED 3 /* clustered index record should be
inserted, old record is already delete
marked */
-#define UPD_NODE_INSERT_BLOB 4 /* clustered index record should be
- inserted, old record is already
- delete-marked; non-updated BLOBs
- should be inherited by the new record
- and disowned by the old record */
#define UPD_NODE_UPDATE_ALL_SEC 5 /* an ordering field of the clustered
index record was changed, or this is
a delete operation: should update
@@ -531,10 +588,7 @@ struct upd_node_t{
#define UPD_NODE_NO_SIZE_CHANGE 2 /* no record field size will be
changed in the update */
-#endif /* !UNIV_HOTBACKUP */
-#ifndef UNIV_NONINL
#include "row0upd.ic"
-#endif
#endif
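A hedged sketch of how the new upd_t helpers (append(), is_modified())
compose with upd_create() and upd_field_set_field_no(); the field number
and the reset of n_fields are illustrative, since upd_create() sizes the
array to n and append() assumes the count starts below that capacity:

	upd_t*
	build_one_field_update(mem_heap_t* heap, dict_index_t* index)
	{
		upd_t*		update = upd_create(1, heap);
		update->n_fields = 0;	/* append() will increment this */

		upd_field_t	uf;
		memset(&uf, 0, sizeof uf);
		upd_field_set_field_no(&uf, 1, index);
		/* ... set uf.new_val to the new column value ... */

		update->append(uf);
		ut_ad(update->is_modified(1));
		ut_ad(!update->is_modified(2));

		return(update);
	}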
diff --git a/storage/innobase/include/row0upd.ic b/storage/innobase/include/row0upd.ic
index 9a0b0d3c4e1..68280ff567a 100644
--- a/storage/innobase/include/row0upd.ic
+++ b/storage/innobase/include/row0upd.ic
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, MariaDB Corporation.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,17 +25,15 @@ Created 12/27/1996 Heikki Tuuri
*******************************************************/
#include "mtr0log.h"
-#ifndef UNIV_HOTBACKUP
-# include "trx0trx.h"
-# include "trx0undo.h"
-# include "row0row.h"
-# include "lock0lock.h"
-#endif /* !UNIV_HOTBACKUP */
+#include "trx0trx.h"
+#include "trx0undo.h"
+#include "row0row.h"
+#include "lock0lock.h"
#include "page0zip.h"
/*********************************************************************//**
Creates an update vector object.
-@return own: update vector object */
+@return own: update vector object */
UNIV_INLINE
upd_t*
upd_create(
@@ -45,11 +43,12 @@ upd_create(
{
upd_t* update;
- update = (upd_t*) mem_heap_zalloc(heap, sizeof(upd_t));
+ update = static_cast<upd_t*>(mem_heap_zalloc(
+ heap, sizeof(upd_t) + sizeof(upd_field_t) * n));
update->n_fields = n;
- update->fields = (upd_field_t*)
- mem_heap_zalloc(heap, sizeof(upd_field_t) * n);
+ update->fields = reinterpret_cast<upd_field_t*>(&update[1]);
+ update->heap = heap;
return(update);
}
@@ -57,7 +56,7 @@ upd_create(
/*********************************************************************//**
Returns the number of fields in the update vector == number of columns
to be updated by an update vector.
-@return number of fields */
+@return number of fields */
UNIV_INLINE
ulint
upd_get_n_fields(
@@ -72,7 +71,7 @@ upd_get_n_fields(
#ifdef UNIV_DEBUG
/*********************************************************************//**
Returns the nth field of an update vector.
-@return update vector field */
+@return update vector field */
UNIV_INLINE
upd_field_t*
upd_get_nth_field(
@@ -87,7 +86,6 @@ upd_get_nth_field(
}
#endif /* UNIV_DEBUG */
-#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Sets an index field number to be updated by an update vector field. */
UNIV_INLINE
@@ -97,41 +95,54 @@ upd_field_set_field_no(
upd_field_t* upd_field, /*!< in: update vector field */
ulint field_no, /*!< in: field number in a clustered
index */
- dict_index_t* index, /*!< in: index */
- trx_t* trx) /*!< in: transaction */
+ dict_index_t* index) /*!< in: index */
{
- upd_field->field_no = field_no;
+ upd_field->field_no = unsigned(field_no);
upd_field->orig_len = 0;
+ dict_col_copy_type(dict_index_get_nth_col(index, field_no),
+ dfield_get_type(&upd_field->new_val));
+}
- if (field_no >= dict_index_get_n_fields(index)) {
- fprintf(stderr,
- "InnoDB: Error: trying to access field %lu in ",
- (ulong) field_no);
- dict_index_name_print(stderr, trx, index);
- fprintf(stderr, "\n"
- "InnoDB: but index only has %lu fields\n",
- (ulong) dict_index_get_n_fields(index));
- ut_ad(0);
- }
+/** Set the field number of an update vector field, marking the field
+as updated.
+@param[in,out] upd_field update vector field
+@param[in] field_no virtual column sequence num
+@param[in] index index */
+UNIV_INLINE
+void
+upd_field_set_v_field_no(
+ upd_field_t* upd_field,
+ ulint field_no,
+ dict_index_t* index)
+{
+ ut_a(field_no < dict_table_get_n_v_cols(index->table));
+ upd_field->field_no = unsigned(field_no);
+ upd_field->orig_len = 0;
- dict_col_copy_type(dict_index_get_nth_col(index, field_no),
+ dict_col_copy_type(&dict_table_get_nth_v_col(
+ index->table, field_no)->m_col,
dfield_get_type(&upd_field->new_val));
}
/*********************************************************************//**
Returns a field of an update vector by field_no.
-@return update vector field, or NULL */
+@return update vector field, or NULL */
UNIV_INLINE
const upd_field_t*
upd_get_field_by_field_no(
/*======================*/
const upd_t* update, /*!< in: update vector */
- ulint no) /*!< in: field_no */
+ ulint no, /*!< in: field_no */
+ bool is_virtual) /*!< in: if it is virtual column */
{
ulint i;
for (i = 0; i < upd_get_n_fields(update); i++) {
const upd_field_t* uf = upd_get_nth_field(update, i);
+		/* Skip fields whose virtual-column status does not
+		match is_virtual. */
+ if ((!is_virtual) != (!upd_fld_is_virtual_col(uf))) {
+ continue;
+ }
+
if (uf->field_no == no) {
return(uf);
@@ -152,7 +163,7 @@ row_upd_rec_sys_fields(
page_zip_des_t* page_zip,/*!< in/out: compressed page whose
uncompressed part will be updated, or NULL */
dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
const trx_t* trx, /*!< in: transaction */
roll_ptr_t roll_ptr)/*!< in: DB_ROLL_PTR to the undo log */
{
@@ -185,4 +196,3 @@ row_upd_rec_sys_fields(
trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr);
}
}
-#endif /* !UNIV_HOTBACKUP */
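The rewritten upd_create() above folds the former two heap allocations into
one: the upd_field_t array now lives in the same zeroed block, immediately
after the upd_t header. A hedged sketch of the layout invariant this relies
on:

	/* Debug-only check of the single-allocation layout. */
	void
	check_upd_layout(mem_heap_t* heap, ulint n)
	{
		upd_t*	update = upd_create(n, heap);

		/* fields[] aliases the tail of the same block. */
		ut_ad(update->fields
		      == reinterpret_cast<upd_field_t*>(update + 1));
		ut_ad(update->n_fields == n);
		ut_ad(update->heap == heap);
	}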
diff --git a/storage/innobase/include/row0vers.h b/storage/innobase/include/row0vers.h
index 2586385abba..2a76e27e226 100644
--- a/storage/innobase/include/row0vers.h
+++ b/storage/innobase/include/row0vers.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,67 +27,81 @@ Created 2/6/1997 Heikki Tuuri
#ifndef row0vers_h
#define row0vers_h
-#include "univ.i"
#include "data0data.h"
-#include "dict0types.h"
#include "trx0types.h"
#include "que0types.h"
#include "rem0types.h"
#include "mtr0mtr.h"
-#include "read0types.h"
+#include "dict0mem.h"
+#include "row0types.h"
-/*****************************************************************//**
-Finds out if an active transaction has inserted or modified a secondary
+// Forward declaration
+class ReadView;
+
+/** Determine if an active transaction has inserted or modified a secondary
index record.
-@return 0 if committed, else the active transaction id;
-NOTE that this function can return false positives but never false
-negatives. The caller must confirm all positive results by calling
-trx_is_active() while holding lock_sys->mutex. */
-UNIV_INTERN
-trx_id_t
+@param[in] rec secondary index record
+@param[in] index secondary index
+@param[in] offsets rec_get_offsets(rec, index)
+@return the active transaction; state must be rechecked after
+trx_mutex_enter(), and trx->release_reference() must be invoked
+@retval NULL if the record was committed */
+trx_t*
row_vers_impl_x_locked(
-/*===================*/
- const rec_t* rec, /*!< in: record in a secondary index */
- dict_index_t* index, /*!< in: the secondary index */
- const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */
+ const rec_t* rec,
+ dict_index_t* index,
+ const offset_t* offsets);
+
/*****************************************************************//**
Finds out if we must preserve a delete marked earlier version of a clustered
index record, because it is >= the purge view.
-@return TRUE if earlier version should be preserved */
-UNIV_INTERN
+@param[in] trx_id transaction id in the version
+@param[in] name table name
+@param[in,out]	mtr	mini-transaction holding the latch on the
+ clustered index record; it will also hold
+ the latch on purge_view
+@return TRUE if earlier version should be preserved */
ibool
row_vers_must_preserve_del_marked(
/*==============================*/
- trx_id_t trx_id, /*!< in: transaction id in the version */
- mtr_t* mtr); /*!< in: mtr holding the latch on the
- clustered index record; it will also
- hold the latch on purge_view */
-/*****************************************************************//**
-Finds out if a version of the record, where the version >= the current
+ trx_id_t trx_id,
+ const table_name_t& name,
+ mtr_t* mtr);
+
+/** Finds out if a version of the record, where the version >= the current
purge view, should have ientry as its secondary index entry. We check
if there is any not delete marked version of the record where the trx
id >= purge view, and the secondary index entry == ientry; exactly in
this case we return TRUE.
-@return TRUE if earlier version should have */
-UNIV_INTERN
-ibool
+@param[in] also_curr TRUE if also rec is included in the versions
+ to search; otherwise only versions prior
+ to it are searched
+@param[in] rec record in the clustered index; the caller
+ must have a latch on the page
+@param[in] mtr mtr holding the latch on rec; it will
+ also hold the latch on purge_view
+@param[in] index secondary index
+@param[in] ientry secondary index entry
+@param[in] roll_ptr roll_ptr for the purge record
+@param[in] trx_id transaction ID on the purging record
+@param[in,out] vcol_info virtual column information for purge thread.
+@return TRUE if earlier version should have */
+bool
row_vers_old_has_index_entry(
-/*=========================*/
- ibool also_curr,/*!< in: TRUE if also rec is included in the
- versions to search; otherwise only versions
- prior to it are searched */
- const rec_t* rec, /*!< in: record in the clustered index; the
- caller must have a latch on the page */
- mtr_t* mtr, /*!< in: mtr holding the latch on rec; it will
- also hold the latch on purge_view */
- dict_index_t* index, /*!< in: the secondary index */
- const dtuple_t* ientry);/*!< in: the secondary index entry */
+ bool also_curr,
+ const rec_t* rec,
+ mtr_t* mtr,
+ dict_index_t* index,
+ const dtuple_t* ientry,
+ roll_ptr_t roll_ptr,
+ trx_id_t trx_id,
+ purge_vcol_info_t* vcol_info=NULL);
+
/*****************************************************************//**
Constructs the version of a clustered index record which a consistent
read should see. We assume that the trx id stored in rec is such that
the consistent read should not see rec in its present version.
-@return DB_SUCCESS or DB_MISSING_HISTORY */
-UNIV_INTERN
+@return DB_SUCCESS or DB_MISSING_HISTORY */
dberr_t
row_vers_build_for_consistent_read(
/*===============================*/
@@ -97,25 +112,24 @@ row_vers_build_for_consistent_read(
mtr_t* mtr, /*!< in: mtr holding the latch on rec; it will
also hold the latch on purge_view */
dict_index_t* index, /*!< in: the clustered index */
- ulint** offsets,/*!< in/out: offsets returned by
+ offset_t** offsets,/*!< in/out: offsets returned by
rec_get_offsets(rec, index) */
- read_view_t* view, /*!< in: the consistent read view */
+ ReadView* view, /*!< in: the consistent read view */
mem_heap_t** offset_heap,/*!< in/out: memory heap from which
the offsets are allocated */
mem_heap_t* in_heap,/*!< in: memory heap from which the memory for
*old_vers is allocated; memory for possible
intermediate versions is allocated and freed
locally within the function */
- rec_t** old_vers)/*!< out, own: old version, or NULL
+ rec_t** old_vers,/*!< out, own: old version, or NULL
if the history is missing or the record
does not exist in the view, that is,
it was freshly inserted afterwards */
- MY_ATTRIBUTE((nonnull(1,2,3,4,5,6,7)));
+ dtuple_t** vrow); /*!< out: reports virtual column info if any */
/*****************************************************************//**
Constructs the last committed version of a clustered index record,
which should be seen by a semi-consistent read. */
-UNIV_INTERN
void
row_vers_build_for_semi_consistent_read(
/*====================================*/
@@ -125,7 +139,7 @@ row_vers_build_for_semi_consistent_read(
of this records */
mtr_t* mtr, /*!< in: mtr holding the latch on rec */
dict_index_t* index, /*!< in: the clustered index */
- ulint** offsets,/*!< in/out: offsets returned by
+ offset_t** offsets,/*!< in/out: offsets returned by
rec_get_offsets(rec, index) */
mem_heap_t** offset_heap,/*!< in/out: memory heap from which
the offsets are allocated */
@@ -133,14 +147,10 @@ row_vers_build_for_semi_consistent_read(
*old_vers is allocated; memory for possible
intermediate versions is allocated and freed
locally within the function */
- const rec_t** old_vers)/*!< out: rec, old version, or NULL if the
+ const rec_t** old_vers,/*!< out: rec, old version, or NULL if the
record does not exist in the view, that is,
it was freshly inserted afterwards */
- MY_ATTRIBUTE((nonnull(1,2,3,4,5)));
-
-
-#ifndef UNIV_NONINL
-#include "row0vers.ic"
-#endif
+ dtuple_t** vrow); /*!< out: holds virtual column info if any
+ is updated in the view */
#endif
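Returning a referenced trx_t* instead of a bare trx_id_t moves the
false-positive filtering to the caller, as the comment on
row_vers_impl_x_locked() states. A hedged sketch of the prescribed pattern
(the lock-manager work in the "active" branch is elided):

	bool
	sec_rec_has_impl_lock(const rec_t* rec, dict_index_t* index,
			      const offset_t* offsets)
	{
		trx_t*	trx = row_vers_impl_x_locked(rec, index, offsets);
		if (trx == NULL) {
			return(false);	/* committed: no implicit lock */
		}

		/* The state must be rechecked under the trx mutex. */
		trx_mutex_enter(trx);
		const bool	active = trx_state_eq(trx, TRX_STATE_ACTIVE);
		trx_mutex_exit(trx);

		/* The returned reference must always be released. */
		trx->release_reference();

		return(active);
	}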
diff --git a/storage/innobase/include/row0vers.ic b/storage/innobase/include/row0vers.ic
deleted file mode 100644
index 117c692f62b..00000000000
--- a/storage/innobase/include/row0vers.ic
+++ /dev/null
@@ -1,30 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0vers.ic
-Row versions
-
-Created 2/6/1997 Heikki Tuuri
-*******************************************************/
-
-#include "row0row.h"
-#include "dict0dict.h"
-#include "read0read.h"
-#include "page0page.h"
-#include "log0recv.h"
diff --git a/storage/innobase/include/srv0conc.h b/storage/innobase/include/srv0conc.h
index 7d08041dea5..d6682e19539 100644
--- a/storage/innobase/include/srv0conc.h
+++ b/storage/innobase/include/srv0conc.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2014, Oracle and/or its affiliates. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -52,32 +52,18 @@ we could get a deadlock. Value of 0 will disable the concurrency check. */
extern ulong srv_thread_concurrency;
-/*********************************************************************//**
-Initialise the concurrency management data structures */
-void
-srv_conc_init(void);
-/*===============*/
-
-/*********************************************************************//**
-Free the concurrency management data structures */
-void
-srv_conc_free(void);
-/*===============*/
-
+struct row_prebuilt_t;
/*********************************************************************//**
Puts an OS thread to wait if there are too many concurrent threads
-(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
-UNIV_INTERN
+(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue.
+@param[in,out] prebuilt row prebuilt handler */
void
srv_conc_enter_innodb(
-/*==================*/
- trx_t* trx); /*!< in: transaction object associated
- with the thread */
+ row_prebuilt_t* prebuilt);
/*********************************************************************//**
This lets a thread enter InnoDB regardless of the number of threads inside
InnoDB. This must be called when a thread ends a lock wait. */
-UNIV_INTERN
void
srv_conc_force_enter_innodb(
/*========================*/
@@ -87,7 +73,6 @@ srv_conc_force_enter_innodb(
/*********************************************************************//**
This must be called when a thread exits InnoDB in a lock wait or at the
end of an SQL statement. */
-UNIV_INTERN
void
srv_conc_force_exit_innodb(
/*=======================*/
@@ -96,14 +81,12 @@ srv_conc_force_exit_innodb(
/*********************************************************************//**
Get the count of threads waiting inside InnoDB. */
-UNIV_INTERN
ulint
srv_conc_get_waiting_threads(void);
/*==============================*/
/*********************************************************************//**
Get the count of threads active inside InnoDB. */
-UNIV_INTERN
ulint
srv_conc_get_active_threads(void);
/*==============================*/
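
The reworked srv0conc.h keeps the admission-control contract: at most srv_thread_concurrency threads run inside InnoDB (0 disables the check), latecomers wait in a FIFO queue, and a thread ending a lock wait may force its way in. A self-contained sketch of such a gate follows, assuming a mutex/condvar approximation is acceptable (the real queue is strictly FIFO; a condition variable only approximates that):

#include <condition_variable>
#include <mutex>

class conc_gate {
public:
    explicit conc_gate(unsigned concurrency) : limit_(concurrency) {}

    void enter() {                      /* cf. srv_conc_enter_innodb() */
        std::unique_lock<std::mutex> lk(m_);
        ++waiting_;
        cv_.wait(lk, [this] { return limit_ == 0 || active_ < limit_; });
        --waiting_;
        ++active_;
    }
    void force_enter() {                /* cf. srv_conc_force_enter_innodb() */
        std::lock_guard<std::mutex> lk(m_);
        ++active_;                      /* admitted regardless of the limit */
    }
    void exit() {                       /* cf. srv_conc_force_exit_innodb() */
        std::lock_guard<std::mutex> lk(m_);
        --active_;
        cv_.notify_one();
    }
    unsigned waiting() {                /* cf. srv_conc_get_waiting_threads() */
        std::lock_guard<std::mutex> lk(m_);
        return waiting_;
    }

private:
    std::mutex m_;
    std::condition_variable cv_;
    unsigned limit_, active_ = 0, waiting_ = 0;
};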
diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h
index 736a1a66aa4..343cb0e741a 100644
--- a/storage/innobase/include/srv0mon.h
+++ b/storage/innobase/include/srv0mon.h
@@ -1,6 +1,6 @@
/***********************************************************************
-Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2010, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
Copyright (c) 2013, 2019, MariaDB Corporation.
@@ -30,8 +30,13 @@ Created 12/15/2009 Jimmy Yang
#define srv0mon_h
#include "univ.i"
-#ifndef UNIV_HOTBACKUP
+#ifndef __STDC_LIMIT_MACROS
+/* Required for FreeBSD so that INT64_MAX is defined. */
+#define __STDC_LIMIT_MACROS
+#endif /* __STDC_LIMIT_MACROS */
+
+#include <stdint.h>
/** Possible status values for "mon_status" in "struct monitor_value" */
enum monitor_running_status {
@@ -42,7 +47,7 @@ enum monitor_running_status {
typedef enum monitor_running_status monitor_running_t;
/** Monitor counter value type */
-typedef ib_int64_t mon_type_t;
+typedef int64_t mon_type_t;
/** Two monitor structures are defined in this file. One is
"monitor_value_t" which contains dynamic counter values for each
@@ -98,9 +103,15 @@ enum monitor_type_t {
};
/** Counter minimum value is initialized to be max value of
- mon_type_t (ib_int64_t) */
-#define MIN_RESERVED ((mon_type_t) (IB_UINT64_MAX >> 1))
-#define MAX_RESERVED (~MIN_RESERVED)
+ mon_type_t (int64_t) */
+#ifndef INT64_MAX
+#define INT64_MAX (9223372036854775807LL)
+#endif
+#ifndef INT64_MIN
+#define INT64_MIN (-9223372036854775807LL-1)
+#endif
+#define MIN_RESERVED INT64_MAX
+#define MAX_RESERVED INT64_MIN
/** This enumeration defines internal monitor identifier used internally
to identify each particular counter. Its value indexes into two arrays,
@@ -125,7 +136,6 @@ enum monitor_id_t {
MONITOR_TABLE_OPEN,
MONITOR_TABLE_CLOSE,
MONITOR_TABLE_REFERENCE,
- MONITOR_OVLD_META_MEM_POOL,
/* Lock manager related counters */
MONITOR_MODULE_LOCK,
@@ -175,7 +185,6 @@ enum monitor_id_t {
MONITOR_FLUSH_BATCH_SCANNED,
MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL,
MONITOR_FLUSH_BATCH_SCANNED_PER_CALL,
- MONITOR_FLUSH_HP_RESCAN,
MONITOR_FLUSH_BATCH_TOTAL_PAGE,
MONITOR_FLUSH_BATCH_COUNT,
MONITOR_FLUSH_BATCH_PAGES,
@@ -183,6 +192,24 @@ enum monitor_id_t {
MONITOR_FLUSH_NEIGHBOR_COUNT,
MONITOR_FLUSH_NEIGHBOR_PAGES,
MONITOR_FLUSH_N_TO_FLUSH_REQUESTED,
+
+ MONITOR_FLUSH_N_TO_FLUSH_BY_AGE,
+ MONITOR_FLUSH_ADAPTIVE_AVG_TIME_SLOT,
+ MONITOR_LRU_BATCH_FLUSH_AVG_TIME_SLOT,
+
+ MONITOR_FLUSH_ADAPTIVE_AVG_TIME_THREAD,
+ MONITOR_LRU_BATCH_FLUSH_AVG_TIME_THREAD,
+ MONITOR_FLUSH_ADAPTIVE_AVG_TIME_EST,
+ MONITOR_LRU_BATCH_FLUSH_AVG_TIME_EST,
+ MONITOR_FLUSH_AVG_TIME,
+
+ MONITOR_FLUSH_ADAPTIVE_AVG_PASS,
+ MONITOR_LRU_BATCH_FLUSH_AVG_PASS,
+ MONITOR_FLUSH_AVG_PASS,
+
+ MONITOR_LRU_GET_FREE_LOOPS,
+ MONITOR_LRU_GET_FREE_WAITS,
+
MONITOR_FLUSH_AVG_PAGE_RATE,
MONITOR_FLUSH_LSN_AVG_RATE,
MONITOR_FLUSH_PCT_FOR_DIRTY,
@@ -300,12 +327,13 @@ enum monitor_id_t {
MONITOR_OVLD_BUF_OLDEST_LSN,
MONITOR_OVLD_MAX_AGE_ASYNC,
MONITOR_OVLD_MAX_AGE_SYNC,
- MONITOR_PENDING_LOG_WRITE,
+ MONITOR_PENDING_LOG_FLUSH,
MONITOR_PENDING_CHECKPOINT_WRITE,
MONITOR_LOG_IO,
MONITOR_OVLD_LOG_WAITS,
MONITOR_OVLD_LOG_WRITE_REQUEST,
MONITOR_OVLD_LOG_WRITES,
+ MONITOR_OVLD_LOG_PADDED,
/* Page Manager related counters */
MONITOR_MODULE_PAGE,
@@ -315,16 +343,8 @@ enum monitor_id_t {
MONITOR_PAD_DECREMENTS,
/* New monitor variables for page compression */
MONITOR_OVLD_PAGE_COMPRESS_SAVED,
- MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512,
- MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024,
- MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048,
- MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096,
- MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192,
- MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384,
- MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768,
MONITOR_OVLD_PAGES_PAGE_COMPRESSED,
MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP,
- MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED,
MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED,
MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR,
@@ -341,16 +361,20 @@ enum monitor_id_t {
MONITOR_INDEX_REORG_SUCCESSFUL,
MONITOR_INDEX_DISCARD,
+#ifdef BTR_CUR_HASH_ADAPT
/* Adaptive Hash Index related counters */
MONITOR_MODULE_ADAPTIVE_HASH,
MONITOR_OVLD_ADAPTIVE_HASH_SEARCH,
+#endif /* BTR_CUR_HASH_ADAPT */
MONITOR_OVLD_ADAPTIVE_HASH_SEARCH_BTREE,
+#ifdef BTR_CUR_HASH_ADAPT
MONITOR_ADAPTIVE_HASH_PAGE_ADDED,
MONITOR_ADAPTIVE_HASH_PAGE_REMOVED,
MONITOR_ADAPTIVE_HASH_ROW_ADDED,
MONITOR_ADAPTIVE_HASH_ROW_REMOVED,
MONITOR_ADAPTIVE_HASH_ROW_REMOVE_NOT_FOUND,
MONITOR_ADAPTIVE_HASH_ROW_UPDATED,
+#endif /* BTR_CUR_HASH_ADAPT */
/* Tablespace related counters */
MONITOR_MODULE_FIL_SYSTEM,
@@ -387,10 +411,13 @@ enum monitor_id_t {
MONITOR_OVLD_SRV_PAGE_SIZE,
MONITOR_OVLD_RWLOCK_S_SPIN_WAITS,
MONITOR_OVLD_RWLOCK_X_SPIN_WAITS,
+ MONITOR_OVLD_RWLOCK_SX_SPIN_WAITS,
MONITOR_OVLD_RWLOCK_S_SPIN_ROUNDS,
MONITOR_OVLD_RWLOCK_X_SPIN_ROUNDS,
+ MONITOR_OVLD_RWLOCK_SX_SPIN_ROUNDS,
MONITOR_OVLD_RWLOCK_S_OS_WAITS,
MONITOR_OVLD_RWLOCK_X_OS_WAITS,
+ MONITOR_OVLD_RWLOCK_SX_OS_WAITS,
/* Data DML related counters */
MONITOR_MODULE_DML_STATS,
@@ -409,6 +436,8 @@ enum monitor_id_t {
MONITOR_BACKGROUND_DROP_TABLE,
MONITOR_ONLINE_CREATE_INDEX,
MONITOR_PENDING_ALTER_TABLE,
+ MONITOR_ALTER_TABLE_SORT_FILES,
+ MONITOR_ALTER_TABLE_LOG_FILES,
MONITOR_MODULE_ICP,
MONITOR_ICP_ATTEMPTS,
@@ -416,6 +445,10 @@ enum monitor_id_t {
MONITOR_ICP_OUT_OF_RANGE,
MONITOR_ICP_MATCH,
+ /* Mutex/RW-Lock related counters */
+ MONITOR_MODULE_LATCHES,
+ MONITOR_LATCHES,
+
/* This is used only for control system to turn
on/off and reset all monitor counters */
MONITOR_ALL_COUNTER,
@@ -568,57 +601,15 @@ on the counters */
} \
}
-/** Increment a monitor counter under mutex protection.
-Use MONITOR_INC if appropriate mutex protection already exists.
-@param mutex mutex to acquire and release
-@param monitor monitor to be incremented by 1
-@param enabled whether the monitor is enabled */
-#define MONITOR_MUTEX_INC_LOW(mutex, monitor, enabled) \
- ut_ad(!mutex_own(mutex)); \
- if (enabled) { \
- mutex_enter(mutex); \
- if (++MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \
- MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor); \
- } \
- mutex_exit(mutex); \
- }
-/** Increment a monitor counter under mutex protection.
-Use MONITOR_INC if appropriate mutex protection already exists.
-@param mutex mutex to acquire and release
-@param monitor monitor to be incremented by 1 */
-#define MONITOR_MUTEX_INC(mutex, monitor) \
- MONITOR_MUTEX_INC_LOW(mutex, monitor, MONITOR_IS_ON(monitor))
-/** Decrement a monitor counter under mutex protection.
-Use MONITOR_DEC if appropriate mutex protection already exists.
-@param mutex mutex to acquire and release
-@param monitor monitor to be decremented by 1
-@param enabled whether the monitor is enabled */
-#define MONITOR_MUTEX_DEC_LOW(mutex, monitor, enabled) \
- ut_ad(!mutex_own(mutex)); \
- if (MONITOR_IS_ON(monitor)) { \
- mutex_enter(mutex); \
- if (--MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) { \
- MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor); \
- } \
- mutex_exit(mutex); \
- }
-/** Decrement a monitor counter under mutex protection.
-Use MONITOR_DEC if appropriate mutex protection already exists.
-@param mutex mutex to acquire and release
-@param monitor monitor to be decremented by 1 */
-#define MONITOR_MUTEX_DEC(mutex, monitor) \
- MONITOR_MUTEX_DEC_LOW(mutex, monitor, MONITOR_IS_ON(monitor))
-
-#if defined HAVE_ATOMIC_BUILTINS_64
/** Atomically increment a monitor counter.
Use MONITOR_INC if appropriate mutex protection exists.
@param monitor monitor to be incremented by 1
@param enabled whether the monitor is enabled */
-# define MONITOR_ATOMIC_INC_LOW(monitor, enabled) \
- if (enabled) { \
+#define MONITOR_ATOMIC_INC_LOW(monitor, enabled) \
+ if (enabled) { \
ib_uint64_t value; \
- value = os_atomic_increment_uint64( \
- (ib_uint64_t*) &MONITOR_VALUE(monitor), 1); \
+ value = my_atomic_add64( \
+ (int64*) &MONITOR_VALUE(monitor), 1) + 1; \
/* Note: This is not 100% accurate because of the \
	inherent race; we ignore it for performance. */ \
if (value > (ib_uint64_t) MONITOR_MAX_VALUE(monitor)) { \
@@ -630,50 +621,17 @@ Use MONITOR_INC if appropriate mutex protection exists.
Use MONITOR_DEC if appropriate mutex protection exists.
@param monitor monitor to be decremented by 1
@param enabled whether the monitor is enabled */
-# define MONITOR_ATOMIC_DEC_LOW(monitor, enabled) \
+#define MONITOR_ATOMIC_DEC_LOW(monitor, enabled) \
if (enabled) { \
ib_uint64_t value; \
- value = os_atomic_decrement_uint64( \
- (ib_uint64_t*) &MONITOR_VALUE(monitor), 1); \
+ value = my_atomic_add64( \
+ (int64*) &MONITOR_VALUE(monitor), -1) - 1; \
/* Note: This is not 100% accurate because of the \
	inherent race; we ignore it for performance. */ \
if (value < (ib_uint64_t) MONITOR_MIN_VALUE(monitor)) { \
MONITOR_MIN_VALUE(monitor) = value; \
} \
}
-# define srv_mon_create() ((void) 0)
-# define srv_mon_free() ((void) 0)
-#else /* HAVE_ATOMIC_BUILTINS_64 */
-# include "sync0types.h"
-/** Mutex protecting atomic operations on platforms that lack
-built-in operations for atomic memory access */
-extern ib_mutex_t monitor_mutex;
-/****************************************************************//**
-Initialize the monitor subsystem. */
-UNIV_INTERN
-void
-srv_mon_create(void);
-/*================*/
-/****************************************************************//**
-Close the monitor subsystem. */
-UNIV_INTERN
-void
-srv_mon_free(void);
-/*==============*/
-
-/** Atomically increment a monitor counter.
-Use MONITOR_INC if appropriate mutex protection exists.
-@param monitor monitor to be incremented by 1
-@param enabled whether the monitor is enabled */
-# define MONITOR_ATOMIC_INC_LOW(monitor, enabled) \
- MONITOR_MUTEX_INC_LOW(&monitor_mutex, monitor, enabled)
-/** Atomically decrement a monitor counter.
-Use MONITOR_DEC if appropriate mutex protection exists.
-@param monitor monitor to be decremented by 1
-@param enabled whether the monitor is enabled */
-# define MONITOR_ATOMIC_DEC_LOW(monitor, enabled) \
- MONITOR_MUTEX_DEC_LOW(&monitor_mutex, monitor, enabled)
-#endif /* HAVE_ATOMIC_BUILTINS_64 */
/** Atomically increment a monitor counter if it is enabled.
Use MONITOR_INC if appropriate mutex protection exists.
@@ -755,12 +713,12 @@ could already be checked as a module group */
/** Add time difference between now and input "value" (in microseconds) to the
monitor counter
-@param monitor monitor to update for the time difference
-@param value the start time value */
+@param monitor monitor to update for the time difference
+@param value the start time value */
#define MONITOR_INC_TIME_IN_MICRO_SECS(monitor, value) \
MONITOR_CHECK_DEFINED(value); \
if (MONITOR_IS_ON(monitor)) { \
- ullint old_time = (value); \
+ uintmax_t old_time = value; \
value = microsecond_interval_timer(); \
MONITOR_VALUE(monitor) += (mon_type_t) (value - old_time);\
}
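
The macro above keeps its start-time/now swap: value holds the caller's start timestamp, gets replaced with the current time, and the difference is added to the counter. A stand-alone restatement of the idiom, with microsecond_timer() as an illustrative stand-in for microsecond_interval_timer():

#include <chrono>
#include <cstdint>

static uint64_t microsecond_timer() {
    using namespace std::chrono;
    return (uint64_t) duration_cast<microseconds>(
        steady_clock::now().time_since_epoch()).count();
}

static int64_t monitor_value = 0;    /* the accumulated counter */

static void inc_time_in_micro_secs(uint64_t& value)
{
    uint64_t old_time = value;       /* start time stored by the caller */
    value = microsecond_timer();     /* reset to "now" for the next interval */
    monitor_value += (int64_t)(value - old_time);
}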
@@ -768,13 +726,13 @@ monitor counter
/** This macro updates 3 counters in one call. However, it only checks the
main/first monitor counter 'monitor', to see whether it is on or off, to decide
whether to do the update.
-@param monitor the main monitor counter to update. It accounts for
+@param monitor the main monitor counter to update. It accounts for
the accumulative value for the counter.
-@param monitor_n_calls counter that counts number of times this macro is
+@param monitor_n_calls counter that counts number of times this macro is
called
-@param monitor_per_call counter that records the current and max value of
+@param monitor_per_call counter that records the current and max value of
each incremental value
-@param value incremental value to record this time */
+@param value incremental value to record this time */
#define MONITOR_INC_VALUE_CUMULATIVE( \
monitor, monitor_n_calls, monitor_per_call, value) \
MONITOR_CHECK_DEFINED(value); \
@@ -860,9 +818,8 @@ compensated by mon_last_value if accumulated value is required. */
/****************************************************************//**
Get monitor's monitor_info_t by its monitor id (index into the
innodb_counter_info array
-@return Point to corresponding monitor_info_t, or NULL if no such
+@return	Pointer to the corresponding monitor_info_t, or NULL if no such
monitor */
-UNIV_INTERN
monitor_info_t*
srv_mon_get_info(
/*=============*/
@@ -871,9 +828,8 @@ srv_mon_get_info(
/****************************************************************//**
Get monitor's name by its monitor id (index into the
innodb_counter_info array
-@return corresponding monitor name, or NULL if no such
+@return corresponding monitor name, or NULL if no such
monitor */
-UNIV_INTERN
const char*
srv_mon_get_name(
/*=============*/
@@ -883,9 +839,8 @@ srv_mon_get_name(
/****************************************************************//**
Turn on/off/reset monitor counters in a module. If module_value
is NUM_MONITOR then turn on all monitor counters.
-@return 0 if successful, or the first monitor that cannot be
+@return 0 if successful, or the first monitor that cannot be
turned on because it is already turned on. */
-UNIV_INTERN
void
srv_mon_set_module_control(
/*=======================*/
@@ -902,7 +857,6 @@ mechanism to start/stop and reset the counters, so we simulate these
controls by remembering the corresponding counter values when the
corresponding monitors are turned on/off/reset, and do appropriate
mathematics to deduct the actual value. */
-UNIV_INTERN
void
srv_mon_process_existing_counter(
/*=============================*/
@@ -913,7 +867,7 @@ srv_mon_process_existing_counter(
/*************************************************************//**
This function is used to calculate the maximum counter value
since the start of monitor counter
-@return max counter value since start. */
+@return max counter value since start. */
UNIV_INLINE
mon_type_t
srv_mon_calc_max_since_start(
@@ -922,7 +876,7 @@ srv_mon_calc_max_since_start(
/*************************************************************//**
This function is used to calculate the minimum counter value
since the start of monitor counter
-@return min counter value since start. */
+@return min counter value since start. */
UNIV_INLINE
mon_type_t
srv_mon_calc_min_since_start(
@@ -931,7 +885,6 @@ srv_mon_calc_min_since_start(
/*************************************************************//**
Reset a monitor, create a new base line with the current monitor
value. This baseline is recorded by MONITOR_VALUE_RESET(monitor) */
-UNIV_INTERN
void
srv_mon_reset(
/*==========*/
@@ -945,17 +898,10 @@ srv_mon_reset_all(
monitor_id_t monitor); /*!< in: monitor id*/
/*************************************************************//**
Turn on monitor counters that are marked as default ON. */
-UNIV_INTERN
void
srv_mon_default_on(void);
/*====================*/
-#ifndef UNIV_NONINL
#include "srv0mon.ic"
-#endif
-#else /* !UNIV_HOTBACKUP */
-# define MONITOR_INC(x) ((void) 0)
-# define MONITOR_DEC(x) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
#endif
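
The switch from os_atomic_increment_uint64() to my_atomic_add64() changes the return convention: like std::atomic fetch_add, my_atomic_add64() yields the value before the addition, hence the explicit + 1 / - 1 in the rewritten macros. The __STDC_LIMIT_MACROS block earlier is the pre-C++11 portability workaround for the same INT64_MAX/INT64_MIN constants that <cstdint> now provides. A std::atomic restatement of the pattern, which also shows why MIN_RESERVED is INT64_MAX and MAX_RESERVED is INT64_MIN (a running min/max must start at the opposite extreme):

#include <atomic>
#include <cstdint>

static std::atomic<int64_t> mon_value{0};
static int64_t mon_max = INT64_MIN;   /* cf. MAX_RESERVED */
static int64_t mon_min = INT64_MAX;   /* cf. MIN_RESERVED */

static void monitor_atomic_inc()
{
    /* fetch_add returns the old value, hence the explicit + 1. */
    int64_t value = mon_value.fetch_add(1, std::memory_order_relaxed) + 1;
    /* Not 100% accurate because of the inherent race between the add
    and this check; ignored for performance, as in the original. */
    if (value > mon_max) mon_max = value;
}

static void monitor_atomic_dec()
{
    int64_t value = mon_value.fetch_add(-1, std::memory_order_relaxed) - 1;
    if (value < mon_min) mon_min = value;
}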
diff --git a/storage/innobase/include/srv0mon.ic b/storage/innobase/include/srv0mon.ic
index 291deac5326..158345b2f8c 100644
--- a/storage/innobase/include/srv0mon.ic
+++ b/storage/innobase/include/srv0mon.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2010, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,7 +26,7 @@ Created 1/20/2010 Jimmy Yang
/*************************************************************//**
This function is used to calculate the maximum counter value
since the start of monitor counter
-@return max counter value since start. */
+@return max counter value since start. */
UNIV_INLINE
mon_type_t
srv_mon_calc_max_since_start(
@@ -61,7 +61,7 @@ srv_mon_calc_max_since_start(
/*************************************************************//**
This function is used to calculate the minimum counter value
since the start of monitor counter
-@return min counter value since start. */
+@return min counter value since start. */
UNIV_INLINE
mon_type_t
srv_mon_calc_min_since_start(
@@ -103,9 +103,9 @@ srv_mon_reset_all(
{
/* Do not reset all counter values if monitor is still on. */
if (MONITOR_IS_ON(monitor)) {
- fprintf(stderr, "InnoDB: Cannot reset all values for "
- "monitor counter %s while it is on. Please "
- "turn it off and retry. \n",
+ fprintf(stderr, "InnoDB: Cannot reset all values for"
+ " monitor counter %s while it is on. Please"
+ " turn it off and retry.\n",
srv_mon_get_name(monitor));
} else {
MONITOR_RESET_ALL(monitor);
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index 37e249f3f07..767e24e9265 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -3,7 +3,7 @@
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All rights reserved.
Copyright (c) 2008, 2009, Google Inc.
Copyright (c) 2009, Percona Inc.
-Copyright (c) 2013, 2017, MariaDB Corporation.
+Copyright (c) 2013, 2018, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -42,23 +42,24 @@ Created 10/10/1995 Heikki Tuuri
#ifndef srv0srv_h
#define srv0srv_h
-#include "univ.i"
-#ifndef UNIV_HOTBACKUP
#include "log0log.h"
-#include "sync0sync.h"
-#include "os0sync.h"
+#include "os0event.h"
#include "que0types.h"
#include "trx0types.h"
#include "srv0conc.h"
#include "buf0checksum.h"
-#include "ut0counter.h"
+#include "fil0fil.h"
-/* Global counters used inside InnoDB. */
-struct srv_stats_t {
+#include "mysql/psi/mysql_stage.h"
+#include "mysql/psi/psi.h"
+
+/** Global counters used inside InnoDB. */
+struct srv_stats_t
+{
typedef ib_counter_t<ulint, 64> ulint_ctr_64_t;
typedef simple_counter<lsn_t> lsn_ctr_1_t;
typedef simple_counter<ulint> ulint_ctr_1_t;
- typedef simple_counter<ib_int64_t> ib_int64_ctr_1_t;
+ typedef simple_counter<int64_t> int64_ctr_1_t;
/** Count the amount of data written in total (in bytes) */
ulint_ctr_1_t data_written;
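
srv_stats_t mixes two counter flavors: simple_counter for plain values and ib_counter_t<ulint, 64> for hot counters, which shards updates across cache lines to avoid contention. A sketch of the sharding idea follows; it illustrates the concept only and is not the ut0counter.h implementation:

#include <atomic>
#include <cstddef>
#include <functional>
#include <thread>

template <typename T, size_t N = 64>
class sharded_counter {
    struct alignas(64) slot { std::atomic<T> v{0}; }; /* one cache line each */
    slot slots_[N];

public:
    void add(T n) {
        /* pick a slot by thread identity so writers rarely collide */
        size_t i = std::hash<std::thread::id>()(
                       std::this_thread::get_id()) % N;
        slots_[i].v.fetch_add(n, std::memory_order_relaxed);
    }
    T total() const {              /* O(N) read; writes stay contention-free */
        T sum = 0;
        for (const slot& s : slots_)
            sum += s.v.load(std::memory_order_relaxed);
        return sum;
    }
};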
@@ -69,6 +70,9 @@ struct srv_stats_t {
/** Number of physical writes to the log performed */
ulint_ctr_1_t log_writes;
+ /** Amount of data padded for log write ahead */
+ ulint_ctr_1_t log_padded;
+
/** Amount of data written to the log files in bytes */
lsn_ctr_1_t os_log_written;
@@ -105,20 +109,6 @@ struct srv_stats_t {
/** Number of bytes saved by page compression */
ulint_ctr_64_t page_compression_saved;
- /** Number of 512Byte TRIM by page compression */
- ulint_ctr_64_t page_compression_trim_sect512;
- /** Number of 1K TRIM by page compression */
- ulint_ctr_64_t page_compression_trim_sect1024;
- /** Number of 2K TRIM by page compression */
- ulint_ctr_64_t page_compression_trim_sect2048;
- /** Number of 4K TRIM by page compression */
- ulint_ctr_64_t page_compression_trim_sect4096;
- /** Number of 8K TRIM by page compression */
- ulint_ctr_64_t page_compression_trim_sect8192;
- /** Number of 16K TRIM by page compression */
- ulint_ctr_64_t page_compression_trim_sect16384;
- /** Number of 32K TRIM by page compression */
- ulint_ctr_64_t page_compression_trim_sect32768;
/* Number of index pages written */
ulint_ctr_64_t index_pages_written;
/* Number of non index pages written */
@@ -127,8 +117,6 @@ struct srv_stats_t {
ulint_ctr_64_t pages_page_compressed;
/* Number of TRIM operations induced by page compression */
ulint_ctr_64_t page_compressed_trim_op;
- /* Number of TRIM operations saved by using actual write size knowledge */
- ulint_ctr_64_t page_compressed_trim_op_saved;
/* Number of pages decompressed with page compression */
ulint_ctr_64_t pages_page_decompressed;
/* Number of page compression errors */
@@ -150,7 +138,7 @@ struct srv_stats_t {
ulint_ctr_1_t data_read;
/** Wait time of database locks */
- ib_int64_ctr_1_t n_lock_wait_time;
+ int64_ctr_1_t n_lock_wait_time;
/** Number of database lock waits */
ulint_ctr_1_t n_lock_wait_count;
@@ -194,8 +182,17 @@ struct srv_stats_t {
/** Number of encryption_get_latest_key_version calls */
ulint_ctr_64_t n_key_requests;
+ /** Number of log scrub operations */
+ ulint_ctr_64_t n_log_scrubs;
+
/** Number of spaces in keyrotation list */
ulint_ctr_64_t key_rotation_list_length;
+
+ /** Number of temporary tablespace blocks encrypted */
+ ulint_ctr_64_t n_temp_blocks_encrypted;
+
+ /** Number of temporary tablespace blocks decrypted */
+ ulint_ctr_64_t n_temp_blocks_decrypted;
};
extern const char* srv_main_thread_op_info;
@@ -215,6 +212,9 @@ extern os_event_t srv_error_event;
Set on shutdown or by buf_dump_start() or buf_load_start(). */
extern os_event_t srv_buf_dump_event;
+/** The buffer pool resize thread waits on this event. */
+extern os_event_t srv_buf_resize_event;
+
/** The buffer pool dump/load file name */
#define SRV_BUF_DUMP_FILENAME_DEFAULT "ib_buffer_pool"
extern char* srv_buf_dump_filename;
@@ -229,19 +229,14 @@ extern char srv_disable_sort_file_cache;
/* If the last data file is auto-extended, we add this many pages to it
at a time */
-#define SRV_AUTO_EXTEND_INCREMENT \
- (srv_auto_extend_increment * ((1024 * 1024) / UNIV_PAGE_SIZE))
+#define SRV_AUTO_EXTEND_INCREMENT (srv_sys_space.get_autoextend_increment())
+/** Mutex protecting page_zip_stat_per_index */
+extern ib_mutex_t page_zip_stat_per_index_mutex;
/* Mutex for locking srv_monitor_file. Not created if srv_read_only_mode */
extern ib_mutex_t srv_monitor_file_mutex;
/* Temporary file for innodb monitor output */
extern FILE* srv_monitor_file;
-/* Mutex for locking srv_dict_tmpfile. Only created if !srv_read_only_mode.
-This mutex has a very high rank; threads reserving it should not
-be holding any InnoDB latches. */
-extern ib_mutex_t srv_dict_tmpfile_mutex;
-/* Temporary file for output from the data dictionary */
-extern FILE* srv_dict_tmpfile;
/* Mutex for locking srv_misc_tmpfile. Only created if !srv_read_only_mode.
This mutex has a very low rank; threads reserving it should not
acquire any further latches or sleep before releasing this one. */
@@ -253,10 +248,6 @@ extern FILE* srv_misc_tmpfile;
extern char* srv_data_home;
-#ifdef UNIV_LOG_ARCHIVE
-extern char* srv_arch_dir;
-#endif /* UNIV_LOG_ARCHIVE */
-
/** Set if InnoDB must operate in read-only mode. We don't do any
recovery and open all tables in RO mode instead of RW mode. We don't
sync the max trx id to disk either. */
@@ -267,12 +258,13 @@ extern my_bool high_level_read_only;
/** store to its own file each table created by a user; data
dictionary tables are in the system tablespace 0 */
extern my_bool srv_file_per_table;
+/** whether to use backup-safe TRUNCATE and crash-safe RENAME
+instead of the MySQL 5.7 WL#6501 TRUNCATE TABLE implementation */
+extern my_bool srv_safe_truncate;
/** Sleep delay for threads waiting to enter InnoDB. In micro-seconds. */
extern ulong srv_thread_sleep_delay;
-#if defined(HAVE_ATOMIC_BUILTINS)
/** Maximum sleep delay (in micro-seconds), value of 0 disables it.*/
extern ulong srv_adaptive_max_sleep_delay;
-#endif /* HAVE_ATOMIC_BUILTINS */
/** The file format to use on new *.ibd files. */
extern ulint srv_file_format;
@@ -299,11 +291,6 @@ extern my_bool srv_numa_interleave;
/* Use trim operation */
extern my_bool srv_use_trim;
-/* Use posix fallocate */
-#ifdef HAVE_POSIX_FALLOCATE
-extern my_bool srv_use_posix_fallocate;
-#endif
-
/* Use atomic writes, i.e. disable the doublewrite buffer */
extern my_bool srv_use_atomic_writes;
@@ -311,8 +298,8 @@ extern my_bool srv_use_atomic_writes;
extern ulong innodb_compression_algorithm;
/* Number of flush threads */
-#define MTFLUSH_MAX_WORKER 64
-#define MTFLUSH_DEFAULT_WORKER 8
+#define MTFLUSH_MAX_WORKER 64
+#define MTFLUSH_DEFAULT_WORKER 8
/* Number of threads used for multi-threaded flush */
extern long srv_mtflush_threads;
@@ -320,10 +307,8 @@ extern long srv_mtflush_threads;
/* If this flag is TRUE, then we will use multi threaded flush. */
extern my_bool srv_use_mtflush;
-#ifdef __WIN__
-extern ibool srv_use_native_conditions;
-#endif /* __WIN__ */
-#endif /* !UNIV_HOTBACKUP */
+/** TRUE if the server was successfully started */
+extern bool srv_was_started;
/** Server undo tablespaces directory, can be absolute path. */
extern char* srv_undo_dir;
@@ -334,69 +319,104 @@ extern ulong srv_undo_tablespaces;
/** The number of UNDO tablespaces that are open and ready to use. */
extern ulint srv_undo_tablespaces_open;
-/* The number of undo segments to use */
+/** The number of UNDO tablespaces that are active (hosting some rollback
+segment). It is quite possible that some of the tablespaces don't host
+any rollback segments, depending on the configuration used. */
+extern ulint srv_undo_tablespaces_active;
+
+/** Undo tablespaces starts with space_id. */
+extern ulint srv_undo_space_id_start;
+
+/** Check whether given space id is undo tablespace id
+@param[in] space_id space id to check
+@return true if it is undo tablespace else false. */
+inline
+bool
+srv_is_undo_tablespace(ulint space_id)
+{
+ return srv_undo_space_id_start > 0
+ && space_id >= srv_undo_space_id_start
+ && space_id < (srv_undo_space_id_start
+ + srv_undo_tablespaces_open);
+}
+
+/** The number of undo segments to use */
extern ulong srv_undo_logs;
-extern ulint srv_n_data_files;
-extern char** srv_data_file_names;
-extern ulint* srv_data_file_sizes;
-extern ulint* srv_data_file_is_raw_partition;
+/** Maximum size of undo tablespace. */
+extern unsigned long long srv_max_undo_log_size;
extern uint srv_n_fil_crypt_threads;
extern uint srv_n_fil_crypt_threads_started;
-extern ibool srv_auto_extend_last_data_file;
-extern ulint srv_last_file_size_max;
-extern char* srv_log_group_home_dir;
-#ifndef UNIV_HOTBACKUP
-extern ulong srv_auto_extend_increment;
+/** Rate at which UNDO records should be purged. */
+extern ulong srv_purge_rseg_truncate_frequency;
-extern ibool srv_created_new_raw;
+/** Enable or Disable Truncate of UNDO tablespace. */
+extern my_bool srv_undo_log_truncate;
/* Optimize prefix index queries to skip cluster index lookup when possible */
/* Enables or disables this prefix optimization. Disabled by default. */
extern my_bool srv_prefix_index_cluster_optimization;
+/** Default size of UNDO tablespace while it is created new. */
+extern const ulint SRV_UNDO_TABLESPACE_SIZE_IN_PAGES;
+
+extern char* srv_log_group_home_dir;
+
/** Maximum number of srv_n_log_files, or innodb_log_files_in_group */
#define SRV_N_LOG_FILES_MAX 100
extern ulong srv_n_log_files;
-extern ib_uint64_t srv_log_file_size;
-extern ib_uint64_t srv_log_file_size_requested;
+/** The InnoDB redo log file size, or 0 when changing the redo log format
+at startup (while disallowing writes to the redo log). */
+extern ulonglong srv_log_file_size;
extern ulint srv_log_buffer_size;
extern ulong srv_flush_log_at_trx_commit;
extern uint srv_flush_log_at_timeout;
+extern ulong srv_log_write_ahead_size;
extern char srv_adaptive_flushing;
+extern my_bool srv_flush_sync;
#ifdef WITH_INNODB_DISALLOW_WRITES
/* When this event is reset we do not allow any file writes to take place. */
extern os_event_t srv_allow_writes_event;
#endif /* WITH_INNODB_DISALLOW_WRITES */
+
/* If this flag is TRUE, then we will load the indexes' (and tables') metadata
even if they are marked as "corrupted". Mostly it is for the DBA to process
corrupted indexes and tables */
extern my_bool srv_load_corrupted;
-/* The sort order table of the MySQL latin1_swedish_ci character set
-collation */
-extern const byte* srv_latin1_ordering;
-#ifndef UNIV_HOTBACKUP
-extern my_bool srv_use_sys_malloc;
-#else
-extern ibool srv_use_sys_malloc;
-#endif /* UNIV_HOTBACKUP */
-extern ulint srv_buf_pool_size; /*!< requested size in bytes */
-extern ulint srv_buf_pool_instances; /*!< requested number of buffer pool instances */
-extern ulong srv_n_page_hash_locks; /*!< number of locks to
- protect buf_pool->page_hash */
-extern ulong srv_LRU_scan_depth; /*!< Scan depth for LRU
- flush batch */
-extern ulong srv_flush_neighbors; /*!< whether or not to flush
- neighbors of a block */
-extern ulint srv_buf_pool_old_size; /*!< previously requested size */
-extern ulint srv_buf_pool_curr_size; /*!< current size in bytes */
+/** Requested size in bytes */
+extern ulint srv_buf_pool_size;
+/** Minimum pool size in bytes */
+extern const ulint srv_buf_pool_min_size;
+/** Default pool size in bytes */
+extern const ulint srv_buf_pool_def_size;
+/** Requested buffer pool chunk size. Each buffer pool instance consists
+of one or more chunks. */
+extern ulong srv_buf_pool_chunk_unit;
+/** Requested number of buffer pool instances */
+extern ulong srv_buf_pool_instances;
+/** Default number of buffer pool instances */
+extern const ulong srv_buf_pool_instances_default;
+/** Number of locks to protect buf_pool->page_hash */
+extern ulong srv_n_page_hash_locks;
+/** Scan depth for LRU flush batch, i.e. the number of blocks scanned */
+extern ulong srv_LRU_scan_depth;
+/** Whether or not to flush neighbors of a block */
extern ulong	srv_buf_pool_dump_pct;	/*!< dump this % of each buffer
					pool during BP dump */
-extern ulint srv_mem_pool_size;
+extern ulong srv_flush_neighbors;
+/** Previously requested size */
+extern ulint srv_buf_pool_old_size;
+/** Current size as scaling factor for the other components */
+extern ulint srv_buf_pool_base_size;
+/** Current size in bytes */
+extern ulint srv_buf_pool_curr_size;
+/** Dump this % of each buffer pool during BP dump */
+extern ulong srv_buf_pool_dump_pct;
+/** Lock table size in bytes */
extern ulint srv_lock_table_size;
extern ulint srv_n_file_io_threads;
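
A worked illustration of the srv_is_undo_tablespace() range check introduced above, using hypothetical configuration values (undo tablespaces occupy a contiguous space_id range starting at srv_undo_space_id_start):

#include <cassert>

typedef unsigned long ulint;

static ulint srv_undo_space_id_start   = 1;  /* assumed configuration */
static ulint srv_undo_tablespaces_open = 4;  /* space ids 1..4 are undo */

static bool srv_is_undo_tablespace(ulint space_id)
{
    return srv_undo_space_id_start > 0
        && space_id >= srv_undo_space_id_start
        && space_id < srv_undo_space_id_start + srv_undo_tablespaces_open;
}

int main()
{
    assert(!srv_is_undo_tablespace(0));  /* system tablespace */
    assert( srv_is_undo_tablespace(1));
    assert( srv_is_undo_tablespace(4));
    assert(!srv_is_undo_tablespace(5));  /* first ordinary tablespace */
    return 0;
}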
@@ -404,6 +424,7 @@ extern my_bool srv_random_read_ahead;
extern ulong srv_read_ahead_threshold;
extern ulint srv_n_read_io_threads;
extern ulint srv_n_write_io_threads;
+
/* Defragmentation. Originally the Facebook default value was 100, but it is too high */
#define SRV_DEFRAGMENT_FREQUENCY_DEFAULT 40
extern my_bool srv_defragment;
@@ -416,6 +437,8 @@ extern ulonglong srv_defragment_interval;
extern ulong srv_idle_flush_pct;
+extern uint srv_change_buffer_max_size;
+
/* Number of IO operations per second the server can do */
extern ulong srv_io_capacity;
@@ -434,18 +457,12 @@ to treat NULL value when collecting statistics. It is not defined
as enum type because the configure option takes unsigned integer type. */
extern ulong srv_innodb_stats_method;
-#ifdef UNIV_LOG_ARCHIVE
-extern ibool srv_log_archive_on;
-extern ibool srv_archive_recovery;
-extern ib_uint64_t srv_archive_recovery_limit_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
-
extern char* srv_file_flush_method_str;
-extern ulint srv_unix_file_flush_method;
-extern ulint srv_win_file_flush_method;
extern ulint srv_max_n_open_files;
+extern ulong srv_n_page_cleaners;
+
extern double srv_max_dirty_pages_pct;
extern double srv_max_dirty_pages_pct_lwm;
@@ -454,26 +471,33 @@ extern ulong srv_flushing_avg_loops;
extern ulong srv_force_recovery;
-extern ulint srv_fast_shutdown; /*!< If this is 1, do not do a
+extern uint srv_fast_shutdown; /*!< If this is 1, do not do a
purge and index buffer merge.
				If this is 2, do not even flush the
buffer pool to data files at the
shutdown: we effectively 'crash'
InnoDB (but lose no committed
transactions). */
+
+/** Signal to shut down InnoDB (NULL if shutdown was signaled, or if
+running in innodb_read_only mode, srv_read_only_mode) */
+extern st_my_thread_var *srv_running;
+
extern ibool srv_innodb_status;
extern unsigned long long srv_stats_transient_sample_pages;
extern my_bool srv_stats_persistent;
extern unsigned long long srv_stats_persistent_sample_pages;
extern my_bool srv_stats_auto_recalc;
+extern my_bool srv_stats_include_delete_marked;
extern unsigned long long srv_stats_modified_counter;
extern my_bool srv_stats_sample_traditional;
-extern my_bool srv_stats_include_delete_marked;
extern ibool srv_use_doublewrite_buf;
extern ulong srv_doublewrite_batch_size;
+extern ulong srv_checksum_algorithm;
+extern double srv_max_buf_pool_modified_pct;
extern my_bool srv_force_primary_key;
extern double srv_max_buf_pool_modified_pct;
@@ -481,17 +505,48 @@ extern ulong srv_max_purge_lag;
extern ulong srv_max_purge_lag_delay;
extern ulong srv_replication_delay;
+
+extern my_bool innodb_encrypt_temporary_tables;
+
/*-------------------------------------------*/
+/** Modes of operation */
+enum srv_operation_mode {
+ /** Normal mode (MariaDB Server) */
+ SRV_OPERATION_NORMAL,
+ /** Mariabackup taking a backup */
+ SRV_OPERATION_BACKUP,
+ /** Mariabackup restoring a backup for subsequent --copy-back */
+ SRV_OPERATION_RESTORE,
+	/** Mariabackup restoring a backup, rolling back prepared XA transactions */
+ SRV_OPERATION_RESTORE_ROLLBACK_XA,
+ /** Mariabackup restoring the incremental part of a backup */
+ SRV_OPERATION_RESTORE_DELTA,
+ /** Mariabackup restoring a backup for subsequent --export */
+ SRV_OPERATION_RESTORE_EXPORT
+};
+
+/** Current mode of operation */
+extern enum srv_operation_mode srv_operation;
+
+inline bool is_mariabackup_restore()
+{
+	/* To roll back XA transactions, trx_sys must be initialized; the
+	rest is the same as a regular backup restore, which is why we join
+	these two operations in most cases. */
+ return srv_operation == SRV_OPERATION_RESTORE
+ || srv_operation == SRV_OPERATION_RESTORE_ROLLBACK_XA;
+}
+
+inline bool is_mariabackup_restore_or_export()
+{
+ return is_mariabackup_restore()
+ || srv_operation == SRV_OPERATION_RESTORE_EXPORT;
+}
+
extern my_bool srv_print_innodb_monitor;
extern my_bool srv_print_innodb_lock_monitor;
-extern ibool srv_print_innodb_tablespace_monitor;
extern ibool srv_print_verbose_log;
-#define DEPRECATED_MSG_INNODB_TABLE_MONITOR \
- "Using innodb_table_monitor is deprecated and it may be removed " \
- "in future releases. Please use the InnoDB INFORMATION_SCHEMA " \
- "tables instead, see " REFMAN "innodb-i_s-tables.html"
-extern ibool srv_print_innodb_table_monitor;
extern bool srv_monitor_active;
extern bool srv_error_monitor_active;
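
A small usage sketch of the srv_operation helpers added above; the enum values are copied from the hunk, while the chosen mode and the output are a made-up scenario:

#include <cstdio>

enum srv_operation_mode {
    SRV_OPERATION_NORMAL,
    SRV_OPERATION_BACKUP,
    SRV_OPERATION_RESTORE,
    SRV_OPERATION_RESTORE_ROLLBACK_XA,
    SRV_OPERATION_RESTORE_DELTA,
    SRV_OPERATION_RESTORE_EXPORT
};

static srv_operation_mode srv_operation = SRV_OPERATION_RESTORE_EXPORT;

static bool is_mariabackup_restore()
{
    return srv_operation == SRV_OPERATION_RESTORE
        || srv_operation == SRV_OPERATION_RESTORE_ROLLBACK_XA;
}

static bool is_mariabackup_restore_or_export()
{
    return is_mariabackup_restore()
        || srv_operation == SRV_OPERATION_RESTORE_EXPORT;
}

int main()
{
    std::printf("restore: %d, restore-or-export: %d\n",
                is_mariabackup_restore(),
                is_mariabackup_restore_or_export());  /* prints 0, 1 */
    return 0;
}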
@@ -499,6 +554,9 @@ extern bool srv_error_monitor_active;
/* TRUE during the lifetime of the buffer pool dump/load thread */
extern bool srv_buf_dump_thread_active;
+/* true during the lifetime of the buffer pool resize thread */
+extern bool srv_buf_resize_thread_active;
+
/* TRUE during the lifetime of the stats thread */
extern bool srv_dict_stats_thread_active;
@@ -508,46 +566,31 @@ extern my_bool srv_scrub_log;
extern ulong srv_n_spin_wait_rounds;
extern ulong srv_n_free_tickets_to_enter;
extern ulong srv_thread_sleep_delay;
-extern ulong srv_spin_wait_delay;
-extern ibool srv_priority_boost;
+extern uint srv_spin_wait_delay;
extern ulint srv_truncated_status_writes;
-extern ulint srv_available_undo_logs;
-
-extern ulint srv_mem_pool_size;
-extern ulint srv_lock_table_size;
-
-#ifdef UNIV_DEBUG
-extern ibool srv_print_thread_releases;
-extern ibool srv_print_lock_waits;
-extern ibool srv_print_buf_io;
-extern ibool srv_print_log_io;
-extern ibool srv_print_latch_waits;
-#else /* UNIV_DEBUG */
-# define srv_print_thread_releases FALSE
-# define srv_print_lock_waits FALSE
-# define srv_print_buf_io FALSE
-# define srv_print_log_io FALSE
-# define srv_print_latch_waits FALSE
-#endif /* UNIV_DEBUG */
+/** Number of initialized rollback segments for persistent undo log */
+extern ulong srv_available_undo_logs;
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
extern my_bool srv_ibuf_disable_background_merge;
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
#ifdef UNIV_DEBUG
+extern my_bool srv_sync_debug;
extern my_bool srv_purge_view_update_only_debug;
+
+/** Value of MySQL global used to disable master thread. */
+extern my_bool srv_master_thread_disabled_debug;
+/** InnoDB system tablespace size to set during recovery */
extern uint srv_sys_space_size_debug;
+/** whether redo log files have been created at startup */
+extern bool srv_log_files_created;
#endif /* UNIV_DEBUG */
#define SRV_SEMAPHORE_WAIT_EXTENSION 7200
extern ulint srv_dml_needed_delay;
-#ifndef HAVE_ATOMIC_BUILTINS
-/** Mutex protecting some server global variables. */
-extern ib_mutex_t server_mutex;
-#endif /* !HAVE_ATOMIC_BUILTINS */
-
#define SRV_MAX_N_IO_THREADS 130
/* Array of English strings describing the current state of an
@@ -586,29 +629,37 @@ that semaphore times out in InnoDB */
#define DEFAULT_SRV_FATAL_SEMAPHORE_TIMEOUT 600
extern ulong srv_fatal_semaphore_wait_threshold;
-/** Enable semaphore request instrumentation */
-extern my_bool srv_instrument_semaphores;
-
/** Buffer pool dump status frequence in percentages */
extern ulong srv_buf_dump_status_frequency;
+#define srv_max_purge_threads 32
+
# ifdef UNIV_PFS_THREAD
/* Keys to register InnoDB threads with performance schema */
-extern mysql_pfs_key_t buf_page_cleaner_thread_key;
-extern mysql_pfs_key_t trx_rollback_clean_thread_key;
+extern mysql_pfs_key_t buf_dump_thread_key;
+extern mysql_pfs_key_t dict_stats_thread_key;
extern mysql_pfs_key_t io_handler_thread_key;
-extern mysql_pfs_key_t srv_lock_timeout_thread_key;
+extern mysql_pfs_key_t io_ibuf_thread_key;
+extern mysql_pfs_key_t io_log_thread_key;
+extern mysql_pfs_key_t io_read_thread_key;
+extern mysql_pfs_key_t io_write_thread_key;
+extern mysql_pfs_key_t page_cleaner_thread_key;
+extern mysql_pfs_key_t recv_writer_thread_key;
extern mysql_pfs_key_t srv_error_monitor_thread_key;
-extern mysql_pfs_key_t srv_monitor_thread_key;
+extern mysql_pfs_key_t srv_lock_timeout_thread_key;
extern mysql_pfs_key_t srv_master_thread_key;
+extern mysql_pfs_key_t srv_monitor_thread_key;
extern mysql_pfs_key_t srv_purge_thread_key;
-extern mysql_pfs_key_t recv_writer_thread_key;
+extern mysql_pfs_key_t srv_worker_thread_key;
+extern mysql_pfs_key_t trx_rollback_clean_thread_key;
/* This macro registers the current thread and its key with performance
schema */
# define pfs_register_thread(key) \
do { \
struct PSI_thread* psi = PSI_THREAD_CALL(new_thread)(key, NULL, 0);\
+ /* JAN: TODO: MYSQL 5.7 PSI \
+ PSI_THREAD_CALL(set_thread_os_id)(psi); */ \
PSI_THREAD_CALL(set_thread)(psi); \
} while (0)
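
pfs_register_thread()/pfs_delete_thread() bracket an instrumented thread's lifetime: announce under a key on entry, deregister before exit. A self-contained sketch with stand-ins for the performance-schema calls (the real macros expand to the PSI_THREAD_CALL invocations shown above):

#include <cstdio>
#include <thread>

typedef unsigned int mysql_pfs_key_t;
static const mysql_pfs_key_t srv_worker_thread_key = 1;  /* stand-in key */

static void pfs_register_thread(mysql_pfs_key_t key)
{   /* stand-in for PSI_THREAD_CALL(new_thread)/(set_thread) */
    std::printf("thread registered under key %u\n", key);
}
static void pfs_delete_thread()
{   /* stand-in for PSI_THREAD_CALL(delete_current_thread) */
    std::puts("thread deregistered");
}

static void worker_body()
{
    pfs_register_thread(srv_worker_thread_key);
    /* ... thread main loop would run here ... */
    pfs_delete_thread();
}

int main()
{
    std::thread t(worker_body);
    t.join();
    return 0;
}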
@@ -617,47 +668,72 @@ do { \
do { \
PSI_THREAD_CALL(delete_current_thread)(); \
} while (0)
+# else
+# define pfs_register_thread(key)
+# define pfs_delete_thread()
# endif /* UNIV_PFS_THREAD */
-#endif /* !UNIV_HOTBACKUP */
+#ifdef HAVE_PSI_STAGE_INTERFACE
+/** Performance schema stage event for monitoring ALTER TABLE progress:
+everything after log_make_checkpoint(). */
+extern PSI_stage_info srv_stage_alter_table_end;
+
+/** Performance schema stage event for monitoring ALTER TABLE progress
+log_make_checkpoint(). */
+extern PSI_stage_info srv_stage_alter_table_flush;
+
+/** Performance schema stage event for monitoring ALTER TABLE progress
+row_merge_insert_index_tuples(). */
+extern PSI_stage_info srv_stage_alter_table_insert;
+
+/** Performance schema stage event for monitoring ALTER TABLE progress
+row_log_apply(). */
+extern PSI_stage_info srv_stage_alter_table_log_index;
+
+/** Performance schema stage event for monitoring ALTER TABLE progress
+row_log_table_apply(). */
+extern PSI_stage_info srv_stage_alter_table_log_table;
+
+/** Performance schema stage event for monitoring ALTER TABLE progress
+row_merge_sort(). */
+extern PSI_stage_info srv_stage_alter_table_merge_sort;
+
+/** Performance schema stage event for monitoring ALTER TABLE progress
+row_merge_read_clustered_index(). */
+extern PSI_stage_info srv_stage_alter_table_read_pk_internal_sort;
+
+/** Performance schema stage event for monitoring buffer pool load progress. */
+extern PSI_stage_info srv_stage_buffer_pool_load;
+#endif /* HAVE_PSI_STAGE_INTERFACE */
-/** Types of raw partitions in innodb_data_file_path */
-enum {
- SRV_NOT_RAW = 0, /*!< Not a raw partition */
- SRV_NEW_RAW, /*!< A 'newraw' partition, only to be
- initialized */
- SRV_OLD_RAW /*!< An initialized raw partition */
-};
/** Alternatives for the file flush option in Unix; see the InnoDB manual
about what these mean */
-enum {
- SRV_UNIX_FSYNC = 1, /*!< fsync, the default */
- SRV_UNIX_O_DSYNC, /*!< open log files in O_SYNC mode */
- SRV_UNIX_LITTLESYNC, /*!< do not call os_file_flush()
+enum srv_flush_t {
+ SRV_FSYNC = 1, /*!< fsync, the default */
+ SRV_O_DSYNC, /*!< open log files in O_SYNC mode */
+ SRV_LITTLESYNC, /*!< do not call os_file_flush()
when writing data files, but do flush
after writing to log files */
- SRV_UNIX_NOSYNC, /*!< do not flush after writing */
- SRV_UNIX_O_DIRECT, /*!< invoke os_file_set_nocache() on
+ SRV_NOSYNC, /*!< do not flush after writing */
+ SRV_O_DIRECT, /*!< invoke os_file_set_nocache() on
data files. This implies using
non-buffered IO but still using fsync,
the reason for which is that some FS
do not flush meta-data when
unbuffered IO happens */
- SRV_UNIX_O_DIRECT_NO_FSYNC
+ SRV_O_DIRECT_NO_FSYNC,
/*!< do not use fsync() when using
direct IO i.e.: it can be set to avoid
the fsync() call that we make when
				using SRV_O_DIRECT. However, in
this case user/DBA should be sure about
the integrity of the meta-data */
+ SRV_ALL_O_DIRECT_FSYNC
+				/*!< Traditional Windows approach to open
+				all files without caching, and do FileFlushBuffers() */
};
-
-/** Alternatives for file i/o in Windows */
-enum {
- SRV_WIN_IO_NORMAL = 1, /*!< buffered I/O */
- SRV_WIN_IO_UNBUFFERED /*!< unbuffered I/O; this is the default */
-};
+extern enum srv_flush_t srv_file_flush_method;
/** Alternatives for srv_force_recovery. Non-zero values are intended
to help the user get a damaged database up so that he can dump intact
@@ -696,7 +772,6 @@ enum srv_stats_method_name_enum {
typedef enum srv_stats_method_name_enum srv_stats_method_name_t;
-#ifndef UNIV_HOTBACKUP
/** Types of threads existing in the system. */
enum srv_thread_type {
SRV_NONE, /*!< None */
@@ -710,32 +785,16 @@ enum srv_thread_type {
/*********************************************************************//**
Boots Innobase server. */
-UNIV_INTERN
void
srv_boot(void);
/*==========*/
/*********************************************************************//**
-Initializes the server. */
-UNIV_INTERN
-void
-srv_init(void);
-/*==========*/
-/*********************************************************************//**
Frees the data structures created in srv_init(). */
-UNIV_INTERN
void
srv_free(void);
/*==========*/
/*********************************************************************//**
-Initializes the synchronization primitives, memory system, and the thread
-local storage. */
-UNIV_INTERN
-void
-srv_general_init(void);
-/*==================*/
-/*********************************************************************//**
Sets the info describing an i/o thread current state. */
-UNIV_INTERN
void
srv_set_io_thread_op_info(
/*======================*/
@@ -744,41 +803,30 @@ srv_set_io_thread_op_info(
state */
/*********************************************************************//**
Resets the info describing an i/o thread current state. */
-UNIV_INTERN
void
srv_reset_io_thread_op_info();
-/*=========================*/
-/*******************************************************************//**
-Tells the purge thread that there has been activity in the database
-and wakes up the purge thread if it is suspended (not sleeping). Note
-that there is a small chance that the purge thread stays suspended
-(we do not protect our operation with the srv_sys_t:mutex, for
-performance reasons). */
-UNIV_INTERN
+
+/** Wake up the purge threads if there is work to do. */
void
-srv_wake_purge_thread_if_not_active(void);
-/*=====================================*/
-/*******************************************************************//**
-Tells the Innobase server that there has been activity in the database
-and wakes up the master thread if it is suspended (not sleeping). Used
-in the MySQL interface. Note that there is a small chance that the master
-thread stays suspended (we do not protect our operation with the kernel
-mutex, for performace reasons). */
-UNIV_INTERN
+srv_wake_purge_thread_if_not_active();
+/** Wake up the InnoDB master thread if it was suspended (not sleeping). */
void
-srv_active_wake_master_thread(void);
-/*===============================*/
-/*******************************************************************//**
-Wakes up the master thread if it is suspended or being suspended. */
-UNIV_INTERN
+srv_active_wake_master_thread_low();
+
+#define srv_active_wake_master_thread() \
+ do { \
+ if (!srv_read_only_mode) { \
+ srv_active_wake_master_thread_low(); \
+ } \
+ } while (0)
+/** Wake up the master thread if it is suspended or being suspended. */
void
-srv_wake_master_thread(void);
-/*========================*/
+srv_wake_master_thread();
+
/******************************************************************//**
Outputs to a file the output of the InnoDB Monitor.
@return FALSE if not all information printed
due to failure to obtain necessary mutex */
-UNIV_INTERN
ibool
srv_printf_innodb_monitor(
/*======================*/
@@ -792,7 +840,6 @@ srv_printf_innodb_monitor(
/******************************************************************//**
Function to pass InnoDB status variables to MySQL */
-UNIV_INTERN
void
srv_export_innodb_status(void);
/*==========================*/
@@ -800,21 +847,18 @@ srv_export_innodb_status(void);
Get current server activity count. We don't hold srv_sys::mutex while
reading this value as it is only used in heuristics.
@return activity count. */
-UNIV_INTERN
ulint
srv_get_activity_count(void);
/*========================*/
/*******************************************************************//**
Check if there has been any activity.
@return FALSE if no change in activity counter. */
-UNIV_INTERN
ibool
srv_check_activity(
/*===============*/
ulint old_activity_count); /*!< old activity count */
/******************************************************************//**
Increment the server activity counter. */
-UNIV_INTERN
void
srv_inc_activity_count(void);
/*=========================*/
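
srv_get_activity_count()/srv_check_activity() implement an intentionally lock-free heuristic: snapshot the counter, sleep, and treat an unchanged value as an idle server (the declaration above notes that srv_sys::mutex is deliberately not held). A sketch of that loop, illustrative rather than the master-thread code:

#include <atomic>
#include <chrono>
#include <thread>

typedef unsigned long ulint;

static std::atomic<ulint> activity_count{0};

static void inc_activity_count() { activity_count.fetch_add(1); }
static ulint get_activity_count() { return activity_count.load(); }
static bool check_activity(ulint old_count)
{   /* false if no change in the activity counter */
    return get_activity_count() != old_count;
}

static void master_loop_once()
{
    ulint old_count = get_activity_count();
    std::this_thread::sleep_for(std::chrono::seconds(1));
    if (check_activity(old_count)) {
        /* ... do active-server background work ... */
    } else {
        /* ... do idle-server work, e.g. more aggressive flushing ... */
    }
}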
@@ -822,7 +866,6 @@ srv_inc_activity_count(void);
/**********************************************************************//**
Enqueues a task to the server task queue and releases a worker thread, if there
is a suspended one. */
-UNIV_INTERN
void
srv_que_task_enqueue_low(
/*=====================*/
@@ -833,7 +876,6 @@ Check whether any background thread is active. If so, return the thread
type.
@return SRV_NONE if all are suspended or have exited, thread
type if any are still active. */
-UNIV_INTERN
enum srv_thread_type
srv_get_active_thread_type(void);
/*============================*/
@@ -842,8 +884,7 @@ extern "C" {
/*********************************************************************//**
A thread which prints the info output by various InnoDB monitors.
-@return a dummy parameter */
-UNIV_INTERN
+@return a dummy parameter */
os_thread_ret_t
DECLARE_THREAD(srv_monitor_thread)(
/*===============================*/
@@ -852,8 +893,7 @@ DECLARE_THREAD(srv_monitor_thread)(
/*********************************************************************//**
The master thread controlling the server.
-@return a dummy parameter */
-UNIV_INTERN
+@return a dummy parameter */
os_thread_ret_t
DECLARE_THREAD(srv_master_thread)(
/*==============================*/
@@ -863,8 +903,7 @@ DECLARE_THREAD(srv_master_thread)(
/*************************************************************************
A thread which prints warnings about semaphore waits which have lasted
too long. These can be used to track bugs which cause hangs.
-@return a dummy parameter */
-UNIV_INTERN
+@return a dummy parameter */
os_thread_ret_t
DECLARE_THREAD(srv_error_monitor_thread)(
/*=====================================*/
@@ -873,8 +912,7 @@ DECLARE_THREAD(srv_error_monitor_thread)(
/*********************************************************************//**
Purge coordinator thread that schedules the purge tasks.
-@return a dummy parameter */
-UNIV_INTERN
+@return a dummy parameter */
os_thread_ret_t
DECLARE_THREAD(srv_purge_coordinator_thread)(
/*=========================================*/
@@ -883,8 +921,7 @@ DECLARE_THREAD(srv_purge_coordinator_thread)(
/*********************************************************************//**
Worker thread that reads tasks from the work queue and executes them.
-@return a dummy parameter */
-UNIV_INTERN
+@return a dummy parameter */
os_thread_ret_t
DECLARE_THREAD(srv_worker_thread)(
/*==============================*/
@@ -894,8 +931,7 @@ DECLARE_THREAD(srv_worker_thread)(
/**********************************************************************//**
Get count of tasks in the queue.
-@return number of tasks in queue */
-UNIV_INTERN
+@return number of tasks in queue */
ulint
srv_get_task_queue_length(void);
/*===========================*/
@@ -907,17 +943,44 @@ srv_get_task_queue_length(void);
void
srv_release_threads(enum srv_thread_type type, ulint n);
-/** Wake up the purge threads. */
-UNIV_INTERN
+/** Wake up the purge threads. */
void
srv_purge_wakeup();
-/** Check whether given space id is undo tablespace id
-@param[in] space_id space id to check
-@return true if it is undo tablespace else false. */
+/** Shut down the purge threads. */
+void srv_purge_shutdown();
+
+/** Check if tablespace is being truncated.
+(Ignore system-tablespace as we don't re-create the tablespace
+and so some of the action that are suppressed by this function
+for independent tablespace are not applicable to system-tablespace).
+@param space_id space_id to check for truncate action
+@return true if being truncated, false if not being
+ truncated or tablespace is system-tablespace. */
bool
-srv_is_undo_tablespace(
- ulint space_id);
+srv_is_tablespace_truncated(ulint space_id);
+
+/** Check if tablespace was truncated.
+@param[in] space space object to check for truncate action
+@return true if tablespace was truncated and we still have an active
+MLOG_TRUNCATE REDO log record. */
+bool
+srv_was_tablespace_truncated(const fil_space_t* space);
+
+#ifdef UNIV_DEBUG
+/** Disables master thread. It's used by:
+ SET GLOBAL innodb_master_thread_disabled_debug = 1 (0).
+@param[in] thd thread handle
+@param[in] var pointer to system variable
+@param[out] var_ptr where the formal string goes
+@param[in] save immediate result from check function */
+void
+srv_master_thread_disabled_debug_update(
+ THD* thd,
+ struct st_mysql_sys_var* var,
+ void* var_ptr,
+ const void* save);
+#endif /* UNIV_DEBUG */
/** Status variables to be passed to MySQL */
struct export_var_t{
@@ -929,8 +992,9 @@ struct export_var_t{
ulint innodb_data_writes; /*!< I/O write requests */
ulint innodb_data_written; /*!< Data bytes written */
ulint innodb_data_reads; /*!< I/O read requests */
- char innodb_buffer_pool_dump_status[512];/*!< Buf pool dump status */
- char innodb_buffer_pool_load_status[512];/*!< Buf pool load status */
+ char innodb_buffer_pool_dump_status[OS_FILE_MAX_PATH + 128];/*!< Buf pool dump status */
+ char innodb_buffer_pool_load_status[OS_FILE_MAX_PATH + 128];/*!< Buf pool load status */
+ char innodb_buffer_pool_resize_status[512];/*!< Buf pool resize status */
ulint innodb_buffer_pool_pages_total; /*!< Buffer pool size */
ulint innodb_buffer_pool_pages_data; /*!< Data pages */
ulint innodb_buffer_pool_bytes_data; /*!< File bytes used */
@@ -966,7 +1030,7 @@ struct export_var_t{
ulint innodb_pages_written; /*!< buf_pool->stat.n_pages_written */
ulint innodb_row_lock_waits; /*!< srv_n_lock_wait_count */
ulint innodb_row_lock_current_waits; /*!< srv_n_lock_wait_current_count */
- ib_int64_t innodb_row_lock_time; /*!< srv_n_lock_wait_time
+ int64_t innodb_row_lock_time; /*!< srv_n_lock_wait_time
/ 1000 */
ulint innodb_row_lock_time_avg; /*!< srv_n_lock_wait_time
/ 1000
@@ -981,10 +1045,12 @@ struct export_var_t{
ulint innodb_system_rows_inserted; /*!< srv_n_system_rows_inserted */
ulint innodb_system_rows_updated; /*!< srv_n_system_rows_updated */
ulint innodb_system_rows_deleted; /*!< srv_n_system_rows_deleted*/
- ulint innodb_num_open_files; /*!< fil_n_file_opened */
+ ulint innodb_num_open_files; /*!< fil_system_t::n_open */
ulint innodb_truncated_status_writes; /*!< srv_truncated_status_writes */
ulint innodb_available_undo_logs; /*!< srv_available_undo_logs
*/
+ /** Number of undo tablespace truncation operations */
+ ulong innodb_undo_truncations;
ulint innodb_defragment_compression_failures; /*!< Number of
defragment re-compression
failures */
@@ -999,46 +1065,24 @@ struct export_var_t{
of used row log buffer */
ulint innodb_onlineddl_pct_progress; /*!< Online alter progress */
-#ifdef UNIV_DEBUG
- ulint innodb_purge_trx_id_age; /*!< rw_max_trx_id - purged trx_id */
- ulint innodb_purge_view_trx_id_age; /*!< rw_max_trx_id
- - purged view's min trx_id */
-#endif /* UNIV_DEBUG */
-
- ib_int64_t innodb_page_compression_saved;/*!< Number of bytes saved
- by page compression */
- ib_int64_t innodb_page_compression_trim_sect512;/*!< Number of 512b TRIM
- by page compression */
- ib_int64_t innodb_page_compression_trim_sect1024;/*!< Number of 1K TRIM
- by page compression */
- ib_int64_t innodb_page_compression_trim_sect2048;/*!< Number of 2K TRIM
- by page compression */
- ib_int64_t innodb_page_compression_trim_sect4096;/*!< Number of 4K byte TRIM
- by page compression */
- ib_int64_t innodb_page_compression_trim_sect8192;/*!< Number of 8K TRIM
- by page compression */
- ib_int64_t innodb_page_compression_trim_sect16384;/*!< Number of 16K TRIM
- by page compression */
- ib_int64_t innodb_page_compression_trim_sect32768;/*!< Number of 32K TRIM
+ int64_t innodb_page_compression_saved;/*!< Number of bytes saved
by page compression */
- ib_int64_t innodb_index_pages_written; /*!< Number of index pages
+ int64_t innodb_index_pages_written; /*!< Number of index pages
written */
- ib_int64_t innodb_non_index_pages_written; /*!< Number of non index pages
+ int64_t innodb_non_index_pages_written; /*!< Number of non index pages
written */
- ib_int64_t innodb_pages_page_compressed;/*!< Number of pages
+ int64_t innodb_pages_page_compressed;/*!< Number of pages
compressed by page compression */
- ib_int64_t innodb_page_compressed_trim_op;/*!< Number of TRIM operations
+ int64_t innodb_page_compressed_trim_op;/*!< Number of TRIM operations
induced by page compression */
- ib_int64_t innodb_page_compressed_trim_op_saved;/*!< Number of TRIM operations
- saved by page compression */
- ib_int64_t innodb_pages_page_decompressed;/*!< Number of pages
+ int64_t innodb_pages_page_decompressed;/*!< Number of pages
decompressed by page
compression */
- ib_int64_t innodb_pages_page_compression_error;/*!< Number of page
+ int64_t innodb_pages_page_compression_error;/*!< Number of page
compression errors */
- ib_int64_t innodb_pages_encrypted; /*!< Number of pages
+ int64_t innodb_pages_encrypted; /*!< Number of pages
encrypted */
- ib_int64_t innodb_pages_decrypted; /*!< Number of pages
+ int64_t innodb_pages_decrypted; /*!< Number of pages
decrypted */
/*!< Number of merge blocks encrypted */
@@ -1050,6 +1094,12 @@ struct export_var_t{
/*!< Number of row log blocks decrypted */
ib_int64_t innodb_n_rowlog_blocks_decrypted;
+ /* Number of temporary tablespace pages encrypted */
+ ib_int64_t innodb_n_temp_blocks_encrypted;
+
+ /* Number of temporary tablespace pages decrypted */
+ ib_int64_t innodb_n_temp_blocks_decrypted;
+
ulint innodb_sec_rec_cluster_reads; /*!< srv_sec_rec_cluster_reads */
ulint innodb_sec_rec_cluster_reads_avoided;/*!< srv_sec_rec_cluster_reads_avoided */
@@ -1058,8 +1108,8 @@ struct export_var_t{
ulint innodb_encryption_rotation_pages_modified;
ulint innodb_encryption_rotation_pages_flushed;
ulint innodb_encryption_rotation_estimated_iops;
- ib_int64_t innodb_encryption_key_requests;
- ib_int64_t innodb_key_rotation_list_length;
+ int64_t innodb_encryption_key_requests;
+ int64_t innodb_key_rotation_list_length;
ulint innodb_scrub_page_reorganizations;
ulint innodb_scrub_page_splits;
@@ -1067,6 +1117,7 @@ struct export_var_t{
ulint innodb_scrub_page_split_failures_out_of_filespace;
ulint innodb_scrub_page_split_failures_missing_index;
ulint innodb_scrub_page_split_failures_unknown;
+ int64_t innodb_scrub_log;
};
/** Thread slot in the thread table. */
@@ -1096,21 +1147,23 @@ struct srv_slot_t{
to do */
que_thr_t* thr; /*!< suspended query thread
(only used for user threads) */
+#ifdef UNIV_DEBUG
+ struct debug_sync_t {
+ UT_LIST_NODE_T(debug_sync_t) debug_sync_list;
+ };
+ UT_LIST_BASE_NODE_T(debug_sync_t) debug_sync;
+ rw_lock_t debug_sync_lock;
+#endif
};
-#else /* !UNIV_HOTBACKUP */
-# define srv_use_adaptive_hash_indexes FALSE
-# define srv_use_native_aio FALSE
-# define srv_numa_interleave FALSE
-# define srv_force_recovery 0UL
-# define srv_set_io_thread_op_info(t,info) ((void) 0)
-# define srv_reset_io_thread_op_info() ((void) 0)
-# define srv_is_being_started 0
-# define srv_win_file_flush_method SRV_WIN_IO_UNBUFFERED
-# define srv_unix_file_flush_method SRV_UNIX_O_DSYNC
-# define srv_start_raw_disk_in_use 0
-# define srv_file_per_table 1
-#endif /* !UNIV_HOTBACKUP */
+#ifdef UNIV_DEBUG
+typedef void srv_slot_callback_t(srv_slot_t*, const void*);
+
+void srv_for_each_thread(srv_thread_type type,
+ srv_slot_callback_t callback,
+ const void *arg);
+#endif
+
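The debug iterator above makes it possible to inspect every slot of a given
thread type. A minimal sketch of a caller, assuming a debug build; the callback
body, the SRV_WORKER constant and both helper names are illustrative, not part
of this patch:

	#ifdef UNIV_DEBUG
	/* Illustrative callback: count slots whose suspended query thread is set. */
	static void
	count_waiting_slot(srv_slot_t* slot, const void* arg)
	{
		if (slot->thr != NULL) {
			++*const_cast<ulint*>(static_cast<const ulint*>(arg));
		}
	}

	/* Illustrative helper: how many worker threads are currently suspended? */
	static ulint
	count_waiting_user_threads()
	{
		ulint	n = 0;
		srv_for_each_thread(SRV_WORKER, count_waiting_slot, &n);
		return(n);
	}
	#endif /* UNIV_DEBUG */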
#ifdef WITH_WSREP
UNIV_INTERN
void
diff --git a/storage/innobase/include/srv0srv.ic b/storage/innobase/include/srv0srv.ic
deleted file mode 100644
index e0cb8f54607..00000000000
--- a/storage/innobase/include/srv0srv.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/srv0srv.ic
-Server main program
-
-Created 10/4/1995 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/srv0start.h b/storage/innobase/include/srv0start.h
index 89c8b50b206..8b39733335d 100644
--- a/storage/innobase/include/srv0start.h
+++ b/storage/innobase/include/srv0start.h
@@ -27,69 +27,42 @@ Created 10/10/1995 Heikki Tuuri
#ifndef srv0start_h
#define srv0start_h
-#include "univ.i"
#include "log0log.h"
#include "ut0byte.h"
-#ifdef __WIN__
-#define SRV_PATH_SEPARATOR '\\'
-#else
-#define SRV_PATH_SEPARATOR '/'
-#endif
+// Forward declaration
+struct dict_table_t;
+
+/** If buffer pool is less than the size,
+only one buffer pool instance is used. */
+#define BUF_POOL_SIZE_THRESHOLD (1024 * 1024 * 1024)
+
+/** Open the configured number of dedicated undo tablespaces.
+@param[in] create_new_db whether the database is being initialized
+@return DB_SUCCESS or error code */
+dberr_t
+srv_undo_tablespaces_init(bool create_new_db);
-/*********************************************************************//**
-Normalizes a directory path for Windows: converts slashes to backslashes.
-*/
-UNIV_INTERN
-void
-srv_normalize_path_for_win(
-/*=======================*/
- char* str); /*!< in/out: null-terminated character string */
-/*********************************************************************//**
-Reads the data files and their sizes from a character string given in
-the .cnf file.
-@return TRUE if ok, FALSE on parse error */
-UNIV_INTERN
-ibool
-srv_parse_data_file_paths_and_sizes(
-/*================================*/
- char* str); /*!< in/out: the data file path string */
-/*********************************************************************//**
-Frees the memory allocated by srv_parse_data_file_paths_and_sizes()
-and srv_parse_log_group_home_dirs(). */
-UNIV_INTERN
-void
-srv_free_paths_and_sizes(void);
-/*==========================*/
-/*********************************************************************//**
-Adds a slash or a backslash to the end of a string if it is missing
-and the string is not empty.
-@return string which has the separator if the string is not empty */
-UNIV_INTERN
-char*
-srv_add_path_separator_if_needed(
-/*=============================*/
- char* str); /*!< in: null-terminated character string */
-#ifndef UNIV_HOTBACKUP
/****************************************************************//**
Starts Innobase and creates a new database if database files
are not found and the user wants.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
dberr_t
innobase_start_or_create_for_mysql();
/** Shut down InnoDB. */
-UNIV_INTERN
void
innodb_shutdown();
+/** Shut down background threads that can generate undo log. */
+void
+srv_shutdown_bg_undo_sources();
+
/*************************************************************//**
Copy the file path component of the physical file to parameter. It will
copy up to and including the terminating path separator.
@return number of bytes copied or ULINT_UNDEFINED if destination buffer
is smaller than the path to be copied. */
-UNIV_INTERN
ulint
srv_path_copy(
/*==========*/
@@ -99,43 +72,46 @@ srv_path_copy(
const char* table_name) /*!< in: source table name */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*****************************************************************//**
-Get the meta-data filename from the table name. */
-UNIV_INTERN
+/** Get the meta-data filename from the table name for a
+single-table tablespace.
+@param[in] table table object
+@param[out] filename filename
+@param[in] max_len filename max length */
void
srv_get_meta_data_filename(
-/*======================*/
- dict_table_t* table, /*!< in: table */
- char* filename, /*!< out: filename */
- ulint max_len) /*!< in: filename max length */
- MY_ATTRIBUTE((nonnull));
+ dict_table_t* table,
+ char* filename,
+ ulint max_len);
+
+/** Get the encryption-data filename from the table name for a
+single-table tablespace.
+@param[in] table table object
+@param[out] filename filename
+@param[in] max_len filename max length */
+void
+srv_get_encryption_data_filename(
+ dict_table_t* table,
+ char* filename,
+ ulint max_len);
/** Log sequence number at shutdown */
extern lsn_t srv_shutdown_lsn;
/** Log sequence number immediately after startup */
extern lsn_t srv_start_lsn;
-#ifdef HAVE_DARWIN_THREADS
-/** TRUE if the F_FULLFSYNC option is available */
-extern ibool srv_have_fullfsync;
-#endif
-
/** TRUE if the server is being started */
-extern ibool srv_is_being_started;
-/** TRUE if the server was successfully started */
-extern ibool srv_was_started;
+extern bool srv_is_being_started;
+/** TRUE if SYS_TABLESPACES is available for lookups */
+extern bool srv_sys_tablespaces_open;
/** TRUE if the server is being started, before rolling back any
incomplete transactions */
-extern ibool srv_startup_is_before_trx_rollback_phase;
+extern bool srv_startup_is_before_trx_rollback_phase;
/** TRUE if a raw partition is in use */
extern ibool srv_start_raw_disk_in_use;
-/** Undo tablespaces starts with space_id. */
-extern ulint srv_undo_space_id_start;
-
/** Shutdown state */
-enum srv_shutdown_state {
+enum srv_shutdown_t {
SRV_SHUTDOWN_NONE = 0, /*!< Database running normally */
SRV_SHUTDOWN_CLEANUP, /*!< Cleaning up in
logs_empty_and_mark_files_at_shutdown() */
@@ -155,10 +131,8 @@ extern bool srv_undo_sources;
/** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to
SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
-extern enum srv_shutdown_state srv_shutdown_state;
-#endif /* !UNIV_HOTBACKUP */
-
-/** Log 'spaces' have id's >= this */
-#define SRV_LOG_SPACE_FIRST_ID 0xFFFFFFF0UL
+extern enum srv_shutdown_t srv_shutdown_state;
+/** Files comprising the system tablespace */
+extern pfs_os_file_t files[1000];
#endif
diff --git a/storage/innobase/include/sync0arr.h b/storage/innobase/include/sync0arr.h
index e352cd27558..7a8366b933b 100644
--- a/storage/innobase/include/sync0arr.h
+++ b/storage/innobase/include/sync0arr.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2015, 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,12 +28,10 @@ Created 9/5/1995 Heikki Tuuri
#define sync0arr_h
#include "univ.i"
-#include "ut0lst.h"
-#include "ut0mem.h"
-#include "os0thread.h"
-/** Synchonization cell */
+/** Synchronization wait array cell */
struct sync_cell_t;
+
/** Synchronization wait array */
struct sync_array_t;
@@ -42,112 +41,79 @@ in the instance for waiting for an object. The event of the cell is
reset to nonsignalled state.
If reserving cell of the instance fails, try to get another new
instance until we can reserve an empty cell of it.
-@return the instance found, never NULL. */
+@return the sync array found, never NULL. */
UNIV_INLINE
sync_array_t*
sync_array_get_and_reserve_cell(
-/*============================*/
void* object, /*!< in: pointer to the object to wait for */
ulint type, /*!< in: lock request type */
const char* file, /*!< in: file where requested */
- ulint line, /*!< in: line where requested */
- ulint* index); /*!< out: index of the reserved cell */
+ unsigned line, /*!< in: line where requested */
+ sync_cell_t** cell); /*!< out: the cell reserved, never NULL */
/******************************************************************//**
Reserves a wait array cell for waiting for an object.
-The event of the cell is reset to nonsignalled state.
-@return true if free cell is found, otherwise false */
-UNIV_INTERN
-bool
+The event of the cell is reset to nonsignalled state. */
+sync_cell_t*
sync_array_reserve_cell(
-/*====================*/
sync_array_t* arr, /*!< in: wait array */
void* object, /*!< in: pointer to the object to wait for */
ulint type, /*!< in: lock request type */
const char* file, /*!< in: file where requested */
- ulint line, /*!< in: line where requested */
- ulint* index); /*!< out: index of the reserved cell */
+ unsigned line); /*!< in: line where requested */
+
/******************************************************************//**
This function should be called when a thread starts to wait on
a wait array cell. In the debug version this function checks
if the wait for a semaphore will result in a deadlock, in which
case prints info and asserts. */
-UNIV_INTERN
void
sync_array_wait_event(
-/*==================*/
sync_array_t* arr, /*!< in: wait array */
- ulint index); /*!< in: index of the reserved cell */
+ sync_cell_t*& cell); /*!< in: the reserved cell */
+
/******************************************************************//**
Frees the cell. NOTE! sync_array_wait_event frees the cell
automatically! */
-UNIV_INTERN
void
sync_array_free_cell(
-/*=================*/
sync_array_t* arr, /*!< in: wait array */
- ulint index); /*!< in: index of the cell in array */
+ sync_cell_t*& cell); /*!< in: the reserved cell */
+
/**********************************************************************//**
Note that one of the wait objects was signalled. */
-UNIV_INTERN
void
-sync_array_object_signalled(void);
-/*=============================*/
+sync_array_object_signalled();
/**********************************************************************//**
-If the wakeup algorithm does not work perfectly at semaphore relases,
-this function will do the waking (see the comment in mutex_exit). This
-function should be called about every 1 second in the server. */
-UNIV_INTERN
-void
-sync_arr_wake_threads_if_sema_free(void);
-/*====================================*/
-/**********************************************************************//**
Prints warnings of long semaphore waits to stderr.
-@return TRUE if fatal semaphore wait threshold was exceeded */
-UNIV_INTERN
+@return TRUE if fatal semaphore wait threshold was exceeded */
ibool
sync_array_print_long_waits(
-/*========================*/
os_thread_id_t* waiter, /*!< out: longest waiting thread */
- const void** sema) /*!< out: longest-waited-for semaphore */
- MY_ATTRIBUTE((nonnull));
-/********************************************************************//**
-Validates the integrity of the wait array. Checks
-that the number of reserved cells equals the count variable. */
-UNIV_INTERN
-void
-sync_array_validate(
-/*================*/
- sync_array_t* arr); /*!< in: sync wait array */
+ const void** sema); /*!< out: longest-waited-for semaphore */
+
/**********************************************************************//**
Prints info of the wait array. */
-UNIV_INTERN
void
sync_array_print(
-/*=============*/
FILE* file); /*!< in: file where to print */
/**********************************************************************//**
Create the primary system wait array(s), they are protected by an OS mutex */
-UNIV_INTERN
void
sync_array_init(
-/*============*/
ulint n_threads); /*!< in: Number of slots to create */
+
/**********************************************************************//**
Close sync array wait sub-system. */
-UNIV_INTERN
void
-sync_array_close(void);
-/*==================*/
+sync_array_close();
/**********************************************************************//**
Get an instance of the sync wait array. */
-UNIV_INTERN
+UNIV_INLINE
sync_array_t*
-sync_array_get(void);
-/*================*/
-
+sync_array_get();
/**********************************************************************//**
Prints info of the wait array without using any mutexes/semaphores. */
UNIV_INTERN
@@ -164,8 +130,6 @@ sync_array_get_nth_cell(
sync_array_t* arr, /*!< in: sync array */
ulint n); /*!< in: index */
-#ifndef UNIV_NONINL
#include "sync0arr.ic"
-#endif
-#endif
+#endif /* sync0arr_h */
diff --git a/storage/innobase/include/sync0arr.ic b/storage/innobase/include/sync0arr.ic
index fab2fdde6f3..9163d5b6614 100644
--- a/storage/innobase/include/sync0arr.ic
+++ b/storage/innobase/include/sync0arr.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,8 +26,27 @@ Inline code
Created 9/5/1995 Heikki Tuuri
*******************************************************/
-/** User configured sync array size */
-extern ulong srv_sync_array_size;
+extern ulint sync_array_size;
+extern sync_array_t** sync_wait_array;
+
+#include "ut0counter.h"
+
+/**********************************************************************//**
+Get an instance of the sync wait array.
+@return an instance of the sync wait array. */
+
+UNIV_INLINE
+sync_array_t*
+sync_array_get()
+/*============*/
+{
+ if (sync_array_size <= 1) {
+ return(sync_wait_array[0]);
+ }
+
+ return(sync_wait_array[default_indexer_t<>::get_rnd_index()
+ % sync_array_size]);
+}
/******************************************************************//**
Get an instance of the sync wait array and reserve a wait array cell
@@ -34,31 +54,33 @@ in the instance for waiting for an object. The event of the cell is
reset to nonsignalled state.
If reserving cell of the instance fails, try to get another new
instance until we can reserve an empty cell of it.
-@return the instance found, never NULL. */
+@return the sync array reserved, never NULL. */
UNIV_INLINE
sync_array_t*
sync_array_get_and_reserve_cell(
/*============================*/
- void* object, /*!< in: pointer to the object to wait for */
+ void* object, /*!< in: pointer to the object to wait for */
ulint type, /*!< in: lock request type */
const char* file, /*!< in: file where requested */
- ulint line, /*!< in: line where requested */
- ulint* index) /*!< out: index of the reserved cell */
+ unsigned line, /*!< in: line where requested */
+ sync_cell_t** cell) /*!< out: the cell reserved, never NULL */
{
- sync_array_t* sync_arr;
- bool reserved = false;
+ sync_array_t* sync_arr = NULL;
- for (ulint i = 0; i < srv_sync_array_size && !reserved; ++i) {
+ *cell = NULL;
+ for (ulint i = 0; i < sync_array_size && *cell == NULL; ++i) {
+ /* Although the sync_array is chosen in a random way currently,
+ we still try at most sync_array_size times, in case any
+ of the sync_arrays we get is full */
sync_arr = sync_array_get();
- reserved = sync_array_reserve_cell(sync_arr, object, type,
- file, line, index);
- }
+ *cell = sync_array_reserve_cell(sync_arr, object, type,
+ file, line);
+ }
/* This won't be true every time, for the loop above may execute
more than srv_sync_array_size times to reserve a cell.
But an assertion here makes the code more solid. */
- ut_a(reserved);
+ ut_a(*cell != NULL);
- return sync_arr;
+ return(sync_arr);
}
-
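A minimal sketch of the caller protocol behind the new interface: reserve a
cell, re-check the contended resource, then either wait (which also frees the
cell) or release the cell explicitly. The latch pointer, the request type and
still_must_wait() are placeholders:

	sync_cell_t*	cell;
	sync_array_t*	arr = sync_array_get_and_reserve_cell(
		latch, RW_LOCK_X, __FILE__, __LINE__, &cell);

	if (still_must_wait(latch)) {
		/* Blocks on the cell's event; frees the cell on wake-up. */
		sync_array_wait_event(arr, cell);
	} else {
		sync_array_free_cell(arr, cell);
	}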
diff --git a/storage/innobase/include/sync0debug.h b/storage/innobase/include/sync0debug.h
new file mode 100644
index 00000000000..55ea99cd47b
--- /dev/null
+++ b/storage/innobase/include/sync0debug.h
@@ -0,0 +1,105 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/sync0debug.h
+Debug checks for latches, header file
+
+Created 2012-08-21 Sunny Bains
+*******************************************************/
+
+#ifndef sync0debug_h
+#define sync0debug_h
+
+#include "univ.i"
+
+/** Initializes the synchronization data structures. */
+void
+sync_check_init();
+
+/** Free the InnoDB synchronization data structures. */
+void
+sync_check_close();
+
+#ifdef UNIV_DEBUG
+/** Enable sync order checking. */
+void
+sync_check_enable();
+
+/** Check if it is OK to acquire the latch.
+@param[in] latch latch type */
+void
+sync_check_lock_validate(const latch_t* latch);
+
+/** Note that the lock has been granted
+@param[in] latch latch type */
+void
+sync_check_lock_granted(const latch_t* latch);
+
+/** Check if it is OK to acquire the latch.
+@param[in] latch latch type
+@param[in] level the level of the mutex */
+void
+sync_check_lock(const latch_t* latch, latch_level_t level);
+
+/**
+Check if it is OK to re-acquire the lock. */
+void
+sync_check_relock(const latch_t* latch);
+
+/** Removes a latch from the thread level array if it is found there.
+@param[in] latch to unlock */
+void
+sync_check_unlock(const latch_t* latch);
+
+/** Checks if the level array for the current thread contains a
+mutex or rw-latch at the specified level.
+@param[in] level to find
+@return a matching latch, or NULL if not found */
+const latch_t*
+sync_check_find(latch_level_t level);
+
+/** Checks that the level array for the current thread is empty.
+Terminate iteration if the functor returns true.
+@param[in] functor called for each element.
+@return true if the functor returns true for any element */
+bool
+sync_check_iterate(const sync_check_functor_t& functor);
+
+/** Acquires the debug mutex. We cannot use the mutex defined in sync0sync,
+because the debug mutex is also acquired in sync0arr while holding the OS
+mutex protecting the sync array, and the ordinary mutex_enter might
+recursively call routines in sync0arr, leading to a deadlock on the OS
+mutex. */
+void
+rw_lock_debug_mutex_enter();
+
+/** Releases the debug mutex. */
+void
+rw_lock_debug_mutex_exit();
+
+#endif /* UNIV_DEBUG */
+
+#endif /* !sync0debug_h */
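In debug builds these hooks bracket every latch acquisition. A sketch of the
expected call order, with `latch` standing in for a real const latch_t*;
sync_check_lock() combines the first two steps when the level is known:

	sync_check_lock_validate(latch);  /* before acquiring: order check */
	/* ... acquire the latch ... */
	sync_check_lock_granted(latch);   /* record in the thread-level array */
	/* ... critical section ... */
	sync_check_unlock(latch);         /* drop from the thread-level array */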
diff --git a/storage/innobase/include/sync0policy.h b/storage/innobase/include/sync0policy.h
new file mode 100644
index 00000000000..c7c348bd489
--- /dev/null
+++ b/storage/innobase/include/sync0policy.h
@@ -0,0 +1,540 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2018, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/sync0policy.h
+Policies for mutexes.
+
+Created 2012-08-21 Sunny Bains.
+***********************************************************************/
+
+#ifndef sync0policy_h
+#define sync0policy_h
+
+#include "ut0rnd.h"
+#include "os0thread.h"
+#include "srv0mon.h"
+
+#ifdef UNIV_DEBUG
+
+# define MUTEX_MAGIC_N 979585UL
+
+template <typename Mutex>
+class MutexDebug {
+public:
+
+ /** For passing context to SyncDebug */
+ struct Context : public latch_t {
+
+ /** Constructor */
+ Context()
+ :
+ m_mutex(),
+ m_filename(),
+ m_line(),
+ m_thread_id(os_thread_id_t(ULINT_UNDEFINED))
+ {
+ /* No op */
+ }
+
+ /** Create the context for SyncDebug
+ @param[in] id ID of the latch to track */
+ Context(latch_id_t id)
+ :
+ latch_t(id)
+ {
+ ut_ad(id != LATCH_ID_NONE);
+ }
+
+ /** Set to locked state
+ @param[in] mutex The mutex to acquire
+ @param[in] filename File name from where to acquire
+ @param[in] line Line number in filename */
+ void locked(
+ const Mutex* mutex,
+ const char* filename,
+ unsigned line)
+ UNIV_NOTHROW
+ {
+ m_mutex = mutex;
+
+ m_thread_id = os_thread_get_curr_id();
+
+ m_filename = filename;
+
+ m_line = line;
+ }
+
+ /** Reset to unlock state */
+ void release()
+ UNIV_NOTHROW
+ {
+ m_mutex = NULL;
+
+ m_thread_id = os_thread_id_t(ULINT_UNDEFINED);
+
+ m_filename = NULL;
+
+ m_line = 0;
+ }
+
+ /** Print information about the latch
+ @return the string representation */
+ virtual std::string to_string() const
+ UNIV_NOTHROW
+ {
+ std::ostringstream msg;
+
+ msg << m_mutex->policy().to_string();
+
+ if (os_thread_pf(m_thread_id) != ULINT_UNDEFINED) {
+
+ msg << " addr: " << m_mutex
+ << " acquired: " << locked_from().c_str();
+
+ } else {
+ msg << "Not locked";
+ }
+
+ return(msg.str());
+ }
+
+ /** @return the name of the file and line number in the file
+ from where the mutex was acquired "filename:line" */
+ virtual std::string locked_from() const
+ {
+ std::ostringstream msg;
+
+ msg << sync_basename(m_filename) << ":" << m_line;
+
+ return(std::string(msg.str()));
+ }
+
+ /** Mutex to check for lock order violation */
+ const Mutex* m_mutex;
+
+ /** Filename from where enter was called */
+ const char* m_filename;
+
+ /** Line number in filename */
+ unsigned m_line;
+
+ /** Thread ID of the thread that own(ed) the mutex */
+ os_thread_id_t m_thread_id;
+ };
+
+ /** Constructor. */
+ MutexDebug()
+ :
+ m_magic_n(),
+ m_context()
+ UNIV_NOTHROW
+ {
+ /* No op */
+ }
+
+ /* Destructor */
+ virtual ~MutexDebug() { }
+
+ /** Mutex is being destroyed. */
+ void destroy() UNIV_NOTHROW
+ {
+ ut_ad(m_context.m_thread_id == os_thread_id_t(ULINT_UNDEFINED));
+
+ m_magic_n = 0;
+
+ m_context.m_thread_id = 0;
+ }
+
+ /** Called when the mutex is "created". Note: Not from the constructor
+ but when the mutex is initialised.
+ @param[in] id Mutex ID */
+ void init(latch_id_t id)
+ UNIV_NOTHROW;
+
+ /** Called when an attempt is made to lock the mutex
+ @param[in] mutex Mutex instance to be locked
+ @param[in] filename Filename from where it was called
+ @param[in] line Line number from where it was called */
+ void enter(
+ const Mutex* mutex,
+ const char* filename,
+ unsigned line)
+ UNIV_NOTHROW;
+
+ /** Called when the mutex is locked
+ @param[in] mutex Mutex instance that was locked
+ @param[in] filename Filename from where it was called
+ @param[in] line Line number from where it was called */
+ void locked(
+ const Mutex* mutex,
+ const char* filename,
+ unsigned line)
+ UNIV_NOTHROW;
+
+ /** Called when the mutex is released
+ @param[in] mutex Mutex that was released */
+ void release(const Mutex* mutex)
+ UNIV_NOTHROW;
+
+ /** @return true if thread owns the mutex */
+ bool is_owned() const UNIV_NOTHROW
+ {
+ return(os_thread_eq(
+ m_context.m_thread_id,
+ os_thread_get_curr_id()));
+ }
+
+ /** @return the name of the file from where the mutex was acquired */
+ const char* get_enter_filename() const
+ UNIV_NOTHROW
+ {
+ return(m_context.m_filename);
+ }
+
+ /** @return the line in the file from where the mutex was acquired */
+ unsigned get_enter_line() const
+ UNIV_NOTHROW
+ {
+ return(m_context.m_line);
+ }
+
+ /** @return id of the thread that was trying to acquire the mutex */
+ os_thread_id_t get_thread_id() const
+ UNIV_NOTHROW
+ {
+ return(m_context.m_thread_id);
+ }
+
+ /** Magic number to check for memory corruption. */
+ ulint m_magic_n;
+
+ /** Latch state of the mutex owner */
+ Context m_context;
+};
+#endif /* UNIV_DEBUG */
+
+/* Do nothing */
+template <typename Mutex>
+struct NoPolicy {
+ /** Default constructor. */
+ NoPolicy() { }
+
+ void init(const Mutex&, latch_id_t, const char*, uint32_t)
+ UNIV_NOTHROW { }
+ void destroy() UNIV_NOTHROW { }
+ void enter(const Mutex&, const char*, unsigned line) UNIV_NOTHROW { }
+ void add(uint32_t, uint32_t) UNIV_NOTHROW { }
+ void locked(const Mutex&, const char*, ulint) UNIV_NOTHROW { }
+ void release(const Mutex&) UNIV_NOTHROW { }
+ std::string to_string() const { return(""); };
+ latch_id_t get_id() const;
+};
+
+/** Collect the metrics per mutex instance, no aggregation. */
+template <typename Mutex>
+struct GenericPolicy
+#ifdef UNIV_DEBUG
+: public MutexDebug<Mutex>
+#endif /* UNIV_DEBUG */
+{
+public:
+ typedef Mutex MutexType;
+
+ /** Constructor. */
+ GenericPolicy()
+ UNIV_NOTHROW
+ :
+#ifdef UNIV_DEBUG
+ MutexDebug<MutexType>(),
+#endif /* UNIV_DEBUG */
+ m_count(),
+ m_id()
+ { }
+
+ /** Destructor */
+ ~GenericPolicy() { }
+
+ /** Called when the mutex is "created". Note: Not from the constructor
+ but when the mutex is initialised.
+ @param[in] mutex Mutex instance to track
+ @param[in] id Mutex ID
+ @param[in] filename File where mutex was created
+ @param[in] line Line in filename */
+ void init(
+ const MutexType& mutex,
+ latch_id_t id,
+ const char* filename,
+ uint32_t line)
+ UNIV_NOTHROW
+ {
+ m_id = id;
+
+ latch_meta_t& meta = sync_latch_get_meta(id);
+
+ ut_ad(meta.get_id() == id);
+
+ meta.get_counter()->single_register(&m_count);
+
+ sync_file_created_register(this, filename, uint16_t(line));
+
+ ut_d(MutexDebug<MutexType>::init(m_id));
+ }
+
+ /** Called when the mutex is destroyed. */
+ void destroy()
+ UNIV_NOTHROW
+ {
+ latch_meta_t& meta = sync_latch_get_meta(m_id);
+
+ meta.get_counter()->single_deregister(&m_count);
+
+ sync_file_created_deregister(this);
+
+ ut_d(MutexDebug<MutexType>::destroy());
+ }
+
+ /** Called after a successful mutex acquire.
+ @param[in] n_spins Number of times the thread did
+ spins while trying to acquire the mutex
+ @param[in] n_waits Number of times the thread waited
+ in some type of OS queue */
+ void add(
+ uint32_t n_spins,
+ uint32_t n_waits)
+ UNIV_NOTHROW
+ {
+ /* Currently global on/off. Keeps things simple and fast */
+
+ if (!m_count.m_enabled) {
+
+ return;
+ }
+
+ m_count.m_spins += n_spins;
+ m_count.m_waits += n_waits;
+
+ ++m_count.m_calls;
+ }
+
+ /** Called when an attempt is made to lock the mutex
+ @param[in] mutex Mutex instance to be locked
+ @param[in] filename Filename from where it was called
+ @param[in] line Line number from where it was called */
+ void enter(
+ const MutexType& mutex,
+ const char* filename,
+ unsigned line)
+ UNIV_NOTHROW
+ {
+ ut_d(MutexDebug<MutexType>::enter(&mutex, filename, line));
+ }
+
+ /** Called when the mutex is locked
+ @param[in] mutex Mutex instance that is locked
+ @param[in] filename Filename from where it was called
+ @param[in] line Line number from where it was called */
+ void locked(
+ const MutexType& mutex,
+ const char* filename,
+ unsigned line)
+ UNIV_NOTHROW
+ {
+ ut_d(MutexDebug<MutexType>::locked(&mutex, filename, line));
+ }
+
+ /** Called when the mutex is released
+ @param[in] mutex Mutex instance that is released */
+ void release(const MutexType& mutex)
+ UNIV_NOTHROW
+ {
+ ut_d(MutexDebug<MutexType>::release(&mutex));
+ }
+
+ /** Print the information about the latch
+ @return the string representation */
+ std::string print() const
+ UNIV_NOTHROW;
+
+ /** @return the latch ID */
+ latch_id_t get_id() const
+ UNIV_NOTHROW
+ {
+ return(m_id);
+ }
+
+ /** @return the string representation */
+ std::string to_string() const;
+
+private:
+ typedef latch_meta_t::CounterType Counter;
+
+ /** The user visible counters, registered with the meta-data. */
+ Counter::Count m_count;
+
+ /** Latch meta data ID */
+ latch_id_t m_id;
+};
+
+/** Track aggregate metrics policy, used by the page mutex. There are just
+too many of them to count individually. */
+template <typename Mutex>
+class BlockMutexPolicy
+#ifdef UNIV_DEBUG
+: public MutexDebug<Mutex>
+#endif /* UNIV_DEBUG */
+{
+public:
+ typedef Mutex MutexType;
+ typedef typename latch_meta_t::CounterType::Count Count;
+
+ /** Default constructor. */
+ BlockMutexPolicy()
+ :
+#ifdef UNIV_DEBUG
+ MutexDebug<MutexType>(),
+#endif /* UNIV_DEBUG */
+ m_count(),
+ m_id()
+ {
+ /* Do nothing */
+ }
+
+ /** Destructor */
+ ~BlockMutexPolicy() { }
+
+ /** Called when the mutex is "created". Note: Not from the constructor
+ but when the mutex is initialised.
+ @param[in] mutex Mutex instance to track
+ @param[in] id Mutex ID
+ @param[in] filename File where mutex was created
+ @param[in] line Line in filename */
+ void init(
+ const MutexType& mutex,
+ latch_id_t id,
+ const char* filename,
+ uint32_t line)
+ UNIV_NOTHROW
+ {
+ /* It can be LATCH_ID_BUF_BLOCK_MUTEX or
+ LATCH_ID_BUF_POOL_ZIP. Unfortunately, they
+ are mapped to the same mutex type in the
+ buffer pool code. */
+
+ m_id = id;
+
+ latch_meta_t& meta = sync_latch_get_meta(m_id);
+
+ ut_ad(meta.get_id() == id);
+
+ m_count = meta.get_counter()->sum_register();
+
+ ut_d(MutexDebug<MutexType>::init(m_id));
+ }
+
+ /** Called when the mutex is destroyed. */
+ void destroy()
+ UNIV_NOTHROW
+ {
+ m_count = NULL;
+ ut_d(MutexDebug<MutexType>::destroy());
+ }
+
+ /** Called after a successful mutex acquire.
+ @param[in] n_spins Number of times the thread did
+ spins while trying to acquire the mutex
+ @param[in] n_waits Number of times the thread waited
+ in some type of OS queue */
+ void add(
+ uint32_t n_spins,
+ uint32_t n_waits)
+ UNIV_NOTHROW
+ {
+ if (!m_count->m_enabled) {
+
+ return;
+ }
+
+ m_count->m_spins += n_spins;
+ m_count->m_waits += n_waits;
+
+ ++m_count->m_calls;
+ }
+
+ /** Called when the mutex is locked
+ @param[in] mutex Mutex instance that is locked
+ @param[in] filename Filename from where it was called
+ @param[in] line Line number from where it was called */
+ void locked(
+ const MutexType& mutex,
+ const char* filename,
+ unsigned line)
+ UNIV_NOTHROW
+ {
+ ut_d(MutexDebug<MutexType>::locked(&mutex, filename, line));
+ }
+
+ /** Called when the mutex is released
+ @param[in] mutex Mutex instance that is released */
+ void release(const MutexType& mutex)
+ UNIV_NOTHROW
+ {
+ ut_d(MutexDebug<MutexType>::release(&mutex));
+ }
+
+ /** Called when an attempt is made to lock the mutex
+ @param[in] mutex Mutex instance to be locked
+ @param[in] filename Filename from where it was called
+ @param[in] line Line number from where it was called */
+ void enter(
+ const MutexType& mutex,
+ const char* filename,
+ unsigned line)
+ UNIV_NOTHROW
+ {
+ ut_d(MutexDebug<MutexType>::enter(&mutex, filename, line));
+ }
+
+ /** Print the information about the latch
+ @return the string representation */
+ std::string print() const
+ UNIV_NOTHROW;
+
+ /** @return the latch ID */
+ latch_id_t get_id() const
+ {
+ return(m_id);
+ }
+
+ /** @return the string representation */
+ std::string to_string() const;
+
+private:
+ typedef latch_meta_t::CounterType Counter;
+
+ /** The user visible counters, registered with the meta-data. */
+ Counter::Count* m_count;
+
+ /** Latch meta data ID */
+ latch_id_t m_id;
+};
+
+#include "sync0policy.ic"
+
+#endif /* sync0policy_h */
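A sketch of the lifecycle a mutex implementation drives through its policy
member; the latch id, line arguments and counter values are placeholders, and
in debug builds the MutexDebug base performs the order checks:

	policy.init(mutex, LATCH_ID_BUF_POOL, __FILE__, __LINE__);

	policy.enter(mutex, __FILE__, __LINE__);  /* about to acquire */
	/* ... spin n_spins times, wait n_waits times, then acquire ... */
	policy.locked(mutex, __FILE__, __LINE__);
	policy.add(n_spins, n_waits);             /* update the shared counters */

	policy.release(mutex);                    /* about to release */
	policy.destroy();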
diff --git a/storage/innobase/include/sync0policy.ic b/storage/innobase/include/sync0policy.ic
new file mode 100644
index 00000000000..b86dee0a3b8
--- /dev/null
+++ b/storage/innobase/include/sync0policy.ic
@@ -0,0 +1,101 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/sync0policy.ic
+Policy for mutexes.
+
+Created 2012-08-21 Sunny Bains.
+***********************************************************************/
+
+#include "sync0debug.h"
+
+template <typename Mutex>
+std::string GenericPolicy<Mutex>::to_string() const
+{
+ return(sync_mutex_to_string(get_id(), sync_file_created_get(this)));
+}
+
+template <typename Mutex>
+std::string BlockMutexPolicy<Mutex>::to_string() const
+{
+ /* I don't think it makes sense to keep track of the file name
+ and line number for each block mutex. Too much overhead. Use the
+ latch id to figure out the location from the source. */
+ return(sync_mutex_to_string(get_id(), "buf0buf.cc:0"));
+}
+
+#ifdef UNIV_DEBUG
+
+template <typename Mutex>
+void MutexDebug<Mutex>::init(latch_id_t id)
+ UNIV_NOTHROW
+{
+ m_context.m_id = id;
+
+ m_context.release();
+
+ m_magic_n = MUTEX_MAGIC_N;
+}
+
+template <typename Mutex>
+void MutexDebug<Mutex>::enter(
+ const Mutex* mutex,
+ const char* name,
+ unsigned line)
+ UNIV_NOTHROW
+{
+ ut_ad(!is_owned());
+
+ Context context(m_context.get_id());
+
+ context.locked(mutex, name, line);
+
+ /* Check for latch order violation. */
+
+ sync_check_lock_validate(&context);
+}
+
+template <typename Mutex>
+void MutexDebug<Mutex>::locked(
+ const Mutex* mutex,
+ const char* name,
+ unsigned line)
+ UNIV_NOTHROW
+{
+ ut_ad(!is_owned());
+ ut_ad(m_context.m_thread_id == os_thread_id_t(ULINT_UNDEFINED));
+
+ m_context.locked(mutex, name, line);
+
+ sync_check_lock_granted(&m_context);
+}
+
+template <typename Mutex>
+void MutexDebug<Mutex>::release(const Mutex* mutex)
+ UNIV_NOTHROW
+{
+ ut_ad(is_owned());
+
+ m_context.release();
+
+ sync_check_unlock(&m_context);
+}
+
+#endif /* UNIV_DEBUG */
diff --git a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
index 54b6d46c58d..e0451d66de1 100644
--- a/storage/innobase/include/sync0rw.h
+++ b/storage/innobase/include/sync0rw.h
@@ -2,6 +2,7 @@
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
+Copyright (c) 2017, 2019, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -33,117 +34,85 @@ Created 9/11/1995 Heikki Tuuri
#ifndef sync0rw_h
#define sync0rw_h
-#include "univ.i"
-#ifndef UNIV_HOTBACKUP
-#include "ut0lst.h"
-#include "ut0counter.h"
-#include "sync0sync.h"
-#include "os0sync.h"
-
-/** Enable semaphore request instrumentation */
-extern my_bool srv_instrument_semaphores;
-
-/* The following undef is to prevent a name conflict with a macro
-in MySQL: */
-#undef rw_lock_t
-#endif /* !UNIV_HOTBACKUP */
+#include "os0event.h"
+#include "ut0mutex.h"
/** Counters for RW locks. */
struct rw_lock_stats_t {
- typedef ib_counter_t<ib_int64_t, IB_N_SLOTS> ib_int64_counter_t;
+ typedef ib_counter_t<int64_t, IB_N_SLOTS> int64_counter_t;
/** number of spin waits on rw-latches,
resulted during shared (read) locks */
- ib_int64_counter_t rw_s_spin_wait_count;
+ int64_counter_t rw_s_spin_wait_count;
/** number of spin loop rounds on rw-latches,
resulted during shared (read) locks */
- ib_int64_counter_t rw_s_spin_round_count;
+ int64_counter_t rw_s_spin_round_count;
/** number of OS waits on rw-latches,
resulted during shared (read) locks */
- ib_int64_counter_t rw_s_os_wait_count;
-
- /** number of unlocks (that unlock shared locks),
- set only when UNIV_SYNC_PERF_STAT is defined */
- ib_int64_counter_t rw_s_exit_count;
+ int64_counter_t rw_s_os_wait_count;
/** number of spin waits on rw-latches,
resulted during exclusive (write) locks */
- ib_int64_counter_t rw_x_spin_wait_count;
+ int64_counter_t rw_x_spin_wait_count;
/** number of spin loop rounds on rw-latches,
resulted during exclusive (write) locks */
- ib_int64_counter_t rw_x_spin_round_count;
+ int64_counter_t rw_x_spin_round_count;
/** number of OS waits on rw-latches,
resulted during exclusive (write) locks */
- ib_int64_counter_t rw_x_os_wait_count;
+ int64_counter_t rw_x_os_wait_count;
- /** number of unlocks (that unlock exclusive locks),
- set only when UNIV_SYNC_PERF_STAT is defined */
- ib_int64_counter_t rw_x_exit_count;
+ /** number of spin waits on rw-latches,
+ resulted during sx locks */
+ int64_counter_t rw_sx_spin_wait_count;
+
+ /** number of spin loop rounds on rw-latches,
+ resulted during sx locks */
+ int64_counter_t rw_sx_spin_round_count;
+
+ /** number of OS waits on rw-latches,
+ resulted during sx locks */
+ int64_counter_t rw_sx_os_wait_count;
};
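Each field is an ib_counter_t sliced into IB_N_SLOTS slots indexed per thread,
so concurrent updates tend to avoid sharing a cache line. A sketch of typical
use, assuming the counter's inc()/add() interface and a conversion operator
that sums the slots (n_rounds is a placeholder):

	rw_lock_stats.rw_s_spin_wait_count.inc();          /* one more spin wait */
	rw_lock_stats.rw_s_spin_round_count.add(n_rounds);

	/* Reading sums all slots into a single value. */
	int64_t rounds = rw_lock_stats.rw_s_spin_round_count;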
-/* Latch types; these are used also in btr0btr.h: keep the numerical values
-smaller than 30 and the order of the numerical values like below! */
-#define RW_S_LATCH 1
-#define RW_X_LATCH 2
-#define RW_NO_LATCH 3
+/* Latch types; these are used also in btr0btr.h and mtr0mtr.h: keep the
+numerical values smaller than 30 (smaller than BTR_MODIFY_TREE and
+MTR_MEMO_MODIFY) and keep the order of the numerical values as below! They
+must be powers of 2 so they can also be used as an ORed combination of flags. */
+enum rw_lock_type_t {
+ RW_S_LATCH = 1,
+ RW_X_LATCH = 2,
+ RW_SX_LATCH = 4,
+ RW_NO_LATCH = 8
+};
-#ifndef UNIV_HOTBACKUP
-/* We decrement lock_word by this amount for each x_lock. It is also the
+/* We decrement lock_word by X_LOCK_DECR for each x_lock. It is also the
start value for the lock_word, meaning that it limits the maximum number
-of concurrent read locks before the rw_lock breaks. The current value of
-0x00100000 allows 1,048,575 concurrent readers and 2047 recursive writers.*/
-#define X_LOCK_DECR 0x00100000
+of concurrent read locks before the rw_lock breaks. */
+/* We decrement lock_word by X_LOCK_HALF_DECR for sx_lock. */
+#define X_LOCK_DECR 0x20000000
+#define X_LOCK_HALF_DECR 0x10000000
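A worked example of the arithmetic implied above, starting from the unlocked
value; the state descriptions are a sketch of the encoding, not definitions
from this header:

	/* lock_word                                  state              */
	/* X_LOCK_DECR            (0x20000000)        unlocked           */
	/* X_LOCK_DECR - 2        (0x1FFFFFFE)        2 s-lock holders   */
	/* X_LOCK_HALF_DECR       (0x10000000)        sx-locked          */
	/* 0                                          x-locked once      */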
+#ifdef rw_lock_t
+#undef rw_lock_t
+#endif
struct rw_lock_t;
-#ifdef UNIV_SYNC_DEBUG
+
+#ifdef UNIV_DEBUG
struct rw_lock_debug_t;
-#endif /* UNIV_SYNC_DEBUG */
+#endif /* UNIV_DEBUG */
typedef UT_LIST_BASE_NODE_T(rw_lock_t) rw_lock_list_t;
-extern rw_lock_list_t rw_lock_list;
-extern ib_mutex_t rw_lock_list_mutex;
-
-#ifdef UNIV_SYNC_DEBUG
-/* The global mutex which protects debug info lists of all rw-locks.
-To modify the debug info list of an rw-lock, this mutex has to be
-acquired in addition to the mutex protecting the lock. */
-extern os_fast_mutex_t rw_lock_debug_mutex;
-#endif /* UNIV_SYNC_DEBUG */
+extern rw_lock_list_t rw_lock_list;
+extern ib_mutex_t rw_lock_list_mutex;
/** Counters for RW locks. */
extern rw_lock_stats_t rw_lock_stats;
-#ifdef UNIV_PFS_RWLOCK
-/* Following are rwlock keys used to register with MySQL
-performance schema */
-# ifdef UNIV_LOG_ARCHIVE
-extern mysql_pfs_key_t archive_lock_key;
-# endif /* UNIV_LOG_ARCHIVE */
-extern mysql_pfs_key_t btr_search_latch_key;
-extern mysql_pfs_key_t buf_block_lock_key;
-# ifdef UNIV_SYNC_DEBUG
-extern mysql_pfs_key_t buf_block_debug_latch_key;
-# endif /* UNIV_SYNC_DEBUG */
-extern mysql_pfs_key_t dict_operation_lock_key;
-extern mysql_pfs_key_t checkpoint_lock_key;
-extern mysql_pfs_key_t fil_space_latch_key;
-extern mysql_pfs_key_t fts_cache_rw_lock_key;
-extern mysql_pfs_key_t fts_cache_init_rw_lock_key;
-extern mysql_pfs_key_t trx_i_s_cache_lock_key;
-extern mysql_pfs_key_t trx_purge_latch_key;
-extern mysql_pfs_key_t index_tree_rw_lock_key;
-extern mysql_pfs_key_t index_online_log_key;
-extern mysql_pfs_key_t dict_table_stats_key;
-extern mysql_pfs_key_t trx_sys_rw_lock_key;
-extern mysql_pfs_key_t hash_table_rw_lock_key;
-#endif /* UNIV_PFS_RWLOCK */
-
-
#ifndef UNIV_PFS_RWLOCK
/******************************************************************//**
Creates, or rather, initializes an rw-lock object in a specified memory
@@ -153,16 +122,11 @@ is necessary only if the memory block containing it is freed.
if MySQL performance schema is enabled and "UNIV_PFS_RWLOCK" is
defined, the rwlock are instrumented with performance schema probes. */
# ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
-# define rw_lock_create(K, L, level) \
- rw_lock_create_func((L), (level), #L, __FILE__, __LINE__)
-# else /* UNIV_SYNC_DEBUG */
-# define rw_lock_create(K, L, level) \
- rw_lock_create_func((L), #L, __FILE__, __LINE__)
-# endif/* UNIV_SYNC_DEBUG */
+# define rw_lock_create(K, L, level) \
+ rw_lock_create_func((L), (level), __FILE__, __LINE__)
# else /* UNIV_DEBUG */
# define rw_lock_create(K, L, level) \
- rw_lock_create_func((L), #L, __FILE__, __LINE__)
+ rw_lock_create_func((L), __FILE__, __LINE__)
# endif /* UNIV_DEBUG */
/**************************************************************//**
@@ -178,18 +142,46 @@ unlocking, not the corresponding function. */
# define rw_lock_s_lock_gen(M, P) \
rw_lock_s_lock_func((M), (P), __FILE__, __LINE__)
-# define rw_lock_s_lock_gen_nowait(M, P) \
- rw_lock_s_lock_low((M), (P), __FILE__, __LINE__)
-
# define rw_lock_s_lock_nowait(M, F, L) \
rw_lock_s_lock_low((M), 0, (F), (L))
-# ifdef UNIV_SYNC_DEBUG
+# ifdef UNIV_DEBUG
# define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(P, L)
# else
# define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(L)
-# endif
+# endif /* UNIV_DEBUG */
+
+#define rw_lock_sx_lock(L) \
+ rw_lock_sx_lock_func((L), 0, __FILE__, __LINE__)
+
+#define rw_lock_sx_lock_inline(M, P, F, L) \
+ rw_lock_sx_lock_func((M), (P), (F), (L))
+
+#define rw_lock_sx_lock_gen(M, P) \
+ rw_lock_sx_lock_func((M), (P), __FILE__, __LINE__)
+
+#define rw_lock_sx_lock_nowait(M, P) \
+ rw_lock_sx_lock_low((M), (P), __FILE__, __LINE__)
+
+# ifdef UNIV_DEBUG
+# define rw_lock_sx_unlock(L) rw_lock_sx_unlock_func(0, L)
+# define rw_lock_sx_unlock_gen(L, P) rw_lock_sx_unlock_func(P, L)
+# else /* UNIV_DEBUG */
+# define rw_lock_sx_unlock(L) rw_lock_sx_unlock_func(L)
+# define rw_lock_sx_unlock_gen(L, P) rw_lock_sx_unlock_func(L)
+# endif /* UNIV_DEBUG */
# define rw_lock_x_lock(M) \
rw_lock_x_lock_func((M), 0, __FILE__, __LINE__)
@@ -206,7 +198,7 @@ unlocking, not the corresponding function. */
# define rw_lock_x_lock_func_nowait_inline(M, F, L) \
rw_lock_x_lock_func_nowait((M), (F), (L))
-# ifdef UNIV_SYNC_DEBUG
+# ifdef UNIV_DEBUG
# define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(P, L)
# else
# define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(L)
@@ -218,16 +210,11 @@ unlocking, not the corresponding function. */
/* Following macros point to Performance Schema instrumented functions. */
# ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
-# define rw_lock_create(K, L, level) \
- pfs_rw_lock_create_func((K), (L), (level), #L, __FILE__, __LINE__)
-# else /* UNIV_SYNC_DEBUG */
# define rw_lock_create(K, L, level) \
- pfs_rw_lock_create_func((K), (L), #L, __FILE__, __LINE__)
-# endif/* UNIV_SYNC_DEBUG */
+ pfs_rw_lock_create_func((K), (L), (level), __FILE__, __LINE__)
# else /* UNIV_DEBUG */
# define rw_lock_create(K, L, level) \
- pfs_rw_lock_create_func((K), (L), #L, __FILE__, __LINE__)
+ pfs_rw_lock_create_func((K), (L), __FILE__, __LINE__)
# endif /* UNIV_DEBUG */
/******************************************************************
@@ -243,18 +230,35 @@ unlocking, not the corresponding function. */
# define rw_lock_s_lock_gen(M, P) \
pfs_rw_lock_s_lock_func((M), (P), __FILE__, __LINE__)
-# define rw_lock_s_lock_gen_nowait(M, P) \
- pfs_rw_lock_s_lock_low((M), (P), __FILE__, __LINE__)
-
# define rw_lock_s_lock_nowait(M, F, L) \
pfs_rw_lock_s_lock_low((M), 0, (F), (L))
-# ifdef UNIV_SYNC_DEBUG
+# ifdef UNIV_DEBUG
# define rw_lock_s_unlock_gen(L, P) pfs_rw_lock_s_unlock_func(P, L)
# else
# define rw_lock_s_unlock_gen(L, P) pfs_rw_lock_s_unlock_func(L)
# endif
+# define rw_lock_sx_lock(M) \
+ pfs_rw_lock_sx_lock_func((M), 0, __FILE__, __LINE__)
+
+# define rw_lock_sx_lock_inline(M, P, F, L) \
+ pfs_rw_lock_sx_lock_func((M), (P), (F), (L))
+
+# define rw_lock_sx_lock_gen(M, P) \
+ pfs_rw_lock_sx_lock_func((M), (P), __FILE__, __LINE__)
+
+# define rw_lock_sx_lock_nowait(M, P) \
+ pfs_rw_lock_sx_lock_low((M), (P), __FILE__, __LINE__)
+
+# ifdef UNIV_DEBUG
+# define rw_lock_sx_unlock(L) pfs_rw_lock_sx_unlock_func(0, L)
+# define rw_lock_sx_unlock_gen(L, P) pfs_rw_lock_sx_unlock_func(P, L)
+# else
+# define rw_lock_sx_unlock(L) pfs_rw_lock_sx_unlock_func(L)
+# define rw_lock_sx_unlock_gen(L, P) pfs_rw_lock_sx_unlock_func(L)
+# endif
+
# define rw_lock_x_lock(M) \
pfs_rw_lock_x_lock_func((M), 0, __FILE__, __LINE__)
@@ -270,7 +274,7 @@ unlocking, not the corresponding function. */
# define rw_lock_x_lock_func_nowait_inline(M, F, L) \
pfs_rw_lock_x_lock_func_nowait((M), (F), (L))
-# ifdef UNIV_SYNC_DEBUG
+# ifdef UNIV_DEBUG
# define rw_lock_x_unlock_gen(L, P) pfs_rw_lock_x_unlock_func(P, L)
# else
# define rw_lock_x_unlock_gen(L, P) pfs_rw_lock_x_unlock_func(L)
@@ -278,7 +282,7 @@ unlocking, not the corresponding function. */
# define rw_lock_free(M) pfs_rw_lock_free_func(M)
-#endif /* UNIV_PFS_RWLOCK */
+#endif /* !UNIV_PFS_RWLOCK */
#define rw_lock_s_unlock(L) rw_lock_s_unlock_gen(L, 0)
#define rw_lock_x_unlock(L) rw_lock_x_unlock_gen(L, 0)
@@ -288,43 +292,37 @@ Creates, or rather, initializes an rw-lock object in a specified memory
location (which must be appropriately aligned). The rw-lock is initialized
to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
is necessary only if the memory block containing it is freed. */
-UNIV_INTERN
void
rw_lock_create_func(
/*================*/
rw_lock_t* lock, /*!< in: pointer to memory */
#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
+ latch_level_t level, /*!< in: level */
#endif /* UNIV_DEBUG */
- const char* cmutex_name, /*!< in: mutex name */
const char* cfile_name, /*!< in: file name where created */
- ulint cline); /*!< in: file line where created */
+ unsigned cline); /*!< in: file line where created */
/******************************************************************//**
Calling this function is obligatory only if the memory buffer containing
the rw-lock is freed. Removes an rw-lock object from the global list. The
rw-lock is checked to be in the non-locked state. */
-UNIV_INTERN
void
rw_lock_free_func(
/*==============*/
- rw_lock_t* lock); /*!< in: rw-lock */
+ rw_lock_t* lock); /*!< in/out: rw-lock */
#ifdef UNIV_DEBUG
/******************************************************************//**
Checks that the rw-lock has been initialized and that there are no
simultaneous shared and exclusive locks.
-@return TRUE */
-UNIV_INTERN
-ibool
+@return true */
+bool
rw_lock_validate(
/*=============*/
- rw_lock_t* lock); /*!< in: rw-lock */
+ const rw_lock_t* lock); /*!< in: rw-lock */
#endif /* UNIV_DEBUG */
/******************************************************************//**
Low-level function which tries to lock an rw-lock in s-mode. Performs no
spinning.
-@return TRUE if success */
+@return TRUE if success */
UNIV_INLINE
ibool
rw_lock_s_lock_low(
@@ -334,13 +332,13 @@ rw_lock_s_lock_low(
/*!< in: pass value; != 0, if the lock will be
passed to another thread to unlock */
const char* file_name, /*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
+ unsigned line); /*!< in: line where requested */
/******************************************************************//**
NOTE! Use the corresponding macro, not directly this function, except if
you supply the file name and line number. Lock an rw-lock in shared mode
for the current thread. If the rw-lock is locked in exclusive mode, or
there is an exclusive lock request waiting, the function spins a preset
-time (controlled by SYNC_SPIN_ROUNDS), waiting for the lock, before
+time (controlled by srv_n_spin_wait_rounds), waiting for the lock, before
suspending the thread. */
UNIV_INLINE
void
@@ -350,41 +348,40 @@ rw_lock_s_lock_func(
ulint pass, /*!< in: pass value; != 0, if the lock will
be passed to another thread to unlock */
const char* file_name,/*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
+ unsigned line); /*!< in: line where requested */
/******************************************************************//**
NOTE! Use the corresponding macro, not directly this function! Lock an
rw-lock in exclusive mode for the current thread if the lock can be
obtained immediately.
-@return TRUE if success */
+@return TRUE if success */
UNIV_INLINE
ibool
rw_lock_x_lock_func_nowait(
/*=======================*/
rw_lock_t* lock, /*!< in: pointer to rw-lock */
const char* file_name,/*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
+ unsigned line); /*!< in: line where requested */
/******************************************************************//**
Releases a shared mode lock. */
UNIV_INLINE
void
rw_lock_s_unlock_func(
/*==================*/
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
ulint pass, /*!< in: pass value; != 0, if the lock may have
been passed to another thread to unlock */
-#endif
+#endif /* UNIV_DEBUG */
rw_lock_t* lock); /*!< in/out: rw-lock */
/******************************************************************//**
NOTE! Use the corresponding macro, not directly this function! Lock an
rw-lock in exclusive mode for the current thread. If the rw-lock is locked
in shared or exclusive mode, or there is an exclusive lock request waiting,
-the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
+the function spins a preset time (controlled by srv_n_spin_wait_rounds), waiting
for the lock, before suspending the thread. If the same thread has an x-lock
on the rw-lock, locking succeed, with the following exception: if pass != 0,
only a single x-lock may be taken on the lock. NOTE: If the same thread has
an s-lock, locking does not succeed! */
-UNIV_INTERN
void
rw_lock_x_lock_func(
/*================*/
@@ -392,18 +389,59 @@ rw_lock_x_lock_func(
ulint pass, /*!< in: pass value; != 0, if the lock will
be passed to another thread to unlock */
const char* file_name,/*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
+ unsigned line); /*!< in: line where requested */
+/******************************************************************//**
+Low-level function for acquiring an sx lock.
+@return FALSE if did not succeed, TRUE if success. */
+ibool
+rw_lock_sx_lock_low(
+/*================*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ const char* file_name,/*!< in: file name where lock requested */
+ unsigned line); /*!< in: line where requested */
+/******************************************************************//**
+NOTE! Use the corresponding macro, not directly this function! Lock an
+rw-lock in SX mode for the current thread. If the rw-lock is locked
+in exclusive mode, or there is an exclusive lock request waiting,
+the function spins a preset time (controlled by srv_n_spin_wait_rounds), waiting
+for the lock, before suspending the thread. If the same thread has an x-lock
+on the rw-lock, locking succeeds, with the following exception: if pass != 0,
+only a single sx-lock may be taken on the lock. NOTE: If the same thread has
+an s-lock, locking does not succeed! */
+void
+rw_lock_sx_lock_func(
+/*=================*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ const char* file_name,/*!< in: file name where lock requested */
+ unsigned line); /*!< in: line where requested */
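A minimal usage sketch of the new SX mode through the macros defined earlier
in this header; an SX holder blocks X and other SX requests while still
admitting S readers:

	rw_lock_sx_lock(lock);
	/* ... modify the structure; concurrent s-latch readers remain possible ... */
	rw_lock_sx_unlock(lock);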
/******************************************************************//**
Releases an exclusive mode lock. */
UNIV_INLINE
void
rw_lock_x_unlock_func(
/*==================*/
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
ulint pass, /*!< in: pass value; != 0, if the lock may have
been passed to another thread to unlock */
-#endif
+#endif /* UNIV_DEBUG */
+ rw_lock_t* lock); /*!< in/out: rw-lock */
+
+/******************************************************************//**
+Releases an sx mode lock. */
+UNIV_INLINE
+void
+rw_lock_sx_unlock_func(
+/*===================*/
+#ifdef UNIV_DEBUG
+ ulint pass, /*!< in: pass value; != 0, if the lock may have
+ been passed to another thread to unlock */
+#endif /* UNIV_DEBUG */
rw_lock_t* lock); /*!< in/out: rw-lock */
+
/******************************************************************//**
This function is used in the insert buffer to move the ownership of an
x-latch on a buffer frame to the current thread. The x-latch was set by
@@ -412,7 +450,6 @@ read was done. The ownership is moved because we want that the current
thread is able to acquire a second x-latch which is stored in an mtr.
This, in turn, is needed to pass the debug checks of index page
operations. */
-UNIV_INTERN
void
rw_lock_x_lock_move_ownership(
/*==========================*/
@@ -421,32 +458,33 @@ rw_lock_x_lock_move_ownership(
/******************************************************************//**
Returns the value of writer_count for the lock. Does not reserve the lock
mutex, so the caller must be sure it is not changed during the call.
-@return value of writer_count */
+@return value of writer_count */
UNIV_INLINE
ulint
rw_lock_get_x_lock_count(
/*=====================*/
const rw_lock_t* lock); /*!< in: rw-lock */
-/********************************************************************//**
-Check if there are threads waiting for the rw-lock.
-@return 1 if waiters, 0 otherwise */
+/******************************************************************//**
+Returns the number of sx-locks granted on the lock. Does not reserve the lock
+mutex, so the caller must be sure it is not changed during the call.
+@return value of sx-lock count */
UNIV_INLINE
ulint
-rw_lock_get_waiters(
-/*================*/
+rw_lock_get_sx_lock_count(
+/*======================*/
const rw_lock_t* lock); /*!< in: rw-lock */
/******************************************************************//**
Returns the write-status of the lock - this function made more sense
with the old rw_lock implementation.
-@return RW_LOCK_NOT_LOCKED, RW_LOCK_EX, RW_LOCK_WAIT_EX */
+@return RW_LOCK_NOT_LOCKED, RW_LOCK_X, RW_LOCK_X_WAIT, RW_LOCK_SX */
UNIV_INLINE
ulint
rw_lock_get_writer(
/*===============*/
const rw_lock_t* lock); /*!< in: rw-lock */
/******************************************************************//**
-Returns the number of readers.
-@return number of readers */
+Returns the number of readers (s-locks).
+@return number of readers */
UNIV_INLINE
ulint
rw_lock_get_reader_count(
@@ -455,111 +493,64 @@ rw_lock_get_reader_count(
/******************************************************************//**
Decrements lock_word the specified amount if it is greater than 0.
This is used by both s_lock and x_lock operations.
-@return TRUE if decr occurs */
+@return true if decr occurs */
UNIV_INLINE
-ibool
+bool
rw_lock_lock_word_decr(
/*===================*/
rw_lock_t* lock, /*!< in/out: rw-lock */
- ulint amount); /*!< in: amount to decrement */
-/******************************************************************//**
-Increments lock_word the specified amount and returns new value.
-@return lock->lock_word after increment */
-UNIV_INLINE
-lint
-rw_lock_lock_word_incr(
-/*===================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- ulint amount); /*!< in: amount to increment */
-/******************************************************************//**
-This function sets the lock->writer_thread and lock->recursive fields.
-For platforms where we are using atomic builtins instead of lock->mutex
-it sets the lock->writer_thread field using atomics to ensure memory
-ordering. Note that it is assumed that the caller of this function
-effectively owns the lock i.e.: nobody else is allowed to modify
-lock->writer_thread at this point in time.
-The protocol is that lock->writer_thread MUST be updated BEFORE the
-lock->recursive flag is set. */
-UNIV_INLINE
-void
-rw_lock_set_writer_id_and_recursion_flag(
-/*=====================================*/
- rw_lock_t* lock, /*!< in/out: lock to work on */
- ibool recursive); /*!< in: TRUE if recursion
- allowed */
-#ifdef UNIV_SYNC_DEBUG
+ ulint amount, /*!< in: amount to decrement */
+	lint		threshold);	/*!< in: threshold the lock_word
+					must exceed */
+#ifdef UNIV_DEBUG
/******************************************************************//**
Checks if the thread has locked the rw-lock in the specified mode, with
the pass value == 0. */
-UNIV_INTERN
ibool
rw_lock_own(
/*========*/
rw_lock_t* lock, /*!< in: rw-lock */
- ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED,
- RW_LOCK_EX */
+ ulint lock_type) /*!< in: lock type: RW_LOCK_S,
+ RW_LOCK_X */
MY_ATTRIBUTE((warn_unused_result));
-#endif /* UNIV_SYNC_DEBUG */
+
/******************************************************************//**
-Checks if somebody has locked the rw-lock in the specified mode. */
-UNIV_INTERN
-ibool
+Checks if the thread has locked the rw-lock in any of the modes given by
+flags, with the pass value == 0. */
+bool
+rw_lock_own_flagged(
+/*================*/
+ const rw_lock_t* lock, /*!< in: rw-lock */
+ rw_lock_flags_t flags) /*!< in: specify lock types with
+ OR of the rw_lock_flag_t values */
+ MY_ATTRIBUTE((warn_unused_result));
+#endif /* UNIV_DEBUG */
+/******************************************************************//**
+Checks if somebody has locked the rw-lock in the specified mode.
+@return true if locked */
+bool
rw_lock_is_locked(
/*==============*/
rw_lock_t* lock, /*!< in: rw-lock */
- ulint lock_type); /*!< in: lock type: RW_LOCK_SHARED,
- RW_LOCK_EX */
-#ifdef UNIV_SYNC_DEBUG
-/***************************************************************//**
-Prints debug info of an rw-lock. */
-UNIV_INTERN
-void
-rw_lock_print(
-/*==========*/
- rw_lock_t* lock); /*!< in: rw-lock */
+ ulint lock_type); /*!< in: lock type: RW_LOCK_S,
+ RW_LOCK_X or RW_LOCK_SX */
+#ifdef UNIV_DEBUG
/***************************************************************//**
Prints debug info of currently locked rw-locks. */
-UNIV_INTERN
void
rw_lock_list_print_info(
/*====================*/
- FILE* file); /*!< in: file where to print */
-/***************************************************************//**
-Returns the number of currently locked rw-locks.
-Works only in the debug version.
-@return number of locked rw-locks */
-UNIV_INTERN
-ulint
-rw_lock_n_locked(void);
-/*==================*/
+ FILE* file); /*!< in: file where to print */
/*#####################################################################*/
-/******************************************************************//**
-Acquires the debug mutex. We cannot use the mutex defined in sync0sync,
-because the debug mutex is also acquired in sync0arr while holding the OS
-mutex protecting the sync array, and the ordinary mutex_enter might
-recursively call routines in sync0arr, leading to a deadlock on the OS
-mutex. */
-UNIV_INTERN
-void
-rw_lock_debug_mutex_enter(void);
-/*===========================*/
-/******************************************************************//**
-Releases the debug mutex. */
-UNIV_INTERN
-void
-rw_lock_debug_mutex_exit(void);
-/*==========================*/
/*********************************************************************//**
Prints info of a debug struct. */
-UNIV_INTERN
void
rw_lock_debug_print(
/*================*/
FILE* f, /*!< in: output stream */
- rw_lock_debug_t* info); /*!< in: debug struct */
-#endif /* UNIV_SYNC_DEBUG */
+ const rw_lock_debug_t* info); /*!< in: debug struct */
+#endif /* UNIV_DEBUG */
/* NOTE! The structure appears here only for the compiler to know its size.
Do not use its fields directly! */
@@ -571,73 +562,87 @@ shared locks are allowed. To prevent starving of a writer blocked by
readers, a writer may queue for x-lock by decrementing lock_word: no
new readers will be let in while the thread waits for readers to
exit. */
-struct rw_lock_t {
+
+struct rw_lock_t
+#ifdef UNIV_DEBUG
+ : public latch_t
+#endif /* UNIV_DEBUG */
+{
+ /** Holds the state of the lock. */
volatile lint lock_word;
- /*!< Holds the state of the lock. */
- volatile ulint waiters;/*!< 1: there are waiters */
- volatile ibool recursive;/*!< Default value FALSE which means the lock
- is non-recursive. The value is typically set
- to TRUE making normal rw_locks recursive. In
- case of asynchronous IO, when a non-zero
- value of 'pass' is passed then we keep the
- lock non-recursive.
- This flag also tells us about the state of
- writer_thread field. If this flag is set
- then writer_thread MUST contain the thread
- id of the current x-holder or wait-x thread.
- This flag must be reset in x_unlock
- functions before incrementing the lock_word */
+
+ /** 1: there are waiters */
+ volatile uint32_t waiters;
+
+ /** number of granted SX locks. */
+ volatile ulint sx_recursive;
+
+ /** This is TRUE if the writer field is RW_LOCK_X_WAIT; this field
+ is located far from the memory update hotspot fields which are at
+ the start of this struct, thus we can peek this field without
+ causing much memory bus traffic */
+ bool writer_is_wait_ex;
+
+ /** The value is typically set to thread id of a writer thread making
+ normal rw_locks recursive. In case of asynchronous IO, when a non-zero
+ value of 'pass' is passed then we keep the lock non-recursive.
+
+ writer_thread must be reset in x_unlock functions before incrementing
+ the lock_word. */
volatile os_thread_id_t writer_thread;
- /*!< Thread id of writer thread. Is only
- guaranteed to have sane and non-stale
- value iff recursive flag is set. */
- os_event_t event; /*!< Used by sync0arr.cc for thread queueing */
+
+ /** Used by sync0arr.cc for thread queueing */
+ os_event_t event;
+
+ /** Event for next-writer to wait on. A thread must decrement
+ lock_word before waiting. */
os_event_t wait_ex_event;
- /*!< Event for next-writer to wait on. A thread
- must decrement lock_word before waiting. */
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- ib_mutex_t mutex; /*!< The mutex protecting rw_lock_t */
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+ /** File name where lock created */
+ const char* cfile_name;
+
+	/** File name where last s-locked; the last s-lock file/line are
+	not guaranteed to be correct */
+ const char* last_s_file_name;
+
+ /** File name where last x-locked */
+ const char* last_x_file_name;
+
+ /** Line where created */
+ unsigned cline:13;
+
+ /** If 1 then the rw-lock is a block lock */
+ unsigned is_block_lock:1;
+
+ /** Line number where last time s-locked */
+ unsigned last_s_line:14;
+
+ /** Line number where last time x-locked */
+ unsigned last_x_line:14;
+
+ /** Count of os_waits. May not be accurate */
+ uint32_t count_os_wait;
+
+ /** All allocated rw locks are put into a list */
UT_LIST_NODE_T(rw_lock_t) list;
- /*!< All allocated rw locks are put into a
- list */
-#ifdef UNIV_SYNC_DEBUG
- UT_LIST_BASE_NODE_T(rw_lock_debug_t) debug_list;
- /*!< In the debug version: pointer to the debug
- info list of the lock */
- ulint level; /*!< Level in the global latching order. */
-#endif /* UNIV_SYNC_DEBUG */
+
#ifdef UNIV_PFS_RWLOCK
- struct PSI_rwlock *pfs_psi;/*!< The instrumentation hook */
-#endif
- ulint count_os_wait; /*!< Count of os_waits. May not be accurate */
- const char* cfile_name;/*!< File name where lock created */
- const char* lock_name; /*!< lock name */
- os_thread_id_t thread_id;/*!< thread id */
- const char* file_name;/*!< File name where the lock was obtained */
- ulint line; /*!< Line where the rw-lock was locked */
- /* last s-lock file/line is not guaranteed to be correct */
- const char* last_s_file_name;/*!< File name where last s-locked */
- const char* last_x_file_name;/*!< File name where last x-locked */
- ibool writer_is_wait_ex;
- /*!< This is TRUE if the writer field is
- RW_LOCK_WAIT_EX; this field is located far
- from the memory update hotspot fields which
- are at the start of this struct, thus we can
- peek this field without causing much memory
- bus traffic */
- unsigned cline:14; /*!< Line where created */
- unsigned last_s_line:14; /*!< Line number where last time s-locked */
- unsigned last_x_line:14; /*!< Line number where last time x-locked */
+ /** The instrumentation hook */
+ struct PSI_rwlock* pfs_psi;
+#endif /* UNIV_PFS_RWLOCK */
+
#ifdef UNIV_DEBUG
- ulint magic_n; /*!< RW_LOCK_MAGIC_N */
-/** Value of rw_lock_t::magic_n */
-#define RW_LOCK_MAGIC_N 22643
+ virtual std::string to_string() const;
+ virtual std::string locked_from() const;
+
+ /** In the debug version: pointer to the debug info list of the lock */
+ UT_LIST_BASE_NODE_T(rw_lock_debug_t) debug_list;
+
+ /** Level in the global latching order. */
+ latch_level_t level;
#endif /* UNIV_DEBUG */
-};
-#ifdef UNIV_SYNC_DEBUG
+};
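The bit-field widths above bound what the bookkeeping can record; a small
illustrative check (standalone C++11, not part of the patch):

	static_assert((1u << 13) - 1 == 8191,  "cline caps at line 8191");
	static_assert((1u << 14) - 1 == 16383, "last_*_line caps at 16383");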
+#ifdef UNIV_DEBUG
/** The structure for storing debug info of an rw-lock. All access to this
structure must be protected by rw_lock_debug_mutex_enter(). */
struct rw_lock_debug_t {
@@ -645,15 +650,15 @@ struct rw_lock_debug_t {
os_thread_id_t thread_id; /*!< The thread id of the thread which
locked the rw-lock */
ulint pass; /*!< Pass value given in the lock operation */
- ulint lock_type; /*!< Type of the lock: RW_LOCK_EX,
- RW_LOCK_SHARED, RW_LOCK_WAIT_EX */
+ ulint lock_type; /*!< Type of the lock: RW_LOCK_X,
+ RW_LOCK_S, RW_LOCK_X_WAIT */
const char* file_name;/*!< File name where the lock was obtained */
- ulint line; /*!< Line where the rw-lock was locked */
+ unsigned line; /*!< Line where the rw-lock was locked */
UT_LIST_NODE_T(rw_lock_debug_t) list;
/*!< Debug structs are linked in a two-way
list */
};
-#endif /* UNIV_SYNC_DEBUG */
+#endif /* UNIV_DEBUG */
/* For performance schema instrumentation, a new set of rwlock
wrap functions are created if "UNIV_PFS_RWLOCK" is defined.
@@ -676,6 +681,8 @@ rw_lock_s_lock()
rw_lock_s_lock_gen()
rw_lock_s_lock_nowait()
rw_lock_s_unlock_gen()
+rw_lock_sx_lock()
+rw_lock_sx_unlock_gen()
rw_lock_free()
*/
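The layering these names imply is sketched below, indicative only and
reconstructed from memory (the actual macro definitions live elsewhere in
sync0rw.h):

	#ifdef UNIV_PFS_RWLOCK
	# define rw_lock_sx_lock(M)				\
		pfs_rw_lock_sx_lock_func((M), 0, __FILE__, __LINE__)
	#else
	# define rw_lock_sx_lock(M)				\
		rw_lock_sx_lock_func((M), 0, __FILE__, __LINE__)
	#endif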
@@ -692,13 +699,10 @@ pfs_rw_lock_create_func(
performance schema */
rw_lock_t* lock, /*!< in: rw lock */
#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
+ latch_level_t level, /*!< in: level */
#endif /* UNIV_DEBUG */
- const char* cmutex_name, /*!< in: mutex name */
const char* cfile_name, /*!< in: file name where created */
- ulint cline); /*!< in: file line where created */
+ unsigned cline); /*!< in: file line where created */
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_x_lock_func()
@@ -712,7 +716,7 @@ pfs_rw_lock_x_lock_func(
ulint pass, /*!< in: pass value; != 0, if the lock will
be passed to another thread to unlock */
const char* file_name,/*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
+ unsigned line); /*!< in: line where requested */
/******************************************************************//**
Performance schema instrumented wrap function for
rw_lock_x_lock_func_nowait()
@@ -724,7 +728,7 @@ pfs_rw_lock_x_lock_func_nowait(
/*===========================*/
rw_lock_t* lock, /*!< in: pointer to rw-lock */
const char* file_name,/*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
+ unsigned line); /*!< in: line where requested */
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_s_lock_func()
NOTE! Please use the corresponding macro rw_lock_s_lock(), not directly
@@ -737,7 +741,7 @@ pfs_rw_lock_s_lock_func(
ulint pass, /*!< in: pass value; != 0, if the lock will
be passed to another thread to unlock */
const char* file_name,/*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
+ unsigned line); /*!< in: line where requested */
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_s_lock_func()
NOTE! Please use the corresponding macro rw_lock_s_lock(), not directly
@@ -752,7 +756,7 @@ pfs_rw_lock_s_lock_low(
lock will be passed to another
thread to unlock */
const char* file_name, /*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
+ unsigned line); /*!< in: line where requested */
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_x_lock_func()
NOTE! Please use the corresponding macro rw_lock_x_lock(), not directly
@@ -765,7 +769,7 @@ pfs_rw_lock_x_lock_func(
ulint pass, /*!< in: pass value; != 0, if the lock will
be passed to another thread to unlock */
const char* file_name,/*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
+ unsigned line); /*!< in: line where requested */
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_s_unlock_func()
NOTE! Please use the corresponding macro rw_lock_s_unlock(), not directly
@@ -774,25 +778,65 @@ UNIV_INLINE
void
pfs_rw_lock_s_unlock_func(
/*======================*/
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
ulint pass, /*!< in: pass value; != 0, if the
lock may have been passed to another
thread to unlock */
-#endif
+#endif /* UNIV_DEBUG */
rw_lock_t* lock); /*!< in/out: rw-lock */
/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_s_unlock_func()
+Performance schema instrumented wrap function for rw_lock_x_unlock_func()
NOTE! Please use the corresponding macro rw_lock_x_unlock(), not directly
this function! */
UNIV_INLINE
void
pfs_rw_lock_x_unlock_func(
/*======================*/
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
ulint pass, /*!< in: pass value; != 0, if the
lock may have been passed to another
thread to unlock */
-#endif
+#endif /* UNIV_DEBUG */
+ rw_lock_t* lock); /*!< in/out: rw-lock */
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_sx_lock_func()
+NOTE! Please use the corresponding macro rw_lock_sx_lock(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_sx_lock_func(
+/*====================*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ const char* file_name,/*!< in: file name where lock requested */
+ unsigned line); /*!< in: line where requested */
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_sx_lock_nowait()
+NOTE! Please use the corresponding macro, not directly
+this function! */
+UNIV_INLINE
+ibool
+pfs_rw_lock_sx_lock_low(
+/*================*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ const char* file_name,/*!< in: file name where lock requested */
+ unsigned line); /*!< in: line where requested */
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_sx_unlock_func()
+NOTE! Please use the corresponding macro rw_lock_sx_unlock(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_sx_unlock_func(
+/*======================*/
+#ifdef UNIV_DEBUG
+ ulint pass, /*!< in: pass value; != 0, if the
+ lock may have been passed to another
+ thread to unlock */
+#endif /* UNIV_DEBUG */
rw_lock_t* lock); /*!< in/out: rw-lock */
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_free_func()
@@ -805,10 +849,6 @@ pfs_rw_lock_free_func(
rw_lock_t* lock); /*!< in: rw-lock */
#endif /* UNIV_PFS_RWLOCK */
-
-#ifndef UNIV_NONINL
#include "sync0rw.ic"
-#endif
-#endif /* !UNIV_HOTBACKUP */
-#endif
+#endif /* sync0rw.h */
diff --git a/storage/innobase/include/sync0rw.ic b/storage/innobase/include/sync0rw.ic
index a2e82775c89..d0be5f0ece1 100644
--- a/storage/innobase/include/sync0rw.ic
+++ b/storage/innobase/include/sync0rw.ic
@@ -2,6 +2,7 @@
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
+Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -30,12 +31,13 @@ The read-write lock (for threads)
Created 9/11/1995 Heikki Tuuri
*******************************************************/
+#include "os0event.h"
+
/******************************************************************//**
Lock an rw-lock in shared mode for the current thread. If the rw-lock is
locked in exclusive mode, or there is an exclusive lock request waiting,
-the function spins a preset time (controlled by SYNC_SPIN_ROUNDS),
+the function spins a preset time (controlled by srv_n_spin_wait_rounds),
waiting for the lock before suspending the thread. */
-UNIV_INTERN
void
rw_lock_s_lock_spin(
/*================*/
@@ -43,11 +45,10 @@ rw_lock_s_lock_spin(
ulint pass, /*!< in: pass value; != 0, if the lock will
be passed to another thread to unlock */
const char* file_name,/*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
-#ifdef UNIV_SYNC_DEBUG
+ unsigned line); /*!< in: line where requested */
+#ifdef UNIV_DEBUG
/******************************************************************//**
Inserts the debug information for an rw-lock. */
-UNIV_INTERN
void
rw_lock_add_debug_info(
/*===================*/
@@ -55,70 +56,21 @@ rw_lock_add_debug_info(
ulint pass, /*!< in: pass value */
ulint lock_type, /*!< in: lock type */
const char* file_name, /*!< in: file where requested */
- ulint line); /*!< in: line where requested */
+ unsigned line); /*!< in: line where requested */
/******************************************************************//**
Removes a debug information struct for an rw-lock. */
-UNIV_INTERN
void
rw_lock_remove_debug_info(
/*======================*/
rw_lock_t* lock, /*!< in: rw-lock */
ulint pass, /*!< in: pass value */
ulint lock_type); /*!< in: lock type */
-#endif /* UNIV_SYNC_DEBUG */
-
-/********************************************************************//**
-Check if there are threads waiting for the rw-lock.
-@return 1 if waiters, 0 otherwise */
-UNIV_INLINE
-ulint
-rw_lock_get_waiters(
-/*================*/
- const rw_lock_t* lock) /*!< in: rw-lock */
-{
- return(lock->waiters);
-}
-
-/********************************************************************//**
-Sets lock->waiters to 1. It is not an error if lock->waiters is already
-1. On platforms where ATOMIC builtins are used this function enforces a
-memory barrier. */
-UNIV_INLINE
-void
-rw_lock_set_waiter_flag(
-/*====================*/
- rw_lock_t* lock) /*!< in/out: rw-lock */
-{
-#ifdef INNODB_RW_LOCKS_USE_ATOMICS
- (void) os_compare_and_swap_ulint(&lock->waiters, 0, 1);
-#else /* INNODB_RW_LOCKS_USE_ATOMICS */
- lock->waiters = 1;
- os_wmb;
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
-}
-
-/********************************************************************//**
-Resets lock->waiters to 0. It is not an error if lock->waiters is already
-0. On platforms where ATOMIC builtins are used this function enforces a
-memory barrier. */
-UNIV_INLINE
-void
-rw_lock_reset_waiter_flag(
-/*======================*/
- rw_lock_t* lock) /*!< in/out: rw-lock */
-{
-#ifdef INNODB_RW_LOCKS_USE_ATOMICS
- (void) os_compare_and_swap_ulint(&lock->waiters, 1, 0);
-#else /* INNODB_RW_LOCKS_USE_ATOMICS */
- lock->waiters = 0;
- os_wmb;
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
-}
+#endif /* UNIV_DEBUG */
/******************************************************************//**
Returns the write-status of the lock - this function made more sense
with the old rw_lock implementation.
-@return RW_LOCK_NOT_LOCKED, RW_LOCK_EX, RW_LOCK_WAIT_EX */
+@return RW_LOCK_NOT_LOCKED, RW_LOCK_X, RW_LOCK_X_WAIT, RW_LOCK_SX */
UNIV_INLINE
ulint
rw_lock_get_writer(
@@ -126,21 +78,31 @@ rw_lock_get_writer(
const rw_lock_t* lock) /*!< in: rw-lock */
{
lint lock_word = lock->lock_word;
- if (lock_word > 0) {
+
+ ut_ad(lock_word <= X_LOCK_DECR);
+ if (lock_word > X_LOCK_HALF_DECR) {
/* return NOT_LOCKED in s-lock state, like the writer
member of the old lock implementation. */
return(RW_LOCK_NOT_LOCKED);
- } else if ((lock_word == 0) || (lock_word <= -X_LOCK_DECR)) {
- return(RW_LOCK_EX);
+ } else if (lock_word > 0) {
+ /* sx-locked, no x-locks */
+ return(RW_LOCK_SX);
+ } else if (lock_word == 0
+ || lock_word == -X_LOCK_HALF_DECR
+ || lock_word <= -X_LOCK_DECR) {
+ /* x-lock with sx-lock is also treated as RW_LOCK_EX */
+ return(RW_LOCK_X);
} else {
- ut_ad(lock_word > -X_LOCK_DECR);
- return(RW_LOCK_WAIT_EX);
+ /* x-waiter with sx-lock is also treated as RW_LOCK_WAIT_EX
+ e.g. -X_LOCK_HALF_DECR < lock_word < 0 : without sx
+ -X_LOCK_DECR < lock_word < -X_LOCK_HALF_DECR : with sx */
+ return(RW_LOCK_X_WAIT);
}
}
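A standalone mirror of this decoding, hedged on the usual constants from
sync0rw.h (X_LOCK_DECR = 0x20000000, X_LOCK_HALF_DECR = X_LOCK_DECR / 2);
it compiles as plain C++ and the asserts pass:

	#include <cassert>

	static const long X_DECR = 0x20000000;	/* assumed X_LOCK_DECR */
	static const long X_HALF = X_DECR / 2;	/* assumed X_LOCK_HALF_DECR */
	enum state { NOT_LOCKED, SX, X, X_WAIT };

	static state decode_writer(long w)
	{
		if (w > X_HALF) return(NOT_LOCKED);	/* free, or S only */
		if (w > 0)	return(SX);		/* sx held, no x */
		if (w == 0 || w == -X_HALF || w <= -X_DECR)
				return(X);		/* x (maybe with sx) */
		return(X_WAIT);				/* writer waiting */
	}

	int main()
	{
		assert(decode_writer(X_DECR) == NOT_LOCKED);	 /* free */
		assert(decode_writer(X_DECR - 3) == NOT_LOCKED); /* 3 s-locks */
		assert(decode_writer(X_HALF) == SX);		 /* 1 sx */
		assert(decode_writer(0) == X);			 /* 1 x */
		assert(decode_writer(-X_HALF) == X);		 /* x + sx */
		assert(decode_writer(-5) == X_WAIT);		 /* wait-x, 5 s */
	}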
/******************************************************************//**
-Returns the number of readers.
-@return number of readers */
+Returns the number of readers (s-locks).
+@return number of readers */
UNIV_INLINE
ulint
rw_lock_get_reader_count(
@@ -148,31 +110,35 @@ rw_lock_get_reader_count(
const rw_lock_t* lock) /*!< in: rw-lock */
{
lint lock_word = lock->lock_word;
- if (lock_word > 0) {
- /* s-locked, no x-waiters */
+ ut_ad(lock_word <= X_LOCK_DECR);
+
+ if (lock_word > X_LOCK_HALF_DECR) {
+ /* s-locked, no x-waiter */
return(X_LOCK_DECR - lock_word);
- } else if (lock_word < 0 && lock_word > -X_LOCK_DECR) {
- /* s-locked, with x-waiters */
+ } else if (lock_word > 0) {
+ /* s-locked, with sx-locks only */
+ return(X_LOCK_HALF_DECR - lock_word);
+ } else if (lock_word == 0) {
+ /* x-locked */
+ return(0);
+ } else if (lock_word > -X_LOCK_HALF_DECR) {
+ /* s-locked, with x-waiter */
return((ulint)(-lock_word));
+ } else if (lock_word == -X_LOCK_HALF_DECR) {
+ /* x-locked with sx-locks */
+ return(0);
+ } else if (lock_word > -X_LOCK_DECR) {
+ /* s-locked, with x-waiter and sx-lock */
+ return((ulint)(-(lock_word + X_LOCK_HALF_DECR)));
}
+ /* no s-locks */
return(0);
}
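Worked example of the branches above, with the same assumed constants: three
plain readers give lock_word = X_LOCK_DECR - 3, so X_LOCK_DECR - lock_word = 3;
an sx-holder plus three readers give lock_word = X_LOCK_HALF_DECR - 3, and
X_LOCK_HALF_DECR - lock_word is again 3; an x-waiter plus three readers give
lock_word = -3, and -lock_word is again 3.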
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
-UNIV_INLINE
-ib_mutex_t*
-rw_lock_get_mutex(
-/*==============*/
- rw_lock_t* lock)
-{
- return(&(lock->mutex));
-}
-#endif
-
/******************************************************************//**
Returns the value of writer_count for the lock. Does not reserve the lock
mutex, so the caller must be sure it is not changed during the call.
-@return value of writer_count */
+@return value of writer_count */
UNIV_INLINE
ulint
rw_lock_get_x_lock_count(
@@ -180,127 +146,88 @@ rw_lock_get_x_lock_count(
const rw_lock_t* lock) /*!< in: rw-lock */
{
lint lock_copy = lock->lock_word;
- if ((lock_copy != 0) && (lock_copy > -X_LOCK_DECR)) {
+ ut_ad(lock_copy <= X_LOCK_DECR);
+
+ if (lock_copy == 0 || lock_copy == -X_LOCK_HALF_DECR) {
+ /* "1 x-lock" or "1 x-lock + sx-locks" */
+ return(1);
+ } else if (lock_copy > -X_LOCK_DECR) {
+ /* s-locks, one or more sx-locks if > 0, or x-waiter if < 0 */
return(0);
+ } else if (lock_copy > -(X_LOCK_DECR + X_LOCK_HALF_DECR)) {
+ /* no s-lock, no sx-lock, 2 or more x-locks.
+ First 2 x-locks are set with -X_LOCK_DECR,
+ all other recursive x-locks are set with -1 */
+ return(2 - (lock_copy + X_LOCK_DECR));
+ } else {
+ /* no s-lock, 1 or more sx-lock, 2 or more x-locks.
+ First 2 x-locks are set with -(X_LOCK_DECR + X_LOCK_HALF_DECR),
+ all other recursive x-locks are set with -1 */
+ return(2 - (lock_copy + X_LOCK_DECR + X_LOCK_HALF_DECR));
}
- return((lock_copy == 0) ? 1 : (2 - (lock_copy + X_LOCK_DECR)));
}
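Worked example of the recursion arithmetic, same assumed constants: the first
x-lock moves lock_word from X_LOCK_DECR to 0 (count 1); relocking subtracts
X_LOCK_DECR, leaving -X_LOCK_DECR (count 2); every further relock subtracts 1,
so after a third x-lock lock_word = -X_LOCK_DECR - 1 and the formula gives
2 - (lock_word + X_LOCK_DECR) = 2 - (-1) = 3.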
/******************************************************************//**
-Two different implementations for decrementing the lock_word of a rw_lock:
-one for systems supporting atomic operations, one for others. This does
-does not support recusive x-locks: they should be handled by the caller and
-need not be atomic since they are performed by the current lock holder.
-Returns true if the decrement was made, false if not.
-@return TRUE if decr occurs */
+Returns the number of sx-locks granted on the lock. Does not reserve the lock
+mutex, so the caller must be sure it is not changed during the call.
+@return value of sx-lock count */
UNIV_INLINE
-ibool
-rw_lock_lock_word_decr(
-/*===================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- ulint amount) /*!< in: amount to decrement */
+ulint
+rw_lock_get_sx_lock_count(
+/*======================*/
+ const rw_lock_t* lock) /*!< in: rw-lock */
{
-#ifdef INNODB_RW_LOCKS_USE_ATOMICS
- lint local_lock_word;
+#ifdef UNIV_DEBUG
+ lint lock_copy = lock->lock_word;
- os_rmb;
- local_lock_word = lock->lock_word;
- while (local_lock_word > 0) {
- if (os_compare_and_swap_lint(&lock->lock_word,
- local_lock_word,
- local_lock_word - amount)) {
- return(TRUE);
- }
- local_lock_word = lock->lock_word;
+ ut_ad(lock_copy <= X_LOCK_DECR);
+
+ while (lock_copy < 0) {
+ lock_copy += X_LOCK_DECR;
}
- return(FALSE);
-#else /* INNODB_RW_LOCKS_USE_ATOMICS */
- ibool success = FALSE;
- mutex_enter(&(lock->mutex));
- if (lock->lock_word > 0) {
- lock->lock_word -= amount;
- success = TRUE;
+
+ if (lock_copy > 0 && lock_copy <= X_LOCK_HALF_DECR) {
+ return(lock->sx_recursive);
}
- mutex_exit(&(lock->mutex));
- return(success);
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+
+ return(0);
+#else /* UNIV_DEBUG */
+ return(lock->sx_recursive);
+#endif /* UNIV_DEBUG */
}
/******************************************************************//**
-Increments lock_word the specified amount and returns new value.
-@return lock->lock_word after increment */
+Decrements lock_word the specified amount while it is above the given
+threshold. This does not support recursive x-locks: they should be handled
+by the caller and need not be atomic since they are performed by the
+current lock holder.
+Returns true if the decrement was made, false if not.
+@return true if decr occurs */
UNIV_INLINE
-lint
-rw_lock_lock_word_incr(
+bool
+rw_lock_lock_word_decr(
/*===================*/
rw_lock_t* lock, /*!< in/out: rw-lock */
- ulint amount) /*!< in: amount of increment */
+ ulint amount, /*!< in: amount to decrement */
+	lint		threshold)	/*!< in: threshold the lock_word
+					must exceed */
{
-#ifdef INNODB_RW_LOCKS_USE_ATOMICS
- return(os_atomic_increment_lint(&lock->lock_word, amount));
-#else /* INNODB_RW_LOCKS_USE_ATOMICS */
lint local_lock_word;
- mutex_enter(&(lock->mutex));
-
- lock->lock_word += amount;
local_lock_word = lock->lock_word;
-
- mutex_exit(&(lock->mutex));
-
- return(local_lock_word);
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
-}
-
-/******************************************************************//**
-This function sets the lock->writer_thread and lock->recursive fields.
-For platforms where we are using atomic builtins instead of lock->mutex
-it sets the lock->writer_thread field using atomics to ensure memory
-ordering. Note that it is assumed that the caller of this function
-effectively owns the lock i.e.: nobody else is allowed to modify
-lock->writer_thread at this point in time.
-The protocol is that lock->writer_thread MUST be updated BEFORE the
-lock->recursive flag is set. */
-UNIV_INLINE
-void
-rw_lock_set_writer_id_and_recursion_flag(
-/*=====================================*/
- rw_lock_t* lock, /*!< in/out: lock to work on */
- ibool recursive) /*!< in: TRUE if recursion
- allowed */
-{
- os_thread_id_t curr_thread = os_thread_get_curr_id();
-
-#ifdef INNODB_RW_LOCKS_USE_ATOMICS
- os_thread_id_t local_thread;
- ibool success;
-
- /* Prevent Valgrind warnings about writer_thread being
- uninitialized. It does not matter if writer_thread is
- uninitialized, because we are comparing writer_thread against
- itself, and the operation should always succeed. */
- UNIV_MEM_VALID(&lock->writer_thread, sizeof lock->writer_thread);
-
- local_thread = lock->writer_thread;
- success = os_compare_and_swap_thread_id(
- &lock->writer_thread, local_thread, curr_thread);
- ut_a(success);
- lock->recursive = recursive;
-
-#else /* INNODB_RW_LOCKS_USE_ATOMICS */
-
- mutex_enter(&lock->mutex);
- lock->writer_thread = curr_thread;
- lock->recursive = recursive;
- mutex_exit(&lock->mutex);
-
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+ while (local_lock_word > threshold) {
+ if (my_atomic_caslint(&lock->lock_word,
+ &local_lock_word,
+ local_lock_word - amount)) {
+ return(true);
+ }
+ }
+ return(false);
}
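The same retry pattern in a standalone sketch, substituting C++11 std::atomic
for my_atomic_caslint (both write the observed value back into the
expected-value argument on failure, which is why the loop needs no explicit
reload):

	#include <atomic>

	static bool lock_word_decr(std::atomic<long>& word,
				   long amount, long threshold)
	{
		long cur = word.load(std::memory_order_relaxed);
		while (cur > threshold) {
			if (word.compare_exchange_weak(cur, cur - amount)) {
				return(true);	/* decrement installed */
			}
			/* cur was refreshed by the failed CAS; retry */
		}
		return(false);	/* word already at or below threshold */
	}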
/******************************************************************//**
Low-level function which tries to lock an rw-lock in s-mode. Performs no
spinning.
-@return TRUE if success */
+@return TRUE if success */
UNIV_INLINE
ibool
rw_lock_s_lock_low(
@@ -310,27 +237,20 @@ rw_lock_s_lock_low(
/*!< in: pass value; != 0, if the lock will be
passed to another thread to unlock */
const char* file_name, /*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
+ unsigned line) /*!< in: line where requested */
{
- if (!rw_lock_lock_word_decr(lock, 1)) {
+ if (!rw_lock_lock_word_decr(lock, 1, 0)) {
/* Locking did not succeed */
return(FALSE);
}
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name, line);
-#endif
+ ut_d(rw_lock_add_debug_info(lock, pass, RW_LOCK_S, file_name, line));
+
/* These debugging values are not set safely: they may be incorrect
or even refer to a line that is invalid for the file name. */
lock->last_s_file_name = file_name;
lock->last_s_line = line;
- if (srv_instrument_semaphores) {
- lock->thread_id = os_thread_get_curr_id();
- lock->file_name = file_name;
- lock->line = line;
- }
-
return(TRUE); /* locking succeeded */
}
@@ -338,7 +258,7 @@ rw_lock_s_lock_low(
NOTE! Use the corresponding macro, not directly this function! Lock an
rw-lock in shared mode for the current thread. If the rw-lock is locked
in exclusive mode, or there is an exclusive lock request waiting, the
-function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting for
+function spins a preset time (controlled by srv_n_spin_wait_rounds), waiting for
the lock, before suspending the thread. */
UNIV_INLINE
void
@@ -348,7 +268,7 @@ rw_lock_s_lock_func(
ulint pass, /*!< in: pass value; != 0, if the lock will
be passed to another thread to unlock */
const char* file_name,/*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
+ unsigned line) /*!< in: line where requested */
{
/* NOTE: As we do not know the thread ids for threads which have
s-locked a latch, and s-lockers will be served only after waiting
@@ -361,20 +281,13 @@ rw_lock_s_lock_func(
the threads which have s-locked a latch. This would use some CPU
time. */
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* see NOTE above */
- ut_ad(!rw_lock_own(lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!rw_lock_own_flagged(lock, RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
- if (rw_lock_s_lock_low(lock, pass, file_name, line)) {
+ if (!rw_lock_s_lock_low(lock, pass, file_name, line)) {
- return; /* Success */
- } else {
/* Did not succeed, try spin wait */
rw_lock_s_lock_spin(lock, pass, file_name, line);
-
- return;
}
}
@@ -382,48 +295,33 @@ rw_lock_s_lock_func(
NOTE! Use the corresponding macro, not directly this function! Lock an
rw-lock in exclusive mode for the current thread if the lock can be
obtained immediately.
-@return TRUE if success */
+@return TRUE if success */
UNIV_INLINE
ibool
rw_lock_x_lock_func_nowait(
/*=======================*/
rw_lock_t* lock, /*!< in: pointer to rw-lock */
const char* file_name,/*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
+ unsigned line) /*!< in: line where requested */
{
- ibool success;
- ibool local_recursive= lock->recursive;
-
-#ifdef INNODB_RW_LOCKS_USE_ATOMICS
- success = os_compare_and_swap_lint(&lock->lock_word, X_LOCK_DECR, 0);
-#else
-
- success = FALSE;
- mutex_enter(&(lock->mutex));
- if (lock->lock_word == X_LOCK_DECR) {
- lock->lock_word = 0;
- success = TRUE;
- }
- mutex_exit(&(lock->mutex));
-
-#endif
- /* Note: recursive must be loaded before writer_thread see
- comment for rw_lock_set_writer_id_and_recursion_flag().
- To achieve this we load it before os_compare_and_swap_lint(),
- which implies full memory barrier in current implementation. */
- if (success) {
- rw_lock_set_writer_id_and_recursion_flag(lock, TRUE);
-
- } else if (local_recursive
- && os_thread_eq(lock->writer_thread,
- os_thread_get_curr_id())) {
+ lint oldval = X_LOCK_DECR;
+
+ if (my_atomic_caslint(&lock->lock_word, &oldval, 0)) {
+ lock->writer_thread = os_thread_get_curr_id();
+
+ } else if (os_thread_eq(lock->writer_thread, os_thread_get_curr_id())) {
/* Relock: this lock_word modification is safe since no other
threads can modify (lock, unlock, or reserve) lock_word while
there is an exclusive writer and this is the writer thread. */
- if (lock->lock_word == 0) {
- lock->lock_word = -X_LOCK_DECR;
- } else {
+ if (lock->lock_word == 0 || lock->lock_word == -X_LOCK_HALF_DECR) {
+			/* There is 1 x-lock */
+ lock->lock_word -= X_LOCK_DECR;
+ } else if (lock->lock_word <= -X_LOCK_DECR) {
+ /* There are 2 or more x-locks */
lock->lock_word--;
+ } else {
+ /* Failure */
+ return(FALSE);
}
/* Watch for too many recursive locks */
@@ -433,15 +331,8 @@ rw_lock_x_lock_func_nowait(
/* Failure */
return(FALSE);
}
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
-#endif
-
- if (srv_instrument_semaphores) {
- lock->thread_id = os_thread_get_curr_id();
- lock->file_name = file_name;
- lock->line = line;
- }
+
+ ut_d(rw_lock_add_debug_info(lock, 0, RW_LOCK_X, file_name, line));
lock->last_x_file_name = file_name;
lock->last_x_line = line;
@@ -457,22 +348,21 @@ UNIV_INLINE
void
rw_lock_s_unlock_func(
/*==================*/
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
ulint pass, /*!< in: pass value; != 0, if the lock may have
been passed to another thread to unlock */
-#endif
+#endif /* UNIV_DEBUG */
rw_lock_t* lock) /*!< in/out: rw-lock */
{
ut_ad(lock->lock_word > -X_LOCK_DECR);
ut_ad(lock->lock_word != 0);
ut_ad(lock->lock_word < X_LOCK_DECR);
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED);
-#endif
+ ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_S));
/* Increment lock_word to indicate 1 less reader */
- if (rw_lock_lock_word_incr(lock, 1) == 0) {
+ lint lock_word = my_atomic_addlint(&lock->lock_word, 1) + 1;
+ if (lock_word == 0 || lock_word == -X_LOCK_HALF_DECR) {
/* wait_ex waiter exists. It may not be asleep, but we signal
anyway. We do not wake other waiters, because they can't
@@ -483,10 +373,6 @@ rw_lock_s_unlock_func(
}
ut_ad(rw_lock_validate(lock));
-
-#ifdef UNIV_SYNC_PERF_STAT
- rw_s_exit_count++;
-#endif
}
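Worked example of the two values tested: with two readers, lock_word =
X_LOCK_DECR - 2; a waiting writer's decrement of X_LOCK_DECR leaves -2, and
the readers' unlocks raise it through -1 to 0, at which point wait_ex_event
is signalled. If the waiting writer also holds the sx-latch, the same
sequence ends at -X_LOCK_HALF_DECR instead, which is why both values must be
checked.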
/******************************************************************//**
@@ -495,55 +381,98 @@ UNIV_INLINE
void
rw_lock_x_unlock_func(
/*==================*/
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
ulint pass, /*!< in: pass value; != 0, if the lock may have
been passed to another thread to unlock */
-#endif
+#endif /* UNIV_DEBUG */
rw_lock_t* lock) /*!< in/out: rw-lock */
{
- ut_ad(lock->lock_word == 0 || lock->lock_word <= -X_LOCK_DECR);
-
- /* lock->recursive flag also indicates if lock->writer_thread is
- valid or stale. If we are the last of the recursive callers
- then we must unset lock->recursive flag to indicate that the
- lock->writer_thread is now stale.
- Note that since we still hold the x-lock we can safely read the
- lock_word. */
+ ut_ad(lock->lock_word == 0 || lock->lock_word == -X_LOCK_HALF_DECR
+ || lock->lock_word <= -X_LOCK_DECR);
+
if (lock->lock_word == 0) {
/* Last caller in a possible recursive chain. */
- lock->recursive = FALSE;
+ lock->writer_thread = 0;
}
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX);
-#endif
+ ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_X));
- ulint x_lock_incr;
- if (lock->lock_word == 0) {
- x_lock_incr = X_LOCK_DECR;
- } else if (lock->lock_word == -X_LOCK_DECR) {
- x_lock_incr = X_LOCK_DECR;
- } else {
- ut_ad(lock->lock_word < -X_LOCK_DECR);
- x_lock_incr = 1;
- }
+ if (lock->lock_word == 0 || lock->lock_word == -X_LOCK_HALF_DECR) {
+ /* There is 1 x-lock */
+ /* atomic increment is needed, because it is last */
+ if (my_atomic_addlint(&lock->lock_word, X_LOCK_DECR) <= -X_LOCK_DECR) {
+ ut_error;
+ }
- if (rw_lock_lock_word_incr(lock, x_lock_incr) == X_LOCK_DECR) {
- /* Lock is now free. May have to signal read/write waiters.
+ /* This no longer has an X-lock but it may still have
+ an SX-lock. So it is now free for S-locks by other threads.
+ We need to signal read/write waiters.
We do not need to signal wait_ex waiters, since they cannot
exist when there is a writer. */
if (lock->waiters) {
- rw_lock_reset_waiter_flag(lock);
+ my_atomic_store32((int32*) &lock->waiters, 0);
os_event_set(lock->event);
sync_array_object_signalled();
}
+ } else if (lock->lock_word == -X_LOCK_DECR
+ || lock->lock_word == -(X_LOCK_DECR + X_LOCK_HALF_DECR)) {
+ /* There are 2 x-locks */
+ lock->lock_word += X_LOCK_DECR;
+ } else {
+ /* There are more than 2 x-locks. */
+ ut_ad(lock->lock_word < -X_LOCK_DECR);
+ lock->lock_word += 1;
}
ut_ad(rw_lock_validate(lock));
+}
-#ifdef UNIV_SYNC_PERF_STAT
- rw_x_exit_count++;
-#endif
+/******************************************************************//**
+Releases an sx mode lock. */
+UNIV_INLINE
+void
+rw_lock_sx_unlock_func(
+/*===================*/
+#ifdef UNIV_DEBUG
+ ulint pass, /*!< in: pass value; != 0, if the lock may have
+ been passed to another thread to unlock */
+#endif /* UNIV_DEBUG */
+ rw_lock_t* lock) /*!< in/out: rw-lock */
+{
+ ut_ad(rw_lock_get_sx_lock_count(lock));
+ ut_ad(lock->sx_recursive > 0);
+
+ --lock->sx_recursive;
+
+ ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_SX));
+
+ if (lock->sx_recursive == 0) {
+ /* Last caller in a possible recursive chain. */
+ if (lock->lock_word > 0) {
+ lock->writer_thread = 0;
+
+ if (my_atomic_addlint(&lock->lock_word, X_LOCK_HALF_DECR) <= 0) {
+ ut_error;
+ }
+ /* Lock is now free. May have to signal read/write
+ waiters. We do not need to signal wait_ex waiters,
+ since they cannot exist when there is an sx-lock
+ holder. */
+ if (lock->waiters) {
+ my_atomic_store32((int32*) &lock->waiters, 0);
+ os_event_set(lock->event);
+ sync_array_object_signalled();
+ }
+ } else {
+ /* still has x-lock */
+ ut_ad(lock->lock_word == -X_LOCK_HALF_DECR
+ || lock->lock_word <= -(X_LOCK_DECR
+ + X_LOCK_HALF_DECR));
+ lock->lock_word += X_LOCK_HALF_DECR;
+ }
+ }
+
+ ut_ad(rw_lock_validate(lock));
}
#ifdef UNIV_PFS_RWLOCK
@@ -558,27 +487,23 @@ pfs_rw_lock_create_func(
/*====================*/
mysql_pfs_key_t key, /*!< in: key registered with
performance schema */
- rw_lock_t* lock, /*!< in: pointer to memory */
+ rw_lock_t* lock, /*!< in/out: pointer to memory */
# ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
+ latch_level_t level, /*!< in: level */
# endif /* UNIV_DEBUG */
- const char* cmutex_name, /*!< in: mutex name */
const char* cfile_name, /*!< in: file name where created */
- ulint cline) /*!< in: file line where created */
+ unsigned cline) /*!< in: file line where created */
{
+ ut_d(new(lock) rw_lock_t());
+
/* Initialize the rwlock for performance schema */
lock->pfs_psi = PSI_RWLOCK_CALL(init_rwlock)(key, lock);
/* The actual function to initialize an rwlock */
rw_lock_create_func(lock,
-# ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
level,
-# endif /* UNIV_SYNC_DEBUG */
-# endif /* UNIV_DEBUG */
- cmutex_name,
+#endif /* UNIV_DEBUG */
cfile_name,
cline);
}
@@ -594,16 +519,19 @@ pfs_rw_lock_x_lock_func(
ulint pass, /*!< in: pass value; != 0, if the lock will
be passed to another thread to unlock */
const char* file_name,/*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
+ unsigned line) /*!< in: line where requested */
{
- if (lock->pfs_psi != NULL)
- {
+ if (lock->pfs_psi != NULL) {
PSI_rwlock_locker* locker;
PSI_rwlock_locker_state state;
- /* Record the entry of rw x lock request in performance schema */
+ /* Record the acquisition of a read-write lock in exclusive
+ mode in performance schema */
+/* Alias matching the new MySQL 5.7 PSI operation naming */
+#define PSI_RWLOCK_EXCLUSIVELOCK PSI_RWLOCK_WRITELOCK
+
locker = PSI_RWLOCK_CALL(start_rwlock_wrwait)(
- &state, lock->pfs_psi, PSI_RWLOCK_WRITELOCK,
+ &state, lock->pfs_psi, PSI_RWLOCK_EXCLUSIVELOCK,
file_name, static_cast<uint>(line));
rw_lock_x_lock_func(
@@ -612,9 +540,7 @@ pfs_rw_lock_x_lock_func(
if (locker != NULL) {
PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, 0);
}
- }
- else
- {
+ } else {
rw_lock_x_lock_func(lock, pass, file_name, line);
}
}
@@ -623,7 +549,7 @@ Performance schema instrumented wrap function for
rw_lock_x_lock_func_nowait()
NOTE! Please use the corresponding macro rw_lock_x_lock_func(),
not directly this function!
-@return TRUE if success */
+@return TRUE if success */
UNIV_INLINE
ibool
pfs_rw_lock_x_lock_func_nowait(
@@ -631,18 +557,20 @@ pfs_rw_lock_x_lock_func_nowait(
rw_lock_t* lock, /*!< in: pointer to rw-lock */
const char* file_name,/*!< in: file name where lock
requested */
- ulint line) /*!< in: line where requested */
+ unsigned line) /*!< in: line where requested */
{
- ibool ret;
+ ibool ret;
- if (lock->pfs_psi != NULL)
- {
+ if (lock->pfs_psi != NULL) {
PSI_rwlock_locker* locker;
- PSI_rwlock_locker_state state;
+ PSI_rwlock_locker_state state;
+
+ /* Record the acquisition of a read-write trylock in exclusive
+ mode in performance schema */
- /* Record the entry of rw x lock request in performance schema */
+#define PSI_RWLOCK_TRYEXCLUSIVELOCK PSI_RWLOCK_TRYWRITELOCK
locker = PSI_RWLOCK_CALL(start_rwlock_wrwait)(
- &state, lock->pfs_psi, PSI_RWLOCK_WRITELOCK,
+ &state, lock->pfs_psi, PSI_RWLOCK_TRYEXCLUSIVELOCK,
file_name, static_cast<uint>(line));
ret = rw_lock_x_lock_func_nowait(lock, file_name, line);
@@ -651,9 +579,7 @@ pfs_rw_lock_x_lock_func_nowait(
PSI_RWLOCK_CALL(end_rwlock_wrwait)(
locker, static_cast<int>(ret));
}
- }
- else
- {
+ } else {
ret = rw_lock_x_lock_func_nowait(lock, file_name, line);
}
@@ -669,8 +595,7 @@ pfs_rw_lock_free_func(
/*==================*/
rw_lock_t* lock) /*!< in: pointer to rw-lock */
{
- if (lock->pfs_psi != NULL)
- {
+ if (lock->pfs_psi != NULL) {
PSI_RWLOCK_CALL(destroy_rwlock)(lock->pfs_psi);
lock->pfs_psi = NULL;
}
@@ -691,16 +616,16 @@ pfs_rw_lock_s_lock_func(
thread to unlock */
const char* file_name,/*!< in: file name where lock
requested */
- ulint line) /*!< in: line where requested */
+ unsigned line) /*!< in: line where requested */
{
- if (lock->pfs_psi != NULL)
- {
+ if (lock->pfs_psi != NULL) {
PSI_rwlock_locker* locker;
PSI_rwlock_locker_state state;
+#define PSI_RWLOCK_SHAREDLOCK PSI_RWLOCK_READLOCK
		/* Instrumented to inform we are acquiring a shared rwlock */
locker = PSI_RWLOCK_CALL(start_rwlock_rdwait)(
- &state, lock->pfs_psi, PSI_RWLOCK_READLOCK,
+ &state, lock->pfs_psi, PSI_RWLOCK_SHAREDLOCK,
file_name, static_cast<uint>(line));
rw_lock_s_lock_func(lock, pass, file_name, line);
@@ -708,19 +633,50 @@ pfs_rw_lock_s_lock_func(
if (locker != NULL) {
PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, 0);
}
- }
- else
- {
+ } else {
rw_lock_s_lock_func(lock, pass, file_name, line);
}
+}
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_sx_lock_func()
+NOTE! Please use the corresponding macro rw_lock_sx_lock(), not
+directly this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_sx_lock_func(
+/*====================*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the
+ lock will be passed to another
+ thread to unlock */
+ const char* file_name,/*!< in: file name where lock
+ requested */
+ unsigned line) /*!< in: line where requested */
+{
+ if (lock->pfs_psi != NULL) {
+ PSI_rwlock_locker* locker;
+ PSI_rwlock_locker_state state;
+
+#define PSI_RWLOCK_SHAREDEXCLUSIVELOCK PSI_RWLOCK_WRITELOCK
+		/* Instrumented to inform we are acquiring a shared
+		exclusive rwlock */
+ locker = PSI_RWLOCK_CALL(start_rwlock_wrwait)(
+ &state, lock->pfs_psi, PSI_RWLOCK_SHAREDEXCLUSIVELOCK,
+ file_name, static_cast<uint>(line));
+
+ rw_lock_sx_lock_func(lock, pass, file_name, line);
- return;
+ if (locker != NULL) {
+ PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, 0);
+ }
+ } else {
+ rw_lock_sx_lock_func(lock, pass, file_name, line);
+ }
}
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_s_lock_func()
NOTE! Please use the corresponding macro rw_lock_s_lock(), not
directly this function!
-@return TRUE if success */
+@return TRUE if success */
UNIV_INLINE
ibool
pfs_rw_lock_s_lock_low(
@@ -730,18 +686,18 @@ pfs_rw_lock_s_lock_low(
lock will be passed to another
thread to unlock */
const char* file_name, /*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
+ unsigned line) /*!< in: line where requested */
{
- ibool ret;
+ ibool ret;
- if (lock->pfs_psi != NULL)
- {
+ if (lock->pfs_psi != NULL) {
PSI_rwlock_locker* locker;
PSI_rwlock_locker_state state;
+#define PSI_RWLOCK_TRYSHAREDLOCK PSI_RWLOCK_TRYREADLOCK
		/* Instrumented to inform we are acquiring a shared rwlock */
locker = PSI_RWLOCK_CALL(start_rwlock_rdwait)(
- &state, lock->pfs_psi, PSI_RWLOCK_READLOCK,
+ &state, lock->pfs_psi, PSI_RWLOCK_TRYSHAREDLOCK,
file_name, static_cast<uint>(line));
ret = rw_lock_s_lock_low(lock, pass, file_name, line);
@@ -750,15 +706,54 @@ pfs_rw_lock_s_lock_low(
PSI_RWLOCK_CALL(end_rwlock_rdwait)(
locker, static_cast<int>(ret));
}
- }
- else
- {
+ } else {
ret = rw_lock_s_lock_low(lock, pass, file_name, line);
}
return(ret);
}
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_sx_lock_nowait()
+NOTE! Please use the corresponding macro, not
+directly this function!
+@return TRUE if success */
+UNIV_INLINE
+ibool
+pfs_rw_lock_sx_lock_low(
+/*====================*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the
+ lock will be passed to another
+ thread to unlock */
+ const char* file_name, /*!< in: file name where lock requested */
+ unsigned line) /*!< in: line where requested */
+{
+ ibool ret;
+
+ if (lock->pfs_psi != NULL) {
+ PSI_rwlock_locker* locker;
+ PSI_rwlock_locker_state state;
+
+#define PSI_RWLOCK_TRYSHAREDEXCLUSIVELOCK PSI_RWLOCK_TRYWRITELOCK
+	/* Instrumented to inform we are acquiring a shared
+	exclusive rwlock */
+ locker = PSI_RWLOCK_CALL(start_rwlock_rdwait)(
+ &state, lock->pfs_psi,
+ PSI_RWLOCK_TRYSHAREDEXCLUSIVELOCK,
+ file_name, static_cast<uint>(line));
+
+ ret = rw_lock_sx_lock_low(lock, pass, file_name, line);
+
+ if (locker != NULL) {
+ PSI_RWLOCK_CALL(end_rwlock_rdwait)(
+ locker, static_cast<int>(ret));
+ }
+ } else {
+ ret = rw_lock_sx_lock_low(lock, pass, file_name, line);
+ }
+ return(ret);
+}
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_x_unlock_func()
NOTE! Please use the corresponding macro rw_lock_x_unlock(), not directly
@@ -767,21 +762,49 @@ UNIV_INLINE
void
pfs_rw_lock_x_unlock_func(
/*======================*/
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
ulint pass, /*!< in: pass value; != 0, if the
lock may have been passed to another
thread to unlock */
-#endif
+#endif /* UNIV_DEBUG */
rw_lock_t* lock) /*!< in/out: rw-lock */
{
/* Inform performance schema we are unlocking the lock */
- if (lock->pfs_psi != NULL)
+ if (lock->pfs_psi != NULL) {
PSI_RWLOCK_CALL(unlock_rwlock)(lock->pfs_psi);
+ }
rw_lock_x_unlock_func(
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
pass,
-#endif
+#endif /* UNIV_DEBUG */
+ lock);
+}
+
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_sx_unlock_func()
+NOTE! Please use the corresponding macro rw_lock_sx_unlock(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_sx_unlock_func(
+/*======================*/
+#ifdef UNIV_DEBUG
+ ulint pass, /*!< in: pass value; != 0, if the
+ lock may have been passed to another
+ thread to unlock */
+#endif /* UNIV_DEBUG */
+ rw_lock_t* lock) /*!< in/out: rw-lock */
+{
+ /* Inform performance schema we are unlocking the lock */
+ if (lock->pfs_psi != NULL) {
+ PSI_RWLOCK_CALL(unlock_rwlock)(lock->pfs_psi);
+ }
+
+ rw_lock_sx_unlock_func(
+#ifdef UNIV_DEBUG
+ pass,
+#endif /* UNIV_DEBUG */
lock);
}
@@ -793,21 +816,22 @@ UNIV_INLINE
void
pfs_rw_lock_s_unlock_func(
/*======================*/
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
ulint pass, /*!< in: pass value; != 0, if the
lock may have been passed to another
thread to unlock */
-#endif
+#endif /* UNIV_DEBUG */
rw_lock_t* lock) /*!< in/out: rw-lock */
{
/* Inform performance schema we are unlocking the lock */
- if (lock->pfs_psi != NULL)
+ if (lock->pfs_psi != NULL) {
PSI_RWLOCK_CALL(unlock_rwlock)(lock->pfs_psi);
+ }
rw_lock_s_unlock_func(
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
pass,
-#endif
+#endif /* UNIV_DEBUG */
lock);
}
diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
index ba95b889c05..0d8bd0a4509 100644
--- a/storage/innobase/include/sync0sync.h
+++ b/storage/innobase/include/sync0sync.h
@@ -35,19 +35,6 @@ Created 9/5/1995 Heikki Tuuri
#define sync0sync_h
#include "univ.i"
-#include "sync0types.h"
-#include "ut0lst.h"
-#include "ut0mem.h"
-#include "os0thread.h"
-#include "os0sync.h"
-#include "sync0arr.h"
-
-/** Enable semaphore request instrumentation */
-extern my_bool srv_instrument_semaphores;
-
-#if defined(UNIV_DEBUG) && !defined(UNIV_HOTBACKUP)
-extern "C" my_bool timed_mutexes;
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
#if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
@@ -82,759 +69,73 @@ extern mysql_pfs_key_t ibuf_bitmap_mutex_key;
extern mysql_pfs_key_t ibuf_mutex_key;
extern mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key;
extern mysql_pfs_key_t log_sys_mutex_key;
+extern mysql_pfs_key_t log_sys_write_mutex_key;
+extern mysql_pfs_key_t log_cmdq_mutex_key;
extern mysql_pfs_key_t log_flush_order_mutex_key;
-# ifndef HAVE_ATOMIC_BUILTINS
-extern mysql_pfs_key_t server_mutex_key;
-# endif /* !HAVE_ATOMIC_BUILTINS */
-# ifdef UNIV_MEM_DEBUG
-extern mysql_pfs_key_t mem_hash_mutex_key;
-# endif /* UNIV_MEM_DEBUG */
-extern mysql_pfs_key_t mem_pool_mutex_key;
extern mysql_pfs_key_t mutex_list_mutex_key;
-extern mysql_pfs_key_t purge_sys_bh_mutex_key;
+extern mysql_pfs_key_t recalc_pool_mutex_key;
+extern mysql_pfs_key_t page_cleaner_mutex_key;
+extern mysql_pfs_key_t purge_sys_pq_mutex_key;
extern mysql_pfs_key_t recv_sys_mutex_key;
extern mysql_pfs_key_t recv_writer_mutex_key;
-extern mysql_pfs_key_t rseg_mutex_key;
-# ifdef UNIV_SYNC_DEBUG
+extern mysql_pfs_key_t rtr_active_mutex_key;
+extern mysql_pfs_key_t rtr_match_mutex_key;
+extern mysql_pfs_key_t rtr_path_mutex_key;
+extern mysql_pfs_key_t rtr_ssn_mutex_key;
+extern mysql_pfs_key_t redo_rseg_mutex_key;
+extern mysql_pfs_key_t noredo_rseg_mutex_key;
+extern mysql_pfs_key_t page_zip_stat_per_index_mutex_key;
+# ifdef UNIV_DEBUG
extern mysql_pfs_key_t rw_lock_debug_mutex_key;
-# endif /* UNIV_SYNC_DEBUG */
+# endif /* UNIV_DEBUG */
extern mysql_pfs_key_t rw_lock_list_mutex_key;
extern mysql_pfs_key_t rw_lock_mutex_key;
-extern mysql_pfs_key_t srv_dict_tmpfile_mutex_key;
extern mysql_pfs_key_t srv_innodb_monitor_mutex_key;
extern mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
-extern mysql_pfs_key_t srv_threads_mutex_key;
extern mysql_pfs_key_t srv_monitor_file_mutex_key;
-# ifdef UNIV_SYNC_DEBUG
-extern mysql_pfs_key_t sync_thread_mutex_key;
-# endif /* UNIV_SYNC_DEBUG */
extern mysql_pfs_key_t buf_dblwr_mutex_key;
extern mysql_pfs_key_t trx_undo_mutex_key;
extern mysql_pfs_key_t trx_mutex_key;
-extern mysql_pfs_key_t lock_sys_mutex_key;
-extern mysql_pfs_key_t lock_sys_wait_mutex_key;
+extern mysql_pfs_key_t trx_pool_mutex_key;
+extern mysql_pfs_key_t trx_pool_manager_mutex_key;
+extern mysql_pfs_key_t lock_mutex_key;
+extern mysql_pfs_key_t lock_wait_mutex_key;
extern mysql_pfs_key_t trx_sys_mutex_key;
extern mysql_pfs_key_t srv_sys_mutex_key;
-extern mysql_pfs_key_t srv_sys_tasks_mutex_key;
-#ifndef HAVE_ATOMIC_BUILTINS
-extern mysql_pfs_key_t srv_conc_mutex_key;
-#endif /* !HAVE_ATOMIC_BUILTINS */
-#ifndef HAVE_ATOMIC_BUILTINS_64
-extern mysql_pfs_key_t monitor_mutex_key;
-#endif /* !HAVE_ATOMIC_BUILTINS_64 */
-extern mysql_pfs_key_t event_os_mutex_key;
-extern mysql_pfs_key_t ut_list_mutex_key;
-extern mysql_pfs_key_t os_mutex_key;
+extern mysql_pfs_key_t srv_threads_mutex_key;
+extern mysql_pfs_key_t event_mutex_key;
+extern mysql_pfs_key_t event_manager_mutex_key;
+extern mysql_pfs_key_t sync_array_mutex_key;
+extern mysql_pfs_key_t thread_mutex_key;
extern mysql_pfs_key_t zip_pad_mutex_key;
+extern mysql_pfs_key_t row_drop_list_mutex_key;
#endif /* UNIV_PFS_MUTEX */
-/******************************************************************//**
-Initializes the synchronization data structures. */
-UNIV_INTERN
-void
-sync_init(void);
-/*===========*/
-/******************************************************************//**
-Frees the resources in synchronization data structures. */
-UNIV_INTERN
-void
-sync_close(void);
-/*===========*/
-
-#undef mutex_free /* Fix for MacOS X */
-
-#ifdef UNIV_PFS_MUTEX
-/**********************************************************************
-Following mutex APIs would be performance schema instrumented
-if "UNIV_PFS_MUTEX" is defined:
-
-mutex_create
-mutex_enter
-mutex_exit
-mutex_enter_nowait
-mutex_free
-
-These mutex APIs will point to corresponding wrapper functions that contain
-the performance schema instrumentation if "UNIV_PFS_MUTEX" is defined.
-The instrumented wrapper functions have the prefix of "innodb_".
-
-NOTE! The following macro should be used in mutex operation, not the
-corresponding function. */
-
-/******************************************************************//**
-Creates, or rather, initializes a mutex object to a specified memory
-location (which must be appropriately aligned). The mutex is initialized
-in the reset state. Explicit freeing of the mutex with mutex_free is
-necessary only if the memory block containing it is freed. */
+#ifdef UNIV_PFS_RWLOCK
+/* Following are rwlock keys used to register with MySQL
+performance schema */
+extern mysql_pfs_key_t btr_search_latch_key;
+extern mysql_pfs_key_t buf_block_lock_key;
# ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
-# define mutex_create(K, M, level) \
- pfs_mutex_create_func((K), (M), #M, (level), __FILE__, __LINE__)
-# else
-# define mutex_create(K, M, level) \
- pfs_mutex_create_func((K), (M), #M, __FILE__, __LINE__)
-# endif/* UNIV_SYNC_DEBUG */
-# else
-# define mutex_create(K, M, level) \
- pfs_mutex_create_func((K), (M), #M, __FILE__, __LINE__)
-# endif /* UNIV_DEBUG */
-
-# define mutex_enter(M) \
- pfs_mutex_enter_func((M), __FILE__, __LINE__)
-
-# define mutex_enter_nowait(M) \
- pfs_mutex_enter_nowait_func((M), __FILE__, __LINE__)
-
-# define mutex_exit(M) pfs_mutex_exit_func(M)
-
-# define mutex_free(M) pfs_mutex_free_func(M)
-
-#else /* UNIV_PFS_MUTEX */
-
-/* If "UNIV_PFS_MUTEX" is not defined, the mutex APIs point to
-original non-instrumented functions */
-# ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
-# define mutex_create(K, M, level) \
- mutex_create_func((M), #M, (level), __FILE__, __LINE__)
-# else /* UNIV_SYNC_DEBUG */
-# define mutex_create(K, M, level) \
- mutex_create_func((M), #M, __FILE__, __LINE__)
-# endif /* UNIV_SYNC_DEBUG */
-# else /* UNIV_DEBUG */
-# define mutex_create(K, M, level) \
- mutex_create_func((M), #M, __FILE__, __LINE__)
-# endif /* UNIV_DEBUG */
-
-# define mutex_enter(M) mutex_enter_func((M), __FILE__, __LINE__)
-
-# define mutex_enter_nowait(M) \
- mutex_enter_nowait_func((M), __FILE__, __LINE__)
-
-# define mutex_exit(M) mutex_exit_func(M)
-
-# define mutex_free(M) mutex_free_func(M)
-
-#endif /* UNIV_PFS_MUTEX */
-
-/******************************************************************//**
-Creates, or rather, initializes a mutex object in a specified memory
-location (which must be appropriately aligned). The mutex is initialized
-in the reset state. Explicit freeing of the mutex with mutex_free is
-necessary only if the memory block containing it is freed. */
-UNIV_INTERN
-void
-mutex_create_func(
-/*==============*/
- ib_mutex_t* mutex, /*!< in: pointer to memory */
- const char* cmutex_name, /*!< in: mutex name */
-#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
-#endif /* UNIV_DEBUG */
- const char* cfile_name, /*!< in: file name where created */
- ulint cline); /*!< in: file line where created */
-
-/******************************************************************//**
-NOTE! Use the corresponding macro mutex_free(), not directly this function!
-Calling this function is obligatory only if the memory buffer containing
-the mutex is freed. Removes a mutex object from the mutex list. The mutex
-is checked to be in the reset state. */
-UNIV_INTERN
-void
-mutex_free_func(
-/*============*/
- ib_mutex_t* mutex); /*!< in: mutex */
-/**************************************************************//**
-NOTE! The following macro should be used in mutex locking, not the
-corresponding function. */
-
-/* NOTE! currently same as mutex_enter! */
-
-#define mutex_enter_fast(M) mutex_enter_func((M), __FILE__, __LINE__)
-/******************************************************************//**
-NOTE! Use the corresponding macro in the header file, not this function
-directly. Locks a mutex for the current thread. If the mutex is reserved
-the function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting
-for the mutex before suspending the thread. */
-UNIV_INLINE
-void
-mutex_enter_func(
-/*=============*/
- ib_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where locked */
- ulint line); /*!< in: line where locked */
-/********************************************************************//**
-NOTE! Use the corresponding macro in the header file, not this function
-directly. Tries to lock the mutex for the current thread. If the lock is not
-acquired immediately, returns with return value 1.
-@return 0 if succeed, 1 if not */
-UNIV_INTERN
-ulint
-mutex_enter_nowait_func(
-/*====================*/
- ib_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where mutex
- requested */
- ulint line); /*!< in: line where requested */
-/******************************************************************//**
-NOTE! Use the corresponding macro mutex_exit(), not directly this function!
-Unlocks a mutex owned by the current thread. */
-UNIV_INLINE
-void
-mutex_exit_func(
-/*============*/
- ib_mutex_t* mutex); /*!< in: pointer to mutex */
-
-
-#ifdef UNIV_PFS_MUTEX
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_create(), not directly
-this function!
-A wrapper function for mutex_create_func(), registers the mutex
-with peformance schema if "UNIV_PFS_MUTEX" is defined when
-creating the mutex */
-UNIV_INLINE
-void
-pfs_mutex_create_func(
-/*==================*/
- PSI_mutex_key key, /*!< in: Performance Schema key */
- ib_mutex_t* mutex, /*!< in: pointer to memory */
- const char* cmutex_name, /*!< in: mutex name */
-# ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
+extern mysql_pfs_key_t buf_block_debug_latch_key;
# endif /* UNIV_DEBUG */
- const char* cfile_name, /*!< in: file name where created */
- ulint cline); /*!< in: file line where created */
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_enter(), not directly
-this function!
-This is a performance schema instrumented wrapper function for
-mutex_enter_func(). */
-UNIV_INLINE
-void
-pfs_mutex_enter_func(
-/*=================*/
- ib_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where locked */
- ulint line); /*!< in: line where locked */
-/********************************************************************//**
-NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly
-this function!
-This is a performance schema instrumented wrapper function for
-mutex_enter_nowait_func.
-@return 0 if succeed, 1 if not */
-UNIV_INLINE
-ulint
-pfs_mutex_enter_nowait_func(
-/*========================*/
- ib_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where mutex
- requested */
- ulint line); /*!< in: line where requested */
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_exit(), not directly
-this function!
-A wrap function of mutex_exit_func() with peformance schema instrumentation.
-Unlocks a mutex owned by the current thread. */
-UNIV_INLINE
-void
-pfs_mutex_exit_func(
-/*================*/
- ib_mutex_t* mutex); /*!< in: pointer to mutex */
-
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_free(), not directly
-this function!
-Wrapper function for mutex_free_func(). Also destroys the performance
-schema probes when freeing the mutex */
-UNIV_INLINE
-void
-pfs_mutex_free_func(
-/*================*/
- ib_mutex_t* mutex); /*!< in: mutex */
-
-#endif /* UNIV_PFS_MUTEX */
-
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Returns TRUE if no mutex or rw-lock is currently locked.
-Works only in the debug version.
-@return TRUE if no mutexes and rw-locks reserved */
-UNIV_INTERN
-ibool
-sync_all_freed(void);
-/*================*/
-#endif /* UNIV_SYNC_DEBUG */
-/*#####################################################################
-FUNCTION PROTOTYPES FOR DEBUGGING */
-/*******************************************************************//**
-Prints wait info of the sync system. */
-UNIV_INTERN
-void
-sync_print_wait_info(
-/*=================*/
- FILE* file); /*!< in: file where to print */
-/*******************************************************************//**
-Prints info of the sync system. */
-UNIV_INTERN
-void
-sync_print(
-/*=======*/
- FILE* file); /*!< in: file where to print */
-#ifdef UNIV_DEBUG
-/******************************************************************//**
-Checks that the mutex has been initialized.
-@return TRUE */
-UNIV_INTERN
-ibool
-mutex_validate(
-/*===========*/
- const ib_mutex_t* mutex); /*!< in: mutex */
-/******************************************************************//**
-Checks that the current thread owns the mutex. Works only
-in the debug version.
-@return TRUE if owns */
-UNIV_INTERN
-ibool
-mutex_own(
-/*======*/
- const ib_mutex_t* mutex) /*!< in: mutex */
- MY_ATTRIBUTE((warn_unused_result));
-#endif /* UNIV_DEBUG */
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Adds a latch and its level in the thread level array. Allocates the memory
-for the array if called first time for this OS thread. Makes the checks
-against other latch levels stored in the array for this thread. */
-UNIV_INTERN
-void
-sync_thread_add_level(
-/*==================*/
- void* latch, /*!< in: pointer to a mutex or an rw-lock */
- ulint level, /*!< in: level in the latching order; if
- SYNC_LEVEL_VARYING, nothing is done */
- ibool relock) /*!< in: TRUE if re-entering an x-lock */
- MY_ATTRIBUTE((nonnull));
-/******************************************************************//**
-Removes a latch from the thread level array if it is found there.
-@return TRUE if found in the array; it is no error if the latch is
-not found, as we presently are not able to determine the level for
-every latch reservation the program does */
-UNIV_INTERN
-ibool
-sync_thread_reset_level(
-/*====================*/
- void* latch); /*!< in: pointer to a mutex or an rw-lock */
-/******************************************************************//**
-Checks if the level array for the current thread contains a
-mutex or rw-latch at the specified level.
-@return a matching latch, or NULL if not found */
-UNIV_INTERN
-void*
-sync_thread_levels_contains(
-/*========================*/
- ulint level); /*!< in: latching order level
- (SYNC_DICT, ...)*/
-/******************************************************************//**
-Checks that the level array for the current thread is empty.
-@return a latch, or NULL if empty except the exceptions specified below */
-UNIV_INTERN
-void*
-sync_thread_levels_nonempty_gen(
-/*============================*/
- ibool dict_mutex_allowed) /*!< in: TRUE if dictionary mutex is
- allowed to be owned by the thread */
- MY_ATTRIBUTE((warn_unused_result));
-/******************************************************************//**
-Checks if the level array for the current thread is empty,
-except for data dictionary latches. */
-#define sync_thread_levels_empty_except_dict() \
- (!sync_thread_levels_nonempty_gen(TRUE))
-/******************************************************************//**
-Checks if the level array for the current thread is empty,
-except for the btr_search_latch.
-@return a latch, or NULL if empty except the exceptions specified below */
-UNIV_INTERN
-void*
-sync_thread_levels_nonempty_trx(
-/*============================*/
- ibool has_search_latch)
- /*!< in: TRUE if and only if the thread
- is supposed to hold btr_search_latch */
- MY_ATTRIBUTE((warn_unused_result));
-
-/******************************************************************//**
-Gets the debug information for a reserved mutex. */
-UNIV_INTERN
-void
-mutex_get_debug_info(
-/*=================*/
- ib_mutex_t* mutex, /*!< in: mutex */
- const char** file_name, /*!< out: file where requested */
- ulint* line, /*!< out: line where requested */
- os_thread_id_t* thread_id); /*!< out: id of the thread which owns
- the mutex */
-/******************************************************************//**
-Counts currently reserved mutexes. Works only in the debug version.
-@return number of reserved mutexes */
-UNIV_INTERN
-ulint
-mutex_n_reserved(void);
-/*==================*/
-#endif /* UNIV_SYNC_DEBUG */
-/******************************************************************//**
-NOT to be used outside this module except in debugging! Gets the value
-of the lock word. */
-UNIV_INLINE
-lock_word_t
-mutex_get_lock_word(
-/*================*/
- const ib_mutex_t* mutex); /*!< in: mutex */
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-NOT to be used outside this module except in debugging! Gets the waiters
-field in a mutex.
-@return value to set */
-UNIV_INLINE
-ulint
-mutex_get_waiters(
-/*==============*/
- const ib_mutex_t* mutex); /*!< in: mutex */
-#endif /* UNIV_SYNC_DEBUG */
-
-/*
- LATCHING ORDER WITHIN THE DATABASE
- ==================================
-
-The mutex or latch in the central memory object, for instance, a rollback
-segment object, must be acquired before acquiring the latch or latches to
-the corresponding file data structure. In the latching order below, these
-file page object latches are placed immediately below the corresponding
-central memory object latch or mutex.
-
-Synchronization object Notes
----------------------- -----
-
-Dictionary mutex If we have a pointer to a dictionary
-| object, e.g., a table, it can be
-| accessed without reserving the
-| dictionary mutex. We must have a
-| reservation, a memoryfix, to the
-| appropriate table object in this case,
-| and the table must be explicitly
-| released later.
-V
-Dictionary header
-|
-V
-Secondary index tree latch The tree latch protects also all
-| the B-tree non-leaf pages. These
-V can be read with the page only
-Secondary index non-leaf bufferfixed to save CPU time,
-| no s-latch is needed on the page.
-| Modification of a page requires an
-| x-latch on the page, however. If a
-| thread owns an x-latch to the tree,
-| it is allowed to latch non-leaf pages
-| even after it has acquired the fsp
-| latch.
-V
-Secondary index leaf The latch on the secondary index leaf
-| can be kept while accessing the
-| clustered index, to save CPU time.
-V
-Clustered index tree latch To increase concurrency, the tree
-| latch is usually released when the
-| leaf page latch has been acquired.
-V
-Clustered index non-leaf
-|
-V
-Clustered index leaf
-|
-V
-Transaction system header
-|
-V
-Transaction undo mutex The undo log entry must be written
-| before any index page is modified.
-| Transaction undo mutex is for the undo
-| logs the analogue of the tree latch
-| for a B-tree. If a thread has the
-| trx undo mutex reserved, it is allowed
-| to latch the undo log pages in any
-| order, and also after it has acquired
-| the fsp latch.
-V
-Rollback segment mutex The rollback segment mutex must be
-| reserved, if, e.g., a new page must
-| be added to an undo log. The rollback
-| segment and the undo logs in its
-| history list can be seen as an
-| analogue of a B-tree, and the latches
-| reserved similarly, using a version of
-| lock-coupling. If an undo log must be
-| extended by a page when inserting an
-| undo log record, this corresponds to
-| a pessimistic insert in a B-tree.
-V
-Rollback segment header
-|
-V
-Purge system latch
-|
-V
-Undo log pages If a thread owns the trx undo mutex,
-| or for a log in the history list, the
-| rseg mutex, it is allowed to latch
-| undo log pages in any order, and even
-| after it has acquired the fsp latch.
-| If a thread does not have the
-| appropriate mutex, it is allowed to
-| latch only a single undo log page in
-| a mini-transaction.
-V
-File space management latch If a mini-transaction must allocate
-| several file pages, it can do that,
-| because it keeps the x-latch to the
-| file space management in its memo.
-V
-File system pages
-|
-V
-lock_sys_wait_mutex Mutex protecting lock timeout data
-|
-V
-lock_sys_mutex Mutex protecting lock_sys_t
-|
-V
-trx_sys->mutex Mutex protecting trx_sys_t
-|
-V
-Threads mutex Background thread scheduling mutex
-|
-V
-query_thr_mutex Mutex protecting query threads
-|
-V
-trx_mutex Mutex protecting trx_t fields
-|
-V
-Search system mutex
-|
-V
-Buffer pool mutex
-|
-V
-Log mutex
-|
-Any other latch
-|
-V
-Memory pool mutex */
-
-/* Latching order levels. If you modify these, you have to also update
-sync_thread_add_level(). */
-
-/* User transaction locks are higher than any of the latch levels below:
-no latches are allowed when a thread goes to wait for a normal table
-or row lock! */
-#define SYNC_USER_TRX_LOCK 9999
-#define SYNC_NO_ORDER_CHECK 3000 /* this can be used to suppress
- latching order checking */
-#define SYNC_LEVEL_VARYING 2000 /* Level is varying. Only used with
- buffer pool page locks, which do not
- have a fixed level, but instead have
- their level set after the page is
- locked; see e.g.
- ibuf_bitmap_get_map_page(). */
-#define SYNC_TRX_I_S_RWLOCK 1910 /* Used for
- trx_i_s_cache_t::rw_lock */
-#define SYNC_TRX_I_S_LAST_READ 1900 /* Used for
- trx_i_s_cache_t::last_read_mutex */
-#define SYNC_FILE_FORMAT_TAG 1200 /* Used to serialize access to the
- file format tag */
-#define SYNC_DICT_OPERATION 1010 /* table create, drop, etc. reserve
- this in X-mode; implicit or backround
- operations purge, rollback, foreign
- key checks reserve this in S-mode */
-#define SYNC_FTS_CACHE 1005 /* FTS cache rwlock */
-#define SYNC_DICT 1000
-#define SYNC_DICT_AUTOINC_MUTEX 999
-#define SYNC_STATS_AUTO_RECALC 997
-#define SYNC_DICT_HEADER 995
-#define SYNC_IBUF_HEADER 914
-#define SYNC_IBUF_PESS_INSERT_MUTEX 912
-/*-------------------------------*/
-#define SYNC_INDEX_TREE 900
-#define SYNC_TREE_NODE_NEW 892
-#define SYNC_TREE_NODE_FROM_HASH 891
-#define SYNC_TREE_NODE 890
-#define SYNC_PURGE_LATCH 800
-#define SYNC_TRX_UNDO 700
-#define SYNC_RSEG 600
-#define SYNC_RSEG_HEADER_NEW 591
-#define SYNC_RSEG_HEADER 590
-#define SYNC_TRX_UNDO_PAGE 570
-#define SYNC_EXTERN_STORAGE 500
-#define SYNC_FSP 400
-#define SYNC_FSP_PAGE 395
-#define SYNC_STATS_DEFRAG 390
-/*------------------------------------- Change buffer headers */
-#define SYNC_IBUF_MUTEX 370 /* ibuf_mutex */
-/*------------------------------------- Change buffer tree */
-#define SYNC_IBUF_INDEX_TREE 360
-#define SYNC_IBUF_TREE_NODE_NEW 359
-#define SYNC_IBUF_TREE_NODE 358
-#define SYNC_IBUF_BITMAP_MUTEX 351
-#define SYNC_IBUF_BITMAP 350
-/*------------------------------------- Change log for online create index */
-#define SYNC_INDEX_ONLINE_LOG 340
-/*------------------------------------- MySQL query cache mutex */
-/*------------------------------------- MySQL binlog mutex */
-/*-------------------------------*/
-#define SYNC_LOCK_WAIT_SYS 300
-#define SYNC_LOCK_SYS 299
-#define SYNC_TRX_SYS 298
-#define SYNC_TRX 297
-#define SYNC_THREADS 295
-#define SYNC_REC_LOCK 294
-#define SYNC_TRX_SYS_HEADER 290
-#define SYNC_PURGE_QUEUE 200
-#define SYNC_LOG 170
-#define SYNC_LOG_FLUSH_ORDER 147
-#define SYNC_RECV 168
-#define SYNC_FTS_TOKENIZE 167
-#define SYNC_FTS_CACHE_INIT 166 /* Used for FTS cache initialization */
-#define SYNC_FTS_BG_THREADS 165
-#define SYNC_FTS_OPTIMIZE 164 // FIXME: is this correct number, test
-#define SYNC_WORK_QUEUE 162
-#define SYNC_SEARCH_SYS 160 /* NOTE that if we have a memory
- heap that can be extended to the
- buffer pool, its logical level is
- SYNC_SEARCH_SYS, as memory allocation
- can call routines there! Otherwise
- the level is SYNC_MEM_HASH. */
-#define SYNC_BUF_POOL 150 /* Buffer pool mutex */
-#define SYNC_BUF_PAGE_HASH 149 /* buf_pool->page_hash rw_lock */
-#define SYNC_BUF_BLOCK 146 /* Block mutex */
-#define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */
-#define SYNC_DOUBLEWRITE 140
-#define SYNC_ANY_LATCH 135
-#define SYNC_MEM_HASH 131
-#define SYNC_MEM_POOL 130
-
-/* Codes used to designate lock operations */
-#define RW_LOCK_NOT_LOCKED 350
-#define RW_LOCK_EX 351
-#define RW_LOCK_EXCLUSIVE 351
-#define RW_LOCK_SHARED 352
-#define RW_LOCK_WAIT_EX 353
-#define SYNC_MUTEX 354
-
-/* NOTE! The structure appears here only for the compiler to know its size.
-Do not use its fields directly! The structure used in the spin lock
-implementation of a mutual exclusion semaphore. */
-
-/** InnoDB mutex */
-struct ib_mutex_t {
- os_event_t event; /*!< Used by sync0arr.cc for the wait queue */
- volatile lock_word_t lock_word; /*!< lock_word is the target
- of the atomic test-and-set instruction when
- atomic operations are enabled. */
-
-#if !defined(HAVE_ATOMIC_BUILTINS)
- os_fast_mutex_t
- os_fast_mutex; /*!< We use this OS mutex in place of lock_word
- when atomic operations are not enabled */
-#endif
- ulint waiters; /*!< This ulint is set to 1 if there are (or
- may be) threads waiting in the global wait
- array for this mutex to be released.
- Otherwise, this is 0. */
- UT_LIST_NODE_T(ib_mutex_t) list; /*!< All allocated mutexes are put into
- a list. Pointers to the next and prev. */
-#ifdef UNIV_SYNC_DEBUG
- ulint level; /*!< Level in the global latching order */
-#endif /* UNIV_SYNC_DEBUG */
-
- const char* file_name; /*!< File where the mutex was locked */
- ulint line; /*!< Line where the mutex was locked */
- const char* cfile_name;/*!< File name where mutex created */
- ulint cline; /*!< Line where created */
- ulong count_os_wait; /*!< count of os_wait */
- const char* cmutex_name; /*!< mutex name */
- os_thread_id_t thread_id; /*!< The thread id of the thread
- which locked the mutex. */
-#ifdef UNIV_DEBUG
-
-/** Value of mutex_t::magic_n */
-# define MUTEX_MAGIC_N 979585UL
- ulint magic_n; /*!< MUTEX_MAGIC_N */
- ulint ib_mutex_type; /*!< 0=usual mutex, 1=rw_lock mutex */
-#endif /* UNIV_DEBUG */
-#ifdef UNIV_PFS_MUTEX
- struct PSI_mutex* pfs_psi; /*!< The performance schema
- instrumentation hook */
-#endif
-};
-
-/** Constant determining how long spin wait is continued before suspending
-the thread. A value 600 rounds on a 1995 100 MHz Pentium seems to correspond
-to 20 microseconds. */
-
-#define SYNC_SPIN_ROUNDS srv_n_spin_wait_rounds
-
-/** The number of mutex_exit calls. Intended for performance monitoring. */
-extern ib_int64_t mutex_exit_count;
-
-#ifdef UNIV_SYNC_DEBUG
-/** Latching order checks start when this is set TRUE */
-extern ibool sync_order_checks_on;
-#endif /* UNIV_SYNC_DEBUG */
-
-/** This variable is set to TRUE when sync_init is called */
-extern ibool sync_initialized;
-
-/** Global list of database mutexes (not OS mutexes) created. */
-typedef UT_LIST_BASE_NODE_T(ib_mutex_t) ut_list_base_node_t;
-/** Global list of database mutexes (not OS mutexes) created. */
-extern ut_list_base_node_t mutex_list;
-
-/** Mutex protecting the mutex_list variable */
-extern ib_mutex_t mutex_list_mutex;
-
-#ifndef HAVE_ATOMIC_BUILTINS
-/**********************************************************//**
-Function that uses a mutex to decrement a variable atomically */
-UNIV_INLINE
-void
-os_atomic_dec_ulint_func(
-/*=====================*/
- ib_mutex_t* mutex, /*!< in: mutex guarding the
- decrement */
- volatile ulint* var, /*!< in/out: variable to
- decrement */
- ulint delta); /*!< in: delta to decrement */
-/**********************************************************//**
-Function that uses a mutex to increment a variable atomically */
-UNIV_INLINE
-void
-os_atomic_inc_ulint_func(
-/*=====================*/
- ib_mutex_t* mutex, /*!< in: mutex guarding the
- increment */
- volatile ulint* var, /*!< in/out: variable to
- increment */
- ulint delta); /*!< in: delta to increment */
-#endif /* !HAVE_ATOMIC_BUILTINS */
-
-#ifndef UNIV_NONINL
-#include "sync0sync.ic"
-#endif
-
-#endif
+extern mysql_pfs_key_t dict_operation_lock_key;
+extern mysql_pfs_key_t checkpoint_lock_key;
+extern mysql_pfs_key_t fil_space_latch_key;
+extern mysql_pfs_key_t fts_cache_rw_lock_key;
+extern mysql_pfs_key_t fts_cache_init_rw_lock_key;
+extern mysql_pfs_key_t trx_i_s_cache_lock_key;
+extern mysql_pfs_key_t trx_purge_latch_key;
+extern mysql_pfs_key_t index_tree_rw_lock_key;
+extern mysql_pfs_key_t index_online_log_key;
+extern mysql_pfs_key_t dict_table_stats_key;
+extern mysql_pfs_key_t trx_sys_rw_lock_key;
+extern mysql_pfs_key_t hash_table_locks_key;
+#endif /* UNIV_PFS_RWLOCK */
+
+/** Prints info of the sync system.
+@param[in] file where to print */
+void
+sync_print(FILE* file);
+
+#endif /* !sync0sync_h */
diff --git a/storage/innobase/include/sync0sync.ic b/storage/innobase/include/sync0sync.ic
deleted file mode 100644
index 470a774408f..00000000000
--- a/storage/innobase/include/sync0sync.ic
+++ /dev/null
@@ -1,415 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/sync0sync.ic
-Mutex, the basic synchronization primitive
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
-/******************************************************************//**
-Sets the waiters field in a mutex. */
-UNIV_INTERN
-void
-mutex_set_waiters(
-/*==============*/
- ib_mutex_t* mutex, /*!< in: mutex */
- ulint n); /*!< in: value to set */
-/******************************************************************//**
-Reserves a mutex for the current thread. If the mutex is reserved, the
-function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting
-for the mutex before suspending the thread. */
-UNIV_INTERN
-void
-mutex_spin_wait(
-/*============*/
- ib_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where mutex
- requested */
- ulint line); /*!< in: line where requested */
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Sets the debug information for a reserved mutex. */
-UNIV_INTERN
-void
-mutex_set_debug_info(
-/*=================*/
- ib_mutex_t* mutex, /*!< in: mutex */
- const char* file_name, /*!< in: file where requested */
- ulint line); /*!< in: line where requested */
-#endif /* UNIV_SYNC_DEBUG */
-/******************************************************************//**
-Releases the threads waiting in the primary wait array for this mutex. */
-UNIV_INTERN
-void
-mutex_signal_object(
-/*================*/
- ib_mutex_t* mutex); /*!< in: mutex */
-
-/******************************************************************//**
-Performs an atomic test-and-set instruction to the lock_word field of a
-mutex.
-@return the previous value of lock_word: 0 or 1 */
-UNIV_INLINE
-lock_word_t
-ib_mutex_test_and_set(
-/*==================*/
- ib_mutex_t* mutex) /*!< in: mutex */
-{
-#if defined(HAVE_ATOMIC_BUILTINS)
- return(os_atomic_test_and_set(&mutex->lock_word));
-#else
- ibool ret;
-
- ret = os_fast_mutex_trylock_full_barrier(&(mutex->os_fast_mutex));
-
- if (ret == 0) {
- /* We check that os_fast_mutex_trylock does not leak
- and allow race conditions */
- ut_a(mutex->lock_word == 0);
-
- mutex->lock_word = 1;
- }
-
- return((byte) ret);
-#endif /* HAVE_ATOMIC_BUILTINS */
-}
-
-/******************************************************************//**
-Performs a reset instruction to the lock_word field of a mutex. This
-instruction also serializes memory operations to the program order. */
-UNIV_INLINE
-void
-mutex_reset_lock_word(
-/*==================*/
- ib_mutex_t* mutex) /*!< in: mutex */
-{
-#if defined(HAVE_ATOMIC_BUILTINS)
- os_atomic_clear(&mutex->lock_word);
-#else
- mutex->lock_word = 0;
-
- os_fast_mutex_unlock(&(mutex->os_fast_mutex));
-#endif /* HAVE_ATOMIC_BUILTINS */
-}
-
-/******************************************************************//**
-Gets the value of the lock word. */
-UNIV_INLINE
-lock_word_t
-mutex_get_lock_word(
-/*================*/
- const ib_mutex_t* mutex) /*!< in: mutex */
-{
- ut_ad(mutex);
-
- return(mutex->lock_word);
-}
-
-/******************************************************************//**
-Gets the waiters field in a mutex.
-@return value to set */
-UNIV_INLINE
-ulint
-mutex_get_waiters(
-/*==============*/
- const ib_mutex_t* mutex) /*!< in: mutex */
-{
- const volatile ulint* ptr; /*!< declared volatile to ensure that
- the value is read from memory */
- ut_ad(mutex);
-
- ptr = &(mutex->waiters);
-
- return(*ptr); /* Here we assume that the read of a single
- word from memory is atomic */
-}
-
-/******************************************************************//**
-NOTE! Use the corresponding macro mutex_exit(), not directly this function!
-Unlocks a mutex owned by the current thread. */
-UNIV_INLINE
-void
-mutex_exit_func(
-/*============*/
- ib_mutex_t* mutex) /*!< in: pointer to mutex */
-{
- ut_ad(mutex_own(mutex));
-
- mutex->thread_id = (os_thread_id_t) ULINT_UNDEFINED;
-
-#ifdef UNIV_SYNC_DEBUG
- sync_thread_reset_level(mutex);
-#endif
- mutex_reset_lock_word(mutex);
-
- /* A problem: we assume that mutex_reset_lock word
- is a memory barrier, that is when we read the waiters
- field next, the read must be serialized in memory
- after the reset. A speculative processor might
- perform the read first, which could leave a waiting
- thread hanging indefinitely.
-
- Our current solution call every second
- sync_arr_wake_threads_if_sema_free()
- to wake up possible hanging threads if
- they are missed in mutex_signal_object. */
-
- /* We add a memory barrier to prevent reading of the
- number of waiters before releasing the lock. */
-
- os_mb;
-
- if (mutex_get_waiters(mutex) != 0) {
-
- mutex_signal_object(mutex);
- }
-
-#ifdef UNIV_SYNC_PERF_STAT
- mutex_exit_count++;
-#endif
-}
-
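[Editor's note: the barrier reasoning in mutex_exit_func() above can be shown in isolation. Below is a minimal sketch of the release-side ordering it depends on, using std::atomic instead of InnoDB's primitives; every name here is illustrative and not part of the codebase.]

	#include <atomic>

	std::atomic<int> lock_word(0);	/* 0 = free, 1 = held */
	std::atomic<int> waiters(0);	/* 1 if a thread may be sleeping */

	void release_and_signal()
	{
		/* The store must be ordered before the load below;
		otherwise a waiter could set waiters = 1 and go to
		sleep after we already read waiters == 0, and would
		then hang until the periodic wakeup described above. */
		lock_word.store(0, std::memory_order_seq_cst);

		if (waiters.load(std::memory_order_seq_cst) != 0) {
			/* wake_all(); analogous to mutex_signal_object() */
		}
	}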
-/******************************************************************//**
-Locks a mutex for the current thread. If the mutex is reserved, the function
-spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting for the mutex
-before suspending the thread. */
-UNIV_INLINE
-void
-mutex_enter_func(
-/*=============*/
- ib_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where locked */
- ulint line) /*!< in: line where locked */
-{
- ut_ad(mutex_validate(mutex));
-#ifndef WITH_WSREP
- /* this cannot be be granted when BF trx kills a trx in lock wait state */
- ut_ad(!mutex_own(mutex));
-#endif /* WITH_WSREP */
-
- /* Note that we do not peek at the value of lock_word before trying
- the atomic test_and_set; we could peek, and possibly save time. */
-
- if (!ib_mutex_test_and_set(mutex)) {
- mutex->thread_id = os_thread_get_curr_id();
-#ifdef UNIV_SYNC_DEBUG
- mutex_set_debug_info(mutex, file_name, line);
-#endif
- if (srv_instrument_semaphores) {
- mutex->file_name = file_name;
- mutex->line = line;
- }
-
- return; /* Succeeded! */
- }
-
- mutex_spin_wait(mutex, file_name, line);
-}
-
-#ifdef UNIV_PFS_MUTEX
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_enter(), not directly
-this function!
-This is a performance schema instrumented wrapper function for
-mutex_enter_func(). */
-UNIV_INLINE
-void
-pfs_mutex_enter_func(
-/*=================*/
- ib_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where locked */
- ulint line) /*!< in: line where locked */
-{
- if (mutex->pfs_psi != NULL) {
- PSI_mutex_locker* locker;
- PSI_mutex_locker_state state;
-
- locker = PSI_MUTEX_CALL(start_mutex_wait)(
- &state, mutex->pfs_psi,
- PSI_MUTEX_LOCK, file_name,
- static_cast<uint>(line));
-
- mutex_enter_func(mutex, file_name, line);
-
- if (locker != NULL) {
- PSI_MUTEX_CALL(end_mutex_wait)(locker, 0);
- }
- } else {
- mutex_enter_func(mutex, file_name, line);
- }
-}
-
-/********************************************************************//**
-NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly
-this function!
-This is a performance schema instrumented wrapper function for
-mutex_enter_nowait_func.
-@return 0 if succeed, 1 if not */
-UNIV_INLINE
-ulint
-pfs_mutex_enter_nowait_func(
-/*========================*/
- ib_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where mutex
- requested */
- ulint line) /*!< in: line where requested */
-{
- ulint ret;
-
- if (mutex->pfs_psi != NULL) {
- PSI_mutex_locker* locker;
- PSI_mutex_locker_state state;
-
- locker = PSI_MUTEX_CALL(start_mutex_wait)(
- &state, mutex->pfs_psi,
- PSI_MUTEX_TRYLOCK, file_name,
- static_cast<uint>(line));
-
- ret = mutex_enter_nowait_func(mutex, file_name, line);
-
- if (locker != NULL) {
- PSI_MUTEX_CALL(end_mutex_wait)(locker, (int) ret);
- }
- } else {
- ret = mutex_enter_nowait_func(mutex, file_name, line);
- }
-
- return(ret);
-}
-
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_exit(), not directly
-this function!
-A wrap function of mutex_exit_func() with performance schema instrumentation.
-Unlocks a mutex owned by the current thread. */
-UNIV_INLINE
-void
-pfs_mutex_exit_func(
-/*================*/
- ib_mutex_t* mutex) /*!< in: pointer to mutex */
-{
- if (mutex->pfs_psi != NULL) {
- PSI_MUTEX_CALL(unlock_mutex)(mutex->pfs_psi);
- }
-
- mutex_exit_func(mutex);
-}
-
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_create(), not directly
-this function!
-A wrapper function for mutex_create_func(), registers the mutex
-with performance schema if "UNIV_PFS_MUTEX" is defined when
-creating the mutex */
-UNIV_INLINE
-void
-pfs_mutex_create_func(
-/*==================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema key */
- ib_mutex_t* mutex, /*!< in: pointer to memory */
- const char* cmutex_name, /*!< in: mutex name */
-# ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
-# endif /* UNIV_DEBUG */
- const char* cfile_name, /*!< in: file name where created */
- ulint cline) /*!< in: file line where created */
-{
- mutex->pfs_psi = PSI_MUTEX_CALL(init_mutex)(key, mutex);
-
- mutex_create_func(mutex,
- cmutex_name,
-# ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- level,
-# endif /* UNIV_SYNC_DEBUG */
-# endif /* UNIV_DEBUG */
- cfile_name,
- cline);
-}
-
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_free(), not directly
-this function!
-Wrapper function for mutex_free_func(). Also destroys the performance
-schema probes when freeing the mutex */
-UNIV_INLINE
-void
-pfs_mutex_free_func(
-/*================*/
- ib_mutex_t* mutex) /*!< in: mutex */
-{
- if (mutex->pfs_psi != NULL) {
- PSI_MUTEX_CALL(destroy_mutex)(mutex->pfs_psi);
- mutex->pfs_psi = NULL;
- }
-
- mutex_free_func(mutex);
-}
-
-#endif /* UNIV_PFS_MUTEX */
-
-#ifndef HAVE_ATOMIC_BUILTINS
-/**********************************************************//**
-Function that uses a mutex to decrement a variable atomically */
-UNIV_INLINE
-void
-os_atomic_dec_ulint_func(
-/*=====================*/
- ib_mutex_t* mutex, /*!< in: mutex guarding the dec */
- volatile ulint* var, /*!< in/out: variable to decrement */
- ulint delta) /*!< in: delta to decrement */
-{
- mutex_enter(mutex);
-
- /* I don't think we will encounter a situation where
- this check will not be required. */
- ut_ad(*var >= delta);
-
- *var -= delta;
-
- mutex_exit(mutex);
-}
-
-/**********************************************************//**
-Function that uses a mutex to increment a variable atomically */
-UNIV_INLINE
-void
-os_atomic_inc_ulint_func(
-/*=====================*/
- ib_mutex_t* mutex, /*!< in: mutex guarding the increment */
- volatile ulint* var, /*!< in/out: variable to increment */
- ulint delta) /*!< in: delta to increment */
-{
- mutex_enter(mutex);
-
- *var += delta;
-
- mutex_exit(mutex);
-}
-#endif /* !HAVE_ATOMIC_BUILTINS */
diff --git a/storage/innobase/include/sync0types.h b/storage/innobase/include/sync0types.h
index b73ed0c4280..9b1443523c7 100644
--- a/storage/innobase/include/sync0types.h
+++ b/storage/innobase/include/sync0types.h
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,6 +27,1225 @@ Created 9/5/1995 Heikki Tuuri
#ifndef sync0types_h
#define sync0types_h
-struct ib_mutex_t;
+#include <vector>
+#include <my_atomic.h>
+#include "ut0new.h"
+
+#ifdef _WIN32
+/** Native mutex */
+typedef CRITICAL_SECTION sys_mutex_t;
+#else
+/** Native mutex */
+typedef pthread_mutex_t sys_mutex_t;
+#endif /* _WIN32 */
+
+/** Mutex states. */
+enum mutex_state_t {
+ /** Mutex is free */
+ MUTEX_STATE_UNLOCKED = 0,
+
+ /** Mutex is acquired by some thread. */
+ MUTEX_STATE_LOCKED = 1,
+
+ /** Mutex is contended and there are threads waiting on the lock. */
+ MUTEX_STATE_WAITERS = 2
+};
+
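[Editor's note: the three states above describe the classic spin-then-block protocol. A hedged sketch of the state transitions follows; std::atomic stands in for InnoDB's own primitives, and the event wait/signal calls are elided, so treat this as illustration only.]

	#include <atomic>

	std::atomic<int> state(MUTEX_STATE_UNLOCKED);

	void lock_sketch()
	{
		int expected = MUTEX_STATE_UNLOCKED;

		/* Uncontended fast path: free -> locked in one CAS. */
		if (state.compare_exchange_strong(expected, MUTEX_STATE_LOCKED)) {
			return;
		}

		/* Contended: advertise that waiters exist, then sleep
		until the holder hands the mutex back as UNLOCKED. */
		while (state.exchange(MUTEX_STATE_WAITERS) != MUTEX_STATE_UNLOCKED) {
			/* wait_on_event(); */
		}
	}

	void unlock_sketch()
	{
		/* Only pay for a wakeup when someone advertised waiting. */
		if (state.exchange(MUTEX_STATE_UNLOCKED) == MUTEX_STATE_WAITERS) {
			/* signal_event(); */
		}
	}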
+/*
+ LATCHING ORDER WITHIN THE DATABASE
+ ==================================
+
+The mutex or latch in the central memory object, for instance, a rollback
+segment object, must be acquired before acquiring the latch or latches to
+the corresponding file data structure. In the latching order below, these
+file page object latches are placed immediately below the corresponding
+central memory object latch or mutex.
+
+Synchronization object Notes
+---------------------- -----
+
+Dictionary mutex If we have a pointer to a dictionary
+| object, e.g., a table, it can be
+| accessed without reserving the
+| dictionary mutex. We must have a
+| reservation, a memoryfix, to the
+| appropriate table object in this case,
+| and the table must be explicitly
+| released later.
+V
+Dictionary header
+|
+V
+Secondary index tree latch The tree latch protects also all
+| the B-tree non-leaf pages. These
+V can be read with the page only
+Secondary index non-leaf bufferfixed to save CPU time,
+| no s-latch is needed on the page.
+| Modification of a page requires an
+| x-latch on the page, however. If a
+| thread owns an x-latch to the tree,
+| it is allowed to latch non-leaf pages
+| even after it has acquired the fsp
+| latch.
+V
+Secondary index leaf The latch on the secondary index leaf
+| can be kept while accessing the
+| clustered index, to save CPU time.
+V
+Clustered index tree latch To increase concurrency, the tree
+| latch is usually released when the
+| leaf page latch has been acquired.
+V
+Clustered index non-leaf
+|
+V
+Clustered index leaf
+|
+V
+Transaction system header
+|
+V
+Transaction undo mutex The undo log entry must be written
+| before any index page is modified.
+| Transaction undo mutex is for the undo
+| logs the analogue of the tree latch
+| for a B-tree. If a thread has the
+| trx undo mutex reserved, it is allowed
+| to latch the undo log pages in any
+| order, and also after it has acquired
+| the fsp latch.
+V
+Rollback segment mutex The rollback segment mutex must be
+| reserved, if, e.g., a new page must
+| be added to an undo log. The rollback
+| segment and the undo logs in its
+| history list can be seen as an
+| analogue of a B-tree, and the latches
+| reserved similarly, using a version of
+| lock-coupling. If an undo log must be
+| extended by a page when inserting an
+| undo log record, this corresponds to
+| a pessimistic insert in a B-tree.
+V
+Rollback segment header
+|
+V
+Purge system latch
+|
+V
+Undo log pages If a thread owns the trx undo mutex,
+| or for a log in the history list, the
+| rseg mutex, it is allowed to latch
+| undo log pages in any order, and even
+| after it has acquired the fsp latch.
+| If a thread does not have the
+| appropriate mutex, it is allowed to
+| latch only a single undo log page in
+| a mini-transaction.
+V
+File space management latch If a mini-transaction must allocate
+| several file pages, it can do that,
+| because it keeps the x-latch to the
+| file space management in its memo.
+V
+File system pages
+|
+V
+lock_sys_wait_mutex Mutex protecting lock timeout data
+|
+V
+lock_sys_mutex Mutex protecting lock_sys_t
+|
+V
+trx_sys->mutex Mutex protecting trx_sys_t
+|
+V
+Threads mutex Background thread scheduling mutex
+|
+V
+query_thr_mutex Mutex protecting query threads
+|
+V
+trx_mutex Mutex protecting trx_t fields
+|
+V
+Search system mutex
+|
+V
+Buffer pool mutex
+|
+V
+Log mutex
+|
+Any other latch
+|
+V
+Memory pool mutex */
+
+/** Latching order levels. If you modify these, you have to also update
+LatchDebug internals in sync0debug.cc */
+
+enum latch_level_t {
+ SYNC_UNKNOWN = 0,
+
+ SYNC_MUTEX = 1,
+
+ RW_LOCK_SX,
+ RW_LOCK_X_WAIT,
+ RW_LOCK_S,
+ RW_LOCK_X,
+ RW_LOCK_NOT_LOCKED,
+
+ SYNC_MONITOR_MUTEX,
+
+ SYNC_ANY_LATCH,
+
+ SYNC_DOUBLEWRITE,
+
+ SYNC_BUF_FLUSH_LIST,
+
+ SYNC_BUF_BLOCK,
+ SYNC_BUF_PAGE_HASH,
+
+ SYNC_BUF_POOL,
+
+ SYNC_POOL,
+ SYNC_POOL_MANAGER,
+
+ SYNC_SEARCH_SYS,
+
+ SYNC_WORK_QUEUE,
+
+ SYNC_FTS_TOKENIZE,
+ SYNC_FTS_OPTIMIZE,
+ SYNC_FTS_BG_THREADS,
+ SYNC_FTS_CACHE_INIT,
+ SYNC_RECV,
+ SYNC_LOG_FLUSH_ORDER,
+ SYNC_LOG,
+ SYNC_LOG_WRITE,
+ SYNC_PAGE_CLEANER,
+ SYNC_PURGE_QUEUE,
+ SYNC_TRX_SYS_HEADER,
+ SYNC_REC_LOCK,
+ SYNC_THREADS,
+ SYNC_TRX,
+ SYNC_TRX_SYS,
+ SYNC_LOCK_SYS,
+ SYNC_LOCK_WAIT_SYS,
+
+ SYNC_INDEX_ONLINE_LOG,
+
+ SYNC_IBUF_BITMAP,
+ SYNC_IBUF_BITMAP_MUTEX,
+ SYNC_IBUF_TREE_NODE,
+ SYNC_IBUF_TREE_NODE_NEW,
+ SYNC_IBUF_INDEX_TREE,
+
+ SYNC_IBUF_MUTEX,
+
+ SYNC_FSP_PAGE,
+ SYNC_FSP,
+ SYNC_EXTERN_STORAGE,
+ SYNC_TRX_UNDO_PAGE,
+ SYNC_RSEG_HEADER,
+ SYNC_RSEG_HEADER_NEW,
+ SYNC_NOREDO_RSEG,
+ SYNC_REDO_RSEG,
+ SYNC_TRX_UNDO,
+ SYNC_PURGE_LATCH,
+ SYNC_TREE_NODE,
+ SYNC_TREE_NODE_FROM_HASH,
+ SYNC_TREE_NODE_NEW,
+ SYNC_IBUF_PESS_INSERT_MUTEX,
+ SYNC_INDEX_TREE,
+
+ SYNC_IBUF_HEADER,
+ SYNC_DICT_HEADER,
+ SYNC_STATS_AUTO_RECALC,
+ SYNC_DICT_AUTOINC_MUTEX,
+ SYNC_DICT,
+ SYNC_FTS_CACHE,
+
+ SYNC_FILE_FORMAT_TAG,
+
+ SYNC_DICT_OPERATION,
+
+ SYNC_TRX_I_S_LAST_READ,
+
+ SYNC_TRX_I_S_RWLOCK,
+
+ SYNC_RECV_WRITER,
+
+ /** Level is varying. Only used with buffer pool page locks, which
+ do not have a fixed level, but instead have their level set after
+ the page is locked; see e.g. ibuf_bitmap_get_map_page(). */
+
+ SYNC_LEVEL_VARYING,
+
+ /** This can be used to suppress order checking. */
+ SYNC_NO_ORDER_CHECK,
+
+ /** Maximum level value */
+ SYNC_LEVEL_MAX = SYNC_NO_ORDER_CHECK
+};
+
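[Editor's note: the enum defines a strict total order; in debug builds the LatchDebug checker in sync0debug.cc asserts that a thread only takes a latch at a lower level than everything it already holds, with documented exceptions such as SYNC_LEVEL_VARYING and SYNC_NO_ORDER_CHECK. A toy reduction of that rule, under those assumptions:]

	#include <vector>
	#include <cassert>

	static thread_local std::vector<latch_level_t> held_levels;

	void order_check_sketch(latch_level_t level)
	{
		/* A latching-order violation would fire this assert. */
		for (size_t i = 0; i < held_levels.size(); i++) {
			assert(level < held_levels[i]);
		}

		held_levels.push_back(level);
	}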
+/** Each latch has an ID. This id is used for creating the latch and to look
+up its meta-data. See sync0debug.cc. */
+enum latch_id_t {
+ LATCH_ID_NONE = 0,
+ LATCH_ID_AUTOINC,
+ LATCH_ID_BUF_BLOCK_MUTEX,
+ LATCH_ID_BUF_POOL,
+ LATCH_ID_BUF_POOL_ZIP,
+ LATCH_ID_CACHE_LAST_READ,
+ LATCH_ID_DICT_FOREIGN_ERR,
+ LATCH_ID_DICT_SYS,
+ LATCH_ID_FILE_FORMAT_MAX,
+ LATCH_ID_FIL_SYSTEM,
+ LATCH_ID_FLUSH_LIST,
+ LATCH_ID_FTS_BG_THREADS,
+ LATCH_ID_FTS_DELETE,
+ LATCH_ID_FTS_OPTIMIZE,
+ LATCH_ID_FTS_DOC_ID,
+ LATCH_ID_FTS_PLL_TOKENIZE,
+ LATCH_ID_HASH_TABLE_MUTEX,
+ LATCH_ID_IBUF_BITMAP,
+ LATCH_ID_IBUF,
+ LATCH_ID_IBUF_PESSIMISTIC_INSERT,
+ LATCH_ID_LOG_SYS,
+ LATCH_ID_LOG_WRITE,
+ LATCH_ID_LOG_FLUSH_ORDER,
+ LATCH_ID_LIST,
+ LATCH_ID_MUTEX_LIST,
+ LATCH_ID_PAGE_CLEANER,
+ LATCH_ID_PURGE_SYS_PQ,
+ LATCH_ID_RECALC_POOL,
+ LATCH_ID_RECV_SYS,
+ LATCH_ID_RECV_WRITER,
+ LATCH_ID_REDO_RSEG,
+ LATCH_ID_NOREDO_RSEG,
+ LATCH_ID_RW_LOCK_DEBUG,
+ LATCH_ID_RTR_SSN_MUTEX,
+ LATCH_ID_RTR_ACTIVE_MUTEX,
+ LATCH_ID_RTR_MATCH_MUTEX,
+ LATCH_ID_RTR_PATH_MUTEX,
+ LATCH_ID_RW_LOCK_LIST,
+ LATCH_ID_RW_LOCK_MUTEX,
+ LATCH_ID_SRV_INNODB_MONITOR,
+ LATCH_ID_SRV_MISC_TMPFILE,
+ LATCH_ID_SRV_MONITOR_FILE,
+ LATCH_ID_BUF_DBLWR,
+ LATCH_ID_TRX_UNDO,
+ LATCH_ID_TRX_POOL,
+ LATCH_ID_TRX_POOL_MANAGER,
+ LATCH_ID_TRX,
+ LATCH_ID_LOCK_SYS,
+ LATCH_ID_LOCK_SYS_WAIT,
+ LATCH_ID_TRX_SYS,
+ LATCH_ID_SRV_SYS,
+ LATCH_ID_SRV_SYS_TASKS,
+ LATCH_ID_PAGE_ZIP_STAT_PER_INDEX,
+ LATCH_ID_EVENT_MANAGER,
+ LATCH_ID_EVENT_MUTEX,
+ LATCH_ID_SYNC_ARRAY_MUTEX,
+ LATCH_ID_ZIP_PAD_MUTEX,
+ LATCH_ID_OS_AIO_READ_MUTEX,
+ LATCH_ID_OS_AIO_WRITE_MUTEX,
+ LATCH_ID_OS_AIO_LOG_MUTEX,
+ LATCH_ID_OS_AIO_IBUF_MUTEX,
+ LATCH_ID_OS_AIO_SYNC_MUTEX,
+ LATCH_ID_ROW_DROP_LIST,
+ LATCH_ID_INDEX_ONLINE_LOG,
+ LATCH_ID_WORK_QUEUE,
+ LATCH_ID_BTR_SEARCH,
+ LATCH_ID_BUF_BLOCK_LOCK,
+ LATCH_ID_BUF_BLOCK_DEBUG,
+ LATCH_ID_DICT_OPERATION,
+ LATCH_ID_CHECKPOINT,
+ LATCH_ID_FIL_SPACE,
+ LATCH_ID_FTS_CACHE,
+ LATCH_ID_FTS_CACHE_INIT,
+ LATCH_ID_TRX_I_S_CACHE,
+ LATCH_ID_TRX_PURGE,
+ LATCH_ID_IBUF_INDEX_TREE,
+ LATCH_ID_INDEX_TREE,
+ LATCH_ID_DICT_TABLE_STATS,
+ LATCH_ID_HASH_TABLE_RW_LOCK,
+ LATCH_ID_BUF_CHUNK_MAP_LATCH,
+ LATCH_ID_SYNC_DEBUG_MUTEX,
+ LATCH_ID_SCRUB_STAT_MUTEX,
+ LATCH_ID_DEFRAGMENT_MUTEX,
+ LATCH_ID_BTR_DEFRAGMENT_MUTEX,
+ LATCH_ID_MTFLUSH_THREAD_MUTEX,
+ LATCH_ID_MTFLUSH_MUTEX,
+ LATCH_ID_FIL_CRYPT_MUTEX,
+ LATCH_ID_FIL_CRYPT_STAT_MUTEX,
+ LATCH_ID_FIL_CRYPT_DATA_MUTEX,
+ LATCH_ID_FIL_CRYPT_THREADS_MUTEX,
+ LATCH_ID_TEST_MUTEX,
+ LATCH_ID_MAX = LATCH_ID_TEST_MUTEX
+};
+
+#ifndef UNIV_INNOCHECKSUM
+/** OS mutex, without any policy. It is a thin wrapper around the
+system mutexes. The interface is different from the policy mutexes,
+to ensure that it is called directly and not confused with the
+policy mutexes. */
+struct OSMutex {
+
+ /** Constructor */
+ OSMutex()
+ UNIV_NOTHROW
+ {
+ ut_d(m_freed = true);
+ }
+
+ /** Create the mutex by calling the system functions. */
+ void init()
+ UNIV_NOTHROW
+ {
+ ut_ad(m_freed);
+
+#ifdef _WIN32
+ InitializeCriticalSection((LPCRITICAL_SECTION) &m_mutex);
+#else
+ {
+ int ret = pthread_mutex_init(&m_mutex, NULL);
+ ut_a(ret == 0);
+ }
+#endif /* _WIN32 */
+
+ ut_d(m_freed = false);
+ }
+
+ /** Destructor */
+ ~OSMutex() { }
+
+ /** Destroy the mutex */
+ void destroy()
+ UNIV_NOTHROW
+ {
+ ut_ad(!m_freed);
+#ifdef _WIN32
+ DeleteCriticalSection((LPCRITICAL_SECTION) &m_mutex);
+#else
+ int ret;
+
+ ret = pthread_mutex_destroy(&m_mutex);
+
+ if (ret != 0) {
+
+ ib::error()
+ << "Return value " << ret << " when calling "
+ << "pthread_mutex_destroy().";
+ }
+#endif /* _WIN32 */
+ ut_d(m_freed = true);
+ }
+
+ /** Release the mutex. */
+ void exit()
+ UNIV_NOTHROW
+ {
+ ut_ad(!m_freed);
+#ifdef _WIN32
+ LeaveCriticalSection(&m_mutex);
+#else
+ int ret = pthread_mutex_unlock(&m_mutex);
+ ut_a(ret == 0);
+#endif /* _WIN32 */
+ }
+
+ /** Acquire the mutex. */
+ void enter()
+ UNIV_NOTHROW
+ {
+ ut_ad(!m_freed);
+#ifdef _WIN32
+ EnterCriticalSection((LPCRITICAL_SECTION) &m_mutex);
+#else
+ int ret = pthread_mutex_lock(&m_mutex);
+ ut_a(ret == 0);
+#endif /* _WIN32 */
+ }
+
+ /** @return true if locking succeeded */
+ bool try_lock()
+ UNIV_NOTHROW
+ {
+ ut_ad(!m_freed);
+#ifdef _WIN32
+ return(TryEnterCriticalSection(&m_mutex) != 0);
+#else
+ return(pthread_mutex_trylock(&m_mutex) == 0);
+#endif /* _WIN32 */
+ }
+
+ /** Required for os_event_t */
+ operator sys_mutex_t*()
+ UNIV_NOTHROW
+ {
+ return(&m_mutex);
+ }
+
+private:
+#ifdef UNIV_DEBUG
+ /** true if the mutex has been freed/destroyed. */
+ bool m_freed;
+#endif /* UNIV_DEBUG */
+
+ sys_mutex_t m_mutex;
+};
+
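[Editor's note: a short usage sketch for OSMutex at a hypothetical call site. Unlike the policy mutexes, the constructor and destructor are deliberate no-ops, so init() and destroy() must be called explicitly.]

	OSMutex	mutex;

	void example()
	{
		mutex.init();

		mutex.enter();
		/* ... critical section ... */
		mutex.exit();

		if (mutex.try_lock()) {
			/* non-blocking acquisition succeeded */
			mutex.exit();
		}

		mutex.destroy();
	}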
+#ifdef UNIV_PFS_MUTEX
+/** Latch element.
+Used for mutexes which have PFS keys defined under UNIV_PFS_MUTEX.
+@param[in] id Latch id
+@param[in] level Latch level
+@param[in] key PFS key */
+# define LATCH_ADD_MUTEX(id, level, key) latch_meta[LATCH_ID_ ## id] =\
+ UT_NEW_NOKEY(latch_meta_t(LATCH_ID_ ## id, #id, level, #level, key))
+
+#ifdef UNIV_PFS_RWLOCK
+/** Latch element.
+Used for rwlocks which have PFS keys defined under UNIV_PFS_RWLOCK.
+@param[in] id Latch id
+@param[in] level Latch level
+@param[in] key PFS key */
+# define LATCH_ADD_RWLOCK(id, level, key) latch_meta[LATCH_ID_ ## id] =\
+ UT_NEW_NOKEY(latch_meta_t(LATCH_ID_ ## id, #id, level, #level, key))
+#else
+# define LATCH_ADD_RWLOCK(id, level, key) latch_meta[LATCH_ID_ ## id] =\
+ UT_NEW_NOKEY(latch_meta_t(LATCH_ID_ ## id, #id, level, #level, \
+ PSI_NOT_INSTRUMENTED))
+#endif /* UNIV_PFS_RWLOCK */
+
+#else
+# define LATCH_ADD_MUTEX(id, level, key) latch_meta[LATCH_ID_ ## id] =\
+ UT_NEW_NOKEY(latch_meta_t(LATCH_ID_ ## id, #id, level, #level))
+# define LATCH_ADD_RWLOCK(id, level, key) latch_meta[LATCH_ID_ ## id] =\
+ UT_NEW_NOKEY(latch_meta_t(LATCH_ID_ ## id, #id, level, #level))
+#endif /* UNIV_PFS_MUTEX */
+
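[Editor's note: for illustration, this is how the macros would be invoked when populating latch_meta at startup (see sync_latch_meta_init() below). The IDs, levels and PFS keys all appear elsewhere in this patch, but the exact call sites live in sync0debug.cc, so treat these lines as a sketch.]

	LATCH_ADD_MUTEX(RECV_SYS, SYNC_RECV, recv_sys_mutex_key);
	LATCH_ADD_MUTEX(TRX_SYS, SYNC_TRX_SYS, trx_sys_mutex_key);
	LATCH_ADD_RWLOCK(TRX_PURGE, SYNC_PURGE_LATCH, trx_purge_latch_key);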
+/** Default latch counter */
+class LatchCounter {
+
+public:
+ /** The counts we collect for a mutex */
+ struct Count {
+
+ /** Constructor */
+ Count()
+ UNIV_NOTHROW
+ :
+ m_spins(),
+ m_waits(),
+ m_calls(),
+ m_enabled()
+ {
+ /* No op */
+ }
+
+ /** Reset the values to zero */
+ void reset()
+ UNIV_NOTHROW
+ {
+ m_spins = 0;
+ m_waits = 0;
+ m_calls = 0;
+ }
+
+ /** Number of spins trying to acquire the latch. */
+ uint32_t m_spins;
+
+ /** Number of waits trying to acquire the latch */
+ uint32_t m_waits;
+
+ /** Number of times it was called */
+ uint32_t m_calls;
+
+ /** true if enabled */
+ bool m_enabled;
+ };
+
+ /** Constructor */
+ LatchCounter()
+ UNIV_NOTHROW
+ :
+ m_active(false)
+ {
+ m_mutex.init();
+ }
+
+ /** Destructor */
+ ~LatchCounter()
+ UNIV_NOTHROW
+ {
+ m_mutex.destroy();
+
+ for (Counters::iterator it = m_counters.begin();
+ it != m_counters.end();
+ ++it) {
+
+ Count* count = *it;
+
+ UT_DELETE(count);
+ }
+ }
+
+ /** Reset all counters to zero. The mutex only guards the
+ m_counters vector itself; the individual counter updates are
+ not atomic, which is acceptable because the data is purely
+ statistical and not required for the correct functioning of
+ the server. */
+ void reset()
+ UNIV_NOTHROW
+ {
+ m_mutex.enter();
+
+ Counters::iterator end = m_counters.end();
+
+ for (Counters::iterator it = m_counters.begin();
+ it != end;
+ ++it) {
+
+ (*it)->reset();
+ }
+
+ m_mutex.exit();
+ }
+
+ /** @return the aggregate counter */
+ Count* sum_register()
+ UNIV_NOTHROW
+ {
+ m_mutex.enter();
+
+ Count* count;
+
+ if (m_counters.empty()) {
+ count = UT_NEW_NOKEY(Count());
+ m_counters.push_back(count);
+ } else {
+ ut_a(m_counters.size() == 1);
+ count = m_counters[0];
+ }
+
+ m_mutex.exit();
+
+ return(count);
+ }
+
+ /** Register a single instance counter */
+ void single_register(Count* count)
+ UNIV_NOTHROW
+ {
+ m_mutex.enter();
+
+ m_counters.push_back(count);
+
+ m_mutex.exit();
+ }
+
+ /** Deregister a single instance counter
+ @param[in] count The count instance to deregister */
+ void single_deregister(Count* count)
+ UNIV_NOTHROW
+ {
+ m_mutex.enter();
+
+ m_counters.erase(
+ std::remove(
+ m_counters.begin(),
+ m_counters.end(), count),
+ m_counters.end());
+
+ m_mutex.exit();
+ }
+
+ /** Iterate over the counters */
+ template <typename Callback>
+ void iterate(Callback& callback) const
+ UNIV_NOTHROW
+ {
+ Counters::const_iterator end = m_counters.end();
+
+ for (Counters::const_iterator it = m_counters.begin();
+ it != end;
+ ++it) {
+
+ callback(*it);
+ }
+ }
+
+ /** Enable the monitoring */
+ void enable()
+ UNIV_NOTHROW
+ {
+ m_mutex.enter();
+
+ Counters::const_iterator end = m_counters.end();
+
+ for (Counters::const_iterator it = m_counters.begin();
+ it != end;
+ ++it) {
+
+ (*it)->m_enabled = true;
+ }
+
+ m_active = true;
+
+ m_mutex.exit();
+ }
+
+ /** Disable the monitoring */
+ void disable()
+ UNIV_NOTHROW
+ {
+ m_mutex.enter();
+
+ Counters::const_iterator end = m_counters.end();
+
+ for (Counters::const_iterator it = m_counters.begin();
+ it != end;
+ ++it) {
+
+ (*it)->m_enabled = false;
+ }
+
+ m_active = false;
+
+ m_mutex.exit();
+ }
+
+ /** @return if monitoring is active */
+ bool is_enabled() const
+ UNIV_NOTHROW
+ {
+ return(m_active);
+ }
+
+private:
+ /* Disable copying */
+ LatchCounter(const LatchCounter&);
+ LatchCounter& operator=(const LatchCounter&);
+
+private:
+ typedef OSMutex Mutex;
+ typedef std::vector<Count*> Counters;
+
+ /** Mutex protecting m_counters */
+ Mutex m_mutex;
+
+ /** Counters for the latches */
+ Counters m_counters;
+
+ /** if true then we collect the data */
+ bool m_active;
+};
+
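[Editor's note: a hedged sketch of how a latch implementation might drive LatchCounter; the calling context is hypothetical.]

	LatchCounter		counter;
	LatchCounter::Count	stats;

	void example()
	{
		counter.single_register(&stats);

		if (counter.is_enabled()) {
			++stats.m_calls;	/* one acquisition */
			++stats.m_spins;	/* one spin before success */
		}

		counter.single_deregister(&stats);
	}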
+/** Latch meta data */
+template <typename Counter = LatchCounter>
+class LatchMeta {
+
+public:
+ typedef Counter CounterType;
+
+#ifdef UNIV_PFS_MUTEX
+ typedef mysql_pfs_key_t pfs_key_t;
+#endif /* UNIV_PFS_MUTEX */
+
+ /** Constructor */
+ LatchMeta()
+ :
+ m_id(LATCH_ID_NONE),
+ m_name(),
+ m_level(SYNC_UNKNOWN),
+ m_level_name()
+#ifdef UNIV_PFS_MUTEX
+ ,m_pfs_key()
+#endif /* UNIV_PFS_MUTEX */
+ {
+ }
+
+ /** Destructor */
+ ~LatchMeta() { }
+
+ /** Constructor
+ @param[in] id Latch id
+ @param[in] name Latch name
+ @param[in] level Latch level
+ @param[in] level_name Latch level text representation
+ @param[in] key PFS key */
+ LatchMeta(
+ latch_id_t id,
+ const char* name,
+ latch_level_t level,
+ const char* level_name
+#ifdef UNIV_PFS_MUTEX
+ ,pfs_key_t key
+#endif /* UNIV_PFS_MUTEX */
+ )
+ :
+ m_id(id),
+ m_name(name),
+ m_level(level),
+ m_level_name(level_name)
+#ifdef UNIV_PFS_MUTEX
+ ,m_pfs_key(key)
+#endif /* UNIV_PFS_MUTEX */
+ {
+ /* No op */
+ }
+
+ /** Less than operator.
+ @param[in] rhs Instance to compare against
+ @return true if this.get_id() < rhs.get_id() */
+ bool operator<(const LatchMeta& rhs) const
+ {
+ return(get_id() < rhs.get_id());
+ }
+
+ /** @return the latch id */
+ latch_id_t get_id() const
+ {
+ return(m_id);
+ }
+
+ /** @return the latch name */
+ const char* get_name() const
+ {
+ return(m_name);
+ }
+
+ /** @return the latch level */
+ latch_level_t get_level() const
+ {
+ return(m_level);
+ }
+
+ /** @return the latch level name */
+ const char* get_level_name() const
+ {
+ return(m_level_name);
+ }
+
+#ifdef UNIV_PFS_MUTEX
+ /** @return the PFS key for the latch */
+ pfs_key_t get_pfs_key() const
+ {
+ return(m_pfs_key);
+ }
+#endif /* UNIV_PFS_MUTEX */
+
+ /** @return the counter instance */
+ Counter* get_counter()
+ {
+ return(&m_counter);
+ }
+
+private:
+ /** Latch id */
+ latch_id_t m_id;
+
+ /** Latch name */
+ const char* m_name;
+
+ /** Latch level in the ordering */
+ latch_level_t m_level;
+
+ /** Latch level text representation */
+ const char* m_level_name;
+
+#ifdef UNIV_PFS_MUTEX
+ /** PFS key */
+ pfs_key_t m_pfs_key;
+#endif /* UNIV_PFS_MUTEX */
+
+ /** For gathering latch statistics */
+ Counter m_counter;
+};
+
+typedef LatchMeta<LatchCounter> latch_meta_t;
+typedef std::vector<latch_meta_t*, ut_allocator<latch_meta_t*> > LatchMetaData;
+
+/** Note: This is accessed without any mutex protection. It is initialised
+at startup and elements should not be added to or removed from it after
+that. See sync_latch_meta_init() */
+extern LatchMetaData latch_meta;
+
+/** Get the latch meta-data from the latch ID
+@param[in] id Latch ID
+@return the latch meta data */
+inline
+latch_meta_t&
+sync_latch_get_meta(latch_id_t id)
+{
+ ut_ad(static_cast<size_t>(id) < latch_meta.size());
+ ut_ad(id == latch_meta[id]->get_id());
+
+ return(*latch_meta[id]);
+}
+
+/** Fetch the counter for the latch
+@param[in] id Latch ID
+@return the latch counter */
+inline
+latch_meta_t::CounterType*
+sync_latch_get_counter(latch_id_t id)
+{
+ latch_meta_t& meta = sync_latch_get_meta(id);
+
+ return(meta.get_counter());
+}
+
+/** Get the latch name from the latch ID
+@param[in] id Latch ID
+@return the name, will assert if not found */
+inline
+const char*
+sync_latch_get_name(latch_id_t id)
+{
+ const latch_meta_t& meta = sync_latch_get_meta(id);
+
+ return(meta.get_name());
+}
+
+/** Get the latch ordering level
+@param[in] id Latch id to lookup
+@return the latch level */
+inline
+latch_level_t
+sync_latch_get_level(latch_id_t id)
+{
+ const latch_meta_t& meta = sync_latch_get_meta(id);
+
+ return(meta.get_level());
+}
+
+#ifdef UNIV_PFS_MUTEX
+/** Get the latch PFS key from the latch ID
+@param[in] id Latch ID
+@return the PFS key */
+inline
+mysql_pfs_key_t
+sync_latch_get_pfs_key(latch_id_t id)
+{
+ const latch_meta_t& meta = sync_latch_get_meta(id);
+
+ return(meta.get_pfs_key());
+}
#endif
+
+/** String representation of the filename and line number where the
+latch was created
+@param[in] id Latch ID
+@param[in] created Filename and line number where it was created
+@return the string representation */
+std::string
+sync_mutex_to_string(
+ latch_id_t id,
+ const std::string& created);
+
+/** Get the latch name from a sync level
+@param[in] level Latch level to lookup
+@return the latch name, or NULL if not found. */
+const char*
+sync_latch_get_name(latch_level_t level);
+
+/** Strip the directory path from a filename
+@return the basename */
+const char*
+sync_basename(const char* filename);
+
+/** Register a latch, called when it is created
+@param[in] ptr Latch instance that was created
+@param[in] filename Filename where it was created
+@param[in] line Line number in filename */
+void
+sync_file_created_register(
+ const void* ptr,
+ const char* filename,
+ uint16_t line);
+
+/** Deregister a latch, called when it is destroyed
+@param[in] ptr Latch to be destroyed */
+void
+sync_file_created_deregister(const void* ptr);
+
+/** Get the string where the file was created. Its format is "name:line"
+@param[in] ptr Latch instance
+@return creation information, or "" if it cannot be found */
+std::string
+sync_file_created_get(const void* ptr);
+
+#ifdef UNIV_DEBUG
+
+/** All ordered latches used in debugging must derive from this class. */
+struct latch_t {
+
+ /** Constructor
+ @param[in] id The latch ID */
+ explicit latch_t(latch_id_t id = LATCH_ID_NONE)
+ UNIV_NOTHROW
+ :
+ m_id(id),
+ m_rw_lock(),
+ m_temp_fsp() { }
+
+ /** Destructor */
+ virtual ~latch_t() UNIV_NOTHROW { }
+
+ /** @return the latch ID */
+ latch_id_t get_id() const
+ {
+ return(m_id);
+ }
+
+ /** @return true if it is a rw-lock */
+ bool is_rw_lock() const
+ UNIV_NOTHROW
+ {
+ return(m_rw_lock);
+ }
+
+ /** Print the latch context
+ @return the string representation */
+ virtual std::string to_string() const = 0;
+
+ /** @return "filename:line" from where the latch was last locked */
+ virtual std::string locked_from() const = 0;
+
+ /** @return the latch level */
+ latch_level_t get_level() const
+ UNIV_NOTHROW
+ {
+ ut_a(m_id != LATCH_ID_NONE);
+
+ return(sync_latch_get_level(m_id));
+ }
+
+ /** @return true if the latch is for a temporary file space */
+ bool is_temp_fsp() const
+ UNIV_NOTHROW
+ {
+ return(m_temp_fsp);
+ }
+
+ /** Set the temporary tablespace flag. (For internal temporary
+ tables, MySQL 5.7 does not always acquire the index->lock. We
+ need to figure out the context and add some special rules
+ during the checks.) */
+ void set_temp_fsp()
+ UNIV_NOTHROW
+ {
+ ut_ad(get_id() == LATCH_ID_FIL_SPACE);
+ m_temp_fsp = true;
+ }
+
+ /** @return the latch name, m_id must be set */
+ const char* get_name() const
+ UNIV_NOTHROW
+ {
+ ut_a(m_id != LATCH_ID_NONE);
+
+ return(sync_latch_get_name(m_id));
+ }
+
+ /** Latch ID */
+ latch_id_t m_id;
+
+ /** true if it is a rw-lock. In debug mode, rw_lock_t derives from
+ this class and sets this variable. */
+ bool m_rw_lock;
+
+ /** true if it is a temporary space latch */
+ bool m_temp_fsp;
+};
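Because to_string() and locked_from() are pure virtual, every concrete debug latch must override them. A hedged sketch of a minimal subclass (the name MyLatch is hypothetical):

	struct MyLatch : public latch_t {
		MyLatch() : latch_t(LATCH_ID_NONE) { }

		std::string to_string() const { return("MyLatch"); }

		std::string locked_from() const { return("unknown:0"); }
	};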
+
+/** Subclass this to iterate over a thread's acquired latch levels. */
+struct sync_check_functor_t {
+ virtual ~sync_check_functor_t() { }
+ virtual bool operator()(const latch_level_t) const = 0;
+};
+
+/** Check that no latch is being held.
+@tparam some_allowed whether some latches are allowed to be held */
+template<bool some_allowed = false>
+struct sync_checker : public sync_check_functor_t
+{
+ /** Check the latching constraints
+ @param[in] level The level held by the thread
+ @return whether a latch violation was detected */
+ bool operator()(const latch_level_t level) const
+ {
+ if (some_allowed) {
+ switch (level) {
+ case SYNC_RECV_WRITER:
+ /* This only happens in
+ recv_apply_hashed_log_recs. */
+ case SYNC_DICT:
+ case SYNC_DICT_OPERATION:
+ case SYNC_FTS_CACHE:
+ case SYNC_NO_ORDER_CHECK:
+ return(false);
+ default:
+ return(true);
+ }
+ }
+
+ return(true);
+ }
+};
+
+/** The strict latch checker (no InnoDB latches may be held) */
+typedef struct sync_checker<false> sync_check;
+/** The sloppy latch checker (can hold InnoDB dictionary or SQL latches) */
+typedef struct sync_checker<true> dict_sync_check;
+
+/** Functor to check for given latching constraints. */
+struct sync_allowed_latches : public sync_check_functor_t {
+
+ /** Constructor
+ @param[in] from first element in an array of latch_level_t
+ @param[in] to last element in an array of latch_level_t */
+ sync_allowed_latches(
+ const latch_level_t* from,
+ const latch_level_t* to)
+ : begin(from), end(to) { }
+
+ /** Check whether the given latch level violates the latch
+ constraints. This object maintains a list of allowed latch
+ levels, and if the given level is not in that list, it is a
+ violation.
+
+ @param[in] level The latch level to check
+ @return true if there is a latch violation */
+ bool operator()(const latch_level_t level) const
+ {
+ return(std::find(begin, end, level) == end);
+ }
+
+private:
+ /** First element in an array of allowed latch levels */
+ const latch_level_t* const begin;
+ /** First element after the end of the array of allowed latch levels */
+ const latch_level_t* const end;
+};
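A usage sketch: the constructor takes the bounds of a plain array of allowed levels (UT_ARR_SIZE is assumed to be the usual InnoDB array-size macro):

	/* Allow only these two levels; anything else is a violation. */
	static const latch_level_t allowed[] = {
		SYNC_DICT, SYNC_NO_ORDER_CHECK };

	sync_allowed_latches check(allowed, allowed + UT_ARR_SIZE(allowed));

	/* check(level) returns true (a violation) for any level that is
	not contained in allowed[]. */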
+
+/** Get the latch id from a latch name.
+@param[in] name Latch name
+@return the latch id, or LATCH_ID_NONE if not found. */
+latch_id_t
+sync_latch_get_id(const char* name);
+
+typedef ulint rw_lock_flags_t;
+
+/* Flags to specify lock types for rw_lock_own_flagged() */
+enum rw_lock_flag_t {
+ RW_LOCK_FLAG_S = 1 << 0,
+ RW_LOCK_FLAG_X = 1 << 1,
+ RW_LOCK_FLAG_SX = 1 << 2
+};
+
+#endif /* UNIV_DEBUG */
+
+#endif /* UNIV_INNOCHECKSUM */
+
+#ifdef _WIN64
+static inline ulint my_atomic_addlint(ulint *A, ulint B)
+{
+ return ulint(my_atomic_add64((volatile int64*)A, B));
+}
+
+static inline ulint my_atomic_loadlint(const ulint *A)
+{
+ return ulint(my_atomic_load64((volatile int64*)A));
+}
+
+static inline lint my_atomic_addlint(volatile lint *A, lint B)
+{
+ return my_atomic_add64((volatile int64*)A, B);
+}
+
+static inline lint my_atomic_loadlint(const lint *A)
+{
+ return lint(my_atomic_load64((volatile int64*)A));
+}
+
+static inline void my_atomic_storelint(ulint *A, ulint B)
+{
+ my_atomic_store64((volatile int64*)A, B);
+}
+
+static inline lint my_atomic_caslint(volatile lint *A, lint *B, lint C)
+{
+ return my_atomic_cas64((volatile int64*)A, (int64 *)B, C);
+}
+
+static inline ulint my_atomic_caslint(ulint *A, ulint *B, ulint C)
+{
+ return my_atomic_cas64((volatile int64*)A, (int64 *)B, (int64)C);
+}
+
+#else
+#define my_atomic_addlint my_atomic_addlong
+#define my_atomic_loadlint my_atomic_loadlong
+#define my_atomic_caslint my_atomic_caslong
+#endif
+
+/** Simple counter aligned to CACHE_LINE_SIZE
+@tparam Type the integer type of the counter
+@tparam atomic whether to use atomic memory access */
+template <typename Type = ulint, bool atomic = false>
+struct MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) simple_counter
+{
+ /** Increment the counter */
+ Type inc() { return add(1); }
+ /** Decrement the counter */
+ Type dec() { return sub(1); }
+
+ /** Add to the counter
+ @param[in] i amount to be added
+ @return the value of the counter after adding */
+ Type add(Type i)
+ {
+ compile_time_assert(!atomic || sizeof(Type) == sizeof(lint));
+ if (atomic) {
+#ifdef _MSC_VER
+// Suppress type conversion / possible loss of data warning
+#pragma warning (push)
+#pragma warning (disable : 4244)
+#endif
+ return Type(my_atomic_addlint(reinterpret_cast<ulint*>
+ (&m_counter), i));
+#ifdef _MSC_VER
+#pragma warning (pop)
+#endif
+ } else {
+ return m_counter += i;
+ }
+ }
+ /** Subtract from the counter
+ @param[in] i amount to be subtracted
+ @return the value of the counter after subtracting */
+ Type sub(Type i)
+ {
+ compile_time_assert(!atomic || sizeof(Type) == sizeof(lint));
+ if (atomic) {
+ return Type(my_atomic_addlint(&m_counter, -lint(i)));
+ } else {
+ return m_counter -= i;
+ }
+ }
+
+ /** @return the value of the counter (non-atomic access)! */
+ operator Type() const { return m_counter; }
+
+private:
+ /** The counter */
+ Type m_counter;
+};
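A sketch of both flavours, assuming a single writer for the non-atomic counter (all names are illustrative):

	/* Cache-line-aligned counters: no false sharing between them. */
	static simple_counter<ulint, true>	n_reads;	/* atomic add */
	static simple_counter<ulint>		n_scans;	/* plain add */

	void on_read() { n_reads.inc(); }
	void on_scan() { n_scans.add(2); }
	ulint reads()  { return(n_reads); }	/* non-atomic read */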
+
+#endif /* sync0types_h */
diff --git a/storage/innobase/include/trx0i_s.h b/storage/innobase/include/trx0i_s.h
index 77ab4f406cb..7e766072272 100644
--- a/storage/innobase/include/trx0i_s.h
+++ b/storage/innobase/include/trx0i_s.h
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2019, MariaDB Corporation.
+Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -29,10 +29,8 @@ Created July 17, 2007 Vasil Dimov
#ifndef trx0i_s_h
#define trx0i_s_h
-#include "univ.i"
#include "trx0types.h"
#include "dict0types.h"
-#include "ut0ut.h"
/** The maximum amount of memory that can be consumed by innodb_trx,
innodb_locks and innodb_lock_waits information schema tables. */
@@ -134,14 +132,12 @@ struct i_s_trx_row_t {
/*!< pointer to a row
in innodb_locks if trx
is waiting, or NULL */
- time_t trx_wait_started; /*!< trx->lock.wait_started */
- ullint trx_weight; /*!< TRX_WEIGHT() */
+ time_t trx_wait_started; /*!< trx_t->lock.wait_started */
+ uintmax_t trx_weight; /*!< TRX_WEIGHT() */
ulint trx_mysql_thread_id; /*!< thd_get_thread_id() */
const char* trx_query; /*!< MySQL statement being
executed in the transaction */
- struct charset_info_st* trx_query_cs;
- /*!< charset encode the MySQL
- statement */
+ CHARSET_INFO* trx_query_cs; /*!< the charset of trx_query */
const char* trx_operation_state; /*!< trx_t::op_info */
ulint trx_tables_in_use;/*!< n_mysql_tables_in_use in
trx_t */
@@ -154,7 +150,7 @@ struct i_s_trx_row_t {
/*!< mem_heap_get_size(
trx->lock_heap) */
ulint trx_rows_locked;/*!< lock_number_of_rows_locked() */
- ullint trx_rows_modified;/*!< trx_t::undo_no */
+ uintmax_t trx_rows_modified;/*!< trx_t::undo_no */
ulint trx_concurrency_tickets;
/*!< n_tickets_to_enter_innodb in
trx_t */
@@ -166,10 +162,6 @@ struct i_s_trx_row_t {
/*!< check_foreigns in trx_t */
const char* trx_foreign_key_error;
/*!< detailed_error in trx_t */
- ibool trx_has_search_latch;
- /*!< has_search_latch in trx_t */
- ulint trx_search_latch_timeout;
- /*!< search_latch_timeout in trx_t */
ulint trx_is_read_only;
/*!< trx_t::read_only */
ulint trx_is_autocommit_non_locking;
@@ -201,14 +193,12 @@ extern trx_i_s_cache_t* trx_i_s_cache;
/*******************************************************************//**
Initialize INFORMATION SCHEMA trx related cache. */
-UNIV_INTERN
void
trx_i_s_cache_init(
/*===============*/
trx_i_s_cache_t* cache); /*!< out: cache to init */
/*******************************************************************//**
Free the INFORMATION SCHEMA trx related cache. */
-UNIV_INTERN
void
trx_i_s_cache_free(
/*===============*/
@@ -216,7 +206,6 @@ trx_i_s_cache_free(
/*******************************************************************//**
Issue a shared/read lock on the tables cache. */
-UNIV_INTERN
void
trx_i_s_cache_start_read(
/*=====================*/
@@ -224,7 +213,6 @@ trx_i_s_cache_start_read(
/*******************************************************************//**
Release a shared/read lock on the tables cache. */
-UNIV_INTERN
void
trx_i_s_cache_end_read(
/*===================*/
@@ -232,7 +220,6 @@ trx_i_s_cache_end_read(
/*******************************************************************//**
Issue an exclusive/write lock on the tables cache. */
-UNIV_INTERN
void
trx_i_s_cache_start_write(
/*======================*/
@@ -240,7 +227,6 @@ trx_i_s_cache_start_write(
/*******************************************************************//**
Release an exclusive/write lock on the tables cache. */
-UNIV_INTERN
void
trx_i_s_cache_end_write(
/*====================*/
@@ -250,8 +236,7 @@ trx_i_s_cache_end_write(
/*******************************************************************//**
Retrieves the number of used rows in the cache for a given
INFORMATION SCHEMA table.
-@return number of rows */
-UNIV_INTERN
+@return number of rows */
ulint
trx_i_s_cache_get_rows_used(
/*========================*/
@@ -261,8 +246,7 @@ trx_i_s_cache_get_rows_used(
/*******************************************************************//**
Retrieves the nth row in the cache for a given INFORMATION SCHEMA
table.
-@return row */
-UNIV_INTERN
+@return row */
void*
trx_i_s_cache_get_nth_row(
/*======================*/
@@ -272,8 +256,7 @@ trx_i_s_cache_get_nth_row(
/*******************************************************************//**
Update the transactions cache if it has not been read for some time.
-@return 0 - fetched, 1 - not */
-UNIV_INTERN
+@return 0 - fetched, 1 - not */
int
trx_i_s_possibly_fetch_data_into_cache(
/*===================================*/
@@ -282,13 +265,11 @@ trx_i_s_possibly_fetch_data_into_cache(
/*******************************************************************//**
Returns TRUE if the data in the cache is truncated due to the memory
limit posed by TRX_I_S_MEM_LIMIT.
-@return TRUE if truncated */
-UNIV_INTERN
+@return TRUE if truncated */
ibool
trx_i_s_cache_is_truncated(
/*=======================*/
trx_i_s_cache_t* cache); /*!< in: cache */
-
/** The maximum length of a resulting lock_id_size in
trx_i_s_create_lock_id(), not including the terminating NUL.
":%lu:%lu:%lu" -> 63 chars */
@@ -299,8 +280,7 @@ Crafts a lock id string from a i_s_locks_row_t object. Returns its
second argument. This function aborts if there is not enough space in
lock_id. Be sure to provide at least TRX_I_S_LOCK_ID_MAX_LEN + 1 if you
want to be 100% sure that it will not abort.
-@return resulting lock id */
-UNIV_INTERN
+@return resulting lock id */
char*
trx_i_s_create_lock_id(
/*===================*/
diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h
index f5b3b6efa7e..73d497dd64a 100644
--- a/storage/innobase/include/trx0purge.h
+++ b/storage/innobase/include/trx0purge.h
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
+Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,17 +27,11 @@ Created 3/26/1996 Heikki Tuuri
#ifndef trx0purge_h
#define trx0purge_h
-#include "univ.i"
-#include "trx0types.h"
-#include "mtr0mtr.h"
#include "trx0sys.h"
#include "que0types.h"
#include "page0page.h"
-#include "usr0sess.h"
-#include "fil0fil.h"
-/** The global data structure coordinating a purge */
-extern trx_purge_t* purge_sys;
+#include <queue>
/** A dummy undo record used as a return value when we have a whole undo log
which needs no purge */
@@ -46,43 +40,26 @@ extern trx_undo_rec_t trx_purge_dummy_rec;
/********************************************************************//**
Calculates the file address of an undo log header when we have the file
address of its history list node.
-@return file address of the log */
+@return file address of the log */
UNIV_INLINE
fil_addr_t
trx_purge_get_log_from_hist(
/*========================*/
fil_addr_t node_addr); /*!< in: file address of the history
list node of the log */
-/********************************************************************//**
-Creates the global purge system control structure and inits the history
-mutex. */
-UNIV_INTERN
-void
-trx_purge_sys_create(
-/*=================*/
- ulint n_purge_threads,/*!< in: number of purge threads */
- ib_bh_t* ib_bh); /*!< in/own: UNDO log min binary heap*/
-/********************************************************************//**
-Frees the global purge system control structure. */
-UNIV_INTERN
-void
-trx_purge_sys_close(void);
-/*======================*/
/************************************************************************
Adds the update undo log as the first log in the history list. Removes the
update undo log segment from the rseg slot if it is too big for reuse. */
-UNIV_INTERN
void
trx_purge_add_update_undo_to_history(
/*=================================*/
- trx_t* trx, /*!< in: transaction */
- page_t* undo_page, /*!< in: update undo log header page,
- x-latched */
- mtr_t* mtr); /*!< in: mtr */
+ trx_t* trx, /*!< in: transaction */
+ page_t* undo_page, /*!< in: update undo log header page,
+ x-latched */
+ mtr_t* mtr); /*!< in: mtr */
/*******************************************************************//**
This function runs a purge batch.
-@return number of undo log pages handled in the batch */
-UNIV_INTERN
+@return number of undo log pages handled in the batch */
ulint
trx_purge(
/*======*/
@@ -90,16 +67,19 @@ trx_purge(
submit to task queue. */
ulint limit, /*!< in: the maximum number of
records to purge in one batch */
- bool truncate); /*!< in: truncate history if true */
+ bool truncate /*!< in: truncate history if true */
+#ifdef UNIV_DEBUG
+ , srv_slot_t *slot /*!< in/out: purge coordinator
+ thread slot */
+#endif
+);
/*******************************************************************//**
Stop purge and wait for it to stop, move to PURGE_STATE_STOP. */
-UNIV_INTERN
void
trx_purge_stop(void);
/*================*/
/*******************************************************************//**
Resume purge, move to PURGE_STATE_RUN. */
-UNIV_INTERN
void
trx_purge_run(void);
/*================*/
@@ -116,29 +96,367 @@ enum purge_state_t {
/*******************************************************************//**
Get the purge state.
@return purge state. */
-UNIV_INTERN
purge_state_t
trx_purge_state(void);
/*=================*/
+/** Rollback segments of a transaction, with the trx_no at which
+they are scheduled for purge. */
+class TrxUndoRsegs {
+private:
+ typedef std::vector<trx_rseg_t*, ut_allocator<trx_rseg_t*> >
+ trx_rsegs_t;
+public:
+ typedef trx_rsegs_t::iterator iterator;
+
+ /** Default constructor */
+ TrxUndoRsegs() : m_trx_no() { }
+
+ explicit TrxUndoRsegs(trx_id_t trx_no)
+ :
+ m_trx_no(trx_no)
+ {
+ // Do nothing
+ }
+
+ /** Get the transaction number
+ @return the transaction number. */
+ trx_id_t get_trx_no() const
+ {
+ return(m_trx_no);
+ }
+
+ /** Add rollback segment.
+ @param rseg rollback segment to add. */
+ void push_back(trx_rseg_t* rseg)
+ {
+ m_rsegs.push_back(rseg);
+ }
+
+ /** Erase the element pointed to by the given iterator.
+ @param[in,out] it iterator to the element to erase */
+ void erase(iterator& it)
+ {
+ m_rsegs.erase(it);
+ }
+
+ /** Number of registered rsegs.
+ @return size of rseg list. */
+ ulint size() const
+ {
+ return(m_rsegs.size());
+ }
+
+ /**
+ @return an iterator to the first element */
+ iterator begin()
+ {
+ return(m_rsegs.begin());
+ }
+
+ /**
+ @return an iterator to the end */
+ iterator end()
+ {
+ return(m_rsegs.end());
+ }
+
+ /** Append rollback segments from referred instance to current
+ instance. */
+ void append(const TrxUndoRsegs& append_from)
+ {
+ ut_ad(get_trx_no() == append_from.get_trx_no());
+
+ m_rsegs.insert(m_rsegs.end(),
+ append_from.m_rsegs.begin(),
+ append_from.m_rsegs.end());
+ }
+
+ /** Compare two TrxUndoRsegs based on trx_no.
+ @param lhs first element to compare
+ @param rhs second element to compare
+ @return true if lhs.m_trx_no > rhs.m_trx_no, else false. */
+ bool operator()(const TrxUndoRsegs& lhs, const TrxUndoRsegs& rhs)
+ {
+ return(lhs.m_trx_no > rhs.m_trx_no);
+ }
+
+ /** The compiler-defined copy constructor and assignment operator
+ are fine, given that the class holds no reference to memory
+ outside the scope of the class object. */
+
+private:
+ /** The rollback segments' transaction number. */
+ trx_id_t m_trx_no;
+
+ /** Rollback segments of a transaction, scheduled for purge. */
+ trx_rsegs_t m_rsegs;
+};
+
+typedef std::priority_queue<
+ TrxUndoRsegs,
+ std::vector<TrxUndoRsegs, ut_allocator<TrxUndoRsegs> >,
+ TrxUndoRsegs> purge_pq_t;
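Because TrxUndoRsegs doubles as the comparator and its operator() compares with '>', the priority queue is effectively a min-heap: top() yields the element with the smallest trx_no. A sketch with assumed transaction numbers:

	purge_pq_t	pq;

	pq.push(TrxUndoRsegs(100));
	pq.push(TrxUndoRsegs(42));

	/* The '>' comparison inverts the default max-heap ordering. */
	ut_ad(pq.top().get_trx_no() == 42);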
+
+/**
+Chooses the rollback segment with the smallest trx_no. */
+struct TrxUndoRsegsIterator {
+
+ /** Constructor */
+ TrxUndoRsegsIterator();
+
+ /** Sets the next rseg to purge in purge_sys.
+ @return whether anything is to be purged */
+ bool set_next();
+
+private:
+ // Disable copying
+ TrxUndoRsegsIterator(const TrxUndoRsegsIterator&);
+ TrxUndoRsegsIterator& operator=(const TrxUndoRsegsIterator&);
+
+ /** The current element to process */
+ TrxUndoRsegs m_trx_undo_rsegs;
+
+ /** Track the current element in m_trx_undo_rsegs */
+ TrxUndoRsegs::iterator m_iter;
+
+ /** Sentinel value */
+ static const TrxUndoRsegs NullElement;
+};
+
/** This is the purge pointer/iterator. We need both the undo no and the
transaction no up to which purge has parsed and applied the records. */
struct purge_iter_t {
+ purge_iter_t()
+ :
+ trx_no(),
+ undo_no(),
+ undo_rseg_space(ULINT_UNDEFINED)
+ {
+ // Do nothing
+ }
+
trx_id_t trx_no; /*!< Purge has advanced past all
transactions whose number is less
than this */
undo_no_t undo_no; /*!< Purge has advanced past all records
whose undo number is less than this */
+ ulint undo_rseg_space;
+ /*!< Space id in which the last
+ undo record resided. */
};
+
+/* Namespace holding the functions and variables needed to truncate
+an undo tablespace. */
+namespace undo {
+
+ typedef std::vector<ulint> undo_spaces_t;
+ typedef std::vector<trx_rseg_t*> rseg_for_trunc_t;
+
+ /** Mark completion of the undo truncate action by writing a magic
+ number to the log file and then removing it from disk.
+ Why write a magic number if the file is removed right afterwards?
+ This guards against file-system unlink anomalies that keep a link
+ to the file even after the unlink action has succeeded and the
+ reference count has dropped to 0.
+ @param[in] space_id id of the undo tablespace to truncate.*/
+ void done(ulint space_id);
+
+ /** Check if the TRUNCATE_DDL_LOG file exists.
+ @param[in] space_id id of the undo tablespace.
+ @return true if it exists, else false. */
+ bool is_log_present(ulint space_id);
+
+ /** Track an UNDO tablespace marked for truncate. */
+ class Truncate {
+ public:
+
+ Truncate()
+ :
+ m_undo_for_trunc(ULINT_UNDEFINED),
+ m_rseg_for_trunc(),
+ m_scan_start(1),
+ m_purge_rseg_truncate_frequency(
+ static_cast<ulint>(
+ srv_purge_rseg_truncate_frequency))
+ {
+ /* Do Nothing. */
+ }
+
+ /** Clear the cached rollback segments. Normally done
+ when purge is about to shut down. */
+ void clear()
+ {
+ reset();
+ rseg_for_trunc_t temp;
+ m_rseg_for_trunc.swap(temp);
+ }
+
+ /** Is tablespace selected for truncate.
+ @return true if undo tablespace is marked for truncate */
+ bool is_marked() const
+ {
+ return(!(m_undo_for_trunc == ULINT_UNDEFINED));
+ }
+
+ /** Mark the tablespace for truncate.
+ @param[in] undo_id id of the undo tablespace to truncate. */
+ void mark(ulint undo_id)
+ {
+ m_undo_for_trunc = undo_id;
+
+ m_scan_start = (undo_id + 1)
+ % (srv_undo_tablespaces_active + 1);
+ if (m_scan_start == 0) {
+ /* Note: UNDO tablespace ids start from 1. */
+ m_scan_start = 1;
+ }
+
+ /* We found an UNDO-tablespace to truncate so set the
+ local purge rseg truncate frequency to 1. This will help
+ accelerate the purge action and in turn truncate. */
+ m_purge_rseg_truncate_frequency = 1;
+ }
+
+ /** Get the tablespace marked for truncate.
+ @return tablespace id marked for truncate. */
+ ulint get_marked_space_id() const
+ {
+ return(m_undo_for_trunc);
+ }
+
+ /** Add rseg to truncate vector.
+ @param[in,out] rseg rseg for truncate */
+ void add_rseg_to_trunc(trx_rseg_t* rseg)
+ {
+ m_rseg_for_trunc.push_back(rseg);
+ }
+
+ /** Get the number of rsegs registered for truncate.
+ @return number of rsegs that belong to the tablespace marked
+ for truncate. */
+ ulint rsegs_size() const
+ {
+ return(m_rseg_for_trunc.size());
+ }
+
+ /** Get the ith registered rseg.
+ @param[in] id index of the rseg to get.
+ @return pointer to the registered rseg. */
+ trx_rseg_t* get_ith_rseg(ulint id)
+ {
+ ut_ad(id < m_rseg_for_trunc.size());
+ return(m_rseg_for_trunc.at(id));
+ }
+
+ /** Reset for next rseg truncate. */
+ void reset()
+ {
+ m_undo_for_trunc = ULINT_UNDEFINED;
+ m_rseg_for_trunc.clear();
+
+ /* Sync with global value as we are done with
+ truncate now. */
+ m_purge_rseg_truncate_frequency = static_cast<ulint>(
+ srv_purge_rseg_truncate_frequency);
+ }
+
+ /** Get the tablespace id to start scanning from.
+ @return id of UNDO tablespace to start scanning from. */
+ ulint get_scan_start() const
+ {
+ return(m_scan_start);
+ }
+
+ /** Check if the tablespace needs fix-up (based on presence of
+ DDL truncate log)
+ @param space_id space id of the undo tablespace to check
+ @return true if fix up is needed else false */
+ bool needs_fix_up(ulint space_id) const
+ {
+ return(is_log_present(space_id));
+ }
+
+ /** Add undo tablespace to truncate vector.
+ @param[in] space_id space id of tablespace to
+ truncate */
+ static void add_space_to_trunc_list(ulint space_id)
+ {
+ s_spaces_to_truncate.push_back(space_id);
+ }
+
+ /** Clear the truncate vector. */
+ static void clear_trunc_list()
+ {
+ s_spaces_to_truncate.clear();
+ }
+
+ /** Is tablespace marked for truncate.
+ @param[in] space_id space id to check
+ @return true if marked for truncate, else false. */
+ static bool is_tablespace_truncated(ulint space_id)
+ {
+ return(std::find(s_spaces_to_truncate.begin(),
+ s_spaces_to_truncate.end(), space_id)
+ != s_spaces_to_truncate.end());
+ }
+
+ /** Was a tablespace truncated at startup
+ @param[in] space_id space id to check
+ @return whether space_id was truncated at startup */
+ static bool was_tablespace_truncated(ulint space_id)
+ {
+ return(std::find(s_fix_up_spaces.begin(),
+ s_fix_up_spaces.end(),
+ space_id)
+ != s_fix_up_spaces.end());
+ }
+
+ /** Get local rseg purge truncate frequency
+ @return rseg purge truncate frequency. */
+ ulint get_rseg_truncate_frequency() const
+ {
+ return(m_purge_rseg_truncate_frequency);
+ }
+
+ private:
+ /** UNDO tablespace marked for truncate. */
+ ulint m_undo_for_trunc;
+
+ /** rsegs that reside in the UNDO tablespace marked for
+ truncate. */
+ rseg_for_trunc_t m_rseg_for_trunc;
+
+ /** Start scanning for an UNDO tablespace from this space_id.
+ This avoids always biasing selection towards one tablespace. */
+ ulint m_scan_start;
+
+ /** Rollback segment purge frequency. This is a local value
+ maintained alongside the global value. It is set to the global
+ value on start, but when a tablespace is marked for truncate it
+ is set to 1, and purge then uses the minimum of the two. */
+ ulint m_purge_rseg_truncate_frequency;
+
+ /** List of UNDO tablespace(s) to truncate. */
+ static undo_spaces_t s_spaces_to_truncate;
+ public:
+ /** Undo tablespaces that were truncated at startup */
+ static undo_spaces_t s_fix_up_spaces;
+ }; /* class Truncate */
+
+}; /* namespace undo */
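A sketch of the marking cycle (space id 3 is illustrative):

	undo::Truncate	trunc;

	ut_ad(!trunc.is_marked());
	trunc.mark(3);		/* also forces truncate frequency to 1 */
	ut_ad(trunc.get_marked_space_id() == 3);
	ut_ad(trunc.get_rseg_truncate_frequency() == 1);
	trunc.reset();		/* resync frequency with the global value */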
+
/** The control structure used in the purge operation */
-struct trx_purge_t{
- sess_t* sess; /*!< System session running the purge
- query */
- trx_t* trx; /*!< System transaction running the
- purge query: this trx is not in the
- trx list of the trx system and it
- never ends */
+class purge_sys_t
+{
+public:
+ /** Construct the purge system. */
+ purge_sys_t();
+ /** Destruct the purge system. */
+ ~purge_sys_t();
+
rw_lock_t latch; /*!< The latch protecting the purge
view. A purge operation must acquire an
x-latch here for the instant at which
@@ -148,7 +466,7 @@ struct trx_purge_t{
protects state and running */
os_event_t event; /*!< State signal event;
os_event_set() and os_event_reset()
- are protected by trx_purge_t::latch
+ are protected by purge_sys_t::latch
X-lock */
 ulint n_stop; /*!< Counter to track the number of stops */
volatile bool running; /*!< true, if purge is active,
@@ -158,11 +476,11 @@ struct trx_purge_t{
without holding the latch. */
que_t* query; /*!< The query graph which will do the
parallelized purge operation */
- read_view_t* view; /*!< The purge will not remove undo logs
+ ReadView view; /*!< The purge will not remove undo logs
which are >= this view (purge view) */
- volatile ulint n_submitted; /*!< Count of total tasks submitted
+ ulint n_submitted; /*!< Count of total tasks submitted
to the task queue */
- volatile ulint n_completed; /*!< Count of total tasks completed */
+ ulint n_completed; /*!< Count of total tasks completed */
/*------------------------------*/
/* The following two fields form the 'purge pointer' which advances
@@ -182,11 +500,8 @@ struct trx_purge_t{
purged already accurately. */
#endif /* UNIV_DEBUG */
/*-----------------------------*/
- ibool next_stored; /*!< TRUE if the info of the next record
- to purge is stored below: if yes, then
- the transaction number and the undo
- number of the record are stored in
- purge_trx_no and purge_undo_no above */
+ bool next_stored; /*!< whether rseg holds the next record
+ to purge */
trx_rseg_t* rseg; /*!< Rollback segment for the next undo
record to purge */
ulint page_no; /*!< Page number for the next undo
@@ -198,25 +513,30 @@ struct trx_purge_t{
ulint hdr_page_no; /*!< Header page of the undo log where
the next record to purge belongs */
ulint hdr_offset; /*!< Header byte offset on the page */
- /*-----------------------------*/
- mem_heap_t* heap; /*!< Temporary storage used during a
- purge: can be emptied after purge
- completes */
- /*-----------------------------*/
- ib_bh_t* ib_bh; /*!< Binary min-heap, ordered on
- rseg_queue_t::trx_no. It is protected
- by the bh_mutex */
- ib_mutex_t bh_mutex; /*!< Mutex protecting ib_bh */
+
+
+ TrxUndoRsegsIterator
+ rseg_iter; /*!< Iterator to get the next rseg
+ to process */
+
+ purge_pq_t purge_queue; /*!< Binary min-heap, ordered on
+ TrxUndoRsegs::trx_no. It is protected
+ by the pq_mutex */
+ PQMutex pq_mutex; /*!< Mutex protecting purge_queue */
+
+ undo::Truncate undo_trunc; /*!< Track UNDO tablespace marked
+ for truncate. */
};
+/** The global data structure coordinating a purge */
+extern purge_sys_t* purge_sys;
+
/** Info required to purge a record */
struct trx_purge_rec_t {
trx_undo_rec_t* undo_rec; /*!< Record to purge */
 roll_ptr_t roll_ptr; /*!< File pointer to UNDO record */
};
-#ifndef UNIV_NONINL
#include "trx0purge.ic"
-#endif
-#endif
+#endif /* trx0purge_h */
diff --git a/storage/innobase/include/trx0purge.ic b/storage/innobase/include/trx0purge.ic
index a32ecefe4b5..0ccff6f7798 100644
--- a/storage/innobase/include/trx0purge.ic
+++ b/storage/innobase/include/trx0purge.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,7 +28,7 @@ Created 3/26/1996 Heikki Tuuri
/********************************************************************//**
Calculates the file address of an undo log header when we have the file
address of its history list node.
-@return file address of the log */
+@return file address of the log */
UNIV_INLINE
fil_addr_t
trx_purge_get_log_from_hist(
@@ -41,22 +41,23 @@ trx_purge_get_log_from_hist(
return(node_addr);
}
-#ifdef UNIV_DEBUG
/********************************************************************//**
Checks the ordering of the purge iterator relative to the purge limit.
-@return TRUE if purge_sys_t::limit <= purge_sys_t::iter*/
+@return true if purge_sys_t::limit <= purge_sys_t::iter */
UNIV_INLINE
-ibool
+bool
trx_purge_check_limit(void)
/*=======================*/
{
- ut_ad(purge_sys->limit.trx_no <= purge_sys->iter.trx_no);
-
- if (purge_sys->limit.trx_no == purge_sys->iter.trx_no) {
- ut_ad(purge_sys->limit.undo_no <= purge_sys->iter.undo_no);
- }
-
- return(TRUE);
+ /* limit tracks up to which point purge elements have been
+ processed, so limit <= iter.
+ undo_no ordering is enforced only within the same rollback segment.
+ If a transaction uses multiple rollback segments, then the
+ rollback segment space id must be considered too. */
+ return(purge_sys->iter.trx_no > purge_sys->limit.trx_no
+ || (purge_sys->iter.trx_no == purge_sys->limit.trx_no
+ && ((purge_sys->iter.undo_no >= purge_sys->limit.undo_no)
+ || (purge_sys->iter.undo_rseg_space
+ != purge_sys->limit.undo_rseg_space))));
}
-#endif /* UNIV_DEBUG */
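A worked example of the rewritten predicate, with assumed values:

	/* iter  = { trx_no = 10, undo_no = 5, undo_rseg_space = 2 }
	limit = { trx_no = 10, undo_no = 7, undo_rseg_space = 3 }
	The trx_no values tie and iter.undo_no < limit.undo_no, yet the
	check returns true, because undo_no ordering is only meaningful
	within a single rollback segment space. */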
diff --git a/storage/innobase/include/trx0rec.h b/storage/innobase/include/trx0rec.h
index cfb3bd81206..1be38545eb0 100644
--- a/storage/innobase/include/trx0rec.h
+++ b/storage/innobase/include/trx0rec.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2018, MariaDB Corporation.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,20 +27,17 @@ Created 3/26/1996 Heikki Tuuri
#ifndef trx0rec_h
#define trx0rec_h
-#include "univ.i"
#include "trx0types.h"
#include "row0types.h"
#include "mtr0mtr.h"
-#include "dict0types.h"
-#include "data0data.h"
#include "rem0types.h"
-
-#ifndef UNIV_HOTBACKUP
-# include "que0types.h"
+#include "page0types.h"
+#include "row0log.h"
+#include "que0types.h"
/***********************************************************************//**
Copies the undo record to the heap.
-@return own: copy of undo log record */
+@return own: copy of undo log record */
UNIV_INLINE
trx_undo_rec_t*
trx_undo_rec_copy(
@@ -49,7 +46,7 @@ trx_undo_rec_copy(
mem_heap_t* heap); /*!< in: heap where copied */
/**********************************************************************//**
Reads the undo log record type.
-@return record type */
+@return record type */
UNIV_INLINE
ulint
trx_undo_rec_get_type(
@@ -57,7 +54,7 @@ trx_undo_rec_get_type(
const trx_undo_rec_t* undo_rec); /*!< in: undo log record */
/**********************************************************************//**
Reads from an undo log record the record compiler info.
-@return compiler info */
+@return compiler info */
UNIV_INLINE
ulint
trx_undo_rec_get_cmpl_info(
@@ -65,7 +62,7 @@ trx_undo_rec_get_cmpl_info(
const trx_undo_rec_t* undo_rec); /*!< in: undo log record */
/**********************************************************************//**
Returns TRUE if an undo log record contains an extern storage field.
-@return TRUE if extern */
+@return TRUE if extern */
UNIV_INLINE
ibool
trx_undo_rec_get_extern_storage(
@@ -73,21 +70,12 @@ trx_undo_rec_get_extern_storage(
const trx_undo_rec_t* undo_rec); /*!< in: undo log record */
/**********************************************************************//**
Reads the undo log record number.
-@return undo no */
+@return undo no */
UNIV_INLINE
undo_no_t
trx_undo_rec_get_undo_no(
/*=====================*/
const trx_undo_rec_t* undo_rec); /*!< in: undo log record */
-/**********************************************************************//**
-Returns the start of the undo record data area.
-@return offset to the data area */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_offset(
-/*====================*/
- undo_no_t undo_no) /*!< in: undo no read from node */
- MY_ATTRIBUTE((const));
/**********************************************************************//**
Returns the start of the undo record data area. */
@@ -96,8 +84,7 @@ Returns the start of the undo record data area. */
/**********************************************************************//**
Reads from an undo log record the general parameters.
-@return remaining part of undo log record after reading these values */
-UNIV_INTERN
+@return remaining part of undo log record after reading these values */
byte*
trx_undo_rec_get_pars(
/*==================*/
@@ -113,8 +100,7 @@ trx_undo_rec_get_pars(
MY_ATTRIBUTE((nonnull));
/*******************************************************************//**
Builds a row reference from an undo log record.
-@return pointer to remaining part of undo record */
-UNIV_INTERN
+@return pointer to remaining part of undo record */
byte*
trx_undo_rec_get_row_ref(
/*=====================*/
@@ -128,25 +114,14 @@ trx_undo_rec_get_row_ref(
dtuple_t** ref, /*!< out, own: row reference */
mem_heap_t* heap); /*!< in: memory heap from which the memory
needed is allocated */
-/*******************************************************************//**
-Skips a row reference from an undo log record.
-@return pointer to remaining part of undo record */
-UNIV_INTERN
-byte*
-trx_undo_rec_skip_row_ref(
-/*======================*/
- byte* ptr, /*!< in: remaining part in update undo log
- record, at the start of the row reference */
- dict_index_t* index); /*!< in: clustered index */
/**********************************************************************//**
Reads from an undo log update record the system field values of the old
version.
-@return remaining part of undo log record after reading these values */
-UNIV_INTERN
+@return remaining part of undo log record after reading these values */
byte*
trx_undo_update_rec_get_sys_cols(
/*=============================*/
- byte* ptr, /*!< in: remaining part of undo
+ const byte* ptr, /*!< in: remaining part of undo
log record after reading
general parameters */
trx_id_t* trx_id, /*!< out: trx id */
@@ -156,11 +131,10 @@ trx_undo_update_rec_get_sys_cols(
Builds an update vector based on a remaining part of an undo log record.
@return remaining part of the record, NULL if an error detected, which
means that the record is corrupted */
-UNIV_INTERN
byte*
trx_undo_update_rec_get_update(
/*===========================*/
- byte* ptr, /*!< in: remaining part in update undo log
+ const byte* ptr, /*!< in: remaining part in update undo log
record, after reading the row reference
NOTE that this copy of the undo log record must
be preserved as long as the update vector is
@@ -175,7 +149,6 @@ trx_undo_update_rec_get_update(
trx_id_t trx_id, /*!< in: transaction id from this undorecord */
roll_ptr_t roll_ptr,/*!< in: roll pointer from this undo record */
ulint info_bits,/*!< in: info bits from this undo record */
- trx_t* trx, /*!< in: transaction */
mem_heap_t* heap, /*!< in: memory heap from which the memory
needed is allocated */
upd_t** upd); /*!< out, own: update vector */
@@ -183,12 +156,11 @@ trx_undo_update_rec_get_update(
Builds a partial row from an update undo log record, for purge.
It contains the columns which occur as ordering in any index of the table.
Any missing columns are indicated by col->mtype == DATA_MISSING.
-@return pointer to remaining part of undo record */
-UNIV_INTERN
+@return pointer to remaining part of undo record */
byte*
trx_undo_rec_get_partial_row(
/*=========================*/
- byte* ptr, /*!< in: remaining part in update undo log
+ const byte* ptr, /*!< in: remaining part in update undo log
record of a suitable type, at the start of
the stored index columns;
NOTE that this copy of the undo log record must
@@ -204,13 +176,18 @@ trx_undo_rec_get_partial_row(
mem_heap_t* heap) /*!< in: memory heap from which the memory
needed is allocated */
MY_ATTRIBUTE((nonnull, warn_unused_result));
+/** Report a RENAME TABLE operation.
+@param[in,out] trx transaction
+@param[in] table table that is being renamed
+@return DB_SUCCESS or error code */
+dberr_t trx_undo_report_rename(trx_t* trx, const dict_table_t* table)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
/***********************************************************************//**
Writes information to an undo log about an insert, update, or a delete marking
of a clustered index record. This information is used in a rollback of the
transaction and in consistent reads that must look to the history of this
transaction.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
dberr_t
trx_undo_report_row_operation(
/*==========================*/
@@ -218,29 +195,35 @@ trx_undo_report_row_operation(
dict_index_t* index, /*!< in: clustered index */
const dtuple_t* clust_entry, /*!< in: in the case of an insert,
index entry to insert into the
- clustered index, otherwise NULL */
+ clustered index; in updates,
+ may contain a clustered index
+ record tuple that also contains
+ virtual columns of the table;
+ otherwise, NULL */
const upd_t* update, /*!< in: in the case of an update,
the update vector, otherwise NULL */
ulint cmpl_info, /*!< in: compiler info on secondary
index updates */
const rec_t* rec, /*!< in: case of an update or delete
marking, the record in the clustered
- index, otherwise NULL */
- const ulint* offsets, /*!< in: rec_get_offsets(rec) */
+ index; NULL if insert */
+ const offset_t* offsets, /*!< in: rec_get_offsets(rec) */
roll_ptr_t* roll_ptr) /*!< out: DB_ROLL_PTR to the
undo log record */
MY_ATTRIBUTE((nonnull(1,2,8), warn_unused_result));
-/******************************************************************//**
-Copies an undo record to heap. This function can be called if we know that
-the undo log record exists.
-@return own: copy of the record */
-UNIV_INTERN
-trx_undo_rec_t*
-trx_undo_get_undo_rec_low(
-/*======================*/
- roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
- mem_heap_t* heap) /*!< in: memory heap where copied */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** Status bits used for trx_undo_prev_version_build() */
+
+/** TRX_UNDO_PREV_IN_PURGE tells trx_undo_prev_version_build() that it
+is being called from the purge thread, and that we would like to fetch
+the record even if it is within the purge view (normally, it would
+return without fetching such a record). */
+#define TRX_UNDO_PREV_IN_PURGE 0x1
+
+/** This tells trx_undo_prev_version_build() to fetch the old value in
+the undo log (which is the after image for an update) */
+#define TRX_UNDO_GET_OLD_V_VALUE 0x2
+
/*******************************************************************//**
Build a previous version of a clustered index record. The caller must
hold a latch on the index page of the clustered index record.
@@ -248,7 +231,6 @@ hold a latch on the index page of the clustered index record.
or the table has been rebuilt
@retval false if the previous version is earlier than purge_view,
which means that it may have been removed */
-UNIV_INTERN
bool
trx_undo_prev_version_build(
/*========================*/
@@ -258,18 +240,25 @@ trx_undo_prev_version_build(
index_rec page and purge_view */
const rec_t* rec, /*!< in: version of a clustered index record */
dict_index_t* index, /*!< in: clustered index */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
+ offset_t* offsets,/*!< in/out: rec_get_offsets(rec, index) */
mem_heap_t* heap, /*!< in: memory heap from which the memory
needed is allocated */
- rec_t** old_vers)/*!< out, own: previous version, or NULL if
+ rec_t** old_vers,/*!< out, own: previous version, or NULL if
rec is the first inserted version, or if
history data has been deleted */
- MY_ATTRIBUTE((nonnull));
-#endif /* !UNIV_HOTBACKUP */
+ mem_heap_t* v_heap, /*!< in: memory heap used to create vrow
+ dtuple if it is not yet created. This heap
+ differs from "heap" above in that it could be
+ prebuilt->old_vers_heap for selection */
+ dtuple_t** vrow, /*!< out: virtual column info, if any */
+ ulint v_status);
+ /*!< in: status flags determining whether
+ this is called by the purge thread, and
+ whether to read the "after image" of the
+ undo log */
+
/***********************************************************//**
Parses a redo log record of adding an undo log record.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
trx_undo_parse_add_undo_rec(
/*========================*/
@@ -278,8 +267,7 @@ trx_undo_parse_add_undo_rec(
page_t* page); /*!< in: page or NULL */
/***********************************************************//**
Parses a redo log record of erasing of an undo page end.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
trx_undo_parse_erase_page_end(
/*==========================*/
@@ -288,12 +276,56 @@ trx_undo_parse_erase_page_end(
page_t* page, /*!< in: page or NULL */
mtr_t* mtr); /*!< in: mtr or NULL */
-#ifndef UNIV_HOTBACKUP
+/** Read from an undo log record a non-virtual column value.
+@param[in,out] ptr pointer to remaining part of the undo record
+@param[in,out] field stored field
+@param[in,out] len length of the field, or UNIV_SQL_NULL
+@param[in,out] orig_len original length of the locally stored part
+of an externally stored column, or 0
+@return remaining part of undo log record after reading these values */
+byte*
+trx_undo_rec_get_col_val(
+ const byte* ptr,
+ const byte** field,
+ ulint* len,
+ ulint* orig_len);
+
+/** Read virtual column value from undo log
+@param[in] table the table
+@param[in] ptr undo log pointer
+@param[in,out] row the dtuple to fill
+@param[in] in_purge whether this is called by purge */
+void
+trx_undo_read_v_cols(
+ const dict_table_t* table,
+ const byte* ptr,
+ dtuple_t* row,
+ bool in_purge);
+
+/** Read virtual column index from undo log if the undo log contains such
+info, and verify the column is still indexed, and output its position
+@param[in] table the table
+@param[in] ptr undo log pointer
+@param[in] first_v_col if this is the first virtual column, which
+ has the version marker
+@param[in,out] is_undo_log this function parses both the undo log
+ and the online log for virtual columns;
+ this flag says whether we are reading
+ the undo log
+@param[out] field_no the column number
+@return remaining part of undo log record after reading these values */
+const byte*
+trx_undo_read_v_idx(
+ const dict_table_t* table,
+ const byte* ptr,
+ bool first_v_col,
+ bool* is_undo_log,
+ ulint* field_no);
/* Types of an undo log record: these have to be smaller than 16, as the
compilation info multiplied by 16 is ORed to this value in an undo log
record */
+#define TRX_UNDO_RENAME_TABLE 9 /*!< RENAME TABLE */
#define TRX_UNDO_INSERT_REC 11 /* fresh insert into clustered index */
#define TRX_UNDO_UPD_EXIST_REC 12 /* update of a non-delete-marked
record */
@@ -302,17 +334,13 @@ record */
fields of the record can change */
#define TRX_UNDO_DEL_MARK_REC 14 /* delete marking of a record; fields
do not change */
-#define TRX_UNDO_CMPL_INFO_MULT 16 /* compilation info is multiplied by
+#define TRX_UNDO_CMPL_INFO_MULT 16U /* compilation info is multiplied by
this and ORed to the type above */
-#define TRX_UNDO_UPD_EXTERN 128 /* This bit can be ORed to type_cmpl
+#define TRX_UNDO_UPD_EXTERN 128U /* This bit can be ORed to type_cmpl
to denote that we updated external
storage fields: used by purge to
free the external storage */
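The two flag values above describe how a single type_cmpl byte packs the record type, the compiler info and the external-storage bit. A hedged decode sketch (decode_type_cmpl is a hypothetical helper, not part of this header):

	static void decode_type_cmpl(ulint type_cmpl, ulint* type,
				     ulint* cmpl_info, bool* upd_extern)
	{
		/* High bit: external storage fields were updated. */
		*upd_extern = (type_cmpl & TRX_UNDO_UPD_EXTERN) != 0;
		type_cmpl &= ~TRX_UNDO_UPD_EXTERN;
		/* Low 4 bits: the record type (always < 16). */
		*type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1);
		/* The rest: compiler info, multiplied by 16. */
		*cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT;
	}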
-#ifndef UNIV_NONINL
#include "trx0rec.ic"
-#endif
-
-#endif /* !UNIV_HOTBACKUP */
#endif /* trx0rec_h */
diff --git a/storage/innobase/include/trx0rec.ic b/storage/innobase/include/trx0rec.ic
index 57d1e9aff0b..ecae142d8f5 100644
--- a/storage/innobase/include/trx0rec.ic
+++ b/storage/innobase/include/trx0rec.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,10 +23,9 @@ Transaction undo log record
Created 3/26/1996 Heikki Tuuri
*******************************************************/
-#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Reads from an undo log record the record type.
-@return record type */
+@return record type */
UNIV_INLINE
ulint
trx_undo_rec_get_type(
@@ -38,7 +37,7 @@ trx_undo_rec_get_type(
/**********************************************************************//**
Reads from an undo log record the record compiler info.
-@return compiler info */
+@return compiler info */
UNIV_INLINE
ulint
trx_undo_rec_get_cmpl_info(
@@ -50,7 +49,7 @@ trx_undo_rec_get_cmpl_info(
/**********************************************************************//**
Returns TRUE if an undo log record contains an extern storage field.
-@return TRUE if extern */
+@return TRUE if extern */
UNIV_INLINE
ibool
trx_undo_rec_get_extern_storage(
@@ -67,7 +66,7 @@ trx_undo_rec_get_extern_storage(
/**********************************************************************//**
Reads the undo log record number.
-@return undo no */
+@return undo no */
UNIV_INLINE
undo_no_t
trx_undo_rec_get_undo_no(
@@ -78,24 +77,12 @@ trx_undo_rec_get_undo_no(
ptr = undo_rec + 3;
- return(mach_ull_read_much_compressed(ptr));
-}
-
-/**********************************************************************//**
-Returns the start of the undo record data area.
-@return offset to the data area */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_offset(
-/*====================*/
- undo_no_t undo_no) /*!< in: undo no read from node */
-{
- return(3 + mach_ull_get_much_compressed_size(undo_no));
+ return(mach_u64_read_much_compressed(ptr));
}
/***********************************************************************//**
Copies the undo record to the heap.
-@return own: copy of undo log record */
+@return own: copy of undo log record */
UNIV_INLINE
trx_undo_rec_t*
trx_undo_rec_copy(
@@ -108,6 +95,8 @@ trx_undo_rec_copy(
len = mach_read_from_2(undo_rec)
- ut_align_offset(undo_rec, UNIV_PAGE_SIZE);
ut_ad(len < UNIV_PAGE_SIZE);
- return((trx_undo_rec_t*) mem_heap_dup(heap, undo_rec, len));
+ trx_undo_rec_t* rec = static_cast<trx_undo_rec_t*>(
+ mem_heap_dup(heap, undo_rec, len));
+ mach_write_to_2(rec, len);
+ return rec;
}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/trx0roll.h b/storage/innobase/include/trx0roll.h
index fcafd1b5bac..cae548d442b 100644
--- a/storage/innobase/include/trx0roll.h
+++ b/storage/innobase/include/trx0roll.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2015, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,9 +27,7 @@ Created 3/26/1996 Heikki Tuuri
#ifndef trx0roll_h
#define trx0roll_h
-#include "univ.i"
#include "trx0trx.h"
-#include "trx0types.h"
#include "mtr0mtr.h"
#include "trx0sys.h"
@@ -41,73 +39,30 @@ Determines if this transaction is rolling back an incomplete transaction
in crash recovery.
@return TRUE if trx is an incomplete transaction that is being rolled
back in crash recovery */
-UNIV_INTERN
ibool
trx_is_recv(
/*========*/
const trx_t* trx); /*!< in: transaction */
/*******************************************************************//**
Returns a transaction savepoint taken at this point in time.
-@return savepoint */
-UNIV_INTERN
+@return savepoint */
trx_savept_t
trx_savept_take(
/*============*/
trx_t* trx); /*!< in: transaction */
-/*******************************************************************//**
-Frees an undo number array. */
-UNIV_INTERN
-void
-trx_undo_arr_free(
-/*==============*/
- trx_undo_arr_t* arr); /*!< in: undo number array */
-/*******************************************************************//**
-Returns pointer to nth element in an undo number array.
-@return pointer to the nth element */
-UNIV_INLINE
-trx_undo_inf_t*
-trx_undo_arr_get_nth_info(
-/*======================*/
- trx_undo_arr_t* arr, /*!< in: undo number array */
- ulint n); /*!< in: position */
-/********************************************************************//**
-Pops the topmost record when the two undo logs of a transaction are seen
-as a single stack of records ordered by their undo numbers. Inserts the
-undo number of the popped undo record to the array of currently processed
-undo numbers in the transaction. When the query thread finishes processing
-of this undo record, it must be released with trx_undo_rec_release.
-@return undo log record copied to heap, NULL if none left, or if the
-undo number of the top record would be less than the limit */
-UNIV_INTERN
+
+/** Get the last undo log record of a transaction (for rollback).
+@param[in,out] trx transaction
+@param[out] roll_ptr DB_ROLL_PTR to the undo record
+@param[in,out] heap memory heap for allocation
+@return undo log record copied to heap
+@retval NULL if none left or the roll_limit (savepoint) was reached */
trx_undo_rec_t*
-trx_roll_pop_top_rec_of_trx(
-/*========================*/
- trx_t* trx, /*!< in: transaction */
- undo_no_t limit, /*!< in: least undo number we need */
- roll_ptr_t* roll_ptr,/*!< out: roll pointer to undo record */
- mem_heap_t* heap); /*!< in: memory heap where copied */
-/********************************************************************//**
-Reserves an undo log record for a query thread to undo. This should be
-called if the query thread gets the undo log record not using the pop
-function above.
-@return TRUE if succeeded */
-UNIV_INTERN
-ibool
-trx_undo_rec_reserve(
-/*=================*/
- trx_t* trx, /*!< in/out: transaction */
- undo_no_t undo_no);/*!< in: undo number of the record */
-/*******************************************************************//**
-Releases a reserved undo record. */
-UNIV_INTERN
-void
-trx_undo_rec_release(
-/*=================*/
- trx_t* trx, /*!< in/out: transaction */
- undo_no_t undo_no);/*!< in: undo number */
+trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
/** Report progress when rolling back a row of a recovered transaction.
@return whether the rollback should be aborted due to pending shutdown */
-UNIV_INTERN
bool
trx_roll_must_shutdown();
/*******************************************************************//**
@@ -115,7 +70,6 @@ Rollback or clean up any incomplete transactions which were
encountered in crash recovery. If the transaction already was
committed, then we clean up a possible insert undo log. If the
transaction was not yet committed, then we roll it back. */
-UNIV_INTERN
void
trx_rollback_or_clean_recovered(
/*============================*/
@@ -127,8 +81,8 @@ encountered in crash recovery. If the transaction already was
committed, then we clean up a possible insert undo log. If the
transaction was not yet committed, then we roll it back.
Note: this is done in a background thread.
-@return a dummy parameter */
-extern "C" UNIV_INTERN
+@return a dummy parameter */
+extern "C"
os_thread_ret_t
DECLARE_THREAD(trx_rollback_or_clean_all_recovered)(
/*================================================*/
@@ -137,24 +91,21 @@ DECLARE_THREAD(trx_rollback_or_clean_all_recovered)(
os_thread_create */
/*********************************************************************//**
Creates a rollback command node struct.
-@return own: rollback node struct */
-UNIV_INTERN
+@return own: rollback node struct */
roll_node_t*
roll_node_create(
/*=============*/
mem_heap_t* heap); /*!< in: mem heap where created */
/***********************************************************//**
Performs an execution step for a rollback command node in a query graph.
-@return query thread to run next, or NULL */
-UNIV_INTERN
+@return query thread to run next, or NULL */
que_thr_t*
trx_rollback_step(
/*==============*/
que_thr_t* thr); /*!< in: query thread */
/*******************************************************************//**
Rollback a transaction used in MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
dberr_t
trx_rollback_for_mysql(
/*===================*/
@@ -162,8 +113,7 @@ trx_rollback_for_mysql(
MY_ATTRIBUTE((nonnull));
/*******************************************************************//**
Rollback the latest SQL statement for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
dberr_t
trx_rollback_last_sql_stat_for_mysql(
/*=================================*/
@@ -171,8 +121,7 @@ trx_rollback_last_sql_stat_for_mysql(
MY_ATTRIBUTE((nonnull));
/*******************************************************************//**
Rollback a transaction to a given savepoint or do a complete rollback.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
dberr_t
trx_rollback_to_savepoint(
/*======================*/
@@ -190,13 +139,12 @@ the row, these locks are naturally released in the rollback. Savepoints which
were set after this savepoint are deleted.
@return if no savepoint of the name found then DB_NO_SAVEPOINT,
otherwise DB_SUCCESS */
-UNIV_INTERN
dberr_t
trx_rollback_to_savepoint_for_mysql(
/*================================*/
trx_t* trx, /*!< in: transaction handle */
const char* savepoint_name, /*!< in: savepoint name */
- ib_int64_t* mysql_binlog_cache_pos) /*!< out: the MySQL binlog cache
+ int64_t* mysql_binlog_cache_pos) /*!< out: the MySQL binlog cache
position corresponding to this
savepoint; MySQL needs this
information to remove the
@@ -208,14 +156,13 @@ Creates a named savepoint. If the transaction is not yet started, starts it.
If there is already a savepoint of the same name, this call erases that old
savepoint and replaces it with a new. Savepoints are deleted in a transaction
commit or rollback.
-@return always DB_SUCCESS */
-UNIV_INTERN
+@return always DB_SUCCESS */
dberr_t
trx_savepoint_for_mysql(
/*====================*/
trx_t* trx, /*!< in: transaction handle */
const char* savepoint_name, /*!< in: savepoint name */
- ib_int64_t binlog_cache_pos) /*!< in: MySQL binlog cache
+ int64_t binlog_cache_pos) /*!< in: MySQL binlog cache
position corresponding to this
connection at the time of the
savepoint */
@@ -225,7 +172,6 @@ Releases a named savepoint. Savepoints which
were set after this savepoint are deleted.
@return if no savepoint of the name found then DB_NO_SAVEPOINT,
otherwise DB_SUCCESS */
-UNIV_INTERN
dberr_t
trx_release_savepoint_for_mysql(
/*============================*/
@@ -234,7 +180,6 @@ trx_release_savepoint_for_mysql(
MY_ATTRIBUTE((nonnull, warn_unused_result));
/*******************************************************************//**
Frees savepoint structs starting from savep. */
-UNIV_INTERN
void
trx_roll_savepoints_free(
/*=====================*/
@@ -242,41 +187,20 @@ trx_roll_savepoints_free(
trx_named_savept_t* savep); /*!< in: free all savepoints > this one;
if this is NULL, free all savepoints
of trx */
-
-/** A cell of trx_undo_arr_t; used during a rollback and a purge */
-struct trx_undo_inf_t{
- ibool in_use; /*!< true if cell is being used */
- trx_id_t trx_no; /*!< transaction number: not defined during
- a rollback */
- undo_no_t undo_no;/*!< undo number of an undo record */
-};
-
-/** During a rollback and a purge, undo numbers of undo records currently being
-processed are stored in this array */
-
-struct trx_undo_arr_t{
- ulint n_cells; /*!< number of cells in the array */
- ulint n_used; /*!< number of cells in use */
- trx_undo_inf_t* infos; /*!< the array of undo infos */
- mem_heap_t* heap; /*!< memory heap from which allocated */
-};
-
/** Rollback node states */
enum roll_node_state {
ROLL_NODE_NONE = 0, /*!< Unknown state */
ROLL_NODE_SEND, /*!< about to send a rollback signal to
the transaction */
ROLL_NODE_WAIT /*!< rollback signal sent to the
- transaction, waiting for completion */
+ transaction, waiting for completion */
};
/** Rollback command node in a query graph */
struct roll_node_t{
que_common_t common; /*!< node type: QUE_NODE_ROLLBACK */
enum roll_node_state state; /*!< node execution state */
- ibool partial;/*!< TRUE if we want a partial
- rollback */
- trx_savept_t savept; /*!< savepoint to which to
+ const trx_savept_t* savept; /*!< savepoint to which to
roll back, in the case of a
partial rollback */
que_thr_t* undo_thr;/*!< undo query graph */
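
The ibool partial flag is folded into the savept pointer above; presumably a
NULL savept now requests a complete rollback and a non-NULL one a partial
rollback to that savepoint. A minimal sketch of that convention, with a
hypothetical stand-in for trx_savept_t:

#include <cstddef>
#include <cstdint>

/* Hypothetical stand-in for trx_savept_t. */
struct trx_savept_sketch {
	uint64_t	least_undo_no;
};

/* Pointer presence encodes "partial": roll back to *savept if set,
otherwise roll back the whole transaction. */
static bool is_partial_rollback(const trx_savept_sketch* savept)
{
	return savept != NULL;
}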
@@ -287,7 +211,7 @@ struct trx_named_savept_t{
char* name; /*!< savepoint name */
trx_savept_t savept; /*!< the undo number corresponding to
the savepoint */
- ib_int64_t mysql_binlog_cache_pos;
+ int64_t mysql_binlog_cache_pos;
/*!< the MySQL binlog cache position
corresponding to this savepoint, not
defined if the MySQL binlogging is not
@@ -297,8 +221,6 @@ struct trx_named_savept_t{
transaction */
};
-#ifndef UNIV_NONINL
#include "trx0roll.ic"
-#endif
#endif
diff --git a/storage/innobase/include/trx0roll.ic b/storage/innobase/include/trx0roll.ic
index e6f9dd5ebcb..79b8e9083dd 100644
--- a/storage/innobase/include/trx0roll.ic
+++ b/storage/innobase/include/trx0roll.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,18 +23,40 @@ Transaction rollback
Created 3/26/1996 Heikki Tuuri
*******************************************************/
+#ifdef UNIV_DEBUG
/*******************************************************************//**
-Returns pointer to nth element in an undo number array.
-@return pointer to the nth element */
+Check if undo numbering is maintained while processing undo records
+for rollback.
+@return true if undo numbering is maintained. */
UNIV_INLINE
-trx_undo_inf_t*
-trx_undo_arr_get_nth_info(
-/*======================*/
- trx_undo_arr_t* arr, /*!< in: undo number array */
- ulint n) /*!< in: position */
+bool
+trx_roll_check_undo_rec_ordering(
+/*=============================*/
+ undo_no_t curr_undo_rec_no, /*!< in: record number of
+ undo record to process. */
+ ulint curr_undo_space_id, /*!< in: space-id of rollback
+ segment that contains the
+ undo record to process. */
+ const trx_t* trx) /*!< in: transaction */
{
- ut_ad(arr);
- ut_ad(n < arr->n_cells);
-
- return(arr->infos + n);
+ /* Each transaction can now have multiple rollback segments.
+ If a transaction involves temp and non-temp tables, both rollback
+ segments will be active. In this case undo records will be distributed
+ across the two rollback segments.
+ CASE-1: The UNDO action applies all undo records from one rollback
+ segment before moving to the next. Undo record numbers are then not
+ sequential across segments, but ordering is still enforced: the next
+ undo record number must be < the last processed undo record number.
+ CASE-2: For a normal rollback (not initiated by a crash) all rollback
+ segments are active (including the non-redo one). Depending on the
+ transaction's operation pattern, the undo record number of the first
+ undo record from the new rollback segment can be > the last undo
+ number from the previous rollback segment, so we skip this check
+ while rollback segments are switching. Once switched, the new
+ rollback segment must again follow the pattern described in CASE-1. */
+
+ return(curr_undo_space_id != trx->undo_rseg_space
+ || curr_undo_rec_no + 1 <= trx->undo_no);
}
+#endif /* UNIV_DEBUG */
+
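
A minimal standalone sketch of the ordering invariant checked above, using
hypothetical stand-in types (trx_sketch is not the real trx_t): within one
rollback segment, undo records are processed in strictly descending
undo-number order, and a switch to another rollback segment suspends the
check.

#include <cassert>
#include <cstdint>

typedef uint64_t undo_no_t;

struct trx_sketch {
	undo_no_t	undo_no;		/* next undo number to assign */
	unsigned	undo_rseg_space;	/* space id of rseg being undone */
};

/* Mirrors trx_roll_check_undo_rec_ordering(): the check is skipped when
the rollback switches to a different rollback segment. */
static bool ordering_ok(undo_no_t curr_rec_no, unsigned curr_space,
			const trx_sketch& trx)
{
	return curr_space != trx.undo_rseg_space
		|| curr_rec_no + 1 <= trx.undo_no;
}

int main()
{
	trx_sketch trx = {10, 1};
	assert(ordering_ok(9, 1, trx));		/* descending in same rseg: OK */
	assert(!ordering_ok(10, 1, trx));	/* not below trx.undo_no: fail */
	assert(ordering_ok(42, 2, trx));	/* rseg switch: check skipped */
}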
diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h
index acf2f9d04fb..8ca17998df4 100644
--- a/storage/innobase/include/trx0rseg.h
+++ b/storage/innobase/include/trx0rseg.h
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,38 +27,38 @@ Created 3/26/1996 Heikki Tuuri
#ifndef trx0rseg_h
#define trx0rseg_h
-#include "univ.i"
#include "trx0types.h"
#include "trx0sys.h"
-#include "ut0bh.h"
-
-/******************************************************************//**
-Gets a rollback segment header.
-@return rollback segment header, page x-latched */
+#include "fut0lst.h"
+#include <vector>
+
+/** Gets a rollback segment header.
+@param[in] space space where placed
+@param[in] page_no page number of the header
+@param[in,out] mtr mini-transaction
+@return rollback segment header, page x-latched */
UNIV_INLINE
trx_rsegf_t*
trx_rsegf_get(
-/*==========*/
- ulint space, /*!< in: space where placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number of the header */
- mtr_t* mtr); /*!< in: mtr */
-/******************************************************************//**
-Gets a newly created rollback segment header.
-@return rollback segment header, page x-latched */
+ ulint space,
+ ulint page_no,
+ mtr_t* mtr);
+
+/** Gets a newly created rollback segment header.
+@param[in] space space where placed
+@param[in] page_no page number of the header
+@param[in,out] mtr mini-transaction
+@return rollback segment header, page x-latched */
UNIV_INLINE
trx_rsegf_t*
trx_rsegf_get_new(
-/*==============*/
- ulint space, /*!< in: space where placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number of the header */
- mtr_t* mtr); /*!< in: mtr */
+ ulint space,
+ ulint page_no,
+ mtr_t* mtr);
+
/***************************************************************//**
Gets the file page number of the nth undo log slot.
-@return page number of the undo log segment */
+@return page number of the undo log segment */
UNIV_INLINE
ulint
trx_rsegf_get_nth_undo(
@@ -78,60 +78,49 @@ trx_rsegf_set_nth_undo(
mtr_t* mtr); /*!< in: mtr */
/****************************************************************//**
Looks for a free slot for an undo log segment.
-@return slot index or ULINT_UNDEFINED if not found */
+@return slot index or ULINT_UNDEFINED if not found */
UNIV_INLINE
ulint
trx_rsegf_undo_find_free(
/*=====================*/
trx_rsegf_t* rsegf, /*!< in: rollback segment header */
mtr_t* mtr); /*!< in: mtr */
-/******************************************************************//**
-Looks for a rollback segment, based on the rollback segment id.
-@return rollback segment */
-UNIV_INLINE
-trx_rseg_t*
-trx_rseg_get_on_id(
-/*===============*/
- ulint id); /*!< in: rollback segment id */
-/****************************************************************//**
-Creates a rollback segment header. This function is called only when
-a new rollback segment is created in the database.
-@return page number of the created segment, FIL_NULL if fail */
-UNIV_INTERN
-ulint
+
+/** Creates a rollback segment header.
+This function is called only when a new rollback segment is created in
+the database.
+@param[in] space space id
+@param[in] max_size max size in pages
+@param[in] rseg_slot_no rseg id == slot number in trx sys
+@param[in,out] mtr mini-transaction
+@return the created rollback segment
+@retval NULL on failure */
+buf_block_t*
trx_rseg_header_create(
-/*===================*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint max_size, /*!< in: max size in pages */
- ulint rseg_slot_no, /*!< in: rseg id == slot number in trx sys */
- mtr_t* mtr); /*!< in: mtr */
-/*********************************************************************//**
-Creates the memory copies for rollback segments and initializes the
-rseg array in trx_sys at a database startup. */
-UNIV_INTERN
+ ulint space,
+ ulint max_size,
+ ulint rseg_slot_no,
+ mtr_t* mtr);
+
+/** Initialize the rollback segments in memory at database startup. */
void
-trx_rseg_array_init(
-/*================*/
- trx_sysf_t* sys_header, /*!< in/out: trx system header */
- ib_bh_t* ib_bh, /*!< in: rseg queue */
- mtr_t* mtr); /*!< in/out: mtr */
-/***************************************************************************
-Free's an instance of the rollback segment in memory. */
-UNIV_INTERN
+trx_rseg_array_init();
+
+/** Free a rollback segment in memory. */
void
-trx_rseg_mem_free(
-/*==============*/
- trx_rseg_t* rseg); /*!< in, own: instance to free */
+trx_rseg_mem_free(trx_rseg_t* rseg);
-/** Create a rollback segment.
-@param[in] space undo tablespace ID
+/** Create a persistent rollback segment.
+@param[in] space_id system or undo tablespace id
@return pointer to new rollback segment
@retval NULL on failure */
-UNIV_INTERN
trx_rseg_t*
-trx_rseg_create(ulint space);
+trx_rseg_create(ulint space_id)
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Create the temporary rollback segments. */
+void
+trx_temp_rseg_create();
/********************************************************************
Get the number of unique rollback tablespaces in use except space id 0.
@@ -139,7 +128,6 @@ The last space id will be the sentinel value ULINT_UNDEFINED. The array
will be sorted on space id. Note: space_ids should have space for
TRX_SYS_N_RSEGS + 1 elements.
@return number of unique rollback tablespaces in use. */
-UNIV_INTERN
ulint
trx_rseg_get_n_undo_tablespaces(
/*============================*/
@@ -151,51 +139,85 @@ trx_rseg_get_n_undo_tablespaces(
/* Maximum number of transactions supported by a single rollback segment */
#define TRX_RSEG_MAX_N_TRXS (TRX_RSEG_N_SLOTS / 2)
-/* The rollback segment memory object */
-struct trx_rseg_t{
+/** The rollback segment memory object */
+struct trx_rseg_t {
/*--------------------------------------------------------*/
- ulint id; /*!< rollback segment id == the index of
- its slot in the trx system file copy */
- ib_mutex_t mutex; /*!< mutex protecting the fields in this
- struct except id, which is constant */
- ulint space; /*!< space where the rollback segment is
- header is placed */
- ulint zip_size;/* compressed page size of space
- in bytes, or 0 for uncompressed spaces */
- ulint page_no;/* page number of the rollback segment
- header */
- ulint max_size;/* maximum allowed size in pages */
- ulint curr_size;/* current size in pages */
+ /** rollback segment id == the index of its slot in the trx
+ system file copy */
+ ulint id;
+
+ /** mutex protecting the fields in this struct except id, space and
+ page_no, which are constant */
+ RsegMutex mutex;
+
+ /** space where the rollback segment header is placed */
+ ulint space;
+
+ /** page number of the rollback segment header */
+ ulint page_no;
+
+ /** maximum allowed size in pages */
+ ulint max_size;
+
+ /** current size in pages */
+ ulint curr_size;
+
/*--------------------------------------------------------*/
/* Fields for update undo logs */
- UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_list;
- /* List of update undo logs */
- UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_cached;
- /* List of update undo log segments
- cached for fast reuse */
+ /** List of update undo logs */
+ UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_list;
+
+ /** List of update undo log segments cached for fast reuse */
+ UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_cached;
+
/*--------------------------------------------------------*/
/* Fields for insert undo logs */
+ /** List of insert undo logs */
UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_list;
- /* List of insert undo logs */
+
+ /** List of insert undo log segments cached for fast reuse */
UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_cached;
- /* List of insert undo log segments
- cached for fast reuse */
+
/*--------------------------------------------------------*/
- ulint last_page_no; /*!< Page number of the last not yet
- purged log header in the history list;
- FIL_NULL if all list purged */
- ulint last_offset; /*!< Byte offset of the last not yet
- purged log header */
- trx_id_t last_trx_no; /*!< Transaction number of the last not
- yet purged log */
- ibool last_del_marks; /*!< TRUE if the last not yet purged log
- needs purging */
-};
-/** For prioritising the rollback segments for purge. */
-struct rseg_queue_t {
- trx_id_t trx_no; /*!< trx_rseg_t::last_trx_no */
- trx_rseg_t* rseg; /*!< Rollback segment */
+ /** Page number of the last not yet purged log header in the history
+ list; FIL_NULL if the list is fully purged */
+ ulint last_page_no;
+
+ /** Byte offset of the last not yet purged log header */
+ ulint last_offset;
+
+ /** Transaction number of the last not yet purged log */
+ trx_id_t last_trx_no;
+
+ /** TRUE if the last not yet purged log needs purging */
+ ibool last_del_marks;
+
+ /** Reference counter to track transactions allocated to this rseg. */
+ ulint trx_ref_count;
+
+ /** If true, then skip allocating this rseg as it resides in an
+ UNDO tablespace marked for truncation. */
+ bool skip_allocation;
+
+ /** @return whether the rollback segment is persistent */
+ bool is_persistent() const
+ {
+ ut_ad(space == SRV_TMP_SPACE_ID
+ || space == TRX_SYS_SPACE
+ || (srv_undo_space_id_start > 0
+ && space >= srv_undo_space_id_start
+ && space <= srv_undo_space_id_start
+ + TRX_SYS_MAX_UNDO_SPACES));
+ ut_ad(space == SRV_TMP_SPACE_ID
+ || space == TRX_SYS_SPACE
+ || (srv_undo_space_id_start > 0
+ && space >= srv_undo_space_id_start
+ && space <= srv_undo_space_id_start
+ + srv_undo_tablespaces_open)
+ || !srv_was_started);
+ return(space != SRV_TMP_SPACE_ID);
+ }
};
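
A sketch of the persistence rule that is_persistent() encodes, with
hypothetical space-id values (the real SRV_TMP_SPACE_ID and the debug
assertions are omitted): only a rollback segment placed in the shared
temporary tablespace is non-persistent.

#include <cassert>

static const unsigned TRX_SYS_SPACE_SKETCH = 0;			/* hypothetical */
static const unsigned SRV_TMP_SPACE_ID_SKETCH = 0xFFFFFFF0u;	/* hypothetical */

static bool is_persistent_sketch(unsigned space)
{
	return space != SRV_TMP_SPACE_ID_SKETCH;
}

int main()
{
	assert(is_persistent_sketch(TRX_SYS_SPACE_SKETCH));
	assert(!is_persistent_sketch(SRV_TMP_SPACE_ID_SKETCH));
}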
/* Undo log segment slot in a rollback segment header */
@@ -224,8 +246,6 @@ struct rseg_queue_t {
/* Undo log segment slots */
/*-------------------------------------------------------------*/
-#ifndef UNIV_NONINL
#include "trx0rseg.ic"
-#endif
#endif
diff --git a/storage/innobase/include/trx0rseg.ic b/storage/innobase/include/trx0rseg.ic
index 4bcbebaf6d3..eed487176e8 100644
--- a/storage/innobase/include/trx0rseg.ic
+++ b/storage/innobase/include/trx0rseg.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,25 +26,31 @@ Created 3/26/1996 Heikki Tuuri
#include "srv0srv.h"
#include "mtr0log.h"
-#include "trx0sys.h"
-/******************************************************************//**
-Gets a rollback segment header.
-@return rollback segment header, page x-latched */
+/** Gets a rollback segment header.
+@param[in] space space where placed
+@param[in] page_no page number of the header
+@param[in,out] mtr mini-transaction
+@return rollback segment header, page x-latched */
UNIV_INLINE
trx_rsegf_t*
trx_rsegf_get(
-/*==========*/
- ulint space, /*!< in: space where placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number of the header */
- mtr_t* mtr) /*!< in: mtr */
+ ulint space,
+ ulint page_no,
+ mtr_t* mtr)
{
buf_block_t* block;
trx_rsegf_t* header;
- block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
+ ut_ad(space <= srv_undo_space_id_start + srv_undo_tablespaces_active
+ || space == SRV_TMP_SPACE_ID
+ || !srv_was_started);
+ ut_ad(space <= srv_undo_space_id_start + TRX_SYS_MAX_UNDO_SPACES
+ || space == SRV_TMP_SPACE_ID);
+
+ block = buf_page_get(
+ page_id_t(space, page_no), univ_page_size, RW_X_LATCH, mtr);
+
buf_block_dbg_add_level(block, SYNC_RSEG_HEADER);
header = TRX_RSEG + buf_block_get_frame(block);
@@ -51,23 +58,28 @@ trx_rsegf_get(
return(header);
}
-/******************************************************************//**
-Gets a newly created rollback segment header.
-@return rollback segment header, page x-latched */
+/** Gets a newly created rollback segment header.
+@param[in] space space where placed
+@param[in] page_no page number of the header
+@param[in,out] mtr mini-transaction
+@return rollback segment header, page x-latched */
UNIV_INLINE
trx_rsegf_t*
trx_rsegf_get_new(
-/*==============*/
- ulint space, /*!< in: space where placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number of the header */
- mtr_t* mtr) /*!< in: mtr */
+ ulint space,
+ ulint page_no,
+ mtr_t* mtr)
{
buf_block_t* block;
trx_rsegf_t* header;
- block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
+ ut_ad(space <= srv_undo_tablespaces_active || space == SRV_TMP_SPACE_ID
+ || !srv_was_started);
+ ut_ad(space <= TRX_SYS_MAX_UNDO_SPACES || space == SRV_TMP_SPACE_ID);
+
+ block = buf_page_get(
+ page_id_t(space, page_no), univ_page_size, RW_X_LATCH, mtr);
+
buf_block_dbg_add_level(block, SYNC_RSEG_HEADER_NEW);
header = TRX_RSEG + buf_block_get_frame(block);
@@ -77,7 +89,7 @@ trx_rsegf_get_new(
/***************************************************************//**
Gets the file page number of the nth undo log slot.
-@return page number of the undo log segment */
+@return page number of the undo log segment */
UNIV_INLINE
ulint
trx_rsegf_get_nth_undo(
@@ -86,12 +98,7 @@ trx_rsegf_get_nth_undo(
ulint n, /*!< in: index of slot */
mtr_t* mtr) /*!< in: mtr */
{
- if (n >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr,
- "InnoDB: Error: trying to get slot %lu of rseg\n",
- (ulong) n);
- ut_error;
- }
+ ut_a(n < TRX_RSEG_N_SLOTS);
return(mtr_read_ulint(rsegf + TRX_RSEG_UNDO_SLOTS
+ n * TRX_RSEG_SLOT_SIZE, MLOG_4BYTES, mtr));
@@ -108,12 +115,7 @@ trx_rsegf_set_nth_undo(
ulint page_no,/*!< in: page number of the undo log segment */
mtr_t* mtr) /*!< in: mtr */
{
- if (n >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr,
- "InnoDB: Error: trying to set slot %lu of rseg\n",
- (ulong) n);
- ut_error;
- }
+ ut_a(n < TRX_RSEG_N_SLOTS);
mlog_write_ulint(rsegf + TRX_RSEG_UNDO_SLOTS + n * TRX_RSEG_SLOT_SIZE,
page_no, MLOG_4BYTES, mtr);
@@ -121,7 +123,7 @@ trx_rsegf_set_nth_undo(
/****************************************************************//**
Looks for a free slot for an undo log segment.
-@return slot index or ULINT_UNDEFINED if not found */
+@return slot index or ULINT_UNDEFINED if not found */
UNIV_INLINE
ulint
trx_rsegf_undo_find_free(
@@ -131,37 +133,22 @@ trx_rsegf_undo_find_free(
{
ulint i;
ulint page_no;
+ ulint max_slots = TRX_RSEG_N_SLOTS;
- for (i = 0;
-#ifndef UNIV_DEBUG
- i < TRX_RSEG_N_SLOTS;
-#else
- i < (trx_rseg_n_slots_debug ? trx_rseg_n_slots_debug : TRX_RSEG_N_SLOTS);
+#ifdef UNIV_DEBUG
+ if (trx_rseg_n_slots_debug) {
+ max_slots = ut_min(static_cast<ulint>(trx_rseg_n_slots_debug),
+ static_cast<ulint>(TRX_RSEG_N_SLOTS));
+ }
#endif
- i++) {
+ for (i = 0; i < max_slots; i++) {
page_no = trx_rsegf_get_nth_undo(rsegf, i, mtr);
if (page_no == FIL_NULL) {
-
return(i);
}
}
return(ULINT_UNDEFINED);
}
-
-/******************************************************************//**
-Looks for a rollback segment, based on the rollback segment id.
-@return rollback segment */
-UNIV_INLINE
-trx_rseg_t*
-trx_rseg_get_on_id(
-/*===============*/
- ulint id) /*!< in: rollback segment id */
-{
- ut_a(id < TRX_SYS_N_RSEGS);
-
- return(trx_sys->rseg_array[id]);
-}
-
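
A sketch of the capped free-slot scan introduced above, with plain stand-ins
for the rollback segment header fields; in debug builds
trx_rseg_n_slots_debug shrinks the search space so that the out-of-slots
path can be exercised.

#include <cstddef>

static const unsigned FIL_NULL_SKETCH = 0xFFFFFFFFu;	/* stands in for FIL_NULL */

static std::size_t find_free_slot(const unsigned* slots, std::size_t max_slots)
{
	for (std::size_t i = 0; i < max_slots; i++) {
		if (slots[i] == FIL_NULL_SKETCH) {
			return i;	/* unused slot found */
		}
	}

	return std::size_t(-1);	/* stands in for ULINT_UNDEFINED */
}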
diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
index 6b1ff959a8a..c4b1636cfd2 100644
--- a/storage/innobase/include/trx0sys.h
+++ b/storage/innobase/include/trx0sys.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,101 +27,56 @@ Created 3/26/1996 Heikki Tuuri
#ifndef trx0sys_h
#define trx0sys_h
-#include "univ.i"
-
-#include "trx0types.h"
-#include "fsp0types.h"
-#include "fil0fil.h"
#include "buf0buf.h"
-#ifndef UNIV_HOTBACKUP
+#include "fil0fil.h"
+#include "trx0types.h"
+#include "mem0mem.h"
#include "mtr0mtr.h"
#include "ut0byte.h"
#include "mem0mem.h"
-#include "sync0sync.h"
#include "ut0lst.h"
-#include "ut0bh.h"
#include "read0types.h"
#include "page0types.h"
-#include "ut0bh.h"
+#include "ut0mutex.h"
+#include "trx0trx.h"
#ifdef WITH_WSREP
#include "trx0xa.h"
#endif /* WITH_WSREP */
-typedef UT_LIST_BASE_NODE_T(trx_t) trx_list_t;
-
-/** In a MySQL replication slave, in crash recovery we store the master log
-file name and position here. */
-/* @{ */
-/** Master binlog file name */
-extern char trx_sys_mysql_master_log_name[];
-/** Master binlog file position. We have successfully got the updates
-up to this position. -1 means that no crash recovery was needed, or
-there was no master log position info inside InnoDB.*/
-extern ib_int64_t trx_sys_mysql_master_log_pos;
-/* @} */
+typedef UT_LIST_BASE_NODE_T(trx_t) trx_ut_list_t;
-/** If this MySQL server uses binary logging, after InnoDB has been inited
-and if it has done a crash recovery, we store the binlog file name and position
-here. */
-/* @{ */
-/** Binlog file name */
-extern char trx_sys_mysql_bin_log_name[];
-/** Binlog file position, or -1 if unknown */
-extern ib_int64_t trx_sys_mysql_bin_log_pos;
-/* @} */
+// Forward declarations
+class MVCC;
+class ReadView;
/** The transaction system */
extern trx_sys_t* trx_sys;
-/***************************************************************//**
-Checks if a page address is the trx sys header page.
-@return TRUE if trx sys header page */
-UNIV_INLINE
-ibool
-trx_sys_hdr_page(
-/*=============*/
- ulint space, /*!< in: space */
- ulint page_no);/*!< in: page number */
-/*****************************************************************//**
-Creates and initializes the central memory structures for the transaction
-system. This is called when the database is started.
-@return min binary heap of rsegs to purge */
-UNIV_INTERN
-ib_bh_t*
-trx_sys_init_at_db_start(void);
-/*==========================*/
+/** Checks if a page address is the trx sys header page.
+@param[in] page_id page id
+@return true if trx sys header page */
+inline bool trx_sys_hdr_page(const page_id_t page_id);
+
+/** Initialize the transaction system main-memory data structures. */
+void trx_sys_init_at_db_start();
+
/*****************************************************************//**
-Creates the trx_sys instance and initializes ib_bh and mutex. */
-UNIV_INTERN
+Creates the trx_sys instance and initializes purge_queue and mutex. */
void
trx_sys_create(void);
/*================*/
/*****************************************************************//**
Creates and initializes the transaction system at the database creation. */
-UNIV_INTERN
void
trx_sys_create_sys_pages(void);
/*==========================*/
-/****************************************************************//**
-Looks for a free slot for a rollback segment in the trx system file copy.
-@return slot index or ULINT_UNDEFINED if not found */
-UNIV_INTERN
+/** @return an unallocated rollback segment slot in the TRX_SYS header
+@retval ULINT_UNDEFINED if not found */
ulint
-trx_sysf_rseg_find_free(
-/*====================*/
- mtr_t* mtr); /*!< in: mtr */
-/***************************************************************//**
-Gets the pointer in the nth slot of the rseg array.
-@return pointer to rseg object, NULL if slot not in use */
-UNIV_INLINE
-trx_rseg_t*
-trx_sys_get_nth_rseg(
-/*=================*/
- trx_sys_t* sys, /*!< in: trx system */
- ulint n); /*!< in: index of slot */
+trx_sysf_rseg_find_free(mtr_t* mtr);
/**********************************************************************//**
Gets a pointer to the transaction system file copy and x-locks its page.
-@return pointer to system file copy, page x-locked */
+@return pointer to system file copy, page x-locked */
UNIV_INLINE
trx_sysf_t*
trx_sysf_get(
@@ -130,7 +85,7 @@ trx_sysf_get(
/*****************************************************************//**
Gets the space of the nth rollback segment slot in the trx system
file copy.
-@return space id */
+@return space id */
UNIV_INLINE
ulint
trx_sysf_rseg_get_space(
@@ -141,7 +96,7 @@ trx_sysf_rseg_get_space(
/*****************************************************************//**
Gets the page number of the nth rollback segment slot in the trx system
file copy.
-@return page number, FIL_NULL if slot unused */
+@return page number, FIL_NULL if slot unused */
UNIV_INLINE
ulint
trx_sysf_rseg_get_page_no(
@@ -174,11 +129,11 @@ trx_sysf_rseg_set_page_no(
mtr_t* mtr); /*!< in: mtr */
/*****************************************************************//**
Allocates a new transaction id.
-@return new, allocated trx id */
+@return new, allocated trx id */
UNIV_INLINE
trx_id_t
-trx_sys_get_new_trx_id(void);
-/*========================*/
+trx_sys_get_new_trx_id();
+/*===================*/
/*****************************************************************//**
Determines the maximum transaction id.
@return maximum currently allocated trx id; will be stale after the
@@ -207,7 +162,7 @@ trx_write_trx_id(
Reads a trx id from an index page. In case that the id size changes in
some future version, this function should be used instead of
mach_read_...
-@return id */
+@return id */
UNIV_INLINE
trx_id_t
trx_read_trx_id(
@@ -215,10 +170,7 @@ trx_read_trx_id(
const byte* ptr); /*!< in: pointer to memory from where to read */
/****************************************************************//**
Looks for the trx instance with the given id in the rw trx_list.
-The caller must be holding trx_sys->mutex.
-@return the trx handle or NULL if not found;
-the pointer must not be dereferenced unless lock_sys->mutex was
-acquired before calling this function and is still being held */
+@return the trx handle or NULL if not found */
UNIV_INLINE
trx_t*
trx_get_rw_trx_by_id(
@@ -229,50 +181,26 @@ Returns the minimum trx id in rw trx list. This is the smallest id for which
the trx can possibly be active. (But, you must look at the trx->state to
find out if the minimum trx id transaction itself is active, or already
committed.)
-@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
+@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
UNIV_INLINE
trx_id_t
trx_rw_min_trx_id(void);
/*===================*/
-/****************************************************************//**
-Checks if a rw transaction with the given id is active. Caller must hold
-trx_sys->mutex in shared mode. If the caller is not holding
-lock_sys->mutex, the transaction may already have been committed.
-@return transaction instance if active, or NULL;
-the pointer must not be dereferenced unless lock_sys->mutex was
-acquired before calling this function and is still being held */
-UNIV_INLINE
-trx_t*
-trx_rw_is_active_low(
-/*=================*/
- trx_id_t trx_id, /*!< in: trx id of the transaction */
- ibool* corrupt); /*!< in: NULL or pointer to a flag
- that will be set if corrupt */
-/****************************************************************//**
-Checks if a rw transaction with the given id is active. If the caller is
-not holding lock_sys->mutex, the transaction may already have been
-committed.
-@return transaction instance if active, or NULL;
-the pointer must not be dereferenced unless lock_sys->mutex was
-acquired before calling this function and is still being held */
-UNIV_INLINE
-trx_t*
-trx_rw_is_active(
-/*=============*/
- trx_id_t trx_id, /*!< in: trx id of the transaction */
- ibool* corrupt); /*!< in: NULL or pointer to a flag
- that will be set if corrupt */
-#ifdef UNIV_DEBUG
-/****************************************************************//**
-Checks whether a trx is in one of rw_trx_list or ro_trx_list.
-@return TRUE if is in */
-UNIV_INTERN
-ibool
-trx_in_trx_list(
-/*============*/
- const trx_t* in_trx) /*!< in: transaction */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#endif /* UNIV_DEBUG */
+/** Look up a rw transaction with the given id.
+@param[in] trx_id transaction identifier
+@param[out] corrupt flag that will be set if trx_id is corrupted
+@return transaction; its state should be rechecked after acquiring trx_t::mutex
+@retval NULL if there is no transaction identified by trx_id. */
+inline trx_t* trx_rw_is_active_low(trx_id_t trx_id, bool* corrupt);
+
+/** Look up a rw transaction with the given id.
+@param[in] trx_id transaction identifier
+@param[out] corrupt flag that will be set if trx_id is corrupted
+@param[in] ref_count whether to increment trx->n_ref
+@return transaction; its state should be rechecked after acquiring trx_t::mutex
+@retval NULL if there is no active transaction identified by trx_id. */
+inline trx_t* trx_rw_is_active(trx_id_t trx_id, bool* corrupt, bool ref_count);
+
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
/***********************************************************//**
Assert that a transaction has been recovered.
@@ -289,26 +217,17 @@ Updates the offset information about the end of the MySQL binlog entry
which corresponds to the transaction just being committed. In a MySQL
replication slave updates the latest master binlog position up to which
replication has proceeded. */
-UNIV_INTERN
void
trx_sys_update_mysql_binlog_offset(
/*===============================*/
const char* file_name,/*!< in: MySQL log file name */
- ib_int64_t offset, /*!< in: position in that log file */
- ulint field, /*!< in: offset of the MySQL log info field in
- the trx sys header */
-#ifdef WITH_WSREP
+ int64_t offset, /*!< in: position in that log file */
trx_sysf_t* sys_header, /*!< in: trx sys header */
-#endif /* WITH_WSREP */
mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Prints to stderr the MySQL binlog offset info in the trx system header if
-the magic number shows it valid. */
-UNIV_INTERN
+/** Display the MySQL binlog offset info if it is present in the trx
+system header. */
void
-trx_sys_print_mysql_binlog_offset(void);
-/*===================================*/
-
+trx_sys_print_mysql_binlog_offset();
#ifdef WITH_WSREP
/** Update WSREP XID info in sys_header of TRX_SYS_PAGE_NO = 5.
@@ -322,53 +241,42 @@ trx_sys_update_wsrep_checkpoint(
trx_sysf_t* sys_header,
mtr_t* mtr);
-/** Read WSREP XID from sys_header of TRX_SYS_PAGE_NO = 5.
-@param[out] xid Transaction XID
-@return true on success, false on error. */
+/** Read WSREP checkpoint XID from sys header.
+@param[out] xid WSREP XID
+@return whether the checkpoint was present */
UNIV_INTERN
bool
trx_sys_read_wsrep_checkpoint(XID* xid);
-
#endif /* WITH_WSREP */
-/*****************************************************************//**
-Prints to stderr the MySQL master log offset info in the trx system header if
-the magic number shows it valid. */
-UNIV_INTERN
-void
-trx_sys_print_mysql_master_log_pos(void);
-/*====================================*/
-/*****************************************************************//**
-Initializes the tablespace tag system. */
-UNIV_INTERN
+/** Initializes the tablespace tag system. */
void
trx_sys_file_format_init(void);
/*==========================*/
+
/*****************************************************************//**
Closes the tablespace tag system. */
-UNIV_INTERN
void
trx_sys_file_format_close(void);
/*===========================*/
+
/********************************************************************//**
Tags the system table space with minimum format id if it has not been
tagged yet.
WARNING: This function is only called during the startup and AFTER the
redo log application during recovery has finished. */
-UNIV_INTERN
void
trx_sys_file_format_tag_init(void);
/*==============================*/
+
/*****************************************************************//**
Shutdown/Close the transaction system. */
-UNIV_INTERN
void
trx_sys_close(void);
/*===============*/
/*****************************************************************//**
Get the name representation of the file format from its id.
-@return pointer to the name */
-UNIV_INTERN
+@return pointer to the name */
const char*
trx_sys_file_format_id_to_name(
/*===========================*/
@@ -376,23 +284,17 @@ trx_sys_file_format_id_to_name(
/*****************************************************************//**
Set the file format id unconditionally except if it's already the
same value.
-@return TRUE if value updated */
-UNIV_INTERN
+@return TRUE if value updated */
ibool
trx_sys_file_format_max_set(
/*========================*/
ulint format_id, /*!< in: file format id */
const char** name); /*!< out: max file format name or
NULL if not needed. */
-/*********************************************************************
-Creates the rollback segments
-@return number of rollback segments that are active. */
-UNIV_INTERN
-ulint
-trx_sys_create_rsegs(
-/*=================*/
- ulint n_spaces, /*!< number of tablespaces for UNDO logs */
- ulint n_rsegs); /*!< number of rollback segments to create */
+/** Create the rollback segments.
+@return whether the creation succeeded */
+bool
+trx_sys_create_rsegs();
/*****************************************************************//**
Get the number of transaction in the system, independent of their state.
@return count of transactions in trx_sys_t::trx_list */
@@ -404,58 +306,18 @@ trx_sys_get_n_rw_trx(void);
/*********************************************************************
Check if there are any active (non-prepared) transactions.
@return total number of active transactions or 0 if none */
-UNIV_INTERN
ulint
trx_sys_any_active_transactions(void);
/*=================================*/
-#else /* !UNIV_HOTBACKUP */
-/*****************************************************************//**
-Prints to stderr the MySQL binlog info in the system header if the
-magic number shows it valid. */
-UNIV_INTERN
-void
-trx_sys_print_mysql_binlog_offset_from_page(
-/*========================================*/
- const byte* page); /*!< in: buffer containing the trx
- system header page, i.e., page number
- TRX_SYS_PAGE_NO in the tablespace */
-/*****************************************************************//**
-Reads the file format id from the first system table space file.
-Even if the call succeeds and returns TRUE, the returned format id
-may be ULINT_UNDEFINED signalling that the format id was not present
-in the data file.
-@return TRUE if call succeeds */
-UNIV_INTERN
-ibool
-trx_sys_read_file_format_id(
-/*========================*/
- const char *pathname, /*!< in: pathname of the first system
- table space file */
- ulint *format_id); /*!< out: file format of the system table
- space */
-/*****************************************************************//**
-Reads the file format id from the given per-table data file.
-@return TRUE if call succeeds */
-UNIV_INTERN
-ibool
-trx_sys_read_pertable_file_format_id(
-/*=================================*/
- const char *pathname, /*!< in: pathname of a per-table
- datafile */
- ulint *format_id); /*!< out: file format of the per-table
- data file */
-#endif /* !UNIV_HOTBACKUP */
/*****************************************************************//**
Get the name representation of the file format from its id.
-@return pointer to the max format name */
-UNIV_INTERN
+@return pointer to the max format name */
const char*
trx_sys_file_format_max_get(void);
/*=============================*/
/*****************************************************************//**
Check for the max file format tag stored on disk.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
dberr_t
trx_sys_file_format_max_check(
/*==========================*/
@@ -463,8 +325,7 @@ trx_sys_file_format_max_check(
/********************************************************************//**
Update the file format tag in the system tablespace only if the given
format id is greater than the known max id.
-@return TRUE if format_id was bigger than the known max id */
-UNIV_INTERN
+@return TRUE if format_id was bigger than the known max id */
ibool
trx_sys_file_format_max_upgrade(
/*============================*/
@@ -472,31 +333,32 @@ trx_sys_file_format_max_upgrade(
ulint format_id); /*!< in: file format identifier */
/*****************************************************************//**
Get the name representation of the file format from its id.
-@return pointer to the name */
-UNIV_INTERN
+@return pointer to the name */
const char*
trx_sys_file_format_id_to_name(
/*===========================*/
const ulint id); /*!< in: id of the file format */
+/**
+Add the transaction to the RW transaction set
+@param trx transaction instance to add */
+UNIV_INLINE
+void
+trx_sys_rw_trx_add(trx_t* trx);
+
#ifdef UNIV_DEBUG
/*************************************************************//**
-Validate the trx_sys_t::trx_list. */
-UNIV_INTERN
-ibool
-trx_sys_validate_trx_list(void);
-/*===========================*/
+Validate the trx_sys_t::rw_trx_list.
+@return true if the list is valid */
+bool
+trx_sys_validate_trx_list();
+/*========================*/
#endif /* UNIV_DEBUG */
-/* The automatically created system rollback segment has this id */
+/** The automatically created system rollback segment has this id */
#define TRX_SYS_SYSTEM_RSEG_ID 0
-/* Space id and page no where the trx system file copy resides */
-#define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */
-#include "fsp0fsp.h"
-#define TRX_SYS_PAGE_NO FSP_TRX_SYS_PAGE_NO
-
-/* The offset of the transaction system header on the page */
+/** The offset of the transaction system header on the page */
#define TRX_SYS FSEG_PAGE_DATA
/** Transaction system header */
@@ -528,14 +390,10 @@ byte, therefore 128; each slot is currently 8 bytes in size. If you want
to raise the level to 256 then you will need to fix some assertions that
impose the 7 bit restriction. e.g., mach_write_to_3() */
#define TRX_SYS_N_RSEGS 128
-/* Originally, InnoDB defined TRX_SYS_N_RSEGS as 256 but created only one
-rollback segment. It initialized some arrays with this number of entries.
-We must remember this limit in order to keep file compatibility. */
-#define TRX_SYS_OLD_N_RSEGS 256
-
-/** Maximum length of MySQL binlog file name, in bytes.
-@see trx_sys_mysql_master_log_name
-@see trx_sys_mysql_bin_log_name */
+/** Maximum number of undo tablespaces (not counting the system tablespace) */
+#define TRX_SYS_MAX_UNDO_SPACES (TRX_SYS_N_RSEGS - 1)
+
+/** Maximum length of MySQL binlog file name, in bytes. */
#define TRX_SYS_MYSQL_LOG_NAME_LEN 512
/** Contents of TRX_SYS_MYSQL_LOG_MAGIC_N_FLD */
#define TRX_SYS_MYSQL_LOG_MAGIC_N 873422344
@@ -543,19 +401,13 @@ We must remember this limit in order to keep file compatibility. */
#if UNIV_PAGE_SIZE_MIN < 4096
# error "UNIV_PAGE_SIZE_MIN < 4096"
#endif
-/** The offset of the MySQL replication info in the trx system header;
-this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */
-#define TRX_SYS_MYSQL_MASTER_LOG_INFO (UNIV_PAGE_SIZE - 2000)
-
/** The offset of the MySQL binlog offset info in the trx system header */
#define TRX_SYS_MYSQL_LOG_INFO (UNIV_PAGE_SIZE - 1000)
#define TRX_SYS_MYSQL_LOG_MAGIC_N_FLD 0 /*!< magic number which is
TRX_SYS_MYSQL_LOG_MAGIC_N
if we have valid data in the
MySQL binlog info */
-#define TRX_SYS_MYSQL_LOG_OFFSET_HIGH 4 /*!< high 4 bytes of the offset
- within that file */
-#define TRX_SYS_MYSQL_LOG_OFFSET_LOW 8 /*!< low 4 bytes of the offset
+#define TRX_SYS_MYSQL_LOG_OFFSET 4 /*!< the 64-bit offset
within that file */
#define TRX_SYS_MYSQL_LOG_NAME 12 /*!< MySQL log file name */
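
The two 4-byte TRX_SYS_MYSQL_LOG_OFFSET_HIGH/LOW fields are merged into a
single 8-byte field at TRX_SYS_MYSQL_LOG_OFFSET. A sketch of the equivalent
encoding, assuming the usual InnoDB big-endian byte order of the mach_write
routines:

#include <cstdint>

/* Write v at p with the most significant byte first. */
static void write_be64(unsigned char* p, uint64_t v)
{
	for (int i = 7; i >= 0; i--) {
		p[i] = (unsigned char) (v & 0xFF);
		v >>= 8;
	}
}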
@@ -619,7 +471,7 @@ FIXED WSREP XID info offsets for 4k page size 10.0.32-galera
*/
#ifdef WITH_WSREP
/** The offset to WSREP XID headers */
-#define TRX_SYS_WSREP_XID_INFO (ut_max(UNIV_PAGE_SIZE - 3500, 1596))
+#define TRX_SYS_WSREP_XID_INFO std::max(srv_page_size - 3500, 1596UL)
#define TRX_SYS_WSREP_XID_MAGIC_N_FLD 0
#define TRX_SYS_WSREP_XID_MAGIC_N 0x77737265
@@ -700,77 +552,100 @@ identifier is added to this 64-bit constant. */
| TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW)
/* @} */
-#ifndef UNIV_HOTBACKUP
/** The transaction system central memory data structure. */
-struct trx_sys_t{
+struct trx_sys_t {
- ib_mutex_t mutex; /*!< mutex protecting most fields in
+ TrxSysMutex mutex; /*!< mutex protecting most fields in
this structure except when noted
otherwise */
- ulint n_prepared_trx; /*!< Number of transactions currently
- in the XA PREPARED state */
- ulint n_prepared_recovered_trx; /*!< Number of transactions
- currently in XA PREPARED state that are
- also recovered. Such transactions cannot
- be added during runtime. They can only
- occur after recovery if mysqld crashed
- while there were XA PREPARED
- transactions. We disable query cache
- if such transactions exist. */
- trx_id_t max_trx_id; /*!< The smallest number not yet
+
+ MVCC* mvcc; /*!< Multi version concurrency control
+ manager */
+ volatile trx_id_t
+ max_trx_id; /*!< The smallest number not yet
assigned as a transaction id or
- transaction number */
+ transaction number. This is declared
+ volatile because it can be accessed
+ without holding any mutex during
+ AC-NL-RO view creation. */
+ trx_ut_list_t serialisation_list;
+ /*!< Ordered on trx_t::no of all the
+ currently active RW transactions */
#ifdef UNIV_DEBUG
- trx_id_t rw_max_trx_id; /*!< Max trx id of read-write transactions
- which exist or existed */
-#endif
- trx_list_t rw_trx_list; /*!< List of active and committed in
+ trx_id_t rw_max_trx_id; /*!< Max trx id of read-write
+ transactions which exist or existed */
+#endif /* UNIV_DEBUG */
+
+ /** Avoid false sharing */
+ const char pad1[CACHE_LINE_SIZE];
+ trx_ut_list_t rw_trx_list; /*!< List of active and committed in
memory read-write transactions, sorted
on trx id, biggest first. Recovered
transactions are always on this list. */
- trx_list_t ro_trx_list; /*!< List of active and committed in
- memory read-only transactions, sorted
- on trx id, biggest first. NOTE:
- The order for read-only transactions
- is not necessary. We should exploit
- this and increase concurrency during
- add/remove. */
- trx_list_t mysql_trx_list; /*!< List of transactions created
- for MySQL. All transactions on
- ro_trx_list are on mysql_trx_list. The
- rw_trx_list can contain system
- transactions and recovered transactions
- that will not be in the mysql_trx_list.
- There can be active non-locking
- auto-commit read only transactions that
- are on this list but not on ro_trx_list.
+
+ /** Avoid false sharing */
+ const char pad2[CACHE_LINE_SIZE];
+ trx_ut_list_t mysql_trx_list; /*!< List of transactions created
+ for MySQL. All user transactions are
+ on mysql_trx_list. The rw_trx_list
+ can contain system transactions and
+ recovered transactions that will not
+ be in the mysql_trx_list.
mysql_trx_list may additionally contain
transactions that have not yet been
started in InnoDB. */
- trx_rseg_t* const rseg_array[TRX_SYS_N_RSEGS];
+
+ trx_ids_t rw_trx_ids; /*!< Array of Read write transaction IDs
+ for MVCC snapshot. A ReadView would take
+ a snapshot of these transactions whose
+ changes are not visible to it. We should
+ remove transactions from the list before
+ committing in memory and releasing locks
+ to ensure the right order of removal and
+ a consistent snapshot. */
+
+ /** Avoid false sharing */
+ const char pad3[CACHE_LINE_SIZE];
+ /** Temporary rollback segments */
+ trx_rseg_t* temp_rsegs[TRX_SYS_N_RSEGS];
+ /** Avoid false sharing */
+ const char pad4[CACHE_LINE_SIZE];
+
+ trx_rseg_t* rseg_array[TRX_SYS_N_RSEGS];
/*!< Pointer array to rollback
segments; NULL if slot not in use;
created and destroyed in
single-threaded mode; not protected
by any mutex, because it is read-only
during multi-threaded operation */
- ulint rseg_history_len;/*!< Length of the TRX_RSEG_HISTORY
+ ulint rseg_history_len;
+ /*!< Length of the TRX_RSEG_HISTORY
list (update undo logs for committed
transactions), protected by
rseg->mutex */
- UT_LIST_BASE_NODE_T(read_view_t) view_list;
- /*!< List of read views sorted
- on trx no, biggest first */
+
+ TrxIdSet rw_trx_set; /*!< Mapping from transaction id
+ to transaction instance */
};
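
The pad1..pad4 members above separate hot, independently updated fields onto
different cache lines so that concurrent writers do not invalidate each
other's lines. A minimal sketch of the idea; the 64-byte line size is an
assumption for illustration:

static const unsigned CACHE_LINE_SIZE_SKETCH = 64;	/* assumed line size */

struct counters_sketch {
	unsigned long	a;				/* written by thread 1 */
	char		pad[CACHE_LINE_SIZE_SKETCH];	/* keeps b on another line */
	unsigned long	b;				/* written by thread 2 */
};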
/** When a trx id which is zero modulo this number (which must be a power of
two) is assigned, the field TRX_SYS_TRX_ID_STORE on the transaction system
page is updated */
-#define TRX_SYS_TRX_ID_WRITE_MARGIN 256
-#endif /* !UNIV_HOTBACKUP */
+#define TRX_SYS_TRX_ID_WRITE_MARGIN ((trx_id_t) 256)
+
+/** Test if trx_sys->mutex is owned. */
+#define trx_sys_mutex_own() (trx_sys->mutex.is_owned())
+
+/** Acquire the trx_sys->mutex. */
+#define trx_sys_mutex_enter() do { \
+ mutex_enter(&trx_sys->mutex); \
+} while (0)
+
+/** Release the trx_sys->mutex. */
+#define trx_sys_mutex_exit() do { \
+ trx_sys->mutex.exit(); \
+} while (0)
-#ifndef UNIV_NONINL
#include "trx0sys.ic"
-#endif
#endif
diff --git a/storage/innobase/include/trx0sys.ic b/storage/innobase/include/trx0sys.ic
index 04c858118da..0a4d583671f 100644
--- a/storage/innobase/include/trx0sys.ic
+++ b/storage/innobase/include/trx0sys.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,9 +26,8 @@ Created 3/26/1996 Heikki Tuuri
#include "trx0trx.h"
#include "data0type.h"
-#ifndef UNIV_HOTBACKUP
-# include "srv0srv.h"
-# include "mtr0log.h"
+#include "srv0srv.h"
+#include "mtr0log.h"
/* The typedef for rseg slot in the file copy */
typedef byte trx_sysf_rseg_t;
@@ -47,63 +47,41 @@ typedef byte trx_sysf_rseg_t;
/*****************************************************************//**
Writes the value of max_trx_id to the file based trx system header. */
-UNIV_INTERN
void
trx_sys_flush_max_trx_id(void);
/*==========================*/
-/***************************************************************//**
-Checks if a page address is the trx sys header page.
-@return TRUE if trx sys header page */
-UNIV_INLINE
-ibool
-trx_sys_hdr_page(
-/*=============*/
- ulint space, /*!< in: space */
- ulint page_no)/*!< in: page number */
+/** Checks if a page address is the trx sys header page.
+@param[in] page_id page id
+@return true if trx sys header page */
+inline bool trx_sys_hdr_page(const page_id_t page_id)
{
- if ((space == TRX_SYS_SPACE) && (page_no == TRX_SYS_PAGE_NO)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/***************************************************************//**
-Gets the pointer in the nth slot of the rseg array.
-@return pointer to rseg object, NULL if slot not in use */
-UNIV_INLINE
-trx_rseg_t*
-trx_sys_get_nth_rseg(
-/*=================*/
- trx_sys_t* sys, /*!< in: trx system */
- ulint n) /*!< in: index of slot */
-{
- ut_ad(n < TRX_SYS_N_RSEGS);
-
- return(sys->rseg_array[n]);
+ return(page_id.space() == TRX_SYS_SPACE
+ && page_id.page_no() == TRX_SYS_PAGE_NO);
}
/**********************************************************************//**
Gets a pointer to the transaction system header and x-latches its page.
-@return pointer to system header, page x-latched. */
+@return pointer to system header, page x-latched. */
UNIV_INLINE
trx_sysf_t*
trx_sysf_get(
/*=========*/
mtr_t* mtr) /*!< in: mtr */
{
- buf_block_t* block;
- trx_sysf_t* header;
+ buf_block_t* block = NULL;
+ trx_sysf_t* header = NULL;
ut_ad(mtr);
- block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
- RW_X_LATCH, mtr);
- buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
+ block = buf_page_get(page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO),
+ univ_page_size, RW_X_LATCH, mtr);
- header = TRX_SYS + buf_block_get_frame(block);
+ if (block) {
+ buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
+
+ header = TRX_SYS + buf_block_get_frame(block);
+ }
return(header);
}
@@ -111,7 +89,7 @@ trx_sysf_get(
/*****************************************************************//**
Gets the space of the nth rollback segment slot in the trx system
file copy.
-@return space id */
+@return space id */
UNIV_INLINE
ulint
trx_sysf_rseg_get_space(
@@ -131,7 +109,7 @@ trx_sysf_rseg_get_space(
/*****************************************************************//**
Gets the page number of the nth rollback segment slot in the trx system
header.
-@return page number, FIL_NULL if slot unused */
+@return page number, FIL_NULL if slot unused */
UNIV_INLINE
ulint
trx_sysf_rseg_get_page_no(
@@ -192,7 +170,6 @@ trx_sysf_rseg_set_page_no(
page_no,
MLOG_4BYTES, mtr);
}
-#endif /* !UNIV_HOTBACKUP */
/*****************************************************************//**
Writes a trx id to an index page. In case that the id size changes in
@@ -208,15 +185,15 @@ trx_write_trx_id(
#if DATA_TRX_ID_LEN != 6
# error "DATA_TRX_ID_LEN != 6"
#endif
+ ut_ad(id > 0);
mach_write_to_6(ptr, id);
}
-#ifndef UNIV_HOTBACKUP
/*****************************************************************//**
Reads a trx id from an index page. In case that the id size changes in
some future version, this function should be used instead of
mach_read_...
-@return id */
+@return id */
UNIV_INLINE
trx_id_t
trx_read_trx_id(
@@ -232,7 +209,7 @@ trx_read_trx_id(
/****************************************************************//**
Looks for the trx handle with the given id in rw_trx_list.
The caller must be holding trx_sys->mutex.
-@return the trx handle or NULL if not found;
+@return the trx handle or NULL if not found;
the pointer must not be dereferenced unless lock_sys->mutex was
acquired before calling this function and is still being held */
UNIV_INLINE
@@ -241,57 +218,18 @@ trx_get_rw_trx_by_id(
/*=================*/
trx_id_t trx_id) /*!< in: trx id to search for */
{
- trx_t* trx;
- ulint len;
- trx_t* first;
-
- ut_ad(mutex_own(&trx_sys->mutex));
+ ut_ad(trx_id > 0);
+ ut_ad(trx_sys_mutex_own());
- len = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
-
- if (len == 0) {
+ if (trx_sys->rw_trx_set.empty()) {
return(NULL);
}
- /* Because the list is ordered on trx id in descending order,
- we try to speed things up a bit. */
-
- trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
- assert_trx_in_rw_list(trx);
+ TrxIdSet::iterator it;
- if (trx_id == trx->id) {
- return(trx);
- } else if (len == 1 || trx_id > trx->id) {
- return(NULL);
- }
+ it = trx_sys->rw_trx_set.find(TrxTrack(trx_id));
- first = trx;
-
- trx = UT_LIST_GET_LAST(trx_sys->rw_trx_list);
- assert_trx_in_rw_list(trx);
-
- if (trx_id == trx->id) {
- return(trx);
- } else if (len == 2 || trx_id < trx->id) {
- return(NULL);
- }
-
- /* Search the list from the lower end (tail). */
- if (trx_id < (first->id + trx->id) >> 1) {
- for (trx = UT_LIST_GET_PREV(trx_list, trx);
- trx != NULL && trx_id > trx->id;
- trx = UT_LIST_GET_PREV(trx_list, trx)) {
- assert_trx_in_rw_list(trx);
- }
- } else {
- for (trx = UT_LIST_GET_NEXT(trx_list, first);
- trx != NULL && trx_id < trx->id;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
- assert_trx_in_rw_list(trx);
- }
- }
-
- return((trx != NULL && trx->id == trx_id) ? trx : NULL);
+ return(it == trx_sys->rw_trx_set.end() ? NULL : it->m_trx);
}
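
The old descending-list walk is replaced by a lookup in trx_sys->rw_trx_set,
an ordered mapping from transaction id to transaction instance. A minimal
sketch of that lookup with hypothetical stand-in types (TrxTrackSketch is
not the real TrxTrack, but is keyed the same way, on the trx id):

#include <cstddef>
#include <cstdint>
#include <set>

typedef uint64_t trx_id_t;

struct trx_sketch {
	trx_id_t	id;
};

/* Hypothetical stand-in for TrxTrack: orders set entries by trx id. */
struct TrxTrackSketch {
	trx_id_t	m_id;
	trx_sketch*	m_trx;

	explicit TrxTrackSketch(trx_id_t id, trx_sketch* trx = NULL)
		: m_id(id), m_trx(trx) {}

	bool operator<(const TrxTrackSketch& other) const
	{
		return m_id < other.m_id;
	}
};

typedef std::set<TrxTrackSketch> TrxIdSetSketch;

/* O(log n) id-to-transaction lookup, replacing the old linear walk
over rw_trx_list. */
static trx_sketch* find_rw_trx(const TrxIdSetSketch& s, trx_id_t id)
{
	TrxIdSetSketch::const_iterator it = s.find(TrxTrackSketch(id));

	return it == s.end() ? NULL : it->m_trx;
}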
/****************************************************************//**
@@ -299,18 +237,17 @@ Returns the minimum trx id in trx list. This is the smallest id for which
the trx can possibly be active. (But, you must look at the trx->state
to find out if the minimum trx id transaction itself is active, or already
committed.). The caller must be holding the trx_sys_t::mutex in shared mode.
-@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
+@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
UNIV_INLINE
trx_id_t
trx_rw_min_trx_id_low(void)
/*=======================*/
{
trx_id_t id;
- const trx_t* trx;
- ut_ad(mutex_own(&trx_sys->mutex));
+ ut_ad(trx_sys_mutex_own());
- trx = UT_LIST_GET_LAST(trx_sys->rw_trx_list);
+ const trx_t* trx = UT_LIST_GET_LAST(trx_sys->rw_trx_list);
if (trx == NULL) {
id = trx_sys->max_trx_id;
@@ -334,12 +271,12 @@ trx_assert_recovered(
{
const trx_t* trx;
- mutex_enter(&trx_sys->mutex);
+ trx_sys_mutex_enter();
trx = trx_get_rw_trx_by_id(trx_id);
ut_a(trx->is_recovered);
- mutex_exit(&trx_sys->mutex);
+ trx_sys_mutex_exit();
return(TRUE);
}
@@ -350,102 +287,100 @@ Returns the minimum trx id in rw trx list. This is the smallest id for which
the rw trx can possibly be active. (But, you must look at the trx->state
to find out if the minimum trx id transaction itself is active, or already
committed.)
-@return the minimum trx id, or trx_sys->max_trx_id if rw trx list is empty */
+@return the minimum trx id, or trx_sys->max_trx_id if rw trx list is empty */
UNIV_INLINE
trx_id_t
trx_rw_min_trx_id(void)
/*===================*/
{
- trx_id_t id;
+ trx_sys_mutex_enter();
- mutex_enter(&trx_sys->mutex);
+ trx_id_t id = trx_rw_min_trx_id_low();
- id = trx_rw_min_trx_id_low();
-
- mutex_exit(&trx_sys->mutex);
+ trx_sys_mutex_exit();
return(id);
}
-/****************************************************************//**
-Checks if a rw transaction with the given id is active. Caller must hold
-trx_sys->mutex. If the caller is not holding lock_sys->mutex, the
-transaction may already have been committed.
-@return transaction instance if active, or NULL;
-the pointer must not be dereferenced unless lock_sys->mutex was
-acquired before calling this function and is still being held */
-UNIV_INLINE
-trx_t*
-trx_rw_is_active_low(
-/*=================*/
- trx_id_t trx_id, /*!< in: trx id of the transaction */
- ibool* corrupt) /*!< in: NULL or pointer to a flag
- that will be set if corrupt */
+/** Look up a rw transaction with the given id.
+@param[in] trx_id transaction identifier
+@param[out] corrupt flag that will be set if trx_id is corrupted
+@return transaction; its state should be rechecked after acquiring trx_t::mutex
+@retval NULL if there is no transaction identified by trx_id. */
+inline trx_t* trx_rw_is_active_low(trx_id_t trx_id, bool* corrupt)
{
- trx_t* trx;
-
- ut_ad(mutex_own(&trx_sys->mutex));
+ ut_ad(trx_sys_mutex_own());
if (trx_id < trx_rw_min_trx_id_low()) {
-
- trx = NULL;
} else if (trx_id >= trx_sys->max_trx_id) {
/* There must be corruption: we let the caller handle the
diagnostic prints in this case. */
- trx = NULL;
if (corrupt != NULL) {
- *corrupt = TRUE;
- }
- } else {
- trx = trx_get_rw_trx_by_id(trx_id);
-
- if (trx != NULL
- && trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)) {
-
- trx = NULL;
+ *corrupt = true;
}
+ } else if (trx_t* trx = trx_get_rw_trx_by_id(trx_id)) {
+ return trx;
}
- return(trx);
+ return NULL;
}
-/****************************************************************//**
-Checks if a rw transaction with the given id is active. If the caller is
-not holding lock_sys->mutex, the transaction may already have been
-committed.
-@return transaction instance if active, or NULL;
-the pointer must not be dereferenced unless lock_sys->mutex was
-acquired before calling this function and is still being held */
-UNIV_INLINE
-trx_t*
-trx_rw_is_active(
-/*=============*/
- trx_id_t trx_id, /*!< in: trx id of the transaction */
- ibool* corrupt) /*!< in: NULL or pointer to a flag
- that will be set if corrupt */
+/** Look up a rw transaction with the given id.
+@param[in] trx_id transaction identifier
+@param[out] corrupt flag that will be set if trx_id is corrupted
+@param[in] ref_count whether to increment trx->n_ref
+@return transaction; its state should be rechecked after acquiring trx_t::mutex
+@retval NULL if there is no active transaction identified by trx_id. */
+inline trx_t* trx_rw_is_active(trx_id_t trx_id, bool* corrupt, bool ref_count)
{
- trx_t* trx;
-
- mutex_enter(&trx_sys->mutex);
-
- trx = trx_rw_is_active_low(trx_id, corrupt);
+ ut_ad(trx_id);
+
+ trx_sys_mutex_enter();
+
+ trx_t* trx = trx_rw_is_active_low(trx_id, corrupt);
+
+ if (trx && ref_count) {
+ TrxMutex* trx_mutex = &trx->mutex;
+ mutex_enter(trx_mutex);
+ ut_ad(!trx_state_eq(trx, TRX_STATE_NOT_STARTED));
+ ut_ad(trx->id == trx_id);
+ if (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)) {
+ /* We have an early state check here to avoid
+ committer starvation in a wait loop for
+ transaction references, when there's a stream of
+ trx_rw_is_active() calls from other threads.
+ The trx->state may change to COMMITTED after
+ trx_mutex is released, and it will have to be
+ rechecked by the caller after reacquiring the mutex. */
+ trx = NULL;
+ } else {
+ /* The reference could be safely incremented after
+ releasing one of trx_mutex or trx_sys->mutex.
+ Holding trx->mutex here may prevent a few false
+ references that could have a negative performance
+ impact on trx_commit_in_memory(). */
+ trx->reference();
+ }
+ mutex_exit(trx_mutex);
+ }
- mutex_exit(&trx_sys->mutex);
+ trx_sys_mutex_exit();
return(trx);
}
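
A sketch of the calling pattern the new ref_count flag enables, using only APIs introduced by this patch; the wrapper name check_and_release is made up:

	/* Pin the transaction while inspecting it, then unpin it so
	that the committing thread can proceed. */
	static bool check_and_release(trx_id_t id)
	{
		bool	corrupt = false;
		trx_t*	trx = trx_rw_is_active(id, &corrupt, true);

		if (trx == NULL) {
			return(false);	/* not active, or corrupt set */
		}

		/* Safe to dereference trx here: n_ref > 0 keeps
		trx_commit_in_memory() from releasing it under us. */
		trx->release_reference();

		return(true);
	}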
/*****************************************************************//**
Allocates a new transaction id.
-@return new, allocated trx id */
+@return new, allocated trx id */
UNIV_INLINE
trx_id_t
-trx_sys_get_new_trx_id(void)
-/*========================*/
+trx_sys_get_new_trx_id()
+/*====================*/
{
- ut_ad(mutex_own(&trx_sys->mutex));
+ /* wsrep_fake_trx_id violates this assert */
+ ut_ad(trx_sys_mutex_own());
/* VERY important: after the database is started, max_trx_id value is
divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the following if
@@ -454,7 +389,7 @@ trx_sys_get_new_trx_id(void)
Thus trx id values will not overlap when the database is
repeatedly started! */
- if (!(trx_sys->max_trx_id % (trx_id_t) TRX_SYS_TRX_ID_WRITE_MARGIN)) {
+ if (!(trx_sys->max_trx_id % TRX_SYS_TRX_ID_WRITE_MARGIN)) {
trx_sys_flush_max_trx_id();
}
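
The margin check above persists max_trx_id only once every TRX_SYS_TRX_ID_WRITE_MARGIN allocations; a standalone sketch of the idea with made-up names (the exact rounding applied at startup lives elsewhere and may differ):

	#include <cstdint>

	static const uint64_t WRITE_MARGIN = 256;

	struct IdCounter {
		uint64_t	value;		/* in-memory counter */
		uint64_t	durable;	/* last value flushed to disk */

		uint64_t next()
		{
			if (value % WRITE_MARGIN == 0) {
				durable = value;	/* cheap, amortized flush */
			}
			return(value++);
		}

		/* After a crash, skip past anything that may have been
		handed out since the last flush, so ids never repeat. */
		void recover() { value = durable + 2 * WRITE_MARGIN; }
	};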
@@ -471,24 +406,24 @@ trx_id_t
trx_sys_get_max_trx_id(void)
/*========================*/
{
-#if UNIV_WORD_SIZE < DATA_TRX_ID_LEN
- trx_id_t max_trx_id;
-#endif
-
- ut_ad(!mutex_own(&trx_sys->mutex));
+ ut_ad(!trx_sys_mutex_own());
#if UNIV_WORD_SIZE < DATA_TRX_ID_LEN
/* Avoid torn reads. */
- mutex_enter(&trx_sys->mutex);
- max_trx_id = trx_sys->max_trx_id;
- mutex_exit(&trx_sys->mutex);
+
+ trx_sys_mutex_enter();
+
+ trx_id_t max_trx_id = trx_sys->max_trx_id;
+
+ trx_sys_mutex_exit();
+
return(max_trx_id);
#else
/* Perform a dirty read. Callers should be prepared for stale
values, and we know that the value fits in a machine word, so
that it will be read and written atomically. */
return(trx_sys->max_trx_id);
-#endif
+#endif /* UNIV_WORD_SIZE < DATA_TRX_ID_LEN */
}
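
The #if above guards against torn 64-bit reads on 32-bit builds; a hedged restatement in modern C++, where std::atomic expresses the same contract on any word size (stale values allowed, torn values not):

	#include <atomic>
	#include <cstdint>

	std::atomic<uint64_t> max_trx_id_demo;

	uint64_t read_max_trx_id()
	{
		/* Never torn, possibly stale; relaxed ordering matches
		the "dirty read" wording in the comment above. */
		return max_trx_id_demo.load(std::memory_order_relaxed);
	}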
/*****************************************************************//**
@@ -501,12 +436,24 @@ trx_sys_get_n_rw_trx(void)
{
ulint n_trx;
- mutex_enter(&trx_sys->mutex);
+ trx_sys_mutex_enter();
n_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
- mutex_exit(&trx_sys->mutex);
+ trx_sys_mutex_exit();
return(n_trx);
}
-#endif /* !UNIV_HOTBACKUP */
+
+/**
+Add the transaction to the RW transaction set
+@param trx transaction instance to add */
+UNIV_INLINE
+void
+trx_sys_rw_trx_add(trx_t* trx)
+{
+ ut_ad(trx->id != 0);
+
+ trx_sys->rw_trx_set.insert(TrxTrack(trx->id, trx));
+ ut_d(trx->in_rw_trx_list = true);
+}
diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
index 9aa3daea4e1..60e6fc58089 100644
--- a/storage/innobase/include/trx0trx.h
+++ b/storage/innobase/include/trx0trx.h
@@ -27,33 +27,25 @@ Created 3/26/1996 Heikki Tuuri
#ifndef trx0trx_h
#define trx0trx_h
-#include "univ.i"
#include "trx0types.h"
-#include "dict0types.h"
-#ifndef UNIV_HOTBACKUP
#include "lock0types.h"
-#include "log0log.h"
-#include "usr0types.h"
#include "que0types.h"
#include "mem0mem.h"
-#include "read0types.h"
#include "trx0xa.h"
#include "ut0vec.h"
#include "fts0fts.h"
-/** Dummy session used currently in MySQL interface */
-extern sess_t* trx_dummy_sess;
+#include <vector>
+#include <set>
+
+// Forward declaration
+struct mtr_t;
+class ReadView;
+class FlushObserver;
+class ut_stage_alter_t;
-/********************************************************************//**
-Releases the search latch if trx has reserved it. */
-UNIV_INLINE
-void
-trx_search_latch_release_if_reserved(
-/*=================================*/
- trx_t* trx); /*!< in: transaction */
/******************************************************************//**
Set detailed error message for the transaction. */
-UNIV_INTERN
void
trx_set_detailed_error(
/*===================*/
@@ -62,7 +54,6 @@ trx_set_detailed_error(
/*************************************************************//**
Set detailed error message for the transaction from a file. Note that the
file is rewound before reading from it. */
-UNIV_INTERN
void
trx_set_detailed_error_from_file(
/*=============================*/
@@ -70,7 +61,7 @@ trx_set_detailed_error_from_file(
FILE* file); /*!< in: file to read message from */
/****************************************************************//**
Retrieves the error_info field from a trx.
-@return the error info */
+@return the error info */
UNIV_INLINE
const dict_index_t*
trx_get_error_info(
@@ -78,108 +69,139 @@ trx_get_error_info(
const trx_t* trx); /*!< in: trx object */
/********************************************************************//**
Creates a transaction object for MySQL.
-@return own: transaction object */
-UNIV_INTERN
+@return own: transaction object */
trx_t*
trx_allocate_for_mysql(void);
/*========================*/
/********************************************************************//**
Creates a transaction object for background operations by the master thread.
-@return own: transaction object */
-UNIV_INTERN
+@return own: transaction object */
trx_t*
trx_allocate_for_background(void);
/*=============================*/
-/********************************************************************//**
-Frees a transaction object of a background operation of the master thread. */
-UNIV_INTERN
+
+/** Free and initialize a transaction object instantiated during recovery.
+@param trx trx object to free and initialize during recovery */
void
-trx_free_for_background(
-/*====================*/
- trx_t* trx); /*!< in, own: trx object */
+trx_free_resurrected(trx_t* trx);
+
+/** Free a transaction that was allocated by background or user threads.
+@param trx trx object to free */
+void
+trx_free_for_background(trx_t* trx);
+
/********************************************************************//**
At shutdown, frees a transaction object that is in the PREPARED state. */
-UNIV_INTERN
void
trx_free_prepared(
/*==============*/
- trx_t* trx) /*!< in, own: trx object */
- UNIV_COLD MY_ATTRIBUTE((nonnull));
-/********************************************************************//**
-Frees a transaction object for MySQL. */
-UNIV_INTERN
-void
-trx_free_for_mysql(
-/*===============*/
trx_t* trx); /*!< in, own: trx object */
-/****************************************************************//**
-Creates trx objects for transactions and initializes the trx list of
-trx_sys at database start. Rollback segment and undo log lists must
-already exist when this function is called, because the lists of
-transactions to be rolled back or cleaned up are built based on the
-undo log lists. */
-UNIV_INTERN
+
+/** Free a transaction object for MySQL.
+@param[in,out] trx transaction */
void
-trx_lists_init_at_db_start(void);
-/*============================*/
+trx_free_for_mysql(trx_t* trx);
-#ifdef UNIV_DEBUG
-#define trx_start_if_not_started_xa(t) \
- { \
- (t)->start_line = __LINE__; \
- (t)->start_file = __FILE__; \
- trx_start_if_not_started_xa_low((t)); \
- }
-#else
-#define trx_start_if_not_started_xa(t) \
- trx_start_if_not_started_xa_low((t))
-#endif /* UNIV_DEBUG */
+/** Disconnect a transaction from MySQL.
+@param[in,out] trx transaction */
+void
+trx_disconnect_plain(trx_t* trx);
+
+/** Disconnect a prepared transaction from MySQL.
+@param[in,out] trx transaction */
+void
+trx_disconnect_prepared(trx_t* trx);
+
+/** Initialize (resurrect) transactions at startup. */
+void
+trx_lists_init_at_db_start();
/*************************************************************//**
Starts the transaction if it is not yet started. */
-UNIV_INTERN
void
trx_start_if_not_started_xa_low(
/*============================*/
- trx_t* trx); /*!< in: transaction */
+ trx_t* trx, /*!< in/out: transaction */
+ bool read_write); /*!< in: true if read write transaction */
/*************************************************************//**
Starts the transaction if it is not yet started. */
-UNIV_INTERN
void
trx_start_if_not_started_low(
/*=========================*/
- trx_t* trx); /*!< in: transaction */
+ trx_t* trx, /*!< in/out: transaction */
+ bool read_write); /*!< in: true if read write transaction */
+
+/*************************************************************//**
+Starts a transaction for internal processing. */
+void
+trx_start_internal_low(
+/*===================*/
+ trx_t* trx); /*!< in/out: transaction */
+
+/** Starts a read-only transaction for internal processing.
+@param[in,out] trx transaction to be started */
+void
+trx_start_internal_read_only_low(
+ trx_t* trx);
#ifdef UNIV_DEBUG
-#define trx_start_if_not_started(t) \
- { \
+#define trx_start_if_not_started_xa(t, rw) \
+ do { \
(t)->start_line = __LINE__; \
(t)->start_file = __FILE__; \
- trx_start_if_not_started_low((t)); \
- }
+ trx_start_if_not_started_xa_low((t), rw); \
+ } while (false)
+
+#define trx_start_if_not_started(t, rw) \
+ do { \
+ (t)->start_line = __LINE__; \
+ (t)->start_file = __FILE__; \
+ trx_start_if_not_started_low((t), rw); \
+ } while (false)
+
+#define trx_start_internal(t) \
+ do { \
+ (t)->start_line = __LINE__; \
+ (t)->start_file = __FILE__; \
+ trx_start_internal_low((t)); \
+ } while (false)
+
+#define trx_start_internal_read_only(t) \
+ do { \
+ (t)->start_line = __LINE__; \
+ (t)->start_file = __FILE__; \
+ trx_start_internal_read_only_low(t); \
+ } while (false)
#else
-#define trx_start_if_not_started(t) \
- trx_start_if_not_started_low((t))
+#define trx_start_if_not_started(t, rw) \
+ trx_start_if_not_started_low((t), rw)
+
+#define trx_start_internal(t) \
+ trx_start_internal_low((t))
+
+#define trx_start_internal_read_only(t) \
+ trx_start_internal_read_only_low(t)
+
+#define trx_start_if_not_started_xa(t, rw) \
+ trx_start_if_not_started_xa_low((t), (rw))
#endif /* UNIV_DEBUG */
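
The macros above also switch from bare { ... } blocks to do { ... } while (false); a small sketch of the dangling-else hazard the old form had (trx_t_demo is a stand-in type):

	struct trx_t_demo { unsigned start_line; };

	#define START_BAD(t)  { (t)->start_line = __LINE__; }
	#define START_GOOD(t) do { (t)->start_line = __LINE__; } while (false)

	void demo(bool cond, trx_t_demo* t)
	{
		if (cond)
			START_GOOD(t);	/* expands to one statement */
		else
			START_GOOD(t);
		/* With START_BAD, the ";" after the "{...}" would end the
		if statement and make the "else" a syntax error. */
	}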
/*************************************************************//**
Starts the transaction for a DDL operation. */
-UNIV_INTERN
void
trx_start_for_ddl_low(
/*==================*/
trx_t* trx, /*!< in/out: transaction */
- trx_dict_op_t op) /*!< in: dictionary operation type */
- MY_ATTRIBUTE((nonnull));
+ trx_dict_op_t op); /*!< in: dictionary operation type */
#ifdef UNIV_DEBUG
#define trx_start_for_ddl(t, o) \
- { \
+ do { \
ut_ad((t)->start_file == 0); \
(t)->start_line = __LINE__; \
(t)->start_file = __FILE__; \
trx_start_for_ddl_low((t), (o)); \
- }
+ } while (0)
#else
#define trx_start_for_ddl(t, o) \
trx_start_for_ddl_low((t), (o))
@@ -187,79 +209,62 @@ trx_start_for_ddl_low(
/****************************************************************//**
Commits a transaction. */
-UNIV_INTERN
void
trx_commit(
/*=======*/
- trx_t* trx) /*!< in/out: transaction */
- MY_ATTRIBUTE((nonnull));
+ trx_t* trx); /*!< in/out: transaction */
+
/****************************************************************//**
Commits a transaction and a mini-transaction. */
-UNIV_INTERN
void
trx_commit_low(
/*===========*/
trx_t* trx, /*!< in/out: transaction */
- mtr_t* mtr) /*!< in/out: mini-transaction (will be committed),
+ mtr_t* mtr); /*!< in/out: mini-transaction (will be committed),
or NULL if trx made no modifications */
- MY_ATTRIBUTE((nonnull(1)));
/****************************************************************//**
Cleans up a transaction at database startup. The cleanup is needed if
the transaction already got to the middle of a commit when the database
crashed, and we cannot roll it back. */
-UNIV_INTERN
void
trx_cleanup_at_db_startup(
/*======================*/
trx_t* trx); /*!< in: transaction */
/**********************************************************************//**
Does the transaction commit for MySQL.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
+@return DB_SUCCESS or error number */
dberr_t
trx_commit_for_mysql(
/*=================*/
trx_t* trx); /*!< in/out: transaction */
-/**********************************************************************//**
-Does the transaction prepare for MySQL. */
-UNIV_INTERN
-void
-trx_prepare_for_mysql(
-/*==================*/
- trx_t* trx); /*!< in/out: trx handle */
+/** XA PREPARE a transaction.
+@param[in,out] trx transaction to prepare */
+void trx_prepare_for_mysql(trx_t* trx);
/**********************************************************************//**
This function is used to find number of prepared transactions and
their transaction objects for a recovery.
-@return number of prepared transactions */
-UNIV_INTERN
+@return number of prepared transactions */
int
trx_recover_for_mysql(
/*==================*/
XID* xid_list, /*!< in/out: prepared transactions */
ulint len); /*!< in: number of slots in xid_list */
-/*******************************************************************//**
-This function is used to find one X/Open XA distributed transaction
-which is in the prepared state
-@return trx or NULL; on match, the trx->xid will be invalidated;
-note that the trx may have been committed, unless the caller is
-holding lock_sys->mutex */
-UNIV_INTERN
-trx_t *
-trx_get_trx_by_xid(
-/*===============*/
- const XID* xid); /*!< in: X/Open XA transaction identifier */
+/** Look up an X/Open distributed transaction in XA PREPARE state.
+@param[in] xid X/Open XA transaction identifier
+@return transaction on match (the trx_t::xid will be invalidated);
+note that the trx may have been committed before the caller acquires
+trx_t::mutex
+@retval NULL if no match */
+trx_t* trx_get_trx_by_xid(const XID* xid);
/**********************************************************************//**
If required, flushes the log to disk if we called trx_commit_for_mysql()
with trx->flush_log_later == TRUE. */
-UNIV_INTERN
void
trx_commit_complete_for_mysql(
/*==========================*/
- trx_t* trx) /*!< in/out: transaction */
- MY_ATTRIBUTE((nonnull));
+ trx_t* trx); /*!< in/out: transaction */
/**********************************************************************//**
Marks the latest SQL statement ended. */
-UNIV_INTERN
void
trx_mark_sql_stat_end(
/*==================*/
@@ -267,32 +272,44 @@ trx_mark_sql_stat_end(
/********************************************************************//**
Assigns a read view for a consistent read query. All the consistent reads
within the same transaction will get the same read view, which is created
-when this function is first called for a new started transaction.
-@return consistent read view */
-UNIV_INTERN
-read_view_t*
+when this function is first called for a new started transaction. */
+ReadView*
trx_assign_read_view(
/*=================*/
trx_t* trx); /*!< in: active transaction */
+
+/****************************************************************//**
+@return the transaction's read view or NULL if one not assigned. */
+UNIV_INLINE
+ReadView*
+trx_get_read_view(
+/*==============*/
+ trx_t* trx);
+
+/****************************************************************//**
+@return the transaction's read view or NULL if one not assigned. */
+UNIV_INLINE
+const ReadView*
+trx_get_read_view(
+/*==============*/
+ const trx_t* trx);
+
/****************************************************************//**
Prepares a transaction for commit/rollback. */
-UNIV_INTERN
void
trx_commit_or_rollback_prepare(
/*===========================*/
trx_t* trx); /*!< in/out: transaction */
/*********************************************************************//**
Creates a commit command node struct.
-@return own: commit node struct */
-UNIV_INTERN
+@return own: commit node struct */
commit_node_t*
trx_commit_node_create(
/*===================*/
mem_heap_t* heap); /*!< in: mem heap where created */
/***********************************************************//**
Performs an execution step for a commit type node in a query graph.
-@return query thread to run next, or NULL */
-UNIV_INTERN
+@return query thread to run next, or NULL */
que_thr_t*
trx_commit_step(
/*============*/
@@ -301,7 +318,6 @@ trx_commit_step(
/**********************************************************************//**
Prints info about a transaction.
Caller must hold trx_sys->mutex. */
-UNIV_INTERN
void
trx_print_low(
/*==========*/
@@ -316,23 +332,20 @@ trx_print_low(
/*!< in: lock_number_of_rows_locked(&trx->lock) */
ulint n_trx_locks,
/*!< in: length of trx->lock.trx_locks */
- ulint heap_size)
+ ulint heap_size);
/*!< in: mem_heap_get_size(trx->lock.lock_heap) */
- MY_ATTRIBUTE((nonnull));
/**********************************************************************//**
Prints info about a transaction.
The caller must hold lock_sys->mutex and trx_sys->mutex.
When possible, use trx_print() instead. */
-UNIV_INTERN
void
trx_print_latched(
/*==============*/
FILE* f, /*!< in: output stream */
const trx_t* trx, /*!< in: transaction */
- ulint max_query_len) /*!< in: max query length to print,
+ ulint max_query_len); /*!< in: max query length to print,
or 0 to use the default max length */
- MY_ATTRIBUTE((nonnull));
#ifdef WITH_WSREP
/**********************************************************************//**
@@ -344,7 +357,6 @@ without locking lock_sys->mutex. */
UNIV_INTERN
void
wsrep_trx_print_locking(
-/*==============*/
FILE* f, /*!< in: output stream */
const trx_t* trx, /*!< in: transaction */
ulint max_query_len) /*!< in: max query length to print,
@@ -354,25 +366,23 @@ wsrep_trx_print_locking(
/**********************************************************************//**
Prints info about a transaction.
Acquires and releases lock_sys->mutex and trx_sys->mutex. */
-UNIV_INTERN
void
trx_print(
/*======*/
FILE* f, /*!< in: output stream */
const trx_t* trx, /*!< in: transaction */
- ulint max_query_len) /*!< in: max query length to print,
+ ulint max_query_len); /*!< in: max query length to print,
or 0 to use the default max length */
- MY_ATTRIBUTE((nonnull));
/**********************************************************************//**
Determine if a transaction is a dictionary operation.
-@return dictionary operation mode */
+@return dictionary operation mode */
UNIV_INLINE
enum trx_dict_op_t
trx_get_dict_operation(
/*===================*/
const trx_t* trx) /*!< in: transaction */
- MY_ATTRIBUTE((pure));
+ MY_ATTRIBUTE((warn_unused_result));
/**********************************************************************//**
Flag a transaction a dictionary operation. */
UNIV_INLINE
@@ -383,17 +393,14 @@ trx_set_dict_operation(
enum trx_dict_op_t op); /*!< in: operation, not
TRX_DICT_OP_NONE */
-#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Determines if a transaction is in the given state.
The caller must hold trx_sys->mutex, or it must be the thread
that is serving a running transaction.
-A running transaction must be in trx_sys->ro_trx_list or trx_sys->rw_trx_list
-unless it is a non-locking autocommit read only transaction, which is only
-in trx_sys->mysql_trx_list.
-@return TRUE if trx->state == state */
+A running RW transaction must be in trx_sys->rw_trx_list.
+@return TRUE if trx->state == state */
UNIV_INLINE
-ibool
+bool
trx_state_eq(
/*=========*/
const trx_t* trx, /*!< in: transaction */
@@ -411,53 +418,38 @@ trx_state_eq(
Asserts that a transaction has been started.
The caller must hold trx_sys->mutex.
@return TRUE if started */
-UNIV_INTERN
ibool
trx_assert_started(
/*===============*/
const trx_t* trx) /*!< in: transaction */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
# endif /* UNIV_DEBUG */
/**********************************************************************//**
Determines if the currently running transaction has been interrupted.
-@return TRUE if interrupted */
-UNIV_INTERN
+@return TRUE if interrupted */
ibool
trx_is_interrupted(
/*===============*/
const trx_t* trx); /*!< in: transaction */
-/**********************************************************************//**
-Determines if the currently running transaction is in strict mode.
-@return TRUE if strict */
-UNIV_INTERN
-ibool
-trx_is_strict(
-/*==========*/
- trx_t* trx); /*!< in: transaction */
-#else /* !UNIV_HOTBACKUP */
-#define trx_is_interrupted(trx) FALSE
-#endif /* !UNIV_HOTBACKUP */
/*******************************************************************//**
Calculates the "weight" of a transaction. The weight of one transaction
is estimated as the number of altered rows + the number of locked rows.
-@param t transaction
-@return transaction weight */
+@param t transaction
+@return transaction weight */
#define TRX_WEIGHT(t) ((t)->undo_no + UT_LIST_GET_LEN((t)->lock.trx_locks))
/*******************************************************************//**
Compares the "weight" (or size) of two transactions. Transactions that
have edited non-transactional tables are considered heavier than ones
that have not.
-@return TRUE if weight(a) >= weight(b) */
-UNIV_INTERN
-ibool
+@return true if weight(a) >= weight(b) */
+bool
trx_weight_ge(
/*==========*/
- const trx_t* a, /*!< in: the first transaction to be compared */
- const trx_t* b); /*!< in: the second transaction to be compared */
-
+ const trx_t* a, /*!< in: the transaction to be compared */
+ const trx_t* b); /*!< in: the transaction to be compared */
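
TRX_WEIGHT and trx_weight_ge() feed deadlock-victim selection, where the lighter transaction is rolled back; an illustrative chooser (pick_victim is not an InnoDB function):

	/* trx_weight_ge() also treats transactions that modified
	non-transactional tables as heavier; this sketch inherits
	that behaviour for free. */
	inline const trx_t* pick_victim(const trx_t* a, const trx_t* b)
	{
		return(trx_weight_ge(a, b) ? b : a);	/* roll back the lighter */
	}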
/* Maximum length of a string that can be returned by
trx_get_que_state_str(). */
#define TRX_QUE_STATE_STR_MAX_LEN 12 /* "ROLLING BACK" */
@@ -465,64 +457,82 @@ trx_get_que_state_str(). */
/*******************************************************************//**
Retrieves transaction's que state in a human readable string. The string
should not be free()'d or modified.
-@return string in the data segment */
+@return string in the data segment */
UNIV_INLINE
const char*
trx_get_que_state_str(
/*==================*/
const trx_t* trx); /*!< in: transaction */
-/****************************************************************//**
-Assign a read-only transaction a rollback-segment, if it is attempting
-to write to a TEMPORARY table. */
-UNIV_INTERN
+/** Retrieves the transaction ID.
+At any given point in time it is guaranteed that IDs of the running
+transactions are unique. The values returned by this function for readonly
+transactions may be reused, so a subsequent RO transaction may get the same ID
+as a RO transaction that existed in the past. The values returned by this
+function should be used for printing purposes only.
+@param[in] trx transaction whose id to retrieve
+@return transaction id */
+UNIV_INLINE
+trx_id_t
+trx_get_id_for_print(
+ const trx_t* trx);
+
+/** Create the trx_t pool */
void
-trx_assign_rseg(
-/*============*/
- trx_t* trx); /*!< A read-only transaction that
- needs to be assigned a RBS. */
-/*******************************************************************//**
+trx_pool_init();
+
+/** Destroy the trx_t pool */
+void
+trx_pool_close();
+
+/**
+Set the transaction as a read-write transaction if it is not already
+tagged as such.
+@param[in,out] trx Transaction that needs to be "upgraded" to RW from RO */
+void
+trx_set_rw_mode(
+ trx_t* trx);
+
+/**
Transactions that aren't started by the MySQL server don't set
the trx_t::mysql_thd field. For such transactions we set the lock
wait timeout to 0 instead of the user configured value that comes
from innodb_lock_wait_timeout via trx_t::mysql_thd.
-@param trx transaction
-@return lock wait timeout in seconds */
-#define trx_lock_wait_timeout_get(trx) \
- ((trx)->mysql_thd != NULL \
- ? thd_lock_wait_timeout((trx)->mysql_thd) \
+@param trx transaction
+@return lock wait timeout in seconds */
+#define trx_lock_wait_timeout_get(t) \
+ ((t)->mysql_thd != NULL \
+ ? thd_lock_wait_timeout((t)->mysql_thd) \
: 0)
-/*******************************************************************//**
+/**
Determine if the transaction is a non-locking autocommit select
(implied read-only).
-@param t transaction
-@return true if non-locking autocommit select transaction. */
+@param t transaction
+@return true if non-locking autocommit select transaction. */
#define trx_is_autocommit_non_locking(t) \
((t)->auto_commit && (t)->will_lock == 0)
-/*******************************************************************//**
+/**
Determine if the transaction is a non-locking autocommit select
with an explicit check for the read-only status.
-@param t transaction
-@return true if non-locking autocommit read-only transaction. */
+@param t transaction
+@return true if non-locking autocommit read-only transaction. */
#define trx_is_ac_nl_ro(t) \
((t)->read_only && trx_is_autocommit_non_locking((t)))
-/*******************************************************************//**
+/**
Assert that the transaction is in the trx_sys_t::rw_trx_list */
#define assert_trx_in_rw_list(t) do { \
ut_ad(!(t)->read_only); \
- assert_trx_in_list(t); \
+ ut_ad((t)->in_rw_trx_list \
+ == !((t)->read_only || !(t)->rsegs.m_redo.rseg)); \
+ check_trx_state(t); \
} while (0)
-/*******************************************************************//**
-Assert that the transaction is either in trx_sys->ro_trx_list or
-trx_sys->rw_trx_list but not both and it cannot be an autocommit
-non-locking select */
-#define assert_trx_in_list(t) do { \
- ut_ad((t)->in_ro_trx_list == (t)->read_only); \
- ut_ad((t)->in_rw_trx_list == !(t)->read_only); \
+/**
+Check transaction state */
+#define check_trx_state(t) do { \
ut_ad(!trx_is_autocommit_non_locking((t))); \
switch ((t)->state) { \
case TRX_STATE_PREPARED: \
@@ -536,10 +546,34 @@ non-locking select */
ut_error; \
} while (0)
+/** Check if transaction is free so that it can be re-initialized.
+@param t transaction handle */
+#define assert_trx_is_free(t) do { \
+ ut_ad(trx_state_eq((t), TRX_STATE_NOT_STARTED)); \
+ ut_ad(!(t)->id); \
+ ut_ad(!(t)->has_logged()); \
+ ut_ad(!(t)->is_referenced()); \
+ ut_ad(!MVCC::is_view_active((t)->read_view)); \
+ ut_ad((t)->lock.wait_thr == NULL); \
+ ut_ad(UT_LIST_GET_LEN((t)->lock.trx_locks) == 0); \
+ ut_ad((t)->lock.table_locks.empty()); \
+ ut_ad(!(t)->autoinc_locks \
+ || ib_vector_is_empty((t)->autoinc_locks)); \
+ ut_ad((t)->dict_operation == TRX_DICT_OP_NONE); \
+} while(0)
+
+/** Check if a transaction is inactive so that it can be freed and put back
+into the transaction pool.
+@param t transaction handle */
+#define assert_trx_is_inactive(t) do { \
+ assert_trx_is_free((t)); \
+ ut_ad((t)->dict_operation_lock_mode == 0); \
+} while(0)
+
#ifdef UNIV_DEBUG
/*******************************************************************//**
Assert that an autocommit non-locking select cannot be in the
-ro_trx_list nor the rw_trx_list and that it is a read-only transaction.
+rw_trx_list and that it is a read-only transaction.
The transaction must be in the mysql_trx_list. */
# define assert_trx_nonlocking_or_in_list(t) \
do { \
@@ -547,23 +581,24 @@ The tranasction must be in the mysql_trx_list. */
trx_state_t t_state = (t)->state; \
ut_ad((t)->read_only); \
ut_ad(!(t)->is_recovered); \
- ut_ad(!(t)->in_ro_trx_list); \
ut_ad(!(t)->in_rw_trx_list); \
ut_ad((t)->in_mysql_trx_list); \
ut_ad(t_state == TRX_STATE_NOT_STARTED \
|| t_state == TRX_STATE_ACTIVE); \
} else { \
- assert_trx_in_list(t); \
+ check_trx_state(t); \
} \
} while (0)
#else /* UNIV_DEBUG */
/*******************************************************************//**
Assert that an autocommit non-locking select cannot be in the
-ro_trx_list nor the rw_trx_list and that it is a read-only transaction.
+rw_trx_list and that it is a read-only transaction.
The transaction must be in the mysql_trx_list. */
# define assert_trx_nonlocking_or_in_list(trx) ((void)0)
#endif /* UNIV_DEBUG */
+typedef std::vector<ib_lock_t*, ut_allocator<ib_lock_t*> > lock_list;
+
/*******************************************************************//**
Latching protocol for trx_lock_t::que_state. trx_lock_t::que_state
captures the state of the query thread during the execution of a query.
@@ -606,12 +641,12 @@ struct trx_lock_t {
ib_uint64_t deadlock_mark; /*!< A mark field that is initialized
to and checked against lock_mark_counter
by lock_deadlock_recursive(). */
- ibool was_chosen_as_deadlock_victim;
+ bool was_chosen_as_deadlock_victim;
/*!< when the transaction decides to
- wait for a lock, it sets this to FALSE;
+ wait for a lock, it sets this to false;
if another transaction chooses this
transaction as a victim in deadlock
- resolution, it sets this to TRUE.
+ resolution, it sets this to true.
Protected by trx->mutex. */
time_t wait_started; /*!< lock wait started at this time,
protected only by lock_sys->mutex */
@@ -624,20 +659,32 @@ struct trx_lock_t {
only be modified by the thread that is
serving the running transaction. */
+ /** Pre-allocated record locks */
+ struct {
+ ib_lock_t lock; byte pad[256];
+ } rec_pool[8];
+
+ /** Pre-allocated table locks */
+ ib_lock_t table_pool[8];
+
+ /** Next available rec_pool[] entry */
+ unsigned rec_cached;
+
+ /** Next available table_pool[] entry */
+ unsigned table_cached;
+
mem_heap_t* lock_heap; /*!< memory heap for trx_locks;
protected by lock_sys->mutex */
- UT_LIST_BASE_NODE_T(lock_t)
- trx_locks; /*!< locks requested
- by the transaction;
+ trx_lock_list_t trx_locks; /*!< locks requested by the transaction;
insertions are protected by trx->mutex
and lock_sys->mutex; removals are
protected by lock_sys->mutex */
- ib_vector_t* table_locks; /*!< All table locks requested by this
+ lock_list table_locks; /*!< All table locks requested by this
transaction, including AUTOINC locks */
- ibool cancel; /*!< TRUE if the transaction is being
+ bool cancel; /*!< true if the transaction is being
rolled back either via deadlock
detection or due to lock timeout. The
caller has to acquire the trx_t::mutex
@@ -648,9 +695,17 @@ struct trx_lock_t {
mutex to prevent recursive deadlocks.
Protected by both the lock sys mutex
and the trx_t::mutex. */
+ ulint n_rec_locks; /*!< number of rec locks in this trx */
};
-#define TRX_MAGIC_N 91118598
+/** Type used to store the list of tables that are modified by a given
+transaction. We store pointers to the table objects in memory because
+we know that a table object will not be destroyed while a transaction
+that modified it is running. */
+typedef std::set<
+ dict_table_t*,
+ std::less<dict_table_t*>,
+ ut_allocator<dict_table_t*> > trx_mod_tables_t;
/** The transaction handle
@@ -680,8 +735,8 @@ so without holding any mutex. The following are exceptions to this:
* trx_rollback_resurrected() may access resurrected (connectionless)
transactions while the system is already processing new user
-transactions. The trx_sys->mutex prevents a race condition between it
-and lock_trx_release_locks() [invoked by trx_commit()].
+transactions. The trx_sys->mutex and trx->is_recovered prevent
+a race condition between it and trx_commit().
* trx_print_low() may access transactions not associated with the current
thread. The caller must be holding trx_sys->mutex and lock_sys->mutex.
@@ -693,23 +748,67 @@ holding trx_sys->mutex exclusively.
* The locking code (in particular, lock_deadlock_recursive() and
lock_rec_convert_impl_to_expl()) will access transactions associated
to other connections. The locks of transactions are protected by
-lock_sys->mutex and sometimes by trx->mutex. */
+lock_sys->mutex (insertions also by trx->mutex). */
-enum trx_abort_t {
- TRX_SERVER_ABORT = 0,
-#ifdef WITH_WSREP
- TRX_WSREP_ABORT,
-#endif
- TRX_REPLICATION_ABORT
+/** Represents an instance of a rollback segment along with its state variables. */
+struct trx_undo_ptr_t {
+ trx_rseg_t* rseg; /*!< rollback segment assigned to the
+ transaction, or NULL if not assigned
+ yet */
+ trx_undo_t* insert_undo; /*!< pointer to the insert undo log, or
+ NULL if no inserts performed yet */
+ trx_undo_t* update_undo; /*!< pointer to the update undo log, or
+ NULL if no update performed yet */
};
-struct trx_t{
- ulint magic_n;
+/** An instance of temporary rollback segment. */
+struct trx_temp_undo_t {
+ /** temporary rollback segment, or NULL if not assigned yet */
+ trx_rseg_t* rseg;
+ /** pointer to the undo log, or NULL if nothing logged yet */
+ trx_undo_t* undo;
+};
+
+/** Rollback segments assigned to a transaction for undo logging. */
+struct trx_rsegs_t {
+ /** undo log ptr holding reference to a rollback segment that resides in
+	system/undo tablespace used for undo logging of tables that need
+ to be recovered on crash. */
+ trx_undo_ptr_t m_redo;
+
+ /** undo log for temporary tables; discarded immediately after
+ transaction commit/rollback */
+ trx_temp_undo_t m_noredo;
+};
- ib_mutex_t mutex; /*!< Mutex protecting the fields
- state and lock
- (except some fields of lock, which
- are protected by lock_sys->mutex) */
+struct trx_t {
+private:
+ /**
+ Count of references.
+
+ We can't release the locks nor commit the transaction until this reference
+ is 0. We can change the state to TRX_STATE_COMMITTED_IN_MEMORY to signify
+ that it is no longer "active".
+ */
+
+ int32_t n_ref;
+
+
+public:
+ TrxMutex mutex; /*!< Mutex protecting the fields
+ state and lock (except some fields
+ of lock, which are protected by
+ lock_sys->mutex) */
+
+ trx_id_t id; /*!< transaction id */
+
+ trx_id_t no; /*!< transaction serialization number:
+ max trx id shortly before the
+ transaction is moved to
+ COMMITTED_IN_MEMORY state.
+ Protected by trx_sys_t::mutex
+ when trx->in_rw_trx_list. Initially
+ set to TRX_ID_MAX. */
/** State of the trx from the point of view of concurrency control
and the valid state transitions.
@@ -736,47 +835,44 @@ struct trx_t{
Recovered XA:
* NOT_STARTED -> PREPARED -> COMMITTED -> (freed)
- XA (2PC) (shutdown before ROLLBACK or COMMIT):
+ XA (2PC) (shutdown or disconnect before ROLLBACK or COMMIT):
* NOT_STARTED -> PREPARED -> (freed)
+ Disconnected XA can become recovered:
+ * ... -> ACTIVE -> PREPARED (connected) -> PREPARED (disconnected)
+	Disconnected means disconnected from MySQL, e.g. due to a client disconnect.
Latching and various transaction lists membership rules:
XA (2PC) transactions are always treated as non-autocommit.
Transitions to ACTIVE or NOT_STARTED occur when
- !in_rw_trx_list and !in_ro_trx_list (no trx_sys->mutex needed).
+ !in_rw_trx_list (no trx_sys->mutex needed).
Autocommit non-locking read-only transactions move between states
- without holding any mutex. They are !in_rw_trx_list, !in_ro_trx_list.
+ without holding any mutex. They are !in_rw_trx_list.
+
+	All transactions, unless they are determined to be ac-nl-ro or are
+	explicitly tagged as read-only or read-write, will first be put
+ on the read-only transaction list. Only when a !read-only transaction
+ in the read-only list tries to acquire an X or IX lock on a table
+ do we remove it from the read-only list and put it on the read-write
+ list. During this switch we assign it a rollback segment.
When a transaction is NOT_STARTED, it can be in_mysql_trx_list if
- it is a user transaction. It cannot be in ro_trx_list or rw_trx_list.
+ it is a user transaction. It cannot be in rw_trx_list.
ACTIVE->PREPARED->COMMITTED is only possible when trx->in_rw_trx_list.
The transition ACTIVE->PREPARED is protected by trx_sys->mutex.
ACTIVE->COMMITTED is possible when the transaction is in
- ro_trx_list or rw_trx_list.
+ rw_trx_list.
- Transitions to COMMITTED are protected by both lock_sys->mutex
- and trx->mutex.
-
- NOTE: Some of these state change constraints are an overkill,
- currently only required for a consistent view for printing stats.
- This unnecessarily adds a huge cost for the general case.
-
- NOTE: In the future we should add read only transactions to the
- ro_trx_list the first time they try to acquire a lock ie. by default
- we treat all read-only transactions as non-locking. */
+ Transitions to COMMITTED are protected by trx_t::mutex. */
trx_state_t state;
- trx_lock_t lock; /*!< Information about the transaction
- locks and state. Protected by
- trx->mutex or lock_sys->mutex
- or both */
- bool is_recovered; /*!< false=normal transaction,
- true=recovered, must be rolled back,
- protected by trx_sys->mutex when
- trx->in_rw_trx_list holds */
+ /** whether this is a recovered transaction that should be
+ rolled back by trx_rollback_or_clean_recovered().
+ Protected by trx_t::mutex for transactions that are in trx_sys. */
+ bool is_recovered;
#ifdef WITH_WSREP
/** whether wsrep_on(mysql_thd) held at the start of transaction */
bool wsrep;
@@ -785,12 +881,28 @@ struct trx_t{
bool is_wsrep() const { return false; }
#endif /* WITH_WSREP */
+ ReadView* read_view; /*!< consistent read view used in the
+ transaction, or NULL if not yet set */
+
+ UT_LIST_NODE_T(trx_t)
+ trx_list; /*!< list of transactions;
+ protected by trx_sys->mutex. */
+ UT_LIST_NODE_T(trx_t)
+ no_list; /*!< Required during view creation
+ to check for the view limit for
+ transactions that are committing */
+
+ trx_lock_t lock; /*!< Information about the transaction
+ locks and state. Protected by
+ lock_sys->mutex (insertions also
+ by trx_t::mutex). */
+
/* These fields are not protected by any mutex. */
const char* op_info; /*!< English text describing the
current operation, or an empty
string */
ulint isolation_level;/*!< TRX_ISO_REPEATABLE_READ, ... */
- ulint check_foreigns; /*!< normally TRUE, but if the user
+ bool check_foreigns; /*!< normally TRUE, but if the user
wants to suppress foreign key checks,
(in table imports, for example) we
set this FALSE */
@@ -799,135 +911,95 @@ struct trx_t{
commit between multiple storage engines and the binary log. When
an engine participates in a transaction, it's responsible for
registering itself using the trans_register_ha() API. */
- unsigned is_registered:1;/* This flag is set to 1 after the
+ bool is_registered; /* This flag is set to true after the
transaction has been registered with
the coordinator using the XA API, and
- is set to 0 after commit or rollback. */
- unsigned active_commit_ordered:1;/* 1 if owns prepare mutex, if
- this is set to 1 then registered should
- also be set to 1. This is used in the
- XA code */
+ is set to false after commit or
+ rollback. */
+ unsigned active_commit_ordered:1;/* 1 if owns prepare mutex */
/*------------------------------*/
- ulint check_unique_secondary;
+ bool check_unique_secondary;
/*!< normally TRUE, but if the user
wants to speed up inserts by
suppressing unique key checks
for secondary indexes when we decide
if we can use the insert buffer for
them, we set this FALSE */
- ulint support_xa; /*!< normally we do the XA two-phase
- commit steps, but by setting this to
- FALSE, one can save CPU time and about
- 150 bytes in the undo log size as then
- we skip XA steps */
- ulint flush_log_later;/* In 2PC, we hold the
+ bool flush_log_later;/* In 2PC, we hold the
prepare_commit mutex across
both phases. In that case, we
defer flush of the logs to disk
until after we release the
mutex. */
- ulint must_flush_log_later;/*!< this flag is set to TRUE in
+ bool must_flush_log_later;/*!< this flag is set to TRUE in
trx_commit() if flush_log_later was
TRUE, and there were modifications by
the transaction; in that case we must
flush the log in
trx_commit_complete_for_mysql() */
ulint duplicates; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
- ulint has_search_latch;
- /*!< TRUE if this trx has latched the
- search system latch in S-mode */
- ulint search_latch_timeout;
- /*!< If we notice that someone is
- waiting for our S-lock on the search
- latch to be released, we wait in
- row0sel.cc for BTR_SEA_TIMEOUT new
- searches until we try to keep
- the search latch again over
- calls from MySQL; this is intended
- to reduce contention on the search
- latch */
- trx_dict_op_t dict_operation; /**< @see enum trx_dict_op */
+ trx_dict_op_t dict_operation; /**< @see enum trx_dict_op_t */
/* Fields protected by the srv_conc_mutex. */
- ulint declared_to_be_inside_innodb;
+ bool declared_to_be_inside_innodb;
/*!< this is TRUE if we have declared
this transaction in
srv_conc_enter_innodb to be inside the
InnoDB engine */
- ulint n_tickets_to_enter_innodb;
+ ib_uint32_t n_tickets_to_enter_innodb;
/*!< this can be > 0 only when
declared_to_... is TRUE; when we come
to srv_conc_innodb_enter, if the value
here is > 0, we decrement this by 1 */
- ulint dict_operation_lock_mode;
+ ib_uint32_t dict_operation_lock_mode;
/*!< 0, RW_S_LATCH, or RW_X_LATCH:
the latch mode trx currently holds
on dict_operation_lock. Protected
by dict_operation_lock. */
- trx_id_t no; /*!< transaction serialization number:
- max trx id shortly before the
- transaction is moved to
- COMMITTED_IN_MEMORY state.
- Protected by trx_sys_t::mutex
- when trx->in_rw_trx_list. Initially
- set to TRX_ID_MAX. */
-
/** wall-clock time of the latest transition to TRX_STATE_ACTIVE;
used for diagnostic purposes only */
time_t start_time;
/** microsecond_interval_timer() of transaction start */
ulonglong start_time_micro;
- trx_id_t id; /*!< transaction id */
- XID xid; /*!< X/Open XA transaction
- identification to identify a
- transaction branch */
lsn_t commit_lsn; /*!< lsn at the time of the commit */
table_id_t table_id; /*!< Table to drop iff dict_operation
== TRX_DICT_OP_TABLE, or 0. */
/*------------------------------*/
THD* mysql_thd; /*!< MySQL thread handle corresponding
to this trx, or NULL */
- trx_abort_t abort_type; /*!< Transaction abort type*/
const char* mysql_log_file_name;
/*!< if MySQL binlog is used, this field
contains a pointer to the latest file
name; this is NULL if binlog is not
used */
- ib_int64_t mysql_log_offset;
+ int64_t mysql_log_offset;
/*!< if MySQL binlog is used, this
field contains the end offset of the
binlog entry */
/*------------------------------*/
- ulint n_mysql_tables_in_use; /*!< number of Innobase tables
+ ib_uint32_t n_mysql_tables_in_use; /*!< number of Innobase tables
used in the processing of the current
SQL statement in MySQL */
- ulint mysql_n_tables_locked;
+ ib_uint32_t mysql_n_tables_locked;
/*!< how many tables the current SQL
statement uses, except those
in consistent read */
/*------------------------------*/
- UT_LIST_NODE_T(trx_t)
- trx_list; /*!< list of transactions;
- protected by trx_sys->mutex.
- The same node is used for both
- trx_sys_t::ro_trx_list and
- trx_sys_t::rw_trx_list */
#ifdef UNIV_DEBUG
/** The following two fields are mutually exclusive. */
/* @{ */
- ibool in_ro_trx_list; /*!< TRUE if in trx_sys->ro_trx_list */
- ibool in_rw_trx_list; /*!< TRUE if in trx_sys->rw_trx_list */
+ bool in_rw_trx_list; /*!< true if in trx_sys->rw_trx_list */
/* @} */
#endif /* UNIV_DEBUG */
UT_LIST_NODE_T(trx_t)
mysql_trx_list; /*!< list of transactions created for
MySQL; protected by trx_sys->mutex */
#ifdef UNIV_DEBUG
- ibool in_mysql_trx_list;
- /*!< TRUE if in
+ bool in_mysql_trx_list;
+ /*!< true if in
trx_sys->mysql_trx_list */
#endif /* UNIV_DEBUG */
/*------------------------------*/
@@ -942,31 +1014,18 @@ struct trx_t{
ulint error_key_num; /*!< if the index creation fails to a
duplicate key error, a mysql key
number of that index is stored here */
- sess_t* sess; /*!< session of the trx, NULL if none */
que_t* graph; /*!< query currently run in the session,
or NULL if none; NOTE that the query
belongs to the session, and it can
survive over a transaction commit, if
it is a stored procedure with a COMMIT
WORK statement, for instance */
- mem_heap_t* global_read_view_heap;
- /*!< memory heap for the global read
- view */
- read_view_t* global_read_view;
- /*!< consistent read view associated
- to a transaction or NULL */
- read_view_t* read_view; /*!< consistent read view used in the
- transaction or NULL, this read view
- if defined can be normal read view
- associated to a transaction (i.e.
- same as global_read_view) or read view
- associated to a cursor */
/*------------------------------*/
UT_LIST_BASE_NODE_T(trx_named_savept_t)
trx_savepoints; /*!< savepoints set with SAVEPOINT ...,
oldest first */
/*------------------------------*/
- ib_mutex_t undo_mutex; /*!< mutex protecting the fields in this
+ UndoMutex undo_mutex; /*!< mutex protecting the fields in this
section (down to undo_no_arr), EXCEPT
last_sql_stat_start, which can be
accessed only when we know that there
@@ -979,25 +1038,23 @@ struct trx_t{
with no gaps; thus it represents
the number of modified/inserted
rows in a transaction */
+ ulint undo_rseg_space;
+ /*!< space id where last undo record
+ was written */
trx_savept_t last_sql_stat_start;
/*!< undo_no when the last sql statement
was started: in case of an error, trx
is rolled back down to this undo
number; see note at undo_mutex! */
- trx_rseg_t* rseg; /*!< rollback segment assigned to the
- transaction, or NULL if not assigned
- yet */
- trx_undo_t* insert_undo; /*!< pointer to the insert undo log, or
- NULL if no inserts performed yet */
- trx_undo_t* update_undo; /*!< pointer to the update undo log, or
- NULL if no update performed yet */
+ trx_rsegs_t rsegs; /* rollback segments for undo logging */
undo_no_t roll_limit; /*!< least undo number to undo during
- a rollback */
+ a partial rollback; 0 otherwise */
+#ifdef UNIV_DEBUG
+ bool in_rollback; /*!< true when the transaction is
+ executing a partial or full rollback */
+#endif /* UNIV_DEBUG */
ulint pages_undone; /*!< number of undo log pages undone
since the last undo log truncation */
- trx_undo_arr_t* undo_no_arr; /*!< array of undo numbers of undo log
- records which are currently processed
- by a rollback operation */
/*------------------------------*/
ulint n_autoinc_rows; /*!< no. of AUTO-INC rows required for
an SQL statement. This is useful for
@@ -1009,43 +1066,54 @@ struct trx_t{
when the trx instance is destroyed.
Protected by lock_sys->mutex. */
/*------------------------------*/
- ibool read_only; /*!< TRUE if transaction is flagged
+ bool read_only; /*!< true if transaction is flagged
as a READ-ONLY transaction.
- if !auto_commit || will_lock > 0
- then it will added to the list
- trx_sys_t::ro_trx_list. A read only
+ if auto_commit && will_lock == 0
+ then it will be handled as a
+ AC-NL-RO-SELECT (Auto Commit Non-Locking
+ Read Only Select). A read only
transaction will not be assigned an
- UNDO log. Non-locking auto-commit
- read-only transaction will not be on
- either list. */
- ibool auto_commit; /*!< TRUE if it is an autocommit */
- ulint will_lock; /*!< Will acquire some locks. Increment
+ UNDO log. */
+ bool auto_commit; /*!< true if it is an autocommit */
+ ib_uint32_t will_lock; /*!< Will acquire some locks. Increment
each time we determine that a lock will
be acquired by the MySQL layer. */
- bool ddl; /*!< true if it is a transaction that
- is being started for a DDL operation */
/*------------------------------*/
fts_trx_t* fts_trx; /*!< FTS information, or NULL if
transaction hasn't modified tables
with FTS indexes (yet). */
doc_id_t fts_next_doc_id;/* The document id used for updates */
/*------------------------------*/
- ulint flush_tables; /*!< if "covering" the FLUSH TABLES",
+	ib_uint32_t	flush_tables;	/*!< if "covering" the FLUSH TABLES,
count of tables being flushed. */
/*------------------------------*/
+ bool ddl; /*!< true if it is an internal
+ transaction for DDL */
+ bool internal; /*!< true if it is a system/internal
+ transaction background task. This
+ includes DDL transactions too. Such
+ transactions are always treated as
+ read-write. */
+ /*------------------------------*/
#ifdef UNIV_DEBUG
- ulint start_line; /*!< Track where it was started from */
+ unsigned start_line; /*!< Track where it was started from */
const char* start_file; /*!< Filename where it was started */
#endif /* UNIV_DEBUG */
- /*------------------------------*/
- bool api_trx; /*!< trx started by InnoDB API */
- bool api_auto_commit;/*!< automatic commit */
- bool read_write; /*!< if read and write operation */
+ XID* xid; /*!< X/Open XA transaction
+ identification to identify a
+ transaction branch */
+ trx_mod_tables_t mod_tables; /*!< List of tables that were modified
+ by this transaction */
/*------------------------------*/
- char detailed_error[256]; /*!< detailed error message for last
+ char* detailed_error; /*!< detailed error message for last
error, or empty. */
+private:
+ /** flush observer used to track flushing of non-redo logged pages
+ during bulk create index */
+ FlushObserver* flush_observer;
+public:
/* Lock wait statistics */
ulint n_rec_lock_waits;
/*!< Number of record lock waits,
@@ -1063,8 +1131,94 @@ struct trx_t{
#ifdef WITH_WSREP
os_event_t wsrep_event; /* event waited for in srv_conc_slot */
#endif /* WITH_WSREP */
+
+ ulint magic_n;
+
+ /** @return whether any persistent undo log has been generated */
+ bool has_logged_persistent() const
+ {
+ return(rsegs.m_redo.insert_undo || rsegs.m_redo.update_undo);
+ }
+
+ /** @return whether any undo log has been generated */
+ bool has_logged() const
+ {
+ return(has_logged_persistent() || rsegs.m_noredo.undo);
+ }
+
+ /** @return rollback segment for modifying temporary tables */
+ trx_rseg_t* get_temp_rseg()
+ {
+ if (trx_rseg_t* rseg = rsegs.m_noredo.rseg) {
+ ut_ad(id != 0);
+ return(rseg);
+ }
+
+ return(assign_temp_rseg());
+ }
+
+ /** Set the innodb_log_optimize_ddl page flush observer
+ @param[in] space_id tablespace id
+ @param[in,out] stage performance_schema accounting */
+ void set_flush_observer(ulint space_id, ut_stage_alter_t* stage);
+
+ /** Remove the flush observer */
+ void remove_flush_observer();
+
+ /** @return the flush observer */
+ FlushObserver* get_flush_observer() const
+ {
+ return flush_observer;
+ }
+
+ /** Transition to committed state, to release implicit locks. */
+ inline void commit_state();
+
+ /** Release any explicit locks of a committing transaction. */
+ inline void release_locks();
+
+
+ bool is_referenced()
+ {
+ return my_atomic_load32_explicit(&n_ref, MY_MEMORY_ORDER_RELAXED) > 0;
+ }
+
+
+ void reference()
+ {
+#ifdef UNIV_DEBUG
+ int32_t old_n_ref=
+#endif
+ my_atomic_add32_explicit(&n_ref, 1, MY_MEMORY_ORDER_RELAXED);
+ ut_ad(old_n_ref >= 0);
+ }
+
+
+ void release_reference()
+ {
+#ifdef UNIV_DEBUG
+ int32_t old_n_ref=
+#endif
+ my_atomic_add32_explicit(&n_ref, -1, MY_MEMORY_ORDER_RELAXED);
+ ut_ad(old_n_ref > 0);
+ }
+
+
+private:
+ /** Assign a rollback segment for modifying temporary tables.
+ @return the assigned rollback segment */
+ trx_rseg_t* assign_temp_rseg();
};
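
A standalone sketch of the n_ref protocol in the struct above, expressed with std::atomic (the patch uses my_atomic_* with the same relaxed ordering; the surrounding trx_sys and trx mutexes provide the ordering guarantees):

	#include <atomic>
	#include <cassert>
	#include <cstdint>

	struct RefCounted {
		std::atomic<int32_t> n_ref;

		RefCounted() : n_ref(0) {}

		bool is_referenced() const
		{
			return(n_ref.load(std::memory_order_relaxed) > 0);
		}

		void reference()
		{
			n_ref.fetch_add(1, std::memory_order_relaxed);
		}

		void release_reference()
		{
			int32_t old_n_ref =
				n_ref.fetch_sub(1, std::memory_order_relaxed);
			assert(old_n_ref > 0);
			(void) old_n_ref;
		}
	};

	/* The committing side waits until is_referenced() is false
	before releasing locks, as described in the comment on n_ref. */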
+/**
+Check if a transaction is started.
+@param[in] trx		transaction whose state to check
+@return true if the transaction is started */
+inline bool trx_is_started(const trx_t* trx)
+{
+ return trx->state != TRX_STATE_NOT_STARTED;
+}
+
/* Transaction isolation levels (trx->isolation_level) */
#define TRX_ISO_READ_UNCOMMITTED 0 /* dirty read: non-locking
SELECTs are performed so that
@@ -1102,23 +1256,9 @@ struct trx_t{
/* Treatment of duplicate values (trx->duplicates; for example, in inserts).
Multiple flags can be combined with bitwise OR. */
-#define TRX_DUP_IGNORE 1 /* duplicate rows are to be updated */
-#define TRX_DUP_REPLACE 2 /* duplicate rows are to be replaced */
-
-
-/* Types of a trx signal */
-#define TRX_SIG_NO_SIGNAL 0
-#define TRX_SIG_TOTAL_ROLLBACK 1
-#define TRX_SIG_ROLLBACK_TO_SAVEPT 2
-#define TRX_SIG_COMMIT 3
-#define TRX_SIG_BREAK_EXECUTION 5
+#define TRX_DUP_IGNORE 1U /* duplicate rows are to be updated */
+#define TRX_DUP_REPLACE 2U /* duplicate rows are to be replaced */
-/* Sender types of a signal */
-#define TRX_SIG_SELF 0 /* sent by the session itself, or
- by an error occurring within this
- session */
-#define TRX_SIG_OTHER_SESS 1 /* sent by another session (which
- must hold rights to this) */
/** Commit node states */
enum commit_node_state {
@@ -1149,27 +1289,6 @@ struct commit_node_t{
mutex_exit(&t->mutex); \
} while (0)
-/** @brief The latch protecting the adaptive search system
-
-This latch protects the
-(1) hash index;
-(2) columns of a record to which we have a pointer in the hash index;
-
-but does NOT protect:
-
-(3) next record offset field in a record;
-(4) next or previous records on the same page.
-
-Bear in mind (3) and (4) when using the hash index.
-*/
-extern rw_lock_t* btr_search_latch_temp;
-
-/** The latch protecting the adaptive search system */
-#define btr_search_latch (*btr_search_latch_temp)
-
-#ifndef UNIV_NONINL
#include "trx0trx.ic"
-#endif
-#endif /* !UNIV_HOTBACKUP */
#endif
diff --git a/storage/innobase/include/trx0trx.ic b/storage/innobase/include/trx0trx.ic
index d037696a087..4a5b1ba717f 100644
--- a/storage/innobase/include/trx0trx.ic
+++ b/storage/innobase/include/trx0trx.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2016, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
@@ -24,16 +24,16 @@ The transaction
Created 3/26/1996 Heikki Tuuri
*******************************************************/
+#include "read0read.h"
+
/**********************************************************************//**
Determines if a transaction is in the given state.
The caller must hold trx_sys->mutex, or it must be the thread
that is serving a running transaction.
-A running transaction must be in trx_sys->ro_trx_list or trx_sys->rw_trx_list
-unless it is a non-locking autocommit read only transaction, which is only
-in trx_sys->mysql_trx_list.
-@return TRUE if trx->state == state */
+A running RW transaction must be in trx_sys->rw_trx_list.
+@return TRUE if trx->state == state */
UNIV_INLINE
-ibool
+bool
trx_state_eq(
/*=========*/
const trx_t* trx, /*!< in: transaction */
@@ -54,21 +54,24 @@ trx_state_eq(
return(trx->state == state);
case TRX_STATE_ACTIVE:
+
assert_trx_nonlocking_or_in_list(trx);
return(state == trx->state);
case TRX_STATE_COMMITTED_IN_MEMORY:
- assert_trx_in_list(trx);
+
+ check_trx_state(trx);
return(state == trx->state);
case TRX_STATE_NOT_STARTED:
- /* This state is not allowed for running transactions. */
+ /* This state is not allowed for running transactions. */
ut_a(state == TRX_STATE_NOT_STARTED
|| (relaxed
&& thd_get_error_number(trx->mysql_thd)));
+
ut_ad(!trx->in_rw_trx_list);
- ut_ad(!trx->in_ro_trx_list);
- return(state == trx->state);
+
+ return(true);
}
ut_error;
#endif /* UNIV_DEBUG */
@@ -77,7 +80,7 @@ trx_state_eq(
/****************************************************************//**
Retrieves the error_info field from a trx.
-@return the error info */
+@return the error info */
UNIV_INLINE
const dict_index_t*
trx_get_error_info(
@@ -90,7 +93,7 @@ trx_get_error_info(
/*******************************************************************//**
Retrieves transaction's que state in a human readable string. The string
should not be free()'d or modified.
-@return string in the data segment */
+@return string in the data segment */
UNIV_INLINE
const char*
trx_get_que_state_str(
@@ -112,9 +115,45 @@ trx_get_que_state_str(
}
}
+/** Retrieves the transaction ID.
+At any given point in time the IDs of running transactions are guaranteed
+to be unique. The values returned by this function for read-only
+transactions may be reused, so a subsequent RO transaction may get the same
+ID as a RO transaction that existed in the past. The values returned by
+this function should be used for printing purposes only.
+@param[in] trx transaction whose id to retrieve
+@return transaction id */
+UNIV_INLINE
+trx_id_t
+trx_get_id_for_print(
+ const trx_t* trx)
+{
+ /* Readonly and transactions whose intentions are unknown (whether
+ they will eventually do a WRITE) don't have trx_t::id assigned (it is
+ 0 for those transactions). Transaction IDs in
+ innodb_trx.trx_id,
+ innodb_locks.lock_id,
+ innodb_locks.lock_trx_id,
+ innodb_lock_waits.requesting_trx_id,
+ innodb_lock_waits.blocking_trx_id should match because those tables
+ could be used in an SQL JOIN on those columns. Also trx_t::id is
+ printed by SHOW ENGINE INNODB STATUS, and in logs, so we must have the
+ same value printed everywhere consistently. */
+
+ /* DATA_TRX_ID_LEN is the storage size in bytes. */
+ static const trx_id_t max_trx_id
+ = (1ULL << (DATA_TRX_ID_LEN * CHAR_BIT)) - 1;
+
+ ut_ad(trx->id <= max_trx_id);
+
+ return(trx->id != 0
+ ? trx->id
+ : reinterpret_cast<trx_id_t>(trx) | (max_trx_id + 1));
+}
+
/**********************************************************************//**
Determine if a transaction is a dictionary operation.
-@return dictionary operation mode */
+@return dictionary operation mode */
UNIV_INLINE
enum trx_dict_op_t
trx_get_dict_operation(
@@ -171,18 +210,24 @@ ok:
trx->dict_operation = op;
}
-/********************************************************************//**
-Releases the search latch if trx has reserved it. */
+/** Get the active read view for a transaction, if one exists.
+@param trx transaction
+@return the transaction's read view, or NULL if none is assigned */
UNIV_INLINE
-void
-trx_search_latch_release_if_reserved(
-/*=================================*/
- trx_t* trx) /*!< in: transaction */
+ReadView*
+trx_get_read_view(
+ trx_t* trx)
{
- if (trx->has_search_latch) {
- rw_lock_s_unlock(&btr_search_latch);
-
- trx->has_search_latch = FALSE;
- }
+ return(!MVCC::is_view_active(trx->read_view) ? NULL : trx->read_view);
}
+/** Get the active read view for a transaction, if one exists.
+@param trx transaction
+@return the transaction's read view, or NULL if none is assigned */
+UNIV_INLINE
+const ReadView*
+trx_get_read_view(
+ const trx_t* trx)
+{
+ return(!MVCC::is_view_active(trx->read_view) ? NULL : trx->read_view);
+}
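
The mapping in trx_get_id_for_print() above can be illustrated standalone.
A sketch assuming DATA_TRX_ID_LEN is 6, so genuine IDs occupy at most
48 bits; the function name below is illustrative:

    #include <cstdint>

    /* Mirrors (1ULL << (DATA_TRX_ID_LEN * CHAR_BIT)) - 1 for 6-byte IDs. */
    static const uint64_t max_trx_id = (1ULL << 48) - 1;

    uint64_t id_for_print(uint64_t id, const void* trx)
    {
        /* A genuine (read-write) ID passes through unchanged. A read-only
        transaction has id == 0 and is reported as its own address tagged
        with bit 48, a bit no real ID can carry. */
        return id != 0
            ? id
            : (uint64_t) (uintptr_t) trx | (max_trx_id + 1);
    }
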
diff --git a/storage/innobase/include/trx0types.h b/storage/innobase/include/trx0types.h
index 1c869bdd4ac..097aea519a9 100644
--- a/storage/innobase/include/trx0types.h
+++ b/storage/innobase/include/trx0types.h
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2019, MariaDB Corporation.
+Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,13 +28,28 @@ Created 3/26/1996 Heikki Tuuri
#define trx0types_h
#include "ut0byte.h"
+#include "ut0mutex.h"
+
+#include <set>
+#include <vector>
+
+//#include <unordered_set>
/** printf(3) format used for printing DB_TRX_ID and other system fields */
-#define TRX_ID_FMT IB_ID_FMT
+#define TRX_ID_FMT IB_ID_FMT
/** maximum length that a formatted trx_t::id could take, not including
the terminating NUL character. */
-#define TRX_ID_MAX_LEN 17
+static const ulint TRX_ID_MAX_LEN = 17;
+
+/** Space id of the transaction system page (the system tablespace) */
+static const ulint TRX_SYS_SPACE = 0;
+
+/** Page number of the transaction system page */
+#define TRX_SYS_PAGE_NO FSP_TRX_SYS_PAGE_NO
+
+/** Random value to check for corruption of trx_t */
+static const ulint TRX_MAGIC_N = 91118598;
/** Transaction execution states when trx->state == TRX_STATE_ACTIVE */
enum trx_que_t {
@@ -48,6 +63,7 @@ enum trx_que_t {
/** Transaction states (trx_t::state) */
enum trx_state_t {
TRX_STATE_NOT_STARTED,
+
TRX_STATE_ACTIVE,
/** XA PREPARE has been executed; only XA COMMIT or XA ROLLBACK
are possible */
@@ -86,12 +102,6 @@ struct trx_sig_t;
struct trx_rseg_t;
/** Transaction undo log */
struct trx_undo_t;
-/** Array of undo numbers of undo records being rolled back or purged */
-struct trx_undo_arr_t;
-/** A cell of trx_undo_arr_t */
-struct trx_undo_inf_t;
-/** The control structure used in the purge operation */
-struct trx_purge_t;
/** Rollback command node in a query graph */
struct roll_node_t;
/** Commit command node in a query graph */
@@ -100,21 +110,6 @@ struct commit_node_t;
struct trx_named_savept_t;
/* @} */
-/** Rollback contexts */
-enum trx_rb_ctx {
- RB_NONE = 0, /*!< no rollback */
- RB_NORMAL, /*!< normal rollback */
- RB_RECOVERY_PURGE_REC,
- /*!< rolling back an incomplete transaction,
- in crash recovery, rolling back an
- INSERT that was performed by updating a
- delete-marked record; if the delete-marked record
- no longer exists in an active read view, it will
- be purged */
- RB_RECOVERY /*!< rolling back an incomplete transaction,
- in crash recovery */
-};
-
/** Row identifier (DB_ROW_ID, DATA_ROW_ID) */
typedef ib_id_t row_id_t;
/** Transaction identifier (DB_TRX_ID, DATA_TRX_ID) */
@@ -147,6 +142,61 @@ typedef byte trx_upagef_t;
/** Undo log record */
typedef byte trx_undo_rec_t;
+
/* @} */
-#endif
+typedef ib_mutex_t RsegMutex;
+typedef ib_mutex_t TrxMutex;
+typedef ib_mutex_t UndoMutex;
+typedef ib_mutex_t PQMutex;
+typedef ib_mutex_t TrxSysMutex;
+
+typedef std::vector<trx_id_t, ut_allocator<trx_id_t> > trx_ids_t;
+
+/** Mapping read-write transactions from id to transaction instance, for
+creating read views and during trx id lookup for MVCC and locking. */
+struct TrxTrack {
+ explicit TrxTrack(trx_id_t id, trx_t* trx = NULL)
+ :
+ m_id(id),
+ m_trx(trx)
+ {
+ // Do nothing
+ }
+
+ trx_id_t m_id;
+ trx_t* m_trx;
+};
+
+struct TrxTrackHash {
+ size_t operator()(const TrxTrack& key) const
+ {
+ return(size_t(key.m_id));
+ }
+};
+
+/**
+Equality comparator for TrxTrack, used with TrxTrackHash */
+struct TrxTrackHashCmp {
+
+ bool operator() (const TrxTrack& lhs, const TrxTrack& rhs) const
+ {
+ return(lhs.m_id == rhs.m_id);
+ }
+};
+
+/**
+Ordering comparator for TrxTrack, used by TrxIdSet */
+struct TrxTrackCmp {
+
+ bool operator() (const TrxTrack& lhs, const TrxTrack& rhs) const
+ {
+ return(lhs.m_id < rhs.m_id);
+ }
+};
+
+//typedef std::unordered_set<TrxTrack, TrxTrackHash, TrxTrackHashCmp> TrxIdSet;
+typedef std::set<TrxTrack, TrxTrackCmp, ut_allocator<TrxTrack> >
+ TrxIdSet;
+
+#endif /* trx0types_h */
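
Because TrxTrackCmp above compares on m_id alone, a TrxIdSet can be probed
with a key that carries no transaction pointer. A sketch, assuming the
ut_allocator machinery is initialized and some_trx points to a live
transaction (both hypothetical here):

    TrxIdSet ids;
    ids.insert(TrxTrack(10, some_trx));

    /* The probe key leaves m_trx as NULL; ordering and equality are
    decided purely on m_id, so find() still locates the tracked entry. */
    TrxIdSet::iterator it = ids.find(TrxTrack(10));
    if (it != ids.end()) {
        trx_t* found = it->m_trx;
    }
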
diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h
index 210b7a433cb..bf2d3c7f7f7 100644
--- a/storage/innobase/include/trx0undo.h
+++ b/storage/innobase/include/trx0undo.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2016, MariaDB Corporation
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,18 +28,20 @@ Created 3/26/1996 Heikki Tuuri
#define trx0undo_h
#ifndef UNIV_INNOCHECKSUM
-
-#include "univ.i"
-#include "trx0types.h"
-#include "mtr0mtr.h"
#include "trx0sys.h"
-#include "page0types.h"
-#include "trx0xa.h"
-#ifndef UNIV_HOTBACKUP
+/** The LSB of the "is insert" flag in DB_ROLL_PTR */
+#define ROLL_PTR_INSERT_FLAG_POS 55
+/** The LSB of the 7-bit trx_rseg_t::id in DB_ROLL_PTR */
+#define ROLL_PTR_RSEG_ID_POS 48
+/** The LSB of the 32-bit undo log page number in DB_ROLL_PTR */
+#define ROLL_PTR_PAGE_POS 16
+/** The LSB of the 16-bit byte offset within an undo log page in DB_ROLL_PTR */
+#define ROLL_PTR_BYTE_POS 0
+
/***********************************************************************//**
Builds a roll pointer.
-@return roll pointer */
+@return roll pointer */
UNIV_INLINE
roll_ptr_t
trx_undo_build_roll_ptr(
@@ -62,7 +64,7 @@ trx_undo_decode_roll_ptr(
entry within page */
/***********************************************************************//**
Returns TRUE if the roll pointer is of the insert type.
-@return TRUE if insert undo log */
+@return TRUE if insert undo log */
UNIV_INLINE
ibool
trx_undo_roll_ptr_is_insert(
@@ -70,14 +72,13 @@ trx_undo_roll_ptr_is_insert(
roll_ptr_t roll_ptr); /*!< in: roll pointer */
/***********************************************************************//**
Returns true if the record is of the insert type.
-@return true if the record was freshly inserted (not updated). */
+@return true if the record was freshly inserted (not updated). */
UNIV_INLINE
bool
trx_undo_trx_id_is_insert(
/*======================*/
const byte* trx_id) /*!< in: DB_TRX_ID, followed by DB_ROLL_PTR */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-#endif /* !UNIV_HOTBACKUP */
+ MY_ATTRIBUTE((warn_unused_result));
/*****************************************************************//**
Writes a roll ptr to an index page. In case that the size changes in
some future version, this function should be used instead of
@@ -93,41 +94,33 @@ trx_write_roll_ptr(
Reads a roll ptr from an index page. In case that the roll ptr size
changes in some future version, this function should be used instead of
mach_read_...
-@return roll ptr */
+@return roll ptr */
UNIV_INLINE
roll_ptr_t
trx_read_roll_ptr(
/*==============*/
const byte* ptr); /*!< in: pointer to memory from where to read */
-#ifndef UNIV_HOTBACKUP
-/******************************************************************//**
-Gets an undo log page and x-latches it.
-@return pointer to page x-latched */
+
+/** Gets an undo log page and x-latches it.
+@param[in] page_id page id
+@param[in,out] mtr mini-transaction
+@return pointer to page x-latched */
UNIV_INLINE
page_t*
-trx_undo_page_get(
-/*==============*/
- ulint space, /*!< in: space where placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number */
- mtr_t* mtr); /*!< in: mtr */
-/******************************************************************//**
-Gets an undo log page and s-latches it.
-@return pointer to page s-latched */
+trx_undo_page_get(const page_id_t page_id, mtr_t* mtr);
+
+/** Gets an undo log page and s-latches it.
+@param[in] page_id page id
+@param[in,out] mtr mini-transaction
+@return pointer to page s-latched */
UNIV_INLINE
page_t*
-trx_undo_page_get_s_latched(
-/*========================*/
- ulint space, /*!< in: space where placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number */
- mtr_t* mtr); /*!< in: mtr */
+trx_undo_page_get_s_latched(const page_id_t page_id, mtr_t* mtr);
+
/******************************************************************//**
Returns the previous undo record on the page in the specified log, or
NULL if none exists.
-@return pointer to record, NULL if none */
+@return pointer to record, NULL if none */
UNIV_INLINE
trx_undo_rec_t*
trx_undo_page_get_prev_rec(
@@ -138,7 +131,7 @@ trx_undo_page_get_prev_rec(
/******************************************************************//**
Returns the next undo log record on the page in the specified log, or
NULL if none exists.
-@return pointer to record, NULL if none */
+@return pointer to record, NULL if none */
UNIV_INLINE
trx_undo_rec_t*
trx_undo_page_get_next_rec(
@@ -149,7 +142,7 @@ trx_undo_page_get_next_rec(
/******************************************************************//**
Returns the last undo record on the page in the specified undo log, or
NULL if none exists.
-@return pointer to record, NULL if none */
+@return pointer to record, NULL if none */
UNIV_INLINE
trx_undo_rec_t*
trx_undo_page_get_last_rec(
@@ -160,7 +153,7 @@ trx_undo_page_get_last_rec(
/******************************************************************//**
Returns the first undo record on the page in the specified undo log, or
NULL if none exists.
-@return pointer to record, NULL if none */
+@return pointer to record, NULL if none */
UNIV_INLINE
trx_undo_rec_t*
trx_undo_page_get_first_rec(
@@ -170,8 +163,7 @@ trx_undo_page_get_first_rec(
ulint offset);/*!< in: undo log header offset on page */
/***********************************************************************//**
Gets the previous record in an undo log.
-@return undo log record, the page s-latched, NULL if none */
-UNIV_INTERN
+@return undo log record, the page s-latched, NULL if none */
trx_undo_rec_t*
trx_undo_get_prev_rec(
/*==================*/
@@ -182,8 +174,7 @@ trx_undo_get_prev_rec(
mtr_t* mtr); /*!< in: mtr */
/***********************************************************************//**
Gets the next record in an undo log.
-@return undo log record, the page s-latched, NULL if none */
-UNIV_INTERN
+@return undo log record, the page s-latched, NULL if none */
trx_undo_rec_t*
trx_undo_get_next_rec(
/*==================*/
@@ -191,207 +182,171 @@ trx_undo_get_next_rec(
ulint page_no,/*!< in: undo log header page number */
ulint offset, /*!< in: undo log header offset on page */
mtr_t* mtr); /*!< in: mtr */
-/***********************************************************************//**
-Gets the first record in an undo log.
-@return undo log record, the page latched, NULL if none */
-UNIV_INTERN
+
+/** Gets the first record in an undo log.
+@param[in] space undo log header space
+@param[in] page_no undo log header page number
+@param[in] offset undo log header offset on page
+@param[in] mode latching mode: RW_S_LATCH or RW_X_LATCH
+@param[in,out] mtr mini-transaction
+@return undo log record, the page latched, NULL if none */
trx_undo_rec_t*
trx_undo_get_first_rec(
-/*===================*/
- ulint space, /*!< in: undo log header space */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset, /*!< in: undo log header offset on page */
- ulint mode, /*!< in: latching mode: RW_S_LATCH or RW_X_LATCH */
- mtr_t* mtr); /*!< in: mtr */
-/********************************************************************//**
-Tries to add a page to the undo log segment where the undo log is placed.
-@return X-latched block if success, else NULL */
-UNIV_INTERN
+ ulint space,
+ ulint page_no,
+ ulint offset,
+ ulint mode,
+ mtr_t* mtr);
+
+/** Allocate an undo log page.
+@param[in,out] trx transaction
+@param[in,out] undo undo log
+@param[in,out] mtr mini-transaction that does not hold any page latch
+@return X-latched block if success
+@retval NULL on failure */
buf_block_t*
-trx_undo_add_page(
-/*==============*/
- trx_t* trx, /*!< in: transaction */
- trx_undo_t* undo, /*!< in: undo log memory object */
- mtr_t* mtr) /*!< in: mtr which does not have a latch to any
- undo log page; the caller must have reserved
- the rollback segment mutex */
+trx_undo_add_page(trx_t* trx, trx_undo_t* undo, mtr_t* mtr)
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Frees the last undo log page.
-The caller must hold the rollback segment mutex. */
-UNIV_INTERN
+
+/** Free the last undo log page. The caller must hold the rseg mutex.
+@param[in,out] undo undo log
+@param[in,out] mtr mini-transaction that does not hold any undo log page
+ or that has allocated the undo log page */
void
-trx_undo_free_last_page_func(
-/*==========================*/
-#ifdef UNIV_DEBUG
- const trx_t* trx, /*!< in: transaction */
-#endif /* UNIV_DEBUG */
- trx_undo_t* undo, /*!< in/out: undo log memory copy */
- mtr_t* mtr) /*!< in/out: mini-transaction which does not
- have a latch to any undo log page or which
- has allocated the undo log page */
+trx_undo_free_last_page(trx_undo_t* undo, mtr_t* mtr)
MY_ATTRIBUTE((nonnull));
-#ifdef UNIV_DEBUG
-# define trx_undo_free_last_page(trx,undo,mtr) \
- trx_undo_free_last_page_func(trx,undo,mtr)
-#else /* UNIV_DEBUG */
-# define trx_undo_free_last_page(trx,undo,mtr) \
- trx_undo_free_last_page_func(undo,mtr)
-#endif /* UNIV_DEBUG */
-/***********************************************************************//**
-Truncates an undo log from the end. This function is used during a rollback
-to free space from an undo log. */
-UNIV_INTERN
+/** Truncate the tail of an undo log during rollback.
+@param[in,out] undo undo log
+@param[in] limit all undo logs after this limit will be discarded
+@param[in] is_temp whether this is temporary undo log */
void
-trx_undo_truncate_end_func(
-/*=======================*/
-#ifdef UNIV_DEBUG
- const trx_t* trx, /*!< in: transaction whose undo log it is */
-#endif /* UNIV_DEBUG */
- trx_undo_t* undo, /*!< in/out: undo log */
- undo_no_t limit) /*!< in: all undo records with undo number
- >= this value should be truncated */
+trx_undo_truncate_end(trx_undo_t* undo, undo_no_t limit, bool is_temp)
MY_ATTRIBUTE((nonnull));
-#ifdef UNIV_DEBUG
-# define trx_undo_truncate_end(trx,undo,limit) \
- trx_undo_truncate_end_func(trx,undo,limit)
-#else /* UNIV_DEBUG */
-# define trx_undo_truncate_end(trx,undo,limit) \
- trx_undo_truncate_end_func(undo,limit)
-#endif /* UNIV_DEBUG */
-/***********************************************************************//**
-Truncates an undo log from the start. This function is used during a purge
-operation. */
-UNIV_INTERN
+/** Truncate the head of an undo log.
+NOTE that only whole pages are freed; the header page is not
+freed, but emptied, if all the records there are below the limit.
+@param[in,out] rseg rollback segment
+@param[in] hdr_page_no header page number
+@param[in] hdr_offset header offset on the page
+@param[in] limit first undo number to preserve
+(everything below the limit will be truncated) */
void
trx_undo_truncate_start(
-/*====================*/
- trx_rseg_t* rseg, /*!< in: rollback segment */
- ulint space, /*!< in: space id of the log */
- ulint hdr_page_no, /*!< in: header page number */
- ulint hdr_offset, /*!< in: header offset on the page */
- undo_no_t limit); /*!< in: all undo pages with
- undo numbers < this value
- should be truncated; NOTE that
- the function only frees whole
- pages; the header page is not
- freed, but emptied, if all the
- records there are < limit */
+ trx_rseg_t* rseg,
+ ulint hdr_page_no,
+ ulint hdr_offset,
+ undo_no_t limit);
/********************************************************************//**
Initializes the undo log lists for a rollback segment memory copy.
This function is only called when the database is started or a new
rollback segment created.
-@return the combined size of undo log segments in pages */
-UNIV_INTERN
+@return the combined size of undo log segments in pages */
ulint
trx_undo_lists_init(
/*================*/
trx_rseg_t* rseg); /*!< in: rollback segment memory object */
-/**********************************************************************//**
-Assigns an undo log for a transaction. A new undo log is created or a cached
-undo log reused.
-@return DB_SUCCESS if undo log assign successful, possible error codes
-are: DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE DB_READ_ONLY
-DB_OUT_OF_MEMORY */
-UNIV_INTERN
+/** Mark that an undo log header belongs to a data dictionary transaction.
+@param[in] trx dictionary transaction
+@param[in,out] undo undo log
+@param[in,out] mtr mini-transaction */
+void trx_undo_mark_as_dict(const trx_t* trx, trx_undo_t* undo, mtr_t* mtr);
+/** Assign an undo log for a transaction.
+A new undo log is created or a cached undo log reused.
+@param[in,out] trx transaction
+@param[in] rseg rollback segment
+@param[out] undo the undo log
+@param[in] type TRX_UNDO_INSERT or TRX_UNDO_UPDATE
+@retval DB_SUCCESS on success
+@retval DB_TOO_MANY_CONCURRENT_TRXS
+@retval DB_OUT_OF_FILE_SPACE
+@retval DB_READ_ONLY
+@retval DB_OUT_OF_MEMORY */
dberr_t
trx_undo_assign_undo(
-/*=================*/
- trx_t* trx, /*!< in: transaction */
- ulint type) /*!< in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
+ trx_t* trx,
+ trx_rseg_t* rseg,
+ trx_undo_t** undo,
+ ulint type)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/******************************************************************//**
Sets the state of the undo log segment at a transaction finish.
-@return undo log segment header page, x-latched */
-UNIV_INTERN
+@return undo log segment header page, x-latched */
page_t*
trx_undo_set_state_at_finish(
/*=========================*/
trx_undo_t* undo, /*!< in: undo log memory copy */
mtr_t* mtr); /*!< in: mtr */
-/******************************************************************//**
-Sets the state of the undo log segment at a transaction prepare.
-@return undo log segment header page, x-latched */
-UNIV_INTERN
+
+/** Set the state of the undo log segment at an XA PREPARE or XA ROLLBACK.
+@param[in,out] trx transaction
+@param[in,out] undo insert_undo or update_undo log
+@param[in] rollback false=XA PREPARE, true=XA ROLLBACK
+@param[in,out] mtr mini-transaction
+@return undo log segment header page, x-latched */
page_t*
trx_undo_set_state_at_prepare(
-/*==========================*/
- trx_t* trx, /*!< in: transaction */
- trx_undo_t* undo, /*!< in: undo log memory copy */
- mtr_t* mtr); /*!< in: mtr */
+ trx_t* trx,
+ trx_undo_t* undo,
+ bool rollback,
+ mtr_t* mtr);
/**********************************************************************//**
Adds the update undo log header as the first in the history list, and
frees the memory object, or puts it to the list of cached update undo log
segments. */
-UNIV_INTERN
void
trx_undo_update_cleanup(
/*====================*/
- trx_t* trx, /*!< in: trx owning the update undo log */
- page_t* undo_page, /*!< in: update undo log header page,
- x-latched */
- mtr_t* mtr); /*!< in: mtr */
-/******************************************************************//**
-Frees or caches an insert undo log after a transaction commit or rollback.
-Knowledge of inserts is not needed after a commit or rollback, therefore
-the data can be discarded. */
-UNIV_INTERN
+ trx_t* trx, /*!< in: trx owning the update
+ undo log */
+ page_t* undo_page, /*!< in: update undo log header page,
+ x-latched */
+ mtr_t* mtr); /*!< in: mtr */
+
+/** Free an insert or temporary undo log after commit or rollback.
+The information is not needed after a commit or rollback, therefore
+the data can be discarded.
+@param[in,out] undo undo log
+@param[in] is_temp whether this is temporary undo log */
void
-trx_undo_insert_cleanup(
-/*====================*/
- trx_t* trx); /*!< in: transaction handle */
+trx_undo_commit_cleanup(trx_undo_t* undo, bool is_temp);
/********************************************************************//**
At shutdown, frees the undo logs of a PREPARED transaction. */
-UNIV_INTERN
void
trx_undo_free_prepared(
/*===================*/
trx_t* trx) /*!< in/out: PREPARED transaction */
- UNIV_COLD MY_ATTRIBUTE((nonnull));
-#endif /* !UNIV_HOTBACKUP */
+ ATTRIBUTE_COLD __attribute__((nonnull));
+
/***********************************************************//**
Parses the redo log entry of an undo log page initialization.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
trx_undo_parse_page_init(
/*=====================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
-/***********************************************************//**
-Parses the redo log entry of an undo log page header create or reuse.
-@return end of log record or NULL */
-UNIV_INTERN
+ const byte* ptr, /*!< in: buffer */
+ const byte* end_ptr,/*!< in: buffer end */
+ page_t* page, /*!< in: page or NULL */
+ mtr_t* mtr); /*!< in: mtr or NULL */
+/** Parse the redo log entry of an undo log page header create or reuse.
+@param[in] type MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE
+@param[in] ptr redo log record
+@param[in] end_ptr end of log buffer
+@param[in,out] page page frame or NULL
+@param[in,out] mtr mini-transaction or NULL
+@return end of log record or NULL */
byte*
trx_undo_parse_page_header(
-/*=======================*/
- ulint type, /*!< in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
-/***********************************************************//**
-Parses the redo log entry of an undo log page header discard.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-trx_undo_parse_discard_latest(
-/*==========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
+ mlog_id_t type,
+ const byte* ptr,
+ const byte* end_ptr,
+ page_t* page,
+ mtr_t* mtr);
/************************************************************************
Frees an undo log memory copy. */
-UNIV_INTERN
void
trx_undo_mem_free(
/*==============*/
@@ -416,12 +371,12 @@ trx_undo_mem_free(
#define TRX_UNDO_PREPARED 5 /* contains an undo log of an
prepared transaction */
-#ifndef UNIV_HOTBACKUP
#ifndef UNIV_INNOCHECKSUM
+
/** Transaction undo log memory object; this is protected by the undo_mutex
in the corresponding transaction object */
-struct trx_undo_t{
+struct trx_undo_t {
/*-----------------------------*/
ulint id; /*!< undo log slot number within the
rollback segment */
@@ -448,8 +403,6 @@ struct trx_undo_t{
/*-----------------------------*/
ulint space; /*!< space id where the undo log
placed */
- ulint zip_size; /*!< compressed page size of space
- in bytes, or 0 for uncompressed */
ulint hdr_page_no; /*!< page number of the header page in
the undo log */
ulint hdr_offset; /*!< header offset of the undo log on
@@ -471,13 +424,14 @@ struct trx_undo_t{
undo_no_t top_undo_no; /*!< undo number of the latest record */
buf_block_t* guess_block; /*!< guess for the buffer block where
the top page might reside */
+ ulint withdraw_clock; /*!< the withdraw clock value of the
+ buffer pool when guess_block was stored */
/*-----------------------------*/
UT_LIST_NODE_T(trx_undo_t) undo_list;
/*!< undo log objects in the rollback
segment are chained into lists */
};
#endif /* !UNIV_INNOCHECKSUM */
-#endif /* !UNIV_HOTBACKUP */
/** The offset of the undo log page header on pages of the undo log */
#define TRX_UNDO_PAGE_HDR FSEG_PAGE_DATA
@@ -525,6 +479,9 @@ log segment */
/* @{ */
/*-------------------------------------------------------------*/
#define TRX_UNDO_STATE 0 /*!< TRX_UNDO_ACTIVE, ... */
+
+#ifndef UNIV_INNOCHECKSUM
+
#define TRX_UNDO_LAST_LOG 2 /*!< Offset of the last undo log header
on the segment header page, 0 if
none */
@@ -539,7 +496,6 @@ log segment */
#define TRX_UNDO_SEG_HDR_SIZE (4 + FSEG_HEADER_SIZE + FLST_BASE_NODE_SIZE)
/* @} */
-
/** The undo log header. There can be several undo log headers on the first
page of an update undo log segment. */
/* @{ */
@@ -604,10 +560,7 @@ quite a large overhead. */
with the XA XID */
/* @} */
-#ifndef UNIV_INNOCHECKSUM
-#ifndef UNIV_NONINL
#include "trx0undo.ic"
-#endif
#endif /* !UNIV_INNOCHECKSUM */
#endif
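
The new ROLL_PTR_*_POS constants above pin down the DB_ROLL_PTR layout:
1 insert-flag bit at position 55, 7 rollback-segment-id bits from 48,
32 page-number bits from 16 and a 16-bit byte offset from 0, i.e.
1 + 7 + 32 + 16 = 56 bits, matching the 7-byte on-disk column. A hedged
decoding sketch, given some roll_ptr_t value roll_ptr:

    ibool is_insert = (ibool) ((roll_ptr >> ROLL_PTR_INSERT_FLAG_POS) & 1);
    ulint rseg_id   = (ulint) ((roll_ptr >> ROLL_PTR_RSEG_ID_POS) & 0x7F);
    ulint page_no   = (ulint) ((roll_ptr >> ROLL_PTR_PAGE_POS) & 0xFFFFFFFF);
    ulint offset    = (ulint) (roll_ptr & 0xFFFF);
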
diff --git a/storage/innobase/include/trx0undo.ic b/storage/innobase/include/trx0undo.ic
index 4ab197f5767..2e26e6547c3 100644
--- a/storage/innobase/include/trx0undo.ic
+++ b/storage/innobase/include/trx0undo.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,10 +27,9 @@ Created 3/26/1996 Heikki Tuuri
#include "data0type.h"
#include "page0page.h"
-#ifndef UNIV_HOTBACKUP
/***********************************************************************//**
Builds a roll pointer.
-@return roll pointer */
+@return roll pointer */
UNIV_INLINE
roll_ptr_t
trx_undo_build_roll_ptr(
@@ -47,9 +47,9 @@ trx_undo_build_roll_ptr(
ut_ad(rseg_id < TRX_SYS_N_RSEGS);
ut_ad(offset < 65536);
- roll_ptr = (roll_ptr_t) is_insert << 55
- | (roll_ptr_t) rseg_id << 48
- | (roll_ptr_t) page_no << 16
+ roll_ptr = (roll_ptr_t) is_insert << ROLL_PTR_INSERT_FLAG_POS
+ | (roll_ptr_t) rseg_id << ROLL_PTR_RSEG_ID_POS
+ | (roll_ptr_t) page_no << ROLL_PTR_PAGE_POS
| offset;
return(roll_ptr);
}
@@ -85,7 +85,7 @@ trx_undo_decode_roll_ptr(
/***********************************************************************//**
Returns TRUE if the roll pointer is of the insert type.
-@return TRUE if insert undo log */
+@return TRUE if insert undo log */
UNIV_INLINE
ibool
trx_undo_roll_ptr_is_insert(
@@ -104,7 +104,7 @@ trx_undo_roll_ptr_is_insert(
/***********************************************************************//**
Returns true if the record is of the insert type.
-@return true if the record was freshly inserted (not updated). */
+@return true if the record was freshly inserted (not updated). */
UNIV_INLINE
bool
trx_undo_trx_id_is_insert(
@@ -116,7 +116,6 @@ trx_undo_trx_id_is_insert(
#endif
return(static_cast<bool>(trx_id[DATA_TRX_ID_LEN] >> 7));
}
-#endif /* !UNIV_HOTBACKUP */
/*****************************************************************//**
Writes a roll ptr to an index page. In case that the size changes in
@@ -140,7 +139,7 @@ trx_write_roll_ptr(
Reads a roll ptr from an index page. In case that the roll ptr size
changes in some future version, this function should be used instead of
mach_read_...
-@return roll ptr */
+@return roll ptr */
UNIV_INLINE
roll_ptr_t
trx_read_roll_ptr(
@@ -153,42 +152,33 @@ trx_read_roll_ptr(
return(mach_read_from_7(ptr));
}
-#ifndef UNIV_HOTBACKUP
-/******************************************************************//**
-Gets an undo log page and x-latches it.
-@return pointer to page x-latched */
+/** Gets an undo log page and x-latches it.
+@param[in] page_id page id
+@param[in,out] mtr mini-transaction
+@return pointer to page x-latched */
UNIV_INLINE
page_t*
-trx_undo_page_get(
-/*==============*/
- ulint space, /*!< in: space where placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number */
- mtr_t* mtr) /*!< in: mtr */
+trx_undo_page_get(const page_id_t page_id, mtr_t* mtr)
{
- buf_block_t* block = buf_page_get(space, zip_size, page_no,
+ buf_block_t* block = buf_page_get(page_id, univ_page_size,
RW_X_LATCH, mtr);
+
buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
return(buf_block_get_frame(block));
}
-/******************************************************************//**
-Gets an undo log page and s-latches it.
-@return pointer to page s-latched */
+/** Gets an undo log page and s-latches it.
+@param[in] page_id page id
+@param[in,out] mtr mini-transaction
+@return pointer to page s-latched */
UNIV_INLINE
page_t*
-trx_undo_page_get_s_latched(
-/*========================*/
- ulint space, /*!< in: space where placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number */
- mtr_t* mtr) /*!< in: mtr */
+trx_undo_page_get_s_latched(const page_id_t page_id, mtr_t* mtr)
{
- buf_block_t* block = buf_page_get(space, zip_size, page_no,
+ buf_block_t* block = buf_page_get(page_id, univ_page_size,
RW_S_LATCH, mtr);
+
buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
return(buf_block_get_frame(block));
@@ -197,7 +187,7 @@ trx_undo_page_get_s_latched(
/******************************************************************//**
Returns the start offset of the undo log records of the specified undo
log on the page.
-@return start offset */
+@return start offset */
UNIV_INLINE
ulint
trx_undo_page_get_start(
@@ -222,7 +212,7 @@ trx_undo_page_get_start(
/******************************************************************//**
Returns the end offset of the undo log records of the specified undo
log on the page.
-@return end offset */
+@return end offset */
UNIV_INLINE
ulint
trx_undo_page_get_end(
@@ -255,7 +245,7 @@ trx_undo_page_get_end(
/******************************************************************//**
Returns the previous undo record on the page in the specified log, or
NULL if none exists.
-@return pointer to record, NULL if none */
+@return pointer to record, NULL if none */
UNIV_INLINE
trx_undo_rec_t*
trx_undo_page_get_prev_rec(
@@ -282,7 +272,7 @@ trx_undo_page_get_prev_rec(
/******************************************************************//**
Returns the next undo log record on the page in the specified log, or
NULL if none exists.
-@return pointer to record, NULL if none */
+@return pointer to record, NULL if none */
UNIV_INLINE
trx_undo_rec_t*
trx_undo_page_get_next_rec(
@@ -312,7 +302,7 @@ trx_undo_page_get_next_rec(
/******************************************************************//**
Returns the last undo record on the page in the specified undo log, or
NULL if none exists.
-@return pointer to record, NULL if none */
+@return pointer to record, NULL if none */
UNIV_INLINE
trx_undo_rec_t*
trx_undo_page_get_last_rec(
@@ -338,7 +328,7 @@ trx_undo_page_get_last_rec(
/******************************************************************//**
Returns the first undo record on the page in the specified undo log, or
NULL if none exists.
-@return pointer to record, NULL if none */
+@return pointer to record, NULL if none */
UNIV_INLINE
trx_undo_rec_t*
trx_undo_page_get_first_rec(
@@ -360,4 +350,3 @@ trx_undo_page_get_first_rec(
return(undo_page + start);
}
-#endif /* !UNIV_HOTBACKUP */
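
With the signature change above, the old (space, zip_size, page_no) triple
collapses into a single page_id_t, and undo pages are always read at
univ_page_size. A usage sketch, assuming an undo log object "undo" and a
fresh mini-transaction (names illustrative):

    mtr_t mtr;

    mtr_start(&mtr);
    /* X-latch the undo log header page of the hypothetical undo object. */
    page_t* undo_page = trx_undo_page_get(
        page_id_t(undo->space, undo->hdr_page_no), &mtr);
    /* ... read or modify undo records on undo_page ... */
    mtr_commit(&mtr);
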
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
index 8a9e1fc5e0d..cb81b68e2ab 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innobase/include/univ.i
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation.
+Copyright (c) 2013, 2020, MariaDB Corporation.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -34,18 +34,14 @@ Created 1/20/1994 Heikki Tuuri
#ifndef univ_i
#define univ_i
-#ifdef UNIV_HOTBACKUP
-#include "hb_univ.i"
-#endif /* UNIV_HOTBACKUP */
-
/* aux macros to convert M into "123" (string) if M is defined like
#define M 123 */
#define _IB_TO_STR(s) #s
#define IB_TO_STR(s) _IB_TO_STR(s)
#define INNODB_VERSION_MAJOR 5
-#define INNODB_VERSION_MINOR 6
-#define INNODB_VERSION_BUGFIX 47
+#define INNODB_VERSION_MINOR 7
+#define INNODB_VERSION_BUGFIX 29
/* The following is the InnoDB version as shown in
SELECT plugin_version FROM information_schema.plugins;
@@ -61,10 +57,6 @@ component, i.e. we show M.N.P as M.N */
IB_TO_STR(INNODB_VERSION_MINOR) "." \
IB_TO_STR(INNODB_VERSION_BUGFIX)
-#define REFMAN "http://dev.mysql.com/doc/refman/" \
- IB_TO_STR(INNODB_VERSION_MAJOR) "." \
- IB_TO_STR(INNODB_VERSION_MINOR) "/en/"
-
/** How far ahead should we tell the service manager the timeout
(time in seconds) */
#define INNODB_EXTEND_TIMEOUT_INTERVAL 30
@@ -79,68 +71,59 @@ the virtual method table (vtable) in GCC 3. */
# define ha_innobase ha_innodb
#endif /* MYSQL_DYNAMIC_PLUGIN */
-#if (defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)) && !defined(MYSQL_SERVER) && !defined(__WIN__)
-# undef __WIN__
-# define __WIN__
-
+#if defined(_WIN32)
# include <windows.h>
+#endif /* _WIN32 */
-# ifdef _NT_
-# define __NT__
-# endif
+/* Include a minimum number of SQL header files so that few changes
+made in SQL code cause a complete InnoDB rebuild. These headers are
+used throughout InnoDB but do not include too much themselves. They
+support cross-platform development and expose commonly used SQL names. */
-#else
-/* The defines used with MySQL */
+#include <my_global.h>
-/* Include two header files from MySQL to make the Unix flavor used
-in compiling more Posix-compatible. These headers also define __WIN__
-if we are compiling on Windows. */
+/* JAN: TODO: missing 5.7 header */
+#ifdef HAVE_MY_THREAD_H
+//# include <my_thread.h>
+#endif
-#ifndef UNIV_HOTBACKUP
-# include <my_global.h>
-# include <my_pthread.h>
-#endif /* UNIV_HOTBACKUP */
+#ifndef UNIV_INNOCHECKSUM
+# include <m_string.h>
+# include <mysqld_error.h>
+#endif /* !UNIV_INNOCHECKSUM */
/* Include <sys/stat.h> to get S_I... macros defined for os0file.cc */
-# include <sys/stat.h>
-# if !defined(__WIN__)
-# include <sys/mman.h> /* mmap() for os0proc.cc */
-# endif
-
-/* Include the header file generated by GNU autoconf */
-# ifndef __WIN__
-# ifndef UNIV_HOTBACKUP
-# include "my_config.h"
-# endif /* UNIV_HOTBACKUP */
-# endif
-
-# ifdef HAVE_SCHED_H
-# include <sched.h>
-# endif
-
-/* We only try to do explicit inlining of functions with gcc and
-Sun Studio */
-
-# ifdef HAVE_PREAD
-# define HAVE_PWRITE
-# endif
+#include <sys/stat.h>
-#endif /* #if (defined(WIN32) || ... */
+#ifndef _WIN32
+# include <sys/mman.h> /* mmap() for os0proc.cc */
+# include <sched.h>
+# include "my_config.h"
+#endif
-#ifndef __WIN__
+#include <stdint.h>
#include <inttypes.h>
-#endif /* !__WIN__ */
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "my_pthread.h"
/* Following defines are to enable performance schema
-instrumentation in each of four InnoDB modules if
+instrumentation in each of five InnoDB modules if
HAVE_PSI_INTERFACE is defined. */
-#if defined HAVE_PSI_INTERFACE && !defined UNIV_HOTBACKUP
+#ifdef HAVE_PSI_INTERFACE
# define UNIV_PFS_MUTEX
# define UNIV_PFS_RWLOCK
-
# define UNIV_PFS_IO
# define UNIV_PFS_THREAD
+// JAN: TODO: MySQL 5.7 PSI
+// # include "mysql/psi/psi.h" /* HAVE_PSI_MEMORY_INTERFACE */
+# ifdef HAVE_PSI_MEMORY_INTERFACE
+# define UNIV_PFS_MEMORY
+# endif /* HAVE_PSI_MEMORY_INTERFACE */
+
/* There are mutexes/rwlocks that we want to exclude from
instrumentation even if their corresponding performance schema
define is set. And this PFS_NOT_INSTRUMENTED is used
@@ -150,28 +133,61 @@ be excluded from instrumentation. */
# define PFS_IS_INSTRUMENTED(key) ((key) != PFS_NOT_INSTRUMENTED)
+/* JAN: TODO: missing 5.7 header */
+#ifdef HAVE_PFS_THREAD_PROVIDER_H
+/* For PSI_MUTEX_CALL() and similar. */
+#include "pfs_thread_provider.h"
+#endif
+
+#include "mysql/psi/mysql_thread.h"
+/* For PSI_FILE_CALL(). */
+/* JAN: TODO: missing 5.7 header */
+#ifdef HAVE_PFS_FILE_PROVIDER_H
+#include "pfs_file_provider.h"
+#endif
+
+#include "mysql/psi/mysql_file.h"
+
#endif /* HAVE_PSI_INTERFACE */
-#ifdef __WIN__
+#ifdef _WIN32
# define YY_NO_UNISTD_H 1
-#endif /* __WIN__ */
+/* VC++ tries to optimise for size by default, from V8+. The size of
+the pointer to member depends on whether the type is defined before the
+compiler sees the type in the translation unit. This default behaviour
+can cause the pointer to be a different size in different translation
+units, depending on the above rule. We force optimise for size behaviour
+for all cases. This is used by ut0lst.h related code. */
+# pragma pointers_to_members(full_generality, multiple_inheritance)
+#endif /* _WIN32 */
/* DEBUG VERSION CONTROL
===================== */
/* When this macro is defined then additional test functions will be
compiled. These functions live at the end of each relevant source file
-and have "test_" prefix. These functions are not called from anywhere in
-the code, they can be called from gdb after
+and have "test_" prefix. These functions can be called from the end of
+innobase_init() or they can be called from gdb after
innobase_start_or_create_for_mysql() has executed using the call
-command. Not tested on Windows. */
+command. */
/*
#define UNIV_COMPILE_TEST_FUNCS
+#define UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR
+#define UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH
+#define UNIV_ENABLE_UNIT_TEST_DICT_STATS
+#define UNIV_ENABLE_UNIT_TEST_ROW_RAW_FORMAT_INT
*/
-#if defined HAVE_valgrind && defined HAVE_VALGRIND
+#if defined HAVE_valgrind && defined HAVE_VALGRIND_MEMCHECK_H
# define UNIV_DEBUG_VALGRIND
-#endif /* HAVE_VALGRIND */
+#endif
+
+#ifdef DBUG_OFF
+# undef UNIV_DEBUG
+#elif !defined UNIV_DEBUG
+# define UNIV_DEBUG
+#endif
+
#if 0
#define UNIV_DEBUG_VALGRIND /* Enable extra
Valgrind instrumentation */
@@ -183,34 +199,18 @@ command. Not tested on Windows. */
debugging without UNIV_DEBUG */
#define UNIV_BLOB_LIGHT_DEBUG /* Enable off-page column
debugging without UNIV_DEBUG */
-#define UNIV_DEBUG /* Enable ut_ad() assertions
- and disable UNIV_INLINE */
#define UNIV_DEBUG_LOCK_VALIDATE /* Enable
ut_ad(lock_rec_validate_page())
assertions. */
-#define UNIV_DEBUG_FILE_ACCESSES /* Enable freed block access
- debugging without UNIV_DEBUG */
#define UNIV_LRU_DEBUG /* debug the buffer pool LRU */
#define UNIV_HASH_DEBUG /* debug HASH_ macros */
-#define UNIV_LIST_DEBUG /* debug UT_LIST_ macros */
#define UNIV_LOG_LSN_DEBUG /* write LSN to the redo log;
this will break redo log file compatibility, but it may be useful when
debugging redo log application problems. */
-#define UNIV_MEM_DEBUG /* detect memory leaks etc */
#define UNIV_IBUF_DEBUG /* debug the insert buffer */
-#define UNIV_BLOB_DEBUG /* track BLOB ownership;
-assumes that no BLOBs survive server restart */
-#define UNIV_IBUF_COUNT_DEBUG /* debug the insert buffer;
-this limits the database to IBUF_COUNT_N_SPACES and IBUF_COUNT_N_PAGES,
-and the insert buffer must be empty when the database is started */
#define UNIV_PERF_DEBUG /* debug flag that enables
light weight performance
related stuff. */
-#define UNIV_SYNC_DEBUG /* debug mutex and latch
-operations (very slow); also UNIV_DEBUG must be defined */
-#define UNIV_SEARCH_DEBUG /* debug B-tree comparisons */
-#define UNIV_SYNC_PERF_STAT /* operation counts for
- rw-locks and mutexes */
#define UNIV_SEARCH_PERF_STAT /* statistics for the
adaptive hash index */
#define UNIV_SRV_PRINT_LATCH_WAITS /* enable diagnostic output
@@ -234,59 +234,33 @@ operations (very slow); also UNIV_DEBUG must be defined */
#define UNIV_BTR_DEBUG /* check B-tree links */
#define UNIV_LIGHT_MEM_DEBUG /* light memory debugging */
-/*
-#define UNIV_SQL_DEBUG
-#define UNIV_LOG_DEBUG
-*/
- /* the above option prevents forcing of log to disk
- at a buffer page write: it should be tested with this
- option off; also some ibuf tests are suppressed */
+// #define UNIV_SQL_DEBUG
/* Linkage specifier for non-static InnoDB symbols (variables and functions)
that are only referenced from within InnoDB, not from MySQL. We disable the
GCC visibility directive on all Sun operating systems because there is no
easy way to get it to work. See http://bugs.mysql.com/bug.php?id=52263. */
-#define MY_ATTRIBUTE __attribute__
#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(sun) || defined(__INTEL_COMPILER)
-# define UNIV_INTERN MY_ATTRIBUTE((visibility ("hidden")))
+# define UNIV_INTERN __attribute__((visibility ("hidden")))
#else
# define UNIV_INTERN
#endif
-#if defined(INNODB_COMPILER_HINTS) \
- && defined __GNUC__ \
- && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 3)
-/** Starting with GCC 4.3, the "cold" attribute is used to inform the
-compiler that a function is unlikely executed. The function is
-optimized for size rather than speed and on many targets it is placed
-into special subsection of the text section so all cold functions
-appears close together improving code locality of non-cold parts of
-program. The paths leading to call of cold functions within code are
-marked as unlikely by the branch prediction mechanism. optimize a
-rarely invoked function for size instead for speed. */
-# define UNIV_COLD MY_ATTRIBUTE((cold))
+
+#ifndef MY_ATTRIBUTE
+#if defined(__GNUC__)
+# define MY_ATTRIBUTE(A) __attribute__(A)
#else
-# define UNIV_COLD /* empty */
+# define MY_ATTRIBUTE(A)
+#endif
#endif
-
-#ifndef UNIV_MUST_NOT_INLINE
-/* Definition for inline version */
#define UNIV_INLINE static inline
-#else /* !UNIV_MUST_NOT_INLINE */
-/* If we want to compile a noninlined version we use the following macro
-definitions: */
-
-#define UNIV_NONINL
-#define UNIV_INLINE UNIV_INTERN
-
-#endif /* !UNIV_MUST_NOT_INLINE */
-
#define UNIV_WORD_SIZE SIZEOF_SIZE_T
/** The following alignment is used in memory allocations in memory heap
management to ensure correct alignment for doubles etc. */
-#define UNIV_MEM_ALIGNMENT 8
+#define UNIV_MEM_ALIGNMENT 8U
/*
DATABASE VERSION CONTROL
@@ -350,6 +324,12 @@ typedef enum innodb_file_formats_enum innodb_file_formats_t;
#define IF_SNAPPY(A,B) B
#endif
+#if defined (HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) || defined(_WIN32)
+#define IF_PUNCH_HOLE(A,B) A
+#else
+#define IF_PUNCH_HOLE(A,B) B
+#endif
+
/** The universal page size of the database */
#define UNIV_PAGE_SIZE ((ulint) srv_page_size)
@@ -378,25 +358,30 @@ and 2 bits for flags. This limits the uncompressed page size to 16k.
#define UNIV_PAGE_SSIZE_ORIG (UNIV_PAGE_SIZE_SHIFT_ORIG - 9)
/** Minimum page size InnoDB currently supports. */
-#define UNIV_PAGE_SIZE_MIN (1 << UNIV_PAGE_SIZE_SHIFT_MIN)
+#define UNIV_PAGE_SIZE_MIN (1U << UNIV_PAGE_SIZE_SHIFT_MIN)
/** Maximum page size InnoDB currently supports. */
-#define UNIV_PAGE_SIZE_MAX (1 << UNIV_PAGE_SIZE_SHIFT_MAX)
+#define UNIV_PAGE_SIZE_MAX (1U << UNIV_PAGE_SIZE_SHIFT_MAX)
/** Default page size for InnoDB tablespaces. */
-#define UNIV_PAGE_SIZE_DEF (1 << UNIV_PAGE_SIZE_SHIFT_DEF)
+#define UNIV_PAGE_SIZE_DEF (1U << UNIV_PAGE_SIZE_SHIFT_DEF)
/** Original 16k page size for InnoDB tablespaces. */
-#define UNIV_PAGE_SIZE_ORIG (1 << UNIV_PAGE_SIZE_SHIFT_ORIG)
+#define UNIV_PAGE_SIZE_ORIG (1U << UNIV_PAGE_SIZE_SHIFT_ORIG)
/** Smallest compressed page size */
-#define UNIV_ZIP_SIZE_MIN (1 << UNIV_ZIP_SIZE_SHIFT_MIN)
+#define UNIV_ZIP_SIZE_MIN (1U << UNIV_ZIP_SIZE_SHIFT_MIN)
/** Largest compressed page size */
-#define UNIV_ZIP_SIZE_MAX (1 << UNIV_ZIP_SIZE_SHIFT_MAX)
+#define UNIV_ZIP_SIZE_MAX (1U << UNIV_ZIP_SIZE_SHIFT_MAX)
-/** Number of supported page sizes (The convention 'ssize' is used
-for 'log2 minus 9' or the number of shifts starting with 512.)
-This number varies depending on UNIV_PAGE_SIZE. */
-#define UNIV_PAGE_SSIZE_MAX \
- (UNIV_PAGE_SIZE_SHIFT - UNIV_ZIP_SIZE_SHIFT_MIN + 1)
+/** Largest possible ssize for an uncompressed page.
+(The convention 'ssize' is used for 'log2 minus 9' or the number of
+shifts starting with 512.)
+This max number varies depending on UNIV_PAGE_SIZE. */
+#define UNIV_PAGE_SSIZE_MAX \
+ static_cast<ulint>(UNIV_PAGE_SIZE_SHIFT - UNIV_ZIP_SIZE_SHIFT_MIN + 1)
+
+/** Smallest possible ssize for an uncompressed page. */
+#define UNIV_PAGE_SSIZE_MIN \
+ static_cast<ulint>(UNIV_PAGE_SIZE_SHIFT_MIN - UNIV_ZIP_SIZE_SHIFT_MIN + 1)
/** Maximum number of parallel threads in a parallelized operation */
#define UNIV_MAX_PARALLELISM 32
@@ -424,6 +409,10 @@ database name and table name. In addition, 14 bytes is added for:
#define MAX_FULL_NAME_LEN \
(MAX_TABLE_NAME_LEN + MAX_DATABASE_NAME_LEN + 14)
+/** Maximum length of the compression algorithm string. Currently we support
+only (NONE | ZLIB | LZ4). */
+#define MAX_COMPRESSION_LEN 4
+
/** The maximum length in bytes that a database name can occupy when stored in
UTF8, including the terminating '\0', see dict_fs2utf8(). You must include
mysql_com.h if you are to use this macro. */
@@ -439,65 +428,54 @@ mysql_com.h if you are to use this macro. */
==========================
*/
-/* Note that inside MySQL 'byte' is defined as char on Linux! */
-#define byte unsigned char
-
-/* Another basic type we use is unsigned long integer which should be equal to
-the word size of the machine, that is on a 32-bit platform 32 bits, and on a
-64-bit platform 64 bits. We also give the printf format for the type as a
-macro ULINTPF. */
+/** Unsigned octet of bits */
+typedef unsigned char byte;
+/** Machine-word-width unsigned integer */
+typedef size_t ulint;
+/** Machine-word-width signed integer */
+typedef ssize_t lint;
+/** ulint format for the printf() family of functions */
+#define ULINTPF "%zu"
+/** ulint hexadecimal format for the printf() family of functions */
+#define ULINTPFx "%zx"
#ifdef _WIN32
/* Use the integer types and formatting strings defined in Visual Studio. */
# define UINT32PF "%u"
# define INT64PF "%lld"
-# define UINT64PF "%llu"
+# define UINT64scan "llu"
+# define UINT64PFx "%016llx"
+#elif defined __APPLE__
+/* Apple prefers to call the 64-bit types 'long long'
+in both 32-bit and 64-bit environments. */
+# define UINT32PF "%" PRIu32
+# define INT64PF "%lld"
+# define UINT64scan "llu"
# define UINT64PFx "%016llx"
-typedef __int64 ib_int64_t;
-typedef unsigned __int64 ib_uint64_t;
-typedef unsigned __int32 ib_uint32_t;
#else
/* Use the integer types and formatting strings defined in the C99 standard. */
# define UINT32PF "%" PRIu32
# define INT64PF "%" PRId64
-# define UINT64PF "%" PRIu64
+# define UINT64scan PRIu64
# define UINT64PFx "%016" PRIx64
-typedef int64_t ib_int64_t;
-typedef uint64_t ib_uint64_t;
-typedef uint32_t ib_uint32_t;
#endif
-#define IB_ID_FMT UINT64PF
-
-/* Type used for all log sequence number storage and arithmetics */
-typedef ib_uint64_t lsn_t;
-
-#ifdef _WIN64
-typedef unsigned __int64 ulint;
-typedef __int64 lint;
-# define ULINTPF UINT64PF
-#else
-typedef unsigned long int ulint;
-typedef long int lint;
-# define ULINTPF "%lu"
-#endif /* _WIN64 */
-
-#ifndef UNIV_HOTBACKUP
-typedef unsigned long long int ullint;
-#endif /* UNIV_HOTBACKUP */
-
#ifdef UNIV_INNOCHECKSUM
extern bool strict_verify;
extern FILE* log_file;
extern unsigned long long cur_page_num;
#endif /* UNIV_INNOCHECKSUM */
-#ifndef __WIN__
-#if SIZEOF_LONG != SIZEOF_VOIDP
-#error "Error: InnoDB's ulint must be of the same size as void*"
-#endif
-#endif
+typedef int64_t ib_int64_t;
+typedef uint64_t ib_uint64_t;
+typedef uint32_t ib_uint32_t;
+
+#define UINT64PF "%" UINT64scan
+#define IB_ID_FMT UINT64PF
+
+/** Log sequence number (also used for redo log byte arithmetics) */
+typedef ib_uint64_t lsn_t;
/** The 'undefined' value for a ulint */
#define ULINT_UNDEFINED ((ulint)(-1))
@@ -519,12 +497,13 @@ extern unsigned long long cur_page_num;
#define IB_UINT64_MAX ((ib_uint64_t) (~0ULL))
/** The generic InnoDB system object identifier data type */
-typedef ib_uint64_t ib_id_t;
-#define IB_ID_MAX IB_UINT64_MAX
-
-/** The 'undefined' value for a ullint */
-#define ULLINT_UNDEFINED ((ullint)(-1))
+typedef ib_uint64_t ib_id_t;
+#define IB_ID_MAX (~(ib_id_t) 0)
+#define IB_ID_FMT UINT64PF
+#ifndef UINTMAX_MAX
+#define UINTMAX_MAX IB_UINT64_MAX
+#endif
/** This 'ibool' type is used within Innobase. Remember that different included
headers may define 'bool' differently. Do not assume that 'bool' is a ulint! */
#define ibool ulint
@@ -553,8 +532,7 @@ contains the sum of the following flag and the locally stored len. */
#define UNIV_EXTERN_STORAGE_FIELD (UNIV_SQL_NULL - UNIV_PAGE_SIZE_DEF)
-#if defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER)
-#define HAVE_GCC_GT_2
+#if defined(__GNUC__)
/* Tell the compiler that variable/function is unused. */
# define UNIV_UNUSED MY_ATTRIBUTE ((unused))
#else
@@ -562,11 +540,11 @@ contains the sum of the following flag and the locally stored len. */
#endif /* CHECK FOR GCC VER_GT_2 */
/* Some macros to improve branch prediction and reduce cache misses */
-#if defined(INNODB_COMPILER_HINTS) && defined(HAVE_GCC_GT_2)
+#if defined(COMPILER_HINTS) && defined(__GNUC__)
/* Tell the compiler that 'expr' probably evaluates to 'constant'. */
# define UNIV_EXPECT(expr,constant) __builtin_expect(expr, constant)
/* Tell the compiler that a pointer is likely to be NULL */
-# define UNIV_LIKELY_NULL(ptr) __builtin_expect((ulint) ptr, 0)
+# define UNIV_LIKELY_NULL(ptr) __builtin_expect((ptr) != 0, 0)
/* Minimize cache-miss latency by moving data at addr into a cache before
it is read. */
# define UNIV_PREFETCH_R(addr) __builtin_prefetch(addr, 0, 3)
@@ -575,28 +553,30 @@ it is read or written. */
# define UNIV_PREFETCH_RW(addr) __builtin_prefetch(addr, 1, 3)
/* Sun Studio includes sun_prefetch.h as of version 5.9 */
-#elif (defined(__SUNPRO_C) && __SUNPRO_C >= 0x590) \
- || (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x590)
+#elif (defined(__SUNPRO_C) || defined(__SUNPRO_CC))
# include <sun_prefetch.h>
-#if __SUNPRO_C >= 0x550
-# undef UNIV_INTERN
-# define UNIV_INTERN __hidden
-#endif /* __SUNPRO_C >= 0x550 */
-
# define UNIV_EXPECT(expr,value) (expr)
# define UNIV_LIKELY_NULL(expr) (expr)
-# if defined(INNODB_COMPILER_HINTS)
+# if defined(COMPILER_HINTS)
//# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many((void*) addr)
# define UNIV_PREFETCH_R(addr) ((void) 0)
# define UNIV_PREFETCH_RW(addr) sun_prefetch_write_many(addr)
# else
# define UNIV_PREFETCH_R(addr) ((void) 0)
# define UNIV_PREFETCH_RW(addr) ((void) 0)
-# endif /* INNODB_COMPILER_HINTS */
+# endif /* COMPILER_HINTS */
+# elif defined __WIN__ && defined COMPILER_HINTS
+# include <xmmintrin.h>
+# define UNIV_EXPECT(expr,value) (expr)
+# define UNIV_LIKELY_NULL(expr) (expr)
+// __MM_HINT_T0 - (temporal data)
+// prefetch data into all levels of the cache hierarchy.
+# define UNIV_PREFETCH_R(addr) _mm_prefetch((char *) addr, _MM_HINT_T0)
+# define UNIV_PREFETCH_RW(addr) _mm_prefetch((char *) addr, _MM_HINT_T0)
#else
/* Dummy versions of the macros */
# define UNIV_EXPECT(expr,value) (expr)
@@ -617,18 +597,25 @@ it is read or written. */
Windows, so define a typedef for it and a macro to use at the end of such
functions. */
-#ifdef __WIN__
-typedef ulint os_thread_ret_t;
-#define OS_THREAD_DUMMY_RETURN return(0)
+#ifdef _WIN32
+typedef DWORD os_thread_ret_t;
+# define OS_THREAD_DUMMY_RETURN return(0)
+# define OS_PATH_SEPARATOR '\\'
+# define OS_PATH_SEPARATOR_ALT '/'
#else
typedef void* os_thread_ret_t;
-#define OS_THREAD_DUMMY_RETURN return(NULL)
+# define OS_THREAD_DUMMY_RETURN return(NULL)
+# define OS_PATH_SEPARATOR '/'
+# define OS_PATH_SEPARATOR_ALT '\\'
#endif
#include <stdio.h>
+#include "db0err.h"
#include "ut0dbg.h"
+#include "ut0lst.h"
#include "ut0ut.h"
-#include "db0err.h"
+#include "sync0types.h"
+
#include <my_valgrind.h>
/* define UNIV macros in terms of my_valgrind.h */
#define UNIV_MEM_INVALID(addr, size) MEM_UNDEFINED(addr, size)
@@ -683,4 +670,10 @@ typedef void* os_thread_ret_t;
extern ulong srv_page_size_shift;
extern ulong srv_page_size;
+static const size_t UNIV_SECTOR_SIZE = 512;
+
+/* Dimension of spatial objects we support so far. This has its roots in
+myisam/sp_defs.h. We only support 2-dimensional data. */
+#define SPDIMS 2
+
#endif
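
The ssize convention above ('log2 minus 9') makes the new bounds easy to
check by hand, assuming the usual shift constants (UNIV_PAGE_SIZE_SHIFT 14
for the 16 KiB default, UNIV_PAGE_SIZE_SHIFT_MIN 12 for 4 KiB pages,
UNIV_ZIP_SIZE_SHIFT_MIN 10 for 1 KiB compressed pages) and a C++11
compiler for the compile-time checks:

    /* 16 KiB default page: UNIV_PAGE_SSIZE_MAX = 14 - 10 + 1 = 5
        4 KiB minimum page: UNIV_PAGE_SSIZE_MIN = 12 - 10 + 1 = 3 */
    static_assert(14 - 10 + 1 == 5, "ssize of the 16 KiB default page");
    static_assert(12 - 10 + 1 == 3, "ssize of the 4 KiB minimum page");
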
diff --git a/storage/innobase/include/usr0sess.h b/storage/innobase/include/usr0sess.h
deleted file mode 100644
index 5978332d581..00000000000
--- a/storage/innobase/include/usr0sess.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/usr0sess.h
-Sessions
-
-Created 6/25/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef usr0sess_h
-#define usr0sess_h
-
-#include "univ.i"
-#include "ut0byte.h"
-#include "trx0types.h"
-#include "srv0srv.h"
-#include "trx0types.h"
-#include "usr0types.h"
-#include "que0types.h"
-#include "data0data.h"
-#include "rem0rec.h"
-
-/*********************************************************************//**
-Opens a session.
-@return own: session object */
-UNIV_INTERN
-sess_t*
-sess_open(void);
-/*============*/
-/*********************************************************************//**
-Closes a session, freeing the memory occupied by it. */
-UNIV_INTERN
-void
-sess_close(
-/*=======*/
- sess_t* sess); /* in, own: session object */
-
-/* The session handle. This data structure is only used by purge and is
-not really necessary. We should get rid of it. */
-struct sess_t{
- ulint state; /*!< state of the session */
- trx_t* trx; /*!< transaction object permanently
- assigned for the session: the
- transaction instance designated by the
- trx id changes, but the memory
- structure is preserved */
- UT_LIST_BASE_NODE_T(que_t)
- graphs; /*!< query graphs belonging to this
- session */
-};
-
-/* Session states */
-#define SESS_ACTIVE 1
-#define SESS_ERROR 2 /* session contains an error message
- which has not yet been communicated
- to the client */
-#ifndef UNIV_NONINL
-#include "usr0sess.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/usr0sess.ic b/storage/innobase/include/usr0sess.ic
deleted file mode 100644
index 65e404fb7d1..00000000000
--- a/storage/innobase/include/usr0sess.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/usr0sess.ic
-Sessions
-
-Created 6/25/1996 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/usr0types.h b/storage/innobase/include/usr0types.h
deleted file mode 100644
index 693bb239a54..00000000000
--- a/storage/innobase/include/usr0types.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/usr0types.h
-Users and sessions global types
-
-Created 6/25/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef usr0types_h
-#define usr0types_h
-
-struct sess_t;
-
-#endif
diff --git a/storage/innobase/include/ut0bh.h b/storage/innobase/include/ut0bh.h
deleted file mode 100644
index bde310a7d44..00000000000
--- a/storage/innobase/include/ut0bh.h
+++ /dev/null
@@ -1,152 +0,0 @@
-/***************************************************************************//**
-
-Copyright (c) 2011, 2013, Oracle Corpn. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/ut0bh.h
-Binary min-heap interface.
-
-Created 2010-05-28 by Sunny Bains
-*******************************************************/
-
-#ifndef INNOBASE_UT0BH_H
-#define INNOBASE_UT0BH_H
-
-#include "univ.i"
-
-/** Comparison function for objects in the binary heap. */
-typedef int (*ib_bh_cmp_t)(const void* p1, const void* p2);
-
-struct ib_bh_t;
-
-/**********************************************************************//**
-Get the number of elements in the binary heap.
-@return number of elements */
-UNIV_INLINE
-ulint
-ib_bh_size(
-/*=======*/
- const ib_bh_t* ib_bh); /*!< in: instance */
-
-/**********************************************************************//**
-Test if binary heap is empty.
-@return TRUE if empty. */
-UNIV_INLINE
-ibool
-ib_bh_is_empty(
-/*===========*/
- const ib_bh_t* ib_bh); /*!< in: instance */
-
-/**********************************************************************//**
-Test if binary heap is full.
-@return TRUE if full. */
-UNIV_INLINE
-ibool
-ib_bh_is_full(
-/*===========*/
- const ib_bh_t* ib_bh); /*!< in: instance */
-
-/**********************************************************************//**
-Get a pointer to the element.
-@return pointer to element */
-UNIV_INLINE
-void*
-ib_bh_get(
-/*=======*/
- ib_bh_t* ib_bh, /*!< in: instance */
- ulint i); /*!< in: index */
-
-/**********************************************************************//**
-Copy an element to the binary heap.
-@return pointer to copied element */
-UNIV_INLINE
-void*
-ib_bh_set(
-/*======*/
- ib_bh_t* ib_bh, /*!< in/out: instance */
- ulint i, /*!< in: index */
- const void* elem); /*!< in: element to add */
-
-/**********************************************************************//**
-Return the first element from the binary heap.
-@return pointer to first element or NULL if empty. */
-UNIV_INLINE
-void*
-ib_bh_first(
-/*========*/
- ib_bh_t* ib_bh); /*!< in: instance */
-
-/**********************************************************************//**
-Return the last element from the binary heap.
-@return pointer to last element or NULL if empty. */
-UNIV_INLINE
-void*
-ib_bh_last(
-/*========*/
- ib_bh_t* ib_bh); /*!< in/out: instance */
-
-/**********************************************************************//**
-Create a binary heap.
-@return a new binary heap */
-UNIV_INTERN
-ib_bh_t*
-ib_bh_create(
-/*=========*/
- ib_bh_cmp_t compare, /*!< in: comparator */
- ulint sizeof_elem, /*!< in: size of one element */
- ulint max_elems); /*!< in: max elements allowed */
-
-/**********************************************************************//**
-Free a binary heap.
-@return a new binary heap */
-UNIV_INTERN
-void
-ib_bh_free(
-/*=======*/
- ib_bh_t* ib_bh); /*!< in,own: instance */
-
-/**********************************************************************//**
-Add an element to the binary heap. Note: The element is copied.
-@return pointer to added element or NULL if full. */
-UNIV_INTERN
-void*
-ib_bh_push(
-/*=======*/
- ib_bh_t* ib_bh, /*!< in/out: instance */
- const void* elem); /*!< in: element to add */
-
-/**********************************************************************//**
-Remove the first element from the binary heap. */
-UNIV_INTERN
-void
-ib_bh_pop(
-/*======*/
- ib_bh_t* ib_bh); /*!< in/out: instance */
-
-/** Binary heap data structure */
-struct ib_bh_t {
- ulint max_elems; /*!< max elements allowed */
- ulint n_elems; /*!< current size */
- ulint sizeof_elem; /*!< sizeof element */
- ib_bh_cmp_t compare; /*!< comparator */
-};
-
-#ifndef UNIV_NONINL
-#include "ut0bh.ic"
-#endif
-
-#endif /* INNOBASE_UT0BH_H */
diff --git a/storage/innobase/include/ut0bh.ic b/storage/innobase/include/ut0bh.ic
deleted file mode 100644
index 579a487fab8..00000000000
--- a/storage/innobase/include/ut0bh.ic
+++ /dev/null
@@ -1,125 +0,0 @@
-/***************************************************************************//**
-
-Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/ut0bh.ic
-Binary min-heap implementation.
-
-Created 2011-01-15 by Sunny Bains
-*******************************************************/
-
-#include "ut0bh.h"
-#include "ut0mem.h" /* For ut_memcpy() */
-
-/**********************************************************************//**
-Get the number of elements in the binary heap.
-@return number of elements */
-UNIV_INLINE
-ulint
-ib_bh_size(
-/*=======*/
- const ib_bh_t* ib_bh) /*!< in: instance */
-{
- return(ib_bh->n_elems);
-}
-
-/**********************************************************************//**
-Test if binary heap is empty.
-@return TRUE if empty. */
-UNIV_INLINE
-ibool
-ib_bh_is_empty(
-/*===========*/
- const ib_bh_t* ib_bh) /*!< in: instance */
-{
- return(ib_bh_size(ib_bh) == 0);
-}
-
-/**********************************************************************//**
-Test if binary heap is full.
-@return TRUE if full. */
-UNIV_INLINE
-ibool
-ib_bh_is_full(
-/*===========*/
- const ib_bh_t* ib_bh) /*!< in: instance */
-{
- return(ib_bh_size(ib_bh) >= ib_bh->max_elems);
-}
-
-/**********************************************************************//**
-Get a pointer to the element.
-@return pointer to element */
-UNIV_INLINE
-void*
-ib_bh_get(
-/*=======*/
- ib_bh_t* ib_bh, /*!< in: instance */
- ulint i) /*!< in: index */
-{
- byte* ptr = (byte*) (ib_bh + 1);
-
- ut_a(i < ib_bh_size(ib_bh));
-
- return(ptr + (ib_bh->sizeof_elem * i));
-}
-
-/**********************************************************************//**
-Copy an element to the binary heap.
-@return pointer to copied element */
-UNIV_INLINE
-void*
-ib_bh_set(
-/*======*/
- ib_bh_t* ib_bh, /*!< in/out: instance */
- ulint i, /*!< in: index */
- const void* elem) /*!< in: element to add */
-{
- void* ptr = ib_bh_get(ib_bh, i);
-
- ut_memcpy(ptr, elem, ib_bh->sizeof_elem);
-
- return(ptr);
-}
-
-/**********************************************************************//**
-Return the first element from the binary heap.
-@return pointer to first element or NULL if empty. */
-UNIV_INLINE
-void*
-ib_bh_first(
-/*========*/
- ib_bh_t* ib_bh) /*!< in: instance */
-{
- return(ib_bh_is_empty(ib_bh) ? NULL : ib_bh_get(ib_bh, 0));
-}
-
-/**********************************************************************//**
-Return the last element from the binary heap.
-@return pointer to last element or NULL if empty. */
-UNIV_INLINE
-void*
-ib_bh_last(
-/*========*/
- ib_bh_t* ib_bh) /*!< in/out: instance */
-{
- return(ib_bh_is_empty(ib_bh)
- ? NULL
- : ib_bh_get(ib_bh, ib_bh_size(ib_bh) - 1));
-}
-
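Since ut0bh.h and ut0bh.ic are deleted outright, the binary min-heap interface above now survives only in this diff. Reconstructed from the deleted declarations (not from any surviving caller), typical usage would have looked roughly like this:

    static int cmp_ulint(const void* p1, const void* p2)
    {
            ulint a = *static_cast<const ulint*>(p1);
            ulint b = *static_cast<const ulint*>(p2);
            return(a < b ? -1 : a > b);
    }

    ib_bh_t*  bh = ib_bh_create(cmp_ulint, sizeof(ulint), 32);

    ulint v = 42;
    ib_bh_push(bh, &v);             /* the element is copied in */

    if (!ib_bh_is_empty(bh)) {
            ulint* min = static_cast<ulint*>(ib_bh_first(bh));
            /* ... use *min ... */
            ib_bh_pop(bh);          /* remove the minimum */
    }

    ib_bh_free(bh);

Note that the elements live inline after the ib_bh_t header (ib_bh_get computes from (byte*)(ib_bh + 1)), so the heap is one contiguous allocation.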
diff --git a/storage/innobase/include/ut0byte.h b/storage/innobase/include/ut0byte.h
index eb75ecd0fee..4ce931e0189 100644
--- a/storage/innobase/include/ut0byte.h
+++ b/storage/innobase/include/ut0byte.h
@@ -32,7 +32,7 @@ Created 1/20/1994 Heikki Tuuri
/*******************************************************//**
Creates a 64-bit integer out of two 32-bit integers.
-@return created integer */
+@return created integer */
UNIV_INLINE
ib_uint64_t
ut_ull_create(
@@ -43,7 +43,7 @@ ut_ull_create(
/********************************************************//**
Rounds a 64-bit integer downward to a multiple of a power of 2.
-@return rounded value */
+@return rounded value */
UNIV_INLINE
ib_uint64_t
ut_uint64_align_down(
@@ -53,7 +53,7 @@ ut_uint64_align_down(
which must be a power of 2 */
/********************************************************//**
Rounds ib_uint64_t upward to a multiple of a power of 2.
-@return rounded value */
+@return rounded value */
UNIV_INLINE
ib_uint64_t
ut_uint64_align_up(
@@ -63,7 +63,7 @@ ut_uint64_align_up(
which must be a power of 2 */
/*********************************************************//**
The following function rounds up a pointer to the nearest aligned address.
-@return aligned pointer */
+@return aligned pointer */
UNIV_INLINE
void*
ut_align(
@@ -73,7 +73,7 @@ ut_align(
/*********************************************************//**
The following function rounds down a pointer to the nearest
aligned address.
-@return aligned pointer */
+@return aligned pointer */
UNIV_INLINE
void*
ut_align_down(
@@ -84,7 +84,7 @@ ut_align_down(
/*********************************************************//**
The following function computes the offset of a pointer from the nearest
aligned address.
-@return distance from aligned pointer */
+@return distance from aligned pointer */
UNIV_INLINE
ulint
ut_align_offset(
@@ -94,7 +94,7 @@ ut_align_offset(
MY_ATTRIBUTE((const));
/*****************************************************************//**
Gets the nth bit of a ulint.
-@return TRUE if nth bit is 1; 0th bit is defined to be the least significant */
+@return TRUE if nth bit is 1; 0th bit is defined to be the least significant */
UNIV_INLINE
ibool
ut_bit_get_nth(
@@ -103,7 +103,7 @@ ut_bit_get_nth(
ulint n); /*!< in: nth bit requested */
/*****************************************************************//**
Sets the nth bit of a ulint.
-@return the ulint with the bit set as requested */
+@return the ulint with the bit set as requested */
UNIV_INLINE
ulint
ut_bit_set_nth(
@@ -112,8 +112,6 @@ ut_bit_set_nth(
ulint n, /*!< in: nth bit requested */
ibool val); /*!< in: value for the bit to set */
-#ifndef UNIV_NONINL
#include "ut0byte.ic"
-#endif
#endif
diff --git a/storage/innobase/include/ut0byte.ic b/storage/innobase/include/ut0byte.ic
index 504744ad21e..b5b3d73fea8 100644
--- a/storage/innobase/include/ut0byte.ic
+++ b/storage/innobase/include/ut0byte.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,7 +25,7 @@ Created 5/30/1994 Heikki Tuuri
/*******************************************************//**
Creates a 64-bit integer out of two 32-bit integers.
-@return created integer */
+@return created integer */
UNIV_INLINE
ib_uint64_t
ut_ull_create(
@@ -40,7 +40,7 @@ ut_ull_create(
/********************************************************//**
Rounds a 64-bit integer downward to a multiple of a power of 2.
-@return rounded value */
+@return rounded value */
UNIV_INLINE
ib_uint64_t
ut_uint64_align_down(
@@ -57,7 +57,7 @@ ut_uint64_align_down(
/********************************************************//**
Rounds ib_uint64_t upward to a multiple of a power of 2.
-@return rounded value */
+@return rounded value */
UNIV_INLINE
ib_uint64_t
ut_uint64_align_up(
@@ -76,7 +76,7 @@ ut_uint64_align_up(
/*********************************************************//**
The following function rounds up a pointer to the nearest aligned address.
-@return aligned pointer */
+@return aligned pointer */
UNIV_INLINE
void*
ut_align(
@@ -96,7 +96,7 @@ ut_align(
/*********************************************************//**
The following function rounds down a pointer to the nearest
aligned address.
-@return aligned pointer */
+@return aligned pointer */
UNIV_INLINE
void*
ut_align_down(
@@ -116,7 +116,7 @@ ut_align_down(
/*********************************************************//**
The following function computes the offset of a pointer from the nearest
aligned address.
-@return distance from aligned pointer */
+@return distance from aligned pointer */
UNIV_INLINE
ulint
ut_align_offset(
@@ -135,7 +135,7 @@ ut_align_offset(
/*****************************************************************//**
Gets the nth bit of a ulint.
-@return TRUE if nth bit is 1; 0th bit is defined to be the least significant */
+@return TRUE if nth bit is 1; 0th bit is defined to be the least significant */
UNIV_INLINE
ibool
ut_bit_get_nth(
@@ -152,7 +152,7 @@ ut_bit_get_nth(
/*****************************************************************//**
Sets the nth bit of a ulint.
-@return the ulint with the bit set as requested */
+@return the ulint with the bit set as requested */
UNIV_INLINE
ulint
ut_bit_set_nth(
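The ut0byte changes are doc-comment whitespace only, but the helpers themselves (ut_align, ut_align_down, ut_align_offset) all reduce to the same power-of-two mask arithmetic. A standalone illustration of that arithmetic, using illustrative my_* names:

    #include <cstdint>
    #include <cstdio>

    /* Round ptr down to a multiple of align (align must be a power of 2). */
    static void* my_align_down(void* ptr, std::uintptr_t align)
    {
            return reinterpret_cast<void*>(
                    reinterpret_cast<std::uintptr_t>(ptr) & ~(align - 1));
    }

    /* Distance of ptr from the preceding aligned address. */
    static std::uintptr_t my_align_offset(void* ptr, std::uintptr_t align)
    {
            return reinterpret_cast<std::uintptr_t>(ptr) & (align - 1);
    }

    int main()
    {
            char  buf[64];
            void* p = buf + 13;

            std::printf("down: %p offset: %zu\n",
                        my_align_down(p, 16),
                        static_cast<size_t>(my_align_offset(p, 16)));
    }

Rounding up is the same mask applied after adding align - 1, which is why all of these require the alignment to be a power of two.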
diff --git a/storage/innobase/include/ut0counter.h b/storage/innobase/include/ut0counter.h
index d2a6c1eb3e3..a04a674751c 100644
--- a/storage/innobase/include/ut0counter.h
+++ b/storage/innobase/include/ut0counter.h
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2012, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,15 +25,20 @@ Counter utility class
Created 2012/04/12 by Sunny Bains
*******************************************************/
-#ifndef UT0COUNTER_H
-#define UT0COUNTER_H
+#ifndef ut0counter_h
+#define ut0counter_h
-#include "univ.i"
-#include <string.h>
#include "os0thread.h"
-#include "os0sync.h"
+#include "my_rdtsc.h"
#include "my_atomic.h"
+/** CPU cache line size */
+#ifdef CPU_LEVEL1_DCACHE_LINESIZE
+# define CACHE_LINE_SIZE CPU_LEVEL1_DCACHE_LINESIZE
+#else
+# error CPU_LEVEL1_DCACHE_LINESIZE is undefined
+#endif /* CPU_LEVEL1_DCACHE_LINESIZE */
+
/** Default number of slots to use in ib_counter_t */
#define IB_N_SLOTS 64
@@ -41,47 +46,46 @@ Created 2012/04/12 by Sunny Bains
template <typename Type, int N>
struct generic_indexer_t {
/** @return offset within m_counter */
- size_t offset(size_t index) const UNIV_NOTHROW {
+ static size_t offset(size_t index) UNIV_NOTHROW
+ {
return(((index % N) + 1) * (CACHE_LINE_SIZE / sizeof(Type)));
}
};
-#ifdef HAVE_SCHED_GETCPU
-#include <utmpx.h>
-/** Use the cpu id to index into the counter array. If it fails then
-use the thread id. */
-template <typename Type, int N>
-struct get_sched_indexer_t : public generic_indexer_t<Type, N> {
- /* @return result from sched_getcpu(), the thread id if it fails. */
- size_t get_rnd_index() const UNIV_NOTHROW {
-
- size_t cpu = sched_getcpu();
- if (cpu == -1) {
- cpu = (lint) os_thread_get_curr_id();
+/** Use the result of my_timer_cycles(), which mainly uses RDTSC for cycles,
+to index into the counter array. See the comments for my_timer_cycles() */
+template <typename Type=ulint, int N=1>
+struct counter_indexer_t : public generic_indexer_t<Type, N> {
+ /** @return result from RDTSC or similar functions. */
+ static size_t get_rnd_index() UNIV_NOTHROW
+ {
+ size_t c = static_cast<size_t>(my_timer_cycles());
+
+ if (c != 0) {
+ return(c);
+ } else {
+ /* We may get here if my_timer_cycles() returns 0,
+ so we need a plan B for the counter. */
+#if !defined(_WIN32)
+ return(size_t(os_thread_get_curr_id()));
+#else
+ LARGE_INTEGER cnt;
+ QueryPerformanceCounter(&cnt);
+
+ return(static_cast<size_t>(cnt.QuadPart));
+#endif /* !_WIN32 */
}
-
- return(cpu);
- }
-};
-#endif /* HAVE_SCHED_GETCPU */
-
-/** Use the thread id to index into the counter array. */
-template <typename Type, int N>
-struct thread_id_indexer_t : public generic_indexer_t<Type, N> {
- /* @return a random number, currently we use the thread id. Where
- thread id is represented as a pointer, it may not work as
- effectively. */
- size_t get_rnd_index() const UNIV_NOTHROW {
- return((lint) os_thread_get_curr_id());
}
/** @return a random offset to the array */
- size_t get_rnd_offset() const UNIV_NOTHROW
+ static size_t get_rnd_offset() UNIV_NOTHROW
{
return(generic_indexer_t<Type, N>::offset(get_rnd_index()));
}
};
+#define default_indexer_t counter_indexer_t
+
/** Class for using fuzzy counters. The counter is relaxed atomic
so the results are not guaranteed to be 100% accurate but close
enough. Creates an array of counters and separates each element by the
@@ -89,7 +93,7 @@ CACHE_LINE_SIZE bytes */
template <
typename Type,
int N = IB_N_SLOTS,
- template<typename, int> class Indexer = thread_id_indexer_t>
+ template<typename, int> class Indexer = default_indexer_t>
struct MY_ALIGNED(CACHE_LINE_SIZE) ib_counter_t
{
/** Increment the counter by 1. */
@@ -154,4 +158,4 @@ private:
Type m_counter[(N + 1) * (CACHE_LINE_SIZE / sizeof(Type))];
};
-#endif /* UT0COUNTER_H */
+#endif /* ut0counter_h */
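The reworked ib_counter_t above spaces counter slots one cache line apart and derives the slot index from my_timer_cycles() (with the thread id or QueryPerformanceCounter as plan B), so concurrent writers rarely touch the same line. A standalone sketch of that sharding idea using std::atomic; the slot count, line size, and hint source are illustrative:

    #include <atomic>
    #include <cstddef>

    static const std::size_t SLOTS = 64;
    static const std::size_t LINE  = 64;    /* assumed cache line size */

    struct sharded_counter {
            /* Pad each slot to its own cache line to avoid false sharing. */
            struct alignas(LINE) slot { std::atomic<unsigned long> v{0}; };
            slot s[SLOTS];

            void inc(std::size_t hint)      /* hint: thread id, rdtsc, ... */
            {
                    s[hint % SLOTS].v.fetch_add(1, std::memory_order_relaxed);
            }

            unsigned long total() const     /* fuzzy: not an atomic snapshot */
            {
                    unsigned long t = 0;
                    for (std::size_t i = 0; i < SLOTS; i++) {
                            t += s[i].v.load(std::memory_order_relaxed);
                    }
                    return t;
            }
    };

As with ib_counter_t, the read side sums all slots, so the result is "close enough" rather than a point-in-time snapshot.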
diff --git a/storage/innobase/include/ut0crc32.h b/storage/innobase/include/ut0crc32.h
index 391c239c56d..178be98fee6 100644
--- a/storage/innobase/include/ut0crc32.h
+++ b/storage/innobase/include/ut0crc32.h
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2016, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -29,24 +30,29 @@ Created Aug 10, 2011 Vasil Dimov
#include "univ.i"
/********************************************************************//**
-Initializes the data structures used by ut_crc32(). Does not do any
+Initializes the data structures used by ut_crc32*(). Does not do any
allocations, would not hurt if called twice, but would be pointless. */
-UNIV_INTERN
void
ut_crc32_init();
/*===========*/
/********************************************************************//**
Calculates CRC32.
-@param ptr - data over which to calculate CRC32.
-@param len - data length in bytes.
+@param ptr - data over which to calculate CRC32.
+@param len - data length in bytes.
@return CRC32 (CRC-32C, using the GF(2) primitive polynomial 0x11EDC6F41,
or 0x1EDC6F41 without the high-order bit) */
-typedef ib_uint32_t (*ib_ut_crc32_t)(const byte* ptr, ulint len);
+typedef uint32_t (*ut_crc32_func_t)(const byte* ptr, ulint len);
-extern ib_ut_crc32_t ut_crc32;
+/** Pointer to CRC32 calculation function. */
+extern ut_crc32_func_t ut_crc32;
-extern bool ut_crc32_sse2_enabled;
-extern bool ut_crc32_power8_enabled;
+#ifdef INNODB_BUG_ENDIAN_CRC32
+/** Pointer to CRC32 calculation function, which uses big-endian byte order
+when converting byte strings to integers internally. */
+extern ut_crc32_func_t ut_crc32_legacy_big_endian;
+#endif /* INNODB_BUG_ENDIAN_CRC32 */
+
+extern const char* ut_crc32_implementation;
#endif /* ut0crc32_h */
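ut_crc32 above is a function pointer that ut_crc32_init() aims at the fastest implementation the CPU supports, with ut_crc32_implementation naming the choice. A hedged standalone sketch of that dispatch pattern; the crc32c_* names are illustrative, and the bitwise loop computes CRC-32C (reversed polynomial 0x82F63B78):

    #include <cstdint>
    #include <cstddef>

    typedef std::uint32_t (*crc32_func_t)(const unsigned char* ptr,
                                          std::size_t len);

    /* Portable bitwise CRC-32C: slow, but always available. */
    static std::uint32_t crc32c_sw(const unsigned char* p, std::size_t n)
    {
            std::uint32_t crc = 0xFFFFFFFFu;
            while (n--) {
                    crc ^= *p++;
                    for (int i = 0; i < 8; i++) {
                            crc = (crc >> 1)
                                    ^ (0x82F63B78u & (0u - (crc & 1u)));
                    }
            }
            return ~crc;
    }

    /* Global function pointer plus a human-readable tag, as above. */
    static crc32_func_t crc32c      = crc32c_sw;
    static const char*  crc32c_impl = "software";

    void crc32c_init()
    {
            /* A real init would probe the CPU here (e.g. CPUID for
            SSE4.2) and retarget the pointer at a hardware routine,
            updating crc32c_impl accordingly. */
    }

Callers always go through the pointer, so the selection cost is paid once at startup rather than per call.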
diff --git a/storage/innobase/include/ut0dbg.h b/storage/innobase/include/ut0dbg.h
index efc670184b8..5d3fa1cf987 100644
--- a/storage/innobase/include/ut0dbg.h
+++ b/storage/innobase/include/ut0dbg.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -33,100 +33,146 @@ Created 1/30/1994 Heikki Tuuri
#define ut_error assert(0)
#else /* !UNIV_INNOCHECKSUM */
-#include "univ.i"
-#include <stdlib.h>
-#include "os0thread.h"
-
-#if defined(__GNUC__) && (__GNUC__ > 2)
-/** Test if an assertion fails.
-@param EXPR assertion expression
-@return nonzero if EXPR holds, zero if not */
-# define UT_DBG_FAIL(EXPR) UNIV_UNLIKELY(!((ulint)(EXPR)))
-#else
-/** This is used to eliminate compiler warnings */
-extern ulint ut_dbg_zero;
-/** Test if an assertion fails.
-@param EXPR assertion expression
-@return nonzero if EXPR holds, zero if not */
-# define UT_DBG_FAIL(EXPR) !((ulint)(EXPR) + ut_dbg_zero)
-#endif
+/* Do not include univ.i because univ.i includes this. */
/*************************************************************//**
Report a failed assertion. */
-UNIV_INTERN
+ATTRIBUTE_NORETURN ATTRIBUTE_COLD __attribute__((nonnull(2)))
void
ut_dbg_assertion_failed(
/*====================*/
const char* expr, /*!< in: the failed assertion */
const char* file, /*!< in: source file containing the assertion */
- ulint line) /*!< in: line number of the assertion */
- UNIV_COLD MY_ATTRIBUTE((nonnull(2)));
-
-/** Abort the execution. */
-# define UT_DBG_PANIC abort()
+ unsigned line); /*!< in: line number of the assertion */
/** Abort execution if EXPR does not evaluate to nonzero.
-@param EXPR assertion expression that should hold */
+@param EXPR assertion expression that should hold */
#define ut_a(EXPR) do { \
- if (UT_DBG_FAIL(EXPR)) { \
+ if (UNIV_UNLIKELY(!(ulint) (EXPR))) { \
ut_dbg_assertion_failed(#EXPR, \
- __FILE__, (ulint) __LINE__); \
- UT_DBG_PANIC; \
+ __FILE__, __LINE__); \
} \
} while (0)
/** Abort execution. */
-#define ut_error do { \
- ut_dbg_assertion_failed(0, __FILE__, (ulint) __LINE__); \
- UT_DBG_PANIC; \
-} while (0)
+#define ut_error \
+ ut_dbg_assertion_failed(0, __FILE__, __LINE__)
+/** Debug assertion */
+#define ut_ad DBUG_ASSERT
#ifdef UNIV_DEBUG
-/** Debug assertion. Does nothing unless UNIV_DEBUG is defined. */
-#define ut_ad(EXPR) ut_a(EXPR)
/** Debug statement. Does nothing unless UNIV_DEBUG is defined. */
#define ut_d(EXPR) EXPR
#else
-/** Debug assertion. Does nothing unless UNIV_DEBUG is defined. */
-#define ut_ad(EXPR)
/** Debug statement. Does nothing unless UNIV_DEBUG is defined. */
#define ut_d(EXPR)
#endif
-/** Silence warnings about an unused variable by doing a null assignment.
-@param A the unused variable */
-#define UT_NOT_USED(A) A = A
+#if defined(HAVE_SYS_TIME_H) && defined(HAVE_SYS_RESOURCE_H)
-#ifdef UNIV_COMPILE_TEST_FUNCS
+#define HAVE_UT_CHRONO_T
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
-/** structure used for recording usage statistics */
-struct speedo_t {
- struct rusage ru; /*!< getrusage() result */
- struct timeval tv; /*!< gettimeofday() result */
+/** A "chronometer" used to clock snippets of code.
+Example usage:
+ ut_chrono_t ch("this loop");
+ for (;;) { ... }
+ ch.show();
+would print the timings of the for() loop, prefixed with "this loop:" */
+class ut_chrono_t {
+public:
+ /** Constructor.
+ @param[in] name chrono's name, used when showing the values */
+ ut_chrono_t(
+ const char* name)
+ :
+ m_name(name),
+ m_show_from_destructor(true)
+ {
+ reset();
+ }
+
+ /** Resets the chrono (records the current time in it). */
+ void
+ reset()
+ {
+ gettimeofday(&m_tv, NULL);
+
+ getrusage(RUSAGE_SELF, &m_ru);
+ }
+
+ /** Shows the time elapsed and usage statistics since the last reset. */
+ void
+ show()
+ {
+ struct rusage ru_now;
+ struct timeval tv_now;
+ struct timeval tv_diff;
+
+ getrusage(RUSAGE_SELF, &ru_now);
+
+ gettimeofday(&tv_now, NULL);
+
+#ifndef timersub
+#define timersub(a, b, r) \
+ do { \
+ (r)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
+ (r)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
+ if ((r)->tv_usec < 0) { \
+ (r)->tv_sec--; \
+ (r)->tv_usec += 1000000; \
+ } \
+ } while (0)
+#endif /* timersub */
+
+#define CHRONO_PRINT(type, tvp) \
+ fprintf(stderr, "%s: %s% 5ld.%06ld sec\n", \
+ m_name, type, \
+ static_cast<long>((tvp)->tv_sec), \
+ static_cast<long>((tvp)->tv_usec))
+
+ timersub(&tv_now, &m_tv, &tv_diff);
+ CHRONO_PRINT("real", &tv_diff);
+
+ timersub(&ru_now.ru_utime, &m_ru.ru_utime, &tv_diff);
+ CHRONO_PRINT("user", &tv_diff);
+
+ timersub(&ru_now.ru_stime, &m_ru.ru_stime, &tv_diff);
+ CHRONO_PRINT("sys ", &tv_diff);
+ }
+
+ /** Cause the timings not to be printed from the destructor. */
+ void end()
+ {
+ m_show_from_destructor = false;
+ }
+
+ /** Destructor. */
+ ~ut_chrono_t()
+ {
+ if (m_show_from_destructor) {
+ show();
+ }
+ }
+
+private:
+ /** Name of this chronometer. */
+ const char* m_name;
+
+ /** True if the current timings should be printed by the destructor. */
+ bool m_show_from_destructor;
+
+ /** getrusage() result as of the last reset(). */
+ struct rusage m_ru;
+
+ /** gettimeofday() result as of the last reset(). */
+ struct timeval m_tv;
};
-/*******************************************************************//**
-Resets a speedo (records the current time in it). */
-UNIV_INTERN
-void
-speedo_reset(
-/*=========*/
- speedo_t* speedo); /*!< out: speedo */
-
-/*******************************************************************//**
-Shows the time elapsed and usage statistics since the last reset of a
-speedo. */
-UNIV_INTERN
-void
-speedo_show(
-/*========*/
- const speedo_t* speedo); /*!< in: speedo */
-
-#endif /* UNIV_COMPILE_TEST_FUNCS */
+#endif /* HAVE_SYS_TIME_H && HAVE_SYS_RESOURCE_H */
#endif /* !UNIV_INNOCHECKSUM */
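ut_chrono_t above replaces the speedo_t pair of free functions with an RAII chronometer: the destructor prints the timings unless end() was called first. Following the class's own doc comment, usage looks like:

    void timed_section()
    {
            ut_chrono_t ch("checksum loop");        /* clock starts here */

            for (int i = 0; i < 1000000; i++) {
                    /* work being measured */
            }

            ch.show();      /* print real/user/sys time since reset() */
            ch.end();       /* suppress the duplicate print in the dtor */
    }

Omitting both calls is also valid: the destructor then prints once when the scope closes, which is the intended fire-and-forget mode.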
diff --git a/storage/innobase/include/ut0list.h b/storage/innobase/include/ut0list.h
index 4f35624acbf..7e27e10884b 100644
--- a/storage/innobase/include/ut0list.h
+++ b/storage/innobase/include/ut0list.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -54,46 +54,21 @@ struct ib_list_node_t;
/****************************************************************//**
Create a new list using mem_alloc. Lists created with this function must be
freed with ib_list_free.
-@return list */
-UNIV_INTERN
+@return list */
ib_list_t*
ib_list_create(void);
/*=================*/
-
-/****************************************************************//**
-Create a new list using the given heap. ib_list_free MUST NOT BE CALLED for
-lists created with this function.
-@return list */
-UNIV_INTERN
-ib_list_t*
-ib_list_create_heap(
-/*================*/
- mem_heap_t* heap); /*!< in: memory heap to use */
-
/****************************************************************//**
Free a list. */
-UNIV_INTERN
void
ib_list_free(
/*=========*/
ib_list_t* list); /*!< in: list */
/****************************************************************//**
-Add the data to the start of the list.
-@return new list node */
-UNIV_INTERN
-ib_list_node_t*
-ib_list_add_first(
-/*==============*/
- ib_list_t* list, /*!< in: list */
- void* data, /*!< in: data */
- mem_heap_t* heap); /*!< in: memory heap to use */
-
-/****************************************************************//**
Add the data to the end of the list.
-@return new list node */
-UNIV_INTERN
+@return new list node */
ib_list_node_t*
ib_list_add_last(
/*=============*/
@@ -102,21 +77,7 @@ ib_list_add_last(
mem_heap_t* heap); /*!< in: memory heap to use */
/****************************************************************//**
-Add the data after the indicated node.
-@return new list node */
-UNIV_INTERN
-ib_list_node_t*
-ib_list_add_after(
-/*==============*/
- ib_list_t* list, /*!< in: list */
- ib_list_node_t* prev_node, /*!< in: node preceding new node (can
- be NULL) */
- void* data, /*!< in: data */
- mem_heap_t* heap); /*!< in: memory heap to use */
-
-/****************************************************************//**
Remove the node from the list. */
-UNIV_INTERN
void
ib_list_remove(
/*===========*/
@@ -125,7 +86,7 @@ ib_list_remove(
/****************************************************************//**
Get the first node in the list.
-@return first node, or NULL */
+@return first node, or NULL */
UNIV_INLINE
ib_list_node_t*
ib_list_get_first(
@@ -134,7 +95,7 @@ ib_list_get_first(
/****************************************************************//**
Get the last node in the list.
-@return last node, or NULL */
+@return last node, or NULL */
UNIV_INLINE
ib_list_node_t*
ib_list_get_last(
@@ -163,8 +124,6 @@ ib_list_len(
struct ib_list_t {
ib_list_node_t* first; /*!< first node */
ib_list_node_t* last; /*!< last node */
- ibool is_heap_list; /*!< TRUE if this list was
- allocated through a heap */
};
/* A list node. */
@@ -182,8 +141,6 @@ struct ib_list_helper_t {
void* data; /*!< user data */
};
-#ifndef UNIV_NONINL
#include "ut0list.ic"
-#endif
#endif
diff --git a/storage/innobase/include/ut0list.ic b/storage/innobase/include/ut0list.ic
index 89f411a2a64..3bdba52bfaa 100644
--- a/storage/innobase/include/ut0list.ic
+++ b/storage/innobase/include/ut0list.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2006, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,7 +25,7 @@ Created 4/26/2006 Osku Salerma
/****************************************************************//**
Get the first node in the list.
-@return first node, or NULL */
+@return first node, or NULL */
UNIV_INLINE
ib_list_node_t*
ib_list_get_first(
@@ -37,7 +37,7 @@ ib_list_get_first(
/****************************************************************//**
Get the last node in the list.
-@return last node, or NULL */
+@return last node, or NULL */
UNIV_INLINE
ib_list_node_t*
ib_list_get_last(
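For contrast with the intrusive UT_LIST machinery changed below, ut0list's ib_list_t keeps user data pointers in separately allocated nodes. A hedged sketch of the API that survives the trimming above, assuming the usual prev/next/data node fields and the mem_heap_create()/mem_heap_free() helpers from mem0mem.h:

    void collect(void* some_data)
    {
            ib_list_t*      list = ib_list_create();
            mem_heap_t*     heap = mem_heap_create(256);    /* assumed */

            ib_list_add_last(list, some_data, heap);        /* node from heap */

            for (ib_list_node_t* node = ib_list_get_first(list);
                 node != NULL;
                 node = node->next) {                       /* assumed field */
                    /* ... use node->data ... */
            }

            ib_list_free(list);     /* valid: list came from ib_list_create() */
            mem_heap_free(heap);                            /* assumed */
    }

With ib_list_create_heap() and is_heap_list gone, ib_list_free() is the only lifetime rule left, which is precisely the simplification this hunk makes.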
diff --git a/storage/innobase/include/ut0lst.h b/storage/innobase/include/ut0lst.h
index 741d55dca67..9a5f3059826 100644
--- a/storage/innobase/include/ut0lst.h
+++ b/storage/innobase/include/ut0lst.h
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -21,122 +22,150 @@ this program; if not, write to the Free Software Foundation, Inc.,
List utilities
Created 9/10/1995 Heikki Tuuri
+Rewritten by Sunny Bains Dec 2011.
***********************************************************************/
#ifndef ut0lst_h
#define ut0lst_h
-#include "univ.i"
+/* Do not include univ.i because univ.i includes this. */
+
+#include "ut0dbg.h"
+
+/* This module implements the two-way linear list. Note that a single
+list node may belong to two or more lists, but is only on one list
+at a time. */
/*******************************************************************//**
-Return offset of F in POD T.
-@param T - POD pointer
-@param F - Field in T */
-#define IB_OFFSETOF(T, F) \
- (reinterpret_cast<byte*>(&(T)->F) - reinterpret_cast<byte*>(T))
+The two way list node.
+@param TYPE the list node type name */
+template <typename Type>
+struct ut_list_node {
+ Type* prev; /*!< pointer to the previous
+ node, NULL if start of list */
+ Type* next; /*!< pointer to next node,
+ NULL if end of list */
+
+ void reverse()
+ {
+ Type* tmp = prev;
+ prev = next;
+ next = tmp;
+ }
+};
-/* This module implements the two-way linear list which should be used
-if a list is used in the database. Note that a single struct may belong
-to two or more lists, provided that the list are given different names.
-An example of the usage of the lists can be found in fil0fil.cc. */
+/** Macro used for legacy reasons */
+#define UT_LIST_NODE_T(t) ut_list_node<t>
/*******************************************************************//**
-This macro expands to the unnamed type definition of a struct which acts
-as the two-way list base node. The base node contains pointers
-to both ends of the list and a count of nodes in the list (excluding
-the base node from the count).
-@param TYPE the name of the list node data type */
-template <typename TYPE>
+The two-way list base node. The base node contains pointers to both ends
+of the list and a count of nodes in the list (excluding the base node
+from the count). We also store a pointer to the member field so that it
+doesn't have to be specified when doing list operations.
+@param Type the type of the list element
+@param NodePtr field member pointer that points to the list node */
+template <typename Type, typename NodePtr>
struct ut_list_base {
- typedef TYPE elem_type;
-
- ulint count; /*!< count of nodes in list */
- TYPE* start; /*!< pointer to list start, NULL if empty */
- TYPE* end; /*!< pointer to list end, NULL if empty */
+ typedef Type elem_type;
+ typedef NodePtr node_ptr;
+ typedef ut_list_node<Type> node_type;
+
+ ulint count; /*!< count of nodes in list */
+ elem_type* start; /*!< pointer to list start,
+ NULL if empty */
+ elem_type* end; /*!< pointer to list end,
+ NULL if empty */
+ node_ptr node; /*!< Pointer to member field
+ that is used as a link node */
+#ifdef UNIV_DEBUG
+ ulint init; /*!< UT_LIST_INITIALISED if
+ the list was initialised with
+ UT_LIST_INIT() */
+#endif /* UNIV_DEBUG */
+
+ void reverse()
+ {
+ Type* tmp = start;
+ start = end;
+ end = tmp;
+ }
};
-#define UT_LIST_BASE_NODE_T(TYPE) ut_list_base<TYPE>
+#define UT_LIST_BASE_NODE_T(t) ut_list_base<t, ut_list_node<t> t::*>
+
+#ifdef UNIV_DEBUG
+# define UT_LIST_INITIALISED 0xCAFE
+# define UT_LIST_INITIALISE(b) (b).init = UT_LIST_INITIALISED
+# define UT_LIST_IS_INITIALISED(b) ut_a(((b).init == UT_LIST_INITIALISED))
+#else
+# define UT_LIST_INITIALISE(b)
+# define UT_LIST_IS_INITIALISED(b)
+#endif /* UNIV_DEBUG */
/*******************************************************************//**
-This macro expands to the unnamed type definition of a struct which
-should be embedded in the nodes of the list, the node type must be a struct.
-This struct contains the pointers to next and previous nodes in the list.
-The name of the field in the node struct should be the name given
-to the list.
-@param TYPE the list node type name */
-/* Example:
-struct LRU_node_t {
- UT_LIST_NODE_T(LRU_node_t) LRU_list;
- ...
+Note: This is really the list constructor. We should be able to use
+placement new here.
+Initializes the base node of a two-way list.
+@param b the list base node
+@param pmf point to member field that will be used as the link node */
+#define UT_LIST_INIT(b, pmf) \
+{ \
+ (b).count = 0; \
+ (b).start = 0; \
+ (b).end = 0; \
+ (b).node = pmf; \
+ UT_LIST_INITIALISE(b); \
}
-The example implements an LRU list of name LRU_list. Its nodes are of type
-LRU_node_t. */
-template <typename TYPE>
-struct ut_list_node {
- TYPE* prev; /*!< pointer to the previous node,
- NULL if start of list */
- TYPE* next; /*!< pointer to next node, NULL if end of list */
-};
+/** Functor for accessing the embedded node within a list element. This is
+required because some lists can have the node embedded inside a nested
+struct/union. See lock0priv.h (table locks) for an example. It provides a
+specialised functor to grant access to the list node. */
+template <typename Type>
+struct GenericGetNode {
-#define UT_LIST_NODE_T(TYPE) ut_list_node<TYPE>
+ typedef ut_list_node<Type> node_type;
-/*******************************************************************//**
-Get the list node at offset.
-@param elem - list element
-@param offset - offset within element.
-@return reference to list node. */
-template <typename Type>
-ut_list_node<Type>&
-ut_elem_get_node(Type& elem, size_t offset)
-{
- ut_a(offset < sizeof(elem));
+ GenericGetNode(node_type Type::* node) : m_node(node) {}
- return(*reinterpret_cast<ut_list_node<Type>*>(
- reinterpret_cast<byte*>(&elem) + offset));
-}
+ node_type& operator() (Type& elem)
+ {
+ return(elem.*m_node);
+ }
-/*******************************************************************//**
-Initializes the base node of a two-way list.
-@param BASE the list base node
-*/
-#define UT_LIST_INIT(BASE)\
-{\
- (BASE).count = 0;\
- (BASE).start = NULL;\
- (BASE).end = NULL;\
-}\
+ node_type Type::*m_node;
+};
/*******************************************************************//**
Adds the node as the first element in a two-way linked list.
-@param list the base node (not a pointer to it)
-@param elem the element to add
-@param offset offset of list node in elem. */
-template <typename List, typename Type>
+@param list the base node (not a pointer to it)
+@param elem the element to add */
+template <typename List>
void
ut_list_prepend(
- List& list,
- Type& elem,
- size_t offset)
+ List& list,
+ typename List::elem_type* elem)
{
- ut_list_node<Type>& elem_node = ut_elem_get_node(elem, offset);
+ typename List::node_type& elem_node = elem->*list.node;
+
+ UT_LIST_IS_INITIALISED(list);
- elem_node.prev = 0;
- elem_node.next = list.start;
+ elem_node.prev = 0;
+ elem_node.next = list.start;
if (list.start != 0) {
- ut_list_node<Type>& base_node =
- ut_elem_get_node(*list.start, offset);
+ typename List::node_type& base_node =
+ list.start->*list.node;
- ut_ad(list.start != &elem);
+ ut_ad(list.start != elem);
- base_node.prev = &elem;
+ base_node.prev = elem;
}
- list.start = &elem;
+ list.start = elem;
if (list.end == 0) {
- list.end = &elem;
+ list.end = elem;
}
++list.count;
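The rewrite in this hunk replaces IB_OFFSETOF offset arithmetic with a pointer-to-member stored in the list base node, so operations no longer take an offset (or list-name) argument. A hedged sketch of declaring and filling a list under the new scheme; page_t and its fields are illustrative:

    struct page_t {
            int                     id;
            UT_LIST_NODE_T(page_t)  list;   /* embedded link node */
    };

    /* The base node remembers &page_t::list, so later operations
    need only the element pointer. */
    UT_LIST_BASE_NODE_T(page_t)     lru;

    void init_and_fill(page_t* a, page_t* b)
    {
            UT_LIST_INIT(lru, &page_t::list);

            UT_LIST_ADD_LAST(lru, a);       /* ut_list_append(lru, a) */
            UT_LIST_ADD_FIRST(lru, b);      /* ut_list_prepend(lru, b) */
    }

In debug builds UT_LIST_INIT also stamps the 0xCAFE marker, so any operation on an uninitialised base node trips UT_LIST_IS_INITIALISED immediately.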
@@ -144,42 +173,41 @@ ut_list_prepend(
/*******************************************************************//**
Adds the node as the first element in a two-way linked list.
-@param NAME list name
-@param LIST the base node (not a pointer to it)
-@param ELEM the element to add */
-#define UT_LIST_ADD_FIRST(NAME, LIST, ELEM) \
- ut_list_prepend(LIST, *ELEM, IB_OFFSETOF(ELEM, NAME))
+@param LIST the base node (not a pointer to it)
+@param ELEM the element to add */
+#define UT_LIST_ADD_FIRST(LIST, ELEM) ut_list_prepend(LIST, ELEM)
/*******************************************************************//**
Adds the node as the last element in a two-way linked list.
-@param list list
-@param elem the element to add
-@param offset offset of list node in elem */
-template <typename List, typename Type>
+@param list list
+@param elem the element to add
+@param get_node to get the list node for that element */
+template <typename List, typename Functor>
void
ut_list_append(
- List& list,
- Type& elem,
- size_t offset)
+ List& list,
+ typename List::elem_type* elem,
+ Functor get_node)
{
- ut_list_node<Type>& elem_node = ut_elem_get_node(elem, offset);
+ typename List::node_type& node = get_node(*elem);
- elem_node.next = 0;
- elem_node.prev = list.end;
+ UT_LIST_IS_INITIALISED(list);
+
+ node.next = 0;
+ node.prev = list.end;
if (list.end != 0) {
- ut_list_node<Type>& base_node =
- ut_elem_get_node(*list.end, offset);
+ typename List::node_type& base_node = get_node(*list.end);
- ut_ad(list.end != &elem);
+ ut_ad(list.end != elem);
- base_node.next = &elem;
+ base_node.next = elem;
}
- list.end = &elem;
+ list.end = elem;
if (list.start == 0) {
- list.start = &elem;
+ list.start = elem;
}
++list.count;
@@ -187,45 +215,57 @@ ut_list_append(
/*******************************************************************//**
Adds the node as the last element in a two-way linked list.
-@param NAME list name
-@param LIST list
-@param ELEM the element to add */
-#define UT_LIST_ADD_LAST(NAME, LIST, ELEM)\
- ut_list_append(LIST, *ELEM, IB_OFFSETOF(ELEM, NAME))
+@param list list
+@param elem the element to add */
+template <typename List>
+void
+ut_list_append(
+ List& list,
+ typename List::elem_type* elem)
+{
+ ut_list_append(
+ list, elem,
+ GenericGetNode<typename List::elem_type>(list.node));
+}
+
+/*******************************************************************//**
+Adds the node as the last element in a two-way linked list.
+@param LIST list base node (not a pointer to it)
+@param ELEM the element to add */
+#define UT_LIST_ADD_LAST(LIST, ELEM) ut_list_append(LIST, ELEM)
/*******************************************************************//**
Inserts a ELEM2 after ELEM1 in a list.
-@param list the base node
-@param elem1 node after which ELEM2 is inserted
-@param elem2 node being inserted after NODE1
-@param offset offset of list node in elem1 and elem2 */
-template <typename List, typename Type>
+@param list the base node
+@param elem1 node after which ELEM2 is inserted
+@param elem2 node being inserted after ELEM1 */
+template <typename List>
void
ut_list_insert(
- List& list,
- Type& elem1,
- Type& elem2,
- size_t offset)
+ List& list,
+ typename List::elem_type* elem1,
+ typename List::elem_type* elem2)
{
- ut_ad(&elem1 != &elem2);
+ ut_ad(elem1 != elem2);
+ UT_LIST_IS_INITIALISED(list);
- ut_list_node<Type>& elem1_node = ut_elem_get_node(elem1, offset);
- ut_list_node<Type>& elem2_node = ut_elem_get_node(elem2, offset);
+ typename List::node_type& elem1_node = elem1->*list.node;
+ typename List::node_type& elem2_node = elem2->*list.node;
- elem2_node.prev = &elem1;
+ elem2_node.prev = elem1;
elem2_node.next = elem1_node.next;
if (elem1_node.next != NULL) {
- ut_list_node<Type>& next_node =
- ut_elem_get_node(*elem1_node.next, offset);
+ typename List::node_type& next_node =
+ elem1_node.next->*list.node;
- next_node.prev = &elem2;
+ next_node.prev = elem2;
}
- elem1_node.next = &elem2;
+ elem1_node.next = elem2;
- if (list.end == &elem1) {
- list.end = &elem2;
+ if (list.end == elem1) {
+ list.end = elem2;
}
++list.count;
@@ -233,132 +273,174 @@ ut_list_insert(
/*******************************************************************//**
Inserts a ELEM2 after ELEM1 in a list.
-@param NAME list name
-@param LIST the base node
-@param ELEM1 node after which ELEM2 is inserted
-@param ELEM2 node being inserted after ELEM1 */
-#define UT_LIST_INSERT_AFTER(NAME, LIST, ELEM1, ELEM2)\
- ut_list_insert(LIST, *ELEM1, *ELEM2, IB_OFFSETOF(ELEM1, NAME))
-
-#ifdef UNIV_LIST_DEBUG
-/** Invalidate the pointers in a list node.
-@param NAME list name
-@param N pointer to the node that was removed */
-# define UT_LIST_REMOVE_CLEAR(N) \
- (N).next = (Type*) -1; \
- (N).prev = (N).next
-#else
-/** Invalidate the pointers in a list node.
-@param NAME list name
-@param N pointer to the node that was removed */
-# define UT_LIST_REMOVE_CLEAR(N)
-#endif /* UNIV_LIST_DEBUG */
+@param LIST list base node (not a pointer to it)
+@param ELEM1 node after which ELEM2 is inserted
+@param ELEM2 node being inserted after ELEM1 */
+#define UT_LIST_INSERT_AFTER(LIST, ELEM1, ELEM2) \
+ ut_list_insert(LIST, ELEM1, ELEM2)
+
+/*******************************************************************//**
+Inserts ELEM2 after ELEM1 in a list.
+@param list the base node
+@param elem1 node after which ELEM2 is inserted
+@param elem2 node being inserted after ELEM1
+@param get_node to get the list node for that element */
+
+template <typename List, typename Functor>
+void
+ut_list_insert(
+ List& list,
+ typename List::elem_type* elem1,
+ typename List::elem_type* elem2,
+ Functor get_node)
+{
+ ut_ad(elem1 != elem2);
+ UT_LIST_IS_INITIALISED(list);
+
+ typename List::node_type& elem1_node = get_node(*elem1);
+ typename List::node_type& elem2_node = get_node(*elem2);
+
+ elem2_node.prev = elem1;
+ elem2_node.next = elem1_node.next;
+
+ if (elem1_node.next != NULL) {
+ typename List::node_type& next_node =
+ get_node(*elem1_node.next);
+
+ next_node.prev = elem2;
+ }
+
+ elem1_node.next = elem2;
+
+ if (list.end == elem1) {
+ list.end = elem2;
+ }
+ ++list.count;
+
+}
/*******************************************************************//**
Removes a node from a two-way linked list.
-@param list the base node (not a pointer to it)
-@param elem node to be removed from the list
-@param offset offset of list node within elem */
-template <typename List, typename Type>
+@param list the base node (not a pointer to it)
+@param node member node within list element that is to be removed
+@param get_node functor to get the list node from elem */
+template <typename List, typename Functor>
void
ut_list_remove(
- List& list,
- Type& elem,
- size_t offset)
+ List& list,
+ typename List::node_type& node,
+ Functor get_node)
{
- ut_list_node<Type>& elem_node = ut_elem_get_node(elem, offset);
-
ut_a(list.count > 0);
+ UT_LIST_IS_INITIALISED(list);
- if (elem_node.next != NULL) {
- ut_list_node<Type>& next_node =
- ut_elem_get_node(*elem_node.next, offset);
+ if (node.next != NULL) {
+ typename List::node_type& next_node =
+ get_node(*node.next);
- next_node.prev = elem_node.prev;
+ next_node.prev = node.prev;
} else {
- list.end = elem_node.prev;
+ list.end = node.prev;
}
- if (elem_node.prev != NULL) {
- ut_list_node<Type>& prev_node =
- ut_elem_get_node(*elem_node.prev, offset);
+ if (node.prev != NULL) {
+ typename List::node_type& prev_node =
+ get_node(*node.prev);
- prev_node.next = elem_node.next;
+ prev_node.next = node.next;
} else {
- list.start = elem_node.next;
+ list.start = node.next;
}
- UT_LIST_REMOVE_CLEAR(elem_node);
+ node.next = 0;
+ node.prev = 0;
--list.count;
}
/*******************************************************************//**
Removes a node from a two-way linked list.
- aram NAME list name
-@param LIST the base node (not a pointer to it)
-@param ELEM node to be removed from the list */
-#define UT_LIST_REMOVE(NAME, LIST, ELEM) \
- ut_list_remove(LIST, *ELEM, IB_OFFSETOF(ELEM, NAME))
+@param list the base node (not a pointer to it)
+@param elem element to be removed from the list
+@param get_node functor to get the list node from elem */
+template <typename List, typename Functor>
+void
+ut_list_remove(
+ List& list,
+ typename List::elem_type* elem,
+ Functor get_node)
+{
+ ut_list_remove(list, get_node(*elem), get_node);
+}
+
+/*******************************************************************//**
+Removes a node from a two-way linked list.
+@param list the base node (not a pointer to it)
+@param elem element to be removed from the list */
+template <typename List>
+void
+ut_list_remove(
+ List& list,
+ typename List::elem_type* elem)
+{
+ ut_list_remove(
+ list, elem->*list.node,
+ GenericGetNode<typename List::elem_type>(list.node));
+}
+
+/*******************************************************************//**
+Removes a node from a two-way linked list.
+@param LIST the base node (not a pointer to it)
+@param ELEM node to be removed from the list */
+#define UT_LIST_REMOVE(LIST, ELEM) ut_list_remove(LIST, ELEM)
/********************************************************************//**
Gets the next node in a two-way list.
-@param NAME list name
-@param N pointer to a node
-@return the successor of N in NAME, or NULL */
-#define UT_LIST_GET_NEXT(NAME, N)\
- (((N)->NAME).next)
+@param NAME list name
+@param N pointer to a node
+@return the successor of N in NAME, or NULL */
+#define UT_LIST_GET_NEXT(NAME, N) (((N)->NAME).next)
/********************************************************************//**
Gets the previous node in a two-way list.
-@param NAME list name
-@param N pointer to a node
-@return the predecessor of N in NAME, or NULL */
-#define UT_LIST_GET_PREV(NAME, N)\
- (((N)->NAME).prev)
+@param NAME list name
+@param N pointer to a node
+@return the predecessor of N in NAME, or NULL */
+#define UT_LIST_GET_PREV(NAME, N) (((N)->NAME).prev)
/********************************************************************//**
Alternative macro to get the number of nodes in a two-way list, i.e.,
its length.
-@param BASE the base node (not a pointer to it).
-@return the number of nodes in the list */
-#define UT_LIST_GET_LEN(BASE)\
- (BASE).count
+@param BASE the base node (not a pointer to it).
+@return the number of nodes in the list */
+#define UT_LIST_GET_LEN(BASE) (BASE).count
/********************************************************************//**
Gets the first node in a two-way list.
-@param BASE the base node (not a pointer to it)
-@return first node, or NULL if the list is empty */
-#define UT_LIST_GET_FIRST(BASE)\
- (BASE).start
+@param BASE the base node (not a pointer to it)
+@return first node, or NULL if the list is empty */
+#define UT_LIST_GET_FIRST(BASE) (BASE).start
/********************************************************************//**
Gets the last node in a two-way list.
-@param BASE the base node (not a pointer to it)
-@return last node, or NULL if the list is empty */
-#define UT_LIST_GET_LAST(BASE)\
- (BASE).end
+@param BASE the base node (not a pointer to it)
+@return last node, or NULL if the list is empty */
+#define UT_LIST_GET_LAST(BASE) (BASE).end
-struct NullValidate { void operator()(const void* elem) { } };
+struct NullValidate { void operator()(const void*) const {} };
-/********************************************************************//**
-Iterate over all the elements and call the functor for each element.
-@param list base node (not a pointer to it)
-@param functor Functor that is called for each element in the list
-@parm node pointer to member node within list element */
+/** Iterate over all the elements and call the functor for each element.
+@param[in] list base node (not a pointer to it)
+@param[in,out] functor Functor that is called for each element in the list */
template <typename List, class Functor>
-void
-ut_list_map(
- List& list,
- ut_list_node<typename List::elem_type>
- List::elem_type::*node,
- Functor functor)
+inline void ut_list_map(const List& list, Functor& functor)
{
- ulint count = 0;
+ ulint count = 0;
- for (typename List::elem_type* elem = list.start;
- elem != 0;
- elem = (elem->*node).next, ++count) {
+ UT_LIST_IS_INITIALISED(list);
+
+ for (typename List::elem_type* elem = list.start; elem;
+ elem = (elem->*list.node).next, ++count) {
functor(elem);
}
@@ -366,43 +448,121 @@ ut_list_map(
ut_a(count == list.count);
}
-/********************************************************************//**
-Checks the consistency of a two-way list.
-@param list base node (not a pointer to it)
-@param functor Functor that is called for each element in the list
-@parm node pointer to member node within list element */
+/** Iterate over all the elements and call the functor for each element.
+@param[in] list base node (not a pointer to it)
+@param[in] functor Functor that is called for each element in the list */
template <typename List, class Functor>
-void
-ut_list_validate(
- List& list,
- ut_list_node<typename List::elem_type>
- List::elem_type::*node,
- Functor functor = NullValidate())
+inline void ut_list_map(const List& list, const Functor& functor)
{
- ut_list_map(list, node, functor);
+ ulint count = 0;
+ UT_LIST_IS_INITIALISED(list);
+
+ for (typename List::elem_type* elem = list.start; elem;
+ elem = (elem->*list.node).next, ++count) {
+
+ functor(elem);
+ }
+
+ ut_a(count == list.count);
+}
+
+/** Check the consistency of a doubly linked list.
+@param[in] list base node (not a pointer to it)
+@param[in,out] functor Functor that is called for each element in the list */
+template <typename List, class Functor>
+void ut_list_validate(const List& list, Functor& functor)
+{
+ ut_list_map(list, functor);
+
+ /* Validate the list backwards. */
ulint count = 0;
for (typename List::elem_type* elem = list.end;
elem != 0;
- elem = (elem->*node).prev, ++count) {
+ elem = (elem->*list.node).prev) {
+ ++count;
+ }
- functor(elem);
+ ut_a(count == list.count);
+}
+
+/** Check the consistency of a doubly linked list.
+@param[in] list base node (not a pointer to it)
+@param[in] functor Functor that is called for each element in the list */
+template <typename List, class Functor>
+inline void ut_list_validate(const List& list, const Functor& functor)
+{
+ ut_list_map(list, functor);
+
+ /* Validate the list backwards. */
+ ulint count = 0;
+
+ for (typename List::elem_type* elem = list.end;
+ elem != 0;
+ elem = (elem->*list.node).prev) {
+ ++count;
}
ut_a(count == list.count);
}
-/********************************************************************//**
-Checks the consistency of a two-way list.
-@param NAME the name of the list
-@param TYPE node type
-@param LIST base node (not a pointer to it)
-@param FUNCTOR called for each list element */
-#define UT_LIST_VALIDATE(NAME, TYPE, LIST, FUNCTOR) \
- ut_list_validate(LIST, &TYPE::NAME, FUNCTOR)
-
-#define UT_LIST_CHECK(NAME, TYPE, LIST) \
- ut_list_validate(LIST, &TYPE::NAME, NullValidate())
+template <typename List>
+inline void ut_list_validate(const List& list)
+{
+ ut_list_validate(list, NullValidate());
+}
+
+#ifdef UNIV_DEBUG
+template <typename List>
+inline void ut_list_reverse(List& list)
+{
+ UT_LIST_IS_INITIALISED(list);
+
+ for (typename List::elem_type* elem = list.start;
+ elem != 0;
+ elem = (elem->*list.node).prev) {
+ (elem->*list.node).reverse();
+ }
+
+ list.reverse();
+}
+
+/** Check if the given element exists in the list.
+@param[in]	list	the list object
+@param[in]	elem	the element of the list which will be checked
+@return true if the element is found in the list */
+template <typename List>
+inline bool ut_list_exists(const List& list, typename List::elem_type* elem)
+{
+ for (typename List::elem_type* e1 = UT_LIST_GET_FIRST(list); e1;
+ e1 = (e1->*list.node).next) {
+ if (elem == e1) {
+ return true;
+ }
+ }
+ return false;
+}
+#endif /* UNIV_DEBUG */
+
+/** Move the given element to the beginning of the list.
+@param[in,out] list the list object
+@param[in] elem the element of the list which will be moved
+ to the beginning of the list. */
+template <typename List>
+void
+ut_list_move_to_front(
+ List& list,
+ typename List::elem_type* elem)
+{
+ ut_ad(ut_list_exists(list, elem));
+
+ if (UT_LIST_GET_FIRST(list) != elem) {
+ ut_list_remove(list, elem);
+ ut_list_prepend(list, elem);
+ }
+}
+
#endif /* ut0lst.h */
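+
+/* Editorial sketch (not from the patch; the element type, list and
+names are illustrative). A functor passed to the new ut_list_map() and
+ut_list_validate() overloads is invoked once per element:
+
+	struct Counter {
+		ulint	n;
+		Counter() : n(0) {}
+		void operator()(const buf_page_t*) { ++n; }
+	};
+
+	Counter	counter;
+	ut_list_map(list, counter);	forward walk, asserts the length
+	ut_list_validate(list);		validates forwards and backwards
+	ut_a(counter.n == UT_LIST_GET_LEN(list));
+*/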
diff --git a/storage/innobase/include/ut0mem.h b/storage/innobase/include/ut0mem.h
index 12f93764dfa..32d557d4f2a 100644
--- a/storage/innobase/include/ut0mem.h
+++ b/storage/innobase/include/ut0mem.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,199 +27,70 @@ Created 5/30/1994 Heikki Tuuri
#ifndef ut0mem_h
#define ut0mem_h
-#include "univ.i"
-#include <string.h>
-#ifndef UNIV_HOTBACKUP
-# include "os0sync.h"
-
-/** The total amount of memory currently allocated from the operating
-system with os_mem_alloc_large() or malloc(). Does not count malloc()
-if srv_use_sys_malloc is set. Protected by ut_list_mutex. */
-extern ulint ut_total_allocated_memory;
-
-/** Mutex protecting ut_total_allocated_memory and ut_mem_block_list */
-extern os_fast_mutex_t ut_list_mutex;
-#endif /* !UNIV_HOTBACKUP */
+#include "os0event.h"
+#include "ut0mutex.h"
/** Wrapper for memcpy(3). Copy memory area when the source and
target are not overlapping.
-* @param dest in: copy to
-* @param sour in: copy from
-* @param n in: number of bytes to copy
-* @return dest */
+@param[in,out] dest copy to
+@param[in] src copy from
+@param[in] n number of bytes to copy
+@return dest */
UNIV_INLINE
void*
-ut_memcpy(void* dest, const void* sour, ulint n);
+ut_memcpy(void* dest, const void* src, ulint n);
/** Wrapper for memmove(3). Copy memory area when the source and
target are overlapping.
-* @param dest in: copy to
-* @param sour in: copy from
-* @param n in: number of bytes to copy
-* @return dest */
+@param[in,out] dest Move to
+@param[in] src Move from
+@param[in] n number of bytes to move
+@return dest */
UNIV_INLINE
void*
-ut_memmove(void* dest, const void* sour, ulint n);
+ut_memmove(void* dest, const void* src, ulint n);
/** Wrapper for memcmp(3). Compare memory areas.
-* @param str1 in: first memory block to compare
-* @param str2 in: second memory block to compare
-* @param n in: number of bytes to compare
-* @return negative, 0, or positive if str1 is smaller, equal,
+@param[in] str1 first memory block to compare
+@param[in] str2 second memory block to compare
+@param[in] n number of bytes to compare
+@return negative, 0, or positive if str1 is smaller, equal,
or greater than str2, respectively. */
UNIV_INLINE
int
ut_memcmp(const void* str1, const void* str2, ulint n);
-/**********************************************************************//**
-Initializes the mem block list at database startup. */
-UNIV_INTERN
-void
-ut_mem_init(void);
-/*=============*/
-
-/**********************************************************************//**
-Allocates memory.
-@return own: allocated memory */
-UNIV_INTERN
-void*
-ut_malloc_low(
-/*==========*/
- ulint n, /*!< in: number of bytes to allocate */
- ibool assert_on_error) /*!< in: if TRUE, we crash mysqld if
- the memory cannot be allocated */
- MY_ATTRIBUTE((malloc));
-/**********************************************************************//**
-Allocates memory. */
-#define ut_malloc(n) ut_malloc_low(n, TRUE)
-/**********************************************************************//**
-Frees a memory block allocated with ut_malloc. Freeing a NULL pointer is
-a nop. */
-UNIV_INTERN
-void
-ut_free(
-/*====*/
- void* ptr); /*!< in, own: memory block, can be NULL */
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Implements realloc. This is needed by /pars/lexyy.cc. Otherwise, you should not
-use this function because the allocation functions in mem0mem.h are the
-recommended ones in InnoDB.
-
-man realloc in Linux, 2004:
-
- realloc() changes the size of the memory block pointed to
- by ptr to size bytes. The contents will be unchanged to
- the minimum of the old and new sizes; newly allocated mem­
- ory will be uninitialized. If ptr is NULL, the call is
- equivalent to malloc(size); if size is equal to zero, the
- call is equivalent to free(ptr). Unless ptr is NULL, it
- must have been returned by an earlier call to malloc(),
- calloc() or realloc().
-
-RETURN VALUE
- realloc() returns a pointer to the newly allocated memory,
- which is suitably aligned for any kind of variable and may
- be different from ptr, or NULL if the request fails. If
- size was equal to 0, either NULL or a pointer suitable to
- be passed to free() is returned. If realloc() fails the
- original block is left untouched - it is not freed or
- moved.
-@return own: pointer to new mem block or NULL */
-UNIV_INTERN
-void*
-ut_realloc(
-/*=======*/
- void* ptr, /*!< in: pointer to old block or NULL */
- ulint size); /*!< in: desired size */
-/**********************************************************************//**
-Frees in shutdown all allocated memory not freed yet. */
-UNIV_INTERN
-void
-ut_free_all_mem(void);
-/*=================*/
-#endif /* !UNIV_HOTBACKUP */
-
/** Wrapper for strcpy(3). Copy a NUL-terminated string.
-* @param dest in: copy to
-* @param sour in: copy from
-* @return dest */
+@param[in,out] dest Destination to copy to
+@param[in] src Source to copy from
+@return dest */
UNIV_INLINE
char*
-ut_strcpy(char* dest, const char* sour);
+ut_strcpy(char* dest, const char* src);
/** Wrapper for strlen(3). Determine the length of a NUL-terminated string.
-* @param str in: string
-* @return length of the string in bytes, excluding the terminating NUL */
+@param[in] str string
+@return length of the string in bytes, excluding the terminating NUL */
UNIV_INLINE
ulint
ut_strlen(const char* str);
/** Wrapper for strcmp(3). Compare NUL-terminated strings.
-* @param str1 in: first string to compare
-* @param str2 in: second string to compare
-* @return negative, 0, or positive if str1 is smaller, equal,
+@param[in] str1 first string to compare
+@param[in] str2 second string to compare
+@return negative, 0, or positive if str1 is smaller, equal,
or greater than str2, respectively. */
UNIV_INLINE
int
ut_strcmp(const char* str1, const char* str2);
-/**********************************************************************//**
-Copies up to size - 1 characters from the NUL-terminated string src to
-dst, NUL-terminating the result. Returns strlen(src), so truncation
-occurred if the return value >= size.
-@return strlen(src) */
-UNIV_INTERN
-ulint
-ut_strlcpy(
-/*=======*/
- char* dst, /*!< in: destination buffer */
- const char* src, /*!< in: source buffer */
- ulint size); /*!< in: size of destination buffer */
-
-/**********************************************************************//**
-Like ut_strlcpy, but if src doesn't fit in dst completely, copies the last
-(size - 1) bytes of src, not the first.
-@return strlen(src) */
-UNIV_INTERN
-ulint
-ut_strlcpy_rev(
-/*===========*/
- char* dst, /*!< in: destination buffer */
- const char* src, /*!< in: source buffer */
- ulint size); /*!< in: size of destination buffer */
-
-/**********************************************************************//**
-Return the number of times s2 occurs in s1. Overlapping instances of s2
-are only counted once.
-@return the number of times s2 occurs in s1 */
-UNIV_INTERN
-ulint
-ut_strcount(
-/*========*/
- const char* s1, /*!< in: string to search in */
- const char* s2); /*!< in: string to search for */
-
-/**********************************************************************//**
-Replace every occurrence of s1 in str with s2. Overlapping instances of s1
-are only replaced once.
-@return own: modified string, must be freed with mem_free() */
-UNIV_INTERN
-char*
-ut_strreplace(
-/*==========*/
- const char* str, /*!< in: string to operate on */
- const char* s1, /*!< in: string to replace */
- const char* s2); /*!< in: string to replace s1 with */
-
/********************************************************************
Concatenate 3 strings.*/
-
char*
ut_str3cat(
/*=======*/
/* out, own: concatenated string, must be
- freed with mem_free() */
+ freed with ut_free() */
const char* s1, /* in: string 1 */
const char* s2, /* in: string 2 */
const char* s3); /* in: string 3 */
@@ -228,7 +100,7 @@ Converts a raw binary data to a NUL-terminated hex string. The output is
truncated if there is not enough space in "hex", make sure "hex_size" is at
least (2 * raw_size + 1) if you do not want this to happen. Returns the
actual number of characters written to "hex" (including the NUL).
-@return number of chars written */
+@return number of chars written */
UNIV_INLINE
ulint
ut_raw_to_hex(
@@ -243,7 +115,7 @@ Adds single quotes to the start and end of string and escapes any quotes
by doubling them. Returns the number of bytes that were written to "buf"
(including the terminating NUL). If buf_size is too small then the
trailing bytes from "str" are discarded.
-@return number of bytes that were written */
+@return number of bytes that were written */
UNIV_INLINE
ulint
ut_str_sql_format(
@@ -254,8 +126,6 @@ ut_str_sql_format(
ulint buf_size); /*!< in: output buffer size
in bytes */
-#ifndef UNIV_NONINL
#include "ut0mem.ic"
-#endif
#endif
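+
+/* Editorial sketch (not from the patch; the parameter order of
+ut_raw_to_hex() is assumed from the description above). Sizing the
+output buffer so that truncation cannot occur:
+
+	byte	raw[4] = {0xde, 0xad, 0xbe, 0xef};
+	char	hex[2 * sizeof(raw) + 1];	two chars per byte plus NUL
+
+	ulint	n = ut_raw_to_hex(raw, sizeof(raw), hex, sizeof(hex));
+	n includes the terminating NUL, so n == sizeof(hex) here
+*/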
diff --git a/storage/innobase/include/ut0mem.ic b/storage/innobase/include/ut0mem.ic
index f102bf959b4..8c8788a38aa 100644
--- a/storage/innobase/include/ut0mem.ic
+++ b/storage/innobase/include/ut0mem.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,35 +29,35 @@ Created 5/30/1994 Heikki Tuuri
/** Wrapper for memcpy(3). Copy memory area when the source and
target are not overlapping.
-* @param dest in: copy to
-* @param sour in: copy from
-* @param n in: number of bytes to copy
-* @return dest */
+@param[in,out] dest copy to
+@param[in] src copy from
+@param[in] n number of bytes to copy
+@return dest */
UNIV_INLINE
void*
-ut_memcpy(void* dest, const void* sour, ulint n)
+ut_memcpy(void* dest, const void* src, ulint n)
{
- return(memcpy(dest, sour, n));
+ return(memcpy(dest, src, n));
}
/** Wrapper for memmove(3). Copy memory area when the source and
target are overlapping.
-* @param dest in: copy to
-* @param sour in: copy from
-* @param n in: number of bytes to copy
-* @return dest */
+@param[in,out] dest Move to
+@param[in] src Move from
+@param[in] n number of bytes to move
+@return dest */
UNIV_INLINE
void*
-ut_memmove(void* dest, const void* sour, ulint n)
+ut_memmove(void* dest, const void* src, ulint n)
{
- return(memmove(dest, sour, n));
+ return(memmove(dest, src, n));
}
/** Wrapper for memcmp(3). Compare memory areas.
-* @param str1 in: first memory block to compare
-* @param str2 in: second memory block to compare
-* @param n in: number of bytes to compare
-* @return negative, 0, or positive if str1 is smaller, equal,
+@param[in] str1 first memory block to compare
+@param[in] str2 second memory block to compare
+@param[in] n number of bytes to compare
+@return negative, 0, or positive if str1 is smaller, equal,
or greater than str2, respectively. */
UNIV_INLINE
int
@@ -66,19 +67,19 @@ ut_memcmp(const void* str1, const void* str2, ulint n)
}
/** Wrapper for strcpy(3). Copy a NUL-terminated string.
-* @param dest in: copy to
-* @param sour in: copy from
-* @return dest */
+@param[in,out] dest Destination to copy to
+@param[in] src Source to copy from
+@return dest */
UNIV_INLINE
char*
-ut_strcpy(char* dest, const char* sour)
+ut_strcpy(char* dest, const char* src)
{
- return(strcpy(dest, sour));
+ return(strcpy(dest, src));
}
/** Wrapper for strlen(3). Determine the length of a NUL-terminated string.
-* @param str in: string
-* @return length of the string in bytes, excluding the terminating NUL */
+@param[in] str string
+@return length of the string in bytes, excluding the terminating NUL */
UNIV_INLINE
ulint
ut_strlen(const char* str)
@@ -87,9 +88,9 @@ ut_strlen(const char* str)
}
/** Wrapper for strcmp(3). Compare NUL-terminated strings.
-* @param str1 in: first string to compare
-* @param str2 in: second string to compare
-* @return negative, 0, or positive if str1 is smaller, equal,
+@param[in] str1 first string to compare
+@param[in] str2 second string to compare
+@return negative, 0, or positive if str1 is smaller, equal,
or greater than str2, respectively. */
UNIV_INLINE
int
@@ -103,7 +104,7 @@ Converts a raw binary data to a NUL-terminated hex string. The output is
truncated if there is not enough space in "hex", make sure "hex_size" is at
least (2 * raw_size + 1) if you do not want this to happen. Returns the
actual number of characters written to "hex" (including the NUL).
-@return number of chars written */
+@return number of chars written */
UNIV_INLINE
ulint
ut_raw_to_hex(
@@ -118,15 +119,15 @@ ut_raw_to_hex(
#define MK_UINT16(a, b) (((uint16) (a)) << 8 | (uint16) (b))
-#define UINT16_GET_A(u) ((unsigned char) ((u) >> 8))
-#define UINT16_GET_B(u) ((unsigned char) ((u) & 0xFF))
+#define UINT16_GET_A(u) ((char) ((u) >> 8))
+#define UINT16_GET_B(u) ((char) ((u) & 0xFF))
#else /* WORDS_BIGENDIAN */
#define MK_UINT16(a, b) (((uint16) (b)) << 8 | (uint16) (a))
-#define UINT16_GET_A(u) ((unsigned char) ((u) & 0xFF))
-#define UINT16_GET_B(u) ((unsigned char) ((u) >> 8))
+#define UINT16_GET_A(u) ((char) ((u) & 0xFF))
+#define UINT16_GET_B(u) ((char) ((u) >> 8))
#endif /* WORDS_BIGENDIAN */
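+
+/* Editorial sketch (not from the patch): MK_UINT16() and the two
+UINT16_GET_*() accessors are inverses on either byte order:
+
+	uint16	u = MK_UINT16('A', 'B');
+	ut_a(UINT16_GET_A(u) == 'A');
+	ut_a(UINT16_GET_B(u) == 'B');
+*/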
@@ -223,7 +224,7 @@ Adds single quotes to the start and end of string and escapes any quotes
by doubling them. Returns the number of bytes that were written to "buf"
(including the terminating NUL). If buf_size is too small then the
trailing bytes from "str" are discarded.
-@return number of bytes that were written */
+@return number of bytes that were written */
UNIV_INLINE
ulint
ut_str_sql_format(
diff --git a/storage/innobase/include/ut0mutex.h b/storage/innobase/include/ut0mutex.h
new file mode 100644
index 00000000000..1f99ee17a24
--- /dev/null
+++ b/storage/innobase/include/ut0mutex.h
@@ -0,0 +1,200 @@
+/*****************************************************************************
+
+Copyright (c) 2012, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0mutex.h
+Policy based mutexes.
+
+Created 2012-03-24 Sunny Bains.
+***********************************************************************/
+
+#ifndef UNIV_INNOCHECKSUM
+
+#ifndef ut0mutex_h
+#define ut0mutex_h
+
+#include "sync0policy.h"
+#include "ib0mutex.h"
+
+/** Create a typedef using the MutexType<PolicyType>
+@param[in] M Mutex type
+@param[in]	P	Policy type
+@param[in] T The resulting typedef alias */
+#define UT_MUTEX_TYPE(M, P, T) typedef PolicyMutex<M<P> > T;
+
+typedef OSMutex EventMutex;
+
+# ifdef HAVE_IB_LINUX_FUTEX
+UT_MUTEX_TYPE(TTASFutexMutex, GenericPolicy, FutexMutex);
+UT_MUTEX_TYPE(TTASFutexMutex, BlockMutexPolicy, BlockFutexMutex);
+# endif /* HAVE_IB_LINUX_FUTEX */
+
+UT_MUTEX_TYPE(TTASMutex, GenericPolicy, SpinMutex);
+UT_MUTEX_TYPE(TTASMutex, BlockMutexPolicy, BlockSpinMutex);
+
+UT_MUTEX_TYPE(OSTrackMutex, GenericPolicy, SysMutex);
+UT_MUTEX_TYPE(OSTrackMutex, BlockMutexPolicy, BlockSysMutex);
+
+UT_MUTEX_TYPE(TTASEventMutex, GenericPolicy, SyncArrayMutex);
+UT_MUTEX_TYPE(TTASEventMutex, BlockMutexPolicy, BlockSyncArrayMutex);
+
+#ifdef MUTEX_FUTEX
+/** The default mutex type. */
+typedef FutexMutex ib_mutex_t;
+typedef BlockFutexMutex ib_bpmutex_t;
+#define MUTEX_TYPE "Uses futexes"
+#elif defined(MUTEX_SYS)
+typedef SysMutex ib_mutex_t;
+typedef BlockSysMutex ib_bpmutex_t;
+#define MUTEX_TYPE "Uses system mutexes"
+#elif defined(MUTEX_EVENT)
+typedef SyncArrayMutex ib_mutex_t;
+typedef BlockSyncArrayMutex ib_bpmutex_t;
+#define MUTEX_TYPE "Uses event mutexes"
+#else
+#error "ib_mutex_t type is unknown"
+#endif /* MUTEX_FUTEX */
+
+extern uint srv_spin_wait_delay;
+extern ulong srv_n_spin_wait_rounds;
+
+#define mutex_create(I, M) mutex_init((M), (I), \
+ __FILE__, __LINE__)
+
+#define mutex_enter_loc(M,file,line) (M)->enter( \
+ uint32_t(srv_n_spin_wait_rounds), \
+ uint32_t(srv_spin_wait_delay), \
+ file, line)
+#define mutex_enter(M) mutex_enter_loc(M, __FILE__, __LINE__)
+
+#define mutex_enter_nospin(M) (M)->enter( \
+ 0, \
+ 0, \
+ __FILE__, uint32_t(__LINE__))
+
+#define mutex_enter_nowait(M) (M)->trylock(__FILE__, \
+ uint32_t(__LINE__))
+
+#define mutex_exit(M) (M)->exit()
+
+#define mutex_free(M) mutex_destroy(M)
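+
+/* Editorial sketch (not from the patch; the latch id is illustrative).
+The macros above wrap the PolicyMutex interface:
+
+	ib_mutex_t	m;
+	mutex_create(LATCH_ID_BUF_POOL, &m);	placement-init via mutex_init()
+	mutex_enter(&m);	spins, then waits on the policy's mechanism
+	mutex_exit(&m);
+	mutex_free(&m);		expands to mutex_destroy()
+*/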
+
+#ifdef UNIV_DEBUG
+/**
+Checks that the mutex has been initialized. */
+#define mutex_validate(M) (M)->validate()
+
+/**
+Checks that the current thread owns the mutex. Works only
+in the debug version. */
+#define mutex_own(M) (M)->is_owned()
+#else
+#define mutex_own(M) /* No op */
+#define mutex_validate(M) /* No op */
+#endif /* UNIV_DEBUG */
+
+/** Iterate over the mutex meta data */
+class MutexMonitor {
+public:
+ /** Constructor */
+ MutexMonitor() { }
+
+ /** Destructor */
+ ~MutexMonitor() { }
+
+ /** Enable the mutex monitoring */
+ void enable();
+
+ /** Disable the mutex monitoring */
+ void disable();
+
+ /** Reset the mutex monitoring values */
+ void reset();
+
+ /** Invoke the callback for each active mutex collection
+ @param[in,out] callback Functor to call
+ @return false if callback returned false */
+ template<typename Callback>
+ bool iterate(Callback& callback) const
+ UNIV_NOTHROW
+ {
+ LatchMetaData::iterator end = latch_meta.end();
+
+ for (LatchMetaData::iterator it = latch_meta.begin();
+ it != end;
+ ++it) {
+
+ /* Some of the slots will be null in non-debug mode */
+
+ if (*it == NULL) {
+ continue;
+ }
+
+ latch_meta_t* latch_meta = *it;
+
+ bool ret = callback(*latch_meta);
+
+ if (!ret) {
+ return(ret);
+ }
+ }
+
+ return(true);
+ }
+};
+
+/** Defined in sync0sync.cc */
+extern MutexMonitor mutex_monitor;
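+
+/* Editorial sketch (not from the patch): a callback passed to
+MutexMonitor::iterate() receives each latch_meta_t by reference and
+returns false to stop the iteration early:
+
+	struct CountLatches {
+		ulint	n;
+		CountLatches() : n(0) {}
+		bool operator()(latch_meta_t&) { ++n; return(true); }
+	};
+
+	CountLatches	cb;
+	mutex_monitor.iterate(cb);
+*/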
+
+/**
+Creates, or rather, initializes a mutex object in a specified memory
+location (which must be appropriately aligned). The mutex is initialized
+in the reset state. Explicit freeing of the mutex with mutex_free is
+necessary only if the memory block containing it is freed.
+Adds the mutex instance to the global mutex list.
+@param[in,out] mutex mutex to initialise
+@param[in] id The mutex ID (Latch ID)
+@param[in]	file_name	file name from where it was called
+@param[in]	line	line number in file_name from where called */
+template <typename Mutex>
+void mutex_init(
+ Mutex* mutex,
+ latch_id_t id,
+ const char* file_name,
+ uint32_t line)
+{
+ new(mutex) Mutex();
+
+ mutex->init(id, file_name, line);
+}
+
+/**
+Removes a mutex instance from the mutex list. The mutex is checked to
+be in the reset state.
+@param[in,out] mutex mutex instance to destroy */
+template <typename Mutex>
+void mutex_destroy(
+ Mutex* mutex)
+{
+ mutex->destroy();
+}
+
+#endif /* ut0mutex_h */
+
+#endif /* UNIV_INNOCHECKSUM */
diff --git a/storage/innobase/include/ut0new.h b/storage/innobase/include/ut0new.h
new file mode 100644
index 00000000000..3bd9ce3045e
--- /dev/null
+++ b/storage/innobase/include/ut0new.h
@@ -0,0 +1,898 @@
+/*****************************************************************************
+
+Copyright (c) 2014, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file ut/ut0new.h
+Instrumented memory allocator.
+
+Created May 26, 2014 Vasil Dimov
+*******************************************************/
+
+/** Dynamic memory allocation within InnoDB guidelines.
+All dynamic (heap) memory allocations (malloc(3), strdup(3), etc, "new",
+various std:: containers that allocate memory internally), that are done
+within InnoDB are instrumented. This means that InnoDB uses a custom set
+of functions for allocating memory, rather than calling e.g. "new" directly.
+
+Here follows a cheat sheet on what InnoDB functions to use whenever a
+standard one would have been used.
+
+Creating new objects with "new":
+--------------------------------
+Standard:
+ new expression
+ or
+ new(std::nothrow) expression
+InnoDB, default instrumentation:
+ UT_NEW_NOKEY(expression)
+InnoDB, custom instrumentation, preferred:
+ UT_NEW(expression, key)
+
+Destroying objects, created with "new":
+---------------------------------------
+Standard:
+ delete ptr
+InnoDB:
+ UT_DELETE(ptr)
+
+Creating new arrays with "new[]":
+---------------------------------
+Standard:
+ new type[num]
+ or
+ new(std::nothrow) type[num]
+InnoDB, default instrumentation:
+ UT_NEW_ARRAY_NOKEY(type, num)
+InnoDB, custom instrumentation, preferred:
+ UT_NEW_ARRAY(type, num, key)
+
+Destroying arrays, created with "new[]":
+----------------------------------------
+Standard:
+ delete[] ptr
+InnoDB:
+ UT_DELETE_ARRAY(ptr)
+
+Declaring a type with a std:: container, e.g. std::vector:
+----------------------------------------------------------
+Standard:
+ std::vector<t>
+InnoDB:
+ std::vector<t, ut_allocator<t> >
+
+Declaring objects of some std:: type:
+-------------------------------------
+Standard:
+ std::vector<t> v
+InnoDB, default instrumentation:
+ std::vector<t, ut_allocator<t> > v
+InnoDB, custom instrumentation, preferred:
+ std::vector<t, ut_allocator<t> > v(ut_allocator<t>(key))
+
+Raw block allocation (as usual in C++, consider whether using "new" would
+not be more appropriate):
+-------------------------------------------------------------------------
+Standard:
+ malloc(num)
+InnoDB, default instrumentation:
+ ut_malloc_nokey(num)
+InnoDB, custom instrumentation, preferred:
+ ut_malloc(num, key)
+
+Raw block resize:
+-----------------
+Standard:
+ realloc(ptr, new_size)
+InnoDB:
+ ut_realloc(ptr, new_size)
+
+Raw block deallocation:
+-----------------------
+Standard:
+ free(ptr)
+InnoDB:
+ ut_free(ptr)
+
+Note: the expression passed to UT_NEW() or UT_NEW_NOKEY() must always end
+with (), thus:
+Standard:
+ new int
+InnoDB:
+ UT_NEW_NOKEY(int())
+*/
+
+#ifndef ut0new_h
+#define ut0new_h
+
+#include <algorithm> /* std::min() */
+#include <limits> /* std::numeric_limits */
+#include <map> /* std::map */
+
+#include <stddef.h>
+#include <stdlib.h> /* malloc() */
+#include <string.h> /* strlen(), strrchr(), strncmp() */
+
+#include "my_global.h" /* needed for headers from mysql/psi/ */
+/* JAN: TODO: missing 5.7 header */
+#ifdef HAVE_MYSQL_MEMORY_H
+#include "mysql/psi/mysql_memory.h" /* PSI_MEMORY_CALL() */
+#endif
+
+#include "mysql/psi/psi_memory.h" /* PSI_memory_key, PSI_memory_info */
+
+#include "os0proc.h" /* os_mem_alloc_large() */
+#include "os0thread.h" /* os_thread_sleep() */
+#include "ut0ut.h" /* ut_strcmp_functor, ut_basename_noext() */
+
+#define OUT_OF_MEMORY_MSG \
+ "Check if you should increase the swap file or ulimits of your" \
+ " operating system. Note that on most 32-bit computers the process" \
+ " memory space is limited to 2 GB or 4 GB."
+
+/** Maximum number of retries to allocate memory. */
+extern const size_t alloc_max_retries;
+
+/** Keys for registering allocations with performance schema.
+Pointers to these variables are supplied to PFS code via the pfs_info[]
+array and the PFS code initializes them via PSI_MEMORY_CALL(register_memory)().
+mem_key_other and mem_key_std are special in the following way (see also
+ut_allocator::get_mem_key()):
+* If the caller has not provided a key and the file name of the caller is
+ unknown, then mem_key_std will be used. This happens only when called from
+ within std::* containers.
+* If the caller has not provided a key and the file name of the caller is
+ known, but is not amongst the predefined names (see ut_new_boot()) then
+ mem_key_other will be used. Generally this should not happen and if it
+ happens then that means that the list of predefined names must be extended.
+Keep this list alphabetically sorted. */
+extern PSI_memory_key mem_key_ahi;
+extern PSI_memory_key mem_key_buf_buf_pool;
+extern PSI_memory_key mem_key_dict_stats_bg_recalc_pool_t;
+extern PSI_memory_key mem_key_dict_stats_index_map_t;
+extern PSI_memory_key mem_key_dict_stats_n_diff_on_level;
+extern PSI_memory_key mem_key_other;
+extern PSI_memory_key mem_key_row_log_buf;
+extern PSI_memory_key mem_key_row_merge_sort;
+extern PSI_memory_key mem_key_std;
+extern PSI_memory_key mem_key_trx_sys_t_rw_trx_ids;
+
+/** Setup the internal objects needed for UT_NEW() to operate.
+This must be called before the first call to UT_NEW(). */
+void
+ut_new_boot();
+
+#ifdef UNIV_PFS_MEMORY
+
+/** Retrieve a memory key (registered with PFS), given a portion of the file
+name of the caller.
+@param[in] file portion of the filename - basename without an extension
+@return registered memory key or PSI_NOT_INSTRUMENTED if not found */
+PSI_memory_key
+ut_new_get_key_by_file(
+ const char* file);
+
+#endif /* UNIV_PFS_MEMORY */
+
+/** A structure that holds the necessary data for performance schema
+accounting. An object of this type is put in front of each allocated block
+of memory when allocation is done by ut_allocator::allocate(). This is
+because the data is needed even when freeing the memory. Users of
+ut_allocator::allocate_large() are responsible for maintaining this
+themselves. */
+struct ut_new_pfx_t {
+
+#ifdef UNIV_PFS_MEMORY
+
+ /** Performance schema key. Assigned to a name at startup via
+ PSI_MEMORY_CALL(register_memory)() and later used for accounting
+ allocations and deallocations with
+ PSI_MEMORY_CALL(memory_alloc)(key, size, owner) and
+ PSI_MEMORY_CALL(memory_free)(key, size, owner). */
+ PSI_memory_key m_key;
+
+ /**
+ Thread owner.
+ Instrumented thread that owns the allocated memory.
+ This state is used by the performance schema to maintain
+ per thread statistics,
+ when memory is given from thread A to thread B.
+ */
+ struct PSI_thread *m_owner;
+
+#endif /* UNIV_PFS_MEMORY */
+
+ /** Size of the allocated block in bytes, including this prepended
+ aux structure (for ut_allocator::allocate()). For example if InnoDB
+ code requests to allocate 100 bytes, and sizeof(ut_new_pfx_t) is 16,
+ then 116 bytes are allocated in total and m_size will be 116.
+ ut_allocator::allocate_large() does not prepend this struct to the
+ allocated block and its users are responsible for maintaining it
+ and passing it later to ut_allocator::deallocate_large(). */
+ size_t m_size;
+#if SIZEOF_VOIDP == 4
+ /** Pad the header size to a multiple of 64 bits on 32-bit systems,
+ so that the payload will be aligned to 64 bits. */
+ size_t pad;
+#endif
+};
+
+/** Allocator class for allocating memory from inside std::* containers.
+@tparam T type of allocated object
+@tparam oom_fatal whether to commit suicide when running out of memory */
+template <class T, bool oom_fatal = true>
+class ut_allocator {
+public:
+ typedef T* pointer;
+ typedef const T* const_pointer;
+ typedef T& reference;
+ typedef const T& const_reference;
+ typedef T value_type;
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+
+ /** Default constructor. */
+ explicit
+ ut_allocator(PSI_memory_key key = PSI_NOT_INSTRUMENTED)
+#ifdef UNIV_PFS_MEMORY
+ : m_key(key)
+#endif /* UNIV_PFS_MEMORY */
+ {
+ }
+
+ /** Constructor from allocator of another type. */
+ template <class U>
+ ut_allocator(
+ const ut_allocator<U>& other)
+#ifdef UNIV_PFS_MEMORY
+ : m_key(other.m_key)
+#endif /* UNIV_PFS_MEMORY */
+ {
+ }
+
+ /** Return the maximum number of objects that can be allocated by
+ this allocator. */
+ size_type
+ max_size() const
+ {
+ const size_type s_max = std::numeric_limits<size_type>::max();
+
+#ifdef UNIV_PFS_MEMORY
+ return((s_max - sizeof(ut_new_pfx_t)) / sizeof(T));
+#else
+ return(s_max / sizeof(T));
+#endif /* UNIV_PFS_MEMORY */
+ }
+
+ /** Allocate a chunk of memory that can hold 'n_elements' objects of
+ type 'T' and trace the allocation.
+ If the allocation fails this method may throw an exception. This
+ is mandated by the standard and if it returns NULL instead, then
+ STL containers that use it (e.g. std::vector) may get confused.
+	After successful allocation the returned pointer must be passed
+ to ut_allocator::deallocate() when no longer needed.
+ @param[in] n_elements number of elements
+ @param[in] hint pointer to a nearby memory location,
+ unused by this implementation
+ @param[in] file file name of the caller
+ @param[in] set_to_zero if true, then the returned memory is
+ initialized with 0x0 bytes.
+ @return pointer to the allocated memory */
+ pointer
+ allocate(
+ size_type n_elements,
+ const_pointer hint = NULL,
+ const char* file = NULL,
+ bool set_to_zero = false,
+ bool throw_on_error = true)
+ {
+ if (n_elements == 0) {
+ return(NULL);
+ }
+
+ if (n_elements > max_size()) {
+ if (throw_on_error) {
+ throw(std::bad_alloc());
+ } else {
+ return(NULL);
+ }
+ }
+
+ void* ptr;
+ size_t total_bytes = n_elements * sizeof(T);
+
+#ifdef UNIV_PFS_MEMORY
+ /* The header size must not ruin the 64-bit alignment
+ on 32-bit systems. Some allocated structures use
+ 64-bit fields. */
+ ut_ad((sizeof(ut_new_pfx_t) & 7) == 0);
+ total_bytes += sizeof(ut_new_pfx_t);
+#endif /* UNIV_PFS_MEMORY */
+
+ for (size_t retries = 1; ; retries++) {
+
+ if (set_to_zero) {
+ ptr = calloc(1, total_bytes);
+ } else {
+ ptr = malloc(total_bytes);
+ }
+
+ if (ptr != NULL || retries >= alloc_max_retries) {
+ break;
+ }
+
+ os_thread_sleep(1000000 /* 1 second */);
+ }
+
+ if (ptr == NULL) {
+ ib::fatal_or_error(oom_fatal)
+ << "Cannot allocate " << total_bytes
+ << " bytes of memory after "
+ << alloc_max_retries << " retries over "
+ << alloc_max_retries << " seconds. OS error: "
+ << strerror(errno) << " (" << errno << "). "
+ << OUT_OF_MEMORY_MSG;
+ if (throw_on_error) {
+ throw(std::bad_alloc());
+ } else {
+ return(NULL);
+ }
+ }
+
+#ifdef UNIV_PFS_MEMORY
+ ut_new_pfx_t* pfx = static_cast<ut_new_pfx_t*>(ptr);
+
+ allocate_trace(total_bytes, file, pfx);
+
+ return(reinterpret_cast<pointer>(pfx + 1));
+#else
+ return(reinterpret_cast<pointer>(ptr));
+#endif /* UNIV_PFS_MEMORY */
+ }
+
+	/** Free memory allocated by allocate() and trace the deallocation.
+	@param[in,out]	ptr	pointer to memory to free */
+	void deallocate(pointer ptr, size_type n_elements = 0)
+ {
+#ifdef UNIV_PFS_MEMORY
+ if (ptr == NULL) {
+ return;
+ }
+
+ ut_new_pfx_t* pfx = reinterpret_cast<ut_new_pfx_t*>(ptr) - 1;
+
+ deallocate_trace(pfx);
+
+ free(pfx);
+#else
+ free(ptr);
+#endif /* UNIV_PFS_MEMORY */
+ }
+
+ /** Create an object of type 'T' using the value 'val' over the
+ memory pointed by 'p'. */
+ void
+ construct(
+ pointer p,
+ const T& val)
+ {
+ new(p) T(val);
+ }
+
+ /** Destroy an object pointed by 'p'. */
+ void
+ destroy(
+ pointer p)
+ {
+ p->~T();
+ }
+
+ /** Return the address of an object. */
+ pointer
+ address(
+ reference x) const
+ {
+ return(&x);
+ }
+
+ /** Return the address of a const object. */
+ const_pointer
+ address(
+ const_reference x) const
+ {
+ return(&x);
+ }
+
+ template <class U>
+ struct rebind {
+ typedef ut_allocator<U> other;
+ };
+
+ /* The following are custom methods, not required by the standard. */
+
+#ifdef UNIV_PFS_MEMORY
+
+ /** realloc(3)-like method.
+ The passed in ptr must have been returned by allocate() and the
+ pointer returned by this method must be passed to deallocate() when
+ no longer needed.
+ @param[in,out] ptr old pointer to reallocate
+ @param[in] n_elements new number of elements to allocate
+ @param[in] file file name of the caller
+ @return newly allocated memory */
+ pointer
+ reallocate(
+ void* ptr,
+ size_type n_elements,
+ const char* file)
+ {
+ if (n_elements == 0) {
+ deallocate(static_cast<pointer>(ptr));
+ return(NULL);
+ }
+
+ if (ptr == NULL) {
+ return(allocate(n_elements, NULL, file, false, false));
+ }
+
+ if (n_elements > max_size()) {
+ return(NULL);
+ }
+
+ ut_new_pfx_t* pfx_old;
+ ut_new_pfx_t* pfx_new;
+ size_t total_bytes;
+
+ pfx_old = reinterpret_cast<ut_new_pfx_t*>(ptr) - 1;
+
+ total_bytes = n_elements * sizeof(T) + sizeof(ut_new_pfx_t);
+
+ for (size_t retries = 1; ; retries++) {
+
+ pfx_new = static_cast<ut_new_pfx_t*>(
+ realloc(pfx_old, total_bytes));
+
+ if (pfx_new != NULL || retries >= alloc_max_retries) {
+ break;
+ }
+
+ os_thread_sleep(1000000 /* 1 second */);
+ }
+
+ if (pfx_new == NULL) {
+ ib::fatal_or_error(oom_fatal)
+ << "Cannot reallocate " << total_bytes
+ << " bytes of memory after "
+ << alloc_max_retries << " retries over "
+ << alloc_max_retries << " seconds. OS error: "
+ << strerror(errno) << " (" << errno << "). "
+ << OUT_OF_MEMORY_MSG;
+ return(NULL);
+ }
+
+ /* pfx_new still contains the description of the old block
+ that was presumably freed by realloc(). */
+ deallocate_trace(pfx_new);
+
+ /* pfx_new is set here to describe the new block. */
+ allocate_trace(total_bytes, file, pfx_new);
+
+ return(reinterpret_cast<pointer>(pfx_new + 1));
+ }
+
+ /** Allocate, trace the allocation and construct 'n_elements' objects
+ of type 'T'. If the allocation fails or if some of the constructors
+	throw an exception, then this method will return NULL. It does not
+	throw exceptions. After successful completion the returned pointer
+ must be passed to delete_array() when no longer needed.
+ @param[in] n_elements number of elements to allocate
+ @param[in] file file name of the caller
+ @return pointer to the first allocated object or NULL */
+ pointer
+ new_array(
+ size_type n_elements,
+ const char* file)
+ {
+ T* p = allocate(n_elements, NULL, file, false, false);
+
+ if (p == NULL) {
+ return(NULL);
+ }
+
+ T* first = p;
+ size_type i;
+
+ try {
+ for (i = 0; i < n_elements; i++) {
+ new(p) T;
+ ++p;
+ }
+ } catch (...) {
+ for (size_type j = 0; j < i; j++) {
+ --p;
+ p->~T();
+ }
+
+ deallocate(first);
+
+ throw;
+ }
+
+ return(first);
+ }
+
+ /** Destroy, deallocate and trace the deallocation of an array created
+ by new_array().
+ @param[in,out] ptr pointer to the first object in the array */
+ void
+ delete_array(
+ T* ptr)
+ {
+ if (ptr == NULL) {
+ return;
+ }
+
+ const size_type n_elements = n_elements_allocated(ptr);
+
+ T* p = ptr + n_elements - 1;
+
+ for (size_type i = 0; i < n_elements; i++) {
+ p->~T();
+ --p;
+ }
+
+ deallocate(ptr);
+ }
+
+#endif /* UNIV_PFS_MEMORY */
+
+ /** Allocate a large chunk of memory that can hold 'n_elements'
+ objects of type 'T' and trace the allocation.
+ @param[in] n_elements number of elements
+ @param[out] pfx storage for the description of the
+ allocated memory. The caller must provide space for this one and keep
+ it until the memory is no longer needed and then pass it to
+ deallocate_large().
+ @return pointer to the allocated memory or NULL */
+ pointer
+ allocate_large(
+ size_type n_elements,
+ ut_new_pfx_t* pfx)
+ {
+ if (n_elements == 0 || n_elements > max_size()) {
+ return(NULL);
+ }
+
+ ulint n_bytes = n_elements * sizeof(T);
+
+ pointer ptr = reinterpret_cast<pointer>(
+ os_mem_alloc_large(&n_bytes));
+
+#ifdef UNIV_PFS_MEMORY
+ if (ptr != NULL) {
+ allocate_trace(n_bytes, NULL, pfx);
+ }
+#else
+ pfx->m_size = n_bytes;
+#endif /* UNIV_PFS_MEMORY */
+
+ return(ptr);
+ }
+
+ /** Free a memory allocated by allocate_large() and trace the
+ deallocation.
+ @param[in,out] ptr pointer to memory to free
+ @param[in] pfx descriptor of the memory, as returned by
+ allocate_large(). */
+ void
+ deallocate_large(
+ pointer ptr,
+ const ut_new_pfx_t* pfx)
+ {
+#ifdef UNIV_PFS_MEMORY
+ deallocate_trace(pfx);
+#endif /* UNIV_PFS_MEMORY */
+
+ os_mem_free_large(ptr, pfx->m_size);
+ }
+
+#ifdef UNIV_PFS_MEMORY
+
+ /** Get the performance schema key to use for tracing allocations.
+ @param[in] file file name of the caller or NULL if unknown
+ @return performance schema key */
+ PSI_memory_key
+ get_mem_key(
+ const char* file) const
+ {
+ if (m_key != PSI_NOT_INSTRUMENTED) {
+ return(m_key);
+ }
+
+ if (file == NULL) {
+ return(mem_key_std);
+ }
+
+ /* e.g. "btr0cur", derived from "/path/to/btr0cur.cc" */
+ char keyname[FILENAME_MAX];
+ const size_t len = ut_basename_noext(file, keyname,
+ sizeof(keyname));
+ /* If sizeof(keyname) was not enough then the output would
+ be truncated, assert that this did not happen. */
+ ut_a(len < sizeof(keyname));
+
+ const PSI_memory_key key = ut_new_get_key_by_file(keyname);
+
+ if (key != PSI_NOT_INSTRUMENTED) {
+ return(key);
+ }
+
+ return(mem_key_other);
+ }
+
+private:
+
+ /** Retrieve the size of a memory block allocated by new_array().
+ @param[in] ptr pointer returned by new_array().
+ @return size of memory block */
+ size_type
+ n_elements_allocated(
+ const_pointer ptr)
+ {
+ const ut_new_pfx_t* pfx
+ = reinterpret_cast<const ut_new_pfx_t*>(ptr) - 1;
+
+ const size_type user_bytes
+ = pfx->m_size - sizeof(ut_new_pfx_t);
+
+ ut_ad(user_bytes % sizeof(T) == 0);
+
+ return(user_bytes / sizeof(T));
+ }
+
+ /** Trace a memory allocation.
+ After the accounting, the data needed for tracing the deallocation
+ later is written into 'pfx'.
+ The PFS event name is picked on the following criteria:
+ 1. If key (!= PSI_NOT_INSTRUMENTED) has been specified when constructing
+ this ut_allocator object, then the name associated with that key will
+ be used (this is the recommended approach for new code)
+ 2. Otherwise, if "file" is NULL, then the name associated with
+ mem_key_std will be used
+ 3. Otherwise, if an entry is found by ut_new_get_key_by_file(), that
+ corresponds to "file", that will be used (see ut_new_boot())
+ 4. Otherwise, the name associated with mem_key_other will be used.
+ @param[in] size number of bytes that were allocated
+ @param[in] file file name of the caller or NULL if unknown
+ @param[out] pfx placeholder to store the info which will be
+ needed when freeing the memory */
+ void
+ allocate_trace(
+ size_t size,
+ const char* file,
+ ut_new_pfx_t* pfx)
+ {
+ const PSI_memory_key key = get_mem_key(file);
+
+ pfx->m_key = PSI_MEMORY_CALL(memory_alloc)(key, size, & pfx->m_owner);
+ pfx->m_size = size;
+ }
+
+ /** Trace a memory deallocation.
+ @param[in] pfx info for the deallocation */
+ void
+ deallocate_trace(
+ const ut_new_pfx_t* pfx)
+ {
+ PSI_MEMORY_CALL(memory_free)(pfx->m_key, pfx->m_size, pfx->m_owner);
+ }
+
+ /** Performance schema key. */
+ PSI_memory_key m_key;
+
+#endif /* UNIV_PFS_MEMORY */
+
+private:
+
+ /** Assignment operator, not used, thus disabled (private). */
+ template <class U>
+ void
+ operator=(
+ const ut_allocator<U>&);
+};
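+
+/* Editorial sketch (not from the patch): ut_allocator used directly,
+outside a std:: container. With the n_elements default, deallocate()
+can be called with the pointer alone:
+
+	ut_allocator<int>	alloc(mem_key_other);
+	int*	p = alloc.allocate(10);
+	alloc.deallocate(p);
+*/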
+
+/** Compare two allocators of the same type.
+As long as the type of A1 and A2 is the same, memory allocated by A1
+could be freed by A2 even if the pfs mem key is different. */
+template <typename T>
+inline
+bool
+operator==(
+ const ut_allocator<T>& lhs,
+ const ut_allocator<T>& rhs)
+{
+ return(true);
+}
+
+/** Compare two allocators of the same type. */
+template <typename T>
+inline
+bool
+operator!=(
+ const ut_allocator<T>& lhs,
+ const ut_allocator<T>& rhs)
+{
+ return(!(lhs == rhs));
+}
+
+#ifdef UNIV_PFS_MEMORY
+
+/** Allocate, trace the allocation and construct an object.
+Use this macro instead of 'new' within InnoDB.
+For example: instead of
+ Foo* f = new Foo(args);
+use:
+ Foo* f = UT_NEW(Foo(args), mem_key_some);
+Upon failure to allocate the memory, this macro may return NULL. It
+will not throw exceptions. After successful allocation the returned
+pointer must be passed to UT_DELETE() when no longer needed.
+@param[in] expr any expression that could follow "new"
+@param[in] key performance schema memory tracing key
+@return pointer to the created object or NULL */
+#define UT_NEW(expr, key) \
+ /* Placement new will return NULL and not attempt to construct an
+ object if the passed in pointer is NULL, e.g. if allocate() has
+ failed to allocate memory and has returned NULL. */ \
+ ::new(ut_allocator<byte>(key).allocate( \
+ sizeof expr, NULL, __FILE__, false, false)) expr
+
+/** Allocate, trace the allocation and construct an object.
+Use this macro instead of 'new' within InnoDB and instead of UT_NEW()
+when creating a dedicated memory key is not feasible.
+For example: instead of
+ Foo* f = new Foo(args);
+use:
+ Foo* f = UT_NEW_NOKEY(Foo(args));
+Upon failure to allocate the memory, this macro may return NULL. It
+will not throw exceptions. After successful allocation the returned
+pointer must be passed to UT_DELETE() when no longer needed.
+@param[in] expr any expression that could follow "new"
+@return pointer to the created object or NULL */
+#define UT_NEW_NOKEY(expr) UT_NEW(expr, PSI_NOT_INSTRUMENTED)
+
+/** Destroy, deallocate and trace the deallocation of an object created by
+UT_NEW() or UT_NEW_NOKEY().
+We can't instantiate ut_allocator without having the type of the object, thus
+we redirect this to a templated function. */
+#define UT_DELETE(ptr) ut_delete(ptr)
+
+/** Destroy and account object created by UT_NEW() or UT_NEW_NOKEY().
+@param[in,out] ptr pointer to the object */
+template <typename T>
+inline
+void
+ut_delete(
+ T* ptr)
+{
+ if (ptr == NULL) {
+ return;
+ }
+
+ ut_allocator<T> allocator;
+
+ allocator.destroy(ptr);
+ allocator.deallocate(ptr);
+}
+
+/** Allocate and account 'n_elements' objects of type 'type'.
+Use this macro to allocate memory within InnoDB instead of 'new[]'.
+The returned pointer must be passed to UT_DELETE_ARRAY().
+@param[in] type type of objects being created
+@param[in] n_elements number of objects to create
+@param[in] key performance schema memory tracing key
+@return pointer to the first allocated object or NULL */
+#define UT_NEW_ARRAY(type, n_elements, key) \
+ ut_allocator<type>(key).new_array(n_elements, __FILE__)
+
+/** Allocate and account 'n_elements' objects of type 'type'.
+Use this macro to allocate memory within InnoDB instead of 'new[]' and
+instead of UT_NEW_ARRAY() when it is not feasible to create a dedicated key.
+@param[in] type type of objects being created
+@param[in] n_elements number of objects to create
+@return pointer to the first allocated object or NULL */
+#define UT_NEW_ARRAY_NOKEY(type, n_elements) \
+ UT_NEW_ARRAY(type, n_elements, PSI_NOT_INSTRUMENTED)
+
+/** Destroy, deallocate and trace the deallocation of an array created by
+UT_NEW_ARRAY() or UT_NEW_ARRAY_NOKEY().
+We can't instantiate ut_allocator without having the type of the object, thus
+we redirect this to a templated function. */
+#define UT_DELETE_ARRAY(ptr) ut_delete_array(ptr)
+
+/** Destroy and account objects created by UT_NEW_ARRAY() or
+UT_NEW_ARRAY_NOKEY().
+@param[in,out] ptr pointer to the first object in the array */
+template <typename T>
+inline
+void
+ut_delete_array(
+ T* ptr)
+{
+ ut_allocator<T>().delete_array(ptr);
+}
+
+#define ut_malloc(n_bytes, key) static_cast<void*>( \
+ ut_allocator<byte>(key).allocate( \
+ n_bytes, NULL, __FILE__, false, false))
+
+#define ut_zalloc(n_bytes, key) static_cast<void*>( \
+ ut_allocator<byte>(key).allocate( \
+ n_bytes, NULL, __FILE__, true, false))
+
+#define ut_malloc_nokey(n_bytes) static_cast<void*>( \
+ ut_allocator<byte>(PSI_NOT_INSTRUMENTED).allocate( \
+ n_bytes, NULL, __FILE__, false, false))
+
+#define ut_zalloc_nokey(n_bytes) static_cast<void*>( \
+ ut_allocator<byte>(PSI_NOT_INSTRUMENTED).allocate( \
+ n_bytes, NULL, __FILE__, true, false))
+
+#define ut_zalloc_nokey_nofatal(n_bytes) static_cast<void*>( \
+ ut_allocator<byte, false>(PSI_NOT_INSTRUMENTED).allocate( \
+ n_bytes, NULL, __FILE__, true, false))
+
+#define ut_realloc(ptr, n_bytes) static_cast<void*>( \
+ ut_allocator<byte>(PSI_NOT_INSTRUMENTED).reallocate( \
+ ptr, n_bytes, __FILE__))
+
+#define ut_free(ptr) ut_allocator<byte>(PSI_NOT_INSTRUMENTED).deallocate( \
+ reinterpret_cast<byte*>(ptr))
+
+#else /* UNIV_PFS_MEMORY */
+
+/* Fallbacks when memory tracing is disabled at compile time. */
+
+#define UT_NEW(expr, key) ::new(std::nothrow) expr
+#define UT_NEW_NOKEY(expr) ::new(std::nothrow) expr
+#define UT_DELETE(ptr) ::delete ptr
+
+#define UT_NEW_ARRAY(type, n_elements, key) \
+ ::new(std::nothrow) type[n_elements]
+
+#define UT_NEW_ARRAY_NOKEY(type, n_elements) \
+ ::new(std::nothrow) type[n_elements]
+
+#define UT_DELETE_ARRAY(ptr) ::delete[] ptr
+
+#define ut_malloc(n_bytes, key) ::malloc(n_bytes)
+
+#define ut_zalloc(n_bytes, key) ::calloc(1, n_bytes)
+
+#define ut_malloc_nokey(n_bytes) ::malloc(n_bytes)
+
+#define ut_zalloc_nokey(n_bytes) ::calloc(1, n_bytes)
+
+#define ut_zalloc_nokey_nofatal(n_bytes) ::calloc(1, n_bytes)
+
+#define ut_realloc(ptr, n_bytes) ::realloc(ptr, n_bytes)
+
+#define ut_free(ptr) ::free(ptr)
+
+#endif /* UNIV_PFS_MEMORY */
+
+#endif /* ut0new_h */
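+
+/* Editorial sketch (not from the patch): the cheat sheet at the top of
+this file in compilable form. A named allocator avoids the most vexing
+parse when constructing the container:
+
+	ut_allocator<ulint>	alloc(mem_key_other);
+	std::vector<ulint, ut_allocator<ulint> >	v(alloc);
+	v.push_back(42);
+
+	ulint*	buf = static_cast<ulint*>(
+		ut_malloc(10 * sizeof(ulint), mem_key_other));
+	ut_free(buf);
+*/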
diff --git a/storage/innobase/include/ut0pool.h b/storage/innobase/include/ut0pool.h
new file mode 100644
index 00000000000..f7bc4c5ebdf
--- /dev/null
+++ b/storage/innobase/include/ut0pool.h
@@ -0,0 +1,384 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0pool.h
+Object pool.
+
+Created 2012-Feb-26 Sunny Bains
+***********************************************************************/
+
+#ifndef ut0pool_h
+#define ut0pool_h
+
+#include <vector>
+#include <queue>
+#include <functional>
+
+#include "ut0new.h"
+
+/** Allocate the memory for the object in blocks. We keep the objects sorted
+on pointer so that they are closer together in case they have to be iterated
+over in a list. */
+template <typename Type, typename Factory, typename LockStrategy>
+struct Pool {
+
+ typedef Type value_type;
+
+	// FIXME: Add an assertion to check that alignment and offset are
+	// as we expect. Also, sizeof(void*) can be 8; can we improve on this?
+ struct Element {
+ Pool* m_pool;
+ value_type m_type;
+ };
+
+ /** Constructor
+ @param size size of the memory block */
+ Pool(size_t size)
+ :
+ m_end(),
+ m_start(),
+ m_size(size),
+ m_last()
+ {
+ ut_a(size >= sizeof(Element));
+
+ m_lock_strategy.create();
+
+ ut_a(m_start == 0);
+
+ m_start = reinterpret_cast<Element*>(ut_zalloc_nokey(m_size));
+
+ m_last = m_start;
+
+ m_end = &m_start[m_size / sizeof(*m_start)];
+
+ /* Note: Initialise only a small subset, even though we have
+ allocated all the memory. This is required only because PFS
+ (MTR) results change if we instantiate too many mutexes up
+ front. */
+
+ init(ut_min(size_t(16), size_t(m_end - m_start)));
+
+ ut_ad(m_pqueue.size() <= size_t(m_last - m_start));
+ }
+
+ /** Destructor */
+ ~Pool()
+ {
+ m_lock_strategy.destroy();
+
+ for (Element* elem = m_start; elem != m_last; ++elem) {
+
+ ut_ad(elem->m_pool == this);
+ /* Unpoison the memory for AddressSanitizer */
+ MEM_UNDEFINED(&elem->m_type, sizeof elem->m_type);
+ /* Declare the contents as initialized for Valgrind;
+ we checked this in mem_free(). */
+ UNIV_MEM_VALID(&elem->m_type, sizeof elem->m_type);
+ Factory::destroy(&elem->m_type);
+ }
+
+ ut_free(m_start);
+ m_end = m_last = m_start = 0;
+ m_size = 0;
+ }
+
+ /** Get an object from the pool.
+	@return a free instance or NULL if exhausted. */
+ Type* get()
+ {
+ Element* elem;
+
+ m_lock_strategy.enter();
+
+ if (!m_pqueue.empty()) {
+
+ elem = m_pqueue.top();
+ m_pqueue.pop();
+
+ } else if (m_last < m_end) {
+
+ /* Initialise the remaining elements. */
+ init(m_end - m_last);
+
+ ut_ad(!m_pqueue.empty());
+
+ elem = m_pqueue.top();
+ m_pqueue.pop();
+ } else {
+ elem = NULL;
+ }
+
+#if defined HAVE_valgrind || defined __SANITIZE_ADDRESS__
+ if (elem) {
+ /* Unpoison the memory for AddressSanitizer */
+ MEM_UNDEFINED(&elem->m_type, sizeof elem->m_type);
+ /* Declare the memory initialized for Valgrind.
+ The trx_t that are released to the pool are
+ actually initialized; we checked that by
+ UNIV_MEM_ASSERT_RW() in mem_free() below. */
+ UNIV_MEM_VALID(&elem->m_type, sizeof elem->m_type);
+ }
+#endif
+
+ m_lock_strategy.exit();
+ return elem ? &elem->m_type : NULL;
+ }
+
+ /** Add the object to the pool.
+ @param ptr object to free */
+ static void mem_free(value_type* ptr)
+ {
+ Element* elem;
+ byte* p = reinterpret_cast<byte*>(ptr + 1);
+
+ elem = reinterpret_cast<Element*>(p - sizeof(*elem));
+ UNIV_MEM_ASSERT_RW(&elem->m_type, sizeof elem->m_type);
+
+ elem->m_pool->m_lock_strategy.enter();
+
+ elem->m_pool->putl(elem);
+ MEM_NOACCESS(&elem->m_type, sizeof elem->m_type);
+
+ elem->m_pool->m_lock_strategy.exit();
+ }
+
+protected:
+ // Disable copying
+ Pool(const Pool&);
+ Pool& operator=(const Pool&);
+
+private:
+
+ /* We only need to compare on pointer address. */
+ typedef std::priority_queue<
+ Element*,
+ std::vector<Element*, ut_allocator<Element*> >,
+ std::greater<Element*> > pqueue_t;
+
+ /** Release the object to the free pool
+ @param elem element to free */
+ void putl(Element* elem)
+ {
+ ut_ad(elem >= m_start && elem < m_last);
+
+ ut_ad(Factory::debug(&elem->m_type));
+
+ m_pqueue.push(elem);
+ }
+
+ /** Initialise the elements.
+ @param n_elems Number of elements to initialise */
+ void init(size_t n_elems)
+ {
+ ut_ad(size_t(m_end - m_last) >= n_elems);
+
+ for (size_t i = 0; i < n_elems; ++i, ++m_last) {
+
+ m_last->m_pool = this;
+ Factory::init(&m_last->m_type);
+ m_pqueue.push(m_last);
+ }
+
+ ut_ad(m_last <= m_end);
+ }
+
+private:
+ /** Pointer to the last element */
+ Element* m_end;
+
+ /** Pointer to the first element */
+ Element* m_start;
+
+ /** Size of the block in bytes */
+ size_t m_size;
+
+ /** Upper limit of used space */
+ Element* m_last;
+
+	/** Priority queue ordered on the pointer address. */
+ pqueue_t m_pqueue;
+
+ /** Lock strategy to use */
+ LockStrategy m_lock_strategy;
+};
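+
+/* Editorial sketch (not from the patch; the names are illustrative):
+the contracts the template parameters must satisfy. A Factory provides
+static init()/destroy()/debug() for the element type, and a
+LockStrategy provides create()/destroy()/enter()/exit():
+
+	struct TrxFactory {
+		static void init(trx_t* trx);
+		static void destroy(trx_t* trx);
+		static bool debug(const trx_t* trx);
+	};
+*/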
+
+template <typename Pool, typename LockStrategy>
+struct PoolManager {
+
+ typedef Pool PoolType;
+ typedef typename PoolType::value_type value_type;
+
+ PoolManager(size_t size)
+ :
+ m_size(size)
+ {
+ create();
+ }
+
+ ~PoolManager()
+ {
+ destroy();
+
+ ut_a(m_pools.empty());
+ }
+
+ /** Get an element from one of the pools.
+	@return instance from one of the pools; waits until one is available. */
+ value_type* get()
+ {
+ size_t index = 0;
+ size_t delay = 1;
+ value_type* ptr = NULL;
+
+ do {
+ m_lock_strategy.enter();
+
+ ut_ad(!m_pools.empty());
+
+ size_t n_pools = m_pools.size();
+
+ PoolType* pool = m_pools[index % n_pools];
+
+ m_lock_strategy.exit();
+
+ ptr = pool->get();
+
+ if (ptr == 0 && (index / n_pools) > 2) {
+
+ if (!add_pool(n_pools)) {
+
+ ib::error() << "Failed to allocate"
+ " memory for a pool of size "
+ << m_size << " bytes. Will"
+ " wait for " << delay
+ << " seconds for a thread to"
+ " free a resource";
+
+ /* There is nothing much we can do
+ except crash and burn, however lets
+ be a little optimistic and wait for
+ a resource to be freed. */
+ os_thread_sleep(delay * 1000000);
+
+ if (delay < 32) {
+ delay <<= 1;
+ }
+
+ } else {
+ delay = 1;
+ }
+ }
+
+ ++index;
+
+ } while (ptr == NULL);
+
+ return(ptr);
+ }
+
+ static void mem_free(value_type* ptr)
+ {
+ PoolType::mem_free(ptr);
+ }
+
+private:
+ /** Add a new pool
+	@param n_pools	number of pools that existed when add_pool()
+	was called.
+ @return true on success */
+ bool add_pool(size_t n_pools)
+ {
+ bool added = false;
+
+ m_lock_strategy.enter();
+
+ if (n_pools < m_pools.size()) {
+ /* Some other thread already added a pool. */
+ added = true;
+ } else {
+ PoolType* pool;
+
+ ut_ad(n_pools == m_pools.size());
+
+ pool = UT_NEW_NOKEY(PoolType(m_size));
+
+ if (pool != NULL) {
+
+ ut_ad(n_pools <= m_pools.size());
+
+ m_pools.push_back(pool);
+
+ ib::info() << "Number of pools: "
+ << m_pools.size();
+
+ added = true;
+ }
+ }
+
+ ut_ad(n_pools < m_pools.size() || !added);
+
+ m_lock_strategy.exit();
+
+ return(added);
+ }
+
+ /** Create the pool manager. */
+ void create()
+ {
+ ut_a(m_size > sizeof(value_type));
+ m_lock_strategy.create();
+
+ add_pool(0);
+ }
+
+ /** Release the resources. */
+ void destroy()
+ {
+ typename Pools::iterator it;
+ typename Pools::iterator end = m_pools.end();
+
+ for (it = m_pools.begin(); it != end; ++it) {
+ PoolType* pool = *it;
+
+ UT_DELETE(pool);
+ }
+
+ m_pools.clear();
+
+ m_lock_strategy.destroy();
+ }
+private:
+ // Disable copying
+ PoolManager(const PoolManager&);
+ PoolManager& operator=(const PoolManager&);
+
+ typedef std::vector<PoolType*, ut_allocator<PoolType*> > Pools;
+
+ /** Size of each block */
+ size_t m_size;
+
+ /** Pools managed by this manager */
+ Pools m_pools;
+
+ /** Lock strategy to use */
+ LockStrategy m_lock_strategy;
+};
+
+#endif /* ut0pool_h */
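
To make the two templates above concrete, here is a minimal usage sketch. The element type, factory, and lock strategy below are hypothetical stand-ins (Factory::destroy() is assumed by analogy with init()); only the interfaces shown in this header are relied on. In the server itself the transaction pool in trx0trx.cc is the in-tree instantiation.

#include "ut0pool.h"

/* Hypothetical payload and factory; only init()/debug() appear in
the header above, destroy() is an assumed counterpart. */
struct MyType { int x; };

struct MyFactory {
	static void init(MyType* t) { t->x = 0; }
	static void destroy(MyType*) {}
	static bool debug(const MyType*) { return true; }
};

/* A no-op lock strategy, for single-threaded illustration only. */
struct NoopLock {
	void create() {}
	void destroy() {}
	void enter() {}
	void exit() {}
};

typedef Pool<MyType, MyFactory, NoopLock>	my_pool_t;
typedef PoolManager<my_pool_t, NoopLock>	my_pools_t;

void pool_example()
{
	my_pools_t	pools(16384);	/* bytes per pool block */

	MyType*	t = pools.get();	/* loops until an element is free */
	my_pools_t::mem_free(t);	/* return the element to its pool */
}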
diff --git a/storage/innobase/include/ut0rbt.h b/storage/innobase/include/ut0rbt.h
index dd483836709..38071165c3f 100644
--- a/storage/innobase/include/ut0rbt.h
+++ b/storage/innobase/include/ut0rbt.h
@@ -1,6 +1,6 @@
-/***************************************************************************//**
+/*****************************************************************************
-Copyright (c) 2007, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,7 +26,6 @@ Created 2007-03-20 Sunny Bains
#define INNOBASE_UT0RBT_H
#if !defined(IB_RBT_TESTING)
-#include "univ.i"
#include "ut0mem.h"
#else
#include <stdio.h>
@@ -111,15 +110,13 @@ struct ib_rbt_bound_t {
/**********************************************************************//**
Free an instance of a red black tree */
-UNIV_INTERN
void
rbt_free(
/*=====*/
ib_rbt_t* tree); /*!< in: rb tree to free */
/**********************************************************************//**
Create an instance of a red black tree
-@return rb tree instance */
-UNIV_INTERN
+@return rb tree instance */
ib_rbt_t*
rbt_create(
/*=======*/
@@ -128,8 +125,7 @@ rbt_create(
/**********************************************************************//**
Create an instance of a red black tree, whose comparison function takes
an argument
-@return rb tree instance */
-UNIV_INTERN
+@return rb tree instance */
ib_rbt_t*
rbt_create_arg_cmp(
/*===============*/
@@ -139,7 +135,6 @@ rbt_create_arg_cmp(
void* cmp_arg); /*!< in: compare fn arg */
/**********************************************************************//**
Delete a node from the red black tree, identified by key */
-UNIV_INTERN
ibool
rbt_delete(
/*=======*/
@@ -149,8 +144,7 @@ rbt_delete(
/**********************************************************************//**
Remove a node from the red black tree, NOTE: This function will not delete
the node instance, THAT IS THE CALLERS RESPONSIBILITY.
-@return the deleted node with the const. */
-UNIV_INTERN
+@return the deleted node with the const. */
ib_rbt_node_t*
rbt_remove_node(
/*============*/
@@ -161,19 +155,8 @@ rbt_remove_node(
because the caller has access
only to const nodes.*/
/**********************************************************************//**
-Return a node from the red black tree, identified by
-key, NULL if not found
-@return node if found else return NULL */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_lookup(
-/*=======*/
- const ib_rbt_t* tree, /*!< in: rb tree to search */
- const void* key); /*!< in: key to lookup */
-/**********************************************************************//**
Add data to the red black tree, identified by key (no dups yet!)
-@return inserted node */
-UNIV_INTERN
+@return inserted node */
const ib_rbt_node_t*
rbt_insert(
/*=======*/
@@ -183,8 +166,7 @@ rbt_insert(
copied to the node.*/
/**********************************************************************//**
Add a new node to the tree, useful for data that is pre-sorted.
-@return appended node */
-UNIV_INTERN
+@return appended node */
const ib_rbt_node_t*
rbt_add_node(
/*=========*/
@@ -194,24 +176,21 @@ rbt_add_node(
to the node */
/**********************************************************************//**
Return the left most data node in the tree
-@return left most node */
-UNIV_INTERN
+@return left most node */
const ib_rbt_node_t*
rbt_first(
/*======*/
const ib_rbt_t* tree); /*!< in: rb tree */
/**********************************************************************//**
Return the right most data node in the tree
-@return right most node */
-UNIV_INTERN
+@return right most node */
const ib_rbt_node_t*
rbt_last(
/*=====*/
const ib_rbt_t* tree); /*!< in: rb tree */
/**********************************************************************//**
Return the next node from current.
-@return successor node to current that is passed in. */
-UNIV_INTERN
+@return successor node to current that is passed in. */
const ib_rbt_node_t*
rbt_next(
/*=====*/
@@ -220,8 +199,7 @@ rbt_next(
current);
/**********************************************************************//**
Return the prev node from current.
-@return precedessor node to current that is passed in */
-UNIV_INTERN
+@return predecessor node to current that is passed in */
const ib_rbt_node_t*
rbt_prev(
/*=====*/
@@ -229,29 +207,10 @@ rbt_prev(
const ib_rbt_node_t* /* in: current node */
current);
/**********************************************************************//**
-Find the node that has the lowest key that is >= key.
-@return node that satisfies the lower bound constraint or NULL */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_lower_bound(
-/*============*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- const void* key); /*!< in: key to search */
-/**********************************************************************//**
-Find the node that has the greatest key that is <= key.
-@return node that satisifies the upper bound constraint or NULL */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_upper_bound(
-/*============*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- const void* key); /*!< in: key to search */
-/**********************************************************************//**
Search for the key, a node will be returned in parent.last, whether it
was found or not. If not found then parent.last will contain the
parent node for the possibly new key otherwise the matching node.
-@return result of last comparison */
-UNIV_INTERN
+@return result of last comparison */
int
rbt_search(
/*=======*/
@@ -262,8 +221,7 @@ rbt_search(
Search for the key, a node will be returned in parent.last, whether it
was found or not. If not found then parent.last will contain the
parent node for the possibly new key otherwise the matching node.
-@return result of last comparison */
-UNIV_INTERN
+@return result of last comparison */
int
rbt_search_cmp(
/*===========*/
@@ -275,50 +233,22 @@ rbt_search_cmp(
arg_compare); /*!< in: fn to compare items
with argument */
/**********************************************************************//**
-Clear the tree, deletes (and free's) all the nodes. */
-UNIV_INTERN
-void
-rbt_clear(
-/*======*/
- ib_rbt_t* tree); /*!< in: rb tree */
-/**********************************************************************//**
Merge the node from dst into src. Return the number of nodes merged.
-@return no. of recs merged */
-UNIV_INTERN
+@return no. of recs merged */
ulint
rbt_merge_uniq(
/*===========*/
ib_rbt_t* dst, /*!< in: dst rb tree */
const ib_rbt_t* src); /*!< in: src rb tree */
-/**********************************************************************//**
-Merge the node from dst into src. Return the number of nodes merged.
-Delete the nodes from src after copying node to dst. As a side effect
-the duplicates will be left untouched in the src, since we don't support
-duplicates (yet). NOTE: src and dst must be similar, the function doesn't
-check for this condition (yet).
-@return no. of recs merged */
-UNIV_INTERN
-ulint
-rbt_merge_uniq_destructive(
-/*=======================*/
- ib_rbt_t* dst, /*!< in: dst rb tree */
- ib_rbt_t* src); /*!< in: src rb tree */
+#if defined UNIV_DEBUG || defined IB_RBT_TESTING
/**********************************************************************//**
Verify the integrity of the RB tree. For debugging. 0 failure else height
of tree (in count of black nodes).
-@return TRUE if OK FALSE if tree invalid. */
-UNIV_INTERN
+@return TRUE if OK FALSE if tree invalid. */
ibool
rbt_validate(
/*=========*/
const ib_rbt_t* tree); /*!< in: tree to validate */
-/**********************************************************************//**
-Iterate over the tree in depth first order. */
-UNIV_INTERN
-void
-rbt_print(
-/*======*/
- const ib_rbt_t* tree, /*!< in: tree to traverse */
- ib_rbt_print_node print); /*!< in: print function */
+#endif /* UNIV_DEBUG || IB_RBT_TESTING */
#endif /* INNOBASE_UT0RBT_H */
diff --git a/storage/innobase/include/ut0rnd.h b/storage/innobase/include/ut0rnd.h
index 0f8474225fa..9af8687bfd0 100644
--- a/storage/innobase/include/ut0rnd.h
+++ b/storage/innobase/include/ut0rnd.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,62 +27,55 @@ Created 1/20/1994 Heikki Tuuri
#ifndef ut0rnd_h
#define ut0rnd_h
-#include "univ.i"
-
-#ifndef UNIV_INNOCHECKSUM
-
#include "ut0byte.h"
+#include <my_sys.h>
-/** The 'character code' for end of field or string (used
-in folding records */
-#define UT_END_OF_FIELD 257
+#ifndef UNIV_INNOCHECKSUM
+/** Seed value of ut_rnd_gen() */
+extern int32 ut_rnd_current;
+
+/** @return a pseudo-random 32-bit number */
+inline uint32_t ut_rnd_gen()
+{
+ /* This is a Galois linear-feedback shift register.
+ https://en.wikipedia.org/wiki/Linear-feedback_shift_register#Galois_LFSRs
+ The generating primitive Galois Field polynomial is the Castagnoli
+ polynomial that was made popular by CRC-32C:
+ x^32+x^28+x^27+x^26+x^25+x^23+x^22+x^20+
+ x^19+x^18+x^14+x^13+x^11+x^10+x^9+x^8+x^6+1 */
+ const uint32_t crc32c= 0x1edc6f41;
+
+ uint32_t rnd= my_atomic_load32_explicit(&ut_rnd_current,
+ MY_MEMORY_ORDER_RELAXED);
+
+ if (UNIV_UNLIKELY(rnd == 0))
+ {
+ rnd= static_cast<uint32_t>(my_interval_timer());
+ if (!rnd) rnd= 1;
+ }
+ else
+ {
+ bool lsb= rnd & 1;
+ rnd>>= 1;
+ if (lsb)
+ rnd^= crc32c;
+ }
+
+ my_atomic_store32_explicit(&ut_rnd_current, rnd, MY_MEMORY_ORDER_RELAXED);
+ return rnd;
+}
+
+/** @return a random number between 0 and n-1, inclusive */
+inline ulint ut_rnd_interval(ulint n)
+{
+ return n > 1 ? static_cast<ulint>(ut_rnd_gen() % n) : 0;
+}
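
As a self-contained illustration of the shift-register step above (no atomics or server types; the helper name is made up):

#include <stdint.h>

/* One Galois LFSR step with the CRC-32C polynomial, mirroring
ut_rnd_gen() without the atomic load/store around it. */
static uint32_t lfsr_step(uint32_t rnd)
{
	const uint32_t crc32c = 0x1edc6f41;
	bool lsb = rnd & 1;
	rnd >>= 1;
	return lsb ? rnd ^ crc32c : rnd;
}
/* Starting from 1, the sequence runs 1, 0x1edc6f41, 0x11b258e1, ... */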
-/********************************************************//**
-This is used to set the random number seed. */
-UNIV_INLINE
-void
-ut_rnd_set_seed(
-/*============*/
- ulint seed); /*!< in: seed */
-/********************************************************//**
-The following function generates a series of 'random' ulint integers.
-@return the next 'random' number */
-UNIV_INLINE
-ulint
-ut_rnd_gen_next_ulint(
-/*==================*/
- ulint rnd); /*!< in: the previous random number value */
-/*********************************************************//**
-The following function generates 'random' ulint integers which
-enumerate the value space (let there be N of them) of ulint integers
-in a pseudo-random fashion. Note that the same integer is repeated
-always after N calls to the generator.
-@return the 'random' number */
-UNIV_INLINE
-ulint
-ut_rnd_gen_ulint(void);
-/*==================*/
-/********************************************************//**
-Generates a random integer from a given interval.
-@return the 'random' number */
-UNIV_INLINE
-ulint
-ut_rnd_interval(
-/*============*/
- ulint low, /*!< in: low limit; can generate also this value */
- ulint high); /*!< in: high limit; can generate also this value */
-/*********************************************************//**
-Generates a random iboolean value.
-@return the random value */
-UNIV_INLINE
-ibool
-ut_rnd_gen_ibool(void);
-/*=================*/
/*******************************************************//**
The following function generates a hash value for a ulint integer
to a hash table of size table_size, which should be a prime or some
random number to work reliably.
-@return hash value */
+@return hash value */
UNIV_INLINE
ulint
ut_hash_ulint(
@@ -90,7 +84,7 @@ ut_hash_ulint(
ulint table_size); /*!< in: hash table size */
/*************************************************************//**
Folds a 64-bit integer.
-@return folded value */
+@return folded value */
UNIV_INLINE
ulint
ut_fold_ull(
@@ -99,18 +93,17 @@ ut_fold_ull(
MY_ATTRIBUTE((const));
/*************************************************************//**
Folds a character string ending in the null character.
-@return folded value */
+@return folded value */
UNIV_INLINE
ulint
ut_fold_string(
/*===========*/
const char* str) /*!< in: null-terminated string */
- MY_ATTRIBUTE((pure));
+ MY_ATTRIBUTE((warn_unused_result));
/***********************************************************//**
Looks for a prime number slightly greater than the given argument.
The prime is chosen so that it is not near any power of 2.
-@return prime */
-UNIV_INTERN
+@return prime */
ulint
ut_find_prime(
/*==========*/
@@ -121,7 +114,7 @@ ut_find_prime(
/*************************************************************//**
Folds a pair of ulints.
-@return folded value */
+@return folded value */
UNIV_INLINE
ulint
ut_fold_ulint_pair(
@@ -131,7 +124,7 @@ ut_fold_ulint_pair(
MY_ATTRIBUTE((const));
/*************************************************************//**
Folds a binary string.
-@return folded value */
+@return folded value */
UNIV_INLINE
ulint
ut_fold_binary(
@@ -140,9 +133,6 @@ ut_fold_binary(
ulint len) /*!< in: length */
MY_ATTRIBUTE((pure));
-
-#ifndef UNIV_NONINL
#include "ut0rnd.ic"
-#endif
#endif
diff --git a/storage/innobase/include/ut0rnd.ic b/storage/innobase/include/ut0rnd.ic
index ce8c7203c8e..c0105160a42 100644
--- a/storage/innobase/include/ut0rnd.ic
+++ b/storage/innobase/include/ut0rnd.ic
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -29,123 +29,11 @@ Created 5/30/1994 Heikki Tuuri
#ifndef UNIV_INNOCHECKSUM
-#define UT_RND1 151117737
-#define UT_RND2 119785373
-#define UT_RND3 85689495
-#define UT_RND4 76595339
-#define UT_SUM_RND2 98781234
-#define UT_SUM_RND3 126792457
-#define UT_SUM_RND4 63498502
-#define UT_XOR_RND1 187678878
-#define UT_XOR_RND2 143537923
-
-/** Seed value of ut_rnd_gen_ulint() */
-extern ulint ut_rnd_ulint_counter;
-
-/********************************************************//**
-This is used to set the random number seed. */
-UNIV_INLINE
-void
-ut_rnd_set_seed(
-/*============*/
- ulint seed) /*!< in: seed */
-{
- ut_rnd_ulint_counter = seed;
-}
-
-/********************************************************//**
-The following function generates a series of 'random' ulint integers.
-@return the next 'random' number */
-UNIV_INLINE
-ulint
-ut_rnd_gen_next_ulint(
-/*==================*/
- ulint rnd) /*!< in: the previous random number value */
-{
- ulint n_bits;
-
- n_bits = 8 * sizeof(ulint);
-
- rnd = UT_RND2 * rnd + UT_SUM_RND3;
- rnd = UT_XOR_RND1 ^ rnd;
- rnd = (rnd << 20) + (rnd >> (n_bits - 20));
- rnd = UT_RND3 * rnd + UT_SUM_RND4;
- rnd = UT_XOR_RND2 ^ rnd;
- rnd = (rnd << 20) + (rnd >> (n_bits - 20));
- rnd = UT_RND1 * rnd + UT_SUM_RND2;
-
- return(rnd);
-}
-
-/********************************************************//**
-The following function generates 'random' ulint integers which
-enumerate the value space of ulint integers in a pseudo random
-fashion. Note that the same integer is repeated always after
-2 to power 32 calls to the generator (if ulint is 32-bit).
-@return the 'random' number */
-UNIV_INLINE
-ulint
-ut_rnd_gen_ulint(void)
-/*==================*/
-{
- ulint rnd;
-
- ut_rnd_ulint_counter = UT_RND1 * ut_rnd_ulint_counter + UT_RND2;
-
- rnd = ut_rnd_gen_next_ulint(ut_rnd_ulint_counter);
-
- return(rnd);
-}
-
-/********************************************************//**
-Generates a random integer from a given interval.
-@return the 'random' number */
-UNIV_INLINE
-ulint
-ut_rnd_interval(
-/*============*/
- ulint low, /*!< in: low limit; can generate also this value */
- ulint high) /*!< in: high limit; can generate also this value */
-{
- ulint rnd;
-
- ut_ad(high >= low);
-
- if (low == high) {
-
- return(low);
- }
-
- rnd = ut_rnd_gen_ulint();
-
- return(low + (rnd % (high - low)));
-}
-
-/*********************************************************//**
-Generates a random iboolean value.
-@return the random value */
-UNIV_INLINE
-ibool
-ut_rnd_gen_ibool(void)
-/*=================*/
-{
- ulint x;
-
- x = ut_rnd_gen_ulint();
-
- if (((x >> 20) + (x >> 15)) & 1) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
/*******************************************************//**
The following function generates a hash value for a ulint integer
to a hash table of size table_size, which should be a prime
or some random number for the hash table to work reliably.
-@return hash value */
+@return hash value */
UNIV_INLINE
ulint
ut_hash_ulint(
@@ -161,7 +49,7 @@ ut_hash_ulint(
/*************************************************************//**
Folds a 64-bit integer.
-@return folded value */
+@return folded value */
UNIV_INLINE
ulint
ut_fold_ull(
@@ -174,7 +62,7 @@ ut_fold_ull(
/*************************************************************//**
Folds a character string ending in the null character.
-@return folded value */
+@return folded value */
UNIV_INLINE
ulint
ut_fold_string(
@@ -197,7 +85,7 @@ ut_fold_string(
/*************************************************************//**
Folds a pair of ulints.
-@return folded value */
+@return folded value */
UNIV_INLINE
ulint
ut_fold_ulint_pair(
@@ -211,7 +99,7 @@ ut_fold_ulint_pair(
/*************************************************************//**
Folds a binary string.
-@return folded value */
+@return folded value */
UNIV_INLINE
ulint
ut_fold_binary(
diff --git a/storage/innobase/include/ut0sort.h b/storage/innobase/include/ut0sort.h
index 12f482078ff..4f1d4c04901 100644
--- a/storage/innobase/include/ut0sort.h
+++ b/storage/innobase/include/ut0sort.h
@@ -26,8 +26,6 @@ Created 11/9/1995 Heikki Tuuri
#ifndef ut0sort_h
#define ut0sort_h
-#include "univ.i"
-
/* This module gives a macro definition of the body of
a standard sort function for an array of elements of any
type. The comparison function is given as a parameter to
diff --git a/storage/innobase/include/ut0stage.h b/storage/innobase/include/ut0stage.h
new file mode 100644
index 00000000000..a369daa8bb3
--- /dev/null
+++ b/storage/innobase/include/ut0stage.h
@@ -0,0 +1,593 @@
+/*****************************************************************************
+
+Copyright (c) 2014, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file ut/ut0stage.h
+Supplementary code for performance schema stage instrumentation.
+
+Created Nov 12, 2014 Vasil Dimov
+*******************************************************/
+
+#ifndef ut0stage_h
+#define ut0stage_h
+
+#include <algorithm>
+#include <math.h>
+
+#include "my_global.h" /* needed for headers from mysql/psi/ */
+
+#include "mysql/psi/mysql_stage.h" /* mysql_stage_inc_work_completed */
+#include "mysql/psi/psi.h" /* HAVE_PSI_STAGE_INTERFACE, PSI_stage_progress */
+
+#include "dict0mem.h" /* dict_index_t */
+#include "row0log.h" /* row_log_estimate_work() */
+#include "srv0srv.h" /* ut_stage_alter_t */
+
+#ifdef HAVE_PSI_STAGE_INTERFACE
+
+typedef void PSI_stage_progress;
+
+/** Class used to report ALTER TABLE progress via performance_schema.
+The only user of this class is the ALTER TABLE code, and it calls the methods
+in the following order:
+constructor
+begin_phase_read_pk()
+ multiple times:
+ n_pk_recs_inc() // once per record read
+ inc() // once per page read
+end_phase_read_pk()
+if any new indexes are being added, for each one:
+ begin_phase_sort()
+ multiple times:
+ inc() // once per record sorted
+ begin_phase_insert()
+ multiple times:
+ inc() // once per record inserted
+ begin_phase_log_index()
+ multiple times:
+ inc() // once per log-block applied
+begin_phase_flush()
+ multiple times:
+ inc() // once per page flushed
+begin_phase_log_table()
+ multiple times:
+ inc() // once per log-block applied
+begin_phase_end()
+destructor
+
+This class knows the specifics of each phase and tries to increment the
+progress in an even manner across the entire ALTER TABLE lifetime. */
+class ut_stage_alter_t {
+public:
+ /** Constructor.
+ @param[in] pk primary key of the old table */
+ explicit
+ ut_stage_alter_t(
+ const dict_index_t* pk)
+ :
+ m_progress(NULL),
+ m_pk(pk),
+ m_n_pk_recs(0),
+ m_n_pk_pages(0),
+ m_n_recs_processed(0),
+ m_n_flush_pages(0),
+ m_cur_phase(NOT_STARTED)
+ {
+ }
+
+ /** Destructor. */
+ ~ut_stage_alter_t();
+
+ /** Flag an ALTER TABLE start (read primary key phase).
+ @param[in] n_sort_indexes number of indexes that will be sorted
+ during ALTER TABLE, used for estimating the total work to be done */
+ void
+ begin_phase_read_pk(
+ ulint n_sort_indexes);
+
+ /** Increment the number of records in PK (table) by 1.
+ This is used to get a more accurate estimate of the number of
+ records per page, which is needed because some phases work on a
+ per-page basis while others work on a per-record basis, and we
+ want the progress to advance as evenly as possible. */
+ void
+ n_pk_recs_inc();
+
+ /** Flag either one record or one page processed, depending on the
+ current phase.
+ @param[in] inc_val flag this many units processed at once */
+ void
+ inc(
+ ulint inc_val = 1);
+
+ /** Flag the end of reading of the primary key.
+ Here we know the exact number of pages and records and calculate
+ the number of records per page and refresh the estimate. */
+ void
+ end_phase_read_pk();
+
+ /** Flag the beginning of the sort phase.
+ @param[in] sort_multi_factor since merge sort processes
+ each page more than once, we only update the estimate once per
+ this many pages processed. */
+ void
+ begin_phase_sort(
+ double sort_multi_factor);
+
+ /** Flag the beginning of the insert phase. */
+ void
+ begin_phase_insert();
+
+ /** Flag the beginning of the flush phase.
+ @param[in] n_flush_pages this many pages are going to be
+ flushed */
+ void
+ begin_phase_flush(
+ ulint n_flush_pages);
+
+ /** Flag the beginning of the log index phase. */
+ void
+ begin_phase_log_index();
+
+ /** Flag the beginning of the log table phase. */
+ void
+ begin_phase_log_table();
+
+ /** Flag the beginning of the end phase. */
+ void
+ begin_phase_end();
+
+private:
+
+ /** Update the estimate of total work to be done. */
+ void
+ reestimate();
+
+ /** Change the current phase.
+ @param[in] new_stage pointer to the new stage to change to */
+ void
+ change_phase(
+ const PSI_stage_info* new_stage);
+
+ /** Performance schema accounting object. */
+ /* TODO: MySQL 5.7 PSI */
+ PSI_stage_progress* m_progress;
+
+ /** Old table PK. Used for calculating the estimate. */
+ const dict_index_t* m_pk;
+
+ /** Number of records in the primary key (table), including delete
+ marked records. */
+ ulint m_n_pk_recs;
+
+ /** Number of leaf pages in the primary key. */
+ ulint m_n_pk_pages;
+
+ /** Estimated number of records per page in the primary key. */
+ double m_n_recs_per_page;
+
+ /** Number of indexes that are being added. */
+ ulint m_n_sort_indexes;
+
+ /** During the sort phase, increment the counter once per this
+ many pages processed. This is because sort processes each page
+ more than once. */
+ ulint m_sort_multi_factor;
+
+ /** Number of records processed during sort & insert phases. We
+ need to increment the counter only once per page, or once per
+ recs-per-page records. */
+ ulint m_n_recs_processed;
+
+ /** Number of pages to flush. */
+ ulint m_n_flush_pages;
+
+ /** Current phase. */
+ enum {
+ NOT_STARTED = 0,
+ READ_PK = 1,
+ SORT = 2,
+ INSERT = 3,
+ FLUSH = 4,
+ /* JAN: TODO: MySQL 5.7 vrs. MariaDB sql/log.h
+ LOG_INDEX = 5,
+ LOG_TABLE = 6, */
+ LOG_INNODB_INDEX = 5,
+ LOG_INNODB_TABLE = 6,
+ END = 7,
+ } m_cur_phase;
+};
+
+/** Destructor. */
+inline
+ut_stage_alter_t::~ut_stage_alter_t()
+{
+ if (m_progress == NULL) {
+ return;
+ }
+
+ /* TODO: MySQL 5.7 PSI: Set completed = estimated before we quit.
+ mysql_stage_set_work_completed(
+ m_progress,
+ mysql_stage_get_work_estimated(m_progress));
+
+ mysql_end_stage();
+ */
+}
+
+/** Flag an ALTER TABLE start (read primary key phase).
+@param[in] n_sort_indexes number of indexes that will be sorted
+during ALTER TABLE, used for estimating the total work to be done */
+inline
+void
+ut_stage_alter_t::begin_phase_read_pk(
+ ulint n_sort_indexes)
+{
+ m_n_sort_indexes = n_sort_indexes;
+
+ m_cur_phase = READ_PK;
+
+ /* TODO: MySQL 5.7 PSI
+ m_progress = mysql_set_stage(
+ srv_stage_alter_table_read_pk_internal_sort.m_key);
+
+ mysql_stage_set_work_completed(m_progress, 0);
+ */
+ reestimate();
+}
+
+/** Increment the number of records in PK (table) by 1.
+This is used to get a more accurate estimate of the number of
+records per page, which is needed because some phases work on a
+per-page basis while others work on a per-record basis, and we
+want the progress to advance as evenly as possible. */
+inline
+void
+ut_stage_alter_t::n_pk_recs_inc()
+{
+ m_n_pk_recs++;
+}
+
+/** Flag either one record or one page processed, depending on the
+current phase. */
+inline
+void
+ut_stage_alter_t::inc(ulint)
+{
+ if (m_progress == NULL) {
+ return;
+ }
+
+ ulint multi_factor = 1;
+ bool should_proceed = true;
+
+ switch (m_cur_phase) {
+ case NOT_STARTED:
+ ut_error;
+ case READ_PK:
+ m_n_pk_pages++;
+#if 0 /* TODO: MySQL 5.7 PSI */
+ ut_ad(inc_val == 1);
+ /* Overall the read pk phase will read all the pages from the
+ PK and will do work, proportional to the number of added
+ indexes, thus when this is called once per read page we
+ increment with 1 + m_n_sort_indexes */
+ inc_val = 1 + m_n_sort_indexes;
+#endif
+ break;
+ case SORT:
+ multi_factor = m_sort_multi_factor;
+ /* fall through */
+ case INSERT: {
+ /* Increment the progress every nth record. During the
+ sort and insert phases, this method is called once per
+ record processed. We need fractional numbers here
+ because "records per page" is naturally fractional, and
+ to avoid rounding skew: if there are (double) N records
+ per page, then work_completed should be incremented on
+ the inc() calls numbered round(k*N), for k=1,2,3... */
+ const double every_nth = m_n_recs_per_page * multi_factor;
+
+ const ulint k = static_cast<ulint>(
+ round(m_n_recs_processed / every_nth));
+
+ const ulint nth = static_cast<ulint>(
+ round(k * every_nth));
+
+ should_proceed = m_n_recs_processed == nth;
+
+ m_n_recs_processed++;
+
+ break;
+ }
+ case FLUSH:
+ break;
+ /* JAN: TODO: MySQL 5.7
+ case LOG_INDEX:
+ break;
+ case LOG_TABLE:
+ break; */
+ case LOG_INNODB_INDEX:
+ case LOG_INNODB_TABLE:
+ break;
+ case END:
+ break;
+ }
+
+ if (should_proceed) {
+ /* TODO: MySQL 5.7 PSI
+ mysql_stage_inc_work_completed(m_progress, inc_val);
+ */
+ reestimate();
+ }
+}
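
The every_nth/k/nth dance above can be hard to follow; a small standalone sketch (with a hypothetical value of 4.5 records per page) shows which calls actually update the counter:

#include <cmath>
#include <cstdio>

int main()
{
	const double every_nth = 4.5;	/* records per page, hypothetical */

	for (unsigned n = 0; n < 20; n++) {
		unsigned k = unsigned(std::round(n / every_nth));
		unsigned nth = unsigned(std::round(k * every_nth));

		if (n == nth) {
			/* fires at n = 0, 5, 9, 14, 18: one update
			per ~4.5 inc() calls, as intended */
			std::printf("update at call %u\n", n);
		}
	}
	return 0;
}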
+
+/** Flag the end of reading of the primary key.
+Here we know the exact number of pages and records and calculate
+the number of records per page and refresh the estimate. */
+inline
+void
+ut_stage_alter_t::end_phase_read_pk()
+{
+ reestimate();
+
+ if (m_n_pk_pages == 0) {
+ /* The number of pages in the PK could be 0 if the tree is
+ empty. In this case we set m_n_recs_per_page to 1 to avoid
+ division by zero later. */
+ m_n_recs_per_page = 1.0;
+ } else {
+ m_n_recs_per_page = std::max(
+ static_cast<double>(m_n_pk_recs) / m_n_pk_pages,
+ 1.0);
+ }
+}
+
+/** Flag the beginning of the sort phase.
+@param[in] sort_multi_factor since merge sort processes
+each page more than once, we only update the estimate once per
+this many pages processed. */
+inline
+void
+ut_stage_alter_t::begin_phase_sort(
+ double sort_multi_factor)
+{
+ if (sort_multi_factor <= 1.0) {
+ m_sort_multi_factor = 1;
+ } else {
+ m_sort_multi_factor = static_cast<ulint>(
+ round(sort_multi_factor));
+ }
+
+ change_phase(&srv_stage_alter_table_merge_sort);
+}
+
+/** Flag the beginning of the insert phase. */
+inline
+void
+ut_stage_alter_t::begin_phase_insert()
+{
+ change_phase(&srv_stage_alter_table_insert);
+}
+
+/** Flag the beginning of the flush phase.
+@param[in] n_flush_pages this many pages are going to be
+flushed */
+inline
+void
+ut_stage_alter_t::begin_phase_flush(
+ ulint n_flush_pages)
+{
+ m_n_flush_pages = n_flush_pages;
+
+ reestimate();
+
+ change_phase(&srv_stage_alter_table_flush);
+}
+
+/** Flag the beginning of the log index phase. */
+inline
+void
+ut_stage_alter_t::begin_phase_log_index()
+{
+ change_phase(&srv_stage_alter_table_log_index);
+}
+
+/** Flag the beginning of the log table phase. */
+inline
+void
+ut_stage_alter_t::begin_phase_log_table()
+{
+ change_phase(&srv_stage_alter_table_log_table);
+}
+
+/** Flag the beginning of the end phase. */
+inline
+void
+ut_stage_alter_t::begin_phase_end()
+{
+ change_phase(&srv_stage_alter_table_end);
+}
+
+/** Update the estimate of total work to be done. */
+inline
+void
+ut_stage_alter_t::reestimate()
+{
+ if (m_progress == NULL) {
+ return;
+ }
+
+ /* During the log table phase we calculate the estimate as
+ work done so far + log size remaining. */
+ if (m_cur_phase == LOG_INNODB_TABLE) {
+ /* TODO: MySQL 5.7 PSI
+ mysql_stage_set_work_estimated(
+ m_progress,
+ mysql_stage_get_work_completed(m_progress)
+ + row_log_estimate_work(m_pk));
+ */
+ return;
+ }
+
+ /* During the other phases we use a formula, regardless of
+ how much work has been done so far. */
+
+ /* For number of pages in the PK - if the PK has not been
+ read yet, use stat_n_leaf_pages (approximate), otherwise
+ use the exact number we gathered. */
+ const ulint n_pk_pages
+ = m_cur_phase != READ_PK
+ ? m_n_pk_pages
+ : m_pk->stat_n_leaf_pages;
+
+ /* If flush phase has not started yet and we do not know how
+ many pages are to be flushed, then use a wild guess - the
+ number of pages in the PK / 2. */
+ if (m_n_flush_pages == 0) {
+ m_n_flush_pages = n_pk_pages / 2;
+ }
+
+ ulonglong estimate __attribute__((unused))
+ = n_pk_pages
+ * (1 /* read PK */
+ + m_n_sort_indexes /* row_merge_buf_sort() inside the
+ read PK per created index */
+ + m_n_sort_indexes * 2 /* sort & insert per created index */)
+ + m_n_flush_pages
+ + row_log_estimate_work(m_pk);
+
+ /* Prevent estimate < completed */
+ /* TODO: MySQL 5.7 PSI
+ estimate = std::max(estimate,
+ mysql_stage_get_work_completed(m_progress));
+
+ mysql_stage_set_work_estimated(m_progress, estimate);
+ */
+}
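
To put numbers on the formula: for a hypothetical table with n_pk_pages = 100, m_n_sort_indexes = 2 and m_n_flush_pages = 50, the estimate works out to

	100 * (1 + 2 + 2 * 2) + 50 + row_log_estimate_work(m_pk)
	= 750 + row_log_estimate_work(m_pk)

so each added index contributes three page-units of work (one sort-buffer fill inside the read, plus sort and insert) per primary-key page.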
+
+/** Change the current phase.
+@param[in] new_stage pointer to the new stage to change to */
+inline
+void
+ut_stage_alter_t::change_phase(
+ const PSI_stage_info* new_stage)
+{
+ if (m_progress == NULL) {
+ return;
+ }
+
+ if (new_stage == &srv_stage_alter_table_read_pk_internal_sort) {
+ m_cur_phase = READ_PK;
+ } else if (new_stage == &srv_stage_alter_table_merge_sort) {
+ m_cur_phase = SORT;
+ } else if (new_stage == &srv_stage_alter_table_insert) {
+ m_cur_phase = INSERT;
+ } else if (new_stage == &srv_stage_alter_table_flush) {
+ m_cur_phase = FLUSH;
+ /* JAN: TODO: MySQL 5.7 used LOG_INDEX and LOG_TABLE */
+ } else if (new_stage == &srv_stage_alter_table_log_index) {
+ m_cur_phase = LOG_INNODB_INDEX;
+ } else if (new_stage == &srv_stage_alter_table_log_table) {
+ m_cur_phase = LOG_INNODB_TABLE;
+ } else if (new_stage == &srv_stage_alter_table_end) {
+ m_cur_phase = END;
+ } else {
+ ut_error;
+ }
+
+ /* TODO: MySQL 5.7 PSI
+ const ulonglong c = mysql_stage_get_work_completed(m_progress);
+ const ulonglong e = mysql_stage_get_work_estimated(m_progress);
+
+ m_progress = mysql_set_stage(new_stage->m_key);
+
+ mysql_stage_set_work_completed(m_progress, c);
+ mysql_stage_set_work_estimated(m_progress, e);
+ */
+}
+#else /* HAVE_PSI_STAGE_INTERFACE */
+
+class ut_stage_alter_t {
+public:
+ explicit
+ ut_stage_alter_t(
+ const dict_index_t* pk)
+ {
+ }
+
+ void
+ begin_phase_read_pk(
+ ulint n_sort_indexes)
+ {
+ }
+
+ void
+ n_pk_recs_inc()
+ {
+ }
+
+ void
+ inc(
+ ulint inc_val = 1)
+ {
+ }
+
+ void
+ end_phase_read_pk()
+ {
+ }
+
+ void
+ begin_phase_sort(
+ double sort_multi_factor)
+ {
+ }
+
+ void
+ begin_phase_insert()
+ {
+ }
+
+ void
+ begin_phase_flush(
+ ulint n_flush_pages)
+ {
+ }
+
+ void
+ begin_phase_log_index()
+ {
+ }
+
+ void
+ begin_phase_log_table()
+ {
+ }
+
+ void
+ begin_phase_end()
+ {
+ }
+};
+
+#endif /* HAVE_PSI_STAGE_INTERFACE */
+
+#endif /* ut0stage_h */
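
Tying the phases together, a sketch of the documented call order as the ALTER TABLE code might drive it (the counts and the 1.5 factor are illustrative):

void alter_progress_example(const dict_index_t* pk, ulint n_new_indexes)
{
	ut_stage_alter_t	stage(pk);

	stage.begin_phase_read_pk(n_new_indexes);
	/* per page read: stage.inc(); per record: stage.n_pk_recs_inc() */
	stage.end_phase_read_pk();

	stage.begin_phase_sort(1.5);	/* merge sort revisits pages */
	/* per record sorted: stage.inc() */
	stage.begin_phase_insert();
	/* per record inserted: stage.inc() */

	stage.begin_phase_flush(100);	/* pages to flush, illustrative */
	/* per page flushed: stage.inc() */

	stage.begin_phase_log_index();
	stage.begin_phase_log_table();
	stage.begin_phase_end();
}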
diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h
index da94e78d6bd..000d8b6b379 100644
--- a/storage/innobase/include/ut0ut.h
+++ b/storage/innobase/include/ut0ut.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2017, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
@@ -27,179 +27,148 @@ Created 1/20/1994 Heikki Tuuri
#ifndef ut0ut_h
#define ut0ut_h
-#include "univ.i"
+/* Do not include univ.i because univ.i includes this. */
+
+#include <ostream>
+#include <sstream>
+#include <string.h>
#ifndef UNIV_INNOCHECKSUM
#include "db0err.h"
-#ifndef UNIV_HOTBACKUP
-# include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
-#endif /* UNIV_HOTBACKUP */
-
-#endif /* !UNIV_INNOCHECKSUM */
-
#include <time.h>
+
#ifndef MYSQL_SERVER
#include <ctype.h>
-#endif
+#endif /* MYSQL_SERVER */
-#include <stdarg.h> /* for va_list */
+#include <stdarg.h>
#include <string>
-/** Index name prefix in fast index creation */
-#define TEMP_INDEX_PREFIX '\377'
/** Index name prefix in fast index creation, as a string constant */
#define TEMP_INDEX_PREFIX_STR "\377"
-/* In order to call a piece of code, when a function returns or when the
-scope ends, use this utility class. It will invoke the given function
-object in its destructor. */
-template<typename F>
-struct ut_when_dtor {
- ut_when_dtor(F& p) : f(p) {}
- ~ut_when_dtor() {
- f();
- }
-private:
- F& f;
-};
-
-#ifndef UNIV_INNOCHECKSUM
-#ifndef UNIV_HOTBACKUP
-# if defined(HAVE_PAUSE_INSTRUCTION)
+#ifdef HAVE_PAUSE_INSTRUCTION
/* According to the gcc info page, asm volatile means that the
instruction has important side-effects and must not be removed.
Also asm volatile may trigger a memory barrier (spilling all registers
to memory). */
-# ifdef __SUNPRO_CC
-# define UT_RELAX_CPU() asm ("pause" )
-# else
-# define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
-# endif /* __SUNPRO_CC */
-
-# elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
-# define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop")
-# elif defined(HAVE_WINDOWS_ATOMICS)
+# ifdef __SUNPRO_CC
+# define UT_RELAX_CPU() asm ("pause" )
+# else
+# define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
+# endif /* __SUNPRO_CC */
+
+#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
+# define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop")
+#elif defined _WIN32
/* In the Win32 API, the x86 PAUSE instruction is executed by calling
the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
independent way by using YieldProcessor. */
-# define UT_RELAX_CPU() YieldProcessor()
-# elif defined(__powerpc__)
-#include <sys/platform/ppc.h>
-# define UT_RELAX_CPU() __ppc_get_timebase()
-# else
-# define UT_RELAX_CPU() ((void)0) /* avoid warning for an empty statement */
-# endif
+# define UT_RELAX_CPU() YieldProcessor()
+#elif defined(__powerpc__) && defined __GLIBC__
+# include <sys/platform/ppc.h>
+# define UT_RELAX_CPU() __ppc_get_timebase()
+#else
+# define UT_RELAX_CPU() do { \
+ volatile int32 volatile_var; \
+ int32 oldval= 0; \
+ my_atomic_cas32(&volatile_var, &oldval, 1); \
+ } while (0)
+#endif
#if defined (__GNUC__)
-# define UT_COMPILER_BARRIER() __asm__ __volatile__ ("":::"memory")
+# define UT_COMPILER_BARRIER() __asm__ __volatile__ ("":::"memory")
#elif defined (_MSC_VER)
-# define UT_COMPILER_BARRIER() _ReadWriteBarrier()
+# define UT_COMPILER_BARRIER() _ReadWriteBarrier()
#else
-# define UT_COMPILER_BARRIER()
+# define UT_COMPILER_BARRIER()
#endif
-# if defined(HAVE_HMT_PRIORITY_INSTRUCTION)
-#include <sys/platform/ppc.h>
-# define UT_LOW_PRIORITY_CPU() __ppc_set_ppr_low()
-# define UT_RESUME_PRIORITY_CPU() __ppc_set_ppr_med()
-# else
-# define UT_LOW_PRIORITY_CPU() ((void)0)
-# define UT_RESUME_PRIORITY_CPU() ((void)0)
-# endif
-#endif /* !UNIV_HOTBACKUP */
+#if defined(HAVE_HMT_PRIORITY_INSTRUCTION)
+# include <sys/platform/ppc.h>
+# define UT_LOW_PRIORITY_CPU() __ppc_set_ppr_low()
+# define UT_RESUME_PRIORITY_CPU() __ppc_set_ppr_med()
+#else
+# define UT_LOW_PRIORITY_CPU() ((void)0)
+# define UT_RESUME_PRIORITY_CPU() ((void)0)
+#endif
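
These macros exist to make busy-wait loops cheaper; a typical spin pattern, sketched with a hypothetical predicate:

/* Spin until the flag flips, hinting to the CPU that this is a
busy-wait so it can throttle the core appropriately. */
extern volatile bool ready;	/* hypothetical */

inline void spin_wait()
{
	while (!ready) {
		UT_RELAX_CPU();
	}
}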
-template <class T> T ut_min(T a, T b) { return(a < b ? a : b); }
-template <class T> T ut_max(T a, T b) { return(a > b ? a : b); }
+#define ut_max std::max
+#define ut_min std::min
-/******************************************************//**
-Calculates the minimum of two ulints.
-@return minimum */
-UNIV_INLINE
-ulint
-ut_min(
-/*===*/
- ulint n1, /*!< in: first number */
- ulint n2); /*!< in: second number */
-/******************************************************//**
-Calculates the maximum of two ulints.
-@return maximum */
-UNIV_INLINE
-ulint
-ut_max(
-/*===*/
- ulint n1, /*!< in: first number */
- ulint n2); /*!< in: second number */
-/****************************************************************//**
-Calculates minimum of two ulint-pairs. */
+/** Calculate the minimum of two pairs.
+@param[out] min_hi MSB of the minimum pair
+@param[out] min_lo LSB of the minimum pair
+@param[in] a_hi MSB of the first pair
+@param[in] a_lo LSB of the first pair
+@param[in] b_hi MSB of the second pair
+@param[in] b_lo LSB of the second pair */
UNIV_INLINE
void
ut_pair_min(
-/*========*/
- ulint* a, /*!< out: more significant part of minimum */
- ulint* b, /*!< out: less significant part of minimum */
- ulint a1, /*!< in: more significant part of first pair */
- ulint b1, /*!< in: less significant part of first pair */
- ulint a2, /*!< in: more significant part of second pair */
- ulint b2); /*!< in: less significant part of second pair */
+ ulint* min_hi,
+ ulint* min_lo,
+ ulint a_hi,
+ ulint a_lo,
+ ulint b_hi,
+ ulint b_lo);
/******************************************************//**
Compares two ulints.
-@return 1 if a > b, 0 if a == b, -1 if a < b */
+@return 1 if a > b, 0 if a == b, -1 if a < b */
UNIV_INLINE
int
ut_ulint_cmp(
/*=========*/
ulint a, /*!< in: ulint */
ulint b); /*!< in: ulint */
-/*******************************************************//**
-Compares two pairs of ulints.
-@return -1 if a < b, 0 if a == b, 1 if a > b */
+/** Compare two pairs of integers.
+@param[in] a_h more significant part of first pair
+@param[in] a_l less significant part of first pair
+@param[in] b_h more significant part of second pair
+@param[in] b_l less significant part of second pair
+@return comparison result of (a_h,a_l) and (b_h,b_l)
+@retval -1 if (a_h,a_l) is less than (b_h,b_l)
+@retval 0 if (a_h,a_l) is equal to (b_h,b_l)
+@retval 1 if (a_h,a_l) is greater than (b_h,b_l) */
UNIV_INLINE
int
ut_pair_cmp(
-/*========*/
- ulint a1, /*!< in: more significant part of first pair */
- ulint a2, /*!< in: less significant part of first pair */
- ulint b1, /*!< in: more significant part of second pair */
- ulint b2); /*!< in: less significant part of second pair */
-#endif /* !UNIV_INNOCHECKSUM */
-/*************************************************************//**
-Determines if a number is zero or a power of two.
-@param n in: number
-@return nonzero if n is zero or a power of two; zero otherwise */
-#define ut_is_2pow(n) UNIV_LIKELY(!((n) & ((n) - 1)))
+ ulint a_h,
+ ulint a_l,
+ ulint b_h,
+ ulint b_l)
+ MY_ATTRIBUTE((warn_unused_result));
+
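The ordering is lexicographic on (high, low), as if the two ulints formed one wide integer; a few sketched checks, assuming only the contract declared above:

/* The high word dominates; the low word breaks ties. */
ut_ad(ut_pair_cmp(1, 0, 0, 0xFFFFFFFF) == 1);
ut_ad(ut_pair_cmp(5, 7, 5, 7) == 0);
ut_ad(ut_pair_cmp(5, 6, 5, 7) == -1);
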
/*************************************************************//**
Calculates fast the remainder of n/m when m is a power of two.
-@param n in: numerator
-@param m in: denominator, must be a power of two
-@return the remainder of n/m */
-#define ut_2pow_remainder(n, m) ((n) & ((m) - 1))
+@param n in: numerator
+@param m in: denominator, must be a power of two
+@return the remainder of n/m */
+template <typename T> inline T ut_2pow_remainder(T n, T m){return n & (m - 1);}
/*************************************************************//**
Calculates the biggest multiple of m that is not bigger than n
when m is a power of two. In other words, rounds n down to m * k.
-@param n in: number to round down
-@param m in: alignment, must be a power of two
-@return n rounded down to the biggest possible integer multiple of m */
-#define ut_2pow_round(n, m) ((n) & ~((m) - 1))
-/** Align a number down to a multiple of a power of two.
-@param n in: number to round down
-@param m in: alignment, must be a power of two
-@return n rounded down to the biggest possible integer multiple of m */
-#define ut_calc_align_down(n, m) ut_2pow_round(n, m)
+@param n in: number to round down
+@param m in: alignment, must be a power of two
+@return n rounded down to the biggest possible integer multiple of m */
+template <typename T> inline T ut_2pow_round(T n, T m) { return n & ~(m - 1); }
/********************************************************//**
Calculates the smallest multiple of m that is not smaller than n
when m is a power of two. In other words, rounds n up to m * k.
-@param n in: number to round up
-@param m in: alignment, must be a power of two
-@return n rounded up to the smallest possible integer multiple of m */
-#define ut_calc_align(n, m) (((n) + ((m) - 1)) & ~((m) - 1))
-#ifndef UNIV_INNOCHECKSUM
+@param n in: number to round up
+@param m in: alignment, must be a power of two
+@return n rounded up to the smallest possible integer multiple of m */
+#define UT_CALC_ALIGN(n, m) ((n + m - 1) & ~(m - 1))
+template <typename T> inline T ut_calc_align(T n, T m)
+{ return UT_CALC_ALIGN(n, m); }
+
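A quick sanity check of the power-of-two arithmetic (values chosen for illustration):

#include <cassert>

inline void align_demo()
{
	assert(ut_2pow_remainder(1000u, 512u) == 488u);	/* 1000 & 511 */
	assert(ut_2pow_round(1000u, 512u) == 512u);	/* 1000 & ~511 */
	assert(ut_calc_align(1000u, 512u) == 1024u);	/* (1000 + 511) & ~511 */
}
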
/*************************************************************//**
Calculates fast the 2-logarithm of a number, rounded upward to an
integer.
-@return logarithm in the base 2, rounded upward */
+@return logarithm in the base 2, rounded upward */
UNIV_INLINE
ulint
ut_2_log(
@@ -207,7 +176,7 @@ ut_2_log(
ulint n); /*!< in: number */
/*************************************************************//**
Calculates 2 to power n.
-@return 2 to power n */
+@return 2 to power n */
UNIV_INLINE
ulint
ut_2_exp(
@@ -215,75 +184,72 @@ ut_2_exp(
ulint n); /*!< in: number */
/*************************************************************//**
Calculates fast the number rounded up to the nearest power of 2.
-@return first power of 2 which is >= n */
-UNIV_INTERN
+@return first power of 2 which is >= n */
ulint
ut_2_power_up(
/*==========*/
ulint n) /*!< in: number != 0 */
MY_ATTRIBUTE((const));
-#endif /* !UNIV_INNOCHECKSUM */
-
/** Determine how many bytes (groups of 8 bits) are needed to
store the given number of bits.
-@param b in: bits
-@return number of bytes (octets) needed to represent b */
+@param b in: bits
+@return number of bytes (octets) needed to represent b */
#define UT_BITS_IN_BYTES(b) (((b) + 7) / 8)
-#ifndef UNIV_INNOCHECKSUM
/**********************************************************//**
Returns the number of milliseconds since some epoch. The
value may wrap around. It should only be used for heuristic
purposes.
@return ms since epoch */
-UNIV_INTERN
ulint
ut_time_ms(void);
/*============*/
#endif /* !UNIV_INNOCHECKSUM */
+/** Determines if a number is zero or a power of two.
+@param[in] n number
+@return nonzero if n is zero or a power of two; zero otherwise */
+#define ut_is_2pow(n) UNIV_LIKELY(!((n) & ((n) - 1)))
+
+/** Functor that compares two C strings. Can be used as a comparator for
+e.g. std::map that uses char* as keys. */
+struct ut_strcmp_functor
+{
+ bool operator()(
+ const char* a,
+ const char* b) const
+ {
+ return(strcmp(a, b) < 0);
+ }
+};
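
A sketch of the intended use: keying a std::map on C strings so that lookups compare contents rather than pointer identity (names are illustrative):

#include <map>

std::map<const char*, ulint, ut_strcmp_functor>	table_ids;

inline void functor_example()
{
	table_ids["test/t1"] = 42;
	/* found even if the lookup key is a different pointer */
	ut_ad(table_ids.find("test/t1") != table_ids.end());
}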
+
/**********************************************************//**
Prints a timestamp to a file. */
-UNIV_INTERN
void
ut_print_timestamp(
/*===============*/
FILE* file) /*!< in: file where to print */
- UNIV_COLD MY_ATTRIBUTE((nonnull));
+ ATTRIBUTE_COLD __attribute__((nonnull));
#ifndef UNIV_INNOCHECKSUM
/**********************************************************//**
Sprintfs a timestamp to a buffer, 13..14 chars plus terminating NUL. */
-UNIV_INTERN
void
ut_sprintf_timestamp(
/*=================*/
char* buf); /*!< in: buffer where to sprintf */
-#ifdef UNIV_HOTBACKUP
-/**********************************************************//**
-Sprintfs a timestamp to a buffer with no spaces and with ':' characters
-replaced by '_'. */
-UNIV_INTERN
-void
-ut_sprintf_timestamp_without_extra_chars(
-/*=====================================*/
- char* buf); /*!< in: buffer where to sprintf */
-#else /* UNIV_HOTBACKUP */
/*************************************************************//**
Runs an idle loop on CPU. The argument gives the desired delay
in microseconds on 100 MHz Pentium + Visual C++.
-@return dummy value */
-UNIV_INTERN
+@return dummy value */
void
ut_delay(
/*=====*/
ulint delay); /*!< in: delay in microseconds on 100 MHz Pentium */
-#endif /* UNIV_HOTBACKUP */
/*************************************************************//**
Prints the contents of a memory buffer in hex and ascii. */
-UNIV_INTERN
void
ut_print_buf(
/*=========*/
@@ -291,163 +257,257 @@ ut_print_buf(
const void* buf, /*!< in: memory buffer */
ulint len); /*!< in: length of the buffer */
-/**********************************************************************//**
-Outputs a NUL-terminated file name, quoted with apostrophes. */
-UNIV_INTERN
+/*************************************************************//**
+Prints the contents of a memory buffer in hex. */
+void
+ut_print_buf_hex(
+/*=============*/
+ std::ostream& o, /*!< in/out: output stream */
+ const void* buf, /*!< in: memory buffer */
+ ulint len) /*!< in: length of the buffer */
+ MY_ATTRIBUTE((nonnull));
+/*************************************************************//**
+Prints the contents of a memory buffer in hex and ascii. */
void
-ut_print_filename(
-/*==============*/
- FILE* f, /*!< in: output stream */
- const char* name); /*!< in: name to print */
+ut_print_buf(
+/*=========*/
+ std::ostream& o, /*!< in/out: output stream */
+ const void* buf, /*!< in: memory buffer */
+ ulint len) /*!< in: length of the buffer */
+ MY_ATTRIBUTE((nonnull));
-#ifndef UNIV_HOTBACKUP
/* Forward declaration of transaction handle */
struct trx_t;
-/**********************************************************************//**
-Outputs a fixed-length string, quoted as an SQL identifier.
+/** Get a fixed-length string, quoted as an SQL identifier.
If the string contains a slash '/', the string will be
output as two identifiers separated by a period (.),
-as in SQL database_name.identifier. */
-UNIV_INTERN
-void
-ut_print_name(
-/*==========*/
- FILE* f, /*!< in: output stream */
- const trx_t* trx, /*!< in: transaction */
- ibool table_id,/*!< in: TRUE=print a table name,
- FALSE=print other identifier */
- const char* name); /*!< in: name to print */
+as in SQL database_name.identifier.
+@param[in] trx transaction (NULL=no quotes)
+@param[in] name table name
+@return the name, quoted as an SQL identifier */
+std::string
+ut_get_name(
+ const trx_t* trx,
+ const char* name);
/**********************************************************************//**
Outputs a fixed-length string, quoted as an SQL identifier.
If the string contains a slash '/', the string will be
output as two identifiers separated by a period (.),
as in SQL database_name.identifier. */
-UNIV_INTERN
void
-ut_print_namel(
-/*===========*/
- FILE* f, /*!< in: output stream */
- const trx_t* trx, /*!< in: transaction (NULL=no quotes) */
- ibool table_id,/*!< in: TRUE=print a table name,
- FALSE=print other identifier */
- const char* name, /*!< in: name to print */
- ulint namelen);/*!< in: length of name */
-/**********************************************************************//**
-Outputs a fixed-length string, quoted as an SQL identifier.
-If the string contains a slash '/', the string will be
-output as two identifiers separated by a period (.),
-as in SQL database_name.identifier. */
-UNIV_INTERN
-std::string
-ut_get_name(
-/*=========*/
- const trx_t* trx, /*!< in: transaction (NULL=no quotes) */
- ibool table_id,/*!< in: TRUE=print a table name,
- FALSE=print other identifier */
- const char* name); /*!< in: name to print */
-/**********************************************************************//**
-Formats a table or index name, quoted as an SQL identifier. If the name
-contains a slash '/', the result will contain two identifiers separated by
-a period (.), as in SQL database_name.identifier.
+ut_print_name(
+/*==========*/
+ FILE* ef, /*!< in: stream */
+ const trx_t* trx, /*!< in: transaction */
+ const char* name); /*!< in: table name to print */
+/** Format a table name, quoted as an SQL identifier.
+If the name contains a slash '/', the result will contain two
+identifiers separated by a period (.), as in SQL
+database_name.table_name.
+@see table_name_t
+@param[in] name table or index name
+@param[out] formatted formatted result, will be NUL-terminated
+@param[in] formatted_size size of the buffer in bytes
@return pointer to 'formatted' */
-UNIV_INTERN
char*
ut_format_name(
-/*===========*/
- const char* name, /*!< in: table or index name, must be
- '\0'-terminated */
- ibool is_table, /*!< in: if TRUE then 'name' is a table
- name */
- char* formatted, /*!< out: formatted result, will be
- '\0'-terminated */
- ulint formatted_size);/*!< out: no more than this number of
- bytes will be written to 'formatted' */
+ const char* name,
+ char* formatted,
+ ulint formatted_size);
/**********************************************************************//**
Catenate files. */
-UNIV_INTERN
void
ut_copy_file(
/*=========*/
FILE* dest, /*!< in: output file */
FILE* src); /*!< in: input file to be appended to output */
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef __WIN__
-/**********************************************************************//**
-A substitute for vsnprintf(3), formatted output conversion into
-a limited buffer. Note: this function DOES NOT return the number of
-characters that would have been printed if the buffer was unlimited because
-VC's _vsnprintf() returns -1 in this case and we would need to call
-_vscprintf() in addition to estimate that but we would need another copy
-of "ap" for that and VC does not provide va_copy(). */
-UNIV_INTERN
-void
-ut_vsnprintf(
-/*=========*/
- char* str, /*!< out: string */
- size_t size, /*!< in: str size */
- const char* fmt, /*!< in: format */
- va_list ap); /*!< in: format values */
-
-/**********************************************************************//**
-A substitute for snprintf(3), formatted output conversion into
-a limited buffer.
-@return number of characters that would have been printed if the size
-were unlimited, not including the terminating '\0'. */
-UNIV_INTERN
-int
-ut_snprintf(
-/*========*/
- char* str, /*!< out: string */
- size_t size, /*!< in: str size */
- const char* fmt, /*!< in: format */
- ...); /*!< in: format values */
-#else
-/**********************************************************************//**
-A wrapper for vsnprintf(3), formatted output conversion into
-a limited buffer. Note: this function DOES NOT return the number of
-characters that would have been printed if the buffer was unlimited because
-VC's _vsnprintf() returns -1 in this case and we would need to call
-_vscprintf() in addition to estimate that but we would need another copy
-of "ap" for that and VC does not provide va_copy(). */
-# define ut_vsnprintf(buf, size, fmt, ap) \
- ((void) vsnprintf(buf, size, fmt, ap))
-/**********************************************************************//**
-A wrapper for snprintf(3), formatted output conversion into
-a limited buffer. */
-# define ut_snprintf snprintf
-#endif /* __WIN__ */
/*************************************************************//**
Convert an error number to a human readable text message. The
returned string is static and should not be freed or modified.
-@return string, describing the error */
-UNIV_INTERN
+@return string, describing the error */
const char*
ut_strerr(
/*======*/
dberr_t num); /*!< in: error number */
-/****************************************************************
-Sort function for ulint arrays. */
-UNIV_INTERN
-void
-ut_ulint_sort(
-/*==========*/
- ulint* arr, /*!< in/out: array to sort */
- ulint* aux_arr, /*!< in/out: aux array to use in sort */
- ulint low, /*!< in: lower bound */
- ulint high) /*!< in: upper bound */
- MY_ATTRIBUTE((nonnull));
+#endif /* !UNIV_INNOCHECKSUM */
-#ifndef UNIV_NONINL
-#include "ut0ut.ic"
-#endif
+#ifdef UNIV_PFS_MEMORY
-#endif /* !UNIV_INNOCHECKSUM */
+/** Extract the basename of a file without its extension.
+For example, extract "foo0bar" out of "/path/to/foo0bar.cc".
+@param[in] file file path, e.g. "/path/to/foo0bar.cc"
+@param[out] base result, e.g. "foo0bar"
+@param[in] base_size size of the output buffer 'base', if there
+is not enough space, then the result will be truncated, but always
+'\0'-terminated
+@return number of characters that would have been printed if the size
+were unlimited (not including the final '\0') */
+size_t
+ut_basename_noext(
+ const char* file,
+ char* base,
+ size_t base_size);
+
+#endif /* UNIV_PFS_MEMORY */
+
+namespace ib {
+
+/** This is a wrapper class, used to print any unsigned integer type
+in hexadecimal format. The main purpose of this data type is to
+overload the global operator<<, so that we can print the given
+wrapper value in hex. */
+struct hex {
+ explicit hex(uintmax_t t): m_val(t) {}
+ const uintmax_t m_val;
+};
+
+/** This is an overload of the global operator<< for the user defined type
+ib::hex. The unsigned value held in the ib::hex wrapper class will be printed
+into the given output stream in hexadecimal format.
+@param[in,out] lhs the output stream into which rhs is written.
+@param[in] rhs the object to be written into lhs.
+@return reference to the output stream. */
+inline
+std::ostream&
+operator<<(
+ std::ostream& lhs,
+ const hex& rhs)
+{
+ std::ios_base::fmtflags ff = lhs.flags();
+ lhs << std::showbase << std::hex << rhs.m_val;
+ lhs.setf(ff);
+ return(lhs);
+}
+
+/** The class logger is the base class of all the error log related classes.
+It contains a std::ostringstream object. The main purpose of this class is
+to forward operator<< to the underlying std::ostringstream object. Do not
+use this class directly, instead use one of the derived classes. */
+class logger {
+public:
+ template<typename T>
+ ATTRIBUTE_COLD
+ logger& operator<<(const T& rhs)
+ {
+ m_oss << rhs;
+ return(*this);
+ }
+
+ /** Write the given buffer to the internal string stream object.
+ @param[in] buf the buffer whose contents will be logged.
+ @param[in] count the length of the buffer buf.
+ @return the output stream into which buffer was written. */
+ ATTRIBUTE_COLD
+ std::ostream&
+ write(
+ const char* buf,
+ std::streamsize count)
+ {
+ return(m_oss.write(buf, count));
+ }
+
+ /** Write the given buffer to the internal string stream object.
+ @param[in] buf the buffer whose contents will be logged.
+ @param[in] count the length of the buffer buf.
+ @return the output stream into which buffer was written. */
+ ATTRIBUTE_COLD
+ std::ostream&
+ write(
+ const byte* buf,
+ std::streamsize count)
+ {
+ return(m_oss.write(reinterpret_cast<const char*>(buf), count));
+ }
+
+ std::ostringstream m_oss;
+protected:
+ /* This class must not be used directly, hence making the default
+ constructor protected. */
+ ATTRIBUTE_COLD
+ logger() {}
+};
+
+/** The class info is used to emit informational log messages. It is to be
+used similarly to std::cout, but the log messages will be emitted only when
+the dtor is called. The preferred usage of this class is to make use of
+unnamed temporaries as follows:
+
+info() << "The server started successfully.";
+
+In the above usage, the temporary object will be destroyed at the end of the
+statement and hence the log message will be emitted at the end of the
+statement. If a named object is created, then the log message will be emitted
+only when it goes out of scope or is destroyed. */
+class info : public logger {
+public:
+ ATTRIBUTE_COLD
+ ~info();
+};
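To illustrate the named-object behaviour described above (the message and
the variable n_pages are invented for this sketch):

	{
		ib::info	msg;	/* nothing is logged yet */
		msg << "Recovered " << n_pages;	/* n_pages: illustrative */
		msg << " pages from the doublewrite buffer.";
	}	/* dtor runs here; the whole line is emitted at once */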
+
+/** The class warn is used to emit warnings. Refer to the documentation of
+class info for further details. */
+class warn : public logger {
+public:
+ ATTRIBUTE_COLD
+ ~warn();
+};
+
+/** The class error is used to emit error messages. Refer to the
+documentation of class info for further details. */
+class error : public logger {
+public:
+ ATTRIBUTE_COLD
+ ~error();
+};
+
+/** The class fatal is used to emit an error message and stop the server
+by crashing it. Use this class when the MySQL server needs to be stopped
+immediately. Refer to the documentation of class info for usage details. */
+class fatal : public logger {
+public:
+ ATTRIBUTE_NORETURN
+ ~fatal();
+};
+
+/** Emit an error message if the given predicate is true, otherwise emit a
+warning message */
+class error_or_warn : public logger {
+public:
+ ATTRIBUTE_COLD
+ error_or_warn(bool pred)
+ : m_error(pred)
+ {}
+
+ ATTRIBUTE_COLD
+ ~error_or_warn();
+private:
+ const bool m_error;
+};
+
+/** Emit a fatal message if the given predicate is true, otherwise emit an
+error message. */
+class fatal_or_error : public logger {
+public:
+ ATTRIBUTE_COLD
+ fatal_or_error(bool pred)
+ : m_fatal(pred)
+ {}
+
+ ATTRIBUTE_COLD
+ ~fatal_or_error();
+private:
+ const bool m_fatal;
+};
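A hedged sketch of how the predicate-driven classes are meant to be used
(the condition and message are illustrative, not from this patch):

	/* Logs with ib::error severity when the predicate is true,
	and with ib::warn severity otherwise; either way, the message
	is built only once. */
	ib::error_or_warn(n_corrupt > 0)
		<< n_corrupt << " pages failed the checksum test.";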
+
+} // namespace ib
+
+#include "ut0ut.ic"
#endif
diff --git a/storage/innobase/include/ut0ut.ic b/storage/innobase/include/ut0ut.ic
index 380c588738d..73feaf82c6a 100644
--- a/storage/innobase/include/ut0ut.ic
+++ b/storage/innobase/include/ut0ut.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,60 +23,40 @@ Various utilities
Created 5/30/1994 Heikki Tuuri
*******************************************************************/
-/******************************************************//**
-Calculates the minimum of two ulints.
-@return minimum */
-UNIV_INLINE
-ulint
-ut_min(
-/*===*/
- ulint n1, /*!< in: first number */
- ulint n2) /*!< in: second number */
-{
- return((n1 <= n2) ? n1 : n2);
-}
-
-/******************************************************//**
-Calculates the maximum of two ulints.
-@return maximum */
-UNIV_INLINE
-ulint
-ut_max(
-/*===*/
- ulint n1, /*!< in: first number */
- ulint n2) /*!< in: second number */
-{
- return((n1 <= n2) ? n2 : n1);
-}
+#include <algorithm>
-/****************************************************************//**
-Calculates minimum of two ulint-pairs. */
+/** Calculate the minimum of two pairs.
+@param[out] min_hi MSB of the minimum pair
+@param[out] min_lo LSB of the minimum pair
+@param[in] a_hi MSB of the first pair
+@param[in] a_lo LSB of the first pair
+@param[in] b_hi MSB of the second pair
+@param[in] b_lo LSB of the second pair */
UNIV_INLINE
void
ut_pair_min(
-/*========*/
- ulint* a, /*!< out: more significant part of minimum */
- ulint* b, /*!< out: less significant part of minimum */
- ulint a1, /*!< in: more significant part of first pair */
- ulint b1, /*!< in: less significant part of first pair */
- ulint a2, /*!< in: more significant part of second pair */
- ulint b2) /*!< in: less significant part of second pair */
+ ulint* min_hi,
+ ulint* min_lo,
+ ulint a_hi,
+ ulint a_lo,
+ ulint b_hi,
+ ulint b_lo)
{
- if (a1 == a2) {
- *a = a1;
- *b = ut_min(b1, b2);
- } else if (a1 < a2) {
- *a = a1;
- *b = b1;
+ if (a_hi == b_hi) {
+ *min_hi = a_hi;
+ *min_lo = std::min(a_lo, b_lo);
+ } else if (a_hi < b_hi) {
+ *min_hi = a_hi;
+ *min_lo = a_lo;
} else {
- *a = a2;
- *b = b2;
+ *min_hi = b_hi;
+ *min_lo = b_lo;
}
}
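A quick worked example (values arbitrary): comparing (3,42) with (3,7),
the MSBs tie, so the LSBs decide the minimum:

	ulint	hi;
	ulint	lo;
	ut_pair_min(&hi, &lo, 3, 42, 3, 7);
	ut_ad(hi == 3 && lo == 7);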
/******************************************************//**
Compares two ulints.
-@return 1 if a > b, 0 if a == b, -1 if a < b */
+@return 1 if a > b, 0 if a == b, -1 if a < b */
UNIV_INLINE
int
ut_ulint_cmp(
@@ -93,35 +73,36 @@ ut_ulint_cmp(
}
}
-/*******************************************************//**
-Compares two pairs of ulints.
-@return -1 if a < b, 0 if a == b, 1 if a > b */
+/** Compare two pairs of integers.
+@param[in] a_h more significant part of first pair
+@param[in] a_l less significant part of first pair
+@param[in] b_h more significant part of second pair
+@param[in] b_l less significant part of second pair
+@return comparison result of (a_h,a_l) and (b_h,b_l)
+@retval -1 if (a_h,a_l) is less than (b_h,b_l)
+@retval 0 if (a_h,a_l) is equal to (b_h,b_l)
+@retval 1 if (a_h,a_l) is greater than (b_h,b_l) */
UNIV_INLINE
int
ut_pair_cmp(
-/*========*/
- ulint a1, /*!< in: more significant part of first pair */
- ulint a2, /*!< in: less significant part of first pair */
- ulint b1, /*!< in: more significant part of second pair */
- ulint b2) /*!< in: less significant part of second pair */
+ ulint a_h,
+ ulint a_l,
+ ulint b_h,
+ ulint b_l)
{
- if (a1 > b1) {
- return(1);
- } else if (a1 < b1) {
+ if (a_h < b_h) {
return(-1);
- } else if (a2 > b2) {
+ }
+ if (a_h > b_h) {
return(1);
- } else if (a2 < b2) {
- return(-1);
- } else {
- return(0);
}
+ return(ut_ulint_cmp(a_l, b_l));
}
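A few worked cases (arbitrary values) showing the lexicographic order on
(hi,lo) pairs:

	ut_ad(ut_pair_cmp(1, 100, 2, 0) == -1);	/* 1 < 2 decides */
	ut_ad(ut_pair_cmp(2, 5, 2, 5) == 0);	/* pairs are equal */
	ut_ad(ut_pair_cmp(2, 9, 2, 5) == 1);	/* tie on hi; 9 > 5 */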
/*************************************************************//**
Calculates fast the 2-logarithm of a number, rounded upward to an
integer.
-@return logarithm in the base 2, rounded upward */
+@return logarithm in the base 2, rounded upward */
UNIV_INLINE
ulint
ut_2_log(
@@ -151,7 +132,7 @@ ut_2_log(
/*************************************************************//**
Calculates 2 to power n.
-@return 2 to power n */
+@return 2 to power n */
UNIV_INLINE
ulint
ut_2_exp(
diff --git a/storage/innobase/include/ut0vec.h b/storage/innobase/include/ut0vec.h
index a08380fff00..cfdaee607be 100644
--- a/storage/innobase/include/ut0vec.h
+++ b/storage/innobase/include/ut0vec.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,7 +26,6 @@ Created 4/6/2006 Osku Salerma
#ifndef IB_VECTOR_H
#define IB_VECTOR_H
-#include "univ.i"
#include "mem0mem.h"
struct ib_alloc_t;
@@ -64,7 +63,6 @@ freeing it when done with the vector.
/********************************************************************
Create a new vector with the given initial size. */
-UNIV_INTERN
ib_vector_t*
ib_vector_create(
/*=============*/
@@ -124,7 +122,6 @@ ib_vector_size(
/********************************************************************
Increase the size of the vector. */
-UNIV_INTERN
void
ib_vector_resize(
/*=============*/
@@ -142,7 +139,7 @@ ib_vector_is_empty(
/****************************************************************//**
Get the n'th element.
-@return n'th element */
+@return n'th element */
UNIV_INLINE
void*
ib_vector_get(
@@ -161,7 +158,7 @@ ib_vector_get_const(
ulint n); /* in: element index to get */
/****************************************************************//**
Get last element. The vector must not be empty.
-@return last element */
+@return last element */
UNIV_INLINE
void*
ib_vector_get_last(
@@ -263,53 +260,6 @@ ib_heap_allocator_free(
/*===================*/
	ib_alloc_t*	ib_ut_alloc);	/* in: alloc instance to free */
-/********************************************************************
-Wrapper for ut_free(). */
-UNIV_INLINE
-void
-ib_ut_free(
-/*=======*/
- ib_alloc_t* allocator, /* in: allocator */
- void* ptr); /* in: size in bytes */
-
-/********************************************************************
-Wrapper for ut_malloc(). */
-UNIV_INLINE
-void*
-ib_ut_malloc(
-/*=========*/
- /* out: pointer to allocated memory */
- ib_alloc_t* allocator, /* in: allocator */
- ulint size); /* in: size in bytes */
-
-/********************************************************************
-Wrapper for ut_realloc(). */
-UNIV_INLINE
-void*
-ib_ut_resize(
-/*=========*/
- /* out: pointer to reallocated
- memory */
- ib_alloc_t* allocator, /* in: allocator */
- void* old_ptr, /* in: pointer to memory */
- ulint old_size, /* in: old size in bytes */
- ulint new_size); /* in: new size in bytes */
-
-/********************************************************************
-Create a heap allocator that uses the passed in heap. */
-UNIV_INLINE
-ib_alloc_t*
-ib_ut_allocator_create(void);
-/*=========================*/
-
-/********************************************************************
-Create a heap allocator that uses the passed in heap. */
-UNIV_INLINE
-void
-ib_ut_allocator_free(
-/*=================*/
- ib_alloc_t* ib_ut_alloc); /* in: alloc instace to free */
-
/* Allocator used by ib_vector_t. */
struct ib_alloc_t {
ib_mem_alloc_t mem_malloc; /* For allocating memory */
@@ -330,8 +280,6 @@ struct ib_vector_t {
ulint sizeof_value;
};
-#ifndef UNIV_NONINL
#include "ut0vec.ic"
-#endif
#endif /* IB_VECTOR_H */
diff --git a/storage/innobase/include/ut0vec.ic b/storage/innobase/include/ut0vec.ic
index 1d4c0539cd6..531f0f22ae0 100644
--- a/storage/innobase/include/ut0vec.ic
+++ b/storage/innobase/include/ut0vec.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2006, 2014, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -54,6 +54,7 @@ ib_heap_free(
/********************************************************************
The default ib_vector_t heap resize. Since we can't resize the heap
we have to copy the elements from the old ptr to the new ptr.
+We always assume new_size >= old_size, so the buffer won't overflow.
Uses mem_heap_alloc(). */
UNIV_INLINE
void*
@@ -67,6 +68,7 @@ ib_heap_resize(
void* new_ptr;
mem_heap_t* heap = (mem_heap_t*) allocator->arg;
+ ut_a(new_size >= old_size);
new_ptr = mem_heap_alloc(heap, new_size);
memcpy(new_ptr, old_ptr, old_size);
@@ -105,74 +107,6 @@ ib_heap_allocator_free(
}
/********************************************************************
-Wrapper around ut_malloc(). */
-UNIV_INLINE
-void*
-ib_ut_malloc(
-/*=========*/
- ib_alloc_t* allocator UNIV_UNUSED, /* in: allocator */
- ulint size) /* in: size in bytes */
-{
- return(ut_malloc(size));
-}
-
-/********************************************************************
-Wrapper around ut_free(). */
-UNIV_INLINE
-void
-ib_ut_free(
-/*=======*/
- ib_alloc_t* allocator UNIV_UNUSED, /* in: allocator */
- void* ptr) /* in: size in bytes */
-{
- ut_free(ptr);
-}
-
-/********************************************************************
-Wrapper aroung ut_realloc(). */
-UNIV_INLINE
-void*
-ib_ut_resize(
-/*=========*/
- ib_alloc_t* allocator UNIV_UNUSED, /* in: allocator */
- void* old_ptr, /* in: pointer to memory */
- ulint old_size UNIV_UNUSED,/* in: old size in bytes */
- ulint new_size) /* in: new size in bytes */
-{
- return(ut_realloc(old_ptr, new_size));
-}
-
-/********************************************************************
-Create a ut allocator. */
-UNIV_INLINE
-ib_alloc_t*
-ib_ut_allocator_create(void)
-/*========================*/
-{
- ib_alloc_t* ib_ut_alloc;
-
- ib_ut_alloc = (ib_alloc_t*) ut_malloc(sizeof(*ib_ut_alloc));
-
- ib_ut_alloc->arg = NULL;
- ib_ut_alloc->mem_release = ib_ut_free;
- ib_ut_alloc->mem_malloc = ib_ut_malloc;
- ib_ut_alloc->mem_resize = ib_ut_resize;
-
- return(ib_ut_alloc);
-}
-
-/********************************************************************
-Free a ut allocator. */
-UNIV_INLINE
-void
-ib_ut_allocator_free(
-/*=================*/
- ib_alloc_t* ib_ut_alloc) /* in: alloc instace to free */
-{
- ut_free(ib_ut_alloc);
-}
-
-/********************************************************************
Get number of elements in vector. */
UNIV_INLINE
ulint
@@ -214,7 +148,7 @@ ib_vector_get_const(
}
/****************************************************************//**
Get last element. The vector must not be empty.
-@return last element */
+@return last element */
UNIV_INLINE
void*
ib_vector_get_last(
@@ -286,7 +220,7 @@ ib_vector_last_const(
/****************************************************************//**
Remove the last element from the vector.
-@return last vector element */
+@return last vector element */
UNIV_INLINE
void*
ib_vector_pop(
@@ -392,24 +326,13 @@ ib_vector_free(
/*===========*/
ib_vector_t* vec) /* in, own: vector */
{
- /* Currently we only support two types of allocators, heap
- and ut_malloc(), when the heap is freed all the elements are
- freed too. With ut allocator, we need to free the elements,
- the vector instance and the allocator separately. */
+	/* Currently we only support one type of allocator - heap;
+	when the heap is freed, all the elements are freed too. */
/* Only the heap allocator uses the arg field. */
- if (vec->allocator->arg) {
- mem_heap_free((mem_heap_t*) vec->allocator->arg);
- } else {
- ib_alloc_t* allocator;
-
- allocator = vec->allocator;
+ ut_ad(vec->allocator->arg != NULL);
- allocator->mem_release(allocator, vec->data);
- allocator->mem_release(allocator, vec);
-
- ib_ut_allocator_free(allocator);
- }
+ mem_heap_free((mem_heap_t*) vec->allocator->arg);
}
/********************************************************************
diff --git a/storage/innobase/include/ut0wqueue.h b/storage/innobase/include/ut0wqueue.h
index 4b0014e3091..5a895f4ea3c 100644
--- a/storage/innobase/include/ut0wqueue.h
+++ b/storage/innobase/include/ut0wqueue.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2006, 2014, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
@@ -35,33 +35,41 @@ processing.
#include "ut0list.h"
#include "mem0mem.h"
-#include "os0sync.h"
-#include "sync0types.h"
-struct ib_wqueue_t;
+// Forward declaration
+struct ib_list_t;
+
+/** Work queue */
+struct ib_wqueue_t
+{
+ /** Mutex protecting everything */
+ ib_mutex_t mutex;
+ /** Work item list */
+ ib_list_t* items;
+ /** event we use to signal additions to list;
+ os_event_set() and os_event_reset() are protected by the mutex */
+ os_event_t event;
+};
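A hedged producer/consumer sketch built on this struct (the heap size and
the some_item pointer are illustrative; the event/mutex interplay is
handled inside the queue functions declared below):

	mem_heap_t*	heap = mem_heap_create(512);
	ib_wqueue_t*	wq = ib_wqueue_create();

	/* Producer: enqueues the item and signals wq->event. */
	ib_wqueue_add(wq, some_item, heap);

	/* Consumer: blocks on wq->event until an item is available. */
	void*	item = ib_wqueue_wait(wq);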
/****************************************************************//**
Create a new work queue.
-@return work queue */
-UNIV_INTERN
+@return work queue */
ib_wqueue_t*
-ib_wqueue_create(void);
-/*===================*/
+ib_wqueue_create();
+/*===============*/
/****************************************************************//**
Free a work queue. */
-UNIV_INTERN
void
ib_wqueue_free(
/*===========*/
- ib_wqueue_t* wq); /*!< in: work queue */
+ ib_wqueue_t* wq); /*!< in: work queue */
/** Add a work item to the queue.
@param[in,out] wq work queue
@param[in] item work item
@param[in,out] heap memory heap to use for allocating list node
@param[in] wq_locked work queue mutex locked */
-UNIV_INTERN
void
ib_wqueue_add(ib_wqueue_t* wq, void* item, mem_heap_t* heap,
bool wq_locked = false);
@@ -73,16 +81,14 @@ bool ib_wqueue_is_empty(ib_wqueue_t* wq);
/****************************************************************//**
Wait for a work item to appear in the queue.
-@return work item */
-UNIV_INTERN
+@return work item */
void*
ib_wqueue_wait(
/*===========*/
- ib_wqueue_t* wq); /*!< in: work queue */
+ ib_wqueue_t* wq); /*!< in: work queue */
/********************************************************************
Wait for a work item to appear in the queue for a specified time. */
-
void*
ib_wqueue_timedwait(
/*================*/
@@ -97,7 +103,6 @@ void*
ib_wqueue_nowait(
/*=============*/
	ib_wqueue_t*	wq);	/*!< in: work queue */
-
/********************************************************************
Get number of items on queue.
@return number of items on queue */
@@ -106,16 +111,4 @@ ib_wqueue_len(
/*==========*/
	ib_wqueue_t*	wq);	/*!< in: work queue */
-/** Work queue */
-struct ib_wqueue_t
-{
- /** Mutex protecting everything */
- ib_mutex_t mutex;
- /** Work item list */
- ib_list_t* items;
- /** event we use to signal additions to list;
- os_event_set() and os_event_reset() are protected by the mutex */
- os_event_t event;
-};
-
-#endif
+#endif /* IB_WORK_QUEUE_H */
diff --git a/storage/innobase/innodb.cmake b/storage/innobase/innodb.cmake
new file mode 100644
index 00000000000..523176b4530
--- /dev/null
+++ b/storage/innobase/innodb.cmake
@@ -0,0 +1,288 @@
+# Copyright (c) 2006, 2016, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2017, 2019, MariaDB Corporation.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+# This is the CMakeLists for InnoDB
+
+INCLUDE(CheckFunctionExists)
+INCLUDE(CheckCSourceCompiles)
+INCLUDE(CheckCSourceRuns)
+INCLUDE(lz4.cmake)
+INCLUDE(lzo.cmake)
+INCLUDE(lzma.cmake)
+INCLUDE(bzip2.cmake)
+INCLUDE(snappy.cmake)
+INCLUDE(numa)
+INCLUDE(TestBigEndian)
+
+MYSQL_CHECK_LZ4()
+MYSQL_CHECK_LZO()
+MYSQL_CHECK_LZMA()
+MYSQL_CHECK_BZIP2()
+MYSQL_CHECK_SNAPPY()
+MYSQL_CHECK_NUMA()
+TEST_BIG_ENDIAN(IS_BIG_ENDIAN)
+
+IF(CMAKE_CROSSCOMPILING)
+ # Use CHECK_C_SOURCE_COMPILES instead of CHECK_C_SOURCE_RUNS when
+ # cross-compiling. Not as precise, but usually good enough.
+  # This only makes sense for the atomic tests in this file; the trick
+  # doesn't work in the general case.
+ MACRO(CHECK_C_SOURCE SOURCE VAR)
+ CHECK_C_SOURCE_COMPILES("${SOURCE}" "${VAR}")
+ ENDMACRO()
+ELSE()
+ MACRO(CHECK_C_SOURCE SOURCE VAR)
+ CHECK_C_SOURCE_RUNS("${SOURCE}" "${VAR}")
+ ENDMACRO()
+ENDIF()
+
+## MySQL 5.7 LZ4 (not needed)
+##IF(LZ4_INCLUDE_DIR AND LZ4_LIBRARY)
+## ADD_DEFINITIONS(-DHAVE_LZ4=1)
+## INCLUDE_DIRECTORIES(${LZ4_INCLUDE_DIR})
+##ENDIF()
+
+# OS tests
+IF(UNIX)
+ IF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+
+ ADD_DEFINITIONS("-DUNIV_LINUX -D_GNU_SOURCE=1")
+
+ CHECK_INCLUDE_FILES (libaio.h HAVE_LIBAIO_H)
+ CHECK_LIBRARY_EXISTS(aio io_queue_init "" HAVE_LIBAIO)
+
+ IF(HAVE_LIBAIO_H AND HAVE_LIBAIO)
+ ADD_DEFINITIONS(-DLINUX_NATIVE_AIO=1)
+ LINK_LIBRARIES(aio)
+ ENDIF()
+ IF(HAVE_LIBNUMA)
+ LINK_LIBRARIES(numa)
+ ENDIF()
+ ELSEIF(CMAKE_SYSTEM_NAME MATCHES "HP*")
+ ADD_DEFINITIONS("-DUNIV_HPUX")
+ ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "AIX")
+ ADD_DEFINITIONS("-DUNIV_AIX")
+ ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "SunOS")
+ ADD_DEFINITIONS("-DUNIV_SOLARIS")
+ ENDIF()
+ENDIF()
+
+OPTION(INNODB_COMPILER_HINTS "Compile InnoDB with compiler hints" ON)
+MARK_AS_ADVANCED(INNODB_COMPILER_HINTS)
+
+IF(INNODB_COMPILER_HINTS)
+ ADD_DEFINITIONS("-DCOMPILER_HINTS")
+ENDIF()
+
+IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
+# After: WL#5825 Using C++ Standard Library with MySQL code
+# we no longer use -fno-exceptions
+# SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions")
+
+# Add -Wconversion if compiling with GCC
+## As of Mar 15 2011 this flag causes 3573+ warnings. If you are reading this
+## please fix them and enable the following code:
+#SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wconversion")
+
+ IF (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64" OR
+ CMAKE_SYSTEM_PROCESSOR MATCHES "i386" AND
+ CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.6)
+ INCLUDE(CheckCXXCompilerFlag)
+ CHECK_CXX_COMPILER_FLAG("-fno-builtin-memcmp" HAVE_NO_BUILTIN_MEMCMP)
+ IF (HAVE_NO_BUILTIN_MEMCMP)
+ # Work around http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
+ SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/rem/rem0cmp.cc
+ PROPERTIES COMPILE_FLAGS -fno-builtin-memcmp)
+ ENDIF()
+ ENDIF()
+ENDIF()
+
+# Enable InnoDB's UNIV_DEBUG in debug builds
+SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DUNIV_DEBUG")
+
+OPTION(WITH_INNODB_AHI "Include innodb_adaptive_hash_index" ON)
+OPTION(WITH_INNODB_ROOT_GUESS "Cache index root block descriptors" ON)
+IF(WITH_INNODB_AHI)
+ ADD_DEFINITIONS(-DBTR_CUR_HASH_ADAPT -DBTR_CUR_ADAPT)
+ IF(NOT WITH_INNODB_ROOT_GUESS)
+ MESSAGE(WARNING "WITH_INNODB_AHI implies WITH_INNODB_ROOT_GUESS")
+ ENDIF()
+ELSEIF(WITH_INNODB_ROOT_GUESS)
+ ADD_DEFINITIONS(-DBTR_CUR_ADAPT)
+ENDIF()
+
+OPTION(WITH_INNODB_BUG_ENDIAN_CRC32 "Weaken innodb_checksum_algorithm=crc32 by supporting upgrade from big-endian systems running 5.6/10.0/10.1" ${IS_BIG_ENDIAN})
+IF(WITH_INNODB_BUG_ENDIAN_CRC32)
+ ADD_DEFINITIONS(-DINNODB_BUG_ENDIAN_CRC32)
+ENDIF()
+
+OPTION(WITH_INNODB_EXTRA_DEBUG "Enable extra InnoDB debug checks" OFF)
+IF(WITH_INNODB_EXTRA_DEBUG)
+ IF(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
+ MESSAGE(FATAL_ERROR "WITH_INNODB_EXTRA_DEBUG can be enabled only in debug builds")
+ ENDIF()
+
+ SET(EXTRA_DEBUG_FLAGS "")
+ IF(WITH_INNODB_AHI)
+ SET(EXTRA_DEBUG_FLAGS "${EXTRA_DEBUG_FLAGS} -DUNIV_AHI_DEBUG")
+ ENDIF()
+ SET(EXTRA_DEBUG_FLAGS "${EXTRA_DEBUG_FLAGS} -DUNIV_DDL_DEBUG")
+ SET(EXTRA_DEBUG_FLAGS "${EXTRA_DEBUG_FLAGS} -DUNIV_DEBUG_FILE_ACCESSES")
+ SET(EXTRA_DEBUG_FLAGS "${EXTRA_DEBUG_FLAGS} -DUNIV_ZIP_DEBUG")
+
+ SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${EXTRA_DEBUG_FLAGS}")
+ SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${EXTRA_DEBUG_FLAGS}")
+ENDIF()
+
+CHECK_FUNCTION_EXISTS(sched_getcpu HAVE_SCHED_GETCPU)
+IF(HAVE_SCHED_GETCPU)
+ ADD_DEFINITIONS(-DHAVE_SCHED_GETCPU=1)
+ENDIF()
+
+CHECK_FUNCTION_EXISTS(nanosleep HAVE_NANOSLEEP)
+IF(HAVE_NANOSLEEP)
+ ADD_DEFINITIONS(-DHAVE_NANOSLEEP=1)
+ENDIF()
+
+IF(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE)
+ ADD_DEFINITIONS(-DHAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE=1)
+ENDIF()
+
+IF(NOT MSVC)
+  # Work around MDEV-18417 and MDEV-18656
+ IF(WITH_ASAN AND CMAKE_COMPILER_IS_GNUCC AND
+ CMAKE_C_COMPILER_VERSION VERSION_LESS "6.0.0")
+ SET_SOURCE_FILES_PROPERTIES(trx/trx0rec.cc PROPERTIES COMPILE_FLAGS -O1)
+ ENDIF()
+
+  # Work around old GCC on x86: its atomic ops only work under -march=i686
+ IF(CMAKE_SYSTEM_PROCESSOR STREQUAL "i686" AND CMAKE_COMPILER_IS_GNUCC AND
+ CMAKE_C_COMPILER_VERSION VERSION_LESS "4.4.0")
+ SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=i686")
+ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=i686")
+ ENDIF()
+
+# Only use futexes on Linux if GCC atomics are available
+IF(NOT MSVC AND NOT CMAKE_CROSSCOMPILING)
+ CHECK_C_SOURCE_RUNS(
+ "
+ #include <stdio.h>
+ #include <unistd.h>
+ #include <errno.h>
+ #include <assert.h>
+ #include <linux/futex.h>
+ #include <unistd.h>
+ #include <sys/syscall.h>
+
+ int futex_wait(int* futex, int v) {
+ return(syscall(SYS_futex, futex, FUTEX_WAIT_PRIVATE, v, NULL, NULL, 0));
+ }
+
+ int futex_signal(int* futex) {
+ return(syscall(SYS_futex, futex, FUTEX_WAKE, 1, NULL, NULL, 0));
+ }
+
+ int main() {
+ int ret;
+ int m = 1;
+
+  /* It is set up to fail and return EWOULDBLOCK. */
+ ret = futex_wait(&m, 0);
+ assert(ret == -1 && errno == EWOULDBLOCK);
+ /* Shouldn't wake up any threads. */
+ assert(futex_signal(&m) == 0);
+
+ return(0);
+ }"
+ HAVE_IB_LINUX_FUTEX)
+ENDIF()
+
+IF(HAVE_IB_LINUX_FUTEX)
+ ADD_DEFINITIONS(-DHAVE_IB_LINUX_FUTEX=1)
+ENDIF()
+
+ENDIF(NOT MSVC)
+
+CHECK_FUNCTION_EXISTS(vasprintf HAVE_VASPRINTF)
+
+CHECK_CXX_SOURCE_COMPILES("struct t1{ int a; char *b; }; struct t1 c= { .a=1, .b=0 }; main() { }" HAVE_C99_INITIALIZERS)
+IF(HAVE_C99_INITIALIZERS)
+ ADD_DEFINITIONS(-DHAVE_C99_INITIALIZERS)
+ENDIF()
+
+SET(MUTEXTYPE "event" CACHE STRING "Mutex type: event, sys or futex")
+
+IF(MUTEXTYPE MATCHES "event")
+ ADD_DEFINITIONS(-DMUTEX_EVENT)
+ELSEIF(MUTEXTYPE MATCHES "futex" AND DEFINED HAVE_IB_LINUX_FUTEX)
+ ADD_DEFINITIONS(-DMUTEX_FUTEX)
+ELSE()
+ ADD_DEFINITIONS(-DMUTEX_SYS)
+ENDIF()
+
+OPTION(WITH_INNODB_DISALLOW_WRITES "InnoDB freeze writes patch from Google" ${WITH_WSREP})
+IF (WITH_INNODB_DISALLOW_WRITES)
+ ADD_DEFINITIONS(-DWITH_INNODB_DISALLOW_WRITES)
+ENDIF()
+
+
+# Include directories under innobase
+INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/innobase/include
+ ${CMAKE_SOURCE_DIR}/storage/innobase/handler)
+
+# Sun Studio bug with -xO2
+IF(CMAKE_CXX_COMPILER_ID MATCHES "SunPro"
+ AND CMAKE_CXX_FLAGS_RELEASE MATCHES "O2"
+ AND NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
+  # Sun Studio 12 crashes with the -xO2 flag, but not with the higher
+  # optimization level -xO3
+ SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/rem/rem0rec.cc
+ PROPERTIES COMPILE_FLAGS -xO3)
+ENDIF()
+
+# Removing compiler optimizations for innodb/mem/* files on 64-bit Windows
+# due to a 64-bit compiler error; see MySQL Bug #19424, #36366, #34297
+IF (MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 8)
+ SET_SOURCE_FILES_PROPERTIES(mem/mem0mem.cc mem/mem0pool.cc
+ PROPERTIES COMPILE_FLAGS -Od)
+ENDIF()
+
+# Avoid generating Hardware Capabilities due to crc32 instructions
+IF(CMAKE_SYSTEM_NAME MATCHES "SunOS" AND CMAKE_SYSTEM_PROCESSOR MATCHES "i386")
+ MY_CHECK_CXX_COMPILER_FLAG("-Wa,-nH")
+ IF(have_CXX__Wa__nH)
+ ADD_COMPILE_FLAGS(
+ ut/ut0crc32.cc
+ COMPILE_FLAGS "-Wa,-nH"
+ )
+ ENDIF()
+ENDIF()
+
+IF(MSVC)
+ # Avoid "unreferenced label" warning in generated file
+ GET_FILENAME_COMPONENT(_SRC_DIR ${CMAKE_CURRENT_LIST_FILE} PATH)
+ SET_SOURCE_FILES_PROPERTIES(${_SRC_DIR}/pars/pars0grm.c
+ PROPERTIES COMPILE_FLAGS "/wd4102")
+ SET_SOURCE_FILES_PROPERTIES(${_SRC_DIR}/pars/lexyy.c
+ PROPERTIES COMPILE_FLAGS "/wd4003")
+ENDIF()
+
+# Include directories under innobase
+INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/innobase/include
+ ${CMAKE_SOURCE_DIR}/storage/innobase/handler
+ ${CMAKE_SOURCE_DIR}/libbinlogevents/include )
diff --git a/storage/innobase/lock/lock0iter.cc b/storage/innobase/lock/lock0iter.cc
index 56bb69fa41c..7a7130eddb9 100644
--- a/storage/innobase/lock/lock0iter.cc
+++ b/storage/innobase/lock/lock0iter.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,12 +26,10 @@ Created July 16, 2007 Vasil Dimov
#define LOCK_MODULE_IMPLEMENTATION
-#include "univ.i"
+#include "dict0mem.h"
#include "lock0iter.h"
#include "lock0lock.h"
#include "lock0priv.h"
-#include "ut0dbg.h"
-#include "ut0lst.h"
/*******************************************************************//**
Initialize lock queue iterator so that it starts to iterate from
@@ -42,7 +40,6 @@ record is stored. It can be undefined (ULINT_UNDEFINED) in two cases:
bit_no is calculated in this function by using
lock_rec_find_set_bit(). There is exactly one bit set in the bitmap
of a wait lock. */
-UNIV_INTERN
void
lock_queue_iterator_reset(
/*======================*/
@@ -78,8 +75,7 @@ lock_queue_iterator_reset(
Gets the previous lock in the lock queue, returns NULL if there are no
more locks (i.e. the current lock is the first one). The iterator is
receded (if not-NULL is returned).
-@return previous lock or NULL */
-UNIV_INTERN
+@return previous lock or NULL */
const lock_t*
lock_queue_iterator_get_prev(
/*=========================*/
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
index 0c4e40067d1..b17f2903e6e 100644
--- a/storage/innobase/lock/lock0lock.cc
+++ b/storage/innobase/lock/lock0lock.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2014, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
@@ -26,435 +26,272 @@ Created 5/7/1996 Heikki Tuuri
#define LOCK_MODULE_IMPLEMENTATION
-#include "lock0lock.h"
-#include "lock0priv.h"
+#include "univ.i"
-#ifdef UNIV_NONINL
-#include "lock0lock.ic"
-#include "lock0priv.ic"
-#endif
+#include <mysql/service_thd_error_context.h>
+#include <sql_class.h>
-#include "ha_prototypes.h"
-#include "usr0sess.h"
-#include "trx0purge.h"
+#include "lock0lock.h"
+#include "lock0priv.h"
#include "dict0mem.h"
-#include "dict0boot.h"
+#include "trx0purge.h"
#include "trx0sys.h"
-#include "pars0pars.h" /* pars_complete_graph_for_exec() */
-#include "que0que.h" /* que_node_get_parent() */
-#include "row0mysql.h" /* row_mysql_handle_errors() */
-#include "row0sel.h" /* sel_node_create(), sel_node_t */
-#include "row0types.h" /* sel_node_t */
-#include "srv0mon.h"
#include "ut0vec.h"
-#include "btr0btr.h"
-#include "dict0boot.h"
+#include "btr0cur.h"
+#include "row0sel.h"
+#include "row0mysql.h"
+#include "row0vers.h"
+#include "pars0pars.h"
+
#include <set>
-#include "mysql/plugin.h"
+#ifdef WITH_WSREP
#include <mysql/service_wsrep.h>
+#endif /* WITH_WSREP */
-#include <string>
-#include <sstream>
+/** Lock scheduling algorithm */
+ulong innodb_lock_schedule_algorithm;
-/* Restricts the length of search we will do in the waits-for
-graph of transactions */
-#define LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK 1000000
+/** The value of innodb_deadlock_detect */
+my_bool innobase_deadlock_detect;
-/* Restricts the search depth we will do in the waits-for graph of
-transactions */
-#define LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK 200
+/*********************************************************************//**
+Checks if a waiting record lock request still has to wait in a queue.
+@return lock that is causing the wait */
+static
+const lock_t*
+lock_rec_has_to_wait_in_queue(
+/*==========================*/
+ const lock_t* wait_lock); /*!< in: waiting record lock */
-/* When releasing transaction locks, this specifies how often we release
-the lock mutex for a moment to give also others access to it */
+/** Grant a lock to a waiting lock request and release the waiting transaction
+after lock_reset_lock_and_trx_wait() has been called. */
+static void lock_grant_after_reset(lock_t* lock);
-#define LOCK_RELEASE_INTERVAL 1000
+extern "C" void thd_rpl_deadlock_check(MYSQL_THD thd, MYSQL_THD other_thd);
+extern "C" int thd_need_wait_reports(const MYSQL_THD thd);
+extern "C" int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd);
-/* Safety margin when creating a new record lock: this many extra records
-can be inserted to the page without need to create a lock with a bigger
-bitmap */
+extern "C" int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2);
-#define LOCK_PAGE_BITMAP_MARGIN 64
+/** Pretty-print a table lock.
+@param[in,out] file output stream
+@param[in] lock table lock */
+static void lock_table_print(FILE* file, const lock_t* lock);
-/** Lock scheduling algorithm */
-ulong innodb_lock_schedule_algorithm = INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS;
-
-/* An explicit record lock affects both the record and the gap before it.
-An implicit x-lock does not affect the gap, it only locks the index
-record from read or update.
-
-If a transaction has modified or inserted an index record, then
-it owns an implicit x-lock on the record. On a secondary index record,
-a transaction has an implicit x-lock also if it has modified the
-clustered index record, the max trx id of the page where the secondary
-index record resides is >= trx id of the transaction (or database recovery
-is running), and there are no explicit non-gap lock requests on the
-secondary index record.
+/** Pretty-print a record lock.
+@param[in,out] file output stream
+@param[in] lock record lock
+@param[in,out] mtr mini-transaction for accessing the record */
+static void lock_rec_print(FILE* file, const lock_t* lock, mtr_t& mtr);
+
+/** Deadlock checker. */
+class DeadlockChecker {
+public:
+ /** Check if a joining lock request results in a deadlock.
+ If a deadlock is found, we will resolve the deadlock by
+ choosing a victim transaction and rolling it back.
+ We will attempt to resolve all deadlocks.
+
+ @param[in] lock the lock request
+ @param[in,out] trx transaction requesting the lock
+
+ @return trx if it was chosen as victim
+ @retval NULL if another victim was chosen,
+ or there is no deadlock (any more) */
+ static const trx_t* check_and_resolve(const lock_t* lock, trx_t* trx);
+
+private:
+ /** Do a shallow copy. Default destructor OK.
+ @param trx the start transaction (start node)
+ @param wait_lock lock that a transaction wants
+ @param mark_start visited node counter
+ @param report_waiters whether to call thd_rpl_deadlock_check() */
+ DeadlockChecker(
+ const trx_t* trx,
+ const lock_t* wait_lock,
+ ib_uint64_t mark_start,
+ bool report_waiters)
+ :
+ m_cost(),
+ m_start(trx),
+ m_too_deep(),
+ m_wait_lock(wait_lock),
+ m_mark_start(mark_start),
+ m_n_elems(),
+ m_report_waiters(report_waiters)
+ {
+ }
-This complicated definition for a secondary index comes from the
-implementation: we want to be able to determine if a secondary index
-record has an implicit x-lock, just by looking at the present clustered
-index record, not at the historical versions of the record. The
-complicated definition can be explained to the user so that there is
-nondeterminism in the access path when a query is answered: we may,
-or may not, access the clustered index record and thus may, or may not,
-bump into an x-lock set there.
-
-Different transaction can have conflicting locks set on the gap at the
-same time. The locks on the gap are purely inhibitive: an insert cannot
-be made, or a select cursor may have to wait if a different transaction
-has a conflicting lock on the gap. An x-lock on the gap does not give
-the right to insert into the gap.
-
-An explicit lock can be placed on a user record or the supremum record of
-a page. The locks on the supremum record are always thought to be of the gap
-type, though the gap bit is not set. When we perform an update of a record
-where the size of the record changes, we may temporarily store its explicit
-locks on the infimum record of the page, though the infimum otherwise never
-carries locks.
-
-A waiting record lock can also be of the gap type. A waiting lock request
-can be granted when there is no conflicting mode lock request by another
-transaction ahead of it in the explicit lock queue.
-
-In version 4.0.5 we added yet another explicit lock type: LOCK_REC_NOT_GAP.
-It only locks the record it is placed on, not the gap before the record.
-This lock type is necessary to emulate an Oracle-like READ COMMITTED isolation
-level.
-
--------------------------------------------------------------------------
-RULE 1: If there is an implicit x-lock on a record, and there are non-gap
--------
-lock requests waiting in the queue, then the transaction holding the implicit
-x-lock also has an explicit non-gap record x-lock. Therefore, as locks are
-released, we can grant locks to waiting lock requests purely by looking at
-the explicit lock requests in the queue.
-
-RULE 3: Different transactions cannot have conflicting granted non-gap locks
--------
-on a record at the same time. However, they can have conflicting granted gap
-locks.
-RULE 4: If a there is a waiting lock request in a queue, no lock request,
--------
-gap or not, can be inserted ahead of it in the queue. In record deletes
-and page splits new gap type locks can be created by the database manager
-for a transaction, and without rule 4, the waits-for graph of transactions
-might become cyclic without the database noticing it, as the deadlock check
-is only performed when a transaction itself requests a lock!
--------------------------------------------------------------------------
-
-An insert is allowed to a gap if there are no explicit lock requests by
-other transactions on the next record. It does not matter if these lock
-requests are granted or waiting, gap bit set or not, with the exception
-that a gap type request set by another transaction to wait for
-its turn to do an insert is ignored. On the other hand, an
-implicit x-lock by another transaction does not prevent an insert, which
-allows for more concurrency when using an Oracle-style sequence number
-generator for the primary key with many transactions doing inserts
-concurrently.
-
-A modify of a record is allowed if the transaction has an x-lock on the
-record, or if other transactions do not have any non-gap lock requests on the
-record.
+ /** Check if the search is too deep. */
+ bool is_too_deep() const
+ {
+ return(m_n_elems > LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK
+ || m_cost > LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK);
+ }
-A read of a single user record with a cursor is allowed if the transaction
-has a non-gap explicit, or an implicit lock on the record, or if the other
-transactions have no x-lock requests on the record. At a page supremum a
-read is always allowed.
-
-In summary, an implicit lock is seen as a granted x-lock only on the
-record, not on the gap. An explicit lock with no gap bit set is a lock
-both on the record and the gap. If the gap bit is set, the lock is only
-on the gap. Different transaction cannot own conflicting locks on the
-record at the same time, but they may own conflicting locks on the gap.
-Granted locks on a record give an access right to the record, but gap type
-locks just inhibit operations.
-
-NOTE: Finding out if some transaction has an implicit x-lock on a secondary
-index record can be cumbersome. We may have to look at previous versions of
-the corresponding clustered index record to find out if a delete marked
-secondary index record was delete marked by an active transaction, not by
-a committed one.
-
-FACT A: If a transaction has inserted a row, it can delete it any time
-without need to wait for locks.
-
-PROOF: The transaction has an implicit x-lock on every index record inserted
-for the row, and can thus modify each record without the need to wait. Q.E.D.
-
-FACT B: If a transaction has read some result set with a cursor, it can read
-it again, and retrieves the same result set, if it has not modified the
-result set in the meantime. Hence, there is no phantom problem. If the
-biggest record, in the alphabetical order, touched by the cursor is removed,
-a lock wait may occur, otherwise not.
-
-PROOF: When a read cursor proceeds, it sets an s-lock on each user record
-it passes, and a gap type s-lock on each page supremum. The cursor must
-wait until it has these locks granted. Then no other transaction can
-have a granted x-lock on any of the user records, and therefore cannot
-modify the user records. Neither can any other transaction insert into
-the gaps which were passed over by the cursor. Page splits and merges,
-and removal of obsolete versions of records do not affect this, because
-when a user record or a page supremum is removed, the next record inherits
-its locks as gap type locks, and therefore blocks inserts to the same gap.
-Also, if a page supremum is inserted, it inherits its locks from the successor
-record. When the cursor is positioned again at the start of the result set,
-the records it will touch on its course are either records it touched
-during the last pass or new inserted page supremums. It can immediately
-access all these records, and when it arrives at the biggest record, it
-notices that the result set is complete. If the biggest record was removed,
-lock wait can occur because the next record only inherits a gap type lock,
-and a wait may be needed. Q.E.D. */
-
-/* If an index record should be changed or a new inserted, we must check
-the lock on the record or the next. When a read cursor starts reading,
-we will set a record level s-lock on each record it passes, except on the
-initial record on which the cursor is positioned before we start to fetch
-records. Our index tree search has the convention that the B-tree
-cursor is positioned BEFORE the first possibly matching record in
-the search. Optimizations are possible here: if the record is searched
-on an equality condition to a unique key, we could actually set a special
-lock on the record, a lock which would not prevent any insert before
-this record. In the next key locking an x-lock set on a record also
-prevents inserts just before that record.
- There are special infimum and supremum records on each page.
-A supremum record can be locked by a read cursor. This records cannot be
-updated but the lock prevents insert of a user record to the end of
-the page.
- Next key locks will prevent the phantom problem where new rows
-could appear to SELECT result sets after the select operation has been
-performed. Prevention of phantoms ensures the serilizability of
-transactions.
- What should we check if an insert of a new record is wanted?
-Only the lock on the next record on the same page, because also the
-supremum record can carry a lock. An s-lock prevents insertion, but
-what about an x-lock? If it was set by a searched update, then there
-is implicitly an s-lock, too, and the insert should be prevented.
-What if our transaction owns an x-lock to the next record, but there is
-a waiting s-lock request on the next record? If this s-lock was placed
-by a read cursor moving in the ascending order in the index, we cannot
-do the insert immediately, because when we finally commit our transaction,
-the read cursor should see also the new inserted record. So we should
-move the read cursor backward from the next record for it to pass over
-the new inserted record. This move backward may be too cumbersome to
-implement. If we in this situation just enqueue a second x-lock request
-for our transaction on the next record, then the deadlock mechanism
-notices a deadlock between our transaction and the s-lock request
-transaction. This seems to be an ok solution.
- We could have the convention that granted explicit record locks,
-lock the corresponding records from changing, and also lock the gaps
-before them from inserting. A waiting explicit lock request locks the gap
-before from inserting. Implicit record x-locks, which we derive from the
-transaction id in the clustered index record, only lock the record itself
-from modification, not the gap before it from inserting.
- How should we store update locks? If the search is done by a unique
-key, we could just modify the record trx id. Otherwise, we could put a record
-x-lock on the record. If the update changes ordering fields of the
-clustered index record, the inserted new record needs no record lock in
-lock table, the trx id is enough. The same holds for a secondary index
-record. Searched delete is similar to update.
-
-PROBLEM:
-What about waiting lock requests? If a transaction is waiting to make an
-update to a record which another modified, how does the other transaction
-know to send the end-lock-wait signal to the waiting transaction? If we have
-the convention that a transaction may wait for just one lock at a time, how
-do we preserve it if lock wait ends?
-
-PROBLEM:
-Checking the trx id label of a secondary index record. In the case of a
-modification, not an insert, is this necessary? A secondary index record
-is modified only by setting or resetting its deleted flag. A secondary index
-record contains fields to uniquely determine the corresponding clustered
-index record. A secondary index record is therefore only modified if we
-also modify the clustered index record, and the trx id checking is done
-on the clustered index record, before we come to modify the secondary index
-record. So, in the case of delete marking or unmarking a secondary index
-record, we do not have to care about trx ids, only the locks in the lock
-table must be checked. In the case of a select from a secondary index, the
-trx id is relevant, and in this case we may have to search the clustered
-index record.
-
-PROBLEM: How to update record locks when page is split or merged, or
---------------------------------------------------------------------
-a record is deleted or updated?
-If the size of fields in a record changes, we perform the update by
-a delete followed by an insert. How can we retain the locks set or
-waiting on the record? Because a record lock is indexed in the bitmap
-by the heap number of the record, when we remove the record from the
-record list, it is possible still to keep the lock bits. If the page
-is reorganized, we could make a table of old and new heap numbers,
-and permute the bitmaps in the locks accordingly. We can add to the
-table a row telling where the updated record ended. If the update does
-not require a reorganization of the page, we can simply move the lock
-bits for the updated record to the position determined by its new heap
-number (we may have to allocate a new lock, if we run out of the bitmap
-in the old one).
- A more complicated case is the one where the reinsertion of the
-updated record is done pessimistically, because the structure of the
-tree may change.
-
-PROBLEM: If a supremum record is removed in a page merge, or a record
----------------------------------------------------------------------
-removed in a purge, what to do to the waiting lock requests? In a split to
-the right, we just move the lock requests to the new supremum. If a record
-is removed, we could move the waiting lock request to its inheritor, the
-next record in the index. But, the next record may already have lock
-requests on its own queue. A new deadlock check should be made then. Maybe
-it is easier just to release the waiting transactions. They can then enqueue
-new lock requests on appropriate records.
-
-PROBLEM: When a record is inserted, what locks should it inherit from the
--------------------------------------------------------------------------
-upper neighbor? An insert of a new supremum record in a page split is
-always possible, but an insert of a new user record requires that the upper
-neighbor does not have any lock requests by other transactions, granted or
-waiting, in its lock queue. Solution: We can copy the locks as gap type
-locks, so that also the waiting locks are transformed to granted gap type
-locks on the inserted record. */
-
-#define LOCK_STACK_SIZE OS_THREAD_MAX_N
-
-/* LOCK COMPATIBILITY MATRIX
- * IS IX S X AI
- * IS + + + - +
- * IX + + - - +
- * S + - + - -
- * X - - - - -
- * AI + + - - -
- *
- * Note that for rows, InnoDB only acquires S or X locks.
- * For tables, InnoDB normally acquires IS or IX locks.
- * S or X table locks are only acquired for LOCK TABLES.
- * Auto-increment (AI) locks are needed because of
- * statement-level MySQL binlog.
- * See also lock_mode_compatible().
- */
-static const byte lock_compatibility_matrix[5][5] = {
- /** IS IX S X AI */
- /* IS */ { TRUE, TRUE, TRUE, FALSE, TRUE},
- /* IX */ { TRUE, TRUE, FALSE, FALSE, TRUE},
- /* S */ { TRUE, FALSE, TRUE, FALSE, FALSE},
- /* X */ { FALSE, FALSE, FALSE, FALSE, FALSE},
- /* AI */ { TRUE, TRUE, FALSE, FALSE, FALSE}
-};
+ /** Save current state.
+ @param lock lock to push on the stack.
+ @param heap_no the heap number to push on the stack.
+ @return false if stack is full. */
+ bool push(const lock_t* lock, ulint heap_no)
+ {
+ ut_ad((lock_get_type_low(lock) & LOCK_REC)
+ || (lock_get_type_low(lock) & LOCK_TABLE));
-/* STRONGER-OR-EQUAL RELATION (mode1=row, mode2=column)
- * IS IX S X AI
- * IS + - - - -
- * IX + + - - -
- * S + - + - -
- * X + + + + +
- * AI - - - - +
- * See lock_mode_stronger_or_eq().
- */
-static const byte lock_strength_matrix[5][5] = {
- /** IS IX S X AI */
- /* IS */ { TRUE, FALSE, FALSE, FALSE, FALSE},
- /* IX */ { TRUE, TRUE, FALSE, FALSE, FALSE},
- /* S */ { TRUE, FALSE, TRUE, FALSE, FALSE},
- /* X */ { TRUE, TRUE, TRUE, TRUE, TRUE},
- /* AI */ { FALSE, FALSE, FALSE, FALSE, TRUE}
-};
+ ut_ad(((lock_get_type_low(lock) & LOCK_TABLE) != 0)
+ == (heap_no == ULINT_UNDEFINED));
-/** Deadlock check context. */
-struct lock_deadlock_ctx_t {
- const trx_t* start; /*!< Joining transaction that is
- requesting a lock in an incompatible
- mode */
+ /* Ensure that the stack is bounded. */
+ if (m_n_elems >= UT_ARR_SIZE(s_states)) {
+ return(false);
+ }
- const lock_t* wait_lock; /*!< Lock that trx wants */
+ state_t& state = s_states[m_n_elems++];
- ib_uint64_t mark_start; /*!< Value of lock_mark_count at
- the start of the deadlock check. */
+ state.m_lock = lock;
+ state.m_wait_lock = m_wait_lock;
+		state.m_heap_no = heap_no;
- ulint depth; /*!< Stack depth */
+ return(true);
+ }
- ulint cost; /*!< Calculation steps thus far */
+ /** Restore state.
+ @param[out] lock current lock
+ @param[out] heap_no current heap_no */
+ void pop(const lock_t*& lock, ulint& heap_no)
+ {
+ ut_a(m_n_elems > 0);
- ibool too_deep; /*!< TRUE if search was too deep and
- was aborted */
-};
+ const state_t& state = s_states[--m_n_elems];
-/** DFS visited node information used during deadlock checking. */
-struct lock_stack_t {
- const lock_t* lock; /*!< Current lock */
- const lock_t* wait_lock; /*!< Waiting for lock */
- ulint heap_no; /*!< heap number if rec lock */
-};
+ lock = state.m_lock;
+ heap_no = state.m_heap_no;
+ m_wait_lock = state.m_wait_lock;
+ }
-/** Pretty-print a table lock.
-@param[in,out] file output stream
-@param[in] lock table lock
-@param[in] now current time */
-static void lock_table_print(FILE* file, const lock_t* lock, time_t now);
+ /** Check whether the node has been visited.
+ @param lock lock to check
+ @return true if the node has been visited */
+ bool is_visited(const lock_t* lock) const
+ {
+ return(lock->trx->lock.deadlock_mark > m_mark_start);
+ }
-/** Pretty-print a record lock.
-@param[in,out] file output stream
-@param[in] lock record lock
-@param[in] now current time
-@param[in,out] mtr mini-transaction */
-static void lock_rec_print(FILE* file, const lock_t* lock, time_t now,
- mtr_t* mtr = NULL);
+ /** Get the next lock in the queue that is owned by a transaction
+ whose sub-tree has not already been searched.
+ Note: "next" here means PREV for table locks.
+ @param lock Lock in queue
+	@param heap_no heap_no if lock is a record lock, else ULINT_UNDEFINED
+ @return next lock or NULL if at end of queue */
+ const lock_t* get_next_lock(const lock_t* lock, ulint heap_no) const;
-/*********************************************************************//**
-Checks if a waiting record lock request still has to wait in a queue.
-@return lock that is causing the wait */
-static
-const lock_t*
-lock_rec_has_to_wait_in_queue(
-/*==========================*/
- const lock_t* wait_lock); /*!< in: waiting record lock */
+ /** Get the first lock to search. The search starts from the current
+ wait_lock. What we are really interested in is an edge from the
+ current wait_lock's owning transaction to another transaction that has
+ a lock ahead in the queue. We skip locks where the owning transaction's
+ sub-tree has already been searched.
-/*************************************************************//**
-Grants a lock to a waiting lock request and releases the waiting transaction.
-The caller must hold lock_sys->mutex. */
-static
-void
-lock_grant(
-/*=======*/
- lock_t* lock, /*!< in/out: waiting lock request */
- bool owns_trx_mutex); /*!< in: whether lock->trx->mutex is owned */
+ Note: The record locks are traversed from the oldest lock to the
+ latest. For table locks we go from latest to oldest.
-extern "C" void thd_report_wait_for(MYSQL_THD thd, MYSQL_THD other_thd);
-extern "C" int thd_need_wait_for(const MYSQL_THD thd);
-extern "C"
-int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd);
+	For record locks, we first position the iterator on the first lock on
+	the page and then reposition it on the actual heap_no. This is required
+	due to the way the record lock hash is implemented.
-extern "C"
-int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2);
+	@param[out] heap_no heap number if rec lock, else ULINT_UNDEFINED.
-/** Stack to use during DFS search. Currently only a single stack is required
-because there is no parallel deadlock check. This stack is protected by
-the lock_sys_t::mutex. */
-static lock_stack_t* lock_stack;
+ @return first lock or NULL */
+ const lock_t* get_first_lock(ulint* heap_no) const;
-#ifdef UNIV_DEBUG
-/** The count of the types of locks. */
-static const ulint lock_types = UT_ARR_SIZE(lock_compatibility_matrix);
-#endif /* UNIV_DEBUG */
+ /** Notify that a deadlock has been detected and print the conflicting
+ transaction info.
+ @param lock lock causing deadlock */
+ void notify(const lock_t* lock) const;
+
+	/** Select the victim transaction that should be rolled back.
+ @return victim transaction */
+ const trx_t* select_victim() const;
+
+ /** Rollback transaction selected as the victim. */
+ void trx_rollback();
+
+ /** Looks iteratively for a deadlock. Note: the joining transaction
+ may have been granted its lock by the deadlock checks.
+
+	@return 0 if no deadlock, else the victim transaction. */
+ const trx_t* search();
+
+ /** Print transaction data to the deadlock file and possibly to stderr.
+ @param trx transaction
+ @param max_query_len max query length to print */
+ static void print(const trx_t* trx, ulint max_query_len);
+
+ /** rewind(3) the file used for storing the latest detected deadlock
+ and print a heading message to stderr if printing of all deadlocks to
+ stderr is enabled. */
+ static void start_print();
+
+ /** Print lock data to the deadlock file and possibly to stderr.
+ @param lock record or table type lock */
+ static void print(const lock_t* lock);
+
+ /** Print a message to the deadlock file and possibly to stderr.
+ @param msg message to print */
+ static void print(const char* msg);
+
+ /** Print info about transaction that was rolled back.
+ @param trx transaction rolled back
+ @param lock lock trx wants */
+ static void rollback_print(const trx_t* trx, const lock_t* lock);
+
+private:
+ /** DFS state information, used during deadlock checking. */
+ struct state_t {
+ const lock_t* m_lock; /*!< Current lock */
+ const lock_t* m_wait_lock; /*!< Waiting for lock */
+ ulint m_heap_no; /*!< heap number if rec lock */
+ };
+
+ /** Used in deadlock tracking. Protected by lock_sys->mutex. */
+ static ib_uint64_t s_lock_mark_counter;
+
+ /** Calculation steps thus far. It is the count of the nodes visited. */
+ ulint m_cost;
+
+ /** Joining transaction that is requesting a lock in an
+ incompatible mode */
+ const trx_t* m_start;
-#ifdef UNIV_PFS_MUTEX
-/* Key to register mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t lock_sys_mutex_key;
-/* Key to register mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t lock_sys_wait_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-/* Buffer to collect THDs to report waits for. */
-struct thd_wait_reports {
- struct thd_wait_reports *next; /*!< List link */
- ulint used; /*!< How many elements in waitees[] */
- trx_t *waitees[64]; /*!< Trxs for thd_report_wait_for() */
+ /** TRUE if search was too deep and was aborted */
+ bool m_too_deep;
+
+ /** Lock that trx wants */
+ const lock_t* m_wait_lock;
+
+ /** Value of lock_mark_count at the start of the deadlock check. */
+ ib_uint64_t m_mark_start;
+
+ /** Number of states pushed onto the stack */
+ size_t m_n_elems;
+
+ /** This is to avoid malloc/free calls. */
+ static state_t s_states[MAX_STACK_SIZE];
+
+ /** Set if thd_rpl_deadlock_check() should be called for waits. */
+ const bool m_report_waiters;
};
+/** Counter to mark visited nodes during deadlock search. */
+ib_uint64_t DeadlockChecker::s_lock_mark_counter = 0;
-#ifdef UNIV_DEBUG
-UNIV_INTERN ibool lock_print_waits = FALSE;
+/** The stack used for deadlock searches. */
+DeadlockChecker::state_t DeadlockChecker::s_states[MAX_STACK_SIZE];
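As a standalone illustration of the bounded explicit-stack discipline that
push()/pop() implement (the node type and traversal below are invented for
the sketch and are not InnoDB code):

	#include <cstddef>

	struct node { node* child; node* sibling; };

	static const size_t	MAX_STACK = 4096;

	/* Depth-first walk without recursion; returns true if the
	bounded stack overflows, mirroring m_too_deep above. */
	bool walk_too_deep(node* root)
	{
		node*	stack[MAX_STACK];
		size_t	n = 0;

		for (node* cur = root; cur != NULL; ) {
			if (cur->child != NULL) {
				if (n == MAX_STACK) {
					return(true);	/* abort the search */
				}
				stack[n++] = cur;	/* push: resume point */
				cur = cur->child;
			} else if (cur->sibling != NULL) {
				cur = cur->sibling;
			} else {
				/* pop until a node with an unvisited
				sibling is found, or the stack is empty */
				cur = NULL;
				while (n > 0 && cur == NULL) {
					cur = stack[--n]->sibling;
				}
			}
		}
		return(false);
	}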
+#ifdef UNIV_DEBUG
/*********************************************************************//**
Validates the lock system.
-@return TRUE if ok */
+@return TRUE if ok */
static
bool
lock_validate();
@@ -462,95 +299,52 @@ lock_validate();
/*********************************************************************//**
Validates the record lock queues on a page.
-@return TRUE if ok */
+@return TRUE if ok */
static
ibool
lock_rec_validate_page(
/*===================*/
const buf_block_t* block) /*!< in: buffer block */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
#endif /* UNIV_DEBUG */
/* The lock system */
-UNIV_INTERN lock_sys_t* lock_sys = NULL;
+lock_sys_t* lock_sys = NULL;
/** We store info on the latest deadlock error to this buffer. InnoDB
Monitor will then fetch and print it */
-UNIV_INTERN ibool lock_deadlock_found = FALSE;
+static bool lock_deadlock_found = false;
+
/** Only created if !srv_read_only_mode */
static FILE* lock_latest_err_file;
-/** Check if a joining lock request results in a deadlock. If a deadlock is
-found this function will resolve the dadlock by choosing a victim transaction
-and rolling it back. It will attempt to resolve all deadlocks.
-@param[in] trx joining transaction
-@param[in] lock the requested lock
-@param[in] now current time
-@return trx->id of the victim transaction
-@retval 0 if some other transaction was chosen as a victim and
-rolled back, or no deadlock was found. */
-static trx_id_t lock_deadlock_check_and_resolve(const trx_t* trx,
- const lock_t* lock,
- time_t now);
-
-/*********************************************************************//**
-Gets the nth bit of a record lock.
-@return TRUE if bit set also if i == ULINT_UNDEFINED return FALSE*/
-UNIV_INLINE
-ibool
-lock_rec_get_nth_bit(
-/*=================*/
- const lock_t* lock, /*!< in: record lock */
- ulint i) /*!< in: index of the bit */
-{
- const byte* b;
-
- ut_ad(lock);
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
-
- if (i >= lock->un_member.rec_lock.n_bits) {
-
- return(FALSE);
- }
-
- b = ((const byte*) &lock[1]) + (i / 8);
-
- return(1 & *b >> (i % 8));
-}
-
/*********************************************************************//**
Reports that a transaction id is insensible, i.e., in the future. */
-UNIV_INTERN
void
lock_report_trx_id_insanity(
/*========================*/
trx_id_t trx_id, /*!< in: trx id */
const rec_t* rec, /*!< in: user record */
dict_index_t* index, /*!< in: index */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets, /*!< in: rec_get_offsets(rec, index) */
trx_id_t max_trx_id) /*!< in: trx_sys_get_max_trx_id() */
{
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: transaction id associated with record\n",
- stderr);
- rec_print_new(stderr, rec, offsets);
- fputs("InnoDB: in ", stderr);
- dict_index_name_print(stderr, NULL, index);
- fprintf(stderr, "\n"
- "InnoDB: is " TRX_ID_FMT " which is higher than the"
- " global trx id counter " TRX_ID_FMT "!\n"
- "InnoDB: The table is corrupt. You have to do"
- " dump + drop + reimport.\n",
- trx_id, max_trx_id);
+ ib::error()
+ << "Transaction id " << trx_id
+ << " associated with record" << rec_offsets_print(rec, offsets)
+ << " in index " << index->name
+ << " of table " << index->table->name
+ << " is greater than the global counter " << max_trx_id
+ << "! The table is corrupted.";
}
/*********************************************************************//**
Checks that a transaction id is sensible, i.e., not in the future.
-@return true if ok */
+@return true if ok */
#ifdef UNIV_DEBUG
-UNIV_INTERN
+
#else
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
+static MY_ATTRIBUTE((warn_unused_result))
#endif
bool
lock_check_trx_id_sanity(
@@ -558,19 +352,16 @@ lock_check_trx_id_sanity(
trx_id_t trx_id, /*!< in: trx id */
const rec_t* rec, /*!< in: user record */
dict_index_t* index, /*!< in: index */
- const ulint* offsets) /*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets) /*!< in: rec_get_offsets(rec, index) */
{
- bool is_ok;
- trx_id_t max_trx_id;
-
ut_ad(rec_offs_validate(rec, index, offsets));
- max_trx_id = trx_sys_get_max_trx_id();
- is_ok = trx_id < max_trx_id;
+ trx_id_t max_trx_id = trx_sys_get_max_trx_id();
+ bool is_ok = trx_id < max_trx_id;
- if (UNIV_UNLIKELY(!is_ok)) {
- lock_report_trx_id_insanity(trx_id,
- rec, index, offsets, max_trx_id);
+ if (!is_ok) {
+ lock_report_trx_id_insanity(
+ trx_id, rec, index, offsets, max_trx_id);
}
return(is_ok);
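/* Editorial sketch, not part of the patch: the sanity rule in isolation.
Record trx ids are allocated from the global counter, so any stored id at
or above the counter's current value can only come from corruption. */
#include <cstdint>

typedef uint64_t trx_id_t;

inline bool trx_id_is_sane(trx_id_t rec_trx_id, trx_id_t global_max_trx_id)
{
	/* every valid id was drawn strictly below the counter */
	return rec_trx_id < global_max_trx_id;
}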
@@ -580,28 +371,34 @@ lock_check_trx_id_sanity(
Checks that a record is seen in a consistent read.
@return true if sees, or false if an earlier version of the record
should be retrieved */
-UNIV_INTERN
bool
lock_clust_rec_cons_read_sees(
/*==========================*/
const rec_t* rec, /*!< in: user record which should be read or
passed over by a read cursor */
dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- read_view_t* view) /*!< in: consistent read view */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
+ ReadView* view) /*!< in: consistent read view */
{
- trx_id_t trx_id;
-
ut_ad(dict_index_is_clust(index));
ut_ad(page_rec_is_user_rec(rec));
ut_ad(rec_offs_validate(rec, index, offsets));
+	/* Temp-tables are not shared across connections, and multiple
+	transactions from different connections cannot simultaneously
+	operate on the same temp-table, so a read of a temp-table is
+	always a consistent read. */
+ if (srv_read_only_mode || dict_table_is_temporary(index->table)) {
+ ut_ad(view == 0 || dict_table_is_temporary(index->table));
+ return(true);
+ }
+
/* NOTE that we call this function while holding the search
system latch. */
- trx_id = row_get_rec_trx_id(rec, index, offsets);
+ trx_id_t trx_id = row_get_rec_trx_id(rec, index, offsets);
- return(read_view_sees_trx_id(view, trx_id));
+ return(view->changes_visible(trx_id, index->table->name));
}
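/* Editorial sketch, not part of the patch: what ReadView::changes_visible()
amounts to conceptually. The field names below mirror the classic InnoDB
read-view layout, but this is a model, not the real class. */
#include <algorithm>
#include <cstdint>
#include <vector>

typedef uint64_t trx_id_t;

struct MiniReadView {
	trx_id_t              up_limit_id;  /* ids below this: committed */
	trx_id_t              low_limit_id; /* ids at/above this: future */
	std::vector<trx_id_t> ids;          /* active at snapshot, sorted */

	bool changes_visible(trx_id_t id) const
	{
		if (id < up_limit_id) return true;
		if (id >= low_limit_id) return false;
		/* in between: visible unless the trx was still active */
		return !std::binary_search(ids.begin(), ids.end(), id);
	}
};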
/*********************************************************************//**
@@ -614,17 +411,15 @@ record.
@return true if certainly sees, or false if an earlier version of the
clustered index record might be needed */
-UNIV_INTERN
bool
lock_sec_rec_cons_read_sees(
/*========================*/
const rec_t* rec, /*!< in: user record which
should be read or passed over
by a read cursor */
- const read_view_t* view) /*!< in: consistent read view */
+ const dict_index_t* index, /*!< in: index */
+ const ReadView* view) /*!< in: consistent read view */
{
- trx_id_t max_trx_id;
-
ut_ad(page_rec_is_user_rec(rec));
/* NOTE that we might call this function while holding the search
@@ -633,17 +428,26 @@ lock_sec_rec_cons_read_sees(
if (recv_recovery_is_on()) {
return(false);
+
+ } else if (dict_table_is_temporary(index->table)) {
+
+		/* Temp-tables are not shared across connections, and
+		multiple transactions from different connections cannot
+		simultaneously operate on the same temp-table, so a read
+		of a temp-table is always a consistent read. */
+
+ return(true);
}
- max_trx_id = page_get_max_trx_id(page_align(rec));
- ut_ad(max_trx_id);
+ trx_id_t max_trx_id = page_get_max_trx_id(page_align(rec));
- return(max_trx_id < view->up_limit_id);
+ ut_ad(max_trx_id > 0);
+
+ return(view->sees(max_trx_id));
}
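/* Editorial sketch, not part of the patch: secondary-index records carry no
per-record trx id, so the only available bound is the page-wide
PAGE_MAX_TRX_ID, which the function above compares via ReadView::sees().
A standalone model of that bound (names illustrative): */
#include <cstdint>

typedef uint64_t trx_id_t;

/* up_limit_id: every trx id below it committed before the view was opened */
inline bool sec_rec_surely_sees(trx_id_t page_max_trx_id, trx_id_t up_limit_id)
{
	/* if even the newest change on the page predates the view, no
	clustered-index lookup is needed to decide visibility */
	return page_max_trx_id < up_limit_id;
}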
/*********************************************************************//**
Creates the lock system at database start. */
-UNIV_INTERN
void
lock_sys_create(
/*============*/
@@ -651,13 +455,9 @@ lock_sys_create(
{
ulint lock_sys_sz;
- lock_sys_sz = sizeof(*lock_sys)
- + OS_THREAD_MAX_N * sizeof(srv_slot_t);
+ lock_sys_sz = sizeof(*lock_sys) + OS_THREAD_MAX_N * sizeof(srv_slot_t);
- lock_sys = static_cast<lock_sys_t*>(mem_zalloc(lock_sys_sz));
-
- lock_stack = static_cast<lock_stack_t*>(
- mem_zalloc(sizeof(*lock_stack) * LOCK_STACK_SIZE));
+ lock_sys = static_cast<lock_sys_t*>(ut_zalloc_nokey(lock_sys_sz));
void* ptr = &lock_sys[1];
@@ -665,14 +465,15 @@ lock_sys_create(
lock_sys->last_slot = lock_sys->waiting_threads;
- mutex_create(lock_sys_mutex_key, &lock_sys->mutex, SYNC_LOCK_SYS);
+ mutex_create(LATCH_ID_LOCK_SYS, &lock_sys->mutex);
- mutex_create(lock_sys_wait_mutex_key,
- &lock_sys->wait_mutex, SYNC_LOCK_WAIT_SYS);
+ mutex_create(LATCH_ID_LOCK_SYS_WAIT, &lock_sys->wait_mutex);
- lock_sys->timeout_event = os_event_create();
+ lock_sys->timeout_event = os_event_create(0);
lock_sys->rec_hash = hash_create(n_cells);
+ lock_sys->prdt_hash = hash_create(n_cells);
+ lock_sys->prdt_page_hash = hash_create(n_cells);
if (!srv_read_only_mode) {
lock_latest_err_file = os_file_create_tmpfile(NULL);
@@ -680,288 +481,126 @@ lock_sys_create(
}
}
-/*********************************************************************//**
-Closes the lock system at database shutdown. */
-UNIV_INTERN
-void
-lock_sys_close(void)
-/*================*/
-{
- if (lock_latest_err_file != NULL) {
- fclose(lock_latest_err_file);
- lock_latest_err_file = NULL;
- }
-
- hash_table_free(lock_sys->rec_hash);
-
- mutex_free(&lock_sys->mutex);
- mutex_free(&lock_sys->wait_mutex);
-
- mem_free(lock_stack);
- mem_free(lock_sys);
-
- lock_sys = NULL;
- lock_stack = NULL;
-}
-
-/*********************************************************************//**
-Gets the size of a lock struct.
-@return size in bytes */
-UNIV_INTERN
-ulint
-lock_get_size(void)
-/*===============*/
-{
- return((ulint) sizeof(lock_t));
-}
-
-/*********************************************************************//**
-Gets the mode of a lock.
-@return mode */
-UNIV_INLINE
-enum lock_mode
-lock_get_mode(
-/*==========*/
- const lock_t* lock) /*!< in: lock */
-{
- ut_ad(lock);
-
- return(static_cast<enum lock_mode>(lock->type_mode & LOCK_MODE_MASK));
-}
-
-/*********************************************************************//**
-Gets the wait flag of a lock.
-@return LOCK_WAIT if waiting, 0 if not */
-UNIV_INLINE
+/** Calculates the fold value of a lock: used in migrating the hash table.
+@param[in] lock record lock object
+@return folded value */
+static
ulint
-lock_get_wait(
-/*==========*/
- const lock_t* lock) /*!< in: lock */
+lock_rec_lock_fold(
+ const lock_t* lock)
{
- ut_ad(lock);
-
- return(lock->type_mode & LOCK_WAIT);
+ return(lock_rec_fold(lock->un_member.rec_lock.space,
+ lock->un_member.rec_lock.page_no));
}
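/* Editorial sketch, not part of the patch: lock_rec_fold() reduces the
(space, page_no) pair to a single hash key. A standalone fold of the same
shape (the real mixing function differs): */
#include <cstdint>

inline uint64_t rec_fold(uint32_t space, uint32_t page_no)
{
	uint64_t k = (uint64_t(space) << 32) | page_no;
	k ^= k >> 33;			/* cheap avalanche steps */
	k *= 0xff51afd7ed558ccdULL;
	k ^= k >> 33;
	return k;
}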
-/*********************************************************************//**
-Gets the source table of an ALTER TABLE transaction. The table must be
-covered by an IX or IS table lock.
-@return the source table of transaction, if it is covered by an IX or
-IS table lock; dest if there is no source table, and NULL if the
-transaction is locking more than two tables or an inconsistency is
-found */
-UNIV_INTERN
-dict_table_t*
-lock_get_src_table(
-/*===============*/
- trx_t* trx, /*!< in: transaction */
- dict_table_t* dest, /*!< in: destination of ALTER TABLE */
- enum lock_mode* mode) /*!< out: lock mode of the source table */
+/** Resize the lock hash tables.
+@param[in] n_cells number of slots in lock hash table */
+void
+lock_sys_resize(
+ ulint n_cells)
{
- dict_table_t* src;
- lock_t* lock;
+ hash_table_t* old_hash;
- ut_ad(!lock_mutex_own());
-
- src = NULL;
- *mode = LOCK_NONE;
-
- /* The trx mutex protects the trx_locks for our purposes.
- Other transactions could want to convert one of our implicit
- record locks to an explicit one. For that, they would need our
- trx mutex. Waiting locks can be removed while only holding
- lock_sys->mutex, but this is a running transaction and cannot
- thus be holding any waiting locks. */
- trx_mutex_enter(trx);
+ lock_mutex_enter();
- for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
- lock != NULL;
- lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
- lock_table_t* tab_lock;
- enum lock_mode lock_mode;
- if (!(lock_get_type_low(lock) & LOCK_TABLE)) {
- /* We are only interested in table locks. */
- continue;
- }
- tab_lock = &lock->un_member.tab_lock;
- if (dest == tab_lock->table) {
- /* We are not interested in the destination table. */
- continue;
- } else if (!src) {
- /* This presumably is the source table. */
- src = tab_lock->table;
- if (UT_LIST_GET_LEN(src->locks) != 1
- || UT_LIST_GET_FIRST(src->locks) != lock) {
- /* We only support the case when
- there is only one lock on this table. */
- src = NULL;
- goto func_exit;
- }
- } else if (src != tab_lock->table) {
- /* The transaction is locking more than
- two tables (src and dest): abort */
- src = NULL;
- goto func_exit;
- }
-
- /* Check that the source table is locked by
- LOCK_IX or LOCK_IS. */
- lock_mode = lock_get_mode(lock);
- if (lock_mode == LOCK_IX || lock_mode == LOCK_IS) {
- if (*mode != LOCK_NONE && *mode != lock_mode) {
- /* There are multiple locks on src. */
- src = NULL;
- goto func_exit;
+ old_hash = lock_sys->rec_hash;
+ lock_sys->rec_hash = hash_create(n_cells);
+ HASH_MIGRATE(old_hash, lock_sys->rec_hash, lock_t, hash,
+ lock_rec_lock_fold);
+ hash_table_free(old_hash);
+
+ old_hash = lock_sys->prdt_hash;
+ lock_sys->prdt_hash = hash_create(n_cells);
+ HASH_MIGRATE(old_hash, lock_sys->prdt_hash, lock_t, hash,
+ lock_rec_lock_fold);
+ hash_table_free(old_hash);
+
+ old_hash = lock_sys->prdt_page_hash;
+ lock_sys->prdt_page_hash = hash_create(n_cells);
+ HASH_MIGRATE(old_hash, lock_sys->prdt_page_hash, lock_t, hash,
+ lock_rec_lock_fold);
+ hash_table_free(old_hash);
+
+ /* need to update block->lock_hash_val */
+ for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
+ buf_pool_t* buf_pool = buf_pool_from_array(i);
+
+ buf_pool_mutex_enter(buf_pool);
+ buf_page_t* bpage;
+ bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
+
+ while (bpage != NULL) {
+ if (buf_page_get_state(bpage)
+ == BUF_BLOCK_FILE_PAGE) {
+ buf_block_t* block;
+ block = reinterpret_cast<buf_block_t*>(
+ bpage);
+
+ block->lock_hash_val
+ = lock_rec_hash(
+ bpage->id.space(),
+ bpage->id.page_no());
}
- *mode = lock_mode;
+ bpage = UT_LIST_GET_NEXT(LRU, bpage);
}
+ buf_pool_mutex_exit(buf_pool);
}
- if (!src) {
- /* No source table lock found: flag the situation to caller */
- src = dest;
- }
-
-func_exit:
- trx_mutex_exit(trx);
- return(src);
+ lock_mutex_exit();
}
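/* Editorial sketch, not part of the patch: the resize pattern used above --
allocate a larger table, re-fold every chained entry into it, free the old
table -- shown with a toy intrusive chained hash. HASH_MIGRATE performs the
same walk generically; note this toy does not preserve order within a cell. */
#include <vector>

struct ToyLock {
	unsigned long fold;	/* cached hash key */
	ToyLock*      hash;	/* intrusive chain pointer, as in lock_t */
};

typedef std::vector<ToyLock*> ToyTable;

void toy_migrate(ToyTable& old_tab, ToyTable& new_tab)
{
	for (ToyLock*& head : old_tab) {
		while (ToyLock* l = head) {
			head = l->hash;			/* unlink from old */
			ToyLock*& cell = new_tab[l->fold % new_tab.size()];
			l->hash = cell;			/* push onto new */
			cell = l;
		}
	}
}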
/*********************************************************************//**
-Determine if the given table is exclusively "owned" by the given
-transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC
-on the table.
-@return TRUE if table is only locked by trx, with LOCK_IX, and
-possibly LOCK_AUTO_INC */
-UNIV_INTERN
-ibool
-lock_is_table_exclusive(
-/*====================*/
- const dict_table_t* table, /*!< in: table */
- const trx_t* trx) /*!< in: transaction */
+Closes the lock system at database shutdown. */
+void
+lock_sys_close(void)
+/*================*/
{
- const lock_t* lock;
- ibool ok = FALSE;
+ if (lock_latest_err_file != NULL) {
+ fclose(lock_latest_err_file);
+ lock_latest_err_file = NULL;
+ }
- ut_ad(table);
- ut_ad(trx);
+ hash_table_free(lock_sys->rec_hash);
+ hash_table_free(lock_sys->prdt_hash);
+ hash_table_free(lock_sys->prdt_page_hash);
- lock_mutex_enter();
+ os_event_destroy(lock_sys->timeout_event);
- for (lock = UT_LIST_GET_FIRST(table->locks);
- lock != NULL;
- lock = UT_LIST_GET_NEXT(locks, &lock->un_member.tab_lock)) {
- if (lock->trx != trx) {
- /* A lock on the table is held
- by some other transaction. */
- goto not_ok;
- }
+ mutex_destroy(&lock_sys->mutex);
+ mutex_destroy(&lock_sys->wait_mutex);
- if (!(lock_get_type_low(lock) & LOCK_TABLE)) {
- /* We are interested in table locks only. */
- continue;
- }
+ srv_slot_t* slot = lock_sys->waiting_threads;
- switch (lock_get_mode(lock)) {
- case LOCK_IX:
- ok = TRUE;
- break;
- case LOCK_AUTO_INC:
- /* It is allowed for trx to hold an
- auto_increment lock. */
- break;
- default:
-not_ok:
- /* Other table locks than LOCK_IX are not allowed. */
- ok = FALSE;
- goto func_exit;
+ for (ulint i = 0; i < OS_THREAD_MAX_N; i++, ++slot) {
+ if (slot->event != NULL) {
+ os_event_destroy(slot->event);
}
}
-func_exit:
- lock_mutex_exit();
+ ut_free(lock_sys);
- return(ok);
+ lock_sys = NULL;
}
/*********************************************************************//**
-Sets the wait flag of a lock and the back pointer in trx to lock. */
-UNIV_INLINE
-void
-lock_set_lock_and_trx_wait(
-/*=======================*/
- lock_t* lock, /*!< in: lock */
- trx_t* trx) /*!< in/out: trx */
+Gets the size of a lock struct.
+@return size in bytes */
+ulint
+lock_get_size(void)
+/*===============*/
{
- ut_ad(lock);
- ut_ad(lock->trx == trx);
- ut_ad(trx->lock.wait_lock == NULL);
- ut_ad(lock_mutex_own());
- ut_ad(trx_mutex_own(trx));
-
- trx->lock.wait_lock = lock;
- lock->type_mode |= LOCK_WAIT;
+ return((ulint) sizeof(lock_t));
}
-/**********************************************************************//**
-The back pointer to a waiting lock request in the transaction is set to NULL
-and the wait bit in lock type_mode is reset. */
-UNIV_INLINE
-void
-lock_reset_lock_and_trx_wait(
-/*=========================*/
- lock_t* lock) /*!< in/out: record lock */
+static inline void lock_grant_have_trx_mutex(lock_t* lock)
{
- ut_ad(lock_get_wait(lock));
- ut_ad(lock_mutex_own());
-
- if (lock->trx->lock.wait_lock &&
- lock->trx->lock.wait_lock != lock) {
- const char* stmt=NULL;
- const char* stmt2=NULL;
- size_t stmt_len;
- trx_id_t trx_id = 0;
- stmt = lock->trx->mysql_thd
- ? innobase_get_stmt(lock->trx->mysql_thd, &stmt_len)
- : NULL;
-
- if (lock->trx->lock.wait_lock &&
- lock->trx->lock.wait_lock->trx) {
- trx_id = lock->trx->lock.wait_lock->trx->id;
- stmt2 = lock->trx->lock.wait_lock->trx->mysql_thd
- ? innobase_get_stmt(
- lock->trx->lock.wait_lock
- ->trx->mysql_thd, &stmt_len)
- : NULL;
- }
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Trx id " TRX_ID_FMT
- " is waiting a lock "
- " for this trx id " TRX_ID_FMT
- " wait_lock %p",
- lock->trx->id,
- trx_id,
- lock->trx->lock.wait_lock);
-
- if (stmt) {
- ib_logf(IB_LOG_LEVEL_INFO, " SQL1: %s\n", stmt);
- }
-
- if (stmt2) {
- ib_logf(IB_LOG_LEVEL_INFO, " SQL2: %s\n", stmt2);
- }
-
- ut_ad(lock->trx->lock.wait_lock == lock);
- }
-
- lock->trx->lock.wait_lock = NULL;
- lock->type_mode &= ~LOCK_WAIT;
+ lock_reset_lock_and_trx_wait(lock);
+ lock_grant_after_reset(lock);
}
/*********************************************************************//**
Gets the gap flag of a record lock.
-@return LOCK_GAP or 0 */
+@return LOCK_GAP or 0 */
UNIV_INLINE
ulint
lock_rec_get_gap(
@@ -976,7 +615,7 @@ lock_rec_get_gap(
/*********************************************************************//**
Gets the LOCK_REC_NOT_GAP flag of a record lock.
-@return LOCK_REC_NOT_GAP or 0 */
+@return LOCK_REC_NOT_GAP or 0 */
UNIV_INLINE
ulint
lock_rec_get_rec_not_gap(
@@ -991,7 +630,7 @@ lock_rec_get_rec_not_gap(
/*********************************************************************//**
Gets the waiting insert flag of a record lock.
-@return LOCK_INSERT_INTENTION or 0 */
+@return LOCK_INSERT_INTENTION or 0 */
UNIV_INLINE
ulint
lock_rec_get_insert_intention(
@@ -1005,47 +644,14 @@ lock_rec_get_insert_intention(
}
/*********************************************************************//**
-Calculates if lock mode 1 is stronger or equal to lock mode 2.
-@return nonzero if mode1 stronger or equal to mode2 */
-UNIV_INLINE
-ulint
-lock_mode_stronger_or_eq(
-/*=====================*/
- enum lock_mode mode1, /*!< in: lock mode */
- enum lock_mode mode2) /*!< in: lock mode */
-{
- ut_ad((ulint) mode1 < lock_types);
- ut_ad((ulint) mode2 < lock_types);
-
- return(lock_strength_matrix[mode1][mode2]);
-}
-
-/*********************************************************************//**
-Calculates if lock mode 1 is compatible with lock mode 2.
-@return nonzero if mode1 compatible with mode2 */
-UNIV_INLINE
-ulint
-lock_mode_compatible(
-/*=================*/
- enum lock_mode mode1, /*!< in: lock mode */
- enum lock_mode mode2) /*!< in: lock mode */
-{
- ut_ad((ulint) mode1 < lock_types);
- ut_ad((ulint) mode2 < lock_types);
-
- return(lock_compatibility_matrix[mode1][mode2]);
-}
-
-/*********************************************************************//**
Checks if a lock request for a new lock has to wait for request lock2.
-@return TRUE if new lock has to wait for lock2 to be removed */
+@return TRUE if new lock has to wait for lock2 to be removed */
UNIV_INLINE
ibool
lock_rec_has_to_wait(
/*=================*/
-#ifdef WITH_WSREP
- ibool for_locking, /*!< is caller locking or releasing */
-#endif /* WITH_WSREP */
+ bool for_locking,
+			/*!< in: is caller locking or releasing */
const trx_t* trx, /*!< in: trx of new lock */
ulint type_mode,/*!< in: precise mode of the new lock
to set: LOCK_S or LOCK_X, possibly
@@ -1055,7 +661,8 @@ lock_rec_has_to_wait(
it is assumed that this has a lock bit
set on the same record as in the new
lock we are setting */
- ibool lock_is_on_supremum) /*!< in: TRUE if we are setting the
+ bool lock_is_on_supremum)
+				/*!< in: true if we are setting the
lock on the 'supremum' record of an
index page: we know then that the lock
request is really for a 'gap' type lock */
@@ -1064,7 +671,7 @@ lock_rec_has_to_wait(
ut_ad(lock_get_type_low(lock2) == LOCK_REC);
if (trx != lock2->trx
- && !lock_mode_compatible(static_cast<enum lock_mode>(
+ && !lock_mode_compatible(static_cast<lock_mode>(
LOCK_MODE_MASK & type_mode),
lock_get_mode(lock2))) {
@@ -1147,61 +754,76 @@ lock_rec_has_to_wait(
thread, we need to look at trx ordering and lock types */
if (wsrep_thd_is_BF(trx->mysql_thd, FALSE) &&
wsrep_thd_is_BF(lock2->trx->mysql_thd, TRUE)) {
+ mtr_t mtr;
+
if (wsrep_debug) {
- fprintf(stderr,
- "BF-BF lock conflict, locking: %lu\n",
- for_locking);
- lock_rec_print(stderr, lock2, time(NULL));
+ ib::info() <<
+ "BF-BF lock conflict, locking: " << for_locking;
+ lock_rec_print(stderr, lock2, mtr);
+ ib::info() << " SQL1: "
+ << wsrep_thd_query(trx->mysql_thd);
+ ib::info() << " SQL2: "
+ << wsrep_thd_query(lock2->trx->mysql_thd);
}
if (wsrep_trx_order_before(trx->mysql_thd,
lock2->trx->mysql_thd) &&
(type_mode & LOCK_MODE_MASK) == LOCK_X &&
- (lock2->type_mode & LOCK_MODE_MASK) == LOCK_X)
- {
+ (lock2->type_mode & LOCK_MODE_MASK) == LOCK_X) {
if (for_locking || wsrep_debug) {
/* exclusive lock conflicts are not
accepted */
- fprintf(stderr,
+ ib::info() <<
"BF-BF X lock conflict,"
- "mode: %lu supremum: %lu\n",
- type_mode, lock_is_on_supremum);
- fprintf(stderr,
- "conflicts states: my %d locked %d\n",
- wsrep_thd_conflict_state(trx->mysql_thd, FALSE),
- wsrep_thd_conflict_state(lock2->trx->mysql_thd, FALSE) );
- lock_rec_print(stderr, lock2,
- time(NULL));
- if (for_locking) return FALSE;
- //abort();
+				" mode: " << type_mode <<
+ " supremum: " << lock_is_on_supremum;
+ ib::info() <<
+ "conflicts states: my "
+ << wsrep_thd_conflict_state(trx->mysql_thd, FALSE)
+ << " locked "
+ << wsrep_thd_conflict_state(lock2->trx->mysql_thd, FALSE);
+ lock_rec_print(stderr, lock2, mtr);
+ ib::info() << " SQL1: "
+ << wsrep_thd_query(trx->mysql_thd);
+ ib::info() << " SQL2: "
+ << wsrep_thd_query(lock2->trx->mysql_thd);
+
+ if (for_locking) {
+ return FALSE;
+ }
}
} else {
/* if lock2->index->n_uniq <=
lock2->index->n_user_defined_cols
operation is on uniq index
*/
- if (wsrep_debug) fprintf(stderr,
- "BF conflict, modes: %lu %lu, "
- "idx: %s-%s n_uniq %u n_user %u\n",
- type_mode, lock2->type_mode,
- lock2->index->name,
- lock2->index->table_name,
- lock2->index->n_uniq,
- lock2->index->n_user_defined_cols);
+ if (wsrep_debug) {
+ ib::info() <<
+ "BF conflict, modes: "
+ << type_mode << ":" << lock2->type_mode
+ << " idx: " << lock2->index->name()
+ << " table: " << lock2->index->table->name.m_name
+ << " n_uniq: " << lock2->index->n_uniq
+ << " n_user: " << lock2->index->n_user_defined_cols;
+ ib::info() << " SQL1: "
+ << wsrep_thd_query(trx->mysql_thd);
+ ib::info() << " SQL2: "
+ << wsrep_thd_query(lock2->trx->mysql_thd);
+ }
return FALSE;
}
}
#endif /* WITH_WSREP */
+
return(TRUE);
}
-
+
return(FALSE);
}
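/* Editorial sketch, not part of the patch: the mode-compatibility test at
the heart of lock_rec_has_to_wait(). The matrix below follows the standard
InnoDB IS/IX/S/X/AUTO-INC table; the enum ordering is illustrative. */
enum ToyMode { M_IS, M_IX, M_S, M_X, M_AI, M_N };

static const bool toy_compat[M_N][M_N] = {
	/*           IS     IX     S      X      AI   */
	/* IS */ { true,  true,  true,  false, true  },
	/* IX */ { true,  true,  false, false, true  },
	/* S  */ { true,  false, true,  false, false },
	/* X  */ { false, false, false, false, false },
	/* AI */ { true,  true,  false, false, false },
};

inline bool toy_modes_compatible(ToyMode a, ToyMode b)
{
	return toy_compat[a][b];	/* no wait needed when true */
}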
/*********************************************************************//**
Checks if a lock request lock1 has to wait for request lock2.
-@return TRUE if lock1 has to wait for lock2 to be removed */
-UNIV_INTERN
+@return TRUE if lock1 has to wait for lock2 to be removed */
ibool
lock_has_to_wait(
/*=============*/
@@ -1222,14 +844,17 @@ lock_has_to_wait(
/* If this lock request is for a supremum record
then the second bit on the lock bitmap is set */
-#ifdef WITH_WSREP
- return(lock_rec_has_to_wait(FALSE, lock1->trx,
-#else
- return(lock_rec_has_to_wait(lock1->trx,
-#endif /* WITH_WSREP */
- lock1->type_mode, lock2,
- lock_rec_get_nth_bit(
- lock1, 1)));
+ if (lock1->type_mode
+ & (LOCK_PREDICATE | LOCK_PRDT_PAGE)) {
+ return(lock_prdt_has_to_wait(
+ lock1->trx, lock1->type_mode,
+ lock_get_prdt_from_lock(lock1),
+ lock2));
+ } else {
+ return(lock_rec_has_to_wait(false,
+ lock1->trx, lock1->type_mode, lock2,
+			lock_rec_get_nth_bit(lock1, 1)));
+ }
}
return(TRUE);
@@ -1240,54 +865,17 @@ lock_has_to_wait(
/*============== RECORD LOCK BASIC FUNCTIONS ============================*/
-/*********************************************************************//**
-Gets the number of bits in a record lock bitmap.
-@return number of bits */
-UNIV_INLINE
-ulint
-lock_rec_get_n_bits(
-/*================*/
- const lock_t* lock) /*!< in: record lock */
-{
- return(lock->un_member.rec_lock.n_bits);
-}
-
-/**********************************************************************//**
-Sets the nth bit of a record lock to TRUE. */
-UNIV_INLINE
-void
-lock_rec_set_nth_bit(
-/*=================*/
- lock_t* lock, /*!< in: record lock */
- ulint i) /*!< in: index of the bit */
-{
- ulint byte_index;
- ulint bit_index;
-
- ut_ad(lock);
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
- ut_ad(i < lock->un_member.rec_lock.n_bits);
-
- byte_index = i / 8;
- bit_index = i % 8;
-
- ((byte*) &lock[1])[byte_index] |= 1 << bit_index;
-}
-
/**********************************************************************//**
Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED,
if none found.
@return bit index == heap number of the record, or ULINT_UNDEFINED if
none found */
-UNIV_INTERN
ulint
lock_rec_find_set_bit(
/*==================*/
const lock_t* lock) /*!< in: record lock with at least one bit set */
{
- ulint i;
-
- for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
+ for (ulint i = 0; i < lock_rec_get_n_bits(lock); ++i) {
if (lock_rec_get_nth_bit(lock, i)) {
@@ -1298,112 +886,9 @@ lock_rec_find_set_bit(
return(ULINT_UNDEFINED);
}
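/* Editorial sketch, not part of the patch: the bitmap convention behind
lock_rec_find_set_bit() -- one bit per heap number, stored in the bytes
that directly follow the lock_t struct. Standalone model: */
#include <cstddef>
#include <cstdint>

const size_t TOY_UNDEFINED = ~(size_t)0;	/* plays ULINT_UNDEFINED */

size_t toy_find_set_bit(const uint8_t* bitmap, size_t n_bits)
{
	for (size_t i = 0; i < n_bits; i++) {
		if (bitmap[i / 8] & (1u << (i % 8))) {
			return i;	/* == heap no of the locked record */
		}
	}
	return TOY_UNDEFINED;
}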
-/**********************************************************************//**
-Resets the nth bit of a record lock. */
-UNIV_INLINE
-void
-lock_rec_reset_nth_bit(
-/*===================*/
- lock_t* lock, /*!< in: record lock */
- ulint i) /*!< in: index of the bit which must be set to TRUE
- when this function is called */
-{
- ulint byte_index;
- ulint bit_index;
-
- ut_ad(lock);
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
- ut_ad(i < lock->un_member.rec_lock.n_bits);
-
- byte_index = i / 8;
- bit_index = i % 8;
-
- ((byte*) &lock[1])[byte_index] &= ~(1 << bit_index);
-}
-
-/*********************************************************************//**
-Gets the first or next record lock on a page.
-@return next lock, NULL if none exists */
-UNIV_INLINE
-const lock_t*
-lock_rec_get_next_on_page_const(
-/*============================*/
- const lock_t* lock) /*!< in: a record lock */
-{
- ulint space;
- ulint page_no;
-
- ut_ad(lock_mutex_own());
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
-
- space = lock->un_member.rec_lock.space;
- page_no = lock->un_member.rec_lock.page_no;
-
- for (;;) {
- lock = static_cast<const lock_t*>(HASH_GET_NEXT(hash, lock));
-
- if (!lock) {
-
- break;
- }
-
- if ((lock->un_member.rec_lock.space == space)
- && (lock->un_member.rec_lock.page_no == page_no)) {
-
- break;
- }
- }
-
- return(lock);
-}
-
-/*********************************************************************//**
-Gets the first or next record lock on a page.
-@return next lock, NULL if none exists */
-UNIV_INLINE
-lock_t*
-lock_rec_get_next_on_page(
-/*======================*/
- lock_t* lock) /*!< in: a record lock */
-{
- return((lock_t*) lock_rec_get_next_on_page_const(lock));
-}
-
-/*********************************************************************//**
-Gets the first record lock on a page, where the page is identified by its
-file address.
-@return first lock, NULL if none exists */
-UNIV_INLINE
-lock_t*
-lock_rec_get_first_on_page_addr(
-/*============================*/
- ulint space, /*!< in: space */
- ulint page_no)/*!< in: page number */
-{
- lock_t* lock;
-
- ut_ad(lock_mutex_own());
-
- for (lock = static_cast<lock_t*>(
- HASH_GET_FIRST(lock_sys->rec_hash,
- lock_rec_hash(space, page_no)));
- lock != NULL;
- lock = static_cast<lock_t*>(HASH_GET_NEXT(hash, lock))) {
-
- if (lock->un_member.rec_lock.space == space
- && lock->un_member.rec_lock.page_no == page_no) {
-
- break;
- }
- }
-
- return(lock);
-}
-
/*********************************************************************//**
Determines if there are explicit record locks on a page.
-@return an explicit record lock on the page, or NULL if there are none */
-UNIV_INTERN
+@return an explicit record lock on the page, or NULL if there are none */
lock_t*
lock_rec_expl_exist_on_page(
/*========================*/
@@ -1413,104 +898,15 @@ lock_rec_expl_exist_on_page(
lock_t* lock;
lock_mutex_enter();
- lock = lock_rec_get_first_on_page_addr(space, page_no);
+ /* Only used in ibuf pages, so rec_hash is good enough */
+ lock = lock_rec_get_first_on_page_addr(lock_sys->rec_hash,
+ space, page_no);
lock_mutex_exit();
return(lock);
}
/*********************************************************************//**
-Gets the first record lock on a page, where the page is identified by a
-pointer to it.
-@return first lock, NULL if none exists */
-UNIV_INLINE
-lock_t*
-lock_rec_get_first_on_page(
-/*=======================*/
- const buf_block_t* block) /*!< in: buffer block */
-{
- ulint hash;
- lock_t* lock;
- ulint space = buf_block_get_space(block);
- ulint page_no = buf_block_get_page_no(block);
-
- ut_ad(lock_mutex_own());
-
- hash = buf_block_get_lock_hash_val(block);
-
- for (lock = static_cast<lock_t*>(
- HASH_GET_FIRST( lock_sys->rec_hash, hash));
- lock != NULL;
- lock = static_cast<lock_t*>(HASH_GET_NEXT(hash, lock))) {
-
- if ((lock->un_member.rec_lock.space == space)
- && (lock->un_member.rec_lock.page_no == page_no)) {
-
- break;
- }
- }
-
- return(lock);
-}
-
-/*********************************************************************//**
-Gets the next explicit lock request on a record.
-@return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */
-UNIV_INLINE
-lock_t*
-lock_rec_get_next(
-/*==============*/
- ulint heap_no,/*!< in: heap number of the record */
- lock_t* lock) /*!< in: lock */
-{
- ut_ad(lock_mutex_own());
-
- do {
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
- lock = lock_rec_get_next_on_page(lock);
- } while (lock && !lock_rec_get_nth_bit(lock, heap_no));
-
- return(lock);
-}
-
-/*********************************************************************//**
-Gets the next explicit lock request on a record.
-@return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */
-UNIV_INLINE
-const lock_t*
-lock_rec_get_next_const(
-/*====================*/
- ulint heap_no,/*!< in: heap number of the record */
- const lock_t* lock) /*!< in: lock */
-{
- return(lock_rec_get_next(heap_no, (lock_t*) lock));
-}
-
-/*********************************************************************//**
-Gets the first explicit lock request on a record.
-@return first lock, NULL if none exists */
-UNIV_INLINE
-lock_t*
-lock_rec_get_first(
-/*===============*/
- const buf_block_t* block, /*!< in: block containing the record */
- ulint heap_no)/*!< in: heap number of the record */
-{
- lock_t* lock;
-
- ut_ad(lock_mutex_own());
-
- for (lock = lock_rec_get_first_on_page(block); lock;
- lock = lock_rec_get_next_on_page(lock)) {
- if (lock_rec_get_nth_bit(lock, heap_no)) {
- break;
- }
- }
-
- return(lock);
-}
-
-/*********************************************************************//**
Resets the record lock bitmap to zero. NOTE: does not touch the wait_lock
pointer in the transaction! This function is used in lock object creation
and resetting. */
@@ -1536,7 +932,7 @@ lock_rec_bitmap_reset(
/*********************************************************************//**
Copies a record lock to heap.
-@return copy of lock */
+@return copy of lock */
static
lock_t*
lock_rec_copy(
@@ -1555,18 +951,18 @@ lock_rec_copy(
/*********************************************************************//**
Gets the previous record lock set on a record.
-@return previous lock on the same record, NULL if none exists */
-UNIV_INTERN
+@return previous lock on the same record, NULL if none exists */
const lock_t*
lock_rec_get_prev(
/*==============*/
const lock_t* in_lock,/*!< in: record lock */
ulint heap_no)/*!< in: heap number of the record */
{
- lock_t* lock;
- ulint space;
- ulint page_no;
- lock_t* found_lock = NULL;
+ lock_t* lock;
+ ulint space;
+ ulint page_no;
+ lock_t* found_lock = NULL;
+ hash_table_t* hash;
ut_ad(lock_mutex_own());
ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
@@ -1574,7 +970,9 @@ lock_rec_get_prev(
space = in_lock->un_member.rec_lock.space;
page_no = in_lock->un_member.rec_lock.page_no;
- for (lock = lock_rec_get_first_on_page_addr(space, page_no);
+ hash = lock_hash_get(in_lock->type_mode);
+
+ for (lock = lock_rec_get_first_on_page_addr(hash, space, page_no);
/* No op */;
lock = lock_rec_get_next_on_page(lock)) {
@@ -1592,63 +990,12 @@ lock_rec_get_prev(
}
}
-/*============= FUNCTIONS FOR ANALYZING TABLE LOCK QUEUE ================*/
-
-/*********************************************************************//**
-Checks if a transaction has the specified table lock, or stronger. This
-function should only be called by the thread that owns the transaction.
-@return lock or NULL */
-UNIV_INLINE
-const lock_t*
-lock_table_has(
-/*===========*/
- const trx_t* trx, /*!< in: transaction */
- const dict_table_t* table, /*!< in: table */
- enum lock_mode mode) /*!< in: lock mode */
-{
- lint i;
-
- if (ib_vector_is_empty(trx->lock.table_locks)) {
- return(NULL);
- }
-
- /* Look for stronger locks the same trx already has on the table */
-
- for (i = ib_vector_size(trx->lock.table_locks) - 1; i >= 0; --i) {
- const lock_t* lock;
- enum lock_mode lock_mode;
-
- lock = *static_cast<const lock_t**>(
- ib_vector_get(trx->lock.table_locks, i));
-
- if (lock == NULL) {
- continue;
- }
-
- lock_mode = lock_get_mode(lock);
-
- ut_ad(trx == lock->trx);
- ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
- ut_ad(lock->un_member.tab_lock.table != NULL);
-
- if (table == lock->un_member.tab_lock.table
- && lock_mode_stronger_or_eq(lock_mode, mode)) {
-
- ut_ad(!lock_get_wait(lock));
-
- return(lock);
- }
- }
-
- return(NULL);
-}
-
/*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/
/*********************************************************************//**
Checks if a transaction has a GRANTED explicit lock on rec stronger or equal
to precise_mode.
-@return lock or NULL */
+@return lock or NULL */
UNIV_INLINE
lock_t*
lock_rec_has_expl(
@@ -1670,7 +1017,7 @@ lock_rec_has_expl(
|| (precise_mode & LOCK_MODE_MASK) == LOCK_X);
ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));
- for (lock = lock_rec_get_first(block, heap_no);
+ for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
@@ -1678,7 +1025,7 @@ lock_rec_has_expl(
&& !lock_rec_get_insert_intention(lock)
&& lock_mode_stronger_or_eq(
lock_get_mode(lock),
- static_cast<enum lock_mode>(
+ static_cast<lock_mode>(
precise_mode & LOCK_MODE_MASK))
&& !lock_get_wait(lock)
&& (!lock_rec_get_rec_not_gap(lock)
@@ -1695,48 +1042,41 @@ lock_rec_has_expl(
return(NULL);
}
-#ifdef WITH_WSREP
-static
-void
-lock_rec_discard(lock_t* in_lock);
-#endif
#ifdef UNIV_DEBUG
/*********************************************************************//**
Checks if some other transaction has a lock request in the queue.
-@return lock or NULL */
+@return lock or NULL */
static
lock_t*
lock_rec_other_has_expl_req(
/*========================*/
- enum lock_mode mode, /*!< in: LOCK_S or LOCK_X */
- ulint gap, /*!< in: LOCK_GAP if also gap
- locks are taken into account,
- or 0 if not */
- ulint wait, /*!< in: LOCK_WAIT if also
- waiting locks are taken into
- account, or 0 if not */
+ lock_mode mode, /*!< in: LOCK_S or LOCK_X */
const buf_block_t* block, /*!< in: buffer block containing
the record */
+ bool wait, /*!< in: whether also waiting locks
+ are taken into account */
ulint heap_no,/*!< in: heap number of the record */
const trx_t* trx) /*!< in: transaction, or NULL if
requests by all transactions
are taken into account */
{
- lock_t* lock;
ut_ad(lock_mutex_own());
ut_ad(mode == LOCK_X || mode == LOCK_S);
- ut_ad(gap == 0 || gap == LOCK_GAP);
- ut_ad(wait == 0 || wait == LOCK_WAIT);
- for (lock = lock_rec_get_first(block, heap_no);
+	/* Only a GAP lock can be on the supremum record, and we are
+	not looking for GAP locks */
+ if (heap_no == PAGE_HEAP_NO_SUPREMUM) {
+ return(NULL);
+ }
+
+ for (lock_t* lock = lock_rec_get_first(lock_sys->rec_hash,
+ block, heap_no);
lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
if (lock->trx != trx
- && (gap
- || !(lock_rec_get_gap(lock)
- || heap_no == PAGE_HEAP_NO_SUPREMUM))
+ && !lock_rec_get_gap(lock)
&& (wait || !lock_get_wait(lock))
&& lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) {
@@ -1752,17 +1092,19 @@ lock_rec_other_has_expl_req(
static
void
wsrep_kill_victim(
+/*==============*/
const trx_t * const trx,
const lock_t *lock)
{
- ut_ad(lock_mutex_own());
- ut_ad(trx_mutex_own(lock->trx));
+ ut_ad(lock_mutex_own());
+ ut_ad(trx_mutex_own(lock->trx));
/* quit for native mysql */
if (!trx->is_wsrep()) return;
my_bool bf_this = wsrep_thd_is_BF(trx->mysql_thd, FALSE);
my_bool bf_other = wsrep_thd_is_BF(lock->trx->mysql_thd, TRUE);
+ mtr_t mtr;
if ((bf_this && !bf_other) ||
(bf_this && bf_other && wsrep_trx_order_before(
@@ -1770,60 +1112,57 @@ wsrep_kill_victim(
if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
if (wsrep_debug) {
- fprintf(stderr, "WSREP: BF victim waiting\n");
+				ib::info() << "WSREP: BF victim waiting";
}
/* cannot release lock, until our lock
is in the queue*/
} else if (lock->trx != trx) {
if (wsrep_log_conflicts) {
if (bf_this) {
- fputs("\n*** Priority TRANSACTION:\n",
- stderr);
+ ib::info() << "*** Priority TRANSACTION:";
} else {
- fputs("\n*** Victim TRANSACTION:\n",
- stderr);
+ ib::info() << "*** Victim TRANSACTION:";
}
wsrep_trx_print_locking(stderr, trx, 3000);
if (bf_other) {
- fputs("\n*** Priority TRANSACTION:\n",
- stderr);
+ ib::info() << "*** Priority TRANSACTION:";
} else {
- fputs("\n*** Victim TRANSACTION:\n",
- stderr);
+ ib::info() << "*** Victim TRANSACTION:";
}
wsrep_trx_print_locking(stderr, lock->trx, 3000);
- fputs("*** WAITING FOR THIS LOCK TO BE GRANTED:\n",
- stderr);
-
- time_t now = time(NULL);
+ ib::info() << "*** WAITING FOR THIS LOCK TO BE GRANTED:";
if (lock_get_type(lock) == LOCK_REC) {
- lock_rec_print(stderr, lock, now);
+ lock_rec_print(stderr, lock, mtr);
} else {
- lock_table_print(stderr, lock, now);
+ lock_table_print(stderr, lock);
}
+
+ ib::info() << " SQL1: "
+ << wsrep_thd_query(trx->mysql_thd);
+ ib::info() << " SQL2: "
+ << wsrep_thd_query(lock->trx->mysql_thd);
}
- lock->trx->abort_type = TRX_WSREP_ABORT;
wsrep_innobase_kill_one_trx(trx->mysql_thd,
- (const trx_t*) trx, lock->trx, TRUE);
- lock->trx->abort_type = TRX_SERVER_ABORT;
+ trx, lock->trx, TRUE);
}
}
}
-#endif
+#endif /* WITH_WSREP */
+
/*********************************************************************//**
Checks if some other transaction has a conflicting explicit lock request
in the queue, so that we have to wait.
-@return lock or NULL */
+@return lock or NULL */
static
lock_t*
lock_rec_other_has_conflicting(
/*===========================*/
- enum lock_mode mode, /*!< in: LOCK_S or LOCK_X,
+ ulint mode, /*!< in: LOCK_S or LOCK_X,
possibly ORed to LOCK_GAP or
LOC_REC_NOT_GAP,
LOCK_INSERT_INTENTION */
@@ -1833,18 +1172,17 @@ lock_rec_other_has_conflicting(
const trx_t* trx) /*!< in: our transaction */
{
lock_t* lock;
- ibool is_supremum;
ut_ad(lock_mutex_own());
- is_supremum = (heap_no == PAGE_HEAP_NO_SUPREMUM);
+ bool is_supremum = (heap_no == PAGE_HEAP_NO_SUPREMUM);
- for (lock = lock_rec_get_first(block, heap_no);
+ for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
+ if (lock_rec_has_to_wait(true, trx, mode, lock, is_supremum)) {
#ifdef WITH_WSREP
- if (lock_rec_has_to_wait(TRUE, trx, mode, lock, is_supremum)) {
if (trx->is_wsrep()) {
trx_mutex_enter(lock->trx);
/* Below function will roll back either trx
@@ -1853,41 +1191,7 @@ lock_rec_other_has_conflicting(
wsrep_kill_victim(const_cast<trx_t*>(trx), lock);
trx_mutex_exit(lock->trx);
}
-#else
- if (lock_rec_has_to_wait(trx, mode, lock, is_supremum)) {
#endif /* WITH_WSREP */
-
- return(lock);
- }
- }
-
- return(NULL);
-}
-
-/*********************************************************************//**
-Looks for a suitable type record lock struct by the same trx on the same page.
-This can be used to save space when a new record lock should be set on a page:
-no new struct is needed, if a suitable old is found.
-@return lock or NULL */
-UNIV_INLINE
-lock_t*
-lock_rec_find_similar_on_page(
-/*==========================*/
- ulint type_mode, /*!< in: lock type_mode field */
- ulint heap_no, /*!< in: heap number of the record */
- lock_t* lock, /*!< in: lock_rec_get_first_on_page() */
- const trx_t* trx) /*!< in: transaction */
-{
- ut_ad(lock_mutex_own());
-
- for (/* No op */;
- lock != NULL;
- lock = lock_rec_get_next_on_page(lock)) {
-
- if (lock->trx == trx
- && lock->type_mode == type_mode
- && lock_rec_get_n_bits(lock) > heap_no) {
-
return(lock);
}
}
@@ -1898,24 +1202,24 @@ lock_rec_find_similar_on_page(
/*********************************************************************//**
Checks if some transaction has an implicit x-lock on a record in a secondary
index.
-@return transaction id of the transaction which has the x-lock, or 0;
+@return the transaction which has the x-lock, or NULL;
NOTE that this function can return false positives but never false
negatives. The caller must confirm all positive results by calling
trx_is_active(). */
static
-trx_id_t
+trx_t*
lock_sec_rec_some_has_impl(
/*=======================*/
const rec_t* rec, /*!< in: user record */
dict_index_t* index, /*!< in: secondary index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets)/*!< in: rec_get_offsets(rec, index) */
{
- trx_id_t trx_id;
+ trx_t* trx;
trx_id_t max_trx_id;
const page_t* page = page_align(rec);
ut_ad(!lock_mutex_own());
- ut_ad(!mutex_own(&trx_sys->mutex));
+ ut_ad(!trx_sys_mutex_own());
ut_ad(!dict_index_is_clust(index));
ut_ad(page_rec_is_user_rec(rec));
ut_ad(rec_offs_validate(rec, index, offsets));
@@ -1924,29 +1228,25 @@ lock_sec_rec_some_has_impl(
/* Some transaction may have an implicit x-lock on the record only
if the max trx id for the page >= min trx id for the trx list, or
- database recovery is running. We do not write the changes of a page
- max trx id to the log, and therefore during recovery, this value
- for a page may be incorrect. */
+ database recovery is running. */
if (max_trx_id < trx_rw_min_trx_id() && !recv_recovery_is_on()) {
- trx_id = 0;
+ trx = 0;
} else if (!lock_check_trx_id_sanity(max_trx_id, rec, index, offsets)) {
- buf_page_print(page, 0);
-
/* The page is corrupt: try to avoid a crash by returning 0 */
- trx_id = 0;
+ trx = 0;
/* In this case it is possible that some transaction has an implicit
x-lock. We have to look in the clustered index. */
} else {
- trx_id = row_vers_impl_x_locked(rec, index, offsets);
+ trx = row_vers_impl_x_locked(rec, index, offsets);
}
- return(trx_id);
+ return(trx);
}
#ifdef UNIV_DEBUG
@@ -1962,7 +1262,7 @@ lock_rec_other_trx_holds_expl(
ulint precise_mode, /*!< in: LOCK_S or LOCK_X
possibly ORed to LOCK_GAP or
LOCK_REC_NOT_GAP. */
- trx_id_t trx_id, /*!< in: trx holding implicit
+ trx_t* trx, /*!< in: trx holding implicit
lock on rec */
const rec_t* rec, /*!< in: user record */
const buf_block_t* block) /*!< in: buffer block
@@ -1971,30 +1271,31 @@ lock_rec_other_trx_holds_expl(
trx_t* holds = NULL;
lock_mutex_enter();
+ mutex_enter(&trx_sys->mutex);
+ trx_mutex_enter(trx);
- if (trx_t *impl_trx = trx_rw_is_active(trx_id, NULL)) {
- ulint heap_no = page_rec_get_heap_no(rec);
- mutex_enter(&trx_sys->mutex);
+ ut_ad(!trx_state_eq(trx, TRX_STATE_NOT_STARTED));
+ if (!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)) {
+ const ulint heap_no = page_rec_get_heap_no(rec);
for (trx_t* t = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
t != NULL;
t = UT_LIST_GET_NEXT(trx_list, t)) {
- lock_t *expl_lock = lock_rec_has_expl(
+ lock_t* expl_lock = lock_rec_has_expl(
precise_mode, block, heap_no, t);
-
- if (expl_lock && expl_lock->trx != impl_trx) {
+ if (expl_lock && expl_lock->trx != trx) {
/* An explicit lock is held by trx other than
the trx holding the implicit lock. */
holds = expl_lock->trx;
break;
}
}
-
- mutex_exit(&trx_sys->mutex);
- }
+ }
lock_mutex_exit();
+ mutex_exit(&trx_sys->mutex);
+ trx_mutex_exit(trx);
return(holds);
}
@@ -2005,14 +1306,26 @@ Return approximate number of record locks (bits set in the bitmap) for
this transaction. Since delete-marked records may be removed, the
record count will not be precise.
The caller must be holding lock_sys->mutex. */
-UNIV_INTERN
ulint
lock_number_of_rows_locked(
/*=======================*/
const trx_lock_t* trx_lock) /*!< in: transaction locks */
{
+ ut_ad(lock_mutex_own());
+
+ return(trx_lock->n_rec_locks);
+}
+
+/*********************************************************************//**
+Return the number of table locks for a transaction.
+The caller must be holding lock_sys->mutex. */
+ulint
+lock_number_of_tables_locked(
+/*=========================*/
+ const trx_lock_t* trx_lock) /*!< in: transaction locks */
+{
const lock_t* lock;
- ulint n_records = 0;
+ ulint n_tables = 0;
ut_ad(lock_mutex_own());
@@ -2020,19 +1333,12 @@ lock_number_of_rows_locked(
lock != NULL;
lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
- if (lock_get_type_low(lock) == LOCK_REC) {
- ulint n_bit;
- ulint n_bits = lock_rec_get_n_bits(lock);
-
- for (n_bit = 0; n_bit < n_bits; n_bit++) {
- if (lock_rec_get_nth_bit(lock, n_bit)) {
- n_records++;
- }
- }
+ if (lock_get_type_low(lock) == LOCK_TABLE) {
+ n_tables++;
}
}
- return(n_records);
+ return(n_tables);
}
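/* Editorial sketch, not part of the patch: the patch turns
lock_number_of_rows_locked() into a read of a counter (n_rec_locks) that is
maintained as bits are set and cleared, instead of rescanning every lock
bitmap. The trade-off in miniature (the pairing with the set/reset helpers
is assumed): */
#include <cstdint>

struct ToyTrxLocks {
	uint64_t n_rec_locks = 0;	/* O(1) to report */

	void bit_set()   { ++n_rec_locks; }	/* on setting a lock bit */
	void bit_reset() { --n_rec_locks; }	/* on clearing a lock bit */
};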
/*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/
@@ -2041,98 +1347,74 @@ lock_number_of_rows_locked(
static
void
wsrep_print_wait_locks(
-/*============*/
+/*===================*/
lock_t* c_lock) /* conflicting lock to print */
{
if (wsrep_debug && c_lock->trx->lock.wait_lock != c_lock) {
- fprintf(stderr, "WSREP: c_lock != wait lock\n");
- time_t now = time(NULL);
+ mtr_t mtr;
+ ib::info() << "WSREP: c_lock != wait lock";
+ ib::info() << " SQL: "
+ << wsrep_thd_query(c_lock->trx->mysql_thd);
if (lock_get_type_low(c_lock) & LOCK_TABLE) {
- lock_table_print(stderr, c_lock, now);
+ lock_table_print(stderr, c_lock);
} else {
- lock_rec_print(stderr, c_lock, now);
+ lock_rec_print(stderr, c_lock, mtr);
}
if (lock_get_type_low(c_lock->trx->lock.wait_lock) & LOCK_TABLE) {
- lock_table_print(stderr, c_lock->trx->lock.wait_lock,
- now);
+ lock_table_print(stderr, c_lock->trx->lock.wait_lock);
} else {
lock_rec_print(stderr, c_lock->trx->lock.wait_lock,
- now);
+ mtr);
}
}
}
#endif /* WITH_WSREP */
-static
-void
-lock_rec_insert_to_head(
- lock_t *in_lock, /*!< in: lock to be insert */
- ulint rec_fold) /*!< in: rec_fold of the page */
-{
- hash_cell_t* cell;
- lock_t* node;
-
- if (in_lock == NULL) {
- return;
- }
-
- cell = hash_get_nth_cell(lock_sys->rec_hash,
- hash_calc_hash(rec_fold, lock_sys->rec_hash));
- node = (lock_t *) cell->node;
- if (node != in_lock) {
- cell->node = in_lock;
- in_lock->hash = node;
- }
-}
-
-/*********************************************************************//**
-Creates a new record lock and inserts it to the lock queue. Does NOT check
-for deadlocks or lock compatibility!
-@return created lock */
-static
+/** Create a new record lock and insert it to the lock queue,
+without checking for deadlocks or conflicts.
+@param[in] type_mode lock mode and wait flag; type will be replaced
+ with LOCK_REC
+@param[in] space tablespace id
+@param[in] page_no index page number
+@param[in] page R-tree index page, or NULL
+@param[in] heap_no record heap number in the index page
+@param[in] index the index tree
+@param[in,out] trx transaction
+@param[in] holds_trx_mutex whether the caller holds trx->mutex
+@return created lock */
lock_t*
-lock_rec_create(
-/*============*/
+lock_rec_create_low(
#ifdef WITH_WSREP
- lock_t* const c_lock, /* conflicting lock */
- que_thr_t* thr,
+ lock_t* c_lock, /*!< conflicting lock */
+ que_thr_t* thr, /*!< thread owning trx */
#endif
- ulint type_mode,/*!< in: lock mode and wait
- flag, type is ignored and
- replaced by LOCK_REC */
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- ulint heap_no,/*!< in: heap number of the record */
- dict_index_t* index, /*!< in: index of record */
- trx_t* trx, /*!< in/out: transaction */
- ibool caller_owns_trx_mutex)
- /*!< in: TRUE if caller owns
- trx mutex */
+ ulint type_mode,
+ ulint space,
+ ulint page_no,
+ const page_t* page,
+ ulint heap_no,
+ dict_index_t* index,
+ trx_t* trx,
+ bool holds_trx_mutex)
{
lock_t* lock;
- ulint page_no;
- ulint space;
- ulint rec_fold;
ulint n_bits;
ulint n_bytes;
- bool wait_lock;
- const page_t* page;
ut_ad(lock_mutex_own());
- ut_ad(caller_owns_trx_mutex == trx_mutex_own(trx));
+ ut_ad(holds_trx_mutex == trx_mutex_own(trx));
ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
+#ifdef UNIV_DEBUG
/* Non-locking autocommit read-only transactions should not set
- any locks. */
- assert_trx_in_list(trx);
-
- space = buf_block_get_space(block);
- page_no = buf_block_get_page_no(block);
- page = block->frame;
-
- btr_assert_not_corrupted(block, index);
+ any locks. See comment in trx_set_rw_mode explaining why this
+ conditional check is required in debug code. */
+ if (holds_trx_mutex) {
+ check_trx_state(trx);
+ }
+#endif /* UNIV_DEBUG */
/* If rec is the supremum record, then we reset the gap and
LOCK_REC_NOT_GAP bits, as all locks on the supremum are
@@ -2140,55 +1422,68 @@ lock_rec_create(
if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) {
ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
-
type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP);
}
- wait_lock = type_mode & LOCK_WAIT;
+ if (UNIV_LIKELY(!(type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE)))) {
+ /* Make lock bitmap bigger by a safety margin */
+ n_bits = page_dir_get_n_heap(page) + LOCK_PAGE_BITMAP_MARGIN;
+ n_bytes = 1 + n_bits / 8;
+ } else {
+ ut_ad(heap_no == PRDT_HEAPNO);
- /* Make lock bitmap bigger by a safety margin */
- n_bits = page_dir_get_n_heap(page) + LOCK_PAGE_BITMAP_MARGIN;
- n_bytes = 1 + n_bits / 8;
+ /* The lock is always on PAGE_HEAP_NO_INFIMUM (0), so
+		we only need 1 bit (which rounds up to 1 byte) for
+ lock bit setting */
+ n_bytes = 1;
- lock = static_cast<lock_t*>(
- mem_heap_alloc(trx->lock.lock_heap, sizeof(lock_t) + n_bytes));
+ if (type_mode & LOCK_PREDICATE) {
+ ulint tmp = UNIV_WORD_SIZE - 1;
- lock->trx = trx;
+ /* We will attach predicate structure after lock.
+		Make sure the memory is aligned on 8 bytes;
+		mem_heap_alloc will align it with
+		MEM_SPACE_NEEDED anyway. */
+ n_bytes = (n_bytes + sizeof(lock_prdt_t) + tmp) & ~tmp;
+ ut_ad(n_bytes == sizeof(lock_prdt_t) + UNIV_WORD_SIZE);
+ }
+ }
+
+ if (trx->lock.rec_cached >= UT_ARR_SIZE(trx->lock.rec_pool)
+ || sizeof *lock + n_bytes > sizeof *trx->lock.rec_pool) {
+ lock = static_cast<lock_t*>(
+ mem_heap_alloc(trx->lock.lock_heap,
+ sizeof *lock + n_bytes));
+ } else {
+ lock = &trx->lock.rec_pool[trx->lock.rec_cached++].lock;
+ }
+ lock->trx = trx;
lock->type_mode = (type_mode & ~LOCK_TYPE_MASK) | LOCK_REC;
lock->index = index;
+ lock->un_member.rec_lock.space = uint32_t(space);
+ lock->un_member.rec_lock.page_no = uint32_t(page_no);
- lock->un_member.rec_lock.space = space;
- lock->un_member.rec_lock.page_no = page_no;
- lock->un_member.rec_lock.n_bits = n_bytes * 8;
- rec_fold = lock_rec_fold(space, page_no);
-
- /* Reset to zero the bitmap which resides immediately after the
- lock struct */
-
+ if (UNIV_LIKELY(!(type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE)))) {
+ lock->un_member.rec_lock.n_bits = uint32_t(n_bytes * 8);
+ } else {
+ /* Predicate lock always on INFIMUM (0) */
+ lock->un_member.rec_lock.n_bits = 8;
+ }
lock_rec_bitmap_reset(lock);
-
- /* Set the bit corresponding to rec */
lock_rec_set_nth_bit(lock, heap_no);
-
- lock->requested_time = time(NULL);
- lock->wait_time = 0;
-
index->table->n_rec_locks++;
-
- ut_ad(index->table->n_ref_count > 0 || !index->table->can_be_evicted);
+ ut_ad(index->table->get_ref_count() > 0 || !index->table->can_be_evicted);
#ifdef WITH_WSREP
- if (c_lock && trx->is_wsrep() &&
- wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
+ if (c_lock && trx->is_wsrep()
+ && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
lock_t *hash = (lock_t *)c_lock->hash;
lock_t *prev = NULL;
- while (hash &&
- wsrep_thd_is_BF(((lock_t *)hash)->trx->mysql_thd, TRUE) &&
- wsrep_trx_order_before(
- ((lock_t *)hash)->trx->mysql_thd,
- trx->mysql_thd)) {
+ while (hash && wsrep_thd_is_BF(hash->trx->mysql_thd, TRUE)
+ && wsrep_trx_order_before(hash->trx->mysql_thd,
+ trx->mysql_thd)) {
prev = hash;
hash = (lock_t *)hash->hash;
}
@@ -2213,9 +1508,8 @@ lock_rec_create(
trx->lock.que_state = TRX_QUE_LOCK_WAIT;
lock_set_lock_and_trx_wait(lock, trx);
- UT_LIST_ADD_LAST(trx_locks, trx->lock.trx_locks, lock);
+ UT_LIST_ADD_LAST(trx->lock.trx_locks, lock);
- ut_ad(thr != NULL);
trx->lock.wait_thr = thr;
thr->state = QUE_THR_LOCK_WAIT;
@@ -2223,70 +1517,58 @@ lock_rec_create(
victim lock release. This will eventually call
lock_grant, which wants to grant trx mutex again
*/
- if (caller_owns_trx_mutex) {
+ if (holds_trx_mutex) {
trx_mutex_exit(trx);
}
lock_cancel_waiting_and_release(
c_lock->trx->lock.wait_lock);
- if (caller_owns_trx_mutex) {
+ if (holds_trx_mutex) {
trx_mutex_enter(trx);
}
trx_mutex_exit(c_lock->trx);
if (wsrep_debug) {
- fprintf(
- stderr,
- "WSREP: c_lock canceled " TRX_ID_FMT "\n",
- c_lock->trx->id);
+ ib::info() << "WSREP: c_lock canceled "
+ << ib::hex(c_lock->trx->id)
+ << " SQL: "
+ << wsrep_thd_query(
+ c_lock->trx->mysql_thd);
}
/* have to bail out here to avoid lock_set_lock... */
return(lock);
}
trx_mutex_exit(c_lock->trx);
- } else if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
- && !thd_is_replication_slave_thread(lock->trx->mysql_thd)) {
- if (wait_lock) {
- HASH_INSERT(lock_t, hash, lock_sys->rec_hash, rec_fold, lock);
- } else {
- lock_rec_insert_to_head(lock, rec_fold);
- }
- } else {
- HASH_INSERT(lock_t, hash, lock_sys->rec_hash, rec_fold, lock);
- }
-#else
- if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
- && !thd_is_replication_slave_thread(lock->trx->mysql_thd)) {
- if (wait_lock) {
- HASH_INSERT(lock_t, hash, lock_sys->rec_hash, rec_fold, lock);
- } else {
- lock_rec_insert_to_head(lock, rec_fold);
- }
+ } else
+#endif /* WITH_WSREP */
+ if (!(type_mode & (LOCK_WAIT | LOCK_PREDICATE | LOCK_PRDT_PAGE))
+ && innodb_lock_schedule_algorithm
+ == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
+ && !thd_is_replication_slave_thread(trx->mysql_thd)) {
+ HASH_PREPEND(lock_t, hash, lock_sys->rec_hash,
+ lock_rec_fold(space, page_no), lock);
} else {
- HASH_INSERT(lock_t, hash, lock_sys->rec_hash, rec_fold, lock);
+ HASH_INSERT(lock_t, hash, lock_hash_get(type_mode),
+ lock_rec_fold(space, page_no), lock);
}
-#endif /* WITH_WSREP */
- if (!caller_owns_trx_mutex) {
+ if (!holds_trx_mutex) {
trx_mutex_enter(trx);
}
ut_ad(trx_mutex_own(trx));
-
if (type_mode & LOCK_WAIT) {
lock_set_lock_and_trx_wait(lock, trx);
}
-
- UT_LIST_ADD_LAST(trx_locks, trx->lock.trx_locks, lock);
-
- if (!caller_owns_trx_mutex) {
+ UT_LIST_ADD_LAST(trx->lock.trx_locks, lock);
+ if (!holds_trx_mutex) {
trx_mutex_exit(trx);
}
-
MONITOR_INC(MONITOR_RECLOCK_CREATED);
MONITOR_INC(MONITOR_NUM_RECLOCK);
- return(lock);
+
+ return lock;
}
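/* Editorial sketch, not part of the patch: the bitmap sizing above. The
margin leaves spare bits for records inserted into the page after the lock
struct was allocated; the margin value here is illustrative. */
#include <cstddef>

const size_t TOY_BITMAP_MARGIN = 64;  /* stands in for LOCK_PAGE_BITMAP_MARGIN */

inline size_t toy_lock_bitmap_bytes(size_t n_heap /* page_dir_get_n_heap() */)
{
	size_t n_bits = n_heap + TOY_BITMAP_MARGIN;
	return 1 + n_bits / 8;	/* extra byte so rounding never loses bits */
}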
/*********************************************************************//**
@@ -2294,6 +1576,7 @@ Check if lock1 has higher priority than lock2.
NULL has lowest priority.
If neither of them is wait lock, the first one has higher priority.
If only one of them is a wait lock, it has lower priority.
+If either is a high priority transaction, the lock has higher priority.
Otherwise, the one with an older transaction has higher priority.
@returns true if lock1 has higher priority, false otherwise. */
static bool has_higher_priority(lock_t *lock1, lock_t *lock2)
@@ -2302,13 +1585,13 @@ static bool has_higher_priority(lock_t *lock1, lock_t *lock2)
return false;
} else if (lock2 == NULL) {
return true;
- }
- // No preference. Compre them by wait mode and trx age.
- if (!lock_get_wait(lock1)) {
- return true;
- } else if (!lock_get_wait(lock2)) {
- return false;
- }
+ }
+	// Granted locks have higher priority.
+ if (!lock_get_wait(lock1)) {
+ return true;
+ } else if (!lock_get_wait(lock2)) {
+ return false;
+ }
return lock1->trx->start_time_micro <= lock2->trx->start_time_micro;
}
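
For illustration, here is a minimal standalone sketch of the comparison rules
documented above. SimpleLock is a hypothetical stand-in for lock_t; its fields
correspond to lock_get_wait() and trx->start_time_micro.

#include <cstdint>

// Hypothetical stand-in for lock_t, for illustration only.
struct SimpleLock {
	bool		waiting;		/* corresponds to lock_get_wait() */
	uint64_t	start_time_micro;	/* trx->start_time_micro */
};

// Mirrors the rules above: NULL lowest, granted beats waiting,
// otherwise the older transaction wins.
static bool simple_has_higher_priority(const SimpleLock* a, const SimpleLock* b)
{
	if (a == nullptr) {
		return false;
	} else if (b == nullptr) {
		return true;
	}
	if (!a->waiting) {
		return true;
	} else if (!b->waiting) {
		return false;
	}
	return a->start_time_micro <= b->start_time_micro;
}
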
@@ -2327,13 +1610,15 @@ lock_rec_insert_by_trx_age(
ulint rec_fold;
lock_t* node;
lock_t* next;
+ hash_table_t* hash;
hash_cell_t* cell;
space = in_lock->un_member.rec_lock.space;
page_no = in_lock->un_member.rec_lock.page_no;
rec_fold = lock_rec_fold(space, page_no);
- cell = hash_get_nth_cell(lock_sys->rec_hash,
- hash_calc_hash(rec_fold, lock_sys->rec_hash));
+ hash = lock_hash_get(in_lock->type_mode);
+ cell = hash_get_nth_cell(hash,
+ hash_calc_hash(rec_fold, hash));
node = (lock_t *) cell->node;
// If in_lock is not a wait lock, we insert it to the head of the list.
@@ -2341,7 +1626,7 @@ lock_rec_insert_by_trx_age(
cell->node = in_lock;
in_lock->hash = node;
if (lock_get_wait(in_lock)) {
- lock_grant(in_lock, true);
+ lock_grant_have_trx_mutex(in_lock);
return DB_SUCCESS_LOCKED_REC;
}
return DB_SUCCESS;
@@ -2355,7 +1640,7 @@ lock_rec_insert_by_trx_age(
in_lock->hash = next;
if (lock_get_wait(in_lock) && !lock_rec_has_to_wait_in_queue(in_lock)) {
- lock_grant(in_lock, true);
+ lock_grant_have_trx_mutex(in_lock);
if (cell->node != in_lock) {
// Move it to the front of the queue
node->hash = in_lock->hash;
@@ -2378,6 +1663,7 @@ lock_queue_validate(
ulint space;
ulint page_no;
ulint rec_fold;
+ hash_table_t* hash;
hash_cell_t* cell;
lock_t* next;
bool wait_lock = false;
@@ -2389,8 +1675,9 @@ lock_queue_validate(
space = in_lock->un_member.rec_lock.space;
page_no = in_lock->un_member.rec_lock.page_no;
rec_fold = lock_rec_fold(space, page_no);
- cell = hash_get_nth_cell(lock_sys->rec_hash,
- hash_calc_hash(rec_fold, lock_sys->rec_hash));
+ hash = lock_hash_get(in_lock->type_mode);
+ cell = hash_get_nth_cell(hash,
+ hash_calc_hash(rec_fold, hash));
next = (lock_t *) cell->node;
while (next != NULL) {
// If this is a granted lock, check that there's no wait lock before it.
@@ -2399,171 +1686,148 @@ lock_queue_validate(
} else {
wait_lock = true;
}
- next = (lock_t *) next->hash;
+ next = next->hash;
}
return true;
}
#endif /* UNIV_DEBUG */
-/*********************************************************************//**
-Enqueues a waiting request for a lock which cannot be granted immediately.
-Checks for deadlocks.
-@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
-DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that
-there was a deadlock, but another transaction was chosen as a victim,
-and we got the lock immediately: no need to wait then */
static
+void
+lock_rec_insert_to_head(
+	lock_t *in_lock,	/*!< in: lock to be inserted */
+ ulint rec_fold) /*!< in: rec_fold of the page */
+{
+ hash_table_t* hash;
+ hash_cell_t* cell;
+ lock_t* node;
+
+ if (in_lock == NULL) {
+ return;
+ }
+
+ hash = lock_hash_get(in_lock->type_mode);
+ cell = hash_get_nth_cell(hash,
+ hash_calc_hash(rec_fold, hash));
+ node = (lock_t *) cell->node;
+ if (node != in_lock) {
+ cell->node = in_lock;
+ in_lock->hash = node;
+ }
+}
+
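
As a minimal sketch of the head insertion above, assuming an intrusive singly
linked chain like the one hash_cell_t holds (Node and Cell are hypothetical
stand-ins for lock_t and hash_cell_t):

struct Node { Node* hash; };	/* intrusive chain link, as in lock_t */
struct Cell { Node* node; };	/* hash cell, as in hash_cell_t */

// Insert n at the head of the cell's chain unless it is already there.
static void insert_to_head(Cell* cell, Node* n)
{
	if (n == nullptr || cell->node == n) {
		return;			/* nothing to do */
	}
	n->hash = cell->node;		/* old head is linked behind n */
	cell->node = n;			/* n becomes the new head */
}
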
+/** Enqueue a waiting request for a lock which cannot be granted immediately.
+Check for deadlocks.
+@param[in] type_mode the requested lock mode (LOCK_S or LOCK_X)
+ possibly ORed with LOCK_GAP or
+ LOCK_REC_NOT_GAP, ORed with
+ LOCK_INSERT_INTENTION if this
+ waiting lock request is set
+ when performing an insert of
+ an index record
+@param[in] block leaf page in the index
+@param[in] heap_no record heap number in the block
+@param[in] index index tree
+@param[in,out] thr query thread
+@param[in] prdt minimum bounding box (spatial index)
+@retval DB_LOCK_WAIT if the waiting lock was enqueued
+@retval DB_DEADLOCK if this transaction was chosen as the victim
+@retval DB_SUCCESS_LOCKED_REC if the other transaction was chosen as a victim
+ (or it happened to commit) */
dberr_t
lock_rec_enqueue_waiting(
-/*=====================*/
#ifdef WITH_WSREP
- lock_t* c_lock, /* conflicting lock */
+ lock_t* c_lock, /*!< conflicting lock */
#endif
- ulint type_mode,/*!< in: lock mode this
- transaction is requesting:
- LOCK_S or LOCK_X, possibly
- ORed with LOCK_GAP or
- LOCK_REC_NOT_GAP, ORed with
- LOCK_INSERT_INTENTION if this
- waiting lock request is set
- when performing an insert of
- an index record */
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- ulint heap_no,/*!< in: heap number of the record */
- dict_index_t* index, /*!< in: index of record */
- que_thr_t* thr) /*!< in: query thread */
+ ulint type_mode,
+ const buf_block_t* block,
+ ulint heap_no,
+ dict_index_t* index,
+ que_thr_t* thr,
+ lock_prdt_t* prdt)
{
- trx_t* trx;
- lock_t* lock;
- trx_id_t victim_trx_id;
- ulint space;
- ulint page_no;
- dberr_t err;
-
ut_ad(lock_mutex_own());
ut_ad(!srv_read_only_mode);
ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
- trx = thr_get_trx(thr);
+ trx_t* trx = thr_get_trx(thr);
ut_ad(trx_mutex_own(trx));
-
- /* Test if there already is some other reason to suspend thread:
- we do not enqueue a lock request if the query thread should be
- stopped anyway */
-
- if (que_thr_stop(thr)) {
- ut_error;
-
- return(DB_QUE_THR_SUSPENDED);
- }
+ ut_a(!que_thr_stop(thr));
switch (trx_get_dict_operation(trx)) {
case TRX_DICT_OP_NONE:
break;
case TRX_DICT_OP_TABLE:
case TRX_DICT_OP_INDEX:
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: a record lock wait happens"
- " in a dictionary operation!\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, trx, index);
- fputs(".\n"
- "InnoDB: Submit a detailed bug report"
- " to https://jira.mariadb.org/\n",
- stderr);
+ ib::error() << "A record lock wait happens in a dictionary"
+ " operation. index "
+ << index->name
+ << " of table "
+ << index->table->name
+ << ". " << BUG_REPORT_MSG;
ut_ad(0);
}
/* Enqueue the lock request that will wait to be granted, note that
we already own the trx mutex. */
- lock = lock_rec_create(
+ lock_t* lock = lock_rec_create(
#ifdef WITH_WSREP
- c_lock, thr,
-#endif /* WITH_WSREP */
+ c_lock, thr,
+#endif
type_mode | LOCK_WAIT, block, heap_no, index, trx, TRUE);
- /* Release the mutex to obey the latching order.
- This is safe, because lock_deadlock_check_and_resolve()
- is invoked when a lock wait is enqueued for the currently
- running transaction. Because trx is a running transaction
- (it is not currently suspended because of a lock wait),
- its state can only be changed by this thread, which is
- currently associated with the transaction. */
-
- trx_mutex_exit(trx);
-
- const time_t now = time(NULL);
- victim_trx_id = lock_deadlock_check_and_resolve(trx, lock, now);
-
- trx_mutex_enter(trx);
-
- if (victim_trx_id != 0) {
-
- ut_ad(victim_trx_id == trx->id);
+ if (prdt && type_mode & LOCK_PREDICATE) {
+ lock_prdt_set_prdt(lock, prdt);
+ }
+ if (ut_d(const trx_t* victim =)
+ DeadlockChecker::check_and_resolve(lock, trx)) {
+ ut_ad(victim == trx);
lock_reset_lock_and_trx_wait(lock);
lock_rec_reset_nth_bit(lock, heap_no);
+ return DB_DEADLOCK;
+ }
- return(DB_DEADLOCK);
-
- } else if (trx->lock.wait_lock == NULL) {
-
+ if (!trx->lock.wait_lock) {
/* If there was a deadlock but we chose another
transaction as a victim, it is possible that we
already have the lock now granted! */
+#ifdef WITH_WSREP
+ if (wsrep_debug) {
+ ib::info() << "WSREP: BF thread got lock granted early, ID " << ib::hex(trx->id)
+ << " query: " << wsrep_thd_query(trx->mysql_thd);
+ }
+#endif
+ return DB_SUCCESS_LOCKED_REC;
+ }
- err = DB_SUCCESS_LOCKED_REC;
- } else {
- trx->lock.que_state = TRX_QUE_LOCK_WAIT;
-
- trx->lock.was_chosen_as_deadlock_victim = FALSE;
- trx->lock.wait_started = now;
-
- ut_a(que_thr_stop(thr));
+ trx->lock.que_state = TRX_QUE_LOCK_WAIT;
-#ifdef UNIV_DEBUG
- if (lock_print_waits) {
- fprintf(stderr, "Lock wait for trx " TRX_ID_FMT " in index ",
- trx->id);
- ut_print_name(stderr, trx, FALSE, index->name);
- }
-#endif /* UNIV_DEBUG */
+ trx->lock.was_chosen_as_deadlock_victim = false;
+ trx->lock.wait_started = time(NULL);
- MONITOR_INC(MONITOR_LOCKREC_WAIT);
+ ut_a(que_thr_stop(thr));
- trx->n_rec_lock_waits++;
+ DBUG_LOG("ib_lock", "trx " << ib::hex(trx->id)
+ << " waits for lock in index " << index->name
+ << " of table " << index->table->name);
- err = DB_LOCK_WAIT;
- }
+ MONITOR_INC(MONITOR_LOCKREC_WAIT);
-#ifdef WITH_WSREP
- if (!lock_get_wait(lock) && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
- if (wsrep_debug) {
- fprintf(stderr, "WSREP: BF thread got lock granted early, ID " TRX_ID_FMT
- "\n",
- lock->trx->id);
- }
- return(DB_SUCCESS);
- }
-#endif /* WITH_WSREP */
- // Move it only when it does not cause a deadlock.
- if (err != DB_DEADLOCK
- && innodb_lock_schedule_algorithm
- == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
- && !thd_is_replication_slave_thread(lock->trx->mysql_thd)) {
- space = buf_block_get_space(block);
- page_no = buf_block_get_page_no(block);
+ if (innodb_lock_schedule_algorithm
+ == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
+ && !prdt
+ && !thd_is_replication_slave_thread(lock->trx->mysql_thd)) {
HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
- lock_rec_fold(space, page_no), lock);
+ lock_rec_lock_fold(lock), lock);
dberr_t res = lock_rec_insert_by_trx_age(lock);
if (res != DB_SUCCESS) {
return res;
}
}
- return err;
+ return DB_LOCK_WAIT;
}
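
A hedged sketch of how a caller might react to the outcomes documented above;
DbErr and handle_enqueue_result are illustrative names, not server code.

enum class DbErr { SUCCESS, LOCK_WAIT, DEADLOCK, SUCCESS_LOCKED_REC };

// Illustrative caller: suspend on LOCK_WAIT, roll back on DEADLOCK,
// proceed at once when the lock turned out to be granted after all.
static const char* handle_enqueue_result(DbErr err)
{
	switch (err) {
	case DbErr::LOCK_WAIT:
		return "suspend the query thread until the lock is granted";
	case DbErr::DEADLOCK:
		return "this transaction is the victim: roll back";
	case DbErr::SUCCESS_LOCKED_REC:
		return "the other transaction was the victim: already granted";
	default:
		return "proceed";
	}
}
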
/*********************************************************************//**
@@ -2573,9 +1837,9 @@ on the record, and the request to be added is not a waiting request, we
can reuse a suitable record lock object already existing on the same page,
just setting the appropriate bit in its bitmap. This is a low-level function
which does NOT check for deadlocks or lock compatibility!
-@return lock where the bit was set */
+@return lock where the bit was set */
static
-lock_t*
+void
lock_rec_add_to_queue(
/*==================*/
ulint type_mode,/*!< in: lock mode, wait, gap
@@ -2586,18 +1850,15 @@ lock_rec_add_to_queue(
ulint heap_no,/*!< in: heap number of the record */
dict_index_t* index, /*!< in: index of record */
trx_t* trx, /*!< in/out: transaction */
- ibool caller_owns_trx_mutex)
+ bool caller_owns_trx_mutex)
/*!< in: TRUE if caller owns the
transaction mutex */
{
- lock_t* lock;
- lock_t* first_lock;
-
+#ifdef UNIV_DEBUG
ut_ad(lock_mutex_own());
ut_ad(caller_owns_trx_mutex == trx_mutex_own(trx));
ut_ad(dict_index_is_clust(index)
|| dict_index_get_online_status(index) != ONLINE_INDEX_CREATION);
-#ifdef UNIV_DEBUG
switch (type_mode & LOCK_MODE_MASK) {
case LOCK_X:
case LOCK_S:
@@ -2607,21 +1868,32 @@ lock_rec_add_to_queue(
}
if (!(type_mode & (LOCK_WAIT | LOCK_GAP))) {
- enum lock_mode mode = (type_mode & LOCK_MODE_MASK) == LOCK_S
+ lock_mode mode = (type_mode & LOCK_MODE_MASK) == LOCK_S
? LOCK_X
: LOCK_S;
const lock_t* other_lock
- = lock_rec_other_has_expl_req(mode, 0, LOCK_WAIT,
- block, heap_no, trx);
+ = lock_rec_other_has_expl_req(
+ mode, block, false, heap_no, trx);
#ifdef WITH_WSREP
- /* this can potentionally assert with wsrep */
- if (wsrep_thd_is_wsrep(trx->mysql_thd)) {
- if (wsrep_debug && other_lock) {
- fprintf(stderr,
- "WSREP: InnoDB assert ignored\n");
- }
- } else {
- ut_a(!other_lock);
+ //ut_a(!other_lock || (wsrep_thd_is_BF(trx->mysql_thd, FALSE) &&
+ // wsrep_thd_is_BF(other_lock->trx->mysql_thd, TRUE)));
+ if (other_lock && trx->is_wsrep() &&
+ !wsrep_thd_is_BF(trx->mysql_thd, FALSE) &&
+ !wsrep_thd_is_BF(other_lock->trx->mysql_thd, TRUE)) {
+
+ ib::info() << "WSREP BF lock conflict for my lock:\n BF:" <<
+ ((wsrep_thd_is_BF(trx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " <<
+ wsrep_thd_exec_mode(trx->mysql_thd) << " conflict: " <<
+ wsrep_thd_conflict_state(trx->mysql_thd, false) << " seqno: " <<
+ wsrep_thd_trx_seqno(trx->mysql_thd) << " SQL: " <<
+ wsrep_thd_query(trx->mysql_thd);
+ trx_t* otrx = other_lock->trx;
+ ib::info() << "WSREP other lock:\n BF:" <<
+ ((wsrep_thd_is_BF(otrx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " <<
+ wsrep_thd_exec_mode(otrx->mysql_thd) << " conflict: " <<
+ wsrep_thd_conflict_state(otrx->mysql_thd, false) << " seqno: " <<
+ wsrep_thd_trx_seqno(otrx->mysql_thd) << " SQL: " <<
+ wsrep_thd_query(otrx->mysql_thd);
}
#else
ut_a(!other_lock);
@@ -2636,40 +1908,33 @@ lock_rec_add_to_queue(
try to avoid unnecessary memory consumption of a new record lock
struct for a gap type lock */
- if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) {
+ if (heap_no == PAGE_HEAP_NO_SUPREMUM) {
ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
/* There should never be LOCK_REC_NOT_GAP on a supremum
record, but let us play safe */
- type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP);
+ type_mode &= ~(LOCK_GAP | LOCK_REC_NOT_GAP);
}
+ lock_t* lock;
+ lock_t* first_lock;
+ hash_table_t* hash = lock_hash_get(type_mode);
+
/* Look for a waiting lock request on the same record or on a gap */
- for (first_lock = lock = lock_rec_get_first_on_page(block);
+ for (first_lock = lock = lock_rec_get_first_on_page(hash, block);
lock != NULL;
lock = lock_rec_get_next_on_page(lock)) {
if (lock_get_wait(lock)
&& lock_rec_get_nth_bit(lock, heap_no)) {
-#ifdef WITH_WSREP
- if (wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
- if (wsrep_debug) {
- fprintf(stderr,
- "BF skipping wait: "
- TRX_ID_FMT "\n",
- trx->id);
- lock_rec_print(stderr, lock,
- time(NULL));
- }
- } else
-#endif
- goto somebody_waits;
+
+ break;
}
}
- if (UNIV_LIKELY(!(type_mode & LOCK_WAIT))) {
+ if (lock == NULL && !(type_mode & LOCK_WAIT)) {
/* Look for a similar record lock on the same page:
if one is found and there are no waiting lock requests,
@@ -2678,36 +1943,21 @@ lock_rec_add_to_queue(
lock = lock_rec_find_similar_on_page(
type_mode, heap_no, first_lock, trx);
- if (lock) {
+ if (lock != NULL) {
lock_rec_set_nth_bit(lock, heap_no);
- return(lock);
+ return;
}
}
-somebody_waits:
+ lock_rec_create(
#ifdef WITH_WSREP
- return(lock_rec_create(NULL, NULL,
- type_mode, block, heap_no, index, trx,
- caller_owns_trx_mutex));
-#else
- return(lock_rec_create(
- type_mode, block, heap_no, index, trx,
- caller_owns_trx_mutex));
-#endif /* WITH_WSREP */
+ NULL, NULL,
+#endif
+ type_mode, block, heap_no, index, trx, caller_owns_trx_mutex);
}
-/** Record locking request status */
-enum lock_rec_req_status {
- /** Failed to acquire a lock */
- LOCK_REC_FAIL,
- /** Succeeded in acquiring a lock (implicit or already acquired) */
- LOCK_REC_SUCCESS,
- /** Explicitly created a new lock */
- LOCK_REC_SUCCESS_CREATED
-};
-
/*********************************************************************//**
This is a fast routine for locking a record in the most common cases:
there are no explicit locks on the page, or there is just one lock, owned
@@ -2717,10 +1967,10 @@ explicit locks. This function sets a normal next-key lock, or in the case of
a page supremum record, a gap type lock.
@return whether the locking succeeded */
UNIV_INLINE
-enum lock_rec_req_status
+lock_rec_req_status
lock_rec_lock_fast(
/*===============*/
- ibool impl, /*!< in: if TRUE, no lock is set
+ bool impl, /*!< in: if TRUE, no lock is set
if no wait is necessary: we
assume that the caller will
set an implicit lock */
@@ -2733,15 +1983,13 @@ lock_rec_lock_fast(
dict_index_t* index, /*!< in: index of record */
que_thr_t* thr) /*!< in: query thread */
{
- lock_t* lock;
- trx_t* trx;
- enum lock_rec_req_status status = LOCK_REC_SUCCESS;
-
ut_ad(lock_mutex_own());
+ ut_ad(!srv_read_only_mode);
ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
+ || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)
+ || srv_read_only_mode);
ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
|| (LOCK_MODE_MASK & mode) == LOCK_X);
ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
@@ -2751,21 +1999,22 @@ lock_rec_lock_fast(
DBUG_EXECUTE_IF("innodb_report_deadlock", return(LOCK_REC_FAIL););
- lock = lock_rec_get_first_on_page(block);
+ lock_t* lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block);
- trx = thr_get_trx(thr);
+ trx_t* trx = thr_get_trx(thr);
+
+ lock_rec_req_status status = LOCK_REC_SUCCESS;
if (lock == NULL) {
if (!impl) {
/* Note that we don't own the trx mutex. */
-#ifdef WITH_WSREP
- lock = lock_rec_create(NULL, thr,
- mode, block, heap_no, index, trx, FALSE);
-#else
lock = lock_rec_create(
- mode, block, heap_no, index, trx, FALSE);
-#endif /* WITH_WSREP */
+#ifdef WITH_WSREP
+ NULL, NULL,
+#endif
+ mode, block, heap_no, index, trx, false);
}
+
status = LOCK_REC_SUCCESS_CREATED;
} else {
trx_mutex_enter(trx);
@@ -2797,8 +2046,7 @@ This is the general, and slower, routine for locking a record. This is a
low-level function which does NOT look at implicit locks! Checks lock
compatibility within explicit locks. This function sets a normal next-key
lock, or in the case of a page supremum record, a gap type lock.
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
-or DB_QUE_THR_SUSPENDED */
+@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, or DB_DEADLOCK */
static
dberr_t
lock_rec_lock_slow(
@@ -2816,13 +2064,8 @@ lock_rec_lock_slow(
dict_index_t* index, /*!< in: index of record */
que_thr_t* thr) /*!< in: query thread */
{
- trx_t* trx;
-#ifdef WITH_WSREP
- lock_t* c_lock(NULL);
-#endif
- dberr_t err = DB_SUCCESS;
-
ut_ad(lock_mutex_own());
+ ut_ad(!srv_read_only_mode);
ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
@@ -2836,47 +2079,38 @@ lock_rec_lock_slow(
DBUG_EXECUTE_IF("innodb_report_deadlock", return(DB_DEADLOCK););
- trx = thr_get_trx(thr);
+ dberr_t err;
+ trx_t* trx = thr_get_trx(thr);
+
trx_mutex_enter(trx);
if (lock_rec_has_expl(mode, block, heap_no, trx)) {
-
- /* The trx already has a strong enough lock on rec: do
- nothing */
+ /* The trx already has a strong enough lock: do nothing */
+ err = DB_SUCCESS;
+ } else if (
#ifdef WITH_WSREP
- } else if ((c_lock = lock_rec_other_has_conflicting(
- static_cast<enum lock_mode>(mode),
- block, heap_no, trx))) {
-#else
- } else if (lock_rec_other_has_conflicting(
- static_cast<enum lock_mode>(mode),
- block, heap_no, trx)) {
+ lock_t* c_lock =
#endif /* WITH_WSREP */
-
+ lock_rec_other_has_conflicting(
+ static_cast<enum lock_mode>(mode),
+ block, heap_no, trx)) {
/* If another transaction has a non-gap conflicting
request in the queue, as this transaction does not
have a lock strong enough already granted on the
record, we have to wait. */
-
-#ifdef WITH_WSREP
- /* c_lock is NULL here if jump to enqueue_waiting happened
- but it's ok because lock is not NULL in that case and c_lock
- is not used. */
- err = lock_rec_enqueue_waiting(c_lock,
- mode, block, heap_no, index, thr);
-#else
err = lock_rec_enqueue_waiting(
- mode, block, heap_no, index, thr);
+#ifdef WITH_WSREP
+ c_lock,
#endif /* WITH_WSREP */
-
+ mode, block, heap_no, index, thr, NULL);
} else if (!impl) {
/* Set the requested lock on the record, note that
we already own the transaction mutex. */
-
lock_rec_add_to_queue(
LOCK_REC | mode, block, heap_no, index, trx, TRUE);
-
err = DB_SUCCESS_LOCKED_REC;
+ } else {
+ err = DB_SUCCESS;
}
trx_mutex_exit(trx);
@@ -2890,13 +2124,12 @@ possible, enqueues a waiting lock request. This is a low-level function
which does NOT look at implicit locks! Checks lock compatibility within
explicit locks. This function sets a normal next-key lock, or in the case
of a page supremum record, a gap type lock.
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
-or DB_QUE_THR_SUSPENDED */
+@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, or DB_DEADLOCK */
static
dberr_t
lock_rec_lock(
/*==========*/
- ibool impl, /*!< in: if TRUE, no lock is set
+ bool impl, /*!< in: if true, no lock is set
if no wait is necessary: we
assume that the caller will
set an implicit lock */
@@ -2910,6 +2143,7 @@ lock_rec_lock(
que_thr_t* thr) /*!< in: query thread */
{
ut_ad(lock_mutex_own());
+ ut_ad(!srv_read_only_mode);
ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
@@ -2939,7 +2173,7 @@ lock_rec_lock(
/*********************************************************************//**
Checks if a waiting record lock request still has to wait in a queue.
-@return lock that is causing the wait */
+@return lock that is causing the wait */
static
const lock_t*
lock_rec_has_to_wait_in_queue(
@@ -2952,6 +2186,7 @@ lock_rec_has_to_wait_in_queue(
ulint heap_no;
ulint bit_mask;
ulint bit_offset;
+ hash_table_t* hash;
ut_ad(lock_mutex_own());
ut_ad(lock_get_wait(wait_lock));
@@ -2962,9 +2197,11 @@ lock_rec_has_to_wait_in_queue(
heap_no = lock_rec_find_set_bit(wait_lock);
bit_offset = heap_no / 8;
- bit_mask = static_cast<ulint>(1 << (heap_no % 8));
+ bit_mask = static_cast<ulint>(1) << (heap_no % 8);
+
+ hash = lock_hash_get(wait_lock->type_mode);
- for (lock = lock_rec_get_first_on_page_addr(space, page_no);
+ for (lock = lock_rec_get_first_on_page_addr(hash, space, page_no);
lock != wait_lock;
lock = lock_rec_get_next_on_page_const(lock)) {
@@ -2977,19 +2214,19 @@ lock_rec_has_to_wait_in_queue(
if (wsrep_thd_is_BF(wait_lock->trx->mysql_thd, FALSE) &&
wsrep_thd_is_BF(lock->trx->mysql_thd, TRUE)) {
if (wsrep_debug) {
- time_t now = time(NULL);
- fprintf(stderr,
- "BF-BF lock conflict " TRX_ID_FMT
- " : " TRX_ID_FMT "\n",
- wait_lock->trx->id,
- lock->trx->id);
- lock_rec_print(stderr, wait_lock, now);
- lock_rec_print(stderr, lock, now);
+ mtr_t mtr;
+ ib::info() << "WSREP: waiting BF trx: " << ib::hex(wait_lock->trx->id)
+ << " query: " << wsrep_thd_query(wait_lock->trx->mysql_thd);
+ lock_rec_print(stderr, wait_lock, mtr);
+ ib::info() << "WSREP: do not wait another BF trx: " << ib::hex(lock->trx->id)
+ << " query: " << wsrep_thd_query(lock->trx->mysql_thd);
+ lock_rec_print(stderr, lock, mtr);
}
/* don't wait for another BF lock */
continue;
}
-#endif
+#endif /* WITH_WSREP */
+
return(lock);
}
}
@@ -2997,31 +2234,19 @@ lock_rec_has_to_wait_in_queue(
return(NULL);
}
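
The bit-mask change in the function above (casting to ulint before shifting,
rather than shifting first and casting the result) is the defensive form of
this computation: the shift happens in the wide type. For heap_no % 8 the
count never exceeds 7, so the old form happened to work; the sketch below,
with an assumed 64-bit ulint and a larger count, shows where the order matters.

#include <cstdint>

int main()
{
	typedef uint64_t ulint;		/* as on 64-bit builds */
	unsigned n = 40;		/* a shift count past 31 bits */

	/* Well defined: the operand is widened first, so bit 40 is set. */
	ulint mask = static_cast<ulint>(1) << n;

	/* static_cast<ulint>(1 << n) would shift a 32-bit int past its
	width, which is undefined behaviour, before the cast happens. */
	return mask != 0 ? 0 : 1;
}
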
-/*************************************************************//**
-Grants a lock to a waiting lock request and releases the waiting transaction.
-The caller must hold lock_sys->mutex but not lock->trx->mutex. */
-static
-void
-lock_grant(
-/*=======*/
- lock_t* lock, /*!< in/out: waiting lock request */
- bool owns_trx_mutex) /*!< in: whether lock->trx->mutex is owned */
+/** Grant a lock to a waiting lock request and release the waiting transaction
+after lock_reset_lock_and_trx_wait() has been called. */
+static void lock_grant_after_reset(lock_t* lock)
{
ut_ad(lock_mutex_own());
-
- lock_reset_lock_and_trx_wait(lock);
-
- if (!owns_trx_mutex) {
- trx_mutex_enter(lock->trx);
- }
+ ut_ad(trx_mutex_own(lock->trx));
if (lock_get_mode(lock) == LOCK_AUTO_INC) {
dict_table_t* table = lock->un_member.tab_lock.table;
- if (UNIV_UNLIKELY(table->autoinc_trx == lock->trx)) {
- fprintf(stderr,
- "InnoDB: Error: trx already had"
- " an AUTO-INC lock!\n");
+ if (table->autoinc_trx == lock->trx) {
+ ib::error() << "Transaction already had an"
+ << " AUTO-INC lock!";
} else {
table->autoinc_trx = lock->trx;
@@ -3029,12 +2254,8 @@ lock_grant(
}
}
-#ifdef UNIV_DEBUG
- if (lock_print_waits) {
- fprintf(stderr, "Lock wait for trx " TRX_ID_FMT " ends\n",
- lock->trx->id);
- }
-#endif /* UNIV_DEBUG */
+ DBUG_PRINT("ib_lock", ("wait for trx " TRX_ID_FMT " ends",
+ trx_get_id_for_print(lock->trx)));
/* If we are resolving a deadlock by choosing another transaction
as a victim, then our original transaction may not be in the
@@ -3050,23 +2271,15 @@ lock_grant(
lock_wait_release_thread_if_suspended(thr);
}
}
+}
- const time_t now = time(NULL);
-
- /* Cumulate total lock wait time for statistics */
- if (lock_get_type_low(lock) & LOCK_TABLE) {
- lock->trx->total_table_lock_wait_time +=
- (ulint)difftime(now, lock->trx->lock.wait_started);
- } else {
- lock->trx->total_rec_lock_wait_time +=
- (ulint)difftime(now, lock->trx->lock.wait_started);
- }
-
- lock->wait_time = (ulint)difftime(now, lock->requested_time);
-
- if (!owns_trx_mutex) {
- trx_mutex_exit(lock->trx);
- }
+/** Grant a lock to a waiting lock request and release the waiting transaction. */
+static void lock_grant(lock_t* lock)
+{
+ lock_reset_lock_and_trx_wait(lock);
+ trx_mutex_enter(lock->trx);
+ lock_grant_after_reset(lock);
+ trx_mutex_exit(lock->trx);
}
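
The grant path is now split into a core that assumes lock->trx->mutex is held
(lock_grant_after_reset, also reachable through lock_grant_have_trx_mutex) and
a wrapper that acquires it. A generic sketch of that pattern, with Trx and the
grant bookkeeping reduced to stand-ins:

#include <mutex>

struct Trx { std::mutex m; };

// Core step: the caller must already hold trx->m.
static void grant_after_reset(Trx* trx)
{
	(void) trx;	/* grant bookkeeping would happen here, under trx->m */
}

// Convenience wrapper: take the mutex, then run the core step.
static void grant(Trx* trx)
{
	std::lock_guard<std::mutex> g(trx->m);
	grant_after_reset(trx);
}
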
/*************************************************************//**
@@ -3106,17 +2319,13 @@ lock_rec_cancel(
static
void
-lock_grant_and_move_on_page(
- ulint space,
- ulint page_no)
+lock_grant_and_move_on_page(ulint rec_fold, ulint space, ulint page_no)
{
lock_t* lock;
- lock_t* next;
- lock_t* previous;
- ulint rec_fold = lock_rec_fold(space, page_no);
-
- previous = (lock_t *) hash_get_nth_cell(lock_sys->rec_hash,
- hash_calc_hash(rec_fold, lock_sys->rec_hash))->node;
+ lock_t* previous = static_cast<lock_t*>(
+ hash_get_nth_cell(lock_sys->rec_hash,
+ hash_calc_hash(rec_fold, lock_sys->rec_hash))
+ ->node);
if (previous == NULL) {
return;
}
@@ -3125,27 +2334,24 @@ lock_grant_and_move_on_page(
lock = previous;
}
else {
- next = (lock_t *) previous->hash;
- while (next &&
- (next->un_member.rec_lock.space != space ||
- next->un_member.rec_lock.page_no != page_no)) {
- previous = next;
- next = (lock_t *) previous->hash;
+ while (previous->hash &&
+ (previous->hash->un_member.rec_lock.space != space ||
+ previous->hash->un_member.rec_lock.page_no != page_no)) {
+ previous = previous->hash;
}
- lock = (lock_t *) previous->hash;
+ lock = previous->hash;
}
ut_ad(previous->hash == lock || previous == lock);
/* Grant locks if there are no conflicting locks ahead.
Move granted locks to the head of the list. */
- for (;lock != NULL;) {
+ while (lock) {
/* If the lock is a wait lock on this page, and it does not need to wait. */
- if ((lock->un_member.rec_lock.space == space)
- && (lock->un_member.rec_lock.page_no == page_no)
- && lock_get_wait(lock)
- && !lock_rec_has_to_wait_in_queue(lock)) {
-
- lock_grant(lock, false);
+ if (lock_get_wait(lock)
+ && lock->un_member.rec_lock.space == space
+ && lock->un_member.rec_lock.page_no == page_no
+ && !lock_rec_has_to_wait_in_queue(lock)) {
+ lock_grant(lock);
if (previous != NULL) {
/* Move the lock to the head of the list. */
@@ -3164,72 +2370,62 @@ lock_grant_and_move_on_page(
}
}
-/*************************************************************//**
-Removes a record lock request, waiting or granted, from the queue and
-grants locks to other transactions in the queue if they now are entitled
-to a lock. NOTE: all record locks contained in in_lock are removed. */
-static
-void
-lock_rec_dequeue_from_page(
-/*=======================*/
- lock_t* in_lock) /*!< in: record lock object: all
- record locks which are contained in
- this lock object are removed;
- transactions waiting behind will
- get their lock requests granted,
- if they are now qualified to it */
+/** Remove a record lock request, waiting or granted, from the queue and
+grant locks to other transactions in the queue if they now are entitled
+to a lock. NOTE: all record locks contained in in_lock are removed.
+@param[in,out] in_lock record lock */
+static void lock_rec_dequeue_from_page(lock_t* in_lock)
{
ulint space;
ulint page_no;
- lock_t* lock;
- trx_lock_t* trx_lock;
+ hash_table_t* lock_hash;
ut_ad(lock_mutex_own());
ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
/* We may or may not be holding in_lock->trx->mutex here. */
- trx_lock = &in_lock->trx->lock;
-
space = in_lock->un_member.rec_lock.space;
page_no = in_lock->un_member.rec_lock.page_no;
in_lock->index->table->n_rec_locks--;
- HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
- lock_rec_fold(space, page_no), in_lock);
+ lock_hash = lock_hash_get(in_lock->type_mode);
- UT_LIST_REMOVE(trx_locks, trx_lock->trx_locks, in_lock);
+ ulint rec_fold = lock_rec_fold(space, page_no);
+
+ HASH_DELETE(lock_t, hash, lock_hash, rec_fold, in_lock);
+ UT_LIST_REMOVE(in_lock->trx->lock.trx_locks, in_lock);
MONITOR_INC(MONITOR_RECLOCK_REMOVED);
MONITOR_DEC(MONITOR_NUM_RECLOCK);
if (innodb_lock_schedule_algorithm
- == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS ||
- thd_is_replication_slave_thread(in_lock->trx->mysql_thd)) {
- /* Check if waiting locks in the queue can now be granted: grant
- locks if there are no conflicting locks ahead. Stop at the first
- X lock that is waiting or has been granted. */
-
- for (lock = lock_rec_get_first_on_page_addr(space, page_no);
+ == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS
+ || lock_hash != lock_sys->rec_hash
+ || thd_is_replication_slave_thread(in_lock->trx->mysql_thd)) {
+ /* Check if waiting locks in the queue can now be granted:
+ grant locks if there are no conflicting locks ahead. Stop at
+ the first X lock that is waiting or has been granted. */
+
+ for (lock_t* lock = lock_rec_get_first_on_page_addr(
+ lock_hash, space, page_no);
lock != NULL;
lock = lock_rec_get_next_on_page(lock)) {
if (lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {
-
/* Grant the lock */
ut_ad(lock->trx != in_lock->trx);
- lock_grant(lock, false);
+ lock_grant(lock);
}
}
} else {
- lock_grant_and_move_on_page(space, page_no);
+ lock_grant_and_move_on_page(rec_fold, space, page_no);
}
}
/*************************************************************//**
Removes a record lock request, waiting or granted, from the queue. */
-static
void
lock_rec_discard(
/*=============*/
@@ -3251,10 +2447,10 @@ lock_rec_discard(
in_lock->index->table->n_rec_locks--;
- HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
- lock_rec_fold(space, page_no), in_lock);
+ HASH_DELETE(lock_t, hash, lock_hash_get(in_lock->type_mode),
+ lock_rec_fold(space, page_no), in_lock);
- UT_LIST_REMOVE(trx_locks, trx_lock->trx_locks, in_lock);
+ UT_LIST_REMOVE(trx_lock->trx_locks, in_lock);
MONITOR_INC(MONITOR_RECLOCK_REMOVED);
MONITOR_DEC(MONITOR_NUM_RECLOCK);
@@ -3266,21 +2462,16 @@ function does not move locks, or check for waiting locks, therefore the
lock bitmaps must already be reset when this function is called. */
static
void
-lock_rec_free_all_from_discard_page(
-/*================================*/
- const buf_block_t* block) /*!< in: page to be discarded */
+lock_rec_free_all_from_discard_page_low(
+/*====================================*/
+ ulint space,
+ ulint page_no,
+ hash_table_t* lock_hash)
{
- ulint space;
- ulint page_no;
lock_t* lock;
lock_t* next_lock;
- ut_ad(lock_mutex_own());
-
- space = buf_block_get_space(block);
- page_no = buf_block_get_page_no(block);
-
- lock = lock_rec_get_first_on_page_addr(space, page_no);
+ lock = lock_rec_get_first_on_page_addr(lock_hash, space, page_no);
while (lock != NULL) {
ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED);
@@ -3294,6 +2485,31 @@ lock_rec_free_all_from_discard_page(
}
}
+/*************************************************************//**
+Removes record lock objects set on an index page which is discarded. This
+function does not move locks, or check for waiting locks, therefore the
+lock bitmaps must already be reset when this function is called. */
+void
+lock_rec_free_all_from_discard_page(
+/*================================*/
+ const buf_block_t* block) /*!< in: page to be discarded */
+{
+ ulint space;
+ ulint page_no;
+
+ ut_ad(lock_mutex_own());
+
+ space = block->page.id.space();
+ page_no = block->page.id.page_no();
+
+ lock_rec_free_all_from_discard_page_low(
+ space, page_no, lock_sys->rec_hash);
+ lock_rec_free_all_from_discard_page_low(
+ space, page_no, lock_sys->prdt_hash);
+ lock_rec_free_all_from_discard_page_low(
+ space, page_no, lock_sys->prdt_page_hash);
+}
+
/*============= RECORD LOCK MOVING AND INHERITING ===================*/
/*************************************************************//**
@@ -3301,8 +2517,9 @@ Resets the lock bits for a single record. Releases transactions waiting for
lock requests here. */
static
void
-lock_rec_reset_and_release_wait(
-/*============================*/
+lock_rec_reset_and_release_wait_low(
+/*================================*/
+ hash_table_t* hash, /*!< in: hash table */
const buf_block_t* block, /*!< in: buffer block containing
the record */
ulint heap_no)/*!< in: heap number of record */
@@ -3311,7 +2528,7 @@ lock_rec_reset_and_release_wait(
ut_ad(lock_mutex_own());
- for (lock = lock_rec_get_first(block, heap_no);
+ for (lock = lock_rec_get_first(hash, block, heap_no);
lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
@@ -3324,6 +2541,26 @@ lock_rec_reset_and_release_wait(
}
/*************************************************************//**
+Resets the lock bits for a single record. Releases transactions waiting for
+lock requests here. */
+static
+void
+lock_rec_reset_and_release_wait(
+/*============================*/
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ ulint heap_no)/*!< in: heap number of record */
+{
+ lock_rec_reset_and_release_wait_low(
+ lock_sys->rec_hash, block, heap_no);
+
+ lock_rec_reset_and_release_wait_low(
+ lock_sys->prdt_hash, block, PAGE_HEAP_NO_INFIMUM);
+ lock_rec_reset_and_release_wait_low(
+ lock_sys->prdt_page_hash, block, PAGE_HEAP_NO_INFIMUM);
+}
+
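
The wrapper above fans the reset out to the record hash at the given heap
number and to both predicate hashes, where locks always sit on the page
infimum (PAGE_HEAP_NO_INFIMUM). A compact sketch of that dispatch; Hash and
reset_low are stand-ins for the hash table type and the _low function:

struct Hash { };

static void reset_low(Hash* h, unsigned long heap_no)
{
	(void) h; (void) heap_no;	/* would walk the chain and reset bits */
}

static void reset_and_release(Hash* rec, Hash* prdt, Hash* prdt_page,
			      unsigned long heap_no)
{
	const unsigned long infimum = 0;	/* PAGE_HEAP_NO_INFIMUM */

	reset_low(rec, heap_no);		/* ordinary record locks */
	reset_low(prdt, infimum);		/* predicate locks live on */
	reset_low(prdt_page, infimum);		/* the page infimum */
}
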
+/*************************************************************//**
Makes a record to inherit the locks (except LOCK_INSERT_INTENTION type)
of another record as gap type locks, but does not reset the lock bits of
the other record. Also waiting lock requests on rec are inherited as
@@ -3351,9 +2588,9 @@ lock_rec_inherit_to_gap(
READ COMMITTED isolation level, we do not want locks set
by an UPDATE or a DELETE to be inherited as gap type locks. But we
DO want S-locks/X-locks(taken for replace) set by a consistency
- constraint to be inherited also then */
+ constraint to be inherited also then. */
- for (lock = lock_rec_get_first(block, heap_no);
+ for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
@@ -3363,7 +2600,6 @@ lock_rec_inherit_to_gap(
<= TRX_ISO_READ_COMMITTED)
&& lock_get_mode(lock) ==
(lock->trx->duplicates ? LOCK_S : LOCK_X))) {
-
lock_rec_add_to_queue(
LOCK_REC | LOCK_GAP | lock_get_mode(lock),
heir_block, heir_heap_no, lock->index,
@@ -3392,7 +2628,7 @@ lock_rec_inherit_to_gap_if_gap_lock(
lock_mutex_enter();
- for (lock = lock_rec_get_first(block, heap_no);
+ for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
@@ -3415,8 +2651,9 @@ Moves the locks of a record to another record and resets the lock bits of
the donating record. */
static
void
-lock_rec_move(
-/*==========*/
+lock_rec_move_low(
+/*==============*/
+ hash_table_t* lock_hash, /*!< in: hash table to use */
const buf_block_t* receiver, /*!< in: buffer block containing
the receiving record */
const buf_block_t* donator, /*!< in: buffer block containing
@@ -3432,9 +2669,14 @@ lock_rec_move(
ut_ad(lock_mutex_own());
- ut_ad(lock_rec_get_first(receiver, receiver_heap_no) == NULL);
+	/* If the lock is a predicate lock, it resides on the INFIMUM record */
+ ut_ad(lock_rec_get_first(
+ lock_hash, receiver, receiver_heap_no) == NULL
+ || lock_hash == lock_sys->prdt_hash
+ || lock_hash == lock_sys->prdt_page_hash);
- for (lock = lock_rec_get_first(donator, donator_heap_no);
+ for (lock = lock_rec_get_first(lock_hash,
+ donator, donator_heap_no);
lock != NULL;
lock = lock_rec_get_next(donator_heap_no, lock)) {
@@ -3442,7 +2684,7 @@ lock_rec_move(
lock_rec_reset_nth_bit(lock, donator_heap_no);
- if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
+ if (type_mode & LOCK_WAIT) {
lock_reset_lock_and_trx_wait(lock);
}
@@ -3454,7 +2696,63 @@ lock_rec_move(
lock->index, lock->trx, FALSE);
}
- ut_ad(lock_rec_get_first(donator, donator_heap_no) == NULL);
+ ut_ad(lock_rec_get_first(lock_sys->rec_hash,
+ donator, donator_heap_no) == NULL);
+}
+
+/** Move all the granted locks to the front of the given lock list.
+All the waiting locks will be at the end of the list.
+@param[in,out] lock_list the given lock list. */
+static
+void
+lock_move_granted_locks_to_front(
+ UT_LIST_BASE_NODE_T(lock_t)& lock_list)
+{
+ lock_t* lock;
+
+ bool seen_waiting_lock = false;
+
+ for (lock = UT_LIST_GET_FIRST(lock_list); lock;
+ lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
+
+ if (!seen_waiting_lock) {
+ if (lock->is_waiting()) {
+ seen_waiting_lock = true;
+ }
+ continue;
+ }
+
+ ut_ad(seen_waiting_lock);
+
+ if (!lock->is_waiting()) {
+ lock_t* prev = UT_LIST_GET_PREV(trx_locks, lock);
+ ut_a(prev);
+ ut_list_move_to_front(lock_list, lock);
+ lock = prev;
+ }
+ }
+}
+
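
The list shuffle above is a stable partition: granted locks first, waiting
locks after, with the relative order inside each group preserved. On a
random-access container the same effect is a single algorithm call; a sketch
with a hypothetical Lock value type:

#include <algorithm>
#include <vector>

struct Lock { bool waiting; };

// Granted locks move to the front, waiting locks to the back, each
// group keeping its original order, as the intrusive-list loop above
// does in place.
static void move_granted_to_front(std::vector<Lock>& locks)
{
	std::stable_partition(locks.begin(), locks.end(),
			      [](const Lock& l) { return !l.waiting; });
}
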
+/*************************************************************//**
+Moves the locks of a record to another record and resets the lock bits of
+the donating record. */
+UNIV_INLINE
+void
+lock_rec_move(
+/*==========*/
+ const buf_block_t* receiver, /*!< in: buffer block containing
+ the receiving record */
+ const buf_block_t* donator, /*!< in: buffer block containing
+ the donating record */
+ ulint receiver_heap_no,/*!< in: heap_no of the record
+ which gets the locks; there
+ must be no lock requests
+ on it! */
+ ulint donator_heap_no)/*!< in: heap_no of the record
+ which gives the locks */
+{
+ lock_rec_move_low(lock_sys->rec_hash, receiver, donator,
+ receiver_heap_no, donator_heap_no);
}
/*************************************************************//**
@@ -3462,7 +2760,6 @@ Updates the lock table when we have reorganized a page. NOTE: we copy
also the locks set on the infimum of the page; the infimum may carry
locks if an update of a record is occurring on the page, and its locks
were temporarily stored on the infimum. */
-UNIV_INTERN
void
lock_move_reorganize_page(
/*======================*/
@@ -3478,7 +2775,8 @@ lock_move_reorganize_page(
lock_mutex_enter();
- lock = lock_rec_get_first_on_page(block);
+ /* FIXME: This needs to deal with predicate lock too */
+ lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block);
if (lock == NULL) {
lock_mutex_exit();
@@ -3492,13 +2790,13 @@ lock_move_reorganize_page(
bitmaps in the original locks; chain the copies of the locks
using the trx_locks field in them. */
- UT_LIST_INIT(old_locks);
+ UT_LIST_INIT(old_locks, &lock_t::trx_locks);
do {
/* Make a copy of the lock */
lock_t* old_lock = lock_rec_copy(lock, heap);
- UT_LIST_ADD_LAST(trx_locks, old_locks, old_lock);
+ UT_LIST_ADD_LAST(old_locks, old_lock);
/* Reset bitmap of lock */
lock_rec_bitmap_reset(lock);
@@ -3514,86 +2812,62 @@ lock_move_reorganize_page(
comp = page_is_comp(block->frame);
ut_ad(comp == page_is_comp(oblock->frame));
+ lock_move_granted_locks_to_front(old_locks);
+
+ DBUG_EXECUTE_IF("do_lock_reverse_page_reorganize",
+ ut_list_reverse(old_locks););
+
for (lock = UT_LIST_GET_FIRST(old_locks); lock;
lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
+
/* NOTE: we copy also the locks set on the infimum and
supremum of the page; the infimum may carry locks if an
update of a record is occurring on the page, and its locks
were temporarily stored on the infimum */
- page_cur_t cur1;
- page_cur_t cur2;
-
- page_cur_set_before_first(block, &cur1);
- page_cur_set_before_first(oblock, &cur2);
+ const rec_t* rec1 = page_get_infimum_rec(
+ buf_block_get_frame(block));
+ const rec_t* rec2 = page_get_infimum_rec(
+ buf_block_get_frame(oblock));
/* Set locks according to old locks */
for (;;) {
ulint old_heap_no;
ulint new_heap_no;
- ut_ad(comp || !memcmp(page_cur_get_rec(&cur1),
- page_cur_get_rec(&cur2),
- rec_get_data_size_old(
- page_cur_get_rec(
- &cur2))));
- if (UNIV_LIKELY(comp)) {
- old_heap_no = rec_get_heap_no_new(
- page_cur_get_rec(&cur2));
- new_heap_no = rec_get_heap_no_new(
- page_cur_get_rec(&cur1));
- } else {
- old_heap_no = rec_get_heap_no_old(
- page_cur_get_rec(&cur2));
- new_heap_no = rec_get_heap_no_old(
- page_cur_get_rec(&cur1));
- }
+ if (comp) {
+ old_heap_no = rec_get_heap_no_new(rec2);
+ new_heap_no = rec_get_heap_no_new(rec1);
- if (lock_rec_get_nth_bit(lock, old_heap_no)) {
+ rec1 = page_rec_get_next_low(rec1, TRUE);
+ rec2 = page_rec_get_next_low(rec2, TRUE);
+ } else {
+ old_heap_no = rec_get_heap_no_old(rec2);
+ new_heap_no = rec_get_heap_no_old(rec1);
+ ut_ad(!memcmp(rec1, rec2,
+ rec_get_data_size_old(rec2)));
- /* Clear the bit in old_lock. */
- ut_d(lock_rec_reset_nth_bit(lock,
- old_heap_no));
+ rec1 = page_rec_get_next_low(rec1, FALSE);
+ rec2 = page_rec_get_next_low(rec2, FALSE);
+ }
+ /* Clear the bit in old_lock. */
+ if (old_heap_no < lock->un_member.rec_lock.n_bits
+ && lock_rec_reset_nth_bit(lock, old_heap_no)) {
/* NOTE that the old lock bitmap could be too
small for the new heap number! */
lock_rec_add_to_queue(
lock->type_mode, block, new_heap_no,
lock->index, lock->trx, FALSE);
-
- /* if (new_heap_no == PAGE_HEAP_NO_SUPREMUM
- && lock_get_wait(lock)) {
- fprintf(stderr,
- "---\n--\n!!!Lock reorg: supr type %lu\n",
- lock->type_mode);
- } */
}
- if (UNIV_UNLIKELY
- (new_heap_no == PAGE_HEAP_NO_SUPREMUM)) {
-
+ if (new_heap_no == PAGE_HEAP_NO_SUPREMUM) {
ut_ad(old_heap_no == PAGE_HEAP_NO_SUPREMUM);
break;
}
-
- page_cur_move_to_next(&cur1);
- page_cur_move_to_next(&cur2);
}
-#ifdef UNIV_DEBUG
- {
- ulint i = lock_rec_find_set_bit(lock);
-
- /* Check that all locks were moved. */
- if (UNIV_UNLIKELY(i != ULINT_UNDEFINED)) {
- fprintf(stderr,
- "lock_move_reorganize_page():"
- " %lu not moved in %p\n",
- (ulong) i, (void*) lock);
- ut_error;
- }
- }
-#endif /* UNIV_DEBUG */
+ ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED);
}
lock_mutex_exit();
@@ -3608,7 +2882,6 @@ lock_move_reorganize_page(
/*************************************************************//**
Moves the explicit locks on user records to another page if a record
list end is moved to another page. */
-UNIV_INTERN
void
lock_move_rec_list_end(
/*===================*/
@@ -3620,6 +2893,9 @@ lock_move_rec_list_end(
lock_t* lock;
const ulint comp = page_rec_is_comp(rec);
+ ut_ad(buf_block_get_frame(block) == page_align(rec));
+ ut_ad(comp == page_is_comp(buf_block_get_frame(new_block)));
+
lock_mutex_enter();
/* Note: when we move locks from record to record, waiting locks
@@ -3628,61 +2904,73 @@ lock_move_rec_list_end(
table to the end of the hash chain, and lock_rec_add_to_queue
does not reuse locks if there are waiters in the queue. */
- for (lock = lock_rec_get_first_on_page(block); lock;
+ for (lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block); lock;
lock = lock_rec_get_next_on_page(lock)) {
- page_cur_t cur1;
- page_cur_t cur2;
+ const rec_t* rec1 = rec;
+ const rec_t* rec2;
const ulint type_mode = lock->type_mode;
- page_cur_position(rec, block, &cur1);
+ if (comp) {
+ if (page_offset(rec1) == PAGE_NEW_INFIMUM) {
+ rec1 = page_rec_get_next_low(rec1, TRUE);
+ }
- if (page_cur_is_before_first(&cur1)) {
- page_cur_move_to_next(&cur1);
- }
+ rec2 = page_rec_get_next_low(
+ buf_block_get_frame(new_block)
+ + PAGE_NEW_INFIMUM, TRUE);
+ } else {
+ if (page_offset(rec1) == PAGE_OLD_INFIMUM) {
+ rec1 = page_rec_get_next_low(rec1, FALSE);
+ }
- page_cur_set_before_first(new_block, &cur2);
- page_cur_move_to_next(&cur2);
+ rec2 = page_rec_get_next_low(
+ buf_block_get_frame(new_block)
+ + PAGE_OLD_INFIMUM, FALSE);
+ }
/* Copy lock requests on user records to new page and
reset the lock bits on the old */
- while (!page_cur_is_after_last(&cur1)) {
- ulint heap_no;
+ for (;;) {
+ ulint rec1_heap_no;
+ ulint rec2_heap_no;
if (comp) {
- heap_no = rec_get_heap_no_new(
- page_cur_get_rec(&cur1));
- } else {
- heap_no = rec_get_heap_no_old(
- page_cur_get_rec(&cur1));
- ut_ad(!memcmp(page_cur_get_rec(&cur1),
- page_cur_get_rec(&cur2),
- rec_get_data_size_old(
- page_cur_get_rec(&cur2))));
- }
+ rec1_heap_no = rec_get_heap_no_new(rec1);
- if (lock_rec_get_nth_bit(lock, heap_no)) {
- lock_rec_reset_nth_bit(lock, heap_no);
+ if (rec1_heap_no == PAGE_HEAP_NO_SUPREMUM) {
+ break;
+ }
- if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
- lock_reset_lock_and_trx_wait(lock);
+ rec2_heap_no = rec_get_heap_no_new(rec2);
+ rec1 = page_rec_get_next_low(rec1, TRUE);
+ rec2 = page_rec_get_next_low(rec2, TRUE);
+ } else {
+ rec1_heap_no = rec_get_heap_no_old(rec1);
+
+ if (rec1_heap_no == PAGE_HEAP_NO_SUPREMUM) {
+ break;
}
- if (comp) {
- heap_no = rec_get_heap_no_new(
- page_cur_get_rec(&cur2));
- } else {
- heap_no = rec_get_heap_no_old(
- page_cur_get_rec(&cur2));
+ rec2_heap_no = rec_get_heap_no_old(rec2);
+
+ ut_ad(!memcmp(rec1, rec2,
+ rec_get_data_size_old(rec2)));
+
+ rec1 = page_rec_get_next_low(rec1, FALSE);
+ rec2 = page_rec_get_next_low(rec2, FALSE);
+ }
+
+ if (rec1_heap_no < lock->un_member.rec_lock.n_bits
+ && lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
+ if (type_mode & LOCK_WAIT) {
+ lock_reset_lock_and_trx_wait(lock);
}
lock_rec_add_to_queue(
- type_mode, new_block, heap_no,
+ type_mode, new_block, rec2_heap_no,
lock->index, lock->trx, FALSE);
}
-
- page_cur_move_to_next(&cur1);
- page_cur_move_to_next(&cur2);
}
}
@@ -3697,7 +2985,6 @@ lock_move_rec_list_end(
/*************************************************************//**
Moves the explicit locks on user records to another page if a record
list start is moved to another page. */
-UNIV_INTERN
void
lock_move_rec_list_start(
/*=====================*/
@@ -3718,62 +3005,62 @@ lock_move_rec_list_start(
ut_ad(block->frame == page_align(rec));
ut_ad(new_block->frame == page_align(old_end));
+ ut_ad(comp == page_rec_is_comp(old_end));
lock_mutex_enter();
- for (lock = lock_rec_get_first_on_page(block); lock;
+ for (lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block); lock;
lock = lock_rec_get_next_on_page(lock)) {
- page_cur_t cur1;
- page_cur_t cur2;
+ const rec_t* rec1;
+ const rec_t* rec2;
const ulint type_mode = lock->type_mode;
- page_cur_set_before_first(block, &cur1);
- page_cur_move_to_next(&cur1);
-
- page_cur_position(old_end, new_block, &cur2);
- page_cur_move_to_next(&cur2);
+ if (comp) {
+ rec1 = page_rec_get_next_low(
+ buf_block_get_frame(block)
+ + PAGE_NEW_INFIMUM, TRUE);
+ rec2 = page_rec_get_next_low(old_end, TRUE);
+ } else {
+ rec1 = page_rec_get_next_low(
+ buf_block_get_frame(block)
+ + PAGE_OLD_INFIMUM, FALSE);
+ rec2 = page_rec_get_next_low(old_end, FALSE);
+ }
/* Copy lock requests on user records to new page and
reset the lock bits on the old */
- while (page_cur_get_rec(&cur1) != rec) {
- ulint heap_no;
+ while (rec1 != rec) {
+ ulint rec1_heap_no;
+ ulint rec2_heap_no;
if (comp) {
- heap_no = rec_get_heap_no_new(
- page_cur_get_rec(&cur1));
+ rec1_heap_no = rec_get_heap_no_new(rec1);
+ rec2_heap_no = rec_get_heap_no_new(rec2);
+
+ rec1 = page_rec_get_next_low(rec1, TRUE);
+ rec2 = page_rec_get_next_low(rec2, TRUE);
} else {
- heap_no = rec_get_heap_no_old(
- page_cur_get_rec(&cur1));
- ut_ad(!memcmp(page_cur_get_rec(&cur1),
- page_cur_get_rec(&cur2),
- rec_get_data_size_old(
- page_cur_get_rec(
- &cur2))));
- }
+ rec1_heap_no = rec_get_heap_no_old(rec1);
+ rec2_heap_no = rec_get_heap_no_old(rec2);
- if (lock_rec_get_nth_bit(lock, heap_no)) {
- lock_rec_reset_nth_bit(lock, heap_no);
+ ut_ad(!memcmp(rec1, rec2,
+ rec_get_data_size_old(rec2)));
- if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
- lock_reset_lock_and_trx_wait(lock);
- }
+ rec1 = page_rec_get_next_low(rec1, FALSE);
+ rec2 = page_rec_get_next_low(rec2, FALSE);
+ }
- if (comp) {
- heap_no = rec_get_heap_no_new(
- page_cur_get_rec(&cur2));
- } else {
- heap_no = rec_get_heap_no_old(
- page_cur_get_rec(&cur2));
+ if (rec1_heap_no < lock->un_member.rec_lock.n_bits
+ && lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
+ if (type_mode & LOCK_WAIT) {
+ lock_reset_lock_and_trx_wait(lock);
}
lock_rec_add_to_queue(
- type_mode, new_block, heap_no,
+ type_mode, new_block, rec2_heap_no,
lock->index, lock->trx, FALSE);
}
-
- page_cur_move_to_next(&cur1);
- page_cur_move_to_next(&cur2);
}
#ifdef UNIV_DEBUG
@@ -3782,14 +3069,11 @@ lock_move_rec_list_start(
for (i = PAGE_HEAP_NO_USER_LOW;
i < lock_rec_get_n_bits(lock); i++) {
- if (UNIV_UNLIKELY
- (lock_rec_get_nth_bit(lock, i))) {
-
- fprintf(stderr,
- "lock_move_rec_list_start():"
- " %lu not moved in %p\n",
- (ulong) i, (void*) lock);
- ut_error;
+ if (lock_rec_get_nth_bit(lock, i)) {
+ ib::fatal()
+ << "lock_move_rec_list_start():"
+ << i << " not moved in "
+ << (void*) lock;
}
}
}
@@ -3804,8 +3088,87 @@ lock_move_rec_list_start(
}
/*************************************************************//**
+Moves the explicit locks on user records to another page when R-tree
+records are moved. */
+void
+lock_rtr_move_rec_list(
+/*===================*/
+ const buf_block_t* new_block, /*!< in: index page to
+ move to */
+ const buf_block_t* block, /*!< in: index page */
+ rtr_rec_move_t* rec_move, /*!< in: recording records
+ moved */
+	ulint		num_move)	/*!< in: number of records to move */
+{
+ lock_t* lock;
+ ulint comp;
+
+ if (!num_move) {
+ return;
+ }
+
+ comp = page_rec_is_comp(rec_move[0].old_rec);
+
+ ut_ad(block->frame == page_align(rec_move[0].old_rec));
+ ut_ad(new_block->frame == page_align(rec_move[0].new_rec));
+ ut_ad(comp == page_rec_is_comp(rec_move[0].new_rec));
+
+ lock_mutex_enter();
+
+ for (lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block); lock;
+ lock = lock_rec_get_next_on_page(lock)) {
+ ulint moved = 0;
+ const rec_t* rec1;
+ const rec_t* rec2;
+ const ulint type_mode = lock->type_mode;
+
+ /* Copy lock requests on user records to new page and
+ reset the lock bits on the old */
+
+ while (moved < num_move) {
+ ulint rec1_heap_no;
+ ulint rec2_heap_no;
+
+ rec1 = rec_move[moved].old_rec;
+ rec2 = rec_move[moved].new_rec;
+
+ if (comp) {
+ rec1_heap_no = rec_get_heap_no_new(rec1);
+ rec2_heap_no = rec_get_heap_no_new(rec2);
+
+ } else {
+ rec1_heap_no = rec_get_heap_no_old(rec1);
+ rec2_heap_no = rec_get_heap_no_old(rec2);
+
+ ut_ad(!memcmp(rec1, rec2,
+ rec_get_data_size_old(rec2)));
+ }
+
+ if (rec1_heap_no < lock->un_member.rec_lock.n_bits
+ && lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
+ if (type_mode & LOCK_WAIT) {
+ lock_reset_lock_and_trx_wait(lock);
+ }
+
+ lock_rec_add_to_queue(
+ type_mode, new_block, rec2_heap_no,
+ lock->index, lock->trx, FALSE);
+
+ rec_move[moved].moved = true;
+ }
+
+ moved++;
+ }
+ }
+
+ lock_mutex_exit();
+
+#ifdef UNIV_DEBUG_LOCK_VALIDATE
+ ut_ad(lock_rec_validate_page(block));
+#endif
+}
+/*************************************************************//**
Updates the lock table when a page is split to the right. */
-UNIV_INTERN
void
lock_update_split_right(
/*====================*/
@@ -3833,7 +3196,6 @@ lock_update_split_right(
/*************************************************************//**
Updates the lock table when a page is merged to the right. */
-UNIV_INTERN
void
lock_update_merge_right(
/*====================*/
@@ -3860,12 +3222,22 @@ lock_update_merge_right(
/* Reset the locks on the supremum of the left page, releasing
waiting transactions */
- lock_rec_reset_and_release_wait(left_block,
- PAGE_HEAP_NO_SUPREMUM);
+ lock_rec_reset_and_release_wait_low(
+ lock_sys->rec_hash, left_block, PAGE_HEAP_NO_SUPREMUM);
+
+#ifdef UNIV_DEBUG
+	/* there should be no predicate page lock on the left page;
+	otherwise the merge would have been blocked */
+ ulint space = left_block->page.id.space();
+ ulint page_no = left_block->page.id.page_no();
+ ut_ad(lock_rec_get_first_on_page_addr(
+ lock_sys->prdt_page_hash, space, page_no) == NULL);
+#endif /* UNIV_DEBUG */
lock_rec_free_all_from_discard_page(left_block);
lock_mutex_exit();
+
}
/*************************************************************//**
@@ -3875,7 +3247,6 @@ root page, even though they do not make sense on other than leaf
pages: the reason is that in a pessimistic update the infimum record
of the root page will act as a dummy carrier of the locks of the record
to be updated. */
-UNIV_INTERN
void
lock_update_root_raise(
/*===================*/
@@ -3895,7 +3266,6 @@ lock_update_root_raise(
/*************************************************************//**
Updates the lock table when a page is copied to another and the original page
is removed from the chain of leaf pages, except if page is the root! */
-UNIV_INTERN
void
lock_update_copy_and_discard(
/*=========================*/
@@ -3918,7 +3288,6 @@ lock_update_copy_and_discard(
/*************************************************************//**
Updates the lock table when a page is split to the left. */
-UNIV_INTERN
void
lock_update_split_left(
/*===================*/
@@ -3940,7 +3309,6 @@ lock_update_split_left(
/*************************************************************//**
Updates the lock table when a page is merged to the left. */
-UNIV_INTERN
void
lock_update_merge_left(
/*===================*/
@@ -3972,8 +3340,8 @@ lock_update_merge_left(
/* Reset the locks on the supremum of the left page,
releasing waiting transactions */
- lock_rec_reset_and_release_wait(left_block,
- PAGE_HEAP_NO_SUPREMUM);
+ lock_rec_reset_and_release_wait_low(
+ lock_sys->rec_hash, left_block, PAGE_HEAP_NO_SUPREMUM);
}
/* Move the locks from the supremum of right page to the supremum
@@ -3982,48 +3350,17 @@ lock_update_merge_left(
lock_rec_move(left_block, right_block,
PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
- lock_rec_free_all_from_discard_page(right_block);
-
- lock_mutex_exit();
-}
-
-/*************************************************************//**
-Updates the lock table when a page is split and merged to
-two pages. */
-UNIV_INTERN
-void
-lock_update_split_and_merge(
- const buf_block_t* left_block, /*!< in: left page to which merged */
- const rec_t* orig_pred, /*!< in: original predecessor of
- supremum on the left page before merge*/
- const buf_block_t* right_block) /*!< in: right page from which merged */
-{
- const rec_t* left_next_rec;
-
- ut_a(left_block && right_block);
- ut_a(orig_pred);
-
- lock_mutex_enter();
-
- left_next_rec = page_rec_get_next_const(orig_pred);
-
- /* Inherit the locks on the supremum of the left page to the
- first record which was moved from the right page */
- lock_rec_inherit_to_gap(
- left_block, left_block,
- page_rec_get_heap_no(left_next_rec),
- PAGE_HEAP_NO_SUPREMUM);
-
- /* Reset the locks on the supremum of the left page,
- releasing waiting transactions */
- lock_rec_reset_and_release_wait(left_block,
- PAGE_HEAP_NO_SUPREMUM);
+#ifdef UNIV_DEBUG
+	/* there should be no predicate page lock on the right page;
+	otherwise the merge would have been blocked */
+ ulint space = right_block->page.id.space();
+ ulint page_no = right_block->page.id.page_no();
+ lock_t* lock_test = lock_rec_get_first_on_page_addr(
+ lock_sys->prdt_page_hash, space, page_no);
+ ut_ad(!lock_test);
+#endif /* UNIV_DEBUG */
- /* Inherit the locks to the supremum of the left page from the
- successor of the infimum on the right page */
- lock_rec_inherit_to_gap(left_block, right_block,
- PAGE_HEAP_NO_SUPREMUM,
- lock_get_min_heap_no(right_block));
+ lock_rec_free_all_from_discard_page(right_block);
lock_mutex_exit();
}
@@ -4031,7 +3368,6 @@ lock_update_split_and_merge(
/*************************************************************//**
Resets the original locks on heir and replaces them with gap type locks
inherited from rec. */
-UNIV_INTERN
void
lock_rec_reset_and_inherit_gap_locks(
/*=================================*/
@@ -4057,7 +3393,6 @@ lock_rec_reset_and_inherit_gap_locks(
/*************************************************************//**
Updates the lock table when a page is discarded. */
-UNIV_INTERN
void
lock_update_discard(
/*================*/
@@ -4074,53 +3409,60 @@ lock_update_discard(
lock_mutex_enter();
- if (!lock_rec_get_first_on_page(block)) {
- /* No locks exist on page, nothing to do */
+ if (lock_rec_get_first_on_page(lock_sys->rec_hash, block)) {
+ ut_ad(!lock_rec_get_first_on_page(lock_sys->prdt_hash, block));
+ ut_ad(!lock_rec_get_first_on_page(lock_sys->prdt_page_hash,
+ block));
+ /* Inherit all the locks on the page to the record and
+ reset all the locks on the page */
- lock_mutex_exit();
+ if (page_is_comp(page)) {
+ rec = page + PAGE_NEW_INFIMUM;
- return;
- }
+ do {
+ heap_no = rec_get_heap_no_new(rec);
- /* Inherit all the locks on the page to the record and reset all
- the locks on the page */
-
- if (page_is_comp(page)) {
- rec = page + PAGE_NEW_INFIMUM;
+ lock_rec_inherit_to_gap(heir_block, block,
+ heir_heap_no, heap_no);
- do {
- heap_no = rec_get_heap_no_new(rec);
+ lock_rec_reset_and_release_wait(
+ block, heap_no);
- lock_rec_inherit_to_gap(heir_block, block,
- heir_heap_no, heap_no);
-
- lock_rec_reset_and_release_wait(block, heap_no);
+ rec = page + rec_get_next_offs(rec, TRUE);
+ } while (heap_no != PAGE_HEAP_NO_SUPREMUM);
+ } else {
+ rec = page + PAGE_OLD_INFIMUM;
- rec = page + rec_get_next_offs(rec, TRUE);
- } while (heap_no != PAGE_HEAP_NO_SUPREMUM);
- } else {
- rec = page + PAGE_OLD_INFIMUM;
+ do {
+ heap_no = rec_get_heap_no_old(rec);
- do {
- heap_no = rec_get_heap_no_old(rec);
+ lock_rec_inherit_to_gap(heir_block, block,
+ heir_heap_no, heap_no);
- lock_rec_inherit_to_gap(heir_block, block,
- heir_heap_no, heap_no);
+ lock_rec_reset_and_release_wait(
+ block, heap_no);
- lock_rec_reset_and_release_wait(block, heap_no);
+ rec = page + rec_get_next_offs(rec, FALSE);
+ } while (heap_no != PAGE_HEAP_NO_SUPREMUM);
+ }
- rec = page + rec_get_next_offs(rec, FALSE);
- } while (heap_no != PAGE_HEAP_NO_SUPREMUM);
+ lock_rec_free_all_from_discard_page_low(
+ block->page.id.space(), block->page.id.page_no(),
+ lock_sys->rec_hash);
+ } else {
+ lock_rec_free_all_from_discard_page_low(
+ block->page.id.space(), block->page.id.page_no(),
+ lock_sys->prdt_hash);
+ lock_rec_free_all_from_discard_page_low(
+ block->page.id.space(), block->page.id.page_no(),
+ lock_sys->prdt_page_hash);
}
- lock_rec_free_all_from_discard_page(block);
-
lock_mutex_exit();
}
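lock_update_discard() above walks the page's record list from the infimum via next-record offsets, inheriting each record's locks to the heir before discarding the page's locks wholesale. A minimal sketch of the traversal pattern (illustrative; not the real record format):

#include <cstdio>

struct Rec { int heap_no; const Rec* next; };

static void discard_page(const Rec* infimum, int supremum_heap_no)
{
        const Rec* rec = infimum;
        int heap_no;
        do {
                heap_no = rec->heap_no;
                /* inherit heap_no's locks to the heir, then reset them */
                std::printf("inherit locks of heap_no %d\n", heap_no);
                rec = rec->next;
        } while (heap_no != supremum_heap_no);
}

int main()
{
        Rec sup  = { 1, 0 };         /* like PAGE_HEAP_NO_SUPREMUM == 1 */
        Rec user = { 2, &sup };
        Rec inf  = { 0, &user };     /* the infimum heads the list */
        discard_page(&inf, 1);
        return 0;
}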
/*************************************************************//**
Updates the lock table when a new user record is inserted. */
-UNIV_INTERN
void
lock_update_insert(
/*===============*/
@@ -4151,7 +3493,6 @@ lock_update_insert(
/*************************************************************//**
Updates the lock table when a record is removed. */
-UNIV_INTERN
void
lock_update_delete(
/*===============*/
@@ -4196,7 +3537,6 @@ updated and the size of the record changes in the update. The record
is moved in such an update, perhaps to another page. The infimum record
acts as a dummy carrier record, taking care of lock releases while the
actual record is being moved. */
-UNIV_INTERN
void
lock_rec_store_on_page_infimum(
/*===========================*/
@@ -4221,7 +3561,6 @@ lock_rec_store_on_page_infimum(
/*********************************************************************//**
Restores the state of explicit lock requests on a single record, where the
state was stored on the infimum of the page. */
-UNIV_INTERN
void
lock_rec_restore_from_page_infimum(
/*===============================*/
@@ -4243,703 +3582,41 @@ lock_rec_restore_from_page_infimum(
lock_mutex_exit();
}
-/*=========== DEADLOCK CHECKING ======================================*/
-
-/*********************************************************************//**
-rewind(3) the file used for storing the latest detected deadlock and
-print a heading message to stderr if printing of all deadlocks to stderr
-is enabled. */
-UNIV_INLINE
-void
-lock_deadlock_start_print()
-/*=======================*/
-{
- ut_ad(lock_mutex_own());
- ut_ad(!srv_read_only_mode);
-
- rewind(lock_latest_err_file);
- ut_print_timestamp(lock_latest_err_file);
-
- if (srv_print_all_deadlocks) {
- ut_print_timestamp(stderr);
- fprintf(stderr, "InnoDB: transactions deadlock detected, "
- "dumping detailed information.\n");
- ut_print_timestamp(stderr);
- }
-}
-
-/*********************************************************************//**
-Print a message to the deadlock file and possibly to stderr. */
-UNIV_INLINE
-void
-lock_deadlock_fputs(
-/*================*/
- const char* msg) /*!< in: message to print */
-{
- if (!srv_read_only_mode) {
- fputs(msg, lock_latest_err_file);
-
- if (srv_print_all_deadlocks) {
- fputs(msg, stderr);
- }
- }
-}
-
-/*********************************************************************//**
-Print transaction data to the deadlock file and possibly to stderr. */
-UNIV_INLINE
-void
-lock_deadlock_trx_print(
-/*====================*/
- const trx_t* trx, /*!< in: transaction */
- ulint max_query_len) /*!< in: max query length to print,
- or 0 to use the default max length */
-{
- ut_ad(lock_mutex_own());
- ut_ad(!srv_read_only_mode);
-
- ulint n_rec_locks = lock_number_of_rows_locked(&trx->lock);
- ulint n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
- ulint heap_size = mem_heap_get_size(trx->lock.lock_heap);
-
- mutex_enter(&trx_sys->mutex);
-
- trx_print_low(lock_latest_err_file, trx, max_query_len,
- n_rec_locks, n_trx_locks, heap_size);
-
- if (srv_print_all_deadlocks) {
- trx_print_low(stderr, trx, max_query_len,
- n_rec_locks, n_trx_locks, heap_size);
- }
-
- mutex_exit(&trx_sys->mutex);
-}
-
-/** Print lock data to the deadlock file and possibly to stderr.
-@param[in] lock record or table lock
-@param[in] now current time */
-static void lock_deadlock_lock_print(const lock_t* lock, time_t now)
-{
- ut_ad(lock_mutex_own());
- ut_ad(!srv_read_only_mode);
-
- if (lock_get_type_low(lock) == LOCK_REC) {
- mtr_t mtr;
- lock_rec_print(lock_latest_err_file, lock, now, &mtr);
-
- if (srv_print_all_deadlocks) {
- lock_rec_print(stderr, lock, now, &mtr);
- }
- } else {
- lock_table_print(lock_latest_err_file, lock, now);
-
- if (srv_print_all_deadlocks) {
- lock_table_print(stderr, lock, now);
- }
- }
-}
-
-/** Used in deadlock tracking. Protected by lock_sys->mutex. */
-static ib_uint64_t lock_mark_counter = 0;
-
-/** Check if the search is too deep. */
-#define lock_deadlock_too_deep(c) \
- (c->depth > LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK \
- || c->cost > LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK)
-
-/********************************************************************//**
-Get the next lock in the queue that is owned by a transaction whose
-sub-tree has not already been searched.
-@return next lock or NULL if at end of queue */
-static
-const lock_t*
-lock_get_next_lock(
-/*===============*/
- const lock_deadlock_ctx_t*
- ctx, /*!< in: deadlock context */
- const lock_t* lock, /*!< in: lock in the queue */
- ulint heap_no)/*!< in: heap no if rec lock else
- ULINT_UNDEFINED */
-{
- ut_ad(lock_mutex_own());
-
- do {
- if (lock_get_type_low(lock) == LOCK_REC) {
- ut_ad(heap_no != ULINT_UNDEFINED);
- lock = lock_rec_get_next_const(heap_no, lock);
- } else {
- ut_ad(heap_no == ULINT_UNDEFINED);
- ut_ad(lock_get_type_low(lock) == LOCK_TABLE);
-
- lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock);
- }
- } while (lock != NULL
- && lock->trx->lock.deadlock_mark > ctx->mark_start);
-
- ut_ad(lock == NULL
- || lock_get_type_low(lock) == lock_get_type_low(ctx->wait_lock));
-
- return(lock);
-}
-
-/********************************************************************//**
-Get the first lock to search. The search starts from the current
-wait_lock. What we are really interested in is an edge from the
-current wait_lock's owning transaction to another transaction that has
-a lock ahead in the queue. We skip locks where the owning transaction's
-sub-tree has already been searched.
-@return first lock or NULL */
-static
-const lock_t*
-lock_get_first_lock(
-/*================*/
- const lock_deadlock_ctx_t*
- ctx, /*!< in: deadlock context */
- ulint* heap_no)/*!< out: heap no if rec lock,
- else ULINT_UNDEFINED */
-{
- const lock_t* lock;
-
- ut_ad(lock_mutex_own());
-
- lock = ctx->wait_lock;
-
- if (lock_get_type_low(lock) == LOCK_REC) {
-
- *heap_no = lock_rec_find_set_bit(lock);
- ut_ad(*heap_no != ULINT_UNDEFINED);
-
- lock = lock_rec_get_first_on_page_addr(
- lock->un_member.rec_lock.space,
- lock->un_member.rec_lock.page_no);
-
- /* Position on the first lock on the physical record. */
- if (!lock_rec_get_nth_bit(lock, *heap_no)) {
- lock = lock_rec_get_next_const(*heap_no, lock);
- }
-
- } else {
- *heap_no = ULINT_UNDEFINED;
- ut_ad(lock_get_type_low(lock) == LOCK_TABLE);
- dict_table_t* table = lock->un_member.tab_lock.table;
- lock = UT_LIST_GET_FIRST(table->locks);
- }
-
- ut_a(lock != NULL);
- ut_a(lock != ctx->wait_lock ||
- innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS);
- ut_ad(lock_get_type_low(lock) == lock_get_type_low(ctx->wait_lock));
-
- return(lock);
-}
-
-/********************************************************************//**
-Notify that a deadlock has been detected and print the conflicting
-transaction info. */
-static
-void
-lock_deadlock_notify(
-/*=================*/
- const lock_deadlock_ctx_t* ctx, /*!< in: deadlock context */
- const lock_t* lock) /*!< in: lock causing
- deadlock */
-{
- ut_ad(lock_mutex_own());
- ut_ad(!srv_read_only_mode);
-
- const time_t now = time(NULL);
-
- lock_deadlock_start_print();
-
- lock_deadlock_fputs("\n*** (1) TRANSACTION:\n");
-
- lock_deadlock_trx_print(ctx->wait_lock->trx, 3000);
-
- lock_deadlock_fputs("*** (1) WAITING FOR THIS LOCK TO BE GRANTED:\n");
-
- lock_deadlock_lock_print(ctx->wait_lock, now);
-
- lock_deadlock_fputs("*** (2) TRANSACTION:\n");
-
- lock_deadlock_trx_print(lock->trx, 3000);
-
- lock_deadlock_fputs("*** (2) HOLDS THE LOCK(S):\n");
-
- lock_deadlock_lock_print(lock, now);
-
- /* It is possible that the joining transaction was granted its
- lock when we rolled back some other waiting transaction. */
-
- if (ctx->start->lock.wait_lock != 0) {
- lock_deadlock_fputs(
- "*** (2) WAITING FOR THIS LOCK TO BE GRANTED:\n");
-
- lock_deadlock_lock_print(ctx->start->lock.wait_lock, now);
- }
-
-#ifdef UNIV_DEBUG
- if (lock_print_waits) {
- fputs("Deadlock detected\n", stderr);
- }
-#endif /* UNIV_DEBUG */
-}
-
-/********************************************************************//**
-Select the victim transaction that should be rolled back.
-@return victim transaction */
-static
-const trx_t*
-lock_deadlock_select_victim(
-/*========================*/
- const lock_deadlock_ctx_t* ctx) /*!< in: deadlock context */
-{
- ut_ad(lock_mutex_own());
- ut_ad(ctx->start->lock.wait_lock != 0);
- ut_ad(ctx->wait_lock->trx != ctx->start);
-
- if (trx_weight_ge(ctx->wait_lock->trx, ctx->start)) {
- /* The joining transaction is 'smaller',
- choose it as the victim and roll it back. */
-
-#ifdef WITH_WSREP
- if (wsrep_thd_is_BF(ctx->start->mysql_thd, TRUE)) {
- return(ctx->wait_lock->trx);
- }
- else
-#endif /* WITH_WSREP */
- return(ctx->start);
- }
-
-#ifdef WITH_WSREP
- if (wsrep_thd_is_BF(ctx->wait_lock->trx->mysql_thd, TRUE)) {
- return(ctx->start);
- }
- else
-#endif /* WITH_WSREP */
- return(ctx->wait_lock->trx);
-}
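The removed victim selection prefers to roll back the "lighter" transaction (trx_weight_ge compares lock counts and undo size), with a wsrep override so that a brute-force applier is never chosen. A reduced sketch of that decision (illustrative; the weight field stands in for trx_weight_ge):

struct trx_model { unsigned long weight; bool is_bf; };

static const trx_model* select_victim(const trx_model* joiner,
                                      const trx_model* blocker)
{
        /* Prefer to roll back the joining trx when it is the lighter one. */
        const trx_model* victim =
                (blocker->weight >= joiner->weight) ? joiner : blocker;

        if (victim->is_bf) {         /* never abort a brute-force applier */
                victim = (victim == joiner) ? blocker : joiner;
        }
        return victim;
}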
-
-/********************************************************************//**
-Pop the deadlock search state from the stack.
-@return stack slot instance that was on top of the stack. */
-static
-const lock_stack_t*
-lock_deadlock_pop(
-/*==============*/
- lock_deadlock_ctx_t* ctx) /*!< in/out: context */
-{
- ut_ad(lock_mutex_own());
-
- ut_ad(ctx->depth > 0);
-
- return(&lock_stack[--ctx->depth]);
-}
-
-/********************************************************************//**
-Push the deadlock search state onto the stack.
-@return slot that was used in the stack */
-static
-lock_stack_t*
-lock_deadlock_push(
-/*===============*/
- lock_deadlock_ctx_t* ctx, /*!< in/out: context */
- const lock_t* lock, /*!< in: current lock */
- ulint heap_no) /*!< in: heap number */
-{
- ut_ad(lock_mutex_own());
-
- /* Save current search state. */
-
- if (LOCK_STACK_SIZE > ctx->depth) {
- lock_stack_t* stack;
-
- stack = &lock_stack[ctx->depth++];
-
- stack->lock = lock;
- stack->heap_no = heap_no;
- stack->wait_lock = ctx->wait_lock;
-
- return(stack);
- }
-
- return(NULL);
-}
-
-/********************************************************************//**
-Looks iteratively for a deadlock. Note: the joining transaction may
-have been granted its lock by the deadlock checks.
-@return 0 if no deadlock else the victim transaction id.*/
-static
-trx_id_t
-lock_deadlock_search(
-/*=================*/
- lock_deadlock_ctx_t* ctx, /*!< in/out: deadlock context */
- struct thd_wait_reports*waitee_ptr) /*!< in/out: list of waitees */
-{
- const lock_t* lock;
- ulint heap_no;
-
- ut_ad(lock_mutex_own());
- ut_ad(!trx_mutex_own(ctx->start));
-
- ut_ad(ctx->start != NULL);
- ut_ad(ctx->wait_lock != NULL);
- assert_trx_in_list(ctx->wait_lock->trx);
- ut_ad(ctx->mark_start <= lock_mark_counter);
-
- /* Look at the locks ahead of wait_lock in the lock queue. */
- lock = lock_get_first_lock(ctx, &heap_no);
-
- for (;;) {
-
- /* We should never visit the same sub-tree more than once. */
- ut_ad(lock == NULL
- || lock->trx->lock.deadlock_mark <= ctx->mark_start);
-
- while (ctx->depth > 0 && lock == NULL) {
- const lock_stack_t* stack;
-
- /* Restore previous search state. */
-
- stack = lock_deadlock_pop(ctx);
-
- lock = stack->lock;
- heap_no = stack->heap_no;
- ctx->wait_lock = stack->wait_lock;
-
- lock = lock_get_next_lock(ctx, lock, heap_no);
- }
-
- if (lock == NULL) {
- break;
- } else if (lock == ctx->wait_lock) {
-
- /* We can mark this subtree as searched */
- ut_ad(lock->trx->lock.deadlock_mark <= ctx->mark_start);
-
- lock->trx->lock.deadlock_mark = ++lock_mark_counter;
-
- /* We are not prepared for an overflow. This 64-bit
- counter should never wrap around. At 10^9 increments
- per second, it would take 10^3 years of uptime. */
-
- ut_ad(lock_mark_counter > 0);
-
- lock = NULL;
-
- } else if (!lock_has_to_wait(ctx->wait_lock, lock)) {
-
- /* No conflict, next lock */
- lock = lock_get_next_lock(ctx, lock, heap_no);
-
- } else if (lock->trx == ctx->start) {
-
- /* Found a cycle. */
-
- lock_deadlock_notify(ctx, lock);
-
- return(lock_deadlock_select_victim(ctx)->id);
-
- } else if (lock_deadlock_too_deep(ctx)) {
-
- /* Search too deep to continue. */
-
- ctx->too_deep = TRUE;
-
-#ifdef WITH_WSREP
- if (wsrep_thd_is_BF(ctx->start->mysql_thd, TRUE)) {
- return(ctx->wait_lock->trx->id);
- }
- else
-#endif /* WITH_WSREP */
- /* Select the joining transaction as the victim. */
- return(ctx->start->id);
-
- } else {
-			/* We do not need to report autoinc locks to the upper
-			layer. These locks are released before commit, so they
-			cannot cause deadlocks with binlog-fixed commit
-			order. */
- if (waitee_ptr &&
- (lock_get_type_low(lock) != LOCK_TABLE ||
- lock_get_mode(lock) != LOCK_AUTO_INC)) {
- if (waitee_ptr->used ==
- sizeof(waitee_ptr->waitees) /
- sizeof(waitee_ptr->waitees[0])) {
- waitee_ptr->next =
- (struct thd_wait_reports *)
- mem_alloc(sizeof(*waitee_ptr));
- waitee_ptr = waitee_ptr->next;
- if (!waitee_ptr) {
- ctx->too_deep = TRUE;
- return(ctx->start->id);
- }
- waitee_ptr->next = NULL;
- waitee_ptr->used = 0;
- }
- waitee_ptr->waitees[waitee_ptr->used++] = lock->trx;
- }
-
- if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
-
- /* Another trx ahead has requested a lock in an
- incompatible mode, and is itself waiting for a lock. */
-
- ++ctx->cost;
-
- /* Save current search state. */
- if (!lock_deadlock_push(ctx, lock, heap_no)) {
-
- /* Unable to save current search state, stack
- size not big enough. */
-
- ctx->too_deep = TRUE;
-#ifdef WITH_WSREP
- if (wsrep_thd_is_BF(ctx->start->mysql_thd, TRUE))
- return(lock->trx->id);
- else
-#endif /* WITH_WSREP */
-
- return(ctx->start->id);
- }
-
- ctx->wait_lock = lock->trx->lock.wait_lock;
- lock = lock_get_first_lock(ctx, &heap_no);
-
- if (lock->trx->lock.deadlock_mark > ctx->mark_start) {
- lock = lock_get_next_lock(ctx, lock, heap_no);
- }
-
- } else {
- lock = lock_get_next_lock(ctx, lock, heap_no);
- }
- }
- }
-
- ut_a(lock == NULL && ctx->depth == 0);
-
- /* No deadlock found. */
- return(0);
-}
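The removed lock_deadlock_search() performs an iterative depth-first search of the waits-for graph, using an explicit stack (lock_deadlock_push/pop) and a monotonically growing mark counter so that a transaction's subtree is never searched twice. A compact self-contained sketch of the same idea (illustrative; it marks a node as soon as it is queued, and only reports cycles passing through the starting transaction, matching the invariant that pre-existing cycles were resolved when their members enqueued):

#include <cstddef>
#include <utility>
#include <vector>

struct Trx {
        std::vector<Trx*> waits_for; /* edges to blocking transactions */
        unsigned long long mark;     /* last search that visited us */
        Trx() : mark(0) {}
};

static unsigned long long mark_counter = 0;

/* Returns true if 'start' is part of a waits-for cycle. */
static bool deadlock_search(Trx* start)
{
        const unsigned long long mark = ++mark_counter;
        std::vector<std::pair<Trx*, size_t> > stack;

        start->mark = mark;
        stack.push_back(std::make_pair(start, (size_t) 0));

        while (!stack.empty()) {
                Trx*   trx = stack.back().first;
                size_t i   = stack.back().second++;

                if (i == trx->waits_for.size()) {
                        stack.pop_back();        /* subtree fully searched */
                } else if (trx->waits_for[i] == start) {
                        return true;             /* cycle through 'start' */
                } else if (trx->waits_for[i]->mark != mark) {
                        trx->waits_for[i]->mark = mark;
                        stack.push_back(std::make_pair(trx->waits_for[i],
                                                       (size_t) 0));
                }
        }
        return false;                            /* no deadlock found */
}

int main()
{
        Trx a, b;
        a.waits_for.push_back(&b);
        b.waits_for.push_back(&a);               /* a -> b -> a */
        return deadlock_search(&a) ? 0 : 1;
}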
-
-/** Print info about transaction that was rolled back.
-@param[in] trx victim transaction
-@param[in] lock the requested lock
-@param[in] now current time */
-static void lock_deadlock_joining_trx_print(const trx_t* trx,
- const lock_t* lock, time_t now)
-{
- ut_ad(lock_mutex_own());
- ut_ad(!srv_read_only_mode);
-
- /* If the lock search exceeds the max step
- or the max depth, the current trx will be
- the victim. Print its information. */
- lock_deadlock_start_print();
-
- lock_deadlock_fputs(
- "TOO DEEP OR LONG SEARCH IN THE LOCK TABLE"
- " WAITS-FOR GRAPH, WE WILL ROLL BACK"
- " FOLLOWING TRANSACTION \n\n"
- "*** TRANSACTION:\n");
-
- lock_deadlock_trx_print(trx, 3000);
-
- lock_deadlock_fputs("*** WAITING FOR THIS LOCK TO BE GRANTED:\n");
-
- lock_deadlock_lock_print(lock, now);
-}
-
-/********************************************************************//**
-Rollback transaction selected as the victim. */
-static
-void
-lock_deadlock_trx_rollback(
-/*=======================*/
- lock_deadlock_ctx_t* ctx) /*!< in: deadlock context */
-{
- trx_t* trx;
-
- ut_ad(lock_mutex_own());
-
- trx = ctx->wait_lock->trx;
-
- lock_deadlock_fputs("*** WE ROLL BACK TRANSACTION (1)\n");
-
- trx_mutex_enter(trx);
-
- trx->lock.was_chosen_as_deadlock_victim = TRUE;
-
- lock_cancel_waiting_and_release(trx->lock.wait_lock);
-
- trx_mutex_exit(trx);
-}
-
-static
-void
-lock_report_waiters_to_mysql(
-/*=======================*/
- struct thd_wait_reports* waitee_buf_ptr, /*!< in: set of trxs */
- THD* mysql_thd, /*!< in: THD */
- trx_id_t victim_trx_id) /*!< in: Trx selected
- as deadlock victim, if
- any */
-{
- struct thd_wait_reports* p;
- struct thd_wait_reports* q;
- ulint i;
-
- p = waitee_buf_ptr;
- while (p) {
- i = 0;
- while (i < p->used) {
- trx_t *w_trx = p->waitees[i];
- /* There is no need to report waits to a trx already
- selected as a victim. */
- if (w_trx->id != victim_trx_id) {
- /* If thd_report_wait_for() decides to kill the
- transaction, then we will get a call back into
- innobase_kill_query. We mark this by setting
- current_lock_mutex_owner, so we can avoid trying
- to recursively take lock_sys->mutex. */
- w_trx->abort_type = TRX_REPLICATION_ABORT;
- thd_report_wait_for(mysql_thd, w_trx->mysql_thd);
- w_trx->abort_type = TRX_SERVER_ABORT;
- }
- ++i;
- }
- q = p->next;
- if (p != waitee_buf_ptr) {
- mem_free(p);
- }
- p = q;
- }
-}
-
-
-/** Check if a joining lock request results in a deadlock. If a deadlock is
-found this function will resolve the dadlock by choosing a victim transaction
-and rolling it back. It will attempt to resolve all deadlocks.
-@param[in] trx joining transaction
-@param[in] lock the requested lock
-@param[in] now current time
-@return trx->id of the victim transaction
-@retval 0 if some other transaction was chosen as a victim and
-rolled back, or no deadlock was found. */
-static trx_id_t lock_deadlock_check_and_resolve(const trx_t* trx,
- const lock_t* lock, time_t now)
-{
- trx_id_t victim_trx_id;
- struct thd_wait_reports waitee_buf;
- struct thd_wait_reports*waitee_buf_ptr;
- THD* start_mysql_thd;
-
- ut_ad(trx != NULL);
- ut_ad(lock != NULL);
- ut_ad(lock_mutex_own());
- assert_trx_in_list(trx);
-
- start_mysql_thd = trx->mysql_thd;
- if (start_mysql_thd && thd_need_wait_for(start_mysql_thd)) {
- waitee_buf_ptr = &waitee_buf;
- } else {
- waitee_buf_ptr = NULL;
- }
-
-	/* Try to resolve as many deadlocks as possible. */
- do {
- lock_deadlock_ctx_t ctx;
-
- /* Reset the context. */
- ctx.cost = 0;
- ctx.depth = 0;
- ctx.start = trx;
- ctx.too_deep = FALSE;
- ctx.wait_lock = lock;
- ctx.mark_start = lock_mark_counter;
-
- if (waitee_buf_ptr) {
- waitee_buf_ptr->next = NULL;
- waitee_buf_ptr->used = 0;
- }
-
- victim_trx_id = lock_deadlock_search(&ctx, waitee_buf_ptr);
-
- /* Report waits to upper layer, as needed. */
- if (waitee_buf_ptr) {
- lock_report_waiters_to_mysql(waitee_buf_ptr,
- start_mysql_thd,
- victim_trx_id);
- }
-
-		/* Search too deep; we roll back the joining transaction. */
- if (ctx.too_deep) {
-
- ut_a(trx == ctx.start);
- ut_a(victim_trx_id == trx->id);
-
-#ifdef WITH_WSREP
- if (!wsrep_thd_is_BF(ctx.start->mysql_thd, TRUE)); else
-#endif /* WITH_WSREP */
- if (!srv_read_only_mode) {
- lock_deadlock_joining_trx_print(trx, lock,
- now);
- }
-
- MONITOR_INC(MONITOR_DEADLOCK);
-
- } else if (victim_trx_id != 0 && victim_trx_id != trx->id) {
-
- ut_ad(victim_trx_id == ctx.wait_lock->trx->id);
- lock_deadlock_trx_rollback(&ctx);
-
- lock_deadlock_found = TRUE;
-
- MONITOR_INC(MONITOR_DEADLOCK);
- }
-
- } while (victim_trx_id != 0 && victim_trx_id != trx->id);
-
- /* If the joining transaction was selected as the victim. */
- if (victim_trx_id != 0) {
- ut_a(victim_trx_id == trx->id);
-
- lock_deadlock_fputs("*** WE ROLL BACK TRANSACTION (2)\n");
+/*========================= TABLE LOCKS ==============================*/
- lock_deadlock_found = TRUE;
+/** Functor for accessing the embedded node within a table lock. */
+struct TableLockGetNode {
+ ut_list_node<lock_t>& operator() (lock_t& elem)
+ {
+ return(elem.un_member.tab_lock.locks);
}
-
- return(victim_trx_id);
-}
-
-/*========================= TABLE LOCKS ==============================*/
+};
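TableLockGetNode tells the generic ut_list_* routines (ut_list_insert, ut_list_append, ut_list_remove below) where the intrusive list node lives inside a lock_t. A standalone illustration of the pattern (invented names):

#include <cstdio>

template <typename T> struct ListNode { T* prev; T* next; };

struct Elem {
        int            value;
        ListNode<Elem> node;         /* embedded, like tab_lock.locks */
};

struct ElemGetNode {
        ListNode<Elem>& operator()(Elem& e) const { return e.node; }
};

/* Append 'e' to a list given only its head/tail, using the functor. */
template <typename T, typename GetNode>
void list_append(T*& head, T*& tail, T& e, GetNode get)
{
        get(e).next = 0;
        get(e).prev = tail;
        if (tail) get(*tail).next = &e; else head = &e;
        tail = &e;
}

int main()
{
        Elem a = { 1, { 0, 0 } }, b = { 2, { 0, 0 } };
        Elem* head = 0; Elem* tail = 0;
        list_append(head, tail, a, ElemGetNode());
        list_append(head, tail, b, ElemGetNode());
        std::printf("%d %d\n", head->value,
                    ElemGetNode()(*head).next->value); /* prints 1 2 */
        return 0;
}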
/*********************************************************************//**
Creates a table lock object and adds it as the last in the lock queue
of the table. Does NOT check for deadlocks or lock compatibility.
-@return own: new lock object */
+@return own: new lock object */
UNIV_INLINE
lock_t*
lock_table_create(
/*==============*/
-#ifdef WITH_WSREP
- lock_t* c_lock, /*!< in: conflicting lock */
-#endif
dict_table_t* table, /*!< in/out: database table
in dictionary cache */
ulint type_mode,/*!< in: lock mode possibly ORed with
LOCK_WAIT */
- trx_t* trx) /*!< in: trx */
+ trx_t* trx /*!< in: trx */
+#ifdef WITH_WSREP
+ , lock_t* c_lock = NULL /*!< in: conflicting lock */
+#endif
+ )
{
- lock_t* lock;
+ lock_t* lock;
ut_ad(table && trx);
ut_ad(lock_mutex_own());
ut_ad(trx_mutex_own(trx));
- /* Non-locking autocommit read-only transactions should not set
- any locks. */
- assert_trx_in_list(trx);
+ check_trx_state(trx);
if ((type_mode & LOCK_MODE_MASK) == LOCK_AUTO_INC) {
++table->n_waiting_or_granted_auto_inc_locks;
@@ -4955,65 +3632,78 @@ lock_table_create(
table->autoinc_trx = trx;
ib_vector_push(trx->autoinc_locks, &lock);
+
+ } else if (trx->lock.table_cached
+ < UT_ARR_SIZE(trx->lock.table_pool)) {
+ lock = &trx->lock.table_pool[trx->lock.table_cached++];
} else {
+
lock = static_cast<lock_t*>(
mem_heap_alloc(trx->lock.lock_heap, sizeof(*lock)));
+
}
- lock->type_mode = type_mode | LOCK_TABLE;
+ lock->type_mode = ib_uint32_t(type_mode | LOCK_TABLE);
lock->trx = trx;
- lock->requested_time = time(NULL);
- lock->wait_time = 0;
lock->un_member.tab_lock.table = table;
- ut_ad(table->n_ref_count > 0 || !table->can_be_evicted);
+ ut_ad(table->get_ref_count() > 0 || !table->can_be_evicted);
- UT_LIST_ADD_LAST(trx_locks, trx->lock.trx_locks, lock);
+ UT_LIST_ADD_LAST(trx->lock.trx_locks, lock);
#ifdef WITH_WSREP
if (c_lock && trx->is_wsrep()) {
- if (wsrep_thd_is_wsrep(trx->mysql_thd)
- && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
- UT_LIST_INSERT_AFTER(
- un_member.tab_lock.locks, table->locks, c_lock, lock);
+ if (wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
+ ut_list_insert(table->locks, c_lock, lock,
+ TableLockGetNode());
+ if (wsrep_debug) {
+ ib::info() << "table lock BF conflict for "
+ << ib::hex(c_lock->trx->id)
+ << " SQL: "
+ << wsrep_thd_query(
+ c_lock->trx->mysql_thd);
+ }
} else {
- UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock);
+ ut_list_append(table->locks, lock, TableLockGetNode());
}
trx_mutex_enter(c_lock->trx);
if (c_lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
-
c_lock->trx->lock.was_chosen_as_deadlock_victim = TRUE;
if (wsrep_debug) {
wsrep_print_wait_locks(c_lock);
- wsrep_print_wait_locks(c_lock->trx->lock.wait_lock);
}
/* The lock release will call lock_grant(),
which would acquire trx->mutex again. */
trx_mutex_exit(trx);
- lock_cancel_waiting_and_release(c_lock->trx->lock.wait_lock);
+ lock_cancel_waiting_and_release(
+ c_lock->trx->lock.wait_lock);
trx_mutex_enter(trx);
if (wsrep_debug) {
- fprintf(stderr, "WSREP: c_lock canceled " TRX_ID_FMT "\n",
- c_lock->trx->id);
+ ib::info() << "WSREP: c_lock canceled "
+ << ib::hex(c_lock->trx->id)
+ << " SQL: "
+ << wsrep_thd_query(
+ c_lock->trx->mysql_thd);
}
}
+
trx_mutex_exit(c_lock->trx);
} else
#endif /* WITH_WSREP */
- UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock);
+ ut_list_append(table->locks, lock, TableLockGetNode());
- if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
+ if (type_mode & LOCK_WAIT) {
lock_set_lock_and_trx_wait(lock, trx);
}
- ib_vector_push(lock->trx->lock.table_locks, &lock);
+ lock->trx->lock.table_locks.push_back(lock);
MONITOR_INC(MONITOR_TABLELOCK_CREATED);
MONITOR_INC(MONITOR_NUM_TABLELOCK);
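Note the new allocation path above: lock_table_create() now serves lock_t objects from a small array embedded in the transaction (table_pool/table_cached) and falls back to the per-transaction lock heap only when the pool is exhausted, so the common case allocates nothing. A sketch of that pattern (illustrative; POOL_SIZE is an invented stand-in for UT_ARR_SIZE(trx->lock.table_pool)):

#include <cstdlib>

struct Lock { unsigned type_mode; };

struct TrxLockState {
        enum { POOL_SIZE = 8 };
        Lock     pool[POOL_SIZE];    /* like trx->lock.table_pool */
        unsigned cached;             /* like trx->lock.table_cached */
        TrxLockState() : cached(0) {}

        Lock* alloc_table_lock()
        {
                if (cached < POOL_SIZE) {
                        return &pool[cached++];  /* no allocation */
                }
                /* Fallback; the real code uses the per-trx lock heap,
                which is emptied wholesale when the trx releases its
                locks, so nothing is freed individually. */
                return static_cast<Lock*>(std::malloc(sizeof(Lock)));
        }
};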
@@ -5086,7 +3776,7 @@ lock_table_remove_autoinc_lock(
autoinc_lock = *static_cast<lock_t**>(
ib_vector_get(trx->autoinc_locks, i));
- if (UNIV_LIKELY(autoinc_lock == lock)) {
+ if (autoinc_lock == lock) {
void* null_var = NULL;
ib_vector_set(trx->autoinc_locks, i, &null_var);
return;
@@ -5145,8 +3835,8 @@ lock_table_remove_low(
table->n_waiting_or_granted_auto_inc_locks--;
}
- UT_LIST_REMOVE(trx_locks, trx->lock.trx_locks, lock);
- UT_LIST_REMOVE(un_member.tab_lock.locks, table->locks, lock);
+ UT_LIST_REMOVE(trx->lock.trx_locks, lock);
+ ut_list_remove(table->locks, lock, TableLockGetNode());
MONITOR_INC(MONITOR_TABLELOCK_REMOVED);
MONITOR_DEC(MONITOR_NUM_TABLELOCK);
@@ -5155,89 +3845,61 @@ lock_table_remove_low(
/*********************************************************************//**
Enqueues a waiting request for a table lock which cannot be granted
immediately. Checks for deadlocks.
-@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
-DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another
-transaction was chosen as a victim, and we got the lock immediately:
-no need to wait then */
+@retval DB_LOCK_WAIT if the waiting lock was enqueued
+@retval DB_DEADLOCK if this transaction was chosen as the victim
+@retval DB_SUCCESS if the other transaction committed or aborted */
static
dberr_t
lock_table_enqueue_waiting(
/*=======================*/
-#ifdef WITH_WSREP
- lock_t* c_lock, /*!< in: conflicting lock */
-#endif
ulint mode, /*!< in: lock mode this transaction is
requesting */
dict_table_t* table, /*!< in/out: table */
- que_thr_t* thr) /*!< in: query thread */
+ que_thr_t* thr /*!< in: query thread */
+#ifdef WITH_WSREP
+ , lock_t* c_lock /*!< in: conflicting lock or NULL */
+#endif
+)
{
trx_t* trx;
lock_t* lock;
- trx_id_t victim_trx_id;
ut_ad(lock_mutex_own());
ut_ad(!srv_read_only_mode);
trx = thr_get_trx(thr);
ut_ad(trx_mutex_own(trx));
-
- /* Test if there already is some other reason to suspend thread:
- we do not enqueue a lock request if the query thread should be
- stopped anyway */
-
- if (que_thr_stop(thr)) {
- ut_error;
-
- return(DB_QUE_THR_SUSPENDED);
- }
+ ut_a(!que_thr_stop(thr));
switch (trx_get_dict_operation(trx)) {
case TRX_DICT_OP_NONE:
break;
case TRX_DICT_OP_TABLE:
case TRX_DICT_OP_INDEX:
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: a table lock wait happens"
- " in a dictionary operation!\n"
- "InnoDB: Table name ", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs(".\n"
- "InnoDB: Submit a detailed bug report"
- " to https://jira.mariadb.org/\n",
- stderr);
+ ib::error() << "A table lock wait happens in a dictionary"
+ " operation. Table " << table->name
+ << ". " << BUG_REPORT_MSG;
ut_ad(0);
}
- /* Enqueue the lock request that will wait to be granted */
-
#ifdef WITH_WSREP
- if (trx->lock.was_chosen_as_deadlock_victim && trx->is_wsrep()) {
+ if (trx->is_wsrep() && trx->lock.was_chosen_as_deadlock_victim) {
return(DB_DEADLOCK);
}
-
- lock = lock_table_create(c_lock, table, mode | LOCK_WAIT, trx);
-#else
- lock = lock_table_create(table, mode | LOCK_WAIT, trx);
#endif /* WITH_WSREP */
- /* Release the mutex to obey the latching order.
- This is safe, because lock_deadlock_check_and_resolve()
- is invoked when a lock wait is enqueued for the currently
- running transaction. Because trx is a running transaction
- (it is not currently suspended because of a lock wait),
- its state can only be changed by this thread, which is
- currently associated with the transaction. */
-
- trx_mutex_exit(trx);
-
- const time_t now = time(NULL);
-
- victim_trx_id = lock_deadlock_check_and_resolve(trx, lock, now);
+ /* Enqueue the lock request that will wait to be granted */
+ lock = lock_table_create(table, mode | LOCK_WAIT, trx
+#ifdef WITH_WSREP
+ , c_lock
+#endif
+ );
- trx_mutex_enter(trx);
+ const trx_t* victim_trx =
+ DeadlockChecker::check_and_resolve(lock, trx);
- if (victim_trx_id != 0) {
- ut_ad(victim_trx_id == trx->id);
+ if (victim_trx != 0) {
+ ut_ad(victim_trx == trx);
		/* The order here is important; we don't want to
		lose the state of the lock before calling remove. */
@@ -5245,6 +3907,7 @@ lock_table_enqueue_waiting(
lock_reset_lock_and_trx_wait(lock);
return(DB_DEADLOCK);
+
} else if (trx->lock.wait_lock == NULL) {
/* Deadlock resolution chose another transaction as a victim,
and we accidentally got our lock granted! */
@@ -5254,9 +3917,8 @@ lock_table_enqueue_waiting(
trx->lock.que_state = TRX_QUE_LOCK_WAIT;
- trx->lock.wait_started = now;
- trx->lock.was_chosen_as_deadlock_victim = FALSE;
- trx->n_table_lock_waits++;
+ trx->lock.wait_started = time(NULL);
+ trx->lock.was_chosen_as_deadlock_victim = false;
ut_a(que_thr_stop(thr));
@@ -5268,7 +3930,7 @@ lock_table_enqueue_waiting(
/*********************************************************************//**
Checks if other transactions have an incompatible mode lock request in
the lock queue.
-@return lock or NULL */
+@return lock or NULL */
UNIV_INLINE
lock_t*
lock_table_other_has_incompatible(
@@ -5279,7 +3941,7 @@ lock_table_other_has_incompatible(
waiting locks are taken into
account, or 0 if not */
const dict_table_t* table, /*!< in: table */
- enum lock_mode mode) /*!< in: lock mode */
+ lock_mode mode) /*!< in: lock mode */
{
lock_t* lock;
@@ -5294,18 +3956,18 @@ lock_table_other_has_incompatible(
&& (wait || !lock_get_wait(lock))) {
#ifdef WITH_WSREP
- if(wsrep_thd_is_wsrep(trx->mysql_thd)) {
+ if (lock->trx->is_wsrep()) {
if (wsrep_debug) {
- fprintf(stderr, "WSREP: trx "
- TRX_ID_FMT
- " table lock abort\n",
- trx->id);
+ ib::info() << "WSREP: table lock abort for table:"
+ << table->name.m_name;
+ ib::info() << " SQL: "
+ << wsrep_thd_query(lock->trx->mysql_thd);
}
trx_mutex_enter(lock->trx);
wsrep_kill_victim((trx_t *)trx, (lock_t *)lock);
trx_mutex_exit(lock->trx);
}
-#endif
+#endif /* WITH_WSREP */
return(lock);
}
@@ -5317,8 +3979,7 @@ lock_table_other_has_incompatible(
/*********************************************************************//**
Locks the specified database table in the mode given. If the lock cannot
be granted immediately, the query thread is put to wait.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
dberr_t
lock_table(
/*=======*/
@@ -5326,20 +3987,20 @@ lock_table(
does nothing */
dict_table_t* table, /*!< in/out: database table
in dictionary cache */
- enum lock_mode mode, /*!< in: lock mode */
+ lock_mode mode, /*!< in: lock mode */
que_thr_t* thr) /*!< in: query thread */
{
-#ifdef WITH_WSREP
- lock_t *c_lock = NULL;
-#endif
trx_t* trx;
dberr_t err;
lock_t* wait_for;
- ut_ad(table != NULL);
- ut_ad(thr != NULL);
+ ut_ad(table && thr);
- if (flags & BTR_NO_LOCKING_FLAG) {
+	/* Given the limited visibility of temporary tables, we can
+	avoid the locking overhead. */
+ if ((flags & BTR_NO_LOCKING_FLAG)
+ || srv_read_only_mode
+ || dict_table_is_temporary(table)) {
return(DB_SUCCESS);
}
@@ -5358,21 +4019,28 @@ lock_table(
return(DB_SUCCESS);
}
+	/* Read-only transactions can write to temp tables; we don't want
+	to promote them to RW transactions, because their updates cannot
+	be visible to other transactions and can therefore be kept out
+	of the read views. */
+
+ if ((mode == LOCK_IX || mode == LOCK_X)
+ && !trx->read_only
+ && trx->rsegs.m_redo.rseg == 0) {
+
+ trx_set_rw_mode(trx);
+ }
+
lock_mutex_enter();
DBUG_EXECUTE_IF("fatal-semaphore-timeout",
- { os_thread_sleep(3600000000U); });
+ { os_thread_sleep(3600000000LL); });
/* We have to check if the new lock is compatible with any locks
other transactions have in the table lock queue. */
-#ifdef WITH_WSREP
- wait_for = lock_table_other_has_incompatible(
- trx, LOCK_WAIT, table, mode);
-#else
wait_for = lock_table_other_has_incompatible(
trx, LOCK_WAIT, table, mode);
-#endif
trx_mutex_enter(trx);
@@ -5380,17 +4048,13 @@ lock_table(
mode: this trx may have to wait */
if (wait_for != NULL) {
+ err = lock_table_enqueue_waiting(mode | flags, table, thr
#ifdef WITH_WSREP
- err = lock_table_enqueue_waiting(wait_for, mode | flags, table, thr);
-#else
- err = lock_table_enqueue_waiting(mode | flags, table, thr);
+ , wait_for
#endif
+ );
} else {
-#ifdef WITH_WSREP
- lock_table_create(c_lock, table, mode | flags, trx);
-#else
lock_table_create(table, mode | flags, trx);
-#endif
ut_a(!flags || mode == LOCK_S || mode == LOCK_X);
@@ -5406,7 +4070,6 @@ lock_table(
/*********************************************************************//**
Creates a table IX lock object for a resurrected transaction. */
-UNIV_INTERN
void
lock_table_ix_resurrect(
/*====================*/
@@ -5428,20 +4091,16 @@ lock_table_ix_resurrect(
trx, LOCK_WAIT, table, LOCK_IX));
trx_mutex_enter(trx);
-#ifdef WITH_WSREP
- lock_table_create(NULL, table, LOCK_IX, trx);
-#else
lock_table_create(table, LOCK_IX, trx);
-#endif
lock_mutex_exit();
trx_mutex_exit(trx);
}
/*********************************************************************//**
Checks if a waiting table lock request still has to wait in a queue.
-@return TRUE if still has to wait */
+@return TRUE if still has to wait */
static
-ibool
+bool
lock_table_has_to_wait_in_queue(
/*============================*/
const lock_t* wait_lock) /*!< in: waiting table lock */
@@ -5460,11 +4119,11 @@ lock_table_has_to_wait_in_queue(
if (lock_has_to_wait(wait_lock, lock)) {
- return(TRUE);
+ return(true);
}
}
- return(FALSE);
+ return(false);
}
/*************************************************************//**
@@ -5479,12 +4138,10 @@ lock_table_dequeue(
behind will get their lock requests granted, if
they are now qualified to it */
{
- lock_t* lock;
-
ut_ad(lock_mutex_own());
ut_a(lock_get_type_low(in_lock) == LOCK_TABLE);
- lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, in_lock);
+ lock_t* lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, in_lock);
lock_table_remove_low(in_lock);
@@ -5500,15 +4157,70 @@ lock_table_dequeue(
/* Grant the lock */
ut_ad(in_lock->trx != lock->trx);
- lock_grant(lock, false);
+ lock_grant(lock);
+ }
+ }
+}
+
+/** Sets a lock on a table based on the given mode.
+@param[in] table table to lock
+@param[in,out] trx transaction
+@param[in] mode LOCK_X or LOCK_S
+@return error code or DB_SUCCESS. */
+dberr_t
+lock_table_for_trx(
+ dict_table_t* table,
+ trx_t* trx,
+ enum lock_mode mode)
+{
+ mem_heap_t* heap;
+ que_thr_t* thr;
+ dberr_t err;
+ sel_node_t* node;
+ heap = mem_heap_create(512);
+
+ node = sel_node_create(heap);
+ thr = pars_complete_graph_for_exec(node, trx, heap, NULL);
+ thr->graph->state = QUE_FORK_ACTIVE;
+
+ /* We use the select query graph as the dummy graph needed
+ in the lock module call */
+
+ thr = static_cast<que_thr_t*>(
+ que_fork_get_first_thr(
+ static_cast<que_fork_t*>(que_node_get_parent(thr))));
+
+ que_thr_move_to_run_state_for_mysql(thr, trx);
+
+run_again:
+ thr->run_node = thr;
+ thr->prev_node = thr->common.parent;
+
+ err = lock_table(0, table, mode, thr);
+
+ trx->error_state = err;
+
+ if (UNIV_LIKELY(err == DB_SUCCESS)) {
+ que_thr_stop_for_mysql_no_error(thr, trx);
+ } else {
+ que_thr_stop_for_mysql(thr);
+
+ if (row_mysql_handle_errors(&err, trx, thr, NULL)) {
+ goto run_again;
}
}
+
+ que_graph_free(thr->graph);
+ trx->op_info = "";
+
+ return(err);
}
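lock_table_for_trx() drives the request through a dummy select query graph and re-runs it whenever row_mysql_handle_errors() reports that the error (typically a resolved lock wait) was handled. A stripped-down sketch of that retry shape (illustrative; the enum and handler below are simplified stand-ins):

enum dberr { DB_OK, DB_LOCK_WAIT_ERR, DB_DEADLOCK_ERR };

/* Returns true if the caller should re-run the statement. */
static bool handle_error(dberr& err)
{
        if (err == DB_LOCK_WAIT_ERR) {   /* the wait ended; run again */
                err = DB_OK;
                return true;
        }
        return false;                    /* e.g. DB_DEADLOCK_ERR: give up */
}

static dberr lock_with_retry(dberr (*try_lock)())
{
run_again:
        dberr err = try_lock();
        if (err != DB_OK && handle_error(err)) {
                goto run_again;
        }
        return err;
}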
/*=========================== LOCK RELEASE ==============================*/
static
void
lock_grant_and_move_on_rec(
+ hash_table_t* lock_hash,
lock_t* first_lock,
ulint heap_no)
{
@@ -5522,8 +4234,8 @@ lock_grant_and_move_on_rec(
page_no = first_lock->un_member.rec_lock.page_no;
rec_fold = lock_rec_fold(space, page_no);
- previous = (lock_t *) hash_get_nth_cell(lock_sys->rec_hash,
- hash_calc_hash(rec_fold, lock_sys->rec_hash))->node;
+ previous = (lock_t *) hash_get_nth_cell(lock_hash,
+ hash_calc_hash(rec_fold, lock_hash))->node;
if (previous == NULL) {
return;
}
@@ -5532,9 +4244,9 @@ lock_grant_and_move_on_rec(
} else {
while (previous->hash &&
previous->hash != first_lock) {
- previous = (lock_t *) previous->hash;
+ previous = previous->hash;
}
- lock = (lock_t *) previous->hash;
+ lock = previous->hash;
}
/* Grant locks if there are no conflicting locks ahead.
Move granted locks to the head of the list. */
@@ -5547,7 +4259,7 @@ lock_grant_and_move_on_rec(
&& lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {
- lock_grant(lock, false);
+ lock_grant(lock);
if (previous != NULL) {
/* Move the lock to the head of the list. */
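Granted locks are relinked at the head of the hash chain so that waiting locks accumulate behind them. A minimal singly-linked move-to-front sketch (illustrative):

struct lock_node { lock_node* hash; };

static void move_to_front(lock_node*& head, lock_node* prev, lock_node* l)
{
        if (prev == 0 || head == l) {
                return;              /* already at the front */
        }
        prev->hash = l->hash;        /* unlink from the chain */
        l->hash = head;              /* relink at the head */
        head = l;
}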
@@ -5570,7 +4282,6 @@ lock_grant_and_move_on_rec(
Removes a granted record lock of a transaction from the queue and grants
locks to other transactions waiting in the queue if they now are entitled
to a lock. */
-UNIV_INTERN
void
lock_rec_unlock(
/*============*/
@@ -5578,13 +4289,11 @@ lock_rec_unlock(
set a record lock */
const buf_block_t* block, /*!< in: buffer block containing rec */
const rec_t* rec, /*!< in: record */
- enum lock_mode lock_mode)/*!< in: LOCK_S or LOCK_X */
+ lock_mode lock_mode)/*!< in: LOCK_S or LOCK_X */
{
lock_t* first_lock;
lock_t* lock;
ulint heap_no;
- const char* stmt;
- size_t stmt_len;
ut_ad(trx);
ut_ad(rec);
@@ -5597,7 +4306,7 @@ lock_rec_unlock(
lock_mutex_enter();
trx_mutex_enter(trx);
- first_lock = lock_rec_get_first(block, heap_no);
+ first_lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
/* Find the last lock with the same lock_mode and transaction
on the record. */
@@ -5612,13 +4321,16 @@ lock_rec_unlock(
lock_mutex_exit();
trx_mutex_exit(trx);
- stmt = innobase_get_stmt(trx->mysql_thd, &stmt_len);
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "unlock row could not find a %u mode lock on the record;"
- " statement=%.*s",
- lock_mode,
- (int) stmt_len, stmt);
+ {
+ ib::error err;
+ err << "Unlock row could not find a " << lock_mode
+ << " mode lock on the record. Current statement: ";
+ size_t stmt_len;
+ if (const char* stmt = innobase_get_stmt_unsafe(
+ trx->mysql_thd, &stmt_len)) {
+ err.write(stmt, stmt_len);
+ }
+ }
return;
@@ -5639,17 +4351,58 @@ released:
/* Grant the lock */
ut_ad(trx != lock->trx);
- lock_grant(lock, false);
+ lock_grant(lock);
}
}
} else {
- lock_grant_and_move_on_rec(first_lock, heap_no);
+ lock_grant_and_move_on_rec(lock_sys->rec_hash, first_lock, heap_no);
}
lock_mutex_exit();
trx_mutex_exit(trx);
}
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Check if a transaction that has X or IX locks has set the dict_op
+code correctly. */
+static
+void
+lock_check_dict_lock(
+/*==================*/
+ const lock_t* lock) /*!< in: lock to check */
+{
+ if (lock_get_type_low(lock) == LOCK_REC) {
+
+		/* Check if the transaction locked a record
+ in a system table in X mode. It should have set
+ the dict_op code correctly if it did. */
+ if (lock->index->table->id < DICT_HDR_FIRST_ID
+ && lock_get_mode(lock) == LOCK_X) {
+
+ ut_ad(lock_get_mode(lock) != LOCK_IX);
+ ut_ad(lock->trx->dict_operation != TRX_DICT_OP_NONE);
+ }
+ } else {
+ ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
+
+ const dict_table_t* table;
+
+ table = lock->un_member.tab_lock.table;
+
+		/* Check if the transaction locked a system table
+ in IX mode. It should have set the dict_op code
+ correctly if it did. */
+ if (table->id < DICT_HDR_FIRST_ID
+ && (lock_get_mode(lock) == LOCK_X
+ || lock_get_mode(lock) == LOCK_IX)) {
+
+ ut_ad(lock->trx->dict_operation != TRX_DICT_OP_NONE);
+ }
+ }
+}
+#endif /* UNIV_DEBUG */
+
/*********************************************************************//**
Releases transaction locks, and releases possible other transactions waiting
because of these locks. */
@@ -5661,49 +4414,24 @@ lock_release(
{
lock_t* lock;
ulint count = 0;
- trx_id_t max_trx_id;
+ trx_id_t max_trx_id = trx_sys_get_max_trx_id();
ut_ad(lock_mutex_own());
ut_ad(!trx_mutex_own(trx));
- max_trx_id = trx_sys_get_max_trx_id();
-
for (lock = UT_LIST_GET_LAST(trx->lock.trx_locks);
lock != NULL;
lock = UT_LIST_GET_LAST(trx->lock.trx_locks)) {
- if (lock_get_type_low(lock) == LOCK_REC) {
+ ut_d(lock_check_dict_lock(lock));
-#ifdef UNIV_DEBUG
-			/* Check if the transaction locked a record
- in a system table in X mode. It should have set
- the dict_op code correctly if it did. */
- if (lock->index->table->id < DICT_HDR_FIRST_ID
- && lock_get_mode(lock) == LOCK_X) {
-
- ut_ad(lock_get_mode(lock) != LOCK_IX);
- ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
- }
-#endif /* UNIV_DEBUG */
+ if (lock_get_type_low(lock) == LOCK_REC) {
lock_rec_dequeue_from_page(lock);
} else {
dict_table_t* table;
table = lock->un_member.tab_lock.table;
-#ifdef UNIV_DEBUG
- ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
-
-			/* Check if the transaction locked a system table
- in IX mode. It should have set the dict_op code
- correctly if it did. */
- if (table->id < DICT_HDR_FIRST_ID
- && (lock_get_mode(lock) == LOCK_X
- || lock_get_mode(lock) == LOCK_IX)) {
-
- ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
- }
-#endif /* UNIV_DEBUG */
if (lock_get_mode(lock) != LOCK_IS
&& trx->undo_no != 0) {
@@ -5731,18 +4459,6 @@ lock_release(
++count;
}
-
- /* We don't remove the locks one by one from the vector for
- efficiency reasons. We simply reset it because we would have
- released all the locks anyway. */
-
- ib_vector_reset(trx->lock.table_locks);
-
- ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
- ut_a(ib_vector_is_empty(trx->autoinc_locks));
- ut_a(ib_vector_is_empty(trx->lock.table_locks));
-
- mem_heap_empty(trx->lock.lock_heap);
}
/* True if a lock mode is S or X */
@@ -5758,7 +4474,6 @@ lock_trx_table_locks_remove(
/*========================*/
const lock_t* lock_to_remove) /*!< in: lock to remove */
{
- lint i;
trx_t* trx = lock_to_remove->trx;
ut_ad(lock_mutex_own());
@@ -5770,23 +4485,16 @@ lock_trx_table_locks_remove(
ut_ad(trx_mutex_own(trx));
}
- for (i = ib_vector_size(trx->lock.table_locks) - 1; i >= 0; --i) {
- const lock_t* lock;
-
- lock = *static_cast<lock_t**>(
- ib_vector_get(trx->lock.table_locks, i));
-
- if (lock == NULL) {
- continue;
- }
+ for (lock_list::iterator it = trx->lock.table_locks.begin(),
+ end = trx->lock.table_locks.end(); it != end; ++it) {
+ const lock_t* lock = *it;
- ut_a(trx == lock->trx);
- ut_a(lock_get_type_low(lock) & LOCK_TABLE);
- ut_a(lock->un_member.tab_lock.table != NULL);
+ ut_ad(!lock || trx == lock->trx);
+ ut_ad(!lock || lock_get_type_low(lock) & LOCK_TABLE);
+ ut_ad(!lock || lock->un_member.tab_lock.table);
if (lock == lock_to_remove) {
- void* null_var = NULL;
- ib_vector_set(trx->lock.table_locks, i, &null_var);
+ *it = NULL;
if (!trx->lock.cancel) {
trx_mutex_exit(trx);
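Rather than erasing from the middle of table_locks, the loop above overwrites the matching slot with NULL; iterators stay valid and later scans simply skip empty slots. A tiny sketch of that tombstoning (illustrative):

#include <cstddef>
#include <vector>

typedef std::vector<const void*> lock_list_model;

static void tombstone(lock_list_model& locks, const void* victim)
{
        for (std::size_t i = locks.size(); i-- > 0; ) {
                if (locks[i] == victim) {
                        locks[i] = 0;    /* keep the slot, drop the ref */
                        return;
                }
        }
}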
@@ -5859,21 +4567,17 @@ lock_remove_recovered_trx_record_locks(
held on records in this table or on the
table itself */
{
- trx_t* trx;
- ulint n_recovered_trx = 0;
-
ut_a(table != NULL);
ut_ad(lock_mutex_own());
+ ulint n_recovered_trx = 0;
+
mutex_enter(&trx_sys->mutex);
- for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
+ for (trx_t* trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
trx != NULL;
trx = UT_LIST_GET_NEXT(trx_list, trx)) {
- lock_t* lock;
- lock_t* next_lock;
-
assert_trx_in_rw_list(trx);
if (!trx->is_recovered) {
@@ -5884,7 +4588,9 @@ lock_remove_recovered_trx_record_locks(
implicit locks cannot be converted to explicit ones
while we are scanning the explicit locks. */
- for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
+ lock_t* next_lock;
+
+ for (lock_t* lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
lock != NULL;
lock = next_lock) {
@@ -5921,16 +4627,15 @@ lock_remove_recovered_trx_record_locks(
}
/*********************************************************************//**
-Removes locks on a table to be dropped or truncated.
+Removes locks on a table to be dropped or discarded.
If remove_also_table_sx_locks is TRUE then table-level S and X locks are
also removed in addition to other table-level and record-level locks.
No lock, that is going to be removed, is allowed to be a wait lock. */
-UNIV_INTERN
void
lock_remove_all_on_table(
/*=====================*/
dict_table_t* table, /*!< in: table to be dropped
- or truncated */
+ or discarded */
ibool remove_also_table_sx_locks)/*!< in: also removes
table S and X locks */
{
@@ -5996,29 +4701,30 @@ lock_remove_all_on_table(
lock_mutex_exit();
}
-/*===================== VALIDATION AND DEBUGGING ====================*/
+/*===================== VALIDATION AND DEBUGGING ====================*/
/** Pretty-print a table lock.
@param[in,out] file output stream
-@param[in] lock table lock
-@param[in] now current time */
-static void lock_table_print(FILE* file, const lock_t* lock, time_t now)
+@param[in] lock table lock */
+static void lock_table_print(FILE* file, const lock_t* lock)
{
ut_ad(lock_mutex_own());
ut_a(lock_get_type_low(lock) == LOCK_TABLE);
fputs("TABLE LOCK table ", file);
- ut_print_name(file, lock->trx, TRUE,
- lock->un_member.tab_lock.table->name);
- fprintf(file, " trx id " TRX_ID_FMT, lock->trx->id);
+ ut_print_name(file, lock->trx,
+ lock->un_member.tab_lock.table->name.m_name);
+ fprintf(file, " trx id " TRX_ID_FMT, trx_get_id_for_print(lock->trx));
if (lock_get_mode(lock) == LOCK_S) {
fputs(" lock mode S", file);
} else if (lock_get_mode(lock) == LOCK_X) {
+ ut_ad(lock->trx->id != 0);
fputs(" lock mode X", file);
} else if (lock_get_mode(lock) == LOCK_IS) {
fputs(" lock mode IS", file);
} else if (lock_get_mode(lock) == LOCK_IX) {
+ ut_ad(lock->trx->id != 0);
fputs(" lock mode IX", file);
} else if (lock_get_mode(lock) == LOCK_AUTO_INC) {
fputs(" lock mode AUTO-INC", file);
@@ -6031,20 +4737,14 @@ static void lock_table_print(FILE* file, const lock_t* lock, time_t now)
fputs(" waiting", file);
}
- fprintf(file, " lock hold time %lu wait time before grant %lu ",
- (ulint)difftime(now, lock->requested_time),
- lock->wait_time);
-
putc('\n', file);
}
/** Pretty-print a record lock.
@param[in,out] file output stream
@param[in] lock record lock
-@param[in] now current time
-@param[in,out] mtr mini-transaction */
-static void lock_rec_print(FILE* file, const lock_t* lock, time_t now,
- mtr_t* mtr)
+@param[in,out] mtr mini-transaction for accessing the record */
+static void lock_rec_print(FILE* file, const lock_t* lock, mtr_t& mtr)
{
ulint space;
ulint page_no;
@@ -6055,18 +4755,13 @@ static void lock_rec_print(FILE* file, const lock_t* lock, time_t now,
space = lock->un_member.rec_lock.space;
page_no = lock->un_member.rec_lock.page_no;
- fprintf(file, "RECORD LOCKS space id %lu page no %lu n bits %lu ",
+ fprintf(file, "RECORD LOCKS space id %lu page no %lu n bits %lu "
+ "index %s of table ",
(ulong) space, (ulong) page_no,
- (ulong) lock_rec_get_n_bits(lock));
-
- dict_index_name_print(file, lock->trx, lock->index);
-
- /* Print number of table locks */
- fprintf(file, " trx table locks %lu total table locks %lu ",
- ib_vector_size(lock->trx->lock.table_locks),
- UT_LIST_GET_LEN(lock->index->table->locks));
-
- fprintf(file, " trx id " TRX_ID_FMT, lock->trx->id);
+ (ulong) lock_rec_get_n_bits(lock),
+ lock->index->name());
+ ut_print_name(file, lock->trx, lock->index->table_name);
+ fprintf(file, " trx id " TRX_ID_FMT, trx_get_id_for_print(lock->trx));
if (lock_get_mode(lock) == LOCK_S) {
fputs(" lock mode S", file);
@@ -6092,23 +4787,16 @@ static void lock_rec_print(FILE* file, const lock_t* lock, time_t now,
fputs(" waiting", file);
}
- fprintf(file, " lock hold time %lu wait time before grant %lu ",
- (ulint)difftime(now, lock->requested_time),
- lock->wait_time);
-
putc('\n', file);
- if (!mtr) {
- return;
- }
-
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
rec_offs_init(offsets_);
- mtr_start(mtr);
- const buf_block_t* block = buf_page_try_get(space, page_no, mtr);
+ mtr.start();
+ const buf_block_t* block = buf_page_try_get(page_id_t(space, page_no),
+ &mtr);
for (ulint i = 0; i < lock_rec_get_n_bits(lock); ++i) {
@@ -6119,13 +4807,14 @@ static void lock_rec_print(FILE* file, const lock_t* lock, time_t now,
fprintf(file, "Record lock, heap no %lu", (ulong) i);
if (block) {
+ ut_ad(page_is_leaf(block->frame));
const rec_t* rec;
rec = page_find_rec_with_heap_no(
buf_block_get_frame(block), i);
offsets = rec_get_offsets(
- rec, lock->index, offsets,
+ rec, lock->index, offsets, true,
ULINT_UNDEFINED, &heap);
putc(' ', file);
@@ -6135,7 +4824,8 @@ static void lock_rec_print(FILE* file, const lock_t* lock, time_t now,
putc('\n', file);
}
- mtr_commit(mtr);
+ mtr.commit();
+
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
@@ -6151,7 +4841,7 @@ http://bugs.mysql.com/36942 */
#ifdef PRINT_NUM_OF_LOCK_STRUCTS
/*********************************************************************//**
Calculates the number of record lock structs in the record lock hash table.
-@return number of record locks */
+@return number of record locks */
static
ulint
lock_get_n_rec_locks(void)
@@ -6183,12 +4873,11 @@ lock_get_n_rec_locks(void)
Prints info of locks for all transactions.
@return FALSE if not able to obtain lock mutex
and exits without printing info */
-UNIV_INTERN
ibool
lock_print_info_summary(
/*====================*/
FILE* file, /*!< in: file where to print */
- ibool nowait) /*!< in: whether to wait for the lock mutex */
+ ibool nowait) /*!< in: whether to wait for the lock mutex */
{
/* if nowait is FALSE, wait on the lock mutex,
otherwise return immediately if fail to obtain the
@@ -6196,8 +4885,8 @@ lock_print_info_summary(
if (!nowait) {
lock_mutex_enter();
} else if (lock_mutex_enter_nowait()) {
- fputs("FAIL TO OBTAIN LOCK MUTEX, "
- "SKIP LOCK INFO PRINTING\n", file);
+ fputs("FAIL TO OBTAIN LOCK MUTEX,"
+ " SKIP LOCK INFO PRINTING\n", file);
return(FALSE);
}
@@ -6268,260 +4957,170 @@ lock_print_info_summary(
return(TRUE);
}
-/*********************************************************************//**
-Prints info of locks for each transaction. This function assumes that the
-caller holds the lock mutex and more importantly it will release the lock
-mutex on behalf of the caller. (This should be fixed in the future). */
-UNIV_INTERN
+/** Prints transaction lock wait and MVCC state.
+@param[in,out] file file where to print
+@param[in] trx transaction
+@param[in] now current time */
void
-lock_print_info_all_transactions(
-/*=============================*/
- FILE* file) /*!< in: file where to print */
+lock_trx_print_wait_and_mvcc_state(FILE* file, const trx_t* trx, time_t now)
{
- const lock_t* lock;
- ibool load_page_first = TRUE;
- ulint nth_trx = 0;
- ulint nth_lock = 0;
- ulint i;
- mtr_t mtr;
- const trx_t* trx;
- trx_list_t* trx_list = &trx_sys->rw_trx_list;
-
- fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n");
-
- ut_ad(lock_mutex_own());
-
- const time_t now = time(NULL);
+ fprintf(file, "---");
- mutex_enter(&trx_sys->mutex);
-
- /* First print info on non-active transactions */
-
- /* NOTE: information of auto-commit non-locking read-only
- transactions will be omitted here. The information will be
- available from INFORMATION_SCHEMA.INNODB_TRX. */
-
- for (trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
- trx != NULL;
- trx = UT_LIST_GET_NEXT(mysql_trx_list, trx)) {
+ trx_print_latched(file, trx, 600);
- ut_ad(trx->in_mysql_trx_list);
+ const ReadView* read_view = trx_get_read_view(trx);
- /* See state transitions and locking rules in trx0trx.h */
-
- if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
- fputs("---", file);
- trx_print_latched(file, trx, 600);
- }
+ if (read_view != NULL) {
+ read_view->print_limits(file);
}
-loop:
- /* Since we temporarily release lock_sys->mutex and
- trx_sys->mutex when reading a database page in below,
- variable trx may be obsolete now and we must loop
- through the trx list to get probably the same trx,
- or some other trx. */
-
- for (trx = UT_LIST_GET_FIRST(*trx_list), i = 0;
- trx && (i < nth_trx);
- trx = UT_LIST_GET_NEXT(trx_list, trx), i++) {
+ if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
- assert_trx_in_list(trx);
- ut_ad(trx->read_only == (trx_list == &trx_sys->ro_trx_list));
- }
-
- ut_ad(trx == NULL
- || trx->read_only == (trx_list == &trx_sys->ro_trx_list));
+ fprintf(file,
+ "------- TRX HAS BEEN WAITING %lu SEC"
+ " FOR THIS LOCK TO BE GRANTED:\n",
+ (ulong) difftime(now, trx->lock.wait_started));
- if (trx == NULL) {
- /* Check the read-only transaction list next. */
- if (trx_list == &trx_sys->rw_trx_list) {
- trx_list = &trx_sys->ro_trx_list;
- nth_trx = 0;
- nth_lock = 0;
- goto loop;
+ if (lock_get_type_low(trx->lock.wait_lock) == LOCK_REC) {
+ mtr_t mtr;
+ lock_rec_print(file, trx->lock.wait_lock, mtr);
+ } else {
+ lock_table_print(file, trx->lock.wait_lock);
}
- lock_mutex_exit();
- mutex_exit(&trx_sys->mutex);
-
- ut_ad(lock_validate());
-
- return;
+ fprintf(file, "------------------\n");
}
+}
- assert_trx_in_list(trx);
-
- if (nth_lock == 0) {
- fputs("---", file);
+/*********************************************************************//**
+Prints info of locks for a transaction. */
+static
+void
+lock_trx_print_locks(
+/*=================*/
+ FILE* file, /*!< in/out: File to write */
+ const trx_t* trx) /*!< in: current transaction */
+{
+ mtr_t mtr;
+	uint32_t i = 0;
+ /* Iterate over the transaction's locks. */
+ for (lock_t *lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
+ lock != NULL;
+ lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
+ if (lock_get_type_low(lock) == LOCK_REC) {
- trx_print_latched(file, trx, 600);
+ lock_rec_print(file, lock, mtr);
+ } else {
+ ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
- if (trx->read_view) {
- fprintf(file,
- "Trx read view will not see trx with"
- " id >= " TRX_ID_FMT
- ", sees < " TRX_ID_FMT "\n",
- trx->read_view->low_limit_id,
- trx->read_view->up_limit_id);
+ lock_table_print(file, lock);
}
- /* Total trx lock waits and times */
- fprintf(file, "Trx #rec lock waits %lu #table lock waits %lu\n",
- trx->n_rec_lock_waits, trx->n_table_lock_waits);
- fprintf(file, "Trx total rec lock wait time %lu SEC\n",
- trx->total_rec_lock_wait_time);
- fprintf(file, "Trx total table lock wait time %lu SEC\n",
- trx->total_table_lock_wait_time);
-
- if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
+ if (++i == 10) {
fprintf(file,
- "------- TRX HAS BEEN WAITING %lu SEC"
- " FOR THIS LOCK TO BE GRANTED:\n",
- (ulong) difftime(now, trx->lock.wait_started));
+ "10 LOCKS PRINTED FOR THIS TRX:"
+ " SUPPRESSING FURTHER PRINTS\n");
- if (lock_get_type_low(trx->lock.wait_lock) == LOCK_REC) {
- lock_rec_print(file, trx->lock.wait_lock, now,
- &mtr);
- } else {
- lock_table_print(file, trx->lock.wait_lock,
- now);
- }
-
- fputs("------------------\n", file);
+ break;
}
}
+}
- if (!srv_print_innodb_lock_monitor) {
- nth_trx++;
- goto loop;
- }
-
- i = 0;
-
- /* Look at the note about the trx loop above why we loop here:
- lock may be an obsolete pointer now. */
-
- lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
-
- while (lock && (i < nth_lock)) {
- lock = UT_LIST_GET_NEXT(trx_locks, lock);
- i++;
- }
-
- if (lock == NULL) {
- nth_trx++;
- nth_lock = 0;
-
- goto loop;
- }
-
- if (lock_get_type_low(lock) == LOCK_REC) {
- if (load_page_first) {
- ulint space_id = lock->un_member.rec_lock.space;
-			/* Check if the space exists or not. Only
-			when the space is valid do we try to get the page. */
- fil_space_t* space = fil_space_acquire(space_id);
- ulint page_no = lock->un_member.rec_lock.page_no;
-
- if (!space) {
-
- /* It is a single table tablespace and
- the .ibd file is missing (TRUNCATE
- TABLE probably stole the locks): just
- print the lock without attempting to
- load the page in the buffer pool. */
-
- fprintf(file, "RECORD LOCKS on"
- " non-existing space: " ULINTPF "\n",
- space_id);
- goto print_rec;
- }
-
- const ulint zip_size = fsp_flags_get_zip_size(space->flags);
-
- lock_mutex_exit();
- mutex_exit(&trx_sys->mutex);
-
- DEBUG_SYNC_C("innodb_monitor_before_lock_page_read");
-
- if (space) {
- mtr_start(&mtr);
-
- buf_page_get_gen(space_id, zip_size, page_no,
- RW_NO_LATCH, NULL,
- BUF_GET_POSSIBLY_FREED,
- __FILE__, __LINE__, &mtr);
-
- mtr_commit(&mtr);
-
- }
-
-
- fil_space_release(space);
+/** Functor to display all transactions (except recovered ones) */
+struct lock_print_info
+{
+ lock_print_info(FILE* file, time_t now) : file(file), now(now) {}
- load_page_first = FALSE;
+ void operator()(const trx_t* trx) const
+ {
+ ut_ad(mutex_own(&trx_sys->mutex));
+ ut_ad(trx->in_mysql_trx_list);
+ lock_trx_print_wait_and_mvcc_state(file, trx, now);
- lock_mutex_enter();
+ if (trx->will_lock && srv_print_innodb_lock_monitor)
+ lock_trx_print_locks(file, trx);
+ }
- mutex_enter(&trx_sys->mutex);
+ FILE* const file;
+ const time_t now;
+};
- goto loop;
- }
+/** Functor to display recovered read-write transactions */
+struct lock_print_info_rw_recovered
+{
+ lock_print_info_rw_recovered(FILE* file, time_t now) : file(file),now(now) {}
-print_rec:
- lock_rec_print(file, lock, now, &mtr);
- } else {
- ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
+ void operator()(const trx_t* trx) const
+ {
+ ut_ad(mutex_own(&trx_sys->mutex));
+ ut_ad(trx->in_rw_trx_list);
+ if (trx->mysql_thd)
+ return;
+ ut_ad(!trx->in_mysql_trx_list);
- lock_table_print(file, lock, now);
- }
+ lock_trx_print_wait_and_mvcc_state(file, trx, now);
- load_page_first = TRUE;
+ if (trx->will_lock && srv_print_innodb_lock_monitor)
+ lock_trx_print_locks(file, trx);
+ }
- nth_lock++;
+ FILE* const file;
+ const time_t now;
+};
- if (nth_lock >= 10) {
- fputs("10 LOCKS PRINTED FOR THIS TRX:"
- " SUPPRESSING FURTHER PRINTS\n",
- file);
+/*********************************************************************//**
+Prints info of locks for each transaction. This function assumes that the
+caller holds the lock mutex and, more importantly, that it will release the
+lock mutex on behalf of the caller. (This should be fixed in the future.) */
+void
+lock_print_info_all_transactions(
+/*=============================*/
+ FILE* file) /*!< in/out: file where to print */
+{
+ ut_ad(lock_mutex_own());
- nth_trx++;
- nth_lock = 0;
- }
+ fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n");
+ const time_t now = time(NULL);
- goto loop;
+ mutex_enter(&trx_sys->mutex);
+ ut_list_map(trx_sys->mysql_trx_list, lock_print_info(file, now));
+ ut_list_map(trx_sys->rw_trx_list,
+ lock_print_info_rw_recovered(file, now));
+ mutex_exit(&trx_sys->mutex);
+ lock_mutex_exit();
+ ut_ad(lock_validate());
}
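
To make the functor-based iteration introduced above easier to follow, here is a minimal, self-contained sketch of the pattern: a generic `list_map` applies a small function object to every element, replacing the old open-coded loop with `goto`. `list_map`, `Trx`, and `PrintTrx` are hypothetical stand-ins for `ut_list_map`, `trx_t`, and the printing functors, not the real InnoDB definitions.

```cpp
#include <cstdio>
#include <list>

struct Trx { int id; };   // stand-in for trx_t

// stand-in for ut_list_map: apply f to every element of the list
template <typename List, typename F>
void list_map(const List& list, F f)
{
	for (const auto& elem : list) {
		f(elem);
	}
}

// stand-in for the lock_print_info functor
struct PrintTrx {
	explicit PrintTrx(FILE* file) : file(file) {}

	void operator()(const Trx& trx) const
	{
		std::fprintf(file, "---TRANSACTION %d\n", trx.id);
	}

	FILE* const file;
};

// Usage: list_map(mysql_trx_list, PrintTrx(stderr));
```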
#ifdef UNIV_DEBUG
/*********************************************************************//**
Find the lock in the trx_t::trx_lock_t::table_locks vector.
-@return TRUE if found */
+@return true if found */
static
-ibool
+bool
lock_trx_table_locks_find(
/*======================*/
trx_t* trx, /*!< in: trx to validate */
const lock_t* find_lock) /*!< in: lock to find */
{
- lint i;
- ibool found = FALSE;
+ bool found = false;
- trx_mutex_enter(trx);
+ ut_ad(trx_mutex_own(trx));
- for (i = ib_vector_size(trx->lock.table_locks) - 1; i >= 0; --i) {
- const lock_t* lock;
+ for (lock_list::const_iterator it = trx->lock.table_locks.begin(),
+ end = trx->lock.table_locks.end(); it != end; ++it) {
- lock = *static_cast<const lock_t**>(
- ib_vector_get(trx->lock.table_locks, i));
+ const lock_t* lock = *it;
if (lock == NULL) {
+
continue;
+
} else if (lock == find_lock) {
+
/* Can't be duplicates. */
ut_a(!found);
- found = TRUE;
+ found = true;
}
ut_a(trx == lock->trx);
@@ -6529,14 +5128,12 @@ lock_trx_table_locks_find(
ut_a(lock->un_member.tab_lock.table != NULL);
}
- trx_mutex_exit(trx);
-
return(found);
}
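
A minimal sketch of the duplicate-free linear search above, assuming a plain `std::list<const Lock*>` in place of the real `lock_list` type; the assertion mirrors the `ut_a(!found)` check that the container must not hold the same lock twice.

```cpp
#include <cassert>
#include <list>

struct Lock { int id; };   // stand-in for lock_t

bool table_locks_find(const std::list<const Lock*>& table_locks,
		      const Lock* find_lock)
{
	bool found = false;

	for (const Lock* lock : table_locks) {
		if (lock == nullptr) {
			continue;         // slots may be cleared in place
		}
		if (lock == find_lock) {
			assert(!found);   // no duplicates allowed
			found = true;
		}
	}

	return found;
}
```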
/*********************************************************************//**
Validates the lock queue on a table.
-@return TRUE if ok */
+@return TRUE if ok */
static
ibool
lock_table_queue_validate(
@@ -6546,7 +5143,7 @@ lock_table_queue_validate(
const lock_t* lock;
ut_ad(lock_mutex_own());
- ut_ad(mutex_own(&trx_sys->mutex));
+ ut_ad(trx_sys_mutex_own());
for (lock = UT_LIST_GET_FIRST(table->locks);
lock != NULL;
@@ -6554,21 +5151,20 @@ lock_table_queue_validate(
/* lock->trx->state cannot change from or to NOT_STARTED
while we are holding the trx_sys->mutex. It may change
- from ACTIVE to PREPARED, but it may not change to
- COMMITTED, because we are holding the lock_sys->mutex. */
+ from ACTIVE or PREPARED to PREPARED or COMMITTED. */
+ trx_mutex_enter(lock->trx);
ut_ad(trx_assert_started(lock->trx));
-
- if (!lock_get_wait(lock)) {
-
+ if (trx_state_eq(lock->trx, TRX_STATE_COMMITTED_IN_MEMORY)) {
+ } else if (!lock_get_wait(lock)) {
ut_a(!lock_table_other_has_incompatible(
lock->trx, 0, table,
lock_get_mode(lock)));
} else {
-
ut_a(lock_table_has_to_wait_in_queue(lock));
}
ut_a(lock_trx_table_locks_find(lock->trx, lock));
+ trx_mutex_exit(lock->trx);
}
return(TRUE);
@@ -6576,7 +5172,7 @@ lock_table_queue_validate(
/*********************************************************************//**
Validates the lock queue on a single record.
-@return TRUE if ok */
+@return TRUE if ok */
static
ibool
lock_rec_queue_validate(
@@ -6588,9 +5184,8 @@ lock_rec_queue_validate(
const buf_block_t* block, /*!< in: buffer block containing rec */
const rec_t* rec, /*!< in: record to look at */
const dict_index_t* index, /*!< in: index, or NULL if not known */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets)/*!< in: rec_get_offsets(rec, index) */
{
- const trx_t* impl_trx;
const lock_t* lock;
ulint heap_no;
@@ -6611,52 +5206,89 @@ lock_rec_queue_validate(
if (!page_rec_is_user_rec(rec)) {
- for (lock = lock_rec_get_first(block, heap_no);
+ for (lock = lock_rec_get_first(lock_sys->rec_hash,
+ block, heap_no);
lock != NULL;
lock = lock_rec_get_next_const(heap_no, lock)) {
- ut_a(trx_in_trx_list(lock->trx));
+ ut_ad(!index || lock->index == index);
- if (lock_get_wait(lock)) {
- ut_a(lock_rec_has_to_wait_in_queue(lock));
- }
-
- if (index) {
- ut_a(lock->index == index);
- }
+ trx_mutex_enter(lock->trx);
+ ut_ad(!trx_is_ac_nl_ro(lock->trx));
+ ut_ad(trx_state_eq(lock->trx,
+ TRX_STATE_COMMITTED_IN_MEMORY)
+ || !lock_get_wait(lock)
+ || lock_rec_has_to_wait_in_queue(lock));
+ trx_mutex_exit(lock->trx);
}
goto func_exit;
}
- if (!index);
- else if (dict_index_is_clust(index)) {
- trx_id_t trx_id;
-
- /* Unlike the non-debug code, this invariant can only succeed
- if the check and assertion are covered by the lock mutex. */
-
- trx_id = lock_clust_rec_some_has_impl(rec, index, offsets);
- impl_trx = trx_rw_is_active_low(trx_id, NULL);
+ ut_ad(page_rec_is_leaf(rec));
+ ut_ad(lock_mutex_own());
- ut_ad(lock_mutex_own());
- /* impl_trx cannot be committed until lock_mutex_exit()
- because lock_trx_release_locks() acquires lock_sys->mutex */
+ if (!index || !index->is_primary()) {
+ /* Nothing we can do */
+ } else if (trx_t* impl_trx = trx_rw_is_active_low(
+ lock_clust_rec_some_has_impl(rec, index, offsets),
+ NULL)) {
+ /* impl_trx could have been committed before we
+ acquire its mutex, but not thereafter. */
+
+ mutex_enter(&impl_trx->mutex);
+ ut_ad(impl_trx->state != TRX_STATE_NOT_STARTED);
+ if (impl_trx->state == TRX_STATE_COMMITTED_IN_MEMORY) {
+ } else if (const lock_t* other_lock
+ = lock_rec_other_has_expl_req(
+ LOCK_S, block, true, heap_no,
+ impl_trx)) {
+ /* The impl_trx is holding an implicit lock on the
+ given record 'rec'. So there cannot be another
+ explicit granted lock. Also, there can be another
+ explicit waiting lock only if the impl_trx has an
+ explicit granted lock. */
- if (impl_trx != NULL
- && lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT,
- block, heap_no, impl_trx)) {
+#ifdef WITH_WSREP
+ if (other_lock->trx->is_wsrep()) {
+ if (!lock_get_wait(other_lock) ) {
+ ib::info() << "WSREP impl BF lock conflict for my impl lock:\n BF:" <<
+ ((wsrep_thd_is_BF(impl_trx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " <<
+ wsrep_thd_exec_mode(impl_trx->mysql_thd) << " conflict: " <<
+ wsrep_thd_conflict_state(impl_trx->mysql_thd, false) << " seqno: " <<
+ wsrep_thd_trx_seqno(impl_trx->mysql_thd) << " SQL: " <<
+ wsrep_thd_query(impl_trx->mysql_thd);
+
+ trx_t* otrx = other_lock->trx;
+
+ ib::info() << "WSREP other lock:\n BF:" <<
+ ((wsrep_thd_is_BF(otrx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " <<
+ wsrep_thd_exec_mode(otrx->mysql_thd) << " conflict: " <<
+ wsrep_thd_conflict_state(otrx->mysql_thd, false) << " seqno: " <<
+ wsrep_thd_trx_seqno(otrx->mysql_thd) << " SQL: " <<
+ wsrep_thd_query(otrx->mysql_thd);
+ }
- ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
- block, heap_no, impl_trx));
+ if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
+ block, heap_no,
+ impl_trx)) {
+ ib::info() << "WSREP impl BF lock conflict";
+ }
+ } else
+#endif /* WITH_WSREP */
+ ut_ad(lock_get_wait(other_lock));
+ ut_ad(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
+ block, heap_no, impl_trx));
}
+
+ mutex_exit(&impl_trx->mutex);
}
- for (lock = lock_rec_get_first(block, heap_no);
+ for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
lock != NULL;
lock = lock_rec_get_next_const(heap_no, lock)) {
- ut_a(trx_in_trx_list(lock->trx));
+ ut_ad(!trx_is_ac_nl_ro(lock->trx));
if (index) {
ut_a(lock->index == index);
@@ -6664,7 +5296,7 @@ lock_rec_queue_validate(
if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) {
- enum lock_mode mode;
+ lock_mode mode;
if (lock_get_mode(lock) == LOCK_S) {
mode = LOCK_X;
@@ -6674,7 +5306,7 @@ lock_rec_queue_validate(
const lock_t* other_lock
= lock_rec_other_has_expl_req(
- mode, 0, 0, block, heap_no,
+ mode, block, false, heap_no,
lock->trx);
#ifdef WITH_WSREP
ut_a(!other_lock
@@ -6684,12 +5316,8 @@ lock_rec_queue_validate(
#else
ut_a(!other_lock);
#endif /* WITH_WSREP */
+ } else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) {
-
- } else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)
- && innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS) {
- // If using VATS, it's possible that a wait lock is inserted to a place in the list
- // such that it does not need to wait.
ut_a(lock_rec_has_to_wait_in_queue(lock));
}
}
@@ -6708,7 +5336,7 @@ func_exit:
/*********************************************************************//**
Validates the record lock queues on a page.
-@return TRUE if ok */
+@return TRUE if ok */
static
ibool
lock_rec_validate_page(
@@ -6721,8 +5349,8 @@ lock_rec_validate_page(
ulint nth_bit = 0;
ulint i;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
rec_offs_init(offsets_);
ut_ad(!lock_mutex_own());
@@ -6730,16 +5358,15 @@ lock_rec_validate_page(
lock_mutex_enter();
mutex_enter(&trx_sys->mutex);
loop:
- lock = lock_rec_get_first_on_page_addr(buf_block_get_space(block),
- buf_block_get_page_no(block));
+ lock = lock_rec_get_first_on_page_addr(
+ lock_sys->rec_hash,
+ block->page.id.space(), block->page.id.page_no());
if (!lock) {
goto function_exit;
}
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- ut_a(!block->page.file_page_was_freed);
-#endif
+ ut_ad(!block->page.file_page_was_freed);
for (i = 0; i < nth_lock; i++) {
@@ -6750,28 +5377,24 @@ loop:
}
}
- ut_a(trx_in_trx_list(lock->trx));
+ ut_ad(!trx_is_ac_nl_ro(lock->trx));
-# ifdef UNIV_SYNC_DEBUG
/* Only validate the record queues when this thread is not
- holding a space->latch. Deadlocks are possible due to
- latching order violation when UNIV_DEBUG is defined while
- UNIV_SYNC_DEBUG is not. */
- if (!sync_thread_levels_contains(SYNC_FSP))
-# endif /* UNIV_SYNC_DEBUG */
+ holding a space->latch. */
+ if (!sync_check_find(SYNC_FSP))
for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) {
- if (i == 1 || lock_rec_get_nth_bit(lock, i)) {
+ if (i == PAGE_HEAP_NO_SUPREMUM
+ || lock_rec_get_nth_bit(lock, i)) {
rec = page_find_rec_with_heap_no(block->frame, i);
ut_a(rec);
+ ut_ad(!lock_rec_get_nth_bit(lock, i)
+ || page_rec_is_leaf(rec));
offsets = rec_get_offsets(rec, lock->index, offsets,
- ULINT_UNDEFINED, &heap);
-#if 0
- fprintf(stderr,
- "Validating %u %u\n",
- block->page.space, block->page.offset);
-#endif
+ true, ULINT_UNDEFINED,
+ &heap);
+
/* If this thread is holding the file space
latch (fil_space_t::latch), the following
check WILL break the latching order and may
@@ -6795,7 +5418,7 @@ function_exit:
lock_mutex_exit();
mutex_exit(&trx_sys->mutex);
- if (UNIV_LIKELY_NULL(heap)) {
+ if (heap != NULL) {
mem_heap_free(heap);
}
return(TRUE);
@@ -6803,20 +5426,19 @@ function_exit:
/*********************************************************************//**
Validates the table locks.
-@return TRUE if ok */
+@return TRUE if ok */
static
ibool
lock_validate_table_locks(
/*======================*/
- const trx_list_t* trx_list) /*!< in: trx list */
+ const trx_ut_list_t* trx_list) /*!< in: trx list */
{
const trx_t* trx;
ut_ad(lock_mutex_own());
- ut_ad(mutex_own(&trx_sys->mutex));
+ ut_ad(trx_sys_mutex_own());
- ut_ad(trx_list == &trx_sys->rw_trx_list
- || trx_list == &trx_sys->ro_trx_list);
+ ut_ad(trx_list == &trx_sys->rw_trx_list);
for (trx = UT_LIST_GET_FIRST(*trx_list);
trx != NULL;
@@ -6824,8 +5446,7 @@ lock_validate_table_locks(
const lock_t* lock;
- assert_trx_in_list(trx);
- ut_ad(trx->read_only == (trx_list == &trx_sys->ro_trx_list));
+ check_trx_state(trx);
for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
lock != NULL;
@@ -6845,7 +5466,7 @@ lock_validate_table_locks(
/*********************************************************************//**
Validate record locks up to a limit.
@return lock at limit or NULL if no more locks in the hash bucket */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
+static MY_ATTRIBUTE((warn_unused_result))
const lock_t*
lock_rec_validate(
/*==============*/
@@ -6855,7 +5476,7 @@ lock_rec_validate(
(space, page_no) */
{
ut_ad(lock_mutex_own());
- ut_ad(mutex_own(&trx_sys->mutex));
+ ut_ad(trx_sys_mutex_own());
for (const lock_t* lock = static_cast<const lock_t*>(
HASH_GET_FIRST(lock_sys->rec_hash, start));
@@ -6864,8 +5485,8 @@ lock_rec_validate(
ib_uint64_t current;
- ut_a(trx_in_trx_list(lock->trx));
- ut_a(lock_get_type(lock) == LOCK_REC);
+ ut_ad(!trx_is_ac_nl_ro(lock->trx));
+ ut_ad(lock_get_type(lock) == LOCK_REC);
current = ut_ull_create(
lock->un_member.rec_lock.space,
@@ -6900,17 +5521,29 @@ lock_rec_block_validate(
/* Make sure that the tablespace is not deleted while we are
trying to access the page. */
if (fil_space_t* space = fil_space_acquire_silent(space_id)) {
-
+ dberr_t err = DB_SUCCESS;
mtr_start(&mtr);
+
block = buf_page_get_gen(
- space_id, fsp_flags_get_zip_size(space->flags),
- page_no, RW_X_LATCH, NULL,
+ page_id_t(space_id, page_no),
+ page_size_t(space->flags),
+ RW_X_LATCH, NULL,
BUF_GET_POSSIBLY_FREED,
- __FILE__, __LINE__, &mtr);
+ __FILE__, __LINE__, &mtr, &err);
+
+ if (err != DB_SUCCESS) {
+ ib::error() << "Lock rec block validate failed for tablespace "
+ << space->name
+ << " space_id " << space_id
+ << " page_no " << page_no << " err " << err;
+ }
+
if (block) {
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
+
ut_ad(lock_rec_validate_page(block));
}
+
mtr_commit(&mtr);
fil_space_release(space);
@@ -6919,36 +5552,39 @@ lock_rec_block_validate(
/*********************************************************************//**
Validates the lock system.
-@return TRUE if ok */
+@return TRUE if ok */
static
bool
lock_validate()
/*===========*/
{
- typedef std::pair<ulint, ulint> page_addr_t;
- typedef std::set<page_addr_t> page_addr_set;
- page_addr_set pages;
+ typedef std::pair<ulint, ulint> page_addr_t;
+ typedef std::set<
+ page_addr_t,
+ std::less<page_addr_t>,
+ ut_allocator<page_addr_t> > page_addr_set;
+
+ page_addr_set pages;
lock_mutex_enter();
mutex_enter(&trx_sys->mutex);
ut_a(lock_validate_table_locks(&trx_sys->rw_trx_list));
- ut_a(lock_validate_table_locks(&trx_sys->ro_trx_list));
/* Iterate over all the record locks and validate the locks. We
don't want to hog the lock_sys_t::mutex and the trx_sys_t::mutex.
Release both mutexes during the validation check. */
for (ulint i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
- const lock_t* lock;
ib_uint64_t limit = 0;
- while ((lock = lock_rec_validate(i, &limit)) != 0) {
-
- ulint space = lock->un_member.rec_lock.space;
- ulint page_no = lock->un_member.rec_lock.page_no;
-
- pages.insert(std::make_pair(space, page_no));
+ while (const lock_t* lock = lock_rec_validate(i, &limit)) {
+ if (lock_rec_find_set_bit(lock) == ULINT_UNDEFINED) {
+ /* The lock bitmap is empty; ignore it. */
+ continue;
+ }
+ const lock_rec_t& l = lock->un_member.rec_lock;
+ pages.insert(std::make_pair(l.space, l.page_no));
}
}
@@ -6972,8 +5608,7 @@ a record. If they do, first tests if the query thread should anyway
be suspended for some reason; if not, then puts the transaction and
the query thread to the lock wait state and inserts a waiting request
for a gap x-lock to the lock queue.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
dberr_t
lock_rec_insert_check_and_lock(
/*===========================*/
@@ -6989,29 +5624,26 @@ lock_rec_insert_check_and_lock(
LOCK_GAP type locks from the successor
record */
{
- const rec_t* next_rec;
- trx_t* trx;
- lock_t* lock;
- dberr_t err;
- ulint next_rec_heap_no;
- ibool inherit_in = *inherit;
-#ifdef WITH_WSREP
- lock_t* c_lock=NULL;
-#endif
-
ut_ad(block->frame == page_align(rec));
ut_ad(!dict_index_is_online_ddl(index)
- || dict_index_is_clust(index)
+ || index->is_primary()
|| (flags & BTR_CREATE_FLAG));
+ ut_ad(mtr->is_named_space(index->space));
if (flags & BTR_NO_LOCKING_FLAG) {
return(DB_SUCCESS);
}
- trx = thr_get_trx(thr);
- next_rec = page_rec_get_next_const(rec);
- next_rec_heap_no = page_rec_get_heap_no(next_rec);
+ ut_ad(!index->table->is_temporary());
+ ut_ad(page_is_leaf(block->frame));
+
+ dberr_t err;
+ lock_t* lock;
+ ibool inherit_in = *inherit;
+ trx_t* trx = thr_get_trx(thr);
+ const rec_t* next_rec = page_rec_get_next_const(rec);
+ ulint heap_no = page_rec_get_heap_no(next_rec);
lock_mutex_enter();
/* Because this code is invoked for a running transaction by
@@ -7023,9 +5655,9 @@ lock_rec_insert_check_and_lock(
BTR_NO_LOCKING_FLAG and skip the locking altogether. */
ut_ad(lock_table_has(trx, index->table, LOCK_IX));
- lock = lock_rec_get_first(block, next_rec_heap_no);
+ lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
- if (UNIV_LIKELY(lock == NULL)) {
+ if (lock == NULL) {
/* We optimize CPU time usage in the simplest case */
lock_mutex_exit();
@@ -7042,6 +5674,12 @@ lock_rec_insert_check_and_lock(
return(DB_SUCCESS);
}
+ /* Spatial index does not use GAP lock protection. It uses
+ "predicate lock" to protect the "range" */
+ if (dict_index_is_spatial(index)) {
+ return(DB_SUCCESS);
+ }
+
*inherit = TRUE;
/* If another transaction has an explicit lock request which locks
@@ -7054,30 +5692,21 @@ lock_rec_insert_check_and_lock(
had to wait for their insert. Both had waiting gap type lock requests
on the successor, which produced an unnecessary deadlock. */
+ const ulint type_mode = LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION;
+
+ if (
#ifdef WITH_WSREP
- if ((c_lock = lock_rec_other_has_conflicting(
- static_cast<enum lock_mode>(
- LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION),
- block, next_rec_heap_no, trx))) {
-#else
- if (lock_rec_other_has_conflicting(
- static_cast<enum lock_mode>(
- LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION),
- block, next_rec_heap_no, trx)) {
+ lock_t* c_lock =
#endif /* WITH_WSREP */
-
+ lock_rec_other_has_conflicting(type_mode, block, heap_no, trx)) {
/* Note that we may get DB_SUCCESS also here! */
trx_mutex_enter(trx);
-#ifdef WITH_WSREP
- err = lock_rec_enqueue_waiting(c_lock,
- LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION,
- block, next_rec_heap_no, index, thr);
-#else
err = lock_rec_enqueue_waiting(
- LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION,
- block, next_rec_heap_no, index, thr);
+#ifdef WITH_WSREP
+ c_lock,
#endif /* WITH_WSREP */
+ type_mode, block, heap_no, index, thr, NULL);
trx_mutex_exit(trx);
} else {
@@ -7094,10 +5723,10 @@ lock_rec_insert_check_and_lock(
if (!inherit_in || dict_index_is_clust(index)) {
break;
}
+
/* Update the page max trx id field */
- page_update_max_trx_id(block,
- buf_block_get_page_zip(block),
- trx->id, mtr);
+ page_update_max_trx_id(
+ block, buf_block_get_page_zip(block), trx->id, mtr);
default:
/* We only care about the two return values. */
break;
@@ -7106,17 +5735,17 @@ lock_rec_insert_check_and_lock(
#ifdef UNIV_DEBUG
{
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- const ulint* offsets;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ const offset_t* offsets;
rec_offs_init(offsets_);
- offsets = rec_get_offsets(next_rec, index, offsets_,
+ offsets = rec_get_offsets(next_rec, index, offsets_, true,
ULINT_UNDEFINED, &heap);
ut_ad(lock_rec_queue_validate(
FALSE, block, next_rec, index, offsets));
- if (UNIV_LIKELY_NULL(heap)) {
+ if (heap != NULL) {
mem_heap_free(heap);
}
}
@@ -7126,6 +5755,43 @@ lock_rec_insert_check_and_lock(
}
/*********************************************************************//**
+Creates an explicit record lock for a running transaction that currently only
+has an implicit lock on the record. The transaction instance must have a
+reference count > 0 so that it can't be committed and freed before this
+function has completed. */
+static
+void
+lock_rec_convert_impl_to_expl_for_trx(
+/*==================================*/
+ const buf_block_t* block, /*!< in: buffer block of rec */
+ const rec_t* rec, /*!< in: user record on page */
+ dict_index_t* index, /*!< in: index of record */
+ trx_t* trx, /*!< in/out: active transaction */
+ ulint heap_no)/*!< in: rec heap number to lock */
+{
+ ut_ad(page_rec_is_leaf(rec));
+
+ DEBUG_SYNC_C("before_lock_rec_convert_impl_to_expl_for_trx");
+ lock_mutex_enter();
+ trx_mutex_enter(trx);
+ ut_ad(trx->is_referenced());
+ ut_ad(!trx_state_eq(trx, TRX_STATE_NOT_STARTED));
+
+ if (!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)
+ && !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
+ block, heap_no, trx)) {
+ lock_rec_add_to_queue(LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP,
+ block, heap_no, index, trx, true);
+ }
+
+ lock_mutex_exit();
+ trx_mutex_exit(trx);
+ trx->release_reference();
+
+ DEBUG_SYNC_C("after_lock_rec_convert_impl_to_expl_for_trx");
+}
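
The function above relies on the caller holding a reference to the transaction so that it cannot be committed and freed mid-conversion. A minimal sketch of that pin/unpin discipline follows; `Trx` and its members are hypothetical stand-ins for `trx_t`, not the real reference-counting code.

```cpp
#include <atomic>
#include <cassert>

struct Trx {
	std::atomic<int> n_ref{0};

	bool is_referenced() const { return n_ref.load() > 0; }

	void pin() { n_ref.fetch_add(1); }

	void release_reference()
	{
		const int prev = n_ref.fetch_sub(1);
		assert(prev > 0);
		(void) prev;   // unused under NDEBUG
	}
};

void convert_impl_to_expl(Trx* trx)
{
	assert(trx->is_referenced()); // caller pinned trx; it cannot be freed
	// ... take the lock-system mutex and create the explicit lock ...
	trx->release_reference();     // drop the pin taken by the caller
}
```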
+
+/*********************************************************************//**
If a transaction has an implicit x-lock on a record, but no explicit x-lock
set on the record, sets one for it. */
static
@@ -7135,9 +5801,9 @@ lock_rec_convert_impl_to_expl(
const buf_block_t* block, /*!< in: buffer block of rec */
const rec_t* rec, /*!< in: user record on page */
dict_index_t* index, /*!< in: index of record */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets)/*!< in: rec_get_offsets(rec, index) */
{
- trx_id_t trx_id;
+ trx_t* trx;
ut_ad(!lock_mutex_own());
ut_ad(page_rec_is_user_rec(rec));
@@ -7145,47 +5811,31 @@ lock_rec_convert_impl_to_expl(
ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
if (dict_index_is_clust(index)) {
+ trx_id_t trx_id;
+
trx_id = lock_clust_rec_some_has_impl(rec, index, offsets);
- /* The clustered index record was last modified by
- this transaction. The transaction may have been
- committed a long time ago. */
+
+ trx = trx_rw_is_active(trx_id, NULL, true);
} else {
ut_ad(!dict_index_is_online_ddl(index));
- trx_id = lock_sec_rec_some_has_impl(rec, index, offsets);
- /* The transaction can be committed before the
- trx_is_active(trx_id, NULL) check below, because we are not
- holding lock_mutex. */
- ut_ad(!lock_rec_other_trx_holds_expl(LOCK_S | LOCK_REC_NOT_GAP,
- trx_id, rec, block));
+ trx = lock_sec_rec_some_has_impl(rec, index, offsets);
+
+ ut_ad(!trx || !lock_rec_other_trx_holds_expl(
+ LOCK_S | LOCK_REC_NOT_GAP, trx, rec, block));
}
- if (trx_id != 0) {
- trx_t* impl_trx;
+ if (trx != 0) {
ulint heap_no = page_rec_get_heap_no(rec);
- lock_mutex_enter();
+ ut_ad(trx->is_referenced());
/* If the transaction is still active and has no
- explicit x-lock set on the record, set one for it */
-
- impl_trx = trx_rw_is_active(trx_id, NULL);
-
- /* impl_trx cannot be committed until lock_mutex_exit()
- because lock_trx_release_locks() acquires lock_sys->mutex */
-
- if (impl_trx != NULL
- && !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block,
- heap_no, impl_trx)) {
- ulint type_mode = (LOCK_REC | LOCK_X
- | LOCK_REC_NOT_GAP);
-
- lock_rec_add_to_queue(
- type_mode, block, heap_no, index,
- impl_trx, FALSE);
- }
+ explicit x-lock set on the record, set one for it.
+ trx cannot be committed until the ref count is zero. */
- lock_mutex_exit();
+ lock_rec_convert_impl_to_expl_for_trx(
+ block, rec, index, trx, heap_no);
}
}
@@ -7196,8 +5846,7 @@ first tests if the query thread should anyway be suspended for some
reason; if not, then puts the transaction and the query thread to the
lock wait state and inserts a waiting request for a record x-lock to the
lock queue.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
dberr_t
lock_clust_rec_modify_check_and_lock(
/*=================================*/
@@ -7207,7 +5856,7 @@ lock_clust_rec_modify_check_and_lock(
const rec_t* rec, /*!< in: record which should be
modified */
dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
que_thr_t* thr) /*!< in: query thread */
{
dberr_t err;
@@ -7221,6 +5870,7 @@ lock_clust_rec_modify_check_and_lock(
return(DB_SUCCESS);
}
+ ut_ad(!dict_table_is_temporary(index->table));
heap_no = rec_offs_comp(offsets)
? rec_get_heap_no_new(rec)
@@ -7232,9 +5882,8 @@ lock_clust_rec_modify_check_and_lock(
lock_rec_convert_impl_to_expl(block, rec, index, offsets);
lock_mutex_enter();
- trx_t* trx = thr_get_trx(thr);
- ut_ad(lock_table_has(trx, index->table, LOCK_IX));
+ ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
block, heap_no, index, thr);
@@ -7245,7 +5894,7 @@ lock_clust_rec_modify_check_and_lock(
ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
- if (UNIV_UNLIKELY(err == DB_SUCCESS_LOCKED_REC)) {
+ if (err == DB_SUCCESS_LOCKED_REC) {
err = DB_SUCCESS;
}
@@ -7255,8 +5904,7 @@ lock_clust_rec_modify_check_and_lock(
/*********************************************************************//**
Checks if locks of other transactions prevent an immediate modify (delete
mark or delete unmark) of a secondary index record.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
dberr_t
lock_sec_rec_modify_check_and_lock(
/*===============================*/
@@ -7279,11 +5927,13 @@ lock_sec_rec_modify_check_and_lock(
ut_ad(!dict_index_is_clust(index));
ut_ad(!dict_index_is_online_ddl(index) || (flags & BTR_CREATE_FLAG));
ut_ad(block->frame == page_align(rec));
+ ut_ad(mtr->is_named_space(index->space));
if (flags & BTR_NO_LOCKING_FLAG) {
return(DB_SUCCESS);
}
+ ut_ad(!dict_table_is_temporary(index->table));
heap_no = page_rec_get_heap_no(rec);
@@ -7292,10 +5942,9 @@ lock_sec_rec_modify_check_and_lock(
index record, and this would not have been possible if another active
transaction had modified this secondary index record. */
- trx_t* trx = thr_get_trx(thr);
lock_mutex_enter();
- ut_ad(lock_table_has(trx, index->table, LOCK_IX));
+ ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
block, heap_no, index, thr);
@@ -7307,17 +5956,17 @@ lock_sec_rec_modify_check_and_lock(
#ifdef UNIV_DEBUG
{
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- const ulint* offsets;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ const offset_t* offsets;
rec_offs_init(offsets_);
- offsets = rec_get_offsets(rec, index, offsets_,
+ offsets = rec_get_offsets(rec, index, offsets_, true,
ULINT_UNDEFINED, &heap);
ut_ad(lock_rec_queue_validate(
FALSE, block, rec, index, offsets));
- if (UNIV_LIKELY_NULL(heap)) {
+ if (heap != NULL) {
mem_heap_free(heap);
}
}
@@ -7340,9 +5989,7 @@ lock_sec_rec_modify_check_and_lock(
/*********************************************************************//**
Like lock_clust_rec_read_check_and_lock(), but reads a
secondary index record.
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
-or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
+@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, or DB_DEADLOCK */
dberr_t
lock_sec_rec_read_check_and_lock(
/*=============================*/
@@ -7354,8 +6001,8 @@ lock_sec_rec_read_check_and_lock(
be read or passed over by a
read cursor */
dict_index_t* index, /*!< in: secondary index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- enum lock_mode mode, /*!< in: mode of the lock which
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
+ lock_mode mode, /*!< in: mode of the lock which
the read cursor should set on
records: LOCK_S or LOCK_X; the
latter is possible in
@@ -7374,7 +6021,9 @@ lock_sec_rec_read_check_and_lock(
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(mode == LOCK_X || mode == LOCK_S);
- if (flags & BTR_NO_LOCKING_FLAG) {
+ if ((flags & BTR_NO_LOCKING_FLAG)
+ || srv_read_only_mode
+ || dict_table_is_temporary(index->table)) {
return(DB_SUCCESS);
}
@@ -7392,13 +6041,12 @@ lock_sec_rec_read_check_and_lock(
lock_rec_convert_impl_to_expl(block, rec, index, offsets);
}
- trx_t* trx = thr_get_trx(thr);
lock_mutex_enter();
ut_ad(mode != LOCK_X
- || lock_table_has(trx, index->table, LOCK_IX));
+ || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
ut_ad(mode != LOCK_S
- || lock_table_has(trx, index->table, LOCK_IS));
+ || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
err = lock_rec_lock(FALSE, mode | gap_mode,
block, heap_no, index, thr);
@@ -7419,9 +6067,7 @@ if the query thread should anyway be suspended for some reason; if not, then
puts the transaction and the query thread to the lock wait state and inserts a
waiting request for a record lock to the lock queue. Sets the requested mode
lock on the record.
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
-or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
+@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, or DB_DEADLOCK */
dberr_t
lock_clust_rec_read_check_and_lock(
/*===============================*/
@@ -7433,8 +6079,8 @@ lock_clust_rec_read_check_and_lock(
be read or passed over by a
read cursor */
dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- enum lock_mode mode, /*!< in: mode of the lock which
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
+ lock_mode mode, /*!< in: mode of the lock which
the read cursor should set on
records: LOCK_S or LOCK_X; the
latter is possible in
@@ -7453,28 +6099,28 @@ lock_clust_rec_read_check_and_lock(
|| gap_mode == LOCK_REC_NOT_GAP);
ut_ad(rec_offs_validate(rec, index, offsets));
- if (flags & BTR_NO_LOCKING_FLAG) {
+ if ((flags & BTR_NO_LOCKING_FLAG)
+ || srv_read_only_mode
+ || dict_table_is_temporary(index->table)) {
return(DB_SUCCESS);
}
heap_no = page_rec_get_heap_no(rec);
- if (UNIV_LIKELY(heap_no != PAGE_HEAP_NO_SUPREMUM)) {
+ if (heap_no != PAGE_HEAP_NO_SUPREMUM) {
lock_rec_convert_impl_to_expl(block, rec, index, offsets);
}
lock_mutex_enter();
- trx_t* trx = thr_get_trx(thr);
ut_ad(mode != LOCK_X
- || lock_table_has(trx, index->table, LOCK_IX));
+ || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
ut_ad(mode != LOCK_S
- || lock_table_has(trx, index->table, LOCK_IS));
+ || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
- err = lock_rec_lock(FALSE, mode | gap_mode,
- block, heap_no, index, thr);
+ err = lock_rec_lock(FALSE, mode | gap_mode, block, heap_no, index, thr);
MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
@@ -7482,6 +6128,8 @@ lock_clust_rec_read_check_and_lock(
ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
+ DEBUG_SYNC_C("after_lock_clust_rec_read_check_and_lock");
+
return(err);
}
/*********************************************************************//**
@@ -7493,8 +6141,7 @@ waiting request for a record lock to the lock queue. Sets the requested mode
lock on the record. This is an alternative version of
lock_clust_rec_read_check_and_lock() that does not require the parameter
"offsets".
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
dberr_t
lock_clust_rec_read_check_and_lock_alt(
/*===================================*/
@@ -7506,7 +6153,7 @@ lock_clust_rec_read_check_and_lock_alt(
be read or passed over by a
read cursor */
dict_index_t* index, /*!< in: clustered index */
- enum lock_mode mode, /*!< in: mode of the lock which
+ lock_mode mode, /*!< in: mode of the lock which
the read cursor should set on
records: LOCK_S or LOCK_X; the
latter is possible in
@@ -7516,12 +6163,13 @@ lock_clust_rec_read_check_and_lock_alt(
que_thr_t* thr) /*!< in: query thread */
{
mem_heap_t* tmp_heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
dberr_t err;
rec_offs_init(offsets_);
- offsets = rec_get_offsets(rec, index, offsets,
+ ut_ad(page_rec_is_leaf(rec));
+ offsets = rec_get_offsets(rec, index, offsets, true,
ULINT_UNDEFINED, &tmp_heap);
err = lock_clust_rec_read_check_and_lock(flags, block, rec, index,
offsets, mode, gap_mode, thr);
@@ -7529,7 +6177,7 @@ lock_clust_rec_read_check_and_lock_alt(
mem_heap_free(tmp_heap);
}
- if (UNIV_UNLIKELY(err == DB_SUCCESS_LOCKED_REC)) {
+ if (err == DB_SUCCESS_LOCKED_REC) {
err = DB_SUCCESS;
}
@@ -7613,8 +6261,7 @@ lock_release_autoinc_locks(
/*******************************************************************//**
Gets the type of a lock. Non-inline version for using outside of the
lock module.
-@return LOCK_TABLE or LOCK_REC */
-UNIV_INTERN
+@return LOCK_TABLE or LOCK_REC */
ulint
lock_get_type(
/*==========*/
@@ -7624,35 +6271,20 @@ lock_get_type(
}
/*******************************************************************//**
-Gets the trx of the lock. Non-inline version for using outside of the
-lock module.
-@return trx_t* */
-UNIV_INTERN
-trx_t*
-lock_get_trx(
-/*=========*/
- const lock_t* lock) /*!< in: lock */
-{
- return (lock->trx);
-}
-
-/*******************************************************************//**
Gets the id of the transaction owning a lock.
-@return transaction id */
-UNIV_INTERN
+@return transaction id */
trx_id_t
lock_get_trx_id(
/*============*/
const lock_t* lock) /*!< in: lock */
{
- return(lock->trx->id);
+ return(trx_get_id_for_print(lock->trx));
}
/*******************************************************************//**
Gets the mode of a lock in a human readable string.
The string should not be free()'d or modified.
-@return lock mode */
-UNIV_INTERN
+@return lock mode */
const char*
lock_get_mode_str(
/*==============*/
@@ -7698,8 +6330,7 @@ lock_get_mode_str(
/*******************************************************************//**
Gets the type of a lock in a human readable string.
The string should not be free()'d or modified.
-@return lock type */
-UNIV_INTERN
+@return lock type */
const char*
lock_get_type_str(
/*==============*/
@@ -7717,7 +6348,7 @@ lock_get_type_str(
/*******************************************************************//**
Gets the table on which the lock is.
-@return table */
+@return table */
UNIV_INLINE
dict_table_t*
lock_get_table(
@@ -7739,8 +6370,7 @@ lock_get_table(
/*******************************************************************//**
Gets the id of the table on which the lock is.
-@return id of the table */
-UNIV_INTERN
+@return id of the table */
table_id_t
lock_get_table_id(
/*==============*/
@@ -7753,27 +6383,19 @@ lock_get_table_id(
return(table->id);
}
-/*******************************************************************//**
-Gets the name of the table on which the lock is.
-The string should not be free()'d or modified.
-@return name of the table */
-UNIV_INTERN
-const char*
+/** Determine which table a lock is associated with.
+@param[in] lock the lock
+@return name of the table */
+const table_name_t&
lock_get_table_name(
-/*================*/
- const lock_t* lock) /*!< in: lock */
+ const lock_t* lock)
{
- dict_table_t* table;
-
- table = lock_get_table(lock);
-
- return(table->name);
+ return(lock_get_table(lock)->name);
}
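
A sketch of the API change above: returning a const reference to the table's own name member avoids a copy and keeps the getter a one-liner. `Table` and `table_name_t` here are simplified stand-ins, not the real dictionary types.

```cpp
struct table_name_t { const char* m_name; };

struct Table { table_name_t name; };

// Return a reference owned by the table; the caller must not free it.
inline const table_name_t& get_table_name(const Table& table)
{
	return table.name;
}
```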
/*******************************************************************//**
For a record lock, gets the index on which the lock is.
-@return index */
-UNIV_INTERN
+@return index */
const dict_index_t*
lock_rec_get_index(
/*===============*/
@@ -7789,8 +6411,7 @@ lock_rec_get_index(
/*******************************************************************//**
For a record lock, gets the name of the index on which the lock is.
The string should not be free()'d or modified.
-@return name of the index */
-UNIV_INTERN
+@return name of the index */
const char*
lock_rec_get_index_name(
/*====================*/
@@ -7805,8 +6426,7 @@ lock_rec_get_index_name(
/*******************************************************************//**
For a record lock, gets the tablespace number on which the lock is.
-@return tablespace number */
-UNIV_INTERN
+@return tablespace number */
ulint
lock_rec_get_space_id(
/*==================*/
@@ -7819,8 +6439,7 @@ lock_rec_get_space_id(
/*******************************************************************//**
For a record lock, gets the page number on which the lock is.
-@return page number */
-UNIV_INTERN
+@return page number */
ulint
lock_rec_get_page_no(
/*=================*/
@@ -7834,7 +6453,6 @@ lock_rec_get_page_no(
/*********************************************************************//**
Cancels a waiting lock request and releases possible other transactions
waiting behind it. */
-UNIV_INTERN
void
lock_cancel_waiting_and_release(
/*============================*/
@@ -7845,7 +6463,7 @@ lock_cancel_waiting_and_release(
ut_ad(lock_mutex_own());
ut_ad(trx_mutex_own(lock->trx));
- lock->trx->lock.cancel = TRUE;
+ lock->trx->lock.cancel = true;
if (lock_get_type_low(lock) == LOCK_REC) {
@@ -7859,6 +6477,8 @@ lock_cancel_waiting_and_release(
}
lock_table_dequeue(lock);
+ /* Remove the lock from table lock vector too. */
+ lock_trx_table_locks_remove(lock);
}
/* Reset the wait flag and the back pointer to lock in trx. */
@@ -7873,14 +6493,13 @@ lock_cancel_waiting_and_release(
lock_wait_release_thread_if_suspended(thr);
}
- lock->trx->lock.cancel = FALSE;
+ lock->trx->lock.cancel = false;
}
/*********************************************************************//**
Unlocks AUTO_INC type locks that were possibly reserved by a trx. This
function should be called at the end of an SQL statement, by the
connection thread that owns the transaction (trx->mysql_thd). */
-UNIV_INTERN
void
lock_unlock_table_autoinc(
/*======================*/
@@ -7889,8 +6508,10 @@ lock_unlock_table_autoinc(
ut_ad(!lock_mutex_own());
ut_ad(!trx_mutex_own(trx));
ut_ad(!trx->lock.wait_lock);
+
/* This can be invoked on NOT_STARTED, ACTIVE, PREPARED,
but not COMMITTED transactions. */
+
ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED)
|| !trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
@@ -7907,88 +6528,28 @@ lock_unlock_table_autoinc(
}
}
-/*********************************************************************//**
-Releases a transaction's locks, and releases possible other transactions
-waiting because of these locks. Change the state of the transaction to
-TRX_STATE_COMMITTED_IN_MEMORY. */
-UNIV_INTERN
-void
-lock_trx_release_locks(
-/*===================*/
- trx_t* trx) /*!< in/out: transaction */
+/** Release the explicit locks of a committing transaction,
+and release possible other transactions waiting because of these locks. */
+void lock_trx_release_locks(trx_t* trx)
{
- assert_trx_in_list(trx);
-
- if (trx_state_eq(trx, TRX_STATE_PREPARED)
- || trx_state_eq(trx, TRX_STATE_PREPARED_RECOVERED)) {
- mutex_enter(&trx_sys->mutex);
- ut_a(trx_sys->n_prepared_trx > 0);
- trx_sys->n_prepared_trx--;
- if (trx->is_recovered) {
- ut_a(trx_sys->n_prepared_recovered_trx > 0);
- trx_sys->n_prepared_recovered_trx--;
- }
- mutex_exit(&trx_sys->mutex);
- } else {
- ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE)
- || (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)
- && trx->is_recovered
- && !UT_LIST_GET_LEN(trx->lock.trx_locks)));
- }
+ ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks));
- /* The transition of trx->state to TRX_STATE_COMMITTED_IN_MEMORY
- is protected by both the lock_sys->mutex and the trx->mutex. */
lock_mutex_enter();
- trx_mutex_enter(trx);
-
- /* The following assignment makes the transaction committed in memory
- and makes its changes to data visible to other transactions.
- NOTE that there is a small discrepancy from the strict formal
- visibility rules here: a human user of the database can see
- modifications made by another transaction T even before the necessary
- log segment has been flushed to the disk. If the database happens to
- crash before the flush, the user has seen modifications from T which
- will never be a committed transaction. However, any transaction T2
- which sees the modifications of the committing transaction T, and
- which also itself makes modifications to the database, will get an lsn
- larger than the committing transaction T. In the case where the log
- flush fails, and T never gets committed, also T2 will never get
- committed. */
-
- /*--------------------------------------*/
- trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
- /*--------------------------------------*/
-
- /* If the background thread trx_rollback_or_clean_recovered()
- is still active then there is a chance that the rollback
- thread may see this trx as COMMITTED_IN_MEMORY and goes ahead
- to clean it up calling trx_cleanup_at_db_startup(). This can
- happen in the case we are committing a trx here that is left
- in PREPARED state during the crash. Note that commit of the
- rollback of a PREPARED trx happens in the recovery thread
-	while the rollback of other transactions happens in the
- background thread. To avoid this race we unconditionally unset
- the is_recovered flag. */
-
- trx->is_recovered = FALSE;
-
- trx_mutex_exit(trx);
-
lock_release(trx);
+ trx->lock.n_rec_locks = 0;
+ /* We don't remove the locks one by one from the vector for
+ efficiency reasons. We simply reset it because we would have
+ released all the locks anyway. */
+
+ trx->lock.table_locks.clear();
+ ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
+ ut_ad(ib_vector_is_empty(trx->autoinc_locks));
lock_mutex_exit();
+ mem_heap_empty(trx->lock.lock_heap);
}
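
A compact model of the bulk-release idea above: once every lock has been released, the per-transaction bookkeeping is reset in one step instead of unlinking entries one at a time. `TrxLock` is a hypothetical simplification of `trx_t::lock`.

```cpp
#include <vector>

struct Lock { /* ... */ };

struct TrxLock {
	std::vector<Lock*> table_locks;   // stand-in for the lock_list
	unsigned long      n_rec_locks = 0;
};

void release_all(TrxLock& trx_lock)
{
	// ... release each lock and grant it to any waiters ...

	trx_lock.n_rec_locks = 0;
	trx_lock.table_locks.clear();   // one reset, no per-lock removal
}
```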
-/*********************************************************************//**
-Check whether the transaction has already been rolled back because it
-was selected as a deadlock victim, or if it has to wait then cancel
-the wait lock.
-@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-lock_trx_handle_wait(
-/*=================*/
- trx_t* trx) /*!< in/out: trx lock state */
+static inline dberr_t lock_trx_handle_wait_low(trx_t* trx)
{
ut_ad(lock_mutex_own());
ut_ad(trx_mutex_own(trx));
@@ -8006,9 +6567,26 @@ lock_trx_handle_wait(
}
/*********************************************************************//**
+Check whether the transaction has already been rolled back because it
+was selected as a deadlock victim, or if it has to wait then cancel
+the wait lock.
+@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */
+dberr_t
+lock_trx_handle_wait(
+/*=================*/
+ trx_t* trx) /*!< in/out: trx lock state */
+{
+ lock_mutex_enter();
+ trx_mutex_enter(trx);
+ dberr_t err = lock_trx_handle_wait_low(trx);
+ lock_mutex_exit();
+ trx_mutex_exit(trx);
+ return err;
+}
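
The split into `lock_trx_handle_wait_low()` and a public wrapper follows a common pattern: the `_low` variant assumes both mutexes are held, while the wrapper acquires and releases them around the call. A minimal sketch with simplified stand-ins (`lock_sys_mutex`, `Trx`) rather than the real latches:

```cpp
#include <mutex>

std::mutex lock_sys_mutex;          // stand-in for lock_sys->mutex

struct Trx { std::mutex mutex; };   // stand-in for trx_t

// assumes both mutexes are already held by the caller
int handle_wait_low(Trx*)
{
	return 0;   // e.g. DB_SUCCESS
}

int handle_wait(Trx* trx)
{
	// take both latches, call the _low variant, release on scope exit
	std::scoped_lock guard(lock_sys_mutex, trx->mutex);
	return handle_wait_low(trx);
}
```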
+
+/*********************************************************************//**
Get the number of locks on a table.
@return number of locks */
-UNIV_INTERN
ulint
lock_table_get_n_locks(
/*===================*/
@@ -8028,7 +6606,7 @@ lock_table_get_n_locks(
#ifdef UNIV_DEBUG
/*******************************************************************//**
Do an exhaustive check for any locks (table or rec) against the table.
-@return lock if found */
+@return lock if found */
static
const lock_t*
lock_table_locks_lookup(
@@ -8037,25 +6615,19 @@ lock_table_locks_lookup(
any locks held on records in
this table or on the table
itself */
- const trx_list_t* trx_list) /*!< in: trx list to check */
+ const trx_ut_list_t* trx_list) /*!< in: trx list to check */
{
- trx_t* trx;
-
ut_a(table != NULL);
ut_ad(lock_mutex_own());
- ut_ad(mutex_own(&trx_sys->mutex));
-
- ut_ad(trx_list == &trx_sys->rw_trx_list
- || trx_list == &trx_sys->ro_trx_list);
+ ut_ad(trx_sys_mutex_own());
- for (trx = UT_LIST_GET_FIRST(*trx_list);
+ for (trx_t* trx = UT_LIST_GET_FIRST(*trx_list);
trx != NULL;
trx = UT_LIST_GET_NEXT(trx_list, trx)) {
+ const lock_t* lock;
- const lock_t* lock;
-
- assert_trx_in_list(trx);
- ut_ad(trx->read_only == (trx_list == &trx_sys->ro_trx_list));
+ trx_mutex_enter(trx);
+ check_trx_state(trx);
for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
lock != NULL;
@@ -8067,23 +6639,28 @@ lock_table_locks_lookup(
ut_ad(!dict_index_is_online_ddl(lock->index)
|| dict_index_is_clust(lock->index));
if (lock->index->table == table) {
- return(lock);
+ break;
}
} else if (lock->un_member.tab_lock.table == table) {
- return(lock);
+ break;
}
}
+
+ trx_mutex_exit(trx);
+
+ if (lock) {
+ return lock;
+ }
}
- return(NULL);
+ return NULL;
}
#endif /* UNIV_DEBUG */
/*******************************************************************//**
Check if there are any locks (table or rec) against table.
-@return TRUE if table has either table or record locks. */
-UNIV_INTERN
-ibool
+@return true if table has either table or record locks. */
+bool
lock_table_has_locks(
/*=================*/
const dict_table_t* table) /*!< in: check if there are any locks
@@ -8101,7 +6678,6 @@ lock_table_has_locks(
mutex_enter(&trx_sys->mutex);
ut_ad(!lock_table_locks_lookup(table, &trx_sys->rw_trx_list));
- ut_ad(!lock_table_locks_lookup(table, &trx_sys->ro_trx_list));
mutex_exit(&trx_sys->mutex);
}
@@ -8112,29 +6688,57 @@ lock_table_has_locks(
return(has_locks);
}
+/*******************************************************************//**
+Initialise the table lock list. */
+void
+lock_table_lock_list_init(
+/*======================*/
+ table_lock_list_t* lock_list) /*!< List to initialise */
+{
+ UT_LIST_INIT(*lock_list, &lock_table_t::locks);
+}
+
+/*******************************************************************//**
+Initialise the trx lock list. */
+void
+lock_trx_lock_list_init(
+/*====================*/
+ trx_lock_list_t* lock_list) /*!< List to initialise */
+{
+ UT_LIST_INIT(*lock_list, &lock_t::trx_locks);
+}
+
+/*******************************************************************//**
+Set the lock system timeout event. */
+void
+lock_set_timeout_event()
+/*====================*/
+{
+ os_event_set(lock_sys->timeout_event);
+}
+
#ifdef UNIV_DEBUG
/*******************************************************************//**
Check if the transaction holds any locks on the sys tables
or its records.
-@return the strongest lock found on any sys table or 0 for none */
-UNIV_INTERN
+@return the strongest lock found on any sys table or 0 for none */
const lock_t*
lock_trx_has_sys_table_locks(
/*=========================*/
const trx_t* trx) /*!< in: transaction to check */
{
- lint i;
const lock_t* strongest_lock = 0;
lock_mode strongest = LOCK_NONE;
lock_mutex_enter();
+ const lock_list::const_iterator end = trx->lock.table_locks.end();
+ lock_list::const_iterator it = trx->lock.table_locks.begin();
+
	/* Find a valid mode. Note: the list of table locks can be empty. */
- for (i = ib_vector_size(trx->lock.table_locks) - 1; i >= 0; --i) {
- const lock_t* lock;
- lock = *static_cast<const lock_t**>(
- ib_vector_get(trx->lock.table_locks, i));
+ for (/* No op */; it != end; ++it) {
+ const lock_t* lock = *it;
if (lock != NULL
&& dict_is_sys_table(lock->un_member.tab_lock.table->id)) {
@@ -8151,11 +6755,8 @@ lock_trx_has_sys_table_locks(
return(NULL);
}
- for (/* No op */; i >= 0; --i) {
- const lock_t* lock;
-
- lock = *static_cast<const lock_t**>(
- ib_vector_get(trx->lock.table_locks, i));
+ for (/* No op */; it != end; ++it) {
+ const lock_t* lock = *it;
if (lock == NULL) {
continue;
@@ -8182,8 +6783,7 @@ lock_trx_has_sys_table_locks(
/*******************************************************************//**
Check if the transaction holds an exclusive lock on a record.
-@return whether the locks are held */
-UNIV_INTERN
+@return whether the locks are held */
bool
lock_trx_has_rec_x_lock(
/*====================*/
@@ -8195,39 +6795,539 @@ lock_trx_has_rec_x_lock(
ut_ad(heap_no > PAGE_HEAP_NO_SUPREMUM);
lock_mutex_enter();
- ut_a(lock_table_has(trx, table, LOCK_IX));
+ ut_a(lock_table_has(trx, table, LOCK_IX)
+ || dict_table_is_temporary(table));
ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
- block, heap_no, trx));
+ block, heap_no, trx)
+ || dict_table_is_temporary(table));
lock_mutex_exit();
return(true);
}
#endif /* UNIV_DEBUG */
-/*******************************************************************//**
-Get lock mode and table/index name
-@return string containing lock info */
-std::string
-lock_get_info(
- const lock_t* lock)
-{
- std::string info;
- std::string mode("mode ");
- std::string index("index ");
- std::string table("table ");
- std::string n_uniq(" n_uniq");
- std::string n_user(" n_user");
- std::string lock_mode((lock_get_mode_str(lock)));
- std::string iname(lock->index->name);
- std::string tname(lock->index->table_name);
-
-#define SSTR( x ) reinterpret_cast< std::ostringstream & >( \
- ( std::ostringstream() << std::dec << x ) ).str()
-
- info = mode + lock_mode
- + index + iname
- + table + tname
- + n_uniq + SSTR(lock->index->n_uniq)
- + n_user + SSTR(lock->index->n_user_defined_cols);
-
- return info;
+/** rewind(3) the file used for storing the latest detected deadlock and
+print a heading message to stderr if printing of all deadlocks to stderr
+is enabled. */
+void
+DeadlockChecker::start_print()
+{
+ ut_ad(lock_mutex_own());
+
+ rewind(lock_latest_err_file);
+ ut_print_timestamp(lock_latest_err_file);
+
+ if (srv_print_all_deadlocks) {
+ ib::info() << "Transactions deadlock detected, dumping"
+ " detailed information.";
+ }
+}
+
+/** Print a message to the deadlock file and possibly to stderr.
+@param msg message to print */
+void
+DeadlockChecker::print(const char* msg)
+{
+ fputs(msg, lock_latest_err_file);
+
+ if (srv_print_all_deadlocks) {
+ ib::info() << msg;
+ }
+}
+
+/** Print transaction data to the deadlock file and possibly to stderr.
+@param trx transaction
+@param max_query_len max query length to print */
+void
+DeadlockChecker::print(const trx_t* trx, ulint max_query_len)
+{
+ ut_ad(lock_mutex_own());
+
+ ulint n_rec_locks = lock_number_of_rows_locked(&trx->lock);
+ ulint n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
+ ulint heap_size = mem_heap_get_size(trx->lock.lock_heap);
+
+ mutex_enter(&trx_sys->mutex);
+
+ trx_print_low(lock_latest_err_file, trx, max_query_len,
+ n_rec_locks, n_trx_locks, heap_size);
+
+ if (srv_print_all_deadlocks) {
+ trx_print_low(stderr, trx, max_query_len,
+ n_rec_locks, n_trx_locks, heap_size);
+ }
+
+ mutex_exit(&trx_sys->mutex);
+}
+
+/** Print lock data to the deadlock file and possibly to stderr.
+@param lock record or table type lock */
+void
+DeadlockChecker::print(const lock_t* lock)
+{
+ ut_ad(lock_mutex_own());
+
+ if (lock_get_type_low(lock) == LOCK_REC) {
+ mtr_t mtr;
+ lock_rec_print(lock_latest_err_file, lock, mtr);
+
+ if (srv_print_all_deadlocks) {
+ lock_rec_print(stderr, lock, mtr);
+ }
+ } else {
+ lock_table_print(lock_latest_err_file, lock);
+
+ if (srv_print_all_deadlocks) {
+ lock_table_print(stderr, lock);
+ }
+ }
+}
+
+/** Get the next lock in the queue that is owned by a transaction whose
+sub-tree has not already been searched.
+Note: "next" here means PREV for table locks.
+
+@param lock Lock in queue
+@param heap_no heap_no if lock is a record lock else ULINT_UNDEFINED
+
+@return next lock or NULL if at end of queue */
+const lock_t*
+DeadlockChecker::get_next_lock(const lock_t* lock, ulint heap_no) const
+{
+ ut_ad(lock_mutex_own());
+
+ do {
+ if (lock_get_type_low(lock) == LOCK_REC) {
+ ut_ad(heap_no != ULINT_UNDEFINED);
+ lock = lock_rec_get_next_const(heap_no, lock);
+ } else {
+ ut_ad(heap_no == ULINT_UNDEFINED);
+ ut_ad(lock_get_type_low(lock) == LOCK_TABLE);
+
+ lock = UT_LIST_GET_NEXT(
+ un_member.tab_lock.locks, lock);
+ }
+
+ } while (lock != NULL && is_visited(lock));
+
+ ut_ad(lock == NULL
+ || lock_get_type_low(lock) == lock_get_type_low(m_wait_lock));
+
+ return(lock);
+}
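
A reduced model of the skip-visited traversal above: walk the queue and skip any lock whose owning transaction's subtree has already been searched. `Lock` and its `visited` flag are stand-ins for the real lock queue and `is_visited()`.

```cpp
struct Lock {
	Lock* next;      // next lock in the queue
	bool  visited;   // owner's subtree already searched
};

const Lock* get_next_unvisited(const Lock* lock)
{
	do {
		lock = lock->next;
	} while (lock != nullptr && lock->visited);

	return lock;   // nullptr when the end of the queue is reached
}
```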
+
+/** Get the first lock to search. The search starts from the current
+wait_lock. What we are really interested in is an edge from the
+current wait_lock's owning transaction to another transaction that has
+a lock ahead in the queue. We skip locks where the owning transaction's
+sub-tree has already been searched.
+
+Note: The record locks are traversed from the oldest lock to the
+latest. For table locks we go from latest to oldest.
+
+For record locks, we first position the "iterator" on the first lock on
+the page and then reposition on the actual heap_no. This is required
+due to the way the record lock hash is implemented.
+
+@param[out] heap_no if rec lock, else ULINT_UNDEFINED.
+@return first lock or NULL */
+const lock_t*
+DeadlockChecker::get_first_lock(ulint* heap_no) const
+{
+ ut_ad(lock_mutex_own());
+
+ const lock_t* lock = m_wait_lock;
+
+ if (lock_get_type_low(lock) == LOCK_REC) {
+ hash_table_t* lock_hash;
+
+ lock_hash = lock->type_mode & LOCK_PREDICATE
+ ? lock_sys->prdt_hash
+ : lock_sys->rec_hash;
+
+ /* We are only interested in records that match the heap_no. */
+ *heap_no = lock_rec_find_set_bit(lock);
+
+ ut_ad(*heap_no <= 0xffff);
+ ut_ad(*heap_no != ULINT_UNDEFINED);
+
+ /* Find the locks on the page. */
+ lock = lock_rec_get_first_on_page_addr(
+ lock_hash,
+ lock->un_member.rec_lock.space,
+ lock->un_member.rec_lock.page_no);
+
+ /* Position on the first lock on the physical record.*/
+ if (!lock_rec_get_nth_bit(lock, *heap_no)) {
+ lock = lock_rec_get_next_const(*heap_no, lock);
+ }
+
+ ut_a(!lock_get_wait(lock));
+ } else {
+ /* Table locks don't care about the heap_no. */
+ *heap_no = ULINT_UNDEFINED;
+ ut_ad(lock_get_type_low(lock) == LOCK_TABLE);
+ dict_table_t* table = lock->un_member.tab_lock.table;
+ lock = UT_LIST_GET_FIRST(table->locks);
+ }
+
+	/* Must find at least two locks; otherwise there cannot be a
+	waiting lock. Secondly, the first lock cannot be the wait_lock. */
+ ut_a(lock != NULL);
+ ut_a(lock != m_wait_lock ||
+ (innodb_lock_schedule_algorithm
+ == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
+ && !thd_is_replication_slave_thread(lock->trx->mysql_thd)));
+
+ /* Check that the lock type doesn't change. */
+ ut_ad(lock_get_type_low(lock) == lock_get_type_low(m_wait_lock));
+
+ return(lock);
+}
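
The repositioning step described in the comment (start from the first lock on the page, then advance until one actually covers the wanted heap_no) can be modelled as below. The fixed 64-bit `std::bitset` stands in for the real, variably sized record-lock bitmap, so this sketch assumes heap_no is below 64.

```cpp
#include <bitset>
#include <cstddef>

struct Lock {
	Lock*           next_on_page;   // next lock on the same page
	std::bitset<64> heap_nos;       // stand-in for the lock bitmap
};

const Lock* first_lock_for_heap_no(const Lock* first_on_page,
				   std::size_t heap_no)
{
	const Lock* lock = first_on_page;

	while (lock != nullptr && !lock->heap_nos.test(heap_no)) {
		lock = lock->next_on_page;
	}

	return lock;
}
```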
+
+/** Notify that a deadlock has been detected and print the conflicting
+transaction info.
+@param lock lock causing deadlock */
+void
+DeadlockChecker::notify(const lock_t* lock) const
+{
+ ut_ad(lock_mutex_own());
+
+ start_print();
+
+ print("\n*** (1) TRANSACTION:\n");
+
+ print(m_wait_lock->trx, 3000);
+
+ print("*** (1) WAITING FOR THIS LOCK TO BE GRANTED:\n");
+
+ print(m_wait_lock);
+
+ print("*** (2) TRANSACTION:\n");
+
+ print(lock->trx, 3000);
+
+ print("*** (2) HOLDS THE LOCK(S):\n");
+
+ print(lock);
+
+ /* It is possible that the joining transaction was granted its
+ lock when we rolled back some other waiting transaction. */
+
+ if (m_start->lock.wait_lock != 0) {
+ print("*** (2) WAITING FOR THIS LOCK TO BE GRANTED:\n");
+
+ print(m_start->lock.wait_lock);
+ }
+
+ DBUG_PRINT("ib_lock", ("deadlock detected"));
+}
+
+/** Select the victim transaction that should be rolled back.
+@return victim transaction */
+const trx_t*
+DeadlockChecker::select_victim() const
+{
+ ut_ad(lock_mutex_own());
+ ut_ad(m_start->lock.wait_lock != 0);
+ ut_ad(m_wait_lock->trx != m_start);
+
+ if (trx_weight_ge(m_wait_lock->trx, m_start)) {
+ /* The joining transaction is 'smaller',
+ choose it as the victim and roll it back. */
+#ifdef WITH_WSREP
+ if (wsrep_thd_is_BF(m_start->mysql_thd, TRUE)) {
+ return(m_wait_lock->trx);
+ }
+#endif /* WITH_WSREP */
+ return(m_start);
+ }
+
+#ifdef WITH_WSREP
+ if (wsrep_thd_is_BF(m_wait_lock->trx->mysql_thd, TRUE)) {
+ return(m_start);
+ }
+#endif /* WITH_WSREP */
+
+ return(m_wait_lock->trx);
+}
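
The victim choice reduces to a weight comparison: roll back whichever transaction is cheaper to undo. A sketch with a hypothetical scalar `weight` field; the real `trx_weight_ge()` is more involved than a single number.

```cpp
struct Trx { unsigned long weight; };   // hypothetical scalar weight

// true if a weighs at least as much as b
inline bool weight_ge(const Trx* a, const Trx* b)
{
	return a->weight >= b->weight;
}

const Trx* select_victim(const Trx* joining, const Trx* blocking)
{
	// If the blocking trx outweighs the joiner, the joiner is the
	// cheaper one to roll back; otherwise sacrifice the blocker.
	return weight_ge(blocking, joining) ? joining : blocking;
}
```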
+
+/** Looks iteratively for a deadlock. Note: the joining transaction may
+have been granted its lock by the deadlock checks.
+@return 0 if no deadlock else the victim transaction instance.*/
+const trx_t*
+DeadlockChecker::search()
+{
+ ut_ad(lock_mutex_own());
+ ut_ad(!trx_mutex_own(m_start));
+
+ ut_ad(m_start != NULL);
+ ut_ad(m_wait_lock != NULL);
+ check_trx_state(m_wait_lock->trx);
+ ut_ad(m_mark_start <= s_lock_mark_counter);
+
+ /* Look at the locks ahead of wait_lock in the lock queue. */
+ ulint heap_no;
+ const lock_t* lock = get_first_lock(&heap_no);
+
+ for (;;) {
+ /* We should never visit the same sub-tree more than once. */
+ ut_ad(lock == NULL || !is_visited(lock));
+
+ while (m_n_elems > 0 && lock == NULL) {
+
+ /* Restore previous search state. */
+
+ pop(lock, heap_no);
+
+ lock = get_next_lock(lock, heap_no);
+ }
+
+ if (lock == NULL) {
+ break;
+ }
+
+ if (lock == m_wait_lock) {
+
+ /* We can mark this subtree as searched */
+ ut_ad(lock->trx->lock.deadlock_mark <= m_mark_start);
+
+ lock->trx->lock.deadlock_mark = ++s_lock_mark_counter;
+
+ /* We are not prepared for an overflow. This 64-bit
+ counter should never wrap around. At 10^9 increments
+ per second, it would take 10^3 years of uptime. */
+
+ ut_ad(s_lock_mark_counter > 0);
+
+ /* Backtrack */
+ lock = NULL;
+ continue;
+ }
+
+ if (!lock_has_to_wait(m_wait_lock, lock)) {
+ /* No conflict, next lock */
+ lock = get_next_lock(lock, heap_no);
+ continue;
+ }
+
+ if (lock->trx == m_start) {
+ /* Found a cycle. */
+ notify(lock);
+ return select_victim();
+ }
+
+ if (is_too_deep()) {
+ /* Search too deep to continue. */
+ m_too_deep = true;
+ return m_start;
+ }
+
+ /* We do not need to report autoinc locks to the upper
+ layer. These locks are released before commit, so they
+ can not cause deadlocks with binlog-fixed commit
+ order. */
+ if (m_report_waiters
+ && (lock_get_type_low(lock) != LOCK_TABLE
+ || lock_get_mode(lock) != LOCK_AUTO_INC)) {
+ thd_rpl_deadlock_check(m_start->mysql_thd,
+ lock->trx->mysql_thd);
+ }
+
+ if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
+ /* Another trx ahead has requested a lock in an
+ incompatible mode, and is itself waiting for a lock. */
+
+ ++m_cost;
+
+ if (!push(lock, heap_no)) {
+ m_too_deep = true;
+ return m_start;
+ }
+
+ m_wait_lock = lock->trx->lock.wait_lock;
+
+ lock = get_first_lock(&heap_no);
+
+ if (is_visited(lock)) {
+ lock = get_next_lock(lock, heap_no);
+ }
+ } else {
+ lock = get_next_lock(lock, heap_no);
+ }
+ }
+
+ ut_a(lock == NULL && m_n_elems == 0);
+
+ /* No deadlock found. */
+ return(0);
+}
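
The loop above is a depth-first search of the waits-for graph, made
iterative with an explicit stack (push()/pop()) and pruned by a
monotonically increasing mark counter, so visited marks never need to be
cleared between searches. A minimal sketch of the same idea, with
hypothetical node/waits_for types in place of InnoDB's lock structures:

	#include <cstdint>
	#include <utility>
	#include <vector>

	struct node { uint64_t mark = 0; std::vector<node*> waits_for; };
	static uint64_t mark_counter;

	/* Return true if start can reach itself through waits_for edges.
	Nodes stamped with a mark >= mark_start were already reached by
	this search; older stamps are stale and simply ignored. */
	bool has_cycle(node* start)
	{
		const uint64_t mark_start = ++mark_counter;
		start->mark = mark_start;
		std::vector<std::pair<node*, size_t> > stack;
		stack.push_back({start, 0});
		while (!stack.empty()) {
			node*	n = stack.back().first;
			size_t&	i = stack.back().second;
			if (i == n->waits_for.size()) {
				stack.pop_back();	/* backtrack */
				continue;
			}
			node* next = n->waits_for[i++];
			if (next == start) {
				return true;	/* a cycle through start */
			}
			if (next->mark < mark_start) {
				next->mark = mark_start;
				stack.push_back({next, 0});
			}
		}
		return false;
	}
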
+
+/** Print info about transaction that was rolled back.
+@param trx transaction rolled back
+@param lock lock trx wants */
+void
+DeadlockChecker::rollback_print(const trx_t* trx, const lock_t* lock)
+{
+ ut_ad(lock_mutex_own());
+
+ /* If the lock search exceeds the max step
+ or the max depth, the current trx will be
+ the victim. Print its information. */
+ start_print();
+
+ print("TOO DEEP OR LONG SEARCH IN THE LOCK TABLE"
+ " WAITS-FOR GRAPH, WE WILL ROLL BACK"
+ " FOLLOWING TRANSACTION \n\n"
+ "*** TRANSACTION:\n");
+
+ print(trx, 3000);
+
+ print("*** WAITING FOR THIS LOCK TO BE GRANTED:\n");
+
+ print(lock);
+}
+
+/** Rollback transaction selected as the victim. */
+void
+DeadlockChecker::trx_rollback()
+{
+ ut_ad(lock_mutex_own());
+
+ trx_t* trx = m_wait_lock->trx;
+
+ print("*** WE ROLL BACK TRANSACTION (1)\n");
+
+ trx_mutex_enter(trx);
+
+ trx->lock.was_chosen_as_deadlock_victim = true;
+
+ lock_cancel_waiting_and_release(trx->lock.wait_lock);
+
+ trx_mutex_exit(trx);
+}
+
+/** Check if a joining lock request results in a deadlock.
+If a deadlock is found, we will resolve the deadlock by
+choosing a victim transaction and rolling it back.
+We will attempt to resolve all deadlocks.
+
+@param[in] lock the lock request
+@param[in,out] trx transaction requesting the lock
+
+@return trx if it was chosen as victim
+@retval NULL if another victim was chosen,
+or there is no deadlock (any more) */
+const trx_t*
+DeadlockChecker::check_and_resolve(const lock_t* lock, trx_t* trx)
+{
+ ut_ad(lock_mutex_own());
+ ut_ad(trx_mutex_own(trx));
+ check_trx_state(trx);
+ ut_ad(!srv_read_only_mode);
+
+ if (!innobase_deadlock_detect) {
+ return(NULL);
+ }
+
+ /* Release the mutex to obey the latching order.
+ This is safe, because DeadlockChecker::check_and_resolve()
+ is invoked when a lock wait is enqueued for the currently
+ running transaction. Because m_trx is a running transaction
+ (it is not currently suspended because of a lock wait),
+ its state can only be changed by this thread, which is
+ currently associated with the transaction. */
+
+ trx_mutex_exit(trx);
+
+ const trx_t* victim_trx;
+ const bool report_waiters = trx->mysql_thd
+ && thd_need_wait_reports(trx->mysql_thd);
+
+ /* Try and resolve as many deadlocks as possible. */
+ do {
+ DeadlockChecker checker(trx, lock, s_lock_mark_counter,
+ report_waiters);
+
+ victim_trx = checker.search();
+
+		/* Search too deep: we roll back the joining transaction
+		only if it is possible to roll back. Otherwise we roll
+		back the transaction that is holding the lock that the
+		joining transaction wants. */
+ if (checker.is_too_deep()) {
+
+ ut_ad(trx == checker.m_start);
+ ut_ad(trx == victim_trx);
+
+ rollback_print(victim_trx, lock);
+
+ MONITOR_INC(MONITOR_DEADLOCK);
+
+ break;
+
+ } else if (victim_trx != NULL && victim_trx != trx) {
+
+ ut_ad(victim_trx == checker.m_wait_lock->trx);
+
+ checker.trx_rollback();
+
+ lock_deadlock_found = true;
+
+ MONITOR_INC(MONITOR_DEADLOCK);
+ }
+
+ } while (victim_trx != NULL && victim_trx != trx);
+
+ /* If the joining transaction was selected as the victim. */
+ if (victim_trx != NULL) {
+
+ print("*** WE ROLL BACK TRANSACTION (2)\n");
+
+ lock_deadlock_found = true;
+ }
+
+ trx_mutex_enter(trx);
+
+ return(victim_trx);
+}
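
The early trx_mutex_exit() above reflects the latching rule that a thread
may hold at most one per-transaction mutex at a time: trx_rollback() must
take the victim's mutex, so the caller's own must be released first. A
generic sketch of that discipline, with std::mutex standing in for the
InnoDB mutexes:

	#include <mutex>

	struct toy_trx { std::mutex m; bool victim = false; };

	/* Mark another transaction as a victim while holding at most one
	per-transaction mutex; self.m must be held by the caller. */
	void mark_victim(toy_trx& self, toy_trx& other)
	{
		self.m.unlock();	/* obey the latching order */
		{
			std::lock_guard<std::mutex> g(other.m);
			other.victim = true;
		}
		self.m.lock();		/* restore the caller's state */
	}
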
+
+/*************************************************************//**
+Updates the lock table when a page is split and merged to
+two pages. */
+UNIV_INTERN
+void
+lock_update_split_and_merge(
+ const buf_block_t* left_block, /*!< in: left page to which merged */
+ const rec_t* orig_pred, /*!< in: original predecessor of
+ supremum on the left page before merge*/
+ const buf_block_t* right_block) /*!< in: right page from which merged */
+{
+ const rec_t* left_next_rec;
+
+ ut_a(left_block && right_block);
+ ut_a(orig_pred);
+
+ lock_mutex_enter();
+
+ left_next_rec = page_rec_get_next_const(orig_pred);
+
+ /* Inherit the locks on the supremum of the left page to the
+ first record which was moved from the right page */
+ lock_rec_inherit_to_gap(
+ left_block, left_block,
+ page_rec_get_heap_no(left_next_rec),
+ PAGE_HEAP_NO_SUPREMUM);
+
+ /* Reset the locks on the supremum of the left page,
+ releasing waiting transactions */
+ lock_rec_reset_and_release_wait(left_block,
+ PAGE_HEAP_NO_SUPREMUM);
+
+ /* Inherit the locks to the supremum of the left page from the
+ successor of the infimum on the right page */
+ lock_rec_inherit_to_gap(left_block, right_block,
+ PAGE_HEAP_NO_SUPREMUM,
+ lock_get_min_heap_no(right_block));
+
+ lock_mutex_exit();
}
diff --git a/storage/innobase/lock/lock0prdt.cc b/storage/innobase/lock/lock0prdt.cc
new file mode 100644
index 00000000000..5100388c5e0
--- /dev/null
+++ b/storage/innobase/lock/lock0prdt.cc
@@ -0,0 +1,1060 @@
+/*****************************************************************************
+
+Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file lock/lock0prdt.cc
+The transaction lock system
+
+Created 9/7/2013 Jimmy Yang
+*******************************************************/
+
+#define LOCK_MODULE_IMPLEMENTATION
+
+#include "lock0lock.h"
+#include "lock0priv.h"
+#include "lock0prdt.h"
+#include "dict0mem.h"
+#include "que0que.h"
+
+/*********************************************************************//**
+Get a minimum bounding box from a Predicate
+@return the minimum bounding box */
+UNIV_INLINE
+rtr_mbr_t*
+prdt_get_mbr_from_prdt(
+/*===================*/
+ const lock_prdt_t* prdt) /*!< in: the lock predicate */
+{
+ rtr_mbr_t* mbr_loc = reinterpret_cast<rtr_mbr_t*>(prdt->data);
+
+ return(mbr_loc);
+}
+
+/*********************************************************************//**
+Get a predicate from a lock
+@return the predicate */
+lock_prdt_t*
+lock_get_prdt_from_lock(
+/*====================*/
+ const lock_t* lock) /*!< in: the lock */
+{
+ lock_prdt_t* prdt = reinterpret_cast<lock_prdt_t*>(
+ &((reinterpret_cast<byte*>(
+ const_cast<lock_t*>(&lock[1])))[
+ UNIV_WORD_SIZE]));
+
+ return(prdt);
+}
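
The pointer arithmetic above relies on the record-lock allocation layout:
the lock_prdt_t is stored immediately after the lock_t object and one
machine word that holds the (single-bit) record bitmap. A sketch of the
same trailing-payload technique, using hypothetical toy types:

	#include <cstdlib>

	struct toy_lock { unsigned type_mode; /* ... */ };
	struct toy_prdt { void* data; unsigned op; };

	enum { WORD = sizeof(void*) };	/* stands in for UNIV_WORD_SIZE */

	/* One allocation: the struct, a bitmap word, then the predicate. */
	toy_lock* alloc_prdt_lock()
	{
		return static_cast<toy_lock*>(
			std::calloc(1, sizeof(toy_lock) + WORD
				    + sizeof(toy_prdt)));
	}

	/* Mirror of lock_get_prdt_from_lock(): skip past the struct
	and the bitmap word to reach the payload. */
	toy_prdt* prdt_of(toy_lock* lock)
	{
		return reinterpret_cast<toy_prdt*>(
			reinterpret_cast<unsigned char*>(lock + 1) + WORD);
	}
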
+
+/*********************************************************************//**
+Get a minimum bounding box directly from a lock
+@return the minimum bounding box*/
+UNIV_INLINE
+rtr_mbr_t*
+lock_prdt_get_mbr_from_lock(
+/*========================*/
+ const lock_t* lock) /*!< in: the lock */
+{
+ ut_ad(lock->type_mode & LOCK_PREDICATE);
+
+ lock_prdt_t* prdt = lock_get_prdt_from_lock(lock);
+
+ rtr_mbr_t* mbr_loc = prdt_get_mbr_from_prdt(prdt);
+
+ return(mbr_loc);
+}
+
+/*********************************************************************//**
+Append a predicate to the lock */
+void
+lock_prdt_set_prdt(
+/*===============*/
+ lock_t* lock, /*!< in: lock */
+ const lock_prdt_t* prdt) /*!< in: Predicate */
+{
+ ut_ad(lock->type_mode & LOCK_PREDICATE);
+
+ memcpy(&(((byte*) &lock[1])[UNIV_WORD_SIZE]), prdt, sizeof *prdt);
+}
+
+
+/** Check whether two predicate locks are compatible with each other
+@param[in] prdt1 first predicate lock
+@param[in] prdt2 second predicate lock
+@param[in] op predicate comparison operator
+@return true if consistent */
+static
+bool
+lock_prdt_consistent(
+ lock_prdt_t* prdt1,
+ lock_prdt_t* prdt2,
+ ulint op)
+{
+ bool ret = false;
+ rtr_mbr_t* mbr1 = prdt_get_mbr_from_prdt(prdt1);
+ rtr_mbr_t* mbr2 = prdt_get_mbr_from_prdt(prdt2);
+ ulint action;
+
+ if (op) {
+ action = op;
+ } else {
+ if (prdt2->op != 0 && (prdt1->op != prdt2->op)) {
+ return(false);
+ }
+
+ action = prdt1->op;
+ }
+
+ switch (action) {
+ case PAGE_CUR_CONTAIN:
+ ret = MBR_CONTAIN_CMP(mbr1, mbr2);
+ break;
+ case PAGE_CUR_DISJOINT:
+ ret = MBR_DISJOINT_CMP(mbr1, mbr2);
+ break;
+ case PAGE_CUR_MBR_EQUAL:
+ ret = MBR_EQUAL_CMP(mbr1, mbr2);
+ break;
+ case PAGE_CUR_INTERSECT:
+ ret = MBR_INTERSECT_CMP(mbr1, mbr2);
+ break;
+ case PAGE_CUR_WITHIN:
+ ret = MBR_WITHIN_CMP(mbr1, mbr2);
+ break;
+ default:
+ ib::error() << "invalid operator " << action;
+ ut_error;
+ }
+
+ return(ret);
+}
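
Each PAGE_CUR_* operator above reduces to interval arithmetic on the
rectangle corners. A minimal sketch of two of the comparisons, assuming
the usual xmin/xmax, ymin/ymax corner convention (the real MBR_*_CMP
macros may differ in how they treat shared edges):

	struct mbr { double xmin, xmax, ymin, ymax; };

	/* PAGE_CUR_INTERSECT: the rectangles share at least one point */
	bool mbr_intersect(const mbr& a, const mbr& b)
	{
		return a.xmin <= b.xmax && b.xmin <= a.xmax
			&& a.ymin <= b.ymax && b.ymin <= a.ymax;
	}

	/* PAGE_CUR_WITHIN: a lies entirely inside b */
	bool mbr_within(const mbr& a, const mbr& b)
	{
		return a.xmin >= b.xmin && a.xmax <= b.xmax
			&& a.ymin >= b.ymin && a.ymax <= b.ymax;
	}
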
+
+/*********************************************************************//**
+Checks if a predicate lock request for a new lock has to wait for
+another lock.
+@return true if new lock has to wait for lock2 to be released */
+bool
+lock_prdt_has_to_wait(
+/*==================*/
+ const trx_t* trx, /*!< in: trx of new lock */
+ ulint type_mode,/*!< in: precise mode of the new lock
+ to set: LOCK_S or LOCK_X, possibly
+ ORed to LOCK_PREDICATE or LOCK_PRDT_PAGE,
+ LOCK_INSERT_INTENTION */
+ lock_prdt_t* prdt, /*!< in: lock predicate to check */
+ const lock_t* lock2) /*!< in: another record lock; NOTE that
+ it is assumed that this has a lock bit
+ set on the same record as in the new
+ lock we are setting */
+{
+ lock_prdt_t* cur_prdt = lock_get_prdt_from_lock(lock2);
+
+ ut_ad(trx && lock2);
+ ut_ad((lock2->type_mode & LOCK_PREDICATE && type_mode & LOCK_PREDICATE)
+ || (lock2->type_mode & LOCK_PRDT_PAGE
+ && type_mode & LOCK_PRDT_PAGE));
+
+ ut_ad(type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE));
+
+ if (trx != lock2->trx
+ && !lock_mode_compatible(static_cast<lock_mode>(
+ LOCK_MODE_MASK & type_mode),
+ lock_get_mode(lock2))) {
+
+ /* If it is a page lock, then return true (conflict) */
+ if (type_mode & LOCK_PRDT_PAGE) {
+ ut_ad(lock2->type_mode & LOCK_PRDT_PAGE);
+
+ return(true);
+ }
+
+		/* A predicate lock does not conflict with a
+		non-predicate lock */
+		if (!(lock2->type_mode & LOCK_PREDICATE)) {
+			return(false);
+		}
+
+ ut_ad(lock2->type_mode & LOCK_PREDICATE);
+
+ if (!(type_mode & LOCK_INSERT_INTENTION)) {
+ /* PREDICATE locks without LOCK_INSERT_INTENTION flag
+ do not need to wait for anything. This is because
+ different users can have conflicting lock types
+ on predicates. */
+
+			return(false);
+ }
+
+ if (lock2->type_mode & LOCK_INSERT_INTENTION) {
+
+ /* No lock request needs to wait for an insert
+ intention lock to be removed. This makes it similar
+ to GAP lock, that allows conflicting insert intention
+ locks */
+			return(false);
+ }
+
+ if (!lock_prdt_consistent(cur_prdt, prdt, 0)) {
+ return(false);
+ }
+
+		return(true);
+ }
+
+	return(false);
+}
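
The branches above condense to a short decision function. A sketch of the
rule set (the parameter names are descriptive, not InnoDB's):

	/* Page locks in incompatible modes always conflict; predicate
	locks conflict only when an insert-intention request meets a
	granted, non-insert-intention predicate whose test succeeds. */
	bool prdt_must_wait(bool same_trx, bool modes_compatible,
			    bool page_lock, bool holder_is_predicate,
			    bool req_insert_intention,
			    bool holder_insert_intention,
			    bool predicate_test_holds)
	{
		if (same_trx || modes_compatible) return false;
		if (page_lock) return true;
		if (!holder_is_predicate) return false;
		if (!req_insert_intention) return false;
		if (holder_insert_intention) return false;
		return predicate_test_holds;
	}
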
+
+/*********************************************************************//**
+Checks if a transaction has a GRANTED stronger or equal predicate lock
+on the page
+@return lock or NULL */
+UNIV_INLINE
+lock_t*
+lock_prdt_has_lock(
+/*===============*/
+ ulint precise_mode, /*!< in: LOCK_S or LOCK_X */
+ ulint type_mode, /*!< in: LOCK_PREDICATE etc. */
+ const buf_block_t* block, /*!< in: buffer block
+ containing the record */
+ lock_prdt_t* prdt, /*!< in: The predicate to be
+ attached to the new lock */
+ const trx_t* trx) /*!< in: transaction */
+{
+ lock_t* lock;
+
+ ut_ad(lock_mutex_own());
+ ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S
+ || (precise_mode & LOCK_MODE_MASK) == LOCK_X);
+ ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));
+
+ for (lock = lock_rec_get_first(
+ lock_hash_get(type_mode), block, PRDT_HEAPNO);
+ lock != NULL;
+ lock = lock_rec_get_next(PRDT_HEAPNO, lock)) {
+ ut_ad(lock->type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE));
+
+ if (lock->trx == trx
+ && !(lock->type_mode & LOCK_INSERT_INTENTION)
+ && !lock_get_wait(lock)
+ && lock_mode_stronger_or_eq(
+ lock_get_mode(lock),
+ static_cast<lock_mode>(
+ precise_mode & LOCK_MODE_MASK))) {
+ if (lock->type_mode & LOCK_PRDT_PAGE) {
+ return(lock);
+ }
+
+ ut_ad(lock->type_mode & LOCK_PREDICATE);
+ lock_prdt_t* cur_prdt = lock_get_prdt_from_lock(
+ lock);
+
+			/* If the lock predicate operator is the same
+			as the one we are looking for, and the
+			predicate test succeeds, we have found a lock */
+ if (cur_prdt->op == prdt->op
+ && lock_prdt_consistent(cur_prdt, prdt, 0)) {
+
+ return(lock);
+ }
+ }
+ }
+
+ return(NULL);
+}
+
+/*********************************************************************//**
+Checks if some other transaction has a conflicting predicate
+lock request in the queue, so that we have to wait.
+@return lock or NULL */
+static
+lock_t*
+lock_prdt_other_has_conflicting(
+/*============================*/
+ ulint mode, /*!< in: LOCK_S or LOCK_X,
+ possibly ORed to LOCK_PREDICATE or
+ LOCK_PRDT_PAGE, LOCK_INSERT_INTENTION */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+	lock_prdt_t*		prdt,	/*!< in: predicates (currently
+					the Minimum Bounding Rectangle)
+					the new lock will be on */
+ const trx_t* trx) /*!< in: our transaction */
+{
+ ut_ad(lock_mutex_own());
+
+ for (lock_t* lock = lock_rec_get_first(
+ lock_hash_get(mode), block, PRDT_HEAPNO);
+ lock != NULL;
+ lock = lock_rec_get_next(PRDT_HEAPNO, lock)) {
+
+ if (lock->trx == trx) {
+ continue;
+ }
+
+ if (lock_prdt_has_to_wait(trx, mode, prdt, lock)) {
+ return(lock);
+ }
+ }
+
+ return(NULL);
+}
+
+/*********************************************************************//**
+Enlarge a lock's Minimum Bounding Rectangle to cover the given MBR */
+static
+void
+lock_prdt_enlarge_mbr(
+/*==================*/
+ const lock_t* lock, /*!< in/out: lock to modify */
+ rtr_mbr_t* mbr) /*!< in: Minimum Bounding Rectangle */
+{
+ rtr_mbr_t* cur_mbr = lock_prdt_get_mbr_from_lock(lock);
+
+ if (cur_mbr->xmin > mbr->xmin) {
+ cur_mbr->xmin = mbr->xmin;
+ }
+
+ if (cur_mbr->ymin > mbr->ymin) {
+ cur_mbr->ymin = mbr->ymin;
+ }
+
+ if (cur_mbr->xmax < mbr->xmax) {
+ cur_mbr->xmax = mbr->xmax;
+ }
+
+ if (cur_mbr->ymax < mbr->ymax) {
+ cur_mbr->ymax = mbr->ymax;
+ }
+}
+
+/*********************************************************************//**
+Enlarge the predicate to a "covering" (larger) predicate */
+static
+void
+lock_prdt_enlarge_prdt(
+/*===================*/
+ lock_t* lock, /*!< in/out: lock to modify */
+ lock_prdt_t* prdt) /*!< in: predicate */
+{
+ rtr_mbr_t* mbr = prdt_get_mbr_from_prdt(prdt);
+
+ lock_prdt_enlarge_mbr(lock, mbr);
+}
+
+/*********************************************************************//**
+Check two predicates' MBRs are the same
+@return true if they are the same */
+static
+bool
+lock_prdt_is_same(
+/*==============*/
+ lock_prdt_t* prdt1, /*!< in: MBR with the lock */
+ lock_prdt_t* prdt2) /*!< in: MBR with the lock */
+{
+ rtr_mbr_t* mbr1 = prdt_get_mbr_from_prdt(prdt1);
+ rtr_mbr_t* mbr2 = prdt_get_mbr_from_prdt(prdt2);
+
+ if (prdt1->op == prdt2->op && MBR_EQUAL_CMP(mbr1, mbr2)) {
+ return(true);
+ }
+
+ return(false);
+}
+
+/*********************************************************************//**
+Looks for a similar predicate lock struct by the same trx on the same page.
+This can be used to save space when a new record lock should be set on a page:
+no new struct is needed if a suitable old one is found.
+@return lock or NULL */
+static
+lock_t*
+lock_prdt_find_on_page(
+/*===================*/
+ ulint type_mode, /*!< in: lock type_mode field */
+ const buf_block_t* block, /*!< in: buffer block */
+ lock_prdt_t* prdt, /*!< in: MBR with the lock */
+ const trx_t* trx) /*!< in: transaction */
+{
+ lock_t* lock;
+
+ ut_ad(lock_mutex_own());
+
+ for (lock = lock_rec_get_first_on_page(lock_hash_get(type_mode), block);
+ lock != NULL;
+ lock = lock_rec_get_next_on_page(lock)) {
+
+ if (lock->trx == trx
+ && lock->type_mode == type_mode) {
+ if (lock->type_mode & LOCK_PRDT_PAGE) {
+ return(lock);
+ }
+
+ ut_ad(lock->type_mode & LOCK_PREDICATE);
+
+ if (lock_prdt_is_same(lock_get_prdt_from_lock(lock),
+ prdt)) {
+ return(lock);
+ }
+ }
+ }
+
+ return(NULL);
+}
+
+/*********************************************************************//**
+Adds a predicate lock request in the predicate lock queue.
+@return lock where the bit was set */
+static
+lock_t*
+lock_prdt_add_to_queue(
+/*===================*/
+ ulint type_mode,/*!< in: lock mode, wait, predicate
+ etc. flags; type is ignored
+ and replaced by LOCK_REC */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ dict_index_t* index, /*!< in: index of record */
+ trx_t* trx, /*!< in/out: transaction */
+ lock_prdt_t* prdt, /*!< in: Minimum Bounding Rectangle
+ the new lock will be on */
+ bool caller_owns_trx_mutex)
+ /*!< in: TRUE if caller owns the
+ transaction mutex */
+{
+ ut_ad(lock_mutex_own());
+ ut_ad(caller_owns_trx_mutex == trx_mutex_own(trx));
+ ut_ad(!dict_index_is_clust(index) && !dict_index_is_online_ddl(index));
+ ut_ad(type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE));
+
+#ifdef UNIV_DEBUG
+ switch (type_mode & LOCK_MODE_MASK) {
+ case LOCK_X:
+ case LOCK_S:
+ break;
+ default:
+ ut_error;
+ }
+#endif /* UNIV_DEBUG */
+
+ type_mode |= LOCK_REC;
+
+ /* Look for a waiting lock request on the same record or on a gap */
+
+ lock_t* lock;
+
+ for (lock = lock_rec_get_first_on_page(lock_hash_get(type_mode), block);
+ lock != NULL;
+ lock = lock_rec_get_next_on_page(lock)) {
+
+ if (lock_get_wait(lock)
+ && lock_rec_get_nth_bit(lock, PRDT_HEAPNO)
+ && lock->type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE)) {
+
+ break;
+ }
+ }
+
+ if (lock == NULL && !(type_mode & LOCK_WAIT)) {
+
+ /* Look for a similar record lock on the same page:
+ if one is found and there are no waiting lock requests,
+ we can just set the bit */
+
+ lock = lock_prdt_find_on_page(type_mode, block, prdt, trx);
+
+ if (lock != NULL) {
+
+ if (lock->type_mode & LOCK_PREDICATE) {
+ lock_prdt_enlarge_prdt(lock, prdt);
+ }
+
+ return(lock);
+ }
+ }
+
+ lock = lock_rec_create(
+#ifdef WITH_WSREP
+ NULL, NULL, /* FIXME: replicate SPATIAL INDEX locks */
+#endif
+ type_mode, block, PRDT_HEAPNO, index, trx,
+ caller_owns_trx_mutex);
+
+ if (lock->type_mode & LOCK_PREDICATE) {
+ lock_prdt_set_prdt(lock, prdt);
+ }
+
+ return lock;
+}
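
Rather than allocating one lock object per rectangle, the queue code above
prefers to widen an existing lock owned by the same transaction. A
self-contained sketch of that reuse rule (toy types, not InnoDB's):

	#include <vector>

	struct rect { double xmin, xmax, ymin, ymax; };
	struct toy_lock { const void* trx; rect box; };

	toy_lock* add_or_enlarge(std::vector<toy_lock>& page_queue,
				 const void* trx, const rect& r)
	{
		for (toy_lock& l : page_queue) {
			if (l.trx != trx) continue;
			/* similar lock found: grow its MBR to cover r */
			if (r.xmin < l.box.xmin) l.box.xmin = r.xmin;
			if (r.ymin < l.box.ymin) l.box.ymin = r.ymin;
			if (r.xmax > l.box.xmax) l.box.xmax = r.xmax;
			if (r.ymax > l.box.ymax) l.box.ymax = r.ymax;
			return &l;
		}
		page_queue.push_back(toy_lock{trx, r});	/* else create */
		return &page_queue.back();
	}
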
+
+/*********************************************************************//**
+Checks if locks of other transactions prevent an immediate insert of
+a predicate record.
+@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
+dberr_t
+lock_prdt_insert_check_and_lock(
+/*============================*/
+ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is
+ set, does nothing */
+ const rec_t* rec, /*!< in: record after which to insert */
+ buf_block_t* block, /*!< in/out: buffer block of rec */
+ dict_index_t* index, /*!< in: index */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr, /*!< in/out: mini-transaction */
+ lock_prdt_t* prdt) /*!< in: Predicates with Minimum Bound
+ Rectangle */
+{
+ ut_ad(block->frame == page_align(rec));
+
+ if (flags & BTR_NO_LOCKING_FLAG) {
+
+ return(DB_SUCCESS);
+ }
+
+ ut_ad(!dict_table_is_temporary(index->table));
+ ut_ad(!dict_index_is_clust(index));
+
+ trx_t* trx = thr_get_trx(thr);
+
+ lock_mutex_enter();
+
+ /* Because this code is invoked for a running transaction by
+ the thread that is serving the transaction, it is not necessary
+ to hold trx->mutex here. */
+
+ ut_ad(lock_table_has(trx, index->table, LOCK_IX));
+
+ lock_t* lock;
+
+ /* Only need to check locks on prdt_hash */
+ lock = lock_rec_get_first(lock_sys->prdt_hash, block, PRDT_HEAPNO);
+
+ if (lock == NULL) {
+ lock_mutex_exit();
+
+ /* Update the page max trx id field */
+ page_update_max_trx_id(block, buf_block_get_page_zip(block),
+ trx->id, mtr);
+
+ return(DB_SUCCESS);
+ }
+
+ ut_ad(lock->type_mode & LOCK_PREDICATE);
+
+ dberr_t err;
+
+ /* If another transaction has an explicit lock request which locks
+ the predicate, waiting or granted, on the successor, the insert
+ has to wait.
+
+	Similar to GAP locks, we do not consider locks from inserts
+	to conflict with each other */
+
+ const ulint mode = LOCK_X | LOCK_PREDICATE | LOCK_INSERT_INTENTION;
+
+ const lock_t* wait_for = lock_prdt_other_has_conflicting(
+ mode, block, prdt, trx);
+
+ if (wait_for != NULL) {
+ rtr_mbr_t* mbr = prdt_get_mbr_from_prdt(prdt);
+
+ /* Allocate MBR on the lock heap */
+ lock_init_prdt_from_mbr(prdt, mbr, 0, trx->lock.lock_heap);
+
+ /* Note that we may get DB_SUCCESS also here! */
+ trx_mutex_enter(trx);
+
+ err = lock_rec_enqueue_waiting(
+#ifdef WITH_WSREP
+ NULL, /* FIXME: replicate SPATIAL INDEX locks */
+#endif
+ LOCK_X | LOCK_PREDICATE | LOCK_INSERT_INTENTION,
+ block, PRDT_HEAPNO, index, thr, prdt);
+
+ trx_mutex_exit(trx);
+ } else {
+ err = DB_SUCCESS;
+ }
+
+ lock_mutex_exit();
+
+ switch (err) {
+ case DB_SUCCESS_LOCKED_REC:
+ err = DB_SUCCESS;
+ /* fall through */
+ case DB_SUCCESS:
+ /* Update the page max trx id field */
+ page_update_max_trx_id(block,
+ buf_block_get_page_zip(block),
+ trx->id, mtr);
+ default:
+ /* We only care about the two return values. */
+ break;
+ }
+
+ return(err);
+}
+
+/**************************************************************//**
+Check whether any predicate lock in parent needs to propagate to
+child page after split. */
+void
+lock_prdt_update_parent(
+/*====================*/
+ buf_block_t* left_block, /*!< in/out: page to be split */
+ buf_block_t* right_block, /*!< in/out: the new half page */
+ lock_prdt_t* left_prdt, /*!< in: MBR on the old page */
+ lock_prdt_t* right_prdt, /*!< in: MBR on the new page */
+ lock_prdt_t* parent_prdt, /*!< in: original parent MBR */
+ ulint space, /*!< in: parent space id */
+ ulint page_no) /*!< in: parent page number */
+{
+ lock_t* lock;
+
+ lock_mutex_enter();
+
+ /* Get all locks in parent */
+ for (lock = lock_rec_get_first_on_page_addr(
+ lock_sys->prdt_hash, space, page_no);
+ lock;
+ lock = lock_rec_get_next_on_page(lock)) {
+ lock_prdt_t* lock_prdt;
+ ulint op = PAGE_CUR_DISJOINT;
+
+ ut_ad(lock);
+
+ if (!(lock->type_mode & LOCK_PREDICATE)
+ || (lock->type_mode & LOCK_MODE_MASK) == LOCK_X) {
+ continue;
+ }
+
+ lock_prdt = lock_get_prdt_from_lock(lock);
+
+ /* Check each lock in parent to see if it intersects with
+ left or right child */
+ if (!lock_prdt_consistent(lock_prdt, left_prdt, op)
+ && !lock_prdt_find_on_page(lock->type_mode, left_block,
+ lock_prdt, lock->trx)) {
+ lock_prdt_add_to_queue(lock->type_mode,
+ left_block, lock->index,
+ lock->trx, lock_prdt,
+ FALSE);
+ }
+
+ if (!lock_prdt_consistent(lock_prdt, right_prdt, op)
+ && !lock_prdt_find_on_page(lock->type_mode, right_block,
+ lock_prdt, lock->trx)) {
+ lock_prdt_add_to_queue(lock->type_mode, right_block,
+ lock->index, lock->trx,
+ lock_prdt, FALSE);
+ }
+ }
+
+ lock_mutex_exit();
+}
+
+/**************************************************************//**
+Update predicate lock when page splits */
+static
+void
+lock_prdt_update_split_low(
+/*=======================*/
+ buf_block_t* block, /*!< in/out: page to be split */
+ buf_block_t* new_block, /*!< in/out: the new half page */
+ lock_prdt_t* prdt, /*!< in: MBR on the old page */
+ lock_prdt_t* new_prdt, /*!< in: MBR on the new page */
+ ulint space, /*!< in: space id */
+ ulint page_no, /*!< in: page number */
+ ulint type_mode) /*!< in: LOCK_PREDICATE or
+ LOCK_PRDT_PAGE */
+{
+ lock_t* lock;
+
+ lock_mutex_enter();
+
+ for (lock = lock_rec_get_first_on_page_addr(
+ lock_hash_get(type_mode), space, page_no);
+ lock;
+ lock = lock_rec_get_next_on_page(lock)) {
+ ut_ad(lock);
+
+		/* Deal with page locks first */
+ if (lock->type_mode & LOCK_PRDT_PAGE) {
+ /* Duplicate the lock to new page */
+ trx_mutex_enter(lock->trx);
+ lock_prdt_add_to_queue(lock->type_mode,
+ new_block,
+ lock->index,
+ lock->trx, NULL, TRUE);
+
+ trx_mutex_exit(lock->trx);
+ continue;
+ }
+
+		/* Now deal with predicate locks */
+ lock_prdt_t* lock_prdt;
+ ulint op = PAGE_CUR_DISJOINT;
+
+ ut_ad(lock->type_mode & LOCK_PREDICATE);
+
+ /* No need to duplicate waiting X locks */
+ if ((lock->type_mode & LOCK_MODE_MASK) == LOCK_X) {
+ continue;
+ }
+
+ lock_prdt = lock_get_prdt_from_lock(lock);
+
+ if (lock_prdt_consistent(lock_prdt, prdt, op)) {
+
+ if (!lock_prdt_consistent(lock_prdt, new_prdt, op)) {
+ /* Move the lock to new page */
+ trx_mutex_enter(lock->trx);
+ lock_prdt_add_to_queue(lock->type_mode,
+ new_block,
+ lock->index,
+ lock->trx, lock_prdt,
+ TRUE);
+ trx_mutex_exit(lock->trx);
+ }
+ } else if (!lock_prdt_consistent(lock_prdt, new_prdt, op)) {
+ /* Duplicate the lock to new page */
+ trx_mutex_enter(lock->trx);
+ lock_prdt_add_to_queue(lock->type_mode,
+ new_block,
+ lock->index,
+ lock->trx, lock_prdt, TRUE);
+
+ trx_mutex_exit(lock->trx);
+ }
+ }
+
+ lock_mutex_exit();
+}
+
+/**************************************************************//**
+Update predicate lock when page splits */
+void
+lock_prdt_update_split(
+/*===================*/
+ buf_block_t* block, /*!< in/out: page to be split */
+ buf_block_t* new_block, /*!< in/out: the new half page */
+ lock_prdt_t* prdt, /*!< in: MBR on the old page */
+ lock_prdt_t* new_prdt, /*!< in: MBR on the new page */
+ ulint space, /*!< in: space id */
+ ulint page_no) /*!< in: page number */
+{
+ lock_prdt_update_split_low(block, new_block, prdt, new_prdt,
+ space, page_no, LOCK_PREDICATE);
+
+ lock_prdt_update_split_low(block, new_block, NULL, NULL,
+ space, page_no, LOCK_PRDT_PAGE);
+}
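
Both branches of lock_prdt_update_split_low() add the lock to the new page
exactly when its rectangle is not disjoint from the new page's MBR (the
PAGE_CUR_DISJOINT test above returns true for disjoint rectangles, hence
the negations). A sketch of that rule in isolation:

	struct rect { double xmin, xmax, ymin, ymax; };

	/* Rectangles are disjoint when separated on either axis. */
	static bool disjoint(const rect& a, const rect& b)
	{
		return a.xmax < b.xmin || b.xmax < a.xmin
			|| a.ymax < b.ymin || b.ymax < a.ymin;
	}

	/* A predicate lock must also appear on the new half of a split
	page whenever it can still touch rows that moved there. */
	static bool must_cover_new_half(const rect& lock_mbr,
					const rect& new_mbr)
	{
		return !disjoint(lock_mbr, new_mbr);
	}
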
+
+/*********************************************************************//**
+Initialize a predicate lock from an MBR */
+void
+lock_init_prdt_from_mbr(
+/*====================*/
+	lock_prdt_t*	prdt,	/*!< in/out: predicate to be initialized */
+ rtr_mbr_t* mbr, /*!< in: Minimum Bounding Rectangle */
+ ulint mode, /*!< in: Search mode */
+ mem_heap_t* heap) /*!< in: heap for allocating memory */
+{
+ memset(prdt, 0, sizeof(*prdt));
+
+ if (heap != NULL) {
+ prdt->data = mem_heap_alloc(heap, sizeof(*mbr));
+ ut_memcpy(prdt->data, mbr, sizeof(*mbr));
+ } else {
+ prdt->data = static_cast<void*>(mbr);
+ }
+
+ prdt->op = static_cast<uint16>(mode);
+}
+
+/*********************************************************************//**
+Acquire a predicate lock on a block
+@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
+dberr_t
+lock_prdt_lock(
+/*===========*/
+ buf_block_t* block, /*!< in/out: buffer block of rec */
+ lock_prdt_t* prdt, /*!< in: Predicate for the lock */
+ dict_index_t* index, /*!< in: secondary index */
+ lock_mode mode, /*!< in: mode of the lock which
+ the read cursor should set on
+ records: LOCK_S or LOCK_X; the
+ latter is possible in
+ SELECT FOR UPDATE */
+ ulint type_mode,
+ /*!< in: LOCK_PREDICATE or LOCK_PRDT_PAGE */
+ que_thr_t* thr, /*!< in: query thread
+ (can be NULL if BTR_NO_LOCKING_FLAG) */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ trx_t* trx = thr_get_trx(thr);
+ dberr_t err = DB_SUCCESS;
+ lock_rec_req_status status = LOCK_REC_SUCCESS;
+
+ if (trx->read_only || dict_table_is_temporary(index->table)) {
+ return(DB_SUCCESS);
+ }
+
+ ut_ad(!dict_index_is_clust(index));
+ ut_ad(!dict_index_is_online_ddl(index));
+ ut_ad(type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE));
+
+ hash_table_t* hash = type_mode == LOCK_PREDICATE
+ ? lock_sys->prdt_hash
+ : lock_sys->prdt_page_hash;
+
+ /* Another transaction cannot have an implicit lock on the record,
+ because when we come here, we already have modified the clustered
+ index record, and this would not have been possible if another active
+ transaction had modified this secondary index record. */
+
+ lock_mutex_enter();
+
+ const ulint prdt_mode = mode | type_mode;
+ lock_t* lock = lock_rec_get_first_on_page(hash, block);
+
+ if (lock == NULL) {
+ lock = lock_rec_create(
+#ifdef WITH_WSREP
+ NULL, NULL, /* FIXME: replicate SPATIAL INDEX locks */
+#endif
+ mode | type_mode, block, PRDT_HEAPNO,
+ index, trx, FALSE);
+
+ status = LOCK_REC_SUCCESS_CREATED;
+ } else {
+ trx_mutex_enter(trx);
+
+ if (lock_rec_get_next_on_page(lock)
+ || lock->trx != trx
+ || lock->type_mode != (LOCK_REC | prdt_mode)
+ || lock_rec_get_n_bits(lock) == 0
+ || ((type_mode & LOCK_PREDICATE)
+ && (!lock_prdt_consistent(
+ lock_get_prdt_from_lock(lock), prdt, 0)))) {
+
+ lock = lock_prdt_has_lock(
+ mode, type_mode, block, prdt, trx);
+
+ if (lock == NULL) {
+
+ lock_t* wait_for;
+
+ wait_for = lock_prdt_other_has_conflicting(
+ prdt_mode, block, prdt, trx);
+
+ if (wait_for != NULL) {
+
+ err = lock_rec_enqueue_waiting(
+#ifdef WITH_WSREP
+ NULL, /* FIXME: replicate
+ SPATIAL INDEX locks */
+#endif
+ mode | type_mode,
+ block, PRDT_HEAPNO,
+ index, thr, prdt);
+ } else {
+
+ lock_prdt_add_to_queue(
+ prdt_mode, block, index, trx,
+ prdt, true);
+
+ status = LOCK_REC_SUCCESS;
+ }
+ }
+
+ trx_mutex_exit(trx);
+
+ } else {
+ trx_mutex_exit(trx);
+
+ if (!lock_rec_get_nth_bit(lock, PRDT_HEAPNO)) {
+ lock_rec_set_nth_bit(lock, PRDT_HEAPNO);
+ status = LOCK_REC_SUCCESS_CREATED;
+ }
+ }
+ }
+
+ lock_mutex_exit();
+
+ if (status == LOCK_REC_SUCCESS_CREATED && type_mode == LOCK_PREDICATE) {
+		/* Append the predicate to the lock record */
+ lock_prdt_set_prdt(lock, prdt);
+ }
+
+ return(err);
+}
+
+/*********************************************************************//**
+Acquire a "Page" lock on a block
+@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
+dberr_t
+lock_place_prdt_page_lock(
+/*======================*/
+ ulint space, /*!< in: space for the page to lock */
+ ulint page_no, /*!< in: page number */
+ dict_index_t* index, /*!< in: secondary index */
+ que_thr_t* thr) /*!< in: query thread */
+{
+ ut_ad(thr != NULL);
+ ut_ad(!srv_read_only_mode);
+
+ ut_ad(!dict_index_is_clust(index));
+ ut_ad(!dict_index_is_online_ddl(index));
+
+ /* Another transaction cannot have an implicit lock on the record,
+ because when we come here, we already have modified the clustered
+ index record, and this would not have been possible if another active
+ transaction had modified this secondary index record. */
+
+ lock_mutex_enter();
+
+ const lock_t* lock = lock_rec_get_first_on_page_addr(
+ lock_sys->prdt_page_hash, space, page_no);
+
+ const ulint mode = LOCK_S | LOCK_PRDT_PAGE;
+ trx_t* trx = thr_get_trx(thr);
+
+ if (lock != NULL) {
+
+ trx_mutex_enter(trx);
+
+ /* Find a matching record lock owned by this transaction. */
+
+ while (lock != NULL && lock->trx != trx) {
+
+ lock = lock_rec_get_next_on_page_const(lock);
+ }
+
+ ut_ad(lock == NULL || lock->type_mode == (mode | LOCK_REC));
+ ut_ad(lock == NULL || lock_rec_get_n_bits(lock) != 0);
+
+ trx_mutex_exit(trx);
+ }
+
+ if (lock == NULL) {
+ lock = lock_rec_create_low(
+#ifdef WITH_WSREP
+ NULL, NULL, /* FIXME: replicate SPATIAL INDEX locks */
+#endif
+ mode, space, page_no, NULL, PRDT_HEAPNO,
+ index, trx, FALSE);
+
+#ifdef PRDT_DIAG
+ printf("GIS_DIAGNOSTIC: page lock %d\n", (int) page_no);
+#endif /* PRDT_DIAG */
+ }
+
+ lock_mutex_exit();
+
+ return(DB_SUCCESS);
+}
+
+/** Check whether another transaction holds an R-tree page lock on a page
+@param[in]	trx	trx to test the lock
+@param[in]	space	space id for the page
+@param[in]	page_no	page number
+@return true if no other transaction holds such a lock */
+bool
+lock_test_prdt_page_lock(
+ const trx_t* trx,
+ ulint space,
+ ulint page_no)
+{
+ lock_t* lock;
+
+ lock_mutex_enter();
+
+ lock = lock_rec_get_first_on_page_addr(
+ lock_sys->prdt_page_hash, space, page_no);
+
+ lock_mutex_exit();
+
+ return(lock == NULL || trx == lock->trx);
+}
+
+/*************************************************************//**
+Moves the locks of a page to another page and resets the lock bits of
+the donating records. */
+void
+lock_prdt_rec_move(
+/*===============*/
+ const buf_block_t* receiver, /*!< in: buffer block containing
+ the receiving record */
+ const buf_block_t* donator) /*!< in: buffer block containing
+ the donating record */
+{
+ lock_t* lock;
+
+ if (!lock_sys->prdt_hash) {
+ return;
+ }
+
+ lock_mutex_enter();
+
+ for (lock = lock_rec_get_first(lock_sys->prdt_hash,
+ donator, PRDT_HEAPNO);
+ lock != NULL;
+ lock = lock_rec_get_next(PRDT_HEAPNO, lock)) {
+
+ const ulint type_mode = lock->type_mode;
+ lock_prdt_t* lock_prdt = lock_get_prdt_from_lock(lock);
+
+ lock_rec_reset_nth_bit(lock, PRDT_HEAPNO);
+ lock_reset_lock_and_trx_wait(lock);
+
+ lock_prdt_add_to_queue(
+ type_mode, receiver, lock->index, lock->trx,
+ lock_prdt, FALSE);
+ }
+
+ lock_mutex_exit();
+}
+
+/** Removes predicate lock objects set on an index page which is discarded.
+@param[in] block page to be discarded
+@param[in] lock_hash lock hash */
+void
+lock_prdt_page_free_from_discard(
+ const buf_block_t* block,
+ hash_table_t* lock_hash)
+{
+ lock_t* lock;
+ lock_t* next_lock;
+ ulint space;
+ ulint page_no;
+
+ ut_ad(lock_mutex_own());
+
+ space = block->page.id.space();
+ page_no = block->page.id.page_no();
+
+ lock = lock_rec_get_first_on_page_addr(lock_hash, space, page_no);
+
+ while (lock != NULL) {
+ next_lock = lock_rec_get_next_on_page(lock);
+
+ lock_rec_discard(lock);
+
+ lock = next_lock;
+ }
+}
diff --git a/storage/innobase/lock/lock0wait.cc b/storage/innobase/lock/lock0wait.cc
index 8f39c555c6a..5d0d41ef494 100644
--- a/storage/innobase/lock/lock0wait.cc
+++ b/storage/innobase/lock/lock0wait.cc
@@ -26,16 +26,17 @@ Created 25/5/2010 Sunny Bains
#define LOCK_MODULE_IMPLEMENTATION
+#include "univ.i"
+#include <mysql/service_thd_wait.h>
+#include <mysql/service_wsrep.h>
+
#include "srv0mon.h"
#include "que0que.h"
#include "lock0lock.h"
#include "row0mysql.h"
#include "srv0start.h"
-#include "ha_prototypes.h"
#include "lock0priv.h"
-#include <mysql/service_wsrep.h>
-
/*********************************************************************//**
Print the contents of the lock_sys_t::waiting_threads array. */
static
@@ -43,14 +44,11 @@ void
lock_wait_table_print(void)
/*=======================*/
{
- ulint i;
- const srv_slot_t* slot;
-
ut_ad(lock_wait_mutex_own());
- slot = lock_sys->waiting_threads;
+ const srv_slot_t* slot = lock_sys->waiting_threads;
- for (i = 0; i < OS_THREAD_MAX_N; i++, ++slot) {
+ for (ulint i = 0; i < OS_THREAD_MAX_N; i++, ++slot) {
fprintf(stderr,
"Slot %lu: thread type %lu,"
@@ -127,7 +125,7 @@ lock_wait_table_release_slot(
/*********************************************************************//**
Reserves a slot in the thread table for the current user OS thread.
-@return reserved slot */
+@return reserved slot */
static
srv_slot_t*
lock_wait_table_reserve_slot(
@@ -151,7 +149,7 @@ lock_wait_table_reserve_slot(
slot->thr->slot = slot;
if (slot->event == NULL) {
- slot->event = os_event_create();
+ slot->event = os_event_create(0);
ut_a(slot->event);
}
@@ -171,16 +169,10 @@ lock_wait_table_reserve_slot(
}
}
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: There appear to be %lu user"
- " threads currently waiting\n"
- "InnoDB: inside InnoDB, which is the"
- " upper limit. Cannot continue operation.\n"
- "InnoDB: As a last thing, we print"
- " a list of waiting threads.\n", (ulong) OS_THREAD_MAX_N);
-
+ ib::error() << "There appear to be " << OS_THREAD_MAX_N << " user"
+ " threads currently waiting inside InnoDB, which is the upper"
+ " limit. Cannot continue operation. Before aborting, we print"
+ " a list of waiting threads.";
lock_wait_table_print();
ut_error;
@@ -189,21 +181,35 @@ lock_wait_table_reserve_slot(
#ifdef WITH_WSREP
/*********************************************************************//**
-check if lock timeout was for priority thread,
+Check if the lock timeout was for a priority thread;
as a side effect, trigger the lock monitor
@param[in]	trx	transaction owning the lock
@param[in]	locked	true if trx and lock_sys mutexes are owned
-@return false for regular lock timeout */
+@return false for regular lock timeout */
static
bool
wsrep_is_BF_lock_timeout(
const trx_t* trx,
bool locked = true)
{
- if (trx->is_wsrep() && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
- fprintf(stderr, "WSREP: BF lock wait long for trx " TRX_ID_FMT "\n", trx->id);
- srv_print_innodb_monitor = TRUE;
- srv_print_innodb_lock_monitor = TRUE;
+ if (trx->is_wsrep() && wsrep_thd_is_BF(trx->mysql_thd, FALSE)
+ && trx->error_state != DB_DEADLOCK) {
+ ib::info() << "WSREP: BF lock wait long for trx:" << ib::hex(trx->id)
+ << " query: " << wsrep_thd_query(trx->mysql_thd);
+ if (!locked) {
+ lock_mutex_enter();
+ }
+
+ ut_ad(lock_mutex_own());
+
+ wsrep_trx_print_locking(stderr, trx, 3000);
+
+ if (!locked) {
+ lock_mutex_exit();
+ }
+
+ srv_print_innodb_monitor = TRUE;
+ srv_print_innodb_lock_monitor = TRUE;
os_event_set(srv_monitor_event);
return true;
}
@@ -217,7 +223,6 @@ occurs during the wait trx->error_state associated with thr is
!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
are possible errors. DB_DEADLOCK is returned if selective deadlock
resolution chose this transaction as a victim. */
-UNIV_INTERN
void
lock_wait_suspend_thread(
/*=====================*/
@@ -226,7 +231,6 @@ lock_wait_suspend_thread(
{
srv_slot_t* slot;
trx_t* trx;
- ulint had_dict_lock;
ibool was_declared_inside_innodb;
ulong lock_wait_timeout;
@@ -258,7 +262,7 @@ lock_wait_suspend_thread(
if (trx->lock.was_chosen_as_deadlock_victim) {
trx->error_state = DB_DEADLOCK;
- trx->lock.was_chosen_as_deadlock_victim = FALSE;
+ trx->lock.was_chosen_as_deadlock_victim = false;
}
lock_wait_mutex_exit();
@@ -296,7 +300,7 @@ lock_wait_suspend_thread(
lock_mutex_exit();
}
- had_dict_lock = trx->dict_operation_lock_mode;
+ ulint had_dict_lock = trx->dict_operation_lock_mode;
switch (had_dict_lock) {
case 0:
@@ -384,21 +388,27 @@ lock_wait_suspend_thread(
/* Record the lock wait time for this thread */
thd_set_lock_wait_time(trx->mysql_thd, diff_time);
+
+ DBUG_EXECUTE_IF("lock_instrument_slow_query_log",
+ os_thread_sleep(1000););
+ }
+
+	/* The transaction may have been chosen as a deadlock victim
+	while it slept. */
+ if (trx->error_state == DB_DEADLOCK) {
+ return;
}
if (lock_wait_timeout < 100000000
- && wait_time > (double) lock_wait_timeout) {
+ && wait_time > (double) lock_wait_timeout
#ifdef WITH_WSREP
- if (!trx->is_wsrep() ||
- (!wsrep_is_BF_lock_timeout(trx) &&
- trx->error_state != DB_DEADLOCK)) {
+ && (!trx->is_wsrep()
+ || (!wsrep_is_BF_lock_timeout(trx, false)
+ && trx->error_state != DB_DEADLOCK))
#endif /* WITH_WSREP */
+ ) {
- trx->error_state = DB_LOCK_WAIT_TIMEOUT;
+ trx->error_state = DB_LOCK_WAIT_TIMEOUT;
-#ifdef WITH_WSREP
- }
-#endif /* WITH_WSREP */
MONITOR_INC(MONITOR_TIMEOUT);
}
@@ -411,7 +421,6 @@ lock_wait_suspend_thread(
/********************************************************************//**
Releases a user OS thread waiting for a lock to be released, if the
thread is already suspended. */
-UNIV_INTERN
void
lock_wait_release_thread_if_suspended(
/*==================================*/
@@ -432,7 +441,7 @@ lock_wait_release_thread_if_suspended(
if (trx->lock.was_chosen_as_deadlock_victim) {
trx->error_state = DB_DEADLOCK;
- trx->lock.was_chosen_as_deadlock_victim = FALSE;
+ trx->lock.was_chosen_as_deadlock_victim = false;
}
os_event_set(thr->slot->event);
@@ -472,13 +481,14 @@ lock_wait_check_and_cancel(
trx_mutex_enter(trx);
- if (trx->lock.wait_lock) {
+ if (trx->lock.wait_lock != NULL) {
ut_a(trx->lock.que_state == TRX_QUE_LOCK_WAIT);
+
#ifdef WITH_WSREP
if (!wsrep_is_BF_lock_timeout(trx)) {
#endif /* WITH_WSREP */
- lock_cancel_waiting_and_release(trx->lock.wait_lock);
+ lock_cancel_waiting_and_release(trx->lock.wait_lock);
#ifdef WITH_WSREP
}
#endif /* WITH_WSREP */
@@ -492,12 +502,12 @@ lock_wait_check_and_cancel(
/*********************************************************************//**
A thread which wakes up threads whose lock wait may have lasted too long.
-@return a dummy parameter */
-extern "C" UNIV_INTERN
+@return a dummy parameter */
+extern "C"
os_thread_ret_t
DECLARE_THREAD(lock_wait_timeout_thread)(void*)
{
- ib_int64_t sig_count = 0;
+ int64_t sig_count = 0;
os_event_t event = lock_sys->timeout_event;
ut_ad(!srv_read_only_mode);
@@ -549,7 +559,8 @@ DECLARE_THREAD(lock_wait_timeout_thread)(void*)
/* We count the number of threads in os_thread_exit(). A created
thread should always use that to exit and not use return() to exit. */
- os_thread_exit(NULL);
+ os_thread_exit();
OS_THREAD_DUMMY_RETURN;
}
+
diff --git a/storage/innobase/log/log0crypt.cc b/storage/innobase/log/log0crypt.cc
index a1d63476161..f1297921839 100644
--- a/storage/innobase/log/log0crypt.cc
+++ b/storage/innobase/log/log0crypt.cc
@@ -22,717 +22,386 @@ Innodb log encrypt/decrypt
Created 11/25/2013 Minli Zhu Google
Modified Jan Lindström jan.lindstrom@mariadb.com
+MDEV-11782: Rewritten for MariaDB 10.2 by Marko Mäkelä, MariaDB Corporation.
*******************************************************/
#include "m_string.h"
#include "log0crypt.h"
#include <mysql/service_my_crypt.h>
-#include "log0log.h"
+#include "log0crypt.h"
#include "srv0start.h" // for srv_start_lsn
#include "log0recv.h" // for recv_sys
-#include "ha_prototypes.h" // IB_LOG_
-
-
-/* Used for debugging */
-// #define DEBUG_CRYPT 1
-#define UNENCRYPTED_KEY_VER 0
-
-/* If true, enable redo log encryption. */
-extern my_bool srv_encrypt_log;
-
-
-#include <algorithm> // std::sort
-#include <deque>
-
-/* If true, enable redo log encryption. */
-UNIV_INTERN my_bool srv_encrypt_log = FALSE;
-/*
- Sub system type for InnoDB redo log crypto.
- Set and used to validate crypto msg.
-*/
-static const byte redo_log_purpose_byte = 0x02;
+/** innodb_encrypt_log: whether to encrypt the redo log */
+my_bool srv_encrypt_log;
+/** Redo log encryption key ID */
#define LOG_DEFAULT_ENCRYPTION_KEY 1
-/*
- Store this many keys into each checkpoint info
-*/
-static const size_t kMaxSavedKeys = LOG_CRYPT_MAX_ENTRIES;
+typedef union {
+ uint32_t words[MY_AES_BLOCK_SIZE / sizeof(uint32_t)];
+ byte bytes[MY_AES_BLOCK_SIZE];
+} aes_block_t;
struct crypt_info_t {
- ib_uint64_t checkpoint_no; /*!< checkpoint no */
+ ulint checkpoint_no; /*!< checkpoint no; 32 bits */
uint key_version; /*!< mysqld key version */
- byte crypt_msg[MY_AES_BLOCK_SIZE];
- byte crypt_key[MY_AES_BLOCK_SIZE];
- byte crypt_nonce[MY_AES_BLOCK_SIZE];
+ /** random string for encrypting the key */
+ aes_block_t crypt_msg;
+ /** the secret key */
+ aes_block_t crypt_key;
+ /** a random string for the per-block initialization vector */
+ union {
+ uint32_t word;
+ byte bytes[4];
+ } crypt_nonce;
};
-static std::deque<crypt_info_t> crypt_info;
-
-/*********************************************************************//**
-Get crypt info from checkpoint.
-@return a crypt info or NULL if not present. */
-static
-const crypt_info_t*
-get_crypt_info(
-/*===========*/
- ib_uint64_t checkpoint_no)
-{
- /* so that no one is modifying array while we search */
- ut_ad(mutex_own(&(log_sys->mutex)));
- size_t items = crypt_info.size();
-
- /* a log block only stores 4-bytes of checkpoint no */
- checkpoint_no &= 0xFFFFFFFF;
- for (size_t i = 0; i < items; i++) {
- struct crypt_info_t* it = &crypt_info[i];
+/** The crypt info */
+static crypt_info_t info;
- if (it->checkpoint_no == checkpoint_no) {
- return it;
- }
- }
-
- /* If checkpoint contains more than one key and we did not
- find the correct one use the first one. */
- if (items) {
- return (&crypt_info[0]);
- }
+/** Initialization vector used for temporary files/tablespace */
+static byte tmp_iv[MY_AES_BLOCK_SIZE];
- return NULL;
-}
+/** Crypt info when upgrading from 10.1 */
+static crypt_info_t infos[5 * 2];
+/** First unused slot in infos[] */
+static size_t infos_used;
/*********************************************************************//**
-Get crypt info from log block
-@return a crypt info or NULL if not present. */
-static
-const crypt_info_t*
-get_crypt_info(
-/*===========*/
- const byte* log_block)
+Get a log block's start lsn.
+@return a log block's start lsn */
+static inline
+lsn_t
+log_block_get_start_lsn(
+/*====================*/
+ lsn_t lsn, /*!< in: checkpoint lsn */
+ ulint log_block_no) /*!< in: log block number */
{
- ib_uint64_t checkpoint_no = log_block_get_checkpoint_no(log_block);
- return get_crypt_info(checkpoint_no);
+ lsn_t start_lsn =
+ (lsn & (lsn_t)0xffffffff00000000ULL) |
+ (((log_block_no - 1) & (lsn_t)0x3fffffff) << 9);
+ return start_lsn;
}
-/*********************************************************************//**
-Print checkpoint no from log block and all encryption keys from
-checkpoints if they are present. Used for problem analysis. */
+/** Encrypt or decrypt log blocks.
+@param[in,out] buf log blocks to encrypt or decrypt
+@param[in] lsn log sequence number of the start of the buffer
+@param[in] size size of the buffer, in bytes
+@param[in] decrypt whether to decrypt instead of encrypting */
+UNIV_INTERN
void
-log_crypt_print_checkpoint_keys(
-/*============================*/
- const byte* log_block)
+log_crypt(byte* buf, lsn_t lsn, ulint size, bool decrypt)
{
- ib_uint64_t checkpoint_no = log_block_get_checkpoint_no(log_block);
-
- if (crypt_info.size()) {
- fprintf(stderr,
- "InnoDB: redo log checkpoint: " UINT64PF " [ chk key ]: ",
- checkpoint_no);
- for (size_t i = 0; i < crypt_info.size(); i++) {
- struct crypt_info_t* it = &crypt_info[i];
- fprintf(stderr, "[ " UINT64PF " %u ] ",
- it->checkpoint_no,
- it->key_version);
- }
- fprintf(stderr, "\n");
- }
-}
-
-/*********************************************************************//**
-Call AES CTR to encrypt/decrypt log blocks. */
-static
-Crypt_result
-log_blocks_crypt(
-/*=============*/
- const byte* block, /*!< in: blocks before encrypt/decrypt*/
- lsn_t lsn, /*!< in: log sequence number of the start
- of the buffer */
- ulint size, /*!< in: size of block */
- byte* dst_block, /*!< out: blocks after encrypt/decrypt */
- int what, /*!< in: encrypt or decrypt*/
- const crypt_info_t* crypt_info) /*!< in: crypt info or NULL */
-{
- byte *log_block = (byte*)block;
- Crypt_result rc = MY_AES_OK;
- uint dst_len;
- byte aes_ctr_counter[MY_AES_BLOCK_SIZE];
-
- const uint src_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE;
- for (ulint i = 0; i < size ; i += OS_FILE_LOG_BLOCK_SIZE,
- lsn += OS_FILE_LOG_BLOCK_SIZE) {
- ulint log_block_no = log_block_get_hdr_no(log_block);
-
- const crypt_info_t* info = crypt_info == NULL ? get_crypt_info(log_block) :
- crypt_info;
-#ifdef DEBUG_CRYPT
- fprintf(stderr,
- "%s %lu chkpt: %lu key: %u lsn: %lu\n",
- what == ENCRYPTION_FLAG_ENCRYPT ? "crypt" : "decrypt",
- log_block_no,
- log_block_get_checkpoint_no(log_block),
- info ? info->key_version : 0,
- log_block_start_lsn);
-#endif
- /* If no key is found from checkpoint assume the log_block
- to be unencrypted. If checkpoint contains the encryption key
- compare log_block current checksum, if checksum matches,
- block can't be encrypted. */
- if (info == NULL ||
- info->key_version == UNENCRYPTED_KEY_VER ||
- (log_block_checksum_is_ok_or_old_format(log_block, false) &&
- what == ENCRYPTION_FLAG_DECRYPT)) {
- memcpy(dst_block, log_block, OS_FILE_LOG_BLOCK_SIZE);
- goto next;
- }
-
- ut_ad(what == ENCRYPTION_FLAG_DECRYPT ? !log_block_checksum_is_ok_or_old_format(log_block, false) :
- log_block_checksum_is_ok_or_old_format(log_block, false));
-
- // Assume log block header is not encrypted
- memcpy(dst_block, log_block, LOG_BLOCK_HDR_SIZE);
-
- // aes_ctr_counter = nonce(3-byte) + start lsn to a log block
- // (8-byte) + lbn (4-byte) + abn
- // (1-byte, only 5 bits are used). "+" means concatenate.
- bzero(aes_ctr_counter, MY_AES_BLOCK_SIZE);
- memcpy(aes_ctr_counter, info->crypt_nonce, 3);
- mach_write_to_8(aes_ctr_counter + 3, lsn);
- mach_write_to_4(aes_ctr_counter + 11, log_block_no);
- bzero(aes_ctr_counter + 15, 1);
-
- int rc;
- rc = encryption_crypt(log_block + LOG_BLOCK_HDR_SIZE, src_len,
- dst_block + LOG_BLOCK_HDR_SIZE, &dst_len,
- (unsigned char*)(info->crypt_key), 16,
- aes_ctr_counter, MY_AES_BLOCK_SIZE,
- what | ENCRYPTION_FLAG_NOPAD,
- LOG_DEFAULT_ENCRYPTION_KEY,
- info->key_version);
-
- ut_a(rc == MY_AES_OK);
- ut_a(dst_len == src_len);
-next:
- log_block += OS_FILE_LOG_BLOCK_SIZE;
- dst_block += OS_FILE_LOG_BLOCK_SIZE;
- }
-
- return rc;
-}
-
-/** Encrypt/decrypt temporary log blocks.
+ ut_ad(size % OS_FILE_LOG_BLOCK_SIZE == 0);
+ ut_a(info.key_version);
-@param[in] src_block block to encrypt or decrypt
-@param[in] size size of the block
-@param[out] dst_block destination block
-@param[in] what ENCRYPTION_FLAG_ENCRYPT or
- ENCRYPTION_FLAG_DECRYPT
-@param[in] offs offset to block
-@param[in] space_id tablespace id
-@return true if successful, false in case of failure
-*/
-static
-bool
-log_tmp_blocks_crypt(
- const byte* src_block,
- ulint size,
- byte* dst_block,
- int what,
- os_offset_t offs,
- ulint space_id)
-{
- Crypt_result rc = MY_AES_OK;
uint dst_len;
- byte aes_ctr_counter[MY_AES_BLOCK_SIZE];
- byte is_encrypt= what == ENCRYPTION_FLAG_ENCRYPT;
- const crypt_info_t* info = static_cast<const crypt_info_t*>(&crypt_info[0]);
+ uint32_t aes_ctr_iv[MY_AES_BLOCK_SIZE / sizeof(uint32_t)];
+ compile_time_assert(sizeof(uint32_t) == 4);
- // AES_CTR_COUNTER = space_id + offs
+#define LOG_CRYPT_HDR_SIZE 4
+ lsn &= ~lsn_t(OS_FILE_LOG_BLOCK_SIZE - 1);
- bzero(aes_ctr_counter, MY_AES_BLOCK_SIZE);
- mach_write_to_8(aes_ctr_counter, space_id);
- mach_write_to_8(aes_ctr_counter + 8, offs);
+ for (const byte* const end = buf + size; buf != end;
+ buf += OS_FILE_LOG_BLOCK_SIZE, lsn += OS_FILE_LOG_BLOCK_SIZE) {
+ uint32_t dst[(OS_FILE_LOG_BLOCK_SIZE - LOG_CRYPT_HDR_SIZE)
+ / sizeof(uint32_t)];
- rc = encryption_crypt(src_block, size,
- dst_block, &dst_len,
- (unsigned char*)(info->crypt_key), 16,
- aes_ctr_counter, MY_AES_BLOCK_SIZE,
- what | ENCRYPTION_FLAG_NOPAD,
- LOG_DEFAULT_ENCRYPTION_KEY,
- info->key_version);
+ /* The log block number is not encrypted. */
+ *aes_ctr_iv =
+#ifdef WORDS_BIGENDIAN
+ ~LOG_BLOCK_FLUSH_BIT_MASK
+#else
+ ~(LOG_BLOCK_FLUSH_BIT_MASK >> 24)
+#endif
+ & (*dst = *reinterpret_cast<const uint32_t*>(
+ buf + LOG_BLOCK_HDR_NO));
+#if LOG_BLOCK_HDR_NO + 4 != LOG_CRYPT_HDR_SIZE
+# error "LOG_BLOCK_HDR_NO has been moved; redo log format affected!"
+#endif
+ aes_ctr_iv[1] = info.crypt_nonce.word;
+ mach_write_to_8(reinterpret_cast<byte*>(aes_ctr_iv + 2), lsn);
+ ut_ad(log_block_get_start_lsn(lsn,
+ log_block_get_hdr_no(buf))
+ == lsn);
+
+ int rc = encryption_crypt(
+ buf + LOG_CRYPT_HDR_SIZE, sizeof dst,
+ reinterpret_cast<byte*>(dst), &dst_len,
+ const_cast<byte*>(info.crypt_key.bytes),
+ sizeof info.crypt_key,
+ reinterpret_cast<byte*>(aes_ctr_iv), sizeof aes_ctr_iv,
+ decrypt
+ ? ENCRYPTION_FLAG_DECRYPT | ENCRYPTION_FLAG_NOPAD
+ : ENCRYPTION_FLAG_ENCRYPT | ENCRYPTION_FLAG_NOPAD,
+ LOG_DEFAULT_ENCRYPTION_KEY,
+ info.key_version);
- if (rc != MY_AES_OK) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "%s failed for temporary log file with rc = %d",
- is_encrypt ? "Encryption" : "Decryption",
- rc);
- return false;
+ ut_a(rc == MY_AES_OK);
+ ut_a(dst_len == sizeof dst);
+ memcpy(buf + LOG_CRYPT_HDR_SIZE, dst, sizeof dst);
}
-
- return true;
}
-/** Get crypt info
-@return pointer to log crypt info or NULL
-*/
-inline
-const crypt_info_t*
-get_crypt_info()
+/** Generate crypt key from crypt msg.
+@param[in,out]	info	crypt info whose key is to be derived
+@param[in] upgrade whether to use the key in MariaDB 10.1 format
+@return whether the operation was successful */
+static bool init_crypt_key(crypt_info_t* info, bool upgrade = false)
{
- mutex_enter(&log_sys->mutex);
- const crypt_info_t* info = get_crypt_info(log_sys->next_checkpoint_no);
- mutex_exit(&log_sys->mutex);
-
- return info;
-}
-
-/** Find out is temporary log files encrypted.
-@return true if temporary log file should be encrypted, false if not */
-UNIV_INTERN
-bool
-log_tmp_is_encrypted()
-{
- const crypt_info_t* info = get_crypt_info();
-
- if (info == NULL || info->key_version == UNENCRYPTED_KEY_VER) {
+ byte mysqld_key[MY_AES_MAX_KEY_LENGTH];
+ uint keylen = sizeof mysqld_key;
+
+ compile_time_assert(16 == sizeof info->crypt_key);
+
+ if (uint rc = encryption_key_get(LOG_DEFAULT_ENCRYPTION_KEY,
+ info->key_version, mysqld_key,
+ &keylen)) {
+ ib::error()
+ << "Obtaining redo log encryption key version "
+ << info->key_version << " failed (" << rc
+ << "). Maybe the key or the required encryption "
+ "key management plugin was not found.";
return false;
}
- return true;
-}
-
-/** Encrypt temporary log block.
-@param[in] src_block block to encrypt or decrypt
-@param[in] size size of the block
-@param[out] dst_block destination block
-@param[in] offs offset to block
-@param[in] space_id tablespace id
-@return true if successfull, false in case of failure
-*/
-UNIV_INTERN
-bool
-log_tmp_block_encrypt(
- const byte* src_block,
- ulint size,
- byte* dst_block,
- os_offset_t offs,
- ulint space_id)
-{
- return (log_tmp_blocks_crypt(src_block, size, dst_block,
- ENCRYPTION_FLAG_ENCRYPT, offs, space_id));
-}
-
-/** Decrypt temporary log block.
-@param[in] src_block block to encrypt or decrypt
-@param[in] size size of the block
-@param[out] dst_block destination block
-@param[in] offs offset to block
-@param[in] space_id tablespace id
-@return true if successfull, false in case of failure
-*/
-UNIV_INTERN
-bool
-log_tmp_block_decrypt(
- const byte* src_block,
- ulint size,
- byte* dst_block,
- os_offset_t offs,
- ulint space_id)
-{
- return (log_tmp_blocks_crypt(src_block, size, dst_block,
- ENCRYPTION_FLAG_DECRYPT, offs, space_id));
-}
-
-/*********************************************************************//**
-Generate crypt key from crypt msg.
-@return true if successfull, false if not. */
-static
-bool
-init_crypt_key(
-/*===========*/
- crypt_info_t* info) /*< in/out: crypt info */
-{
- if (info->key_version == UNENCRYPTED_KEY_VER) {
- memset(info->crypt_key, 0, sizeof(info->crypt_key));
- memset(info->crypt_msg, 0, sizeof(info->crypt_msg));
- memset(info->crypt_nonce, 0, sizeof(info->crypt_nonce));
- return true;
- }
-
- byte mysqld_key[MY_AES_MAX_KEY_LENGTH] = {0};
- uint keylen= sizeof(mysqld_key);
- uint rc;
-
- rc = encryption_key_get(LOG_DEFAULT_ENCRYPTION_KEY, info->key_version, mysqld_key, &keylen);
-
- if (rc) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Redo log crypto: getting mysqld crypto key "
- "from key version failed err = %u. Reason could be that requested"
- " key_version %u is not found or required encryption "
- " key management is not found.", rc, info->key_version);
- return false;
+ if (upgrade) {
+ while (keylen < sizeof mysqld_key) {
+ mysqld_key[keylen++] = 0;
+ }
}
uint dst_len;
- int err= my_aes_crypt(MY_AES_ECB, ENCRYPTION_FLAG_NOPAD|ENCRYPTION_FLAG_ENCRYPT,
- info->crypt_msg, sizeof(info->crypt_msg), //src, srclen
- info->crypt_key, &dst_len, //dst, &dstlen
- (unsigned char*)&mysqld_key, sizeof(mysqld_key),
- NULL, 0);
+ int err= my_aes_crypt(MY_AES_ECB,
+ ENCRYPTION_FLAG_NOPAD | ENCRYPTION_FLAG_ENCRYPT,
+ info->crypt_msg.bytes, sizeof info->crypt_msg,
+ info->crypt_key.bytes, &dst_len,
+ mysqld_key, keylen, NULL, 0);
if (err != MY_AES_OK || dst_len != MY_AES_BLOCK_SIZE) {
- fprintf(stderr,
- "\nInnodb redo log crypto: getting redo log crypto key "
- "failed err = %d len = %u.\n", err, dst_len);
+ ib::error() << "Getting redo log crypto key failed: err = "
+ << err << ", len = " << dst_len;
return false;
}
return true;
}
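
The derivation above never stores the keyring key in the redo log state:
the random crypt_msg is encrypted once with the server key (AES-ECB, no
padding), and the 16-byte ciphertext becomes the log key. A sketch of the
derivation step in isolation, reusing the same service call as above (it
would only compile inside the server tree, where the plugin service header
is available):

	#include <mysql/service_my_crypt.h>

	/* Derive a 16-byte log key from the random message and the
	keyring key; mirrors the my_aes_crypt() call above. */
	static bool derive_log_key(
		const unsigned char msg[MY_AES_BLOCK_SIZE],
		const unsigned char* server_key, unsigned key_len,
		unsigned char out_key[MY_AES_BLOCK_SIZE])
	{
		unsigned int dst_len;
		int err = my_aes_crypt(
			MY_AES_ECB,
			ENCRYPTION_FLAG_NOPAD | ENCRYPTION_FLAG_ENCRYPT,
			msg, MY_AES_BLOCK_SIZE,
			out_key, &dst_len,
			server_key, key_len, NULL, 0);
		return err == MY_AES_OK && dst_len == MY_AES_BLOCK_SIZE;
	}
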
-/*********************************************************************//**
-Compare function for checkpoint numbers
-@return true if first checkpoint is larger than second one */
-static
+/** Initialize the redo log encryption key and random parameters
+when creating a new redo log.
+The random parameters will be persisted in the log checkpoint pages.
+@see log_crypt_write_checkpoint_buf()
+@see log_crypt_read_checkpoint_buf()
+@return whether the operation succeeded */
+UNIV_INTERN
bool
-mysort(const crypt_info_t& i,
- const crypt_info_t& j)
+log_crypt_init()
{
- return i.checkpoint_no > j.checkpoint_no;
-}
+ info.key_version = encryption_key_get_latest_version(
+ LOG_DEFAULT_ENCRYPTION_KEY);
-/*********************************************************************//**
-Add crypt info to set if it is not already present
-@return true if successful, false if not. */
-static
-bool
-add_crypt_info(
-/*===========*/
- crypt_info_t* info, /*!< in: crypt info */
- bool checkpoint_read)/*!< in: do we read checkpoint */
-{
- const crypt_info_t* found=NULL;
- /* so that no one is searching array while we modify it */
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- found = get_crypt_info(info->checkpoint_no);
-
- /* If one crypt info is found, then we add a new one only if we
- are reading a checkpoint from the log. New checkpoints will always
- use the first created crypt info. */
- if (found != NULL &&
- ( found->checkpoint_no == info->checkpoint_no || !checkpoint_read)) {
- // already present...
- return true;
+ if (info.key_version == ENCRYPTION_KEY_VERSION_INVALID) {
+ ib::error() << "innodb_encrypt_log: cannot get key version";
+ info.key_version = 0;
+ return false;
}
- if (!init_crypt_key(info)) {
+ if (my_random_bytes(tmp_iv, MY_AES_BLOCK_SIZE) != MY_AES_OK
+ || my_random_bytes(info.crypt_msg.bytes, sizeof info.crypt_msg)
+ != MY_AES_OK
+ || my_random_bytes(info.crypt_nonce.bytes, sizeof info.crypt_nonce)
+ != MY_AES_OK) {
+ ib::error() << "innodb_encrypt_log: my_random_bytes() failed";
return false;
}
- crypt_info.push_back(*info);
-
- /* a log block only stores 4-bytes of checkpoint no */
- crypt_info.back().checkpoint_no &= 0xFFFFFFFF;
-
- // keep keys sorted, assuming that last added key will be used most
- std::sort(crypt_info.begin(), crypt_info.end(), mysort);
-
- return true;
+ return init_crypt_key(&info);
}
-/*********************************************************************//**
-Set next checkpoint's key version to latest one, and generate current
-key. Key version 0 means no encryption. */
+/** Read the MariaDB 10.1 checkpoint crypto (version, msg and iv) info.
+@param[in] buf checkpoint buffer
+@return whether the operation was successful */
UNIV_INTERN
-void
-log_crypt_set_ver_and_key(
-/*======================*/
- ib_uint64_t next_checkpoint_no)
+bool
+log_crypt_101_read_checkpoint(const byte* buf)
{
- crypt_info_t info;
- info.checkpoint_no = next_checkpoint_no;
+ buf += 20 + 32 * 9;
- if (!srv_encrypt_log) {
- info.key_version = UNENCRYPTED_KEY_VER;
- } else {
- info.key_version = encryption_key_get_latest_version(LOG_DEFAULT_ENCRYPTION_KEY);
- }
+ const size_t n = *buf++ == 2 ? std::min(unsigned(*buf++), 5U) : 0;
- if (info.key_version == UNENCRYPTED_KEY_VER) {
- memset(info.crypt_msg, 0, sizeof(info.crypt_msg));
- memset(info.crypt_nonce, 0, sizeof(info.crypt_nonce));
- } else {
- if (my_random_bytes(info.crypt_msg, MY_AES_BLOCK_SIZE) != MY_AES_OK) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Redo log crypto: generate "
- "%u-byte random number as crypto msg failed.",
- MY_AES_BLOCK_SIZE);
- ut_error;
+ for (size_t i = 0; i < n; i++) {
+ struct crypt_info_t& info = infos[infos_used];
+ unsigned checkpoint_no = mach_read_from_4(buf);
+ for (size_t j = 0; j < infos_used; j++) {
+ if (infos[j].checkpoint_no == checkpoint_no) {
+ /* Do not overwrite an existing slot. */
+ goto next_slot;
+ }
}
-
- if (my_random_bytes(info.crypt_nonce, MY_AES_BLOCK_SIZE) != MY_AES_OK) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Redo log crypto: generate "
- "%u-byte random number as AES_CTR nonce failed.",
- MY_AES_BLOCK_SIZE);
- ut_error;
+ if (infos_used >= UT_ARR_SIZE(infos)) {
+ ut_ad(!"too many checkpoint pages");
+ goto next_slot;
}
+ infos_used++;
+ info.checkpoint_no = checkpoint_no;
+ info.key_version = mach_read_from_4(buf + 4);
+ memcpy(info.crypt_msg.bytes, buf + 8, sizeof info.crypt_msg);
+ memcpy(info.crypt_nonce.bytes, buf + 24,
+ sizeof info.crypt_nonce);
+ if (!init_crypt_key(&info, true)) {
+ return false;
+ }
+next_slot:
+ buf += 4 + 4 + 2 * MY_AES_BLOCK_SIZE;
}
- add_crypt_info(&info, false);
+ return true;
}
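
[Editor's note] The 10.1 checkpoint layout decoded above is simple enough to sketch standalone: skip the 20 + 32*9 fixed checkpoint bytes, expect a purpose byte of 2, read an entry count capped at 5, then walk 40-byte entries (4-byte checkpoint number, 4-byte key version, 16-byte message, 16-byte nonce). The struct and helper names below are illustrative only, not the server's API:

#include <cstdint>
#include <cstring>
#include <cstdio>
#include <algorithm>

struct CryptEntry {
    uint32_t checkpoint_no;
    uint32_t key_version;
    unsigned char msg[16];
    unsigned char nonce[16];
};

// Big-endian read, as mach_read_from_4() does in InnoDB.
static uint32_t read_be32(const unsigned char* p) {
    return (uint32_t(p[0]) << 24) | (uint32_t(p[1]) << 16)
         | (uint32_t(p[2]) << 8) | uint32_t(p[3]);
}

// Parse up to 5 crypt entries from a 10.1-format checkpoint page.
// Returns the number of entries parsed.
static size_t parse_101_crypt(const unsigned char* buf, CryptEntry* out) {
    buf += 20 + 32 * 9;                      // skip fixed checkpoint fields
    if (*buf++ != 2) return 0;               // purpose byte must be 2
    size_t n = std::min<size_t>(*buf++, 5);  // at most 5 saved keys
    for (size_t i = 0; i < n; i++) {
        out[i].checkpoint_no = read_be32(buf);
        out[i].key_version   = read_be32(buf + 4);
        memcpy(out[i].msg,   buf + 8,  16);
        memcpy(out[i].nonce, buf + 24, 16);
        buf += 4 + 4 + 16 + 16;              // entry stride: 40 bytes
    }
    return n;
}

int main() {
    unsigned char page[512] = {0};
    unsigned char* p = page + 20 + 32 * 9;
    p[0] = 2; p[1] = 1;                      // one entry
    p[2 + 3] = 7;                            // checkpoint_no = 7 (big-endian)
    CryptEntry e[5];
    size_t n = parse_101_crypt(page, e);
    printf("entries: %zu, checkpoint_no: %u\n", n, e[0].checkpoint_no);
}
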
-/********************************************************
-Encrypt one or more log blocks before they are flushed to disk */
+/** Decrypt a MariaDB 10.1 redo log block.
+@param[in,out] buf log block
+@return whether the decryption was successful */
UNIV_INTERN
-void
-log_encrypt_before_write(
-/*=====================*/
- ib_uint64_t next_checkpoint_no, /*!< in: log group to be flushed */
- byte* block, /*!< in/out: pointer to a log block */
- lsn_t lsn, /*!< in: log sequence number of
- the start of the buffer */
- const ulint size) /*!< in: size of log blocks */
+bool
+log_crypt_101_read_block(byte* buf)
{
- ut_ad(size % OS_FILE_LOG_BLOCK_SIZE == 0);
-
- const crypt_info_t* info = get_crypt_info(next_checkpoint_no);
- if (info == NULL) {
- return;
- }
-
- /* If the key is not encrypted or user has requested not to
- encrypt, do not change log block. */
- if (info->key_version == UNENCRYPTED_KEY_VER || !srv_encrypt_log) {
- return;
- }
-
- byte* dst_frame = (byte*)malloc(size);
-
- //encrypt log blocks content
- Crypt_result result = log_blocks_crypt(
- block, lsn, size, dst_frame, ENCRYPTION_FLAG_ENCRYPT, NULL);
-
- if (result == MY_AES_OK) {
- ut_ad(block[0] == dst_frame[0]);
- memcpy(block, dst_frame, size);
+ ut_ad(log_block_calc_checksum_format_0(buf)
+ != log_block_get_checksum(buf));
+ const uint32_t checkpoint_no
+ = uint32_t(log_block_get_checkpoint_no(buf));
+ const crypt_info_t* info = infos;
+ for (const crypt_info_t* const end = info + infos_used; info < end;
+ info++) {
+ if (info->key_version
+ && info->checkpoint_no == checkpoint_no) {
+ goto found;
+ }
}
- free(dst_frame);
- if (unlikely(result != MY_AES_OK)) {
- ut_error;
+ if (infos_used == 0) {
+ return false;
}
-}
-
-/********************************************************
-Decrypt a specified log segment after it is read from a log file to a buffer.
-*/
-void
-log_decrypt_after_read(
-/*===================*/
- byte* frame, /*!< in/out: log segment */
- lsn_t lsn, /*!< in: log sequence number of the start
- of the buffer */
- const ulint size) /*!< in: log segment size */
-{
- ut_ad(size % OS_FILE_LOG_BLOCK_SIZE == 0);
- byte* dst_frame = (byte*)malloc(size);
+ /* MariaDB Server 10.1 would use the first key if it fails to
+ find a key for the current checkpoint. */
+ info = infos;
+found:
+ byte dst[OS_FILE_LOG_BLOCK_SIZE];
+ uint dst_len;
+ byte aes_ctr_iv[MY_AES_BLOCK_SIZE];
- // decrypt log blocks content
- Crypt_result result = log_blocks_crypt(
- frame, lsn, size, dst_frame, ENCRYPTION_FLAG_DECRYPT, NULL);
+ const uint src_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE;
- if (result == MY_AES_OK) {
- memcpy(frame, dst_frame, size);
+ ulint log_block_no = log_block_get_hdr_no(buf);
+
+ /* The log block header is not encrypted. */
+ memcpy(dst, buf, LOG_BLOCK_HDR_SIZE);
+
+ memcpy(aes_ctr_iv, info->crypt_nonce.bytes, 3);
+ mach_write_to_8(aes_ctr_iv + 3,
+ log_block_get_start_lsn(srv_start_lsn, log_block_no));
+ memcpy(aes_ctr_iv + 11, buf, 4);
+ aes_ctr_iv[11] &= ~(LOG_BLOCK_FLUSH_BIT_MASK >> 24);
+ aes_ctr_iv[15] = 0;
+
+ int rc = encryption_crypt(buf + LOG_BLOCK_HDR_SIZE, src_len,
+ dst + LOG_BLOCK_HDR_SIZE, &dst_len,
+ const_cast<byte*>(info->crypt_key.bytes),
+ MY_AES_BLOCK_SIZE,
+ aes_ctr_iv, MY_AES_BLOCK_SIZE,
+ ENCRYPTION_FLAG_DECRYPT
+ | ENCRYPTION_FLAG_NOPAD,
+ LOG_DEFAULT_ENCRYPTION_KEY,
+ info->key_version);
+
+ if (rc != MY_AES_OK || dst_len != src_len) {
+ return false;
}
- free(dst_frame);
- if (unlikely(result != MY_AES_OK)) {
- ut_error;
- }
+ memcpy(buf, dst, sizeof dst);
+ return true;
}
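
[Editor's note] The 16-byte AES-CTR IV built above packs three different inputs. Below is a minimal sketch of just the IV assembly, assuming the caller already computed the block's start LSN (the server derives it via log_block_get_start_lsn() from srv_start_lsn and the block number); the helper names are illustrative:

#include <cstdint>
#include <cstring>
#include <cstdio>

// Big-endian store, like mach_write_to_8().
static void write_be64(unsigned char* p, uint64_t v) {
    for (int i = 7; i >= 0; i--) { p[i] = v & 0xff; v >>= 8; }
}

// Build the 16-byte AES-CTR IV exactly as the decryption path lays it out:
// bytes 0..2   first 3 bytes of the per-key nonce
// bytes 3..10  big-endian start LSN of the block
// bytes 11..14 first 4 header bytes, with the flush bit cleared
// byte  15     always zero
static void build_iv(unsigned char iv[16], const unsigned char nonce[16],
                     uint64_t start_lsn, const unsigned char* block_hdr) {
    const uint32_t FLUSH_BIT = 0x80000000; // LOG_BLOCK_FLUSH_BIT_MASK
    memcpy(iv, nonce, 3);
    write_be64(iv + 3, start_lsn);
    memcpy(iv + 11, block_hdr, 4);
    iv[11] &= (unsigned char) ~(FLUSH_BIT >> 24);
    iv[15] = 0;
}

int main() {
    unsigned char nonce[16] = {0xaa, 0xbb, 0xcc};
    unsigned char hdr[4] = {0x80, 0x00, 0x00, 0x2a}; // flush bit set
    unsigned char iv[16];
    build_iv(iv, nonce, 0x123456, hdr);
    for (int i = 0; i < 16; i++) printf("%02x", iv[i]);
    printf("\n"); // byte 11 prints as 00: the flush bit was masked away
}
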
-/*********************************************************************//**
-Writes the crypto (version, msg and iv) info, which has been used for
-log blocks with lsn <= this checkpoint's lsn, to a log header's
-checkpoint buf. */
+/** Add the encryption information to a redo log checkpoint buffer.
+@param[in,out] buf checkpoint buffer */
UNIV_INTERN
void
-log_crypt_write_checkpoint_buf(
-/*===========================*/
- byte* buf) /*!< in/out: checkpoint buffer */
+log_crypt_write_checkpoint_buf(byte* buf)
{
- byte *save = buf;
-
- // Only write kMaxSavedKeys (sort keys to remove oldest)
- std::sort(crypt_info.begin(), crypt_info.end(), mysort);
- while (crypt_info.size() > kMaxSavedKeys) {
- crypt_info.pop_back();
- }
-
- bool encrypted = false;
- for (size_t i = 0; i < crypt_info.size(); i++) {
- const crypt_info_t & it = crypt_info[i];
- if (it.key_version != UNENCRYPTED_KEY_VER) {
- encrypted = true;
- break;
- }
- }
-
- if (encrypted == false) {
- // if no encryption is in use, then zero out
- // crypt data for upward/downward compatibility
- memset(buf + LOG_CRYPT_VER, 0, LOG_CRYPT_SIZE);
- return;
- }
-
- ib_uint64_t checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);
- buf += LOG_CRYPT_VER;
-
- mach_write_to_1(buf + 0, redo_log_purpose_byte);
- mach_write_to_1(buf + 1, crypt_info.size());
- buf += 2;
- for (size_t i = 0; i < crypt_info.size(); i++) {
- struct crypt_info_t* it = &crypt_info[i];
- mach_write_to_4(buf + 0, it->checkpoint_no);
- mach_write_to_4(buf + 4, it->key_version);
- memcpy(buf + 8, it->crypt_msg, MY_AES_BLOCK_SIZE);
- memcpy(buf + 24, it->crypt_nonce, MY_AES_BLOCK_SIZE);
- buf += LOG_CRYPT_ENTRY_SIZE;
- }
-
-#ifdef DEBUG_CRYPT
- fprintf(stderr, "write chk: %lu [ chk key ]: ", checkpoint_no);
- for (size_t i = 0; i < crypt_info.size(); i++) {
- struct crypt_info_t* it = &crypt_info[i];
- fprintf(stderr, "[ %lu %u ] ",
- it->checkpoint_no,
- it->key_version);
- }
- fprintf(stderr, "\n");
-#else
- (void)checkpoint_no; // unused variable
-#endif
- ut_a((ulint)(buf - save) <= OS_FILE_LOG_BLOCK_SIZE);
+ ut_ad(info.key_version);
+ compile_time_assert(16 == sizeof info.crypt_msg);
+ compile_time_assert(LOG_CHECKPOINT_CRYPT_MESSAGE
+ - LOG_CHECKPOINT_CRYPT_NONCE
+ == sizeof info.crypt_nonce);
+
+ memcpy(buf + LOG_CHECKPOINT_CRYPT_MESSAGE, info.crypt_msg.bytes,
+ sizeof info.crypt_msg);
+ memcpy(buf + LOG_CHECKPOINT_CRYPT_NONCE, info.crypt_nonce.bytes,
+ sizeof info.crypt_nonce);
+ mach_write_to_4(buf + LOG_CHECKPOINT_CRYPT_KEY, info.key_version);
}
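
[Editor's note] The write and read paths above are mirror images over three fixed fields. A standalone round-trip sketch follows; the LOG_CHECKPOINT_CRYPT_* offsets are defined in a header not shown in this diff, so the values below are placeholders chosen only to satisfy the asserted relation (MESSAGE sits sizeof crypt_nonce bytes above NONCE):

#include <cstdint>
#include <cstring>
#include <cstdio>

// Illustrative offsets only; the real LOG_CHECKPOINT_CRYPT_* constants
// live in log0log.h and are not visible in this diff.
static const size_t CRYPT_NONCE = 320;
static const size_t CRYPT_MSG   = 336;   // NONCE + 16, per the assert above
static const size_t CRYPT_KEY   = 352;

static void write_be32(unsigned char* p, uint32_t v) {
    p[0] = v >> 24; p[1] = v >> 16; p[2] = v >> 8; p[3] = v;
}
static uint32_t read_be32(const unsigned char* p) {
    return (uint32_t(p[0]) << 24) | (uint32_t(p[1]) << 16)
         | (uint32_t(p[2]) << 8) | uint32_t(p[3]);
}

int main() {
    unsigned char buf[512] = {0};
    unsigned char msg[16] = {1}, nonce[16] = {2};

    // Write side, mirroring log_crypt_write_checkpoint_buf():
    memcpy(buf + CRYPT_MSG, msg, sizeof msg);
    memcpy(buf + CRYPT_NONCE, nonce, sizeof nonce);
    write_be32(buf + CRYPT_KEY, 42);  // key_version

    // Read side, mirroring log_crypt_read_checkpoint_buf():
    unsigned char msg2[16], nonce2[16];
    memcpy(msg2, buf + CRYPT_MSG, sizeof msg2);
    memcpy(nonce2, buf + CRYPT_NONCE, sizeof nonce2);
    printf("key_version %u, msg ok %d, nonce ok %d\n",
           read_be32(buf + CRYPT_KEY),
           !memcmp(msg, msg2, 16), !memcmp(nonce, nonce2, 16));
}
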
-/*********************************************************************//**
-Read the crypto (version, msg and iv) info, which has been used for
-log blocks with lsn <= this checkpoint's lsn, from a log header's
-checkpoint buf. */
+/** Read the checkpoint crypto (version, msg and iv) info.
+@param[in] buf checkpoint buffer
+@return whether the operation was successful */
UNIV_INTERN
bool
-log_crypt_read_checkpoint_buf(
-/*===========================*/
- const byte* buf) { /*!< in: checkpoint buffer */
-
- buf += LOG_CRYPT_VER;
-
- byte scheme = buf[0];
- if (scheme != redo_log_purpose_byte) {
- return true;
- }
- buf++;
- size_t n = buf[0];
- buf++;
+log_crypt_read_checkpoint_buf(const byte* buf)
+{
+ info.checkpoint_no = mach_read_from_4(buf + (LOG_CHECKPOINT_NO + 4));
+ info.key_version = mach_read_from_4(buf + LOG_CHECKPOINT_CRYPT_KEY);
- for (size_t i = 0; i < n; i++) {
- struct crypt_info_t info;
- info.checkpoint_no = mach_read_from_4(buf + 0);
- info.key_version = mach_read_from_4(buf + 4);
- memcpy(info.crypt_msg, buf + 8, MY_AES_BLOCK_SIZE);
- memcpy(info.crypt_nonce, buf + 24, MY_AES_BLOCK_SIZE);
+#if MY_AES_BLOCK_SIZE != 16
+# error "MY_AES_BLOCK_SIZE != 16; redo log checkpoint format affected"
+#endif
+ compile_time_assert(16 == sizeof info.crypt_msg);
+ compile_time_assert(LOG_CHECKPOINT_CRYPT_MESSAGE
+ - LOG_CHECKPOINT_CRYPT_NONCE
+ == sizeof info.crypt_nonce);
- if (!add_crypt_info(&info, true)) {
- return false;
- }
- buf += LOG_CRYPT_ENTRY_SIZE;
- }
+ memcpy(info.crypt_msg.bytes, buf + LOG_CHECKPOINT_CRYPT_MESSAGE,
+ sizeof info.crypt_msg);
+ memcpy(info.crypt_nonce.bytes, buf + LOG_CHECKPOINT_CRYPT_NONCE,
+ sizeof info.crypt_nonce);
-#ifdef DEBUG_CRYPT
- fprintf(stderr, "read [ chk key ]: ");
- for (size_t i = 0; i < crypt_info.size(); i++) {
- struct crypt_info_t* it = &crypt_info[i];
- fprintf(stderr, "[ %lu %u ] ",
- it->checkpoint_no,
- it->key_version);
- }
- fprintf(stderr, "\n");
-#endif
- return true;
+ return init_crypt_key(&info);
}
-/********************************************************
-Check whether the checkpoint information is encrypted. The check
-is based on whether the log group has crypt info and whether,
-according to that crypt info, the key version differs from the
-unencrypted key version. There is no reliable way to
-distinguish an encrypted log block from a corrupted log block,
-but if log block corruption is found, this function is
-used to find out whether the log block may be encrypted while the
-encryption key, key management plugin or encryption
-algorithm does not match.
-@return TRUE if the log block may be encrypted */
+/** Encrypt or decrypt a temporary file block.
+@param[in] src block to encrypt or decrypt
+@param[in] size size of the block
+@param[out] dst destination block
+@param[in] offs offset to block
+@param[in] encrypt true=encrypt; false=decrypt
+@return whether the operation succeeded */
UNIV_INTERN
-ibool
-log_crypt_block_maybe_encrypted(
-/*============================*/
- const byte* log_block, /*!< in: log block */
- log_crypt_err_t* err_info) /*!< out: error info */
+bool
+log_tmp_block_encrypt(
+ const byte* src,
+ ulint size,
+ byte* dst,
+ uint64_t offs,
+ bool encrypt)
{
- ibool maybe_encrypted = FALSE;
- const crypt_info_t* crypt_info;
-
- *err_info = LOG_UNENCRYPTED;
- crypt_info = get_crypt_info(log_block);
-
- if (crypt_info &&
- crypt_info->key_version != UNENCRYPTED_KEY_VER) {
- byte mysqld_key[MY_AES_BLOCK_SIZE] = {0};
- uint keylen= sizeof(mysqld_key);
-
- /* Log block contains crypt info and based on key
- version block could be encrypted. */
- *err_info = LOG_DECRYPT_MAYBE_FAILED;
- maybe_encrypted = TRUE;
+ uint dst_len;
+ uint64_t iv[MY_AES_BLOCK_SIZE / sizeof(uint64_t)];
+ iv[0] = offs;
+ memcpy(iv + 1, tmp_iv, sizeof iv - sizeof *iv);
+
+ int rc = encryption_crypt(
+ src, size, dst, &dst_len,
+ const_cast<byte*>(info.crypt_key.bytes), sizeof info.crypt_key,
+ reinterpret_cast<byte*>(iv), sizeof iv,
+ encrypt
+ ? ENCRYPTION_FLAG_ENCRYPT|ENCRYPTION_FLAG_NOPAD
+ : ENCRYPTION_FLAG_DECRYPT|ENCRYPTION_FLAG_NOPAD,
+ LOG_DEFAULT_ENCRYPTION_KEY, info.key_version);
- if (encryption_key_get(LOG_DEFAULT_ENCRYPTION_KEY,
- crypt_info->key_version, mysqld_key, &keylen)) {
- *err_info = LOG_CRYPT_KEY_NOT_FOUND;
- }
+ if (rc != MY_AES_OK) {
+ ib::error() << (encrypt ? "Encryption" : "Decryption")
+ << " failed for temporary file: " << rc;
}
- return (maybe_encrypted);
-}
-
-/********************************************************
-Print crypt error message to error log */
-UNIV_INTERN
-void
-log_crypt_print_error(
-/*==================*/
- log_crypt_err_t err_info) /*!< out: error info */
-{
- switch(err_info) {
- case LOG_CRYPT_KEY_NOT_FOUND:
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Redo log crypto: getting mysqld crypto key "
- "from key version failed. Reason could be that "
- "requested key version is not found or required "
- "encryption key management plugin is not found.");
- break;
- case LOG_DECRYPT_MAYBE_FAILED:
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Redo log crypto: failed to decrypt log block. "
- "Reason could be that requested key version is "
- "not found, required encryption key management "
- "plugin is not found or configured encryption "
- "algorithm and/or method does not match.");
- break;
- default:
- ut_error; /* Real bug */
- }
+ return rc == MY_AES_OK;
}
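
[Editor's note] The temporary-file IV above is the one detail worth calling out: the first 8 bytes are the block offset, and the rest is copied from the random tmp_iv generated at log_crypt_init() time, so every block gets a distinct IV under the same key. A minimal sketch of that construction (tmp_iv fixed here purely for the demonstration):

#include <cstdint>
#include <cstring>
#include <cstdio>

int main() {
    // Random tail generated once at startup (tmp_iv in the diff);
    // fixed here only so the output is reproducible.
    unsigned char tmp_iv[16] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};

    const uint64_t offsets[] = {0, 4096};
    for (uint64_t offs : offsets) {
        uint64_t iv[16 / sizeof(uint64_t)];   // MY_AES_BLOCK_SIZE == 16
        iv[0] = offs;                          // block offset selects the IV
        memcpy(iv + 1, tmp_iv, sizeof iv - sizeof *iv);
        const unsigned char* b = reinterpret_cast<unsigned char*>(iv);
        printf("offs=%llu iv=", (unsigned long long) offs);
        for (size_t i = 0; i < sizeof iv; i++) printf("%02x", b[i]);
        printf("\n");
    }
}
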
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc
index 86d483ae6f7..c3eaa05b680 100644
--- a/storage/innobase/log/log0log.cc
+++ b/storage/innobase/log/log0log.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Google Inc.
Copyright (c) 2014, 2019, MariaDB Corporation.
@@ -31,17 +31,12 @@ Database log
Created 12/9/1995 Heikki Tuuri
*******************************************************/
-#include "log0log.h"
-
-#ifdef UNIV_NONINL
-#include "log0log.ic"
-#endif
-
-#ifndef UNIV_HOTBACKUP
-#if MYSQL_VERSION_ID < 100200
-# include <my_service_manager.h>
-#endif
+#include "univ.i"
+#include <debug_sync.h>
+#include <my_service_manager.h>
+#include "log0log.h"
+#include "log0crypt.h"
#include "mem0mem.h"
#include "buf0buf.h"
#include "buf0flu.h"
@@ -57,9 +52,7 @@ Created 12/9/1995 Heikki Tuuri
#include "trx0trx.h"
#include "trx0roll.h"
#include "srv0mon.h"
-
-/* Used for debugging */
-// #define DEBUG_CRYPT 1
+#include "sync0sync.h"
/*
General philosophy of InnoDB redo-logs:
@@ -87,39 +80,26 @@ reduce the size of the log.
*/
-/* Global log system variable */
-UNIV_INTERN log_t* log_sys = NULL;
+/** Redo log system */
+log_t* log_sys = NULL;
+
+/** Whether to generate and require checksums on the redo log pages */
+my_bool innodb_log_checksums;
+
+/** Pointer to the log checksum calculation function */
+log_checksum_func_t log_checksum_algorithm_ptr;
/* Next log block number to do dummy record filling if no log records written
for a while */
static ulint next_lbn_to_pad = 0;
-#ifdef UNIV_PFS_RWLOCK
-UNIV_INTERN mysql_pfs_key_t checkpoint_lock_key;
-# ifdef UNIV_LOG_ARCHIVE
-UNIV_INTERN mysql_pfs_key_t archive_lock_key;
-# endif
-#endif /* UNIV_PFS_RWLOCK */
-
-#ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t log_sys_mutex_key;
-UNIV_INTERN mysql_pfs_key_t log_flush_order_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-#ifdef UNIV_DEBUG
-UNIV_INTERN ibool log_do_write = TRUE;
-#endif /* UNIV_DEBUG */
-
/* These control how often we print warnings if the last checkpoint is too
old */
-UNIV_INTERN ibool log_has_printed_chkp_warning = FALSE;
-UNIV_INTERN time_t log_last_warning_time;
+static bool log_has_printed_chkp_warning = false;
+static time_t log_last_warning_time;
-#ifdef UNIV_LOG_ARCHIVE
-/* Pointer to this variable is used as the i/o-message when we do i/o to an
-archive */
-UNIV_INTERN byte log_archive_io;
-#endif /* UNIV_LOG_ARCHIVE */
+static bool log_has_printed_chkp_margine_warning = false;
+static time_t log_last_margine_warning_time;
/* A margin for free space in the log buffer before a log entry is catenated */
#define LOG_BUF_WRITE_MARGIN (4 * OS_FILE_LOG_BLOCK_SIZE)
@@ -128,12 +108,6 @@ UNIV_INTERN byte log_archive_io;
#define LOG_BUF_FLUSH_RATIO 2
#define LOG_BUF_FLUSH_MARGIN (LOG_BUF_WRITE_MARGIN + 4 * UNIV_PAGE_SIZE)
-/* Margin for the free space in the smallest log group, before a new query
-step which modifies the database, is started */
-
-#define LOG_CHECKPOINT_FREE_PER_THREAD (4 * UNIV_PAGE_SIZE)
-#define LOG_CHECKPOINT_EXTRA_FREE (8 * UNIV_PAGE_SIZE)
-
/* This parameter controls asynchronous making of a new checkpoint; the value
should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */
@@ -146,24 +120,14 @@ should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */
the previous */
#define LOG_POOL_PREFLUSH_RATIO_ASYNC 8
-/* Extra margin, in addition to one log file, used in archiving */
-#define LOG_ARCHIVE_EXTRA_MARGIN (4 * UNIV_PAGE_SIZE)
-
-/* This parameter controls asynchronous writing to the archive */
-#define LOG_ARCHIVE_RATIO_ASYNC 16
-
/* Codes used in unlocking flush latches */
#define LOG_UNLOCK_NONE_FLUSHED_LOCK 1
#define LOG_UNLOCK_FLUSH_LOCK 2
-/* States of an archiving operation */
-#define LOG_ARCHIVE_READ 1
-#define LOG_ARCHIVE_WRITE 2
-
-/** Event to wake up the log scrub thread */
-static os_event_t log_scrub_event;
-
-static bool log_scrub_thread_active;
+/** Event to wake up log_scrub_thread */
+os_event_t log_scrub_event;
+/** Whether log_scrub_thread is active */
+bool log_scrub_thread_active;
extern "C" UNIV_INTERN
os_thread_ret_t
@@ -175,19 +139,11 @@ static
void
log_io_complete_checkpoint(void);
/*============================*/
-#ifdef UNIV_LOG_ARCHIVE
-/******************************************************//**
-Completes an archiving i/o. */
-static
-void
-log_io_complete_archive(void);
-/*=========================*/
-#endif /* UNIV_LOG_ARCHIVE */
/****************************************************************//**
Returns the oldest modified block lsn in the pool, or log_sys->lsn if none
exists.
-@return LSN of oldest modification */
+@return LSN of oldest modification */
static
lsn_t
log_buf_pool_get_oldest_modification(void)
@@ -195,7 +151,7 @@ log_buf_pool_get_oldest_modification(void)
{
lsn_t lsn;
- ut_ad(mutex_own(&(log_sys->mutex)));
+ ut_ad(log_mutex_own());
lsn = buf_pool_get_oldest_modification();
@@ -208,204 +164,206 @@ log_buf_pool_get_oldest_modification(void)
}
/** Extends the log buffer.
-@param[in] len requested minimum size in bytes */
-static
+@param[in] len requested minimum size in bytes */
void
log_buffer_extend(
ulint len)
{
- ulint move_start;
- ulint move_end;
- byte tmp_buf[OS_FILE_LOG_BLOCK_SIZE];
-
- mutex_enter(&(log_sys->mutex));
+ const ulint new_log_buffer_size = (len >> srv_page_size_shift) + 1;
+ const ulint new_buf_size = (new_log_buffer_size
+ << (srv_page_size_shift + 1))
+ + OS_FILE_LOG_BLOCK_SIZE;
+ byte* new_buf_ptr = static_cast<byte*>(ut_malloc_nokey(new_buf_size));
- while (log_sys->is_extending) {
- /* Another thread is trying to extend already.
- Needs to wait for. */
- mutex_exit(&(log_sys->mutex));
+ log_mutex_enter();
- log_buffer_flush_to_disk();
+ const ulint size = srv_log_buffer_size << srv_page_size_shift;
- mutex_enter(&(log_sys->mutex));
-
- if (srv_log_buffer_size > len / UNIV_PAGE_SIZE) {
- /* Already extended enough by the others */
- mutex_exit(&(log_sys->mutex));
- return;
- }
+ if (len <= size) {
+ /* Already extended enough by the others */
+ log_mutex_exit();
+ ut_free(new_buf_ptr);
+ return;
}
- log_sys->is_extending = true;
+ ib::warn() << "The transaction log size is too large"
+ " for innodb_log_buffer_size (" << len
+ << " >= " << size << " / 2). Trying to extend it.";
- while (log_sys->n_pending_writes != 0
- || ut_calc_align_down(log_sys->buf_free,
- OS_FILE_LOG_BLOCK_SIZE)
- != ut_calc_align_down(log_sys->buf_next_to_write,
- OS_FILE_LOG_BLOCK_SIZE)) {
- /* Buffer might have >1 blocks to write still. */
- mutex_exit(&(log_sys->mutex));
+ byte* old_buf_ptr = log_sys->buf_ptr;
+ const byte* begin = log_sys->buf;
+ const byte* end = begin + log_sys->buf_free;
- log_buffer_flush_to_disk();
+ log_sys->buf_ptr = new_buf_ptr;
+ srv_log_buffer_size = new_log_buffer_size;
+ log_sys->buf_size = size;
+ log_sys->buf
+ = static_cast<byte*>(ut_align(new_buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
- mutex_enter(&(log_sys->mutex));
+ if (!log_sys->first_in_use) {
+ log_sys->buf += size;
}
- move_start = ut_calc_align_down(
- log_sys->buf_free,
- OS_FILE_LOG_BLOCK_SIZE);
- move_end = log_sys->buf_free;
+ memcpy(log_sys->buf, begin, end - begin);
- /* store the last log block in buffer */
- ut_memcpy(tmp_buf, log_sys->buf + move_start,
- move_end - move_start);
-
- log_sys->buf_free -= move_start;
- log_sys->buf_next_to_write -= move_start;
-
- /* reallocate log buffer */
- srv_log_buffer_size = len / UNIV_PAGE_SIZE + 1;
- mem_free(log_sys->buf_ptr);
- log_sys->buf_ptr = static_cast<byte*>(
- mem_zalloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE));
- log_sys->buf = static_cast<byte*>(
- ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
- log_sys->buf_size = LOG_BUFFER_SIZE;
- log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO
+ log_sys->max_buf_free = size / LOG_BUF_FLUSH_RATIO
- LOG_BUF_FLUSH_MARGIN;
- /* restore the last log block */
- ut_memcpy(log_sys->buf, tmp_buf, move_end - move_start);
-
- ut_ad(log_sys->is_extending);
- log_sys->is_extending = false;
+ log_mutex_exit();
- mutex_exit(&(log_sys->mutex));
+ ut_free(old_buf_ptr);
- ib_logf(IB_LOG_LEVEL_INFO,
- "innodb_log_buffer_size was extended to %lu.",
- LOG_BUFFER_SIZE);
+ ib::info() << "innodb_log_buffer_size was extended to "
+ << size << ".";
}
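
[Editor's note] The new log_buffer_extend() relies on the double-buffer scheme set up in log_sys_init(): one allocation of twice the buffer size plus one block of alignment slack, with first_in_use selecting the active half. A minimal sketch of the alignment and half-selection arithmetic, using illustrative sizes:

#include <cstdint>
#include <cstdio>
#include <cstdlib>

int main() {
    const size_t BLOCK = 512;    // OS_FILE_LOG_BLOCK_SIZE
    const size_t size  = 4096;   // one log buffer half
    // One allocation holds both halves plus alignment slack,
    // as log_sys_init() does with ut_zalloc_nokey().
    unsigned char* ptr = static_cast<unsigned char*>(
        calloc(size * 2 + BLOCK, 1));
    uintptr_t p = reinterpret_cast<uintptr_t>(ptr);
    unsigned char* half0 = reinterpret_cast<unsigned char*>(
        (p + BLOCK - 1) & ~uintptr_t(BLOCK - 1)); // like ut_align()
    unsigned char* half1 = half0 + size;          // used when !first_in_use
    bool first_in_use = true;
    // log_buffer_switch() flips halves so writing can proceed while
    // the previously filled half is being flushed.
    unsigned char* active = first_in_use ? half0 : half1;
    printf("half0=%p half1=%p active=%p\n",
           (void*) half0, (void*) half1, (void*) active);
    free(ptr);
}
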
-/************************************************************//**
-Opens the log for log_write_low. The log must be closed with log_close and
-released with log_release.
-@return start lsn of the log record */
-UNIV_INTERN
-lsn_t
-log_reserve_and_open(
-/*=================*/
- ulint len) /*!< in: length of data to be catenated */
+/** Calculate the actual length in the redo buffer and file, including
+the block header and trailer.
+@param[in] len length to write
+@return actual length to write including header and trailer. */
+static inline
+ulint
+log_calculate_actual_len(
+ ulint len)
{
- log_t* log = log_sys;
- ulint len_upper_limit;
-#ifdef UNIV_LOG_ARCHIVE
- lsn_t archived_lsn_age;
- ulint dummy;
-#endif /* UNIV_LOG_ARCHIVE */
-#ifdef UNIV_DEBUG
- ulint count = 0;
-#endif /* UNIV_DEBUG */
+ ut_ad(log_mutex_own());
- if (len >= log->buf_size / 2) {
- DBUG_EXECUTE_IF("ib_log_buffer_is_short_crash",
- DBUG_SUICIDE(););
+ /* actual length stored per block */
+ const ulint len_per_blk = OS_FILE_LOG_BLOCK_SIZE
+ - (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE);
- /* log_buffer is too small. try to extend instead of crash. */
- ib_logf(IB_LOG_LEVEL_WARN,
- "The transaction log size is too large"
- " for innodb_log_buffer_size (%lu >= %lu / 2). "
- "Trying to extend it.",
- len, LOG_BUFFER_SIZE);
+ /* actual data length in last block already written */
+ ulint extra_len = (log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE);
- log_buffer_extend((len + 1) * 2);
- }
-loop:
- mutex_enter(&(log->mutex));
- ut_ad(!recv_no_log_write);
+ ut_ad(extra_len >= LOG_BLOCK_HDR_SIZE);
+ extra_len -= LOG_BLOCK_HDR_SIZE;
- if (log->is_extending) {
+ /* total extra length for block header and trailer */
+ extra_len = ((len + extra_len) / len_per_blk)
+ * (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE);
- mutex_exit(&(log->mutex));
+ return(len + extra_len);
+}
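
[Editor's note] A worked example of log_calculate_actual_len(): with 512-byte blocks, each block carries its payload minus the header and trailer, and the partially filled last block contributes its existing payload to the rounding. The header/trailer sizes below (12 and 4 bytes) are the conventional InnoDB values, stated here as assumptions since the constants are not shown in this diff:

#include <cstdio>

int main() {
    const unsigned long BLOCK = 512; // OS_FILE_LOG_BLOCK_SIZE
    const unsigned long HDR = 12;    // LOG_BLOCK_HDR_SIZE (assumed value)
    const unsigned long TRL = 4;     // LOG_BLOCK_TRL_SIZE (assumed value)
    const unsigned long len_per_blk = BLOCK - (HDR + TRL);

    unsigned long buf_free = 2 * BLOCK + HDR + 100; // partly filled block
    unsigned long len = 2000;                       // payload to append

    unsigned long extra = buf_free % BLOCK;  // data already in last block
    extra -= HDR;
    extra = ((len + extra) / len_per_blk) * (HDR + TRL);
    printf("payload %lu -> on-disk %lu bytes\n", len, len + extra);
}
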
- /* Log buffer size is extending. Writing up to the next block
- should wait for the extending finished. */
+/** Check the margin so that a log write does not overwrite the redo log
+past the latest checkpoint. If the log write is estimated to exceed
+log_group_capacity, wait until the checkpoint has advanced far enough.
+@param[in] len length of the data to be written */
- os_thread_sleep(100000);
+void
+log_margin_checkpoint_age(
+ ulint len)
+{
+ ulint margin = log_calculate_actual_len(len);
- ut_ad(++count < 50);
+ ut_ad(log_mutex_own());
- goto loop;
- }
+ if (margin > log_sys->log_group_capacity) {
+ /* return with warning output to avoid deadlock */
+ if (!log_has_printed_chkp_margine_warning
+ || difftime(time(NULL),
+ log_last_margine_warning_time) > 15) {
+ log_has_printed_chkp_margine_warning = true;
+ log_last_margine_warning_time = time(NULL);
- /* Calculate an upper limit for the space the string may take in the
- log buffer */
+ ib::error() << "The transaction log files are too"
+ " small for the single transaction log (size="
+ << len << "). So, the last checkpoint age"
+ " might exceed the log group capacity "
+ << log_sys->log_group_capacity << ".";
+ }
- len_upper_limit = LOG_BUF_WRITE_MARGIN + (5 * len) / 4;
+ return;
+ }
- if (log->buf_free + len_upper_limit > log->buf_size) {
+ /* Our margin check should ensure that we never reach this condition.
+ Try to do a checkpoint once. We cannot keep waiting here, as that might
+ result in a hang if the current mtr holds a latch on the oldest lsn */
+ if (log_sys->lsn - log_sys->last_checkpoint_lsn + margin
+ > log_sys->log_group_capacity) {
+ /* The log write of 'len' might overwrite the transaction log
+ after the last checkpoint. Make a checkpoint. */
- mutex_exit(&(log->mutex));
+ bool flushed_enough = false;
- /* Not enough free space, do a synchronous flush of the log
- buffer */
+ if (log_sys->lsn - log_buf_pool_get_oldest_modification()
+ + margin
+ <= log_sys->log_group_capacity) {
+ flushed_enough = true;
+ }
- log_buffer_flush_to_disk();
+ log_sys->check_flush_or_checkpoint = true;
+ log_mutex_exit();
- srv_stats.log_waits.inc();
+ DEBUG_SYNC_C("margin_checkpoint_age_rescue");
- ut_ad(++count < 50);
+ if (!flushed_enough) {
+ os_thread_sleep(100000);
+ }
+ log_checkpoint(true);
- goto loop;
+ log_mutex_enter();
}
-#ifdef UNIV_LOG_ARCHIVE
- if (log->archiving_state != LOG_ARCH_OFF) {
+ return;
+}
- archived_lsn_age = log->lsn - log->archived_lsn;
- if (archived_lsn_age + len_upper_limit
- > log->max_archived_lsn_age) {
- /* Not enough free archived space in log groups: do a
- synchronous archive write batch: */
+/** Open the log for log_write_low. The log must be closed with log_close.
+@param[in] len length of the data to be written
+@return start lsn of the log record */
+lsn_t
+log_reserve_and_open(
+ ulint len)
+{
+ ulint len_upper_limit;
+#ifdef UNIV_DEBUG
+ ulint count = 0;
+#endif /* UNIV_DEBUG */
- mutex_exit(&(log->mutex));
+loop:
+ ut_ad(log_mutex_own());
- ut_ad(len_upper_limit <= log->max_archived_lsn_age);
+ /* Calculate an upper limit for the space the string may take in the
+ log buffer */
- log_archive_do(TRUE, &dummy);
+ len_upper_limit = LOG_BUF_WRITE_MARGIN + srv_log_write_ahead_size
+ + (5 * len) / 4;
- ut_ad(++count < 50);
+ if (log_sys->buf_free + len_upper_limit > log_sys->buf_size) {
+ log_mutex_exit();
- goto loop;
- }
+ DEBUG_SYNC_C("log_buf_size_exceeded");
+
+ /* Not enough free space, do a write of the log buffer */
+ log_buffer_sync_in_background(false);
+
+ srv_stats.log_waits.inc();
+
+ ut_ad(++count < 50);
+
+ log_mutex_enter();
+ goto loop;
}
-#endif /* UNIV_LOG_ARCHIVE */
-#ifdef UNIV_LOG_DEBUG
- log->old_buf_free = log->buf_free;
- log->old_lsn = log->lsn;
-#endif
- return(log->lsn);
+ return(log_sys->lsn);
}
/************************************************************//**
Writes to the log the string given. It is assumed that the caller holds the
log mutex. */
-UNIV_INTERN
void
log_write_low(
/*==========*/
- byte* str, /*!< in: string */
- ulint str_len) /*!< in: string length */
+ const byte* str, /*!< in: string */
+ ulint str_len) /*!< in: string length */
{
log_t* log = log_sys;
ulint len;
ulint data_len;
byte* log_block;
- ut_ad(mutex_own(&(log->mutex)));
+ ut_ad(log_mutex_own());
part_loop:
- ut_ad(!recv_no_log_write);
/* Calculate a part length */
data_len = (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len;
@@ -462,8 +420,7 @@ part_loop:
/************************************************************//**
Closes the log.
-@return lsn */
-UNIV_INTERN
+@return lsn */
lsn_t
log_close(void)
/*===========*/
@@ -475,8 +432,7 @@ log_close(void)
log_t* log = log_sys;
lsn_t checkpoint_age;
- ut_ad(mutex_own(&(log->mutex)));
- ut_ad(!recv_no_log_write);
+ ut_ad(log_mutex_own());
lsn = log->lsn;
@@ -497,37 +453,26 @@ log_close(void)
if (log->buf_free > log->max_buf_free) {
- log->check_flush_or_checkpoint = TRUE;
+ log->check_flush_or_checkpoint = true;
}
checkpoint_age = lsn - log->last_checkpoint_lsn;
if (checkpoint_age >= log->log_group_capacity) {
- /* TODO: split btr_store_big_rec_extern_fields() into small
- steps so that we can release all latches in the middle, and
- call log_free_check() to ensure we never write over log written
- after the latest checkpoint. In principle, we should split all
- big_rec operations, but other operations are smaller. */
+ DBUG_EXECUTE_IF(
+ "print_all_chkp_warnings",
+ log_has_printed_chkp_warning = false;);
if (!log_has_printed_chkp_warning
|| difftime(time(NULL), log_last_warning_time) > 15) {
- log_has_printed_chkp_warning = TRUE;
+ log_has_printed_chkp_warning = true;
log_last_warning_time = time(NULL);
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ERROR: the age of the last"
- " checkpoint is " LSN_PF ",\n"
- "InnoDB: which exceeds the log group"
- " capacity " LSN_PF ".\n"
- "InnoDB: If you are using big"
- " BLOB or TEXT rows, you must set the\n"
- "InnoDB: combined size of log files"
- " at least 10 times bigger than the\n"
- "InnoDB: largest such row.\n",
- checkpoint_age,
- log->log_group_capacity);
+ ib::error() << "The age of the last checkpoint is "
+ << checkpoint_age << ", which exceeds the log"
+ " group capacity " << log->log_group_capacity
+ << ".";
}
}
@@ -542,69 +487,17 @@ log_close(void)
|| lsn - oldest_lsn > log->max_modified_age_sync
|| checkpoint_age > log->max_checkpoint_age_async) {
- log->check_flush_or_checkpoint = TRUE;
+ log->check_flush_or_checkpoint = true;
}
function_exit:
-#ifdef UNIV_LOG_DEBUG
- log_check_log_recs(log->buf + log->old_buf_free,
- log->buf_free - log->old_buf_free, log->old_lsn);
-#endif
-
return(lsn);
}
/******************************************************//**
-Pads the current log block full with dummy log records. Used in producing
-consistent archived log files and in scrubbing the redo log. */
-static
-void
-log_pad_current_log_block(void)
-/*===========================*/
-{
- byte b = MLOG_DUMMY_RECORD;
- ulint pad_length;
- ulint i;
- ib_uint64_t lsn;
-
- /* We retrieve lsn only because otherwise gcc crashed on HP-UX */
- lsn = log_reserve_and_open(OS_FILE_LOG_BLOCK_SIZE);
-
- pad_length = OS_FILE_LOG_BLOCK_SIZE
- - (log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE)
- - LOG_BLOCK_TRL_SIZE;
-
- for (i = 0; i < pad_length; i++) {
- log_write_low(&b, 1);
- }
-
- lsn = log_sys->lsn;
-
- log_close();
- log_release();
-
- ut_a(lsn % OS_FILE_LOG_BLOCK_SIZE == LOG_BLOCK_HDR_SIZE);
-}
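
[Editor's note] For the record, the padding arithmetic of the removed log_pad_current_log_block() is easy to verify standalone: the pad fills the current block up to its trailer, so the next write lands at the header offset of the following block. Header/trailer sizes are assumed values, as above:

#include <cstdio>

int main() {
    const unsigned long BLOCK = 512; // OS_FILE_LOG_BLOCK_SIZE
    const unsigned long HDR = 12;    // LOG_BLOCK_HDR_SIZE (assumed value)
    const unsigned long TRL = 4;     // LOG_BLOCK_TRL_SIZE (assumed value)
    unsigned long buf_free = BLOCK + HDR + 77; // middle of second block
    unsigned long pad = BLOCK - (buf_free % BLOCK) - TRL;
    // After the pad, the writer sits at the trailer; log_write_low()
    // then opens the next block, so the lsn lands at HDR within it.
    printf("pad %lu dummy bytes; next write offset %% %lu == %lu\n",
           pad, BLOCK, (buf_free + pad + TRL + HDR) % BLOCK);
}
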
-
-/******************************************************//**
-Calculates the data capacity of a log group, when the log file headers are not
-included.
-@return capacity in bytes */
-UNIV_INTERN
-lsn_t
-log_group_get_capacity(
-/*===================*/
- const log_group_t* group) /*!< in: log group */
-{
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- return((group->file_size - LOG_FILE_HDR_SIZE) * group->n_files);
-}
-
-/******************************************************//**
Calculates the offset within a log group, when the log file headers are not
included.
-@return size offset (<= offset) */
+@return size offset (<= offset) */
UNIV_INLINE
lsn_t
log_group_calc_size_offset(
@@ -613,7 +506,9 @@ log_group_calc_size_offset(
log group */
const log_group_t* group) /*!< in: log group */
{
- ut_ad(mutex_own(&(log_sys->mutex)));
+ /* The lsn parameters are updated while holding both the mutexes
+ and it is ok to hold either of them while reading */
+ ut_ad(log_mutex_own() || log_write_mutex_own());
return(offset - LOG_FILE_HDR_SIZE * (1 + offset / group->file_size));
}
@@ -621,7 +516,7 @@ log_group_calc_size_offset(
/******************************************************//**
Calculates the offset within a log group, when the log file headers are
included.
-@return real offset (>= offset) */
+@return real offset (>= offset) */
UNIV_INLINE
lsn_t
log_group_calc_real_offset(
@@ -630,21 +525,22 @@ log_group_calc_real_offset(
log group */
const log_group_t* group) /*!< in: log group */
{
- ut_ad(mutex_own(&(log_sys->mutex)));
+ /* The lsn parameters are updated while holding both the mutexes
+ and it is ok to hold either of them while reading */
+ ut_ad(log_mutex_own() || log_write_mutex_own());
return(offset + LOG_FILE_HDR_SIZE
* (1 + offset / (group->file_size - LOG_FILE_HDR_SIZE)));
}
-/******************************************************//**
-Calculates the offset of an lsn within a log group.
-@return offset within the log group */
-static
+/** Calculate the offset of an lsn within a log group.
+@param[in] lsn log sequence number
+@param[in] group log group
+@return offset within the log group */
lsn_t
log_group_calc_lsn_offset(
-/*======================*/
- lsn_t lsn, /*!< in: lsn */
- const log_group_t* group) /*!< in: log group */
+ lsn_t lsn,
+ const log_group_t* group)
{
lsn_t gr_lsn;
lsn_t gr_lsn_size_offset;
@@ -652,13 +548,16 @@ log_group_calc_lsn_offset(
lsn_t group_size;
lsn_t offset;
- ut_ad(mutex_own(&(log_sys->mutex)));
+ /* The lsn parameters are updated while holding both the mutexes
+ and it is ok to hold either of them while reading */
+ ut_ad(log_mutex_own() || log_write_mutex_own());
gr_lsn = group->lsn;
- gr_lsn_size_offset = log_group_calc_size_offset(group->lsn_offset, group);
+ gr_lsn_size_offset = log_group_calc_size_offset(
+ group->lsn_offset, group);
- group_size = log_group_get_capacity(group);
+ group_size = group->capacity();
if (lsn >= gr_lsn) {
@@ -681,58 +580,11 @@ log_group_calc_lsn_offset(
return(log_group_calc_real_offset(offset, group));
}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_DEBUG
-UNIV_INTERN ibool log_debug_writes = FALSE;
-#endif /* UNIV_DEBUG */
-
-/*******************************************************************//**
-Calculates where in log files we find a specified lsn.
-@return log file number */
-UNIV_INTERN
-ulint
-log_calc_where_lsn_is(
-/*==================*/
- ib_int64_t* log_file_offset, /*!< out: offset in that file
- (including the header) */
- ib_uint64_t first_header_lsn, /*!< in: first log file start
- lsn */
- ib_uint64_t lsn, /*!< in: lsn whose position to
- determine */
- ulint n_log_files, /*!< in: total number of log
- files */
- ib_int64_t log_file_size) /*!< in: log file size
- (including the header) */
-{
- ib_int64_t capacity = log_file_size - LOG_FILE_HDR_SIZE;
- ulint file_no;
- ib_int64_t add_this_many;
- if (lsn < first_header_lsn) {
- add_this_many = 1 + (first_header_lsn - lsn)
- / (capacity * (ib_int64_t) n_log_files);
- lsn += add_this_many
- * capacity * (ib_int64_t) n_log_files;
- }
-
- ut_a(lsn >= first_header_lsn);
-
- file_no = ((ulint)((lsn - first_header_lsn) / capacity))
- % n_log_files;
- *log_file_offset = (lsn - first_header_lsn) % capacity;
-
- *log_file_offset = *log_file_offset + LOG_FILE_HDR_SIZE;
-
- return(file_no);
-}
-
-#ifndef UNIV_HOTBACKUP
/********************************************************//**
Sets the field values in group to correspond to a given lsn. For this function
to work, the values must already be correctly initialized to correspond to
some lsn, for instance, a checkpoint lsn. */
-UNIV_INTERN
void
log_group_set_fields(
/*=================*/
@@ -744,24 +596,20 @@ log_group_set_fields(
group->lsn = lsn;
}
-/*****************************************************************//**
-Calculates the recommended highest values for lsn - last_checkpoint_lsn,
-lsn - buf_get_oldest_modification(), and lsn - max_archive_lsn_age.
-@return error value FALSE if the smallest log group is too small to
+/** Calculate the recommended highest values for lsn - last_checkpoint_lsn
+and lsn - buf_get_oldest_modification().
+@param[in] file_size requested innodb_log_file_size
+@retval true on success
+@retval false if the smallest log group is too small to
accommodate the number of OS threads in the database server */
-static
-ibool
-log_calc_max_ages(void)
-/*===================*/
+bool
+log_set_capacity(ulonglong file_size)
{
lsn_t margin;
ulint free;
- lsn_t smallest_capacity = ((srv_log_file_size_requested
- << srv_page_size_shift)
- - LOG_FILE_HDR_SIZE)
+ lsn_t smallest_capacity = (file_size - LOG_FILE_HDR_SIZE)
* srv_n_log_files;
-
/* Add extra safety */
smallest_capacity -= smallest_capacity / 10;
@@ -773,15 +621,19 @@ log_calc_max_ages(void)
free = LOG_CHECKPOINT_FREE_PER_THREAD * (10 + srv_thread_concurrency)
+ LOG_CHECKPOINT_EXTRA_FREE;
if (free >= smallest_capacity / 2) {
- ib_logf(IB_LOG_LEVEL_FATAL,
- "The combined size of ib_logfiles"
- " should be bigger than\n"
- "InnoDB: 200 kB * innodb_thread_concurrency.");
+ ib::error() << "Cannot continue operation. ib_logfiles are too"
+ " small for innodb_thread_concurrency="
+ << srv_thread_concurrency << ". The combined size of"
+ " ib_logfiles should be bigger than"
+ " 200 kB * innodb_thread_concurrency. "
+ << INNODB_PARAMETERS_MSG;
+ return(false);
}
+
margin = smallest_capacity - free;
margin = margin - margin / 10; /* Add still some extra safety */
- mutex_enter(&log_sys->mutex);
+ log_mutex_enter();
log_sys->log_group_capacity = smallest_capacity;
@@ -794,128 +646,70 @@ log_calc_max_ages(void)
/ LOG_POOL_CHECKPOINT_RATIO_ASYNC;
log_sys->max_checkpoint_age = margin;
-#ifdef UNIV_LOG_ARCHIVE
- lsn_t archive_margin = smallest_capacity
- - (srv_log_file_size_requested - LOG_FILE_HDR_SIZE)
- - LOG_ARCHIVE_EXTRA_MARGIN;
- log_sys->max_archived_lsn_age = archive_margin;
-
- log_sys->max_archived_lsn_age_async = archive_margin
- - archive_margin / LOG_ARCHIVE_RATIO_ASYNC;
-#endif /* UNIV_LOG_ARCHIVE */
- mutex_exit(&log_sys->mutex);
+ log_mutex_exit();
return(true);
}
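
[Editor's note] A worked example of the capacity math in log_set_capacity(), using the LOG_CHECKPOINT_FREE_PER_THREAD and LOG_CHECKPOINT_EXTRA_FREE definitions removed earlier in this diff (4 and 8 pages respectively); the LOG_FILE_HDR_SIZE value below is an assumption, and the ratio-based sync/async ages are omitted:

#include <cstdio>

int main() {
    const unsigned long long PAGE = 16384;      // UNIV_PAGE_SIZE (default)
    const unsigned long long FILE_HDR = 2048;   // LOG_FILE_HDR_SIZE (assumed)
    unsigned long long file_size = 48ULL << 20; // innodb_log_file_size
    unsigned long long n_files = 2;             // srv_n_log_files
    unsigned long long concurrency = 0;         // srv_thread_concurrency

    unsigned long long cap = (file_size - FILE_HDR) * n_files;
    cap -= cap / 10;                            // extra safety: -10%
    // From the constants removed in this diff:
    // LOG_CHECKPOINT_FREE_PER_THREAD = 4 * UNIV_PAGE_SIZE,
    // LOG_CHECKPOINT_EXTRA_FREE = 8 * UNIV_PAGE_SIZE.
    unsigned long long free_space = 4 * PAGE * (10 + concurrency) + 8 * PAGE;
    if (free_space >= cap / 2) {
        printf("ib_logfiles too small for this concurrency\n");
        return 1;
    }
    unsigned long long margin = cap - free_space;
    margin -= margin / 10;                      // still some extra safety
    printf("capacity %llu, max checkpoint age %llu\n", cap, margin);
}
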
-/******************************************************//**
-Initializes the log. */
-UNIV_INTERN
+/** Initializes the redo logging subsystem. */
void
-log_init(void)
-/*==========*/
+log_sys_init()
{
- log_sys = static_cast<log_t*>(mem_alloc(sizeof(log_t)));
+ log_sys = static_cast<log_t*>(ut_zalloc_nokey(sizeof(log_t)));
- mutex_create(log_sys_mutex_key, &log_sys->mutex, SYNC_LOG);
+ mutex_create(LATCH_ID_LOG_SYS, &log_sys->mutex);
+ mutex_create(LATCH_ID_LOG_WRITE, &log_sys->write_mutex);
- mutex_create(log_flush_order_mutex_key,
- &log_sys->log_flush_order_mutex,
- SYNC_LOG_FLUSH_ORDER);
-
- mutex_enter(&(log_sys->mutex));
+ mutex_create(LATCH_ID_LOG_FLUSH_ORDER, &log_sys->log_flush_order_mutex);
/* Start the lsn from one log block from zero: this way every
log record has a start lsn != zero, a fact which we will use */
log_sys->lsn = LOG_START_LSN;
- ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE);
- ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE);
+ ut_ad(srv_log_buffer_size >= 4);
- log_sys->buf_ptr = static_cast<byte*>(
- mem_zalloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE));
+ log_sys->buf_size = srv_log_buffer_size << srv_page_size_shift;
+ log_sys->buf_ptr = static_cast<byte*>(
+ ut_zalloc_nokey(log_sys->buf_size * 2 + OS_FILE_LOG_BLOCK_SIZE));
+ TRASH_ALLOC(log_sys->buf_ptr,
+ log_sys->buf_size * 2 + OS_FILE_LOG_BLOCK_SIZE);
log_sys->buf = static_cast<byte*>(
ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
- log_sys->buf_size = LOG_BUFFER_SIZE;
- log_sys->is_extending = false;
+ log_sys->first_in_use = true;
log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO
- LOG_BUF_FLUSH_MARGIN;
- log_sys->check_flush_or_checkpoint = TRUE;
- UT_LIST_INIT(log_sys->log_groups);
-
- log_sys->n_log_ios = 0;
+ log_sys->check_flush_or_checkpoint = true;
log_sys->n_log_ios_old = log_sys->n_log_ios;
log_sys->last_printout_time = time(NULL);
/*----------------------------*/
- log_sys->buf_next_to_write = 0;
-
- log_sys->write_lsn = 0;
- log_sys->current_flush_lsn = 0;
- log_sys->flushed_to_disk_lsn = 0;
-
- log_sys->written_to_some_lsn = log_sys->lsn;
- log_sys->written_to_all_lsn = log_sys->lsn;
-
- log_sys->n_pending_writes = 0;
-
- log_sys->no_flush_event = os_event_create();
-
- os_event_set(log_sys->no_flush_event);
+ log_sys->write_lsn = log_sys->lsn;
- log_sys->one_flushed_event = os_event_create();
+ log_sys->flush_event = os_event_create(0);
- os_event_set(log_sys->one_flushed_event);
+ os_event_set(log_sys->flush_event);
/*----------------------------*/
- log_sys->next_checkpoint_no = 0;
log_sys->last_checkpoint_lsn = log_sys->lsn;
- log_sys->n_pending_checkpoint_writes = 0;
-
- rw_lock_create(checkpoint_lock_key, &log_sys->checkpoint_lock,
- SYNC_NO_ORDER_CHECK);
+ rw_lock_create(
+ checkpoint_lock_key, &log_sys->checkpoint_lock,
+ SYNC_NO_ORDER_CHECK);
log_sys->checkpoint_buf_ptr = static_cast<byte*>(
- mem_zalloc(2 * OS_FILE_LOG_BLOCK_SIZE));
+ ut_zalloc_nokey(2 * OS_FILE_LOG_BLOCK_SIZE));
log_sys->checkpoint_buf = static_cast<byte*>(
ut_align(log_sys->checkpoint_buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
/*----------------------------*/
-#ifdef UNIV_LOG_ARCHIVE
- /* Under MySQL, log archiving is always off */
- log_sys->archiving_state = LOG_ARCH_OFF;
- log_sys->archived_lsn = log_sys->lsn;
- log_sys->next_archived_lsn = 0;
-
- log_sys->n_pending_archive_ios = 0;
-
- rw_lock_create(archive_lock_key, &log_sys->archive_lock,
- SYNC_NO_ORDER_CHECK);
-
- log_sys->archive_buf = NULL;
-
- /* ut_align(
- ut_malloc(LOG_ARCHIVE_BUF_SIZE
- + OS_FILE_LOG_BLOCK_SIZE),
- OS_FILE_LOG_BLOCK_SIZE); */
- log_sys->archive_buf_size = 0;
-
- /* memset(log_sys->archive_buf, '\0', LOG_ARCHIVE_BUF_SIZE); */
-
- log_sys->archiving_on = os_event_create();
-#endif /* UNIV_LOG_ARCHIVE */
-
- /*----------------------------*/
-
log_block_init(log_sys->buf, log_sys->lsn);
log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
@@ -925,255 +719,68 @@ log_init(void)
MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
log_sys->lsn - log_sys->last_checkpoint_lsn);
- mutex_exit(&(log_sys->mutex));
-
log_scrub_thread_active = !srv_read_only_mode && srv_scrub_log;
if (log_scrub_thread_active) {
- log_scrub_event = os_event_create();
+ log_scrub_event = os_event_create("log_scrub_event");
os_thread_create(log_scrub_thread, NULL, NULL);
}
-
-#ifdef UNIV_LOG_DEBUG
- recv_sys_create();
- recv_sys_init(buf_pool_get_curr_size());
-
- recv_sys->parse_start_lsn = log_sys->lsn;
- recv_sys->scanned_lsn = log_sys->lsn;
- recv_sys->scanned_checkpoint_no = 0;
- recv_sys->recovered_lsn = log_sys->lsn;
- recv_sys->limit_lsn = LSN_MAX;
-#endif
}
-/******************************************************************//**
-Inits a log group to the log system. */
-UNIV_INTERN
+/** Initialize the redo log.
+@param[in] n_files number of files */
void
-log_group_init(
-/*===========*/
- ulint id, /*!< in: group id */
- ulint n_files, /*!< in: number of log files */
- lsn_t file_size, /*!< in: log file size in bytes */
- ulint space_id, /*!< in: space id of the file space
- which contains the log files of this
- group */
- ulint archive_space_id MY_ATTRIBUTE((unused)))
- /*!< in: space id of the file space
- which contains some archived log
- files for this group; currently, only
- for the first log group this is
- used */
+log_init(ulint n_files)
{
- ulint i;
+ log_group_t* group = &log_sys->log;
- log_group_t* group;
-
- group = static_cast<log_group_t*>(mem_alloc(sizeof(log_group_t)));
-
- group->id = id;
group->n_files = n_files;
- group->file_size = file_size;
- group->space_id = space_id;
+ group->subformat = srv_safe_truncate;
+ if (srv_safe_truncate) {
+ group->format = srv_encrypt_log
+ ? LOG_HEADER_FORMAT_10_3 | LOG_HEADER_FORMAT_ENCRYPTED
+ : LOG_HEADER_FORMAT_10_3;
+ } else {
+ group->format = srv_encrypt_log
+ ? LOG_HEADER_FORMAT_10_2 | LOG_HEADER_FORMAT_ENCRYPTED
+ : LOG_HEADER_FORMAT_10_2;
+ }
+ group->file_size = srv_log_file_size;
group->state = LOG_GROUP_OK;
group->lsn = LOG_START_LSN;
group->lsn_offset = LOG_FILE_HDR_SIZE;
- group->n_pending_writes = 0;
-
- group->file_header_bufs_ptr = static_cast<byte**>(
- mem_zalloc(sizeof(byte*) * n_files));
-
- group->file_header_bufs = static_cast<byte**>(
- mem_zalloc(sizeof(byte**) * n_files));
-
-#ifdef UNIV_LOG_ARCHIVE
- group->archive_file_header_bufs_ptr = static_cast<byte*>(
- mem_zalloc( sizeof(byte*) * n_files));
-
- group->archive_file_header_bufs = static_cast<byte*>(
- mem_zalloc(sizeof(byte*) * n_files));
-#endif /* UNIV_LOG_ARCHIVE */
-
- for (i = 0; i < n_files; i++) {
- group->file_header_bufs_ptr[i] = static_cast<byte*>(
- mem_zalloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE));
-
- group->file_header_bufs[i] = static_cast<byte*>(
- ut_align(group->file_header_bufs_ptr[i],
- OS_FILE_LOG_BLOCK_SIZE));
-
-#ifdef UNIV_LOG_ARCHIVE
- group->archive_file_header_bufs_ptr[i] = static_cast<byte*>(
- mem_zalloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE));
-
- group->archive_file_header_bufs[i] = static_cast<byte*>(
- ut_align(group->archive_file_header_bufs_ptr[i],
- OS_FILE_LOG_BLOCK_SIZE));
-#endif /* UNIV_LOG_ARCHIVE */
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- group->archive_space_id = archive_space_id;
-
- group->archived_file_no = 0;
- group->archived_offset = 0;
-#endif /* UNIV_LOG_ARCHIVE */
group->checkpoint_buf_ptr = static_cast<byte*>(
- mem_zalloc(2 * OS_FILE_LOG_BLOCK_SIZE));
+ ut_zalloc_nokey(2 * OS_FILE_LOG_BLOCK_SIZE));
group->checkpoint_buf = static_cast<byte*>(
ut_align(group->checkpoint_buf_ptr,OS_FILE_LOG_BLOCK_SIZE));
-
- UT_LIST_ADD_LAST(log_groups, log_sys->log_groups, group);
-
- ut_a(log_calc_max_ages());
-}
-
-/******************************************************************//**
-Does the unlockings needed in flush i/o completion. */
-UNIV_INLINE
-void
-log_flush_do_unlocks(
-/*=================*/
- ulint code) /*!< in: any ORed combination of LOG_UNLOCK_FLUSH_LOCK
- and LOG_UNLOCK_NONE_FLUSHED_LOCK */
-{
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- /* NOTE that we must own the log mutex when doing the setting of the
- events: this is because transactions will wait for these events to
- be set, and at that moment the log flush they were waiting for must
- have ended. If the log mutex were not reserved here, the i/o-thread
- calling this function might be preempted for a while, and when it
- resumed execution, it might be that a new flush had been started, and
- this function would erroneously signal the NEW flush as completed.
- Thus, the changes in the state of these events are performed
- atomically in conjunction with the changes in the state of
- log_sys->n_pending_writes etc. */
-
- if (code & LOG_UNLOCK_NONE_FLUSHED_LOCK) {
- os_event_set(log_sys->one_flushed_event);
- }
-
- if (code & LOG_UNLOCK_FLUSH_LOCK) {
- os_event_set(log_sys->no_flush_event);
- }
-}
-
-/******************************************************************//**
-Checks if a flush is completed for a log group and does the completion
-routine if yes.
-@return LOG_UNLOCK_NONE_FLUSHED_LOCK or 0 */
-UNIV_INLINE
-ulint
-log_group_check_flush_completion(
-/*=============================*/
- log_group_t* group) /*!< in: log group */
-{
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- if (!log_sys->one_flushed && group->n_pending_writes == 0) {
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "Log flushed first to group %lu\n",
- (ulong) group->id);
- }
-#endif /* UNIV_DEBUG */
- log_sys->written_to_some_lsn = log_sys->write_lsn;
- log_sys->one_flushed = TRUE;
-
- return(LOG_UNLOCK_NONE_FLUSHED_LOCK);
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes && (group->n_pending_writes == 0)) {
-
- fprintf(stderr, "Log flushed to group %lu\n",
- (ulong) group->id);
- }
-#endif /* UNIV_DEBUG */
- return(0);
-}
-
-/******************************************************//**
-Checks if a flush is completed and does the completion routine if yes.
-@return LOG_UNLOCK_FLUSH_LOCK or 0 */
-static
-ulint
-log_sys_check_flush_completion(void)
-/*================================*/
-{
- ulint move_start;
- ulint move_end;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- if (log_sys->n_pending_writes == 0) {
-
- log_sys->written_to_all_lsn = log_sys->write_lsn;
- log_sys->buf_next_to_write = log_sys->write_end_offset;
-
- if (log_sys->write_end_offset > log_sys->max_buf_free / 2) {
- /* Move the log buffer content to the start of the
- buffer */
-
- move_start = ut_calc_align_down(
- log_sys->write_end_offset,
- OS_FILE_LOG_BLOCK_SIZE);
- move_end = ut_calc_align(log_sys->buf_free,
- OS_FILE_LOG_BLOCK_SIZE);
-
- ut_memmove(log_sys->buf, log_sys->buf + move_start,
- move_end - move_start);
- log_sys->buf_free -= move_start;
-
- log_sys->buf_next_to_write -= move_start;
- }
-
- return(LOG_UNLOCK_FLUSH_LOCK);
- }
-
- return(0);
}
/******************************************************//**
Completes an i/o to a log file. */
-UNIV_INTERN
void
log_io_complete(
/*============*/
log_group_t* group) /*!< in: log group or a dummy pointer */
{
- ulint unlock;
-
-#ifdef UNIV_LOG_ARCHIVE
- if ((byte*) group == &log_archive_io) {
- /* It was an archive write */
-
- log_io_complete_archive();
-
- return;
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
if ((ulint) group & 0x1UL) {
/* It was a checkpoint write */
group = (log_group_t*)((ulint) group - 1);
- if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
- && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
-
- fil_flush(group->space_id);
+ switch (srv_file_flush_method) {
+ case SRV_O_DSYNC:
+ case SRV_NOSYNC:
+ break;
+ case SRV_FSYNC:
+ case SRV_LITTLESYNC:
+ case SRV_O_DIRECT:
+ case SRV_O_DIRECT_NO_FSYNC:
+ case SRV_ALL_O_DIRECT_FSYNC:
+ fil_flush(SRV_LOG_SPACE_FIRST_ID);
}
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "Checkpoint info written to group %lu\n",
- group->id);
- }
-#endif /* UNIV_DEBUG */
+
+ DBUG_PRINT("ib_log", ("checkpoint info written"));
log_io_complete_checkpoint();
return;
@@ -1181,29 +788,6 @@ log_io_complete(
ut_error; /*!< We currently use synchronous writing of the
logs and cannot end up here! */
-
- if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
- && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
- && srv_flush_log_at_trx_commit != 2) {
-
- fil_flush(group->space_id);
- }
-
- mutex_enter(&(log_sys->mutex));
- ut_ad(!recv_no_log_write);
-
- ut_a(group->n_pending_writes > 0);
- ut_a(log_sys->n_pending_writes > 0);
-
- group->n_pending_writes--;
- log_sys->n_pending_writes--;
-
- unlock = log_group_check_flush_completion(group);
- unlock = unlock | log_sys_check_flush_completion();
-
- log_flush_do_unlocks(unlock);
-
- mutex_exit(&(log_sys->mutex));
}
/******************************************************//**
@@ -1218,43 +802,49 @@ log_group_file_header_flush(
lsn_t start_lsn) /*!< in: log file data starts at this
lsn */
{
- byte* buf;
lsn_t dest_offset;
- ut_ad(mutex_own(&(log_sys->mutex)));
+ ut_ad(log_write_mutex_own());
ut_ad(!recv_no_log_write);
ut_a(nth_file < group->n_files);
+ ut_ad((group->format & ~LOG_HEADER_FORMAT_ENCRYPTED)
+ == (srv_safe_truncate
+ ? LOG_HEADER_FORMAT_10_3
+ : LOG_HEADER_FORMAT_10_2));
+
+ // man 2 open suggests aligning this buffer to 512 bytes for O_DIRECT
+ MY_ALIGNED(OS_FILE_LOG_BLOCK_SIZE)
+ byte buf[OS_FILE_LOG_BLOCK_SIZE] = {0};
+
+ mach_write_to_4(buf + LOG_HEADER_FORMAT, group->format);
+ mach_write_to_4(buf + LOG_HEADER_SUBFORMAT, srv_safe_truncate);
+ mach_write_to_8(buf + LOG_HEADER_START_LSN, start_lsn);
+ strcpy(reinterpret_cast<char*>(buf) + LOG_HEADER_CREATOR,
+ LOG_HEADER_CREATOR_CURRENT);
+ ut_ad(LOG_HEADER_CREATOR_END - LOG_HEADER_CREATOR
+ >= sizeof LOG_HEADER_CREATOR_CURRENT);
+ log_block_set_checksum(buf, log_block_calc_checksum_crc32(buf));
- buf = *(group->file_header_bufs + nth_file);
+ dest_offset = nth_file * group->file_size;
- mach_write_to_4(buf + LOG_GROUP_ID, group->id);
- mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn);
+ DBUG_PRINT("ib_log", ("write " LSN_PF
+ " file " ULINTPF " header",
+ start_lsn, nth_file));
- /* Wipe over possible label of mysqlbackup --restore */
- memcpy(buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, " ", 4);
+ log_sys->n_log_ios++;
- dest_offset = nth_file * group->file_size;
+ srv_stats.os_log_pending_writes.inc();
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "Writing log file header to group %lu file %lu\n",
- (ulong) group->id, (ulong) nth_file);
- }
-#endif /* UNIV_DEBUG */
- if (log_do_write) {
- log_sys->n_log_ios++;
+ const ulint page_no
+ = (ulint) (dest_offset / univ_page_size.physical());
- srv_stats.os_log_pending_writes.inc();
+ fil_io(IORequestLogWrite, true,
+ page_id_t(SRV_LOG_SPACE_FIRST_ID, page_no),
+ univ_page_size,
+ (ulint) (dest_offset % univ_page_size.physical()),
+ OS_FILE_LOG_BLOCK_SIZE, buf, group);
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->space_id, 0,
- (ulint) (dest_offset / UNIV_PAGE_SIZE),
- (ulint) (dest_offset % UNIV_PAGE_SIZE),
- OS_FILE_LOG_BLOCK_SIZE,
- buf, group, 0);
-
- srv_stats.os_log_pending_writes.dec();
- }
+ srv_stats.os_log_pending_writes.dec();
}
/******************************************************//**
@@ -1272,7 +862,7 @@ log_block_store_checksum(
/******************************************************//**
Writes a buffer to a log file group. */
-UNIV_INTERN
+static
void
log_group_write_buf(
/*================*/
@@ -1280,6 +870,9 @@ log_group_write_buf(
byte* buf, /*!< in: buffer */
ulint len, /*!< in: buffer len; must be divisible
by OS_FILE_LOG_BLOCK_SIZE */
+#ifdef UNIV_DEBUG
+ ulint pad_len, /*!< in: pad len in the buffer len */
+#endif /* UNIV_DEBUG */
lsn_t start_lsn, /*!< in: start lsn of the buffer; must
be divisible by
OS_FILE_LOG_BLOCK_SIZE */
@@ -1289,20 +882,15 @@ log_group_write_buf(
header */
{
ulint write_len;
- ibool write_header;
+ bool write_header = new_data_offset == 0;
lsn_t next_offset;
ulint i;
- ut_ad(mutex_own(&(log_sys->mutex)));
+ ut_ad(log_write_mutex_own());
ut_ad(!recv_no_log_write);
ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0);
ut_a(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
- if (new_data_offset == 0) {
- write_header = TRUE;
- } else {
- write_header = FALSE;
- }
loop:
if (len == 0) {
@@ -1311,8 +899,8 @@ loop:
next_offset = log_group_calc_lsn_offset(start_lsn, group);
- if ((next_offset % group->file_size == LOG_FILE_HDR_SIZE)
- && write_header) {
+ if (write_header
+ && next_offset % group->file_size == LOG_FILE_HDR_SIZE) {
/* We start to write a new log file instance in the group */
ut_a(next_offset / group->file_size <= ULINT_MAX);
@@ -1335,99 +923,146 @@ loop:
write_len = len;
}
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
-
- fprintf(stderr,
- "Writing log file segment to group %lu"
- " offset " LSN_PF " len %lu\n"
- "start lsn " LSN_PF "\n"
- "First block n:o %lu last block n:o %lu\n",
- (ulong) group->id, next_offset,
- write_len,
- start_lsn,
- (ulong) log_block_get_hdr_no(buf),
- (ulong) log_block_get_hdr_no(
- buf + write_len - OS_FILE_LOG_BLOCK_SIZE));
- ut_a(log_block_get_hdr_no(buf)
- == log_block_convert_lsn_to_no(start_lsn));
-
- for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
-
- ut_a(log_block_get_hdr_no(buf) + i
- == log_block_get_hdr_no(
- buf + i * OS_FILE_LOG_BLOCK_SIZE));
- }
- }
-#endif /* UNIV_DEBUG */
+ DBUG_PRINT("ib_log",
+ ("write " LSN_PF " to " LSN_PF
+ ": len " ULINTPF
+ " blocks " ULINTPF ".." ULINTPF,
+ start_lsn, next_offset,
+ write_len,
+ log_block_get_hdr_no(buf),
+ log_block_get_hdr_no(
+ buf + write_len
+ - OS_FILE_LOG_BLOCK_SIZE)));
+
+ ut_ad(pad_len >= len
+ || log_block_get_hdr_no(buf)
+ == log_block_convert_lsn_to_no(start_lsn));
+
/* Calculate the checksums for each log block and write them to
the trailer fields of the log blocks */
for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
+#ifdef UNIV_DEBUG
+ ulint hdr_no_2 = log_block_get_hdr_no(buf) + i;
+ DBUG_EXECUTE_IF("innodb_small_log_block_no_limit",
+ hdr_no_2 = ((hdr_no_2 - 1) & 0xFUL) + 1;);
+#endif
+ ut_ad(pad_len >= len
+ || i * OS_FILE_LOG_BLOCK_SIZE >= len - pad_len
+ || log_block_get_hdr_no(buf + i * OS_FILE_LOG_BLOCK_SIZE) == hdr_no_2);
log_block_store_checksum(buf + i * OS_FILE_LOG_BLOCK_SIZE);
}
- if (log_do_write) {
- log_sys->n_log_ios++;
+ log_sys->n_log_ios++;
- srv_stats.os_log_pending_writes.inc();
+ srv_stats.os_log_pending_writes.inc();
- ut_a(next_offset / UNIV_PAGE_SIZE <= ULINT_MAX);
+ ut_a(next_offset / UNIV_PAGE_SIZE <= ULINT_MAX);
- log_encrypt_before_write(log_sys->next_checkpoint_no,
- buf, start_lsn, write_len);
+ const ulint page_no
+ = (ulint) (next_offset / univ_page_size.physical());
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->space_id, 0,
- (ulint) (next_offset / UNIV_PAGE_SIZE),
- (ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf,
- group, 0);
+ fil_io(IORequestLogWrite, true,
+ page_id_t(SRV_LOG_SPACE_FIRST_ID, page_no),
+ univ_page_size,
+ (ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf,
+ group);
- srv_stats.os_log_pending_writes.dec();
+ srv_stats.os_log_pending_writes.dec();
- srv_stats.os_log_written.add(write_len);
- srv_stats.log_writes.inc();
- }
+ srv_stats.os_log_written.add(write_len);
+ srv_stats.log_writes.inc();
if (write_len < len) {
start_lsn += write_len;
len -= write_len;
buf += write_len;
- write_header = TRUE;
+ write_header = true;
goto loop;
}
}
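
Each 512-byte log block carries a 4-byte checksum in its trailer, and the
loop above stamps every block just before the write. A self-contained sketch
of the same loop; the additive checksum here is only a stand-in for the real
log_block_calc_checksum_crc32() (CRC-32C):

	#include <cstddef>
	#include <cstdint>

	static const size_t LOG_BLOCK_SIZE = 512; /* OS_FILE_LOG_BLOCK_SIZE */
	static const size_t TRAILER_SIZE = 4;     /* checksum field at the block end */

	/* Stand-in checksum, for illustration only. */
	static uint32_t block_checksum(const uint8_t* block) {
		uint32_t sum = 0;
		for (size_t i = 0; i < LOG_BLOCK_SIZE - TRAILER_SIZE; i++) {
			sum = sum * 31 + block[i];
		}
		return sum;
	}

	/* Stamp the trailer of every complete block in buf. */
	void store_checksums(uint8_t* buf, size_t write_len) {
		for (size_t i = 0; i < write_len / LOG_BLOCK_SIZE; i++) {
			uint8_t* block = buf + i * LOG_BLOCK_SIZE;
			const uint32_t c = block_checksum(block);
			/* big-endian store, mirroring mach_write_to_4() */
			block[LOG_BLOCK_SIZE - 4] = (uint8_t)(c >> 24);
			block[LOG_BLOCK_SIZE - 3] = (uint8_t)(c >> 16);
			block[LOG_BLOCK_SIZE - 2] = (uint8_t)(c >> 8);
			block[LOG_BLOCK_SIZE - 1] = (uint8_t)(c);
		}
	}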
-/******************************************************//**
-This function is called, e.g., when a transaction wants to commit. It checks
-that the log has been written to the log file up to the last log entry written
-by the transaction. If there is a flush running, it waits and checks if the
-flush flushed enough. If not, starts a new flush. */
-UNIV_INTERN
+/** Flush the recently written changes to the log file
+and invoke log_mutex_enter(). */
+static
+void
+log_write_flush_to_disk_low()
+{
+ /* FIXME: This is not holding log_sys->mutex while
+ calling os_event_set()! */
+ ut_a(log_sys->n_pending_flushes == 1); /* No other threads here */
+
+ bool do_flush = srv_file_flush_method != SRV_O_DSYNC;
+
+ if (do_flush) {
+ fil_flush(SRV_LOG_SPACE_FIRST_ID);
+ }
+
+ log_mutex_enter();
+ if (do_flush) {
+ log_sys->flushed_to_disk_lsn = log_sys->current_flush_lsn;
+ }
+
+ log_sys->n_pending_flushes--;
+
+ os_event_set(log_sys->flush_event);
+}
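
Under O_DSYNC the operating system persists each log write as part of the
write itself, which is why do_flush above skips the explicit fil_flush().
A sketch of that decision (the enum is illustrative, not the srv0srv.h one):

	enum class FlushMethod { FSYNC, O_DSYNC, O_DIRECT, NOSYNC };

	/* Mirrors do_flush: only O_DSYNC makes the write itself durable;
	   every other method needs a separate flush call. */
	inline bool need_explicit_flush(FlushMethod m) {
		return m != FlushMethod::O_DSYNC;
	}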
+
+/** Switch the log buffer in use, and copy the contents of the last block
+from the old log buffer to the head of the buffer being activated.
+buf_free and buf_next_to_write are adjusted accordingly. */
+static inline
+void
+log_buffer_switch()
+{
+ ut_ad(log_mutex_own());
+ ut_ad(log_write_mutex_own());
+
+ const byte* old_buf = log_sys->buf;
+ ulint area_end = ut_calc_align(
+ log_sys->buf_free, ulint(OS_FILE_LOG_BLOCK_SIZE));
+
+ if (log_sys->first_in_use) {
+ log_sys->first_in_use = false;
+ ut_ad(log_sys->buf == ut_align(log_sys->buf_ptr,
+ OS_FILE_LOG_BLOCK_SIZE));
+ log_sys->buf += log_sys->buf_size;
+ } else {
+ log_sys->first_in_use = true;
+ log_sys->buf -= log_sys->buf_size;
+ ut_ad(log_sys->buf == ut_align(log_sys->buf_ptr,
+ OS_FILE_LOG_BLOCK_SIZE));
+ }
+
+	/* Copy the last block to the new buffer */
+ ut_memcpy(log_sys->buf,
+ old_buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
+ OS_FILE_LOG_BLOCK_SIZE);
+
+ log_sys->buf_free %= OS_FILE_LOG_BLOCK_SIZE;
+ log_sys->buf_next_to_write = log_sys->buf_free;
+}
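
log_buffer_switch() is a ping-pong scheme: the two halves of one allocation
alternate as the active log buffer, and the trailing partial block is copied
to the head of the half being activated, so writers can keep appending while
the retired half is written out. A standalone sketch under those assumptions,
with illustrative names rather than the log_sys fields:

	#include <cstdint>
	#include <cstring>

	static const size_t BLOCK = 512; /* OS_FILE_LOG_BLOCK_SIZE */

	struct DoubleLogBuf {
		uint8_t* base;      /* start of a 2 * half_size allocation */
		size_t half_size;   /* size of one half */
		uint8_t* buf;       /* currently active half */
		bool first_in_use;
		size_t buf_free;    /* offset of the first free byte in buf */

		void switch_buffer() {
			const uint8_t* old_buf = buf;
			/* round buf_free up to a block boundary (ut_calc_align) */
			const size_t area_end =
				((buf_free + BLOCK - 1) / BLOCK) * BLOCK;

			buf = first_in_use ? base + half_size : base;
			first_in_use = !first_in_use;

			/* carry the incomplete last block over to the new half */
			std::memcpy(buf, old_buf + area_end - BLOCK, BLOCK);

			/* only the partial block's bytes remain unwritten */
			buf_free %= BLOCK;
		}
	};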
+
+/** Ensure that the log has been written to the log file up to a given
+log entry (such as that of a transaction commit). Start a new write, or
+wait and check if an already running write is covering the request.
+@param[in] lsn log sequence number that should be
+included in the redo log file write
+@param[in] flush_to_disk whether the written log should also
+be flushed to the file system */
void
log_write_up_to(
-/*============*/
- lsn_t lsn, /*!< in: log sequence number up to which
- the log should be written,
- LSN_MAX if not specified */
- ulint wait, /*!< in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
- or LOG_WAIT_ALL_GROUPS */
- ibool flush_to_disk)
- /*!< in: TRUE if we want the written log
- also to be flushed to disk */
+ lsn_t lsn,
+ bool flush_to_disk)
{
- log_group_t* group;
- ulint start_offset;
- ulint end_offset;
- ulint area_start;
- ulint area_end;
#ifdef UNIV_DEBUG
ulint loop_count = 0;
#endif /* UNIV_DEBUG */
- ulint unlock;
- ib_uint64_t write_lsn;
- ib_uint64_t flush_lsn;
+ byte* write_buf;
+ lsn_t write_lsn;
ut_ad(!srv_read_only_mode);
@@ -1439,115 +1074,134 @@ log_write_up_to(
}
loop:
- ut_ad(++loop_count < 100);
-
- mutex_enter(&(log_sys->mutex));
- ut_ad(!recv_no_log_write);
-
- if (flush_to_disk
- && log_sys->flushed_to_disk_lsn >= lsn) {
-
- mutex_exit(&(log_sys->mutex));
-
+ ut_ad(++loop_count < 128);
+
+#if UNIV_WORD_SIZE > 7
+ /* We can do a dirty read of LSN. */
+	/* NOTE: We do not do the dirty read for the
+	(flush_to_disk == true) case, because the log_mutex
+	contention also works as the arbitrator for write-IO
+	(fsync) bandwidth between log files and data files. */
+ if (!flush_to_disk && log_sys->write_lsn >= lsn) {
return;
}
+#endif
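
The dirty read above is safe only because a 64-bit load is atomic on the
targeted platforms (UNIV_WORD_SIZE > 7). In portable C++ the same fast path
would be a relaxed atomic load; a sketch:

	#include <atomic>
	#include <cstdint>

	std::atomic<uint64_t> write_lsn{0}; /* stands in for log_sys->write_lsn */

	/* Skip taking the mutex when the log is already written far enough. */
	bool already_written_up_to(uint64_t lsn) {
		return write_lsn.load(std::memory_order_relaxed) >= lsn;
	}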
- if (!flush_to_disk
- && (log_sys->written_to_all_lsn >= lsn
- || (log_sys->written_to_some_lsn >= lsn
- && wait != LOG_WAIT_ALL_GROUPS))) {
+ log_write_mutex_enter();
+ ut_ad(!recv_no_log_write);
- mutex_exit(&(log_sys->mutex));
+ lsn_t limit_lsn = flush_to_disk
+ ? log_sys->flushed_to_disk_lsn
+ : log_sys->write_lsn;
+ if (limit_lsn >= lsn) {
+ log_write_mutex_exit();
return;
}
- if (log_sys->n_pending_writes > 0) {
- /* A write (+ possibly flush to disk) is running */
+	/* If this is a plain write we can simply proceed, since we
+	have checked that write_lsn has not yet reached the requested
+	LSN. If a flush is also requested, we first check whether a
+	flush is already pending and, if so, wait for it to finish
+	before proceeding. */
+ if (flush_to_disk
+ && (log_sys->n_pending_flushes > 0
+ || !os_event_is_set(log_sys->flush_event))) {
+ /* Figure out if the current flush will do the job
+ for us. */
+ bool work_done = log_sys->current_flush_lsn >= lsn;
- if (flush_to_disk
- && log_sys->current_flush_lsn >= lsn) {
- /* The write + flush will write enough: wait for it to
- complete */
+ log_write_mutex_exit();
- goto do_waits;
- }
+ os_event_wait(log_sys->flush_event);
- if (!flush_to_disk
- && log_sys->write_lsn >= lsn) {
- /* The write will write enough: wait for it to
- complete */
-
- goto do_waits;
+ if (work_done) {
+ return;
+ } else {
+ goto loop;
}
-
- mutex_exit(&(log_sys->mutex));
-
- /* Wait for the write to complete and try to start a new
- write */
-
- os_event_wait(log_sys->no_flush_event);
-
- goto loop;
}
+ log_mutex_enter();
if (!flush_to_disk
&& log_sys->buf_free == log_sys->buf_next_to_write) {
/* Nothing to write and no flush to disk requested */
-
- mutex_exit(&(log_sys->mutex));
-
+ log_mutex_exit_all();
return;
}
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "Writing log from " LSN_PF " up to lsn " LSN_PF "\n",
- log_sys->written_to_all_lsn,
- log_sys->lsn);
- }
-#endif /* UNIV_DEBUG */
- log_sys->n_pending_writes++;
+ ulint start_offset;
+ ulint end_offset;
+ ulint area_start;
+ ulint area_end;
+ ulong write_ahead_size = srv_log_write_ahead_size;
+ ulint pad_size;
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
- group->n_pending_writes++; /*!< We assume here that we have only
- one log group! */
+ DBUG_PRINT("ib_log", ("write " LSN_PF " to " LSN_PF,
+ log_sys->write_lsn,
+ log_sys->lsn));
+ if (flush_to_disk) {
+ log_sys->n_pending_flushes++;
+ log_sys->current_flush_lsn = log_sys->lsn;
+ os_event_reset(log_sys->flush_event);
- os_event_reset(log_sys->no_flush_event);
- os_event_reset(log_sys->one_flushed_event);
+ if (log_sys->buf_free == log_sys->buf_next_to_write) {
+ /* Nothing to write, flush only */
+ log_mutex_exit_all();
+ log_write_flush_to_disk_low();
+ log_mutex_exit();
+ return;
+ }
+ }
start_offset = log_sys->buf_next_to_write;
end_offset = log_sys->buf_free;
- area_start = ut_calc_align_down(start_offset, OS_FILE_LOG_BLOCK_SIZE);
- area_end = ut_calc_align(end_offset, OS_FILE_LOG_BLOCK_SIZE);
+ area_start = ut_2pow_round(start_offset,
+ ulint(OS_FILE_LOG_BLOCK_SIZE));
+ area_end = ut_calc_align(end_offset, ulint(OS_FILE_LOG_BLOCK_SIZE));
ut_ad(area_end - area_start > 0);
- log_sys->write_lsn = log_sys->lsn;
-
- if (flush_to_disk) {
- log_sys->current_flush_lsn = log_sys->lsn;
- }
-
- log_sys->one_flushed = FALSE;
-
log_block_set_flush_bit(log_sys->buf + area_start, TRUE);
log_block_set_checkpoint_no(
log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
log_sys->next_checkpoint_no);
- /* Copy the last, incompletely written, log block a log block length
- up, so that when the flush operation writes from the log buffer, the
- segment to write will not be changed by writers to the log */
-
- ut_memcpy(log_sys->buf + area_end,
- log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
- OS_FILE_LOG_BLOCK_SIZE);
-
- log_sys->buf_free += OS_FILE_LOG_BLOCK_SIZE;
- log_sys->write_end_offset = log_sys->buf_free;
+ write_lsn = log_sys->lsn;
+ write_buf = log_sys->buf;
+
+ log_buffer_switch();
+
+ log_group_set_fields(&log_sys->log, log_sys->write_lsn);
+
+ log_mutex_exit();
+ /* Erase the end of the last log block. */
+ memset(write_buf + end_offset, 0,
+ ~end_offset & (OS_FILE_LOG_BLOCK_SIZE - 1));
+
+ /* Calculate pad_size if needed. */
+ pad_size = 0;
+ if (write_ahead_size > OS_FILE_LOG_BLOCK_SIZE) {
+ lsn_t end_offset;
+ ulint end_offset_in_unit;
+ end_offset = log_group_calc_lsn_offset(
+ ut_uint64_align_up(write_lsn,
+ OS_FILE_LOG_BLOCK_SIZE),
+ &log_sys->log);
+ end_offset_in_unit = (ulint) (end_offset % write_ahead_size);
+
+ if (end_offset_in_unit > 0
+ && (area_end - area_start) > end_offset_in_unit) {
+			/* The first block in the write-ahead unit was
+			initialized after the last write. The rest of
+			the unit must be written out as padding once. */
+ pad_size = std::min(
+ ulint(write_ahead_size) - end_offset_in_unit,
+ log_sys->buf_size - area_end);
+ ::memset(write_buf + area_end, 0, pad_size);
+ }
+ }
if (UNIV_UNLIKELY(srv_shutdown_state != SRV_SHUTDOWN_NONE)) {
service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
@@ -1556,99 +1210,52 @@ loop:
log_sys->write_lsn, lsn);
}
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
+ if (log_sys->is_encrypted()) {
+ log_crypt(write_buf + area_start, log_sys->write_lsn,
+ area_end - area_start);
+ }
/* Do the write to the log files */
+ log_group_write_buf(
+ &log_sys->log, write_buf + area_start,
+ area_end - area_start + pad_size,
+#ifdef UNIV_DEBUG
+ pad_size,
+#endif /* UNIV_DEBUG */
+ ut_uint64_align_down(log_sys->write_lsn,
+ OS_FILE_LOG_BLOCK_SIZE),
+ start_offset - area_start);
+ srv_stats.log_padded.add(pad_size);
+ log_sys->write_lsn = write_lsn;
- while (group) {
- log_group_write_buf(
- group, log_sys->buf + area_start,
- area_end - area_start,
- ut_uint64_align_down(log_sys->written_to_all_lsn,
- OS_FILE_LOG_BLOCK_SIZE),
- start_offset - area_start);
-
- log_group_set_fields(group, log_sys->write_lsn);
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-
- mutex_exit(&(log_sys->mutex));
- if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
- /* O_DSYNC means the OS did not buffer the log file at all:
+ if (srv_file_flush_method == SRV_O_DSYNC) {
+		/* O_DSYNC means the OS did not buffer the log file at all:
so we have also flushed to disk what we have written */
-
- log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
-
- } else if (flush_to_disk) {
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- fil_flush(group->space_id);
log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
}
- mutex_enter(&(log_sys->mutex));
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- ut_a(group->n_pending_writes == 1);
- ut_a(log_sys->n_pending_writes == 1);
-
- group->n_pending_writes--;
- log_sys->n_pending_writes--;
+ log_write_mutex_exit();
- unlock = log_group_check_flush_completion(group);
- unlock = unlock | log_sys_check_flush_completion();
-
- log_flush_do_unlocks(unlock);
-
- write_lsn = log_sys->write_lsn;
- flush_lsn = log_sys->flushed_to_disk_lsn;
-
- mutex_exit(&(log_sys->mutex));
-
- innobase_mysql_log_notify(write_lsn, flush_lsn);
-
- return;
-
-do_waits:
- mutex_exit(&(log_sys->mutex));
+ if (flush_to_disk) {
+ log_write_flush_to_disk_low();
+ ib_uint64_t write_lsn = log_sys->write_lsn;
+ ib_uint64_t flush_lsn = log_sys->flushed_to_disk_lsn;
+ log_mutex_exit();
- switch (wait) {
- case LOG_WAIT_ONE_GROUP:
- os_event_wait(log_sys->one_flushed_event);
- break;
- case LOG_WAIT_ALL_GROUPS:
- os_event_wait(log_sys->no_flush_event);
- break;
-#ifdef UNIV_DEBUG
- case LOG_NO_WAIT:
- break;
- default:
- ut_error;
-#endif /* UNIV_DEBUG */
+ innobase_mysql_log_notify(write_lsn, flush_lsn);
}
}
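
The control flow of log_write_up_to() (return early when the log is already
durable; wait on flush_event when a running flush may cover the request;
otherwise become the flusher) is the classic group-commit pattern. A
condensed standalone model using std::condition_variable; the structure and
names are illustrative, not the InnoDB implementation:

	#include <condition_variable>
	#include <cstdint>
	#include <mutex>

	struct RedoLog {
		std::mutex mtx;
		std::condition_variable flushed;
		uint64_t lsn = 0;                 /* end of the log buffer */
		uint64_t flushed_to_disk_lsn = 0;
		uint64_t current_flush_lsn = 0;
		bool flush_running = false;

		void fsync_log_file() { /* stand-in for fil_flush() */ }

		/* Make the log durable up to up_to; callers pass up_to <= lsn. */
		void write_up_to(uint64_t up_to) {
			std::unique_lock<std::mutex> lk(mtx);
			while (flushed_to_disk_lsn < up_to) {
				if (flush_running) {
					/* A running flush may cover our LSN:
					   wait, then re-check (this is the
					   flush_event + goto loop above). */
					flushed.wait(lk, [this] {
						return !flush_running;
					});
					continue;
				}
				/* Become the flusher for all buffered log. */
				flush_running = true;
				current_flush_lsn = lsn;
				lk.unlock(); /* do the I/O outside the mutex */
				fsync_log_file();
				lk.lock();
				flushed_to_disk_lsn = current_flush_lsn;
				flush_running = false;
				flushed.notify_all();
			}
		}
	};

One fsync can thus satisfy many committing transactions at once: every
waiter whose LSN is at or below current_flush_lsn returns after the same
flush.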
-/****************************************************************//**
-Does a syncronous flush of the log buffer to disk. */
-UNIV_INTERN
+/** Write the log buffer to the log file up to the last log entry.
+@param[in]	sync	whether the written log should also
+be flushed to disk */
void
-log_buffer_flush_to_disk(void)
-/*==========================*/
+log_buffer_flush_to_disk(
+ bool sync)
{
- lsn_t lsn;
-
ut_ad(!srv_read_only_mode);
- mutex_enter(&(log_sys->mutex));
-
- lsn = log_sys->lsn;
-
- mutex_exit(&(log_sys->mutex));
-
- log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE);
+ log_write_up_to(log_get_lsn(), sync);
}
/****************************************************************//**
@@ -1656,21 +1263,28 @@ This function writes the log buffer to the log file and if 'flush'
is set it forces a flush of the log file as well. This is meant to be
called from the background master thread only, as it does not wait for
the write (+ possible flush) to finish.
-UNIV_INTERN
void
log_buffer_sync_in_background(
/*==========================*/
- ibool flush) /*!< in: flush the logs to disk */
+ bool flush) /*!< in: flush the logs to disk */
{
lsn_t lsn;
- mutex_enter(&(log_sys->mutex));
+ log_mutex_enter();
lsn = log_sys->lsn;
- mutex_exit(&(log_sys->mutex));
+ if (flush
+ && log_sys->n_pending_flushes > 0
+ && log_sys->current_flush_lsn >= lsn) {
+ /* The write + flush will write enough */
+ log_mutex_exit();
+ return;
+ }
- log_write_up_to(lsn, LOG_NO_WAIT, flush);
+ log_mutex_exit();
+
+ log_write_up_to(lsn, flush);
}
/********************************************************************
@@ -1685,42 +1299,33 @@ log_flush_margin(void)
log_t* log = log_sys;
lsn_t lsn = 0;
- mutex_enter(&(log->mutex));
+ log_mutex_enter();
if (log->buf_free > log->max_buf_free) {
-
- if (log->n_pending_writes > 0) {
- /* A flush is running: hope that it will provide enough
- free space */
- } else {
- lsn = log->lsn;
- }
+		/* We can initiate a write even while a flush is running */
+ lsn = log->lsn;
}
- mutex_exit(&(log->mutex));
+ log_mutex_exit();
if (lsn) {
- log_write_up_to(lsn, LOG_NO_WAIT, FALSE);
+ log_write_up_to(lsn, false);
}
}
-/****************************************************************//**
-Advances the smallest lsn for which there are unflushed dirty blocks in the
-buffer pool. NOTE: this function may only be called if the calling thread owns
-no synchronization objects!
+/** Advances the smallest lsn for which there are unflushed dirty blocks in the
+buffer pool.
+NOTE: this function may only be called if the calling thread owns no
+synchronization objects!
+@param[in] new_oldest try to advance oldest_modified_lsn at least to
+this lsn
@return false if there was a flush batch of the same type running,
which means that we could not start this flush batch */
-static
-bool
-log_preflush_pool_modified_pages(
-/*=============================*/
- lsn_t new_oldest) /*!< in: try to advance oldest_modified_lsn
- at least to this lsn */
+static bool log_preflush_pool_modified_pages(lsn_t new_oldest)
{
bool success;
- ulint n_pages;
- if (recv_recovery_on) {
+ if (recv_recovery_is_on()) {
/* If the recovery is running, we must first apply all
log records to their respective file pages to get the
right modify lsn values to these pages: otherwise, there
@@ -1729,23 +1334,40 @@ log_preflush_pool_modified_pages(
not know how up-to-date the disk version of the database is,
and we could not make a new checkpoint on the basis of the
info on the buffer pool only. */
-
recv_apply_hashed_log_recs(true);
}
- success = buf_flush_list(ULINT_MAX, new_oldest, &n_pages);
+ if (new_oldest == LSN_MAX
+ || !buf_page_cleaner_is_active
+ || srv_is_being_started) {
- buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
+ ulint n_pages;
- if (!success) {
- MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
- }
+ success = buf_flush_lists(ULINT_MAX, new_oldest, &n_pages);
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_FLUSH_SYNC_TOTAL_PAGE,
- MONITOR_FLUSH_SYNC_COUNT,
- MONITOR_FLUSH_SYNC_PAGES,
- n_pages);
+ buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
+
+ if (!success) {
+ MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
+ }
+
+ MONITOR_INC_VALUE_CUMULATIVE(
+ MONITOR_FLUSH_SYNC_TOTAL_PAGE,
+ MONITOR_FLUSH_SYNC_COUNT,
+ MONITOR_FLUSH_SYNC_PAGES,
+ n_pages);
+ } else {
+		/* It is better to wait until the page cleaner has flushed. */
+
+ if (srv_flush_sync) {
+			/* wake the page cleaner for an IO burst */
+ buf_flush_request_force(new_oldest);
+ }
+
+ buf_flush_wait_flushed(new_oldest);
+
+ success = true;
+ }
return(success);
}
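
The preflush now runs synchronously in the calling thread only when the page
cleaner cannot be used (an LSN_MAX target, no active cleaner, or server
startup); otherwise it delegates to the cleaner and waits. A sketch of that
decision, with trivial stubs standing in for the buf0flu.cc calls:

	#include <cstdint>

	static const uint64_t LSN_MAX = UINT64_MAX;

	/* Stubs for buf_flush_lists(), buf_flush_request_force() and
	   buf_flush_wait_flushed(). */
	static bool flush_lists_up_to(uint64_t) { return true; }
	static void request_cleaner_flush(uint64_t) {}
	static void wait_flushed(uint64_t) {}

	bool preflush(uint64_t new_oldest, bool cleaner_active,
		      bool starting_up, bool flush_sync) {
		if (new_oldest == LSN_MAX || !cleaner_active || starting_up) {
			/* no page cleaner available: flush in this thread */
			return flush_lists_up_to(new_oldest);
		}
		if (flush_sync) {
			/* wake the page cleaner for an IO burst */
			request_cleaner_flush(new_oldest);
		}
		wait_flushed(new_oldest);
		return true;
	}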
@@ -1757,7 +1379,7 @@ void
log_complete_checkpoint(void)
/*=========================*/
{
- ut_ad(mutex_own(&(log_sys->mutex)));
+ ut_ad(log_mutex_own());
ut_ad(log_sys->n_pending_checkpoint_writes == 0);
log_sys->next_checkpoint_no++;
@@ -1766,6 +1388,11 @@ log_complete_checkpoint(void)
MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
log_sys->lsn - log_sys->last_checkpoint_lsn);
+ DBUG_PRINT("ib_log", ("checkpoint ended at " LSN_PF
+ ", flushed to " LSN_PF,
+ log_sys->last_checkpoint_lsn,
+ log_sys->flushed_to_disk_lsn));
+
rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
}
@@ -1776,403 +1403,280 @@ void
log_io_complete_checkpoint(void)
/*============================*/
{
- mutex_enter(&(log_sys->mutex));
+ MONITOR_DEC(MONITOR_PENDING_CHECKPOINT_WRITE);
- ut_ad(log_sys->n_pending_checkpoint_writes > 0);
+ log_mutex_enter();
- log_sys->n_pending_checkpoint_writes--;
+ ut_ad(log_sys->n_pending_checkpoint_writes > 0);
- if (log_sys->n_pending_checkpoint_writes == 0) {
+ if (--log_sys->n_pending_checkpoint_writes == 0) {
log_complete_checkpoint();
}
- mutex_exit(&(log_sys->mutex));
+ log_mutex_exit();
}
-/*******************************************************************//**
-Writes info to a checkpoint about a log group. */
+/** Write checkpoint info to the log header.
+@param[in] end_lsn start LSN of the MLOG_CHECKPOINT mini-transaction */
static
void
-log_checkpoint_set_nth_group_info(
-/*==============================*/
- byte* buf, /*!< in: buffer for checkpoint info */
- ulint n, /*!< in: nth slot */
- ulint file_no,/*!< in: archived file number */
- ulint offset) /*!< in: archived file offset */
+log_group_checkpoint(lsn_t end_lsn)
{
- ut_ad(n < LOG_MAX_N_GROUPS);
-
- mach_write_to_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
- + 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO, file_no);
- mach_write_to_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
- + 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET, offset);
-}
-
-/*******************************************************************//**
-Gets info from a checkpoint about a log group. */
-UNIV_INTERN
-void
-log_checkpoint_get_nth_group_info(
-/*==============================*/
- const byte* buf, /*!< in: buffer containing checkpoint info */
- ulint n, /*!< in: nth slot */
- ulint* file_no,/*!< out: archived file number */
- ulint* offset) /*!< out: archived file offset */
-{
- ut_ad(n < LOG_MAX_N_GROUPS);
-
- *file_no = mach_read_from_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
- + 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO);
- *offset = mach_read_from_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
- + 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET);
-}
-
-/******************************************************//**
-Writes the checkpoint info to a log group header. */
-static
-void
-log_group_checkpoint(
-/*=================*/
- log_group_t* group) /*!< in: log group */
-{
- log_group_t* group2;
-#ifdef UNIV_LOG_ARCHIVE
- ib_uint64_t archived_lsn;
- ib_uint64_t next_archived_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
lsn_t lsn_offset;
- ulint write_offset;
- ulint fold;
byte* buf;
- ulint i;
ut_ad(!srv_read_only_mode);
- ut_ad(mutex_own(&(log_sys->mutex)));
-#if LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE
-# error "LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE"
-#endif
+ ut_ad(log_mutex_own());
+ ut_ad(end_lsn == 0 || end_lsn >= log_sys->next_checkpoint_lsn);
+ ut_ad(end_lsn <= log_sys->lsn);
+ ut_ad(end_lsn + SIZE_OF_MLOG_CHECKPOINT <= log_sys->lsn
+ || srv_shutdown_state != SRV_SHUTDOWN_NONE);
+
+ DBUG_PRINT("ib_log", ("checkpoint " UINT64PF " at " LSN_PF
+ " written",
+ log_sys->next_checkpoint_no,
+ log_sys->next_checkpoint_lsn));
+
+ log_group_t* group = &log_sys->log;
buf = group->checkpoint_buf;
+ memset(buf, 0, OS_FILE_LOG_BLOCK_SIZE);
mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn);
- log_crypt_write_checkpoint_buf(buf);
+ if (log_sys->is_encrypted()) {
+ log_crypt_write_checkpoint_buf(buf);
+ }
lsn_offset = log_group_calc_lsn_offset(log_sys->next_checkpoint_lsn,
group);
- mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_LOW32,
- lsn_offset & 0xFFFFFFFFUL);
- mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_HIGH32,
- lsn_offset >> 32);
+ mach_write_to_8(buf + LOG_CHECKPOINT_OFFSET, lsn_offset);
+ mach_write_to_8(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size);
+ mach_write_to_8(buf + LOG_CHECKPOINT_END_LSN, end_lsn);
- mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size);
+ log_block_set_checksum(buf, log_block_calc_checksum_crc32(buf));
-#ifdef UNIV_LOG_ARCHIVE
- if (log_sys->archiving_state == LOG_ARCH_OFF) {
- archived_lsn = LSN_MAX;
- } else {
- archived_lsn = log_sys->archived_lsn;
+ MONITOR_INC(MONITOR_PENDING_CHECKPOINT_WRITE);
- if (archived_lsn != log_sys->next_archived_lsn) {
- next_archived_lsn = log_sys->next_archived_lsn;
- /* For debugging only */
- }
- }
-
- mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, archived_lsn);
-#else /* UNIV_LOG_ARCHIVE */
- mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, LSN_MAX);
-#endif /* UNIV_LOG_ARCHIVE */
-
- for (i = 0; i < LOG_MAX_N_GROUPS; i++) {
- log_checkpoint_set_nth_group_info(buf, i, 0, 0);
- }
-
- group2 = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- while (group2) {
- log_checkpoint_set_nth_group_info(buf, group2->id,
-#ifdef UNIV_LOG_ARCHIVE
- group2->archived_file_no,
- group2->archived_offset
-#else /* UNIV_LOG_ARCHIVE */
- 0, 0
-#endif /* UNIV_LOG_ARCHIVE */
- );
-
- group2 = UT_LIST_GET_NEXT(log_groups, group2);
- }
-
- fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
- mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
+ log_sys->n_log_ios++;
- fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
- LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
- mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
+ MONITOR_INC(MONITOR_LOG_IO);
- /* We alternate the physical place of the checkpoint info in the first
- log file */
+ ut_ad(LOG_CHECKPOINT_1 < univ_page_size.physical());
+ ut_ad(LOG_CHECKPOINT_2 < univ_page_size.physical());
- if ((log_sys->next_checkpoint_no & 1) == 0) {
- write_offset = LOG_CHECKPOINT_1;
- } else {
- write_offset = LOG_CHECKPOINT_2;
+ if (log_sys->n_pending_checkpoint_writes++ == 0) {
+ rw_lock_x_lock_gen(&log_sys->checkpoint_lock,
+ LOG_CHECKPOINT);
}
- if (log_do_write) {
- if (log_sys->n_pending_checkpoint_writes == 0) {
-
- rw_lock_x_lock_gen(&(log_sys->checkpoint_lock),
- LOG_CHECKPOINT);
- }
-
- log_sys->n_pending_checkpoint_writes++;
-
- log_sys->n_log_ios++;
+ /* Note: We alternate the physical place of the checkpoint info.
+ See the (next_checkpoint_no & 1) below. */
- /* We send as the last parameter the group machine address
- added with 1, as we want to distinguish between a normal log
- file write and a checkpoint field write */
+	/* We send as the last parameter the group machine address
+	incremented by 1, because we want to distinguish between a normal
+	log file write and a checkpoint field write */
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, false, group->space_id, 0,
- write_offset / UNIV_PAGE_SIZE,
- write_offset % UNIV_PAGE_SIZE,
- OS_FILE_LOG_BLOCK_SIZE,
- buf, ((byte*) group + 1), 0);
+ fil_io(IORequestLogWrite, false,
+ page_id_t(SRV_LOG_SPACE_FIRST_ID, 0),
+ univ_page_size,
+ (log_sys->next_checkpoint_no & 1)
+ ? LOG_CHECKPOINT_2 : LOG_CHECKPOINT_1,
+ OS_FILE_LOG_BLOCK_SIZE,
+ buf, (byte*) group + 1);
- ut_ad(((ulint) group & 0x1UL) == 0);
- }
+ ut_ad(((ulint) group & 0x1UL) == 0);
}
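
The checkpoint header alternates between two fixed slots in the first log
file, selected by the low bit of the checkpoint number, so a torn write can
at worst destroy the older of the two checkpoints while the newer one stays
readable. A sketch; the offsets are the conventional 512-byte-block values
and should be treated as assumptions here:

	#include <cstdint>
	#include <cstdio>

	static const uint64_t LOG_CHECKPOINT_1 = 512;  /* 1 * OS_FILE_LOG_BLOCK_SIZE */
	static const uint64_t LOG_CHECKPOINT_2 = 1536; /* 3 * OS_FILE_LOG_BLOCK_SIZE */

	uint64_t checkpoint_slot(uint64_t checkpoint_no) {
		return (checkpoint_no & 1) ? LOG_CHECKPOINT_2 : LOG_CHECKPOINT_1;
	}

	int main() {
		for (uint64_t no = 0; no < 4; no++) {
			/* even numbers land in slot 1, odd ones in slot 2 */
			std::printf("checkpoint %llu -> offset %llu\n",
				    (unsigned long long) no,
				    (unsigned long long) checkpoint_slot(no));
		}
		return 0;
	}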
-#endif /* !UNIV_HOTBACKUP */
-#ifdef UNIV_HOTBACKUP
-/******************************************************//**
-Writes info to a buffer of a log group when log files are created in
-backup restoration. */
-UNIV_INTERN
+/** Read a log group header page to log_sys->checkpoint_buf.
+@param[in]	group	log group
+@param[in]	header	0 or LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
void
-log_reset_first_header_and_checkpoint(
-/*==================================*/
- byte* hdr_buf,/*!< in: buffer which will be written to the
- start of the first log file */
- ib_uint64_t start) /*!< in: lsn of the start of the first log file;
- we pretend that there is a checkpoint at
- start + LOG_BLOCK_HDR_SIZE */
+log_group_header_read(
+ const log_group_t* group,
+ ulint header)
{
- ulint fold;
- byte* buf;
- ib_uint64_t lsn;
-
- mach_write_to_4(hdr_buf + LOG_GROUP_ID, 0);
- mach_write_to_8(hdr_buf + LOG_FILE_START_LSN, start);
-
- lsn = start + LOG_BLOCK_HDR_SIZE;
-
- /* Write the label of mysqlbackup --restore */
- strcpy((char*) hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
- "ibbackup ");
- ut_sprintf_timestamp((char*) hdr_buf
- + (LOG_FILE_WAS_CREATED_BY_HOT_BACKUP
- + (sizeof "ibbackup ") - 1));
- buf = hdr_buf + LOG_CHECKPOINT_1;
-
- mach_write_to_8(buf + LOG_CHECKPOINT_NO, 0);
- mach_write_to_8(buf + LOG_CHECKPOINT_LSN, lsn);
-
- log_crypt_write_checkpoint_buf(buf);
-
- mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_LOW32,
- LOG_FILE_HDR_SIZE + LOG_BLOCK_HDR_SIZE);
- mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_HIGH32, 0);
-
- mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, 2 * 1024 * 1024);
+ ut_ad(log_mutex_own());
- mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, LSN_MAX);
-
- fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
- mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
+ log_sys->n_log_ios++;
- fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
- LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
- mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
+ MONITOR_INC(MONITOR_LOG_IO);
- /* Starting from InnoDB-3.23.50, we should also write info on
- allocated size in the tablespace, but unfortunately we do not
- know it here */
+ fil_io(IORequestLogRead, true,
+ page_id_t(SRV_LOG_SPACE_FIRST_ID,
+ header / univ_page_size.physical()),
+ univ_page_size, header % univ_page_size.physical(),
+ OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL);
}
-#endif /* UNIV_HOTBACKUP */
-#ifndef UNIV_HOTBACKUP
-/******************************************************//**
-Reads a checkpoint info from a log group header to log_sys->checkpoint_buf. */
-UNIV_INTERN
+/** Write checkpoint info to the log header and invoke log_mutex_exit().
+@param[in] sync whether to wait for the write to complete
+@param[in] end_lsn start LSN of the MLOG_CHECKPOINT mini-transaction */
void
-log_group_read_checkpoint_info(
-/*===========================*/
- log_group_t* group, /*!< in: log group */
- ulint field) /*!< in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
+log_write_checkpoint_info(bool sync, lsn_t end_lsn)
{
- ut_ad(mutex_own(&(log_sys->mutex)));
+ ut_ad(log_mutex_own());
+ ut_ad(!srv_read_only_mode);
- log_sys->n_log_ios++;
+ log_group_checkpoint(end_lsn);
- fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->space_id, 0,
- field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE,
- OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL, 0);
-}
+ log_mutex_exit();
-/******************************************************//**
-Writes checkpoint info to groups. */
-UNIV_INTERN
-void
-log_groups_write_checkpoint_info(void)
-/*==================================*/
-{
- log_group_t* group;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
+ MONITOR_INC(MONITOR_NUM_CHECKPOINT);
- if (!srv_read_only_mode) {
- for (group = UT_LIST_GET_FIRST(log_sys->log_groups);
- group;
- group = UT_LIST_GET_NEXT(log_groups, group)) {
+ if (sync) {
+ /* Wait for the checkpoint write to complete */
+ rw_lock_s_lock(&log_sys->checkpoint_lock);
+ rw_lock_s_unlock(&log_sys->checkpoint_lock);
- log_group_checkpoint(group);
- }
+ DBUG_EXECUTE_IF(
+ "crash_after_checkpoint",
+ DBUG_SUICIDE(););
}
}
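
Waiting for the checkpoint write by taking and immediately releasing the
shared checkpoint_lock works because the writer holds the lock exclusively
until the I/O completes, so an S acquisition blocks exactly that long. The
same pattern in portable C++, as a sketch:

	#include <shared_mutex>

	/* X-held while a checkpoint write is in flight. */
	std::shared_mutex checkpoint_lock;

	/* Block until any in-flight checkpoint write has completed. */
	void wait_for_checkpoint_write() {
		std::shared_lock<std::shared_mutex> s(checkpoint_lock);
		/* the shared lock is released immediately on scope exit */
	}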
-/******************************************************//**
-Makes a checkpoint. Note that this function does not flush dirty
+/** Set extra data to be written to the redo log during checkpoint.
+@param[in] buf data to be appended on checkpoint, or NULL
+@return pointer to previous data to be appended on checkpoint */
+mtr_buf_t*
+log_append_on_checkpoint(
+ mtr_buf_t* buf)
+{
+ log_mutex_enter();
+ mtr_buf_t* old = log_sys->append_on_checkpoint;
+ log_sys->append_on_checkpoint = buf;
+ log_mutex_exit();
+ return(old);
+}
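
log_append_on_checkpoint() is a mutex-protected pointer swap that returns
the previous value; in modern C++ the same pattern is std::exchange under a
lock guard. A sketch with illustrative names:

	#include <mutex>
	#include <utility>

	template <typename T>
	T* set_hook(std::mutex& m, T*& slot, T* new_value) {
		std::lock_guard<std::mutex> lk(m);
		/* store the new pointer, hand back the old one */
		return std::exchange(slot, new_value);
	}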
+
+/** Make a checkpoint. Note that this function does not flush dirty
blocks from the buffer pool: it only checks the lsn of the oldest
modification in the pool, and writes information about that lsn in
-log files. Use log_make_checkpoint_at to flush also the pool.
-@return TRUE if success, FALSE if a checkpoint write was already running */
-UNIV_INTERN
-ibool
-log_checkpoint(
-/*===========*/
- ibool sync, /*!< in: TRUE if synchronous operation is
- desired */
- ibool write_always) /*!< in: the function normally checks if the
- the new checkpoint would have a greater
- lsn than the previous one: if not, then no
- physical write is done; by setting this
- parameter TRUE, a physical write will always be
- made to log files */
+log files. Use log_make_checkpoint() to also flush the pool.
+@param[in] sync whether to wait for the write to complete
+@return true if success, false if a checkpoint write was already running */
+bool log_checkpoint(bool sync)
{
lsn_t oldest_lsn;
ut_ad(!srv_read_only_mode);
+ DBUG_EXECUTE_IF("no_checkpoint",
+			/* Sleep long enough that no further
+			checkpoint can happen. */
+ os_thread_sleep(360000000););
+
if (recv_recovery_is_on()) {
recv_apply_hashed_log_recs(true);
}
- if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
- fil_flush_file_spaces(FIL_TABLESPACE);
+ switch (srv_file_flush_method) {
+ case SRV_NOSYNC:
+ break;
+ case SRV_O_DSYNC:
+ case SRV_FSYNC:
+ case SRV_LITTLESYNC:
+ case SRV_O_DIRECT:
+ case SRV_O_DIRECT_NO_FSYNC:
+ case SRV_ALL_O_DIRECT_FSYNC:
+ fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
}
- mutex_enter(&(log_sys->mutex));
+ log_mutex_enter();
ut_ad(!recv_no_log_write);
oldest_lsn = log_buf_pool_get_oldest_modification();
- mutex_exit(&(log_sys->mutex));
-
/* Because log also contains headers and dummy log records,
- if the buffer pool contains no dirty buffers, oldest_lsn
- gets the value log_sys->lsn from the previous function,
- and we must make sure that the log is flushed up to that
- lsn. If there are dirty buffers in the buffer pool, then our
- write-ahead-logging algorithm ensures that the log has been flushed
- up to oldest_lsn. */
+ log_buf_pool_get_oldest_modification() will return log_sys->lsn
+ if the buffer pool contains no dirty buffers.
+ We must make sure that the log is flushed up to that lsn.
+ If there are dirty buffers in the buffer pool, then our
+ write-ahead-logging algorithm ensures that the log has been
+ flushed up to oldest_lsn. */
+
+ ut_ad(oldest_lsn >= log_sys->last_checkpoint_lsn);
+ if (oldest_lsn
+ > log_sys->last_checkpoint_lsn + SIZE_OF_MLOG_CHECKPOINT) {
+ /* Some log has been written since the previous checkpoint. */
+ } else if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
+ /* MariaDB 10.3 startup expects the redo log file to be
+ logically empty (not even containing a MLOG_CHECKPOINT record)
+ after a clean shutdown. Perform an extra checkpoint at
+ shutdown. */
+ } else {
+ /* Do nothing, because nothing was logged (other than
+ a MLOG_CHECKPOINT marker) since the previous checkpoint. */
+ log_mutex_exit();
+ return(true);
+ }
+ /* Repeat the MLOG_FILE_NAME records after the checkpoint, in
+ case some log records between the checkpoint and log_sys->lsn
+ need them. Finally, write a MLOG_CHECKPOINT marker. Redo log
+ apply expects to see a MLOG_CHECKPOINT after the checkpoint,
+ except on clean shutdown, where the log will be empty after
+ the checkpoint.
+ It is important that we write out the redo log before any
+ further dirty pages are flushed to the tablespace files. At
+ this point, because log_mutex_own(), mtr_commit() in other
+ threads will be blocked, and no pages can be added to the
+ flush lists. */
+ lsn_t flush_lsn = oldest_lsn;
+ const lsn_t end_lsn = log_sys->lsn;
+ const bool do_write
+ = srv_shutdown_state == SRV_SHUTDOWN_NONE
+ || flush_lsn != end_lsn;
- log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
+ if (fil_names_clear(flush_lsn, do_write)) {
+ ut_ad(log_sys->lsn >= end_lsn + SIZE_OF_MLOG_CHECKPOINT);
+ flush_lsn = log_sys->lsn;
+ }
- mutex_enter(&(log_sys->mutex));
+ log_mutex_exit();
- if (!write_always
- && log_sys->last_checkpoint_lsn >= oldest_lsn) {
+ log_write_up_to(flush_lsn, true);
- mutex_exit(&(log_sys->mutex));
+ log_mutex_enter();
- return(TRUE);
- }
+ ut_ad(log_sys->flushed_to_disk_lsn >= flush_lsn);
+ ut_ad(flush_lsn >= oldest_lsn);
- ut_ad(log_sys->flushed_to_disk_lsn >= oldest_lsn);
+ if (log_sys->last_checkpoint_lsn >= oldest_lsn) {
+ log_mutex_exit();
+ return(true);
+ }
if (log_sys->n_pending_checkpoint_writes > 0) {
/* A checkpoint write is running */
-
- mutex_exit(&(log_sys->mutex));
+ log_mutex_exit();
if (sync) {
/* Wait for the checkpoint write to complete */
- rw_lock_s_lock(&(log_sys->checkpoint_lock));
- rw_lock_s_unlock(&(log_sys->checkpoint_lock));
+ rw_lock_s_lock(&log_sys->checkpoint_lock);
+ rw_lock_s_unlock(&log_sys->checkpoint_lock);
}
- return(FALSE);
+ return(false);
}
log_sys->next_checkpoint_lsn = oldest_lsn;
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr, "Making checkpoint no "
- LSN_PF " at lsn " LSN_PF "\n",
- log_sys->next_checkpoint_no,
- oldest_lsn);
- }
-#endif /* UNIV_DEBUG */
-
- /* generate key version and key used to encrypt future blocks,
- *
- * NOTE: the +1 is as the next_checkpoint_no will be updated once
- * the checkpoint info has been written and THEN blocks will be encrypted
- * with new key
- */
- log_crypt_set_ver_and_key(log_sys->next_checkpoint_no + 1);
- log_groups_write_checkpoint_info();
+ log_write_checkpoint_info(sync, end_lsn);
+ ut_ad(!log_mutex_own());
- MONITOR_INC(MONITOR_NUM_CHECKPOINT);
-
- mutex_exit(&(log_sys->mutex));
-
- if (sync) {
- /* Wait for the checkpoint write to complete */
- rw_lock_s_lock(&(log_sys->checkpoint_lock));
- rw_lock_s_unlock(&(log_sys->checkpoint_lock));
- }
-
- return(TRUE);
+ return(true);
}
-/****************************************************************//**
-Makes a checkpoint at a given lsn or later. */
-UNIV_INTERN
-void
-log_make_checkpoint_at(
-/*===================*/
- lsn_t lsn, /*!< in: make a checkpoint at this or a
- later lsn, if LSN_MAX, makes
- a checkpoint at the latest lsn */
- ibool write_always) /*!< in: the function normally checks if
- the new checkpoint would have a
- greater lsn than the previous one: if
- not, then no physical write is done;
- by setting this parameter TRUE, a
- physical write will always be made to
- log files */
+/** Make a checkpoint */
+void log_make_checkpoint()
{
/* Preflush pages synchronously */
- while (!log_preflush_pool_modified_pages(lsn)) {
+ while (!log_preflush_pool_modified_pages(LSN_MAX)) {
/* Flush as much as we can */
}
- while (!log_checkpoint(TRUE, write_always)) {
+ while (!log_checkpoint(true)) {
/* Force a checkpoint */
}
}
@@ -2192,20 +1696,15 @@ log_checkpoint_margin(void)
lsn_t checkpoint_age;
ib_uint64_t advance;
lsn_t oldest_lsn;
- ibool checkpoint_sync;
- ibool do_checkpoint;
bool success;
loop:
- checkpoint_sync = FALSE;
- do_checkpoint = FALSE;
advance = 0;
- mutex_enter(&(log->mutex));
+ log_mutex_enter();
ut_ad(!recv_no_log_write);
- if (log->check_flush_or_checkpoint == FALSE) {
- mutex_exit(&(log->mutex));
-
+ if (!log->check_flush_or_checkpoint) {
+ log_mutex_exit();
return;
}
@@ -2216,29 +1715,30 @@ loop:
if (age > log->max_modified_age_sync) {
/* A flush is urgent: we have to do a synchronous preflush */
- advance = 2 * (age - log->max_modified_age_sync);
+ advance = age - log->max_modified_age_sync;
}
checkpoint_age = log->lsn - log->last_checkpoint_lsn;
+ bool checkpoint_sync;
+ bool do_checkpoint;
+
if (checkpoint_age > log->max_checkpoint_age) {
/* A checkpoint is urgent: we do it synchronously */
-
- checkpoint_sync = TRUE;
-
- do_checkpoint = TRUE;
-
+ checkpoint_sync = true;
+ do_checkpoint = true;
} else if (checkpoint_age > log->max_checkpoint_age_async) {
/* A checkpoint is not urgent: do it asynchronously */
-
- do_checkpoint = TRUE;
-
- log->check_flush_or_checkpoint = FALSE;
+ do_checkpoint = true;
+ checkpoint_sync = false;
+ log->check_flush_or_checkpoint = false;
} else {
- log->check_flush_or_checkpoint = FALSE;
+ do_checkpoint = false;
+ checkpoint_sync = false;
+ log->check_flush_or_checkpoint = false;
}
- mutex_exit(&(log->mutex));
+ log_mutex_exit();
if (advance) {
lsn_t new_oldest = oldest_lsn + advance;
@@ -2249,17 +1749,17 @@ loop:
and can proceed. If it did not succeed, there was another
thread doing a flush at the same time. */
if (!success) {
- mutex_enter(&(log->mutex));
+ log_mutex_enter();
- log->check_flush_or_checkpoint = TRUE;
+ log->check_flush_or_checkpoint = true;
- mutex_exit(&(log->mutex));
+ log_mutex_exit();
goto loop;
}
}
if (do_checkpoint) {
- log_checkpoint(checkpoint_sync, FALSE);
+ log_checkpoint(checkpoint_sync);
if (checkpoint_sync) {
@@ -2268,928 +1768,24 @@ loop:
}
}
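
log_checkpoint_margin() grades urgency by the age of the last checkpoint:
past max_checkpoint_age the checkpoint is done synchronously, past
max_checkpoint_age_async it is started asynchronously, and below that
nothing is done. A sketch of the ladder (the threshold parameters are
illustrative):

	#include <cstdint>

	enum class MarginAction { None, AsyncCheckpoint, SyncCheckpoint };

	MarginAction checkpoint_action(uint64_t lsn,
				       uint64_t last_checkpoint_lsn,
				       uint64_t max_age_async,
				       uint64_t max_age) {
		const uint64_t age = lsn - last_checkpoint_lsn;
		if (age > max_age) {
			return MarginAction::SyncCheckpoint;  /* urgent */
		}
		if (age > max_age_async) {
			return MarginAction::AsyncCheckpoint; /* start, don't wait */
		}
		return MarginAction::None;
	}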
-/******************************************************//**
-Reads a specified log segment to a buffer. */
-UNIV_INTERN
-void
-log_group_read_log_seg(
-/*===================*/
- ulint type, /*!< in: LOG_ARCHIVE or LOG_RECOVER */
- byte* buf, /*!< in: buffer where to read */
- log_group_t* group, /*!< in: log group */
- lsn_t start_lsn, /*!< in: read area start */
- lsn_t end_lsn) /*!< in: read area end */
-{
- ulint len;
- lsn_t source_offset;
- bool sync;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- sync = (type == LOG_RECOVER);
-loop:
- source_offset = log_group_calc_lsn_offset(start_lsn, group);
-
- ut_a(end_lsn - start_lsn <= ULINT_MAX);
- len = (ulint) (end_lsn - start_lsn);
-
- ut_ad(len != 0);
-
- if ((source_offset % group->file_size) + len > group->file_size) {
-
- /* If the above condition is true then len (which is ulint)
- is > the expression below, so the typecast is ok */
- len = (ulint) (group->file_size -
- (source_offset % group->file_size));
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- if (type == LOG_ARCHIVE) {
-
- log_sys->n_pending_archive_ios++;
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- log_sys->n_log_ios++;
-
- ut_a(source_offset / UNIV_PAGE_SIZE <= ULINT_MAX);
-
- fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id, 0,
- (ulint) (source_offset / UNIV_PAGE_SIZE),
- (ulint) (source_offset % UNIV_PAGE_SIZE),
- len, buf, NULL, 0);
-
-#ifdef DEBUG_CRYPT
- fprintf(stderr, "BEFORE DECRYPT: block: %lu checkpoint: %lu %.8lx %.8lx offset %lu\n",
- log_block_get_hdr_no(buf),
- log_block_get_checkpoint_no(buf),
- log_block_calc_checksum(buf),
- log_block_get_checksum(buf), source_offset);
-#endif
-
- log_decrypt_after_read(buf, start_lsn, len);
-
-#ifdef DEBUG_CRYPT
- fprintf(stderr, "AFTER DECRYPT: block: %lu checkpoint: %lu %.8lx %.8lx\n",
- log_block_get_hdr_no(buf),
- log_block_get_checkpoint_no(buf),
- log_block_calc_checksum(buf),
- log_block_get_checksum(buf));
-#endif
-
- start_lsn += len;
- buf += len;
-
- if (recv_sys->report(time(NULL))) {
- ib_logf(IB_LOG_LEVEL_INFO, "Read redo log up to LSN=" LSN_PF,
- start_lsn);
- service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
- "Read redo log up to LSN=" LSN_PF,
- start_lsn);
- }
-
- if (start_lsn != end_lsn) {
-
- goto loop;
- }
-}
-
-#ifdef UNIV_LOG_ARCHIVE
-/******************************************************//**
-Generates an archived log file name. */
-UNIV_INTERN
-void
-log_archived_file_name_gen(
-/*=======================*/
- char* buf, /*!< in: buffer where to write */
- ulint id MY_ATTRIBUTE((unused)),
- /*!< in: group id;
- currently we only archive the first group */
- ulint file_no)/*!< in: file number */
-{
- sprintf(buf, "%sib_arch_log_%010lu", srv_arch_dir, (ulong) file_no);
-}
-
-/******************************************************//**
-Writes a log file header to a log file space. */
-static
-void
-log_group_archive_file_header_write(
-/*================================*/
- log_group_t* group, /*!< in: log group */
- ulint nth_file, /*!< in: header to the nth file in the
- archive log file space */
- ulint file_no, /*!< in: archived file number */
- ib_uint64_t start_lsn) /*!< in: log file data starts at this
- lsn */
-{
- byte* buf;
- ulint dest_offset;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- ut_a(nth_file < group->n_files);
-
- buf = *(group->archive_file_header_bufs + nth_file);
-
- mach_write_to_4(buf + LOG_GROUP_ID, group->id);
- mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn);
- mach_write_to_4(buf + LOG_FILE_NO, file_no);
-
- mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, FALSE);
-
- dest_offset = nth_file * group->file_size;
-
- log_sys->n_log_ios++;
-
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->archive_space_id,
- dest_offset / UNIV_PAGE_SIZE,
- dest_offset % UNIV_PAGE_SIZE,
- 2 * OS_FILE_LOG_BLOCK_SIZE,
- buf, &log_archive_io, 0);
-}
-
-/******************************************************//**
-Writes a log file header to a completed archived log file. */
-static
-void
-log_group_archive_completed_header_write(
-/*=====================================*/
- log_group_t* group, /*!< in: log group */
- ulint nth_file, /*!< in: header to the nth file in the
- archive log file space */
- ib_uint64_t end_lsn) /*!< in: end lsn of the file */
-{
- byte* buf;
- ulint dest_offset;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
- ut_a(nth_file < group->n_files);
-
- buf = *(group->archive_file_header_bufs + nth_file);
-
- mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, TRUE);
- mach_write_to_8(buf + LOG_FILE_END_LSN, end_lsn);
-
- dest_offset = nth_file * group->file_size + LOG_FILE_ARCH_COMPLETED;
-
- log_sys->n_log_ios++;
-
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->archive_space_id,
- dest_offset / UNIV_PAGE_SIZE,
- dest_offset % UNIV_PAGE_SIZE,
- OS_FILE_LOG_BLOCK_SIZE,
- buf + LOG_FILE_ARCH_COMPLETED,
- &log_archive_io, 0);
-}
-
-/******************************************************//**
-Does the archive writes for a single log group. */
-static
-void
-log_group_archive(
-/*==============*/
- log_group_t* group) /*!< in: log group */
-{
- os_file_t file_handle;
- lsn_t start_lsn;
- lsn_t end_lsn;
- char name[1024];
- byte* buf;
- ulint len;
- ibool ret;
- lsn_t next_offset;
- ulint n_files;
- ulint open_mode;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- start_lsn = log_sys->archived_lsn;
-
- ut_a(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
-
- end_lsn = log_sys->next_archived_lsn;
-
- ut_a(end_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
-
- buf = log_sys->archive_buf;
-
- n_files = 0;
-
- next_offset = group->archived_offset;
-loop:
- if ((next_offset % group->file_size == 0)
- || (fil_space_get_size(group->archive_space_id) == 0)) {
-
- /* Add the file to the archive file space; create or open the
- file */
-
- if (next_offset % group->file_size == 0) {
- open_mode = OS_FILE_CREATE;
- } else {
- open_mode = OS_FILE_OPEN;
- }
-
- log_archived_file_name_gen(name, group->id,
- group->archived_file_no + n_files);
-
- file_handle = os_file_create(innodb_file_log_key,
- name, open_mode,
- OS_FILE_AIO,
- OS_DATA_FILE, &ret);
-
- if (!ret && (open_mode == OS_FILE_CREATE)) {
- file_handle = os_file_create(
- innodb_file_log_key, name, OS_FILE_OPEN,
- OS_FILE_AIO, OS_DATA_FILE, &ret);
- }
-
- if (!ret) {
- fprintf(stderr,
- "InnoDB: Cannot create or open"
- " archive log file %s.\n"
- "InnoDB: Cannot continue operation.\n"
- "InnoDB: Check that the log archive"
- " directory exists,\n"
- "InnoDB: you have access rights to it, and\n"
- "InnoDB: there is space available.\n", name);
- exit(1);
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr, "Created archive file %s\n", name);
- }
-#endif /* UNIV_DEBUG */
-
- ret = os_file_close(file_handle);
-
- ut_a(ret);
-
- /* Add the archive file as a node to the space */
-
- fil_node_create(name, group->file_size / UNIV_PAGE_SIZE,
- group->archive_space_id, FALSE);
-
- if (next_offset % group->file_size == 0) {
- log_group_archive_file_header_write(
- group, n_files,
- group->archived_file_no + n_files,
- start_lsn);
-
- next_offset += LOG_FILE_HDR_SIZE;
- }
- }
-
- len = end_lsn - start_lsn;
-
- if (group->file_size < (next_offset % group->file_size) + len) {
-
- len = group->file_size - (next_offset % group->file_size);
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "Archiving starting at lsn " LSN_PF ", len %lu"
- " to group %lu\n",
- start_lsn,
- (ulong) len, (ulong) group->id);
- }
-#endif /* UNIV_DEBUG */
-
- log_sys->n_pending_archive_ios++;
-
- log_sys->n_log_ios++;
-
- //TODO (jonaso): This must be dead code??
- log_encrypt_before_write(log_sys->next_checkpoint_no,
- buf, start_lsn, len);
-
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, false, group->archive_space_id,
- (ulint) (next_offset / UNIV_PAGE_SIZE),
- (ulint) (next_offset % UNIV_PAGE_SIZE),
- ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf,
- &log_archive_io, 0);
-
- start_lsn += len;
- next_offset += len;
- buf += len;
-
- if (next_offset % group->file_size == 0) {
- n_files++;
- }
-
- if (end_lsn != start_lsn) {
-
- goto loop;
- }
-
- group->next_archived_file_no = group->archived_file_no + n_files;
- group->next_archived_offset = next_offset % group->file_size;
-
- ut_a(group->next_archived_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
-}
-
-/*****************************************************//**
-(Writes to the archive of each log group.) Currently, only the first
-group is archived. */
-static
-void
-log_archive_groups(void)
-/*====================*/
-{
- log_group_t* group;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- log_group_archive(group);
-}
-
-/*****************************************************//**
-Completes the archiving write phase for (each log group), currently,
-the first log group. */
-static
-void
-log_archive_write_complete_groups(void)
-/*===================================*/
-{
- log_group_t* group;
- ulint end_offset;
- ulint trunc_files;
- ulint n_files;
- ib_uint64_t start_lsn;
- ib_uint64_t end_lsn;
- ulint i;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- group->archived_file_no = group->next_archived_file_no;
- group->archived_offset = group->next_archived_offset;
-
- /* Truncate from the archive file space all but the last
- file, or if it has been written full, all files */
-
- n_files = (UNIV_PAGE_SIZE
- * fil_space_get_size(group->archive_space_id))
- / group->file_size;
- ut_ad(n_files > 0);
-
- end_offset = group->archived_offset;
-
- if (end_offset % group->file_size == 0) {
-
- trunc_files = n_files;
- } else {
- trunc_files = n_files - 1;
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes && trunc_files) {
- fprintf(stderr,
- "Complete file(s) archived to group %lu\n",
- (ulong) group->id);
- }
-#endif /* UNIV_DEBUG */
-
- /* Calculate the archive file space start lsn */
- start_lsn = log_sys->next_archived_lsn
- - (end_offset - LOG_FILE_HDR_SIZE + trunc_files
- * (group->file_size - LOG_FILE_HDR_SIZE));
- end_lsn = start_lsn;
-
- for (i = 0; i < trunc_files; i++) {
-
- end_lsn += group->file_size - LOG_FILE_HDR_SIZE;
-
- /* Write a notice to the headers of archived log
- files that the file write has been completed */
-
- log_group_archive_completed_header_write(group, i, end_lsn);
- }
-
- fil_space_truncate_start(group->archive_space_id,
- trunc_files * group->file_size);
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fputs("Archiving writes completed\n", stderr);
- }
-#endif /* UNIV_DEBUG */
-}
-
-/******************************************************//**
-Completes an archiving i/o. */
-static
-void
-log_archive_check_completion_low(void)
-/*==================================*/
-{
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- if (log_sys->n_pending_archive_ios == 0
- && log_sys->archiving_phase == LOG_ARCHIVE_READ) {
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fputs("Archiving read completed\n", stderr);
- }
-#endif /* UNIV_DEBUG */
-
- /* Archive buffer has now been read in: start archive writes */
-
- log_sys->archiving_phase = LOG_ARCHIVE_WRITE;
-
- log_archive_groups();
- }
-
- if (log_sys->n_pending_archive_ios == 0
- && log_sys->archiving_phase == LOG_ARCHIVE_WRITE) {
-
- log_archive_write_complete_groups();
-
- log_sys->archived_lsn = log_sys->next_archived_lsn;
-
- rw_lock_x_unlock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
- }
-}
-
-/******************************************************//**
-Completes an archiving i/o. */
-static
-void
-log_io_complete_archive(void)
-/*=========================*/
-{
- log_group_t* group;
-
- mutex_enter(&(log_sys->mutex));
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- mutex_exit(&(log_sys->mutex));
-
- fil_flush(group->archive_space_id);
-
- mutex_enter(&(log_sys->mutex));
-
- ut_ad(log_sys->n_pending_archive_ios > 0);
-
- log_sys->n_pending_archive_ios--;
-
- log_archive_check_completion_low();
-
- mutex_exit(&(log_sys->mutex));
-}
-
-/********************************************************************//**
-Starts an archiving operation.
-@return TRUE if succeed, FALSE if an archiving operation was already running */
-UNIV_INTERN
-ibool
-log_archive_do(
-/*===========*/
- ibool sync, /*!< in: TRUE if synchronous operation is desired */
- ulint* n_bytes)/*!< out: archive log buffer size, 0 if nothing to
- archive */
-{
- ibool calc_new_limit;
- ib_uint64_t start_lsn;
- ib_uint64_t limit_lsn;
-
- calc_new_limit = TRUE;
-loop:
- mutex_enter(&(log_sys->mutex));
-
- switch (log_sys->archiving_state) {
- case LOG_ARCH_OFF:
-arch_none:
- mutex_exit(&(log_sys->mutex));
-
- *n_bytes = 0;
-
- return(TRUE);
- case LOG_ARCH_STOPPED:
- case LOG_ARCH_STOPPING2:
- mutex_exit(&(log_sys->mutex));
-
- os_event_wait(log_sys->archiving_on);
-
- goto loop;
- }
-
- start_lsn = log_sys->archived_lsn;
-
- if (calc_new_limit) {
- ut_a(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE == 0);
- limit_lsn = start_lsn + log_sys->archive_buf_size;
-
- *n_bytes = log_sys->archive_buf_size;
-
- if (limit_lsn >= log_sys->lsn) {
-
- limit_lsn = ut_uint64_align_down(
- log_sys->lsn, OS_FILE_LOG_BLOCK_SIZE);
- }
- }
-
- if (log_sys->archived_lsn >= limit_lsn) {
-
- goto arch_none;
- }
-
- if (log_sys->written_to_all_lsn < limit_lsn) {
-
- mutex_exit(&(log_sys->mutex));
-
- log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
-
- calc_new_limit = FALSE;
-
- goto loop;
- }
-
- if (log_sys->n_pending_archive_ios > 0) {
- /* An archiving operation is running */
-
- mutex_exit(&(log_sys->mutex));
-
- if (sync) {
- rw_lock_s_lock(&(log_sys->archive_lock));
- rw_lock_s_unlock(&(log_sys->archive_lock));
- }
-
- *n_bytes = log_sys->archive_buf_size;
-
- return(FALSE);
- }
-
- rw_lock_x_lock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
-
- log_sys->archiving_phase = LOG_ARCHIVE_READ;
-
- log_sys->next_archived_lsn = limit_lsn;
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "Archiving from lsn " LSN_PF " to lsn " LSN_PF "\n",
- log_sys->archived_lsn, limit_lsn);
- }
-#endif /* UNIV_DEBUG */
-
- /* Read the log segment to the archive buffer */
-
- log_group_read_log_seg(LOG_ARCHIVE, log_sys->archive_buf,
- UT_LIST_GET_FIRST(log_sys->log_groups),
- start_lsn, limit_lsn);
-
- mutex_exit(&(log_sys->mutex));
-
- if (sync) {
- rw_lock_s_lock(&(log_sys->archive_lock));
- rw_lock_s_unlock(&(log_sys->archive_lock));
- }
-
- *n_bytes = log_sys->archive_buf_size;
-
- return(TRUE);
-}
-
-/****************************************************************//**
-Writes the log contents to the archive at least up to the lsn when this
-function was called. */
-static
-void
-log_archive_all(void)
-/*=================*/
-{
- ib_uint64_t present_lsn;
- ulint dummy;
-
- mutex_enter(&(log_sys->mutex));
-
- if (log_sys->archiving_state == LOG_ARCH_OFF) {
- mutex_exit(&(log_sys->mutex));
-
- return;
- }
-
- present_lsn = log_sys->lsn;
-
- mutex_exit(&(log_sys->mutex));
-
- log_pad_current_log_block();
-
- for (;;) {
- mutex_enter(&(log_sys->mutex));
-
- if (present_lsn <= log_sys->archived_lsn) {
-
- mutex_exit(&(log_sys->mutex));
-
- return;
- }
-
- mutex_exit(&(log_sys->mutex));
-
- log_archive_do(TRUE, &dummy);
- }
-}
-
-/*****************************************************//**
-Closes the possible open archive log file (for each group) the first group,
-and if it was open, increments the group file count by 2, if desired. */
-static
-void
-log_archive_close_groups(
-/*=====================*/
- ibool increment_file_count) /*!< in: TRUE if we want to increment
- the file count */
-{
- log_group_t* group;
- ulint trunc_len;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- if (log_sys->archiving_state == LOG_ARCH_OFF) {
-
- return;
- }
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- trunc_len = UNIV_PAGE_SIZE
- * fil_space_get_size(group->archive_space_id);
- if (trunc_len > 0) {
- ut_a(trunc_len == group->file_size);
-
- /* Write a notice to the headers of archived log
- files that the file write has been completed */
-
- log_group_archive_completed_header_write(
- group, 0, log_sys->archived_lsn);
-
- fil_space_truncate_start(group->archive_space_id,
- trunc_len);
- if (increment_file_count) {
- group->archived_offset = 0;
- group->archived_file_no += 2;
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "Incrementing arch file no to %lu"
- " in log group %lu\n",
- (ulong) group->archived_file_no + 2,
- (ulong) group->id);
- }
-#endif /* UNIV_DEBUG */
- }
-}
-
-/****************************************************************//**
-Writes the log contents to the archive up to the lsn when this function was
-called, and stops the archiving. When archiving is started again, the archived
-log file numbers start from 2 higher, so that the archiving will not write
-again to the archived log files which exist when this function returns.
-@return DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_stop(void)
-/*==================*/
-{
- ibool success;
-
- mutex_enter(&(log_sys->mutex));
-
- if (log_sys->archiving_state != LOG_ARCH_ON) {
-
- mutex_exit(&(log_sys->mutex));
-
- return(DB_ERROR);
- }
-
- log_sys->archiving_state = LOG_ARCH_STOPPING;
-
- mutex_exit(&(log_sys->mutex));
-
- log_archive_all();
-
- mutex_enter(&(log_sys->mutex));
-
- log_sys->archiving_state = LOG_ARCH_STOPPING2;
- os_event_reset(log_sys->archiving_on);
-
- mutex_exit(&(log_sys->mutex));
-
- /* Wait for a possible archiving operation to end */
-
- rw_lock_s_lock(&(log_sys->archive_lock));
- rw_lock_s_unlock(&(log_sys->archive_lock));
-
- mutex_enter(&(log_sys->mutex));
-
- /* Close all archived log files, incrementing the file count by 2,
- if appropriate */
-
- log_archive_close_groups(TRUE);
-
- mutex_exit(&(log_sys->mutex));
-
- /* Make a checkpoint, so that if recovery is needed, the file numbers
- of new archived log files will start from the right value */
-
- success = FALSE;
-
- while (!success) {
- success = log_checkpoint(TRUE, TRUE);
- }
-
- mutex_enter(&(log_sys->mutex));
-
- log_sys->archiving_state = LOG_ARCH_STOPPED;
-
- mutex_exit(&(log_sys->mutex));
-
- return(DB_SUCCESS);
-}
-
-/****************************************************************//**
-Starts again archiving which has been stopped.
-@return DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_start(void)
-/*===================*/
-{
- mutex_enter(&(log_sys->mutex));
-
- if (log_sys->archiving_state != LOG_ARCH_STOPPED) {
-
- mutex_exit(&(log_sys->mutex));
-
- return(DB_ERROR);
- }
-
- log_sys->archiving_state = LOG_ARCH_ON;
-
- os_event_set(log_sys->archiving_on);
-
- mutex_exit(&(log_sys->mutex));
-
- return(DB_SUCCESS);
-}
-
-/****************************************************************//**
-Stops archiving the log, accepting that a gap may occur in the archived
-log data.
-@return DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_noarchivelog(void)
-/*==========================*/
-{
-loop:
- mutex_enter(&(log_sys->mutex));
-
- if (log_sys->archiving_state == LOG_ARCH_STOPPED
- || log_sys->archiving_state == LOG_ARCH_OFF) {
-
- log_sys->archiving_state = LOG_ARCH_OFF;
-
- os_event_set(log_sys->archiving_on);
-
- mutex_exit(&(log_sys->mutex));
-
- return(DB_SUCCESS);
- }
-
- mutex_exit(&(log_sys->mutex));
-
- log_archive_stop();
-
- os_thread_sleep(500000);
-
- goto loop;
-}
-
-/****************************************************************//**
-Starts archiving the log again; a gap may have occurred in the archived
-log data while archiving was stopped.
-@return DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_archivelog(void)
-/*========================*/
-{
- mutex_enter(&(log_sys->mutex));
-
- if (log_sys->archiving_state == LOG_ARCH_OFF) {
-
- log_sys->archiving_state = LOG_ARCH_ON;
-
- log_sys->archived_lsn
- = ut_uint64_align_down(log_sys->lsn,
- OS_FILE_LOG_BLOCK_SIZE);
- mutex_exit(&(log_sys->mutex));
-
- return(DB_SUCCESS);
- }
-
- mutex_exit(&(log_sys->mutex));
-
- return(DB_ERROR);
-}
-
-/****************************************************************//**
-Tries to establish a big enough margin of free space in the log groups, such
-that a new log entry can be catenated without an immediate need for
-archiving. */
-static
-void
-log_archive_margin(void)
-/*====================*/
-{
- log_t* log = log_sys;
- ulint age;
- ibool sync;
- ulint dummy;
-loop:
- mutex_enter(&(log->mutex));
-
- if (log->archiving_state == LOG_ARCH_OFF) {
- mutex_exit(&(log->mutex));
-
- return;
- }
-
- age = log->lsn - log->archived_lsn;
-
- if (age > log->max_archived_lsn_age) {
-
- /* An archiving is urgent: we have to do synchronous i/o */
-
- sync = TRUE;
-
- } else if (age > log->max_archived_lsn_age_async) {
-
- /* An archiving is not urgent: we do asynchronous i/o */
-
- sync = FALSE;
- } else {
- /* No archiving required yet */
-
- mutex_exit(&(log->mutex));
-
- return;
- }
-
- mutex_exit(&(log->mutex));
-
- log_archive_do(sync, &dummy);
-
- if (sync == TRUE) {
- /* Check again that enough was written to the archive */
-
- goto loop;
- }
-}
-#endif /* UNIV_LOG_ARCHIVE */
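log_archive_margin() above chooses between synchronous and asynchronous archiving purely from the age lsn - archived_lsn measured against two limits. A sketch of just that decision, with the limits passed in (in the real code they are derived from the log group capacity):

    #include <cstdint>

    enum io_mode { IO_NONE, IO_ASYNC, IO_SYNC };

    io_mode archive_urgency(uint64_t lsn, uint64_t archived_lsn,
                            uint64_t max_age, uint64_t max_age_async)
    {
            const uint64_t age = lsn - archived_lsn;
            if (age > max_age)       return IO_SYNC;  /* urgent: wait for i/o */
            if (age > max_age_async) return IO_ASYNC; /* start i/o, don't wait */
            return IO_NONE;                           /* enough margin left */
    }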
-
-/********************************************************************//**
+/**
Checks that there is enough free space in the log to start a new query step.
Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
function may only be called if the calling thread owns no synchronization
objects! */
-UNIV_INTERN
void
log_check_margins(void)
-/*===================*/
{
-loop:
- log_flush_margin();
-
- log_checkpoint_margin();
-
-#ifdef UNIV_LOG_ARCHIVE
- log_archive_margin();
-#endif /* UNIV_LOG_ARCHIVE */
-
- mutex_enter(&(log_sys->mutex));
- ut_ad(!recv_no_log_write);
-
- if (log_sys->check_flush_or_checkpoint) {
-
- mutex_exit(&(log_sys->mutex));
-
- goto loop;
- }
+ bool check;
- mutex_exit(&(log_sys->mutex));
+ do {
+ log_flush_margin();
+ log_checkpoint_margin();
+ log_mutex_enter();
+ ut_ad(!recv_no_log_write);
+ check = log_sys->check_flush_or_checkpoint;
+ log_mutex_exit();
+ } while (check);
}
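The rewritten log_check_margins() replaces the goto loop with the usual repeat-until-quiescent shape: do the margin work, then re-read the flag that the work itself may have re-armed. The same pattern in a generic standalone form (names are illustrative only):

    /* Repeat work() until dirty() reports a clean state; in the real code
    the flag is re-read under log_sys->mutex. */
    template <typename Work, typename Dirty>
    void settle(Work work, Dirty dirty)
    {
            bool again;
            do {
                    work();          /* flush / checkpoint margins */
                    again = dirty(); /* check_flush_or_checkpoint */
            } while (again);
    }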
/****************************************************************//**
@@ -3197,32 +1793,43 @@ Makes a checkpoint at the latest lsn and writes it to first page of each
data file in the database, so that we know that the file spaces contain
all modifications up to that lsn. This can only be called at database
shutdown. This function also writes all log in log files to the log archive. */
-UNIV_INTERN
void
logs_empty_and_mark_files_at_shutdown(void)
/*=======================================*/
{
lsn_t lsn;
-#ifdef UNIV_LOG_ARCHIVE
- ulint arch_log_no;
-#endif
ulint count = 0;
- ulint pending_io;
- ibool server_busy;
- ib_logf(IB_LOG_LEVEL_INFO, "Starting shutdown...");
+ ib::info() << "Starting shutdown...";
/* Wait until the master thread and all other operations are idle: our
algorithm only works if the server is idle at shutdown */
srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;
loop:
+ ut_ad(lock_sys || !srv_was_started);
+ ut_ad(log_sys || !srv_was_started);
+ ut_ad(fil_system || !srv_was_started);
+ os_event_set(srv_buf_resize_event);
+
if (!srv_read_only_mode) {
os_event_set(srv_error_event);
os_event_set(srv_monitor_event);
os_event_set(srv_buf_dump_event);
- os_event_set(lock_sys->timeout_event);
- os_event_set(dict_stats_event);
+ if (lock_sys) {
+ os_event_set(lock_sys->timeout_event);
+ }
+ if (dict_stats_event) {
+ os_event_set(dict_stats_event);
+ } else {
+ ut_ad(!srv_dict_stats_thread_active);
+ }
+ if (recv_sys && recv_sys->flush_start) {
+ /* This is in case recv_writer_thread was never
+ started, or buf_flush_page_cleaner_coordinator
+ failed to notice its termination. */
+ os_event_set(recv_sys->flush_start);
+ }
}
#define COUNT_INTERVAL 600U
#define CHECK_INTERVAL 100000U
@@ -3238,14 +1845,14 @@ loop:
if (ulint total_trx = srv_was_started && !srv_read_only_mode
&& srv_force_recovery < SRV_FORCE_NO_TRX_UNDO
? trx_sys_any_active_transactions() : 0) {
+
if (srv_print_verbose_log && count > COUNT_INTERVAL) {
service_manager_extend_timeout(
COUNT_INTERVAL * CHECK_INTERVAL/1000000 * 2,
"Waiting for %lu active transactions to finish",
(ulong) total_trx);
- ib_logf(IB_LOG_LEVEL_INFO,
- "Waiting for %lu active transactions to finish",
- (ulong) total_trx);
+ ib::info() << "Waiting for " << total_trx << " active"
+ << " transactions to finish";
count = 0;
}
@@ -3260,12 +1867,16 @@ loop:
thread_name = "srv_error_monitor_thread";
} else if (srv_monitor_active) {
thread_name = "srv_monitor_thread";
+ } else if (srv_buf_resize_thread_active) {
+ thread_name = "buf_resize_thread";
+ goto wait_suspend_loop;
} else if (srv_dict_stats_thread_active) {
thread_name = "dict_stats_thread";
- } else if (lock_sys->timeout_thread_active) {
+ } else if (lock_sys && lock_sys->timeout_thread_active) {
thread_name = "lock_wait_timeout_thread";
} else if (srv_buf_dump_thread_active) {
thread_name = "buf_dump_thread";
+ goto wait_suspend_loop;
} else if (btr_defragment_thread_active) {
thread_name = "btr_defragment_thread";
} else if (srv_fast_shutdown != 2 && trx_rollback_or_clean_is_active) {
@@ -3281,8 +1892,8 @@ wait_suspend_loop:
COUNT_INTERVAL * CHECK_INTERVAL/1000000 * 2,
"Waiting for %s to exit", thread_name);
if (srv_print_verbose_log && count > COUNT_INTERVAL) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Waiting for %s to exit", thread_name);
+ ib::info() << "Waiting for " << thread_name
+ << "to exit";
count = 0;
}
goto loop;
@@ -3300,15 +1911,14 @@ wait_suspend_loop:
thread_name = "fil_crypt_thread";
goto wait_suspend_loop;
case SRV_PURGE:
+ case SRV_WORKER:
+ ut_ad(!"purge was not shut down");
srv_purge_wakeup();
thread_name = "purge thread";
goto wait_suspend_loop;
case SRV_MASTER:
thread_name = "master thread";
goto wait_suspend_loop;
- case SRV_WORKER:
- thread_name = "worker threads";
- goto wait_suspend_loop;
}
/* At this point only page_cleaner should be active. We wait
@@ -3324,69 +1934,55 @@ wait_suspend_loop:
if (srv_print_verbose_log && count > COUNT_INTERVAL) {
service_manager_extend_timeout(COUNT_INTERVAL * CHECK_INTERVAL/1000000 * 2,
"Waiting for page cleaner");
- ib_logf(IB_LOG_LEVEL_INFO,
- "Waiting for page_cleaner to "
- "finish flushing of buffer pool");
+ ib::info() << "Waiting for page_cleaner to "
+ "finish flushing of buffer pool";
count = 0;
}
}
- if (buf_flush_event) {
- os_event_free(buf_flush_event);
- buf_flush_event = NULL;
- }
-
if (log_scrub_thread_active) {
ut_ad(!srv_read_only_mode);
os_event_set(log_scrub_event);
}
- mutex_enter(&log_sys->mutex);
- server_busy = log_scrub_thread_active
- || log_sys->n_pending_checkpoint_writes
-#ifdef UNIV_LOG_ARCHIVE
- || log_sys->n_pending_archive_ios
-#endif /* UNIV_LOG_ARCHIVE */
- || log_sys->n_pending_writes;
- mutex_exit(&log_sys->mutex);
-
- if (server_busy) {
- if (srv_print_verbose_log && count > 600) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Pending checkpoint_writes: %lu. "
- "Pending log flush writes: %lu",
- (ulong) log_sys->n_pending_checkpoint_writes,
- (ulong) log_sys->n_pending_writes);
- count = 0;
+ if (log_sys) {
+ log_mutex_enter();
+ const ulint n_write = log_sys->n_pending_checkpoint_writes;
+ const ulint n_flush = log_sys->n_pending_flushes;
+ log_mutex_exit();
+
+ if (log_scrub_thread_active || n_write || n_flush) {
+ if (srv_print_verbose_log && count > 600) {
+ ib::info() << "Pending checkpoint_writes: "
+ << n_write
+ << ". Pending log flush writes: "
+ << n_flush;
+ count = 0;
+ }
+ goto loop;
}
- goto loop;
}
ut_ad(!log_scrub_thread_active);
- pending_io = buf_pool_check_no_pending_io();
-
- if (pending_io) {
+ if (!buf_pool_ptr) {
+ ut_ad(!srv_was_started);
+ } else if (ulint pending_io = buf_pool_check_no_pending_io()) {
if (srv_print_verbose_log && count > 600) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Waiting for %lu buffer page I/Os to complete",
- (ulong) pending_io);
+ ib::info() << "Waiting for " << pending_io << " buffer"
+ " page I/Os to complete";
count = 0;
}
goto loop;
}
-#ifdef UNIV_LOG_ARCHIVE
- log_archive_all();
-#endif /* UNIV_LOG_ARCHIVE */
- if (srv_fast_shutdown == 2) {
- if (!srv_read_only_mode) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "MySQL has requested a very fast shutdown "
- "without flushing the InnoDB buffer pool to "
- "data files. At the next mysqld startup "
- "InnoDB will do a crash recovery!");
+ if (srv_fast_shutdown == 2 || !srv_was_started) {
+ if (!srv_read_only_mode && srv_was_started) {
+ ib::info() << "MySQL has requested a very fast"
+ " shutdown without flushing the InnoDB buffer"
+ " pool to data files. At the next mysqld"
+ " startup InnoDB will do a crash recovery!";
/* In this fastest shutdown we do not flush the
buffer pool:
@@ -3403,49 +1999,33 @@ wait_suspend_loop:
srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
- fil_close_all_files();
+ if (fil_system) {
+ fil_close_all_files();
+ }
return;
}
if (!srv_read_only_mode) {
service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
"ensuring dirty buffer pool are written to log");
- log_make_checkpoint_at(LSN_MAX, TRUE);
+ log_make_checkpoint();
- mutex_enter(&log_sys->mutex);
+ log_mutex_enter();
lsn = log_sys->lsn;
- if (lsn != log_sys->last_checkpoint_lsn
-#ifdef UNIV_LOG_ARCHIVE
- || (srv_log_archive_on
- && lsn != log_sys->archived_lsn + LOG_BLOCK_HDR_SIZE)
-#endif /* UNIV_LOG_ARCHIVE */
- ) {
+ const bool lsn_changed = lsn != log_sys->last_checkpoint_lsn;
+ ut_ad(lsn >= log_sys->last_checkpoint_lsn);
- mutex_exit(&log_sys->mutex);
+ log_mutex_exit();
+ if (lsn_changed) {
goto loop;
}
-#ifdef UNIV_LOG_ARCHIVE
- arch_log_no = 0;
-
- UT_LIST_GET_FIRST(log_sys->log_groups)->archived_file_no;
-
- if (!UT_LIST_GET_FIRST(log_sys->log_groups)->archived_offset) {
-
- arch_log_no--;
- }
-
- log_archive_close_groups(TRUE);
-#endif /* UNIV_LOG_ARCHIVE */
-
- mutex_exit(&log_sys->mutex);
-
/* Ensure that all buffered changes are written to the
redo log before fil_close_all_files(). */
- fil_flush_file_spaces(FIL_LOG);
+ fil_flush_file_spaces(FIL_TYPE_LOG);
} else {
lsn = srv_start_lsn;
}
@@ -3453,20 +2033,18 @@ wait_suspend_loop:
srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
/* Make some checks that the server really is quiet */
- srv_thread_type type = srv_get_active_thread_type();
- ut_a(type == SRV_NONE);
+ ut_a(srv_get_active_thread_type() == SRV_NONE);
service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
"Free innodb buffer pool");
buf_all_freed();
- ut_a(lsn == log_sys->lsn);
+ ut_a(lsn == log_sys->lsn
+ || srv_force_recovery == SRV_FORCE_NO_LOG_REDO);
if (lsn < srv_start_lsn) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Log sequence number at shutdown " LSN_PF " "
- "is lower than at startup " LSN_PF "!",
- lsn, srv_start_lsn);
+ ib::error() << "Shutdown LSN=" << lsn
+ << " is less than start LSN=" << srv_start_lsn;
}
srv_shutdown_lsn = lsn;
@@ -3475,78 +2053,23 @@ wait_suspend_loop:
dberr_t err = fil_write_flushed_lsn(lsn);
if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Failed to write flush lsn to the "
- "system tablespace at shutdown err=%s",
- ut_strerr(err));
+ ib::error() << "Writing flushed lsn " << lsn
+ << " failed; error=" << err;
}
}
fil_close_all_files();
/* Make some checks that the server really is quiet */
- type = srv_get_active_thread_type();
- ut_a(type == SRV_NONE);
-
- ut_a(lsn == log_sys->lsn);
-}
+ ut_a(srv_get_active_thread_type() == SRV_NONE);
-#ifdef UNIV_LOG_DEBUG
-/******************************************************//**
-Checks by parsing that the catenated log segment for a single mtr is
-consistent. */
-UNIV_INTERN
-ibool
-log_check_log_recs(
-/*===============*/
- const byte* buf, /*!< in: pointer to the start of
- the log segment in the
- log_sys->buf log buffer */
- ulint len, /*!< in: segment length in bytes */
- ib_uint64_t buf_start_lsn) /*!< in: buffer start lsn */
-{
- ib_uint64_t contiguous_lsn;
- ib_uint64_t scanned_lsn;
- const byte* start;
- const byte* end;
- byte* buf1;
- byte* scan_buf;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- if (len == 0) {
-
- return(TRUE);
- }
-
- start = ut_align_down(buf, OS_FILE_LOG_BLOCK_SIZE);
- end = ut_align(buf + len, OS_FILE_LOG_BLOCK_SIZE);
-
- buf1 = mem_alloc((end - start) + OS_FILE_LOG_BLOCK_SIZE);
- scan_buf = ut_align(buf1, OS_FILE_LOG_BLOCK_SIZE);
-
- ut_memcpy(scan_buf, start, end - start);
-
- recv_scan_log_recs((buf_pool_get_n_pages()
- - (recv_n_pool_free_frames * srv_buf_pool_instances))
- * UNIV_PAGE_SIZE, FALSE, scan_buf, end - start,
- ut_uint64_align_down(buf_start_lsn,
- OS_FILE_LOG_BLOCK_SIZE),
- &contiguous_lsn, &scanned_lsn);
-
- ut_a(scanned_lsn == buf_start_lsn + len);
- ut_a(recv_sys->recovered_lsn == scanned_lsn);
-
- mem_free(buf1);
-
- return(TRUE);
+ ut_a(lsn == log_sys->lsn
+ || srv_force_recovery == SRV_FORCE_NO_LOG_REDO);
}
-#endif /* UNIV_LOG_DEBUG */
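The shutdown sequence above is one large polling loop: find a reason the server is still busy, print a progress message only every COUNT_INTERVAL iterations, and retry after CHECK_INTERVAL microseconds. A simplified standalone rendering of that loop skeleton (function and parameter names are made up for the sketch):

    #include <chrono>
    #include <cstdio>
    #include <thread>

    template <typename BusyFn>
    void wait_until_idle(BusyFn busy, const char* what)
    {
            const unsigned COUNT_INTERVAL = 600;     /* as in the code above */
            const unsigned CHECK_INTERVAL = 100000;  /* microseconds */
            unsigned count = 0;

            while (busy()) {
                    if (++count > COUNT_INTERVAL) {
                            std::printf("Waiting for %s to exit\n", what);
                            count = 0;
                    }
                    std::this_thread::sleep_for(
                            std::chrono::microseconds(CHECK_INTERVAL));
            }
    }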
/******************************************************//**
Peeks the current lsn.
-@return TRUE if success, FALSE if could not get the log system mutex */
-UNIV_INTERN
+@return TRUE if success, FALSE if could not get the log system mutex */
ibool
log_peek_lsn(
/*=========*/
@@ -3555,7 +2078,7 @@ log_peek_lsn(
if (0 == mutex_enter_nowait(&(log_sys->mutex))) {
*lsn = log_sys->lsn;
- mutex_exit(&(log_sys->mutex));
+ log_mutex_exit();
return(TRUE);
}
@@ -3565,7 +2088,6 @@ log_peek_lsn(
/******************************************************//**
Prints info of the log. */
-UNIV_INTERN
void
log_print(
/*======*/
@@ -3574,7 +2096,7 @@ log_print(
double time_elapsed;
time_t current_time;
- mutex_enter(&(log_sys->mutex));
+ log_mutex_enter();
fprintf(file,
"Log sequence number " LSN_PF "\n"
@@ -3596,23 +2118,24 @@ log_print(
}
fprintf(file,
- "%lu pending log writes, %lu pending chkp writes\n"
- "%lu log i/o's done, %.2f log i/o's/second\n",
- (ulong) log_sys->n_pending_writes,
- (ulong) log_sys->n_pending_checkpoint_writes,
- (ulong) log_sys->n_log_ios,
- ((double)(log_sys->n_log_ios - log_sys->n_log_ios_old)
- / time_elapsed));
+ ULINTPF " pending log flushes, "
+ ULINTPF " pending chkp writes\n"
+ ULINTPF " log i/o's done, %.2f log i/o's/second\n",
+ log_sys->n_pending_flushes,
+ log_sys->n_pending_checkpoint_writes,
+ log_sys->n_log_ios,
+ static_cast<double>(
+ log_sys->n_log_ios - log_sys->n_log_ios_old)
+ / time_elapsed);
log_sys->n_log_ios_old = log_sys->n_log_ios;
log_sys->last_printout_time = current_time;
- mutex_exit(&(log_sys->mutex));
+ log_mutex_exit();
}
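The per-second figures printed by log_print() are plain counter deltas over wall-clock time; n_log_ios_old and last_printout_time are then reset so the next call measures a fresh window. A worked example of the arithmetic:

    #include <cstdio>

    int main()
    {
            unsigned long n_log_ios = 1500, n_log_ios_old = 300;
            double time_elapsed = 60.0;  /* seconds since the last printout */
            std::printf("%.2f log i/o's/second\n",
                        static_cast<double>(n_log_ios - n_log_ios_old)
                        / time_elapsed);
            /* prints 20.00 */
    }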
/**********************************************************************//**
Refreshes the statistics used to print per-second averages. */
-UNIV_INTERN
void
log_refresh_stats(void)
/*===================*/
@@ -3621,109 +2144,96 @@ log_refresh_stats(void)
log_sys->last_printout_time = time(NULL);
}
-/********************************************************//**
-Closes a log group. */
+/** Close a log group.
+@param[in,out] group log group to close */
static
void
-log_group_close(
-/*===========*/
- log_group_t* group) /* in,own: log group to close */
+log_group_close(log_group_t* group)
{
- ulint i;
-
- for (i = 0; i < group->n_files; i++) {
- mem_free(group->file_header_bufs_ptr[i]);
-#ifdef UNIV_LOG_ARCHIVE
- mem_free(group->archive_file_header_bufs_ptr[i]);
-#endif /* UNIV_LOG_ARCHIVE */
- }
-
- mem_free(group->file_header_bufs_ptr);
- mem_free(group->file_header_bufs);
-
-#ifdef UNIV_LOG_ARCHIVE
- mem_free(group->archive_file_header_bufs_ptr);
- mem_free(group->archive_file_header_bufs);
-#endif /* UNIV_LOG_ARCHIVE */
-
- mem_free(group->checkpoint_buf_ptr);
-
- mem_free(group);
+ ut_free(group->checkpoint_buf_ptr);
+ group->n_files = 0;
+ group->checkpoint_buf_ptr = NULL;
}
/********************************************************//**
Closes all log groups. */
-UNIV_INTERN
void
log_group_close_all(void)
/*=====================*/
{
- log_group_t* group;
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- while (UT_LIST_GET_LEN(log_sys->log_groups) > 0) {
- log_group_t* prev_group = group;
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- UT_LIST_REMOVE(log_groups, log_sys->log_groups, prev_group);
-
- log_group_close(prev_group);
- }
+ log_group_close(&log_sys->log);
}
-/********************************************************//**
-Shutdown the log system but do not release all the memory. */
-UNIV_INTERN
+/** Shut down the redo log subsystem. */
void
-log_shutdown(void)
-/*==============*/
+log_shutdown()
{
log_group_close_all();
- mem_free(log_sys->buf_ptr);
+ ut_free(log_sys->buf_ptr);
log_sys->buf_ptr = NULL;
log_sys->buf = NULL;
- mem_free(log_sys->checkpoint_buf_ptr);
+ ut_free(log_sys->checkpoint_buf_ptr);
log_sys->checkpoint_buf_ptr = NULL;
log_sys->checkpoint_buf = NULL;
- os_event_free(log_sys->no_flush_event);
- os_event_free(log_sys->one_flushed_event);
+ os_event_destroy(log_sys->flush_event);
rw_lock_free(&log_sys->checkpoint_lock);
mutex_free(&log_sys->mutex);
+ mutex_free(&log_sys->write_mutex);
+ mutex_free(&log_sys->log_flush_order_mutex);
if (!srv_read_only_mode && srv_scrub_log) {
- os_event_free(log_scrub_event);
- log_scrub_event = NULL;
+ os_event_destroy(log_scrub_event);
}
-#ifdef UNIV_LOG_ARCHIVE
- rw_lock_free(&log_sys->archive_lock);
-#endif /* UNIV_LOG_ARCHIVE */
-
-#ifdef UNIV_LOG_DEBUG
- recv_sys_debug_free();
-#endif
-
recv_sys_close();
+ ut_free(log_sys);
+ log_sys = NULL;
}
-/********************************************************//**
-Free the log system data structures. */
-UNIV_INTERN
+/******************************************************//**
+Pads the current log block full with dummy log records. Used in producing
+consistent archived log files and in scrubbing the redo log. */
+static
void
-log_mem_free(void)
-/*==============*/
+log_pad_current_log_block(void)
+/*===========================*/
{
- if (log_sys != NULL) {
- recv_sys_mem_free();
- mem_free(log_sys);
+ byte b = MLOG_DUMMY_RECORD;
+ ulint pad_length;
+ ulint i;
+ lsn_t lsn;
+
+ ut_ad(!recv_no_log_write);
+ /* We retrieve lsn only because otherwise gcc crashed on HP-UX */
+ lsn = log_reserve_and_open(OS_FILE_LOG_BLOCK_SIZE);
- log_sys = NULL;
+ pad_length = OS_FILE_LOG_BLOCK_SIZE
+ - (log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE)
+ - LOG_BLOCK_TRL_SIZE;
+ if (pad_length
+ == (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
+ - LOG_BLOCK_TRL_SIZE)) {
+
+ pad_length = 0;
+ }
+
+ if (pad_length) {
+ srv_stats.n_log_scrubs.inc();
+ }
+
+ for (i = 0; i < pad_length; i++) {
+ log_write_low(&b, 1);
}
+
+ lsn = log_sys->lsn;
+
+ log_close();
+
+ ut_a(lsn % OS_FILE_LOG_BLOCK_SIZE == LOG_BLOCK_HDR_SIZE);
}
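The pad length computed above is the free space left in the current 512-byte block minus the trailer, and it collapses to zero when the block holds no payload yet (nothing to pad). A worked example, assuming the usual 12-byte block header and 4-byte trailer:

    #include <cstdio>

    int main()
    {
            const unsigned block = 512, hdr = 12, trl = 4;
            unsigned buf_free = 300;  /* hypothetical write position */
            unsigned pad = block - (buf_free % block) - trl;  /* 512-300-4 = 208 */
            if (pad == block - hdr - trl) {
                    pad = 0;  /* block is still empty: nothing to pad */
            }
            std::printf("pad %u dummy records\n", pad);
    }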
/*****************************************************************//*
@@ -3734,12 +2244,16 @@ void
log_scrub()
/*=========*/
{
+ log_mutex_enter();
ulint cur_lbn = log_block_convert_lsn_to_no(log_sys->lsn);
+
if (next_lbn_to_pad == cur_lbn)
{
log_pad_current_log_block();
}
+
next_lbn_to_pad = log_block_convert_lsn_to_no(log_sys->lsn);
+ log_mutex_exit();
}
/* log scrubbing speed, in bytes/sec */
@@ -3760,7 +2274,7 @@ DECLARE_THREAD(log_scrub_thread)(void*)
/* log scrubbing interval in µs. */
ulonglong interval = 1000*1000*512/innodb_scrub_log_speed;
- os_event_wait_time(log_scrub_event, interval);
+ os_event_wait_time(log_scrub_event, static_cast<ulint>(interval));
log_scrub();
@@ -3771,8 +2285,7 @@ DECLARE_THREAD(log_scrub_thread)(void*)
/* We count the number of threads in os_thread_exit(). A created
thread should always use that to exit and not use return() to exit. */
- os_thread_exit(NULL);
+ os_thread_exit();
OS_THREAD_DUMMY_RETURN;
}
-#endif /* !UNIV_HOTBACKUP */
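The scrub thread paces itself so that roughly innodb_scrub_log_speed bytes are padded per second: each wakeup covers one 512-byte block, so the wait is 512e6 / speed microseconds. A worked example with a hypothetical speed of 256 bytes/sec:

    #include <cstdio>

    int main()
    {
            unsigned long long speed = 256;  /* bytes/sec (made-up value) */
            unsigned long long interval = 1000ULL * 1000 * 512 / speed;
            std::printf("%llu us per block\n", interval);
            /* prints 2000000, i.e. one block every 2 seconds = 256 bytes/sec */
    }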
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index 3b3c7c23224..4ef22468f05 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -2,7 +2,7 @@
Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2013, 2019, MariaDB Corporation.
+Copyright (c) 2013, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,21 +25,19 @@ Recovery
Created 9/20/1997 Heikki Tuuri
*******************************************************/
-// First include (the generated) my_config.h, to get correct platform defines.
-#include "my_config.h"
-#include <stdio.h> // Solaris/x86 header file bug
+#include "univ.i"
-#include <vector>
+#include <map>
+#include <string>
#include <my_service_manager.h>
#include "log0recv.h"
-#ifdef UNIV_NONINL
-#include "log0recv.ic"
+#ifdef HAVE_MY_AES_H
+#include <my_aes.h>
#endif
#include "log0crypt.h"
-
#include "mem0mem.h"
#include "buf0buf.h"
#include "buf0flu.h"
@@ -53,64 +51,39 @@ Created 9/20/1997 Heikki Tuuri
#include "trx0undo.h"
#include "trx0rec.h"
#include "fil0fil.h"
-#include "fil0crypt.h"
-#ifndef UNIV_HOTBACKUP
-# include "buf0rea.h"
-# include "srv0srv.h"
-# include "srv0start.h"
-# include "trx0roll.h"
-# include "row0merge.h"
-# include "sync0sync.h"
-#else /* !UNIV_HOTBACKUP */
-
-
-/** This is set to FALSE if the backup was originally taken with the
-mysqlbackup --include regexp option: then we do not want to create tables in
-directories which were not included */
-UNIV_INTERN ibool recv_replay_file_ops = TRUE;
-#endif /* !UNIV_HOTBACKUP */
+#include "row0trunc.h"
+#include "buf0rea.h"
+#include "srv0srv.h"
+#include "srv0start.h"
+#include "trx0roll.h"
+#include "row0merge.h"
/** Log records are stored in the hash table in chunks at most of this size;
this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
-#define RECV_DATA_BLOCK_SIZE (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t))
+#define RECV_DATA_BLOCK_SIZE (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t) - REDZONE_SIZE)
/** Read-ahead area in applying log records to file pages */
#define RECV_READ_AHEAD_AREA 32
/** The recovery system */
-UNIV_INTERN recv_sys_t* recv_sys;
+recv_sys_t* recv_sys;
/** TRUE when applying redo log records during crash recovery; FALSE
otherwise. Note that this is FALSE while a background thread is
rolling back incomplete transactions. */
-UNIV_INTERN ibool recv_recovery_on;
-#ifdef UNIV_LOG_ARCHIVE
-/** TRUE when applying redo log records from an archived log file */
-UNIV_INTERN ibool recv_recovery_from_backup_on;
-#endif /* UNIV_LOG_ARCHIVE */
+volatile bool recv_recovery_on;
-#ifndef UNIV_HOTBACKUP
/** TRUE when recv_init_crash_recovery() has been called. */
-UNIV_INTERN ibool recv_needed_recovery;
-# ifdef UNIV_DEBUG
+bool recv_needed_recovery;
+#ifdef UNIV_DEBUG
/** TRUE if writing to the redo log (mtr_commit) is forbidden.
Protected by log_sys->mutex. */
-UNIV_INTERN ibool recv_no_log_write = FALSE;
-# endif /* UNIV_DEBUG */
+bool recv_no_log_write = false;
+#endif /* UNIV_DEBUG */
/** TRUE if buf_page_is_corrupted() should check if the log sequence
number (FIL_PAGE_LSN) is in the future. Initially FALSE, and set by
-recv_recovery_from_checkpoint_start_func(). */
-UNIV_INTERN ibool recv_lsn_checks_on;
-
-/** There are two conditions under which we scan the logs, the first
-is normal startup and the second is when we do a recovery from an
-archive.
-This flag is set if we are doing a scan from the last checkpoint during
-startup. If we find log entries that were written after the last checkpoint
-we know that the server was not cleanly shutdown. We must then initialize
-the crash recovery environment before attempting to store these entries in
-the log hash table. */
-static ibool recv_log_scan_is_startup_type;
+recv_recovery_from_checkpoint_start(). */
+bool recv_lsn_checks_on;
/** If the following is TRUE, the buffer pool file pages must be invalidated
after recovery and no ibuf operations are allowed; this becomes TRUE if
@@ -121,146 +94,620 @@ buffer pool before the pages have been recovered to the up-to-date state.
TRUE means that recovery is running and no operations on the log files
are allowed yet: the variable name is misleading. */
-UNIV_INTERN ibool recv_no_ibuf_operations;
-/** TRUE when the redo log is being backed up */
-# define recv_is_making_a_backup FALSE
-/** TRUE when recovering from a backed up redo log file */
-# define recv_is_from_backup FALSE
-#else /* !UNIV_HOTBACKUP */
-# define recv_needed_recovery FALSE
-/** TRUE when the redo log is being backed up */
-UNIV_INTERN ibool recv_is_making_a_backup = FALSE;
-/** TRUE when recovering from a backed up redo log file */
-UNIV_INTERN ibool recv_is_from_backup = FALSE;
-# define buf_pool_get_curr_size() (5 * 1024 * 1024)
-#endif /* !UNIV_HOTBACKUP */
+bool recv_no_ibuf_operations;
/** The type of the previous parsed redo log record */
-static ulint recv_previous_parsed_rec_type;
+static mlog_id_t recv_previous_parsed_rec_type;
/** The offset of the previous parsed redo log record */
static ulint recv_previous_parsed_rec_offset;
/** The 'multi' flag of the previous parsed redo log record */
static ulint recv_previous_parsed_rec_is_multi;
-/** Maximum page number encountered in the redo log */
-UNIV_INTERN ulint recv_max_parsed_page_no;
-
-/** This many frames must be left free in the buffer pool when we scan
-the log and store the scanned log records in the buffer pool: we will
-use these free frames to read in pages when we start applying the
-log records to the database.
-This is the default value. If the actual size of the buffer pool is
-larger than 10 MB we'll set this value to 512. */
-UNIV_INTERN ulint recv_n_pool_free_frames;
-
/** The maximum lsn we see for a page during the recovery process. If this
is bigger than the lsn we are able to scan up to, that is an indication that
the recovery failed and the database may be corrupt. */
-UNIV_INTERN lsn_t recv_max_page_lsn;
+static lsn_t recv_max_page_lsn;
#ifdef UNIV_PFS_THREAD
-UNIV_INTERN mysql_pfs_key_t trx_rollback_clean_thread_key;
+mysql_pfs_key_t trx_rollback_clean_thread_key;
+mysql_pfs_key_t recv_writer_thread_key;
#endif /* UNIV_PFS_THREAD */
-#ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t recv_sys_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
+/** Is recv_writer_thread active? */
+bool recv_writer_thread_active;
+
+#ifndef DBUG_OFF
+/** Return string name of the redo log record type.
+@param[in]	type		log record type
+@return string name of the log record type */
+static const char* get_mlog_string(mlog_id_t type);
+#endif /* !DBUG_OFF */
+
+/** Tablespace item during recovery */
+struct file_name_t {
+ /** Tablespace file name (MLOG_FILE_NAME) */
+ std::string name;
+ /** Tablespace object (NULL if not valid or not found) */
+ fil_space_t* space;
+
+ /** Tablespace status. */
+ enum fil_status {
+ /** Normal tablespace */
+ NORMAL,
+ /** Deleted tablespace */
+ DELETED,
+ /** Missing tablespace */
+ MISSING
+ };
+
+ /** Status of the tablespace */
+ fil_status status;
+
+ /** FSP_SIZE of tablespace */
+ ulint size;
+
+ /** the log sequence number of the last observed MLOG_INDEX_LOAD
+ record for the tablespace */
+ lsn_t enable_lsn;
+
+ /** Constructor */
+ file_name_t(std::string name_, bool deleted) :
+ name(name_), space(NULL), status(deleted ? DELETED: NORMAL),
+ size(0), enable_lsn(0) {}
+
+ /** Report a MLOG_INDEX_LOAD operation, meaning that
+ mlog_init for any earlier LSN must be skipped.
+ @param lsn log sequence number of the MLOG_INDEX_LOAD */
+ void mlog_index_load(lsn_t lsn)
+ {
+ if (enable_lsn < lsn) enable_lsn = lsn;
+ }
+};
+
+/** Map of dirty tablespaces during recovery */
+typedef std::map<
+ ulint,
+ file_name_t,
+ std::less<ulint>,
+ ut_allocator<std::pair<const ulint, file_name_t> > > recv_spaces_t;
+
+static recv_spaces_t recv_spaces;
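recv_spaces keys each tablespace by space_id, so a later record for the same id updates the existing entry rather than inserting a duplicate; fil_name_process() below relies on std::map::insert reporting whether the key was new. A minimal standalone model of that bookkeeping (types invented for the sketch):

    #include <cstdio>
    #include <map>
    #include <string>

    enum status_t { NORMAL, DELETED };
    struct entry { std::string name; status_t status; };

    int main()
    {
            std::map<unsigned, entry> spaces;
            spaces.insert({5, {"test/t1.ibd", NORMAL}});  /* MLOG_FILE_NAME */
            auto p = spaces.insert({5, {"test/t1.ibd", DELETED}});
            if (!p.second) {
                    /* key existed: update in place (MLOG_FILE_DELETE) */
                    p.first->second.status = DELETED;
            }
            std::printf("space 5 %s\n",
                        spaces.at(5).status == DELETED ? "deleted" : "normal");
    }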
+
+/** States of recv_addr_t */
+enum recv_addr_state {
+ /** not yet processed */
+ RECV_NOT_PROCESSED,
+ /** not processed; the page will be reinitialized */
+ RECV_WILL_NOT_READ,
+ /** page is being read */
+ RECV_BEING_READ,
+ /** log records are being applied on the page */
+ RECV_BEING_PROCESSED,
+ /** log records have been applied on the page */
+ RECV_PROCESSED,
+ /** log records have been discarded because the tablespace
+ does not exist */
+ RECV_DISCARDED
+};
+
+/** Hashed page file address struct */
+struct recv_addr_t{
+ /** recovery state of the page */
+ recv_addr_state state;
+ /** tablespace identifier */
+ unsigned space:32;
+ /** page number */
+ unsigned page_no:32;
+ /** list of log records for this page */
+ UT_LIST_BASE_NODE_T(recv_t) rec_list;
+ /** hash node in the hash bucket chain */
+ hash_node_t addr_hash;
+};
+
+/** Report optimized DDL operation (without redo log),
+corresponding to MLOG_INDEX_LOAD.
+@param[in] space_id tablespace identifier
+*/
+void (*log_optimized_ddl_op)(ulint space_id);
+
+/** Report backup-unfriendly TRUNCATE operation (with separate log file),
+corresponding to MLOG_TRUNCATE. */
+void (*log_truncate)();
+
+/** Report an operation to create, delete, or rename a file during backup.
+@param[in] space_id tablespace identifier
+@param[in] flags tablespace flags (NULL if not create)
+@param[in] name file name (not NUL-terminated)
+@param[in] len length of name, in bytes
+@param[in] new_name new file name (NULL if not rename)
+@param[in] new_len length of new_name, in bytes (0 if NULL) */
+void (*log_file_op)(ulint space_id, const byte* flags,
+ const byte* name, ulint len,
+ const byte* new_name, ulint new_len);
+
+/** Information about initializing page contents during redo log processing */
+class mlog_init_t
+{
+public:
+ /** A page initialization operation that was parsed from
+ the redo log */
+ struct init {
+ /** log sequence number of the page initialization */
+ lsn_t lsn;
+ /** Whether btr_page_create() avoided a read of the page.
+
+ At the end of the last recovery batch, ibuf_merge()
+ will invoke change buffer merge for pages that reside
+ in the buffer pool. (In the last batch, loading pages
+ would trigger change buffer merge.) */
+ bool created;
+ };
+
+private:
+ typedef std::map<const page_id_t, init,
+ std::less<const page_id_t>,
+ ut_allocator<std::pair<const page_id_t, init> > >
+ map;
+ /** Map of page initialization operations.
+ FIXME: Merge this to recv_sys->addr_hash! */
+ map inits;
+public:
+ /** Record that a page will be initialized by the redo log.
+ @param[in] space tablespace identifier
+ @param[in] page_no page number
+ @param[in] lsn log sequence number */
+ void add(ulint space, ulint page_no, lsn_t lsn)
+ {
+ ut_ad(mutex_own(&recv_sys->mutex));
+ const init init = { lsn, false };
+ std::pair<map::iterator, bool> p = inits.insert(
+ map::value_type(page_id_t(space, page_no), init));
+ ut_ad(!p.first->second.created);
+ if (!p.second && p.first->second.lsn < init.lsn) {
+ p.first->second = init;
+ }
+ }
-#ifndef UNIV_HOTBACKUP
-# ifdef UNIV_PFS_THREAD
-UNIV_INTERN mysql_pfs_key_t recv_writer_thread_key;
-# endif /* UNIV_PFS_THREAD */
+ /** Get the last stored lsn of the page id and its respective
+ init/load operation.
+ @param[in] page_id page id
+ @param[in,out] init initialize log or load log
+ @return the latest page initialization;
+ not valid after releasing recv_sys->mutex. */
+ init& last(page_id_t page_id)
+ {
+ ut_ad(mutex_own(&recv_sys->mutex));
+ return inits.find(page_id)->second;
+ }
+
+ /** At the end of each recovery batch, reset the 'created' flags. */
+ void reset()
+ {
+ ut_ad(mutex_own(&recv_sys->mutex));
+ ut_ad(recv_no_ibuf_operations);
+ for (map::iterator i= inits.begin(); i != inits.end(); i++) {
+ i->second.created = false;
+ }
+ }
-# ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t recv_writer_mutex_key;
-# endif /* UNIV_PFS_MUTEX */
+ /** On the last recovery batch, merge buffered changes to those
+ pages that were initialized by buf_page_create() and still reside
+ in the buffer pool. Stale pages are not allowed in the buffer pool.
+
+ Note: When MDEV-14481 implements redo log apply in the
+ background, we will have to ensure that buf_page_get_gen()
+ will not deliver stale pages to users (pages on which the
+ change buffer was not merged yet). Normally, the change
+ buffer merge is performed on I/O completion. Maybe, add a
+ flag to buf_page_t and perform the change buffer merge on
+ the first actual access?
+ @param[in,out] mtr dummy mini-transaction */
+ void ibuf_merge(mtr_t& mtr)
+ {
+ ut_ad(mutex_own(&recv_sys->mutex));
+ ut_ad(!recv_no_ibuf_operations);
+ mtr.start();
+
+ for (map::const_iterator i= inits.begin(); i != inits.end();
+ i++) {
+ if (!i->second.created) {
+ continue;
+ }
+ if (buf_block_t* block = buf_page_get_low(
+ i->first, univ_page_size, RW_X_LATCH, NULL,
+ BUF_GET_IF_IN_POOL, __FILE__, __LINE__,
+ &mtr, NULL)) {
+ mutex_exit(&recv_sys->mutex);
+ ibuf_merge_or_delete_for_page(
+ block, i->first,
+ &block->page.size, true);
+ mtr.commit();
+ mtr.start();
+ mutex_enter(&recv_sys->mutex);
+ }
+ }
-/** Flag indicating if recv_writer thread is active. */
-static volatile bool recv_writer_thread_active;
-UNIV_INTERN os_thread_t recv_writer_thread_handle = 0;
-#endif /* !UNIV_HOTBACKUP */
+ mtr.commit();
+ }
-/* prototypes */
+ /** Clear the data structure */
+ void clear() { inits.clear(); }
+};
-#ifndef UNIV_HOTBACKUP
-/*******************************************************//**
-Initialize crash recovery environment. Can be called iff
-recv_needed_recovery == FALSE. */
-static
-void
-recv_init_crash_recovery(void);
-/*===========================*/
-#endif /* !UNIV_HOTBACKUP */
+static mlog_init_t mlog_init;
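mlog_init_t::add() keeps at most one entry per page id and only ever moves its LSN forward, so a record parsed out of order cannot regress the recorded initialization point. The same last-writer-wins insert in standalone form:

    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <utility>

    int main()
    {
            /* (space, page) -> lsn of the latest page initialization */
            std::map<std::pair<unsigned, unsigned>, uint64_t> inits;
            auto add = [&](unsigned space, unsigned page, uint64_t lsn) {
                    auto p = inits.insert({{space, page}, lsn});
                    if (!p.second && p.first->second < lsn) {
                            p.first->second = lsn;  /* keep the newest lsn */
                    }
            };
            add(1, 3, 100); add(1, 3, 90); add(1, 3, 120);
            std::printf("lsn %llu\n",
                        (unsigned long long) inits[{1, 3}]);  /* 120 */
    }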
-/********************************************************//**
-Creates the recovery system. */
-UNIV_INTERN
-void
-recv_sys_create(void)
-/*=================*/
+/** Process a MLOG_CREATE2 record that indicates that a tablespace
+is being shrunk in size.
+@param[in] space_id tablespace identifier
+@param[in] pages trimmed size of the file, in pages
+@param[in] lsn log sequence number of the operation */
+static void recv_addr_trim(ulint space_id, unsigned pages, lsn_t lsn)
{
- if (recv_sys != NULL) {
+ DBUG_ENTER("recv_addr_trim");
+ DBUG_LOG("ib_log",
+ "discarding log beyond end of tablespace "
+ << page_id_t(space_id, pages) << " before LSN " << lsn);
+ ut_ad(mutex_own(&recv_sys->mutex));
+ for (ulint i = recv_sys->addr_hash->n_cells; i--; ) {
+ hash_cell_t* const cell = hash_get_nth_cell(
+ recv_sys->addr_hash, i);
+ for (recv_addr_t* addr = static_cast<recv_addr_t*>(cell->node),
+ *next;
+ addr; addr = next) {
+ next = static_cast<recv_addr_t*>(addr->addr_hash);
+
+ if (addr->space != space_id || addr->page_no < pages) {
+ continue;
+ }
+ for (recv_t* recv = UT_LIST_GET_FIRST(addr->rec_list);
+ recv; ) {
+ recv_t* n = UT_LIST_GET_NEXT(rec_list, recv);
+ if (recv->start_lsn < lsn) {
+ DBUG_PRINT("ib_log",
+ ("Discarding %s for"
+ " page %u:%u at " LSN_PF,
+ get_mlog_string(
+ recv->type),
+ addr->space, addr->page_no,
+ recv->start_lsn));
+ UT_LIST_REMOVE(addr->rec_list, recv);
+ }
+ recv = n;
+ }
+ }
+ }
+ if (fil_space_t* space = fil_space_get(space_id)) {
+ ut_ad(UT_LIST_GET_LEN(space->chain) == 1);
+ fil_node_t* file = UT_LIST_GET_FIRST(space->chain);
+ ut_ad(file->is_open());
+ os_file_truncate(file->name, file->handle,
+ os_offset_t(pages) << srv_page_size_shift,
+ true);
+ }
+ DBUG_VOID_RETURN;
+}
+
+/** Process a file name from a MLOG_FILE_* record.
+@param[in,out] name file name
+@param[in] len length of the file name
+@param[in] space_id the tablespace ID
+@param[in] deleted whether this is a MLOG_FILE_DELETE record */
+static
+void
+fil_name_process(
+ char* name,
+ ulint len,
+ ulint space_id,
+ bool deleted)
+{
+ if (srv_operation == SRV_OPERATION_BACKUP) {
return;
}
- recv_sys = static_cast<recv_sys_t*>(mem_zalloc(sizeof(*recv_sys)));
+ ut_ad(srv_operation == SRV_OPERATION_NORMAL
+ || is_mariabackup_restore_or_export());
- mutex_create(recv_sys_mutex_key, &recv_sys->mutex, SYNC_RECV);
+ /* We will also insert space=NULL into the map, so that
+ further checks can ensure that a MLOG_FILE_NAME record was
+ scanned before applying any page records for the space_id. */
-#ifndef UNIV_HOTBACKUP
- mutex_create(recv_writer_mutex_key, &recv_sys->writer_mutex,
- SYNC_LEVEL_VARYING);
-#endif /* !UNIV_HOTBACKUP */
+ os_normalize_path(name);
+ file_name_t fname(std::string(name, len - 1), deleted);
+ std::pair<recv_spaces_t::iterator,bool> p = recv_spaces.insert(
+ std::make_pair(space_id, fname));
+ ut_ad(p.first->first == space_id);
- recv_sys->heap = NULL;
- recv_sys->addr_hash = NULL;
+ file_name_t& f = p.first->second;
+
+ if (deleted) {
+ /* Got MLOG_FILE_DELETE */
+
+ if (!p.second && f.status != file_name_t::DELETED) {
+ f.status = file_name_t::DELETED;
+ if (f.space != NULL) {
+ fil_space_free(space_id, false);
+ f.space = NULL;
+ }
+ }
+
+ ut_ad(f.space == NULL);
+ } else if (p.second // the first MLOG_FILE_NAME or MLOG_FILE_RENAME2
+ || f.name != fname.name) {
+ fil_space_t* space;
+
+ /* Check if the tablespace file exists and contains
+ the space_id. If not, ignore the file after displaying
+ a note. Abort if there are multiple files with the
+ same space_id. */
+ switch (fil_ibd_load(space_id, name, space)) {
+ case FIL_LOAD_OK:
+ ut_ad(space != NULL);
+
+ if (f.space == NULL || f.space == space) {
+
+ if (f.size && f.space == NULL) {
+ fil_space_set_recv_size(space->id, f.size);
+ }
+
+ f.name = fname.name;
+ f.space = space;
+ f.status = file_name_t::NORMAL;
+ } else {
+ ib::error() << "Tablespace " << space_id
+ << " has been found in two places: '"
+ << f.name << "' and '" << name << "'."
+ " You must delete one of them.";
+ recv_sys->found_corrupt_fs = true;
+ }
+ break;
+
+ case FIL_LOAD_ID_CHANGED:
+ ut_ad(space == NULL);
+ break;
+
+ case FIL_LOAD_NOT_FOUND:
+ /* No matching tablespace was found; maybe it
+ was renamed, and we will find a subsequent
+ MLOG_FILE_* record. */
+ ut_ad(space == NULL);
+
+ if (srv_force_recovery) {
+ /* Without innodb_force_recovery,
+ missing tablespaces will only be
+ reported in
+ recv_init_crash_recovery_spaces().
+ Enable some more diagnostics when
+ forcing recovery. */
+
+ ib::info()
+ << "At LSN: " << recv_sys->recovered_lsn
+ << ": unable to open file " << name
+ << " for tablespace " << space_id;
+ }
+ break;
+
+ case FIL_LOAD_INVALID:
+ ut_ad(space == NULL);
+ if (srv_force_recovery == 0) {
+ ib::warn() << "We do not continue the crash"
+ " recovery, because the table may"
+ " become corrupt if we cannot apply"
+ " the log records in the InnoDB log to"
+ " it. To fix the problem and start"
+ " mysqld:";
+ ib::info() << "1) If there is a permission"
+ " problem in the file and mysqld"
+ " cannot open the file, you should"
+ " modify the permissions.";
+ ib::info() << "2) If the tablespace is not"
+ " needed, or you can restore an older"
+ " version from a backup, then you can"
+ " remove the .ibd file, and use"
+ " --innodb_force_recovery=1 to force"
+ " startup without this file.";
+ ib::info() << "3) If the file system or the"
+ " disk is broken, and you cannot"
+ " remove the .ibd file, you can set"
+ " --innodb_force_recovery.";
+ recv_sys->found_corrupt_fs = true;
+ break;
+ }
+
+ ib::info() << "innodb_force_recovery was set to "
+ << srv_force_recovery << ". Continuing crash"
+ " recovery even though we cannot access the"
+ " files for tablespace " << space_id << ".";
+ break;
+ }
+ }
}
-/********************************************************//**
-Release recovery system mutexes. */
-UNIV_INTERN
-void
-recv_sys_close(void)
-/*================*/
+/** Parse or process a MLOG_FILE_* record.
+@param[in] ptr redo log record
+@param[in] end end of the redo log buffer
+@param[in] space_id the tablespace ID
+@param[in] first_page_no first page number in the file
+@param[in] type MLOG_FILE_NAME or MLOG_FILE_DELETE
+or MLOG_FILE_CREATE2 or MLOG_FILE_RENAME2
+@param[in] apply whether to apply the record
+@return pointer to next redo log record
+@retval NULL if this log record was truncated */
+static
+byte*
+fil_name_parse(
+ byte* ptr,
+ const byte* end,
+ ulint space_id,
+ ulint first_page_no,
+ mlog_id_t type,
+ bool apply)
{
- if (recv_sys != NULL) {
- if (recv_sys->addr_hash != NULL) {
- hash_table_free(recv_sys->addr_hash);
+ if (type == MLOG_FILE_CREATE2) {
+ if (end < ptr + 4) {
+ return(NULL);
}
+ ptr += 4;
+ }
- if (recv_sys->heap != NULL) {
- mem_heap_free(recv_sys->heap);
+ if (end < ptr + 2) {
+ return(NULL);
+ }
+
+ ulint len = mach_read_from_2(ptr);
+ ptr += 2;
+ if (end < ptr + len) {
+ return(NULL);
+ }
+
+ /* MLOG_FILE_* records should only be written for
+ user-created tablespaces. The name must be long enough
+ and end in .ibd. */
+ bool corrupt = is_predefined_tablespace(space_id)
+ || len < sizeof "/a.ibd\0"
+ || (!first_page_no != !memcmp(ptr + len - 5, DOT_IBD, 5));
+
+ if (!corrupt && !memchr(ptr, OS_PATH_SEPARATOR, len)) {
+ if (byte* c = static_cast<byte*>
+ (memchr(ptr, OS_PATH_SEPARATOR_ALT, len))) {
+ ut_ad(c >= ptr);
+ ut_ad(c < ptr + len);
+ do {
+ *c = OS_PATH_SEPARATOR;
+ } while ((c = static_cast<byte*>
+ (memchr(ptr, OS_PATH_SEPARATOR_ALT,
+ len - ulint(c - ptr)))) != NULL);
+ } else {
+ corrupt = true;
+ }
+ }
+
+ byte* end_ptr = ptr + len;
+
+ switch (type) {
+ default:
+ ut_ad(0); // the caller checked this
+ /* fall through */
+ case MLOG_FILE_NAME:
+ if (corrupt) {
+ ib::error() << "MLOG_FILE_NAME incorrect:" << ptr;
+ recv_sys->found_corrupt_log = true;
+ break;
}
- if (recv_sys->buf != NULL) {
- ut_free(recv_sys->buf);
+ fil_name_process(
+ reinterpret_cast<char*>(ptr), len, space_id, false);
+ break;
+ case MLOG_FILE_DELETE:
+ if (corrupt) {
+ ib::error() << "MLOG_FILE_DELETE incorrect:" << ptr;
+ recv_sys->found_corrupt_log = true;
+ break;
}
- if (recv_sys->last_block_buf_start != NULL) {
- mem_free(recv_sys->last_block_buf_start);
+ fil_name_process(
+ reinterpret_cast<char*>(ptr), len, space_id, true);
+ /* fall through */
+ case MLOG_FILE_CREATE2:
+ if (first_page_no) {
+ ut_ad(first_page_no
+ == SRV_UNDO_TABLESPACE_SIZE_IN_PAGES);
+ ut_a(srv_is_undo_tablespace(space_id));
+ compile_time_assert(
+ UT_ARR_SIZE(recv_sys->truncated_undo_spaces)
+ == TRX_SYS_MAX_UNDO_SPACES);
+ recv_sys_t::trunc& t = recv_sys->truncated_undo_spaces[
+ space_id - srv_undo_space_id_start];
+ t.lsn = recv_sys->recovered_lsn;
+ t.pages = uint32_t(first_page_no);
+ } else if (log_file_op) {
+ log_file_op(space_id,
+ type == MLOG_FILE_CREATE2 ? ptr - 4 : NULL,
+ ptr, len, NULL, 0);
+ }
+ break;
+ case MLOG_FILE_RENAME2:
+ if (corrupt) {
+ ib::error() << "MLOG_FILE_RENAME2 incorrect:" << ptr;
+ recv_sys->found_corrupt_log = true;
}
-#ifndef UNIV_HOTBACKUP
- ut_ad(!recv_writer_thread_active);
- mutex_free(&recv_sys->writer_mutex);
-#endif /* !UNIV_HOTBACKUP */
+ /* The new name follows the old name. */
+ byte* new_name = end_ptr + 2;
+ if (end < new_name) {
+ return(NULL);
+ }
- mutex_free(&recv_sys->mutex);
+ ulint new_len = mach_read_from_2(end_ptr);
- mem_free(recv_sys);
- recv_sys = NULL;
+ if (end < end_ptr + 2 + new_len) {
+ return(NULL);
+ }
+
+ end_ptr += 2 + new_len;
+
+ corrupt = corrupt
+ || new_len < sizeof "/a.ibd\0"
+ || memcmp(new_name + new_len - 5, DOT_IBD, 5) != 0;
+
+ if (!corrupt && !memchr(new_name, OS_PATH_SEPARATOR, new_len)) {
+ if (byte* c = static_cast<byte*>
+ (memchr(new_name, OS_PATH_SEPARATOR_ALT,
+ new_len))) {
+ ut_ad(c >= new_name);
+ ut_ad(c < new_name + new_len);
+ do {
+ *c = OS_PATH_SEPARATOR;
+ } while ((c = static_cast<byte*>
+				  (memchr(new_name, OS_PATH_SEPARATOR_ALT,
+ new_len
+ - ulint(c - new_name))))
+ != NULL);
+ } else {
+ corrupt = true;
+ }
+ }
+
+ if (corrupt) {
+ ib::error() << "MLOG_FILE_RENAME2 new_name incorrect:" << ptr
+ << " new_name: " << new_name;
+ recv_sys->found_corrupt_log = true;
+ break;
+ }
+
+ fil_name_process(
+ reinterpret_cast<char*>(ptr), len,
+ space_id, false);
+ fil_name_process(
+ reinterpret_cast<char*>(new_name), new_len,
+ space_id, false);
+
+ if (log_file_op) {
+ log_file_op(space_id, NULL,
+ ptr, len, new_name, new_len);
+ }
+
+ if (!apply) {
+ break;
+ }
+ if (!fil_op_replay_rename(
+ space_id, first_page_no,
+ reinterpret_cast<const char*>(ptr),
+ reinterpret_cast<const char*>(new_name))) {
+ recv_sys->found_corrupt_fs = true;
+ }
}
+
+ return(end_ptr);
}
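The payload shapes decoded by fil_name_parse() above, as read off the code (not an authoritative format specification):

    /*
      MLOG_FILE_CREATE2:  [4: flags][2: len][len: name]
      MLOG_FILE_NAME:     [2: len][len: name]
      MLOG_FILE_DELETE:   [2: len][len: name]
      MLOG_FILE_RENAME2:  [2: len][len: old name][2: new_len][new_len: new name]

      mach_read_from_2() is InnoDB's big-endian 16-bit read; a standalone
      equivalent for illustration:
    */
    #include <cstdint>

    static inline uint16_t read_be16(const unsigned char* p)
    {
            return uint16_t((p[0] << 8) | p[1]);
    }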
-/********************************************************//**
-Frees the recovery system memory. */
-UNIV_INTERN
+/** Clean up after recv_sys_init() */
void
-recv_sys_mem_free(void)
-/*===================*/
+recv_sys_close()
{
if (recv_sys != NULL) {
+ recv_sys->dblwr.pages.clear();
+
if (recv_sys->addr_hash != NULL) {
hash_table_free(recv_sys->addr_hash);
}
@@ -269,55 +716,42 @@ recv_sys_mem_free(void)
mem_heap_free(recv_sys->heap);
}
- if (recv_sys->buf != NULL) {
- ut_free(recv_sys->buf);
+ if (recv_sys->flush_start != NULL) {
+ os_event_destroy(recv_sys->flush_start);
}
- if (recv_sys->last_block_buf_start != NULL) {
- mem_free(recv_sys->last_block_buf_start);
+ if (recv_sys->flush_end != NULL) {
+ os_event_destroy(recv_sys->flush_end);
}
- mem_free(recv_sys);
+ ut_free(recv_sys->buf);
+
+ ut_ad(!recv_writer_thread_active);
+ mutex_free(&recv_sys->writer_mutex);
+
+ mutex_free(&recv_sys->mutex);
+
+ ut_free(recv_sys);
recv_sys = NULL;
}
+
+ recv_spaces.clear();
+ mlog_init.clear();
}
-#ifndef UNIV_HOTBACKUP
/************************************************************
Reset the state of the recovery system variables. */
-UNIV_INTERN
void
recv_sys_var_init(void)
/*===================*/
{
- recv_lsn_checks_on = FALSE;
-
- recv_n_pool_free_frames = 256;
-
- recv_recovery_on = FALSE;
-
-#ifdef UNIV_LOG_ARCHIVE
- recv_recovery_from_backup_on = FALSE;
-#endif /* UNIV_LOG_ARCHIVE */
-
- recv_needed_recovery = FALSE;
-
- recv_lsn_checks_on = FALSE;
-
- recv_log_scan_is_startup_type = FALSE;
-
- recv_no_ibuf_operations = FALSE;
-
- recv_previous_parsed_rec_type = 999999;
-
+ recv_recovery_on = false;
+ recv_needed_recovery = false;
+ recv_lsn_checks_on = false;
+ recv_no_ibuf_operations = false;
+ recv_previous_parsed_rec_type = MLOG_SINGLE_REC_FLAG;
recv_previous_parsed_rec_offset = 0;
-
recv_previous_parsed_rec_is_multi = 0;
-
- recv_max_parsed_page_no = 0;
-
- recv_n_pool_free_frames = 256;
-
recv_max_page_lsn = 0;
}
@@ -325,7 +759,7 @@ recv_sys_var_init(void)
recv_writer thread tasked with flushing dirty pages from the buffer
pools.
@return a dummy parameter */
-extern "C" UNIV_INTERN
+extern "C"
os_thread_ret_t
DECLARE_THREAD(recv_writer_thread)(
/*===============================*/
@@ -341,26 +775,29 @@ DECLARE_THREAD(recv_writer_thread)(
#endif /* UNIV_PFS_THREAD */
#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "InnoDB: recv_writer thread running, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
+ ib::info() << "recv_writer thread running, id "
+ << os_thread_pf(os_thread_get_curr_id());
#endif /* UNIV_DEBUG_THREAD_CREATION */
while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
/* Wait till we get a signal to clean the LRU list.
Bounded by max wait time of 100ms. */
- ib_int64_t sig_count = os_event_reset(buf_flush_event);
+ ib_uint64_t sig_count = os_event_reset(buf_flush_event);
os_event_wait_time_low(buf_flush_event, 100000, sig_count);
mutex_enter(&recv_sys->writer_mutex);
- if (!recv_recovery_on) {
+ if (!recv_recovery_is_on()) {
mutex_exit(&recv_sys->writer_mutex);
break;
}
/* Flush pages from end of LRU if required */
- buf_flush_LRU_tail();
+ os_event_reset(recv_sys->flush_end);
+ recv_sys->flush_type = BUF_FLUSH_LRU;
+ os_event_set(recv_sys->flush_start);
+ os_event_wait(recv_sys->flush_end);
mutex_exit(&recv_sys->writer_mutex);
}
@@ -371,66 +808,38 @@ DECLARE_THREAD(recv_writer_thread)(
/* We count the number of threads in os_thread_exit().
A created thread should always use that to exit and not
use return() to exit. */
- os_thread_exit(NULL);
+ os_thread_exit();
OS_THREAD_DUMMY_RETURN;
}
-#endif /* !UNIV_HOTBACKUP */
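Instead of flushing directly, the recovery writer now delegates LRU flushing to the page cleaner through the flush_start/flush_end event pair. A minimal standalone model of that two-event handshake, using a condition variable in place of os_event (all names invented):

    #include <condition_variable>
    #include <mutex>

    struct flush_handshake {
            std::mutex m;
            std::condition_variable cv;
            bool start = false, done = false;

            void request_flush()      /* recv_writer side */
            {
                    std::unique_lock<std::mutex> l(m);
                    done = false;     /* os_event_reset(flush_end)  */
                    start = true;     /* os_event_set(flush_start)  */
                    cv.notify_all();
                    cv.wait(l, [&]{ return done; }); /* wait(flush_end) */
            }
            void flush_completed()    /* page cleaner side */
            {
                    std::lock_guard<std::mutex> l(m);
                    start = false;
                    done = true;      /* os_event_set(flush_end) */
                    cv.notify_all();
            }
    };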
-/************************************************************
-Inits the recovery system for a recovery operation. */
-UNIV_INTERN
+/** Initialize the redo log recovery subsystem. */
void
-recv_sys_init(
-/*==========*/
- ulint available_memory) /*!< in: available memory in bytes */
+recv_sys_init()
{
- if (recv_sys->heap != NULL) {
+ ut_ad(recv_sys == NULL);
- return;
- }
+ recv_sys = static_cast<recv_sys_t*>(ut_zalloc_nokey(sizeof(*recv_sys)));
-#ifndef UNIV_HOTBACKUP
- mutex_enter(&(recv_sys->mutex));
+ mutex_create(LATCH_ID_RECV_SYS, &recv_sys->mutex);
+ mutex_create(LATCH_ID_RECV_WRITER, &recv_sys->writer_mutex);
- recv_sys->heap = mem_heap_create_typed(256,
- MEM_HEAP_FOR_RECV_SYS);
-#else /* !UNIV_HOTBACKUP */
- recv_sys->heap = mem_heap_create(256);
- recv_is_from_backup = TRUE;
-#endif /* !UNIV_HOTBACKUP */
+ recv_sys->heap = mem_heap_create_typed(256, MEM_HEAP_FOR_RECV_SYS);
- /* Set appropriate value of recv_n_pool_free_frames. */
- if (buf_pool_get_curr_size() >= (10 * 1024 * 1024)) {
- /* Buffer pool of size greater than 10 MB. */
- recv_n_pool_free_frames = 512;
+ if (!srv_read_only_mode) {
+ recv_sys->flush_start = os_event_create(0);
+ recv_sys->flush_end = os_event_create(0);
}
- recv_sys->buf = static_cast<byte*>(ut_malloc(RECV_PARSING_BUF_SIZE));
- recv_sys->len = 0;
- recv_sys->recovered_offset = 0;
-
- recv_sys->addr_hash = hash_create(available_memory / 512);
- recv_sys->n_addrs = 0;
+ recv_sys->buf = static_cast<byte*>(
+ ut_malloc_nokey(RECV_PARSING_BUF_SIZE));
- recv_sys->apply_log_recs = FALSE;
- recv_sys->apply_batch_on = FALSE;
-
- recv_sys->last_block_buf_start = static_cast<byte*>(
- mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE));
-
- recv_sys->last_block = static_cast<byte*>(ut_align(
- recv_sys->last_block_buf_start, OS_FILE_LOG_BLOCK_SIZE));
-
- recv_sys->found_corrupt_log = FALSE;
+ recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 512);
recv_sys->progress_time = time(NULL);
-
recv_max_page_lsn = 0;
/* Call the constructor for recv_sys_t::dblwr member */
new (&recv_sys->dblwr) recv_dblwr_t();
-
- mutex_exit(&(recv_sys->mutex));
}
/** Empty a fully processed hash table. */
@@ -447,11 +856,8 @@ recv_sys_empty_hash()
recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 512);
}
-#ifndef UNIV_HOTBACKUP
-# ifndef UNIV_LOG_DEBUG
/********************************************************//**
Frees the recovery system. */
-static
void
recv_sys_debug_free(void)
/*=====================*/
@@ -461,167 +867,143 @@ recv_sys_debug_free(void)
hash_table_free(recv_sys->addr_hash);
mem_heap_free(recv_sys->heap);
ut_free(recv_sys->buf);
- mem_free(recv_sys->last_block_buf_start);
recv_sys->buf = NULL;
recv_sys->heap = NULL;
recv_sys->addr_hash = NULL;
- recv_sys->last_block_buf_start = NULL;
- mutex_exit(&(recv_sys->mutex));
+ /* wake page cleaner up to progress */
+ if (!srv_read_only_mode) {
+ ut_ad(!recv_recovery_is_on());
+ ut_ad(!recv_writer_thread_active);
+ os_event_reset(buf_flush_event);
+ os_event_set(recv_sys->flush_start);
+ }
- /* Free up the flush_rbt. */
- buf_flush_free_flush_rbt();
+ mutex_exit(&(recv_sys->mutex));
}
-# endif /* UNIV_LOG_DEBUG */
-# ifdef UNIV_LOG_ARCHIVE
-/********************************************************//**
-Truncates possible corrupted or extra records from a log group. */
-static
-void
-recv_truncate_group(
-/*================*/
- log_group_t* group, /*!< in: log group */
- lsn_t recovered_lsn, /*!< in: recovery succeeded up to this
- lsn */
- lsn_t limit_lsn, /*!< in: this was the limit for
- recovery */
- lsn_t checkpoint_lsn, /*!< in: recovery was started from this
- checkpoint */
- lsn_t archived_lsn) /*!< in: the log has been archived up to
- this lsn */
+/** Read a log segment to a buffer.
+@param[out] buf buffer
+@param[in] group redo log files
+@param[in,out]	start_lsn	in: read area start, out: the last valid lsn read
+@param[in]	end_lsn		read area end
+@return false if an invalid block (e.g. checksum mismatch, or incompletely
+written) was encountered, true otherwise */
+bool
+log_group_read_log_seg(
+ byte* buf,
+ const log_group_t* group,
+ lsn_t *start_lsn,
+ lsn_t end_lsn)
{
- lsn_t start_lsn;
- lsn_t end_lsn;
- lsn_t finish_lsn1;
- lsn_t finish_lsn2;
- lsn_t finish_lsn;
-
- if (archived_lsn == LSN_MAX) {
- /* Checkpoint was taken in the NOARCHIVELOG mode */
- archived_lsn = checkpoint_lsn;
- }
+ ulint len;
+ lsn_t source_offset;
+ bool success = true;
+ ut_ad(log_mutex_own());
+ ut_ad(!(*start_lsn % OS_FILE_LOG_BLOCK_SIZE));
+ ut_ad(!(end_lsn % OS_FILE_LOG_BLOCK_SIZE));
- finish_lsn1 = ut_uint64_align_down(archived_lsn,
- OS_FILE_LOG_BLOCK_SIZE)
- + log_group_get_capacity(group);
+loop:
+ source_offset = log_group_calc_lsn_offset(*start_lsn, group);
- finish_lsn2 = ut_uint64_align_up(recovered_lsn,
- OS_FILE_LOG_BLOCK_SIZE)
- + recv_sys->last_log_buf_size;
+ ut_a(end_lsn - *start_lsn <= ULINT_MAX);
+ len = (ulint) (end_lsn - *start_lsn);
- if (limit_lsn != LSN_MAX) {
- /* We do not know how far we should erase log records: erase
- as much as possible */
+ ut_ad(len != 0);
- finish_lsn = finish_lsn1;
- } else {
- /* It is enough to erase the length of the log buffer */
- finish_lsn = finish_lsn1 < finish_lsn2
- ? finish_lsn1 : finish_lsn2;
+ const bool at_eof = (source_offset % group->file_size) + len
+ > group->file_size;
+ if (at_eof) {
+ /* If the above condition is true then len (which is ulint)
+ is > the expression below, so the typecast is ok */
+ len = (ulint) (group->file_size -
+ (source_offset % group->file_size));
}
- ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
+ log_sys->n_log_ios++;
- memset(log_sys->buf, 0, RECV_SCAN_SIZE);
+ MONITOR_INC(MONITOR_LOG_IO);
- start_lsn = ut_uint64_align_down(recovered_lsn,
- OS_FILE_LOG_BLOCK_SIZE);
+ ut_a(source_offset / UNIV_PAGE_SIZE <= ULINT_MAX);
- if (start_lsn != recovered_lsn) {
- /* Copy the last incomplete log block to the log buffer and
- edit its data length: */
- lsn_t diff = recovered_lsn - start_lsn;
+ const ulint page_no
+ = (ulint) (source_offset / univ_page_size.physical());
- ut_a(diff <= 0xFFFFUL);
+ fil_io(IORequestLogRead, true,
+ page_id_t(SRV_LOG_SPACE_FIRST_ID, page_no),
+ univ_page_size,
+ (ulint) (source_offset % univ_page_size.physical()),
+ len, buf, NULL);
- ut_memcpy(log_sys->buf, recv_sys->last_block,
- OS_FILE_LOG_BLOCK_SIZE);
- log_block_set_data_len(log_sys->buf, (ulint) diff);
- }
+ for (ulint l = 0; l < len; l += OS_FILE_LOG_BLOCK_SIZE,
+ buf += OS_FILE_LOG_BLOCK_SIZE,
+ (*start_lsn) += OS_FILE_LOG_BLOCK_SIZE) {
+ const ulint block_number = log_block_get_hdr_no(buf);
- if (start_lsn >= finish_lsn) {
-
- return;
- }
-
- for (;;) {
- ulint len;
-
- end_lsn = start_lsn + RECV_SCAN_SIZE;
-
- if (end_lsn > finish_lsn) {
-
- end_lsn = finish_lsn;
+ if (block_number != log_block_convert_lsn_to_no(*start_lsn)) {
+ /* Garbage or an incompletely written log block.
+ We will not report any error, because this can
+ happen when InnoDB was killed while it was
+ writing redo log. We simply treat this as an
+ abrupt end of the redo log. */
+fail:
+ end_lsn = *start_lsn;
+ success = false;
+ break;
}
- len = (ulint) (end_lsn - start_lsn);
-
- log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
- if (end_lsn >= finish_lsn) {
+ if (innodb_log_checksums || group->is_encrypted()) {
+ ulint crc = log_block_calc_checksum_crc32(buf);
+ ulint cksum = log_block_get_checksum(buf);
+
+ DBUG_EXECUTE_IF("log_intermittent_checksum_mismatch", {
+ static int block_counter;
+ if (block_counter++ == 0) {
+ cksum = crc + 1;
+ }
+ });
+
+ if (crc != cksum) {
+ ib::error() << "Invalid log block checksum."
+ << " block: " << block_number
+ << " checkpoint no: "
+ << log_block_get_checkpoint_no(buf)
+ << " expected: " << crc
+ << " found: " << cksum;
+ goto fail;
+ }
- return;
+ if (group->is_encrypted()) {
+ log_crypt(buf, *start_lsn,
+ OS_FILE_LOG_BLOCK_SIZE, true);
+ }
}
- memset(log_sys->buf, 0, RECV_SCAN_SIZE);
-
- start_lsn = end_lsn;
+ ulint dl = log_block_get_data_len(buf);
+ if (dl < LOG_BLOCK_HDR_SIZE
+ || (dl > OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE
+ && dl != OS_FILE_LOG_BLOCK_SIZE)) {
+ recv_sys->found_corrupt_log = true;
+ goto fail;
+ }
}
-}
-/********************************************************//**
-Copies the log segment between group->recovered_lsn and recovered_lsn from the
-most up-to-date log group to group, so that it contains the latest log data. */
-static
-void
-recv_copy_group(
-/*============*/
- log_group_t* up_to_date_group, /*!< in: the most up-to-date log
- group */
- log_group_t* group, /*!< in: copy to this log
- group */
- lsn_t recovered_lsn) /*!< in: recovery succeeded up
- to this lsn */
-{
- lsn_t start_lsn;
- lsn_t end_lsn;
-
- if (group->scanned_lsn >= recovered_lsn) {
-
- return;
+ if (recv_sys->report(time(NULL))) {
+ ib::info() << "Read redo log up to LSN=" << *start_lsn;
+ service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
+ "Read redo log up to LSN=" LSN_PF,
+ *start_lsn);
}
- ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
-
- start_lsn = ut_uint64_align_down(group->scanned_lsn,
- OS_FILE_LOG_BLOCK_SIZE);
- for (;;) {
- ulint len;
-
- end_lsn = start_lsn + RECV_SCAN_SIZE;
-
- if (end_lsn > recovered_lsn) {
- end_lsn = ut_uint64_align_up(recovered_lsn,
- OS_FILE_LOG_BLOCK_SIZE);
- }
-
- log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
- up_to_date_group, start_lsn, end_lsn);
-
- len = (ulint) (end_lsn - start_lsn);
-
- log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
+ if (*start_lsn != end_lsn) {
+ goto loop;
+ }
- if (end_lsn >= recovered_lsn) {
+ return(success);
+}
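The header-number test above relies on a fixed mapping between an LSN and the
number stored in each 512-byte log block header. A minimal sketch of that
mapping, assuming the conventional InnoDB encoding in which the block number
is 1-based and wraps at 1 << 30 (this mirrors log_block_convert_lsn_to_no()):

	/* Expected header number for the block that contains lsn. */
	inline ulint block_no_for_lsn(lsn_t lsn)
	{
		return ((ulint) (lsn / OS_FILE_LOG_BLOCK_SIZE)
			& 0x3FFFFFFFUL) + 1;
	}

If the number read from the block header differs from this value for
*start_lsn, the block was never written at this LSN, and the scan treats it
as the abrupt end of the redo log.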
- return;
- }
- start_lsn = end_lsn;
- }
-}
-# endif /* UNIV_LOG_ARCHIVE */
/********************************************************//**
Copies a log segment from the most up-to-date log group to the other log
@@ -630,423 +1012,454 @@ about the latest checkpoint to the groups, and inits the fields in the group
memory structs to up-to-date values. */
static
void
-recv_synchronize_groups(
-/*====================*/
-#ifdef UNIV_LOG_ARCHIVE
- log_group_t* up_to_date_group /*!< in: the most up-to-date
- log group */
-#endif
- )
+recv_synchronize_groups()
{
- lsn_t start_lsn;
- lsn_t end_lsn;
- lsn_t recovered_lsn;
-
- recovered_lsn = recv_sys->recovered_lsn;
+ const lsn_t recovered_lsn = recv_sys->recovered_lsn;
/* Read the last recovered log block to the recovery system buffer:
the block is always incomplete */
- start_lsn = ut_uint64_align_down(recovered_lsn,
- OS_FILE_LOG_BLOCK_SIZE);
- end_lsn = ut_uint64_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
-
- ut_a(start_lsn != end_lsn);
-
- log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block,
-#ifdef UNIV_LOG_ARCHIVE
- up_to_date_group,
-#else /* UNIV_LOG_ARCHIVE */
- UT_LIST_GET_FIRST(log_sys->log_groups),
-#endif /* UNIV_LOG_ARCHIVE */
- start_lsn, end_lsn);
-
- for (log_group_t* group = UT_LIST_GET_FIRST(log_sys->log_groups);
- group;
- group = UT_LIST_GET_NEXT(log_groups, group)) {
-#ifdef UNIV_LOG_ARCHIVE
- if (group != up_to_date_group) {
-
- /* Copy log data if needed */
+ lsn_t start_lsn = ut_uint64_align_down(recovered_lsn,
+ OS_FILE_LOG_BLOCK_SIZE);
+ log_group_read_log_seg(log_sys->buf, &log_sys->log,
+ &start_lsn, start_lsn + OS_FILE_LOG_BLOCK_SIZE);
- recv_copy_group(group, up_to_date_group,
- recovered_lsn);
- }
-#endif /* UNIV_LOG_ARCHIVE */
- /* Update the fields in the group struct to correspond to
- recovered_lsn */
+ /* Update the fields in the group struct to correspond to
+ recovered_lsn */
- log_group_set_fields(group, recovered_lsn);
- ut_a(log_sys);
+ log_group_set_fields(&log_sys->log, recovered_lsn);
- }
- /* Copy the checkpoint info to the groups; remember that we have
+ /* Copy the checkpoint info to the log; remember that we have
incremented checkpoint_no by one, and the info will not be written
over the max checkpoint info, thus making the preservation of max
checkpoint info on disk certain */
- log_groups_write_checkpoint_info();
-
- mutex_exit(&(log_sys->mutex));
-
- /* Wait for the checkpoint write to complete */
- rw_lock_s_lock(&(log_sys->checkpoint_lock));
- rw_lock_s_unlock(&(log_sys->checkpoint_lock));
-
- mutex_enter(&(log_sys->mutex));
+ if (!srv_read_only_mode) {
+ log_write_checkpoint_info(true, 0);
+ log_mutex_enter();
+ }
}
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************************//**
-Checks the consistency of the checkpoint info
-@return TRUE if ok */
+/** Check the consistency of a log header block.
+@param[in]	buf	log header block
+@return true if ok */
static
-ibool
-recv_check_cp_is_consistent(
-/*========================*/
- const byte* buf) /*!< in: buffer containing checkpoint info */
+bool
+recv_check_log_header_checksum(
+ const byte* buf)
{
- ulint fold;
-
- fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
-
- if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(
- buf + LOG_CHECKPOINT_CHECKSUM_1)) {
- return(FALSE);
- }
-
- fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
- LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
-
- if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(
- buf + LOG_CHECKPOINT_CHECKSUM_2)) {
- return(FALSE);
- }
-
- return(TRUE);
+ return(log_block_get_checksum(buf)
+ == log_block_calc_checksum_crc32(buf));
}
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Looks for the maximum consistent checkpoint from the log groups.
-@return error code or DB_SUCCESS */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
+/** Find the latest checkpoint in the format-0 log header.
+@param[out] max_group log group, or NULL
+@param[out] max_field LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2
+@return error code or DB_SUCCESS */
+static MY_ATTRIBUTE((warn_unused_result))
dberr_t
-recv_find_max_checkpoint(
-/*=====================*/
- log_group_t** max_group, /*!< out: max group */
- ulint* max_field) /*!< out: LOG_CHECKPOINT_1 or
- LOG_CHECKPOINT_2 */
+recv_find_max_checkpoint_0(log_group_t** max_group, ulint* max_field)
{
- log_group_t* group;
- ib_uint64_t max_no;
+ log_group_t* group = &log_sys->log;
+ ib_uint64_t max_no = 0;
ib_uint64_t checkpoint_no;
- ulint field;
- byte* buf;
+ byte* buf = log_sys->checkpoint_buf;
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
+ ut_ad(group->format == 0);
- max_no = 0;
- *max_group = NULL;
- *max_field = 0;
+ /** Offset of the first checkpoint checksum */
+ static const uint CHECKSUM_1 = 288;
+ /** Offset of the second checkpoint checksum */
+ static const uint CHECKSUM_2 = CHECKSUM_1 + 4;
+ /** Most significant bits of the checkpoint offset */
+ static const uint OFFSET_HIGH32 = CHECKSUM_2 + 12;
+ /** Least significant bits of the checkpoint offset */
+ static const uint OFFSET_LOW32 = 16;
- buf = log_sys->checkpoint_buf;
+ *max_group = NULL;
- while (group) {
- group->state = LOG_GROUP_CORRUPTED;
+ for (ulint field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
+ field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
+ log_group_header_read(group, field);
+
+ if (static_cast<uint32_t>(ut_fold_binary(buf, CHECKSUM_1))
+ != mach_read_from_4(buf + CHECKSUM_1)
+ || static_cast<uint32_t>(
+ ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
+ CHECKSUM_2 - LOG_CHECKPOINT_LSN))
+ != mach_read_from_4(buf + CHECKSUM_2)) {
+ DBUG_LOG("ib_log",
+ "invalid pre-10.2.2 checkpoint " << field);
+ continue;
+ }
- for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
- field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
+ checkpoint_no = mach_read_from_8(
+ buf + LOG_CHECKPOINT_NO);
- log_group_read_checkpoint_info(group, field);
+ if (!log_crypt_101_read_checkpoint(buf)) {
+ ib::error() << "Decrypting checkpoint failed";
+ continue;
+ }
- if (!recv_check_cp_is_consistent(buf)) {
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Checkpoint in group"
- " %lu at %lu invalid, %lu\n",
- (ulong) group->id,
- (ulong) field,
- (ulong) mach_read_from_4(
- buf
- + LOG_CHECKPOINT_CHECKSUM_1));
+ DBUG_PRINT("ib_log",
+ ("checkpoint " UINT64PF " at " LSN_PF " found",
+ checkpoint_no,
+ mach_read_from_8(buf + LOG_CHECKPOINT_LSN)));
- }
-#endif /* UNIV_DEBUG */
- goto not_consistent;
- }
+ if (checkpoint_no >= max_no) {
+ *max_group = group;
+ *max_field = field;
+ max_no = checkpoint_no;
group->state = LOG_GROUP_OK;
group->lsn = mach_read_from_8(
buf + LOG_CHECKPOINT_LSN);
- group->lsn_offset = mach_read_from_4(
- buf + LOG_CHECKPOINT_OFFSET_LOW32);
- group->lsn_offset |= ((lsn_t) mach_read_from_4(
- buf + LOG_CHECKPOINT_OFFSET_HIGH32)) << 32;
- checkpoint_no = mach_read_from_8(
- buf + LOG_CHECKPOINT_NO);
-
- if (!log_crypt_read_checkpoint_buf(buf)) {
- return DB_ERROR;
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Checkpoint number %lu"
- " found in group %lu\n",
- (ulong) checkpoint_no,
- (ulong) group->id);
- }
-#endif /* UNIV_DEBUG */
-
- if (checkpoint_no >= max_no) {
- *max_group = group;
- *max_field = field;
- max_no = checkpoint_no;
- }
-
-not_consistent:
- ;
+ group->lsn_offset = static_cast<ib_uint64_t>(
+ mach_read_from_4(buf + OFFSET_HIGH32)) << 32
+ | mach_read_from_4(buf + OFFSET_LOW32);
}
-
- group = UT_LIST_GET_NEXT(log_groups, group);
}
- if (*max_group == NULL) {
-
- fprintf(stderr,
- "InnoDB: No valid checkpoint found.\n"
- "InnoDB: A downgrade from MariaDB 10.2.2"
- " or later is not supported.\n"
- "InnoDB: If this error appears when you are"
- " creating an InnoDB database,\n"
- "InnoDB: the problem may be that during"
- " an earlier attempt you managed\n"
- "InnoDB: to create the InnoDB data files,"
- " but log file creation failed.\n"
- "InnoDB: If that is the case, please refer to\n"
- "InnoDB: " REFMAN "error-creating-innodb.html\n");
- return(DB_ERROR);
+ if (*max_group != NULL) {
+ return(DB_SUCCESS);
}
- return(DB_SUCCESS);
+ ib::error() << "Upgrade after a crash is not supported."
+ " This redo log was created before MariaDB 10.2.2,"
+ " and we did not find a valid checkpoint."
+ " Please follow the instructions at"
+ " https://mariadb.com/kb/en/library/upgrading/";
+ return(DB_ERROR);
}
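Both this loop and its 10.2+ counterpart in recv_find_max_checkpoint() below
implement the same crash-safe design: checkpoints alternate between two header
slots, so a torn checkpoint write can clobber at most one slot, and recovery
keeps whichever valid slot carries the larger checkpoint number. A condensed
sketch of the selection, where read_slot() and slot_is_valid() are illustrative
stand-ins for log_group_header_read() plus the checksum checks:

	ulint		best_field = 0;
	ib_uint64_t	best_no = 0;

	for (ulint field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
	     field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
		read_slot(field);		/* fill buf from the header */
		if (!slot_is_valid(buf)) {
			continue;		/* torn or stale slot */
		}
		ib_uint64_t no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);
		if (no >= best_no) {
			best_no = no;
			best_field = field;
		}
	}
	/* best_field == 0 means that no valid checkpoint was found. */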
-#else /* !UNIV_HOTBACKUP */
-/*******************************************************************//**
-Reads the checkpoint info needed in hot backup.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-recv_read_checkpoint_info_for_backup(
-/*=================================*/
- const byte* hdr, /*!< in: buffer containing the log group
- header */
- lsn_t* lsn, /*!< out: checkpoint lsn */
- lsn_t* offset, /*!< out: checkpoint offset in the log group */
- lsn_t* cp_no, /*!< out: checkpoint number */
- lsn_t* first_header_lsn)
- /*!< out: lsn of of the start of the
- first log file */
-{
- ulint max_cp = 0;
- ib_uint64_t max_cp_no = 0;
- const byte* cp_buf;
- cp_buf = hdr + LOG_CHECKPOINT_1;
+/** Determine if a pre-MySQL 5.7.9/MariaDB 10.2.2 redo log is clean.
+@param[in] lsn checkpoint LSN
+@param[in] crypt whether the log might be encrypted
+@return error code
+@retval DB_SUCCESS if the redo log is clean
+@retval DB_ERROR if the redo log is corrupted or dirty */
+static dberr_t recv_log_format_0_recover(lsn_t lsn, bool crypt)
+{
+ log_mutex_enter();
+ log_group_t* group = &log_sys->log;
+ const lsn_t source_offset
+ = log_group_calc_lsn_offset(lsn, group);
+ log_mutex_exit();
+ const ulint page_no
+ = (ulint) (source_offset / univ_page_size.physical());
+ byte* buf = log_sys->buf;
+
+ static const char* NO_UPGRADE_RECOVERY_MSG =
+ "Upgrade after a crash is not supported."
+ " This redo log was created before MariaDB 10.2.2";
+ static const char* NO_UPGRADE_RTFM_MSG =
+ ". Please follow the instructions at "
+ "https://mariadb.com/kb/en/library/upgrading/";
+
+ fil_io(IORequestLogRead, true,
+ page_id_t(SRV_LOG_SPACE_FIRST_ID, page_no),
+ univ_page_size,
+ (ulint) ((source_offset & ~(OS_FILE_LOG_BLOCK_SIZE - 1))
+ % univ_page_size.physical()),
+ OS_FILE_LOG_BLOCK_SIZE, buf, NULL);
+
+ if (log_block_calc_checksum_format_0(buf)
+ != log_block_get_checksum(buf)
+ && !log_crypt_101_read_block(buf)) {
+ ib::error() << NO_UPGRADE_RECOVERY_MSG
+ << ", and it appears corrupted"
+ << NO_UPGRADE_RTFM_MSG;
+ return(DB_CORRUPTION);
+ }
- if (recv_check_cp_is_consistent(cp_buf)) {
- max_cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO);
- max_cp = LOG_CHECKPOINT_1;
+ if (log_block_get_data_len(buf)
+ == (source_offset & (OS_FILE_LOG_BLOCK_SIZE - 1))) {
+ } else if (crypt) {
+ ib::error() << "Cannot decrypt log for upgrading."
+ " The encrypted log was created before MariaDB 10.2.2"
+ << NO_UPGRADE_RTFM_MSG;
+ return DB_ERROR;
+ } else {
+ ib::error() << NO_UPGRADE_RECOVERY_MSG
+ << NO_UPGRADE_RTFM_MSG;
+ return(DB_ERROR);
}
- cp_buf = hdr + LOG_CHECKPOINT_2;
+ /* Mark the redo log for upgrading. */
+ srv_log_file_size = 0;
+ recv_sys->parse_start_lsn = recv_sys->recovered_lsn
+ = recv_sys->scanned_lsn
+ = recv_sys->mlog_checkpoint_lsn = lsn;
+ log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn
+ = log_sys->lsn = log_sys->write_lsn
+ = log_sys->current_flush_lsn = log_sys->flushed_to_disk_lsn
+ = lsn;
+ log_sys->next_checkpoint_no = 0;
+ return(DB_SUCCESS);
+}
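The upgrade test above hinges on a single invariant: after a clean shutdown,
the log block containing the checkpoint LSN has no payload past that LSN. The
same test recurs in recv_log_recover_10_3() below. A worked sketch of the
comparison, with the names used above:

	/* Byte offset of the checkpoint LSN within its 512-byte block. */
	const ulint offset_in_block = (ulint)
		(source_offset & (OS_FILE_LOG_BLOCK_SIZE - 1));

	if (log_block_get_data_len(buf) == offset_in_block) {
		/* Nothing was logged after the checkpoint:
		the log is logically empty and can be upgraded. */
	}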
- if (recv_check_cp_is_consistent(cp_buf)) {
- if (mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO) > max_cp_no) {
- max_cp = LOG_CHECKPOINT_2;
- }
+/** Determine if a redo log from MariaDB 10.3 is clean.
+@return error code
+@retval DB_SUCCESS if the redo log is clean
+@retval DB_CORRUPTION if the redo log is corrupted
+@retval DB_ERROR if the redo log is not empty */
+static
+dberr_t
+recv_log_recover_10_3()
+{
+ log_group_t* group = &log_sys->log;
+ const lsn_t lsn = group->lsn;
+ const lsn_t source_offset = log_group_calc_lsn_offset(lsn, group);
+ const ulint page_no
+ = (ulint) (source_offset / univ_page_size.physical());
+ byte* buf = log_sys->buf;
+
+ fil_io(IORequestLogRead, true,
+ page_id_t(SRV_LOG_SPACE_FIRST_ID, page_no),
+ univ_page_size,
+ (ulint) ((source_offset & ~(OS_FILE_LOG_BLOCK_SIZE - 1))
+ % univ_page_size.physical()),
+ OS_FILE_LOG_BLOCK_SIZE, buf, NULL);
+
+ if (log_block_calc_checksum(buf) != log_block_get_checksum(buf)) {
+ return(DB_CORRUPTION);
}
- if (max_cp == 0) {
- return(FALSE);
+ if (group->is_encrypted()) {
+ log_crypt(buf, lsn, OS_FILE_LOG_BLOCK_SIZE, true);
}
- cp_buf = hdr + max_cp;
-
- *lsn = mach_read_from_8(cp_buf + LOG_CHECKPOINT_LSN);
- *offset = mach_read_from_4(
- cp_buf + LOG_CHECKPOINT_OFFSET_LOW32);
- *offset |= ((lsn_t) mach_read_from_4(
- cp_buf + LOG_CHECKPOINT_OFFSET_HIGH32)) << 32;
+ /* On a clean shutdown, the redo log will be logically empty
+ after the checkpoint lsn. */
- *cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO);
-
- *first_header_lsn = mach_read_from_8(hdr + LOG_FILE_START_LSN);
+ if (log_block_get_data_len(buf)
+ != (source_offset & (OS_FILE_LOG_BLOCK_SIZE - 1))) {
+ return(DB_ERROR);
+ }
- return(TRUE);
+ /* Mark the redo log for downgrading. */
+ srv_log_file_size = 0;
+ recv_sys->parse_start_lsn = recv_sys->recovered_lsn
+ = recv_sys->scanned_lsn
+ = recv_sys->mlog_checkpoint_lsn = lsn;
+ log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn
+ = log_sys->lsn = log_sys->write_lsn
+ = log_sys->current_flush_lsn = log_sys->flushed_to_disk_lsn
+ = lsn;
+ log_sys->next_checkpoint_no = 0;
+ return(DB_SUCCESS);
}
-#endif /* !UNIV_HOTBACKUP */
-
-/******************************************************//**
-Checks the 4-byte checksum to the trailer checksum field of a log
-block. We also accept a log block in the old format before
-InnoDB-3.23.52 where the checksum field contains the log block number.
-@return TRUE if ok, or if the log block may be in the format of InnoDB
-version predating 3.23.52 */
-ibool
-log_block_checksum_is_ok_or_old_format(
-/*===================================*/
- const byte* block, /*!< in: pointer to a log block */
- bool print_err) /*!< in print error ? */
+
+/** Find the latest checkpoint in the log header.
+@param[out] max_field LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2
+@return error code or DB_SUCCESS */
+dberr_t
+recv_find_max_checkpoint(ulint* max_field)
{
-#ifdef UNIV_LOG_DEBUG
- return(TRUE);
-#endif /* UNIV_LOG_DEBUG */
- if (log_block_calc_checksum(block) == log_block_get_checksum(block)) {
+ log_group_t* group;
+ ib_uint64_t max_no;
+ ib_uint64_t checkpoint_no;
+ ulint field;
+ byte* buf;
- return(TRUE);
- }
+ group = &log_sys->log;
+
+ max_no = 0;
+ *max_field = 0;
- if (log_block_get_hdr_no(block) == log_block_get_checksum(block)) {
+ buf = log_sys->checkpoint_buf;
- /* We assume the log block is in the format of
- InnoDB version < 3.23.52 and the block is ok */
-#if 0
- fprintf(stderr,
- "InnoDB: Scanned old format < InnoDB-3.23.52"
- " log block number %lu\n",
- log_block_get_hdr_no(block));
-#endif
- return(TRUE);
+ group->state = LOG_GROUP_CORRUPTED;
+
+ log_group_header_read(group, 0);
+ /* Check the header page checksum. There was no
+ checksum in the first redo log format (version 0). */
+ group->format = mach_read_from_4(buf + LOG_HEADER_FORMAT);
+ group->subformat = group->format
+ ? mach_read_from_4(buf + LOG_HEADER_SUBFORMAT)
+ : 0;
+ if (group->format != 0
+ && !recv_check_log_header_checksum(buf)) {
+ ib::error() << "Invalid redo log header checksum.";
+ return(DB_CORRUPTION);
}
- if (print_err) {
- fprintf(stderr, "BROKEN: block: %lu checkpoint: %lu %.8lx %.8lx\n",
- log_block_get_hdr_no(block),
- log_block_get_checkpoint_no(block),
- log_block_calc_checksum(block),
- log_block_get_checksum(block));
+ char creator[LOG_HEADER_CREATOR_END - LOG_HEADER_CREATOR + 1];
+
+ memcpy(creator, buf + LOG_HEADER_CREATOR, sizeof creator);
+ /* Ensure that the string is NUL-terminated. */
+ creator[LOG_HEADER_CREATOR_END - LOG_HEADER_CREATOR] = 0;
+
+ switch (group->format) {
+ case 0:
+ return(recv_find_max_checkpoint_0(&group, max_field));
+ case LOG_HEADER_FORMAT_10_2:
+ case LOG_HEADER_FORMAT_10_2 | LOG_HEADER_FORMAT_ENCRYPTED:
+ case LOG_HEADER_FORMAT_10_3:
+ case LOG_HEADER_FORMAT_10_3 | LOG_HEADER_FORMAT_ENCRYPTED:
+ case LOG_HEADER_FORMAT_10_4:
+ /* We can only parse the unencrypted LOG_HEADER_FORMAT_10_4.
+ The encrypted format uses a larger redo log block trailer. */
+ break;
+ default:
+ ib::error() << "Unsupported redo log format."
+ " The redo log was created"
+ " with " << creator <<
+ ". Please follow the instructions at "
+ "https://mariadb.com/kb/en/library/upgrading/";
+ /* Do not issue a message about a possibility
+ to cleanly shut down the newer server version
+ and to remove the redo logs, because the
+ format of the system data structures may
+ radically change after MySQL 5.7. */
+ return(DB_ERROR);
}
- return(FALSE);
-}
+ for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
+ field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
-#ifdef UNIV_HOTBACKUP
-/*******************************************************************//**
-Scans the log segment and n_bytes_scanned is set to the length of valid
-log scanned. */
-UNIV_INTERN
-void
-recv_scan_log_seg_for_backup(
-/*=========================*/
- byte* buf, /*!< in: buffer containing log data */
- ulint buf_len, /*!< in: data length in that buffer */
- lsn_t* scanned_lsn, /*!< in/out: lsn of buffer start,
- we return scanned lsn */
- ulint* scanned_checkpoint_no,
- /*!< in/out: 4 lowest bytes of the
- highest scanned checkpoint number so
- far */
- ulint* n_bytes_scanned)/*!< out: how much we were able to
- scan, smaller than buf_len if log
- data ended here */
-{
- ulint data_len;
- byte* log_block;
- ulint no;
-
- *n_bytes_scanned = 0;
+ log_group_header_read(group, field);
- for (log_block = buf; log_block < buf + buf_len;
- log_block += OS_FILE_LOG_BLOCK_SIZE) {
+ const ulint crc32 = log_block_calc_checksum_crc32(buf);
+ const ulint cksum = log_block_get_checksum(buf);
- no = log_block_get_hdr_no(log_block);
-
-#if 0
- fprintf(stderr, "Log block header no %lu\n", no);
-#endif
-
- if (no != log_block_convert_lsn_to_no(*scanned_lsn)
- || !log_block_checksum_is_ok_or_old_format(log_block)) {
-#if 0
- fprintf(stderr,
- "Log block n:o %lu, scanned lsn n:o %lu\n",
- no, log_block_convert_lsn_to_no(*scanned_lsn));
-#endif
- /* Garbage or an incompletely written log block */
-
- log_block += OS_FILE_LOG_BLOCK_SIZE;
-#if 0
- fprintf(stderr,
- "Next log block n:o %lu\n",
- log_block_get_hdr_no(log_block));
-#endif
- break;
+ if (crc32 != cksum) {
+ DBUG_PRINT("ib_log",
+ ("invalid checkpoint,"
+ " at " ULINTPF
+ ", checksum " ULINTPFx
+ " expected " ULINTPFx,
+ field, cksum, crc32));
+ continue;
}
- if (*scanned_checkpoint_no > 0
- && log_block_get_checkpoint_no(log_block)
- < *scanned_checkpoint_no
- && *scanned_checkpoint_no
- - log_block_get_checkpoint_no(log_block)
- > 0x80000000UL) {
-
- /* Garbage from a log buffer flush which was made
- before the most recent database recovery */
-#if 0
- fprintf(stderr,
- "Scanned cp n:o %lu, block cp n:o %lu\n",
- *scanned_checkpoint_no,
- log_block_get_checkpoint_no(log_block));
-#endif
- break;
+ if (group->is_encrypted()
+ && !log_crypt_read_checkpoint_buf(buf)) {
+ ib::error() << "Reading checkpoint"
+ " encryption info failed.";
+ continue;
}
- data_len = log_block_get_data_len(log_block);
+ checkpoint_no = mach_read_from_8(
+ buf + LOG_CHECKPOINT_NO);
- *scanned_checkpoint_no
- = log_block_get_checkpoint_no(log_block);
- *scanned_lsn += data_len;
+ DBUG_PRINT("ib_log",
+ ("checkpoint " UINT64PF " at " LSN_PF " found",
+ checkpoint_no, mach_read_from_8(
+ buf + LOG_CHECKPOINT_LSN)));
- *n_bytes_scanned += data_len;
+ if (checkpoint_no >= max_no) {
+ *max_field = field;
+ max_no = checkpoint_no;
+ group->state = LOG_GROUP_OK;
+ group->lsn = mach_read_from_8(
+ buf + LOG_CHECKPOINT_LSN);
+ group->lsn_offset = mach_read_from_8(
+ buf + LOG_CHECKPOINT_OFFSET);
+ log_sys->next_checkpoint_no = checkpoint_no;
+ }
+ }
- if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
- /* Log data ends here */
+ if (*max_field == 0) {
+ /* Before 10.2.2, we could get here during database
+ initialization if we created an ib_logfile0 file that
+ was filled with zeroes, and were killed. After
+ 10.2.2, we would reject such a file already earlier,
+ when checking the file header. */
+ ib::error() << "No valid checkpoint found"
+ " (corrupted redo log)."
+ " You can try --innodb-force-recovery=6"
+ " as a last resort.";
+ return(DB_ERROR);
+ }
-#if 0
- fprintf(stderr, "Log block data len %lu\n",
- data_len);
-#endif
+ switch (group->format) {
+ case LOG_HEADER_FORMAT_10_3:
+ case LOG_HEADER_FORMAT_10_3 | LOG_HEADER_FORMAT_ENCRYPTED:
+ if (group->subformat == 1) {
+ /* 10.2 with new crash-safe TRUNCATE */
break;
}
+ /* fall through */
+ case LOG_HEADER_FORMAT_10_4:
+ if (srv_operation == SRV_OPERATION_BACKUP) {
+ ib::error()
+ << "Incompatible redo log format."
+ " The redo log was created with " << creator;
+ return DB_ERROR;
+ }
+ dberr_t err = recv_log_recover_10_3();
+ if (err != DB_SUCCESS) {
+ ib::error()
+ << "Downgrade after a crash is not supported."
+ " The redo log was created with " << creator
+ << (err == DB_ERROR
+ ? "." : ", and it appears corrupted.");
+ }
+ return(err);
}
+
+ return(DB_SUCCESS);
}
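As the case labels above suggest, encryption is carried as a flag bit in the
32-bit format word rather than as a separate format value. A sketch of
splitting the word, assuming LOG_HEADER_FORMAT_ENCRYPTED is the most
significant bit (consistent with its use in the case labels; the actual
constant is defined in the log header declarations):

	const uint32_t format = mach_read_from_4(buf + LOG_HEADER_FORMAT);
	const bool encrypted = (format & LOG_HEADER_FORMAT_ENCRYPTED) != 0;
	const uint32_t version = format & ~LOG_HEADER_FORMAT_ENCRYPTED;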
-#endif /* UNIV_HOTBACKUP */
-/*******************************************************************//**
-Tries to parse a single log record body and also applies it to a page if
-specified. File ops are parsed, but not applied in this function.
-@return log record end, NULL if not a complete record */
+/** Try to parse a single log record body and also apply it if
+specified.
+@param[in] type redo log entry type
+@param[in] ptr redo log record body
+@param[in] end_ptr end of buffer
+@param[in] space_id tablespace identifier
+@param[in] page_no page number
+@param[in] apply whether to apply the record
+@param[in,out] block buffer block, or NULL if
+a page log record should not be applied
+or if it is a MLOG_FILE_ operation
+@param[in,out] mtr mini-transaction, or NULL if
+a page log record should not be applied
+@return log record end, NULL if not a complete record */
static
byte*
recv_parse_or_apply_log_rec_body(
-/*=============================*/
- byte type, /*!< in: type */
- byte* ptr, /*!< in: pointer to a buffer */
- byte* end_ptr,/*!< in: pointer to the buffer end */
- buf_block_t* block, /*!< in/out: buffer block or NULL; if
- not NULL, then the log record is
- applied to the page, and the log
- record should be complete then */
- mtr_t* mtr, /*!< in: mtr or NULL; should be non-NULL
- if and only if block is non-NULL */
- ulint space_id)
- /*!< in: tablespace id obtained by
- parsing initial log record */
+ mlog_id_t type,
+ byte* ptr,
+ byte* end_ptr,
+ ulint space_id,
+ ulint page_no,
+ bool apply,
+ buf_block_t* block,
+ mtr_t* mtr)
{
+ ut_ad(!block == !mtr);
+ ut_ad(!apply || recv_sys->mlog_checkpoint_lsn != 0);
+
+ switch (type) {
+ case MLOG_FILE_NAME:
+ case MLOG_FILE_DELETE:
+ case MLOG_FILE_CREATE2:
+ case MLOG_FILE_RENAME2:
+ ut_ad(block == NULL);
+ /* Collect the file names when parsing the log,
+ before applying any log records. */
+ return(fil_name_parse(ptr, end_ptr, space_id, page_no, type,
+ apply));
+ case MLOG_INDEX_LOAD:
+ if (end_ptr < ptr + 8) {
+ return(NULL);
+ }
+ return(ptr + 8);
+ case MLOG_TRUNCATE:
+ if (log_truncate) {
+ ut_ad(srv_operation != SRV_OPERATION_NORMAL);
+ log_truncate();
+ recv_sys->found_corrupt_fs = true;
+ return NULL;
+ }
+ return(truncate_t::parse_redo_entry(ptr, end_ptr, space_id));
+
+ default:
+ break;
+ }
+
dict_index_t* index = NULL;
page_t* page;
page_zip_des_t* page_zip;
@@ -1054,18 +1467,41 @@ recv_parse_or_apply_log_rec_body(
ulint page_type;
#endif /* UNIV_DEBUG */
- ut_ad(!block == !mtr);
-
if (block) {
+ /* Applying a page log record. */
+ ut_ad(apply);
page = block->frame;
page_zip = buf_block_get_page_zip(block);
ut_d(page_type = fil_page_get_type(page));
+ } else if (apply
+ && !is_predefined_tablespace(space_id)
+ && recv_spaces.find(space_id) == recv_spaces.end()) {
+ if (recv_sys->recovered_lsn < recv_sys->mlog_checkpoint_lsn) {
+ /* We have not seen all records between the
+ checkpoint and MLOG_CHECKPOINT. There should be
+ a MLOG_FILE_DELETE for this tablespace later. */
+ recv_spaces.insert(
+ std::make_pair(space_id,
+ file_name_t("", false)));
+ goto parse_log;
+ }
+
+ ib::error() << "Missing MLOG_FILE_NAME or MLOG_FILE_DELETE"
+ " for redo log record " << type << " (page "
+ << space_id << ":" << page_no << ") at "
+ << recv_sys->recovered_lsn << ".";
+ recv_sys->found_corrupt_log = true;
+ return(NULL);
} else {
+parse_log:
+ /* Parsing a page log record. */
page = NULL;
page_zip = NULL;
ut_d(page_type = FIL_PAGE_TYPE_ALLOCATED);
}
+ const byte* old_ptr = ptr;
+
switch (type) {
#ifdef UNIV_LOG_LSN_DEBUG
case MLOG_LSN:
@@ -1073,11 +1509,122 @@ recv_parse_or_apply_log_rec_body(
break;
#endif /* UNIV_LOG_LSN_DEBUG */
case MLOG_1BYTE: case MLOG_2BYTES: case MLOG_4BYTES: case MLOG_8BYTES:
- /* Note that crypt data can be set to empty page */
+#ifdef UNIV_DEBUG
+ if (page && page_type == FIL_PAGE_TYPE_ALLOCATED
+ && end_ptr >= ptr + 2) {
+ /* It is OK to set FIL_PAGE_TYPE and certain
+ list node fields on an empty page. Any other
+ write is not OK. */
+
+ /* NOTE: There may be bogus assertion failures for
+ dict_hdr_create(), trx_rseg_header_create(),
+ trx_sys_create_doublewrite_buf(), and
+ trx_sysf_create().
+ These are only called during database creation. */
+ ulint offs = mach_read_from_2(ptr);
+
+ switch (type) {
+ default:
+ ut_error;
+ case MLOG_2BYTES:
+ /* Note that this can fail when the
+				redo log has been written with something
+ older than InnoDB Plugin 1.0.4. */
+ ut_ad(offs == FIL_PAGE_TYPE
+ || srv_is_undo_tablespace(space_id)
+ || offs == IBUF_TREE_SEG_HEADER
+ + IBUF_HEADER + FSEG_HDR_OFFSET
+ || offs == PAGE_BTR_IBUF_FREE_LIST
+ + PAGE_HEADER + FIL_ADDR_BYTE
+ || offs == PAGE_BTR_IBUF_FREE_LIST
+ + PAGE_HEADER + FIL_ADDR_BYTE
+ + FIL_ADDR_SIZE
+ || offs == PAGE_BTR_SEG_LEAF
+ + PAGE_HEADER + FSEG_HDR_OFFSET
+ || offs == PAGE_BTR_SEG_TOP
+ + PAGE_HEADER + FSEG_HDR_OFFSET
+ || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
+ + PAGE_HEADER + FIL_ADDR_BYTE
+ + 0 /*FLST_PREV*/
+ || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
+ + PAGE_HEADER + FIL_ADDR_BYTE
+ + FIL_ADDR_SIZE /*FLST_NEXT*/);
+ break;
+ case MLOG_4BYTES:
+ /* Note that this can fail when the
+				redo log has been written with something
+ older than InnoDB Plugin 1.0.4. */
+ ut_ad(0
+ /* fil_crypt_rotate_page() writes this */
+ || offs == FIL_PAGE_SPACE_ID
+ || srv_is_undo_tablespace(space_id)
+ || offs == IBUF_TREE_SEG_HEADER
+ + IBUF_HEADER + FSEG_HDR_SPACE
+ || offs == IBUF_TREE_SEG_HEADER
+ + IBUF_HEADER + FSEG_HDR_PAGE_NO
+ || offs == PAGE_BTR_IBUF_FREE_LIST
+ + PAGE_HEADER/* flst_init */
+ || offs == PAGE_BTR_IBUF_FREE_LIST
+ + PAGE_HEADER + FIL_ADDR_PAGE
+ || offs == PAGE_BTR_IBUF_FREE_LIST
+ + PAGE_HEADER + FIL_ADDR_PAGE
+ + FIL_ADDR_SIZE
+ || offs == PAGE_BTR_SEG_LEAF
+ + PAGE_HEADER + FSEG_HDR_PAGE_NO
+ || offs == PAGE_BTR_SEG_LEAF
+ + PAGE_HEADER + FSEG_HDR_SPACE
+ || offs == PAGE_BTR_SEG_TOP
+ + PAGE_HEADER + FSEG_HDR_PAGE_NO
+ || offs == PAGE_BTR_SEG_TOP
+ + PAGE_HEADER + FSEG_HDR_SPACE
+ || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
+ + PAGE_HEADER + FIL_ADDR_PAGE
+ + 0 /*FLST_PREV*/
+ || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
+ + PAGE_HEADER + FIL_ADDR_PAGE
+ + FIL_ADDR_SIZE /*FLST_NEXT*/);
+ break;
+ }
+ }
+#endif /* UNIV_DEBUG */
ptr = mlog_parse_nbytes(type, ptr, end_ptr, page, page_zip);
+ if (ptr != NULL && page != NULL
+ && page_no == 0 && type == MLOG_4BYTES) {
+ ulint offs = mach_read_from_2(old_ptr);
+ switch (offs) {
+ fil_space_t* space;
+ ulint val;
+ default:
+ break;
+ case FSP_HEADER_OFFSET + FSP_SPACE_FLAGS:
+ case FSP_HEADER_OFFSET + FSP_SIZE:
+ case FSP_HEADER_OFFSET + FSP_FREE_LIMIT:
+ case FSP_HEADER_OFFSET + FSP_FREE + FLST_LEN:
+ space = fil_space_get(space_id);
+ ut_a(space != NULL);
+ val = mach_read_from_4(page + offs);
+
+ switch (offs) {
+ case FSP_HEADER_OFFSET + FSP_SPACE_FLAGS:
+ space->flags = val;
+ break;
+ case FSP_HEADER_OFFSET + FSP_SIZE:
+ space->size_in_header = val;
+ break;
+ case FSP_HEADER_OFFSET + FSP_FREE_LIMIT:
+ space->free_limit = val;
+ break;
+ case FSP_HEADER_OFFSET + FSP_FREE + FLST_LEN:
+ space->free_len = val;
+ ut_ad(val == flst_get_len(
+ page + offs));
+ break;
+ }
+ }
+ }
break;
case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
+ ut_ad(!page || fil_page_type_is_index(page_type));
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr,
@@ -1091,7 +1638,7 @@ recv_parse_or_apply_log_rec_body(
}
break;
case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
+ ut_ad(!page || fil_page_type_is_index(page_type));
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr,
@@ -1104,24 +1651,13 @@ recv_parse_or_apply_log_rec_body(
ptr, end_ptr, page, page_zip, index);
}
break;
- case MLOG_COMP_REC_SEC_DELETE_MARK:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
- /* This log record type is obsolete, but we process it for
- backward compatibility with MySQL 5.0.3 and 5.0.4. */
- ut_a(!page || page_is_comp(page));
- ut_a(!page_zip);
- ptr = mlog_parse_index(ptr, end_ptr, TRUE, &index);
- if (!ptr) {
- break;
- }
- /* Fall through */
case MLOG_REC_SEC_DELETE_MARK:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
+ ut_ad(!page || fil_page_type_is_index(page_type));
ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr,
page, page_zip);
break;
case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
+ ut_ad(!page || fil_page_type_is_index(page_type));
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr,
@@ -1136,7 +1672,7 @@ recv_parse_or_apply_log_rec_body(
break;
case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE:
case MLOG_LIST_START_DELETE: case MLOG_COMP_LIST_START_DELETE:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
+ ut_ad(!page || fil_page_type_is_index(page_type));
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr,
@@ -1151,7 +1687,7 @@ recv_parse_or_apply_log_rec_body(
}
break;
case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
+ ut_ad(!page || fil_page_type_is_index(page_type));
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr,
@@ -1167,7 +1703,7 @@ recv_parse_or_apply_log_rec_body(
case MLOG_PAGE_REORGANIZE:
case MLOG_COMP_PAGE_REORGANIZE:
case MLOG_ZIP_PAGE_REORGANIZE:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
+ ut_ad(!page || fil_page_type_is_index(page_type));
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr,
@@ -1185,9 +1721,11 @@ recv_parse_or_apply_log_rec_body(
case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE:
/* Allow anything in page_type when creating a page. */
ut_a(!page_zip);
- ptr = page_parse_create(ptr, end_ptr,
- type == MLOG_COMP_PAGE_CREATE,
- block, mtr);
+ page_parse_create(block, type == MLOG_COMP_PAGE_CREATE, false);
+ break;
+ case MLOG_PAGE_CREATE_RTREE: case MLOG_COMP_PAGE_CREATE_RTREE:
+ page_parse_create(block, type == MLOG_COMP_PAGE_CREATE_RTREE,
+ true);
break;
case MLOG_UNDO_INSERT:
ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
@@ -1201,10 +1739,6 @@ recv_parse_or_apply_log_rec_body(
/* Allow anything in page_type when creating a page. */
ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr);
break;
- case MLOG_UNDO_HDR_DISCARD:
- ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
- ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, mtr);
- break;
case MLOG_UNDO_HDR_CREATE:
case MLOG_UNDO_HDR_REUSE:
ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
@@ -1212,7 +1746,7 @@ recv_parse_or_apply_log_rec_body(
page, mtr);
break;
case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
+ ut_ad(!page || fil_page_type_is_index(page_type));
/* On a compressed page, MLOG_COMP_REC_MIN_MARK
will be followed by MLOG_COMP_REC_DELETE
or MLOG_ZIP_WRITE_HEADER(FIL_PAGE_PREV, FIL_NULL)
@@ -1223,7 +1757,7 @@ recv_parse_or_apply_log_rec_body(
page, mtr);
break;
case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
+ ut_ad(!page || fil_page_type_is_index(page_type));
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr,
@@ -1240,51 +1774,31 @@ recv_parse_or_apply_log_rec_body(
/* Allow anything in page_type when creating a page. */
ptr = ibuf_parse_bitmap_init(ptr, end_ptr, block, mtr);
break;
- case MLOG_INIT_FILE_PAGE:
+ case MLOG_INIT_FILE_PAGE2:
/* Allow anything in page_type when creating a page. */
- ptr = fsp_parse_init_file_page(ptr, end_ptr, block);
+ if (block) fsp_apply_init_file_page(block);
break;
case MLOG_WRITE_STRING:
- /* Allow setting crypt_data also for empty page */
ptr = mlog_parse_string(ptr, end_ptr, page, page_zip);
break;
- case MLOG_FILE_RENAME:
- /* Do not rerun file-based log entries if this is
- IO completion from a page read. */
- if (page == NULL) {
- ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type,
- space_id, 0);
- }
- break;
- case MLOG_FILE_CREATE:
- case MLOG_FILE_DELETE:
- case MLOG_FILE_CREATE2:
- /* Do not rerun file-based log entries if this is
- IO completion from a page read. */
- if (page == NULL) {
- ptr = fil_op_log_parse_or_replay(ptr, end_ptr,
- type, 0, 0);
- }
- break;
case MLOG_ZIP_WRITE_NODE_PTR:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
+ ut_ad(!page || fil_page_type_is_index(page_type));
ptr = page_zip_parse_write_node_ptr(ptr, end_ptr,
page, page_zip);
break;
case MLOG_ZIP_WRITE_BLOB_PTR:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
+ ut_ad(!page || fil_page_type_is_index(page_type));
ptr = page_zip_parse_write_blob_ptr(ptr, end_ptr,
page, page_zip);
break;
case MLOG_ZIP_WRITE_HEADER:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
+ ut_ad(!page || fil_page_type_is_index(page_type));
ptr = page_zip_parse_write_header(ptr, end_ptr,
page, page_zip);
break;
case MLOG_ZIP_PAGE_COMPRESS:
/* Allow anything in page_type when creating a page. */
- ptr = page_zip_parse_compress(ptr, end_ptr,
- page, page_zip);
+ ptr = page_zip_parse_compress(ptr, end_ptr, block);
break;
case MLOG_ZIP_PAGE_COMPRESS_NO_DATA:
if (NULL != (ptr = mlog_parse_index(
@@ -1306,7 +1820,10 @@ recv_parse_or_apply_log_rec_body(
break;
default:
ptr = NULL;
- recv_sys->found_corrupt_log = TRUE;
+ ib::error() << "Incorrect log record type "
+ << ib::hex(unsigned(type));
+
+ recv_sys->found_corrupt_log = true;
}
if (index) {
@@ -1322,7 +1839,7 @@ recv_parse_or_apply_log_rec_body(
/*********************************************************************//**
Calculates the fold value of a page file address: used in inserting or
searching for a log record in the hash table.
-@return folded value */
+@return folded value */
UNIV_INLINE
ulint
recv_fold(
@@ -1336,7 +1853,7 @@ recv_fold(
/*********************************************************************//**
Calculates the hash value of a page file address: used in inserting or
searching for a log record in the hash table.
-@return folded value */
+@return folded value */
UNIV_INLINE
ulint
recv_hash(
@@ -1349,7 +1866,7 @@ recv_hash(
/*********************************************************************//**
Gets the hashed file address struct for a page.
-@return file address struct, NULL if not found from the hash table */
+@return file address struct, NULL if not found from the hash table */
static
recv_addr_t*
recv_get_fil_addr_struct(
@@ -1357,6 +1874,8 @@ recv_get_fil_addr_struct(
ulint space, /*!< in: space id */
ulint page_no)/*!< in: page number */
{
+ ut_ad(mutex_own(&recv_sys->mutex));
+
recv_addr_t* recv_addr;
for (recv_addr = static_cast<recv_addr_t*>(
@@ -1382,13 +1901,13 @@ static
void
recv_add_to_hash_table(
/*===================*/
- byte type, /*!< in: log record type */
- ulint space, /*!< in: space id */
- ulint page_no, /*!< in: page number */
- byte* body, /*!< in: log record body */
- byte* rec_end, /*!< in: log record end */
- lsn_t start_lsn, /*!< in: start lsn of the mtr */
- lsn_t end_lsn) /*!< in: end lsn of the mtr */
+ mlog_id_t type, /*!< in: log record type */
+ ulint space, /*!< in: space id */
+ ulint page_no, /*!< in: page number */
+ byte* body, /*!< in: log record body */
+ byte* rec_end, /*!< in: log record end */
+ lsn_t start_lsn, /*!< in: start lsn of the mtr */
+ lsn_t end_lsn) /*!< in: end lsn of the mtr */
{
recv_t* recv;
ulint len;
@@ -1396,12 +1915,14 @@ recv_add_to_hash_table(
recv_data_t** prev_field;
recv_addr_t* recv_addr;
- if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) {
- /* The tablespace does not exist any more: do not store the
- log record */
-
- return;
- }
+ ut_ad(type != MLOG_FILE_DELETE);
+ ut_ad(type != MLOG_FILE_CREATE2);
+ ut_ad(type != MLOG_FILE_RENAME2);
+ ut_ad(type != MLOG_FILE_NAME);
+ ut_ad(type != MLOG_DUMMY_RECORD);
+ ut_ad(type != MLOG_CHECKPOINT);
+ ut_ad(type != MLOG_INDEX_LOAD);
+ ut_ad(type != MLOG_TRUNCATE);
len = rec_end - body;
@@ -1423,18 +1944,26 @@ recv_add_to_hash_table(
recv_addr->page_no = page_no;
recv_addr->state = RECV_NOT_PROCESSED;
- UT_LIST_INIT(recv_addr->rec_list);
+ UT_LIST_INIT(recv_addr->rec_list, &recv_t::rec_list);
HASH_INSERT(recv_addr_t, addr_hash, recv_sys->addr_hash,
recv_fold(space, page_no), recv_addr);
recv_sys->n_addrs++;
-#if 0
- fprintf(stderr, "Inserting log rec for space %lu, page %lu\n",
- space, page_no);
-#endif
}
- UT_LIST_ADD_LAST(rec_list, recv_addr->rec_list, recv);
+ switch (type) {
+ case MLOG_INIT_FILE_PAGE2:
+ case MLOG_ZIP_PAGE_COMPRESS:
+ /* Ignore any earlier redo log records for this page. */
+ ut_ad(recv_addr->state == RECV_NOT_PROCESSED
+ || recv_addr->state == RECV_WILL_NOT_READ);
+ recv_addr->state = RECV_WILL_NOT_READ;
+ mlog_init.add(space, page_no, start_lsn);
+ default:
+ break;
+ }
+
+ UT_LIST_ADD_LAST(recv_addr->rec_list, recv);
prev_field = &(recv->data);
@@ -1498,209 +2027,151 @@ recv_data_copy_to_buf(
}
}
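recv_data_copy_to_buf(), whose tail is shown above, reassembles a record body
that was stored as a chain of fixed-size pieces. A minimal sketch of the walk,
assuming each recv_data_t node is linked through its next pointer and is
immediately followed by up to RECV_DATA_BLOCK_SIZE payload bytes (which
matches how recv_add_to_hash_table() allocates the chain):

	ulint	len = recv->len;
	byte*	dst = buf;

	for (const recv_data_t* d = recv->data; d && len; d = d->next) {
		const ulint part = len < RECV_DATA_BLOCK_SIZE
			? len : RECV_DATA_BLOCK_SIZE;
		/* The payload begins right after the list node. */
		memcpy(dst, reinterpret_cast<const byte*>(d) + sizeof *d,
		       part);
		dst += part;
		len -= part;
	}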
-/************************************************************************//**
-Applies the hashed log records to the page, if the page lsn is less than the
-lsn of a log record. This can be called when a buffer page has just been
-read in, or also for a page already in the buffer pool. */
-UNIV_INTERN
-void
-recv_recover_page_func(
-/*===================*/
-#ifndef UNIV_HOTBACKUP
- ibool just_read_in,
- /*!< in: TRUE if the i/o handler calls
- this for a freshly read page */
-#endif /* !UNIV_HOTBACKUP */
- buf_block_t* block) /*!< in/out: buffer block */
+/** Apply the hashed log records to the page, if the page lsn is less than the
+lsn of a log record.
+@param[in,out] block buffer pool page
+@param[in,out] mtr mini-transaction
+@param[in,out] recv_addr recovery address
+@param[in] init_lsn the initial LSN where to start recovery */
+static void recv_recover_page(buf_block_t* block, mtr_t& mtr,
+ recv_addr_t* recv_addr, lsn_t init_lsn = 0)
{
page_t* page;
page_zip_des_t* page_zip;
- recv_addr_t* recv_addr;
- recv_t* recv;
- byte* buf;
- lsn_t start_lsn;
- lsn_t end_lsn;
- lsn_t page_lsn;
- lsn_t page_newest_lsn;
- ibool modification_to_page;
-#ifndef UNIV_HOTBACKUP
- ibool success;
-#endif /* !UNIV_HOTBACKUP */
- mtr_t mtr;
-
- mutex_enter(&(recv_sys->mutex));
-
- if (recv_sys->apply_log_recs == FALSE) {
-
- /* Log records should not be applied now */
-
- mutex_exit(&(recv_sys->mutex));
-
- return;
- }
-
- recv_addr = recv_get_fil_addr_struct(buf_block_get_space(block),
- buf_block_get_page_no(block));
-
- if ((recv_addr == NULL)
- || (recv_addr->state == RECV_BEING_PROCESSED)
- || (recv_addr->state == RECV_PROCESSED)) {
- mutex_exit(&(recv_sys->mutex));
+ ut_ad(mutex_own(&recv_sys->mutex));
+ ut_ad(recv_sys->apply_log_recs);
+ ut_ad(recv_needed_recovery);
+ ut_ad(recv_addr->state != RECV_BEING_PROCESSED);
+ ut_ad(recv_addr->state != RECV_PROCESSED);
- return;
+ if (UNIV_UNLIKELY(srv_print_verbose_log == 2)) {
+ fprintf(stderr, "Applying log to page %u:%u\n",
+ recv_addr->space, recv_addr->page_no);
}
-#if 0
- fprintf(stderr, "Recovering space %lu, page %lu\n",
- buf_block_get_space(block), buf_block_get_page_no(block));
-#endif
+ DBUG_LOG("ib_log", "Applying log to page " << block->page.id);
recv_addr->state = RECV_BEING_PROCESSED;
-
- mutex_exit(&(recv_sys->mutex));
-
- mtr_start(&mtr);
- mtr_set_log_mode(&mtr, MTR_LOG_NONE);
+ mutex_exit(&recv_sys->mutex);
page = block->frame;
page_zip = buf_block_get_page_zip(block);
-#ifndef UNIV_HOTBACKUP
- if (just_read_in) {
- /* Move the ownership of the x-latch on the page to
- this OS thread, so that we can acquire a second
- x-latch on it. This is needed for the operations to
- the page to pass the debug checks. */
-
- rw_lock_x_lock_move_ownership(&block->lock);
+ /* The page may have been modified in the buffer pool.
+ FIL_PAGE_LSN would only be updated right before flushing. */
+ lsn_t page_lsn = buf_page_get_newest_modification(&block->page);
+ if (!page_lsn) {
+ page_lsn = mach_read_from_8(page + FIL_PAGE_LSN);
}
- success = buf_page_get_known_nowait(RW_X_LATCH, block,
- BUF_KEEP_OLD,
- __FILE__, __LINE__,
- &mtr);
- ut_a(success);
-
- buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
-#endif /* !UNIV_HOTBACKUP */
-
- /* Read the newest modification lsn from the page */
- page_lsn = mach_read_from_8(page + FIL_PAGE_LSN);
-
-#ifndef UNIV_HOTBACKUP
- /* It may be that the page has been modified in the buffer
- pool: read the newest modification lsn there */
+ lsn_t start_lsn = 0, end_lsn = 0;
- page_newest_lsn = buf_page_get_newest_modification(&block->page);
-
- if (page_newest_lsn) {
-
- page_lsn = page_newest_lsn;
+ if (srv_is_tablespace_truncated(recv_addr->space)) {
+ /* The table will be truncated after applying
+ normal redo log records. */
+ goto skip_log;
}
-#else /* !UNIV_HOTBACKUP */
- /* In recovery from a backup we do not really use the buffer pool */
- page_newest_lsn = 0;
-#endif /* !UNIV_HOTBACKUP */
-
- modification_to_page = FALSE;
- start_lsn = end_lsn = 0;
-
- recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
- while (recv) {
+ for (recv_t* recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
+ recv; recv = UT_LIST_GET_NEXT(rec_list, recv)) {
+ ut_ad(recv->start_lsn);
end_lsn = recv->end_lsn;
-
- if (recv->len > RECV_DATA_BLOCK_SIZE) {
- /* We have to copy the record body to a separate
- buffer */
-
- buf = static_cast<byte*>(mem_alloc(recv->len));
-
- recv_data_copy_to_buf(buf, recv);
+ ut_ad(end_lsn <= log_sys->log.scanned_lsn);
+
+ if (recv->start_lsn < page_lsn) {
+ /* Ignore this record, because there are later changes
+ for this page. */
+ DBUG_LOG("ib_log", "apply skip "
+ << get_mlog_string(recv->type)
+ << " LSN " << recv->start_lsn << " < "
+ << page_lsn);
+ } else if (recv->start_lsn < init_lsn) {
+ DBUG_LOG("ib_log", "init skip "
+ << get_mlog_string(recv->type)
+ << " LSN " << recv->start_lsn << " < "
+ << init_lsn);
+ } else if (srv_was_tablespace_truncated(
+ fil_space_get(recv_addr->space))
+ && recv->start_lsn
+ < truncate_t::get_truncated_tablespace_init_lsn(
+ recv_addr->space)) {
+			/* The per-table tablespace was truncated, and
+			redo records written before the truncation
+			still remain to be applied as part of recovery
+			(no checkpoint has occurred since the
+			truncation). Skip such records, based on this
+			LSN check, because they may no longer be valid
+			after the truncation. */
} else {
- buf = ((byte*)(recv->data)) + sizeof(recv_data_t);
- }
-
- if (recv->type == MLOG_INIT_FILE_PAGE) {
- page_lsn = page_newest_lsn;
-
- memset(FIL_PAGE_LSN + page, 0, 8);
- memset(UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM
- + page, 0, 8);
-
- if (page_zip) {
- memset(FIL_PAGE_LSN + page_zip->data, 0, 8);
+ if (!start_lsn) {
+ start_lsn = recv->start_lsn;
}
- }
- if (recv->start_lsn >= page_lsn) {
+ if (UNIV_UNLIKELY(srv_print_verbose_log == 2)) {
+ fprintf(stderr, "apply " LSN_PF ":"
+ " %d len " ULINTPF " page %u:%u\n",
+ recv->start_lsn, recv->type, recv->len,
+ recv_addr->space, recv_addr->page_no);
+ }
- lsn_t end_lsn;
+ DBUG_LOG("ib_log", "apply " << recv->start_lsn << ": "
+ << get_mlog_string(recv->type)
+ << " len " << recv->len
+ << " page " << block->page.id);
- if (!modification_to_page) {
+ byte* buf;
- modification_to_page = TRUE;
- start_lsn = recv->start_lsn;
+ if (recv->len > RECV_DATA_BLOCK_SIZE) {
+ /* We have to copy the record body to
+ a separate buffer */
+ buf = static_cast<byte*>
+ (ut_malloc_nokey(recv->len));
+ recv_data_copy_to_buf(buf, recv);
+ } else {
+ buf = reinterpret_cast<byte*>(recv->data)
+ + sizeof *recv->data;
}
- DBUG_PRINT("ib_log",
- ("apply " LSN_PF ": %u len %u "
- "page %u:%u", recv->start_lsn,
- (unsigned) recv->type,
- (unsigned) recv->len,
- (unsigned) recv_addr->space,
- (unsigned) recv_addr->page_no));
-
- recv_parse_or_apply_log_rec_body(recv->type, buf,
- buf + recv->len,
- block, &mtr,
- recv_addr->space);
+ recv_parse_or_apply_log_rec_body(
+ recv->type, buf, buf + recv->len,
+ block->page.id.space(),
+ block->page.id.page_no(), true, block, &mtr);
end_lsn = recv->start_lsn + recv->len;
mach_write_to_8(FIL_PAGE_LSN + page, end_lsn);
- mach_write_to_8(UNIV_PAGE_SIZE
+ mach_write_to_8(srv_page_size
- FIL_PAGE_END_LSN_OLD_CHKSUM
+ page, end_lsn);
if (page_zip) {
- mach_write_to_8(FIL_PAGE_LSN
- + page_zip->data, end_lsn);
+ mach_write_to_8(FIL_PAGE_LSN + page_zip->data,
+ end_lsn);
}
- }
- if (recv->len > RECV_DATA_BLOCK_SIZE) {
- mem_free(buf);
+ if (recv->len > RECV_DATA_BLOCK_SIZE) {
+ ut_free(buf);
+ }
}
-
- recv = UT_LIST_GET_NEXT(rec_list, recv);
}
+skip_log:
#ifdef UNIV_ZIP_DEBUG
- if (fil_page_get_type(page) == FIL_PAGE_INDEX) {
- page_zip_des_t* page_zip = buf_block_get_page_zip(block);
-
- ut_a(!page_zip
- || page_zip_validate_low(page_zip, page, NULL, FALSE));
- }
+ ut_ad(!fil_page_index_page_check(page)
+ || !page_zip
+ || page_zip_validate_low(page_zip, page, NULL, FALSE));
#endif /* UNIV_ZIP_DEBUG */
-#ifndef UNIV_HOTBACKUP
- if (modification_to_page) {
- ut_a(block);
-
+ if (start_lsn) {
log_flush_order_mutex_enter();
buf_flush_recv_note_modification(block, start_lsn, end_lsn);
log_flush_order_mutex_exit();
}
-#endif /* !UNIV_HOTBACKUP */
/* Make sure that committing mtr does not change the modification
lsn values of page */
- mtr.modifications = FALSE;
-
- mtr_commit(&mtr);
+ mtr.discard_modifications();
+ mtr.commit();
time_t now = time(NULL);
@@ -1710,141 +2181,316 @@ recv_recover_page_func(
recv_max_page_lsn = page_lsn;
}
+ ut_ad(recv_addr->state == RECV_BEING_PROCESSED);
recv_addr->state = RECV_PROCESSED;
ut_a(recv_sys->n_addrs > 0);
if (ulint n = --recv_sys->n_addrs) {
if (recv_sys->report(now)) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "To recover: " ULINTPF " pages from log", n);
+ ib::info() << "To recover: " << n << " pages from log";
service_manager_extend_timeout(
INNODB_EXTEND_TIMEOUT_INTERVAL, "To recover: " ULINTPF " pages from log", n);
}
}
-
- mutex_exit(&recv_sys->mutex);
}
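The apply loop in recv_recover_page() reduces to one LSN rule per record: only
records newer than both the page and any later page initialization are
replayed. A condensed sketch of the decision:

	/* page_lsn: newest modification LSN of the page (buffer pool,
	or FIL_PAGE_LSN); init_lsn: nonzero only when the page was just
	created from a page-initialization record. */
	if (recv->start_lsn < page_lsn) {
		/* The change is already on the page: skip. */
	} else if (recv->start_lsn < init_lsn) {
		/* Superseded by a later page initialization: skip. */
	} else {
		/* Replay, then advance FIL_PAGE_LSN to
		recv->start_lsn + recv->len. */
	}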
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Reads in pages which have hashed log records, from an area around a given
-page number.
-@return number of pages found */
-static
-ulint
-recv_read_in_area(
-/*==============*/
- ulint space, /*!< in: space */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- ulint page_no)/*!< in: page number */
+/** Reduces recv_sys->n_addrs for the corrupted page.
+This function should be called when srv_force_recovery > 0.
+@param[in] page_id page id of the corrupted page */
+void recv_recover_corrupt_page(page_id_t page_id)
{
- recv_addr_t* recv_addr;
- ulint page_nos[RECV_READ_AHEAD_AREA];
- ulint low_limit;
- ulint n;
-
- low_limit = page_no - (page_no % RECV_READ_AHEAD_AREA);
-
- n = 0;
-
- for (page_no = low_limit; page_no < low_limit + RECV_READ_AHEAD_AREA;
- page_no++) {
- recv_addr = recv_get_fil_addr_struct(space, page_no);
+ ut_ad(srv_force_recovery);
+ mutex_enter(&recv_sys->mutex);
- if (recv_addr && !buf_page_peek(space, page_no)) {
+ if (!recv_sys->apply_log_recs) {
+ } else if (recv_addr_t* recv_addr = recv_get_fil_addr_struct(
+ page_id.space(), page_id.page_no())) {
+ switch (recv_addr->state) {
+ case RECV_WILL_NOT_READ:
+ ut_ad(!"wrong state");
+ break;
+ case RECV_BEING_PROCESSED:
+ case RECV_PROCESSED:
+ break;
+ default:
+ recv_addr->state = RECV_PROCESSED;
+ ut_ad(recv_sys->n_addrs);
+ recv_sys->n_addrs--;
+ }
+ }
- mutex_enter(&(recv_sys->mutex));
+ mutex_exit(&recv_sys->mutex);
+}
- if (recv_addr->state == RECV_NOT_PROCESSED) {
- recv_addr->state = RECV_BEING_READ;
+/** Apply any buffered redo log to a page that was just read from a data file.
+@param[in,out] bpage buffer pool page */
+void recv_recover_page(buf_page_t* bpage)
+{
+ mtr_t mtr;
+ mtr.start();
+ mtr.set_log_mode(MTR_LOG_NONE);
+
+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
+ buf_block_t* block = reinterpret_cast<buf_block_t*>(bpage);
+
+ /* Move the ownership of the x-latch on the page to
+ this OS thread, so that we can acquire a second
+ x-latch on it. This is needed for the operations to
+ the page to pass the debug checks. */
+ rw_lock_x_lock_move_ownership(&block->lock);
+ buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
+ ibool success = buf_page_get_known_nowait(
+ RW_X_LATCH, block, BUF_KEEP_OLD,
+ __FILE__, __LINE__, &mtr);
+ ut_a(success);
- page_nos[n] = page_no;
+ mutex_enter(&recv_sys->mutex);
+ if (!recv_sys->apply_log_recs) {
+ } else if (recv_addr_t* recv_addr = recv_get_fil_addr_struct(
+ bpage->id.space(), bpage->id.page_no())) {
+ switch (recv_addr->state) {
+ case RECV_BEING_PROCESSED:
+ case RECV_PROCESSED:
+ break;
+ default:
+ recv_recover_page(block, mtr, recv_addr);
+ goto func_exit;
+ }
+ }
- n++;
- }
+ mtr.commit();
+func_exit:
+ mutex_exit(&recv_sys->mutex);
+ ut_ad(mtr.has_committed());
+}
- mutex_exit(&(recv_sys->mutex));
+/** Reads in pages which have hashed log records, from an area around a given
+page number.
+@param[in] page_id page id */
+static void recv_read_in_area(const page_id_t page_id)
+{
+ ulint page_nos[RECV_READ_AHEAD_AREA];
+ ulint page_no = page_id.page_no()
+ - (page_id.page_no() % RECV_READ_AHEAD_AREA);
+ ulint* p = page_nos;
+
+ for (const ulint up_limit = page_no + RECV_READ_AHEAD_AREA;
+ page_no < up_limit; page_no++) {
+ recv_addr_t* recv_addr = recv_get_fil_addr_struct(
+ page_id.space(), page_no);
+ if (recv_addr
+ && recv_addr->state == RECV_NOT_PROCESSED
+ && !buf_page_peek(page_id_t(page_id.space(), page_no))) {
+ recv_addr->state = RECV_BEING_READ;
+ *p++ = page_no;
}
}
- buf_read_recv_pages(FALSE, space, zip_size, page_nos, n);
- return(n);
+ mutex_exit(&recv_sys->mutex);
+ buf_read_recv_pages(FALSE, page_id.space(), page_nos,
+ ulint(p - page_nos));
+ mutex_enter(&recv_sys->mutex);
+}
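The batch is aligned down to a multiple of RECV_READ_AHEAD_AREA so that
neighbouring hashed pages are fetched together. A worked example, assuming the
usual area of 32 pages: a request for page 70 scans the window 64..95 and
queues every page in it that still has unapplied records:

	/* For page_id.page_no() == 70 and RECV_READ_AHEAD_AREA == 32: */
	ulint page_no  = 70 - (70 % 32);	/* 64, window start */
	ulint up_limit = page_no + 32;		/* 96, exclusive end */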
+
+/** This is another low-level function for the recovery system
+to create a page which has buffered page initialization redo log records.
+@param[in]	page_id	page to be created using redo logs
+@param[in,out]	recv_addr	hashed redo log records for the given page id
+@return the recovered block, or NULL if the page was not created */
+static buf_block_t* recv_recovery_create_page_low(const page_id_t page_id,
+ recv_addr_t* recv_addr)
+{
+ mtr_t mtr;
+ mlog_init_t::init& i = mlog_init.last(page_id);
+ const lsn_t end_lsn = UT_LIST_GET_LAST(recv_addr->rec_list)->end_lsn;
+
+ if (end_lsn < i.lsn)
+ {
+ DBUG_LOG("ib_log", "skip log for page "
+ << page_id
+ << " LSN " << end_lsn
+ << " < " << i.lsn);
+ recv_addr->state = RECV_PROCESSED;
+ignore:
+ ut_a(recv_sys->n_addrs);
+ recv_sys->n_addrs--;
+ return NULL;
+ }
+
+ fil_space_t* space = fil_space_acquire(recv_addr->space);
+ if (!space)
+ {
+ recv_addr->state = RECV_PROCESSED;
+ goto ignore;
+ }
+
+ if (space->enable_lsn)
+ {
+init_fail:
+ fil_space_release(space);
+ recv_addr->state = RECV_NOT_PROCESSED;
+ return NULL;
+ }
+
+ /* Determine if a tablespace could be for an internal table
+ for FULLTEXT INDEX. For those tables, no MLOG_INDEX_LOAD record
+ used to be written when redo logging was disabled. Hence, we
+ cannot optimize away page reads, because all the redo
+ log records for initializing and modifying the page in the
+ past could be older than the page in the data file.
+
+ The check is too broad, causing all
+ tables whose names start with FTS_ to skip the optimization. */
+
+ if (strstr(space->name, "/FTS_"))
+ goto init_fail;
+
+ mtr.start();
+ mtr.set_log_mode(MTR_LOG_NONE);
+ buf_block_t* block = buf_page_create(page_id, page_size_t(space->flags),
+ &mtr);
+ if (recv_addr->state == RECV_PROCESSED)
+ /* The page happened to exist in the buffer pool, or it was
+ just being read in. Before buf_page_get_with_no_latch() returned,
+ all changes must have been applied to the page already. */
+ mtr.commit();
+ else
+ {
+ i.created = true;
+ buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
+ mtr.x_latch_at_savepoint(0, block);
+ recv_recover_page(block, mtr, recv_addr, i.lsn);
+ ut_ad(mtr.has_committed());
+ }
+
+ fil_space_release(space);
+ return block;
+}
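This function completes the RECV_WILL_NOT_READ optimization prepared in
recv_add_to_hash_table(): when a page is known to be fully re-initialized by
the redo log (MLOG_INIT_FILE_PAGE2 or MLOG_ZIP_PAGE_COMPRESS), its old on-disk
contents are irrelevant, so the page is created in the buffer pool instead of
being read. A condensed sketch of the decision, using the mlog_init
bookkeeping shown above:

	/* mlog_init remembers, per page, the start LSN of the latest
	page-initialization record; end_lsn is the end LSN of the last
	buffered record for the page. */
	if (end_lsn < i.lsn) {
		/* Every buffered record predates the latest
		initialization (presumably already applied in an
		earlier batch): nothing to replay here. */
	} else {
		/* Create the page, then replay only records with
		start_lsn >= i.lsn (passed as init_lsn). */
	}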
+
+/** This is a low-level function for the recovery system
+to create a page which has buffered page initialization redo log records.
+@param[in]	page_id	page to be created using redo logs
+@return the recovered block, or NULL if the page was not created */
+buf_block_t* recv_recovery_create_page_low(const page_id_t page_id)
+{
+ buf_block_t* block= NULL;
+ mutex_enter(&recv_sys->mutex);
+ recv_addr_t* recv_addr= recv_get_fil_addr_struct(page_id.space(),
+ page_id.page_no());
+ if (recv_addr && recv_addr->state == RECV_WILL_NOT_READ)
+ {
+ block= recv_recovery_create_page_low(page_id, recv_addr);
+ }
+ mutex_exit(&recv_sys->mutex);
+ return block;
}
/** Apply the hash table of stored log records to persistent data pages.
@param[in] last_batch whether the change buffer merge will be
performed as part of the operation */
-
-UNIV_INTERN
-void
-recv_apply_hashed_log_recs(bool last_batch)
+void recv_apply_hashed_log_recs(bool last_batch)
{
- for (;;) {
- mutex_enter(&recv_sys->mutex);
+ ut_ad(srv_operation == SRV_OPERATION_NORMAL
+ || is_mariabackup_restore_or_export());
- if (!recv_sys->apply_batch_on) {
- break;
- }
+ mutex_enter(&recv_sys->mutex);
- if (recv_sys->found_corrupt_log) {
- mutex_exit(&recv_sys->mutex);
+ while (recv_sys->apply_batch_on) {
+ bool abort = recv_sys->found_corrupt_log;
+ mutex_exit(&recv_sys->mutex);
+
+ if (abort) {
return;
}
- mutex_exit(&recv_sys->mutex);
os_thread_sleep(500000);
+ mutex_enter(&recv_sys->mutex);
}
- ut_ad(!last_batch == mutex_own(&log_sys->mutex));
+ ut_ad(!last_batch == log_mutex_own());
- if (!last_batch) {
- recv_no_ibuf_operations = TRUE;
- }
+ recv_no_ibuf_operations
+ = !last_batch || is_mariabackup_restore_or_export();
+
+ ut_d(recv_no_log_write = recv_no_ibuf_operations);
if (ulint n = recv_sys->n_addrs) {
const char* msg = last_batch
? "Starting final batch to recover "
: "Starting a batch to recover ";
- ib_logf(IB_LOG_LEVEL_INFO,
- "%s" ULINTPF " pages from redo log", msg, n);
+ ib::info() << msg << n << " pages from redo log.";
sd_notifyf(0, "STATUS=%s" ULINTPF " pages from redo log",
msg, n);
}
-
recv_sys->apply_log_recs = TRUE;
recv_sys->apply_batch_on = TRUE;
+ for (ulint id = srv_undo_tablespaces_open; id--; ) {
+ recv_sys_t::trunc& t = recv_sys->truncated_undo_spaces[id];
+ if (t.lsn) {
+ recv_addr_trim(id + srv_undo_space_id_start, t.pages,
+ t.lsn);
+ }
+ }
+
+ mtr_t mtr;
+
for (ulint i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) {
for (recv_addr_t* recv_addr = static_cast<recv_addr_t*>(
HASH_GET_FIRST(recv_sys->addr_hash, i));
recv_addr;
recv_addr = static_cast<recv_addr_t*>(
HASH_GET_NEXT(addr_hash, recv_addr))) {
+ if (!UT_LIST_GET_LEN(recv_addr->rec_list)) {
+ignore:
+ ut_a(recv_sys->n_addrs);
+ recv_sys->n_addrs--;
+ continue;
+ }
- ulint space = recv_addr->space;
- ulint zip_size = fil_space_get_zip_size(space);
- ulint page_no = recv_addr->page_no;
+ switch (recv_addr->state) {
+ case RECV_BEING_READ:
+ case RECV_BEING_PROCESSED:
+ case RECV_PROCESSED:
+ continue;
+ case RECV_DISCARDED:
+ goto ignore;
+ case RECV_NOT_PROCESSED:
+ case RECV_WILL_NOT_READ:
+ break;
+ }
- if (recv_addr->state == RECV_NOT_PROCESSED) {
- mutex_exit(&recv_sys->mutex);
+ if (srv_is_tablespace_truncated(recv_addr->space)) {
+ /* Avoid applying redo log to a tablespace
+ that is scheduled for TRUNCATE. */
+ recv_addr->state = RECV_DISCARDED;
+ goto ignore;
+ }
+
+ const page_id_t page_id(recv_addr->space,
+ recv_addr->page_no);
- if (buf_page_peek(space, page_no)) {
- mtr_t mtr;
- mtr_start(&mtr);
- buf_block_t* block = buf_page_get(
- space, zip_size, page_no,
- RW_X_LATCH, &mtr);
+ if (recv_addr->state == RECV_NOT_PROCESSED) {
+apply:
+ mtr.start();
+ mtr.set_log_mode(MTR_LOG_NONE);
+ if (buf_block_t* block = buf_page_get_low(
+ page_id, univ_page_size,
+ RW_X_LATCH, NULL,
+ BUF_GET_IF_IN_POOL,
+ __FILE__, __LINE__, &mtr, NULL)) {
buf_block_dbg_add_level(
block, SYNC_NO_ORDER_CHECK);
-
- recv_recover_page(FALSE, block);
- mtr_commit(&mtr);
+ recv_recover_page(block, mtr,
+ recv_addr);
+ ut_ad(mtr.has_committed());
} else {
- recv_read_in_area(space, zip_size,
- page_no);
+ mtr.commit();
+ recv_read_in_area(page_id);
}
-
- mutex_enter(&recv_sys->mutex);
+ } else if (!recv_recovery_create_page_low(
+ page_id, recv_addr)) {
+ goto apply;
}
}
}
@@ -1852,10 +2498,17 @@ recv_apply_hashed_log_recs(bool last_batch)
/* Wait until all the pages have been processed */
while (recv_sys->n_addrs != 0) {
+ const bool abort = recv_sys->found_corrupt_log
+ || recv_sys->found_corrupt_fs;
+
+ if (recv_sys->found_corrupt_fs && !srv_force_recovery) {
+ ib::info() << "Set innodb_force_recovery=1"
+ " to ignore corrupted pages.";
+ }
mutex_exit(&(recv_sys->mutex));
- if (recv_sys->found_corrupt_log) {
+ if (abort) {
return;
}
@@ -1865,14 +2518,11 @@ recv_apply_hashed_log_recs(bool last_batch)
}
if (!last_batch) {
- bool success;
-
/* Flush all the file pages to disk and invalidate them in
the buffer pool */
- ut_d(recv_no_log_write = TRUE);
mutex_exit(&(recv_sys->mutex));
- mutex_exit(&(log_sys->mutex));
+ log_mutex_exit();
/* Stop the recv_writer thread from issuing any LRU
flush batches. */
@@ -1881,22 +2531,22 @@ recv_apply_hashed_log_recs(bool last_batch)
/* Wait for any currently run batch to end. */
buf_flush_wait_LRU_batch_end();
- success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
-
- ut_a(success);
-
- buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
+ os_event_reset(recv_sys->flush_end);
+ recv_sys->flush_type = BUF_FLUSH_LIST;
+ os_event_set(recv_sys->flush_start);
+ os_event_wait(recv_sys->flush_end);
buf_pool_invalidate();
/* Allow batches from recv_writer thread. */
mutex_exit(&recv_sys->writer_mutex);
- mutex_enter(&(log_sys->mutex));
+ log_mutex_enter();
mutex_enter(&(recv_sys->mutex));
- ut_d(recv_no_log_write = FALSE);
-
- recv_no_ibuf_operations = FALSE;
+ mlog_init.reset();
+ } else if (!recv_no_ibuf_operations) {
+ /* We skipped this in buf_page_create(). */
+ mlog_init.ibuf_merge(mtr);
}
recv_sys->apply_log_recs = FALSE;
@@ -1906,181 +2556,73 @@ recv_apply_hashed_log_recs(bool last_batch)
mutex_exit(&recv_sys->mutex);
}
-#else /* !UNIV_HOTBACKUP */
-/*******************************************************************//**
-Applies log records in the hash table to a backup. */
-UNIV_INTERN
-void
-recv_apply_log_recs_for_backup(void)
-/*================================*/
-{
- recv_addr_t* recv_addr;
- ulint n_hash_cells;
- buf_block_t* block;
- ulint actual_size;
- ibool success;
- ulint error;
- ulint i;
-
- recv_sys->apply_log_recs = TRUE;
- recv_sys->apply_batch_on = TRUE;
-
- block = back_block1;
-
- n_hash_cells = hash_get_n_cells(recv_sys->addr_hash);
-
- for (i = 0; i < n_hash_cells; i++) {
- /* The address hash table is externally chained */
- recv_addr = hash_get_nth_cell(recv_sys->addr_hash, i)->node;
-
- while (recv_addr != NULL) {
-
- ulint zip_size
- = fil_space_get_zip_size(recv_addr->space);
-
- if (zip_size == ULINT_UNDEFINED) {
-#if 0
- fprintf(stderr,
- "InnoDB: Warning: cannot apply"
- " log record to"
- " tablespace %lu page %lu,\n"
- "InnoDB: because tablespace with"
- " that id does not exist.\n",
- recv_addr->space, recv_addr->page_no);
-#endif
- recv_addr->state = RECV_PROCESSED;
-
- ut_a(recv_sys->n_addrs);
- recv_sys->n_addrs--;
- goto skip_this_recv_addr;
- }
-
- /* We simulate a page read made by the buffer pool, to
- make sure the recovery apparatus works ok. We must init
- the block. */
-
- buf_page_init_for_backup_restore(
- recv_addr->space, recv_addr->page_no,
- zip_size, block);
-
- /* Extend the tablespace's last file if the page_no
- does not fall inside its bounds; we assume the last
- file is auto-extending, and mysqlbackup copied the file
- when it still was smaller */
-
- success = fil_extend_space_to_desired_size(
- &actual_size,
- recv_addr->space, recv_addr->page_no + 1);
- if (!success) {
- fprintf(stderr,
- "InnoDB: Fatal error: cannot extend"
- " tablespace %u to hold %u pages\n",
- recv_addr->space, recv_addr->page_no);
-
- exit(1);
- }
-
- /* Read the page from the tablespace file using the
- fil0fil.cc routines */
-
- if (zip_size) {
- error = fil_io(OS_FILE_READ, true,
- recv_addr->space, zip_size,
- recv_addr->page_no, 0, zip_size,
- block->page.zip.data, NULL, 0);
- if (error == DB_SUCCESS
- && !buf_zip_decompress(block, TRUE)) {
- exit(1);
- }
- } else {
- error = fil_io(OS_FILE_READ, true,
- recv_addr->space, 0,
- recv_addr->page_no, 0,
- UNIV_PAGE_SIZE,
- block->frame, NULL, 0);
- }
-
- if (error != DB_SUCCESS) {
- fprintf(stderr,
- "InnoDB: Fatal error: cannot read"
- " from tablespace"
- " %lu page number %lu\n",
- (ulong) recv_addr->space,
- (ulong) recv_addr->page_no);
-
- exit(1);
- }
-
- /* Apply the log records to this page */
- recv_recover_page(FALSE, block);
-
- /* Write the page back to the tablespace file using the
- fil0fil.cc routines */
-
- buf_flush_init_for_writing(
- block->frame, buf_block_get_page_zip(block),
- mach_read_from_8(block->frame + FIL_PAGE_LSN));
-
- if (zip_size) {
- error = fil_io(OS_FILE_WRITE, true,
- recv_addr->space, zip_size,
- recv_addr->page_no, 0,
- zip_size,
- block->page.zip.data, NULL, 0);
- } else {
- error = fil_io(OS_FILE_WRITE, true,
- recv_addr->space, 0,
- recv_addr->page_no, 0,
- UNIV_PAGE_SIZE,
- block->frame, NULL, 0);
- }
-skip_this_recv_addr:
- recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
- }
- }
- sd_notify(0, "STATUS=InnoDB: Apply batch for backup completed");
-
- recv_sys_empty_hash();
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*******************************************************************//**
-Tries to parse a single log record and returns its length.
-@return length of the record, or 0 if the record was not complete */
+/** Tries to parse a single log record.
+@param[out] type log record type
+@param[in] ptr pointer to a buffer
+@param[in] end_ptr end of the buffer
+@param[out] space_id tablespace identifier
+@param[out] page_no page number
+@param[in] apply whether to apply MLOG_FILE_* records
+@param[out] body start of log record body
+@return length of the record, or 0 if the record was not complete */
static
ulint
recv_parse_log_rec(
-/*===============*/
- byte* ptr, /*!< in: pointer to a buffer */
- byte* end_ptr,/*!< in: pointer to the buffer end */
- byte* type, /*!< out: type */
- ulint* space, /*!< out: space id */
- ulint* page_no,/*!< out: page number */
- byte** body) /*!< out: log record body start */
+ mlog_id_t* type,
+ byte* ptr,
+ byte* end_ptr,
+ ulint* space,
+ ulint* page_no,
+ bool apply,
+ byte** body)
{
byte* new_ptr;
*body = NULL;
+ UNIV_MEM_INVALID(type, sizeof *type);
+ UNIV_MEM_INVALID(space, sizeof *space);
+ UNIV_MEM_INVALID(page_no, sizeof *page_no);
+ UNIV_MEM_INVALID(body, sizeof *body);
+
if (ptr == end_ptr) {
return(0);
}
- if (*ptr == MLOG_MULTI_REC_END) {
-
- *type = *ptr;
-
- return(1);
- }
-
- if (*ptr == MLOG_DUMMY_RECORD) {
- *type = *ptr;
-
- *space = ULINT_UNDEFINED - 1; /* For debugging */
+ switch (*ptr) {
+#ifdef UNIV_LOG_LSN_DEBUG
+ case MLOG_LSN | MLOG_SINGLE_REC_FLAG:
+ case MLOG_LSN:
+ new_ptr = mlog_parse_initial_log_record(
+ ptr, end_ptr, type, space, page_no);
+ if (new_ptr != NULL) {
+ const lsn_t lsn = static_cast<lsn_t>(
+ *space) << 32 | *page_no;
+ ut_a(lsn == recv_sys->recovered_lsn);
+ }
+ *type = MLOG_LSN;
+ return(new_ptr - ptr);
+#endif /* UNIV_LOG_LSN_DEBUG */
+ case MLOG_MULTI_REC_END:
+ case MLOG_DUMMY_RECORD:
+ *type = static_cast<mlog_id_t>(*ptr);
return(1);
+ case MLOG_CHECKPOINT:
+ if (end_ptr < ptr + SIZE_OF_MLOG_CHECKPOINT) {
+ return(0);
+ }
+ *type = static_cast<mlog_id_t>(*ptr);
+ return(SIZE_OF_MLOG_CHECKPOINT);
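+ /* Layout of MLOG_CHECKPOINT, matching the
+ SIZE_OF_MLOG_CHECKPOINT == 1 + 8 assertion further below:
+	byte 0:     the type tag MLOG_CHECKPOINT
+	bytes 1..8: the checkpoint LSN, big-endian
+ which is why the payload is read with mach_read_from_8(ptr + 1). */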
+ case MLOG_MULTI_REC_END | MLOG_SINGLE_REC_FLAG:
+ case MLOG_DUMMY_RECORD | MLOG_SINGLE_REC_FLAG:
+ case MLOG_CHECKPOINT | MLOG_SINGLE_REC_FLAG:
+ ib::error() << "Incorrect log record type "
+ << ib::hex(unsigned(*ptr));
+ recv_sys->found_corrupt_log = true;
+ return(0);
}
new_ptr = mlog_parse_initial_log_record(ptr, end_ptr, type, space,
@@ -2092,34 +2634,33 @@ recv_parse_log_rec(
return(0);
}
-#ifdef UNIV_LOG_LSN_DEBUG
- if (*type == MLOG_LSN) {
- lsn_t lsn = (lsn_t) *space << 32 | *page_no;
-# ifdef UNIV_LOG_DEBUG
- ut_a(lsn == log_sys->old_lsn);
-# else /* UNIV_LOG_DEBUG */
- ut_a(lsn == recv_sys->recovered_lsn);
-# endif /* UNIV_LOG_DEBUG */
- }
-#endif /* UNIV_LOG_LSN_DEBUG */
+ const byte* old_ptr = new_ptr;
+ new_ptr = recv_parse_or_apply_log_rec_body(
+ *type, new_ptr, end_ptr, *space, *page_no, apply, NULL, NULL);
- byte* old_ptr = new_ptr;
- new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
- NULL, NULL, *space);
if (UNIV_UNLIKELY(new_ptr == NULL)) {
-
return(0);
}
if (*page_no == 0 && *type == MLOG_4BYTES
+ && apply
&& mach_read_from_2(old_ptr) == FSP_HEADER_OFFSET + FSP_SIZE) {
- ulint size;
- mach_parse_compressed(old_ptr + 2, end_ptr, &size);
- fil_space_set_recv_size(*space, size);
- }
+ old_ptr += 2;
+
+ ulint size = mach_parse_compressed(&old_ptr, end_ptr);
+
+ recv_spaces_t::iterator it = recv_spaces.find(*space);
- if (*page_no > recv_max_parsed_page_no) {
- recv_max_parsed_page_no = *page_no;
+ ut_ad(!recv_sys->mlog_checkpoint_lsn
+ || *space == TRX_SYS_SPACE
+ || srv_is_undo_tablespace(*space)
+ || it != recv_spaces.end());
+
+ if (it != recv_spaces.end() && !it->second.space) {
+ it->second.size = size;
+ }
+
+ fil_space_set_recv_size(*space, size);
}
return(new_ptr - ptr);
@@ -2150,120 +2691,128 @@ recv_calc_lsn_on_data_add(
return(lsn + lsn_len);
}
-#ifdef UNIV_LOG_DEBUG
-/*******************************************************//**
-Checks that the parser recognizes incomplete initial segments of a log
-record as incomplete. */
+/** Prints diagnostic info of corrupt log.
+@param[in] ptr pointer to corrupt log record
+@param[in] type type of the log record (could be garbage)
+@param[in] space tablespace ID (could be garbage)
+@param[in] page_no page number (could be garbage)
+@return whether processing should continue */
static
-void
-recv_check_incomplete_log_recs(
-/*===========================*/
- byte* ptr, /*!< in: pointer to a complete log record */
- ulint len) /*!< in: length of the log record */
+bool
+recv_report_corrupt_log(
+ const byte* ptr,
+ int type,
+ ulint space,
+ ulint page_no)
{
- ulint i;
- byte type;
- ulint space;
- ulint page_no;
- byte* body;
+ ib::error() <<
+ "############### CORRUPT LOG RECORD FOUND ##################";
+
+ const ulint ptr_offset = ulint(ptr - recv_sys->buf);
- for (i = 0; i < len; i++) {
- ut_a(0 == recv_parse_log_rec(ptr, ptr + i, &type, &space,
- &page_no, &body));
+ ib::info() << "Log record type " << type << ", page " << space << ":"
+ << page_no << ". Log parsing proceeded successfully up to "
+ << recv_sys->recovered_lsn << ". Previous log record type "
+ << recv_previous_parsed_rec_type << ", is multi "
+ << recv_previous_parsed_rec_is_multi << " Recv offset "
+ << ptr_offset << ", prev "
+ << recv_previous_parsed_rec_offset;
+
+ ut_ad(ptr <= recv_sys->buf + recv_sys->len);
+
+ const ulint limit = 100;
+ const ulint prev_offset = std::min(recv_previous_parsed_rec_offset,
+ ptr_offset);
+ const ulint before = std::min(prev_offset, limit);
+ const ulint after = std::min(recv_sys->len - ptr_offset, limit);
+
+ ib::info() << "Hex dump starting " << before << " bytes before and"
+ " ending " << after << " bytes after the corrupted record:";
+
+ const byte* start = recv_sys->buf + prev_offset - before;
+
+ ut_print_buf(stderr, start, ulint(ptr - start) + after);
+ putc('\n', stderr);
+
+ if (!srv_force_recovery) {
+ ib::info() << "Set innodb_force_recovery to ignore this error.";
+ return(false);
}
+
+ ib::warn() << "The log file may have been corrupt and it is possible"
+ " that the log scan did not proceed far enough in recovery!"
+ " Please run CHECK TABLE on your InnoDB tables to check"
+ " that they are ok! If mysqld crashes after this recovery; "
+ << FORCE_RECOVERY_MSG;
+ return(true);
}
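+
+/* Callers test the return value to decide whether parsing may
+continue; the pattern (used in the multi-record apply loop below) is:
+
+	if (recv_sys->found_corrupt_log
+	    && !recv_report_corrupt_log(ptr, type, space, page_no)) {
+		return(true);	// abort when innodb_force_recovery=0
+	}
+*/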
-#endif /* UNIV_LOG_DEBUG */
-/*******************************************************//**
-Prints diagnostic info of corrupt log. */
-static
-void
-recv_report_corrupt_log(
-/*====================*/
- byte* ptr, /*!< in: pointer to corrupt log record */
- byte type, /*!< in: type of the record */
- ulint space, /*!< in: space id, this may also be garbage */
- ulint page_no)/*!< in: page number, this may also be garbage */
+/** Report a MLOG_INDEX_LOAD operation.
+@param[in] space_id tablespace id
+@param[in] page_no page number
+@param[in] lsn log sequence number */
+ATTRIBUTE_COLD static void
+recv_mlog_index_load(ulint space_id, ulint page_no, lsn_t lsn)
{
- fprintf(stderr,
- "InnoDB: ############### CORRUPT LOG RECORD FOUND\n"
- "InnoDB: Log record type %lu, space id %lu, page number %lu\n"
- "InnoDB: Log parsing proceeded successfully up to " LSN_PF "\n"
- "InnoDB: Previous log record type %lu, is multi %lu\n"
- "InnoDB: Recv offset %lu, prev %lu\n",
- (ulong) type, (ulong) space, (ulong) page_no,
- recv_sys->recovered_lsn,
- (ulong) recv_previous_parsed_rec_type,
- (ulong) recv_previous_parsed_rec_is_multi,
- (ulong) (ptr - recv_sys->buf),
- (ulong) recv_previous_parsed_rec_offset);
-
- if ((ulint)(ptr - recv_sys->buf + 100)
- > recv_previous_parsed_rec_offset
- && (ulint)(ptr - recv_sys->buf + 100
- - recv_previous_parsed_rec_offset)
- < 200000) {
- fputs("InnoDB: Hex dump of corrupt log starting"
- " 100 bytes before the start\n"
- "InnoDB: of the previous log rec,\n"
- "InnoDB: and ending 100 bytes after the start"
- " of the corrupt rec:\n",
- stderr);
-
- ut_print_buf(stderr,
- recv_sys->buf
- + recv_previous_parsed_rec_offset - 100,
- ptr - recv_sys->buf + 200
- - recv_previous_parsed_rec_offset);
- putc('\n', stderr);
- }
-
-#ifndef UNIV_HOTBACKUP
- if (!srv_force_recovery) {
- fputs("InnoDB: Set innodb_force_recovery"
- " to ignore this error.\n", stderr);
+ recv_spaces_t::iterator it = recv_spaces.find(space_id);
+ if (it != recv_spaces.end()) {
+ it->second.mlog_index_load(lsn);
}
-#endif /* !UNIV_HOTBACKUP */
- fputs("InnoDB: WARNING: the log file may have been corrupt and it\n"
- "InnoDB: is possible that the log scan did not proceed\n"
- "InnoDB: far enough in recovery! Please run CHECK TABLE\n"
- "InnoDB: on your InnoDB tables to check that they are ok!\n"
- "InnoDB: If mysqld crashes after this recovery, look at\n"
- "InnoDB: " REFMAN "forcing-innodb-recovery.html\n"
- "InnoDB: about forcing recovery.\n", stderr);
+ if (log_optimized_ddl_op) {
+ log_optimized_ddl_op(space_id);
+ }
+}
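+
+/* Note: log_optimized_ddl_op is a callback pointer; when set (for
+example by a backup tool that must re-copy tablespaces whose indexes
+were built without redo logging), it is invoked with the tablespace
+ID of every MLOG_INDEX_LOAD record that is seen. */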
- fflush(stderr);
+/** Check whether the memory used by read redo log records exceeds
+the memory available from the buffer pool; if so, store
+last_stored_lsn unless this is the last phase.
+@param[in,out] store whether to store page operations
+@param[in] available_mem memory available in the buffer pool for
+ reading redo log records */
+static bool recv_sys_heap_check(store_t* store, ulint available_mem)
+{
+ if (*store != STORE_NO
+ && mem_heap_get_size(recv_sys->heap) >= available_mem)
+ {
+ if (*store == STORE_YES)
+ recv_sys->last_stored_lsn= recv_sys->recovered_lsn;
+
+ *store= STORE_NO;
+ DBUG_PRINT("ib_log",("Ran out of memory and last "
+ "stored lsn " LSN_PF " last stored offset "
+ ULINTPF "\n",recv_sys->recovered_lsn,
+ recv_sys->recovered_offset));
+ return true;
+ }
+
+ return false;
}
-/*******************************************************//**
-Parses log records from a buffer and stores them to a hash table to wait
-merging to file pages.
-@return currently always returns FALSE */
-static
-ibool
-recv_parse_log_recs(
-/*================*/
- ibool store_to_hash, /*!< in: TRUE if the records should be stored
- to the hash table; this is set to FALSE if just
- debug checking is needed */
- dberr_t* err) /*!< out: DB_SUCCESS if successfull,
- DB_ERROR if parsing fails. */
+/** Parse log records from a buffer and optionally store them in a
+hash table where they wait to be merged to file pages.
+@param[in] checkpoint_lsn the LSN of the latest checkpoint
+@param[in] store whether to store page operations
+@param[in] available_mem memory to read the redo logs
+@param[in] apply whether to apply the records
+@return whether the MLOG_CHECKPOINT record was seen for the first time,
+or corruption was noticed */
+bool recv_parse_log_recs(lsn_t checkpoint_lsn, store_t* store,
+ ulint available_mem, bool apply)
{
- byte* ptr;
- byte* end_ptr;
- ulint single_rec;
- ulint len;
- ulint total_len;
- lsn_t new_recovered_lsn;
- lsn_t old_lsn;
- byte type;
- ulint space;
- ulint page_no;
- byte* body;
- ulint n_recs;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
+ byte* ptr;
+ byte* end_ptr;
+ bool single_rec;
+ ulint len;
+ lsn_t new_recovered_lsn;
+ lsn_t old_lsn;
+ mlog_id_t type;
+ ulint space;
+ ulint page_no;
+ byte* body;
+ const bool last_phase = (*store == STORE_IF_EXISTS);
+
+ ut_ad(log_mutex_own());
+ ut_ad(mutex_own(&recv_sys->mutex));
ut_ad(recv_sys->parse_start_lsn != 0);
loop:
ptr = recv_sys->buf + recv_sys->recovered_offset;
@@ -2272,30 +2821,49 @@ loop:
if (ptr == end_ptr) {
- return(FALSE);
+ return(false);
+ }
+
+ /* Check for memory overflow, and skip parsing the remaining
+ redo log records if InnoDB has run out of memory */
+ if (recv_sys_heap_check(store, available_mem) && last_phase) {
+ return false;
}
- single_rec = (ulint)*ptr & MLOG_SINGLE_REC_FLAG;
+ switch (*ptr) {
+ case MLOG_CHECKPOINT:
+#ifdef UNIV_LOG_LSN_DEBUG
+ case MLOG_LSN:
+#endif /* UNIV_LOG_LSN_DEBUG */
+ case MLOG_DUMMY_RECORD:
+ single_rec = true;
+ break;
+ default:
+ single_rec = !!(*ptr & MLOG_SINGLE_REC_FLAG);
+ }
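+ /* Sketch of the type-byte encoding assumed here: the high bit
+ of the first byte is MLOG_SINGLE_REC_FLAG, so for ordinary
+ record types
+
+	single_rec == ((*ptr & 0x80) != 0)
+
+ while MLOG_CHECKPOINT and MLOG_DUMMY_RECORD (and MLOG_LSN in
+ UNIV_LOG_LSN_DEBUG builds) are treated as single-record
+ mini-transactions even without the flag. */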
- if (single_rec || *ptr == MLOG_DUMMY_RECORD) {
- /* The mtr only modified a single page, or this is a file op */
+ if (single_rec) {
+ /* The mtr did not modify multiple pages */
old_lsn = recv_sys->recovered_lsn;
/* Try to parse a log record, fetching its type, space id,
page no, and a pointer to the body of the log record */
- len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
- &page_no, &body);
+ len = recv_parse_log_rec(&type, ptr, end_ptr, &space,
+ &page_no, apply, &body);
- if (len == 0 || recv_sys->found_corrupt_log) {
- if (recv_sys->found_corrupt_log) {
+ if (recv_sys->found_corrupt_log) {
+ recv_report_corrupt_log(ptr, type, space, page_no);
+ return(true);
+ }
- recv_report_corrupt_log(ptr,
- type, space, page_no);
- }
+ if (recv_sys->found_corrupt_fs) {
+ return(true);
+ }
- return(FALSE);
+ if (len == 0) {
+ return(false);
}
new_recovered_lsn = recv_calc_lsn_on_data_add(old_lsn, len);
@@ -2305,114 +2873,157 @@ loop:
that also the next log block should have been scanned
in */
- return(FALSE);
+ return(false);
}
- recv_previous_parsed_rec_type = (ulint) type;
+ recv_previous_parsed_rec_type = type;
recv_previous_parsed_rec_offset = recv_sys->recovered_offset;
recv_previous_parsed_rec_is_multi = 0;
recv_sys->recovered_offset += len;
recv_sys->recovered_lsn = new_recovered_lsn;
- DBUG_PRINT("ib_log",
- ("scan " LSN_PF ": log rec %u len %u "
- "page %u:%u", old_lsn,
- (unsigned) type, (unsigned) len,
- (unsigned) space, (unsigned) page_no));
-
- if (type == MLOG_DUMMY_RECORD) {
+ switch (type) {
+ lsn_t lsn;
+ case MLOG_DUMMY_RECORD:
/* Do nothing */
+ break;
+ case MLOG_CHECKPOINT:
+#if SIZE_OF_MLOG_CHECKPOINT != 1 + 8
+# error SIZE_OF_MLOG_CHECKPOINT != 1 + 8
+#endif
+ lsn = mach_read_from_8(ptr + 1);
- } else if (!store_to_hash) {
- /* In debug checking, update a replicate page
- according to the log record, and check that it
- becomes identical with the original page */
-#ifdef UNIV_LOG_DEBUG
- recv_check_incomplete_log_recs(ptr, len);
-#endif/* UNIV_LOG_DEBUG */
-
- } else if (type == MLOG_FILE_CREATE
- || type == MLOG_FILE_CREATE2
- || type == MLOG_FILE_RENAME
- || type == MLOG_FILE_DELETE) {
- ut_a(space);
-#ifdef UNIV_HOTBACKUP
- if (recv_replay_file_ops) {
-
- /* In mysqlbackup --apply-log, replay an .ibd
- file operation, if possible; note that
- fil_path_to_mysql_datadir is set in mysqlbackup
- to point to the datadir we should use there */
-
- if (NULL == fil_op_log_parse_or_replay(
- body, end_ptr, type,
- space, page_no)) {
- fprintf(stderr,
- "InnoDB: Error: file op"
- " log record of type %lu"
- " space %lu not complete in\n"
- "InnoDB: the replay phase."
- " Path %s\n",
- (ulint) type, space,
- (char*)(body + 2));
-
- *err = DB_ERROR;
- return(FALSE);
+ if (UNIV_UNLIKELY(srv_print_verbose_log == 2)) {
+ fprintf(stderr,
+ "MLOG_CHECKPOINT(" LSN_PF ") %s at "
+ LSN_PF "\n", lsn,
+ lsn != checkpoint_lsn ? "ignored"
+ : recv_sys->mlog_checkpoint_lsn
+ ? "reread" : "read",
+ recv_sys->recovered_lsn);
+ }
+
+ DBUG_PRINT("ib_log",
+ ("MLOG_CHECKPOINT(" LSN_PF ") %s at "
+ LSN_PF,
+ lsn,
+ lsn != checkpoint_lsn ? "ignored"
+ : recv_sys->mlog_checkpoint_lsn
+ ? "reread" : "read",
+ recv_sys->recovered_lsn));
+
+ if (lsn == checkpoint_lsn) {
+ if (recv_sys->mlog_checkpoint_lsn) {
+ ut_ad(recv_sys->mlog_checkpoint_lsn
+ <= recv_sys->recovered_lsn);
+ break;
}
+ recv_sys->mlog_checkpoint_lsn
+ = recv_sys->recovered_lsn;
+ return(true);
}
-#endif
- /* In normal mysqld crash recovery we do not try to
- replay file operations */
+ break;
#ifdef UNIV_LOG_LSN_DEBUG
- } else if (type == MLOG_LSN) {
+ case MLOG_LSN:
/* Do not add these records to the hash table.
The page number and space id fields are misused
for something else. */
+ break;
#endif /* UNIV_LOG_LSN_DEBUG */
- } else {
- recv_add_to_hash_table(type, space, page_no, body,
- ptr + len, old_lsn,
- recv_sys->recovered_lsn);
+ default:
+ switch (*store) {
+ case STORE_NO:
+ break;
+ case STORE_IF_EXISTS:
+ if (fil_space_get_flags(space)
+ == ULINT_UNDEFINED) {
+ break;
+ }
+ /* fall through */
+ case STORE_YES:
+ recv_add_to_hash_table(
+ type, space, page_no, body,
+ ptr + len, old_lsn,
+ recv_sys->recovered_lsn);
+ }
+ /* fall through */
+ case MLOG_INDEX_LOAD:
+ if (type == MLOG_INDEX_LOAD) {
+ recv_mlog_index_load(space, page_no, old_lsn);
+ }
+ /* fall through */
+ case MLOG_FILE_NAME:
+ case MLOG_FILE_DELETE:
+ case MLOG_FILE_CREATE2:
+ case MLOG_FILE_RENAME2:
+ case MLOG_TRUNCATE:
+ /* These were already handled by
+ recv_parse_log_rec() and
+ recv_parse_or_apply_log_rec_body(). */
+ DBUG_PRINT("ib_log",
+ ("scan " LSN_PF ": log rec %s"
+ " len " ULINTPF
+ " page " ULINTPF ":" ULINTPF,
+ old_lsn, get_mlog_string(type),
+ len, space, page_no));
}
} else {
/* Check that all the records associated with the single mtr
are included within the buffer */
- total_len = 0;
- n_recs = 0;
+ ulint total_len = 0;
+ ulint n_recs = 0;
+ bool only_mlog_file = true;
+ ulint mlog_rec_len = 0;
for (;;) {
- len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
- &page_no, &body);
- if (len == 0 || recv_sys->found_corrupt_log) {
+ len = recv_parse_log_rec(
+ &type, ptr, end_ptr, &space, &page_no,
+ false, &body);
- if (recv_sys->found_corrupt_log) {
+ if (recv_sys->found_corrupt_log) {
+corrupted_log:
+ recv_report_corrupt_log(
+ ptr, type, space, page_no);
+ return(true);
+ }
- recv_report_corrupt_log(
- ptr, type, space, page_no);
- }
+ if (ptr == end_ptr) {
+ } else if (type == MLOG_CHECKPOINT
+ || (*ptr & MLOG_SINGLE_REC_FLAG)) {
+ recv_sys->found_corrupt_log = true;
+ goto corrupted_log;
+ }
- return(FALSE);
+ if (recv_sys->found_corrupt_fs) {
+ return(true);
}
- recv_previous_parsed_rec_type = (ulint) type;
+ if (len == 0) {
+ return(false);
+ }
+
+ recv_previous_parsed_rec_type = type;
recv_previous_parsed_rec_offset
= recv_sys->recovered_offset + total_len;
recv_previous_parsed_rec_is_multi = 1;
-#ifdef UNIV_LOG_DEBUG
- if ((!store_to_hash) && (type != MLOG_MULTI_REC_END)) {
- recv_check_incomplete_log_recs(ptr, len);
+ /* MLOG_FILE_NAME redo log records do not change
+ persistent data. If the mini-transaction has so far
+ consisted only of MLOG_FILE_NAME records, advance the
+ parsing buffer pointer past them by updating
+ recovered_lsn and recovered_offset. */
+ if (type != MLOG_FILE_NAME && only_mlog_file) {
+ only_mlog_file = false;
}
-#endif /* UNIV_LOG_DEBUG */
- DBUG_PRINT("ib_log",
- ("scan " LSN_PF ": multi-log rec %u "
- "len %u page %u:%u",
- recv_sys->recovered_lsn,
- (unsigned) type, (unsigned) len,
- (unsigned) space, (unsigned) page_no));
+ if (only_mlog_file) {
+ new_recovered_lsn = recv_calc_lsn_on_data_add(
+ recv_sys->recovered_lsn, len);
+ mlog_rec_len += len;
+ recv_sys->recovered_offset += len;
+ recv_sys->recovered_lsn = new_recovered_lsn;
+ }
total_len += len;
n_recs++;
@@ -2420,11 +3031,23 @@ loop:
ptr += len;
if (type == MLOG_MULTI_REC_END) {
-
- /* Found the end mark for the records */
-
+ DBUG_PRINT("ib_log",
+ ("scan " LSN_PF
+ ": multi-log end"
+ " total_len " ULINTPF
+ " n=" ULINTPF,
+ recv_sys->recovered_lsn,
+ total_len, n_recs));
+ total_len -= mlog_rec_len;
break;
}
+
+ DBUG_PRINT("ib_log",
+ ("scan " LSN_PF ": multi-log rec %s"
+ " len " ULINTPF
+ " page " ULINTPF ":" ULINTPF,
+ recv_sys->recovered_lsn,
+ get_mlog_string(type), len, space, page_no));
}
new_recovered_lsn = recv_calc_lsn_on_data_add(
@@ -2435,7 +3058,7 @@ loop:
that also the next log block should have been scanned
in */
- return(FALSE);
+ return(false);
}
/* Add all the records to the hash table */
@@ -2444,36 +3067,71 @@ loop:
for (;;) {
old_lsn = recv_sys->recovered_lsn;
- len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
- &page_no, &body);
- if (recv_sys->found_corrupt_log) {
+ /* This will apply MLOG_FILE_ records. We
+ had to skip them in the first scan, because we
+ did not know if the mini-transaction was
+ completely recovered (until MLOG_MULTI_REC_END). */
+ len = recv_parse_log_rec(
+ &type, ptr, end_ptr, &space, &page_no,
+ apply, &body);
+
+ if (recv_sys->found_corrupt_log
+ && !recv_report_corrupt_log(
+ ptr, type, space, page_no)) {
+ return(true);
+ }
- recv_report_corrupt_log(ptr,
- type, space, page_no);
+ if (recv_sys->found_corrupt_fs) {
+ return(true);
}
ut_a(len != 0);
- ut_a(0 == ((ulint)*ptr & MLOG_SINGLE_REC_FLAG));
+ ut_a(!(*ptr & MLOG_SINGLE_REC_FLAG));
recv_sys->recovered_offset += len;
recv_sys->recovered_lsn
= recv_calc_lsn_on_data_add(old_lsn, len);
- if (type == MLOG_MULTI_REC_END) {
+ switch (type) {
+ case MLOG_MULTI_REC_END:
/* Found the end mark for the records */
-
- break;
- }
-
- if (store_to_hash
+ goto loop;
#ifdef UNIV_LOG_LSN_DEBUG
- && type != MLOG_LSN
+ case MLOG_LSN:
+ /* Do not add these records to the hash table.
+ The page number and space id fields are misused
+ for something else. */
+ break;
#endif /* UNIV_LOG_LSN_DEBUG */
- ) {
- recv_add_to_hash_table(type, space, page_no,
- body, ptr + len,
- old_lsn,
- new_recovered_lsn);
+ case MLOG_INDEX_LOAD:
+ recv_mlog_index_load(space, page_no, old_lsn);
+ break;
+ case MLOG_FILE_NAME:
+ case MLOG_FILE_DELETE:
+ case MLOG_FILE_CREATE2:
+ case MLOG_FILE_RENAME2:
+ case MLOG_TRUNCATE:
+ /* These were already handled by
+ recv_parse_log_rec() and
+ recv_parse_or_apply_log_rec_body(). */
+ break;
+ default:
+ switch (*store) {
+ case STORE_NO:
+ break;
+ case STORE_IF_EXISTS:
+ if (fil_space_get_flags(space)
+ == ULINT_UNDEFINED) {
+ break;
+ }
+ /* fall through */
+ case STORE_YES:
+ recv_add_to_hash_table(
+ type, space, page_no,
+ body, ptr + len,
+ old_lsn,
+ new_recovered_lsn);
+ }
}
ptr += len;
@@ -2483,17 +3141,13 @@ loop:
goto loop;
}
-/*******************************************************//**
-Adds data from a new log block to the parsing buffer of recv_sys if
+/** Adds data from a new log block to the parsing buffer of recv_sys if
recv_sys->parse_start_lsn is non-zero.
-@return TRUE if more data added */
-static
-ibool
-recv_sys_add_to_parsing_buf(
-/*========================*/
- const byte* log_block, /*!< in: log block */
- lsn_t scanned_lsn) /*!< in: lsn of how far we were able
- to find data in this log block */
+@param[in] log_block log block to add
+@param[in] scanned_lsn lsn of how far we were able to find
+ data in this log block
+@return true if more data added */
+bool recv_sys_add_to_parsing_buf(const byte* log_block, lsn_t scanned_lsn)
{
ulint more_len;
ulint data_len;
@@ -2505,19 +3159,18 @@ recv_sys_add_to_parsing_buf(
if (!recv_sys->parse_start_lsn) {
/* Cannot start parsing yet because no start point for
it found */
-
- return(FALSE);
+ return(false);
}
data_len = log_block_get_data_len(log_block);
if (recv_sys->parse_start_lsn >= scanned_lsn) {
- return(FALSE);
+ return(false);
} else if (recv_sys->scanned_lsn >= scanned_lsn) {
- return(FALSE);
+ return(false);
} else if (recv_sys->parse_start_lsn > recv_sys->scanned_lsn) {
more_len = (ulint) (scanned_lsn - recv_sys->parse_start_lsn);
@@ -2526,8 +3179,7 @@ recv_sys_add_to_parsing_buf(
}
if (more_len == 0) {
-
- return(FALSE);
+ return(false);
}
ut_ad(data_len >= more_len);
@@ -2555,127 +3207,64 @@ recv_sys_add_to_parsing_buf(
ut_a(recv_sys->len <= RECV_PARSING_BUF_SIZE);
}
- return(TRUE);
+ return(true);
}
-/*******************************************************//**
-Moves the parsing buffer data left to the buffer start. */
-static
-void
-recv_sys_justify_left_parsing_buf(void)
-/*===================================*/
+/** Moves the parsing buffer data left to the buffer start. */
+void recv_sys_justify_left_parsing_buf()
{
- ut_memmove(recv_sys->buf, recv_sys->buf + recv_sys->recovered_offset,
- recv_sys->len - recv_sys->recovered_offset);
+ memmove(recv_sys->buf,
+ recv_sys->buf + recv_sys->recovered_offset,
+ recv_sys->len - recv_sys->recovered_offset);
recv_sys->len -= recv_sys->recovered_offset;
recv_sys->recovered_offset = 0;
}
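+
+/* Effect, as a sketch:
+
+	before: [ consumed 0..recovered_offset | unparsed tail ]
+	after:  [ unparsed tail ]
+
+with len reduced by recovered_offset and recovered_offset reset to 0. */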
-/*******************************************************//**
-Scans log from a buffer and stores new log data to the parsing buffer.
-Parses and hashes the log records if new data found. Unless
-UNIV_HOTBACKUP is defined, this function will apply log records
-automatically when the hash table becomes full.
-@return TRUE if limit_lsn has been reached, or not able to scan any
-more in this log group */
-UNIV_INTERN
-ibool
-recv_scan_log_recs(
-/*===============*/
- ulint available_memory,/*!< in: we let the hash table of recs
- to grow to this size, at the maximum */
- ibool store_to_hash, /*!< in: TRUE if the records should be
- stored to the hash table; this is set
- to FALSE if just debug checking is
- needed */
- const byte* buf, /*!< in: buffer containing a log
- segment or garbage */
- ulint len, /*!< in: buffer length */
- lsn_t start_lsn, /*!< in: buffer start lsn */
- lsn_t* contiguous_lsn, /*!< in/out: it is known that all log
- groups contain contiguous log data up
- to this lsn */
- lsn_t* group_scanned_lsn,/*!< out: scanning succeeded up to
- this lsn */
- dberr_t* err) /*!< out: error code or DB_SUCCESS */
+/** Scan redo log from a buffer and store new log data in the parsing buffer.
+Parse and hash the log records if new data is found.
+Apply log records automatically when the hash table becomes full.
+@param[in] available_mem we let the hash table of recs to
+ grow to this size, at the maximum
+@param[in,out] store_to_hash whether the records should be
+ stored to the hash table; this is
+ reset to STORE_NO when the
+ available_mem runs out
+@param[in] log_block log segment
+@param[in] checkpoint_lsn latest checkpoint LSN
+@param[in] start_lsn buffer start LSN
+@param[in] end_lsn buffer end LSN
+@param[in,out] contiguous_lsn it is known that all groups contain
+ contiguous log data up to this lsn
+@param[out] group_scanned_lsn scanning succeeded up to this lsn
+@return true if not able to scan any more in this log group */
+static bool recv_scan_log_recs(
+ ulint available_mem,
+ store_t* store_to_hash,
+ const byte* log_block,
+ lsn_t checkpoint_lsn,
+ lsn_t start_lsn,
+ lsn_t end_lsn,
+ lsn_t* contiguous_lsn,
+ lsn_t* group_scanned_lsn)
{
- const byte* log_block;
- ulint no;
- lsn_t scanned_lsn;
- ibool finished;
+ lsn_t scanned_lsn = start_lsn;
+ bool finished = false;
ulint data_len;
- ibool more_data;
- bool maybe_encrypted=false;
-
+ bool more_data = false;
+ bool apply = recv_sys->mlog_checkpoint_lsn != 0;
+ ulint recv_parsing_buf_size = RECV_PARSING_BUF_SIZE;
+ const bool last_phase = (*store_to_hash == STORE_IF_EXISTS);
ut_ad(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_ad(len >= OS_FILE_LOG_BLOCK_SIZE);
- ut_a(store_to_hash <= TRUE);
-
- finished = FALSE;
-
- log_block = buf;
- scanned_lsn = start_lsn;
- more_data = FALSE;
- *err = DB_SUCCESS;
+ ut_ad(end_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
+ ut_ad(end_lsn >= start_lsn + OS_FILE_LOG_BLOCK_SIZE);
+ const byte* const log_end = log_block
+ + ulint(end_lsn - start_lsn);
do {
- log_crypt_err_t log_crypt_err;
-
- no = log_block_get_hdr_no(log_block);
- /*
- fprintf(stderr, "Log block header no %lu\n", no);
-
- fprintf(stderr, "Scanned lsn no %lu\n",
- log_block_convert_lsn_to_no(scanned_lsn));
- */
- if (no != log_block_convert_lsn_to_no(scanned_lsn)) {
- /* Garbage or an incompletely written log block.
- We will not report any error; because this can happen
- when InnoDB was killed while it was writing
- redo log. We simply treat this as an abrupt end of the
- redo log. */
- finished = true;
- break;
- } else if (!log_block_checksum_is_ok_or_old_format(
- log_block, true)) {
-
- fprintf(stderr,
- "InnoDB: Log block no %lu at"
- " lsn " LSN_PF " has\n"
- "InnoDB: ok header, but checksum field"
- " contains %lu, should be %lu\n",
- (ulong) no,
- scanned_lsn,
- (ulong) log_block_get_checksum(log_block),
- (ulong) log_block_calc_checksum(log_block));
-
- maybe_encrypted = log_crypt_block_maybe_encrypted(log_block,
- &log_crypt_err);
-
- /* Print checkpoint encryption keys if present */
- log_crypt_print_checkpoint_keys(log_block);
- finished = TRUE;
-
- if (maybe_encrypted) {
- /* Log block maybe encrypted finish processing*/
- log_crypt_print_error(log_crypt_err);
- *err = DB_ERROR;
- return (TRUE);
- }
-
- /* Stop if we encounter a garbage log block */
- if (!srv_force_recovery) {
- fputs("InnoDB: Set innodb_force_recovery"
- " to ignore this error.\n", stderr);
- *err = DB_ERROR;
- return (TRUE);
- }
-
- break;
- }
+ ut_ad(!finished);
if (log_block_get_flush_bit(log_block)) {
/* This block was a start of a log flush operation:
@@ -2692,26 +3281,16 @@ recv_scan_log_recs(
data_len = log_block_get_data_len(log_block);
- if ((store_to_hash || (data_len == OS_FILE_LOG_BLOCK_SIZE))
- && scanned_lsn + data_len > recv_sys->scanned_lsn
- && (recv_sys->scanned_checkpoint_no > 0)
- && (log_block_get_checkpoint_no(log_block)
- < recv_sys->scanned_checkpoint_no)
+ if (scanned_lsn + data_len > recv_sys->scanned_lsn
+ && log_block_get_checkpoint_no(log_block)
+ < recv_sys->scanned_checkpoint_no
&& (recv_sys->scanned_checkpoint_no
- log_block_get_checkpoint_no(log_block)
> 0x80000000UL)) {
/* Garbage from a log buffer flush which was made
before the most recent database recovery */
-
- finished = TRUE;
-#ifdef UNIV_LOG_DEBUG
- /* This is not really an error, but currently
- we stop here in the debug version: */
-
- *err = DB_ERROR;
- return (TRUE);
-#endif
+ finished = true;
break;
}
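+ /* Arithmetic example for the guard above:
+ scanned_checkpoint_no = 0x90000005 with a block stamped
+ 0x10000001 gives a difference of 0x80000004 > 0x80000000UL,
+ so the block is treated as a leftover from before the most
+ recent recovery. */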
@@ -2729,56 +3308,61 @@ recv_scan_log_recs(
scanned_lsn += data_len;
- if (scanned_lsn > recv_sys->scanned_lsn) {
-
- /* We have found more entries. If this scan is
- of startup type, we must initiate crash recovery
- environment before parsing these log records. */
+ if (data_len == LOG_BLOCK_HDR_SIZE + SIZE_OF_MLOG_CHECKPOINT
+ && scanned_lsn == checkpoint_lsn + SIZE_OF_MLOG_CHECKPOINT
+ && log_block[LOG_BLOCK_HDR_SIZE] == MLOG_CHECKPOINT
+ && checkpoint_lsn == mach_read_from_8(LOG_BLOCK_HDR_SIZE
+ + 1 + log_block)) {
+ /* The redo log is logically empty. */
+ ut_ad(recv_sys->mlog_checkpoint_lsn == 0
+ || recv_sys->mlog_checkpoint_lsn
+ == checkpoint_lsn);
+ recv_sys->mlog_checkpoint_lsn = checkpoint_lsn;
+ DBUG_PRINT("ib_log", ("found empty log; LSN=" LSN_PF,
+ scanned_lsn));
+ finished = true;
+ break;
+ }
-#ifndef UNIV_HOTBACKUP
- if (recv_log_scan_is_startup_type
- && !recv_needed_recovery) {
- if (!srv_read_only_mode) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Starting crash recovery from "
- "checkpoint LSN=" LSN_PF,
- recv_sys->scanned_lsn);
+ if (scanned_lsn > recv_sys->scanned_lsn) {
+ ut_ad(!srv_log_files_created);
+ if (!recv_needed_recovery) {
+ recv_needed_recovery = true;
- recv_init_crash_recovery();
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "innodb_read_only prevents"
- " crash recovery");
- recv_needed_recovery = TRUE;
- return(TRUE);
+ if (srv_read_only_mode) {
+ ib::warn() << "innodb_read_only"
+ " prevents crash recovery";
+ return(true);
}
+
+ ib::info() << "Starting crash recovery from"
+ " checkpoint LSN="
+ << recv_sys->scanned_lsn;
}
-#endif /* !UNIV_HOTBACKUP */
/* We were able to find more log data: add it to the
parsing buffer if parse_start_lsn is already
non-zero */
+ DBUG_EXECUTE_IF(
+ "reduce_recv_parsing_buf",
+ recv_parsing_buf_size
+ = (70 * 1024);
+ );
+
if (recv_sys->len + 4 * OS_FILE_LOG_BLOCK_SIZE
- >= RECV_PARSING_BUF_SIZE) {
- fprintf(stderr,
- "InnoDB: Error: log parsing"
- " buffer overflow."
- " Recovery may have failed!\n");
+ >= recv_parsing_buf_size) {
+ ib::error() << "Log parsing buffer overflow."
+ " Recovery may have failed!";
- recv_sys->found_corrupt_log = TRUE;
+ recv_sys->found_corrupt_log = true;
-#ifndef UNIV_HOTBACKUP
if (!srv_force_recovery) {
- fputs("InnoDB: Set"
- " innodb_force_recovery"
- " to ignore this error.\n",
- stderr);
- *err = DB_ERROR;
- return (TRUE);
+ ib::error()
+ << "Set innodb_force_recovery"
+ " to ignore this error.";
+ return(true);
}
-#endif /* !UNIV_HOTBACKUP */
-
} else if (!recv_sys->found_corrupt_log) {
more_data = recv_sys_add_to_parsing_buf(
log_block, scanned_lsn);
@@ -2789,440 +3373,585 @@ recv_scan_log_recs(
= log_block_get_checkpoint_no(log_block);
}
+ /* During the last phase of scanning, redo log records may
+ remain in recv_sys->buf; parse them and store them in
+ recv_sys->heap */
+ if (last_phase
+ && recv_sys->recovered_lsn < recv_sys->scanned_lsn) {
+ more_data = true;
+ }
+
if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
/* Log data for this group ends here */
-
- finished = TRUE;
+ finished = true;
break;
} else {
log_block += OS_FILE_LOG_BLOCK_SIZE;
}
- } while (log_block < buf + len && !finished);
+ } while (log_block < log_end);
*group_scanned_lsn = scanned_lsn;
+ mutex_enter(&recv_sys->mutex);
+
if (more_data && !recv_sys->found_corrupt_log) {
/* Try to parse more log records */
- recv_parse_log_recs(store_to_hash, err);
-
- if (*err != DB_SUCCESS) {
- return (TRUE);
+ if (recv_parse_log_recs(checkpoint_lsn,
+ store_to_hash, available_mem,
+ apply)) {
+ ut_ad(recv_sys->found_corrupt_log
+ || recv_sys->found_corrupt_fs
+ || recv_sys->mlog_checkpoint_lsn
+ == recv_sys->recovered_lsn);
+ finished = true;
+ goto func_exit;
}
-#ifndef UNIV_HOTBACKUP
- if (store_to_hash
- && mem_heap_get_size(recv_sys->heap) > available_memory) {
+ recv_sys_heap_check(store_to_hash, available_mem);
- /* Hash table of log records has grown too big:
- empty it; FALSE means no ibuf operations
- allowed, as we cannot add new records to the
- log yet: they would be produced by ibuf
- operations */
-
- recv_apply_hashed_log_recs(false);
- }
-#endif /* !UNIV_HOTBACKUP */
-
- if (recv_sys->recovered_offset > RECV_PARSING_BUF_SIZE / 4) {
+ if (recv_sys->recovered_offset > recv_parsing_buf_size / 4) {
/* Move parsing buffer data to the buffer start */
-
recv_sys_justify_left_parsing_buf();
}
+
+ /* Need to re-parse the redo log records that are
+ still stored in recv_sys->buf */
+ if (last_phase && *store_to_hash == STORE_NO) {
+ finished = false;
+ }
}
+func_exit:
+ mutex_exit(&recv_sys->mutex);
return(finished);
}
-#ifndef UNIV_HOTBACKUP
-/*******************************************************//**
-Scans log from a buffer and stores new log data to the parsing buffer. Parses
-and hashes the log records if new data found. */
+/** Scans log from a buffer and stores new log data to the parsing buffer.
+Parses and hashes the log records if new data found.
+@param[in,out] group log group
+@param[in] checkpoint_lsn latest checkpoint log sequence number
+@param[in,out] contiguous_lsn log sequence number
+until which all redo log has been scanned
+@param[in] last_phase whether changes
+can be applied to the tablespaces
+@return whether rescan is needed (not everything was stored) */
static
-void
+bool
recv_group_scan_log_recs(
-/*=====================*/
- log_group_t* group, /*!< in: log group */
- lsn_t* contiguous_lsn, /*!< in/out: it is known that all log
- groups contain contiguous log data up
- to this lsn */
- lsn_t* group_scanned_lsn,/*!< out: scanning succeeded up to
- this lsn */
- dberr_t* err) /*!< out: error code or DB_SUCCESS */
+ log_group_t* group,
+ lsn_t checkpoint_lsn,
+ lsn_t* contiguous_lsn,
+ bool last_phase)
{
- ibool finished;
+ DBUG_ENTER("recv_group_scan_log_recs");
+ DBUG_ASSERT(!last_phase || recv_sys->mlog_checkpoint_lsn > 0);
+
+ mutex_enter(&recv_sys->mutex);
+ recv_sys->len = 0;
+ recv_sys->recovered_offset = 0;
+ recv_sys->n_addrs = 0;
+ recv_sys_empty_hash();
+ srv_start_lsn = *contiguous_lsn;
+ recv_sys->parse_start_lsn = *contiguous_lsn;
+ recv_sys->scanned_lsn = *contiguous_lsn;
+ recv_sys->recovered_lsn = *contiguous_lsn;
+ recv_sys->scanned_checkpoint_no = 0;
+ recv_previous_parsed_rec_type = MLOG_SINGLE_REC_FLAG;
+ recv_previous_parsed_rec_offset = 0;
+ recv_previous_parsed_rec_is_multi = 0;
+ ut_ad(recv_max_page_lsn == 0);
+ ut_ad(last_phase || !recv_writer_thread_active);
+ mutex_exit(&recv_sys->mutex);
+
lsn_t start_lsn;
lsn_t end_lsn;
+ store_t store_to_hash = recv_sys->mlog_checkpoint_lsn == 0
+ ? STORE_NO : (last_phase ? STORE_IF_EXISTS : STORE_YES);
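+ /* The scan passes map to store modes as follows:
+ pass 1 (locate MLOG_CHECKPOINT):      STORE_NO
+ pass 2 (store records to the hash):   STORE_YES
+ pass 3 (rescan after memory ran out): STORE_IF_EXISTS */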
+ ulint available_mem = (buf_pool_get_n_pages() * 2 / 3)
+ << srv_page_size_shift;
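+ /* Heuristic: let the stored records grow to at most 2/3 of the
+ buffer pool. Worked example: a 128MiB pool of 16KiB pages has
+ 8192 pages, so available_mem = (8192 * 2 / 3) << 14
+ = 5461 * 16384 bytes, about 85MiB; the remaining third leaves
+ room for the data pages that the records will be applied to. */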
- finished = FALSE;
- *err = DB_SUCCESS;
-
- start_lsn = *contiguous_lsn;
+ group->scanned_lsn = end_lsn = *contiguous_lsn = ut_uint64_align_down(
+ *contiguous_lsn, OS_FILE_LOG_BLOCK_SIZE);
- while (!finished) {
- end_lsn = start_lsn + RECV_SCAN_SIZE;
+ do {
+ if (last_phase && store_to_hash == STORE_NO) {
+ store_to_hash = STORE_IF_EXISTS;
+ /* We must not allow change buffer
+ merge here, because it would generate
+ redo log records before we have
+ finished the redo log scan. */
+ recv_apply_hashed_log_recs(false);
+ /* Rescan the redo logs from last stored lsn */
+ end_lsn = recv_sys->recovered_lsn;
+ }
- log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
- group, start_lsn, end_lsn);
+ start_lsn = ut_uint64_align_down(end_lsn,
+ OS_FILE_LOG_BLOCK_SIZE);
+ end_lsn = start_lsn;
+ log_group_read_log_seg(
+ log_sys->buf, group, &end_lsn,
+ start_lsn + RECV_SCAN_SIZE);
+ } while (end_lsn != start_lsn
+ && !recv_scan_log_recs(
+ available_mem, &store_to_hash, log_sys->buf,
+ checkpoint_lsn, start_lsn, end_lsn,
+ contiguous_lsn, &group->scanned_lsn));
+
+ if (recv_sys->found_corrupt_log || recv_sys->found_corrupt_fs) {
+ DBUG_RETURN(false);
+ }
- finished = recv_scan_log_recs(
- (buf_pool_get_n_pages()
- - (recv_n_pool_free_frames * srv_buf_pool_instances))
- * UNIV_PAGE_SIZE,
- TRUE, log_sys->buf, RECV_SCAN_SIZE,
- start_lsn, contiguous_lsn, group_scanned_lsn,
- err);
+ DBUG_PRINT("ib_log", ("%s " LSN_PF " completed",
+ last_phase ? "rescan" : "scan",
+ group->scanned_lsn));
- if (*err != DB_SUCCESS) {
- break;
- }
+ DBUG_RETURN(store_to_hash == STORE_NO);
+}
- start_lsn = end_lsn;
+/** Report a missing tablespace for which page-redo log exists.
+@param[in] err previous error code
+@param[in] i tablespace descriptor
+@return new error code */
+static
+dberr_t
+recv_init_missing_space(dberr_t err, const recv_spaces_t::const_iterator& i)
+{
+ if (is_mariabackup_restore_or_export()) {
+ ib::warn() << "Tablespace " << i->first << " was not"
+ " found at " << i->second.name << " when"
+ " restoring a (partial?) backup. All redo log"
+ " for this file will be ignored!";
+ return(err);
}
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Scanned group %lu up to"
- " log sequence number " LSN_PF "\n",
- (ulong) group->id,
- *group_scanned_lsn);
+ if (srv_force_recovery == 0) {
+ ib::error() << "Tablespace " << i->first << " was not"
+ " found at " << i->second.name << ".";
+
+ if (err == DB_SUCCESS) {
+ ib::error() << "Set innodb_force_recovery=1 to"
+ " ignore this and to permanently lose"
+ " all changes to the tablespace.";
+ err = DB_TABLESPACE_NOT_FOUND;
+ }
+ } else {
+ ib::warn() << "Tablespace " << i->first << " was not"
+ " found at " << i->second.name << ", and"
+ " innodb_force_recovery was set. All redo log"
+ " for this tablespace will be ignored!";
}
-#endif /* UNIV_DEBUG */
+
+ return(err);
}
-/*******************************************************//**
-Initialize crash recovery environment. Can be called iff
-recv_needed_recovery == FALSE. */
-static
-void
-recv_init_crash_recovery(void)
-/*==========================*/
+/** Report any missing tablespaces and discard the redo log records
+for deleted tablespaces.
+@param[in] rescan whether a rescan of the redo log is needed
+ because the hash table ran out of memory
+@param[out] missing_tablespace whether any missing tablespace exists
+@return error code or DB_SUCCESS. */
+static MY_ATTRIBUTE((warn_unused_result))
+dberr_t
+recv_validate_tablespace(bool rescan, bool& missing_tablespace)
{
- ut_ad(!srv_read_only_mode);
- ut_a(!recv_needed_recovery);
+ dberr_t err = DB_SUCCESS;
- recv_needed_recovery = TRUE;
+ for (ulint h = 0; h < hash_get_n_cells(recv_sys->addr_hash); h++) {
+ for (recv_addr_t* recv_addr = static_cast<recv_addr_t*>(
+ HASH_GET_FIRST(recv_sys->addr_hash, h));
+ recv_addr != 0;
+ recv_addr = static_cast<recv_addr_t*>(
+ HASH_GET_NEXT(addr_hash, recv_addr))) {
- fil_load_single_table_tablespaces();
+ const ulint space = recv_addr->space;
- /* If we are using the doublewrite method, we will
- check if there are half-written pages in data files,
- and restore them from the doublewrite buffer if
- possible */
+ if (is_predefined_tablespace(space)) {
+ continue;
+ }
- service_manager_extend_timeout(
- INNODB_EXTEND_TIMEOUT_INTERVAL, "Starting Innodb crash recovery");
+ recv_spaces_t::iterator i = recv_spaces.find(space);
+ ut_ad(i != recv_spaces.end());
+
+ switch (i->second.status) {
+ case file_name_t::MISSING:
+ err = recv_init_missing_space(err, i);
+ i->second.status = file_name_t::DELETED;
+ /* fall through */
+ case file_name_t::DELETED:
+ recv_addr->state = RECV_DISCARDED;
+ /* fall through */
+ case file_name_t::NORMAL:
+ continue;
+ }
+ ut_ad(0);
+ }
+ }
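+ /* Decision summary for the loop above:
+ MISSING -> reported once, then treated as DELETED
+ DELETED -> buffered records are marked RECV_DISCARDED
+ NORMAL  -> records stay queued for application */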
- if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
- buf_dblwr_process();
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
- /* Spawn the background thread to flush dirty pages
- from the buffer pools. */
- recv_writer_thread_active = true;
- recv_writer_thread_handle = os_thread_create(
- recv_writer_thread, 0, 0);
+ /* When no rescan is needed, recv_sys->addr_hash contains
+ entries for every tablespace ID referenced by the redo log.
+ If a rescan is needed and innodb_force_recovery > 0, InnoDB
+ can ignore missing tablespaces. */
+ for (recv_spaces_t::iterator i = recv_spaces.begin();
+ i != recv_spaces.end(); i++) {
+
+ if (i->second.status != file_name_t::MISSING) {
+ continue;
+ }
+
+ missing_tablespace = true;
+
+ if (srv_force_recovery > 0) {
+ ib::warn() << "Tablespace " << i->first
+ <<" was not found at " << i->second.name
+ <<", and innodb_force_recovery was set."
+ <<" All redo log for this tablespace"
+ <<" will be ignored!";
+ continue;
+ }
+
+ if (!rescan) {
+ ib::info() << "Tablespace " << i->first
+ << " was not found at '"
+ << i->second.name << "', but there"
+ <<" were no modifications either.";
+ }
}
+
+ if (!rescan || srv_force_recovery > 0) {
+ missing_tablespace = false;
+ }
+
+ return DB_SUCCESS;
}
-/** Recovers from a checkpoint. When this function returns, the database is able
-to start processing of new user transactions, but the function
-recv_recovery_from_checkpoint_finish should be called later to complete
-the recovery and free the resources used in it.
-@param[in] type LOG_CHECKPOINT or LOG_ARCHIVE
-@param[in] limit_lsn recover up to this lsn if possible
-@param[in] flushed_lsn flushed lsn from first data file
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+/** Check if all tablespaces were found for crash recovery.
+@param[in] rescan whether a rescan of the redo log is needed
+@param[out] missing_tablespace whether any tablespace is missing
+@return error code or DB_SUCCESS */
+static MY_ATTRIBUTE((warn_unused_result))
dberr_t
-recv_recovery_from_checkpoint_start_func(
-#ifdef UNIV_LOG_ARCHIVE
- ulint type,
- lsn_t limit_lsn,
-#endif /* UNIV_LOG_ARCHIVE */
- lsn_t flushed_lsn)
+recv_init_crash_recovery_spaces(bool rescan, bool& missing_tablespace)
+{
+ bool flag_deleted = false;
+
+ ut_ad(!srv_read_only_mode);
+ ut_ad(recv_needed_recovery);
+
+ for (recv_spaces_t::iterator i = recv_spaces.begin();
+ i != recv_spaces.end(); i++) {
+ ut_ad(!is_predefined_tablespace(i->first));
+ ut_ad(i->second.status != file_name_t::DELETED || !i->second.space);
+
+ if (i->second.status == file_name_t::DELETED) {
+ /* The tablespace was deleted,
+ so we can ignore any redo log for it. */
+ flag_deleted = true;
+ } else if (i->second.space != NULL) {
+ /* The tablespace was found, and there
+ are some redo log records for it. */
+ fil_names_dirty(i->second.space);
+ i->second.space->enable_lsn = i->second.enable_lsn;
+ } else if (i->second.name == "") {
+ ib::error() << "Missing MLOG_FILE_NAME"
+ " or MLOG_FILE_DELETE"
+ " before MLOG_CHECKPOINT for tablespace "
+ << i->first;
+ recv_sys->found_corrupt_log = true;
+ return(DB_CORRUPTION);
+ } else {
+ i->second.status = file_name_t::MISSING;
+ flag_deleted = true;
+ }
+
+ ut_ad(i->second.status == file_name_t::DELETED || i->second.name != "");
+ }
+
+ if (flag_deleted) {
+ return recv_validate_tablespace(rescan, missing_tablespace);
+ }
+
+ return DB_SUCCESS;
+}
+
+/** Start recovering from a redo log checkpoint.
+@see recv_recovery_from_checkpoint_finish
+@param[in] flush_lsn FIL_PAGE_FILE_FLUSH_LSN
+of first system tablespace page
+@return error code or DB_SUCCESS */
+dberr_t
+recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
{
- log_group_t* group;
- log_group_t* max_cp_group;
ulint max_cp_field;
lsn_t checkpoint_lsn;
+ bool rescan;
ib_uint64_t checkpoint_no;
- lsn_t group_scanned_lsn = 0;
lsn_t contiguous_lsn;
-#ifdef UNIV_LOG_ARCHIVE
- log_group_t* up_to_date_group;
- lsn_t archived_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
byte* buf;
- byte log_hdr_buf[LOG_FILE_HDR_SIZE];
- dberr_t err;
+ dberr_t err = DB_SUCCESS;
+
+ ut_ad(srv_operation == SRV_OPERATION_NORMAL
+ || is_mariabackup_restore_or_export());
/* Initialize red-black tree for fast insertions into the
flush_list during recovery process. */
buf_flush_init_flush_rbt();
- ut_when_dtor<recv_dblwr_t> tmp(recv_sys->dblwr);
-
-#ifdef UNIV_LOG_ARCHIVE
- ut_ad(type != LOG_CHECKPOINT || limit_lsn == LSN_MAX);
-/** TRUE when recovering from a checkpoint */
-# define TYPE_CHECKPOINT (type == LOG_CHECKPOINT)
-/** Recover up to this log sequence number */
-# define LIMIT_LSN limit_lsn
-#else /* UNIV_LOG_ARCHIVE */
-/** TRUE when recovering from a checkpoint */
-# define TYPE_CHECKPOINT 1
-/** Recover up to this log sequence number */
-# define LIMIT_LSN LSN_MAX
-#endif /* UNIV_LOG_ARCHIVE */
-
if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "The user has set SRV_FORCE_NO_LOG_REDO on, "
- "skipping log redo");
+ ib::info() << "innodb_force_recovery=6 skips redo log apply";
return(DB_SUCCESS);
}
- recv_recovery_on = TRUE;
-
- recv_sys->limit_lsn = LIMIT_LSN;
+ recv_recovery_on = true;
- mutex_enter(&(log_sys->mutex));
+ log_mutex_enter();
/* Look for the latest checkpoint from any of the log groups */
- err = recv_find_max_checkpoint(&max_cp_group, &max_cp_field);
+ err = recv_find_max_checkpoint(&max_cp_field);
if (err != DB_SUCCESS) {
-
- mutex_exit(&(log_sys->mutex));
-
+skip_apply:
+ log_mutex_exit();
return(err);
}
- log_group_read_checkpoint_info(max_cp_group, max_cp_field);
+ switch (log_sys->log.format) {
+ case 0:
+ break;
+ case LOG_HEADER_FORMAT_10_2:
+ case LOG_HEADER_FORMAT_10_2 | LOG_HEADER_FORMAT_ENCRYPTED:
+ break;
+ case LOG_HEADER_FORMAT_10_3:
+ case LOG_HEADER_FORMAT_10_3 | LOG_HEADER_FORMAT_ENCRYPTED:
+ if (log_sys->log.subformat == 1) {
+ /* 10.2 with new crash-safe TRUNCATE */
+ break;
+ }
+ /* fall through */
+ default:
+ /* This must be a clean log from a newer version. */
+ goto skip_apply;
+ }
+
+ log_group_header_read(&log_sys->log, max_cp_field);
buf = log_sys->checkpoint_buf;
checkpoint_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_LSN);
checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);
-#ifdef UNIV_LOG_ARCHIVE
- archived_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN);
-#endif /* UNIV_LOG_ARCHIVE */
- /* Read the first log file header to print a note if this is
- a recovery from a restored InnoDB Hot Backup */
+ /* Start reading the log groups from the checkpoint lsn up. The
+ variable contiguous_lsn contains an lsn up to which the log is
+ known to be contiguously written to all log groups. */
- fil_io(OS_FILE_READ | OS_FILE_LOG, true, max_cp_group->space_id, 0,
- 0, 0, LOG_FILE_HDR_SIZE,
- log_hdr_buf, max_cp_group, 0);
+ recv_sys->mlog_checkpoint_lsn = 0;
- if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
- (byte*)"ibbackup", (sizeof "ibbackup") - 1)) {
-
- if (srv_read_only_mode) {
+ ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot restore from mysqlbackup, InnoDB "
- "running in read-only mode!");
+ const lsn_t end_lsn = mach_read_from_8(
+ buf + LOG_CHECKPOINT_END_LSN);
- return(DB_ERROR);
+ ut_ad(recv_sys->n_addrs == 0);
+ contiguous_lsn = checkpoint_lsn;
+ switch (log_sys->log.format) {
+ case 0:
+ log_mutex_exit();
+ return recv_log_format_0_recover(checkpoint_lsn,
+ buf[20 + 32 * 9] == 2);
+ default:
+ if (end_lsn == 0) {
+ break;
}
-
- /* This log file was created by mysqlbackup --restore: print
- a note to the user about it */
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "The log file was created by mysqlbackup --apply-log "
- "at %s. The following crash recovery is part of a "
- "normal restore.",
- log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP);
-
- /* Wipe over the label now */
-
- memset(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
- ' ', 4);
- /* Write to the log file to wipe over the label */
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, true,
- max_cp_group->space_id, 0,
- 0, 0, OS_FILE_LOG_BLOCK_SIZE,
- log_hdr_buf, max_cp_group, 0);
+ if (end_lsn >= checkpoint_lsn) {
+ contiguous_lsn = end_lsn;
+ break;
+ }
+ recv_sys->found_corrupt_log = true;
+ log_mutex_exit();
+ return(DB_ERROR);
}
-#ifdef UNIV_LOG_ARCHIVE
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- while (group) {
- log_checkpoint_get_nth_group_info(buf, group->id,
- &(group->archived_file_no),
- &(group->archived_offset));
+ /* Look for MLOG_CHECKPOINT. */
+ log_group_t* group = &log_sys->log;
+ recv_group_scan_log_recs(group, checkpoint_lsn, &contiguous_lsn,
+ false);
+ /* The first scan should not have stored or applied any records. */
+ ut_ad(recv_sys->n_addrs == 0);
+ ut_ad(!recv_sys->found_corrupt_fs);
- group = UT_LIST_GET_NEXT(log_groups, group);
+ if (srv_read_only_mode && recv_needed_recovery) {
+ log_mutex_exit();
+ return(DB_READ_ONLY);
}
-#endif /* UNIV_LOG_ARCHIVE */
- if (TYPE_CHECKPOINT) {
- /* Start reading the log groups from the checkpoint lsn up. The
- variable contiguous_lsn contains an lsn up to which the log is
- known to be contiguously written to all log groups. */
- recv_sys->parse_start_lsn = checkpoint_lsn;
- recv_sys->scanned_lsn = checkpoint_lsn;
- recv_sys->scanned_checkpoint_no = 0;
- recv_sys->recovered_lsn = checkpoint_lsn;
- srv_start_lsn = checkpoint_lsn;
+ if (recv_sys->found_corrupt_log && !srv_force_recovery) {
+ log_mutex_exit();
+ ib::warn() << "Log scan aborted at LSN " << contiguous_lsn;
+ return(DB_ERROR);
}
- contiguous_lsn = ut_uint64_align_down(recv_sys->scanned_lsn,
- OS_FILE_LOG_BLOCK_SIZE);
-#ifdef UNIV_LOG_ARCHIVE
- if (TYPE_CHECKPOINT) {
- up_to_date_group = max_cp_group;
+ if (recv_sys->mlog_checkpoint_lsn == 0) {
+ lsn_t scan_lsn = group->scanned_lsn;
+ if (!srv_read_only_mode && scan_lsn != checkpoint_lsn) {
+ log_mutex_exit();
+ ib::error err;
+ err << "Missing MLOG_CHECKPOINT";
+ if (end_lsn) {
+ err << " at " << end_lsn;
+ }
+ err << " between the checkpoint " << checkpoint_lsn
+ << " and the end " << scan_lsn << ".";
+ return(DB_ERROR);
+ }
+
+ group->scanned_lsn = checkpoint_lsn;
+ rescan = false;
} else {
- ulint capacity;
- dberr_t err;
+ contiguous_lsn = checkpoint_lsn;
+ rescan = recv_group_scan_log_recs(
+ group, checkpoint_lsn, &contiguous_lsn, false);
- /* Try to recover the remaining part from logs: first from
- the logs of the archived group */
+ if ((recv_sys->found_corrupt_log && !srv_force_recovery)
+ || recv_sys->found_corrupt_fs) {
+ log_mutex_exit();
+ return(DB_ERROR);
+ }
+ }
- group = recv_sys->archive_group;
- capacity = log_group_get_capacity(group);
+	/* NOTE: we always do a 'recovery' at startup, but we print
+	a message to the user about recovery only if something is
+	actually wrong: */
+
+ if (flush_lsn == checkpoint_lsn + SIZE_OF_MLOG_CHECKPOINT
+ && recv_sys->mlog_checkpoint_lsn == checkpoint_lsn) {
+ /* The redo log is logically empty. */
+ } else if (checkpoint_lsn != flush_lsn) {
+ ut_ad(!srv_log_files_created);
+
+ if (checkpoint_lsn + SIZE_OF_MLOG_CHECKPOINT < flush_lsn) {
+ ib::warn() << "Are you sure you are using the"
+ " right ib_logfiles to start up the database?"
+ " Log sequence number in the ib_logfiles is "
+ << checkpoint_lsn << ", less than the"
+ " log sequence number in the first system"
+ " tablespace file header, " << flush_lsn << ".";
+ }
- if (recv_sys->scanned_lsn > checkpoint_lsn + capacity
- || checkpoint_lsn > recv_sys->scanned_lsn + capacity) {
+ if (!recv_needed_recovery) {
- mutex_exit(&(log_sys->mutex));
+ ib::info() << "The log sequence number " << flush_lsn
+ << " in the system tablespace does not match"
+ " the log sequence number " << checkpoint_lsn
+ << " in the ib_logfiles!";
- /* The group does not contain enough log: probably
- an archived log file was missing or corrupt */
+ if (srv_read_only_mode) {
+ ib::error() << "innodb_read_only"
+ " prevents crash recovery";
+ log_mutex_exit();
+ return(DB_READ_ONLY);
+ }
- return(DB_ERROR);
+ recv_needed_recovery = true;
}
+ }
- recv_group_scan_log_recs(group, &contiguous_lsn,
- &group_scanned_lsn, &err);
-
- if (err != DB_SUCCESS || recv_sys->scanned_lsn < checkpoint_lsn) {
+ log_sys->lsn = recv_sys->recovered_lsn;
- mutex_exit(&(log_sys->mutex));
+ if (recv_needed_recovery) {
+ bool missing_tablespace = false;
- /* The group did not contain enough log: an archived
- log file was missing or invalid, or the log group
- was corrupt */
+ err = recv_init_crash_recovery_spaces(
+ rescan, missing_tablespace);
- return(DB_ERROR);
+ if (err != DB_SUCCESS) {
+ log_mutex_exit();
+ return(err);
}
- group->scanned_lsn = group_scanned_lsn;
- up_to_date_group = group;
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
+	/* If any tablespace is missing and a rescan is needed, then
+	the hash table may not contain the redo log records for all
+	space ids. Rescan the remaining unstored redo log to validate
+	the missing tablespaces. */
+ ut_ad(rescan || !missing_tablespace);
-#ifdef UNIV_LOG_ARCHIVE
- if ((type == LOG_ARCHIVE) && (group == recv_sys->archive_group)) {
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-#endif /* UNIV_LOG_ARCHIVE */
+ while (missing_tablespace) {
+ DBUG_PRINT("ib_log", ("Rescan of redo log to validate "
+ "the missing tablespace. Scan "
+ "from last stored LSN " LSN_PF,
+ recv_sys->last_stored_lsn));
- /* Set the flag to publish that we are doing startup scan. */
- recv_log_scan_is_startup_type = TYPE_CHECKPOINT;
- while (group) {
-#ifdef UNIV_LOG_ARCHIVE
- lsn_t old_scanned_lsn = recv_sys->scanned_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
- dberr_t err = DB_SUCCESS;
+ lsn_t recent_stored_lsn = recv_sys->last_stored_lsn;
+ rescan = recv_group_scan_log_recs(
+ group, checkpoint_lsn,
+ &recent_stored_lsn, false);
- recv_group_scan_log_recs(group, &contiguous_lsn,
- &group_scanned_lsn, &err);
+ ut_ad(!recv_sys->found_corrupt_fs);
- if (err != DB_SUCCESS) {
- return (err);
- }
+ missing_tablespace = false;
- group->scanned_lsn = group_scanned_lsn;
+ err = recv_sys->found_corrupt_log
+ ? DB_ERROR
+ : recv_validate_tablespace(
+ rescan, missing_tablespace);
-#ifdef UNIV_LOG_ARCHIVE
- if (old_scanned_lsn < group_scanned_lsn) {
- /* We found a more up-to-date group */
+ if (err != DB_SUCCESS) {
+ log_mutex_exit();
+ return err;
+ }
- up_to_date_group = group;
+ rescan = true;
}
- if ((type == LOG_ARCHIVE)
- && (group == recv_sys->archive_group)) {
- group = UT_LIST_GET_NEXT(log_groups, group);
+ if (srv_operation == SRV_OPERATION_NORMAL) {
+ buf_dblwr_process();
}
-#endif /* UNIV_LOG_ARCHIVE */
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
+ ut_ad(srv_force_recovery <= SRV_FORCE_NO_UNDO_LOG_SCAN);
- /* Done with startup scan. Clear the flag. */
- recv_log_scan_is_startup_type = FALSE;
+ /* Spawn the background thread to flush dirty pages
+ from the buffer pools. */
+ recv_writer_thread_active = true;
+ os_thread_create(recv_writer_thread, 0, 0);
- if (srv_read_only_mode && recv_needed_recovery) {
- return(DB_READ_ONLY);
- }
+ if (rescan) {
+ contiguous_lsn = checkpoint_lsn;
- if (TYPE_CHECKPOINT) {
- /* NOTE: we always do a 'recovery' at startup, but only if
- there is something wrong we will print a message to the
- user about recovery: */
+ recv_group_scan_log_recs(group, checkpoint_lsn,
+ &contiguous_lsn, true);
- if (checkpoint_lsn != flushed_lsn) {
- if (!recv_needed_recovery) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "The log sequence number "
- LSN_PF
- " in ibdata file do not match"
- " the log sequence number "
- LSN_PF
- " in the ib_logfiles!",
- flushed_lsn,
- checkpoint_lsn);
-
- if (!srv_read_only_mode) {
- recv_init_crash_recovery();
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Can't initiate database "
- "recovery, running "
- "in read-only-mode.");
- return(DB_READ_ONLY);
- }
+ if ((recv_sys->found_corrupt_log
+ && !srv_force_recovery)
+ || recv_sys->found_corrupt_fs) {
+ log_mutex_exit();
+ return(DB_ERROR);
}
}
+ } else {
+ ut_ad(!rescan || recv_sys->n_addrs == 0);
}
/* We currently have only one log group */
- if (group_scanned_lsn < checkpoint_lsn
- || group_scanned_lsn < recv_max_page_lsn) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "We scanned the log up to "
- LSN_PF ". A checkpoint was at " LSN_PF
- " and the maximum LSN on a database page was " LSN_PF
- ". It is possible that the database is now corrupt!",
- group_scanned_lsn, checkpoint_lsn, recv_max_page_lsn);
- }
- if (recv_sys->recovered_lsn < checkpoint_lsn) {
-
- mutex_exit(&(log_sys->mutex));
+ if (group->scanned_lsn < checkpoint_lsn
+ || group->scanned_lsn < recv_max_page_lsn) {
- if (recv_sys->recovered_lsn >= LIMIT_LSN) {
+ ib::error() << "We scanned the log up to " << group->scanned_lsn
+ << ". A checkpoint was at " << checkpoint_lsn << " and"
+ " the maximum LSN on a database page was "
+ << recv_max_page_lsn << ". It is possible that the"
+ " database is now corrupt!";
+ }
- return(DB_SUCCESS);
- }
+ if (recv_sys->recovered_lsn < checkpoint_lsn) {
+ log_mutex_exit();
- /* No harm in trying to do RO access. */
- if (!srv_read_only_mode) {
- return (DB_READ_ONLY);
- }
+		ib::error() << "Recovered only to lsn: "
+			<< recv_sys->recovered_lsn
+			<< ", checkpoint lsn: " << checkpoint_lsn;
return(DB_ERROR);
}
@@ -3232,17 +3961,8 @@ recv_recovery_from_checkpoint_start_func(
log_sys->next_checkpoint_lsn = checkpoint_lsn;
log_sys->next_checkpoint_no = checkpoint_no + 1;
- /* here the checkpoint info is written without any redo logging ongoing
- * and next_checkpoint_no is updated directly hence no +1 */
- log_crypt_set_ver_and_key(log_sys->next_checkpoint_no);
-
-#ifdef UNIV_LOG_ARCHIVE
- log_sys->archived_lsn = archived_lsn;
- recv_synchronize_groups(up_to_date_group);
-#else /* UNIV_LOG_ARCHIVE */
recv_synchronize_groups();
-#endif /* UNIV_LOG_ARCHIVE */
if (!recv_needed_recovery) {
ut_a(checkpoint_lsn == recv_sys->recovered_lsn);
@@ -3250,29 +3970,24 @@ recv_recovery_from_checkpoint_start_func(
srv_start_lsn = recv_sys->recovered_lsn;
}
- log_sys->lsn = recv_sys->recovered_lsn;
-
- ut_memcpy(log_sys->buf, recv_sys->last_block, OS_FILE_LOG_BLOCK_SIZE);
-
log_sys->buf_free = (ulint) log_sys->lsn % OS_FILE_LOG_BLOCK_SIZE;
log_sys->buf_next_to_write = log_sys->buf_free;
- log_sys->written_to_some_lsn = log_sys->lsn;
- log_sys->written_to_all_lsn = log_sys->lsn;
+ log_sys->write_lsn = log_sys->lsn;
log_sys->last_checkpoint_lsn = checkpoint_lsn;
+ if (!srv_read_only_mode && srv_operation == SRV_OPERATION_NORMAL) {
+ /* Write a MLOG_CHECKPOINT marker as the first thing,
+ before generating any other redo log. This ensures
+ that subsequent crash recovery will be possible even
+ if the server were killed soon after this. */
+ fil_names_clear(log_sys->last_checkpoint_lsn, true);
+ }
+
MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
log_sys->lsn - log_sys->last_checkpoint_lsn);
- log_sys->next_checkpoint_no = checkpoint_no + 1;
- log_crypt_set_ver_and_key(log_sys->next_checkpoint_no);
-
-#ifdef UNIV_LOG_ARCHIVE
- if (archived_lsn == LSN_MAX) {
-
- log_sys->archiving_state = LOG_ARCH_OFF;
- }
-#endif /* UNIV_LOG_ARCHIVE */
+ log_sys->next_checkpoint_no = ++checkpoint_no;
mutex_enter(&recv_sys->mutex);
@@ -3280,48 +3995,21 @@ recv_recovery_from_checkpoint_start_func(
mutex_exit(&recv_sys->mutex);
- mutex_exit(&log_sys->mutex);
+ log_mutex_exit();
- recv_lsn_checks_on = TRUE;
+ recv_lsn_checks_on = true;
/* The database is now ready to start almost normal processing of user
transactions: transaction rollbacks and the application of the log
records in the hash table can be run in background. */
return(DB_SUCCESS);
-
-#undef TYPE_CHECKPOINT
-#undef LIMIT_LSN
}
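
The rewritten recv_recovery_from_checkpoint_start_func() above reduces to a small decision tree: read the latest checkpoint header, scan once for MLOG_CHECKPOINT without storing records, then decide whether crash recovery (and a storing rescan) is needed by comparing the checkpoint LSN with the flush LSN recorded in the system tablespace. The following standalone sketch models only that control flow; the recv_state_t struct, its field names and the sample values are invented for illustration and are not the MariaDB API. Only SIZE_OF_MLOG_CHECKPOINT (9 bytes: one type byte plus an 8-byte LSN) matches the real constant.

#include <cstdint>
#include <cstdio>

typedef uint64_t lsn_t;

struct recv_state_t {			/* hypothetical, simplified state */
	lsn_t checkpoint_lsn;		/* LSN of the latest checkpoint */
	lsn_t mlog_checkpoint_lsn;	/* LSN of MLOG_CHECKPOINT; 0 if none seen */
	lsn_t scanned_lsn;		/* end of the first, non-storing scan */
	lsn_t flush_lsn;		/* flush LSN from the system tablespace */
};

/* Return true when full crash recovery (a storing rescan followed by
applying the hash table) is required; set *error on the condition that
corresponds to the "Missing MLOG_CHECKPOINT" message above. */
static bool needs_crash_recovery(const recv_state_t& s, bool* error)
{
	static const lsn_t SIZE_OF_MLOG_CHECKPOINT = 9;

	*error = false;

	if (s.mlog_checkpoint_lsn == 0 && s.scanned_lsn != s.checkpoint_lsn) {
		/* Models the "Missing MLOG_CHECKPOINT" error path. */
		*error = true;
		return false;
	}

	if (s.flush_lsn == s.checkpoint_lsn + SIZE_OF_MLOG_CHECKPOINT
	    && s.mlog_checkpoint_lsn == s.checkpoint_lsn) {
		/* The redo log is logically empty: a clean shutdown. */
		return false;
	}

	/* Any other mismatch between the data files and the redo log
	means the server was not shut down cleanly. */
	return s.checkpoint_lsn != s.flush_lsn;
}

int main()
{
	bool error;
	recv_state_t clean = {1000, 1000, 1009, 1009};
	recv_state_t crashed = {1000, 1000, 5000, 4000};

	std::printf("clean shutdown: recovery=%d\n",
		    needs_crash_recovery(clean, &error));
	std::printf("after crash:    recovery=%d\n",
		    needs_crash_recovery(crashed, &error));
	return 0;
}
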
-/********************************************************//**
-Completes recovery from a checkpoint. */
-UNIV_INTERN
+/** Complete recovery from a checkpoint. */
void
recv_recovery_from_checkpoint_finish(void)
-/*======================================*/
{
- if (recv_needed_recovery) {
- trx_sys_print_mysql_master_log_pos();
- trx_sys_print_mysql_binlog_offset();
- }
-
- if (recv_sys->found_corrupt_log) {
-
- fprintf(stderr,
- "InnoDB: WARNING: the log file may have been"
- " corrupt and it\n"
- "InnoDB: is possible that the log scan or parsing"
- " did not proceed\n"
- "InnoDB: far enough in recovery. Please run"
- " CHECK TABLE\n"
- "InnoDB: on your InnoDB tables to check that"
- " they are ok!\n"
- "InnoDB: It may be safest to recover your"
- " InnoDB database from\n"
- "InnoDB: a backup!\n");
- }
-
/* Make sure that the recv_writer thread is done. This is
required because it grabs various mutexes and we want to
ensure that when we enable sync_order_checks there is no
@@ -3329,10 +4017,10 @@ recv_recovery_from_checkpoint_finish(void)
mutex_enter(&recv_sys->writer_mutex);
/* Free the resources of the recovery system */
- recv_recovery_on = FALSE;
+ recv_recovery_on = false;
/* By acquring the mutex we ensure that the recv_writer thread
- won't trigger any more LRU batchtes. Now wait for currently
+ won't trigger any more LRU batches. Now wait for currently
in progress batches to finish. */
buf_flush_wait_LRU_batch_end();
@@ -3343,48 +4031,30 @@ recv_recovery_from_checkpoint_finish(void)
++count;
os_thread_sleep(100000);
if (srv_print_verbose_log && count > 600) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Waiting for recv_writer to "
- "finish flushing of buffer pool");
+ ib::info() << "Waiting for recv_writer to"
+ " finish flushing of buffer pool";
count = 0;
}
}
-#ifdef __WIN__
- if (recv_writer_thread_handle) {
- CloseHandle(recv_writer_thread_handle);
- }
-#endif /* __WIN__ */
-
-#ifndef UNIV_LOG_DEBUG
recv_sys_debug_free();
-#endif
- /* Roll back any recovered data dictionary transactions, so
- that the data dictionary tables will be free of any locks.
- The data dictionary latch should guarantee that there is at
- most one data dictionary transaction active at a time. */
- if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
- trx_rollback_or_clean_recovered(FALSE);
- }
+
+ /* Free up the flush_rbt. */
+ buf_flush_free_flush_rbt();
}
/********************************************************//**
Initiates the rollback of active transactions. */
-UNIV_INTERN
void
recv_recovery_rollback_active(void)
/*===============================*/
{
-#ifdef UNIV_SYNC_DEBUG
- /* Wait for a while so that created threads have time to suspend
- themselves before we switch the latching order checks on */
- os_thread_sleep(1000000);
-
ut_ad(!recv_writer_thread_active);
- /* Switch latching order checks on in sync0sync.cc */
- sync_order_checks_on = TRUE;
-#endif
+ /* Switch latching order checks on in sync0debug.cc, if
+ --innodb-sync-debug=true (default) */
+ ut_d(sync_check_enable());
+
/* We can't start any (DDL) transactions if UNDO logging
has been disabled, additionally disable ROLLBACK of recovered
user transactions. */
@@ -3393,8 +4063,9 @@ recv_recovery_rollback_active(void)
/* Drop partially created indexes. */
row_merge_drop_temp_indexes();
- /* Drop temporary tables. */
- row_mysql_drop_temp_tables();
+	/* Drop garbage tables. */
+	if (srv_safe_truncate) {
+		row_mysql_drop_garbage_tables();
+	}
/* Drop any auxiliary tables that were not dropped when the
parent table was dropped. This can happen if the parent table
@@ -3410,534 +4081,202 @@ recv_recovery_rollback_active(void)
}
}
-/******************************************************//**
-Resets the logs. The contents of log files will be lost! */
-UNIV_INTERN
-void
-recv_reset_logs(
-/*============*/
-#ifdef UNIV_LOG_ARCHIVE
- ulint arch_log_no, /*!< in: next archived log file number */
- ibool new_logs_created,/*!< in: TRUE if resetting logs
- is done at the log creation;
- FALSE if it is done after
- archive recovery */
-#endif /* UNIV_LOG_ARCHIVE */
- lsn_t lsn) /*!< in: reset to this lsn
- rounded up to be divisible by
- OS_FILE_LOG_BLOCK_SIZE, after
- which we add
- LOG_BLOCK_HDR_SIZE */
+/** Find a doublewrite copy of a page.
+@param[in] space_id tablespace identifier
+@param[in] page_no page number
+@return page frame
+@retval NULL if no page was found */
+const byte*
+recv_dblwr_t::find_page(ulint space_id, ulint page_no)
{
- log_group_t* group;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- log_sys->lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- while (group) {
- group->lsn = log_sys->lsn;
- group->lsn_offset = LOG_FILE_HDR_SIZE;
-#ifdef UNIV_LOG_ARCHIVE
- group->archived_file_no = arch_log_no;
- group->archived_offset = 0;
-
- if (!new_logs_created) {
- recv_truncate_group(group, group->lsn, group->lsn,
- group->lsn, group->lsn);
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-
- log_sys->buf_next_to_write = 0;
- log_sys->written_to_some_lsn = log_sys->lsn;
- log_sys->written_to_all_lsn = log_sys->lsn;
-
- log_sys->next_checkpoint_no = 0;
- log_sys->last_checkpoint_lsn = 0;
-
-#ifdef UNIV_LOG_ARCHIVE
- log_sys->archived_lsn = log_sys->lsn;
-#endif /* UNIV_LOG_ARCHIVE */
-
- memset(log_sys->buf, 0, log_sys->buf_size);
- log_block_init(log_sys->buf, log_sys->lsn);
- log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
-
- log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
- log_sys->lsn += LOG_BLOCK_HDR_SIZE;
-
- MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
- (log_sys->lsn - log_sys->last_checkpoint_lsn));
-
- mutex_exit(&(log_sys->mutex));
-
- /* Reset the checkpoint fields in logs */
-
- log_make_checkpoint_at(LSN_MAX, TRUE);
-
- mutex_enter(&(log_sys->mutex));
+ const byte *result= NULL;
+ lsn_t max_lsn= 0;
+
+ for (list::const_iterator i = pages.begin(); i != pages.end(); ++i)
+ {
+ const byte *page= *i;
+ if (page_get_page_no(page) != page_no ||
+ page_get_space_id(page) != space_id)
+ continue;
+ const lsn_t lsn= mach_read_from_8(page + FIL_PAGE_LSN);
+ if (lsn <= max_lsn)
+ continue;
+ max_lsn= lsn;
+ result= page;
+ }
+
+ return result;
}
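
The new recv_dblwr_t::find_page() above replaces the removed two-pass implementation (collect all matches into a vector, then pick the newest) with a single pass that keeps the copy with the highest FIL_PAGE_LSN. A standalone model of that selection, using a plain struct in place of raw page frames; the Page type and the sample data are invented for illustration:

#include <cstdint>
#include <cstdio>
#include <list>

typedef uint64_t lsn_t;

struct Page {			/* stand-in for a page frame */
	uint32_t space_id;
	uint32_t page_no;
	lsn_t	 lsn;		/* models FIL_PAGE_LSN */
};

/* Single pass: among all doublewrite copies of (space_id, page_no),
return the most recent one, mirroring recv_dblwr_t::find_page(). */
static const Page* find_page(const std::list<Page>& pages,
			     uint32_t space_id, uint32_t page_no)
{
	const Page*	result = NULL;
	lsn_t		max_lsn = 0;

	for (std::list<Page>::const_iterator i = pages.begin();
	     i != pages.end(); ++i) {
		if (i->space_id != space_id || i->page_no != page_no
		    || i->lsn <= max_lsn) {
			continue;
		}
		max_lsn = i->lsn;
		result = &*i;
	}
	return result;
}

int main()
{
	std::list<Page> dblwr;
	Page a = {5, 7, 100}; Page b = {5, 7, 250}; Page c = {5, 8, 300};
	dblwr.push_back(a); dblwr.push_back(b); dblwr.push_back(c);

	const Page* p = find_page(dblwr, 5, 7);
	std::printf("newest copy lsn=%llu\n",
		    p ? (unsigned long long) p->lsn : 0ULL);
	return 0;
}
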
-#endif /* !UNIV_HOTBACKUP */
-#ifdef UNIV_HOTBACKUP
-/******************************************************//**
-Creates new log files after a backup has been restored. */
-UNIV_INTERN
-void
-recv_reset_log_files_for_backup(
-/*============================*/
- const char* log_dir, /*!< in: log file directory path */
- ulint n_log_files, /*!< in: number of log files */
- lsn_t log_file_size, /*!< in: log file size */
- lsn_t lsn) /*!< in: new start lsn, must be
- divisible by OS_FILE_LOG_BLOCK_SIZE */
+#ifndef DBUG_OFF
+/** Return the string name of a redo log record type.
+@param[in]	type	redo log record type
+@return string name of the record type */
+static const char* get_mlog_string(mlog_id_t type)
{
- os_file_t log_file;
- ibool success;
- byte* buf;
- ulint i;
- ulint log_dir_len;
- char name[5000];
- static const char ib_logfile_basename[] = "ib_logfile";
-
- log_dir_len = strlen(log_dir);
- /* full path name of ib_logfile consists of log dir path + basename
- + number. This must fit in the name buffer.
- */
- ut_a(log_dir_len + strlen(ib_logfile_basename) + 11 < sizeof(name));
-
- buf = ut_malloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
- memset(buf, '\0', LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
-
- for (i = 0; i < n_log_files; i++) {
-
- sprintf(name, "%s%s%lu", log_dir,
- ib_logfile_basename, (ulong) i);
-
- log_file = os_file_create_simple(innodb_file_log_key,
- name, OS_FILE_CREATE,
- OS_FILE_READ_WRITE,
- &success);
- if (!success) {
- fprintf(stderr,
- "InnoDB: Cannot create %s. Check that"
- " the file does not exist yet.\n", name);
-
- exit(1);
- }
-
- fprintf(stderr,
- "Setting log file size to %llu\n",
- log_file_size);
-
- success = os_file_set_size(name, log_file, log_file_size);
-
- if (!success) {
- fprintf(stderr,
- "InnoDB: Cannot set %s size to %llu\n",
- name, log_file_size);
- exit(1);
- }
-
- os_file_flush(log_file);
- os_file_close(log_file);
- }
-
- /* We pretend there is a checkpoint at lsn + LOG_BLOCK_HDR_SIZE */
-
- log_reset_first_header_and_checkpoint(buf, lsn);
-
- log_block_init_in_old_format(buf + LOG_FILE_HDR_SIZE, lsn);
- log_block_set_first_rec_group(buf + LOG_FILE_HDR_SIZE,
- LOG_BLOCK_HDR_SIZE);
- sprintf(name, "%s%s%lu", log_dir, ib_logfile_basename, (ulong)0);
-
- log_file = os_file_create_simple(innodb_file_log_key,
- name, OS_FILE_OPEN,
- OS_FILE_READ_WRITE, &success);
- if (!success) {
- fprintf(stderr, "InnoDB: Cannot open %s.\n", name);
-
- exit(1);
- }
-
- os_file_write(name, log_file, buf, 0,
- LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
- os_file_flush(log_file);
- os_file_close(log_file);
-
- ut_free(buf);
-}
-#endif /* UNIV_HOTBACKUP */
-
-#ifdef UNIV_LOG_ARCHIVE
-/* Dead code */
-/******************************************************//**
-Reads from the archive of a log group and performs recovery.
-@return TRUE if no more complete consistent archive files */
-static
-ibool
-log_group_recover_from_archive_file(
-/*================================*/
- log_group_t* group) /*!< in: log group */
-{
- os_file_t file_handle;
- ib_uint64_t start_lsn;
- ib_uint64_t file_end_lsn;
- ib_uint64_t dummy_lsn;
- ib_uint64_t scanned_lsn;
- ulint len;
- ibool ret;
- byte* buf;
- os_offset_t read_offset;
- os_offset_t file_size;
- int input_char;
- char name[10000];
- dberr_t err;
-
- ut_a(0);
-
-try_open_again:
- buf = log_sys->buf;
-
- /* Add the file to the archive file space; open the file */
-
- log_archived_file_name_gen(name, group->id, group->archived_file_no);
-
- file_handle = os_file_create(innodb_file_log_key,
- name, OS_FILE_OPEN,
- OS_FILE_LOG, OS_FILE_AIO, &ret);
-
- if (ret == FALSE) {
-ask_again:
- fprintf(stderr,
- "InnoDB: Do you want to copy additional"
- " archived log files\n"
- "InnoDB: to the directory\n");
- fprintf(stderr,
- "InnoDB: or were these all the files needed"
- " in recovery?\n");
- fprintf(stderr,
- "InnoDB: (Y == copy more files; N == this is all)?");
-
- input_char = getchar();
-
- if (input_char == (int) 'N') {
-
- return(TRUE);
- } else if (input_char == (int) 'Y') {
-
- goto try_open_again;
- } else {
- goto ask_again;
- }
- }
-
- file_size = os_file_get_size(file_handle);
- ut_a(file_size != (os_offset_t) -1);
-
- fprintf(stderr, "InnoDB: Opened archived log file %s\n", name);
-
- ret = os_file_close(file_handle);
-
- if (file_size < LOG_FILE_HDR_SIZE) {
- fprintf(stderr,
- "InnoDB: Archive file header incomplete %s\n", name);
-
- return(TRUE);
- }
-
- ut_a(ret);
-
- /* Add the archive file as a node to the space */
-
- fil_node_create(name, 1 + file_size / UNIV_PAGE_SIZE,
- group->archive_space_id, FALSE);
-#if RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE
-# error "RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE"
-#endif
-
- /* Read the archive file header */
- fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->archive_space_id, 0, 0,
- LOG_FILE_HDR_SIZE, buf, NULL, 0);
-
- /* Check if the archive file header is consistent */
-
- if (mach_read_from_4(buf + LOG_GROUP_ID) != group->id
- || mach_read_from_4(buf + LOG_FILE_NO)
- != group->archived_file_no) {
- fprintf(stderr,
- "InnoDB: Archive file header inconsistent %s\n", name);
-
- return(TRUE);
- }
-
- if (!mach_read_from_4(buf + LOG_FILE_ARCH_COMPLETED)) {
- fprintf(stderr,
- "InnoDB: Archive file not completely written %s\n",
- name);
-
- return(TRUE);
- }
-
- start_lsn = mach_read_from_8(buf + LOG_FILE_START_LSN);
- file_end_lsn = mach_read_from_8(buf + LOG_FILE_END_LSN);
-
- if (!recv_sys->scanned_lsn) {
-
- if (recv_sys->parse_start_lsn < start_lsn) {
- fprintf(stderr,
- "InnoDB: Archive log file %s"
- " starts from too big a lsn\n",
- name);
- return(TRUE);
- }
-
- recv_sys->scanned_lsn = start_lsn;
- }
-
- if (recv_sys->scanned_lsn != start_lsn) {
-
- fprintf(stderr,
- "InnoDB: Archive log file %s starts from"
- " a wrong lsn\n",
- name);
- return(TRUE);
- }
-
- read_offset = LOG_FILE_HDR_SIZE;
-
- for (;;) {
- len = RECV_SCAN_SIZE;
-
- if (read_offset + len > file_size) {
- len = ut_calc_align_down(file_size - read_offset,
- OS_FILE_LOG_BLOCK_SIZE);
- }
-
- if (len == 0) {
-
- break;
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Archive read starting at"
- " lsn %llu, len %lu from file %s\n",
- start_lsn,
- (ulong) len, name);
- }
-#endif /* UNIV_DEBUG */
+ switch (type) {
+ case MLOG_SINGLE_REC_FLAG:
+ return("MLOG_SINGLE_REC_FLAG");
- fil_io(OS_FILE_READ | OS_FILE_LOG, true,
- group->archive_space_id, read_offset / UNIV_PAGE_SIZE,
- read_offset % UNIV_PAGE_SIZE, len, buf, NULL, 0);
+ case MLOG_1BYTE:
+ return("MLOG_1BYTE");
- ret = recv_scan_log_recs(
- (buf_pool_get_n_pages()
- - (recv_n_pool_free_frames * srv_buf_pool_instances))
- * UNIV_PAGE_SIZE, TRUE, buf, len, start_lsn,
- &dummy_lsn, &scanned_lsn, &err);
+ case MLOG_2BYTES:
+ return("MLOG_2BYTES");
- if (err != DB_SUCCESS) {
- return (FALSE);
- }
+ case MLOG_4BYTES:
+ return("MLOG_4BYTES");
- if (scanned_lsn == file_end_lsn) {
+ case MLOG_8BYTES:
+ return("MLOG_8BYTES");
- return(FALSE);
- }
+ case MLOG_REC_INSERT:
+ return("MLOG_REC_INSERT");
- if (ret) {
- fprintf(stderr,
- "InnoDB: Archive log file %s"
- " does not scan right\n",
- name);
- return(TRUE);
- }
-
- read_offset += len;
- start_lsn += len;
-
- ut_ad(start_lsn == scanned_lsn);
- }
+ case MLOG_REC_CLUST_DELETE_MARK:
+ return("MLOG_REC_CLUST_DELETE_MARK");
- return(FALSE);
-}
+ case MLOG_REC_SEC_DELETE_MARK:
+ return("MLOG_REC_SEC_DELETE_MARK");
-/********************************************************//**
-Recovers from archived log files, and also from log files, if they exist.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-ulint
-recv_recovery_from_archive_start(
-/*=============================*/
- ib_uint64_t min_flushed_lsn,/*!< in: min flushed lsn field from the
- data files */
- ib_uint64_t limit_lsn, /*!< in: recover up to this lsn if
- possible */
- ulint first_log_no) /*!< in: number of the first archived
- log file to use in the recovery; the
- file will be searched from
- INNOBASE_LOG_ARCH_DIR specified in
- server config file */
-{
- log_group_t* group;
- ulint group_id;
- ulint trunc_len;
- ibool ret;
- ulint err;
+ case MLOG_REC_UPDATE_IN_PLACE:
+ return("MLOG_REC_UPDATE_IN_PLACE");
- ut_a(0);
+ case MLOG_REC_DELETE:
+ return("MLOG_REC_DELETE");
- recv_sys_create();
- recv_sys_init(buf_pool_get_curr_size());
+ case MLOG_LIST_END_DELETE:
+ return("MLOG_LIST_END_DELETE");
- recv_recovery_on = TRUE;
- recv_recovery_from_backup_on = TRUE;
+ case MLOG_LIST_START_DELETE:
+ return("MLOG_LIST_START_DELETE");
- recv_sys->limit_lsn = limit_lsn;
+ case MLOG_LIST_END_COPY_CREATED:
+ return("MLOG_LIST_END_COPY_CREATED");
- group_id = 0;
+ case MLOG_PAGE_REORGANIZE:
+ return("MLOG_PAGE_REORGANIZE");
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
+ case MLOG_PAGE_CREATE:
+ return("MLOG_PAGE_CREATE");
- while (group) {
- if (group->id == group_id) {
+ case MLOG_UNDO_INSERT:
+ return("MLOG_UNDO_INSERT");
- break;
- }
+ case MLOG_UNDO_ERASE_END:
+ return("MLOG_UNDO_ERASE_END");
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
+ case MLOG_UNDO_INIT:
+ return("MLOG_UNDO_INIT");
- if (!group) {
- fprintf(stderr,
- "InnoDB: There is no log group defined with id %lu!\n",
- (ulong) group_id);
- return(DB_ERROR);
- }
+ case MLOG_UNDO_HDR_REUSE:
+ return("MLOG_UNDO_HDR_REUSE");
- group->archived_file_no = first_log_no;
+ case MLOG_UNDO_HDR_CREATE:
+ return("MLOG_UNDO_HDR_CREATE");
- recv_sys->parse_start_lsn = min_flushed_lsn;
+ case MLOG_REC_MIN_MARK:
+ return("MLOG_REC_MIN_MARK");
- recv_sys->scanned_lsn = 0;
- recv_sys->scanned_checkpoint_no = 0;
- recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
+ case MLOG_IBUF_BITMAP_INIT:
+ return("MLOG_IBUF_BITMAP_INIT");
- recv_sys->archive_group = group;
+#ifdef UNIV_LOG_LSN_DEBUG
+ case MLOG_LSN:
+ return("MLOG_LSN");
+#endif /* UNIV_LOG_LSN_DEBUG */
- ret = FALSE;
+ case MLOG_WRITE_STRING:
+ return("MLOG_WRITE_STRING");
- mutex_enter(&(log_sys->mutex));
+ case MLOG_MULTI_REC_END:
+ return("MLOG_MULTI_REC_END");
- while (!ret) {
- ret = log_group_recover_from_archive_file(group);
+ case MLOG_DUMMY_RECORD:
+ return("MLOG_DUMMY_RECORD");
- /* Close and truncate a possible processed archive file
- from the file space */
+ case MLOG_FILE_DELETE:
+ return("MLOG_FILE_DELETE");
- trunc_len = UNIV_PAGE_SIZE
- * fil_space_get_size(group->archive_space_id);
- if (trunc_len > 0) {
- fil_space_truncate_start(group->archive_space_id,
- trunc_len);
- }
+ case MLOG_COMP_REC_MIN_MARK:
+ return("MLOG_COMP_REC_MIN_MARK");
- group->archived_file_no++;
- }
+ case MLOG_COMP_PAGE_CREATE:
+ return("MLOG_COMP_PAGE_CREATE");
- if (recv_sys->recovered_lsn < limit_lsn) {
+ case MLOG_COMP_REC_INSERT:
+ return("MLOG_COMP_REC_INSERT");
- if (!recv_sys->scanned_lsn) {
+ case MLOG_COMP_REC_CLUST_DELETE_MARK:
+ return("MLOG_COMP_REC_CLUST_DELETE_MARK");
- recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
- }
+ case MLOG_COMP_REC_UPDATE_IN_PLACE:
+ return("MLOG_COMP_REC_UPDATE_IN_PLACE");
- mutex_exit(&(log_sys->mutex));
+ case MLOG_COMP_REC_DELETE:
+ return("MLOG_COMP_REC_DELETE");
- err = recv_recovery_from_checkpoint_start(LOG_ARCHIVE,
- limit_lsn,
- LSN_MAX,
- LSN_MAX);
- if (err != DB_SUCCESS) {
+ case MLOG_COMP_LIST_END_DELETE:
+ return("MLOG_COMP_LIST_END_DELETE");
- return(err);
- }
+ case MLOG_COMP_LIST_START_DELETE:
+ return("MLOG_COMP_LIST_START_DELETE");
- mutex_enter(&(log_sys->mutex));
- }
+ case MLOG_COMP_LIST_END_COPY_CREATED:
+ return("MLOG_COMP_LIST_END_COPY_CREATED");
- if (limit_lsn != LSN_MAX) {
+ case MLOG_COMP_PAGE_REORGANIZE:
+ return("MLOG_COMP_PAGE_REORGANIZE");
- recv_apply_hashed_log_recs(false);
+ case MLOG_FILE_CREATE2:
+ return("MLOG_FILE_CREATE2");
- recv_reset_logs(0, FALSE, recv_sys->recovered_lsn);
- }
+ case MLOG_ZIP_WRITE_NODE_PTR:
+ return("MLOG_ZIP_WRITE_NODE_PTR");
- mutex_exit(&(log_sys->mutex));
+ case MLOG_ZIP_WRITE_BLOB_PTR:
+ return("MLOG_ZIP_WRITE_BLOB_PTR");
- return(DB_SUCCESS);
-}
+ case MLOG_ZIP_WRITE_HEADER:
+ return("MLOG_ZIP_WRITE_HEADER");
-/********************************************************//**
-Completes recovery from archive. */
-UNIV_INTERN
-void
-recv_recovery_from_archive_finish(void)
-/*===================================*/
-{
- recv_recovery_from_checkpoint_finish();
+ case MLOG_ZIP_PAGE_COMPRESS:
+ return("MLOG_ZIP_PAGE_COMPRESS");
- recv_recovery_from_backup_on = FALSE;
-}
-#endif /* UNIV_LOG_ARCHIVE */
+ case MLOG_ZIP_PAGE_COMPRESS_NO_DATA:
+ return("MLOG_ZIP_PAGE_COMPRESS_NO_DATA");
+ case MLOG_ZIP_PAGE_REORGANIZE:
+ return("MLOG_ZIP_PAGE_REORGANIZE");
-void recv_dblwr_t::add(byte* page)
-{
- pages.push_back(page);
-}
+ case MLOG_FILE_RENAME2:
+ return("MLOG_FILE_RENAME2");
-byte* recv_dblwr_t::find_page(ulint space_id, ulint page_no)
-{
- std::vector<byte*> matches;
- byte* result = 0;
+ case MLOG_FILE_NAME:
+ return("MLOG_FILE_NAME");
- for (std::list<byte*>::iterator i = pages.begin();
- i != pages.end(); ++i) {
+ case MLOG_CHECKPOINT:
+ return("MLOG_CHECKPOINT");
- if ((page_get_space_id(*i) == space_id)
- && (page_get_page_no(*i) == page_no)) {
- matches.push_back(*i);
- }
- }
+ case MLOG_PAGE_CREATE_RTREE:
+ return("MLOG_PAGE_CREATE_RTREE");
- if (matches.size() == 1) {
- result = matches[0];
- } else if (matches.size() > 1) {
+ case MLOG_COMP_PAGE_CREATE_RTREE:
+ return("MLOG_COMP_PAGE_CREATE_RTREE");
- lsn_t max_lsn = 0;
- lsn_t page_lsn = 0;
+ case MLOG_INIT_FILE_PAGE2:
+ return("MLOG_INIT_FILE_PAGE2");
- for (std::vector<byte*>::iterator i = matches.begin();
- i != matches.end(); ++i) {
+ case MLOG_INDEX_LOAD:
+ return("MLOG_INDEX_LOAD");
- page_lsn = mach_read_from_8(*i + FIL_PAGE_LSN);
+ case MLOG_TRUNCATE:
+ return("MLOG_TRUNCATE");
- if (page_lsn > max_lsn) {
- max_lsn = page_lsn;
- result = *i;
- }
- }
+ case MLOG_FILE_WRITE_CRYPT_DATA:
+ return("MLOG_FILE_WRITE_CRYPT_DATA");
}
-
- return(result);
+ DBUG_ASSERT(0);
+ return(NULL);
}
+#endif /* !DBUG_OFF */
diff --git a/storage/innobase/lz4.cmake b/storage/innobase/lz4.cmake
new file mode 100644
index 00000000000..e901378eafc
--- /dev/null
+++ b/storage/innobase/lz4.cmake
@@ -0,0 +1,56 @@
+# Copyright (C) 2014, SkySQL Ab. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
+
+SET(WITH_INNODB_LZ4 AUTO CACHE STRING
+ "Build with lz4. Possible values are 'ON', 'OFF', 'AUTO' and default is 'AUTO'")
+
+MACRO (MYSQL_CHECK_LZ4)
+ IF (WITH_INNODB_LZ4 STREQUAL "ON" OR WITH_INNODB_LZ4 STREQUAL "AUTO")
+ CHECK_INCLUDE_FILES(lz4.h HAVE_LZ4_H)
+ CHECK_LIBRARY_EXISTS(lz4 LZ4_compress_limitedOutput "" HAVE_LZ4_SHARED_LIB)
+ CHECK_LIBRARY_EXISTS(lz4 LZ4_compress_default "" HAVE_LZ4_COMPRESS_DEFAULT)
+
+ IF (HAVE_LZ4_SHARED_LIB AND HAVE_LZ4_H)
+ ADD_DEFINITIONS(-DHAVE_LZ4=1)
+ IF (HAVE_LZ4_COMPRESS_DEFAULT)
+ ADD_DEFINITIONS(-DHAVE_LZ4_COMPRESS_DEFAULT=1)
+ ENDIF()
+ LINK_LIBRARIES(lz4)
+ ELSE()
+ IF (WITH_INNODB_LZ4 STREQUAL "ON")
+ MESSAGE(FATAL_ERROR "Required lz4 library is not found")
+ ENDIF()
+ ENDIF()
+ ENDIF()
+ENDMACRO()
+
+MACRO (MYSQL_CHECK_LZ4_STATIC)
+ IF (WITH_INNODB_LZ4 STREQUAL "ON" OR WITH_INNODB_LZ4 STREQUAL "AUTO")
+ CHECK_INCLUDE_FILES(lz4.h HAVE_LZ4_H)
+ CHECK_LIBRARY_EXISTS(liblz4.a LZ4_compress_limitedOutput "" HAVE_LZ4_LIB)
+    CHECK_LIBRARY_EXISTS(liblz4.a LZ4_compress_default "" HAVE_LZ4_COMPRESS_DEFAULT)
+
+ IF(HAVE_LZ4_LIB AND HAVE_LZ4_H)
+ ADD_DEFINITIONS(-DHAVE_LZ4=1)
+ IF (HAVE_LZ4_COMPRESS_DEFAULT)
+ ADD_DEFINITIONS(-DHAVE_LZ4_COMPRESS_DEFAULT=1)
+ ENDIF()
+ LINK_LIBRARIES(liblz4.a)
+ ELSE()
+ IF (WITH_INNODB_LZ4 STREQUAL "ON")
+ MESSAGE(FATAL_ERROR "Required lz4 library is not found")
+ ENDIF()
+ ENDIF()
+ ENDIF()
+ENDMACRO()
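
The macros above only probe for symbols and link the library; the actual calls are made by the page-compression code. For reference, a minimal round trip through the lz4 API that these checks test for, assuming the lz4 headers and library are installed (LZ4_compress_default is the current entry point, LZ4_compress_limitedOutput its deprecated predecessor); the payload and buffer sizes here are arbitrary:

#include <lz4.h>	/* provided by the library that LINK_LIBRARIES(lz4) adds */
#include <cstdio>
#include <cstring>
#include <vector>

int main()
{
	const char	src[] = "InnoDB page compression: a small test payload.";
	const int	src_size = (int) sizeof(src);

	/* Worst-case output size for an incompressible input. */
	std::vector<char> comp(LZ4_compressBound(src_size));

	const int comp_size = LZ4_compress_default(
		src, comp.data(), src_size, (int) comp.size());
	if (comp_size <= 0) {
		std::fprintf(stderr, "compression failed\n");
		return 1;
	}

	std::vector<char> decomp(src_size);
	const int n = LZ4_decompress_safe(
		comp.data(), decomp.data(), comp_size, (int) decomp.size());

	std::printf("ok=%d: %d -> %d -> %d bytes\n",
		    n == src_size && !memcmp(src, decomp.data(), src_size),
		    src_size, comp_size, n);
	return 0;
}

The lzma.cmake and lzo.cmake files below follow the same probe-and-link pattern for their respective entry points.
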
diff --git a/storage/innobase/lzma.cmake b/storage/innobase/lzma.cmake
new file mode 100644
index 00000000000..93de0a2934d
--- /dev/null
+++ b/storage/innobase/lzma.cmake
@@ -0,0 +1,33 @@
+# Copyright (C) 2014, SkySQL Ab. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
+
+SET(WITH_INNODB_LZMA AUTO CACHE STRING
+ "Build with lzma. Possible values are 'ON', 'OFF', 'AUTO' and default is 'AUTO'")
+
+MACRO (MYSQL_CHECK_LZMA)
+ IF (WITH_INNODB_LZMA STREQUAL "ON" OR WITH_INNODB_LZMA STREQUAL "AUTO")
+ CHECK_INCLUDE_FILES(lzma.h HAVE_LZMA_H)
+ CHECK_LIBRARY_EXISTS(lzma lzma_stream_buffer_decode "" HAVE_LZMA_DECODE)
+ CHECK_LIBRARY_EXISTS(lzma lzma_easy_buffer_encode "" HAVE_LZMA_ENCODE)
+
+ IF (HAVE_LZMA_DECODE AND HAVE_LZMA_ENCODE AND HAVE_LZMA_H)
+ ADD_DEFINITIONS(-DHAVE_LZMA=1)
+ LINK_LIBRARIES(lzma)
+ ELSE()
+ IF (WITH_INNODB_LZMA STREQUAL "ON")
+ MESSAGE(FATAL_ERROR "Required lzma library is not found")
+ ENDIF()
+ ENDIF()
+ ENDIF()
+ENDMACRO()
diff --git a/storage/innobase/lzo.cmake b/storage/innobase/lzo.cmake
new file mode 100644
index 00000000000..236eac2d1e5
--- /dev/null
+++ b/storage/innobase/lzo.cmake
@@ -0,0 +1,48 @@
+# Copyright (C) 2014, SkySQL Ab. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
+
+SET(WITH_INNODB_LZO AUTO CACHE STRING
+ "Build with lzo. Possible values are 'ON', 'OFF', 'AUTO' and default is 'AUTO'")
+
+MACRO (MYSQL_CHECK_LZO_STATIC)
+ IF (WITH_INNODB_LZO STREQUAL "ON" OR WITH_INNODB_LZO STREQUAL "AUTO")
+ CHECK_INCLUDE_FILES(lzo/lzo1x.h HAVE_LZO_H)
+ CHECK_LIBRARY_EXISTS(liblzo2.a lzo1x_1_compress "" HAVE_LZO_LIB)
+
+ IF(HAVE_LZO_LIB AND HAVE_LZO_H)
+ ADD_DEFINITIONS(-DHAVE_LZO=1)
+ LINK_LIBRARIES(liblzo2.a)
+ ELSE()
+ IF (WITH_INNODB_LZO STREQUAL "ON")
+ MESSAGE(FATAL_ERROR "Required lzo library is not found")
+ ENDIF()
+ ENDIF()
+ ENDIF()
+ENDMACRO()
+
+MACRO (MYSQL_CHECK_LZO)
+ IF (WITH_INNODB_LZO STREQUAL "ON" OR WITH_INNODB_LZO STREQUAL "AUTO")
+ CHECK_INCLUDE_FILES(lzo/lzo1x.h HAVE_LZO_H)
+ CHECK_LIBRARY_EXISTS(lzo2 lzo1x_1_compress "" HAVE_LZO_SHARED_LIB)
+
+ IF(HAVE_LZO_SHARED_LIB AND HAVE_LZO_H)
+ ADD_DEFINITIONS(-DHAVE_LZO=1)
+ LINK_LIBRARIES(lzo2)
+ ELSE()
+ IF (WITH_INNODB_LZO STREQUAL "ON")
+ MESSAGE(FATAL_ERROR "Required lzo library is not found")
+ ENDIF()
+ ENDIF()
+ ENDIF()
+ENDMACRO()
diff --git a/storage/innobase/mach/mach0data.cc b/storage/innobase/mach/mach0data.cc
index 064d997cd7c..85533908d16 100644
--- a/storage/innobase/mach/mach0data.cc
+++ b/storage/innobase/mach/mach0data.cc
@@ -26,35 +26,29 @@ Created 11/28/1995 Heikki Tuuri
#include "mach0data.h"
-#ifdef UNIV_NONINL
-#include "mach0data.ic"
-#endif
-
-/*********************************************************//**
-Reads a ulint in a compressed form if the log record fully contains it.
-@return pointer to end of the stored field, NULL if not complete */
-UNIV_INTERN
-byte*
+/** Read a 32-bit integer in a compressed form.
+@param[in,out]	ptr	pointer to memory from which to read;
+advanced by the number of bytes consumed, or set to NULL if out of space
+@param[in] end_ptr end of the buffer
+@return unsigned value */
+ib_uint32_t
mach_parse_compressed(
-/*==================*/
- byte* ptr, /*!< in: pointer to buffer from where to read */
- byte* end_ptr,/*!< in: pointer to end of the buffer */
- ulint* val) /*!< out: read value (< 2^32) */
+ const byte** ptr,
+ const byte* end_ptr)
{
- ulint flag;
-
- ut_ad(ptr && end_ptr && val);
+ ulint val;
- if (ptr >= end_ptr) {
-
- return(NULL);
+ if (*ptr >= end_ptr) {
+ *ptr = NULL;
+ return(0);
}
- flag = mach_read_from_1(ptr);
+ val = mach_read_from_1(*ptr);
- if (flag < 0x80UL) {
- *val = flag;
- return(ptr + 1);
+ if (val < 0x80) {
+ /* 0nnnnnnn (7 bits) */
+ ++*ptr;
+ return(static_cast<ib_uint32_t>(val));
}
/* Workaround GCC bug
@@ -70,42 +64,48 @@ mach_parse_compressed(
__atomic_thread_fence(__ATOMIC_ACQUIRE);
#endif
- if (flag < 0xC0UL) {
- if (end_ptr < ptr + 2) {
- return(NULL);
+ if (val < 0xC0) {
+ /* 10nnnnnn nnnnnnnn (14 bits) */
+ if (end_ptr >= *ptr + 2) {
+ val = mach_read_from_2(*ptr) & 0x3FFF;
+ ut_ad(val > 0x7F);
+ *ptr += 2;
+ return(static_cast<ib_uint32_t>(val));
}
-
- *val = mach_read_from_2(ptr) & 0x7FFFUL;
-
- return(ptr + 2);
+ *ptr = NULL;
+ return(0);
}
#ifdef DEPLOY_FENCE
__atomic_thread_fence(__ATOMIC_ACQUIRE);
#endif
- if (flag < 0xE0UL) {
- if (end_ptr < ptr + 3) {
- return(NULL);
+ if (val < 0xE0) {
+ /* 110nnnnn nnnnnnnn nnnnnnnn (21 bits) */
+ if (end_ptr >= *ptr + 3) {
+ val = mach_read_from_3(*ptr) & 0x1FFFFF;
+ ut_ad(val > 0x3FFF);
+ *ptr += 3;
+ return(static_cast<ib_uint32_t>(val));
}
-
- *val = mach_read_from_3(ptr) & 0x3FFFFFUL;
-
- return(ptr + 3);
+ *ptr = NULL;
+ return(0);
}
#ifdef DEPLOY_FENCE
__atomic_thread_fence(__ATOMIC_ACQUIRE);
#endif
- if (flag < 0xF0UL) {
- if (end_ptr < ptr + 4) {
- return(NULL);
+ if (val < 0xF0) {
+ /* 1110nnnn nnnnnnnn nnnnnnnn nnnnnnnn (28 bits) */
+ if (end_ptr >= *ptr + 4) {
+ val = mach_read_from_4(*ptr) & 0xFFFFFFF;
+ ut_ad(val > 0x1FFFFF);
+ *ptr += 4;
+ return(static_cast<ib_uint32_t>(val));
}
-
- *val = mach_read_from_4(ptr) & 0x1FFFFFFFUL;
-
- return(ptr + 4);
+ *ptr = NULL;
+ return(0);
}
#ifdef DEPLOY_FENCE
@@ -114,12 +114,16 @@ mach_parse_compressed(
#undef DEPLOY_FENCE
- ut_ad(flag == 0xF0UL);
+ ut_ad(val == 0xF0);
- if (end_ptr < ptr + 5) {
- return(NULL);
+ /* 11110000 nnnnnnnn nnnnnnnn nnnnnnnn nnnnnnnn (32 bits) */
+ if (end_ptr >= *ptr + 5) {
+ val = mach_read_from_4(*ptr + 1);
+ ut_ad(val > 0xFFFFFFF);
+ *ptr += 5;
+ return(static_cast<ib_uint32_t>(val));
}
- *val = mach_read_from_4(ptr + 1);
- return(ptr + 5);
+ *ptr = NULL;
+ return(0);
}
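
The rewritten mach_parse_compressed() documents InnoDB's variable-length integer encoding inline: 1 to 5 bytes carrying 7, 14, 21, 28 or a full 32 bits of payload. A standalone round-trip sketch of that encoding; put_compressed() and get_compressed() are illustrative names (InnoDB's real writer counterpart is mach_write_compressed()):

#include <cassert>
#include <cstdint>
#include <cstdio>

/* Encode v in 1..5 bytes, using the bit layouts documented in
mach_parse_compressed(). */
static size_t put_compressed(uint8_t* p, uint32_t v)
{
	if (v < 0x80) {			/* 0nnnnnnn (7 bits) */
		p[0] = (uint8_t) v;
		return 1;
	} else if (v < 0x4000) {	/* 10nnnnnn nnnnnnnn (14 bits) */
		p[0] = (uint8_t) (0x80 | (v >> 8));
		p[1] = (uint8_t) v;
		return 2;
	} else if (v < 0x200000) {	/* 110nnnnn + 2 bytes (21 bits) */
		p[0] = (uint8_t) (0xC0 | (v >> 16));
		p[1] = (uint8_t) (v >> 8);
		p[2] = (uint8_t) v;
		return 3;
	} else if (v < 0x10000000) {	/* 1110nnnn + 3 bytes (28 bits) */
		p[0] = (uint8_t) (0xE0 | (v >> 24));
		p[1] = (uint8_t) (v >> 16);
		p[2] = (uint8_t) (v >> 8);
		p[3] = (uint8_t) v;
		return 4;
	}
	p[0] = 0xF0;			/* 11110000 + 4 bytes (32 bits) */
	p[1] = (uint8_t) (v >> 24);
	p[2] = (uint8_t) (v >> 16);
	p[3] = (uint8_t) (v >> 8);
	p[4] = (uint8_t) v;
	return 5;
}

/* Decode, mirroring the masks 0x3FFF, 0x1FFFFF and 0xFFFFFFF above. */
static uint32_t get_compressed(const uint8_t* p, size_t* len)
{
	if (p[0] < 0x80) { *len = 1; return p[0]; }
	if (p[0] < 0xC0) {
		*len = 2;
		return ((uint32_t) p[0] << 8 | p[1]) & 0x3FFF;
	}
	if (p[0] < 0xE0) {
		*len = 3;
		return ((uint32_t) p[0] << 16 | (uint32_t) p[1] << 8
			| p[2]) & 0x1FFFFF;
	}
	if (p[0] < 0xF0) {
		*len = 4;
		return ((uint32_t) p[0] << 24 | (uint32_t) p[1] << 16
			| (uint32_t) p[2] << 8 | p[3]) & 0xFFFFFFF;
	}
	*len = 5;
	return (uint32_t) p[1] << 24 | (uint32_t) p[2] << 16
		| (uint32_t) p[3] << 8 | p[4];
}

int main()
{
	const uint32_t samples[] = {0, 0x7F, 0x80, 0x3FFF, 0x4000,
				    0x1FFFFF, 0x200000, 0xFFFFFFF,
				    0x10000000, 0xFFFFFFFFU};
	for (size_t i = 0; i < sizeof(samples) / sizeof(*samples); i++) {
		uint8_t	buf[5];
		size_t	n = put_compressed(buf, samples[i]);
		size_t	m;
		uint32_t v = get_compressed(buf, &m);
		assert(v == samples[i] && m == n);
		std::printf("0x%X -> %u byte(s)\n", (unsigned) v, (unsigned) n);
	}
	return 0;
}
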
diff --git a/storage/innobase/mem/mem0dbg.cc b/storage/innobase/mem/mem0dbg.cc
deleted file mode 100644
index 2d8db48db2e..00000000000
--- a/storage/innobase/mem/mem0dbg.cc
+++ /dev/null
@@ -1,1050 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file mem/mem0dbg.cc
-The memory management: the debug code. This is not a compilation module,
-but is included in mem0mem.* !
-
-Created 6/9/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifdef UNIV_MEM_DEBUG
-# ifndef UNIV_HOTBACKUP
-# include "ha_prototypes.h"
-/* The mutex which protects in the debug version the hash table
-containing the list of live memory heaps, and also the global
-variables below. */
-UNIV_INTERN ib_mutex_t mem_hash_mutex;
-
-#ifdef UNIV_PFS_MUTEX
-/* Key to register mem_hash_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t mem_hash_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-# endif /* !UNIV_HOTBACKUP */
-
-/* The following variables contain information about the
-extent of memory allocations. Only used in the debug version.
-Protected by mem_hash_mutex above. */
-
-static ulint mem_n_created_heaps = 0;
-static ulint mem_n_allocations = 0;
-static ulint mem_total_allocated_memory = 0;
-UNIV_INTERN ulint mem_current_allocated_memory = 0;
-static ulint mem_max_allocated_memory = 0;
-# ifndef UNIV_HOTBACKUP
-static ulint mem_last_print_info = 0;
-static ibool mem_hash_initialized = FALSE;
-# endif /* !UNIV_HOTBACKUP */
-
-/* Size of the hash table for memory management tracking */
-#define MEM_HASH_SIZE 997
-
-/* The node of the list containing currently allocated memory heaps */
-
-struct mem_hash_node_t {
- UT_LIST_NODE_T(mem_hash_node_t)
- list; /*!< hash list node */
- mem_heap_t* heap; /*!< memory heap */
- const char* file_name;/* file where heap was created*/
- ulint line; /*!< file line of creation */
- ulint nth_heap;/* this is the nth heap created */
- UT_LIST_NODE_T(mem_hash_node_t)
- all_list;/* list of all created heaps */
-};
-
-typedef UT_LIST_BASE_NODE_T(mem_hash_node_t) mem_hash_cell_t;
-
-/* The hash table of allocated heaps */
-static mem_hash_cell_t mem_hash_table[MEM_HASH_SIZE];
-
-/* The base node of the list of all allocated heaps */
-static mem_hash_cell_t mem_all_list_base;
-
-
-
-UNIV_INLINE
-mem_hash_cell_t*
-mem_hash_get_nth_cell(ulint i);
-
-/* Accessor function for the hash table. Returns a pointer to the
-table cell. */
-UNIV_INLINE
-mem_hash_cell_t*
-mem_hash_get_nth_cell(ulint i)
-{
- ut_a(i < MEM_HASH_SIZE);
-
- return(&(mem_hash_table[i]));
-}
-
-/* Accessor functions for a memory field in the debug version */
-UNIV_INTERN
-void
-mem_field_header_set_len(byte* field, ulint len)
-{
- mach_write_to_4(field - 2 * sizeof(ulint), len);
-}
-
-UNIV_INTERN
-ulint
-mem_field_header_get_len(byte* field)
-{
- return(mach_read_from_4(field - 2 * sizeof(ulint)));
-}
-
-UNIV_INTERN
-void
-mem_field_header_set_check(byte* field, ulint check)
-{
- mach_write_to_4(field - sizeof(ulint), check);
-}
-
-UNIV_INTERN
-ulint
-mem_field_header_get_check(byte* field)
-{
- return(mach_read_from_4(field - sizeof(ulint)));
-}
-
-UNIV_INTERN
-void
-mem_field_trailer_set_check(byte* field, ulint check)
-{
- mach_write_to_4(field + mem_field_header_get_len(field), check);
-}
-
-UNIV_INTERN
-ulint
-mem_field_trailer_get_check(byte* field)
-{
- return(mach_read_from_4(field
- + mem_field_header_get_len(field)));
-}
-#endif /* UNIV_MEM_DEBUG */
-
-#ifndef UNIV_HOTBACKUP
-/******************************************************************//**
-Initializes the memory system. */
-UNIV_INTERN
-void
-mem_init(
-/*=====*/
- ulint size) /*!< in: common pool size in bytes */
-{
-#ifdef UNIV_MEM_DEBUG
-
- ulint i;
-
- /* Initialize the hash table */
- ut_a(FALSE == mem_hash_initialized);
-
- mutex_create(mem_hash_mutex_key, &mem_hash_mutex, SYNC_MEM_HASH);
-
- for (i = 0; i < MEM_HASH_SIZE; i++) {
- UT_LIST_INIT(*mem_hash_get_nth_cell(i));
- }
-
- UT_LIST_INIT(mem_all_list_base);
-
- mem_hash_initialized = TRUE;
-#endif
-
- if (UNIV_LIKELY(srv_use_sys_malloc)) {
- /* When innodb_use_sys_malloc is set, the
- mem_comm_pool won't be used for any allocations. We
- create a dummy mem_comm_pool, because some statistics
- and debugging code relies on it being initialized. */
- size = 1;
- }
-
- mem_comm_pool = mem_pool_create(size);
-}
-
-/******************************************************************//**
-Closes the memory system. */
-UNIV_INTERN
-void
-mem_close(void)
-/*===========*/
-{
- mem_pool_free(mem_comm_pool);
- mem_comm_pool = NULL;
-#ifdef UNIV_MEM_DEBUG
- mutex_free(&mem_hash_mutex);
- mem_hash_initialized = FALSE;
-#endif /* UNIV_MEM_DEBUG */
-}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_MEM_DEBUG
-/******************************************************************//**
-Initializes an allocated memory field in the debug version. */
-UNIV_INTERN
-void
-mem_field_init(
-/*===========*/
- byte* buf, /*!< in: memory field */
- ulint n) /*!< in: how many bytes the user requested */
-{
- ulint rnd;
- byte* usr_buf;
-
- usr_buf = buf + MEM_FIELD_HEADER_SIZE;
-
- /* In the debug version write the length field and the
- check fields to the start and the end of the allocated storage.
- The field header consists of a length field and
- a random number field, in this order. The field trailer contains
- the same random number as a check field. */
-
- mem_field_header_set_len(usr_buf, n);
-
- rnd = ut_rnd_gen_ulint();
-
- mem_field_header_set_check(usr_buf, rnd);
- mem_field_trailer_set_check(usr_buf, rnd);
-
- /* Update the memory allocation information */
-
- mutex_enter(&mem_hash_mutex);
-
- mem_total_allocated_memory += n;
- mem_current_allocated_memory += n;
- mem_n_allocations++;
-
- if (mem_current_allocated_memory > mem_max_allocated_memory) {
- mem_max_allocated_memory = mem_current_allocated_memory;
- }
-
- mutex_exit(&mem_hash_mutex);
-
- /* In the debug version set the buffer to a random
- combination of 0xBA and 0xBE */
-
- mem_init_buf(usr_buf, n);
-}
-
-/******************************************************************//**
-Erases an allocated memory field in the debug version. */
-UNIV_INTERN
-void
-mem_field_erase(
-/*============*/
- byte* buf, /*!< in: memory field */
- ulint n MY_ATTRIBUTE((unused)))
- /*!< in: how many bytes the user requested */
-{
- byte* usr_buf;
-
- usr_buf = buf + MEM_FIELD_HEADER_SIZE;
-
- mutex_enter(&mem_hash_mutex);
- mem_current_allocated_memory -= n;
- mutex_exit(&mem_hash_mutex);
-
- /* Check that the field lengths agree */
- ut_ad(n == (ulint) mem_field_header_get_len(usr_buf));
-
- /* In the debug version, set the freed space to a random
- combination of 0xDE and 0xAD */
-
- mem_erase_buf(buf, MEM_SPACE_NEEDED(n));
-}
-
-/***************************************************************//**
-Initializes a buffer to a random combination of hex BA and BE.
-Used to initialize allocated memory. */
-UNIV_INTERN
-void
-mem_init_buf(
-/*=========*/
- byte* buf, /*!< in: pointer to buffer */
- ulint n) /*!< in: length of buffer */
-{
- byte* ptr;
-
- UNIV_MEM_ASSERT_W(buf, n);
-
- for (ptr = buf; ptr < buf + n; ptr++) {
-
- if (ut_rnd_gen_ibool()) {
- *ptr = 0xBA;
- } else {
- *ptr = 0xBE;
- }
- }
-
- UNIV_MEM_INVALID(buf, n);
-}
-
-/***************************************************************//**
-Initializes a buffer to a random combination of hex DE and AD.
-Used to erase freed memory. */
-UNIV_INTERN
-void
-mem_erase_buf(
-/*==========*/
- byte* buf, /*!< in: pointer to buffer */
- ulint n) /*!< in: length of buffer */
-{
- byte* ptr;
-
- UNIV_MEM_ASSERT_W(buf, n);
-
- for (ptr = buf; ptr < buf + n; ptr++) {
- if (ut_rnd_gen_ibool()) {
- *ptr = 0xDE;
- } else {
- *ptr = 0xAD;
- }
- }
-
- UNIV_MEM_FREE(buf, n);
-}
-
-/***************************************************************//**
-Inserts a created memory heap to the hash table of current allocated
-memory heaps. */
-UNIV_INTERN
-void
-mem_hash_insert(
-/*============*/
- mem_heap_t* heap, /*!< in: the created heap */
- const char* file_name, /*!< in: file name of creation */
- ulint line) /*!< in: line where created */
-{
- mem_hash_node_t* new_node;
- ulint cell_no ;
-
- ut_ad(mem_heap_check(heap));
-
- mutex_enter(&mem_hash_mutex);
-
- cell_no = ut_hash_ulint((ulint) heap, MEM_HASH_SIZE);
-
- /* Allocate a new node to the list */
- new_node = static_cast<mem_hash_node_t*>(ut_malloc(sizeof(*new_node)));
-
- new_node->heap = heap;
- new_node->file_name = file_name;
- new_node->line = line;
- new_node->nth_heap = mem_n_created_heaps;
-
- /* Insert into lists */
- UT_LIST_ADD_FIRST(list, *mem_hash_get_nth_cell(cell_no), new_node);
-
- UT_LIST_ADD_LAST(all_list, mem_all_list_base, new_node);
-
- mem_n_created_heaps++;
-
- mutex_exit(&mem_hash_mutex);
-}
-
-/***************************************************************//**
-Removes a memory heap (which is going to be freed by the caller)
-from the list of live memory heaps. Returns the size of the heap
-in terms of how much memory in bytes was allocated for the user of
-the heap (not the total space occupied by the heap).
-Also validates the heap.
-NOTE: This function does not free the storage occupied by the
-heap itself, only the node in the list of heaps. */
-UNIV_INTERN
-void
-mem_hash_remove(
-/*============*/
- mem_heap_t* heap, /*!< in: the heap to be freed */
- const char* file_name, /*!< in: file name of freeing */
- ulint line) /*!< in: line where freed */
-{
- mem_hash_node_t* node;
- ulint cell_no;
- ibool error;
- ulint size;
-
- ut_ad(mem_heap_check(heap));
-
- mutex_enter(&mem_hash_mutex);
-
- cell_no = ut_hash_ulint((ulint) heap, MEM_HASH_SIZE);
-
- /* Look for the heap in the hash table list */
- node = UT_LIST_GET_FIRST(*mem_hash_get_nth_cell(cell_no));
-
- while (node != NULL) {
- if (node->heap == heap) {
-
- break;
- }
-
- node = UT_LIST_GET_NEXT(list, node);
- }
-
- if (node == NULL) {
- fprintf(stderr,
- "Memory heap or buffer freed in %s line %lu"
- " did not exist.\n",
- innobase_basename(file_name), (ulong) line);
- ut_error;
- }
-
- /* Remove from lists */
- UT_LIST_REMOVE(list, *mem_hash_get_nth_cell(cell_no), node);
-
- UT_LIST_REMOVE(all_list, mem_all_list_base, node);
-
- /* Validate the heap which will be freed */
- mem_heap_validate_or_print(node->heap, NULL, FALSE, &error, &size,
- NULL, NULL);
- if (error) {
- fprintf(stderr,
- "Inconsistency in memory heap or"
- " buffer n:o %lu created\n"
- "in %s line %lu and tried to free in %s line %lu.\n"
- "Hex dump of 400 bytes around memory heap"
- " first block start:\n",
- node->nth_heap,
- innobase_basename(node->file_name), (ulong) node->line,
- innobase_basename(file_name), (ulong) line);
- ut_print_buf(stderr, (byte*) node->heap - 200, 400);
- fputs("\nDump of the mem heap:\n", stderr);
- mem_heap_validate_or_print(node->heap, NULL, TRUE, &error,
- &size, NULL, NULL);
- ut_error;
- }
-
- /* Free the memory occupied by the node struct */
- ut_free(node);
-
- mem_current_allocated_memory -= size;
-
- mutex_exit(&mem_hash_mutex);
-}
-#endif /* UNIV_MEM_DEBUG */
-
-#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG
-/***************************************************************//**
-Checks a memory heap for consistency and prints the contents if requested.
-Outputs the sum of sizes of buffers given to the user (only in
-the debug version), the physical size of the heap and the number of
-blocks in the heap. In case of error returns 0 as sizes and number
-of blocks. */
-UNIV_INTERN
-void
-mem_heap_validate_or_print(
-/*=======================*/
- mem_heap_t* heap, /*!< in: memory heap */
- byte* top MY_ATTRIBUTE((unused)),
- /*!< in: calculate and validate only until
- this top pointer in the heap is reached,
- if this pointer is NULL, ignored */
- ibool print, /*!< in: if TRUE, prints the contents
- of the heap; works only in
- the debug version */
- ibool* error, /*!< out: TRUE if error */
- ulint* us_size,/*!< out: allocated memory
- (for the user) in the heap,
- if a NULL pointer is passed as this
- argument, it is ignored; in the
- non-debug version this is always -1 */
- ulint* ph_size,/*!< out: physical size of the heap,
- if a NULL pointer is passed as this
- argument, it is ignored */
- ulint* n_blocks) /*!< out: number of blocks in the heap,
- if a NULL pointer is passed as this
- argument, it is ignored */
-{
- mem_block_t* block;
- ulint total_len = 0;
- ulint block_count = 0;
- ulint phys_len = 0;
-#ifdef UNIV_MEM_DEBUG
- ulint len;
- byte* field;
- byte* user_field;
- ulint check_field;
-#endif
-
- /* Pessimistically, we set the parameters to error values */
- if (us_size != NULL) {
- *us_size = 0;
- }
- if (ph_size != NULL) {
- *ph_size = 0;
- }
- if (n_blocks != NULL) {
- *n_blocks = 0;
- }
- *error = TRUE;
-
- block = heap;
-
- if (block->magic_n != MEM_BLOCK_MAGIC_N) {
- return;
- }
-
- if (print) {
- fputs("Memory heap:", stderr);
- }
-
- while (block != NULL) {
- phys_len += mem_block_get_len(block);
-
- if ((block->type == MEM_HEAP_BUFFER)
- && (mem_block_get_len(block) > UNIV_PAGE_SIZE)) {
-
- fprintf(stderr,
- "InnoDB: Error: mem block %p"
- " length %lu > UNIV_PAGE_SIZE\n",
- (void*) block,
- (ulong) mem_block_get_len(block));
- /* error */
-
- return;
- }
-
-#ifdef UNIV_MEM_DEBUG
- /* We can trace the fields of the block only in the debug
- version */
- if (print) {
- fprintf(stderr, " Block %ld:", block_count);
- }
-
- field = (byte*) block + mem_block_get_start(block);
-
- if (top && (field == top)) {
-
- goto completed;
- }
-
- while (field < (byte*) block + mem_block_get_free(block)) {
-
- /* Calculate the pointer to the storage
- which was given to the user */
-
- user_field = field + MEM_FIELD_HEADER_SIZE;
-
- len = mem_field_header_get_len(user_field);
-
- if (print) {
- ut_print_buf(stderr, user_field, len);
- putc('\n', stderr);
- }
-
- total_len += len;
- check_field = mem_field_header_get_check(user_field);
-
- if (check_field
- != mem_field_trailer_get_check(user_field)) {
- /* error */
-
- fprintf(stderr,
- "InnoDB: Error: block %lx mem"
- " field %lx len %lu\n"
- "InnoDB: header check field is"
- " %lx but trailer %lx\n",
- (ulint) block,
- (ulint) field, len, check_field,
- mem_field_trailer_get_check(
- user_field));
-
- return;
- }
-
- /* Move to next field */
- field = field + MEM_SPACE_NEEDED(len);
-
- if (top && (field == top)) {
-
- goto completed;
- }
-
- }
-
- /* At the end check that we have arrived to the first free
- position */
-
- if (field != (byte*) block + mem_block_get_free(block)) {
- /* error */
-
- fprintf(stderr,
- "InnoDB: Error: block %lx end of"
- " mem fields %lx\n"
- "InnoDB: but block free at %lx\n",
- (ulint) block, (ulint) field,
- (ulint)((byte*) block
- + mem_block_get_free(block)));
-
- return;
- }
-
-#endif
-
- block = UT_LIST_GET_NEXT(list, block);
- block_count++;
- }
-#ifdef UNIV_MEM_DEBUG
-completed:
-#endif
- if (us_size != NULL) {
- *us_size = total_len;
- }
- if (ph_size != NULL) {
- *ph_size = phys_len;
- }
- if (n_blocks != NULL) {
- *n_blocks = block_count;
- }
- *error = FALSE;
-}
-
-/**************************************************************//**
-Prints the contents of a memory heap. */
-static
-void
-mem_heap_print(
-/*===========*/
- mem_heap_t* heap) /*!< in: memory heap */
-{
- ibool error;
- ulint us_size;
- ulint phys_size;
- ulint n_blocks;
-
- ut_ad(mem_heap_check(heap));
-
- mem_heap_validate_or_print(heap, NULL, TRUE, &error,
- &us_size, &phys_size, &n_blocks);
- fprintf(stderr,
- "\nheap type: %lu; size: user size %lu;"
- " physical size %lu; blocks %lu.\n",
- (ulong) heap->type, (ulong) us_size,
- (ulong) phys_size, (ulong) n_blocks);
- ut_a(!error);
-}
-
-/**************************************************************//**
-Validates the contents of a memory heap.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-mem_heap_validate(
-/*==============*/
- mem_heap_t* heap) /*!< in: memory heap */
-{
- ibool error;
- ulint us_size;
- ulint phys_size;
- ulint n_blocks;
-
- ut_ad(mem_heap_check(heap));
-
- mem_heap_validate_or_print(heap, NULL, FALSE, &error, &us_size,
- &phys_size, &n_blocks);
- if (error) {
- mem_heap_print(heap);
- }
-
- ut_a(!error);
-
- return(TRUE);
-}
-#endif /* UNIV_MEM_DEBUG || UNIV_DEBUG */
-
-#ifdef UNIV_DEBUG
-/**************************************************************//**
-Checks that an object is a memory heap (or a block of it).
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-mem_heap_check(
-/*===========*/
- mem_heap_t* heap) /*!< in: memory heap */
-{
- ut_a(heap->magic_n == MEM_BLOCK_MAGIC_N);
-
- return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-#ifdef UNIV_MEM_DEBUG
-/*****************************************************************//**
-TRUE if no memory is currently allocated.
-@return TRUE if no heaps exist */
-UNIV_INTERN
-ibool
-mem_all_freed(void)
-/*===============*/
-{
- mem_hash_node_t* node;
- ulint heap_count = 0;
- ulint i;
-
- mem_validate();
-
- mutex_enter(&mem_hash_mutex);
-
- for (i = 0; i < MEM_HASH_SIZE; i++) {
-
- node = UT_LIST_GET_FIRST(*mem_hash_get_nth_cell(i));
- while (node != NULL) {
- heap_count++;
- node = UT_LIST_GET_NEXT(list, node);
- }
- }
-
- mutex_exit(&mem_hash_mutex);
-
- if (heap_count == 0) {
-# ifndef UNIV_HOTBACKUP
- ut_a(mem_pool_get_reserved(mem_comm_pool) == 0);
-# endif /* !UNIV_HOTBACKUP */
-
- return(TRUE);
- } else {
- return(FALSE);
- }
-}
-
-/*****************************************************************//**
-Validates the dynamic memory allocation system.
-@return TRUE if error */
-UNIV_INTERN
-ibool
-mem_validate_no_assert(void)
-/*========================*/
-{
- mem_hash_node_t* node;
- ulint n_heaps = 0;
- ulint allocated_mem;
- ulint ph_size;
- ulint total_allocated_mem = 0;
- ibool error = FALSE;
- ulint n_blocks;
- ulint i;
-
-# ifndef UNIV_HOTBACKUP
- mem_pool_validate(mem_comm_pool);
-# endif /* !UNIV_HOTBACKUP */
-
- mutex_enter(&mem_hash_mutex);
-
- for (i = 0; i < MEM_HASH_SIZE; i++) {
-
- node = UT_LIST_GET_FIRST(*mem_hash_get_nth_cell(i));
-
- while (node != NULL) {
- n_heaps++;
-
- mem_heap_validate_or_print(node->heap, NULL,
- FALSE, &error,
- &allocated_mem,
- &ph_size, &n_blocks);
-
- if (error) {
- fprintf(stderr,
- "\nERROR!!!!!!!!!!!!!!!!!!!"
- "!!!!!!!!!!!!!!!!!!!!!!!\n\n"
- "Inconsistency in memory heap"
- " or buffer created\n"
- "in %s line %lu.\n",
- innobase_basename(node->file_name),
- node->line);
-
- mutex_exit(&mem_hash_mutex);
-
- return(TRUE);
- }
-
- total_allocated_mem += allocated_mem;
- node = UT_LIST_GET_NEXT(list, node);
- }
- }
-
- if ((n_heaps == 0) && (mem_current_allocated_memory != 0)) {
- error = TRUE;
- }
-
- if (mem_total_allocated_memory < mem_current_allocated_memory) {
- error = TRUE;
- }
-
- if (mem_max_allocated_memory > mem_total_allocated_memory) {
- error = TRUE;
- }
-
- if (mem_n_created_heaps < n_heaps) {
- error = TRUE;
- }
-
- mutex_exit(&mem_hash_mutex);
-
- return(error);
-}
-
-/************************************************************//**
-Validates the dynamic memory.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-mem_validate(void)
-/*==============*/
-{
- ut_a(!mem_validate_no_assert());
-
- return(TRUE);
-}
-#endif /* UNIV_MEM_DEBUG */
-
-/************************************************************//**
-Tries to find neighboring memory allocation blocks and dumps to stderr
-the neighborhood of a given pointer. */
-UNIV_INTERN
-void
-mem_analyze_corruption(
-/*===================*/
- void* ptr) /*!< in: pointer to place of possible corruption */
-{
- byte* p;
- ulint i;
- ulint dist;
-
- fputs("InnoDB: Apparent memory corruption: mem dump ", stderr);
- ut_print_buf(stderr, (byte*) ptr - 250, 500);
-
- fputs("\nInnoDB: Scanning backward trying to find"
- " previous allocated mem blocks\n", stderr);
-
- p = (byte*) ptr;
- dist = 0;
-
- for (i = 0; i < 10; i++) {
- for (;;) {
- if (((ulint) p) % 4 == 0) {
-
- if (*((ulint*) p) == MEM_BLOCK_MAGIC_N) {
- fprintf(stderr,
- "Mem block at - %lu,"
- " file %s, line %lu\n",
- (ulong) dist,
- (p + sizeof(ulint)),
- (ulong)
- (*(ulint*)(p + 8
- + sizeof(ulint))));
-
- break;
- }
-
- if (*((ulint*) p) == MEM_FREED_BLOCK_MAGIC_N) {
- fprintf(stderr,
- "Freed mem block at - %lu,"
- " file %s, line %lu\n",
- (ulong) dist,
- (p + sizeof(ulint)),
- (ulong)
- (*(ulint*)(p + 8
- + sizeof(ulint))));
-
- break;
- }
- }
-
- p--;
- dist++;
- }
-
- p--;
- dist++;
- }
-
- fprintf(stderr,
- "InnoDB: Scanning forward trying to find next"
- " allocated mem blocks\n");
-
- p = (byte*) ptr;
- dist = 0;
-
- for (i = 0; i < 10; i++) {
- for (;;) {
- if (((ulint) p) % 4 == 0) {
-
- if (*((ulint*) p) == MEM_BLOCK_MAGIC_N) {
- fprintf(stderr,
- "Mem block at + %lu, file %s,"
- " line %lu\n",
- (ulong) dist,
- (p + sizeof(ulint)),
- (ulong)
- (*(ulint*)(p + 8
- + sizeof(ulint))));
-
- break;
- }
-
- if (*((ulint*) p) == MEM_FREED_BLOCK_MAGIC_N) {
- fprintf(stderr,
- "Freed mem block at + %lu,"
- " file %s, line %lu\n",
- (ulong) dist,
- (p + sizeof(ulint)),
- (ulong)
- (*(ulint*)(p + 8
- + sizeof(ulint))));
-
- break;
- }
- }
-
- p++;
- dist++;
- }
-
- p++;
- dist++;
- }
-}
-
-#ifndef UNIV_HOTBACKUP
-/*****************************************************************//**
-Prints information of dynamic memory usage and currently allocated
-memory heaps or buffers. Can only be used in the debug version. */
-static
-void
-mem_print_info_low(
-/*===============*/
- ibool print_all) /*!< in: if TRUE, all heaps are printed,
- else only the heaps allocated after the
- previous call of this function */
-{
-#ifdef UNIV_MEM_DEBUG
- mem_hash_node_t* node;
- ulint n_heaps = 0;
- ulint allocated_mem;
- ulint ph_size;
- ulint total_allocated_mem = 0;
- ibool error;
- ulint n_blocks;
-#endif
- FILE* outfile;
-
- /* outfile = fopen("ibdebug", "a"); */
-
- outfile = stdout;
-
- fprintf(outfile, "\n");
- fprintf(outfile,
- "________________________________________________________\n");
- fprintf(outfile, "MEMORY ALLOCATION INFORMATION\n\n");
-
-#ifndef UNIV_MEM_DEBUG
-
- UT_NOT_USED(print_all);
-
- mem_pool_print_info(outfile, mem_comm_pool);
-
- fprintf(outfile,
- "Sorry, non-debug version cannot give more memory info\n");
-
- /* fclose(outfile); */
-
- return;
-#else
- mutex_enter(&mem_hash_mutex);
-
- fprintf(outfile, "LIST OF CREATED HEAPS AND ALLOCATED BUFFERS: \n\n");
-
- if (!print_all) {
- fprintf(outfile, "AFTER THE LAST PRINT INFO\n");
- }
-
- node = UT_LIST_GET_FIRST(mem_all_list_base);
-
- while (node != NULL) {
- n_heaps++;
-
- if (!print_all && node->nth_heap < mem_last_print_info) {
-
- goto next_heap;
- }
-
- mem_heap_validate_or_print(node->heap, NULL,
- FALSE, &error, &allocated_mem,
- &ph_size, &n_blocks);
- total_allocated_mem += allocated_mem;
-
- fprintf(outfile,
- "%lu: file %s line %lu of size %lu phys.size %lu"
- " with %lu blocks, type %lu\n",
- node->nth_heap,
- innobase_basename(node->file_name), node->line,
- allocated_mem, ph_size, n_blocks,
- (node->heap)->type);
-next_heap:
- node = UT_LIST_GET_NEXT(all_list, node);
- }
-
- fprintf(outfile, "\n");
-
- fprintf(outfile, "Current allocated memory : %lu\n",
- mem_current_allocated_memory);
- fprintf(outfile, "Current allocated heaps and buffers : %lu\n",
- n_heaps);
- fprintf(outfile, "Cumulative allocated memory : %lu\n",
- mem_total_allocated_memory);
- fprintf(outfile, "Maximum allocated memory : %lu\n",
- mem_max_allocated_memory);
- fprintf(outfile, "Cumulative created heaps and buffers : %lu\n",
- mem_n_created_heaps);
- fprintf(outfile, "Cumulative number of allocations : %lu\n",
- mem_n_allocations);
-
- mem_last_print_info = mem_n_created_heaps;
-
- mutex_exit(&mem_hash_mutex);
-
- mem_pool_print_info(outfile, mem_comm_pool);
-
- /* mem_validate(); */
-
- /* fclose(outfile); */
-#endif
-}
-
-/*****************************************************************//**
-Prints information of dynamic memory usage and currently allocated memory
-heaps or buffers. Can only be used in the debug version. */
-UNIV_INTERN
-void
-mem_print_info(void)
-/*================*/
-{
- mem_print_info_low(TRUE);
-}
-
-/*****************************************************************//**
-Prints information of dynamic memory usage and currently allocated memory
-heaps or buffers since the last ..._print_info or ..._print_new_info. */
-UNIV_INTERN
-void
-mem_print_new_info(void)
-/*====================*/
-{
- mem_print_info_low(FALSE);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/mem/mem0mem.cc b/storage/innobase/mem/mem0mem.cc
index 82c1292b4be..0f94ac0a491 100644
--- a/storage/innobase/mem/mem0mem.cc
+++ b/storage/innobase/mem/mem0mem.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, MariaDB Corporation.
+Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,111 +25,25 @@ Created 6/9/1994 Heikki Tuuri
*************************************************************************/
#include "mem0mem.h"
-#ifdef UNIV_NONINL
-#include "mem0mem.ic"
-#endif
-
#include "buf0buf.h"
#include "srv0srv.h"
-#include "mem0dbg.cc"
#include <stdarg.h>
-/*
- THE MEMORY MANAGEMENT
- =====================
-
-The basic element of the memory management is called a memory
-heap. A memory heap is conceptually a
-stack from which memory can be allocated. The stack may grow infinitely.
-The top element of the stack may be freed, or
-the whole stack can be freed at one time. The advantage of the
-memory heap concept is that we can avoid using the malloc and free
-functions of C which are quite expensive, for example, on the Solaris + GCC
-system (50 MHz Sparc, 1993) the pair takes 3 microseconds,
-on Win NT + 100MHz Pentium, 2.5 microseconds.
-When we use a memory heap,
-we can allocate larger blocks of memory at a time and thus
-reduce overhead. The method is slightly more efficient when we
-allocate the memory from the index page buffer pool, as we can
-claim a new page fast. This is called buffer allocation.
-When we allocate the memory from the dynamic memory of the
-C environment, that is called dynamic allocation.
-
-The default way of operation of the memory heap is the following.
-First, when the heap is created, an initial block of memory is
-allocated. In dynamic allocation this may be about 50 bytes.
-If more space is needed, additional blocks are allocated
-and they are put into a linked list.
-After the initial block, each allocated block is twice the size of the
-previous, until a threshold is attained, after which the sizes
-of the blocks stay the same. An exception is, of course, the case
-where the caller requests a memory buffer whose size is
-bigger than the threshold. In that case a block big enough must
-be allocated.
-
-The heap is physically arranged so that if the current block
-becomes full, a new block is allocated and always inserted in the
-chain of blocks as the last block.
-
-In the debug version of the memory management, all the allocated
-heaps are kept in a list (which is implemented as a hash table).
-Thus we can notice if the caller tries to free an already freed
-heap. In addition, each buffer given to the caller contains
-a start field at the start and a trailer field at the end of the buffer.
-
-The start field has the following content:
-A. sizeof(ulint) bytes of field length (in the standard byte order)
-B. sizeof(ulint) bytes of check field (a random number)
-
-The trailer field contains:
-A. sizeof(ulint) bytes of check field (the same random number as at the start)
-
-Thus we can notice if something has been copied over the
-borders of the buffer, which is illegal.
-The memory in the buffers is initialized to a random byte sequence.
-After freeing, all the blocks in the heap are set to random bytes
-to help us discover errors which result from the use of
-buffers in an already freed heap. */
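
The growth policy described above lends itself to a compact illustration. The sketch below is self-contained and uses made-up constants, not the real InnoDB ones, to show how block sizes double up to a threshold while still honoring oversized requests:

/* Sketch of the heap growth policy: each new block doubles the size of
   the previous one until a cap is reached, except that a request larger
   than the cap always gets a block big enough. Constants are illustrative. */
#include <algorithm>
#include <cstddef>

static const std::size_t GROWTH_CAP = 16384;	/* sizes stop doubling here */

std::size_t next_block_size(std::size_t prev_size, std::size_t requested)
{
	std::size_t size = std::min(2 * prev_size, GROWTH_CAP);
	return std::max(size, requested);
}
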
-
-#ifdef MEM_PERIODIC_CHECK
-
-ibool mem_block_list_inited;
-/* List of all mem blocks allocated; protected by the mem_comm_pool mutex */
-UT_LIST_BASE_NODE_T(mem_block_t) mem_block_list;
-
-#endif
-
-/**********************************************************************//**
-Duplicates a NUL-terminated string, allocated from a memory heap.
-@return own: a copy of the string */
-UNIV_INTERN
+/** Duplicates a NUL-terminated string, allocated from a memory heap.
+@param[in]	heap	memory heap where string is allocated
+@param[in]	str	string to be copied
+@return own: a copy of the string */
char*
mem_heap_strdup(
-/*============*/
- mem_heap_t* heap, /*!< in: memory heap where string is allocated */
- const char* str) /*!< in: string to be copied */
+ mem_heap_t* heap,
+ const char* str)
{
return(static_cast<char*>(mem_heap_dup(heap, str, strlen(str) + 1)));
}
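
As a usage note, strings duplicated with mem_heap_strdup() share the lifetime of their heap; a minimal sketch (the heap size and the string are arbitrary):

	mem_heap_t*	heap = mem_heap_create(256);
	char*		copy = mem_heap_strdup(heap, "dict0dict.cc");
	/* ... use copy; no individual free is needed ... */
	mem_heap_free(heap);	/* releases copy together with the heap */
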
/**********************************************************************//**
-Duplicate a block of data, allocated from a memory heap.
-@return own: a copy of the data */
-UNIV_INTERN
-void*
-mem_heap_dup(
-/*=========*/
- mem_heap_t* heap, /*!< in: memory heap where copy is allocated */
- const void* data, /*!< in: data to be copied */
- ulint len) /*!< in: length of data, in bytes */
-{
- return(memcpy(mem_heap_alloc(heap, len), data, len));
-}
-
-/**********************************************************************//**
Concatenate two strings and return the result, using a memory heap.
-@return own: the result */
-UNIV_INTERN
+@return own: the result */
char*
mem_heap_strcat(
/*============*/
@@ -154,7 +68,7 @@ mem_heap_strcat(
/****************************************************************//**
Helper function for mem_heap_printf.
-@return length of formatted string, including terminating NUL */
+@return length of formatted string, including terminating NUL */
static
ulint
mem_heap_printf_low(
@@ -266,8 +180,7 @@ A simple sprintf replacement that dynamically allocates the space for the
formatted string from the given heap. This supports a very limited set of
the printf syntax: types 's' and 'u' and length modifier 'l' (which is
required for the 'u' type).
-@return heap-allocated formatted string */
-UNIV_INTERN
+@return heap-allocated formatted string */
char*
mem_heap_printf(
/*============*/
@@ -294,11 +207,54 @@ mem_heap_printf(
return(str);
}
+#ifdef UNIV_DEBUG
+/** Validates the contents of a memory heap.
+Checks a memory heap for consistency, prints the contents if any error
+is detected. A fatal error is logged if an error is detected.
+@param[in] heap Memory heap to validate. */
+void
+mem_heap_validate(
+ const mem_heap_t* heap)
+{
+ ulint size = 0;
+
+ for (const mem_block_t* block = heap;
+ block != NULL;
+ block = UT_LIST_GET_NEXT(list, block)) {
+
+ switch (block->type) {
+ case MEM_HEAP_DYNAMIC:
+ break;
+ case MEM_HEAP_BUFFER:
+ case MEM_HEAP_BUFFER | MEM_HEAP_BTR_SEARCH:
+ ut_ad(block->len <= UNIV_PAGE_SIZE);
+ break;
+ default:
+ ut_error;
+ }
+
+ size += block->len;
+ }
+
+ ut_ad(size == heap->total_size);
+}
+
+/** Copy the tail of a string.
+@param[in,out] dst destination buffer
+@param[in] src string whose tail to copy
+@param[in]	size	size of dst buffer, in bytes, including NUL terminator */
+static void ut_strlcpy_rev(char* dst, const char* src, ulint size)
+{
+ size_t src_size = strlen(src), n = std::min(src_size, size - 1);
+ memcpy(dst, src + src_size - n, n + 1);
+}
+#endif /* UNIV_DEBUG */
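
The effect of ut_strlcpy_rev() is easiest to see on an input longer than the destination; it keeps the tail, which preserves the most specific part of a path in fixed-size fields such as block->file_name. A worked example with an arbitrary buffer size:

	char	name[8];
	ut_strlcpy_rev(name, "mem/mem0mem.cc", sizeof(name));
	/* name now holds "0mem.cc": the last 7 characters plus the NUL */
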
+
/***************************************************************//**
Creates a memory heap block where data can be allocated.
@return own: memory heap block, NULL if did not succeed (only possible
for MEM_HEAP_BTR_SEARCH type heaps) */
-UNIV_INTERN
mem_block_t*
mem_heap_create_block_func(
/*=======================*/
@@ -307,34 +263,30 @@ mem_heap_create_block_func(
ulint n, /*!< in: number of bytes needed for user data */
#ifdef UNIV_DEBUG
const char* file_name,/*!< in: file name where created */
- ulint line, /*!< in: line where created */
+ unsigned line, /*!< in: line where created */
#endif /* UNIV_DEBUG */
ulint type) /*!< in: type of heap: MEM_HEAP_DYNAMIC or
MEM_HEAP_BUFFER */
{
-#ifndef UNIV_HOTBACKUP
buf_block_t* buf_block = NULL;
-#endif /* !UNIV_HOTBACKUP */
mem_block_t* block;
ulint len;
ut_ad((type == MEM_HEAP_DYNAMIC) || (type == MEM_HEAP_BUFFER)
|| (type == MEM_HEAP_BUFFER + MEM_HEAP_BTR_SEARCH));
- if (heap && heap->magic_n != MEM_BLOCK_MAGIC_N) {
- mem_analyze_corruption(heap);
+ if (heap != NULL) {
+ ut_d(mem_heap_validate(heap));
}
/* In dynamic allocation, calculate the size: block header + data. */
len = MEM_BLOCK_HEADER_SIZE + MEM_SPACE_NEEDED(n);
-#ifndef UNIV_HOTBACKUP
if (type == MEM_HEAP_DYNAMIC || len < UNIV_PAGE_SIZE / 2) {
ut_ad(type == MEM_HEAP_DYNAMIC || n <= MEM_MAX_ALLOC_IN_BUF);
- block = static_cast<mem_block_t*>(
- mem_area_alloc(&len, mem_comm_pool));
+ block = static_cast<mem_block_t*>(ut_malloc_nokey(len));
} else {
len = UNIV_PAGE_SIZE;
@@ -357,36 +309,18 @@ mem_heap_create_block_func(
block = (mem_block_t*) buf_block->frame;
}
- if(!block) {
- ib_logf(IB_LOG_LEVEL_FATAL,
- " InnoDB: Unable to allocate memory of size %lu.\n",
- len);
+ if (block == NULL) {
+ ib::fatal() << "Unable to allocate memory of size "
+ << len << ".";
}
+
block->buf_block = buf_block;
block->free_block = NULL;
-#else /* !UNIV_HOTBACKUP */
- len = MEM_BLOCK_HEADER_SIZE + MEM_SPACE_NEEDED(n);
- block = ut_malloc(len);
- ut_ad(block);
-#endif /* !UNIV_HOTBACKUP */
- block->magic_n = MEM_BLOCK_MAGIC_N;
ut_d(ut_strlcpy_rev(block->file_name, file_name,
sizeof(block->file_name)));
ut_d(block->line = line);
-#ifdef MEM_PERIODIC_CHECK
- mutex_enter(&(mem_comm_pool->mutex));
-
- if (!mem_block_list_inited) {
- mem_block_list_inited = TRUE;
- UT_LIST_INIT(mem_block_list);
- }
-
- UT_LIST_ADD_LAST(mem_block_list, mem_block_list, block);
-
- mutex_exit(&(mem_comm_pool->mutex));
-#endif
mem_block_set_len(block, len);
mem_block_set_type(block, type);
mem_block_set_free(block, MEM_BLOCK_HEADER_SIZE);
@@ -420,7 +354,6 @@ mem_heap_create_block_func(
Adds a new block to a memory heap.
@return created block, NULL if did not succeed (only possible for
MEM_HEAP_BTR_SEARCH type heaps) */
-UNIV_INTERN
mem_block_t*
mem_heap_add_block(
/*===============*/
@@ -431,8 +364,6 @@ mem_heap_add_block(
mem_block_t* new_block;
ulint new_size;
- ut_ad(mem_heap_check(heap));
-
block = UT_LIST_GET_LAST(heap->base);
/* We have to allocate a new block. The size is always at least
@@ -466,14 +397,13 @@ mem_heap_add_block(
/* Add the new block as the last block */
- UT_LIST_INSERT_AFTER(list, heap->base, block, new_block);
+ UT_LIST_INSERT_AFTER(heap->base, block, new_block);
return(new_block);
}
/******************************************************************//**
Frees a block from a memory heap. */
-UNIV_INTERN
void
mem_heap_block_free(
/*================*/
@@ -482,71 +412,29 @@ mem_heap_block_free(
{
ulint type;
ulint len;
-#ifndef UNIV_HOTBACKUP
buf_block_t* buf_block;
buf_block = static_cast<buf_block_t*>(block->buf_block);
-#endif /* !UNIV_HOTBACKUP */
-
- if (block->magic_n != MEM_BLOCK_MAGIC_N) {
- mem_analyze_corruption(block);
- }
- UT_LIST_REMOVE(list, heap->base, block);
-
-#ifdef MEM_PERIODIC_CHECK
- mutex_enter(&(mem_comm_pool->mutex));
-
- UT_LIST_REMOVE(mem_block_list, mem_block_list, block);
-
- mutex_exit(&(mem_comm_pool->mutex));
-#endif
+ UT_LIST_REMOVE(heap->base, block);
ut_ad(heap->total_size >= block->len);
heap->total_size -= block->len;
type = heap->type;
len = block->len;
- block->magic_n = MEM_FREED_BLOCK_MAGIC_N;
-
-#ifndef UNIV_HOTBACKUP
- if (!srv_use_sys_malloc) {
-#ifdef UNIV_MEM_DEBUG
- UNIV_MEM_ALLOC(block, len);
- /* In the debug version we set the memory to a random
- combination of hex 0xDE and 0xAD. */
- mem_erase_buf((byte*)block, len);
-#endif /* UNIV_MEM_DEBUG */
- UNIV_MEM_FREE(block, len);
-
- }
if (type == MEM_HEAP_DYNAMIC || len < UNIV_PAGE_SIZE / 2) {
-
ut_ad(!buf_block);
- mem_area_free(block, mem_comm_pool);
+ ut_free(block);
} else {
ut_ad(type & MEM_HEAP_BUFFER);
-
buf_block_free(buf_block);
}
-#else /* !UNIV_HOTBACKUP */
-#ifdef UNIV_MEM_DEBUG
- UNIV_MEM_ALLOC(block, len);
- /* In the debug version we set the memory to a random
- combination of hex 0xDE and 0xAD. */
-
- mem_erase_buf((byte*)block, len);
-#endif /* UNIV_MEM_DEBUG */
- UNIV_MEM_FREE(block, len);
- ut_free(block);
-#endif /* !UNIV_HOTBACKUP */
}
-#ifndef UNIV_HOTBACKUP
/******************************************************************//**
Frees the free_block field from a memory heap. */
-UNIV_INTERN
void
mem_heap_free_block_free(
/*=====================*/
@@ -559,31 +447,3 @@ mem_heap_free_block_free(
heap->free_block = NULL;
}
}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef MEM_PERIODIC_CHECK
-/******************************************************************//**
-Goes through the list of all allocated mem blocks, checks their magic
-numbers, and reports possible corruption. */
-UNIV_INTERN
-void
-mem_validate_all_blocks(void)
-/*=========================*/
-{
- mem_block_t* block;
-
- mutex_enter(&(mem_comm_pool->mutex));
-
- block = UT_LIST_GET_FIRST(mem_block_list);
-
- while (block) {
- if (block->magic_n != MEM_BLOCK_MAGIC_N) {
- mem_analyze_corruption(block);
- }
-
- block = UT_LIST_GET_NEXT(mem_block_list, block);
- }
-
- mutex_exit(&(mem_comm_pool->mutex));
-}
-#endif
diff --git a/storage/innobase/mem/mem0pool.cc b/storage/innobase/mem/mem0pool.cc
deleted file mode 100644
index 5002a5b3997..00000000000
--- a/storage/innobase/mem/mem0pool.cc
+++ /dev/null
@@ -1,727 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file mem/mem0pool.cc
-The lowest-level memory management
-
-Created 5/12/1997 Heikki Tuuri
-*************************************************************************/
-
-#include "mem0pool.h"
-#ifdef UNIV_NONINL
-#include "mem0pool.ic"
-#endif
-
-#include "srv0srv.h"
-#include "sync0sync.h"
-#include "ut0mem.h"
-#include "ut0lst.h"
-#include "ut0byte.h"
-#include "mem0mem.h"
-#include "srv0start.h"
-
-/* We would like to use also the buffer frames to allocate memory. This
-would be desirable, because then the memory consumption of the database
-would be fixed, and we might even lock the buffer pool to the main memory.
-The problem here is that the buffer management routines can themselves call
-memory allocation, while the buffer pool mutex is reserved.
-
-The main components of the memory consumption are:
-
-1. buffer pool,
-2. parsed and optimized SQL statements,
-3. data dictionary cache,
-4. log buffer,
-5. locks for each transaction,
-6. hash table for the adaptive index,
-7. state and buffers for each SQL query currently being executed,
-8. session for each user, and
-9. stack for each OS thread.
-
-Items 1 and 2 are managed by an LRU algorithm. Items 5 and 6 can potentially
-consume very much memory. Items 7 and 8 should consume quite little memory,
-and the OS should take care of item 9, which too should consume little memory.
-
-A solution to the memory management:
-
-1. the buffer pool size is set separately;
-2. log buffer size is set separately;
-3. the common pool size for all the other entries, except 8, is set separately.
-
-Problems: we may waste memory if the common pool is set too big. Another
-problem is the locks, which may take very much space in big transactions.
-Then the shared pool size should be set very big. We can allow locks to take
-space from the buffer pool, but the SQL optimizer is then unaware of the
-usable size of the buffer pool. We could also combine the objects in the
-common pool and the buffers in the buffer pool into a single LRU list and
-manage it uniformly, but this approach does not take into account the parsing
-and other costs unique to SQL statements.
-
-The locks for a transaction can be seen as a part of the state of the
-transaction. Hence, they should be stored in the common pool. We still
-have the problem of a very big update transaction, for example, which
-will set very many x-locks on rows, and the locks will consume a lot
-of memory, say, half of the buffer pool size.
-
-Another problem is what to do if we are not able to malloc a requested
-block of memory from the common pool. Then we can request memory from
-the operating system. If it does not help, a system error results.
-
-Because 5 and 6 may potentially consume very much memory, we let them grow
-into the buffer pool. We may let the locks of a transaction take frames
-from the buffer pool, when the corresponding memory heap block has grown to
-the size of a buffer frame. Similarly for the hash node cells of the locks,
-and for the adaptive index. Thus, for each individual transaction, its locks
-can occupy at most about the size of the buffer frame of memory in the common
-pool, and after that its locks will grow into the buffer pool. */
-
-/** Mask used to extract the free bit from area->size */
-#define MEM_AREA_FREE 1
-
-/** The smallest memory area total size */
-#define MEM_AREA_MIN_SIZE (2 * MEM_AREA_EXTRA_SIZE)
-
-
-/** Data structure for a memory pool. The space is allocated using the buddy
-algorithm, where free list i contains areas of size 2 to power i. */
-struct mem_pool_t{
- byte* buf; /*!< memory pool */
- ulint size; /*!< memory common pool size */
- ulint reserved; /*!< amount of currently allocated
- memory */
- ib_mutex_t mutex; /*!< mutex protecting this struct */
- UT_LIST_BASE_NODE_T(mem_area_t)
- free_list[64]; /*!< lists of free memory areas: an
- area is put to the list whose number
- is the 2-logarithm of the area size */
-};
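
The mapping from a requested size to one of these free lists is a rounded-up base-2 logarithm, as mem_area_alloc() below computes with ut_2_log(). A self-contained sketch of the size-class calculation (the real code additionally clamps to MEM_AREA_MIN_SIZE; extra_size stands in for MEM_AREA_EXTRA_SIZE):

#include <cstddef>

/* Free list index n such that 2^n is the smallest power of two
   holding the payload plus the per-area header. */
std::size_t free_list_index(std::size_t payload, std::size_t extra_size)
{
	std::size_t need = payload + extra_size;
	std::size_t n = 0;
	while ((std::size_t(1) << n) < need) {
		n++;	/* round upward to the next power of two */
	}
	return n;	/* index into pool->free_list[] */
}
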
-
-/** The common memory pool */
-UNIV_INTERN mem_pool_t* mem_comm_pool = NULL;
-
-#ifdef UNIV_PFS_MUTEX
-/* Key to register mutex in mem_pool_t with performance schema */
-UNIV_INTERN mysql_pfs_key_t mem_pool_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-/* We use this counter to check that the mem pool mutex does not leak;
-this is to track a strange assertion failure reported at
-mysql@lists.mysql.com */
-
-UNIV_INTERN ulint mem_n_threads_inside = 0;
-
-/********************************************************************//**
-Reserves the mem pool mutex if we are not in server shutdown. Use
-this function only in memory free functions, since only memory
-free functions are used during server shutdown. */
-UNIV_INLINE
-void
-mem_pool_mutex_enter(
-/*=================*/
- mem_pool_t* pool) /*!< in: memory pool */
-{
- if (srv_shutdown_state < SRV_SHUTDOWN_EXIT_THREADS) {
- mutex_enter(&(pool->mutex));
- }
-}
-
-/********************************************************************//**
-Releases the mem pool mutex if we are not in server shutdown. As
-its corresponding mem_pool_mutex_enter() function, use it only
-in memory free functions */
-UNIV_INLINE
-void
-mem_pool_mutex_exit(
-/*================*/
- mem_pool_t* pool) /*!< in: memory pool */
-{
- if (srv_shutdown_state < SRV_SHUTDOWN_EXIT_THREADS) {
- mutex_exit(&(pool->mutex));
- }
-}
-
-/********************************************************************//**
-Returns memory area size.
-@return size */
-UNIV_INLINE
-ulint
-mem_area_get_size(
-/*==============*/
- mem_area_t* area) /*!< in: area */
-{
- return(area->size_and_free & ~MEM_AREA_FREE);
-}
-
-/********************************************************************//**
-Sets memory area size. */
-UNIV_INLINE
-void
-mem_area_set_size(
-/*==============*/
- mem_area_t* area, /*!< in: area */
- ulint size) /*!< in: size */
-{
- area->size_and_free = (area->size_and_free & MEM_AREA_FREE)
- | size;
-}
-
-/********************************************************************//**
-Returns memory area free bit.
-@return TRUE if free */
-UNIV_INLINE
-ibool
-mem_area_get_free(
-/*==============*/
- mem_area_t* area) /*!< in: area */
-{
-#if TRUE != MEM_AREA_FREE
-# error "TRUE != MEM_AREA_FREE"
-#endif
- return(area->size_and_free & MEM_AREA_FREE);
-}
-
-/********************************************************************//**
-Sets memory area free bit. */
-UNIV_INLINE
-void
-mem_area_set_free(
-/*==============*/
- mem_area_t* area, /*!< in: area */
- ibool free) /*!< in: free bit value */
-{
-#if TRUE != MEM_AREA_FREE
-# error "TRUE != MEM_AREA_FREE"
-#endif
- area->size_and_free = (area->size_and_free & ~MEM_AREA_FREE)
- | free;
-}
-
-/********************************************************************//**
-Creates a memory pool.
-@return memory pool */
-UNIV_INTERN
-mem_pool_t*
-mem_pool_create(
-/*============*/
- ulint size) /*!< in: pool size in bytes */
-{
- mem_pool_t* pool;
- mem_area_t* area;
- ulint i;
- ulint used;
-
- pool = static_cast<mem_pool_t*>(ut_malloc(sizeof(mem_pool_t)));
-
- pool->buf = static_cast<byte*>(ut_malloc_low(size, TRUE));
- pool->size = size;
-
- mutex_create(mem_pool_mutex_key, &pool->mutex, SYNC_MEM_POOL);
-
- /* Initialize the free lists */
-
- for (i = 0; i < 64; i++) {
-
- UT_LIST_INIT(pool->free_list[i]);
- }
-
- used = 0;
-
- while (size - used >= MEM_AREA_MIN_SIZE) {
-
- i = ut_2_log(size - used);
-
- if (ut_2_exp(i) > size - used) {
-
- /* ut_2_log rounds upward */
-
- i--;
- }
-
- area = (mem_area_t*)(pool->buf + used);
-
- mem_area_set_size(area, ut_2_exp(i));
- mem_area_set_free(area, TRUE);
- UNIV_MEM_FREE(MEM_AREA_EXTRA_SIZE + (byte*) area,
- ut_2_exp(i) - MEM_AREA_EXTRA_SIZE);
-
- UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);
-
- used = used + ut_2_exp(i);
- }
-
- ut_ad(size >= used);
-
- pool->reserved = 0;
-
- return(pool);
-}
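
The carving loop above splits the pool into a descending sequence of power-of-two areas. A worked example with illustrative numbers: a 1000-byte pool with MEM_AREA_MIN_SIZE = 32 would be carved into areas of 512 + 256 + 128 + 64 + 32 bytes, and the loop stops once the remainder (8 bytes here) drops below the minimum area size.
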
-
-/********************************************************************//**
-Frees a memory pool. */
-UNIV_INTERN
-void
-mem_pool_free(
-/*==========*/
- mem_pool_t* pool) /*!< in, own: memory pool */
-{
- ut_free(pool->buf);
- ut_free(pool);
-}
-
-/********************************************************************//**
-Fills the specified free list.
-@return TRUE if we were able to insert a block to the free list */
-static
-ibool
-mem_pool_fill_free_list(
-/*====================*/
- ulint i, /*!< in: free list index */
- mem_pool_t* pool) /*!< in: memory pool */
-{
- mem_area_t* area;
- mem_area_t* area2;
- ibool ret;
-
- ut_ad(mutex_own(&(pool->mutex)));
-
- if (UNIV_UNLIKELY(i >= 63)) {
- /* We come here when we have run out of space in the
- memory pool: */
-
- return(FALSE);
- }
-
- area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
-
- if (area == NULL) {
- if (UT_LIST_GET_LEN(pool->free_list[i + 1]) > 0) {
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: mem pool free list %lu"
- " length is %lu\n"
- "InnoDB: though the list is empty!\n",
- (ulong) i + 1,
- (ulong)
- UT_LIST_GET_LEN(pool->free_list[i + 1]));
- }
-
- ret = mem_pool_fill_free_list(i + 1, pool);
-
- if (ret == FALSE) {
-
- return(FALSE);
- }
-
- area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
- }
-
- if (UNIV_UNLIKELY(UT_LIST_GET_LEN(pool->free_list[i + 1]) == 0)) {
- mem_analyze_corruption(area);
-
- ut_error;
- }
-
- UT_LIST_REMOVE(free_list, pool->free_list[i + 1], area);
-
- area2 = (mem_area_t*)(((byte*) area) + ut_2_exp(i));
- UNIV_MEM_ALLOC(area2, MEM_AREA_EXTRA_SIZE);
-
- mem_area_set_size(area2, ut_2_exp(i));
- mem_area_set_free(area2, TRUE);
-
- UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area2);
-
- mem_area_set_size(area, ut_2_exp(i));
-
- UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);
-
- return(TRUE);
-}
-
-/********************************************************************//**
-Allocates memory from a pool. NOTE: This low-level function should only be
-used in mem0mem.*!
-@return own: allocated memory buffer */
-UNIV_INTERN
-void*
-mem_area_alloc(
-/*===========*/
- ulint* psize, /*!< in: requested size in bytes; for optimum
- space usage, the size should be a power of 2
- minus MEM_AREA_EXTRA_SIZE;
- out: allocated size in bytes (greater than
- or equal to the requested size) */
- mem_pool_t* pool) /*!< in: memory pool */
-{
- mem_area_t* area;
- ulint size;
- ulint n;
- ibool ret;
-
- /* If we are using os allocator just make a simple call
- to malloc */
- if (UNIV_LIKELY(srv_use_sys_malloc)) {
- return(malloc(*psize));
- }
-
- size = *psize;
- n = ut_2_log(ut_max(size + MEM_AREA_EXTRA_SIZE, MEM_AREA_MIN_SIZE));
-
- mutex_enter(&(pool->mutex));
- mem_n_threads_inside++;
-
- ut_a(mem_n_threads_inside == 1);
-
- area = UT_LIST_GET_FIRST(pool->free_list[n]);
-
- if (area == NULL) {
- ret = mem_pool_fill_free_list(n, pool);
-
- if (ret == FALSE) {
- /* Out of memory in memory pool: we try to allocate
- from the operating system with the regular malloc: */
-
- mem_n_threads_inside--;
- mutex_exit(&(pool->mutex));
-
- return(ut_malloc(size));
- }
-
- area = UT_LIST_GET_FIRST(pool->free_list[n]);
- }
-
- if (!mem_area_get_free(area)) {
- fprintf(stderr,
- "InnoDB: Error: Removing element from mem pool"
- " free list %lu though the\n"
- "InnoDB: element is not marked free!\n",
- (ulong) n);
-
- mem_analyze_corruption(area);
-
- /* Try to analyze a strange assertion failure reported at
- mysql@lists.mysql.com where the free bit IS 1 in the
- hex dump above */
-
- if (mem_area_get_free(area)) {
- fprintf(stderr,
- "InnoDB: Probably a race condition"
- " because now the area is marked free!\n");
- }
-
- ut_error;
- }
-
- if (UT_LIST_GET_LEN(pool->free_list[n]) == 0) {
- fprintf(stderr,
- "InnoDB: Error: Removing element from mem pool"
- " free list %lu\n"
- "InnoDB: though the list length is 0!\n",
- (ulong) n);
- mem_analyze_corruption(area);
-
- ut_error;
- }
-
- ut_ad(mem_area_get_size(area) == ut_2_exp(n));
-
- mem_area_set_free(area, FALSE);
-
- UT_LIST_REMOVE(free_list, pool->free_list[n], area);
-
- pool->reserved += mem_area_get_size(area);
-
- mem_n_threads_inside--;
- mutex_exit(&(pool->mutex));
-
- ut_ad(mem_pool_validate(pool));
-
- *psize = ut_2_exp(n) - MEM_AREA_EXTRA_SIZE;
- UNIV_MEM_ALLOC(MEM_AREA_EXTRA_SIZE + (byte*) area, *psize);
-
- return((void*)(MEM_AREA_EXTRA_SIZE + ((byte*) area)));
-}
-
-/********************************************************************//**
-Gets the buddy of an area, if it exists in pool.
-@return the buddy, NULL if no buddy in pool */
-UNIV_INLINE
-mem_area_t*
-mem_area_get_buddy(
-/*===============*/
- mem_area_t* area, /*!< in: memory area */
- ulint size, /*!< in: memory area size */
- mem_pool_t* pool) /*!< in: memory pool */
-{
- mem_area_t* buddy;
-
- ut_ad(size != 0);
-
- if (((((byte*) area) - pool->buf) % (2 * size)) == 0) {
-
- /* The buddy is in a higher address */
-
- buddy = (mem_area_t*)(((byte*) area) + size);
-
- if ((((byte*) buddy) - pool->buf) + size > pool->size) {
-
- /* The buddy is not wholly contained in the pool:
- there is no buddy */
-
- buddy = NULL;
- }
- } else {
- /* The buddy is in a lower address; NOTE that area cannot
-		be at the pool lower end, because then we would end up in
- the upper branch in this if-clause: the remainder would be
- 0 */
-
- buddy = (mem_area_t*)(((byte*) area) - size);
- }
-
- return(buddy);
-}
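
Because every area of size 2^i begins at an offset that is a multiple of 2^i within the pool, the two branches above are equivalent to toggling a single bit of the offset. A compact restatement of the same computation (byte and ulint are the InnoDB typedefs; the caller must still apply the pool bounds check, as mem_area_get_buddy() does):

	byte* buddy_of(byte* pool_buf, byte* area, ulint size)
	{
		ulint	off = ulint(area - pool_buf);

		return(pool_buf + (off ^ size));	/* XOR flips to the buddy */
	}
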
-
-/********************************************************************//**
-Frees memory to a pool. */
-UNIV_INTERN
-void
-mem_area_free(
-/*==========*/
- void* ptr, /*!< in, own: pointer to allocated memory
- buffer */
- mem_pool_t* pool) /*!< in: memory pool */
-{
- mem_area_t* area;
- mem_area_t* buddy;
- void* new_ptr;
- ulint size;
- ulint n;
-
- if (UNIV_LIKELY(srv_use_sys_malloc)) {
- free(ptr);
-
- return;
- }
-
- /* It may be that the area was really allocated from the OS with
- regular malloc: check if ptr points within our memory pool */
-
- if ((byte*) ptr < pool->buf || (byte*) ptr >= pool->buf + pool->size) {
- ut_free(ptr);
-
- return;
- }
-
- area = (mem_area_t*) (((byte*) ptr) - MEM_AREA_EXTRA_SIZE);
-
- if (mem_area_get_free(area)) {
- fprintf(stderr,
- "InnoDB: Error: Freeing element to mem pool"
- " free list though the\n"
- "InnoDB: element is marked free!\n");
-
- mem_analyze_corruption(area);
- ut_error;
- }
-
- size = mem_area_get_size(area);
- UNIV_MEM_FREE(ptr, size - MEM_AREA_EXTRA_SIZE);
-
- if (size == 0) {
- fprintf(stderr,
- "InnoDB: Error: Mem area size is 0. Possibly a"
- " memory overrun of the\n"
- "InnoDB: previous allocated area!\n");
-
- mem_analyze_corruption(area);
- ut_error;
- }
-
-#ifdef UNIV_LIGHT_MEM_DEBUG
- if (((byte*) area) + size < pool->buf + pool->size) {
-
- ulint next_size;
-
- next_size = mem_area_get_size(
- (mem_area_t*)(((byte*) area) + size));
- if (UNIV_UNLIKELY(!next_size || !ut_is_2pow(next_size))) {
- fprintf(stderr,
- "InnoDB: Error: Memory area size %lu,"
- " next area size %lu not a power of 2!\n"
- "InnoDB: Possibly a memory overrun of"
- " the buffer being freed here.\n",
- (ulong) size, (ulong) next_size);
- mem_analyze_corruption(area);
-
- ut_error;
- }
- }
-#endif
- buddy = mem_area_get_buddy(area, size, pool);
-
- n = ut_2_log(size);
-
- mem_pool_mutex_enter(pool);
- mem_n_threads_inside++;
-
- ut_a(mem_n_threads_inside == 1);
-
- if (buddy && mem_area_get_free(buddy)
- && (size == mem_area_get_size(buddy))) {
-
- /* The buddy is in a free list */
-
- if ((byte*) buddy < (byte*) area) {
- new_ptr = ((byte*) buddy) + MEM_AREA_EXTRA_SIZE;
-
- mem_area_set_size(buddy, 2 * size);
- mem_area_set_free(buddy, FALSE);
- } else {
- new_ptr = ptr;
-
- mem_area_set_size(area, 2 * size);
- }
-
- /* Remove the buddy from its free list and merge it to area */
-
- UT_LIST_REMOVE(free_list, pool->free_list[n], buddy);
-
- pool->reserved += ut_2_exp(n);
-
- mem_n_threads_inside--;
- mem_pool_mutex_exit(pool);
-
- mem_area_free(new_ptr, pool);
-
- return;
- } else {
- UT_LIST_ADD_FIRST(free_list, pool->free_list[n], area);
-
- mem_area_set_free(area, TRUE);
-
- ut_ad(pool->reserved >= size);
-
- pool->reserved -= size;
- }
-
- mem_n_threads_inside--;
- mem_pool_mutex_exit(pool);
-
- ut_ad(mem_pool_validate(pool));
-}
-
-/********************************************************************//**
-Validates a memory pool.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-mem_pool_validate(
-/*==============*/
- mem_pool_t* pool) /*!< in: memory pool */
-{
- mem_area_t* area;
- mem_area_t* buddy;
- ulint free;
- ulint i;
-
- mem_pool_mutex_enter(pool);
-
- free = 0;
-
- for (i = 0; i < 64; i++) {
-
- UT_LIST_CHECK(free_list, mem_area_t, pool->free_list[i]);
-
- for (area = UT_LIST_GET_FIRST(pool->free_list[i]);
- area != 0;
- area = UT_LIST_GET_NEXT(free_list, area)) {
-
- ut_a(mem_area_get_free(area));
- ut_a(mem_area_get_size(area) == ut_2_exp(i));
-
- buddy = mem_area_get_buddy(area, ut_2_exp(i), pool);
-
- ut_a(!buddy || !mem_area_get_free(buddy)
- || (ut_2_exp(i) != mem_area_get_size(buddy)));
-
- free += ut_2_exp(i);
- }
- }
-
- ut_a(free + pool->reserved == pool->size);
-
- mem_pool_mutex_exit(pool);
-
- return(TRUE);
-}
-
-/********************************************************************//**
-Prints info of a memory pool. */
-UNIV_INTERN
-void
-mem_pool_print_info(
-/*================*/
- FILE* outfile,/*!< in: output file to write to */
- mem_pool_t* pool) /*!< in: memory pool */
-{
- ulint i;
-
- mem_pool_validate(pool);
-
- fprintf(outfile, "INFO OF A MEMORY POOL\n");
-
- mutex_enter(&(pool->mutex));
-
- for (i = 0; i < 64; i++) {
- if (UT_LIST_GET_LEN(pool->free_list[i]) > 0) {
-
- fprintf(outfile,
- "Free list length %lu for"
- " blocks of size %lu\n",
- (ulong) UT_LIST_GET_LEN(pool->free_list[i]),
- (ulong) ut_2_exp(i));
- }
- }
-
- fprintf(outfile, "Pool size %lu, reserved %lu.\n", (ulong) pool->size,
- (ulong) pool->reserved);
- mutex_exit(&(pool->mutex));
-}
-
-/********************************************************************//**
-Returns the amount of reserved memory.
-@return reserved memory in bytes */
-UNIV_INTERN
-ulint
-mem_pool_get_reserved(
-/*==================*/
- mem_pool_t* pool) /*!< in: memory pool */
-{
- ulint reserved;
-
- mutex_enter(&(pool->mutex));
-
- reserved = pool->reserved;
-
- mutex_exit(&(pool->mutex));
-
- return(reserved);
-}
diff --git a/storage/innobase/mtr/mtr0log.cc b/storage/innobase/mtr/mtr0log.cc
index 189d52d68b0..0e6a80cb363 100644
--- a/storage/innobase/mtr/mtr0log.cc
+++ b/storage/innobase/mtr/mtr0log.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,22 +25,15 @@ Created 12/7/1995 Heikki Tuuri
*******************************************************/
#include "mtr0log.h"
-
-#ifdef UNIV_NONINL
-#include "mtr0log.ic"
-#endif
-
#include "buf0buf.h"
#include "dict0dict.h"
#include "log0recv.h"
#include "page0page.h"
-
-#ifndef UNIV_HOTBACKUP
-# include "dict0boot.h"
+#include "buf0dblwr.h"
+#include "dict0boot.h"
/********************************************************//**
Catenates n bytes to the mtr log. */
-UNIV_INTERN
void
mlog_catenate_string(
/*=================*/
@@ -47,30 +41,25 @@ mlog_catenate_string(
const byte* str, /*!< in: string to write */
ulint len) /*!< in: string length */
{
- dyn_array_t* mlog;
-
if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
return;
}
- mlog = &(mtr->log);
-
- dyn_push_string(mlog, str, len);
+ mtr->get_log()->push(str, ib_uint32_t(len));
}
/********************************************************//**
Writes the initial part of a log record consisting of one-byte item
type and four-byte space and page numbers. Also pushes info
to the mtr memo that a buffer page has been modified. */
-UNIV_INTERN
void
mlog_write_initial_log_record(
/*==========================*/
const byte* ptr, /*!< in: pointer to (inside) a buffer
frame holding the file page where
modification is made */
- byte type, /*!< in: log item type: MLOG_1BYTE, ... */
+ mlog_id_t type, /*!< in: log item type: MLOG_1BYTE, ... */
mtr_t* mtr) /*!< in: mini-transaction handle */
{
byte* log_ptr;
@@ -90,28 +79,30 @@ mlog_write_initial_log_record(
mlog_close(mtr, log_ptr);
}
-#endif /* !UNIV_HOTBACKUP */
/********************************************************//**
Parses an initial log record written by mlog_write_initial_log_record.
-@return parsed record end, NULL if not a complete record */
-UNIV_INTERN
+@return parsed record end, NULL if not a complete record */
byte*
mlog_parse_initial_log_record(
/*==========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- byte* type, /*!< out: log record type: MLOG_1BYTE, ... */
- ulint* space, /*!< out: space id */
- ulint* page_no)/*!< out: page number */
+ const byte* ptr, /*!< in: buffer */
+ const byte* end_ptr,/*!< in: buffer end */
+ mlog_id_t* type, /*!< out: log record type: MLOG_1BYTE, ... */
+ ulint* space, /*!< out: space id */
+ ulint* page_no)/*!< out: page number */
{
if (end_ptr < ptr + 1) {
return(NULL);
}
- *type = (byte)((ulint)*ptr & ~MLOG_SINGLE_REC_FLAG);
- ut_ad(*type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(*type));
+ *type = mlog_id_t(*ptr & ~MLOG_SINGLE_REC_FLAG);
+ if (UNIV_UNLIKELY(*type > MLOG_BIGGEST_TYPE
+ && !EXTRA_CHECK_MLOG_NUMBER(*type))) {
+ recv_sys->found_corrupt_log = true;
+ return NULL;
+ }
ptr++;
@@ -120,36 +111,35 @@ mlog_parse_initial_log_record(
return(NULL);
}
- ptr = mach_parse_compressed(ptr, end_ptr, space);
-
- if (ptr == NULL) {
+ *space = mach_parse_compressed(&ptr, end_ptr);
- return(NULL);
+ if (ptr != NULL) {
+ *page_no = mach_parse_compressed(&ptr, end_ptr);
}
- ptr = mach_parse_compressed(ptr, end_ptr, page_no);
-
- return(ptr);
+ return(const_cast<byte*>(ptr));
}
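
For reference, the record layout consumed by this parser (and produced by mlog_write_initial_log_record()) can be sketched as follows; the variable-length integer widths assume the mach_parse_compressed() format:

	/* Initial part of a redo log record:
	   byte 0       record type, with MLOG_SINGLE_REC_FLAG possibly set
	                when the mini-transaction logged a single record
	   bytes 1...   tablespace id, variable-length compressed integer
	   bytes ...    page number, in the same compressed format
	   The type-specific payload, such as the two-byte page offset read
	   by mlog_parse_nbytes() below, follows immediately after. */
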
/********************************************************//**
Parses a log record written by mlog_write_ulint or mlog_write_ull.
-@return parsed record end, NULL if not a complete record or a corrupt record */
-UNIV_INTERN
+@return parsed record end, NULL if not a complete record or a corrupt record */
byte*
mlog_parse_nbytes(
/*==============*/
- ulint type, /*!< in: log record type: MLOG_1BYTE, ... */
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- byte* page, /*!< in: page where to apply the log record, or NULL */
- void* page_zip)/*!< in/out: compressed page, or NULL */
+ mlog_id_t type, /*!< in: log record type: MLOG_1BYTE, ... */
+ const byte* ptr, /*!< in: buffer */
+ const byte* end_ptr,/*!< in: buffer end */
+ byte* page, /*!< in: page where to apply the log
+ record, or NULL */
+ void* page_zip)/*!< in/out: compressed page, or NULL */
{
ulint offset;
ulint val;
ib_uint64_t dval;
ut_a(type <= MLOG_8BYTES);
+ ut_a(!page || !page_zip
+ || !fil_page_index_page_check(page));
if (end_ptr < ptr + 2) {
return(NULL);
@@ -158,11 +148,6 @@ mlog_parse_nbytes(
offset = mach_read_from_2(ptr);
ptr += 2;
- ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX ||
- /* scrubbing changes page type from FIL_PAGE_INDEX to
- * FIL_PAGE_TYPE_ALLOCATED (rest of this assertion is below) */
- (type == MLOG_2BYTES && offset == FIL_PAGE_TYPE));
-
if (offset >= UNIV_PAGE_SIZE) {
recv_sys->found_corrupt_log = TRUE;
@@ -170,7 +155,7 @@ mlog_parse_nbytes(
}
if (type == MLOG_8BYTES) {
- ptr = mach_ull_parse_compressed(ptr, end_ptr, &dval);
+ dval = mach_u64_parse_compressed(&ptr, end_ptr);
if (ptr == NULL) {
@@ -186,10 +171,10 @@ mlog_parse_nbytes(
mach_write_to_8(page + offset, dval);
}
- return(ptr);
+ return(const_cast<byte*>(ptr));
}
- ptr = mach_parse_compressed(ptr, end_ptr, &val);
+ val = mach_parse_compressed(&ptr, end_ptr);
if (ptr == NULL) {
@@ -198,7 +183,7 @@ mlog_parse_nbytes(
switch (type) {
case MLOG_1BYTE:
- if (UNIV_UNLIKELY(val > 0xFFUL)) {
+ if (val > 0xFFUL) {
goto corrupt;
}
if (page) {
@@ -211,7 +196,7 @@ mlog_parse_nbytes(
}
break;
case MLOG_2BYTES:
- if (UNIV_UNLIKELY(val > 0xFFFFUL)) {
+ if (val > 0xFFFFUL) {
goto corrupt;
}
if (page) {
@@ -222,13 +207,6 @@ mlog_parse_nbytes(
}
mach_write_to_2(page + offset, val);
}
- ut_a(!page || !page_zip ||
- fil_page_get_type(page) != FIL_PAGE_INDEX ||
- /* scrubbing changes page type from FIL_PAGE_INDEX to
- * FIL_PAGE_TYPE_ALLOCATED */
- (type == MLOG_2BYTES &&
- offset == FIL_PAGE_TYPE &&
- val == FIL_PAGE_TYPE_ALLOCATED));
break;
case MLOG_4BYTES:
@@ -247,20 +225,19 @@ mlog_parse_nbytes(
ptr = NULL;
}
- return(ptr);
+ return(const_cast<byte*>(ptr));
}
/********************************************************//**
Writes 1, 2 or 4 bytes to a file page. Writes the corresponding log
record to the mini-transaction log if mtr is not NULL. */
-UNIV_INTERN
void
mlog_write_ulint(
/*=============*/
- byte* ptr, /*!< in: pointer where to write */
- ulint val, /*!< in: value to write */
- byte type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
- mtr_t* mtr) /*!< in: mini-transaction handle */
+ byte* ptr, /*!< in: pointer where to write */
+ ulint val, /*!< in: value to write */
+ mlog_id_t type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
switch (type) {
case MLOG_1BYTE:
@@ -299,7 +276,6 @@ mlog_write_ulint(
/********************************************************//**
Writes 8 bytes to a file page. Writes the corresponding log
record to the mini-transaction log, only if mtr is not NULL */
-UNIV_INTERN
void
mlog_write_ull(
/*===========*/
@@ -321,18 +297,16 @@ mlog_write_ull(
mach_write_to_2(log_ptr, page_offset(ptr));
log_ptr += 2;
- log_ptr += mach_ull_write_compressed(log_ptr, val);
+ log_ptr += mach_u64_write_compressed(log_ptr, val);
mlog_close(mtr, log_ptr);
}
}
}
-#ifndef UNIV_HOTBACKUP
/********************************************************//**
Writes a string to a file page buffered in the buffer pool. Writes the
corresponding log record to the mini-transaction log. */
-UNIV_INTERN
void
mlog_write_string(
/*==============*/
@@ -352,7 +326,6 @@ mlog_write_string(
/********************************************************//**
Logs a write of a string to a file page buffered in the buffer pool.
Writes the corresponding log record to the mini-transaction log. */
-UNIV_INTERN
void
mlog_log_string(
/*============*/
@@ -385,12 +358,10 @@ mlog_log_string(
mlog_catenate_string(mtr, ptr, len);
}
-#endif /* !UNIV_HOTBACKUP */
/********************************************************//**
Parses a log record written by mlog_write_string.
-@return parsed record end, NULL if not a complete record */
-UNIV_INTERN
+@return parsed record end, NULL if not a complete record */
byte*
mlog_parse_string(
/*==============*/
@@ -402,7 +373,9 @@ mlog_parse_string(
ulint offset;
ulint len;
- ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX);
+ ut_a(!page || !page_zip
+ || (fil_page_get_type(page) != FIL_PAGE_INDEX
+ && fil_page_get_type(page) != FIL_PAGE_RTREE));
if (end_ptr < ptr + 4) {
@@ -414,8 +387,7 @@ mlog_parse_string(
len = mach_read_from_2(ptr);
ptr += 2;
- if (UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
- || UNIV_UNLIKELY(len + offset > UNIV_PAGE_SIZE)) {
+ if (offset >= UNIV_PAGE_SIZE || len + offset > UNIV_PAGE_SIZE) {
recv_sys->found_corrupt_log = TRUE;
return(NULL);
@@ -437,19 +409,17 @@ mlog_parse_string(
return(ptr + len);
}
-#ifndef UNIV_HOTBACKUP
/********************************************************//**
Opens a buffer for mlog, writes the initial log record and,
if needed, the field lengths of an index.
-@return buffer, NULL if log mode MTR_LOG_NONE */
-UNIV_INTERN
+@return buffer, NULL if log mode MTR_LOG_NONE */
byte*
mlog_open_and_write_index(
/*======================*/
mtr_t* mtr, /*!< in: mtr */
const byte* rec, /*!< in: index record or page */
const dict_index_t* index, /*!< in: record descriptor */
- byte type, /*!< in: log item type */
+ mlog_id_t type, /*!< in: log item type */
ulint size) /*!< in: requested buffer size in bytes
(if 0, calls mlog_close() and
returns NULL) */
@@ -471,25 +441,46 @@ mlog_open_and_write_index(
} else {
ulint i;
ulint n = dict_index_get_n_fields(index);
- /* total size needed */
ulint total = 11 + size + (n + 2) * 2;
ulint alloc = total;
- /* allocate at most DYN_ARRAY_DATA_SIZE at a time */
- if (alloc > DYN_ARRAY_DATA_SIZE) {
- alloc = DYN_ARRAY_DATA_SIZE;
+
+ if (alloc > mtr_buf_t::MAX_DATA_SIZE) {
+ alloc = mtr_buf_t::MAX_DATA_SIZE;
}
+
+ const bool is_leaf = page_is_leaf(page_align(rec));
+
+ /* For spatial index, on non-leaf page, we just keep
+ 2 fields, MBR and page no. */
+ if (!is_leaf && dict_index_is_spatial(index)) {
+ n = DICT_INDEX_SPATIAL_NODEPTR_SIZE;
+ }
+
log_start = log_ptr = mlog_open(mtr, alloc);
+
if (!log_ptr) {
return(NULL); /* logging is disabled */
}
+
log_end = log_ptr + alloc;
- log_ptr = mlog_write_initial_log_record_fast(rec, type,
- log_ptr, mtr);
+
+ log_ptr = mlog_write_initial_log_record_fast(
+ rec, type, log_ptr, mtr);
+
mach_write_to_2(log_ptr, n);
log_ptr += 2;
- mach_write_to_2(log_ptr,
- dict_index_get_n_unique_in_tree(index));
+
+ if (is_leaf) {
+ mach_write_to_2(
+ log_ptr, dict_index_get_n_unique_in_tree(index));
+ } else {
+ mach_write_to_2(
+ log_ptr,
+ dict_index_get_n_unique_in_tree_nonleaf(index));
+ }
+
log_ptr += 2;
+
for (i = 0; i < n; i++) {
dict_field_t* field;
const dict_col_t* col;
@@ -500,7 +491,7 @@ mlog_open_and_write_index(
len = field->fixed_len;
ut_ad(len < 0x7fff);
if (len == 0
- && (col->len > 255 || col->mtype == DATA_BLOB)) {
+ && (DATA_BIG_COL(col))) {
/* variable-length field
with maximum length > 255 */
len = 0x7fff;
@@ -513,10 +504,13 @@ mlog_open_and_write_index(
ut_a(total > (ulint) (log_ptr - log_start));
total -= log_ptr - log_start;
alloc = total;
- if (alloc > DYN_ARRAY_DATA_SIZE) {
- alloc = DYN_ARRAY_DATA_SIZE;
+
+ if (alloc > mtr_buf_t::MAX_DATA_SIZE) {
+ alloc = mtr_buf_t::MAX_DATA_SIZE;
}
+
log_start = log_ptr = mlog_open(mtr, alloc);
+
if (!log_ptr) {
return(NULL); /* logging is disabled */
}
@@ -535,12 +529,10 @@ mlog_open_and_write_index(
}
return(log_ptr);
}
-#endif /* !UNIV_HOTBACKUP */
/********************************************************//**
Parses a log record written by mlog_open_and_write_index.
-@return parsed record end, NULL if not a complete record */
-UNIV_INTERN
+@return parsed record end, NULL if not a complete record */
byte*
mlog_parse_index(
/*=============*/
@@ -570,7 +562,7 @@ mlog_parse_index(
} else {
n = n_uniq = 1;
}
- table = dict_mem_table_create("LOG_DUMMY", DICT_HDR_SPACE, n,
+ table = dict_mem_table_create("LOG_DUMMY", DICT_HDR_SPACE, n, 0,
comp ? DICT_TF_COMPACT : 0, 0);
ind = dict_mem_index_create("LOG_DUMMY", "LOG_DUMMY",
DICT_HDR_SPACE, 0, n);
@@ -615,6 +607,7 @@ mlog_parse_index(
}
/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
ind->cached = TRUE;
+ ut_d(ind->is_dummy = true);
*index = ind;
return(ptr);
}
diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc
index 2db6e62cbed..b75a9c4cf02 100644
--- a/storage/innobase/mtr/mtr0mtr.cc
+++ b/storage/innobase/mtr/mtr0mtr.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,447 +26,906 @@ Created 11/26/1995 Heikki Tuuri
#include "mtr0mtr.h"
-#ifdef UNIV_NONINL
-#include "mtr0mtr.ic"
-#endif
-
#include "buf0buf.h"
#include "buf0flu.h"
#include "page0types.h"
#include "mtr0log.h"
-#include "log0log.h"
-
-#ifndef UNIV_HOTBACKUP
-# include "log0recv.h"
-
-/***************************************************//**
-Checks if a mini-transaction is dirtying a clean page.
-@return TRUE if the mtr is dirtying a clean page. */
-UNIV_INTERN
-ibool
-mtr_block_dirtied(
-/*==============*/
- const buf_block_t* block) /*!< in: block being x-fixed */
+#include "row0trunc.h"
+#include "log0recv.h"
+
+/** Iterate over a memo block in reverse. */
+template <typename Functor>
+struct CIterate {
+ CIterate() : functor() {}
+
+ CIterate(const Functor& functor) : functor(functor) {}
+
+ /** @return false if the functor returns false. */
+ bool operator()(mtr_buf_t::block_t* block) const
+ {
+ const mtr_memo_slot_t* start =
+ reinterpret_cast<const mtr_memo_slot_t*>(
+ block->begin());
+
+ mtr_memo_slot_t* slot =
+ reinterpret_cast<mtr_memo_slot_t*>(
+ block->end());
+
+ ut_ad(!(block->used() % sizeof(*slot)));
+
+ while (slot-- != start) {
+
+ if (!functor(slot)) {
+ return(false);
+ }
+ }
+
+ return(true);
+ }
+
+ Functor functor;
+};
+
+template <typename Functor>
+struct Iterate {
+ Iterate() : functor() {}
+
+ Iterate(const Functor& functor) : functor(functor) {}
+
+ /** @return false if the functor returns false. */
+ bool operator()(mtr_buf_t::block_t* block)
+ {
+ const mtr_memo_slot_t* start =
+ reinterpret_cast<const mtr_memo_slot_t*>(
+ block->begin());
+
+ mtr_memo_slot_t* slot =
+ reinterpret_cast<mtr_memo_slot_t*>(
+ block->end());
+
+ ut_ad(!(block->used() % sizeof(*slot)));
+
+ while (slot-- != start) {
+
+ if (!functor(slot)) {
+ return(false);
+ }
+ }
+
+ return(true);
+ }
+
+ Functor functor;
+};
+
+/** Find specific object */
+struct Find {
+
+ /** Constructor */
+ Find(const void* object, ulint type)
+ :
+ m_slot(),
+ m_type(type),
+ m_object(object)
+ {
+ ut_a(object != NULL);
+ }
+
+ /** @return false if the object was found. */
+ bool operator()(mtr_memo_slot_t* slot)
+ {
+ if (m_object == slot->object && m_type == slot->type) {
+ m_slot = slot;
+ return(false);
+ }
+
+ return(true);
+ }
+
+ /** Slot if found */
+ mtr_memo_slot_t*m_slot;
+
+ /** Type of the object to look for */
+ ulint m_type;
+
+ /** The object instance to look for */
+ const void* m_object;
+};
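
A hedged sketch of how Iterate and Find are typically combined, for instance to ask whether a mini-transaction memo still holds a latch on an object; for_each_block_in_reverse() is the mtr_buf_t traversal assumed here:

	static bool memo_contains(const mtr_buf_t* memo,
				  const void* object, ulint type)
	{
		Iterate<Find>	iterator(Find(object, type));

		/* The traversal returns false when the functor stopped the
		scan early, i.e. when Find located a matching slot. */
		return(!memo->for_each_block_in_reverse(iterator));
	}
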
+
+/** Find a page frame */
+struct FindPage
{
- ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- ut_ad(block->page.buf_fix_count > 0);
+ /** Constructor
+ @param[in] ptr pointer to within a page frame
+ @param[in] flags MTR_MEMO flags to look for */
+ FindPage(const void* ptr, ulint flags)
+ : m_ptr(ptr), m_flags(flags), m_slot(NULL)
+ {
+ /* There must be some flags to look for. */
+ ut_ad(flags);
+ /* We can only look for page-related flags. */
+ ut_ad(!(flags & ulint(~(MTR_MEMO_PAGE_S_FIX
+ | MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX
+ | MTR_MEMO_BUF_FIX
+ | MTR_MEMO_MODIFY))));
+ }
- /* It is OK to read oldest_modification because no
- other thread can be performing a write of it and it
- is only during write that the value is reset to 0. */
- return(block->page.oldest_modification == 0);
+ /** Visit a memo entry.
+ @param[in] slot memo entry to visit
+ @retval false if a page was found
+ @retval true if the iteration should continue */
+ bool operator()(mtr_memo_slot_t* slot)
+ {
+ ut_ad(m_slot == NULL);
+
+ if (!(m_flags & slot->type) || slot->object == NULL) {
+ return(true);
+ }
+
+ buf_block_t* block = reinterpret_cast<buf_block_t*>(
+ slot->object);
+
+ if (m_ptr < block->frame
+ || m_ptr >= block->frame + block->page.size.logical()) {
+ return(true);
+ }
+
+ ut_ad(!(m_flags & (MTR_MEMO_PAGE_S_FIX
+ | MTR_MEMO_PAGE_SX_FIX
+ | MTR_MEMO_PAGE_X_FIX))
+ || rw_lock_own_flagged(&block->lock, m_flags));
+
+ m_slot = slot;
+ return(false);
+ }
+
+ /** @return the slot that was found */
+ mtr_memo_slot_t* get_slot() const
+ {
+ ut_ad(m_slot != NULL);
+ return(m_slot);
+ }
+ /** @return the block that was found */
+ buf_block_t* get_block() const
+ {
+ return(reinterpret_cast<buf_block_t*>(get_slot()->object));
+ }
+private:
+ /** Pointer inside a page frame to look for */
+ const void*const m_ptr;
+ /** MTR_MEMO flags to look for */
+ const ulint m_flags;
+ /** The slot corresponding to m_ptr */
+ mtr_memo_slot_t* m_slot;
+};
+
+/** Release latches and decrement the buffer fix count.
+@param slot memo slot */
+static void memo_slot_release(mtr_memo_slot_t *slot)
+{
+ switch (slot->type) {
+#ifdef UNIV_DEBUG
+ default:
+ ut_ad(!"invalid type");
+ break;
+ case MTR_MEMO_MODIFY:
+ break;
+#endif /* UNIV_DEBUG */
+ case MTR_MEMO_S_LOCK:
+ rw_lock_s_unlock(reinterpret_cast<rw_lock_t*>(slot->object));
+ break;
+ case MTR_MEMO_SX_LOCK:
+ rw_lock_sx_unlock(reinterpret_cast<rw_lock_t*>(slot->object));
+ break;
+ case MTR_MEMO_X_LOCK:
+ rw_lock_x_unlock(reinterpret_cast<rw_lock_t*>(slot->object));
+ break;
+ case MTR_MEMO_BUF_FIX:
+ case MTR_MEMO_PAGE_S_FIX:
+ case MTR_MEMO_PAGE_SX_FIX:
+ case MTR_MEMO_PAGE_X_FIX:
+ buf_block_t *block= reinterpret_cast<buf_block_t*>(slot->object);
+ buf_block_unfix(block);
+ buf_page_release_latch(block, slot->type);
+ break;
+ }
+ slot->object= NULL;
}
-/*****************************************************************//**
-Releases the item in the slot given. */
-static MY_ATTRIBUTE((nonnull))
-void
-mtr_memo_slot_release_func(
-/*=======================*/
+/** Release the latches acquired by the mini-transaction. */
+struct ReleaseLatches {
+ /** @return true always. */
+ bool operator()(mtr_memo_slot_t *slot) const
+ {
+ if (!slot->object)
+ return true;
+ switch (slot->type) {
#ifdef UNIV_DEBUG
- mtr_t* mtr, /*!< in/out: mini-transaction */
+ default:
+ ut_ad(!"invalid type");
+ break;
+ case MTR_MEMO_MODIFY:
+ break;
#endif /* UNIV_DEBUG */
- mtr_memo_slot_t* slot) /*!< in: memo slot */
+ case MTR_MEMO_S_LOCK:
+ rw_lock_s_unlock(reinterpret_cast<rw_lock_t*>(slot->object));
+ break;
+ case MTR_MEMO_X_LOCK:
+ rw_lock_x_unlock(reinterpret_cast<rw_lock_t*>(slot->object));
+ break;
+ case MTR_MEMO_SX_LOCK:
+ rw_lock_sx_unlock(reinterpret_cast<rw_lock_t*>(slot->object));
+ break;
+ case MTR_MEMO_BUF_FIX:
+ case MTR_MEMO_PAGE_S_FIX:
+ case MTR_MEMO_PAGE_SX_FIX:
+ case MTR_MEMO_PAGE_X_FIX:
+ buf_block_t *block= reinterpret_cast<buf_block_t*>(slot->object);
+ buf_block_unfix(block);
+ buf_page_release_latch(block, slot->type);
+ break;
+ }
+ slot->object= NULL;
+ return true;
+ }
+};
+
+/** Release the latches and blocks acquired by the mini-transaction. */
+struct ReleaseAll {
+ /** @return true always. */
+ bool operator()(mtr_memo_slot_t *slot) const
+ {
+ if (slot->object)
+ memo_slot_release(slot);
+ return true;
+ }
+};
+
+#ifdef UNIV_DEBUG
+/** Check that all slots have been handled. */
+struct DebugCheck {
+ /** @return true always. */
+ bool operator()(const mtr_memo_slot_t* slot) const
+ {
+ ut_ad(!slot->object);
+ return(true);
+ }
+};
+#endif
+
+/** Add the blocks modified by the mini-transaction to the flush list. */
+struct ReleaseBlocks {
+ /** Constructor: remember the LSN bounds and the flush observer */
+ ReleaseBlocks(lsn_t start_lsn, lsn_t end_lsn, FlushObserver* observer)
+ :
+ m_end_lsn(end_lsn),
+ m_start_lsn(start_lsn),
+ m_flush_observer(observer)
+ {
+ /* Do nothing */
+ }
+
+ /** Add the modified page to the buffer flush list. */
+ void add_dirty_page_to_flush_list(mtr_memo_slot_t* slot) const
+ {
+ ut_ad(m_end_lsn > 0);
+ ut_ad(m_start_lsn > 0);
+
+ buf_block_t* block;
+
+ block = reinterpret_cast<buf_block_t*>(slot->object);
+
+ buf_flush_note_modification(block, m_start_lsn,
+ m_end_lsn, m_flush_observer);
+ }
+
+ /** @return true always. */
+ bool operator()(mtr_memo_slot_t* slot) const
+ {
+ if (slot->object != NULL) {
+
+ if (slot->type == MTR_MEMO_PAGE_X_FIX
+ || slot->type == MTR_MEMO_PAGE_SX_FIX) {
+
+ add_dirty_page_to_flush_list(slot);
+ }
+ }
+
+ return(true);
+ }
+
+ /** Mini-transaction REDO end LSN */
+ lsn_t m_end_lsn;
+
+ /** Mini-transaction REDO start LSN */
+ lsn_t m_start_lsn;
+
+ /** Flush observer */
+ FlushObserver* m_flush_observer;
+};
+
+/** Write the block contents to the REDO log */
+struct mtr_write_log_t {
+ /** Append a block to the redo log buffer.
+ @return whether the appending should continue */
+ bool operator()(const mtr_buf_t::block_t* block) const
+ {
+ log_write_low(block->begin(), block->used());
+ return(true);
+ }
+};
+
+/** Append records to the system-wide redo log buffer.
+@param[in] log redo log records */
+void
+mtr_write_log(
+ const mtr_buf_t* log)
{
- void* object = slot->object;
- slot->object = NULL;
-
- /* slot release is a local operation for the current mtr.
- We must not be holding the flush_order mutex while
- doing this. */
- ut_ad(!log_flush_order_mutex_own());
-
- switch (slot->type) {
- case MTR_MEMO_PAGE_S_FIX:
- case MTR_MEMO_PAGE_X_FIX:
- case MTR_MEMO_BUF_FIX:
- buf_page_release((buf_block_t*) object, slot->type);
- break;
- case MTR_MEMO_S_LOCK:
- rw_lock_s_unlock((rw_lock_t*) object);
+ const ulint len = log->size();
+ mtr_write_log_t write_log;
+
+ ut_ad(!recv_no_log_write);
+ DBUG_PRINT("ib_log",
+ (ULINTPF " extra bytes written at " LSN_PF,
+ len, log_sys->lsn));
+
+ log_reserve_and_open(len);
+ log->for_each_block(write_log);
+ log_close();
+}
+
+/** Start a mini-transaction. */
+void mtr_t::start()
+{
+ UNIV_MEM_INVALID(this, sizeof *this);
+
+ new(&m_memo) mtr_buf_t();
+ new(&m_log) mtr_buf_t();
+
+ m_made_dirty= false;
+ m_inside_ibuf= false;
+ m_modifications= false;
+ m_n_log_recs= 0;
+ m_log_mode= MTR_LOG_ALL;
+ ut_d(m_user_space_id= TRX_SYS_SPACE);
+ m_user_space= NULL;
+ m_undo_space= NULL;
+ m_sys_space= NULL;
+ m_state= MTR_STATE_ACTIVE;
+ m_flush_observer= NULL;
+ m_commit_lsn= 0;
+}
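start() poisons the whole object and then re-runs the constructors of the two embedded buffers with placement new, so a stack-allocated mtr_t can be reused without reallocating the object itself; release_resources() has already emptied both buffers by the time commit() returns. The general end-lifetime-then-reconstruct pattern, sketched with a hypothetical Buffer type:

#include <new>
#include <vector>

struct Buffer { std::vector<unsigned char> data; };

/* End the previous lifetime, then construct a fresh object in place. */
void reset_in_place(Buffer& b)
{
	b.~Buffer();
	new(&b) Buffer();
}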
+
+/** Release the resources */
+inline void mtr_t::release_resources()
+{
+ ut_d(m_memo.for_each_block_in_reverse(CIterate<DebugCheck>()));
+ m_log.erase();
+ m_memo.erase();
+ m_state= MTR_STATE_COMMITTED;
+}
+
+/** Commit a mini-transaction. */
+void
+mtr_t::commit()
+{
+ ut_ad(is_active());
+ ut_ad(!is_inside_ibuf());
+
+ /* This is a dirty read, for debugging. */
+ ut_ad(!m_modifications || !recv_no_log_write);
+ ut_ad(!m_modifications || m_log_mode != MTR_LOG_NONE);
+
+ if (m_modifications
+ && (m_n_log_recs || m_log_mode == MTR_LOG_NO_REDO))
+ {
+ ut_ad(!srv_read_only_mode || m_log_mode == MTR_LOG_NO_REDO);
+
+ lsn_t start_lsn;
+
+ if (const ulint len= prepare_write())
+ start_lsn= finish_write(len);
+ else
+ start_lsn= m_commit_lsn;
+
+ if (m_made_dirty)
+ log_flush_order_mutex_enter();
+
+ /* It is now safe to release the log mutex because the
+ flush_order mutex will ensure that we are the first one
+ to insert into the flush list. */
+ log_mutex_exit();
+
+ m_memo.for_each_block_in_reverse(CIterate<const ReleaseBlocks>
+ (ReleaseBlocks(start_lsn, m_commit_lsn,
+ m_flush_observer)));
+ if (m_made_dirty)
+ log_flush_order_mutex_exit();
+
+ m_memo.for_each_block_in_reverse(CIterate<ReleaseLatches>());
+ }
+ else
+ m_memo.for_each_block_in_reverse(CIterate<ReleaseAll>());
+
+ release_resources();
+}
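Note the lock ordering in the dirty-page branch above: the log mutex is still held from prepare_write()/finish_write(), the flush-order mutex is taken only when a previously clean page was dirtied, and only then is the log mutex released. That ordering guarantees flush-list insertions happen in LSN order. A sketch of the same protocol with plain std::mutex stand-ins:

#include <mutex>

std::mutex log_mutex;		/* stand-in: serializes redo-log appends */
std::mutex flush_order_mutex;	/* stand-in: serializes flush-list inserts */

void commit_ordering(bool made_dirty)
{
	std::unique_lock<std::mutex> log_lock(log_mutex);

	/* ... the redo log records would be appended here ... */

	std::unique_lock<std::mutex> flush_lock;
	if (made_dirty) {
		flush_lock = std::unique_lock<std::mutex>(flush_order_mutex);
	}

	/* Safe to drop the log mutex: flush_order_mutex (if taken)
	ensures this mtr reaches the flush list before any later
	mtr with a higher LSN. */
	log_lock.unlock();

	/* ... dirty pages are added to the flush list; flush_lock
	is released on scope exit ... */
}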
+
+/** Commit a mini-transaction that did not modify any pages,
+but generated some redo log on a higher level, such as
+MLOG_FILE_NAME records and a MLOG_CHECKPOINT marker.
+The caller must invoke log_mutex_enter() and log_mutex_exit().
+This is to be used at log_checkpoint().
+@param[in] checkpoint_lsn the LSN of the log checkpoint
+@param[in] write_mlog_checkpoint whether to write a
+ MLOG_CHECKPOINT marker */
+void
+mtr_t::commit_checkpoint(
+ lsn_t checkpoint_lsn,
+ bool write_mlog_checkpoint)
+{
+ ut_ad(log_mutex_own());
+ ut_ad(is_active());
+ ut_ad(!is_inside_ibuf());
+ ut_ad(get_log_mode() == MTR_LOG_ALL);
+ ut_ad(!m_made_dirty);
+ ut_ad(m_memo.size() == 0);
+ ut_ad(!srv_read_only_mode);
+ ut_ad(write_mlog_checkpoint || m_n_log_recs > 1);
+
+ switch (m_n_log_recs) {
+ case 0:
break;
- case MTR_MEMO_X_LOCK:
- rw_lock_x_unlock((rw_lock_t*) object);
+ case 1:
+ *m_log.front()->begin() |= MLOG_SINGLE_REC_FLAG;
break;
-#ifdef UNIV_DEBUG
default:
- ut_ad(slot->type == MTR_MEMO_MODIFY);
- ut_ad(mtr_memo_contains(mtr, object, MTR_MEMO_PAGE_X_FIX));
-#endif /* UNIV_DEBUG */
+ mlog_catenate_ulint(&m_log, MLOG_MULTI_REC_END, MLOG_1BYTE);
+ }
+
+ if (write_mlog_checkpoint) {
+ byte* ptr = m_log.push<byte*>(SIZE_OF_MLOG_CHECKPOINT);
+ compile_time_assert(SIZE_OF_MLOG_CHECKPOINT == 1 + 8);
+ *ptr = MLOG_CHECKPOINT;
+ mach_write_to_8(ptr + 1, checkpoint_lsn);
+ }
+
+ finish_write(m_log.size());
+ release_resources();
+
+ if (write_mlog_checkpoint) {
+ DBUG_PRINT("ib_log",
+ ("MLOG_CHECKPOINT(" LSN_PF ") written at " LSN_PF,
+ checkpoint_lsn, log_sys->lsn));
}
}
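commit_checkpoint() frames its record group the same way prepare_write() does: a single record has MLOG_SINGLE_REC_FLAG OR-ed into its first byte, while two or more records are terminated by MLOG_MULTI_REC_END. The convention, sketched with the constants written out (128 and 31 in InnoDB):

#include <cstdint>
#include <vector>

static const uint8_t SINGLE_REC_FLAG = 128;	/* MLOG_SINGLE_REC_FLAG */
static const uint8_t MULTI_REC_END = 31;	/* MLOG_MULTI_REC_END */

void frame_record_group(std::vector<uint8_t>& log, unsigned n_recs)
{
	if (n_recs == 1) {
		log.front() |= SINGLE_REC_FLAG;	/* flag the only record */
	} else if (n_recs > 1) {
		log.push_back(MULTI_REC_END);	/* terminate the group */
	}
}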
#ifdef UNIV_DEBUG
-# define mtr_memo_slot_release(mtr, slot) mtr_memo_slot_release_func(mtr, slot)
-#else /* UNIV_DEBUG */
-# define mtr_memo_slot_release(mtr, slot) mtr_memo_slot_release_func(slot)
+/** Check if a tablespace is associated with the mini-transaction
+(needed for generating a MLOG_FILE_NAME record)
+@param[in] space tablespace
+@return whether the mini-transaction is associated with the space */
+bool
+mtr_t::is_named_space(ulint space) const
+{
+ ut_ad(!m_sys_space || m_sys_space->id == TRX_SYS_SPACE);
+ ut_ad(!m_undo_space || m_undo_space->id != TRX_SYS_SPACE);
+ ut_ad(!m_user_space || m_user_space->id != TRX_SYS_SPACE);
+ ut_ad(!m_sys_space || m_sys_space != m_user_space);
+ ut_ad(!m_sys_space || m_sys_space != m_undo_space);
+ ut_ad(!m_user_space || m_user_space != m_undo_space);
+
+ switch (get_log_mode()) {
+ case MTR_LOG_NONE:
+ case MTR_LOG_NO_REDO:
+ return(true);
+ case MTR_LOG_ALL:
+ case MTR_LOG_SHORT_INSERTS:
+ return(m_user_space_id == space
+ || is_predefined_tablespace(space));
+ }
+
+ ut_error;
+ return(false);
+}
#endif /* UNIV_DEBUG */
-/**********************************************************//**
-Releases the mlocks and other objects stored in an mtr memo.
-They are released in the order opposite to which they were pushed
-to the memo. */
-static MY_ATTRIBUTE((nonnull))
-void
-mtr_memo_pop_all(
-/*=============*/
- mtr_t* mtr) /*!< in/out: mini-transaction */
+/** Acquire a tablespace X-latch.
+NOTE: use mtr_x_lock_space().
+@param[in] space_id tablespace ID
+@param[in] file file name from where called
+@param[in] line line number in file
+@return the tablespace object (never NULL) */
+fil_space_t*
+mtr_t::x_lock_space(ulint space_id, const char* file, unsigned line)
{
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_COMMITTING); /* Currently only used in
- commit */
-
- for (const dyn_block_t* block = dyn_array_get_last_block(&mtr->memo);
- block;
- block = dyn_array_get_prev_block(&mtr->memo, block)) {
- const mtr_memo_slot_t* start
- = reinterpret_cast<mtr_memo_slot_t*>(
- dyn_block_get_data(block));
- mtr_memo_slot_t* slot
- = reinterpret_cast<mtr_memo_slot_t*>(
- dyn_block_get_data(block)
- + dyn_block_get_used(block));
-
- ut_ad(!(dyn_block_get_used(block) % sizeof(mtr_memo_slot_t)));
+ fil_space_t* space;
- while (slot-- != start) {
- if (slot->object != NULL) {
- mtr_memo_slot_release(mtr, slot);
- }
+ ut_ad(is_active());
+
+ if (space_id == TRX_SYS_SPACE) {
+ space = m_sys_space;
+
+ if (!space) {
+ space = m_sys_space = fil_space_get(space_id);
}
+ } else if ((space = m_user_space) && space_id == space->id) {
+ } else if ((space = m_undo_space) && space_id == space->id) {
+ } else if (get_log_mode() == MTR_LOG_NO_REDO) {
+ space = fil_space_get(space_id);
+ ut_ad(space->purpose == FIL_TYPE_TEMPORARY
+ || space->purpose == FIL_TYPE_IMPORT
+ || space->redo_skipped_count > 0
+ || srv_is_tablespace_truncated(space->id));
+ } else {
+ /* called from trx_rseg_create() */
+ space = m_undo_space = fil_space_get(space_id);
}
+
+ ut_ad(space);
+ ut_ad(space->id == space_id);
+ x_lock(&space->latch, file, line);
+ ut_ad(space->purpose == FIL_TYPE_TEMPORARY
+ || space->purpose == FIL_TYPE_IMPORT
+ || space->purpose == FIL_TYPE_TABLESPACE);
+ return(space);
}
-/*****************************************************************//**
-Releases the item in the slot given. */
-static
+/** Look up the system tablespace. */
void
-mtr_memo_slot_note_modification(
-/*============================*/
- mtr_t* mtr, /*!< in: mtr */
- mtr_memo_slot_t* slot) /*!< in: memo slot */
+mtr_t::lookup_sys_space()
{
- ut_ad(mtr->modifications);
- ut_ad(!srv_read_only_mode);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
+ ut_ad(!m_sys_space);
+ m_sys_space = fil_space_get(TRX_SYS_SPACE);
+ ut_ad(m_sys_space);
+}
- if (slot->object != NULL && slot->type == MTR_MEMO_PAGE_X_FIX) {
- buf_block_t* block = (buf_block_t*) slot->object;
+/** Look up the user tablespace.
+@param[in] space_id tablespace ID */
+void
+mtr_t::lookup_user_space(ulint space_id)
+{
+ ut_ad(space_id != TRX_SYS_SPACE);
+ ut_ad(m_user_space_id == space_id);
+ ut_ad(!m_user_space);
+ m_user_space = fil_space_get(space_id);
+ ut_ad(m_user_space);
+}
- ut_ad(!mtr->made_dirty || log_flush_order_mutex_own());
- buf_flush_note_modification(block, mtr);
+/** Set the tablespace associated with the mini-transaction
+(needed for generating a MLOG_FILE_NAME record)
+@param[in] space user or system tablespace */
+void
+mtr_t::set_named_space(fil_space_t* space)
+{
+ ut_ad(m_user_space_id == TRX_SYS_SPACE);
+ ut_d(m_user_space_id = space->id);
+ if (space->id == TRX_SYS_SPACE) {
+ ut_ad(!m_sys_space || m_sys_space == space);
+ m_sys_space = space;
+ } else {
+ m_user_space = space;
}
}
-/**********************************************************//**
-Add the modified pages to the buffer flush list. They are released
-in the order opposite to which they were pushed to the memo. NOTE! It is
-essential that the x-rw-lock on a modified buffer page is not released
-before buf_page_note_modification is called for that page! Otherwise,
-some thread might race to modify it, and the flush list sort order on
-lsn would be destroyed. */
-static
-void
-mtr_memo_note_modifications(
-/*========================*/
- mtr_t* mtr) /*!< in: mtr */
+/** Release an object in the memo stack.
+@return true if released */
+bool
+mtr_t::memo_release(const void* object, ulint type)
{
- ut_ad(!srv_read_only_mode);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_COMMITTING); /* Currently only used in
- commit */
-
- for (const dyn_block_t* block = dyn_array_get_last_block(&mtr->memo);
- block;
- block = dyn_array_get_prev_block(&mtr->memo, block)) {
- const mtr_memo_slot_t* start
- = reinterpret_cast<mtr_memo_slot_t*>(
- dyn_block_get_data(block));
- mtr_memo_slot_t* slot
- = reinterpret_cast<mtr_memo_slot_t*>(
- dyn_block_get_data(block)
- + dyn_block_get_used(block));
-
- ut_ad(!(dyn_block_get_used(block) % sizeof(mtr_memo_slot_t)));
+ ut_ad(is_active());
- while (slot-- != start) {
- if (slot->object != NULL) {
- mtr_memo_slot_note_modification(mtr, slot);
- }
- }
+ /* We cannot release a page that has been written to in the
+ middle of a mini-transaction. */
+ ut_ad(!m_modifications || type != MTR_MEMO_PAGE_X_FIX);
+
+ Iterate<Find> iteration(Find(object, type));
+
+ if (!m_memo.for_each_block_in_reverse(iteration)) {
+ memo_slot_release(iteration.functor.m_slot);
+ return(true);
}
+
+ return(false);
}
-/************************************************************//**
-Append the dirty pages to the flush list. */
-static
+/** Release a page latch.
+@param[in] ptr pointer to within a page frame
+@param[in] type object type: MTR_MEMO_PAGE_X_FIX, ... */
void
-mtr_add_dirtied_pages_to_flush_list(
-/*================================*/
- mtr_t* mtr) /*!< in/out: mtr */
+mtr_t::release_page(const void* ptr, mtr_memo_type_t type)
{
- ut_ad(!srv_read_only_mode);
+ ut_ad(is_active());
- /* No need to acquire log_flush_order_mutex if this mtr has
- not dirtied a clean page. log_flush_order_mutex is used to
- ensure ordered insertions in the flush_list. We need to
- insert in the flush_list iff the page in question was clean
- before modifications. */
- if (mtr->made_dirty) {
- log_flush_order_mutex_enter();
- }
+ /* We cannot release a page that has been written to in the
+ middle of a mini-transaction. */
+ ut_ad(!m_modifications || type != MTR_MEMO_PAGE_X_FIX);
- /* It is now safe to release the log mutex because the
- flush_order mutex will ensure that we are the first one
- to insert into the flush list. */
- log_release();
+ Iterate<FindPage> iteration(FindPage(ptr, type));
- if (mtr->modifications) {
- mtr_memo_note_modifications(mtr);
+ if (!m_memo.for_each_block_in_reverse(iteration)) {
+ memo_slot_release(iteration.functor.get_slot());
+ return;
}
- if (mtr->made_dirty) {
- log_flush_order_mutex_exit();
- }
+ /* The page was not found! */
+ ut_ad(0);
}
-/************************************************************//**
-Writes the contents of a mini-transaction log, if any, to the database log. */
-static
-void
-mtr_log_reserve_and_write(
-/*======================*/
- mtr_t* mtr) /*!< in/out: mtr */
+/** Prepare to write the mini-transaction log to the redo log buffer.
+@return number of bytes to write in finish_write() */
+inline ulint mtr_t::prepare_write()
{
- dyn_array_t* mlog;
- ulint data_size;
- byte* first_data;
+ ut_ad(!recv_no_log_write);
- ut_ad(!srv_read_only_mode);
+ if (UNIV_UNLIKELY(m_log_mode != MTR_LOG_ALL)) {
+ ut_ad(m_log_mode == MTR_LOG_NO_REDO);
+ ut_ad(m_log.size() == 0);
+ log_mutex_enter();
+ m_commit_lsn = log_sys->lsn;
+ return 0;
+ }
- mlog = &(mtr->log);
+ ulint len = m_log.size();
+ ulint n_recs = m_n_log_recs;
+ ut_ad(len > 0);
+ ut_ad(n_recs > 0);
- first_data = dyn_block_get_data(mlog);
+ if (len > log_sys->buf_size / 2) {
+ log_buffer_extend((len + 1) * 2);
+ }
- if (mtr->n_log_recs > 1) {
- mlog_catenate_ulint(mtr, MLOG_MULTI_REC_END, MLOG_1BYTE);
- } else {
- *first_data = (byte)((ulint)*first_data
- | MLOG_SINGLE_REC_FLAG);
+ ut_ad(m_n_log_recs == n_recs);
+
+ fil_space_t* space = m_user_space;
+
+ if (space != NULL && is_predefined_tablespace(space->id)) {
+ /* Omit MLOG_FILE_NAME for predefined tablespaces. */
+ space = NULL;
}
- if (mlog->heap == NULL) {
- ulint len;
+ log_mutex_enter();
- len = mtr->log_mode != MTR_LOG_NO_REDO
- ? dyn_block_get_used(mlog) : 0;
+ if (fil_names_write_if_was_clean(space, this)) {
+ /* This mini-transaction was the first one to modify
+ this tablespace since the latest checkpoint, so
+ some MLOG_FILE_NAME records were appended to m_log. */
+ ut_ad(m_n_log_recs > n_recs);
+ mlog_catenate_ulint(&m_log, MLOG_MULTI_REC_END, MLOG_1BYTE);
+ len = m_log.size();
+ } else {
+ /* This mini-transaction was not the first to dirty a
+ tablespace since the latest checkpoint. */
- mtr->end_lsn = log_reserve_and_write_fast(
- first_data, len, &mtr->start_lsn);
+ ut_ad(n_recs == m_n_log_recs);
- if (mtr->end_lsn) {
+ if (n_recs <= 1) {
+ ut_ad(n_recs == 1);
- /* Success. We have the log mutex.
- Add pages to flush list and exit */
- mtr_add_dirtied_pages_to_flush_list(mtr);
+ /* Flag the single log record as the
+ only record in this mini-transaction. */
+ *m_log.front()->begin() |= MLOG_SINGLE_REC_FLAG;
+ } else {
+ /* Because this mini-transaction comprises
+ multiple log records, append MLOG_MULTI_REC_END
+ at the end. */
- return;
+ mlog_catenate_ulint(&m_log, MLOG_MULTI_REC_END,
+ MLOG_1BYTE);
+ len++;
}
}
- data_size = dyn_array_get_data_size(mlog);
+ /* check and attempt a checkpoint if exceeding capacity */
+ log_margin_checkpoint_age(len);
- /* Open the database log for log_write_low */
- mtr->start_lsn = log_reserve_and_open(data_size);
+ return(len);
+}
- if (mtr->log_mode == MTR_LOG_ALL) {
+/** Append the redo log records to the redo log buffer
+@param[in] len number of bytes to write
+@return start_lsn */
+inline lsn_t mtr_t::finish_write(ulint len)
+{
+ ut_ad(m_log_mode == MTR_LOG_ALL);
+ ut_ad(log_mutex_own());
+ ut_ad(m_log.size() == len);
+ ut_ad(len > 0);
- for (dyn_block_t* block = mlog;
- block != 0;
- block = dyn_array_get_next_block(mlog, block)) {
+ lsn_t start_lsn;
- log_write_low(
- dyn_block_get_data(block),
- dyn_block_get_used(block));
- }
+ if (m_log.is_small()) {
+ const mtr_buf_t::block_t* front = m_log.front();
+ ut_ad(len <= front->used());
- } else {
- ut_ad(mtr->log_mode == MTR_LOG_NONE
- || mtr->log_mode == MTR_LOG_NO_REDO);
- /* Do nothing */
+ m_commit_lsn = log_reserve_and_write_fast(front->begin(), len,
+ &start_lsn);
+
+ if (m_commit_lsn) {
+ return start_lsn;
+ }
}
- mtr->end_lsn = log_close();
+ /* Open the database log for log_write_low */
+ start_lsn = log_reserve_and_open(len);
- mtr_add_dirtied_pages_to_flush_list(mtr);
+ mtr_write_log_t write_log;
+ m_log.for_each_block(write_log);
+
+ m_commit_lsn = log_close();
+ return start_lsn;
}
-#endif /* !UNIV_HOTBACKUP */
-/***************************************************************//**
-Commits a mini-transaction. */
-UNIV_INTERN
+/** Release the free extents that were reserved using
+fsp_reserve_free_extents(). This is equivalent to calling
+fil_space_release_free_extents(). This is intended for use
+with index pages.
+@param[in] n_reserved number of reserved extents */
void
-mtr_commit(
-/*=======*/
- mtr_t* mtr) /*!< in: mini-transaction */
+mtr_t::release_free_extents(ulint n_reserved)
{
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
- ut_ad(!mtr->inside_ibuf);
- ut_d(mtr->state = MTR_COMMITTING);
+ fil_space_t* space;
-#ifndef UNIV_HOTBACKUP
- /* This is a dirty read, for debugging. */
- ut_ad(!recv_no_log_write);
+ ut_ad(!m_undo_space);
- if (mtr->modifications && mtr->n_log_recs) {
- ut_ad(!srv_read_only_mode);
- mtr_log_reserve_and_write(mtr);
- }
+ if (m_user_space) {
- mtr_memo_pop_all(mtr);
-#endif /* !UNIV_HOTBACKUP */
+ ut_ad(m_user_space->id == m_user_space_id);
+ ut_ad(memo_contains(get_memo(), &m_user_space->latch,
+ MTR_MEMO_X_LOCK));
- dyn_array_free(&(mtr->memo));
- dyn_array_free(&(mtr->log));
-#ifdef UNIV_DEBUG_VALGRIND
- /* Declare everything uninitialized except
- mtr->start_lsn, mtr->end_lsn and mtr->state. */
- {
- lsn_t start_lsn = mtr->start_lsn;
- lsn_t end_lsn = mtr->end_lsn;
- UNIV_MEM_INVALID(mtr, sizeof *mtr);
- mtr->start_lsn = start_lsn;
- mtr->end_lsn = end_lsn;
- }
-#endif /* UNIV_DEBUG_VALGRIND */
- ut_d(mtr->state = MTR_COMMITTED);
+ space = m_user_space;
+ } else {
+
+ ut_ad(m_sys_space->id == TRX_SYS_SPACE);
+ ut_ad(memo_contains(get_memo(), &m_sys_space->latch,
+ MTR_MEMO_X_LOCK));
+
+ space = m_sys_space;
+ }
+
+ space->release_free_extents(n_reserved);
}
-#ifndef UNIV_HOTBACKUP
-/***************************************************//**
-Releases an object in the memo stack.
-@return true if released */
-UNIV_INTERN
+#ifdef UNIV_DEBUG
+/** Check if memo contains the given item.
+@return true if contains */
bool
-mtr_memo_release(
-/*=============*/
- mtr_t* mtr, /*!< in/out: mini-transaction */
- void* object, /*!< in: object */
- ulint type) /*!< in: object type: MTR_MEMO_S_LOCK, ... */
+mtr_t::memo_contains(
+ const mtr_buf_t* memo,
+ const void* object,
+ ulint type)
{
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
- /* We cannot release a page that has been written to in the
- middle of a mini-transaction. */
- ut_ad(!mtr->modifications || type != MTR_MEMO_PAGE_X_FIX);
+ Iterate<Find> iteration(Find(object, type));
+ if (memo->for_each_block_in_reverse(iteration)) {
+ return(false);
+ }
+
+ switch (type) {
+ case MTR_MEMO_X_LOCK:
+ ut_ad(rw_lock_own((rw_lock_t*) object, RW_LOCK_X));
+ break;
+ case MTR_MEMO_SX_LOCK:
+ ut_ad(rw_lock_own((rw_lock_t*) object, RW_LOCK_SX));
+ break;
+ case MTR_MEMO_S_LOCK:
+ ut_ad(rw_lock_own((rw_lock_t*) object, RW_LOCK_S));
+ break;
+ }
- for (const dyn_block_t* block = dyn_array_get_last_block(&mtr->memo);
- block;
- block = dyn_array_get_prev_block(&mtr->memo, block)) {
- const mtr_memo_slot_t* start
- = reinterpret_cast<mtr_memo_slot_t*>(
- dyn_block_get_data(block));
- mtr_memo_slot_t* slot
- = reinterpret_cast<mtr_memo_slot_t*>(
- dyn_block_get_data(block)
- + dyn_block_get_used(block));
+ return(true);
+}
- ut_ad(!(dyn_block_get_used(block) % sizeof(mtr_memo_slot_t)));
+/** Debug check for flags */
+struct FlaggedCheck {
+ FlaggedCheck(const void* ptr, ulint flags)
+ :
+ m_ptr(ptr),
+ m_flags(flags)
+ {
+ /* There must be some flags to look for. */
+ ut_ad(flags);
+ /* Look for rw-lock-related and page-related flags. */
+ ut_ad(!(flags & ulint(~(MTR_MEMO_PAGE_S_FIX
+ | MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX
+ | MTR_MEMO_BUF_FIX
+ | MTR_MEMO_MODIFY
+ | MTR_MEMO_X_LOCK
+ | MTR_MEMO_SX_LOCK
+ | MTR_MEMO_S_LOCK))));
+ /* Either some rw-lock-related or page-related flags
+ must be specified, but not both at the same time. */
+ ut_ad(!(flags & (MTR_MEMO_PAGE_S_FIX
+ | MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_SX_FIX
+ | MTR_MEMO_BUF_FIX
+ | MTR_MEMO_MODIFY))
+ == !!(flags & (MTR_MEMO_X_LOCK
+ | MTR_MEMO_SX_LOCK
+ | MTR_MEMO_S_LOCK)));
+ }
- while (slot-- != start) {
- if (object == slot->object && type == slot->type) {
- mtr_memo_slot_release(mtr, slot);
- return(true);
- }
+ /** Visit a memo entry.
+ @param[in] slot memo entry to visit
+ @retval false if m_ptr was found
+ @retval true if the iteration should continue */
+ bool operator()(const mtr_memo_slot_t* slot) const
+ {
+ if (m_ptr != slot->object || !(m_flags & slot->type)) {
+ return(true);
+ }
+
+ if (ulint flags = m_flags & (MTR_MEMO_PAGE_S_FIX
+ | MTR_MEMO_PAGE_SX_FIX
+ | MTR_MEMO_PAGE_X_FIX)) {
+ rw_lock_t* lock = &static_cast<buf_block_t*>(
+ const_cast<void*>(m_ptr))->lock;
+ ut_ad(rw_lock_own_flagged(lock, flags));
+ } else {
+ rw_lock_t* lock = static_cast<rw_lock_t*>(
+ const_cast<void*>(m_ptr));
+ ut_ad(rw_lock_own_flagged(lock, m_flags >> 5));
}
+
+ return(false);
}
- return(false);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************//**
-Reads 1 - 4 bytes from a file page buffered in the buffer pool.
-@return value read */
-UNIV_INTERN
-ulint
-mtr_read_ulint(
-/*===========*/
- const byte* ptr, /*!< in: pointer from where to read */
- ulint type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
- mtr_t* mtr MY_ATTRIBUTE((unused)))
- /*!< in: mini-transaction handle */
+ const void*const m_ptr;
+ const ulint m_flags;
+};
+
+/** Check if memo contains the given item.
+@param ptr the item (latch) to search for
+@param flags specify types of object (can be an OR of
+ MTR_MEMO_PAGE_S_FIX ... values)
+@return true if contains */
+bool
+mtr_t::memo_contains_flagged(const void* ptr, ulint flags) const
{
- ut_ad(mtr->state == MTR_ACTIVE);
- ut_ad(mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(is_active());
- return(mach_read_ulint(ptr, type));
+ return !m_memo.for_each_block_in_reverse(
+ CIterate<FlaggedCheck>(FlaggedCheck(ptr, flags)));
}
-#ifdef UNIV_DEBUG
-# ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Checks if memo contains the given page.
-@return TRUE if contains */
-UNIV_INTERN
-ibool
-mtr_memo_contains_page(
-/*===================*/
- mtr_t* mtr, /*!< in: mtr */
- const byte* ptr, /*!< in: pointer to buffer frame */
- ulint type) /*!< in: type of object */
+/** Check if memo contains the given page.
+@param[in] ptr pointer to within buffer frame
+@param[in] flags specify types of object with OR of
+ MTR_MEMO_PAGE_S_FIX... values
+@return the block
+@retval NULL if not found */
+buf_block_t*
+mtr_t::memo_contains_page_flagged(
+ const byte* ptr,
+ ulint flags) const
{
- return(mtr_memo_contains(mtr, buf_block_align(ptr), type));
+ Iterate<FindPage> iteration(FindPage(ptr, flags));
+ return m_memo.for_each_block_in_reverse(iteration)
+ ? NULL : iteration.functor.get_block();
}
-/*********************************************************//**
-Prints info of an mtr handle. */
-UNIV_INTERN
+/** Mark the given latched page as modified.
+@param[in] ptr pointer to within buffer frame */
void
-mtr_print(
-/*======*/
- mtr_t* mtr) /*!< in: mtr */
+mtr_t::memo_modify_page(const byte* ptr)
{
- fprintf(stderr,
- "Mini-transaction handle: memo size %lu bytes"
- " log size %lu bytes\n",
- (ulong) dyn_array_get_data_size(&(mtr->memo)),
- (ulong) dyn_array_get_data_size(&(mtr->log)));
+ buf_block_t* block = memo_contains_page_flagged(
+ ptr, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX);
+ ut_ad(block != NULL);
+
+ if (!memo_contains(get_memo(), block, MTR_MEMO_MODIFY)) {
+ memo_push(block, MTR_MEMO_MODIFY);
+ }
}
-# endif /* !UNIV_HOTBACKUP */
-#endif /* UNIV_DEBUG */
-/**********************************************************//**
-Releases a buf_page stored in an mtr memo after a
-savepoint. */
-UNIV_INTERN
+/** Print info of an mtr handle. */
void
-mtr_release_buf_page_at_savepoint(
-/*=============================*/
- mtr_t* mtr, /*!< in: mtr */
- ulint savepoint, /*!< in: savepoint */
- buf_block_t* block) /*!< in: block to release */
+mtr_t::print() const
{
- mtr_memo_slot_t* slot;
- dyn_array_t* memo;
-
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
-
- memo = &(mtr->memo);
-
- ut_ad(dyn_array_get_data_size(memo) > savepoint);
-
- slot = (mtr_memo_slot_t*) dyn_array_get_element(memo, savepoint);
-
- ut_ad(slot->object == block);
- ut_ad(slot->type == MTR_MEMO_PAGE_S_FIX ||
- slot->type == MTR_MEMO_PAGE_X_FIX ||
- slot->type == MTR_MEMO_BUF_FIX);
-
- buf_page_release((buf_block_t*) slot->object, slot->type);
- slot->object = NULL;
+ ib::info() << "Mini-transaction handle: memo size "
+ << m_memo.size() << " bytes log size "
+ << get_log()->size() << " bytes";
}
+
+#endif /* UNIV_DEBUG */
diff --git a/storage/innobase/mysql-test/storage_engine/define_engine.inc b/storage/innobase/mysql-test/storage_engine/define_engine.inc
index 77e384d2351..7d7b0c7407a 100644
--- a/storage/innobase/mysql-test/storage_engine/define_engine.inc
+++ b/storage/innobase/mysql-test/storage_engine/define_engine.inc
@@ -41,9 +41,5 @@ let $default_char_type = CHAR(8);
# e.g. creation of an additional schema or table, etc.
# The cleanup part should be defined in cleanup_engine.inc
-CALL mtr.add_suppression("InnoDB: Resizing redo log from .* to .* pages, LSN=.*");
-CALL mtr.add_suppression("InnoDB: Starting to delete and rewrite log files.");
-CALL mtr.add_suppression("InnoDB: New log files created, LSN=.*");
-
--enable_query_log
--enable_result_log
diff --git a/storage/innobase/mysql-test/storage_engine/disabled.def b/storage/innobase/mysql-test/storage_engine/disabled.def
index bad10099bbf..1d67f9311ca 100644
--- a/storage/innobase/mysql-test/storage_engine/disabled.def
+++ b/storage/innobase/mysql-test/storage_engine/disabled.def
@@ -4,4 +4,6 @@ insert_high_prio : InnoDB does not use table-level locking
insert_low_prio : InnoDB does not use table-level locking
select_high_prio : InnoDB does not use table-level locking
update_low_prio : InnoDB does not use table-level locking
-
+insert_delayed : MDEV-12880 - INSERT DELAYED is not detected as inapplicable to a table under lock
+lock_concurrent : MDEV-12882 - Assertion failure
+tbl_opt_index_dir : INDEX DIRECTORY option is not supported anymore
diff --git a/storage/innobase/mysql-test/storage_engine/lock_concurrent.rdiff b/storage/innobase/mysql-test/storage_engine/lock_concurrent.rdiff
index fe4a0087fa9..c76a5fe7f15 100644
--- a/storage/innobase/mysql-test/storage_engine/lock_concurrent.rdiff
+++ b/storage/innobase/mysql-test/storage_engine/lock_concurrent.rdiff
@@ -1,7 +1,7 @@
--- suite/storage_engine/lock_concurrent.result 2012-06-24 23:55:19.539380000 +0400
+++ suite/storage_engine/lock_concurrent.reject 2012-07-15 17:50:21.279222746 +0400
-@@ -3,10 +3,19 @@
- LOCK TABLES t1 WRITE CONCURRENT, t1 AS t2 READ;
+@@ -4,6 +4,14 @@
+ connect con1,localhost,root,,;
SET lock_wait_timeout = 1;
LOCK TABLES t1 READ LOCAL;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
@@ -13,8 +13,11 @@
+# Also, this problem may cause a chain effect (more errors of different kinds in the test).
+# -------------------------------------------
UNLOCK TABLES;
+ connection default;
UNLOCK TABLES;
+@@ -11,6 +19,7 @@
LOCK TABLES t1 READ LOCAL;
+ connection default;
LOCK TABLES t1 WRITE CONCURRENT, t1 AS t2 READ;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
UNLOCK TABLES;
diff --git a/storage/innobase/mysql-test/storage_engine/repair_table.rdiff b/storage/innobase/mysql-test/storage_engine/repair_table.rdiff
index be3709c5833..717d437b2d1 100644
--- a/storage/innobase/mysql-test/storage_engine/repair_table.rdiff
+++ b/storage/innobase/mysql-test/storage_engine/repair_table.rdiff
@@ -111,7 +111,7 @@
-test.t1 check error Corrupt
+test.t1 check status OK
SELECT a,b FROM t1;
--ERROR HY000: Incorrect key file for table 't1'; try to repair it
+-ERROR HY000: Index for table 't1' is corrupt; try to repair it
-# Statement ended with one of expected results (0,ER_NOT_KEYFILE,144).
-# If you got a difference in error message, just add it to rdiff file
-INSERT INTO t1 (a,b) VALUES (14,'n'),(15,'o');
diff --git a/storage/innobase/mysql-test/storage_engine/suite.opt b/storage/innobase/mysql-test/storage_engine/suite.opt
index 034b58f2628..627becdbfb5 100644
--- a/storage/innobase/mysql-test/storage_engine/suite.opt
+++ b/storage/innobase/mysql-test/storage_engine/suite.opt
@@ -1 +1 @@
---innodb --ignore-builtin-innodb --plugin-load=ha_innodb
+--innodb
diff --git a/storage/innobase/mysql-test/storage_engine/tbl_opt_data_index_dir.rdiff b/storage/innobase/mysql-test/storage_engine/tbl_opt_index_dir.rdiff
index e09e50b17ec..e09e50b17ec 100644
--- a/storage/innobase/mysql-test/storage_engine/tbl_opt_data_index_dir.rdiff
+++ b/storage/innobase/mysql-test/storage_engine/tbl_opt_index_dir.rdiff
diff --git a/storage/innobase/mysql-test/storage_engine/tbl_opt_key_block_size.opt b/storage/innobase/mysql-test/storage_engine/tbl_opt_key_block_size.opt
deleted file mode 100644
index 7cd737b2b87..00000000000
--- a/storage/innobase/mysql-test/storage_engine/tbl_opt_key_block_size.opt
+++ /dev/null
@@ -1,3 +0,0 @@
---innodb-file-per-table=1
---innodb-file-format=Barracuda
-
diff --git a/storage/innobase/mysql-test/storage_engine/tbl_opt_row_format.opt b/storage/innobase/mysql-test/storage_engine/tbl_opt_row_format.opt
deleted file mode 100644
index 7cd737b2b87..00000000000
--- a/storage/innobase/mysql-test/storage_engine/tbl_opt_row_format.opt
+++ /dev/null
@@ -1,3 +0,0 @@
---innodb-file-per-table=1
---innodb-file-format=Barracuda
-
diff --git a/storage/innobase/mysql-test/storage_engine/tbl_opt_row_format.rdiff b/storage/innobase/mysql-test/storage_engine/tbl_opt_row_format.rdiff
index a6572ffa7f0..daa5fc67dec 100644
--- a/storage/innobase/mysql-test/storage_engine/tbl_opt_row_format.rdiff
+++ b/storage/innobase/mysql-test/storage_engine/tbl_opt_row_format.rdiff
@@ -1,10 +1,44 @@
---- suite/storage_engine/tbl_opt_row_format.result 2012-06-24 23:55:19.539380000 +0400
-+++ suite/storage_engine/tbl_opt_row_format.reject 2012-07-15 19:26:02.235049157 +0400
-@@ -1,5 +1,7 @@
- DROP TABLE IF EXISTS t1;
- CREATE TABLE t1 (a <INT_COLUMN>, b <CHAR_COLUMN>) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS> ROW_FORMAT=FIXED;
-+Warnings:
-+Warning 1478 <STORAGE_ENGINE>: assuming ROW_FORMAT=COMPACT.
+--- ../storage/innobase/mysql-test/storage_engine/tbl_opt_row_format.result~ 2017-05-24 00:40:12.854181048 +0300
++++ ../storage/innobase/mysql-test/storage_engine/tbl_opt_row_format.reject 2017-05-24 00:49:06.578191030 +0300
+@@ -7,19 +7,39 @@
+ `b` char(8) DEFAULT NULL
+ ) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC
+ ALTER TABLE t1 ROW_FORMAT=FIXED;
++ERROR HY000: Table storage engine '<STORAGE_ENGINE>' does not support the create option 'ROW_TYPE'
++# ERROR: Statement ended with errno 1478, errname ER_ILLEGAL_HA_CREATE_OPTION (expected to succeed)
++# ------------ UNEXPECTED RESULT ------------
++# [ ALTER TABLE t1 ROW_FORMAT=FIXED ]
++# The statement|command finished with ER_ILLEGAL_HA_CREATE_OPTION.
++# ALTER TABLE or the mix could be unsupported|malfunctioning, or the problem was caused by previous errors.
++# You can change the engine code, or create an rdiff, or disable the test by adding it to disabled.def.
++# Further in this test, the message might sometimes be suppressed; a part of the test might be skipped.
++# Also, this problem may cause a chain effect (more errors of different kinds in the test).
++# -------------------------------------------
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) DEFAULT NULL
+-) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 ROW_FORMAT=FIXED
++) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC
+ ALTER TABLE t1 ROW_FORMAT=PAGE;
++ERROR HY000: Table storage engine '<STORAGE_ENGINE>' does not support the create option 'ROW_TYPE'
++# ERROR: Statement ended with errno 1478, errname ER_ILLEGAL_HA_CREATE_OPTION (expected to succeed)
++# ------------ UNEXPECTED RESULT ------------
++# [ ALTER TABLE t1 ROW_FORMAT=PAGE ]
++# The statement|command finished with ER_ILLEGAL_HA_CREATE_OPTION.
++# ALTER TABLE or the mix could be unsupported|malfunctioning, or the problem was caused by previous errors.
++# You can change the engine code, or create an rdiff, or disable the test by adding it to disabled.def.
++# Further in this test, the message might sometimes be suppressed; a part of the test might be skipped.
++# Also, this problem may cause a chain effect (more errors of different kinds in the test).
++# -------------------------------------------
+ SHOW CREATE TABLE t1;
+ Table Create Table
+ t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) DEFAULT NULL
+-) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 ROW_FORMAT=PAGE
++) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC
+ ALTER TABLE t1 ROW_FORMAT=COMPACT;
+ SHOW CREATE TABLE t1;
+ Table Create Table
diff --git a/storage/innobase/mysql-test/storage_engine/type_spatial_indexes.rdiff b/storage/innobase/mysql-test/storage_engine/type_spatial_indexes.rdiff
deleted file mode 100644
index e312cf8f65c..00000000000
--- a/storage/innobase/mysql-test/storage_engine/type_spatial_indexes.rdiff
+++ /dev/null
@@ -1,712 +0,0 @@
---- suite/storage_engine/type_spatial_indexes.result 2013-08-05 18:08:49.000000000 +0400
-+++ suite/storage_engine/type_spatial_indexes.reject 2013-08-05 18:25:24.000000000 +0400
-@@ -702,699 +702,15 @@
- DROP DATABASE IF EXISTS gis_ogs;
- CREATE DATABASE gis_ogs;
- CREATE TABLE gis_point (fid <INT_COLUMN>, g POINT NOT NULL, SPATIAL INDEX(g)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE gis_line (fid <INT_COLUMN>, g LINESTRING NOT NULL, SPATIAL INDEX(g)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE gis_polygon (fid <INT_COLUMN>, g POLYGON NOT NULL, SPATIAL INDEX(g)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE gis_multi_point (fid <INT_COLUMN>, g MULTIPOINT NOT NULL, SPATIAL INDEX(g)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE gis_multi_line (fid <INT_COLUMN>, g MULTILINESTRING NOT NULL, SPATIAL INDEX(g)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE gis_multi_polygon (fid <INT_COLUMN>, g MULTIPOLYGON NOT NULL, SPATIAL INDEX(g)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE gis_geometrycollection (fid <INT_COLUMN>, g GEOMETRYCOLLECTION NOT NULL, SPATIAL INDEX(g)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE gis_geometry (fid <INT_COLUMN>, g GEOMETRY NOT NULL) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--USE gis_ogs;
--CREATE TABLE lakes (fid INT <CUSTOM_COL_OPTIONS>,
--name CHAR(64) <CUSTOM_COL_OPTIONS>,
--shore POLYGON NOT NULL, SPATIAL INDEX s(shore)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE road_segments (fid INT <CUSTOM_COL_OPTIONS>,
--name CHAR(64) <CUSTOM_COL_OPTIONS>,
--aliases CHAR(64) <CUSTOM_COL_OPTIONS>,
--num_lanes INT <CUSTOM_COL_OPTIONS>,
--centerline LINESTRING NOT NULL, SPATIAL INDEX c(centerline)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE divided_routes (fid INT <CUSTOM_COL_OPTIONS>,
--name CHAR(64) <CUSTOM_COL_OPTIONS>,
--num_lanes INT <CUSTOM_COL_OPTIONS>,
--centerlines MULTILINESTRING NOT NULL, SPATIAL INDEX c(centerlines)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE forests (fid INT <CUSTOM_COL_OPTIONS>,
--name CHAR(64) <CUSTOM_COL_OPTIONS>,
--boundary MULTIPOLYGON NOT NULL, SPATIAL INDEX b(boundary)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE bridges (fid INT <CUSTOM_COL_OPTIONS>,
--name CHAR(64) <CUSTOM_COL_OPTIONS>,
--position POINT NOT NULL, SPATIAL INDEX p(position)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE streams (fid INT <CUSTOM_COL_OPTIONS>,
--name CHAR(64) <CUSTOM_COL_OPTIONS>,
--centerline LINESTRING NOT NULL, SPATIAL INDEX c(centerline)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE buildings (fid INT <CUSTOM_COL_OPTIONS>,
--name CHAR(64) <CUSTOM_COL_OPTIONS>,
--position POINT NOT NULL,
--footprint POLYGON NOT NULL, SPATIAL INDEX p(position), SPATIAL INDEX f(footprint)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE ponds (fid INT <CUSTOM_COL_OPTIONS>,
--name CHAR(64) <CUSTOM_COL_OPTIONS>,
--type CHAR(64) <CUSTOM_COL_OPTIONS>,
--shores MULTIPOLYGON NOT NULL, SPATIAL INDEX s(shores)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE named_places (fid INT <CUSTOM_COL_OPTIONS>,
--name CHAR(64) <CUSTOM_COL_OPTIONS>,
--boundary POLYGON NOT NULL, SPATIAL INDEX b(boundary)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE map_neatlines (fid INT <CUSTOM_COL_OPTIONS>,
--neatline POLYGON NOT NULL, SPATIAL INDEX n(neatline)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--USE test;
--SHOW FIELDS FROM gis_point;
--Field Type Null Key Default Extra
--fid int(11) YES NULL
--g point NO MUL NULL
--SHOW FIELDS FROM gis_line;
--Field Type Null Key Default Extra
--fid int(11) YES NULL
--g linestring NO MUL NULL
--SHOW FIELDS FROM gis_polygon;
--Field Type Null Key Default Extra
--fid int(11) YES NULL
--g polygon NO MUL NULL
--SHOW FIELDS FROM gis_multi_point;
--Field Type Null Key Default Extra
--fid int(11) YES NULL
--g multipoint NO MUL NULL
--SHOW FIELDS FROM gis_multi_line;
--Field Type Null Key Default Extra
--fid int(11) YES NULL
--g multilinestring NO MUL NULL
--SHOW FIELDS FROM gis_multi_polygon;
--Field Type Null Key Default Extra
--fid int(11) YES NULL
--g multipolygon NO MUL NULL
--SHOW FIELDS FROM gis_geometrycollection;
--Field Type Null Key Default Extra
--fid int(11) YES NULL
--g geometrycollection NO MUL NULL
--SHOW FIELDS FROM gis_geometry;
--Field Type Null Key Default Extra
--fid int(11) YES NULL
--g geometry NO NULL
--INSERT INTO gis_point (fid,g) VALUES
--(101, PointFromText('POINT(10 10)')),
--(102, PointFromText('POINT(20 10)')),
--(103, PointFromText('POINT(20 20)')),
--(104, PointFromWKB(AsWKB(PointFromText('POINT(10 20)'))));
--INSERT INTO gis_line (fid,g) VALUES
--(105, LineFromText('LINESTRING(0 0,0 10,10 0)')),
--(106, LineStringFromText('LINESTRING(10 10,20 10,20 20,10 20,10 10)')),
--(107, LineStringFromWKB(AsWKB(LineString(Point(10, 10), Point(40, 10)))));
--INSERT INTO gis_polygon (fid,g) VALUES
--(108, PolygonFromText('POLYGON((10 10,20 10,20 20,10 20,10 10))')),
--(109, PolyFromText('POLYGON((0 0,50 0,50 50,0 50,0 0), (10 10,20 10,20 20,10 20,10 10))')),
--(110, PolyFromWKB(AsWKB(Polygon(LineString(Point(0, 0), Point(30, 0), Point(30, 30), Point(0, 0))))));
--INSERT INTO gis_multi_point (fid,g) VALUES
--(111, MultiPointFromText('MULTIPOINT(0 0,10 10,10 20,20 20)')),
--(112, MPointFromText('MULTIPOINT(1 1,11 11,11 21,21 21)')),
--(113, MPointFromWKB(AsWKB(MultiPoint(Point(3, 6), Point(4, 10)))));
--INSERT INTO gis_multi_line (fid,g) VALUES
--(114, MultiLineStringFromText('MULTILINESTRING((10 48,10 21,10 0),(16 0,16 23,16 48))')),
--(115, MLineFromText('MULTILINESTRING((10 48,10 21,10 0))')),
--(116, MLineFromWKB(AsWKB(MultiLineString(LineString(Point(1, 2), Point(3, 5)), LineString(Point(2, 5), Point(5, 8), Point(21, 7))))));
--INSERT INTO gis_multi_polygon (fid,g) VALUES
--(117, MultiPolygonFromText('MULTIPOLYGON(((28 26,28 0,84 0,84 42,28 26),(52 18,66 23,73 9,48 6,52 18)),((59 18,67 18,67 13,59 13,59 18)))')),
--(118, MPolyFromText('MULTIPOLYGON(((28 26,28 0,84 0,84 42,28 26),(52 18,66 23,73 9,48 6,52 18)),((59 18,67 18,67 13,59 13,59 18)))')),
--(119, MPolyFromWKB(AsWKB(MultiPolygon(Polygon(LineString(Point(0, 3), Point(3, 3), Point(3, 0), Point(0, 3)))))));
--INSERT INTO gis_geometrycollection (fid,g) VALUES
--(120, GeomCollFromText('GEOMETRYCOLLECTION(POINT(0 0), LINESTRING(0 0,10 10))')),
--(121, GeometryFromWKB(AsWKB(GeometryCollection(Point(44, 6), LineString(Point(3, 6), Point(7, 9)))))),
--(122, GeomFromText('GeometryCollection()')),
--(123, GeomFromText('GeometryCollection EMPTY'));
--INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_point;
--INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_line;
--INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_polygon;
--INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_multi_point;
--INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_multi_line;
--INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_multi_polygon;
--INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_geometrycollection;
--SELECT fid, AsText(g) FROM gis_point;
--fid AsText(g)
--101 POINT(10 10)
--102 POINT(20 10)
--103 POINT(20 20)
--104 POINT(10 20)
--SELECT fid, AsText(g) FROM gis_line;
--fid AsText(g)
--105 LINESTRING(0 0,0 10,10 0)
--106 LINESTRING(10 10,20 10,20 20,10 20,10 10)
--107 LINESTRING(10 10,40 10)
--SELECT fid, AsText(g) FROM gis_polygon;
--fid AsText(g)
--108 POLYGON((10 10,20 10,20 20,10 20,10 10))
--109 POLYGON((0 0,50 0,50 50,0 50,0 0),(10 10,20 10,20 20,10 20,10 10))
--110 POLYGON((0 0,30 0,30 30,0 0))
--SELECT fid, AsText(g) FROM gis_multi_point;
--fid AsText(g)
--111 MULTIPOINT(0 0,10 10,10 20,20 20)
--112 MULTIPOINT(1 1,11 11,11 21,21 21)
--113 MULTIPOINT(3 6,4 10)
--SELECT fid, AsText(g) FROM gis_multi_line;
--fid AsText(g)
--114 MULTILINESTRING((10 48,10 21,10 0),(16 0,16 23,16 48))
--115 MULTILINESTRING((10 48,10 21,10 0))
--116 MULTILINESTRING((1 2,3 5),(2 5,5 8,21 7))
--SELECT fid, AsText(g) FROM gis_multi_polygon;
--fid AsText(g)
--117 MULTIPOLYGON(((28 26,28 0,84 0,84 42,28 26),(52 18,66 23,73 9,48 6,52 18)),((59 18,67 18,67 13,59 13,59 18)))
--118 MULTIPOLYGON(((28 26,28 0,84 0,84 42,28 26),(52 18,66 23,73 9,48 6,52 18)),((59 18,67 18,67 13,59 13,59 18)))
--119 MULTIPOLYGON(((0 3,3 3,3 0,0 3)))
--SELECT fid, AsText(g) FROM gis_geometrycollection;
--fid AsText(g)
--120 GEOMETRYCOLLECTION(POINT(0 0),LINESTRING(0 0,10 10))
--121 GEOMETRYCOLLECTION(POINT(44 6),LINESTRING(3 6,7 9))
--122 GEOMETRYCOLLECTION EMPTY
--123 GEOMETRYCOLLECTION EMPTY
--SELECT fid, AsText(g) FROM gis_geometry;
--fid AsText(g)
--101 POINT(10 10)
--102 POINT(20 10)
--103 POINT(20 20)
--104 POINT(10 20)
--105 LINESTRING(0 0,0 10,10 0)
--106 LINESTRING(10 10,20 10,20 20,10 20,10 10)
--107 LINESTRING(10 10,40 10)
--108 POLYGON((10 10,20 10,20 20,10 20,10 10))
--109 POLYGON((0 0,50 0,50 50,0 50,0 0),(10 10,20 10,20 20,10 20,10 10))
--110 POLYGON((0 0,30 0,30 30,0 0))
--111 MULTIPOINT(0 0,10 10,10 20,20 20)
--112 MULTIPOINT(1 1,11 11,11 21,21 21)
--113 MULTIPOINT(3 6,4 10)
--114 MULTILINESTRING((10 48,10 21,10 0),(16 0,16 23,16 48))
--115 MULTILINESTRING((10 48,10 21,10 0))
--116 MULTILINESTRING((1 2,3 5),(2 5,5 8,21 7))
--117 MULTIPOLYGON(((28 26,28 0,84 0,84 42,28 26),(52 18,66 23,73 9,48 6,52 18)),((59 18,67 18,67 13,59 13,59 18)))
--118 MULTIPOLYGON(((28 26,28 0,84 0,84 42,28 26),(52 18,66 23,73 9,48 6,52 18)),((59 18,67 18,67 13,59 13,59 18)))
--119 MULTIPOLYGON(((0 3,3 3,3 0,0 3)))
--120 GEOMETRYCOLLECTION(POINT(0 0),LINESTRING(0 0,10 10))
--121 GEOMETRYCOLLECTION(POINT(44 6),LINESTRING(3 6,7 9))
--122 GEOMETRYCOLLECTION EMPTY
--123 GEOMETRYCOLLECTION EMPTY
--SELECT fid, Dimension(g) FROM gis_geometry;
--fid Dimension(g)
--101 0
--102 0
--103 0
--104 0
--105 1
--106 1
--107 1
--108 2
--109 2
--110 2
--111 0
--112 0
--113 0
--114 1
--115 1
--116 1
--117 2
--118 2
--119 2
--120 1
--121 1
--122 0
--123 0
--SELECT fid, GeometryType(g) FROM gis_geometry;
--fid GeometryType(g)
--101 POINT
--102 POINT
--103 POINT
--104 POINT
--105 LINESTRING
--106 LINESTRING
--107 LINESTRING
--108 POLYGON
--109 POLYGON
--110 POLYGON
--111 MULTIPOINT
--112 MULTIPOINT
--113 MULTIPOINT
--114 MULTILINESTRING
--115 MULTILINESTRING
--116 MULTILINESTRING
--117 MULTIPOLYGON
--118 MULTIPOLYGON
--119 MULTIPOLYGON
--120 GEOMETRYCOLLECTION
--121 GEOMETRYCOLLECTION
--122 GEOMETRYCOLLECTION
--123 GEOMETRYCOLLECTION
--SELECT fid, IsEmpty(g) FROM gis_geometry;
--fid IsEmpty(g)
--101 0
--102 0
--103 0
--104 0
--105 0
--106 0
--107 0
--108 0
--109 0
--110 0
--111 0
--112 0
--113 0
--114 0
--115 0
--116 0
--117 0
--118 0
--119 0
--120 0
--121 0
--122 0
--123 0
--SELECT fid, AsText(Envelope(g)) FROM gis_geometry;
--fid AsText(Envelope(g))
--101 POLYGON((10 10,10 10,10 10,10 10,10 10))
--102 POLYGON((20 10,20 10,20 10,20 10,20 10))
--103 POLYGON((20 20,20 20,20 20,20 20,20 20))
--104 POLYGON((10 20,10 20,10 20,10 20,10 20))
--105 POLYGON((0 0,10 0,10 10,0 10,0 0))
--106 POLYGON((10 10,20 10,20 20,10 20,10 10))
--107 POLYGON((10 10,40 10,40 10,10 10,10 10))
--108 POLYGON((10 10,20 10,20 20,10 20,10 10))
--109 POLYGON((0 0,50 0,50 50,0 50,0 0))
--110 POLYGON((0 0,30 0,30 30,0 30,0 0))
--111 POLYGON((0 0,20 0,20 20,0 20,0 0))
--112 POLYGON((1 1,21 1,21 21,1 21,1 1))
--113 POLYGON((3 6,4 6,4 10,3 10,3 6))
--114 POLYGON((10 0,16 0,16 48,10 48,10 0))
--115 POLYGON((10 0,10 0,10 48,10 48,10 0))
--116 POLYGON((1 2,21 2,21 8,1 8,1 2))
--117 POLYGON((28 0,84 0,84 42,28 42,28 0))
--118 POLYGON((28 0,84 0,84 42,28 42,28 0))
--119 POLYGON((0 0,3 0,3 3,0 3,0 0))
--120 POLYGON((0 0,10 0,10 10,0 10,0 0))
--121 POLYGON((3 6,44 6,44 9,3 9,3 6))
--122 GEOMETRYCOLLECTION EMPTY
--123 GEOMETRYCOLLECTION EMPTY
--SELECT fid, X(g) FROM gis_point;
--fid X(g)
--101 10
--102 20
--103 20
--104 10
--SELECT fid, Y(g) FROM gis_point;
--fid Y(g)
--101 10
--102 10
--103 20
--104 20
--SELECT fid, AsText(StartPoint(g)) FROM gis_line;
--fid AsText(StartPoint(g))
--105 POINT(0 0)
--106 POINT(10 10)
--107 POINT(10 10)
--SELECT fid, AsText(EndPoint(g)) FROM gis_line;
--fid AsText(EndPoint(g))
--105 POINT(10 0)
--106 POINT(10 10)
--107 POINT(40 10)
--SELECT fid, GLength(g) FROM gis_line;
--fid GLength(g)
--105 24.14213562373095
--106 40
--107 30
--SELECT fid, NumPoints(g) FROM gis_line;
--fid NumPoints(g)
--105 3
--106 5
--107 2
--SELECT fid, AsText(PointN(g, 2)) FROM gis_line;
--fid AsText(PointN(g, 2))
--105 POINT(0 10)
--106 POINT(20 10)
--107 POINT(40 10)
--SELECT fid, IsClosed(g) FROM gis_line;
--fid IsClosed(g)
--105 0
--106 1
--107 0
--SELECT fid, AsText(Centroid(g)) FROM gis_polygon;
--fid AsText(Centroid(g))
--108 POINT(15 15)
--109 POINT(25.416666666666668 25.416666666666668)
--110 POINT(20 10)
--SELECT fid, Area(g) FROM gis_polygon;
--fid Area(g)
--108 100
--109 2400
--110 450
--SELECT fid, AsText(ExteriorRing(g)) FROM gis_polygon;
--fid AsText(ExteriorRing(g))
--108 LINESTRING(10 10,20 10,20 20,10 20,10 10)
--109 LINESTRING(0 0,50 0,50 50,0 50,0 0)
--110 LINESTRING(0 0,30 0,30 30,0 0)
--SELECT fid, NumInteriorRings(g) FROM gis_polygon;
--fid NumInteriorRings(g)
--108 0
--109 1
--110 0
--SELECT fid, AsText(InteriorRingN(g, 1)) FROM gis_polygon;
--fid AsText(InteriorRingN(g, 1))
--108 NULL
--109 LINESTRING(10 10,20 10,20 20,10 20,10 10)
--110 NULL
--SELECT fid, IsClosed(g) FROM gis_multi_line;
--fid IsClosed(g)
--114 0
--115 0
--116 0
--SELECT fid, AsText(Centroid(g)) FROM gis_multi_polygon;
--fid AsText(Centroid(g))
--117 POINT(57.98031067576927 17.854754130800433)
--118 POINT(57.98031067576927 17.854754130800433)
--119 POINT(2 2)
--SELECT fid, Area(g) FROM gis_multi_polygon;
--fid Area(g)
--117 1684.5
--118 1684.5
--119 4.5
--SELECT fid, NumGeometries(g) from gis_multi_point;
--fid NumGeometries(g)
--111 4
--112 4
--113 2
--SELECT fid, NumGeometries(g) from gis_multi_line;
--fid NumGeometries(g)
--114 2
--115 1
--116 2
--SELECT fid, NumGeometries(g) from gis_multi_polygon;
--fid NumGeometries(g)
--117 2
--118 2
--119 1
--SELECT fid, NumGeometries(g) from gis_geometrycollection;
--fid NumGeometries(g)
--120 2
--121 2
--122 0
--123 0
--SELECT fid, AsText(GeometryN(g, 2)) from gis_multi_point;
--fid AsText(GeometryN(g, 2))
--111 POINT(10 10)
--112 POINT(11 11)
--113 POINT(4 10)
--SELECT fid, AsText(GeometryN(g, 2)) from gis_multi_line;
--fid AsText(GeometryN(g, 2))
--114 LINESTRING(16 0,16 23,16 48)
--115 NULL
--116 LINESTRING(2 5,5 8,21 7)
--SELECT fid, AsText(GeometryN(g, 2)) from gis_multi_polygon;
--fid AsText(GeometryN(g, 2))
--117 POLYGON((59 18,67 18,67 13,59 13,59 18))
--118 POLYGON((59 18,67 18,67 13,59 13,59 18))
--119 NULL
--SELECT fid, AsText(GeometryN(g, 2)) from gis_geometrycollection;
--fid AsText(GeometryN(g, 2))
--120 LINESTRING(0 0,10 10)
--121 LINESTRING(3 6,7 9)
--122 NULL
--123 NULL
--SELECT fid, AsText(GeometryN(g, 1)) from gis_geometrycollection;
--fid AsText(GeometryN(g, 1))
--120 POINT(0 0)
--121 POINT(44 6)
--122 NULL
--123 NULL
--SELECT g1.fid as first, g2.fid as second,
--Within(g1.g, g2.g) as w, Contains(g1.g, g2.g) as c, Overlaps(g1.g, g2.g) as o,
--Equals(g1.g, g2.g) as e, Disjoint(g1.g, g2.g) as d, Touches(g1.g, g2.g) as t,
--Intersects(g1.g, g2.g) as i, Crosses(g1.g, g2.g) as r
--FROM gis_geometrycollection g1, gis_geometrycollection g2 ORDER BY first, second;
--first second w c o e d t i r
--120 120 1 1 0 1 0 0 1 0
--120 121 0 0 1 0 0 0 1 0
--120 122 NULL NULL NULL NULL NULL NULL NULL NULL
--120 123 NULL NULL NULL NULL NULL NULL NULL NULL
--121 120 0 0 1 0 0 0 1 0
--121 121 1 1 0 1 0 0 1 0
--121 122 NULL NULL NULL NULL NULL NULL NULL NULL
--121 123 NULL NULL NULL NULL NULL NULL NULL NULL
--122 120 NULL NULL NULL NULL NULL NULL NULL NULL
--122 121 NULL NULL NULL NULL NULL NULL NULL NULL
--122 122 NULL NULL NULL NULL NULL NULL NULL NULL
--122 123 NULL NULL NULL NULL NULL NULL NULL NULL
--123 120 NULL NULL NULL NULL NULL NULL NULL NULL
--123 121 NULL NULL NULL NULL NULL NULL NULL NULL
--123 122 NULL NULL NULL NULL NULL NULL NULL NULL
--123 123 NULL NULL NULL NULL NULL NULL NULL NULL
--DROP TABLE gis_point, gis_line, gis_polygon, gis_multi_point, gis_multi_line, gis_multi_polygon, gis_geometrycollection, gis_geometry;
--USE gis_ogs;
--# Lakes
--INSERT INTO lakes (fid,name,shore) VALUES (
--101, 'BLUE LAKE',
--PolyFromText(
--'POLYGON(
-- (52 18,66 23,73 9,48 6,52 18),
-- (59 18,67 18,67 13,59 13,59 18)
-- )',
--101));
--# Road Segments
--INSERT INTO road_segments (fid,name,aliases,num_lanes,centerline) VALUES(102, 'Route 5', NULL, 2,
--LineFromText(
--'LINESTRING( 0 18, 10 21, 16 23, 28 26, 44 31 )' ,101));
--INSERT INTO road_segments (fid,name,aliases,num_lanes,centerline) VALUES(103, 'Route 5', 'Main Street', 4,
--LineFromText(
--'LINESTRING( 44 31, 56 34, 70 38 )' ,101));
--INSERT INTO road_segments (fid,name,aliases,num_lanes,centerline) VALUES(104, 'Route 5', NULL, 2,
--LineFromText(
--'LINESTRING( 70 38, 72 48 )' ,101));
--INSERT INTO road_segments (fid,name,aliases,num_lanes,centerline) VALUES(105, 'Main Street', NULL, 4,
--LineFromText(
--'LINESTRING( 70 38, 84 42 )' ,101));
--INSERT INTO road_segments (fid,name,aliases,num_lanes,centerline) VALUES(106, 'Dirt Road by Green Forest', NULL,
--1,
--LineFromText(
--'LINESTRING( 28 26, 28 0 )',101));
--# DividedRoutes
--INSERT INTO divided_routes (fid,name,num_lanes,centerlines) VALUES(119, 'Route 75', 4,
--MLineFromText(
--'MULTILINESTRING((10 48,10 21,10 0),
-- (16 0,16 23,16 48))', 101));
--# Forests
--INSERT INTO forests (fid,name,boundary) VALUES(109, 'Green Forest',
--MPolyFromText(
--'MULTIPOLYGON(((28 26,28 0,84 0,84 42,28 26),
-- (52 18,66 23,73 9,48 6,52 18)),((59 18,67 18,67 13,59 13,59 18)))',
--101));
--# Bridges
--INSERT INTO bridges (fid,name,position) VALUES(110, 'Cam Bridge', PointFromText(
--'POINT( 44 31 )', 101));
--# Streams
--INSERT INTO streams (fid,name,centerline) VALUES(111, 'Cam Stream',
--LineFromText(
--'LINESTRING( 38 48, 44 41, 41 36, 44 31, 52 18 )', 101));
--INSERT INTO streams (fid,name,centerline) VALUES(112, NULL,
--LineFromText(
--'LINESTRING( 76 0, 78 4, 73 9 )', 101));
--# Buildings
--INSERT INTO buildings (fid,name,position,footprint) VALUES(113, '123 Main Street',
--PointFromText(
--'POINT( 52 30 )', 101),
--PolyFromText(
--'POLYGON( ( 50 31, 54 31, 54 29, 50 29, 50 31) )', 101));
--INSERT INTO buildings (fid,name,position,footprint) VALUES(114, '215 Main Street',
--PointFromText(
--'POINT( 64 33 )', 101),
--PolyFromText(
--'POLYGON( ( 66 34, 62 34, 62 32, 66 32, 66 34) )', 101));
--# Ponds
--INSERT INTO ponds (fid,name,type,shores) VALUES(120, NULL, 'Stock Pond',
--MPolyFromText(
--'MULTIPOLYGON( ( ( 24 44, 22 42, 24 40, 24 44) ),
-- ( ( 26 44, 26 40, 28 42, 26 44) ) )', 101));
--# Named Places
--INSERT INTO named_places (fid,name,boundary) VALUES(117, 'Ashton',
--PolyFromText(
--'POLYGON( ( 62 48, 84 48, 84 30, 56 30, 56 34, 62 48) )', 101));
--INSERT INTO named_places (fid,name,boundary) VALUES(118, 'Goose Island',
--PolyFromText(
--'POLYGON( ( 67 13, 67 18, 59 18, 59 13, 67 13) )', 101));
--# Map Neatlines
--INSERT INTO map_neatlines (fid,neatline) VALUES(115,
--PolyFromText(
--'POLYGON( ( 0 0, 0 48, 84 48, 84 0, 0 0 ) )', 101));
--SELECT Dimension(shore)
--FROM lakes
--WHERE name = 'Blue Lake';
--Dimension(shore)
--2
--SELECT GeometryType(centerlines)
--FROM divided_routes
--WHERE name = 'Route 75';
--GeometryType(centerlines)
--MULTILINESTRING
--SELECT AsText(boundary)
--FROM named_places
--WHERE name = 'Goose Island';
--AsText(boundary)
--POLYGON((67 13,67 18,59 18,59 13,67 13))
--SELECT AsText(PolyFromWKB(AsBinary(boundary),101))
--FROM named_places
--WHERE name = 'Goose Island';
--AsText(PolyFromWKB(AsBinary(boundary),101))
--POLYGON((67 13,67 18,59 18,59 13,67 13))
--SELECT SRID(boundary)
--FROM named_places
--WHERE name = 'Goose Island';
--SRID(boundary)
--101
--SELECT IsEmpty(centerline)
--FROM road_segments
--WHERE name = 'Route 5'
--AND aliases = 'Main Street';
--IsEmpty(centerline)
--0
--SELECT AsText(Envelope(boundary))
--FROM named_places
--WHERE name = 'Goose Island';
--AsText(Envelope(boundary))
--POLYGON((59 13,67 13,67 18,59 18,59 13))
--SELECT X(position)
--FROM bridges
--WHERE name = 'Cam Bridge';
--X(position)
--44
--SELECT Y(position)
--FROM bridges
--WHERE name = 'Cam Bridge';
--Y(position)
--31
--SELECT AsText(StartPoint(centerline))
--FROM road_segments
--WHERE fid = 102;
--AsText(StartPoint(centerline))
--POINT(0 18)
--SELECT AsText(EndPoint(centerline))
--FROM road_segments
--WHERE fid = 102;
--AsText(EndPoint(centerline))
--POINT(44 31)
--SELECT GLength(centerline)
--FROM road_segments
--WHERE fid = 106;
--GLength(centerline)
--26
--SELECT NumPoints(centerline)
--FROM road_segments
--WHERE fid = 102;
--NumPoints(centerline)
--5
--SELECT AsText(PointN(centerline, 1))
--FROM road_segments
--WHERE fid = 102;
--AsText(PointN(centerline, 1))
--POINT(0 18)
--SELECT AsText(Centroid(boundary))
--FROM named_places
--WHERE name = 'Goose Island';
--AsText(Centroid(boundary))
--POINT(63 15.5)
--SELECT Area(boundary)
--FROM named_places
--WHERE name = 'Goose Island';
--Area(boundary)
--40
--SELECT AsText(ExteriorRing(shore))
--FROM lakes
--WHERE name = 'Blue Lake';
--AsText(ExteriorRing(shore))
--LINESTRING(52 18,66 23,73 9,48 6,52 18)
--SELECT NumInteriorRings(shore)
--FROM lakes
--WHERE name = 'Blue Lake';
--NumInteriorRings(shore)
--1
--SELECT AsText(InteriorRingN(shore, 1))
--FROM lakes
--WHERE name = 'Blue Lake';
--AsText(InteriorRingN(shore, 1))
--LINESTRING(59 18,67 18,67 13,59 13,59 18)
--SELECT NumGeometries(centerlines)
--FROM divided_routes
--WHERE name = 'Route 75';
--NumGeometries(centerlines)
--2
--SELECT AsText(GeometryN(centerlines, 2))
--FROM divided_routes
--WHERE name = 'Route 75';
--AsText(GeometryN(centerlines, 2))
--LINESTRING(16 0,16 23,16 48)
--SELECT IsClosed(centerlines)
--FROM divided_routes
--WHERE name = 'Route 75';
--IsClosed(centerlines)
--0
--SELECT GLength(centerlines)
--FROM divided_routes
--WHERE name = 'Route 75';
--GLength(centerlines)
--96
--SELECT AsText(Centroid(shores))
--FROM ponds
--WHERE fid = 120;
--AsText(Centroid(shores))
--POINT(25 42)
--SELECT Area(shores)
--FROM ponds
--WHERE fid = 120;
--Area(shores)
--8
--SELECT ST_Equals(boundary,
--PolyFromText('POLYGON( ( 67 13, 67 18, 59 18, 59 13, 67 13) )',1))
--FROM named_places
--WHERE name = 'Goose Island';
--ST_Equals(boundary,
--PolyFromText('POLYGON( ( 67 13, 67 18, 59 18, 59 13, 67 13) )',1))
--1
--SELECT ST_Disjoint(centerlines, boundary)
--FROM divided_routes, named_places
--WHERE divided_routes.name = 'Route 75'
--AND named_places.name = 'Ashton';
--ST_Disjoint(centerlines, boundary)
--1
--SELECT ST_Touches(centerline, shore)
--FROM streams, lakes
--WHERE streams.name = 'Cam Stream'
--AND lakes.name = 'Blue Lake';
--ST_Touches(centerline, shore)
--1
--SELECT Crosses(road_segments.centerline, divided_routes.centerlines)
--FROM road_segments, divided_routes
--WHERE road_segments.fid = 102
--AND divided_routes.name = 'Route 75';
--Crosses(road_segments.centerline, divided_routes.centerlines)
--1
--SELECT ST_Intersects(road_segments.centerline, divided_routes.centerlines)
--FROM road_segments, divided_routes
--WHERE road_segments.fid = 102
--AND divided_routes.name = 'Route 75';
--ST_Intersects(road_segments.centerline, divided_routes.centerlines)
--1
--SELECT ST_Contains(forests.boundary, named_places.boundary)
--FROM forests, named_places
--WHERE forests.name = 'Green Forest'
--AND named_places.name = 'Ashton';
--ST_Contains(forests.boundary, named_places.boundary)
--0
--SELECT ST_Distance(position, boundary)
--FROM bridges, named_places
--WHERE bridges.name = 'Cam Bridge'
--AND named_places.name = 'Ashton';
--ST_Distance(position, boundary)
--12
--SELECT AsText(ST_Difference(named_places.boundary, forests.boundary))
--FROM named_places, forests
--WHERE named_places.name = 'Ashton'
--AND forests.name = 'Green Forest';
--AsText(ST_Difference(named_places.boundary, forests.boundary))
--POLYGON((56 34,62 48,84 48,84 42,56 34))
--SELECT AsText(ST_Union(shore, boundary))
--FROM lakes, named_places
--WHERE lakes.name = 'Blue Lake'
--AND named_places.name = 'Goose Island';
--AsText(ST_Union(shore, boundary))
--POLYGON((48 6,52 18,66 23,73 9,48 6))
--SELECT AsText(ST_SymDifference(shore, boundary))
--FROM lakes, named_places
--WHERE lakes.name = 'Blue Lake'
--AND named_places.name = 'Ashton';
--AsText(ST_SymDifference(shore, boundary))
--MULTIPOLYGON(((48 6,52 18,66 23,73 9,48 6),(59 13,59 18,67 18,67 13,59 13)),((56 30,56 34,62 48,84 48,84 30,56 30)))
--SELECT count(*)
--FROM buildings, bridges
--WHERE ST_Contains(ST_Buffer(bridges.position, 15.0), buildings.footprint) = 1;
--count(*)
--1
-+ERROR HY000: The storage engine <STORAGE_ENGINE> doesn't support SPATIAL indexes
-+# ERROR: Statement ended with errno 1464, errname ER_TABLE_CANT_HANDLE_SPKEYS (expected to succeed)
-+# ------------ UNEXPECTED RESULT ------------
-+# [ CREATE TABLE gis_point (fid INT(11) /*!*/ /*Custom column options*/, g POINT NOT NULL, SPATIAL INDEX(g)) ENGINE=InnoDB /*!*/ /*Custom table options*/ ]
-+# The statement|command finished with ER_TABLE_CANT_HANDLE_SPKEYS.
-+# Geometry types or spatial indexes or the mix could be unsupported|malfunctioning, or the problem was caused by previous errors.
-+# You can change the engine code, or create an rdiff, or disable the test by adding it to disabled.def.
-+# Further in this test, the message might sometimes be suppressed; a part of the test might be skipped.
-+# Also, this problem may cause a chain effect (more errors of different kinds in the test).
-+# -------------------------------------------
- DROP DATABASE gis_ogs;
- USE test;
diff --git a/storage/innobase/os/os0event.cc b/storage/innobase/os/os0event.cc
new file mode 100644
index 00000000000..9b5f8a45180
--- /dev/null
+++ b/storage/innobase/os/os0event.cc
@@ -0,0 +1,520 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2019, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file os/os0event.cc
+The interface to the operating system condition variables.
+
+Created 2012-09-23 Sunny Bains
+*******************************************************/
+
+#include "os0event.h"
+#include "ut0mutex.h"
+#include <my_sys.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#include <synchapi.h>
+/** Native condition variable. */
+typedef CONDITION_VARIABLE os_cond_t;
+#else
+/** Native condition variable */
+typedef pthread_cond_t os_cond_t;
+#endif /* _WIN32 */
+
+/** InnoDB condition variable. */
+struct os_event {
+ os_event(const char* name) UNIV_NOTHROW;
+
+ ~os_event() UNIV_NOTHROW;
+
+ /**
+ Destroys a condition variable */
+ void destroy() UNIV_NOTHROW
+ {
+#ifndef _WIN32
+ int ret = pthread_cond_destroy(&cond_var);
+ ut_a(ret == 0);
+#endif /* !_WIN32 */
+
+ mutex.destroy();
+ }
+
+ /** Set the event */
+ void set() UNIV_NOTHROW
+ {
+ mutex.enter();
+
+ if (!m_set) {
+ broadcast();
+ }
+
+ mutex.exit();
+ }
+
+ int64_t reset() UNIV_NOTHROW
+ {
+ mutex.enter();
+
+ if (m_set) {
+ m_set = false;
+ }
+
+ int64_t ret = signal_count;
+
+ mutex.exit();
+
+ return(ret);
+ }
+
+ /**
+ Waits for an event object until it is in the signaled state.
+
+ Typically, if the event has been signalled after the os_event_reset()
+ we'll return immediately because event->m_set == true.
+ There are, however, situations (e.g.: sync_array code) where we may
+ lose this information. For example:
+
+ thread A calls os_event_reset()
+ thread B calls os_event_set() [event->m_set == true]
+ thread C calls os_event_reset() [event->m_set == false]
+ thread A calls os_event_wait() [infinite wait!]
+ thread C calls os_event_wait() [infinite wait!]
+
+ Where such a scenario is possible, to avoid infinite wait, the
+ value returned by reset() should be passed in as
+ reset_sig_count. */
+ void wait_low(int64_t reset_sig_count) UNIV_NOTHROW;
+
+ /**
+ Waits for an event object until it is in the signaled state or
+ a timeout is exceeded.
+ @param time_in_usec - timeout in microseconds,
+ or OS_SYNC_INFINITE_TIME
+	@param reset_sig_count - zero or the value returned by
+ previous call of os_event_reset().
+ @return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
+ ulint wait_time_low(
+ ulint time_in_usec,
+ int64_t reset_sig_count) UNIV_NOTHROW;
+
+ /** @return true if the event is in the signalled state. */
+ bool is_set() const UNIV_NOTHROW
+ {
+ mutex.enter();
+ bool is_set = m_set;
+ mutex.exit();
+ return is_set;
+ }
+
+private:
+ /**
+ Initialize a condition variable */
+ void init() UNIV_NOTHROW
+ {
+
+ mutex.init();
+
+#ifdef _WIN32
+ InitializeConditionVariable(&cond_var);
+#else
+ {
+ int ret;
+
+ ret = pthread_cond_init(&cond_var, NULL);
+ ut_a(ret == 0);
+ }
+#endif /* _WIN32 */
+ }
+
+ /**
+ Wait on condition variable */
+ void wait() UNIV_NOTHROW
+ {
+#ifdef _WIN32
+ if (!SleepConditionVariableCS(&cond_var, mutex, INFINITE)) {
+ ut_error;
+ }
+#else
+ {
+ int ret;
+
+ ret = pthread_cond_wait(&cond_var, mutex);
+ ut_a(ret == 0);
+ }
+#endif /* _WIN32 */
+ }
+
+ /**
+ Wakes all threads waiting for condition variable */
+ void broadcast() UNIV_NOTHROW
+ {
+ m_set = true;
+ ++signal_count;
+
+#ifdef _WIN32
+ WakeAllConditionVariable(&cond_var);
+#else
+ {
+ int ret;
+
+ ret = pthread_cond_broadcast(&cond_var);
+ ut_a(ret == 0);
+ }
+#endif /* _WIN32 */
+ }
+
+ /**
+ Wakes one thread waiting for condition variable */
+ void signal() UNIV_NOTHROW
+ {
+#ifdef _WIN32
+ WakeConditionVariable(&cond_var);
+#else
+ {
+ int ret;
+
+ ret = pthread_cond_signal(&cond_var);
+ ut_a(ret == 0);
+ }
+#endif /* _WIN32 */
+ }
+
+ /**
+ Do a timed wait on condition variable.
+ @param abstime - timeout
+ @param time_in_ms - timeout in milliseconds.
+ @return true if timed out, false otherwise */
+ bool timed_wait(
+#ifndef _WIN32
+ const timespec* abstime
+#else
+ DWORD time_in_ms
+#endif /* !_WIN32 */
+ );
+
+private:
+ bool m_set; /*!< this is true when the
+ event is in the signaled
+ state, i.e., a thread does
+ not stop if it tries to wait
+ for this event */
+ int64_t signal_count; /*!< this is incremented
+ each time the event becomes
+ signaled */
+ mutable EventMutex mutex; /*!< this mutex protects
+ the next fields */
+
+
+ os_cond_t cond_var; /*!< condition variable is
+ used in waiting for the event */
+
+protected:
+ // Disable copying
+ os_event(const os_event&);
+ os_event& operator=(const os_event&);
+};
+
+/**
+Do a timed wait on condition variable.
+@param abstime - absolute time to wait
+@param time_in_ms - timeout in milliseconds
+@return true if timed out */
+bool
+os_event::timed_wait(
+#ifndef _WIN32
+ const timespec* abstime
+#else
+ DWORD time_in_ms
+#endif /* !_WIN32 */
+)
+{
+#ifdef _WIN32
+ BOOL ret;
+
+ ret = SleepConditionVariableCS(&cond_var, mutex, time_in_ms);
+
+ if (!ret) {
+ DWORD err = GetLastError();
+
+		/* See http://msdn.microsoft.com/en-us/library/ms686301%28VS.85%29.aspx:
+
+ "Condition variables are subject to spurious wakeups
+ (those not associated with an explicit wake) and stolen wakeups
+ (another thread manages to run before the woken thread)."
+ Check for both types of timeouts.
+ Conditions are checked by the caller.*/
+ if (err == WAIT_TIMEOUT || err == ERROR_TIMEOUT) {
+ return(true);
+ }
+ }
+
+ ut_a(ret);
+
+ return(false);
+#else
+ int ret;
+
+ ret = pthread_cond_timedwait(&cond_var, mutex, abstime);
+
+ switch (ret) {
+ case 0:
+ case ETIMEDOUT:
+ /* We play it safe by checking for EINTR even though
+ according to the POSIX documentation it can't return EINTR. */
+ case EINTR:
+ break;
+
+ default:
+ ib::error() << "pthread_cond_timedwait() returned: " << ret
+ << ": abstime={" << abstime->tv_sec << ","
+ << abstime->tv_nsec << "}";
+ ut_error;
+ }
+
+ return(ret == ETIMEDOUT);
+#endif /* _WIN32 */
+}
+
+/**
+Waits for an event object until it is in the signaled state.
+
+Typically, if the event has been signalled after the os_event_reset()
+we'll return immediately because event->m_set == true.
+There are, however, situations (e.g.: sync_array code) where we may
+lose this information. For example:
+
+thread A calls os_event_reset()
+thread B calls os_event_set() [event->m_set == true]
+thread C calls os_event_reset() [event->m_set == false]
+thread A calls os_event_wait() [infinite wait!]
+thread C calls os_event_wait() [infinite wait!]
+
+Where such a scenario is possible, to avoid infinite wait, the
+value returned by reset() should be passed in as
+reset_sig_count. */
+void
+os_event::wait_low(
+ int64_t reset_sig_count) UNIV_NOTHROW
+{
+ mutex.enter();
+
+ if (!reset_sig_count) {
+ reset_sig_count = signal_count;
+ }
+
+ while (!m_set && signal_count == reset_sig_count) {
+
+ wait();
+
+ /* Spurious wakeups may occur: we have to check if the
+ event really has been signaled after we came here to wait. */
+ }
+
+ mutex.exit();
+}
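+
+/* Editorial note: to make the reset_sig_count protocol above concrete,
+here is a minimal caller-side sketch using the public wrappers defined
+later in this file; "event" and work_available() are hypothetical
+stand-ins for the caller's own state and predicate.
+
+	int64_t	sig_count = os_event_reset(event); // snapshot signal_count
+
+	if (!work_available()) {	// hypothetical caller predicate
+		// Even if another thread calls os_event_set() and a third
+		// calls os_event_reset() in between, signal_count has
+		// advanced past sig_count, so this wait returns instead
+		// of blocking forever.
+		os_event_wait_low(event, sig_count);
+	}
+*/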
+
+/**
+Waits for an event object until it is in the signaled state or
+a timeout is exceeded.
+@param time_in_usec - timeout in microseconds, or OS_SYNC_INFINITE_TIME
+@param reset_sig_count - zero or the value returned by previous call
+ of os_event_reset().
+@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
+ulint
+os_event::wait_time_low(
+ ulint time_in_usec,
+ int64_t reset_sig_count) UNIV_NOTHROW
+{
+ bool timed_out = false;
+
+#ifdef _WIN32
+ DWORD time_in_ms;
+
+ if (time_in_usec != OS_SYNC_INFINITE_TIME) {
+ time_in_ms = DWORD(time_in_usec / 1000);
+ } else {
+ time_in_ms = INFINITE;
+ }
+#else
+ struct timespec abstime;
+
+ if (time_in_usec != OS_SYNC_INFINITE_TIME) {
+ ulonglong usec = ulonglong(time_in_usec) + my_hrtime().val;
+ abstime.tv_sec = usec / 1000000;
+ abstime.tv_nsec = (usec % 1000000) * 1000;
+ } else {
+ abstime.tv_nsec = 999999999;
+ abstime.tv_sec = (time_t) ULINT_MAX;
+ }
+
+ ut_a(abstime.tv_nsec <= 999999999);
+
+#endif /* _WIN32 */
+
+ mutex.enter();
+
+ if (!reset_sig_count) {
+ reset_sig_count = signal_count;
+ }
+
+ do {
+ if (m_set || signal_count != reset_sig_count) {
+
+ break;
+ }
+
+#ifndef _WIN32
+ timed_out = timed_wait(&abstime);
+#else
+ timed_out = timed_wait(time_in_ms);
+#endif /* !_WIN32 */
+
+ } while (!timed_out);
+
+ mutex.exit();
+
+ return(timed_out ? OS_SYNC_TIME_EXCEEDED : 0);
+}
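+
+/* Editorial note: the absolute deadline computed above packs microseconds
+into a timespec; a standalone, illustrative restatement of that arithmetic
+(hypothetical helper, shown only to make the unit conversion explicit;
+now_usec corresponds to my_hrtime().val):
+
+	static struct timespec
+	abs_deadline(ulonglong rel_usec, ulonglong now_usec)
+	{
+		struct timespec	ts;
+		ulonglong	usec = now_usec + rel_usec;
+
+		ts.tv_sec = (time_t) (usec / 1000000);
+		ts.tv_nsec = (long) ((usec % 1000000) * 1000); // <= 999999999
+
+		return(ts);
+	}
+*/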
+
+/** Constructor */
+os_event::os_event(const char* name) UNIV_NOTHROW
+{
+ init();
+
+ m_set = false;
+
+	/* We return this value in os_event_reset(),
+	which can then be passed to
+	os_event_wait_low(). The value of zero is
+ reserved in os_event_wait_low() for the case
+ when the caller does not want to pass any
+ signal_count value. To distinguish between
+ the two cases we initialize signal_count
+ to 1 here. */
+
+ signal_count = 1;
+}
+
+/** Destructor */
+os_event::~os_event() UNIV_NOTHROW
+{
+ destroy();
+}
+
+/**
+Creates an event semaphore, i.e., a semaphore which may just have two
+states: signaled and nonsignaled. The created event is manual reset: it
+must be reset explicitly by calling os_event_reset().
+@return the event handle */
+os_event_t
+os_event_create(
+/*============*/
+ const char* name) /*!< in: the name of the
+ event, if NULL the event
+ is created without a name */
+{
+ return(UT_NEW_NOKEY(os_event(name)));
+}
+
+/**
+Check if the event is set.
+@return true if set */
+bool
+os_event_is_set(
+/*============*/
+ const os_event_t event) /*!< in: event to test */
+{
+ return(event->is_set());
+}
+
+/**
+Sets an event semaphore to the signaled state: lets waiting threads
+proceed. */
+void
+os_event_set(
+/*=========*/
+ os_event_t event) /*!< in/out: event to set */
+{
+ event->set();
+}
+
+/**
+Resets an event semaphore to the nonsignaled state. Threads that
+subsequently wait for the event will block.
+The return value should be passed to os_event_wait_low() if it is desired
+that this thread should not wait in case of an intervening call to
+os_event_set() between this os_event_reset() and the
+os_event_wait_low() call. See comments for os_event_wait_low().
+@return current signal_count. */
+int64_t
+os_event_reset(
+/*===========*/
+ os_event_t event) /*!< in/out: event to reset */
+{
+ return(event->reset());
+}
+
+/**
+Waits for an event object until it is in the signaled state or
+a timeout is exceeded.
+@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
+ulint
+os_event_wait_time_low(
+/*===================*/
+ os_event_t event, /*!< in/out: event to wait */
+ ulint time_in_usec, /*!< in: timeout in
+ microseconds, or
+ OS_SYNC_INFINITE_TIME */
+ int64_t reset_sig_count) /*!< in: zero or the value
+ returned by previous call of
+ os_event_reset(). */
+{
+ return(event->wait_time_low(time_in_usec, reset_sig_count));
+}
+
+/**
+Waits for an event object until it is in the signaled state.
+
+Where such a scenario is possible, to avoid infinite wait, the
+value returned by os_event_reset() should be passed in as
+reset_sig_count. */
+void
+os_event_wait_low(
+/*==============*/
+ os_event_t event, /*!< in: event to wait */
+ int64_t reset_sig_count) /*!< in: zero or the value
+ returned by previous call of
+ os_event_reset(). */
+{
+ event->wait_low(reset_sig_count);
+}
+
+/**
+Frees an event object. */
+void
+os_event_destroy(
+/*=============*/
+ os_event_t& event) /*!< in/own: event to free */
+
+{
+ UT_DELETE(event);
+ event = NULL;
+}
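+
+/* Editorial note: taken together, the wrappers above give the following
+event lifecycle. A minimal sketch assuming single-threaded use (real
+callers pair set() and reset() across threads):
+
+	os_event_t	ev = os_event_create("example"); // starts nonsignaled
+
+	os_event_set(ev);			// producer: wake waiters
+	ut_a(os_event_is_set(ev));
+
+	int64_t	sig_count = os_event_reset(ev);	// consumer: re-arm
+	// os_event_wait_low(ev, sig_count) would now block until set()
+
+	os_event_destroy(ev);			// frees the event, NULLs ev
+*/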
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index 3a403f880c1..f7061388cb8 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -1,8 +1,8 @@
/***********************************************************************
-Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2019, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
-Copyright (c) 2013, 2019, MariaDB Corporation.
+Copyright (c) 2013, 2020, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
@@ -33,40 +33,35 @@ The interface to the operating system file i/o primitives
Created 10/21/1995 Heikki Tuuri
*******************************************************/
+#ifndef UNIV_INNOCHECKSUM
#include "os0file.h"
+#include "sql_const.h"
-#ifdef UNIV_NONINL
-#include "os0file.ic"
+#ifdef UNIV_LINUX
+#include <sys/types.h>
+#include <sys/stat.h>
#endif
-#include "ut0mem.h"
#include "srv0srv.h"
#include "srv0start.h"
#include "fil0fil.h"
-#include "fil0crypt.h"
-#include "fsp0fsp.h"
-#include "fil0pagecompress.h"
-#include "buf0buf.h"
-#include "srv0mon.h"
#include "srv0srv.h"
#ifdef HAVE_LINUX_UNISTD_H
#include "unistd.h"
#endif
-#ifndef UNIV_HOTBACKUP
-# include "os0sync.h"
-# include "os0thread.h"
-#else /* !UNIV_HOTBACKUP */
-# ifdef __WIN__
-/* Add includes for the _stat() call to compile on Windows */
-# include <sys/types.h>
-# include <sys/stat.h>
-# include <errno.h>
-# endif /* __WIN__ */
-#endif /* !UNIV_HOTBACKUP */
+#include "os0event.h"
+#include "os0thread.h"
-#if defined(LINUX_NATIVE_AIO)
+#include <vector>
+
+#ifdef LINUX_NATIVE_AIO
#include <libaio.h>
-#endif
+#endif /* LINUX_NATIVE_AIO */
+
+#ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
+# include <fcntl.h>
+# include <linux/falloc.h>
+#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE */
#if defined(UNIV_LINUX) && defined(HAVE_SYS_IOCTL_H)
# include <sys/ioctl.h>
@@ -83,17 +78,8 @@ Created 10/21/1995 Heikki Tuuri
#include <linux/falloc.h>
#endif
-#ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
-# include <fcntl.h>
-# include <linux/falloc.h>
-#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE */
-
-#ifdef HAVE_LZO
-#include "lzo/lzo1x.h"
-#endif
-
-#ifdef HAVE_SNAPPY
-#include "snappy-c.h"
+#ifdef _WIN32
+#include <winioctl.h>
#endif
/** Insert buffer segment id */
@@ -102,27 +88,31 @@ static const ulint IO_IBUF_SEGMENT = 0;
/** Log segment id */
static const ulint IO_LOG_SEGMENT = 1;
+/** Number of retries for partial I/O's */
+static const ulint NUM_RETRIES_ON_PARTIAL_IO = 10;
+
/* This specifies the file permissions InnoDB uses when it creates files in
Unix; the value of os_innodb_umask is initialized in ha_innodb.cc to
my_umask */
-#ifndef __WIN__
+#ifndef _WIN32
/** Umask for creating files */
-UNIV_INTERN ulint os_innodb_umask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
+static ulint os_innodb_umask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
#else
/** Umask for creating files */
-UNIV_INTERN ulint os_innodb_umask = 0;
-#define ECANCELED 125
-#endif /* __WIN__ */
+static ulint os_innodb_umask = 0;
+static HANDLE data_completion_port;
+static HANDLE log_completion_port;
+
+static DWORD fls_sync_io = FLS_OUT_OF_INDEXES;
+#define IOCP_SHUTDOWN_KEY (ULONG_PTR)-1
+#endif /* _WIN32 */
-#ifndef UNIV_HOTBACKUP
-/* We use these mutexes to protect lseek + file i/o operation, if the
-OS does not provide an atomic pread or pwrite, or similar */
-#define OS_FILE_N_SEEK_MUTEXES 16
-UNIV_INTERN os_ib_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
+/** In simulated aio, merge at most this many consecutive i/os */
+static const ulint OS_AIO_MERGE_N_CONSECUTIVE = 64;
-/* In simulated aio, merge at most this many consecutive i/os */
-#define OS_AIO_MERGE_N_CONSECUTIVE 64
+/** Flag indicating if the page_cleaner is in active state. */
+extern bool buf_page_cleaner_is_active;
#ifdef WITH_INNODB_DISALLOW_WRITES
#define WAIT_ALLOW_WRITES() os_event_wait(srv_allow_writes_event)
@@ -135,25 +125,25 @@ UNIV_INTERN os_ib_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
InnoDB AIO Implementation:
=========================
-We support native AIO for windows and linux. For rest of the platforms
-we simulate AIO by special io-threads servicing the IO-requests.
+We support native AIO for Windows and Linux. For the rest of the platforms
+we simulate AIO by special IO-threads servicing the IO-requests.
Simulated AIO:
==============
-In platforms where we 'simulate' AIO following is a rough explanation
+On platforms where we 'simulate' AIO, the following is a rough explanation
of the high level design.
There are four io-threads (for ibuf, log, read, write).
All synchronous IO requests are serviced by the calling thread using
os_file_write/os_file_read. The Asynchronous requests are queued up
in an array (there are four such arrays) by the calling thread.
-Later these requests are picked up by the io-thread and are serviced
+Later these requests are picked up by the IO-thread and are serviced
synchronously.
Windows native AIO:
==================
-If srv_use_native_aio is not set then windows follow the same
+If srv_use_native_aio is not set then Windows follows the same
code as simulated AIO. If the flag is set then native AIO interface
 is used. On Windows, one of the limitations is that if a file is opened
for AIO no synchronous IO can be done on it. Therefore we have an
@@ -172,7 +162,7 @@ Linux native AIO:
=================
If we have libaio installed on the system and innodb_use_native_aio
-is set to TRUE we follow the code path of native AIO, otherwise we
+is set to true we follow the code path of native AIO, otherwise we
do simulated AIO.
There are innodb_file_io_threads helper threads. These threads work
on the four arrays mentioned above in Simulated AIO.
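 
 /* Editorial note: a rough sketch of the simulated-AIO flow described
 above, with hypothetical helper names standing in for the machinery the
 rest of this file implements (none of these helpers exist verbatim):
 
 	void issuing_thread(Request req)
 	{
 		Slot*	slot = reserve_slot_in_array(req); // queue request
 		wake_handler_thread(segment_of(slot));	   // hypothetical
 	}
 
 	void io_handler_thread(ulint segment)	// one of the IO-threads
 	{
 		for (;;) {
 			Slot*	slot = pick_pending_slot(segment);
 			do_sync_io(slot);	// os_file_read()/os_file_write()
 			report_completion(slot);// hand m1/m2 back to caller
 		}
 	}
 */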
@@ -184,704 +174,972 @@ the completed IO request and calls completion routine on it.
**********************************************************************/
-/** Flag: enable debug printout for asynchronous i/o */
-UNIV_INTERN ibool os_aio_print_debug = FALSE;
#ifdef UNIV_PFS_IO
/* Keys to register InnoDB I/O with performance schema */
-UNIV_INTERN mysql_pfs_key_t innodb_file_data_key;
-UNIV_INTERN mysql_pfs_key_t innodb_file_log_key;
-UNIV_INTERN mysql_pfs_key_t innodb_file_temp_key;
+mysql_pfs_key_t innodb_data_file_key;
+mysql_pfs_key_t innodb_log_file_key;
+mysql_pfs_key_t innodb_temp_file_key;
#endif /* UNIV_PFS_IO */
-/** The asynchronous i/o array slot structure */
-struct os_aio_slot_t{
- ibool is_read; /*!< TRUE if a read operation */
- ulint pos; /*!< index of the slot in the aio
- array */
- ibool reserved; /*!< TRUE if this slot is reserved */
- time_t reservation_time;/*!< time when reserved */
- ulint len; /*!< length of the block to read or
- write */
- byte* buf; /*!< buffer used in i/o */
- ulint type; /*!< OS_FILE_READ or OS_FILE_WRITE */
- ulint is_log; /*!< 1 if OS_FILE_LOG or 0 */
- ulint page_size; /*!< UNIV_PAGE_SIZE or zip_size */
-
- os_offset_t offset; /*!< file offset in bytes */
- pfs_os_file_t file; /*!< file where to read or write */
- const char* name; /*!< file name or path */
- ibool io_already_done;/*!< used only in simulated aio:
- TRUE if the physical i/o already
- made and only the slot message
- needs to be passed to the caller
- of os_aio_simulated_handle */
- fil_node_t* message1; /*!< message which is given by the */
- void* message2; /*!< the requester of an aio operation
- and which can be used to identify
- which pending aio operation was
- completed */
- ulint bitmap;
-
- ulint* write_size; /*!< Actual write size initialized
- after fist successfull trim
- operation for this page and if
- initialized we do not trim again if
- actual page size does not decrease. */
-
- ulint file_block_size;/*!< file block size */
+class AIO;
+
+/** The asynchronous I/O context */
+struct Slot {
#ifdef WIN_ASYNC_IO
- HANDLE handle; /*!< handle object we need in the
- OVERLAPPED struct */
- OVERLAPPED control; /*!< Windows control block for the
- aio request */
+ /** Windows control block for the aio request
+ must be at the very start of Slot, so we can
+ cast Slot* to OVERLAPPED*
+ */
+ OVERLAPPED control;
+#endif
+
+ /** index of the slot in the aio array */
+ uint16_t pos;
+
+ /** true if this slot is reserved */
+ bool is_reserved;
+
+ /** time when reserved */
+ time_t reservation_time;
+
+ /** buffer used in i/o */
+ byte* buf;
+
+	/** Buffer pointer used for actual IO. We advance this,
+	rather than buf, when partial IO is required */
+ byte* ptr;
+
+ /** OS_FILE_READ or OS_FILE_WRITE */
+ IORequest type;
+
+ /** file offset in bytes */
+ os_offset_t offset;
+
+ /** file where to read or write */
+ pfs_os_file_t file;
+
+ /** file name or path */
+ const char* name;
+
+ /** used only in simulated aio: true if the physical i/o
+ already made and only the slot message needs to be passed
+ to the caller of os_aio_simulated_handle */
+ bool io_already_done;
+
+	/** file block size */
+ ulint file_block_size;
+
+ /** The file node for which the IO is requested. */
+ fil_node_t* m1;
+
+ /** the requester of an aio operation and which can be used
+ to identify which pending aio operation was completed */
+ void* m2;
+
+ /** AIO completion status */
+ dberr_t err;
+
+#ifdef WIN_ASYNC_IO
+
+ /** bytes written/read */
+ DWORD n_bytes;
+
+ /** length of the block to read or write */
+ DWORD len;
+
+ /** aio array containing this slot */
+ AIO *array;
#elif defined(LINUX_NATIVE_AIO)
- struct iocb control; /* Linux control block for aio */
- int n_bytes; /* bytes written/read. */
- int ret; /* AIO return code */
+ /** Linux control block for aio */
+ struct iocb control;
+
+ /** AIO return code */
+ int ret;
+
+ /** bytes written/read. */
+ ssize_t n_bytes;
+
+ /** length of the block to read or write */
+ ulint len;
+#else
+ /** length of the block to read or write */
+ ulint len;
+
+ /** bytes written/read. */
+ ulint n_bytes;
#endif /* WIN_ASYNC_IO */
+
+ /** Length of the block before it was compressed */
+ uint32 original_len;
+
};
/** The asynchronous i/o array structure */
-struct os_aio_array_t{
- os_ib_mutex_t mutex; /*!< the mutex protecting the aio array */
- os_event_t not_full;
- /*!< The event which is set to the
- signaled state when there is space in
- the aio outside the ibuf segment;
- os_event_set() and os_event_reset()
- are protected by os_aio_array_t::mutex */
- os_event_t is_empty;
- /*!< The event which is set to the
- signaled state when there are no
- pending i/os in this array;
- os_event_set() and os_event_reset()
- are protected by os_aio_array_t::mutex */
- ulint n_slots;/*!< Total number of slots in the aio
- array. This must be divisible by
- n_threads. */
- ulint n_segments;
- /*!< Number of segments in the aio
- array of pending aio requests. A
- thread can wait separately for any one
- of the segments. */
- ulint cur_seg;/*!< We reserve IO requests in round
- robin fashion to different segments.
- This points to the segment that is to
- be used to service next IO request. */
- ulint n_reserved;
- /*!< Number of reserved slots in the
- aio array outside the ibuf segment */
- os_aio_slot_t* slots; /*!< Pointer to the slots in the array */
-#ifdef __WIN__
- HANDLE* handles;
- /*!< Pointer to an array of OS native
- event handles where we copied the
- handles from slots, in the same
- order. This can be used in
- WaitForMultipleObjects; used only in
- Windows */
-#endif /* __WIN__ */
+class AIO {
+public:
+ /** Constructor
+ @param[in] id Latch ID
+ @param[in] n_slots Number of slots to configure
+ @param[in] segments Number of segments to configure */
+ AIO(latch_id_t id, ulint n_slots, ulint segments);
+
+ /** Destructor */
+ ~AIO();
+
+ /** Initialize the instance
+ @return DB_SUCCESS or error code */
+ dberr_t init();
+
+	/** Requests a slot in the aio array. If no slot is available, waits
+ until not_full-event becomes signaled.
+
+ @param[in] type IO context
+ @param[in,out] m1 message to be passed along with the AIO
+ operation
+ @param[in,out] m2 message to be passed along with the AIO
+ operation
+ @param[in] file file handle
+ @param[in] name name of the file or path as a null-terminated
+ string
+ @param[in,out] buf buffer where to read or from which to write
+ @param[in] offset file offset, where to read from or start writing
+ @param[in] len length of the block to read or write
+ @return pointer to slot */
+ Slot* reserve_slot(
+ const IORequest& type,
+ fil_node_t* m1,
+ void* m2,
+ pfs_os_file_t file,
+ const char* name,
+ void* buf,
+ os_offset_t offset,
+ ulint len)
+ MY_ATTRIBUTE((warn_unused_result));
+
+ /** @return number of reserved slots */
+ ulint pending_io_count() const;
+
+ /** Returns a pointer to the nth slot in the aio array.
+ @param[in] index Index of the slot in the array
+ @return pointer to slot */
+ const Slot* at(ulint i) const
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ ut_a(i < m_slots.size());
+
+ return(&m_slots[i]);
+ }
+
+ /** Non const version */
+ Slot* at(ulint i)
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ ut_a(i < m_slots.size());
+
+ return(&m_slots[i]);
+ }
+
+ /** Frees a slot in the AIO array, assumes caller owns the mutex.
+ @param[in,out] slot Slot to release */
+ void release(Slot* slot);
+
+ /** Frees a slot in the AIO array, assumes caller doesn't own the mutex.
+ @param[in,out] slot Slot to release */
+ void release_with_mutex(Slot* slot);
+
+ /** Prints info about the aio array.
+ @param[in,out] file Where to print */
+ void print(FILE* file);
+
+ /** @return the number of slots per segment */
+ ulint slots_per_segment() const
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ return(m_slots.size() / m_n_segments);
+ }
+
+ /** @return accessor for n_segments */
+ ulint get_n_segments() const
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ return(m_n_segments);
+ }
+
+#ifdef UNIV_DEBUG
+ /** @return true if the thread owns the mutex */
+ bool is_mutex_owned() const
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ return(mutex_own(&m_mutex));
+ }
+#endif /* UNIV_DEBUG */
+
+ /** Acquire the mutex */
+ void acquire() const
+ {
+ mutex_enter(&m_mutex);
+ }
+
+ /** Release the mutex */
+ void release() const
+ {
+ mutex_exit(&m_mutex);
+ }
+
+ /** Write out the state to the file/stream
+ @param[in, out] file File to write to */
+ void to_file(FILE* file) const;
+
+#ifdef LINUX_NATIVE_AIO
+ /** Dispatch an AIO request to the kernel.
+ @param[in,out] slot an already reserved slot
+ @return true on success. */
+ bool linux_dispatch(Slot* slot)
+ MY_ATTRIBUTE((warn_unused_result));
+
+ /** Accessor for an AIO event
+ @param[in] index Index into the array
+ @return the event at the index */
+ io_event* io_events(ulint index)
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ ut_a(index < m_events.size());
+
+ return(&m_events[index]);
+ }
+
+ /** Accessor for the AIO context
+ @param[in] segment Segment for which to get the context
+ @return the AIO context for the segment */
+ io_context_t io_ctx(ulint segment)
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ ut_ad(segment < get_n_segments());
+
+ return(m_aio_ctx[segment]);
+ }
+
+ /** Creates an io_context_t for native linux AIO.
+ @param[in] max_events number of events
+ @param[out] io_ctx io_ctx to initialize.
+ @return true on success. */
+ static bool linux_create_io_ctx(unsigned max_events, io_context_t& io_ctx)
+ MY_ATTRIBUTE((warn_unused_result));
+
+ /** Checks if the system supports native linux aio. On some kernel
+ versions where native aio is supported it won't work on tmpfs. In such
+ cases we can't use native aio as it is not possible to mix simulated
+ and native aio.
+ @return true if supported, false otherwise. */
+ static bool is_linux_native_aio_supported()
+ MY_ATTRIBUTE((warn_unused_result));
+#endif /* LINUX_NATIVE_AIO */
+
+#ifdef WIN_ASYNC_IO
+ HANDLE m_completion_port;
+ /** Wake up all AIO threads in Windows native aio */
+ static void wake_at_shutdown() {
+ AIO *all_arrays[] = {s_reads, s_writes, s_log, s_ibuf };
+ for (size_t i = 0; i < array_elements(all_arrays); i++) {
+ AIO *a = all_arrays[i];
+ if (a) {
+ PostQueuedCompletionStatus(a->m_completion_port, 0,
+ IOCP_SHUTDOWN_KEY, 0);
+ }
+ }
+ }
+#endif /* WIN_ASYNC_IO */
+
+#ifdef _WIN32
+ /** This function can be called if one wants to post a batch of reads
+	and prefers an I/O-handler thread to handle them all at once later. You
+ must call os_aio_simulated_wake_handler_threads later to ensure the
+ threads are not left sleeping! */
+ static void simulated_put_read_threads_to_sleep();
+#endif /* _WIN32 */
+
+ /** Create an instance using new(std::nothrow)
+ @param[in] id Latch ID
+ @param[in] n_slots The number of AIO request slots
+ @param[in] segments The number of segments
+ @return a new AIO instance */
+ static AIO* create(
+ latch_id_t id,
+ ulint n_slots,
+ ulint segments)
+ MY_ATTRIBUTE((warn_unused_result));
+
+ /** Initializes the asynchronous io system. Creates one array each
+ for ibuf and log I/O. Also creates one array each for read and write
+ where each array is divided logically into n_readers and n_writers
+ respectively. The caller must create an i/o handler thread for each
+ segment in these arrays. This function also creates the sync array.
+	No I/O handler thread needs to be created for that.
+ @param[in] n_per_seg maximum number of pending aio
+ operations allowed per segment
+ @param[in] n_readers number of reader threads
+ @param[in] n_writers number of writer threads
+ @param[in] n_slots_sync number of slots in the sync aio array
+ @return true if AIO sub-system was started successfully */
+ static bool start(
+ ulint n_per_seg,
+ ulint n_readers,
+ ulint n_writers,
+ ulint n_slots_sync)
+ MY_ATTRIBUTE((warn_unused_result));
+
+ /** Free the AIO arrays */
+ static void shutdown();
+
+ /** Print all the AIO segments
+ @param[in,out] file Where to print */
+ static void print_all(FILE* file);
+
+ /** Calculates local segment number and aio array from global
+ segment number.
+ @param[out] array AIO wait array
+ @param[in] segment global segment number
+ @return local segment number within the aio array */
+ static ulint get_array_and_local_segment(
+ AIO** array,
+ ulint segment)
+ MY_ATTRIBUTE((warn_unused_result));
+
+ /** Select the IO slot array
+ @param[in,out] type Type of IO, READ or WRITE
+ @param[in] read_only true if running in read-only mode
+ @param[in] mode IO mode
+ @return slot array or NULL if invalid mode specified */
+ static AIO* select_slot_array(
+ IORequest& type,
+ bool read_only,
+ ulint mode)
+ MY_ATTRIBUTE((warn_unused_result));
+
+ /** Calculates segment number for a slot.
+ @param[in] array AIO wait array
+ @param[in] slot slot in this array
+ @return segment number (which is the number used by, for example,
+ I/O handler threads) */
+ static ulint get_segment_no_from_slot(
+ const AIO* array,
+ const Slot* slot)
+ MY_ATTRIBUTE((warn_unused_result));
+
+ /** Wakes up a simulated AIO I/O-handler thread if it has something
+ to do.
+ @param[in] global_segment the number of the segment in the
+ AIO arrays */
+ static void wake_simulated_handler_thread(ulint global_segment);
+
+ /** Check if it is a read request
+ @param[in] aio The AIO instance to check
+ @return true if the AIO instance is for reading. */
+ static bool is_read(const AIO* aio)
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ return(s_reads == aio);
+ }
+
+ /** Wait on an event until no pending writes */
+ static void wait_until_no_pending_writes()
+ {
+ os_event_wait(AIO::s_writes->m_is_empty);
+ }
+
+ /** Print to file
+ @param[in] file File to write to */
+ static void print_to_file(FILE* file);
+
+ /** Check for pending IO. Gets the count and also validates the
+ data structures.
+ @return count of pending IO requests */
+ static ulint total_pending_io_count();
+
+private:
+ /** Initialise the slots
+ @return DB_SUCCESS or error code */
+ dberr_t init_slots()
+ MY_ATTRIBUTE((warn_unused_result));
+
+ /** Wakes up a simulated AIO I/O-handler thread if it has something
+ to do for a local segment in the AIO array.
+ @param[in] global_segment the number of the segment in the
+ AIO arrays
+ @param[in] segment the local segment in the AIO array */
+ void wake_simulated_handler_thread(ulint global_segment, ulint segment);
+
+ /** Prints pending IO requests per segment of an aio array.
+ We probably don't need per segment statistics but they can help us
+ during development phase to see if the IO requests are being
+ distributed as expected.
+ @param[in,out] file file where to print
+ @param[in] segments pending IO array */
+ void print_segment_info(
+ FILE* file,
+ const ulint* segments);
+
+#ifdef LINUX_NATIVE_AIO
+ /** Initialise the Linux native AIO data structures
+ @return DB_SUCCESS or error code */
+ dberr_t init_linux_native_aio()
+ MY_ATTRIBUTE((warn_unused_result));
+#endif /* LINUX_NATIVE_AIO */
+
+private:
+ typedef std::vector<Slot> Slots;
+
+ /** the mutex protecting the aio array */
+ mutable SysMutex m_mutex;
+
+ /** Pointer to the slots in the array.
+ Number of elements must be divisible by n_threads. */
+ Slots m_slots;
+
+ /** Number of segments in the aio array of pending aio requests.
+ A thread can wait separately for any one of the segments. */
+ ulint m_n_segments;
+
+ /** The event which is set to the signaled state when
+ there is space in the aio outside the ibuf segment;
+ os_event_set() and os_event_reset() are protected by AIO::m_mutex */
+ os_event_t m_not_full;
+
+ /** The event which is set to the signaled state when
+ there are no pending i/os in this array;
+ os_event_set() and os_event_reset() are protected by AIO::m_mutex */
+ os_event_t m_is_empty;
+
+ /** Number of reserved slots in the AIO array outside
+ the ibuf segment */
+ ulint m_n_reserved;
+
#if defined(LINUX_NATIVE_AIO)
- io_context_t* aio_ctx;
- /* completion queue for IO. There is
- one such queue per segment. Each thread
- will work on one ctx exclusively. */
- struct io_event* aio_events;
- /* The array to collect completed IOs.
- There is one such event for each
- possible pending IO. The size of the
- array is equal to n_slots. */
+ typedef std::vector<io_event> IOEvents;
+
+ /** completion queue for IO. There is one such queue per
+ segment. Each thread will work on one ctx exclusively. */
+ std::vector<io_context_t> m_aio_ctx;
+
+ /** The array to collect completed IOs. There is one such
+ event for each possible pending IO. The size of the array
+ is equal to m_slots.size(). */
+ IOEvents m_events;
 #endif /* LINUX_NATIVE_AIO */
+
+ /** The aio arrays for non-ibuf i/o and ibuf i/o, as well as
+ sync AIO. These are NULL when the module has not yet been
+ initialized. */
+
+ /** Insert buffer */
+ static AIO* s_ibuf;
+
+ /** Redo log */
+ static AIO* s_log;
+
+ /** Reads */
+ static AIO* s_reads;
+
+ /** Writes */
+ static AIO* s_writes;
+
+ /** Synchronous I/O */
+ static AIO* s_sync;
};
+/** Static declarations */
+AIO* AIO::s_reads;
+AIO* AIO::s_writes;
+AIO* AIO::s_ibuf;
+AIO* AIO::s_log;
+AIO* AIO::s_sync;
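+
+/* Editorial note: a hedged sketch of how the slot API declared above is
+typically driven (type, mode, and the IO parameters come from the caller;
+error handling elided; reserve_slot blocks while the array is full):
+
+	AIO*	array = AIO::select_slot_array(type, read_only, mode);
+	Slot*	slot = array->reserve_slot(type, m1, m2, file, name,
+					   buf, offset, len);
+	// dispatch: array->linux_dispatch(slot) on Linux native AIO,
+	// or hand slot->control to the Windows completion port
+	array->release_with_mutex(slot);	// on completion
+*/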
+
#if defined(LINUX_NATIVE_AIO)
/** timeout for each io_getevents() call = 500ms. */
-#define OS_AIO_REAP_TIMEOUT (500000000UL)
+static const ulint OS_AIO_REAP_TIMEOUT = 500000000UL;
/** time to sleep, in microseconds if io_setup() returns EAGAIN. */
-#define OS_AIO_IO_SETUP_RETRY_SLEEP (500000UL)
+static const ulint OS_AIO_IO_SETUP_RETRY_SLEEP = 500000UL;
/** number of attempts before giving up on io_setup(). */
-#define OS_AIO_IO_SETUP_RETRY_ATTEMPTS 5
-#endif
+static const int OS_AIO_IO_SETUP_RETRY_ATTEMPTS = 5;
+#endif /* LINUX_NATIVE_AIO */
-/** Array of events used in simulated aio. */
+/** Array of events used in simulated AIO */
static os_event_t* os_aio_segment_wait_events;
-/** The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These
-are NULL when the module has not yet been initialized. @{ */
-static os_aio_array_t* os_aio_read_array = NULL; /*!< Reads */
-static os_aio_array_t* os_aio_write_array = NULL; /*!< Writes */
-static os_aio_array_t* os_aio_ibuf_array = NULL; /*!< Insert buffer */
-static os_aio_array_t* os_aio_log_array = NULL; /*!< Redo log */
-static os_aio_array_t* os_aio_sync_array = NULL; /*!< Synchronous I/O */
-/* @} */
-
/** Number of asynchronous I/O segments. Set by os_aio_init(). */
-static ulint os_aio_n_segments = ULINT_UNDEFINED;
+static ulint os_aio_n_segments = ULINT_UNDEFINED;
-/** If the following is TRUE, read i/o handler threads try to
+/** If the following is true, read i/o handler threads try to
wait until a batch of new read requests have been posted */
-static ibool os_aio_recommend_sleep_for_read_threads = FALSE;
-#endif /* !UNIV_HOTBACKUP */
-
-UNIV_INTERN ulint os_n_file_reads = 0;
-UNIV_INTERN ulint os_bytes_read_since_printout = 0;
-UNIV_INTERN ulint os_n_file_writes = 0;
-UNIV_INTERN ulint os_n_fsyncs = 0;
-UNIV_INTERN ulint os_n_file_reads_old = 0;
-UNIV_INTERN ulint os_n_file_writes_old = 0;
-UNIV_INTERN ulint os_n_fsyncs_old = 0;
-UNIV_INTERN time_t os_last_printout;
-
-UNIV_INTERN ibool os_has_said_disk_full = FALSE;
-
-#if defined(WIN_ASYNC_IO) || defined(LINUX_NATIVE_AIO)
-/** After first fallocate failure we will disable os_file_trim */
-static bool os_fallocate_failed;
-
-/**********************************************************************//**
-Directly manipulate the allocated disk space by deallocating for the file referred to
-by fd for the byte range starting at offset and continuing for len bytes.
-Within the specified range, partial file system blocks are zeroed, and whole
-file system blocks are removed from the file. After a successful call,
-subsequent reads from this range will return zeroes.
-@return true if success, false if error */
-static
-ibool
-os_file_trim(
-/*=========*/
- os_aio_slot_t* slot); /*!< in: slot structure */
-#endif /* WIN_ASYNC_IO || LINUX_NATIVE_AIO */
-
-/****************************************************************//**
-Does error handling when a file operation fails.
-@return TRUE if we should retry the operation */
-ibool
-os_file_handle_error_no_exit(
-/*=========================*/
- const char* name, /*!< in: name of a file or NULL */
- const char* operation, /*!< in: operation */
- ibool on_error_silent,/*!< in: if TRUE then don't print
- any message to the log. */
- const char* file, /*!< in: file name */
- const ulint line); /*!< in: line */
-
-/****************************************************************//**
-Tries to enable the atomic write feature, if available, for the specified file
-handle.
-@return TRUE if success */
-static __attribute__((warn_unused_result))
-ibool
-os_file_set_atomic_writes(
-/*======================*/
- const char* name /*!< in: name of the file */
- __attribute__((unused)),
- os_file_t file /*!< in: handle to the file */
- __attribute__((unused)))
-{
-#ifdef DFS_IOCTL_ATOMIC_WRITE_SET
- int atomic_option = 1;
-
- if (ioctl(file, DFS_IOCTL_ATOMIC_WRITE_SET, &atomic_option)) {
-
- fprintf(stderr, "InnoDB: Warning:Trying to enable atomic writes on "
- "file %s on non-supported platform!\n", name);
- os_file_handle_error_no_exit(name, "ioctl", FALSE, __FILE__, __LINE__);
- return(FALSE);
- }
-
- return(TRUE);
-#else
- fprintf(stderr, "InnoDB: Error: trying to enable atomic writes on "
- "file %s on non-supported platform!\n", name);
- return(FALSE);
-#endif
-}
+static bool os_aio_recommend_sleep_for_read_threads;
+ulint os_n_file_reads;
+static ulint os_bytes_read_since_printout;
+ulint os_n_file_writes;
+ulint os_n_fsyncs;
+static ulint os_n_file_reads_old;
+static ulint os_n_file_writes_old;
+static ulint os_n_fsyncs_old;
-#ifdef UNIV_DEBUG
-# ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Validates the consistency the aio system some of the time.
-@return TRUE if ok or the check was skipped */
-UNIV_INTERN
-ibool
-os_aio_validate_skip(void)
-/*======================*/
-{
-/** Try os_aio_validate() every this many times */
-# define OS_AIO_VALIDATE_SKIP 13
+static time_t os_last_printout;
+bool os_has_said_disk_full;
- /** The os_aio_validate() call skip counter.
- Use a signed type because of the race condition below. */
- static int os_aio_validate_count = OS_AIO_VALIDATE_SKIP;
+/** Default Zip compression level */
+extern uint page_zip_level;
- /* There is a race condition below, but it does not matter,
- because this call is only for heuristic purposes. We want to
- reduce the call frequency of the costly os_aio_validate()
- check in debug builds. */
- if (--os_aio_validate_count > 0) {
- return(TRUE);
- }
+#if DATA_TRX_ID_LEN > 6
+#error "COMPRESSION_ALGORITHM will not fit"
+#endif /* DATA_TRX_ID_LEN */
- os_aio_validate_count = OS_AIO_VALIDATE_SKIP;
- return(os_aio_validate());
+/** Validates the consistency of the aio system.
+@return true if ok */
+static
+bool
+os_aio_validate();
+
+/** Handle errors for file operations.
+@param[in] name name of a file or NULL
+@param[in] operation operation
+@param[in] should_abort whether to abort on an unknown error
+@param[in] on_error_silent whether to suppress reports of non-fatal errors
+@return true if we should retry the operation */
+static MY_ATTRIBUTE((warn_unused_result))
+bool
+os_file_handle_error_cond_exit(
+ const char* name,
+ const char* operation,
+ bool should_abort,
+ bool on_error_silent);
+
+/** Does error handling when a file operation fails.
+@param[in] name name of a file or NULL
+@param[in] operation operation name that failed
+@return true if we should retry the operation */
+static
+bool
+os_file_handle_error(
+ const char* name,
+ const char* operation)
+{
+ /* Exit in case of unknown error */
+ return(os_file_handle_error_cond_exit(name, operation, true, false));
}
-# endif /* !UNIV_HOTBACKUP */
-#endif /* UNIV_DEBUG */
-#ifdef __WIN__
-/***********************************************************************//**
-Gets the operating system version. Currently works only on Windows.
-@return OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000, OS_WINXP, OS_WINVISTA,
-OS_WIN7. */
-UNIV_INTERN
-ulint
-os_get_os_version(void)
-/*===================*/
+/** Does error handling when a file operation fails.
+@param[in] name name of a file or NULL
+@param[in] operation operation name that failed
+@param[in] on_error_silent if true then don't print any message to the log.
+@return true if we should retry the operation */
+static
+bool
+os_file_handle_error_no_exit(
+ const char* name,
+ const char* operation,
+ bool on_error_silent)
{
- OSVERSIONINFO os_info;
-
- os_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
-
- ut_a(GetVersionEx(&os_info));
-
- if (os_info.dwPlatformId == VER_PLATFORM_WIN32s) {
- return(OS_WIN31);
- } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
- return(OS_WIN95);
- } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
- switch (os_info.dwMajorVersion) {
- case 3:
- case 4:
- return(OS_WINNT);
- case 5:
- return (os_info.dwMinorVersion == 0)
- ? OS_WIN2000 : OS_WINXP;
- case 6:
- return (os_info.dwMinorVersion == 0)
- ? OS_WINVISTA : OS_WIN7;
- default:
- return(OS_WIN7);
- }
- } else {
- ut_error;
- return(0);
+ /* Don't exit in case of unknown error */
+ return(os_file_handle_error_cond_exit(
+ name, operation, false, on_error_silent));
+}
+
+/** Handle RENAME error.
+@param name old name of the file
+@param new_name new name of the file */
+static void os_file_handle_rename_error(const char* name, const char* new_name)
+{
+ if (os_file_get_last_error(true) != OS_FILE_DISK_FULL) {
+ ib::error() << "Cannot rename file '" << name << "' to '"
+ << new_name << "'";
+ } else if (!os_has_said_disk_full) {
+ os_has_said_disk_full = true;
+ /* Disk full error is reported irrespective of the
+ on_error_silent setting. */
+ ib::error() << "Full disk prevents renaming file '"
+ << name << "' to '" << new_name << "'";
}
}
-#endif /* __WIN__ */
-/***********************************************************************//**
-Retrieves the last error number if an error occurs in a file io function.
-The number should be retrieved before any other OS calls (because they may
-overwrite the error number). If the number is not known to this program,
-the OS error number + 100 is returned.
-@return error number, or OS error number + 100 */
+/** Does simulated AIO. This function should be called by an i/o-handler
+thread.
+
+@param[in] segment The number of the segment in the aio arrays to wait
+ for; segment 0 is the ibuf i/o thread, segment 1 the
+ log i/o thread, then follow the non-ibuf read threads,
+ and as the last are the non-ibuf write threads
+@param[out] m1 the messages passed with the AIO request; note that
+ also in the case where the AIO operation failed, these
+ output parameters are valid and can be used to restart
+ the operation, for example
+@param[out] m2 Callback argument
+@param[in] type IO context
+@return DB_SUCCESS or error code */
static
-ulint
-os_file_get_last_error_low(
-/*=======================*/
- bool report_all_errors, /*!< in: TRUE if we want an error
- message printed of all errors */
- bool on_error_silent) /*!< in: TRUE then don't print any
- diagnostic to the log */
+dberr_t
+os_aio_simulated_handler(
+ ulint global_segment,
+ fil_node_t** m1,
+ void** m2,
+ IORequest* type);
+
+#ifdef _WIN32
+static HANDLE win_get_syncio_event();
+
+/**
+ Wrapper around Windows DeviceIoControl() function.
+
+  Works synchronously, also for handles opened
+  for async access (i.e. with FILE_FLAG_OVERLAPPED).
+
+  Accepts the same parameters as DeviceIoControl(), except
+  the last parameter (OVERLAPPED).
+*/
+static
+BOOL
+os_win32_device_io_control(
+ HANDLE handle,
+ DWORD code,
+ LPVOID inbuf,
+ DWORD inbuf_size,
+ LPVOID outbuf,
+ DWORD outbuf_size,
+ LPDWORD bytes_returned
+)
{
-#ifdef __WIN__
+ OVERLAPPED overlapped = { 0 };
+ overlapped.hEvent = win_get_syncio_event();
+ BOOL result = DeviceIoControl(handle, code, inbuf, inbuf_size, outbuf,
+ outbuf_size, NULL, &overlapped);
- ulint err = (ulint) GetLastError();
- if (err == ERROR_SUCCESS) {
- return(0);
+ if (result || (GetLastError() == ERROR_IO_PENDING)) {
+ /* Wait for async io to complete */
+ result = GetOverlappedResult(handle, &overlapped, bytes_returned, TRUE);
}
- if (report_all_errors
- || (!on_error_silent
- && err != ERROR_DISK_FULL
- && err != ERROR_FILE_EXISTS)) {
+ return result;
+}
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Operating system error number %lu"
- " in a file operation.\n", (ulong) err);
+#endif
- if (err == ERROR_PATH_NOT_FOUND) {
- fprintf(stderr,
- "InnoDB: The error means the system"
- " cannot find the path specified.\n");
+/***********************************************************************//**
+Try to get number of bytes per sector from file system.
+@return file block size */
+UNIV_INTERN
+ulint
+os_file_get_block_size(
+/*===================*/
+ os_file_t file, /*!< in: handle to a file */
+ const char* name) /*!< in: file name */
+{
+ ulint fblock_size = 512;
- if (srv_is_being_started) {
- fprintf(stderr,
- "InnoDB: If you are installing InnoDB,"
- " remember that you must create\n"
- "InnoDB: directories yourself, InnoDB"
- " does not create them.\n");
- }
- } else if (err == ERROR_ACCESS_DENIED) {
- fprintf(stderr,
- "InnoDB: The error means mysqld does not have"
- " the access rights to\n"
- "InnoDB: the directory. It may also be"
- " you have created a subdirectory\n"
- "InnoDB: of the same name as a data file.\n");
- } else if (err == ERROR_SHARING_VIOLATION
- || err == ERROR_LOCK_VIOLATION) {
- fprintf(stderr,
- "InnoDB: The error means that another program"
- " is using InnoDB's files.\n"
- "InnoDB: This might be a backup or antivirus"
- " software or another instance\n"
- "InnoDB: of MySQL."
- " Please close it to get rid of this error.\n");
- } else if (err == ERROR_WORKING_SET_QUOTA
- || err == ERROR_NO_SYSTEM_RESOURCES) {
- fprintf(stderr,
- "InnoDB: The error means that there are no"
- " sufficient system resources or quota to"
- " complete the operation.\n");
- } else if (err == ERROR_OPERATION_ABORTED) {
- fprintf(stderr,
- "InnoDB: The error means that the I/O"
- " operation has been aborted\n"
- "InnoDB: because of either a thread exit"
- " or an application request.\n"
- "InnoDB: Retry attempt is made.\n");
- } else if (err == ECANCELED || err == ENOTTY) {
- if (strerror(err) != NULL) {
- fprintf(stderr,
- "InnoDB: Error number %d"
- " means '%s'.\n",
- err, strerror(err));
- }
+#if defined(UNIV_LINUX)
+ struct stat local_stat;
+ int err;
- if(srv_use_atomic_writes) {
- fprintf(stderr,
- "InnoDB: Error trying to enable atomic writes on "
- "non-supported destination!\n");
- }
- } else {
- fprintf(stderr,
- "InnoDB: Some operating system error numbers"
- " are described at\n"
- "InnoDB: "
- REFMAN
- "operating-system-error-codes.html\n");
- }
- fflush(stderr);
- }
+ err = fstat((int)file, &local_stat);
- if (err == ERROR_FILE_NOT_FOUND) {
- return(OS_FILE_NOT_FOUND);
- } else if (err == ERROR_DISK_FULL) {
- return(OS_FILE_DISK_FULL);
- } else if (err == ERROR_FILE_EXISTS) {
- return(OS_FILE_ALREADY_EXISTS);
- } else if (err == ERROR_SHARING_VIOLATION
- || err == ERROR_LOCK_VIOLATION) {
- return(OS_FILE_SHARING_VIOLATION);
- } else if (err == ERROR_WORKING_SET_QUOTA
- || err == ERROR_NO_SYSTEM_RESOURCES) {
- return(OS_FILE_INSUFFICIENT_RESOURCE);
- } else if (err == ERROR_OPERATION_ABORTED) {
- return(OS_FILE_OPERATION_ABORTED);
- } else if (err == ERROR_ACCESS_DENIED) {
- return(OS_FILE_ACCESS_VIOLATION);
- } else if (err == ERROR_BUFFER_OVERFLOW) {
- return(OS_FILE_NAME_TOO_LONG);
+ if (err != 0) {
+ os_file_handle_error_no_exit(name, "fstat()", FALSE);
} else {
- return(OS_FILE_ERROR_MAX + err);
- }
-#else
- int err = errno;
- if (err == 0) {
- return(0);
+ fblock_size = local_stat.st_blksize;
}
+#endif /* UNIV_LINUX */
+#ifdef _WIN32
- if (report_all_errors
- || (err != ENOSPC && err != EEXIST && !on_error_silent)) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Operating system error number %d"
- " in a file operation.\n", err);
+ fblock_size = 0;
- if (err == ENOENT) {
- fprintf(stderr,
- "InnoDB: The error means the system"
- " cannot find the path specified.\n");
+	// Open the volume for this file and find out its "physical bytes per sector".
- if (srv_is_being_started) {
- fprintf(stderr,
- "InnoDB: If you are installing InnoDB,"
- " remember that you must create\n"
- "InnoDB: directories yourself, InnoDB"
- " does not create them.\n");
- }
- } else if (err == EACCES) {
- fprintf(stderr,
- "InnoDB: The error means mysqld does not have"
- " the access rights to\n"
- "InnoDB: the directory.\n");
- } else if (err == ECANCELED || err == ENOTTY) {
- if (strerror(err) != NULL) {
- fprintf(stderr,
- "InnoDB: Error number %d"
- " means '%s'.\n",
- err, strerror(err));
- }
+ HANDLE volume_handle = INVALID_HANDLE_VALUE;
+	char volume[MAX_PATH + 4] = "\\\\.\\";	// Special prefix required for volume names.
+	if (!GetVolumePathName(name, volume + 4, MAX_PATH)) {
+ os_file_handle_error_no_exit(name,
+ "GetVolumePathName()", FALSE);
+ goto end;
+ }
- if(srv_use_atomic_writes) {
- fprintf(stderr,
- "InnoDB: Error trying to enable atomic writes on "
- "non-supported destination!\n");
- }
- } else {
- if (strerror(err) != NULL) {
- fprintf(stderr,
- "InnoDB: Error number %d"
- " means '%s'.\n",
- err, strerror(err));
- }
+ size_t len = strlen(volume);
+ if (volume[len - 1] == '\\') {
+ // Trim trailing backslash from volume name.
+ volume[len - 1] = 0;
+ }
+ volume_handle = CreateFile(volume, FILE_READ_ATTRIBUTES,
+ FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
+ 0, OPEN_EXISTING, 0, 0);
- fprintf(stderr,
- "InnoDB: Some operating system"
- " error numbers are described at\n"
- "InnoDB: "
- REFMAN
- "operating-system-error-codes.html\n");
+ if (volume_handle == INVALID_HANDLE_VALUE) {
+ if (GetLastError() != ERROR_ACCESS_DENIED) {
+ os_file_handle_error_no_exit(volume,
+ "CreateFile()", FALSE);
+ }
+ goto end;
+ }
+
+ DWORD tmp;
+ STORAGE_ACCESS_ALIGNMENT_DESCRIPTOR disk_alignment;
+
+ STORAGE_PROPERTY_QUERY storage_query;
+ memset(&storage_query, 0, sizeof(storage_query));
+ storage_query.PropertyId = StorageAccessAlignmentProperty;
+ storage_query.QueryType = PropertyStandardQuery;
+
+ BOOL result = os_win32_device_io_control(volume_handle,
+ IOCTL_STORAGE_QUERY_PROPERTY,
+ &storage_query,
+ sizeof(storage_query),
+ &disk_alignment,
+ sizeof(disk_alignment),
+ &tmp);
+
+ if (!result) {
+ DWORD err = GetLastError();
+ if (err != ERROR_INVALID_FUNCTION && err != ERROR_NOT_SUPPORTED) {
+ os_file_handle_error_no_exit(volume,
+ "DeviceIoControl(IOCTL_STORAGE_QUERY_PROPERTY)", FALSE);
}
+ goto end;
}
- fflush(stderr);
+ fblock_size = disk_alignment.BytesPerPhysicalSector;
- switch (err) {
- case ENOSPC:
- return(OS_FILE_DISK_FULL);
- case ENOENT:
- return(OS_FILE_NOT_FOUND);
- case EEXIST:
- return(OS_FILE_ALREADY_EXISTS);
- case ENAMETOOLONG:
- return(OS_FILE_NAME_TOO_LONG);
- case EXDEV:
- case ENOTDIR:
- case EISDIR:
- return(OS_FILE_PATH_ERROR);
- case ECANCELED:
- case ENOTTY:
- return(OS_FILE_OPERATION_NOT_SUPPORTED);
- case EAGAIN:
- if (srv_use_native_aio) {
- return(OS_FILE_AIO_RESOURCES_RESERVED);
- }
- break;
- case EINTR:
- if (srv_use_native_aio) {
- return(OS_FILE_AIO_INTERRUPTED);
+end:
+ if (volume_handle != INVALID_HANDLE_VALUE) {
+ CloseHandle(volume_handle);
+ }
+#endif /* _WIN32 */
+
+	/* Currently we only support file block sizes from 512 bytes
+	up to 4KiB; clamp anything outside that range. */
+ if (fblock_size > 4096 || fblock_size < 512) {
+ if (fblock_size < 512) {
+ fblock_size = 512;
+ } else {
+ fblock_size = 4096;
}
- break;
- case EACCES:
- return(OS_FILE_ACCESS_VIOLATION);
}
- return(OS_FILE_ERROR_MAX + err);
-#endif
-}
-/***********************************************************************//**
-Retrieves the last error number if an error occurs in a file io function.
-The number should be retrieved before any other OS calls (because they may
-overwrite the error number). If the number is not known to this program,
-the OS error number + 100 is returned.
-@return error number, or OS error number + 100 */
-UNIV_INTERN
-ulint
-os_file_get_last_error(
-/*===================*/
- bool report_all_errors) /*!< in: TRUE if we want an error
- message printed of all errors */
-{
- return(os_file_get_last_error_low(report_all_errors, false));
+ return fblock_size;
}
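+
+/* Editorial sketch (not part of this change): a caller might use the
+reported block size to round an I/O size up to an aligned multiple before
+issuing unbuffered I/O. The helper name is hypothetical. */
+static ulint
+example_aligned_io_size(os_file_t file, const char* name, ulint n_bytes)
+{
+	/* os_file_get_block_size() clamps the result to [512, 4096]. */
+	ulint	block_size = os_file_get_block_size(file, name);
+
+	/* Round n_bytes up to the next multiple of block_size. */
+	return(((n_bytes + block_size - 1) / block_size) * block_size);
+}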
-/****************************************************************//**
-Does error handling when a file operation fails.
-Conditionally exits (calling exit(3)) based on should_exit value and the
-error type, if should_exit is TRUE then on_error_silent is ignored.
-@return TRUE if we should retry the operation */
+#ifdef WIN_ASYNC_IO
+/** This function is only used in Windows asynchronous i/o.
+Waits for an aio operation to complete. This function is used to wait for
+completed requests. The aio array of pending requests is divided
+into segments. The thread specifies which segment or slot it wants to wait
+for. NOTE: this function will also take care of freeing the aio slot,
+therefore no other thread is allowed to do the freeing!
+@param[in] segment The number of the segment in the aio arrays to
+wait for; segment 0 is the ibuf I/O thread,
+segment 1 the log I/O thread, then follow the
+non-ibuf read threads, and as the last are the
+non-ibuf write threads; if this is
+ULINT_UNDEFINED, then it means that sync AIO
+is used, and this parameter is ignored
+@param[in] pos this parameter is used only in sync AIO:
+wait for the aio slot at this position
+@param[out] m1 the messages passed with the AIO request; note
+that also in the case where the AIO operation
+failed, these output parameters are valid and
+can be used to restart the operation,
+for example
+@param[out] m2 callback message
+@param[out] type OS_FILE_WRITE or ..._READ
+@return DB_SUCCESS or error code */
static
-ibool
-os_file_handle_error_cond_exit(
-/*===========================*/
- const char* name, /*!< in: name of a file or NULL */
- const char* operation, /*!< in: operation */
- ibool should_exit, /*!< in: call exit(3) if unknown error
- and this parameter is TRUE */
- ibool on_error_silent,/*!< in: if TRUE then don't print
- any message to the log iff it is
- an unknown non-fatal error */
- const char* file, /*!< in: file name */
- const ulint line) /*!< in: line */
-{
- ulint err;
-
- err = os_file_get_last_error_low(false, on_error_silent);
+dberr_t
+os_aio_windows_handler(
+ ulint segment,
+ ulint pos,
+ fil_node_t** m1,
+ void** m2,
+ IORequest* type);
+#endif /* WIN_ASYNC_IO */
- switch (err) {
- case OS_FILE_DISK_FULL:
- /* We only print a warning about disk full once */
+/** Generic AIO Handler methods. Currently handles IO post processing. */
+class AIOHandler {
+public:
+ /** Do any post processing after a read/write
+ @return DB_SUCCESS or error code. */
+ static dberr_t post_io_processing(Slot* slot);
+};
- if (os_has_said_disk_full) {
+/** Helper class for doing synchronous file IO. Currently, the objective
+is to hide the OS specific code, so that the higher level functions aren't
+peppered with #ifdefs, which make the code flow difficult to follow. */
+class SyncFileIO {
+public:
+ /** Constructor
+ @param[in] fh File handle
+ @param[in,out] buf Buffer to read/write
+ @param[in] n Number of bytes to read/write
+ @param[in] offset Offset where to read or write */
+ SyncFileIO(os_file_t fh, void* buf, ulint n, os_offset_t offset)
+ :
+ m_fh(fh),
+ m_buf(buf),
+ m_n(static_cast<ssize_t>(n)),
+ m_offset(offset)
+ {
+ ut_ad(m_n > 0);
+ }
- return(FALSE);
- }
+ /** Destructor */
+ ~SyncFileIO()
+ {
+ /* No op */
+ }
- /* Disk full error is reported irrespective of the
- on_error_silent setting. */
+ /** Do the read/write
+ @param[in] request The IO context and type
+ @return the number of bytes read/written or negative value on error */
+ ssize_t execute(const IORequest& request);
- if (name) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Encountered a problem with"
- " file %s\n", name);
- }
+ /** Do the read/write
+ @param[in,out] slot The IO slot, it has the IO context
+ @return the number of bytes read/written or negative value on error */
+ static ssize_t execute(Slot* slot);
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Disk is full. Try to clean the disk"
- " to free space.\n");
+ /** Move the read/write offset up to where the partial IO succeeded.
+ @param[in] n_bytes The number of bytes to advance */
+ void advance(ssize_t n_bytes)
+ {
+ m_offset += n_bytes;
- fprintf(stderr,
- " InnoDB: at file %s and at line %ld\n", file, line);
+ ut_ad(m_n >= n_bytes);
- os_has_said_disk_full = TRUE;
+ m_n -= n_bytes;
- fflush(stderr);
- ut_error;
- return(FALSE);
+ m_buf = reinterpret_cast<uchar*>(m_buf) + n_bytes;
+ }
- case OS_FILE_AIO_RESOURCES_RESERVED:
- case OS_FILE_AIO_INTERRUPTED:
+private:
+ /** Open file handle */
+ os_file_t m_fh;
- return(TRUE);
+ /** Buffer to read/write */
+ void* m_buf;
- case OS_FILE_PATH_ERROR:
- case OS_FILE_ALREADY_EXISTS:
- case OS_FILE_ACCESS_VIOLATION:
+ /** Number of bytes to read/write */
+ ssize_t m_n;
- return(FALSE);
+ /** Offset from where to read/write */
+ os_offset_t m_offset;
+};
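+
+/* Editorial sketch (not part of this change): the intended usage pattern
+for SyncFileIO is a retry loop that advances past partial reads/writes
+until the full count has been transferred. The helper name is
+hypothetical. */
+static ssize_t
+example_sync_io_full(
+	os_file_t		fh,
+	void*			buf,
+	ulint			n,
+	os_offset_t		offset,
+	const IORequest&	request)
+{
+	SyncFileIO	sync_file_io(fh, buf, n, offset);
+
+	for (ulint total = 0;;) {
+		ssize_t	n_bytes = sync_file_io.execute(request);
+
+		if (n_bytes <= 0) {
+			/* Error (or EOF on read); let the caller map
+			errno to a dberr_t. */
+			return(n_bytes);
+		}
+
+		total += n_bytes;
+
+		if (total == n) {
+			return(static_cast<ssize_t>(total));
+		}
+
+		/* Partial IO: move the offset and buffer pointer past
+		the bytes that succeeded and try again. */
+		sync_file_io.advance(n_bytes);
+	}
+}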
- case OS_FILE_SHARING_VIOLATION:
+/** Do any post processing after a read/write
+@return DB_SUCCESS or error code. */
+dberr_t
+AIOHandler::post_io_processing(Slot* slot)
+{
+ ut_ad(slot->is_reserved);
- os_thread_sleep(10000000); /* 10 sec */
- return(TRUE);
+ /* Total bytes read so far */
+ ulint n_bytes = (slot->ptr - slot->buf) + slot->n_bytes;
- case OS_FILE_OPERATION_ABORTED:
- case OS_FILE_INSUFFICIENT_RESOURCE:
+ return(n_bytes == slot->original_len ? DB_SUCCESS : DB_FAIL);
+}
- os_thread_sleep(100000); /* 100 ms */
- return(TRUE);
+/** Count the number of reserved (pending) slots
+@return number of reserved slots */
+ulint
+AIO::pending_io_count() const
+{
+ acquire();
- default:
+#ifdef UNIV_DEBUG
+ ut_a(m_n_segments > 0);
+ ut_a(!m_slots.empty());
- /* If it is an operation that can crash on error then it
- is better to ignore on_error_silent and print an error message
- to the log. */
+ ulint count = 0;
- if (should_exit || !on_error_silent) {
- fprintf(stderr,
- " InnoDB: Operation %s to file %s and at line %ld\n",
- operation, file, line);
- }
+ for (ulint i = 0; i < m_slots.size(); ++i) {
- if (should_exit || !on_error_silent) {
- ib_logf(IB_LOG_LEVEL_ERROR, "File %s: '%s' returned OS "
- "error " ULINTPF ".%s", name ? name : "(unknown)",
- operation, err, should_exit
- ? " Cannot continue operation" : "");
- }
+ const Slot& slot = m_slots[i];
- if (should_exit) {
- abort();
+ if (slot.is_reserved) {
+ ++count;
+ ut_a(slot.len > 0);
}
}
- return(FALSE);
+ ut_a(m_n_reserved == count);
+#endif /* UNIV_DEBUG */
+
+ ulint reserved = m_n_reserved;
+
+ release();
+
+ return(reserved);
}
-/****************************************************************//**
-Does error handling when a file operation fails.
-@return TRUE if we should retry the operation */
+#ifdef UNIV_DEBUG
+/** Validates the consistency of the aio system some of the time.
+@return true if ok or the check was skipped */
static
-ibool
-os_file_handle_error(
-/*=================*/
- const char* name, /*!< in: name of a file or NULL */
- const char* operation, /*!< in: operation */
- const char* file, /*!< in: file name */
- const ulint line) /*!< in: line */
+bool
+os_aio_validate_skip()
{
- /* exit in case of unknown error */
- return(os_file_handle_error_cond_exit(name, operation, TRUE, FALSE, file, line));
-}
+/** Try os_aio_validate() every this many times */
+# define OS_AIO_VALIDATE_SKIP 13
-/****************************************************************//**
-Does error handling when a file operation fails.
-@return TRUE if we should retry the operation */
-ibool
-os_file_handle_error_no_exit(
-/*=========================*/
- const char* name, /*!< in: name of a file or NULL */
- const char* operation, /*!< in: operation */
- ibool on_error_silent,/*!< in: if TRUE then don't print
- any message to the log. */
- const char* file, /*!< in: file name */
- const ulint line) /*!< in: line */
-{
- /* don't exit in case of unknown error */
- return(os_file_handle_error_cond_exit(
- name, operation, FALSE, on_error_silent, file, line));
+ /** The os_aio_validate() call skip counter.
+ Use a signed type because of the race condition below. */
+ static int os_aio_validate_count = OS_AIO_VALIDATE_SKIP;
+
+ /* There is a race condition below, but it does not matter,
+ because this call is only for heuristic purposes. We want to
+ reduce the call frequency of the costly os_aio_validate()
+ check in debug builds. */
+ --os_aio_validate_count;
+
+ if (os_aio_validate_count > 0) {
+ return(true);
+ }
+
+ os_aio_validate_count = OS_AIO_VALIDATE_SKIP;
+ return(os_aio_validate());
}
+#endif /* UNIV_DEBUG */
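+
+/* Editorial sketch (not part of this change): the same countdown idiom can
+rate-limit any expensive debug-only invariant check; expensive_check and
+the interval are hypothetical. */
+#ifdef UNIV_DEBUG
+static bool
+example_check_every_nth(bool (*expensive_check)())
+{
+	static int	countdown = 13;
+
+	if (--countdown > 0) {
+		/* Skipped this time; report success heuristically. */
+		return(true);
+	}
+
+	countdown = 13;
+
+	return(expensive_check());
+}
+#endif /* UNIV_DEBUG */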
#undef USE_FILE_LOCK
-#define USE_FILE_LOCK
-#if defined(UNIV_HOTBACKUP) || defined(__WIN__)
-/* InnoDB Hot Backup does not lock the data files.
- * On Windows, mandatory locking is used.
- */
-# undef USE_FILE_LOCK
+#ifndef _WIN32
+/* On Windows, mandatory locking is used */
+# define USE_FILE_LOCK
#endif
#ifdef USE_FILE_LOCK
-/****************************************************************//**
-Obtain an exclusive lock on a file.
-@return 0 on success */
+/** Obtain an exclusive lock on a file.
+@param[in] fd file descriptor
+@param[in] name file name
+@return 0 on success */
static
int
os_file_lock(
-/*=========*/
- int fd, /*!< in: file descriptor */
- const char* name) /*!< in: file name */
+ int fd,
+ const char* name)
{
struct flock lk;
- ut_ad(!srv_read_only_mode);
-
lk.l_type = F_WRLCK;
lk.l_whence = SEEK_SET;
lk.l_start = lk.l_len = 0;
if (fcntl(fd, F_SETLK, &lk) == -1) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unable to lock %s, error: %d", name, errno);
+ ib::error()
+ << "Unable to lock " << name
+ << " error: " << errno;
if (errno == EAGAIN || errno == EACCES) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Check that you do not already have "
- "another mysqld process using the "
- "same InnoDB data or log files.");
+
+ ib::info()
+ << "Check that you do not already have"
+ " another mysqld process using the"
+ " same InnoDB data or log files.";
}
return(-1);
@@ -891,26 +1149,108 @@ os_file_lock(
}
#endif /* USE_FILE_LOCK */
-#ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Creates the seek mutexes used in positioned reads and writes. */
-UNIV_INTERN
+/** Calculates local segment number and aio array from global segment number.
+@param[out] array aio wait array
+@param[in] segment global segment number
+@return local segment number within the aio array */
+ulint
+AIO::get_array_and_local_segment(
+ AIO** array,
+ ulint segment)
+{
+ ulint local_segment;
+ ulint n_extra_segs = (srv_read_only_mode) ? 0 : 2;
+
+ ut_a(segment < os_aio_n_segments);
+
+ if (!srv_read_only_mode && segment < n_extra_segs) {
+
+ /* We don't support ibuf/log IO during read only mode. */
+
+ if (segment == IO_IBUF_SEGMENT) {
+
+ *array = s_ibuf;
+
+ } else if (segment == IO_LOG_SEGMENT) {
+
+ *array = s_log;
+
+ } else {
+ *array = NULL;
+ }
+
+ local_segment = 0;
+
+ } else if (segment < s_reads->m_n_segments + n_extra_segs) {
+
+ *array = s_reads;
+ local_segment = segment - n_extra_segs;
+
+ } else {
+ *array = s_writes;
+
+ local_segment = segment
+ - (s_reads->m_n_segments + n_extra_segs);
+ }
+
+ return(local_segment);
+}
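+
+/* Editorial worked example: with the usual two extra segments (ibuf, log)
+and e.g. four read segments, global segment 0 maps to s_ibuf/0, 1 to
+s_log/0, 2..5 to s_reads/0..3, and 6 upwards to s_writes starting at local
+segment 0. In read-only mode the two extra segments do not exist and the
+read segments start at global segment 0. */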
+
+/** Frees a slot in the aio array. Assumes caller owns the mutex.
+@param[in,out] slot Slot to release */
void
-os_io_init_simple(void)
-/*===================*/
+AIO::release(Slot* slot)
{
- for (ulint i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
- os_file_seek_mutexes[i] = os_mutex_create();
+ ut_ad(is_mutex_owned());
+
+ ut_ad(slot->is_reserved);
+
+ slot->is_reserved = false;
+
+ --m_n_reserved;
+
+ if (m_n_reserved == m_slots.size() - 1) {
+ os_event_set(m_not_full);
+ }
+
+ if (m_n_reserved == 0) {
+ os_event_set(m_is_empty);
+ }
+
+#if defined(LINUX_NATIVE_AIO)
+
+ if (srv_use_native_aio) {
+ memset(&slot->control, 0x0, sizeof(slot->control));
+ slot->ret = 0;
+ slot->n_bytes = 0;
+ } else {
+ /* These fields should not be used if we are not
+ using native AIO. */
+ ut_ad(slot->n_bytes == 0);
+ ut_ad(slot->ret == 0);
}
+
+#endif /* LINUX_NATIVE_AIO */
}
-/** Create a temporary file. This function is like tmpfile(3), but
+/** Frees a slot in the AIO array. Assumes caller doesn't own the mutex.
+@param[in,out] slot Slot to release */
+void
+AIO::release_with_mutex(Slot* slot)
+{
+ acquire();
+
+ release(slot);
+
+ release();
+}
+
+/** Creates a temporary file. This function is like tmpfile(3), but
the temporary file is created in the given parameter path. If the path
-is null then it will create the file in the mysql server configuration
+is NULL then it will create the file in the MySQL server configuration
parameter (--tmpdir).
@param[in] path location for creating temporary file
-@return temporary file handle, or NULL on error */
-UNIV_INTERN
+@return temporary file handle, or NULL on error */
FILE*
os_file_create_tmpfile(
const char* path)
@@ -919,17 +1259,16 @@ os_file_create_tmpfile(
WAIT_ALLOW_WRITES();
int fd = innobase_mysql_tmpfile(path);
- ut_ad(!srv_read_only_mode);
-
if (fd >= 0) {
file = fdopen(fd, "w+b");
}
- if (!file) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: unable to create temporary file;"
- " errno: %d\n", errno);
+ if (file == NULL) {
+
+ ib::error()
+ << "Unable to create temporary file; errno: "
+ << errno;
+
if (fd >= 0) {
close(fd);
}
@@ -937,988 +1276,1702 @@ os_file_create_tmpfile(
return(file);
}
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************************//**
-The os_file_opendir() function opens a directory stream corresponding to the
-directory named by the dirname argument. The directory stream is positioned
-at the first entry. In both Unix and Windows we automatically skip the '.'
-and '..' items at the start of the directory listing.
-@return directory stream, NULL if error */
-UNIV_INTERN
-os_file_dir_t
-os_file_opendir(
-/*============*/
- const char* dirname, /*!< in: directory name; it must not
- contain a trailing '\' or '/' */
- ibool error_is_fatal) /*!< in: TRUE if we should treat an
- error as a fatal error; if we try to
- open symlinks then we do not wish a
- fatal error if it happens not to be
- a directory */
+/** Rewind file to its start, read at most size - 1 bytes from it to str, and
+NUL-terminate str. All errors are silently ignored. This function is
+mostly meant to be used with temporary files.
+@param[in,out] file File to read from
+@param[in,out] str Buffer where to read
+@param[in] size Size of buffer */
+void
+os_file_read_string(
+ FILE* file,
+ char* str,
+ ulint size)
{
- os_file_dir_t dir;
-#ifdef __WIN__
- LPWIN32_FIND_DATA lpFindFileData;
- char path[OS_FILE_MAX_PATH + 3];
+ if (size != 0) {
+ rewind(file);
- ut_a(strlen(dirname) < OS_FILE_MAX_PATH);
+ size_t flen = fread(str, 1, size - 1, file);
- strcpy(path, dirname);
- strcpy(path + strlen(path), "\\*");
+ str[flen] = '\0';
+ }
+}
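+
+/* Editorial sketch (not part of this change): a typical use is to read
+back text that was just written into a temporary file. The helper name is
+hypothetical. */
+static void
+example_read_tmpfile_text()
+{
+	char	buf[256];
+	FILE*	file = os_file_create_tmpfile(NULL);
+
+	if (file != NULL) {
+		fputs("hello", file);
+
+		/* Rewinds the file and NUL-terminates the result, so
+		buf now holds "hello". */
+		os_file_read_string(file, buf, sizeof(buf));
+
+		fclose(file);
+	}
+}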
- /* Note that in Windows opening the 'directory stream' also retrieves
- the first entry in the directory. Since it is '.', that is no problem,
- as we will skip over the '.' and '..' entries anyway. */
+/** This function returns a new path name after replacing the basename
+in an old path with a new basename. The old_path is a full path
+name including the extension. The tablename is in the normal
+form "databasename/tablename". The new base name is found after
+the forward slash. Both input strings are null terminated.
- lpFindFileData = static_cast<LPWIN32_FIND_DATA>(
- ut_malloc(sizeof(WIN32_FIND_DATA)));
+This function allocates memory to be returned. It is the caller's
+responsibility to free the return value after it is no longer needed.
- dir = FindFirstFile((LPCTSTR) path, lpFindFileData);
+@param[in] old_path Pathname
+@param[in] tablename Contains new base name
+@return own: new full pathname */
+char*
+os_file_make_new_pathname(
+ const char* old_path,
+ const char* tablename)
+{
+ ulint dir_len;
+ char* last_slash;
+ char* base_name;
+ char* new_path;
+ ulint new_path_len;
- ut_free(lpFindFileData);
+ /* Split the tablename into its database and table name components.
+ They are separated by a '/'. */
+ last_slash = strrchr((char*) tablename, '/');
+ base_name = last_slash ? last_slash + 1 : (char*) tablename;
- if (dir == INVALID_HANDLE_VALUE) {
+ /* Find the offset of the last slash. We will strip off the
+ old basename.ibd which starts after that slash. */
+ last_slash = strrchr((char*) old_path, OS_PATH_SEPARATOR);
+ dir_len = last_slash ? last_slash - old_path : strlen(old_path);
- if (error_is_fatal) {
- os_file_handle_error(dirname, "opendir", __FILE__, __LINE__);
- }
+ /* allocate a new path and move the old directory path to it. */
+ new_path_len = dir_len + strlen(base_name) + sizeof "/.ibd";
+ new_path = static_cast<char*>(ut_malloc_nokey(new_path_len));
+ memcpy(new_path, old_path, dir_len);
- return(NULL);
+ snprintf(new_path + dir_len, new_path_len - dir_len,
+ "%c%s.ibd", OS_PATH_SEPARATOR, base_name);
+
+ return(new_path);
+}
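+
+/* Editorial worked example: os_file_make_new_pathname("/data/db1/t1.ibd",
+"db2/t2") keeps the old directory, swaps in the new base name and returns
+"/data/db1/t2.ibd". */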
+
+/** This function reduces a null-terminated full remote path name into
+the path that is sent by MySQL for DATA DIRECTORY clause. It replaces
+the 'databasename/tablename.ibd' found at the end of the path with just
+'tablename'.
+
+Since the result is always smaller than the path sent in, no new memory
+is allocated. The caller should allocate memory for the path sent in.
+This function manipulates that path in place.
+
+If the path format is not as expected, just return. The result is used
+to inform a SHOW CREATE TABLE command.
+@param[in,out] data_dir_path Full path/data_dir_path */
+void
+os_file_make_data_dir_path(
+ char* data_dir_path)
+{
+ /* Replace the period before the extension with a null byte. */
+ char* ptr = strrchr((char*) data_dir_path, '.');
+
+ if (ptr == NULL) {
+ return;
}
- return(dir);
-#else
- dir = opendir(dirname);
+ ptr[0] = '\0';
- if (dir == NULL && error_is_fatal) {
- os_file_handle_error(dirname, "opendir", __FILE__, __LINE__);
+ /* The tablename starts after the last slash. */
+ ptr = strrchr((char*) data_dir_path, OS_PATH_SEPARATOR);
+
+ if (ptr == NULL) {
+ return;
}
- return(dir);
-#endif /* __WIN__ */
+ ptr[0] = '\0';
+
+ char* tablename = ptr + 1;
+
+ /* The databasename starts after the next to last slash. */
+ ptr = strrchr((char*) data_dir_path, OS_PATH_SEPARATOR);
+
+ if (ptr == NULL) {
+ return;
+ }
+
+ ulint tablename_len = ut_strlen(tablename);
+
+ ut_memmove(++ptr, tablename, tablename_len);
+
+ ptr[tablename_len] = '\0';
}
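+
+/* Editorial worked example: given "/remote/path/db/tbl.ibd" this trims
+the ".ibd" extension, drops the "db" component and moves the table name
+up, leaving "/remote/path/tbl" in place. */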
-/***********************************************************************//**
-Closes a directory stream.
-@return 0 if success, -1 if failure */
-UNIV_INTERN
-int
-os_file_closedir(
-/*=============*/
- os_file_dir_t dir) /*!< in: directory stream */
+/** Check if the path refers to the root of a drive using a pointer
+to the last directory separator that the caller has fixed.
+@param[in] path path name
+@param[in]	last_slash	last directory separator in the path
+@return true if this path is a drive root, false if not */
+UNIV_INLINE
+bool
+os_file_is_root(
+ const char* path,
+ const char* last_slash)
{
-#ifdef __WIN__
- BOOL ret;
+ return(
+#ifdef _WIN32
+ (last_slash == path + 2 && path[1] == ':') ||
+#endif /* _WIN32 */
+ last_slash == path);
+}
- ret = FindClose(dir);
+/** Return the parent directory component of a null-terminated path.
+Return a new buffer containing the string up to, but not including,
+the final component of the path.
+The path returned will not contain a trailing separator.
+Do not return a root path, return NULL instead.
+The final component trimmed off may be a filename or a directory name.
+If the final component is the only component of the path, return NULL.
+It is the caller's responsibility to free the returned string after it
+is no longer needed.
+@param[in] path Path name
+@return own: parent directory of the path */
+static
+char*
+os_file_get_parent_dir(
+ const char* path)
+{
+ bool has_trailing_slash = false;
- if (!ret) {
- os_file_handle_error_no_exit(NULL, "closedir", FALSE, __FILE__, __LINE__);
+ /* Find the offset of the last slash */
+ const char* last_slash = strrchr(path, OS_PATH_SEPARATOR);
- return(-1);
+ if (!last_slash) {
+ /* No slash in the path, return NULL */
+ return(NULL);
}
- return(0);
-#else
- int ret;
+ /* Ok, there is a slash. Is there anything after it? */
+ if (static_cast<size_t>(last_slash - path + 1) == strlen(path)) {
+ has_trailing_slash = true;
+ }
- ret = closedir(dir);
+	/* Reduce repetitive slashes. */
+ while (last_slash > path
+ && last_slash[-1] == OS_PATH_SEPARATOR) {
+ last_slash--;
+ }
- if (ret) {
- os_file_handle_error_no_exit(NULL, "closedir", FALSE, __FILE__, __LINE__);
+ /* Check for the root of a drive. */
+ if (os_file_is_root(path, last_slash)) {
+ return(NULL);
}
- return(ret);
-#endif /* __WIN__ */
+ /* If a trailing slash prevented the first strrchr() from trimming
+ the last component of the path, trim that component now. */
+ if (has_trailing_slash) {
+ /* Back up to the previous slash. */
+ last_slash--;
+ while (last_slash > path
+ && last_slash[0] != OS_PATH_SEPARATOR) {
+ last_slash--;
+ }
+
+		/* Reduce repetitive slashes. */
+ while (last_slash > path
+ && last_slash[-1] == OS_PATH_SEPARATOR) {
+ last_slash--;
+ }
+ }
+
+ /* Check for the root of a drive. */
+ if (os_file_is_root(path, last_slash)) {
+ return(NULL);
+ }
+
+ if (last_slash - path < 0) {
+		/* Sanity check; this keeps gcc from emitting warnings
+		for this case in some optimized builds. */
+		return(NULL);
+ }
+
+ /* Non-trivial directory component */
+
+ return(mem_strdupl(path, last_slash - path));
}
+#ifdef UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR
-/***********************************************************************//**
-This function returns information of the next file in the directory. We jump
-over the '.' and '..' entries in the directory.
-@return 0 if ok, -1 if error, 1 if at the end of the directory */
-UNIV_INTERN
-int
-os_file_readdir_next_file(
-/*======================*/
- const char* dirname,/*!< in: directory name or path */
- os_file_dir_t dir, /*!< in: directory stream */
- os_file_stat_t* info) /*!< in/out: buffer where the info is returned */
+/* Test the function os_file_get_parent_dir. */
+void
+test_os_file_get_parent_dir(
+ const char* child_dir,
+ const char* expected_dir)
{
-#ifdef __WIN__
- LPWIN32_FIND_DATA lpFindFileData;
- BOOL ret;
+ char* child = mem_strdup(child_dir);
+ char* expected = expected_dir == NULL ? NULL
+ : mem_strdup(expected_dir);
+
+ /* os_file_get_parent_dir() assumes that separators are
+ converted to OS_PATH_SEPARATOR. */
+ os_normalize_path(child);
+ os_normalize_path(expected);
+
+ char* parent = os_file_get_parent_dir(child);
+
+ bool unexpected = (expected == NULL
+ ? (parent != NULL)
+ : (0 != strcmp(parent, expected)));
+ if (unexpected) {
+ ib::fatal() << "os_file_get_parent_dir('" << child
+ << "') returned '" << parent
+ << "', instead of '" << expected << "'.";
+ }
+ ut_free(parent);
+ ut_free(child);
+ ut_free(expected);
+}
- lpFindFileData = static_cast<LPWIN32_FIND_DATA>(
- ut_malloc(sizeof(WIN32_FIND_DATA)));
-next_file:
- ret = FindNextFile(dir, lpFindFileData);
+/* Test the function os_file_get_parent_dir. */
+void
+unit_test_os_file_get_parent_dir()
+{
+ test_os_file_get_parent_dir("/usr/lib/a", "/usr/lib");
+ test_os_file_get_parent_dir("/usr/", NULL);
+ test_os_file_get_parent_dir("//usr//", NULL);
+ test_os_file_get_parent_dir("usr", NULL);
+ test_os_file_get_parent_dir("usr//", NULL);
+ test_os_file_get_parent_dir("/", NULL);
+ test_os_file_get_parent_dir("//", NULL);
+ test_os_file_get_parent_dir(".", NULL);
+ test_os_file_get_parent_dir("..", NULL);
+# ifdef _WIN32
+ test_os_file_get_parent_dir("D:", NULL);
+ test_os_file_get_parent_dir("D:/", NULL);
+ test_os_file_get_parent_dir("D:\\", NULL);
+ test_os_file_get_parent_dir("D:/data", NULL);
+ test_os_file_get_parent_dir("D:/data/", NULL);
+ test_os_file_get_parent_dir("D:\\data\\", NULL);
+ test_os_file_get_parent_dir("D:///data/////", NULL);
+ test_os_file_get_parent_dir("D:\\\\\\data\\\\\\\\", NULL);
+ test_os_file_get_parent_dir("D:/data//a", "D:/data");
+ test_os_file_get_parent_dir("D:\\data\\\\a", "D:\\data");
+ test_os_file_get_parent_dir("D:///data//a///b/", "D:///data//a");
+ test_os_file_get_parent_dir("D:\\\\\\data\\\\a\\\\\\b\\", "D:\\\\\\data\\\\a");
+#endif /* _WIN32 */
+}
+#endif /* UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR */
- if (ret) {
- ut_a(strlen((char*) lpFindFileData->cFileName)
- < OS_FILE_MAX_PATH);
- if (strcmp((char*) lpFindFileData->cFileName, ".") == 0
- || strcmp((char*) lpFindFileData->cFileName, "..") == 0) {
+/** Creates all missing subdirectories along the given path.
+@param[in] path Path name
+@return DB_SUCCESS if OK, otherwise error code. */
+dberr_t
+os_file_create_subdirs_if_needed(
+ const char* path)
+{
+ if (srv_read_only_mode) {
+
+ ib::error()
+ << "read only mode set. Can't create "
+ << "subdirectories '" << path << "'";
- goto next_file;
- }
+ return(DB_READ_ONLY);
- strcpy(info->name, (char*) lpFindFileData->cFileName);
+ }
- info->size = (ib_int64_t)(lpFindFileData->nFileSizeLow)
- + (((ib_int64_t)(lpFindFileData->nFileSizeHigh))
- << 32);
+ char* subdir = os_file_get_parent_dir(path);
- if (lpFindFileData->dwFileAttributes
- & FILE_ATTRIBUTE_REPARSE_POINT) {
- /* TODO: test Windows symlinks */
- /* TODO: MySQL has apparently its own symlink
- implementation in Windows, dbname.sym can
- redirect a database directory:
- REFMAN "windows-symbolic-links.html" */
- info->type = OS_FILE_TYPE_LINK;
- } else if (lpFindFileData->dwFileAttributes
- & FILE_ATTRIBUTE_DIRECTORY) {
- info->type = OS_FILE_TYPE_DIR;
- } else {
- /* It is probably safest to assume that all other
- file types are normal. Better to check them rather
- than blindly skip them. */
+ if (subdir == NULL) {
+ /* subdir is root or cwd, nothing to do */
+ return(DB_SUCCESS);
+ }
- info->type = OS_FILE_TYPE_FILE;
+ /* Test if subdir exists */
+ os_file_type_t type;
+ bool subdir_exists;
+ bool success = os_file_status(subdir, &subdir_exists, &type);
+
+ if (success && !subdir_exists) {
+
+ /* Subdir does not exist, create it */
+ dberr_t err = os_file_create_subdirs_if_needed(subdir);
+
+ if (err != DB_SUCCESS) {
+
+ ut_free(subdir);
+
+ return(err);
}
+
+ success = os_file_create_directory(subdir, false);
}
- ut_free(lpFindFileData);
+ ut_free(subdir);
- if (ret) {
- return(0);
- } else if (GetLastError() == ERROR_NO_MORE_FILES) {
+ return(success ? DB_SUCCESS : DB_ERROR);
+}
- return(1);
+#ifndef _WIN32
+
+/** Do the read/write
+@param[in] request The IO context and type
+@return the number of bytes read/written or negative value on error */
+ssize_t
+SyncFileIO::execute(const IORequest& request)
+{
+ ssize_t n_bytes;
+
+ if (request.is_read()) {
+ n_bytes = pread(m_fh, m_buf, m_n, m_offset);
} else {
- os_file_handle_error_no_exit(NULL, "readdir_next_file", FALSE, __FILE__, __LINE__);
- return(-1);
+ ut_ad(request.is_write());
+ n_bytes = pwrite(m_fh, m_buf, m_n, m_offset);
}
-#else
- struct dirent* ent;
- char* full_path;
- int ret;
- struct stat statinfo;
-#ifdef HAVE_READDIR_R
- char dirent_buf[sizeof(struct dirent)
- + _POSIX_PATH_MAX + 100];
- /* In /mysys/my_lib.c, _POSIX_PATH_MAX + 1 is used as
- the max file name len; but in most standards, the
- length is NAME_MAX; we add 100 to be even safer */
-#endif
-next_file:
+ return(n_bytes);
+}
+/** Free storage space associated with a section of the file.
+@param[in] fh Open file handle
+@param[in] off Starting offset (SEEK_SET)
+@param[in] len Size of the hole
+@return DB_SUCCESS or error code */
+static
+dberr_t
+os_file_punch_hole_posix(
+ os_file_t fh,
+ os_offset_t off,
+ os_offset_t len)
+{
-#ifdef HAVE_READDIR_R
- ret = readdir_r(dir, (struct dirent*) dirent_buf, &ent);
+#ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
+ const int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
- if (ret != 0
-#ifdef UNIV_AIX
- /* On AIX, only if we got non-NULL 'ent' (result) value and
- a non-zero 'ret' (return) value, it indicates a failed
- readdir_r() call. An NULL 'ent' with an non-zero 'ret'
- would indicate the "end of the directory" is reached. */
- && ent != NULL
-#endif
- ) {
- fprintf(stderr,
- "InnoDB: cannot read directory %s, error %lu\n",
- dirname, (ulong) ret);
+ int ret = fallocate(fh, mode, off, len);
- return(-1);
+ if (ret == 0) {
+ return(DB_SUCCESS);
}
- if (ent == NULL) {
- /* End of directory */
+ if (errno == ENOTSUP) {
+ return(DB_IO_NO_PUNCH_HOLE);
+ }
- return(1);
+ ib::warn()
+		<< "fallocate(" << fh
+		<< ", FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, "
+ << off << ", " << len << ") returned errno: "
+ << errno;
+
+ return(DB_IO_ERROR);
+
+#elif defined(UNIV_SOLARIS)
+
+ // Use F_FREESP
+
+#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE */
+
+ return(DB_IO_NO_PUNCH_HOLE);
+}
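+
+/* Editorial sketch (not part of this change): a caller would typically
+treat DB_IO_NO_PUNCH_HOLE as "hole punching unsupported, disable and carry
+on" rather than as a hard error. The helper name is hypothetical. */
+static dberr_t
+example_trim_extent(os_file_t fh, os_offset_t off, os_offset_t len)
+{
+	dberr_t	err = os_file_punch_hole_posix(fh, off, len);
+
+	if (err == DB_IO_NO_PUNCH_HOLE) {
+		/* The file system cannot punch holes; remember not
+		to try again on this file and continue normally. */
+		return(DB_SUCCESS);
+	}
+
+	return(err);
+}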
+
+#if defined(LINUX_NATIVE_AIO)
+
+/** Linux native AIO handler */
+class LinuxAIOHandler {
+public:
+	/** Constructor
+	@param[in]	global_segment	The global segment */
+ LinuxAIOHandler(ulint global_segment)
+ :
+ m_global_segment(global_segment)
+ {
+ /* Should never be doing Sync IO here. */
+ ut_a(m_global_segment != ULINT_UNDEFINED);
+
+ /* Find the array and the local segment. */
+
+ m_segment = AIO::get_array_and_local_segment(
+ &m_array, m_global_segment);
+
+ m_n_slots = m_array->slots_per_segment();
}
- ut_a(strlen(ent->d_name) < _POSIX_PATH_MAX + 100 - 1);
-#else
- ent = readdir(dir);
+ /** Destructor */
+ ~LinuxAIOHandler()
+ {
+ // No op
+ }
+
+ /**
+ Process a Linux AIO request
+ @param[out] m1 the messages passed with the
+ @param[out] m2 AIO request; note that in case the
+ AIO operation failed, these output
+ parameters are valid and can be used to
+ restart the operation.
+ @param[out] request IO context
+ @return DB_SUCCESS or error code */
+ dberr_t poll(fil_node_t** m1, void** m2, IORequest* request);
+
+private:
+ /** Resubmit an IO request that was only partially successful
+ @param[in,out] slot Request to resubmit
+ @return DB_SUCCESS or DB_FAIL if the IO resubmit request failed */
+ dberr_t resubmit(Slot* slot);
+
+ /** Check if the AIO succeeded
+ @param[in,out] slot The slot to check
+ @return DB_SUCCESS, DB_FAIL if the operation should be retried or
+ DB_IO_ERROR on all other errors */
+ dberr_t check_state(Slot* slot);
+
+ /** @return true if a shutdown was detected */
+ bool is_shutdown() const
+ {
+ return(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS
+ && !buf_page_cleaner_is_active);
+ }
+
+	/** Find a slot that has completed IO; if no such slot is found,
+	the m_array->m_mutex will be released.
+ @param[out] n_pending The number of pending IOs
+ @return NULL or a slot that has completed IO */
+ Slot* find_completed_slot(ulint* n_pending);
+
+ /** This is called from within the IO-thread. If there are no completed
+ IO requests in the slot array, the thread calls this function to
+ collect more requests from the Linux kernel.
+ The IO-thread waits on io_getevents(), which is a blocking call, with
+	a timeout value. Unless the system is very heavily loaded, keeping the
+ IO-thread very busy, the io-thread will spend most of its time waiting
+ in this function.
+ The IO-thread also exits in this function. It checks server status at
+ each wakeup and that is why we use timed wait in io_getevents(). */
+ void collect();
+
+private:
+ /** Slot array */
+ AIO* m_array;
+
+	/** Number of slots in the local segment */
+ ulint m_n_slots;
+
+ /** The local segment to check */
+ ulint m_segment;
+
+ /** The global segment */
+ ulint m_global_segment;
+};
- if (ent == NULL) {
+/** Resubmit an IO request that was only partially successful
+@param[in,out] slot Request to resubmit
+@return DB_SUCCESS or DB_FAIL if the IO resubmit request failed */
+dberr_t
+LinuxAIOHandler::resubmit(Slot* slot)
+{
+#ifdef UNIV_DEBUG
+ /* Bytes already read/written out */
+ ulint n_bytes = slot->ptr - slot->buf;
- return(1);
+ ut_ad(m_array->is_mutex_owned());
+
+ ut_ad(n_bytes < slot->original_len);
+ ut_ad(static_cast<ulint>(slot->n_bytes) < slot->original_len - n_bytes);
+ /* Partial read or write scenario */
+ ut_ad(slot->len >= static_cast<ulint>(slot->n_bytes));
+#endif /* UNIV_DEBUG */
+
+ slot->len -= slot->n_bytes;
+ slot->ptr += slot->n_bytes;
+ slot->offset += slot->n_bytes;
+
+ /* Resetting the bytes read/written */
+ slot->n_bytes = 0;
+ slot->io_already_done = false;
+
+ compile_time_assert(sizeof(off_t) >= sizeof(os_offset_t));
+
+ struct iocb* iocb = &slot->control;
+
+ if (slot->type.is_read()) {
+
+ io_prep_pread(
+ iocb,
+ slot->file,
+ slot->ptr,
+ slot->len,
+ slot->offset);
+ } else {
+
+ ut_a(slot->type.is_write());
+
+ io_prep_pwrite(
+ iocb,
+ slot->file,
+ slot->ptr,
+ slot->len,
+ slot->offset);
}
-#endif
- ut_a(strlen(ent->d_name) < OS_FILE_MAX_PATH);
- if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) {
+ iocb->data = slot;
- goto next_file;
+ ut_a(reinterpret_cast<size_t>(iocb->u.c.buf) % OS_FILE_LOG_BLOCK_SIZE
+ == 0);
+
+ /* Resubmit an I/O request */
+ int ret = io_submit(m_array->io_ctx(m_segment), 1, &iocb);
+ ut_a(ret != -EINVAL);
+
+ if (ret < 0) {
+ errno = -ret;
}
- strcpy(info->name, ent->d_name);
+ return(ret < 0 ? DB_IO_PARTIAL_FAILED : DB_SUCCESS);
+}
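+
+/* Editorial worked example: if a 16384-byte write at offset 0 completed
+only 4096 bytes, resubmit() shrinks the request to len = 12288, advances
+ptr by 4096 and sets offset = 4096, so the re-queued iocb covers exactly
+the remaining bytes. */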
- full_path = static_cast<char*>(
- ut_malloc(strlen(dirname) + strlen(ent->d_name) + 10));
+/** Check if the AIO succeeded
+@param[in,out] slot The slot to check
+@return DB_SUCCESS, DB_FAIL if the operation should be retried or
+ DB_IO_ERROR on all other errors */
+dberr_t
+LinuxAIOHandler::check_state(Slot* slot)
+{
+ ut_ad(m_array->is_mutex_owned());
- sprintf(full_path, "%s/%s", dirname, ent->d_name);
+	/* Note that there may be more than one completed
+	IO request. We process them one at a time. We may have a case
+ here to improve the performance slightly by dealing with all
+ requests in one sweep. */
- ret = stat(full_path, &statinfo);
+ srv_set_io_thread_op_info(
+ m_global_segment, "processing completed aio requests");
- if (ret) {
+ ut_ad(slot->io_already_done);
- if (errno == ENOENT) {
- /* readdir() returned a file that does not exist,
- it must have been deleted in the meantime. Do what
- would have happened if the file was deleted before
- readdir() - ignore and go to the next entry.
- If this is the last entry then info->name will still
- contain the name of the deleted file when this
- function returns, but this is not an issue since the
- caller shouldn't be looking at info when end of
- directory is returned. */
+ dberr_t err = DB_SUCCESS;
- ut_free(full_path);
+ if (slot->ret == 0) {
- goto next_file;
- }
+ err = AIOHandler::post_io_processing(slot);
- os_file_handle_error_no_exit(full_path, "stat", FALSE, __FILE__, __LINE__);
+ } else {
+ errno = -slot->ret;
- ut_free(full_path);
+ /* os_file_handle_error does tell us if we should retry
+ this IO. As it stands now, we don't do this retry when
+ reaping requests from a different context than
+ the dispatcher. This non-retry logic is the same for
+ Windows and Linux native AIO.
+ We should probably look into this to transparently
+ re-submit the IO. */
+ os_file_handle_error(slot->name, "Linux aio");
- return(-1);
+ err = DB_IO_ERROR;
}
- info->size = (ib_int64_t) statinfo.st_size;
+ return(err);
+}
- if (S_ISDIR(statinfo.st_mode)) {
- info->type = OS_FILE_TYPE_DIR;
- } else if (S_ISLNK(statinfo.st_mode)) {
- info->type = OS_FILE_TYPE_LINK;
- } else if (S_ISREG(statinfo.st_mode)) {
- info->type = OS_FILE_TYPE_FILE;
- } else {
- info->type = OS_FILE_TYPE_UNKNOWN;
+/** Find a slot that has completed IO; if no such slot is found,
+the m_array->m_mutex will be released.
+@param[out] n_pending The number of pending IOs
+@return NULL or a slot that has completed IO */
+Slot*
+LinuxAIOHandler::find_completed_slot(ulint* n_pending)
+{
+ ulint offset = m_n_slots * m_segment;
+
+ *n_pending = 0;
+
+ m_array->acquire();
+
+ Slot* slot = m_array->at(offset);
+
+ for (ulint i = 0; i < m_n_slots; ++i, ++slot) {
+
+ if (slot->is_reserved) {
+
+ ++*n_pending;
+
+ if (slot->io_already_done) {
+
+ /* Something for us to work on.
+ Note: We don't release the mutex. */
+ return(slot);
+ }
+ }
}
- ut_free(full_path);
+ m_array->release();
- return(0);
-#endif
+ return(NULL);
}
-/*****************************************************************//**
-This function attempts to create a directory named pathname. The new
-directory gets default permissions. On Unix the permissions are
-(0770 & ~umask). If the directory exists already, nothing is done and
-the call succeeds, unless the fail_if_exists arguments is true.
-If another error occurs, such as a permission error, this does not crash,
-but reports the error and returns FALSE.
-@return TRUE if call succeeds, FALSE on error */
-UNIV_INTERN
-ibool
-os_file_create_directory(
-/*=====================*/
- const char* pathname, /*!< in: directory name as
- null-terminated string */
- ibool fail_if_exists) /*!< in: if TRUE, pre-existing directory
- is treated as an error. */
+/** This function is only used in Linux native asynchronous i/o. This is
+called from within the io-thread. If there are no completed IO requests
+in the slot array, the thread calls this function to collect more
+requests from the kernel.
+The io-thread waits on io_getevents(), which is a blocking call, with
+a timeout value. Unless the system is very heavily loaded, keeping the
+io-thread very busy, the io-thread will spend most of its time waiting
+in this function.
+The io-thread also exits in this function. It checks server status at
+each wakeup and that is why we use timed wait in io_getevents(). */
+void
+LinuxAIOHandler::collect()
{
-#ifdef __WIN__
- BOOL rcode;
+ ut_ad(m_n_slots > 0);
+ ut_ad(m_array != NULL);
+ ut_ad(m_segment < m_array->get_n_segments());
- rcode = CreateDirectory((LPCTSTR) pathname, NULL);
- if (!(rcode != 0
- || (GetLastError() == ERROR_ALREADY_EXISTS
- && !fail_if_exists))) {
+ /* Which io_context_t we are going to use. */
+ io_context_t io_ctx = m_array->io_ctx(m_segment);
- os_file_handle_error_no_exit(
- pathname, "CreateDirectory", FALSE, __FILE__, __LINE__);
+ /* Starting point of the m_segment we will be working on. */
+ ulint start_pos = m_segment * m_n_slots;
+
+ /* End point. */
+ ulint end_pos = start_pos + m_n_slots;
+
+ for (;;) {
+ struct io_event* events;
+
+ /* Which part of event array we are going to work on. */
+ events = m_array->io_events(m_segment * m_n_slots);
+
+ /* Initialize the events. */
+ memset(events, 0, sizeof(*events) * m_n_slots);
+
+ /* The timeout value is arbitrary. We probably need
+ to experiment with it a little. */
+ struct timespec timeout;
+
+ timeout.tv_sec = 0;
+ timeout.tv_nsec = OS_AIO_REAP_TIMEOUT;
+
+ int ret;
+
+ ret = io_getevents(io_ctx, 1, m_n_slots, events, &timeout);
+ ut_a(ret != -EINVAL);
+ ut_ad(ret != -EFAULT);
+
+ for (int i = 0; i < ret; ++i) {
+
+ struct iocb* iocb;
+
+ iocb = reinterpret_cast<struct iocb*>(events[i].obj);
+ ut_a(iocb != NULL);
+
+ Slot* slot = reinterpret_cast<Slot*>(iocb->data);
+
+ /* Some sanity checks. */
+ ut_a(slot != NULL);
+ ut_a(slot->is_reserved);
+
+			/* We are not scribbling on the previous segment. */
+ ut_a(slot->pos >= start_pos);
- return(FALSE);
+			/* We have not overstepped into the next segment. */
+ ut_a(slot->pos < end_pos);
+
+			/* Deallocate unused blocks from the file system.
+			This is never done to page 0 or to log files. */
+ if (slot->offset > 0
+ && !slot->type.is_log()
+ && slot->type.is_write()
+ && slot->type.punch_hole()) {
+
+ slot->err = slot->type.punch_hole(
+ slot->file,
+ slot->offset, slot->len);
+ } else {
+ slot->err = DB_SUCCESS;
+ }
+
+ /* Mark this request as completed. The error handling
+ will be done in the calling function. */
+ m_array->acquire();
+
+ /* events[i].res2 should always be ZERO */
+ ut_ad(events[i].res2 == 0);
+ slot->io_already_done = true;
+
+			/* Even though events[i].res is an unsigned number
+ in libaio, it is used to return a negative value
+ (negated errno value) to indicate error and a positive
+ value to indicate number of bytes read or written. */
+
+ if (events[i].res > slot->len) {
+ /* failure */
+ slot->n_bytes = 0;
+ slot->ret = events[i].res;
+ } else {
+ /* success */
+ slot->n_bytes = events[i].res;
+ slot->ret = 0;
+ }
+ m_array->release();
+ }
+
+ if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS
+ || !buf_page_cleaner_is_active
+ || ret > 0) {
+
+ break;
+ }
+
+ /* This error handling is for any error in collecting the
+ IO requests. The errors, if any, for any particular IO
+ request are simply passed on to the calling routine. */
+
+ switch (ret) {
+ case -EAGAIN:
+ /* Not enough resources! Try again. */
+
+ case -EINTR:
+			/* Interrupted!
+ If we have some completed IOs available then the
+ return code will be the number of IOs. We get EINTR
+ only if there are no completed IOs and we have been
+ interrupted. */
+
+ case 0:
+ /* No pending request! Go back and check again. */
+
+ continue;
+ }
+
+ /* All other errors should cause a trap for now. */
+ ib::fatal()
+ << "Unexpected ret_code[" << ret
+ << "] from io_getevents()!";
+
+ break;
}
+}
- return(TRUE);
-#else
- int rcode;
- WAIT_ALLOW_WRITES();
+/** Process a Linux AIO request
+@param[out] m1 the messages passed with the
+@param[out] m2 AIO request; note that in case the
+ AIO operation failed, these output
+ parameters are valid and can be used to
+ restart the operation.
+@param[out] request IO context
+@return DB_SUCCESS or error code */
+dberr_t
+LinuxAIOHandler::poll(fil_node_t** m1, void** m2, IORequest* request)
+{
+ dberr_t err = DB_SUCCESS;
+ Slot* slot;
- rcode = mkdir(pathname, 0770);
+ /* Loop until we have found a completed request. */
+ for (;;) {
- if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) {
- /* failure */
- os_file_handle_error_no_exit(pathname, "mkdir", FALSE, __FILE__, __LINE__);
+ ulint n_pending;
+
+ slot = find_completed_slot(&n_pending);
+
+ if (slot != NULL) {
+
+ ut_ad(m_array->is_mutex_owned());
+
+ err = check_state(slot);
+
+ /* DB_FAIL is not a hard error, we should retry */
+ if (err != DB_FAIL) {
+ break;
+ }
+
+ /* Partial IO, resubmit request for
+ remaining bytes to read/write */
+ err = resubmit(slot);
+
+ if (err != DB_SUCCESS) {
+ break;
+ }
+
+ m_array->release();
+
+ } else if (is_shutdown() && n_pending == 0) {
+
+ /* There is no completed request. If there is
+ no pending request at all, and the system is
+ being shut down, exit. */
+
+ *m1 = NULL;
+ *m2 = NULL;
+
+ return(DB_SUCCESS);
- return(FALSE);
+ } else {
+
+ /* Wait for some request. Note that we return
+ from wait if we have found a request. */
+
+ srv_set_io_thread_op_info(
+ m_global_segment,
+ "waiting for completed aio requests");
+
+ collect();
+ }
+ }
+
+ if (err == DB_IO_PARTIAL_FAILED) {
+ /* Aborting in case of submit failure */
+ ib::fatal()
+			<< "Native Linux AIO interface: "
+ "io_submit() call failed when "
+ "resubmitting a partial I/O "
+ "request on the file " << slot->name
+ << ".";
}
- return (TRUE);
-#endif /* __WIN__ */
+ *m1 = slot->m1;
+ *m2 = slot->m2;
+
+ *request = slot->type;
+
+ m_array->release(slot);
+
+ m_array->release();
+
+ return(err);
}
-/****************************************************************//**
-NOTE! Use the corresponding macro os_file_create_simple(), not directly
-this function!
-A simple function to open or create a file.
-@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INTERN
-os_file_t
-os_file_create_simple_func(
-/*=======================*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: create mode */
- ulint access_type,/*!< in: OS_FILE_READ_ONLY or
- OS_FILE_READ_WRITE */
- ibool* success)/*!< out: TRUE if succeed, FALSE if error */
+/** This function is only used in Linux native asynchronous i/o.
+Waits for an aio operation to complete. This function is used to wait for
+the completed requests. The aio array of pending requests is divided
+into segments. The thread specifies which segment or slot it wants to wait
+for. NOTE: this function will also take care of freeing the aio slot,
+therefore no other thread is allowed to do the freeing!
+
+@param[in] global_seg segment number in the aio array
+ to wait for; segment 0 is the ibuf
+ i/o thread, segment 1 is log i/o thread,
+ then follow the non-ibuf read threads,
+ and the last are the non-ibuf write
+ threads.
+@param[out] m1 the messages passed with the
+@param[out] m2 AIO request; note that in case the
+ AIO operation failed, these output
+ parameters are valid and can be used to
+ restart the operation.
+@param[out]	request	IO context
+@return DB_SUCCESS if the IO was successful */
+static
+dberr_t
+os_aio_linux_handler(
+ ulint global_segment,
+ fil_node_t** m1,
+ void** m2,
+ IORequest* request)
{
- os_file_t file;
- ibool retry;
+ return LinuxAIOHandler(global_segment).poll(m1, m2, request);
+}
- *success = FALSE;
-#ifdef __WIN__
- DWORD access;
- DWORD create_flag;
- DWORD attributes = 0;
+/** Dispatch an AIO request to the kernel.
+@param[in,out] slot an already reserved slot
+@return true on success. */
+bool
+AIO::linux_dispatch(Slot* slot)
+{
+ ut_a(slot->is_reserved);
+ ut_ad(slot->type.validate());
- ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT));
- ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT));
+ /* Find out what we are going to work with.
+ The iocb struct is directly in the slot.
+ The io_context_t is one per segment. */
- if (create_mode == OS_FILE_OPEN) {
+ ulint io_ctx_index;
+ struct iocb* iocb = &slot->control;
- create_flag = OPEN_EXISTING;
+ io_ctx_index = (slot->pos * m_n_segments) / m_slots.size();
- } else if (srv_read_only_mode) {
+ ut_a(reinterpret_cast<size_t>(iocb->u.c.buf) % OS_FILE_LOG_BLOCK_SIZE
+ == 0);
- create_flag = OPEN_EXISTING;
+ int ret = io_submit(io_ctx(io_ctx_index), 1, &iocb);
+ ut_a(ret != -EINVAL);
- } else if (create_mode == OS_FILE_CREATE) {
+ /* io_submit() returns number of successfully queued requests
+ or -errno. */
- create_flag = CREATE_NEW;
+ if (ret != 1) {
+ errno = -ret;
+ }
- } else if (create_mode == OS_FILE_CREATE_PATH) {
+ return(ret == 1);
+}
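+
+/* Editorial sketch (not part of this change): io_submit() returns the
+number of queued requests or a negated errno, which gives the caller
+pattern used above. The helper name is hypothetical. */
+static bool
+example_submit_one(io_context_t ctx, struct iocb* iocb)
+{
+	int	ret = io_submit(ctx, 1, &iocb);
+
+	if (ret == 1) {
+		/* Exactly one request was queued. */
+		return(true);
+	}
+
+	/* libaio returns -errno on failure. */
+	errno = -ret;
+
+	return(false);
+}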
+
+/** Creates an io_context_t for native linux AIO.
+@param[in] max_events number of events
+@param[out] io_ctx io_ctx to initialize.
+@return true on success. */
+bool
+AIO::linux_create_io_ctx(
+ unsigned max_events,
+ io_context_t& io_ctx)
+{
+ ssize_t n_retries = 0;
- ut_a(!srv_read_only_mode);
+ for (;;) {
- /* Create subdirs along the path if needed */
- *success = os_file_create_subdirs_if_needed(name);
+ memset(&io_ctx, 0x0, sizeof(io_ctx));
- if (!*success) {
+ /* Initialize the io_ctx. Tell it how many pending
+ IO requests this context will handle. */
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unable to create subdirectories '%s'",
- name);
+ int ret = io_setup(max_events, &io_ctx);
+ ut_a(ret != -EINVAL);
- return((os_file_t) -1);
+ if (ret == 0) {
+ /* Success. Return now. */
+ return(true);
}
- create_flag = CREATE_NEW;
- create_mode = OS_FILE_CREATE;
+ /* If we hit EAGAIN we'll make a few attempts before failing. */
+
+ switch (ret) {
+ case -EAGAIN:
+ if (n_retries == 0) {
+ /* First time around. */
+ ib::warn()
+ << "io_setup() failed with EAGAIN."
+ " Will make "
+ << OS_AIO_IO_SETUP_RETRY_ATTEMPTS
+ << " attempts before giving up.";
+ }
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unknown file create mode (%lu) for file '%s'",
- create_mode, name);
+ if (n_retries < OS_AIO_IO_SETUP_RETRY_ATTEMPTS) {
- return((os_file_t) -1);
- }
+ ++n_retries;
- if (access_type == OS_FILE_READ_ONLY) {
- access = GENERIC_READ;
- } else if (srv_read_only_mode) {
+ ib::warn()
+ << "io_setup() attempt "
+ << n_retries << ".";
- ib_logf(IB_LOG_LEVEL_INFO,
- "read only mode set. Unable to "
- "open file '%s' in RW mode, trying RO mode", name);
+ os_thread_sleep(OS_AIO_IO_SETUP_RETRY_SLEEP);
- access = GENERIC_READ;
+ continue;
+ }
- } else if (access_type == OS_FILE_READ_WRITE) {
- access = GENERIC_READ | GENERIC_WRITE;
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unknown file access type (%lu) for file '%s'",
- access_type, name);
+ /* Have tried enough. Better call it a day. */
+ ib::error()
+ << "io_setup() failed with EAGAIN after "
+ << OS_AIO_IO_SETUP_RETRY_ATTEMPTS
+ << " attempts.";
+ break;
+
+ case -ENOSYS:
+ ib::error()
+ << "Linux Native AIO interface"
+ " is not supported on this platform. Please"
+ " check your OS documentation and install"
+ " appropriate binary of InnoDB.";
+
+ break;
- return((os_file_t) -1);
+ default:
+ ib::error()
+ << "Linux Native AIO setup"
+ << " returned following error["
+				<< " returned the following error["
+ break;
+ }
+
+ ib::info()
+ << "You can disable Linux Native AIO by"
+ " setting innodb_use_native_aio = 0 in my.cnf";
+
+ break;
}
- do {
- /* Use default security attributes and no template file. */
+ return(false);
+}
- file = CreateFile(
- (LPCTSTR) name, access,
- FILE_SHARE_READ | FILE_SHARE_DELETE, NULL,
- create_flag, attributes, NULL);
+/** Checks if the system supports native linux aio. On some kernel
+versions where native aio is supported it won't work on tmpfs. In such
+cases we can't use native aio as it is not possible to mix simulated
+and native aio.
+@return true if supported, false otherwise. */
+bool
+AIO::is_linux_native_aio_supported()
+{
+ int fd;
+ io_context_t io_ctx;
+ char name[1000];
- if (file == INVALID_HANDLE_VALUE) {
+ if (!linux_create_io_ctx(1, io_ctx)) {
- *success = FALSE;
+ /* The platform does not support native aio. */
- retry = os_file_handle_error(
- name, create_mode == OS_FILE_OPEN ?
- "open" : "create", __FILE__, __LINE__);
+ return(false);
- } else {
- *success = TRUE;
- retry = false;
+ } else if (!srv_read_only_mode) {
+
+ /* Now check if tmpdir supports native aio ops. */
+ fd = innobase_mysql_tmpfile(NULL);
+
+ if (fd < 0) {
+ ib::warn()
+ << "Unable to create temp file to check"
+ " native AIO support.";
+
+ int ret = io_destroy(io_ctx);
+ ut_a(ret != -EINVAL);
+ ut_ad(ret != -EFAULT);
+
+ return(false);
}
+ } else {
- } while (retry);
+ os_normalize_path(srv_log_group_home_dir);
-#else /* __WIN__ */
- int create_flag;
- if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW)
- WAIT_ALLOW_WRITES();
+ ulint dirnamelen = strlen(srv_log_group_home_dir);
- ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT));
- ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT));
+ ut_a(dirnamelen < (sizeof name) - 10 - sizeof "ib_logfile");
- if (create_mode == OS_FILE_OPEN) {
+ memcpy(name, srv_log_group_home_dir, dirnamelen);
- if (access_type == OS_FILE_READ_ONLY) {
- create_flag = O_RDONLY;
- } else if (srv_read_only_mode) {
- create_flag = O_RDONLY;
- } else {
- create_flag = O_RDWR;
+ /* Add a path separator if needed. */
+ if (dirnamelen && name[dirnamelen - 1] != OS_PATH_SEPARATOR) {
+
+ name[dirnamelen++] = OS_PATH_SEPARATOR;
}
- } else if (srv_read_only_mode) {
+ strcpy(name + dirnamelen, "ib_logfile0");
- create_flag = O_RDONLY;
+ fd = open(name, O_RDONLY | O_CLOEXEC);
- } else if (create_mode == OS_FILE_CREATE) {
+ if (fd == -1) {
- create_flag = O_RDWR | O_CREAT | O_EXCL;
+ ib::warn()
+ << "Unable to open"
+ << " \"" << name << "\" to check native"
+ << " AIO read support.";
- } else if (create_mode == OS_FILE_CREATE_PATH) {
+ int ret = io_destroy(io_ctx);
+			ut_a(ret != -EINVAL);
+			ut_ad(ret != -EFAULT);
- /* Create subdirs along the path if needed */
+ return(false);
+ }
+ }
- *success = os_file_create_subdirs_if_needed(name);
+ struct io_event io_event;
- if (!*success) {
+ memset(&io_event, 0x0, sizeof(io_event));
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unable to create subdirectories '%s'",
- name);
+ byte* buf = static_cast<byte*>(ut_malloc_nokey(UNIV_PAGE_SIZE * 2));
+ byte* ptr = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE));
- return((os_file_t) -1);
- }
+ struct iocb iocb;
+
+ /* Suppress valgrind warning. */
+ memset(buf, 0x00, UNIV_PAGE_SIZE * 2);
+ memset(&iocb, 0x0, sizeof(iocb));
+
+ struct iocb* p_iocb = &iocb;
+
+ if (!srv_read_only_mode) {
+
+ io_prep_pwrite(p_iocb, fd, ptr, UNIV_PAGE_SIZE, 0);
- create_flag = O_RDWR | O_CREAT | O_EXCL;
- create_mode = OS_FILE_CREATE;
} else {
+ ut_a(UNIV_PAGE_SIZE >= 512);
+ io_prep_pread(p_iocb, fd, ptr, 512, 0);
+ }
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unknown file create mode (%lu) for file '%s'",
- create_mode, name);
+ ut_a(reinterpret_cast<size_t>(p_iocb->u.c.buf) % OS_FILE_LOG_BLOCK_SIZE
+ == 0);
+ int err = io_submit(io_ctx, 1, &p_iocb);
+ ut_a(err != -EINVAL);
- return((os_file_t) -1);
+ if (err >= 1) {
+ /* Now collect the submitted IO request. */
+ err = io_getevents(io_ctx, 1, 1, &io_event, NULL);
+ ut_a(err != -EINVAL);
}
- do {
- file = ::open(name, create_flag | O_CLOEXEC, os_innodb_umask);
+ ut_free(buf);
+ close(fd);
- if (file == -1) {
- *success = FALSE;
+ switch (err) {
+ case 1:
+ {
+ int ret = io_destroy(io_ctx);
+ ut_a(ret != -EINVAL);
+ ut_ad(ret != -EFAULT);
- retry = os_file_handle_error(
- name,
- create_mode == OS_FILE_OPEN
- ? "open" : "create", __FILE__, __LINE__);
- } else {
- *success = TRUE;
- retry = false;
+ return(true);
}
- } while (retry);
+ case -EINVAL:
+ case -ENOSYS:
+ ib::error()
+ << "Linux Native AIO not supported. You can either"
+ " move "
+ << (srv_read_only_mode ? name : "tmpdir")
+ << " to a file system that supports native"
+ " AIO or you can set innodb_use_native_aio to"
+ " FALSE to avoid this message.";
-#ifdef USE_FILE_LOCK
- if (!srv_read_only_mode
- && *success
- && (access_type == OS_FILE_READ_WRITE)
- && os_file_lock(file, name)) {
+ /* fall through. */
+ default:
+ ib::error()
+ << "Linux Native AIO check on "
+ << (srv_read_only_mode ? name : "tmpdir")
+			<< " returned error[" << -err << "]";
+ }
- *success = FALSE;
- close(file);
- file = -1;
+ int ret = io_destroy(io_ctx);
+ ut_a(ret != -EINVAL);
+ ut_ad(ret != -EFAULT);
+
+ return(false);
+}
+
+#endif /* LINUX_NATIVE_AIO */
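+
+/* A minimal usage sketch (illustrative only; assumes the usual startup
+flow where srv_use_native_aio is still tunable at this point):
+
+	if (srv_use_native_aio
+	    && !AIO::is_linux_native_aio_supported()) {
+		srv_use_native_aio = FALSE;	// fall back to simulated AIO
+	}
+*/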
+
+/** Retrieves the last error number if an error occurs in a file io function.
+The number should be retrieved before any other OS calls (because they may
+overwrite the error number). If the number is not known to this program,
+the OS error number + OS_FILE_ERROR_MAX is returned.
+@param[in]	report_all_errors	true if we want an error message
+					printed for all errors
+@param[in]	on_error_silent		if true, don't print any diagnostic
+					to the log
+@return error number, or OS error number + OS_FILE_ERROR_MAX */
+static
+ulint
+os_file_get_last_error_low(
+ bool report_all_errors,
+ bool on_error_silent)
+{
+ int err = errno;
+
+ if (err == 0) {
+ return(0);
}
-#endif /* USE_FILE_LOCK */
-#endif /* __WIN__ */
+ if (report_all_errors
+ || (err != ENOSPC && err != EEXIST && !on_error_silent)) {
+
+ ib::error()
+ << "Operating system error number "
+ << err
+ << " in a file operation.";
- return(file);
+ if (err == ENOENT) {
+
+ ib::error()
+ << "The error means the system"
+ " cannot find the path specified.";
+
+ if (srv_is_being_started) {
+
+ ib::error()
+ << "If you are installing InnoDB,"
+ " remember that you must create"
+ " directories yourself, InnoDB"
+ " does not create them.";
+ }
+ } else if (err == EACCES) {
+
+ ib::error()
+ << "The error means mysqld does not have"
+ " the access rights to the directory.";
+
+ } else {
+ if (strerror(err) != NULL) {
+
+ ib::error()
+ << "Error number " << err << " means '"
+ << strerror(err) << "'";
+ }
+
+ ib::info() << OPERATING_SYSTEM_ERROR_MSG;
+ }
+ }
+
+ switch (err) {
+ case ENOSPC:
+ return(OS_FILE_DISK_FULL);
+ case ENOENT:
+ return(OS_FILE_NOT_FOUND);
+ case EEXIST:
+ return(OS_FILE_ALREADY_EXISTS);
+ case EXDEV:
+ case ENOTDIR:
+ case EISDIR:
+ return(OS_FILE_PATH_ERROR);
+ case EAGAIN:
+ if (srv_use_native_aio) {
+ return(OS_FILE_AIO_RESOURCES_RESERVED);
+ }
+ break;
+ case EINTR:
+ if (srv_use_native_aio) {
+ return(OS_FILE_AIO_INTERRUPTED);
+ }
+ break;
+ case EACCES:
+ return(OS_FILE_ACCESS_VIOLATION);
+ }
+ return(OS_FILE_ERROR_MAX + err);
}
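+
+/* A contrived sketch (illustrative only) of the errno mapping above:
+
+	errno = ENOSPC;		// as if a write just failed
+	ulint	err = os_file_get_last_error_low(false, false);
+	ut_ad(err == OS_FILE_DISK_FULL);
+*/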
-/****************************************************************//**
-NOTE! Use the corresponding macro
-os_file_create_simple_no_error_handling(), not directly this function!
-A simple function to open or create a file.
-@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INTERN
-pfs_os_file_t
-os_file_create_simple_no_error_handling_func(
-/*=========================================*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: create mode */
- ulint access_type,/*!< in: OS_FILE_READ_ONLY,
- OS_FILE_READ_WRITE, or
- OS_FILE_READ_ALLOW_DELETE; the last option is
- used by a backup program reading the file */
- ibool* success,/*!< out: TRUE if succeed, FALSE if error */
- ulint atomic_writes) /*! in: atomic writes table option
- value */
+/** Wrapper to fsync(2) that retries the call on some errors.
+Returns the value 0 if successful; otherwise the value -1 is returned and
+the global variable errno is set to indicate the error.
+@param[in] file open file handle
+@return 0 if success, -1 otherwise */
+static
+int
+os_file_fsync_posix(
+ os_file_t file)
{
- pfs_os_file_t file;
- atomic_writes_t awrites = (atomic_writes_t) atomic_writes;
+ ulint failures = 0;
- *success = FALSE;
-#ifdef __WIN__
- DWORD access;
- DWORD create_flag;
- DWORD attributes = 0;
- DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_DELETE;
- ut_a(name);
+ for (;;) {
- ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT));
- ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT));
+ ++os_n_fsyncs;
- if (create_mode == OS_FILE_OPEN) {
- create_flag = OPEN_EXISTING;
- } else if (srv_read_only_mode) {
- create_flag = OPEN_EXISTING;
- } else if (create_mode == OS_FILE_CREATE) {
- create_flag = CREATE_NEW;
- } else {
+ int ret = fsync(file);
+
+ if (ret == 0) {
+ return(ret);
+ }
+
+ switch(errno) {
+ case ENOLCK:
+
+ ++failures;
+ ut_a(failures < 1000);
+
+ if (!(failures % 100)) {
+
+ ib::warn()
+ << "fsync(): "
+ << "No locks available; retrying";
+ }
+
+ /* 0.2 sec */
+ os_thread_sleep(200000);
+ break;
+
+ case EINTR:
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unknown file create mode (%lu) for file '%s'",
- create_mode, name);
- file = INVALID_HANDLE_VALUE;
- return(file);
+ ++failures;
+ ut_a(failures < 2000);
+ break;
+
+ default:
+ ib::fatal() << "fsync() returned " << errno;
+ }
}
+}
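+
+/* A minimal usage sketch (illustrative only; `fd` is a placeholder for an
+open data file handle). The helper absorbs transient ENOLCK/EINTR failures
+up to a bounded retry count and calls ib::fatal() on anything else, so a
+zero return is the only way out:
+
+	if (os_file_fsync_posix(fd) == 0) {
+		// data and metadata have reached stable storage
+	}
+*/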
- if (access_type == OS_FILE_READ_ONLY) {
- access = GENERIC_READ;
- } else if (srv_read_only_mode) {
- access = GENERIC_READ;
- } else if (access_type == OS_FILE_READ_WRITE) {
- access = GENERIC_READ | GENERIC_WRITE;
- } else if (access_type == OS_FILE_READ_ALLOW_DELETE) {
+/** Check the existence and type of the given file.
+@param[in] path path name of file
+@param[out] exists true if the file exists
+@param[out] type Type of the file, if it exists
+@return true if call succeeded */
+static
+bool
+os_file_status_posix(
+ const char* path,
+ bool* exists,
+ os_file_type_t* type)
+{
+ struct stat statinfo;
- ut_a(!srv_read_only_mode);
+ int ret = stat(path, &statinfo);
- access = GENERIC_READ;
+ *exists = !ret;
- /*!< A backup program has to give mysqld the maximum
- freedom to do what it likes with the file */
+ if (!ret) {
+ /* file exists, everything OK */
+
+ } else if (errno == ENOENT || errno == ENOTDIR || errno == ENAMETOOLONG) {
+ /* file does not exist */
+ return(true);
- share_mode |= FILE_SHARE_DELETE | FILE_SHARE_WRITE;
} else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unknown file access type (%lu) for file '%s'",
- access_type, name);
- file = INVALID_HANDLE_VALUE;
- return(file);
+ /* file exists, but stat call failed */
+ os_file_handle_error_no_exit(path, "stat", false);
+ return(false);
}
- file = CreateFile((LPCTSTR) name,
- access,
- share_mode,
- NULL, // Security attributes
- create_flag,
- attributes,
- NULL); // No template file
+ if (S_ISDIR(statinfo.st_mode)) {
+ *type = OS_FILE_TYPE_DIR;
- /* If we have proper file handle and atomic writes should be used,
- try to set atomic writes and if that fails when creating a new
- table, produce a error. If atomic writes are used on existing
- file, ignore error and use traditional writes for that file */
- if (file != INVALID_HANDLE_VALUE
- && (awrites == ATOMIC_WRITES_ON ||
- (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT))
- && !os_file_set_atomic_writes(name, file)) {
- if (create_mode == OS_FILE_CREATE) {
- fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n");
- CloseHandle(file);
- os_file_delete_if_exists_func(name);
- *success = FALSE;
- file = INVALID_HANDLE_VALUE;
- }
+ } else if (S_ISLNK(statinfo.st_mode)) {
+ *type = OS_FILE_TYPE_LINK;
+
+ } else if (S_ISREG(statinfo.st_mode)) {
+ *type = OS_FILE_TYPE_FILE;
+ } else {
+ *type = OS_FILE_TYPE_UNKNOWN;
+ }
+
+ return(true);
+}
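+
+/* A minimal usage sketch (illustrative only; the path is a placeholder).
+Note that a missing file is a successful probe, not an error:
+
+	bool		exists;
+	os_file_type_t	type;
+
+	if (os_file_status_posix("./ib_logfile0", &exists, &type)
+	    && exists && type == OS_FILE_TYPE_FILE) {
+		// a regular file is present; safe to open
+	}
+*/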
+
+/** NOTE! Use the corresponding macro os_file_flush(), not directly this
+function!
+Flushes the write buffers of a given file to the disk.
+@param[in] file handle to a file
+@return true if success */
+bool
+os_file_flush_func(
+ os_file_t file)
+{
+ int ret;
+
+ WAIT_ALLOW_WRITES();
+ ret = os_file_fsync_posix(file);
+
+ if (ret == 0) {
+ return(true);
}
- *success = file != INVALID_HANDLE_VALUE;
-#else /* __WIN__ */
+ /* Since Linux returns EINVAL if the 'file' is actually a raw device,
+ we choose to ignore that error if we are using raw disks */
+
+ if (srv_start_raw_disk_in_use && errno == EINVAL) {
+
+ return(true);
+ }
+
+ ib::error() << "The OS said file flush did not succeed";
+
+ os_file_handle_error(NULL, "flush");
+
+ /* It is a fatal error if a file flush does not succeed, because then
+ the database can get corrupt on disk */
+ ut_error;
+
+ return(false);
+}
+
+/** NOTE! Use the corresponding macro os_file_create_simple(), not directly
+this function!
+A simple function to open or create a file.
+@param[in] name name of the file or path as a null-terminated
+ string
+@param[in] create_mode create mode
+@param[in] access_type OS_FILE_READ_ONLY or OS_FILE_READ_WRITE
+@param[in] read_only if true, read only checks are enforced
+@param[out] success true if succeed, false if error
+@return handle to the file, not defined if error, error number
+ can be retrieved with os_file_get_last_error */
+pfs_os_file_t
+os_file_create_simple_func(
+ const char* name,
+ ulint create_mode,
+ ulint access_type,
+ bool read_only,
+ bool* success)
+{
+ pfs_os_file_t file;
+
+ *success = false;
+
int create_flag;
const char* mode_str = NULL;
- ut_a(name);
- if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW)
+
+ if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW) {
WAIT_ALLOW_WRITES();
+ }
ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT));
ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT));
if (create_mode == OS_FILE_OPEN) {
-
mode_str = "OPEN";
if (access_type == OS_FILE_READ_ONLY) {
create_flag = O_RDONLY;
- } else if (srv_read_only_mode) {
+ } else if (read_only) {
create_flag = O_RDONLY;
} else {
-
- ut_a(access_type == OS_FILE_READ_WRITE
- || access_type == OS_FILE_READ_ALLOW_DELETE);
-
create_flag = O_RDWR;
}
- } else if (srv_read_only_mode) {
+ } else if (read_only) {
mode_str = "OPEN";
-
create_flag = O_RDONLY;
} else if (create_mode == OS_FILE_CREATE) {
mode_str = "CREATE";
-
create_flag = O_RDWR | O_CREAT | O_EXCL;
+ } else if (create_mode == OS_FILE_CREATE_PATH) {
+
+ mode_str = "CREATE PATH";
+ /* Create subdirs along the path if needed. */
+
+ *success = os_file_create_subdirs_if_needed(name);
+
+ if (!*success) {
+
+ ib::error()
+ << "Unable to create subdirectories '"
+ << name << "'";
+
+ return(OS_FILE_CLOSED);
+ }
+
+ create_flag = O_RDWR | O_CREAT | O_EXCL;
+ create_mode = OS_FILE_CREATE;
} else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unknown file create mode (%lu) for file '%s'",
- create_mode, name);
- file = -1;
- return(file);
+
+ ib::error()
+ << "Unknown file create mode ("
+			<< create_mode << ")"
+ << " for file '" << name << "'";
+
+ return(OS_FILE_CLOSED);
}
- file = ::open(name, create_flag | O_CLOEXEC, os_innodb_umask);
+ bool retry;
- *success = file != -1;
+ do {
+ file = open(name, create_flag | O_CLOEXEC, os_innodb_umask);
+
+ if (file == -1) {
+ *success = false;
+ retry = os_file_handle_error(
+ name,
+ create_mode == OS_FILE_OPEN
+ ? "open" : "create");
+ } else {
+ *success = true;
+ retry = false;
+ }
+
+ } while (retry);
/* This function is always called for data files, we should disable
OS caching (O_DIRECT) here as we do in os_file_create_func(), so
we open the same file in the same mode, see man page of open(2). */
if (!srv_read_only_mode
&& *success
- && (srv_unix_file_flush_method == SRV_UNIX_O_DIRECT
- || srv_unix_file_flush_method == SRV_UNIX_O_DIRECT_NO_FSYNC)) {
+ && (srv_file_flush_method == SRV_O_DIRECT
+ || srv_file_flush_method == SRV_O_DIRECT_NO_FSYNC)) {
- os_file_set_nocache(file, name, mode_str);
+ os_file_set_nocache(file, name, mode_str);
}
#ifdef USE_FILE_LOCK
- if (!srv_read_only_mode
+ if (!read_only
&& *success
- && access_type == OS_FILE_READ_WRITE
+ && (access_type == OS_FILE_READ_WRITE)
&& os_file_lock(file, name)) {
- *success = FALSE;
+ *success = false;
close(file);
file = -1;
-
}
#endif /* USE_FILE_LOCK */
- /* If we have proper file handle and atomic writes should be used,
- try to set atomic writes and if that fails when creating a new
- table, produce a error. If atomic writes are used on existing
- file, ignore error and use traditional writes for that file */
- if (file != -1
- && (awrites == ATOMIC_WRITES_ON ||
- (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT))
- && !os_file_set_atomic_writes(name, file)) {
- if (create_mode == OS_FILE_CREATE) {
- fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n");
- close(file);
- os_file_delete_if_exists_func(name);
- *success = FALSE;
- file = -1;
- }
- }
-
-
-#endif /* __WIN__ */
-
return(file);
}
-/****************************************************************//**
-Tries to disable OS caching on an opened file descriptor. */
-UNIV_INTERN
-void
-os_file_set_nocache(
-/*================*/
- os_file_t fd /*!< in: file descriptor to alter */
- __attribute__((unused)),
- const char* file_name /*!< in: used in the diagnostic
- message */
- MY_ATTRIBUTE((unused)),
- const char* operation_name MY_ATTRIBUTE((unused)))
- /*!< in: "open" or "create"; used
- in the diagnostic message */
+/** This function attempts to create a directory named pathname. The new
+directory gets default permissions. On Unix the permissions are
+(0770 & ~umask). If the directory exists already, nothing is done and
+the call succeeds, unless the fail_if_exists argument is true.
+If another error occurs, such as a permission error, this does not crash,
+but reports the error and returns false.
+@param[in] pathname directory name as null-terminated string
+@param[in] fail_if_exists if true, pre-existing directory is treated as
+ an error.
+@return true if call succeeds, false on error */
+bool
+os_file_create_directory(
+ const char* pathname,
+ bool fail_if_exists)
{
- /* some versions of Solaris may not have DIRECTIO_ON */
-#if defined(UNIV_SOLARIS) && defined(DIRECTIO_ON)
- if (directio(fd, DIRECTIO_ON) == -1) {
- int errno_save = errno;
+ int rcode;
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Failed to set DIRECTIO_ON on file %s: %s: %s, "
- "continuing anyway.",
- file_name, operation_name, strerror(errno_save));
- }
-#elif defined(O_DIRECT)
- if (fcntl(fd, F_SETFL, O_DIRECT) == -1) {
- int errno_save = errno;
- static bool warning_message_printed = false;
- if (errno_save == EINVAL) {
- if (!warning_message_printed) {
- warning_message_printed = true;
-# ifdef UNIV_LINUX
- ib_logf(IB_LOG_LEVEL_WARN,
- "Failed to set O_DIRECT on file "
- "%s: %s: %s, continuing anyway. "
- "O_DIRECT is known to result "
- "in 'Invalid argument' on Linux on "
- "tmpfs, see MySQL Bug#26662.",
- file_name, operation_name,
- strerror(errno_save));
-# else /* UNIV_LINUX */
- goto short_warning;
-# endif /* UNIV_LINUX */
- }
- } else {
-# ifndef UNIV_LINUX
-short_warning:
-# endif
- ib_logf(IB_LOG_LEVEL_WARN,
- "Failed to set O_DIRECT on file %s: %s: %s, "
- "continuing anyway.",
- file_name, operation_name, strerror(errno_save));
- }
- }
-#endif /* defined(UNIV_SOLARIS) && defined(DIRECTIO_ON) */
-}
+ WAIT_ALLOW_WRITES();
+ rcode = mkdir(pathname, 0770);
-/****************************************************************//**
-NOTE! Use the corresponding macro os_file_create(), not directly
-this function!
-Opens an existing file or creates a new.
-@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INTERN
-pfs_os_file_t
-os_file_create_func(
-/*================*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: create mode */
- ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous,
- non-buffered i/o is desired,
- OS_FILE_NORMAL, if any normal file;
- NOTE that it also depends on type, os_aio_..
- and srv_.. variables whether we really use
- async i/o or unbuffered i/o: look in the
- function source code for the exact rules */
- ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
- ibool* success,/*!< out: TRUE if succeed, FALSE if error */
- ulint atomic_writes) /*! in: atomic writes table option
- value */
-{
- pfs_os_file_t file;
- ibool retry;
- ibool on_error_no_exit;
- ibool on_error_silent;
- atomic_writes_t awrites = (atomic_writes_t) atomic_writes;
+ if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) {
+ /* failure */
+ os_file_handle_error_no_exit(pathname, "mkdir", false);
-#ifdef __WIN__
- DBUG_EXECUTE_IF(
- "ib_create_table_fail_disk_full",
- *success = FALSE;
- SetLastError(ERROR_DISK_FULL);
- file = INVALID_HANDLE_VALUE;
- return(file);
- );
-#else /* __WIN__ */
- DBUG_EXECUTE_IF(
- "ib_create_table_fail_disk_full",
- *success = FALSE;
- errno = ENOSPC;
- file = -1;
- return(file);
- );
-#endif /* __WIN__ */
+ return(false);
+ }
-#ifdef __WIN__
- DWORD create_flag;
- DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_DELETE;
+ return(true);
+}
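+
+/* A minimal usage sketch (illustrative only; the path is a placeholder).
+With fail_if_exists=false the call is idempotent, like `mkdir -p` for a
+single path component:
+
+	bool	ok = os_file_create_directory("./backup", false)
+		&& os_file_create_directory("./backup", false);
+	ut_ad(ok);	// the second call reports success too
+*/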
- on_error_no_exit = create_mode & OS_FILE_ON_ERROR_NO_EXIT
- ? TRUE : FALSE;
+/**
+The os_file_opendir() function opens a directory stream corresponding to the
+directory named by the dirname argument. The directory stream is positioned
+at the first entry. In both Unix and Windows we automatically skip the '.'
+and '..' items at the start of the directory listing.
+@param[in] dirname directory name; it must not contain a trailing
+ '\' or '/'
+@param[in]	error_is_fatal	true if we should treat an error as a
+				fatal error; when opening symlinks we do
+				not want a fatal error if the target
+				happens not to be a directory
+@return directory stream, NULL if error */
+os_file_dir_t
+os_file_opendir(
+ const char* dirname,
+ bool error_is_fatal)
+{
+ os_file_dir_t dir;
+ dir = opendir(dirname);
- on_error_silent = create_mode & OS_FILE_ON_ERROR_SILENT
- ? TRUE : FALSE;
+ if (dir == NULL && error_is_fatal) {
+ os_file_handle_error(dirname, "opendir");
+ }
- create_mode &= ~OS_FILE_ON_ERROR_NO_EXIT;
- create_mode &= ~OS_FILE_ON_ERROR_SILENT;
+ return(dir);
+}
- if (create_mode == OS_FILE_OPEN_RAW) {
+/** Closes a directory stream.
+@param[in] dir directory stream
+@return 0 if success, -1 if failure */
+int
+os_file_closedir(
+ os_file_dir_t dir)
+{
+ int ret = closedir(dir);
- ut_a(!srv_read_only_mode);
+ if (ret != 0) {
+ os_file_handle_error_no_exit(NULL, "closedir", false);
+ }
- create_flag = OPEN_EXISTING;
+ return(ret);
+}
- /* On Windows Physical devices require admin privileges and
- have to have the write-share mode set. See the remarks
- section for the CreateFile() function documentation in MSDN. */
+/** This function returns information about the next file in the directory. We jump
+over the '.' and '..' entries in the directory.
+@param[in] dirname directory name or path
+@param[in] dir directory stream
+@param[out] info buffer where the info is returned
+@return 0 if ok, -1 if error, 1 if at the end of the directory */
+int
+os_file_readdir_next_file(
+ const char* dirname,
+ os_file_dir_t dir,
+ os_file_stat_t* info)
+{
+ struct dirent* ent;
+ char* full_path;
+ int ret;
+ struct stat statinfo;
- share_mode |= FILE_SHARE_WRITE;
+next_file:
- } else if (create_mode == OS_FILE_OPEN
- || create_mode == OS_FILE_OPEN_RETRY) {
+ ent = readdir(dir);
- create_flag = OPEN_EXISTING;
+ if (ent == NULL) {
- } else if (srv_read_only_mode) {
+ return(1);
+ }
- create_flag = OPEN_EXISTING;
+ ut_a(strlen(ent->d_name) < OS_FILE_MAX_PATH);
- } else if (create_mode == OS_FILE_CREATE) {
+ if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) {
- create_flag = CREATE_NEW;
+ goto next_file;
+ }
- } else if (create_mode == OS_FILE_OVERWRITE) {
+ strcpy(info->name, ent->d_name);
- create_flag = CREATE_ALWAYS;
+ full_path = static_cast<char*>(
+ ut_malloc_nokey(strlen(dirname) + strlen(ent->d_name) + 10));
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unknown file create mode (%lu) for file '%s'",
- create_mode, name);
+ sprintf(full_path, "%s/%s", dirname, ent->d_name);
- file = INVALID_HANDLE_VALUE;
- return(file);
- }
+ ret = stat(full_path, &statinfo);
- DWORD attributes = 0;
+ if (ret) {
-#ifdef UNIV_HOTBACKUP
- attributes |= FILE_FLAG_NO_BUFFERING;
-#else
- if (purpose == OS_FILE_AIO) {
+ if (errno == ENOENT) {
+ /* readdir() returned a file that does not exist,
+ it must have been deleted in the meantime. Do what
+ would have happened if the file was deleted before
+ readdir() - ignore and go to the next entry.
+ If this is the last entry then info->name will still
+ contain the name of the deleted file when this
+ function returns, but this is not an issue since the
+ caller shouldn't be looking at info when end of
+ directory is returned. */
-#ifdef WIN_ASYNC_IO
- /* If specified, use asynchronous (overlapped) io and no
- buffering of writes in the OS */
+ ut_free(full_path);
- if (srv_use_native_aio) {
- attributes |= FILE_FLAG_OVERLAPPED;
+ goto next_file;
}
-#endif /* WIN_ASYNC_IO */
-
- } else if (purpose == OS_FILE_NORMAL) {
- /* Use default setting. */
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unknown purpose flag (%lu) while opening file '%s'",
- purpose, name);
- file = INVALID_HANDLE_VALUE;
- return(file);
- }
-
-#ifdef UNIV_NON_BUFFERED_IO
- // TODO: Create a bug, this looks wrong. The flush log
- // parameter is dynamic.
- if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
- /* Do not use unbuffered i/o for the log files because
- value 2 denotes that we do not flush the log at every
- commit, but only once per second */
+ os_file_handle_error_no_exit(full_path, "stat", false);
- } else if (srv_win_file_flush_method == SRV_WIN_IO_UNBUFFERED) {
+ ut_free(full_path);
- attributes |= FILE_FLAG_NO_BUFFERING;
+ return(-1);
}
-#endif /* UNIV_NON_BUFFERED_IO */
-#endif /* UNIV_HOTBACKUP */
- DWORD access = GENERIC_READ;
+ info->size = statinfo.st_size;
- if (!srv_read_only_mode) {
- access |= GENERIC_WRITE;
+ if (S_ISDIR(statinfo.st_mode)) {
+ info->type = OS_FILE_TYPE_DIR;
+ } else if (S_ISLNK(statinfo.st_mode)) {
+ info->type = OS_FILE_TYPE_LINK;
+ } else if (S_ISREG(statinfo.st_mode)) {
+ info->type = OS_FILE_TYPE_FILE;
+ } else {
+ info->type = OS_FILE_TYPE_UNKNOWN;
}
- do {
- /* Use default security attributes and no template file. */
- file = CreateFile(
- (LPCTSTR) name, access, share_mode, NULL,
- create_flag, attributes, NULL);
-
- if (file == INVALID_HANDLE_VALUE) {
- const char* operation;
+ ut_free(full_path);
- operation = (create_mode == OS_FILE_CREATE
- && !srv_read_only_mode)
- ? "create" : "open";
+ return(0);
+}
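+
+/* A minimal scan-loop sketch (illustrative only; `path` is a placeholder).
+A return value of 1 means end-of-directory and -1 a stat() failure; both
+stop the iteration below:
+
+	os_file_stat_t	info;
+	os_file_dir_t	dir = os_file_opendir(path, true);
+
+	while (os_file_readdir_next_file(path, dir, &info) == 0) {
+		// inspect info.name, info.type and info.size
+	}
+
+	os_file_closedir(dir);
+*/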
- *success = FALSE;
+/** NOTE! Use the corresponding macro os_file_create(), not directly
+this function!
+Opens an existing file or creates a new.
+@param[in] name name of the file or path as a null-terminated
+ string
+@param[in] create_mode create mode
+@param[in] purpose OS_FILE_AIO, if asynchronous, non-buffered I/O
+ is desired, OS_FILE_NORMAL, if any normal file;
+ NOTE that it also depends on type, os_aio_..
+ and srv_.. variables whether we really use async
+ I/O or unbuffered I/O: look in the function
+ source code for the exact rules
+@param[in] type OS_DATA_FILE or OS_LOG_FILE
+@param[in]	read_only	if true, read only checks are enforced
+@param[out]	success		true if succeeded
+@return handle to the file, not defined if error, error number
+ can be retrieved with os_file_get_last_error */
+pfs_os_file_t
+os_file_create_func(
+ const char* name,
+ ulint create_mode,
+ ulint purpose,
+ ulint type,
+ bool read_only,
+ bool* success)
+{
+ bool on_error_no_exit;
+ bool on_error_silent;
- if (on_error_no_exit) {
- retry = os_file_handle_error_no_exit(
- name, operation, on_error_silent, __FILE__, __LINE__);
- } else {
- retry = os_file_handle_error(name, operation, __FILE__, __LINE__);
- }
- } else {
- *success = TRUE;
- retry = FALSE;
- }
+ *success = false;
- } while (retry);
+ DBUG_EXECUTE_IF(
+ "ib_create_table_fail_disk_full",
+ *success = false;
+ errno = ENOSPC;
+ return(OS_FILE_CLOSED);
+ );
- /* If we have proper file handle and atomic writes should be used,
- try to set atomic writes and if that fails when creating a new
- table, produce a error. If atomic writes are used on existing
- file, ignore error and use traditional writes for that file */
- if (file != INVALID_HANDLE_VALUE && type == OS_DATA_FILE
- && (awrites == ATOMIC_WRITES_ON ||
- (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT))
- && !os_file_set_atomic_writes(name, file)) {
- if (create_mode == OS_FILE_CREATE) {
- fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n");
- CloseHandle(file);
- os_file_delete_if_exists_func(name);
- *success = FALSE;
- file = INVALID_HANDLE_VALUE;
- }
- }
-#else /* __WIN__ */
int create_flag;
const char* mode_str = NULL;
- if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW)
- WAIT_ALLOW_WRITES();
on_error_no_exit = create_mode & OS_FILE_ON_ERROR_NO_EXIT
- ? TRUE : FALSE;
+ ? true : false;
on_error_silent = create_mode & OS_FILE_ON_ERROR_SILENT
- ? TRUE : FALSE;
+ ? true : false;
- create_mode &= ~OS_FILE_ON_ERROR_NO_EXIT;
- create_mode &= ~OS_FILE_ON_ERROR_SILENT;
+ create_mode &= ulint(~(OS_FILE_ON_ERROR_NO_EXIT
+ | OS_FILE_ON_ERROR_SILENT));
if (create_mode == OS_FILE_OPEN
|| create_mode == OS_FILE_OPEN_RAW
@@ -1926,9 +2979,9 @@ os_file_create_func(
mode_str = "OPEN";
- create_flag = srv_read_only_mode ? O_RDONLY : O_RDWR;
+ create_flag = read_only ? O_RDONLY : O_RDWR;
- } else if (srv_read_only_mode) {
+ } else if (read_only) {
mode_str = "OPEN";
@@ -1945,15 +2998,17 @@ os_file_create_func(
create_flag = O_RDWR | O_CREAT | O_TRUNC;
} else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unknown file create mode (%lu) for file '%s'",
- create_mode, name);
+ ib::error()
+ << "Unknown file create mode (" << create_mode << ")"
+ << " for file '" << name << "'";
- file = -1;
- return(file);
+ return(OS_FILE_CLOSED);
}
- ut_a(type == OS_LOG_FILE || type == OS_DATA_FILE);
+ ut_a(type == OS_LOG_FILE
+ || type == OS_DATA_FILE
+ || type == OS_DATA_TEMP_FILE);
+
ut_a(purpose == OS_FILE_AIO || purpose == OS_FILE_NORMAL);
#ifdef O_SYNC
@@ -1961,264 +3016,239 @@ os_file_create_func(
O_SYNC because the datasync options seemed to corrupt files in 2001
in both Linux and Solaris */
- if (!srv_read_only_mode
+ if (!read_only
&& type == OS_LOG_FILE
- && srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
+ && srv_file_flush_method == SRV_O_DSYNC) {
create_flag |= O_SYNC;
}
#endif /* O_SYNC */
+ os_file_t file;
+ bool retry;
+
do {
- file = ::open(name, create_flag | O_CLOEXEC, os_innodb_umask);
+ file = open(name, create_flag | O_CLOEXEC, os_innodb_umask);
if (file == -1) {
const char* operation;
operation = (create_mode == OS_FILE_CREATE
- && !srv_read_only_mode)
- ? "create" : "open";
+ && !read_only) ? "create" : "open";
- *success = FALSE;
+ *success = false;
if (on_error_no_exit) {
retry = os_file_handle_error_no_exit(
- name, operation, on_error_silent, __FILE__, __LINE__);
+ name, operation, on_error_silent);
} else {
- retry = os_file_handle_error(name, operation, __FILE__, __LINE__);
+ retry = os_file_handle_error(name, operation);
}
} else {
- *success = TRUE;
+ *success = true;
retry = false;
}
} while (retry);
/* We disable OS caching (O_DIRECT) only on data files */
- if (!srv_read_only_mode
+ if (!read_only
&& *success
- && type != OS_LOG_FILE
- && (srv_unix_file_flush_method == SRV_UNIX_O_DIRECT
- || srv_unix_file_flush_method == SRV_UNIX_O_DIRECT_NO_FSYNC)) {
+ && (type != OS_LOG_FILE && type != OS_DATA_TEMP_FILE)
+ && (srv_file_flush_method == SRV_O_DIRECT
+ || srv_file_flush_method == SRV_O_DIRECT_NO_FSYNC)) {
- os_file_set_nocache(file, name, mode_str);
+ os_file_set_nocache(file, name, mode_str);
}
#ifdef USE_FILE_LOCK
- if (!srv_read_only_mode
+ if (!read_only
&& *success
&& create_mode != OS_FILE_OPEN_RAW
&& os_file_lock(file, name)) {
if (create_mode == OS_FILE_OPEN_RETRY) {
- ut_a(!srv_read_only_mode);
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Retrying to lock the first data file");
+ ib::info()
+ << "Retrying to lock the first data file";
for (int i = 0; i < 100; i++) {
os_thread_sleep(1000000);
if (!os_file_lock(file, name)) {
- *success = TRUE;
+ *success = true;
return(file);
}
}
- ib_logf(IB_LOG_LEVEL_INFO,
- "Unable to open the first data file");
+ ib::info()
+ << "Unable to open the first data file";
}
- *success = FALSE;
+ *success = false;
close(file);
file = -1;
}
#endif /* USE_FILE_LOCK */
- /* If we have proper file handle and atomic writes should be used,
- try to set atomic writes and if that fails when creating a new
- table, produce a error. If atomic writes are used on existing
- file, ignore error and use traditional writes for that file */
- if (file != -1 && type == OS_DATA_FILE
- && (awrites == ATOMIC_WRITES_ON ||
- (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT))
- && !os_file_set_atomic_writes(name, file)) {
- if (create_mode == OS_FILE_CREATE) {
- fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n");
- close(file);
- os_file_delete_if_exists_func(name);
- *success = FALSE;
- file = -1;
- }
- }
-#endif /* __WIN__ */
-
return(file);
}
-/***********************************************************************//**
-Deletes a file if it exists. The file has to be closed before calling this.
-@return TRUE if success */
-UNIV_INTERN
-bool
-os_file_delete_if_exists_func(
-/*==========================*/
- const char* name) /*!< in: file path as a null-terminated
- string */
+/** NOTE! Use the corresponding macro
+os_file_create_simple_no_error_handling(), not directly this function!
+A simple function to open or create a file.
+@param[in] name name of the file or path as a null-terminated
+ string
+@param[in] create_mode create mode
+@param[in] access_type OS_FILE_READ_ONLY, OS_FILE_READ_WRITE, or
+ OS_FILE_READ_ALLOW_DELETE; the last option
+ is used by a backup program reading the file
+@param[in] read_only if true read only mode checks are enforced
+@param[out] success true if succeeded
+@return own: handle to the file, not defined if error, error number
+ can be retrieved with os_file_get_last_error */
+pfs_os_file_t
+os_file_create_simple_no_error_handling_func(
+ const char* name,
+ ulint create_mode,
+ ulint access_type,
+ bool read_only,
+ bool* success)
{
-#ifdef __WIN__
- bool ret;
- ulint count = 0;
-loop:
- /* In Windows, deleting an .ibd file may fail if mysqlbackup is copying
- it */
-
- ret = DeleteFile((LPCTSTR) name);
+ os_file_t file;
+ int create_flag;
- if (ret) {
- return(true);
+ if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW) {
+ WAIT_ALLOW_WRITES();
}
- DWORD lasterr = GetLastError();
- if (lasterr == ERROR_FILE_NOT_FOUND
- || lasterr == ERROR_PATH_NOT_FOUND) {
- /* the file does not exist, this not an error */
+ ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT));
+ ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT));
- return(true);
- }
+ *success = false;
- count++;
+ if (create_mode == OS_FILE_OPEN) {
- if (count > 100 && 0 == (count % 10)) {
- os_file_get_last_error(true); /* print error information */
+ if (access_type == OS_FILE_READ_ONLY) {
- ib_logf(IB_LOG_LEVEL_WARN, "Delete of file %s failed.", name);
- }
+ create_flag = O_RDONLY;
- os_thread_sleep(500000); /* sleep for 0.5 second */
+ } else if (read_only) {
- if (count > 2000) {
+ create_flag = O_RDONLY;
- return(false);
- }
+ } else {
- goto loop;
-#else
- int ret;
- WAIT_ALLOW_WRITES();
+ ut_a(access_type == OS_FILE_READ_WRITE
+ || access_type == OS_FILE_READ_ALLOW_DELETE);
- ret = unlink(name);
+ create_flag = O_RDWR;
+ }
- if (ret != 0 && errno != ENOENT) {
- os_file_handle_error_no_exit(name, "delete", FALSE, __FILE__, __LINE__);
+ } else if (read_only) {
- return(false);
- }
+ create_flag = O_RDONLY;
- return(true);
-#endif /* __WIN__ */
-}
+ } else if (create_mode == OS_FILE_CREATE) {
-/***********************************************************************//**
-Deletes a file. The file has to be closed before calling this.
-@return TRUE if success */
-UNIV_INTERN
-bool
-os_file_delete_func(
-/*================*/
- const char* name) /*!< in: file path as a null-terminated
- string */
-{
-#ifdef __WIN__
- BOOL ret;
- ulint count = 0;
-loop:
- /* In Windows, deleting an .ibd file may fail if mysqlbackup is copying
- it */
+ create_flag = O_RDWR | O_CREAT | O_EXCL;
- ret = DeleteFile((LPCTSTR) name);
+ } else {
- if (ret) {
- return(true);
+ ib::error()
+ << "Unknown file create mode "
+ << create_mode << " for file '" << name << "'";
+
+ return(OS_FILE_CLOSED);
}
- if (GetLastError() == ERROR_FILE_NOT_FOUND) {
- /* If the file does not exist, we classify this as a 'mild'
- error and return */
+ file = open(name, create_flag | O_CLOEXEC, os_innodb_umask);
- return(false);
- }
+ *success = (file != -1);
- count++;
+#ifdef USE_FILE_LOCK
+ if (!read_only
+ && *success
+ && access_type == OS_FILE_READ_WRITE
+ && os_file_lock(file, name)) {
- if (count > 100 && 0 == (count % 10)) {
- os_file_get_last_error(true); /* print error information */
+ *success = false;
+ close(file);
+ file = -1;
- fprintf(stderr,
- "InnoDB: Warning: cannot delete file %s\n"
- "InnoDB: Are you running mysqlbackup"
- " to back up the file?\n", name);
}
+#endif /* USE_FILE_LOCK */
- os_thread_sleep(1000000); /* sleep for a second */
-
- if (count > 2000) {
+ return(file);
+}
- return(false);
+/** Deletes a file if it exists. The file has to be closed before calling this.
+@param[in] name file path as a null-terminated string
+@param[out] exist indicate if file pre-exist
+@return true if success */
+bool
+os_file_delete_if_exists_func(
+ const char* name,
+ bool* exist)
+{
+ if (exist != NULL) {
+ *exist = true;
}
- goto loop;
-#else
int ret;
WAIT_ALLOW_WRITES();
ret = unlink(name);
- if (ret != 0) {
- os_file_handle_error_no_exit(name, "delete", FALSE, __FILE__, __LINE__);
+ if (ret != 0 && errno == ENOENT) {
+ if (exist != NULL) {
+ *exist = false;
+ }
+ } else if (ret != 0 && errno != ENOENT) {
+ os_file_handle_error_no_exit(name, "delete", false);
return(false);
}
return(true);
-#endif
}
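+
+/* A minimal usage sketch (illustrative only; `tmp_path` is a placeholder).
+The exist flag distinguishes "deleted" from "was never there":
+
+	bool	existed;
+
+	if (os_file_delete_if_exists_func(tmp_path, &existed) && !existed) {
+		// nothing was there; still reported as success
+	}
+*/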
-/** Handle RENAME error.
-@param name old name of the file
-@param new_name new name of the file */
-static void os_file_handle_rename_error(const char* name, const char* new_name)
+/** Deletes a file. The file has to be closed before calling this.
+@param[in] name file path as a null-terminated string
+@return true if success */
+bool
+os_file_delete_func(
+ const char* name)
{
- if (os_file_get_last_error(true) != OS_FILE_DISK_FULL) {
- ib_logf(IB_LOG_LEVEL_ERROR, "Cannot rename file '%s' to '%s'",
- name, new_name);
- } else if (!os_has_said_disk_full) {
- os_has_said_disk_full = true;
- /* Disk full error is reported irrespective of the
- on_error_silent setting. */
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Full disk prevents renaming file '%s' to '%s'",
- name, new_name);
+ int ret;
+ WAIT_ALLOW_WRITES();
+
+ ret = unlink(name);
+
+ if (ret != 0) {
+		os_file_handle_error_no_exit(name, "delete", false);
+
+ return(false);
}
+
+ return(true);
}
-/***********************************************************************//**
-NOTE! Use the corresponding macro os_file_rename(), not directly this function!
+/** NOTE! Use the corresponding macro os_file_rename(), not directly this
+function!
Renames a file (can also move it to another directory). It is safest that the
file is closed before calling this function.
-@return TRUE if success */
-UNIV_INTERN
-ibool
+@param[in] oldpath old file path as a null-terminated string
+@param[in] newpath new file path
+@return true if success */
+bool
os_file_rename_func(
-/*================*/
- const char* oldpath,/*!< in: old file path as a null-terminated
- string */
- const char* newpath)/*!< in: new file path */
+ const char* oldpath,
+ const char* newpath)
{
#ifdef UNIV_DEBUG
os_file_type_t type;
- ibool exists;
+ bool exists;
/* New path must not exist. */
ut_ad(os_file_status(newpath, &exists, &type));
@@ -2229,18 +3259,6 @@ os_file_rename_func(
ut_ad(exists);
#endif /* UNIV_DEBUG */
-#ifdef __WIN__
- BOOL ret;
-
- ret = MoveFile((LPCTSTR) oldpath, (LPCTSTR) newpath);
-
- if (ret) {
- return(TRUE);
- }
-
- os_file_handle_rename_error(oldpath, newpath);
- return(FALSE);
-#else
int ret;
WAIT_ALLOW_WRITES();
@@ -2248,324 +3266,444 @@ os_file_rename_func(
if (ret != 0) {
os_file_handle_rename_error(oldpath, newpath);
- return(FALSE);
+
+ return(false);
}
- return(TRUE);
-#endif /* __WIN__ */
+ return(true);
}
-/***********************************************************************//**
-NOTE! Use the corresponding macro os_file_close(), not directly this function!
+/** NOTE! Use the corresponding macro os_file_close(), not directly this
+function!
Closes a file handle. In case of error, error number can be retrieved with
os_file_get_last_error.
-@return TRUE if success */
-UNIV_INTERN
-ibool
+@param[in] file Handle to close
+@return true if success */
+bool
os_file_close_func(
-/*===============*/
- os_file_t file) /*!< in, own: handle to a file */
+ os_file_t file)
{
-#ifdef __WIN__
- BOOL ret;
+ int ret = close(file);
- ret = CloseHandle(file);
+ if (ret == -1) {
+ os_file_handle_error(NULL, "close");
- if (ret) {
- return(TRUE);
+ return(false);
}
- os_file_handle_error(NULL, "close", __FILE__, __LINE__);
+ return(true);
+}
- return(FALSE);
-#else
- int ret;
+/** Gets a file size.
+@param[in] file handle to an open file
+@return file size, or (os_offset_t) -1 on failure */
+os_offset_t
+os_file_get_size(os_file_t file)
+{
+ struct stat statbuf;
+ return fstat(file, &statbuf) ? os_offset_t(-1) : statbuf.st_size;
+}
- ret = close(file);
+/** Gets a file size.
+@param[in] filename Full path to the filename to check
+@return file size if OK; on failure m_total_size is set to ~0 and
+	m_alloc_size to errno */
+os_file_size_t
+os_file_get_size(
+ const char* filename)
+{
+ struct stat s;
+ os_file_size_t file_size;
- if (ret == -1) {
- os_file_handle_error(NULL, "close", __FILE__, __LINE__);
+ int ret = stat(filename, &s);
- return(FALSE);
+ if (ret == 0) {
+ file_size.m_total_size = s.st_size;
+ /* st_blocks is in 512 byte sized blocks */
+ file_size.m_alloc_size = s.st_blocks * 512;
+ } else {
+ file_size.m_total_size = ~0;
+ file_size.m_alloc_size = (os_offset_t) errno;
}
- return(TRUE);
-#endif /* __WIN__ */
+ return(file_size);
}
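+
+/* A minimal usage sketch (illustrative only; the path is a placeholder).
+Comparing the two sizes reveals sparse regions, given the 512-byte
+st_blocks unit noted above:
+
+	os_file_size_t	sz = os_file_get_size("./ibdata1");
+
+	if (sz.m_total_size != (os_offset_t) ~0
+	    && sz.m_alloc_size < sz.m_total_size) {
+		// the file contains holes
+	}
+*/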
-#ifdef UNIV_HOTBACKUP
-/***********************************************************************//**
-Closes a file handle.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_close_no_error_handling(
-/*============================*/
- os_file_t file) /*!< in, own: handle to a file */
+/** This function returns information about the specified file
+@param[in] path pathname of the file
+@param[out] stat_info information of a file in a directory
+@param[in,out]	statinfo	the stat(2) output buffer to fill in
+@param[in] check_rw_perm for testing whether the file can be opened
+ in RW mode
+@param[in] read_only if true read only mode checks are enforced
+@return DB_SUCCESS if all OK */
+static
+dberr_t
+os_file_get_status_posix(
+ const char* path,
+ os_file_stat_t* stat_info,
+ struct stat* statinfo,
+ bool check_rw_perm,
+ bool read_only)
{
-#ifdef __WIN__
- BOOL ret;
+ int ret = stat(path, statinfo);
- ret = CloseHandle(file);
+ if (ret && (errno == ENOENT || errno == ENOTDIR
+ || errno == ENAMETOOLONG)) {
+ /* file does not exist */
- if (ret) {
- return(TRUE);
+ return(DB_NOT_FOUND);
+
+ } else if (ret) {
+ /* file exists, but stat call failed */
+
+ os_file_handle_error_no_exit(path, "stat", false);
+
+ return(DB_FAIL);
}
- return(FALSE);
-#else
- int ret;
+ switch (statinfo->st_mode & S_IFMT) {
+ case S_IFDIR:
+ stat_info->type = OS_FILE_TYPE_DIR;
+ break;
+ case S_IFLNK:
+ stat_info->type = OS_FILE_TYPE_LINK;
+ break;
+ case S_IFBLK:
+ /* Handle block device as regular file. */
+ case S_IFCHR:
+ /* Handle character device as regular file. */
+ case S_IFREG:
+ stat_info->type = OS_FILE_TYPE_FILE;
+ break;
+ default:
+ stat_info->type = OS_FILE_TYPE_UNKNOWN;
+ }
- ret = close(file);
+ stat_info->size = statinfo->st_size;
+ stat_info->block_size = statinfo->st_blksize;
+ stat_info->alloc_size = statinfo->st_blocks * 512;
- if (ret == -1) {
+ if (check_rw_perm
+ && (stat_info->type == OS_FILE_TYPE_FILE
+ || stat_info->type == OS_FILE_TYPE_BLOCK)) {
- return(FALSE);
+ stat_info->rw_perm = !access(path, read_only
+ ? R_OK : R_OK | W_OK);
}
- return(TRUE);
-#endif /* __WIN__ */
+ return(DB_SUCCESS);
}
-#endif /* UNIV_HOTBACKUP */
-/***********************************************************************//**
-Gets a file size.
-@return file size, or (os_offset_t) -1 on failure */
-UNIV_INTERN
-os_offset_t
-os_file_get_size(
-/*=============*/
- pfs_os_file_t file) /*!< in: handle to a file */
+/** Truncates a file to a specified size in bytes.
+Do nothing if the size to preserve is greater or equal to the current
+size of the file.
+@param[in] pathname file path
+@param[in] file file to be truncated
+@param[in] size size to preserve in bytes
+@return true if success */
+static
+bool
+os_file_truncate_posix(
+ const char* pathname,
+ os_file_t file,
+ os_offset_t size)
{
-#ifdef __WIN__
- os_offset_t offset;
- DWORD high;
- DWORD low;
+ int res = ftruncate(file, size);
- low = GetFileSize(file, &high);
+ if (res == -1) {
- if ((low == 0xFFFFFFFF) && (GetLastError() != NO_ERROR)) {
- return((os_offset_t) -1);
+ bool retry;
+
+ retry = os_file_handle_error_no_exit(
+ pathname, "truncate", false);
+
+ if (retry) {
+ ib::warn()
+ << "Truncate failed for '"
+ << pathname << "'";
+ }
}
- offset = (os_offset_t) low | ((os_offset_t) high << 32);
+ return(res == 0);
+}
- return(offset);
-#else
- struct stat statbuf;
- return fstat(file, &statbuf) ? os_offset_t(-1) : statbuf.st_size;
-#endif /* __WIN__ */
+/** Truncates a file at its current position.
+@return true if success */
+bool
+os_file_set_eof(
+ FILE* file) /*!< in: file to be truncated */
+{
+ WAIT_ALLOW_WRITES();
+ return(!ftruncate(fileno(file), ftell(file)));
}
-/** Extend a file.
+#else /* !_WIN32 */
-On Windows, extending a file allocates blocks for the file,
-unless the file is sparse.
+#include <WinIoCtl.h>
-On Unix, we will extend the file with ftruncate(), if
-file needs to be sparse. Otherwise posix_fallocate() is used
-when available, and if not, binary zeroes are added to the end
-of file.
+/*
+Windows : Handling synchronous IO on files opened asynchronously.
-@param[in] name file name
-@param[in] file file handle
-@param[in] size desired file size
-@param[in] sparse whether to create a sparse file (no preallocating)
-@return whether the operation succeeded */
-UNIV_INTERN
-bool
-os_file_set_size(
- const char* name,
- pfs_os_file_t file,
- os_offset_t size,
- bool is_sparse)
+If file is opened for asynchronous IO (FILE_FLAG_OVERLAPPED) and also bound to
+a completion port, then every IO on this file would normally be enqueued to the
+completion port. Sometimes however we would like to do a synchronous IO. This is
+possible if we initialize overlapped.hEvent with a valid event and set its
+lowest order bit to 1 (see the MSDN ReadFile and WriteFile documentation).
+
+We'll create this special event once for each thread and store it in
+thread local storage.
+*/
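+
+/* A minimal Win32 sketch (illustrative only; `h`, `buf` and `len` are
+placeholders) of the trick described above:
+
+	OVERLAPPED	ov = {0};
+	HANDLE		ev = CreateEventA(NULL, FALSE, FALSE, NULL);
+
+	// the set low-order bit hides this IO from the completion port
+	ov.hEvent = (HANDLE) ((uintptr_t) ev | 1);
+
+	if (ReadFile(h, buf, len, NULL, &ov)
+	    || GetLastError() == ERROR_IO_PENDING) {
+		DWORD	n_read;
+		GetOverlappedResult(h, &ov, &n_read, TRUE);	// wait here
+	}
+*/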
+
+
+static void __stdcall win_free_syncio_event(void *data) {
+ if (data) {
+ CloseHandle((HANDLE)data);
+ }
+}
+
+
+/*
+Initialize the TLS index for the event handle used for synchronous IO on
+files that might be opened with FILE_FLAG_OVERLAPPED.
+*/
+static void win_init_syncio_event() {
+ fls_sync_io = FlsAlloc(win_free_syncio_event);
+ ut_a(fls_sync_io != FLS_OUT_OF_INDEXES);
+}
+
+
+/*
+Retrieve the per-thread event for doing synchronous IO on asynchronously opened files.
+*/
+static HANDLE win_get_syncio_event()
{
-#ifdef _WIN32
- FILE_END_OF_FILE_INFO feof;
- feof.EndOfFile.QuadPart = size;
- bool success = SetFileInformationByHandle(file,
- FileEndOfFileInfo,
- &feof, sizeof feof);
- if (!success) {
- ib_logf(IB_LOG_LEVEL_ERROR, "os_file_set_size() of file %s"
- " to " INT64PF " bytes failed with %u",
- name, size, GetLastError());
+ HANDLE h;
+
+ h = (HANDLE)FlsGetValue(fls_sync_io);
+ if (h) {
+ return h;
+ }
+ h = CreateEventA(NULL, FALSE, FALSE, NULL);
+ ut_a(h);
+	/* Set low-order bit to keep I/O completion from being queued */
+ h = (HANDLE)((uintptr_t)h | 1);
+ FlsSetValue(fls_sync_io, h);
+ return h;
+}
+
+
+/** Do the read/write
+@param[in] request The IO context and type
+@return the number of bytes read/written or negative value on error */
+ssize_t
+SyncFileIO::execute(const IORequest& request)
+{
+ OVERLAPPED seek;
+
+ memset(&seek, 0x0, sizeof(seek));
+
+ seek.hEvent = win_get_syncio_event();
+ seek.Offset = (DWORD) m_offset & 0xFFFFFFFF;
+ seek.OffsetHigh = (DWORD) (m_offset >> 32);
+
+ BOOL ret;
+ DWORD n_bytes;
+
+ if (request.is_read()) {
+ ret = ReadFile(m_fh, m_buf,
+ static_cast<DWORD>(m_n), NULL, &seek);
+
+ } else {
+ ut_ad(request.is_write());
+ ret = WriteFile(m_fh, m_buf,
+ static_cast<DWORD>(m_n), NULL, &seek);
}
- return(success);
-#else
- if (is_sparse) {
- bool success = !ftruncate(file, size);
- if (!success) {
- ib_logf(IB_LOG_LEVEL_ERROR, "ftruncate of file %s"
- " to " INT64PF " bytes failed with error %d",
- name, size, errno);
- }
- return(success);
+ if (ret || (GetLastError() == ERROR_IO_PENDING)) {
+ /* Wait for async io to complete */
+ ret = GetOverlappedResult(m_fh, &seek, &n_bytes, TRUE);
}
-# ifdef HAVE_POSIX_FALLOCATE
- if (srv_use_posix_fallocate) {
- int err;
- do {
- os_offset_t current_size = os_file_get_size(file);
- err = current_size >= size
- ? 0 : posix_fallocate(file, current_size,
- size - current_size);
- } while (err == EINTR
- && srv_shutdown_state == SRV_SHUTDOWN_NONE);
-
- switch (err) {
- case 0:
- return true;
- default:
- ib_logf(IB_LOG_LEVEL_ERROR,
- "preallocating " INT64PF " bytes for"
- "file %s failed with error %d",
- size, name, err);
- /* fall through */
- case EINTR:
- errno = err;
- return false;
- case EINVAL:
- /* fall back to the code below */
- break;
- }
+ return(ret ? static_cast<ssize_t>(n_bytes) : -1);
+}
+
+/** Do the read/write
+@param[in,out] slot The IO slot, it has the IO context
+@return the number of bytes read/written or negative value on error */
+ssize_t
+SyncFileIO::execute(Slot* slot)
+{
+ BOOL ret;
+ slot->control.hEvent = win_get_syncio_event();
+ if (slot->type.is_read()) {
+
+ ret = ReadFile(
+ slot->file, slot->ptr, slot->len,
+ NULL, &slot->control);
+
+ } else {
+ ut_ad(slot->type.is_write());
+
+ ret = WriteFile(
+ slot->file, slot->ptr, slot->len,
+ NULL, &slot->control);
+
+ }
+ if (ret || (GetLastError() == ERROR_IO_PENDING)) {
+ /* Wait for async io to complete */
+ ret = GetOverlappedResult(slot->file, &slot->control, &slot->n_bytes, TRUE);
}
-# endif
- os_offset_t current_size = os_file_get_size(file);
+ return(ret ? static_cast<ssize_t>(slot->n_bytes) : -1);
+}
- if (current_size >= size) {
- return true;
+/* Startup/shutdown */
+
+struct WinIoInit
+{
+ WinIoInit() {
+		win_init_syncio_event();
}
- /* Write up to 1 megabyte at a time. */
- ulint buf_size = ut_min(64, (ulint) (size / UNIV_PAGE_SIZE))
- * UNIV_PAGE_SIZE;
- byte* buf2 = static_cast<byte*>(calloc(1, buf_size + UNIV_PAGE_SIZE));
-
- if (!buf2) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot allocate " ULINTPF " bytes to extend file\n",
- buf_size + UNIV_PAGE_SIZE);
- return(false);
+ ~WinIoInit() {
+ FlsFree(fls_sync_io);
}
+};
- /* Align the buffer for possible raw i/o */
- byte* buf = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
- bool ret;
+/* Ensures proper initialization and shutdown */
+static WinIoInit win_io_init;
- do {
- ulint n_bytes;
+/** Check if the file system supports sparse files.
+@param[in]	filename	File name
+@return true if the file system supports sparse files */
+static
+bool
+os_is_sparse_file_supported_win32(const char* filename)
+{
+ char volname[MAX_PATH];
+ BOOL result = GetVolumePathName(filename, volname, MAX_PATH);
- if (size - current_size < (os_offset_t) buf_size) {
- n_bytes = (ulint) (size - current_size);
- } else {
- n_bytes = buf_size;
- }
+ if (!result) {
- ret = os_file_write(name, file, buf, current_size, n_bytes);
+ ib::error()
+ << "os_is_sparse_file_supported: "
+ << "Failed to get the volume path name for: "
+ << filename
+			<< " - OS error number " << GetLastError();
- if (!ret) {
- break;
- }
+ return(false);
+ }
- current_size += n_bytes;
- } while (current_size < size
- && srv_shutdown_state == SRV_SHUTDOWN_NONE);
+ DWORD flags;
- free(buf2);
+ result = GetVolumeInformation(
+ volname, NULL, MAX_PATH, NULL, NULL,
+ &flags, NULL, MAX_PATH);
- return(ret && current_size >= size && os_file_flush(file));
-#endif
-}
-/***********************************************************************//**
-Truncates a file at its current position.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_set_eof(
-/*============*/
- FILE* file) /*!< in: file to be truncated */
-{
-#ifdef __WIN__
- HANDLE h = (HANDLE) _get_osfhandle(fileno(file));
- return(SetEndOfFile(h));
-#else /* __WIN__ */
- WAIT_ALLOW_WRITES();
- return(!ftruncate(fileno(file), ftell(file)));
-#endif /* __WIN__ */
-}
+ if (!result) {
+ ib::error()
+ << "os_is_sparse_file_supported: "
+ << "Failed to get the volume info for: "
+ << volname
+			<< " - OS error number " << GetLastError();
-#ifndef __WIN__
-/***********************************************************************//**
-Wrapper to fsync(2) that retries the call on some errors.
-Returns the value 0 if successful; otherwise the value -1 is returned and
-the global variable errno is set to indicate the error.
-@return 0 if success, -1 otherwise */
+ return(false);
+ }
+ return(flags & FILE_SUPPORTS_SPARSE_FILES) ? true : false;
+}
+
+/** Free storage space associated with a section of the file.
+@param[in] fh Open file handle
+@param[in]	off	Starting offset (SEEK_SET)
+@param[in]	len	Size of the hole
+@return DB_SUCCESS on success, DB_IO_NO_PUNCH_HOLE on failure */
static
-int
-os_file_fsync(
-/*==========*/
- os_file_t file) /*!< in: handle to a file */
+dberr_t
+os_file_punch_hole_win32(
+ os_file_t fh,
+ os_offset_t off,
+ os_offset_t len)
{
- int ret;
- int failures;
- ibool retry;
+ FILE_ZERO_DATA_INFORMATION punch;
- failures = 0;
+ punch.FileOffset.QuadPart = off;
+ punch.BeyondFinalZero.QuadPart = off + len;
- do {
- ret = fsync(file);
+ /* If lpOverlapped is NULL, lpBytesReturned cannot be NULL,
+ therefore we pass a dummy parameter. */
+ DWORD temp;
+ BOOL success = os_win32_device_io_control(
+ fh, FSCTL_SET_ZERO_DATA, &punch, sizeof(punch),
+ NULL, 0, &temp);
- os_n_fsyncs++;
+	return(success ? DB_SUCCESS : DB_IO_NO_PUNCH_HOLE);
+}
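+
+/* A minimal usage sketch (illustrative only). FSCTL_SET_ZERO_DATA only
+releases clusters on files marked sparse; on a non-sparse file the range
+is zeroed but stays allocated:
+
+	if (os_file_punch_hole_win32(fh, off, len) != DB_SUCCESS) {
+		// file system refused the hole; data stays as written
+	}
+*/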
- if (ret == -1 && errno == ENOLCK) {
+/** Check the existence and type of the given file.
+@param[in] path path name of file
+@param[out] exists true if the file exists
+@param[out] type Type of the file, if it exists
+@return true if call succeeded */
+static
+bool
+os_file_status_win32(
+ const char* path,
+ bool* exists,
+ os_file_type_t* type)
+{
+ int ret;
+ struct _stat64 statinfo;
- if (failures % 100 == 0) {
+ ret = _stat64(path, &statinfo);
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: fsync(): "
- "No locks available; retrying\n");
- }
+ *exists = !ret;
- os_thread_sleep(200000 /* 0.2 sec */);
+ if (!ret) {
+ /* file exists, everything OK */
- failures++;
+ } else if (errno == ENOENT || errno == ENOTDIR || errno == ENAMETOOLONG) {
+ /* file does not exist */
+ return(true);
- retry = TRUE;
- } else {
+ } else {
+ /* file exists, but stat call failed */
+ os_file_handle_error_no_exit(path, "stat", false);
+ return(false);
+ }
- retry = FALSE;
- }
- } while (retry);
+ if (_S_IFDIR & statinfo.st_mode) {
+ *type = OS_FILE_TYPE_DIR;
- return(ret);
+ } else if (_S_IFREG & statinfo.st_mode) {
+ *type = OS_FILE_TYPE_FILE;
+
+ } else {
+ *type = OS_FILE_TYPE_UNKNOWN;
+ }
+
+ return(true);
}
-#endif /* !__WIN__ */
-/***********************************************************************//**
-NOTE! Use the corresponding macro os_file_flush(), not directly this function!
+/** NOTE! Use the corresponding macro os_file_flush(), not directly this
+function!
Flushes the write buffers of a given file to the disk.
-@return TRUE if success */
-UNIV_INTERN
-ibool
+@param[in] file handle to a file
+@return true if success */
+bool
os_file_flush_func(
-/*===============*/
- os_file_t file) /*!< in, own: handle to a file */
+ os_file_t file)
{
-#ifdef __WIN__
- BOOL ret;
+ ++os_n_fsyncs;
- os_n_fsyncs++;
-
- ret = FlushFileBuffers(file);
+ BOOL ret = FlushFileBuffers(file);
if (ret) {
- return(TRUE);
+ return(true);
}
/* Since Windows returns ERROR_INVALID_FUNCTION if the 'file' is
@@ -2574,795 +3712,1035 @@ os_file_flush_func(
if (srv_start_raw_disk_in_use && GetLastError()
== ERROR_INVALID_FUNCTION) {
- return(TRUE);
+ return(true);
}
- os_file_handle_error(NULL, "flush", __FILE__, __LINE__);
+ os_file_handle_error(NULL, "flush");
/* It is a fatal error if a file flush does not succeed, because then
the database can get corrupt on disk */
ut_error;
- return(FALSE);
-#else
- int ret;
- WAIT_ALLOW_WRITES();
+ return(false);
+}
-#if defined(HAVE_DARWIN_THREADS)
-# ifndef F_FULLFSYNC
- /* The following definition is from the Mac OS X 10.3 <sys/fcntl.h> */
-# define F_FULLFSYNC 51 /* fsync + ask the drive to flush to the media */
-# elif F_FULLFSYNC != 51
-# error "F_FULLFSYNC != 51: ABI incompatibility with Mac OS X 10.3"
-# endif
- /* Apple has disabled fsync() for internal disk drives in OS X. That
- caused corruption for a user when he tested a power outage. Let us in
- OS X use a nonstandard flush method recommended by an Apple
- engineer. */
+/** Retrieves the last error number if an error occurs in a file io function.
+The number should be retrieved before any other OS calls (because they may
+overwrite the error number). If the number is not known to this program,
+the OS error number + OS_FILE_ERROR_MAX is returned.
+@param[in]	report_all_errors	true if we want an error message
+					printed for all errors
+@param[in]	on_error_silent		if true, don't print any diagnostic
+					to the log
+@return error number, or OS error number + OS_FILE_ERROR_MAX */
+static
+ulint
+os_file_get_last_error_low(
+ bool report_all_errors,
+ bool on_error_silent)
+{
+ ulint err = (ulint) GetLastError();
- if (!srv_have_fullfsync) {
- /* If we are not on an operating system that supports this,
- then fall back to a plain fsync. */
+ if (err == ERROR_SUCCESS) {
+ return(0);
+ }
- ret = os_file_fsync(file);
- } else {
- ret = fcntl(file, F_FULLFSYNC, NULL);
+ if (report_all_errors
+ || (!on_error_silent
+ && err != ERROR_DISK_FULL
+ && err != ERROR_FILE_EXISTS)) {
- if (ret) {
- /* If we are not on a file system that supports this,
- then fall back to a plain fsync. */
- ret = os_file_fsync(file);
+ ib::error()
+ << "Operating system error number " << err
+ << " in a file operation.";
+
+ if (err == ERROR_PATH_NOT_FOUND) {
+ ib::error()
+ << "The error means the system"
+ " cannot find the path specified.";
+
+ if (srv_is_being_started) {
+ ib::error()
+ << "If you are installing InnoDB,"
+ " remember that you must create"
+ " directories yourself, InnoDB"
+ " does not create them.";
+ }
+
+ } else if (err == ERROR_ACCESS_DENIED) {
+
+ ib::error()
+ << "The error means mysqld does not have"
+ " the access rights to"
+ " the directory. It may also be"
+ " you have created a subdirectory"
+ " of the same name as a data file.";
+
+ } else if (err == ERROR_SHARING_VIOLATION
+ || err == ERROR_LOCK_VIOLATION) {
+
+ ib::error()
+ << "The error means that another program"
+ " is using InnoDB's files."
+ " This might be a backup or antivirus"
+ " software or another instance"
+ " of MySQL."
+ " Please close it to get rid of this error.";
+
+ } else if (err == ERROR_WORKING_SET_QUOTA
+ || err == ERROR_NO_SYSTEM_RESOURCES) {
+
+ ib::error()
+ << "The error means that there are no"
+ " sufficient system resources or quota to"
+ " complete the operation.";
+
+ } else if (err == ERROR_OPERATION_ABORTED) {
+
+ ib::error()
+ << "The error means that the I/O"
+ " operation has been aborted"
+ " because of either a thread exit"
+ " or an application request."
+ " Retry attempt is made.";
+ } else {
+
+ ib::info() << OPERATING_SYSTEM_ERROR_MSG;
}
}
-#else
- ret = os_file_fsync(file);
-#endif
- if (ret == 0) {
- return(TRUE);
+ if (err == ERROR_FILE_NOT_FOUND) {
+ return(OS_FILE_NOT_FOUND);
+ } else if (err == ERROR_DISK_FULL) {
+ return(OS_FILE_DISK_FULL);
+ } else if (err == ERROR_FILE_EXISTS) {
+ return(OS_FILE_ALREADY_EXISTS);
+ } else if (err == ERROR_SHARING_VIOLATION
+ || err == ERROR_LOCK_VIOLATION) {
+ return(OS_FILE_SHARING_VIOLATION);
+ } else if (err == ERROR_WORKING_SET_QUOTA
+ || err == ERROR_NO_SYSTEM_RESOURCES) {
+ return(OS_FILE_INSUFFICIENT_RESOURCE);
+ } else if (err == ERROR_OPERATION_ABORTED) {
+ return(OS_FILE_OPERATION_ABORTED);
+ } else if (err == ERROR_ACCESS_DENIED) {
+ return(OS_FILE_ACCESS_VIOLATION);
}
- /* Since Linux returns EINVAL if the 'file' is actually a raw device,
- we choose to ignore that error if we are using raw disks */
+ return(OS_FILE_ERROR_MAX + err);
+}
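For illustration, a minimal sketch (not part of this patch) of how the mapped
codes above are meant to be consumed; os_file_get_last_error(), the public
wrapper defined near the end of this hunk, must run before any other OS call
clobbers GetLastError(), and the handling policy shown is an assumption:

	HANDLE	h = CreateFile("ibdata1", GENERIC_READ,
			       FILE_SHARE_READ | FILE_SHARE_DELETE, NULL,
			       OPEN_EXISTING, 0, NULL);
	if (h == INVALID_HANDLE_VALUE) {
		/* Fetch and map the error first. */
		ulint	err = os_file_get_last_error(true);
		if (err == OS_FILE_NOT_FOUND) {
			/* missing file: caller-specific handling */
		}
	} else {
		CloseHandle(h);
	}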
- if (srv_start_raw_disk_in_use && errno == EINVAL) {
- return(TRUE);
- }
+/** NOTE! Use the corresponding macro os_file_create_simple(), not directly
+this function!
+A simple function to open or create a file.
+@param[in] name name of the file or path as a null-terminated
+ string
+@param[in] create_mode create mode
+@param[in] access_type OS_FILE_READ_ONLY or OS_FILE_READ_WRITE
+@param[in] read_only if true read only mode checks are enforced
+@param[out] success true if succeed, false if error
+@return handle to the file, not defined if error, error number
+ can be retrieved with os_file_get_last_error */
+pfs_os_file_t
+os_file_create_simple_func(
+ const char* name,
+ ulint create_mode,
+ ulint access_type,
+ bool read_only,
+ bool* success)
+{
+ os_file_t file;
- ib_logf(IB_LOG_LEVEL_ERROR, "The OS said file flush did not succeed");
+ *success = false;
- os_file_handle_error(NULL, "flush", __FILE__, __LINE__);
+ DWORD access;
+ DWORD create_flag;
+ DWORD attributes = 0;
- /* It is a fatal error if a file flush does not succeed, because then
- the database can get corrupt on disk */
- ut_error;
+ ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT));
+ ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT));
+ ut_ad(srv_operation == SRV_OPERATION_NORMAL);
- return(FALSE);
-#endif
-}
+ if (create_mode == OS_FILE_OPEN) {
-#ifndef __WIN__
-/*******************************************************************//**
-Does a synchronous read operation in Posix.
-@return number of bytes read, -1 if error */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-ssize_t
-os_file_pread(
-/*==========*/
- os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
- ulint n, /*!< in: number of bytes to read */
- os_offset_t offset) /*!< in: file offset from where to read */
-{
- off_t offs;
+ create_flag = OPEN_EXISTING;
- ut_ad(n);
+ } else if (read_only) {
- /* If off_t is > 4 bytes in size, then we assume we can pass a
- 64-bit address */
- offs = (off_t) offset;
+ create_flag = OPEN_EXISTING;
+
+ } else if (create_mode == OS_FILE_CREATE) {
- if (sizeof(off_t) <= 4) {
- if (offset != (os_offset_t) offs) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "File read at offset > 4 GB");
+ create_flag = CREATE_NEW;
+
+ } else if (create_mode == OS_FILE_CREATE_PATH) {
+
+ /* Create subdirs along the path if needed. */
+ *success = os_file_create_subdirs_if_needed(name);
+
+ if (!*success) {
+
+ ib::error()
+ << "Unable to create subdirectories '"
+ << name << "'";
+
+ return(OS_FILE_CLOSED);
}
+
+ create_flag = CREATE_NEW;
+ create_mode = OS_FILE_CREATE;
+
+ } else {
+
+ ib::error()
+ << "Unknown file create mode ("
+ << create_mode << ") for file '"
+ << name << "'";
+
+ return(OS_FILE_CLOSED);
}
- os_n_file_reads++;
+ if (access_type == OS_FILE_READ_ONLY) {
- const bool monitor = MONITOR_IS_ON(MONITOR_OS_PENDING_READS);
+ access = GENERIC_READ;
-#ifdef HAVE_PREAD
- MONITOR_ATOMIC_INC_LOW(MONITOR_OS_PENDING_READS, monitor);
- ssize_t n_bytes = pread(file, buf, n, offs);
- MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_READS, monitor);
- return(n_bytes);
-#else
- {
- off_t ret_offset;
- ssize_t ret;
-#ifndef UNIV_HOTBACKUP
- ulint i;
-#endif /* !UNIV_HOTBACKUP */
+ } else if (read_only) {
+
+ ib::info()
+ << "Read only mode set. Unable to"
+ " open file '" << name << "' in RW mode, "
+ << "trying RO mode", name;
+
+ access = GENERIC_READ;
+
+ } else if (access_type == OS_FILE_READ_WRITE) {
- MONITOR_ATOMIC_INC_LOW(MONITOR_OS_PENDING_READS, monitor);
-#ifndef UNIV_HOTBACKUP
- /* Protect the seek / read operation with a mutex */
- i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+ access = GENERIC_READ | GENERIC_WRITE;
+
+ } else {
+
+ ib::error()
+ << "Unknown file access type (" << access_type << ") "
+ "for file '" << name << "'";
- os_mutex_enter(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
+ return(OS_FILE_CLOSED);
+ }
+
+ bool retry;
- ret_offset = lseek(file, offs, SEEK_SET);
+ do {
+ /* Use default security attributes and no template file. */
+
+ file = CreateFile(
+ (LPCTSTR) name, access,
+ FILE_SHARE_READ | FILE_SHARE_DELETE, NULL,
+ create_flag, attributes, NULL);
+
+ if (file == INVALID_HANDLE_VALUE) {
+
+ *success = false;
+
+ retry = os_file_handle_error(
+ name, create_mode == OS_FILE_OPEN ?
+ "open" : "create");
- if (ret_offset < 0) {
- ret = -1;
} else {
- ret = read(file, buf, (ssize_t) n);
+
+ retry = false;
+
+ *success = true;
}
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
+ } while (retry);
- MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_READS, monitor);
- return(ret);
- }
-#endif
+ return(file);
}
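A usage sketch for this function's macro form, assuming the usual wrapper
os_file_create_simple(key, name, create_mode, access_type, read_only, success)
and a PFS key such as innodb_data_file_key (both names are assumptions here):

	bool		success;
	pfs_os_file_t	fh = os_file_create_simple(
		innodb_data_file_key, "ibdata1",
		OS_FILE_OPEN, OS_FILE_READ_ONLY,
		srv_read_only_mode, &success);
	if (success) {
		os_file_close(fh);
	}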
-/*******************************************************************//**
-Does a synchronous write operation in Posix.
-@return number of bytes written, -1 if error */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-ssize_t
-os_file_pwrite(
-/*===========*/
- os_file_t file, /*!< in: handle to a file */
- const void* buf, /*!< in: buffer from where to write */
- ulint n, /*!< in: number of bytes to write */
- os_offset_t offset) /*!< in: file offset where to write */
+/** This function attempts to create a directory named pathname. The new
+directory gets default permissions. On Unix the permissions are
+(0770 & ~umask). If the directory exists already, nothing is done and
+the call succeeds, unless the fail_if_exists argument is true.
+If another error occurs, such as a permission error, this does not crash,
+but reports the error and returns false.
+@param[in] pathname directory name as null-terminated string
+@param[in] fail_if_exists if true, pre-existing directory is treated
+ as an error.
+@return true if call succeeds, false on error */
+bool
+os_file_create_directory(
+ const char* pathname,
+ bool fail_if_exists)
{
- ssize_t ret;
- off_t offs;
+ BOOL rcode;
- ut_ad(n);
- ut_ad(!srv_read_only_mode);
+ rcode = CreateDirectory((LPCTSTR) pathname, NULL);
+ if (!(rcode != 0
+ || (GetLastError() == ERROR_ALREADY_EXISTS
+ && !fail_if_exists))) {
- /* If off_t is > 4 bytes in size, then we assume we can pass a
- 64-bit address */
- offs = (off_t) offset;
+ os_file_handle_error_no_exit(
+ pathname, "CreateDirectory", false);
- if (sizeof(off_t) <= 4) {
- if (offset != (os_offset_t) offs) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "File write at offset > 4 GB.");
- }
+ return(false);
}
- os_n_file_writes++;
+ return(true);
+}
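A sketch of the intended call pattern (path illustrative):

	if (!os_file_create_directory("C:\\mysql\\data\\tmp", false)) {
		/* Real failure, e.g. permissions. With
		fail_if_exists == false, ERROR_ALREADY_EXISTS
		is tolerated and true is returned instead. */
	}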
- const bool monitor = MONITOR_IS_ON(MONITOR_OS_PENDING_WRITES);
-#ifdef HAVE_PWRITE
- MONITOR_ATOMIC_INC_LOW(MONITOR_OS_PENDING_WRITES, monitor);
- ret = pwrite(file, buf, (ssize_t) n, offs);
- MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_WRITES, monitor);
+/** The os_file_opendir() function opens a directory stream corresponding to the
+directory named by the dirname argument. The directory stream is positioned
+at the first entry. In both Unix and Windows we automatically skip the '.'
+and '..' items at the start of the directory listing.
+@param[in] dirname directory name; it must not contain a trailing
+ '\' or '/'
+@param[in]	error_is_fatal	true if we should treat an error as a fatal
+				error; if we try to open symlinks then we do
+				not want a fatal error if the entry happens
+				not to be a directory
+@return directory stream, NULL if error */
+os_file_dir_t
+os_file_opendir(
+ const char* dirname,
+ bool error_is_fatal)
+{
+ os_file_dir_t dir;
+ LPWIN32_FIND_DATA lpFindFileData;
+ char path[OS_FILE_MAX_PATH + 3];
- return(ret);
-#else
- {
- off_t ret_offset;
-# ifndef UNIV_HOTBACKUP
- ulint i;
-# endif /* !UNIV_HOTBACKUP */
+ ut_a(strlen(dirname) < OS_FILE_MAX_PATH);
- MONITOR_ATOMIC_INC_LOW(MONITOR_OS_PENDING_WRITES, monitor);
+ strcpy(path, dirname);
+ strcpy(path + strlen(path), "\\*");
-# ifndef UNIV_HOTBACKUP
- /* Protect the seek / write operation with a mutex */
- i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+ /* Note that in Windows opening the 'directory stream' also retrieves
+ the first entry in the directory. Since it is '.', that is no problem,
+ as we will skip over the '.' and '..' entries anyway. */
- os_mutex_enter(os_file_seek_mutexes[i]);
-# endif /* UNIV_HOTBACKUP */
+ lpFindFileData = static_cast<LPWIN32_FIND_DATA>(
+ ut_malloc_nokey(sizeof(WIN32_FIND_DATA)));
- ret_offset = lseek(file, offs, SEEK_SET);
+ dir = FindFirstFile((LPCTSTR) path, lpFindFileData);
- if (ret_offset < 0) {
- ret = -1;
+ ut_free(lpFindFileData);
- goto func_exit;
+ if (dir == INVALID_HANDLE_VALUE) {
+
+ if (error_is_fatal) {
+ os_file_handle_error(dirname, "opendir");
}
- ret = write(file, buf, (ssize_t) n);
+ return(NULL);
+ }
+
+ return(dir);
+}
+
+/** Closes a directory stream.
+@param[in] dir directory stream
+@return 0 if success, -1 if failure */
+int
+os_file_closedir(
+ os_file_dir_t dir)
+{
+ BOOL ret;
-func_exit:
-# ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-# endif /* !UNIV_HOTBACKUP */
+ ret = FindClose(dir);
- MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_WRITES, monitor);
- return(ret);
+ if (!ret) {
+ os_file_handle_error_no_exit(NULL, "closedir", false);
+
+ return(-1);
}
-#endif /* HAVE_PWRITE */
+
+ return(0);
}
-#endif
-/*******************************************************************//**
-NOTE! Use the corresponding macro os_file_read(), not directly this
-function!
-Requests a synchronous positioned read operation.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
-ibool
-os_file_read_func(
-/*==============*/
- os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
- os_offset_t offset, /*!< in: file offset where to read */
- ulint n) /*!< in: number of bytes to read */
+/** This function returns information of the next file in the directory. We
+jump over the '.' and '..' entries in the directory.
+@param[in] dirname directory name or path
+@param[in] dir directory stream
+@param[out] info buffer where the info is returned
+@return 0 if ok, -1 if error, 1 if at the end of the directory */
+int
+os_file_readdir_next_file(
+ const char* dirname,
+ os_file_dir_t dir,
+ os_file_stat_t* info)
{
-#ifdef __WIN__
BOOL ret;
- DWORD len;
- DWORD ret2;
- DWORD low;
- DWORD high;
- ibool retry;
-#ifndef UNIV_HOTBACKUP
- ulint i;
-#endif /* !UNIV_HOTBACKUP */
+ int status;
+ WIN32_FIND_DATA find_data;
- /* On 64-bit Windows, ulint is 64 bits. But offset and n should be
- no more than 32 bits. */
- ut_a((n & 0xFFFFFFFFUL) == n);
+next_file:
- os_n_file_reads++;
- os_bytes_read_since_printout += n;
- const bool monitor = MONITOR_IS_ON(MONITOR_OS_PENDING_READS);
+ ret = FindNextFile(dir, &find_data);
-try_again:
- ut_ad(buf);
- ut_ad(n > 0);
+ if (ret > 0) {
- low = (DWORD) offset & 0xFFFFFFFF;
- high = (DWORD) (offset >> 32);
+ const char* name;
- MONITOR_ATOMIC_INC_LOW(MONITOR_OS_PENDING_READS, monitor);
+ name = static_cast<const char*>(find_data.cFileName);
-#ifndef UNIV_HOTBACKUP
- /* Protect the seek / read operation with a mutex */
- i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+ ut_a(strlen(name) < OS_FILE_MAX_PATH);
- os_mutex_enter(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
+ if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) {
- ret2 = SetFilePointer(
- file, low, reinterpret_cast<PLONG>(&high), FILE_BEGIN);
+ goto next_file;
+ }
- if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
+ strcpy(info->name, name);
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
+ info->size = find_data.nFileSizeHigh;
+ info->size <<= 32;
+ info->size |= find_data.nFileSizeLow;
- MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_READS, monitor);
- goto error_handling;
- }
+ if (find_data.dwFileAttributes
+ & FILE_ATTRIBUTE_REPARSE_POINT) {
- ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
+ /* TODO: test Windows symlinks */
+ /* TODO: MySQL has apparently its own symlink
+ implementation in Windows, dbname.sym can
+ redirect a database directory:
+ REFMAN "windows-symbolic-links.html" */
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
+ info->type = OS_FILE_TYPE_LINK;
- MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_READS, monitor);
+ } else if (find_data.dwFileAttributes
+ & FILE_ATTRIBUTE_DIRECTORY) {
- if (!ret) {
- } else if (len == n) {
- return(TRUE);
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Tried to read " ULINTPF " bytes at offset "
- UINT64PF ". Was only able to read %lu.",
- n, offset, ret);
- return FALSE;
- }
-#else /* __WIN__ */
- ibool retry;
- ssize_t ret;
+ info->type = OS_FILE_TYPE_DIR;
- os_bytes_read_since_printout += n;
+ } else {
-try_again:
- ret = os_file_pread(file, buf, n, offset);
-
- if ((ulint) ret == n) {
- return(TRUE);
- } else if (ret == -1) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Error in system call pread(). The operating"
- " system error number is %lu.",(ulint) errno);
- } else {
- /* Partial read occurred */
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Tried to read " ULINTPF " bytes at offset "
- UINT64PF ". Was only able to read %ld.",
- n, offset, (lint) ret);
- return FALSE;
- }
-#endif /* __WIN__ */
-#ifdef __WIN__
-error_handling:
-#endif
- retry = os_file_handle_error(NULL, "read", __FILE__, __LINE__);
+ /* It is probably safest to assume that all other
+ file types are normal. Better to check them rather
+ than blindly skip them. */
- if (retry) {
- goto try_again;
+ info->type = OS_FILE_TYPE_FILE;
+ }
+
+ status = 0;
+
+ } else if (GetLastError() == ERROR_NO_MORE_FILES) {
+
+ status = 1;
+
+ } else {
+
+ os_file_handle_error_no_exit(NULL, "readdir_next_file", false);
+
+ status = -1;
}
- fprintf(stderr,
- "InnoDB: Fatal error: cannot read from file."
- " OS error number %lu.\n",
-#ifdef __WIN__
- (ulong) GetLastError()
-#else
- (ulong) errno
-#endif /* __WIN__ */
- );
- fflush(stderr);
+ return(status);
+}
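Taken together, the three primitives above support the usual scan loop; a
sketch with error handling trimmed and an illustrative path:

	os_file_dir_t	dir = os_file_opendir("C:\\mysql\\data", true);
	if (dir != NULL) {
		os_file_stat_t	info;
		int		status;
		while (!(status = os_file_readdir_next_file(
				"C:\\mysql\\data", dir, &info))) {
			/* '.' and '..' are already skipped. */
			if (info.type == OS_FILE_TYPE_FILE) {
				/* use info.name and info.size */
			}
		}
		/* status == 1: end of directory; -1: error. */
		os_file_closedir(dir);
	}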
- ut_error;
+/** Check that IO of specific size is possible for the file
+opened with FILE_FLAG_NO_BUFFERING.
- return(FALSE);
+The requirement is that IO is multiple of the disk sector size.
+
+@param[in] file file handle
+@param[in] io_size expected io size
+@return true - unbuffered io of requested size is possible, false otherwise.
+
+@note: this function only works correctly with Windows 8 or later
+(GetFileInformationByHandleEx with FileStorageInfo is only supported there).
+It will return true on earlier Windows versions.
+ */
+static bool unbuffered_io_possible(HANDLE file, size_t io_size)
+{
+ FILE_STORAGE_INFO info;
+ if (GetFileInformationByHandleEx(
+ file, FileStorageInfo, &info, sizeof(info))) {
+ ULONG sector_size = info.LogicalBytesPerSector;
+ if (sector_size)
+ return io_size % sector_size == 0;
+ }
+ return true;
}
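The rule being checked is plain modular arithmetic; for example, on a disk
with 4096-byte logical sectors a 512-byte log block fails the test
(values illustrative):

	ULONG	sector_size = 4096;	/* from FILE_STORAGE_INFO */
	size_t	io_size = 512;		/* e.g. OS_FILE_LOG_BLOCK_SIZE */
	/* 512 % 4096 != 0, so unbuffered_io_possible() returns false
	and the caller below reopens without FILE_FLAG_NO_BUFFERING. */
	bool	possible = (io_size % sector_size) == 0;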
-/*******************************************************************//**
-NOTE! Use the corresponding macro os_file_read_no_error_handling(),
-not directly this function!
-Requests a synchronous positioned read operation. This function does not do
-any error handling. In case of error it returns FALSE.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
-ibool
-os_file_read_no_error_handling_func(
-/*================================*/
- os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
- os_offset_t offset, /*!< in: file offset where to read */
- ulint n) /*!< in: number of bytes to read */
+
+/** NOTE! Use the corresponding macro os_file_create(), not directly
+this function!
+Opens an existing file or creates a new one.
+@param[in] name name of the file or path as a null-terminated
+ string
+@param[in] create_mode create mode
+@param[in] purpose OS_FILE_AIO, if asynchronous, non-buffered I/O
+ is desired, OS_FILE_NORMAL, if any normal file;
+ NOTE that it also depends on type, os_aio_..
+ and srv_.. variables whether we really use async
+ I/O or unbuffered I/O: look in the function
+ source code for the exact rules
+@param[in] type OS_DATA_FILE or OS_LOG_FILE
+@param[in] success true if succeeded
+@return handle to the file, not defined if error, error number
+ can be retrieved with os_file_get_last_error */
+pfs_os_file_t
+os_file_create_func(
+ const char* name,
+ ulint create_mode,
+ ulint purpose,
+ ulint type,
+ bool read_only,
+ bool* success)
{
-#ifdef __WIN__
- BOOL ret;
- DWORD len;
- DWORD ret2;
- DWORD low;
- DWORD high;
- ibool retry;
-#ifndef UNIV_HOTBACKUP
- ulint i;
-#endif /* !UNIV_HOTBACKUP */
+ os_file_t file;
+ bool retry;
+ bool on_error_no_exit;
+ bool on_error_silent;
- /* On 64-bit Windows, ulint is 64 bits. But offset and n should be
- no more than 32 bits. */
- ut_a((n & 0xFFFFFFFFUL) == n);
+ *success = false;
- os_n_file_reads++;
- os_bytes_read_since_printout += n;
- const bool monitor = MONITOR_IS_ON(MONITOR_OS_PENDING_READS);
+ DBUG_EXECUTE_IF(
+ "ib_create_table_fail_disk_full",
+ *success = false;
+ SetLastError(ERROR_DISK_FULL);
+ return(OS_FILE_CLOSED);
+ );
-try_again:
- ut_ad(buf);
- ut_ad(n > 0);
+ DWORD create_flag;
+ DWORD share_mode = srv_operation != SRV_OPERATION_NORMAL
+ ? FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE
+ : FILE_SHARE_READ | FILE_SHARE_DELETE;
- low = (DWORD) offset & 0xFFFFFFFF;
- high = (DWORD) (offset >> 32);
+ if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW) {
+ WAIT_ALLOW_WRITES();
+ }
- MONITOR_ATOMIC_INC_LOW(MONITOR_OS_PENDING_READS, monitor);
+ on_error_no_exit = create_mode & OS_FILE_ON_ERROR_NO_EXIT
+ ? true : false;
+
+ on_error_silent = create_mode & OS_FILE_ON_ERROR_SILENT
+ ? true : false;
+
+ create_mode &= ~(OS_FILE_ON_ERROR_NO_EXIT | OS_FILE_ON_ERROR_SILENT);
-#ifndef UNIV_HOTBACKUP
- /* Protect the seek / read operation with a mutex */
- i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+ if (create_mode == OS_FILE_OPEN_RAW) {
- os_mutex_enter(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
+ ut_a(!read_only);
- ret2 = SetFilePointer(
- file, low, reinterpret_cast<PLONG>(&high), FILE_BEGIN);
+ create_flag = OPEN_EXISTING;
- if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
+		/* On Windows, physical devices require admin privileges and
+		must have the write-share mode set. See the remarks
+		section of the CreateFile() function documentation in MSDN. */
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
+ share_mode |= FILE_SHARE_WRITE;
- MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_READS, monitor);
- goto error_handling;
+ } else if (create_mode == OS_FILE_OPEN
+ || create_mode == OS_FILE_OPEN_RETRY) {
+
+ create_flag = OPEN_EXISTING;
+
+ } else if (read_only) {
+
+ create_flag = OPEN_EXISTING;
+
+ } else if (create_mode == OS_FILE_CREATE) {
+
+ create_flag = CREATE_NEW;
+
+ } else if (create_mode == OS_FILE_OVERWRITE) {
+
+ create_flag = CREATE_ALWAYS;
+
+ } else {
+ ib::error()
+ << "Unknown file create mode (" << create_mode << ") "
+ << " for file '" << name << "'";
+
+ return(OS_FILE_CLOSED);
}
- ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
+ DWORD attributes = 0;
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
+ if (purpose == OS_FILE_AIO) {
- MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_READS, monitor);
+#ifdef WIN_ASYNC_IO
+ /* If specified, use asynchronous (overlapped) io and no
+ buffering of writes in the OS */
+
+ if (srv_use_native_aio) {
+ attributes |= FILE_FLAG_OVERLAPPED;
+ }
+#endif /* WIN_ASYNC_IO */
+
+ } else if (purpose == OS_FILE_NORMAL) {
+
+ /* Use default setting. */
- if (!ret) {
- } else if (len == n) {
- return(TRUE);
} else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Tried to read " ULINTPF " bytes at offset "
- UINT64PF ". Was only able to read %lu.",
- n, offset, len);
- return FALSE;
+
+ ib::error()
+ << "Unknown purpose flag (" << purpose << ") "
+ << "while opening file '" << name << "'";
+
+ return(OS_FILE_CLOSED);
}
-#else /* __WIN__ */
- ibool retry;
- ssize_t ret;
- os_bytes_read_since_printout += n;
+ if (type == OS_LOG_FILE) {
+		/* There is no reason to use buffered writes for logs. */
+ attributes |= FILE_FLAG_NO_BUFFERING;
+ }
-try_again:
- ret = os_file_pread(file, buf, n, offset);
-
- if ((ulint) ret == n) {
- return(TRUE);
- } else if (ret == -1) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Error in system call pread(). The operating"
- " system error number is %lu.",(ulint) errno);
- } else {
- /* Partial read occurred */
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Tried to read " ULINTPF " bytes at offset "
- UINT64PF ". Was only able to read %ld.",
- n, offset, (lint) ret);
- return FALSE;
- }
-#endif /* __WIN__ */
-#ifdef __WIN__
-error_handling:
-#endif
- retry = os_file_handle_error_no_exit(NULL, "read", FALSE, __FILE__, __LINE__);
+ switch (srv_file_flush_method)
+ {
+ case SRV_O_DSYNC:
+ if (type == OS_LOG_FILE) {
+ /* Map O_SYNC to FILE_WRITE_THROUGH */
+ attributes |= FILE_FLAG_WRITE_THROUGH;
+ }
+ break;
- if (retry) {
- goto try_again;
+ case SRV_O_DIRECT_NO_FSYNC:
+ case SRV_O_DIRECT:
+ if (type == OS_DATA_FILE) {
+ attributes |= FILE_FLAG_NO_BUFFERING;
+ }
+ break;
+
+ case SRV_ALL_O_DIRECT_FSYNC:
+ /*Traditional Windows behavior, no buffering for any files.*/
+ attributes |= FILE_FLAG_NO_BUFFERING;
+ break;
+
+ case SRV_FSYNC:
+ case SRV_LITTLESYNC:
+ break;
+
+ case SRV_NOSYNC:
+ /* Let Windows cache manager handle all writes.*/
+ attributes &= ~(FILE_FLAG_WRITE_THROUGH | FILE_FLAG_NO_BUFFERING);
+ break;
+
+ default:
+ ut_a(false); /* unknown flush mode.*/
}
- return(FALSE);
-}
-/*******************************************************************//**
-Rewind file to its start, read at most size - 1 bytes from it to str, and
-NUL-terminate str. All errors are silently ignored. This function is
-mostly meant to be used with temporary files. */
-UNIV_INTERN
-void
-os_file_read_string(
-/*================*/
- FILE* file, /*!< in: file to read from */
- char* str, /*!< in: buffer where to read */
- ulint size) /*!< in: size of buffer */
-{
- size_t flen;
+	// TODO: File a bug; this looks wrong. The flush log
+	// parameter is dynamic.
+ if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
+ /* Do not use unbuffered i/o for the log files because
+ value 2 denotes that we do not flush the log at every
+ commit, but only once per second */
+ attributes &= ~(FILE_FLAG_WRITE_THROUGH | FILE_FLAG_NO_BUFFERING);
+ }
- if (size == 0) {
- return;
+
+ DWORD access = GENERIC_READ;
+
+ if (!read_only) {
+ access |= GENERIC_WRITE;
+ }
+
+ for (;;) {
+ const char *operation;
+
+ /* Use default security attributes and no template file. */
+ file = CreateFile(
+ name, access, share_mode, NULL,
+ create_flag, attributes, NULL);
+
+		/* If FILE_FLAG_NO_BUFFERING was set, check whether this can
+		work at all for the expected IO sizes. Reopen without the
+		unbuffered flag if it won't work. */
+ if ((file != INVALID_HANDLE_VALUE)
+ && (attributes & FILE_FLAG_NO_BUFFERING)
+ && (type == OS_LOG_FILE)
+ && !unbuffered_io_possible(file, OS_FILE_LOG_BLOCK_SIZE)) {
+ ut_a(CloseHandle(file));
+ attributes &= ~FILE_FLAG_NO_BUFFERING;
+ create_flag = OPEN_ALWAYS;
+ continue;
+ }
+
+ *success = (file != INVALID_HANDLE_VALUE);
+ if (*success) {
+ break;
+ }
+
+ operation = (create_mode == OS_FILE_CREATE && !read_only) ?
+ "create" : "open";
+
+ if (on_error_no_exit) {
+ retry = os_file_handle_error_no_exit(
+ name, operation, on_error_silent);
+ }
+ else {
+ retry = os_file_handle_error(name, operation);
+ }
+
+ if (!retry) {
+ break;
+ }
+ }
+
+ if (*success && srv_use_native_aio && (attributes & FILE_FLAG_OVERLAPPED)) {
+ /* Bind the file handle to completion port. Completion port
+ might not be created yet, in some stages of backup, but
+ must always be there for the server.*/
+ HANDLE port = (type == OS_LOG_FILE) ?
+ log_completion_port : data_completion_port;
+ ut_a(port || srv_operation != SRV_OPERATION_NORMAL);
+ if (port) {
+ ut_a(CreateIoCompletionPort(file, port, 0, 0));
+ }
}
- rewind(file);
- flen = fread(str, 1, size - 1, file);
- str[flen] = '\0';
+ return(file);
}
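A usage sketch for the macro form, assuming the usual wrapper
os_file_create(key, name, create_mode, purpose, type, read_only, success)
and a PFS key such as innodb_data_file_key (both names are assumptions):

	bool		success;
	pfs_os_file_t	fh = os_file_create(
		innodb_data_file_key, "test.ibd",
		OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT,
		OS_FILE_NORMAL, OS_DATA_FILE,
		srv_read_only_mode, &success);
	if (!success) {
		/* already logged, unless OS_FILE_ON_ERROR_SILENT */
	}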
-/*******************************************************************//**
-NOTE! Use the corresponding macro os_file_write(), not directly
-this function!
-Requests a synchronous write operation.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
-ibool
-os_file_write_func(
-/*===============*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- os_file_t file, /*!< in: handle to a file */
- const void* buf, /*!< in: buffer from which to write */
- os_offset_t offset, /*!< in: file offset where to write */
- ulint n) /*!< in: number of bytes to write */
+/** NOTE! Use the corresponding macro os_file_create_simple_no_error_handling(),
+not directly this function!
+A simple function to open or create a file.
+@param[in] name name of the file or path as a null-terminated
+ string
+@param[in] create_mode create mode
+@param[in] access_type OS_FILE_READ_ONLY, OS_FILE_READ_WRITE, or
+ OS_FILE_READ_ALLOW_DELETE; the last option is
+ used by a backup program reading the file
+@param[out] success true if succeeded
+@return own: handle to the file, not defined if error, error number
+ can be retrieved with os_file_get_last_error */
+pfs_os_file_t
+os_file_create_simple_no_error_handling_func(
+ const char* name,
+ ulint create_mode,
+ ulint access_type,
+ bool read_only,
+ bool* success)
{
- ut_ad(!srv_read_only_mode);
-#ifdef __WIN__
- BOOL ret;
- DWORD len;
- DWORD ret2;
- DWORD low;
- DWORD high;
- ulint n_retries = 0;
- ulint err;
- DWORD saved_error = 0;
-#ifndef UNIV_HOTBACKUP
- ulint i;
-#endif /* !UNIV_HOTBACKUP */
+ os_file_t file;
- /* On 64-bit Windows, ulint is 64 bits. But offset and n should be
- no more than 32 bits. */
- ut_a((n & 0xFFFFFFFFUL) == n);
+ *success = false;
- os_n_file_writes++;
+ DWORD access;
+ DWORD create_flag;
+ DWORD attributes = 0;
+ DWORD share_mode = srv_operation != SRV_OPERATION_NORMAL
+ ? FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE
+ : FILE_SHARE_READ | FILE_SHARE_DELETE;
- ut_ad(buf);
- ut_ad(n > 0);
- const bool monitor = MONITOR_IS_ON(MONITOR_OS_PENDING_WRITES);
-retry:
- low = (DWORD) offset & 0xFFFFFFFF;
- high = (DWORD) (offset >> 32);
+ ut_a(name);
- MONITOR_ATOMIC_INC_LOW(MONITOR_OS_PENDING_WRITES, monitor);
+ ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT));
+ ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT));
-#ifndef UNIV_HOTBACKUP
- /* Protect the seek / write operation with a mutex */
- i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+ if (create_mode == OS_FILE_OPEN) {
- os_mutex_enter(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
+ create_flag = OPEN_EXISTING;
- ret2 = SetFilePointer(
- file, low, reinterpret_cast<PLONG>(&high), FILE_BEGIN);
+ } else if (read_only) {
- if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
+ create_flag = OPEN_EXISTING;
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
+ } else if (create_mode == OS_FILE_CREATE) {
- MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_WRITES, monitor);
+ create_flag = CREATE_NEW;
- ut_print_timestamp(stderr);
+ } else {
- fprintf(stderr,
- " InnoDB: Error: File pointer positioning to"
- " file %s failed at\n"
- "InnoDB: offset %llu. Operating system"
- " error number %lu.\n"
- "InnoDB: Some operating system error numbers"
- " are described at\n"
- "InnoDB: "
- REFMAN "operating-system-error-codes.html\n",
- name, offset, (ulong) GetLastError());
+ ib::error()
+ << "Unknown file create mode (" << create_mode << ") "
+ << " for file '" << name << "'";
- return(FALSE);
+ return(OS_FILE_CLOSED);
}
- ret = WriteFile(file, buf, (DWORD) n, &len, NULL);
+ if (access_type == OS_FILE_READ_ONLY) {
+
+ access = GENERIC_READ;
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
+ } else if (read_only) {
- MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_WRITES, monitor);
+ access = GENERIC_READ;
+
+ } else if (access_type == OS_FILE_READ_WRITE) {
+
+ access = GENERIC_READ | GENERIC_WRITE;
+
+ } else if (access_type == OS_FILE_READ_ALLOW_DELETE) {
+
+ ut_a(!read_only);
- if (ret && len == n) {
+ access = GENERIC_READ;
+
+ /*!< A backup program has to give mysqld the maximum
+ freedom to do what it likes with the file */
- return(TRUE);
+ share_mode |= FILE_SHARE_DELETE | FILE_SHARE_WRITE
+ | FILE_SHARE_READ;
+ } else {
+
+ ib::error()
+ << "Unknown file access type (" << access_type << ") "
+ << "for file '" << name << "'";
+
+ return(OS_FILE_CLOSED);
}
- /* If some background file system backup tool is running, then, at
- least in Windows 2000, we may get here a specific error. Let us
- retry the operation 100 times, with 1 second waits. */
+ file = CreateFile((LPCTSTR) name,
+ access,
+ share_mode,
+ NULL, // Security attributes
+ create_flag,
+ attributes,
+ NULL); // No template file
- if (GetLastError() == ERROR_LOCK_VIOLATION && n_retries < 100) {
+ *success = (file != INVALID_HANDLE_VALUE);
- os_thread_sleep(1000000);
+ return(file);
+}
- n_retries++;
+/** Deletes a file if it exists. The file has to be closed before calling this.
+@param[in] name file path as a null-terminated string
+@param[out] exist indicate if file pre-exist
+@return true if success */
+bool
+os_file_delete_if_exists_func(
+ const char* name,
+ bool* exist)
+{
+ ulint count = 0;
- goto retry;
+ if (exist != NULL) {
+ *exist = true;
}
- if (!os_has_said_disk_full) {
- char *winmsg = NULL;
+ for (;;) {
+ /* In Windows, deleting an .ibd file may fail if
+ the file is being accessed by an external program,
+ such as a backup tool. */
- saved_error = GetLastError();
- err = (ulint) saved_error;
+ bool ret = DeleteFile((LPCTSTR) name);
- ut_print_timestamp(stderr);
+ if (ret) {
+ return(true);
+ }
- fprintf(stderr,
- " InnoDB: Error: Write to file %s failed"
- " at offset %llu.\n"
- "InnoDB: %lu bytes should have been written,"
- " only %lu were written.\n"
- "InnoDB: Operating system error number %lu.\n"
- "InnoDB: Check that your OS and file system"
- " support files of this size.\n"
- "InnoDB: Check also that the disk is not full"
- " or a disk quota exceeded.\n",
- name, offset,
- (ulong) n, (ulong) len, (ulong) err);
+ DWORD lasterr = GetLastError();
- /* Ask Windows to prepare a standard message for a
- GetLastError() */
+ if (lasterr == ERROR_FILE_NOT_FOUND
+ || lasterr == ERROR_PATH_NOT_FOUND) {
- FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER |
- FORMAT_MESSAGE_FROM_SYSTEM |
- FORMAT_MESSAGE_IGNORE_INSERTS,
- NULL, saved_error,
- MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
- (LPSTR)&winmsg, 0, NULL);
+			/* the file does not exist; this is not an error */
+ if (exist != NULL) {
+ *exist = false;
+ }
- if (winmsg) {
- fprintf(stderr,
- "InnoDB: FormatMessage: Error number %lu means '%s'.\n",
- (ulong) saved_error, winmsg);
- LocalFree(winmsg);
+ return(true);
}
- if (strerror((int) err) != NULL) {
- fprintf(stderr,
- "InnoDB: Error number %lu means '%s'.\n",
- (ulong) err, strerror((int) err));
+ ++count;
+
+ if (count > 100 && 0 == (count % 10)) {
+
+ /* Print error information */
+ os_file_get_last_error(true);
+
+ ib::warn() << "Delete of file '" << name << "' failed.";
}
- fprintf(stderr,
- "InnoDB: Some operating system error numbers"
- " are described at\n"
- "InnoDB: "
- REFMAN "operating-system-error-codes.html\n");
+ /* Sleep for a second */
+ os_thread_sleep(1000000);
+
+ if (count > 2000) {
- os_has_said_disk_full = TRUE;
+ return(false);
+ }
}
+}
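Call-pattern sketch, using the _func form whose signature appears above (the
os_file_delete_if_exists() macro wrapper may additionally take a PFS key):

	bool	existed;
	if (os_file_delete_if_exists_func("test.ibd", &existed)) {
		/* success; existed tells whether a file was removed */
	} else {
		/* gave up after ~2000 one-second retries, e.g. a
		backup tool still holds the file open */
	}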
- return(FALSE);
-#else
- ssize_t ret;
- WAIT_ALLOW_WRITES();
+/** Deletes a file. The file has to be closed before calling this.
+@param[in] name File path as NUL terminated string
+@return true if success */
+bool
+os_file_delete_func(
+ const char* name)
+{
+ ulint count = 0;
- ret = os_file_pwrite(file, buf, n, offset);
+ for (;;) {
+ /* In Windows, deleting an .ibd file may fail if
+ the file is being accessed by an external program,
+ such as a backup tool. */
- if ((ulint) ret == n) {
+ BOOL ret = DeleteFile((LPCTSTR) name);
- return(TRUE);
- }
+ if (ret) {
+ return(true);
+ }
- if (!os_has_said_disk_full) {
- ut_print_timestamp(stderr);
+ if (GetLastError() == ERROR_FILE_NOT_FOUND) {
+ /* If the file does not exist, we classify this as
+ a 'mild' error and return */
- if(ret == -1) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Failure of system call pwrite(). Operating"
- " system error number is %lu.",
- (ulint) errno);
- } else {
- fprintf(stderr,
- " InnoDB: Error: Write to file %s failed"
- " at offset " UINT64PF ".\n"
- "InnoDB: %lu bytes should have been written,"
- " only %ld were written.\n"
- "InnoDB: Operating system error number %lu.\n"
- "InnoDB: Check that your OS and file system"
- " support files of this size.\n"
- "InnoDB: Check also that the disk is not full"
- " or a disk quota exceeded.\n",
- name, offset, n, (lint) ret,
- (ulint) errno);
+ return(false);
}
- if (strerror(errno) != NULL) {
- fprintf(stderr,
- "InnoDB: Error number %d means '%s'.\n",
- errno, strerror(errno));
+ ++count;
+
+ if (count > 100 && 0 == (count % 10)) {
+
+ /* print error information */
+ os_file_get_last_error(true);
+
+ ib::warn()
+ << "Cannot delete file '" << name << "'. Is "
+ << "another program accessing it?";
}
- fprintf(stderr,
- "InnoDB: Some operating system error numbers"
- " are described at\n"
- "InnoDB: "
- REFMAN "operating-system-error-codes.html\n");
+ /* sleep for a second */
+ os_thread_sleep(1000000);
+
+ if (count > 2000) {
- os_has_said_disk_full = TRUE;
+ return(false);
+ }
}
- return(FALSE);
-#endif
+ ut_error;
+ return(false);
}
-/*******************************************************************//**
-Check the existence and type of the given file.
-@return TRUE if call succeeded */
-UNIV_INTERN
-ibool
-os_file_status(
-/*===========*/
- const char* path, /*!< in: pathname of the file */
- ibool* exists, /*!< out: TRUE if file exists */
- os_file_type_t* type) /*!< out: type of the file (if it exists) */
+/** NOTE! Use the corresponding macro os_file_rename(), not directly this
+function!
+Renames a file (can also move it to another directory). It is safest that the
+file is closed before calling this function.
+@param[in] oldpath old file path as a null-terminated string
+@param[in] newpath new file path
+@return true if success */
+bool
+os_file_rename_func(
+ const char* oldpath,
+ const char* newpath)
{
-#ifdef __WIN__
- int ret;
- struct _stat64 statinfo;
+#ifdef UNIV_DEBUG
+ os_file_type_t type;
+ bool exists;
- ret = _stat64(path, &statinfo);
- if (ret && (errno == ENOENT || errno == ENOTDIR || errno == ENAMETOOLONG)) {
- /* file does not exist */
- *exists = FALSE;
- return(TRUE);
- } else if (ret) {
- /* file exists, but stat call failed */
+ /* New path must not exist. */
+ ut_ad(os_file_status(newpath, &exists, &type));
+ ut_ad(!exists);
- os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__);
+ /* Old path must exist. */
+ ut_ad(os_file_status(oldpath, &exists, &type));
+ ut_ad(exists);
+#endif /* UNIV_DEBUG */
- return(FALSE);
+ if (MoveFile((LPCTSTR) oldpath, (LPCTSTR) newpath)) {
+ return(true);
}
- if (_S_IFDIR & statinfo.st_mode) {
- *type = OS_FILE_TYPE_DIR;
- } else if (_S_IFREG & statinfo.st_mode) {
- *type = OS_FILE_TYPE_FILE;
- } else {
- *type = OS_FILE_TYPE_UNKNOWN;
- }
+ os_file_handle_rename_error(oldpath, newpath);
+ return(false);
+}
- *exists = TRUE;
+/** NOTE! Use the corresponding macro os_file_close(), not directly
+this function!
+Closes a file handle. In case of error, error number can be retrieved with
+os_file_get_last_error.
+@param[in,own] file Handle to a file
+@return true if success */
+bool
+os_file_close_func(
+ os_file_t file)
+{
+ ut_a(file > 0);
- return(TRUE);
-#else
- int ret;
- struct stat statinfo;
+ if (CloseHandle(file)) {
+ return(true);
+ }
- ret = stat(path, &statinfo);
- if (ret && (errno == ENOENT || errno == ENOTDIR || errno == ENAMETOOLONG)) {
- /* file does not exist */
- *exists = FALSE;
- return(TRUE);
- } else if (ret) {
- /* file exists, but stat call failed */
+ os_file_handle_error(NULL, "close");
+
+ return(false);
+}
- os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__);
+/** Gets a file size.
+@param[in] file Handle to a file
+@return file size, or (os_offset_t) -1 on failure */
+os_offset_t
+os_file_get_size(
+ os_file_t file)
+{
+ DWORD high;
+ DWORD low = GetFileSize(file, &high);
- return(FALSE);
+ if (low == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
+ return((os_offset_t) -1);
}
- if (S_ISDIR(statinfo.st_mode)) {
- *type = OS_FILE_TYPE_DIR;
- } else if (S_ISLNK(statinfo.st_mode)) {
- *type = OS_FILE_TYPE_LINK;
- } else if (S_ISREG(statinfo.st_mode)) {
- *type = OS_FILE_TYPE_FILE;
+ return(os_offset_t(low | (os_offset_t(high) << 32)));
+}
+
+/** Gets a file size.
+@param[in] filename Full path to the filename to check
+@return file size if OK, else set m_total_size to ~0 and m_alloc_size to
+ errno */
+os_file_size_t
+os_file_get_size(
+ const char* filename)
+{
+ struct __stat64 s;
+ os_file_size_t file_size;
+
+ int ret = _stat64(filename, &s);
+
+ if (ret == 0) {
+
+ file_size.m_total_size = s.st_size;
+
+ DWORD low_size;
+ DWORD high_size;
+
+ low_size = GetCompressedFileSize(filename, &high_size);
+
+ if (low_size != INVALID_FILE_SIZE) {
+
+ file_size.m_alloc_size = high_size;
+ file_size.m_alloc_size <<= 32;
+ file_size.m_alloc_size |= low_size;
+
+ } else {
+ ib::error()
+ << "GetCompressedFileSize("
+ << filename << ", ..) failed.";
+
+ file_size.m_alloc_size = (os_offset_t) -1;
+ }
} else {
- *type = OS_FILE_TYPE_UNKNOWN;
+ file_size.m_total_size = ~0;
+ file_size.m_alloc_size = (os_offset_t) ret;
}
- *exists = TRUE;
-
- return(TRUE);
-#endif
+ return(file_size);
}
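The two fields let a caller tell the logical size from what the filesystem
actually allocated; a sketch of the sparse/compressed check this enables:

	os_file_size_t	sz = os_file_get_size("test.ibd");
	if (sz.m_total_size != (os_offset_t) ~0
	    && sz.m_alloc_size != (os_offset_t) -1
	    && sz.m_alloc_size < sz.m_total_size) {
		/* the file is sparse and/or compressed on disk */
	}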
-/*******************************************************************//**
-This function returns information about the specified file
-@return DB_SUCCESS if all OK */
-UNIV_INTERN
+/** This function returns information about the specified file
+@param[in] path pathname of the file
+@param[out] stat_info information of a file in a directory
+@param[in,out]	statinfo	OS stat buffer filled in by _stat64()
+@param[in] check_rw_perm for testing whether the file can be opened
+ in RW mode
+@param[in] read_only true if the file is opened in read-only mode
+@return DB_SUCCESS if all OK */
+static
dberr_t
-os_file_get_status(
-/*===============*/
- const char* path, /*!< in: pathname of the file */
- os_file_stat_t* stat_info, /*!< information of a file in a
- directory */
- bool check_rw_perm) /*!< in: for testing whether the
- file can be opened in RW mode */
+os_file_get_status_win32(
+ const char* path,
+ os_file_stat_t* stat_info,
+ struct _stat64* statinfo,
+ bool check_rw_perm,
+ bool read_only)
{
- int ret;
-
-#ifdef __WIN__
- struct _stat64 statinfo;
+ int ret = _stat64(path, statinfo);
- ret = _stat64(path, &statinfo);
-
- if (ret && (errno == ENOENT || errno == ENOTDIR)) {
+ if (ret && (errno == ENOENT || errno == ENOTDIR
+ || errno == ENAMETOOLONG)) {
/* file does not exist */
return(DB_NOT_FOUND);
@@ -3370,17 +4748,19 @@ os_file_get_status(
} else if (ret) {
/* file exists, but stat call failed */
- os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__);
+ os_file_handle_error_no_exit(path, "STAT", false);
return(DB_FAIL);
- } else if (_S_IFDIR & statinfo.st_mode) {
+ } else if (_S_IFDIR & statinfo->st_mode) {
+
stat_info->type = OS_FILE_TYPE_DIR;
- } else if (_S_IFREG & statinfo.st_mode) {
+
+ } else if (_S_IFREG & statinfo->st_mode) {
DWORD access = GENERIC_READ;
- if (!srv_read_only_mode) {
+ if (!read_only) {
access |= GENERIC_WRITE;
}
@@ -3394,7 +4774,8 @@ os_file_get_status(
fh = CreateFile(
(LPCTSTR) path, // File to open
access,
- 0, // No sharing
+ FILE_SHARE_READ | FILE_SHARE_WRITE
+ | FILE_SHARE_DELETE, // Full sharing
NULL, // Default security
OPEN_EXISTING, // Existing file only
FILE_ATTRIBUTE_NORMAL, // Normal file
@@ -3407,731 +4788,1236 @@ os_file_get_status(
CloseHandle(fh);
}
}
+ stat_info->block_size = 0;
+
+		/* What follows is the calculation of the FS block size, which
+		is not important (it is just shown in I_S InnoDB tables). Any
+		error in calculating it will be ignored. */
+ char volname[MAX_PATH];
+ BOOL result = GetVolumePathName(path, volname, MAX_PATH);
+ static bool warned_once = false;
+ if (!result) {
+ if (!warned_once) {
+ ib::warn()
+ << "os_file_get_status_win32: "
+ << "Failed to get the volume path name for: "
+ << path
+ << "- OS error number " << GetLastError();
+ warned_once = true;
+ }
+ return(DB_SUCCESS);
+ }
+
+ DWORD sectorsPerCluster;
+ DWORD bytesPerSector;
+ DWORD numberOfFreeClusters;
+ DWORD totalNumberOfClusters;
+
+ result = GetDiskFreeSpace(
+ (LPCSTR) volname,
+ &sectorsPerCluster,
+ &bytesPerSector,
+ &numberOfFreeClusters,
+ &totalNumberOfClusters);
+
+ if (!result) {
+ if (!warned_once) {
+ ib::warn()
+ << "GetDiskFreeSpace(" << volname << ",...) "
+ << "failed "
+ << "- OS error number " << GetLastError();
+ warned_once = true;
+ }
+ return(DB_SUCCESS);
+ }
+ stat_info->block_size = bytesPerSector * sectorsPerCluster;
} else {
stat_info->type = OS_FILE_TYPE_UNKNOWN;
}
-#else
- struct stat statinfo;
- ret = stat(path, &statinfo);
+ return(DB_SUCCESS);
+}
- if (ret && (errno == ENOENT || errno == ENOTDIR)) {
- /* file does not exist */
+/**
+Sets or clears the sparse flag on a Windows file.
+@param[in]	file		file handle
+@param[in]	is_sparse	if true, set the flag; otherwise clear it
+@return true on success, false on error
+*/
+#include <versionhelpers.h>
+bool os_file_set_sparse_win32(os_file_t file, bool is_sparse)
+{
+ if (!is_sparse && !IsWindows8OrGreater()) {
+		/* Cannot unset the sparse flag on older Windows.
+		Before Windows 8 it is documented to produce unpredictable
+		results if there are unallocated ranges in the file. */
+ return false;
+ }
+ DWORD temp;
+ FILE_SET_SPARSE_BUFFER sparse_buffer;
+ sparse_buffer.SetSparse = is_sparse;
+ return os_win32_device_io_control(file,
+ FSCTL_SET_SPARSE, &sparse_buffer, sizeof(sparse_buffer), 0, 0,&temp);
+}
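Usage sketch: mark a freshly created file sparse so that later hole-punching
can actually release space (file name illustrative):

	HANDLE	h = CreateFile("test.ibd", GENERIC_READ | GENERIC_WRITE,
			       FILE_SHARE_READ | FILE_SHARE_DELETE, NULL,
			       CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
	if (h != INVALID_HANDLE_VALUE) {
		if (!os_file_set_sparse_win32(h, true)) {
			/* volume without sparse support, or the
			FSCTL_SET_SPARSE device control failed */
		}
		CloseHandle(h);
	}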
- return(DB_NOT_FOUND);
- } else if (ret) {
- /* file exists, but stat call failed */
+/**
+Change file size on Windows.
- os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__);
+If file is extended, the bytes between old and new EOF
+are zeros.
- return(DB_FAIL);
+If file is sparse, "virtual" block is added at the end of
+allocated area.
- }
+If file is normal, file system allocates storage.
- switch (statinfo.st_mode & S_IFMT) {
- case S_IFDIR:
- stat_info->type = OS_FILE_TYPE_DIR;
- break;
- case S_IFLNK:
- stat_info->type = OS_FILE_TYPE_LINK;
- break;
- case S_IFBLK:
- /* Handle block device as regular file. */
- case S_IFCHR:
- /* Handle character device as regular file. */
- case S_IFREG:
- stat_info->type = OS_FILE_TYPE_FILE;
- break;
- default:
- stat_info->type = OS_FILE_TYPE_UNKNOWN;
+@param[in] pathname file path
+@param[in] file file handle
+@param[in] size size to preserve in bytes
+@return true if success */
+bool
+os_file_change_size_win32(
+ const char* pathname,
+ os_file_t file,
+ os_offset_t size)
+{
+ LARGE_INTEGER length;
+
+ length.QuadPart = size;
+
+ BOOL success = SetFilePointerEx(file, length, NULL, FILE_BEGIN);
+
+ if (!success) {
+ os_file_handle_error_no_exit(
+ pathname, "SetFilePointerEx", false);
+ } else {
+ success = SetEndOfFile(file);
+ if (!success) {
+ os_file_handle_error_no_exit(
+ pathname, "SetEndOfFile", false);
+ }
}
+ return(success);
+}
+/** Truncates a file at its current position.
+@param[in] file Handle to be truncated
+@return true if success */
+bool
+os_file_set_eof(
+ FILE* file)
+{
+ HANDLE h = (HANDLE) _get_osfhandle(fileno(file));
- if (check_rw_perm && stat_info->type == OS_FILE_TYPE_FILE) {
+ return(SetEndOfFile(h));
+}
- int fh;
- int access;
+/** This function can be called if one wants to post a batch of reads and
+prefers an i/o-handler thread to handle them all at once later. You must
+call os_aio_simulated_wake_handler_threads later to ensure the threads
+are not left sleeping! */
+void
+os_aio_simulated_put_read_threads_to_sleep()
+{
+ AIO::simulated_put_read_threads_to_sleep();
+}
- access = !srv_read_only_mode ? O_RDWR : O_RDONLY;
+/** This function can be called if one wants to post a batch of reads and
+prefers an i/o-handler thread to handle them all at once later. You must
+call os_aio_simulated_wake_handler_threads later to ensure the threads
+are not left sleeping! */
+void
+AIO::simulated_put_read_threads_to_sleep()
+{
+ /* The idea of putting background IO threads to sleep is only for
+ Windows when using simulated AIO. Windows XP seems to schedule
+ background threads too eagerly to allow for coalescing during
+ readahead requests. */
- fh = ::open(path, access | O_CLOEXEC, os_innodb_umask);
+ if (srv_use_native_aio) {
+ /* We do not use simulated AIO: do nothing */
- if (fh == -1) {
- stat_info->rw_perm = false;
- } else {
- stat_info->rw_perm = true;
- close(fh);
- }
+ return;
}
-#endif /* _WIN_ */
+ os_aio_recommend_sleep_for_read_threads = true;
- stat_info->ctime = statinfo.st_ctime;
- stat_info->atime = statinfo.st_atime;
- stat_info->mtime = statinfo.st_mtime;
- stat_info->size = statinfo.st_size;
+ for (ulint i = 0; i < os_aio_n_segments; i++) {
+ AIO* array;
- return(DB_SUCCESS);
-}
+ get_array_and_local_segment(&array, i);
-/* path name separator character */
-#ifdef __WIN__
-# define OS_FILE_PATH_SEPARATOR '\\'
-#else
-# define OS_FILE_PATH_SEPARATOR '/'
-#endif
+ if (array == s_reads) {
-/****************************************************************//**
-This function returns a new path name after replacing the basename
-in an old path with a new basename. The old_path is a full path
-name including the extension. The tablename is in the normal
-form "databasename/tablename". The new base name is found after
-the forward slash. Both input strings are null terminated.
-
-This function allocates memory to be returned. It is the callers
-responsibility to free the return value after it is no longer needed.
+ os_event_reset(os_aio_segment_wait_events[i]);
+ }
+ }
+}
-@return own: new full pathname */
-UNIV_INTERN
-char*
-os_file_make_new_pathname(
-/*======================*/
- const char* old_path, /*!< in: pathname */
- const char* tablename) /*!< in: contains new base name */
+#endif /* !_WIN32*/
+
+/** Does a synchronous read or write, depending upon the type specified.
+In case of partial reads/writes the function tries
+NUM_RETRIES_ON_PARTIAL_IO times to read/write the complete data.
+@param[in]	in_type	IO flags
+@param[in]	file	handle to an open file
+@param[in,out]	buf	buffer to read into or write from
+@param[in] offset file offset from the start where to read
+@param[in] n number of bytes to read, starting from offset
+@param[out] err DB_SUCCESS or error code
+@return number of bytes read/written, -1 if error */
+static MY_ATTRIBUTE((warn_unused_result))
+ssize_t
+os_file_io(
+ const IORequest&in_type,
+ os_file_t file,
+ void* buf,
+ ulint n,
+ os_offset_t offset,
+ dberr_t* err)
{
- ulint dir_len;
- char* last_slash;
- char* base_name;
- char* new_path;
- ulint new_path_len;
+ ulint original_n = n;
+ IORequest type = in_type;
+ ssize_t bytes_returned = 0;
- /* Split the tablename into its database and table name components.
- They are separated by a '/'. */
- last_slash = strrchr((char*) tablename, '/');
- base_name = last_slash ? last_slash + 1 : (char*) tablename;
+ SyncFileIO sync_file_io(file, buf, n, offset);
- /* Find the offset of the last slash. We will strip off the
- old basename.ibd which starts after that slash. */
- last_slash = strrchr((char*) old_path, OS_FILE_PATH_SEPARATOR);
- dir_len = last_slash ? last_slash - old_path : strlen(old_path);
+ for (ulint i = 0; i < NUM_RETRIES_ON_PARTIAL_IO; ++i) {
- /* allocate a new path and move the old directory path to it. */
- new_path_len = dir_len + strlen(base_name) + sizeof "/.ibd";
- new_path = static_cast<char*>(mem_alloc(new_path_len));
- memcpy(new_path, old_path, dir_len);
+ ssize_t n_bytes = sync_file_io.execute(type);
- ut_snprintf(new_path + dir_len,
- new_path_len - dir_len,
- "%c%s.ibd",
- OS_FILE_PATH_SEPARATOR,
- base_name);
+ /* Check for a hard error. Not much we can do now. */
+ if (n_bytes < 0) {
- return(new_path);
-}
+ break;
-/****************************************************************//**
-This function returns a remote path name by combining a data directory
-path provided in a DATA DIRECTORY clause with the tablename which is
-in the form 'database/tablename'. It strips the file basename (which
-is the tablename) found after the last directory in the path provided.
-The full filepath created will include the database name as a directory
-under the path provided. The filename is the tablename with the '.ibd'
-extension. All input and output strings are null-terminated.
+ } else if ((ulint) n_bytes + bytes_returned == n) {
-This function allocates memory to be returned. It is the callers
-responsibility to free the return value after it is no longer needed.
+ bytes_returned += n_bytes;
-@return own: A full pathname; data_dir_path/databasename/tablename.ibd */
-UNIV_INTERN
-char*
-os_file_make_remote_pathname(
-/*=========================*/
- const char* data_dir_path, /*!< in: pathname */
- const char* tablename, /*!< in: tablename */
- const char* extention) /*!< in: file extention; ibd,cfg */
-{
- ulint data_dir_len;
- char* last_slash;
- char* new_path;
- ulint new_path_len;
+ if (offset > 0
+ && !type.is_log()
+ && type.is_write()
+ && type.punch_hole()) {
+ *err = type.punch_hole(file, offset, n);
- ut_ad(extention && strlen(extention) == 3);
+ } else {
+ *err = DB_SUCCESS;
+ }
- /* Find the offset of the last slash. We will strip off the
- old basename or tablename which starts after that slash. */
- last_slash = strrchr((char*) data_dir_path, OS_FILE_PATH_SEPARATOR);
- data_dir_len = last_slash ? last_slash - data_dir_path : strlen(data_dir_path);
+ return(original_n);
+ }
- /* allocate a new path and move the old directory path to it. */
- new_path_len = data_dir_len + strlen(tablename)
- + sizeof "/." + strlen(extention);
- new_path = static_cast<char*>(mem_alloc(new_path_len));
- memcpy(new_path, data_dir_path, data_dir_len);
- ut_snprintf(new_path + data_dir_len,
- new_path_len - data_dir_len,
- "%c%s.%s",
- OS_FILE_PATH_SEPARATOR,
- tablename,
- extention);
-
- srv_normalize_path_for_win(new_path);
+ /* Handle partial read/write. */
- return(new_path);
-}
+ ut_ad((ulint) n_bytes + bytes_returned < n);
-/****************************************************************//**
-This function reduces a null-terminated full remote path name into
-the path that is sent by MySQL for DATA DIRECTORY clause. It replaces
-the 'databasename/tablename.ibd' found at the end of the path with just
-'tablename'.
+ bytes_returned += (ulint) n_bytes;
-Since the result is always smaller than the path sent in, no new memory
-is allocated. The caller should allocate memory for the path sent in.
-This function manipulates that path in place.
+ if (!type.is_partial_io_warning_disabled()) {
-If the path format is not as expected, just return. The result is used
-to inform a SHOW CREATE TABLE command. */
-UNIV_INTERN
-void
-os_file_make_data_dir_path(
-/*========================*/
- char* data_dir_path) /*!< in/out: full path/data_dir_path */
-{
- char* ptr;
- char* tablename;
- ulint tablename_len;
+ const char* op = type.is_read()
+ ? "read" : "written";
- /* Replace the period before the extension with a null byte. */
- ptr = strrchr((char*) data_dir_path, '.');
- if (!ptr) {
- return;
- }
- ptr[0] = '\0';
+ ib::warn()
+ << n
+ << " bytes should have been " << op << ". Only "
+ << bytes_returned
+ << " bytes " << op << ". Retrying"
+ << " for the remaining bytes.";
+ }
- /* The tablename starts after the last slash. */
- ptr = strrchr((char*) data_dir_path, OS_FILE_PATH_SEPARATOR);
- if (!ptr) {
- return;
+ /* Advance the offset and buffer by n_bytes */
+ sync_file_io.advance(n_bytes);
}
- ptr[0] = '\0';
- tablename = ptr + 1;
- /* The databasename starts after the next to last slash. */
- ptr = strrchr((char*) data_dir_path, OS_FILE_PATH_SEPARATOR);
- if (!ptr) {
- return;
+ *err = DB_IO_ERROR;
+
+ if (!type.is_partial_io_warning_disabled()) {
+ ib::warn()
+ << "Retry attempts for "
+ << (type.is_read() ? "reading" : "writing")
+ << " partial data failed.";
}
- tablename_len = ut_strlen(tablename);
- ut_memmove(++ptr, tablename, tablename_len);
+ return(bytes_returned);
+}
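The same partial-I/O retry idea in plain POSIX terms (a sketch only; the
InnoDB version above additionally routes through SyncFileIO and may punch
holes after compressed writes):

	static ssize_t
	read_fully(int fd, void* buf, size_t n, off_t offset)
	{
		size_t	done = 0;
		/* cf. NUM_RETRIES_ON_PARTIAL_IO */
		for (int i = 0; i < 10 && done < n; i++) {
			ssize_t	r = pread(fd, (char*) buf + done,
					  n - done, offset + done);
			if (r < 0) {
				return(-1);	/* hard error */
			}
			if (r == 0) {
				break;		/* EOF: give up */
			}
			done += (size_t) r;
		}
		return((ssize_t) done);
	}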
- ptr[tablename_len] = '\0';
+/** Does a synchronous write operation in Posix.
+@param[in] type IO context
+@param[in] file handle to an open file
+@param[in]	buf	buffer from which to write
+@param[in] n number of bytes to read, starting from offset
+@param[in] offset file offset from the start where to read
+@param[out] err DB_SUCCESS or error code
+@return number of bytes written, -1 if error */
+static MY_ATTRIBUTE((warn_unused_result))
+ssize_t
+os_file_pwrite(
+ const IORequest& type,
+ os_file_t file,
+ const byte* buf,
+ ulint n,
+ os_offset_t offset,
+ dberr_t* err)
+{
+ ut_ad(type.validate());
+ ut_ad(type.is_write());
+
+ ++os_n_file_writes;
+
+ const bool monitor = MONITOR_IS_ON(MONITOR_OS_PENDING_WRITES);
+ MONITOR_ATOMIC_INC_LOW(MONITOR_OS_PENDING_WRITES, monitor);
+ ssize_t n_bytes = os_file_io(type, file, const_cast<byte*>(buf),
+ n, offset, err);
+ MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_WRITES, monitor);
+
+ return(n_bytes);
}
-/****************************************************************//**
-The function os_file_dirname returns a directory component of a
-null-terminated pathname string. In the usual case, dirname returns
-the string up to, but not including, the final '/', and basename
-is the component following the final '/'. Trailing '/' characters
-are not counted as part of the pathname.
+/** NOTE! Use the corresponding macro os_file_write(), not directly
+this function!
+Requests a synchronous write operation.
+@param[in]	type	IO flags
+@param[in]	name	name of the file or path as a null-terminated string
+@param[in]	file	handle to an open file
+@param[in]	buf	buffer from which to write
+@param[in]	offset	file offset from the start where to write
+@param[in]	n	number of bytes to write, starting from offset
+@return error code
+@retval DB_SUCCESS if the operation succeeded */
+dberr_t
+os_file_write_func(
+ const IORequest& type,
+ const char* name,
+ os_file_t file,
+ const void* buf,
+ os_offset_t offset,
+ ulint n)
+{
+ dberr_t err;
-If path does not contain a slash, dirname returns the string ".".
+ ut_ad(type.validate());
+ ut_ad(n > 0);
-Concatenating the string returned by dirname, a "/", and the basename
-yields a complete pathname.
+ WAIT_ALLOW_WRITES();
-The return value is a copy of the directory component of the pathname.
-The copy is allocated from heap. It is the caller responsibility
-to free it after it is no longer needed.
+ ssize_t n_bytes = os_file_pwrite(type, file, (byte*)buf, n, offset, &err);
-The following list of examples (taken from SUSv2) shows the strings
-returned by dirname and basename for different paths:
+ if ((ulint) n_bytes != n && !os_has_said_disk_full) {
- path dirname basename
- "/usr/lib" "/usr" "lib"
- "/usr/" "/" "usr"
- "usr" "." "usr"
- "/" "/" "/"
- "." "." "."
- ".." "." ".."
+ ib::error()
+ << "Write to file " << name << " failed at offset "
+ << offset << ", " << n
+ << " bytes should have been written,"
+ " only " << n_bytes << " were written."
+ " Operating system error number " << IF_WIN(GetLastError(),errno) << "."
+ " Check that your OS and file system"
+ " support files of this size."
+ " Check also that the disk is not full"
+ " or a disk quota exceeded.";
+#ifndef _WIN32
+ if (strerror(errno) != NULL) {
-@return own: directory component of the pathname */
-UNIV_INTERN
-char*
-os_file_dirname(
-/*============*/
- const char* path) /*!< in: pathname */
-{
- /* Find the offset of the last slash */
- const char* last_slash = strrchr(path, OS_FILE_PATH_SEPARATOR);
- if (!last_slash) {
- /* No slash in the path, return "." */
+ ib::error()
+ << "Error number " << errno
+ << " means '" << strerror(errno) << "'";
+ }
- return(mem_strdup("."));
+ ib::info() << OPERATING_SYSTEM_ERROR_MSG;
+#endif
+ os_has_said_disk_full = true;
}
- /* Ok, there is a slash */
+ return(err);
+}
- if (last_slash == path) {
- /* last slash is the first char of the path */
+/** Does a synchronous read operation in Posix.
+@param[in] type IO flags
+@param[in] file handle to an open file
+@param[out] buf buffer where to read
+@param[in] offset file offset from the start where to read
+@param[in] n number of bytes to read, starting from offset
+@param[out] err DB_SUCCESS or error code
+@return number of bytes read, -1 if error */
+static MY_ATTRIBUTE((warn_unused_result))
+ssize_t
+os_file_pread(
+ const IORequest& type,
+ os_file_t file,
+ void* buf,
+ ulint n,
+ os_offset_t offset,
+ dberr_t* err)
+{
+ ut_ad(type.is_read());
- return(mem_strdup("/"));
- }
+ ++os_n_file_reads;
- /* Non-trivial directory component */
+ const bool monitor = MONITOR_IS_ON(MONITOR_OS_PENDING_READS);
+ MONITOR_ATOMIC_INC_LOW(MONITOR_OS_PENDING_READS, monitor);
+ ssize_t n_bytes = os_file_io(type, file, buf, n, offset, err);
+ MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_READS, monitor);
- return(mem_strdupl(path, last_slash - path));
+ return(n_bytes);
}
-/****************************************************************//**
-Creates all missing subdirectories along the given path.
-@return TRUE if call succeeded FALSE otherwise */
-UNIV_INTERN
-ibool
-os_file_create_subdirs_if_needed(
-/*=============================*/
- const char* path) /*!< in: path name */
+/** Requests a synchronous positioned read operation.
+@param[in] type IO flags
+@param[in] file handle to an open file
+@param[out] buf buffer where to read
+@param[in] offset file offset from the start where to read
+@param[in] n number of bytes to read, starting from offset
+@param[out] o number of bytes actually read
+@param[in] exit_on_err if true then exit on error
+@return DB_SUCCESS or error code */
+static MY_ATTRIBUTE((warn_unused_result))
+dberr_t
+os_file_read_page(
+ const IORequest& type,
+ os_file_t file,
+ void* buf,
+ os_offset_t offset,
+ ulint n,
+ ulint* o,
+ bool exit_on_err)
{
- if (srv_read_only_mode) {
+ dberr_t err;
- ib_logf(IB_LOG_LEVEL_ERROR,
- "read only mode set. Can't create subdirectories '%s'",
- path);
+ os_bytes_read_since_printout += n;
- return(FALSE);
+ ut_ad(type.validate());
+ ut_ad(n > 0);
+
+ ssize_t n_bytes = os_file_pread(type, file, buf, n, offset, &err);
+ if (o) {
+ *o = n_bytes;
}
- char* subdir = os_file_dirname(path);
+ if (ulint(n_bytes) == n || (err != DB_SUCCESS && !exit_on_err)) {
+ return err;
+ }
- if (strlen(subdir) == 1
- && (*subdir == OS_FILE_PATH_SEPARATOR || *subdir == '.')) {
- /* subdir is root or cwd, nothing to do */
- mem_free(subdir);
+ ib::error() << "Tried to read " << n << " bytes at offset "
+ << offset << ", but was only able to read " << n_bytes;
- return(TRUE);
+ if (!os_file_handle_error_cond_exit(
+ NULL, "read", exit_on_err, false)) {
+ ib::fatal()
+ << "Cannot read from file. OS error number "
+ << errno << ".";
}
- /* Test if subdir exists */
- os_file_type_t type;
- ibool subdir_exists;
- ibool success = os_file_status(subdir, &subdir_exists, &type);
+ if (err == DB_SUCCESS) {
+ err = DB_IO_ERROR;
+ }
- if (success && !subdir_exists) {
+ return err;
+}
+
+/** Retrieves the last error number if an error occurs in a file io function.
+The number should be retrieved before any other OS calls (because they may
+overwrite the error number). If the number is not known to this program,
+the OS error number + 100 is returned.
+@param[in] report_all_errors true if we want an error printed
+ for all errors
+@return error number, or OS error number + 100 */
+ulint
+os_file_get_last_error(
+ bool report_all_errors)
+{
+ return(os_file_get_last_error_low(report_all_errors, false));
+}
- /* subdir does not exist, create it */
- success = os_file_create_subdirs_if_needed(subdir);
+/** Handle errors for file operations.
+@param[in] name name of a file or NULL
+@param[in] operation operation
+@param[in] should_abort whether to abort on an unknown error
+@param[in] on_error_silent whether to suppress reports of non-fatal errors
+@return true if we should retry the operation */
+static MY_ATTRIBUTE((warn_unused_result))
+bool
+os_file_handle_error_cond_exit(
+ const char* name,
+ const char* operation,
+ bool should_abort,
+ bool on_error_silent)
+{
+ ulint err;
- if (!success) {
- mem_free(subdir);
+ err = os_file_get_last_error_low(false, on_error_silent);
- return(FALSE);
+ switch (err) {
+ case OS_FILE_DISK_FULL:
+ /* We only print a warning about disk full once */
+
+ if (os_has_said_disk_full) {
+
+ return(false);
}
- success = os_file_create_directory(subdir, FALSE);
- }
+ /* Disk full error is reported irrespective of the
+ on_error_silent setting. */
- mem_free(subdir);
+ if (name) {
- return(success);
+ ib::error()
+ << "Encountered a problem with file '"
+ << name << "'";
+ }
+
+ ib::error()
+ << "Disk is full. Try to clean the disk to free space.";
+
+ os_has_said_disk_full = true;
+
+ return(false);
+
+ case OS_FILE_AIO_RESOURCES_RESERVED:
+ case OS_FILE_AIO_INTERRUPTED:
+
+ return(true);
+
+ case OS_FILE_PATH_ERROR:
+ case OS_FILE_ALREADY_EXISTS:
+ case OS_FILE_ACCESS_VIOLATION:
+
+ return(false);
+
+ case OS_FILE_SHARING_VIOLATION:
+
+ os_thread_sleep(10000000); /* 10 sec */
+ return(true);
+
+ case OS_FILE_OPERATION_ABORTED:
+ case OS_FILE_INSUFFICIENT_RESOURCE:
+
+ os_thread_sleep(100000); /* 100 ms */
+ return(true);
+
+ default:
+
+ /* If it is an operation that can crash on error then it
+ is better to ignore on_error_silent and print an error message
+ to the log. */
+
+ if (should_abort || !on_error_silent) {
+ ib::error() << "File "
+ << (name != NULL ? name : "(unknown)")
+ << ": '" << operation << "'"
+ " returned OS error " << err << "."
+ << (should_abort
+ ? " Cannot continue operation" : "");
+ }
+
+ if (should_abort) {
+ abort();
+ }
+ }
+
+ return(false);
}
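
The boolean return value ("should retry") drives a loop in the callers; os_file_handle_error(), used later in this file, is the should_abort = true wrapper. A sketch of the idiom, with a hypothetical file operation:

	bool success;

	do {
		success = do_file_operation(name);	/* hypothetical */

		if (!success
		    && !os_file_handle_error(name, "operation")) {
			/* Non-transient error: give up. The transient
			cases (AIO interrupted, sharing violation after
			the sleep above, ...) return true and retry. */
			break;
		}
	} while (!success);
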
-#ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Returns a pointer to the nth slot in the aio array.
-@return pointer to slot */
-static
-os_aio_slot_t*
-os_aio_array_get_nth_slot(
-/*======================*/
- os_aio_array_t* array, /*!< in: aio array */
- ulint index) /*!< in: index of the slot */
+#ifndef _WIN32
+/** Tries to disable OS caching on an opened file descriptor.
+@param[in] fd file descriptor to alter
+@param[in] file_name file name, used in the diagnostic message
+@param[in] name "open" or "create"; used in the diagnostic
+ message */
+void
+os_file_set_nocache(
+ int fd MY_ATTRIBUTE((unused)),
+ const char* file_name MY_ATTRIBUTE((unused)),
+ const char* operation_name MY_ATTRIBUTE((unused)))
{
- ut_a(index < array->n_slots);
+ /* Some versions of Solaris may not have DIRECTIO_ON */
+#if defined(UNIV_SOLARIS) && defined(DIRECTIO_ON)
+ if (directio(fd, DIRECTIO_ON) == -1) {
+ int errno_save = errno;
- return(&array->slots[index]);
+ ib::error()
+ << "Failed to set DIRECTIO_ON on file "
+ << file_name << "; " << operation_name
+ << ": " << strerror(errno_save) << ","
+ " continuing anyway.";
+ }
+#elif defined(O_DIRECT)
+ if (fcntl(fd, F_SETFL, O_DIRECT) == -1) {
+ int errno_save = errno;
+ static bool warning_message_printed = false;
+ if (errno_save == EINVAL) {
+ if (!warning_message_printed) {
+ warning_message_printed = true;
+# ifdef UNIV_LINUX
+ ib::warn()
+ << "Failed to set O_DIRECT on file "
+ << file_name << "; " << operation_name
+ << ": " << strerror(errno_save) << ", "
+ << "continuing anyway. O_DIRECT is "
+ "known to result in 'Invalid argument' "
+ "on Linux on tmpfs, "
+ "see MySQL Bug#26662.";
+# else /* UNIV_LINUX */
+ goto short_warning;
+# endif /* UNIV_LINUX */
+ }
+ } else {
+# ifndef UNIV_LINUX
+short_warning:
+# endif
+ ib::warn()
+ << "Failed to set O_DIRECT on file "
+ << file_name << "; " << operation_name
+ << ": " << strerror(errno_save)
+ << ", continuing anyway.";
+ }
+ }
+#endif /* defined(UNIV_SOLARIS) && defined(DIRECTIO_ON) */
}
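
Use is a single best-effort call right after opening a data file; a sketch with an illustrative path:

	#include <fcntl.h>

	int fd = open("/var/lib/mysql/ibdata1", O_RDWR);

	if (fd != -1) {
		/* Best effort: on tmpfs this logs the EINVAL warning
		above and the file simply stays in the page cache. */
		os_file_set_nocache(fd, "/var/lib/mysql/ibdata1", "open");
	}
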
-#if defined(LINUX_NATIVE_AIO)
-/******************************************************************//**
-Creates an io_context for native linux AIO.
-@return TRUE on success. */
-static
-ibool
-os_aio_linux_create_io_ctx(
-/*=======================*/
- ulint max_events, /*!< in: number of events. */
- io_context_t* io_ctx) /*!< out: io_ctx to initialize. */
-{
- int ret;
- ulint retries = 0;
+#endif /* !_WIN32 */
-retry:
- memset(io_ctx, 0x0, sizeof(*io_ctx));
+/** Extend a file.
- /* Initialize the io_ctx. Tell it how many pending
- IO requests this context will handle. */
+On Windows, extending a file allocates blocks for the file,
+unless the file is sparse.
- ret = io_setup(max_events, io_ctx);
- if (ret == 0) {
-#if defined(UNIV_AIO_DEBUG)
- fprintf(stderr,
- "InnoDB: Linux native AIO:"
- " initialized io_ctx for segment\n");
-#endif
- /* Success. Return now. */
- return(TRUE);
- }
-
- /* If we hit EAGAIN we'll make a few attempts before failing. */
-
- switch (ret) {
- case -EAGAIN:
- if (retries == 0) {
- /* First time around. */
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: io_setup() failed"
- " with EAGAIN. Will make %d attempts"
- " before giving up.\n",
- OS_AIO_IO_SETUP_RETRY_ATTEMPTS);
- }
+On Unix, we will extend the file with ftruncate(), if the
+file needs to be sparse. Otherwise posix_fallocate() is used
+when available, and if not, binary zeroes are added to the end
+of the file.
- if (retries < OS_AIO_IO_SETUP_RETRY_ATTEMPTS) {
- ++retries;
- fprintf(stderr,
- "InnoDB: Warning: io_setup() attempt"
- " %lu failed.\n",
- retries);
- os_thread_sleep(OS_AIO_IO_SETUP_RETRY_SLEEP);
- goto retry;
- }
+@param[in] name file name
+@param[in] file file handle
+@param[in] size desired file size
+@param[in] is_sparse whether to create a sparse file (no preallocating)
+@return whether the operation succeeded */
+bool
+os_file_set_size(
+ const char* name,
+ os_file_t file,
+ os_offset_t size,
+ bool is_sparse)
+{
+#ifdef _WIN32
+ /* On Windows, changing file size works well and as expected for both
+ sparse and normal files.
- /* Have tried enough. Better call it a day. */
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: io_setup() failed"
- " with EAGAIN after %d attempts.\n",
- OS_AIO_IO_SETUP_RETRY_ATTEMPTS);
- break;
+ However, 10.2 up until 10.2.9 made every file sparse in InnoDB,
+ causing NTFS fragmentation issues (MDEV-13941). We try to undo
+ the damage, and unsparse the file. */
- case -ENOSYS:
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: Linux Native AIO interface"
- " is not supported on this platform. Please"
- " check your OS documentation and install"
- " appropriate binary of InnoDB.\n");
+ if (!is_sparse && os_is_sparse_file_supported(file)) {
+ if (!os_file_set_sparse_win32(file, false))
+ /* Unsparsing the file failed. Fall back to writing binary
+ zeros, to avoid even higher fragmentation. */
+ goto fallback;
+ }
- break;
+ return os_file_change_size_win32(name, file, size);
+fallback:
+#else
+ if (is_sparse) {
+ bool success = !ftruncate(file, size);
+ if (!success) {
+ ib::error() << "ftruncate of file " << name << " to "
+ << size << " bytes failed with error "
+ << errno;
+ }
+ return(success);
+ }
+
+# ifdef HAVE_POSIX_FALLOCATE
+ int err;
+ do {
+ os_offset_t current_size = os_file_get_size(file);
+ err = current_size >= size
+ ? 0 : posix_fallocate(file, current_size,
+ size - current_size);
+ } while (err == EINTR
+ && srv_shutdown_state == SRV_SHUTDOWN_NONE);
+
+ switch (err) {
+ case 0:
+ return true;
default:
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: Linux Native AIO setup"
- " returned following error[%d]\n", -ret);
+ ib::error() << "preallocating "
+ << size << " bytes for file " << name
+ << " failed with error " << err;
+ /* fall through */
+ case EINTR:
+ errno = err;
+ return false;
+ case EINVAL:
+ /* fall back to the code below */
break;
}
+# endif /* HAVE_POSIX_FALLOCATE */
+#endif /* _WIN32 */
- fprintf(stderr,
- "InnoDB: You can disable Linux Native AIO by"
- " setting innodb_use_native_aio = 0 in my.cnf\n");
- return(FALSE);
-}
+ /* Write up to 1 megabyte at a time. */
+ ulint buf_size = ut_min(
+ static_cast<ulint>(64),
+ static_cast<ulint>(size / UNIV_PAGE_SIZE));
-/******************************************************************//**
-Checks if the system supports native linux aio. On some kernel
-versions where native aio is supported it won't work on tmpfs. In such
-cases we can't use native aio as it is not possible to mix simulated
-and native aio.
-@return: TRUE if supported, FALSE otherwise. */
-static
-ibool
-os_aio_native_aio_supported(void)
-/*=============================*/
-{
- int fd;
- io_context_t io_ctx;
- char name[1000];
+ buf_size *= UNIV_PAGE_SIZE;
- if (!os_aio_linux_create_io_ctx(1, &io_ctx)) {
- /* The platform does not support native aio. */
- return(FALSE);
- } else if (!srv_read_only_mode) {
- /* Now check if tmpdir supports native aio ops. */
- fd = innobase_mysql_tmpfile(NULL);
+ /* Align the buffer for possible raw i/o */
+ byte* buf2;
- if (fd < 0) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Unable to create temp file to check "
- "native AIO support.");
+ buf2 = static_cast<byte*>(ut_malloc_nokey(buf_size + UNIV_PAGE_SIZE));
+
+ byte* buf = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
+
+ /* Write buffer full of zeros */
+ memset(buf, 0, buf_size);
+
+ os_offset_t current_size = os_file_get_size(file);
+
+ while (current_size < size
+ && srv_shutdown_state == SRV_SHUTDOWN_NONE) {
+ ulint n_bytes;
- return(FALSE);
+ if (size - current_size < (os_offset_t) buf_size) {
+ n_bytes = (ulint) (size - current_size);
+ } else {
+ n_bytes = buf_size;
}
- } else {
- srv_normalize_path_for_win(srv_log_group_home_dir);
+ dberr_t err;
+ IORequest request(IORequest::WRITE);
- ulint dirnamelen = strlen(srv_log_group_home_dir);
- ut_a(dirnamelen < (sizeof name) - 10 - sizeof "ib_logfile");
- memcpy(name, srv_log_group_home_dir, dirnamelen);
+ err = os_file_write(
+ request, name, file, buf, current_size, n_bytes);
- /* Add a path separator if needed. */
- if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
- name[dirnamelen++] = SRV_PATH_SEPARATOR;
+ if (err != DB_SUCCESS) {
+ break;
}
- strcpy(name + dirnamelen, "ib_logfile0");
+ current_size += n_bytes;
+ }
- fd = ::open(name, O_RDONLY | O_CLOEXEC);
+ ut_free(buf2);
- if (fd == -1) {
+ return(current_size >= size && os_file_flush(file));
+}
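
A sketch of typical use when preallocating a new tablespace file (the size and names are example values):

	os_offset_t size = os_offset_t(10) << 20;	/* 10 MiB */

	if (!os_file_set_size(path, file, size, /* is_sparse */ false)) {
		/* Preallocation failed; the cause (e.g. disk full or
		EINTR during shutdown) has already been logged above. */
	}
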
- ib_logf(IB_LOG_LEVEL_WARN,
- "Unable to open \"%s\" to check "
- "native AIO read support.", name);
+/** Truncate a file to a specified size in bytes.
+@param[in] pathname file path
+@param[in] file file to be truncated
+@param[in] size size preserved in bytes
+@param[in] allow_shrink whether to allow the file to become smaller
+@return true if success */
+bool
+os_file_truncate(
+ const char* pathname,
+ os_file_t file,
+ os_offset_t size,
+ bool allow_shrink)
+{
+ if (!allow_shrink) {
+ /* Do nothing if the size preserved is larger than or
+ equal to the current size of file */
+ os_offset_t size_bytes = os_file_get_size(file);
- return(FALSE);
+ if (size >= size_bytes) {
+ return(true);
}
}
- struct io_event io_event;
+#ifdef _WIN32
+ return(os_file_change_size_win32(pathname, file, size));
+#else /* _WIN32 */
+ return(os_file_truncate_posix(pathname, file, size));
+#endif /* _WIN32 */
+}
- memset(&io_event, 0x0, sizeof(io_event));
+/** NOTE! Use the corresponding macro os_file_read(), not directly this
+function!
+Requests a synchronous positioned read operation.
+@param[in] type IO flags
+@param[in] file handle to an open file
+@param[out] buf buffer where to read
+@param[in] offset file offset from the start where to read
+@param[in] n number of bytes to read, starting from offset
+@return error code
+@retval DB_SUCCESS if the operation succeeded */
+dberr_t
+os_file_read_func(
+ const IORequest& type,
+ os_file_t file,
+ void* buf,
+ os_offset_t offset,
+ ulint n)
+{
+ return(os_file_read_page(type, file, buf, offset, n, NULL, true));
+}
- byte* buf = static_cast<byte*>(ut_malloc(UNIV_PAGE_SIZE * 2));
- byte* ptr = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE));
+/** NOTE! Use the corresponding macro os_file_read_no_error_handling(),
+not directly this function!
+Requests a synchronous positioned read operation.
+@param[in] type IO flags
+@param[in] file handle to an open file
+@param[out] buf buffer where to read
+@param[in] offset file offset from the start where to read
+@param[in] n number of bytes to read, starting from offset
+@param[out] o number of bytes actually read
+@return DB_SUCCESS or error code */
+dberr_t
+os_file_read_no_error_handling_func(
+ const IORequest& type,
+ os_file_t file,
+ void* buf,
+ os_offset_t offset,
+ ulint n,
+ ulint* o)
+{
+ return(os_file_read_page(type, file, buf, offset, n, o, false));
+}
- struct iocb iocb;
+/** Check the existence and type of the given file.
+@param[in] path path name of file
+@param[out] exists true if the file exists
+@param[out] type Type of the file, if it exists
+@return true if call succeeded */
+bool
+os_file_status(
+ const char* path,
+ bool* exists,
+ os_file_type_t* type)
+{
+#ifdef _WIN32
+ return(os_file_status_win32(path, exists, type));
+#else
+ return(os_file_status_posix(path, exists, type));
+#endif /* _WIN32 */
+}
- /* Suppress valgrind warning. */
- memset(buf, 0x00, UNIV_PAGE_SIZE * 2);
- memset(&iocb, 0x0, sizeof(iocb));
+/** Free storage space associated with a section of the file.
+@param[in] fh Open file handle
+@param[in] off Starting offset (SEEK_SET)
+@param[in] len Size of the hole
+@return DB_SUCCESS or error code */
+dberr_t
+os_file_punch_hole(
+ os_file_t fh,
+ os_offset_t off,
+ os_offset_t len)
+{
+ dberr_t err;
- struct iocb* p_iocb = &iocb;
+#ifdef _WIN32
+ err = os_file_punch_hole_win32(fh, off, len);
+#else
+ err = os_file_punch_hole_posix(fh, off, len);
+#endif /* _WIN32 */
- if (!srv_read_only_mode) {
- io_prep_pwrite(p_iocb, fd, ptr, UNIV_PAGE_SIZE, 0);
+ return (err);
+}
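
The POSIX implementation is not shown in this hunk; on Linux, hole punching is typically a fallocate(2) call, roughly as in this self-contained sketch (an assumption about the platform, not a quote of os_file_punch_hole_posix()):

	#include <fcntl.h>
	#include <errno.h>

	/* Deallocate [off, off + len) while keeping the file size. */
	static int punch_hole(int fd, off_t off, off_t len)
	{
	#if defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
		return fallocate(fd,
				 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
				 off, len);
	#else
		errno = EOPNOTSUPP;	/* no hole punching here */
		return -1;
	#endif
	}
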
+
+/** Free storage space associated with a section of the file.
+@param[in] fh Open file handle
+@param[in] off Starting offset (SEEK_SET)
+@param[in] len Size of the hole
+@return DB_SUCCESS or error code */
+dberr_t
+IORequest::punch_hole(os_file_t fh, os_offset_t off, ulint len)
+{
+ /* In this debugging mode, we act as if punch hole is supported,
+ and then skip any calls to actually punch a hole here.
+ In this way, Transparent Page Compression is still being tested. */
+ DBUG_EXECUTE_IF("ignore_punch_hole",
+ return(DB_SUCCESS);
+ );
+
+ ulint trim_len = get_trim_length(len);
+
+ if (trim_len == 0) {
+ return(DB_SUCCESS);
+ }
+
+ off += len;
+
+ /* Check whether the file system supports punching holes
+ for this tablespace. */
+ if (!should_punch_hole() || !srv_use_trim) {
+ return DB_IO_NO_PUNCH_HOLE;
+ }
+
+ dberr_t err = os_file_punch_hole(fh, off, trim_len);
+
+ if (err == DB_SUCCESS) {
+ srv_stats.page_compressed_trim_op.inc();
} else {
- ut_a(UNIV_PAGE_SIZE >= 512);
- io_prep_pread(p_iocb, fd, ptr, 512, 0);
+ /* If punch hole is not supported,
+ set space so that it is not used. */
+ if (err == DB_IO_NO_PUNCH_HOLE) {
+ space_no_punch_hole();
+ err = DB_SUCCESS;
+ }
}
- int err = io_submit(io_ctx, 1, &p_iocb);
+ return (err);
+}
- if (err >= 1) {
- /* Now collect the submitted IO request. */
- err = io_getevents(io_ctx, 1, 1, &io_event, NULL);
+/** Check if the file system supports sparse files.
+
+Warning: On POSIX systems we try to punch a hole from offset 0 to
+the system configured page size. This should only be called on an empty
+file.
+@param[in] fh File handle for the file - if opened
+@return true if the file system supports sparse files */
+bool
+os_is_sparse_file_supported(os_file_t fh)
+{
+ /* In this debugging mode, we act as if punch hole is supported,
+ then we skip any calls to actually punch a hole. In this way,
+ Transparent Page Compression is still being tested. */
+ DBUG_EXECUTE_IF("ignore_punch_hole",
+ return(true);
+ );
+
+#ifdef _WIN32
+ FILE_ATTRIBUTE_TAG_INFO info;
+ if (GetFileInformationByHandleEx(fh, FileAttributeTagInfo,
+ &info, (DWORD)sizeof(info))) {
+ if (info.FileAttributes != INVALID_FILE_ATTRIBUTES) {
+ return (info.FileAttributes & FILE_ATTRIBUTE_SPARSE_FILE) != 0;
+ }
}
+ return false;
+#else
+ dberr_t err;
- ut_free(buf);
- close(fd);
+ /* We do not know the FS block size, so punch a page-sized
+ hole and let the file system do the magic. */
+ err = os_file_punch_hole_posix(fh, 0, UNIV_PAGE_SIZE);
- switch (err) {
- case 1:
- return(TRUE);
+ return(err == DB_SUCCESS);
+#endif /* _WIN32 */
+}
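
Because the POSIX probe punches a real hole at offset 0, a caller would run it only on a freshly created, still-empty file; a sketch (create_empty_file() is a hypothetical helper):

	os_file_t fh = create_empty_file(path);		/* hypothetical */

	const bool sparse_ok = os_is_sparse_file_supported(fh);

	/* Any hole punched by the probe is harmless only because the
	file does not contain data yet. */
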
- case -EINVAL:
- case -ENOSYS:
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Linux Native AIO not supported. You can either "
- "move %s to a file system that supports native "
- "AIO or you can set innodb_use_native_aio to "
- "FALSE to avoid this message.",
- srv_read_only_mode ? name : "tmpdir");
+/** This function returns information about the specified file
+@param[in] path pathname of the file
+@param[out] stat_info information of a file in a directory
+@param[in] check_rw_perm for testing whether the file can be opened
+ in RW mode
+@param[in] read_only true if file is opened in read-only mode
+@return DB_SUCCESS if all OK */
+dberr_t
+os_file_get_status(
+ const char* path,
+ os_file_stat_t* stat_info,
+ bool check_rw_perm,
+ bool read_only)
+{
+ dberr_t ret;
- /* fall through. */
- default:
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Linux Native AIO check on %s returned error[%d]",
- srv_read_only_mode ? name : "tmpdir", -err);
+#ifdef _WIN32
+ struct _stat64 info;
+
+ ret = os_file_get_status_win32(
+ path, stat_info, &info, check_rw_perm, read_only);
+
+#else
+ struct stat info;
+
+ ret = os_file_get_status_posix(
+ path, stat_info, &info, check_rw_perm, read_only);
+
+#endif /* _WIN32 */
+
+ if (ret == DB_SUCCESS) {
+ stat_info->ctime = info.st_ctime;
+ stat_info->atime = info.st_atime;
+ stat_info->mtime = info.st_mtime;
+ stat_info->size = info.st_size;
}
- return(FALSE);
+ return(ret);
}
-#endif /* LINUX_NATIVE_AIO */
-/******************************************************************//**
-Creates an aio wait array. Note that we return NULL in case of failure.
-We don't care about freeing memory here because we assume that a
-failure will result in server refusing to start up.
-@return own: aio array, NULL on failure */
-static
-os_aio_array_t*
-os_aio_array_create(
-/*================*/
- ulint n, /*!< in: maximum number of pending aio
- operations allowed; n must be
- divisible by n_segments */
- ulint n_segments) /*!< in: number of segments in the aio array */
-{
- os_aio_array_t* array;
+/**
+Waits for an AIO operation to complete. This function is used to wait
+for completed requests. The aio array of pending requests is divided
+into segments. The thread specifies which segment or slot it wants to wait
+for. NOTE: this function will also take care of freeing the aio slot,
+therefore no other thread is allowed to do the freeing!
+@param[in] segment The number of the segment in the aio arrays to
+ wait for; segment 0 is the ibuf I/O thread,
+ segment 1 the log I/O thread, then follow the
+ non-ibuf read threads, and as the last are the
+ non-ibuf write threads; if this is
+ ULINT_UNDEFINED, then it means that sync AIO
+ is used, and this parameter is ignored
+@param[out] m1 the messages passed with the AIO request; note
+ that also in the case where the AIO operation
+ failed, these output parameters are valid and
+ can be used to restart the operation,
+ for example
+@param[out] m2 callback message
+@param[out] request IO context of the completed request
+@return DB_SUCCESS or error code */
+dberr_t
+os_aio_handler(
+ ulint segment,
+ fil_node_t** m1,
+ void** m2,
+ IORequest* request)
+{
+ dberr_t err;
+
+ if (srv_use_native_aio) {
+ srv_set_io_thread_op_info(segment, "native aio handle");
+
#ifdef WIN_ASYNC_IO
- OVERLAPPED* over;
+
+ err = os_aio_windows_handler(segment, 0, m1, m2, request);
+
#elif defined(LINUX_NATIVE_AIO)
- struct io_event* io_event = NULL;
+
+ err = os_aio_linux_handler(segment, m1, m2, request);
+
+#else
+ ut_error;
+
+ err = DB_ERROR; /* Eliminate compiler warning */
+
#endif /* WIN_ASYNC_IO */
+
+ } else {
+ srv_set_io_thread_op_info(segment, "simulated aio handle");
+
+ err = os_aio_simulated_handler(segment, m1, m2, request);
+ }
+
+ return(err);
+}
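
For orientation, each I/O helper thread spends its life looping around this function; a simplified sketch of such a thread body (shutdown handling omitted; segment is the thread's global segment number):

	for (;;) {
		fil_node_t*	m1;
		void*		m2;
		IORequest	request;

		dberr_t err = os_aio_handler(segment, &m1, &m2, &request);

		if (err == DB_SUCCESS && m2 != NULL) {
			/* Hand the completed block to the buffer pool
			or log layer via the callback messages. */
		}
	}
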
+
+#ifdef WIN_ASYNC_IO
+static HANDLE new_completion_port()
+{
+ HANDLE h = CreateIoCompletionPort(INVALID_HANDLE_VALUE, 0, 0, 0);
+ ut_a(h);
+ return h;
+}
+#endif
+
+/** Constructor
+@param[in] id The latch ID
+@param[in] n Number of AIO slots
+@param[in] segments Number of segments */
+AIO::AIO(
+ latch_id_t id,
+ ulint n,
+ ulint segments)
+ :
+ m_slots(n),
+ m_n_segments(segments),
+ m_n_reserved()
+# ifdef LINUX_NATIVE_AIO
+ ,m_events(m_slots.size())
+# endif /* LINUX_NATIVE_AIO */
+#ifdef WIN_ASYNC_IO
+ ,m_completion_port(new_completion_port())
+#endif
+{
ut_a(n > 0);
- ut_a(n_segments > 0);
+ ut_a(m_n_segments > 0);
- array = static_cast<os_aio_array_t*>(ut_malloc(sizeof(*array)));
- memset(array, 0x0, sizeof(*array));
+ mutex_create(id, &m_mutex);
- array->mutex = os_mutex_create();
- array->not_full = os_event_create();
- array->is_empty = os_event_create();
+ m_not_full = os_event_create("aio_not_full");
+ m_is_empty = os_event_create("aio_is_empty");
- os_event_set(array->is_empty);
+ memset((void*)&m_slots[0], 0x0, sizeof(m_slots[0]) * m_slots.size());
+#ifdef LINUX_NATIVE_AIO
+ memset(&m_events[0], 0x0, sizeof(m_events[0]) * m_events.size());
+#endif /* LINUX_NATIVE_AIO */
- array->n_slots = n;
- array->n_segments = n_segments;
+ os_event_set(m_is_empty);
+}
- array->slots = static_cast<os_aio_slot_t*>(
- ut_malloc(n * sizeof(*array->slots)));
+/** Initialise the slots */
+dberr_t
+AIO::init_slots()
+{
+ for (ulint i = 0; i < m_slots.size(); ++i) {
+ Slot& slot = m_slots[i];
- memset(array->slots, 0x0, n * sizeof(*array->slots));
+ slot.pos = static_cast<uint16_t>(i);
-#ifdef __WIN__
- array->handles = static_cast<HANDLE*>(ut_malloc(n * sizeof(HANDLE)));
-#endif /* __WIN__ */
+ slot.is_reserved = false;
-#if defined(LINUX_NATIVE_AIO)
- array->aio_ctx = NULL;
- array->aio_events = NULL;
+#ifdef WIN_ASYNC_IO
+
+ slot.array = this;
+
+#elif defined(LINUX_NATIVE_AIO)
+
+ slot.ret = 0;
- /* If we are not using native aio interface then skip this
- part of initialization. */
- if (!srv_use_native_aio) {
- goto skip_native_aio;
+ slot.n_bytes = 0;
+
+ memset(&slot.control, 0x0, sizeof(slot.control));
+
+#endif /* WIN_ASYNC_IO */
}
- /* Initialize the io_context array. One io_context
+ return(DB_SUCCESS);
+}
+
+#ifdef LINUX_NATIVE_AIO
+/** Initialise the Linux Native AIO interface */
+dberr_t
+AIO::init_linux_native_aio()
+{
+
+ /* Initialize the io_context_t array. One io_context_t
per segment in the array. */
+ m_aio_ctx.resize(get_n_segments());
- array->aio_ctx = static_cast<io_context**>(
- ut_malloc(n_segments * sizeof(*array->aio_ctx)));
+ ulint max_events = slots_per_segment();
- for (ulint i = 0; i < n_segments; ++i) {
- if (!os_aio_linux_create_io_ctx(n/n_segments,
- &array->aio_ctx[i])) {
+ for (std::vector<io_context_t>::iterator it = m_aio_ctx.begin(),
+ end = m_aio_ctx.end();
+ it != end; ++it) {
+
+ if (!linux_create_io_ctx(max_events, *it)) {
/* If something bad happened during aio setup
we disable linux native aio.
- The disadvantage will be a small memory leak
- at shutdown but that's ok compared to a crash
- or a not working server.
- This frequently happens when running the test suite
- with many threads on a system with low fs.aio-max-nr!
- */
-
- fprintf(stderr,
- " InnoDB: Warning: Linux Native AIO disabled "
- "because os_aio_linux_create_io_ctx() "
- "failed. To get rid of this warning you can "
- "try increasing system "
- "fs.aio-max-nr to 1048576 or larger or "
- "setting innodb_use_native_aio = 0 in my.cnf\n");
- srv_use_native_aio = FALSE;
- goto skip_native_aio;
+ This frequently happens when running the test suite
+ with many threads on a system with low fs.aio-max-nr!
+ */
+
+ ib::warn()
+ << "Linux Native AIO disabled "
+ << "because linux_create_io_ctx() "
+ << "failed. To get rid of this warning you can "
+ << "try increasing system "
+ << "fs.aio-max-nr to 1048576 or larger or "
+ << "setting innodb_use_native_aio = 0 in my.cnf";
+
+ for (std::vector<io_context_t>::iterator it2
+ = m_aio_ctx.begin();
+ it2 != it; ++it2) {
+ int ret = io_destroy(*it2);
+ ut_a(ret != -EINVAL);
+ }
+
+ m_aio_ctx.clear();
+ srv_use_native_aio = FALSE;
+ return(DB_SUCCESS);
}
}
- /* Initialize the event array. One event per slot. */
- io_event = static_cast<struct io_event*>(
- ut_malloc(n * sizeof(*io_event)));
+ return(DB_SUCCESS);
+}
+#endif /* LINUX_NATIVE_AIO */
+
+/** Initialise the array */
+dberr_t
+AIO::init()
+{
+ ut_a(!m_slots.empty());
+
- memset(io_event, 0x0, sizeof(*io_event) * n);
- array->aio_events = io_event;
+ if (srv_use_native_aio) {
+#ifdef LINUX_NATIVE_AIO
+ dberr_t err = init_linux_native_aio();
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
-skip_native_aio:
#endif /* LINUX_NATIVE_AIO */
- for (ulint i = 0; i < n; i++) {
- os_aio_slot_t* slot;
+ }
- slot = os_aio_array_get_nth_slot(array, i);
+ return(init_slots());
+}
- slot->pos = i;
- slot->reserved = FALSE;
-#ifdef WIN_ASYNC_IO
- slot->handle = CreateEvent(NULL,TRUE, FALSE, NULL);
+/** Creates an aio wait array. Note that we return NULL in case of failure.
+We don't care about freeing memory here because we assume that a
+failure will result in server refusing to start up.
+@param[in] id Latch ID
+@param[in] n maximum number of pending AIO operations
+ allowed; n must be divisible by n_segments
+@param[in] n_segments number of segments in the AIO array
+@return own: AIO array, NULL on failure */
+AIO*
+AIO::create(
+ latch_id_t id,
+ ulint n,
+ ulint n_segments)
+{
+ if ((n % n_segments)) {
- over = &slot->control;
+ ib::error()
+ << "Maximum number of AIO operations must be "
+ << "divisible by number of segments";
- over->hEvent = slot->handle;
+ return(NULL);
+ }
- array->handles[i] = over->hEvent;
+ AIO* array = UT_NEW_NOKEY(AIO(id, n, n_segments));
-#elif defined(LINUX_NATIVE_AIO)
- memset(&slot->control, 0x0, sizeof(slot->control));
- slot->n_bytes = 0;
- slot->ret = 0;
-#endif /* WIN_ASYNC_IO */
+ if (array != NULL && array->init() != DB_SUCCESS) {
+
+ UT_DELETE(array);
+
+ array = NULL;
}
return(array);
}
-/************************************************************************//**
-Frees an aio wait array. */
-static
-void
-os_aio_array_free(
-/*==============*/
- os_aio_array_t*& array) /*!< in, own: array to free */
+/** AIO destructor */
+AIO::~AIO()
{
-#ifdef WIN_ASYNC_IO
- ulint i;
- for (i = 0; i < array->n_slots; i++) {
- os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i);
- CloseHandle(slot->handle);
- }
-#endif /* WIN_ASYNC_IO */
+ mutex_destroy(&m_mutex);
-#ifdef __WIN__
- ut_free(array->handles);
-#endif /* __WIN__ */
- os_mutex_free(array->mutex);
- os_event_free(array->not_full);
- os_event_free(array->is_empty);
+ os_event_destroy(m_not_full);
+ os_event_destroy(m_is_empty);
#if defined(LINUX_NATIVE_AIO)
if (srv_use_native_aio) {
- ut_free(array->aio_events);
- ut_free(array->aio_ctx);
+ for (ulint i = 0; i < m_aio_ctx.size(); i++) {
+ int ret = io_destroy(m_aio_ctx[i]);
+ ut_a(ret != -EINVAL);
+ }
}
#endif /* LINUX_NATIVE_AIO */
-
- ut_free(array->slots);
- ut_free(array);
-
- array = 0;
+#if defined(WIN_ASYNC_IO)
+ CloseHandle(m_completion_port);
+#endif
}
-/***********************************************************************
-Initializes the asynchronous io system. Creates one array each for ibuf
+/** Initializes the asynchronous io system. Creates one array each for ibuf
and log i/o. Also creates one array each for read and write where each
-array is divided logically into n_read_segs and n_write_segs
+array is divided logically into n_readers and n_writers
respectively. The caller must create an i/o handler thread for each
segment in these arrays. This function also creates the sync array.
-No i/o handler thread needs to be created for that */
-UNIV_INTERN
-ibool
-os_aio_init(
-/*========*/
- ulint n_per_seg, /*<! in: maximum number of pending aio
- operations allowed per segment */
- ulint n_read_segs, /*<! in: number of reader threads */
- ulint n_write_segs, /*<! in: number of writer threads */
- ulint n_slots_sync) /*<! in: number of slots in the sync aio
- array */
+No i/o handler thread needs to be created for that.
+@param[in] n_per_seg maximum number of pending aio
+ operations allowed per segment
+@param[in] n_readers number of reader threads
+@param[in] n_writers number of writer threads
+@param[in] n_slots_sync number of slots in the sync aio array
+@return true if the AIO sub-system was started successfully */
+bool
+AIO::start(
+ ulint n_per_seg,
+ ulint n_readers,
+ ulint n_writers,
+ ulint n_slots_sync)
{
- os_io_init_simple();
-
#if defined(LINUX_NATIVE_AIO)
/* Check if native aio is supported on this system and tmpfs */
- if (srv_use_native_aio && !os_aio_native_aio_supported()) {
+ if (srv_use_native_aio && !is_linux_native_aio_supported()) {
- ib_logf(IB_LOG_LEVEL_WARN, "Linux Native AIO disabled.");
+ ib::warn() << "Linux Native AIO disabled.";
srv_use_native_aio = FALSE;
}
@@ -4139,69 +6025,77 @@ os_aio_init(
srv_reset_io_thread_op_info();
- os_aio_read_array = os_aio_array_create(
- n_read_segs * n_per_seg, n_read_segs);
+ s_reads = create(
+ LATCH_ID_OS_AIO_READ_MUTEX, n_readers * n_per_seg, n_readers);
- if (os_aio_read_array == NULL) {
- return(FALSE);
+ if (s_reads == NULL) {
+ return(false);
}
- ulint start = (srv_read_only_mode) ? 0 : 2;
- ulint n_segs = n_read_segs + start;
+ ulint start = srv_read_only_mode ? 0 : 2;
+ ulint n_segs = n_readers + start;
- /* 0 is the ibuf segment and 1 is the insert buffer segment. */
+ /* 0 is the ibuf segment and 1 is the redo log segment. */
for (ulint i = start; i < n_segs; ++i) {
ut_a(i < SRV_MAX_N_IO_THREADS);
srv_io_thread_function[i] = "read thread";
}
- ulint n_segments = n_read_segs;
+ ulint n_segments = n_readers;
if (!srv_read_only_mode) {
- os_aio_log_array = os_aio_array_create(n_per_seg, 1);
+ s_ibuf = create(LATCH_ID_OS_AIO_IBUF_MUTEX, n_per_seg, 1);
- if (os_aio_log_array == NULL) {
- return(FALSE);
+ if (s_ibuf == NULL) {
+ return(false);
}
++n_segments;
- srv_io_thread_function[1] = "log thread";
+ srv_io_thread_function[0] = "insert buffer thread";
- os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1);
+ s_log = create(LATCH_ID_OS_AIO_LOG_MUTEX, n_per_seg, 1);
- if (os_aio_ibuf_array == NULL) {
- return(FALSE);
+ if (s_log == NULL) {
+ return(false);
}
++n_segments;
- srv_io_thread_function[0] = "insert buffer thread";
+ srv_io_thread_function[1] = "log thread";
- os_aio_write_array = os_aio_array_create(
- n_write_segs * n_per_seg, n_write_segs);
+ } else {
+ s_ibuf = s_log = NULL;
+ }
- if (os_aio_write_array == NULL) {
- return(FALSE);
- }
+ s_writes = create(
+ LATCH_ID_OS_AIO_WRITE_MUTEX, n_writers * n_per_seg, n_writers);
- n_segments += n_write_segs;
+ if (s_writes == NULL) {
+ return(false);
+ }
- for (ulint i = start + n_read_segs; i < n_segments; ++i) {
- ut_a(i < SRV_MAX_N_IO_THREADS);
- srv_io_thread_function[i] = "write thread";
- }
+#ifdef WIN_ASYNC_IO
+ data_completion_port = s_writes->m_completion_port;
+ log_completion_port =
+ s_log ? s_log->m_completion_port : data_completion_port;
+#endif
- ut_ad(n_segments >= 4);
- } else {
- ut_ad(n_segments > 0);
+ n_segments += n_writers;
+
+ for (ulint i = start + n_readers; i < n_segments; ++i) {
+ ut_a(i < SRV_MAX_N_IO_THREADS);
+ srv_io_thread_function[i] = "write thread";
}
- os_aio_sync_array = os_aio_array_create(n_slots_sync, 1);
+ ut_ad(n_segments >= static_cast<ulint>(srv_read_only_mode ? 2 : 4));
+
+ s_sync = create(LATCH_ID_OS_AIO_SYNC_MUTEX, n_slots_sync, 1);
- if (os_aio_sync_array == NULL) {
- return(FALSE);
+ if (s_sync == NULL) {
+
+ return(false);
}
os_aio_n_segments = n_segments;
@@ -4211,96 +6105,94 @@ os_aio_init(
os_last_printout = time(NULL);
if (srv_use_native_aio) {
- return(TRUE);
+ return(true);
}
os_aio_segment_wait_events = static_cast<os_event_t*>(
- ut_malloc(n_segments * sizeof *os_aio_segment_wait_events));
+ ut_zalloc_nokey(
+ n_segments * sizeof *os_aio_segment_wait_events));
+
+ if (os_aio_segment_wait_events == NULL) {
+
+ return(false);
+ }
for (ulint i = 0; i < n_segments; ++i) {
- os_aio_segment_wait_events[i] = os_event_create();
+ os_aio_segment_wait_events[i] = os_event_create(0);
}
- return(TRUE);
+ return(true);
}
-/***********************************************************************
-Frees the asynchronous io system. */
-UNIV_INTERN
+/** Free the AIO arrays */
void
-os_aio_free(void)
-/*=============*/
+AIO::shutdown()
{
- if (os_aio_ibuf_array != 0) {
- os_aio_array_free(os_aio_ibuf_array);
- }
+ UT_DELETE(s_ibuf);
+ s_ibuf = NULL;
- if (os_aio_log_array != 0) {
- os_aio_array_free(os_aio_log_array);
- }
+ UT_DELETE(s_log);
+ s_log = NULL;
- if (os_aio_write_array != 0) {
- os_aio_array_free(os_aio_write_array);
- }
+ UT_DELETE(s_writes);
+ s_writes = NULL;
- if (os_aio_sync_array != 0) {
- os_aio_array_free(os_aio_sync_array);
- }
+ UT_DELETE(s_sync);
+ s_sync = NULL;
- os_aio_array_free(os_aio_read_array);
+ UT_DELETE(s_reads);
+ s_reads = NULL;
+}
- if (!srv_use_native_aio) {
- for (ulint i = 0; i < os_aio_n_segments; i++) {
- os_event_free(os_aio_segment_wait_events[i]);
- }
- }
+/** Initializes the asynchronous io system. Creates one array each for ibuf
+and log i/o. Also creates one array each for read and write where each
+array is divided logically into n_readers and n_writers
+respectively. The caller must create an i/o handler thread for each
+segment in these arrays. This function also creates the sync array.
+No i/o handler thread needs to be created for that.
+@param[in] n_readers number of reader threads
+@param[in] n_writers number of writer threads
+@param[in] n_slots_sync number of slots in the sync aio array
+@return true if the AIO sub-system was started successfully */
+bool
+os_aio_init(
+ ulint n_readers,
+ ulint n_writers,
+ ulint n_slots_sync)
+{
+ /* Maximum number of pending aio operations allowed per segment */
+ ulint limit = 8 * OS_AIO_N_PENDING_IOS_PER_THREAD;
- ut_free(os_aio_segment_wait_events);
- os_aio_segment_wait_events = 0;
- os_aio_n_segments = 0;
+ return(AIO::start(limit, n_readers, n_writers, n_slots_sync));
}
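
A worked example of the resulting layout, assuming the common defaults innodb_read_io_threads = 4 and innodb_write_io_threads = 4 (assumed values, not set in this hunk):

	/* start = 2 (not read-only), so the global segments are:
	   segment 0         ibuf array    (1 segment)
	   segment 1         log array     (1 segment)
	   segments 2..5     read array    (n_readers = 4)
	   segments 6..9     write array   (n_writers = 4)
	giving os_aio_n_segments = 10, where every read/write segment
	owns n_per_seg = 8 * OS_AIO_N_PENDING_IOS_PER_THREAD slots. */
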
-#ifdef WIN_ASYNC_IO
-/************************************************************************//**
-Wakes up all async i/o threads in the array in Windows async i/o at
-shutdown. */
-static
+/** Frees the asynchronous io system. */
void
-os_aio_array_wake_win_aio_at_shutdown(
-/*==================================*/
- os_aio_array_t* array) /*!< in: aio array */
+os_aio_free()
{
- ulint i;
+ AIO::shutdown();
- for (i = 0; i < array->n_slots; i++) {
+ ut_ad(!os_aio_segment_wait_events || !srv_use_native_aio);
+ ut_ad(srv_use_native_aio || os_aio_segment_wait_events
+ || !srv_was_started);
- SetEvent((array->slots + i)->handle);
+ if (!srv_use_native_aio && os_aio_segment_wait_events) {
+ for (ulint i = 0; i < os_aio_n_segments; i++) {
+ os_event_destroy(os_aio_segment_wait_events[i]);
+ }
+
+ ut_free(os_aio_segment_wait_events);
+ os_aio_segment_wait_events = 0;
}
+ os_aio_n_segments = 0;
}
-#endif
-/************************************************************************//**
-Wakes up all async i/o threads so that they know to exit themselves in
+/** Wakes up all async i/o threads so that they know to exit themselves in
shutdown. */
-UNIV_INTERN
void
-os_aio_wake_all_threads_at_shutdown(void)
-/*=====================================*/
+os_aio_wake_all_threads_at_shutdown()
{
#ifdef WIN_ASYNC_IO
- /* This code wakes up all ai/o threads in Windows native aio */
- os_aio_array_wake_win_aio_at_shutdown(os_aio_read_array);
- if (os_aio_write_array != 0) {
- os_aio_array_wake_win_aio_at_shutdown(os_aio_write_array);
- }
-
- if (os_aio_ibuf_array != 0) {
- os_aio_array_wake_win_aio_at_shutdown(os_aio_ibuf_array);
- }
-
- if (os_aio_log_array != 0) {
- os_aio_array_wake_win_aio_at_shutdown(os_aio_log_array);
- }
+ AIO::wake_at_shutdown();
#elif defined(LINUX_NATIVE_AIO)
/* When using native AIO interface the io helper threads
wait on io_getevents with a timeout value of 500ms. At
@@ -4314,353 +6206,266 @@ os_aio_wake_all_threads_at_shutdown(void)
/* This loop wakes up all simulated ai/o threads */
- for (ulint i = 0; i < os_aio_n_segments; i++) {
+ for (ulint i = 0; i < os_aio_n_segments; ++i) {
os_event_set(os_aio_segment_wait_events[i]);
}
}
-/************************************************************************//**
-Waits until there are no pending writes in os_aio_write_array. There can
+/** Waits until there are no pending writes in AIO::s_writes. There can
be other, synchronous, pending writes. */
-UNIV_INTERN
void
-os_aio_wait_until_no_pending_writes(void)
-/*=====================================*/
+os_aio_wait_until_no_pending_writes()
{
- ut_ad(!srv_read_only_mode);
- os_event_wait(os_aio_write_array->is_empty);
+ AIO::wait_until_no_pending_writes();
}
-/**********************************************************************//**
-Calculates segment number for a slot.
+/** Calculates segment number for a slot.
+@param[in] array AIO wait array
+@param[in] slot slot in this array
@return segment number (which is the number used by, for example,
-i/o-handler threads) */
-static
+ I/O-handler threads) */
ulint
-os_aio_get_segment_no_from_slot(
-/*============================*/
- os_aio_array_t* array, /*!< in: aio wait array */
- os_aio_slot_t* slot) /*!< in: slot in this array */
+AIO::get_segment_no_from_slot(
+ const AIO* array,
+ const Slot* slot)
{
ulint segment;
ulint seg_len;
- if (array == os_aio_ibuf_array) {
+ if (array == s_ibuf) {
ut_ad(!srv_read_only_mode);
segment = IO_IBUF_SEGMENT;
- } else if (array == os_aio_log_array) {
+ } else if (array == s_log) {
ut_ad(!srv_read_only_mode);
segment = IO_LOG_SEGMENT;
- } else if (array == os_aio_read_array) {
- seg_len = os_aio_read_array->n_slots
- / os_aio_read_array->n_segments;
+ } else if (array == s_reads) {
+ seg_len = s_reads->slots_per_segment();
segment = (srv_read_only_mode ? 0 : 2) + slot->pos / seg_len;
} else {
- ut_ad(!srv_read_only_mode);
- ut_a(array == os_aio_write_array);
-
- seg_len = os_aio_write_array->n_slots
- / os_aio_write_array->n_segments;
+ ut_a(array == s_writes);
- segment = os_aio_read_array->n_segments + 2
- + slot->pos / seg_len;
- }
-
- return(segment);
-}
-
-/**********************************************************************//**
-Calculates local segment number and aio array from global segment number.
-@return local segment number within the aio array */
-static
-ulint
-os_aio_get_array_and_local_segment(
-/*===============================*/
- os_aio_array_t** array, /*!< out: aio wait array */
- ulint global_segment)/*!< in: global segment number */
-{
- ulint segment;
-
- ut_a(global_segment < os_aio_n_segments);
-
- if (srv_read_only_mode) {
- *array = os_aio_read_array;
-
- return(global_segment);
- } else if (global_segment == IO_IBUF_SEGMENT) {
- *array = os_aio_ibuf_array;
- segment = 0;
-
- } else if (global_segment == IO_LOG_SEGMENT) {
- *array = os_aio_log_array;
- segment = 0;
-
- } else if (global_segment < os_aio_read_array->n_segments + 2) {
- *array = os_aio_read_array;
-
- segment = global_segment - 2;
- } else {
- *array = os_aio_write_array;
+ seg_len = s_writes->slots_per_segment();
- segment = global_segment - (os_aio_read_array->n_segments + 2);
+ segment = s_reads->m_n_segments
+ + (srv_read_only_mode ? 0 : 2) + slot->pos / seg_len;
}
return(segment);
}
-/*******************************************************************//**
-Requests for a slot in the aio array. If no slot is available, waits until
+/** Requests for a slot in the aio array. If no slot is available, waits until
not_full-event becomes signaled.
-@return pointer to slot */
-static
-os_aio_slot_t*
-os_aio_array_reserve_slot(
-/*======================*/
- ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
- ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
- os_aio_array_t* array, /*!< in: aio array */
- fil_node_t* message1,/*!< in: message to be passed along with
- the aio operation */
- void* message2,/*!< in: message to be passed along with
- the aio operation */
- pfs_os_file_t file, /*!< in: file handle */
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- void* buf, /*!< in: buffer where to read or from which
- to write */
- os_offset_t offset, /*!< in: file offset */
- ulint len, /*!< in: length of the block to read or write */
- ulint page_size, /*!< in: page size in bytes */
- ulint* write_size)/*!< in/out: Actual write size initialized
- after fist successfull trim
- operation for this page and if
- initialized we do not trim again if
- actual page size does not decrease. */
-{
- os_aio_slot_t* slot = NULL;
-#ifdef WIN_ASYNC_IO
- OVERLAPPED* control;
-
-#elif defined(LINUX_NATIVE_AIO)
- struct iocb* iocb;
- off_t aio_offset;
-
-#endif /* WIN_ASYNC_IO */
- ulint i;
- ulint counter;
- ulint slots_per_seg;
- ulint local_seg;
+@param[in] type IO context
+@param[in,out] m1 message to be passed along with the AIO
+ operation
+@param[in,out] m2 message to be passed along with the AIO
+ operation
+@param[in] file file handle
+@param[in] name name of the file or path as a NUL-terminated
+ string
+@param[in,out] buf buffer where to read or from which to write
+@param[in] offset file offset, where to read from or start writing
+@param[in] len length of the block to read or write
+@return pointer to slot */
+Slot*
+AIO::reserve_slot(
+ const IORequest& type,
+ fil_node_t* m1,
+ void* m2,
+ pfs_os_file_t file,
+ const char* name,
+ void* buf,
+ os_offset_t offset,
+ ulint len)
+{
+ ut_ad(reinterpret_cast<size_t>(buf) % OS_FILE_LOG_BLOCK_SIZE == 0);
+ ut_ad(offset % OS_FILE_LOG_BLOCK_SIZE == 0);
+ ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
#ifdef WIN_ASYNC_IO
ut_a((len & 0xFFFFFFFFUL) == len);
#endif /* WIN_ASYNC_IO */
/* No need of a mutex. Only reading constant fields */
- slots_per_seg = array->n_slots / array->n_segments;
+ ulint slots_per_seg;
+
+ ut_ad(type.validate());
+
+ slots_per_seg = slots_per_segment();
/* We attempt to keep adjacent blocks in the same local
segment. This can help in merging IO requests when we are
doing simulated AIO */
- local_seg = (offset >> (UNIV_PAGE_SIZE_SHIFT + 6))
- % array->n_segments;
+ ulint local_seg;
-loop:
- os_mutex_enter(array->mutex);
+ local_seg = (offset >> (UNIV_PAGE_SIZE_SHIFT + 6)) % m_n_segments;
- if (array->n_reserved == array->n_slots) {
- os_mutex_exit(array->mutex);
+ for (;;) {
+
+ acquire();
+
+ if (m_n_reserved != m_slots.size()) {
+ break;
+ }
+
+ release();
if (!srv_use_native_aio) {
- /* If the handler threads are suspended, wake them
- so that we get more slots */
+ /* If the handler threads are suspended,
+ wake them so that we get more slots */
os_aio_simulated_wake_handler_threads();
}
- os_event_wait(array->not_full);
-
- goto loop;
+ os_event_wait(m_not_full);
}
+ ulint counter = 0;
+ Slot* slot = NULL;
+
/* We start our search for an available slot from our preferred
local segment and do a full scan of the array. We are
guaranteed to find a slot in full scan. */
- for (i = local_seg * slots_per_seg, counter = 0;
- counter < array->n_slots;
- i++, counter++) {
+ for (ulint i = local_seg * slots_per_seg;
+ counter < m_slots.size();
+ ++i, ++counter) {
- i %= array->n_slots;
+ i %= m_slots.size();
- slot = os_aio_array_get_nth_slot(array, i);
+ slot = at(i);
- if (slot->reserved == FALSE) {
- goto found;
+ if (slot->is_reserved == false) {
+ break;
}
}
/* We MUST always be able to get hold of a reserved slot. */
- ut_error;
+ ut_a(counter < m_slots.size());
-found:
- ut_a(slot->reserved == FALSE);
- array->n_reserved++;
+ ut_a(slot->is_reserved == false);
- if (array->n_reserved == 1) {
- os_event_reset(array->is_empty);
+ ++m_n_reserved;
+
+ if (m_n_reserved == 1) {
+ os_event_reset(m_is_empty);
}
- if (array->n_reserved == array->n_slots) {
- os_event_reset(array->not_full);
+ if (m_n_reserved == m_slots.size()) {
+ os_event_reset(m_not_full);
}
- slot->reserved = TRUE;
+ slot->is_reserved = true;
slot->reservation_time = time(NULL);
- slot->message1 = message1;
- slot->message2 = message2;
+ slot->m1 = m1;
+ slot->m2 = m2;
slot->file = file;
slot->name = name;
+#ifdef _WIN32
+ slot->len = static_cast<DWORD>(len);
+#else
slot->len = len;
+#endif /* _WIN32 */
slot->type = type;
+ slot->buf = static_cast<byte*>(buf);
+ slot->ptr = slot->buf;
slot->offset = offset;
- slot->io_already_done = FALSE;
- slot->write_size = write_size;
- slot->is_log = is_log;
- slot->page_size = page_size;
-
- if (message1) {
- slot->file_block_size = fil_node_get_block_size(message1);
- }
-
+ slot->err = DB_SUCCESS;
+ slot->original_len = static_cast<uint32>(len);
+ slot->io_already_done = false;
#ifdef WIN_ASYNC_IO
- control = &slot->control;
- control->Offset = (DWORD) offset & 0xFFFFFFFF;
- control->OffsetHigh = (DWORD) (offset >> 32);
- ResetEvent(slot->handle);
+ {
+ OVERLAPPED* control;
+ control = &slot->control;
+ control->Offset = (DWORD) offset & 0xFFFFFFFF;
+ control->OffsetHigh = (DWORD) (offset >> 32);
+ }
#elif defined(LINUX_NATIVE_AIO)
/* If we are not using native AIO skip this part. */
- if (!srv_use_native_aio) {
- goto skip_native_aio;
- }
-
- /* Check if we are dealing with 64 bit arch.
- If not then make sure that offset fits in 32 bits. */
- aio_offset = (off_t) offset;
-
- ut_a(sizeof(aio_offset) >= sizeof(offset)
- || ((os_offset_t) aio_offset) == offset);
-
- iocb = &slot->control;
-
- if (type == OS_FILE_READ) {
- io_prep_pread(iocb, file, buf, len, aio_offset);
- } else {
- ut_a(type == OS_FILE_WRITE);
- io_prep_pwrite(iocb, file, buf, len, aio_offset);
- }
-
- iocb->data = (void*) slot;
- slot->n_bytes = 0;
- slot->ret = 0;
-
-skip_native_aio:
-#endif /* LINUX_NATIVE_AIO */
- os_mutex_exit(array->mutex);
-
- return(slot);
-}
-
-/*******************************************************************//**
-Frees a slot in the aio array. */
-static
-void
-os_aio_array_free_slot(
-/*===================*/
- os_aio_array_t* array, /*!< in: aio array */
- os_aio_slot_t* slot) /*!< in: pointer to slot */
-{
- os_mutex_enter(array->mutex);
+ if (srv_use_native_aio) {
- ut_ad(slot->reserved);
+ off_t aio_offset;
- slot->reserved = FALSE;
+ /* Check if we are dealing with 64 bit arch.
+ If not then make sure that offset fits in 32 bits. */
+ aio_offset = (off_t) offset;
- array->n_reserved--;
+ ut_a(sizeof(aio_offset) >= sizeof(offset)
+ || ((os_offset_t) aio_offset) == offset);
- if (array->n_reserved == array->n_slots - 1) {
- os_event_set(array->not_full);
- }
+ struct iocb* iocb = &slot->control;
- if (array->n_reserved == 0) {
- os_event_set(array->is_empty);
- }
+ if (type.is_read()) {
-#ifdef WIN_ASYNC_IO
+ io_prep_pread(
+ iocb, file, slot->ptr, slot->len, aio_offset);
+ } else {
+ ut_ad(type.is_write());
- ResetEvent(slot->handle);
+ io_prep_pwrite(
+ iocb, file, slot->ptr, slot->len, aio_offset);
+ }
-#elif defined(LINUX_NATIVE_AIO)
+ iocb->data = slot;
- if (srv_use_native_aio) {
- memset(&slot->control, 0x0, sizeof(slot->control));
slot->n_bytes = 0;
slot->ret = 0;
- /*fprintf(stderr, "Freed up Linux native slot.\n");*/
- } else {
- /* These fields should not be used if we are not
- using native AIO. */
- ut_ad(slot->n_bytes == 0);
- ut_ad(slot->ret == 0);
}
+#endif /* LINUX_NATIVE_AIO */
-#endif
- os_mutex_exit(array->mutex);
+ release();
+
+ return(slot);
}
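
The local-segment hash above merits a worked example: with 16 KiB pages (UNIV_PAGE_SIZE_SHIFT = 14, an assumed default) the key is offset >> 20, so runs of 64 consecutive pages (1 MiB) land in the same local segment, which is what lets simulated AIO merge adjacent requests:

	/* Assuming 16 KiB pages and m_n_segments = 4:
	   offset 0x000000 .. 0x0FFFFF  ->  (offset >> 20) % 4 = 0
	   offset 0x100000 .. 0x1FFFFF  ->  (offset >> 20) % 4 = 1
	   offset 0x400000 .. 0x4FFFFF  ->  (offset >> 20) % 4 = 0 */
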
-/**********************************************************************//**
-Wakes up a simulated aio i/o-handler thread if it has something to do. */
-static
+/** Wakes up a simulated aio i/o-handler thread if it has something to do.
+@param[in] global_segment The number of the segment in the AIO arrays */
void
-os_aio_simulated_wake_handler_thread(
-/*=================================*/
- ulint global_segment) /*!< in: the number of the segment in the aio
- arrays */
+AIO::wake_simulated_handler_thread(ulint global_segment)
{
- os_aio_array_t* array;
- ulint segment;
-
ut_ad(!srv_use_native_aio);
- segment = os_aio_get_array_and_local_segment(&array, global_segment);
+ AIO* array;
+ ulint segment = get_array_and_local_segment(&array, global_segment);
- ulint n = array->n_slots / array->n_segments;
+ array->wake_simulated_handler_thread(global_segment, segment);
+}
- segment *= n;
+/** Wakes up a simulated AIO I/O-handler thread if it has something to do
+for a local segment in the AIO array.
+@param[in] global_segment The number of the segment in the AIO arrays
+@param[in] segment The local segment in the AIO array */
+void
+AIO::wake_simulated_handler_thread(ulint global_segment, ulint segment)
+{
+ ut_ad(!srv_use_native_aio);
+
+ ulint n = slots_per_segment();
+ ulint offset = segment * n;
/* Look through n slots after the segment * n'th slot */
- os_mutex_enter(array->mutex);
+ acquire();
- for (ulint i = 0; i < n; ++i) {
- const os_aio_slot_t* slot;
+ const Slot* slot = at(offset);
- slot = os_aio_array_get_nth_slot(array, segment + i);
+ for (ulint i = 0; i < n; ++i, ++slot) {
- if (slot->reserved) {
+ if (slot->is_reserved) {
/* Found an i/o request */
- os_mutex_exit(array->mutex);
+ release();
os_event_t event;
@@ -4672,15 +6477,12 @@ os_aio_simulated_wake_handler_thread(
}
}
- os_mutex_exit(array->mutex);
+ release();
}
-/**********************************************************************//**
-Wakes up simulated aio i/o-handler threads if they have something to do. */
-UNIV_INTERN
+/** Wakes up simulated aio i/o-handler threads if they have something to do. */
void
-os_aio_simulated_wake_handler_threads(void)
-/*=======================================*/
+os_aio_simulated_wake_handler_threads()
{
if (srv_use_native_aio) {
/* We do not use simulated aio: do nothing */
@@ -4688,1300 +6490,1078 @@ os_aio_simulated_wake_handler_threads(void)
return;
}
- os_aio_recommend_sleep_for_read_threads = FALSE;
+ os_aio_recommend_sleep_for_read_threads = false;
for (ulint i = 0; i < os_aio_n_segments; i++) {
- os_aio_simulated_wake_handler_thread(i);
+ AIO::wake_simulated_handler_thread(i);
}
}
-#ifdef _WIN32
-/**********************************************************************//**
-This function can be called if one wants to post a batch of reads and
-prefers an i/o-handler thread to handle them all at once later. You must
-call os_aio_simulated_wake_handler_threads later to ensure the threads
-are not left sleeping! */
-UNIV_INTERN
-void
-os_aio_simulated_put_read_threads_to_sleep()
+/** Select the IO slot array
+@param[in,out] type Type of IO, READ or WRITE
+@param[in] read_only true if running in read-only mode
+@param[in] mode IO mode
+@return slot array or NULL if invalid mode specified */
+AIO*
+AIO::select_slot_array(IORequest& type, bool read_only, ulint mode)
{
+ AIO* array;
-/* The idea of putting background IO threads to sleep is only for
-Windows when using simulated AIO. Windows XP seems to schedule
-background threads too eagerly to allow for coalescing during
-readahead requests. */
+ ut_ad(type.validate());
- os_aio_array_t* array;
+ switch (mode) {
+ case OS_AIO_NORMAL:
- if (srv_use_native_aio) {
- /* We do not use simulated aio: do nothing */
+ array = type.is_read() ? AIO::s_reads : AIO::s_writes;
+ break;
- return;
- }
+ case OS_AIO_IBUF:
+ ut_ad(type.is_read());
- os_aio_recommend_sleep_for_read_threads = TRUE;
+ /* Reduce probability of deadlock bugs in connection with ibuf:
+ do not let the ibuf i/o handler sleep */
- for (ulint i = 0; i < os_aio_n_segments; i++) {
- os_aio_get_array_and_local_segment(&array, i);
+ type.clear_do_not_wake();
- if (array == os_aio_read_array) {
+ array = read_only ? AIO::s_reads : AIO::s_ibuf;
+ break;
- os_event_reset(os_aio_segment_wait_events[i]);
- }
+ case OS_AIO_LOG:
+
+ array = read_only ? AIO::s_reads : AIO::s_log;
+ break;
+
+ case OS_AIO_SYNC:
+
+ array = AIO::s_sync;
+#if defined(LINUX_NATIVE_AIO)
+ /* In Linux native AIO we don't use sync IO array. */
+ ut_a(!srv_use_native_aio);
+#endif /* LINUX_NATIVE_AIO */
+ break;
+
+ default:
+ ut_error;
+ array = NULL; /* Eliminate compiler warning */
}
+
+ return(array);
}
-#endif /* _WIN32 */
-#if defined(LINUX_NATIVE_AIO)
-/*******************************************************************//**
-Dispatch an AIO request to the kernel.
-@return TRUE on success. */
+#ifdef WIN_ASYNC_IO
+/** This function is only used in Windows asynchronous i/o.
+Waits for an aio operation to complete. This function is used to wait
+for completed requests. The aio array of pending requests is divided
+into segments. The thread specifies which segment or slot it wants to wait
+for. NOTE: this function will also take care of freeing the aio slot,
+therefore no other thread is allowed to do the freeing!
+@param[in] segment The number of the segment in the aio arrays to
+ wait for; segment 0 is the ibuf I/O thread,
+ segment 1 the log I/O thread, then follow the
+ non-ibuf read threads, and as the last are the
+ non-ibuf write threads; if this is
+ ULINT_UNDEFINED, then it means that sync AIO
+ is used, and this parameter is ignored
+@param[in] pos this parameter is used only in sync AIO:
+ wait for the aio slot at this position
+@param[out] m1 the messages passed with the AIO request; note
+ that also in the case where the AIO operation
+ failed, these output parameters are valid and
+ can be used to restart the operation,
+ for example
+@param[out] m2 callback message
+@param[out] type OS_FILE_WRITE or ..._READ
+@return DB_SUCCESS or error code */
static
-ibool
-os_aio_linux_dispatch(
-/*==================*/
- os_aio_array_t* array, /*!< in: io request array. */
- os_aio_slot_t* slot) /*!< in: an already reserved slot. */
+dberr_t
+os_aio_windows_handler(
+ ulint segment,
+ ulint pos,
+ fil_node_t** m1,
+ void** m2,
+ IORequest* type)
{
- int ret;
- ulint io_ctx_index;
- struct iocb* iocb;
+ Slot* slot = NULL;
+ dberr_t err;
- ut_ad(slot != NULL);
- ut_ad(array);
+ BOOL ret;
+ ULONG_PTR key;
- ut_a(slot->reserved);
+ ut_a(segment != ULINT_UNDEFINED);
- /* Find out what we are going to work with.
- The iocb struct is directly in the slot.
- The io_context is one per segment. */
+ /* NOTE! We only access constant fields in os_aio_array. Therefore
+ we do not have to acquire the protecting mutex yet */
- iocb = &slot->control;
- io_ctx_index = (slot->pos * array->n_segments) / array->n_slots;
+ ut_ad(os_aio_validate_skip());
+ AIO *my_array;
+ AIO::get_array_and_local_segment(&my_array, segment);
- ret = io_submit(array->aio_ctx[io_ctx_index], 1, &iocb);
+ HANDLE port = my_array->m_completion_port;
+ ut_ad(port);
+ for (;;) {
+ DWORD len;
+ ret = GetQueuedCompletionStatus(port, &len, &key,
+ (OVERLAPPED **)&slot, INFINITE);
+
+ /* If shutdown key was received, repost the shutdown message and exit */
+ if (ret && key == IOCP_SHUTDOWN_KEY) {
+ PostQueuedCompletionStatus(port, 0, key, NULL);
+ *m1 = NULL;
+ *m2 = NULL;
+ return (DB_SUCCESS);
+ }
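+
+ /* Note: re-posting IOCP_SHUTDOWN_KEY above lets the single
+ shutdown message cascade to every handler thread blocked on
+ this completion port, so each of them wakes up and returns. */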
-#if defined(UNIV_AIO_DEBUG)
- fprintf(stderr,
- "io_submit[%c] ret[%d]: slot[%p] ctx[%p] seg[%lu]\n",
- (slot->type == OS_FILE_WRITE) ? 'w' : 'r', ret, slot,
- array->aio_ctx[io_ctx_index], (ulong) io_ctx_index);
-#endif
+ ut_a(slot);
- /* io_submit returns number of successfully
- queued requests or -errno. */
- if (UNIV_UNLIKELY(ret != 1)) {
- errno = -ret;
- return(FALSE);
+ if (!ret) {
+ /* IO failed */
+ break;
+ }
+
+ slot->n_bytes= len;
+ ut_a(slot->array);
+ HANDLE slot_port = slot->array->m_completion_port;
+ if (slot_port != port) {
+ /* there are no redirections between data and log */
+ ut_ad(port == data_completion_port);
+ ut_ad(slot_port != log_completion_port);
+
+ /*
+ Redirect completions to the dedicated completion port
+ and threads.
+
+ "Write array" threads receive write,read and ibuf
+ notifications, read and ibuf completions are redirected.
+
+ Forwarding IO completion this way costs a context switch,
+ and this seems tolerable since asynchronous reads are far
+ less frequent.
+ */
+ ut_a(PostQueuedCompletionStatus(slot_port,
+ len, key, &slot->control));
+ } else {
+ break;
+ }
}
- return(TRUE);
-}
-#endif /* LINUX_NATIVE_AIO */
+ ut_a(slot->is_reserved);
+
+ *m1 = slot->m1;
+ *m2 = slot->m2;
+
+ *type = slot->type;
+
+ bool retry = false;
+
+ if (ret && slot->n_bytes == slot->len) {
+
+ err = DB_SUCCESS;
+
+ } else if (os_file_handle_error(slot->name, "Windows aio")) {
+
+ retry = true;
+
+ } else {
+
+ err = DB_IO_ERROR;
+ }
+
+
+ if (retry) {
+ /* Retry failed read/write operation synchronously. */
+
+#ifdef UNIV_PFS_IO
+ /* This read/write does not go through os_file_read
+ and os_file_write APIs, need to register with
+ performance schema explicitly here. */
+ PSI_file_locker_state state;
+ struct PSI_file_locker* locker = NULL;
+
+ register_pfs_file_io_begin(
+ &state, locker, slot->file, slot->len,
+ slot->type.is_write()
+ ? PSI_FILE_WRITE : PSI_FILE_READ, __FILE__, __LINE__);
+#endif /* UNIV_PFS_IO */
+
+ ut_a((slot->len & 0xFFFFFFFFUL) == slot->len);
+
+ ssize_t n_bytes = SyncFileIO::execute(slot);
+#ifdef UNIV_PFS_IO
+ register_pfs_file_io_end(locker, slot->len);
+#endif /* UNIV_PFS_IO */
+
+ err = (n_bytes == slot->len) ? DB_SUCCESS : DB_IO_ERROR;
+ }
-/*******************************************************************//**
+ if (err == DB_SUCCESS) {
+ err = AIOHandler::post_io_processing(slot);
+ }
+
+ slot->array->release_with_mutex(slot);
+
+ if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS
+ && !buf_page_cleaner_is_active
+ && os_aio_all_slots_free()) {
+ /* Last IO, wakeup other io threads */
+ AIO::wake_at_shutdown();
+ }
+ return(err);
+}
+#endif /* WIN_ASYNC_IO */
+
+/**
NOTE! Use the corresponding macro os_aio(), not directly this function!
Requests an asynchronous i/o operation.
-@return TRUE if request was queued successfully, FALSE if fail */
-UNIV_INTERN
-ibool
+@param[in,out] type IO request context
+@param[in] mode IO mode
+@param[in] name Name of the file or path as NUL terminated
+ string
+@param[in] file Open file handle
+@param[in,out] buf buffer where to read to or write from
+@param[in] offset file offset where to read or write
+@param[in] n number of bytes to read or write
+@param[in] read_only if true read only mode checks are enforced
+@param[in,out] m1 Message for the AIO handler, (can be used to
+ identify a completed AIO operation); ignored
+ if mode is OS_AIO_SYNC
+@param[in,out] m2 message for the AIO handler (can be used to
+ identify a completed AIO operation); ignored
+ if mode is OS_AIO_SYNC
+
+@return DB_SUCCESS or error code */
+dberr_t
os_aio_func(
-/*========*/
- ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
- ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
- ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed
- to OS_AIO_SIMULATED_WAKE_LATER: the
- last flag advises this function not to wake
- i/o-handler threads, but the caller will
- do the waking explicitly later, in this
- way the caller can post several requests in
- a batch; NOTE that the batch must not be
- so big that it exhausts the slots in aio
- arrays! NOTE that a simulated batch
- may introduce hidden chances of deadlocks,
- because i/os are not actually handled until
- all have been posted: use with great
- caution! */
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- pfs_os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read or from which
- to write */
- os_offset_t offset, /*!< in: file offset where to read or write */
- ulint n, /*!< in: number of bytes to read or write */
- ulint page_size, /*!< in: page size in bytes */
- fil_node_t* message1,/*!< in: message for the aio handler
- (can be used to identify a completed
- aio operation); ignored if mode is
- OS_AIO_SYNC */
- void* message2,/*!< in: message for the aio handler
- (can be used to identify a completed
- aio operation); ignored if mode is
- OS_AIO_SYNC */
- ulint* write_size)/*!< in/out: Actual write size initialized
- after fist successfull trim
- operation for this page and if
- initialized we do not trim again if
- actual page size does not decrease. */
-{
- os_aio_array_t* array;
- os_aio_slot_t* slot;
+ IORequest& type,
+ ulint mode,
+ const char* name,
+ pfs_os_file_t file,
+ void* buf,
+ os_offset_t offset,
+ ulint n,
+ bool read_only,
+ fil_node_t* m1,
+ void* m2)
+{
#ifdef WIN_ASYNC_IO
- void* buffer = NULL;
- ibool retval;
- BOOL ret = TRUE;
- DWORD len = (DWORD) n;
- struct fil_node_t* dummy_mess1;
- void* dummy_mess2;
- ulint dummy_type;
+ BOOL ret = TRUE;
#endif /* WIN_ASYNC_IO */
- ulint wake_later;
- ut_ad(buf);
+
ut_ad(n > 0);
- ut_ad(n % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_ad(offset % OS_FILE_LOG_BLOCK_SIZE == 0);
+ ut_ad((n % OS_FILE_LOG_BLOCK_SIZE) == 0);
+ ut_ad((offset % OS_FILE_LOG_BLOCK_SIZE) == 0);
ut_ad(os_aio_validate_skip());
+
#ifdef WIN_ASYNC_IO
ut_ad((n & 0xFFFFFFFFUL) == n);
-#endif
-
-
- wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
- mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER);
+#endif /* WIN_ASYNC_IO */
DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
mode = OS_AIO_SYNC; os_has_said_disk_full = FALSE;);
- if (mode == OS_AIO_SYNC
-#ifdef WIN_ASYNC_IO
- && !srv_use_native_aio
-#endif /* WIN_ASYNC_IO */
- ) {
- ibool ret;
-
- /* This is actually an ordinary synchronous read or write:
- no need to use an i/o-handler thread. NOTE that if we use
- Windows async i/o, Windows does not allow us to use
- ordinary synchronous os_file_read etc. on the same file,
- therefore we have built a special mechanism for synchronous
- wait in the Windows case.
- Also note that the Performance Schema instrumentation has
- been performed by current os_aio_func()'s wrapper function
- pfs_os_aio_func(). So we would no longer need to call
- Performance Schema instrumented os_file_read() and
- os_file_write(). Instead, we should use os_file_read_func()
- and os_file_write_func() */
-
- if (type == OS_FILE_READ) {
- ret = os_file_read_func(file, buf, offset, n);
- } else {
+ if (mode == OS_AIO_SYNC) {
+ if (type.is_read()) {
+ return(os_file_read_func(type, file, buf, offset, n));
+ }
- ut_ad(!srv_read_only_mode);
- ut_a(type == OS_FILE_WRITE);
+ ut_ad(type.is_write());
- ret = os_file_write_func(name, file, buf, offset, n);
+ return(os_file_write_func(type, name, file, buf, offset, n));
+ }
- DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
- os_has_said_disk_full = FALSE; ret = 0; errno = 28;);
+try_again:
- if (!ret) {
- os_file_handle_error_cond_exit(name, "os_file_write_func", TRUE, FALSE,
- __FILE__, __LINE__);
- }
- }
+ AIO* array;
- return ret;
- }
+ array = AIO::select_slot_array(type, read_only, mode);
-try_again:
- switch (mode) {
- case OS_AIO_NORMAL:
- if (type == OS_FILE_READ) {
- array = os_aio_read_array;
- } else {
- ut_ad(!srv_read_only_mode);
- array = os_aio_write_array;
- }
- break;
- case OS_AIO_IBUF:
- ut_ad(type == OS_FILE_READ);
- /* Reduce probability of deadlock bugs in connection with ibuf:
- do not let the ibuf i/o handler sleep */
+ Slot* slot;
- wake_later = FALSE;
+ slot = array->reserve_slot(type, m1, m2, file, name, buf, offset, n);
- if (srv_read_only_mode) {
- array = os_aio_read_array;
- } else {
- array = os_aio_ibuf_array;
- }
- break;
- case OS_AIO_LOG:
- if (srv_read_only_mode) {
- array = os_aio_read_array;
- } else {
- array = os_aio_log_array;
- }
- break;
- case OS_AIO_SYNC:
- array = os_aio_sync_array;
-#if defined(LINUX_NATIVE_AIO)
- /* In Linux native AIO we don't use sync IO array. */
- ut_a(!srv_use_native_aio);
-#endif /* LINUX_NATIVE_AIO */
- break;
- default:
- ut_error;
- array = NULL; /* Eliminate compiler warning */
- }
+ if (type.is_read()) {
- slot = os_aio_array_reserve_slot(type, is_log, array, message1, message2, file,
- name, buf, offset, n, page_size, write_size);
- if (type == OS_FILE_READ) {
if (srv_use_native_aio) {
- os_n_file_reads++;
+
+ ++os_n_file_reads;
+
os_bytes_read_since_printout += n;
#ifdef WIN_ASYNC_IO
- ret = ReadFile(file, buf, (DWORD) n, &len,
- &(slot->control));
+ ret = ReadFile(
+ file, slot->ptr, slot->len,
+ NULL, &slot->control);
#elif defined(LINUX_NATIVE_AIO)
- if (!os_aio_linux_dispatch(array, slot)) {
+ if (!array->linux_dispatch(slot)) {
goto err_exit;
}
#endif /* WIN_ASYNC_IO */
- } else {
- if (!wake_later) {
- os_aio_simulated_wake_handler_thread(
- os_aio_get_segment_no_from_slot(
- array, slot));
- }
+ } else if (type.is_wake()) {
+ AIO::wake_simulated_handler_thread(
+ AIO::get_segment_no_from_slot(array, slot));
}
- } else if (type == OS_FILE_WRITE) {
- ut_ad(!srv_read_only_mode);
+ } else if (type.is_write()) {
+
if (srv_use_native_aio) {
- os_n_file_writes++;
-#ifdef WIN_ASYNC_IO
+ ++os_n_file_writes;
- n = slot->len;
- buffer = buf;
- ret = WriteFile(file, buffer, (DWORD) n, &len,
- &(slot->control));
+#ifdef WIN_ASYNC_IO
+ ret = WriteFile(
+ file, slot->ptr, slot->len,
+ NULL, &slot->control);
#elif defined(LINUX_NATIVE_AIO)
- if (!os_aio_linux_dispatch(array, slot)) {
+ if (!array->linux_dispatch(slot)) {
goto err_exit;
}
#endif /* WIN_ASYNC_IO */
- } else {
- if (!wake_later) {
- os_aio_simulated_wake_handler_thread(
- os_aio_get_segment_no_from_slot(
- array, slot));
- }
+
+ } else if (type.is_wake()) {
+ AIO::wake_simulated_handler_thread(
+ AIO::get_segment_no_from_slot(array, slot));
}
} else {
ut_error;
}
#ifdef WIN_ASYNC_IO
- if (srv_use_native_aio) {
- if ((ret && len == n)
- || (!ret && GetLastError() == ERROR_IO_PENDING)) {
- /* aio was queued successfully! */
-
- if (mode == OS_AIO_SYNC) {
- /* We want a synchronous i/o operation on a
- file where we also use async i/o: in Windows
- we must use the same wait mechanism as for
- async i/o */
-
- retval = os_aio_windows_handle(
- ULINT_UNDEFINED, slot->pos,
- &dummy_mess1, &dummy_mess2,
- &dummy_type);
-
- return(retval);
- }
+ if (ret || (GetLastError() == ERROR_IO_PENDING)) {
+ /* aio completed or was queued successfully! */
+ return(DB_SUCCESS);
+ }
- return(TRUE);
- }
+ goto err_exit;
- goto err_exit;
- }
#endif /* WIN_ASYNC_IO */
- /* aio was queued successfully! */
- return(TRUE);
+
+ /* AIO request was queued successfully! */
+ return(DB_SUCCESS);
#if defined LINUX_NATIVE_AIO || defined WIN_ASYNC_IO
err_exit:
#endif /* LINUX_NATIVE_AIO || WIN_ASYNC_IO */
- os_aio_array_free_slot(array, slot);
+
+ array->release_with_mutex(slot);
if (os_file_handle_error(
- name,type == OS_FILE_READ ? "aio read" : "aio write", __FILE__, __LINE__)) {
+ name, type.is_read() ? "aio read" : "aio write")) {
goto try_again;
}
- return(FALSE);
+ return(DB_IO_ERROR);
}
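+
+/* Illustrative call sequence (not part of this change), assuming a
+hypothetical fil_node_t* node with an open handle, plus a buffer buf and
+an offset/length pair prepared by the caller:
+
+	IORequest	request(IORequest::READ);
+
+	dberr_t	err = os_aio(request, OS_AIO_NORMAL, node->name,
+			     node->handle, buf, offset, n,
+			     srv_read_only_mode, node, NULL);
+
+os_aio() is the instrumented macro wrapper around os_aio_func(); with
+OS_AIO_SYNC the request completes before returning, otherwise it is
+queued and reaped later by an i/o-handler thread. */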
-#ifdef WIN_ASYNC_IO
-/**********************************************************************//**
-This function is only used in Windows asynchronous i/o.
-Waits for an aio operation to complete. This function is used to wait the
-for completed requests. The aio array of pending requests is divided
-into segments. The thread specifies which segment or slot it wants to wait
-for. NOTE: this function will also take care of freeing the aio slot,
-therefore no other thread is allowed to do the freeing!
-@return TRUE if the aio operation succeeded */
-UNIV_INTERN
-ibool
-os_aio_windows_handle(
-/*==================*/
- ulint segment, /*!< in: the number of the segment in the aio
- arrays to wait for; segment 0 is the ibuf
- i/o thread, segment 1 the log i/o thread,
- then follow the non-ibuf read threads, and as
- the last are the non-ibuf write threads; if
- this is ULINT_UNDEFINED, then it means that
- sync aio is used, and this parameter is
- ignored */
- ulint pos, /*!< this parameter is used only in sync aio:
- wait for the aio slot at this position */
- fil_node_t**message1, /*!< out: the messages passed with the aio
- request; note that also in the case where
- the aio operation failed, these output
- parameters are valid and can be used to
- restart the operation, for example */
- void** message2,
- ulint* type) /*!< out: OS_FILE_WRITE or ..._READ */
-{
- ulint orig_seg = segment;
- os_aio_array_t* array;
- os_aio_slot_t* slot;
- ulint n;
- ulint i;
- ibool ret_val;
- BOOL ret;
- DWORD len;
- BOOL retry = FALSE;
+/** Simulated AIO handler for reaping IO requests */
+class SimulatedAIOHandler {
+
+public:
+
+ /** Constructor
+ @param[in,out] array The AIO array
+ @param[in] segment Local segment in the array */
+ SimulatedAIOHandler(AIO* array, ulint segment)
+ :
+ m_oldest(),
+ m_n_elems(),
+ m_lowest_offset(IB_UINT64_MAX),
+ m_array(array),
+ m_n_slots(),
+ m_segment(segment),
+ m_ptr(),
+ m_buf()
+ {
+ ut_ad(m_segment < 100);
- if (segment == ULINT_UNDEFINED) {
- segment = 0;
- array = os_aio_sync_array;
- } else {
- segment = os_aio_get_array_and_local_segment(&array, segment);
+ m_slots.resize(OS_AIO_MERGE_N_CONSECUTIVE);
}
- /* NOTE! We only access constant fields in os_aio_array. Therefore
- we do not have to acquire the protecting mutex yet */
-
- ut_ad(os_aio_validate_skip());
- ut_ad(segment < array->n_segments);
-
- n = array->n_slots / array->n_segments;
-
- if (array == os_aio_sync_array) {
-
- WaitForSingleObject(
- os_aio_array_get_nth_slot(array, pos)->handle,
- INFINITE);
-
- i = pos;
-
- } else {
- if (orig_seg != ULINT_UNDEFINED) {
- srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
+ /** Destructor */
+ ~SimulatedAIOHandler()
+ {
+ if (m_ptr != NULL) {
+ ut_free(m_ptr);
}
-
- i = WaitForMultipleObjects(
- (DWORD) n, array->handles + segment * n,
- FALSE, INFINITE);
}
- os_mutex_enter(array->mutex);
+ /** Reset the state of the handler
+ @param[in] n_slots Number of pending AIO operations supported */
+ void init(ulint n_slots)
+ {
+ m_oldest = 0;
+ m_n_elems = 0;
+ m_n_slots = n_slots;
+ m_lowest_offset = IB_UINT64_MAX;
+
+ if (m_ptr != NULL) {
+ ut_free(m_ptr);
+ m_ptr = m_buf = NULL;
+ }
- if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS
- && array->n_reserved == 0) {
- *message1 = NULL;
- *message2 = NULL;
- os_mutex_exit(array->mutex);
- return(TRUE);
+ m_slots[0] = NULL;
}
- ut_a(i >= WAIT_OBJECT_0 && i <= WAIT_OBJECT_0 + n);
+ /** Check if there is a slot for which the i/o has already been done
+ @param[out] n_reserved Number of reserved slots
+ @return the first completed slot that is found. */
+ Slot* check_completed(ulint* n_reserved)
+ {
+ ulint offset = m_segment * m_n_slots;
+
+ *n_reserved = 0;
- slot = os_aio_array_get_nth_slot(array, i + segment * n);
+ Slot* slot;
- ut_a(slot->reserved);
+ slot = m_array->at(offset);
- if (orig_seg != ULINT_UNDEFINED) {
- srv_set_io_thread_op_info(
- orig_seg, "get windows aio return value");
- }
- ret = GetOverlappedResult(slot->file, &slot->control, &len, TRUE);
+ for (ulint i = 0; i < m_n_slots; ++i, ++slot) {
- *message1 = slot->message1;
- *message2 = slot->message2;
+ if (slot->is_reserved) {
- *type = slot->type;
+ if (slot->io_already_done) {
- if (ret && len == slot->len) {
+ ut_a(slot->is_reserved);
- ret_val = TRUE;
- } else if (!ret || (len != slot->len)) {
+ return(slot);
+ }
- if (!ret) {
- if (os_file_handle_error(slot->name, "Windows aio", __FILE__, __LINE__)) {
- retry = TRUE;
- } else {
- ret_val = FALSE;
+ ++*n_reserved;
}
- } else {
- retry = TRUE;
}
- } else {
- ret_val = FALSE;
+ return(NULL);
}
- os_mutex_exit(array->mutex);
-
- if (retry) {
- /* retry failed read/write operation synchronously.
- No need to hold array->mutex. */
-
-#ifdef UNIV_PFS_IO
- /* This read/write does not go through os_file_read
- and os_file_write APIs, need to register with
- performance schema explicitly here. */
- struct PSI_file_locker* locker = NULL;
- PSI_file_locker_state state;
- register_pfs_file_io_begin(&state, locker, slot->file, slot->len,
- (slot->type == OS_FILE_WRITE)
- ? PSI_FILE_WRITE
- : PSI_FILE_READ,
- __FILE__, __LINE__);
-#endif
-
- ut_a((slot->len & 0xFFFFFFFFUL) == slot->len);
+ /** If there are requests at least 2 seconds old, then pick the
+ oldest one to prevent starvation. If several requests have the
+ same age, then pick the one at the lowest offset.
+ @return true if request was selected */
+ bool select()
+ {
+ if (!select_oldest()) {
- switch (slot->type) {
- case OS_FILE_WRITE:
- ret = WriteFile(slot->file, slot->buf,
- (DWORD) slot->len, &len,
- &(slot->control));
- break;
- case OS_FILE_READ:
- ret = ReadFile(slot->file, slot->buf,
- (DWORD) slot->len, &len,
- &(slot->control));
- break;
- default:
- ut_error;
+ return(select_lowest_offset());
}
-#ifdef UNIV_PFS_IO
- register_pfs_file_io_end(locker, len);
-#endif
+ return(true);
+ }
- if (!ret && GetLastError() == ERROR_IO_PENDING) {
- /* aio was queued successfully!
- We want a synchronous i/o operation on a
- file where we also use async i/o: in Windows
- we must use the same wait mechanism as for
- async i/o */
- ret = GetOverlappedResult(slot->file,
- &(slot->control),
- &len, TRUE);
- }
+ /** Check if there are several consecutive blocks
+ to read or write. Merge them if found. */
+ void merge()
+ {
+ /* if m_n_elems != 0, then we have assigned
+ something valid to m_slots[0] */
+ ut_ad(m_n_elems != 0);
+ ut_ad(first_slot() != NULL);
- ret_val = ret && len == slot->len;
- }
+ Slot* slot = first_slot();
- if (slot->type == OS_FILE_WRITE &&
- !slot->is_log &&
- srv_use_trim &&
- !os_fallocate_failed) {
- // Deallocate unused blocks from file system
- os_file_trim(slot);
+ while (!merge_adjacent(slot)) {
+ /* No op */
+ }
}
- os_aio_array_free_slot(array, slot);
-
- return(ret_val);
-}
-#endif
+ /** We have now collected the consecutive I/O requests
+ in m_slots; allocate a single buffer which can hold
+ all of the data
+ @return the length of the buffer */
+ ulint allocate_buffer()
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ ulint len;
+ Slot* slot = first_slot();
-#if defined(LINUX_NATIVE_AIO)
-/******************************************************************//**
-This function is only used in Linux native asynchronous i/o. This is
-called from within the io-thread. If there are no completed IO requests
-in the slot array, the thread calls this function to collect more
-requests from the kernel.
-The io-thread waits on io_getevents(), which is a blocking call, with
-a timeout value. Unless the system is very heavy loaded, keeping the
-io-thread very busy, the io-thread will spend most of its time waiting
-in this function.
-The io-thread also exits in this function. It checks server status at
-each wakeup and that is why we use timed wait in io_getevents(). */
-static
-void
-os_aio_linux_collect(
-/*=================*/
- os_aio_array_t* array, /*!< in/out: slot array. */
- ulint segment, /*!< in: local segment no. */
- ulint seg_size) /*!< in: segment size. */
-{
- int i;
- int ret;
- ulint start_pos;
- ulint end_pos;
- struct timespec timeout;
- struct io_event* events;
- struct io_context* io_ctx;
+ ut_ad(m_ptr == NULL);
- /* sanity checks. */
- ut_ad(array != NULL);
- ut_ad(seg_size > 0);
- ut_ad(segment < array->n_segments);
+ if (slot->type.is_read() && m_n_elems > 1) {
- /* Which part of event array we are going to work on. */
- events = &array->aio_events[segment * seg_size];
+ len = 0;
- /* Which io_context we are going to use. */
- io_ctx = array->aio_ctx[segment];
+ for (ulint i = 0; i < m_n_elems; ++i) {
+ len += m_slots[i]->len;
+ }
- /* Starting point of the segment we will be working on. */
- start_pos = segment * seg_size;
+ m_ptr = static_cast<byte*>(
+ ut_malloc_nokey(len + UNIV_PAGE_SIZE));
- /* End point. */
- end_pos = start_pos + seg_size;
+ m_buf = static_cast<byte*>(
+ ut_align(m_ptr, UNIV_PAGE_SIZE));
-retry:
+ } else {
+ len = first_slot()->len;
+ m_buf = first_slot()->buf;
+ }
- /* Initialize the events. The timeout value is arbitrary.
- We probably need to experiment with it a little. */
- memset(events, 0, sizeof(*events) * seg_size);
- timeout.tv_sec = 0;
- timeout.tv_nsec = OS_AIO_REAP_TIMEOUT;
+ return(len);
+ }
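+
+ /* Example (illustrative): three merged 16KiB reads allocate a
+ single 48KiB buffer, over-allocated by UNIV_PAGE_SIZE so that
+ it can be aligned; a lone request, or any write, simply reuses
+ the first slot's own buffer (write combining is not supported
+ for simulated AIO, see os_aio_simulated_handler()). */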
- ret = io_getevents(io_ctx, 1, seg_size, events, &timeout);
+ /** We have to compress the individual pages and punch
+ holes in them on a page-by-page basis when writing to
+ tables that can be compressed at the IO level.
+ @param[in] len Value returned by allocate_buffer */
+ void copy_to_buffer(ulint len)
+ {
+ Slot* slot = first_slot();
- if (ret > 0) {
- for (i = 0; i < ret; i++) {
- os_aio_slot_t* slot;
- struct iocb* control;
+ if (len > slot->len && slot->type.is_write()) {
- control = (struct iocb*) events[i].obj;
- ut_a(control != NULL);
+ byte* ptr = m_buf;
- slot = (os_aio_slot_t*) control->data;
+ ut_ad(ptr != slot->buf);
- /* Some sanity checks. */
- ut_a(slot != NULL);
- ut_a(slot->reserved);
-
-#if defined(UNIV_AIO_DEBUG)
- fprintf(stderr,
- "io_getevents[%c]: slot[%p] ctx[%p]"
- " seg[%lu]\n",
- (slot->type == OS_FILE_WRITE) ? 'w' : 'r',
- slot, io_ctx, segment);
-#endif
+ /* Copy the buffers to the combined buffer */
+ for (ulint i = 0; i < m_n_elems; ++i) {
- /* We are not scribbling previous segment. */
- ut_a(slot->pos >= start_pos);
+ slot = m_slots[i];
- /* We have not overstepped to next segment. */
- ut_a(slot->pos < end_pos);
+ memmove(ptr, slot->buf, slot->len);
- if (slot->type == OS_FILE_WRITE &&
- !slot->is_log &&
- srv_use_trim &&
- !os_fallocate_failed) {
- // Deallocate unused blocks from file system
- os_file_trim(slot);
+ ptr += slot->len;
}
-
- /* Mark this request as completed. The error handling
- will be done in the calling function. */
- os_mutex_enter(array->mutex);
- slot->n_bytes = events[i].res;
- slot->ret = events[i].res2;
- slot->io_already_done = TRUE;
- os_mutex_exit(array->mutex);
}
- return;
- }
-
- if (UNIV_UNLIKELY(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS)) {
- return;
}
- /* This error handling is for any error in collecting the
- IO requests. The errors, if any, for any particular IO
- request are simply passed on to the calling routine. */
-
- switch (ret) {
- case -EAGAIN:
- /* Not enough resources! Try again. */
- case -EINTR:
- /* Interrupted! I have tested the behaviour in case of an
- interrupt. If we have some completed IOs available then
- the return code will be the number of IOs. We get EINTR only
- if there are no completed IOs and we have been interrupted. */
- case 0:
- /* No pending request! Go back and check again. */
- goto retry;
- }
-
- /* All other errors should cause a trap for now. */
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: unexpected ret_code[%d] from io_getevents()!\n",
- ret);
- ut_error;
-}
-
-/**********************************************************************//**
-This function is only used in Linux native asynchronous i/o.
-Waits for an aio operation to complete. This function is used to wait for
-the completed requests. The aio array of pending requests is divided
-into segments. The thread specifies which segment or slot it wants to wait
-for. NOTE: this function will also take care of freeing the aio slot,
-therefore no other thread is allowed to do the freeing!
-@return TRUE if the IO was successful */
-UNIV_INTERN
-ibool
-os_aio_linux_handle(
-/*================*/
- ulint global_seg, /*!< in: segment number in the aio array
- to wait for; segment 0 is the ibuf
- i/o thread, segment 1 is log i/o thread,
- then follow the non-ibuf read threads,
- and the last are the non-ibuf write
- threads. */
- fil_node_t**message1, /*!< out: the messages passed with the */
- void** message2, /*!< aio request; note that in case the
- aio operation failed, these output
- parameters are valid and can be used to
- restart the operation. */
- ulint* type) /*!< out: OS_FILE_WRITE or ..._READ */
-{
- ulint segment;
- os_aio_array_t* array;
- os_aio_slot_t* slot;
- ulint n;
- ulint i;
- ibool ret = FALSE;
+ /** Do the I/O with ordinary, synchronous i/o functions */
+ void io()
+ {
+ if (first_slot()->type.is_write()) {
- /* Should never be doing Sync IO here. */
- ut_a(global_seg != ULINT_UNDEFINED);
+ for (ulint i = 0; i < m_n_elems; ++i) {
+ write(m_slots[i]);
+ }
- /* Find the array and the local segment. */
- segment = os_aio_get_array_and_local_segment(&array, global_seg);
- n = array->n_slots / array->n_segments;
+ } else {
- /* Loop until we have found a completed request. */
- for (;;) {
- ibool any_reserved = FALSE;
- os_mutex_enter(array->mutex);
- for (i = 0; i < n; ++i) {
- slot = os_aio_array_get_nth_slot(
- array, i + segment * n);
- if (!slot->reserved) {
- continue;
- } else if (slot->io_already_done) {
- /* Something for us to work on. */
- goto found;
- } else {
- any_reserved = TRUE;
+ for (ulint i = 0; i < m_n_elems; ++i) {
+ read(m_slots[i]);
}
}
+ }
- os_mutex_exit(array->mutex);
-
- /* There is no completed request.
- If there is no pending request at all,
- and the system is being shut down, exit. */
- if (UNIV_UNLIKELY
- (!any_reserved
- && srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS)) {
- *message1 = NULL;
- *message2 = NULL;
- return(TRUE);
+ /** Mark the i/os done in slots */
+ void done()
+ {
+ for (ulint i = 0; i < m_n_elems; ++i) {
+ m_slots[i]->io_already_done = true;
}
-
- /* Wait for some request. Note that we return
- from wait iff we have found a request. */
-
- srv_set_io_thread_op_info(global_seg,
- "waiting for completed aio requests");
- os_aio_linux_collect(array, segment, n);
}
-found:
- /* Note that it may be that there are more then one completed
- IO requests. We process them one at a time. We may have a case
- here to improve the performance slightly by dealing with all
- requests in one sweep. */
- srv_set_io_thread_op_info(global_seg,
- "processing completed aio requests");
-
- /* Ensure that we are scribbling only our segment. */
- ut_a(i < n);
-
- ut_ad(slot != NULL);
- ut_ad(slot->reserved);
- ut_ad(slot->io_already_done);
+ /** @return the first slot in the consecutive array */
+ Slot* first_slot()
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ ut_a(m_n_elems > 0);
- *message1 = slot->message1;
- *message2 = slot->message2;
+ return(m_slots[0]);
+ }
- *type = slot->type;
+ /** Wait for I/O requests
+ @param[in] global_segment The global segment
+ @param[in,out] event Wait on event if no active requests
+ @return the number of slots */
+ ulint check_pending(
+ ulint global_segment,
+ os_event_t event)
+ MY_ATTRIBUTE((warn_unused_result));
+private:
- if (slot->ret == 0 && slot->n_bytes == (long) slot->len) {
+ /** Do the file read
+ @param[in,out] slot Slot that has the IO context */
+ void read(Slot* slot)
+ {
+ dberr_t err = os_file_read(
+ slot->type,
+ slot->file,
+ slot->ptr,
+ slot->offset,
+ slot->len);
- ret = TRUE;
- } else {
- errno = -slot->ret;
+ ut_a(err == DB_SUCCESS);
+ }
- if (slot->ret == 0) {
- fprintf(stderr,
- "InnoDB: Number of bytes after aio %d requested %lu\n"
- "InnoDB: from file %s\n",
- slot->n_bytes, slot->len, slot->name);
- }
+ /** Do the file write
+ @param[in,out] slot Slot that has the IO context */
+ void write(Slot* slot)
+ {
+ dberr_t err = os_file_write(
+ slot->type,
+ slot->name,
+ slot->file,
+ slot->ptr,
+ slot->offset,
+ slot->len);
- /* os_file_handle_error does tell us if we should retry
- this IO. As it stands now, we don't do this retry when
- reaping requests from a different context than
- the dispatcher. This non-retry logic is the same for
- windows and linux native AIO.
- We should probably look into this to transparently
- re-submit the IO. */
- os_file_handle_error(slot->name, "Linux aio", __FILE__, __LINE__);
+ ut_a(err == DB_SUCCESS);
+ }
- ret = FALSE;
+ /** @return true if the slots are adjacent and can be merged */
+ bool adjacent(const Slot* s1, const Slot* s2) const
+ {
+ return(s1 != s2
+ && s1->file == s2->file
+ && s2->offset == s1->offset + s1->len
+ && s1->type == s2->type);
}
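+
+ /* Example (illustrative): two reserved slots on the same file
+ and of the same type, one at offset 0 with len 16384 and one
+ at offset 16384, are adjacent; merge_adjacent() chains such
+ slots into m_slots, up to OS_AIO_MERGE_N_CONSECUTIVE. */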
- os_mutex_exit(array->mutex);
+ /** @return true if merge limit reached or no adjacent slots found. */
+ bool merge_adjacent(Slot*& current)
+ {
+ Slot* slot;
+ ulint offset = m_segment * m_n_slots;
- os_aio_array_free_slot(array, slot);
+ slot = m_array->at(offset);
- return(ret);
-}
-#endif /* LINUX_NATIVE_AIO */
+ for (ulint i = 0; i < m_n_slots; ++i, ++slot) {
-/**********************************************************************//**
-Does simulated aio. This function should be called by an i/o-handler
-thread.
-@return TRUE if the aio operation succeeded */
-UNIV_INTERN
-ibool
-os_aio_simulated_handle(
-/*====================*/
- ulint global_segment, /*!< in: the number of the segment in the aio
- arrays to wait for; segment 0 is the ibuf
- i/o thread, segment 1 the log i/o thread,
- then follow the non-ibuf read threads, and as
- the last are the non-ibuf write threads */
- fil_node_t**message1, /*!< out: the messages passed with the aio
- request; note that also in the case where
- the aio operation failed, these output
- parameters are valid and can be used to
- restart the operation, for example */
- void** message2,
- ulint* type) /*!< out: OS_FILE_WRITE or ..._READ */
-{
- os_aio_array_t* array;
- ulint segment;
- os_aio_slot_t* consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE];
- ulint n_consecutive;
- ulint total_len;
- ulint offs;
- os_offset_t lowest_offset;
- ulint biggest_age;
- ulint age;
- byte* combined_buf;
- byte* combined_buf2;
- ibool ret;
- ibool any_reserved;
- ulint n;
- os_aio_slot_t* aio_slot;
-
- /* Fix compiler warning */
- *consecutive_ios = NULL;
-
- segment = os_aio_get_array_and_local_segment(&array, global_segment);
-
-restart:
- /* NOTE! We only access constant fields in os_aio_array. Therefore
- we do not have to acquire the protecting mutex yet */
+ if (slot->is_reserved && adjacent(current, slot)) {
- srv_set_io_thread_op_info(global_segment,
- "looking for i/o requests (a)");
- ut_ad(os_aio_validate_skip());
- ut_ad(segment < array->n_segments);
+ current = slot;
- n = array->n_slots / array->n_segments;
+ /* Found a consecutive i/o request */
- /* Look through n slots after the segment * n'th slot */
+ m_slots[m_n_elems] = slot;
- if (array == os_aio_read_array
- && os_aio_recommend_sleep_for_read_threads) {
+ ++m_n_elems;
- /* Give other threads chance to add several i/os to the array
- at once. */
+ return(m_n_elems >= m_slots.capacity());
+ }
+ }
- goto recommended_sleep;
+ return(true);
}
- srv_set_io_thread_op_info(global_segment,
- "looking for i/o requests (b)");
+ /** There were no old requests. Look for an I/O request at the lowest
+ offset in the array (we ignore the high 32 bits of the offset in these
+ heuristics) */
+ bool select_lowest_offset()
+ {
+ ut_ad(m_n_elems == 0);
+
+ ulint offset = m_segment * m_n_slots;
- /* Check if there is a slot for which the i/o has already been
- done */
- any_reserved = FALSE;
+ m_lowest_offset = IB_UINT64_MAX;
- os_mutex_enter(array->mutex);
+ for (ulint i = 0; i < m_n_slots; ++i) {
+ Slot* slot;
- for (ulint i = 0; i < n; i++) {
- os_aio_slot_t* slot;
+ slot = m_array->at(i + offset);
- slot = os_aio_array_get_nth_slot(array, i + segment * n);
+ if (slot->is_reserved
+ && slot->offset < m_lowest_offset) {
- if (!slot->reserved) {
- continue;
- } else if (slot->io_already_done) {
+ /* Found an i/o request */
+ m_slots[0] = slot;
- if (os_aio_print_debug) {
- fprintf(stderr,
- "InnoDB: i/o for slot %lu"
- " already done, returning\n",
- (ulong) i);
- }
+ m_n_elems = 1;
- aio_slot = slot;
- ret = TRUE;
- goto slot_io_done;
- } else {
- any_reserved = TRUE;
+ m_lowest_offset = slot->offset;
+ }
}
- }
- /* There is no completed request.
- If there is no pending request at all,
- and the system is being shut down, exit. */
- if (!any_reserved && srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
- os_mutex_exit(array->mutex);
- *message1 = NULL;
- *message2 = NULL;
- return(TRUE);
+ return(m_n_elems > 0);
}
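+
+ /* Example (illustrative): with reserved slots at offsets 98304,
+ 32768 and 65536 and none of them older than two seconds, the
+ slot at offset 32768 is selected first. */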
- n_consecutive = 0;
-
- /* If there are at least 2 seconds old requests, then pick the oldest
- one to prevent starvation. If several requests have the same age,
- then pick the one at the lowest offset. */
-
- biggest_age = 0;
- lowest_offset = IB_UINT64_MAX;
-
- for (ulint i = 0; i < n; i++) {
- os_aio_slot_t* slot;
-
- slot = os_aio_array_get_nth_slot(array, i + segment * n);
+ /** Select the slot if it is older than the current oldest slot.
+ @param[in] slot The slot to check */
+ void select_if_older(Slot* slot)
+ {
+ ulint age;
- if (slot->reserved) {
+ age = (ulint) difftime(time(NULL), slot->reservation_time);
- age = (ulint) difftime(
- time(NULL), slot->reservation_time);
+ if ((age >= 2 && age > m_oldest)
+ || (age >= 2
+ && age == m_oldest
+ && slot->offset < m_lowest_offset)) {
- if ((age >= 2 && age > biggest_age)
- || (age >= 2 && age == biggest_age
- && slot->offset < lowest_offset)) {
+ /* Found an i/o request */
+ m_slots[0] = slot;
- /* Found an i/o request */
- consecutive_ios[0] = slot;
+ m_n_elems = 1;
- n_consecutive = 1;
+ m_oldest = age;
- biggest_age = age;
- lowest_offset = slot->offset;
- }
+ m_lowest_offset = slot->offset;
}
}
- if (n_consecutive == 0) {
- /* There were no old requests. Look for an i/o request at the
- lowest offset in the array (we ignore the high 32 bits of the
- offset in these heuristics) */
-
- lowest_offset = IB_UINT64_MAX;
-
- for (ulint i = 0; i < n; i++) {
- os_aio_slot_t* slot;
+ /** Select the oldest slot in the array
+ @return true if oldest slot found */
+ bool select_oldest()
+ {
+ ut_ad(m_n_elems == 0);
- slot = os_aio_array_get_nth_slot(
- array, i + segment * n);
+ Slot* slot;
+ ulint offset = m_n_slots * m_segment;
- if (slot->reserved && slot->offset < lowest_offset) {
+ slot = m_array->at(offset);
- /* Found an i/o request */
- consecutive_ios[0] = slot;
+ for (ulint i = 0; i < m_n_slots; ++i, ++slot) {
- n_consecutive = 1;
-
- lowest_offset = slot->offset;
+ if (slot->is_reserved) {
+ select_if_older(slot);
}
}
+
+ return(m_n_elems > 0);
}
- if (n_consecutive == 0) {
+ typedef std::vector<Slot*> slots_t;
- /* No i/o requested at the moment */
+private:
+ ulint m_oldest;
+ ulint m_n_elems;
+ os_offset_t m_lowest_offset;
- goto wait_for_io;
- }
+ AIO* m_array;
+ ulint m_n_slots;
+ ulint m_segment;
- /* if n_consecutive != 0, then we have assigned
- something valid to consecutive_ios[0] */
- ut_ad(n_consecutive != 0);
- ut_ad(consecutive_ios[0] != NULL);
+ slots_t m_slots;
- aio_slot = consecutive_ios[0];
+ byte* m_ptr;
+ byte* m_buf;
+};
- /* Check if there are several consecutive blocks to read or write */
+/** Wait for I/O requests
+@return the number of slots */
+ulint
+SimulatedAIOHandler::check_pending(
+ ulint global_segment,
+ os_event_t event)
+{
+ /* NOTE! We only access constant fields in the AIO array.
+ Therefore we do not have to acquire the protecting mutex yet */
-consecutive_loop:
- for (ulint i = 0; i < n; i++) {
- os_aio_slot_t* slot;
+ ut_ad(os_aio_validate_skip());
- slot = os_aio_array_get_nth_slot(array, i + segment * n);
- if (slot->reserved
- && slot != aio_slot
- && slot->offset == aio_slot->offset + aio_slot->len
- && slot->type == aio_slot->type
- && slot->file == aio_slot->file) {
+ ut_ad(m_segment < m_array->get_n_segments());
- /* Found a consecutive i/o request */
+ /* Look through n slots after the segment * n'th slot */
+
+ if (AIO::is_read(m_array)
+ && os_aio_recommend_sleep_for_read_threads) {
- consecutive_ios[n_consecutive] = slot;
- n_consecutive++;
+ /* Give other threads chance to add several
+ I/Os to the array at once. */
- aio_slot = slot;
+ srv_set_io_thread_op_info(
+ global_segment, "waiting for i/o request");
- if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE) {
+ os_event_wait(event);
- goto consecutive_loop;
- } else {
- break;
- }
- }
+ return(0);
}
- srv_set_io_thread_op_info(global_segment, "consecutive i/o requests");
+ return(m_array->slots_per_segment());
+}
+
+/** Does simulated AIO. This function should be called by an i/o-handler
+thread.
- /* We have now collected n_consecutive i/o requests in the array;
- allocate a single buffer which can hold all data, and perform the
- i/o */
+@param[in] global_segment The number of the segment in the aio arrays to wait
+ for; segment 0 is the ibuf i/o thread, segment 1 the
+ log i/o thread, then follow the non-ibuf read threads,
+ and as the last are the non-ibuf write threads
+@param[out] m1 the messages passed with the AIO request; note that
+ also in the case where the AIO operation failed, these
+ output parameters are valid and can be used to restart
+ the operation, for example
+@param[out] m2 Callback argument
+@param[in] type IO context
+@return DB_SUCCESS or error code */
+static
+dberr_t
+os_aio_simulated_handler(
+ ulint global_segment,
+ fil_node_t** m1,
+ void** m2,
+ IORequest* type)
+{
+ Slot* slot;
+ AIO* array;
+ ulint segment;
+ os_event_t event = os_aio_segment_wait_events[global_segment];
- total_len = 0;
- aio_slot = consecutive_ios[0];
+ segment = AIO::get_array_and_local_segment(&array, global_segment);
- for (ulint i = 0; i < n_consecutive; i++) {
- total_len += consecutive_ios[i]->len;
- }
+ SimulatedAIOHandler handler(array, segment);
- if (n_consecutive == 1) {
- /* We can use the buffer of the i/o request */
- combined_buf = aio_slot->buf;
- combined_buf2 = NULL;
- } else {
- combined_buf2 = static_cast<byte*>(
- ut_malloc(total_len + UNIV_PAGE_SIZE));
+ for (;;) {
- ut_a(combined_buf2);
+ srv_set_io_thread_op_info(
+ global_segment, "looking for i/o requests (a)");
- combined_buf = static_cast<byte*>(
- ut_align(combined_buf2, UNIV_PAGE_SIZE));
- }
+ ulint n_slots = handler.check_pending(global_segment, event);
- /* We release the array mutex for the time of the i/o: NOTE that
- this assumes that there is just one i/o-handler thread serving
- a single segment of slots! */
+ if (n_slots == 0) {
+ continue;
+ }
- os_mutex_exit(array->mutex);
+ handler.init(n_slots);
- if (aio_slot->type == OS_FILE_WRITE && n_consecutive > 1) {
- /* Copy the buffers to the combined buffer */
- offs = 0;
+ srv_set_io_thread_op_info(
+ global_segment, "looking for i/o requests (b)");
- for (ulint i = 0; i < n_consecutive; i++) {
+ array->acquire();
- ut_memcpy(combined_buf + offs, consecutive_ios[i]->buf,
- consecutive_ios[i]->len);
+ ulint n_reserved;
- offs += consecutive_ios[i]->len;
- }
- }
+ slot = handler.check_completed(&n_reserved);
- srv_set_io_thread_op_info(global_segment, "doing file i/o");
+ if (slot != NULL) {
- /* Do the i/o with ordinary, synchronous i/o functions: */
- if (aio_slot->type == OS_FILE_WRITE) {
- ut_ad(!srv_read_only_mode);
- ret = os_file_write(
- aio_slot->name, aio_slot->file, combined_buf,
- aio_slot->offset, total_len);
+ break;
- DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
- os_has_said_disk_full = FALSE;
- ret = 0;
- errno = 28;);
+ } else if (n_reserved == 0
+ && !buf_page_cleaner_is_active
+ && srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
- if (!ret) {
- os_file_handle_error_cond_exit(aio_slot->name, "os_file_write_func", TRUE, FALSE,
- __FILE__, __LINE__);
- }
+ /* There is no completed request. If there
+ are no pending requests at all, and the system
+ is being shut down, exit. */
- } else {
- ret = os_file_read(
- aio_slot->file, combined_buf,
- aio_slot->offset, total_len);
- }
+ array->release();
- srv_set_io_thread_op_info(global_segment, "file i/o done");
+ *m1 = NULL;
- if (aio_slot->type == OS_FILE_READ && n_consecutive > 1) {
- /* Copy the combined buffer to individual buffers */
- offs = 0;
+ *m2 = NULL;
- for (ulint i = 0; i < n_consecutive; i++) {
+ return(DB_SUCCESS);
- ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs,
- consecutive_ios[i]->len);
- offs += consecutive_ios[i]->len;
+ } else if (handler.select()) {
+
+ break;
}
- }
- if (combined_buf2) {
- ut_free(combined_buf2);
- }
+ /* No I/O requested at the moment */
- os_mutex_enter(array->mutex);
+ srv_set_io_thread_op_info(
+ global_segment, "resetting wait event");
- /* Mark the i/os done in slots */
+ /* We wait here until there are more IO requests
+ for this segment. */
- for (ulint i = 0; i < n_consecutive; i++) {
- consecutive_ios[i]->io_already_done = TRUE;
- }
+ os_event_reset(event);
- /* We return the messages for the first slot now, and if there were
- several slots, the messages will be returned with subsequent calls
- of this function */
+ array->release();
-slot_io_done:
+ srv_set_io_thread_op_info(
+ global_segment, "waiting for i/o request");
- ut_a(aio_slot->reserved);
+ os_event_wait(event);
+ }
- *message1 = aio_slot->message1;
- *message2 = aio_slot->message2;
+ /* If no slot with already-completed IO was found above,
+ execute the requests that the handler selected. */
- *type = aio_slot->type;
+ if (slot == NULL) {
+ /* Merge adjacent requests */
+ handler.merge();
- os_mutex_exit(array->mutex);
+ /* Check if there are several consecutive blocks
+ to read or write */
- os_aio_array_free_slot(array, aio_slot);
+ srv_set_io_thread_op_info(
+ global_segment, "consecutive i/o requests");
- return(ret);
+ // Note: We don't support write combining for simulated AIO.
+ //ulint total_len = handler.allocate_buffer();
-wait_for_io:
- srv_set_io_thread_op_info(global_segment, "resetting wait event");
+ /* We release the array mutex for the time of the I/O: NOTE that
+ this assumes that there is just one i/o-handler thread serving
+ a single segment of slots! */
- /* We wait here until there again can be i/os in the segment
- of this thread */
+ array->release();
- os_event_reset(os_aio_segment_wait_events[global_segment]);
+ // Note: We don't support write combining for simulated AIO.
+ //handler.copy_to_buffer(total_len);
- os_mutex_exit(array->mutex);
+ srv_set_io_thread_op_info(global_segment, "doing file i/o");
-recommended_sleep:
- srv_set_io_thread_op_info(global_segment, "waiting for i/o request");
+ handler.io();
- os_event_wait(os_aio_segment_wait_events[global_segment]);
+ srv_set_io_thread_op_info(global_segment, "file i/o done");
- goto restart;
-}
+ array->acquire();
-/**********************************************************************//**
-Validates the consistency of an aio array.
-@return true if ok */
-static
-bool
-os_aio_array_validate(
-/*==================*/
- os_aio_array_t* array) /*!< in: aio wait array */
-{
- ulint i;
- ulint n_reserved = 0;
+ handler.done();
- os_mutex_enter(array->mutex);
+ /* We return the messages for the first slot now, and if there
+ were several slots, the messages will be returned with
+ subsequent calls of this function */
- ut_a(array->n_slots > 0);
- ut_a(array->n_segments > 0);
+ slot = handler.first_slot();
+ }
- for (i = 0; i < array->n_slots; i++) {
- os_aio_slot_t* slot;
+ ut_ad(slot->is_reserved);
- slot = os_aio_array_get_nth_slot(array, i);
+ *m1 = slot->m1;
+ *m2 = slot->m2;
- if (slot->reserved) {
- n_reserved++;
- ut_a(slot->len > 0);
- }
- }
+ *type = slot->type;
- ut_a(array->n_reserved == n_reserved);
+ array->release(slot);
- os_mutex_exit(array->mutex);
+ array->release();
- return(true);
+ return(DB_SUCCESS);
}
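+
+/* Illustrative sketch (not part of this change): an i/o-handler thread
+is expected to drive this function in a loop, roughly
+
+	fil_node_t*	m1;
+	void*		m2;
+	IORequest	request;
+
+	while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
+		os_aio_simulated_handler(segment, &m1, &m2, &request);
+	}
+
+where segment is the thread's global segment number; the actual driver
+is the caller of this static function, which also dispatches the
+completed request via m1/m2. */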
-/**********************************************************************//**
-Validates the consistency the aio system.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-os_aio_validate(void)
-/*=================*/
+/** Get the total number of pending IOs
+@return the total number of pending IOs */
+ulint
+AIO::total_pending_io_count()
{
- os_aio_array_validate(os_aio_read_array);
+ ulint count = s_reads->pending_io_count();
- if (os_aio_write_array != 0) {
- os_aio_array_validate(os_aio_write_array);
+ if (s_writes != NULL) {
+ count += s_writes->pending_io_count();
}
- if (os_aio_ibuf_array != 0) {
- os_aio_array_validate(os_aio_ibuf_array);
+ if (s_ibuf != NULL) {
+ count += s_ibuf->pending_io_count();
}
- if (os_aio_log_array != 0) {
- os_aio_array_validate(os_aio_log_array);
+ if (s_log != NULL) {
+ count += s_log->pending_io_count();
}
- if (os_aio_sync_array != 0) {
- os_aio_array_validate(os_aio_sync_array);
+ if (s_sync != NULL) {
+ count += s_sync->pending_io_count();
}
- return(TRUE);
+ return(count);
}
-/**********************************************************************//**
-Prints pending IO requests per segment of an aio array.
+/** Validates the consistency of the aio system.
+@return true if ok */
+static
+bool
+os_aio_validate()
+{
+ /* The method counts and validates; we ignore the count. */
+ AIO::total_pending_io_count();
+
+ return(true);
+}
+
+/** Prints pending IO requests per segment of an aio array.
We probably don't need per segment statistics but they can help us
during development phase to see if the IO requests are being
-distributed as expected. */
-static
+distributed as expected.
+@param[in,out] file File where to print
+@param[in] segments Pending IO array */
void
-os_aio_print_segment_info(
-/*======================*/
- FILE* file, /*!< in: file where to print */
- ulint* n_seg, /*!< in: pending IO array */
- os_aio_array_t* array) /*!< in: array to process */
+AIO::print_segment_info(
+ FILE* file,
+ const ulint* segments)
{
- ulint i;
+ ut_ad(m_n_segments > 0);
- ut_ad(array);
- ut_ad(n_seg);
- ut_ad(array->n_segments > 0);
+ if (m_n_segments > 1) {
- if (array->n_segments == 1) {
- return;
- }
+ fprintf(file, " [");
+
+ for (ulint i = 0; i < m_n_segments; ++i, ++segments) {
- fprintf(file, " [");
- for (i = 0; i < array->n_segments; i++) {
- if (i != 0) {
- fprintf(file, ", ");
+ if (i != 0) {
+ fprintf(file, ", ");
+ }
+
+ fprintf(file, ULINTPF, *segments);
}
- fprintf(file, "%lu", n_seg[i]);
+ fprintf(file, "] ");
}
- fprintf(file, "] ");
}
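+
+/* Example output (illustrative): with 4 segments and per-segment pending
+counts of 2, 0, 1 and 0 this prints " [2, 0, 1, 0] "; nothing is printed
+for single-segment arrays. */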
-/**********************************************************************//**
-Prints info about the aio array. */
-UNIV_INTERN
+/** Prints info about the aio array.
+@param[in,out] file Where to print */
void
-os_aio_print_array(
-/*==============*/
- FILE* file, /*!< in: file where to print */
- os_aio_array_t* array) /*!< in: aio array to print */
+AIO::print(FILE* file)
{
- ulint n_reserved = 0;
- ulint n_res_seg[SRV_MAX_N_IO_THREADS];
+ ulint count = 0;
+ ulint n_res_seg[SRV_MAX_N_IO_THREADS];
- os_mutex_enter(array->mutex);
+ mutex_enter(&m_mutex);
- ut_a(array->n_slots > 0);
- ut_a(array->n_segments > 0);
+ ut_a(!m_slots.empty());
+ ut_a(m_n_segments > 0);
memset(n_res_seg, 0x0, sizeof(n_res_seg));
- for (ulint i = 0; i < array->n_slots; ++i) {
- os_aio_slot_t* slot;
- ulint seg_no;
+ for (ulint i = 0; i < m_slots.size(); ++i) {
+ Slot& slot = m_slots[i];
+ ulint segment = (i * m_n_segments) / m_slots.size();
- slot = os_aio_array_get_nth_slot(array, i);
+ if (slot.is_reserved) {
- seg_no = (i * array->n_segments) / array->n_slots;
+ ++count;
- if (slot->reserved) {
- ++n_reserved;
- ++n_res_seg[seg_no];
+ ++n_res_seg[segment];
- ut_a(slot->len > 0);
+ ut_a(slot.len > 0);
}
}
- ut_a(array->n_reserved == n_reserved);
+ ut_a(m_n_reserved == count);
- fprintf(file, " %lu", (ulong) n_reserved);
+ print_segment_info(file, n_res_seg);
- os_aio_print_segment_info(file, n_res_seg, array);
+ mutex_exit(&m_mutex);
+}
+
+/** Print all the AIO segments
+@param[in,out] file Where to print */
+void
+AIO::print_all(FILE* file)
+{
+ s_reads->print(file);
- os_mutex_exit(array->mutex);
+ if (s_writes != NULL) {
+ fputs(", aio writes:", file);
+ s_writes->print(file);
+ }
+
+ if (s_ibuf != NULL) {
+ fputs(",\n ibuf aio reads:", file);
+ s_ibuf->print(file);
+ }
+
+ if (s_log != NULL) {
+ fputs(", log i/o's:", file);
+ s_log->print(file);
+ }
+
+ if (s_sync != NULL) {
+ fputs(", sync i/o's:", file);
+ s_sync->print(file);
+ }
}
-/**********************************************************************//**
-Prints info of the aio arrays. */
-UNIV_INTERN
+/** Prints info of the aio arrays.
+@param[in,out] file file where to print */
void
-os_aio_print(
-/*=========*/
- FILE* file) /*!< in: file where to print */
+os_aio_print(FILE* file)
{
time_t current_time;
double time_elapsed;
double avg_bytes_read;
for (ulint i = 0; i < srv_n_file_io_threads; ++i) {
- fprintf(file, "I/O thread %lu state: %s (%s)",
- (ulong) i,
+ fprintf(file, "I/O thread " ULINTPF " state: %s (%s)",
+ i,
srv_io_thread_op_info[i],
srv_io_thread_function[i]);
#ifndef _WIN32
if (!srv_use_native_aio
- && os_aio_segment_wait_events[i]->is_set) {
+ && os_event_is_set(os_aio_segment_wait_events[i])) {
fprintf(file, " ev set");
}
#endif /* _WIN32 */
@@ -5991,27 +7571,7 @@ os_aio_print(
fputs("Pending normal aio reads:", file);
- os_aio_print_array(file, os_aio_read_array);
-
- if (os_aio_write_array != 0) {
- fputs(", aio writes:", file);
- os_aio_print_array(file, os_aio_write_array);
- }
-
- if (os_aio_ibuf_array != 0) {
- fputs(",\n ibuf aio reads:", file);
- os_aio_print_array(file, os_aio_ibuf_array);
- }
-
- if (os_aio_log_array != 0) {
- fputs(", log i/o's:", file);
- os_aio_print_array(file, os_aio_log_array);
- }
-
- if (os_aio_sync_array != 0) {
- fputs(", sync i/o's:", file);
- os_aio_print_array(file, os_aio_sync_array);
- }
+ AIO::print_all(file);
putc('\n', file);
current_time = time(NULL);
@@ -6046,11 +7606,11 @@ os_aio_print(
}
fprintf(file,
- "%.2f reads/s, %lu avg bytes/read,"
+ "%.2f reads/s, " ULINTPF " avg bytes/read,"
" %.2f writes/s, %.2f fsyncs/s\n",
(os_n_file_reads - os_n_file_reads_old)
/ time_elapsed,
- (ulong) avg_bytes_read,
+ (ulint) avg_bytes_read,
(os_n_file_writes - os_n_file_writes_old)
/ time_elapsed,
(os_n_fsyncs - os_n_fsyncs_old)
@@ -6064,319 +7624,127 @@ os_aio_print(
os_last_printout = current_time;
}
-/**********************************************************************//**
-Refreshes the statistics used to print per-second averages. */
-UNIV_INTERN
+/** Refreshes the statistics used to print per-second averages. */
void
-os_aio_refresh_stats(void)
-/*======================*/
+os_aio_refresh_stats()
{
os_n_file_reads_old = os_n_file_reads;
+
os_n_file_writes_old = os_n_file_writes;
+
os_n_fsyncs_old = os_n_fsyncs;
+
os_bytes_read_since_printout = 0;
os_last_printout = time(NULL);
}
-#ifdef UNIV_DEBUG
-/**********************************************************************//**
-Checks that all slots in the system have been freed, that is, there are
+/** Checks that all slots in the system have been freed, that is, there are
no pending io operations.
-@return TRUE if all free */
-UNIV_INTERN
-ibool
-os_aio_all_slots_free(void)
-/*=======================*/
+@return true if all free */
+bool
+os_aio_all_slots_free()
{
- os_aio_array_t* array;
- ulint n_res = 0;
-
- array = os_aio_read_array;
-
- os_mutex_enter(array->mutex);
-
- n_res += array->n_reserved;
-
- os_mutex_exit(array->mutex);
-
- if (!srv_read_only_mode) {
- ut_a(os_aio_write_array == 0);
-
- array = os_aio_write_array;
-
- os_mutex_enter(array->mutex);
-
- n_res += array->n_reserved;
-
- os_mutex_exit(array->mutex);
-
- ut_a(os_aio_ibuf_array == 0);
-
- array = os_aio_ibuf_array;
-
- os_mutex_enter(array->mutex);
-
- n_res += array->n_reserved;
-
- os_mutex_exit(array->mutex);
- }
-
- ut_a(os_aio_log_array == 0);
-
- array = os_aio_log_array;
-
- os_mutex_enter(array->mutex);
-
- n_res += array->n_reserved;
-
- os_mutex_exit(array->mutex);
+ return(AIO::total_pending_io_count() == 0);
+}
- array = os_aio_sync_array;
+#ifdef UNIV_DEBUG
+/** Prints all pending IO for this array
+@param[in] file file where to print */
+void
+AIO::to_file(FILE* file) const
+{
+ acquire();
- os_mutex_enter(array->mutex);
+ fprintf(file, " " ULINTPF "\n", m_n_reserved);
- n_res += array->n_reserved;
+ for (ulint i = 0; i < m_slots.size(); ++i) {
- os_mutex_exit(array->mutex);
+ const Slot& slot = m_slots[i];
- if (n_res == 0) {
+ if (slot.is_reserved) {
- return(TRUE);
+ fprintf(file,
+ "%s IO for %s (offset=" UINT64PF
+ ", size=%lu)\n",
+ slot.type.is_read() ? "read" : "write",
+ slot.name, slot.offset, (unsigned long)(slot.len));
+ }
}
- return(FALSE);
+ release();
}
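+
+/* Example output (illustrative): one line per reserved slot, preceded
+by the reserved-slot count, e.g.
+
+	read IO for ./ibdata1 (offset=98304, size=16384)
+*/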
-#endif /* UNIV_DEBUG */
-#ifdef _WIN32
-#include <winioctl.h>
-#ifndef FSCTL_FILE_LEVEL_TRIM
-#define FSCTL_FILE_LEVEL_TRIM CTL_CODE(FILE_DEVICE_FILE_SYSTEM, 130, METHOD_BUFFERED, FILE_WRITE_DATA)
-typedef struct _FILE_LEVEL_TRIM_RANGE {
- DWORDLONG Offset;
- DWORDLONG Length;
-} FILE_LEVEL_TRIM_RANGE, *PFILE_LEVEL_TRIM_RANGE;
-
-typedef struct _FILE_LEVEL_TRIM {
- DWORD Key;
- DWORD NumRanges;
- FILE_LEVEL_TRIM_RANGE Ranges[1];
-} FILE_LEVEL_TRIM, *PFILE_LEVEL_TRIM;
-#endif
-#endif
-
-#if defined(WIN_ASYNC_IO) || defined(LINUX_NATIVE_AIO)
-/**********************************************************************//**
-Directly manipulate the allocated disk space by deallocating for the file referred to
-by fd for the byte range starting at offset and continuing for len bytes.
-Within the specified range, partial file system blocks are zeroed, and whole
-file system blocks are removed from the file. After a successful call,
-subsequent reads from this range will return zeroes.
-@return true if success, false if error */
-static
-ibool
-os_file_trim(
-/*=========*/
- os_aio_slot_t* slot) /*!< in: slot structure */
+/** Print pending IOs for all arrays
+@param[in,out] file file where to print */
+void
+AIO::print_to_file(FILE* file)
{
+ fprintf(file, "Pending normal aio reads:");
- size_t len = slot->len;
- size_t trim_len = slot->page_size - len;
- os_offset_t off __attribute__((unused)) = slot->offset + len;
- size_t bsize = slot->file_block_size;
+ s_reads->to_file(file);
-#ifdef UNIV_TRIM_DEBUG
- fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu off %lu bz %lu\n",
- slot->write_size ? *slot->write_size : 0, trim_len, len, off, bsize);
-#endif
-
- // Nothing to do if trim length is zero or if actual write
- // size is initialized and it is smaller than current write size.
- // In first write if we trim we set write_size to actual bytes
- // written and rest of the page is trimmed. In following writes
- // there is no need to trim again if write_size only increases
- // because rest of the page is already trimmed. If actual write
- // size decreases we need to trim again.
- if (trim_len == 0 ||
- (slot->write_size &&
- *slot->write_size > 0 &&
- len >= *slot->write_size)) {
-
- if (slot->write_size) {
- if (*slot->write_size > 0 && len >= *slot->write_size) {
- srv_stats.page_compressed_trim_op_saved.inc();
- }
-
- *slot->write_size = len;
- }
-
- return (TRUE);
+ if (s_writes != NULL) {
+ fprintf(file, "Pending normal aio writes:");
+ s_writes->to_file(file);
}
-#ifdef __linux__
-#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE)
- int ret = fallocate(slot->file,
- FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
- off, trim_len);
-
- if (ret) {
- /* After first failure do not try to trim again */
- os_fallocate_failed = true;
- srv_use_trim = FALSE;
- ib_logf(IB_LOG_LEVEL_WARN,
- "fallocate() failed with error %d."
- " start: " UINT64PF " len: " ULINTPF " payload: " ULINTPF "."
- " Disabling fallocate for now.",
- errno, off, ulint(trim_len), ulint(len));
-
- os_file_handle_error_no_exit(slot->name,
- " fallocate(FALLOC_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE) ",
- FALSE, __FILE__, __LINE__);
-
- if (slot->write_size) {
- *slot->write_size = 0;
- }
-
- return (FALSE);
- } else {
- if (slot->write_size) {
- *slot->write_size = len;
- }
- }
-#else
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: fallocate not supported on this installation."
- " InnoDB: Disabling fallocate for now.");
- os_fallocate_failed = true;
- srv_use_trim = FALSE;
- if (slot->write_size) {
- *slot->write_size = 0;
+ if (s_ibuf != NULL) {
+ fprintf(file, "Pending ibuf aio reads:");
+ s_ibuf->to_file(file);
}
-#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE ... */
-
-#elif defined(_WIN32)
- FILE_LEVEL_TRIM flt;
- flt.Key = 0;
- flt.NumRanges = 1;
- flt.Ranges[0].Offset = off;
- flt.Ranges[0].Length = trim_len;
-
- BOOL ret = DeviceIoControl(slot->file, FSCTL_FILE_LEVEL_TRIM,
- &flt, sizeof(flt), NULL, NULL, NULL, NULL);
-
- if (!ret) {
- /* After first failure do not try to trim again */
- os_fallocate_failed = true;
- srv_use_trim=FALSE;
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: fallocate call failed with error.\n"
- " InnoDB: start: %lu len: %lu payload: %lu\n"
- " InnoDB: Disabling fallocate for now.\n", off, trim_len, len);
-
- os_file_handle_error_no_exit(slot->name,
- " DeviceIOControl(FSCTL_FILE_LEVEL_TRIM) ",
- FALSE, __FILE__, __LINE__);
-
- if (slot->write_size) {
- *slot->write_size = 0;
- }
- return (FALSE);
- } else {
- if (slot->write_size) {
- *slot->write_size = len;
- }
+ if (s_log != NULL) {
+ fprintf(file, "Pending log i/o's:");
+ s_log->to_file(file);
}
-#endif
- switch(bsize) {
- case 512:
- srv_stats.page_compression_trim_sect512.add((trim_len / bsize));
- break;
- case 1024:
- srv_stats.page_compression_trim_sect1024.add((trim_len / bsize));
- break;
- case 2948:
- srv_stats.page_compression_trim_sect2048.add((trim_len / bsize));
- break;
- case 4096:
- srv_stats.page_compression_trim_sect4096.add((trim_len / bsize));
- break;
- case 8192:
- srv_stats.page_compression_trim_sect8192.add((trim_len / bsize));
- break;
- case 16384:
- srv_stats.page_compression_trim_sect16384.add((trim_len / bsize));
- break;
- case 32768:
- srv_stats.page_compression_trim_sect32768.add((trim_len / bsize));
- break;
- default:
- break;
+ if (s_sync != NULL) {
+ fprintf(file, "Pending sync i/o's:");
+ s_sync->to_file(file);
}
-
- srv_stats.page_compressed_trim_op.inc();
-
- return (TRUE);
-
}
-#endif /* WIN_ASYNC_IO || LINUX_NATIVE_AIO */
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************************//**
-Try to get number of bytes per sector from file system.
-@return file block size */
-UNIV_INTERN
-ulint
-os_file_get_block_size(
-/*===================*/
- os_file_t file, /*!< in: handle to a file */
- const char* name) /*!< in: file name */
+/** Prints all pending IO
+@param[in] file File where to print */
+void
+os_aio_print_pending_io(
+ FILE* file)
{
- ulint fblock_size = 512;
+ AIO::print_to_file(file);
+}
-#if defined(UNIV_LINUX) && defined(HAVE_SYS_STATVFS_H)
- struct statvfs fstat;
- int err;
+#endif /* UNIV_DEBUG */
- err = fstatvfs(file, &fstat);
+/**
+Set the file create umask
+@param[in] umask The umask to use for file creation. */
+void
+os_file_set_umask(ulint umask)
+{
+ os_innodb_umask = umask;
+}
- if (err != 0) {
- fprintf(stderr, "InnoDB: Warning: fstatvfs() failed on file %s\n", name);
- os_file_handle_error_no_exit(name, "fstatvfs()", FALSE, __FILE__, __LINE__);
- } else {
- fblock_size = fstat.f_bsize;
- }
-#endif /* UNIV_LINUX */
-#ifdef __WIN__
- {
- DWORD SectorsPerCluster = 0;
- DWORD BytesPerSector = 0;
- DWORD NumberOfFreeClusters = 0;
- DWORD TotalNumberOfClusters = 0;
-
- /*
- if (GetFreeSpace((LPCTSTR)name, &SectorsPerCluster, &BytesPerSector, &NumberOfFreeClusters, &TotalNumberOfClusters)) {
- fblock_size = BytesPerSector;
- } else {
- fprintf(stderr, "InnoDB: Warning: GetFreeSpace() failed on file %s\n", name);
- os_file_handle_error_no_exit(name, "GetFreeSpace()", FALSE, __FILE__, __LINE__);
- }
- */
- }
-#endif /* __WIN__*/
+#else
+#include "univ.i"
+#endif /* !UNIV_INNOCHECKSUM */
- /* Currently we support file block size up to 4Kb */
- if (fblock_size > 4096 || fblock_size < 512) {
- if (fblock_size < 512) {
- fblock_size = 512;
- } else {
- fblock_size = 4096;
+/** Normalizes a directory path for the current OS:
+On Windows, we convert '/' to '\', else we convert '\' to '/'.
+@param[in,out] str A null-terminated directory and file path */
+void
+os_normalize_path(
+ char* str)
+{
+ if (str != NULL) {
+ for (; *str; str++) {
+ if (*str == OS_PATH_SEPARATOR_ALT) {
+ *str = OS_PATH_SEPARATOR;
+ }
}
}
-
- return fblock_size;
}
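
For reference, the rewritten os0file.cc helper above reduces path normalization to a single in-place separator substitution. A minimal standalone sketch, with the univ.i macros replaced by hardcoded stand-ins (an assumption for illustration only, not the real definitions):

#include <cassert>
#include <cstddef>

// Hypothetical stand-ins for the macros defined in univ.i.
#ifdef _WIN32
static const char OS_PATH_SEPARATOR     = '\\';
static const char OS_PATH_SEPARATOR_ALT = '/';
#else
static const char OS_PATH_SEPARATOR     = '/';
static const char OS_PATH_SEPARATOR_ALT = '\\';
#endif

// Rewrites every alternate separator in place, as os_normalize_path() does.
static void normalize_path_sketch(char* str)
{
	if (str != NULL) {
		for (; *str; str++) {
			if (*str == OS_PATH_SEPARATOR_ALT) {
				*str = OS_PATH_SEPARATOR;
			}
		}
	}
}

int main()
{
	char path[] = "dir\\sub/file.ibd";
	normalize_path_sketch(path);
#ifndef _WIN32
	assert(path[3] == '/');	/* backslash rewritten to '/' */
#endif
	return 0;
}
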
diff --git a/storage/innobase/os/os0proc.cc b/storage/innobase/os/os0proc.cc
index 46adace8550..60057880c18 100644
--- a/storage/innobase/os/os0proc.cc
+++ b/storage/innobase/os/os0proc.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,14 +25,11 @@ process control primitives
Created 9/30/1995 Heikki Tuuri
*******************************************************/
-#include "os0proc.h"
-#ifdef UNIV_NONINL
-#include "os0proc.ic"
+#include "univ.i"
+#ifdef HAVE_LINUX_LARGE_PAGES
+# include "mysqld.h"
#endif
-#include "ut0mem.h"
-#include "ut0byte.h"
-
/* FreeBSD for example has only MAP_ANON, Linux has MAP_ANONYMOUS and
MAP_ANON but MAP_ANON is marked as deprecated */
#if defined(MAP_ANONYMOUS)
@@ -40,63 +38,55 @@ MAP_ANON but MAP_ANON is marked as deprecated */
#define OS_MAP_ANON MAP_ANON
#endif
-UNIV_INTERN ibool os_use_large_pages;
-/* Large page size. This may be a boot-time option on some platforms */
-UNIV_INTERN ulint os_large_page_size;
-
-/****************************************************************//**
-Converts the current process id to a number. It is not guaranteed that the
-number is unique. In Linux returns the 'process number' of the current
-thread. That number is the same as one sees in 'top', for example. In Linux
-the thread id is not the same as one sees in 'top'.
-@return process id as a number */
-UNIV_INTERN
+/** The total amount of memory currently allocated from the operating
+system with os_mem_alloc_large(). */
+ulint os_total_large_mem_allocated = 0;
+
+/** Converts the current process id to a number.
+@return process id as a number */
ulint
os_proc_get_number(void)
/*====================*/
{
-#ifdef __WIN__
- return((ulint)GetCurrentProcessId());
+#ifdef _WIN32
+ return(static_cast<ulint>(GetCurrentProcessId()));
#else
- return((ulint) getpid());
+ return(static_cast<ulint>(getpid()));
#endif
}
-/****************************************************************//**
-Allocates large pages memory.
-@return allocated memory */
-UNIV_INTERN
+/** Allocates large pages memory.
+@param[in,out] n Number of bytes to allocate
+@return allocated memory */
void*
os_mem_alloc_large(
-/*===============*/
- ulint* n) /*!< in/out: number of bytes */
+ ulint* n)
{
void* ptr;
ulint size;
-#if defined HAVE_LARGE_PAGES && defined UNIV_LINUX
+#ifdef HAVE_LINUX_LARGE_PAGES
int shmid;
struct shmid_ds buf;
- if (!os_use_large_pages || !os_large_page_size) {
+ if (!my_use_large_pages || !opt_large_page_size) {
goto skip;
}
- /* Align block size to os_large_page_size */
- ut_ad(ut_is_2pow(os_large_page_size));
- size = ut_2pow_round(*n + (os_large_page_size - 1),
- os_large_page_size);
+ /* Align block size to opt_large_page_size */
+ ut_ad(ut_is_2pow(opt_large_page_size));
+ size = ut_2pow_round(*n + opt_large_page_size - 1,
+ ulint(opt_large_page_size));
shmid = shmget(IPC_PRIVATE, (size_t) size, SHM_HUGETLB | SHM_R | SHM_W);
if (shmid < 0) {
- fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to allocate"
- " %lu bytes. errno %d\n", size, errno);
+ ib::warn() << "Failed to allocate " << size
+ << " bytes. errno " << errno;
ptr = NULL;
} else {
ptr = shmat(shmid, NULL, 0);
if (ptr == (void*)-1) {
- fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to"
- " attach shared memory segment, errno %d\n",
- errno);
+ ib::warn() << "Failed to attach shared memory segment,"
+ " errno " << errno;
ptr = NULL;
}
@@ -108,105 +98,87 @@ os_mem_alloc_large(
if (ptr) {
*n = size;
- os_fast_mutex_lock(&ut_list_mutex);
- ut_total_allocated_memory += size;
- os_fast_mutex_unlock(&ut_list_mutex);
+ my_atomic_addlint(
+ &os_total_large_mem_allocated, size);
+
UNIV_MEM_ALLOC(ptr, size);
return(ptr);
}
- fprintf(stderr, "InnoDB HugeTLB: Warning: Using conventional"
- " memory pool\n");
+ ib::warn() << "Using conventional memory pool";
skip:
-#endif /* HAVE_LARGE_PAGES && UNIV_LINUX */
+#endif /* HAVE_LINUX_LARGE_PAGES */
-#ifdef __WIN__
+#ifdef _WIN32
SYSTEM_INFO system_info;
GetSystemInfo(&system_info);
/* Align block size to system page size */
ut_ad(ut_is_2pow(system_info.dwPageSize));
- /* system_info.dwPageSize is only 32-bit. Casting to ulint is required
- on 64-bit Windows. */
- size = *n = ut_2pow_round(*n + (system_info.dwPageSize - 1),
- (ulint) system_info.dwPageSize);
+ size = *n = ut_2pow_round<ulint>(*n + (system_info.dwPageSize - 1),
+ system_info.dwPageSize);
ptr = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE,
PAGE_READWRITE);
if (!ptr) {
- fprintf(stderr, "InnoDB: VirtualAlloc(%lu bytes) failed;"
- " Windows error %lu\n",
- (ulong) size, (ulong) GetLastError());
+ ib::info() << "VirtualAlloc(" << size << " bytes) failed;"
+ " Windows error " << GetLastError();
} else {
- os_fast_mutex_lock(&ut_list_mutex);
- ut_total_allocated_memory += size;
- os_fast_mutex_unlock(&ut_list_mutex);
+ my_atomic_addlint(
+ &os_total_large_mem_allocated, size);
UNIV_MEM_ALLOC(ptr, size);
}
-#elif !defined OS_MAP_ANON
- size = *n;
- ptr = ut_malloc_low(size, TRUE, FALSE);
#else
-# ifdef HAVE_GETPAGESIZE
size = getpagesize();
-# else
- size = UNIV_PAGE_SIZE;
-# endif
/* Align block size to system page size */
ut_ad(ut_is_2pow(size));
size = *n = ut_2pow_round(*n + (size - 1), size);
ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | OS_MAP_ANON, -1, 0);
if (UNIV_UNLIKELY(ptr == (void*) -1)) {
- fprintf(stderr, "InnoDB: mmap(%lu bytes) failed;"
- " errno %lu\n",
- (ulong) size, (ulong) errno);
+ ib::error() << "mmap(" << size << " bytes) failed;"
+ " errno " << errno;
ptr = NULL;
} else {
- os_fast_mutex_lock(&ut_list_mutex);
- ut_total_allocated_memory += size;
- os_fast_mutex_unlock(&ut_list_mutex);
+ my_atomic_addlint(
+ &os_total_large_mem_allocated, size);
UNIV_MEM_ALLOC(ptr, size);
}
#endif
return(ptr);
}
-/****************************************************************//**
-Frees large pages memory. */
-UNIV_INTERN
+/** Frees large pages memory.
+@param[in] ptr pointer returned by os_mem_alloc_large()
+@param[in] size size returned by os_mem_alloc_large() */
void
os_mem_free_large(
-/*==============*/
- void *ptr, /*!< in: pointer returned by
- os_mem_alloc_large() */
- ulint size) /*!< in: size returned by
- os_mem_alloc_large() */
+ void *ptr,
+ ulint size)
{
- os_fast_mutex_lock(&ut_list_mutex);
- ut_a(ut_total_allocated_memory >= size);
- os_fast_mutex_unlock(&ut_list_mutex);
-
-#if defined HAVE_LARGE_PAGES && defined UNIV_LINUX
- if (os_use_large_pages && os_large_page_size && !shmdt(ptr)) {
- os_fast_mutex_lock(&ut_list_mutex);
- ut_a(ut_total_allocated_memory >= size);
- ut_total_allocated_memory -= size;
- os_fast_mutex_unlock(&ut_list_mutex);
+ ut_a(os_total_large_mem_allocated >= size);
+
+	// We could have manually poisoned that memory for ASAN.
+	// We must unpoison it ourselves, as specified in the documentation
+	// for __asan_poison_memory_region() in sanitizer/asan_interface.h;
+	// munmap() does not do it for us automatically.
+ UNIV_MEM_ALLOC(ptr, size);
+
+#ifdef HAVE_LINUX_LARGE_PAGES
+ if (my_use_large_pages && opt_large_page_size && !shmdt(ptr)) {
+ my_atomic_addlint(
+ &os_total_large_mem_allocated, -size);
return;
}
-#endif /* HAVE_LARGE_PAGES && UNIV_LINUX */
-#ifdef __WIN__
+#endif /* HAVE_LINUX_LARGE_PAGES */
+#ifdef _WIN32
/* When RELEASE memory, the size parameter must be 0.
Do not use MEM_RELEASE with MEM_DECOMMIT. */
if (!VirtualFree(ptr, 0, MEM_RELEASE)) {
- fprintf(stderr, "InnoDB: VirtualFree(%p, %lu) failed;"
- " Windows error %lu\n",
- ptr, (ulong) size, (ulong) GetLastError());
+ ib::error() << "VirtualFree(" << ptr << ", " << size
+ << ") failed; Windows error " << GetLastError();
} else {
- os_fast_mutex_lock(&ut_list_mutex);
- ut_a(ut_total_allocated_memory >= size);
- ut_total_allocated_memory -= size;
- os_fast_mutex_unlock(&ut_list_mutex);
+ my_atomic_addlint(
+ &os_total_large_mem_allocated, -lint(size));
}
#elif !defined OS_MAP_ANON
ut_free(ptr);
@@ -216,14 +188,11 @@ os_mem_free_large(
# else
if (munmap(ptr, size)) {
# endif /* UNIV_SOLARIS */
- fprintf(stderr, "InnoDB: munmap(%p, %lu) failed;"
- " errno %lu\n",
- ptr, (ulong) size, (ulong) errno);
+ ib::error() << "munmap(" << ptr << ", " << size << ") failed;"
+ " errno " << errno;
} else {
- os_fast_mutex_lock(&ut_list_mutex);
- ut_a(ut_total_allocated_memory >= size);
- ut_total_allocated_memory -= size;
- os_fast_mutex_unlock(&ut_list_mutex);
+ my_atomic_addlint(
+ &os_total_large_mem_allocated, -size);
}
#endif
}
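
All three branches of os_mem_alloc_large() above align the request the same way: round *n up to the next multiple of a power-of-two page size before allocating. A minimal sketch of that idiom (round_up_pow2 is an illustrative name; the source uses ut_2pow_round):

#include <cassert>
#include <cstddef>

// Round n up to a multiple of align, where align is a power of two.
// Mirrors the ut_2pow_round(n + align - 1, align) pattern above.
static size_t round_up_pow2(size_t n, size_t align)
{
	assert(align && (align & (align - 1)) == 0);	/* power of two */
	return (n + align - 1) & ~(align - 1);
}

int main()
{
	assert(round_up_pow2(1, 4096) == 4096);
	assert(round_up_pow2(4096, 4096) == 4096);
	assert(round_up_pow2(4097, 4096) == 8192);
	return 0;
}
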
diff --git a/storage/innobase/os/os0sync.cc b/storage/innobase/os/os0sync.cc
deleted file mode 100644
index fadc9e877ba..00000000000
--- a/storage/innobase/os/os0sync.cc
+++ /dev/null
@@ -1,915 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file os/os0sync.cc
-The interface to the operating system
-synchronization primitives.
-
-Created 9/6/1995 Heikki Tuuri
-*******************************************************/
-
-#include "os0sync.h"
-#ifdef UNIV_NONINL
-#include "os0sync.ic"
-#endif
-
-#ifdef __WIN__
-#include <windows.h>
-#endif
-
-#include "ut0mem.h"
-#include "srv0start.h"
-#include "srv0srv.h"
-
-/* Type definition for an operating system mutex struct */
-struct os_mutex_t{
- os_event_t event; /*!< Used by sync0arr.cc for queing threads */
- void* handle; /*!< OS handle to mutex */
- ulint count; /*!< we use this counter to check
- that the same thread does not
- recursively lock the mutex: we
- do not assume that the OS mutex
- supports recursive locking, though
- NT seems to do that */
- UT_LIST_NODE_T(os_mutex_t) os_mutex_list;
- /* list of all 'slow' OS mutexes created */
-};
-
-/** Mutex protecting counts and the lists of OS mutexes and events */
-UNIV_INTERN os_ib_mutex_t os_sync_mutex;
-/** TRUE if os_sync_mutex has been initialized */
-static ibool os_sync_mutex_inited = FALSE;
-/** TRUE when os_sync_free() is being executed */
-static ibool os_sync_free_called = FALSE;
-
-/** This is incremented by 1 in os_thread_create and decremented by 1 in
-os_thread_exit */
-UNIV_INTERN ulint os_thread_count = 0;
-
-/** The list of all events created */
-static UT_LIST_BASE_NODE_T(os_event) os_event_list;
-
-/** The list of all OS 'slow' mutexes */
-static UT_LIST_BASE_NODE_T(os_mutex_t) os_mutex_list;
-
-UNIV_INTERN ulint os_event_count = 0;
-UNIV_INTERN ulint os_mutex_count = 0;
-UNIV_INTERN ulint os_fast_mutex_count = 0;
-
-#ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t event_os_mutex_key;
-UNIV_INTERN mysql_pfs_key_t os_mutex_key;
-#endif
-
-/* Because a mutex is embedded inside an event and there is an
-event embedded inside a mutex, on free, this generates a recursive call.
-This version of the free event function doesn't acquire the global lock */
-static void os_event_free_internal(os_event_t event);
-
-/* On Windows (Vista and later), load function pointers for condition
-variable handling. Those functions are not available in prior versions,
-so we have to use them via runtime loading, as long as we support XP. */
-static void os_cond_module_init(void);
-
-#ifdef __WIN__
-/* Prototypes and function pointers for condition variable functions */
-typedef VOID (WINAPI* InitializeConditionVariableProc)
- (PCONDITION_VARIABLE ConditionVariable);
-static InitializeConditionVariableProc initialize_condition_variable;
-
-typedef BOOL (WINAPI* SleepConditionVariableCSProc)
- (PCONDITION_VARIABLE ConditionVariable,
- PCRITICAL_SECTION CriticalSection,
- DWORD dwMilliseconds);
-static SleepConditionVariableCSProc sleep_condition_variable;
-
-typedef VOID (WINAPI* WakeAllConditionVariableProc)
- (PCONDITION_VARIABLE ConditionVariable);
-static WakeAllConditionVariableProc wake_all_condition_variable;
-
-typedef VOID (WINAPI* WakeConditionVariableProc)
- (PCONDITION_VARIABLE ConditionVariable);
-static WakeConditionVariableProc wake_condition_variable;
-#endif
-
-/*********************************************************//**
-Initialitze condition variable */
-UNIV_INLINE
-void
-os_cond_init(
-/*=========*/
- os_cond_t* cond) /*!< in: condition variable. */
-{
- ut_a(cond);
-
-#ifdef __WIN__
- ut_a(initialize_condition_variable != NULL);
- initialize_condition_variable(cond);
-#else
- ut_a(pthread_cond_init(cond, NULL) == 0);
-#endif
-}
-
-/*********************************************************//**
-Do a timed wait on condition variable.
-@return TRUE if timed out, FALSE otherwise */
-UNIV_INLINE
-ibool
-os_cond_wait_timed(
-/*===============*/
- os_cond_t* cond, /*!< in: condition variable. */
- os_fast_mutex_t* fast_mutex, /*!< in: fast mutex */
-#ifndef __WIN__
- const struct timespec* abstime /*!< in: timeout */
-#else
- DWORD time_in_ms /*!< in: timeout in
- milliseconds*/
-#endif /* !__WIN__ */
-)
-{
- fast_mutex_t* mutex = &fast_mutex->mutex;
-#ifdef __WIN__
- BOOL ret;
- DWORD err;
-
- ut_a(sleep_condition_variable != NULL);
-
- ret = sleep_condition_variable(cond, mutex, time_in_ms);
-
- if (!ret) {
- err = GetLastError();
- /* From http://msdn.microsoft.com/en-us/library/ms686301%28VS.85%29.aspx,
- "Condition variables are subject to spurious wakeups
- (those not associated with an explicit wake) and stolen wakeups
- (another thread manages to run before the woken thread)."
- Check for both types of timeouts.
- Conditions are checked by the caller.*/
- if ((err == WAIT_TIMEOUT) || (err == ERROR_TIMEOUT)) {
- return(TRUE);
- }
- }
-
- ut_a(ret);
-
- return(FALSE);
-#else
- int ret;
-
- ret = pthread_cond_timedwait(cond, mutex, abstime);
-
- switch (ret) {
- case 0:
- case ETIMEDOUT:
- /* We play it safe by checking for EINTR even though
- according to the POSIX documentation it can't return EINTR. */
- case EINTR:
- break;
-
- default:
- fprintf(stderr, " InnoDB: pthread_cond_timedwait() returned: "
- "%d: abstime={%lu,%lu}\n",
- ret, (ulong) abstime->tv_sec, (ulong) abstime->tv_nsec);
- ut_error;
- }
-
- return(ret == ETIMEDOUT);
-#endif
-}
-/*********************************************************//**
-Wait on condition variable */
-UNIV_INLINE
-void
-os_cond_wait(
-/*=========*/
- os_cond_t* cond, /*!< in: condition variable. */
- os_fast_mutex_t* fast_mutex)/*!< in: fast mutex */
-{
- fast_mutex_t* mutex = &fast_mutex->mutex;
- ut_a(cond);
- ut_a(mutex);
-
-#ifdef __WIN__
- ut_a(sleep_condition_variable != NULL);
- ut_a(sleep_condition_variable(cond, mutex, INFINITE));
-#else
- ut_a(pthread_cond_wait(cond, mutex) == 0);
-#endif
-}
-
-/*********************************************************//**
-Wakes all threads waiting for condition variable */
-UNIV_INLINE
-void
-os_cond_broadcast(
-/*==============*/
- os_cond_t* cond) /*!< in: condition variable. */
-{
- ut_a(cond);
-
-#ifdef __WIN__
- ut_a(wake_all_condition_variable != NULL);
- wake_all_condition_variable(cond);
-#else
- ut_a(pthread_cond_broadcast(cond) == 0);
-#endif
-}
-
-/*********************************************************//**
-Destroys condition variable */
-UNIV_INLINE
-void
-os_cond_destroy(
-/*============*/
- os_cond_t* cond) /*!< in: condition variable. */
-{
-#ifdef __WIN__
- /* Do nothing */
-#else
- ut_a(pthread_cond_destroy(cond) == 0);
-#endif
-}
-
-/*********************************************************//**
-On Windows (Vista and later), load function pointers for condition variable
-handling. Those functions are not available in prior versions, so we have to
-use them via runtime loading, as long as we support XP. */
-static
-void
-os_cond_module_init(void)
-/*=====================*/
-{
-#ifdef __WIN__
- HMODULE h_dll;
-
- if (!srv_use_native_conditions)
- return;
-
- h_dll = GetModuleHandle("kernel32");
-
- initialize_condition_variable = (InitializeConditionVariableProc)
- GetProcAddress(h_dll, "InitializeConditionVariable");
- sleep_condition_variable = (SleepConditionVariableCSProc)
- GetProcAddress(h_dll, "SleepConditionVariableCS");
- wake_all_condition_variable = (WakeAllConditionVariableProc)
- GetProcAddress(h_dll, "WakeAllConditionVariable");
- wake_condition_variable = (WakeConditionVariableProc)
- GetProcAddress(h_dll, "WakeConditionVariable");
-
- /* When using native condition variables, check function pointers */
- ut_a(initialize_condition_variable);
- ut_a(sleep_condition_variable);
- ut_a(wake_all_condition_variable);
- ut_a(wake_condition_variable);
-#endif
-}
-
-/*********************************************************//**
-Initializes global event and OS 'slow' mutex lists. */
-UNIV_INTERN
-void
-os_sync_init(void)
-/*==============*/
-{
- UT_LIST_INIT(os_event_list);
- UT_LIST_INIT(os_mutex_list);
-
- os_sync_mutex = NULL;
- os_sync_mutex_inited = FALSE;
-
- /* Now for Windows only */
- os_cond_module_init();
-
- os_sync_mutex = os_mutex_create();
-
- os_sync_mutex_inited = TRUE;
-}
-
-/*********************************************************//**
-Frees created events and OS 'slow' mutexes. */
-UNIV_INTERN
-void
-os_sync_free(void)
-/*==============*/
-{
- os_event_t event;
- os_ib_mutex_t mutex;
-
- os_sync_free_called = TRUE;
- event = UT_LIST_GET_FIRST(os_event_list);
-
- while (event) {
-
- os_event_free(event);
-
- event = UT_LIST_GET_FIRST(os_event_list);
- }
-
- mutex = UT_LIST_GET_FIRST(os_mutex_list);
-
- while (mutex) {
- if (mutex == os_sync_mutex) {
- /* Set the flag to FALSE so that we do not try to
- reserve os_sync_mutex any more in remaining freeing
- operations in shutdown */
- os_sync_mutex_inited = FALSE;
- }
-
- os_mutex_free(mutex);
-
- mutex = UT_LIST_GET_FIRST(os_mutex_list);
- }
- os_sync_free_called = FALSE;
-}
-
-/*********************************************************//**
-Creates an event semaphore, i.e., a semaphore which may just have two
-states: signaled and nonsignaled. The created event is manual reset: it
-must be reset explicitly by calling sync_os_reset_event.
-@return the event handle */
-UNIV_INTERN
-os_event_t
-os_event_create(void)
-/*==================*/
-{
- os_event_t event;
-
-#ifdef __WIN__
- if(!srv_use_native_conditions) {
-
- event = static_cast<os_event_t>(ut_malloc(sizeof(*event)));
-
- event->handle = CreateEvent(NULL, TRUE, FALSE, NULL);
- if (!event->handle) {
- fprintf(stderr,
- "InnoDB: Could not create a Windows event"
- " semaphore; Windows error %lu\n",
- (ulong) GetLastError());
- }
- } else /* Windows with condition variables */
-#endif
- {
- event = static_cast<os_event_t>(ut_malloc(sizeof *event));
-
-#ifndef PFS_SKIP_EVENT_MUTEX
- os_fast_mutex_init(event_os_mutex_key, &event->os_mutex);
-#else
- os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &event->os_mutex);
-#endif
-
- os_cond_init(&(event->cond_var));
-
- event->is_set = FALSE;
-
- /* We return this value in os_event_reset(), which can then be
- be used to pass to the os_event_wait_low(). The value of zero
- is reserved in os_event_wait_low() for the case when the
- caller does not want to pass any signal_count value. To
- distinguish between the two cases we initialize signal_count
- to 1 here. */
- event->signal_count = 1;
- }
-
- /* The os_sync_mutex can be NULL because during startup an event
- can be created [ because it's embedded in the mutex/rwlock ] before
- this module has been initialized */
- if (os_sync_mutex != NULL) {
- os_mutex_enter(os_sync_mutex);
- }
-
- /* Put to the list of events */
- UT_LIST_ADD_FIRST(os_event_list, os_event_list, event);
-
- os_event_count++;
-
- if (os_sync_mutex != NULL) {
- os_mutex_exit(os_sync_mutex);
- }
-
- return(event);
-}
-
-/**********************************************************//**
-Sets an event semaphore to the signaled state: lets waiting threads
-proceed. */
-UNIV_INTERN
-void
-os_event_set(
-/*=========*/
- os_event_t event) /*!< in: event to set */
-{
- ut_a(event);
-
-#ifdef __WIN__
- if (!srv_use_native_conditions) {
- ut_a(SetEvent(event->handle));
- return;
- }
-#endif
-
- os_fast_mutex_lock(&(event->os_mutex));
-
- if (event->is_set) {
- /* Do nothing */
- } else {
- event->is_set = TRUE;
- event->signal_count += 1;
- os_cond_broadcast(&(event->cond_var));
- }
-
- os_fast_mutex_unlock(&(event->os_mutex));
-}
-
-/**********************************************************//**
-Resets an event semaphore to the nonsignaled state. Waiting threads will
-stop to wait for the event.
-The return value should be passed to os_even_wait_low() if it is desired
-that this thread should not wait in case of an intervening call to
-os_event_set() between this os_event_reset() and the
-os_event_wait_low() call. See comments for os_event_wait_low().
-@return current signal_count. */
-UNIV_INTERN
-ib_int64_t
-os_event_reset(
-/*===========*/
- os_event_t event) /*!< in: event to reset */
-{
- ib_int64_t ret = 0;
-
- ut_a(event);
-
-#ifdef __WIN__
- if(!srv_use_native_conditions) {
- ut_a(ResetEvent(event->handle));
- return(0);
- }
-#endif
-
- os_fast_mutex_lock(&(event->os_mutex));
-
- if (!event->is_set) {
- /* Do nothing */
- } else {
- event->is_set = FALSE;
- }
- ret = event->signal_count;
-
- os_fast_mutex_unlock(&(event->os_mutex));
- return(ret);
-}
-
-/**********************************************************//**
-Frees an event object, without acquiring the global lock. */
-static
-void
-os_event_free_internal(
-/*===================*/
- os_event_t event) /*!< in: event to free */
-{
-#ifdef __WIN__
- if(!srv_use_native_conditions) {
- ut_a(event);
- ut_a(CloseHandle(event->handle));
- } else
-#endif
- {
- ut_a(event);
-
- /* This is to avoid freeing the mutex twice */
- os_fast_mutex_free(&(event->os_mutex));
-
- os_cond_destroy(&(event->cond_var));
- }
-
- /* Remove from the list of events */
- UT_LIST_REMOVE(os_event_list, os_event_list, event);
-
- os_event_count--;
-
- ut_free(event);
-}
-
-/**********************************************************//**
-Frees an event object. */
-UNIV_INTERN
-void
-os_event_free(
-/*==========*/
- os_event_t event) /*!< in: event to free */
-
-{
- ut_a(event);
-#ifdef __WIN__
- if(!srv_use_native_conditions){
- ut_a(CloseHandle(event->handle));
- } else /*Windows with condition variables */
-#endif
- {
- os_fast_mutex_free(&(event->os_mutex));
-
- os_cond_destroy(&(event->cond_var));
- }
-
- /* Remove from the list of events */
- os_mutex_enter(os_sync_mutex);
-
- UT_LIST_REMOVE(os_event_list, os_event_list, event);
-
- os_event_count--;
-
- os_mutex_exit(os_sync_mutex);
-
- ut_free(event);
-}
-
-/**********************************************************//**
-Waits for an event object until it is in the signaled state.
-
-Typically, if the event has been signalled after the os_event_reset()
-we'll return immediately because event->is_set == TRUE.
-There are, however, situations (e.g.: sync_array code) where we may
-lose this information. For example:
-
-thread A calls os_event_reset()
-thread B calls os_event_set() [event->is_set == TRUE]
-thread C calls os_event_reset() [event->is_set == FALSE]
-thread A calls os_event_wait() [infinite wait!]
-thread C calls os_event_wait() [infinite wait!]
-
-Where such a scenario is possible, to avoid infinite wait, the
-value returned by os_event_reset() should be passed in as
-reset_sig_count. */
-UNIV_INTERN
-void
-os_event_wait_low(
-/*==============*/
- os_event_t event, /*!< in: event to wait */
- ib_int64_t reset_sig_count)/*!< in: zero or the value
- returned by previous call of
- os_event_reset(). */
-{
-#ifdef __WIN__
- if(!srv_use_native_conditions) {
- DWORD err;
-
- ut_a(event);
-
- UT_NOT_USED(reset_sig_count);
-
- /* Specify an infinite wait */
- err = WaitForSingleObject(event->handle, INFINITE);
-
- ut_a(err == WAIT_OBJECT_0);
- return;
- }
-#endif
-
- os_fast_mutex_lock(&event->os_mutex);
-
- if (!reset_sig_count) {
- reset_sig_count = event->signal_count;
- }
-
- while (!event->is_set && event->signal_count == reset_sig_count) {
- os_cond_wait(&(event->cond_var), &(event->os_mutex));
-
- /* Solaris manual said that spurious wakeups may occur: we
- have to check if the event really has been signaled after
- we came here to wait */
- }
-
- os_fast_mutex_unlock(&event->os_mutex);
-}
-
-/**********************************************************//**
-Waits for an event object until it is in the signaled state or
-a timeout is exceeded.
-@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
-UNIV_INTERN
-ulint
-os_event_wait_time_low(
-/*===================*/
- os_event_t event, /*!< in: event to wait */
- ulint time_in_usec, /*!< in: timeout in
- microseconds, or
- OS_SYNC_INFINITE_TIME */
- ib_int64_t reset_sig_count) /*!< in: zero or the value
- returned by previous call of
- os_event_reset(). */
-{
- ibool timed_out = FALSE;
-
-#ifdef __WIN__
- DWORD time_in_ms;
-
- if (!srv_use_native_conditions) {
- DWORD err;
-
- ut_a(event);
-
- if (time_in_usec != OS_SYNC_INFINITE_TIME) {
- time_in_ms = static_cast<DWORD>(time_in_usec / 1000);
- err = WaitForSingleObject(event->handle, time_in_ms);
- } else {
- err = WaitForSingleObject(event->handle, INFINITE);
- }
-
- if (err == WAIT_OBJECT_0) {
- return(0);
- } else if ((err == WAIT_TIMEOUT) || (err == ERROR_TIMEOUT)) {
- return(OS_SYNC_TIME_EXCEEDED);
- }
-
- ut_error;
- /* Dummy value to eliminate compiler warning. */
- return(42);
- } else {
- ut_a(sleep_condition_variable != NULL);
-
- if (time_in_usec != OS_SYNC_INFINITE_TIME) {
- time_in_ms = static_cast<DWORD>(time_in_usec / 1000);
- } else {
- time_in_ms = INFINITE;
- }
- }
-#else
- struct timespec abstime;
-
- if (time_in_usec != OS_SYNC_INFINITE_TIME) {
- ulonglong usec = ulonglong(time_in_usec) + my_hrtime().val;
- abstime.tv_sec = usec / 1000000;
- abstime.tv_nsec = (usec % 1000000) * 1000;
- } else {
- abstime.tv_nsec = 999999999;
- abstime.tv_sec = (time_t) ULINT_MAX;
- }
-
- ut_a(abstime.tv_nsec <= 999999999);
-
-#endif /* __WIN__ */
-
- os_fast_mutex_lock(&event->os_mutex);
-
- if (!reset_sig_count) {
- reset_sig_count = event->signal_count;
- }
-
- do {
- if (event->is_set || event->signal_count != reset_sig_count) {
-
- break;
- }
-
- timed_out = os_cond_wait_timed(
- &event->cond_var, &event->os_mutex,
-#ifndef __WIN__
- &abstime
-#else
- time_in_ms
-#endif /* !__WIN__ */
- );
-
- } while (!timed_out);
-
- os_fast_mutex_unlock(&event->os_mutex);
-
- return(timed_out ? OS_SYNC_TIME_EXCEEDED : 0);
-}
-
-/*********************************************************//**
-Creates an operating system mutex semaphore. Because these are slow, the
-mutex semaphore of InnoDB itself (ib_mutex_t) should be used where possible.
-@return the mutex handle */
-UNIV_INTERN
-os_ib_mutex_t
-os_mutex_create(void)
-/*=================*/
-{
- os_fast_mutex_t* mutex;
- os_ib_mutex_t mutex_str;
-
- mutex = static_cast<os_fast_mutex_t*>(
- ut_malloc(sizeof(os_fast_mutex_t)));
-
- os_fast_mutex_init(os_mutex_key, mutex);
-
- mutex_str = static_cast<os_ib_mutex_t>(ut_malloc(sizeof *mutex_str));
-
- mutex_str->handle = mutex;
- mutex_str->count = 0;
- mutex_str->event = os_event_create();
-
- if (UNIV_LIKELY(os_sync_mutex_inited)) {
- /* When creating os_sync_mutex itself we cannot reserve it */
- os_mutex_enter(os_sync_mutex);
- }
-
- UT_LIST_ADD_FIRST(os_mutex_list, os_mutex_list, mutex_str);
-
- os_mutex_count++;
-
- if (UNIV_LIKELY(os_sync_mutex_inited)) {
- os_mutex_exit(os_sync_mutex);
- }
-
- return(mutex_str);
-}
-
-/**********************************************************//**
-Acquires ownership of a mutex semaphore. */
-UNIV_INTERN
-void
-os_mutex_enter(
-/*===========*/
- os_ib_mutex_t mutex) /*!< in: mutex to acquire */
-{
- os_fast_mutex_lock(static_cast<os_fast_mutex_t*>(mutex->handle));
-
- (mutex->count)++;
-
- ut_a(mutex->count == 1);
-}
-
-/**********************************************************//**
-Releases ownership of a mutex. */
-UNIV_INTERN
-void
-os_mutex_exit(
-/*==========*/
- os_ib_mutex_t mutex) /*!< in: mutex to release */
-{
- ut_a(mutex);
-
- ut_a(mutex->count == 1);
-
- (mutex->count)--;
- os_fast_mutex_unlock(static_cast<os_fast_mutex_t*>(mutex->handle));
-}
-
-/**********************************************************//**
-Frees a mutex object. */
-UNIV_INTERN
-void
-os_mutex_free(
-/*==========*/
- os_ib_mutex_t mutex) /*!< in: mutex to free */
-{
- ut_a(mutex);
-
- if (UNIV_LIKELY(!os_sync_free_called)) {
- os_event_free_internal(mutex->event);
- }
-
- if (UNIV_LIKELY(os_sync_mutex_inited)) {
- os_mutex_enter(os_sync_mutex);
- }
-
- UT_LIST_REMOVE(os_mutex_list, os_mutex_list, mutex);
-
- os_mutex_count--;
-
- if (UNIV_LIKELY(os_sync_mutex_inited)) {
- os_mutex_exit(os_sync_mutex);
- }
-
- os_fast_mutex_free(static_cast<os_fast_mutex_t*>(mutex->handle));
- ut_free(mutex->handle);
- ut_free(mutex);
-}
-
-/*********************************************************//**
-Initializes an operating system fast mutex semaphore. */
-UNIV_INTERN
-void
-os_fast_mutex_init_func(
-/*====================*/
- fast_mutex_t* fast_mutex) /*!< in: fast mutex */
-{
-#ifdef __WIN__
- ut_a(fast_mutex);
-
- InitializeCriticalSection((LPCRITICAL_SECTION) fast_mutex);
-#else
- ut_a(0 == pthread_mutex_init(fast_mutex, MY_MUTEX_INIT_FAST));
-#endif
- if (UNIV_LIKELY(os_sync_mutex_inited)) {
- /* When creating os_sync_mutex itself (in Unix) we cannot
- reserve it */
-
- os_mutex_enter(os_sync_mutex);
- }
-
- os_fast_mutex_count++;
-
- if (UNIV_LIKELY(os_sync_mutex_inited)) {
- os_mutex_exit(os_sync_mutex);
- }
-}
-
-/**********************************************************//**
-Acquires ownership of a fast mutex. */
-UNIV_INTERN
-void
-os_fast_mutex_lock_func(
-/*====================*/
- fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */
-{
-#ifdef __WIN__
- EnterCriticalSection((LPCRITICAL_SECTION) fast_mutex);
-#else
- pthread_mutex_lock(fast_mutex);
-#endif
-}
-
-/**********************************************************//**
-Releases ownership of a fast mutex. */
-UNIV_INTERN
-void
-os_fast_mutex_unlock_func(
-/*======================*/
- fast_mutex_t* fast_mutex) /*!< in: mutex to release */
-{
-#ifdef __WIN__
- LeaveCriticalSection(fast_mutex);
-#else
- pthread_mutex_unlock(fast_mutex);
-#endif
-}
-
-/**********************************************************//**
-Releases ownership of a fast mutex. Implies a full memory barrier even on
-platforms such as PowerPC where this is not normally required. */
-UNIV_INTERN
-void
-os_fast_mutex_unlock_full_barrier(
-/*=================*/
- os_fast_mutex_t* fast_mutex) /*!< in: mutex to release */
-{
-#ifdef __WIN__
- LeaveCriticalSection(&fast_mutex->mutex);
-#else
- pthread_mutex_unlock(&fast_mutex->mutex);
-#ifdef __powerpc__
- os_mb;
-#endif
-#endif
-}
-
-/**********************************************************//**
-Frees a mutex object. */
-UNIV_INTERN
-void
-os_fast_mutex_free_func(
-/*====================*/
- fast_mutex_t* fast_mutex) /*!< in: mutex to free */
-{
-#ifdef __WIN__
- ut_a(fast_mutex);
-
- DeleteCriticalSection((LPCRITICAL_SECTION) fast_mutex);
-#else
- int ret;
-
- ret = pthread_mutex_destroy(fast_mutex);
-
- if (UNIV_UNLIKELY(ret != 0)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: error: return value %lu when calling\n"
- "InnoDB: pthread_mutex_destroy().\n", (ulint) ret);
- fprintf(stderr,
- "InnoDB: Byte contents of the pthread mutex at %p:\n",
- (void*) fast_mutex);
- ut_print_buf(stderr, fast_mutex, sizeof(os_fast_mutex_t));
- putc('\n', stderr);
- }
-#endif
- if (UNIV_LIKELY(os_sync_mutex_inited)) {
- /* When freeing the last mutexes, we have
- already freed os_sync_mutex */
-
- os_mutex_enter(os_sync_mutex);
- }
-
- ut_ad(os_fast_mutex_count > 0);
- os_fast_mutex_count--;
-
- if (UNIV_LIKELY(os_sync_mutex_inited)) {
- os_mutex_exit(os_sync_mutex);
- }
-}
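
The deleted event code carries one subtle invariant worth keeping in view: signal_count is a generation counter, so a waiter that armed itself with os_event_reset() cannot sleep through an os_event_set() that raced in between (the lost-wakeup scenario sketched above os_event_wait_low). A minimal sketch of the same pattern on standard C++ primitives (illustrative names, not the InnoDB API):

#include <condition_variable>
#include <cstdint>
#include <mutex>

// Manual-reset event with a generation counter, so a waiter armed by
// reset() cannot miss a set() that raced in between.
class event_sketch {
	std::mutex              m;
	std::condition_variable cv;
	bool                    is_set = false;
	std::int64_t            signal_count = 1;
public:
	void set() {
		std::lock_guard<std::mutex> lk(m);
		if (!is_set) {
			is_set = true;
			++signal_count;
			cv.notify_all();
		}
	}
	// Returns the generation to pass back into wait().
	std::int64_t reset() {
		std::lock_guard<std::mutex> lk(m);
		is_set = false;
		return signal_count;
	}
	void wait(std::int64_t reset_sig_count) {
		std::unique_lock<std::mutex> lk(m);
		cv.wait(lk, [&] {
			// Wake if signaled, or if any set() happened after
			// the reset() that produced reset_sig_count.
			return is_set || signal_count != reset_sig_count;
		});
	}
};
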
diff --git a/storage/innobase/os/os0thread.cc b/storage/innobase/os/os0thread.cc
index 1182166f522..2aac53f572d 100644
--- a/storage/innobase/os/os0thread.cc
+++ b/storage/innobase/os/os0thread.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,30 +24,22 @@ The interface to the operating system thread control primitives
Created 9/8/1995 Heikki Tuuri
*******************************************************/
-#include "os0thread.h"
-#ifdef UNIV_NONINL
-#include "os0thread.ic"
-#endif
-
-#ifdef __WIN__
-#include <windows.h>
-#endif
-
-#ifndef UNIV_HOTBACKUP
+#include "univ.i"
#include "srv0srv.h"
-#include "os0sync.h"
+
+/** Number of threads active. */
+ulint os_thread_count;
/***************************************************************//**
Compares two thread ids for equality.
-@return TRUE if equal */
-UNIV_INTERN
+@return TRUE if equal */
ibool
os_thread_eq(
/*=========*/
os_thread_id_t a, /*!< in: OS thread or thread id */
os_thread_id_t b) /*!< in: OS thread or thread id */
{
-#ifdef __WIN__
+#ifdef _WIN32
if (a == b) {
return(TRUE);
}
@@ -64,34 +57,25 @@ os_thread_eq(
/****************************************************************//**
Converts an OS thread id to a ulint. It is NOT guaranteed that the ulint is
unique for the thread though!
-@return thread identifier as a number */
-UNIV_INTERN
+@return thread identifier as a number */
ulint
os_thread_pf(
/*=========*/
os_thread_id_t a) /*!< in: OS thread identifier */
{
-#ifdef UNIV_HPUX10
- /* In HP-UX-10.20 a pthread_t is a struct of 3 fields: field1, field2,
- field3. We do not know if field1 determines the thread uniquely. */
-
- return((ulint)(a.field1));
-#else
return((ulint) a);
-#endif
}
/*****************************************************************//**
Returns the thread identifier of current thread. Currently the thread
identifier in Unix is the thread handle itself. Note that in HP-UX
pthread_t is a struct of 3 fields.
-@return current thread identifier */
-UNIV_INTERN
+@return current thread identifier */
os_thread_id_t
os_thread_get_curr_id(void)
/*=======================*/
{
-#ifdef __WIN__
+#ifdef _WIN32
return(GetCurrentThreadId());
#else
return(pthread_self());
@@ -100,10 +84,11 @@ os_thread_get_curr_id(void)
/****************************************************************//**
Creates a new thread of execution. The execution starts from
-the function given. The start function takes a void* parameter
-and returns an ulint.
-@return handle to the thread */
-UNIV_INTERN
+the function given.
+NOTE: We count the number of threads in os_thread_exit(). A created
+thread should always use that to exit so that the thread count will be
+decremented.
+We do not return an error code because if there is one, we crash here. */
os_thread_t
os_thread_create_func(
/*==================*/
@@ -114,98 +99,67 @@ os_thread_create_func(
os_thread_id_t* thread_id) /*!< out: id of the created
thread, or NULL */
{
- /* the new thread should look recent changes up here so far. */
- os_wmb;
-
-#ifdef __WIN__
- os_thread_t thread;
- DWORD win_thread_id;
+ os_thread_id_t new_thread_id;
- os_mutex_enter(os_sync_mutex);
- os_thread_count++;
- os_mutex_exit(os_sync_mutex);
+#ifdef _WIN32
+ HANDLE handle;
- thread = CreateThread(NULL, /* no security attributes */
+ handle = CreateThread(NULL, /* no security attributes */
0, /* default size stack */
func,
arg,
0, /* thread runs immediately */
- &win_thread_id);
+ &new_thread_id);
- if (thread_id) {
- *thread_id = win_thread_id;
+ if (!handle) {
+ /* If we cannot start a new thread, life has no meaning. */
+ ib::fatal() << "CreateThread returned " << GetLastError();
}
- return((os_thread_t)thread);
-#else
- int ret;
- os_thread_t pthread;
+ CloseHandle(handle);
+
+ my_atomic_addlint(&os_thread_count, 1);
+
+ return((os_thread_t)new_thread_id);
+#else /* _WIN32 else */
+
pthread_attr_t attr;
-#ifndef UNIV_HPUX10
- ret = pthread_attr_init(&attr);
+ int ret = pthread_attr_init(&attr);
if (UNIV_UNLIKELY(ret)) {
fprintf(stderr,
"InnoDB: Error: pthread_attr_init() returned %d\n",
ret);
abort();
}
-#endif
-#ifdef UNIV_AIX
- /* We must make sure a thread stack is at least 32 kB, otherwise
- InnoDB might crash; we do not know if the default stack size on
- AIX is always big enough. An empirical test on AIX-4.3 suggested
- the size was 96 kB, though. */
+ my_atomic_addlint(&os_thread_count, 1);
- ret = pthread_attr_setstacksize(&attr,
- (size_t)(PTHREAD_STACK_MIN
- + 32 * 1024));
- if (ret) {
- fprintf(stderr,
- "InnoDB: Error: pthread_attr_setstacksize"
- " returned %d\n", ret);
- abort();
- }
-#endif
- os_mutex_enter(os_sync_mutex);
- os_thread_count++;
- os_mutex_exit(os_sync_mutex);
+ ret = pthread_create(&new_thread_id, &attr, func, arg);
-#ifdef UNIV_HPUX10
- ret = pthread_create(&pthread, pthread_attr_default, func, arg);
-#else
- ret = pthread_create(&pthread, &attr, func, arg);
-#endif
- if (UNIV_UNLIKELY(ret)) {
- fprintf(stderr,
- "InnoDB: Error: pthread_create() returned %d\n", ret);
- abort();
- }
+ ut_a(ret == 0);
-#ifndef UNIV_HPUX10
pthread_attr_destroy(&attr);
-#endif
+
+#endif /* not _WIN32 */
ut_a(os_thread_count <= OS_THREAD_MAX_N);
- if (thread_id) {
- *thread_id = pthread;
+ /* Return the thread_id if the caller requests it. */
+ if (thread_id != NULL) {
+ *thread_id = new_thread_id;
}
-
- return(pthread);
-#endif
+ return((os_thread_t)new_thread_id);
}
/** Waits until the specified thread completes and joins it.
Its return value is ignored.
@param[in,out] thread thread to join */
-UNIV_INTERN
void
os_thread_join(
- os_thread_t thread)
+ os_thread_id_t thread)
{
-#ifdef __WIN__
+#ifdef _WIN32
/* Do nothing. */
#else
#ifdef UNIV_DEBUG
@@ -215,81 +169,72 @@ os_thread_join(
/* Waiting on already-quit threads is allowed. */
ut_ad(ret == 0 || ret == ESRCH);
-#endif /* __WIN__ */
+#endif /* _WIN32 */
}
-/*****************************************************************//**
-Exits the current thread. */
-UNIV_INTERN
+/** Exits the current thread.
+@param[in] detach if true, the thread will be detached right before
+exiting. If false, another thread is responsible for joining this thread */
+ATTRIBUTE_NORETURN
void
-os_thread_exit(
-/*===========*/
- void* exit_value, /*!< in: exit value; in Windows this void*
- is cast as a DWORD */
- bool detach) /*!< in: if true, the thread will be detached
- right before exiting. If false, another thread
- is responsible for joining this thread. */
+os_thread_exit(bool detach)
{
#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "Thread exits, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
+ ib::info() << "Thread exits, id "
+ << os_thread_pf(os_thread_get_curr_id());
#endif
#ifdef UNIV_PFS_THREAD
pfs_delete_thread();
#endif
- os_mutex_enter(os_sync_mutex);
- os_thread_count--;
- os_mutex_exit(os_sync_mutex);
+ my_atomic_addlint(&os_thread_count, -1);
-#ifdef __WIN__
- ExitThread((DWORD) exit_value);
+#ifdef _WIN32
+ ExitThread(0);
#else
if (detach) {
pthread_detach(pthread_self());
}
- pthread_exit(exit_value);
+ pthread_exit(NULL);
#endif
}
/*****************************************************************//**
Advises the os to give up remainder of the thread's time slice. */
-UNIV_INTERN
void
os_thread_yield(void)
/*=================*/
{
-#if defined(__WIN__)
+#if defined(_WIN32)
SwitchToThread();
-#elif (defined(HAVE_SCHED_YIELD) && defined(HAVE_SCHED_H))
- sched_yield();
-#elif defined(HAVE_PTHREAD_YIELD_ZERO_ARG)
- pthread_yield();
-#elif defined(HAVE_PTHREAD_YIELD_ONE_ARG)
- pthread_yield(0);
#else
- os_thread_sleep(0);
+ sched_yield();
#endif
}
-#endif /* !UNIV_HOTBACKUP */
/*****************************************************************//**
The thread sleeps at least the time given in microseconds. */
-UNIV_INTERN
void
os_thread_sleep(
/*============*/
ulint tm) /*!< in: time in microseconds */
{
-#ifdef __WIN__
+#ifdef _WIN32
Sleep((DWORD) tm / 1000);
+#elif defined(HAVE_NANOSLEEP)
+ struct timespec t;
+
+ t.tv_sec = tm / 1000000;
+ t.tv_nsec = (tm % 1000000) * 1000;
+
+ ::nanosleep(&t, NULL);
#else
- struct timeval t;
+ struct timeval t;
t.tv_sec = tm / 1000000;
t.tv_usec = tm % 1000000;
select(0, NULL, NULL, NULL, &t);
-#endif
+#endif /* _WIN32 */
}
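
The new HAVE_NANOSLEEP branch of os_thread_sleep() splits the microsecond argument into whole seconds and a nanosecond remainder. A standalone sketch of the same conversion (sleep_usecs is an illustrative name):

#include <time.h>

// Sleep at least tm microseconds, as the nanosleep branch above does.
static void sleep_usecs(unsigned long tm)
{
	struct timespec t;
	t.tv_sec  = tm / 1000000;		/* whole seconds */
	t.tv_nsec = (tm % 1000000) * 1000;	/* remainder, in nanoseconds */
	::nanosleep(&t, NULL);
}

int main()
{
	sleep_usecs(1500);	/* roughly 1.5 ms */
	return 0;
}
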
diff --git a/storage/innobase/page/page0cur.cc b/storage/innobase/page/page0cur.cc
index e9ac4b4bb04..74a10cd67d5 100644
--- a/storage/innobase/page/page0cur.cc
+++ b/storage/innobase/page/page0cur.cc
@@ -1,7 +1,8 @@
/*****************************************************************************
-Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
+Copyright (c) 2018, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,153 +26,176 @@ Created 10/4/1994 Heikki Tuuri
*************************************************************************/
#include "page0cur.h"
-#ifdef UNIV_NONINL
-#include "page0cur.ic"
-#endif
-
#include "page0zip.h"
#include "btr0btr.h"
#include "mtr0log.h"
#include "log0recv.h"
-#include "ut0ut.h"
-#ifndef UNIV_HOTBACKUP
#include "rem0cmp.h"
+#include "gis0rtree.h"
+
+#include <algorithm>
-#ifdef PAGE_CUR_ADAPT
+#ifdef BTR_CUR_HASH_ADAPT
# ifdef UNIV_SEARCH_PERF_STAT
-static ulint page_cur_short_succ = 0;
+static ulint page_cur_short_succ;
# endif /* UNIV_SEARCH_PERF_STAT */
-/*******************************************************************//**
-This is a linear congruential generator PRNG. Returns a pseudo random
-number between 0 and 2^64-1 inclusive. The formula and the constants
-being used are:
-X[n+1] = (a * X[n] + c) mod m
-where:
-X[0] = my_interval_timer()
-a = 1103515245 (3^5 * 5 * 7 * 129749)
-c = 12345 (3 * 5 * 823)
-m = 18446744073709551616 (2^64)
-
-@return number between 0 and 2^64-1 */
-static
-ib_uint64_t
-page_cur_lcg_prng(void)
-/*===================*/
-{
-#define LCG_a 1103515245
-#define LCG_c 12345
- static uint64_t lcg_current;
-
- if (!lcg_current) {
- lcg_current = my_interval_timer();
- }
-
- /* no need to "% 2^64" explicitly because lcg_current is
- 64 bit and this will be done anyway */
- lcg_current = LCG_a * lcg_current + LCG_c;
-
- return(lcg_current);
-}
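
The deleted helper above is a textbook 64-bit linear congruential generator; the "mod 2^64" step never appears in the code because unsigned 64-bit arithmetic wraps. A self-contained sketch with the same constants:

#include <cstdint>

// X[n+1] = (a * X[n] + c) mod 2^64; the modulus is implicit because
// uint64_t arithmetic wraps. Constants match the deleted helper.
static std::uint64_t lcg_next(std::uint64_t x)
{
	const std::uint64_t a = 1103515245;
	const std::uint64_t c = 12345;
	return a * x + c;
}

int main()
{
	std::uint64_t x = 42;	/* any nonzero seed */
	for (int i = 0; i < 3; i++) {
		x = lcg_next(x);
	}
	return 0;
}
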
-
-/****************************************************************//**
-Tries a search shortcut based on the last insert.
-@return TRUE on success */
+/** Try a search shortcut based on the last insert.
+@param[in] block index page
+@param[in] index index tree
+@param[in] tuple search key
+@param[in,out] iup_matched_fields already matched fields in the
+upper limit record
+@param[in,out] ilow_matched_fields already matched fields in the
+lower limit record
+@param[out] cursor page cursor
+@return true on success */
UNIV_INLINE
-ibool
+bool
page_cur_try_search_shortcut(
-/*=========================*/
- const buf_block_t* block, /*!< in: index page */
- const dict_index_t* index, /*!< in: record descriptor */
- const dtuple_t* tuple, /*!< in: data tuple */
+ const buf_block_t* block,
+ const dict_index_t* index,
+ const dtuple_t* tuple,
ulint* iup_matched_fields,
- /*!< in/out: already matched
- fields in upper limit record */
- ulint* iup_matched_bytes,
- /*!< in/out: already matched
- bytes in a field not yet
- completely matched */
ulint* ilow_matched_fields,
- /*!< in/out: already matched
- fields in lower limit record */
- ulint* ilow_matched_bytes,
- /*!< in/out: already matched
- bytes in a field not yet
- completely matched */
- page_cur_t* cursor) /*!< out: page cursor */
+ page_cur_t* cursor)
{
const rec_t* rec;
const rec_t* next_rec;
ulint low_match;
- ulint low_bytes;
ulint up_match;
- ulint up_bytes;
-#ifdef UNIV_SEARCH_DEBUG
- page_cur_t cursor2;
-#endif
ibool success = FALSE;
const page_t* page = buf_block_get_frame(block);
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
rec_offs_init(offsets_);
ut_ad(dtuple_check_typed(tuple));
+ ut_ad(page_is_leaf(page));
rec = page_header_get_ptr(page, PAGE_LAST_INSERT);
- offsets = rec_get_offsets(rec, index, offsets,
+ offsets = rec_get_offsets(rec, index, offsets, true,
dtuple_get_n_fields(tuple), &heap);
ut_ad(rec);
ut_ad(page_rec_is_user_rec(rec));
- ut_pair_min(&low_match, &low_bytes,
- *ilow_matched_fields, *ilow_matched_bytes,
- *iup_matched_fields, *iup_matched_bytes);
-
- up_match = low_match;
- up_bytes = low_bytes;
+ low_match = up_match = std::min(*ilow_matched_fields,
+ *iup_matched_fields);
- if (page_cmp_dtuple_rec_with_match(tuple, rec, offsets,
- &low_match, &low_bytes) < 0) {
+ if (cmp_dtuple_rec_with_match(tuple, rec, offsets, &low_match) < 0) {
goto exit_func;
}
next_rec = page_rec_get_next_const(rec);
- offsets = rec_get_offsets(next_rec, index, offsets,
- dtuple_get_n_fields(tuple), &heap);
+ if (!page_rec_is_supremum(next_rec)) {
+ offsets = rec_get_offsets(next_rec, index, offsets, true,
+ dtuple_get_n_fields(tuple), &heap);
- if (page_cmp_dtuple_rec_with_match(tuple, next_rec, offsets,
- &up_match, &up_bytes) >= 0) {
- goto exit_func;
+ if (cmp_dtuple_rec_with_match(tuple, next_rec, offsets,
+ &up_match) >= 0) {
+ goto exit_func;
+ }
+
+ *iup_matched_fields = up_match;
}
page_cur_position(rec, block, cursor);
-#ifdef UNIV_SEARCH_DEBUG
- page_cur_search_with_match(block, index, tuple, PAGE_CUR_DBG,
- iup_matched_fields,
- iup_matched_bytes,
- ilow_matched_fields,
- ilow_matched_bytes,
- &cursor2);
- ut_a(cursor2.rec == cursor->rec);
+ *ilow_matched_fields = low_match;
- if (!page_rec_is_supremum(next_rec)) {
+#ifdef UNIV_SEARCH_PERF_STAT
+ page_cur_short_succ++;
+#endif
+ success = TRUE;
+exit_func:
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+ return(success);
+}
+
+/** Try a search shortcut based on the last insert.
+@param[in] block index page
+@param[in] index index tree
+@param[in] tuple search key
+@param[in,out] iup_matched_fields already matched fields in the
+upper limit record
+@param[in,out] iup_matched_bytes already matched bytes in the
+first partially matched field in the upper limit record
+@param[in,out] ilow_matched_fields already matched fields in the
+lower limit record
+@param[in,out] ilow_matched_bytes already matched bytes in the
+first partially matched field in the lower limit record
+@param[out] cursor page cursor
+@return true on success */
+UNIV_INLINE
+bool
+page_cur_try_search_shortcut_bytes(
+ const buf_block_t* block,
+ const dict_index_t* index,
+ const dtuple_t* tuple,
+ ulint* iup_matched_fields,
+ ulint* iup_matched_bytes,
+ ulint* ilow_matched_fields,
+ ulint* ilow_matched_bytes,
+ page_cur_t* cursor)
+{
+ const rec_t* rec;
+ const rec_t* next_rec;
+ ulint low_match;
+ ulint low_bytes;
+ ulint up_match;
+ ulint up_bytes;
+ ibool success = FALSE;
+ const page_t* page = buf_block_get_frame(block);
+ mem_heap_t* heap = NULL;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
+ rec_offs_init(offsets_);
+
+ ut_ad(dtuple_check_typed(tuple));
+ ut_ad(page_is_leaf(page));
+
+ rec = page_header_get_ptr(page, PAGE_LAST_INSERT);
+ offsets = rec_get_offsets(rec, index, offsets, true,
+ dtuple_get_n_fields(tuple), &heap);
- ut_a(*iup_matched_fields == up_match);
- ut_a(*iup_matched_bytes == up_bytes);
+ ut_ad(rec);
+ ut_ad(page_rec_is_user_rec(rec));
+ if (ut_pair_cmp(*ilow_matched_fields, *ilow_matched_bytes,
+ *iup_matched_fields, *iup_matched_bytes) < 0) {
+ up_match = low_match = *ilow_matched_fields;
+ up_bytes = low_bytes = *ilow_matched_bytes;
+ } else {
+ up_match = low_match = *iup_matched_fields;
+ up_bytes = low_bytes = *iup_matched_bytes;
}
- ut_a(*ilow_matched_fields == low_match);
- ut_a(*ilow_matched_bytes == low_bytes);
-#endif
+ if (cmp_dtuple_rec_with_match_bytes(
+ tuple, rec, index, offsets, &low_match, &low_bytes) < 0) {
+ goto exit_func;
+ }
+
+ next_rec = page_rec_get_next_const(rec);
if (!page_rec_is_supremum(next_rec)) {
+ offsets = rec_get_offsets(next_rec, index, offsets, true,
+ dtuple_get_n_fields(tuple), &heap);
+
+ if (cmp_dtuple_rec_with_match_bytes(
+ tuple, next_rec, index, offsets,
+ &up_match, &up_bytes)
+ >= 0) {
+ goto exit_func;
+ }
*iup_matched_fields = up_match;
*iup_matched_bytes = up_bytes;
}
+ page_cur_position(rec, block, cursor);
+
*ilow_matched_fields = low_match;
*ilow_matched_bytes = low_bytes;
@@ -185,22 +209,21 @@ exit_func:
}
return(success);
}
-
-#endif
+#endif /* BTR_CUR_HASH_ADAPT */
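
Both shortcut variants above encode the same heuristic: on a page being filled by ascending inserts, first test whether the key falls between the last-inserted record and its successor before paying for a full binary search. A sketch of the idea over a plain sorted array (illustrative only, without the record and offset machinery):

#include <cassert>
#include <cstddef>

// Returns true and sets *pos if key sits at or immediately after the
// last-insert position, mirroring page_cur_try_search_shortcut().
static bool try_last_insert_shortcut(const int* recs, std::size_t n,
				     std::size_t last_insert, int key,
				     std::size_t* pos)
{
	if (last_insert >= n || key < recs[last_insert]) {
		return false;	/* key precedes the last insert */
	}
	/* succeed if there is no successor, or key precedes the successor */
	if (last_insert + 1 < n && key >= recs[last_insert + 1]) {
		return false;
	}
	*pos = last_insert;	/* cursor positioned like PAGE_CUR_LE */
	return true;
}

int main()
{
	const int	recs[] = {10, 20, 30, 40};
	std::size_t	pos = 0;
	assert(try_last_insert_shortcut(recs, 4, 3, 45, &pos) && pos == 3);
	assert(!try_last_insert_shortcut(recs, 4, 3, 35, &pos));
	return 0;
}
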
#ifdef PAGE_CUR_LE_OR_EXTENDS
/****************************************************************//**
Checks if the nth field in a record is a character type field which extends
the nth field in tuple, i.e., the field is longer or equal in length and has
common first characters.
-@return TRUE if rec field extends tuple field */
+@return TRUE if rec field extends tuple field */
static
ibool
page_cur_rec_field_extends(
/*=======================*/
const dtuple_t* tuple, /*!< in: data tuple */
const rec_t* rec, /*!< in: record */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
ulint n) /*!< in: compare nth field */
{
const dtype_t* type;
@@ -220,16 +243,17 @@ page_cur_rec_field_extends(
|| type->mtype == DATA_FIXBINARY
|| type->mtype == DATA_BINARY
|| type->mtype == DATA_BLOB
+ || DATA_GEOMETRY_MTYPE(type->mtype)
|| type->mtype == DATA_VARMYSQL
|| type->mtype == DATA_MYSQL) {
if (dfield_get_len(dfield) != UNIV_SQL_NULL
&& rec_f_len != UNIV_SQL_NULL
&& rec_f_len >= dfield_get_len(dfield)
- && !cmp_data_data_slow(type->mtype, type->prtype,
- dfield_get_data(dfield),
- dfield_get_len(dfield),
- rec_f, dfield_get_len(dfield))) {
+ && !cmp_data_data(type->mtype, type->prtype,
+ dfield_get_data(dfield),
+ dfield_get_len(dfield),
+ rec_f, dfield_get_len(dfield))) {
return(TRUE);
}
@@ -241,31 +265,283 @@ page_cur_rec_field_extends(
/****************************************************************//**
Searches the right position for a page cursor. */
-UNIV_INTERN
void
page_cur_search_with_match(
/*=======================*/
const buf_block_t* block, /*!< in: buffer block */
- const dict_index_t* index, /*!< in: record descriptor */
+ const dict_index_t* index, /*!< in/out: record descriptor */
const dtuple_t* tuple, /*!< in: data tuple */
- ulint mode, /*!< in: PAGE_CUR_L,
+ page_cur_mode_t mode, /*!< in: PAGE_CUR_L,
PAGE_CUR_LE, PAGE_CUR_G, or
PAGE_CUR_GE */
ulint* iup_matched_fields,
/*!< in/out: already matched
fields in upper limit record */
- ulint* iup_matched_bytes,
- /*!< in/out: already matched
- bytes in a field not yet
- completely matched */
ulint* ilow_matched_fields,
/*!< in/out: already matched
fields in lower limit record */
+ page_cur_t* cursor, /*!< out: page cursor */
+ rtr_info_t* rtr_info)/*!< in/out: rtree search stack */
+{
+ ulint up;
+ ulint low;
+ ulint mid;
+ const page_t* page;
+ const page_dir_slot_t* slot;
+ const rec_t* up_rec;
+ const rec_t* low_rec;
+ const rec_t* mid_rec;
+ ulint up_matched_fields;
+ ulint low_matched_fields;
+ ulint cur_matched_fields;
+ int cmp;
+#ifdef UNIV_ZIP_DEBUG
+ const page_zip_des_t* page_zip = buf_block_get_page_zip(block);
+#endif /* UNIV_ZIP_DEBUG */
+ mem_heap_t* heap = NULL;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
+ rec_offs_init(offsets_);
+
+ ut_ad(dtuple_validate(tuple));
+#ifdef UNIV_DEBUG
+# ifdef PAGE_CUR_DBG
+ if (mode != PAGE_CUR_DBG)
+# endif /* PAGE_CUR_DBG */
+# ifdef PAGE_CUR_LE_OR_EXTENDS
+ if (mode != PAGE_CUR_LE_OR_EXTENDS)
+# endif /* PAGE_CUR_LE_OR_EXTENDS */
+ ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
+ || mode == PAGE_CUR_G || mode == PAGE_CUR_GE
+ || dict_index_is_spatial(index));
+#endif /* UNIV_DEBUG */
+ page = buf_block_get_frame(block);
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
+#endif /* UNIV_ZIP_DEBUG */
+
+ ut_d(page_check_dir(page));
+ const bool is_leaf = page_is_leaf(page);
+
+#ifdef BTR_CUR_HASH_ADAPT
+ if (is_leaf
+ && (mode == PAGE_CUR_LE)
+ && !dict_index_is_spatial(index)
+ && (page_header_get_field(page, PAGE_N_DIRECTION) > 3)
+ && (page_header_get_ptr(page, PAGE_LAST_INSERT))
+ && (page_header_get_field(page, PAGE_DIRECTION) == PAGE_RIGHT)) {
+
+ if (page_cur_try_search_shortcut(
+ block, index, tuple,
+ iup_matched_fields,
+ ilow_matched_fields,
+ cursor)) {
+ return;
+ }
+ }
+# ifdef PAGE_CUR_DBG
+ if (mode == PAGE_CUR_DBG) {
+ mode = PAGE_CUR_LE;
+ }
+# endif
+#endif /* BTR_CUR_HASH_ADAPT */
+
+ /* If the mode is for R-tree indexes, use the special MBR
+ related compare functions */
+ if (dict_index_is_spatial(index) && mode > PAGE_CUR_LE) {
+ /* For leaf level insert, we still use the traditional
+ compare function for now */
+ if (mode == PAGE_CUR_RTREE_INSERT && is_leaf) {
+ mode = PAGE_CUR_LE;
+ } else {
+ rtr_cur_search_with_match(
+ block, (dict_index_t*)index, tuple, mode,
+ cursor, rtr_info);
+ return;
+ }
+ }
+
+ /* The following flag does not work for non-latin1 char sets because
+ cmp_full_field does not tell how many bytes matched */
+#ifdef PAGE_CUR_LE_OR_EXTENDS
+ ut_a(mode != PAGE_CUR_LE_OR_EXTENDS);
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
+
+ /* If mode PAGE_CUR_G is specified, we are trying to position the
+ cursor to answer a query of the form "tuple < X", where tuple is
+ the input parameter, and X denotes an arbitrary physical record on
+ the page. We want to position the cursor on the first X which
+ satisfies the condition. */
+
+ up_matched_fields = *iup_matched_fields;
+ low_matched_fields = *ilow_matched_fields;
+
+ /* Perform binary search. First the search is done through the page
+ directory, after that as a linear search in the list of records
+ owned by the upper limit directory slot. */
+
+ low = 0;
+ up = page_dir_get_n_slots(page) - 1;
+
+ /* Perform binary search until the lower and upper limit directory
+ slots come to the distance 1 of each other */
+
+ while (up - low > 1) {
+ mid = (low + up) / 2;
+ slot = page_dir_get_nth_slot(page, mid);
+ mid_rec = page_dir_slot_get_rec(slot);
+
+ cur_matched_fields = std::min(low_matched_fields,
+ up_matched_fields);
+
+ offsets = offsets_;
+ offsets = rec_get_offsets(
+ mid_rec, index, offsets, is_leaf,
+ dtuple_get_n_fields_cmp(tuple), &heap);
+
+ cmp = cmp_dtuple_rec_with_match(
+ tuple, mid_rec, offsets, &cur_matched_fields);
+
+ if (cmp > 0) {
+low_slot_match:
+ low = mid;
+ low_matched_fields = cur_matched_fields;
+
+ } else if (cmp) {
+#ifdef PAGE_CUR_LE_OR_EXTENDS
+ if (mode == PAGE_CUR_LE_OR_EXTENDS
+ && page_cur_rec_field_extends(
+ tuple, mid_rec, offsets,
+ cur_matched_fields)) {
+
+ goto low_slot_match;
+ }
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
+up_slot_match:
+ up = mid;
+ up_matched_fields = cur_matched_fields;
+
+ } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE
+#ifdef PAGE_CUR_LE_OR_EXTENDS
+ || mode == PAGE_CUR_LE_OR_EXTENDS
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
+ ) {
+ goto low_slot_match;
+ } else {
+
+ goto up_slot_match;
+ }
+ }
+
+ slot = page_dir_get_nth_slot(page, low);
+ low_rec = page_dir_slot_get_rec(slot);
+ slot = page_dir_get_nth_slot(page, up);
+ up_rec = page_dir_slot_get_rec(slot);
+
+ /* Perform linear search until the upper and lower records come to
+ distance 1 of each other. */
+
+ while (page_rec_get_next_const(low_rec) != up_rec) {
+
+ mid_rec = page_rec_get_next_const(low_rec);
+
+ cur_matched_fields = std::min(low_matched_fields,
+ up_matched_fields);
+
+ offsets = offsets_;
+ offsets = rec_get_offsets(
+ mid_rec, index, offsets, is_leaf,
+ dtuple_get_n_fields_cmp(tuple), &heap);
+
+ cmp = cmp_dtuple_rec_with_match(
+ tuple, mid_rec, offsets, &cur_matched_fields);
+
+ if (cmp > 0) {
+low_rec_match:
+ low_rec = mid_rec;
+ low_matched_fields = cur_matched_fields;
+
+ } else if (cmp) {
+#ifdef PAGE_CUR_LE_OR_EXTENDS
+ if (mode == PAGE_CUR_LE_OR_EXTENDS
+ && page_cur_rec_field_extends(
+ tuple, mid_rec, offsets,
+ cur_matched_fields)) {
+
+ goto low_rec_match;
+ }
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
+up_rec_match:
+ up_rec = mid_rec;
+ up_matched_fields = cur_matched_fields;
+ } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE
+#ifdef PAGE_CUR_LE_OR_EXTENDS
+ || mode == PAGE_CUR_LE_OR_EXTENDS
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
+ ) {
+ if (!cmp && !cur_matched_fields) {
+#ifdef UNIV_DEBUG
+ mtr_t mtr;
+ mtr_start(&mtr);
+
+ /* We got a match, but cur_matched_fields is
+ 0; the record must have REC_INFO_MIN_REC_FLAG set */
+ ulint rec_info = rec_get_info_bits(mid_rec,
+ rec_offs_comp(offsets));
+ ut_ad(rec_info & REC_INFO_MIN_REC_FLAG);
+ ut_ad(!page_has_prev(page));
+ mtr_commit(&mtr);
+#endif
+
+ cur_matched_fields = dtuple_get_n_fields_cmp(tuple);
+ }
+
+ goto low_rec_match;
+ } else {
+
+ goto up_rec_match;
+ }
+ }
+
+ if (mode <= PAGE_CUR_GE) {
+ page_cur_position(up_rec, block, cursor);
+ } else {
+ page_cur_position(low_rec, block, cursor);
+ }
+
+ *iup_matched_fields = up_matched_fields;
+ *ilow_matched_fields = low_matched_fields;
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+}
+
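The control flow above is a classic two-phase search: binary search over the sparse page directory until two adjacent slots bracket the key, then a linear walk through the records owned by the bracketing slot. A self-contained sketch over a sorted vector, with every fourth element acting as a directory slot (the matched-fields bookkeeping that makes repeated comparisons cheaper is omitted):

	#include <cstdio>
	#include <vector>

	// PAGE_CUR_LE semantics: index of the last element <= key, -1 if none.
	static long search_le(const std::vector<int>& recs, int key)
	{
		if (recs.empty() || key < recs.front()) {
			return -1;	// key precedes all user records
		}

		// Sparse "directory": one slot per four records plus the last.
		std::vector<size_t> dir;
		for (size_t i = 0; i < recs.size(); i += 4) {
			dir.push_back(i);
		}
		if (dir.back() != recs.size() - 1) {
			dir.push_back(recs.size() - 1);
		}

		// Phase 1: binary search until the slots are at distance 1.
		size_t low = 0, up = dir.size() - 1;
		while (up - low > 1) {
			size_t mid = (low + up) / 2;
			if (recs[dir[mid]] <= key) {
				low = mid;
			} else {
				up = mid;
			}
		}

		// Phase 2: linear walk among records between the two slots.
		size_t pos = dir[low];
		while (pos + 1 <= dir[up] && recs[pos + 1] <= key) {
			pos++;
		}
		return long(pos);
	}

	int main()
	{
		std::vector<int> recs = {2, 3, 5, 7, 11, 13, 17, 19, 23};
		std::printf("%ld\n", search_le(recs, 12));	// 4 (record 11)
		std::printf("%ld\n", search_le(recs, 1));	// -1 (infimum side)
	}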
+#ifdef BTR_CUR_HASH_ADAPT
+/** Search the right position for a page cursor.
+@param[in] block buffer block
+@param[in] index index tree
+@param[in] tuple key to be searched for
+@param[in] mode search mode
+@param[in,out] iup_matched_fields already matched fields in the
+upper limit record
+@param[in,out] iup_matched_bytes already matched bytes in the
+first partially matched field in the upper limit record
+@param[in,out] ilow_matched_fields already matched fields in the
+lower limit record
+@param[in,out] ilow_matched_bytes already matched bytes in the
+first partially matched field in the lower limit record
+@param[out] cursor page cursor */
+void
+page_cur_search_with_match_bytes(
+ const buf_block_t* block,
+ const dict_index_t* index,
+ const dtuple_t* tuple,
+ page_cur_mode_t mode,
+ ulint* iup_matched_fields,
+ ulint* iup_matched_bytes,
+ ulint* ilow_matched_fields,
ulint* ilow_matched_bytes,
- /*!< in/out: already matched
- bytes in a field not yet
- completely matched */
- page_cur_t* cursor) /*!< out: page cursor */
+ page_cur_t* cursor)
{
ulint up;
ulint low;
@@ -282,21 +558,14 @@ page_cur_search_with_match(
ulint cur_matched_fields;
ulint cur_matched_bytes;
int cmp;
-#ifdef UNIV_SEARCH_DEBUG
- int dbg_cmp;
- ulint dbg_matched_fields;
- ulint dbg_matched_bytes;
-#endif
#ifdef UNIV_ZIP_DEBUG
const page_zip_des_t* page_zip = buf_block_get_page_zip(block);
#endif /* UNIV_ZIP_DEBUG */
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
rec_offs_init(offsets_);
- ut_ad(block && tuple && iup_matched_fields && iup_matched_bytes
- && ilow_matched_fields && ilow_matched_bytes && cursor);
ut_ad(dtuple_validate(tuple));
#ifdef UNIV_DEBUG
# ifdef PAGE_CUR_DBG
@@ -313,16 +582,16 @@ page_cur_search_with_match(
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
- page_check_dir(page);
+ ut_d(page_check_dir(page));
-#ifdef PAGE_CUR_ADAPT
+#ifdef BTR_CUR_HASH_ADAPT
if (page_is_leaf(page)
&& (mode == PAGE_CUR_LE)
&& (page_header_get_field(page, PAGE_N_DIRECTION) > 3)
&& (page_header_get_ptr(page, PAGE_LAST_INSERT))
&& (page_header_get_field(page, PAGE_DIRECTION) == PAGE_RIGHT)) {
- if (page_cur_try_search_shortcut(
+ if (page_cur_try_search_shortcut_bytes(
block, index, tuple,
iup_matched_fields, iup_matched_bytes,
ilow_matched_fields, ilow_matched_bytes,
@@ -335,7 +604,7 @@ page_cur_search_with_match(
mode = PAGE_CUR_LE;
}
# endif
-#endif
+#endif /* BTR_CUR_HASH_ADAPT */
/* The following flag does not work for non-latin1 char sets because
cmp_full_field does not tell how many bytes matched */
@@ -350,7 +619,7 @@ page_cur_search_with_match(
satisfies the condition. */
up_matched_fields = *iup_matched_fields;
- up_matched_bytes = *iup_matched_bytes;
+ up_matched_bytes = *iup_matched_bytes;
low_matched_fields = *ilow_matched_fields;
low_matched_bytes = *ilow_matched_bytes;
@@ -363,6 +632,7 @@ page_cur_search_with_match(
/* Perform binary search until the lower and upper limit directory
slots come to the distance 1 of each other */
+ ut_d(bool is_leaf = page_is_leaf(page));
while (up - low > 1) {
mid = (low + up) / 2;
@@ -373,20 +643,21 @@ page_cur_search_with_match(
low_matched_fields, low_matched_bytes,
up_matched_fields, up_matched_bytes);
- offsets = rec_get_offsets(mid_rec, index, offsets,
- dtuple_get_n_fields_cmp(tuple),
- &heap);
+ offsets = rec_get_offsets(
+ mid_rec, index, offsets_, is_leaf,
+ dtuple_get_n_fields_cmp(tuple), &heap);
+
+ cmp = cmp_dtuple_rec_with_match_bytes(
+ tuple, mid_rec, index, offsets,
+ &cur_matched_fields, &cur_matched_bytes);
- cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets,
- &cur_matched_fields,
- &cur_matched_bytes);
- if (UNIV_LIKELY(cmp > 0)) {
+ if (cmp > 0) {
low_slot_match:
low = mid;
low_matched_fields = cur_matched_fields;
low_matched_bytes = cur_matched_bytes;
- } else if (UNIV_EXPECT(cmp, -1)) {
+ } else if (cmp) {
#ifdef PAGE_CUR_LE_OR_EXTENDS
if (mode == PAGE_CUR_LE_OR_EXTENDS
&& page_cur_rec_field_extends(
@@ -406,7 +677,6 @@ up_slot_match:
|| mode == PAGE_CUR_LE_OR_EXTENDS
#endif /* PAGE_CUR_LE_OR_EXTENDS */
) {
-
goto low_slot_match;
} else {
@@ -430,20 +700,21 @@ up_slot_match:
low_matched_fields, low_matched_bytes,
up_matched_fields, up_matched_bytes);
- offsets = rec_get_offsets(mid_rec, index, offsets,
- dtuple_get_n_fields_cmp(tuple),
- &heap);
+ offsets = rec_get_offsets(
+ mid_rec, index, offsets_, is_leaf,
+ dtuple_get_n_fields_cmp(tuple), &heap);
+
+ cmp = cmp_dtuple_rec_with_match_bytes(
+ tuple, mid_rec, index, offsets,
+ &cur_matched_fields, &cur_matched_bytes);
- cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets,
- &cur_matched_fields,
- &cur_matched_bytes);
- if (UNIV_LIKELY(cmp > 0)) {
+ if (cmp > 0) {
low_rec_match:
low_rec = mid_rec;
low_matched_fields = cur_matched_fields;
low_matched_bytes = cur_matched_bytes;
- } else if (UNIV_EXPECT(cmp, -1)) {
+ } else if (cmp) {
#ifdef PAGE_CUR_LE_OR_EXTENDS
if (mode == PAGE_CUR_LE_OR_EXTENDS
&& page_cur_rec_field_extends(
@@ -462,6 +733,22 @@ up_rec_match:
|| mode == PAGE_CUR_LE_OR_EXTENDS
#endif /* PAGE_CUR_LE_OR_EXTENDS */
) {
+ if (!cmp && !cur_matched_fields) {
+#ifdef UNIV_DEBUG
+ mtr_t mtr;
+ mtr_start(&mtr);
+
+ /* We got a match, but cur_matched_fields is
+ 0; the record must have REC_INFO_MIN_REC_FLAG set */
+ ulint rec_info = rec_get_info_bits(mid_rec,
+ rec_offs_comp(offsets));
+ ut_ad(rec_info & REC_INFO_MIN_REC_FLAG);
+ ut_ad(!page_has_prev(page));
+ mtr_commit(&mtr);
+#endif
+
+ cur_matched_fields = dtuple_get_n_fields_cmp(tuple);
+ }
goto low_rec_match;
} else {
@@ -470,58 +757,6 @@ up_rec_match:
}
}
-#ifdef UNIV_SEARCH_DEBUG
-
- /* Check that the lower and upper limit records have the
- right alphabetical order compared to tuple. */
- dbg_matched_fields = 0;
- dbg_matched_bytes = 0;
-
- offsets = rec_get_offsets(low_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, low_rec, offsets,
- &dbg_matched_fields,
- &dbg_matched_bytes);
- if (mode == PAGE_CUR_G) {
- ut_a(dbg_cmp >= 0);
- } else if (mode == PAGE_CUR_GE) {
- ut_a(dbg_cmp == 1);
- } else if (mode == PAGE_CUR_L) {
- ut_a(dbg_cmp == 1);
- } else if (mode == PAGE_CUR_LE) {
- ut_a(dbg_cmp >= 0);
- }
-
- if (!page_rec_is_infimum(low_rec)) {
-
- ut_a(low_matched_fields == dbg_matched_fields);
- ut_a(low_matched_bytes == dbg_matched_bytes);
- }
-
- dbg_matched_fields = 0;
- dbg_matched_bytes = 0;
-
- offsets = rec_get_offsets(up_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, up_rec, offsets,
- &dbg_matched_fields,
- &dbg_matched_bytes);
- if (mode == PAGE_CUR_G) {
- ut_a(dbg_cmp == -1);
- } else if (mode == PAGE_CUR_GE) {
- ut_a(dbg_cmp <= 0);
- } else if (mode == PAGE_CUR_L) {
- ut_a(dbg_cmp <= 0);
- } else if (mode == PAGE_CUR_LE) {
- ut_a(dbg_cmp == -1);
- }
-
- if (!page_rec_is_supremum(up_rec)) {
-
- ut_a(up_matched_fields == dbg_matched_fields);
- ut_a(up_matched_bytes == dbg_matched_bytes);
- }
-#endif
if (mode <= PAGE_CUR_GE) {
page_cur_position(up_rec, block, cursor);
} else {
@@ -536,19 +771,18 @@ up_rec_match:
mem_heap_free(heap);
}
}
+#endif /* BTR_CUR_HASH_ADAPT */
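The *_bytes variants above carry match progress as a (fields, bytes) pair; taking the lexicographic minimum of the two limit pairs, as the ut_pair_cmp() branch earlier in this file does, yields the prefix that any record between the limits is guaranteed to share with the tuple. A sketch where std::pair's built-in ordering stands in for ut_pair_cmp (an assumption only about its being lexicographic):

	#include <algorithm>
	#include <cstdio>
	#include <utility>

	int main()
	{
		// (complete fields matched, bytes matched in the next field)
		std::pair<unsigned, unsigned> low_limit{2, 5};
		std::pair<unsigned, unsigned> up_limit{2, 3};

		// Lexicographic minimum: comparisons may safely resume
		// from this much shared prefix.
		auto resume = std::min(low_limit, up_limit);
		std::printf("resume at %u fields + %u bytes\n",
			    resume.first, resume.second);	// 2 fields + 3 bytes
	}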
/***********************************************************//**
Positions a page cursor on a randomly chosen user record on a page. If there
are no user records, sets the cursor on the infimum record. */
-UNIV_INTERN
void
page_cur_open_on_rnd_user_rec(
/*==========================*/
buf_block_t* block, /*!< in: page */
page_cur_t* cursor) /*!< out: page cursor */
{
- ulint rnd;
- ulint n_recs = page_get_n_recs(buf_block_get_frame(block));
+ const ulint n_recs = page_get_n_recs(block->frame);
page_cur_set_before_first(block, cursor);
@@ -557,54 +791,60 @@ page_cur_open_on_rnd_user_rec(
return;
}
- rnd = (ulint) (page_cur_lcg_prng() % n_recs);
-
- do {
- page_cur_move_to_next(cursor);
- } while (rnd--);
+ cursor->rec = page_rec_get_nth(block->frame,
+ ut_rnd_interval(n_recs) + 1);
}
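The rewrite above replaces the old walk of rnd successive next-pointers with a direct jump to the nth record. The same idea with the standard library supplying the uniform draw (ut_rnd_interval(n) is taken here to return a value in [0, n); that range is an assumption):

	#include <cstdio>
	#include <random>

	int main()
	{
		const unsigned n_recs = 42;

		std::mt19937 gen{std::random_device{}()};
		std::uniform_int_distribution<unsigned> dist(0, n_recs - 1);

		// +1 skips the infimum, giving a 1-based user-record position.
		unsigned nth = dist(gen) + 1;
		std::printf("position cursor on user record #%u of %u\n",
			    nth, n_recs);
	}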
-/***********************************************************//**
-Writes the log record of a record insert on a page. */
-static
+/** Write a redo log record of inserting a record into an index page.
+@param[in] insert_rec inserted record
+@param[in] rec_size rec_get_size(insert_rec)
+@param[in] cursor_rec predecessor of insert_rec
+@param[in,out] index index tree
+@param[in,out] mtr mini-transaction */
void
page_cur_insert_rec_write_log(
-/*==========================*/
- rec_t* insert_rec, /*!< in: inserted physical record */
- ulint rec_size, /*!< in: insert_rec size */
- rec_t* cursor_rec, /*!< in: record the
- cursor is pointing to */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mini-transaction handle */
+ const rec_t* insert_rec,
+ ulint rec_size,
+ const rec_t* cursor_rec,
+ dict_index_t* index,
+ mtr_t* mtr)
{
ulint cur_rec_size;
ulint extra_size;
ulint cur_extra_size;
const byte* ins_ptr;
- byte* log_ptr;
const byte* log_end;
ulint i;
+ if (dict_table_is_temporary(index->table)) {
+ mtr->set_modified();
+ ut_ad(mtr->get_log_mode() == MTR_LOG_NO_REDO);
+ return;
+ }
+
ut_a(rec_size < UNIV_PAGE_SIZE);
+ ut_ad(mtr->is_named_space(index->space));
ut_ad(page_align(insert_rec) == page_align(cursor_rec));
ut_ad(!page_rec_is_comp(insert_rec)
== !dict_table_is_comp(index->table));
+ ut_d(const bool is_leaf = page_rec_is_leaf(cursor_rec));
+
{
mem_heap_t* heap = NULL;
- ulint cur_offs_[REC_OFFS_NORMAL_SIZE];
- ulint ins_offs_[REC_OFFS_NORMAL_SIZE];
+ offset_t cur_offs_[REC_OFFS_NORMAL_SIZE];
+ offset_t ins_offs_[REC_OFFS_NORMAL_SIZE];
- ulint* cur_offs;
- ulint* ins_offs;
+ offset_t* cur_offs;
+ offset_t* ins_offs;
rec_offs_init(cur_offs_);
rec_offs_init(ins_offs_);
cur_offs = rec_get_offsets(cursor_rec, index, cur_offs_,
- ULINT_UNDEFINED, &heap);
+ is_leaf, ULINT_UNDEFINED, &heap);
ins_offs = rec_get_offsets(insert_rec, index, ins_offs_,
- ULINT_UNDEFINED, &heap);
+ is_leaf, ULINT_UNDEFINED, &heap);
extra_size = rec_offs_extra_size(ins_offs);
cur_extra_size = rec_offs_extra_size(cur_offs);
@@ -646,6 +886,8 @@ page_cur_insert_rec_write_log(
} while (i < min_rec_size);
}
+ byte* log_ptr;
+
if (mtr_get_log_mode(mtr) != MTR_LOG_SHORT_INSERTS) {
if (page_rec_is_comp(insert_rec)) {
@@ -745,37 +987,33 @@ need_extra_info:
mlog_catenate_string(mtr, ins_ptr, rec_size);
}
}
-#else /* !UNIV_HOTBACKUP */
-# define page_cur_insert_rec_write_log(ins_rec,size,cur,index,mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
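The log record written by the function above is a delta: only the length of the prefix shared with the cursor record (mismatch_index) plus the differing tail reach the log, so a run of similar inserts stays cheap. A stripped-down model of that encoding and of the matching rebuild performed by the parser that follows (byte vectors stand in for physical records; the real framing with type codes and compressed lengths is omitted):

	#include <cstdio>
	#include <vector>

	struct insert_log {
		size_t prefix_len;			// aka mismatch_index
		std::vector<unsigned char> tail;	// bytes not shared
	};

	static insert_log encode(const std::vector<unsigned char>& cursor_rec,
				 const std::vector<unsigned char>& insert_rec)
	{
		size_t i = 0;
		while (i < cursor_rec.size() && i < insert_rec.size()
		       && cursor_rec[i] == insert_rec[i]) {
			i++;
		}
		return {i, {insert_rec.begin() + long(i), insert_rec.end()}};
	}

	// Mirrors the two ut_memcpy() calls in page_cur_parse_insert_rec().
	static std::vector<unsigned char> decode(
		const std::vector<unsigned char>& cursor_rec, const insert_log& l)
	{
		std::vector<unsigned char> rec(cursor_rec.begin(),
					       cursor_rec.begin() + long(l.prefix_len));
		rec.insert(rec.end(), l.tail.begin(), l.tail.end());
		return rec;
	}

	int main()
	{
		std::vector<unsigned char> cur = {1, 2, 3, 4, 5};
		std::vector<unsigned char> ins = {1, 2, 3, 9, 9, 9};

		insert_log l = encode(cur, ins);
		std::printf("shared %zu bytes, logged %zu\n",
			    l.prefix_len, l.tail.size());	// shared 3, logged 3
		std::printf("rebuilt ok: %d\n", decode(cur, l) == ins);	// 1
	}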
/***********************************************************//**
Parses a log record of a record insert on a page.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
page_cur_parse_insert_rec(
/*======================*/
ibool is_short,/*!< in: TRUE if short inserts */
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
+ const byte* ptr, /*!< in: buffer */
+ const byte* end_ptr,/*!< in: buffer end */
buf_block_t* block, /*!< in: page or NULL */
dict_index_t* index, /*!< in: record descriptor */
mtr_t* mtr) /*!< in: mtr or NULL */
{
- ulint origin_offset;
+ ulint origin_offset = 0; /* remove warning */
ulint end_seg_len;
- ulint mismatch_index;
+ ulint mismatch_index = 0; /* remove warning */
page_t* page;
rec_t* cursor_rec;
byte buf1[1024];
byte* buf;
- byte* ptr2 = ptr;
- ulint info_and_status_bits = 0; /* remove warning */
+ const byte* ptr2 = ptr;
+ ulint info_and_status_bits = 0; /* remove warning */
page_cur_t cursor;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
rec_offs_init(offsets_);
page = block ? buf_block_get_frame(block) : NULL;
@@ -797,7 +1035,7 @@ page_cur_parse_insert_rec(
cursor_rec = page + offset;
- if (UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)) {
+ if (offset >= UNIV_PAGE_SIZE) {
recv_sys->found_corrupt_log = TRUE;
@@ -805,14 +1043,14 @@ page_cur_parse_insert_rec(
}
}
- ptr = mach_parse_compressed(ptr, end_ptr, &end_seg_len);
+ end_seg_len = mach_parse_compressed(&ptr, end_ptr);
if (ptr == NULL) {
return(NULL);
}
- if (UNIV_UNLIKELY(end_seg_len >= UNIV_PAGE_SIZE << 1)) {
+ if (end_seg_len >= UNIV_PAGE_SIZE << 1) {
recv_sys->found_corrupt_log = TRUE;
return(NULL);
@@ -829,7 +1067,7 @@ page_cur_parse_insert_rec(
info_and_status_bits = mach_read_from_1(ptr);
ptr++;
- ptr = mach_parse_compressed(ptr, end_ptr, &origin_offset);
+ origin_offset = mach_parse_compressed(&ptr, end_ptr);
if (ptr == NULL) {
@@ -838,7 +1076,7 @@ page_cur_parse_insert_rec(
ut_a(origin_offset < UNIV_PAGE_SIZE);
- ptr = mach_parse_compressed(ptr, end_ptr, &mismatch_index);
+ mismatch_index = mach_parse_compressed(&ptr, end_ptr);
if (ptr == NULL) {
@@ -848,14 +1086,14 @@ page_cur_parse_insert_rec(
ut_a(mismatch_index < UNIV_PAGE_SIZE);
}
- if (UNIV_UNLIKELY(end_ptr < ptr + (end_seg_len >> 1))) {
+ if (end_ptr < ptr + (end_seg_len >> 1)) {
return(NULL);
}
if (!block) {
- return(ptr + (end_seg_len >> 1));
+ return(const_cast<byte*>(ptr + (end_seg_len >> 1)));
}
ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
@@ -864,7 +1102,9 @@ page_cur_parse_insert_rec(
/* Read from the log the inserted index record end segment which
differs from the cursor record */
- offsets = rec_get_offsets(cursor_rec, index, offsets,
+ ut_d(bool is_leaf = page_is_leaf(page));
+
+ offsets = rec_get_offsets(cursor_rec, index, offsets, is_leaf,
ULINT_UNDEFINED, &heap);
if (!(end_seg_len & 0x1UL)) {
@@ -880,47 +1120,42 @@ page_cur_parse_insert_rec(
buf = buf1;
} else {
buf = static_cast<byte*>(
- mem_alloc(mismatch_index + end_seg_len));
+ ut_malloc_nokey(mismatch_index + end_seg_len));
}
/* Build the inserted record to buf */
if (UNIV_UNLIKELY(mismatch_index >= UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "Is short %lu, info_and_status_bits %lu, offset %lu, "
- "o_offset %lu\n"
- "mismatch index %lu, end_seg_len %lu\n"
- "parsed len %lu\n",
- (ulong) is_short, (ulong) info_and_status_bits,
- (ulong) page_offset(cursor_rec),
- (ulong) origin_offset,
- (ulong) mismatch_index, (ulong) end_seg_len,
- (ulong) (ptr - ptr2));
-
- fputs("Dump of 300 bytes of log:\n", stderr);
- ut_print_buf(stderr, ptr2, 300);
- putc('\n', stderr);
-
- buf_page_print(page, 0);
- ut_error;
+ ib::fatal() << "is_short " << is_short << ", "
+ << "info_and_status_bits " << info_and_status_bits
+ << ", offset " << page_offset(cursor_rec) << ","
+ " o_offset " << origin_offset << ", mismatch index "
+ << mismatch_index << ", end_seg_len " << end_seg_len
+ << " parsed len " << (ptr - ptr2);
}
ut_memcpy(buf, rec_get_start(cursor_rec, offsets), mismatch_index);
ut_memcpy(buf + mismatch_index, ptr, end_seg_len);
if (page_is_comp(page)) {
+ /* Make rec_get_offsets() and rec_offs_make_valid() happy. */
+ ut_d(rec_set_heap_no_new(buf + origin_offset,
+ PAGE_HEAP_NO_USER_LOW));
rec_set_info_and_status_bits(buf + origin_offset,
- info_and_status_bits);
+ info_and_status_bits);
} else {
+ /* Make rec_get_offsets() and rec_offs_make_valid() happy. */
+ ut_d(rec_set_heap_no_old(buf + origin_offset,
+ PAGE_HEAP_NO_USER_LOW));
rec_set_info_bits_old(buf + origin_offset,
- info_and_status_bits);
+ info_and_status_bits);
}
page_cur_position(cursor_rec, block, &cursor);
offsets = rec_get_offsets(buf + origin_offset, index, offsets,
- ULINT_UNDEFINED, &heap);
+ is_leaf, ULINT_UNDEFINED, &heap);
if (UNIV_UNLIKELY(!page_cur_rec_insert(&cursor,
buf + origin_offset,
index, offsets, mtr))) {
@@ -931,22 +1166,21 @@ page_cur_parse_insert_rec(
if (buf != buf1) {
- mem_free(buf);
+ ut_free(buf);
}
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
- return(ptr + end_seg_len);
+ return(const_cast<byte*>(ptr + end_seg_len));
}
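The repeated mach_parse_compressed() changes in the hunk above swap the old "return the advanced pointer, write the value through an out-parameter" style for "advance the pointer in place, return the value". A generic reader in the new style; the wire format here is an ordinary base-128 varint, not InnoDB's compressed integer format (an assumption for illustration only):

	#include <cstdint>
	#include <cstdio>

	// Advances *ptr past one varint and returns its value; sets *ptr to
	// nullptr when the buffer ends mid-value, matching the NULL
	// convention the callers above test for.
	static uint64_t parse_varint(const unsigned char** ptr,
				     const unsigned char* end)
	{
		uint64_t val = 0;
		unsigned shift = 0;

		while (*ptr < end) {
			unsigned char b = *(*ptr)++;
			val |= uint64_t(b & 0x7f) << shift;
			if (!(b & 0x80)) {
				return val;
			}
			shift += 7;
		}

		*ptr = nullptr;		// truncated input
		return 0;
	}

	int main()
	{
		const unsigned char buf[] = {0xac, 0x02};	// 300, varint-encoded
		const unsigned char* p = buf;

		uint64_t v = parse_varint(&p, buf + sizeof buf);
		if (p != nullptr) {
			std::printf("%llu (consumed %td bytes)\n",
				    (unsigned long long) v, p - buf);
		}
	}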
/***********************************************************//**
Inserts a record next to page cursor on an uncompressed page.
Returns a pointer to the inserted record on success, i.e., when enough
space is available; NULL otherwise. The cursor stays at the same position.
-@return pointer to record if succeed, NULL otherwise */
-UNIV_INTERN
+@return pointer to record on success, NULL otherwise */
rec_t*
page_cur_insert_rec_low(
/*====================*/
@@ -954,7 +1188,7 @@ page_cur_insert_rec_low(
which the new record is inserted */
dict_index_t* index, /*!< in: record descriptor */
const rec_t* rec, /*!< in: pointer to a physical record */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
+ offset_t* offsets,/*!< in/out: rec_get_offsets(rec, index) */
mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
{
byte* insert_buf;
@@ -973,10 +1207,10 @@ page_cur_insert_rec_low(
page = page_align(current_rec);
ut_ad(dict_table_is_comp(index->table)
== (ibool) !!page_is_comp(page));
- ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
- ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)
- == index->id || recv_recovery_is_on()
- || (mtr ? mtr->inside_ibuf : dict_index_is_ibuf(index)));
+ ut_ad(fil_page_index_page_check(page));
+ ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID) == index->id
+ || recv_recovery_is_on()
+ || (mtr ? mtr->is_inside_ibuf() : dict_index_is_ibuf(index)));
ut_ad(!page_rec_is_supremum(current_rec));
@@ -1005,14 +1239,15 @@ page_cur_insert_rec_low(
free_rec = page_header_get_ptr(page, PAGE_FREE);
if (UNIV_LIKELY_NULL(free_rec)) {
/* Try to allocate from the head of the free list. */
- ulint foffsets_[REC_OFFS_NORMAL_SIZE];
- ulint* foffsets = foffsets_;
+ offset_t foffsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* foffsets = foffsets_;
mem_heap_t* heap = NULL;
rec_offs_init(foffsets_);
foffsets = rec_get_offsets(
- free_rec, index, foffsets, ULINT_UNDEFINED, &heap);
+ free_rec, index, foffsets, page_is_leaf(page),
+ ULINT_UNDEFINED, &heap);
if (rec_offs_size(foffsets) < rec_size) {
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
@@ -1061,7 +1296,8 @@ use_heap:
data_len = rec_offs_data_size(offsets);
fprintf(stderr, "InnoDB: Error: current_rec == insert_rec "
- " extra_len %lu data_len %lu insert_buf %p rec %p\n",
+ " extra_len " ULINTPF
+ " data_len " ULINTPF " insert_buf %p rec %p\n",
extra_len, data_len, insert_buf, rec);
fprintf(stderr, "InnoDB; Physical record: \n");
rec_print(stderr, rec, index);
@@ -1113,34 +1349,36 @@ use_heap:
|| rec_get_node_ptr_flag(last_insert)
== rec_get_node_ptr_flag(insert_rec));
- if (UNIV_UNLIKELY(last_insert == NULL)) {
- page_header_set_field(page, NULL, PAGE_DIRECTION,
- PAGE_NO_DIRECTION);
- page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
-
- } else if ((last_insert == current_rec)
- && (page_header_get_field(page, PAGE_DIRECTION)
- != PAGE_LEFT)) {
-
- page_header_set_field(page, NULL, PAGE_DIRECTION,
- PAGE_RIGHT);
- page_header_set_field(page, NULL, PAGE_N_DIRECTION,
- page_header_get_field(
- page, PAGE_N_DIRECTION) + 1);
-
- } else if ((page_rec_get_next(insert_rec) == last_insert)
- && (page_header_get_field(page, PAGE_DIRECTION)
- != PAGE_RIGHT)) {
-
- page_header_set_field(page, NULL, PAGE_DIRECTION,
- PAGE_LEFT);
- page_header_set_field(page, NULL, PAGE_N_DIRECTION,
- page_header_get_field(
- page, PAGE_N_DIRECTION) + 1);
- } else {
- page_header_set_field(page, NULL, PAGE_DIRECTION,
- PAGE_NO_DIRECTION);
- page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
+ if (!dict_index_is_spatial(index)) {
+ if (UNIV_UNLIKELY(last_insert == NULL)) {
+ page_header_set_field(page, NULL, PAGE_DIRECTION,
+ PAGE_NO_DIRECTION);
+ page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
+
+ } else if ((last_insert == current_rec)
+ && (page_header_get_field(page, PAGE_DIRECTION)
+ != PAGE_LEFT)) {
+
+ page_header_set_field(page, NULL, PAGE_DIRECTION,
+ PAGE_RIGHT);
+ page_header_set_field(page, NULL, PAGE_N_DIRECTION,
+ page_header_get_field(
+ page, PAGE_N_DIRECTION) + 1);
+
+ } else if ((page_rec_get_next(insert_rec) == last_insert)
+ && (page_header_get_field(page, PAGE_DIRECTION)
+ != PAGE_RIGHT)) {
+
+ page_header_set_field(page, NULL, PAGE_DIRECTION,
+ PAGE_LEFT);
+ page_header_set_field(page, NULL, PAGE_N_DIRECTION,
+ page_header_get_field(
+ page, PAGE_N_DIRECTION) + 1);
+ } else {
+ page_header_set_field(page, NULL, PAGE_DIRECTION,
+ PAGE_NO_DIRECTION);
+ page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
+ }
}
page_header_set_ptr(page, NULL, PAGE_LAST_INSERT, insert_rec);
@@ -1174,8 +1412,6 @@ use_heap:
current_rec, index, mtr);
}
- btr_blob_dbg_add_rec(insert_rec, index, offsets, "insert");
-
return(insert_rec);
}
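The large block above (now skipped for spatial indexes, whose inserts have no meaningful linear order) maintains the statistics the search shortcut consumes: an insert directly after the previous one extends a PAGE_RIGHT run, one directly before it extends a PAGE_LEFT run, and anything else resets the counters. The same state machine in isolation (enum names are stand-ins for the PAGE_DIRECTION constants, and the last_insert == NULL reset case is folded into the final branch):

	#include <cstdio>

	enum direction { NO_DIRECTION, LEFT, RIGHT };

	struct dir_stats {
		direction dir = NO_DIRECTION;	// PAGE_DIRECTION
		unsigned n = 0;			// PAGE_N_DIRECTION
	};

	static void on_insert(dir_stats& s, bool after_last, bool before_last)
	{
		if (after_last && s.dir != LEFT) {
			s.dir = RIGHT;		// still appending rightward
			s.n++;
		} else if (before_last && s.dir != RIGHT) {
			s.dir = LEFT;		// still prepending leftward
			s.n++;
		} else {
			s.dir = NO_DIRECTION;	// pattern broken: reset
			s.n = 0;
		}
	}

	int main()
	{
		dir_stats s;
		for (int i = 0; i < 5; i++) {
			on_insert(s, /*after_last=*/true, /*before_last=*/false);
		}
		std::printf("dir=%d n=%u\n", s.dir, s.n);	// dir=2 (RIGHT) n=5
	}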
@@ -1190,15 +1426,14 @@ if this is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit().
-@return pointer to record if succeed, NULL otherwise */
-UNIV_INTERN
+@return pointer to record on success, NULL otherwise */
rec_t*
page_cur_insert_rec_zip(
/*====================*/
page_cur_t* cursor, /*!< in/out: page cursor */
dict_index_t* index, /*!< in: record descriptor */
const rec_t* rec, /*!< in: pointer to a physical record */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
+ offset_t* offsets,/*!< in/out: rec_get_offsets(rec, index) */
mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
{
byte* insert_buf;
@@ -1221,10 +1456,10 @@ page_cur_insert_rec_zip(
page = page_cur_get_page(cursor);
ut_ad(dict_table_is_comp(index->table));
ut_ad(page_is_comp(page));
- ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
- ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)
- == index->id || recv_recovery_is_on()
- || (mtr ? mtr->inside_ibuf : dict_index_is_ibuf(index)));
+ ut_ad(fil_page_index_page_check(page));
+ ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID) == index->id
+ || (mtr ? mtr->is_inside_ibuf() : dict_index_is_ibuf(index))
+ || recv_recovery_is_on());
ut_ad(!page_cur_is_after_last(cursor));
#ifdef UNIV_ZIP_DEBUG
@@ -1282,7 +1517,7 @@ page_cur_insert_rec_zip(
get rid of the modification log. */
page_create_zip(page_cur_get_block(cursor), index,
page_header_get_field(page, PAGE_LEVEL),
- 0, mtr);
+ 0, NULL, mtr);
ut_ad(!page_header_get_ptr(page, PAGE_FREE));
if (page_zip_available(
@@ -1355,7 +1590,7 @@ page_cur_insert_rec_zip(
if (!log_compressed) {
if (page_zip_compress(
page_zip, page, index,
- level, NULL)) {
+ level, NULL, NULL)) {
page_cur_insert_rec_write_log(
insert_rec, rec_size,
cursor->rec, index, mtr);
@@ -1421,12 +1656,10 @@ page_cur_insert_rec_zip(
}
/* Out of space: restore the page */
- btr_blob_dbg_remove(page, index, "insert_zip_fail");
if (!page_zip_decompress(page_zip, page, FALSE)) {
ut_error; /* Memory corrupted? */
}
ut_ad(page_validate(page, index));
- btr_blob_dbg_add(page, index, "insert_zip_fail");
insert_rec = NULL;
}
@@ -1437,13 +1670,14 @@ page_cur_insert_rec_zip(
if (UNIV_LIKELY_NULL(free_rec)) {
/* Try to allocate from the head of the free list. */
lint extra_size_diff;
- ulint foffsets_[REC_OFFS_NORMAL_SIZE];
- ulint* foffsets = foffsets_;
+ offset_t foffsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* foffsets = foffsets_;
mem_heap_t* heap = NULL;
rec_offs_init(foffsets_);
foffsets = rec_get_offsets(free_rec, index, foffsets,
+ page_rec_is_leaf(free_rec),
ULINT_UNDEFINED, &heap);
if (rec_offs_size(foffsets) < rec_size) {
too_small:
@@ -1587,34 +1821,38 @@ use_heap:
|| rec_get_node_ptr_flag(last_insert)
== rec_get_node_ptr_flag(insert_rec));
- if (UNIV_UNLIKELY(last_insert == NULL)) {
- page_header_set_field(page, page_zip, PAGE_DIRECTION,
- PAGE_NO_DIRECTION);
- page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
-
- } else if ((last_insert == cursor->rec)
- && (page_header_get_field(page, PAGE_DIRECTION)
- != PAGE_LEFT)) {
-
- page_header_set_field(page, page_zip, PAGE_DIRECTION,
- PAGE_RIGHT);
- page_header_set_field(page, page_zip, PAGE_N_DIRECTION,
- page_header_get_field(
- page, PAGE_N_DIRECTION) + 1);
-
- } else if ((page_rec_get_next(insert_rec) == last_insert)
- && (page_header_get_field(page, PAGE_DIRECTION)
- != PAGE_RIGHT)) {
-
- page_header_set_field(page, page_zip, PAGE_DIRECTION,
- PAGE_LEFT);
- page_header_set_field(page, page_zip, PAGE_N_DIRECTION,
- page_header_get_field(
- page, PAGE_N_DIRECTION) + 1);
- } else {
- page_header_set_field(page, page_zip, PAGE_DIRECTION,
- PAGE_NO_DIRECTION);
- page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
+ if (!dict_index_is_spatial(index)) {
+ if (UNIV_UNLIKELY(last_insert == NULL)) {
+ page_header_set_field(page, page_zip, PAGE_DIRECTION,
+ PAGE_NO_DIRECTION);
+ page_header_set_field(page, page_zip,
+ PAGE_N_DIRECTION, 0);
+
+ } else if ((last_insert == cursor->rec)
+ && (page_header_get_field(page, PAGE_DIRECTION)
+ != PAGE_LEFT)) {
+
+ page_header_set_field(page, page_zip, PAGE_DIRECTION,
+ PAGE_RIGHT);
+ page_header_set_field(page, page_zip, PAGE_N_DIRECTION,
+ page_header_get_field(
+ page, PAGE_N_DIRECTION) + 1);
+
+ } else if ((page_rec_get_next(insert_rec) == last_insert)
+ && (page_header_get_field(page, PAGE_DIRECTION)
+ != PAGE_RIGHT)) {
+
+ page_header_set_field(page, page_zip, PAGE_DIRECTION,
+ PAGE_LEFT);
+ page_header_set_field(page, page_zip, PAGE_N_DIRECTION,
+ page_header_get_field(
+ page, PAGE_N_DIRECTION) + 1);
+ } else {
+ page_header_set_field(page, page_zip, PAGE_DIRECTION,
+ PAGE_NO_DIRECTION);
+ page_header_set_field(page, page_zip,
+ PAGE_N_DIRECTION, 0);
+ }
}
page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, insert_rec);
@@ -1640,8 +1878,6 @@ use_heap:
page_zip_write_rec(page_zip, insert_rec, index, offsets, 1);
- btr_blob_dbg_add_rec(insert_rec, index, offsets, "insert_zip_ok");
-
/* 9. Write log record of the insert */
if (UNIV_LIKELY(mtr != NULL)) {
page_cur_insert_rec_write_log(insert_rec, rec_size,
@@ -1651,7 +1887,6 @@ use_heap:
return(insert_rec);
}
-#ifndef UNIV_HOTBACKUP
/**********************************************************//**
Writes a log record of copying a record list end to a new created page.
@return 4-byte field where to write the log data length, or NULL if
@@ -1667,6 +1902,7 @@ page_copy_rec_list_to_created_page_write_log(
byte* log_ptr;
ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
+ ut_ad(mtr->is_named_space(index->space));
log_ptr = mlog_open_and_write_index(mtr, page, index,
page_is_comp(page)
@@ -1678,12 +1914,10 @@ page_copy_rec_list_to_created_page_write_log(
return(log_ptr);
}
-#endif /* !UNIV_HOTBACKUP */
/**********************************************************//**
Parses a log record of copying a record list end to a new created page.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
page_parse_copy_rec_list_to_created_page(
/*=====================================*/
@@ -1718,6 +1952,8 @@ page_parse_copy_rec_list_to_created_page(
return(rec_end);
}
+ ut_ad(fil_page_index_page_check(block->frame));
+
while (ptr < rec_end) {
ptr = page_cur_parse_insert_rec(TRUE, ptr, end_ptr,
block, index, mtr);
@@ -1729,14 +1965,16 @@ page_parse_copy_rec_list_to_created_page(
page_zip = buf_block_get_page_zip(block);
page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
- page_header_set_field(page, page_zip, PAGE_DIRECTION,
- PAGE_NO_DIRECTION);
- page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
+
+ if (!dict_index_is_spatial(index)) {
+ page_header_set_field(page, page_zip, PAGE_DIRECTION,
+ PAGE_NO_DIRECTION);
+ page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
+ }
return(rec_end);
}
-#ifndef UNIV_HOTBACKUP
/*************************************************************//**
Copies records from page to a newly created page, from a given record onward,
including that record. Infimum and supremum records are not copied.
@@ -1745,7 +1983,6 @@ IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
if this is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit(). */
-UNIV_INTERN
void
page_copy_rec_list_end_to_created_page(
/*===================================*/
@@ -1762,17 +1999,17 @@ page_copy_rec_list_end_to_created_page(
ulint n_recs;
ulint slot_index;
ulint rec_size;
- ulint log_mode;
byte* log_ptr;
ulint log_data_len;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
rec_offs_init(offsets_);
ut_ad(page_dir_get_n_heap(new_page) == PAGE_HEAP_NO_USER_LOW);
ut_ad(page_align(rec) != new_page);
ut_ad(page_rec_is_comp(rec) == page_is_comp(new_page));
+ ut_ad(fil_page_index_page_check(new_page));
if (page_rec_is_infimum(rec)) {
@@ -1791,15 +2028,17 @@ page_copy_rec_list_end_to_created_page(
page_header_set_ptr(new_page, NULL, PAGE_HEAP_TOP,
new_page + UNIV_PAGE_SIZE - 1);
#endif
-
log_ptr = page_copy_rec_list_to_created_page_write_log(new_page,
index, mtr);
- log_data_len = dyn_array_get_data_size(&(mtr->log));
+ log_data_len = mtr->get_log()->size();
/* Individual inserts are logged in a shorter form */
- log_mode = mtr_set_log_mode(mtr, MTR_LOG_SHORT_INSERTS);
+ const mtr_log_t log_mode = dict_table_is_temporary(index->table)
+ || !index->is_readable() /* IMPORT TABLESPACE */
+ ? mtr_get_log_mode(mtr)
+ : mtr_set_log_mode(mtr, MTR_LOG_SHORT_INSERTS);
prev_rec = page_get_infimum_rec(new_page);
if (page_is_comp(new_page)) {
@@ -1811,8 +2050,10 @@ page_copy_rec_list_end_to_created_page(
slot_index = 0;
n_recs = 0;
+ ut_d(const bool is_leaf = page_is_leaf(new_page));
+
do {
- offsets = rec_get_offsets(rec, index, offsets,
+ offsets = rec_get_offsets(rec, index, offsets, is_leaf,
ULINT_UNDEFINED, &heap);
insert_rec = rec_copy(heap_top, rec, offsets);
@@ -1855,14 +2096,14 @@ page_copy_rec_list_end_to_created_page(
heap_top += rec_size;
rec_offs_make_valid(insert_rec, index, offsets);
- btr_blob_dbg_add_rec(insert_rec, index, offsets, "copy_end");
-
page_cur_insert_rec_write_log(insert_rec, rec_size, prev_rec,
index, mtr);
prev_rec = insert_rec;
rec = page_rec_get_next(rec);
} while (!page_rec_is_supremum(rec));
+ ut_ad(n_recs);
+
if ((slot_index > 0) && (count + 1
+ (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2
<= PAGE_DIR_SLOT_MAX_N_OWNED)) {
@@ -1884,11 +2125,11 @@ page_copy_rec_list_end_to_created_page(
mem_heap_free(heap);
}
- log_data_len = dyn_array_get_data_size(&(mtr->log)) - log_data_len;
+ log_data_len = mtr->get_log()->size() - log_data_len;
ut_a(log_data_len < 100 * UNIV_PAGE_SIZE);
- if (UNIV_LIKELY(log_ptr != NULL)) {
+ if (log_ptr != NULL) {
mach_write_to_4(log_ptr, log_data_len);
}
@@ -1909,8 +2150,9 @@ page_copy_rec_list_end_to_created_page(
page_header_set_field(new_page, NULL, PAGE_N_RECS, n_recs);
page_header_set_ptr(new_page, NULL, PAGE_LAST_INSERT, NULL);
+
page_header_set_field(new_page, NULL, PAGE_DIRECTION,
- PAGE_NO_DIRECTION);
+ PAGE_NO_DIRECTION);
page_header_set_field(new_page, NULL, PAGE_N_DIRECTION, 0);
/* Restore the log mode */
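The copy loop above brackets its work with a reserve-then-backpatch pattern: note the mtr log size, switch to the cheaper MTR_LOG_SHORT_INSERTS mode so each copied row logs only a compact insert body, then patch the total byte count into the 4-byte slot the header record reserved. A sketch of that pattern over a plain byte buffer (framing simplified; mach_write_to_4() is big-endian, host order is used here for brevity):

	#include <cstdint>
	#include <cstdio>
	#include <cstring>
	#include <vector>

	int main()
	{
		std::vector<uint8_t> log;

		log.push_back(0x2a);			// header record (stand-in opcode)
		size_t len_slot = log.size();		// reserve 4 bytes for the length
		log.resize(log.size() + 4);
		size_t before = log.size();

		for (int rec = 0; rec < 3; rec++) {	// "short insert" bodies
			log.push_back(0x01);
			log.push_back(uint8_t(rec));
		}

		uint32_t data_len = uint32_t(log.size() - before);
		std::memcpy(&log[len_slot], &data_len, 4);	// backpatch
		std::printf("logged %u payload bytes\n", data_len);
	}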
@@ -1931,6 +2173,7 @@ page_cur_delete_rec_write_log(
byte* log_ptr;
ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
+ ut_ad(mtr->is_named_space(index->space));
log_ptr = mlog_open_and_write_index(mtr, rec, index,
page_rec_is_comp(rec)
@@ -1948,14 +2191,10 @@ page_cur_delete_rec_write_log(
mlog_close(mtr, log_ptr + 2);
}
-#else /* !UNIV_HOTBACKUP */
-# define page_cur_delete_rec_write_log(rec,index,mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
Parses log record of a record delete on a page.
-@return pointer to record end or NULL */
-UNIV_INTERN
+@return pointer to record end or NULL */
byte*
page_cur_parse_delete_rec(
/*======================*/
@@ -1977,12 +2216,15 @@ page_cur_parse_delete_rec(
offset = mach_read_from_2(ptr);
ptr += 2;
- ut_a(offset <= UNIV_PAGE_SIZE);
+ if (UNIV_UNLIKELY(offset >= srv_page_size)) {
+ recv_sys->found_corrupt_log = true;
+ return NULL;
+ }
if (block) {
page_t* page = buf_block_get_frame(block);
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
rec_t* rec = page + offset;
rec_offs_init(offsets_);
@@ -1991,6 +2233,7 @@ page_cur_parse_delete_rec(
page_cur_delete_rec(&cursor, index,
rec_get_offsets(rec, index, offsets_,
+ page_rec_is_leaf(rec),
ULINT_UNDEFINED, &heap),
mtr);
if (UNIV_LIKELY_NULL(heap)) {
@@ -2004,13 +2247,12 @@ page_cur_parse_delete_rec(
/***********************************************************//**
Deletes a record at the page cursor. The cursor is moved to the next
record after the deleted one. */
-UNIV_INTERN
void
page_cur_delete_rec(
/*================*/
page_cur_t* cursor, /*!< in/out: a page cursor */
const dict_index_t* index, /*!< in: record descriptor */
- const ulint* offsets,/*!< in: rec_get_offsets(
+ const offset_t* offsets,/*!< in: rec_get_offsets(
cursor->rec, index) */
mtr_t* mtr) /*!< in: mini-transaction handle
or NULL */
@@ -2040,10 +2282,11 @@ page_cur_delete_rec(
current_rec = cursor->rec;
ut_ad(rec_offs_validate(current_rec, index, offsets));
ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
- ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
- ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)
- == index->id || recv_recovery_is_on()
- || (mtr ? mtr->inside_ibuf : dict_index_is_ibuf(index)));
+ ut_ad(fil_page_index_page_check(page));
+ ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID) == index->id
+ || (mtr ? mtr->is_inside_ibuf() : dict_index_is_ibuf(index))
+ || recv_recovery_is_on());
+ ut_ad(mtr == NULL || mtr->is_named_space(index->space));
/* The record must not be the supremum or infimum record. */
ut_ad(page_rec_is_user_rec(current_rec));
@@ -2102,7 +2345,7 @@ page_cur_delete_rec(
/* rec now points to the record of the previous directory slot. Look
for the immediate predecessor of current_rec in a loop. */
- while(current_rec != rec) {
+ while (current_rec != rec) {
prev_rec = rec;
rec = page_rec_get_next(rec);
}
@@ -2133,8 +2376,6 @@ page_cur_delete_rec(
page_dir_slot_set_n_owned(cur_dir_slot, page_zip, cur_n_owned - 1);
/* 6. Free the memory occupied by the record */
- btr_blob_dbg_remove_rec(current_rec, const_cast<dict_index_t*>(index),
- offsets, "delete");
page_mem_free(page, page_zip, current_rec, index, offsets);
/* 7. Now we have decremented the number of owned records of the slot.
@@ -2144,27 +2385,22 @@ page_cur_delete_rec(
if (cur_n_owned <= PAGE_DIR_SLOT_MIN_N_OWNED) {
page_dir_balance_slot(page, page_zip, cur_slot_no);
}
-
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
}
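Step 7 above keeps the directory invariant: every slot must own between PAGE_DIR_SLOT_MIN_N_OWNED and PAGE_DIR_SLOT_MAX_N_OWNED records, so once a deletion pushes a slot's count below the minimum it is balanced against a neighbour. A toy invariant check with made-up bounds; the real page_dir_balance_slot() may shift ownership or merge slots outright:

	#include <cassert>
	#include <vector>

	int main()
	{
		const unsigned MIN_OWNED = 4, MAX_OWNED = 8;	// stand-in bounds
		std::vector<unsigned> owned = {6, 4, 7};	// records per slot

		owned[1]--;			// delete one record from slot 1
		if (owned[1] < MIN_OWNED) {	// balance with the left neighbour
			owned[0]--;
			owned[1]++;
		}
		for (unsigned n : owned) {
			assert(n >= MIN_OWNED && n <= MAX_OWNED);
		}
	}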
#ifdef UNIV_COMPILE_TEST_FUNCS
/*******************************************************************//**
-Print the first n numbers, generated by page_cur_lcg_prng() to make sure
+Print the first n numbers, generated by ut_rnd_gen() to make sure
(visually) that it works properly. */
void
-test_page_cur_lcg_prng(
-/*===================*/
+test_ut_rnd_gen(
int n) /*!< in: print first n numbers */
{
int i;
unsigned long long rnd;
for (i = 0; i < n; i++) {
- rnd = page_cur_lcg_prng();
+ rnd = ut_rnd_gen();
printf("%llu\t%%2=%llu %%3=%llu %%5=%llu %%7=%llu %%11=%llu\n",
rnd,
rnd % 2,
diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc
index ac16d71322a..61ebbc3d140 100644
--- a/storage/innobase/page/page0page.cc
+++ b/storage/innobase/page/page0page.cc
@@ -2,7 +2,7 @@
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2018, MariaDB Corporation.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,23 +25,18 @@ Index page routines
Created 2/2/1994 Heikki Tuuri
*******************************************************/
-#define THIS_MODULE
#include "page0page.h"
-#ifdef UNIV_NONINL
-#include "page0page.ic"
-#endif
-#undef THIS_MODULE
-
#include "page0cur.h"
#include "page0zip.h"
#include "buf0buf.h"
#include "btr0btr.h"
-#ifndef UNIV_HOTBACKUP
-# include "srv0srv.h"
-# include "lock0lock.h"
-# include "fut0lst.h"
-# include "btr0sea.h"
-#endif /* !UNIV_HOTBACKUP */
+#include "row0trunc.h"
+#include "srv0srv.h"
+#include "lock0lock.h"
+#include "fut0lst.h"
+#include "btr0sea.h"
+#include "trx0sys.h"
+#include <algorithm>
/* THE INDEX PAGE
==============
@@ -87,8 +82,7 @@ is 50 x 4 bytes = 200 bytes. */
/***************************************************************//**
Looks for the directory slot which owns the given record.
-@return the directory slot number */
-UNIV_INTERN
+@return the directory slot number */
ulint
page_dir_find_owner_slot(
/*=====================*/
@@ -121,11 +115,9 @@ page_dir_find_owner_slot(
while (UNIV_LIKELY(*(uint16*) slot != rec_offs_bytes)) {
if (UNIV_UNLIKELY(slot == first_slot)) {
- fprintf(stderr,
- "InnoDB: Probable data corruption on"
- " page %lu\n"
- "InnoDB: Original record ",
- (ulong) page_get_page_no(page));
+ ib::error() << "Probable data corruption on page "
+ << page_get_page_no(page)
+ << ". Original record on that page;";
if (page_is_comp(page)) {
fputs("(compact record)", stderr);
@@ -133,20 +125,15 @@ page_dir_find_owner_slot(
rec_print_old(stderr, rec);
}
- fputs("\n"
- "InnoDB: on that page.\n"
- "InnoDB: Cannot find the dir slot for record ",
- stderr);
+ ib::error() << "Cannot find the dir slot for this"
+ " record on that page;";
+
if (page_is_comp(page)) {
fputs("(compact record)", stderr);
} else {
rec_print_old(stderr, page
+ mach_decode_2(rec_offs_bytes));
}
- fputs("\n"
- "InnoDB: on that page!\n", stderr);
-
- buf_page_print(page, 0);
ut_error;
}
@@ -159,7 +146,7 @@ page_dir_find_owner_slot(
/**************************************************************//**
Used to check the consistency of a directory slot.
-@return TRUE if succeed */
+@return TRUE on success */
static
ibool
page_dir_slot_check(
@@ -202,7 +189,6 @@ page_dir_slot_check(
/*************************************************************//**
Sets the max trx id field value. */
-UNIV_INTERN
void
page_set_max_trx_id(
/*================*/
@@ -212,9 +198,7 @@ page_set_max_trx_id(
mtr_t* mtr) /*!< in/out: mini-transaction, or NULL */
{
page_t* page = buf_block_get_frame(block);
-#ifndef UNIV_HOTBACKUP
ut_ad(!mtr || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-#endif /* !UNIV_HOTBACKUP */
/* It is not necessary to write this change to the redo log, as
during a database recovery we assume that the max trx id of every
@@ -225,20 +209,51 @@ page_set_max_trx_id(
page_zip_write_header(page_zip,
page + (PAGE_HEADER + PAGE_MAX_TRX_ID),
8, mtr);
-#ifndef UNIV_HOTBACKUP
} else if (mtr) {
mlog_write_ull(page + (PAGE_HEADER + PAGE_MAX_TRX_ID),
trx_id, mtr);
-#endif /* !UNIV_HOTBACKUP */
} else {
mach_write_to_8(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), trx_id);
}
}
+/** Persist the AUTO_INCREMENT value on a clustered index root page.
+@param[in,out] block clustered index root page
+@param[in] index clustered index
+@param[in] autoinc next available AUTO_INCREMENT value
+@param[in,out] mtr mini-transaction
+@param[in] reset whether to reset the AUTO_INCREMENT
+ to a possibly smaller value than currently
+ exists in the page */
+void
+page_set_autoinc(
+ buf_block_t* block,
+ const dict_index_t* index MY_ATTRIBUTE((unused)),
+ ib_uint64_t autoinc,
+ mtr_t* mtr,
+ bool reset)
+{
+ ut_ad(mtr_memo_contains_flagged(
+ mtr, block, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX));
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(index->page == block->page.id.page_no());
+ ut_ad(index->space == block->page.id.space());
+
+ byte* field = PAGE_HEADER + PAGE_ROOT_AUTO_INC
+ + buf_block_get_frame(block);
+ if (!reset && mach_read_from_8(field) >= autoinc) {
+ /* nothing to update */
+ } else if (page_zip_des_t* page_zip = buf_block_get_page_zip(block)) {
+ mach_write_to_8(field, autoinc);
+ page_zip_write_header(page_zip, field, 8, mtr);
+ } else {
+ mlog_write_ull(field, autoinc, mtr);
+ }
+}
+
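page_set_autoinc() above only moves the persisted counter forward unless the caller explicitly asks for a reset. A minimal model of that rule (the real function additionally redo-logs the write, compressed or not):

	#include <cstdint>
	#include <cstdio>

	static void set_autoinc(uint64_t& persisted, uint64_t autoinc, bool reset)
	{
		if (!reset && persisted >= autoinc) {
			return;			// nothing to update
		}
		persisted = autoinc;		// would be redo-logged in InnoDB
	}

	int main()
	{
		uint64_t root_field = 100;
		set_autoinc(root_field, 50, false);	// ignored: would go backwards
		set_autoinc(root_field, 200, false);	// advances
		set_autoinc(root_field, 50, true);	// explicit reset wins
		std::printf("%llu\n", (unsigned long long) root_field);	// 50
	}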
/************************************************************//**
Allocates a block of memory from the heap of an index page.
-@return pointer to start of allocated buffer, or NULL if allocation fails */
-UNIV_INTERN
+@return pointer to start of allocated buffer, or NULL if allocation fails */
byte*
page_mem_alloc_heap(
/*================*/
@@ -273,7 +288,6 @@ page_mem_alloc_heap(
return(NULL);
}
-#ifndef UNIV_HOTBACKUP
/**********************************************************//**
Writes a log record of page creation. */
UNIV_INLINE
@@ -283,63 +297,67 @@ page_create_write_log(
buf_frame_t* frame, /*!< in: a buffer frame where the page is
created */
mtr_t* mtr, /*!< in: mini-transaction handle */
- ibool comp) /*!< in: TRUE=compact page format */
+ ibool comp, /*!< in: TRUE=compact page format */
+ bool is_rtree) /*!< in: whether it is R-tree */
{
- mlog_write_initial_log_record(frame, comp
- ? MLOG_COMP_PAGE_CREATE
- : MLOG_PAGE_CREATE, mtr);
-}
-#else /* !UNIV_HOTBACKUP */
-# define page_create_write_log(frame,mtr,comp) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Parses a redo log record of creating a page.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_parse_create(
-/*==============*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr MY_ATTRIBUTE((unused)), /*!< in: buffer end */
- ulint comp, /*!< in: nonzero=compact page format */
- buf_block_t* block, /*!< in: block or NULL */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- ut_ad(ptr && end_ptr);
+ mlog_id_t type;
- /* The record is empty, except for the record initial part */
-
- if (block) {
- page_create(block, mtr, comp);
+ if (is_rtree) {
+ type = comp ? MLOG_COMP_PAGE_CREATE_RTREE
+ : MLOG_PAGE_CREATE_RTREE;
+ } else {
+ type = comp ? MLOG_COMP_PAGE_CREATE : MLOG_PAGE_CREATE;
}
- return(ptr);
+ mlog_write_initial_log_record(frame, type, mtr);
}
+/** The page infimum and supremum of an empty page in ROW_FORMAT=REDUNDANT */
+static const byte infimum_supremum_redundant[] = {
+ /* the infimum record */
+ 0x08/*end offset*/,
+ 0x01/*n_owned*/,
+ 0x00, 0x00/*heap_no=0*/,
+ 0x03/*n_fields=1, 1-byte offsets*/,
+ 0x00, 0x74/* pointer to supremum */,
+ 'i', 'n', 'f', 'i', 'm', 'u', 'm', 0,
+ /* the supremum record */
+ 0x09/*end offset*/,
+ 0x01/*n_owned*/,
+ 0x00, 0x08/*heap_no=1*/,
+ 0x03/*n_fields=1, 1-byte offsets*/,
+ 0x00, 0x00/* end of record list */,
+ 's', 'u', 'p', 'r', 'e', 'm', 'u', 'm', 0
+};
+
+/** The page infimum and supremum of an empty page in ROW_FORMAT=COMPACT */
+static const byte infimum_supremum_compact[] = {
+ /* the infimum record */
+ 0x01/*n_owned=1*/,
+ 0x00, 0x02/* heap_no=0, REC_STATUS_INFIMUM */,
+ 0x00, 0x0d/* pointer to supremum */,
+ 'i', 'n', 'f', 'i', 'm', 'u', 'm', 0,
+ /* the supremum record */
+ 0x01/*n_owned=1*/,
+ 0x00, 0x0b/* heap_no=1, REC_STATUS_SUPREMUM */,
+ 0x00, 0x00/* end of record list */,
+ 's', 'u', 'p', 'r', 'e', 'm', 'u', 'm'
+};
+
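The magic pointer bytes in the two arrays above can be cross-checked against the page layout. Assuming PAGE_DATA = 94 (38-byte FIL header plus 56 bytes of index-page and fseg headers) and the per-record extra sizes of 5 bytes (compact) and 1 offset byte plus 6 extra bytes (redundant), the 0x0d and 0x74 next-record pointers fall out arithmetically:

	#include <cstdio>

	int main()
	{
		const int PAGE_DATA = 94;	// assumption, see lead-in

		// ROW_FORMAT=COMPACT: 5 extra bytes precede each record origin
		// and the next-record pointer is relative to the current origin.
		int new_infimum = PAGE_DATA + 5;		// 99
		int new_supremum = new_infimum + 8 + 5;		// 112
		std::printf("compact next-ptr: 0x%02x\n",
			    (unsigned)(new_supremum - new_infimum));	// 0x0d

		// ROW_FORMAT=REDUNDANT: 1 offset byte + 6 extra bytes per
		// record; the next-record pointer is an absolute page offset.
		int old_infimum = PAGE_DATA + 1 + 6;		// 101
		int old_supremum = old_infimum + 8 + 1 + 6;	// 116
		std::printf("redundant next-ptr: 0x%02x\n",
			    (unsigned) old_supremum);		// 0x74
	}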
/**********************************************************//**
The index page creation function.
-@return pointer to the page */
+@return pointer to the page */
static
page_t*
page_create_low(
/*============*/
buf_block_t* block, /*!< in: a buffer block where the
page is created */
- ulint comp) /*!< in: nonzero=compact page format */
+ ulint comp, /*!< in: nonzero=compact page format */
+ bool is_rtree) /*!< in: if it is an R-Tree page */
{
- page_dir_slot_t* slot;
- mem_heap_t* heap;
- dtuple_t* tuple;
- dfield_t* field;
- byte* heap_top;
- rec_t* infimum_rec;
- rec_t* supremum_rec;
page_t* page;
- dict_index_t* index;
- ulint* offsets;
- ut_ad(block);
#if PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA
# error "PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA"
#endif
@@ -347,175 +365,148 @@ page_create_low(
# error "PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA"
#endif
- /* The infimum and supremum records use a dummy index. */
- if (UNIV_LIKELY(comp)) {
- index = dict_ind_compact;
- } else {
- index = dict_ind_redundant;
- }
-
- /* 1. INCREMENT MODIFY CLOCK */
buf_block_modify_clock_inc(block);
page = buf_block_get_frame(block);
- fil_page_set_type(page, FIL_PAGE_INDEX);
-
- heap = mem_heap_create(200);
-
- /* 3. CREATE THE INFIMUM AND SUPREMUM RECORDS */
-
- /* Create first a data tuple for infimum record */
- tuple = dtuple_create(heap, 1);
- dtuple_set_info_bits(tuple, REC_STATUS_INFIMUM);
- field = dtuple_get_nth_field(tuple, 0);
-
- dfield_set_data(field, "infimum", 8);
- dtype_set(dfield_get_type(field),
- DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, 8);
- /* Set the corresponding physical record to its place in the page
- record heap */
-
- heap_top = page + PAGE_DATA;
-
- infimum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple, 0);
-
- if (UNIV_LIKELY(comp)) {
- ut_a(infimum_rec == page + PAGE_NEW_INFIMUM);
-
- rec_set_n_owned_new(infimum_rec, NULL, 1);
- rec_set_heap_no_new(infimum_rec, 0);
+ if (is_rtree) {
+ fil_page_set_type(page, FIL_PAGE_RTREE);
} else {
- ut_a(infimum_rec == page + PAGE_OLD_INFIMUM);
-
- rec_set_n_owned_old(infimum_rec, 1);
- rec_set_heap_no_old(infimum_rec, 0);
- }
-
- offsets = rec_get_offsets(infimum_rec, index, NULL,
- ULINT_UNDEFINED, &heap);
-
- heap_top = rec_get_end(infimum_rec, offsets);
-
- /* Create then a tuple for supremum */
-
- tuple = dtuple_create(heap, 1);
- dtuple_set_info_bits(tuple, REC_STATUS_SUPREMUM);
- field = dtuple_get_nth_field(tuple, 0);
-
- dfield_set_data(field, "supremum", comp ? 8 : 9);
- dtype_set(dfield_get_type(field),
- DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, comp ? 8 : 9);
-
- supremum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple, 0);
-
- if (UNIV_LIKELY(comp)) {
- ut_a(supremum_rec == page + PAGE_NEW_SUPREMUM);
-
- rec_set_n_owned_new(supremum_rec, NULL, 1);
- rec_set_heap_no_new(supremum_rec, 1);
+ fil_page_set_type(page, FIL_PAGE_INDEX);
+ }
+
+ memset(page + PAGE_HEADER, 0, PAGE_HEADER_PRIV_END);
+ page[PAGE_HEADER + PAGE_N_DIR_SLOTS + 1] = 2;
+ page[PAGE_HEADER + PAGE_DIRECTION + 1] = PAGE_NO_DIRECTION;
+
+ if (comp) {
+ page[PAGE_HEADER + PAGE_N_HEAP] = 0x80;/*page_is_comp()*/
+ page[PAGE_HEADER + PAGE_N_HEAP + 1] = PAGE_HEAP_NO_USER_LOW;
+ page[PAGE_HEADER + PAGE_HEAP_TOP + 1] = PAGE_NEW_SUPREMUM_END;
+ memcpy(page + PAGE_DATA, infimum_supremum_compact,
+ sizeof infimum_supremum_compact);
+ memset(page
+ + PAGE_NEW_SUPREMUM_END, 0,
+ UNIV_PAGE_SIZE - PAGE_DIR - PAGE_NEW_SUPREMUM_END);
+ page[UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE * 2 + 1]
+ = PAGE_NEW_SUPREMUM;
+ page[UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE + 1]
+ = PAGE_NEW_INFIMUM;
} else {
- ut_a(supremum_rec == page + PAGE_OLD_SUPREMUM);
-
- rec_set_n_owned_old(supremum_rec, 1);
- rec_set_heap_no_old(supremum_rec, 1);
+ page[PAGE_HEADER + PAGE_N_HEAP + 1] = PAGE_HEAP_NO_USER_LOW;
+ page[PAGE_HEADER + PAGE_HEAP_TOP + 1] = PAGE_OLD_SUPREMUM_END;
+ memcpy(page + PAGE_DATA, infimum_supremum_redundant,
+ sizeof infimum_supremum_redundant);
+ memset(page
+ + PAGE_OLD_SUPREMUM_END, 0,
+ UNIV_PAGE_SIZE - PAGE_DIR - PAGE_OLD_SUPREMUM_END);
+ page[UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE * 2 + 1]
+ = PAGE_OLD_SUPREMUM;
+ page[UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE + 1]
+ = PAGE_OLD_INFIMUM;
}
- offsets = rec_get_offsets(supremum_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- heap_top = rec_get_end(supremum_rec, offsets);
-
- ut_ad(heap_top == page
- + (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END));
-
- mem_heap_free(heap);
-
- /* 4. INITIALIZE THE PAGE */
-
- page_header_set_field(page, NULL, PAGE_N_DIR_SLOTS, 2);
- page_header_set_ptr(page, NULL, PAGE_HEAP_TOP, heap_top);
- page_header_set_field(page, NULL, PAGE_N_HEAP, comp
- ? 0x8000 | PAGE_HEAP_NO_USER_LOW
- : PAGE_HEAP_NO_USER_LOW);
- page_header_set_ptr(page, NULL, PAGE_FREE, NULL);
- page_header_set_field(page, NULL, PAGE_GARBAGE, 0);
- page_header_set_ptr(page, NULL, PAGE_LAST_INSERT, NULL);
- page_header_set_field(page, NULL, PAGE_DIRECTION, PAGE_NO_DIRECTION);
- page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
- page_header_set_field(page, NULL, PAGE_N_RECS, 0);
- page_set_max_trx_id(block, NULL, 0, NULL);
- memset(heap_top, 0, UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START
- - page_offset(heap_top));
-
- /* 5. SET POINTERS IN RECORDS AND DIR SLOTS */
-
- /* Set the slots to point to infimum and supremum. */
-
- slot = page_dir_get_nth_slot(page, 0);
- page_dir_slot_set_rec(slot, infimum_rec);
-
- slot = page_dir_get_nth_slot(page, 1);
- page_dir_slot_set_rec(slot, supremum_rec);
-
- /* Set the next pointers in infimum and supremum */
+ return(page);
+}
- if (UNIV_LIKELY(comp)) {
- rec_set_next_offs_new(infimum_rec, PAGE_NEW_SUPREMUM);
- rec_set_next_offs_new(supremum_rec, 0);
- } else {
- rec_set_next_offs_old(infimum_rec, PAGE_OLD_SUPREMUM);
- rec_set_next_offs_old(supremum_rec, 0);
+/** Parses a redo log record of creating a page.
+@param[in,out] block buffer block, or NULL
+@param[in] comp nonzero=compact page format
+@param[in] is_rtree whether it is rtree page */
+void
+page_parse_create(
+ buf_block_t* block,
+ ulint comp,
+ bool is_rtree)
+{
+ if (block != NULL) {
+ page_create_low(block, comp, is_rtree);
}
-
- return(page);
}
/**********************************************************//**
-Create an uncompressed B-tree index page.
-@return pointer to the page */
-UNIV_INTERN
+Create an uncompressed B-tree or R-tree index page.
+@return pointer to the page */
page_t*
page_create(
/*========*/
buf_block_t* block, /*!< in: a buffer block where the
page is created */
mtr_t* mtr, /*!< in: mini-transaction handle */
- ulint comp) /*!< in: nonzero=compact page format */
+ ulint comp, /*!< in: nonzero=compact page format */
+ bool is_rtree) /*!< in: whether it is a R-Tree page */
{
- page_create_write_log(buf_block_get_frame(block), mtr, comp);
- return(page_create_low(block, comp));
+ ut_ad(mtr->is_named_space(block->page.id.space()));
+ page_create_write_log(buf_block_get_frame(block), mtr, comp, is_rtree);
+ return(page_create_low(block, comp, is_rtree));
}
/**********************************************************//**
Create a compressed B-tree index page.
-@return pointer to the page */
-UNIV_INTERN
+@return pointer to the page */
page_t*
page_create_zip(
/*============*/
- buf_block_t* block, /*!< in/out: a buffer frame where the
- page is created */
- dict_index_t* index, /*!< in: the index of the page */
- ulint level, /*!< in: the B-tree level of the page */
- trx_id_t max_trx_id, /*!< in: PAGE_MAX_TRX_ID */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ buf_block_t* block, /*!< in/out: a buffer frame
+ where the page is created */
+ dict_index_t* index, /*!< in: the index of the
+ page, or NULL when applying
+ TRUNCATE log
+ record during recovery */
+ ulint level, /*!< in: the B-tree level
+ of the page */
+ trx_id_t max_trx_id, /*!< in: PAGE_MAX_TRX_ID */
+ const redo_page_compress_t* page_comp_info,
+					/*!< in: used for applying
+					TRUNCATE log record during
+					recovery */
+ mtr_t* mtr) /*!< in/out: mini-transaction
+ handle */
{
- page_t* page;
- page_zip_des_t* page_zip = buf_block_get_page_zip(block);
+ page_t* page;
+ page_zip_des_t* page_zip = buf_block_get_page_zip(block);
+ bool is_spatial;
ut_ad(block);
ut_ad(page_zip);
- ut_ad(index);
- ut_ad(dict_table_is_comp(index->table));
-
- page = page_create_low(block, TRUE);
+ ut_ad(index == NULL || dict_table_is_comp(index->table));
+ is_spatial = index ? dict_index_is_spatial(index)
+ : page_comp_info->type & DICT_SPATIAL;
+
+ /* PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC are always 0 for
+ temporary tables. */
+ ut_ad(max_trx_id == 0 || !dict_table_is_temporary(index->table));
+ /* In secondary indexes and the change buffer, PAGE_MAX_TRX_ID
+ must be zero on non-leaf pages. max_trx_id can be 0 when the
+ index consists of an empty root (leaf) page. */
+ ut_ad(max_trx_id == 0
+ || level == 0
+ || !dict_index_is_sec_or_ibuf(index)
+ || dict_table_is_temporary(index->table));
+	/* In the clustered index, PAGE_ROOT_AUTO_INC or
+ PAGE_MAX_TRX_ID must be 0 on other pages than the root. */
+ ut_ad(level == 0 || max_trx_id == 0
+ || !dict_index_is_sec_or_ibuf(index)
+ || dict_table_is_temporary(index->table));
+
+ page = page_create_low(block, TRUE, is_spatial);
mach_write_to_2(PAGE_HEADER + PAGE_LEVEL + page, level);
mach_write_to_8(PAGE_HEADER + PAGE_MAX_TRX_ID + page, max_trx_id);
- if (!page_zip_compress(page_zip, page, index,
- page_zip_level, mtr)) {
- /* The compression of a newly created page
- should always succeed. */
+ if (truncate_t::s_fix_up_active) {
+ /* Compress the index page created when applying
+ TRUNCATE log during recovery */
+ if (!page_zip_compress(page_zip, page, index, page_zip_level,
+ page_comp_info, NULL)) {
+ /* The compression of a newly created
+ page should always succeed. */
+ ut_error;
+ }
+
+ } else if (!page_zip_compress(page_zip, page, index,
+ page_zip_level, NULL, mtr)) {
+ /* The compression of a newly created
+ page should always succeed. */
ut_error;
}
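Since page_zip_compress() gained a redo_page_compress_t argument in this patch, the two call forms used above are worth spelling out side by side (a sketch; the last two arguments are mutually exclusive):

	/* Runtime path: real index and mini-transaction, no TRUNCATE
	metadata. */
	page_zip_compress(page_zip, page, index, page_zip_level,
			  NULL, mtr);

	/* TRUNCATE-recovery path: page format taken from the parsed
	log record; nothing is redo-logged. */
	page_zip_compress(page_zip, page, index, page_zip_level,
			  page_comp_info, NULL);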
@@ -524,7 +515,6 @@ page_create_zip(
/**********************************************************//**
Empty a previously created B-tree index page. */
-UNIV_INTERN
void
page_create_empty(
/*==============*/
@@ -532,27 +522,42 @@ page_create_empty(
dict_index_t* index, /*!< in: the index of the page */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
- trx_id_t max_trx_id = 0;
- const page_t* page = buf_block_get_frame(block);
+ trx_id_t max_trx_id;
+ page_t* page = buf_block_get_frame(block);
page_zip_des_t* page_zip= buf_block_get_page_zip(block);
- ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+ ut_ad(fil_page_index_page_check(page));
+ ut_ad(!index->is_dummy);
+ ut_ad(block->page.id.space() == index->space);
- if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
+	/* Multiple transactions cannot operate on the same
+	temp-table in parallel.
+	max_trx_id is ignored for temp tables because it is not
+	required for MVCC. */
+ if (dict_index_is_sec_or_ibuf(index)
+ && !dict_table_is_temporary(index->table)
+ && page_is_leaf(page)) {
max_trx_id = page_get_max_trx_id(page);
ut_ad(max_trx_id);
+ } else if (block->page.id.page_no() == index->page) {
+ /* Preserve PAGE_ROOT_AUTO_INC. */
+ max_trx_id = page_get_max_trx_id(page);
+ } else {
+ max_trx_id = 0;
}
if (page_zip) {
+ ut_ad(!dict_table_is_temporary(index->table));
page_create_zip(block, index,
page_header_get_field(page, PAGE_LEVEL),
- max_trx_id, mtr);
+ max_trx_id, NULL, mtr);
} else {
- page_create(block, mtr, page_is_comp(page));
+ page_create(block, mtr, page_is_comp(page),
+ dict_index_is_spatial(index));
if (max_trx_id) {
- page_update_max_trx_id(
- block, page_zip, max_trx_id, mtr);
+ mlog_write_ull(PAGE_HEADER + PAGE_MAX_TRX_ID + page,
+ max_trx_id, mtr);
}
}
}
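The three-way choice of max_trx_id is the heart of this hunk. Restated as a self-contained helper (hypothetical name; a sketch built only from accessors that appear in this file):

	/* Which PAGE_MAX_TRX_ID value must survive emptying a page
	(sketch equivalent of the logic above). */
	static trx_id_t
	page_empty_trx_id(const buf_block_t* block, const dict_index_t* index)
	{
		const page_t*	page = buf_block_get_frame(block);

		if (dict_index_is_sec_or_ibuf(index)
		    && !dict_table_is_temporary(index->table)
		    && page_is_leaf(page)) {
			/* Secondary-index leaves need it for MVCC. */
			return(page_get_max_trx_id(page));
		}

		if (block->page.id.page_no() == index->page) {
			/* The root stores PAGE_ROOT_AUTO_INC in the
			same 8-byte header field; preserve it. */
			return(page_get_max_trx_id(page));
		}

		return(0);
	}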
@@ -565,7 +570,6 @@ IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
if new_block is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit(). */
-UNIV_INTERN
void
page_copy_rec_list_end_no_locks(
/*============================*/
@@ -579,8 +583,8 @@ page_copy_rec_list_end_no_locks(
page_cur_t cur1;
rec_t* cur2;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
rec_offs_init(offsets_);
page_cur_position(rec, block, &cur1);
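Two mechanical changes recur throughout this file: the record-offset arrays shrink from ulint to offset_t, and rec_get_offsets() takes a new leaf-page flag (which, judging by the ut_d() wrappers below, is consumed only in debug builds). The resulting idiom, as a sketch:

	offset_t	offsets_[REC_OFFS_NORMAL_SIZE];
	offset_t*	offsets	= offsets_;
	mem_heap_t*	heap	= NULL;

	rec_offs_init(offsets_);

	/* New 4th argument: whether rec resides on a leaf page;
	ULINT_UNDEFINED requests offsets for all fields. */
	offsets = rec_get_offsets(rec, index, offsets,
				  page_rec_is_leaf(rec),
				  ULINT_UNDEFINED, &heap);

	/* ... use offsets ... */

	if (heap != NULL) {
		/* Allocated only if the stack buffer was too small. */
		mem_heap_free(heap);
	}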
@@ -594,36 +598,27 @@ page_copy_rec_list_end_no_locks(
ut_a(page_is_comp(new_page) == page_rec_is_comp(rec));
ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint)
(page_is_comp(new_page) ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM));
+ ut_d(const bool is_leaf = page_is_leaf(block->frame));
cur2 = page_get_infimum_rec(buf_block_get_frame(new_block));
/* Copy records from the original page to the new page */
while (!page_cur_is_after_last(&cur1)) {
- rec_t* cur1_rec = page_cur_get_rec(&cur1);
rec_t* ins_rec;
- offsets = rec_get_offsets(cur1_rec, index, offsets,
+ offsets = rec_get_offsets(cur1.rec, index, offsets, is_leaf,
ULINT_UNDEFINED, &heap);
ins_rec = page_cur_insert_rec_low(cur2, index,
- cur1_rec, offsets, mtr);
+ cur1.rec, offsets, mtr);
if (UNIV_UNLIKELY(!ins_rec)) {
- /* Track an assertion failure reported on the mailing
- list on June 18th, 2003 */
-
- buf_page_print(new_page, 0);
- buf_page_print(page_align(rec), 0);
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- "InnoDB: rec offset %lu, cur1 offset %lu,"
- " cur2 offset %lu\n",
- (ulong) page_offset(rec),
- (ulong) page_offset(page_cur_get_rec(&cur1)),
- (ulong) page_offset(cur2));
- ut_error;
+ ib::fatal() << "Rec offset " << page_offset(rec)
+ << ", cur1 offset " << page_offset(cur1.rec)
+ << ", cur2 offset " << page_offset(cur2);
}
page_cur_move_to_next(&cur1);
+ ut_ad(!(rec_get_info_bits(cur1.rec, page_is_comp(new_page))
+ & REC_INFO_MIN_REC_FLAG));
cur2 = ins_rec;
}
@@ -632,7 +627,6 @@ page_copy_rec_list_end_no_locks(
}
}
-#ifndef UNIV_HOTBACKUP
/*************************************************************//**
Copies records from page to new_page, from a given record onward,
including that record. Infimum and supremum records are not copied.
@@ -645,7 +639,6 @@ or by invoking ibuf_reset_free_bits() before mtr_commit().
@return pointer to the original successor of the infimum record on
new_page, or NULL on zip overflow (new_block will be decompressed) */
-UNIV_INTERN
rec_t*
page_copy_rec_list_end(
/*===================*/
@@ -660,7 +653,9 @@ page_copy_rec_list_end(
page_t* page = page_align(rec);
rec_t* ret = page_rec_get_next(
page_get_infimum_rec(new_page));
- ulint log_mode = 0; /* remove warning */
+ ulint num_moved = 0;
+ rtr_rec_move_t* rec_move = NULL;
+ mem_heap_t* heap = NULL;
#ifdef UNIV_ZIP_DEBUG
if (new_page_zip) {
@@ -680,6 +675,8 @@ page_copy_rec_list_end(
/* Here, "ret" may be pointing to a user record or the
predefined supremum record. */
+ mtr_log_t log_mode = MTR_LOG_NONE;
+
if (new_page_zip) {
log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
}
@@ -688,14 +685,39 @@ page_copy_rec_list_end(
page_copy_rec_list_end_to_created_page(new_page, rec,
index, mtr);
} else {
- page_copy_rec_list_end_no_locks(new_block, block, rec,
- index, mtr);
+ if (dict_index_is_spatial(index)) {
+ ulint max_to_move = page_get_n_recs(
+ buf_block_get_frame(block));
+ heap = mem_heap_create(256);
+
+ rec_move = static_cast<rtr_rec_move_t*>(mem_heap_alloc(
+ heap,
+ sizeof (*rec_move) * max_to_move));
+
+ /* For spatial index, we need to insert recs one by one
+ to keep recs ordered. */
+ rtr_page_copy_rec_list_end_no_locks(new_block,
+ block, rec, index,
+ heap, rec_move,
+ max_to_move,
+ &num_moved,
+ mtr);
+ } else {
+ page_copy_rec_list_end_no_locks(new_block, block, rec,
+ index, mtr);
+ }
}
/* Update PAGE_MAX_TRX_ID on the uncompressed page.
Modifications will be redo logged and copied to the compressed
- page in page_zip_compress() or page_zip_reorganize() below. */
- if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
+ page in page_zip_compress() or page_zip_reorganize() below.
+	Multiple transactions cannot operate on the same
+	temp-table in parallel.
+	max_trx_id is ignored for temp tables because it is not
+	required for MVCC. */
+ if (dict_index_is_sec_or_ibuf(index)
+ && page_is_leaf(page)
+ && !dict_table_is_temporary(index->table)) {
page_update_max_trx_id(new_block, NULL,
page_get_max_trx_id(page), mtr);
}
@@ -703,8 +725,11 @@ page_copy_rec_list_end(
if (new_page_zip) {
mtr_set_log_mode(mtr, log_mode);
- if (!page_zip_compress(new_page_zip, new_page,
- index, page_zip_level, mtr)) {
+ if (!page_zip_compress(new_page_zip,
+ new_page,
+ index,
+ page_zip_level,
+ NULL, mtr)) {
/* Before trying to reorganize the page,
store the number of preceding records on the page. */
ulint ret_pos
@@ -718,15 +743,16 @@ page_copy_rec_list_end(
if (!page_zip_reorganize(new_block, index, mtr)) {
- btr_blob_dbg_remove(new_page, index,
- "copy_end_reorg_fail");
if (!page_zip_decompress(new_page_zip,
new_page, FALSE)) {
ut_error;
}
ut_ad(page_validate(new_page, index));
- btr_blob_dbg_add(new_page, index,
- "copy_end_reorg_fail");
+
+ if (heap) {
+ mem_heap_free(heap);
+ }
+
return(NULL);
} else {
/* The page was reorganized:
@@ -742,7 +768,15 @@ page_copy_rec_list_end(
/* Update the lock table and possible hash index */
- lock_move_rec_list_end(new_block, block, rec);
+ if (dict_index_is_spatial(index) && rec_move) {
+ lock_rtr_move_rec_list(new_block, block, rec_move, num_moved);
+ } else if (!dict_table_is_locking_disabled(index->table)) {
+ lock_move_rec_list_end(new_block, block, rec);
+ }
+
+ if (heap) {
+ mem_heap_free(heap);
+ }
btr_search_move_or_delete_hash_entries(new_block, block, index);
@@ -761,7 +795,6 @@ or by invoking ibuf_reset_free_bits() before mtr_commit().
@return pointer to the original predecessor of the supremum record on
new_page, or NULL on zip overflow (new_block will be decompressed) */
-UNIV_INTERN
rec_t*
page_copy_rec_list_start(
/*=====================*/
@@ -771,26 +804,30 @@ page_copy_rec_list_start(
dict_index_t* index, /*!< in: record descriptor */
mtr_t* mtr) /*!< in: mtr */
{
+ ut_ad(page_align(rec) == block->frame);
+
page_t* new_page = buf_block_get_frame(new_block);
page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block);
page_cur_t cur1;
rec_t* cur2;
- ulint log_mode = 0 /* remove warning */;
mem_heap_t* heap = NULL;
+ ulint num_moved = 0;
+ rtr_rec_move_t* rec_move = NULL;
rec_t* ret
= page_rec_get_prev(page_get_supremum_rec(new_page));
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
rec_offs_init(offsets_);
/* Here, "ret" may be pointing to a user record or the
predefined infimum record. */
if (page_rec_is_infimum(rec)) {
-
return(ret);
}
+ mtr_log_t log_mode = MTR_LOG_NONE;
+
if (new_page_zip) {
log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
}
@@ -800,28 +837,49 @@ page_copy_rec_list_start(
cur2 = ret;
- /* Copy records from the original page to the new page */
-
- while (page_cur_get_rec(&cur1) != rec) {
- rec_t* cur1_rec = page_cur_get_rec(&cur1);
- offsets = rec_get_offsets(cur1_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- cur2 = page_cur_insert_rec_low(cur2, index,
- cur1_rec, offsets, mtr);
- ut_a(cur2);
-
- page_cur_move_to_next(&cur1);
- }
+ const bool is_leaf = page_rec_is_leaf(rec);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
+ /* Copy records from the original page to the new page */
+ if (dict_index_is_spatial(index)) {
+ ulint max_to_move = page_get_n_recs(
+ buf_block_get_frame(block));
+ heap = mem_heap_create(256);
+
+ rec_move = static_cast<rtr_rec_move_t*>(mem_heap_alloc(
+ heap,
+ sizeof (*rec_move) * max_to_move));
+
+ /* For spatial index, we need to insert recs one by one
+ to keep recs ordered. */
+ rtr_page_copy_rec_list_start_no_locks(new_block,
+ block, rec, index, heap,
+ rec_move, max_to_move,
+ &num_moved, mtr);
+ } else {
+ while (page_cur_get_rec(&cur1) != rec) {
+ offsets = rec_get_offsets(cur1.rec, index, offsets,
+ is_leaf,
+ ULINT_UNDEFINED, &heap);
+ cur2 = page_cur_insert_rec_low(cur2, index,
+ cur1.rec, offsets, mtr);
+ ut_a(cur2);
+
+ page_cur_move_to_next(&cur1);
+ ut_ad(!(rec_get_info_bits(cur1.rec,
+ page_is_comp(new_page))
+ & REC_INFO_MIN_REC_FLAG));
+ }
}
/* Update PAGE_MAX_TRX_ID on the uncompressed page.
Modifications will be redo logged and copied to the compressed
- page in page_zip_compress() or page_zip_reorganize() below. */
- if (dict_index_is_sec_or_ibuf(index)
- && page_is_leaf(page_align(rec))) {
+ page in page_zip_compress() or page_zip_reorganize() below.
+	Multiple transactions cannot operate on the same
+	temp-table in parallel.
+	max_trx_id is ignored for temp tables because it is not
+	required for MVCC. */
+ if (is_leaf && dict_index_is_sec_or_ibuf(index)
+ && !dict_table_is_temporary(index->table)) {
page_update_max_trx_id(new_block, NULL,
page_get_max_trx_id(page_align(rec)),
mtr);
@@ -834,8 +892,7 @@ page_copy_rec_list_start(
goto zip_reorganize;);
if (!page_zip_compress(new_page_zip, new_page, index,
- page_zip_level, mtr)) {
-
+ page_zip_level, NULL, mtr)) {
ulint ret_pos;
#ifndef DBUG_OFF
zip_reorganize:
@@ -852,16 +909,17 @@ zip_reorganize:
if (UNIV_UNLIKELY
(!page_zip_reorganize(new_block, index, mtr))) {
- btr_blob_dbg_remove(new_page, index,
- "copy_start_reorg_fail");
if (UNIV_UNLIKELY
(!page_zip_decompress(new_page_zip,
new_page, FALSE))) {
ut_error;
}
ut_ad(page_validate(new_page, index));
- btr_blob_dbg_add(new_page, index,
- "copy_start_reorg_fail");
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+
return(NULL);
}
@@ -872,7 +930,15 @@ zip_reorganize:
/* Update the lock table and possible hash index */
- lock_move_rec_list_start(new_block, block, rec, ret);
+ if (dict_index_is_spatial(index)) {
+ lock_rtr_move_rec_list(new_block, block, rec_move, num_moved);
+ } else if (!dict_table_is_locking_disabled(index->table)) {
+ lock_move_rec_list_start(new_block, block, rec, ret);
+ }
+
+ if (heap) {
+ mem_heap_free(heap);
+ }
btr_search_move_or_delete_hash_entries(new_block, block, index);
@@ -887,7 +953,7 @@ page_delete_rec_list_write_log(
/*===========================*/
rec_t* rec, /*!< in: record on page */
dict_index_t* index, /*!< in: record descriptor */
- byte type, /*!< in: operation type:
+ mlog_id_t type, /*!< in: operation type:
MLOG_LIST_END_DELETE, ... */
mtr_t* mtr) /*!< in: mtr */
{
@@ -904,18 +970,14 @@ page_delete_rec_list_write_log(
mlog_close(mtr, log_ptr + 2);
}
}
-#else /* !UNIV_HOTBACKUP */
-# define page_delete_rec_list_write_log(rec,index,type,mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
/**********************************************************//**
Parses a log record of a record list end or start deletion.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
page_parse_delete_rec_list(
/*=======================*/
- byte type, /*!< in: MLOG_LIST_END_DELETE,
+ mlog_id_t type, /*!< in: MLOG_LIST_END_DELETE,
MLOG_LIST_START_DELETE,
MLOG_COMP_LIST_END_DELETE or
MLOG_COMP_LIST_START_DELETE */
@@ -967,7 +1029,6 @@ page_parse_delete_rec_list(
/*************************************************************//**
Deletes records from a page from a given record onward, including that record.
The infimum and supremum records are not deleted. */
-UNIV_INTERN
void
page_delete_rec_list_end(
/*=====================*/
@@ -989,8 +1050,8 @@ page_delete_rec_list_end(
page_zip_des_t* page_zip = buf_block_get_page_zip(block);
page_t* page = page_align(rec);
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
rec_offs_init(offsets_);
ut_ad(size == ULINT_UNDEFINED || size < UNIV_PAGE_SIZE);
@@ -1047,8 +1108,10 @@ delete_all:
? MLOG_COMP_LIST_END_DELETE
: MLOG_LIST_END_DELETE, mtr);
+ ut_d(const bool is_leaf = page_is_leaf(page));
+
if (page_zip) {
- ulint log_mode;
+ mtr_log_t log_mode;
ut_a(page_is_comp(page));
/* Individual deletes are not logged */
@@ -1059,7 +1122,7 @@ delete_all:
page_cur_t cur;
page_cur_position(rec, block, &cur);
- offsets = rec_get_offsets(rec, index, offsets,
+ offsets = rec_get_offsets(rec, index, offsets, is_leaf,
ULINT_UNDEFINED, &heap);
rec = rec_get_next_ptr(rec, TRUE);
#ifdef UNIV_ZIP_DEBUG
@@ -1093,6 +1156,7 @@ delete_all:
do {
ulint s;
offsets = rec_get_offsets(rec2, index, offsets,
+ is_leaf,
ULINT_UNDEFINED, &heap);
s = rec_offs_size(offsets);
ut_ad(rec2 - page + s - rec_offs_extra_size(offsets)
@@ -1103,8 +1167,7 @@ delete_all:
if (scrub) {
/* scrub record */
- uint recsize = rec_offs_data_size(offsets);
- memset(rec2, 0, recsize);
+ memset(rec2, 0, rec_offs_data_size(offsets));
}
rec2 = page_rec_get_next(rec2);
@@ -1163,9 +1226,6 @@ delete_all:
/* Remove the record chain segment from the record chain */
page_rec_set_next(prev_rec, page_get_supremum_rec(page));
- btr_blob_dbg_op(page, rec, index, "delete_end",
- btr_blob_dbg_remove_rec);
-
/* Catenate the deleted chain segment to the page free list */
page_rec_set_next(last_rec, page_header_get_ptr(page, PAGE_FREE));
@@ -1174,6 +1234,7 @@ delete_all:
page_header_set_field(page, NULL, PAGE_GARBAGE, size
+ page_header_get_field(page, PAGE_GARBAGE));
+ ut_ad(page_get_n_recs(page) > n_recs);
page_header_set_field(page, NULL, PAGE_N_RECS,
(ulint)(page_get_n_recs(page) - n_recs));
}
@@ -1181,7 +1242,6 @@ delete_all:
/*************************************************************//**
Deletes records from page, up to the given record, NOT including
that record. Infimum and supremum records are not deleted. */
-UNIV_INTERN
void
page_delete_rec_list_start(
/*=======================*/
@@ -1191,14 +1251,13 @@ page_delete_rec_list_start(
mtr_t* mtr) /*!< in: mtr */
{
page_cur_t cur1;
- ulint log_mode;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
mem_heap_t* heap = NULL;
- byte type;
rec_offs_init(offsets_);
+ ut_ad(page_align(rec) == block->frame);
ut_ad((ibool) !!page_rec_is_comp(rec)
== dict_table_is_comp(index->table));
#ifdef UNIV_ZIP_DEBUG
@@ -1226,6 +1285,8 @@ page_delete_rec_list_start(
return;
}
+ mlog_id_t type;
+
if (page_rec_is_comp(rec)) {
type = MLOG_COMP_LIST_START_DELETE;
} else {
@@ -1239,11 +1300,13 @@ page_delete_rec_list_start(
/* Individual deletes are not logged */
- log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
+ mtr_log_t log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
+ ut_d(const bool is_leaf = page_rec_is_leaf(rec));
while (page_cur_get_rec(&cur1) != rec) {
offsets = rec_get_offsets(page_cur_get_rec(&cur1), index,
- offsets, ULINT_UNDEFINED, &heap);
+ offsets, is_leaf,
+ ULINT_UNDEFINED, &heap);
page_cur_delete_rec(&cur1, index, offsets, mtr);
}
@@ -1256,7 +1319,6 @@ page_delete_rec_list_start(
mtr_set_log_mode(mtr, log_mode);
}
-#ifndef UNIV_HOTBACKUP
/*************************************************************//**
Moves record list end to another page. Moved records include
split_rec.
@@ -1268,7 +1330,6 @@ or by invoking ibuf_reset_free_bits() before mtr_commit().
@return TRUE on success; FALSE on compression failure (new_block will
be decompressed) */
-UNIV_INTERN
ibool
page_move_rec_list_end(
/*===================*/
@@ -1284,6 +1345,8 @@ page_move_rec_list_end(
ulint old_n_recs;
ulint new_n_recs;
+ ut_ad(!dict_index_is_spatial(index));
+
old_data_size = page_get_data_size(new_page);
old_n_recs = page_get_n_recs(new_page);
#ifdef UNIV_ZIP_DEBUG
@@ -1327,8 +1390,7 @@ if new_block is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit().
-@return TRUE on success; FALSE on compression failure */
-UNIV_INTERN
+@return TRUE on success; FALSE on compression failure */
ibool
page_move_rec_list_start(
/*=====================*/
@@ -1347,7 +1409,6 @@ page_move_rec_list_start(
return(TRUE);
}
-#endif /* !UNIV_HOTBACKUP */
/**************************************************************//**
Used to delete n slots from the directory. This function updates
@@ -1429,7 +1490,6 @@ page_dir_add_slot(
/****************************************************************//**
Splits a directory slot which owns too many records. */
-UNIV_INTERN
void
page_dir_split_slot(
/*================*/
@@ -1491,7 +1551,6 @@ page_dir_split_slot(
Tries to balance the given directory slot with too few records with the upper
neighbor, so that there are at least the minimum number of records owned by
the slot; this may result in the merging of two slots. */
-UNIV_INTERN
void
page_dir_balance_slot(
/*==================*/
@@ -1514,7 +1573,7 @@ page_dir_balance_slot(
/* The last directory slot cannot be balanced with the upper
neighbor, as there is none. */
- if (UNIV_UNLIKELY(slot_no == page_dir_get_n_slots(page) - 1)) {
+ if (UNIV_UNLIKELY(slot_no + 1 == page_dir_get_n_slots(page))) {
return;
}
@@ -1560,8 +1619,7 @@ page_dir_balance_slot(
/************************************************************//**
Returns the nth record of the record list.
This is the inverse function of page_rec_get_n_recs_before().
-@return nth record */
-UNIV_INTERN
+@return nth record */
const rec_t*
page_rec_get_nth_const(
/*===================*/
@@ -1613,8 +1671,7 @@ page_rec_get_nth_const(
/***************************************************************//**
Returns the number of records before the given record in chain.
The number includes infimum and supremum records.
-@return number of records */
-UNIV_INTERN
+@return number of records */
ulint
page_rec_get_n_recs_before(
/*=======================*/
@@ -1675,42 +1732,35 @@ page_rec_get_n_recs_before(
return((ulint) n);
}
-#ifndef UNIV_HOTBACKUP
/************************************************************//**
Prints record contents including the data relevant only in
the index page context. */
-UNIV_INTERN
void
page_rec_print(
/*===========*/
const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: record descriptor */
+ const offset_t* offsets)/*!< in: record descriptor */
{
ut_a(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
rec_print_new(stderr, rec, offsets);
if (page_rec_is_comp(rec)) {
- fprintf(stderr,
- " n_owned: %lu; heap_no: %lu; next rec: %lu\n",
- (ulong) rec_get_n_owned_new(rec),
- (ulong) rec_get_heap_no_new(rec),
- (ulong) rec_get_next_offs(rec, TRUE));
+ ib::info() << "n_owned: " << rec_get_n_owned_new(rec)
+ << "; heap_no: " << rec_get_heap_no_new(rec)
+ << "; next rec: " << rec_get_next_offs(rec, TRUE);
} else {
- fprintf(stderr,
- " n_owned: %lu; heap_no: %lu; next rec: %lu\n",
- (ulong) rec_get_n_owned_old(rec),
- (ulong) rec_get_heap_no_old(rec),
- (ulong) rec_get_next_offs(rec, FALSE));
+ ib::info() << "n_owned: " << rec_get_n_owned_old(rec)
+ << "; heap_no: " << rec_get_heap_no_old(rec)
+ << "; next rec: " << rec_get_next_offs(rec, FALSE);
}
page_rec_check(rec);
rec_validate(rec, offsets);
}
-# ifdef UNIV_BTR_PRINT
+#ifdef UNIV_BTR_PRINT
/***************************************************************//**
This is used to print the contents of the directory for
debugging purposes. */
-UNIV_INTERN
void
page_dir_print(
/*===========*/
@@ -1752,7 +1802,6 @@ page_dir_print(
/***************************************************************//**
This is used to print the contents of the page record list for
debugging purposes. */
-UNIV_INTERN
void
page_print_list(
/*============*/
@@ -1765,13 +1814,13 @@ page_print_list(
ulint count;
ulint n_recs;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
rec_offs_init(offsets_);
ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table));
- fprintf(stderr,
+	fprintf(stderr,
"--------------------------------\n"
"PAGE RECORD LIST\n"
"Page address %p\n", page);
@@ -1822,7 +1871,6 @@ page_print_list(
/***************************************************************//**
Prints the info in a page header. */
-UNIV_INTERN
void
page_header_print(
/*==============*/
@@ -1850,7 +1898,6 @@ page_header_print(
/***************************************************************//**
This is used to print the contents of the page for
debugging purposes. */
-UNIV_INTERN
void
page_print(
/*=======*/
@@ -1867,20 +1914,18 @@ page_print(
page_dir_print(page, dn);
page_print_list(block, index, rn);
}
-# endif /* UNIV_BTR_PRINT */
-#endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_BTR_PRINT */
/***************************************************************//**
The following is used to validate a record on a page. This function
differs from rec_validate as it can also check the n_owned field and
the heap_no field.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
page_rec_validate(
/*==============*/
const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
{
ulint n_owned;
ulint heap_no;
@@ -1901,29 +1946,26 @@ page_rec_validate(
}
if (UNIV_UNLIKELY(!(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED))) {
- fprintf(stderr,
- "InnoDB: Dir slot of rec %lu, n owned too big %lu\n",
- (ulong) page_offset(rec), (ulong) n_owned);
+ ib::warn() << "Dir slot of rec " << page_offset(rec)
+ << ", n owned too big " << n_owned;
return(FALSE);
}
if (UNIV_UNLIKELY(!(heap_no < page_dir_get_n_heap(page)))) {
- fprintf(stderr,
- "InnoDB: Heap no of rec %lu too big %lu %lu\n",
- (ulong) page_offset(rec), (ulong) heap_no,
- (ulong) page_dir_get_n_heap(page));
+ ib::warn() << "Heap no of rec " << page_offset(rec)
+ << " too big " << heap_no << " "
+ << page_dir_get_n_heap(page);
return(FALSE);
}
return(TRUE);
}
-#ifndef UNIV_HOTBACKUP
+#ifdef UNIV_DEBUG
/***************************************************************//**
Checks that the first directory slot points to the infimum record and
the last to the supremum. This function is intended to track if the
bug fixed in 4.0.14 has caused corruption to users' databases. */
-UNIV_INTERN
void
page_check_dir(
/*===========*/
@@ -1940,30 +1982,23 @@ page_check_dir(
if (UNIV_UNLIKELY(!page_rec_is_infimum_low(infimum_offs))) {
- fprintf(stderr,
- "InnoDB: Page directory corruption:"
- " infimum not pointed to\n");
- buf_page_print(page, 0);
- ut_ad(0);
+ ib::fatal() << "Page directory corruption: infimum not"
+ " pointed to";
}
if (UNIV_UNLIKELY(!page_rec_is_supremum_low(supremum_offs))) {
- fprintf(stderr,
- "InnoDB: Page directory corruption:"
- " supremum not pointed to\n");
- buf_page_print(page, 0);
- ut_ad(0);
+ ib::fatal() << "Page directory corruption: supremum not"
+ " pointed to";
}
}
-#endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_DEBUG */
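Since page_check_dir() is now compiled only under UNIV_DEBUG and terminates via ib::fatal(), release builds cannot call it; callers would use the usual debug-only pattern:

	ut_d(page_check_dir(page));	/* expands to nothing in release */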
/***************************************************************//**
This function checks the consistency of an index page when we do not
know the index. This is also resilient so that this should never crash
even if the page is total garbage.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
page_simple_validate_old(
/*=====================*/
@@ -1986,9 +2021,8 @@ page_simple_validate_old(
n_slots = page_dir_get_n_slots(page);
if (UNIV_UNLIKELY(n_slots > UNIV_PAGE_SIZE / 4)) {
- fprintf(stderr,
- "InnoDB: Nonsensical number %lu of page dir slots\n",
- (ulong) n_slots);
+ ib::error() << "Nonsensical number " << n_slots
+ << " of page dir slots";
goto func_exit;
}
@@ -1997,13 +2031,12 @@ page_simple_validate_old(
if (UNIV_UNLIKELY(rec_heap_top
> page_dir_get_nth_slot(page, n_slots - 1))) {
-
- fprintf(stderr,
- "InnoDB: Record heap and dir overlap on a page,"
- " heap top %lu, dir %lu\n",
- (ulong) page_header_get_field(page, PAGE_HEAP_TOP),
- (ulong)
- page_offset(page_dir_get_nth_slot(page, n_slots - 1)));
+ ib::error()
+ << "Record heap and dir overlap on a page, heap top "
+ << page_header_get_field(page, PAGE_HEAP_TOP)
+ << ", dir "
+ << page_offset(page_dir_get_nth_slot(page,
+ n_slots - 1));
goto func_exit;
}
@@ -2020,11 +2053,9 @@ page_simple_validate_old(
for (;;) {
if (UNIV_UNLIKELY(rec > rec_heap_top)) {
- fprintf(stderr,
- "InnoDB: Record %lu is above"
- " rec heap top %lu\n",
- (ulong)(rec - page),
- (ulong)(rec_heap_top - page));
+ ib::error() << "Record " << (rec - page)
+ << " is above rec heap top "
+ << (rec_heap_top - page);
goto func_exit;
}
@@ -2034,22 +2065,18 @@ page_simple_validate_old(
if (UNIV_UNLIKELY(rec_get_n_owned_old(rec)
!= own_count)) {
- fprintf(stderr,
- "InnoDB: Wrong owned count %lu, %lu,"
- " rec %lu\n",
- (ulong) rec_get_n_owned_old(rec),
- (ulong) own_count,
- (ulong)(rec - page));
+ ib::error() << "Wrong owned count "
+ << rec_get_n_owned_old(rec)
+ << ", " << own_count << ", rec "
+ << (rec - page);
goto func_exit;
}
if (UNIV_UNLIKELY
(page_dir_slot_get_rec(slot) != rec)) {
- fprintf(stderr,
- "InnoDB: Dir slot does not point"
- " to right rec %lu\n",
- (ulong)(rec - page));
+ ib::error() << "Dir slot does not point"
+ " to right rec " << (rec - page);
goto func_exit;
}
@@ -2070,11 +2097,10 @@ page_simple_validate_old(
if (UNIV_UNLIKELY
(rec_get_next_offs(rec, FALSE) < FIL_PAGE_DATA
|| rec_get_next_offs(rec, FALSE) >= UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Next record offset"
- " nonsensical %lu for rec %lu\n",
- (ulong) rec_get_next_offs(rec, FALSE),
- (ulong) (rec - page));
+
+ ib::error() << "Next record offset nonsensical "
+ << rec_get_next_offs(rec, FALSE) << " for rec "
+ << (rec - page);
goto func_exit;
}
@@ -2082,10 +2108,8 @@ page_simple_validate_old(
count++;
if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Page record list appears"
- " to be circular %lu\n",
- (ulong) count);
+ ib::error() << "Page record list appears"
+ " to be circular " << count;
goto func_exit;
}
@@ -2094,24 +2118,23 @@ page_simple_validate_old(
}
if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) {
- fprintf(stderr, "InnoDB: n owned is zero in a supremum rec\n");
+ ib::error() << "n owned is zero in a supremum rec";
goto func_exit;
}
if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
- fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n",
- (ulong) slot_no, (ulong) (n_slots - 1));
+ ib::error() << "n slots wrong "
+ << slot_no << ", " << (n_slots - 1);
goto func_exit;
}
- if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS)
+ if (UNIV_UNLIKELY(ulint(page_header_get_field(page, PAGE_N_RECS))
+ PAGE_HEAP_NO_USER_LOW
!= count + 1)) {
- fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
- (ulong) page_header_get_field(page, PAGE_N_RECS)
- + PAGE_HEAP_NO_USER_LOW,
- (ulong) (count + 1));
+ ib::error() << "n recs wrong "
+ << page_header_get_field(page, PAGE_N_RECS)
+ + PAGE_HEAP_NO_USER_LOW << " " << (count + 1);
goto func_exit;
}
@@ -2122,20 +2145,16 @@ page_simple_validate_old(
while (rec != NULL) {
if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA
|| rec >= page + UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Free list record has"
- " a nonsensical offset %lu\n",
- (ulong) (rec - page));
+ ib::error() << "Free list record has"
+ " a nonsensical offset " << (rec - page);
goto func_exit;
}
if (UNIV_UNLIKELY(rec > rec_heap_top)) {
- fprintf(stderr,
- "InnoDB: Free list record %lu"
- " is above rec heap top %lu\n",
- (ulong) (rec - page),
- (ulong) (rec_heap_top - page));
+ ib::error() << "Free list record " << (rec - page)
+ << " is above rec heap top "
+ << (rec_heap_top - page);
goto func_exit;
}
@@ -2143,21 +2162,28 @@ page_simple_validate_old(
count++;
if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Page free list appears"
- " to be circular %lu\n",
- (ulong) count);
+ ib::error() << "Page free list appears"
+ " to be circular " << count;
goto func_exit;
}
- rec = page_rec_get_next_const(rec);
+ ulint offs = rec_get_next_offs(rec, FALSE);
+ if (!offs) {
+ break;
+ }
+ if (UNIV_UNLIKELY(offs < PAGE_OLD_INFIMUM
+ || offs >= srv_page_size)) {
+ ib::error() << "Page free list is corrupted " << count;
+ goto func_exit;
+ }
+
+ rec = page + offs;
}
if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
- fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n",
- (ulong) page_dir_get_n_heap(page),
- (ulong) (count + 1));
+ ib::error() << "N heap is wrong "
+ << page_dir_get_n_heap(page) << ", " << (count + 1);
goto func_exit;
}
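Note the hardened traversal above: instead of trusting page_rec_get_next_const() on a page that may be arbitrary garbage, the walk advances by raw next-record offsets and stops on anything outside the valid range. Extracted as a sketch:

	/* Bounded free-list traversal for old-style pages (sketch). */
	const rec_t*	free_rec = page_header_get_ptr(page, PAGE_FREE);
	ulint		count	 = 0;

	while (free_rec != NULL) {
		if (count++ > srv_page_size) {
			break;	/* list is circular: give up */
		}

		ulint	offs = rec_get_next_offs(free_rec, FALSE);

		if (offs == 0) {
			break;	/* end of the free list */
		}

		if (offs < PAGE_OLD_INFIMUM || offs >= srv_page_size) {
			/* Corrupted link: report and stop rather
			than chase a wild pointer. */
			break;
		}

		free_rec = page + offs;
	}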
@@ -2172,8 +2198,7 @@ func_exit:
This function checks the consistency of an index page when we do not
know the index. This is also resilient so that this should never crash
even if the page is total garbage.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
page_simple_validate_new(
/*=====================*/
@@ -2196,9 +2221,8 @@ page_simple_validate_new(
n_slots = page_dir_get_n_slots(page);
if (UNIV_UNLIKELY(n_slots > UNIV_PAGE_SIZE / 4)) {
- fprintf(stderr,
- "InnoDB: Nonsensical number %lu"
- " of page dir slots\n", (ulong) n_slots);
+ ib::error() << "Nonsensical number " << n_slots
+ << " of page dir slots";
goto func_exit;
}
@@ -2208,12 +2232,11 @@ page_simple_validate_new(
if (UNIV_UNLIKELY(rec_heap_top
> page_dir_get_nth_slot(page, n_slots - 1))) {
- fprintf(stderr,
- "InnoDB: Record heap and dir overlap on a page,"
- " heap top %lu, dir %lu\n",
- (ulong) page_header_get_field(page, PAGE_HEAP_TOP),
- (ulong)
- page_offset(page_dir_get_nth_slot(page, n_slots - 1)));
+ ib::error() << "Record heap and dir overlap on a page,"
+ " heap top "
+ << page_header_get_field(page, PAGE_HEAP_TOP)
+ << ", dir " << page_offset(
+ page_dir_get_nth_slot(page, n_slots - 1));
goto func_exit;
}
@@ -2230,11 +2253,10 @@ page_simple_validate_new(
for (;;) {
if (UNIV_UNLIKELY(rec > rec_heap_top)) {
- fprintf(stderr,
- "InnoDB: Record %lu is above rec"
- " heap top %lu\n",
- (ulong) page_offset(rec),
- (ulong) page_offset(rec_heap_top));
+
+ ib::error() << "Record " << page_offset(rec)
+ << " is above rec heap top "
+ << page_offset(rec_heap_top);
goto func_exit;
}
@@ -2244,22 +2266,18 @@ page_simple_validate_new(
if (UNIV_UNLIKELY(rec_get_n_owned_new(rec)
!= own_count)) {
- fprintf(stderr,
- "InnoDB: Wrong owned count %lu, %lu,"
- " rec %lu\n",
- (ulong) rec_get_n_owned_new(rec),
- (ulong) own_count,
- (ulong) page_offset(rec));
+ ib::error() << "Wrong owned count "
+ << rec_get_n_owned_new(rec) << ", "
+ << own_count << ", rec "
+ << page_offset(rec);
goto func_exit;
}
if (UNIV_UNLIKELY
(page_dir_slot_get_rec(slot) != rec)) {
- fprintf(stderr,
- "InnoDB: Dir slot does not point"
- " to right rec %lu\n",
- (ulong) page_offset(rec));
+ ib::error() << "Dir slot does not point"
+ " to right rec " << page_offset(rec);
goto func_exit;
}
@@ -2280,11 +2298,10 @@ page_simple_validate_new(
if (UNIV_UNLIKELY
(rec_get_next_offs(rec, TRUE) < FIL_PAGE_DATA
|| rec_get_next_offs(rec, TRUE) >= UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Next record offset nonsensical %lu"
- " for rec %lu\n",
- (ulong) rec_get_next_offs(rec, TRUE),
- (ulong) page_offset(rec));
+
+ ib::error() << "Next record offset nonsensical "
+ << rec_get_next_offs(rec, TRUE)
+ << " for rec " << page_offset(rec);
goto func_exit;
}
@@ -2292,10 +2309,8 @@ page_simple_validate_new(
count++;
if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Page record list appears"
- " to be circular %lu\n",
- (ulong) count);
+ ib::error() << "Page record list appears to be"
+ " circular " << count;
goto func_exit;
}
@@ -2304,25 +2319,23 @@ page_simple_validate_new(
}
if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) == 0)) {
- fprintf(stderr, "InnoDB: n owned is zero"
- " in a supremum rec\n");
+ ib::error() << "n owned is zero in a supremum rec";
goto func_exit;
}
if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
- fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n",
- (ulong) slot_no, (ulong) (n_slots - 1));
+ ib::error() << "n slots wrong " << slot_no << ", "
+ << (n_slots - 1);
goto func_exit;
}
- if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS)
+ if (UNIV_UNLIKELY(ulint(page_header_get_field(page, PAGE_N_RECS))
+ PAGE_HEAP_NO_USER_LOW
!= count + 1)) {
- fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
- (ulong) page_header_get_field(page, PAGE_N_RECS)
- + PAGE_HEAP_NO_USER_LOW,
- (ulong) (count + 1));
+ ib::error() << "n recs wrong "
+ << page_header_get_field(page, PAGE_N_RECS)
+ + PAGE_HEAP_NO_USER_LOW << " " << (count + 1);
goto func_exit;
}
@@ -2333,20 +2346,17 @@ page_simple_validate_new(
while (rec != NULL) {
if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA
|| rec >= page + UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Free list record has"
- " a nonsensical offset %lu\n",
- (ulong) page_offset(rec));
+
+ ib::error() << "Free list record has"
+ " a nonsensical offset " << page_offset(rec);
goto func_exit;
}
if (UNIV_UNLIKELY(rec > rec_heap_top)) {
- fprintf(stderr,
- "InnoDB: Free list record %lu"
- " is above rec heap top %lu\n",
- (ulong) page_offset(rec),
- (ulong) page_offset(rec_heap_top));
+ ib::error() << "Free list record " << page_offset(rec)
+ << " is above rec heap top "
+ << page_offset(rec_heap_top);
goto func_exit;
}
@@ -2354,21 +2364,28 @@ page_simple_validate_new(
count++;
if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Page free list appears"
- " to be circular %lu\n",
- (ulong) count);
+ ib::error() << "Page free list appears to be"
+ " circular " << count;
goto func_exit;
}
- rec = page_rec_get_next_const(rec);
+ const ulint offs = rec_get_next_offs(rec, TRUE);
+ if (!offs) {
+ break;
+ }
+ if (UNIV_UNLIKELY(offs < PAGE_OLD_INFIMUM
+ || offs >= srv_page_size)) {
+ ib::error() << "Page free list is corrupted " << count;
+ goto func_exit;
+ }
+
+ rec = page + offs;
}
if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
- fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n",
- (ulong) page_dir_get_n_heap(page),
- (ulong) (count + 1));
+ ib::error() << "N heap is wrong "
+ << page_dir_get_n_heap(page) << ", " << (count + 1);
goto func_exit;
}
@@ -2381,8 +2398,7 @@ func_exit:
/***************************************************************//**
This function checks the consistency of an index page.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
page_validate(
/*==========*/
@@ -2400,16 +2416,28 @@ page_validate(
ulint data_size;
const rec_t* rec;
const rec_t* old_rec = NULL;
+ const rec_t* first_rec = NULL;
ulint offs;
ulint n_slots;
ibool ret = FALSE;
ulint i;
- ulint* offsets = NULL;
- ulint* old_offsets = NULL;
+ offset_t offsets_1[REC_OFFS_NORMAL_SIZE];
+ offset_t offsets_2[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_1;
+ offset_t* old_offsets = offsets_2;
+
+ rec_offs_init(offsets_1);
+ rec_offs_init(offsets_2);
+
+#ifdef UNIV_GIS_DEBUG
+ if (dict_index_is_spatial(index)) {
+ fprintf(stderr, "Page no: %lu\n", page_get_page_no(page));
+ }
+#endif /* UNIV_GIS_DEBUG */
if (UNIV_UNLIKELY((ibool) !!page_is_comp(page)
!= dict_table_is_comp(index->table))) {
- fputs("InnoDB: 'compact format' flag mismatch\n", stderr);
+ ib::error() << "'compact format' flag mismatch";
goto func_exit2;
}
if (page_is_comp(page)) {
@@ -2422,16 +2450,20 @@ page_validate(
}
}
- if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)
+	/* Multiple transactions cannot operate on the same
+	temp-table in parallel.
+	max_trx_id is ignored for temp tables because it is not
+	required for MVCC. */
+ if (dict_index_is_sec_or_ibuf(index)
+ && !dict_table_is_temporary(index->table)
+ && page_is_leaf(page)
&& !page_is_empty(page)) {
trx_id_t max_trx_id = page_get_max_trx_id(page);
trx_id_t sys_max_trx_id = trx_sys_get_max_trx_id();
if (max_trx_id == 0 || max_trx_id > sys_max_trx_id) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "PAGE_MAX_TRX_ID out of bounds: "
- TRX_ID_FMT ", " TRX_ID_FMT,
- max_trx_id, sys_max_trx_id);
+ ib::error() << "PAGE_MAX_TRX_ID out of bounds: "
+ << max_trx_id << ", " << sys_max_trx_id;
goto func_exit2;
}
}
@@ -2451,13 +2483,11 @@ page_validate(
if (UNIV_UNLIKELY(!(page_header_get_ptr(page, PAGE_HEAP_TOP)
<= page_dir_get_nth_slot(page, n_slots - 1)))) {
- fprintf(stderr,
- "InnoDB: Record heap and dir overlap"
- " on space %lu page %lu index %s, %p, %p\n",
- (ulong) page_get_space_id(page),
- (ulong) page_get_page_no(page), index->name,
- page_header_get_ptr(page, PAGE_HEAP_TOP),
- page_dir_get_nth_slot(page, n_slots - 1));
+ ib::warn() << "Record heap and dir overlap on space "
+ << page_get_space_id(page) << " page "
+ << page_get_page_no(page) << " index " << index->name
+ << ", " << page_header_get_ptr(page, PAGE_HEAP_TOP)
+ << ", " << page_dir_get_nth_slot(page, n_slots - 1);
goto func_exit;
}
@@ -2474,12 +2504,13 @@ page_validate(
for (;;) {
offsets = rec_get_offsets(rec, index, offsets,
+ page_is_leaf(page),
ULINT_UNDEFINED, &heap);
if (page_is_comp(page) && page_rec_is_user_rec(rec)
&& UNIV_UNLIKELY(rec_get_node_ptr_flag(rec)
== page_is_leaf(page))) {
- fputs("InnoDB: node_ptr flag mismatch\n", stderr);
+ ib::error() << "'node_ptr' flag mismatch";
goto func_exit;
}
@@ -2487,48 +2518,88 @@ page_validate(
goto func_exit;
}
-#ifndef UNIV_HOTBACKUP
+ if (rec == first_rec) {
+ if ((rec_get_info_bits(rec, page_is_comp(page))
+ & REC_INFO_MIN_REC_FLAG)
+ && page_is_leaf(page)) {
+ ib::error() << "REC_INFO_MIN_REC_FLAG "
+ "is set in a leaf-page record";
+ ret = false;
+ }
+ } else if (rec_get_info_bits(rec, page_is_comp(page))
+ & REC_INFO_MIN_REC_FLAG) {
+ ib::error() << "REC_INFO_MIN_REC_FLAG record is not "
+ "first in page";
+ ret = false;
+ }
+
/* Check that the records are in the ascending order */
- if (UNIV_LIKELY(count >= PAGE_HEAP_NO_USER_LOW)
+ if (count >= PAGE_HEAP_NO_USER_LOW
&& !page_rec_is_supremum(rec)) {
- if (UNIV_UNLIKELY
- (1 != cmp_rec_rec(rec, old_rec,
- offsets, old_offsets, index))) {
- fprintf(stderr,
- "InnoDB: Records in wrong order"
- " on space %lu page %lu index %s\n",
- (ulong) page_get_space_id(page),
- (ulong) page_get_page_no(page),
- index->name);
+
+ int ret = cmp_rec_rec(
+ rec, old_rec, offsets, old_offsets, index);
+
+			/* For spatial index, on non-leaf level, we
+			allow recs to be equal. */
+ bool rtr_equal_nodeptrs =
+ (ret == 0 && dict_index_is_spatial(index)
+ && !page_is_leaf(page));
+
+ if (ret <= 0 && !rtr_equal_nodeptrs) {
+
+ ib::error() << "Records in wrong order on"
+ " space " << page_get_space_id(page)
+ << " page " << page_get_page_no(page)
+ << " index " << index->name;
+
fputs("\nInnoDB: previous record ", stderr);
- rec_print_new(stderr, old_rec, old_offsets);
- fputs("\nInnoDB: record ", stderr);
- rec_print_new(stderr, rec, offsets);
- putc('\n', stderr);
+ /* For spatial index, print the mbr info.*/
+ if (index->type & DICT_SPATIAL) {
+ putc('\n', stderr);
+ rec_print_mbr_rec(stderr,
+ old_rec, old_offsets);
+ fputs("\nInnoDB: record ", stderr);
+ putc('\n', stderr);
+ rec_print_mbr_rec(stderr, rec, offsets);
+ putc('\n', stderr);
+ putc('\n', stderr);
+
+ } else {
+ rec_print_new(stderr, old_rec, old_offsets);
+ fputs("\nInnoDB: record ", stderr);
+ rec_print_new(stderr, rec, offsets);
+ putc('\n', stderr);
+ }
goto func_exit;
}
}
-#endif /* !UNIV_HOTBACKUP */
if (page_rec_is_user_rec(rec)) {
data_size += rec_offs_size(offsets);
+
+#ifdef UNIV_GIS_DEBUG
+ /* For spatial index, print the mbr info.*/
+ if (index->type & DICT_SPATIAL) {
+ rec_print_mbr_rec(stderr, rec, offsets);
+ putc('\n', stderr);
+ }
+#endif /* UNIV_GIS_DEBUG */
}
offs = page_offset(rec_get_start(rec, offsets));
i = rec_offs_size(offsets);
if (UNIV_UNLIKELY(offs + i >= UNIV_PAGE_SIZE)) {
- fputs("InnoDB: record offset out of bounds\n", stderr);
+ ib::error() << "Record offset out of bounds";
goto func_exit;
}
while (i--) {
if (UNIV_UNLIKELY(buf[offs + i])) {
/* No other record may overlap this */
-
- fputs("InnoDB: Record overlaps another\n",
- stderr);
+ ib::error() << "Record overlaps another";
goto func_exit;
}
@@ -2544,17 +2615,14 @@ page_validate(
if (UNIV_UNLIKELY(rec_own_count)) {
/* This is a record pointed to by a dir slot */
if (UNIV_UNLIKELY(rec_own_count != own_count)) {
- fprintf(stderr,
- "InnoDB: Wrong owned count %lu, %lu\n",
- (ulong) rec_own_count,
- (ulong) own_count);
+ ib::error() << "Wrong owned count "
+ << rec_own_count << ", " << own_count;
goto func_exit;
}
if (page_dir_slot_get_rec(slot) != rec) {
- fputs("InnoDB: Dir slot does not"
- " point to right rec\n",
- stderr);
+ ib::error() << "Dir slot does not"
+ " point to right rec";
goto func_exit;
}
@@ -2576,12 +2644,13 @@ page_validate(
old_rec = rec;
rec = page_rec_get_next_const(rec);
- /* set old_offsets to offsets; recycle offsets */
- {
- ulint* offs = old_offsets;
- old_offsets = offsets;
- offsets = offs;
+ if (page_rec_is_infimum(old_rec)
+ && page_rec_is_user_rec(rec)) {
+ first_rec = rec;
}
+
+ /* set old_offsets to offsets; recycle offsets */
+ std::swap(old_offsets, offsets);
}
if (page_is_comp(page)) {
@@ -2591,30 +2660,28 @@ page_validate(
}
} else if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) {
n_owned_zero:
- fputs("InnoDB: n owned is zero\n", stderr);
+ ib::error() << "n owned is zero";
goto func_exit;
}
if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
- fprintf(stderr, "InnoDB: n slots wrong %lu %lu\n",
- (ulong) slot_no, (ulong) (n_slots - 1));
+ ib::error() << "n slots wrong " << slot_no << " "
+ << (n_slots - 1);
goto func_exit;
}
- if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS)
+ if (UNIV_UNLIKELY(ulint(page_header_get_field(page, PAGE_N_RECS))
+ PAGE_HEAP_NO_USER_LOW
!= count + 1)) {
- fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
- (ulong) page_header_get_field(page, PAGE_N_RECS)
- + PAGE_HEAP_NO_USER_LOW,
- (ulong) (count + 1));
+ ib::error() << "n recs wrong "
+ << page_header_get_field(page, PAGE_N_RECS)
+ + PAGE_HEAP_NO_USER_LOW << " " << (count + 1);
goto func_exit;
}
if (UNIV_UNLIKELY(data_size != page_get_data_size(page))) {
- fprintf(stderr,
- "InnoDB: Summed data size %lu, returned by func %lu\n",
- (ulong) data_size, (ulong) page_get_data_size(page));
+ ib::error() << "Summed data size " << data_size
+ << ", returned by func " << page_get_data_size(page);
goto func_exit;
}
@@ -2623,38 +2690,54 @@ n_owned_zero:
while (rec != NULL) {
offsets = rec_get_offsets(rec, index, offsets,
+ page_is_leaf(page),
ULINT_UNDEFINED, &heap);
if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) {
+ ret = FALSE;
+next_free:
+ const ulint offs = rec_get_next_offs(
+ rec, page_is_comp(page));
+ if (!offs) {
+ break;
+ }
+ if (UNIV_UNLIKELY(offs < PAGE_OLD_INFIMUM
+ || offs >= srv_page_size)) {
+ ib::error() << "Page free list is corrupted";
+ ret = FALSE;
+ break;
+ }
- goto func_exit;
+ rec = page + offs;
+ continue;
}
count++;
offs = page_offset(rec_get_start(rec, offsets));
i = rec_offs_size(offsets);
- if (UNIV_UNLIKELY(offs + i >= UNIV_PAGE_SIZE)) {
- fputs("InnoDB: record offset out of bounds\n", stderr);
- goto func_exit;
+ if (UNIV_UNLIKELY(offs + i >= srv_page_size)) {
+ ib::error() << "Free record offset out of bounds: "
+ << offs << '+' << i;
+ ret = FALSE;
+ goto next_free;
}
while (i--) {
if (UNIV_UNLIKELY(buf[offs + i])) {
- fputs("InnoDB: Record overlaps another"
- " in free list\n", stderr);
+ ib::error() << "Record overlaps another"
+ " in free list";
goto func_exit;
}
buf[offs + i] = 1;
}
- rec = page_rec_get_next_const(rec);
+ goto next_free;
}
if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
- fprintf(stderr, "InnoDB: N heap is wrong %lu %lu\n",
- (ulong) page_dir_get_n_heap(page),
- (ulong) count + 1);
+ ib::error() << "N heap is wrong "
+ << page_dir_get_n_heap(page) << " " << count + 1;
goto func_exit;
}
@@ -2665,24 +2748,17 @@ func_exit:
if (UNIV_UNLIKELY(ret == FALSE)) {
func_exit2:
- fprintf(stderr,
- "InnoDB: Apparent corruption"
- " in space %lu page %lu index %s\n",
- (ulong) page_get_space_id(page),
- (ulong) page_get_page_no(page),
- index->name);
- buf_page_print(page, 0);
- ut_ad(0);
+ ib::error() << "Apparent corruption in space "
+ << page_get_space_id(page) << " page "
+ << page_get_page_no(page) << " index " << index->name;
}
return(ret);
}
-#ifndef UNIV_HOTBACKUP
/***************************************************************//**
Looks in the page record list for a record with the given heap number.
-@return record, NULL if not found */
-UNIV_INTERN
+@return record, NULL if not found */
const rec_t*
page_find_rec_with_heap_no(
/*=======================*/
@@ -2694,7 +2770,7 @@ page_find_rec_with_heap_no(
if (page_is_comp(page)) {
rec = page + PAGE_NEW_INFIMUM;
- for(;;) {
+ for (;;) {
ulint rec_heap_no = rec_get_heap_no_new(rec);
if (rec_heap_no == heap_no) {
@@ -2725,14 +2801,12 @@ page_find_rec_with_heap_no(
}
}
}
-#endif /* !UNIV_HOTBACKUP */
/*******************************************************//**
Removes the record from a leaf page. This function does not log
any changes. It is used by the IMPORT tablespace functions.
The cursor is moved to the next record after the deleted one.
-@return true if success, i.e., the page did not become too empty */
-UNIV_INTERN
+@return true if success, i.e., the page did not become too empty */
bool
page_delete_rec(
/*============*/
@@ -2741,7 +2815,7 @@ page_delete_rec(
page_cur_t* pcur, /*!< in/out: page cursor on record
to delete */
page_zip_des_t* page_zip,/*!< in: compressed page descriptor */
- const ulint* offsets)/*!< in: offsets for record */
+ const offset_t* offsets)/*!< in: offsets for record */
{
bool no_compress_needed;
buf_block_t* block = pcur->block;
@@ -2751,9 +2825,8 @@ page_delete_rec(
if (!rec_offs_any_extern(offsets)
&& ((page_get_data_size(page) - rec_offs_size(offsets)
- < BTR_CUR_PAGE_COMPRESS_LIMIT)
- || (mach_read_from_4(page + FIL_PAGE_NEXT) == FIL_NULL
- && mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL)
+ < BTR_CUR_PAGE_COMPRESS_LIMIT(index))
+ || !page_has_siblings(page)
|| (page_get_n_recs(page) < 2))) {
ulint root_page_no = dict_index_get_page(index);
@@ -2787,7 +2860,6 @@ page_delete_rec(
@param[in] page index tree leaf page
@return the last record, not delete-marked
@retval infimum record if all records are delete-marked */
-
const rec_t*
page_find_rec_max_not_deleted(
const page_t* page)
diff --git a/storage/innobase/page/page0zip.cc b/storage/innobase/page/page0zip.cc
index 6f1c19799b5..30491cabd16 100644
--- a/storage/innobase/page/page0zip.cc
+++ b/storage/innobase/page/page0zip.cc
@@ -2,7 +2,7 @@
Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2014, 2019, MariaDB Corporation.
+Copyright (c) 2014, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,64 +25,52 @@ Compressed page interface
Created June 2005 by Marko Makela
*******************************************************/
-// First include (the generated) my_config.h, to get correct platform defines.
-#include "my_config.h"
-
-#define THIS_MODULE
+#include "page0size.h"
#include "page0zip.h"
-#ifdef UNIV_NONINL
-# include "page0zip.ic"
-#endif
-#undef THIS_MODULE
-#include "fil0fil.h"
-#include "buf0checksum.h"
-#include "mach0data.h"
+#include "span.h"
+
+using st_::span;
+
+/** A BLOB field reference full of zero, for use in assertions and tests.
+Initially, BLOB field references are set to zero, in
+dtuple_convert_big_rec(). */
+const byte field_ref_zero[UNIV_PAGE_SIZE_MAX] = { 0, };
+
#ifndef UNIV_INNOCHECKSUM
#include "page0page.h"
#include "mtr0log.h"
-#include "ut0sort.h"
#include "dict0dict.h"
#include "btr0cur.h"
#include "page0types.h"
#include "log0recv.h"
-#endif /* !UNIV_INNOCHECKSUM */
+#include "row0row.h"
+#include "row0trunc.h"
#include "zlib.h"
-#ifndef UNIV_HOTBACKUP
-#ifndef UNIV_INNOCHECKSUM
-# include "buf0buf.h"
-# include "btr0sea.h"
-# include "dict0boot.h"
-# include "lock0lock.h"
-# include "srv0srv.h"
-#endif /* !UNIV_INNOCHECKSUM */
-# include "buf0lru.h"
-# include "srv0mon.h"
-# include "ut0crc32.h"
-#else /* !UNIV_HOTBACKUP */
-# include "buf0checksum.h"
-# define lock_move_reorganize_page(block, temp_block) ((void) 0)
-# define buf_LRU_stat_inc_unzip() ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
+#include "buf0buf.h"
+#include "buf0types.h"
+#include "buf0checksum.h"
+#include "btr0sea.h"
+#include "dict0boot.h"
+#include "lock0lock.h"
+#include "srv0srv.h"
+#include "buf0lru.h"
+#include "srv0mon.h"
+#include "ut0crc32.h"
+
+#include <map>
+#include <algorithm>
-#ifndef UNIV_INNOCHECKSUM
-#ifndef UNIV_HOTBACKUP
/** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
-UNIV_INTERN page_zip_stat_t page_zip_stat[PAGE_ZIP_SSIZE_MAX];
+page_zip_stat_t page_zip_stat[PAGE_ZIP_SSIZE_MAX];
/** Statistics on compression, indexed by index->id */
-UNIV_INTERN page_zip_stat_per_index_t page_zip_stat_per_index;
-/** Mutex protecting page_zip_stat_per_index */
-UNIV_INTERN ib_mutex_t page_zip_stat_per_index_mutex;
-#ifdef HAVE_PSI_INTERFACE
-UNIV_INTERN mysql_pfs_key_t page_zip_stat_per_index_mutex_key;
-#endif /* HAVE_PSI_INTERFACE */
-#endif /* !UNIV_HOTBACKUP */
+page_zip_stat_per_index_t page_zip_stat_per_index;
/** Compression level to be used by zlib. Settable by user. */
-UNIV_INTERN uint page_zip_level;
+uint page_zip_level;
/** Whether or not to log compressed page images to avoid possible
compression algorithm changes in zlib. */
-UNIV_INTERN my_bool page_zip_log_pages;
+my_bool page_zip_log_pages;
/* Please refer to ../include/page0zip.ic for a description of the
compressed page format. */
@@ -112,14 +100,15 @@ static const byte supremum_extra_data[] = {
/** Assert that a block of memory is filled with zero bytes.
Compare at most sizeof(field_ref_zero) bytes.
-@param b in: memory block
-@param s in: size of the memory block, in bytes */
-#define ASSERT_ZERO(b, s) \
- ut_ad(!memcmp(b, field_ref_zero, ut_min(s, sizeof field_ref_zero)))
+@param b in: memory block
+@param s in: size of the memory block, in bytes */
+#define ASSERT_ZERO(b, s) \
+ ut_ad(!memcmp(b, field_ref_zero, \
+ ut_min(static_cast<size_t>(s), sizeof field_ref_zero)));
/** Assert that a BLOB pointer is filled with zero bytes.
-@param b in: BLOB pointer */
+@param b in: BLOB pointer */
#define ASSERT_ZERO_BLOB(b) \
- ut_ad(!memcmp(b, field_ref_zero, sizeof field_ref_zero))
+ ut_ad(!memcmp(b, field_ref_zero, FIELD_REF_SIZE))
/* Enable some extra debugging output. This code can be enabled
independently of any UNIV_ debugging conditions. */
@@ -128,7 +117,7 @@ independently of any UNIV_ debugging conditions. */
MY_ATTRIBUTE((format (printf, 1, 2)))
/**********************************************************************//**
Report a failure to decompress or compress.
-@return number of characters printed */
+@return number of characters printed */
static
int
page_zip_fail_func(
@@ -148,21 +137,17 @@ page_zip_fail_func(
return(res);
}
/** Wrapper for page_zip_fail_func()
-@param fmt_args in: printf(3) format string and arguments */
+@param fmt_args in: printf(3) format string and arguments */
# define page_zip_fail(fmt_args) page_zip_fail_func fmt_args
#else /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
/** Dummy wrapper for page_zip_fail_func()
-@param fmt_args ignored: printf(3) format string and arguments */
+@param fmt_args ignored: printf(3) format string and arguments */
# define page_zip_fail(fmt_args) /* empty */
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
-#endif /* !UNIV_INNOCHECKSUM */
-#ifndef UNIV_INNOCHECKSUM
-#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Determine the guaranteed free space on an empty page.
-@return minimum payload size on the page */
-UNIV_INTERN
+@return minimum payload size on the page */
ulint
page_zip_empty_size(
/*================*/
@@ -173,8 +158,7 @@ page_zip_empty_size(
/* subtract the page header and the longest
uncompressed data needed for one record */
- (PAGE_DATA
- + PAGE_ZIP_DIR_SLOT_SIZE
- + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN
+ + PAGE_ZIP_CLUST_LEAF_SLOT_SIZE
+ 1/* encoded heap_no==2 in page_zip_write_rec() */
+ 1/* end of modification log */
- REC_N_NEW_EXTRA_BYTES/* omitted bytes */)
@@ -182,12 +166,61 @@ page_zip_empty_size(
- compressBound(static_cast<uLong>(2 * (n_fields + 1)));
return(size > 0 ? (ulint) size : 0);
}
-#endif /* !UNIV_HOTBACKUP */
+
+/** Check whether a tuple is too big for compressed table
+@param[in] index dict index object
+@param[in] entry entry for the index
+@return true if it's too big, otherwise false */
+bool
+page_zip_is_too_big(
+ const dict_index_t* index,
+ const dtuple_t* entry)
+{
+ const page_size_t& page_size =
+ dict_table_page_size(index->table);
+
+ /* Estimate the free space of an empty compressed page.
+ Subtract one byte for the encoded heap_no in the
+ modification log. */
+ ulint free_space_zip = page_zip_empty_size(
+ index->n_fields, page_size.physical());
+ ulint n_uniq = dict_index_get_n_unique_in_tree(index);
+
+ ut_ad(dict_table_is_comp(index->table));
+ ut_ad(page_size.is_compressed());
+
+ if (free_space_zip == 0) {
+ return(true);
+ }
+
+ /* Subtract one byte for the encoded heap_no in the
+ modification log. */
+ free_space_zip--;
+
+ /* There should be enough room for two node pointer
+ records on an empty non-leaf page. This prevents
+ infinite page splits. */
+
+ if (entry->n_fields >= n_uniq
+ && (REC_NODE_PTR_SIZE
+ + rec_get_converted_size_comp_prefix(
+ index, entry->fields, n_uniq, NULL)
+ /* On a compressed page, there is
+ a two-byte entry in the dense
+ page directory for every record.
+ But there is no record header. */
+ - (REC_N_NEW_EXTRA_BYTES - 2)
+ > free_space_zip / 2)) {
+ return(true);
+ }
+
+ return(false);
+}
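The split-prevention rule reduces to a small predicate; a hedged
standalone model of it follows (the sizes are fabricated, and the
helper is not the InnoDB function):

	#include <cassert>

	/* A record is "too big" when a node-pointer record built from
	its first n_uniq fields would not fit twice into the free space
	of an empty compressed page; otherwise a page split could
	recurse forever. */
	static bool too_big(unsigned long node_ptr_rec_size,
			    unsigned long free_space_zip)
	{
		if (free_space_zip == 0) {
			return true;
		}
		free_space_zip--; /* encoded heap_no in the mod log */
		return node_ptr_rec_size > free_space_zip / 2;
	}

	int main()
	{
		assert(!too_big(100, 1000)); /* two node pointers fit */
		assert(too_big(600, 1000));  /* they do not: reject */
		return 0;
	}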
/*************************************************************//**
Gets the number of elements in the dense page directory,
including deleted records (the free list).
-@return number of elements in the dense page directory */
+@return number of elements in the dense page directory */
UNIV_INLINE
ulint
page_zip_dir_elems(
@@ -201,7 +234,7 @@ page_zip_dir_elems(
/*************************************************************//**
Gets the size of the compressed page trailer (the dense page directory),
including deleted records (the free list).
-@return length of dense page directory, in bytes */
+@return length of dense page directory, in bytes */
UNIV_INLINE
ulint
page_zip_dir_size(
@@ -214,7 +247,7 @@ page_zip_dir_size(
/*************************************************************//**
Gets an offset to the compressed page trailer (the dense page directory),
including deleted records (the free list).
-@return offset of the dense page directory */
+@return offset of the dense page directory */
UNIV_INLINE
ulint
page_zip_dir_start_offs(
@@ -230,23 +263,23 @@ page_zip_dir_start_offs(
/*************************************************************//**
Gets a pointer to the compressed page trailer (the dense page directory),
including deleted records (the free list).
-@param[in] page_zip compressed page
-@param[in] n_dense number of entries in the directory
-@return pointer to the dense page directory */
+@param[in] page_zip compressed page
+@param[in] n_dense number of entries in the directory
+@return pointer to the dense page directory */
#define page_zip_dir_start_low(page_zip, n_dense) \
((page_zip)->data + page_zip_dir_start_offs(page_zip, n_dense))
/*************************************************************//**
Gets a pointer to the compressed page trailer (the dense page directory),
including deleted records (the free list).
-@param[in] page_zip compressed page
-@return pointer to the dense page directory */
+@param[in] page_zip compressed page
+@return pointer to the dense page directory */
#define page_zip_dir_start(page_zip) \
page_zip_dir_start_low(page_zip, page_zip_dir_elems(page_zip))
/*************************************************************//**
Gets the size of the compressed page trailer (the dense page directory),
only including user records (excluding the free list).
-@return length of dense page directory comprising existing records, in bytes */
+@return length of dense page directory comprising existing records, in bytes */
UNIV_INLINE
ulint
page_zip_dir_user_size(
@@ -261,7 +294,7 @@ page_zip_dir_user_size(
/*************************************************************//**
Find the slot of the given record in the dense page directory.
-@return dense directory slot, or NULL if record not found */
+@return dense directory slot, or NULL if record not found */
UNIV_INLINE
byte*
page_zip_dir_find_low(
@@ -284,7 +317,7 @@ page_zip_dir_find_low(
/*************************************************************//**
Find the slot of the given non-free record in the dense page directory.
-@return dense directory slot, or NULL if record not found */
+@return dense directory slot, or NULL if record not found */
UNIV_INLINE
byte*
page_zip_dir_find(
@@ -303,7 +336,7 @@ page_zip_dir_find(
/*************************************************************//**
Find the slot of the given free record in the dense page directory.
-@return dense directory slot, or NULL if record not found */
+@return dense directory slot, or NULL if record not found */
UNIV_INLINE
byte*
page_zip_dir_find_free(
@@ -338,7 +371,6 @@ page_zip_dir_get(
- PAGE_ZIP_DIR_SLOT_SIZE * (slot + 1)));
}
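A standalone sketch of the trailer addressing used by
page_zip_dir_get(): slots are PAGE_ZIP_DIR_SLOT_SIZE (2) bytes each,
stored big-endian as in mach_read_from_2(), and grow from the end of
the compressed page toward lower addresses. The buffer below is
fabricated for illustration:

	#include <cassert>
	#include <cstddef>
	#include <cstdint>

	static uint16_t dir_get(const uint8_t* data, size_t size,
				size_t slot)
	{
		/* slot i lives at size - 2 * (i + 1), i.e. slot 0
		occupies the last two bytes of the page */
		const uint8_t* p = data + size - 2 * (slot + 1);
		return (uint16_t) (p[0] << 8 | p[1]);
	}

	int main()
	{
		uint8_t page[16] = {0};
		page[14] = 0x01; page[15] = 0x63; /* slot 0 = 0x0163 */
		page[12] = 0x00; page[13] = 0x80; /* slot 1 = 0x0080 */
		assert(dir_get(page, sizeof page, 0) == 0x0163);
		assert(dir_get(page, sizeof page, 1) == 0x0080);
		return 0;
	}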
-#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Write a log record of compressing an index page. */
static
@@ -402,7 +434,6 @@ page_zip_compress_write_log(
mlog_catenate_string(mtr, page_zip->data + page_zip_get_size(page_zip)
- trailer_size, trailer_size);
}
-#endif /* !UNIV_HOTBACKUP */
/******************************************************//**
Determine how many externally stored columns are contained
@@ -455,7 +486,7 @@ page_zip_get_n_prev_extern(
/**********************************************************************//**
Encode the length of a fixed-length column.
-@return buf + length of encoded val */
+@return buf + length of encoded val */
static
byte*
page_zip_fixed_field_encode(
@@ -483,17 +514,19 @@ page_zip_fixed_field_encode(
/**********************************************************************//**
Write the index information for the compressed page.
-@return used size of buf */
-static
+@return used size of buf */
ulint
page_zip_fields_encode(
/*===================*/
- ulint n, /*!< in: number of fields to compress */
- dict_index_t* index, /*!< in: index comprising at least n fields */
- ulint trx_id_pos,/*!< in: position of the trx_id column
- in the index, or ULINT_UNDEFINED if
- this is a non-leaf page */
- byte* buf) /*!< out: buffer of (n + 1) * 2 bytes */
+ ulint n, /*!< in: number of fields
+ to compress */
+ const dict_index_t* index, /*!< in: index comprising
+ at least n fields */
+ ulint trx_id_pos,
+ /*!< in: position of the trx_id column
+ in the index, or ULINT_UNDEFINED if
+ this is a non-leaf page */
+ byte* buf) /*!< out: buffer of (n + 1) * 2 bytes */
{
const byte* buf_start = buf;
ulint i;
@@ -519,8 +552,7 @@ page_zip_fields_encode(
const dict_col_t* column
= dict_field_get_col(field);
- if (UNIV_UNLIKELY(column->len > 255)
- || UNIV_UNLIKELY(column->mtype == DATA_BLOB)) {
+ if (DATA_BIG_COL(column)) {
val |= 0x7e; /* max > 255 bytes */
}
@@ -636,8 +668,7 @@ page_zip_dir_encode(
status = REC_STATUS_ORDINARY;
} else {
status = REC_STATUS_NODE_PTR;
- if (UNIV_UNLIKELY
- (mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL)) {
+ if (UNIV_UNLIKELY(!page_has_prev(page))) {
min_mark = REC_INFO_MIN_REC_FLAG;
}
}
@@ -664,10 +695,10 @@ page_zip_dir_encode(
ut_a(offs < UNIV_PAGE_SIZE - PAGE_DIR);
ut_a(offs >= PAGE_ZIP_START);
#if PAGE_ZIP_DIR_SLOT_MASK & (PAGE_ZIP_DIR_SLOT_MASK + 1)
-# error "PAGE_ZIP_DIR_SLOT_MASK is not 1 less than a power of 2"
+# error PAGE_ZIP_DIR_SLOT_MASK is not 1 less than a power of 2
#endif
-#if PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_ZIP_SIZE_MAX - 1
-# error "PAGE_ZIP_DIR_SLOT_MASK < UNIV_ZIP_SIZE_MAX - 1"
+#if PAGE_ZIP_DIR_SLOT_MASK < UNIV_ZIP_SIZE_MAX - 1
+# error PAGE_ZIP_DIR_SLOT_MASK < UNIV_ZIP_SIZE_MAX - 1
#endif
if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) {
offs |= PAGE_ZIP_DIR_SLOT_OWNED;
@@ -755,7 +786,6 @@ page_zip_free(
/**********************************************************************//**
Configure the zlib allocator to use the given memory heap. */
-UNIV_INTERN
void
page_zip_set_alloc(
/*===============*/
@@ -777,16 +807,16 @@ page_zip_set_alloc(
#ifdef PAGE_ZIP_COMPRESS_DBG
/** Set this variable in a debugger to enable
excessive logging in page_zip_compress(). */
-UNIV_INTERN ibool page_zip_compress_dbg;
+static ibool page_zip_compress_dbg;
/** Set this variable in a debugger to enable
binary logging of the data passed to deflate().
When this variable is nonzero, it will act
as a log file name generator. */
-UNIV_INTERN unsigned page_zip_compress_log;
+static unsigned page_zip_compress_log;
/**********************************************************************//**
Wrapper for deflate(). Log the operation if page_zip_compress_dbg is set.
-@return deflate() status: Z_OK, Z_BUF_ERROR, ... */
+@return deflate() status: Z_OK, Z_BUF_ERROR, ... */
static
int
page_zip_compress_deflate(
@@ -800,7 +830,10 @@ page_zip_compress_deflate(
ut_print_buf(stderr, strm->next_in, strm->avail_in);
}
if (UNIV_LIKELY_NULL(logfile)) {
- fwrite(strm->next_in, 1, strm->avail_in, logfile);
+ if (fwrite(strm->next_in, 1, strm->avail_in, logfile)
+ != strm->avail_in) {
+ perror("fwrite");
+ }
}
status = deflate(strm, flush);
if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
@@ -813,9 +846,9 @@ page_zip_compress_deflate(
# undef deflate
/** Debug wrapper for the zlib compression routine deflate().
Log the operation if page_zip_compress_dbg is set.
-@param strm in/out: compressed stream
-@param flush in: flushing method
-@return deflate() status: Z_OK, Z_BUF_ERROR, ... */
+@param strm in/out: compressed stream
+@param flush in: flushing method
+@return deflate() status: Z_OK, Z_BUF_ERROR, ... */
# define deflate(strm, flush) page_zip_compress_deflate(logfile, strm, flush)
/** Declaration of the logfile parameter */
# define FILE_LOGFILE FILE* logfile,
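These two defines implement a debug-only interception pattern: the
preprocessor rewrites every later deflate() call in the file to pass
through the logging wrapper, without editing any call site. A minimal
sketch of the same trick, with a made-up compute() standing in for
deflate():

	#include <cstdio>

	static int compute(int x)
	{
		return x * 2;	/* stand-in for the library call */
	}

	static int logged_compute(FILE* logfile, int x)
	{
		int status = compute(x); /* real call: macro not yet
					 defined at this point */
		if (logfile != NULL) {
			fprintf(logfile, "compute(%d) = %d\n", x, status);
		}
		return status;
	}

	/* From here on, every compute() call in this translation unit
	is routed through the wrapper, like the deflate() define. */
	#define compute(x) logged_compute(logfile, x)

	int main()
	{
		FILE* logfile = stderr;
		return compute(21) == 42 ? 0 : 1;
	}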
@@ -830,7 +863,7 @@ Log the operation if page_zip_compress_dbg is set.
/**********************************************************************//**
Compress the records of a node pointer page.
-@return Z_OK, or a zlib error code */
+@return Z_OK, or a zlib error code */
static
int
page_zip_compress_node_ptrs(
@@ -845,12 +878,12 @@ page_zip_compress_node_ptrs(
mem_heap_t* heap) /*!< in: temporary memory heap */
{
int err = Z_OK;
- ulint* offsets = NULL;
+ offset_t* offsets = NULL;
do {
const rec_t* rec = *recs++;
- offsets = rec_get_offsets(rec, index, offsets,
+ offsets = rec_get_offsets(rec, index, offsets, false,
ULINT_UNDEFINED, &heap);
/* Only leaf nodes may contain externally stored columns. */
ut_ad(!rec_offs_any_extern(offsets));
@@ -896,7 +929,7 @@ page_zip_compress_node_ptrs(
/**********************************************************************//**
Compress the records of a leaf node of a secondary index.
-@return Z_OK, or a zlib error code */
+@return Z_OK, or a zlib error code */
static
int
page_zip_compress_sec(
@@ -942,7 +975,7 @@ page_zip_compress_sec(
/**********************************************************************//**
Compress a record of a leaf node of a clustered index that contains
externally stored columns.
-@return Z_OK, or a zlib error code */
+@return Z_OK, or a zlib error code */
static
int
page_zip_compress_clust_ext(
@@ -950,7 +983,7 @@ page_zip_compress_clust_ext(
FILE_LOGFILE
z_stream* c_stream, /*!< in/out: compressed page stream */
const rec_t* rec, /*!< in: record */
- const ulint* offsets, /*!< in: rec_get_offsets(rec) */
+ const offset_t* offsets, /*!< in: rec_get_offsets(rec) */
ulint trx_id_col, /*!< in: position of DB_TRX_ID */
byte* deleted, /*!< in: dense directory entry pointing
to the head of the free list */
@@ -1069,7 +1102,7 @@ page_zip_compress_clust_ext(
/**********************************************************************//**
Compress the records of a leaf node of a clustered index.
-@return Z_OK, or a zlib error code */
+@return Z_OK, or a zlib error code */
static
int
page_zip_compress_clust(
@@ -1089,7 +1122,7 @@ page_zip_compress_clust(
mem_heap_t* heap) /*!< in: temporary memory heap */
{
int err = Z_OK;
- ulint* offsets = NULL;
+ offset_t* offsets = NULL;
/* BTR_EXTERN_FIELD_REF storage */
byte* externs = storage - n_dense
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
@@ -1099,7 +1132,7 @@ page_zip_compress_clust(
do {
const rec_t* rec = *recs++;
- offsets = rec_get_offsets(rec, index, offsets,
+ offsets = rec_get_offsets(rec, index, offsets, true,
ULINT_UNDEFINED, &heap);
ut_ad(rec_offs_n_fields(offsets)
== dict_index_get_n_fields(index));
@@ -1199,54 +1232,66 @@ page_zip_compress_clust(
} while (--n_dense);
func_exit:
return(err);
}
/**********************************************************************//**
Compress a page.
@return TRUE on success, FALSE on failure; page_zip will be left
intact on failure. */
-UNIV_INTERN
ibool
page_zip_compress(
/*==============*/
- page_zip_des_t* page_zip,/*!< in: size; out: data, n_blobs,
- m_start, m_end, m_nonempty */
- const page_t* page, /*!< in: uncompressed page */
- dict_index_t* index, /*!< in: index of the B-tree node */
- ulint level, /*!< in: compression level */
- mtr_t* mtr) /*!< in: mini-transaction, or NULL */
+ page_zip_des_t* page_zip, /*!< in: size; out: data,
+ n_blobs, m_start, m_end,
+ m_nonempty */
+ const page_t* page, /*!< in: uncompressed page */
+ dict_index_t* index, /*!< in: index of the B-tree
+ node */
+ ulint level, /*!< in: compression level */
+ const redo_page_compress_t* page_comp_info,
+ /*!< in: used for applying
+ TRUNCATE log
+ record during recovery */
+ mtr_t* mtr) /*!< in/out: mini-transaction,
+ or NULL */
{
- z_stream c_stream;
- int err;
- ulint n_fields;/* number of index fields needed */
- byte* fields; /*!< index field information */
- byte* buf; /*!< compressed payload of the page */
- byte* buf_end;/* end of buf */
- ulint n_dense;
- ulint slot_size;/* amount of uncompressed bytes per record */
- const rec_t** recs; /*!< dense page directory, sorted by address */
- mem_heap_t* heap;
- ulint trx_id_col;
- ulint n_blobs = 0;
- byte* storage;/* storage of uncompressed columns */
-#ifndef UNIV_HOTBACKUP
- const ulonglong ns = my_interval_timer();
-#endif /* !UNIV_HOTBACKUP */
+ z_stream c_stream;
+ int err;
+ ulint n_fields; /* number of index fields
+ needed */
+ byte* fields; /*!< index field information */
+ byte* buf; /*!< compressed payload of the
+ page */
+ byte* buf_end; /* end of buf */
+ ulint n_dense;
+ ulint slot_size; /* amount of uncompressed bytes
+ per record */
+ const rec_t** recs; /*!< dense page directory,
+ sorted by address */
+ mem_heap_t* heap;
+ ulint trx_id_col = ULINT_UNDEFINED;
+ ulint n_blobs = 0;
+ byte* storage; /* storage of uncompressed
+ columns */
+ index_id_t ind_id;
+ const ulonglong ns = my_interval_timer();
#ifdef PAGE_ZIP_COMPRESS_DBG
- FILE* logfile = NULL;
+ FILE* logfile = NULL;
#endif
/* A local copy of srv_cmp_per_index_enabled to avoid reading that
variable multiple times in this function since it can be changed at
anytime. */
- my_bool cmp_per_index_enabled = srv_cmp_per_index_enabled;
+ my_bool cmp_per_index_enabled;
+ cmp_per_index_enabled = srv_cmp_per_index_enabled;
ut_a(page_is_comp(page));
- ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX);
+ ut_a(fil_page_index_page_check(page));
ut_ad(page_simple_validate_new((page_t*) page));
ut_ad(page_zip_simple_validate(page_zip));
- ut_ad(dict_table_is_comp(index->table));
- ut_ad(!dict_index_is_ibuf(index));
+ ut_ad(!index
+ || (dict_table_is_comp(index->table)
+ && !dict_index_is_ibuf(index)));
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
@@ -1266,31 +1311,43 @@ page_zip_compress(
== PAGE_NEW_SUPREMUM);
}
- if (page_is_leaf(page)) {
- n_fields = dict_index_get_n_fields(index);
+ if (truncate_t::s_fix_up_active) {
+ ut_ad(page_comp_info != NULL);
+ n_fields = page_comp_info->n_fields;
+ ind_id = page_comp_info->index_id;
} else {
- n_fields = dict_index_get_n_unique_in_tree(index);
+ if (page_is_leaf(page)) {
+ n_fields = dict_index_get_n_fields(index);
+ } else {
+ n_fields = dict_index_get_n_unique_in_tree_nonleaf(index);
+ }
+ ind_id = index->id;
}
/* The dense directory excludes the infimum and supremum records. */
n_dense = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
#ifdef PAGE_ZIP_COMPRESS_DBG
if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
- fprintf(stderr, "compress %p %p %lu %lu %lu\n",
- (void*) page_zip, (void*) page,
- (ibool) page_is_leaf(page),
- n_fields, n_dense);
+ ib::info() << "compress "
+ << static_cast<void*>(page_zip) << " "
+ << static_cast<const void*>(page) << " "
+ << page_is_leaf(page) << " "
+ << n_fields << " " << n_dense;
}
+
if (UNIV_UNLIKELY(page_zip_compress_log)) {
/* Create a log file for every compression attempt. */
char logfilename[9];
- ut_snprintf(logfilename, sizeof logfilename,
- "%08x", page_zip_compress_log++);
+ snprintf(logfilename, sizeof logfilename,
+ "%08x", page_zip_compress_log++);
logfile = fopen(logfilename, "wb");
if (logfile) {
/* Write the uncompressed page to the log. */
- fwrite(page, 1, UNIV_PAGE_SIZE, logfile);
+ if (fwrite(page, 1, UNIV_PAGE_SIZE, logfile)
+ != UNIV_PAGE_SIZE) {
+ perror("fwrite");
+ }
/* Record the compressed size as zero.
This will be overwritten at successful exit. */
putc(0, logfile);
@@ -1300,14 +1357,12 @@ page_zip_compress(
}
}
#endif /* PAGE_ZIP_COMPRESS_DBG */
-#ifndef UNIV_HOTBACKUP
page_zip_stat[page_zip->ssize - 1].compressed++;
if (cmp_per_index_enabled) {
mutex_enter(&page_zip_stat_per_index_mutex);
- page_zip_stat_per_index[index->id].compressed++;
+ page_zip_stat_per_index[ind_id].compressed++;
mutex_exit(&page_zip_stat_per_index_mutex);
}
-#endif /* !UNIV_HOTBACKUP */
if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
>= page_zip_get_size(page_zip))) {
@@ -1328,13 +1383,16 @@ page_zip_compress(
&& strcasecmp(index->table_name, "IBUF_DUMMY") != 0) {
#ifdef UNIV_DEBUG
- fprintf(stderr,
- "InnoDB: Simulating a compression failure"
- " for table %s, index %s, page %lu (%s)\n",
- index->table_name,
- index->name,
- page_get_page_no(page),
- page_is_leaf(page) ? "leaf" : "non-leaf");
+ ib::error()
+ << "Simulating a compression failure"
+ << " for table " << index->table->name
+ << " index "
+ << index->name()
+ << " page "
+ << page_get_page_no(page)
+ << "("
+ << (page_is_leaf(page) ? "leaf" : "non-leaf")
+ << ")";
#endif
@@ -1368,25 +1426,38 @@ page_zip_compress(
ut_a(err == Z_OK);
c_stream.next_out = buf;
+
/* Subtract the space reserved for uncompressed data. */
/* Page header and the end marker of the modification log */
c_stream.avail_out = static_cast<uInt>(buf_end - buf - 1);
/* Dense page directory and uncompressed columns, if any */
if (page_is_leaf(page)) {
- if (dict_index_is_clust(index)) {
- trx_id_col = dict_index_get_sys_col_pos(
- index, DATA_TRX_ID);
- ut_ad(trx_id_col > 0);
- ut_ad(trx_id_col != ULINT_UNDEFINED);
+ if ((index && dict_index_is_clust(index))
+ || (page_comp_info
+ && (page_comp_info->type & DICT_CLUSTERED))) {
+
+ if (index) {
+ trx_id_col = dict_index_get_sys_col_pos(
+ index, DATA_TRX_ID);
+ ut_ad(trx_id_col > 0);
+ ut_ad(trx_id_col != ULINT_UNDEFINED);
+ } else if (page_comp_info
+ && (page_comp_info->type
+ & DICT_CLUSTERED)) {
+ trx_id_col = page_comp_info->trx_id_pos;
+ }
slot_size = PAGE_ZIP_DIR_SLOT_SIZE
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
+
} else {
/* Signal the absence of trx_id
in page_zip_fields_encode() */
- ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
- == ULINT_UNDEFINED);
+ if (index) {
+ ut_ad(dict_index_get_sys_col_pos(
+ index, DATA_TRX_ID) == ULINT_UNDEFINED);
+ }
trx_id_col = 0;
slot_size = PAGE_ZIP_DIR_SLOT_SIZE;
}
@@ -1401,9 +1472,20 @@ page_zip_compress(
}
c_stream.avail_out -= static_cast<uInt>(n_dense * slot_size);
- c_stream.avail_in = static_cast<uInt>(
- page_zip_fields_encode(n_fields, index, trx_id_col, fields));
+ if (truncate_t::s_fix_up_active) {
+ ut_ad(page_comp_info != NULL);
+ c_stream.avail_in = static_cast<uInt>(
+ page_comp_info->field_len);
+ for (ulint i = 0; i < page_comp_info->field_len; i++) {
+ fields[i] = page_comp_info->fields[i];
+ }
+ } else {
+ c_stream.avail_in = static_cast<uInt>(
+ page_zip_fields_encode(
+ n_fields, index, trx_id_col, fields));
+ }
c_stream.next_in = fields;
+
if (UNIV_LIKELY(!trx_id_col)) {
trx_id_col = ULINT_UNDEFINED;
}
@@ -1475,21 +1557,19 @@ err_exit:
fclose(logfile);
}
#endif /* PAGE_ZIP_COMPRESS_DBG */
-#ifndef UNIV_HOTBACKUP
- if (page_is_leaf(page)) {
+ if (page_is_leaf(page) && index) {
dict_index_zip_failure(index);
}
- const ullint time_diff = (my_interval_timer() - ns) / 1000;
+ const uint64_t time_diff = (my_interval_timer() - ns) / 1000;
page_zip_stat[page_zip->ssize - 1].compressed_usec
+= time_diff;
if (cmp_per_index_enabled) {
mutex_enter(&page_zip_stat_per_index_mutex);
- page_zip_stat_per_index[index->id].compressed_usec
+ page_zip_stat_per_index[ind_id].compressed_usec
+= time_diff;
mutex_exit(&page_zip_stat_per_index_mutex);
}
-#endif /* !UNIV_HOTBACKUP */
return(FALSE);
}
@@ -1511,9 +1591,9 @@ err_exit:
#ifdef UNIV_DEBUG
page_zip->m_start =
#endif /* UNIV_DEBUG */
- page_zip->m_end = PAGE_DATA + c_stream.total_out;
+ page_zip->m_end = unsigned(PAGE_DATA + c_stream.total_out);
page_zip->m_nonempty = FALSE;
- page_zip->n_blobs = n_blobs;
+ page_zip->n_blobs = unsigned(n_blobs);
/* Copy those header fields that will not be written
in buf_flush_init_for_writing() */
memcpy(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
@@ -1530,9 +1610,7 @@ err_exit:
#endif /* UNIV_ZIP_DEBUG */
if (mtr) {
-#ifndef UNIV_HOTBACKUP
page_zip_compress_write_log(page_zip, page, index, mtr);
-#endif /* !UNIV_HOTBACKUP */
}
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
@@ -1543,58 +1621,30 @@ err_exit:
byte sz[4];
mach_write_to_4(sz, c_stream.total_out);
fseek(logfile, UNIV_PAGE_SIZE, SEEK_SET);
- fwrite(sz, 1, sizeof sz, logfile);
+ if (fwrite(sz, 1, sizeof sz, logfile) != sizeof sz) {
+ perror("fwrite");
+ }
fclose(logfile);
}
#endif /* PAGE_ZIP_COMPRESS_DBG */
-#ifndef UNIV_HOTBACKUP
- const ullint time_diff = (my_interval_timer() - ns) / 1000;
+ const uint64_t time_diff = (my_interval_timer() - ns) / 1000;
page_zip_stat[page_zip->ssize - 1].compressed_ok++;
page_zip_stat[page_zip->ssize - 1].compressed_usec += time_diff;
if (cmp_per_index_enabled) {
mutex_enter(&page_zip_stat_per_index_mutex);
- page_zip_stat_per_index[index->id].compressed_ok++;
- page_zip_stat_per_index[index->id].compressed_usec += time_diff;
+ page_zip_stat_per_index[ind_id].compressed_ok++;
+ page_zip_stat_per_index[ind_id].compressed_usec += time_diff;
mutex_exit(&page_zip_stat_per_index_mutex);
}
- if (page_is_leaf(page)) {
+ if (page_is_leaf(page) && !truncate_t::s_fix_up_active) {
dict_index_zip_success(index);
}
-#endif /* !UNIV_HOTBACKUP */
return(TRUE);
}
/**********************************************************************//**
-Compare two page directory entries.
-@return positive if rec1 > rec2 */
-UNIV_INLINE
-ibool
-page_zip_dir_cmp(
-/*=============*/
- const rec_t* rec1, /*!< in: rec1 */
- const rec_t* rec2) /*!< in: rec2 */
-{
- return(rec1 > rec2);
-}
-
-/**********************************************************************//**
-Sort the dense page directory by address (heap_no). */
-static
-void
-page_zip_dir_sort(
-/*==============*/
- rec_t** arr, /*!< in/out: dense page directory */
- rec_t** aux_arr,/*!< in/out: work area */
- ulint low, /*!< in: lower bound of the sorting area, inclusive */
- ulint high) /*!< in: upper bound of the sorting area, exclusive */
-{
- UT_SORT_FUNCTION_BODY(page_zip_dir_sort, arr, aux_arr, low, high,
- page_zip_dir_cmp);
-}
-
-/**********************************************************************//**
Deallocate the index information initialized by page_zip_fields_decode(). */
static
void
@@ -1613,16 +1663,17 @@ page_zip_fields_free(
/**********************************************************************//**
Read the index information for the compressed page.
-@return own: dummy index describing the page, or NULL on error */
+@return own: dummy index describing the page, or NULL on error */
static
dict_index_t*
page_zip_fields_decode(
/*===================*/
const byte* buf, /*!< in: index information */
const byte* end, /*!< in: end of buf */
- ulint* trx_id_col)/*!< in: NULL for non-leaf pages;
+ ulint* trx_id_col,/*!< in: NULL for non-leaf pages;
for leaf pages, pointer to where to store
the position of the trx_id column */
+ bool is_spatial)/*!< in: whether the index is spatial */
{
const byte* b;
ulint n;
@@ -1654,12 +1705,12 @@ page_zip_fields_decode(
return(NULL);
}
- table = dict_mem_table_create("ZIP_DUMMY", DICT_HDR_SPACE, n,
+ table = dict_mem_table_create("ZIP_DUMMY", DICT_HDR_SPACE, n, 0,
DICT_TF_COMPACT, 0);
index = dict_mem_index_create("ZIP_DUMMY", "ZIP_DUMMY",
DICT_HDR_SPACE, 0, n);
index->table = table;
- index->n_uniq = n;
+ index->n_uniq = unsigned(n);
/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
index->cached = TRUE;
@@ -1718,19 +1769,23 @@ page_zip_fields_decode(
page_zip_fields_free(index);
index = NULL;
} else {
- index->n_nullable = val;
+ index->n_nullable = unsigned(val);
}
}
ut_ad(b == end);
+ if (is_spatial) {
+ index->type |= DICT_SPATIAL;
+ }
+
return(index);
}
/**********************************************************************//**
Populate the sparse page directory from the dense directory.
-@return TRUE on success, FALSE on failure */
-static
+@return TRUE on success, FALSE on failure */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
ibool
page_zip_dir_decode(
/*================*/
@@ -1741,9 +1796,8 @@ page_zip_dir_decode(
filled in */
rec_t** recs, /*!< out: dense page directory sorted by
ascending address (and heap_no) */
- rec_t** recs_aux,/*!< in/out: scratch area */
ulint n_dense)/*!< in: number of user records, and
- size of recs[] and recs_aux[] */
+ size of recs[] */
{
ulint i;
ulint n_recs;
@@ -1818,15 +1872,13 @@ page_zip_dir_decode(
recs[i] = page + offs;
}
- if (UNIV_LIKELY(n_dense > 1)) {
- page_zip_dir_sort(recs, recs_aux, 0, n_dense);
- }
+ std::sort(recs, recs + n_dense);
return(TRUE);
}
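The hand-rolled merge sort (page_zip_dir_sort and page_zip_dir_cmp,
removed above) is replaced by std::sort with the default comparator:
the directory entries are pointers into a single page buffer, so
ordering by raw address is well defined. A standalone illustration:

	#include <algorithm>
	#include <cassert>

	int main()
	{
		char	page[8];
		char*	recs[3] = { page + 6, page + 2, page + 4 };

		/* ascending address equals ascending heap_no for
		records allocated sequentially from the page heap */
		std::sort(recs, recs + 3);

		assert(recs[0] == page + 2);
		assert(recs[2] == page + 6);
		return 0;
	}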
/**********************************************************************//**
Initialize the REC_N_NEW_EXTRA_BYTES of each record.
-@return TRUE on success, FALSE on failure */
+@return TRUE on success, FALSE on failure */
static
ibool
page_zip_set_extra_bytes(
@@ -1924,13 +1976,13 @@ page_zip_set_extra_bytes(
/**********************************************************************//**
Apply the modification log to a record containing externally stored
columns. Do not copy the fields that are stored separately.
-@return pointer to modification log, or NULL on failure */
+@return pointer to modification log, or NULL on failure */
static
const byte*
page_zip_apply_log_ext(
/*===================*/
rec_t* rec, /*!< in/out: record */
- const ulint* offsets, /*!< in: rec_get_offsets(rec) */
+ const offset_t* offsets, /*!< in: rec_get_offsets(rec) */
ulint trx_id_col, /*!< in: position of DB_TRX_ID */
const byte* data, /*!< in: modification log */
const byte* end) /*!< in: end of modification log */
@@ -1979,8 +2031,8 @@ page_zip_apply_log_ext(
- BTR_EXTERN_FIELD_REF_SIZE;
if (UNIV_UNLIKELY(data + len >= end)) {
- page_zip_fail(("page_zip_apply_log_ext: "
- "ext %p+%lu >= %p\n",
+ page_zip_fail(("page_zip_apply_log_ext:"
+ " ext %p+%lu >= %p\n",
(const void*) data,
(ulong) len,
(const void*) end));
@@ -1997,8 +2049,8 @@ page_zip_apply_log_ext(
/* Copy the last bytes of the record. */
len = rec_get_end(rec, offsets) - next_out;
if (UNIV_UNLIKELY(data + len >= end)) {
- page_zip_fail(("page_zip_apply_log_ext: "
- "last %p+%lu >= %p\n",
+ page_zip_fail(("page_zip_apply_log_ext:"
+ " last %p+%lu >= %p\n",
(const void*) data,
(ulong) len,
(const void*) end));
@@ -2013,7 +2065,7 @@ page_zip_apply_log_ext(
/**********************************************************************//**
Apply the modification log to an uncompressed page.
Do not copy the fields that are stored separately.
-@return pointer to end of modification log, or NULL on failure */
+@return pointer to end of modification log, or NULL on failure */
static
const byte*
page_zip_apply_log(
@@ -2024,13 +2076,14 @@ page_zip_apply_log(
sorted by address (indexed by
heap_no - PAGE_HEAP_NO_USER_LOW) */
ulint n_dense,/*!< in: size of recs[] */
+ bool is_leaf,/*!< in: whether this is a leaf page */
ulint trx_id_col,/*!< in: column number of trx_id in the index,
or ULINT_UNDEFINED if none */
ulint heap_status,
/*!< in: heap_no and status bits for
the next record to uncompress */
dict_index_t* index, /*!< in: index of the page */
- ulint* offsets)/*!< in/out: work area for
+ offset_t* offsets)/*!< in/out: work area for
rec_get_offsets_reverse() */
{
const byte* const end = data + size;
@@ -2098,8 +2151,8 @@ page_zip_apply_log(
if (val & 1) {
/* Clear the data bytes of the record. */
mem_heap_t* heap = NULL;
- ulint* offs;
- offs = rec_get_offsets(rec, index, offsets,
+ offset_t* offs;
+ offs = rec_get_offsets(rec, index, offsets, is_leaf,
ULINT_UNDEFINED, &heap);
memset(rec, 0, rec_offs_data_size(offs));
@@ -2131,8 +2184,8 @@ page_zip_apply_log(
/* Non-leaf nodes should not contain any
externally stored columns. */
if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
- page_zip_fail(("page_zip_apply_log: "
- "%lu&REC_STATUS_NODE_PTR\n",
+ page_zip_fail(("page_zip_apply_log:"
+ " %lu&REC_STATUS_NODE_PTR\n",
(ulong) hs));
return(NULL);
}
@@ -2148,8 +2201,8 @@ page_zip_apply_log(
- REC_NODE_PTR_SIZE;
/* Copy the data bytes, except node_ptr. */
if (UNIV_UNLIKELY(data + len >= end)) {
- page_zip_fail(("page_zip_apply_log: "
- "node_ptr %p+%lu >= %p\n",
+ page_zip_fail(("page_zip_apply_log:"
+ " node_ptr %p+%lu >= %p\n",
(const void*) data,
(ulong) len,
(const void*) end));
@@ -2163,8 +2216,8 @@ page_zip_apply_log(
/* Copy all data bytes of
a record in a secondary index. */
if (UNIV_UNLIKELY(data + len >= end)) {
- page_zip_fail(("page_zip_apply_log: "
- "sec %p+%lu >= %p\n",
+ page_zip_fail(("page_zip_apply_log:"
+ " sec %p+%lu >= %p\n",
(const void*) data,
(ulong) len,
(const void*) end));
@@ -2182,8 +2235,8 @@ page_zip_apply_log(
if (UNIV_UNLIKELY(data + l >= end)
|| UNIV_UNLIKELY(len < (DATA_TRX_ID_LEN
+ DATA_ROLL_PTR_LEN))) {
- page_zip_fail(("page_zip_apply_log: "
- "trx_id %p+%lu >= %p\n",
+ page_zip_fail(("page_zip_apply_log:"
+ " trx_id %p+%lu >= %p\n",
(const void*) data,
(ulong) l,
(const void*) end));
@@ -2198,8 +2251,8 @@ page_zip_apply_log(
b = rec + l + (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
len = rec_get_end(rec, offsets) - b;
if (UNIV_UNLIKELY(data + len >= end)) {
- page_zip_fail(("page_zip_apply_log: "
- "clust %p+%lu >= %p\n",
+ page_zip_fail(("page_zip_apply_log:"
+ " clust %p+%lu >= %p\n",
(const void*) data,
(ulong) len,
(const void*) end));
@@ -2214,7 +2267,7 @@ page_zip_apply_log(
/**********************************************************************//**
Set the heap_no in a record, and skip the fixed-size record header
that is not included in the d_stream.
-@return TRUE on success, FALSE if d_stream does not end at rec */
+@return TRUE on success, FALSE if d_stream does not end at rec */
static
ibool
page_zip_decompress_heap_no(
@@ -2239,7 +2292,7 @@ page_zip_decompress_heap_no(
/**********************************************************************//**
Decompress the records of a node pointer page.
-@return TRUE on success, FALSE on failure */
+@return TRUE on success, FALSE on failure */
static
ibool
page_zip_decompress_node_ptrs(
@@ -2250,7 +2303,7 @@ page_zip_decompress_node_ptrs(
sorted by address */
ulint n_dense, /*!< in: size of recs[] */
dict_index_t* index, /*!< in: the index of the page */
- ulint* offsets, /*!< in/out: temporary offsets */
+ offset_t* offsets, /*!< in/out: temporary offsets */
mem_heap_t* heap) /*!< in: temporary memory heap */
{
ulint heap_status = REC_STATUS_NODE_PTR
@@ -2295,7 +2348,7 @@ page_zip_decompress_node_ptrs(
}
/* Read the offsets. The status bits are needed here. */
- offsets = rec_get_offsets(rec, index, offsets,
+ offsets = rec_get_offsets(rec, index, offsets, false,
ULINT_UNDEFINED, &heap);
/* Non-leaf nodes should not have any externally
@@ -2373,7 +2426,7 @@ zlib_done:
}
#ifdef UNIV_DEBUG
- page_zip->m_start = PAGE_DATA + d_stream->total_in;
+ page_zip->m_start = unsigned(PAGE_DATA + d_stream->total_in);
#endif /* UNIV_DEBUG */
/* Apply the modification log. */
@@ -2381,14 +2434,14 @@ zlib_done:
const byte* mod_log_ptr;
mod_log_ptr = page_zip_apply_log(d_stream->next_in,
d_stream->avail_in + 1,
- recs, n_dense,
+ recs, n_dense, false,
ULINT_UNDEFINED, heap_status,
index, offsets);
if (UNIV_UNLIKELY(!mod_log_ptr)) {
return(FALSE);
}
- page_zip->m_end = mod_log_ptr - page_zip->data;
+ page_zip->m_end = unsigned(mod_log_ptr - page_zip->data);
page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
}
@@ -2412,7 +2465,7 @@ zlib_done:
for (slot = 0; slot < n_dense; slot++) {
rec_t* rec = recs[slot];
- offsets = rec_get_offsets(rec, index, offsets,
+ offsets = rec_get_offsets(rec, index, offsets, false,
ULINT_UNDEFINED, &heap);
/* Non-leaf nodes should not have any externally
stored columns. */
@@ -2428,7 +2481,7 @@ zlib_done:
/**********************************************************************//**
Decompress the records of a leaf node of a secondary index.
-@return TRUE on success, FALSE on failure */
+@return TRUE on success, FALSE on failure */
static
ibool
page_zip_decompress_sec(
@@ -2439,7 +2492,7 @@ page_zip_decompress_sec(
sorted by address */
ulint n_dense, /*!< in: size of recs[] */
dict_index_t* index, /*!< in: the index of the page */
- ulint* offsets) /*!< in/out: temporary offsets */
+ offset_t* offsets) /*!< in/out: temporary offsets */
{
ulint heap_status = REC_STATUS_ORDINARY
| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
@@ -2526,23 +2579,21 @@ zlib_done:
- d_stream->next_out);
}
-#ifdef UNIV_DEBUG
- page_zip->m_start = PAGE_DATA + d_stream->total_in;
-#endif /* UNIV_DEBUG */
+ ut_d(page_zip->m_start = unsigned(PAGE_DATA + d_stream->total_in));
/* Apply the modification log. */
{
const byte* mod_log_ptr;
mod_log_ptr = page_zip_apply_log(d_stream->next_in,
d_stream->avail_in + 1,
- recs, n_dense,
+ recs, n_dense, true,
ULINT_UNDEFINED, heap_status,
index, offsets);
if (UNIV_UNLIKELY(!mod_log_ptr)) {
return(FALSE);
}
- page_zip->m_end = mod_log_ptr - page_zip->data;
+ page_zip->m_end = unsigned(mod_log_ptr - page_zip->data);
page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
}
@@ -2566,14 +2617,14 @@ zlib_done:
/**********************************************************************//**
Decompress a record of a leaf node of a clustered index that contains
externally stored columns.
-@return TRUE on success */
+@return TRUE on success */
static
ibool
page_zip_decompress_clust_ext(
/*==========================*/
z_stream* d_stream, /*!< in/out: compressed page stream */
rec_t* rec, /*!< in/out: record */
- const ulint* offsets, /*!< in: rec_get_offsets(rec) */
+ const offset_t* offsets, /*!< in: rec_get_offsets(rec) */
ulint trx_id_col) /*!< in: position of DB_TRX_ID */
{
ulint i;
@@ -2676,7 +2727,7 @@ page_zip_decompress_clust_ext(
/**********************************************************************//**
Compress the records of a leaf node of a clustered index.
-@return TRUE on success, FALSE on failure */
+@return TRUE on success, FALSE on failure */
static
ibool
page_zip_decompress_clust(
@@ -2688,7 +2739,7 @@ page_zip_decompress_clust(
ulint n_dense, /*!< in: size of recs[] */
dict_index_t* index, /*!< in: the index of the page */
ulint trx_id_col, /*!< in: index of the trx_id column */
- ulint* offsets, /*!< in/out: temporary offsets */
+ offset_t* offsets, /*!< in/out: temporary offsets */
mem_heap_t* heap) /*!< in: temporary memory heap */
{
int err;
@@ -2702,9 +2753,7 @@ page_zip_decompress_clust(
/* Subtract the space reserved for uncompressed data. */
d_stream->avail_in -= static_cast<uInt>(n_dense)
- * (PAGE_ZIP_DIR_SLOT_SIZE
- + DATA_TRX_ID_LEN
- + DATA_ROLL_PTR_LEN);
+ * (PAGE_ZIP_CLUST_LEAF_SLOT_SIZE);
/* Decompress the records in heap_no order. */
for (slot = 0; slot < n_dense; slot++) {
@@ -2740,7 +2789,7 @@ page_zip_decompress_clust(
}
/* Read the offsets. The status bits are needed here. */
- offsets = rec_get_offsets(rec, index, offsets,
+ offsets = rec_get_offsets(rec, index, offsets, true,
ULINT_UNDEFINED, &heap);
/* This is a leaf page in a clustered index. */
@@ -2859,23 +2908,21 @@ zlib_done:
- d_stream->next_out);
}
-#ifdef UNIV_DEBUG
- page_zip->m_start = PAGE_DATA + d_stream->total_in;
-#endif /* UNIV_DEBUG */
+ ut_d(page_zip->m_start = unsigned(PAGE_DATA + d_stream->total_in));
/* Apply the modification log. */
{
const byte* mod_log_ptr;
mod_log_ptr = page_zip_apply_log(d_stream->next_in,
d_stream->avail_in + 1,
- recs, n_dense,
+ recs, n_dense, true,
trx_id_col, heap_status,
index, offsets);
if (UNIV_UNLIKELY(!mod_log_ptr)) {
return(FALSE);
}
- page_zip->m_end = mod_log_ptr - page_zip->data;
+ page_zip->m_end = unsigned(mod_log_ptr - page_zip->data);
page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
}
@@ -2904,7 +2951,7 @@ zlib_done:
rec_t* rec = recs[slot];
ibool exists = !page_zip_dir_find_free(
page_zip, page_offset(rec));
- offsets = rec_get_offsets(rec, index, offsets,
+ offsets = rec_get_offsets(rec, index, offsets, true,
ULINT_UNDEFINED, &heap);
dst = rec_get_nth_field(rec, offsets,
@@ -2946,8 +2993,8 @@ zlib_done:
(externs < page_zip->data
+ page_zip->m_end)) {
page_zip_fail(("page_zip_"
- "decompress_clust: "
- "%p < %p + %lu\n",
+ "decompress_clust:"
+ " %p < %p + %lu\n",
(const void*) externs,
(const void*)
page_zip->data,
@@ -2976,11 +3023,11 @@ zlib_done:
Decompress a page. This function should tolerate errors on the compressed
page. Instead of letting assertions fail, it will return FALSE if an
inconsistency is detected.
-@return TRUE on success, FALSE on failure */
-UNIV_INTERN
+@return TRUE on success, FALSE on failure */
+static
ibool
-page_zip_decompress(
-/*================*/
+page_zip_decompress_low(
+/*====================*/
page_zip_des_t* page_zip,/*!< in: data, ssize;
out: m_start, m_end, m_nonempty, n_blobs */
page_t* page, /*!< out: uncompressed page, may be trashed */
@@ -2995,10 +3042,7 @@ page_zip_decompress(
ulint n_dense;/* number of user records on the page */
ulint trx_id_col = ULINT_UNDEFINED;
mem_heap_t* heap;
- ulint* offsets;
-#ifndef UNIV_HOTBACKUP
- const ulonglong ns = my_interval_timer();
-#endif /* !UNIV_HOTBACKUP */
+ offset_t* offsets;
ut_ad(page_zip_simple_validate(page_zip));
UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
@@ -3017,7 +3061,7 @@ page_zip_decompress(
heap = mem_heap_create(n_dense * (3 * sizeof *recs) + UNIV_PAGE_SIZE);
recs = static_cast<rec_t**>(
- mem_heap_alloc(heap, n_dense * (2 * sizeof *recs)));
+ mem_heap_alloc(heap, n_dense * sizeof *recs));
if (all) {
/* Copy the page header. */
@@ -3052,7 +3096,7 @@ page_zip_decompress(
/* Copy the page directory. */
if (UNIV_UNLIKELY(!page_zip_dir_decode(page_zip, page, recs,
- recs + n_dense, n_dense))) {
+ n_dense))) {
zlib_error:
mem_heap_free(heap);
return(FALSE);
@@ -3081,7 +3125,7 @@ zlib_error:
d_stream.avail_in = static_cast<uInt>(
page_zip_get_size(page_zip) - (PAGE_DATA + 1));
d_stream.next_out = page + PAGE_ZIP_START;
- d_stream.avail_out = UNIV_PAGE_SIZE - PAGE_ZIP_START;
+ d_stream.avail_out = uInt(UNIV_PAGE_SIZE - PAGE_ZIP_START);
if (UNIV_UNLIKELY(inflateInit2(&d_stream, UNIV_PAGE_SIZE_SHIFT)
!= Z_OK)) {
@@ -3105,7 +3149,8 @@ zlib_error:
index = page_zip_fields_decode(
page + PAGE_ZIP_START, d_stream.next_out,
- page_is_leaf(page) ? &trx_id_col : NULL);
+ page_is_leaf(page) ? &trx_id_col : NULL,
+ fil_page_get_type(page) == FIL_PAGE_RTREE);
if (UNIV_UNLIKELY(!index)) {
@@ -3121,10 +3166,10 @@ zlib_error:
ulint n = 1 + 1/* node ptr */ + REC_OFFS_HEADER_SIZE
+ dict_index_get_n_fields(index);
- offsets = static_cast<ulint*>(
+ offsets = static_cast<offset_t*>(
mem_heap_alloc(heap, n * sizeof(ulint)));
- *offsets = n;
+ rec_offs_set_n_alloc(offsets, n);
}
/* Decompress the records in heap_no order. */
@@ -3139,8 +3184,7 @@ zlib_error:
goto err_exit;
}
- info_bits = mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL
- ? REC_INFO_MIN_REC_FLAG : 0;
+ info_bits = page_has_prev(page) ? 0 : REC_INFO_MIN_REC_FLAG;
if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip, page,
info_bits))) {
@@ -3182,7 +3226,32 @@ err_exit:
page_zip_fields_free(index);
mem_heap_free(heap);
-#ifndef UNIV_HOTBACKUP
+
+ return(TRUE);
+}
+
+/**********************************************************************//**
+Decompress a page. This function should tolerate errors on the compressed
+page. Instead of letting assertions fail, it will return FALSE if an
+inconsistency is detected.
+@return TRUE on success, FALSE on failure */
+ibool
+page_zip_decompress(
+/*================*/
+ page_zip_des_t* page_zip,/*!< in: data, ssize;
+ out: m_start, m_end, m_nonempty, n_blobs */
+ page_t* page, /*!< out: uncompressed page, may be trashed */
+ ibool all) /*!< in: TRUE=decompress the whole page;
+ FALSE=verify but do not copy some
+ page header fields that should not change
+ after page creation */
+{
+ const ulonglong ns = my_interval_timer();
+
+ if (!page_zip_decompress_low(page_zip, page, all)) {
+ return(FALSE);
+ }
+
const uint64_t time_diff = (my_interval_timer() - ns) / 1000;
page_zip_stat[page_zip->ssize - 1].decompressed++;
page_zip_stat[page_zip->ssize - 1].decompressed_usec += time_diff;
@@ -3195,7 +3264,6 @@ err_exit:
page_zip_stat_per_index[index_id].decompressed_usec += time_diff;
mutex_exit(&page_zip_stat_per_index_mutex);
}
-#endif /* !UNIV_HOTBACKUP */
/* Update the stat counter for LRU policy. */
buf_LRU_stat_inc_unzip();
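The refactor splits the worker (page_zip_decompress_low) from the
public entry point so that page_zip_validate_low() can decompress
without inflating the statistics, while the entry point keeps the
timing and counters. A hedged sketch of the shape, using std::chrono
in place of my_interval_timer() and stand-in names throughout:

	#include <chrono>
	#include <cstdio>

	static unsigned long long decompressed;
	static unsigned long long decompressed_usec;

	static bool decompress_low()
	{
		return true;	/* stand-in for the real worker */
	}

	static bool decompress()
	{
		const auto t0 = std::chrono::steady_clock::now();

		if (!decompress_low()) {
			return false;	/* failures are not counted */
		}

		decompressed++;
		decompressed_usec += (unsigned long long)
			std::chrono::duration_cast<
				std::chrono::microseconds>(
				std::chrono::steady_clock::now() - t0)
			.count();
		return true;
	}

	int main()
	{
		decompress();
		printf("%llu ok, %llu usec\n",
		       decompressed, decompressed_usec);
		return 0;
	}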
@@ -3238,17 +3306,16 @@ page_zip_hexdump_func(
}
/** Dump a block of memory on the standard error stream.
-@param buf in: data
-@param size in: length of the data, in bytes */
+@param buf in: data
+@param size in: length of the data, in bytes */
#define page_zip_hexdump(buf, size) page_zip_hexdump_func(#buf, buf, size)
/** Flag: make page_zip_validate() compare page headers only */
-UNIV_INTERN ibool page_zip_validate_header_only = FALSE;
+ibool page_zip_validate_header_only = FALSE;
/**********************************************************************//**
Check that the compressed and decompressed pages match.
-@return TRUE if valid, FALSE if not */
-UNIV_INTERN
+@return TRUE if valid, FALSE if not */
ibool
page_zip_validate_low(
/*==================*/
@@ -3267,7 +3334,19 @@ page_zip_validate_low(
FIL_PAGE_LSN - FIL_PAGE_PREV)
|| memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2)
|| memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
- PAGE_DATA - FIL_PAGE_DATA)) {
+ PAGE_ROOT_AUTO_INC)
+ /* The PAGE_ROOT_AUTO_INC can be updated while holding an SX-latch
+ on the clustered index root page (page number 3 in .ibd files).
+ That allows concurrent readers (holding buf_block_t::lock S-latch).
+ Because we do not know what type of a latch our caller is holding,
+ we will ignore the field on clustered index root pages in order
+ to avoid false positives. */
+ || (page_get_page_no(page) != 3/* clustered index root page */
+ && memcmp(&page_zip->data[FIL_PAGE_DATA + PAGE_ROOT_AUTO_INC],
+ &page[FIL_PAGE_DATA + PAGE_ROOT_AUTO_INC], 8))
+ || memcmp(&page_zip->data[FIL_PAGE_DATA + PAGE_HEADER_PRIV_END],
+ &page[FIL_PAGE_DATA + PAGE_HEADER_PRIV_END],
+ PAGE_DATA - FIL_PAGE_DATA - PAGE_HEADER_PRIV_END)) {
page_zip_fail(("page_zip_validate: page header\n"));
page_zip_hexdump(page_zip, sizeof *page_zip);
page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
@@ -3283,14 +3362,14 @@ page_zip_validate_low(
/* page_zip_decompress() expects the uncompressed page to be
UNIV_PAGE_SIZE aligned. */
- temp_page_buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
+ temp_page_buf = static_cast<byte*>(ut_malloc_nokey(2 * UNIV_PAGE_SIZE));
temp_page = static_cast<byte*>(ut_align(temp_page_buf, UNIV_PAGE_SIZE));
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
temp_page_zip = *page_zip;
- valid = page_zip_decompress(&temp_page_zip, temp_page, TRUE);
+ valid = page_zip_decompress_low(&temp_page_zip, temp_page, TRUE);
if (!valid) {
fputs("page_zip_validate(): failed to decompress\n", stderr);
goto func_exit;
@@ -3326,7 +3405,7 @@ page_zip_validate_low(
committed. Let us tolerate that difference when we
are performing a sloppy validation. */
- ulint* offsets;
+ offset_t* offsets;
mem_heap_t* heap;
const rec_t* rec;
const rec_t* trec;
@@ -3348,15 +3427,24 @@ page_zip_validate_low(
/* Only the minimum record flag
differed. Let us ignore it. */
- page_zip_fail(("page_zip_validate: "
- "min_rec_flag "
- "(%s"
- "%lu,%lu,0x%02lx)\n",
+ page_zip_fail(("page_zip_validate:"
+ " min_rec_flag"
+ " (%s" ULINTPF "," ULINTPF
+ ",0x%02x)\n",
sloppy ? "ignored, " : "",
page_get_space_id(page),
page_get_page_no(page),
- (ulong) page[offset]));
- valid = sloppy;
+ page[offset]));
+ /* We do not check this for a spatial index,
+ since the "minimum record" could be deleted
+ by rtr_update_mbr_field().
+ GIS_FIXME: need to validate why
+ rtr_update_mbr_field() could affect this */
+ if (index && dict_index_is_spatial(index)) {
+ valid = true;
+ } else {
+ valid = sloppy;
+ }
goto func_exit;
}
}
@@ -3367,8 +3455,8 @@ page_zip_validate_low(
while (rec || trec) {
if (page_offset(rec) != page_offset(trec)) {
- page_zip_fail(("page_zip_validate: "
- "PAGE_FREE list: %u!=%u\n",
+ page_zip_fail(("page_zip_validate:"
+ " PAGE_FREE list: %u!=%u\n",
(unsigned) page_offset(rec),
(unsigned) page_offset(trec)));
valid = FALSE;
@@ -3386,11 +3474,12 @@ page_zip_validate_low(
page + PAGE_NEW_INFIMUM, TRUE);
trec = page_rec_get_next_low(
temp_page + PAGE_NEW_INFIMUM, TRUE);
+ ut_d(const bool is_leaf = page_is_leaf(page));
do {
if (page_offset(rec) != page_offset(trec)) {
- page_zip_fail(("page_zip_validate: "
- "record list: 0x%02x!=0x%02x\n",
+ page_zip_fail(("page_zip_validate:"
+ " record list: 0x%02x!=0x%02x\n",
(unsigned) page_offset(rec),
(unsigned) page_offset(trec)));
valid = FALSE;
@@ -3400,15 +3489,15 @@ page_zip_validate_low(
if (index) {
/* Compare the data. */
offsets = rec_get_offsets(
- rec, index, offsets,
+ rec, index, offsets, is_leaf,
ULINT_UNDEFINED, &heap);
if (memcmp(rec - rec_offs_extra_size(offsets),
trec - rec_offs_extra_size(offsets),
rec_offs_size(offsets))) {
page_zip_fail(
- ("page_zip_validate: "
- "record content: 0x%02x",
+ ("page_zip_validate:"
+ " record content: 0x%02x",
(unsigned) page_offset(rec)));
valid = FALSE;
break;
@@ -3437,8 +3526,7 @@ func_exit:
/**********************************************************************//**
Check that the compressed and decompressed pages match.
-@return TRUE if valid, FALSE if not */
-UNIV_INTERN
+@return TRUE if valid, FALSE if not */
ibool
page_zip_validate(
/*==============*/
@@ -3454,7 +3542,7 @@ page_zip_validate(
#ifdef UNIV_DEBUG
/**********************************************************************//**
Assert that the compressed and decompressed page headers match.
-@return TRUE */
+@return TRUE */
static
ibool
page_zip_header_cmp(
@@ -3476,7 +3564,7 @@ page_zip_header_cmp(
/**********************************************************************//**
Write a record on the compressed page that contains externally stored
columns. The data must already have been written to the uncompressed page.
-@return end of modification log */
+@return end of modification log */
static
byte*
page_zip_write_rec_ext(
@@ -3485,7 +3573,7 @@ page_zip_write_rec_ext(
const page_t* page, /*!< in: page containing rec */
const byte* rec, /*!< in: record being written */
dict_index_t* index, /*!< in: record descriptor */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets, /*!< in: rec_get_offsets(rec, index) */
ulint create, /*!< in: nonzero=insert, zero=update */
ulint trx_id_col, /*!< in: position of DB_TRX_ID */
ulint heap_no, /*!< in: heap number of rec */
@@ -3598,14 +3686,13 @@ page_zip_write_rec_ext(
/**********************************************************************//**
Write an entire record on the compressed page. The data must already
have been written to the uncompressed page. */
-UNIV_INTERN
void
page_zip_write_rec(
/*===============*/
page_zip_des_t* page_zip,/*!< in/out: compressed page */
const byte* rec, /*!< in: record being written */
dict_index_t* index, /*!< in: the index the record belongs to */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
ulint create) /*!< in: nonzero=insert, zero=update */
{
const page_t* page;
@@ -3614,7 +3701,6 @@ page_zip_write_rec(
ulint heap_no;
byte* slot;
- ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
ut_ad(page_zip_simple_validate(page_zip));
ut_ad(page_zip_get_size(page_zip)
> PAGE_DATA + page_zip_dir_size(page_zip));
@@ -3637,6 +3723,11 @@ page_zip_write_rec(
ut_a(slot);
/* Copy the delete mark. */
if (rec_get_deleted_flag(rec, TRUE)) {
+ /* In delete-marked records, DB_TRX_ID must
+ always refer to an existing undo log record.
+ On non-leaf pages, the delete-mark flag is garbage. */
+ ut_ad(!index->is_primary() || !page_is_leaf(page)
+ || row_get_rec_trx_id(rec, index, offsets));
*slot |= PAGE_ZIP_DIR_SLOT_DEL >> 8;
} else {
*slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
@@ -3773,7 +3864,7 @@ page_zip_write_rec(
ut_a(!*data);
ut_ad((ulint) (data - page_zip->data) < page_zip_get_size(page_zip));
- page_zip->m_end = data - page_zip->data;
+ page_zip->m_end = unsigned(data - page_zip->data);
page_zip->m_nonempty = TRUE;
#ifdef UNIV_ZIP_DEBUG
@@ -3783,8 +3874,7 @@ page_zip_write_rec(
/***********************************************************//**
Parses a log record of writing a BLOB pointer of a record.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
page_zip_parse_write_blob_ptr(
/*==========================*/
@@ -3796,6 +3886,8 @@ page_zip_parse_write_blob_ptr(
ulint offset;
ulint z_offset;
+ ut_ad(ptr != NULL);
+ ut_ad(end_ptr != NULL);
ut_ad(!page == !page_zip);
if (UNIV_UNLIKELY
@@ -3807,9 +3899,9 @@ page_zip_parse_write_blob_ptr(
offset = mach_read_from_2(ptr);
z_offset = mach_read_from_2(ptr + 2);
- if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
- || UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
- || UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
+ if (offset < PAGE_ZIP_START
+ || offset >= UNIV_PAGE_SIZE
+ || z_offset >= UNIV_PAGE_SIZE) {
corrupt:
recv_sys->found_corrupt_log = TRUE;
@@ -3817,8 +3909,8 @@ corrupt:
}
if (page) {
- if (UNIV_UNLIKELY(!page_zip)
- || UNIV_UNLIKELY(!page_is_leaf(page))) {
+
+ if (!page_zip || !page_is_leaf(page)) {
goto corrupt;
}
@@ -3843,7 +3935,6 @@ corrupt:
/**********************************************************************//**
Write a BLOB pointer of a record on the leaf page of a clustered index.
The information must already have been updated on the uncompressed page. */
-UNIV_INTERN
void
page_zip_write_blob_ptr(
/*====================*/
@@ -3851,7 +3942,7 @@ page_zip_write_blob_ptr(
const byte* rec, /*!< in/out: record whose data is being
written */
dict_index_t* index, /*!< in: index of the page */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
ulint n, /*!< in: column index */
mtr_t* mtr) /*!< in: mini-transaction handle,
or NULL if no logging is needed */
@@ -3862,7 +3953,10 @@ page_zip_write_blob_ptr(
ulint blob_no;
ulint len;
- ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
+ ut_ad(page_zip != NULL);
+ ut_ad(rec != NULL);
+ ut_ad(index != NULL);
+ ut_ad(offsets != NULL);
ut_ad(page_simple_validate_new((page_t*) page));
ut_ad(page_zip_simple_validate(page_zip));
ut_ad(page_zip_get_size(page_zip)
@@ -3889,8 +3983,7 @@ page_zip_write_blob_ptr(
externs = page_zip->data + page_zip_get_size(page_zip)
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
- * (PAGE_ZIP_DIR_SLOT_SIZE
- + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+ * PAGE_ZIP_CLUST_LEAF_SLOT_SIZE;
field = rec_get_nth_field(rec, offsets, n, &len);
@@ -3904,7 +3997,6 @@ page_zip_write_blob_ptr(
#endif /* UNIV_ZIP_DEBUG */
if (mtr) {
-#ifndef UNIV_HOTBACKUP
byte* log_ptr = mlog_open(
mtr, 11 + 2 + 2 + BTR_EXTERN_FIELD_REF_SIZE);
if (UNIV_UNLIKELY(!log_ptr)) {
@@ -3920,14 +4012,12 @@ page_zip_write_blob_ptr(
memcpy(log_ptr, externs, BTR_EXTERN_FIELD_REF_SIZE);
log_ptr += BTR_EXTERN_FIELD_REF_SIZE;
mlog_close(mtr, log_ptr);
-#endif /* !UNIV_HOTBACKUP */
}
}
/***********************************************************//**
Parses a log record of writing the node pointer of a record.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
page_zip_parse_write_node_ptr(
/*==========================*/
@@ -3939,6 +4029,8 @@ page_zip_parse_write_node_ptr(
ulint offset;
ulint z_offset;
+ ut_ad(ptr != NULL);
+ ut_ad(end_ptr != NULL);
ut_ad(!page == !page_zip);
if (UNIV_UNLIKELY(end_ptr < ptr + (2 + 2 + REC_NODE_PTR_SIZE))) {
@@ -3949,9 +4041,9 @@ page_zip_parse_write_node_ptr(
offset = mach_read_from_2(ptr);
z_offset = mach_read_from_2(ptr + 2);
- if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
- || UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
- || UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
+ if (offset < PAGE_ZIP_START
+ || offset >= UNIV_PAGE_SIZE
+ || z_offset >= UNIV_PAGE_SIZE) {
corrupt:
recv_sys->found_corrupt_log = TRUE;
@@ -3964,8 +4056,7 @@ corrupt:
byte* storage;
ulint heap_no;
- if (UNIV_UNLIKELY(!page_zip)
- || UNIV_UNLIKELY(page_is_leaf(page))) {
+ if (!page_zip || page_is_leaf(page)) {
goto corrupt;
}
@@ -4001,7 +4092,6 @@ corrupt:
/**********************************************************************//**
Write the node pointer of a record on a non-leaf compressed page. */
-UNIV_INTERN
void
page_zip_write_node_ptr(
/*====================*/
@@ -4017,7 +4107,6 @@ page_zip_write_node_ptr(
page_t* page = page_align(rec);
#endif /* UNIV_DEBUG */
- ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
ut_ad(page_simple_validate_new(page));
ut_ad(page_zip_simple_validate(page_zip));
ut_ad(page_zip_get_size(page_zip)
@@ -4046,7 +4135,6 @@ page_zip_write_node_ptr(
memcpy(storage, field, REC_NODE_PTR_SIZE);
if (mtr) {
-#ifndef UNIV_HOTBACKUP
byte* log_ptr = mlog_open(mtr,
11 + 2 + 2 + REC_NODE_PTR_SIZE);
if (UNIV_UNLIKELY(!log_ptr)) {
@@ -4062,19 +4150,17 @@ page_zip_write_node_ptr(
memcpy(log_ptr, field, REC_NODE_PTR_SIZE);
log_ptr += REC_NODE_PTR_SIZE;
mlog_close(mtr, log_ptr);
-#endif /* !UNIV_HOTBACKUP */
}
}
/**********************************************************************//**
Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */
-UNIV_INTERN
void
page_zip_write_trx_id_and_roll_ptr(
/*===============================*/
page_zip_des_t* page_zip,/*!< in/out: compressed page */
byte* rec, /*!< in/out: record */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
ulint trx_id_col,/*!< in: column number of TRX_ID in rec */
trx_id_t trx_id, /*!< in: transaction identifier */
roll_ptr_t roll_ptr)/*!< in: roll_ptr */
@@ -4086,8 +4172,6 @@ page_zip_write_trx_id_and_roll_ptr(
#endif /* UNIV_DEBUG */
ulint len;
- ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
-
ut_ad(page_simple_validate_new(page));
ut_ad(page_zip_simple_validate(page_zip));
ut_ad(page_zip_get_size(page_zip)
@@ -4143,7 +4227,7 @@ page_zip_clear_rec(
page_zip_des_t* page_zip, /*!< in/out: compressed page */
byte* rec, /*!< in: record to clear */
const dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets) /*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets) /*!< in: rec_get_offsets(rec, index) */
{
ulint heap_no;
page_t* page = page_align(rec);
@@ -4170,7 +4254,7 @@ page_zip_clear_rec(
there is an array of node_ptr immediately before the
dense page directory, at the very end of the page. */
storage = page_zip_dir_start(page_zip);
- ut_ad(dict_index_get_n_unique_in_tree(index) ==
+ ut_ad(dict_index_get_n_unique_in_tree_nonleaf(index) ==
rec_offs_n_fields(offsets) - 1);
field = rec_get_nth_field(rec, offsets,
rec_offs_n_fields(offsets) - 1,
@@ -4218,16 +4302,11 @@ page_zip_clear_rec(
} else {
ut_ad(!rec_offs_any_extern(offsets));
}
-
-#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
}
/**********************************************************************//**
Write the "deleted" flag of a record on a compressed page. The flag must
already have been written on the uncompressed page. */
-UNIV_INTERN
void
page_zip_rec_set_deleted(
/*=====================*/
@@ -4251,7 +4330,6 @@ page_zip_rec_set_deleted(
/**********************************************************************//**
Write the "owned" flag of a record on a compressed page. The n_owned field
must already have been written on the uncompressed page. */
-UNIV_INTERN
void
page_zip_rec_set_owned(
/*===================*/
@@ -4271,7 +4349,6 @@ page_zip_rec_set_owned(
/**********************************************************************//**
Insert a record to the dense page directory. */
-UNIV_INTERN
void
page_zip_dir_insert(
/*================*/
@@ -4350,14 +4427,13 @@ page_zip_dir_insert(
/**********************************************************************//**
Shift the dense page directory and the array of BLOB pointers
when a record is deleted. */
-UNIV_INTERN
void
page_zip_dir_delete(
/*================*/
page_zip_des_t* page_zip, /*!< in/out: compressed page */
byte* rec, /*!< in: deleted record */
const dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets, /*!< in: rec_get_offsets(rec) */
+ const offset_t* offsets, /*!< in: rec_get_offsets(rec) */
const byte* free) /*!< in: previous start of
the free list */
{
@@ -4377,10 +4453,12 @@ page_zip_dir_delete(
slot_rec = page_zip_dir_find(page_zip, page_offset(rec));
ut_a(slot_rec);
-
+ uint16_t n_recs = page_get_n_recs(page);
+ ut_ad(n_recs);
+ ut_ad(n_recs > 1 || page_get_page_no(page) == index->page);
/* This could not be done before page_zip_dir_find(). */
page_header_set_field(page, page_zip, PAGE_N_RECS,
- (ulint)(page_get_n_recs(page) - 1));
+ n_recs - 1);
if (UNIV_UNLIKELY(!free)) {
/* Make the last slot the start of the free list. */
@@ -4423,8 +4501,7 @@ page_zip_dir_delete(
externs = page_zip->data + page_zip_get_size(page_zip)
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
- * (PAGE_ZIP_DIR_SLOT_SIZE
- + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+ * PAGE_ZIP_CLUST_LEAF_SLOT_SIZE;
ext_end = externs - page_zip->n_blobs
* BTR_EXTERN_FIELD_REF_SIZE;
@@ -4448,7 +4525,6 @@ skip_blobs:
/**********************************************************************//**
Add a slot to the dense page directory. */
-UNIV_INTERN
void
page_zip_dir_add_slot(
/*==================*/
@@ -4482,19 +4558,15 @@ page_zip_dir_add_slot(
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
externs = stored
- page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
- ASSERT_ZERO(externs
- - (PAGE_ZIP_DIR_SLOT_SIZE
- + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
- PAGE_ZIP_DIR_SLOT_SIZE
- + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
- memmove(externs - (PAGE_ZIP_DIR_SLOT_SIZE
- + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
+ ASSERT_ZERO(externs - PAGE_ZIP_CLUST_LEAF_SLOT_SIZE,
+ PAGE_ZIP_CLUST_LEAF_SLOT_SIZE);
+ memmove(externs - PAGE_ZIP_CLUST_LEAF_SLOT_SIZE,
externs, stored - externs);
} else {
stored = dir
- page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
ASSERT_ZERO(stored - PAGE_ZIP_DIR_SLOT_SIZE,
- PAGE_ZIP_DIR_SLOT_SIZE);
+ static_cast<size_t>(PAGE_ZIP_DIR_SLOT_SIZE));
}
/* Move the uncompressed area backwards to make space
@@ -4504,8 +4576,7 @@ page_zip_dir_add_slot(
/***********************************************************//**
Parses a log record of writing to the header of a page.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
page_zip_parse_write_header(
/*========================*/
@@ -4517,7 +4588,8 @@ page_zip_parse_write_header(
ulint offset;
ulint len;
- ut_ad(ptr && end_ptr);
+ ut_ad(ptr != NULL);
+ ut_ad(end_ptr != NULL);
ut_ad(!page == !page_zip);
if (UNIV_UNLIKELY(end_ptr < ptr + (1 + 1))) {
@@ -4528,20 +4600,20 @@ page_zip_parse_write_header(
offset = (ulint) *ptr++;
len = (ulint) *ptr++;
- if (UNIV_UNLIKELY(!len) || UNIV_UNLIKELY(offset + len >= PAGE_DATA)) {
+ if (len == 0 || offset + len >= PAGE_DATA) {
corrupt:
recv_sys->found_corrupt_log = TRUE;
return(NULL);
}
- if (UNIV_UNLIKELY(end_ptr < ptr + len)) {
+ if (end_ptr < ptr + len) {
return(NULL);
}
if (page) {
- if (UNIV_UNLIKELY(!page_zip)) {
+ if (!page_zip) {
goto corrupt;
}
@@ -4560,10 +4632,8 @@ corrupt:
return(ptr + len);
}
-#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Write a log record of writing to the uncompressed header portion of a page. */
-UNIV_INTERN
void
page_zip_write_header_log(
/*======================*/
@@ -4579,6 +4649,7 @@ page_zip_write_header_log(
#if PAGE_DATA > 255
# error "PAGE_DATA > 255"
#endif
+ ut_ad(length > 0);
ut_ad(length < 256);
/* If no logging is requested, we may return now */
@@ -4595,7 +4666,6 @@ page_zip_write_header_log(
mlog_catenate_string(mtr, data, length);
}
-#endif /* !UNIV_HOTBACKUP */
/**********************************************************************//**
Reorganize and compress a page. This is a low-level operation for
@@ -4608,7 +4678,6 @@ bits in the same mini-transaction in such a way that the modification
will be redo-logged.
@return TRUE on success, FALSE on failure; page_zip will be left
intact on failure, but page will be overwritten. */
-UNIV_INTERN
ibool
page_zip_reorganize(
/*================*/
@@ -4619,44 +4688,34 @@ page_zip_reorganize(
dict_index_t* index, /*!< in: index of the B-tree node */
mtr_t* mtr) /*!< in: mini-transaction */
{
-#ifndef UNIV_HOTBACKUP
buf_pool_t* buf_pool = buf_pool_from_block(block);
-#endif /* !UNIV_HOTBACKUP */
page_zip_des_t* page_zip = buf_block_get_page_zip(block);
page_t* page = buf_block_get_frame(block);
buf_block_t* temp_block;
page_t* temp_page;
- ulint log_mode;
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
ut_ad(page_is_comp(page));
ut_ad(!dict_index_is_ibuf(index));
+ ut_ad(!dict_table_is_temporary(index->table));
/* Note that page_zip_validate(page_zip, page, index) may fail here. */
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
/* Disable logging */
- log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
+ mtr_log_t log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
-#ifndef UNIV_HOTBACKUP
temp_block = buf_block_alloc(buf_pool);
btr_search_drop_page_hash_index(block);
- block->check_index_page_at_flush = TRUE;
-#else /* !UNIV_HOTBACKUP */
- ut_ad(block == back_block1);
- temp_block = back_block2;
-#endif /* !UNIV_HOTBACKUP */
temp_page = temp_block->frame;
/* Copy the old page to temporary space */
buf_frame_copy(temp_page, page);
- btr_blob_dbg_remove(page, index, "zip_reorg");
-
/* Recreate the page: note that global data on page (possible
segment headers, next page-field, etc.) is preserved intact */
- page_create(block, mtr, TRUE);
+ page_create(block, mtr, TRUE, dict_index_is_spatial(index));
/* Copy the records from the temporary space to the recreated page;
do not copy the lock bits yet */
@@ -4665,39 +4724,40 @@ page_zip_reorganize(
page_get_infimum_rec(temp_page),
index, mtr);
- if (!dict_index_is_clust(index) && page_is_leaf(temp_page)) {
- /* Copy max trx id to recreated page */
- trx_id_t max_trx_id = page_get_max_trx_id(temp_page);
- page_set_max_trx_id(block, NULL, max_trx_id, NULL);
- ut_ad(max_trx_id != 0);
- }
+ /* Copy the PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC. */
+ memcpy(page + (PAGE_HEADER + PAGE_MAX_TRX_ID),
+ temp_page + (PAGE_HEADER + PAGE_MAX_TRX_ID), 8);
+ /* PAGE_MAX_TRX_ID must be set on secondary index leaf pages. */
+ ut_ad(dict_index_is_clust(index) || !page_is_leaf(temp_page)
+ || page_get_max_trx_id(page) != 0);
+ /* PAGE_MAX_TRX_ID must be zero on non-leaf pages other than
+ clustered index root pages. */
+ ut_ad(page_get_max_trx_id(page) == 0
+ || (dict_index_is_clust(index)
+ ? !page_has_siblings(temp_page)
+ : page_is_leaf(temp_page)));
/* Restore logging. */
mtr_set_log_mode(mtr, log_mode);
- if (!page_zip_compress(page_zip, page, index, page_zip_level, mtr)) {
+ if (!page_zip_compress(page_zip, page, index,
+ page_zip_level, NULL, mtr)) {
-#ifndef UNIV_HOTBACKUP
buf_block_free(temp_block);
-#endif /* !UNIV_HOTBACKUP */
return(FALSE);
}
lock_move_reorganize_page(block, temp_block);
-#ifndef UNIV_HOTBACKUP
buf_block_free(temp_block);
-#endif /* !UNIV_HOTBACKUP */
return(TRUE);
}
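Per the contract stated in the comment above (page_zip is left intact on failure, while the uncompressed frame is overwritten), a caller must restore the frame from the compressed image before falling back. A hedged caller sketch; the recovery step mirrors the pattern used by the B-tree reorganize callers, but the surrounding context is illustrative:

	if (!page_zip_reorganize(block, index, mtr)) {
		/* The frame was clobbered, but the compressed image
		was not: rebuild the page from it before falling back
		(typically to a page split). */
		page_zip_des_t*	page_zip = buf_block_get_page_zip(block);
		if (UNIV_UNLIKELY(!page_zip_decompress(
					  page_zip,
					  buf_block_get_frame(block),
					  FALSE))) {
			ut_error; /* the pre-existing image must decompress */
		}
	}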
-#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Copy the records of a page byte for byte. Do not copy the page header
or trailer, except those B-tree header fields that are directly
related to the storage of records. Also copy PAGE_MAX_TRX_ID.
NOTE: The caller must update the lock table and the adaptive hash index. */
-UNIV_INTERN
void
page_zip_copy_recs(
/*===============*/
@@ -4713,6 +4773,7 @@ page_zip_copy_recs(
ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr_memo_contains_page(mtr, src, MTR_MEMO_PAGE_X_FIX));
ut_ad(!dict_index_is_ibuf(index));
+ ut_ad(!dict_table_is_temporary(index->table));
#ifdef UNIV_ZIP_DEBUG
/* The B-tree operations that call this function may set
FIL_PAGE_PREV or PAGE_LEVEL, causing a temporary min_rec_flag
@@ -4726,11 +4787,6 @@ page_zip_copy_recs(
ut_a(dict_index_is_clust(index));
}
- /* The PAGE_MAX_TRX_ID must be set on leaf pages of secondary
- indexes. It does not matter on other pages. */
- ut_a(dict_index_is_clust(index) || !page_is_leaf(src)
- || page_get_max_trx_id(src));
-
UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
UNIV_MEM_ASSERT_W(page_zip->data, page_zip_get_size(page_zip));
UNIV_MEM_ASSERT_RW(src, UNIV_PAGE_SIZE);
@@ -4752,6 +4808,18 @@ page_zip_copy_recs(
memcpy(PAGE_DATA + page_zip->data, PAGE_DATA + src_zip->data,
page_zip_get_size(page_zip) - PAGE_DATA);
+ if (dict_index_is_clust(index)) {
+ /* Reset the PAGE_ROOT_AUTO_INC field when copying
+ from a root page. */
+ memset(PAGE_HEADER + PAGE_ROOT_AUTO_INC + page, 0, 8);
+ memset(PAGE_HEADER + PAGE_ROOT_AUTO_INC + page_zip->data,
+ 0, 8);
+ } else {
+ /* The PAGE_MAX_TRX_ID must be nonzero on leaf pages
+ of secondary indexes, and 0 on others. */
+ ut_ad(!page_is_leaf(src) == !page_get_max_trx_id(src));
+ }
+
/* Copy all fields of src_zip to page_zip, except the pointer
to the compressed data page. */
{
@@ -4763,9 +4831,8 @@ page_zip_copy_recs(
+ page_zip->m_end < page_zip_get_size(page_zip));
if (!page_is_leaf(src)
- && UNIV_UNLIKELY(mach_read_from_4(src + FIL_PAGE_PREV) == FIL_NULL)
- && UNIV_LIKELY(mach_read_from_4(page
- + FIL_PAGE_PREV) != FIL_NULL)) {
+ && UNIV_UNLIKELY(!page_has_prev(src))
+ && UNIV_LIKELY(page_has_prev(page))) {
/* Clear the REC_INFO_MIN_REC_FLAG of the first user record. */
ulint offs = rec_get_next_offs(page + PAGE_NEW_INFIMUM,
TRUE);
@@ -4780,28 +4847,23 @@ page_zip_copy_recs(
#ifdef UNIV_ZIP_DEBUG
ut_a(page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
- btr_blob_dbg_add(page, index, "page_zip_copy_recs");
-
page_zip_compress_write_log(page_zip, page, index, mtr);
}
-#endif /* !UNIV_HOTBACKUP */
-/**********************************************************************//**
-Parses a log record of compressing an index page.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_zip_parse_compress(
-/*====================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< out: uncompressed page */
- page_zip_des_t* page_zip)/*!< out: compressed page */
+/** Parse and optionally apply MLOG_ZIP_PAGE_COMPRESS.
+@param[in] ptr log record
+@param[in] end_ptr end of log
+@param[in,out] block ROW_FORMAT=COMPRESSED block, or NULL for parsing only
+@return end of log record
+@retval NULL if the log record is incomplete */
+byte* page_zip_parse_compress(const byte* ptr, const byte* end_ptr,
+ buf_block_t* block)
{
ulint size;
ulint trailer_size;
- ut_ad(!page == !page_zip);
+ ut_ad(ptr != NULL);
+ ut_ad(end_ptr != NULL);
if (UNIV_UNLIKELY(ptr + (2 + 2) > end_ptr)) {
@@ -4818,15 +4880,22 @@ page_zip_parse_compress(
return(NULL);
}
- if (page) {
- if (UNIV_UNLIKELY(!page_zip)
- || UNIV_UNLIKELY(page_zip_get_size(page_zip) < size)) {
+ if (block) {
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+ page_zip_des_t* page_zip = buf_block_get_page_zip(block);
+ if (!page_zip || page_zip_get_size(page_zip) < size
+ || block->page.id.page_no() < 3) {
corrupt:
recv_sys->found_corrupt_log = TRUE;
return(NULL);
}
+ memset(page_zip->data, 0, page_zip_get_size(page_zip));
+ mach_write_to_4(FIL_PAGE_OFFSET
+ + page_zip->data, block->page.id.page_no());
+ mach_write_to_4(FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID
+ + page_zip->data, block->page.id.space());
memcpy(page_zip->data + FIL_PAGE_PREV, ptr, 4);
memcpy(page_zip->data + FIL_PAGE_NEXT, ptr + 4, 4);
memcpy(page_zip->data + FIL_PAGE_TYPE, ptr + 8, size);
@@ -4836,30 +4905,34 @@ corrupt:
memcpy(page_zip->data + page_zip_get_size(page_zip)
- trailer_size, ptr + 8 + size, trailer_size);
- if (UNIV_UNLIKELY(!page_zip_decompress(page_zip, page,
+ if (UNIV_UNLIKELY(!page_zip_decompress(page_zip, block->frame,
TRUE))) {
goto corrupt;
}
}
- return(ptr + 8 + size + trailer_size);
+ return(const_cast<byte*>(ptr) + 8 + size + trailer_size);
}
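For orientation, the byte layout that page_zip_parse_compress() consumes, as implied by the memcpy offsets above (the leading 2+2-byte header is decoded in the elided part of the hunk; a sketch, not a normative format description):

/* MLOG_ZIP_PAGE_COMPRESS record body (sketch):
   [0..1]                size          compressed payload length
   [2..3]                trailer_size  length of the page trailer
   [4..7]                FIL_PAGE_PREV
   [8..11]               FIL_PAGE_NEXT
   [12..12+size)         compressed image starting at FIL_PAGE_TYPE
   [12+size..+trailer)   dense directory / trailer bytes
   total body length: 4 + 8 + size + trailer_size, matching the
   return value ptr + 8 + size + trailer_size after the 4-byte header. */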
#endif /* !UNIV_INNOCHECKSUM */
-/**********************************************************************//**
-Calculate the compressed page checksum.
-@return page checksum */
-UNIV_INTERN
-ulint
+/** Calculate the compressed page checksum.
+@param[in] data compressed page
+@param[in] size size of compressed page
+@param[in] algo algorithm to use
+@return page checksum */
+uint32_t
page_zip_calc_checksum(
-/*===================*/
- const void* data, /*!< in: compressed page */
- ulint size, /*!< in: size of compressed page */
- srv_checksum_algorithm_t algo) /*!< in: algorithm to use */
+ const void* data,
+ ulint size,
+ srv_checksum_algorithm_t algo
+#ifdef INNODB_BUG_ENDIAN_CRC32
+ /** for crc32, use the big-endian bug-compatible crc32 variant */
+ , bool use_legacy_big_endian
+#endif
+)
{
uLong adler;
- ib_uint32_t crc32;
const Bytef* s = static_cast<const byte*>(data);
/* Exclude FIL_PAGE_SPACE_OR_CHKSUM, FIL_PAGE_LSN,
@@ -4868,16 +4941,25 @@ page_zip_calc_checksum(
switch (algo) {
case SRV_CHECKSUM_ALGORITHM_CRC32:
case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
-
ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
-
- crc32 = ut_crc32(s + FIL_PAGE_OFFSET,
- FIL_PAGE_LSN - FIL_PAGE_OFFSET)
+#ifdef INNODB_BUG_ENDIAN_CRC32
+ if (use_legacy_big_endian) {
+ return ut_crc32_legacy_big_endian(s + FIL_PAGE_OFFSET,
+ FIL_PAGE_LSN
+ - FIL_PAGE_OFFSET)
+ ^ ut_crc32_legacy_big_endian(
+ s + FIL_PAGE_TYPE, 2)
+ ^ ut_crc32_legacy_big_endian(
+ s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
+ size
+ - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+ }
+#endif
+ return ut_crc32(s + FIL_PAGE_OFFSET,
+ FIL_PAGE_LSN - FIL_PAGE_OFFSET)
^ ut_crc32(s + FIL_PAGE_TYPE, 2)
^ ut_crc32(s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
size - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
-
- return((ulint) crc32);
case SRV_CHECKSUM_ALGORITHM_INNODB:
case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
@@ -4890,7 +4972,7 @@ page_zip_calc_checksum(
static_cast<uInt>(size)
- FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- return((ulint) adler);
+ return(uint32_t(adler));
case SRV_CHECKSUM_ALGORITHM_NONE:
case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
return(BUF_NO_CHECKSUM_MAGIC);
@@ -4902,22 +4984,12 @@ page_zip_calc_checksum(
return(0);
}
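The CRC32 branch above XORs the hashes of three disjoint byte ranges, so that the stored checksum field (FIL_PAGE_SPACE_OR_CHKSUM) and FIL_PAGE_LSN never contribute to their own check. A minimal restatement with the conventional header offsets written out as literals (a sketch; ut_crc32 is the InnoDB CRC-32C helper, declared here only for self-containment):

#include <stddef.h>
#include <stdint.h>

extern uint32_t ut_crc32(const unsigned char* buf, size_t len);

/* Assumed standard offsets: FIL_PAGE_OFFSET = 4, FIL_PAGE_LSN = 16,
   FIL_PAGE_TYPE = 24, FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID = 26. */
static uint32_t zip_crc32_sketch(const unsigned char* s, size_t size)
{
	return ut_crc32(s + 4, 16 - 4)      /* page number .. start of LSN */
	     ^ ut_crc32(s + 24, 2)          /* FIL_PAGE_TYPE */
	     ^ ut_crc32(s + 26, size - 26); /* space id .. end of zip page */
}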
-/**********************************************************************//**
-Verify a compressed page's checksum.
-@return TRUE if the stored checksum is valid according to the value of
-innodb_checksum_algorithm */
-UNIV_INTERN
-ibool
-page_zip_verify_checksum(
-/*=====================*/
- const void* data, /*!< in: compressed page */
- ulint size) /*!< in: size of compressed page */
+/** Validate the checksum on a ROW_FORMAT=COMPRESSED page.
+@param data ROW_FORMAT=COMPRESSED page
+@param size size of the page, in bytes
+@return whether the stored checksum matches innodb_checksum_algorithm */
+bool page_zip_verify_checksum(const byte *data, size_t size)
{
- ib_uint32_t stored;
- ib_uint32_t calc;
- ib_uint32_t crc32 = 0 /* silence bogus warning */;
- ib_uint32_t innodb = 0 /* silence bogus warning */;
-
const srv_checksum_algorithm_t curr_algo =
static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm);
@@ -4925,23 +4997,40 @@ page_zip_verify_checksum(
return true;
}
- bool all_zeroes = true;
- for (size_t i = 0; i < size; i++) {
- if (static_cast<const byte*>(data)[i] != 0) {
- all_zeroes = false;
- break;
- }
+ if (buf_is_zeroes(span<const byte>(data, size))) {
+ return true;
}
- if (all_zeroes) {
- return true;
+ const uint32_t stored = mach_read_from_4(
+ data + FIL_PAGE_SPACE_OR_CHKSUM);
+
+ uint32_t calc = page_zip_calc_checksum(data, size, curr_algo);
+
+#ifdef UNIV_INNOCHECKSUM
+ if (log_file) {
+ fprintf(log_file, "page::%llu;"
+ " %s checksum: calculated = %u;"
+ " recorded = %u\n", cur_page_num,
+ buf_checksum_algorithm_name(
+ static_cast<srv_checksum_algorithm_t>(
+ srv_checksum_algorithm)),
+ calc, stored);
}
- stored = static_cast<ib_uint32_t>(mach_read_from_4(
- static_cast<const unsigned char*>(data) + FIL_PAGE_SPACE_OR_CHKSUM));
+ if (!strict_verify) {
+ const uint32_t crc32 = page_zip_calc_checksum(
+ data, size, SRV_CHECKSUM_ALGORITHM_CRC32);
- calc = static_cast<ib_uint32_t>(page_zip_calc_checksum(
- data, size, curr_algo));
+ if (log_file) {
+ fprintf(log_file, "page::%llu: crc32 checksum:"
+ " calculated = %u; recorded = %u\n",
+ cur_page_num, crc32, stored);
+ fprintf(log_file, "page::%llu: none checksum:"
+ " calculated = %lu; recorded = %u\n",
+ cur_page_num, BUF_NO_CHECKSUM_MAGIC, stored);
+ }
+ }
+#endif /* UNIV_INNOCHECKSUM */
if (stored == calc) {
return(TRUE);
@@ -4949,30 +5038,42 @@ page_zip_verify_checksum(
switch (curr_algo) {
case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
+#ifdef INNODB_BUG_ENDIAN_CRC32
+ return stored == page_zip_calc_checksum(data, size, curr_algo,
+ true);
+#endif
+ /* fall through */
case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
- return stored == calc;
+ return FALSE;
case SRV_CHECKSUM_ALGORITHM_CRC32:
if (stored == BUF_NO_CHECKSUM_MAGIC) {
return(TRUE);
}
- crc32 = calc;
- innodb = static_cast<ib_uint32_t>(page_zip_calc_checksum(
- data, size, SRV_CHECKSUM_ALGORITHM_INNODB));
- break;
+ return
+#ifdef INNODB_BUG_ENDIAN_CRC32
+ stored == page_zip_calc_checksum(data, size, curr_algo,
+ true) ||
+#endif
+ stored == page_zip_calc_checksum(
+ data, size, SRV_CHECKSUM_ALGORITHM_INNODB);
case SRV_CHECKSUM_ALGORITHM_INNODB:
if (stored == BUF_NO_CHECKSUM_MAGIC) {
return TRUE;
}
- crc32 = static_cast<ib_uint32_t>(page_zip_calc_checksum(
- data, size, SRV_CHECKSUM_ALGORITHM_CRC32));
- innodb = calc;
- break;
+ return stored == page_zip_calc_checksum(
+ data, size, SRV_CHECKSUM_ALGORITHM_CRC32)
+#ifdef INNODB_BUG_ENDIAN_CRC32
+ || stored == page_zip_calc_checksum(
+ data, size,
+ SRV_CHECKSUM_ALGORITHM_CRC32, true)
+#endif
+ ;
case SRV_CHECKSUM_ALGORITHM_NONE:
return TRUE;
}
- return (stored == crc32 || stored == innodb);
+ return FALSE;
}
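A hedged usage sketch for the verifier above; the read path and all surrounding names are hypothetical, and only the page_zip_verify_checksum() call reflects the function as declared here:

#include <stddef.h>

typedef unsigned char byte;
bool page_zip_verify_checksum(const byte* data, size_t size); /* as above */

/* Hypothetical validation of a ROW_FORMAT=COMPRESSED page image just
   read from disk; zip_size would be the table's KEY_BLOCK_SIZE in bytes. */
static bool zip_page_image_ok(const byte* data, size_t zip_size)
{
	if (!page_zip_verify_checksum(data, zip_size)) {
		/* All-zero pages and matching checksums pass above; on a
		   mismatch a caller would typically flag corruption or
		   retry from the doublewrite buffer. */
		return false;
	}
	return true;
}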
diff --git a/storage/innobase/pars/lexyy.cc b/storage/innobase/pars/lexyy.cc
index ed6e2b53c2f..1e93ec3ed50 100644
--- a/storage/innobase/pars/lexyy.cc
+++ b/storage/innobase/pars/lexyy.cc
@@ -356,8 +356,8 @@ static void yynoreturn yy_fatal_error ( const char* msg );
(yy_hold_char) = *yy_cp; \
*yy_cp = '\0'; \
(yy_c_buf_p) = yy_cp;
-#define YY_NUM_RULES 124
-#define YY_END_OF_BUFFER 125
+#define YY_NUM_RULES 102
+#define YY_END_OF_BUFFER 103
/* This struct is not used in this scanner,
but its presence is necessary. */
struct yy_trans_info
@@ -365,55 +365,42 @@ struct yy_trans_info
flex_int32_t yy_verify;
flex_int32_t yy_nxt;
};
-static const flex_int16_t yy_accept[425] =
+static const flex_int16_t yy_accept[307] =
{ 0,
- 0, 0, 119, 119, 0, 0, 0, 0, 125, 123,
- 122, 122, 8, 123, 114, 5, 103, 109, 112, 110,
- 107, 111, 123, 113, 1, 123, 108, 106, 104, 105,
- 117, 96, 96, 96, 96, 96, 96, 96, 96, 96,
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
- 115, 116, 119, 120, 6, 7, 9, 10, 122, 4,
- 98, 118, 2, 1, 3, 99, 100, 102, 101, 0,
- 96, 0, 96, 96, 96, 96, 96, 44, 96, 96,
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
- 96, 96, 96, 96, 28, 17, 25, 96, 96, 96,
-
- 96, 96, 96, 54, 63, 96, 14, 96, 96, 96,
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
- 96, 96, 96, 96, 96, 119, 120, 120, 121, 6,
- 7, 9, 10, 2, 0, 97, 13, 45, 96, 96,
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
- 96, 96, 96, 96, 96, 96, 96, 27, 96, 96,
- 96, 41, 96, 96, 96, 96, 21, 96, 96, 96,
- 96, 96, 15, 96, 96, 96, 18, 96, 96, 96,
- 96, 96, 82, 96, 96, 96, 51, 96, 12, 96,
- 36, 96, 96, 96, 96, 96, 96, 96, 96, 96,
-
- 96, 96, 0, 97, 96, 96, 96, 96, 20, 96,
- 24, 96, 96, 96, 96, 96, 96, 96, 96, 96,
- 96, 96, 46, 96, 96, 30, 96, 89, 96, 96,
- 39, 96, 96, 96, 96, 96, 48, 96, 94, 91,
- 32, 93, 96, 11, 66, 96, 96, 96, 42, 96,
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 29,
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 87,
- 0, 96, 26, 96, 96, 96, 68, 96, 96, 96,
- 96, 37, 96, 96, 96, 96, 96, 96, 96, 31,
- 67, 23, 96, 59, 96, 77, 96, 96, 96, 43,
-
- 96, 96, 96, 96, 96, 96, 96, 96, 92, 96,
- 96, 56, 96, 96, 96, 96, 96, 96, 96, 40,
- 33, 0, 81, 95, 19, 96, 96, 85, 96, 76,
- 55, 96, 65, 96, 52, 96, 96, 96, 47, 96,
- 78, 96, 80, 96, 96, 34, 96, 96, 96, 35,
- 74, 96, 96, 96, 96, 60, 96, 50, 49, 96,
- 96, 96, 57, 53, 64, 96, 96, 96, 22, 96,
- 96, 75, 83, 96, 96, 79, 96, 70, 96, 96,
- 96, 96, 96, 38, 96, 90, 69, 96, 86, 96,
- 96, 96, 88, 96, 96, 61, 96, 16, 96, 72,
-
- 71, 96, 58, 96, 84, 96, 96, 96, 96, 96,
- 96, 96, 96, 96, 96, 73, 96, 96, 96, 96,
- 96, 96, 62, 0
+ 0, 0, 97, 97, 0, 0, 0, 0, 103, 101,
+ 100, 100, 8, 101, 92, 5, 81, 87, 90, 88,
+ 85, 89, 101, 91, 1, 101, 86, 84, 82, 83,
+ 95, 74, 74, 74, 74, 74, 74, 74, 74, 74,
+ 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
+ 93, 94, 97, 98, 6, 7, 9, 10, 100, 4,
+ 76, 96, 2, 1, 3, 77, 78, 80, 79, 0,
+ 74, 0, 74, 74, 74, 74, 36, 74, 74, 74,
+ 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
+ 23, 17, 20, 74, 74, 74, 74, 74, 74, 46,
+
+ 52, 74, 14, 74, 74, 74, 74, 74, 74, 74,
+ 74, 74, 74, 74, 74, 74, 74, 74, 97, 98,
+ 98, 99, 6, 7, 9, 10, 2, 0, 75, 13,
+ 37, 74, 74, 74, 74, 74, 74, 74, 74, 74,
+ 74, 74, 74, 74, 74, 22, 74, 74, 34, 74,
+ 74, 74, 74, 18, 74, 74, 74, 74, 74, 15,
+ 74, 74, 74, 74, 74, 74, 74, 43, 74, 12,
+ 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
+ 0, 75, 74, 74, 19, 74, 74, 74, 74, 74,
+ 74, 74, 74, 74, 74, 38, 25, 74, 67, 74,
+
+ 32, 74, 74, 74, 74, 40, 74, 72, 69, 27,
+ 71, 74, 11, 55, 74, 74, 74, 74, 74, 74,
+ 74, 74, 24, 74, 74, 74, 74, 74, 74, 66,
+ 0, 21, 74, 57, 74, 74, 74, 31, 74, 74,
+ 74, 74, 74, 26, 56, 74, 49, 74, 62, 74,
+ 74, 35, 74, 74, 74, 74, 70, 74, 48, 74,
+ 74, 74, 74, 33, 28, 0, 73, 74, 64, 61,
+ 47, 74, 54, 74, 44, 74, 39, 63, 74, 74,
+ 29, 74, 30, 60, 74, 50, 42, 41, 74, 45,
+ 53, 74, 74, 74, 74, 74, 74, 68, 58, 74,
+
+ 65, 74, 51, 16, 59, 0
} ;
static const YY_CHAR yy_ec[256] =
@@ -426,12 +413,12 @@ static const YY_CHAR yy_ec[256] =
17, 17, 17, 17, 17, 17, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
- 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
- 1, 1, 1, 1, 51, 1, 34, 34, 34, 34,
+ 41, 42, 43, 44, 45, 46, 47, 48, 49, 34,
+ 1, 1, 1, 1, 50, 1, 34, 34, 34, 34,
- 34, 34, 34, 34, 34, 34, 34, 52, 34, 34,
- 34, 34, 53, 34, 54, 34, 34, 34, 34, 34,
- 34, 34, 55, 1, 56, 1, 1, 1, 1, 1,
+ 34, 34, 34, 34, 34, 34, 34, 51, 34, 34,
+ 34, 34, 52, 34, 53, 34, 34, 34, 34, 34,
+ 34, 34, 54, 1, 55, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -448,438 +435,324 @@ static const YY_CHAR yy_ec[256] =
1, 1, 1, 1, 1
} ;
-static const YY_CHAR yy_meta[57] =
+static const YY_CHAR yy_meta[56] =
{ 0,
1, 1, 1, 2, 3, 1, 1, 4, 1, 1,
5, 1, 1, 1, 1, 6, 7, 1, 1, 1,
8, 1, 1, 6, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 1, 1
+ 9, 9, 9, 1, 1
} ;
-static const flex_int16_t yy_base[438] =
+static const flex_int16_t yy_base[320] =
{ 0,
- 0, 0, 293, 287, 284, 281, 272, 256, 254, 1357,
- 55, 57, 1357, 0, 1357, 1357, 1357, 1357, 1357, 1357,
- 1357, 1357, 238, 227, 46, 205, 1357, 43, 1357, 203,
- 1357, 46, 50, 56, 52, 66, 64, 51, 81, 92,
- 91, 94, 96, 111, 113, 116, 130, 134, 53, 143,
- 1357, 1357, 0, 106, 0, 212, 0, 210, 141, 0,
- 1357, 1357, 192, 56, 173, 1357, 1357, 1357, 1357, 168,
- 140, 150, 152, 154, 155, 161, 167, 171, 177, 172,
- 184, 174, 188, 189, 191, 194, 203, 212, 215, 217,
- 219, 221, 226, 228, 231, 240, 233, 235, 246, 251,
-
- 258, 253, 255, 256, 269, 271, 278, 272, 285, 283,
- 287, 289, 296, 305, 298, 315, 319, 321, 322, 326,
- 332, 333, 342, 339, 343, 0, 112, 173, 1357, 0,
- 155, 0, 156, 132, 93, 0, 355, 357, 358, 360,
- 364, 367, 374, 370, 379, 380, 389, 383, 390, 392,
- 395, 408, 411, 409, 415, 418, 425, 427, 429, 436,
- 431, 441, 446, 448, 450, 452, 453, 462, 471, 464,
- 473, 474, 478, 485, 488, 490, 491, 494, 500, 501,
- 504, 506, 507, 517, 518, 519, 520, 521, 522, 523,
- 533, 536, 538, 543, 549, 554, 555, 561, 556, 566,
-
- 567, 576, 60, 0, 573, 578, 580, 582, 583, 593,
- 589, 596, 598, 603, 605, 607, 610, 617, 619, 621,
- 622, 628, 633, 634, 635, 639, 640, 649, 650, 652,
- 653, 655, 659, 664, 668, 669, 665, 671, 674, 678,
- 681, 685, 687, 688, 692, 697, 698, 701, 703, 704,
- 707, 708, 717, 713, 728, 730, 724, 740, 734, 745,
- 746, 750, 751, 756, 757, 760, 761, 762, 771, 773,
- 42, 778, 782, 783, 787, 789, 792, 794, 793, 804,
- 805, 808, 809, 810, 819, 823, 826, 828, 829, 830,
- 835, 840, 844, 846, 847, 856, 857, 858, 859, 860,
-
- 863, 872, 873, 878, 879, 882, 885, 889, 894, 895,
- 896, 898, 905, 910, 908, 912, 914, 915, 926, 930,
- 931, 73, 932, 933, 935, 937, 942, 944, 946, 947,
- 948, 949, 951, 958, 961, 965, 967, 972, 978, 979,
- 981, 984, 983, 985, 994, 988, 999, 1000, 1001, 1004,
- 1013, 1015, 1022, 1016, 1019, 1026, 1032, 1033, 1035, 1036,
- 1038, 1039, 1048, 1049, 1050, 1051, 1053, 1054, 1060, 1063,
- 1065, 1066, 1069, 1070, 1072, 1082, 1084, 1085, 1087, 1096,
- 1097, 1098, 1099, 1101, 1113, 1114, 1115, 1116, 1117, 1118,
- 1119, 1128, 1130, 1131, 1134, 1133, 1135, 1137, 1150, 1151,
-
- 1153, 1155, 1157, 1162, 1160, 1167, 1172, 1173, 1174, 1176,
- 1185, 1190, 1183, 1187, 1189, 1199, 1204, 1206, 1208, 1210,
- 1215, 1220, 1222, 1357, 1269, 1278, 1287, 1290, 1293, 1297,
- 1306, 1315, 1324, 1333, 1340, 1344, 1347
+ 0, 0, 262, 259, 249, 244, 239, 234, 236, 960,
+ 54, 56, 960, 0, 960, 960, 960, 960, 960, 960,
+ 960, 960, 217, 220, 45, 186, 960, 42, 960, 184,
+ 960, 45, 49, 55, 51, 65, 80, 50, 69, 94,
+ 90, 92, 104, 60, 114, 116, 131, 134, 135, 149,
+ 960, 960, 0, 61, 0, 194, 0, 197, 133, 0,
+ 960, 960, 163, 53, 143, 960, 960, 960, 960, 147,
+ 125, 123, 138, 151, 152, 153, 155, 166, 169, 173,
+ 170, 171, 176, 180, 193, 182, 200, 204, 206, 209,
+ 210, 211, 213, 224, 225, 226, 235, 240, 242, 245,
+
+ 251, 252, 255, 256, 258, 261, 270, 274, 272, 277,
+ 289, 288, 276, 294, 295, 300, 304, 305, 0, 79,
+ 110, 960, 0, 116, 0, 113, 98, 58, 0, 306,
+ 315, 316, 318, 319, 322, 328, 329, 332, 334, 338,
+ 344, 353, 351, 354, 366, 360, 367, 369, 376, 378,
+ 381, 385, 388, 382, 394, 400, 403, 404, 406, 407,
+ 410, 417, 423, 424, 426, 429, 433, 440, 442, 443,
+ 444, 445, 454, 456, 459, 461, 472, 473, 474, 477,
+ 53, 0, 475, 478, 479, 490, 502, 504, 505, 507,
+ 508, 509, 511, 518, 520, 523, 524, 525, 529, 538,
+
+ 541, 542, 543, 545, 547, 544, 556, 557, 558, 559,
+ 560, 569, 572, 574, 578, 581, 579, 583, 588, 590,
+ 600, 601, 602, 607, 611, 613, 612, 618, 622, 629,
+ 41, 634, 636, 638, 639, 643, 645, 648, 649, 650,
+ 655, 659, 661, 660, 670, 675, 676, 679, 680, 682,
+ 686, 689, 691, 696, 693, 700, 705, 706, 709, 711,
+ 712, 716, 722, 723, 726, 72, 727, 736, 737, 738,
+ 739, 740, 742, 743, 752, 753, 755, 757, 758, 759,
+ 764, 770, 769, 771, 774, 784, 785, 786, 787, 789,
+ 790, 791, 796, 801, 802, 803, 806, 807, 812, 817,
+
+ 816, 823, 826, 828, 832, 960, 872, 881, 890, 893,
+ 896, 900, 909, 918, 927, 936, 943, 947, 950
} ;
-static const flex_int16_t yy_def[438] =
+static const flex_int16_t yy_def[320] =
{ 0,
- 424, 1, 425, 425, 426, 426, 427, 427, 424, 424,
- 424, 424, 424, 428, 424, 424, 424, 424, 424, 424,
- 424, 424, 424, 424, 424, 429, 424, 424, 424, 424,
- 424, 430, 430, 430, 430, 430, 34, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 424, 424, 431, 432, 433, 424, 434, 424, 424, 428,
- 424, 424, 424, 424, 429, 424, 424, 424, 424, 435,
- 430, 436, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
-
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 431, 432, 432, 424, 433,
- 424, 434, 424, 424, 424, 437, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
-
- 430, 430, 424, 437, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 424, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
-
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 424, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
-
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 0, 424, 424, 424, 424, 424, 424,
- 424, 424, 424, 424, 424, 424, 424
+ 306, 1, 307, 307, 308, 308, 309, 309, 306, 306,
+ 306, 306, 306, 310, 306, 306, 306, 306, 306, 306,
+ 306, 306, 306, 306, 306, 311, 306, 306, 306, 306,
+ 306, 312, 312, 312, 312, 312, 312, 312, 312, 312,
+ 312, 312, 312, 312, 312, 312, 312, 312, 312, 312,
+ 306, 306, 313, 314, 315, 306, 316, 306, 306, 310,
+ 306, 306, 306, 306, 311, 306, 306, 306, 306, 317,
+ 312, 318, 312, 312, 312, 312, 312, 312, 312, 312,
+ 312, 312, 312, 312, 312, 312, 312, 312, 312, 312,
+ 312, 312, 312, 312, 312, 312, 312, 312, 312, 312,
+
+ 312, 312, 312, 312, 312, 312, 312, 312, 312, 312,
+ 312, 312, 312, 312, 312, 312, 312, 312, 313, 314,
+ 314, 306, 315, 306, 316, 306, 306, 306, 319, 312,
+ 312, 312, 312, 312, 312, 312, 312, 312, 312, 312,
+ 312, 312, 312, 312, 312, 312, 312, 312, 312, 312,
+ 312, 312, 312, 312, 312, 312, 312, 312, 312, 312,
+ 312, 312, 312, 312, 312, 312, 312, 312, 312, 312,
+ 312, 312, 312, 312, 312, 312, 312, 312, 312, 312,
+ 306, 319, 312, 312, 312, 312, 312, 312, 312, 312,
+ 312, 312, 312, 312, 312, 312, 312, 312, 312, 312,
+
+ 312, 312, 312, 312, 312, 312, 312, 312, 312, 312,
+ 312, 312, 312, 312, 312, 312, 312, 312, 312, 312,
+ 312, 312, 312, 312, 312, 312, 312, 312, 312, 312,
+ 306, 312, 312, 312, 312, 312, 312, 312, 312, 312,
+ 312, 312, 312, 312, 312, 312, 312, 312, 312, 312,
+ 312, 312, 312, 312, 312, 312, 312, 312, 312, 312,
+ 312, 312, 312, 312, 312, 306, 312, 312, 312, 312,
+ 312, 312, 312, 312, 312, 312, 312, 312, 312, 312,
+ 312, 312, 312, 312, 312, 312, 312, 312, 312, 312,
+ 312, 312, 312, 312, 312, 312, 312, 312, 312, 312,
+
+ 312, 312, 312, 312, 312, 0, 306, 306, 306, 306,
+ 306, 306, 306, 306, 306, 306, 306, 306, 306
} ;
-static const flex_int16_t yy_nxt[1414] =
+static const flex_int16_t yy_nxt[1016] =
{ 0,
10, 11, 12, 13, 10, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
29, 30, 31, 10, 32, 33, 34, 35, 36, 37,
38, 38, 39, 38, 38, 40, 41, 42, 43, 44,
38, 45, 46, 47, 48, 49, 50, 38, 38, 38,
- 38, 38, 38, 38, 51, 52, 59, 59, 59, 59,
- 63, 70, 64, 67, 68, 70, 70, 70, 70, 72,
- 63, 70, 64, 72, 72, 72, 72, 123, 75, 72,
- 84, 70, 76, 73, 85, 77, 136, 79, 74, 72,
- 86, 80, 90, 322, 81, 71, 70, 82, 78, 91,
-
- 83, 87, 92, 88, 72, 93, 70, 70, 94, 70,
- 95, 70, 271, 89, 72, 72, 128, 72, 96, 72,
- 98, 129, 424, 97, 99, 104, 70, 424, 70, 101,
- 100, 70, 102, 105, 72, 106, 72, 107, 103, 72,
- 108, 110, 59, 59, 113, 70, 203, 114, 134, 70,
- 111, 112, 109, 72, 118, 70, 115, 72, 70, 133,
- 116, 119, 131, 72, 117, 70, 72, 70, 120, 70,
- 70, 121, 135, 122, 124, 72, 70, 72, 72, 137,
- 138, 125, 70, 128, 72, 140, 70, 70, 129, 70,
- 72, 141, 70, 424, 72, 72, 139, 72, 142, 70,
-
- 72, 144, 150, 70, 70, 143, 70, 72, 134, 70,
- 145, 72, 72, 133, 72, 152, 146, 72, 70, 131,
- 147, 148, 156, 69, 153, 66, 72, 70, 149, 151,
- 70, 154, 70, 155, 70, 72, 70, 62, 72, 158,
- 72, 70, 72, 70, 72, 157, 70, 159, 70, 72,
- 70, 72, 61, 424, 72, 70, 72, 161, 72, 58,
- 160, 70, 162, 72, 163, 164, 70, 165, 70, 72,
- 70, 70, 168, 70, 72, 58, 72, 170, 72, 72,
- 169, 72, 166, 167, 70, 172, 70, 70, 56, 171,
- 174, 56, 72, 70, 72, 72, 173, 54, 70, 175,
-
- 70, 72, 70, 54, 70, 176, 72, 180, 72, 424,
- 72, 70, 72, 70, 183, 177, 424, 178, 424, 72,
- 70, 72, 181, 179, 184, 424, 182, 424, 72, 188,
- 70, 186, 424, 189, 70, 185, 70, 70, 72, 187,
- 190, 70, 72, 424, 72, 72, 193, 70, 70, 72,
- 194, 191, 424, 424, 70, 72, 72, 70, 70, 424,
- 198, 192, 72, 424, 196, 72, 72, 200, 424, 424,
- 70, 201, 70, 70, 197, 70, 195, 199, 72, 70,
- 72, 72, 70, 72, 202, 70, 205, 72, 424, 70,
- 72, 208, 206, 72, 70, 70, 207, 72, 70, 209,
-
- 210, 424, 72, 72, 70, 70, 72, 70, 424, 216,
- 70, 211, 72, 72, 424, 72, 218, 424, 72, 424,
- 424, 212, 213, 70, 70, 214, 70, 217, 215, 424,
- 70, 72, 72, 70, 72, 223, 219, 220, 72, 222,
- 70, 72, 70, 221, 70, 424, 70, 424, 72, 424,
- 72, 70, 72, 226, 72, 230, 70, 227, 224, 72,
- 225, 70, 229, 70, 72, 70, 424, 70, 70, 72,
- 424, 72, 228, 72, 232, 72, 72, 70, 233, 70,
- 234, 236, 231, 424, 424, 72, 70, 72, 70, 70,
- 424, 237, 238, 70, 72, 235, 72, 72, 240, 239,
-
- 70, 72, 242, 70, 424, 70, 70, 243, 72, 70,
- 424, 72, 241, 72, 72, 70, 70, 72, 246, 70,
- 244, 70, 70, 72, 72, 245, 248, 72, 249, 72,
- 72, 247, 70, 70, 70, 70, 70, 70, 70, 250,
- 72, 72, 72, 72, 72, 72, 72, 255, 70, 424,
- 251, 70, 253, 70, 424, 424, 72, 252, 70, 72,
- 424, 72, 256, 258, 70, 257, 72, 424, 254, 70,
- 70, 70, 72, 259, 261, 262, 70, 72, 72, 72,
- 260, 70, 70, 424, 72, 266, 263, 265, 70, 72,
- 72, 70, 424, 70, 264, 70, 72, 70, 70, 72,
-
- 267, 72, 269, 72, 70, 72, 72, 268, 70, 424,
- 270, 70, 72, 70, 272, 273, 72, 274, 70, 72,
- 70, 72, 70, 275, 277, 70, 72, 276, 72, 280,
- 72, 281, 70, 72, 70, 279, 70, 70, 424, 424,
- 72, 278, 72, 70, 72, 72, 286, 284, 70, 70,
- 70, 72, 424, 282, 70, 70, 72, 72, 72, 285,
- 283, 424, 72, 72, 70, 70, 288, 70, 70, 290,
- 70, 287, 72, 72, 70, 72, 72, 424, 72, 70,
- 70, 291, 72, 70, 70, 289, 70, 72, 72, 70,
- 424, 72, 72, 70, 72, 292, 70, 72, 293, 297,
-
- 70, 72, 70, 70, 72, 295, 294, 70, 72, 296,
- 72, 72, 70, 70, 298, 72, 70, 424, 70, 70,
- 72, 72, 70, 70, 72, 299, 72, 72, 70, 302,
- 72, 72, 70, 424, 424, 424, 72, 424, 300, 70,
- 72, 301, 306, 70, 424, 70, 303, 72, 304, 70,
- 305, 72, 307, 72, 308, 70, 424, 72, 309, 424,
- 70, 70, 312, 72, 311, 70, 70, 310, 72, 72,
- 424, 70, 70, 72, 72, 70, 70, 70, 313, 72,
- 72, 314, 424, 72, 72, 72, 70, 317, 70, 319,
- 320, 424, 424, 70, 72, 315, 72, 70, 70, 321,
-
- 316, 72, 70, 318, 70, 72, 72, 70, 70, 70,
- 72, 424, 72, 424, 424, 72, 72, 72, 424, 70,
- 70, 323, 327, 70, 70, 70, 324, 72, 72, 424,
- 329, 72, 72, 72, 70, 325, 328, 331, 70, 326,
- 424, 70, 72, 70, 70, 70, 72, 332, 330, 72,
- 70, 72, 72, 72, 335, 70, 424, 424, 72, 70,
- 333, 70, 70, 72, 334, 336, 337, 72, 424, 72,
- 72, 70, 70, 70, 70, 70, 338, 424, 70, 72,
- 72, 72, 72, 72, 424, 340, 72, 70, 70, 341,
- 339, 424, 343, 70, 70, 72, 72, 70, 424, 344,
-
- 70, 72, 72, 342, 70, 72, 348, 424, 72, 70,
- 70, 70, 72, 70, 424, 346, 345, 72, 72, 72,
- 70, 72, 347, 70, 424, 70, 349, 70, 72, 70,
- 70, 72, 350, 72, 354, 72, 351, 72, 72, 352,
- 356, 70, 353, 358, 355, 70, 70, 70, 70, 72,
- 70, 357, 70, 72, 72, 72, 72, 70, 72, 70,
- 72, 70, 70, 70, 70, 72, 70, 72, 359, 72,
- 72, 72, 72, 70, 72, 424, 70, 424, 424, 361,
- 70, 72, 70, 362, 72, 360, 365, 70, 72, 363,
- 72, 366, 364, 70, 70, 72, 70, 424, 70, 70,
-
- 70, 72, 72, 70, 72, 367, 72, 72, 72, 70,
- 368, 72, 424, 424, 70, 70, 70, 72, 424, 70,
- 369, 370, 72, 72, 72, 424, 374, 72, 70, 371,
- 70, 70, 424, 375, 70, 372, 72, 70, 72, 72,
- 373, 70, 72, 376, 379, 72, 377, 70, 70, 72,
- 70, 70, 424, 70, 70, 72, 72, 378, 72, 72,
- 380, 72, 72, 70, 70, 70, 70, 383, 70, 70,
- 382, 72, 72, 72, 72, 70, 72, 72, 70, 381,
- 70, 70, 424, 72, 70, 70, 72, 70, 72, 72,
- 387, 386, 72, 72, 384, 72, 385, 70, 424, 70,
-
- 70, 424, 70, 424, 389, 72, 388, 72, 72, 390,
- 72, 70, 70, 70, 70, 392, 70, 424, 424, 72,
- 72, 72, 72, 393, 72, 391, 396, 424, 70, 70,
- 70, 70, 70, 70, 70, 394, 72, 72, 72, 72,
- 72, 72, 72, 70, 398, 70, 70, 395, 70, 70,
- 70, 72, 70, 72, 72, 424, 72, 72, 72, 424,
- 72, 399, 403, 397, 404, 70, 70, 400, 70, 401,
- 70, 424, 70, 72, 72, 70, 72, 70, 72, 405,
- 72, 402, 70, 72, 424, 72, 424, 70, 70, 70,
- 72, 70, 406, 424, 407, 72, 72, 72, 70, 72,
-
- 70, 412, 70, 424, 70, 70, 72, 424, 72, 410,
- 72, 408, 72, 72, 70, 409, 424, 413, 414, 70,
- 415, 70, 72, 70, 411, 70, 424, 72, 416, 72,
- 70, 72, 424, 72, 419, 70, 424, 70, 72, 417,
- 418, 424, 424, 72, 420, 72, 424, 424, 421, 424,
- 424, 424, 424, 424, 424, 424, 422, 424, 424, 424,
- 424, 424, 424, 424, 424, 424, 424, 424, 423, 53,
- 53, 53, 53, 53, 53, 53, 53, 53, 55, 55,
- 55, 55, 55, 55, 55, 55, 55, 57, 57, 57,
- 57, 57, 57, 57, 57, 57, 60, 424, 60, 65,
-
- 65, 65, 71, 71, 424, 71, 126, 126, 126, 126,
- 424, 126, 126, 126, 126, 127, 127, 127, 127, 127,
- 127, 127, 127, 127, 130, 130, 130, 424, 130, 130,
- 130, 130, 130, 132, 424, 132, 132, 132, 132, 132,
- 132, 132, 136, 424, 424, 424, 424, 424, 136, 72,
- 72, 424, 72, 204, 424, 204, 9, 424, 424, 424,
- 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
- 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
- 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
- 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
-
- 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
- 424, 424, 424
+ 38, 38, 38, 51, 52, 59, 59, 59, 59, 63,
+ 70, 64, 67, 68, 70, 70, 70, 63, 72, 64,
+ 70, 121, 72, 72, 72, 70, 122, 75, 72, 83,
+ 70, 76, 73, 72, 70, 129, 78, 74, 72, 306,
+ 79, 266, 72, 80, 306, 70, 81, 77, 91, 82,
+
+ 84, 104, 85, 72, 231, 70, 92, 70, 87, 70,
+ 181, 93, 86, 72, 127, 72, 126, 72, 88, 70,
+ 121, 89, 94, 124, 90, 122, 95, 72, 97, 70,
+ 98, 70, 96, 100, 59, 59, 99, 72, 70, 72,
+ 70, 101, 105, 102, 107, 103, 70, 108, 72, 70,
+ 70, 128, 106, 70, 72, 111, 109, 72, 72, 116,
+ 110, 72, 112, 306, 70, 130, 70, 70, 70, 113,
+ 70, 114, 72, 115, 72, 72, 72, 131, 72, 127,
+ 117, 70, 132, 133, 70, 70, 70, 118, 70, 72,
+ 134, 70, 72, 72, 72, 70, 72, 70, 140, 72,
+
+ 126, 124, 142, 72, 69, 72, 66, 135, 70, 137,
+ 138, 143, 141, 136, 147, 70, 72, 139, 144, 70,
+ 146, 70, 145, 72, 70, 70, 70, 72, 70, 72,
+ 62, 61, 72, 72, 72, 306, 72, 58, 152, 70,
+ 70, 70, 58, 148, 150, 149, 151, 72, 72, 72,
+ 70, 56, 157, 153, 154, 70, 56, 70, 72, 156,
+ 70, 155, 159, 72, 158, 72, 70, 70, 72, 54,
+ 70, 70, 54, 70, 72, 72, 70, 161, 72, 72,
+ 162, 72, 163, 160, 72, 70, 306, 70, 306, 70,
+ 306, 70, 70, 72, 164, 72, 166, 72, 169, 72,
+
+ 72, 165, 171, 70, 70, 167, 306, 170, 306, 70,
+ 70, 72, 72, 168, 172, 70, 173, 72, 72, 70,
+ 70, 70, 176, 72, 306, 174, 175, 72, 72, 72,
+ 70, 70, 178, 70, 70, 177, 179, 70, 72, 72,
+ 306, 72, 72, 70, 70, 72, 180, 70, 183, 70,
+ 184, 72, 72, 70, 306, 72, 306, 72, 189, 70,
+ 185, 72, 191, 306, 186, 188, 70, 72, 70, 70,
+ 187, 190, 306, 306, 72, 70, 72, 72, 306, 195,
+ 196, 70, 70, 72, 70, 192, 193, 306, 194, 72,
+ 72, 70, 72, 70, 197, 200, 70, 70, 198, 72,
+
+ 70, 72, 306, 70, 72, 72, 306, 202, 72, 70,
+ 199, 72, 306, 203, 201, 70, 204, 72, 70, 70,
+ 206, 70, 70, 72, 207, 70, 72, 72, 208, 72,
+ 72, 205, 70, 72, 211, 306, 212, 209, 70, 70,
+ 72, 70, 306, 210, 70, 213, 72, 72, 70, 72,
+ 216, 215, 72, 306, 214, 70, 72, 70, 70, 70,
+ 70, 219, 306, 72, 218, 72, 72, 72, 72, 70,
+ 217, 70, 306, 306, 70, 306, 70, 72, 306, 72,
+ 222, 224, 72, 220, 72, 226, 221, 70, 70, 70,
+ 70, 223, 70, 70, 70, 72, 72, 72, 72, 225,
+
+ 72, 72, 72, 306, 306, 70, 306, 306, 306, 229,
+ 306, 230, 232, 72, 228, 233, 227, 70, 234, 70,
+ 70, 306, 70, 70, 70, 72, 70, 72, 72, 237,
+ 72, 72, 72, 70, 72, 70, 236, 240, 70, 70,
+ 70, 72, 242, 72, 70, 235, 72, 72, 72, 241,
+ 238, 239, 72, 70, 244, 306, 70, 70, 70, 70,
+ 70, 72, 70, 243, 72, 72, 72, 72, 72, 245,
+ 72, 70, 70, 70, 70, 70, 306, 306, 306, 72,
+ 72, 72, 72, 72, 70, 246, 248, 70, 249, 70,
+ 247, 306, 72, 70, 70, 72, 70, 72, 70, 250,
+
+ 306, 72, 72, 70, 72, 70, 72, 251, 255, 253,
+ 306, 72, 306, 72, 256, 70, 70, 70, 257, 252,
+ 254, 306, 70, 72, 72, 72, 70, 70, 70, 259,
+ 72, 306, 306, 70, 72, 72, 72, 70, 306, 260,
+ 263, 72, 306, 258, 70, 72, 264, 306, 306, 70,
+ 265, 70, 72, 70, 70, 261, 262, 72, 70, 72,
+ 70, 72, 72, 70, 70, 70, 72, 268, 72, 306,
+ 70, 72, 72, 72, 70, 70, 70, 271, 72, 267,
+ 306, 306, 72, 72, 72, 70, 269, 272, 270, 275,
+ 70, 70, 306, 72, 70, 70, 273, 70, 72, 72,
+
+ 274, 70, 72, 72, 70, 72, 70, 276, 70, 72,
+ 306, 70, 72, 278, 72, 70, 72, 282, 280, 72,
+ 70, 70, 277, 72, 70, 306, 70, 70, 72, 72,
+ 279, 70, 72, 281, 72, 72, 306, 70, 70, 72,
+ 286, 70, 70, 283, 287, 72, 72, 284, 285, 72,
+ 72, 70, 70, 70, 70, 70, 306, 70, 70, 72,
+ 72, 72, 72, 72, 288, 72, 72, 70, 70, 306,
+ 70, 291, 70, 70, 70, 72, 72, 289, 72, 70,
+ 72, 72, 72, 290, 70, 70, 70, 72, 306, 70,
+ 306, 292, 72, 72, 72, 293, 295, 72, 296, 70,
+
+ 70, 70, 70, 294, 70, 70, 70, 72, 72, 72,
+ 72, 70, 72, 72, 72, 297, 70, 70, 70, 72,
+ 306, 70, 70, 299, 72, 72, 72, 70, 298, 72,
+ 72, 70, 70, 303, 306, 72, 301, 306, 70, 72,
+ 72, 70, 300, 70, 302, 304, 72, 70, 306, 72,
+ 306, 72, 306, 306, 306, 72, 306, 306, 306, 306,
+ 306, 306, 306, 306, 306, 306, 306, 306, 306, 306,
+ 306, 305, 53, 53, 53, 53, 53, 53, 53, 53,
+ 53, 55, 55, 55, 55, 55, 55, 55, 55, 55,
+ 57, 57, 57, 57, 57, 57, 57, 57, 57, 60,
+
+ 306, 60, 65, 65, 65, 71, 71, 306, 71, 119,
+ 119, 119, 119, 306, 119, 119, 119, 119, 120, 120,
+ 120, 120, 120, 120, 120, 120, 120, 123, 123, 123,
+ 306, 123, 123, 123, 123, 123, 125, 306, 125, 125,
+ 125, 125, 125, 125, 125, 129, 306, 306, 306, 306,
+ 306, 129, 72, 72, 306, 72, 182, 306, 182, 9,
+ 306, 306, 306, 306, 306, 306, 306, 306, 306, 306,
+ 306, 306, 306, 306, 306, 306, 306, 306, 306, 306,
+ 306, 306, 306, 306, 306, 306, 306, 306, 306, 306,
+ 306, 306, 306, 306, 306, 306, 306, 306, 306, 306,
+
+ 306, 306, 306, 306, 306, 306, 306, 306, 306, 306,
+ 306, 306, 306, 306, 306
} ;
-static const flex_int16_t yy_chk[1414] =
+static const flex_int16_t yy_chk[1016] =
{ 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 11, 11, 12, 12,
- 25, 32, 25, 28, 28, 33, 38, 35, 49, 32,
- 64, 34, 64, 33, 38, 35, 49, 49, 33, 34,
- 35, 36, 33, 32, 35, 33, 322, 34, 32, 36,
- 35, 34, 37, 271, 34, 37, 39, 34, 33, 37,
-
- 34, 36, 37, 36, 39, 37, 41, 40, 37, 42,
- 39, 43, 203, 36, 41, 40, 54, 42, 39, 43,
- 40, 54, 127, 39, 40, 43, 44, 127, 45, 41,
- 40, 46, 42, 43, 44, 43, 45, 43, 42, 46,
- 43, 45, 59, 59, 46, 47, 135, 46, 134, 48,
- 45, 45, 44, 47, 47, 71, 46, 48, 50, 133,
- 46, 47, 131, 71, 46, 72, 50, 73, 47, 74,
- 75, 48, 70, 48, 50, 73, 76, 74, 75, 73,
- 74, 50, 77, 128, 76, 75, 78, 80, 128, 82,
- 77, 76, 79, 65, 78, 80, 74, 82, 76, 81,
-
- 79, 79, 82, 83, 84, 77, 85, 81, 63, 86,
- 80, 83, 84, 58, 85, 84, 80, 86, 87, 56,
- 81, 81, 86, 30, 84, 26, 87, 88, 81, 83,
- 89, 84, 90, 85, 91, 88, 92, 24, 89, 88,
- 90, 93, 91, 94, 92, 87, 95, 89, 97, 93,
- 98, 94, 23, 9, 95, 96, 97, 91, 98, 8,
- 90, 99, 92, 96, 93, 94, 100, 96, 102, 99,
- 103, 104, 98, 101, 100, 7, 102, 100, 103, 104,
- 99, 101, 96, 96, 105, 101, 106, 108, 6, 100,
- 103, 5, 105, 107, 106, 108, 102, 4, 110, 106,
-
- 109, 107, 111, 3, 112, 107, 110, 110, 109, 0,
- 111, 113, 112, 115, 111, 108, 0, 109, 0, 113,
- 114, 115, 110, 109, 112, 0, 110, 0, 114, 114,
- 116, 113, 0, 115, 117, 112, 118, 119, 116, 113,
- 116, 120, 117, 0, 118, 119, 118, 121, 122, 120,
- 119, 116, 0, 0, 124, 121, 122, 123, 125, 0,
- 122, 117, 124, 0, 121, 123, 125, 124, 0, 0,
- 137, 124, 138, 139, 121, 140, 120, 123, 137, 141,
- 138, 139, 142, 140, 125, 144, 139, 141, 0, 143,
- 142, 142, 140, 144, 145, 146, 141, 143, 148, 143,
-
- 143, 0, 145, 146, 147, 149, 148, 150, 0, 148,
- 151, 144, 147, 149, 0, 150, 150, 0, 151, 0,
- 0, 145, 146, 152, 154, 147, 153, 149, 147, 0,
- 155, 152, 154, 156, 153, 154, 151, 151, 155, 153,
- 157, 156, 158, 152, 159, 0, 161, 0, 157, 0,
- 158, 160, 159, 157, 161, 161, 162, 157, 155, 160,
- 156, 163, 160, 164, 162, 165, 0, 166, 167, 163,
- 0, 164, 159, 165, 164, 166, 167, 168, 165, 170,
- 166, 167, 163, 0, 0, 168, 169, 170, 171, 172,
- 0, 167, 168, 173, 169, 166, 171, 172, 170, 169,
-
- 174, 173, 172, 175, 0, 176, 177, 173, 174, 178,
- 0, 175, 171, 176, 177, 179, 180, 178, 176, 181,
- 174, 182, 183, 179, 180, 175, 179, 181, 180, 182,
- 183, 178, 184, 185, 186, 187, 188, 189, 190, 181,
- 184, 185, 186, 187, 188, 189, 190, 186, 191, 0,
- 182, 192, 184, 193, 0, 0, 191, 183, 194, 192,
- 0, 193, 188, 192, 195, 190, 194, 0, 185, 196,
- 197, 199, 195, 193, 195, 195, 198, 196, 197, 199,
- 194, 200, 201, 0, 198, 198, 195, 197, 205, 200,
- 201, 202, 0, 206, 196, 207, 205, 208, 209, 202,
-
- 199, 206, 201, 207, 211, 208, 209, 200, 210, 0,
- 202, 212, 211, 213, 205, 206, 210, 207, 214, 212,
- 215, 213, 216, 208, 212, 217, 214, 210, 215, 215,
- 216, 216, 218, 217, 219, 214, 220, 221, 0, 0,
- 218, 213, 219, 222, 220, 221, 221, 219, 223, 224,
- 225, 222, 0, 217, 226, 227, 223, 224, 225, 220,
- 218, 0, 226, 227, 228, 229, 224, 230, 231, 227,
- 232, 222, 228, 229, 233, 230, 231, 0, 232, 234,
- 237, 229, 233, 235, 236, 225, 238, 234, 237, 239,
- 0, 235, 236, 240, 238, 230, 241, 239, 232, 236,
-
- 242, 240, 243, 244, 241, 234, 233, 245, 242, 235,
- 243, 244, 246, 247, 238, 245, 248, 0, 249, 250,
- 246, 247, 251, 252, 248, 243, 249, 250, 254, 248,
- 251, 252, 253, 0, 0, 0, 254, 0, 246, 257,
- 253, 247, 253, 255, 0, 256, 250, 257, 251, 259,
- 252, 255, 254, 256, 255, 258, 0, 259, 256, 0,
- 260, 261, 259, 258, 258, 262, 263, 257, 260, 261,
- 0, 264, 265, 262, 263, 266, 267, 268, 261, 264,
- 265, 262, 0, 266, 267, 268, 269, 265, 270, 267,
- 268, 0, 0, 272, 269, 263, 270, 273, 274, 269,
-
- 264, 272, 275, 266, 276, 273, 274, 277, 279, 278,
- 275, 0, 276, 0, 0, 277, 279, 278, 0, 280,
- 281, 272, 278, 282, 283, 284, 274, 280, 281, 0,
- 280, 282, 283, 284, 285, 275, 279, 283, 286, 276,
- 0, 287, 285, 288, 289, 290, 286, 284, 281, 287,
- 291, 288, 289, 290, 287, 292, 0, 0, 291, 293,
- 285, 294, 295, 292, 286, 288, 289, 293, 0, 294,
- 295, 296, 297, 298, 299, 300, 293, 0, 301, 296,
- 297, 298, 299, 300, 0, 297, 301, 302, 303, 298,
- 295, 0, 301, 304, 305, 302, 303, 306, 0, 302,
-
- 307, 304, 305, 299, 308, 306, 306, 0, 307, 309,
- 310, 311, 308, 312, 0, 304, 303, 309, 310, 311,
- 313, 312, 305, 315, 0, 314, 307, 316, 313, 317,
- 318, 315, 308, 314, 314, 316, 310, 317, 318, 311,
- 316, 319, 313, 318, 315, 320, 321, 323, 324, 319,
- 325, 317, 326, 320, 321, 323, 324, 327, 325, 328,
- 326, 329, 330, 331, 332, 327, 333, 328, 319, 329,
- 330, 331, 332, 334, 333, 0, 335, 0, 0, 326,
- 336, 334, 337, 327, 335, 325, 334, 338, 336, 329,
- 337, 336, 332, 339, 340, 338, 341, 0, 343, 342,
-
- 344, 339, 340, 346, 341, 337, 343, 342, 344, 345,
- 338, 346, 0, 0, 347, 348, 349, 345, 0, 350,
- 340, 342, 347, 348, 349, 0, 348, 350, 351, 344,
- 352, 354, 0, 349, 355, 345, 351, 353, 352, 354,
- 347, 356, 355, 352, 355, 353, 353, 357, 358, 356,
- 359, 360, 0, 361, 362, 357, 358, 354, 359, 360,
- 357, 361, 362, 363, 364, 365, 366, 362, 367, 368,
- 361, 363, 364, 365, 366, 369, 367, 368, 370, 360,
- 371, 372, 0, 369, 373, 374, 370, 375, 371, 372,
- 370, 368, 373, 374, 366, 375, 367, 376, 0, 377,
-
- 378, 0, 379, 0, 374, 376, 371, 377, 378, 375,
- 379, 380, 381, 382, 383, 379, 384, 0, 0, 380,
- 381, 382, 383, 380, 384, 377, 383, 0, 385, 386,
- 387, 388, 389, 390, 391, 381, 385, 386, 387, 388,
- 389, 390, 391, 392, 388, 393, 394, 382, 396, 395,
- 397, 392, 398, 393, 394, 0, 396, 395, 397, 0,
- 398, 390, 395, 385, 397, 399, 400, 391, 401, 392,
- 402, 0, 403, 399, 400, 405, 401, 404, 402, 399,
- 403, 394, 406, 405, 0, 404, 0, 407, 408, 409,
- 406, 410, 402, 0, 404, 407, 408, 409, 413, 410,
-
- 411, 410, 414, 0, 415, 412, 413, 0, 411, 408,
- 414, 406, 415, 412, 416, 407, 0, 411, 412, 417,
- 413, 418, 416, 419, 409, 420, 0, 417, 414, 418,
- 421, 419, 0, 420, 418, 422, 0, 423, 421, 415,
- 417, 0, 0, 422, 419, 423, 0, 0, 420, 0,
- 0, 0, 0, 0, 0, 0, 421, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 422, 425,
- 425, 425, 425, 425, 425, 425, 425, 425, 426, 426,
- 426, 426, 426, 426, 426, 426, 426, 427, 427, 427,
- 427, 427, 427, 427, 427, 427, 428, 0, 428, 429,
-
- 429, 429, 430, 430, 0, 430, 431, 431, 431, 431,
- 0, 431, 431, 431, 431, 432, 432, 432, 432, 432,
- 432, 432, 432, 432, 433, 433, 433, 0, 433, 433,
- 433, 433, 433, 434, 0, 434, 434, 434, 434, 434,
- 434, 434, 435, 0, 0, 0, 0, 0, 435, 436,
- 436, 0, 436, 437, 0, 437, 424, 424, 424, 424,
- 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
- 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
- 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
- 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
-
- 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
- 424, 424, 424
+ 1, 1, 1, 1, 1, 11, 11, 12, 12, 25,
+ 32, 25, 28, 28, 33, 38, 35, 64, 32, 64,
+ 34, 54, 33, 38, 35, 44, 54, 33, 34, 35,
+ 36, 33, 32, 44, 39, 266, 34, 32, 36, 120,
+ 34, 231, 39, 34, 120, 37, 34, 33, 39, 34,
+
+ 36, 44, 36, 37, 181, 41, 39, 42, 37, 40,
+ 128, 39, 36, 41, 127, 42, 126, 40, 37, 43,
+ 121, 37, 40, 124, 37, 121, 40, 43, 41, 45,
+ 42, 46, 40, 43, 59, 59, 42, 45, 72, 46,
+ 71, 43, 45, 43, 46, 43, 47, 46, 71, 48,
+ 49, 70, 45, 73, 47, 47, 46, 48, 49, 49,
+ 46, 73, 47, 65, 50, 73, 74, 75, 76, 47,
+ 77, 48, 50, 48, 74, 75, 76, 74, 77, 63,
+ 50, 78, 75, 76, 79, 81, 82, 50, 80, 78,
+ 78, 83, 79, 81, 82, 84, 80, 86, 81, 83,
+
+ 58, 56, 83, 84, 30, 86, 26, 79, 85, 80,
+ 80, 83, 82, 79, 86, 87, 85, 80, 83, 88,
+ 85, 89, 84, 87, 90, 91, 92, 88, 93, 89,
+ 24, 23, 90, 91, 92, 9, 93, 8, 92, 94,
+ 95, 96, 7, 87, 89, 88, 90, 94, 95, 96,
+ 97, 6, 96, 92, 92, 98, 5, 99, 97, 95,
+ 100, 94, 97, 98, 96, 99, 101, 102, 100, 4,
+ 103, 104, 3, 105, 101, 102, 106, 99, 103, 104,
+ 102, 105, 103, 98, 106, 107, 0, 109, 0, 108,
+ 0, 113, 110, 107, 104, 109, 106, 108, 108, 113,
+
+ 110, 105, 110, 112, 111, 107, 0, 109, 0, 114,
+ 115, 112, 111, 107, 111, 116, 112, 114, 115, 117,
+ 118, 130, 115, 116, 0, 113, 114, 117, 118, 130,
+ 131, 132, 117, 133, 134, 116, 117, 135, 131, 132,
+ 0, 133, 134, 136, 137, 135, 118, 138, 132, 139,
+ 133, 136, 137, 140, 0, 138, 0, 139, 138, 141,
+ 134, 140, 140, 0, 135, 137, 143, 141, 142, 144,
+ 136, 139, 0, 0, 143, 146, 142, 144, 0, 143,
+ 144, 145, 147, 146, 148, 141, 141, 0, 142, 145,
+ 147, 149, 148, 150, 145, 148, 151, 154, 145, 149,
+
+ 152, 150, 0, 153, 151, 154, 0, 151, 152, 155,
+ 147, 153, 0, 152, 150, 156, 153, 155, 157, 158,
+ 154, 159, 160, 156, 155, 161, 157, 158, 156, 159,
+ 160, 153, 162, 161, 159, 0, 160, 157, 163, 164,
+ 162, 165, 0, 158, 166, 161, 163, 164, 167, 165,
+ 164, 163, 166, 0, 162, 168, 167, 169, 170, 171,
+ 172, 167, 0, 168, 166, 169, 170, 171, 172, 173,
+ 165, 174, 0, 0, 175, 0, 176, 173, 0, 174,
+ 172, 174, 175, 169, 176, 176, 171, 177, 178, 179,
+ 183, 173, 180, 184, 185, 177, 178, 179, 183, 175,
+
+ 180, 184, 185, 0, 0, 186, 0, 0, 0, 179,
+ 0, 180, 183, 186, 178, 184, 177, 187, 186, 188,
+ 189, 0, 190, 191, 192, 187, 193, 188, 189, 189,
+ 190, 191, 192, 194, 193, 195, 188, 192, 196, 197,
+ 198, 194, 194, 195, 199, 187, 196, 197, 198, 193,
+ 190, 191, 199, 200, 198, 0, 201, 202, 203, 206,
+ 204, 200, 205, 195, 201, 202, 203, 206, 204, 200,
+ 205, 207, 208, 209, 210, 211, 0, 0, 0, 207,
+ 208, 209, 210, 211, 212, 202, 204, 213, 205, 214,
+ 203, 0, 212, 215, 217, 213, 216, 214, 218, 207,
+
+ 0, 215, 217, 219, 216, 220, 218, 212, 218, 216,
+ 0, 219, 0, 220, 219, 221, 222, 223, 220, 215,
+ 217, 0, 224, 221, 222, 223, 225, 227, 226, 222,
+ 224, 0, 0, 228, 225, 227, 226, 229, 0, 224,
+ 227, 228, 0, 221, 230, 229, 228, 0, 0, 232,
+ 229, 233, 230, 234, 235, 225, 226, 232, 236, 233,
+ 237, 234, 235, 238, 239, 240, 236, 235, 237, 0,
+ 241, 238, 239, 240, 242, 244, 243, 239, 241, 233,
+ 0, 0, 242, 244, 243, 245, 236, 240, 237, 243,
+ 246, 247, 0, 245, 248, 249, 241, 250, 246, 247,
+
+ 242, 251, 248, 249, 252, 250, 253, 246, 255, 251,
+ 0, 254, 252, 250, 253, 256, 255, 255, 253, 254,
+ 257, 258, 248, 256, 259, 0, 260, 261, 257, 258,
+ 251, 262, 259, 254, 260, 261, 0, 263, 264, 262,
+ 261, 265, 267, 256, 262, 263, 264, 258, 260, 265,
+ 267, 268, 269, 270, 271, 272, 0, 273, 274, 268,
+ 269, 270, 271, 272, 263, 273, 274, 275, 276, 0,
+ 277, 274, 278, 279, 280, 275, 276, 268, 277, 281,
+ 278, 279, 280, 272, 283, 282, 284, 281, 0, 285,
+ 0, 276, 283, 282, 284, 279, 282, 285, 285, 286,
+
+ 287, 288, 289, 280, 290, 291, 292, 286, 287, 288,
+ 289, 293, 290, 291, 292, 289, 294, 295, 296, 293,
+ 0, 297, 298, 293, 294, 295, 296, 299, 292, 297,
+ 298, 301, 300, 297, 0, 299, 295, 0, 302, 301,
+ 300, 303, 294, 304, 296, 300, 302, 305, 0, 303,
+ 0, 304, 0, 0, 0, 305, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 302, 307, 307, 307, 307, 307, 307, 307, 307,
+ 307, 308, 308, 308, 308, 308, 308, 308, 308, 308,
+ 309, 309, 309, 309, 309, 309, 309, 309, 309, 310,
+
+ 0, 310, 311, 311, 311, 312, 312, 0, 312, 313,
+ 313, 313, 313, 0, 313, 313, 313, 313, 314, 314,
+ 314, 314, 314, 314, 314, 314, 314, 315, 315, 315,
+ 0, 315, 315, 315, 315, 315, 316, 0, 316, 316,
+ 316, 316, 316, 316, 316, 317, 0, 0, 0, 0,
+ 0, 317, 318, 318, 0, 318, 319, 0, 319, 306,
+ 306, 306, 306, 306, 306, 306, 306, 306, 306, 306,
+ 306, 306, 306, 306, 306, 306, 306, 306, 306, 306,
+ 306, 306, 306, 306, 306, 306, 306, 306, 306, 306,
+ 306, 306, 306, 306, 306, 306, 306, 306, 306, 306,
+
+ 306, 306, 306, 306, 306, 306, 306, 306, 306, 306,
+ 306, 306, 306, 306, 306
} ;
static yy_state_type yy_last_accepting_state;
@@ -899,8 +772,8 @@ static char *yytext;
#line 1 "pars0lex.l"
/*****************************************************************************
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2019, MariaDB Corporation.
+Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -945,7 +818,7 @@ Created 12/14/1997 Heikki Tuuri
#include "mem0mem.h"
#include "os0proc.h"
-#define malloc(A) ut_malloc(A)
+#define malloc(A) ut_malloc_nokey(A)
#define free(A) ut_free(A)
#define realloc(P, A) ut_realloc(P, A)
#define exit(A) ut_error
@@ -983,9 +856,9 @@ string_append(
stringbuf_len += len;
}
-#line 986 "lexyy.cc"
+#line 859 "lexyy.cc"
-#line 988 "lexyy.cc"
+#line 861 "lexyy.cc"
#define INITIAL 0
#define comment 1
@@ -1206,7 +1079,7 @@ YY_DECL
#line 112 "pars0lex.l"
-#line 1209 "lexyy.cc"
+#line 1082 "lexyy.cc"
while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */
{
@@ -1233,13 +1106,13 @@ yy_match:
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 425 )
+ if ( yy_current_state >= 307 )
yy_c = yy_meta[yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];
++yy_cp;
}
- while ( yy_current_state != 424 );
+ while ( yy_current_state != 306 );
yy_cp = (yy_last_accepting_cpos);
yy_current_state = (yy_last_accepting_state);
@@ -1458,561 +1331,407 @@ case 18:
YY_RULE_SETUP
#line 252 "pars0lex.l"
{
- return(PARS_OUT_TOKEN);
+ return(PARS_INT_TOKEN);
}
YY_BREAK
case 19:
YY_RULE_SETUP
#line 256 "pars0lex.l"
{
- return(PARS_BINARY_TOKEN);
+ return(PARS_CHAR_TOKEN);
}
YY_BREAK
case 20:
YY_RULE_SETUP
#line 260 "pars0lex.l"
{
- return(PARS_BLOB_TOKEN);
+ return(PARS_IS_TOKEN);
}
YY_BREAK
case 21:
YY_RULE_SETUP
#line 264 "pars0lex.l"
{
- return(PARS_INT_TOKEN);
+ return(PARS_BEGIN_TOKEN);
}
YY_BREAK
case 22:
YY_RULE_SETUP
#line 268 "pars0lex.l"
{
- return(PARS_INT_TOKEN);
+ return(PARS_END_TOKEN);
}
YY_BREAK
case 23:
YY_RULE_SETUP
#line 272 "pars0lex.l"
{
- return(PARS_FLOAT_TOKEN);
+ return(PARS_IF_TOKEN);
}
YY_BREAK
case 24:
YY_RULE_SETUP
#line 276 "pars0lex.l"
{
- return(PARS_CHAR_TOKEN);
+ return(PARS_THEN_TOKEN);
}
YY_BREAK
case 25:
YY_RULE_SETUP
#line 280 "pars0lex.l"
{
- return(PARS_IS_TOKEN);
+ return(PARS_ELSE_TOKEN);
}
YY_BREAK
case 26:
YY_RULE_SETUP
#line 284 "pars0lex.l"
{
- return(PARS_BEGIN_TOKEN);
+ return(PARS_ELSIF_TOKEN);
}
YY_BREAK
case 27:
YY_RULE_SETUP
#line 288 "pars0lex.l"
{
- return(PARS_END_TOKEN);
+ return(PARS_LOOP_TOKEN);
}
YY_BREAK
case 28:
YY_RULE_SETUP
#line 292 "pars0lex.l"
{
- return(PARS_IF_TOKEN);
+ return(PARS_WHILE_TOKEN);
}
YY_BREAK
case 29:
YY_RULE_SETUP
#line 296 "pars0lex.l"
{
- return(PARS_THEN_TOKEN);
+ return(PARS_RETURN_TOKEN);
}
YY_BREAK
case 30:
YY_RULE_SETUP
#line 300 "pars0lex.l"
{
- return(PARS_ELSE_TOKEN);
+ return(PARS_SELECT_TOKEN);
}
YY_BREAK
case 31:
YY_RULE_SETUP
#line 304 "pars0lex.l"
{
- return(PARS_ELSIF_TOKEN);
+ return(PARS_COUNT_TOKEN);
}
YY_BREAK
case 32:
YY_RULE_SETUP
#line 308 "pars0lex.l"
{
- return(PARS_LOOP_TOKEN);
+ return(PARS_FROM_TOKEN);
}
YY_BREAK
case 33:
YY_RULE_SETUP
#line 312 "pars0lex.l"
{
- return(PARS_WHILE_TOKEN);
+ return(PARS_WHERE_TOKEN);
}
YY_BREAK
case 34:
YY_RULE_SETUP
#line 316 "pars0lex.l"
{
- return(PARS_RETURN_TOKEN);
+ return(PARS_FOR_TOKEN);
}
YY_BREAK
case 35:
YY_RULE_SETUP
#line 320 "pars0lex.l"
{
- return(PARS_SELECT_TOKEN);
+ return(PARS_ORDER_TOKEN);
}
YY_BREAK
case 36:
YY_RULE_SETUP
#line 324 "pars0lex.l"
{
- return(PARS_SUM_TOKEN);
+ return(PARS_BY_TOKEN);
}
YY_BREAK
case 37:
YY_RULE_SETUP
#line 328 "pars0lex.l"
{
- return(PARS_COUNT_TOKEN);
+ return(PARS_ASC_TOKEN);
}
YY_BREAK
case 38:
YY_RULE_SETUP
#line 332 "pars0lex.l"
{
- return(PARS_DISTINCT_TOKEN);
+ return(PARS_DESC_TOKEN);
}
YY_BREAK
case 39:
YY_RULE_SETUP
#line 336 "pars0lex.l"
{
- return(PARS_FROM_TOKEN);
+ return(PARS_INSERT_TOKEN);
}
YY_BREAK
case 40:
YY_RULE_SETUP
#line 340 "pars0lex.l"
{
- return(PARS_WHERE_TOKEN);
+ return(PARS_INTO_TOKEN);
}
YY_BREAK
case 41:
YY_RULE_SETUP
#line 344 "pars0lex.l"
{
- return(PARS_FOR_TOKEN);
+ return(PARS_VALUES_TOKEN);
}
YY_BREAK
case 42:
YY_RULE_SETUP
#line 348 "pars0lex.l"
{
- return(PARS_READ_TOKEN);
+ return(PARS_UPDATE_TOKEN);
}
YY_BREAK
case 43:
YY_RULE_SETUP
#line 352 "pars0lex.l"
{
- return(PARS_ORDER_TOKEN);
+ return(PARS_SET_TOKEN);
}
YY_BREAK
case 44:
YY_RULE_SETUP
#line 356 "pars0lex.l"
{
- return(PARS_BY_TOKEN);
+ return(PARS_DELETE_TOKEN);
}
YY_BREAK
case 45:
YY_RULE_SETUP
#line 360 "pars0lex.l"
{
- return(PARS_ASC_TOKEN);
+ return(PARS_CURRENT_TOKEN);
}
YY_BREAK
case 46:
YY_RULE_SETUP
#line 364 "pars0lex.l"
{
- return(PARS_DESC_TOKEN);
+ return(PARS_OF_TOKEN);
}
YY_BREAK
case 47:
YY_RULE_SETUP
#line 368 "pars0lex.l"
{
- return(PARS_INSERT_TOKEN);
+ return(PARS_CREATE_TOKEN);
}
YY_BREAK
case 48:
YY_RULE_SETUP
#line 372 "pars0lex.l"
{
- return(PARS_INTO_TOKEN);
+ return(PARS_TABLE_TOKEN);
}
YY_BREAK
case 49:
YY_RULE_SETUP
#line 376 "pars0lex.l"
{
- return(PARS_VALUES_TOKEN);
+ return(PARS_INDEX_TOKEN);
}
YY_BREAK
case 50:
YY_RULE_SETUP
#line 380 "pars0lex.l"
{
- return(PARS_UPDATE_TOKEN);
+ return(PARS_UNIQUE_TOKEN);
}
YY_BREAK
case 51:
YY_RULE_SETUP
#line 384 "pars0lex.l"
{
- return(PARS_SET_TOKEN);
+ return(PARS_CLUSTERED_TOKEN);
}
YY_BREAK
case 52:
YY_RULE_SETUP
#line 388 "pars0lex.l"
{
- return(PARS_DELETE_TOKEN);
+ return(PARS_ON_TOKEN);
}
YY_BREAK
case 53:
YY_RULE_SETUP
#line 392 "pars0lex.l"
{
- return(PARS_CURRENT_TOKEN);
+ return(PARS_DECLARE_TOKEN);
}
YY_BREAK
case 54:
YY_RULE_SETUP
#line 396 "pars0lex.l"
{
- return(PARS_OF_TOKEN);
+ return(PARS_CURSOR_TOKEN);
}
YY_BREAK
case 55:
YY_RULE_SETUP
#line 400 "pars0lex.l"
{
- return(PARS_CREATE_TOKEN);
+ return(PARS_OPEN_TOKEN);
}
YY_BREAK
case 56:
YY_RULE_SETUP
#line 404 "pars0lex.l"
{
- return(PARS_TABLE_TOKEN);
+ return(PARS_FETCH_TOKEN);
}
YY_BREAK
case 57:
YY_RULE_SETUP
#line 408 "pars0lex.l"
{
- return(PARS_COMPACT_TOKEN);
+ return(PARS_CLOSE_TOKEN);
}
YY_BREAK
case 58:
YY_RULE_SETUP
#line 412 "pars0lex.l"
{
- return(PARS_BLOCK_SIZE_TOKEN);
+ return(PARS_NOTFOUND_TOKEN);
}
YY_BREAK
case 59:
YY_RULE_SETUP
#line 416 "pars0lex.l"
{
- return(PARS_INDEX_TOKEN);
+ return(PARS_TO_BINARY_TOKEN);
}
YY_BREAK
case 60:
YY_RULE_SETUP
#line 420 "pars0lex.l"
{
- return(PARS_UNIQUE_TOKEN);
+ return(PARS_SUBSTR_TOKEN);
}
YY_BREAK
case 61:
YY_RULE_SETUP
#line 424 "pars0lex.l"
{
- return(PARS_CLUSTERED_TOKEN);
+ return(PARS_CONCAT_TOKEN);
}
YY_BREAK
case 62:
YY_RULE_SETUP
#line 428 "pars0lex.l"
{
- return(PARS_DOES_NOT_FIT_IN_MEM_TOKEN);
+ return(PARS_INSTR_TOKEN);
}
YY_BREAK
case 63:
YY_RULE_SETUP
#line 432 "pars0lex.l"
{
- return(PARS_ON_TOKEN);
+ return(PARS_LENGTH_TOKEN);
}
YY_BREAK
case 64:
YY_RULE_SETUP
#line 436 "pars0lex.l"
{
- return(PARS_DECLARE_TOKEN);
+ return(PARS_COMMIT_TOKEN);
}
YY_BREAK
case 65:
YY_RULE_SETUP
#line 440 "pars0lex.l"
{
- return(PARS_CURSOR_TOKEN);
+ return(PARS_ROLLBACK_TOKEN);
}
YY_BREAK
case 66:
YY_RULE_SETUP
#line 444 "pars0lex.l"
{
- return(PARS_OPEN_TOKEN);
+ return(PARS_WORK_TOKEN);
}
YY_BREAK
case 67:
YY_RULE_SETUP
#line 448 "pars0lex.l"
{
- return(PARS_FETCH_TOKEN);
+ return(PARS_EXIT_TOKEN);
}
YY_BREAK
case 68:
YY_RULE_SETUP
#line 452 "pars0lex.l"
{
- return(PARS_CLOSE_TOKEN);
+ return(PARS_FUNCTION_TOKEN);
}
YY_BREAK
case 69:
YY_RULE_SETUP
#line 456 "pars0lex.l"
{
- return(PARS_NOTFOUND_TOKEN);
+ return(PARS_LOCK_TOKEN);
}
YY_BREAK
case 70:
YY_RULE_SETUP
#line 460 "pars0lex.l"
{
- return(PARS_TO_CHAR_TOKEN);
+ return(PARS_SHARE_TOKEN);
}
YY_BREAK
case 71:
YY_RULE_SETUP
#line 464 "pars0lex.l"
{
- return(PARS_TO_NUMBER_TOKEN);
+ return(PARS_MODE_TOKEN);
}
YY_BREAK
case 72:
YY_RULE_SETUP
#line 468 "pars0lex.l"
{
- return(PARS_TO_BINARY_TOKEN);
+ return(PARS_LIKE_TOKEN);
}
YY_BREAK
case 73:
YY_RULE_SETUP
#line 472 "pars0lex.l"
{
- return(PARS_BINARY_TO_NUMBER_TOKEN);
+ return(PARS_BIGINT_TOKEN);
}
YY_BREAK
case 74:
YY_RULE_SETUP
#line 476 "pars0lex.l"
{
- return(PARS_SUBSTR_TOKEN);
-}
- YY_BREAK
-case 75:
-YY_RULE_SETUP
-#line 480 "pars0lex.l"
-{
- return(PARS_REPLSTR_TOKEN);
-}
- YY_BREAK
-case 76:
-YY_RULE_SETUP
-#line 484 "pars0lex.l"
-{
- return(PARS_CONCAT_TOKEN);
-}
- YY_BREAK
-case 77:
-YY_RULE_SETUP
-#line 488 "pars0lex.l"
-{
- return(PARS_INSTR_TOKEN);
-}
- YY_BREAK
-case 78:
-YY_RULE_SETUP
-#line 492 "pars0lex.l"
-{
- return(PARS_LENGTH_TOKEN);
-}
- YY_BREAK
-case 79:
-YY_RULE_SETUP
-#line 496 "pars0lex.l"
-{
- return(PARS_SYSDATE_TOKEN);
-}
- YY_BREAK
-case 80:
-YY_RULE_SETUP
-#line 500 "pars0lex.l"
-{
- return(PARS_PRINTF_TOKEN);
-}
- YY_BREAK
-case 81:
-YY_RULE_SETUP
-#line 504 "pars0lex.l"
-{
- return(PARS_ASSERT_TOKEN);
-}
- YY_BREAK
-case 82:
-YY_RULE_SETUP
-#line 508 "pars0lex.l"
-{
- return(PARS_RND_TOKEN);
-}
- YY_BREAK
-case 83:
-YY_RULE_SETUP
-#line 512 "pars0lex.l"
-{
- return(PARS_RND_STR_TOKEN);
-}
- YY_BREAK
-case 84:
-YY_RULE_SETUP
-#line 516 "pars0lex.l"
-{
- return(PARS_ROW_PRINTF_TOKEN);
-}
- YY_BREAK
-case 85:
-YY_RULE_SETUP
-#line 520 "pars0lex.l"
-{
- return(PARS_COMMIT_TOKEN);
-}
- YY_BREAK
-case 86:
-YY_RULE_SETUP
-#line 524 "pars0lex.l"
-{
- return(PARS_ROLLBACK_TOKEN);
-}
- YY_BREAK
-case 87:
-YY_RULE_SETUP
-#line 528 "pars0lex.l"
-{
- return(PARS_WORK_TOKEN);
-}
- YY_BREAK
-case 88:
-YY_RULE_SETUP
-#line 532 "pars0lex.l"
-{
- return(PARS_UNSIGNED_TOKEN);
-}
- YY_BREAK
-case 89:
-YY_RULE_SETUP
-#line 536 "pars0lex.l"
-{
- return(PARS_EXIT_TOKEN);
-}
- YY_BREAK
-case 90:
-YY_RULE_SETUP
-#line 540 "pars0lex.l"
-{
- return(PARS_FUNCTION_TOKEN);
-}
- YY_BREAK
-case 91:
-YY_RULE_SETUP
-#line 544 "pars0lex.l"
-{
- return(PARS_LOCK_TOKEN);
-}
- YY_BREAK
-case 92:
-YY_RULE_SETUP
-#line 548 "pars0lex.l"
-{
- return(PARS_SHARE_TOKEN);
-}
- YY_BREAK
-case 93:
-YY_RULE_SETUP
-#line 552 "pars0lex.l"
-{
- return(PARS_MODE_TOKEN);
-}
- YY_BREAK
-case 94:
-YY_RULE_SETUP
-#line 556 "pars0lex.l"
-{
- return(PARS_LIKE_TOKEN);
-}
- YY_BREAK
-case 95:
-YY_RULE_SETUP
-#line 560 "pars0lex.l"
-{
- return(PARS_BIGINT_TOKEN);
-}
- YY_BREAK
-case 96:
-YY_RULE_SETUP
-#line 564 "pars0lex.l"
-{
yylval = sym_tab_add_id(pars_sym_tab_global,
(byte*) yytext,
ut_strlen(yytext));
return(PARS_ID_TOKEN);
}
YY_BREAK
-case 97:
+case 75:
YY_RULE_SETUP
-#line 571 "pars0lex.l"
+#line 483 "pars0lex.l"
{
yylval = sym_tab_add_id(pars_sym_tab_global,
(byte*) yytext,
@@ -2020,192 +1739,192 @@ YY_RULE_SETUP
return(PARS_TABLE_NAME_TOKEN);
}
YY_BREAK
-case 98:
+case 76:
YY_RULE_SETUP
-#line 578 "pars0lex.l"
+#line 490 "pars0lex.l"
{
return(PARS_DDOT_TOKEN);
}
YY_BREAK
-case 99:
+case 77:
YY_RULE_SETUP
-#line 582 "pars0lex.l"
+#line 494 "pars0lex.l"
{
return(PARS_ASSIGN_TOKEN);
}
YY_BREAK
-case 100:
+case 78:
YY_RULE_SETUP
-#line 586 "pars0lex.l"
+#line 498 "pars0lex.l"
{
return(PARS_LE_TOKEN);
}
YY_BREAK
-case 101:
+case 79:
YY_RULE_SETUP
-#line 590 "pars0lex.l"
+#line 502 "pars0lex.l"
{
return(PARS_GE_TOKEN);
}
YY_BREAK
-case 102:
+case 80:
YY_RULE_SETUP
-#line 594 "pars0lex.l"
+#line 506 "pars0lex.l"
{
return(PARS_NE_TOKEN);
}
YY_BREAK
-case 103:
+case 81:
YY_RULE_SETUP
-#line 598 "pars0lex.l"
+#line 510 "pars0lex.l"
{
return((int)(*yytext));
}
YY_BREAK
-case 104:
+case 82:
YY_RULE_SETUP
-#line 603 "pars0lex.l"
+#line 515 "pars0lex.l"
{
return((int)(*yytext));
}
YY_BREAK
-case 105:
+case 83:
YY_RULE_SETUP
-#line 608 "pars0lex.l"
+#line 520 "pars0lex.l"
{
return((int)(*yytext));
}
YY_BREAK
-case 106:
+case 84:
YY_RULE_SETUP
-#line 613 "pars0lex.l"
+#line 525 "pars0lex.l"
{
return((int)(*yytext));
}
YY_BREAK
-case 107:
+case 85:
YY_RULE_SETUP
-#line 618 "pars0lex.l"
+#line 530 "pars0lex.l"
{
return((int)(*yytext));
}
YY_BREAK
-case 108:
+case 86:
YY_RULE_SETUP
-#line 623 "pars0lex.l"
+#line 535 "pars0lex.l"
{
return((int)(*yytext));
}
YY_BREAK
-case 109:
+case 87:
YY_RULE_SETUP
-#line 628 "pars0lex.l"
+#line 540 "pars0lex.l"
{
return((int)(*yytext));
}
YY_BREAK
-case 110:
+case 88:
YY_RULE_SETUP
-#line 633 "pars0lex.l"
+#line 545 "pars0lex.l"
{
return((int)(*yytext));
}
YY_BREAK
-case 111:
+case 89:
YY_RULE_SETUP
-#line 638 "pars0lex.l"
+#line 550 "pars0lex.l"
{
return((int)(*yytext));
}
YY_BREAK
-case 112:
+case 90:
YY_RULE_SETUP
-#line 643 "pars0lex.l"
+#line 555 "pars0lex.l"
{
return((int)(*yytext));
}
YY_BREAK
-case 113:
+case 91:
YY_RULE_SETUP
-#line 648 "pars0lex.l"
+#line 560 "pars0lex.l"
{
return((int)(*yytext));
}
YY_BREAK
-case 114:
+case 92:
YY_RULE_SETUP
-#line 653 "pars0lex.l"
+#line 565 "pars0lex.l"
{
return((int)(*yytext));
}
YY_BREAK
-case 115:
+case 93:
YY_RULE_SETUP
-#line 658 "pars0lex.l"
+#line 570 "pars0lex.l"
{
return((int)(*yytext));
}
YY_BREAK
-case 116:
+case 94:
YY_RULE_SETUP
-#line 663 "pars0lex.l"
+#line 575 "pars0lex.l"
{
return((int)(*yytext));
}
YY_BREAK
-case 117:
+case 95:
YY_RULE_SETUP
-#line 668 "pars0lex.l"
+#line 580 "pars0lex.l"
{
return((int)(*yytext));
}
YY_BREAK
-case 118:
+case 96:
YY_RULE_SETUP
-#line 673 "pars0lex.l"
+#line 585 "pars0lex.l"
BEGIN(comment); /* eat up comment */
YY_BREAK
-case 119:
-/* rule 119 can match eol */
+case 97:
+/* rule 97 can match eol */
YY_RULE_SETUP
-#line 675 "pars0lex.l"
+#line 587 "pars0lex.l"
YY_BREAK
-case 120:
-/* rule 120 can match eol */
+case 98:
+/* rule 98 can match eol */
YY_RULE_SETUP
-#line 676 "pars0lex.l"
+#line 588 "pars0lex.l"
YY_BREAK
-case 121:
+case 99:
YY_RULE_SETUP
-#line 677 "pars0lex.l"
+#line 589 "pars0lex.l"
BEGIN(INITIAL);
YY_BREAK
-case 122:
-/* rule 122 can match eol */
+case 100:
+/* rule 100 can match eol */
YY_RULE_SETUP
-#line 679 "pars0lex.l"
+#line 591 "pars0lex.l"
/* eat up whitespace */
YY_BREAK
-case 123:
+case 101:
YY_RULE_SETUP
-#line 682 "pars0lex.l"
+#line 594 "pars0lex.l"
{
fprintf(stderr,"Unrecognized character: %02x\n",
*yytext);
@@ -2215,12 +1934,12 @@ YY_RULE_SETUP
return(0);
}
YY_BREAK
-case 124:
+case 102:
YY_RULE_SETUP
-#line 691 "pars0lex.l"
+#line 603 "pars0lex.l"
YY_FATAL_ERROR( "flex scanner jammed" );
YY_BREAK
-#line 2223 "lexyy.cc"
+#line 1942 "lexyy.cc"
case YY_STATE_EOF(INITIAL):
case YY_STATE_EOF(comment):
case YY_STATE_EOF(quoted):
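
The three YY_STATE_EOF cases exist because this scanner runs in several start conditions (INITIAL, comment, quoted), each needing its own end-of-input handling; the rules earlier in the diff hop between them with BEGIN(comment) and BEGIN(INITIAL). Outside flex the same idea is just a mode variable; a hypothetical stand-alone sketch:

#include <stdio.h>

enum mode { INITIAL_MODE, COMMENT_MODE };  /* mirrors flex start conditions */

/* Strip C-style comments from s into out: a hand-rolled version of the
   BEGIN(comment)/BEGIN(INITIAL) dance in the rules above. */
static void strip_comments(const char *s, char *out)
{
    enum mode m = INITIAL_MODE;
    while (*s) {
        if (m == INITIAL_MODE && s[0] == '/' && s[1] == '*') {
            m = COMMENT_MODE; s += 2;      /* BEGIN(comment) */
        } else if (m == COMMENT_MODE && s[0] == '*' && s[1] == '/') {
            m = INITIAL_MODE; s += 2;      /* BEGIN(INITIAL) */
        } else {
            if (m == INITIAL_MODE)
                *out++ = *s;               /* only INITIAL text survives */
            s++;
        }
    }
    *out = '\0';
}

int main(void)
{
    char buf[64];
    strip_comments("SELECT /* eat up comment */ 1", buf);
    puts(buf);   /* prints "SELECT  1" */
    return 0;
}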
@@ -2521,7 +2240,7 @@ static int yy_get_next_buffer (void)
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 425 )
+ if ( yy_current_state >= 307 )
yy_c = yy_meta[yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];
@@ -2549,11 +2268,11 @@ static int yy_get_next_buffer (void)
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 425 )
+ if ( yy_current_state >= 307 )
yy_c = yy_meta[yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];
- yy_is_jam = (yy_current_state == 424);
+ yy_is_jam = (yy_current_state == 306);
return yy_is_jam ? 0 : yy_current_state;
}
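
The state-count changes above (425/424 down to 307/306) follow mechanically from the trimmed rule set: fewer tokens means a smaller DFA, and the generated helpers hard-code the jam (dead) state when re-walking the tables. The loop itself is the standard compressed-table walk over yy_nxt[], with yy_chk[]/yy_def[]/yy_meta[] resolving the table compression. An uncompressed toy version of the same table-driven walk, with hypothetical tables that recognize only identifiers:

#include <stdio.h>
#include <ctype.h>

/* States: 0 = start, 1 = in-identifier (accepting), 2 = dead ("jam"). */
enum { S_START, S_IDENT, S_DEAD };

/* Character classes: 0 = letter/underscore, 1 = digit, 2 = other. */
static int cclass(int c)
{
    if (isalpha(c) || c == '_') return 0;
    if (isdigit(c))             return 1;
    return 2;
}

/* next_state[state][class]: the uncompressed analogue of yy_nxt[]. */
static const int next_state[3][3] = {
    /* letter   digit    other  */
    {  S_IDENT, S_DEAD,  S_DEAD },   /* S_START */
    {  S_IDENT, S_IDENT, S_DEAD },   /* S_IDENT */
    {  S_DEAD,  S_DEAD,  S_DEAD },   /* S_DEAD  */
};

/* Return the length of the identifier prefix of s, 0 if none. */
static int match_ident(const char *s)
{
    int state = S_START, len = 0, last_accept = 0;
    for (; s[len]; len++) {
        state = next_state[state][cclass((unsigned char) s[len])];
        if (state == S_DEAD)
            break;                    /* the "jam" test in the loop above */
        if (state == S_IDENT)
            last_accept = len + 1;    /* like yy_last_accepting_cpos */
    }
    return last_accept;
}

int main(void)
{
    printf("%d\n", match_ident("yylval2+3"));  /* prints 7 */
    return 0;
}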
@@ -3106,12 +2825,11 @@ static void yyfree (void * ptr )
#define YYTABLES_NAME "yytables"
-#line 691 "pars0lex.l"
+#line 603 "pars0lex.l"
/**********************************************************************
Release any resources used by the lexer. */
-UNIV_INTERN
void
pars_lexer_close(void)
/*==================*/
diff --git a/storage/innobase/pars/make_bison.sh b/storage/innobase/pars/make_bison.sh
index a1722f5f6b7..6b3cb693978 100755
--- a/storage/innobase/pars/make_bison.sh
+++ b/storage/innobase/pars/make_bison.sh
@@ -1,6 +1,6 @@
#!/bin/bash
#
-# Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+# Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
@@ -26,7 +26,6 @@ mv pars0grm.tab.h ../include/pars0grm.h
sed -e '
s/'"$TMPFILE"'/'"$OUTFILE"'/;
s/^\(\(YYSTYPE\|int\) yy\(char\|nerrs\)\)/static \1/;
-s/\(\(YYSTYPE\|int\) yy\(lval\|parse\)\)/UNIV_INTERN \1/;
' < "$TMPFILE" > "$OUTFILE"
rm "$TMPFILE"
diff --git a/storage/innobase/pars/make_flex.sh b/storage/innobase/pars/make_flex.sh
index b976a56a9dd..2baae9c92df 100755
--- a/storage/innobase/pars/make_flex.sh
+++ b/storage/innobase/pars/make_flex.sh
@@ -24,8 +24,7 @@ OUTFILE=lexyy.cc
flex -o $TMPFILE pars0lex.l
-# AIX needs its includes done in a certain order, so include "univ.i" first
-# to be sure we get it right.
+# The Microsoft compiler needs its includes done in a certain order.
echo '#include "univ.i"' > $OUTFILE
# flex assigns a pointer to an int in one place without a cast, resulting in
diff --git a/storage/innobase/pars/pars0grm.cc b/storage/innobase/pars/pars0grm.cc
index c3649d92555..7e10a783310 100644
--- a/storage/innobase/pars/pars0grm.cc
+++ b/storage/innobase/pars/pars0grm.cc
@@ -1,14 +1,14 @@
-/* A Bison parser, made by GNU Bison 2.3. */
+/* A Bison parser, made by GNU Bison 3.4.2. */
-/* Skeleton implementation for Bison's Yacc-like parsers in C
+/* Bison implementation for Yacc-like parsers in C
- Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
- Free Software Foundation, Inc.
+ Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2019 Free Software Foundation,
+ Inc.
- This program is free software; you can redistribute it and/or modify
+ This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,9 +16,7 @@
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor,
- Boston, MA 02110-1335 USA. */
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
/* As a special exception, you may create a larger work that contains
part or all of the Bison parser skeleton and distribute that work
@@ -43,11 +41,14 @@
define necessary library symbols; they are noted "INFRINGES ON
USER NAME SPACE" below. */
+/* Undocumented macros, especially those whose name start with YY_,
+ are private implementation details. Do not rely on them. */
+
/* Identify Bison output. */
#define YYBISON 1
/* Bison version. */
-#define YYBISON_VERSION "2.3"
+#define YYBISON_VERSION "3.4.2"
/* Skeleton name. */
#define YYSKELETON_NAME "yacc.c"
@@ -55,236 +56,23 @@
/* Pure parsers. */
#define YYPURE 0
-/* Using locations. */
-#define YYLSP_NEEDED 0
+/* Push parsers. */
+#define YYPUSH 0
+/* Pull parsers. */
+#define YYPULL 1
-/* Tokens. */
-#ifndef YYTOKENTYPE
-# define YYTOKENTYPE
- /* Put the tokens into the symbol table, so that GDB and other debuggers
- know about them. */
- enum yytokentype {
- PARS_INT_LIT = 258,
- PARS_FLOAT_LIT = 259,
- PARS_STR_LIT = 260,
- PARS_FIXBINARY_LIT = 261,
- PARS_BLOB_LIT = 262,
- PARS_NULL_LIT = 263,
- PARS_ID_TOKEN = 264,
- PARS_AND_TOKEN = 265,
- PARS_OR_TOKEN = 266,
- PARS_NOT_TOKEN = 267,
- PARS_GE_TOKEN = 268,
- PARS_LE_TOKEN = 269,
- PARS_NE_TOKEN = 270,
- PARS_PROCEDURE_TOKEN = 271,
- PARS_IN_TOKEN = 272,
- PARS_OUT_TOKEN = 273,
- PARS_BINARY_TOKEN = 274,
- PARS_BLOB_TOKEN = 275,
- PARS_INT_TOKEN = 276,
- PARS_INTEGER_TOKEN = 277,
- PARS_FLOAT_TOKEN = 278,
- PARS_CHAR_TOKEN = 279,
- PARS_IS_TOKEN = 280,
- PARS_BEGIN_TOKEN = 281,
- PARS_END_TOKEN = 282,
- PARS_IF_TOKEN = 283,
- PARS_THEN_TOKEN = 284,
- PARS_ELSE_TOKEN = 285,
- PARS_ELSIF_TOKEN = 286,
- PARS_LOOP_TOKEN = 287,
- PARS_WHILE_TOKEN = 288,
- PARS_RETURN_TOKEN = 289,
- PARS_SELECT_TOKEN = 290,
- PARS_SUM_TOKEN = 291,
- PARS_COUNT_TOKEN = 292,
- PARS_DISTINCT_TOKEN = 293,
- PARS_FROM_TOKEN = 294,
- PARS_WHERE_TOKEN = 295,
- PARS_FOR_TOKEN = 296,
- PARS_DDOT_TOKEN = 297,
- PARS_READ_TOKEN = 298,
- PARS_ORDER_TOKEN = 299,
- PARS_BY_TOKEN = 300,
- PARS_ASC_TOKEN = 301,
- PARS_DESC_TOKEN = 302,
- PARS_INSERT_TOKEN = 303,
- PARS_INTO_TOKEN = 304,
- PARS_VALUES_TOKEN = 305,
- PARS_UPDATE_TOKEN = 306,
- PARS_SET_TOKEN = 307,
- PARS_DELETE_TOKEN = 308,
- PARS_CURRENT_TOKEN = 309,
- PARS_OF_TOKEN = 310,
- PARS_CREATE_TOKEN = 311,
- PARS_TABLE_TOKEN = 312,
- PARS_INDEX_TOKEN = 313,
- PARS_UNIQUE_TOKEN = 314,
- PARS_CLUSTERED_TOKEN = 315,
- PARS_DOES_NOT_FIT_IN_MEM_TOKEN = 316,
- PARS_ON_TOKEN = 317,
- PARS_ASSIGN_TOKEN = 318,
- PARS_DECLARE_TOKEN = 319,
- PARS_CURSOR_TOKEN = 320,
- PARS_SQL_TOKEN = 321,
- PARS_OPEN_TOKEN = 322,
- PARS_FETCH_TOKEN = 323,
- PARS_CLOSE_TOKEN = 324,
- PARS_NOTFOUND_TOKEN = 325,
- PARS_TO_CHAR_TOKEN = 326,
- PARS_TO_NUMBER_TOKEN = 327,
- PARS_TO_BINARY_TOKEN = 328,
- PARS_BINARY_TO_NUMBER_TOKEN = 329,
- PARS_SUBSTR_TOKEN = 330,
- PARS_REPLSTR_TOKEN = 331,
- PARS_CONCAT_TOKEN = 332,
- PARS_INSTR_TOKEN = 333,
- PARS_LENGTH_TOKEN = 334,
- PARS_SYSDATE_TOKEN = 335,
- PARS_PRINTF_TOKEN = 336,
- PARS_ASSERT_TOKEN = 337,
- PARS_RND_TOKEN = 338,
- PARS_RND_STR_TOKEN = 339,
- PARS_ROW_PRINTF_TOKEN = 340,
- PARS_COMMIT_TOKEN = 341,
- PARS_ROLLBACK_TOKEN = 342,
- PARS_WORK_TOKEN = 343,
- PARS_UNSIGNED_TOKEN = 344,
- PARS_EXIT_TOKEN = 345,
- PARS_FUNCTION_TOKEN = 346,
- PARS_LOCK_TOKEN = 347,
- PARS_SHARE_TOKEN = 348,
- PARS_MODE_TOKEN = 349,
- PARS_LIKE_TOKEN = 350,
- PARS_LIKE_TOKEN_EXACT = 351,
- PARS_LIKE_TOKEN_PREFIX = 352,
- PARS_LIKE_TOKEN_SUFFIX = 353,
- PARS_LIKE_TOKEN_SUBSTR = 354,
- PARS_TABLE_NAME_TOKEN = 355,
- PARS_COMPACT_TOKEN = 356,
- PARS_BLOCK_SIZE_TOKEN = 357,
- PARS_BIGINT_TOKEN = 358,
- NEG = 359
- };
-#endif
-/* Tokens. */
-#define PARS_INT_LIT 258
-#define PARS_FLOAT_LIT 259
-#define PARS_STR_LIT 260
-#define PARS_FIXBINARY_LIT 261
-#define PARS_BLOB_LIT 262
-#define PARS_NULL_LIT 263
-#define PARS_ID_TOKEN 264
-#define PARS_AND_TOKEN 265
-#define PARS_OR_TOKEN 266
-#define PARS_NOT_TOKEN 267
-#define PARS_GE_TOKEN 268
-#define PARS_LE_TOKEN 269
-#define PARS_NE_TOKEN 270
-#define PARS_PROCEDURE_TOKEN 271
-#define PARS_IN_TOKEN 272
-#define PARS_OUT_TOKEN 273
-#define PARS_BINARY_TOKEN 274
-#define PARS_BLOB_TOKEN 275
-#define PARS_INT_TOKEN 276
-#define PARS_INTEGER_TOKEN 277
-#define PARS_FLOAT_TOKEN 278
-#define PARS_CHAR_TOKEN 279
-#define PARS_IS_TOKEN 280
-#define PARS_BEGIN_TOKEN 281
-#define PARS_END_TOKEN 282
-#define PARS_IF_TOKEN 283
-#define PARS_THEN_TOKEN 284
-#define PARS_ELSE_TOKEN 285
-#define PARS_ELSIF_TOKEN 286
-#define PARS_LOOP_TOKEN 287
-#define PARS_WHILE_TOKEN 288
-#define PARS_RETURN_TOKEN 289
-#define PARS_SELECT_TOKEN 290
-#define PARS_SUM_TOKEN 291
-#define PARS_COUNT_TOKEN 292
-#define PARS_DISTINCT_TOKEN 293
-#define PARS_FROM_TOKEN 294
-#define PARS_WHERE_TOKEN 295
-#define PARS_FOR_TOKEN 296
-#define PARS_DDOT_TOKEN 297
-#define PARS_READ_TOKEN 298
-#define PARS_ORDER_TOKEN 299
-#define PARS_BY_TOKEN 300
-#define PARS_ASC_TOKEN 301
-#define PARS_DESC_TOKEN 302
-#define PARS_INSERT_TOKEN 303
-#define PARS_INTO_TOKEN 304
-#define PARS_VALUES_TOKEN 305
-#define PARS_UPDATE_TOKEN 306
-#define PARS_SET_TOKEN 307
-#define PARS_DELETE_TOKEN 308
-#define PARS_CURRENT_TOKEN 309
-#define PARS_OF_TOKEN 310
-#define PARS_CREATE_TOKEN 311
-#define PARS_TABLE_TOKEN 312
-#define PARS_INDEX_TOKEN 313
-#define PARS_UNIQUE_TOKEN 314
-#define PARS_CLUSTERED_TOKEN 315
-#define PARS_DOES_NOT_FIT_IN_MEM_TOKEN 316
-#define PARS_ON_TOKEN 317
-#define PARS_ASSIGN_TOKEN 318
-#define PARS_DECLARE_TOKEN 319
-#define PARS_CURSOR_TOKEN 320
-#define PARS_SQL_TOKEN 321
-#define PARS_OPEN_TOKEN 322
-#define PARS_FETCH_TOKEN 323
-#define PARS_CLOSE_TOKEN 324
-#define PARS_NOTFOUND_TOKEN 325
-#define PARS_TO_CHAR_TOKEN 326
-#define PARS_TO_NUMBER_TOKEN 327
-#define PARS_TO_BINARY_TOKEN 328
-#define PARS_BINARY_TO_NUMBER_TOKEN 329
-#define PARS_SUBSTR_TOKEN 330
-#define PARS_REPLSTR_TOKEN 331
-#define PARS_CONCAT_TOKEN 332
-#define PARS_INSTR_TOKEN 333
-#define PARS_LENGTH_TOKEN 334
-#define PARS_SYSDATE_TOKEN 335
-#define PARS_PRINTF_TOKEN 336
-#define PARS_ASSERT_TOKEN 337
-#define PARS_RND_TOKEN 338
-#define PARS_RND_STR_TOKEN 339
-#define PARS_ROW_PRINTF_TOKEN 340
-#define PARS_COMMIT_TOKEN 341
-#define PARS_ROLLBACK_TOKEN 342
-#define PARS_WORK_TOKEN 343
-#define PARS_UNSIGNED_TOKEN 344
-#define PARS_EXIT_TOKEN 345
-#define PARS_FUNCTION_TOKEN 346
-#define PARS_LOCK_TOKEN 347
-#define PARS_SHARE_TOKEN 348
-#define PARS_MODE_TOKEN 349
-#define PARS_LIKE_TOKEN 350
-#define PARS_LIKE_TOKEN_EXACT 351
-#define PARS_LIKE_TOKEN_PREFIX 352
-#define PARS_LIKE_TOKEN_SUFFIX 353
-#define PARS_LIKE_TOKEN_SUBSTR 354
-#define PARS_TABLE_NAME_TOKEN 355
-#define PARS_COMPACT_TOKEN 356
-#define PARS_BLOCK_SIZE_TOKEN 357
-#define PARS_BIGINT_TOKEN 358
-#define NEG 359
-
-
-
-
-/* Copy the first part of user declarations. */
-#line 28 "pars0grm.y"
+
+
+/* First part of user prologue. */
+#line 29 "pars0grm.y"
/* The value of the semantic attribute is a pointer to a query tree node
que_node_t */
#include "univ.i"
-#include <math.h> /* Can't be before univ.i */
+#include <math.h>
#include "pars0pars.h"
#include "mem0mem.h"
#include "que0types.h"
@@ -294,15 +82,22 @@ que_node_t */
#define YYSTYPE que_node_t*
/* #define __STDC__ */
-
int
yylex(void);
+#line 89 "pars0grm.cc"
-/* Enabling traces. */
-#ifndef YYDEBUG
-# define YYDEBUG 0
-#endif
+# ifndef YY_NULLPTR
+# if defined __cplusplus
+# if 201103L <= __cplusplus
+# define YY_NULLPTR nullptr
+# else
+# define YY_NULLPTR 0
+# endif
+# else
+# define YY_NULLPTR ((void*)0)
+# endif
+# endif
/* Enabling verbose error messages. */
#ifdef YYERROR_VERBOSE
@@ -312,25 +107,119 @@ yylex(void);
# define YYERROR_VERBOSE 0
#endif
-/* Enabling the token table. */
-#ifndef YYTOKEN_TABLE
-# define YYTOKEN_TABLE 0
+/* Use api.header.include to #include this header
+ instead of duplicating it here. */
+#ifndef YY_YY_PARS0GRM_TAB_H_INCLUDED
+# define YY_YY_PARS0GRM_TAB_H_INCLUDED
+/* Debug traces. */
+#ifndef YYDEBUG
+# define YYDEBUG 0
+#endif
+#if YYDEBUG
+extern int yydebug;
#endif
+/* Token type. */
+#ifndef YYTOKENTYPE
+# define YYTOKENTYPE
+ enum yytokentype
+ {
+ PARS_INT_LIT = 258,
+ PARS_FLOAT_LIT = 259,
+ PARS_STR_LIT = 260,
+ PARS_NULL_LIT = 261,
+ PARS_ID_TOKEN = 262,
+ PARS_AND_TOKEN = 263,
+ PARS_OR_TOKEN = 264,
+ PARS_NOT_TOKEN = 265,
+ PARS_GE_TOKEN = 266,
+ PARS_LE_TOKEN = 267,
+ PARS_NE_TOKEN = 268,
+ PARS_PROCEDURE_TOKEN = 269,
+ PARS_IN_TOKEN = 270,
+ PARS_INT_TOKEN = 271,
+ PARS_CHAR_TOKEN = 272,
+ PARS_IS_TOKEN = 273,
+ PARS_BEGIN_TOKEN = 274,
+ PARS_END_TOKEN = 275,
+ PARS_IF_TOKEN = 276,
+ PARS_THEN_TOKEN = 277,
+ PARS_ELSE_TOKEN = 278,
+ PARS_ELSIF_TOKEN = 279,
+ PARS_LOOP_TOKEN = 280,
+ PARS_WHILE_TOKEN = 281,
+ PARS_RETURN_TOKEN = 282,
+ PARS_SELECT_TOKEN = 283,
+ PARS_COUNT_TOKEN = 284,
+ PARS_FROM_TOKEN = 285,
+ PARS_WHERE_TOKEN = 286,
+ PARS_FOR_TOKEN = 287,
+ PARS_DDOT_TOKEN = 288,
+ PARS_ORDER_TOKEN = 289,
+ PARS_BY_TOKEN = 290,
+ PARS_ASC_TOKEN = 291,
+ PARS_DESC_TOKEN = 292,
+ PARS_INSERT_TOKEN = 293,
+ PARS_INTO_TOKEN = 294,
+ PARS_VALUES_TOKEN = 295,
+ PARS_UPDATE_TOKEN = 296,
+ PARS_SET_TOKEN = 297,
+ PARS_DELETE_TOKEN = 298,
+ PARS_CURRENT_TOKEN = 299,
+ PARS_OF_TOKEN = 300,
+ PARS_CREATE_TOKEN = 301,
+ PARS_TABLE_TOKEN = 302,
+ PARS_INDEX_TOKEN = 303,
+ PARS_UNIQUE_TOKEN = 304,
+ PARS_CLUSTERED_TOKEN = 305,
+ PARS_ON_TOKEN = 306,
+ PARS_ASSIGN_TOKEN = 307,
+ PARS_DECLARE_TOKEN = 308,
+ PARS_CURSOR_TOKEN = 309,
+ PARS_SQL_TOKEN = 310,
+ PARS_OPEN_TOKEN = 311,
+ PARS_FETCH_TOKEN = 312,
+ PARS_CLOSE_TOKEN = 313,
+ PARS_NOTFOUND_TOKEN = 314,
+ PARS_TO_BINARY_TOKEN = 315,
+ PARS_SUBSTR_TOKEN = 316,
+ PARS_CONCAT_TOKEN = 317,
+ PARS_INSTR_TOKEN = 318,
+ PARS_LENGTH_TOKEN = 319,
+ PARS_COMMIT_TOKEN = 320,
+ PARS_ROLLBACK_TOKEN = 321,
+ PARS_WORK_TOKEN = 322,
+ PARS_EXIT_TOKEN = 323,
+ PARS_FUNCTION_TOKEN = 324,
+ PARS_LOCK_TOKEN = 325,
+ PARS_SHARE_TOKEN = 326,
+ PARS_MODE_TOKEN = 327,
+ PARS_LIKE_TOKEN = 328,
+ PARS_LIKE_TOKEN_EXACT = 329,
+ PARS_LIKE_TOKEN_PREFIX = 330,
+ PARS_LIKE_TOKEN_SUFFIX = 331,
+ PARS_LIKE_TOKEN_SUBSTR = 332,
+ PARS_TABLE_NAME_TOKEN = 333,
+ PARS_BIGINT_TOKEN = 334,
+ NEG = 335
+ };
+#endif
+
+/* Value type. */
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
typedef int YYSTYPE;
-# define yystype YYSTYPE /* obsolescent; will be withdrawn */
-# define YYSTYPE_IS_DECLARED 1
# define YYSTYPE_IS_TRIVIAL 1
+# define YYSTYPE_IS_DECLARED 1
#endif
+extern YYSTYPE yylval;
-/* Copy the second part of user declarations. */
+int yyparse (void);
+
+#endif /* !YY_YY_PARS0GRM_TAB_H_INCLUDED */
-/* Line 216 of yacc.c. */
-#line 334 "pars0grm.cc"
#ifdef short
# undef short
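
The regenerated header section above (Bison 3's api.header.include layout) spells out the parser's external contract: yyparse() pulls tokens by calling yylex(), which returns a token number (literal characters below 256, named tokens from 258 up, as in the enum) and leaves the semantic value in the global yylval, typed via YYSTYPE, here que_node_t*. A toy illustration of that pull contract; toy_yylex, toy_yyparse and the token values are hypothetical:

#include <stdio.h>

typedef int YYSTYPE;   /* the real grammar uses que_node_t*; int keeps the toy self-contained */
YYSTYPE yylval;        /* semantic value: the lexer fills it in before returning a token */

enum { TOK_EOF = 0, TOK_NUM = 258, TOK_PLUS = 259 };  /* named tokens start at 258 */

/* Canned token stream standing in for the flex scanner's yylex(). */
static int toy_yylex(void)
{
    static int step;
    switch (step++) {
    case 0: yylval = 2; return TOK_NUM;
    case 1:             return TOK_PLUS;
    case 2: yylval = 3; return TOK_NUM;
    default:            return TOK_EOF;
    }
}

/* Minimal pull-parser loop with the same shape as the generated yyparse():
   call the lexer, read yylval, return 0 on success. */
static int toy_yyparse(void)
{
    int sum = 0, tok;
    while ((tok = toy_yylex()) != TOK_EOF)
        if (tok == TOK_NUM)
            sum += yylval;   /* consume the value the lexer left behind */
    printf("sum = %d\n", sum);
    return 0;
}

int main(void) { return toy_yyparse(); }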
@@ -344,23 +233,20 @@ typedef unsigned char yytype_uint8;
#ifdef YYTYPE_INT8
typedef YYTYPE_INT8 yytype_int8;
-#elif (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-typedef signed char yytype_int8;
#else
-typedef short int yytype_int8;
+typedef signed char yytype_int8;
#endif
#ifdef YYTYPE_UINT16
typedef YYTYPE_UINT16 yytype_uint16;
#else
-typedef unsigned short int yytype_uint16;
+typedef unsigned short yytype_uint16;
#endif
#ifdef YYTYPE_INT16
typedef YYTYPE_INT16 yytype_int16;
#else
-typedef short int yytype_int16;
+typedef short yytype_int16;
#endif
#ifndef YYSIZE_T
@@ -368,12 +254,11 @@ typedef short int yytype_int16;
# define YYSIZE_T __SIZE_TYPE__
# elif defined size_t
# define YYSIZE_T size_t
-# elif ! defined YYSIZE_T && (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
+# elif ! defined YYSIZE_T
# include <stddef.h> /* INFRINGES ON USER NAME SPACE */
# define YYSIZE_T size_t
# else
-# define YYSIZE_T unsigned int
+# define YYSIZE_T unsigned
# endif
#endif
@@ -383,39 +268,61 @@ typedef short int yytype_int16;
# if defined YYENABLE_NLS && YYENABLE_NLS
# if ENABLE_NLS
# include <libintl.h> /* INFRINGES ON USER NAME SPACE */
-# define YY_(msgid) dgettext ("bison-runtime", msgid)
+# define YY_(Msgid) dgettext ("bison-runtime", Msgid)
# endif
# endif
# ifndef YY_
-# define YY_(msgid) msgid
+# define YY_(Msgid) Msgid
# endif
#endif
+#ifndef YY_ATTRIBUTE
+# if (defined __GNUC__ \
+ && (2 < __GNUC__ || (__GNUC__ == 2 && 96 <= __GNUC_MINOR__))) \
+ || defined __SUNPRO_C && 0x5110 <= __SUNPRO_C
+# define YY_ATTRIBUTE(Spec) __attribute__(Spec)
+# else
+# define YY_ATTRIBUTE(Spec) /* empty */
+# endif
+#endif
+
+#ifndef YY_ATTRIBUTE_PURE
+# define YY_ATTRIBUTE_PURE YY_ATTRIBUTE ((__pure__))
+#endif
+
+#ifndef YY_ATTRIBUTE_UNUSED
+# define YY_ATTRIBUTE_UNUSED YY_ATTRIBUTE ((__unused__))
+#endif
+
/* Suppress unused-variable warnings by "using" E. */
#if ! defined lint || defined __GNUC__
-# define YYUSE(e) ((void) (e))
+# define YYUSE(E) ((void) (E))
#else
-# define YYUSE(e) /* empty */
+# define YYUSE(E) /* empty */
#endif
-/* Identity function, used to suppress warnings about constant conditions. */
-#ifndef lint
-# define YYID(n) (n)
+#if defined __GNUC__ && ! defined __ICC && 407 <= __GNUC__ * 100 + __GNUC_MINOR__
+/* Suppress an incorrect diagnostic about yylval being uninitialized. */
+# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \
+ _Pragma ("GCC diagnostic push") \
+ _Pragma ("GCC diagnostic ignored \"-Wuninitialized\"")\
+ _Pragma ("GCC diagnostic ignored \"-Wmaybe-uninitialized\"")
+# define YY_IGNORE_MAYBE_UNINITIALIZED_END \
+ _Pragma ("GCC diagnostic pop")
#else
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static int
-YYID (int i)
-#else
-static int
-YYID (i)
- int i;
+# define YY_INITIAL_VALUE(Value) Value
#endif
-{
- return i;
-}
+#ifndef YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
+# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
+# define YY_IGNORE_MAYBE_UNINITIALIZED_END
+#endif
+#ifndef YY_INITIAL_VALUE
+# define YY_INITIAL_VALUE(Value) /* Nothing. */
#endif
+
+#define YY_ASSERT(E) ((void) (0 && (E)))
+
#if ! defined yyoverflow || YYERROR_VERBOSE
/* The parser invokes alloca or malloc; define the necessary symbols. */
@@ -433,11 +340,11 @@ YYID (i)
# define alloca _alloca
# else
# define YYSTACK_ALLOC alloca
-# if ! defined _ALLOCA_H && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
+# if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS
# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
-# ifndef _STDLIB_H
-# define _STDLIB_H 1
+ /* Use EXIT_SUCCESS as a witness for stdlib.h. */
+# ifndef EXIT_SUCCESS
+# define EXIT_SUCCESS 0
# endif
# endif
# endif
@@ -445,8 +352,8 @@ YYID (i)
# endif
# ifdef YYSTACK_ALLOC
- /* Pacify GCC's `empty if-body' warning. */
-# define YYSTACK_FREE(Ptr) do { /* empty */; } while (YYID (0))
+ /* Pacify GCC's 'empty if-body' warning. */
+# define YYSTACK_FREE(Ptr) do { /* empty */; } while (0)
# ifndef YYSTACK_ALLOC_MAXIMUM
/* The OS might guarantee only one guard page at the bottom of the stack,
and a page size can be as small as 4096 bytes. So we cannot safely
@@ -460,26 +367,24 @@ YYID (i)
# ifndef YYSTACK_ALLOC_MAXIMUM
# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM
# endif
-# if (defined __cplusplus && ! defined _STDLIB_H \
+# if (defined __cplusplus && ! defined EXIT_SUCCESS \
&& ! ((defined YYMALLOC || defined malloc) \
- && (defined YYFREE || defined free)))
+ && (defined YYFREE || defined free)))
# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
-# ifndef _STDLIB_H
-# define _STDLIB_H 1
+# ifndef EXIT_SUCCESS
+# define EXIT_SUCCESS 0
# endif
# endif
# ifndef YYMALLOC
# define YYMALLOC malloc
-# if ! defined malloc && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
+# if ! defined malloc && ! defined EXIT_SUCCESS
void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */
# endif
# endif
# ifndef YYFREE
# define YYFREE free
-# if ! defined free && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-void free (void*); /* INFRINGES ON USER NAME SPACE */
+# if ! defined free && ! defined EXIT_SUCCESS
+void free (void *); /* INFRINGES ON USER NAME SPACE */
# endif
# endif
# endif
@@ -488,14 +393,14 @@ void free (void*); /* INFRINGES ON USER NAME SPACE */
#if (! defined yyoverflow \
&& (! defined __cplusplus \
- || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL)))
+ || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL)))
/* A type that is properly aligned for any stack member. */
union yyalloc
{
- yytype_int16 yyss;
- YYSTYPE yyvs;
- };
+ yytype_int16 yyss_alloc;
+ YYSTYPE yyvs_alloc;
+};
/* The size of the maximum gap between one aligned stack and the next. */
# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1)
@@ -506,79 +411,85 @@ union yyalloc
((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \
+ YYSTACK_GAP_MAXIMUM)
-/* Copy COUNT objects from FROM to TO. The source and destination do
- not overlap. */
-# ifndef YYCOPY
-# if defined __GNUC__ && 1 < __GNUC__
-# define YYCOPY(To, From, Count) \
- __builtin_memcpy (To, From, (Count) * sizeof (*(From)))
-# else
-# define YYCOPY(To, From, Count) \
- do \
- { \
- YYSIZE_T yyi; \
- for (yyi = 0; yyi < (Count); yyi++) \
- (To)[yyi] = (From)[yyi]; \
- } \
- while (YYID (0))
-# endif
-# endif
+# define YYCOPY_NEEDED 1
/* Relocate STACK from its old location to the new one. The
local variables YYSIZE and YYSTACKSIZE give the old and new number of
elements in the stack, and YYPTR gives the new location of the
stack. Advance YYPTR to a properly aligned location for the next
stack. */
-# define YYSTACK_RELOCATE(Stack) \
- do \
- { \
- YYSIZE_T yynewbytes; \
- YYCOPY (&yyptr->Stack, Stack, yysize); \
- Stack = &yyptr->Stack; \
- yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \
- yyptr += yynewbytes / sizeof (*yyptr); \
- } \
- while (YYID (0))
+# define YYSTACK_RELOCATE(Stack_alloc, Stack) \
+ do \
+ { \
+ YYSIZE_T yynewbytes; \
+ YYCOPY (&yyptr->Stack_alloc, Stack, yysize); \
+ Stack = &yyptr->Stack_alloc; \
+ yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \
+ yyptr += yynewbytes / sizeof (*yyptr); \
+ } \
+ while (0)
#endif
+#if defined YYCOPY_NEEDED && YYCOPY_NEEDED
+/* Copy COUNT objects from SRC to DST. The source and destination do
+ not overlap. */
+# ifndef YYCOPY
+# if defined __GNUC__ && 1 < __GNUC__
+# define YYCOPY(Dst, Src, Count) \
+ __builtin_memcpy (Dst, Src, (Count) * sizeof (*(Src)))
+# else
+# define YYCOPY(Dst, Src, Count) \
+ do \
+ { \
+ YYSIZE_T yyi; \
+ for (yyi = 0; yyi < (Count); yyi++) \
+ (Dst)[yyi] = (Src)[yyi]; \
+ } \
+ while (0)
+# endif
+# endif
+#endif /* !YYCOPY_NEEDED */
+
/* YYFINAL -- State number of the termination state. */
#define YYFINAL 5
/* YYLAST -- Last index in YYTABLE. */
-#define YYLAST 816
+#define YYLAST 603
/* YYNTOKENS -- Number of terminals. */
-#define YYNTOKENS 120
+#define YYNTOKENS 96
/* YYNNTS -- Number of nonterminals. */
-#define YYNNTS 73
+#define YYNNTS 64
/* YYNRULES -- Number of rules. */
-#define YYNRULES 183
-/* YYNRULES -- Number of states. */
-#define YYNSTATES 350
+#define YYNRULES 150
+/* YYNSTATES -- Number of states. */
+#define YYNSTATES 300
-/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */
#define YYUNDEFTOK 2
-#define YYMAXUTOK 359
+#define YYMAXUTOK 335
-#define YYTRANSLATE(YYX) \
- ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
+/* YYTRANSLATE(TOKEN-NUM) -- Symbol number corresponding to TOKEN-NUM
+ as returned by yylex, with out-of-bounds checking. */
+#define YYTRANSLATE(YYX) \
+ ((unsigned) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
-/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX. */
+/* YYTRANSLATE[TOKEN-NUM] -- Symbol number corresponding to TOKEN-NUM
+ as returned by yylex. */
static const yytype_uint8 yytranslate[] =
{
0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 112, 2, 2,
- 114, 115, 109, 108, 117, 107, 2, 110, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 113,
- 105, 104, 106, 116, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 88, 2, 2,
+ 90, 91, 85, 84, 93, 83, 2, 86, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 89,
+ 81, 80, 82, 92, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 118, 2, 119, 2, 2, 2, 2,
+ 2, 2, 2, 94, 2, 95, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -599,168 +510,65 @@ static const yytype_uint8 yytranslate[] =
45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
- 85, 86, 87, 88, 89, 90, 91, 92, 93, 94,
- 95, 96, 97, 98, 99, 100, 101, 102, 103, 111
+ 75, 76, 77, 78, 79, 87
};
#if YYDEBUG
-/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in
- YYRHS. */
-static const yytype_uint16 yyprhs[] =
-{
- 0, 0, 3, 6, 8, 11, 14, 17, 20, 23,
- 26, 29, 32, 35, 38, 41, 44, 47, 50, 53,
- 56, 59, 62, 65, 68, 71, 73, 76, 78, 83,
- 85, 87, 89, 91, 93, 95, 97, 101, 105, 109,
- 113, 116, 120, 124, 128, 132, 136, 140, 144, 148,
- 152, 156, 159, 163, 167, 169, 171, 173, 175, 177,
- 179, 181, 183, 185, 187, 189, 190, 192, 196, 203,
- 208, 210, 212, 214, 218, 220, 224, 225, 227, 231,
- 232, 234, 238, 240, 245, 251, 256, 257, 259, 263,
- 265, 269, 271, 272, 275, 276, 279, 280, 285, 286,
- 288, 290, 291, 296, 305, 309, 315, 318, 322, 324,
- 328, 333, 338, 341, 344, 348, 351, 354, 357, 361,
- 366, 368, 371, 372, 375, 377, 385, 392, 403, 405,
- 407, 410, 413, 418, 423, 429, 431, 435, 436, 440,
- 441, 443, 444, 447, 448, 450, 451, 453, 454, 458,
- 468, 470, 474, 475, 477, 478, 480, 491, 493, 495,
- 498, 501, 503, 505, 507, 509, 511, 513, 517, 521,
- 522, 524, 528, 532, 533, 535, 538, 545, 550, 552,
- 554, 555, 557, 560
-};
-
-/* YYRHS -- A `-1'-separated list of the rules' RHS. */
-static const yytype_int16 yyrhs[] =
-{
- 121, 0, -1, 192, 113, -1, 127, -1, 128, 113,
- -1, 160, 113, -1, 161, 113, -1, 162, 113, -1,
- 159, 113, -1, 163, 113, -1, 155, 113, -1, 142,
- 113, -1, 144, 113, -1, 154, 113, -1, 152, 113,
- -1, 153, 113, -1, 149, 113, -1, 150, 113, -1,
- 164, 113, -1, 166, 113, -1, 165, 113, -1, 181,
- 113, -1, 182, 113, -1, 175, 113, -1, 179, 113,
- -1, 122, -1, 123, 122, -1, 9, -1, 125, 114,
- 133, 115, -1, 3, -1, 4, -1, 5, -1, 6,
- -1, 7, -1, 8, -1, 66, -1, 124, 108, 124,
- -1, 124, 107, 124, -1, 124, 109, 124, -1, 124,
- 110, 124, -1, 107, 124, -1, 114, 124, 115, -1,
- 124, 104, 124, -1, 124, 95, 5, -1, 124, 105,
- 124, -1, 124, 106, 124, -1, 124, 13, 124, -1,
- 124, 14, 124, -1, 124, 15, 124, -1, 124, 10,
- 124, -1, 124, 11, 124, -1, 12, 124, -1, 9,
- 112, 70, -1, 66, 112, 70, -1, 71, -1, 72,
- -1, 73, -1, 74, -1, 75, -1, 77, -1, 78,
- -1, 79, -1, 80, -1, 83, -1, 84, -1, -1,
- 116, -1, 126, 117, 116, -1, 118, 9, 114, 126,
- 115, 119, -1, 129, 114, 133, 115, -1, 76, -1,
- 81, -1, 82, -1, 9, 114, 115, -1, 180, -1,
- 131, 117, 180, -1, -1, 9, -1, 132, 117, 9,
- -1, -1, 124, -1, 133, 117, 124, -1, 124, -1,
- 37, 114, 109, 115, -1, 37, 114, 38, 9, 115,
- -1, 36, 114, 124, 115, -1, -1, 134, -1, 135,
- 117, 134, -1, 109, -1, 135, 49, 132, -1, 135,
- -1, -1, 40, 124, -1, -1, 41, 51, -1, -1,
- 92, 17, 93, 94, -1, -1, 46, -1, 47, -1,
- -1, 44, 45, 9, 140, -1, 35, 136, 39, 131,
- 137, 138, 139, 141, -1, 48, 49, 180, -1, 143,
- 50, 114, 133, 115, -1, 143, 142, -1, 9, 104,
- 124, -1, 145, -1, 146, 117, 145, -1, 40, 54,
- 55, 9, -1, 51, 180, 52, 146, -1, 148, 137,
- -1, 148, 147, -1, 53, 39, 180, -1, 151, 137,
- -1, 151, 147, -1, 85, 142, -1, 9, 63, 124,
- -1, 31, 124, 29, 123, -1, 156, -1, 157, 156,
- -1, -1, 30, 123, -1, 157, -1, 28, 124, 29,
- 123, 158, 27, 28, -1, 33, 124, 32, 123, 27,
- 32, -1, 41, 9, 17, 124, 42, 124, 32, 123,
- 27, 32, -1, 90, -1, 34, -1, 67, 9, -1,
- 69, 9, -1, 68, 9, 49, 132, -1, 68, 9,
- 49, 130, -1, 9, 183, 169, 170, 171, -1, 167,
- -1, 168, 117, 167, -1, -1, 114, 3, 115, -1,
- -1, 89, -1, -1, 12, 8, -1, -1, 61, -1,
- -1, 101, -1, -1, 102, 104, 3, -1, 56, 57,
- 180, 114, 168, 115, 172, 173, 174, -1, 9, -1,
- 176, 117, 9, -1, -1, 59, -1, -1, 60, -1,
- 56, 177, 178, 58, 9, 62, 180, 114, 176, 115,
- -1, 9, -1, 100, -1, 86, 88, -1, 87, 88,
- -1, 21, -1, 22, -1, 103, -1, 24, -1, 19,
- -1, 20, -1, 9, 17, 183, -1, 9, 18, 183,
- -1, -1, 184, -1, 185, 117, 184, -1, 9, 183,
- 113, -1, -1, 186, -1, 187, 186, -1, 64, 65,
- 9, 25, 142, 113, -1, 64, 91, 9, 113, -1,
- 188, -1, 189, -1, -1, 190, -1, 191, 190, -1,
- 16, 9, 114, 185, 115, 25, 187, 191, 26, 123,
- 27, -1
-};
-
-/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
+ /* YYRLINE[YYN] -- Source line where rule number YYN was defined. */
static const yytype_uint16 yyrline[] =
{
- 0, 162, 162, 165, 166, 167, 168, 169, 170, 171,
- 172, 173, 174, 175, 176, 177, 178, 179, 180, 181,
- 182, 183, 184, 185, 186, 190, 191, 196, 197, 199,
- 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
- 210, 211, 212, 213, 215, 216, 217, 218, 219, 220,
- 221, 222, 223, 225, 230, 231, 232, 233, 235, 236,
- 237, 238, 239, 240, 241, 244, 246, 247, 251, 257,
- 262, 263, 264, 268, 272, 273, 278, 279, 280, 285,
- 286, 287, 291, 292, 297, 303, 310, 311, 312, 317,
- 319, 322, 326, 327, 331, 332, 337, 338, 343, 344,
- 345, 349, 350, 357, 372, 377, 380, 388, 394, 395,
- 400, 406, 415, 423, 431, 438, 446, 454, 460, 467,
- 473, 474, 479, 480, 482, 486, 493, 499, 509, 513,
- 517, 524, 531, 535, 543, 552, 553, 558, 559, 564,
- 565, 571, 572, 578, 579, 585, 586, 591, 592, 597,
- 608, 609, 614, 615, 619, 620, 624, 638, 639, 643,
- 648, 653, 654, 655, 656, 657, 658, 662, 667, 675,
- 676, 677, 682, 688, 690, 691, 695, 703, 709, 710,
- 713, 715, 716, 720
+ 0, 140, 140, 143, 144, 145, 146, 147, 148, 149,
+ 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 161, 162, 166, 167, 172, 173, 175, 176, 177,
+ 178, 179, 180, 181, 182, 183, 184, 185, 186, 187,
+ 189, 190, 191, 192, 193, 194, 195, 196, 197, 199,
+ 204, 205, 206, 207, 208, 211, 213, 214, 218, 224,
+ 228, 229, 234, 235, 236, 241, 242, 243, 247, 248,
+ 256, 257, 258, 263, 265, 268, 272, 273, 277, 278,
+ 283, 284, 289, 290, 291, 295, 296, 303, 318, 323,
+ 326, 334, 340, 341, 346, 352, 361, 369, 377, 384,
+ 392, 400, 407, 413, 414, 419, 420, 422, 426, 433,
+ 439, 449, 453, 457, 464, 471, 475, 483, 492, 493,
+ 498, 499, 504, 505, 511, 519, 520, 525, 526, 530,
+ 531, 535, 549, 550, 554, 559, 564, 565, 566, 570,
+ 576, 578, 579, 583, 591, 597, 598, 601, 603, 604,
+ 608
};
#endif
-#if YYDEBUG || YYERROR_VERBOSE || YYTOKEN_TABLE
+#if YYDEBUG || YYERROR_VERBOSE || 0
/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
First, the terminals, then, starting at YYNTOKENS, nonterminals. */
static const char *const yytname[] =
{
"$end", "error", "$undefined", "PARS_INT_LIT", "PARS_FLOAT_LIT",
- "PARS_STR_LIT", "PARS_FIXBINARY_LIT", "PARS_BLOB_LIT", "PARS_NULL_LIT",
- "PARS_ID_TOKEN", "PARS_AND_TOKEN", "PARS_OR_TOKEN", "PARS_NOT_TOKEN",
- "PARS_GE_TOKEN", "PARS_LE_TOKEN", "PARS_NE_TOKEN",
- "PARS_PROCEDURE_TOKEN", "PARS_IN_TOKEN", "PARS_OUT_TOKEN",
- "PARS_BINARY_TOKEN", "PARS_BLOB_TOKEN", "PARS_INT_TOKEN",
- "PARS_INTEGER_TOKEN", "PARS_FLOAT_TOKEN", "PARS_CHAR_TOKEN",
- "PARS_IS_TOKEN", "PARS_BEGIN_TOKEN", "PARS_END_TOKEN", "PARS_IF_TOKEN",
- "PARS_THEN_TOKEN", "PARS_ELSE_TOKEN", "PARS_ELSIF_TOKEN",
- "PARS_LOOP_TOKEN", "PARS_WHILE_TOKEN", "PARS_RETURN_TOKEN",
- "PARS_SELECT_TOKEN", "PARS_SUM_TOKEN", "PARS_COUNT_TOKEN",
- "PARS_DISTINCT_TOKEN", "PARS_FROM_TOKEN", "PARS_WHERE_TOKEN",
- "PARS_FOR_TOKEN", "PARS_DDOT_TOKEN", "PARS_READ_TOKEN",
- "PARS_ORDER_TOKEN", "PARS_BY_TOKEN", "PARS_ASC_TOKEN", "PARS_DESC_TOKEN",
- "PARS_INSERT_TOKEN", "PARS_INTO_TOKEN", "PARS_VALUES_TOKEN",
- "PARS_UPDATE_TOKEN", "PARS_SET_TOKEN", "PARS_DELETE_TOKEN",
- "PARS_CURRENT_TOKEN", "PARS_OF_TOKEN", "PARS_CREATE_TOKEN",
- "PARS_TABLE_TOKEN", "PARS_INDEX_TOKEN", "PARS_UNIQUE_TOKEN",
- "PARS_CLUSTERED_TOKEN", "PARS_DOES_NOT_FIT_IN_MEM_TOKEN",
- "PARS_ON_TOKEN", "PARS_ASSIGN_TOKEN", "PARS_DECLARE_TOKEN",
- "PARS_CURSOR_TOKEN", "PARS_SQL_TOKEN", "PARS_OPEN_TOKEN",
- "PARS_FETCH_TOKEN", "PARS_CLOSE_TOKEN", "PARS_NOTFOUND_TOKEN",
- "PARS_TO_CHAR_TOKEN", "PARS_TO_NUMBER_TOKEN", "PARS_TO_BINARY_TOKEN",
- "PARS_BINARY_TO_NUMBER_TOKEN", "PARS_SUBSTR_TOKEN", "PARS_REPLSTR_TOKEN",
- "PARS_CONCAT_TOKEN", "PARS_INSTR_TOKEN", "PARS_LENGTH_TOKEN",
- "PARS_SYSDATE_TOKEN", "PARS_PRINTF_TOKEN", "PARS_ASSERT_TOKEN",
- "PARS_RND_TOKEN", "PARS_RND_STR_TOKEN", "PARS_ROW_PRINTF_TOKEN",
- "PARS_COMMIT_TOKEN", "PARS_ROLLBACK_TOKEN", "PARS_WORK_TOKEN",
- "PARS_UNSIGNED_TOKEN", "PARS_EXIT_TOKEN", "PARS_FUNCTION_TOKEN",
+ "PARS_STR_LIT", "PARS_NULL_LIT", "PARS_ID_TOKEN", "PARS_AND_TOKEN",
+ "PARS_OR_TOKEN", "PARS_NOT_TOKEN", "PARS_GE_TOKEN", "PARS_LE_TOKEN",
+ "PARS_NE_TOKEN", "PARS_PROCEDURE_TOKEN", "PARS_IN_TOKEN",
+ "PARS_INT_TOKEN", "PARS_CHAR_TOKEN", "PARS_IS_TOKEN", "PARS_BEGIN_TOKEN",
+ "PARS_END_TOKEN", "PARS_IF_TOKEN", "PARS_THEN_TOKEN", "PARS_ELSE_TOKEN",
+ "PARS_ELSIF_TOKEN", "PARS_LOOP_TOKEN", "PARS_WHILE_TOKEN",
+ "PARS_RETURN_TOKEN", "PARS_SELECT_TOKEN", "PARS_COUNT_TOKEN",
+ "PARS_FROM_TOKEN", "PARS_WHERE_TOKEN", "PARS_FOR_TOKEN",
+ "PARS_DDOT_TOKEN", "PARS_ORDER_TOKEN", "PARS_BY_TOKEN", "PARS_ASC_TOKEN",
+ "PARS_DESC_TOKEN", "PARS_INSERT_TOKEN", "PARS_INTO_TOKEN",
+ "PARS_VALUES_TOKEN", "PARS_UPDATE_TOKEN", "PARS_SET_TOKEN",
+ "PARS_DELETE_TOKEN", "PARS_CURRENT_TOKEN", "PARS_OF_TOKEN",
+ "PARS_CREATE_TOKEN", "PARS_TABLE_TOKEN", "PARS_INDEX_TOKEN",
+ "PARS_UNIQUE_TOKEN", "PARS_CLUSTERED_TOKEN", "PARS_ON_TOKEN",
+ "PARS_ASSIGN_TOKEN", "PARS_DECLARE_TOKEN", "PARS_CURSOR_TOKEN",
+ "PARS_SQL_TOKEN", "PARS_OPEN_TOKEN", "PARS_FETCH_TOKEN",
+ "PARS_CLOSE_TOKEN", "PARS_NOTFOUND_TOKEN", "PARS_TO_BINARY_TOKEN",
+ "PARS_SUBSTR_TOKEN", "PARS_CONCAT_TOKEN", "PARS_INSTR_TOKEN",
+ "PARS_LENGTH_TOKEN", "PARS_COMMIT_TOKEN", "PARS_ROLLBACK_TOKEN",
+ "PARS_WORK_TOKEN", "PARS_EXIT_TOKEN", "PARS_FUNCTION_TOKEN",
"PARS_LOCK_TOKEN", "PARS_SHARE_TOKEN", "PARS_MODE_TOKEN",
"PARS_LIKE_TOKEN", "PARS_LIKE_TOKEN_EXACT", "PARS_LIKE_TOKEN_PREFIX",
"PARS_LIKE_TOKEN_SUFFIX", "PARS_LIKE_TOKEN_SUBSTR",
- "PARS_TABLE_NAME_TOKEN", "PARS_COMPACT_TOKEN", "PARS_BLOCK_SIZE_TOKEN",
- "PARS_BIGINT_TOKEN", "'='", "'<'", "'>'", "'-'", "'+'", "'*'", "'/'",
- "NEG", "'%'", "';'", "'('", "')'", "'?'", "','", "'{'", "'}'", "$accept",
- "top_statement", "statement", "statement_list", "exp", "function_name",
- "question_mark_list", "stored_procedure_call",
- "predefined_procedure_call", "predefined_procedure_name",
+ "PARS_TABLE_NAME_TOKEN", "PARS_BIGINT_TOKEN", "'='", "'<'", "'>'", "'-'",
+ "'+'", "'*'", "'/'", "NEG", "'%'", "';'", "'('", "')'", "'?'", "','",
+ "'{'", "'}'", "$accept", "top_statement", "statement", "statement_list",
+ "exp", "function_name", "question_mark_list", "stored_procedure_call",
"user_function_call", "table_list", "variable_list", "exp_list",
"select_item", "select_item_list", "select_list", "search_condition",
"for_update_clause", "lock_shared_clause", "order_direction",
@@ -769,25 +577,22 @@ static const char *const yytname[] =
"cursor_positioned", "update_statement_start",
"update_statement_searched", "update_statement_positioned",
"delete_statement_start", "delete_statement_searched",
- "delete_statement_positioned", "row_printf_statement",
- "assignment_statement", "elsif_element", "elsif_list", "else_part",
- "if_statement", "while_statement", "for_statement", "exit_statement",
- "return_statement", "open_cursor_statement", "close_cursor_statement",
- "fetch_statement", "column_def", "column_def_list", "opt_column_len",
- "opt_unsigned", "opt_not_null", "not_fit_in_memory", "compact",
- "block_size", "create_table", "column_list", "unique_def",
- "clustered_def", "create_index", "table_name", "commit_statement",
- "rollback_statement", "type_name", "parameter_declaration",
- "parameter_declaration_list", "variable_declaration",
- "variable_declaration_list", "cursor_declaration",
- "function_declaration", "declaration", "declaration_list",
- "procedure_definition", 0
+ "delete_statement_positioned", "assignment_statement", "elsif_element",
+ "elsif_list", "else_part", "if_statement", "while_statement",
+ "for_statement", "exit_statement", "return_statement",
+ "open_cursor_statement", "close_cursor_statement", "fetch_statement",
+ "column_def", "column_def_list", "opt_column_len", "opt_not_null",
+ "create_table", "column_list", "unique_def", "clustered_def",
+ "create_index", "table_name", "commit_statement", "rollback_statement",
+ "type_name", "variable_declaration", "variable_declaration_list",
+ "cursor_declaration", "function_declaration", "declaration",
+ "declaration_list", "procedure_definition", YY_NULLPTR
};
#endif
# ifdef YYPRINT
-/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to
- token YYLEX-NUM. */
+/* YYTOKNUM[NUM] -- (External) token number corresponding to the
+ (internal) symbol number NUM (which must be that of a token). */
static const yytype_uint16 yytoknum[] =
{
0, 256, 257, 258, 259, 260, 261, 262, 263, 264,
@@ -798,478 +603,364 @@ static const yytype_uint16 yytoknum[] =
305, 306, 307, 308, 309, 310, 311, 312, 313, 314,
315, 316, 317, 318, 319, 320, 321, 322, 323, 324,
325, 326, 327, 328, 329, 330, 331, 332, 333, 334,
- 335, 336, 337, 338, 339, 340, 341, 342, 343, 344,
- 345, 346, 347, 348, 349, 350, 351, 352, 353, 354,
- 355, 356, 357, 358, 61, 60, 62, 45, 43, 42,
- 47, 359, 37, 59, 40, 41, 63, 44, 123, 125
+ 61, 60, 62, 45, 43, 42, 47, 335, 37, 59,
+ 40, 41, 63, 44, 123, 125
};
# endif
-/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */
-static const yytype_uint8 yyr1[] =
-{
- 0, 120, 121, 122, 122, 122, 122, 122, 122, 122,
- 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
- 122, 122, 122, 122, 122, 123, 123, 124, 124, 124,
- 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
- 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
- 124, 124, 124, 124, 125, 125, 125, 125, 125, 125,
- 125, 125, 125, 125, 125, 126, 126, 126, 127, 128,
- 129, 129, 129, 130, 131, 131, 132, 132, 132, 133,
- 133, 133, 134, 134, 134, 134, 135, 135, 135, 136,
- 136, 136, 137, 137, 138, 138, 139, 139, 140, 140,
- 140, 141, 141, 142, 143, 144, 144, 145, 146, 146,
- 147, 148, 149, 150, 151, 152, 153, 154, 155, 156,
- 157, 157, 158, 158, 158, 159, 160, 161, 162, 163,
- 164, 165, 166, 166, 167, 168, 168, 169, 169, 170,
- 170, 171, 171, 172, 172, 173, 173, 174, 174, 175,
- 176, 176, 177, 177, 178, 178, 179, 180, 180, 181,
- 182, 183, 183, 183, 183, 183, 183, 184, 184, 185,
- 185, 185, 186, 187, 187, 187, 188, 189, 190, 190,
- 191, 191, 191, 192
-};
+#define YYPACT_NINF -129
-/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */
-static const yytype_uint8 yyr2[] =
+#define yypact_value_is_default(Yystate) \
+ (!!((Yystate) == (-129)))
+
+#define YYTABLE_NINF -1
+
+#define yytable_value_is_error(Yytable_value) \
+ 0
+
+ /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
+ STATE-NUM. */
+static const yytype_int16 yypact[] =
{
- 0, 2, 2, 1, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 1, 2, 1, 4, 1,
- 1, 1, 1, 1, 1, 1, 3, 3, 3, 3,
- 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 2, 3, 3, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 0, 1, 3, 6, 4,
- 1, 1, 1, 3, 1, 3, 0, 1, 3, 0,
- 1, 3, 1, 4, 5, 4, 0, 1, 3, 1,
- 3, 1, 0, 2, 0, 2, 0, 4, 0, 1,
- 1, 0, 4, 8, 3, 5, 2, 3, 1, 3,
- 4, 4, 2, 2, 3, 2, 2, 2, 3, 4,
- 1, 2, 0, 2, 1, 7, 6, 10, 1, 1,
- 2, 2, 4, 4, 5, 1, 3, 0, 3, 0,
- 1, 0, 2, 0, 1, 0, 1, 0, 3, 9,
- 1, 3, 0, 1, 0, 1, 10, 1, 1, 2,
- 2, 1, 1, 1, 1, 1, 1, 3, 3, 0,
- 1, 3, 3, 0, 1, 2, 6, 4, 1, 1,
- 0, 1, 2, 11
+ 5, 34, 46, -28, -41, -129, -129, -12, 45, 57,
+ 23, -129, 9, -129, -129, -129, 20, -9, -129, -129,
+ -129, -129, 2, -129, 83, 87, 278, -129, 93, 28,
+ 71, 427, 427, -129, 335, 105, 85, -1, 104, -27,
+ 129, 132, 133, 76, 77, -129, 141, -129, 149, -129,
+ 61, 19, 62, 118, 65, 66, 118, 68, 69, 70,
+ 72, 73, 74, 75, 78, 79, 82, 84, 89, 90,
+ 91, 94, 138, -129, 427, -129, -129, -129, -129, 86,
+ 427, 96, -129, -129, -129, -129, -129, 427, 427, 438,
+ 92, 454, 95, -129, 1, -129, -24, 130, 157, -1,
+ -129, -129, 144, -1, -1, -129, 139, -129, 154, -129,
+ -129, -129, 98, -129, -129, -129, 108, -129, -129, 345,
+ -129, -129, -129, -129, -129, -129, -129, -129, -129, -129,
+ -129, -129, -129, -129, -129, -129, -129, -129, -129, -129,
+ -129, 112, 1, 135, 285, 143, -8, 15, 427, 427,
+ 427, 427, 427, 278, 203, 427, 427, 427, 427, 427,
+ 427, 427, 427, 278, 124, 204, 381, -1, 427, -129,
+ 209, -129, 120, -129, 173, 215, 131, 427, 180, 1,
+ -129, -129, -129, -129, 285, 285, 30, 30, 1, 10,
+ -129, 30, 30, 30, 60, 60, -8, -8, 1, -39,
+ 192, 137, -129, 136, -129, -13, -129, 472, 146, -129,
+ 147, 225, 227, 151, -129, 136, -129, -21, 0, 229,
+ 278, 427, -129, 213, 219, -129, 427, 220, -129, 237,
+ 427, -1, 214, 427, 427, 209, 23, -129, 14, 196,
+ 160, 158, 162, -129, -129, 278, 486, -129, 231, 1,
+ -129, -129, -129, 218, 194, 517, 1, -129, 175, -129,
+ 225, -1, -129, -129, -129, 278, -129, -129, 251, 234,
+ 278, 266, 260, -129, 181, 278, 201, 239, -129, 235,
+ 184, 271, -129, 272, 208, 275, 258, -129, -129, -129,
+ 17, -129, -7, -129, -129, 277, -129, -129, -129, -129
};
-/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state
- STATE-NUM when YYTABLE doesn't specify something else to do. Zero
- means the default is an error. */
+ /* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM.
+ Performed when YYTABLE does not specify something else to do. Zero
+ means the default is an error. */
static const yytype_uint8 yydefact[] =
{
- 0, 0, 0, 0, 0, 1, 2, 169, 0, 170,
- 0, 0, 0, 0, 0, 165, 166, 161, 162, 164,
- 163, 167, 168, 173, 171, 0, 174, 180, 0, 0,
- 175, 178, 179, 181, 0, 172, 0, 0, 0, 182,
- 0, 0, 0, 0, 0, 129, 86, 0, 0, 0,
- 0, 152, 0, 0, 0, 70, 71, 72, 0, 0,
- 0, 128, 0, 25, 0, 3, 0, 0, 0, 0,
- 0, 92, 0, 0, 92, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 1, 2, 0, 0, 140,
+ 0, 141, 147, 136, 138, 137, 0, 0, 142, 145,
+ 146, 148, 0, 139, 0, 0, 0, 149, 0, 0,
+ 0, 0, 0, 112, 70, 0, 0, 0, 0, 127,
+ 0, 0, 0, 0, 0, 111, 0, 23, 0, 3,
+ 0, 0, 0, 76, 0, 0, 76, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 177, 0, 29, 30, 31, 32, 33, 34,
- 27, 0, 35, 54, 55, 56, 57, 58, 59, 60,
- 61, 62, 63, 64, 0, 0, 0, 0, 0, 0,
- 0, 89, 82, 87, 91, 0, 0, 0, 157, 158,
- 0, 0, 0, 153, 154, 130, 0, 131, 117, 159,
- 160, 0, 183, 26, 4, 79, 11, 0, 106, 12,
- 0, 112, 113, 16, 17, 115, 116, 14, 15, 13,
- 10, 8, 5, 6, 7, 9, 18, 20, 19, 23,
- 24, 21, 22, 0, 118, 0, 51, 0, 40, 0,
+ 0, 0, 0, 144, 0, 27, 28, 29, 30, 25,
+ 0, 31, 50, 51, 52, 53, 54, 0, 0, 0,
+ 0, 0, 0, 73, 68, 71, 75, 0, 0, 0,
+ 132, 133, 0, 0, 0, 128, 129, 113, 0, 114,
+ 134, 135, 0, 150, 24, 10, 0, 90, 11, 0,
+ 96, 97, 14, 15, 99, 100, 12, 13, 9, 7,
+ 4, 5, 6, 8, 16, 18, 17, 21, 22, 19,
+ 20, 0, 101, 0, 47, 0, 36, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 79, 0, 0, 0, 76, 0,
- 0, 0, 104, 0, 114, 0, 155, 0, 76, 65,
- 80, 0, 79, 0, 93, 176, 52, 53, 41, 49,
- 50, 46, 47, 48, 122, 43, 42, 44, 45, 37,
- 36, 38, 39, 0, 0, 0, 0, 0, 77, 90,
- 88, 92, 74, 0, 0, 108, 111, 0, 0, 77,
- 133, 132, 66, 0, 69, 0, 0, 0, 0, 0,
- 120, 124, 0, 28, 0, 85, 0, 83, 0, 0,
- 0, 94, 0, 0, 0, 0, 135, 0, 0, 0,
- 0, 0, 81, 105, 110, 123, 0, 121, 0, 126,
- 84, 78, 75, 0, 96, 0, 107, 109, 137, 143,
- 0, 0, 73, 68, 67, 0, 125, 95, 0, 101,
- 0, 0, 139, 144, 145, 136, 0, 119, 0, 0,
- 103, 0, 0, 140, 141, 146, 147, 0, 0, 0,
- 0, 138, 0, 134, 0, 149, 150, 0, 97, 98,
- 127, 142, 0, 156, 0, 99, 100, 102, 148, 151
+ 0, 0, 65, 0, 0, 62, 0, 0, 0, 88,
+ 0, 98, 0, 130, 0, 62, 55, 65, 0, 77,
+ 143, 48, 49, 37, 45, 46, 42, 43, 44, 105,
+ 39, 38, 40, 41, 33, 32, 34, 35, 66, 0,
+ 0, 0, 63, 74, 72, 76, 60, 0, 0, 92,
+ 95, 0, 0, 63, 116, 115, 56, 0, 0, 0,
+ 0, 0, 103, 107, 0, 26, 0, 0, 69, 0,
+ 0, 0, 78, 0, 0, 0, 0, 118, 0, 0,
+ 0, 0, 0, 89, 94, 106, 0, 104, 0, 67,
+ 109, 64, 61, 0, 80, 0, 91, 93, 120, 124,
+ 0, 0, 59, 58, 57, 0, 108, 79, 0, 85,
+ 0, 0, 122, 119, 0, 102, 0, 0, 87, 0,
+ 0, 0, 117, 0, 0, 0, 0, 121, 123, 125,
+ 0, 81, 82, 110, 131, 0, 83, 84, 86, 126
};
-/* YYDEFGOTO[NTERM-NUM]. */
-static const yytype_int16 yydefgoto[] =
-{
- -1, 2, 63, 64, 210, 117, 253, 65, 66, 67,
- 250, 241, 239, 211, 123, 124, 125, 151, 294, 309,
- 347, 320, 68, 69, 70, 245, 246, 152, 71, 72,
- 73, 74, 75, 76, 77, 78, 260, 261, 262, 79,
- 80, 81, 82, 83, 84, 85, 86, 276, 277, 312,
- 324, 333, 314, 326, 335, 87, 337, 134, 207, 88,
- 130, 89, 90, 21, 9, 10, 26, 27, 31, 32,
- 33, 34, 3
-};
-
-/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
- STATE-NUM. */
-#define YYPACT_NINF -179
-static const yytype_int16 yypact[] =
+ /* YYPGOTO[NTERM-NUM]. */
+static const yytype_int16 yypgoto[] =
{
- 24, 36, 58, -48, -25, -179, -179, 57, 31, -179,
- -74, 14, 14, 50, 57, -179, -179, -179, -179, -179,
- -179, -179, -179, 72, -179, 14, -179, 3, -26, -28,
- -179, -179, -179, -179, 4, -179, 91, 95, 589, -179,
- 80, -6, 43, 285, 285, -179, 19, 99, 69, -5,
- 81, -13, 110, 112, 114, -179, -179, -179, 89, 37,
- 41, -179, 122, -179, 406, -179, 25, 40, 44, -3,
- 46, 116, 49, 51, 116, 52, 53, 54, 55, 56,
- 59, 61, 62, 70, 73, 74, 75, 76, 77, 78,
- 79, 89, -179, 285, -179, -179, -179, -179, -179, -179,
- 82, 285, 83, -179, -179, -179, -179, -179, -179, -179,
- -179, -179, -179, -179, 285, 285, 577, 92, 618, 94,
- 97, -179, 706, -179, -33, 124, 153, -5, -179, -179,
- 141, -5, -5, -179, 136, -179, 148, -179, -179, -179,
- -179, 98, -179, -179, -179, 285, -179, 101, -179, -179,
- 195, -179, -179, -179, -179, -179, -179, -179, -179, -179,
- -179, -179, -179, -179, -179, -179, -179, -179, -179, -179,
- -179, -179, -179, 100, 706, 135, 6, 154, -7, 206,
- 285, 285, 285, 285, 285, 589, 218, 285, 285, 285,
- 285, 285, 285, 285, 285, 589, 285, -27, 216, 173,
- -5, 285, -179, 217, -179, 113, -179, 171, 221, 119,
- 706, -56, 285, 185, 706, -179, -179, -179, -179, 6,
- 6, 27, 27, 706, 345, -179, 27, 27, 27, 35,
- 35, -7, -7, -53, 467, 223, 232, 127, -179, 126,
- -179, -31, -179, 638, 151, -179, 142, 251, 253, 150,
- -179, 126, -179, -46, -179, 285, -45, 256, 589, 285,
- -179, 240, 249, -179, 245, -179, 166, -179, 273, 285,
- -5, 242, 285, 285, 217, 14, -179, -39, 222, 170,
- 167, 179, 706, -179, -179, 589, 679, -179, 268, -179,
- -179, -179, -179, 247, 207, 686, 706, -179, 186, 243,
- 251, -5, -179, -179, -179, 589, -179, -179, 286, 261,
- 589, 303, 219, -179, 224, -179, 193, 589, 226, 272,
- -179, 528, 205, -179, 310, -179, 233, 314, 230, 317,
- 302, -179, 328, -179, 235, -179, -179, -38, -179, 7,
- -179, -179, 334, -179, 331, -179, -179, -179, -179, -179
+ -129, -129, -48, -128, -30, -129, -129, -129, -129, -129,
+ 113, 110, 123, -129, -129, -52, -129, -129, -129, -129,
+ -40, -129, -129, 55, -129, 238, -129, -129, -129, -129,
+ -129, -129, -129, 88, -129, -129, -129, -129, -129, -129,
+ -129, -129, -129, -129, 35, -129, -129, -129, -129, -129,
+ -129, -129, -129, -96, -129, -129, 81, 290, -129, -129,
+ -129, 286, -129, -129
};
-/* YYPGOTO[NTERM-NUM]. */
-static const yytype_int16 yypgoto[] =
+ /* YYDEFGOTO[NTERM-NUM]. */
+static const yytype_int16 yydefgoto[] =
{
- -179, -179, -63, -178, -41, -179, -179, -179, -179, -179,
- -179, -179, 133, -155, 143, -179, -179, -68, -179, -179,
- -179, -179, -40, -179, -179, 71, -179, 269, -179, -179,
- -179, -179, -179, -179, -179, -179, 85, -179, -179, -179,
- -179, -179, -179, -179, -179, -179, -179, 47, -179, -179,
- -179, -179, -179, -179, -179, -179, -179, -179, -179, -179,
- -117, -179, -179, -12, 330, -179, 321, -179, -179, -179,
- 315, -179, -179
+ -1, 2, 47, 48, 94, 90, 217, 49, 214, 205,
+ 203, 199, 95, 96, 97, 120, 254, 269, 298, 278,
+ 50, 51, 52, 209, 210, 121, 53, 54, 55, 56,
+ 57, 58, 59, 222, 223, 224, 60, 61, 62, 63,
+ 64, 65, 66, 67, 237, 238, 272, 282, 68, 290,
+ 106, 174, 69, 102, 70, 71, 16, 11, 12, 19,
+ 20, 21, 22, 3
};
-/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If
- positive, shift that token. If negative, reduce the rule which
- number is the opposite. If zero, do what YYDEFACT says.
- If YYTABLE_NINF, syntax error. */
-#define YYTABLE_NINF -1
+ /* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If
+ positive, shift that token. If negative, reduce the rule whose
+ number is the opposite. If YYTABLE_NINF, syntax error. */
static const yytype_uint16 yytable[] =
{
- 22, 143, 116, 118, 128, 122, 155, 224, 184, 269,
- 202, 236, 25, 28, 204, 205, 198, 234, 138, 182,
- 183, 184, 94, 95, 96, 97, 98, 99, 100, 148,
- 38, 101, 46, 15, 16, 17, 18, 36, 19, 233,
- 1, 13, 184, 14, 132, 4, 133, 147, 11, 12,
- 184, 173, 174, 345, 346, 119, 120, 256, 5, 254,
- 176, 255, 263, 37, 255, 6, 8, 29, 29, 280,
- 283, 281, 255, 178, 179, 23, 299, 343, 300, 344,
- 285, 25, 237, 242, 199, 102, 270, 35, 186, 7,
- 103, 104, 105, 106, 107, 129, 108, 109, 110, 111,
- 40, 186, 112, 113, 41, 91, 93, 92, 126, 214,
- 187, 188, 189, 190, 191, 192, 193, 20, 127, 135,
- 131, 136, 186, 137, 46, 139, 114, 317, 121, 140,
- 186, 141, 321, 115, 190, 191, 192, 193, 144, 219,
- 220, 221, 222, 223, 192, 193, 226, 227, 228, 229,
- 230, 231, 232, 292, 145, 235, 150, 146, 122, 149,
- 243, 143, 153, 200, 154, 157, 158, 159, 160, 161,
- 201, 143, 162, 271, 163, 164, 94, 95, 96, 97,
- 98, 99, 100, 165, 316, 101, 166, 167, 168, 169,
- 170, 171, 172, 203, 175, 177, 206, 208, 94, 95,
- 96, 97, 98, 99, 100, 216, 194, 101, 196, 119,
- 120, 197, 209, 215, 282, 212, 180, 181, 286, 182,
- 183, 184, 143, 225, 217, 238, 244, 247, 214, 248,
- 249, 295, 296, 180, 181, 252, 182, 183, 184, 102,
- 257, 266, 267, 268, 103, 104, 105, 106, 107, 213,
- 108, 109, 110, 111, 143, 273, 112, 113, 143, 274,
- 275, 102, 278, 298, 279, 284, 103, 104, 105, 106,
- 107, 259, 108, 109, 110, 111, 288, 289, 112, 113,
- 114, 290, 291, 293, 301, 302, 303, 115, 94, 95,
- 96, 97, 98, 99, 100, 304, 306, 101, 307, 308,
- 311, 186, 114, 318, 313, 319, 322, 327, 323, 115,
- 187, 188, 189, 190, 191, 192, 193, 329, 186, 328,
- 331, 218, 332, 336, 338, 325, 339, 187, 188, 189,
- 190, 191, 192, 193, 340, 334, 341, 348, 265, 342,
- 349, 251, 240, 156, 24, 297, 287, 315, 30, 39,
- 0, 102, 0, 0, 42, 0, 103, 104, 105, 106,
- 107, 0, 108, 109, 110, 111, 0, 0, 112, 113,
- 0, 0, 0, 43, 0, 258, 259, 0, 44, 45,
- 46, 0, 0, 0, 0, 0, 47, 0, 0, 0,
- 0, 0, 114, 48, 0, 0, 49, 0, 50, 115,
- 0, 51, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 52, 53, 54, 42, 0, 0, 0, 0,
- 0, 55, 0, 0, 0, 0, 56, 57, 0, 0,
- 58, 59, 60, 142, 43, 61, 0, 0, 0, 44,
- 45, 46, 0, 0, 0, 0, 0, 47, 0, 0,
- 0, 0, 0, 0, 48, 0, 0, 49, 0, 50,
- 0, 0, 51, 62, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 52, 53, 54, 42, 0, 0, 0,
- 0, 0, 55, 0, 0, 0, 0, 56, 57, 0,
- 0, 58, 59, 60, 264, 43, 61, 0, 0, 0,
- 44, 45, 46, 0, 0, 0, 0, 0, 47, 0,
- 0, 0, 0, 0, 0, 48, 0, 0, 49, 0,
- 50, 0, 0, 51, 62, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 52, 53, 54, 42, 0, 0,
- 0, 0, 0, 55, 0, 0, 0, 0, 56, 57,
- 0, 0, 58, 59, 60, 330, 43, 61, 0, 0,
- 0, 44, 45, 46, 0, 0, 0, 0, 0, 47,
- 0, 0, 0, 0, 0, 0, 48, 0, 0, 49,
- 0, 50, 0, 0, 51, 62, 0, 180, 181, 0,
- 182, 183, 184, 0, 0, 52, 53, 54, 42, 0,
- 0, 0, 0, 0, 55, 0, 185, 0, 0, 56,
- 57, 0, 0, 58, 59, 60, 0, 43, 61, 0,
- 0, 0, 44, 45, 46, 0, 0, 0, 180, 181,
- 47, 182, 183, 184, 0, 0, 0, 48, 0, 0,
- 49, 0, 50, 0, 0, 51, 62, 0, 180, 181,
- 195, 182, 183, 184, 0, 0, 52, 53, 54, 0,
- 0, 0, 0, 0, 0, 55, 0, 0, 0, 0,
- 56, 57, 186, 0, 58, 59, 60, 0, 0, 61,
- 272, 187, 188, 189, 190, 191, 192, 193, 0, 180,
- 181, 0, 182, 183, 184, 0, 180, 181, 0, 182,
- 183, 184, 0, 0, 0, 0, 0, 62, 305, 0,
- 0, 0, 0, 186, 0, 0, 180, 181, 310, 182,
- 183, 184, 187, 188, 189, 190, 191, 192, 193, 0,
- 0, 0, 0, 186, 0, 0, 0, 0, 0, 0,
- 0, 0, 187, 188, 189, 190, 191, 192, 193, 0,
+ 114, 89, 91, 169, 124, 152, 100, 171, 172, 148,
+ 149, 117, 150, 151, 152, 165, 10, 30, 230, 1,
+ 104, 26, 105, 148, 149, 189, 150, 151, 152, 296,
+ 297, 31, 141, 220, 221, 200, 32, 33, 34, 13,
+ 14, 4, 35, 152, 142, 24, 5, 34, 36, 7,
+ 144, 37, 225, 38, 226, 17, 39, 146, 147, 116,
+ 25, 6, 17, 9, 10, 154, 40, 41, 42, 166,
+ 241, 206, 242, 152, 154, 43, 44, 101, 45, 8,
+ 231, 155, 156, 157, 158, 159, 160, 161, 154, 179,
+ 28, 243, 245, 226, 29, 155, 156, 157, 158, 159,
+ 160, 161, 15, 154, 46, 259, 183, 260, 294, 23,
+ 295, 72, 98, 158, 159, 160, 161, 73, 184, 185,
+ 186, 187, 188, 74, 99, 191, 192, 193, 194, 195,
+ 196, 197, 198, 154, 103, 252, 107, 275, 207, 108,
+ 109, 114, 279, 110, 111, 160, 161, 198, 112, 119,
+ 115, 118, 114, 232, 122, 123, 30, 126, 127, 128,
+ 167, 129, 130, 131, 132, 274, 34, 133, 134, 113,
+ 31, 135, 168, 136, 143, 32, 33, 34, 137, 138,
+ 139, 35, 162, 140, 145, 164, 170, 36, 176, 173,
+ 37, 246, 38, 175, 181, 39, 249, 114, 177, 30,
+ 179, 180, 182, 255, 256, 40, 41, 42, 190, 201,
+ 211, 202, 227, 31, 43, 44, 208, 45, 32, 33,
+ 34, 212, 213, 216, 35, 219, 234, 114, 228, 229,
+ 36, 114, 236, 37, 239, 38, 244, 221, 39, 248,
+ 235, 240, 30, 46, 251, 250, 253, 261, 40, 41,
+ 42, 262, 266, 263, 264, 286, 31, 43, 44, 267,
+ 45, 32, 33, 34, 268, 271, 276, 35, 277, 280,
+ 281, 283, 284, 36, 285, 287, 37, 288, 38, 289,
+ 291, 39, 292, 293, 299, 30, 46, 218, 215, 204,
+ 257, 40, 41, 42, 125, 273, 150, 151, 152, 31,
+ 43, 44, 18, 45, 32, 33, 34, 0, 27, 0,
+ 35, 247, 0, 0, 0, 0, 36, 258, 0, 37,
+ 0, 38, 0, 0, 39, 0, 0, 0, 0, 46,
+ 0, 0, 0, 0, 40, 41, 42, 0, 75, 76,
+ 77, 78, 79, 43, 44, 80, 45, 0, 75, 76,
+ 77, 78, 79, 0, 0, 80, 0, 0, 154, 0,
+ 0, 0, 0, 0, 92, 155, 156, 157, 158, 159,
+ 160, 161, 46, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 75, 76, 77, 78, 79, 178,
+ 81, 80, 0, 0, 0, 82, 83, 84, 85, 86,
+ 81, 0, 0, 0, 0, 82, 83, 84, 85, 86,
+ 92, 0, 0, 0, 0, 0, 0, 0, 87, 0,
+ 93, 0, 0, 0, 0, 88, 0, 0, 87, 0,
+ 75, 76, 77, 78, 79, 88, 81, 80, 0, 0,
+ 0, 82, 83, 84, 85, 86, 148, 149, 0, 150,
+ 151, 152, 0, 0, 0, 0, 0, 0, 0, 0,
+ 153, 0, 148, 149, 87, 150, 151, 152, 0, 0,
+ 0, 88, 0, 0, 0, 0, 0, 0, 0, 163,
+ 148, 149, 81, 150, 151, 152, 0, 82, 83, 84,
+ 85, 86, 0, 0, 148, 149, 0, 150, 151, 152,
+ 0, 0, 0, 0, 0, 233, 0, 0, 265, 0,
+ 87, 154, 0, 0, 0, 0, 0, 88, 155, 156,
+ 157, 158, 159, 160, 161, 148, 149, 154, 150, 151,
+ 152, 0, 0, 0, 155, 156, 157, 158, 159, 160,
+ 161, 0, 270, 0, 0, 154, 0, 0, 0, 0,
+ 0, 0, 155, 156, 157, 158, 159, 160, 161, 154,
+ 0, 0, 0, 0, 0, 0, 155, 156, 157, 158,
+ 159, 160, 161, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 186, 0, 0, 0, 0, 0,
- 0, 186, 0, 187, 188, 189, 190, 191, 192, 193,
- 187, 188, 189, 190, 191, 192, 193, 0, 0, 0,
- 0, 186, 0, 0, 0, 0, 0, 0, 0, 0,
- 187, 188, 189, 190, 191, 192, 193
+ 154, 0, 0, 0, 0, 0, 0, 155, 156, 157,
+ 158, 159, 160, 161
};
static const yytype_int16 yycheck[] =
{
- 12, 64, 43, 44, 9, 46, 74, 185, 15, 40,
- 127, 38, 9, 25, 131, 132, 49, 195, 58, 13,
- 14, 15, 3, 4, 5, 6, 7, 8, 9, 69,
- 26, 12, 35, 19, 20, 21, 22, 65, 24, 194,
- 16, 115, 15, 117, 57, 9, 59, 50, 17, 18,
- 15, 91, 93, 46, 47, 36, 37, 212, 0, 115,
- 101, 117, 115, 91, 117, 113, 9, 64, 64, 115,
- 115, 117, 117, 114, 115, 25, 115, 115, 117, 117,
- 258, 9, 109, 200, 117, 66, 117, 113, 95, 114,
- 71, 72, 73, 74, 75, 100, 77, 78, 79, 80,
- 9, 95, 83, 84, 9, 25, 63, 113, 9, 150,
- 104, 105, 106, 107, 108, 109, 110, 103, 49, 9,
- 39, 9, 95, 9, 35, 88, 107, 305, 109, 88,
- 95, 9, 310, 114, 107, 108, 109, 110, 113, 180,
- 181, 182, 183, 184, 109, 110, 187, 188, 189, 190,
- 191, 192, 193, 270, 114, 196, 40, 113, 199, 113,
- 201, 224, 113, 39, 113, 113, 113, 113, 113, 113,
- 17, 234, 113, 241, 113, 113, 3, 4, 5, 6,
- 7, 8, 9, 113, 301, 12, 113, 113, 113, 113,
- 113, 113, 113, 52, 112, 112, 60, 49, 3, 4,
- 5, 6, 7, 8, 9, 70, 114, 12, 114, 36,
- 37, 114, 114, 113, 255, 114, 10, 11, 259, 13,
- 14, 15, 285, 5, 70, 9, 9, 114, 269, 58,
- 9, 272, 273, 10, 11, 116, 13, 14, 15, 66,
- 55, 9, 115, 117, 71, 72, 73, 74, 75, 54,
- 77, 78, 79, 80, 317, 104, 83, 84, 321, 117,
- 9, 66, 9, 275, 114, 9, 71, 72, 73, 74,
- 75, 31, 77, 78, 79, 80, 27, 32, 83, 84,
- 107, 115, 9, 41, 62, 115, 119, 114, 3, 4,
- 5, 6, 7, 8, 9, 116, 28, 12, 51, 92,
- 114, 95, 107, 17, 61, 44, 3, 114, 89, 114,
- 104, 105, 106, 107, 108, 109, 110, 45, 95, 93,
- 115, 115, 12, 9, 94, 101, 9, 104, 105, 106,
- 107, 108, 109, 110, 32, 102, 8, 3, 115, 104,
- 9, 208, 199, 74, 14, 274, 261, 300, 27, 34,
- -1, 66, -1, -1, 9, -1, 71, 72, 73, 74,
- 75, -1, 77, 78, 79, 80, -1, -1, 83, 84,
- -1, -1, -1, 28, -1, 30, 31, -1, 33, 34,
- 35, -1, -1, -1, -1, -1, 41, -1, -1, -1,
- -1, -1, 107, 48, -1, -1, 51, -1, 53, 114,
- -1, 56, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, 67, 68, 69, 9, -1, -1, -1, -1,
- -1, 76, -1, -1, -1, -1, 81, 82, -1, -1,
- 85, 86, 87, 27, 28, 90, -1, -1, -1, 33,
- 34, 35, -1, -1, -1, -1, -1, 41, -1, -1,
- -1, -1, -1, -1, 48, -1, -1, 51, -1, 53,
- -1, -1, 56, 118, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, 67, 68, 69, 9, -1, -1, -1,
- -1, -1, 76, -1, -1, -1, -1, 81, 82, -1,
- -1, 85, 86, 87, 27, 28, 90, -1, -1, -1,
- 33, 34, 35, -1, -1, -1, -1, -1, 41, -1,
- -1, -1, -1, -1, -1, 48, -1, -1, 51, -1,
- 53, -1, -1, 56, 118, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, 67, 68, 69, 9, -1, -1,
- -1, -1, -1, 76, -1, -1, -1, -1, 81, 82,
- -1, -1, 85, 86, 87, 27, 28, 90, -1, -1,
- -1, 33, 34, 35, -1, -1, -1, -1, -1, 41,
- -1, -1, -1, -1, -1, -1, 48, -1, -1, 51,
- -1, 53, -1, -1, 56, 118, -1, 10, 11, -1,
- 13, 14, 15, -1, -1, 67, 68, 69, 9, -1,
- -1, -1, -1, -1, 76, -1, 29, -1, -1, 81,
- 82, -1, -1, 85, 86, 87, -1, 28, 90, -1,
- -1, -1, 33, 34, 35, -1, -1, -1, 10, 11,
- 41, 13, 14, 15, -1, -1, -1, 48, -1, -1,
- 51, -1, 53, -1, -1, 56, 118, -1, 10, 11,
- 32, 13, 14, 15, -1, -1, 67, 68, 69, -1,
- -1, -1, -1, -1, -1, 76, -1, -1, -1, -1,
- 81, 82, 95, -1, 85, 86, 87, -1, -1, 90,
- 42, 104, 105, 106, 107, 108, 109, 110, -1, 10,
- 11, -1, 13, 14, 15, -1, 10, 11, -1, 13,
- 14, 15, -1, -1, -1, -1, -1, 118, 29, -1,
- -1, -1, -1, 95, -1, -1, 10, 11, 32, 13,
- 14, 15, 104, 105, 106, 107, 108, 109, 110, -1,
- -1, -1, -1, 95, -1, -1, -1, -1, -1, -1,
- -1, -1, 104, 105, 106, 107, 108, 109, 110, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 48, 31, 32, 99, 56, 13, 7, 103, 104, 8,
+ 9, 51, 11, 12, 13, 39, 7, 7, 31, 14,
+ 47, 19, 49, 8, 9, 153, 11, 12, 13, 36,
+ 37, 21, 72, 23, 24, 163, 26, 27, 28, 16,
+ 17, 7, 32, 13, 74, 54, 0, 28, 38, 90,
+ 80, 41, 91, 43, 93, 53, 46, 87, 88, 40,
+ 69, 89, 53, 18, 7, 73, 56, 57, 58, 93,
+ 91, 167, 93, 13, 73, 65, 66, 78, 68, 91,
+ 93, 80, 81, 82, 83, 84, 85, 86, 73, 119,
+ 7, 91, 220, 93, 7, 80, 81, 82, 83, 84,
+ 85, 86, 79, 73, 94, 91, 91, 93, 91, 89,
+ 93, 18, 7, 83, 84, 85, 86, 89, 148, 149,
+ 150, 151, 152, 52, 39, 155, 156, 157, 158, 159,
+ 160, 161, 162, 73, 30, 231, 7, 265, 168, 7,
+ 7, 189, 270, 67, 67, 85, 86, 177, 7, 31,
+ 89, 89, 200, 205, 89, 89, 7, 89, 89, 89,
+ 30, 89, 89, 89, 89, 261, 28, 89, 89, 20,
+ 21, 89, 15, 89, 88, 26, 27, 28, 89, 89,
+ 89, 32, 90, 89, 88, 90, 42, 38, 90, 50,
+ 41, 221, 43, 39, 59, 46, 226, 245, 90, 7,
+ 230, 89, 59, 233, 234, 56, 57, 58, 5, 85,
+ 90, 7, 20, 21, 65, 66, 7, 68, 26, 27,
+ 28, 48, 7, 92, 32, 45, 80, 275, 91, 93,
+ 38, 279, 7, 41, 7, 43, 7, 24, 46, 20,
+ 93, 90, 7, 94, 7, 25, 32, 51, 56, 57,
+ 58, 91, 21, 95, 92, 20, 21, 65, 66, 41,
+ 68, 26, 27, 28, 70, 90, 15, 32, 34, 3,
+ 10, 90, 71, 38, 35, 91, 41, 6, 43, 7,
+ 72, 46, 7, 25, 7, 7, 94, 177, 175, 166,
+ 235, 56, 57, 58, 56, 260, 11, 12, 13, 21,
+ 65, 66, 12, 68, 26, 27, 28, -1, 22, -1,
+ 32, 223, -1, -1, -1, -1, 38, 236, -1, 41,
+ -1, 43, -1, -1, 46, -1, -1, -1, -1, 94,
+ -1, -1, -1, -1, 56, 57, 58, -1, 3, 4,
+ 5, 6, 7, 65, 66, 10, 68, -1, 3, 4,
+ 5, 6, 7, -1, -1, 10, -1, -1, 73, -1,
+ -1, -1, -1, -1, 29, 80, 81, 82, 83, 84,
+ 85, 86, 94, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 3, 4, 5, 6, 7, 44,
+ 55, 10, -1, -1, -1, 60, 61, 62, 63, 64,
+ 55, -1, -1, -1, -1, 60, 61, 62, 63, 64,
+ 29, -1, -1, -1, -1, -1, -1, -1, 83, -1,
+ 85, -1, -1, -1, -1, 90, -1, -1, 83, -1,
+ 3, 4, 5, 6, 7, 90, 55, 10, -1, -1,
+ -1, 60, 61, 62, 63, 64, 8, 9, -1, 11,
+ 12, 13, -1, -1, -1, -1, -1, -1, -1, -1,
+ 22, -1, 8, 9, 83, 11, 12, 13, -1, -1,
+ -1, 90, -1, -1, -1, -1, -1, -1, -1, 25,
+ 8, 9, 55, 11, 12, 13, -1, 60, 61, 62,
+ 63, 64, -1, -1, 8, 9, -1, 11, 12, 13,
+ -1, -1, -1, -1, -1, 33, -1, -1, 22, -1,
+ 83, 73, -1, -1, -1, -1, -1, 90, 80, 81,
+ 82, 83, 84, 85, 86, 8, 9, 73, 11, 12,
+ 13, -1, -1, -1, 80, 81, 82, 83, 84, 85,
+ 86, -1, 25, -1, -1, 73, -1, -1, -1, -1,
+ -1, -1, 80, 81, 82, 83, 84, 85, 86, 73,
+ -1, -1, -1, -1, -1, -1, 80, 81, 82, 83,
+ 84, 85, 86, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, 95, -1, -1, -1, -1, -1,
- -1, 95, -1, 104, 105, 106, 107, 108, 109, 110,
- 104, 105, 106, 107, 108, 109, 110, -1, -1, -1,
- -1, 95, -1, -1, -1, -1, -1, -1, -1, -1,
- 104, 105, 106, 107, 108, 109, 110
+ 73, -1, -1, -1, -1, -1, -1, 80, 81, 82,
+ 83, 84, 85, 86
};
-/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
- symbol of state STATE-NUM. */
+ /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
+ symbol of state STATE-NUM. */
static const yytype_uint8 yystos[] =
{
- 0, 16, 121, 192, 9, 0, 113, 114, 9, 184,
- 185, 17, 18, 115, 117, 19, 20, 21, 22, 24,
- 103, 183, 183, 25, 184, 9, 186, 187, 183, 64,
- 186, 188, 189, 190, 191, 113, 65, 91, 26, 190,
- 9, 9, 9, 28, 33, 34, 35, 41, 48, 51,
- 53, 56, 67, 68, 69, 76, 81, 82, 85, 86,
- 87, 90, 118, 122, 123, 127, 128, 129, 142, 143,
- 144, 148, 149, 150, 151, 152, 153, 154, 155, 159,
- 160, 161, 162, 163, 164, 165, 166, 175, 179, 181,
- 182, 25, 113, 63, 3, 4, 5, 6, 7, 8,
- 9, 12, 66, 71, 72, 73, 74, 75, 77, 78,
- 79, 80, 83, 84, 107, 114, 124, 125, 124, 36,
- 37, 109, 124, 134, 135, 136, 9, 49, 9, 100,
- 180, 39, 57, 59, 177, 9, 9, 9, 142, 88,
- 88, 9, 27, 122, 113, 114, 113, 50, 142, 113,
- 40, 137, 147, 113, 113, 137, 147, 113, 113, 113,
- 113, 113, 113, 113, 113, 113, 113, 113, 113, 113,
- 113, 113, 113, 142, 124, 112, 124, 112, 124, 124,
- 10, 11, 13, 14, 15, 29, 95, 104, 105, 106,
- 107, 108, 109, 110, 114, 32, 114, 114, 49, 117,
- 39, 17, 180, 52, 180, 180, 60, 178, 49, 114,
- 124, 133, 114, 54, 124, 113, 70, 70, 115, 124,
- 124, 124, 124, 124, 123, 5, 124, 124, 124, 124,
- 124, 124, 124, 133, 123, 124, 38, 109, 9, 132,
- 134, 131, 180, 124, 9, 145, 146, 114, 58, 9,
- 130, 132, 116, 126, 115, 117, 133, 55, 30, 31,
- 156, 157, 158, 115, 27, 115, 9, 115, 117, 40,
- 117, 137, 42, 104, 117, 9, 167, 168, 9, 114,
- 115, 117, 124, 115, 9, 123, 124, 156, 27, 32,
- 115, 9, 180, 41, 138, 124, 124, 145, 183, 115,
- 117, 62, 115, 119, 116, 29, 28, 51, 92, 139,
- 32, 114, 169, 61, 172, 167, 180, 123, 17, 44,
- 141, 123, 3, 89, 170, 101, 173, 114, 93, 45,
- 27, 115, 12, 171, 102, 174, 9, 176, 94, 9,
- 32, 8, 104, 115, 117, 46, 47, 140, 3, 9
+ 0, 14, 97, 159, 7, 0, 89, 90, 91, 18,
+ 7, 153, 154, 16, 17, 79, 152, 53, 153, 155,
+ 156, 157, 158, 89, 54, 69, 19, 157, 7, 7,
+ 7, 21, 26, 27, 28, 32, 38, 41, 43, 46,
+ 56, 57, 58, 65, 66, 68, 94, 98, 99, 103,
+ 116, 117, 118, 122, 123, 124, 125, 126, 127, 128,
+ 132, 133, 134, 135, 136, 137, 138, 139, 144, 148,
+ 150, 151, 18, 89, 52, 3, 4, 5, 6, 7,
+ 10, 55, 60, 61, 62, 63, 64, 83, 90, 100,
+ 101, 100, 29, 85, 100, 108, 109, 110, 7, 39,
+ 7, 78, 149, 30, 47, 49, 146, 7, 7, 7,
+ 67, 67, 7, 20, 98, 89, 40, 116, 89, 31,
+ 111, 121, 89, 89, 111, 121, 89, 89, 89, 89,
+ 89, 89, 89, 89, 89, 89, 89, 89, 89, 89,
+ 89, 116, 100, 88, 100, 88, 100, 100, 8, 9,
+ 11, 12, 13, 22, 73, 80, 81, 82, 83, 84,
+ 85, 86, 90, 25, 90, 39, 93, 30, 15, 149,
+ 42, 149, 149, 50, 147, 39, 90, 90, 44, 100,
+ 89, 59, 59, 91, 100, 100, 100, 100, 100, 99,
+ 5, 100, 100, 100, 100, 100, 100, 100, 100, 107,
+ 99, 85, 7, 106, 108, 105, 149, 100, 7, 119,
+ 120, 90, 48, 7, 104, 106, 92, 102, 107, 45,
+ 23, 24, 129, 130, 131, 91, 93, 20, 91, 93,
+ 31, 93, 111, 33, 80, 93, 7, 140, 141, 7,
+ 90, 91, 93, 91, 7, 99, 100, 129, 20, 100,
+ 25, 7, 149, 32, 112, 100, 100, 119, 152, 91,
+ 93, 51, 91, 95, 92, 22, 21, 41, 70, 113,
+ 25, 90, 142, 140, 149, 99, 15, 34, 115, 99,
+ 3, 10, 143, 90, 71, 35, 20, 91, 6, 7,
+ 145, 72, 7, 25, 91, 93, 36, 37, 114, 7
};
-#define yyerrok (yyerrstatus = 0)
-#define yyclearin (yychar = YYEMPTY)
-#define YYEMPTY (-2)
-#define YYEOF 0
-
-#define YYACCEPT goto yyacceptlab
-#define YYABORT goto yyabortlab
-#define YYERROR goto yyerrorlab
-
-
-/* Like YYERROR except do call yyerror. This remains here temporarily
- to ease the transition to the new meaning of YYERROR, for GCC.
- Once GCC version 2 has supplanted version 1, this can go. */
+ /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */
+static const yytype_uint8 yyr1[] =
+{
+ 0, 96, 97, 98, 98, 98, 98, 98, 98, 98,
+ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98,
+ 98, 98, 98, 99, 99, 100, 100, 100, 100, 100,
+ 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
+ 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
+ 101, 101, 101, 101, 101, 102, 102, 102, 103, 104,
+ 105, 105, 106, 106, 106, 107, 107, 107, 108, 108,
+ 109, 109, 109, 110, 110, 110, 111, 111, 112, 112,
+ 113, 113, 114, 114, 114, 115, 115, 116, 117, 118,
+ 118, 119, 120, 120, 121, 122, 123, 124, 125, 126,
+ 127, 128, 129, 130, 130, 131, 131, 131, 132, 133,
+ 134, 135, 136, 137, 138, 139, 139, 140, 141, 141,
+ 142, 142, 143, 143, 144, 145, 145, 146, 146, 147,
+ 147, 148, 149, 149, 150, 151, 152, 152, 152, 153,
+ 154, 154, 154, 155, 156, 157, 157, 158, 158, 158,
+ 159
+};
-#define YYFAIL goto yyerrlab
+ /* YYR2[YYN] -- Number of symbols on the right hand side of rule YYN. */
+static const yytype_uint8 yyr2[] =
+{
+ 0, 2, 2, 1, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 1, 2, 1, 4, 1, 1, 1,
+ 1, 1, 3, 3, 3, 3, 2, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 2, 3, 3,
+ 1, 1, 1, 1, 1, 0, 1, 3, 6, 3,
+ 1, 3, 0, 1, 3, 0, 1, 3, 1, 4,
+ 0, 1, 3, 1, 3, 1, 0, 2, 0, 2,
+ 0, 4, 0, 1, 1, 0, 4, 8, 3, 5,
+ 2, 3, 1, 3, 4, 4, 2, 2, 3, 2,
+ 2, 3, 4, 1, 2, 0, 2, 1, 7, 6,
+ 10, 1, 1, 2, 2, 4, 4, 4, 1, 3,
+ 0, 3, 0, 2, 6, 1, 3, 0, 1, 0,
+ 1, 10, 1, 1, 2, 2, 1, 1, 1, 3,
+ 0, 1, 2, 6, 4, 1, 1, 0, 1, 2,
+ 10
+};
-#define YYRECOVERING() (!!yyerrstatus)
-#define YYBACKUP(Token, Value) \
-do \
- if (yychar == YYEMPTY && yylen == 1) \
- { \
- yychar = (Token); \
- yylval = (Value); \
- yytoken = YYTRANSLATE (yychar); \
- YYPOPSTACK (1); \
- goto yybackup; \
- } \
- else \
- { \
- yyerror (YY_("syntax error: cannot back up")); \
- YYERROR; \
- } \
-while (YYID (0))
-
-
-#define YYTERROR 1
-#define YYERRCODE 256
-
-
-/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N].
- If N is 0, then set CURRENT to the empty location which ends
- the previous symbol: RHS[0] (always defined). */
-
-#define YYRHSLOC(Rhs, K) ((Rhs)[K])
-#ifndef YYLLOC_DEFAULT
-# define YYLLOC_DEFAULT(Current, Rhs, N) \
- do \
- if (YYID (N)) \
- { \
- (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \
- (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \
- (Current).last_line = YYRHSLOC (Rhs, N).last_line; \
- (Current).last_column = YYRHSLOC (Rhs, N).last_column; \
- } \
- else \
- { \
- (Current).first_line = (Current).last_line = \
- YYRHSLOC (Rhs, 0).last_line; \
- (Current).first_column = (Current).last_column = \
- YYRHSLOC (Rhs, 0).last_column; \
- } \
- while (YYID (0))
-#endif
+#define yyerrok (yyerrstatus = 0)
+#define yyclearin (yychar = YYEMPTY)
+#define YYEMPTY (-2)
+#define YYEOF 0
+#define YYACCEPT goto yyacceptlab
+#define YYABORT goto yyabortlab
+#define YYERROR goto yyerrorlab
-/* YY_LOCATION_PRINT -- Print the location on the stream.
- This macro was not mandated originally: define only if we know
- we won't break user code: when these are the locations we know. */
-#ifndef YY_LOCATION_PRINT
-# if defined YYLTYPE_IS_TRIVIAL && YYLTYPE_IS_TRIVIAL
-# define YY_LOCATION_PRINT(File, Loc) \
- fprintf (File, "%d.%d-%d.%d", \
- (Loc).first_line, (Loc).first_column, \
- (Loc).last_line, (Loc).last_column)
-# else
-# define YY_LOCATION_PRINT(File, Loc) ((void) 0)
-# endif
-#endif
+#define YYRECOVERING() (!!yyerrstatus)
+#define YYBACKUP(Token, Value) \
+ do \
+ if (yychar == YYEMPTY) \
+ { \
+ yychar = (Token); \
+ yylval = (Value); \
+ YYPOPSTACK (yylen); \
+ yystate = *yyssp; \
+ goto yybackup; \
+ } \
+ else \
+ { \
+ yyerror (YY_("syntax error: cannot back up")); \
+ YYERROR; \
+ } \
+ while (0)
+
+/* Error token number */
+#define YYTERROR 1
+#define YYERRCODE 256
-/* YYLEX -- calling `yylex' with the right arguments. */
-#ifdef YYLEX_PARAM
-# define YYLEX yylex (YYLEX_PARAM)
-#else
-# define YYLEX yylex ()
-#endif
/* Enable debugging if requested. */
#if YYDEBUG
@@ -1279,80 +970,63 @@ while (YYID (0))
# define YYFPRINTF fprintf
# endif
-# define YYDPRINTF(Args) \
-do { \
- if (yydebug) \
- YYFPRINTF Args; \
-} while (YYID (0))
-
-# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \
-do { \
- if (yydebug) \
- { \
- YYFPRINTF (stderr, "%s ", Title); \
- yy_symbol_print (stderr, \
- Type, Value); \
- YYFPRINTF (stderr, "\n"); \
- } \
-} while (YYID (0))
-
-
-/*--------------------------------.
-| Print this symbol on YYOUTPUT. |
-`--------------------------------*/
-
-/*ARGSUSED*/
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static void
-yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep)
-#else
-static void
-yy_symbol_value_print (yyoutput, yytype, yyvaluep)
- FILE *yyoutput;
- int yytype;
- YYSTYPE const * const yyvaluep;
+# define YYDPRINTF(Args) \
+do { \
+ if (yydebug) \
+ YYFPRINTF Args; \
+} while (0)
+
+/* This macro is provided for backward compatibility. */
+#ifndef YY_LOCATION_PRINT
+# define YY_LOCATION_PRINT(File, Loc) ((void) 0)
#endif
+
+
+# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \
+do { \
+ if (yydebug) \
+ { \
+ YYFPRINTF (stderr, "%s ", Title); \
+ yy_symbol_print (stderr, \
+ Type, Value); \
+ YYFPRINTF (stderr, "\n"); \
+ } \
+} while (0)
+
+
+/*-----------------------------------.
+| Print this symbol's value on YYO. |
+`-----------------------------------*/
+
+static void
+yy_symbol_value_print (FILE *yyo, int yytype, YYSTYPE const * const yyvaluep)
{
+ FILE *yyoutput = yyo;
+ YYUSE (yyoutput);
if (!yyvaluep)
return;
# ifdef YYPRINT
if (yytype < YYNTOKENS)
- YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep);
-# else
- YYUSE (yyoutput);
+ YYPRINT (yyo, yytoknum[yytype], *yyvaluep);
# endif
- switch (yytype)
- {
- default:
- break;
- }
+ YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
+ YYUSE (yytype);
+ YY_IGNORE_MAYBE_UNINITIALIZED_END
}
-/*--------------------------------.
-| Print this symbol on YYOUTPUT. |
-`--------------------------------*/
+/*---------------------------.
+| Print this symbol on YYO. |
+`---------------------------*/
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
static void
-yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep)
-#else
-static void
-yy_symbol_print (yyoutput, yytype, yyvaluep)
- FILE *yyoutput;
- int yytype;
- YYSTYPE const * const yyvaluep;
-#endif
+yy_symbol_print (FILE *yyo, int yytype, YYSTYPE const * const yyvaluep)
{
- if (yytype < YYNTOKENS)
- YYFPRINTF (yyoutput, "token %s (", yytname[yytype]);
- else
- YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]);
+ YYFPRINTF (yyo, "%s %s (",
+ yytype < YYNTOKENS ? "token" : "nterm", yytname[yytype]);
- yy_symbol_value_print (yyoutput, yytype, yyvaluep);
- YYFPRINTF (yyoutput, ")");
+ yy_symbol_value_print (yyo, yytype, yyvaluep);
+ YYFPRINTF (yyo, ")");
}
/*------------------------------------------------------------------.
@@ -1360,66 +1034,54 @@ yy_symbol_print (yyoutput, yytype, yyvaluep)
| TOP (included). |
`------------------------------------------------------------------*/
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
static void
-yy_stack_print (yytype_int16 *bottom, yytype_int16 *top)
-#else
-static void
-yy_stack_print (bottom, top)
- yytype_int16 *bottom;
- yytype_int16 *top;
-#endif
+yy_stack_print (yytype_int16 *yybottom, yytype_int16 *yytop)
{
YYFPRINTF (stderr, "Stack now");
- for (; bottom <= top; ++bottom)
- YYFPRINTF (stderr, " %d", *bottom);
+ for (; yybottom <= yytop; yybottom++)
+ {
+ int yybot = *yybottom;
+ YYFPRINTF (stderr, " %d", yybot);
+ }
YYFPRINTF (stderr, "\n");
}
-# define YY_STACK_PRINT(Bottom, Top) \
-do { \
- if (yydebug) \
- yy_stack_print ((Bottom), (Top)); \
-} while (YYID (0))
+# define YY_STACK_PRINT(Bottom, Top) \
+do { \
+ if (yydebug) \
+ yy_stack_print ((Bottom), (Top)); \
+} while (0)
/*------------------------------------------------.
| Report that the YYRULE is going to be reduced. |
`------------------------------------------------*/
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static void
-yy_reduce_print (YYSTYPE *yyvsp, int yyrule)
-#else
static void
-yy_reduce_print (yyvsp, yyrule)
- YYSTYPE *yyvsp;
- int yyrule;
-#endif
+yy_reduce_print (yytype_int16 *yyssp, YYSTYPE *yyvsp, int yyrule)
{
+ unsigned long yylno = yyrline[yyrule];
int yynrhs = yyr2[yyrule];
int yyi;
- unsigned long int yylno = yyrline[yyrule];
YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n",
- yyrule - 1, yylno);
+ yyrule - 1, yylno);
/* The symbols being reduced. */
for (yyi = 0; yyi < yynrhs; yyi++)
{
- fprintf (stderr, " $%d = ", yyi + 1);
- yy_symbol_print (stderr, yyrhs[yyprhs[yyrule] + yyi],
- &(yyvsp[(yyi + 1) - (yynrhs)])
- );
- fprintf (stderr, "\n");
+ YYFPRINTF (stderr, " $%d = ", yyi + 1);
+ yy_symbol_print (stderr,
+ yystos[yyssp[yyi + 1 - yynrhs]],
+ &yyvsp[(yyi + 1) - (yynrhs)]
+ );
+ YYFPRINTF (stderr, "\n");
}
}
-# define YY_REDUCE_PRINT(Rule) \
-do { \
- if (yydebug) \
- yy_reduce_print (yyvsp, Rule); \
-} while (YYID (0))
+# define YY_REDUCE_PRINT(Rule) \
+do { \
+ if (yydebug) \
+ yy_reduce_print (yyssp, yyvsp, Rule); \
+} while (0)
/* Nonzero means print parse trace. It is left uninitialized so that
multiple parsers can coexist. */
@@ -1433,7 +1095,7 @@ int yydebug;
/* YYINITDEPTH -- initial size of the parser's stacks. */
-#ifndef YYINITDEPTH
+#ifndef YYINITDEPTH
# define YYINITDEPTH 200
#endif
@@ -1449,7 +1111,6 @@ int yydebug;
#endif
-
#if YYERROR_VERBOSE
# ifndef yystrlen
@@ -1457,15 +1118,8 @@ int yydebug;
# define yystrlen strlen
# else
/* Return the length of YYSTR. */
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
static YYSIZE_T
yystrlen (const char *yystr)
-#else
-static YYSIZE_T
-yystrlen (yystr)
- const char *yystr;
-#endif
{
YYSIZE_T yylen;
for (yylen = 0; yystr[yylen]; yylen++)
@@ -1481,16 +1135,8 @@ yystrlen (yystr)
# else
/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in
YYDEST. */
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
static char *
yystpcpy (char *yydest, const char *yysrc)
-#else
-static char *
-yystpcpy (yydest, yysrc)
- char *yydest;
- const char *yysrc;
-#endif
{
char *yyd = yydest;
const char *yys = yysrc;
@@ -1520,235 +1166,245 @@ yytnamerr (char *yyres, const char *yystr)
char const *yyp = yystr;
for (;;)
- switch (*++yyp)
- {
- case '\'':
- case ',':
- goto do_not_strip_quotes;
-
- case '\\':
- if (*++yyp != '\\')
- goto do_not_strip_quotes;
- /* Fall through. */
- default:
- if (yyres)
- yyres[yyn] = *yyp;
- yyn++;
- break;
-
- case '"':
- if (yyres)
- yyres[yyn] = '\0';
- return yyn;
- }
+ switch (*++yyp)
+ {
+ case '\'':
+ case ',':
+ goto do_not_strip_quotes;
+
+ case '\\':
+ if (*++yyp != '\\')
+ goto do_not_strip_quotes;
+ else
+ goto append;
+
+ append:
+ default:
+ if (yyres)
+ yyres[yyn] = *yyp;
+ yyn++;
+ break;
+
+ case '"':
+ if (yyres)
+ yyres[yyn] = '\0';
+ return yyn;
+ }
do_not_strip_quotes: ;
}
if (! yyres)
return yystrlen (yystr);
- return yystpcpy (yyres, yystr) - yyres;
+ return (YYSIZE_T) (yystpcpy (yyres, yystr) - yyres);
}
# endif
-/* Copy into YYRESULT an error message about the unexpected token
- YYCHAR while in state YYSTATE. Return the number of bytes copied,
- including the terminating null byte. If YYRESULT is null, do not
- copy anything; just return the number of bytes that would be
- copied. As a special case, return 0 if an ordinary "syntax error"
- message will do. Return YYSIZE_MAXIMUM if overflow occurs during
- size calculation. */
-static YYSIZE_T
-yysyntax_error (char *yyresult, int yystate, int yychar)
+/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message
+ about the unexpected token YYTOKEN for the state stack whose top is
+ YYSSP.
+
+ Return 0 if *YYMSG was successfully written. Return 1 if *YYMSG is
+ not large enough to hold the message. In that case, also set
+ *YYMSG_ALLOC to the required number of bytes. Return 2 if the
+ required number of bytes is too large to store. */
+static int
+yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg,
+ yytype_int16 *yyssp, int yytoken)
{
- int yyn = yypact[yystate];
+ YYSIZE_T yysize0 = yytnamerr (YY_NULLPTR, yytname[yytoken]);
+ YYSIZE_T yysize = yysize0;
+ enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 };
+ /* Internationalized format string. */
+ const char *yyformat = YY_NULLPTR;
+ /* Arguments of yyformat. */
+ char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM];
+ /* Number of reported tokens (one for the "unexpected", one per
+ "expected"). */
+ int yycount = 0;
+
+ /* There are many possibilities here to consider:
+ - If this state is a consistent state with a default action, then
+ the only way this function was invoked is if the default action
+ is an error action. In that case, don't check for expected
+ tokens because there are none.
+ - The only way there can be no lookahead present (in yychar) is if
+ this state is a consistent state with a default action. Thus,
+ detecting the absence of a lookahead is sufficient to determine
+ that there is no unexpected or expected token to report. In that
+ case, just report a simple "syntax error".
+ - Don't assume there isn't a lookahead just because this state is a
+ consistent state with a default action. There might have been a
+ previous inconsistent state, consistent state with a non-default
+ action, or user semantic action that manipulated yychar.
+ - Of course, the expected token list depends on states to have
+ correct lookahead information, and it depends on the parser not
+ to perform extra reductions after fetching a lookahead from the
+ scanner and before detecting a syntax error. Thus, state merging
+ (from LALR or IELR) and default reductions corrupt the expected
+ token list. However, the list is correct for canonical LR with
+ one exception: it will still contain any token that will not be
+ accepted due to an error action in a later state.
+ */
+ if (yytoken != YYEMPTY)
+ {
+ int yyn = yypact[*yyssp];
+ yyarg[yycount++] = yytname[yytoken];
+ if (!yypact_value_is_default (yyn))
+ {
+ /* Start YYX at -YYN if negative to avoid negative indexes in
+ YYCHECK. In other words, skip the first -YYN actions for
+ this state because they are default actions. */
+ int yyxbegin = yyn < 0 ? -yyn : 0;
+ /* Stay within bounds of both yycheck and yytname. */
+ int yychecklim = YYLAST - yyn + 1;
+ int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS;
+ int yyx;
+
+ for (yyx = yyxbegin; yyx < yyxend; ++yyx)
+ if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR
+ && !yytable_value_is_error (yytable[yyx + yyn]))
+ {
+ if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM)
+ {
+ yycount = 1;
+ yysize = yysize0;
+ break;
+ }
+ yyarg[yycount++] = yytname[yyx];
+ {
+ YYSIZE_T yysize1 = yysize + yytnamerr (YY_NULLPTR, yytname[yyx]);
+ if (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM)
+ yysize = yysize1;
+ else
+ return 2;
+ }
+ }
+ }
+ }
- if (! (YYPACT_NINF < yyn && yyn <= YYLAST))
- return 0;
- else
+ switch (yycount)
{
- int yytype = YYTRANSLATE (yychar);
- YYSIZE_T yysize0 = yytnamerr (0, yytname[yytype]);
- YYSIZE_T yysize = yysize0;
- YYSIZE_T yysize1;
- int yysize_overflow = 0;
- enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 };
- char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM];
- int yyx;
-
-# if 0
- /* This is so xgettext sees the translatable formats that are
- constructed on the fly. */
- YY_("syntax error, unexpected %s");
- YY_("syntax error, unexpected %s, expecting %s");
- YY_("syntax error, unexpected %s, expecting %s or %s");
- YY_("syntax error, unexpected %s, expecting %s or %s or %s");
- YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s");
-# endif
- char *yyfmt;
- char const *yyf;
- static char const yyunexpected[] = "syntax error, unexpected %s";
- static char const yyexpecting[] = ", expecting %s";
- static char const yyor[] = " or %s";
- char yyformat[sizeof yyunexpected
- + sizeof yyexpecting - 1
- + ((YYERROR_VERBOSE_ARGS_MAXIMUM - 2)
- * (sizeof yyor - 1))];
- char const *yyprefix = yyexpecting;
-
- /* Start YYX at -YYN if negative to avoid negative indexes in
- YYCHECK. */
- int yyxbegin = yyn < 0 ? -yyn : 0;
-
- /* Stay within bounds of both yycheck and yytname. */
- int yychecklim = YYLAST - yyn + 1;
- int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS;
- int yycount = 1;
-
- yyarg[0] = yytname[yytype];
- yyfmt = yystpcpy (yyformat, yyunexpected);
-
- for (yyx = yyxbegin; yyx < yyxend; ++yyx)
- if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR)
- {
- if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM)
- {
- yycount = 1;
- yysize = yysize0;
- yyformat[sizeof yyunexpected - 1] = '\0';
- break;
- }
- yyarg[yycount++] = yytname[yyx];
- yysize1 = yysize + yytnamerr (0, yytname[yyx]);
- yysize_overflow |= (yysize1 < yysize);
- yysize = yysize1;
- yyfmt = yystpcpy (yyfmt, yyprefix);
- yyprefix = yyor;
- }
-
- yyf = YY_(yyformat);
- yysize1 = yysize + yystrlen (yyf);
- yysize_overflow |= (yysize1 < yysize);
+# define YYCASE_(N, S) \
+ case N: \
+ yyformat = S; \
+ break
+ default: /* Avoid compiler warnings. */
+ YYCASE_(0, YY_("syntax error"));
+ YYCASE_(1, YY_("syntax error, unexpected %s"));
+ YYCASE_(2, YY_("syntax error, unexpected %s, expecting %s"));
+ YYCASE_(3, YY_("syntax error, unexpected %s, expecting %s or %s"));
+ YYCASE_(4, YY_("syntax error, unexpected %s, expecting %s or %s or %s"));
+ YYCASE_(5, YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s"));
+# undef YYCASE_
+ }
+
+ {
+ YYSIZE_T yysize1 = yysize + yystrlen (yyformat);
+ if (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM)
yysize = yysize1;
+ else
+ return 2;
+ }
- if (yysize_overflow)
- return YYSIZE_MAXIMUM;
-
- if (yyresult)
- {
- /* Avoid sprintf, as that infringes on the user's name space.
- Don't have undefined behavior even if the translation
- produced a string with the wrong number of "%s"s. */
- char *yyp = yyresult;
- int yyi = 0;
- while ((*yyp = *yyf) != '\0')
- {
- if (*yyp == '%' && yyf[1] == 's' && yyi < yycount)
- {
- yyp += yytnamerr (yyp, yyarg[yyi++]);
- yyf += 2;
- }
- else
- {
- yyp++;
- yyf++;
- }
- }
- }
- return yysize;
+ if (*yymsg_alloc < yysize)
+ {
+ *yymsg_alloc = 2 * yysize;
+ if (! (yysize <= *yymsg_alloc
+ && *yymsg_alloc <= YYSTACK_ALLOC_MAXIMUM))
+ *yymsg_alloc = YYSTACK_ALLOC_MAXIMUM;
+ return 1;
}
+
+ /* Avoid sprintf, as that infringes on the user's name space.
+ Don't have undefined behavior even if the translation
+ produced a string with the wrong number of "%s"s. */
+ {
+ char *yyp = *yymsg;
+ int yyi = 0;
+ while ((*yyp = *yyformat) != '\0')
+ if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount)
+ {
+ yyp += yytnamerr (yyp, yyarg[yyi++]);
+ yyformat += 2;
+ }
+ else
+ {
+ yyp++;
+ yyformat++;
+ }
+ }
+ return 0;
}
#endif /* YYERROR_VERBOSE */
-
/*-----------------------------------------------.
| Release the memory associated to this symbol. |
`-----------------------------------------------*/
-/*ARGSUSED*/
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
static void
yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep)
-#else
-static void
-yydestruct (yymsg, yytype, yyvaluep)
- const char *yymsg;
- int yytype;
- YYSTYPE *yyvaluep;
-#endif
{
YYUSE (yyvaluep);
-
if (!yymsg)
yymsg = "Deleting";
YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp);
-}
+ YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
+ YYUSE (yytype);
+ YY_IGNORE_MAYBE_UNINITIALIZED_END
+}
-/* Prevent warnings from -Wmissing-prototypes. */
-
-#ifdef YYPARSE_PARAM
-#if defined __STDC__ || defined __cplusplus
-int yyparse (void *YYPARSE_PARAM);
-#else
-int yyparse ();
-#endif
-#else /* ! YYPARSE_PARAM */
-#if defined __STDC__ || defined __cplusplus
-int yyparse (void);
-#else
-int yyparse ();
-#endif
-#endif /* ! YYPARSE_PARAM */
-/* The look-ahead symbol. */
-int yychar;
+/* The lookahead symbol. */
+static int yychar;
-/* The semantic value of the look-ahead symbol. */
+/* The semantic value of the lookahead symbol. */
YYSTYPE yylval;
-
/* Number of syntax errors so far. */
-int yynerrs;
-
+static int yynerrs;
/*----------.
| yyparse. |
`----------*/
-#ifdef YYPARSE_PARAM
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-int
-yyparse (void *YYPARSE_PARAM)
-#else
-int
-yyparse (YYPARSE_PARAM)
- void *YYPARSE_PARAM;
-#endif
-#else /* ! YYPARSE_PARAM */
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
int
yyparse (void)
-#else
-int
-yyparse ()
-
-#endif
-#endif
{
+ int yystate;
+ /* Number of tokens to shift before error messages enabled. */
+ int yyerrstatus;
+
+ /* The stacks and their tools:
+ 'yyss': related to states.
+ 'yyvs': related to semantic values.
+
+ Refer to the stacks through separate pointers, to allow yyoverflow
+ to reallocate them elsewhere. */
+
+ /* The state stack. */
+ yytype_int16 yyssa[YYINITDEPTH];
+ yytype_int16 *yyss;
+ yytype_int16 *yyssp;
+
+ /* The semantic value stack. */
+ YYSTYPE yyvsa[YYINITDEPTH];
+ YYSTYPE *yyvs;
+ YYSTYPE *yyvsp;
+
+ YYSIZE_T yystacksize;
- int yystate;
int yyn;
int yyresult;
- /* Number of tokens to shift before error messages enabled. */
- int yyerrstatus;
- /* Look-ahead token as an internal (translated) token number. */
+ /* Lookahead token as an internal (translated) token number. */
int yytoken = 0;
+ /* The variables used to return semantic value and location from the
+ action routines. */
+ YYSTYPE yyval;
+
#if YYERROR_VERBOSE
/* Buffer for error messages, and its allocated size. */
char yymsgbuf[128];
@@ -1756,156 +1412,127 @@ yyparse ()
YYSIZE_T yymsg_alloc = sizeof yymsgbuf;
#endif
- /* Three stacks and their tools:
- `yyss': related to states,
- `yyvs': related to semantic values,
- `yyls': related to locations.
-
- Refer to the stacks thru separate pointers, to allow yyoverflow
- to reallocate them elsewhere. */
-
- /* The state stack. */
- yytype_int16 yyssa[YYINITDEPTH];
- yytype_int16 *yyss = yyssa;
- yytype_int16 *yyssp;
-
- /* The semantic value stack. */
- YYSTYPE yyvsa[YYINITDEPTH];
- YYSTYPE *yyvs = yyvsa;
- YYSTYPE *yyvsp;
-
-
-
#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N))
- YYSIZE_T yystacksize = YYINITDEPTH;
-
- /* The variables used to return semantic value and location from the
- action routines. */
- YYSTYPE yyval;
-
-
/* The number of symbols on the RHS of the reduced rule.
Keep to zero when no symbol should be popped. */
int yylen = 0;
+ yyssp = yyss = yyssa;
+ yyvsp = yyvs = yyvsa;
+ yystacksize = YYINITDEPTH;
+
YYDPRINTF ((stderr, "Starting parse\n"));
yystate = 0;
yyerrstatus = 0;
yynerrs = 0;
- yychar = YYEMPTY; /* Cause a token to be read. */
-
- /* Initialize stack pointers.
- Waste one element of value and location stack
- so that they stay on the same level as the state stack.
- The wasted elements are never initialized. */
-
- yyssp = yyss;
- yyvsp = yyvs;
-
+ yychar = YYEMPTY; /* Cause a token to be read. */
goto yysetstate;
+
/*------------------------------------------------------------.
-| yynewstate -- Push a new state, which is found in yystate. |
+| yynewstate -- push a new state, which is found in yystate. |
`------------------------------------------------------------*/
- yynewstate:
+yynewstate:
/* In all cases, when you get here, the value and location stacks
have just been pushed. So pushing a state here evens the stacks. */
yyssp++;
- yysetstate:
- *yyssp = yystate;
+
+/*--------------------------------------------------------------------.
+| yynewstate -- set current state (the top of the stack) to yystate. |
+`--------------------------------------------------------------------*/
+yysetstate:
+ YYDPRINTF ((stderr, "Entering state %d\n", yystate));
+ YY_ASSERT (0 <= yystate && yystate < YYNSTATES);
+ *yyssp = (yytype_int16) yystate;
if (yyss + yystacksize - 1 <= yyssp)
+#if !defined yyoverflow && !defined YYSTACK_RELOCATE
+ goto yyexhaustedlab;
+#else
{
/* Get the current used size of the three stacks, in elements. */
- YYSIZE_T yysize = yyssp - yyss + 1;
+ YYSIZE_T yysize = (YYSIZE_T) (yyssp - yyss + 1);
-#ifdef yyoverflow
+# if defined yyoverflow
{
- /* Give user a chance to reallocate the stack. Use copies of
- these so that the &'s don't force the real ones into
- memory. */
- YYSTYPE *yyvs1 = yyvs;
- yytype_int16 *yyss1 = yyss;
-
-
- /* Each stack pointer address is followed by the size of the
- data in use in that stack, in bytes. This used to be a
- conditional around just the two extra args, but that might
- be undefined if yyoverflow is a macro. */
- yyoverflow (YY_("memory exhausted"),
- &yyss1, yysize * sizeof (*yyssp),
- &yyvs1, yysize * sizeof (*yyvsp),
-
- &yystacksize);
-
- yyss = yyss1;
- yyvs = yyvs1;
+ /* Give user a chance to reallocate the stack. Use copies of
+ these so that the &'s don't force the real ones into
+ memory. */
+ YYSTYPE *yyvs1 = yyvs;
+ yytype_int16 *yyss1 = yyss;
+
+ /* Each stack pointer address is followed by the size of the
+ data in use in that stack, in bytes. This used to be a
+ conditional around just the two extra args, but that might
+ be undefined if yyoverflow is a macro. */
+ yyoverflow (YY_("memory exhausted"),
+ &yyss1, yysize * sizeof (*yyssp),
+ &yyvs1, yysize * sizeof (*yyvsp),
+ &yystacksize);
+ yyss = yyss1;
+ yyvs = yyvs1;
}
-#else /* no yyoverflow */
-# ifndef YYSTACK_RELOCATE
- goto yyexhaustedlab;
-# else
+# else /* defined YYSTACK_RELOCATE */
/* Extend the stack our own way. */
if (YYMAXDEPTH <= yystacksize)
- goto yyexhaustedlab;
+ goto yyexhaustedlab;
yystacksize *= 2;
if (YYMAXDEPTH < yystacksize)
- yystacksize = YYMAXDEPTH;
+ yystacksize = YYMAXDEPTH;
{
- yytype_int16 *yyss1 = yyss;
- union yyalloc *yyptr =
- (union yyalloc*) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize));
- if (! yyptr)
- goto yyexhaustedlab;
- YYSTACK_RELOCATE (yyss);
- YYSTACK_RELOCATE (yyvs);
-
-# undef YYSTACK_RELOCATE
- if (yyss1 != yyssa)
- YYSTACK_FREE (yyss1);
+ yytype_int16 *yyss1 = yyss;
+ union yyalloc *yyptr =
+ (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize));
+ if (! yyptr)
+ goto yyexhaustedlab;
+ YYSTACK_RELOCATE (yyss_alloc, yyss);
+ YYSTACK_RELOCATE (yyvs_alloc, yyvs);
+# undef YYSTACK_RELOCATE
+ if (yyss1 != yyssa)
+ YYSTACK_FREE (yyss1);
}
# endif
-#endif /* no yyoverflow */
yyssp = yyss + yysize - 1;
yyvsp = yyvs + yysize - 1;
-
YYDPRINTF ((stderr, "Stack size increased to %lu\n",
- (unsigned long int) yystacksize));
+ (unsigned long) yystacksize));
if (yyss + yystacksize - 1 <= yyssp)
- YYABORT;
+ YYABORT;
}
+#endif /* !defined yyoverflow && !defined YYSTACK_RELOCATE */
- YYDPRINTF ((stderr, "Entering state %d\n", yystate));
+ if (yystate == YYFINAL)
+ YYACCEPT;
goto yybackup;
+
/*-----------.
| yybackup. |
`-----------*/
yybackup:
-
/* Do appropriate processing given the current state. Read a
- look-ahead token if we need one and don't already have one. */
+ lookahead token if we need one and don't already have one. */
- /* First try to decide what to do without reference to look-ahead token. */
+ /* First try to decide what to do without reference to lookahead token. */
yyn = yypact[yystate];
- if (yyn == YYPACT_NINF)
+ if (yypact_value_is_default (yyn))
goto yydefault;
- /* Not known => get a look-ahead token if don't already have one. */
+ /* Not known => get a lookahead token if don't already have one. */
- /* YYCHAR is either YYEMPTY or YYEOF or a valid look-ahead symbol. */
+ /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. */
if (yychar == YYEMPTY)
{
YYDPRINTF ((stderr, "Reading a token: "));
- yychar = YYLEX;
+ yychar = yylex ();
}
if (yychar <= YYEOF)
@@ -1927,30 +1554,27 @@ yybackup:
yyn = yytable[yyn];
if (yyn <= 0)
{
- if (yyn == 0 || yyn == YYTABLE_NINF)
- goto yyerrlab;
+ if (yytable_value_is_error (yyn))
+ goto yyerrlab;
yyn = -yyn;
goto yyreduce;
}
- if (yyn == YYFINAL)
- YYACCEPT;
-
/* Count tokens shifted since error; after three, turn off error
status. */
if (yyerrstatus)
yyerrstatus--;
- /* Shift the look-ahead token. */
+ /* Shift the lookahead token. */
YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc);
- /* Discard the shifted token unless it is eof. */
- if (yychar != YYEOF)
- yychar = YYEMPTY;
+ /* Discard the shifted token. */
+ yychar = YYEMPTY;
yystate = yyn;
+ YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
*++yyvsp = yylval;
-
+ YY_IGNORE_MAYBE_UNINITIALIZED_END
goto yynewstate;
@@ -1965,14 +1589,14 @@ yydefault:
/*-----------------------------.
-| yyreduce -- Do a reduction. |
+| yyreduce -- do a reduction. |
`-----------------------------*/
yyreduce:
/* yyn is the number of a rule to reduce with. */
yylen = yyr2[yyn];
/* If YYLEN is nonzero, implement the default value of the action:
- `$$ = $1'.
+ '$$ = $1'.
Otherwise, the following line sets YYVAL to garbage.
This behavior is undocumented and Bison
@@ -1985,839 +1609,792 @@ yyreduce:
YY_REDUCE_PRINT (yyn);
switch (yyn)
{
- case 25:
-#line 190 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;}
+ case 23:
+#line 166 "pars0grm.y"
+ { yyval = que_node_list_add_last(NULL, yyvsp[0]); }
+#line 1616 "pars0grm.cc"
+ break;
+
+ case 24:
+#line 168 "pars0grm.y"
+ { yyval = que_node_list_add_last(yyvsp[-1], yyvsp[0]); }
+#line 1622 "pars0grm.cc"
+ break;
+
+ case 25:
+#line 172 "pars0grm.y"
+ { yyval = yyvsp[0];}
+#line 1628 "pars0grm.cc"
break;
case 26:
-#line 192 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[(1) - (2)]), (yyvsp[(2) - (2)])); ;}
+#line 174 "pars0grm.y"
+ { yyval = pars_func(yyvsp[-3], yyvsp[-1]); }
+#line 1634 "pars0grm.cc"
break;
case 27:
-#line 196 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (1)]);;}
+#line 175 "pars0grm.y"
+ { yyval = yyvsp[0];}
+#line 1640 "pars0grm.cc"
break;
case 28:
-#line 198 "pars0grm.y"
- { (yyval) = pars_func((yyvsp[(1) - (4)]), (yyvsp[(3) - (4)])); ;}
+#line 176 "pars0grm.y"
+ { yyval = yyvsp[0];}
+#line 1646 "pars0grm.cc"
break;
case 29:
-#line 199 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (1)]);;}
+#line 177 "pars0grm.y"
+ { yyval = yyvsp[0];}
+#line 1652 "pars0grm.cc"
break;
case 30:
-#line 200 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (1)]);;}
+#line 178 "pars0grm.y"
+ { yyval = yyvsp[0];}
+#line 1658 "pars0grm.cc"
break;
case 31:
-#line 201 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (1)]);;}
+#line 179 "pars0grm.y"
+ { yyval = yyvsp[0];}
+#line 1664 "pars0grm.cc"
break;
case 32:
-#line 202 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (1)]);;}
+#line 180 "pars0grm.y"
+ { yyval = pars_op('+', yyvsp[-2], yyvsp[0]); }
+#line 1670 "pars0grm.cc"
break;
case 33:
-#line 203 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (1)]);;}
+#line 181 "pars0grm.y"
+ { yyval = pars_op('-', yyvsp[-2], yyvsp[0]); }
+#line 1676 "pars0grm.cc"
break;
case 34:
-#line 204 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (1)]);;}
+#line 182 "pars0grm.y"
+ { yyval = pars_op('*', yyvsp[-2], yyvsp[0]); }
+#line 1682 "pars0grm.cc"
break;
case 35:
-#line 205 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (1)]);;}
+#line 183 "pars0grm.y"
+ { yyval = pars_op('/', yyvsp[-2], yyvsp[0]); }
+#line 1688 "pars0grm.cc"
break;
case 36:
-#line 206 "pars0grm.y"
- { (yyval) = pars_op('+', (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
+#line 184 "pars0grm.y"
+ { yyval = pars_op('-', yyvsp[0], NULL); }
+#line 1694 "pars0grm.cc"
break;
case 37:
-#line 207 "pars0grm.y"
- { (yyval) = pars_op('-', (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
+#line 185 "pars0grm.y"
+ { yyval = yyvsp[-1]; }
+#line 1700 "pars0grm.cc"
break;
case 38:
-#line 208 "pars0grm.y"
- { (yyval) = pars_op('*', (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
+#line 186 "pars0grm.y"
+ { yyval = pars_op('=', yyvsp[-2], yyvsp[0]); }
+#line 1706 "pars0grm.cc"
break;
case 39:
-#line 209 "pars0grm.y"
- { (yyval) = pars_op('/', (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
+#line 188 "pars0grm.y"
+ { yyval = pars_op(PARS_LIKE_TOKEN, yyvsp[-2], yyvsp[0]); }
+#line 1712 "pars0grm.cc"
break;
case 40:
-#line 210 "pars0grm.y"
- { (yyval) = pars_op('-', (yyvsp[(2) - (2)]), NULL); ;}
+#line 189 "pars0grm.y"
+ { yyval = pars_op('<', yyvsp[-2], yyvsp[0]); }
+#line 1718 "pars0grm.cc"
break;
case 41:
-#line 211 "pars0grm.y"
- { (yyval) = (yyvsp[(2) - (3)]); ;}
+#line 190 "pars0grm.y"
+ { yyval = pars_op('>', yyvsp[-2], yyvsp[0]); }
+#line 1724 "pars0grm.cc"
break;
case 42:
-#line 212 "pars0grm.y"
- { (yyval) = pars_op('=', (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
+#line 191 "pars0grm.y"
+ { yyval = pars_op(PARS_GE_TOKEN, yyvsp[-2], yyvsp[0]); }
+#line 1730 "pars0grm.cc"
break;
case 43:
-#line 214 "pars0grm.y"
- { (yyval) = pars_op(PARS_LIKE_TOKEN, (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
+#line 192 "pars0grm.y"
+ { yyval = pars_op(PARS_LE_TOKEN, yyvsp[-2], yyvsp[0]); }
+#line 1736 "pars0grm.cc"
break;
case 44:
-#line 215 "pars0grm.y"
- { (yyval) = pars_op('<', (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
+#line 193 "pars0grm.y"
+ { yyval = pars_op(PARS_NE_TOKEN, yyvsp[-2], yyvsp[0]); }
+#line 1742 "pars0grm.cc"
break;
case 45:
-#line 216 "pars0grm.y"
- { (yyval) = pars_op('>', (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
+#line 194 "pars0grm.y"
+ { yyval = pars_op(PARS_AND_TOKEN, yyvsp[-2], yyvsp[0]); }
+#line 1748 "pars0grm.cc"
break;
case 46:
-#line 217 "pars0grm.y"
- { (yyval) = pars_op(PARS_GE_TOKEN, (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
+#line 195 "pars0grm.y"
+ { yyval = pars_op(PARS_OR_TOKEN, yyvsp[-2], yyvsp[0]); }
+#line 1754 "pars0grm.cc"
break;
case 47:
-#line 218 "pars0grm.y"
- { (yyval) = pars_op(PARS_LE_TOKEN, (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
+#line 196 "pars0grm.y"
+ { yyval = pars_op(PARS_NOT_TOKEN, yyvsp[0], NULL); }
+#line 1760 "pars0grm.cc"
break;
case 48:
-#line 219 "pars0grm.y"
- { (yyval) = pars_op(PARS_NE_TOKEN, (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
+#line 198 "pars0grm.y"
+ { yyval = pars_op(PARS_NOTFOUND_TOKEN, yyvsp[-2], NULL); }
+#line 1766 "pars0grm.cc"
break;
case 49:
-#line 220 "pars0grm.y"
- { (yyval) = pars_op(PARS_AND_TOKEN, (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
+#line 200 "pars0grm.y"
+ { yyval = pars_op(PARS_NOTFOUND_TOKEN, yyvsp[-2], NULL); }
+#line 1772 "pars0grm.cc"
break;
case 50:
-#line 221 "pars0grm.y"
- { (yyval) = pars_op(PARS_OR_TOKEN, (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
+#line 204 "pars0grm.y"
+ { yyval = &pars_to_binary_token; }
+#line 1778 "pars0grm.cc"
break;
case 51:
-#line 222 "pars0grm.y"
- { (yyval) = pars_op(PARS_NOT_TOKEN, (yyvsp[(2) - (2)]), NULL); ;}
+#line 205 "pars0grm.y"
+ { yyval = &pars_substr_token; }
+#line 1784 "pars0grm.cc"
break;
case 52:
-#line 224 "pars0grm.y"
- { (yyval) = pars_op(PARS_NOTFOUND_TOKEN, (yyvsp[(1) - (3)]), NULL); ;}
+#line 206 "pars0grm.y"
+ { yyval = &pars_concat_token; }
+#line 1790 "pars0grm.cc"
break;
case 53:
-#line 226 "pars0grm.y"
- { (yyval) = pars_op(PARS_NOTFOUND_TOKEN, (yyvsp[(1) - (3)]), NULL); ;}
+#line 207 "pars0grm.y"
+ { yyval = &pars_instr_token; }
+#line 1796 "pars0grm.cc"
break;
case 54:
-#line 230 "pars0grm.y"
- { (yyval) = &pars_to_char_token; ;}
- break;
-
- case 55:
-#line 231 "pars0grm.y"
- { (yyval) = &pars_to_number_token; ;}
- break;
-
- case 56:
-#line 232 "pars0grm.y"
- { (yyval) = &pars_to_binary_token; ;}
- break;
-
- case 57:
-#line 234 "pars0grm.y"
- { (yyval) = &pars_binary_to_number_token; ;}
+#line 208 "pars0grm.y"
+ { yyval = &pars_length_token; }
+#line 1802 "pars0grm.cc"
break;
case 58:
-#line 235 "pars0grm.y"
- { (yyval) = &pars_substr_token; ;}
+#line 219 "pars0grm.y"
+ { yyval = pars_stored_procedure_call(
+ static_cast<sym_node_t*>(yyvsp[-4])); }
+#line 1809 "pars0grm.cc"
break;
case 59:
-#line 236 "pars0grm.y"
- { (yyval) = &pars_concat_token; ;}
+#line 224 "pars0grm.y"
+ { yyval = yyvsp[-2]; }
+#line 1815 "pars0grm.cc"
break;
case 60:
-#line 237 "pars0grm.y"
- { (yyval) = &pars_instr_token; ;}
+#line 228 "pars0grm.y"
+ { yyval = que_node_list_add_last(NULL, yyvsp[0]); }
+#line 1821 "pars0grm.cc"
break;
case 61:
-#line 238 "pars0grm.y"
- { (yyval) = &pars_length_token; ;}
+#line 230 "pars0grm.y"
+ { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); }
+#line 1827 "pars0grm.cc"
break;
case 62:
-#line 239 "pars0grm.y"
- { (yyval) = &pars_sysdate_token; ;}
+#line 234 "pars0grm.y"
+ { yyval = NULL; }
+#line 1833 "pars0grm.cc"
break;
case 63:
-#line 240 "pars0grm.y"
- { (yyval) = &pars_rnd_token; ;}
+#line 235 "pars0grm.y"
+ { yyval = que_node_list_add_last(NULL, yyvsp[0]); }
+#line 1839 "pars0grm.cc"
break;
case 64:
+#line 237 "pars0grm.y"
+ { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); }
+#line 1845 "pars0grm.cc"
+ break;
+
+ case 65:
#line 241 "pars0grm.y"
- { (yyval) = &pars_rnd_str_token; ;}
+ { yyval = NULL; }
+#line 1851 "pars0grm.cc"
+ break;
+
+ case 66:
+#line 242 "pars0grm.y"
+ { yyval = que_node_list_add_last(NULL, yyvsp[0]);}
+#line 1857 "pars0grm.cc"
+ break;
+
+ case 67:
+#line 243 "pars0grm.y"
+ { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); }
+#line 1863 "pars0grm.cc"
break;
case 68:
-#line 252 "pars0grm.y"
- { (yyval) = pars_stored_procedure_call(
- static_cast<sym_node_t*>((yyvsp[(2) - (6)]))); ;}
+#line 247 "pars0grm.y"
+ { yyval = yyvsp[0]; }
+#line 1869 "pars0grm.cc"
break;
case 69:
-#line 258 "pars0grm.y"
- { (yyval) = pars_procedure_call((yyvsp[(1) - (4)]), (yyvsp[(3) - (4)])); ;}
+#line 249 "pars0grm.y"
+ { yyval = pars_func(&pars_count_token,
+ que_node_list_add_last(NULL,
+ sym_tab_add_int_lit(
+ pars_sym_tab_global, 1))); }
+#line 1878 "pars0grm.cc"
break;
case 70:
-#line 262 "pars0grm.y"
- { (yyval) = &pars_replstr_token; ;}
+#line 256 "pars0grm.y"
+ { yyval = NULL; }
+#line 1884 "pars0grm.cc"
break;
case 71:
-#line 263 "pars0grm.y"
- { (yyval) = &pars_printf_token; ;}
+#line 257 "pars0grm.y"
+ { yyval = que_node_list_add_last(NULL, yyvsp[0]); }
+#line 1890 "pars0grm.cc"
break;
case 72:
-#line 264 "pars0grm.y"
- { (yyval) = &pars_assert_token; ;}
+#line 259 "pars0grm.y"
+ { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); }
+#line 1896 "pars0grm.cc"
break;
case 73:
-#line 268 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (3)]); ;}
+#line 263 "pars0grm.y"
+ { yyval = pars_select_list(&pars_star_denoter,
+ NULL); }
+#line 1903 "pars0grm.cc"
break;
case 74:
-#line 272 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;}
+#line 266 "pars0grm.y"
+ { yyval = pars_select_list(
+ yyvsp[-2], static_cast<sym_node_t*>(yyvsp[0])); }
+#line 1910 "pars0grm.cc"
break;
case 75:
-#line 274 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
+#line 268 "pars0grm.y"
+ { yyval = pars_select_list(yyvsp[0], NULL); }
+#line 1916 "pars0grm.cc"
break;
case 76:
-#line 278 "pars0grm.y"
- { (yyval) = NULL; ;}
+#line 272 "pars0grm.y"
+ { yyval = NULL; }
+#line 1922 "pars0grm.cc"
break;
case 77:
-#line 279 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;}
+#line 273 "pars0grm.y"
+ { yyval = yyvsp[0]; }
+#line 1928 "pars0grm.cc"
break;
case 78:
-#line 281 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
+#line 277 "pars0grm.y"
+ { yyval = NULL; }
+#line 1934 "pars0grm.cc"
break;
case 79:
-#line 285 "pars0grm.y"
- { (yyval) = NULL; ;}
+#line 279 "pars0grm.y"
+ { yyval = &pars_update_token; }
+#line 1940 "pars0grm.cc"
break;
case 80:
-#line 286 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)]));;}
+#line 283 "pars0grm.y"
+ { yyval = NULL; }
+#line 1946 "pars0grm.cc"
break;
case 81:
-#line 287 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
+#line 285 "pars0grm.y"
+ { yyval = &pars_share_token; }
+#line 1952 "pars0grm.cc"
break;
case 82:
-#line 291 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (1)]); ;}
+#line 289 "pars0grm.y"
+ { yyval = &pars_asc_token; }
+#line 1958 "pars0grm.cc"
break;
case 83:
-#line 293 "pars0grm.y"
- { (yyval) = pars_func(&pars_count_token,
- que_node_list_add_last(NULL,
- sym_tab_add_int_lit(
- pars_sym_tab_global, 1))); ;}
+#line 290 "pars0grm.y"
+ { yyval = &pars_asc_token; }
+#line 1964 "pars0grm.cc"
break;
case 84:
-#line 298 "pars0grm.y"
- { (yyval) = pars_func(&pars_count_token,
- que_node_list_add_last(NULL,
- pars_func(&pars_distinct_token,
- que_node_list_add_last(
- NULL, (yyvsp[(4) - (5)]))))); ;}
+#line 291 "pars0grm.y"
+ { yyval = &pars_desc_token; }
+#line 1970 "pars0grm.cc"
break;
case 85:
-#line 304 "pars0grm.y"
- { (yyval) = pars_func(&pars_sum_token,
- que_node_list_add_last(NULL,
- (yyvsp[(3) - (4)]))); ;}
+#line 295 "pars0grm.y"
+ { yyval = NULL; }
+#line 1976 "pars0grm.cc"
break;
case 86:
-#line 310 "pars0grm.y"
- { (yyval) = NULL; ;}
+#line 297 "pars0grm.y"
+ { yyval = pars_order_by(
+ static_cast<sym_node_t*>(yyvsp[-1]),
+ static_cast<pars_res_word_t*>(yyvsp[0])); }
+#line 1984 "pars0grm.cc"
break;
case 87:
-#line 311 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;}
+#line 308 "pars0grm.y"
+ { yyval = pars_select_statement(
+ static_cast<sel_node_t*>(yyvsp[-6]),
+ static_cast<sym_node_t*>(yyvsp[-4]),
+ static_cast<que_node_t*>(yyvsp[-3]),
+ static_cast<pars_res_word_t*>(yyvsp[-2]),
+ static_cast<pars_res_word_t*>(yyvsp[-1]),
+ static_cast<order_node_t*>(yyvsp[0])); }
+#line 1996 "pars0grm.cc"
break;
case 88:
-#line 313 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
+#line 319 "pars0grm.y"
+ { yyval = yyvsp[0]; }
+#line 2002 "pars0grm.cc"
break;
case 89:
-#line 317 "pars0grm.y"
- { (yyval) = pars_select_list(&pars_star_denoter,
- NULL); ;}
+#line 324 "pars0grm.y"
+ { yyval = pars_insert_statement(
+ static_cast<sym_node_t*>(yyvsp[-4]), yyvsp[-1], NULL); }
+#line 2009 "pars0grm.cc"
break;
case 90:
-#line 320 "pars0grm.y"
- { (yyval) = pars_select_list(
- (yyvsp[(1) - (3)]), static_cast<sym_node_t*>((yyvsp[(3) - (3)]))); ;}
+#line 327 "pars0grm.y"
+ { yyval = pars_insert_statement(
+ static_cast<sym_node_t*>(yyvsp[-1]),
+ NULL,
+ static_cast<sel_node_t*>(yyvsp[0])); }
+#line 2018 "pars0grm.cc"
break;
case 91:
-#line 322 "pars0grm.y"
- { (yyval) = pars_select_list((yyvsp[(1) - (1)]), NULL); ;}
+#line 334 "pars0grm.y"
+ { yyval = pars_column_assignment(
+ static_cast<sym_node_t*>(yyvsp[-2]),
+ static_cast<que_node_t*>(yyvsp[0])); }
+#line 2026 "pars0grm.cc"
break;
case 92:
-#line 326 "pars0grm.y"
- { (yyval) = NULL; ;}
+#line 340 "pars0grm.y"
+ { yyval = que_node_list_add_last(NULL, yyvsp[0]); }
+#line 2032 "pars0grm.cc"
break;
case 93:
-#line 327 "pars0grm.y"
- { (yyval) = (yyvsp[(2) - (2)]); ;}
+#line 342 "pars0grm.y"
+ { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); }
+#line 2038 "pars0grm.cc"
break;
case 94:
-#line 331 "pars0grm.y"
- { (yyval) = NULL; ;}
+#line 348 "pars0grm.y"
+ { yyval = yyvsp[0]; }
+#line 2044 "pars0grm.cc"
break;
case 95:
-#line 333 "pars0grm.y"
- { (yyval) = &pars_update_token; ;}
+#line 354 "pars0grm.y"
+ { yyval = pars_update_statement_start(
+ FALSE,
+ static_cast<sym_node_t*>(yyvsp[-2]),
+ static_cast<col_assign_node_t*>(yyvsp[0])); }
+#line 2053 "pars0grm.cc"
break;
case 96:
-#line 337 "pars0grm.y"
- { (yyval) = NULL; ;}
+#line 362 "pars0grm.y"
+ { yyval = pars_update_statement(
+ static_cast<upd_node_t*>(yyvsp[-1]),
+ NULL,
+ static_cast<que_node_t*>(yyvsp[0])); }
+#line 2062 "pars0grm.cc"
break;
case 97:
-#line 339 "pars0grm.y"
- { (yyval) = &pars_share_token; ;}
+#line 370 "pars0grm.y"
+ { yyval = pars_update_statement(
+ static_cast<upd_node_t*>(yyvsp[-1]),
+ static_cast<sym_node_t*>(yyvsp[0]),
+ NULL); }
+#line 2071 "pars0grm.cc"
break;
case 98:
-#line 343 "pars0grm.y"
- { (yyval) = &pars_asc_token; ;}
+#line 378 "pars0grm.y"
+ { yyval = pars_update_statement_start(
+ TRUE,
+ static_cast<sym_node_t*>(yyvsp[0]), NULL); }
+#line 2079 "pars0grm.cc"
break;
case 99:
-#line 344 "pars0grm.y"
- { (yyval) = &pars_asc_token; ;}
+#line 385 "pars0grm.y"
+ { yyval = pars_update_statement(
+ static_cast<upd_node_t*>(yyvsp[-1]),
+ NULL,
+ static_cast<que_node_t*>(yyvsp[0])); }
+#line 2088 "pars0grm.cc"
break;
case 100:
-#line 345 "pars0grm.y"
- { (yyval) = &pars_desc_token; ;}
+#line 393 "pars0grm.y"
+ { yyval = pars_update_statement(
+ static_cast<upd_node_t*>(yyvsp[-1]),
+ static_cast<sym_node_t*>(yyvsp[0]),
+ NULL); }
+#line 2097 "pars0grm.cc"
break;
case 101:
-#line 349 "pars0grm.y"
- { (yyval) = NULL; ;}
+#line 401 "pars0grm.y"
+ { yyval = pars_assignment_statement(
+ static_cast<sym_node_t*>(yyvsp[-2]),
+ static_cast<que_node_t*>(yyvsp[0])); }
+#line 2105 "pars0grm.cc"
break;
case 102:
-#line 351 "pars0grm.y"
- { (yyval) = pars_order_by(
- static_cast<sym_node_t*>((yyvsp[(3) - (4)])),
- static_cast<pars_res_word_t*>((yyvsp[(4) - (4)]))); ;}
+#line 409 "pars0grm.y"
+ { yyval = pars_elsif_element(yyvsp[-2], yyvsp[0]); }
+#line 2111 "pars0grm.cc"
break;
case 103:
-#line 362 "pars0grm.y"
- { (yyval) = pars_select_statement(
- static_cast<sel_node_t*>((yyvsp[(2) - (8)])),
- static_cast<sym_node_t*>((yyvsp[(4) - (8)])),
- static_cast<que_node_t*>((yyvsp[(5) - (8)])),
- static_cast<pars_res_word_t*>((yyvsp[(6) - (8)])),
- static_cast<pars_res_word_t*>((yyvsp[(7) - (8)])),
- static_cast<order_node_t*>((yyvsp[(8) - (8)]))); ;}
+#line 413 "pars0grm.y"
+ { yyval = que_node_list_add_last(NULL, yyvsp[0]); }
+#line 2117 "pars0grm.cc"
break;
case 104:
-#line 373 "pars0grm.y"
- { (yyval) = (yyvsp[(3) - (3)]); ;}
+#line 415 "pars0grm.y"
+ { yyval = que_node_list_add_last(yyvsp[-1], yyvsp[0]); }
+#line 2123 "pars0grm.cc"
break;
case 105:
-#line 378 "pars0grm.y"
- { (yyval) = pars_insert_statement(
- static_cast<sym_node_t*>((yyvsp[(1) - (5)])), (yyvsp[(4) - (5)]), NULL); ;}
+#line 419 "pars0grm.y"
+ { yyval = NULL; }
+#line 2129 "pars0grm.cc"
break;
case 106:
-#line 381 "pars0grm.y"
- { (yyval) = pars_insert_statement(
- static_cast<sym_node_t*>((yyvsp[(1) - (2)])),
- NULL,
- static_cast<sel_node_t*>((yyvsp[(2) - (2)]))); ;}
+#line 421 "pars0grm.y"
+ { yyval = yyvsp[0]; }
+#line 2135 "pars0grm.cc"
break;
case 107:
-#line 388 "pars0grm.y"
- { (yyval) = pars_column_assignment(
- static_cast<sym_node_t*>((yyvsp[(1) - (3)])),
- static_cast<que_node_t*>((yyvsp[(3) - (3)]))); ;}
+#line 422 "pars0grm.y"
+ { yyval = yyvsp[0]; }
+#line 2141 "pars0grm.cc"
break;
case 108:
-#line 394 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;}
+#line 429 "pars0grm.y"
+ { yyval = pars_if_statement(yyvsp[-5], yyvsp[-3], yyvsp[-2]); }
+#line 2147 "pars0grm.cc"
break;
case 109:
-#line 396 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
+#line 435 "pars0grm.y"
+ { yyval = pars_while_statement(yyvsp[-4], yyvsp[-2]); }
+#line 2153 "pars0grm.cc"
break;
case 110:
-#line 402 "pars0grm.y"
- { (yyval) = (yyvsp[(4) - (4)]); ;}
+#line 443 "pars0grm.y"
+ { yyval = pars_for_statement(
+ static_cast<sym_node_t*>(yyvsp[-8]),
+ yyvsp[-6], yyvsp[-4], yyvsp[-2]); }
+#line 2161 "pars0grm.cc"
break;
case 111:
-#line 408 "pars0grm.y"
- { (yyval) = pars_update_statement_start(
- FALSE,
- static_cast<sym_node_t*>((yyvsp[(2) - (4)])),
- static_cast<col_assign_node_t*>((yyvsp[(4) - (4)]))); ;}
+#line 449 "pars0grm.y"
+ { yyval = pars_exit_statement(); }
+#line 2167 "pars0grm.cc"
break;
case 112:
-#line 416 "pars0grm.y"
- { (yyval) = pars_update_statement(
- static_cast<upd_node_t*>((yyvsp[(1) - (2)])),
- NULL,
- static_cast<que_node_t*>((yyvsp[(2) - (2)]))); ;}
+#line 453 "pars0grm.y"
+ { yyval = pars_return_statement(); }
+#line 2173 "pars0grm.cc"
break;
case 113:
-#line 424 "pars0grm.y"
- { (yyval) = pars_update_statement(
- static_cast<upd_node_t*>((yyvsp[(1) - (2)])),
- static_cast<sym_node_t*>((yyvsp[(2) - (2)])),
- NULL); ;}
+#line 458 "pars0grm.y"
+ { yyval = pars_open_statement(
+ ROW_SEL_OPEN_CURSOR,
+ static_cast<sym_node_t*>(yyvsp[0])); }
+#line 2181 "pars0grm.cc"
break;
case 114:
-#line 432 "pars0grm.y"
- { (yyval) = pars_update_statement_start(
- TRUE,
- static_cast<sym_node_t*>((yyvsp[(3) - (3)])), NULL); ;}
+#line 465 "pars0grm.y"
+ { yyval = pars_open_statement(
+ ROW_SEL_CLOSE_CURSOR,
+ static_cast<sym_node_t*>(yyvsp[0])); }
+#line 2189 "pars0grm.cc"
break;
case 115:
-#line 439 "pars0grm.y"
- { (yyval) = pars_update_statement(
- static_cast<upd_node_t*>((yyvsp[(1) - (2)])),
- NULL,
- static_cast<que_node_t*>((yyvsp[(2) - (2)]))); ;}
+#line 472 "pars0grm.y"
+ { yyval = pars_fetch_statement(
+ static_cast<sym_node_t*>(yyvsp[-2]),
+ static_cast<sym_node_t*>(yyvsp[0]), NULL); }
+#line 2197 "pars0grm.cc"
break;
case 116:
-#line 447 "pars0grm.y"
- { (yyval) = pars_update_statement(
- static_cast<upd_node_t*>((yyvsp[(1) - (2)])),
- static_cast<sym_node_t*>((yyvsp[(2) - (2)])),
- NULL); ;}
+#line 476 "pars0grm.y"
+ { yyval = pars_fetch_statement(
+ static_cast<sym_node_t*>(yyvsp[-2]),
+ NULL,
+ static_cast<sym_node_t*>(yyvsp[0])); }
+#line 2206 "pars0grm.cc"
break;
case 117:
-#line 455 "pars0grm.y"
- { (yyval) = pars_row_printf_statement(
- static_cast<sel_node_t*>((yyvsp[(2) - (2)]))); ;}
+#line 484 "pars0grm.y"
+ { yyval = pars_column_def(
+ static_cast<sym_node_t*>(yyvsp[-3]),
+ static_cast<pars_res_word_t*>(yyvsp[-2]),
+ static_cast<sym_node_t*>(yyvsp[-1]),
+ yyvsp[0]); }
+#line 2216 "pars0grm.cc"
break;
case 118:
-#line 461 "pars0grm.y"
- { (yyval) = pars_assignment_statement(
- static_cast<sym_node_t*>((yyvsp[(1) - (3)])),
- static_cast<que_node_t*>((yyvsp[(3) - (3)]))); ;}
+#line 492 "pars0grm.y"
+ { yyval = que_node_list_add_last(NULL, yyvsp[0]); }
+#line 2222 "pars0grm.cc"
break;
case 119:
-#line 469 "pars0grm.y"
- { (yyval) = pars_elsif_element((yyvsp[(2) - (4)]), (yyvsp[(4) - (4)])); ;}
+#line 494 "pars0grm.y"
+ { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); }
+#line 2228 "pars0grm.cc"
break;
case 120:
-#line 473 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;}
+#line 498 "pars0grm.y"
+ { yyval = NULL; }
+#line 2234 "pars0grm.cc"
break;
case 121:
-#line 475 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[(1) - (2)]), (yyvsp[(2) - (2)])); ;}
+#line 500 "pars0grm.y"
+ { yyval = yyvsp[-1]; }
+#line 2240 "pars0grm.cc"
break;
case 122:
-#line 479 "pars0grm.y"
- { (yyval) = NULL; ;}
+#line 504 "pars0grm.y"
+ { yyval = NULL; }
+#line 2246 "pars0grm.cc"
break;
case 123:
-#line 481 "pars0grm.y"
- { (yyval) = (yyvsp[(2) - (2)]); ;}
+#line 506 "pars0grm.y"
+ { yyval = &pars_int_token;
+ /* pass any non-NULL pointer */ }
+#line 2253 "pars0grm.cc"
break;
case 124:
-#line 482 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (1)]); ;}
+#line 513 "pars0grm.y"
+ { yyval = pars_create_table(
+ static_cast<sym_node_t*>(yyvsp[-3]),
+ static_cast<sym_node_t*>(yyvsp[-1])); }
+#line 2261 "pars0grm.cc"
break;
case 125:
-#line 489 "pars0grm.y"
- { (yyval) = pars_if_statement((yyvsp[(2) - (7)]), (yyvsp[(4) - (7)]), (yyvsp[(5) - (7)])); ;}
+#line 519 "pars0grm.y"
+ { yyval = que_node_list_add_last(NULL, yyvsp[0]); }
+#line 2267 "pars0grm.cc"
break;
case 126:
-#line 495 "pars0grm.y"
- { (yyval) = pars_while_statement((yyvsp[(2) - (6)]), (yyvsp[(4) - (6)])); ;}
+#line 521 "pars0grm.y"
+ { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); }
+#line 2273 "pars0grm.cc"
break;
case 127:
-#line 503 "pars0grm.y"
- { (yyval) = pars_for_statement(
- static_cast<sym_node_t*>((yyvsp[(2) - (10)])),
- (yyvsp[(4) - (10)]), (yyvsp[(6) - (10)]), (yyvsp[(8) - (10)])); ;}
+#line 525 "pars0grm.y"
+ { yyval = NULL; }
+#line 2279 "pars0grm.cc"
break;
case 128:
-#line 509 "pars0grm.y"
- { (yyval) = pars_exit_statement(); ;}
+#line 526 "pars0grm.y"
+ { yyval = &pars_unique_token; }
+#line 2285 "pars0grm.cc"
break;
case 129:
-#line 513 "pars0grm.y"
- { (yyval) = pars_return_statement(); ;}
+#line 530 "pars0grm.y"
+ { yyval = NULL; }
+#line 2291 "pars0grm.cc"
break;
case 130:
-#line 518 "pars0grm.y"
- { (yyval) = pars_open_statement(
- ROW_SEL_OPEN_CURSOR,
- static_cast<sym_node_t*>((yyvsp[(2) - (2)]))); ;}
+#line 531 "pars0grm.y"
+ { yyval = &pars_clustered_token; }
+#line 2297 "pars0grm.cc"
break;
case 131:
-#line 525 "pars0grm.y"
- { (yyval) = pars_open_statement(
- ROW_SEL_CLOSE_CURSOR,
- static_cast<sym_node_t*>((yyvsp[(2) - (2)]))); ;}
+#line 540 "pars0grm.y"
+ { yyval = pars_create_index(
+ static_cast<pars_res_word_t*>(yyvsp[-8]),
+ static_cast<pars_res_word_t*>(yyvsp[-7]),
+ static_cast<sym_node_t*>(yyvsp[-5]),
+ static_cast<sym_node_t*>(yyvsp[-3]),
+ static_cast<sym_node_t*>(yyvsp[-1])); }
+#line 2308 "pars0grm.cc"
break;
case 132:
-#line 532 "pars0grm.y"
- { (yyval) = pars_fetch_statement(
- static_cast<sym_node_t*>((yyvsp[(2) - (4)])),
- static_cast<sym_node_t*>((yyvsp[(4) - (4)])), NULL); ;}
+#line 549 "pars0grm.y"
+ { yyval = yyvsp[0]; }
+#line 2314 "pars0grm.cc"
break;
case 133:
-#line 536 "pars0grm.y"
- { (yyval) = pars_fetch_statement(
- static_cast<sym_node_t*>((yyvsp[(2) - (4)])),
- NULL,
- static_cast<sym_node_t*>((yyvsp[(4) - (4)]))); ;}
+#line 550 "pars0grm.y"
+ { yyval = yyvsp[0]; }
+#line 2320 "pars0grm.cc"
break;
case 134:
-#line 544 "pars0grm.y"
- { (yyval) = pars_column_def(
- static_cast<sym_node_t*>((yyvsp[(1) - (5)])),
- static_cast<pars_res_word_t*>((yyvsp[(2) - (5)])),
- static_cast<sym_node_t*>((yyvsp[(3) - (5)])),
- (yyvsp[(4) - (5)]), (yyvsp[(5) - (5)])); ;}
+#line 555 "pars0grm.y"
+ { yyval = pars_commit_statement(); }
+#line 2326 "pars0grm.cc"
break;
case 135:
-#line 552 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;}
+#line 560 "pars0grm.y"
+ { yyval = pars_rollback_statement(); }
+#line 2332 "pars0grm.cc"
break;
case 136:
-#line 554 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
+#line 564 "pars0grm.y"
+ { yyval = &pars_int_token; }
+#line 2338 "pars0grm.cc"
break;
case 137:
-#line 558 "pars0grm.y"
- { (yyval) = NULL; ;}
+#line 565 "pars0grm.y"
+ { yyval = &pars_bigint_token; }
+#line 2344 "pars0grm.cc"
break;
case 138:
-#line 560 "pars0grm.y"
- { (yyval) = (yyvsp[(2) - (3)]); ;}
- break;
-
- case 139:
-#line 564 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 140:
#line 566 "pars0grm.y"
- { (yyval) = &pars_int_token;
- /* pass any non-NULL pointer */ ;}
+ { yyval = &pars_char_token; }
+#line 2350 "pars0grm.cc"
break;
- case 141:
+ case 139:
#line 571 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 142:
-#line 573 "pars0grm.y"
- { (yyval) = &pars_int_token;
- /* pass any non-NULL pointer */ ;}
+ { yyval = pars_variable_declaration(
+ static_cast<sym_node_t*>(yyvsp[-2]),
+ static_cast<pars_res_word_t*>(yyvsp[-1])); }
+#line 2358 "pars0grm.cc"
break;
case 143:
-#line 578 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 144:
-#line 580 "pars0grm.y"
- { (yyval) = &pars_int_token;
- /* pass any non-NULL pointer */ ;}
- break;
-
- case 145:
#line 585 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 146:
-#line 586 "pars0grm.y"
- { (yyval) = &pars_int_token;
- /* pass any non-NULL pointer */ ;}
- break;
-
- case 147:
-#line 591 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 148:
-#line 593 "pars0grm.y"
- { (yyval) = (yyvsp[(3) - (3)]); ;}
+ { yyval = pars_cursor_declaration(
+ static_cast<sym_node_t*>(yyvsp[-3]),
+ static_cast<sel_node_t*>(yyvsp[-1])); }
+#line 2366 "pars0grm.cc"
break;
- case 149:
-#line 600 "pars0grm.y"
- { (yyval) = pars_create_table(
- static_cast<sym_node_t*>((yyvsp[(3) - (9)])),
- static_cast<sym_node_t*>((yyvsp[(5) - (9)])),
- static_cast<sym_node_t*>((yyvsp[(8) - (9)])),
- static_cast<sym_node_t*>((yyvsp[(9) - (9)])), (yyvsp[(7) - (9)])); ;}
+ case 144:
+#line 592 "pars0grm.y"
+ { yyval = pars_function_declaration(
+ static_cast<sym_node_t*>(yyvsp[-1])); }
+#line 2373 "pars0grm.cc"
break;
case 150:
-#line 608 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;}
- break;
-
- case 151:
-#line 610 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
-
- case 152:
#line 614 "pars0grm.y"
- { (yyval) = NULL; ;}
+ { yyval = pars_procedure_definition(
+ static_cast<sym_node_t*>(yyvsp[-8]), yyvsp[-1]); }
+#line 2380 "pars0grm.cc"
break;
- case 153:
-#line 615 "pars0grm.y"
- { (yyval) = &pars_unique_token; ;}
- break;
- case 154:
-#line 619 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 155:
-#line 620 "pars0grm.y"
- { (yyval) = &pars_clustered_token; ;}
- break;
-
- case 156:
-#line 629 "pars0grm.y"
- { (yyval) = pars_create_index(
- static_cast<pars_res_word_t*>((yyvsp[(2) - (10)])),
- static_cast<pars_res_word_t*>((yyvsp[(3) - (10)])),
- static_cast<sym_node_t*>((yyvsp[(5) - (10)])),
- static_cast<sym_node_t*>((yyvsp[(7) - (10)])),
- static_cast<sym_node_t*>((yyvsp[(9) - (10)]))); ;}
- break;
-
- case 157:
-#line 638 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (1)]); ;}
- break;
-
- case 158:
-#line 639 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (1)]); ;}
- break;
+#line 2384 "pars0grm.cc"
- case 159:
-#line 644 "pars0grm.y"
- { (yyval) = pars_commit_statement(); ;}
- break;
-
- case 160:
-#line 649 "pars0grm.y"
- { (yyval) = pars_rollback_statement(); ;}
- break;
-
- case 161:
-#line 653 "pars0grm.y"
- { (yyval) = &pars_int_token; ;}
- break;
-
- case 162:
-#line 654 "pars0grm.y"
- { (yyval) = &pars_int_token; ;}
- break;
-
- case 163:
-#line 655 "pars0grm.y"
- { (yyval) = &pars_bigint_token; ;}
- break;
-
- case 164:
-#line 656 "pars0grm.y"
- { (yyval) = &pars_char_token; ;}
- break;
-
- case 165:
-#line 657 "pars0grm.y"
- { (yyval) = &pars_binary_token; ;}
- break;
-
- case 166:
-#line 658 "pars0grm.y"
- { (yyval) = &pars_blob_token; ;}
- break;
-
- case 167:
-#line 663 "pars0grm.y"
- { (yyval) = pars_parameter_declaration(
- static_cast<sym_node_t*>((yyvsp[(1) - (3)])),
- PARS_INPUT,
- static_cast<pars_res_word_t*>((yyvsp[(3) - (3)]))); ;}
- break;
-
- case 168:
-#line 668 "pars0grm.y"
- { (yyval) = pars_parameter_declaration(
- static_cast<sym_node_t*>((yyvsp[(1) - (3)])),
- PARS_OUTPUT,
- static_cast<pars_res_word_t*>((yyvsp[(3) - (3)]))); ;}
- break;
-
- case 169:
-#line 675 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 170:
-#line 676 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;}
- break;
-
- case 171:
-#line 678 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
-
- case 172:
-#line 683 "pars0grm.y"
- { (yyval) = pars_variable_declaration(
- static_cast<sym_node_t*>((yyvsp[(1) - (3)])),
- static_cast<pars_res_word_t*>((yyvsp[(2) - (3)]))); ;}
- break;
-
- case 176:
-#line 697 "pars0grm.y"
- { (yyval) = pars_cursor_declaration(
- static_cast<sym_node_t*>((yyvsp[(3) - (6)])),
- static_cast<sel_node_t*>((yyvsp[(5) - (6)]))); ;}
- break;
-
- case 177:
-#line 704 "pars0grm.y"
- { (yyval) = pars_function_declaration(
- static_cast<sym_node_t*>((yyvsp[(3) - (4)]))); ;}
- break;
-
- case 183:
-#line 726 "pars0grm.y"
- { (yyval) = pars_procedure_definition(
- static_cast<sym_node_t*>((yyvsp[(2) - (11)])),
- static_cast<sym_node_t*>((yyvsp[(4) - (11)])),
- (yyvsp[(10) - (11)])); ;}
- break;
-
-
-/* Line 1267 of yacc.c. */
-#line 2826 "pars0grm.cc"
default: break;
}
+ /* User semantic actions sometimes alter yychar, and that requires
+ that yytoken be updated with the new translation. We take the
+ approach of translating immediately before every use of yytoken.
+ One alternative is translating here after every semantic action,
+ but that translation would be missed if the semantic action invokes
+ YYABORT, YYACCEPT, or YYERROR immediately after altering yychar or
+ if it invokes YYBACKUP. In the case of YYABORT or YYACCEPT, an
+ incorrect destructor might then be invoked immediately. In the
+ case of YYERROR or YYBACKUP, subsequent parser actions might lead
+ to an incorrect destructor call or verbose syntax error message
+ before the lookahead is translated. */
YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
YYPOPSTACK (yylen);
@@ -2826,26 +2403,28 @@ yyreduce:
*++yyvsp = yyval;
-
- /* Now `shift' the result of the reduction. Determine what state
+ /* Now 'shift' the result of the reduction. Determine what state
that goes to, based on the state we popped back to and the rule
number reduced by. */
-
- yyn = yyr1[yyn];
-
- yystate = yypgoto[yyn - YYNTOKENS] + *yyssp;
- if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp)
- yystate = yytable[yystate];
- else
- yystate = yydefgoto[yyn - YYNTOKENS];
+ {
+ const int yylhs = yyr1[yyn] - YYNTOKENS;
+ const int yyi = yypgoto[yylhs] + *yyssp;
+ yystate = (0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *yyssp
+ ? yytable[yyi]
+ : yydefgoto[yylhs]);
+ }
goto yynewstate;
-/*------------------------------------.
-| yyerrlab -- here on detecting error |
-`------------------------------------*/
+/*--------------------------------------.
+| yyerrlab -- here on detecting error. |
+`--------------------------------------*/
yyerrlab:
+ /* Make sure we have latest lookahead translation. See comments at
+ user semantic actions for why this is necessary. */
+ yytoken = yychar == YYEMPTY ? YYEMPTY : YYTRANSLATE (yychar);
+
/* If not already recovering from an error, report this error. */
if (!yyerrstatus)
{
@@ -2853,37 +2432,36 @@ yyerrlab:
#if ! YYERROR_VERBOSE
yyerror (YY_("syntax error"));
#else
+# define YYSYNTAX_ERROR yysyntax_error (&yymsg_alloc, &yymsg, \
+ yyssp, yytoken)
{
- YYSIZE_T yysize = yysyntax_error (0, yystate, yychar);
- if (yymsg_alloc < yysize && yymsg_alloc < YYSTACK_ALLOC_MAXIMUM)
- {
- YYSIZE_T yyalloc = 2 * yysize;
- if (! (yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM))
- yyalloc = YYSTACK_ALLOC_MAXIMUM;
- if (yymsg != yymsgbuf)
- YYSTACK_FREE (yymsg);
- yymsg = (char*) YYSTACK_ALLOC (yyalloc);
- if (yymsg)
- yymsg_alloc = yyalloc;
- else
- {
- yymsg = yymsgbuf;
- yymsg_alloc = sizeof yymsgbuf;
- }
- }
-
- if (0 < yysize && yysize <= yymsg_alloc)
- {
- (void) yysyntax_error (yymsg, yystate, yychar);
- yyerror (yymsg);
- }
- else
- {
- yyerror (YY_("syntax error"));
- if (yysize != 0)
- goto yyexhaustedlab;
- }
+ char const *yymsgp = YY_("syntax error");
+ int yysyntax_error_status;
+ yysyntax_error_status = YYSYNTAX_ERROR;
+ if (yysyntax_error_status == 0)
+ yymsgp = yymsg;
+ else if (yysyntax_error_status == 1)
+ {
+ if (yymsg != yymsgbuf)
+ YYSTACK_FREE (yymsg);
+ yymsg = (char *) YYSTACK_ALLOC (yymsg_alloc);
+ if (!yymsg)
+ {
+ yymsg = yymsgbuf;
+ yymsg_alloc = sizeof yymsgbuf;
+ yysyntax_error_status = 2;
+ }
+ else
+ {
+ yysyntax_error_status = YYSYNTAX_ERROR;
+ yymsgp = yymsg;
+ }
+ }
+ yyerror (yymsgp);
+ if (yysyntax_error_status == 2)
+ goto yyexhaustedlab;
}
+# undef YYSYNTAX_ERROR
#endif
}
@@ -2891,24 +2469,24 @@ yyerrlab:
if (yyerrstatus == 3)
{
- /* If just tried and failed to reuse look-ahead token after an
- error, discard it. */
+ /* If just tried and failed to reuse lookahead token after an
+ error, discard it. */
if (yychar <= YYEOF)
- {
- /* Return failure if at end of input. */
- if (yychar == YYEOF)
- YYABORT;
- }
+ {
+ /* Return failure if at end of input. */
+ if (yychar == YYEOF)
+ YYABORT;
+ }
else
- {
- yydestruct ("Error: discarding",
- yytoken, &yylval);
- yychar = YYEMPTY;
- }
+ {
+ yydestruct ("Error: discarding",
+ yytoken, &yylval);
+ yychar = YYEMPTY;
+ }
}
- /* Else will try to reuse look-ahead token after shifting the error
+ /* Else will try to reuse lookahead token after shifting the error
token. */
goto yyerrlab1;
@@ -2917,14 +2495,12 @@ yyerrlab:
| yyerrorlab -- error raised explicitly by YYERROR. |
`---------------------------------------------------*/
yyerrorlab:
+ /* Pacify compilers when the user code never invokes YYERROR and the
+ label yyerrorlab therefore never appears in user code. */
+ if (0)
+ YYERROR;
- /* Pacify compilers like GCC when the user code never invokes
- YYERROR and the label yyerrorlab therefore never appears in user
- code. */
- if (/*CONSTCOND*/ 0)
- goto yyerrorlab;
-
- /* Do not reclaim the symbols of the rule which action triggered
+ /* Do not reclaim the symbols of the rule whose action triggered
this YYERROR. */
YYPOPSTACK (yylen);
yylen = 0;
@@ -2937,38 +2513,37 @@ yyerrorlab:
| yyerrlab1 -- common code for both syntax error and YYERROR. |
`-------------------------------------------------------------*/
yyerrlab1:
- yyerrstatus = 3; /* Each real token shifted decrements this. */
+ yyerrstatus = 3; /* Each real token shifted decrements this. */
for (;;)
{
yyn = yypact[yystate];
- if (yyn != YYPACT_NINF)
- {
- yyn += YYTERROR;
- if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR)
- {
- yyn = yytable[yyn];
- if (0 < yyn)
- break;
- }
- }
+ if (!yypact_value_is_default (yyn))
+ {
+ yyn += YYTERROR;
+ if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR)
+ {
+ yyn = yytable[yyn];
+ if (0 < yyn)
+ break;
+ }
+ }
/* Pop the current state because it cannot handle the error token. */
if (yyssp == yyss)
- YYABORT;
+ YYABORT;
yydestruct ("Error: popping",
- yystos[yystate], yyvsp);
+ yystos[yystate], yyvsp);
YYPOPSTACK (1);
yystate = *yyssp;
YY_STACK_PRINT (yyss, yyssp);
}
- if (yyn == YYFINAL)
- YYACCEPT;
-
+ YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
*++yyvsp = yylval;
+ YY_IGNORE_MAYBE_UNINITIALIZED_END
/* Shift the error token. */
@@ -2985,6 +2560,7 @@ yyacceptlab:
yyresult = 0;
goto yyreturn;
+
/*-----------------------------------.
| yyabortlab -- YYABORT comes here. |
`-----------------------------------*/
@@ -2992,7 +2568,8 @@ yyabortlab:
yyresult = 1;
goto yyreturn;
-#ifndef yyoverflow
+
+#if !defined yyoverflow || YYERROR_VERBOSE
/*-------------------------------------------------.
| yyexhaustedlab -- memory exhaustion comes here. |
`-------------------------------------------------*/
@@ -3002,18 +2579,27 @@ yyexhaustedlab:
/* Fall through. */
#endif
+
+/*-----------------------------------------------------.
+| yyreturn -- parsing is finished, return the result. |
+`-----------------------------------------------------*/
yyreturn:
- if (yychar != YYEOF && yychar != YYEMPTY)
- yydestruct ("Cleanup: discarding lookahead",
- yytoken, &yylval);
- /* Do not reclaim the symbols of the rule which action triggered
+ if (yychar != YYEMPTY)
+ {
+ /* Make sure we have latest lookahead translation. See comments at
+ user semantic actions for why this is necessary. */
+ yytoken = YYTRANSLATE (yychar);
+ yydestruct ("Cleanup: discarding lookahead",
+ yytoken, &yylval);
+ }
+ /* Do not reclaim the symbols of the rule whose action triggered
this YYABORT or YYACCEPT. */
YYPOPSTACK (yylen);
YY_STACK_PRINT (yyss, yyssp);
while (yyssp != yyss)
{
yydestruct ("Cleanup: popping",
- yystos[*yyssp], yyvsp);
+ yystos[*yyssp], yyvsp);
YYPOPSTACK (1);
}
#ifndef yyoverflow
@@ -3024,11 +2610,7 @@ yyreturn:
if (yymsg != yymsgbuf)
YYSTACK_FREE (yymsg);
#endif
- /* Make sure YYID is used. */
- return YYID (yyresult);
+ return yyresult;
}
-
-
-#line 732 "pars0grm.y"
-
+#line 618 "pars0grm.y"
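
Note on the hunks above: pars0grm.cc is Bison output generated from
pars0grm.y, so the churn in case numbers, semantic-action bodies and
#line markers simply reflects regenerating the parser with a newer
Bison after the grammar was trimmed; hand edits to the .cc file do not
survive regeneration. One visible consequence of the trimmed grammar
(see the pars0grm.y hunks below) is that procedure headers no longer
carry a parameter list: callers pass values through pars_info_t
bindings instead. A minimal sketch of that calling pattern, assuming
the usual pars0pars.h/que0que.h helpers, an already started trx_t* trx
and a numeric table_id; the names DROP_FOO_PROC, SYS_FOO and :id are
invented for illustration:

	pars_info_t*	info = pars_info_create();

	/* Bind the value that the procedure body refers to as :id. */
	pars_info_add_ull_literal(info, "id", table_id);

	que_eval_sql(info,
		     "PROCEDURE DROP_FOO_PROC () IS\n"
		     "BEGIN\n"
		     "DELETE FROM SYS_FOO WHERE ID = :id;\n"
		     "END;\n",
		     FALSE,	/* do not reserve dict_sys->mutex */
		     trx);

que_eval_sql() runs the procedure and, by default, frees the
pars_info_t together with the query graph, so the caller only supplies
the bindings and the SQL string.
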
diff --git a/storage/innobase/pars/pars0grm.y b/storage/innobase/pars/pars0grm.y
index 2e4776caaf4..625ed41bbd4 100644
--- a/storage/innobase/pars/pars0grm.y
+++ b/storage/innobase/pars/pars0grm.y
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -30,7 +31,7 @@ Created 12/14/1997 Heikki Tuuri
que_node_t */
#include "univ.i"
-#include <math.h> /* Can't be before univ.i */
+#include <math.h>
#include "pars0pars.h"
#include "mem0mem.h"
#include "que0types.h"
@@ -40,7 +41,6 @@ que_node_t */
#define YYSTYPE que_node_t*
/* #define __STDC__ */
-
int
yylex(void);
%}
@@ -48,8 +48,6 @@ yylex(void);
%token PARS_INT_LIT
%token PARS_FLOAT_LIT
%token PARS_STR_LIT
-%token PARS_FIXBINARY_LIT
-%token PARS_BLOB_LIT
%token PARS_NULL_LIT
%token PARS_ID_TOKEN
%token PARS_AND_TOKEN
@@ -60,12 +58,7 @@ yylex(void);
%token PARS_NE_TOKEN
%token PARS_PROCEDURE_TOKEN
%token PARS_IN_TOKEN
-%token PARS_OUT_TOKEN
-%token PARS_BINARY_TOKEN
-%token PARS_BLOB_TOKEN
%token PARS_INT_TOKEN
-%token PARS_INTEGER_TOKEN
-%token PARS_FLOAT_TOKEN
%token PARS_CHAR_TOKEN
%token PARS_IS_TOKEN
%token PARS_BEGIN_TOKEN
@@ -78,14 +71,11 @@ yylex(void);
%token PARS_WHILE_TOKEN
%token PARS_RETURN_TOKEN
%token PARS_SELECT_TOKEN
-%token PARS_SUM_TOKEN
%token PARS_COUNT_TOKEN
-%token PARS_DISTINCT_TOKEN
%token PARS_FROM_TOKEN
%token PARS_WHERE_TOKEN
%token PARS_FOR_TOKEN
%token PARS_DDOT_TOKEN
-%token PARS_READ_TOKEN
%token PARS_ORDER_TOKEN
%token PARS_BY_TOKEN
%token PARS_ASC_TOKEN
@@ -103,7 +93,6 @@ yylex(void);
%token PARS_INDEX_TOKEN
%token PARS_UNIQUE_TOKEN
%token PARS_CLUSTERED_TOKEN
-%token PARS_DOES_NOT_FIT_IN_MEM_TOKEN
%token PARS_ON_TOKEN
%token PARS_ASSIGN_TOKEN
%token PARS_DECLARE_TOKEN
@@ -113,25 +102,14 @@ yylex(void);
%token PARS_FETCH_TOKEN
%token PARS_CLOSE_TOKEN
%token PARS_NOTFOUND_TOKEN
-%token PARS_TO_CHAR_TOKEN
-%token PARS_TO_NUMBER_TOKEN
%token PARS_TO_BINARY_TOKEN
-%token PARS_BINARY_TO_NUMBER_TOKEN
%token PARS_SUBSTR_TOKEN
-%token PARS_REPLSTR_TOKEN
%token PARS_CONCAT_TOKEN
%token PARS_INSTR_TOKEN
%token PARS_LENGTH_TOKEN
-%token PARS_SYSDATE_TOKEN
-%token PARS_PRINTF_TOKEN
-%token PARS_ASSERT_TOKEN
-%token PARS_RND_TOKEN
-%token PARS_RND_STR_TOKEN
-%token PARS_ROW_PRINTF_TOKEN
%token PARS_COMMIT_TOKEN
%token PARS_ROLLBACK_TOKEN
%token PARS_WORK_TOKEN
-%token PARS_UNSIGNED_TOKEN
%token PARS_EXIT_TOKEN
%token PARS_FUNCTION_TOKEN
%token PARS_LOCK_TOKEN
@@ -143,8 +121,6 @@ yylex(void);
%token PARS_LIKE_TOKEN_SUFFIX
%token PARS_LIKE_TOKEN_SUBSTR
%token PARS_TABLE_NAME_TOKEN
-%token PARS_COMPACT_TOKEN
-%token PARS_BLOCK_SIZE_TOKEN
%token PARS_BIGINT_TOKEN
%left PARS_AND_TOKEN PARS_OR_TOKEN
@@ -155,6 +131,8 @@ yylex(void);
%left NEG /* negation--unary minus */
%left '%'
+%expect 41
+
/* Grammar follows */
%%
@@ -163,7 +141,6 @@ top_statement:
statement:
stored_procedure_call
- | predefined_procedure_call ';'
| while_statement ';'
| for_statement ';'
| exit_statement ';'
@@ -172,7 +149,6 @@ statement:
| assignment_statement ';'
| select_statement ';'
| insert_statement ';'
- | row_printf_statement ';'
| delete_statement_searched ';'
| delete_statement_positioned ';'
| update_statement_searched ';'
@@ -199,8 +175,6 @@ exp:
| PARS_INT_LIT { $$ = $1;}
| PARS_FLOAT_LIT { $$ = $1;}
| PARS_STR_LIT { $$ = $1;}
- | PARS_FIXBINARY_LIT { $$ = $1;}
- | PARS_BLOB_LIT { $$ = $1;}
| PARS_NULL_LIT { $$ = $1;}
| PARS_SQL_TOKEN { $$ = $1;}
| exp '+' exp { $$ = pars_op('+', $1, $3); }
@@ -227,18 +201,11 @@ exp:
;
function_name:
- PARS_TO_CHAR_TOKEN { $$ = &pars_to_char_token; }
- | PARS_TO_NUMBER_TOKEN { $$ = &pars_to_number_token; }
- | PARS_TO_BINARY_TOKEN { $$ = &pars_to_binary_token; }
- | PARS_BINARY_TO_NUMBER_TOKEN
- { $$ = &pars_binary_to_number_token; }
+ PARS_TO_BINARY_TOKEN { $$ = &pars_to_binary_token; }
| PARS_SUBSTR_TOKEN { $$ = &pars_substr_token; }
| PARS_CONCAT_TOKEN { $$ = &pars_concat_token; }
| PARS_INSTR_TOKEN { $$ = &pars_instr_token; }
| PARS_LENGTH_TOKEN { $$ = &pars_length_token; }
- | PARS_SYSDATE_TOKEN { $$ = &pars_sysdate_token; }
- | PARS_RND_TOKEN { $$ = &pars_rnd_token; }
- | PARS_RND_STR_TOKEN { $$ = &pars_rnd_str_token; }
;
question_mark_list:
@@ -253,17 +220,6 @@ stored_procedure_call:
static_cast<sym_node_t*>($2)); }
;
-predefined_procedure_call:
- predefined_procedure_name '(' exp_list ')'
- { $$ = pars_procedure_call($1, $3); }
-;
-
-predefined_procedure_name:
- PARS_REPLSTR_TOKEN { $$ = &pars_replstr_token; }
- | PARS_PRINTF_TOKEN { $$ = &pars_printf_token; }
- | PARS_ASSERT_TOKEN { $$ = &pars_assert_token; }
-;
-
user_function_call:
PARS_ID_TOKEN '(' ')' { $$ = $1; }
;
@@ -291,19 +247,9 @@ select_item:
exp { $$ = $1; }
| PARS_COUNT_TOKEN '(' '*' ')'
{ $$ = pars_func(&pars_count_token,
- que_node_list_add_last(NULL,
+ que_node_list_add_last(NULL,
sym_tab_add_int_lit(
pars_sym_tab_global, 1))); }
- | PARS_COUNT_TOKEN '(' PARS_DISTINCT_TOKEN PARS_ID_TOKEN ')'
- { $$ = pars_func(&pars_count_token,
- que_node_list_add_last(NULL,
- pars_func(&pars_distinct_token,
- que_node_list_add_last(
- NULL, $4)))); }
- | PARS_SUM_TOKEN '(' exp ')'
- { $$ = pars_func(&pars_sum_token,
- que_node_list_add_last(NULL,
- $3)); }
;
select_item_list:
@@ -450,12 +396,6 @@ delete_statement_positioned:
NULL); }
;
-row_printf_statement:
- PARS_ROW_PRINTF_TOKEN select_statement
- { $$ = pars_row_printf_statement(
- static_cast<sel_node_t*>($2)); }
-;
-
assignment_statement:
PARS_ID_TOKEN PARS_ASSIGN_TOKEN exp
{ $$ = pars_assignment_statement(
@@ -540,12 +480,12 @@ fetch_statement:
;
column_def:
- PARS_ID_TOKEN type_name opt_column_len opt_unsigned opt_not_null
+ PARS_ID_TOKEN type_name opt_column_len opt_not_null
{ $$ = pars_column_def(
static_cast<sym_node_t*>($1),
static_cast<pars_res_word_t*>($2),
static_cast<sym_node_t*>($3),
- $4, $5); }
+ $4); }
;
column_def_list:
@@ -560,13 +500,6 @@ opt_column_len:
{ $$ = $2; }
;
-opt_unsigned:
- /* Nothing */ { $$ = NULL; }
- | PARS_UNSIGNED_TOKEN
- { $$ = &pars_int_token;
- /* pass any non-NULL pointer */ }
-;
-
opt_not_null:
/* Nothing */ { $$ = NULL; }
| PARS_NOT_TOKEN PARS_NULL_LIT
@@ -574,34 +507,12 @@ opt_not_null:
/* pass any non-NULL pointer */ }
;
-not_fit_in_memory:
- /* Nothing */ { $$ = NULL; }
- | PARS_DOES_NOT_FIT_IN_MEM_TOKEN
- { $$ = &pars_int_token;
- /* pass any non-NULL pointer */ }
-;
-
-compact:
- /* Nothing */ { $$ = NULL; }
- | PARS_COMPACT_TOKEN { $$ = &pars_int_token;
- /* pass any non-NULL pointer */ }
-;
-
-block_size:
- /* Nothing */ { $$ = NULL; }
- | PARS_BLOCK_SIZE_TOKEN '=' PARS_INT_LIT
- { $$ = $3; }
-;
-
create_table:
PARS_CREATE_TOKEN PARS_TABLE_TOKEN
table_name '(' column_def_list ')'
- not_fit_in_memory compact block_size
{ $$ = pars_create_table(
static_cast<sym_node_t*>($3),
- static_cast<sym_node_t*>($5),
- static_cast<sym_node_t*>($8),
- static_cast<sym_node_t*>($9), $7); }
+ static_cast<sym_node_t*>($5)); }
;
column_list:
@@ -651,31 +562,8 @@ rollback_statement:
type_name:
PARS_INT_TOKEN { $$ = &pars_int_token; }
- | PARS_INTEGER_TOKEN { $$ = &pars_int_token; }
| PARS_BIGINT_TOKEN { $$ = &pars_bigint_token; }
| PARS_CHAR_TOKEN { $$ = &pars_char_token; }
- | PARS_BINARY_TOKEN { $$ = &pars_binary_token; }
- | PARS_BLOB_TOKEN { $$ = &pars_blob_token; }
-;
-
-parameter_declaration:
- PARS_ID_TOKEN PARS_IN_TOKEN type_name
- { $$ = pars_parameter_declaration(
- static_cast<sym_node_t*>($1),
- PARS_INPUT,
- static_cast<pars_res_word_t*>($3)); }
- | PARS_ID_TOKEN PARS_OUT_TOKEN type_name
- { $$ = pars_parameter_declaration(
- static_cast<sym_node_t*>($1),
- PARS_OUTPUT,
- static_cast<pars_res_word_t*>($3)); }
-;
-
-parameter_declaration_list:
- /* Nothing */ { $$ = NULL; }
- | parameter_declaration { $$ = que_node_list_add_last(NULL, $1); }
- | parameter_declaration_list ',' parameter_declaration
- { $$ = que_node_list_add_last($1, $3); }
;
variable_declaration:
@@ -717,16 +605,14 @@ declaration_list:
;
procedure_definition:
- PARS_PROCEDURE_TOKEN PARS_ID_TOKEN '(' parameter_declaration_list ')'
+ PARS_PROCEDURE_TOKEN PARS_ID_TOKEN '(' ')'
PARS_IS_TOKEN
variable_declaration_list
declaration_list
PARS_BEGIN_TOKEN
statement_list
PARS_END_TOKEN { $$ = pars_procedure_definition(
- static_cast<sym_node_t*>($2),
- static_cast<sym_node_t*>($4),
- $10); }
+ static_cast<sym_node_t*>($2), $9); }
;
%%
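
The grammar now covers only the subset of InnoDB's internal SQL that
the surrounding code still uses: the SUM/DISTINCT aggregates, the
TO_CHAR/TO_NUMBER/SYSDATE/RND function family, the BINARY, BLOB,
FLOAT and UNSIGNED types, and the DOES_NOT_FIT_IN_MEMORY, COMPACT and
BLOCK_SIZE table clauses are all gone. The new %expect 41 declaration
records the audited number of shift/reduce conflicts, so Bison errors
out if a later grammar change makes the count drift. A sketch of DDL
that still fits the reduced grammar (table and index names are made up
for illustration):

	/* Column types are limited to INT, BIGINT and CHAR, with an
	optional NOT NULL; the storage clauses no longer exist. */
	static const char create_foo_sql[] =
		"PROCEDURE CREATE_FOO_PROC () IS\n"
		"BEGIN\n"
		"CREATE TABLE SYS_FOO(ID BIGINT, NAME CHAR NOT NULL);\n"
		"CREATE UNIQUE CLUSTERED INDEX FOO_PK ON SYS_FOO(ID);\n"
		"COMMIT WORK;\n"
		"END;\n";
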
diff --git a/storage/innobase/pars/pars0lex.l b/storage/innobase/pars/pars0lex.l
index 71699d2f099..8b2df6b7940 100644
--- a/storage/innobase/pars/pars0lex.l
+++ b/storage/innobase/pars/pars0lex.l
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2019, MariaDB Corporation.
+Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -60,7 +60,7 @@ Created 12/14/1997 Heikki Tuuri
#include "mem0mem.h"
#include "os0proc.h"
-#define malloc(A) ut_malloc(A)
+#define malloc(A) ut_malloc_nokey(A)
#define free(A) ut_free(A)
#define realloc(P, A) ut_realloc(P, A)
#define exit(A) ut_error
@@ -249,30 +249,10 @@ In the state 'id', only two actions are possible (defined below). */
return(PARS_IN_TOKEN);
}
-"OUT" {
- return(PARS_OUT_TOKEN);
-}
-
-"BINARY" {
- return(PARS_BINARY_TOKEN);
-}
-
-"BLOB" {
- return(PARS_BLOB_TOKEN);
-}
-
"INT" {
return(PARS_INT_TOKEN);
}
-"INTEGER" {
- return(PARS_INT_TOKEN);
-}
-
-"FLOAT" {
- return(PARS_FLOAT_TOKEN);
-}
-
"CHAR" {
return(PARS_CHAR_TOKEN);
}
@@ -321,18 +301,10 @@ In the state 'id', only two actions are possible (defined below). */
return(PARS_SELECT_TOKEN);
}
-"SUM" {
- return(PARS_SUM_TOKEN);
-}
-
"COUNT" {
return(PARS_COUNT_TOKEN);
}
-"DISTINCT" {
- return(PARS_DISTINCT_TOKEN);
-}
-
"FROM" {
return(PARS_FROM_TOKEN);
}
@@ -345,10 +317,6 @@ In the state 'id', only two actions are possible (defined below). */
return(PARS_FOR_TOKEN);
}
-"READ" {
- return(PARS_READ_TOKEN);
-}
-
"ORDER" {
return(PARS_ORDER_TOKEN);
}
@@ -405,14 +373,6 @@ In the state 'id', only two actions are possible (defined below). */
return(PARS_TABLE_TOKEN);
}
-"COMPACT" {
- return(PARS_COMPACT_TOKEN);
-}
-
-"BLOCK_SIZE" {
- return(PARS_BLOCK_SIZE_TOKEN);
-}
-
"INDEX" {
return(PARS_INDEX_TOKEN);
}
@@ -425,10 +385,6 @@ In the state 'id', only two actions are possible (defined below). */
return(PARS_CLUSTERED_TOKEN);
}
-"DOES_NOT_FIT_IN_MEMORY" {
- return(PARS_DOES_NOT_FIT_IN_MEM_TOKEN);
-}
-
"ON" {
return(PARS_ON_TOKEN);
}
@@ -457,30 +413,14 @@ In the state 'id', only two actions are possible (defined below). */
return(PARS_NOTFOUND_TOKEN);
}
-"TO_CHAR" {
- return(PARS_TO_CHAR_TOKEN);
-}
-
-"TO_NUMBER" {
- return(PARS_TO_NUMBER_TOKEN);
-}
-
"TO_BINARY" {
return(PARS_TO_BINARY_TOKEN);
}
-"BINARY_TO_NUMBER" {
- return(PARS_BINARY_TO_NUMBER_TOKEN);
-}
-
"SUBSTR" {
return(PARS_SUBSTR_TOKEN);
}
-"REPLSTR" {
- return(PARS_REPLSTR_TOKEN);
-}
-
"CONCAT" {
return(PARS_CONCAT_TOKEN);
}
@@ -493,30 +433,6 @@ In the state 'id', only two actions are possible (defined below). */
return(PARS_LENGTH_TOKEN);
}
-"SYSDATE" {
- return(PARS_SYSDATE_TOKEN);
-}
-
-"PRINTF" {
- return(PARS_PRINTF_TOKEN);
-}
-
-"ASSERT" {
- return(PARS_ASSERT_TOKEN);
-}
-
-"RND" {
- return(PARS_RND_TOKEN);
-}
-
-"RND_STR" {
- return(PARS_RND_STR_TOKEN);
-}
-
-"ROW_PRINTF" {
- return(PARS_ROW_PRINTF_TOKEN);
-}
-
"COMMIT" {
return(PARS_COMMIT_TOKEN);
}
@@ -529,10 +445,6 @@ In the state 'id', only two actions are possible (defined below). */
return(PARS_WORK_TOKEN);
}
-"UNSIGNED" {
- return(PARS_UNSIGNED_TOKEN);
-}
-
"EXIT" {
return(PARS_EXIT_TOKEN);
}
@@ -692,7 +604,6 @@ In the state 'id', only two actions are possible (defined below). */
/**********************************************************************
Release any resources used by the lexer. */
-UNIV_INTERN
void
pars_lexer_close(void)
/*==================*/
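
With the keyword rules above deleted, strings such as PRINTF, SUM or
UNSIGNED fall through to the lexer's generic identifier rule and come
back as PARS_ID_TOKEN, so stale internal-SQL scripts that used them
now fail in the parser rather than in the lexer. An illustrative
fragment (the procedure body is invented):

	/* Once valid via predefined_procedure_call; with that grammar
	rule and the "PRINTF" keyword both removed, yyparse() now
	reports a syntax error on this string. */
	static const char stale_sql[] =
		"PROCEDURE P () IS\n"
		"BEGIN\n"
		"PRINTF('hello');\n"
		"END;\n";
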
diff --git a/storage/innobase/pars/pars0opt.cc b/storage/innobase/pars/pars0opt.cc
index 135402f8144..4874ce610ca 100644
--- a/storage/innobase/pars/pars0opt.cc
+++ b/storage/innobase/pars/pars0opt.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,20 +24,15 @@ Created 12/21/1997 Heikki Tuuri
*******************************************************/
#include "pars0opt.h"
-
-#ifdef UNIV_NONINL
-#include "pars0opt.ic"
-#endif
-
#include "row0sel.h"
#include "row0ins.h"
#include "row0upd.h"
+#include "dict0boot.h"
#include "dict0dict.h"
#include "dict0mem.h"
#include "que0que.h"
#include "pars0grm.h"
#include "pars0pars.h"
-#include "lock0lock.h"
#define OPT_EQUAL 1 /* comparison by = */
#define OPT_COMPARISON 2 /* comparison by <, >, <=, or >= */
@@ -50,7 +45,7 @@ Created 12/21/1997 Heikki Tuuri
/*******************************************************************//**
Inverts a comparison operator.
-@return the equivalent operator when the order of the arguments is switched */
+@return the equivalent operator when the order of the arguments is switched */
static
int
opt_invert_cmp_op(
@@ -79,7 +74,7 @@ opt_invert_cmp_op(
Checks if the value of an expression can be calculated BEFORE the nth table
in a join is accessed. If this is the case, it can possibly be used in an
index search for the nth table.
-@return TRUE if already determined */
+@return TRUE if already determined */
static
ibool
opt_check_exp_determined_before(
@@ -138,7 +133,7 @@ opt_check_exp_determined_before(
/*******************************************************************//**
Looks in a comparison condition if a column value is already restricted by
it BEFORE the nth table is accessed.
-@return expression restricting the value of the column, or NULL if not known */
+@return expression restricting the value of the column, or NULL if not known */
static
que_node_t*
opt_look_for_col_in_comparison_before(
@@ -244,7 +239,7 @@ Looks in a search condition if a column value is already restricted by the
search condition BEFORE the nth table is accessed. Takes into account that
if we will fetch in an ascending order, we cannot utilize an upper limit for
a column value; in a descending order, respectively, a lower limit.
-@return expression restricting the value of the column, or NULL if not known */
+@return expression restricting the value of the column, or NULL if not known */
static
que_node_t*
opt_look_for_col_in_cond_before(
@@ -323,7 +318,7 @@ already know exactly in the query. If we have a comparison condition for
an additional field, 2 point are added. If the index is unique, and we know
all the unique fields for the index we add 1024 points. For a clustered index
we add 1 point.
-@return goodness */
+@return goodness */
static
ulint
opt_calc_index_goodness(
@@ -411,7 +406,7 @@ opt_calc_index_goodness(
/*******************************************************************//**
Calculates the number of matched fields based on an index goodness.
-@return number of excatly or partially matched fields */
+@return number of exactly or partially matched fields */
UNIV_INLINE
ulint
opt_calc_n_fields_from_goodness(
@@ -424,9 +419,9 @@ opt_calc_n_fields_from_goodness(
/*******************************************************************//**
Converts a comparison operator to the corresponding search mode PAGE_CUR_GE,
...
-@return search mode */
+@return search mode */
UNIV_INLINE
-ulint
+page_cur_mode_t
opt_op_to_search_mode(
/*==================*/
ibool asc, /*!< in: TRUE if the rows should be fetched in an
@@ -460,12 +455,12 @@ opt_op_to_search_mode(
ut_error;
}
- return(0);
+ return(PAGE_CUR_UNSUPP);
}
/*******************************************************************//**
Determines if a node is an argument node of a function node.
-@return TRUE if is an argument */
+@return TRUE if is an argument */
static
ibool
opt_is_arg(
@@ -774,10 +769,10 @@ opt_find_test_conds(
fclass = opt_classify_comparison(sel_node, i, cond);
if (fclass == OPT_END_COND) {
- UT_LIST_ADD_LAST(cond_list, plan->end_conds, cond);
+ UT_LIST_ADD_LAST(plan->end_conds, cond);
} else if (fclass == OPT_TEST_COND) {
- UT_LIST_ADD_LAST(cond_list, plan->other_conds, cond);
+ UT_LIST_ADD_LAST(plan->other_conds, cond);
}
}
@@ -839,8 +834,8 @@ opt_determine_and_normalize_test_conds(
plan = sel_node_get_nth_plan(sel_node, i);
- UT_LIST_INIT(plan->end_conds);
- UT_LIST_INIT(plan->other_conds);
+ UT_LIST_INIT(plan->end_conds, &func_node_t::cond_list);
+ UT_LIST_INIT(plan->other_conds, &func_node_t::cond_list);
/* Recursively go through the conjuncts and classify them */
@@ -862,7 +857,6 @@ already exist in the list. If the column is already in the list, puts a value
indirection to point to the occurrence in the column list, except if the
column occurrence we are looking at is in the column list, in which case
nothing is done. */
-UNIV_INTERN
void
opt_find_all_cols(
/*==============*/
@@ -941,7 +935,7 @@ opt_find_all_cols(
/* The same column did not occur in the list: add it */
- UT_LIST_ADD_LAST(col_var_list, *col_list, sym_node);
+ UT_LIST_ADD_LAST(*col_list, sym_node);
sym_node->copy_val = copy_val;
@@ -1037,7 +1031,7 @@ opt_classify_cols(
plan->must_get_clust = FALSE;
- UT_LIST_INIT(plan->columns);
+ UT_LIST_INIT(plan->columns, &sym_node_t::col_var_list);
/* All select list columns should be copied: therefore TRUE as the
first argument */
@@ -1119,13 +1113,13 @@ opt_clust_access(
/* We optimize here only queries to InnoDB's internal system
tables, and they should not contain column prefix indexes. */
- if (dict_index_get_nth_field(index, pos)->prefix_len != 0
+ if (dict_is_sys_table(index->table->id)
+ && (dict_index_get_nth_field(index, pos)->prefix_len != 0
|| dict_index_get_nth_field(clust_index, i)
- ->prefix_len != 0) {
- fprintf(stderr,
- "InnoDB: Error in pars0opt.cc:"
- " table %s has prefix_len != 0\n",
- index->table_name);
+ ->prefix_len != 0)) {
+ ib::error() << "Error in pars0opt.cc: table "
+ << index->table->name
+ << " has prefix_len != 0";
}
*(plan->clust_map + i) = pos;
@@ -1134,11 +1128,19 @@ opt_clust_access(
}
}
+#ifdef UNIV_SQL_DEBUG
+/** Print info of a query plan.
+@param[in,out] sel_node select node */
+static
+void
+opt_print_query_plan(
+ sel_node_t* sel_node);
+#endif
+
/*******************************************************************//**
Optimizes a select. Decides which indexes to tables to use. The tables
are accessed in the order that they were written to the FROM part in the
select statement. */
-UNIV_INTERN
void
opt_search_plan(
/*============*/
@@ -1213,13 +1215,13 @@ opt_search_plan(
#endif
}
-/********************************************************************//**
-Prints info of a query plan. */
-UNIV_INTERN
+#ifdef UNIV_SQL_DEBUG
+/** Print info of a query plan.
+@param[in,out] sel_node select node */
+static
void
opt_print_query_plan(
-/*=================*/
- sel_node_t* sel_node) /*!< in: select node */
+ sel_node_t* sel_node)
{
plan_t* plan;
ulint n_fields;
@@ -1251,11 +1253,13 @@ opt_print_query_plan(
n_fields = 0;
}
- fputs("Table ", stderr);
- dict_index_name_print(stderr, NULL, plan->index);
- fprintf(stderr,"; exact m. %lu, match %lu, end conds %lu\n",
+ fprintf(stderr,
+ "Index %s of table %s"
+ "; exact m. %lu, match %lu, end conds %lu\n",
+ plan->index->name(), plan->index->table_name,
(unsigned long) plan->n_exact_match,
(unsigned long) n_fields,
(unsigned long) UT_LIST_GET_LEN(plan->end_conds));
}
}
+#endif /* UNIV_SQL_DEBUG */
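
The index-selection heuristic documented in the opt_calc_index_goodness()
comment above is unchanged by this patch: 4 points per exactly-known
index field, 2 for a comparison condition on one additional field,
1024 for a fully matched unique index, and 1 for a clustered index.
Restated as a worked example; the helper below is illustrative only
and not part of pars0opt.cc, which walks the real dict_index_t:

	static unsigned
	goodness_sketch(unsigned n_exact, int has_cmp,
			int unique_full, int clustered)
	{
		return(4 * n_exact + (has_cmp ? 2 : 0)
		       + (unique_full ? 1024 : 0) + (clustered ? 1 : 0));
	}

	/* A clustered unique index with both key fields known exactly:
	goodness_sketch(2, 0, 1, 1) == 1033. */
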
diff --git a/storage/innobase/pars/pars0pars.cc b/storage/innobase/pars/pars0pars.cc
index 3c7f5a81246..280c4ee1815 100644
--- a/storage/innobase/pars/pars0pars.cc
+++ b/storage/innobase/pars/pars0pars.cc
@@ -28,11 +28,6 @@ Created 11/19/1996 Heikki Tuuri
on 1/27/1998 */
#include "pars0pars.h"
-
-#ifdef UNIV_NONINL
-#include "pars0pars.ic"
-#endif
-
#include "row0sel.h"
#include "row0ins.h"
#include "row0upd.h"
@@ -46,56 +41,35 @@ on 1/27/1998 */
#include "data0type.h"
#include "trx0trx.h"
#include "trx0roll.h"
-#include "lock0lock.h"
#include "eval0eval.h"
-#ifdef UNIV_SQL_DEBUG
-/** If the following is set TRUE, the lexer will print the SQL string
-as it tokenizes it */
-UNIV_INTERN ibool pars_print_lexed = FALSE;
-#endif /* UNIV_SQL_DEBUG */
-
/* Global variable used while parsing a single procedure or query : the code is
NOT re-entrant */
-UNIV_INTERN sym_tab_t* pars_sym_tab_global;
+sym_tab_t* pars_sym_tab_global;
/* Global variables used to denote certain reserved words, used in
constructing the parsing tree */
-UNIV_INTERN pars_res_word_t pars_to_char_token = {PARS_TO_CHAR_TOKEN};
-UNIV_INTERN pars_res_word_t pars_to_number_token = {PARS_TO_NUMBER_TOKEN};
-UNIV_INTERN pars_res_word_t pars_to_binary_token = {PARS_TO_BINARY_TOKEN};
-UNIV_INTERN pars_res_word_t pars_binary_to_number_token = {PARS_BINARY_TO_NUMBER_TOKEN};
-UNIV_INTERN pars_res_word_t pars_substr_token = {PARS_SUBSTR_TOKEN};
-UNIV_INTERN pars_res_word_t pars_replstr_token = {PARS_REPLSTR_TOKEN};
-UNIV_INTERN pars_res_word_t pars_concat_token = {PARS_CONCAT_TOKEN};
-UNIV_INTERN pars_res_word_t pars_instr_token = {PARS_INSTR_TOKEN};
-UNIV_INTERN pars_res_word_t pars_length_token = {PARS_LENGTH_TOKEN};
-UNIV_INTERN pars_res_word_t pars_sysdate_token = {PARS_SYSDATE_TOKEN};
-UNIV_INTERN pars_res_word_t pars_printf_token = {PARS_PRINTF_TOKEN};
-UNIV_INTERN pars_res_word_t pars_assert_token = {PARS_ASSERT_TOKEN};
-UNIV_INTERN pars_res_word_t pars_rnd_token = {PARS_RND_TOKEN};
-UNIV_INTERN pars_res_word_t pars_rnd_str_token = {PARS_RND_STR_TOKEN};
-UNIV_INTERN pars_res_word_t pars_count_token = {PARS_COUNT_TOKEN};
-UNIV_INTERN pars_res_word_t pars_sum_token = {PARS_SUM_TOKEN};
-UNIV_INTERN pars_res_word_t pars_distinct_token = {PARS_DISTINCT_TOKEN};
-UNIV_INTERN pars_res_word_t pars_binary_token = {PARS_BINARY_TOKEN};
-UNIV_INTERN pars_res_word_t pars_blob_token = {PARS_BLOB_TOKEN};
-UNIV_INTERN pars_res_word_t pars_int_token = {PARS_INT_TOKEN};
-UNIV_INTERN pars_res_word_t pars_bigint_token = {PARS_BIGINT_TOKEN};
-UNIV_INTERN pars_res_word_t pars_char_token = {PARS_CHAR_TOKEN};
-UNIV_INTERN pars_res_word_t pars_float_token = {PARS_FLOAT_TOKEN};
-UNIV_INTERN pars_res_word_t pars_update_token = {PARS_UPDATE_TOKEN};
-UNIV_INTERN pars_res_word_t pars_asc_token = {PARS_ASC_TOKEN};
-UNIV_INTERN pars_res_word_t pars_desc_token = {PARS_DESC_TOKEN};
-UNIV_INTERN pars_res_word_t pars_open_token = {PARS_OPEN_TOKEN};
-UNIV_INTERN pars_res_word_t pars_close_token = {PARS_CLOSE_TOKEN};
-UNIV_INTERN pars_res_word_t pars_share_token = {PARS_SHARE_TOKEN};
-UNIV_INTERN pars_res_word_t pars_unique_token = {PARS_UNIQUE_TOKEN};
-UNIV_INTERN pars_res_word_t pars_clustered_token = {PARS_CLUSTERED_TOKEN};
+pars_res_word_t pars_to_binary_token = {PARS_TO_BINARY_TOKEN};
+pars_res_word_t pars_substr_token = {PARS_SUBSTR_TOKEN};
+pars_res_word_t pars_concat_token = {PARS_CONCAT_TOKEN};
+pars_res_word_t pars_instr_token = {PARS_INSTR_TOKEN};
+pars_res_word_t pars_length_token = {PARS_LENGTH_TOKEN};
+pars_res_word_t pars_count_token = {PARS_COUNT_TOKEN};
+pars_res_word_t pars_int_token = {PARS_INT_TOKEN};
+pars_res_word_t pars_bigint_token = {PARS_BIGINT_TOKEN};
+pars_res_word_t pars_char_token = {PARS_CHAR_TOKEN};
+pars_res_word_t pars_update_token = {PARS_UPDATE_TOKEN};
+pars_res_word_t pars_asc_token = {PARS_ASC_TOKEN};
+pars_res_word_t pars_desc_token = {PARS_DESC_TOKEN};
+pars_res_word_t pars_open_token = {PARS_OPEN_TOKEN};
+pars_res_word_t pars_close_token = {PARS_CLOSE_TOKEN};
+pars_res_word_t pars_share_token = {PARS_SHARE_TOKEN};
+pars_res_word_t pars_unique_token = {PARS_UNIQUE_TOKEN};
+pars_res_word_t pars_clustered_token = {PARS_CLUSTERED_TOKEN};
/** Global variable used to denote the '*' in SELECT * FROM.. */
-UNIV_INTERN ulint pars_star_denoter = 12345678;
+ulint pars_star_denoter = 12345678;
/********************************************************************
Get user function with the given name.*/
@@ -189,7 +163,7 @@ pars_info_lookup_bound_lit(
/*********************************************************************//**
Determines the class of a function code.
-@return function class: PARS_FUNC_ARITH, ... */
+@return function class: PARS_FUNC_ARITH, ... */
static
ulint
pars_func_get_class(
@@ -207,24 +181,15 @@ pars_func_get_class(
case PARS_AND_TOKEN: case PARS_OR_TOKEN: case PARS_NOT_TOKEN:
return(PARS_FUNC_LOGICAL);
- case PARS_COUNT_TOKEN: case PARS_SUM_TOKEN:
+ case PARS_COUNT_TOKEN:
return(PARS_FUNC_AGGREGATE);
- case PARS_TO_CHAR_TOKEN:
- case PARS_TO_NUMBER_TOKEN:
case PARS_TO_BINARY_TOKEN:
- case PARS_BINARY_TO_NUMBER_TOKEN:
case PARS_SUBSTR_TOKEN:
case PARS_CONCAT_TOKEN:
case PARS_LENGTH_TOKEN:
case PARS_INSTR_TOKEN:
- case PARS_SYSDATE_TOKEN:
case PARS_NOTFOUND_TOKEN:
- case PARS_PRINTF_TOKEN:
- case PARS_ASSERT_TOKEN:
- case PARS_RND_TOKEN:
- case PARS_RND_STR_TOKEN:
- case PARS_REPLSTR_TOKEN:
return(PARS_FUNC_PREDEFINED);
default:
@@ -234,7 +199,7 @@ pars_func_get_class(
/*********************************************************************//**
Parses an operator or predefined function expression.
-@return own: function node in a query tree */
+@return own: function node in a query tree */
static
func_node_t*
pars_func_low(
@@ -257,15 +222,14 @@ pars_func_low(
node->args = arg;
- UT_LIST_ADD_LAST(func_node_list, pars_sym_tab_global->func_node_list,
- node);
+ UT_LIST_ADD_LAST(pars_sym_tab_global->func_node_list, node);
+
return(node);
}
/*********************************************************************//**
Parses a function expression.
-@return own: function node in a query tree */
-UNIV_INTERN
+@return own: function node in a query tree */
func_node_t*
pars_func(
/*======*/
@@ -278,7 +242,6 @@ pars_func(
/*************************************************************************
Rebind a LIKE search string. NOTE: We ignore any '%' characters embedded
within the search string.*/
-
int
pars_like_rebind(
/*=============*/
@@ -301,9 +264,7 @@ pars_like_rebind(
}
/* Is this a '%STRING' or %STRING% ?*/
- if (*ptr == '%') {
- op = (op == IB_LIKE_PREFIX) ? IB_LIKE_SUBSTR : IB_LIKE_SUFFIX;
- }
+ ut_ad(*ptr != '%');
if (node->like_node == NULL) {
/* Add the LIKE operator info node to the node list.
@@ -339,10 +300,8 @@ pars_like_rebind(
mach_read_from_4(static_cast<byte*>(dfield_get_data(dfield))));
switch (op_check) {
- case IB_LIKE_PREFIX:
- case IB_LIKE_SUFFIX:
- case IB_LIKE_SUBSTR:
- case IB_LIKE_EXACT:
+ case IB_LIKE_PREFIX:
+ case IB_LIKE_EXACT:
break;
default:
@@ -383,36 +342,6 @@ pars_like_rebind(
dfield_set_data(dfield, ptr, ptr_len - 1);
break;
- case IB_LIKE_SUFFIX:
- func = PARS_LIKE_TOKEN_SUFFIX;
-
- /* Modify the original node */
- /* Make it an '' empty string */
- dfield_set_len(dfield, 0);
-
- dfield = que_node_get_val(str_node);
- dtype = dfield_get_type(dfield);
-
- ut_a(dtype_get_mtype(dtype) == DATA_VARCHAR);
-
- dfield_set_data(dfield, ptr + 1, ptr_len - 1);
- break;
-
- case IB_LIKE_SUBSTR:
- func = PARS_LIKE_TOKEN_SUBSTR;
-
- /* Modify the original node */
- /* Make it an '' empty string */
- dfield_set_len(dfield, 0);
-
- dfield = que_node_get_val(str_node);
- dtype = dfield_get_type(dfield);
-
- ut_a(dtype_get_mtype(dtype) == DATA_VARCHAR);
-
- dfield_set_data(dfield, ptr + 1, ptr_len - 2);
- break;
-
default:
ut_error;
}
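
After this hunk only two LIKE shapes survive in the internal parser:
an exact match, and a "STRING%" prefix search whose trailing '%' is
stripped by the dfield_set_data(dfield, ptr, ptr_len - 1) call above.
The '%STRING' and '%STRING%' forms that used to map to IB_LIKE_SUFFIX
and IB_LIKE_SUBSTR now trip the new ut_ad(*ptr != '%') assertion. A
tiny classifier restating that rule (a hypothetical helper, not
InnoDB code):

	enum like_shape { LIKE_EXACT, LIKE_PREFIX };

	static enum like_shape
	classify_like(const char* pat, size_t len)
	{
		/* pars_like_rebind() asserts the pattern does not
		start with '%'; a trailing '%' selects a prefix
		search on the pattern minus its last byte. */
		return((len > 0 && pat[len - 1] == '%')
		       ? LIKE_PREFIX : LIKE_EXACT);
	}
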
@@ -451,8 +380,7 @@ pars_like_op(
}
/*********************************************************************//**
Parses an operator expression.
-@return own: function node in a query tree */
-UNIV_INTERN
+@return own: function node in a query tree */
func_node_t*
pars_op(
/*====*/
@@ -486,8 +414,7 @@ pars_op(
/*********************************************************************//**
Parses an ORDER BY clause. Order by a single column only is supported.
-@return own: order-by node in a query tree */
-UNIV_INTERN
+@return own: order-by node in a query tree */
order_node_t*
pars_order_by(
/*==========*/
@@ -517,7 +444,7 @@ pars_order_by(
/*********************************************************************//**
Determine if a data type is a built-in string data type of the InnoDB
SQL parser.
-@return TRUE if string data type */
+@return TRUE if string data type */
static
ibool
pars_is_string_type(
@@ -549,7 +476,6 @@ pars_resolve_func_data_type(
arg = node->args;
switch (node->func) {
- case PARS_SUM_TOKEN:
case '+': case '-': case '*': case '/':
/* Inherit the data type from the first argument (which must
not be the SQL null literal whose type is DATA_ERROR) */
@@ -566,13 +492,6 @@ pars_resolve_func_data_type(
dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
break;
- case PARS_TO_CHAR_TOKEN:
- case PARS_RND_STR_TOKEN:
- ut_a(dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT);
- dtype_set(que_node_get_data_type(node), DATA_VARCHAR,
- DATA_ENGLISH, 0);
- break;
-
case PARS_TO_BINARY_TOKEN:
if (dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT) {
dtype_set(que_node_get_data_type(node), DATA_VARCHAR,
@@ -583,19 +502,12 @@ pars_resolve_func_data_type(
}
break;
- case PARS_TO_NUMBER_TOKEN:
- case PARS_BINARY_TO_NUMBER_TOKEN:
case PARS_LENGTH_TOKEN:
case PARS_INSTR_TOKEN:
ut_a(pars_is_string_type(que_node_get_data_type(arg)->mtype));
dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
break;
- case PARS_SYSDATE_TOKEN:
- ut_a(arg == NULL);
- dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
- break;
-
case PARS_SUBSTR_TOKEN:
case PARS_CONCAT_TOKEN:
ut_a(pars_is_string_type(que_node_get_data_type(arg)->mtype));
@@ -616,11 +528,6 @@ pars_resolve_func_data_type(
dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
break;
- case PARS_RND_TOKEN:
- ut_a(dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT);
- dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
- break;
-
case PARS_LIKE_TOKEN_EXACT:
case PARS_LIKE_TOKEN_PREFIX:
case PARS_LIKE_TOKEN_SUFFIX:
@@ -716,8 +623,7 @@ pars_resolve_exp_variables_and_types(
sym_node->indirection = node;
if (select_node) {
- UT_LIST_ADD_LAST(col_var_list, select_node->copy_variables,
- sym_node);
+ UT_LIST_ADD_LAST(select_node->copy_variables, sym_node);
}
dfield_set_type(que_node_get_val(sym_node),
@@ -869,7 +775,7 @@ pars_retrieve_table_def(
/*********************************************************************//**
Retrieves the table definitions for a list of table name ids.
-@return number of tables */
+@return number of tables */
static
ulint
pars_retrieve_table_list_defs(
@@ -936,8 +842,7 @@ pars_select_all_columns(
/*********************************************************************//**
Parses a select list; creates a query graph node for the whole SELECT
statement.
-@return own: select node in a query tree */
-UNIV_INTERN
+@return own: select node in a query tree */
sel_node_t*
pars_select_list(
/*=============*/
@@ -1001,8 +906,7 @@ pars_check_aggregate(
/*********************************************************************//**
Parses a select statement.
-@return own: select node in a query tree */
-UNIV_INTERN
+@return own: select node in a query tree */
sel_node_t*
pars_select_statement(
/*==================*/
@@ -1030,7 +934,7 @@ pars_select_statement(
== que_node_list_get_len(select_node->select_list));
}
- UT_LIST_INIT(select_node->copy_variables);
+ UT_LIST_INIT(select_node->copy_variables, &sym_node_t::col_var_list);
pars_resolve_exp_list_columns(table_list, select_node->select_list);
pars_resolve_exp_list_variables_and_types(select_node,
@@ -1084,8 +988,7 @@ pars_select_statement(
/*********************************************************************//**
Parses a cursor declaration.
-@return sym_node */
-UNIV_INTERN
+@return sym_node */
que_node_t*
pars_cursor_declaration(
/*====================*/
@@ -1105,8 +1008,7 @@ pars_cursor_declaration(
/*********************************************************************//**
Parses a function declaration.
-@return sym_node */
-UNIV_INTERN
+@return sym_node */
que_node_t*
pars_function_declaration(
/*======================*/
@@ -1125,8 +1027,7 @@ pars_function_declaration(
/*********************************************************************//**
Parses a delete or update statement start.
-@return own: update node in a query tree */
-UNIV_INTERN
+@return own: update node in a query tree */
upd_node_t*
pars_update_statement_start(
/*========================*/
@@ -1149,8 +1050,7 @@ pars_update_statement_start(
/*********************************************************************//**
Parses a column assignment in an update.
-@return column assignment node */
-UNIV_INTERN
+@return column assignment node */
col_assign_node_t*
pars_column_assignment(
/*===================*/
@@ -1235,7 +1135,7 @@ pars_process_assign_list(
upd_field_set_field_no(upd_field, dict_index_get_nth_col_pos(
clust_index, col_sym->col_no,
NULL),
- clust_index, NULL);
+ clust_index);
upd_field->exp = assign_node->val;
if (!dict_col_get_fixed_size(
@@ -1263,8 +1163,7 @@ pars_process_assign_list(
/*********************************************************************//**
Parses an update or delete statement.
-@return own: update node in a query tree */
-UNIV_INTERN
+@return own: update node in a query tree */
upd_node_t*
pars_update_statement(
/*==================*/
@@ -1282,7 +1181,7 @@ pars_update_statement(
pars_retrieve_table_def(table_sym);
node->table = table_sym->table;
- UT_LIST_INIT(node->columns);
+ UT_LIST_INIT(node->columns, &sym_node_t::col_var_list);
/* Make the single table node into a list of table nodes of length 1 */
@@ -1349,8 +1248,7 @@ pars_update_statement(
/*********************************************************************//**
Parses an insert statement.
-@return own: update node in a query tree */
-UNIV_INTERN
+@return own: update node in a query tree */
ins_node_t*
pars_insert_statement(
/*==================*/
@@ -1414,9 +1312,7 @@ pars_set_dfield_type(
pars_res_word_t* type, /*!< in: pointer to a type
token */
ulint len, /*!< in: length, or 0 */
- ibool is_unsigned, /*!< in: if TRUE, column is
- UNSIGNED. */
- ibool is_not_null) /*!< in: if TRUE, column is
+ bool is_not_null) /*!< in: whether the column is
NOT NULL. */
{
ulint flags = 0;
@@ -1425,10 +1321,6 @@ pars_set_dfield_type(
flags |= DATA_NOT_NULL;
}
- if (is_unsigned) {
- flags |= DATA_UNSIGNED;
- }
-
if (type == &pars_bigint_token) {
ut_a(len == 0);
@@ -1443,16 +1335,6 @@ pars_set_dfield_type(
dtype_set(dfield_get_type(dfield), DATA_VARCHAR,
DATA_ENGLISH | flags, len);
- } else if (type == &pars_binary_token) {
- ut_a(len != 0);
-
- dtype_set(dfield_get_type(dfield), DATA_FIXBINARY,
- DATA_BINARY_TYPE | flags, len);
- } else if (type == &pars_blob_token) {
- ut_a(len == 0);
-
- dtype_set(dfield_get_type(dfield), DATA_BLOB,
- DATA_BINARY_TYPE | flags, 0);
} else {
ut_error;
}
@@ -1460,8 +1342,7 @@ pars_set_dfield_type(
/*********************************************************************//**
Parses a variable declaration.
-@return own: symbol table node of type SYM_VAR */
-UNIV_INTERN
+@return own: symbol table node of type SYM_VAR */
sym_node_t*
pars_variable_declaration(
/*======================*/
@@ -1474,29 +1355,7 @@ pars_variable_declaration(
node->param_type = PARS_NOT_PARAM;
- pars_set_dfield_type(que_node_get_val(node), type, 0, FALSE, FALSE);
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses a procedure parameter declaration.
-@return own: symbol table node of type SYM_VAR */
-UNIV_INTERN
-sym_node_t*
-pars_parameter_declaration(
-/*=======================*/
- sym_node_t* node, /*!< in: symbol table node allocated for the
- id of the parameter */
- ulint param_type,
- /*!< in: PARS_INPUT or PARS_OUTPUT */
- pars_res_word_t* type) /*!< in: pointer to a type token */
-{
- ut_a((param_type == PARS_INPUT) || (param_type == PARS_OUTPUT));
-
- pars_variable_declaration(node, type);
-
- node->param_type = param_type;
+ pars_set_dfield_type(que_node_get_val(node), type, 0, false);
return(node);
}
@@ -1524,8 +1383,7 @@ pars_set_parent_in_list(
/*********************************************************************//**
Parses an elsif element.
-@return elsif node */
-UNIV_INTERN
+@return elsif node */
elsif_node_t*
pars_elsif_element(
/*===============*/
@@ -1551,8 +1409,7 @@ pars_elsif_element(
/*********************************************************************//**
Parses an if-statement.
-@return if-statement node */
-UNIV_INTERN
+@return if-statement node */
if_node_t*
pars_if_statement(
/*==============*/
@@ -1605,8 +1462,7 @@ pars_if_statement(
/*********************************************************************//**
Parses a while-statement.
-@return while-statement node */
-UNIV_INTERN
+@return while-statement node */
while_node_t*
pars_while_statement(
/*=================*/
@@ -1634,8 +1490,7 @@ pars_while_statement(
/*********************************************************************//**
Parses a for-loop-statement.
-@return for-statement node */
-UNIV_INTERN
+@return for-statement node */
for_node_t*
pars_for_statement(
/*===============*/
@@ -1671,8 +1526,7 @@ pars_for_statement(
/*********************************************************************//**
Parses an exit statement.
-@return exit statement node */
-UNIV_INTERN
+@return exit statement node */
exit_node_t*
pars_exit_statement(void)
/*=====================*/
@@ -1688,8 +1542,7 @@ pars_exit_statement(void)
/*********************************************************************//**
Parses a return-statement.
-@return return-statement node */
-UNIV_INTERN
+@return return-statement node */
return_node_t*
pars_return_statement(void)
/*=======================*/
@@ -1706,8 +1559,7 @@ pars_return_statement(void)
/*********************************************************************//**
Parses an assignment statement.
-@return assignment statement node */
-UNIV_INTERN
+@return assignment statement node */
assign_node_t*
pars_assignment_statement(
/*======================*/
@@ -1735,8 +1587,7 @@ pars_assignment_statement(
/*********************************************************************//**
Parses a procedure call.
-@return function node */
-UNIV_INTERN
+@return function node */
func_node_t*
pars_procedure_call(
/*================*/
@@ -1755,8 +1606,7 @@ pars_procedure_call(
/*********************************************************************//**
Parses a fetch statement. into_list or user_func (but not both) must be
non-NULL.
-@return fetch statement node */
-UNIV_INTERN
+@return fetch statement node */
fetch_node_t*
pars_fetch_statement(
/*=================*/
@@ -1809,8 +1659,7 @@ pars_fetch_statement(
/*********************************************************************//**
Parses an open or close cursor statement.
-@return fetch statement node */
-UNIV_INTERN
+@return fetch statement node */
open_node_t*
pars_open_statement(
/*================*/
@@ -1841,8 +1690,7 @@ pars_open_statement(
/*********************************************************************//**
Parses a row_printf-statement.
-@return row_printf-statement node */
-UNIV_INTERN
+@return row_printf-statement node */
row_printf_node_t*
pars_row_printf_statement(
/*======================*/
@@ -1864,8 +1712,7 @@ pars_row_printf_statement(
/*********************************************************************//**
Parses a commit statement.
-@return own: commit node struct */
-UNIV_INTERN
+@return own: commit node struct */
commit_node_t*
pars_commit_statement(void)
/*=======================*/
@@ -1875,8 +1722,7 @@ pars_commit_statement(void)
/*********************************************************************//**
Parses a rollback statement.
-@return own: rollback node struct */
-UNIV_INTERN
+@return own: rollback node struct */
roll_node_t*
pars_rollback_statement(void)
/*=========================*/
@@ -1886,8 +1732,7 @@ pars_rollback_statement(void)
/*********************************************************************//**
Parses a column definition at a table creation.
-@return column sym table node */
-UNIV_INTERN
+@return column sym table node */
sym_node_t*
pars_column_def(
/*============*/
@@ -1896,8 +1741,6 @@ pars_column_def(
pars_res_word_t* type, /*!< in: data type */
sym_node_t* len, /*!< in: length of column, or
NULL */
- void* is_unsigned, /*!< in: if not NULL, column
- is of type UNSIGNED. */
void* is_not_null) /*!< in: if not NULL, column
is of type NOT NULL. */
{
@@ -1910,34 +1753,20 @@ pars_column_def(
}
pars_set_dfield_type(que_node_get_val(sym_node), type, len2,
- is_unsigned != NULL, is_not_null != NULL);
+ is_not_null != NULL);
return(sym_node);
}
/*********************************************************************//**
Parses a table creation operation.
-@return table create subgraph */
-UNIV_INTERN
+@return table create subgraph */
tab_node_t*
pars_create_table(
/*==============*/
sym_node_t* table_sym, /*!< in: table name node in the symbol
table */
- sym_node_t* column_defs, /*!< in: list of column names */
- sym_node_t* compact, /* in: non-NULL if COMPACT table. */
- sym_node_t* block_size, /* in: block size (can be NULL) */
- void* not_fit_in_memory MY_ATTRIBUTE((unused)))
- /*!< in: a non-NULL pointer means that
- this is a table which in simulations
- should be simulated as not fitting
- in memory; thread is put to sleep
- to simulate disk accesses; NOTE that
- this flag is not stored to the data
- dictionary on disk, and the database
- will forget about non-NULL value if
- it has to reload the table definition
- from disk */
+ sym_node_t* column_defs) /*!< in: list of column names */
{
dict_table_t* table;
sym_node_t* column;
@@ -1945,67 +1774,16 @@ pars_create_table(
const dtype_t* dtype;
ulint n_cols;
ulint flags = 0;
- ulint flags2 = 0;
-
- if (compact != NULL) {
-
- /* System tables currently only use the REDUNDANT row
- format therefore the check for srv_file_per_table should be
- safe for now. */
-
- flags |= DICT_TF_COMPACT;
-
- /* FIXME: Ideally this should be part of the SQL syntax
- or use some other mechanism. We want to reduce dependency
- on global variables. There is an inherent race here but
- that has always existed around this variable. */
- if (srv_file_per_table) {
- flags2 |= DICT_TF2_USE_TABLESPACE;
- }
- }
-
- if (block_size != NULL) {
- ulint size;
- dfield_t* dfield;
-
- dfield = que_node_get_val(block_size);
+ ulint flags2 = DICT_TF2_FTS_AUX_HEX_NAME;
- ut_a(dfield_get_len(dfield) == 4);
- size = mach_read_from_4(static_cast<byte*>(
- dfield_get_data(dfield)));
-
-
- switch (size) {
- case 0:
- break;
-
- case 1: case 2: case 4: case 8: case 16:
- flags |= DICT_TF_COMPACT;
- /* FTS-FIXME: needs the zip changes */
- /* flags |= size << DICT_TF_COMPRESSED_SHIFT; */
- break;
-
- default:
- ut_error;
- }
- }
-
- /* Set the flags2 when create table or alter tables */
- flags2 |= DICT_TF2_FTS_AUX_HEX_NAME;
DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name",
flags2 &= ~DICT_TF2_FTS_AUX_HEX_NAME;);
-
n_cols = que_node_list_get_len(column_defs);
table = dict_mem_table_create(
- table_sym->name, 0, n_cols, flags, flags2);
+ table_sym->name, 0, n_cols, 0, flags, flags2);
-#ifdef UNIV_DEBUG
- if (not_fit_in_memory != NULL) {
- table->does_not_fit_in_memory = TRUE;
- }
-#endif /* UNIV_DEBUG */
column = column_defs;
while (column) {
@@ -2020,7 +1798,7 @@ pars_create_table(
column = static_cast<sym_node_t*>(que_node_get_next(column));
}
- node = tab_create_graph_create(table, pars_sym_tab_global->heap, true,
+ node = tab_create_graph_create(table, pars_sym_tab_global->heap,
FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
table_sym->resolved = TRUE;
@@ -2031,8 +1809,7 @@ pars_create_table(
/*********************************************************************//**
Parses an index creation operation.
-@return index create subgraph */
-UNIV_INTERN
+@return index create subgraph */
ind_node_t*
pars_create_index(
/*==============*/
@@ -2075,7 +1852,7 @@ pars_create_index(
column = static_cast<sym_node_t*>(que_node_get_next(column));
}
- node = ind_create_graph_create(index, pars_sym_tab_global->heap, true);
+ node = ind_create_graph_create(index, pars_sym_tab_global->heap, NULL);
table_sym->resolved = TRUE;
table_sym->token_type = SYM_TABLE;
@@ -2088,14 +1865,12 @@ pars_create_index(
/*********************************************************************//**
Parses a procedure definition.
-@return query fork node */
-UNIV_INTERN
+@return query fork node */
que_fork_t*
pars_procedure_definition(
/*======================*/
sym_node_t* sym_node, /*!< in: procedure id node in the symbol
table */
- sym_node_t* param_list, /*!< in: parameter declaration list */
que_node_t* stat_list) /*!< in: statement list */
{
proc_node_t* node;
@@ -2108,7 +1883,7 @@ pars_procedure_definition(
fork = que_fork_create(NULL, NULL, QUE_FORK_PROCEDURE, heap);
fork->trx = NULL;
- thr = que_thr_create(fork, heap);
+ thr = que_thr_create(fork, heap, NULL);
node = static_cast<proc_node_t*>(
mem_heap_alloc(heap, sizeof(proc_node_t)));
@@ -2120,7 +1895,6 @@ pars_procedure_definition(
sym_node->resolved = TRUE;
node->proc_id = sym_node;
- node->param_list = param_list;
node->stat_list = stat_list;
pars_set_parent_in_list(stat_list, node);
@@ -2139,8 +1913,7 @@ Parses a stored procedure call, when this is not within another stored
procedure, that is, the client issues a procedure call directly.
In MySQL/InnoDB, stored InnoDB procedures are invoked via the
parsed procedure tree, not via InnoDB SQL, so this function is not used.
-@return query graph */
-UNIV_INTERN
+@return query graph */
que_fork_t*
pars_stored_procedure_call(
/*=======================*/
@@ -2153,7 +1926,6 @@ pars_stored_procedure_call(
/*************************************************************//**
Retrieves characters to the lexical analyzer. */
-UNIV_INTERN
size_t
pars_get_lex_chars(
/*===============*/
@@ -2161,13 +1933,9 @@ pars_get_lex_chars(
size_t max_size) /*!< in: maximum number of characters which fit
in the buffer */
{
- size_t len = size_t(
- pars_sym_tab_global->string_len
- - pars_sym_tab_global->next_char_pos);
+ size_t len = pars_sym_tab_global->string_len
+ - pars_sym_tab_global->next_char_pos;
if (len == 0) {
-#ifdef YYDEBUG
- /* fputs("SQL string ends\n", stderr); */
-#endif
return(0);
}
@@ -2175,21 +1943,8 @@ pars_get_lex_chars(
len = max_size;
}
-#ifdef UNIV_SQL_DEBUG
- if (pars_print_lexed) {
-
- if (len >= 5) {
- len = 5;
- }
-
- fwrite(pars_sym_tab_global->sql_string
- + pars_sym_tab_global->next_char_pos,
- 1, len, stderr);
- }
-#endif /* UNIV_SQL_DEBUG */
-
- ut_memcpy(buf, pars_sym_tab_global->sql_string
- + pars_sym_tab_global->next_char_pos, len);
+ memcpy(buf, pars_sym_tab_global->sql_string
+ + pars_sym_tab_global->next_char_pos, len);
pars_sym_tab_global->next_char_pos += len;
@@ -2198,7 +1953,6 @@ pars_get_lex_chars(
/*************************************************************//**
Called by yyparse on error. */
-UNIV_INTERN
void
yyerror(
/*====*/
@@ -2207,15 +1961,12 @@ yyerror(
{
ut_ad(s);
- fputs("PARSER ERROR: Syntax error in SQL string\n", stderr);
-
- ut_error;
+ ib::fatal() << "PARSER: Syntax error in SQL string";
}
/*************************************************************//**
Parses an SQL string returning the query graph.
-@return own: the query graph */
-UNIV_INTERN
+@return own: the query graph */
que_t*
pars_sql(
/*=====*/
@@ -2231,7 +1982,7 @@ pars_sql(
heap = mem_heap_create(16000);
/* Currently, the parser is not reentrant: */
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(mutex_own(&dict_sys->mutex));
pars_sym_tab_global = sym_tab_create(heap);
@@ -2263,19 +2014,21 @@ pars_sql(
return(graph);
}
-/******************************************************************//**
-Completes a query graph by adding query thread and fork nodes
+/** Completes a query graph by adding query thread and fork nodes
above it and prepares the graph for running. The fork created is of
type QUE_FORK_MYSQL_INTERFACE.
-@return query thread node to run */
-UNIV_INTERN
+@param[in] node root node for an incomplete query
+ graph, or NULL for dummy graph
+@param[in] trx transaction handle
+@param[in] heap memory heap from which allocated
+@param[in] prebuilt row prebuilt structure
+@return query thread node to run */
que_thr_t*
pars_complete_graph_for_exec(
-/*=========================*/
- que_node_t* node, /*!< in: root node for an incomplete
- query graph, or NULL for dummy graph */
- trx_t* trx, /*!< in: transaction handle */
- mem_heap_t* heap) /*!< in: memory heap from which allocated */
+ que_node_t* node,
+ trx_t* trx,
+ mem_heap_t* heap,
+ row_prebuilt_t* prebuilt)
{
que_fork_t* fork;
que_thr_t* thr;
@@ -2283,7 +2036,7 @@ pars_complete_graph_for_exec(
fork = que_fork_create(NULL, NULL, QUE_FORK_MYSQL_INTERFACE, heap);
fork->trx = trx;
- thr = que_thr_create(fork, heap);
+ thr = que_thr_create(fork, heap, prebuilt);
thr->child = node;
@@ -2298,8 +2051,7 @@ pars_complete_graph_for_exec(
/****************************************************************//**
Create parser info struct.
-@return own: info struct */
-UNIV_INTERN
+@return own: info struct */
pars_info_t*
pars_info_create(void)
/*==================*/
@@ -2322,7 +2074,6 @@ pars_info_create(void)
/****************************************************************//**
Free info struct and everything it contains. */
-UNIV_INTERN
void
pars_info_free(
/*===========*/
@@ -2333,7 +2084,6 @@ pars_info_free(
/****************************************************************//**
Add bound literal. */
-UNIV_INTERN
void
pars_info_add_literal(
/*==================*/
@@ -2373,7 +2123,6 @@ pars_info_add_literal(
/****************************************************************//**
Equivalent to pars_info_add_literal(info, name, str, strlen(str),
DATA_VARCHAR, DATA_ENGLISH). */
-UNIV_INTERN
void
pars_info_add_str_literal(
/*======================*/
@@ -2388,7 +2137,6 @@ pars_info_add_str_literal(
/********************************************************************
If the literal value already exists then it rebinds otherwise it
creates a new entry.*/
-UNIV_INTERN
void
pars_info_bind_literal(
/*===================*/
@@ -2417,7 +2165,6 @@ pars_info_bind_literal(
/********************************************************************
If the literal value already exists then it rebinds otherwise it
creates a new entry.*/
-UNIV_INTERN
void
pars_info_bind_varchar_literal(
/*===========================*/
@@ -2451,7 +2198,6 @@ pars_info_add_literal(info, name, buf, 4, DATA_INT, 0);
except that the buffer is dynamically allocated from the info struct's
heap. */
-UNIV_INTERN
void
pars_info_add_int4_literal(
/*=======================*/
@@ -2468,7 +2214,6 @@ pars_info_add_int4_literal(
/********************************************************************
If the literal value already exists then it rebinds otherwise it
creates a new entry. */
-UNIV_INTERN
void
pars_info_bind_int4_literal(
/*========================*/
@@ -2494,7 +2239,6 @@ pars_info_bind_int4_literal(
/********************************************************************
If the literal value already exists then it rebinds otherwise it
creates a new entry. */
-UNIV_INTERN
void
pars_info_bind_int8_literal(
/*========================*/
@@ -2527,7 +2271,6 @@ pars_info_add_literal(info, name, buf, 8, DATA_FIXBINARY, 0);
except that the buffer is dynamically allocated from the info struct's
heap. */
-UNIV_INTERN
void
pars_info_add_ull_literal(
/*======================*/
@@ -2545,7 +2288,6 @@ pars_info_add_ull_literal(
/****************************************************************//**
If the literal value already exists then it rebinds otherwise it
creates a new entry. */
-UNIV_INTERN
void
pars_info_bind_ull_literal(
/*=======================*/
@@ -2571,7 +2313,6 @@ pars_info_bind_ull_literal(
/****************************************************************//**
Add user function. */
-UNIV_INTERN
void
pars_info_bind_function(
/*====================*/
@@ -2606,7 +2347,6 @@ pars_info_bind_function(
/********************************************************************
Add bound id. */
-UNIV_INTERN
void
pars_info_bind_id(
/*==============*/
@@ -2643,7 +2383,6 @@ pars_info_bind_id(
/********************************************************************
Get bound identifier with the given name.*/
-
pars_bound_id_t*
pars_info_get_bound_id(
/*===================*/
@@ -2657,8 +2396,7 @@ pars_info_get_bound_id(
/****************************************************************//**
Get bound literal with the given name.
-@return bound literal, or NULL if not found */
-UNIV_INTERN
+@return bound literal, or NULL if not found */
pars_bound_lit_t*
pars_info_get_bound_lit(
/*====================*/
diff --git a/storage/innobase/pars/pars0sym.cc b/storage/innobase/pars/pars0sym.cc
index 4ce1946d7be..6e416d7b635 100644
--- a/storage/innobase/pars/pars0sym.cc
+++ b/storage/innobase/pars/pars0sym.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,11 +24,6 @@ Created 12/15/1997 Heikki Tuuri
*******************************************************/
#include "pars0sym.h"
-
-#ifdef UNIV_NONINL
-#include "pars0sym.ic"
-#endif
-
#include "mem0mem.h"
#include "data0type.h"
#include "data0data.h"
@@ -40,8 +35,7 @@ Created 12/15/1997 Heikki Tuuri
/******************************************************************//**
Creates a symbol table for a single stored procedure or query.
-@return own: symbol table */
-UNIV_INTERN
+@return own: symbol table */
sym_tab_t*
sym_tab_create(
/*===========*/
@@ -52,8 +46,8 @@ sym_tab_create(
sym_tab = static_cast<sym_tab_t*>(
mem_heap_alloc(heap, sizeof(sym_tab_t)));
- UT_LIST_INIT(sym_tab->sym_list);
- UT_LIST_INIT(sym_tab->func_node_list);
+ UT_LIST_INIT(sym_tab->sym_list, &sym_node_t::sym_list);
+ UT_LIST_INIT(sym_tab->func_node_list, &func_node_t::func_node_list);
sym_tab->heap = heap;
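
The two-argument UT_LIST_INIT form used here passes a pointer-to-member, so the intrusive list learns at init time which field of the node type links its elements. A toy equivalent in plain C++ (the real macros are considerably more elaborate):

template <typename T>
struct list_node {
    T* next = nullptr;
};

template <typename T>
struct intrusive_list {
    list_node<T> T::* hook;   /* which member links the nodes */
    T* first = nullptr;

    explicit intrusive_list(list_node<T> T::* h) : hook(h) {}

    void push_front(T* elem)
    {
        (elem->*hook).next = first;
        first = elem;
    }
};

struct func_node {
    list_node<func_node> func_node_list;
};

/* usage: intrusive_list<func_node> l(&func_node::func_node_list); */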
@@ -65,7 +59,6 @@ sym_tab_create(
Frees the memory allocated dynamically AFTER parsing phase for variables
etc. in the symbol table. Does not free the mem heap where the table was
originally created. Frees also SQL explicit cursor definitions. */
-UNIV_INTERN
void
sym_tab_free_private(
/*=================*/
@@ -112,8 +105,7 @@ sym_tab_free_private(
/******************************************************************//**
Adds an integer literal to a symbol table.
-@return symbol table node */
-UNIV_INTERN
+@return symbol table node */
sym_node_t*
sym_tab_add_int_lit(
/*================*/
@@ -145,7 +137,7 @@ sym_tab_add_int_lit(
node->prefetch_buf = NULL;
node->cursor_def = NULL;
- UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
+ UT_LIST_ADD_LAST(sym_tab->sym_list, node);
node->like_node = NULL;
@@ -156,8 +148,7 @@ sym_tab_add_int_lit(
/******************************************************************//**
Adds a string literal to a symbol table.
-@return symbol table node */
-UNIV_INTERN
+@return symbol table node */
sym_node_t*
sym_tab_add_str_lit(
/*================*/
@@ -184,7 +175,7 @@ sym_tab_add_str_lit(
DATA_VARCHAR, DATA_ENGLISH, 0);
data = (len) ? static_cast<byte*>(mem_heap_dup(sym_tab->heap, str, len))
- : NULL;
+ : NULL;
dfield_set_data(&(node->common.val), data, len);
@@ -192,7 +183,7 @@ sym_tab_add_str_lit(
node->prefetch_buf = NULL;
node->cursor_def = NULL;
- UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
+ UT_LIST_ADD_LAST(sym_tab->sym_list, node);
node->like_node = NULL;
@@ -203,8 +194,7 @@ sym_tab_add_str_lit(
/******************************************************************//**
Add a bound literal to a symbol table.
-@return symbol table node */
-UNIV_INTERN
+@return symbol table node */
sym_node_t*
sym_tab_add_bound_lit(
/*==================*/
@@ -233,25 +223,15 @@ sym_tab_add_bound_lit(
switch (blit->type) {
case DATA_FIXBINARY:
+ case DATA_CHAR:
+ ut_ad(blit->length > 0);
len = blit->length;
- *lit_type = PARS_FIXBINARY_LIT;
- break;
-
+ /* fall through */
case DATA_BLOB:
- *lit_type = PARS_BLOB_LIT;
- break;
-
case DATA_VARCHAR:
*lit_type = PARS_STR_LIT;
break;
- case DATA_CHAR:
- ut_a(blit->length > 0);
-
- len = blit->length;
- *lit_type = PARS_STR_LIT;
- break;
-
case DATA_INT:
ut_a(blit->length > 0);
ut_a(blit->length <= 8);
@@ -273,7 +253,7 @@ sym_tab_add_bound_lit(
node->prefetch_buf = NULL;
node->cursor_def = NULL;
- UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
+ UT_LIST_ADD_LAST(sym_tab->sym_list, node);
blit->node = node;
node->like_node = NULL;
@@ -284,7 +264,6 @@ sym_tab_add_bound_lit(
/**********************************************************************
Rebind literal to a node in the symbol table. */
-
sym_node_t*
sym_tab_rebind_lit(
/*===============*/
@@ -328,8 +307,7 @@ sym_tab_rebind_lit(
/******************************************************************//**
Adds an SQL null literal to a symbol table.
-@return symbol table node */
-UNIV_INTERN
+@return symbol table node */
sym_node_t*
sym_tab_add_null_lit(
/*=================*/
@@ -356,7 +334,7 @@ sym_tab_add_null_lit(
node->prefetch_buf = NULL;
node->cursor_def = NULL;
- UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
+ UT_LIST_ADD_LAST(sym_tab->sym_list, node);
node->like_node = NULL;
@@ -367,8 +345,7 @@ sym_tab_add_null_lit(
/******************************************************************//**
Adds an identifier to a symbol table.
-@return symbol table node */
-UNIV_INTERN
+@return symbol table node */
sym_node_t*
sym_tab_add_id(
/*===========*/
@@ -386,7 +363,7 @@ sym_tab_add_id(
node->name = mem_heap_strdupl(sym_tab->heap, (char*) name, len);
node->name_len = len;
- UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
+ UT_LIST_ADD_LAST(sym_tab->sym_list, node);
dfield_set_null(&node->common.val);
@@ -397,8 +374,7 @@ sym_tab_add_id(
/******************************************************************//**
Add a bound identifier to a symbol table.
-@return symbol table node */
-UNIV_INTERN
+@return symbol table node */
sym_node_t*
sym_tab_add_bound_id(
/*=================*/
@@ -424,7 +400,7 @@ sym_tab_add_bound_id(
node->name = mem_heap_strdup(sym_tab->heap, bid->id);
node->name_len = strlen(node->name);
- UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
+ UT_LIST_ADD_LAST(sym_tab->sym_list, node);
dfield_set_null(&node->common.val);
diff --git a/storage/innobase/que/que0que.cc b/storage/innobase/que/que0que.cc
index 602b92854db..3ad948af4d2 100644
--- a/storage/innobase/que/que0que.cc
+++ b/storage/innobase/que/que0que.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2018, 2020 MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,12 +25,6 @@ Created 5/27/1996 Heikki Tuuri
*******************************************************/
#include "que0que.h"
-
-#ifdef UNIV_NONINL
-#include "que0que.ic"
-#endif
-
-#include "usr0sess.h"
#include "trx0trx.h"
#include "trx0roll.h"
#include "row0undo.h"
@@ -40,18 +35,9 @@ Created 5/27/1996 Heikki Tuuri
#include "dict0crea.h"
#include "log0log.h"
#include "eval0proc.h"
-#include "lock0lock.h"
-#include "eval0eval.h"
-#include "pars0types.h"
#define QUE_MAX_LOOPS_WITHOUT_CHECK 16
-#ifdef UNIV_DEBUG
-/* If the following flag is set TRUE, the module will print trace info
-of SQL execution in the UNIV_SQL_DEBUG version */
-UNIV_INTERN ibool que_trace_on = FALSE;
-#endif /* UNIV_DEBUG */
-
/* Short introduction to query graphs
==================================
@@ -123,8 +109,7 @@ que_thr_move_to_run_state(
/***********************************************************************//**
Creates a query graph fork node.
-@return own: fork node */
-UNIV_INTERN
+@return own: fork node */
que_fork_t*
que_fork_create(
/*============*/
@@ -153,22 +138,27 @@ que_fork_create(
fork->graph = (graph != NULL) ? graph : fork;
+ UT_LIST_INIT(fork->thrs, &que_thr_t::thrs);
+
return(fork);
}
-/***********************************************************************//**
-Creates a query graph thread node.
-@return own: query thread node */
-UNIV_INTERN
+
+/** Creates a query graph thread node.
+@param[in] parent parent node, i.e., a fork node
+@param[in] heap memory heap where created
+@param[in] prebuilt row prebuilt structure
+@return own: query thread node */
que_thr_t*
que_thr_create(
-/*===========*/
- que_fork_t* parent, /*!< in: parent node, i.e., a fork node */
- mem_heap_t* heap) /*!< in: memory heap where created */
+ que_fork_t* parent,
+ mem_heap_t* heap,
+ row_prebuilt_t* prebuilt)
{
que_thr_t* thr;
- ut_ad(parent && heap);
+ ut_ad(parent != NULL);
+ ut_ad(heap != NULL);
thr = static_cast<que_thr_t*>(mem_heap_zalloc(heap, sizeof(*thr)));
@@ -184,7 +174,9 @@ que_thr_create(
thr->lock_state = QUE_THR_LOCK_NOLOCK;
- UT_LIST_ADD_LAST(thrs, parent->thrs, thr);
+ thr->prebuilt = prebuilt;
+
+ UT_LIST_ADD_LAST(parent->thrs, thr);
return(thr);
}
@@ -195,12 +187,11 @@ a worker thread to execute it. This function should be used to end
the wait state of a query thread waiting for a lock or a stored procedure
completion.
@return the query thread that needs to be released. */
-UNIV_INTERN
que_thr_t*
que_thr_end_lock_wait(
/*==================*/
trx_t* trx) /*!< in: transaction with que_state in
- QUE_THR_LOCK_WAIT */
+ QUE_THR_LOCK_WAIT */
{
que_thr_t* thr;
ibool was_active;
@@ -249,7 +240,6 @@ Round robin scheduler.
@return a query thread of the graph moved to QUE_THR_RUNNING state, or
NULL; the query thread should be executed by que_run_threads by the
caller */
-UNIV_INTERN
que_thr_t*
que_fork_scheduler_round_robin(
/*===========================*/
@@ -299,7 +289,6 @@ is returned.
@return a query thread of the graph moved to QUE_THR_RUNNING state, or
NULL; the query thread should be executed by que_run_threads by the
caller */
-UNIV_INTERN
que_thr_t*
que_fork_start_command(
/*===================*/
@@ -358,9 +347,10 @@ que_fork_start_command(
break;
+ case QUE_THR_RUNNING:
case QUE_THR_LOCK_WAIT:
+ case QUE_THR_PROCEDURE_WAIT:
ut_error;
-
}
}
@@ -398,7 +388,6 @@ que_graph_free_stat_list(
/**********************************************************************//**
Frees a query graph, but not the heap where it was created. Does not free
explicit cursor declarations, they are freed in que_graph_free. */
-UNIV_INTERN
void
que_graph_free_recursive(
/*=====================*/
@@ -414,11 +403,17 @@ que_graph_free_recursive(
ind_node_t* cre_ind;
purge_node_t* purge;
+ DBUG_ENTER("que_graph_free_recursive");
+
if (node == NULL) {
- return;
+ DBUG_VOID_RETURN;
}
+ DBUG_PRINT("que_graph_free_recursive",
+ ("node: %p, type: " ULINTPF, node,
+ que_node_get_type(node)));
+
switch (que_node_get_type(node)) {
case QUE_NODE_FORK:
@@ -437,14 +432,7 @@ que_graph_free_recursive(
thr = static_cast<que_thr_t*>(node);
- if (thr->magic_n != QUE_THR_MAGIC_N) {
- fprintf(stderr,
- "que_thr struct appears corrupt;"
- " magic n %lu\n",
- (unsigned long) thr->magic_n);
- mem_analyze_corruption(thr);
- ut_error;
- }
+ ut_a(thr->magic_n == QUE_THR_MAGIC_N);
thr->magic_n = QUE_THR_MAGIC_FREED;
@@ -470,8 +458,14 @@ que_graph_free_recursive(
ins = static_cast<ins_node_t*>(node);
que_graph_free_recursive(ins->select);
+ ins->select = NULL;
+
+ ins->~ins_node_t();
- mem_heap_free(ins->entry_sys_heap);
+ if (ins->entry_sys_heap != NULL) {
+ mem_heap_free(ins->entry_sys_heap);
+ ins->entry_sys_heap = NULL;
+ }
break;
case QUE_NODE_PURGE:
@@ -482,23 +476,28 @@ que_graph_free_recursive(
break;
case QUE_NODE_UPDATE:
-
upd = static_cast<upd_node_t*>(node);
if (upd->in_mysql_interface) {
btr_pcur_free_for_mysql(upd->pcur);
+ upd->in_mysql_interface = FALSE;
}
que_graph_free_recursive(upd->cascade_node);
if (upd->cascade_heap) {
mem_heap_free(upd->cascade_heap);
+ upd->cascade_heap = NULL;
}
que_graph_free_recursive(upd->select);
+ upd->select = NULL;
- mem_heap_free(upd->heap);
+ if (upd->heap != NULL) {
+ mem_heap_free(upd->heap);
+ upd->heap = NULL;
+ }
break;
case QUE_NODE_CREATE_TABLE:
@@ -506,7 +505,7 @@ que_graph_free_recursive(
que_graph_free_recursive(cre_tab->tab_def);
que_graph_free_recursive(cre_tab->col_def);
- que_graph_free_recursive(cre_tab->commit_node);
+ que_graph_free_recursive(cre_tab->v_col_def);
mem_heap_free(cre_tab->heap);
@@ -516,7 +515,6 @@ que_graph_free_recursive(
que_graph_free_recursive(cre_ind->ind_def);
que_graph_free_recursive(cre_ind->field_def);
- que_graph_free_recursive(cre_ind->commit_node);
mem_heap_free(cre_ind->heap);
@@ -559,17 +557,14 @@ que_graph_free_recursive(
break;
default:
- fprintf(stderr,
- "que_node struct appears corrupt; type %lu\n",
- (unsigned long) que_node_get_type(node));
- mem_analyze_corruption(node);
ut_error;
}
+
+ DBUG_VOID_RETURN;
}
/**********************************************************************//**
Frees a query graph. */
-UNIV_INTERN
void
que_graph_free(
/*===========*/
@@ -600,7 +595,7 @@ que_graph_free(
/****************************************************************//**
Performs an execution step on a thr node.
-@return query thread to run next, or NULL if none */
+@return query thread to run next, or NULL if none */
static
que_thr_t*
que_thr_node_step(
@@ -669,8 +664,7 @@ que_thr_move_to_run_state(
/**********************************************************************//**
Stops a query thread if graph or trx is in a state requiring it. The
conditions are tested in the order (1) graph, (2) trx.
-@return TRUE if stopped */
-UNIV_INTERN
+@return TRUE if stopped */
ibool
que_thr_stop(
/*=========*/
@@ -781,7 +775,6 @@ A patch for MySQL used to 'stop' a dummy query thread used in MySQL. The
query thread is stopped and made inactive, except in the case where
it was put to the lock wait state in lock0lock.cc, but the lock has already
been granted or the transaction chosen as a victim in deadlock resolution. */
-UNIV_INTERN
void
que_thr_stop_for_mysql(
/*===================*/
@@ -791,9 +784,6 @@ que_thr_stop_for_mysql(
trx = thr_get_trx(thr);
- /* Can't be the purge transaction. */
- ut_a(trx->id != 0);
-
trx_mutex_enter(trx);
if (thr->state == QUE_THR_RUNNING) {
@@ -830,22 +820,13 @@ que_thr_stop_for_mysql(
Moves a thread from another state to the QUE_THR_RUNNING state. Increments
the n_active_thrs counters of the query graph and transaction if thr was
not active. */
-UNIV_INTERN
void
que_thr_move_to_run_state_for_mysql(
/*================================*/
que_thr_t* thr, /*!< in: an query thread */
trx_t* trx) /*!< in: transaction */
{
- if (thr->magic_n != QUE_THR_MAGIC_N) {
- fprintf(stderr,
- "que_thr struct appears corrupt; magic n %lu\n",
- (unsigned long) thr->magic_n);
-
- mem_analyze_corruption(thr);
-
- ut_error;
- }
+ ut_a(thr->magic_n == QUE_THR_MAGIC_N);
if (!thr->is_active) {
@@ -862,7 +843,6 @@ que_thr_move_to_run_state_for_mysql(
/**********************************************************************//**
A patch for MySQL used to 'stop' a dummy query thread used in MySQL
select, when there is no error or lock wait. */
-UNIV_INTERN
void
que_thr_stop_for_mysql_no_error(
/*============================*/
@@ -870,20 +850,10 @@ que_thr_stop_for_mysql_no_error(
trx_t* trx) /*!< in: transaction */
{
ut_ad(thr->state == QUE_THR_RUNNING);
- ut_ad(thr_get_trx(thr)->id != 0);
ut_ad(thr->is_active == TRUE);
ut_ad(trx->lock.n_active_thrs == 1);
ut_ad(thr->graph->n_active_thrs == 1);
-
- if (thr->magic_n != QUE_THR_MAGIC_N) {
- fprintf(stderr,
- "que_thr struct appears corrupt; magic n %lu\n",
- (unsigned long) thr->magic_n);
-
- mem_analyze_corruption(thr);
-
- ut_error;
- }
+ ut_a(thr->magic_n == QUE_THR_MAGIC_N);
thr->state = QUE_THR_COMPLETED;
@@ -896,8 +866,7 @@ que_thr_stop_for_mysql_no_error(
/****************************************************************//**
Get the first containing loop node (e.g. while_node_t or for_node_t) for the
given node, or NULL if the node is not within a loop.
-@return containing loop node, or NULL. */
-UNIV_INTERN
+@return containing loop node, or NULL. */
que_node_t*
que_node_get_containing_loop_node(
/*==============================*/
@@ -924,68 +893,64 @@ que_node_get_containing_loop_node(
return(node);
}
-/**********************************************************************//**
-Prints info of an SQL query graph node. */
-UNIV_INTERN
-void
-que_node_print_info(
-/*================*/
- que_node_t* node) /*!< in: query graph node */
+#ifndef DBUG_OFF
+/** Gets a string representation of an SQL query graph node type.
+@return type description */
+static MY_ATTRIBUTE((warn_unused_result, nonnull))
+const char*
+que_node_type_string(
+/*=================*/
+ const que_node_t* node) /*!< in: query graph node */
{
- ulint type;
- const char* str;
-
- type = que_node_get_type(node);
-
- if (type == QUE_NODE_SELECT) {
- str = "SELECT";
- } else if (type == QUE_NODE_INSERT) {
- str = "INSERT";
- } else if (type == QUE_NODE_UPDATE) {
- str = "UPDATE";
- } else if (type == QUE_NODE_WHILE) {
- str = "WHILE";
- } else if (type == QUE_NODE_ASSIGNMENT) {
- str = "ASSIGNMENT";
- } else if (type == QUE_NODE_IF) {
- str = "IF";
- } else if (type == QUE_NODE_FETCH) {
- str = "FETCH";
- } else if (type == QUE_NODE_OPEN) {
- str = "OPEN";
- } else if (type == QUE_NODE_PROC) {
- str = "STORED PROCEDURE";
- } else if (type == QUE_NODE_FUNC) {
- str = "FUNCTION";
- } else if (type == QUE_NODE_LOCK) {
- str = "LOCK";
- } else if (type == QUE_NODE_THR) {
- str = "QUERY THREAD";
- } else if (type == QUE_NODE_COMMIT) {
- str = "COMMIT";
- } else if (type == QUE_NODE_UNDO) {
- str = "UNDO ROW";
- } else if (type == QUE_NODE_PURGE) {
- str = "PURGE ROW";
- } else if (type == QUE_NODE_ROLLBACK) {
- str = "ROLLBACK";
- } else if (type == QUE_NODE_CREATE_TABLE) {
- str = "CREATE TABLE";
- } else if (type == QUE_NODE_CREATE_INDEX) {
- str = "CREATE INDEX";
- } else if (type == QUE_NODE_FOR) {
- str = "FOR LOOP";
- } else if (type == QUE_NODE_RETURN) {
- str = "RETURN";
- } else if (type == QUE_NODE_EXIT) {
- str = "EXIT";
- } else {
- str = "UNKNOWN NODE TYPE";
+ switch (que_node_get_type(node)) {
+ case QUE_NODE_SELECT:
+ return("SELECT");
+ case QUE_NODE_INSERT:
+ return("INSERT");
+ case QUE_NODE_UPDATE:
+ return("UPDATE");
+ case QUE_NODE_WHILE:
+ return("WHILE");
+ case QUE_NODE_ASSIGNMENT:
+ return("ASSIGNMENT");
+ case QUE_NODE_IF:
+ return("IF");
+ case QUE_NODE_FETCH:
+ return("FETCH");
+ case QUE_NODE_OPEN:
+ return("OPEN");
+ case QUE_NODE_PROC:
+ return("STORED PROCEDURE");
+ case QUE_NODE_FUNC:
+ return("FUNCTION");
+ case QUE_NODE_LOCK:
+ return("LOCK");
+ case QUE_NODE_THR:
+ return("QUERY THREAD");
+ case QUE_NODE_COMMIT:
+ return("COMMIT");
+ case QUE_NODE_UNDO:
+ return("UNDO ROW");
+ case QUE_NODE_PURGE:
+ return("PURGE ROW");
+ case QUE_NODE_ROLLBACK:
+ return("ROLLBACK");
+ case QUE_NODE_CREATE_TABLE:
+ return("CREATE TABLE");
+ case QUE_NODE_CREATE_INDEX:
+ return("CREATE INDEX");
+ case QUE_NODE_FOR:
+ return("FOR LOOP");
+ case QUE_NODE_RETURN:
+ return("RETURN");
+ case QUE_NODE_EXIT:
+ return("EXIT");
+ default:
+ ut_ad(0);
+ return("UNKNOWN NODE TYPE");
}
-
- fprintf(stderr, "Node type %lu: %s, address %p\n",
- (ulong) type, str, (void*) node);
}
+#endif /* !DBUG_OFF */
/**********************************************************************//**
Performs an execution step on a query thread.
@@ -1014,12 +979,10 @@ que_thr_step(
old_thr = thr;
-#ifdef UNIV_DEBUG
- if (que_trace_on) {
- fputs("To execute: ", stderr);
- que_node_print_info(node);
- }
-#endif
+ DBUG_PRINT("ib_que", ("Execute %u (%s) at %p",
+ unsigned(type), que_node_type_string(node),
+ (const void*) node));
+
if (type & QUE_NODE_CONTROL_STAT) {
if ((thr->prev_node != que_node_get_parent(node))
&& que_node_get_next(thr->prev_node)) {
@@ -1168,7 +1131,6 @@ que_run_threads_low(
/**********************************************************************//**
Run a query thread. Handles lock waits. */
-UNIV_INTERN
void
que_run_threads(
/*============*/
@@ -1220,8 +1182,7 @@ loop:
/*********************************************************************//**
Evaluate the given SQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
dberr_t
que_eval_sql(
/*=========*/
@@ -1235,6 +1196,9 @@ que_eval_sql(
que_thr_t* thr;
que_t* graph;
+ DBUG_ENTER("que_eval_sql");
+ DBUG_PRINT("que_eval_sql", ("query: %s", sql));
+
ut_a(trx->error_state == DB_SUCCESS);
if (reserve_dict_mutex) {
@@ -1247,8 +1211,6 @@ que_eval_sql(
mutex_exit(&dict_sys->mutex);
}
- ut_a(graph);
-
graph->trx = trx;
trx->graph = NULL;
@@ -1268,25 +1230,5 @@ que_eval_sql(
mutex_exit(&dict_sys->mutex);
}
- return(trx->error_state);
-}
-
-/*********************************************************************//**
-Initialise the query sub-system. */
-UNIV_INTERN
-void
-que_init(void)
-/*==========*/
-{
- /* No op */
-}
-
-/*********************************************************************//**
-Close the query sub-system. */
-UNIV_INTERN
-void
-que_close(void)
-/*===========*/
-{
- /* No op */
+ DBUG_RETURN(trx->error_state);
}
diff --git a/storage/innobase/read/read0read.cc b/storage/innobase/read/read0read.cc
index be304dfcc2e..3fd52d5d6dd 100644
--- a/storage/innobase/read/read0read.cc
+++ b/storage/innobase/read/read0read.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,10 +26,6 @@ Created 2/16/1997 Heikki Tuuri
#include "read0read.h"
-#ifdef UNIV_NONINL
-#include "read0read.ic"
-#endif
-
#include "srv0srv.h"
#include "trx0sys.h"
@@ -135,7 +132,7 @@ in any cursor read view.
PROOF: We know that:
1: Currently active read views in trx_sys_t::view_list are ordered by
- read_view_t::low_limit_no in descending order, that is,
+ ReadView::low_limit_no in descending order, that is,
newest read view first.
2: Purge clones the oldest read view and uses that to determine whether there
@@ -171,484 +168,621 @@ try to open read_view at same time. Only one can acquire trx_sys->mutex.
In which order will the views be opened? Should it matter? If no, why?
The order does not matter. No new transactions can be created and no running
-transaction can commit or rollback (or free views).
+RW transaction can commit or rollback (or free views). AC-NL-RO transactions
+will mark their views as closed but not actually free their views.
*/
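
The invariant this argument rests on fits in a few lines of plain C++ (illustrative only; the ViewCheck functor below enforces it over the real UT_LIST):

#include <algorithm>
#include <vector>

/* True when low-limit numbers run newest-first, i.e. descending,
so the last element is always the oldest view for purge to clone. */
bool newest_first(const std::vector<unsigned long long>& low_limit_nos)
{
    return std::is_sorted(low_limit_nos.rbegin(), low_limit_nos.rend());
}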
-/*********************************************************************//**
-Creates a read view object.
-@return own: read view struct */
-UNIV_INLINE
-read_view_t*
-read_view_create_low(
-/*=================*/
- ulint n, /*!< in: number of cells in the trx_ids array */
- mem_heap_t* heap) /*!< in: memory heap from which allocated */
+/** Minimum number of elements to reserve in ReadView::ids_t */
+static const ulint MIN_TRX_IDS = 32;
+
+#ifdef UNIV_DEBUG
+/** Functor to validate the view list. */
+struct ViewCheck {
+
+ ViewCheck() : m_prev_view() { }
+
+ void operator()(const ReadView* view)
+ {
+ ut_a(m_prev_view == NULL
+ || view->is_closed()
+ || view->le(m_prev_view));
+
+ m_prev_view = view;
+ }
+
+ const ReadView* m_prev_view;
+};
+
+/**
+Validates a read view list. */
+
+bool
+MVCC::validate() const
{
- read_view_t* view;
+ ViewCheck check;
- view = static_cast<read_view_t*>(
- mem_heap_alloc(
- heap, sizeof(*view) + n * sizeof(*view->trx_ids)));
+ ut_ad(mutex_own(&trx_sys->mutex));
- view->n_trx_ids = n;
- view->trx_ids = (trx_id_t*) &view[1];
+ ut_list_map(m_views, check);
- return(view);
+ return(true);
}
+#endif /* UNIV_DEBUG */
-/*********************************************************************//**
-Clones a read view object. This function will allocate space for two read
-views contiguously, one identical in size and content as @param view (starting
-at returned pointer) and another view immediately following the trx_ids array.
-The second view will have space for an extra trx_id_t element.
-@return read view struct */
-UNIV_INLINE
-read_view_t*
-read_view_clone(
-/*============*/
- const read_view_t* view, /*!< in: view to clone */
- mem_heap_t* heap) /*!< in: memory heap
- from which allocated */
+/**
+Try and increase the size of the array. Old elements are
+copied across.
+@param n Make space for n elements */
+
+void
+ReadView::ids_t::reserve(ulint n)
{
- ulint sz;
- read_view_t* clone;
- read_view_t* new_view;
+ if (n <= capacity()) {
+ return;
+ }
- ut_ad(mutex_own(&trx_sys->mutex));
+ /** Keep a minimum threshold */
+ if (n < MIN_TRX_IDS) {
+ n = MIN_TRX_IDS;
+ }
+
+ value_type* p = m_ptr;
+
+ m_ptr = UT_NEW_ARRAY_NOKEY(value_type, n);
- /* Allocate space for two views. */
+ m_reserved = n;
- sz = sizeof(*view) + view->n_trx_ids * sizeof(*view->trx_ids);
+ ut_ad(size() < capacity());
- /* Add an extra trx_id_t slot for the new view. */
+ if (p != NULL) {
- clone = static_cast<read_view_t*>(
- mem_heap_alloc(heap, (sz * 2) + sizeof(trx_id_t)));
+ ::memmove(m_ptr, p, size() * sizeof(value_type));
- /* Only the contents of the old view are important, the new view
- will be created from this and so we don't copy that across. */
+ UT_DELETE_ARRAY(p);
+ }
+}
+
+/**
+Copy and overwrite this array contents
+@param start Source array
+@param end Pointer to end of array */
+
+void
+ReadView::ids_t::assign(const value_type* start, const value_type* end)
+{
+ ut_ad(end >= start);
+
+ ulint n = end - start;
- memcpy(clone, view, sz);
+ /* No need to copy the old contents across during reserve(). */
+ clear();
- clone->trx_ids = (trx_id_t*) &clone[1];
+ /* Create extra space if required. */
+ reserve(n);
- new_view = (read_view_t*) &clone->trx_ids[clone->n_trx_ids];
- new_view->trx_ids = (trx_id_t*) &new_view[1];
- new_view->n_trx_ids = clone->n_trx_ids + 1;
+ resize(n);
- ut_a(new_view->n_trx_ids == view->n_trx_ids + 1);
+ ut_ad(size() == n);
- return(clone);
+ ::memmove(m_ptr, start, size() * sizeof(value_type));
}
-/*********************************************************************//**
-Insert the view in the proper order into the trx_sys->view_list. The
-read view list is ordered by read_view_t::low_limit_no in descending order. */
-static
+/**
+Append a value to the array.
+@param value the value to append */
+
void
-read_view_add(
-/*==========*/
- read_view_t* view) /*!< in: view to add to */
+ReadView::ids_t::push_back(value_type value)
{
- read_view_t* elem;
- read_view_t* prev_elem;
+ if (capacity() <= size()) {
+ reserve(size() * 2);
+ }
- ut_ad(mutex_own(&trx_sys->mutex));
- ut_ad(read_view_validate(view));
+ m_ptr[m_size++] = value;
+ ut_ad(size() <= capacity());
+}
+
+/**
+Insert the value in the correct slot, preserving the order. Doesn't
+check for duplicates. */
+
+void
+ReadView::ids_t::insert(value_type value)
+{
+ ut_ad(value > 0);
- /* Find the correct slot for insertion. */
- for (elem = UT_LIST_GET_FIRST(trx_sys->view_list), prev_elem = NULL;
- elem != NULL && view->low_limit_no < elem->low_limit_no;
- prev_elem = elem, elem = UT_LIST_GET_NEXT(view_list, elem)) {
- /* No op */
+ reserve(size() + 1);
+
+ if (empty() || back() < value) {
+ push_back(value);
+ return;
}
- if (prev_elem == NULL) {
- UT_LIST_ADD_FIRST(view_list, trx_sys->view_list, view);
+ value_type* end = data() + size();
+ value_type* ub = std::upper_bound(data(), end, value);
+
+ if (ub == end) {
+ push_back(value);
} else {
- UT_LIST_INSERT_AFTER(
- view_list, trx_sys->view_list, prev_elem, view);
+ ut_ad(ub < end);
+
+ ulint n_elems = std::distance(ub, end);
+ ulint n = n_elems * sizeof(value_type);
+
+ /* Note: Copying overlapped memory locations. */
+ ::memmove(ub + 1, ub, n);
+
+ *ub = value;
+
+ resize(size() + 1);
}
+}
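
The memmove()-based insert above is the manual form of the usual ordered insert; a std::vector equivalent, shown only to make the intent obvious (like the original it does not check for duplicates):

#include <algorithm>
#include <vector>

void ordered_insert(std::vector<unsigned long long>& v,
                    unsigned long long value)
{
    /* Place the value at the first slot that keeps v sorted. */
    v.insert(std::upper_bound(v.begin(), v.end(), value), value);
}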
+
+/**
+ReadView constructor */
+ReadView::ReadView()
+ :
+ m_low_limit_id(),
+ m_up_limit_id(),
+ m_creator_trx_id(),
+ m_ids(),
+ m_low_limit_no()
+{
+ ut_d(::memset(&m_view_list, 0x0, sizeof(m_view_list)));
+}
- ut_ad(read_view_list_validate());
+/**
+ReadView destructor */
+ReadView::~ReadView()
+{
+ // Do nothing
}
-/** Functor to create thew view trx_ids array. */
-struct CreateView {
+/** Constructor
+@param size Number of views to pre-allocate */
+MVCC::MVCC(ulint size)
+{
+ UT_LIST_INIT(m_free, &ReadView::m_view_list);
+ UT_LIST_INIT(m_views, &ReadView::m_view_list);
- CreateView(read_view_t* view)
- : m_view(view)
- {
- m_n_trx = m_view->n_trx_ids;
- m_view->n_trx_ids = 0;
+ for (ulint i = 0; i < size; ++i) {
+ ReadView* view = UT_NEW_NOKEY(ReadView());
+
+ UT_LIST_ADD_FIRST(m_free, view);
}
+}
- void operator()(const trx_t* trx)
- {
- ut_ad(mutex_own(&trx_sys->mutex));
- ut_ad(trx->in_rw_trx_list);
+MVCC::~MVCC()
+{
+ for (ReadView* view = UT_LIST_GET_FIRST(m_free);
+ view != NULL;
+ view = UT_LIST_GET_FIRST(m_free)) {
- /* trx->state cannot change from or to NOT_STARTED
- while we are holding the trx_sys->mutex. It may change
- from ACTIVE to PREPARED or COMMITTED. */
+ UT_LIST_REMOVE(m_free, view);
- if (trx->id != m_view->creator_trx_id
- && !trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)) {
+ UT_DELETE(view);
+ }
- ut_ad(m_n_trx > m_view->n_trx_ids);
+ ut_a(UT_LIST_GET_LEN(m_views) == 0);
+}
- m_view->trx_ids[m_view->n_trx_ids++] = trx->id;
+/**
+Copy the transaction ids from the source vector */
- /* NOTE that a transaction whose trx number is <
- trx_sys->max_trx_id can still be active, if it is
- in the middle of its commit! Note that when a
- transaction starts, we initialize trx->no to
- TRX_ID_MAX. */
+void
+ReadView::copy_trx_ids(const trx_ids_t& trx_ids)
+{
+ ulint size = trx_ids.size();
- /* trx->no is protected by trx_sys->mutex, which
- we are holding. It is assigned by trx_commit()
- before lock_trx_release_locks() assigns
- trx->state = TRX_STATE_COMMITTED_IN_MEMORY. */
+ if (m_creator_trx_id > 0) {
+ ut_ad(size > 0);
+ --size;
+ }
- if (m_view->low_limit_no > trx->no) {
- m_view->low_limit_no = trx->no;
- }
- }
+ if (size == 0) {
+ m_ids.clear();
+ return;
}
- read_view_t* m_view;
- ulint m_n_trx;
-};
+ m_ids.reserve(size);
+ m_ids.resize(size);
-/*********************************************************************//**
-Opens a read view where exactly the transactions serialized before this
-point in time are seen in the view.
-@return own: read view struct */
-static
-read_view_t*
-read_view_open_now_low(
-/*===================*/
- trx_id_t cr_trx_id, /*!< in: trx_id of creating
- transaction, or 0 used in purge */
- mem_heap_t* heap) /*!< in: memory heap from which
- allocated */
-{
- read_view_t* view;
- ulint n_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
+ ids_t::value_type* p = m_ids.data();
- ut_ad(mutex_own(&trx_sys->mutex));
+ /* Copy all the trx_ids except the creator trx id */
+
+ if (m_creator_trx_id > 0) {
- view = read_view_create_low(n_trx, heap);
+ /* Note: We go through all this trouble because it is
+ unclear whether std::vector::resize() will cause an
+ overhead or not. We should test this extensively and,
+ if the vector-to-vector copy is fast enough, then get
+ rid of this code and replace it with more readable
+ and obvious code. The code below does exactly one copy,
+ and filters out the creator's trx id. */
- view->undo_no = 0;
- view->type = VIEW_NORMAL;
- view->creator_trx_id = cr_trx_id;
+ trx_ids_t::const_iterator it = std::lower_bound(
+ trx_ids.begin(), trx_ids.end(), m_creator_trx_id);
- /* No future transactions should be visible in the view */
+ ut_ad(it != trx_ids.end() && *it == m_creator_trx_id);
- view->low_limit_no = trx_sys->max_trx_id;
- view->low_limit_id = view->low_limit_no;
+ ulint i = std::distance(trx_ids.begin(), it);
+ ulint n = i * sizeof(trx_ids_t::value_type);
- /* No active transaction should be visible, except cr_trx */
+ ::memmove(p, &trx_ids[0], n);
- ut_list_map(trx_sys->rw_trx_list, &trx_t::trx_list, CreateView(view));
+ n = (trx_ids.size() - i - 1) * sizeof(trx_ids_t::value_type);
- if (view->n_trx_ids > 0) {
- /* The last active transaction has the smallest id: */
- view->up_limit_id = view->trx_ids[view->n_trx_ids - 1];
+ ut_ad(i + (n / sizeof(trx_ids_t::value_type)) == m_ids.size());
+
+ if (n > 0) {
+ ::memmove(p + i, &trx_ids[i + 1], n);
+ }
} else {
- view->up_limit_id = view->low_limit_id;
- }
+ ulint n = size * sizeof(trx_ids_t::value_type);
- /* Purge views are not added to the view list. */
- if (cr_trx_id > 0) {
- read_view_add(view);
+ ::memmove(p, &trx_ids[0], n);
}
- return(view);
+#ifdef UNIV_DEBUG
+ /* Assert that all transaction ids in list are active. */
+ for (trx_ids_t::const_iterator it = trx_ids.begin();
+ it != trx_ids.end(); ++it) {
+
+ trx_t* trx = trx_get_rw_trx_by_id(*it);
+ ut_ad(trx != NULL);
+ switch (trx->state) {
+ case TRX_STATE_ACTIVE:
+ case TRX_STATE_PREPARED:
+ case TRX_STATE_PREPARED_RECOVERED:
+ case TRX_STATE_COMMITTED_IN_MEMORY:
+ continue;
+ case TRX_STATE_NOT_STARTED:
+ break;
+ }
+ ut_ad(!"invalid state");
+ }
+#endif /* UNIV_DEBUG */
}
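
The single-copy filter above, restated with standard algorithms as a sketch (not a drop-in replacement; the real code deliberately avoids extra copies on this path):

#include <algorithm>
#include <iterator>
#include <vector>

std::vector<unsigned long long>
copy_without_creator(const std::vector<unsigned long long>& ids,
                     unsigned long long creator_id)
{
    std::vector<unsigned long long> out;
    out.reserve(ids.size());
    /* Copy every id except the creator's own, which is what the
    lower_bound()/memmove() pair above achieves in one pass. */
    std::remove_copy(ids.begin(), ids.end(),
                     std::back_inserter(out), creator_id);
    return out;
}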
-/*********************************************************************//**
+/**
Opens a read view where exactly the transactions serialized before this
point in time are seen in the view.
-@return own: read view struct */
-UNIV_INTERN
-read_view_t*
-read_view_open_now(
-/*===============*/
- trx_id_t cr_trx_id, /*!< in: trx_id of creating
- transaction, or 0 used in purge */
- mem_heap_t* heap) /*!< in: memory heap from which
- allocated */
+@param id Creator transaction id */
+
+void
+ReadView::prepare(trx_id_t id)
{
- read_view_t* view;
+ ut_ad(mutex_own(&trx_sys->mutex));
- mutex_enter(&trx_sys->mutex);
+ m_creator_trx_id = id;
- view = read_view_open_now_low(cr_trx_id, heap);
+ m_low_limit_no = m_low_limit_id = trx_sys->max_trx_id;
- mutex_exit(&trx_sys->mutex);
+ if (!trx_sys->rw_trx_ids.empty()) {
+ copy_trx_ids(trx_sys->rw_trx_ids);
+ } else {
+ m_ids.clear();
+ }
- return(view);
+ if (UT_LIST_GET_LEN(trx_sys->serialisation_list) > 0) {
+ const trx_t* trx;
+
+ trx = UT_LIST_GET_FIRST(trx_sys->serialisation_list);
+
+ if (trx->no < m_low_limit_no) {
+ m_low_limit_no = trx->no;
+ }
+ }
}
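
prepare() fixes the visibility boundaries: m_low_limit_id from trx_sys->max_trx_id, and the snapshot of active ids via copy_trx_ids(). A hedged sketch of the visibility rule these fields encode (illustrative types, not the InnoDB API):

    #include <algorithm>
    #include <vector>

    typedef unsigned long long trx_id_t;

    struct SnapshotView {
        trx_id_t              up_limit;   /* smallest active id at creation */
        trx_id_t              low_limit;  /* max trx id at creation */
        std::vector<trx_id_t> active;     /* sorted; creator id excluded */

        /* A change made by transaction `id` is visible iff that
        transaction had committed before this view was created. */
        bool sees(trx_id_t id) const
        {
            if (id < up_limit)   return true;   /* committed before the view */
            if (id >= low_limit) return false;  /* started after the view */
            return !std::binary_search(active.begin(), active.end(), id);
        }
    };
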
-/*********************************************************************//**
-Makes a copy of the oldest existing read view, with the exception that also
-the creating trx of the oldest view is set as not visible in the 'copied'
-view. Opens a new view if no views currently exist. The view must be closed
-with ..._close. This is used in purge.
-@return own: read view struct */
-UNIV_INTERN
-read_view_t*
-read_view_purge_open(
-/*=================*/
- mem_heap_t* heap) /*!< in: memory heap from which
- allocated */
+/**
+Complete the read view creation */
+
+void
+ReadView::complete()
{
- ulint i;
- read_view_t* view;
- read_view_t* oldest_view;
- trx_id_t creator_trx_id;
- ulint insert_done = 0;
+ /* The first active transaction has the smallest id. */
+ m_up_limit_id = !m_ids.empty() ? m_ids.front() : m_low_limit_id;
- mutex_enter(&trx_sys->mutex);
+ ut_ad(m_up_limit_id <= m_low_limit_id);
- oldest_view = UT_LIST_GET_LAST(trx_sys->view_list);
+ m_closed = false;
+}
- if (oldest_view == NULL) {
+/**
+Find a free view from the free list; if none is found, allocate
+a new view.
+@return a view to use */
+
+ReadView*
+MVCC::get_view()
+{
+ ut_ad(mutex_own(&trx_sys->mutex));
- view = read_view_open_now_low(0, heap);
+ ReadView* view;
- mutex_exit(&trx_sys->mutex);
+ if (UT_LIST_GET_LEN(m_free) > 0) {
+ view = UT_LIST_GET_FIRST(m_free);
+ UT_LIST_REMOVE(m_free, view);
+ } else {
+ view = UT_NEW_NOKEY(ReadView());
- return(view);
+ if (view == NULL) {
+ ib::error() << "Failed to allocate MVCC view";
+ }
}
- /* Allocate space for both views, the oldest and the new purge view. */
+ return(view);
+}
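
get_view() is a plain free-list allocator: pop a recycled object when one exists, otherwise allocate. A standalone sketch of the pattern (the ViewPool type is hypothetical, not in the patch):

    #include <new>
    #include <vector>

    struct View { bool closed; View() : closed(true) {} };

    class ViewPool {
        std::vector<View*> m_free;
    public:
        View* get()
        {
            if (!m_free.empty()) {
                View* v = m_free.back();
                m_free.pop_back();
                return v;
            }
            return new (std::nothrow) View();   /* NULL on failure */
        }
        void put(View* v) { m_free.push_back(v); }
        ~ViewPool()
        {
            while (!m_free.empty()) {
                delete m_free.back();
                m_free.pop_back();
            }
        }
    };
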
+
+/**
+Release a view that is inactive but not closed. Caller must own
+the trx_sys_t::mutex.
+@param view View to release */
+void
+MVCC::view_release(ReadView*& view)
+{
+ ut_ad(!srv_read_only_mode);
+ ut_ad(trx_sys_mutex_own());
+
+ uintptr_t p = reinterpret_cast<uintptr_t>(view);
+
+ ut_a(p & 0x1);
+
+ view = reinterpret_cast<ReadView*>(p & ~1);
- oldest_view = read_view_clone(oldest_view, heap);
+ ut_ad(view->m_closed);
- ut_ad(read_view_validate(oldest_view));
+ /** RW transactions should not free their views here. Their views
+ should be freed by calling view_close(). */
- mutex_exit(&trx_sys->mutex);
+ ut_ad(view->m_creator_trx_id == 0);
- ut_a(oldest_view->creator_trx_id > 0);
- creator_trx_id = oldest_view->creator_trx_id;
+ UT_LIST_REMOVE(m_views, view);
- view = (read_view_t*) &oldest_view->trx_ids[oldest_view->n_trx_ids];
+ UT_LIST_ADD_LAST(m_free, view);
- /* Add the creator transaction id in the trx_ids array in the
- correct slot. */
+ view = NULL;
+}
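
view_release() relies on the same trick as view_close() below: the least significant bit of the ReadView pointer doubles as a "closed" flag, which is safe because the object is more than byte-aligned. A self-contained sketch of the tagging idiom (hypothetical names):

    #include <cassert>
    #include <stdint.h>

    struct View { int dummy; };   /* alignment > 1 byte, so bit 0 is free */

    static View* mark_closed(View* v)
    { return reinterpret_cast<View*>(reinterpret_cast<uintptr_t>(v) | 1); }

    static View* strip_mark(View* v)
    { return reinterpret_cast<View*>(reinterpret_cast<uintptr_t>(v) & ~uintptr_t(1)); }

    static bool is_marked(const View* v)
    { return reinterpret_cast<uintptr_t>(v) & 1; }

    int main()
    {
        View view = { 0 };
        View* p = &view;
        View* t = mark_closed(p);
        assert(is_marked(t) && !is_marked(p) && strip_mark(t) == p);
    }
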
+
+/**
+Allocate and create a view.
+@param view view owned by this class created for the
+ caller. Must be freed by calling view_close()
+@param trx transaction instance of caller */
+void
+MVCC::view_open(ReadView*& view, trx_t* trx)
+{
+ ut_ad(!srv_read_only_mode);
+
+ /** If no new RW transaction has been started since the last view
+ was created then reuse the existing view. */
+ if (view != NULL) {
+
+ uintptr_t p = reinterpret_cast<uintptr_t>(view);
+
+ view = reinterpret_cast<ReadView*>(p & ~1);
+
+ ut_ad(view->m_closed);
+
+ /* NOTE: This can be optimised further; for now we only
+ reuse the view when there are no active RW transactions.
+
+ There is an inherent race here between purge and this
+ thread. Purge will skip views that are marked as closed.
+ Therefore we must reset the closed status before checking
+ the low limit id. */
- for (i = 0; i < oldest_view->n_trx_ids; ++i) {
- trx_id_t id;
+ if (trx_is_autocommit_non_locking(trx) && view->empty()) {
- id = oldest_view->trx_ids[i - insert_done];
+ view->m_closed = false;
- if (insert_done == 0 && creator_trx_id > id) {
- id = creator_trx_id;
- insert_done = 1;
+ if (view->m_low_limit_id == trx_sys_get_max_trx_id()) {
+ return;
+ } else {
+ view->m_closed = true;
+ }
}
- view->trx_ids[i] = id;
- }
+ mutex_enter(&trx_sys->mutex);
+
+ UT_LIST_REMOVE(m_views, view);
- if (insert_done == 0) {
- view->trx_ids[i] = creator_trx_id;
} else {
- ut_a(i > 0);
- view->trx_ids[i] = oldest_view->trx_ids[i - 1];
+ mutex_enter(&trx_sys->mutex);
+
+ view = get_view();
}
- view->creator_trx_id = 0;
+ if (view != NULL) {
- view->low_limit_no = oldest_view->low_limit_no;
- view->low_limit_id = oldest_view->low_limit_id;
+ view->prepare(trx->id);
- if (view->n_trx_ids > 0) {
- /* The last active transaction has the smallest id: */
+ view->complete();
- view->up_limit_id = view->trx_ids[view->n_trx_ids - 1];
- } else {
- view->up_limit_id = oldest_view->up_limit_id;
+ UT_LIST_ADD_FIRST(m_views, view);
+
+ ut_ad(!view->is_closed());
+
+ ut_ad(validate());
}
- return(view);
+ trx_sys_mutex_exit();
}
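
The fast path in view_open() fires only for autocommit non-locking transactions, and only when the cached snapshot is provably identical to what prepare() would rebuild: it recorded no active RW transactions and the max trx id has not moved. Reduced to a sketch (hypothetical Snapshot type):

    typedef unsigned long long trx_id_t;

    struct Snapshot {
        trx_id_t low_limit;   /* max trx id captured at creation */
        bool     no_active;   /* snapshot held no active RW trx ids */
    };

    static bool can_reuse(const Snapshot& s, trx_id_t current_max_trx_id)
    {
        return s.no_active && s.low_limit == current_max_trx_id;
    }
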
-/*********************************************************************//**
-Closes a consistent read view for MySQL. This function is called at an SQL
-statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
-UNIV_INTERN
-void
-read_view_close_for_mysql(
-/*======================*/
- trx_t* trx) /*!< in: trx which has a read view */
+/**
+Get the oldest (active) view in the system.
+@return oldest view if found or NULL */
+
+ReadView*
+MVCC::get_oldest_view() const
{
- ut_a(trx->global_read_view);
+ ReadView* view;
- read_view_remove(trx->global_read_view, false);
+ ut_ad(mutex_own(&trx_sys->mutex));
- mem_heap_empty(trx->global_read_view_heap);
+ for (view = UT_LIST_GET_LAST(m_views);
+ view != NULL;
+ view = UT_LIST_GET_PREV(m_view_list, view)) {
- trx->read_view = NULL;
- trx->global_read_view = NULL;
+ if (!view->is_closed()) {
+ break;
+ }
+ }
+
+ return(view);
}
-/*********************************************************************//**
-Prints a read view to stderr. */
-UNIV_INTERN
+/**
+Copy state from another view. Must call copy_complete() to finish.
+@param other view to copy from */
+
void
-read_view_print(
-/*============*/
- const read_view_t* view) /*!< in: read view */
+ReadView::copy_prepare(const ReadView& other)
{
- ulint n_ids;
- ulint i;
+ ut_ad(&other != this);
+
+ if (!other.m_ids.empty()) {
+ const ids_t::value_type* p = other.m_ids.data();
- if (view->type == VIEW_HIGH_GRANULARITY) {
- fprintf(stderr,
- "High-granularity read view undo_n:o " TRX_ID_FMT "\n",
- view->undo_no);
+ m_ids.assign(p, p + other.m_ids.size());
} else {
- fprintf(stderr, "Normal read view\n");
+ m_ids.clear();
}
- fprintf(stderr, "Read view low limit trx n:o " TRX_ID_FMT "\n",
- view->low_limit_no);
-
- fprintf(stderr, "Read view up limit trx id " TRX_ID_FMT "\n",
- view->up_limit_id);
+ m_up_limit_id = other.m_up_limit_id;
- fprintf(stderr, "Read view low limit trx id " TRX_ID_FMT "\n",
- view->low_limit_id);
+ m_low_limit_no = other.m_low_limit_no;
- fprintf(stderr, "Read view individually stored trx ids:\n");
+ m_low_limit_id = other.m_low_limit_id;
- n_ids = view->n_trx_ids;
-
- for (i = 0; i < n_ids; i++) {
- fprintf(stderr, "Read view trx id " TRX_ID_FMT "\n",
- view->trx_ids[i]);
- }
+ m_creator_trx_id = other.m_creator_trx_id;
}
-/*********************************************************************//**
-Create a high-granularity consistent cursor view for mysql to be used
-in cursors. In this consistent read view modifications done by the
-creating transaction after the cursor is created or future transactions
-are not visible. */
-UNIV_INTERN
-cursor_view_t*
-read_cursor_view_create_for_mysql(
-/*==============================*/
- trx_t* cr_trx) /*!< in: trx where cursor view is created */
+/**
+Complete the copy: insert the creator transaction id into
+m_ids and adjust m_up_limit_id, if required */
+
+void
+ReadView::copy_complete()
{
- read_view_t* view;
- mem_heap_t* heap;
- ulint n_trx;
- cursor_view_t* curview;
+ ut_ad(!trx_sys_mutex_own());
- /* Use larger heap than in trx_create when creating a read_view
- because cursors are quite long. */
+ if (m_creator_trx_id > 0) {
+ m_ids.insert(m_creator_trx_id);
+ }
- heap = mem_heap_create(512);
+ if (!m_ids.empty()) {
+ /* The last active transaction has the smallest id. */
+ m_up_limit_id = std::min(m_ids.front(), m_up_limit_id);
+ }
- curview = (cursor_view_t*) mem_heap_alloc(heap, sizeof(*curview));
+ ut_ad(m_up_limit_id <= m_low_limit_id);
- curview->heap = heap;
+ /* We added the creator transaction ID to the m_ids. */
+ m_creator_trx_id = 0;
+}
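
copy_complete() re-inserts the creator id, which the source view excludes from its own id array, and keeps the up-limit consistent. A sketch of the sorted insert plus bound adjustment (illustrative only):

    #include <algorithm>
    #include <vector>

    typedef unsigned long long trx_id_t;

    static void insert_sorted(std::vector<trx_id_t>& ids, trx_id_t id,
                              trx_id_t& up_limit)
    {
        ids.insert(std::lower_bound(ids.begin(), ids.end(), id), id);
        up_limit = std::min(ids.front(), up_limit);  /* smallest id rules */
    }
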
- /* Drop cursor tables from consideration when evaluating the
- need of auto-commit */
+/** Clones the oldest view and stores it in view. No need to
+call view_close(). The caller owns the view that is passed in.
+This function is called by Purge to determine whether it should
+purge a delete-marked record or not.
+@param view Preallocated view, owned by the caller */
- curview->n_mysql_tables_in_use = cr_trx->n_mysql_tables_in_use;
+void
+MVCC::clone_oldest_view(ReadView* view)
+{
+ mutex_enter(&trx_sys->mutex);
- cr_trx->n_mysql_tables_in_use = 0;
+ ReadView* oldest_view = get_oldest_view();
- mutex_enter(&trx_sys->mutex);
+ if (oldest_view == NULL) {
- n_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
+ view->prepare(0);
- curview->read_view = read_view_create_low(n_trx, curview->heap);
+ trx_sys_mutex_exit();
- view = curview->read_view;
- view->undo_no = cr_trx->undo_no;
- view->type = VIEW_HIGH_GRANULARITY;
- view->creator_trx_id = UINT64_UNDEFINED;
+ view->complete();
- /* No future transactions should be visible in the view */
+ } else {
+ view->copy_prepare(*oldest_view);
- view->low_limit_no = trx_sys->max_trx_id;
- view->low_limit_id = view->low_limit_no;
+ trx_sys_mutex_exit();
- /* No active transaction should be visible */
+ view->copy_complete();
+ }
+}
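
clone_oldest_view() splits the clone so that only the cheap block copy runs under trx_sys->mutex; the sorted insert in copy_complete() happens after the mutex is released. A generic sketch of the two-phase pattern, assuming a State type that provides the same two methods:

    #include <mutex>

    template <typename State>
    void two_phase_clone(State& dst, const State& src, std::mutex& m)
    {
        {
            std::lock_guard<std::mutex> guard(m);
            dst.copy_prepare(src);   /* cheap copy under the lock */
        }
        dst.copy_complete();         /* costlier fix-up outside it */
    }
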
- ut_list_map(trx_sys->rw_trx_list, &trx_t::trx_list, CreateView(view));
+/**
+@return the number of active views */
- view->creator_trx_id = cr_trx->id;
+ulint
+MVCC::size() const
+{
+ trx_sys_mutex_enter();
- if (view->n_trx_ids > 0) {
- /* The last active transaction has the smallest id: */
+ ulint size = 0;
- view->up_limit_id = view->trx_ids[view->n_trx_ids - 1];
- } else {
- view->up_limit_id = view->low_limit_id;
- }
+ for (const ReadView* view = UT_LIST_GET_FIRST(m_views);
+ view != NULL;
+ view = UT_LIST_GET_NEXT(m_view_list, view)) {
- read_view_add(view);
+ if (!view->is_closed()) {
+ ++size;
+ }
+ }
- mutex_exit(&trx_sys->mutex);
+ trx_sys_mutex_exit();
- return(curview);
+ return(size);
}
-/*********************************************************************//**
-Close a given consistent cursor view for mysql and restore global read view
-back to a transaction read view. */
-UNIV_INTERN
-void
-read_cursor_view_close_for_mysql(
-/*=============================*/
- trx_t* trx, /*!< in: trx */
- cursor_view_t* curview)/*!< in: cursor view to be closed */
+/**
+Close a view created by the above function.
+@param view view allocated by view_open()
+@param own_mutex whether the caller owns trx_sys_t::mutex */
+void MVCC::view_close(ReadView*& view, bool own_mutex)
{
- ut_a(curview);
- ut_a(curview->read_view);
- ut_a(curview->heap);
+ uintptr_t p = reinterpret_cast<uintptr_t>(view);
- /* Add cursor's tables to the global count of active tables that
- belong to this transaction */
- trx->n_mysql_tables_in_use += curview->n_mysql_tables_in_use;
+ /* Note: The assumption here is that AC-NL-RO transactions will
+ call this function with own_mutex == false. */
+ if (!own_mutex) {
+ /* Sanitise the pointer first. */
+ ReadView* ptr = reinterpret_cast<ReadView*>(p & ~1);
- read_view_remove(curview->read_view, false);
+ /* Note this can be called for a read view that
+ was already closed. */
+ ptr->m_closed = true;
- trx->read_view = trx->global_read_view;
+ /* Set the view as closed. */
+ view = reinterpret_cast<ReadView*>(p | 0x1);
+ } else {
+ view = reinterpret_cast<ReadView*>(p & ~1);
- mem_heap_free(curview->heap);
-}
+ view->close();
-/*********************************************************************//**
-This function sets a given consistent cursor view to a transaction
-read view if given consistent cursor view is not NULL. Otherwise, function
-restores a global read view to a transaction read view. */
-UNIV_INTERN
-void
-read_cursor_set_for_mysql(
-/*======================*/
- trx_t* trx, /*!< in: transaction where cursor is set */
- cursor_view_t* curview)/*!< in: consistent cursor view to be set */
-{
- ut_a(trx);
+ UT_LIST_REMOVE(m_views, view);
+ UT_LIST_ADD_LAST(m_free, view);
- mutex_enter(&trx_sys->mutex);
+ ut_ad(validate());
- if (UNIV_LIKELY(curview != NULL)) {
- trx->read_view = curview->read_view;
- } else {
- trx->read_view = trx->global_read_view;
+ view = NULL;
}
+}
+
+/**
+Set the view creator transaction id. Note: This should be set only
+for views created by RW transactions.
+@param view Set the creator trx id for this view
+@param id Transaction id to set */
- ut_ad(read_view_validate(trx->read_view));
+void
+MVCC::set_view_creator_trx_id(ReadView* view, trx_id_t id)
+{
+ ut_ad(id > 0);
+ ut_ad(mutex_own(&trx_sys->mutex));
- mutex_exit(&trx_sys->mutex);
+ view->creator_trx_id(id);
}
diff --git a/storage/innobase/rem/rem0cmp.cc b/storage/innobase/rem/rem0cmp.cc
index 5f9c497cb29..cda286ef503 100644
--- a/storage/innobase/rem/rem0cmp.cc
+++ b/storage/innobase/rem/rem0cmp.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,14 +25,10 @@ Created 7/1/1994 Heikki Tuuri
************************************************************************/
#include "rem0cmp.h"
-
-#ifdef UNIV_NONINL
-#include "rem0cmp.ic"
-#endif
-
-#include "ha_prototypes.h"
+#include "rem0rec.h"
+#include "page0page.h"
+#include "dict0mem.h"
#include "handler0alter.h"
-#include "srv0srv.h"
/* ALPHABETICAL ORDER
==================
@@ -52,83 +49,53 @@ At the present, the comparison functions return 0 in the case,
where two records disagree only in the way that one
has more fields than the other. */
-#ifdef UNIV_DEBUG
-/*************************************************************//**
-Used in debug checking of cmp_dtuple_... .
-This function is used to compare a data tuple to a physical record. If
-dtuple has n fields then rec must have either m >= n fields, or it must
-differ from dtuple in some of the m fields rec has.
-@return 1, 0, -1, if dtuple is greater, equal, less than rec,
-respectively, when only the common first fields are compared */
-static
-int
-cmp_debug_dtuple_rec_with_match(
-/*============================*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record which differs from
- dtuple in some of the common fields, or which
- has an equal number or more fields than
- dtuple */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n_cmp, /*!< in: number of fields to compare */
- ulint* matched_fields)/*!< in/out: number of already
- completely matched fields; when function
- returns, contains the value for current
- comparison */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#endif /* UNIV_DEBUG */
-/*************************************************************//**
-This function is used to compare two data fields for which the data type
-is such that we must use MySQL code to compare them. The prototype here
-must be a copy of the one in ha_innobase.cc!
-@return 1, 0, -1, if a is greater, equal, less than b, respectively */
-extern
+/** Compare two data fields.
+@param[in] prtype precise type
+@param[in] a data field
+@param[in] a_length length of a, in bytes (not UNIV_SQL_NULL)
+@param[in] b data field
+@param[in] b_length length of b, in bytes (not UNIV_SQL_NULL)
+@return positive, 0, negative, if a is greater, equal, less than b,
+respectively */
+UNIV_INLINE
int
innobase_mysql_cmp(
-/*===============*/
- int mysql_type, /*!< in: MySQL type */
- uint charset_number, /*!< in: number of the charset */
- const unsigned char* a, /*!< in: data field */
- unsigned int a_length, /*!< in: data field length,
- not UNIV_SQL_NULL */
- const unsigned char* b, /*!< in: data field */
- unsigned int b_length); /*!< in: data field length,
- not UNIV_SQL_NULL */
-/*************************************************************//**
-This function is used to compare two data fields for which the data type
-is such that we must use MySQL code to compare them. The prototype here
-must be a copy of the one in ha_innobase.cc!
-@return 1, 0, -1, if a is greater, equal, less than b, respectively */
-extern
-int
-innobase_mysql_cmp_prefix(
-/*======================*/
- int mysql_type, /*!< in: MySQL type */
- uint charset_number, /*!< in: number of the charset */
- const unsigned char* a, /*!< in: data field */
- unsigned int a_length, /*!< in: data field length,
- not UNIV_SQL_NULL */
- const unsigned char* b, /*!< in: data field */
- unsigned int b_length); /*!< in: data field length,
- not UNIV_SQL_NULL */
-/*********************************************************************//**
-Transforms the character code so that it is ordered appropriately for the
-language. This is only used for the latin1 char set. MySQL does the
-comparisons for other char sets.
-@return collation order position */
-UNIV_INLINE
-ulint
-cmp_collate(
-/*========*/
- ulint code) /*!< in: code of a character stored in database record */
+ ulint prtype,
+ const byte* a,
+ unsigned int a_length,
+ const byte* b,
+ unsigned int b_length)
{
- return((ulint) srv_latin1_ordering[code]);
+#ifdef UNIV_DEBUG
+ switch (prtype & DATA_MYSQL_TYPE_MASK) {
+ case MYSQL_TYPE_BIT:
+ case MYSQL_TYPE_STRING:
+ case MYSQL_TYPE_VAR_STRING:
+ case MYSQL_TYPE_TINY_BLOB:
+ case MYSQL_TYPE_MEDIUM_BLOB:
+ case MYSQL_TYPE_BLOB:
+ case MYSQL_TYPE_LONG_BLOB:
+ case MYSQL_TYPE_VARCHAR:
+ break;
+ default:
+ ut_error;
+ }
+#endif /* UNIV_DEBUG */
+
+ uint cs_num = (uint) dtype_get_charset_coll(prtype);
+
+ if (CHARSET_INFO* cs = get_charset(cs_num, MYF(MY_WME))) {
+ return(cs->coll->strnncollsp(
+ cs, a, a_length, b, b_length));
+ }
+
+ ib::fatal() << "Unable to find charset-collation " << cs_num;
+ return(0);
}
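
The rewritten innobase_mysql_cmp() hands the actual ordering to the charset's strnncollsp(), which compares under a collation and ignores trailing-space differences. A toy model of what such a comparator does; this is a sketch, not the real charset library code:

    #include <cctype>
    #include <cstddef>

    /* Case-insensitive compare that treats missing trailing bytes
    as spaces, in the spirit of strnncollsp(). Illustrative only. */
    static int toy_strnncollsp(const unsigned char* a, size_t a_len,
                               const unsigned char* b, size_t b_len)
    {
        size_t i = 0;
        for (; i < a_len && i < b_len; i++) {
            int ca = std::tolower(a[i]);
            int cb = std::tolower(b[i]);
            if (ca != cb) return ca < cb ? -1 : 1;
        }
        for (; i < a_len; i++)        /* a longer: compare vs. pad */
            if (a[i] != ' ') return a[i] < ' ' ? -1 : 1;
        for (; i < b_len; i++)        /* b longer: compare vs. pad */
            if (b[i] != ' ') return b[i] < ' ' ? 1 : -1;
        return 0;
    }
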
/*************************************************************//**
Returns TRUE if two columns are equal for comparison purposes.
-@return TRUE if the columns are considered equal in comparisons */
-UNIV_INTERN
+@return TRUE if the columns are considered equal in comparisons */
ibool
cmp_cols_are_equal(
/*===============*/
@@ -178,14 +145,85 @@ cmp_cols_are_equal(
return(col1->mtype != DATA_INT || col1->len == col2->len);
}
+/** Compare two DATA_DECIMAL (MYSQL_TYPE_DECIMAL) fields.
+TODO: Remove this function. Everything should use MYSQL_TYPE_NEWDECIMAL.
+@param[in] a data field
+@param[in] a_length length of a, in bytes (not UNIV_SQL_NULL)
+@param[in] b data field
+@param[in] b_length length of b, in bytes (not UNIV_SQL_NULL)
+@return positive, 0, negative, if a is greater, equal, less than b,
+respectively */
+static ATTRIBUTE_COLD
+int
+cmp_decimal(
+ const byte* a,
+ unsigned int a_length,
+ const byte* b,
+ unsigned int b_length)
+{
+ int swap_flag;
+
+ /* Remove preceding spaces */
+ for (; a_length && *a == ' '; a++, a_length--) { }
+ for (; b_length && *b == ' '; b++, b_length--) { }
+
+ if (*a == '-') {
+ swap_flag = -1;
+
+ if (*b != '-') {
+ return(swap_flag);
+ }
+
+ a++; b++;
+ a_length--;
+ b_length--;
+ } else {
+ swap_flag = 1;
+
+ if (*b == '-') {
+ return(swap_flag);
+ }
+ }
+
+ while (a_length > 0 && (*a == '+' || *a == '0')) {
+ a++; a_length--;
+ }
+
+ while (b_length > 0 && (*b == '+' || *b == '0')) {
+ b++; b_length--;
+ }
+
+ if (a_length != b_length) {
+ if (a_length < b_length) {
+ return(-swap_flag);
+ }
+
+ return(swap_flag);
+ }
+
+ while (a_length > 0 && *a == *b) {
+
+ a++; b++; a_length--;
+ }
+
+ if (a_length == 0) {
+ return(0);
+ }
+
+ if (*a <= *b) {
+ swap_flag = -swap_flag;
+ }
+
+ return(swap_flag);
+}
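
cmp_decimal() orders ASCII decimal strings by sign, then by significant length, then digit by digit. A compact restatement of those rules with asserts, useful for checking the logic; the helper is a sketch, not the engine code:

    #include <cassert>
    #include <string>

    static int sign_of(const std::string& s) { return s[0] == '-' ? -1 : 1; }

    static std::string digits(const std::string& s)
    {
        std::string::size_type i = s.find_first_not_of(" -+0");
        return i == std::string::npos ? "" : s.substr(i);
    }

    static int toy_cmp_decimal(const std::string& a, const std::string& b)
    {
        if (sign_of(a) != sign_of(b)) return sign_of(a);
        int swap = sign_of(a);                 /* -1 flips the order */
        std::string da = digits(a), db = digits(b);
        if (da.size() != db.size())
            return da.size() < db.size() ? -swap : swap;
        int c = da.compare(db);
        return c == 0 ? 0 : (c < 0 ? -swap : swap);
    }

    int main()
    {
        assert(toy_cmp_decimal("12", "123") < 0);    /* fewer digits */
        assert(toy_cmp_decimal("-123", "-12") < 0);  /* sign flips it */
        assert(toy_cmp_decimal("0012", "12") == 0);  /* leading zeros */
    }
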
+
/*************************************************************//**
-Innobase uses this function to compare two data fields for which the data type
-is such that we must compare whole fields or call MySQL to do the comparison
+Innobase uses this function to compare two geometry data fields
@return 1, 0, -1, if a is greater, equal, less than b, respectively */
static
int
-cmp_whole_field(
-/*============*/
+cmp_geometry_field(
+/*===============*/
ulint mtype, /*!< in: main type */
ulint prtype, /*!< in: precise type */
const byte* a, /*!< in: data field */
@@ -195,66 +233,107 @@ cmp_whole_field(
unsigned int b_length) /*!< in: data field length,
not UNIV_SQL_NULL */
{
- float f_1;
- float f_2;
- double d_1;
- double d_2;
- int swap_flag = 1;
-
- switch (mtype) {
-
- case DATA_DECIMAL:
- /* Remove preceding spaces */
- for (; a_length && *a == ' '; a++, a_length--) { }
- for (; b_length && *b == ' '; b++, b_length--) { }
-
- if (*a == '-') {
- if (*b != '-') {
- return(-1);
- }
-
- a++; b++;
- a_length--;
- b_length--;
-
- swap_flag = -1;
+ double x1, x2;
+ double y1, y2;
- } else if (*b == '-') {
+ ut_ad(prtype & DATA_GIS_MBR);
- return(1);
- }
-
- while (a_length > 0 && (*a == '+' || *a == '0')) {
- a++; a_length--;
- }
+ if (a_length < sizeof(double) || b_length < sizeof(double)) {
+ return(0);
+ }
- while (b_length > 0 && (*b == '+' || *b == '0')) {
- b++; b_length--;
- }
+ /* Try to compare mbr left lower corner (xmin, ymin) */
+ x1 = mach_double_read(a);
+ x2 = mach_double_read(b);
+ y1 = mach_double_read(a + sizeof(double) * SPDIMS);
+ y2 = mach_double_read(b + sizeof(double) * SPDIMS);
- if (a_length != b_length) {
- if (a_length < b_length) {
- return(-swap_flag);
- }
+ if (x1 > x2) {
+ return(1);
+ } else if (x2 > x1) {
+ return(-1);
+ }
- return(swap_flag);
- }
+ if (y1 > y2) {
+ return(1);
+ } else if (y2 > y1) {
+ return(-1);
+ }
- while (a_length > 0 && *a == *b) {
+ /* left lower corner (xmin, ymin) overlaps, now right upper corner */
+ x1 = mach_double_read(a + sizeof(double));
+ x2 = mach_double_read(b + sizeof(double));
+ y1 = mach_double_read(a + sizeof(double) * SPDIMS + sizeof(double));
+ y2 = mach_double_read(b + sizeof(double) * SPDIMS + sizeof(double));
- a++; b++; a_length--;
- }
+ if (x1 > x2) {
+ return(1);
+ } else if (x2 > x1) {
+ return(-1);
+ }
- if (a_length == 0) {
+ if (y1 > y2) {
+ return(1);
+ } else if (y2 > y1) {
+ return(-1);
+ }
- return(0);
- }
+ return(0);
+}
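
cmp_geometry_field() orders MBRs lexicographically: lower-left corner first, then upper-right, one coordinate at a time (the reads imply the storage layout xmin, xmax, ymin, ymax). The same ordering on a plain struct, as a sketch:

    struct Mbr { double xmin, xmax, ymin, ymax; };

    static int cmp_double(double a, double b)
    { return a > b ? 1 : (b > a ? -1 : 0); }

    static int cmp_mbr(const Mbr& a, const Mbr& b)
    {
        if (int c = cmp_double(a.xmin, b.xmin)) return c;
        if (int c = cmp_double(a.ymin, b.ymin)) return c;
        if (int c = cmp_double(a.xmax, b.xmax)) return c;
        return cmp_double(a.ymax, b.ymax);
    }
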
+/*************************************************************//**
+Innobase uses this function to compare two GIS data fields
+@return 1, 0, -1 if mode == PAGE_CUR_MBR_EQUAL. For the other
+compare modes, returns 1 or 0 depending on whether a and b
+satisfy the relationship (CONTAIN, WITHIN etc.) */
+static
+int
+cmp_gis_field(
+/*============*/
+ page_cur_mode_t mode, /*!< in: compare mode */
+ const byte* a, /*!< in: data field */
+ unsigned int a_length, /*!< in: data field length,
+ not UNIV_SQL_NULL */
+ const byte* b, /*!< in: data field */
+ unsigned int b_length) /*!< in: data field length,
+ not UNIV_SQL_NULL */
+{
+ if (mode == PAGE_CUR_MBR_EQUAL) {
+ /* TODO: Since the DATA_GEOMETRY is not used in compare
+ function, we could pass it instead of a specific type now */
+ return(cmp_geometry_field(DATA_GEOMETRY, DATA_GIS_MBR,
+ a, a_length, b, b_length));
+ } else {
+ return(rtree_key_cmp(mode, a, a_length, b, b_length));
+ }
+}
- if (*a > *b) {
- return(swap_flag);
- }
+/** Compare two data fields.
+@param[in] mtype main type
+@param[in] prtype precise type
+@param[in] a data field
+@param[in] a_length length of a, in bytes (not UNIV_SQL_NULL)
+@param[in] b data field
+@param[in] b_length length of b, in bytes (not UNIV_SQL_NULL)
+@return positive, 0, negative, if a is greater, equal, less than b,
+respectively */
+static
+int
+cmp_whole_field(
+ ulint mtype,
+ ulint prtype,
+ const byte* a,
+ unsigned int a_length,
+ const byte* b,
+ unsigned int b_length)
+{
+ float f_1;
+ float f_2;
+ double d_1;
+ double d_2;
- return(-swap_flag);
+ switch (mtype) {
+ case DATA_DECIMAL:
+ return(cmp_decimal(a, a_length, b, b_length));
case DATA_DOUBLE:
d_1 = mach_double_read(a);
d_2 = mach_double_read(b);
@@ -278,403 +357,319 @@ cmp_whole_field(
}
return(0);
+ case DATA_VARCHAR:
+ case DATA_CHAR:
+ return(my_charset_latin1.coll->strnncollsp(
+ &my_charset_latin1,
+ a, a_length, b, b_length));
case DATA_BLOB:
if (prtype & DATA_BINARY_TYPE) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: comparing a binary BLOB"
- " with a character set sensitive\n"
- "InnoDB: comparison!\n");
+ ib::error() << "Comparing a binary BLOB"
+ " using a character set collation!";
+ ut_ad(0);
}
/* fall through */
case DATA_VARMYSQL:
case DATA_MYSQL:
- return(innobase_mysql_cmp(
- (int)(prtype & DATA_MYSQL_TYPE_MASK),
- (uint) dtype_get_charset_coll(prtype),
- a, a_length, b, b_length));
+ return(innobase_mysql_cmp(prtype,
+ a, a_length, b, b_length));
+ case DATA_GEOMETRY:
+ return(cmp_geometry_field(mtype, prtype, a, a_length, b,
+ b_length));
default:
- fprintf(stderr,
- "InnoDB: unknown type number %lu\n",
- (ulong) mtype);
- ut_error;
+ ib::fatal() << "Unknown data type number " << mtype;
}
return(0);
}
-/*****************************************************************
-This function is used to compare two dfields where at least the first
-has its data type field set. */
-UNIV_INTERN
-int
-cmp_dfield_dfield_like_prefix(
-/*==========================*/
- /* out: 1, 0, -1, if dfield1 is greater, equal,
- less than dfield2, respectively */
- dfield_t* dfield1,/* in: data field; must have type field set */
- dfield_t* dfield2)/* in: data field */
-{
- const dtype_t* type;
- int ret;
-
- ut_ad(dfield_check_typed(dfield1));
-
- type = dfield_get_type(dfield1);
-
- if (type->mtype >= DATA_FLOAT) {
- ret = innobase_mysql_cmp_prefix(
- static_cast<int>(type->prtype & DATA_MYSQL_TYPE_MASK),
- static_cast<uint>(dtype_get_charset_coll(type->prtype)),
- static_cast<byte*>(dfield_get_data(dfield1)),
- static_cast<uint>(dfield_get_len(dfield1)),
- static_cast<byte*>(dfield_get_data(dfield2)),
- static_cast<uint>(dfield_get_len(dfield2)));
- } else {
- ret = (cmp_data_data_like_prefix(
- static_cast<byte*>(dfield_get_data(dfield1)),
- dfield_get_len(dfield1),
- static_cast<byte*>(dfield_get_data(dfield2)),
- dfield_get_len(dfield2)));
- }
-
- return(ret);
-}
-
-/*************************************************************//**
-This function is used to compare two data fields for which we know the
-data type.
-@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */
-UNIV_INTERN
+/** Compare two data fields.
+@param[in] mtype main type
+@param[in] prtype precise type
+@param[in] data1 data field
+@param[in] len1 length of data1 in bytes, or UNIV_SQL_NULL
+@param[in] data2 data field
+@param[in] len2 length of data2 in bytes, or UNIV_SQL_NULL
+@return the comparison result of data1 and data2
+@retval 0 if data1 is equal to data2
+@retval negative if data1 is less than data2
+@retval positive if data1 is greater than data2 */
+inline
int
-cmp_data_data_slow(
-/*===============*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type */
- const byte* data1, /*!< in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /*!< in: data field length or UNIV_SQL_NULL */
- const byte* data2, /*!< in: data field (== a pointer to a memory
- buffer) */
- ulint len2) /*!< in: data field length or UNIV_SQL_NULL */
+cmp_data(
+ ulint mtype,
+ ulint prtype,
+ const byte* data1,
+ ulint len1,
+ const byte* data2,
+ ulint len2)
{
- ulint data1_byte;
- ulint data2_byte;
- ulint cur_bytes;
-
if (len1 == UNIV_SQL_NULL || len2 == UNIV_SQL_NULL) {
-
if (len1 == len2) {
-
return(0);
}
- if (len1 == UNIV_SQL_NULL) {
- /* We define the SQL null to be the smallest possible
- value of a field in the alphabetical order */
-
- return(-1);
- }
-
- return(1);
+ /* We define the SQL null to be the smallest possible
+ value of a field. */
+ return(len1 == UNIV_SQL_NULL ? -1 : 1);
}
- if (mtype >= DATA_FLOAT
- || (mtype == DATA_BLOB
- && 0 == (prtype & DATA_BINARY_TYPE)
- && dtype_get_charset_coll(prtype)
- != DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) {
+ ulint pad;
+ switch (mtype) {
+ case DATA_FIXBINARY:
+ case DATA_BINARY:
+ if (dtype_get_charset_coll(prtype)
+ != DATA_MYSQL_BINARY_CHARSET_COLL) {
+ pad = 0x20;
+ break;
+ }
+ /* fall through */
+ case DATA_INT:
+ case DATA_SYS_CHILD:
+ case DATA_SYS:
+ pad = ULINT_UNDEFINED;
+ break;
+ case DATA_GEOMETRY:
+ ut_ad(prtype & DATA_BINARY_TYPE);
+ pad = ULINT_UNDEFINED;
+ if (prtype & DATA_GIS_MBR) {
+ return(cmp_whole_field(mtype, prtype,
+ data1, (unsigned) len1,
+ data2, (unsigned) len2));
+ }
+ break;
+ case DATA_BLOB:
+ if (prtype & DATA_BINARY_TYPE) {
+ pad = ULINT_UNDEFINED;
+ break;
+ }
+ /* fall through */
+ default:
return(cmp_whole_field(mtype, prtype,
data1, (unsigned) len1,
data2, (unsigned) len2));
}
- /* Compare then the fields */
-
- cur_bytes = 0;
+ ulint len;
+ int cmp;
- for (;;) {
- if (len1 <= cur_bytes) {
- if (len2 <= cur_bytes) {
+ if (len1 < len2) {
+ len = len1;
+ len2 -= len;
+ len1 = 0;
+ } else {
+ len = len2;
+ len1 -= len;
+ len2 = 0;
+ }
- return(0);
+ if (len) {
+#if defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined _M_X64
+ /* Compare the first bytes with a loop to avoid the call
+ overhead of memcmp(). On x86 and x86-64, the GCC built-in
+ (repz cmpsb) seems to be very slow, so we will be calling the
+ libc version. http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
+ tracks the slowness of the GCC built-in memcmp().
+
+ We compare up to the first 4..7 bytes with the loop.
+ The (len & 3) is used for "normalizing" or
+ "quantizing" the len parameter for the memcmp() call,
+ in case the whole prefix is equal. On x86 and x86-64,
+ the GNU libc memcmp() of equal strings is faster with
+ len=4 than with len=3.
+
+ On other architectures than the IA32 or AMD64, there could
+ be a built-in memcmp() that is faster than the loop.
+ We only use the loop where we know that it can improve
+ the performance. */
+ for (ulint i = 4 + (len & 3); i > 0; i--) {
+ cmp = int(*data1++) - int(*data2++);
+ if (cmp) {
+ return(cmp);
}
- data1_byte = dtype_get_pad_char(mtype, prtype);
-
- if (data1_byte == ULINT_UNDEFINED) {
-
- return(-1);
+ if (!--len) {
+ break;
}
- } else {
- data1_byte = *data1;
}
- if (len2 <= cur_bytes) {
- data2_byte = dtype_get_pad_char(mtype, prtype);
-
- if (data2_byte == ULINT_UNDEFINED) {
+ if (len) {
+#endif /* IA32 or AMD64 */
+ cmp = memcmp(data1, data2, len);
- return(1);
+ if (cmp) {
+ return(cmp);
}
- } else {
- data2_byte = *data2;
- }
- if (data1_byte == data2_byte) {
- /* If the bytes are equal, they will remain such even
- after the collation transformation below */
-
- goto next_byte;
+ data1 += len;
+ data2 += len;
+#if defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined _M_X64
}
+#endif /* IA32 or AMD64 */
+ }
- if (mtype <= DATA_CHAR
- || (mtype == DATA_BLOB
- && 0 == (prtype & DATA_BINARY_TYPE))) {
+ cmp = (int) (len1 - len2);
- data1_byte = cmp_collate(data1_byte);
- data2_byte = cmp_collate(data2_byte);
- }
+ if (!cmp || pad == ULINT_UNDEFINED) {
+ return(cmp);
+ }
- if (data1_byte > data2_byte) {
+ len = 0;
- return(1);
- } else if (data1_byte < data2_byte) {
+ if (len1) {
+ do {
+ cmp = static_cast<int>(
+ mach_read_from_1(&data1[len++]) - pad);
+ } while (cmp == 0 && len < len1);
+ } else {
+ ut_ad(len2 > 0);
- return(-1);
- }
-next_byte:
- /* Next byte */
- cur_bytes++;
- data1++;
- data2++;
+ do {
+ cmp = static_cast<int>(
+ pad - mach_read_from_1(&data2[len++]));
+ } while (cmp == 0 && len < len2);
}
- return(0); /* Not reached */
+ return(cmp);
}
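
The tail of cmp_data() implements SQL pad semantics: after the common prefix, the longer value wins only if its remaining bytes differ from the pad character; with no pad character, length alone decides. A self-contained sketch with asserts (hypothetical helper, simplified from the code above):

    #include <cassert>
    #include <cstring>

    static const unsigned NO_PAD = 0xFFFFFFFFu;

    static int toy_cmp_data(const unsigned char* a, size_t a_len,
                            const unsigned char* b, size_t b_len,
                            unsigned pad)
    {
        size_t n = a_len < b_len ? a_len : b_len;
        if (int c = std::memcmp(a, b, n)) return c;
        if (a_len == b_len || pad == NO_PAD)
            return int(a_len) - int(b_len);

        const unsigned char* rest = a_len > b_len ? a + n : b + n;
        size_t left = (a_len > b_len ? a_len : b_len) - n;
        int sign = a_len > b_len ? 1 : -1;
        for (size_t i = 0; i < left; i++)
            if (rest[i] != pad) return rest[i] > pad ? sign : -sign;
        return 0;
    }

    int main()
    {
        const unsigned char a[] = "abc ", b[] = "abc", c[] = "ab";
        assert(toy_cmp_data(a, 4, b, 3, ' ') == 0);   /* pad: equal */
        assert(toy_cmp_data(a, 4, b, 3, NO_PAD) > 0); /* binary: longer */
        assert(toy_cmp_data(c, 2, b, 3, ' ') < 0);    /* 'c' > pad */
    }
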
-/*****************************************************************
-This function is used to compare two data fields for which we know the
-data type to be VARCHAR */
-
+/** Compare a GIS data tuple to a physical record.
+@param[in] dtuple data tuple
+@param[in] rec R-tree record
+@param[in] offsets rec_get_offsets(rec)
+@param[in] mode compare mode
+@return 1, 0, -1 if mode == PAGE_CUR_MBR_EQUAL; otherwise 1 or 0,
+depending on whether rec satisfies the mode relationship to dtuple */
int
-cmp_data_data_slow_varchar(
-/*=======================*/
- /* out: 1, 0, -1, if lhs is greater, equal,
- less than rhs, respectively */
- const byte* lhs, /* in: data field (== a pointer to a memory
- buffer) */
- ulint lhs_len,/* in: data field length or UNIV_SQL_NULL */
- const byte* rhs, /* in: data field (== a pointer to a memory
- buffer) */
- ulint rhs_len)/* in: data field length or UNIV_SQL_NULL */
+cmp_dtuple_rec_with_gis(
+/*====================*/
+ const dtuple_t* dtuple, /*!< in: data tuple */
+ const rec_t* rec, /*!< in: physical record which differs from
+ dtuple in some of the common fields, or which
+ has an equal number or more fields than
+ dtuple */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
+ page_cur_mode_t mode) /*!< in: compare mode */
{
- ulint i;
-
- ut_a(rhs_len != UNIV_SQL_NULL);
-
- if (lhs_len == UNIV_SQL_NULL) {
-
- /* We define the SQL null to be the smallest possible
- value of a field in the alphabetical order */
-
- return(-1);
- }
-
- /* Compare the values.*/
-
- for (i = 0; i < lhs_len && i < rhs_len; ++i, ++rhs, ++lhs) {
- ulint lhs_byte = *lhs;
- ulint rhs_byte = *rhs;
-
- if (lhs_byte != rhs_byte) {
- /* If the bytes are equal, they will remain such even
- after the collation transformation below */
-
- lhs_byte = cmp_collate(lhs_byte);
- rhs_byte = cmp_collate(rhs_byte);
-
- if (lhs_byte > rhs_byte) {
+ const dfield_t* dtuple_field; /* current field in logical record */
+ ulint dtuple_f_len; /* the length of the current field
+ in the logical record */
+ ulint rec_f_len; /* length of current field in rec */
+ const byte* rec_b_ptr; /* pointer to the current byte in
+ rec field */
+ int ret = 0; /* return value */
- return(1);
- } else if (lhs_byte < rhs_byte) {
+ dtuple_field = dtuple_get_nth_field(dtuple, 0);
+ dtuple_f_len = dfield_get_len(dtuple_field);
- return(-1);
- }
- }
- }
+ rec_b_ptr = rec_get_nth_field(rec, offsets, 0, &rec_f_len);
+ ret = cmp_gis_field(
+ mode, static_cast<const byte*>(dfield_get_data(dtuple_field)),
+ (unsigned) dtuple_f_len, rec_b_ptr, (unsigned) rec_f_len);
- return((i == lhs_len && i == rhs_len) ? 0 :
- static_cast<int>(rhs_len - lhs_len));
+ return(ret);
}
-/*****************************************************************
-This function is used to compare two data fields for which we know the
-data type. The comparison is done for the LIKE operator.*/
-
+/** Compare a GIS data tuple to a physical record in rtree non-leaf node.
+We need to check the page number field, since we do not store the PK field
+in R-tree non-leaf nodes.
+@param[in] dtuple data tuple
+@param[in] rec R-tree record
+@param[in] offsets rec_get_offsets(rec)
+@retval negative if dtuple is less than rec */
int
-cmp_data_data_slow_like_prefix(
-/*===========================*/
- /* out: 1, 0, -1, if lhs is greater, equal,
- less than rhs, respectively */
- const byte* lhs, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /* in: data field length or UNIV_SQL_NULL */
- const byte* rhs, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len2) /* in: data field length or UNIV_SQL_NULL */
+cmp_dtuple_rec_with_gis_internal(
+ const dtuple_t* dtuple,
+ const rec_t* rec,
+ const offset_t* offsets)
{
- ulint i;
-
- ut_a(len2 != UNIV_SQL_NULL);
-
- if (len1 == UNIV_SQL_NULL) {
+ const dfield_t* dtuple_field; /* current field in logical record */
+ ulint dtuple_f_len; /* the length of the current field
+ in the logical record */
+ ulint rec_f_len; /* length of current field in rec */
+ const byte* rec_b_ptr; /* pointer to the current byte in
+ rec field */
+ int ret = 0; /* return value */
- /* We define the SQL null to be the smallest possible
- value of a field in the alphabetical order */
+ dtuple_field = dtuple_get_nth_field(dtuple, 0);
+ dtuple_f_len = dfield_get_len(dtuple_field);
- return(-1);
+ rec_b_ptr = rec_get_nth_field(rec, offsets, 0, &rec_f_len);
+ ret = cmp_gis_field(
+ PAGE_CUR_WITHIN,
+ static_cast<const byte*>(dfield_get_data(dtuple_field)),
+ (unsigned) dtuple_f_len, rec_b_ptr, (unsigned) rec_f_len);
+ if (ret != 0) {
+ return(ret);
}
- /* Compare the values.*/
-
- for (i = 0; i < len1 && i < len2; ++i, ++rhs, ++lhs) {
- ulint lhs_byte = *lhs;
- ulint rhs_byte = *rhs;
-
- if (lhs_byte != rhs_byte) {
- /* If the bytes are equal, they will remain such even
- after the collation transformation below */
-
- lhs_byte = cmp_collate(lhs_byte);
- rhs_byte = cmp_collate(rhs_byte);
-
- if (lhs_byte > rhs_byte) {
+ dtuple_field = dtuple_get_nth_field(dtuple, 1);
+ dtuple_f_len = dfield_get_len(dtuple_field);
+ rec_b_ptr = rec_get_nth_field(rec, offsets, 1, &rec_f_len);
- return(1);
- } else if (lhs_byte < rhs_byte) {
-
- return(-1);
- }
- }
- }
-
- return(i == len2 ? 0 : 1);
+ return(cmp_data(dtuple_field->type.mtype,
+ dtuple_field->type.prtype,
+ static_cast<const byte*>(dtuple_field->data),
+ dtuple_f_len,
+ rec_b_ptr,
+ rec_f_len));
}
-/*****************************************************************
-This function is used to compare two data fields for which we know the
-data type. The comparison is done for the LIKE operator.*/
-
+/** Compare two data fields.
+@param[in] mtype main type
+@param[in] prtype precise type
+@param[in] data1 data field
+@param[in] len1 length of data1 in bytes, or UNIV_SQL_NULL
+@param[in] data2 data field
+@param[in] len2 length of data2 in bytes, or UNIV_SQL_NULL
+@return the comparison result of data1 and data2
+@retval 0 if data1 is equal to data2
+@retval negative if data1 is less than data2
+@retval positive if data1 is greater than data2 */
int
-cmp_data_data_slow_like_suffix(
-/*===========================*/
- /* out: 1, 0, -1, if data1 is greater, equal,
- less than data2, respectively */
- /* in: data field (== a pointer to a
- memory buffer) */
- const byte* data1 UNIV_UNUSED,
- /* in: data field length or UNIV_SQL_NULL */
- ulint len1 UNIV_UNUSED,
- /* in: data field (== a pointer to a memory
- buffer) */
- const byte* data2 UNIV_UNUSED,
- /* in: data field length or UNIV_SQL_NULL */
- ulint len2 UNIV_UNUSED)
-
+cmp_data_data(
+ ulint mtype,
+ ulint prtype,
+ const byte* data1,
+ ulint len1,
+ const byte* data2,
+ ulint len2)
{
- ut_error; // FIXME:
- return(1);
+ return(cmp_data(mtype, prtype, data1, len1, data2, len2));
}
-/*****************************************************************
-This function is used to compare two data fields for which we know the
-data type. The comparison is done for the LIKE operator.*/
-
-int
-cmp_data_data_slow_like_substr(
-/*===========================*/
- /* out: 1, 0, -1, if data1 is greater, equal,
- less than data2, respectively */
- /* in: data field (== a pointer to a
- memory buffer) */
- const byte* data1 UNIV_UNUSED,
- /* in: data field length or UNIV_SQL_NULL */
- ulint len1 UNIV_UNUSED,
- /* in: data field (== a pointer to a memory
- buffer) */
- const byte* data2 UNIV_UNUSED,
- /* in: data field length or UNIV_SQL_NULL */
- ulint len2 UNIV_UNUSED)
-{
- ut_error; // FIXME:
- return(1);
-}
-/*************************************************************//**
-This function is used to compare a data tuple to a physical record.
-Only dtuple->n_fields_cmp first fields are taken into account for
-the data tuple! If we denote by n = n_fields_cmp, then rec must
-have either m >= n fields, or it must differ from dtuple in some of
-the m fields rec has. If rec has an externally stored field we do not
-compare it but return with value 0 if such a comparison should be
-made.
-@return 1, 0, -1, if dtuple is greater, equal, less than rec,
-respectively, when only the common first fields are compared, or until
-the first externally stored field in rec */
-UNIV_INTERN
+/** Compare a data tuple to a physical record.
+@param[in] dtuple data tuple
+@param[in] rec B-tree record
+@param[in] offsets rec_get_offsets(rec)
+@param[in] n_cmp number of fields to compare
+@param[in,out] matched_fields number of completely matched fields
+@return the comparison result of dtuple and rec
+@retval 0 if dtuple is equal to rec
+@retval negative if dtuple is less than rec
+@retval positive if dtuple is greater than rec */
int
cmp_dtuple_rec_with_match_low(
-/*==========================*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record which differs from
- dtuple in some of the common fields, or which
- has an equal number or more fields than
- dtuple */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n_cmp, /*!< in: number of fields to compare */
- ulint* matched_fields, /*!< in/out: number of already completely
- matched fields; when function returns,
- contains the value for current comparison */
- ulint* matched_bytes) /*!< in/out: number of already matched
- bytes within the first field not completely
- matched; when function returns, contains the
- value for current comparison */
+ const dtuple_t* dtuple,
+ const rec_t* rec,
+ const offset_t* offsets,
+ ulint n_cmp,
+ ulint* matched_fields)
{
- const dfield_t* dtuple_field; /* current field in logical record */
- ulint dtuple_f_len; /* the length of the current field
- in the logical record */
- const byte* dtuple_b_ptr; /* pointer to the current byte in
- logical field data */
- ulint dtuple_byte; /* value of current byte to be compared
- in dtuple*/
- ulint rec_f_len; /* length of current field in rec */
- const byte* rec_b_ptr; /* pointer to the current byte in
- rec field */
- ulint rec_byte; /* value of current byte to be
- compared in rec */
ulint cur_field; /* current field number */
- ulint cur_bytes; /* number of already matched bytes
- in current field */
int ret; /* return value */
- ut_ad(dtuple != NULL);
- ut_ad(rec != NULL);
- ut_ad(matched_fields != NULL);
- ut_ad(matched_bytes != NULL);
ut_ad(dtuple_check_typed(dtuple));
ut_ad(rec_offs_validate(rec, NULL, offsets));
cur_field = *matched_fields;
- cur_bytes = *matched_bytes;
ut_ad(n_cmp > 0);
ut_ad(n_cmp <= dtuple_get_n_fields(dtuple));
ut_ad(cur_field <= n_cmp);
ut_ad(cur_field <= rec_offs_n_fields(offsets));
- if (cur_bytes == 0 && cur_field == 0) {
+ if (cur_field == 0) {
ulint rec_info = rec_get_info_bits(rec,
rec_offs_comp(offsets));
ulint tup_info = dtuple_get_info_bits(dtuple);
@@ -688,43 +683,149 @@ cmp_dtuple_rec_with_match_low(
}
}
- /* Match fields in a loop; stop if we run out of fields in dtuple
- or find an externally stored field */
-
- while (cur_field < n_cmp) {
+ /* Match fields in a loop */
- ulint mtype;
- ulint prtype;
+ for (; cur_field < n_cmp; cur_field++) {
+ const byte* rec_b_ptr;
+ const dfield_t* dtuple_field
+ = dtuple_get_nth_field(dtuple, cur_field);
+ const byte* dtuple_b_ptr
+ = static_cast<const byte*>(
+ dfield_get_data(dtuple_field));
+ const dtype_t* type
+ = dfield_get_type(dtuple_field);
+ ulint dtuple_f_len
+ = dfield_get_len(dtuple_field);
+ ulint rec_f_len;
+
+ /* We should never compare against an externally
+ stored field. Only clustered index records can
+ contain externally stored fields, and the first fields
+ (primary key fields) should already differ. */
+ ut_ad(!rec_offs_nth_extern(offsets, cur_field));
+
+ rec_b_ptr = rec_get_nth_field(rec, offsets, cur_field,
+ &rec_f_len);
+
+ ut_ad(!dfield_is_ext(dtuple_field));
+
+ ret = cmp_data(type->mtype, type->prtype,
+ dtuple_b_ptr, dtuple_f_len,
+ rec_b_ptr, rec_f_len);
+ if (ret) {
+ goto order_resolved;
+ }
+ }
- dtuple_field = dtuple_get_nth_field(dtuple, cur_field);
- {
- const dtype_t* type
- = dfield_get_type(dtuple_field);
+ ret = 0; /* If we ran out of fields, dtuple was equal to rec
+ up to the common fields */
+order_resolved:
+ *matched_fields = cur_field;
+ return(ret);
+}
- mtype = type->mtype;
- prtype = type->prtype;
+/** Get the pad character code point for a type.
+@param[in] type data type
+@return pad character code point
+@retval ULINT_UNDEFINED if no padding is specified */
+UNIV_INLINE
+ulint
+cmp_get_pad_char(
+ const dtype_t* type)
+{
+ switch (type->mtype) {
+ case DATA_FIXBINARY:
+ case DATA_BINARY:
+ if (dtype_get_charset_coll(type->prtype)
+ == DATA_MYSQL_BINARY_CHARSET_COLL) {
+ /* Starting from 5.0.18, do not pad
+ VARBINARY or BINARY columns. */
+ return(ULINT_UNDEFINED);
+ }
+ /* Fall through */
+ case DATA_CHAR:
+ case DATA_VARCHAR:
+ case DATA_MYSQL:
+ case DATA_VARMYSQL:
+ /* Space is the padding character for all char and binary
+ strings, and starting from 5.0.3, also for TEXT strings. */
+ return(0x20);
+ case DATA_GEOMETRY:
+ /* DATA_GEOMETRY is binary data, not ASCII-based. */
+ return(ULINT_UNDEFINED);
+ case DATA_BLOB:
+ if (!(type->prtype & DATA_BINARY_TYPE)) {
+ return(0x20);
}
+ /* Fall through */
+ default:
+ /* No padding specified */
+ return(ULINT_UNDEFINED);
+ }
+}
- dtuple_f_len = dfield_get_len(dtuple_field);
+/** Compare a data tuple to a physical record.
+@param[in] dtuple data tuple
+@param[in] rec B-tree or R-tree index record
+@param[in] index index tree
+@param[in] offsets rec_get_offsets(rec)
+@param[in,out] matched_fields number of completely matched fields
+@param[in,out] matched_bytes number of matched bytes in the first
+field that is not matched
+@return the comparison result of dtuple and rec
+@retval 0 if dtuple is equal to rec
+@retval negative if dtuple is less than rec
+@retval positive if dtuple is greater than rec */
+int
+cmp_dtuple_rec_with_match_bytes(
+ const dtuple_t* dtuple,
+ const rec_t* rec,
+ const dict_index_t* index,
+ const offset_t* offsets,
+ ulint* matched_fields,
+ ulint* matched_bytes)
+{
+ ulint n_cmp = dtuple_get_n_fields_cmp(dtuple);
+ ulint cur_field; /* current field number */
+ ulint cur_bytes;
+ int ret; /* return value */
- rec_b_ptr = rec_get_nth_field(rec, offsets,
- cur_field, &rec_f_len);
+ ut_ad(dtuple_check_typed(dtuple));
+ ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(!(REC_INFO_MIN_REC_FLAG
+ & dtuple_get_info_bits(dtuple)));
+ ut_ad(!(REC_INFO_MIN_REC_FLAG
+ & rec_get_info_bits(rec, rec_offs_comp(offsets))));
- /* If we have matched yet 0 bytes, it may be that one or
- both the fields are SQL null, or the record or dtuple may be
- the predefined minimum record, or the field is externally
- stored */
+ cur_field = *matched_fields;
+ cur_bytes = *matched_bytes;
- if (UNIV_LIKELY(cur_bytes == 0)) {
- if (rec_offs_nth_extern(offsets, cur_field)) {
- /* We do not compare to an externally
- stored field */
+ ut_ad(n_cmp <= dtuple_get_n_fields(dtuple));
+ ut_ad(cur_field <= n_cmp);
+ ut_ad(cur_field + (cur_bytes > 0) <= rec_offs_n_fields(offsets));
- ret = 0;
+ /* Match fields in a loop; stop if we run out of fields in dtuple
+ or find an externally stored field */
- goto order_resolved;
- }
+ while (cur_field < n_cmp) {
+ const dfield_t* dfield = dtuple_get_nth_field(
+ dtuple, cur_field);
+ const dtype_t* type = dfield_get_type(dfield);
+ ulint dtuple_f_len = dfield_get_len(dfield);
+ const byte* dtuple_b_ptr;
+ const byte* rec_b_ptr;
+ ulint rec_f_len;
+
+ dtuple_b_ptr = static_cast<const byte*>(
+ dfield_get_data(dfield));
+ rec_b_ptr = rec_get_nth_field(rec, offsets,
+ cur_field, &rec_f_len);
+ ut_ad(!rec_offs_nth_extern(offsets, cur_field));
+ /* If we have matched 0 bytes so far, it may be that one or
+ both of the fields are SQL null, or the record or dtuple may
+ be the predefined minimum record. */
+ if (cur_bytes == 0) {
if (dtuple_f_len == UNIV_SQL_NULL) {
if (rec_f_len == UNIV_SQL_NULL) {
@@ -743,97 +844,74 @@ cmp_dtuple_rec_with_match_low(
}
}
- if (mtype >= DATA_FLOAT
- || (mtype == DATA_BLOB
- && 0 == (prtype & DATA_BINARY_TYPE)
- && dtype_get_charset_coll(prtype)
- != DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) {
-
- ret = cmp_whole_field(
- mtype, prtype,
- static_cast<const byte*>(
- dfield_get_data(dtuple_field)),
- (unsigned) dtuple_f_len,
- rec_b_ptr, (unsigned) rec_f_len);
-
- if (ret != 0) {
- cur_bytes = 0;
+ switch (type->mtype) {
+ case DATA_FIXBINARY:
+ case DATA_BINARY:
+ case DATA_INT:
+ case DATA_SYS_CHILD:
+ case DATA_SYS:
+ break;
+ case DATA_BLOB:
+ if (type->prtype & DATA_BINARY_TYPE) {
+ break;
+ }
+ /* fall through */
+ default:
+ ret = cmp_data(type->mtype, type->prtype,
+ dtuple_b_ptr, dtuple_f_len,
+ rec_b_ptr, rec_f_len);
- goto order_resolved;
- } else {
+ if (!ret) {
goto next_field;
}
+
+ cur_bytes = 0;
+ goto order_resolved;
}
/* Set the pointers at the current byte */
- rec_b_ptr = rec_b_ptr + cur_bytes;
- dtuple_b_ptr = (byte*) dfield_get_data(dtuple_field)
- + cur_bytes;
+ rec_b_ptr += cur_bytes;
+ dtuple_b_ptr += cur_bytes;
/* Compare then the fields */
- for (;;) {
- if (UNIV_UNLIKELY(rec_f_len <= cur_bytes)) {
+ for (const ulint pad = cmp_get_pad_char(type);;
+ cur_bytes++) {
+ ulint rec_byte = pad;
+ ulint dtuple_byte = pad;
+
+ if (rec_f_len <= cur_bytes) {
if (dtuple_f_len <= cur_bytes) {
goto next_field;
}
- rec_byte = dtype_get_pad_char(mtype, prtype);
-
if (rec_byte == ULINT_UNDEFINED) {
ret = 1;
goto order_resolved;
}
} else {
- rec_byte = *rec_b_ptr;
+ rec_byte = *rec_b_ptr++;
}
- if (UNIV_UNLIKELY(dtuple_f_len <= cur_bytes)) {
- dtuple_byte = dtype_get_pad_char(mtype,
- prtype);
-
+ if (dtuple_f_len <= cur_bytes) {
if (dtuple_byte == ULINT_UNDEFINED) {
ret = -1;
goto order_resolved;
}
} else {
- dtuple_byte = *dtuple_b_ptr;
+ dtuple_byte = *dtuple_b_ptr++;
}
- if (dtuple_byte == rec_byte) {
- /* If the bytes are equal, they will
- remain such even after the collation
- transformation below */
-
- goto next_byte;
- }
-
- if (mtype <= DATA_CHAR
- || (mtype == DATA_BLOB
- && !(prtype & DATA_BINARY_TYPE))) {
-
- rec_byte = cmp_collate(rec_byte);
- dtuple_byte = cmp_collate(dtuple_byte);
- }
-
- ret = (int) (dtuple_byte - rec_byte);
- if (UNIV_LIKELY(ret)) {
- if (ret < 0) {
- ret = -1;
- goto order_resolved;
- } else {
- ret = 1;
- goto order_resolved;
- }
+ if (dtuple_byte < rec_byte) {
+ ret = -1;
+ goto order_resolved;
+ } else if (dtuple_byte > rec_byte) {
+ ret = 1;
+ goto order_resolved;
}
-next_byte:
- /* Next byte */
- cur_bytes++;
- rec_b_ptr++;
- dtuple_b_ptr++;
}
next_field:
@@ -846,82 +924,65 @@ next_field:
ret = 0; /* If we ran out of fields, dtuple was equal to rec
up to the common fields */
order_resolved:
- ut_ad((ret >= - 1) && (ret <= 1));
- ut_ad(ret == cmp_debug_dtuple_rec_with_match(dtuple, rec, offsets,
- n_cmp, matched_fields));
- ut_ad(*matched_fields == cur_field); /* In the debug version, the
- above cmp_debug_... sets
- *matched_fields to a value */
*matched_fields = cur_field;
*matched_bytes = cur_bytes;
return(ret);
}
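
The (matched_fields, matched_bytes) pair is a resume point: the B-tree cursor carries it between comparisons, so each new comparison continues where the previous one diverged instead of restarting at byte 0 of field 0. A simplified model of the protocol over string fields (illustrative types only):

    #include <cstddef>
    #include <string>
    #include <vector>

    typedef std::vector<std::string> Tuple;

    /* Compare a and b field by field, resuming from (*fields, *bytes)
    and leaving the new match position behind for the next call. */
    static int resume_cmp(const Tuple& a, const Tuple& b,
                          size_t* fields, size_t* bytes)
    {
        for (; *fields < a.size() && *fields < b.size();
             ++*fields, *bytes = 0) {
            const std::string& fa = a[*fields];
            const std::string& fb = b[*fields];
            for (; *bytes < fa.size() && *bytes < fb.size(); ++*bytes) {
                if (fa[*bytes] != fb[*bytes])
                    return fa[*bytes] < fb[*bytes] ? -1 : 1;
            }
            if (fa.size() != fb.size())
                return fa.size() < fb.size() ? -1 : 1;
        }
        return 0;
    }
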
-/**************************************************************//**
-Compares a data tuple to a physical record.
+/** Compare a data tuple to a physical record.
@see cmp_dtuple_rec_with_match
-@return 1, 0, -1, if dtuple is greater, equal, less than rec, respectively */
-UNIV_INTERN
+@param[in] dtuple data tuple
+@param[in] rec B-tree record
+@param[in] offsets rec_get_offsets(rec); may be NULL
+for ROW_FORMAT=REDUNDANT
+@return the comparison result of dtuple and rec
+@retval 0 if dtuple is equal to rec
+@retval negative if dtuple is less than rec
+@retval positive if dtuple is greater than rec */
int
cmp_dtuple_rec(
-/*===========*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ const dtuple_t* dtuple,
+ const rec_t* rec,
+ const offset_t* offsets)
{
ulint matched_fields = 0;
- ulint matched_bytes = 0;
ut_ad(rec_offs_validate(rec, NULL, offsets));
return(cmp_dtuple_rec_with_match(dtuple, rec, offsets,
- &matched_fields, &matched_bytes));
+ &matched_fields));
}
/**************************************************************//**
Checks if a dtuple is a prefix of a record. The last field in dtuple
is allowed to be a prefix of the corresponding field in the record.
-@return TRUE if prefix */
-UNIV_INTERN
+@return TRUE if prefix */
ibool
cmp_dtuple_is_prefix_of_rec(
/*========================*/
const dtuple_t* dtuple, /*!< in: data tuple */
const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
{
ulint n_fields;
ulint matched_fields = 0;
- ulint matched_bytes = 0;
ut_ad(rec_offs_validate(rec, NULL, offsets));
n_fields = dtuple_get_n_fields(dtuple);
if (n_fields > rec_offs_n_fields(offsets)) {
-
+ ut_ad(0);
return(FALSE);
}
- cmp_dtuple_rec_with_match(dtuple, rec, offsets,
- &matched_fields, &matched_bytes);
- if (matched_fields == n_fields) {
-
- return(TRUE);
- }
-
- if (matched_fields == n_fields - 1
- && matched_bytes == dfield_get_len(
- dtuple_get_nth_field(dtuple, n_fields - 1))) {
- return(TRUE);
- }
-
- return(FALSE);
+ cmp_dtuple_rec_with_match(dtuple, rec, offsets, &matched_fields);
+ return(matched_fields == n_fields);
}
/*************************************************************//**
Compare two physical record fields.
-@retval 1 if rec1 field is greater than rec2
-@retval -1 if rec1 field is less than rec2
+@retval positive if rec1 field is greater than rec2
+@retval negative if rec1 field is less than rec2
@retval 0 if rec1 field equals to rec2 */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
int
@@ -929,8 +990,8 @@ cmp_rec_rec_simple_field(
/*=====================*/
const rec_t* rec1, /*!< in: physical record */
const rec_t* rec2, /*!< in: physical record */
- const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */
- const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) */
+ const offset_t* offsets1,/*!< in: rec_get_offsets(rec1, ...) */
+ const offset_t* offsets2,/*!< in: rec_get_offsets(rec2, ...) */
const dict_index_t* index, /*!< in: data dictionary index */
ulint n) /*!< in: field to compare */
{
@@ -946,92 +1007,22 @@ cmp_rec_rec_simple_field(
rec1_b_ptr = rec_get_nth_field(rec1, offsets1, n, &rec1_f_len);
rec2_b_ptr = rec_get_nth_field(rec2, offsets2, n, &rec2_f_len);
- if (rec1_f_len == UNIV_SQL_NULL || rec2_f_len == UNIV_SQL_NULL) {
- if (rec1_f_len == rec2_f_len) {
- return(0);
- }
- /* We define the SQL null to be the smallest possible
- value of a field in the alphabetical order */
- return(rec1_f_len == UNIV_SQL_NULL ? -1 : 1);
- }
-
- if (col->mtype >= DATA_FLOAT
- || (col->mtype == DATA_BLOB
- && !(col->prtype & DATA_BINARY_TYPE)
- && dtype_get_charset_coll(col->prtype)
- != DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) {
- return(cmp_whole_field(col->mtype, col->prtype,
- rec1_b_ptr, (unsigned) rec1_f_len,
- rec2_b_ptr, (unsigned) rec2_f_len));
- }
-
- /* Compare the fields */
- for (ulint cur_bytes = 0;; cur_bytes++, rec1_b_ptr++, rec2_b_ptr++) {
- ulint rec1_byte;
- ulint rec2_byte;
-
- if (rec2_f_len <= cur_bytes) {
- if (rec1_f_len <= cur_bytes) {
- return(0);
- }
-
- rec2_byte = dtype_get_pad_char(
- col->mtype, col->prtype);
-
- if (rec2_byte == ULINT_UNDEFINED) {
- return(1);
- }
- } else {
- rec2_byte = *rec2_b_ptr;
- }
-
- if (rec1_f_len <= cur_bytes) {
- rec1_byte = dtype_get_pad_char(
- col->mtype, col->prtype);
-
- if (rec1_byte == ULINT_UNDEFINED) {
- return(-1);
- }
- } else {
- rec1_byte = *rec1_b_ptr;
- }
-
- if (rec1_byte == rec2_byte) {
- /* If the bytes are equal, they will remain such
- even after the collation transformation below */
- continue;
- }
-
- if (col->mtype <= DATA_CHAR
- || (col->mtype == DATA_BLOB
- && !(col->prtype & DATA_BINARY_TYPE))) {
-
- rec1_byte = cmp_collate(rec1_byte);
- rec2_byte = cmp_collate(rec2_byte);
- }
-
- if (rec1_byte < rec2_byte) {
- return(-1);
- } else if (rec1_byte > rec2_byte) {
- return(1);
- }
- }
+ return(cmp_data(col->mtype, col->prtype,
+ rec1_b_ptr, rec1_f_len, rec2_b_ptr, rec2_f_len));
}
-/*************************************************************//**
-Compare two physical records that contain the same number of columns,
+/** Compare two physical records that contain the same number of columns,
none of which are stored externally.
-@retval 1 if rec1 (including non-ordering columns) is greater than rec2
-@retval -1 if rec1 (including non-ordering columns) is less than rec2
+@retval positive if rec1 (including non-ordering columns) is greater than rec2
+@retval negative if rec1 (including non-ordering columns) is less than rec2
@retval 0 if rec1 is a duplicate of rec2 */
-UNIV_INTERN
int
cmp_rec_rec_simple(
/*===============*/
const rec_t* rec1, /*!< in: physical record */
const rec_t* rec2, /*!< in: physical record */
- const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */
- const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) */
+ const offset_t* offsets1,/*!< in: rec_get_offsets(rec1, ...) */
+ const offset_t* offsets2,/*!< in: rec_get_offsets(rec2, ...) */
const dict_index_t* index, /*!< in: data dictionary index */
struct TABLE* table) /*!< in: MySQL table, for reporting
duplicate key value if applicable,
@@ -1097,50 +1088,40 @@ cmp_rec_rec_simple(
return(0);
}
-/*************************************************************//**
-This function is used to compare two physical records. Only the common
-first fields are compared, and if an externally stored field is
-encountered, then 0 is returned.
-@return 1, 0, -1 if rec1 is greater, equal, less, respectively */
-UNIV_INTERN
+/** Compare two B-tree or R-tree records.
+Only the common first fields are compared, and externally stored fields
+are treated as equal.
+@param[in] rec1 record (possibly not on an index page)
+@param[in] rec2 B-tree or R-tree record in an index page
+@param[in] offsets1 rec_get_offsets(rec1, index)
+@param[in] offsets2 rec_get_offsets(rec2, index)
+@param[in] nulls_unequal true if this is for index cardinality
+ statistics estimation with
+ innodb_stats_method=nulls_unequal
+ or innodb_stats_method=nulls_ignored
+@param[out]	matched_fields	number of completely matched fields
+@retval 0 if rec1 is equal to rec2
+@retval negative if rec1 is less than rec2
+@retval positive if rec1 is greater than rec2 */
int
-cmp_rec_rec_with_match(
-/*===================*/
- const rec_t* rec1, /*!< in: physical record */
- const rec_t* rec2, /*!< in: physical record */
- const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */
- const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */
- dict_index_t* index, /*!< in: data dictionary index */
- ibool nulls_unequal,
- /* in: TRUE if this is for index statistics
- cardinality estimation, and innodb_stats_method
- is "nulls_unequal" or "nulls_ignored" */
- ulint* matched_fields, /*!< in/out: number of already completely
- matched fields; when the function returns,
- contains the value the for current
- comparison */
- ulint* matched_bytes) /*!< in/out: number of already matched
- bytes within the first field not completely
- matched; when the function returns, contains
- the value for the current comparison */
+cmp_rec_rec(
+ const rec_t* rec1,
+ const rec_t* rec2,
+ const offset_t* offsets1,
+ const offset_t* offsets2,
+ const dict_index_t* index,
+ bool nulls_unequal,
+ ulint* matched_fields)
{
- ulint rec1_n_fields; /* the number of fields in rec */
ulint rec1_f_len; /* length of current field in rec */
const byte* rec1_b_ptr; /* pointer to the current byte
in rec field */
- ulint rec1_byte; /* value of current byte to be
- compared in rec */
- ulint rec2_n_fields; /* the number of fields in rec */
ulint rec2_f_len; /* length of current field in rec */
const byte* rec2_b_ptr; /* pointer to the current byte
in rec field */
- ulint rec2_byte; /* value of current byte to be
- compared in rec */
- ulint cur_field; /* current field number */
- ulint cur_bytes; /* number of already matched
- bytes in current field */
+ ulint cur_field = 0; /* current field number */
int ret = 0; /* return value */
- ulint comp;
ut_ad(rec1 != NULL);
ut_ad(rec2 != NULL);
@@ -1148,318 +1129,125 @@ cmp_rec_rec_with_match(
ut_ad(rec_offs_validate(rec1, index, offsets1));
ut_ad(rec_offs_validate(rec2, index, offsets2));
ut_ad(rec_offs_comp(offsets1) == rec_offs_comp(offsets2));
+ ut_ad(fil_page_index_page_check(page_align(rec2)));
+ ut_ad(!!dict_index_is_spatial(index)
+ == (fil_page_get_type(page_align(rec2)) == FIL_PAGE_RTREE));
+
+ ulint comp = rec_offs_comp(offsets1);
+ ulint n_fields;
+
+ /* Test if rec is the predefined minimum record */
+ if (UNIV_UNLIKELY(rec_get_info_bits(rec1, comp)
+ & REC_INFO_MIN_REC_FLAG)) {
+ /* There should only be one such record. */
+ ut_ad(!(rec_get_info_bits(rec2, comp)
+ & REC_INFO_MIN_REC_FLAG));
+ ret = -1;
+ goto order_resolved;
+ } else if (UNIV_UNLIKELY
+ (rec_get_info_bits(rec2, comp)
+ & REC_INFO_MIN_REC_FLAG)) {
+ ret = 1;
+ goto order_resolved;
+ }
- comp = rec_offs_comp(offsets1);
- rec1_n_fields = rec_offs_n_fields(offsets1);
- rec2_n_fields = rec_offs_n_fields(offsets2);
-
- cur_field = *matched_fields;
- cur_bytes = *matched_bytes;
-
- /* Match fields in a loop */
+	/* For non-leaf spatial index records,
+ dict_index_get_n_unique_in_tree() does include the child page
+ number, because spatial index node pointers only contain
+ the MBR (minimum bounding rectangle) and the child page number.
- while ((cur_field < rec1_n_fields) && (cur_field < rec2_n_fields)) {
+ For B-tree node pointers, the key alone (secondary index
+ columns and PRIMARY KEY columns) must be unique, and there is
+ no need to compare the child page number. */
+ n_fields = std::min(rec_offs_n_fields(offsets1),
+ rec_offs_n_fields(offsets2));
+ n_fields = std::min(n_fields, dict_index_get_n_unique_in_tree(index));
+ for (; cur_field < n_fields; cur_field++) {
ulint mtype;
ulint prtype;
- if (dict_index_is_univ(index)) {
+ if (UNIV_UNLIKELY(dict_index_is_ibuf(index))) {
/* This is for the insert buffer B-tree. */
mtype = DATA_BINARY;
prtype = 0;
} else {
- const dict_col_t* col
- = dict_index_get_nth_col(index, cur_field);
-
+ const dict_col_t* col = dict_index_get_nth_col(
+ index, cur_field);
mtype = col->mtype;
prtype = col->prtype;
+
+ if (UNIV_LIKELY(!dict_index_is_spatial(index))) {
+ } else if (cur_field == 0) {
+ ut_ad(DATA_GEOMETRY_MTYPE(mtype));
+ prtype |= DATA_GIS_MBR;
+ } else if (!page_rec_is_leaf(rec2)) {
+ /* Compare the child page number. */
+ ut_ad(cur_field == 1);
+ mtype = DATA_SYS_CHILD;
+ prtype = 0;
+ }
}
+ /* We should never encounter an externally stored field.
+ Externally stored fields only exist in clustered index
+	leaf page records. Such records should already differ
+	in the primary key columns, before DB_TRX_ID,
+ DB_ROLL_PTR, and any externally stored columns. */
+ ut_ad(!rec_offs_nth_extern(offsets1, cur_field));
+ ut_ad(!rec_offs_nth_extern(offsets2, cur_field));
+
rec1_b_ptr = rec_get_nth_field(rec1, offsets1,
cur_field, &rec1_f_len);
rec2_b_ptr = rec_get_nth_field(rec2, offsets2,
cur_field, &rec2_f_len);
- if (cur_bytes == 0) {
- if (cur_field == 0) {
- /* Test if rec is the predefined minimum
- record */
- if (UNIV_UNLIKELY(rec_get_info_bits(rec1, comp)
- & REC_INFO_MIN_REC_FLAG)) {
-
- if (!(rec_get_info_bits(rec2, comp)
- & REC_INFO_MIN_REC_FLAG)) {
- ret = -1;
- }
-
- goto order_resolved;
-
- } else if (UNIV_UNLIKELY
- (rec_get_info_bits(rec2, comp)
- & REC_INFO_MIN_REC_FLAG)) {
-
- ret = 1;
-
- goto order_resolved;
- }
- }
-
- if (rec_offs_nth_extern(offsets1, cur_field)
- || rec_offs_nth_extern(offsets2, cur_field)) {
- /* We do not compare to an externally
- stored field */
-
- goto order_resolved;
- }
-
- if (rec1_f_len == UNIV_SQL_NULL
- || rec2_f_len == UNIV_SQL_NULL) {
-
- if (rec1_f_len == rec2_f_len) {
- /* This is limited to stats collection,
- cannot use it for regular search */
- if (nulls_unequal) {
- ret = -1;
- } else {
- goto next_field;
- }
- } else if (rec2_f_len == UNIV_SQL_NULL) {
-
- /* We define the SQL null to be the
- smallest possible value of a field
- in the alphabetical order */
-
- ret = 1;
- } else {
- ret = -1;
- }
-
- goto order_resolved;
- }
- }
-
- if (mtype >= DATA_FLOAT
- || (mtype == DATA_BLOB
- && 0 == (prtype & DATA_BINARY_TYPE)
- && dtype_get_charset_coll(prtype)
- != DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) {
-
- ret = cmp_whole_field(mtype, prtype,
- rec1_b_ptr,
- (unsigned) rec1_f_len,
- rec2_b_ptr,
- (unsigned) rec2_f_len);
- if (ret != 0) {
- cur_bytes = 0;
-
- goto order_resolved;
- } else {
- goto next_field;
- }
+ if (nulls_unequal
+ && rec1_f_len == UNIV_SQL_NULL
+ && rec2_f_len == UNIV_SQL_NULL) {
+ ret = -1;
+ goto order_resolved;
}
- /* Set the pointers at the current byte */
- rec1_b_ptr = rec1_b_ptr + cur_bytes;
- rec2_b_ptr = rec2_b_ptr + cur_bytes;
-
- /* Compare then the fields */
- for (;;) {
- if (rec2_f_len <= cur_bytes) {
-
- if (rec1_f_len <= cur_bytes) {
-
- goto next_field;
- }
-
- rec2_byte = dtype_get_pad_char(mtype, prtype);
-
- if (rec2_byte == ULINT_UNDEFINED) {
- ret = 1;
-
- goto order_resolved;
- }
- } else {
- rec2_byte = *rec2_b_ptr;
- }
-
- if (rec1_f_len <= cur_bytes) {
- rec1_byte = dtype_get_pad_char(mtype, prtype);
-
- if (rec1_byte == ULINT_UNDEFINED) {
- ret = -1;
-
- goto order_resolved;
- }
- } else {
- rec1_byte = *rec1_b_ptr;
- }
-
- if (rec1_byte == rec2_byte) {
- /* If the bytes are equal, they will remain
- such even after the collation transformation
- below */
-
- goto next_byte;
- }
-
- if (mtype <= DATA_CHAR
- || (mtype == DATA_BLOB
- && !(prtype & DATA_BINARY_TYPE))) {
-
- rec1_byte = cmp_collate(rec1_byte);
- rec2_byte = cmp_collate(rec2_byte);
- }
-
- if (rec1_byte < rec2_byte) {
- ret = -1;
- goto order_resolved;
- } else if (rec1_byte > rec2_byte) {
- ret = 1;
- goto order_resolved;
- }
-next_byte:
- /* Next byte */
-
- cur_bytes++;
- rec1_b_ptr++;
- rec2_b_ptr++;
+ ret = cmp_data(mtype, prtype,
+ rec1_b_ptr, rec1_f_len,
+ rec2_b_ptr, rec2_f_len);
+ if (ret) {
+ goto order_resolved;
}
-
-next_field:
- cur_field++;
- cur_bytes = 0;
}
- ut_ad(cur_bytes == 0);
-
/* If we ran out of fields, rec1 was equal to rec2 up
to the common fields */
ut_ad(ret == 0);
order_resolved:
-
- ut_ad((ret >= - 1) && (ret <= 1));
-
- *matched_fields = cur_field;
- *matched_bytes = cur_bytes;
-
- return(ret);
+ if (matched_fields) {
+ *matched_fields = cur_field;
+ }
+ return ret;
}
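
After the rewrite, cmp_rec_rec() boils down to: resolve the predefined minimum record first, cap the loop at the number of fields that are unique in the tree, delegate each field to cmp_data(), and fill matched_fields only when the caller asked for it. A minimal sketch of that contract over integer "fields" (a hypothetical helper, not the real signature):

#include <algorithm>
#include <cstddef>
#include <vector>

// Returns <0, 0, >0; reports how many leading fields matched, if asked.
int cmp_records(const std::vector<int>& rec1, const std::vector<int>& rec2,
                std::size_t n_unique, std::size_t* matched_fields = nullptr)
{
	std::size_t n = std::min({rec1.size(), rec2.size(), n_unique});
	std::size_t i = 0;
	int ret = 0;
	for (; i < n; i++) {
		if (rec1[i] != rec2[i]) {
			ret = rec1[i] < rec2[i] ? -1 : 1; // first difference decides
			break;
		}
	}
	if (matched_fields) {
		*matched_fields = i; // callers passing nullptr skip the bookkeeping
	}
	return ret;
}
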
-#ifdef UNIV_DEBUG
-/*************************************************************//**
-Used in debug checking of cmp_dtuple_... .
-This function is used to compare a data tuple to a physical record. If
-dtuple has n fields then rec must have either m >= n fields, or it must
-differ from dtuple in some of the m fields rec has. If encounters an
-externally stored field, returns 0.
-@return 1, 0, -1, if dtuple is greater, equal, less than rec,
-respectively, when only the common first fields are compared */
-static
-int
-cmp_debug_dtuple_rec_with_match(
-/*============================*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record which differs from
- dtuple in some of the common fields, or which
- has an equal number or more fields than
- dtuple */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n_cmp, /*!< in: number of fields to compare */
- ulint* matched_fields) /*!< in/out: number of already
- completely matched fields; when function
- returns, contains the value for current
- comparison */
-{
- const dfield_t* dtuple_field; /* current field in logical record */
- ulint dtuple_f_len; /* the length of the current field
- in the logical record */
- const byte* dtuple_f_data; /* pointer to the current logical
- field data */
- ulint rec_f_len; /* length of current field in rec */
- const byte* rec_f_data; /* pointer to the current rec field */
- int ret; /* return value */
- ulint cur_field; /* current field number */
-
- ut_ad(dtuple != NULL);
- ut_ad(rec != NULL);
- ut_ad(matched_fields != NULL);
- ut_ad(dtuple_check_typed(dtuple));
- ut_ad(rec_offs_validate(rec, NULL, offsets));
-
- ut_ad(n_cmp > 0);
- ut_ad(n_cmp <= dtuple_get_n_fields(dtuple));
- ut_ad(*matched_fields <= n_cmp);
- ut_ad(*matched_fields <= rec_offs_n_fields(offsets));
-
- cur_field = *matched_fields;
-
- if (cur_field == 0) {
- if (UNIV_UNLIKELY
- (rec_get_info_bits(rec, rec_offs_comp(offsets))
- & REC_INFO_MIN_REC_FLAG)) {
-
- ret = !(dtuple_get_info_bits(dtuple)
- & REC_INFO_MIN_REC_FLAG);
+#ifdef UNIV_COMPILE_TEST_FUNCS
- goto order_resolved;
- }
+#ifdef HAVE_UT_CHRONO_T
- if (UNIV_UNLIKELY
- (dtuple_get_info_bits(dtuple) & REC_INFO_MIN_REC_FLAG)) {
- ret = -1;
+void
+test_cmp_data_data(ulint len)
+{
+ int i;
+ static byte zeros[64];
- goto order_resolved;
- }
+ if (len > sizeof zeros) {
+ len = sizeof zeros;
}
- /* Match fields in a loop; stop if we run out of fields in dtuple */
-
- while (cur_field < n_cmp) {
-
- ulint mtype;
- ulint prtype;
-
- dtuple_field = dtuple_get_nth_field(dtuple, cur_field);
- {
- const dtype_t* type
- = dfield_get_type(dtuple_field);
-
- mtype = type->mtype;
- prtype = type->prtype;
- }
-
- dtuple_f_data = static_cast<const byte*>(
- dfield_get_data(dtuple_field));
-
- dtuple_f_len = dfield_get_len(dtuple_field);
-
- rec_f_data = rec_get_nth_field(rec, offsets,
- cur_field, &rec_f_len);
-
- if (rec_offs_nth_extern(offsets, cur_field)) {
- /* We do not compare to an externally stored field */
-
- ret = 0;
+ ut_chrono_t ch(__func__);
- goto order_resolved;
- }
-
- ret = cmp_data_data(mtype, prtype, dtuple_f_data, dtuple_f_len,
- rec_f_data, rec_f_len);
- if (ret != 0) {
- goto order_resolved;
- }
-
- cur_field++;
+ for (i = 1000000; i > 0; i--) {
+ i += cmp_data(DATA_INT, 0, zeros, len, zeros, len);
}
+}
- ret = 0; /* If we ran out of fields, dtuple was equal to rec
- up to the common fields */
-order_resolved:
- ut_ad((ret >= - 1) && (ret <= 1));
-
- *matched_fields = cur_field;
+#endif /* HAVE_UT_CHRONO_T */
- return(ret);
-}
-#endif /* UNIV_DEBUG */
+#endif /* UNIV_COMPILE_TEST_FUNCS */
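
test_cmp_data_data() folds the comparison result back into the loop counter so the compiler cannot prove the loop body unused and discard it, and HAVE_UT_CHRONO_T merely gates the timing helper. A standalone version of the same trick (cmp_stub() and the clock() timing are stand-ins, not InnoDB API):

#include <cstddef>
#include <cstdio>
#include <ctime>

// Hypothetical stand-in for cmp_data(); returns 0 for equal inputs.
static int cmp_stub(const unsigned char* a, const unsigned char* b,
                    std::size_t n)
{
	for (std::size_t i = 0; i < n; i++) {
		if (a[i] != b[i]) {
			return a[i] < b[i] ? -1 : 1;
		}
	}
	return 0;
}

void bench_cmp(std::size_t len)
{
	static unsigned char zeros[64];
	if (len > sizeof zeros) {
		len = sizeof zeros;
	}
	std::clock_t t0 = std::clock();
	for (int i = 1000000; i > 0; i--) {
		// cmp_stub() returns 0 here, but adding the result to the
		// loop variable keeps the call observable to the optimizer.
		i += cmp_stub(zeros, zeros, len);
	}
	std::printf("%.3fs\n", double(std::clock() - t0) / CLOCKS_PER_SEC);
}
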
diff --git a/storage/innobase/rem/rem0rec.cc b/storage/innobase/rem/rem0rec.cc
index daab2d21718..3abe483fd91 100644
--- a/storage/innobase/rem/rem0rec.cc
+++ b/storage/innobase/rem/rem0rec.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, 2019, MariaDB Corporation.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,18 +25,10 @@ Created 5/30/1994 Heikki Tuuri
*************************************************************************/
#include "rem0rec.h"
-
-#ifdef UNIV_NONINL
-#include "rem0rec.ic"
-#endif
-
#include "page0page.h"
-#include "mtr0mtr.h"
#include "mtr0log.h"
#include "fts0fts.h"
-#ifdef WITH_WSREP
-#include <ha_prototypes.h>
-#endif /* WITH_WSREP */
+#include "trx0sys.h"
/* PHYSICAL RECORD (OLD STYLE)
===========================
@@ -147,12 +139,9 @@ end of some field (containing also <FIELD-END>).
A record is a complete-field prefix of another record, if
the corresponding canonical strings have the same property. */
-/* this is used to fool compiler in rec_validate */
-UNIV_INTERN ulint rec_dummy;
-
/***************************************************************//**
Validates the consistency of an old-style physical record.
-@return TRUE if ok */
+@return TRUE if ok */
static
ibool
rec_validate_old(
@@ -162,8 +151,7 @@ rec_validate_old(
/******************************************************//**
Determine how many of the first n columns in a compact
physical record are stored externally.
-@return number of externally stored columns */
-UNIV_INTERN
+@return number of externally stored columns */
ulint
rec_get_n_extern_new(
/*=================*/
@@ -225,8 +213,7 @@ rec_get_n_extern_new(
stored in one byte for 0..127. The length
will be encoded in two bytes when it is 128 or
more, or when the field is stored externally. */
- if (UNIV_UNLIKELY(col->len > 255)
- || UNIV_UNLIKELY(col->mtype == DATA_BLOB)) {
+ if (DATA_BIG_COL(col)) {
if (len & 0x80) {
/* 1exxxxxxx xxxxxxxx */
if (len & 0x40) {
@@ -255,12 +242,12 @@ rec_init_offsets_comp_ordinary(
format for temporary files in
index creation */
const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets)/*!< in/out: array of offsets;
+ offset_t* offsets)/*!< in/out: array of offsets;
in: n=rec_offs_n_fields(offsets) */
{
ulint i = 0;
- ulint offs = 0;
- ulint any_ext = 0;
+ offset_t offs = 0;
+ offset_t any_ext = 0;
ulint n_null = index->n_nullable;
const byte* nulls = temp
? rec - 1
@@ -272,8 +259,8 @@ rec_init_offsets_comp_ordinary(
/* We cannot invoke rec_offs_make_valid() here if temp=true.
Similarly, rec_offs_validate() will fail in that case, because
it invokes rec_get_status(). */
- offsets[2] = (ulint) rec;
- offsets[3] = (ulint) index;
+ memcpy(&offsets[RECORD_OFFSET], &rec, sizeof(rec));
+ memcpy(&offsets[INDEX_OFFSET], &index, sizeof(index));
#endif /* UNIV_DEBUG */
ut_ad(temp || dict_table_is_comp(index->table));
@@ -290,7 +277,7 @@ rec_init_offsets_comp_ordinary(
= dict_index_get_nth_field(index, i);
const dict_col_t* col
= dict_field_get_col(field);
- ulint len;
+ offset_t len;
if (!(col->prtype & DATA_NOT_NULL)) {
/* nullable field => read the null flag */
@@ -307,7 +294,7 @@ rec_init_offsets_comp_ordinary(
We do not advance offs, and we set
the length to zero and enable the
SQL NULL flag in offsets[]. */
- len = offs | REC_OFFS_SQL_NULL;
+ len = combine(offs, SQL_NULL);
goto resolved;
}
null_mask <<= 1;
@@ -324,22 +311,20 @@ rec_init_offsets_comp_ordinary(
stored in one byte for 0..127. The length
will be encoded in two bytes when it is 128 or
more, or when the field is stored externally. */
- if (UNIV_UNLIKELY(col->len > 255)
- || UNIV_UNLIKELY(col->mtype
- == DATA_BLOB)) {
+ if (DATA_BIG_COL(col)) {
if (len & 0x80) {
/* 1exxxxxxx xxxxxxxx */
len <<= 8;
len |= *lens--;
- offs += len & 0x3fff;
+ offs += get_value(len);
if (UNIV_UNLIKELY(len
& 0x4000)) {
ut_ad(dict_index_is_clust
(index));
any_ext = REC_OFFS_EXTERNAL;
- len = offs
- | REC_OFFS_EXTERNAL;
+ len = combine(offs,
+ STORED_OFFPAGE);
} else {
len = offs;
}
@@ -356,8 +341,8 @@ resolved:
rec_offs_base(offsets)[i + 1] = len;
} while (++i < rec_offs_n_fields(offsets));
- *rec_offs_base(offsets)
- = (rec - (lens + 1)) | REC_OFFS_COMPACT | any_ext;
+ *rec_offs_base(offsets) = static_cast<offset_t>(rec - (lens + 1))
+ | REC_OFFS_COMPACT | any_ext;
}
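
rec_init_offsets_comp_ordinary() above is the first site to stop OR-ing REC_OFFS_SQL_NULL / REC_OFFS_EXTERNAL into raw ulint offsets and to go through combine(), set_type() and get_value() on the new offset_t instead. A hypothetical model of that encoding (the bit positions and the 16-bit width are assumptions for illustration; only the idea of type bits multiplexed into the high end of a narrow offset is taken from the patch):

#include <cassert>
#include <cstdint>

typedef uint16_t offset_t;

enum field_type_t {
	STORED_ONPAGE  = 0u << 14,
	STORED_OFFPAGE = 1u << 14, // externally stored column
	SQL_NULL       = 2u << 14  // NULL value
};

static const offset_t TYPE_MASK  = offset_t(3u << 14);
static const offset_t VALUE_MASK = offset_t(~(3u << 14));

inline offset_t combine(offset_t value, field_type_t type)
{ return offset_t((value & VALUE_MASK) | type); }

inline offset_t get_value(offset_t o) { return offset_t(o & VALUE_MASK); }

inline void set_type(offset_t& o, field_type_t type)
{ o = combine(get_value(o), type); }

int main()
{
	offset_t end = combine(120, SQL_NULL);
	assert(get_value(end) == 120);
	set_type(end, STORED_OFFPAGE);
	assert((end & TYPE_MASK) == STORED_OFFPAGE);
}

With this model in mind, rec_offs_make_nth_extern() near the end of the patch is a one-liner over set_type(), and get_value() replaces the scattered "& 0x3fff" masks.
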
/******************************************************//**
@@ -370,21 +355,20 @@ to the extra size (if REC_OFFS_COMPACT is set, the record is in the
new format; if REC_OFFS_EXTERNAL is set, the record contains externally
stored columns), and rec_offs_base(offsets)[1..n_fields] will be set to
offsets past the end of fields 0..n_fields, or to the beginning of
-fields 1..n_fields+1. When the high-order bit of the offset at [i+1]
-is set (REC_OFFS_SQL_NULL), the field i is NULL. When the second
-high-order bit of the offset at [i+1] is set (REC_OFFS_EXTERNAL), the
-field i is being stored externally. */
+fields 1..n_fields+1. When the type of the offset at [i+1]
+is SQL_NULL, the field i is NULL. When the type of the offset at [i+1]
+is STORED_OFFPAGE, the field i is stored externally. */
static
void
rec_init_offsets(
/*=============*/
const rec_t* rec, /*!< in: physical record */
const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets)/*!< in/out: array of offsets;
+ offset_t* offsets)/*!< in/out: array of offsets;
in: n=rec_offs_n_fields(offsets) */
{
ulint i = 0;
- ulint offs;
+ offset_t offs;
rec_offs_make_valid(rec, index, offsets);
@@ -406,7 +390,8 @@ rec_init_offsets(
return;
case REC_STATUS_NODE_PTR:
n_node_ptr_field
- = dict_index_get_n_unique_in_tree(index);
+ = dict_index_get_n_unique_in_tree_nonleaf(
+ index);
break;
case REC_STATUS_ORDINARY:
rec_init_offsets_comp_ordinary(
@@ -421,7 +406,7 @@ rec_init_offsets(
/* read the lengths of fields 0..n */
do {
- ulint len;
+ offset_t len;
if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
len = offs += REC_NODE_PTR_SIZE;
goto resolved;
@@ -443,16 +428,16 @@ rec_init_offsets(
We do not advance offs, and we set
the length to zero and enable the
SQL NULL flag in offsets[]. */
- len = offs | REC_OFFS_SQL_NULL;
+ len = combine(offs, SQL_NULL);
goto resolved;
}
null_mask <<= 1;
}
if (UNIV_UNLIKELY(!field->fixed_len)) {
- /* Variable-length field: read the length */
const dict_col_t* col
= dict_field_get_col(field);
+ /* Variable-length field: read the length */
len = *lens--;
/* If the maximum length of the field
is up to 255 bytes, the actual length
@@ -463,9 +448,7 @@ rec_init_offsets(
encoded in two bytes when it is 128 or
more, or when the field is stored
externally. */
- if (UNIV_UNLIKELY(col->len > 255)
- || UNIV_UNLIKELY(col->mtype
- == DATA_BLOB)) {
+ if (DATA_BIG_COL(col)) {
if (len & 0x80) {
/* 1exxxxxxx xxxxxxxx */
@@ -477,7 +460,7 @@ rec_init_offsets(
stored columns. Thus
the "e" flag must be 0. */
ut_a(!(len & 0x4000));
- offs += len & 0x3fff;
+ offs += get_value(len);
len = offs;
goto resolved;
@@ -492,36 +475,39 @@ resolved:
rec_offs_base(offsets)[i + 1] = len;
} while (++i < rec_offs_n_fields(offsets));
- *rec_offs_base(offsets)
- = (rec - (lens + 1)) | REC_OFFS_COMPACT;
+ *rec_offs_base(offsets) = static_cast<offset_t>(
+ (rec - (lens + 1)) | REC_OFFS_COMPACT);
} else {
/* Old-style record: determine extra size and end offsets */
offs = REC_N_OLD_EXTRA_BYTES;
if (rec_get_1byte_offs_flag(rec)) {
- offs += rec_offs_n_fields(offsets);
+ offs += static_cast<offset_t>(
+ rec_offs_n_fields(offsets));
*rec_offs_base(offsets) = offs;
/* Determine offsets to fields */
do {
offs = rec_1_get_field_end_info(rec, i);
if (offs & REC_1BYTE_SQL_NULL_MASK) {
offs &= ~REC_1BYTE_SQL_NULL_MASK;
- offs |= REC_OFFS_SQL_NULL;
+ set_type(offs, SQL_NULL);
}
rec_offs_base(offsets)[1 + i] = offs;
} while (++i < rec_offs_n_fields(offsets));
} else {
- offs += 2 * rec_offs_n_fields(offsets);
+ offs += 2
+ * static_cast<offset_t>(
+ rec_offs_n_fields(offsets));
*rec_offs_base(offsets) = offs;
/* Determine offsets to fields */
do {
offs = rec_2_get_field_end_info(rec, i);
if (offs & REC_2BYTE_SQL_NULL_MASK) {
offs &= ~REC_2BYTE_SQL_NULL_MASK;
- offs |= REC_OFFS_SQL_NULL;
+ set_type(offs, SQL_NULL);
}
if (offs & REC_2BYTE_EXTERN_MASK) {
offs &= ~REC_2BYTE_EXTERN_MASK;
- offs |= REC_OFFS_EXTERNAL;
+ set_type(offs, STORED_OFFPAGE);
*rec_offs_base(offsets) |= REC_OFFS_EXTERNAL;
}
rec_offs_base(offsets)[1 + i] = offs;
@@ -530,26 +516,28 @@ resolved:
}
}
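
rec_init_offsets_comp_ordinary(), rec_init_offsets() and rec_get_offsets_reverse() all parse the same COMPACT length bytes, now via DATA_BIG_COL() and get_value(). A standalone decoder for the byte layout the comments describe (reading forward for clarity, whereas InnoDB walks the length bytes backwards with lens--):

#include <cstdint>
#include <cstdio>

struct field_len {
	uint16_t len;       // decoded field length
	bool extern_stored; // "e" flag: column stored off-page
	uint8_t n_bytes;    // how many length bytes were consumed
};

// lens points at the first length byte of a DATA_BIG_COL field.
static field_len decode_len(const uint8_t* lens)
{
	const uint8_t b0 = lens[0];
	if (!(b0 & 0x80)) {
		return { b0, false, 1 }; // 0xxxxxxx: 0..127 in one byte
	}
	// 1exxxxxx xxxxxxxx: 14-bit length, e = externally stored
	const uint16_t len = uint16_t(((b0 << 8) | lens[1]) & 0x3fff);
	return { len, (b0 & 0x40) != 0, 2 };
}

int main()
{
	const uint8_t one[] = { 0x30 };       // length 48
	const uint8_t two[] = { 0xc1, 0x00 }; // extern flag set, length 256
	const field_len a = decode_len(one);
	const field_len b = decode_len(two);
	std::printf("%u ext=%d / %u ext=%d\n",
		    unsigned(a.len), int(a.extern_stored),
		    unsigned(b.len), int(b.extern_stored));
}
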
-/******************************************************//**
-The following function determines the offsets to each field
-in the record. It can reuse a previously returned array.
-@return the new offsets */
-UNIV_INTERN
-ulint*
+/** Determine the offsets to each field in an index record.
+@param[in] rec physical record
+@param[in] index the index that the record belongs to
+@param[in,out] offsets array comprising offsets[0] allocated elements,
+ or an array from rec_get_offsets(), or NULL
+@param[in] leaf whether this is a leaf-page record
+@param[in] n_fields maximum number of offsets to compute
+ (ULINT_UNDEFINED to compute all offsets)
+@param[in,out] heap memory heap
+@return the new offsets */
+offset_t*
rec_get_offsets_func(
-/*=================*/
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets,/*!< in/out: array consisting of
- offsets[0] allocated elements,
- or an array from rec_get_offsets(),
- or NULL */
- ulint n_fields,/*!< in: maximum number of
- initialized fields
- (ULINT_UNDEFINED if all fields) */
+ const rec_t* rec,
+ const dict_index_t* index,
+ offset_t* offsets,
+#ifdef UNIV_DEBUG
+ bool leaf,
+#endif /* UNIV_DEBUG */
+ ulint n_fields,
#ifdef UNIV_DEBUG
const char* file, /*!< in: file name where called */
- ulint line, /*!< in: line number where called */
+ unsigned line, /*!< in: line number where called */
#endif /* UNIV_DEBUG */
mem_heap_t** heap) /*!< in/out: memory heap */
{
@@ -560,17 +548,24 @@ rec_get_offsets_func(
switch (UNIV_EXPECT(rec_get_status(rec),
REC_STATUS_ORDINARY)) {
case REC_STATUS_ORDINARY:
+ ut_ad(leaf);
n = dict_index_get_n_fields(index);
break;
case REC_STATUS_NODE_PTR:
/* Node pointer records consist of the
uniquely identifying fields of the record
followed by a child page number field. */
- n = dict_index_get_n_unique_in_tree(index) + 1;
+ ut_ad(!leaf);
+ n = dict_index_get_n_unique_in_tree_nonleaf(index) + 1;
break;
case REC_STATUS_INFIMUM:
case REC_STATUS_SUPREMUM:
/* infimum or supremum record */
+ ut_ad(rec_get_heap_no_new(rec)
+ == ulint(rec_get_status(rec)
+ == REC_STATUS_INFIMUM
+ ? PAGE_HEAP_NO_INFIMUM
+ : PAGE_HEAP_NO_SUPREMUM));
n = 1;
break;
default:
@@ -579,6 +574,27 @@ rec_get_offsets_func(
}
} else {
n = rec_get_n_fields_old(rec);
+ /* Here, rec can be allocated from the heap (copied
+ from an index page record), or it can be located in an
+ index page. If rec is not in an index page, then
+ page_rec_is_user_rec(rec) and similar predicates
+ cannot be evaluated. We can still distinguish the
+ infimum and supremum record based on the heap number. */
+ ut_d(const bool is_user_rec = rec_get_heap_no_old(rec)
+ >= PAGE_HEAP_NO_USER_LOW);
+ /* The infimum and supremum records carry 1 field. */
+ ut_ad(is_user_rec || n == 1);
+ ut_ad(!is_user_rec || leaf || index->is_dummy
+ || dict_index_is_ibuf(index)
+ || n == n_fields /* dict_stats_analyze_index_level() */
+ || n
+ == dict_index_get_n_unique_in_tree_nonleaf(index) + 1);
+ ut_ad(!is_user_rec || !leaf || index->is_dummy
+ || dict_index_is_ibuf(index)
+ || n == n_fields /* btr_pcur_restore_position() */
+ || n == index->n_fields
+ || (index->id == DICT_INDEXES_ID
+ && (n == DICT_NUM_FIELDS__SYS_INDEXES - 1)));
}
if (UNIV_UNLIKELY(n_fields < n)) {
@@ -592,11 +608,11 @@ rec_get_offsets_func(
if (UNIV_UNLIKELY(!offsets)
|| UNIV_UNLIKELY(rec_offs_get_n_alloc(offsets) < size)) {
if (UNIV_UNLIKELY(!*heap)) {
- *heap = mem_heap_create_at(size * sizeof(ulint),
+ *heap = mem_heap_create_at(size * sizeof(*offsets),
file, line);
}
- offsets = static_cast<ulint*>(
- mem_heap_alloc(*heap, size * sizeof(ulint)));
+ offsets = static_cast<offset_t*>(
+ mem_heap_alloc(*heap, size * sizeof(*offsets)));
rec_offs_set_n_alloc(offsets, size);
}
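
The switch from size * sizeof(ulint) to size * sizeof(*offsets) is what makes the narrower element type pay off here: the allocation tracks the pointee, so the element size can never drift out of sync with the array type again. A sketch of the idiom (plain malloc standing in for mem_heap_alloc(); the offset_t width is assumed as in the model above):

#include <cstdint>
#include <cstdlib>

typedef uint16_t offset_t;

static offset_t* alloc_offsets(std::size_t n_elems)
{
	// sizeof(*p) follows the pointee: change offset_t once,
	// and every allocation site stays correct.
	offset_t* p = static_cast<offset_t*>(
		std::malloc(n_elems * sizeof(*p)));
	return p;
}
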
@@ -609,7 +625,6 @@ rec_get_offsets_func(
/******************************************************//**
The following function determines the offsets to each field
in the record. It can reuse a previously allocated array. */
-UNIV_INTERN
void
rec_get_offsets_reverse(
/*====================*/
@@ -620,13 +635,13 @@ rec_get_offsets_reverse(
const dict_index_t* index, /*!< in: record descriptor */
ulint node_ptr,/*!< in: nonzero=node pointer,
0=leaf node */
- ulint* offsets)/*!< in/out: array consisting of
+ offset_t* offsets)/*!< in/out: array consisting of
offsets[0] allocated elements */
{
ulint n;
ulint i;
- ulint offs;
- ulint any_ext;
+ offset_t offs;
+ offset_t any_ext = 0;
const byte* nulls;
const byte* lens;
dict_field_t* field;
@@ -636,7 +651,8 @@ rec_get_offsets_reverse(
ut_ad(dict_table_is_comp(index->table));
if (UNIV_UNLIKELY(node_ptr)) {
- n_node_ptr_field = dict_index_get_n_unique_in_tree(index);
+ n_node_ptr_field =
+ dict_index_get_n_unique_in_tree_nonleaf(index);
n = n_node_ptr_field + 1;
} else {
n_node_ptr_field = ULINT_UNDEFINED;
@@ -650,11 +666,10 @@ rec_get_offsets_reverse(
lens = nulls + UT_BITS_IN_BYTES(index->n_nullable);
i = offs = 0;
null_mask = 1;
- any_ext = 0;
/* read the lengths of fields 0..n */
do {
- ulint len;
+ offset_t len;
if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
len = offs += REC_NODE_PTR_SIZE;
goto resolved;
@@ -675,7 +690,7 @@ rec_get_offsets_reverse(
We do not advance offs, and we set
the length to zero and enable the
SQL NULL flag in offsets[]. */
- len = offs | REC_OFFS_SQL_NULL;
+ len = combine(offs, SQL_NULL);
goto resolved;
}
null_mask <<= 1;
@@ -693,17 +708,17 @@ rec_get_offsets_reverse(
stored in one byte for 0..127. The length
will be encoded in two bytes when it is 128 or
more, or when the field is stored externally. */
- if (UNIV_UNLIKELY(col->len > 255)
- || UNIV_UNLIKELY(col->mtype == DATA_BLOB)) {
+ if (DATA_BIG_COL(col)) {
if (len & 0x80) {
/* 1exxxxxxx xxxxxxxx */
len <<= 8;
len |= *lens++;
- offs += len & 0x3fff;
+ offs += get_value(len);
if (UNIV_UNLIKELY(len & 0x4000)) {
any_ext = REC_OFFS_EXTERNAL;
- len = offs | REC_OFFS_EXTERNAL;
+ len = combine(offs,
+ STORED_OFFPAGE);
} else {
len = offs;
}
@@ -714,22 +729,22 @@ rec_get_offsets_reverse(
len = offs += len;
} else {
- len = offs += field->fixed_len;
+ len = offs += static_cast<offset_t>(field->fixed_len);
}
resolved:
rec_offs_base(offsets)[i + 1] = len;
} while (++i < rec_offs_n_fields(offsets));
ut_ad(lens >= extra);
- *rec_offs_base(offsets) = (lens - extra + REC_N_NEW_EXTRA_BYTES)
- | REC_OFFS_COMPACT | any_ext;
+ *rec_offs_base(offsets)
+ = static_cast<offset_t>((lens - extra + REC_N_NEW_EXTRA_BYTES)
+ | REC_OFFS_COMPACT | any_ext);
}
/************************************************************//**
The following function is used to get the offset to the nth
data field in an old-style record.
-@return offset to the field */
-UNIV_INTERN
+@return offset to the field */
ulint
rec_get_nth_field_offs_old(
/*=======================*/
@@ -779,7 +794,7 @@ rec_get_nth_field_offs_old(
/**********************************************************//**
Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
-@return total size */
+@return total size */
UNIV_INLINE MY_ATTRIBUTE((warn_unused_result, nonnull(1,2)))
ulint
rec_get_converted_size_comp_prefix_low(
@@ -797,15 +812,15 @@ rec_get_converted_size_comp_prefix_low(
ulint extra_size;
ulint data_size;
ulint i;
- ulint n_null = index->n_nullable;
ut_ad(n_fields > 0);
ut_ad(n_fields <= dict_index_get_n_fields(index));
ut_ad(!temp || extra);
+ ut_d(ulint n_null = index->n_nullable);
+
extra_size = temp
- ? UT_BITS_IN_BYTES(n_null)
- : REC_N_NEW_EXTRA_BYTES
- + UT_BITS_IN_BYTES(n_null);
+ ? UT_BITS_IN_BYTES(index->n_nullable)
+ : REC_N_NEW_EXTRA_BYTES + UT_BITS_IN_BYTES(index->n_nullable);
data_size = 0;
if (temp && dict_table_is_comp(index->table)) {
@@ -825,8 +840,20 @@ rec_get_converted_size_comp_prefix_low(
len = dfield_get_len(&fields[i]);
col = dict_field_get_col(field);
- ut_ad(dict_col_type_assert_equal(col,
- dfield_get_type(&fields[i])));
+#ifdef UNIV_DEBUG
+ const dtype_t* type = dfield_get_type(&fields[i]);
+ if (dict_index_is_spatial(index)) {
+ if (DATA_GEOMETRY_MTYPE(col->mtype) && i == 0) {
+ ut_ad(type->prtype & DATA_GIS_MBR);
+ } else {
+ ut_ad(type->mtype == DATA_SYS_CHILD
+ || dict_col_type_assert_equal(col, type));
+ }
+ } else {
+ ut_ad(dict_col_type_assert_equal(col, type));
+ }
+#endif
+
/* All NULLable fields must be included in the n_null count. */
ut_ad((col->prtype & DATA_NOT_NULL) || n_null--);
@@ -836,7 +863,7 @@ rec_get_converted_size_comp_prefix_low(
continue;
}
- ut_ad(len <= col->len || col->mtype == DATA_BLOB
+ ut_ad(len <= col->len || DATA_LARGE_MTYPE(col->mtype)
|| (col->len == 0 && col->mtype == DATA_VARCHAR));
fixed_len = field->fixed_len;
@@ -855,18 +882,26 @@ rec_get_converted_size_comp_prefix_low(
#ifdef UNIV_DEBUG
ut_ad(len <= fixed_len);
- ut_ad(!col->mbmaxlen || len >= col->mbminlen
- * (fixed_len / col->mbmaxlen));
+ if (dict_index_is_spatial(index)) {
+ ut_ad(type->mtype == DATA_SYS_CHILD
+ || !col->mbmaxlen
+ || len >= col->mbminlen
+ * fixed_len / col->mbmaxlen);
+ } else {
+ ut_ad(type->mtype != DATA_SYS_CHILD);
+ ut_ad(!col->mbmaxlen
+ || len >= col->mbminlen
+ * fixed_len / col->mbmaxlen);
+ }
/* dict_index_add_col() should guarantee this */
ut_ad(!field->prefix_len
|| fixed_len == field->prefix_len);
#endif /* UNIV_DEBUG */
} else if (dfield_is_ext(&fields[i])) {
- ut_ad(col->len >= 256 || col->mtype == DATA_BLOB);
+ ut_ad(DATA_BIG_COL(col));
extra_size += 2;
- } else if (len < 128
- || (col->len < 256 && col->mtype != DATA_BLOB)) {
+ } else if (len < 128 || !DATA_BIG_COL(col)) {
extra_size++;
} else {
/* For variable-length columns, we look up the
@@ -887,8 +922,7 @@ rec_get_converted_size_comp_prefix_low(
/**********************************************************//**
Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
-@return total size */
-UNIV_INTERN
+@return total size */
ulint
rec_get_converted_size_comp_prefix(
/*===============================*/
@@ -904,8 +938,7 @@ rec_get_converted_size_comp_prefix(
/**********************************************************//**
Determines the size of a data tuple in ROW_FORMAT=COMPACT.
-@return total size */
-UNIV_INTERN
+@return total size */
ulint
rec_get_converted_size_comp(
/*========================*/
@@ -928,7 +961,8 @@ rec_get_converted_size_comp(
break;
case REC_STATUS_NODE_PTR:
n_fields--;
- ut_ad(n_fields == dict_index_get_n_unique_in_tree(index));
+ ut_ad(n_fields == dict_index_get_n_unique_in_tree_nonleaf(
+ index));
ut_ad(dfield_get_len(&fields[n_fields]) == REC_NODE_PTR_SIZE);
size = REC_NODE_PTR_SIZE; /* child page number */
break;
@@ -950,7 +984,6 @@ rec_get_converted_size_comp(
/***********************************************************//**
Sets the value of the ith field SQL null bit of an old-style record. */
-UNIV_INTERN
void
rec_set_nth_field_null_bit(
/*=======================*/
@@ -989,7 +1022,6 @@ rec_set_nth_field_null_bit(
/***********************************************************//**
Sets an old-style record field to SQL null.
The physical size of the field is not changed. */
-UNIV_INTERN
void
rec_set_nth_field_sql_null(
/*=======================*/
@@ -1008,7 +1040,7 @@ rec_set_nth_field_sql_null(
/*********************************************************//**
Builds an old-style physical record out of a data tuple and
stores it beginning from the start of the given buffer.
-@return pointer to the origin of physical record */
+@return pointer to the origin of physical record */
static
rec_t*
rec_convert_dtuple_to_rec_old(
@@ -1038,17 +1070,14 @@ rec_convert_dtuple_to_rec_old(
/* Calculate the offset of the origin in the physical record */
rec = buf + rec_get_converted_extra_size(data_size, n_fields, n_ext);
-#ifdef UNIV_DEBUG
- /* Suppress Valgrind warnings of ut_ad()
- in mach_write_to_1(), mach_write_to_2() et al. */
- memset(buf, 0xff, rec - buf + data_size);
-#endif /* UNIV_DEBUG */
/* Store the number of fields */
rec_set_n_fields_old(rec, n_fields);
/* Set the info bits of the record */
rec_set_info_bits_old(rec, dtuple_get_info_bits(dtuple)
& REC_INFO_BITS_MASK);
+ /* Make rec_get_offsets() and rec_offs_make_valid() happy. */
+ ut_d(rec_set_heap_no_old(rec, PAGE_HEAP_NO_USER_LOW));
/* Store the data and the offsets */
@@ -1122,7 +1151,7 @@ rec_convert_dtuple_to_rec_old(
/*********************************************************//**
Builds a ROW_FORMAT=COMPACT record out of a data tuple. */
-UNIV_INLINE MY_ATTRIBUTE((nonnull))
+UNIV_INLINE
void
rec_convert_dtuple_to_rec_comp(
/*===========================*/
@@ -1145,10 +1174,11 @@ rec_convert_dtuple_to_rec_comp(
ulint n_node_ptr_field;
ulint fixed_len;
ulint null_mask = 1;
- ulint n_null;
- ut_ad(temp || dict_table_is_comp(index->table));
ut_ad(n_fields > 0);
+ ut_ad(temp || dict_table_is_comp(index->table));
+ ulint n_null = index->n_nullable;
+ const ulint n_null_bytes = UT_BITS_IN_BYTES(n_null);
if (temp) {
ut_ad(status == REC_STATUS_ORDINARY);
@@ -1161,6 +1191,8 @@ rec_convert_dtuple_to_rec_comp(
temp = false;
}
} else {
+ /* Make rec_get_offsets() and rec_offs_make_valid() happy. */
+ ut_d(rec_set_heap_no_new(rec, PAGE_HEAP_NO_USER_LOW));
nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
@@ -1170,7 +1202,8 @@ rec_convert_dtuple_to_rec_comp(
break;
case REC_STATUS_NODE_PTR:
ut_ad(n_fields
- == dict_index_get_n_unique_in_tree(index) + 1);
+ == dict_index_get_n_unique_in_tree_nonleaf(index)
+ + 1);
n_node_ptr_field = n_fields - 1;
break;
case REC_STATUS_INFIMUM:
@@ -1185,15 +1218,17 @@ rec_convert_dtuple_to_rec_comp(
}
end = rec;
- n_null = index->n_nullable;
- lens = nulls - UT_BITS_IN_BYTES(n_null);
/* clear the SQL-null flags */
+ lens = nulls - n_null_bytes;
memset(lens + 1, 0, nulls - lens);
/* Store the data and the offsets */
- for (i = 0, field = fields; i < n_fields; i++, field++) {
+ for (i = 0; i < n_fields; i++) {
const dict_field_t* ifield;
+ dict_col_t* col = NULL;
+
+ field = &fields[i];
type = dfield_get_type(field);
len = dfield_get_len(field);
@@ -1231,10 +1266,12 @@ rec_convert_dtuple_to_rec_comp(
ifield = dict_index_get_nth_field(index, i);
fixed_len = ifield->fixed_len;
+ col = ifield->col;
if (temp && fixed_len
- && !dict_col_get_fixed_size(ifield->col, temp)) {
+ && !dict_col_get_fixed_size(col, temp)) {
fixed_len = 0;
}
+
/* If the maximum length of a variable-length field
is up to 255 bytes, the actual length is always stored
in one byte. If the maximum length is more than 255
@@ -1242,28 +1279,24 @@ rec_convert_dtuple_to_rec_comp(
0..127. The length will be encoded in two bytes when
it is 128 or more, or when the field is stored externally. */
if (fixed_len) {
-#ifdef UNIV_DEBUG
ut_ad(len <= fixed_len);
- ut_ad(!ifield->col->mbmaxlen
- || len >= ifield->col->mbminlen
- * (fixed_len / ifield->col->mbmaxlen));
+ ut_ad(!col->mbmaxlen
+ || len >= col->mbminlen
+ * fixed_len / col->mbmaxlen);
ut_ad(!dfield_is_ext(field));
-#endif /* UNIV_DEBUG */
} else if (dfield_is_ext(field)) {
- ut_ad(ifield->col->len >= 256
- || ifield->col->mtype == DATA_BLOB);
+ ut_ad(DATA_BIG_COL(col));
ut_ad(len <= REC_ANTELOPE_MAX_INDEX_COL_LEN
+ BTR_EXTERN_FIELD_REF_SIZE);
*lens-- = (byte) (len >> 8) | 0xc0;
*lens-- = (byte) len;
} else {
ut_ad(len <= dtype_get_len(type)
- || dtype_get_mtype(type) == DATA_BLOB
+ || DATA_LARGE_MTYPE(dtype_get_mtype(type))
|| !strcmp(index->name,
FTS_INDEX_TABLE_IND_NAME));
- if (len < 128
- || (dtype_get_len(type) < 256
- && dtype_get_mtype(type) != DATA_BLOB)) {
+ if (len < 128 || !DATA_BIG_LEN_MTYPE(
+ dtype_get_len(type), dtype_get_mtype(type))) {
*lens-- = (byte) len;
} else {
@@ -1283,7 +1316,7 @@ rec_convert_dtuple_to_rec_comp(
/*********************************************************//**
Builds a new-style physical record out of a data tuple and
stores it beginning from the start of the given buffer.
-@return pointer to the origin of physical record */
+@return pointer to the origin of physical record */
static
rec_t*
rec_convert_dtuple_to_rec_new(
@@ -1314,8 +1347,7 @@ rec_convert_dtuple_to_rec_new(
/*********************************************************//**
Builds a physical record out of a data tuple and
stores it beginning from the start of the given buffer.
-@return pointer to the origin of physical record */
-UNIV_INTERN
+@return pointer to the origin of physical record */
rec_t*
rec_convert_dtuple_to_rec(
/*======================*/
@@ -1340,45 +1372,21 @@ rec_convert_dtuple_to_rec(
rec = rec_convert_dtuple_to_rec_old(buf, dtuple, n_ext);
}
-#ifdef UNIV_DEBUG
- {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- const ulint* offsets;
- ulint i;
- rec_offs_init(offsets_);
-
- offsets = rec_get_offsets(rec, index,
- offsets_, ULINT_UNDEFINED, &heap);
- ut_ad(rec_validate(rec, offsets));
- ut_ad(dtuple_get_n_fields(dtuple)
- == rec_offs_n_fields(offsets));
-
- for (i = 0; i < rec_offs_n_fields(offsets); i++) {
- ut_ad(!dfield_is_ext(dtuple_get_nth_field(dtuple, i))
- == !rec_offs_nth_extern(offsets, i));
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-#endif /* UNIV_DEBUG */
return(rec);
}
-#ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
-@return total size */
-UNIV_INTERN
+/** Determine the size of a data tuple prefix in a temporary file.
+@param[in] index clustered or secondary index
+@param[in] fields data fields
+@param[in] n_fields number of data fields
+@param[out] extra record header size
+@return total size, in bytes */
ulint
rec_get_converted_size_temp(
-/*========================*/
- const dict_index_t* index, /*!< in: record descriptor */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields,/*!< in: number of data fields */
- ulint* extra) /*!< out: extra size */
+ const dict_index_t* index,
+ const dfield_t* fields,
+ ulint n_fields,
+ ulint* extra)
{
return(rec_get_converted_size_comp_prefix_low(
index, fields, n_fields, extra, true));
@@ -1387,13 +1395,12 @@ rec_get_converted_size_temp(
/******************************************************//**
Determine the offset to each field in temporary file.
@see rec_convert_dtuple_to_temp() */
-UNIV_INTERN
void
rec_init_offsets_temp(
/*==================*/
const rec_t* rec, /*!< in: temporary file record */
const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets)/*!< in/out: array of offsets;
+ offset_t* offsets)/*!< in/out: array of offsets;
in: n=rec_offs_n_fields(offsets) */
{
rec_init_offsets_comp_ordinary(rec, true, index, offsets);
@@ -1402,7 +1409,6 @@ rec_init_offsets_temp(
/*********************************************************//**
Builds a temporary file record out of a data tuple.
@see rec_init_offsets_temp() */
-UNIV_INTERN
void
rec_convert_dtuple_to_temp(
/*=======================*/
@@ -1415,26 +1421,33 @@ rec_convert_dtuple_to_temp(
REC_STATUS_ORDINARY, true);
}
-/**************************************************************//**
-Copies the first n fields of a physical record to a data tuple. The fields
-are copied to the memory heap. */
-UNIV_INTERN
+/** Copy the first n fields of a (copy of a) physical record to a data tuple.
+The fields are copied into the memory heap.
+@param[out] tuple data tuple
+@param[in] rec index record, or a copy thereof
+@param[in] is_leaf whether rec is a leaf page record
+@param[in] n_fields number of fields to copy
+@param[in,out] heap memory heap */
void
-rec_copy_prefix_to_dtuple(
-/*======================*/
- dtuple_t* tuple, /*!< out: data tuple */
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint n_fields, /*!< in: number of fields
- to copy */
- mem_heap_t* heap) /*!< in: memory heap */
+rec_copy_prefix_to_dtuple_func(
+ dtuple_t* tuple,
+ const rec_t* rec,
+ const dict_index_t* index,
+#ifdef UNIV_DEBUG
+ bool is_leaf,
+#endif /* UNIV_DEBUG */
+ ulint n_fields,
+ mem_heap_t* heap)
{
- ulint i;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
rec_offs_init(offsets_);
- offsets = rec_get_offsets(rec, index, offsets, n_fields, &heap);
+ ut_ad(is_leaf || n_fields
+ <= dict_index_get_n_unique_in_tree_nonleaf(index) + 1);
+
+ offsets = rec_get_offsets(rec, index, offsets, is_leaf,
+ n_fields, &heap);
ut_ad(rec_validate(rec, offsets));
ut_ad(dtuple_check_typed(tuple));
@@ -1442,7 +1455,7 @@ rec_copy_prefix_to_dtuple(
dtuple_set_info_bits(tuple, rec_get_info_bits(
rec, dict_table_is_comp(index->table)));
- for (i = 0; i < n_fields; i++) {
+ for (ulint i = 0; i < n_fields; i++) {
dfield_t* field;
const byte* data;
ulint len;
@@ -1463,7 +1476,7 @@ rec_copy_prefix_to_dtuple(
/**************************************************************//**
Copies the first n fields of an old-style physical record
to a new physical record in a buffer.
-@return own: copied record */
+@return own: copied record */
static
rec_t*
rec_copy_prefix_to_buf_old(
@@ -1488,11 +1501,9 @@ rec_copy_prefix_to_buf_old(
prefix_len = area_start + area_end;
if ((*buf == NULL) || (*buf_size < prefix_len)) {
- if (*buf != NULL) {
- mem_free(*buf);
- }
-
- *buf = static_cast<byte*>(mem_alloc2(prefix_len, buf_size));
+ ut_free(*buf);
+ *buf_size = prefix_len;
+ *buf = static_cast<byte*>(ut_malloc_nokey(prefix_len));
}
ut_memcpy(*buf, rec - area_start, prefix_len);
@@ -1507,8 +1518,7 @@ rec_copy_prefix_to_buf_old(
/**************************************************************//**
Copies the first n fields of a physical record to a new physical record in
a buffer.
-@return own: copied record */
-UNIV_INTERN
+@return own: copied record */
rec_t*
rec_copy_prefix_to_buf(
/*===================*/
@@ -1527,6 +1537,7 @@ rec_copy_prefix_to_buf(
ulint prefix_len;
ulint null_mask;
ulint status;
+ bool is_rtr_node_ptr = false;
UNIV_PREFETCH_RW(*buf);
@@ -1545,8 +1556,16 @@ rec_copy_prefix_to_buf(
ut_ad(n_fields <= dict_index_get_n_fields(index));
break;
case REC_STATUS_NODE_PTR:
- /* it doesn't make sense to copy the child page number field */
- ut_ad(n_fields <= dict_index_get_n_unique_in_tree(index));
+ /* For R-tree, we need to copy the child page number field. */
+ if (dict_index_is_spatial(index)) {
+ ut_ad(n_fields == DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1);
+ is_rtr_node_ptr = true;
+ } else {
+ /* it doesn't make sense to copy the child page number
+ field */
+ ut_ad(n_fields <=
+ dict_index_get_n_unique_in_tree_nonleaf(index));
+ }
break;
case REC_STATUS_INFIMUM:
case REC_STATUS_SUPREMUM:
@@ -1585,7 +1604,11 @@ rec_copy_prefix_to_buf(
null_mask <<= 1;
}
- if (field->fixed_len) {
+ if (is_rtr_node_ptr && i == 1) {
+			/* For an R-tree node pointer record,
+			copy the 4-byte child page number field. */
+ prefix_len += 4;
+ } else if (field->fixed_len) {
prefix_len += field->fixed_len;
} else {
ulint len = *lens--;
@@ -1596,7 +1619,7 @@ rec_copy_prefix_to_buf(
stored in one byte for 0..127. The length
will be encoded in two bytes when it is 128 or
more, or when the column is stored externally. */
- if (col->len > 255 || col->mtype == DATA_BLOB) {
+ if (DATA_BIG_COL(col)) {
if (len & 0x80) {
/* 1exxxxxx */
len &= 0x3f;
@@ -1614,138 +1637,106 @@ rec_copy_prefix_to_buf(
prefix_len += rec - (lens + 1);
if ((*buf == NULL) || (*buf_size < prefix_len)) {
- if (*buf != NULL) {
- mem_free(*buf);
- }
-
- *buf = static_cast<byte*>(mem_alloc2(prefix_len, buf_size));
+ ut_free(*buf);
+ *buf_size = prefix_len;
+ *buf = static_cast<byte*>(ut_malloc_nokey(prefix_len));
}
memcpy(*buf, lens + 1, prefix_len);
return(*buf + (rec - (lens + 1)));
}
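
Both rec_copy_prefix_to_buf_old() and rec_copy_prefix_to_buf() now share the same reallocation idiom: because ut_free(), like free(), accepts a null pointer, the old if (*buf != NULL) guard vanishes and the scratch buffer simply grows on demand. In portable terms (malloc/free standing in for ut_malloc_nokey()/ut_free()):

#include <cstdlib>

// Ensure *buf can hold `need` bytes, reallocating only when it cannot.
static void* ensure_capacity(void** buf, std::size_t* buf_size,
                             std::size_t need)
{
	if (*buf == NULL || *buf_size < need) {
		std::free(*buf); // free(NULL) is a no-op
		*buf_size = need;
		*buf = std::malloc(need);
	}
	return *buf;
}

Callers keep *buf and *buf_size across invocations, so copying many similar-sized prefixes allocates only once.
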
-#endif /* UNIV_HOTBACKUP */
/***************************************************************//**
Validates the consistency of an old-style physical record.
-@return TRUE if ok */
+@return TRUE if ok */
static
ibool
rec_validate_old(
/*=============*/
const rec_t* rec) /*!< in: physical record */
{
- const byte* data;
ulint len;
ulint n_fields;
ulint len_sum = 0;
- ulint sum = 0;
ulint i;
ut_a(rec);
n_fields = rec_get_n_fields_old(rec);
if ((n_fields == 0) || (n_fields > REC_MAX_N_FIELDS)) {
- fprintf(stderr, "InnoDB: Error: record has %lu fields\n",
- (ulong) n_fields);
+ ib::error() << "Record has " << n_fields << " fields";
return(FALSE);
}
for (i = 0; i < n_fields; i++) {
- data = rec_get_nth_field_old(rec, i, &len);
+ rec_get_nth_field_offs_old(rec, i, &len);
if (!((len < UNIV_PAGE_SIZE) || (len == UNIV_SQL_NULL))) {
- fprintf(stderr,
- "InnoDB: Error: record field %lu len %lu\n",
- (ulong) i,
- (ulong) len);
+ ib::error() << "Record field " << i << " len " << len;
return(FALSE);
}
if (len != UNIV_SQL_NULL) {
len_sum += len;
- sum += *(data + len -1); /* dereference the
- end of the field to
- cause a memory trap
- if possible */
} else {
len_sum += rec_get_nth_field_size(rec, i);
}
}
if (len_sum != rec_get_data_size_old(rec)) {
- fprintf(stderr,
- "InnoDB: Error: record len should be %lu, len %lu\n",
- (ulong) len_sum,
- rec_get_data_size_old(rec));
+ ib::error() << "Record len should be " << len_sum << ", len "
+ << rec_get_data_size_old(rec);
return(FALSE);
}
- rec_dummy = sum; /* This is here only to fool the compiler */
-
return(TRUE);
}
/***************************************************************//**
Validates the consistency of a physical record.
-@return TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
ibool
rec_validate(
/*=========*/
const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
{
- const byte* data;
ulint len;
ulint n_fields;
ulint len_sum = 0;
- ulint sum = 0;
ulint i;
n_fields = rec_offs_n_fields(offsets);
if ((n_fields == 0) || (n_fields > REC_MAX_N_FIELDS)) {
- fprintf(stderr, "InnoDB: Error: record has %lu fields\n",
- (ulong) n_fields);
+ ib::error() << "Record has " << n_fields << " fields";
return(FALSE);
}
ut_a(rec_offs_comp(offsets) || n_fields <= rec_get_n_fields_old(rec));
for (i = 0; i < n_fields; i++) {
- data = rec_get_nth_field(rec, offsets, i, &len);
+ rec_get_nth_field_offs(offsets, i, &len);
if (!((len < UNIV_PAGE_SIZE) || (len == UNIV_SQL_NULL))) {
- fprintf(stderr,
- "InnoDB: Error: record field %lu len %lu\n",
- (ulong) i,
- (ulong) len);
+ ib::error() << "Record field " << i << " len " << len;
return(FALSE);
}
if (len != UNIV_SQL_NULL) {
len_sum += len;
- sum += *(data + len -1); /* dereference the
- end of the field to
- cause a memory trap
- if possible */
} else if (!rec_offs_comp(offsets)) {
len_sum += rec_get_nth_field_size(rec, i);
}
}
if (len_sum != rec_offs_data_size(offsets)) {
- fprintf(stderr,
- "InnoDB: Error: record len should be %lu, len %lu\n",
- (ulong) len_sum,
- (ulong) rec_offs_data_size(offsets));
+ ib::error() << "Record len should be " << len_sum << ", len "
+ << rec_offs_data_size(offsets);
return(FALSE);
}
- rec_dummy = sum; /* This is here only to fool the compiler */
-
if (!rec_offs_comp(offsets)) {
ut_a(rec_validate_old(rec));
}
@@ -1755,7 +1746,6 @@ rec_validate(
/***************************************************************//**
Prints an old-style physical record. */
-UNIV_INTERN
void
rec_print_old(
/*==========*/
@@ -1769,17 +1759,17 @@ rec_print_old(
n = rec_get_n_fields_old(rec);
- fprintf(file, "PHYSICAL RECORD: n_fields %lu;"
- " %u-byte offsets; info bits %lu\n",
- (ulong) n,
+ fprintf(file, "PHYSICAL RECORD: n_fields " ULINTPF ";"
+ " %u-byte offsets; info bits " ULINTPF "\n",
+ n,
rec_get_1byte_offs_flag(rec) ? 1 : 2,
- (ulong) rec_get_info_bits(rec, FALSE));
+ rec_get_info_bits(rec, FALSE));
for (i = 0; i < n; i++) {
data = rec_get_nth_field_old(rec, i, &len);
- fprintf(file, " %lu:", (ulong) i);
+ fprintf(file, " " ULINTPF ":", i);
if (len != UNIV_SQL_NULL) {
if (len <= 30) {
@@ -1788,11 +1778,11 @@ rec_print_old(
} else {
ut_print_buf(file, data, 30);
- fprintf(file, " (total %lu bytes)",
- (ulong) len);
+ fprintf(file, " (total " ULINTPF " bytes)",
+ len);
}
} else {
- fprintf(file, " SQL NULL, size %lu ",
+ fprintf(file, " SQL NULL, size " ULINTPF " ",
rec_get_nth_field_size(rec, i));
}
@@ -1803,17 +1793,16 @@ rec_print_old(
rec_validate_old(rec);
}
-#ifndef UNIV_HOTBACKUP
/***************************************************************//**
Prints a physical record in ROW_FORMAT=COMPACT. Ignores the
record header. */
-UNIV_INTERN
+static
void
rec_print_comp(
/*===========*/
FILE* file, /*!< in: file where to print */
const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
{
ulint i;
@@ -1823,7 +1812,7 @@ rec_print_comp(
data = rec_get_nth_field(rec, offsets, i, &len);
- fprintf(file, " %lu:", (ulong) i);
+ fprintf(file, " " ULINTPF ":", i);
if (len != UNIV_SQL_NULL) {
if (len <= 30) {
@@ -1831,16 +1820,17 @@ rec_print_comp(
ut_print_buf(file, data, len);
} else if (rec_offs_nth_extern(offsets, i)) {
ut_print_buf(file, data, 30);
- fprintf(file, " (total %lu bytes, external)",
- (ulong) len);
+ fprintf(file,
+ " (total " ULINTPF " bytes, external)",
+ len);
ut_print_buf(file, data + len
- BTR_EXTERN_FIELD_REF_SIZE,
BTR_EXTERN_FIELD_REF_SIZE);
} else {
ut_print_buf(file, data, 30);
- fprintf(file, " (total %lu bytes)",
- (ulong) len);
+ fprintf(file, " (total " ULINTPF " bytes)",
+ len);
}
} else {
fputs(" SQL NULL", file);
@@ -1851,26 +1841,175 @@ rec_print_comp(
}
/***************************************************************//**
+Prints an old-style spatial index record. */
+static
+void
+rec_print_mbr_old(
+/*==============*/
+ FILE* file, /*!< in: file where to print */
+ const rec_t* rec) /*!< in: physical record */
+{
+ const byte* data;
+ ulint len;
+ ulint n;
+ ulint i;
+
+ ut_ad(rec);
+
+ n = rec_get_n_fields_old(rec);
+
+ fprintf(file, "PHYSICAL RECORD: n_fields %lu;"
+ " %u-byte offsets; info bits %lu\n",
+ (ulong) n,
+ rec_get_1byte_offs_flag(rec) ? 1 : 2,
+ (ulong) rec_get_info_bits(rec, FALSE));
+
+ for (i = 0; i < n; i++) {
+
+ data = rec_get_nth_field_old(rec, i, &len);
+
+ fprintf(file, " %lu:", (ulong) i);
+
+ if (len != UNIV_SQL_NULL) {
+ if (i == 0) {
+ fprintf(file, " MBR:");
+ for (; len > 0; len -= sizeof(double)) {
+ double d = mach_double_read(data);
+
+ if (len != sizeof(double)) {
+ fprintf(file, "%.2lf,", d);
+ } else {
+ fprintf(file, "%.2lf", d);
+ }
+
+ data += sizeof(double);
+ }
+ } else {
+ if (len <= 30) {
+
+ ut_print_buf(file, data, len);
+ } else {
+ ut_print_buf(file, data, 30);
+
+ fprintf(file, " (total %lu bytes)",
+ (ulong) len);
+ }
+ }
+ } else {
+ fprintf(file, " SQL NULL, size " ULINTPF " ",
+ rec_get_nth_field_size(rec, i));
+ }
+
+ putc(';', file);
+ putc('\n', file);
+ }
+
+ if (rec_get_deleted_flag(rec, false)) {
+ fprintf(file, " Deleted");
+ }
+
+ if (rec_get_info_bits(rec, true) & REC_INFO_MIN_REC_FLAG) {
+ fprintf(file, " First rec");
+ }
+
+ rec_validate_old(rec);
+}
+
+/***************************************************************//**
+Prints a spatial index record. */
+void
+rec_print_mbr_rec(
+/*==============*/
+ FILE* file, /*!< in: file where to print */
+ const rec_t* rec, /*!< in: physical record */
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
+{
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
+
+ if (!rec_offs_comp(offsets)) {
+ rec_print_mbr_old(file, rec);
+ return;
+ }
+
+ for (ulint i = 0; i < rec_offs_n_fields(offsets); i++) {
+ const byte* data;
+ ulint len;
+
+ data = rec_get_nth_field(rec, offsets, i, &len);
+
+ if (i == 0) {
+ fprintf(file, " MBR:");
+ for (; len > 0; len -= sizeof(double)) {
+ double d = mach_double_read(data);
+
+ if (len != sizeof(double)) {
+ fprintf(file, "%.2lf,", d);
+ } else {
+ fprintf(file, "%.2lf", d);
+ }
+
+ data += sizeof(double);
+ }
+ } else {
+ fprintf(file, " %lu:", (ulong) i);
+
+ if (len != UNIV_SQL_NULL) {
+ if (len <= 30) {
+
+ ut_print_buf(file, data, len);
+ } else {
+ ut_print_buf(file, data, 30);
+
+ fprintf(file, " (total %lu bytes)",
+ (ulong) len);
+ }
+ } else {
+ fputs(" SQL NULL", file);
+ }
+ }
+ putc(';', file);
+ }
+
+ if (rec_get_info_bits(rec, true) & REC_INFO_DELETED_FLAG) {
+ fprintf(file, " Deleted");
+ }
+
+ if (rec_get_info_bits(rec, true) & REC_INFO_MIN_REC_FLAG) {
+ fprintf(file, " First rec");
+ }
+
+ rec_validate(rec, offsets);
+}
+
+/***************************************************************//**
Prints a physical record. */
-UNIV_INTERN
void
rec_print_new(
/*==========*/
FILE* file, /*!< in: file where to print */
const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets)/*!< in: array returned by rec_get_offsets() */
{
ut_ad(rec_offs_validate(rec, NULL, offsets));
+#ifdef UNIV_DEBUG
+ if (rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
+ DBUG_PRINT("info", ("deleted "));
+ } else {
+ DBUG_PRINT("info", ("not-deleted "));
+ }
+#endif /* UNIV_DEBUG */
+
if (!rec_offs_comp(offsets)) {
rec_print_old(file, rec);
return;
}
- fprintf(file, "PHYSICAL RECORD: n_fields %lu;"
- " compact format; info bits %lu\n",
- (ulong) rec_offs_n_fields(offsets),
- (ulong) rec_get_info_bits(rec, TRUE));
+ fprintf(file, "PHYSICAL RECORD: n_fields " ULINTPF ";"
+ " compact format; info bits " ULINTPF "\n",
+ rec_offs_n_fields(offsets),
+ rec_get_info_bits(rec, TRUE));
rec_print_comp(file, rec, offsets);
rec_validate(rec, offsets);
@@ -1878,7 +2017,6 @@ rec_print_new(
/***************************************************************//**
Prints a physical record. */
-UNIV_INTERN
void
rec_print(
/*======*/
@@ -1891,11 +2029,12 @@ rec_print(
return;
} else {
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
rec_offs_init(offsets_);
rec_print_new(file, rec,
rec_get_offsets(rec, index, offsets_,
+ page_rec_is_leaf(rec),
ULINT_UNDEFINED, &heap));
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
@@ -1903,51 +2042,144 @@ rec_print(
}
}
-# ifdef UNIV_DEBUG
-/************************************************************//**
-Reads the DB_TRX_ID of a clustered index record.
-@return the value of DB_TRX_ID */
-UNIV_INTERN
+/** Pretty-print a record.
+@param[in,out] o output stream
+@param[in] rec physical record
+@param[in] info rec_get_info_bits(rec)
+@param[in] offsets rec_get_offsets(rec) */
+void
+rec_print(
+ std::ostream& o,
+ const rec_t* rec,
+ ulint info,
+ const offset_t* offsets)
+{
+ const ulint comp = rec_offs_comp(offsets);
+ const ulint n = rec_offs_n_fields(offsets);
+
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
+
+ o << (comp ? "COMPACT RECORD" : "RECORD")
+ << "(info_bits=" << info << ", " << n << " fields): {";
+
+ for (ulint i = 0; i < n; i++) {
+ const byte* data;
+ ulint len;
+
+ if (i) {
+ o << ',';
+ }
+
+ data = rec_get_nth_field(rec, offsets, i, &len);
+
+ if (len == UNIV_SQL_NULL) {
+ o << "NULL";
+ continue;
+ }
+
+ if (rec_offs_nth_extern(offsets, i)) {
+ ulint local_len = len - BTR_EXTERN_FIELD_REF_SIZE;
+ ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+ o << '['
+ << local_len
+ << '+' << BTR_EXTERN_FIELD_REF_SIZE << ']';
+ ut_print_buf(o, data, local_len);
+ ut_print_buf_hex(o, data + local_len,
+ BTR_EXTERN_FIELD_REF_SIZE);
+ } else {
+ o << '[' << len << ']';
+ ut_print_buf(o, data, len);
+ }
+ }
+
+ o << "}";
+}
+
+/** Display a record.
+@param[in,out] o output stream
+@param[in] r record to display
+@return the output stream */
+std::ostream&
+operator<<(std::ostream& o, const rec_index_print& r)
+{
+ mem_heap_t* heap = NULL;
+ offset_t* offsets = rec_get_offsets(
+ r.m_rec, r.m_index, NULL, page_rec_is_leaf(r.m_rec),
+ ULINT_UNDEFINED, &heap);
+ rec_print(o, r.m_rec,
+ rec_get_info_bits(r.m_rec, rec_offs_comp(offsets)),
+ offsets);
+ mem_heap_free(heap);
+ return(o);
+}
+
+/** Display a record.
+@param[in,out] o output stream
+@param[in] r record to display
+@return the output stream */
+std::ostream&
+operator<<(std::ostream& o, const rec_offsets_print& r)
+{
+ rec_print(o, r.m_rec,
+ rec_get_info_bits(r.m_rec, rec_offs_comp(r.m_offsets)),
+ r.m_offsets);
+ return(o);
+}
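
A hedged usage sketch for the two stream wrappers just defined, assuming rec_index_print and rec_offsets_print are the small holder structs their operator<< overloads imply:

	/* With the index at hand: offsets are computed internally. */
	std::cerr << rec_index_print(rec, index) << '\n';
	/* With offsets already computed: */
	std::cerr << rec_offsets_print(rec, offsets) << '\n';
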
+
+#ifdef UNIV_DEBUG
+/** Read the DB_TRX_ID of a clustered index record.
+@param[in] rec clustered index record
+@param[in] index clustered index
+@return the value of DB_TRX_ID */
trx_id_t
rec_get_trx_id(
-/*===========*/
- const rec_t* rec, /*!< in: record */
- const dict_index_t* index) /*!< in: clustered index */
+ const rec_t* rec,
+ const dict_index_t* index)
{
- const page_t* page
- = page_align(rec);
ulint trx_id_col
= dict_index_get_sys_col_pos(index, DATA_TRX_ID);
const byte* trx_id;
ulint len;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + 2];
rec_offs_init(offsets_);
+ offset_t* offsets = offsets_;
- ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
- ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)
- == index->id);
+ ut_ad(trx_id_col <= MAX_REF_PARTS);
ut_ad(dict_index_is_clust(index));
ut_ad(trx_id_col > 0);
ut_ad(trx_id_col != ULINT_UNDEFINED);
- offsets = rec_get_offsets(rec, index, offsets, trx_id_col + 1, &heap);
+ offsets = rec_get_offsets(rec, index, offsets, true,
+ trx_id_col + 1, &heap);
trx_id = rec_get_nth_field(rec, offsets, trx_id_col, &len);
ut_ad(len == DATA_TRX_ID_LEN);
- if (heap) {
+ if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
return(trx_read_trx_id(trx_id));
}
-# endif /* UNIV_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_DEBUG */
+/** Mark the nth field as externally stored.
+@param[in] offsets array returned by rec_get_offsets()
+@param[in] n nth field */
+void
+rec_offs_make_nth_extern(
+ offset_t* offsets,
+ const ulint n)
+{
+ ut_ad(!rec_offs_nth_sql_null(offsets, n));
+	set_type(rec_offs_base(offsets)[1 + n], STORED_OFFPAGE);
+}
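
For orientation: rec_offs_base() and set_type() (defined in rem0rec.h) pack a per-field status flag, such as STORED_OFFPAGE for an externally stored column, into the same offset_t slot that holds the field's end offset, so rec_offs_make_nth_extern() only has to flip that flag for field n. Treat the exact bit layout as an implementation detail of the header, not of this file.
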
#ifdef WITH_WSREP
+# include "ha_prototypes.h"
+
int
wsrep_rec_get_foreign_key(
byte *buf, /* out: extracted key */
@@ -1963,14 +2195,14 @@ wsrep_rec_get_foreign_key(
ulint i;
uint key_parts;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- const ulint* offsets;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ const offset_t* offsets;
ut_ad(index_for);
ut_ad(index_ref);
rec_offs_init(offsets_);
- offsets = rec_get_offsets(rec, index_for, offsets_,
+ offsets = rec_get_offsets(rec, index_for, offsets_, true,
ULINT_UNDEFINED, &heap);
ut_ad(rec_offs_validate(rec, NULL, offsets));
@@ -1992,9 +2224,10 @@ wsrep_rec_get_foreign_key(
data = rec_get_nth_field(rec, offsets, i, &len);
if (key_len + ((len != UNIV_SQL_NULL) ? len + 1 : 1) >
*buf_len) {
- fprintf (stderr,
- "WSREP: FK key len exceeded %lu %lu %lu\n",
- key_len, len, *buf_len);
+ fprintf(stderr,
+ "WSREP: FK key len exceeded "
+ ULINTPF " " ULINTPF " " ULINTPF "\n",
+ key_len, len, *buf_len);
goto err_out;
}
diff --git a/storage/innobase/row/row0ext.cc b/storage/innobase/row/row0ext.cc
index bfc8165c9c5..f7e28981939 100644
--- a/storage/innobase/row/row0ext.cc
+++ b/storage/innobase/row/row0ext.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,23 +24,20 @@ Created September 2006 Marko Makela
*******************************************************/
#include "row0ext.h"
-
-#ifdef UNIV_NONINL
-#include "row0ext.ic"
-#endif
-
#include "btr0cur.h"
-/********************************************************************//**
-Fills the column prefix cache of an externally stored column. */
+/** Fills the column prefix cache of an externally stored column.
+@param[in,out] ext column prefix cache
+@param[in] i index of ext->ext[]
+@param[in] page_size page size
+@param[in] dfield data field */
static
void
row_ext_cache_fill(
-/*===============*/
- row_ext_t* ext, /*!< in/out: column prefix cache */
- ulint i, /*!< in: index of ext->ext[] */
- ulint zip_size,/*!< compressed page size in bytes, or 0 */
- const dfield_t* dfield) /*!< in: data field */
+ row_ext_t* ext,
+ ulint i,
+ const page_size_t& page_size,
+ const dfield_t* dfield)
{
const byte* field = static_cast<const byte*>(
dfield_get_data(dfield));
@@ -78,15 +75,14 @@ row_ext_cache_fill(
crashed during the execution of
btr_free_externally_stored_field(). */
ext->len[i] = btr_copy_externally_stored_field_prefix(
- buf, ext->max_len, zip_size, field, f_len);
+ buf, ext->max_len, page_size, field, f_len);
}
}
}
/********************************************************************//**
Creates a cache of column prefixes of externally stored columns.
-@return own: column prefix cache */
-UNIV_INTERN
+@return own: column prefix cache */
row_ext_t*
row_ext_create(
/*===========*/
@@ -105,7 +101,7 @@ row_ext_create(
mem_heap_t* heap) /*!< in: heap where created */
{
ulint i;
- ulint zip_size = dict_tf_get_zip_size(flags);
+ const page_size_t& page_size = dict_tf_get_page_size(flags);
row_ext_t* ret;
@@ -115,12 +111,10 @@ row_ext_create(
mem_heap_alloc(heap,
(sizeof *ret) + (n_ext - 1) * sizeof ret->len));
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
-
ret->n_ext = n_ext;
ret->ext = ext;
ret->max_len = DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags);
+ ret->page_size.copy_from(page_size);
ret->buf = static_cast<byte*>(
mem_heap_alloc(heap, n_ext * ret->max_len));
@@ -135,7 +129,7 @@ row_ext_create(
const dfield_t* dfield;
dfield = dtuple_get_nth_field(tuple, ext[i]);
- row_ext_cache_fill(ret, i, zip_size, dfield);
+ row_ext_cache_fill(ret, i, page_size, dfield);
}
return(ret);
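
A hedged sketch of how the prefix cache filled above is consumed; row_ext_lookup is the accessor declared in row0ext.h, and the exact signature here is an assumption:

	ulint		len;
	const byte*	prefix = row_ext_lookup(ext, col_no, &len);
	if (prefix != NULL) {
		/* Up to ext->max_len cached bytes of the externally
		stored column, served without a BLOB page fetch. */
	}
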
diff --git a/storage/innobase/row/row0ftsort.cc b/storage/innobase/row/row0ftsort.cc
index 5a8e085ad8f..e3b608769e1 100644
--- a/storage/innobase/row/row0ftsort.cc
+++ b/storage/innobase/row/row0ftsort.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2019, MariaDB Corporation.
+Copyright (c) 2015, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,16 +24,16 @@ Create Full Text Index with (parallel) merge sort
Created 10/13/2010 Jimmy Yang
*******************************************************/
-#include "dict0dict.h" /* dict_table_stats_lock() */
-#include "row0merge.h"
-#include "pars0pars.h"
#include "row0ftsort.h"
+#include "dict0dict.h"
#include "row0merge.h"
#include "row0row.h"
#include "btr0cur.h"
+#include "fts0plugin.h"
+#include "log0crypt.h"
/** Read the next record to buffer N.
-@param N index into array of merge info structure */
+@param N index into array of merge info structure */
#define ROW_MERGE_READ_GET_NEXT(N) \
do { \
b[N] = row_merge_read_rec( \
@@ -48,7 +48,7 @@ Created 10/13/2010 Jimmy Yang
} while (0)
/** Parallel sort degree */
-UNIV_INTERN ulong fts_sort_pll_degree = 2;
+ulong fts_sort_pll_degree = 2;
/*********************************************************************//**
Create a temporary "fts sort index" used to merge sort the
@@ -62,7 +62,6 @@ integer value)
@see fts_create_one_index_table()
@return dict_index_t structure for the fts sort index */
-UNIV_INTERN
dict_index_t*
row_merge_create_fts_sort_index(
/*============================*/
@@ -83,13 +82,14 @@ row_merge_create_fts_sort_index(
// FIXME: This name shouldn't be hard coded here.
new_index = dict_mem_index_create(
- index->table->name, "tmp_fts_idx", 0, DICT_FTS, 3);
+ index->table->name.m_name, "tmp_fts_idx", 0, DICT_FTS, 3);
new_index->id = index->id;
new_index->table = (dict_table_t*) table;
new_index->n_uniq = FTS_NUM_FIELDS_SORT;
new_index->n_def = FTS_NUM_FIELDS_SORT;
new_index->cached = TRUE;
+ new_index->parser = index->parser;
idx_field = dict_index_get_nth_field(index, 0);
charset = fts_index_get_charset(index);
@@ -164,7 +164,6 @@ row_merge_create_fts_sort_index(
/*********************************************************************//**
Initialize FTS parallel sort structures.
@return TRUE if all successful */
-UNIV_INTERN
ibool
row_fts_psort_info_init(
/*====================*/
@@ -193,7 +192,7 @@ row_fts_psort_info_init(
block_size = 3 * srv_sort_buf_size;
- *psort = psort_info = static_cast<fts_psort_t*>(mem_zalloc(
+ *psort = psort_info = static_cast<fts_psort_t*>(ut_zalloc_nokey(
fts_sort_pll_degree * sizeof *psort_info));
if (!psort_info) {
@@ -203,11 +202,11 @@ row_fts_psort_info_init(
/* Common Info for all sort threads */
common_info = static_cast<fts_psort_common_t*>(
- mem_alloc(sizeof *common_info));
+ ut_malloc_nokey(sizeof *common_info));
if (!common_info) {
ut_free(dup);
- mem_free(psort_info);
+ ut_free(psort_info);
return(FALSE);
}
@@ -215,8 +214,8 @@ row_fts_psort_info_init(
common_info->new_table = (dict_table_t*) new_table;
common_info->trx = trx;
common_info->all_info = psort_info;
- common_info->sort_event = os_event_create();
- common_info->merge_event = os_event_create();
+ common_info->sort_event = os_event_create(0);
+ common_info->merge_event = os_event_create(0);
common_info->opt_doc_id_size = opt_doc_id_size;
if (log_tmp_is_encrypted()) {
@@ -225,16 +224,19 @@ row_fts_psort_info_init(
ut_ad(trx->mysql_thd != NULL);
const char* path = thd_innodb_tmpdir(trx->mysql_thd);
-
/* There will be FTS_NUM_AUX_INDEX number of "sort buckets" for
each parallel sort thread. Each "sort bucket" holds records for
a particular "FTS index partition" */
for (j = 0; j < fts_sort_pll_degree; j++) {
+
+ UT_LIST_INIT(
+ psort_info[j].fts_doc_list, &fts_doc_item_t::doc_list);
+
for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
psort_info[j].merge_file[i] =
static_cast<merge_file_t*>(
- mem_zalloc(sizeof(merge_file_t)));
+ ut_zalloc_nokey(sizeof(merge_file_t)));
if (!psort_info[j].merge_file[i]) {
ret = FALSE;
@@ -251,7 +253,7 @@ row_fts_psort_info_init(
/* Need to align memory for O_DIRECT write */
psort_info[j].block_alloc[i] =
- static_cast<row_merge_block_t*>(ut_malloc(
+ static_cast<row_merge_block_t*>(ut_malloc_nokey(
block_size + 1024));
psort_info[j].merge_block[i] =
@@ -259,13 +261,18 @@ row_fts_psort_info_init(
ut_align(
psort_info[j].block_alloc[i], 1024));
+ if (!psort_info[j].merge_block[i]) {
+ ret = FALSE;
+ goto func_exit;
+ }
+
/* If tablespace is encrypted, allocate additional buffer for
encryption/decryption. */
if (encrypted) {
/* Need to align memory for O_DIRECT write */
psort_info[j].crypt_alloc[i] =
- static_cast<row_merge_block_t*>(ut_malloc(
+ static_cast<row_merge_block_t*>(ut_malloc_nokey(
block_size + 1024));
psort_info[j].crypt_block[i] =
@@ -281,11 +288,6 @@ row_fts_psort_info_init(
psort_info[j].crypt_alloc[i] = NULL;
psort_info[j].crypt_block[i] = NULL;
}
-
- if (!psort_info[j].merge_block[i]) {
- ret = FALSE;
- goto func_exit;
- }
}
psort_info[j].child_status = 0;
@@ -293,13 +295,13 @@ row_fts_psort_info_init(
psort_info[j].psort_common = common_info;
psort_info[j].error = DB_SUCCESS;
psort_info[j].memory_used = 0;
- mutex_create(fts_pll_tokenize_mutex_key, &psort_info[j].mutex, SYNC_FTS_TOKENIZE);
+ mutex_create(LATCH_ID_FTS_PLL_TOKENIZE, &psort_info[j].mutex);
}
/* Initialize merge_info structures parallel merge and insert
into auxiliary FTS tables (FTS_INDEX_TABLE) */
*merge = merge_info = static_cast<fts_psort_t*>(
- mem_alloc(FTS_NUM_AUX_INDEX * sizeof *merge_info));
+ ut_malloc_nokey(FTS_NUM_AUX_INDEX * sizeof *merge_info));
for (j = 0; j < FTS_NUM_AUX_INDEX; j++) {
@@ -318,7 +320,6 @@ func_exit:
/*********************************************************************//**
Clean up and deallocate FTS parallel sort structures, and close the
merge sort files */
-UNIV_INTERN
void
row_fts_psort_info_destroy(
/*=======================*/
@@ -336,34 +337,28 @@ row_fts_psort_info_destroy(
psort_info[j].merge_file[i]);
}
- if (psort_info[j].block_alloc[i]) {
- ut_free(psort_info[j].block_alloc[i]);
- }
+ ut_free(psort_info[j].block_alloc[i]);
+ ut_free(psort_info[j].merge_file[i]);
if (psort_info[j].crypt_alloc[i]) {
ut_free(psort_info[j].crypt_alloc[i]);
}
-
- mem_free(psort_info[j].merge_file[i]);
}
mutex_free(&psort_info[j].mutex);
}
- os_event_free(merge_info[0].psort_common->sort_event);
- os_event_free(merge_info[0].psort_common->merge_event);
+ os_event_destroy(merge_info[0].psort_common->sort_event);
+ os_event_destroy(merge_info[0].psort_common->merge_event);
ut_free(merge_info[0].psort_common->dup);
- mem_free(merge_info[0].psort_common);
- mem_free(psort_info);
+ ut_free(merge_info[0].psort_common);
+ ut_free(psort_info);
}
- if (merge_info) {
- mem_free(merge_info);
- }
+ ut_free(merge_info);
}
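
The dropped NULL checks above are deliberate: like free(), ut_free() is a no-op on a NULL pointer, so

	ut_free(psort_info[j].block_alloc[i]);	/* safe even if never allocated */

needs no guard.
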
/*********************************************************************//**
Free up merge buffers when merge sort is done */
-UNIV_INTERN
void
row_fts_free_pll_merge_buf(
/*=======================*/
@@ -386,6 +381,92 @@ row_fts_free_pll_merge_buf(
}
/*********************************************************************//**
+FTS plugin parser 'mysql_add_word' callback function for row merge.
+Refer to 'st_mysql_ftparser_param' for more detail.
+@return always returns 0 */
+static
+int
+row_merge_fts_doc_add_word_for_parser(
+/*==================================*/
+	MYSQL_FTPARSER_PARAM *param,	/* in: parser parameter */
+ const char *word, /* in: token word */
+ int word_len, /* in: word len */
+ MYSQL_FTPARSER_BOOLEAN_INFO* boolean_info) /* in: boolean info */
+{
+ fts_string_t str;
+ fts_tokenize_ctx_t* t_ctx;
+ row_fts_token_t* fts_token;
+ byte* ptr;
+
+ ut_ad(param);
+ ut_ad(param->mysql_ftparam);
+ ut_ad(word);
+ ut_ad(boolean_info);
+
+ t_ctx = static_cast<fts_tokenize_ctx_t*>(param->mysql_ftparam);
+ ut_ad(t_ctx);
+
+ str.f_str = (byte*)(word);
+ str.f_len = word_len;
+ str.f_n_char = fts_get_token_size(
+ (CHARSET_INFO*)param->cs, word, word_len);
+
+ /* JAN: TODO: MySQL 5.7 FTS
+ ut_ad(boolean_info->position >= 0);
+ */
+
+ ptr = static_cast<byte*>(ut_malloc_nokey(sizeof(row_fts_token_t)
+ + sizeof(fts_string_t) + str.f_len));
+ fts_token = reinterpret_cast<row_fts_token_t*>(ptr);
+ fts_token->text = reinterpret_cast<fts_string_t*>(
+ ptr + sizeof(row_fts_token_t));
+ fts_token->text->f_str = static_cast<byte*>(
+ ptr + sizeof(row_fts_token_t) + sizeof(fts_string_t));
+
+ fts_token->text->f_len = str.f_len;
+ fts_token->text->f_n_char = str.f_n_char;
+ memcpy(fts_token->text->f_str, str.f_str, str.f_len);
+
+ /* JAN: TODO: MySQL 5.7 FTS
+ fts_token->position = boolean_info->position;
+ */
+
+ /* Add token to list */
+ UT_LIST_ADD_LAST(t_ctx->fts_token_list, fts_token);
+
+ return(0);
+}
+
+/*********************************************************************//**
+Tokenize by fts plugin parser */
+static
+void
+row_merge_fts_doc_tokenize_by_parser(
+/*=================================*/
+ fts_doc_t* doc, /* in: doc to tokenize */
+ st_mysql_ftparser* parser, /* in: plugin parser instance */
+ fts_tokenize_ctx_t* t_ctx) /* in/out: tokenize ctx instance */
+{
+ MYSQL_FTPARSER_PARAM param;
+
+ ut_a(parser);
+
+	/* Set the parser parameters */
+ param.mysql_parse = fts_tokenize_document_internal;
+ param.mysql_add_word = row_merge_fts_doc_add_word_for_parser;
+ param.mysql_ftparam = t_ctx;
+ param.cs = doc->charset;
+ param.doc = reinterpret_cast<char*>(doc->text.f_str);
+ param.length = static_cast<int>(doc->text.f_len);
+ param.mode= MYSQL_FTPARSER_SIMPLE_MODE;
+
+ PARSER_INIT(parser, &param);
+ /* We assume parse returns successfully here. */
+ parser->parse(&param);
+ PARSER_DEINIT(parser, &param);
+}
+
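
Condensed, the consumption side that row_merge_fts_doc_tokenize implements below works like this (a sketch with buffering and error handling omitted):

	if (t_ctx->processed_len == 0) {
		/* One-shot parse of the whole document; the add-word
		callback above queues tokens on fts_token_list. */
		row_merge_fts_doc_tokenize_by_parser(doc, parser, t_ctx);
		t_ctx->processed_len += 1;	/* mark "parsed" */
	}

	while (row_fts_token_t* tok = UT_LIST_GET_FIRST(t_ctx->fts_token_list)) {
		/* ... copy tok->text into the sort buffer ... */
		UT_LIST_REMOVE(t_ctx->fts_token_list, tok);
		ut_free(tok);
	}
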
+/*********************************************************************//**
Tokenize incoming text data and add to the sort buffer.
@see row_merge_buf_encode()
@return TRUE if the record passed, FALSE if out of space */
@@ -402,8 +483,7 @@ row_merge_fts_doc_tokenize(
store Doc ID during sort*/
fts_tokenize_ctx_t* t_ctx) /*!< in/out: tokenize context */
{
- ulint i;
- ulint inc;
+ ulint inc = 0;
fts_string_t str;
ulint len;
row_merge_buf_t* buf;
@@ -413,6 +493,7 @@ row_merge_fts_doc_tokenize(
byte str_buf[FTS_MAX_WORD_LEN + 1];
ulint data_size[FTS_NUM_AUX_INDEX];
ulint n_tuple[FTS_NUM_AUX_INDEX];
+ st_mysql_ftparser* parser;
t_str.f_n_char = 0;
t_ctx->buf_used = 0;
@@ -420,28 +501,59 @@ row_merge_fts_doc_tokenize(
memset(n_tuple, 0, FTS_NUM_AUX_INDEX * sizeof(ulint));
memset(data_size, 0, FTS_NUM_AUX_INDEX * sizeof(ulint));
+ parser = sort_buf[0]->index->parser;
+
/* Tokenize the data and add each word string, its corresponding
doc id and position to sort buffer */
- for (i = t_ctx->processed_len; i < doc->text.f_len; i += inc) {
- ib_rbt_bound_t parent;
+ while (t_ctx->processed_len < doc->text.f_len) {
ulint idx = 0;
- ib_uint32_t position;
- ulint offset = 0;
ulint cur_len;
doc_id_t write_doc_id;
+ row_fts_token_t* fts_token = NULL;
+
+ if (parser != NULL) {
+ if (t_ctx->processed_len == 0) {
+ UT_LIST_INIT(t_ctx->fts_token_list, &row_fts_token_t::token_list);
+
+ /* Parse the whole doc and cache tokens */
+ row_merge_fts_doc_tokenize_by_parser(doc,
+ parser, t_ctx);
- inc = innobase_mysql_fts_get_token(
- doc->charset, doc->text.f_str + i,
- doc->text.f_str + doc->text.f_len, &str, &offset);
+				/* Just indicate that we have parsed all the words */
+ t_ctx->processed_len += 1;
+ }
+
+ /* Then get a token */
+ fts_token = UT_LIST_GET_FIRST(t_ctx->fts_token_list);
+ if (fts_token) {
+ str.f_len = fts_token->text->f_len;
+ str.f_n_char = fts_token->text->f_n_char;
+ str.f_str = fts_token->text->f_str;
+ } else {
+ ut_ad(UT_LIST_GET_LEN(t_ctx->fts_token_list) == 0);
+ /* Reach the end of the list */
+ t_ctx->processed_len = doc->text.f_len;
+ break;
+ }
+ } else {
+ inc = innobase_mysql_fts_get_token(
+ doc->charset,
+ doc->text.f_str + t_ctx->processed_len,
+ doc->text.f_str + doc->text.f_len, &str);
- ut_a(inc > 0);
+ ut_a(inc > 0);
+ }
/* Ignore string whose character number is less than
"fts_min_token_size" or more than "fts_max_token_size" */
- if (str.f_n_char < fts_min_token_size
- || str.f_n_char > fts_max_token_size) {
+ if (!fts_check_token(&str, NULL, NULL)) {
+ if (parser != NULL) {
+ UT_LIST_REMOVE(t_ctx->fts_token_list, fts_token);
+ ut_free(fts_token);
+ } else {
+ t_ctx->processed_len += inc;
+ }
- t_ctx->processed_len += inc;
continue;
}
@@ -451,13 +563,17 @@ row_merge_fts_doc_tokenize(
t_str.f_str = (byte*) &str_buf;
- /* if "cached_stopword" is defined, ingore words in the
+ /* if "cached_stopword" is defined, ignore words in the
stopword list */
- if (t_ctx->cached_stopword
- && rbt_search(t_ctx->cached_stopword,
- &parent, &t_str) == 0) {
+ if (!fts_check_token(&str, t_ctx->cached_stopword,
+ doc->charset)) {
+ if (parser != NULL) {
+ UT_LIST_REMOVE(t_ctx->fts_token_list, fts_token);
+ ut_free(fts_token);
+ } else {
+ t_ctx->processed_len += inc;
+ }
- t_ctx->processed_len += inc;
continue;
}
@@ -543,14 +659,18 @@ row_merge_fts_doc_tokenize(
++field;
- /* The third field is the position */
- mach_write_to_4(
- (byte*) &position,
- (i + offset + inc - str.f_len + t_ctx->init_pos));
-
- dfield_set_data(field, &position, sizeof(position));
- len = dfield_get_len(field);
- ut_ad(len == sizeof(ib_uint32_t));
+ /* The third field is the position.
+ MySQL 5.7 changed the fulltext parser plugin interface
+ by adding MYSQL_FTPARSER_BOOLEAN_INFO::position.
+ Below we assume that the field is always 0. */
+ unsigned pos = t_ctx->init_pos;
+ byte position[4];
+ if (parser == NULL) {
+ pos += t_ctx->processed_len + inc - str.f_len;
+ }
+ len = 4;
+ mach_write_to_4(position, pos);
+ dfield_set_data(field, &position, len);
field->type.mtype = DATA_INT;
field->type.prtype = DATA_NOT_NULL;
@@ -562,7 +682,7 @@ row_merge_fts_doc_tokenize(
/* Reserve one byte for the end marker of row_merge_block_t */
if (buf->total_size + data_size[idx] + cur_len
- >= (srv_sort_buf_size - 1)) {
+ >= srv_sort_buf_size - 1) {
buf_full = TRUE;
break;
@@ -570,13 +690,18 @@ row_merge_fts_doc_tokenize(
/* Increment the number of tuples */
n_tuple[idx]++;
- t_ctx->processed_len += inc;
+ if (parser != NULL) {
+ UT_LIST_REMOVE(t_ctx->fts_token_list, fts_token);
+ ut_free(fts_token);
+ } else {
+ t_ctx->processed_len += inc;
+ }
data_size[idx] += cur_len;
}
/* Update the data length and the number of new word tuples
added in this round of tokenization */
- for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
+ for (ulint i = 0; i < FTS_NUM_AUX_INDEX; i++) {
/* The computation of total_size below assumes that no
delete-mark flags will be stored and that all fields
are NOT NULL and fixed-length. */
@@ -614,8 +739,7 @@ row_merge_fts_get_next_doc_item(
*doc_item = UT_LIST_GET_FIRST(psort_info->fts_doc_list);
if (*doc_item != NULL) {
- UT_LIST_REMOVE(doc_list, psort_info->fts_doc_list,
- *doc_item);
+ UT_LIST_REMOVE(psort_info->fts_doc_list, *doc_item);
ut_ad(psort_info->memory_used >= sizeof(fts_doc_item_t)
+ (*doc_item)->field->len);
@@ -630,7 +754,7 @@ row_merge_fts_get_next_doc_item(
Function performs parallel tokenization of the incoming doc strings.
It also performs the initial in memory sort of the parsed records.
@return OS_THREAD_DUMMY_RETURN */
-UNIV_INTERN
+static
os_thread_ret_t
fts_parallel_tokenization(
/*======================*/
@@ -649,7 +773,6 @@ fts_parallel_tokenization(
ib_uint64_t total_rec = 0;
ulint num_doc_processed = 0;
doc_id_t last_doc_id = 0;
- ulint zip_size;
mem_heap_t* blob_heap = NULL;
fts_doc_t doc;
dict_table_t* table = psort_info->psort_common->new_table;
@@ -659,6 +782,12 @@ fts_parallel_tokenization(
ut_ad(psort_info->psort_common->trx->mysql_thd != NULL);
const char* path = thd_innodb_tmpdir(
psort_info->psort_common->trx->mysql_thd);
@@ -675,7 +804,8 @@ fts_parallel_tokenization(
block = psort_info->merge_block;
crypt_block = psort_info->crypt_block;
- zip_size = dict_table_zip_size(table);
+
+ const page_size_t& page_size = dict_table_page_size(table);
row_merge_fts_get_next_doc_item(psort_info, &doc_item);
@@ -705,7 +835,7 @@ loop:
doc.text.f_str =
btr_copy_externally_stored_field(
&doc.text.f_len, data,
- zip_size, data_len, blob_heap);
+ page_size, data_len, blob_heap);
} else {
doc.text.f_str = data;
doc.text.f_len = data_len;
@@ -735,15 +865,13 @@ loop:
num_doc_processed++;
if (fts_enable_diag_print && num_doc_processed % 10000 == 1) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "number of doc processed %d\n",
- (int) num_doc_processed);
+ ib::info() << "Number of documents processed: "
+ << num_doc_processed;
#ifdef FTS_INTERNAL_DIAG_PRINT
for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "ID %d, partition %d, word "
- "%d\n",(int) psort_info->psort_id,
- (int) i, (int) mycount[i]);
+ ib::info() << "ID " << psort_info->psort_id
+ << ", partition " << i << ", word "
+ << mycount[i];
}
#endif
}
@@ -770,7 +898,7 @@ loop:
block[t_ctx.buf_used],
crypt_block[t_ctx.buf_used],
table->space)) {
- error = DB_TEMP_FILE_WRITE_FAILURE;
+ error = DB_TEMP_FILE_WRITE_FAIL;
goto func_exit;
}
@@ -789,14 +917,13 @@ loop:
goto exit;
} else if (retried > 10000) {
ut_ad(!doc_item);
- /* retied too many times and cannot get new record */
- ib_logf(IB_LOG_LEVEL_ERROR,
- "InnoDB: FTS parallel sort processed "
- "%lu records, the sort queue has "
- "%lu records. But sort cannot get "
- "the next records", num_doc_processed,
- UT_LIST_GET_LEN(
- psort_info->fts_doc_list));
+ /* retried too many times and cannot get new record */
+ ib::error() << "FTS parallel sort processed "
+ << num_doc_processed
+ << " records, the sort queue has "
+ << UT_LIST_GET_LEN(psort_info->fts_doc_list)
+ << " records. But sort cannot get the next"
+ " records";
goto exit;
}
} else if (psort_info->state == FTS_PARENT_EXITING) {
@@ -865,7 +992,7 @@ exit:
block[i],
crypt_block[i],
table->space)) {
- error = DB_TEMP_FILE_WRITE_FAILURE;
+ error = DB_TEMP_FILE_WRITE_FAIL;
goto func_exit;
}
@@ -938,18 +1065,13 @@ func_exit:
os_event_set(psort_info->psort_common->sort_event);
psort_info->child_status = FTS_CHILD_EXITING;
-#ifdef __WIN__
- CloseHandle(psort_info->thread_hdl);
-#endif /*__WIN__ */
-
- os_thread_exit(NULL);
+ os_thread_exit();
OS_THREAD_DUMMY_RETURN;
}
/*********************************************************************//**
Start the parallel tokenization and parallel merge sort */
-UNIV_INTERN
void
row_fts_start_psort(
/*================*/
@@ -960,16 +1082,17 @@ row_fts_start_psort(
for (i = 0; i < fts_sort_pll_degree; i++) {
psort_info[i].psort_id = i;
- psort_info[i].thread_hdl = os_thread_create(
- fts_parallel_tokenization,
- (void*) &psort_info[i], &thd_id);
+ psort_info[i].thread_hdl =
+ os_thread_create(fts_parallel_tokenization,
+ (void*) &psort_info[i],
+ &thd_id);
}
}
/*********************************************************************//**
Function performs the merge and insertion of the sorted records.
@return OS_THREAD_DUMMY_RETURN */
-UNIV_INTERN
+static
os_thread_ret_t
fts_parallel_merge(
/*===============*/
@@ -990,25 +1113,19 @@ fts_parallel_merge(
os_event_set(psort_info->psort_common->merge_event);
psort_info->child_status = FTS_CHILD_EXITING;
-#ifdef __WIN__
- CloseHandle(psort_info->thread_hdl);
-#endif /*__WIN__ */
-
- os_thread_exit(NULL, false);
+ os_thread_exit(false);
OS_THREAD_DUMMY_RETURN;
}
/*********************************************************************//**
Kick off the parallel merge and insert thread */
-UNIV_INTERN
void
row_fts_start_parallel_merge(
/*=========================*/
fts_psort_t* merge_info) /*!< in: parallel sort info */
{
int i = 0;
- os_thread_id_t thd_id;
/* Kick off merge/insert threads */
for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
@@ -1016,47 +1133,93 @@ row_fts_start_parallel_merge(
merge_info[i].child_status = 0;
merge_info[i].thread_hdl = os_thread_create(
- fts_parallel_merge, (void*) &merge_info[i], &thd_id);
+ fts_parallel_merge,
+ (void*) &merge_info[i],
+ &merge_info[i].thread_hdl);
}
}
+/**
+Write out a single word's data as new entry/entries in the INDEX table.
+@param[in] ins_ctx insert context
+@param[in] word word string
+@param[in]	node		node columns
+@return DB_SUCCESS if insertion runs fine, otherwise error code */
+static
+dberr_t
+row_merge_write_fts_node(
+ const fts_psort_insert_t* ins_ctx,
+ const fts_string_t* word,
+ const fts_node_t* node)
+{
+ dtuple_t* tuple;
+ dfield_t* field;
+ dberr_t ret = DB_SUCCESS;
+ doc_id_t write_first_doc_id[8];
+ doc_id_t write_last_doc_id[8];
+ ib_uint32_t write_doc_count;
+
+ tuple = ins_ctx->tuple;
+
+ /* The first field is the tokenized word */
+ field = dtuple_get_nth_field(tuple, 0);
+ dfield_set_data(field, word->f_str, word->f_len);
+
+ /* The second field is first_doc_id */
+ field = dtuple_get_nth_field(tuple, 1);
+ fts_write_doc_id((byte*)&write_first_doc_id, node->first_doc_id);
+ dfield_set_data(field, &write_first_doc_id, sizeof(doc_id_t));
+
+	/* The third and fourth fields (TRX_ID, ROLL_PTR) are filled already. */
+ /* The fifth field is last_doc_id */
+ field = dtuple_get_nth_field(tuple, 4);
+ fts_write_doc_id((byte*)&write_last_doc_id, node->last_doc_id);
+ dfield_set_data(field, &write_last_doc_id, sizeof(doc_id_t));
+
+ /* The sixth field is doc_count */
+ field = dtuple_get_nth_field(tuple, 5);
+ mach_write_to_4((byte*)&write_doc_count, (ib_uint32_t)node->doc_count);
+ dfield_set_data(field, &write_doc_count, sizeof(ib_uint32_t));
+
+ /* The seventh field is ilist */
+ field = dtuple_get_nth_field(tuple, 6);
+ dfield_set_data(field, node->ilist, node->ilist_size);
+
+ ret = ins_ctx->btr_bulk->insert(tuple);
+
+ return(ret);
+}
+
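As the code above spells out, each auxiliary FTS_INDEX_TABLE row carries seven fields in order: word, first_doc_id, trx_id, roll_ptr (the latter two pre-filled in the shared tuple), last_doc_id, doc_count, and the ilist payload encoding per-document positions.
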
/********************************************************************//**
Insert processed FTS data into auxiliary index tables.
-@return DB_SUCCESS if insertion runs fine */
+@return DB_SUCCESS if insertion runs fine */
static MY_ATTRIBUTE((nonnull))
dberr_t
row_merge_write_fts_word(
/*=====================*/
- trx_t* trx, /*!< in: transaction */
- que_t** ins_graph, /*!< in: Insert query graphs */
- fts_tokenizer_word_t* word, /*!< in: sorted and tokenized
- word */
- fts_table_t* fts_table, /*!< in: fts aux table instance */
- CHARSET_INFO* charset) /*!< in: charset */
+ fts_psort_insert_t* ins_ctx, /*!< in: insert context */
+ fts_tokenizer_word_t* word) /*!< in: sorted and tokenized
+ word */
{
- ulint selected;
dberr_t ret = DB_SUCCESS;
- selected = fts_select_index(
- charset, word->text.f_str, word->text.f_len);
- fts_table->suffix = fts_get_suffix(selected);
+ ut_ad(ins_ctx->aux_index_id == fts_select_index(
+ ins_ctx->charset, word->text.f_str, word->text.f_len));
	/* Pop out each fts_node in word->nodes and write them to the auxiliary table */
- while (ib_vector_size(word->nodes) > 0) {
+ for (ulint i = 0; i < ib_vector_size(word->nodes); i++) {
dberr_t error;
fts_node_t* fts_node;
- fts_node = static_cast<fts_node_t*>(ib_vector_pop(word->nodes));
+ fts_node = static_cast<fts_node_t*>(ib_vector_get(word->nodes, i));
- error = fts_write_node(
- trx, &ins_graph[selected], fts_table, &word->text,
- fts_node);
+ error = row_merge_write_fts_node(ins_ctx, &word->text, fts_node);
if (error != DB_SUCCESS) {
- fprintf(stderr, "InnoDB: failed to write"
- " word %s to FTS auxiliary index"
- " table, error (%s) \n",
- word->text.f_str, ut_strerr(error));
+ ib::error() << "Failed to write word "
+ << word->text.f_str << " to FTS auxiliary"
+ " index table, error (" << ut_strerr(error)
+ << ")";
ret = error;
}
@@ -1064,13 +1227,15 @@ row_merge_write_fts_word(
fts_node->ilist = NULL;
}
+ ib_vector_reset(word->nodes);
+
return(ret);
}
/*********************************************************************//**
Read sorted FTS data files and insert data tuples into auxiliary tables.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
+@return DB_SUCCESS or error number */
+static
void
row_fts_insert_tuple(
/*=================*/
@@ -1112,11 +1277,7 @@ row_fts_insert_tuple(
positions);
/* Write out the current word */
- row_merge_write_fts_word(ins_ctx->trx,
- ins_ctx->ins_graph, word,
- &ins_ctx->fts_table,
- ins_ctx->charset);
-
+ row_merge_write_fts_word(ins_ctx, word);
}
return;
@@ -1130,7 +1291,7 @@ row_fts_insert_tuple(
token_word.f_str = static_cast<byte*>(dfield_get_data(dfield));
if (!word->text.f_str) {
- fts_utf8_string_dup(&word->text, &token_word, ins_ctx->heap);
+ fts_string_dup(&word->text, &token_word, ins_ctx->heap);
}
/* compare to the last word, to see if they are the same
@@ -1147,12 +1308,10 @@ row_fts_insert_tuple(
}
/* Write out the current word */
- row_merge_write_fts_word(ins_ctx->trx, ins_ctx->ins_graph,
- word, &ins_ctx->fts_table,
- ins_ctx->charset);
+ row_merge_write_fts_word(ins_ctx, word);
/* Copy the new word */
- fts_utf8_string_dup(&word->text, &token_word, ins_ctx->heap);
+ fts_string_dup(&word->text, &token_word, ins_ctx->heap);
num_item = ib_vector_size(positions);
@@ -1212,7 +1371,7 @@ row_fts_sel_tree_propagate(
int propogated, /*<! in: tree node propagated */
int* sel_tree, /*<! in: selection tree */
const mrec_t** mrec, /*<! in: sort record */
- ulint** offsets, /*<! in: record offsets */
+ offset_t** offsets, /*<! in: record offsets */
dict_index_t* index) /*<! in/out: FTS index */
{
ulint parent;
@@ -1262,7 +1421,7 @@ row_fts_sel_tree_update(
ulint propagated, /*<! in: node to propagate up */
ulint height, /*<! in: tree height */
const mrec_t** mrec, /*<! in: sort record */
- ulint** offsets, /*<! in: record offsets */
+ offset_t** offsets, /*<! in: record offsets */
dict_index_t* index) /*<! in: index dictionary */
{
ulint i;
@@ -1284,7 +1443,7 @@ row_fts_build_sel_tree_level(
int* sel_tree, /*<! in/out: selection tree */
ulint level, /*<! in: selection tree level */
const mrec_t** mrec, /*<! in: sort record */
- ulint** offsets, /*<! in: record offsets */
+ offset_t** offsets, /*<! in: record offsets */
dict_index_t* index) /*<! in: index dictionary */
{
ulint start;
@@ -1344,7 +1503,7 @@ row_fts_build_sel_tree(
/*===================*/
int* sel_tree, /*<! in/out: selection tree */
const mrec_t** mrec, /*<! in: sort record */
- ulint** offsets, /*<! in: record offsets */
+ offset_t** offsets, /*<! in: record offsets */
dict_index_t* index) /*<! in: index dictionary */
{
ulint treelevel = 1;
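
For context on these hunks: the merge phase performs a k-way merge over fts_sort_pll_degree sorted streams through a selection (tournament) tree, so each emitted record costs O(log k) comparisons. A schematic of the idea only, with consume(), advance_stream() and propagate_up() as hypothetical stand-ins for the helpers above:

	int	winner = sel_tree[0];		/* root holds the minimum */
	consume(mrec[winner]);			/* emit the smallest record */
	advance_stream(winner);			/* fetch that stream's next record */
	propagate_up(sel_tree, winner,		/* restore the tree invariant */
		     mrec, offsets, index);
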
@@ -1379,8 +1538,7 @@ row_fts_build_sel_tree(
/*********************************************************************//**
Read sorted file containing index data tuples and insert these data
tuples to the index
-@return DB_SUCCESS or error number */
-UNIV_INTERN
+@return DB_SUCCESS or error number */
dberr_t
row_fts_merge_insert(
/*=================*/
@@ -1395,12 +1553,11 @@ row_fts_merge_insert(
mem_heap_t* heap;
dberr_t error = DB_SUCCESS;
ulint* foffs;
- ulint** offsets;
+ offset_t** offsets;
fts_tokenizer_word_t new_word;
ib_vector_t* positions;
doc_id_t last_doc_id;
ib_alloc_t* heap_alloc;
- ulint n_bytes;
ulint i;
mrec_buf_t** buf;
int* fd;
@@ -1413,14 +1570,20 @@ row_fts_merge_insert(
ulint start;
fts_psort_insert_t ins_ctx;
ulint count_diag = 0;
- ulint space;
+ fts_table_t fts_table;
+ char aux_table_name[MAX_FULL_NAME_LEN];
+ dict_table_t* aux_table;
+ dict_index_t* aux_index;
+ trx_t* trx;
+ byte sys_buf[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN];
/* We use the insert query graph as the dummy graph
needed in the row module call */
- ins_ctx.trx = trx_allocate_for_background();
+ trx = trx_allocate_for_background();
+ trx_start_if_not_started(trx, true);
- ins_ctx.trx->op_info = "inserting index entries";
+ trx->op_info = "inserting index entries";
ins_ctx.opt_doc_id_size = psort_info[0].psort_common->opt_doc_id_size;
@@ -1430,7 +1593,7 @@ row_fts_merge_insert(
heap, sizeof (*b) * fts_sort_pll_degree);
foffs = (ulint*) mem_heap_alloc(
heap, sizeof(*foffs) * fts_sort_pll_degree);
- offsets = (ulint**) mem_heap_alloc(
+ offsets = (offset_t**) mem_heap_alloc(
heap, sizeof(*offsets) * fts_sort_pll_degree);
buf = (mrec_buf_t**) mem_heap_alloc(
heap, sizeof(*buf) * fts_sort_pll_degree);
@@ -1454,10 +1617,10 @@ row_fts_merge_insert(
num = 1 + REC_OFFS_HEADER_SIZE
+ dict_index_get_n_fields(index);
- offsets[i] = static_cast<ulint*>(mem_heap_zalloc(
+ offsets[i] = static_cast<offset_t*>(mem_heap_zalloc(
heap, num * sizeof *offsets[i]));
- offsets[i][0] = num;
- offsets[i][1] = dict_index_get_n_fields(index);
+ rec_offs_set_n_alloc(offsets[i], num);
+ rec_offs_set_n_fields(offsets[i], dict_index_get_n_fields(index));
block[i] = psort_info[i].merge_block[id];
crypt_block[i] = psort_info[i].crypt_block[id];
b[i] = psort_info[i].merge_block[id];
@@ -1471,9 +1634,8 @@ row_fts_merge_insert(
}
if (fts_enable_diag_print) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB_FTS: to inserted %lu records\n",
- (ulong) count_diag);
+ ib::info() << "InnoDB_FTS: to insert " << count_diag
+ << " records";
}
/* Initialize related variables if creating FTS indexes */
@@ -1485,23 +1647,47 @@ row_fts_merge_insert(
positions = ib_vector_create(heap_alloc, sizeof(ulint), 32);
last_doc_id = 0;
- /* Allocate insert query graphs for FTS auxillary
- Index Table, note we have FTS_NUM_AUX_INDEX such index tables */
- n_bytes = sizeof(que_t*) * (FTS_NUM_AUX_INDEX + 1);
- ins_ctx.ins_graph = static_cast<que_t**>(mem_heap_alloc(heap, n_bytes));
- memset(ins_ctx.ins_graph, 0x0, n_bytes);
-
/* We should set the flags2 with aux_table_name here,
in order to get the correct aux table names. */
index->table->flags2 |= DICT_TF2_FTS_AUX_HEX_NAME;
DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name",
index->table->flags2 &= ~DICT_TF2_FTS_AUX_HEX_NAME;);
-
- ins_ctx.fts_table.type = FTS_INDEX_TABLE;
- ins_ctx.fts_table.index_id = index->id;
- ins_ctx.fts_table.table_id = table->id;
- ins_ctx.fts_table.table = index->table;
- space = table->space;
+ fts_table.type = FTS_INDEX_TABLE;
+ fts_table.index_id = index->id;
+ fts_table.table_id = table->id;
+ fts_table.table = index->table;
+ fts_table.suffix = fts_get_suffix(id);
+
+ /* Get aux index */
+ fts_get_table_name(&fts_table, aux_table_name);
+ aux_table = dict_table_open_on_name(aux_table_name, FALSE, FALSE,
+ DICT_ERR_IGNORE_NONE);
+ ut_ad(aux_table != NULL);
+ dict_table_close(aux_table, FALSE, FALSE);
+ aux_index = dict_table_get_first_index(aux_table);
+
+ /* Create bulk load instance */
+ ins_ctx.btr_bulk = UT_NEW_NOKEY(
+ BtrBulk(aux_index, trx, psort_info[0].psort_common->trx
+ ->get_flush_observer()));
+
+ /* Create tuple for insert */
+ ins_ctx.tuple = dtuple_create(heap, dict_index_get_n_fields(aux_index));
+ dict_index_copy_types(ins_ctx.tuple, aux_index,
+ dict_index_get_n_fields(aux_index));
+
+ /* Set TRX_ID and ROLL_PTR */
+ trx_write_trx_id(sys_buf, trx->id);
+ trx_write_roll_ptr(&sys_buf[DATA_TRX_ID_LEN],
+ 1ULL << ROLL_PTR_INSERT_FLAG_POS);
+ dfield_set_data(dtuple_get_nth_field(ins_ctx.tuple, 2),
+ sys_buf, DATA_TRX_ID_LEN);
+ dfield_set_data(dtuple_get_nth_field(ins_ctx.tuple, 3),
+ &sys_buf[DATA_TRX_ID_LEN], DATA_ROLL_PTR_LEN);
+
+ ut_d(ins_ctx.aux_index_id = id);
+
+ const ulint space = table->space;
for (i = 0; i < fts_sort_pll_degree; i++) {
if (psort_info[i].merge_file[id]->n_rec == 0) {
@@ -1534,7 +1720,6 @@ row_fts_merge_insert(
corresponding FTS index auxiliary tables */
for (;;) {
dtuple_t* dtuple;
- ulint n_ext;
int min_rec = 0;
if (fts_sort_pll_degree <= 2) {
@@ -1577,7 +1762,7 @@ row_fts_merge_insert(
}
dtuple = row_rec_to_index_entry_low(
- mrec[min_rec], index, offsets[min_rec], &n_ext,
+ mrec[min_rec], index, offsets[min_rec],
tuple_heap);
row_fts_insert_tuple(
@@ -1603,26 +1788,25 @@ row_fts_merge_insert(
}
exit:
- fts_sql_commit(ins_ctx.trx);
+ fts_sql_commit(trx);
- ins_ctx.trx->op_info = "";
+ trx->op_info = "";
mem_heap_free(tuple_heap);
- for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
- if (ins_ctx.ins_graph[i]) {
- fts_que_graph_free(ins_ctx.ins_graph[i]);
- }
- }
+ error = ins_ctx.btr_bulk->finish(error);
+ UT_DELETE(ins_ctx.btr_bulk);
- trx_free_for_background(ins_ctx.trx);
+ trx_free_for_background(trx);
mem_heap_free(heap);
if (fts_enable_diag_print) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB_FTS: inserted %lu records\n",
- (ulong) count);
+ ib::info() << "InnoDB_FTS: inserted " << count << " records";
+ }
+
+ if (psort_info[0].psort_common->trx->get_flush_observer()) {
+ row_merge_write_redo(aux_index);
}
return(error);
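
The control flow introduced above, reduced to its skeleton (a sketch; BtrBulk is the bulk-load class this patch switches to, replacing the per-node SQL insert graphs):

	BtrBulk*	bulk = UT_NEW_NOKEY(BtrBulk(aux_index, trx, observer));
	/* ... for each merged (word, node) pair: fill ins_ctx.tuple ... */
	dberr_t	err = bulk->insert(tuple);	/* appends to the aux index B-tree */
	/* ... */
	err = bulk->finish(err);		/* flushes partial pages, ends the load */
	UT_DELETE(bulk);
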
diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc
index f5e882f96fe..ad7f77f6215 100644
--- a/storage/innobase/row/row0import.cc
+++ b/storage/innobase/row/row0import.cc
@@ -25,15 +25,10 @@ Created 2012-02-08 by Sunny Bains.
*******************************************************/
#include "row0import.h"
-
-#ifdef UNIV_NONINL
-#include "row0import.ic"
-#endif
-
#include "btr0pcur.h"
-#include "btr0sea.h"
#include "que0que.h"
#include "dict0boot.h"
+#include "dict0load.h"
#include "ibuf0ibuf.h"
#include "pars0pars.h"
#include "row0upd.h"
@@ -51,9 +46,13 @@ Created 2012-02-08 by Sunny Bains.
#include <vector>
+#ifdef HAVE_MY_AES_H
+#include <my_aes.h>
+#endif
+
/** The size of the buffer to use for IO.
-@param n - page size of the tablespace.
-@retval number of pages */
+@param n physical page size
+@return number of pages */
#define IO_BUFFER_SIZE(n) ((1024 * 1024) / n)
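
With the default 16 KiB page size, for example, IO_BUFFER_SIZE(16384) = (1024 * 1024) / 16384 = 64 pages per I/O batch.
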
/** For gathering stats on records during phase I */
@@ -116,7 +115,7 @@ struct row_import {
m_hostname(NULL),
m_table_name(NULL),
m_autoinc(0),
- m_page_size(0),
+ m_page_size(0, 0, false),
m_flags(0),
m_n_cols(0),
m_cols(NULL),
@@ -127,45 +126,39 @@ struct row_import {
~row_import() UNIV_NOTHROW;
- /**
- Find the index entry in in the indexes array.
- @param name - index name
+	/** Find the index entry in the indexes array.
+ @param name index name
@return instance if found else 0. */
row_index_t* get_index(const char* name) const UNIV_NOTHROW;
- /**
- Get the number of rows in the index.
- @param name - index name
+ /** Get the number of rows in the index.
+ @param name index name
@return number of rows (doesn't include delete marked rows). */
ulint get_n_rows(const char* name) const UNIV_NOTHROW;
- /**
- Find the ordinal value of the column name in the cfg table columns.
- @param name - of column to look for.
+ /** Find the ordinal value of the column name in the cfg table columns.
+ @param name of column to look for.
@return ULINT_UNDEFINED if not found. */
ulint find_col(const char* name) const UNIV_NOTHROW;
- /**
- Get the number of rows for which purge failed during the convert phase.
- @param name - index name
+ /** Get the number of rows for which purge failed during the
+ convert phase.
+ @param name index name
@return number of rows for which purge failed. */
- ulint get_n_purge_failed(const char* name) const UNIV_NOTHROW;
+ ulint get_n_purge_failed(const char* name) const UNIV_NOTHROW;
- /**
- Check if the index is clean. ie. no delete-marked records
- @param name - index name
+	/** Check if the index is clean, i.e. contains no delete-marked records.
+ @param name index name
@return true if index needs to be purged. */
bool requires_purge(const char* name) const UNIV_NOTHROW
{
return(get_n_purge_failed(name) > 0);
}
- /**
- Set the index root <space, pageno> using the index name */
+ /** Set the index root <space, pageno> using the index name */
void set_root_by_name() UNIV_NOTHROW;
- /**
- Set the index root <space, pageno> using a heuristic
+ /** Set the index root <space, pageno> using a heuristic
@return DB_SUCCESS or error code */
dberr_t set_root_by_heuristic() UNIV_NOTHROW;
@@ -178,18 +171,16 @@ struct row_import {
THD* thd,
const dict_index_t* index) UNIV_NOTHROW;
- /**
- Check if the table schema that was read from the .cfg file matches the
- in memory table definition.
- @param thd - MySQL session variable
+ /** Check if the table schema that was read from the .cfg file
+ matches the in memory table definition.
+ @param thd MySQL session variable
@return DB_SUCCESS or error code. */
dberr_t match_table_columns(
THD* thd) UNIV_NOTHROW;
- /**
- Check if the table (and index) schema that was read from the .cfg file
- matches the in memory table definition.
- @param thd - MySQL session variable
+ /** Check if the table (and index) schema that was read from the
+ .cfg file matches the in memory table definition.
+ @param thd MySQL session variable
@return DB_SUCCESS or error code. */
dberr_t match_schema(
THD* thd) UNIV_NOTHROW;
@@ -205,7 +196,7 @@ struct row_import {
ib_uint64_t m_autoinc; /*!< Next autoinc value */
- ulint m_page_size; /*!< Tablespace page size */
+ page_size_t m_page_size; /*!< Tablespace page size */
ulint m_flags; /*!< Table flags */
@@ -231,15 +222,13 @@ struct row_import {
/** Use the page cursor to iterate over records in a block. */
class RecIterator {
public:
- /**
- Default constructor */
+ /** Default constructor */
RecIterator() UNIV_NOTHROW
{
memset(&m_cur, 0x0, sizeof(m_cur));
}
- /**
- Position the cursor on the first user record. */
+ /** Position the cursor on the first user record. */
void open(buf_block_t* block) UNIV_NOTHROW
{
page_cur_set_before_first(block, &m_cur);
@@ -249,8 +238,7 @@ public:
}
}
- /**
- Move to the next record. */
+ /** Move to the next record. */
void next() UNIV_NOTHROW
{
page_cur_move_to_next(&m_cur);
@@ -276,7 +264,7 @@ public:
bool remove(
const dict_index_t* index,
page_zip_des_t* page_zip,
- ulint* offsets) UNIV_NOTHROW
+ offset_t* offsets) UNIV_NOTHROW
{
/* We can't end up with an empty page unless it is root. */
if (page_get_n_recs(m_cur.block->frame) <= 1) {
couldn't purge the delete-marked records during Phase I. */
class IndexPurge {
public:
/** Constructor
- @param trx - the user transaction covering the import tablespace
- @param index - to be imported
- @param space_id - space id of the tablespace */
+ @param trx the user transaction covering the import tablespace
+ @param index to be imported
+ @param space_id space id of the tablespace */
IndexPurge(
trx_t* trx,
dict_index_t* index) UNIV_NOTHROW
@@ -307,9 +295,8 @@ public:
m_index(index),
m_n_rows(0)
{
- ib_logf(IB_LOG_LEVEL_INFO,
- "Phase II - Purge records from index %s",
- index->name);
+ ib::info() << "Phase II - Purge records from index "
+ << index->name;
}
	/** Destructor */
@@ -327,28 +314,23 @@ public:
}
private:
- /**
- Begin import, position the cursor on the first record. */
+ /** Begin import, position the cursor on the first record. */
void open() UNIV_NOTHROW;
- /**
- Close the persistent curosr and commit the mini-transaction. */
+	/** Close the persistent cursor and commit the mini-transaction. */
void close() UNIV_NOTHROW;
- /**
- Position the cursor on the next record.
+ /** Position the cursor on the next record.
@return DB_SUCCESS or error code */
dberr_t next() UNIV_NOTHROW;
- /**
- Store the persistent cursor position and reopen the
+ /** Store the persistent cursor position and reopen the
B-tree cursor in BTR_MODIFY_TREE mode, because the
tree structure may be changed during a pessimistic delete. */
void purge_pessimistic_delete() UNIV_NOTHROW;
- /**
- Purge delete-marked records.
- @param offsets - current row offsets. */
+ /** Purge delete-marked records.
+ @param offsets current row offsets. */
void purge() UNIV_NOTHROW;
protected:
@@ -371,25 +353,25 @@ class AbstractCallback
{
public:
/** Constructor
- @param trx - covering transaction */
+ @param trx covering transaction */
AbstractCallback(trx_t* trx)
:
+ m_page_size(0, 0, false),
m_trx(trx),
m_space(ULINT_UNDEFINED),
m_xdes(),
m_xdes_page_no(ULINT_UNDEFINED),
m_space_flags(ULINT_UNDEFINED) UNIV_NOTHROW { }
- /**
- Free any extent descriptor instance */
+ /** Free any extent descriptor instance */
virtual ~AbstractCallback()
{
- delete [] m_xdes;
+ UT_DELETE_ARRAY(m_xdes);
}
/** Determine the page size to use for traversing the tablespace
- @param file_size - size of the tablespace file in bytes
- @param block - contents of the first page in the tablespace file.
+ @param file_size size of the tablespace file in bytes
+ @param block contents of the first page in the tablespace file.
@retval DB_SUCCESS or error code. */
virtual dberr_t init(
os_offset_t file_size,
@@ -398,33 +380,27 @@ public:
/** @return true if compressed table. */
bool is_compressed_table() const UNIV_NOTHROW
{
- return(get_zip_size() > 0);
+ return(get_page_size().is_compressed());
+ }
+
+ /** @return the tablespace flags */
+ ulint get_space_flags() const
+ {
+ return(m_space_flags);
}
/**
Set the name of the physical file and the file handle that is used
to open it for the file that is being iterated over.
- @param filename - then physical name of the tablespace file.
- @param file - OS file handle */
+ @param filename the physical name of the tablespace file
+ @param file OS file handle */
void set_file(const char* filename, pfs_os_file_t file) UNIV_NOTHROW
{
m_file = file;
m_filepath = filename;
}
- /** The compressed page size
- @return the compressed page size */
- ulint get_zip_size() const
- {
- return(m_zip_size);
- }
-
- /** The compressed page size
- @return the compressed page size */
- ulint get_page_size() const
- {
- return(m_page_size);
- }
+ const page_size_t& get_page_size() const { return m_page_size; }
const char* filename() const { return m_filepath; }
@@ -454,10 +430,9 @@ public:
}
protected:
- /**
- Get the physical offset of the extent descriptor within the page.
- @param page_no - page number of the extent descriptor
- @param page - contents of the page containing the extent descriptor.
+ /** Get the physical offset of the extent descriptor within the page.
+ @param page_no page number of the extent descriptor
+ @param page contents of the page containing the extent descriptor.
@return the start of the xdes array in a page */
const xdes_t* xdes(
ulint page_no,
@@ -465,19 +440,18 @@ protected:
{
ulint offset;
- offset = xdes_calc_descriptor_index(get_zip_size(), page_no);
+ offset = xdes_calc_descriptor_index(get_page_size(), page_no);
return(page + XDES_ARR_OFFSET + XDES_SIZE * offset);
}
- /**
- Set the current page directory (xdes). If the extent descriptor is
+ /** Set the current page directory (xdes). If the extent descriptor is
marked as free then free the current extent descriptor and set it to
0. This implies that all pages that are covered by this extent
descriptor are also freed.
- @param page_no - offset of page within the file
- @param page - page contents
+ @param page_no offset of page within the file
+ @param page page contents
@return DB_SUCCESS or error code. */
dberr_t set_current_xdes(
ulint page_no,
@@ -485,9 +459,8 @@ protected:
{
m_xdes_page_no = page_no;
- delete[] m_xdes;
-
- m_xdes = 0;
+ UT_DELETE_ARRAY(m_xdes);
+ m_xdes = NULL;
ulint state;
const xdes_t* xdesc = page + XDES_ARR_OFFSET;
@@ -496,39 +469,32 @@ protected:
if (state != XDES_FREE) {
- m_xdes = new(std::nothrow) xdes_t[m_page_size];
+ m_xdes = UT_NEW_ARRAY_NOKEY(xdes_t,
+ m_page_size.physical());
/* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_13",
- delete [] m_xdes; m_xdes = 0;);
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_13",
+ UT_DELETE_ARRAY(m_xdes);
+ m_xdes = NULL;
+ );
- if (m_xdes == 0) {
+ if (m_xdes == NULL) {
return(DB_OUT_OF_MEMORY);
}
- memcpy(m_xdes, page, m_page_size);
+ memcpy(m_xdes, page, m_page_size.physical());
}
return(DB_SUCCESS);
}
- /**
- @return true if it is a root page */
- bool is_root_page(const page_t* page) const UNIV_NOTHROW
- {
- ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
-
- return(mach_read_from_4(page + FIL_PAGE_NEXT) == FIL_NULL
- && mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL);
- }
-
- /**
- Check if the page is marked as free in the extent descriptor.
- @param page_no - page number to check in the extent descriptor.
+ /** Check if the page is marked as free in the extent descriptor.
+ @param page_no page number to check in the extent descriptor.
@return true if the page is marked as free */
bool is_free(ulint page_no) const UNIV_NOTHROW
{
- ut_a(xdes_calc_descriptor_page(get_zip_size(), page_no)
+ ut_a(xdes_calc_descriptor_page(get_page_size(), page_no)
== m_xdes_page_no);
if (m_xdes != 0) {
@@ -543,11 +509,8 @@ protected:
}
protected:
- /** Compressed table page size */
- ulint m_zip_size;
-
/** The tablespace page size. */
- ulint m_page_size;
+ page_size_t m_page_size;
/** File handle to the tablespace */
pfs_os_file_t m_file;
@@ -583,8 +546,8 @@ protected:
};
/** Determine the page size to use for traversing the tablespace
-@param file_size - size of the tablespace file in bytes
-@param block - contents of the first page in the tablespace file.
+@param file_size size of the tablespace file in bytes
+@param block contents of the first page in the tablespace file.
@retval DB_SUCCESS or error code. */
dberr_t
AbstractCallback::init(
@@ -597,9 +560,8 @@ AbstractCallback::init(
if (!fsp_flags_is_valid(m_space_flags, true)) {
ulint cflags = fsp_flags_convert_from_101(m_space_flags);
if (cflags == ULINT_UNDEFINED) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Invalid FSP_SPACE_FLAGS=0x%x",
- int(m_space_flags));
+ ib::error() << "Invalid FSP_SPACE_FLAGS="
+ << ib::hex(m_space_flags);
return(DB_CORRUPTION);
}
m_space_flags = cflags;
@@ -607,39 +569,21 @@ AbstractCallback::init(
/* Clear the DATA_DIR flag, which is basically garbage. */
m_space_flags &= ~(1U << FSP_FLAGS_POS_RESERVED);
+ m_page_size.copy_from(page_size_t(m_space_flags));
- /* Since we don't know whether it is a compressed table
- or not, the data is always read into the block->frame. */
-
- m_zip_size = fsp_header_get_zip_size(page);
-
- if (!ut_is_2pow(m_zip_size) || m_zip_size > UNIV_ZIP_SIZE_MAX) {
- return(DB_CORRUPTION);
- }
-
- /* Set the page size used to traverse the tablespace. */
-
- m_page_size = (is_compressed_table())
- ? get_zip_size() : fsp_flags_get_page_size(m_space_flags);
+ if (!is_compressed_table() && !m_page_size.equals_to(univ_page_size)) {
- if (m_page_size == 0) {
- ib_logf(IB_LOG_LEVEL_ERROR, "Page size is 0");
- return(DB_CORRUPTION);
- } else if (!is_compressed_table() && m_page_size != UNIV_PAGE_SIZE) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Page size " ULINTPF " of ibd file is not the same "
- "as the server page size " ULINTPF,
- m_page_size, UNIV_PAGE_SIZE);
+ ib::error() << "Page size " << m_page_size.physical()
+ << " of ibd file is not the same as the server page"
+ " size " << univ_page_size.physical();
return(DB_CORRUPTION);
- } else if ((file_size % m_page_size)) {
+ } else if (file_size % m_page_size.physical() != 0) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "File size " UINT64PF " is not a multiple "
- "of the page size " ULINTPF,
- (ib_uint64_t) file_size, m_page_size);
+ ib::error() << "File size " << file_size << " is not a"
+ " multiple of the page size "
+ << m_page_size.physical();
return(DB_CORRUPTION);
}
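
Condensed, the validation above reduces to (a sketch using page_size_t and univ_page_size exactly as this file does):

	const page_size_t	ps(m_space_flags);
	if (!ps.is_compressed() && !ps.equals_to(univ_page_size)) {
		return(DB_CORRUPTION);	/* .ibd built for another page size */
	}
	if (file_size % ps.physical() != 0) {
		return(DB_CORRUPTION);	/* truncated or trailing garbage */
	}
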
@@ -670,11 +614,11 @@ struct FetchIndexRootPages : public AbstractCallback {
ulint m_page_no; /*!< Root page number */
};
- typedef std::vector<Index> Indexes;
+ typedef std::vector<Index, ut_allocator<Index> > Indexes;
/** Constructor
- @param trx - covering (user) transaction
- @param table - table definition in server .*/
+ @param trx covering (user) transaction
+	@param table table definition in the server. */
FetchIndexRootPages(const dict_table_t* table, trx_t* trx)
:
AbstractCallback(trx),
@@ -690,8 +634,7 @@ struct FetchIndexRootPages : public AbstractCallback {
return(m_space);
}
- /**
- Called for each block as it is read from the file.
+ /** Called for each block as it is read from the file.
@param block block to convert, it is not from the buffer pool.
@retval DB_SUCCESS or error code. */
dberr_t operator()(buf_block_t* block) UNIV_NOTHROW;
@@ -707,8 +650,7 @@ struct FetchIndexRootPages : public AbstractCallback {
Indexes m_indexes;
};
-/**
-Called for each block as it is read from the file. Check index pages to
+/** Called for each block as it is read from the file. Check index pages to
determine the exact row format. We can't get that from the tablespace
header flags alone.
@@ -723,15 +665,14 @@ dberr_t FetchIndexRootPages::operator()(buf_block_t* block) UNIV_NOTHROW
ulint page_type = fil_page_get_type(page);
if (page_type == FIL_PAGE_TYPE_XDES) {
- return set_current_xdes(block->page.offset, page);
- } else if (page_type == FIL_PAGE_INDEX
- && !is_free(block->page.offset)
- && is_root_page(page)) {
+ return set_current_xdes(block->page.id.page_no(), page);
+ } else if (fil_page_index_page_check(page)
+ && !is_free(block->page.id.page_no())
+ && !page_has_siblings(page)) {
index_id_t id = btr_page_get_index_id(page);
- ulint page_no = buf_block_get_page_no(block);
- m_indexes.push_back(Index(id, page_no));
+ m_indexes.push_back(Index(id, block->page.id.page_no()));
if (m_indexes.size() == 1) {
/* Check that the tablespace flags match the table flags. */
@@ -760,23 +701,26 @@ FetchIndexRootPages::build_row_import(row_import* cfg) const UNIV_NOTHROW
Indexes::const_iterator end = m_indexes.end();
ut_a(cfg->m_table == m_table);
- cfg->m_page_size = m_page_size;
+ cfg->m_page_size.copy_from(m_page_size);
cfg->m_n_indexes = m_indexes.size();
if (cfg->m_n_indexes == 0) {
- ib_logf(IB_LOG_LEVEL_ERROR, "No B+Tree found in tablespace");
+ ib::error() << "No B+Tree found in tablespace";
return(DB_CORRUPTION);
}
- cfg->m_indexes = new(std::nothrow) row_index_t[cfg->m_n_indexes];
+ cfg->m_indexes = UT_NEW_ARRAY_NOKEY(row_index_t, cfg->m_n_indexes);
/* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_11",
- delete [] cfg->m_indexes; cfg->m_indexes = 0;);
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_11",
+ UT_DELETE_ARRAY(cfg->m_indexes);
+ cfg->m_indexes = NULL;
+ );
- if (cfg->m_indexes == 0) {
+ if (cfg->m_indexes == NULL) {
return(DB_OUT_OF_MEMORY);
}
@@ -790,18 +734,20 @@ FetchIndexRootPages::build_row_import(row_import* cfg) const UNIV_NOTHROW
char name[BUFSIZ];
- ut_snprintf(name, sizeof(name), "index" IB_ID_FMT, it->m_id);
+ snprintf(name, sizeof(name), "index" IB_ID_FMT, it->m_id);
ulint len = strlen(name) + 1;
- cfg_index->m_name = new(std::nothrow) byte[len];
+ cfg_index->m_name = UT_NEW_ARRAY_NOKEY(byte, len);
/* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_12",
- delete [] cfg_index->m_name;
- cfg_index->m_name = 0;);
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_12",
+ UT_DELETE_ARRAY(cfg_index->m_name);
+ cfg_index->m_name = NULL;
+ );
- if (cfg_index->m_name == 0) {
+ if (cfg_index->m_name == NULL) {
return(DB_OUT_OF_MEMORY);
}
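
The allocation hunks above all follow the same fault-injection pattern: allocate with UT_NEW_ARRAY_NOKEY, then under a named DBUG keyword (ib_import_OOM_11, _12, ...) free the array and null the pointer so the subsequent NULL check exercises the DB_OUT_OF_MEMORY path from a test. A standalone sketch of the shape of that pattern; the fault_on flag stands in for the real DBUG_EXECUTE_IF machinery:

    #include <cstdlib>
    #include <new>

    struct row_index_stub { char* m_name; };

    /* Simulated debug switch; the server keys off DBUG_EXECUTE_IF labels. */
    static bool fault_on = false;

    static int build_indexes(row_index_stub*& indexes, std::size_t n) {
        indexes = new(std::nothrow) row_index_stub[n];

        if (fault_on) {            /* Trigger OOM, as in ib_import_OOM_11 */
            delete[] indexes;
            indexes = nullptr;
        }

        if (indexes == nullptr) {
            return -1;             /* DB_OUT_OF_MEMORY in the real code */
        }
        return 0;                  /* DB_SUCCESS */
    }
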
@@ -845,8 +791,8 @@ tablespace file.
class PageConverter : public AbstractCallback {
public:
/** Constructor
- * @param cfg - config of table being imported.
- * @param trx - transaction covering the import */
+ @param cfg config of table being imported.
+ @param trx transaction covering the import */
PageConverter(row_import* cfg, trx_t* trx) UNIV_NOTHROW;
virtual ~PageConverter() UNIV_NOTHROW
@@ -863,92 +809,80 @@ public:
return(m_cfg->m_table->space);
}
- /**
- Called for each block as it is read from the file.
+ /** Called for each block as it is read from the file.
@param block block to convert, it is not from the buffer pool.
@retval DB_SUCCESS or error code. */
dberr_t operator()(buf_block_t* block) UNIV_NOTHROW;
private:
- /**
- Update the page, set the space id, max trx id and index id.
- @param block - block read from file
- @param page_type - type of the page
+ /** Update the page, set the space id, max trx id and index id.
+ @param block block read from file
+ @param page_type type of the page
@retval DB_SUCCESS or error code */
dberr_t update_page(
buf_block_t* block,
ulint& page_type) UNIV_NOTHROW;
- /**
- Update the space, index id, trx id.
- @param block - block to convert
+ /** Update the space, index id, trx id.
+ @param block block to convert
@return DB_SUCCESS or error code */
dberr_t update_index_page(buf_block_t* block) UNIV_NOTHROW;
/** Update the BLOB references and write UNDO log entries for
rows that can't be purged optimistically.
- @param block - block to update
+ @param block block to update
@retval DB_SUCCESS or error code */
dberr_t update_records(buf_block_t* block) UNIV_NOTHROW;
- /**
- Validate the space flags and update tablespace header page.
- @param block - block read from file, not from the buffer pool.
+ /** Validate the space flags and update tablespace header page.
+ @param block block read from file, not from the buffer pool.
@retval DB_SUCCESS or error code */
dberr_t update_header(buf_block_t* block) UNIV_NOTHROW;
- /**
- Adjust the BLOB reference for a single column that is externally stored
- @param rec - record to update
- @param offsets - column offsets for the record
- @param i - column ordinal value
+ /** Adjust the BLOB reference for a single column that is externally stored
+ @param rec record to update
+ @param offsets column offsets for the record
+ @param i column ordinal value
@return DB_SUCCESS or error code */
dberr_t adjust_cluster_index_blob_column(
rec_t* rec,
- const ulint* offsets,
+ const offset_t* offsets,
ulint i) UNIV_NOTHROW;
- /**
- Adjusts the BLOB reference in the clustered index row for all
+ /** Adjusts the BLOB reference in the clustered index row for all
externally stored columns.
- @param rec - record to update
- @param offsets - column offsets for the record
+ @param rec record to update
+ @param offsets column offsets for the record
@return DB_SUCCESS or error code */
dberr_t adjust_cluster_index_blob_columns(
rec_t* rec,
- const ulint* offsets) UNIV_NOTHROW;
+ const offset_t* offsets) UNIV_NOTHROW;
- /**
- In the clustered index, adjist the BLOB pointers as needed.
+ /** In the clustered index, adjust the BLOB pointers as needed.
Also update the BLOB reference, write the new space id.
- @param rec - record to update
- @param offsets - column offsets for the record
+ @param rec record to update
+ @param offsets column offsets for the record
@return DB_SUCCESS or error code */
dberr_t adjust_cluster_index_blob_ref(
rec_t* rec,
- const ulint* offsets) UNIV_NOTHROW;
+ const offset_t* offsets) UNIV_NOTHROW;
- /**
- Purge delete-marked records, only if it is possible to do
+ /** Purge delete-marked records, only if it is possible to do
so without re-organising the B+tree.
- @param offsets - current row offsets.
+ @param offsets current row offsets.
@retval true if purged */
- bool purge(const ulint* offsets) UNIV_NOTHROW;
+ bool purge(const offset_t* offsets) UNIV_NOTHROW;
- /**
- Adjust the BLOB references and sys fields for the current record.
- @param index - the index being converted
- @param rec - record to update
- @param offsets - column offsets for the record
- @param deleted - true if row is delete marked
+ /** Adjust the BLOB references and sys fields for the current record.
+ @param index the index being converted
+ @param rec record to update
+ @param offsets column offsets for the record
@return DB_SUCCESS or error code. */
dberr_t adjust_cluster_record(
const dict_index_t* index,
rec_t* rec,
- const ulint* offsets,
- bool deleted) UNIV_NOTHROW;
+ const offset_t* offsets) UNIV_NOTHROW;
- /**
- Find an index with the matching id.
+ /** Find an index with the matching id.
@return row_index_t* instance or 0 */
row_index_t* find_index(index_id_t id) UNIV_NOTHROW
{
@@ -980,10 +914,10 @@ private:
RecIterator m_rec_iter;
/** Record offset */
- ulint m_offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t m_offsets_[REC_OFFS_NORMAL_SIZE];
/** Pointer to m_offsets_ */
- ulint* m_offsets;
+ offset_t* m_offsets;
/** Memory heap for the record offsets */
mem_heap_t* m_heap;
@@ -997,9 +931,9 @@ row_import destructor. */
row_import::~row_import() UNIV_NOTHROW
{
for (ulint i = 0; m_indexes != 0 && i < m_n_indexes; ++i) {
- delete [] m_indexes[i].m_name;
+ UT_DELETE_ARRAY(m_indexes[i].m_name);
- if (m_indexes[i].m_fields == 0) {
+ if (m_indexes[i].m_fields == NULL) {
continue;
}
@@ -1007,26 +941,25 @@ row_import::~row_import() UNIV_NOTHROW
ulint n_fields = m_indexes[i].m_n_fields;
for (ulint j = 0; j < n_fields; ++j) {
- delete [] fields[j].name;
+ UT_DELETE_ARRAY(const_cast<char*>(fields[j].name()));
}
- delete [] fields;
+ UT_DELETE_ARRAY(fields);
}
for (ulint i = 0; m_col_names != 0 && i < m_n_cols; ++i) {
- delete [] m_col_names[i];
+ UT_DELETE_ARRAY(m_col_names[i]);
}
- delete [] m_cols;
- delete [] m_indexes;
- delete [] m_col_names;
- delete [] m_table_name;
- delete [] m_hostname;
+ UT_DELETE_ARRAY(m_cols);
+ UT_DELETE_ARRAY(m_indexes);
+ UT_DELETE_ARRAY(m_col_names);
+ UT_DELETE_ARRAY(m_table_name);
+ UT_DELETE_ARRAY(m_hostname);
}
-/**
-Find the index entry in in the indexes array.
-@param name - index name
+/** Find the index entry in the indexes array.
+@param name index name
@return instance if found else 0. */
row_index_t*
row_import::get_index(
@@ -1047,9 +980,8 @@ row_import::get_index(
return(0);
}
-/**
-Get the number of rows in the index.
-@param name - index name
+/** Get the number of rows in the index.
+@param name index name
@return number of rows (doesn't include delete marked rows). */
ulint
row_import::get_n_rows(
@@ -1062,9 +994,8 @@ row_import::get_n_rows(
return(index->m_stats.m_n_rows);
}
-/**
-Get the number of rows for which purge failed uding the convert phase.
-@param name - index name
+/** Get the number of rows for which purge failed during the convert phase.
+@param name index name
@return number of rows for which purge failed. */
ulint
row_import::get_n_purge_failed(
@@ -1077,9 +1008,8 @@ row_import::get_n_purge_failed(
return(index->m_stats.m_n_purge_failed);
}
-/**
-Find the ordinal value of the column name in the cfg table columns.
-@param name - of column to look for.
+/** Find the ordinal value of the column name in the cfg table columns.
+@param name of column to look for.
@return ULINT_UNDEFINED if not found. */
ulint
row_import::find_col(
@@ -1114,9 +1044,9 @@ row_import::match_index_columns(
if (cfg_index == 0) {
ib_errf(thd, IB_LOG_LEVEL_ERROR,
- ER_TABLE_SCHEMA_MISMATCH,
- "Index %s not found in tablespace meta-data file.",
- index->name);
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Index %s not found in tablespace meta-data file.",
+ index->name());
return(DB_ERROR);
}
@@ -1124,10 +1054,10 @@ row_import::match_index_columns(
if (cfg_index->m_n_fields != index->n_fields) {
ib_errf(thd, IB_LOG_LEVEL_ERROR,
- ER_TABLE_SCHEMA_MISMATCH,
- "Index field count %u doesn't match"
- " tablespace metadata file value " ULINTPF,
- index->n_fields, cfg_index->m_n_fields);
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Index field count %u doesn't match"
+ " tablespace metadata file value " ULINTPF,
+ index->n_fields, cfg_index->m_n_fields);
return(DB_ERROR);
}
@@ -1139,13 +1069,13 @@ row_import::match_index_columns(
for (ulint i = 0; i < index->n_fields; ++i, ++field, ++cfg_field) {
- if (strcmp(field->name, cfg_field->name) != 0) {
+ if (strcmp(field->name(), cfg_field->name()) != 0) {
ib_errf(thd, IB_LOG_LEVEL_ERROR,
- ER_TABLE_SCHEMA_MISMATCH,
- "Index field name %s doesn't match"
- " tablespace metadata field name %s"
- " for field position " ULINTPF,
- field->name, cfg_field->name, i);
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Index field name %s doesn't match"
+ " tablespace metadata field name %s"
+ " for field position " ULINTPF,
+ field->name(), cfg_field->name(), i);
err = DB_ERROR;
}
@@ -1155,7 +1085,7 @@ row_import::match_index_columns(
ER_TABLE_SCHEMA_MISMATCH,
"Index %s field %s prefix len %u"
" doesn't match metadata file value %u",
- index->name, field->name,
+ index->name(), field->name(),
field->prefix_len, cfg_field->prefix_len);
err = DB_ERROR;
@@ -1166,7 +1096,7 @@ row_import::match_index_columns(
ER_TABLE_SCHEMA_MISMATCH,
"Index %s field %s fixed len %u"
" doesn't match metadata file value %u",
- index->name, field->name,
+ index->name(), field->name(),
field->fixed_len,
cfg_field->fixed_len);
@@ -1177,10 +1107,9 @@ row_import::match_index_columns(
return(err);
}
-/**
-Check if the table schema that was read from the .cfg file matches the
+/** Check if the table schema that was read from the .cfg file matches the
in-memory table definition.
-@param thd - MySQL session variable
+@param thd MySQL session variable
@return DB_SUCCESS or error code. */
dberr_t
row_import::match_table_columns(
@@ -1225,60 +1154,82 @@ row_import::match_table_columns(
if (cfg_col->prtype != col->prtype) {
ib_errf(thd,
- IB_LOG_LEVEL_ERROR,
- ER_TABLE_SCHEMA_MISMATCH,
- "Column %s precise type mismatch.",
- col_name);
+ IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Column %s precise type mismatch,"
+ " it's 0X%X in the table and 0X%X"
+ " in the tablespace meta file",
+ col_name, col->prtype, cfg_col->prtype);
err = DB_ERROR;
}
if (cfg_col->mtype != col->mtype) {
ib_errf(thd,
- IB_LOG_LEVEL_ERROR,
- ER_TABLE_SCHEMA_MISMATCH,
- "Column %s main type mismatch.",
- col_name);
+ IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Column %s main type mismatch,"
+ " it's 0X%X in the table and 0X%X"
+ " in the tablespace meta file",
+ col_name, col->mtype, cfg_col->mtype);
err = DB_ERROR;
}
if (cfg_col->len != col->len) {
ib_errf(thd,
- IB_LOG_LEVEL_ERROR,
- ER_TABLE_SCHEMA_MISMATCH,
- "Column %s length mismatch.",
- col_name);
+ IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Column %s length mismatch,"
+ " it's %u in the table and %u"
+ " in the tablespace meta file",
+ col_name, col->len, cfg_col->len);
err = DB_ERROR;
}
if (cfg_col->mbminlen != col->mbminlen
|| cfg_col->mbmaxlen != col->mbmaxlen) {
ib_errf(thd,
- IB_LOG_LEVEL_ERROR,
- ER_TABLE_SCHEMA_MISMATCH,
- "Column %s multi-byte len mismatch.",
- col_name);
+ IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Column %s multi-byte len mismatch,"
+ " it's %u-%u in the table and %u-%u"
+ " in the tablespace meta file",
+ col_name, col->mbminlen, col->mbmaxlen,
+ cfg_col->mbminlen, cfg_col->mbmaxlen);
err = DB_ERROR;
}
if (cfg_col->ind != col->ind) {
+ ib_errf(thd,
+ IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Column %s position mismatch,"
+ " it's %u in the table and %u"
+ " in the tablespace meta file",
+ col_name, col->ind, cfg_col->ind);
err = DB_ERROR;
}
if (cfg_col->ord_part != col->ord_part) {
ib_errf(thd,
- IB_LOG_LEVEL_ERROR,
- ER_TABLE_SCHEMA_MISMATCH,
- "Column %s ordering mismatch.",
- col_name);
+ IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Column %s ordering mismatch,"
+ " it's %u in the table and %u"
+ " in the tablespace meta file",
+ col_name, col->ord_part,
+ cfg_col->ord_part);
err = DB_ERROR;
}
if (cfg_col->max_prefix != col->max_prefix) {
ib_errf(thd,
- IB_LOG_LEVEL_ERROR,
- ER_TABLE_SCHEMA_MISMATCH,
- "Column %s max prefix mismatch.",
- col_name);
+ IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Column %s max prefix mismatch"
+ " it's %u in the table and %u"
+ " in the tablespace meta file",
+ col_name, col->max_prefix,
+ cfg_col->max_prefix);
err = DB_ERROR;
}
}
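
Each rewritten message above now reports both sides of a mismatch (value in the table vs. value in the .cfg meta file) and keeps scanning the remaining attributes instead of describing the failure generically. A sketch of that reporting pattern under an assumed minimal column struct; the names here are illustrative, not the server's:

    #include <cstdio>

    struct col_meta { unsigned mtype, prtype, len; };

    /* Compare one attribute; report both values on mismatch. */
    static bool check_attr(const char* col, const char* what,
                           unsigned in_table, unsigned in_cfg) {
        if (in_table == in_cfg) return true;
        std::fprintf(stderr,
                     "Column %s %s mismatch, it's %u in the table"
                     " and %u in the tablespace meta file\n",
                     col, what, in_table, in_cfg);
        return false;
    }

    static bool match_column(const char* name,
                             const col_meta& t, const col_meta& c) {
        bool ok = true;            /* accumulate, do not early-return */
        ok &= check_attr(name, "main type", t.mtype, c.mtype);
        ok &= check_attr(name, "precise type", t.prtype, c.prtype);
        ok &= check_attr(name, "length", t.len, c.len);
        return ok;
    }
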
@@ -1287,10 +1238,9 @@ row_import::match_table_columns(
return(err);
}
-/**
-Check if the table (and index) schema that was read from the .cfg file
+/** Check if the table (and index) schema that was read from the .cfg file
matches the in-memory table definition.
-@param thd - MySQL session variable
+@param thd MySQL session variable
@return DB_SUCCESS or error code. */
dberr_t
row_import::match_schema(
@@ -1346,9 +1296,9 @@ uncompressed:
ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
"Table flags don't match, server table has 0x%x"
- " and the meta-data file has 0x%lx;"
+ " and the meta-data file has 0x" ULINTPFx ";"
" .cfg file uses %s",
- m_table->flags, ulong(m_flags), msg);
+ m_table->flags, m_flags, msg);
return(DB_ERROR);
} else if (m_table->n_cols != m_n_cols) {
@@ -1439,17 +1389,9 @@ row_import::set_root_by_heuristic() UNIV_NOTHROW
if (UT_LIST_GET_LEN(m_table->indexes) != m_n_indexes) {
- char table_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(
- table_name, sizeof(table_name), m_table->name, FALSE);
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "Table %s should have " ULINTPF
- " indexes but the tablespace has " ULINTPF " indexes",
- table_name,
- UT_LIST_GET_LEN(m_table->indexes),
- m_n_indexes);
+ ib::warn() << "Table " << m_table->name << " should have "
+ << UT_LIST_GET_LEN(m_table->indexes) << " indexes but"
+ " the tablespace has " << m_n_indexes << " indexes";
}
dict_mutex_enter_for_mysql();
@@ -1463,22 +1405,23 @@ row_import::set_root_by_heuristic() UNIV_NOTHROW
if (index->type & DICT_FTS) {
index->type |= DICT_CORRUPT;
- ib_logf(IB_LOG_LEVEL_WARN,
- "Skipping FTS index: %s", index->name);
+ ib::warn() << "Skipping FTS index: " << index->name;
} else if (i < m_n_indexes) {
- delete [] cfg_index[i].m_name;
+ UT_DELETE_ARRAY(cfg_index[i].m_name);
ulint len = strlen(index->name) + 1;
- cfg_index[i].m_name = new(std::nothrow) byte[len];
+ cfg_index[i].m_name = UT_NEW_ARRAY_NOKEY(byte, len);
/* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_14",
- delete[] cfg_index[i].m_name;
- cfg_index[i].m_name = 0;);
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_14",
+ UT_DELETE_ARRAY(cfg_index[i].m_name);
+ cfg_index[i].m_name = NULL;
+ );
- if (cfg_index[i].m_name == 0) {
+ if (cfg_index[i].m_name == NULL) {
err = DB_OUT_OF_MEMORY;
break;
}
@@ -1581,13 +1524,70 @@ IndexPurge::next() UNIV_NOTHROW
mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO);
btr_pcur_restore_position(BTR_MODIFY_LEAF, &m_pcur, &m_mtr);
+ /* The following is based on btr_pcur_move_to_next_user_rec(). */
+ m_pcur.old_stored = false;
+ ut_ad(m_pcur.latch_mode == BTR_MODIFY_LEAF);
+ do {
+ if (btr_pcur_is_after_last_on_page(&m_pcur)) {
+ if (btr_pcur_is_after_last_in_tree(&m_pcur, &m_mtr)) {
+ return DB_END_OF_INDEX;
+ }
- if (!btr_pcur_move_to_next_user_rec(&m_pcur, &m_mtr)) {
+ buf_block_t* block = btr_pcur_get_block(&m_pcur);
+ uint32_t next_page = btr_page_get_next(block->frame);
- return(DB_END_OF_INDEX);
- }
+ /* MDEV-13542 FIXME: Make these checks part of
+ btr_pcur_move_to_next_page(), and introduce a
+ return status that will be checked in all callers! */
+ switch (next_page) {
+ default:
+ if (next_page != block->page.id.page_no()) {
+ break;
+ }
+ /* MDEV-20931 FIXME: Check that
+ next_page is within the tablespace
+ bounds! Also check that it is not a
+ change buffer bitmap page. */
+ /* fall through */
+ case 0:
+ case 1:
+ case FIL_NULL:
+ return DB_CORRUPTION;
+ }
- return(DB_SUCCESS);
+ dict_index_t* index = m_pcur.btr_cur.index;
+ buf_block_t* next_block = btr_block_get(
+ page_id_t(block->page.id.space(), next_page),
+ block->page.size, BTR_MODIFY_LEAF, index,
+ &m_mtr);
+
+ if (UNIV_UNLIKELY(!next_block
+ || !fil_page_index_page_check(
+ next_block->frame)
+ || !!dict_index_is_spatial(index)
+ != (fil_page_get_type(
+ next_block->frame)
+ == FIL_PAGE_RTREE)
+ || page_is_comp(next_block->frame)
+ != page_is_comp(block->frame)
+ || btr_page_get_prev(
+ next_block->frame)
+ != block->page.id.page_no())) {
+ return DB_CORRUPTION;
+ }
+
+ btr_leaf_page_release(block, BTR_MODIFY_LEAF, &m_mtr);
+
+ page_cur_set_before_first(next_block,
+ &m_pcur.btr_cur.page_cur);
+
+ ut_d(page_check_dir(next_block->frame));
+ } else {
+ btr_pcur_move_to_next_on_page(&m_pcur);
+ }
+ } while (!btr_pcur_is_on_user_rec(&m_pcur));
+
+ return DB_SUCCESS;
}
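
The loop above replaces btr_pcur_move_to_next_user_rec() with an open-coded walk so the FIL_PAGE_NEXT link can be sanity-checked before it is followed (MDEV-13542; the tablespace-bounds check remains a FIXME per MDEV-20931). A standalone sketch of just the link check, assuming the reserved low page numbers used here — page 0 is the space header and page 1 the change-buffer bitmap of the first extent — and FIL_NULL as end-of-chain, which the caller has already ruled out:

    #include <cstdint>

    static const uint32_t FIL_NULL = 0xFFFFFFFFU;

    /* Validate a FIL_PAGE_NEXT link before following it.  Pages 0 and 1
       can never be part of an index page chain; a self reference or a
       FIL_NULL (another leaf is known to exist) indicates corruption. */
    static bool next_link_is_sane(uint32_t cur_page_no,
                                  uint32_t next_page_no) {
        switch (next_page_no) {
        case 0:
        case 1:
            return false;
        default:
            if (next_page_no == FIL_NULL) return false;
            return next_page_no != cur_page_no;
        }
    }
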
/**
@@ -1599,14 +1599,15 @@ IndexPurge::purge_pessimistic_delete() UNIV_NOTHROW
{
dberr_t err;
- btr_pcur_restore_position(BTR_MODIFY_TREE, &m_pcur, &m_mtr);
+ btr_pcur_restore_position(BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
+ &m_pcur, &m_mtr);
ut_ad(rec_get_deleted_flag(
btr_pcur_get_rec(&m_pcur),
dict_table_is_comp(m_index->table)));
btr_cur_pessimistic_delete(
- &err, FALSE, btr_pcur_get_btr_cur(&m_pcur), 0, RB_NONE, &m_mtr);
+ &err, FALSE, btr_pcur_get_btr_cur(&m_pcur), 0, false, &m_mtr);
ut_a(err == DB_SUCCESS);
@@ -1630,10 +1631,9 @@ IndexPurge::purge() UNIV_NOTHROW
btr_pcur_restore_position(BTR_MODIFY_LEAF, &m_pcur, &m_mtr);
}
-/**
-Constructor
-* @param cfg - config of table being imported.
-* @param trx - transaction covering the import */
+/** Constructor
+@param cfg config of table being imported.
+@param trx transaction covering the import */
inline
PageConverter::PageConverter(
row_import* cfg,
@@ -1653,17 +1653,16 @@ PageConverter::PageConverter(
rec_offs_init(m_offsets_);
}
-/**
-Adjust the BLOB reference for a single column that is externally stored
-@param rec - record to update
-@param offsets - column offsets for the record
-@param i - column ordinal value
+/** Adjust the BLOB reference for a single column that is externally stored
+@param rec record to update
+@param offsets column offsets for the record
+@param i column ordinal value
@return DB_SUCCESS or error code */
inline
dberr_t
PageConverter::adjust_cluster_index_blob_column(
rec_t* rec,
- const ulint* offsets,
+ const offset_t* offsets,
ulint i) UNIV_NOTHROW
{
ulint len;
@@ -1676,47 +1675,38 @@ PageConverter::adjust_cluster_index_blob_column(
if (len < BTR_EXTERN_FIELD_REF_SIZE) {
- char index_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(
- index_name, sizeof(index_name),
- m_cluster_index->name, TRUE);
-
ib_errf(m_trx->mysql_thd, IB_LOG_LEVEL_ERROR,
ER_INNODB_INDEX_CORRUPT,
"Externally stored column(" ULINTPF
") has a reference length of " ULINTPF
" in the cluster index %s",
- i, len, index_name);
+ i, len, m_cluster_index->name());
return(DB_CORRUPTION);
}
field += BTR_EXTERN_SPACE_ID - BTR_EXTERN_FIELD_REF_SIZE + len;
- if (is_compressed_table()) {
- mach_write_to_4(field, get_space_id());
+ mach_write_to_4(field, get_space_id());
+ if (m_page_zip_ptr) {
page_zip_write_blob_ptr(
m_page_zip_ptr, rec, m_cluster_index, offsets, i, 0);
- } else {
- mlog_write_ulint(field, get_space_id(), MLOG_4BYTES, 0);
}
return(DB_SUCCESS);
}
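
The hunk above locates the 20-byte externally-stored field reference at the end of the column value and stamps the importing tablespace's ID into its first four bytes, now always writing the frame and additionally logging through page_zip_write_blob_ptr() only when a compressed image exists. A standalone sketch of the pointer arithmetic, assuming the standard layout of the reference (space id at offset 0, BTR_EXTERN_FIELD_REF_SIZE = 20 bytes total):

    #include <cstddef>
    #include <cstdint>

    enum { BTR_EXTERN_FIELD_REF_SIZE = 20, BTR_EXTERN_SPACE_ID = 0 };

    static void write_be32(unsigned char* p, uint32_t v) {
        p[0] = (unsigned char)(v >> 24); p[1] = (unsigned char)(v >> 16);
        p[2] = (unsigned char)(v >> 8);  p[3] = (unsigned char)(v);
    }

    /* Patch the space id inside the extern reference that occupies the
       last BTR_EXTERN_FIELD_REF_SIZE bytes of a stored column value. */
    static bool patch_blob_ref(unsigned char* field, std::size_t field_len,
                               uint32_t new_space_id) {
        if (field_len < BTR_EXTERN_FIELD_REF_SIZE) {
            return false;          /* DB_CORRUPTION in the real code */
        }
        unsigned char* ref = field + field_len - BTR_EXTERN_FIELD_REF_SIZE;
        write_be32(ref + BTR_EXTERN_SPACE_ID, new_space_id);
        return true;
    }
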
-/**
-Adjusts the BLOB reference in the clustered index row for all externally
+/** Adjusts the BLOB reference in the clustered index row for all externally
stored columns.
-@param rec - record to update
-@param offsets - column offsets for the record
+@param rec record to update
+@param offsets column offsets for the record
@return DB_SUCCESS or error code */
inline
dberr_t
PageConverter::adjust_cluster_index_blob_columns(
rec_t* rec,
- const ulint* offsets) UNIV_NOTHROW
+ const offset_t* offsets) UNIV_NOTHROW
{
ut_ad(rec_offs_any_extern(offsets));
@@ -1740,17 +1730,16 @@ PageConverter::adjust_cluster_index_blob_columns(
return(DB_SUCCESS);
}
-/**
-In the clustered index, adjust BLOB pointers as needed. Also update the
+/** In the clustered index, adjust BLOB pointers as needed. Also update the
BLOB reference, write the new space id.
-@param rec - record to update
-@param offsets - column offsets for the record
+@param rec record to update
+@param offsets column offsets for the record
@return DB_SUCCESS or error code */
inline
dberr_t
PageConverter::adjust_cluster_index_blob_ref(
rec_t* rec,
- const ulint* offsets) UNIV_NOTHROW
+ const offset_t* offsets) UNIV_NOTHROW
{
if (rec_offs_any_extern(offsets)) {
dberr_t err;
@@ -1765,14 +1754,13 @@ PageConverter::adjust_cluster_index_blob_ref(
return(DB_SUCCESS);
}
-/**
-Purge delete-marked records, only if it is possible to do so without
+/** Purge delete-marked records, only if it is possible to do so without
re-organising the B+tree.
-@param offsets - current row offsets.
+@param offsets current row offsets.
@return true if purge succeeded */
inline
bool
-PageConverter::purge(const ulint* offsets) UNIV_NOTHROW
+PageConverter::purge(const offset_t* offsets) UNIV_NOTHROW
{
const dict_index_t* index = m_index->m_srv_index;
@@ -1789,19 +1777,16 @@ PageConverter::purge(const ulint* offsets) UNIV_NOTHROW
return(false);
}
-/**
-Adjust the BLOB references and sys fields for the current record.
-@param rec - record to update
-@param offsets - column offsets for the record
-@param deleted - true if row is delete marked
+/** Adjust the BLOB references and sys fields for the current record.
+@param rec record to update
+@param offsets column offsets for the record
@return DB_SUCCESS or error code. */
inline
dberr_t
PageConverter::adjust_cluster_record(
const dict_index_t* index,
rec_t* rec,
- const ulint* offsets,
- bool deleted) UNIV_NOTHROW
+ const offset_t* offsets) UNIV_NOTHROW
{
dberr_t err;
@@ -1813,16 +1798,15 @@ PageConverter::adjust_cluster_record(
row_upd_rec_sys_fields(
rec, m_page_zip_ptr, m_cluster_index, m_offsets,
- m_trx, roll_ptr_t(1) << 55);
+ m_trx, roll_ptr_t(1) << ROLL_PTR_INSERT_FLAG_POS);
}
return(err);
}
-/**
-Update the BLOB refrences and write UNDO log entries for
+/** Update the BLOB references and write UNDO log entries for
rows that can't be purged optimistically.
-@param block - block to update
+@param block block to update
@retval DB_SUCCESS or error code */
inline
dberr_t
@@ -1847,15 +1831,14 @@ PageConverter::update_records(
if (deleted || clust_index) {
m_offsets = rec_get_offsets(
- rec, m_index->m_srv_index, m_offsets,
+ rec, m_index->m_srv_index, m_offsets, true,
ULINT_UNDEFINED, &m_heap);
}
if (clust_index) {
dberr_t err = adjust_cluster_record(
- m_index->m_srv_index, rec, m_offsets,
- deleted);
+ m_index->m_srv_index, rec, m_offsets);
if (err != DB_SUCCESS) {
return(err);
@@ -1883,8 +1866,7 @@ PageConverter::update_records(
return(DB_SUCCESS);
}
-/**
-Update the space, index id, trx id.
+/** Update the space, index id, trx id.
@return DB_SUCCESS or error code */
inline
dberr_t
@@ -1894,22 +1876,18 @@ PageConverter::update_index_page(
index_id_t id;
buf_frame_t* page = block->frame;
- if (is_free(buf_block_get_page_no(block))) {
+ if (is_free(block->page.id.page_no())) {
return(DB_SUCCESS);
} else if ((id = btr_page_get_index_id(page)) != m_index->m_id) {
row_index_t* index = find_index(id);
if (index == 0) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Page for tablespace " ULINTPF " is "
- " index page with id " IB_ID_FMT " but that"
- " index is not found from configuration file."
- " Current index name %s and id " IB_ID_FMT ".",
- m_space,
- id,
- m_index->m_name,
- m_index->m_id);
+ ib::error() << "Page for tablespace " << m_space
+ << " is index page with id " << id
+ << " but that index is not found from"
+ << " configuration file. Current index name "
+ << m_index->m_name << " and id " << m_index->m_id;
m_index = 0;
return(DB_CORRUPTION);
}
@@ -1924,10 +1902,10 @@ PageConverter::update_index_page(
return(DB_SUCCESS);
}
- if (m_index && block->page.offset == m_index->m_page_no) {
+ if (m_index && block->page.id.page_no() == m_index->m_page_no) {
byte *b = FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF + FSEG_HDR_SPACE
+ page;
- mach_write_to_4(b, block->page.space);
+ mach_write_to_4(b, block->page.id.space());
memcpy(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP + FSEG_HDR_SPACE
+ page, b, 4);
@@ -1951,12 +1929,28 @@ PageConverter::update_index_page(
btr_page_set_index_id(
page, m_page_zip_ptr, m_index->m_srv_index->id, 0);
- page_set_max_trx_id(block, m_page_zip_ptr, m_trx->id, 0);
+ if (dict_index_is_clust(m_index->m_srv_index)) {
+ if (block->page.id.page_no() == m_index->m_srv_index->page) {
+ /* Preserve the PAGE_ROOT_AUTO_INC. */
+ } else {
+ /* Clear PAGE_MAX_TRX_ID so that it can be
+ used for other purposes in the future. IMPORT
+ in MySQL 5.6, 5.7 and MariaDB 10.0 and 10.1
+ would set the field to the transaction ID even
+ on clustered index pages. */
+ page_set_max_trx_id(block, m_page_zip_ptr, 0, NULL);
+ }
+ } else {
+ /* Set PAGE_MAX_TRX_ID on secondary index leaf pages,
+ and clear it on non-leaf pages. */
+ page_set_max_trx_id(block, m_page_zip_ptr,
+ page_is_leaf(page) ? m_trx->id : 0, NULL);
+ }
- if (page_is_empty(block->frame)) {
+ if (page_is_empty(page)) {
/* Only a root page can be empty. */
- if (!is_root_page(block->frame)) {
+ if (page_has_siblings(page)) {
// TODO: We should relax this and skip secondary
// indexes. Mark them as corrupt because they can
// always be rebuilt.
@@ -1969,9 +1963,8 @@ PageConverter::update_index_page(
return page_is_leaf(block->frame) ? update_records(block) : DB_SUCCESS;
}
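
The new PAGE_MAX_TRX_ID handling above encodes a four-way policy; the clustered root is preserved because that field doubles as PAGE_ROOT_AUTO_INC there. A compact sketch of the decision as a self-contained function (names hypothetical):

    /* PAGE_MAX_TRX_ID policy on IMPORT:
       - clustered root:     keep (field holds PAGE_ROOT_AUTO_INC)
       - clustered non-root: clear (older versions wrote trx ids here)
       - secondary leaf:     set to the importing transaction's id
       - secondary non-leaf: clear */
    enum trx_id_action { PRESERVE, CLEAR, SET_TO_IMPORT_TRX };

    static trx_id_action max_trx_id_policy(bool is_clustered,
                                           bool is_root, bool is_leaf) {
        if (is_clustered) {
            return is_root ? PRESERVE : CLEAR;
        }
        return is_leaf ? SET_TO_IMPORT_TRX : CLEAR;
    }
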
-/**
-Validate the space flags and update tablespace header page.
-@param block - block read from file, not from the buffer pool.
+/** Validate the space flags and update tablespace header page.
+@param block block read from file, not from the buffer pool.
@retval DB_SUCCESS or error code */
inline
dberr_t
@@ -1979,13 +1972,11 @@ PageConverter::update_header(
buf_block_t* block) UNIV_NOTHROW
{
/* Check for valid header */
- switch(fsp_header_get_space_id(get_frame(block))) {
+ switch (fsp_header_get_space_id(get_frame(block))) {
case 0:
return(DB_CORRUPTION);
case ULINT_UNDEFINED:
- ib_logf(IB_LOG_LEVEL_WARN,
- "Space id check in the header failed "
- "- ignored");
+ ib::warn() << "Space id check in the header failed: ignored";
}
mach_write_to_8(
@@ -2009,9 +2000,8 @@ PageConverter::update_header(
return(DB_SUCCESS);
}
-/**
-Update the page, set the space id, max trx id and index id.
-@param block - block read from file
+/** Update the page, set the space id, max trx id and index id.
+@param block block read from file
@retval DB_SUCCESS or error code */
inline
dberr_t
@@ -2031,11 +2021,12 @@ PageConverter::update_page(
switch (page_type = fil_page_get_type(get_frame(block))) {
case FIL_PAGE_TYPE_FSP_HDR:
+ ut_a(block->page.id.page_no() == 0);
/* Work directly on the uncompressed page headers. */
- ut_a(buf_block_get_page_no(block) == 0);
return(update_header(block));
case FIL_PAGE_INDEX:
+ case FIL_PAGE_RTREE:
/* We need to decompress the contents into block->frame
before we can do any thing with Btree pages. */
@@ -2057,7 +2048,7 @@ PageConverter::update_page(
case FIL_PAGE_TYPE_XDES:
err = set_current_xdes(
- buf_block_get_page_no(block), get_frame(block));
+ block->page.id.page_no(), get_frame(block));
/* fall through */
case FIL_PAGE_INODE:
case FIL_PAGE_TYPE_TRX_SYS:
@@ -2077,14 +2068,12 @@ PageConverter::update_page(
return(err);
}
- ib_logf(IB_LOG_LEVEL_WARN, "Unknown page type (" ULINTPF ")",
- page_type);
+ ib::warn() << "Unknown page type (" << page_type << ")";
return(DB_CORRUPTION);
}
-/**
-Called for every page in the tablespace. If the page was not
+/** Called for every page in the tablespace. If the page was not
updated then its state must be set to BUF_PAGE_NOT_USED.
@param block block read from file, note it is not from the buffer pool
@retval DB_SUCCESS or error code. */
@@ -2093,31 +2082,27 @@ dberr_t PageConverter::operator()(buf_block_t* block) UNIV_NOTHROW
/* If we already had an old page with matching number
in the buffer pool, evict it now, because
we no longer evict the pages on DISCARD TABLESPACE. */
- buf_page_get_gen(get_space_id(), get_zip_size(), block->page.offset,
+ buf_page_get_gen(block->page.id, get_page_size(),
RW_NO_LATCH, NULL, BUF_EVICT_IF_IN_POOL,
- __FILE__, __LINE__, NULL);
+ __FILE__, __LINE__, NULL, NULL);
ulint page_type;
dberr_t err = update_page(block, page_type);
if (err != DB_SUCCESS) return err;
- /* Note: For compressed pages this function will write to the
- zip descriptor and for uncompressed pages it will write to
- page (ie. the block->frame). Therefore the caller should write
- out the descriptor contents and not block->frame for compressed
- pages. */
-
- if (!is_compressed_table() || page_type == FIL_PAGE_INDEX) {
+ if (!block->page.zip.data) {
+ buf_flush_init_for_writing(
+ NULL, block->frame, NULL, m_current_lsn);
+ } else if (fil_page_type_is_index(page_type)) {
buf_flush_init_for_writing(
- get_frame(block),
- block->page.zip.data ? &block->page.zip : NULL,
+ NULL, block->page.zip.data, &block->page.zip,
m_current_lsn);
} else {
- /* Calculate and update the checksum of non-btree
- pages for compressed tables explicitly here. */
+ /* Calculate and update the checksum of non-index
+ pages for ROW_FORMAT=COMPRESSED tables. */
buf_flush_update_zip_checksum(
- get_frame(block), get_zip_size(),
+ block->page.zip.data, get_page_size().physical(),
m_current_lsn);
}
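
The rewritten tail of PageConverter::operator() above picks one of three write-preparation paths from two facts about the block: whether a ROW_FORMAT=COMPRESSED image exists (block->page.zip.data) and whether the page type is an index page. A sketch of just that dispatch as a pure function; the enum names are hypothetical:

    enum write_prep {
        INIT_UNCOMPRESSED,   /* stamp checksum/LSN into block->frame */
        INIT_COMPRESSED,     /* stamp into the zip image of an index page */
        ZIP_CHECKSUM_ONLY    /* recompute checksum of a non-index zip page */
    };

    static write_prep choose_write_prep(bool has_zip_image,
                                        bool is_index_page_type) {
        if (!has_zip_image) {
            return INIT_UNCOMPRESSED;
        }
        return is_index_page_type ? INIT_COMPRESSED : ZIP_CHECKSUM_ONLY;
    }
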
@@ -2142,15 +2127,9 @@ row_import_discard_changes(
prebuilt->trx->error_info = NULL;
- char table_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(
- table_name, sizeof(table_name),
- prebuilt->table->name, FALSE);
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Discarding tablespace of table %s: %s",
- table_name, ut_strerr(err));
+ ib::info() << "Discarding tablespace of table "
+ << prebuilt->table->name
+ << ": " << ut_strerr(err);
if (trx->dict_operation_lock_mode != RW_X_LATCH) {
ut_a(trx->dict_operation_lock_mode == 0);
@@ -2207,7 +2186,7 @@ row_import_cleanup(
DBUG_EXECUTE_IF("ib_import_before_checkpoint_crash", DBUG_SUICIDE(););
- log_make_checkpoint_at(LSN_MAX, TRUE);
+ log_make_checkpoint();
return(err);
}
@@ -2227,7 +2206,7 @@ row_import_error(
innobase_format_name(
table_name, sizeof(table_name),
- prebuilt->table->name, FALSE);
+ prebuilt->table->name.m_name);
ib_senderrf(
trx->mysql_thd, IB_LOG_LEVEL_WARN,
@@ -2268,11 +2247,6 @@ row_import_adjust_root_pages_of_secondary_indexes(
/* Adjust the root pages of the secondary indexes only. */
while ((index = dict_table_get_next_index(index)) != NULL) {
- char index_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(
- index_name, sizeof(index_name), index->name, TRUE);
-
ut_a(!dict_index_is_clust(index));
if (!(index->type & DICT_CORRUPT)
@@ -2284,9 +2258,8 @@ row_import_adjust_root_pages_of_secondary_indexes(
err = btr_root_adjust_on_import(index);
} else {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Skip adjustment of root pages for "
- "index %s.", index->name);
+ ib::warn() << "Skip adjustment of root pages for"
+ " index " << index->name << ".";
err = DB_CORRUPTION;
}
@@ -2300,9 +2273,9 @@ row_import_adjust_root_pages_of_secondary_indexes(
ib_errf(trx->mysql_thd,
IB_LOG_LEVEL_WARN,
ER_INNODB_INDEX_CORRUPT,
- "Index '%s' not found or corrupt, "
- "you should recreate this index.",
- index_name);
+ "Index %s not found or corrupt,"
+ " you should recreate this index.",
+ index->name());
/* Do not bail out, so that the data
can be recovered. */
@@ -2339,7 +2312,7 @@ row_import_adjust_root_pages_of_secondary_indexes(
ER_INNODB_INDEX_CORRUPT,
"Index '%s' contains " ULINTPF " entries, "
"should be " ULINTPF ", you should recreate "
- "this index.", index_name,
+ "this index.", index->name(),
purge.get_n_rows(), n_rows_in_table);
index->type |= DICT_CORRUPT;
@@ -2396,13 +2369,13 @@ row_import_set_sys_max_row_id(
ulint len;
const byte* field;
mem_heap_t* heap = NULL;
- ulint offsets_[1 + REC_OFFS_HEADER_SIZE];
- ulint* offsets;
+ offset_t offsets_[1 + REC_OFFS_HEADER_SIZE];
+ offset_t* offsets;
rec_offs_init(offsets_);
offsets = rec_get_offsets(
- rec, index, offsets_, ULINT_UNDEFINED, &heap);
+ rec, index, offsets_, true, ULINT_UNDEFINED, &heap);
field = rec_get_nth_field(
rec, offsets,
@@ -2431,16 +2404,11 @@ row_import_set_sys_max_row_id(
err = DB_CORRUPTION;);
if (err != DB_SUCCESS) {
- char index_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(
- index_name, sizeof(index_name), index->name, TRUE);
-
ib_errf(prebuilt->trx->mysql_thd,
IB_LOG_LEVEL_WARN,
ER_INNODB_INDEX_CORRUPT,
- "Index '%s' corruption detected, invalid DB_ROW_ID "
- "in index.", index_name);
+ "Index `%s` corruption detected, invalid DB_ROW_ID"
+ " in index.", index->name());
return(err);
@@ -2519,13 +2487,16 @@ row_import_cfg_read_index_fields(
byte row[sizeof(ib_uint32_t) * 3];
ulint n_fields = index->m_n_fields;
- index->m_fields = new(std::nothrow) dict_field_t[n_fields];
+ index->m_fields = UT_NEW_ARRAY_NOKEY(dict_field_t, n_fields);
/* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_4",
- delete [] index->m_fields; index->m_fields = 0;);
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_4",
+ UT_DELETE_ARRAY(index->m_fields);
+ index->m_fields = NULL;
+ );
- if (index->m_fields == 0) {
+ if (index->m_fields == NULL) {
return(DB_OUT_OF_MEMORY);
}
@@ -2542,7 +2513,7 @@ row_import_cfg_read_index_fields(
ib_senderrf(
thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while reading index fields.");
return(DB_IO_ERROR);
@@ -2559,12 +2530,16 @@ row_import_cfg_read_index_fields(
/* Include the NUL byte in the length. */
ulint len = mach_read_from_4(ptr);
- byte* name = new(std::nothrow) byte[len];
+ byte* name = UT_NEW_ARRAY_NOKEY(byte, len);
/* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_5", delete [] name; name = 0;);
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_5",
+ UT_DELETE_ARRAY(name);
+ name = NULL;
+ );
- if (name == 0) {
+ if (name == NULL) {
return(DB_OUT_OF_MEMORY);
}
@@ -2576,7 +2551,7 @@ row_import_cfg_read_index_fields(
ib_senderrf(
thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while parsing table name.");
return(err);
@@ -2606,13 +2581,16 @@ row_import_read_index_data(
ut_a(cfg->m_n_indexes > 0);
ut_a(cfg->m_n_indexes < 1024);
- cfg->m_indexes = new(std::nothrow) row_index_t[cfg->m_n_indexes];
+ cfg->m_indexes = UT_NEW_ARRAY_NOKEY(row_index_t, cfg->m_n_indexes);
/* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_6",
- delete [] cfg->m_indexes; cfg->m_indexes = 0;);
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_6",
+ UT_DELETE_ARRAY(cfg->m_indexes);
+ cfg->m_indexes = NULL;
+ );
- if (cfg->m_indexes == 0) {
+ if (cfg->m_indexes == NULL) {
return(DB_OUT_OF_MEMORY);
}
@@ -2635,17 +2613,17 @@ row_import_read_index_data(
if (n_bytes != sizeof(row)) {
char msg[BUFSIZ];
- ut_snprintf(msg, sizeof(msg),
- "while reading index meta-data, expected "
- "to read %lu bytes but read only %lu "
- "bytes",
- (ulong) sizeof(row), (ulong) n_bytes);
+ snprintf(msg, sizeof(msg),
+ "while reading index meta-data, expected "
+ "to read " ULINTPF
+ " bytes but read only " ULINTPF " bytes",
+ sizeof(row), n_bytes);
ib_senderrf(
thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno), msg);
+ (ulong) errno, strerror(errno), msg);
- ib_logf(IB_LOG_LEVEL_ERROR, "IO Error: %s", msg);
+ ib::error() << "IO Error: " << msg;
return(DB_IO_ERROR);
}
@@ -2697,14 +2675,16 @@ row_import_read_index_data(
return(DB_CORRUPTION);
}
- cfg_index->m_name = new(std::nothrow) byte[len];
+ cfg_index->m_name = UT_NEW_ARRAY_NOKEY(byte, len);
/* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_7",
- delete [] cfg_index->m_name;
- cfg_index->m_name = 0;);
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_7",
+ UT_DELETE_ARRAY(cfg_index->m_name);
+ cfg_index->m_name = NULL;
+ );
- if (cfg_index->m_name == 0) {
+ if (cfg_index->m_name == NULL) {
return(DB_OUT_OF_MEMORY);
}
@@ -2716,7 +2696,7 @@ row_import_read_index_data(
ib_senderrf(
thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while parsing index name.");
return(err);
@@ -2755,7 +2735,7 @@ row_import_read_indexes(
if (fread(row, 1, sizeof(row), file) != sizeof(row)) {
ib_senderrf(
thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while reading number of indexes.");
return(DB_IO_ERROR);
@@ -2800,23 +2780,29 @@ row_import_read_columns(
ut_a(cfg->m_n_cols > 0);
ut_a(cfg->m_n_cols < 1024);
- cfg->m_cols = new(std::nothrow) dict_col_t[cfg->m_n_cols];
+ cfg->m_cols = UT_NEW_ARRAY_NOKEY(dict_col_t, cfg->m_n_cols);
/* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_8",
- delete [] cfg->m_cols; cfg->m_cols = 0;);
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_8",
+ UT_DELETE_ARRAY(cfg->m_cols);
+ cfg->m_cols = NULL;
+ );
- if (cfg->m_cols == 0) {
+ if (cfg->m_cols == NULL) {
return(DB_OUT_OF_MEMORY);
}
- cfg->m_col_names = new(std::nothrow) byte* [cfg->m_n_cols];
+ cfg->m_col_names = UT_NEW_ARRAY_NOKEY(byte*, cfg->m_n_cols);
/* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_9",
- delete [] cfg->m_col_names; cfg->m_col_names = 0;);
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_9",
+ UT_DELETE_ARRAY(cfg->m_col_names);
+ cfg->m_col_names = NULL;
+ );
- if (cfg->m_col_names == 0) {
+ if (cfg->m_col_names == NULL) {
return(DB_OUT_OF_MEMORY);
}
@@ -2835,7 +2821,7 @@ row_import_read_columns(
if (fread(row, 1, sizeof(row), file) != sizeof(row)) {
ib_senderrf(
thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while reading table column meta-data.");
return(DB_IO_ERROR);
@@ -2879,14 +2865,16 @@ row_import_read_columns(
return(DB_CORRUPTION);
}
- cfg->m_col_names[i] = new(std::nothrow) byte[len];
+ cfg->m_col_names[i] = UT_NEW_ARRAY_NOKEY(byte, len);
/* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_10",
- delete [] cfg->m_col_names[i];
- cfg->m_col_names[i] = 0;);
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_10",
+ UT_DELETE_ARRAY(cfg->m_col_names[i]);
+ cfg->m_col_names[i] = NULL;
+ );
- if (cfg->m_col_names[i] == 0) {
+ if (cfg->m_col_names[i] == NULL) {
return(DB_OUT_OF_MEMORY);
}
@@ -2899,7 +2887,7 @@ row_import_read_columns(
ib_senderrf(
thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while parsing table column name.");
return(err);
@@ -2930,7 +2918,7 @@ row_import_read_v1(
if (fread(value, 1, sizeof(value), file) != sizeof(value)) {
ib_senderrf(
thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while reading meta-data export hostname length.");
return(DB_IO_ERROR);
@@ -2939,13 +2927,16 @@ row_import_read_v1(
ulint len = mach_read_from_4(value);
/* NUL byte is part of name length. */
- cfg->m_hostname = new(std::nothrow) byte[len];
+ cfg->m_hostname = UT_NEW_ARRAY_NOKEY(byte, len);
/* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_1",
- delete [] cfg->m_hostname; cfg->m_hostname = 0;);
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_1",
+ UT_DELETE_ARRAY(cfg->m_hostname);
+ cfg->m_hostname = NULL;
+ );
- if (cfg->m_hostname == 0) {
+ if (cfg->m_hostname == NULL) {
return(DB_OUT_OF_MEMORY);
}
@@ -2955,7 +2946,7 @@ row_import_read_v1(
ib_senderrf(
thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while parsing export hostname.");
return(err);
@@ -2969,7 +2960,7 @@ row_import_read_v1(
if (fread(value, 1, sizeof(value), file) != sizeof(value)) {
ib_senderrf(
thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while reading meta-data table name length.");
return(DB_IO_ERROR);
@@ -2978,13 +2969,16 @@ row_import_read_v1(
len = mach_read_from_4(value);
/* NUL byte is part of name length. */
- cfg->m_table_name = new(std::nothrow) byte[len];
+ cfg->m_table_name = UT_NEW_ARRAY_NOKEY(byte, len);
/* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_2",
- delete [] cfg->m_table_name; cfg->m_table_name = 0;);
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_2",
+ UT_DELETE_ARRAY(cfg->m_table_name);
+ cfg->m_table_name = NULL;
+ );
- if (cfg->m_table_name == 0) {
+ if (cfg->m_table_name == NULL) {
return(DB_OUT_OF_MEMORY);
}
@@ -2993,15 +2987,14 @@ row_import_read_v1(
if (err != DB_SUCCESS) {
ib_senderrf(
thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while parsing table name.");
return(err);
}
- ib_logf(IB_LOG_LEVEL_INFO,
- "Importing tablespace for table '%s' that was exported "
- "from host '%s'", cfg->m_table_name, cfg->m_hostname);
+ ib::info() << "Importing tablespace for table '" << cfg->m_table_name
+ << "' that was exported from host '" << cfg->m_hostname << "'";
byte row[sizeof(ib_uint32_t) * 3];
@@ -3013,7 +3006,7 @@ row_import_read_v1(
if (fread(row, 1, sizeof(ib_uint64_t), file) != sizeof(ib_uint64_t)) {
ib_senderrf(
thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while reading autoinc value.");
return(DB_IO_ERROR);
@@ -3029,7 +3022,7 @@ row_import_read_v1(
if (fread(row, 1, sizeof(row), file) != sizeof(row)) {
ib_senderrf(
thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while reading meta-data header.");
return(DB_IO_ERROR);
@@ -3037,17 +3030,18 @@ row_import_read_v1(
byte* ptr = row;
- cfg->m_page_size = mach_read_from_4(ptr);
+ const ulint logical_page_size = mach_read_from_4(ptr);
ptr += sizeof(ib_uint32_t);
- if (cfg->m_page_size != UNIV_PAGE_SIZE) {
+ if (logical_page_size != univ_page_size.logical()) {
ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
- "Tablespace to be imported has a different "
- "page size than this server. Server page size "
- "is " ULINTPF ", whereas tablespace page size is "
- ULINTPF,
- UNIV_PAGE_SIZE, cfg->m_page_size);
+ "Tablespace to be imported has a different"
+ " page size than this server. Server page size"
+ " is " ULINTPF ", whereas tablespace page size"
+ " is " ULINTPF,
+ univ_page_size.logical(),
+ logical_page_size);
return(DB_ERROR);
}
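
The logical page size is stored in the .cfg header as a big-endian 32-bit field, and the check above now compares it against univ_page_size.logical() rather than the old compile-time UNIV_PAGE_SIZE. A small standalone sketch of the parse-and-compare step; the helper name is hypothetical:

    #include <cstdint>

    static uint32_t read_be32_field(const unsigned char* p) {
        return (uint32_t(p[0]) << 24) | (uint32_t(p[1]) << 16)
             | (uint32_t(p[2]) << 8) | uint32_t(p[3]);
    }

    /* Reject a .cfg whose logical page size differs from the server's. */
    static bool cfg_page_size_matches(const unsigned char* cfg_field,
                                      uint32_t server_logical_page_size) {
        return read_be32_field(cfg_field) == server_logical_page_size;
    }
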
@@ -3055,24 +3049,26 @@ row_import_read_v1(
cfg->m_flags = mach_read_from_4(ptr);
ptr += sizeof(ib_uint32_t);
+ cfg->m_page_size.copy_from(dict_tf_get_page_size(cfg->m_flags));
+
+ ut_a(logical_page_size == cfg->m_page_size.logical());
+
cfg->m_n_cols = mach_read_from_4(ptr);
if (!dict_tf_is_valid(cfg->m_flags)) {
+ ib_errf(thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Invalid table flags: " ULINTPF, cfg->m_flags);
return(DB_CORRUPTION);
+ }
- } else if ((err = row_import_read_columns(file, thd, cfg))
- != DB_SUCCESS) {
-
- return(err);
-
- } else if ((err = row_import_read_indexes(file, thd, cfg))
- != DB_SUCCESS) {
+ err = row_import_read_columns(file, thd, cfg);
- return(err);
+ if (err == DB_SUCCESS) {
+ err = row_import_read_indexes(file, thd, cfg);
}
- ut_a(err == DB_SUCCESS);
return(err);
}
@@ -3097,7 +3093,7 @@ row_import_read_meta_data(
if (fread(&row, 1, sizeof(row), file) != sizeof(row)) {
ib_senderrf(
thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while reading meta-data version.");
return(DB_IO_ERROR);
@@ -3142,13 +3138,13 @@ row_import_read_cfg(
if (file == NULL) {
char msg[BUFSIZ];
- ut_snprintf(msg, sizeof(msg),
- "Error opening '%s', will attempt to import "
- "without schema verification", name);
+ snprintf(msg, sizeof(msg),
+ "Error opening '%s', will attempt to import"
+ " without schema verification", name);
ib_senderrf(
thd, IB_LOG_LEVEL_WARN, ER_IO_READ_ERROR,
- errno, strerror(errno), msg);
+ (ulong) errno, strerror(errno), msg);
cfg.m_missing = true;
@@ -3164,24 +3160,13 @@ row_import_read_cfg(
return(err);
}
-/*****************************************************************//**
-Update the <space, root page> of a table's indexes from the values
-in the data dictionary.
+/** Update the root page numbers and tablespace ID of a table.
+@param[in,out] trx dictionary transaction
+@param[in,out] table persistent table
+@param[in] reset whether to reset the fields to FIL_NULL
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
-row_import_update_index_root(
-/*=========================*/
- trx_t* trx, /*!< in/out: transaction that
- covers the update */
- const dict_table_t* table, /*!< in: Table for which we want
- to set the root page_no */
- bool reset, /*!< in: if true then set to
- FIL_NUL */
- bool dict_locked) /*!< in: Set to true if the
- caller already owns the
- dict_sys_t:: mutex. */
-
+row_import_update_index_root(trx_t* trx, dict_table_t* table, bool reset)
{
const dict_index_t* index;
que_t* graph = 0;
@@ -3197,9 +3182,7 @@ row_import_update_index_root(
"WHERE TABLE_ID = :table_id AND ID = :index_id;\n"
"END;\n"};
- if (!dict_locked) {
- mutex_enter(&dict_sys->mutex);
- }
+ table->def_trx_id = trx->id;
for (index = dict_table_get_first_index(table);
index != 0;
@@ -3262,17 +3245,11 @@ row_import_update_index_root(
err = trx->error_state;
if (err != DB_SUCCESS) {
- char index_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(
- index_name, sizeof(index_name),
- index->name, TRUE);
-
ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
ER_INTERNAL_ERROR,
- "While updating the <space, root page "
- "number> of index %s - %s",
- index_name, ut_strerr(err));
+ "While updating the <space, root page"
+ " number> of index %s - %s",
+ index->name(), ut_strerr(err));
break;
}
@@ -3280,10 +3257,6 @@ row_import_update_index_root(
que_graph_free(graph);
- if (!dict_locked) {
- mutex_exit(&dict_sys->mutex);
- }
-
return(err);
}
@@ -3336,7 +3309,6 @@ row_import_set_discarded(
/*****************************************************************//**
Update the DICT_TF2_DISCARDED flag in SYS_TABLES.
@return DB_SUCCESS or error code. */
-UNIV_INTERN
dberr_t
row_import_update_discarded_flag(
/*=============================*/
@@ -3358,8 +3330,8 @@ row_import_update_discarded_flag(
"PROCEDURE UPDATE_DISCARDED_FLAG() IS\n"
"DECLARE FUNCTION my_func;\n"
"DECLARE CURSOR c IS\n"
- " SELECT MIX_LEN "
- " FROM SYS_TABLES "
+ " SELECT MIX_LEN"
+ " FROM SYS_TABLES"
" WHERE ID = :table_id FOR UPDATE;"
"\n"
"BEGIN\n"
@@ -3402,7 +3374,6 @@ struct fil_iterator_t {
os_offset_t start; /*!< From where to start */
os_offset_t end; /*!< Where to stop */
os_offset_t file_size; /*!< File size in bytes */
- ulint page_size; /*!< Page size */
ulint n_io_buffers; /*!< Number of pages to use
for IO */
byte* io_buffer; /*!< Buffer to use for IO */
@@ -3431,7 +3402,8 @@ fil_iterate(
AbstractCallback& callback)
{
os_offset_t offset;
- ulint n_bytes = iter.n_io_buffers * iter.page_size;
+ const ulint size = callback.get_page_size().physical();
+ ulint n_bytes = iter.n_io_buffers * size;
const ulint buf_size = srv_page_size
#ifdef HAVE_LZO
@@ -3440,8 +3412,7 @@ fil_iterate(
+ snappy_max_compressed_length(srv_page_size)
#endif
;
- byte* page_compress_buf = static_cast<byte*>(
- ut_malloc_low(buf_size, false));
+ byte* page_compress_buf = static_cast<byte*>(malloc(buf_size));
ut_ad(!srv_read_only_mode);
if (!page_compress_buf) {
@@ -3465,7 +3436,6 @@ fil_iterate(
if (block->page.zip.data) {
/* Zip IO is done in the compressed page buffer. */
io_buffer = block->page.zip.data;
- ut_ad(PAGE_ZIP_MATCH(block->frame, &block->page.zip));
}
/* We have to read the exact number of bytes. Otherwise the
@@ -3475,7 +3445,7 @@ fil_iterate(
iter.end - offset));
ut_ad(n_bytes > 0);
- ut_ad(!(n_bytes % iter.page_size));
+ ut_ad(!(n_bytes % size));
const bool encrypted = iter.crypt_data != NULL
&& iter.crypt_data->should_encrypt();
@@ -3484,23 +3454,28 @@ fil_iterate(
? iter.crypt_io_buffer : io_buffer;
byte* const writeptr = readptr;
- if (!os_file_read_no_error_handling(iter.file, readptr,
- offset, n_bytes)) {
- ib_logf(IB_LOG_LEVEL_ERROR, "os_file_read() failed");
- err = DB_IO_ERROR;
+ IORequest read_request(IORequest::READ);
+ read_request.disable_partial_io_warnings();
+
+ err = os_file_read_no_error_handling(
+ read_request, iter.file, readptr, offset, n_bytes, 0);
+ if (err != DB_SUCCESS) {
+ ib::error() << iter.filepath
+ << ": os_file_read() failed";
goto func_exit;
}
bool updated = false;
- const ulint size = iter.page_size;
- ulint n_pages_read = ulint(n_bytes) / size;
- block->page.offset = offset / size;
+ os_offset_t page_off = offset;
+ ulint n_pages_read = n_bytes / size;
+ block->page.id.set_page_no(ulint(page_off / size));
for (ulint i = 0; i < n_pages_read;
- ++i, block->frame += size, block->page.offset++) {
- byte* src = readptr + (i * size);
+ block->page.id.set_page_no(block->page.id.page_no() + 1),
+ ++i, page_off += size, block->frame += size) {
+ byte* src = readptr + i * size;
const ulint page_no = page_get_page_no(src);
- if (!page_no && block->page.offset) {
+ if (!page_no && block->page.id.page_no()) {
const ulint* b = reinterpret_cast<const ulint*>
(src);
const ulint* const e = b + size / sizeof *b;
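
The rewritten loop above tracks the page number through block->page.id and derives the expected number of each page from the byte offset of the chunk. A standalone sketch of the chunking and cross-check arithmetic, assuming a fixed physical page size (the real code also skips freshly allocated all-zero pages, which legitimately carry page number 0):

    #include <cstdint>

    /* Bytes to read for the chunk starting at 'offset': up to n_bufs
       pages, clamped to the end of the file. */
    static uint64_t chunk_bytes(uint64_t offset, uint64_t file_size,
                                uint64_t page_size, uint64_t n_bufs) {
        uint64_t n = n_bufs * page_size;
        return n <= file_size - offset ? n : file_size - offset;
    }

    /* Page i of a chunk read at 'offset' must carry page number
       offset / page_size + i; the patch checks this against the
       FIL_PAGE_OFFSET field stored in the frame. */
    static bool expected_page_no_ok(uint64_t offset, uint64_t page_size,
                                    uint64_t i, uint32_t stored_page_no) {
        return uint64_t(stored_page_no) == offset / page_size + i;
    }
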
@@ -3515,37 +3490,39 @@ fil_iterate(
continue;
}
- if (page_no != block->page.offset) {
+ if (page_no != block->page.id.page_no()) {
page_corrupted:
- ib_logf(IB_LOG_LEVEL_WARN,
- "%s: Page %lu at offset "
- UINT64PF " looks corrupted.",
- callback.filename(),
- ulong(offset / size), offset);
+ ib::warn() << callback.filename()
+ << ": Page " << (offset / size)
+ << " at offset " << offset
+ << " looks corrupted.";
err = DB_CORRUPTION;
goto func_exit;
}
- bool decrypted = false;
- byte* dst = io_buffer + (i * size);
- bool frame_changed = false;
- ulint page_type = mach_read_from_2(src+FIL_PAGE_TYPE);
const bool page_compressed
- = page_type
- == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED
- || page_type == FIL_PAGE_PAGE_COMPRESSED;
+ = fil_page_is_compressed_encrypted(src)
+ || fil_page_is_compressed(src);
if (page_compressed && block->page.zip.data) {
goto page_corrupted;
}
+ bool decrypted = false;
+ byte* dst = io_buffer + i * size;
+ bool frame_changed = false;
+
if (!encrypted) {
} else if (!mach_read_from_4(
FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
+ src)) {
not_encrypted:
- if (!page_compressed
- && !block->page.zip.data) {
+ if (block->page.id.page_no() == 0
+ && block->page.zip.data) {
+ block->page.zip.data = src;
+ frame_changed = true;
+ } else if (!page_compressed
+ && !block->page.zip.data) {
block->frame = src;
frame_changed = true;
} else {
@@ -3554,13 +3531,13 @@ not_encrypted:
}
} else {
if (!fil_space_verify_crypt_checksum(
- src, callback.get_zip_size())) {
+ src, callback.get_page_size())) {
goto page_corrupted;
}
decrypted = fil_space_decrypt(
iter.crypt_data, dst,
- iter.page_size, src, &err);
+ callback.get_page_size(), src, &err);
if (err != DB_SUCCESS) {
goto func_exit;
@@ -3587,7 +3564,7 @@ not_encrypted:
false,
encrypted && !frame_changed
? dst : src,
- callback.get_zip_size(), NULL)) {
+ callback.get_page_size(), NULL)) {
goto page_corrupted;
}
@@ -3629,48 +3606,46 @@ not_encrypted:
/* When tablespace is encrypted or compressed its
first page (i.e. page 0) is not encrypted or
compressed and there is no need to copy frame. */
- if (encrypted && block->page.offset != 0) {
+ if (encrypted && block->page.id.page_no() != 0) {
byte *local_frame = callback.get_frame(block);
ut_ad((writeptr + (i * size)) != local_frame);
memcpy((writeptr + (i * size)), local_frame, size);
}
if (frame_changed) {
- block->frame = dst;
+ if (block->page.zip.data) {
+ block->page.zip.data = dst;
+ } else {
+ block->frame = dst;
+ }
}
src = io_buffer + (i * size);
if (page_compressed) {
updated = true;
- if (fil_page_compress(
+ if (ulint len = fil_page_compress(
src,
page_compress_buf,
0,/* FIXME: compression level */
512,/* FIXME: proper block size */
encrypted)) {
/* FIXME: remove memcpy() */
- memcpy(src, page_compress_buf,
- srv_page_size);
+ memcpy(src, page_compress_buf, len);
+ memset(src + len, 0,
+ srv_page_size - len);
}
}
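
With the new "ulint len = fil_page_compress(...)" form above, only the compressed length is copied back and the tail of the page is explicitly zeroed, instead of copying a full srv_page_size from the scratch buffer. A hedged standalone sketch of that copy-back-and-pad step, using zlib's compress() purely as a stand-in for the real page compressor (which also writes its own page header):

    #include <cstring>
    #include <vector>
    #include <zlib.h>

    /* Compress a page in place: on success the payload occupies the
       first 'len' bytes and the rest of the fixed-size page is zeroed. */
    static bool compress_page_in_place(unsigned char* page,
                                       std::size_t page_size,
                                       std::vector<unsigned char>& scratch) {
        scratch.resize(compressBound(page_size));
        uLongf len = static_cast<uLongf>(scratch.size());

        if (compress(scratch.data(), &len, page, page_size) != Z_OK
            || len >= page_size) {
            return false;          /* keep the page uncompressed */
        }

        std::memcpy(page, scratch.data(), len);
        std::memset(page + len, 0, page_size - len);
        return true;
    }
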
- /* If tablespace is encrypted, encrypt page before we
- write it back. Note that we should not encrypt the
- buffer that is in buffer pool. */
- /* NOTE: At this stage of IMPORT the
- buffer pool is not being used at all! */
- if (decrypted && encrypted) {
- byte *dest = writeptr + (i * size);
-
+ /* Encrypt the page if encryption was used. */
+ if (encrypted && decrypted) {
+ byte *dest = writeptr + i * size;
byte* tmp = fil_encrypt_buf(
iter.crypt_data,
- callback.get_space_id(),
- block->page.offset,
+ block->page.id.space(),
+ block->page.id.page_no(),
mach_read_from_8(src + FIL_PAGE_LSN),
- src,
- callback.get_zip_size(),
- dest);
+ src, callback.get_page_size(), dest);
if (tmp == src) {
/* TODO: remove unnecessary memcpy's */
@@ -3683,19 +3658,21 @@ not_encrypted:
}
/* A page was updated in the set, write back to disk. */
- if (updated
- && !os_file_write(
- iter.filepath, iter.file, writeptr,
- offset, (ulint) n_bytes)) {
+ if (updated) {
+ IORequest write_request(IORequest::WRITE);
- ib_logf(IB_LOG_LEVEL_ERROR, "os_file_write() failed");
- err = DB_IO_ERROR;
- goto func_exit;
+ err = os_file_write(write_request,
+ iter.filepath, iter.file,
+ writeptr, offset, n_bytes);
+
+ if (err != DB_SUCCESS) {
+ goto func_exit;
+ }
}
}
func_exit:
- ut_free(page_compress_buf);
+ free(page_compress_buf);
return err;
}
@@ -3723,46 +3700,36 @@ fil_tablespace_iterate(
DBUG_EXECUTE_IF("ib_import_trigger_corruption_1",
return(DB_CORRUPTION););
+ /* Make sure the data_dir_path is set. */
+ dict_get_and_save_data_dir_path(table, false);
+
if (DICT_TF_HAS_DATA_DIR(table->flags)) {
- dict_get_and_save_data_dir_path(table, false);
ut_a(table->data_dir_path);
- filepath = os_file_make_remote_pathname(
- table->data_dir_path, table->name, "ibd");
+ filepath = fil_make_filepath(
+ table->data_dir_path, table->name.m_name, IBD, true);
} else {
- filepath = fil_make_ibd_name(table->name, false);
+ filepath = fil_make_filepath(
+ NULL, table->name.m_name, IBD, false);
}
- {
- ibool success;
+ if (!filepath) {
+ return(DB_OUT_OF_MEMORY);
+ } else {
+ bool success;
file = os_file_create_simple_no_error_handling(
- innodb_file_data_key, filepath,
- OS_FILE_OPEN, OS_FILE_READ_WRITE, &success, FALSE);
-
- DBUG_EXECUTE_IF("fil_tablespace_iterate_failure",
- {
- static bool once;
-
- if (!once || ut_rnd_interval(0, 10) == 5) {
- once = true;
- success = FALSE;
- os_file_close(file);
- }
- });
+ innodb_data_file_key, filepath,
+ OS_FILE_OPEN, OS_FILE_READ_WRITE, false, &success);
if (!success) {
/* The following call prints an error message */
os_file_get_last_error(true);
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Trying to import a tablespace, but could not "
- "open the tablespace file %s", filepath);
-
- mem_free(filepath);
-
- return(DB_TABLESPACE_NOT_FOUND);
-
+ ib::error() << "Trying to import a tablespace,"
+ " but could not open the tablespace file "
+ << filepath;
+ ut_free(filepath);
+ return DB_TABLESPACE_NOT_FOUND;
} else {
err = DB_SUCCESS;
}
@@ -3778,28 +3745,35 @@ fil_tablespace_iterate(
We allocate an extra page in case it is a compressed table. One
page is to ensure alignment. */
- void* page_ptr = mem_alloc(3 * UNIV_PAGE_SIZE);
+ void* page_ptr = ut_malloc_nokey(3 * UNIV_PAGE_SIZE);
byte* page = static_cast<byte*>(ut_align(page_ptr, UNIV_PAGE_SIZE));
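
The allocation above over-allocates by extra pages and aligns the working pointer with ut_align(), since direct page I/O needs page-aligned buffers (the 3x sizing also leaves room for a compressed copy). A standalone sketch of the same trick with std::align; the helper name is hypothetical:

    #include <cstdlib>
    #include <memory>

    /* Allocate 'pages' usable pages aligned to page_size, plus one page
       of slack for the alignment itself.  Caller frees 'raw'. */
    static unsigned char* alloc_aligned_pages(std::size_t page_size,
                                              std::size_t pages,
                                              void*& raw) {
        std::size_t space = (pages + 1) * page_size;
        raw = std::malloc(space);
        if (!raw) return nullptr;

        void* p = raw;
        if (!std::align(page_size, pages * page_size, p, space)) {
            std::free(raw);
            raw = nullptr;
            return nullptr;
        }
        return static_cast<unsigned char*>(p);
    }
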
- /* The block we will use for every physical page */
- buf_block_t block;
-
- memset(&block, 0, sizeof block);
- block.frame = page;
- block.page.space = callback.get_space_id();
- block.page.io_fix = BUF_IO_NONE;
- block.page.buf_fix_count = 1;
- block.page.state = BUF_BLOCK_FILE_PAGE;
+ buf_block_t* block = reinterpret_cast<buf_block_t*>
+ (ut_zalloc_nokey(sizeof *block));
+ block->frame = page;
+ block->page.id = page_id_t(0, 0);
+ block->page.io_fix = BUF_IO_NONE;
+ block->page.buf_fix_count = 1;
+ block->page.state = BUF_BLOCK_FILE_PAGE;
/* Read the first page and determine the page and zip size. */
- if (!os_file_read_no_error_handling(file, page, 0, UNIV_PAGE_SIZE)) {
+ IORequest request(IORequest::READ);
+ request.disable_partial_io_warnings();
- err = DB_IO_ERROR;
+ err = os_file_read_no_error_handling(request, file, page, 0,
+ UNIV_PAGE_SIZE, 0);
- } else if ((err = callback.init(file_size, &block)) == DB_SUCCESS) {
- if (const ulint zip_size = callback.get_zip_size()) {
- page_zip_set_size(&block.page.zip, zip_size);
+ if (err == DB_SUCCESS) {
+ err = callback.init(file_size, block);
+ }
+
+ if (err == DB_SUCCESS) {
+ block->page.id = page_id_t(callback.get_space_id(), 0);
+ block->page.size.copy_from(callback.get_page_size());
+ if (block->page.size.is_compressed()) {
+ page_zip_set_size(&block->page.zip,
+ callback.get_page_size().physical());
/* ROW_FORMAT=COMPRESSED is not optimised for block IO
for now. We do the IMPORT page by page. */
n_io_buffers = 1;
@@ -3807,82 +3781,71 @@ fil_tablespace_iterate(
fil_iterator_t iter;
- iter.file = file;
- iter.start = 0;
- iter.end = file_size;
- iter.filepath = filepath;
- iter.file_size = file_size;
- iter.n_io_buffers = n_io_buffers;
- iter.page_size = callback.get_page_size();
-
- /* In MariaDB/MySQL 5.6 tablespace does not exist
- during import, therefore we can't use space directly
- here. */
- ulint crypt_data_offset = fsp_header_get_crypt_offset(
- callback.get_zip_size());
-
/* read (optional) crypt data */
iter.crypt_data = fil_space_read_crypt_data(
- 0, page, crypt_data_offset);
+ callback.get_page_size(), page);
- /** If tablespace is encrypted, it needs extra buffers */
- if (iter.crypt_data != NULL) {
+ /* If tablespace is encrypted, it needs extra buffers */
+ if (iter.crypt_data && n_io_buffers > 1) {
/* decrease io buffers so that memory
- * consumption doesnt double
- * note: the +1 is to avoid n_io_buffers getting down to 0 */
- iter.n_io_buffers = (iter.n_io_buffers + 1) / 2;
+ consumption will not double */
+ n_io_buffers /= 2;
}
- /** Add an extra page for compressed page scratch area. */
+ iter.file = file;
+ iter.start = 0;
+ iter.end = file_size;
+ iter.filepath = filepath;
+ iter.file_size = file_size;
+ iter.n_io_buffers = n_io_buffers;
- void* io_buffer = mem_alloc(
+ /* Add an extra page for compressed page scratch area. */
+ void* io_buffer = ut_malloc_nokey(
(2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
iter.io_buffer = static_cast<byte*>(
ut_align(io_buffer, UNIV_PAGE_SIZE));
void* crypt_io_buffer = NULL;
- if (iter.crypt_data != NULL) {
- crypt_io_buffer = mem_alloc(
+ if (iter.crypt_data) {
+ crypt_io_buffer = ut_malloc_nokey(
(2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
iter.crypt_io_buffer = static_cast<byte*>(
ut_align(crypt_io_buffer, UNIV_PAGE_SIZE));
}
- if (block.page.zip.ssize) {
+ if (block->page.zip.ssize) {
ut_ad(iter.n_io_buffers == 1);
- block.frame = iter.io_buffer;
- block.page.zip.data = block.frame + UNIV_PAGE_SIZE;
- ut_d(block.page.zip.m_external = true);
+ block->frame = iter.io_buffer;
+ block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
}
- err = fil_iterate(iter, &block, callback);
+ err = fil_iterate(iter, block, callback);
- mem_free(io_buffer);
-
- if (crypt_io_buffer != NULL) {
- mem_free(crypt_io_buffer);
- iter.crypt_io_buffer = NULL;
+ if (iter.crypt_data) {
fil_space_destroy_crypt_data(&iter.crypt_data);
}
+
+ ut_free(crypt_io_buffer);
+ ut_free(io_buffer);
}
if (err == DB_SUCCESS) {
-
- ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk");
+ ib::info() << "Sync to disk";
if (!os_file_flush(file)) {
- ib_logf(IB_LOG_LEVEL_INFO, "os_file_flush() failed!");
+ ib::info() << "os_file_flush() failed!";
err = DB_IO_ERROR;
} else {
- ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk - done!");
+ ib::info() << "Sync to disk - done!";
}
}
os_file_close(file);
- mem_free(page_ptr);
- mem_free(filepath);
+ ut_free(page_ptr);
+ ut_free(filepath);
+ ut_free(block);
return(err);
}
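
fil_tablespace_iterate() reads the file in batches of iter.n_io_buffers pages and feeds every page to the callback (FetchIndexRootPages or the import page converter). A hedged, self-contained sketch of that batching shape — not the real fil_iterate(), which reads into the aligned io_buffer with os_file I/O:

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>
    #include <functional>

    /* Iterate over a "file" of n_pages pages, handling up to
       pages_per_batch pages per read and invoking cb on each page. */
    static bool iterate_pages(std::size_t n_pages, std::size_t pages_per_batch,
                              const std::function<bool(std::size_t)>& cb)
    {
            for (std::size_t start = 0; start < n_pages; start += pages_per_batch) {
                    std::size_t end = std::min(start + pages_per_batch, n_pages);
                    /* in the real code: one batched read of (end - start) pages */
                    for (std::size_t p = start; p < end; p++) {
                            if (!cb(p)) {
                                    return false;   /* abort on callback error */
                            }
                    }
            }
            return true;
    }

    int main()
    {
            iterate_pages(10, 4, [](std::size_t p) {
                    std::printf("page %zu\n", p);
                    return true;
            });
            return 0;
    }
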
@@ -3890,8 +3853,7 @@ fil_tablespace_iterate(
/*****************************************************************//**
Imports a tablespace. The space id in the .ibd file must match the space id
of the table in the data dictionary.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
dberr_t
row_import_for_mysql(
/*=================*/
@@ -3901,26 +3863,28 @@ row_import_for_mysql(
dberr_t err;
trx_t* trx;
ib_uint64_t autoinc = 0;
- char table_name[MAX_FULL_NAME_LEN + 1];
char* filepath = NULL;
+ ulint space_flags MY_ATTRIBUTE((unused));
+ /* The caller assured that this is not read_only_mode and that no
+ temporary tablespace is being imported. */
ut_ad(!srv_read_only_mode);
-
- innobase_format_name(
- table_name, sizeof(table_name), table->name, FALSE);
+ ut_ad(!dict_table_is_temporary(table));
ut_a(table->space);
ut_ad(prebuilt->trx);
- ut_a(table->file_unreadable);
+ ut_a(!table->is_readable());
- trx_start_if_not_started(prebuilt->trx);
+ ibuf_delete_for_discarded_space(table->space);
+
+ trx_start_if_not_started(prebuilt->trx, true);
trx = trx_allocate_for_mysql();
/* So that the table is not DROPped during recovery. */
trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
- trx_start_if_not_started(trx);
+ trx_start_if_not_started(trx, true);
/* So that we can send error messages to the user. */
trx->mysql_thd = prebuilt->trx->mysql_thd;
@@ -3935,7 +3899,10 @@ row_import_for_mysql(
mutex_enter(&trx->undo_mutex);
- err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
+ /* TODO: Do not write any undo log for the IMPORT cleanup. */
+ trx_undo_t** pundo = &trx->rsegs.m_redo.update_undo;
+ err = trx_undo_assign_undo(trx, trx->rsegs.m_redo.rseg, pundo,
+ TRX_UNDO_UPDATE);
mutex_exit(&trx->undo_mutex);
@@ -3946,7 +3913,7 @@ row_import_for_mysql(
return(row_import_cleanup(prebuilt, trx, err));
- } else if (trx->update_undo == 0) {
+ } else if (trx->rsegs.m_redo.update_undo == 0) {
err = DB_TOO_MANY_CONCURRENT_TRXS;
return(row_import_cleanup(prebuilt, trx, err));
@@ -3967,7 +3934,7 @@ row_import_for_mysql(
if (err == DB_SUCCESS) {
- /* We have a schema file, try and match it with the our
+ /* We have a schema file, try and match it with our
data dictionary. */
err = cfg.match_schema(trx->mysql_thd);
@@ -3996,12 +3963,12 @@ row_import_for_mysql(
ut_a(err == DB_FAIL);
- cfg.m_page_size = UNIV_PAGE_SIZE;
+ cfg.m_page_size.copy_from(univ_page_size);
FetchIndexRootPages fetchIndexRootPages(table, trx);
err = fil_tablespace_iterate(
- table, IO_BUFFER_SIZE(cfg.m_page_size),
+ table, IO_BUFFER_SIZE(cfg.m_page_size.physical()),
fetchIndexRootPages);
if (err == DB_SUCCESS) {
@@ -4017,6 +3984,8 @@ row_import_for_mysql(
}
}
+ space_flags = fetchIndexRootPages.get_space_flags();
+
} else {
rw_lock_s_unlock_gen(&dict_operation_lock, 0);
}
@@ -4027,7 +3996,7 @@ row_import_for_mysql(
prebuilt->trx->op_info = "importing tablespace";
- ib_logf(IB_LOG_LEVEL_INFO, "Phase I - Update all pages");
+ ib::info() << "Phase I - Update all pages";
/* Iterate over all the pages and do the sanity checking and
the conversion required to import the tablespace. */
@@ -4037,11 +4006,11 @@ row_import_for_mysql(
/* Set the IO buffer size in pages. */
err = fil_tablespace_iterate(
- table, IO_BUFFER_SIZE(cfg.m_page_size), converter);
+ table, IO_BUFFER_SIZE(cfg.m_page_size.physical()), converter);
DBUG_EXECUTE_IF("ib_import_reset_space_and_lsn_failure",
err = DB_TOO_MANY_CONCURRENT_TRXS;);
-
+#ifdef BTR_CUR_HASH_ADAPT
/* On DISCARD TABLESPACE, we did not drop any adaptive hash
index entries. If we replaced the discarded tablespace with a
smaller one here, there could still be some adaptive hash
@@ -4058,18 +4027,20 @@ row_import_for_mysql(
break;
}
}
+#endif /* BTR_CUR_HASH_ADAPT */
if (err != DB_SUCCESS) {
char table_name[MAX_FULL_NAME_LEN + 1];
innobase_format_name(
- table_name, sizeof(table_name), table->name, FALSE);
+ table_name, sizeof(table_name),
+ table->name.m_name);
if (err != DB_DECRYPTION_FAILED) {
ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
ER_INTERNAL_ERROR,
- "Cannot reset LSNs in table '%s' : %s",
+ "Cannot reset LSNs in table %s : %s",
table_name, ut_strerr(err));
}
@@ -4081,25 +4052,41 @@ row_import_for_mysql(
/* If the table is stored in a remote tablespace, we need to
determine that filepath from the link file and system tables.
Find the space ID in SYS_TABLES since this is an ALTER TABLE. */
+ dict_get_and_save_data_dir_path(table, true);
+
if (DICT_TF_HAS_DATA_DIR(table->flags)) {
- dict_get_and_save_data_dir_path(table, true);
ut_a(table->data_dir_path);
- filepath = os_file_make_remote_pathname(
- table->data_dir_path, table->name, "ibd");
+ filepath = fil_make_filepath(
+ table->data_dir_path, table->name.m_name, IBD, true);
} else {
- filepath = fil_make_ibd_name(table->name, false);
+ filepath = fil_make_filepath(
+ NULL, table->name.m_name, IBD, false);
+ }
+
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_15",
+ ut_free(filepath);
+ filepath = NULL;
+ );
+
+ if (filepath == NULL) {
+ row_mysql_unlock_data_dictionary(trx);
+ return(row_import_cleanup(prebuilt, trx, DB_OUT_OF_MEMORY));
}
- ut_a(filepath);
/* Open the tablespace so that we can access via the buffer pool.
We set the 2nd param (fix_dict = true) here because we already
- have an x-lock on dict_operation_lock and dict_sys->mutex. */
+ have an x-lock on dict_operation_lock and dict_sys->mutex.
+ The tablespace is initially opened as a temporary one, because
+ we will not be writing any redo log for it before we have invoked
+ fil_space_set_imported() to declare it a persistent tablespace. */
- err = fil_open_single_table_tablespace(
- true, true, table->space,
- dict_tf_to_fsp_flags(table->flags),
- table->name, filepath);
+ ulint fsp_flags = dict_tf_to_fsp_flags(table->flags);
+
+ err = fil_ibd_open(
+ true, true, FIL_TYPE_IMPORT, table->space,
+ fsp_flags, table->name.m_name, filepath);
DBUG_EXECUTE_IF("ib_import_open_tablespace_failure",
err = DB_TABLESPACE_NOT_FOUND;);
@@ -4111,14 +4098,14 @@ row_import_for_mysql(
ER_GET_ERRMSG,
err, ut_strerr(err), filepath);
- mem_free(filepath);
+ ut_free(filepath);
return(row_import_cleanup(prebuilt, trx, err));
}
row_mysql_unlock_data_dictionary(trx);
- mem_free(filepath);
+ ut_free(filepath);
err = ibuf_check_bitmap_on_import(trx, table->space);
@@ -4146,10 +4133,6 @@ row_import_for_mysql(
if (err != DB_SUCCESS) {
return(row_import_error(prebuilt, trx, err));
- }
-
- if (err != DB_SUCCESS) {
- return(row_import_error(prebuilt, trx, err));
} else if (cfg.requires_purge(index->name)) {
/* Purge any delete-marked records that couldn't be
@@ -4196,28 +4179,34 @@ row_import_for_mysql(
}
}
- ib_logf(IB_LOG_LEVEL_INFO, "Phase III - Flush changes to disk");
+ ib::info() << "Phase III - Flush changes to disk";
/* Ensure that all pages dirtied during the IMPORT make it to disk.
The only dirty pages generated should be from the pessimistic purge
of delete marked records that couldn't be purged in Phase I. */
- buf_LRU_flush_or_remove_pages(prebuilt->table->space, trx);
-
- if (trx_is_interrupted(trx)) {
- ib_logf(IB_LOG_LEVEL_INFO, "Phase III - Flush interrupted");
- return(row_import_error(prebuilt, trx, DB_INTERRUPTED));
- } else {
- ib_logf(IB_LOG_LEVEL_INFO, "Phase IV - Flush complete");
+ {
+ FlushObserver observer(prebuilt->table->space, trx, NULL);
+ buf_LRU_flush_or_remove_pages(prebuilt->table->space,
+ &observer);
+
+ if (observer.is_interrupted()) {
+ ib::info() << "Phase III - Flush interrupted";
+ return(row_import_error(prebuilt, trx,
+ DB_INTERRUPTED));
+ }
}
+ ib::info() << "Phase IV - Flush complete";
+ fil_space_set_imported(prebuilt->table->space);
+
/* The dictionary latches will be released in row_import_cleanup()
after the transaction commit, for both success and error. */
row_mysql_lock_data_dictionary(trx);
/* Update the root pages of the table's indexes. */
- err = row_import_update_index_root(trx, table, false, true);
+ err = row_import_update_index_root(trx, table, false);
if (err != DB_SUCCESS) {
return(row_import_error(prebuilt, trx, err));
@@ -4233,21 +4222,15 @@ row_import_for_mysql(
table->file_unreadable = false;
table->flags2 &= ~DICT_TF2_DISCARDED;
- if (autoinc != 0) {
- char table_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(
- table_name, sizeof(table_name), table->name, FALSE);
+ /* Set autoinc value read from .cfg file, if one was specified.
+ Otherwise, keep the PAGE_ROOT_AUTO_INC as is. */
+ if (autoinc) {
+ ib::info() << table->name << " autoinc value set to "
+ << autoinc;
- ib_logf(IB_LOG_LEVEL_INFO, "%s autoinc value set to " IB_ID_FMT,
- table_name, autoinc);
-
- dict_table_autoinc_lock(table);
- dict_table_autoinc_initialize(table, autoinc);
- dict_table_autoinc_unlock(table);
+ table->autoinc = autoinc--;
+ btr_write_autoinc(dict_table_get_first_index(table), autoinc);
}
- ut_a(err == DB_SUCCESS);
-
return(row_import_cleanup(prebuilt, trx, err));
}
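
The autoinc epilogue stores the next value in table->autoinc and then persists autoinc - 1 via btr_write_autoinc(), which suggests the on-page counter (PAGE_ROOT_AUTO_INC) records the last value used rather than the next one to assign. A tiny sketch of that off-by-one convention, under that assumption:

    #include <cassert>
    #include <cstdint>

    /* In-memory counter: next value to hand out.
       Persisted counter: last value handed out. */
    struct autoinc_sketch {
            std::uint64_t next = 0;         /* models table->autoinc */
            std::uint64_t persisted = 0;    /* models the root page field */
    };

    static void import_set_autoinc(autoinc_sketch& a, std::uint64_t cfg_value)
    {
            a.next = cfg_value;
            a.persisted = cfg_value - 1;    /* mirrors autoinc-- before the write */
    }

    int main()
    {
            autoinc_sketch a;
            import_set_autoinc(a, 100);
            assert(a.next == 100 && a.persisted == 99);
            return 0;
    }
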
diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc
index ecbfd03ec93..5b7faa4888a 100644
--- a/storage/innobase/row/row0ins.cc
+++ b/storage/innobase/row/row0ins.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2020, MariaDB Corporation.
+Copyright (c) 2016, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,14 +25,7 @@ Created 4/20/1996 Heikki Tuuri
*******************************************************/
#include "row0ins.h"
-
-#ifdef UNIV_NONINL
-#include "row0ins.ic"
-#endif
-
-#include "ha_prototypes.h"
#include "dict0dict.h"
-#include "dict0boot.h"
#include "trx0rec.h"
#include "trx0undo.h"
#include "btr0btr.h"
@@ -42,18 +35,15 @@ Created 4/20/1996 Heikki Tuuri
#include "que0que.h"
#include "row0upd.h"
#include "row0sel.h"
-#include "row0row.h"
#include "row0log.h"
#include "rem0cmp.h"
#include "lock0lock.h"
#include "log0log.h"
#include "eval0eval.h"
#include "data0data.h"
-#include "usr0sess.h"
#include "buf0lru.h"
#include "fts0fts.h"
#include "fts0types.h"
-#include "m_string.h"
#ifdef WITH_WSREP
#include <mysql/service_wsrep.h>
@@ -73,8 +63,7 @@ introduced where a call to log_free_check() is bypassed. */
/*********************************************************************//**
Creates an insert node struct.
-@return own: insert node struct */
-UNIV_INTERN
+@return own: insert node struct */
ins_node_t*
ins_node_create(
/*============*/
@@ -84,8 +73,8 @@ ins_node_create(
{
ins_node_t* node;
- node = static_cast<ins_node_t*>(
- mem_heap_alloc(heap, sizeof(ins_node_t)));
+ node = new (static_cast<ins_node_t*>(
+ mem_heap_alloc(heap, sizeof(ins_node_t)))) ins_node_t;
node->common.type = QUE_NODE_INSERT;
@@ -94,7 +83,6 @@ ins_node_create(
node->state = INS_NODE_SET_IX_LOCK;
node->table = table;
node->index = NULL;
- node->entry = NULL;
node->select = NULL;
@@ -120,20 +108,21 @@ ins_node_create_entry_list(
ut_ad(node->entry_sys_heap);
- UT_LIST_INIT(node->entry_list);
-
/* We will include all indexes (include those corrupted
- secondary indexes) in the entry list. Filteration of
+ secondary indexes) in the entry list. Filtration of
these corrupted indexes will be done in row_ins() */
+ node->entry_list.reserve(UT_LIST_GET_LEN(node->table->indexes));
+
for (index = dict_table_get_first_index(node->table);
index != 0;
index = dict_table_get_next_index(index)) {
- entry = row_build_index_entry(
- node->row, NULL, index, node->entry_sys_heap);
+ entry = row_build_index_entry_low(
+ node->row, NULL, index, node->entry_sys_heap,
+ ROW_BUILD_FOR_INSERT);
- UT_LIST_ADD_LAST(tuple_list, node->entry_list, entry);
+ node->entry_list.push_back(entry);
}
}
@@ -147,58 +136,50 @@ row_ins_alloc_sys_fields(
{
dtuple_t* row;
dict_table_t* table;
- mem_heap_t* heap;
const dict_col_t* col;
dfield_t* dfield;
- byte* ptr;
row = node->row;
table = node->table;
- heap = node->entry_sys_heap;
- ut_ad(row && table && heap);
ut_ad(dtuple_get_n_fields(row) == dict_table_get_n_cols(table));
/* allocate buffer to hold the needed system created hidden columns. */
- uint len = DATA_ROW_ID_LEN + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
- ptr = static_cast<byte*>(mem_heap_zalloc(heap, len));
+ compile_time_assert(DATA_ROW_ID_LEN
+ + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN
+ == sizeof node->sys_buf);
+ memset(node->sys_buf, 0, sizeof node->sys_buf);
+ /* Assign DB_ROLL_PTR to 1 << ROLL_PTR_INSERT_FLAG_POS */
+ node->sys_buf[DATA_ROW_ID_LEN + DATA_TRX_ID_LEN] = 0x80;
/* 1. Populate row-id */
col = dict_table_get_sys_col(table, DATA_ROW_ID);
dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
- dfield_set_data(dfield, ptr, DATA_ROW_ID_LEN);
-
- node->row_id_buf = ptr;
-
- ptr += DATA_ROW_ID_LEN;
+ dfield_set_data(dfield, node->sys_buf, DATA_ROW_ID_LEN);
/* 2. Populate trx id */
col = dict_table_get_sys_col(table, DATA_TRX_ID);
dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
- dfield_set_data(dfield, ptr, DATA_TRX_ID_LEN);
-
- node->trx_id_buf = ptr;
-
- ptr += DATA_TRX_ID_LEN;
-
- /* 3. Populate roll ptr */
+ dfield_set_data(dfield, &node->sys_buf[DATA_ROW_ID_LEN],
+ DATA_TRX_ID_LEN);
col = dict_table_get_sys_col(table, DATA_ROLL_PTR);
dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
- dfield_set_data(dfield, ptr, DATA_ROLL_PTR_LEN);
+ dfield_set_data(dfield, &node->sys_buf[DATA_ROW_ID_LEN
+ + DATA_TRX_ID_LEN],
+ DATA_ROLL_PTR_LEN);
}
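
The rewritten row_ins_alloc_sys_fields() packs the three system columns into one fixed node->sys_buf — DB_ROW_ID (6 bytes), DB_TRX_ID (6 bytes), DB_ROLL_PTR (7 bytes) — and sets the high bit of the roll pointer as the insert flag (1 << ROLL_PTR_INSERT_FLAG_POS). A self-contained sketch of that layout; the struct name is invented:

    #include <cassert>
    #include <cstring>

    enum {
            DATA_ROW_ID_LEN   = 6,  /* DB_ROW_ID */
            DATA_TRX_ID_LEN   = 6,  /* DB_TRX_ID */
            DATA_ROLL_PTR_LEN = 7   /* DB_ROLL_PTR */
    };

    struct ins_node_sketch {
            unsigned char sys_buf[DATA_ROW_ID_LEN + DATA_TRX_ID_LEN
                                  + DATA_ROLL_PTR_LEN];
    };

    int main()
    {
            ins_node_sketch node;
            std::memset(node.sys_buf, 0, sizeof node.sys_buf);
            /* The first byte of DB_ROLL_PTR carries the insert flag. */
            node.sys_buf[DATA_ROW_ID_LEN + DATA_TRX_ID_LEN] = 0x80;
            assert(sizeof node.sys_buf == 19);
            return 0;
    }
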
/*********************************************************************//**
Sets a new row to insert for an INS_DIRECT node. This function is only used
if we have constructed the row separately, which is a rare case; this
function is quite slow. */
-UNIV_INTERN
void
ins_node_set_new_row(
/*=================*/
@@ -207,7 +188,8 @@ ins_node_set_new_row(
{
node->state = INS_NODE_SET_IX_LOCK;
node->index = NULL;
- node->entry = NULL;
+ node->entry_list.clear();
+ node->entry = node->entry_list.end();
node->row = row;
@@ -231,7 +213,7 @@ ins_node_set_new_row(
Does an insert operation by updating a delete-marked existing record
in the index. This situation can occur if the delete-marked record is
kept in the index for consistent reads.
-@return DB_SUCCESS or error code */
+@return DB_SUCCESS or error code */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_ins_sec_index_entry_by_modify(
@@ -241,7 +223,7 @@ row_ins_sec_index_entry_by_modify(
depending on whether mtr holds just a leaf
latch or also a tree latch */
btr_cur_t* cursor, /*!< in: B-tree cursor */
- ulint** offsets,/*!< in/out: offsets on cursor->page_cur.rec */
+ offset_t** offsets,/*!< in/out: offsets on cursor->page_cur.rec */
mem_heap_t* offsets_heap,
/*!< in/out: memory heap that can be emptied */
mem_heap_t* heap, /*!< in/out: memory heap */
@@ -278,11 +260,10 @@ row_ins_sec_index_entry_by_modify(
case, the change would already be there. The CREATE
INDEX should be waiting for a MySQL meta-data lock
upgrade at least until this INSERT or UPDATE
- returns. After that point, the TEMP_INDEX_PREFIX
- would be dropped from the index name in
- commit_inplace_alter_table(). */
+ returns. After that point, set_committed(true)
+ would be invoked in commit_inplace_alter_table(). */
ut_a(update->n_fields == 0);
- ut_a(*cursor->index->name == TEMP_INDEX_PREFIX);
+ ut_a(!cursor->index->is_committed());
ut_ad(!dict_index_is_online_ddl(cursor->index));
return(DB_SUCCESS);
}
@@ -326,49 +307,57 @@ row_ins_sec_index_entry_by_modify(
Does an insert operation by delete unmarking and updating a delete marked
existing record in the index. This situation can occur if the delete marked
record is kept in the index for consistent reads.
-@return DB_SUCCESS, DB_FAIL, or error code */
+@return DB_SUCCESS, DB_FAIL, or error code */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_ins_clust_index_entry_by_modify(
/*================================*/
+ btr_pcur_t* pcur, /*!< in/out: a persistent cursor pointing
+ to the clust_rec that is being modified. */
ulint flags, /*!< in: undo logging and locking flags */
ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
depending on whether mtr holds just a leaf
latch or also a tree latch */
- btr_cur_t* cursor, /*!< in: B-tree cursor */
- ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
+ offset_t** offsets,/*!< out: offsets on cursor->page_cur.rec */
mem_heap_t** offsets_heap,
/*!< in/out: pointer to memory heap that can
be emptied, or NULL */
mem_heap_t* heap, /*!< in/out: memory heap */
- big_rec_t** big_rec,/*!< out: possible big rec vector of fields
- which have to be stored externally by the
- caller */
const dtuple_t* entry, /*!< in: index entry to insert */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr) /*!< in: mtr; must be committed before
latching any further pages */
{
const rec_t* rec;
- const upd_t* update;
- dberr_t err;
-
+ upd_t* update;
+ dberr_t err = DB_SUCCESS;
+ btr_cur_t* cursor = btr_pcur_get_btr_cur(pcur);
+ TABLE* mysql_table = NULL;
ut_ad(dict_index_is_clust(cursor->index));
- *big_rec = NULL;
-
rec = btr_cur_get_rec(cursor);
ut_ad(rec_get_deleted_flag(rec,
dict_table_is_comp(cursor->index->table)));
+ /* In delete-marked records, DB_TRX_ID must
+ always refer to an existing undo log record. */
+ ut_ad(rec_get_trx_id(rec, cursor->index));
/* Build an update vector containing all the fields to be modified;
NOTE that this vector may NOT contain system columns trx_id or
roll_ptr */
+ if (thr->prebuilt != NULL) {
+ mysql_table = thr->prebuilt->m_mysql_table;
+ ut_ad(thr->prebuilt->trx == thr_get_trx(thr));
+ }
update = row_upd_build_difference_binary(
cursor->index, entry, rec, NULL, true,
- thr_get_trx(thr), heap);
+ thr_get_trx(thr), heap, mysql_table, &err);
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
if (mode != BTR_MODIFY_TREE) {
ut_ad((mode & ~BTR_ALREADY_S_LATCHED) == BTR_MODIFY_LEAF);
@@ -392,10 +381,24 @@ row_ins_clust_index_entry_by_modify(
return(DB_LOCK_TABLE_FULL);
}
+
+ big_rec_t* big_rec = NULL;
+
err = btr_cur_pessimistic_update(
flags | BTR_KEEP_POS_FLAG,
cursor, offsets, offsets_heap, heap,
- big_rec, update, 0, thr, thr_get_trx(thr)->id, mtr);
+ &big_rec, update, 0, thr, thr_get_trx(thr)->id, mtr);
+
+ if (big_rec) {
+ ut_a(err == DB_SUCCESS);
+
+ DEBUG_SYNC_C("before_row_ins_upd_extern");
+ err = btr_store_big_rec_extern_fields(
+ pcur, *offsets, big_rec, mtr,
+ BTR_STORE_INSERT_UPDATE);
+ DEBUG_SYNC_C("after_row_ins_upd_extern");
+ dtuple_big_rec_free(big_rec);
+ }
}
return(err);
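
row_ins_clust_index_entry_by_modify() turns an insert over a delete-marked record into an update: row_upd_build_difference_binary() compares the new entry with the stored record, and only the fields that differ enter the update vector. A toy sketch of that diffing idea — plain strings stand in for dfields, and none of these names are the real API:

    #include <cstdio>
    #include <string>
    #include <vector>

    struct upd_field_sketch {
            std::size_t field_no;
            std::string new_val;
    };

    /* Build an "update vector" containing only the fields whose new
       value differs from the stored record. */
    static std::vector<upd_field_sketch>
    build_difference(const std::vector<std::string>& rec,
                     const std::vector<std::string>& entry)
    {
            std::vector<upd_field_sketch> update;
            for (std::size_t i = 0; i < rec.size(); i++) {
                    if (rec[i] != entry[i]) {
                            update.push_back({i, entry[i]});
                    }
            }
            return update;
    }

    int main()
    {
            std::vector<std::string> rec   = {"1", "old", "same"};
            std::vector<std::string> entry = {"1", "new", "same"};
            for (const upd_field_sketch& f : build_difference(rec, entry)) {
                    std::printf("field %zu -> %s\n", f.field_no, f.new_val.c_str());
            }
            return 0;
    }
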
@@ -404,7 +407,7 @@ row_ins_clust_index_entry_by_modify(
/*********************************************************************//**
Returns TRUE if in a cascaded update/delete an ancestor node of node
updates (not DELETE, but UPDATE) table.
-@return TRUE if an ancestor updates table */
+@return TRUE if an ancestor updates table */
static
ibool
row_ins_cascade_ancestor_updates_table(
@@ -434,7 +437,7 @@ row_ins_cascade_ancestor_updates_table(
/*********************************************************************//**
Returns the number of ancestor UPDATE or DELETE nodes of a
cascaded update/delete node.
-@return number of ancestors */
+@return number of ancestors */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
ulint
row_ins_cascade_n_ancestors(
@@ -457,12 +460,9 @@ row_ins_cascade_n_ancestors(
/******************************************************************//**
Calculates the update vector node->cascade->update for a child table in
a cascaded update.
-@return number of fields in the calculated update vector; the value
-can also be 0 if no foreign key fields changed; the returned value is
-ULINT_UNDEFINED if the column type in the child table is too short to
-fit the new value in the parent table: that means the update fails */
+@return whether any FULLTEXT INDEX is affected */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
-ulint
+bool
row_ins_cascade_calc_update_vec(
/*============================*/
upd_node_t* node, /*!< in: update node of the parent
@@ -471,10 +471,9 @@ row_ins_cascade_calc_update_vec(
type is != 0 */
mem_heap_t* heap, /*!< in: memory heap to use as
temporary storage */
- trx_t* trx, /*!< in: update transaction */
- ibool* fts_col_affected)/*!< out: is FTS column affected */
+ trx_t* trx) /*!< in: update transaction */
{
- upd_node_t* cascade = node->cascade_node;
+ upd_node_t* cascade = node->cascade_node;
dict_table_t* table = foreign->foreign_table;
dict_index_t* index = foreign->foreign_index;
upd_t* update;
@@ -485,9 +484,10 @@ row_ins_cascade_calc_update_vec(
ulint parent_field_no;
ulint i;
ulint j;
- ibool doc_id_updated = FALSE;
+ bool doc_id_updated = false;
ulint doc_id_pos = 0;
doc_id_t new_doc_id = FTS_NULL_DOC_ID;
+ ulint prefix_col;
ut_a(cascade);
ut_a(table);
@@ -506,22 +506,22 @@ row_ins_cascade_calc_update_vec(
update = cascade->update;
update->info_bits = 0;
- update->n_fields = foreign->n_fields;
n_fields_updated = 0;
- *fts_col_affected = FALSE;
+ bool affects_fulltext = false;
if (table->fts) {
doc_id_pos = dict_table_get_nth_col_pos(
- table, table->fts->doc_col);
+ table, table->fts->doc_col, &prefix_col);
}
for (i = 0; i < foreign->n_fields; i++) {
parent_field_no = dict_table_get_nth_col_pos(
parent_table,
- dict_index_get_nth_col_no(parent_index, i));
+ dict_index_get_nth_col_no(parent_index, i),
+ &prefix_col);
for (j = 0; j < parent_update->n_fields; j++) {
const upd_field_t* parent_ufield
@@ -544,7 +544,8 @@ row_ins_cascade_calc_update_vec(
ufield->field_no
= dict_table_get_nth_col_pos(
- table, dict_col_get_no(col));
+ table, dict_col_get_no(col),
+ &prefix_col);
ufield->orig_len = 0;
ufield->exp = NULL;
@@ -560,8 +561,7 @@ row_ins_cascade_calc_update_vec(
if (dfield_is_null(&ufield->new_val)
&& (col->prtype & DATA_NOT_NULL)) {
-
- return(ULINT_UNDEFINED);
+ goto err_exit;
}
/* If the new value would not fit in the
@@ -577,8 +577,7 @@ row_ins_cascade_calc_update_vec(
dfield_get_data(
&ufield->new_val)))
< ufield_len) {
-
- return(ULINT_UNDEFINED);
+ goto err_exit;
}
/* If the parent column type has a different
@@ -622,7 +621,7 @@ row_ins_cascade_calc_update_vec(
col->prtype)
== DATA_MYSQL_BINARY_CHARSET_COLL) {
/* Do not pad BINARY columns */
- return(ULINT_UNDEFINED);
+ goto err_exit;
}
row_mysql_pad_col(mbminlen,
@@ -636,9 +635,10 @@ row_ins_cascade_calc_update_vec(
if (table->fts
&& dict_table_is_fts_column(
table->fts->indexes,
- dict_col_get_no(col))
+ dict_col_get_no(col),
+ dict_col_is_virtual(col))
!= ULINT_UNDEFINED) {
- *fts_col_affected = TRUE;
+ affects_fulltext = true;
}
/* If Doc ID is updated, check whether the
@@ -655,31 +655,24 @@ row_ins_cascade_calc_update_vec(
dfield_get_data(
&ufield->new_val)));
+ affects_fulltext = true;
+ doc_id_updated = true;
+
if (new_doc_id <= 0) {
- fprintf(stderr,
- "InnoDB: FTS Doc ID "
- "must be larger than "
- "0 \n");
- return(ULINT_UNDEFINED);
+ ib::error() << "FTS Doc ID"
+ " must be larger than"
+ " 0";
+ goto err_exit;
}
if (new_doc_id < n_doc_id) {
- fprintf(stderr,
- "InnoDB: FTS Doc ID "
- "must be larger than "
- IB_ID_FMT" for table",
- n_doc_id -1);
-
- ut_print_name(stderr, trx,
- TRUE,
- table->name);
-
- putc('\n', stderr);
- return(ULINT_UNDEFINED);
+ ib::error() << "FTS Doc ID"
+ " must be larger than "
+ << n_doc_id - 1
+ << " for table "
+ << table->name;
+ goto err_exit;
}
-
- *fts_col_affected = TRUE;
- doc_id_updated = TRUE;
}
n_fields_updated++;
@@ -687,17 +680,21 @@ row_ins_cascade_calc_update_vec(
}
}
- /* Generate a new Doc ID if FTS index columns get updated */
- if (table->fts && *fts_col_affected) {
+ if (affects_fulltext) {
+ ut_ad(table->fts);
+
if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
doc_id_t doc_id;
- upd_field_t* ufield;
+ doc_id_t* next_doc_id;
+ upd_field_t* ufield;
+
+ next_doc_id = static_cast<doc_id_t*>(mem_heap_alloc(
+ heap, sizeof(doc_id_t)));
ut_ad(!doc_id_updated);
ufield = update->fields + n_fields_updated;
- fts_get_next_doc_id(table, &trx->fts_next_doc_id);
- doc_id = fts_update_doc_id(table, ufield,
- &trx->fts_next_doc_id);
+ fts_get_next_doc_id(table, next_doc_id);
+ doc_id = fts_update_doc_id(table, ufield, next_doc_id);
n_fields_updated++;
fts_trx_add_op(trx, table, doc_id, FTS_INSERT, NULL);
} else {
@@ -706,19 +703,18 @@ row_ins_cascade_calc_update_vec(
fts_trx_add_op(trx, table, new_doc_id,
FTS_INSERT, NULL);
} else {
- fprintf(stderr, "InnoDB: FTS Doc ID must be "
- "updated along with FTS indexed "
- "column for table ");
- ut_print_name(stderr, trx, TRUE, table->name);
- putc('\n', stderr);
- return(ULINT_UNDEFINED);
+ ib::error() << "FTS Doc ID must be updated"
+ " along with FTS indexed column for"
+ " table " << table->name;
+err_exit:
+ n_fields_updated = ULINT_UNDEFINED;
}
}
}
update->n_fields = n_fields_updated;
- return(n_fields_updated);
+ return affects_fulltext;
}
/*********************************************************************//**
@@ -737,10 +733,9 @@ row_ins_set_detailed(
rewind(srv_misc_tmpfile);
if (os_file_set_eof(srv_misc_tmpfile)) {
- std::string fk_str;
- ut_print_name(srv_misc_tmpfile, trx, TRUE,
+ ut_print_name(srv_misc_tmpfile, trx,
foreign->foreign_table_name);
- fk_str = dict_print_info_on_foreign_key_in_create_format(
+ std::string fk_str = dict_print_info_on_foreign_key_in_create_format(
trx, foreign, FALSE);
fputs(fk_str.c_str(), srv_misc_tmpfile);
trx_set_detailed_error_from_file(trx, srv_misc_tmpfile);
@@ -765,9 +760,7 @@ row_ins_foreign_trx_print(
ulint n_trx_locks;
ulint heap_size;
- if (srv_read_only_mode) {
- return;
- }
+ ut_ad(!srv_read_only_mode);
lock_mutex_enter();
n_rec_locks = lock_number_of_rows_locked(&trx->lock);
@@ -775,7 +768,7 @@ row_ins_foreign_trx_print(
heap_size = mem_heap_get_size(trx->lock.lock_heap);
lock_mutex_exit();
- mutex_enter(&trx_sys->mutex);
+ trx_sys_mutex_enter();
mutex_enter(&dict_foreign_err_mutex);
rewind(dict_foreign_err_file);
@@ -785,7 +778,7 @@ row_ins_foreign_trx_print(
trx_print_low(dict_foreign_err_file, trx, 600,
n_rec_locks, n_trx_locks, heap_size);
- mutex_exit(&trx_sys->mutex);
+ trx_sys_mutex_exit();
ut_ad(mutex_own(&dict_foreign_err_mutex));
}
@@ -821,23 +814,22 @@ row_ins_foreign_report_err(
row_ins_foreign_trx_print(trx);
fputs("Foreign key constraint fails for table ", ef);
- ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
+ ut_print_name(ef, trx, foreign->foreign_table_name);
fputs(":\n", ef);
fk_str = dict_print_info_on_foreign_key_in_create_format(trx, foreign,
TRUE);
fputs(fk_str.c_str(), ef);
putc('\n', ef);
fputs(errstr, ef);
- fputs(" in parent table, in index ", ef);
- ut_print_name(ef, trx, FALSE, foreign->referenced_index->name);
+ fprintf(ef, " in parent table, in index %s",
+ foreign->referenced_index->name());
if (entry) {
fputs(" tuple:\n", ef);
dtuple_print(ef, entry);
}
fputs("\nBut in child table ", ef);
- ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
- fputs(", in index ", ef);
- ut_print_name(ef, trx, FALSE, foreign->foreign_index->name);
+ ut_print_name(ef, trx, foreign->foreign_table_name);
+ fprintf(ef, ", in index %s", foreign->foreign_index->name());
if (rec) {
fputs(", there is a record:\n", ef);
rec_print(ef, rec, foreign->foreign_index);
@@ -878,13 +870,17 @@ row_ins_foreign_report_add_err(
row_ins_foreign_trx_print(trx);
fputs("Foreign key constraint fails for table ", ef);
- ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
+ ut_print_name(ef, trx, foreign->foreign_table_name);
fputs(":\n", ef);
fk_str = dict_print_info_on_foreign_key_in_create_format(trx, foreign,
TRUE);
fputs(fk_str.c_str(), ef);
- fputs("\nTrying to add in child table, in index ", ef);
- ut_print_name(ef, trx, FALSE, foreign->foreign_index->name);
+ if (foreign->foreign_index) {
+ fprintf(ef, " in parent table, in index %s",
+ foreign->foreign_index->name());
+ } else {
+ fputs(" in parent table", ef);
+ }
if (entry) {
fputs(" tuple:\n", ef);
/* TODO: DB_TRX_ID and DB_ROLL_PTR may be uninitialized.
@@ -892,10 +888,10 @@ row_ins_foreign_report_add_err(
dtuple_print(ef, entry);
}
fputs("\nBut in parent table ", ef);
- ut_print_name(ef, trx, TRUE, foreign->referenced_table_name);
- fputs(", in index ", ef);
- ut_print_name(ef, trx, FALSE, foreign->referenced_index->name);
- fputs(",\nthe closest match we can find is record:\n", ef);
+ ut_print_name(ef, trx, foreign->referenced_table_name);
+ fprintf(ef, ", in index %s,\n"
+ "the closest match we can find is record:\n",
+ foreign->referenced_index->name());
if (rec && page_rec_is_supremum(rec)) {
/* If the cursor ended on a supremum record, it is better
to report the previous record in the error message, so that
@@ -922,19 +918,135 @@ row_ins_invalidate_query_cache(
const char* name) /*!< in: table name prefixed with
database name and a '/' character */
{
- char* buf;
- char* ptr;
ulint len = strlen(name) + 1;
+ innobase_invalidate_query_cache(thr_get_trx(thr), name, len);
+}
- buf = mem_strdupl(name, len);
- ptr = strchr(buf, '/');
- ut_a(ptr);
- *ptr = '\0';
+/** Fill virtual column information in cascade node for the child table.
+@param[out] cascade child update node
+@param[in] rec clustered rec of child table
+@param[in] index clustered index of child table
+@param[in] node parent update node
+@param[in] foreign foreign key information
+@param[out] err error code. */
+static
+void
+row_ins_foreign_fill_virtual(
+ upd_node_t* cascade,
+ const rec_t* rec,
+ dict_index_t* index,
+ upd_node_t* node,
+ dict_foreign_t* foreign,
+ dberr_t* err)
+{
+ THD* thd = current_thd;
+ row_ext_t* ext;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ rec_offs_init(offsets_);
+ const offset_t* offsets =
+ rec_get_offsets(rec, index, offsets_, true,
+ ULINT_UNDEFINED, &cascade->heap);
+ mem_heap_t* v_heap = NULL;
+ TABLE* mysql_table= NULL;
+ VCOL_STORAGE* vcol_storage= NULL;
+ byte* record;
+ upd_t* update = cascade->update;
+ ulint n_v_fld = index->table->n_v_def;
+ ulint n_diff;
+ upd_field_t* upd_field;
+ dict_vcol_set* v_cols = foreign->v_cols;
+ update->old_vrow = row_build(
+ ROW_COPY_DATA, index, rec,
+ offsets, index->table, NULL, NULL,
+ &ext, cascade->heap);
+ n_diff = update->n_fields;
+
+ update->n_fields += n_v_fld;
+
+ if (index->table->vc_templ == NULL) {
+ /** This can occur when there is a cascading
+ delete or update after restart. */
+ innobase_init_vc_templ(index->table);
+ }
+
+ if (innobase_allocate_row_for_vcol(thd, index, &v_heap,
+ &mysql_table,
+ &record, &vcol_storage)) {
+ if (v_heap) mem_heap_free(v_heap);
+ *err = DB_OUT_OF_MEMORY;
+ goto func_exit;
+ }
+
+ for (ulint i = 0; i < n_v_fld; i++) {
+
+ dict_v_col_t* col = dict_table_get_nth_v_col(
+ index->table, i);
+
+ dict_vcol_set::iterator it = v_cols->find(col);
+
+ if (it == v_cols->end()) {
+ continue;
+ }
+
+ dfield_t* vfield = innobase_get_computed_value(
+ update->old_vrow, col, index,
+ &v_heap, update->heap, NULL, thd, mysql_table,
+ record, NULL, NULL, NULL);
+
+ if (vfield == NULL) {
+ *err = DB_COMPUTE_VALUE_FAILED;
+ goto func_exit;
+ }
+
+ upd_field = upd_get_nth_field(update, n_diff);
+
+ upd_field->old_v_val = static_cast<dfield_t*>(
+ mem_heap_alloc(cascade->heap,
+ sizeof *upd_field->old_v_val));
+
+ dfield_copy(upd_field->old_v_val, vfield);
+
+ upd_field_set_v_field_no(upd_field, i, index);
+
+ if (node->is_delete
+ ? (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL)
+ : (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL)) {
+
+ dfield_set_null(&upd_field->new_val);
+ }
- innobase_invalidate_query_cache(thr_get_trx(thr), buf, len);
- mem_free(buf);
+ if (!node->is_delete
+ && (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE)) {
+
+ dfield_t* new_vfield = innobase_get_computed_value(
+ update->old_vrow, col, index,
+ &v_heap, update->heap, NULL, thd,
+ mysql_table, record, NULL,
+ node->update, foreign);
+
+ if (new_vfield == NULL) {
+ *err = DB_COMPUTE_VALUE_FAILED;
+ goto func_exit;
+ }
+
+ dfield_copy(&(upd_field->new_val), new_vfield);
+ }
+
+ n_diff++;
+ }
+
+ update->n_fields = n_diff;
+ *err = DB_SUCCESS;
+
+func_exit:
+ if (v_heap) {
+ if (vcol_storage)
+ innobase_free_row_for_vcol(vcol_storage);
+ mem_heap_free(v_heap);
+ }
}
+
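The new row_ins_foreign_fill_virtual() records the old value of each virtual column in the foreign key's v_cols set and then either nulls the new value (SET NULL) or recomputes it (CASCADE); in the real code the recomputation sees the cascaded new base values via node->update. A simplified sketch of that choice, with a trivial computed column standing in for innobase_get_computed_value():

    #include <cstdio>
    #include <optional>

    enum action_sketch { ON_UPDATE_SET_NULL, ON_UPDATE_CASCADE };

    /* A virtual column derived from one base column; nullopt models SQL NULL. */
    static std::optional<int> compute_vcol(std::optional<int> base)
    {
            if (!base) return std::nullopt;
            return *base * 2;
    }

    int main()
    {
            std::optional<int> base  = 21;
            std::optional<int> old_v = compute_vcol(base);  /* models old_v_val */
            std::optional<int> new_v;

            action_sketch action = ON_UPDATE_CASCADE;
            if (action == ON_UPDATE_SET_NULL) {
                    new_v = std::nullopt;           /* models dfield_set_null() */
            } else {
                    new_v = compute_vcol(base);     /* recompute for CASCADE */
            }
            std::printf("old=%d new=%d\n", old_v.value_or(-1), new_v.value_or(-1));
            return 0;
    }
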
#ifdef WITH_WSREP
dberr_t wsrep_append_foreign_key(trx_t *trx,
dict_foreign_t* foreign,
@@ -948,7 +1060,7 @@ dberr_t wsrep_append_foreign_key(trx_t *trx,
Perform referential actions or checks when a parent row is deleted or updated
and the constraint had an ON DELETE or ON UPDATE condition which was not
RESTRICT.
-@return DB_SUCCESS, DB_LOCK_WAIT, or error code */
+@return DB_SUCCESS, DB_LOCK_WAIT, or error code */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_ins_foreign_check_on_constraint(
@@ -970,28 +1082,26 @@ row_ins_foreign_check_on_constraint(
dict_index_t* index;
dict_index_t* clust_index;
dtuple_t* ref;
- mem_heap_t* upd_vec_heap = NULL;
const rec_t* rec;
const rec_t* clust_rec;
const buf_block_t* clust_block;
upd_t* update;
- ulint n_to_update;
dberr_t err;
- ulint i;
trx_t* trx;
mem_heap_t* tmp_heap = NULL;
doc_id_t doc_id = FTS_NULL_DOC_ID;
- ibool fts_col_affacted = FALSE;
+
+ DBUG_ENTER("row_ins_foreign_check_on_constraint");
trx = thr_get_trx(thr);
/* Since we are going to delete or update a row, we have to invalidate
the MySQL query cache for table. A deadlock of threads is not possible
here because the caller of this function does not hold any latches with
- the sync0sync.h rank above the lock_sys_t::mutex. The query cache mutex
- has a rank just above the lock_sys_t::mutex. */
+ the mutex rank above the lock_sys_t::mutex. The query cache mutex
+ has a rank just above the lock_sys_t::mutex. */
- row_ins_invalidate_query_cache(thr, table->name);
+ row_ins_invalidate_query_cache(thr, table->name.m_name);
node = static_cast<upd_node_t*>(thr->run_node);
@@ -1003,7 +1113,7 @@ row_ins_foreign_check_on_constraint(
thr, foreign,
btr_pcur_get_rec(pcur), entry);
- return(DB_ROW_IS_REFERENCED);
+ DBUG_RETURN(DB_ROW_IS_REFERENCED);
}
if (!node->is_delete && 0 == (foreign->type
@@ -1016,37 +1126,21 @@ row_ins_foreign_check_on_constraint(
thr, foreign,
btr_pcur_get_rec(pcur), entry);
- return(DB_ROW_IS_REFERENCED);
+ DBUG_RETURN(DB_ROW_IS_REFERENCED);
}
if (node->cascade_node == NULL) {
- /* Extend our query graph by creating a child to current
- update node. The child is used in the cascade or set null
- operation. */
-
node->cascade_heap = mem_heap_create(128);
node->cascade_node = row_create_update_node_for_mysql(
table, node->cascade_heap);
que_node_set_parent(node->cascade_node, node);
- }
-
- /* Initialize cascade_node to do the operation we want. Note that we
- use the SAME cascade node to do all foreign key operations of the
- SQL DELETE: the table of the cascade node may change if there are
- several child tables to the table where the delete is done! */
+ }
cascade = node->cascade_node;
-
cascade->table = table;
-
cascade->foreign = foreign;
-
- if (node->is_delete
- && (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE)) {
- cascade->is_delete = TRUE;
- } else {
- cascade->is_delete = FALSE;
-
+ if (!(cascade->is_delete = node->is_delete
+ && (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE))) {
if (foreign->n_fields > cascade->update_n_fields) {
/* We have to make the update vector longer */
@@ -1054,35 +1148,36 @@ row_ins_foreign_check_on_constraint(
node->cascade_heap);
cascade->update_n_fields = foreign->n_fields;
}
- }
- /* We do not allow cyclic cascaded updating (DELETE is allowed,
- but not UPDATE) of the same table, as this can lead to an infinite
- cycle. Check that we are not updating the same table which is
- already being modified in this cascade chain. We have to check
- this also because the modification of the indexes of a 'parent'
- table may still be incomplete, and we must avoid seeing the indexes
- of the parent table in an inconsistent state! */
+ /* We do not allow cyclic cascaded updating (DELETE is
+ allowed, but not UPDATE) of the same table, as this
+ can lead to an infinite cycle. Check that we are not
+ updating the same table which is already being
+ modified in this cascade chain. We have to check this
+ also because the modification of the indexes of a
+ 'parent' table may still be incomplete, and we must
+ avoid seeing the indexes of the parent table in an
+ inconsistent state! */
- if (!cascade->is_delete
- && row_ins_cascade_ancestor_updates_table(cascade, table)) {
+ if (row_ins_cascade_ancestor_updates_table(cascade, table)) {
- /* We do not know if this would break foreign key
- constraints, but play safe and return an error */
+ /* We do not know if this would break foreign key
+ constraints, but play safe and return an error */
- err = DB_ROW_IS_REFERENCED;
+ err = DB_ROW_IS_REFERENCED;
- row_ins_foreign_report_err(
- "Trying an update, possibly causing a cyclic"
- " cascaded update\n"
- "in the child table,", thr, foreign,
- btr_pcur_get_rec(pcur), entry);
+ row_ins_foreign_report_err(
+ "Trying an update, possibly causing a cyclic"
+ " cascaded update\n"
+ "in the child table,", thr, foreign,
+ btr_pcur_get_rec(pcur), entry);
- goto nonstandard_exit_func;
+ goto nonstandard_exit_func;
+ }
}
- if (row_ins_cascade_n_ancestors(cascade) >= 15) {
- err = DB_ROW_IS_REFERENCED;
+ if (row_ins_cascade_n_ancestors(cascade) >= FK_MAX_CASCADE_DEL) {
+ err = DB_FOREIGN_EXCEED_MAX_CASCADE;
row_ins_foreign_report_err(
"Trying a too deep cascaded delete or update\n",
@@ -1125,12 +1220,11 @@ row_ins_foreign_check_on_constraint(
|| btr_pcur_get_low_match(cascade->pcur)
< dict_index_get_n_unique(clust_index)) {
- fputs("InnoDB: error in cascade of a foreign key op\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, trx, index);
+ ib::error() << "In cascade of a foreign key op index "
+ << index->name
+ << " of table " << index->table->name;
- fputs("\n"
- "InnoDB: record ", stderr);
+ fputs("InnoDB: record ", stderr);
rec_print(stderr, rec, index);
fputs("\n"
"InnoDB: clustered record ", stderr);
@@ -1165,6 +1259,9 @@ row_ins_foreign_check_on_constraint(
}
if (rec_get_deleted_flag(clust_rec, dict_table_is_comp(table))) {
+ /* In delete-marked records, DB_TRX_ID must
+ always refer to an existing undo log record. */
+ ut_ad(rec_get_trx_id(clust_rec, clust_index));
/* This can happen if there is a circular reference of
rows such that cascading delete comes to delete a row
already in the process of being delete marked */
@@ -1174,13 +1271,13 @@ row_ins_foreign_check_on_constraint(
}
if (table->fts) {
- doc_id = fts_get_doc_id_from_rec(table, clust_rec, tmp_heap);
+ doc_id = fts_get_doc_id_from_rec(table, clust_rec,
+ clust_index, tmp_heap);
}
if (node->is_delete
? (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL)
: (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL)) {
-
/* Build the appropriate update vector which sets
foreign->n_fields first fields in rec to SQL NULL */
@@ -1191,39 +1288,66 @@ row_ins_foreign_check_on_constraint(
UNIV_MEM_INVALID(update->fields,
update->n_fields * sizeof *update->fields);
- for (i = 0; i < foreign->n_fields; i++) {
+ bool affects_fulltext = false;
+
+ for (ulint i = 0; i < foreign->n_fields; i++) {
upd_field_t* ufield = &update->fields[i];
+ ulint col_no = dict_index_get_nth_col_no(
+ index, i);
+ ulint prefix_col;
ufield->field_no = dict_table_get_nth_col_pos(
- table,
- dict_index_get_nth_col_no(index, i));
+ table, col_no, &prefix_col);
+ dict_col_t* col = dict_table_get_nth_col(
+ table, col_no);
+ dict_col_copy_type(col, dfield_get_type(&ufield->new_val));
+
ufield->orig_len = 0;
ufield->exp = NULL;
dfield_set_null(&ufield->new_val);
- if (table->fts && dict_table_is_fts_column(
- table->fts->indexes,
- dict_index_get_nth_col_no(index, i))
- != ULINT_UNDEFINED) {
- fts_col_affacted = TRUE;
+ if (!affects_fulltext
+ && table->fts && dict_table_is_fts_column(
+ table->fts->indexes,
+ dict_index_get_nth_col_no(index, i),
+ dict_col_is_virtual(
+ dict_index_get_nth_col(index, i)))
+ != ULINT_UNDEFINED) {
+ affects_fulltext = true;
}
}
- if (fts_col_affacted) {
+ if (affects_fulltext) {
fts_trx_add_op(trx, table, doc_id, FTS_DELETE, NULL);
}
+
+ if (foreign->v_cols != NULL
+ && foreign->v_cols->size() > 0) {
+ row_ins_foreign_fill_virtual(
+ cascade, clust_rec, clust_index,
+ node, foreign, &err);
+
+ if (err != DB_SUCCESS) {
+ goto nonstandard_exit_func;
+ }
+ }
} else if (table->fts && cascade->is_delete) {
/* DICT_FOREIGN_ON_DELETE_CASCADE case */
- for (i = 0; i < foreign->n_fields; i++) {
- if (table->fts && dict_table_is_fts_column(
+ bool affects_fulltext = false;
+
+ for (ulint i = 0; i < foreign->n_fields; i++) {
+ if (dict_table_is_fts_column(
table->fts->indexes,
- dict_index_get_nth_col_no(index, i))
- != ULINT_UNDEFINED) {
- fts_col_affacted = TRUE;
+ dict_index_get_nth_col_no(index, i),
+ dict_col_is_virtual(
+ dict_index_get_nth_col(index, i)))
+ != ULINT_UNDEFINED) {
+ affects_fulltext = true;
+ break;
}
}
- if (fts_col_affacted) {
+ if (affects_fulltext) {
fts_trx_add_op(trx, table, doc_id, FTS_DELETE, NULL);
}
}
@@ -1234,12 +1358,21 @@ row_ins_foreign_check_on_constraint(
/* Build the appropriate update vector which sets changing
foreign->n_fields first fields in rec to new values */
- upd_vec_heap = mem_heap_create(256);
+ bool affects_fulltext = row_ins_cascade_calc_update_vec(
+ node, foreign, tmp_heap, trx);
+
+ if (foreign->v_cols && !foreign->v_cols->empty()) {
+ row_ins_foreign_fill_virtual(
+ cascade, clust_rec, clust_index,
+ node, foreign, &err);
- n_to_update = row_ins_cascade_calc_update_vec(
- node, foreign, upd_vec_heap, trx, &fts_col_affacted);
+ if (err != DB_SUCCESS) {
+ goto nonstandard_exit_func;
+ }
+ }
- if (n_to_update == ULINT_UNDEFINED) {
+ switch (cascade->update->n_fields) {
+ case ULINT_UNDEFINED:
err = DB_ROW_IS_REFERENCED;
row_ins_foreign_report_err(
@@ -1252,10 +1385,7 @@ row_ins_foreign_check_on_constraint(
thr, foreign, btr_pcur_get_rec(pcur), entry);
goto nonstandard_exit_func;
- }
-
- if (cascade->update->n_fields == 0) {
-
+ case 0:
/* The update does not change any columns referred
to in this foreign key constraint: no need to do
anything */
@@ -1266,7 +1396,7 @@ row_ins_foreign_check_on_constraint(
}
/* Mark the old Doc ID as deleted */
- if (fts_col_affacted) {
+ if (affects_fulltext) {
ut_ad(table->fts);
fts_trx_add_op(trx, table, doc_id, FTS_DELETE, NULL);
}
@@ -1290,28 +1420,16 @@ row_ins_foreign_check_on_constraint(
cascade->state = UPD_NODE_UPDATE_CLUSTERED;
#ifdef WITH_WSREP
- err = wsrep_append_foreign_key(
- thr_get_trx(thr),
- foreign,
- cascade->pcur->old_rec,
- clust_index,
+ err = wsrep_append_foreign_key(trx, foreign, cascade->pcur->old_rec, clust_index,
FALSE, WSREP_KEY_EXCLUSIVE);
if (err != DB_SUCCESS) {
fprintf(stderr,
"WSREP: foreign key append failed: %d\n", err);
} else
#endif /* WITH_WSREP */
- err = row_update_cascade_for_mysql(thr, cascade,
+ err = row_update_cascade_for_mysql(thr, cascade,
foreign->foreign_table);
- if (foreign->foreign_table->n_foreign_key_checks_running == 0) {
- fprintf(stderr,
- "InnoDB: error: table %s has the counter 0"
- " though there is\n"
- "InnoDB: a FOREIGN KEY check running on it.\n",
- foreign->foreign_table->name);
- }
-
/* Release the data dictionary latch for a while, so that we do not
starve other threads from doing CREATE TABLE etc. if we have a huge
cascaded operation running. The counter n_foreign_key_checks_running
@@ -1334,21 +1452,14 @@ row_ins_foreign_check_on_constraint(
mem_heap_free(tmp_heap);
}
- if (upd_vec_heap) {
- mem_heap_free(upd_vec_heap);
- }
-
- return(err);
+ DBUG_RETURN(err);
nonstandard_exit_func:
+
if (tmp_heap) {
mem_heap_free(tmp_heap);
}
- if (upd_vec_heap) {
- mem_heap_free(upd_vec_heap);
- }
-
btr_pcur_store_position(pcur, mtr);
mtr_commit(mtr);
@@ -1356,13 +1467,13 @@ nonstandard_exit_func:
btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr);
- return(err);
+ DBUG_RETURN(err);
}
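
The depth check rewritten above now fails with DB_FOREIGN_EXCEED_MAX_CASCADE once FK_MAX_CASCADE_DEL ancestors (15, per the removed literal) are reached, so deep or cyclic cascades cannot recurse without bound. A toy version of the ancestor walk:

    #include <cassert>
    #include <cstddef>

    struct node_sketch {
            const node_sketch* parent;
    };

    /* Count ancestors, as row_ins_cascade_n_ancestors() does over the
       query graph. */
    static std::size_t n_ancestors(const node_sketch* node)
    {
            std::size_t n = 0;
            while ((node = node->parent) != nullptr) {
                    n++;
            }
            return n;
    }

    int main()
    {
            const std::size_t max_cascade = 15;  /* value of the removed literal */
            node_sketch a{nullptr}, b{&a}, c{&b};
            assert(n_ancestors(&c) == 2);
            assert(n_ancestors(&c) < max_cascade);
            return 0;
    }
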
/*********************************************************************//**
Sets a shared lock on a record. Used in locking possible duplicate key
records and also in checking foreign key constraints.
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
+@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
static
dberr_t
row_ins_set_shared_rec_lock(
@@ -1372,7 +1483,7 @@ row_ins_set_shared_rec_lock(
const buf_block_t* block, /*!< in: buffer block of rec */
const rec_t* rec, /*!< in: record */
dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
que_thr_t* thr) /*!< in: query thread */
{
dberr_t err;
@@ -1393,7 +1504,7 @@ row_ins_set_shared_rec_lock(
/*********************************************************************//**
Sets an exclusive lock on a record. Used in locking possible duplicate key
records
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
+@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
static
dberr_t
row_ins_set_exclusive_rec_lock(
@@ -1403,7 +1514,7 @@ row_ins_set_exclusive_rec_lock(
const buf_block_t* block, /*!< in: buffer block of rec */
const rec_t* rec, /*!< in: record */
dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
que_thr_t* thr) /*!< in: query thread */
{
dberr_t err;
@@ -1425,8 +1536,7 @@ row_ins_set_exclusive_rec_lock(
Checks if foreign key constraint fails for an index entry. Sets shared locks
which lock either the success or the failure of the constraint. NOTE that
the caller must have a shared latch on dict_operation_lock.
-@return DB_SUCCESS, DB_NO_REFERENCED_ROW, or DB_ROW_IS_REFERENCED */
-UNIV_INTERN
+@return DB_SUCCESS, DB_NO_REFERENCED_ROW, or DB_ROW_IS_REFERENCED */
dberr_t
row_ins_check_foreign_constraint(
/*=============================*/
@@ -1448,21 +1558,25 @@ row_ins_check_foreign_constraint(
ulint n_fields_cmp;
btr_pcur_t pcur;
int cmp;
- ulint i;
mtr_t mtr;
trx_t* trx = thr_get_trx(thr);
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
+
+ bool skip_gap_lock;
+
+ skip_gap_lock = (trx->isolation_level <= TRX_ISO_READ_COMMITTED);
+
+ DBUG_ENTER("row_ins_check_foreign_constraint");
+
rec_offs_init(offsets_);
#ifdef WITH_WSREP
upd_node= NULL;
#endif /* WITH_WSREP */
-run_again:
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_S));
err = DB_SUCCESS;
@@ -1475,11 +1589,8 @@ run_again:
/* If any of the foreign key fields in entry is SQL NULL, we
suppress the foreign key check: this is compatible with Oracle,
for example */
-
- for (i = 0; i < foreign->n_fields; i++) {
- if (UNIV_SQL_NULL == dfield_get_len(
- dtuple_get_nth_field(entry, i))) {
-
+ for (ulint i = 0; i < foreign->n_fields; i++) {
+ if (dfield_is_null(dtuple_get_nth_field(entry, i))) {
goto exit_func;
}
}
@@ -1517,39 +1628,55 @@ run_again:
}
if (check_table == NULL
- || check_table->file_unreadable
- || check_index == NULL) {
-
- if (!srv_read_only_mode && check_ref) {
- FILE* ef = dict_foreign_err_file;
- std::string fk_str;
-
- row_ins_set_detailed(trx, foreign);
-
- row_ins_foreign_trx_print(trx);
-
- fputs("Foreign key constraint fails for table ", ef);
- ut_print_name(ef, trx, TRUE,
- foreign->foreign_table_name);
- fputs(":\n", ef);
- fk_str = dict_print_info_on_foreign_key_in_create_format(
- trx, foreign, TRUE);
- fputs(fk_str.c_str(), ef);
- fputs("\nTrying to add to index ", ef);
- ut_print_name(ef, trx, FALSE,
- foreign->foreign_index->name);
- fputs(" tuple:\n", ef);
+ || !check_table->is_readable()
+ || check_index == NULL
+ || fil_space_get(check_table->space)->is_being_truncated) {
+
+ FILE* ef = dict_foreign_err_file;
+ std::string fk_str;
+
+ row_ins_set_detailed(trx, foreign);
+ row_ins_foreign_trx_print(trx);
+
+ fputs("Foreign key constraint fails for table ", ef);
+ ut_print_name(ef, trx, check_ref
+ ? foreign->foreign_table_name
+ : foreign->referenced_table_name);
+ fputs(":\n", ef);
+ fk_str = dict_print_info_on_foreign_key_in_create_format(
+ trx, foreign, TRUE);
+ fputs(fk_str.c_str(), ef);
+ if (check_ref) {
+ if (foreign->foreign_index) {
+ fprintf(ef, "\nTrying to add to index %s"
+ " tuple:\n",
+ foreign->foreign_index->name());
+ } else {
+ fputs("\nTrying to add tuple:\n", ef);
+ }
dtuple_print(ef, entry);
fputs("\nBut the parent table ", ef);
- ut_print_name(ef, trx, TRUE,
- foreign->referenced_table_name);
- fputs("\nor its .ibd file does"
+ ut_print_name(ef, trx, foreign->referenced_table_name);
+ fputs("\nor its .ibd file or the required index does"
" not currently exist!\n", ef);
- mutex_exit(&dict_foreign_err_mutex);
-
err = DB_NO_REFERENCED_ROW;
+ } else {
+ if (foreign->referenced_index) {
+ fprintf(ef, "\nTrying to modify index %s"
+ " tuple:\n",
+ foreign->referenced_index->name());
+ } else {
+ fputs("\nTrying to modify tuple:\n", ef);
+ }
+ dtuple_print(ef, entry);
+ fputs("\nBut the referencing table ", ef);
+ ut_print_name(ef, trx, foreign->foreign_table_name);
+ fputs("\nor its .ibd file or the required index does"
+ " not currently exist!\n", ef);
+ err = DB_ROW_IS_REFERENCED;
}
+ mutex_exit(&dict_foreign_err_mutex);
goto exit_func;
}
@@ -1587,11 +1714,16 @@ run_again:
continue;
}
- offsets = rec_get_offsets(rec, check_index,
- offsets, ULINT_UNDEFINED, &heap);
+ offsets = rec_get_offsets(rec, check_index, offsets, true,
+ ULINT_UNDEFINED, &heap);
if (page_rec_is_supremum(rec)) {
+ if (skip_gap_lock) {
+
+ continue;
+ }
+
err = row_ins_set_shared_rec_lock(LOCK_ORDINARY, block,
rec, check_index,
offsets, thr);
@@ -1609,8 +1741,16 @@ run_again:
if (cmp == 0) {
if (rec_get_deleted_flag(rec,
rec_offs_comp(offsets))) {
+ /* In delete-marked records, DB_TRX_ID must
+ always refer to an existing undo log record. */
+ ut_ad(!dict_index_is_clust(check_index)
+ || row_get_rec_trx_id(rec, check_index,
+ offsets));
+
err = row_ins_set_shared_rec_lock(
- LOCK_ORDINARY, block,
+ skip_gap_lock
+ ? LOCK_REC_NOT_GAP
+ : LOCK_ORDINARY, block,
rec, check_index, offsets, thr);
switch (err) {
case DB_SUCCESS_LOCKED_REC:
@@ -1715,19 +1855,21 @@ run_again:
} else {
ut_a(cmp < 0);
- err = row_ins_set_shared_rec_lock(
- LOCK_GAP, block,
- rec, check_index, offsets, thr);
+ err = skip_gap_lock
+ ? DB_SUCCESS
+ : row_ins_set_shared_rec_lock(
+ LOCK_GAP, block,
+ rec, check_index, offsets, thr);
switch (err) {
case DB_SUCCESS_LOCKED_REC:
+ err = DB_SUCCESS;
+ /* fall through */
case DB_SUCCESS:
if (check_ref) {
err = DB_NO_REFERENCED_ROW;
row_ins_foreign_report_add_err(
trx, foreign, rec, entry);
- } else {
- err = DB_SUCCESS;
}
default:
break;
@@ -1755,55 +1897,38 @@ end_scan:
do_possible_lock_wait:
if (err == DB_LOCK_WAIT) {
- bool verified = false;
-
trx->error_state = err;
que_thr_stop_for_mysql(thr);
+ thr->lock_state = QUE_THR_LOCK_ROW;
+
+ /* To avoid check_table being dropped, increment counter */
+ my_atomic_addlint(
+ &check_table->n_foreign_key_checks_running, 1);
+
lock_wait_suspend_thread(thr);
- if (check_table->to_be_dropped) {
- /* The table is being dropped. We shall timeout
- this operation */
- err = DB_LOCK_WAIT_TIMEOUT;
- goto exit_func;
- }
+ thr->lock_state = QUE_THR_LOCK_NOLOCK;
- /* We had temporarily released dict_operation_lock in
- above lock sleep wait, now we have the lock again, and
- we will need to re-check whether the foreign key has been
- dropped. We only need to verify if the table is referenced
- table case (check_ref == 0), since MDL lock will prevent
- concurrent DDL and DML on the same table */
- if (!check_ref) {
- for (dict_foreign_set::iterator it
- = table->referenced_set.begin();
- it != table->referenced_set.end();
- ++it) {
- if (*it == foreign) {
- verified = true;
- break;
- }
- }
+ err = trx->error_state;
+ if (err != DB_SUCCESS) {
+ } else if (check_table->to_be_dropped) {
+ err = DB_LOCK_WAIT_TIMEOUT;
} else {
- verified = true;
+ err = DB_LOCK_WAIT;
}
- if (!verified) {
- err = DB_DICT_CHANGED;
- } else if (trx->error_state == DB_SUCCESS) {
- goto run_again;
- } else {
- err = trx->error_state;
- }
+ my_atomic_addlint(&check_table->n_foreign_key_checks_running,
+ -1);
}
exit_func:
- if (UNIV_LIKELY_NULL(heap)) {
+ if (heap != NULL) {
mem_heap_free(heap);
}
- return(err);
+
+ DBUG_RETURN(err);
}
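
The reworked do_possible_lock_wait path increments check_table->n_foreign_key_checks_running atomically before suspending and decrements it afterwards, so the referenced table cannot be dropped while this thread sleeps. A sketch of the same guard with std::atomic (the real code uses my_atomic_addlint and no RAII wrapper):

    #include <atomic>
    #include <cassert>

    struct table_sketch {
            std::atomic<long> n_foreign_key_checks_running{0};
    };

    /* Scope guard: the table stays "pinned" for the duration of a lock wait. */
    struct fk_check_guard {
            table_sketch& t;
            explicit fk_check_guard(table_sketch& tbl) : t(tbl) {
                    t.n_foreign_key_checks_running.fetch_add(1);
            }
            ~fk_check_guard() {
                    t.n_foreign_key_checks_running.fetch_sub(1);
            }
    };

    int main()
    {
            table_sketch check_table;
            {
                    fk_check_guard guard(check_table);
                    /* lock_wait_suspend_thread(thr) would run here */
                    assert(check_table.n_foreign_key_checks_running.load() == 1);
            }
            assert(check_table.n_foreign_key_checks_running.load() == 0);
            return 0;
    }
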
/***************************************************************//**
@@ -1812,13 +1937,14 @@ is not mentioned in any constraint, this function does nothing,
Otherwise does searches to the indexes of referenced tables and
sets shared locks which lock either the success or the failure of
a constraint.
-@return DB_SUCCESS or error code */
+@return DB_SUCCESS or error code */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_ins_check_foreign_constraints(
/*==============================*/
dict_table_t* table, /*!< in: table */
dict_index_t* index, /*!< in: index */
+ bool pk, /*!< in: index->is_primary() */
dtuple_t* entry, /*!< in: index entry for index */
que_thr_t* thr) /*!< in: query thread */
{
@@ -1827,6 +1953,8 @@ row_ins_check_foreign_constraints(
trx_t* trx;
ibool got_s_lock = FALSE;
+ DBUG_ASSERT(index->is_primary() == pk);
+
trx = thr_get_trx(thr);
DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
@@ -1838,9 +1966,9 @@ row_ins_check_foreign_constraints(
foreign = *it;
- if (foreign->foreign_index == index) {
+ if (foreign->foreign_index == index
+ || (pk && !foreign->foreign_index)) {
dict_table_t* ref_table = NULL;
- dict_table_t* foreign_table = foreign->foreign_table;
dict_table_t* referenced_table
= foreign->referenced_table;
@@ -1858,9 +1986,9 @@ row_ins_check_foreign_constraints(
}
if (referenced_table) {
- os_inc_counter(dict_sys->mutex,
- foreign_table
- ->n_foreign_key_checks_running);
+ my_atomic_addlint(
+ &foreign->foreign_table
+ ->n_foreign_key_checks_running, 1);
}
/* NOTE that if the thread ends up waiting for a lock
@@ -1871,13 +1999,10 @@ row_ins_check_foreign_constraints(
err = row_ins_check_foreign_constraint(
TRUE, foreign, table, entry, thr);
- DBUG_EXECUTE_IF("row_ins_dict_change_err",
- err = DB_DICT_CHANGED;);
-
if (referenced_table) {
- os_dec_counter(dict_sys->mutex,
- foreign_table
- ->n_foreign_key_checks_running);
+ my_atomic_addlint(
+ &foreign->foreign_table
+ ->n_foreign_key_checks_running, -1);
}
if (got_s_lock) {
@@ -1901,7 +2026,7 @@ row_ins_check_foreign_constraints(
/***************************************************************//**
Checks if a unique key violation to rec would occur at the index entry
insert.
-@return TRUE if error */
+@return TRUE if error */
static
ibool
row_ins_dupl_error_with_rec(
@@ -1911,10 +2036,9 @@ row_ins_dupl_error_with_rec(
the record! */
const dtuple_t* entry, /*!< in: entry to insert */
dict_index_t* index, /*!< in: index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets)/*!< in: rec_get_offsets(rec, index) */
{
ulint matched_fields;
- ulint matched_bytes;
ulint n_unique;
ulint i;
@@ -1923,10 +2047,8 @@ row_ins_dupl_error_with_rec(
n_unique = dict_index_get_n_unique(index);
matched_fields = 0;
- matched_bytes = 0;
- cmp_dtuple_rec_with_match(entry, rec, offsets,
- &matched_fields, &matched_bytes);
+ cmp_dtuple_rec_with_match(entry, rec, offsets, &matched_fields);
if (matched_fields < n_unique) {
@@ -1936,7 +2058,7 @@ row_ins_dupl_error_with_rec(
/* In a unique secondary index we allow equal key values if they
contain SQL NULLs */
- if (!dict_index_is_clust(index)) {
+ if (!dict_index_is_clust(index) && !index->nulls_equal) {
for (i = 0; i < n_unique; i++) {
if (dfield_is_null(dtuple_get_nth_field(entry, i))) {
@@ -1953,7 +2075,7 @@ row_ins_dupl_error_with_rec(
Scans a unique non-clustered index at a given index entry to determine
whether a uniqueness violation has occurred for the key value of the entry.
Set shared locks on possible duplicate records.
-@return DB_SUCCESS, DB_DUPLICATE_KEY, or DB_LOCK_WAIT */
+@return DB_SUCCESS, DB_DUPLICATE_KEY, or DB_LOCK_WAIT */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_ins_scan_sec_index_for_duplicate(
@@ -1973,11 +2095,14 @@ row_ins_scan_sec_index_for_duplicate(
btr_pcur_t pcur;
dberr_t err = DB_SUCCESS;
ulint allow_duplicates;
- ulint* offsets = NULL;
+ offset_t offsets_[REC_OFFS_SEC_INDEX_SIZE];
+ offset_t* offsets = offsets_;
+ DBUG_ENTER("row_ins_scan_sec_index_for_duplicate");
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(s_latch == rw_lock_own(&index->lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+ rec_offs_init(offsets_);
+
+ ut_ad(s_latch == rw_lock_own_flagged(
+ &index->lock, RW_LOCK_FLAG_S | RW_LOCK_FLAG_SX));
n_unique = dict_index_get_n_unique(index);
@@ -1985,11 +2110,13 @@ row_ins_scan_sec_index_for_duplicate(
n_unique first fields is NULL, a unique key violation cannot occur,
since we define NULL != NULL in this case */
- for (ulint i = 0; i < n_unique; i++) {
- if (UNIV_SQL_NULL == dfield_get_len(
- dtuple_get_nth_field(entry, i))) {
+ if (!index->nulls_equal) {
+ for (ulint i = 0; i < n_unique; i++) {
+ if (UNIV_SQL_NULL == dfield_get_len(
+ dtuple_get_nth_field(entry, i))) {
- return(DB_SUCCESS);
+ DBUG_RETURN(DB_SUCCESS);
+ }
}
}
@@ -2001,7 +2128,7 @@ row_ins_scan_sec_index_for_duplicate(
btr_pcur_open(index, entry, PAGE_CUR_GE,
s_latch
- ? BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED
+ ? BTR_SEARCH_LEAF_ALREADY_S_LATCHED
: BTR_SEARCH_LEAF,
&pcur, mtr);
@@ -2019,7 +2146,7 @@ row_ins_scan_sec_index_for_duplicate(
continue;
}
- offsets = rec_get_offsets(rec, index, offsets,
+ offsets = rec_get_offsets(rec, index, offsets, true,
ULINT_UNDEFINED, &offsets_heap);
if (flags & BTR_NO_LOCKING_FLAG) {
@@ -2065,15 +2192,14 @@ row_ins_scan_sec_index_for_duplicate(
/* If the duplicate is on hidden FTS_DOC_ID,
state so in the error log */
- if (DICT_TF2_FLAG_IS_SET(
+ if (index == index->table->fts_doc_id_index
+ && DICT_TF2_FLAG_IS_SET(
index->table,
- DICT_TF2_FTS_HAS_DOC_ID)
- && strcmp(index->name,
- FTS_DOC_ID_INDEX_NAME) == 0) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Duplicate FTS_DOC_ID value"
- " on table %s",
- index->table->name);
+ DICT_TF2_FTS_HAS_DOC_ID)) {
+
+ ib::error() << "Duplicate FTS_DOC_ID"
+ " value on table "
+ << index->table->name;
}
goto end_scan;
@@ -2088,14 +2214,14 @@ end_scan:
/* Restore old value */
dtuple_set_n_fields_cmp(entry, n_fields_cmp);
- return(err);
+ DBUG_RETURN(err);
}
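
Note: both NULL-handling hunks above hinge on the new index->nulls_equal flag. By default InnoDB treats NULL != NULL in unique secondary indexes, so an entry whose unique prefix contains a NULL can never collide; that is what justifies the early DBUG_RETURN(DB_SUCCESS) and the NULL exemption in row_ins_dupl_error_with_rec(). A comment-sized illustration (assuming standard InnoDB NULL semantics; nulls_equal indexes are the exception introduced here):

	/* nulls_equal == false (default): inserting (a = NULL) can never be
	   a duplicate, even if (a = NULL) already exists, so the duplicate
	   scan is skipped. With nulls_equal == true the same entry goes
	   through the full scan and may raise DB_DUPLICATE_KEY. */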
/** Checks for a duplicate when the table is being rebuilt online.
-@retval DB_SUCCESS when no duplicate is detected
-@retval DB_SUCCESS_LOCKED_REC when rec is an exact match of entry or
+@retval DB_SUCCESS when no duplicate is detected
+@retval DB_SUCCESS_LOCKED_REC when rec is an exact match of entry or
a newer version of entry (the entry should not be inserted)
-@retval DB_DUPLICATE_KEY when entry is a duplicate of rec */
+@retval DB_DUPLICATE_KEY when entry is a duplicate of rec */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_ins_duplicate_online(
@@ -2103,10 +2229,9 @@ row_ins_duplicate_online(
ulint n_uniq, /*!< in: offset of DB_TRX_ID */
const dtuple_t* entry, /*!< in: entry that is being inserted */
const rec_t* rec, /*!< in: clustered index record */
- ulint* offsets)/*!< in/out: rec_get_offsets(rec) */
+ offset_t* offsets)/*!< in/out: rec_get_offsets(rec) */
{
ulint fields = 0;
- ulint bytes = 0;
/* During rebuild, there should not be any delete-marked rows
in the new table. */
@@ -2116,7 +2241,7 @@ row_ins_duplicate_online(
/* Compare the PRIMARY KEY fields and the
DB_TRX_ID, DB_ROLL_PTR. */
cmp_dtuple_rec_with_match_low(
- entry, rec, offsets, n_uniq + 2, &fields, &bytes);
+ entry, rec, offsets, n_uniq + 2, &fields);
if (fields < n_uniq) {
/* Not a duplicate. */
@@ -2125,7 +2250,6 @@ row_ins_duplicate_online(
if (fields == n_uniq + 2) {
/* rec is an exact match of entry. */
- ut_ad(bytes == 0);
return(DB_SUCCESS_LOCKED_REC);
}
@@ -2133,10 +2257,10 @@ row_ins_duplicate_online(
}
/** Checks for a duplicate when the table is being rebuilt online.
-@retval DB_SUCCESS when no duplicate is detected
-@retval DB_SUCCESS_LOCKED_REC when rec is an exact match of entry or
+@retval DB_SUCCESS when no duplicate is detected
+@retval DB_SUCCESS_LOCKED_REC when rec is an exact match of entry or
a newer version of entry (the entry should not be inserted)
-@retval DB_DUPLICATE_KEY when entry is a duplicate of rec */
+@retval DB_DUPLICATE_KEY when entry is a duplicate of rec */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_ins_duplicate_error_in_clust_online(
@@ -2144,14 +2268,14 @@ row_ins_duplicate_error_in_clust_online(
ulint n_uniq, /*!< in: offset of DB_TRX_ID */
const dtuple_t* entry, /*!< in: entry that is being inserted */
const btr_cur_t*cursor, /*!< in: cursor on insert position */
- ulint** offsets,/*!< in/out: rec_get_offsets(rec) */
+ offset_t** offsets,/*!< in/out: rec_get_offsets(rec) */
mem_heap_t** heap) /*!< in/out: heap for offsets */
{
dberr_t err = DB_SUCCESS;
const rec_t* rec = btr_cur_get_rec(cursor);
if (cursor->low_match >= n_uniq && !page_rec_is_infimum(rec)) {
- *offsets = rec_get_offsets(rec, cursor->index, *offsets,
+ *offsets = rec_get_offsets(rec, cursor->index, *offsets, true,
ULINT_UNDEFINED, heap);
err = row_ins_duplicate_online(n_uniq, entry, rec, *offsets);
if (err != DB_SUCCESS) {
@@ -2162,7 +2286,7 @@ row_ins_duplicate_error_in_clust_online(
rec = page_rec_get_next_const(btr_cur_get_rec(cursor));
if (cursor->up_match >= n_uniq && !page_rec_is_supremum(rec)) {
- *offsets = rec_get_offsets(rec, cursor->index, *offsets,
+ *offsets = rec_get_offsets(rec, cursor->index, *offsets, true,
ULINT_UNDEFINED, heap);
err = row_ins_duplicate_online(n_uniq, entry, rec, *offsets);
}
@@ -2181,22 +2305,20 @@ record */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_ins_duplicate_error_in_clust(
+ ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: B-tree cursor */
const dtuple_t* entry, /*!< in: entry to insert */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr) /*!< in: mtr */
+ que_thr_t* thr) /*!< in: query thread */
{
dberr_t err;
rec_t* rec;
ulint n_unique;
trx_t* trx = thr_get_trx(thr);
mem_heap_t*heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
rec_offs_init(offsets_);
- UT_NOT_USED(mtr);
-
ut_ad(dict_index_is_clust(cursor->index));
/* NOTE: For unique non-clustered indexes there may be any number
@@ -2220,6 +2342,7 @@ row_ins_duplicate_error_in_clust(
if (!page_rec_is_infimum(rec)) {
offsets = rec_get_offsets(rec, cursor->index, offsets,
+ true,
ULINT_UNDEFINED, &heap);
/* We set a lock on the possible duplicate: this
@@ -2227,7 +2350,10 @@ row_ins_duplicate_error_in_clust(
sure that in roll-forward we get the same duplicate
errors as in original execution */
- if (trx->duplicates) {
+ if (flags & BTR_NO_LOCKING_FLAG) {
+ /* Do nothing if no-locking is set */
+ err = DB_SUCCESS;
+ } else if (trx->duplicates) {
/* If the SQL-query will update or replace
duplicate key we will take X-lock for
@@ -2270,6 +2396,7 @@ duplicate:
if (!page_rec_is_supremum(rec)) {
offsets = rec_get_offsets(rec, cursor->index, offsets,
+ true,
ULINT_UNDEFINED, &heap);
if (trx->duplicates) {
@@ -2346,6 +2473,66 @@ row_ins_must_modify_rec(
&& !page_rec_is_infimum(btr_cur_get_rec(cursor)));
}
+/** Insert the externally stored fields (off-page columns)
+of a clustered index entry.
+@param[in] entry index entry to insert
+@param[in] big_rec externally stored fields
+@param[in,out] offsets rec_get_offsets()
+@param[in,out] heap memory heap
+@param[in] thd client connection, or NULL
+@param[in] index clustered index
+@return error code
+@retval DB_SUCCESS
+@retval DB_OUT_OF_FILE_SPACE */
+static
+dberr_t
+row_ins_index_entry_big_rec(
+ const dtuple_t* entry,
+ const big_rec_t* big_rec,
+ offset_t* offsets,
+ mem_heap_t** heap,
+ dict_index_t* index,
+ const void* thd __attribute__((unused)))
+{
+ mtr_t mtr;
+ btr_pcur_t pcur;
+ rec_t* rec;
+ dberr_t error;
+
+ ut_ad(dict_index_is_clust(index));
+
+ DEBUG_SYNC_C_IF_THD(thd, "before_row_ins_extern_latch");
+
+ mtr.start();
+ if (index->table->is_temporary()) {
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
+ } else {
+ mtr.set_named_space(index->space);
+ }
+
+ btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_MODIFY_TREE,
+ &pcur, &mtr);
+ rec = btr_pcur_get_rec(&pcur);
+ offsets = rec_get_offsets(rec, index, offsets, true,
+ ULINT_UNDEFINED, heap);
+
+ DEBUG_SYNC_C_IF_THD(thd, "before_row_ins_extern");
+ error = btr_store_big_rec_extern_fields(
+ &pcur, offsets, big_rec, &mtr, BTR_STORE_INSERT);
+ DEBUG_SYNC_C_IF_THD(thd, "after_row_ins_extern");
+
+ if (error == DB_SUCCESS
+ && dict_index_is_online_ddl(index)) {
+ row_log_table_insert(btr_pcur_get_rec(&pcur), index, offsets);
+ }
+
+ mtr.commit();
+
+ btr_pcur_close(&pcur);
+
+ return(error);
+}
+
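
This function replaces row_ins_index_entry_big_rec_func() and its wrapper macro, which are deleted further down. The caller pattern, as it appears in the rewritten row_ins_clust_index_entry_low() later in this diff, is: commit the mini-transaction that performed the B-tree insert, then store the off-page columns in the fresh mini-transaction opened here:

	if (big_rec != NULL) {
		mtr_commit(&mtr);
		err = row_ins_index_entry_big_rec(
			entry, big_rec, offsets, &offsets_heap,
			index, thr_get_trx(thr)->mysql_thd);
		dtuple_convert_back_big_rec(index, entry, big_rec);
	}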
/***************************************************************//**
Tries to insert an entry into a clustered index, ignoring foreign key
constraints. If a record with the same unique key is found, the other
@@ -2357,7 +2544,6 @@ the delete marked record.
@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG)
@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed
@return error code */
-UNIV_INTERN
dberr_t
row_ins_clust_index_entry_low(
/*==========================*/
@@ -2371,54 +2557,89 @@ row_ins_clust_index_entry_low(
ulint n_ext, /*!< in: number of externally stored columns */
que_thr_t* thr) /*!< in: query thread */
{
- btr_cur_t cursor;
- ulint* offsets = NULL;
+ btr_pcur_t pcur;
+ btr_cur_t* cursor;
dberr_t err = DB_SUCCESS;
big_rec_t* big_rec = NULL;
mtr_t mtr;
+ ib_uint64_t auto_inc = 0;
mem_heap_t* offsets_heap = NULL;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
+ rec_offs_init(offsets_);
+
+ DBUG_ENTER("row_ins_clust_index_entry_low");
ut_ad(dict_index_is_clust(index));
ut_ad(!dict_index_is_unique(index)
|| n_uniq == dict_index_get_n_unique(index));
ut_ad(!n_uniq || n_uniq == dict_index_get_n_unique(index));
+ ut_ad(!thr_get_trx(thr)->in_rollback);
mtr_start(&mtr);
- if (mode == BTR_MODIFY_LEAF && dict_index_is_online_ddl(index)) {
- mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
- mtr_s_lock(dict_index_get_lock(index), &mtr);
- }
+ if (dict_table_is_temporary(index->table)) {
+		/* Disable redo logging: temp-tables live only for the
+		duration of the server or the connection, so their redo
+		information is never needed for recovery on restart.
+		Disable locking as temp-tables are local to a connection. */
- cursor.thr = thr;
+ ut_ad(flags & BTR_NO_LOCKING_FLAG);
+ ut_ad(!dict_index_is_online_ddl(index));
+ ut_ad(!index->table->persistent_autoinc);
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
+ } else {
+ mtr.set_named_space(index->space);
+
+ if (mode == BTR_MODIFY_LEAF
+ && dict_index_is_online_ddl(index)) {
+ mode = BTR_MODIFY_LEAF_ALREADY_S_LATCHED;
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+ }
+
+ if (unsigned ai = index->table->persistent_autoinc) {
+ /* Prepare to persist the AUTO_INCREMENT value
+ from the index entry to PAGE_ROOT_AUTO_INC. */
+ const dfield_t* dfield = dtuple_get_nth_field(
+ entry, ai - 1);
+ auto_inc = dfield_is_null(dfield)
+ ? 0
+ : row_parse_int(static_cast<const byte*>(
+ dfield->data),
+ dfield->len,
+ dfield->type.mtype,
+ dfield->type.prtype
+ & DATA_UNSIGNED);
+ }
+ }
/* Note that we use PAGE_CUR_LE as the search mode, because then
the function will return in both low_match and up_match of the
cursor sensible values */
-
- err = btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, mode,
- &cursor, 0, __FILE__, __LINE__, &mtr);
-
+ err = btr_pcur_open_low(index, 0, entry, PAGE_CUR_LE, mode, &pcur,
+ __FILE__, __LINE__, auto_inc, &mtr);
if (err != DB_SUCCESS) {
index->table->file_unreadable = true;
- mtr_commit(&mtr);
+ mtr.commit();
goto func_exit;
}
+ cursor = btr_pcur_get_btr_cur(&pcur);
+ cursor->thr = thr;
+
#ifdef UNIV_DEBUG
{
- page_t* page = btr_cur_get_page(&cursor);
+ page_t* page = btr_cur_get_page(cursor);
rec_t* first_rec = page_rec_get_next(
page_get_infimum_rec(page));
ut_ad(page_rec_is_supremum(first_rec)
- || rec_get_n_fields(first_rec, index)
- == dtuple_get_n_fields(entry));
+ || rec_n_fields_is_sane(index, first_rec, entry));
}
-#endif
+#endif /* UNIV_DEBUG */
- if (n_uniq && (cursor.up_match >= n_uniq
- || cursor.low_match >= n_uniq)) {
+ if (n_uniq
+ && (cursor->up_match >= n_uniq || cursor->low_match >= n_uniq)) {
if (flags
== (BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG
@@ -2426,7 +2647,7 @@ row_ins_clust_index_entry_low(
/* Set no locks when applying log
in online table rebuild. Only check for duplicates. */
err = row_ins_duplicate_error_in_clust_online(
- n_uniq, entry, &cursor,
+ n_uniq, entry, cursor,
&offsets, &offsets_heap);
switch (err) {
@@ -2437,14 +2658,14 @@ row_ins_clust_index_entry_low(
/* fall through */
case DB_SUCCESS_LOCKED_REC:
case DB_DUPLICATE_KEY:
- thr_get_trx(thr)->error_info = cursor.index;
+ thr_get_trx(thr)->error_info = cursor->index;
}
} else {
/* Note that the following may return also
DB_LOCK_WAIT */
err = row_ins_duplicate_error_in_clust(
- &cursor, entry, thr, &mtr);
+ flags, cursor, entry, thr);
}
if (err != DB_SUCCESS) {
@@ -2454,77 +2675,21 @@ err_exit:
}
}
- if (row_ins_must_modify_rec(&cursor)) {
+	/* Note: when duplicates are allowed, the insert can qualify as a
+	modification of an existing record, because the new entry is
+	identical to the old one. */
+ if (row_ins_must_modify_rec(cursor)) {
/* There is already an index entry with a long enough common
prefix, we must convert the insert into a modify of an
existing record */
mem_heap_t* entry_heap = mem_heap_create(1024);
err = row_ins_clust_index_entry_by_modify(
- flags, mode, &cursor, &offsets, &offsets_heap,
- entry_heap, &big_rec, entry, thr, &mtr);
-
- rec_t* rec = btr_cur_get_rec(&cursor);
-
- if (big_rec) {
- ut_a(err == DB_SUCCESS);
- /* Write out the externally stored
- columns while still x-latching
- index->lock and block->lock. Allocate
- pages for big_rec in the mtr that
- modified the B-tree, but be sure to skip
- any pages that were freed in mtr. We will
- write out the big_rec pages before
- committing the B-tree mini-transaction. If
- the system crashes so that crash recovery
- will not replay the mtr_commit(&mtr), the
- big_rec pages will be left orphaned until
- the pages are allocated for something else.
-
- TODO: If the allocation extends the
- tablespace, it will not be redo
- logged, in either mini-transaction.
- Tablespace extension should be
- redo-logged in the big_rec
- mini-transaction, so that recovery
- will not fail when the big_rec was
- written to the extended portion of the
- file, in case the file was somehow
- truncated in the crash. */
-
- DEBUG_SYNC_C_IF_THD(
- thr_get_trx(thr)->mysql_thd,
- "before_row_ins_upd_extern");
- err = btr_store_big_rec_extern_fields(
- index, btr_cur_get_block(&cursor),
- rec, offsets, big_rec, &mtr,
- BTR_STORE_INSERT_UPDATE);
- DEBUG_SYNC_C_IF_THD(
- thr_get_trx(thr)->mysql_thd,
- "after_row_ins_upd_extern");
- /* If writing big_rec fails (for
- example, because of DB_OUT_OF_FILE_SPACE),
- the record will be corrupted. Even if
- we did not update any externally
- stored columns, our update could cause
- the record to grow so that a
- non-updated column was selected for
- external storage. This non-update
- would not have been written to the
- undo log, and thus the record cannot
- be rolled back.
-
- However, because we have not executed
- mtr_commit(mtr) yet, the update will
- not be replayed in crash recovery, and
- the following assertion failure will
- effectively "roll back" the operation. */
- ut_a(err == DB_SUCCESS);
- dtuple_big_rec_free(big_rec);
- }
+ &pcur, flags, mode, &offsets, &offsets_heap,
+ entry_heap, entry, thr, &mtr);
if (err == DB_SUCCESS && dict_index_is_online_ddl(index)) {
- row_log_table_insert(rec, index, offsets);
+ row_log_table_insert(btr_cur_get_rec(cursor),
+ index, offsets);
}
mtr_commit(&mtr);
@@ -2536,7 +2701,7 @@ err_exit:
ut_ad((mode & ~BTR_ALREADY_S_LATCHED)
== BTR_MODIFY_LEAF);
err = btr_cur_optimistic_insert(
- flags, &cursor, &offsets, &offsets_heap,
+ flags, cursor, &offsets, &offsets_heap,
entry, &insert_rec, &big_rec,
n_ext, thr, &mtr);
} else {
@@ -2546,22 +2711,24 @@ err_exit:
goto err_exit;
}
+ DEBUG_SYNC_C("before_insert_pessimitic_row_ins_clust");
+
err = btr_cur_optimistic_insert(
- flags, &cursor,
+ flags, cursor,
&offsets, &offsets_heap,
entry, &insert_rec, &big_rec,
n_ext, thr, &mtr);
if (err == DB_FAIL) {
err = btr_cur_pessimistic_insert(
- flags, &cursor,
+ flags, cursor,
&offsets, &offsets_heap,
entry, &insert_rec, &big_rec,
n_ext, thr, &mtr);
}
}
- if (UNIV_LIKELY_NULL(big_rec)) {
+ if (big_rec != NULL) {
mtr_commit(&mtr);
/* Online table rebuild could read (and
@@ -2571,12 +2738,10 @@ err_exit:
DBUG_EXECUTE_IF(
"row_ins_extern_checkpoint",
- log_make_checkpoint_at(
- LSN_MAX, TRUE););
+ log_write_up_to(mtr.commit_lsn(), true););
err = row_ins_index_entry_big_rec(
entry, big_rec, offsets, &offsets_heap, index,
- thr_get_trx(thr)->mysql_thd,
- __FILE__, __LINE__);
+ thr_get_trx(thr)->mysql_thd);
dtuple_convert_back_big_rec(index, entry, big_rec);
} else {
if (err == DB_SUCCESS
@@ -2590,29 +2755,37 @@ err_exit:
}
func_exit:
- if (offsets_heap) {
+ if (offsets_heap != NULL) {
mem_heap_free(offsets_heap);
}
- return(err);
+ btr_pcur_close(&pcur);
+
+ DBUG_RETURN(err);
}
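
Note on ordering: committing the insert mini-transaction before the off-page columns are written opens a crash window, which the long comment block deleted above discusses: if the server dies in between, the uncommitted insert is expected to be rolled back by normal undo processing, and any pages already allocated for the BLOB are merely leaked until they are reallocated. The retained "row_ins_extern_checkpoint" injection models the widest such window by flushing the redo log up to mtr.commit_lsn() before row_ins_index_entry_big_rec() runs.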
-/***************************************************************//**
-Starts a mini-transaction and checks if the index will be dropped.
+/** Start a mini-transaction and check if the index will be dropped.
+@param[in,out] mtr mini-transaction
+@param[in,out] index secondary index
+@param[in] check whether to check
+@param[in] search_mode flags
@return true if the index is to be dropped */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
+static MY_ATTRIBUTE((warn_unused_result))
bool
row_ins_sec_mtr_start_and_check_if_aborted(
-/*=======================================*/
- mtr_t* mtr, /*!< out: mini-transaction */
- dict_index_t* index, /*!< in/out: secondary index */
- bool check, /*!< in: whether to check */
+ mtr_t* mtr,
+ dict_index_t* index,
+ bool check,
ulint search_mode)
- /*!< in: flags */
{
ut_ad(!dict_index_is_clust(index));
+ ut_ad(mtr->is_named_space(index->space));
+
+ const mtr_log_t log_mode = mtr->get_log_mode();
mtr_start(mtr);
+ mtr->set_named_space(index->space);
+ mtr->set_log_mode(log_mode);
if (!check) {
return(false);
@@ -2621,13 +2794,13 @@ row_ins_sec_mtr_start_and_check_if_aborted(
if (search_mode & BTR_ALREADY_S_LATCHED) {
mtr_s_lock(dict_index_get_lock(index), mtr);
} else {
- mtr_x_lock(dict_index_get_lock(index), mtr);
+ mtr_sx_lock(dict_index_get_lock(index), mtr);
}
switch (index->online_status) {
case ONLINE_INDEX_ABORTED:
case ONLINE_INDEX_ABORTED_DROPPED:
- ut_ad(*index->name == TEMP_INDEX_PREFIX);
+ ut_ad(!index->is_committed());
return(true);
case ONLINE_INDEX_COMPLETE:
return(false);
@@ -2647,7 +2820,6 @@ It is then unmarked. Otherwise, the entry is just inserted to the index.
@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG)
@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed
@return error code */
-UNIV_INTERN
dberr_t
row_ins_sec_index_entry_low(
/*========================*/
@@ -2664,19 +2836,38 @@ row_ins_sec_index_entry_low(
row_log_table_apply(), or 0 */
que_thr_t* thr) /*!< in: query thread */
{
+ DBUG_ENTER("row_ins_sec_index_entry_low");
+
btr_cur_t cursor;
- ulint search_mode = mode | BTR_INSERT;
+ ulint search_mode = mode;
dberr_t err = DB_SUCCESS;
ulint n_unique;
mtr_t mtr;
- ulint* offsets = NULL;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
+ rec_offs_init(offsets_);
+ rtr_info_t rtr_info;
ut_ad(!dict_index_is_clust(index));
ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_MODIFY_TREE);
cursor.thr = thr;
- ut_ad(thr_get_trx(thr)->id);
- mtr_start(&mtr);
+ cursor.rtr_info = NULL;
+ ut_ad(thr_get_trx(thr)->id != 0);
+
+ mtr.start();
+
+ if (index->table->is_temporary()) {
+ /* Disable locking, because temporary tables are never
+ shared between transactions or connections. */
+ ut_ad(flags & BTR_NO_LOCKING_FLAG);
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
+ } else {
+ mtr.set_named_space(index->space);
+ if (!dict_index_is_spatial(index)) {
+ search_mode |= BTR_INSERT;
+ }
+ }
/* Ensure that we acquire index->lock when inserting into an
index with index->online_status == ONLINE_INDEX_COMPLETE, but
@@ -2684,14 +2875,14 @@ row_ins_sec_index_entry_low(
This prevents a concurrent change of index->online_status.
The memory object cannot be freed as long as we have an open
reference to the table, or index->table->n_ref_count > 0. */
- const bool check = *index->name == TEMP_INDEX_PREFIX;
+ const bool check = !index->is_committed();
if (check) {
DEBUG_SYNC_C("row_ins_sec_index_enter");
if (mode == BTR_MODIFY_LEAF) {
search_mode |= BTR_ALREADY_S_LATCHED;
mtr_s_lock(dict_index_get_lock(index), &mtr);
} else {
- mtr_x_lock(dict_index_get_lock(index), &mtr);
+ mtr_sx_lock(dict_index_get_lock(index), &mtr);
}
if (row_log_online_op_try(
@@ -2708,9 +2899,43 @@ row_ins_sec_index_entry_low(
search_mode |= BTR_IGNORE_SEC_UNIQUE;
}
- err = btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
- search_mode,
- &cursor, 0, __FILE__, __LINE__, &mtr);
+ if (dict_index_is_spatial(index)) {
+ cursor.index = index;
+ rtr_init_rtr_info(&rtr_info, false, &cursor, index, false);
+ rtr_info_update_btr(&cursor, &rtr_info);
+
+ err = btr_cur_search_to_nth_level(
+ index, 0, entry, PAGE_CUR_RTREE_INSERT,
+ search_mode,
+ &cursor, 0, __FILE__, __LINE__, &mtr);
+
+ if (mode == BTR_MODIFY_LEAF && rtr_info.mbr_adj) {
+ mtr_commit(&mtr);
+ rtr_clean_rtr_info(&rtr_info, true);
+ rtr_init_rtr_info(&rtr_info, false, &cursor,
+ index, false);
+ rtr_info_update_btr(&cursor, &rtr_info);
+ mtr_start(&mtr);
+ mtr.set_named_space(index->space);
+ search_mode &= ulint(~BTR_MODIFY_LEAF);
+ search_mode |= BTR_MODIFY_TREE;
+ err = btr_cur_search_to_nth_level(
+ index, 0, entry, PAGE_CUR_RTREE_INSERT,
+ search_mode,
+ &cursor, 0, __FILE__, __LINE__, &mtr);
+ mode = BTR_MODIFY_TREE;
+ }
+
+ DBUG_EXECUTE_IF(
+ "rtree_test_check_count", {
+ goto func_exit;});
+
+ } else {
+ err = btr_cur_search_to_nth_level(
+ index, 0, entry, PAGE_CUR_LE,
+ search_mode,
+ &cursor, 0, __FILE__, __LINE__, &mtr);
+ }
if (err != DB_SUCCESS) {
if (err == DB_DECRYPTION_FAILED) {
@@ -2719,13 +2944,14 @@ row_ins_sec_index_entry_low(
"Table %s is encrypted but encryption service or"
" used key_id is not available. "
" Can't continue reading table.",
- index->table->name);
+ index->table->name.m_name);
index->table->file_unreadable = true;
}
goto func_exit;
}
if (cursor.flag == BTR_CUR_INSERT_TO_IBUF) {
+ ut_ad(!dict_index_is_spatial(index));
/* The insert was buffered during the search: we are done */
goto func_exit;
}
@@ -2737,10 +2963,9 @@ row_ins_sec_index_entry_low(
page_get_infimum_rec(page));
ut_ad(page_rec_is_supremum(first_rec)
- || rec_get_n_fields(first_rec, index)
- == dtuple_get_n_fields(entry));
+ || rec_n_fields_is_sane(index, first_rec, entry));
}
-#endif
+#endif /* UNIV_DEBUG */
n_unique = dict_index_get_n_unique(index);
@@ -2764,12 +2989,11 @@ row_ins_sec_index_entry_low(
case DB_SUCCESS:
break;
case DB_DUPLICATE_KEY:
- if (*index->name == TEMP_INDEX_PREFIX) {
+ if (!index->is_committed()) {
ut_ad(!thr_get_trx(thr)
->dict_operation_lock_mode);
mutex_enter(&dict_sys->mutex);
- dict_set_corrupted_index_cache_only(
- index, index->table);
+ dict_set_corrupted_index_cache_only(index);
mutex_exit(&dict_sys->mutex);
/* Do not return any error to the
caller. The duplicate will be reported
@@ -2782,7 +3006,10 @@ row_ins_sec_index_entry_low(
}
/* fall through */
default:
- return(err);
+ if (dict_index_is_spatial(index)) {
+ rtr_clean_rtr_info(&rtr_info, true);
+ }
+ DBUG_RETURN(err);
}
if (row_ins_sec_mtr_start_and_check_if_aborted(
@@ -2797,10 +3024,10 @@ row_ins_sec_index_entry_low(
prevent any insertion of a duplicate by another
transaction. Let us now reposition the cursor and
continue the insertion. */
-
btr_cur_search_to_nth_level(
index, 0, entry, PAGE_CUR_LE,
- search_mode & ~(BTR_INSERT | BTR_IGNORE_SEC_UNIQUE),
+ (search_mode
+ & ~(BTR_INSERT | BTR_IGNORE_SEC_UNIQUE)),
&cursor, 0, __FILE__, __LINE__, &mtr);
}
@@ -2809,12 +3036,17 @@ row_ins_sec_index_entry_low(
prefix, we must convert the insert into a modify of an
existing record */
offsets = rec_get_offsets(
- btr_cur_get_rec(&cursor), index, offsets,
+ btr_cur_get_rec(&cursor), index, offsets, true,
ULINT_UNDEFINED, &offsets_heap);
err = row_ins_sec_index_entry_by_modify(
flags, mode, &cursor, &offsets,
offsets_heap, heap, entry, thr, &mtr);
+
+ if (err == DB_SUCCESS && dict_index_is_spatial(index)
+ && rtr_info.mbr_adj) {
+ err = rtr_ins_enlarge_mbr(&cursor, thr, &mtr);
+ }
} else {
rec_t* insert_rec;
big_rec_t* big_rec;
@@ -2824,6 +3056,11 @@ row_ins_sec_index_entry_low(
flags, &cursor, &offsets, &offsets_heap,
entry, &insert_rec,
&big_rec, 0, thr, &mtr);
+ if (err == DB_SUCCESS
+ && dict_index_is_spatial(index)
+ && rtr_info.mbr_adj) {
+ err = rtr_ins_enlarge_mbr(&cursor, thr, &mtr);
+ }
} else {
ut_ad(mode == BTR_MODIFY_TREE);
if (buf_LRU_buf_pool_running_out()) {
@@ -2844,6 +3081,11 @@ row_ins_sec_index_entry_low(
entry, &insert_rec,
&big_rec, 0, thr, &mtr);
}
+ if (err == DB_SUCCESS
+ && dict_index_is_spatial(index)
+ && rtr_info.mbr_adj) {
+ err = rtr_ins_enlarge_mbr(&cursor, thr, &mtr);
+ }
}
if (err == DB_SUCCESS && trx_id) {
@@ -2857,60 +3099,12 @@ row_ins_sec_index_entry_low(
}
func_exit:
- mtr_commit(&mtr);
- return(err);
-}
-
-/***************************************************************//**
-Tries to insert the externally stored fields (off-page columns)
-of a clustered index entry.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-UNIV_INTERN
-dberr_t
-row_ins_index_entry_big_rec_func(
-/*=============================*/
- const dtuple_t* entry, /*!< in/out: index entry to insert */
- const big_rec_t* big_rec,/*!< in: externally stored fields */
- ulint* offsets,/*!< in/out: rec offsets */
- mem_heap_t** heap, /*!< in/out: memory heap */
- dict_index_t* index, /*!< in: index */
- const char* file, /*!< in: file name of caller */
-#ifndef DBUG_OFF
- const void* thd, /*!< in: connection, or NULL */
-#endif /* DBUG_OFF */
- ulint line) /*!< in: line number of caller */
-{
- mtr_t mtr;
- btr_cur_t cursor;
- rec_t* rec;
- dberr_t error;
-
- ut_ad(dict_index_is_clust(index));
-
- DEBUG_SYNC_C_IF_THD(thd, "before_row_ins_extern_latch");
-
- mtr_start(&mtr);
- btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
- BTR_MODIFY_TREE, &cursor, 0,
- file, line, &mtr);
- rec = btr_cur_get_rec(&cursor);
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, heap);
-
- DEBUG_SYNC_C_IF_THD(thd, "before_row_ins_extern");
- error = btr_store_big_rec_extern_fields(
- index, btr_cur_get_block(&cursor),
- rec, offsets, big_rec, &mtr, BTR_STORE_INSERT);
- DEBUG_SYNC_C_IF_THD(thd, "after_row_ins_extern");
-
- if (error == DB_SUCCESS
- && dict_index_is_online_ddl(index)) {
- row_log_table_insert(rec, index, offsets);
+ if (dict_index_is_spatial(index)) {
+ rtr_clean_rtr_info(&rtr_info, true);
}
mtr_commit(&mtr);
-
- return(error);
+ DBUG_RETURN(err);
}
/***************************************************************//**
@@ -2918,8 +3112,7 @@ Inserts an entry into a clustered index. Tries first optimistic,
then pessimistic descent down the tree. If the entry matches enough
to a delete marked record, performs the insert by updating or delete
unmarking the delete marked record.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
dberr_t
row_ins_clust_index_entry(
/*======================*/
@@ -2931,44 +3124,54 @@ row_ins_clust_index_entry(
dberr_t err;
ulint n_uniq;
+ DBUG_ENTER("row_ins_clust_index_entry");
+
if (!index->table->foreign_set.empty()) {
err = row_ins_check_foreign_constraints(
- index->table, index, entry, thr);
+ index->table, index, true, entry, thr);
if (err != DB_SUCCESS) {
- return(err);
+ DBUG_RETURN(err);
}
}
n_uniq = dict_index_is_unique(index) ? index->n_uniq : 0;
- /* Try first optimistic descent to the B-tree */
+ ulint flags = dict_table_is_temporary(index->table)
+ ? BTR_NO_LOCKING_FLAG
+ : 0;
+
+	/* For an intermediate table of ALTER TABLE ... ALGORITHM=COPY,
+	skip undo logging and record lock checking for the insert. */
+ if (index->table->skip_alter_undo) {
+ flags |= BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG;
+ }
+ /* Try first optimistic descent to the B-tree */
log_free_check();
err = row_ins_clust_index_entry_low(
- 0, BTR_MODIFY_LEAF, index, n_uniq, entry, n_ext, thr);
+ flags, BTR_MODIFY_LEAF, index, n_uniq, entry,
+ n_ext, thr);
-#ifdef UNIV_DEBUG
- /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC().
- Once it is fixed, remove the 'ifdef', 'if' and this comment. */
- if (!thr_get_trx(thr)->ddl) {
- DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
- "after_row_ins_clust_index_entry_leaf");
- }
-#endif /* UNIV_DEBUG */
+ DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
+ "after_row_ins_clust_index_entry_leaf");
if (err != DB_FAIL) {
DEBUG_SYNC_C("row_ins_clust_index_entry_leaf_after");
- return(err);
+ DBUG_RETURN(err);
}
/* Try then pessimistic descent to the B-tree */
-
log_free_check();
- return(row_ins_clust_index_entry_low(
- 0, BTR_MODIFY_TREE, index, n_uniq, entry, n_ext, thr));
+ err = row_ins_clust_index_entry_low(
+ flags, BTR_MODIFY_TREE, index, n_uniq, entry,
+ n_ext, thr);
+
+ DBUG_RETURN(err);
}
/***************************************************************//**
@@ -2976,8 +3179,7 @@ Inserts an entry into a secondary index. Tries first optimistic,
then pessimistic descent down the tree. If the entry matches enough
to a delete marked record, performs the insert by updating or delete
unmarking the delete marked record.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
dberr_t
row_ins_sec_index_entry(
/*====================*/
@@ -2988,21 +3190,22 @@ row_ins_sec_index_entry(
dberr_t err;
mem_heap_t* offsets_heap;
mem_heap_t* heap;
+ trx_id_t trx_id = 0;
DBUG_EXECUTE_IF("row_ins_sec_index_entry_timeout", {
- DBUG_SET("-d,row_ins_sec_index_entry_timeout");
- return(DB_LOCK_WAIT);});
+ DBUG_SET("-d,row_ins_sec_index_entry_timeout");
+ return(DB_LOCK_WAIT);});
if (!index->table->foreign_set.empty()) {
err = row_ins_check_foreign_constraints(index->table, index,
- entry, thr);
+ false, entry, thr);
if (err != DB_SUCCESS) {
return(err);
}
}
- ut_ad(thr_get_trx(thr)->id);
+ ut_ad(thr_get_trx(thr)->id != 0);
offsets_heap = mem_heap_create(1024);
heap = mem_heap_create(1024);
@@ -3010,23 +3213,35 @@ row_ins_sec_index_entry(
/* Try first optimistic descent to the B-tree */
log_free_check();
+ ulint flags = dict_table_is_temporary(index->table)
+ ? BTR_NO_LOCKING_FLAG
+ : 0;
+
+	/* For an intermediate table of ALTER TABLE ... ALGORITHM=COPY,
+	skip undo logging and record lock checking for the insert. */
+ if (index->table->skip_alter_undo) {
+ trx_id = thr_get_trx(thr)->id;
+ flags |= BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG;
+ }
err = row_ins_sec_index_entry_low(
- 0, BTR_MODIFY_LEAF, index, offsets_heap, heap, entry, 0, thr);
+ flags, BTR_MODIFY_LEAF, index, offsets_heap, heap, entry,
+ trx_id, thr);
if (err == DB_FAIL) {
mem_heap_empty(heap);
if (index->space == IBUF_SPACE_ID
- && !dict_index_is_unique(index)) {
+ && !(index->type & (DICT_UNIQUE | DICT_SPATIAL))) {
ibuf_free_excess_pages();
}
/* Try then pessimistic descent to the B-tree */
-
log_free_check();
err = row_ins_sec_index_entry_low(
- 0, BTR_MODIFY_TREE, index,
+ flags, BTR_MODIFY_TREE, index,
offsets_heap, heap, entry, 0, thr);
}
@@ -3040,7 +3255,7 @@ Inserts an index entry to index. Tries first optimistic, then pessimistic
descent down the tree. If the entry matches enough to a delete marked record,
performs the insert by updating or delete unmarking the delete marked
record.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
static
dberr_t
row_ins_index_entry(
@@ -3049,6 +3264,8 @@ row_ins_index_entry(
dtuple_t* entry, /*!< in/out: index entry to insert */
que_thr_t* thr) /*!< in: query thread */
{
+ ut_ad(thr_get_trx(thr)->id != 0);
+
DBUG_EXECUTE_IF("row_ins_index_entry_timeout", {
DBUG_SET("-d,row_ins_index_entry_timeout");
return(DB_LOCK_WAIT);});
@@ -3060,35 +3277,88 @@ row_ins_index_entry(
}
}
-/***********************************************************//**
-Sets the values of the dtuple fields in entry from the values of appropriate
-columns in row. */
-static MY_ATTRIBUTE((nonnull))
+
+/*****************************************************************//**
+This function generates the MBR (Minimum Bounding Box) for a spatial
+object and sets it in the spatial index field. */
+static
void
+row_ins_spatial_index_entry_set_mbr_field(
+/*======================================*/
+ dfield_t* field, /*!< in/out: mbr field */
+ const dfield_t* row_field) /*!< in: row field */
+{
+ ulint dlen = 0;
+ double mbr[SPDIMS * 2];
+
+ /* This must be a GEOMETRY datatype */
+ ut_ad(DATA_GEOMETRY_MTYPE(field->type.mtype));
+
+ const byte* dptr = static_cast<const byte*>(
+ dfield_get_data(row_field));
+ dlen = dfield_get_len(row_field);
+
+ /* obtain the MBR */
+ rtree_mbr_from_wkb(dptr + GEO_DATA_HEADER_SIZE,
+ static_cast<uint>(dlen - GEO_DATA_HEADER_SIZE),
+ SPDIMS, mbr);
+
+ /* Set mbr as index entry data */
+ dfield_write_mbr(field, mbr);
+}
+
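
dfield_write_mbr() stores the computed bounds as the fixed-length key value of the spatial index's first field. A small illustration, assuming SPDIMS == 2 and a {xmin, xmax, ymin, ymax} ordering from rtree_mbr_from_wkb() (both are assumptions for the sketch, not spelled out in this hunk):

	/* MBR of a two-point geometry spanning (1,1) and (3,2): */
	double mbr[SPDIMS * 2] = {
		1.0, 3.0,	/* x: min, max */
		1.0, 2.0	/* y: min, max */
	};
	dfield_write_mbr(field, mbr);	/* serializes the four doubles */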
+/** Sets the values of the dtuple fields in entry from the values of appropriate
+columns in row.
+@param[in] index index handler
+@param[out] entry index entry to make
+@param[in] row row
+@return DB_SUCCESS if the values were set successfully */
+static
+dberr_t
row_ins_index_entry_set_vals(
-/*=========================*/
- dict_index_t* index, /*!< in: index */
- dtuple_t* entry, /*!< in: index entry to make */
- const dtuple_t* row) /*!< in: row */
+ const dict_index_t* index,
+ dtuple_t* entry,
+ const dtuple_t* row)
{
ulint n_fields;
ulint i;
+ ulint num_v = dtuple_get_n_v_fields(entry);
n_fields = dtuple_get_n_fields(entry);
- for (i = 0; i < n_fields; i++) {
- dict_field_t* ind_field;
+ for (i = 0; i < n_fields + num_v; i++) {
+ dict_field_t* ind_field = NULL;
dfield_t* field;
const dfield_t* row_field;
ulint len;
+ dict_col_t* col;
+
+ if (i >= n_fields) {
+			/* This is a virtual field */
+ field = dtuple_get_nth_v_field(entry, i - n_fields);
+ col = &dict_table_get_nth_v_col(
+ index->table, i - n_fields)->m_col;
+ } else {
+ field = dtuple_get_nth_field(entry, i);
+ ind_field = dict_index_get_nth_field(index, i);
+ col = ind_field->col;
+ }
+
+ if (dict_col_is_virtual(col)) {
+ const dict_v_col_t* v_col
+ = reinterpret_cast<const dict_v_col_t*>(col);
+ ut_ad(dtuple_get_n_fields(row)
+ == dict_table_get_n_cols(index->table));
+ row_field = dtuple_get_nth_v_field(row, v_col->v_pos);
+ } else {
+ row_field = dtuple_get_nth_field(
+ row, ind_field->col->ind);
+ }
- field = dtuple_get_nth_field(entry, i);
- ind_field = dict_index_get_nth_field(index, i);
- row_field = dtuple_get_nth_field(row, ind_field->col->ind);
len = dfield_get_len(row_field);
/* Check column prefix indexes */
- if (ind_field->prefix_len > 0
+ if (ind_field != NULL && ind_field->prefix_len > 0
&& dfield_get_len(row_field) != UNIV_SQL_NULL) {
const dict_col_t* col
@@ -3104,12 +3374,26 @@ row_ins_index_entry_set_vals(
ut_ad(!dfield_is_ext(row_field));
}
+ /* Handle spatial index. For the first field, replace
+ the data with its MBR (Minimum Bounding Box). */
+ if ((i == 0) && dict_index_is_spatial(index)) {
+ if (!row_field->data
+ || row_field->len < GEO_DATA_HEADER_SIZE) {
+ return(DB_CANT_CREATE_GEOMETRY_OBJECT);
+ }
+ row_ins_spatial_index_entry_set_mbr_field(
+ field, row_field);
+ continue;
+ }
+
dfield_set_data(field, dfield_get_data(row_field), len);
if (dfield_is_ext(row_field)) {
ut_ad(dict_index_is_clust(index));
dfield_set_ext(field);
}
}
+
+ return(DB_SUCCESS);
}
/***********************************************************//**
@@ -3125,24 +3409,25 @@ row_ins_index_entry_step(
{
dberr_t err;
+ DBUG_ENTER("row_ins_index_entry_step");
+
ut_ad(dtuple_check_typed(node->row));
- row_ins_index_entry_set_vals(node->index, node->entry, node->row);
+ err = row_ins_index_entry_set_vals(node->index, *node->entry,
+ node->row);
- ut_ad(dtuple_check_typed(node->entry));
+ if (err != DB_SUCCESS) {
+ DBUG_RETURN(err);
+ }
- err = row_ins_index_entry(node->index, node->entry, thr);
+ ut_ad(dtuple_check_typed(*node->entry));
-#ifdef UNIV_DEBUG
- /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC().
- Once it is fixed, remove the 'ifdef', 'if' and this comment. */
- if (!thr_get_trx(thr)->ddl) {
- DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
- "after_row_ins_index_entry_step");
- }
-#endif /* UNIV_DEBUG */
+ err = row_ins_index_entry(node->index, *node->entry, thr);
- return(err);
+ DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
+ "after_row_ins_index_entry_step");
+
+ DBUG_RETURN(err);
}
/***********************************************************//**
@@ -3168,7 +3453,7 @@ row_ins_alloc_row_id_step(
row_id = dict_sys_get_new_row_id();
- dict_sys_write_row_id(node->row_id_buf, row_id);
+ dict_sys_write_row_id(node->sys_buf, row_id);
}
/***********************************************************//**
@@ -3246,14 +3531,17 @@ row_ins(
ins_node_t* node, /*!< in: row insert node */
que_thr_t* thr) /*!< in: query thread */
{
- dberr_t err;
+ DBUG_ENTER("row_ins");
+
+ DBUG_PRINT("row_ins", ("table: %s", node->table->name.m_name));
if (node->state == INS_NODE_ALLOC_ROW_ID) {
row_ins_alloc_row_id_step(node);
node->index = dict_table_get_first_index(node->table);
- node->entry = UT_LIST_GET_FIRST(node->entry_list);
+ ut_ad(node->entry_list.empty() == false);
+ node->entry = node->entry_list.begin();
if (node->ins_type == INS_SEARCHED) {
@@ -3271,41 +3559,34 @@ row_ins(
while (node->index != NULL) {
if (node->index->type != DICT_FTS) {
- err = row_ins_index_entry_step(node, thr);
+ dberr_t err = row_ins_index_entry_step(node, thr);
if (err != DB_SUCCESS) {
-
- return(err);
+ DBUG_RETURN(err);
}
}
node->index = dict_table_get_next_index(node->index);
- node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry);
-
- DBUG_EXECUTE_IF(
- "row_ins_skip_sec",
- node->index = NULL; node->entry = NULL; break;);
+ ++node->entry;
/* Skip corrupted secondary index and its entry */
- while (node->index && dict_index_is_corrupted(node->index)) {
-
+ while (node->index && node->index->is_corrupted()) {
node->index = dict_table_get_next_index(node->index);
- node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry);
+ ++node->entry;
}
}
- ut_ad(node->entry == NULL);
+ ut_ad(node->entry == node->entry_list.end());
node->state = INS_NODE_ALLOC_ROW_ID;
- return(DB_SUCCESS);
+ DBUG_RETURN(DB_SUCCESS);
}
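
Note: node->entry_list is now a standard-library-style container of dtuple_t* rather than a UT_LIST, and node->entry is an iterator into it; hence *node->entry in row_ins_index_entry_step() and the ++node->entry advance above. Sketch of the resulting loop shape (container and iterator types assumed, corruption skipping omitted):

	node->entry = node->entry_list.begin();
	for (node->index = dict_table_get_first_index(node->table);
	     node->index != NULL;
	     node->index = dict_table_get_next_index(node->index),
	     ++node->entry) {
		if (node->index->type != DICT_FTS) {
			dberr_t err = row_ins_index_entry_step(node, thr);
			if (err != DB_SUCCESS) {
				DBUG_RETURN(err);
			}
		}
	}
	ut_ad(node->entry == node->entry_list.end());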
/***********************************************************//**
Inserts a row to a table. This is a high-level function used in SQL execution
graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
row_ins_step(
/*=========*/
@@ -3319,9 +3600,11 @@ row_ins_step(
ut_ad(thr);
+ DEBUG_SYNC_C("innodb_row_ins_step_enter");
+
trx = thr_get_trx(thr);
- trx_start_if_not_started_xa(trx);
+ trx_start_if_not_started_xa(trx, true);
node = static_cast<ins_node_t*>(thr->run_node);
@@ -3342,9 +3625,9 @@ row_ins_step(
This happens, for example, when a row update moves it to another
partition. In that case, we have already set the IX lock on the
table during the search operation, and there is no need to set
- it again here. But we must write trx->id to node->trx_id_buf. */
+ it again here. But we must write trx->id to node->sys_buf. */
- trx_write_trx_id(node->trx_id_buf, trx->id);
+ trx_write_trx_id(&node->sys_buf[DATA_ROW_ID_LEN], trx->id);
if (node->state == INS_NODE_SET_IX_LOCK) {
diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc
index 4d2ee83024a..87eb9a7a8ff 100644
--- a/storage/innobase/row/row0log.cc
+++ b/storage/innobase/row/row0log.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2011, 2018, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2018, MariaDB Corporation.
+Copyright (c) 2017, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,21 +25,21 @@ Created 2011-05-26 Marko Makela
*******************************************************/
#include "row0log.h"
-
-#ifdef UNIV_NONINL
-#include "row0log.ic"
-#endif
-
#include "row0row.h"
#include "row0ins.h"
#include "row0upd.h"
#include "row0merge.h"
#include "row0ext.h"
+#include "log0crypt.h"
#include "data0data.h"
#include "que0que.h"
+#include "srv0mon.h"
#include "handler0alter.h"
+#include "ut0stage.h"
+#include "trx0rec.h"
-#include<map>
+#include <algorithm>
+#include <map>
ulint onlineddl_rowlog_rows;
ulint onlineddl_rowlog_pct_used;
@@ -64,22 +64,15 @@ enum row_op {
ROW_OP_DELETE
};
-#ifdef UNIV_DEBUG
-/** Write information about the applied record to the error log */
-# define ROW_LOG_APPLY_PRINT
-#endif /* UNIV_DEBUG */
-
-#ifdef ROW_LOG_APPLY_PRINT
-/** When set, write information about the applied record to the error log */
-static bool row_log_apply_print;
-#endif /* ROW_LOG_APPLY_PRINT */
-
/** Size of the modification log entry header, in bytes */
#define ROW_LOG_HEADER_SIZE 2/*op, extra_size*/
/** Log block for modifications during online ALTER TABLE */
struct row_log_buf_t {
byte* block; /*!< file block buffer */
+ ut_new_pfx_t block_pfx; /*!< opaque descriptor of "block". Set
+ by ut_allocator::allocate_large() and fed to
+ ut_allocator::deallocate_large(). */
mrec_buf_t buf; /*!< buffer for accessing a record
that spans two blocks */
ulint blocks; /*!< current position in blocks */
@@ -88,14 +81,13 @@ struct row_log_buf_t {
the start of the row_log_table log;
0 for row_log_online_op() and
row_log_apply(). */
- ulint size; /*!< allocated size of block */
};
/** Tracks BLOB allocation during online ALTER TABLE */
class row_log_table_blob_t {
public:
/** Constructor (declaring a BLOB freed)
- @param offset_arg row_log_t::tail::total */
+ @param offset_arg row_log_t::tail::total */
#ifdef UNIV_DEBUG
row_log_table_blob_t(ulonglong offset_arg) :
old_offset (0), free_offset (offset_arg),
@@ -106,7 +98,7 @@ public:
#endif /* UNIV_DEBUG */
/** Declare a BLOB freed again.
- @param offset_arg row_log_t::tail::total */
+ @param offset_arg row_log_t::tail::total */
#ifdef UNIV_DEBUG
void blob_free(ulonglong offset_arg)
#else /* UNIV_DEBUG */
@@ -120,14 +112,14 @@ public:
offset = BLOB_FREED;
}
/** Declare a freed BLOB reused.
- @param offset_arg row_log_t::tail::total */
+ @param offset_arg row_log_t::tail::total */
void blob_alloc(ulonglong offset_arg) {
ut_ad(free_offset <= offset_arg);
ut_d(old_offset = offset);
offset = offset_arg;
}
/** Determine if a BLOB was freed at a given log position
- @param offset_arg row_log_t::head::total after the log record
+ @param offset_arg row_log_t::head::total after the log record
@return true if freed */
bool is_freed(ulonglong offset_arg) const {
/* This is supposed to be the offset at the end of the
@@ -157,7 +149,12 @@ If a page number maps to 0, it is an off-page column that has been freed.
If a page number maps to a nonzero number, the number is a byte offset
into the index->online_log, indicating that the page is safe to access
when applying log records starting from that offset. */
-typedef std::map<ulint, row_log_table_blob_t> page_no_map;
+typedef std::map<
+ ulint,
+ row_log_table_blob_t,
+ std::less<ulint>,
+ ut_allocator<std::pair<const ulint, row_log_table_blob_t> > >
+ page_no_map;
/** @brief Buffer for logging modifications during online index creation
@@ -210,8 +207,8 @@ struct row_log_t {
};
/** Create the file or online log if it does not exist.
-@param[in,out] log online rebuild log
-@return file descriptor. */
+@param[in,out] log online rebuild log
+@return file descriptor, or -1 on failure */
static MY_ATTRIBUTE((warn_unused_result))
int
row_log_tmpfile(
@@ -220,6 +217,13 @@ row_log_tmpfile(
DBUG_ENTER("row_log_tmpfile");
if (log->fd < 0) {
log->fd = row_merge_file_create_low(log->path);
+ DBUG_EXECUTE_IF("row_log_tmpfile_fail",
+ if (log->fd > 0)
+ row_merge_file_destroy_low(log->fd);
+ log->fd = -1;);
+ if (log->fd >= 0) {
+ MONITOR_ATOMIC_INC(MONITOR_ALTER_TABLE_LOG_FILES);
+ }
}
DBUG_RETURN(log->fd);
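
The new "row_log_tmpfile_fail" injection destroys the file it just created and leaves log->fd at -1, so callers observe the same failure as a real file-creation error; row_log_online_op() below then reports it as DB_OUT_OF_MEMORY through log->error. In a debug build the point would typically be armed from a test with SET debug_dbug = '+d,row_log_tmpfile_fail' (the usual mysql-test idiom; no such test appears in this diff). Successful creations are additionally counted in MONITOR_ALTER_TABLE_LOG_FILES.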
@@ -235,13 +239,15 @@ row_log_block_allocate(
{
DBUG_ENTER("row_log_block_allocate");
if (log_buf.block == NULL) {
- log_buf.size = srv_sort_buf_size;
- log_buf.block = (byte*) os_mem_alloc_large(&log_buf.size);
- DBUG_EXECUTE_IF("simulate_row_log_allocation_failure",
- if (log_buf.block)
- os_mem_free_large(log_buf.block, log_buf.size);
- log_buf.block = NULL;);
- if (!log_buf.block) {
+ DBUG_EXECUTE_IF(
+ "simulate_row_log_allocation_failure",
+ DBUG_RETURN(false);
+ );
+
+ log_buf.block = ut_allocator<byte>(mem_key_row_log_buf)
+ .allocate_large(srv_sort_buf_size, &log_buf.block_pfx);
+
+ if (log_buf.block == NULL) {
DBUG_RETURN(false);
}
}
@@ -257,7 +263,8 @@ row_log_block_free(
{
DBUG_ENTER("row_log_block_free");
if (log_buf.block != NULL) {
- os_mem_free_large(log_buf.block, log_buf.size);
+ ut_allocator<byte>(mem_key_row_log_buf).deallocate_large(
+ log_buf.block, &log_buf.block_pfx);
log_buf.block = NULL;
}
DBUG_VOID_RETURN;
@@ -265,7 +272,6 @@ row_log_block_free(
/******************************************************//**
Logs an operation to a secondary index that is (or was) being created. */
-UNIV_INTERN
void
row_log_online_op(
/*==============*/
@@ -283,12 +289,10 @@ row_log_online_op(
ut_ad(dtuple_validate(tuple));
ut_ad(dtuple_get_n_fields(tuple) == dict_index_get_n_fields(index));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_SHARED)
- || rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own_flagged(&index->lock,
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
- if (dict_index_is_corrupted(index)) {
+ if (index->is_corrupted()) {
return;
}
@@ -354,21 +358,22 @@ row_log_online_op(
const os_offset_t byte_offset
= (os_offset_t) log->tail.blocks
* srv_sort_buf_size;
- ibool ret;
- byte * buf = log->tail.block;
+ IORequest request(IORequest::WRITE);
+ byte* buf = log->tail.block;
if (byte_offset + srv_sort_buf_size >= srv_online_max_size) {
goto write_failed;
}
if (mrec_size == avail_size) {
- ut_ad(b == &log->tail.block[srv_sort_buf_size]);
+ ut_ad(b == &buf[srv_sort_buf_size]);
} else {
ut_ad(b == log->tail.buf + mrec_size);
- memcpy(log->tail.block + log->tail.bytes,
+ memcpy(buf + log->tail.bytes,
log->tail.buf, avail_size);
}
- UNIV_MEM_ASSERT_RW(log->tail.block, srv_sort_buf_size);
+
+ UNIV_MEM_ASSERT_RW(buf, srv_sort_buf_size);
if (row_log_tmpfile(log) < 0) {
log->error = DB_OUT_OF_MEMORY;
@@ -378,11 +383,9 @@ row_log_online_op(
/* If encryption is enabled encrypt buffer before writing it
to file system. */
if (log_tmp_is_encrypted()) {
- if (!log_tmp_block_encrypt(log->tail.block,
- srv_sort_buf_size,
- log->crypt_tail,
- byte_offset,
- index->table->space)) {
+ if (!log_tmp_block_encrypt(
+ buf, srv_sort_buf_size,
+ log->crypt_tail, byte_offset)) {
log->error = DB_DECRYPTION_FAILED;
goto write_failed;
}
@@ -391,14 +394,12 @@ row_log_online_op(
buf = log->crypt_tail;
}
- ret = os_file_write_int_fd(
- "(modification log)",
- log->fd,
- buf, byte_offset, srv_sort_buf_size);
-
log->tail.blocks++;
-
- if (!ret) {
+ if (DB_SUCCESS != os_file_write_int_fd(
+ request,
+ "(modification log)",
+ log->fd,
+ buf, byte_offset, srv_sort_buf_size)) {
write_failed:
/* We set the flag directly instead of invoking
dict_set_corrupted_index_cache_only(index) here,
@@ -407,6 +408,7 @@ write_failed:
}
UNIV_MEM_INVALID(log->tail.block, srv_sort_buf_size);
+ UNIV_MEM_INVALID(buf, srv_sort_buf_size);
memcpy(log->tail.block, log->tail.buf + avail_size,
mrec_size - avail_size);
@@ -424,7 +426,6 @@ err_exit:
/******************************************************//**
Gets the error status of the online index rebuild log.
@return DB_SUCCESS or error code */
-UNIV_INTERN
dberr_t
row_log_table_get_error(
/*====================*/
@@ -495,21 +496,21 @@ row_log_table_close_func(
const os_offset_t byte_offset
= (os_offset_t) log->tail.blocks
* srv_sort_buf_size;
- ibool ret;
- byte * buf = log->tail.block;
+ IORequest request(IORequest::WRITE);
+ byte* buf = log->tail.block;
if (byte_offset + srv_sort_buf_size >= srv_online_max_size) {
goto write_failed;
}
if (size == avail) {
- ut_ad(b == &log->tail.block[srv_sort_buf_size]);
+ ut_ad(b == &buf[srv_sort_buf_size]);
} else {
ut_ad(b == log->tail.buf + size);
- memcpy(log->tail.block + log->tail.bytes,
- log->tail.buf, avail);
+ memcpy(buf + log->tail.bytes, log->tail.buf, avail);
}
- UNIV_MEM_ASSERT_RW(log->tail.block, srv_sort_buf_size);
+
+ UNIV_MEM_ASSERT_RW(buf, srv_sort_buf_size);
if (row_log_tmpfile(log) < 0) {
log->error = DB_OUT_OF_MEMORY;
@@ -519,11 +520,10 @@ row_log_table_close_func(
/* If encryption is enabled encrypt buffer before writing it
to file system. */
if (log_tmp_is_encrypted()) {
- if (!log_tmp_block_encrypt(log->tail.block,
- srv_sort_buf_size,
- log->crypt_tail,
- byte_offset,
- index->table->space)) {
+ if (!log_tmp_block_encrypt(
+ log->tail.block, srv_sort_buf_size,
+ log->crypt_tail, byte_offset,
+ index->table->space)) {
log->error = DB_DECRYPTION_FAILED;
goto err_exit;
}
@@ -532,18 +532,17 @@ row_log_table_close_func(
buf = log->crypt_tail;
}
- ret = os_file_write_int_fd(
- "(modification log)",
- log->fd,
- buf, byte_offset, srv_sort_buf_size);
-
log->tail.blocks++;
-
- if (!ret) {
+ if (DB_SUCCESS != os_file_write_int_fd(
+ request,
+ "(modification log)",
+ log->fd,
+ buf, byte_offset, srv_sort_buf_size)) {
write_failed:
log->error = DB_ONLINE_LOG_TOO_BIG;
}
UNIV_MEM_INVALID(log->tail.block, srv_sort_buf_size);
+ UNIV_MEM_INVALID(buf, srv_sort_buf_size);
memcpy(log->tail.block, log->tail.buf + avail, size - avail);
log->tail.bytes = size - avail;
} else {
@@ -556,9 +555,9 @@ write_failed:
err_exit:
mutex_exit(&log->mutex);
- os_atomic_increment_ulint(&onlineddl_rowlog_rows, 1);
+ my_atomic_addlint(&onlineddl_rowlog_rows, 1);
/* 10000 means 100.00%, 4525 means 45.25% */
- onlineddl_rowlog_pct_used = (log->tail.total * 10000) / srv_online_max_size;
+ onlineddl_rowlog_pct_used = static_cast<ulint>((log->tail.total * 10000) / srv_online_max_size);
}
#ifdef UNIV_DEBUG
@@ -569,10 +568,23 @@ err_exit:
row_log_table_close_func(index, size, avail)
#endif /* UNIV_DEBUG */
+/** Check whether a virtual column is indexed in the new table being
+created during ALTER TABLE
+@param[in]	index	clustered index
+@param[in] v_no virtual column number
+@return true if it is indexed, else false */
+bool
+row_log_col_is_indexed(
+ const dict_index_t* index,
+ ulint v_no)
+{
+ return(dict_table_get_nth_v_col(
+ index->online_log->table, v_no)->m_col.ord_part);
+}
+
/******************************************************//**
Logs a delete operation to a table that is being rebuilt.
This will be merged in row_log_table_apply_delete(). */
-UNIV_INTERN
void
row_log_table_delete(
/*=================*/
@@ -580,7 +592,7 @@ row_log_table_delete(
page X-latched */
dict_index_t* index, /*!< in/out: clustered index, S-latched
or X-latched */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec,index) */
const byte* sys) /*!< in: DB_TRX_ID,DB_ROLL_PTR that should
be logged, or NULL to use those in rec */
{
@@ -595,13 +607,12 @@ row_log_table_delete(
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(rec_offs_n_fields(offsets) == dict_index_get_n_fields(index));
ut_ad(rec_offs_size(offsets) <= sizeof index->online_log->tail.buf);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&index->lock, RW_LOCK_SHARED)
- || rw_lock_own(&index->lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own_flagged(
+ &index->lock,
+ RW_LOCK_FLAG_S | RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX));
- if (dict_index_is_corrupted(index)
- || !dict_index_is_online_ddl(index)
+ if (index->online_status != ONLINE_INDEX_CREATION
+ || (index->type & DICT_CORRUPT) || index->table->corrupted
|| index->online_log->error != DB_SUCCESS) {
return;
}
@@ -722,13 +733,14 @@ row_log_table_low_redundant(
ut_ad(!page_is_comp(page_align(rec)));
ut_ad(dict_index_get_n_fields(index) == rec_get_n_fields_old(rec));
- ut_ad(dict_tf_is_valid(index->table->flags));
+ ut_ad(dict_tf2_is_valid(index->table->flags, index->table->flags2));
ut_ad(!dict_table_is_comp(index->table)); /* redundant row format */
ut_ad(dict_index_is_clust(new_index));
heap = mem_heap_create(DTUPLE_EST_ALLOC(index->n_fields));
tuple = dtuple_create(heap, index->n_fields);
dict_index_copy_types(tuple, index, index->n_fields);
+
dtuple_set_n_fields_cmp(tuple, dict_index_get_n_unique(index));
if (rec_get_1byte_offs_flag(rec)) {
@@ -815,7 +827,7 @@ row_log_table_low_redundant(
/******************************************************//**
Logs an insert or update to a table that is being rebuilt. */
-static MY_ATTRIBUTE((nonnull(1,2,3)))
+static
void
row_log_table_low(
/*==============*/
@@ -823,7 +835,7 @@ row_log_table_low(
page X-latched */
dict_index_t* index, /*!< in/out: clustered index, S-latched
or X-latched */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec,index) */
bool insert, /*!< in: true if insert, false if update */
const dtuple_t* old_pk) /*!< in: old PRIMARY KEY value (if !insert
and a PRIMARY KEY is being created) */
@@ -834,24 +846,30 @@ row_log_table_low(
ulint extra_size;
ulint mrec_size;
ulint avail_size;
- const dict_index_t* new_index = dict_table_get_first_index(
- index->online_log->table);
+ const dict_index_t* new_index;
+
+ new_index = dict_table_get_first_index(index->online_log->table);
+
ut_ad(dict_index_is_clust(index));
ut_ad(dict_index_is_clust(new_index));
ut_ad(!dict_index_is_online_ddl(new_index));
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(rec_offs_n_fields(offsets) == dict_index_get_n_fields(index));
ut_ad(rec_offs_size(offsets) <= sizeof index->online_log->tail.buf);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&index->lock, RW_LOCK_SHARED)
- || rw_lock_own(&index->lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own_flagged(
+ &index->lock,
+ RW_LOCK_FLAG_S | RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX));
ut_ad(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX);
- ut_ad(page_is_leaf(page_align(rec)));
+ ut_ad(page_rec_is_leaf(rec));
ut_ad(!page_is_comp(page_align(rec)) == !rec_offs_comp(offsets));
-
- if (dict_index_is_corrupted(index)
- || !dict_index_is_online_ddl(index)
+ /* old_pk=row_log_table_get_pk() [not needed in INSERT] is a prefix
+ of the clustered index record (PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR),
+ with no information on virtual columns */
+ ut_ad(!old_pk || !insert);
+ ut_ad(!old_pk || old_pk->n_v_fields == 0);
+
+ if (index->online_status != ONLINE_INDEX_CREATION
+ || (index->type & DICT_CORRUPT) || index->table->corrupted
|| index->online_log->error != DB_SUCCESS) {
return;
}
@@ -923,7 +941,6 @@ row_log_table_low(
/******************************************************//**
Logs an update to a table that is being rebuilt.
This will be merged in row_log_table_apply_update(). */
-UNIV_INTERN
void
row_log_table_update(
/*=================*/
@@ -931,7 +948,7 @@ row_log_table_update(
page X-latched */
dict_index_t* index, /*!< in/out: clustered index, S-latched
or X-latched */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec,index) */
const dtuple_t* old_pk) /*!< in: row_log_table_get_pk()
before the update */
{
@@ -939,9 +956,9 @@ row_log_table_update(
}
/** Gets the old table column of a PRIMARY KEY column.
-@param table old table (before ALTER TABLE)
-@param col_map mapping of old column numbers to new ones
-@param col_no column position in the new table
+@param table old table (before ALTER TABLE)
+@param col_map mapping of old column numbers to new ones
+@param col_no column position in the new table
@return old table column, or NULL if this is an added column */
static
const dict_col_t*
@@ -961,36 +978,35 @@ row_log_table_get_pk_old_col(
}
/** Maps an old table column of a PRIMARY KEY column.
-@param col old table column (before ALTER TABLE)
-@param ifield clustered index field in the new table (after ALTER TABLE)
-@param dfield clustered index tuple field in the new table
-@param heap memory heap for allocating dfield contents
-@param rec clustered index leaf page record in the old table
-@param offsets rec_get_offsets(rec)
-@param i rec field corresponding to col
-@param zip_size compressed page size of the old table, or 0 for uncompressed
-@param max_len maximum length of dfield
-@retval DB_INVALID_NULL if a NULL value is encountered
-@retval DB_TOO_BIG_INDEX_COL if the maximum prefix length is exceeded */
+@param[in] col old table column (before ALTER TABLE)
+@param[in] ifield clustered index field in the new table (after
+ALTER TABLE)
+@param[in,out] dfield clustered index tuple field in the new table
+@param[in,out] heap memory heap for allocating dfield contents
+@param[in] rec clustered index leaf page record in the old
+table
+@param[in] offsets rec_get_offsets(rec)
+@param[in] i rec field corresponding to col
+@param[in] page_size page size of the old table
+@param[in] max_len maximum length of dfield
+@retval DB_INVALID_NULL if a NULL value is encountered
+@retval DB_TOO_BIG_INDEX_COL if the maximum prefix length is exceeded */
static
dberr_t
row_log_table_get_pk_col(
-/*=====================*/
const dict_col_t* col,
const dict_field_t* ifield,
dfield_t* dfield,
mem_heap_t* heap,
const rec_t* rec,
- const ulint* offsets,
+ const offset_t* offsets,
ulint i,
- ulint zip_size,
+ const page_size_t& page_size,
ulint max_len)
{
const byte* field;
ulint len;
- ut_ad(ut_is_2pow(zip_size));
-
field = rec_get_nth_field(rec, offsets, i, &len);
if (len == UNIV_SQL_NULL) {
@@ -1012,7 +1028,7 @@ row_log_table_get_pk_col(
mem_heap_alloc(heap, field_len));
len = btr_copy_externally_stored_field_prefix(
- blob_field, field_len, zip_size, field, len);
+ blob_field, field_len, page_size, field, len);
if (len >= max_len + 1) {
return(DB_TOO_BIG_INDEX_COL);
}
@@ -1030,7 +1046,6 @@ Constructs the old PRIMARY KEY and DB_TRX_ID,DB_ROLL_PTR
of a table that is being rebuilt.
@return tuple of PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR in the rebuilt table,
or NULL if the PRIMARY KEY definition does not change */
-UNIV_INTERN
const dtuple_t*
row_log_table_get_pk(
/*=================*/
@@ -1038,7 +1053,7 @@ row_log_table_get_pk(
page X-latched */
dict_index_t* index, /*!< in/out: clustered index, S-latched
or X-latched */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec,index) */
byte* sys, /*!< out: DB_TRX_ID,DB_ROLL_PTR for
row_log_table_delete(), or NULL */
mem_heap_t** heap) /*!< in/out: memory heap where allocated */
@@ -1049,10 +1064,9 @@ row_log_table_get_pk(
ut_ad(dict_index_is_clust(index));
ut_ad(dict_index_is_online_ddl(index));
ut_ad(!offsets || rec_offs_validate(rec, index, offsets));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&index->lock, RW_LOCK_SHARED)
- || rw_lock_own(&index->lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own_flagged(
+ &index->lock,
+ RW_LOCK_FLAG_S | RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX));
ut_ad(log);
ut_ad(log->table);
@@ -1071,8 +1085,8 @@ row_log_table_get_pk(
if (!offsets) {
offsets = rec_get_offsets(
- rec, index, NULL, pos + 1,
- heap);
+ rec, index, NULL, true,
+ pos + 1, heap);
}
trx_id_offs = rec_get_nth_field_offs(
@@ -1116,7 +1130,7 @@ row_log_table_get_pk(
}
if (!offsets) {
- offsets = rec_get_offsets(rec, index, NULL,
+ offsets = rec_get_offsets(rec, index, NULL, true,
ULINT_UNDEFINED, heap);
}
@@ -1125,7 +1139,9 @@ row_log_table_get_pk(
dtuple_set_n_fields_cmp(tuple, new_n_uniq);
const ulint max_len = DICT_MAX_FIELD_LEN_BY_FORMAT(new_table);
- const ulint zip_size = dict_table_zip_size(index->table);
+
+ const page_size_t& page_size
+ = dict_table_page_size(index->table);
for (ulint new_i = 0; new_i < new_n_uniq; new_i++) {
dict_field_t* ifield;
@@ -1152,7 +1168,7 @@ row_log_table_get_pk(
log->error = row_log_table_get_pk_col(
col, ifield, dfield, *heap,
- rec, offsets, i, zip_size, max_len);
+ rec, offsets, i, page_size, max_len);
if (log->error != DB_SUCCESS) {
err_exit:
@@ -1223,7 +1239,6 @@ func_exit:
/******************************************************//**
Logs an insert to a table that is being rebuilt.
This will be merged in row_log_table_apply_insert(). */
-UNIV_INTERN
void
row_log_table_insert(
/*=================*/
@@ -1231,14 +1246,13 @@ row_log_table_insert(
page X-latched */
dict_index_t* index, /*!< in/out: clustered index, S-latched
or X-latched */
- const ulint* offsets)/*!< in: rec_get_offsets(rec,index) */
+ const offset_t* offsets)/*!< in: rec_get_offsets(rec,index) */
{
row_log_table_low(rec, index, offsets, true, NULL);
}
/******************************************************//**
Notes that a BLOB is being freed during online ALTER TABLE. */
-UNIV_INTERN
void
row_log_table_blob_free(
/*====================*/
@@ -1247,9 +1261,9 @@ row_log_table_blob_free(
{
ut_ad(dict_index_is_clust(index));
ut_ad(dict_index_is_online_ddl(index));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&index->lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own_flagged(
+ &index->lock,
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX));
ut_ad(page_no != FIL_NULL);
if (index->online_log->error != DB_SUCCESS) {
@@ -1258,8 +1272,8 @@ row_log_table_blob_free(
page_no_map* blobs = index->online_log->blobs;
- if (!blobs) {
- index->online_log->blobs = blobs = new page_no_map();
+ if (blobs == NULL) {
+ index->online_log->blobs = blobs = UT_NEW_NOKEY(page_no_map());
}
#ifdef UNIV_DEBUG
@@ -1283,7 +1297,6 @@ row_log_table_blob_free(
/******************************************************//**
Notes that a BLOB is being allocated during online ALTER TABLE. */
-UNIV_INTERN
void
row_log_table_blob_alloc(
/*=====================*/
@@ -1292,9 +1305,11 @@ row_log_table_blob_alloc(
{
ut_ad(dict_index_is_clust(index));
ut_ad(dict_index_is_online_ddl(index));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&index->lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+
+ ut_ad(rw_lock_own_flagged(
+ &index->lock,
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX));
+
ut_ad(page_no != FIL_NULL);
if (index->online_log->error != DB_SUCCESS) {
@@ -1322,7 +1337,7 @@ row_log_table_apply_convert_mrec(
/*=============================*/
const mrec_t* mrec, /*!< in: merge record */
dict_index_t* index, /*!< in: index of mrec */
- const ulint* offsets, /*!< in: offsets of mrec */
+ const offset_t* offsets, /*!< in: offsets of mrec */
const row_log_t* log, /*!< in: rebuild context */
mem_heap_t* heap, /*!< in/out: memory heap */
trx_id_t trx_id, /*!< in: DB_TRX_ID of mrec */
@@ -1364,6 +1379,7 @@ row_log_table_apply_convert_mrec(
const dict_col_t* col
= dict_field_get_col(ind_field);
+
ulint col_no
= log->col_map[dict_col_get_no(col)];
@@ -1372,8 +1388,9 @@ row_log_table_apply_convert_mrec(
continue;
}
- dfield_t* dfield
+ dfield_t* dfield
= dtuple_get_nth_field(row, col_no);
+
ulint len;
const byte* data;
@@ -1404,7 +1421,7 @@ row_log_table_apply_convert_mrec(
data = btr_rec_copy_externally_stored_field(
mrec, offsets,
- dict_table_zip_size(index->table),
+ dict_table_page_size(index->table),
i, &len, heap);
ut_a(data);
dfield_set_data(dfield, data, len);
@@ -1493,14 +1510,9 @@ row_log_table_apply_insert_low(
ut_ad(dtuple_validate(row));
ut_ad(trx_id);
-#ifdef ROW_LOG_APPLY_PRINT
- if (row_log_apply_print) {
- fprintf(stderr, "table apply insert "
- IB_ID_FMT " " IB_ID_FMT "\n",
- index->table->id, index->id);
- dtuple_print(stderr, row);
- }
-#endif /* ROW_LOG_APPLY_PRINT */
+ DBUG_LOG("ib_alter_table",
+ "insert table " << index->table->id << " (index "
+ << index->id << "): " << rec_printer(row).str());
static const ulint flags
= (BTR_CREATE_FLAG
@@ -1511,7 +1523,8 @@ row_log_table_apply_insert_low(
entry = row_build_index_entry(row, NULL, index, heap);
error = row_ins_clust_index_entry_low(
- flags, BTR_MODIFY_TREE, index, index->n_uniq, entry, 0, thr);
+ flags, BTR_MODIFY_TREE, index, index->n_uniq,
+ entry, 0, thr);
switch (error) {
case DB_SUCCESS:
@@ -1556,7 +1569,7 @@ row_log_table_apply_insert(
/*=======================*/
que_thr_t* thr, /*!< in: query graph */
const mrec_t* mrec, /*!< in: record to insert */
- const ulint* offsets, /*!< in: offsets of mrec */
+ const offset_t* offsets, /*!< in: offsets of mrec */
mem_heap_t* offsets_heap, /*!< in/out: memory heap
that can be emptied */
mem_heap_t* heap, /*!< in/out: memory heap */
@@ -1608,7 +1621,7 @@ row_log_table_apply_delete_low(
/*===========================*/
btr_pcur_t* pcur, /*!< in/out: B-tree cursor,
will be trashed */
- const ulint* offsets, /*!< in: offsets on pcur */
+ const offset_t* offsets, /*!< in: offsets on pcur */
mem_heap_t* heap, /*!< in/out: memory heap */
mtr_t* mtr) /*!< in/out: mini-transaction,
will be committed */
@@ -1620,14 +1633,11 @@ row_log_table_apply_delete_low(
ut_ad(dict_index_is_clust(index));
-#ifdef ROW_LOG_APPLY_PRINT
- if (row_log_apply_print) {
- fprintf(stderr, "table apply delete "
- IB_ID_FMT " " IB_ID_FMT "\n",
- index->table->id, index->id);
- rec_print_new(stderr, btr_pcur_get_rec(pcur), offsets);
- }
-#endif /* ROW_LOG_APPLY_PRINT */
+ DBUG_LOG("ib_alter_table",
+ "delete table " << index->table->id << " (index "
+ << index->id << "): "
+ << rec_printer(btr_pcur_get_rec(pcur), offsets).str());
+
if (dict_table_get_next_index(index)) {
/* Build a row template for purging secondary index entries. */
row = row_build(
@@ -1638,7 +1648,7 @@ row_log_table_apply_delete_low(
}
btr_cur_pessimistic_delete(&error, FALSE, btr_pcur_get_btr_cur(pcur),
- BTR_CREATE_FLAG, RB_NONE, mtr);
+ BTR_CREATE_FLAG, false, mtr);
mtr_commit(mtr);
if (error != DB_SUCCESS) {
@@ -1653,8 +1663,10 @@ row_log_table_apply_delete_low(
const dtuple_t* entry = row_build_index_entry(
row, ext, index, heap);
mtr_start(mtr);
+ mtr->set_named_space(index->space);
btr_pcur_open(index, entry, PAGE_CUR_LE,
- BTR_MODIFY_TREE, pcur, mtr);
+ BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
+ pcur, mtr);
#ifdef UNIV_DEBUG
switch (btr_pcur_get_btr_cur(pcur)->flag) {
case BTR_CUR_DELETE_REF:
@@ -1684,7 +1696,7 @@ flag_ok:
btr_cur_pessimistic_delete(&error, FALSE,
btr_pcur_get_btr_cur(pcur),
- BTR_CREATE_FLAG, RB_NONE, mtr);
+ BTR_CREATE_FLAG, false, mtr);
mtr_commit(mtr);
}
@@ -1702,7 +1714,7 @@ row_log_table_apply_delete(
DB_TRX_ID in the new
clustered index */
const mrec_t* mrec, /*!< in: merge record */
- const ulint* moffsets, /*!< in: offsets of mrec */
+ const offset_t* moffsets, /*!< in: offsets of mrec */
mem_heap_t* offsets_heap, /*!< in/out: memory heap
that can be emptied */
mem_heap_t* heap, /*!< in/out: memory heap */
@@ -1713,7 +1725,7 @@ row_log_table_apply_delete(
dtuple_t* old_pk;
mtr_t mtr;
btr_pcur_t pcur;
- ulint* offsets;
+ offset_t* offsets;
ut_ad(rec_offs_n_fields(moffsets)
== dict_index_get_n_unique(index) + 2);
@@ -1733,8 +1745,10 @@ row_log_table_apply_delete(
}
mtr_start(&mtr);
+ mtr.set_named_space(index->space);
btr_pcur_open(index, old_pk, PAGE_CUR_LE,
- BTR_MODIFY_TREE, &pcur, &mtr);
+ BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
+ &pcur, &mtr);
#ifdef UNIV_DEBUG
switch (btr_pcur_get_btr_cur(&pcur)->flag) {
case BTR_CUR_DELETE_REF:
@@ -1764,7 +1778,7 @@ all_done:
return(DB_SUCCESS);
}
- offsets = rec_get_offsets(btr_pcur_get_rec(&pcur), index, NULL,
+ offsets = rec_get_offsets(btr_pcur_get_rec(&pcur), index, NULL, true,
ULINT_UNDEFINED, &offsets_heap);
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
ut_a(!rec_offs_any_null_extern(btr_pcur_get_rec(&pcur), offsets));
@@ -1821,7 +1835,7 @@ row_log_table_apply_update(
DB_TRX_ID in the new
clustered index */
const mrec_t* mrec, /*!< in: new value */
- const ulint* offsets, /*!< in: offsets of mrec */
+ const offset_t* offsets, /*!< in: offsets of mrec */
mem_heap_t* offsets_heap, /*!< in/out: memory heap
that can be emptied */
mem_heap_t* heap, /*!< in/out: memory heap */
@@ -1876,6 +1890,7 @@ row_log_table_apply_update(
}
mtr_start(&mtr);
+ mtr.set_named_space(index->space);
btr_pcur_open(index, old_pk, PAGE_CUR_LE,
BTR_MODIFY_TREE, &pcur, &mtr);
#ifdef UNIV_DEBUG
@@ -1946,7 +1961,7 @@ func_exit:
mtr_commit(&mtr);
}
func_exit_committed:
- ut_ad(mtr.state == MTR_COMMITTED);
+ ut_ad(mtr.has_committed());
if (error != DB_SUCCESS) {
/* Report the erroneous row using the new
@@ -1958,9 +1973,9 @@ func_exit_committed:
}
/* Prepare to update (or delete) the record. */
- ulint* cur_offsets = rec_get_offsets(
- btr_pcur_get_rec(&pcur),
- index, NULL, ULINT_UNDEFINED, &offsets_heap);
+ offset_t* cur_offsets = rec_get_offsets(
+ btr_pcur_get_rec(&pcur), index, NULL, true,
+ ULINT_UNDEFINED, &offsets_heap);
if (!log->same_pk) {
/* Only update the record if DB_TRX_ID,DB_ROLL_PTR match what
@@ -2021,11 +2036,14 @@ func_exit_committed:
goto func_exit_committed;
}
- dtuple_t* entry = row_build_index_entry(
- row, NULL, index, heap);
- const upd_t* update = row_upd_build_difference_binary(
+ dtuple_t* entry = row_build_index_entry_low(
+ row, NULL, index, heap, ROW_BUILD_NORMAL);
+ upd_t* update = row_upd_build_difference_binary(
index, entry, btr_pcur_get_rec(&pcur), cur_offsets,
- false, NULL, heap);
+ false, NULL, heap, dup->table, &error);
+ if (error != DB_SUCCESS) {
+ goto func_exit;
+ }
if (!update->n_fields) {
/* Nothing to do. */
@@ -2056,7 +2074,7 @@ func_exit_committed:
error = row_log_table_apply_delete_low(
&pcur, cur_offsets, heap, &mtr);
- ut_ad(mtr.state == MTR_COMMITTED);
+ ut_ad(mtr.has_committed());
if (error == DB_SUCCESS) {
error = row_log_table_apply_insert_low(
@@ -2076,15 +2094,12 @@ func_exit_committed:
ROW_COPY_DATA, index, btr_pcur_get_rec(&pcur),
cur_offsets, NULL, NULL, NULL, &old_ext, heap);
ut_ad(old_row);
-#ifdef ROW_LOG_APPLY_PRINT
- if (row_log_apply_print) {
- fprintf(stderr, "table apply update "
- IB_ID_FMT " " IB_ID_FMT "\n",
- index->table->id, index->id);
- dtuple_print(stderr, old_row);
- dtuple_print(stderr, row);
- }
-#endif /* ROW_LOG_APPLY_PRINT */
+
+ DBUG_LOG("ib_alter_table",
+ "update table " << index->table->id
+ << " (index " << index->id
+		 << "): " << rec_printer(old_row).str()
+ << " to " << rec_printer(row).str());
} else {
old_row = NULL;
old_ext = NULL;
@@ -2103,9 +2118,8 @@ func_exit_committed:
if (big_rec) {
if (error == DB_SUCCESS) {
error = btr_store_big_rec_extern_fields(
- index, btr_pcur_get_block(&pcur),
- btr_pcur_get_rec(&pcur), cur_offsets,
- big_rec, &mtr, BTR_STORE_UPDATE);
+ &pcur, cur_offsets, big_rec, &mtr,
+ BTR_STORE_UPDATE);
}
dtuple_big_rec_free(big_rec);
@@ -2126,6 +2140,10 @@ func_exit_committed:
continue;
}
+ if (dict_index_has_virtual(index)) {
+ dtuple_copy_v_fields(old_row, old_pk);
+ }
+
mtr_commit(&mtr);
entry = row_build_index_entry(old_row, old_ext, index, heap);
@@ -2135,6 +2153,7 @@ func_exit_committed:
}
mtr_start(&mtr);
+ mtr.set_named_space(index->space);
if (ROW_FOUND != row_search_index_entry(
index, entry, BTR_MODIFY_TREE, &pcur, &mtr)) {
@@ -2145,7 +2164,7 @@ func_exit_committed:
btr_cur_pessimistic_delete(
&error, FALSE, btr_pcur_get_btr_cur(&pcur),
- BTR_CREATE_FLAG, RB_NONE, &mtr);
+ BTR_CREATE_FLAG, false, &mtr);
if (error != DB_SUCCESS) {
break;
@@ -2166,6 +2185,7 @@ func_exit_committed:
}
mtr_start(&mtr);
+ mtr.set_named_space(index->space);
}
goto func_exit;
@@ -2193,7 +2213,7 @@ row_log_table_apply_op(
mem_heap_t* heap, /*!< in/out: memory heap */
const mrec_t* mrec, /*!< in: merge record */
const mrec_t* mrec_end, /*!< in: end of buffer */
- ulint* offsets) /*!< in/out: work area
+ offset_t* offsets) /*!< in/out: work area
for parsing mrec */
{
row_log_t* log = dup->index->online_log;
@@ -2435,16 +2455,74 @@ row_log_table_apply_op(
return(next_mrec);
}
-/******************************************************//**
-Applies operations to a table was rebuilt.
+#ifdef HAVE_PSI_STAGE_INTERFACE
+/** Estimate how much the ALTER TABLE progress should be incremented per
+one block of log applied.
+For the other phases of ALTER TABLE we increment the progress with 1 per
+page processed.
+@return amount of abstract units to add to work_completed when one block
+of log is applied.
+*/
+inline
+ulint
+row_log_progress_inc_per_block()
+{
+ /* We must increment the progress once per page (as in
+ univ_page_size, usually 16KiB). One block here is srv_sort_buf_size
+ (usually 1MiB). */
+ const ulint pages_per_block = std::max(
+ static_cast<unsigned long>(
+ srv_sort_buf_size / univ_page_size.physical()),
+ 1UL);
+
+ /* Multiply by an artificial factor of 6 to even the pace with
+ the rest of the ALTER TABLE phases, they process page_size amount
+ of data faster. */
+ return(pages_per_block * 6);
+}
+
+/** Estimate how much work is to be done by the log apply phase
+of an ALTER TABLE for this index.
+@param[in] index index whose log to assess
+@return work to be done by log-apply in abstract units
+*/
+ulint
+row_log_estimate_work(
+ const dict_index_t* index)
+{
+ if (index == NULL || index->online_log == NULL) {
+ return(0);
+ }
+
+ const row_log_t* l = index->online_log;
+ const ulint bytes_left =
+ static_cast<ulint>(l->tail.total - l->head.total);
+ const ulint blocks_left = bytes_left / srv_sort_buf_size;
+
+ return(blocks_left * row_log_progress_inc_per_block());
+}
+#else /* HAVE_PSI_STAGE_INTERFACE */
+inline
+ulint
+row_log_progress_inc_per_block()
+{
+ return(0);
+}
+#endif /* HAVE_PSI_STAGE_INTERFACE */
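To sanity-check the arithmetic above: with the defaults named in the comments (16KiB physical pages, a 1MiB sort buffer), one applied block is worth 64 * 6 = 384 abstract units. A minimal standalone sketch of the same computation follows; the constants are assumed defaults and none of the identifiers below are InnoDB's own.

	#include <algorithm>
	#include <cstdint>
	#include <iostream>

	int main()
	{
		/* Assumed defaults, per the comments above. */
		const uint64_t page_size = 16 * 1024;		/* univ_page_size */
		const uint64_t sort_buf_size = 1024 * 1024;	/* srv_sort_buf_size */

		/* row_log_progress_inc_per_block(): pages per block
		(at least one), scaled by the artificial factor of 6. */
		const uint64_t inc_per_block
			= std::max<uint64_t>(sort_buf_size / page_size, 1) * 6;

		/* row_log_estimate_work(): 10MiB of unapplied log is
		10 blocks, hence 10 * 384 = 3840 units of work left. */
		const uint64_t blocks_left = (10 * sort_buf_size) / sort_buf_size;

		std::cout << inc_per_block << " units/block, "
			<< blocks_left * inc_per_block << " units left\n";
		return 0;
	}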
+
+/** Applies operations to a table that was rebuilt.
+@param[in] thr query graph
+@param[in,out] dup for reporting duplicate key errors
+@param[in,out] stage performance schema accounting object, used by
+ALTER TABLE. If not NULL, then stage->inc() will be called for each block
+of log that is applied.
@return DB_SUCCESS, or error code on failure */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
+static MY_ATTRIBUTE((warn_unused_result))
dberr_t
row_log_table_apply_ops(
-/*====================*/
- que_thr_t* thr, /*!< in: query graph */
- row_merge_dup_t*dup) /*!< in/out: for reporting duplicate key
- errors */
+ que_thr_t* thr,
+ row_merge_dup_t* dup,
+ ut_stage_alter_t* stage)
{
dberr_t error;
const mrec_t* mrec = NULL;
@@ -2453,7 +2531,7 @@ row_log_table_apply_ops(
const mrec_t* next_mrec_end;
mem_heap_t* heap;
mem_heap_t* offsets_heap;
- ulint* offsets;
+ offset_t* offsets;
bool has_index_lock;
dict_index_t* index = const_cast<dict_index_t*>(
dup->index);
@@ -2472,9 +2550,7 @@ row_log_table_apply_ops(
ut_ad(dict_index_is_clust(index));
ut_ad(dict_index_is_online_ddl(index));
ut_ad(trx->mysql_thd);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_X));
ut_ad(!dict_index_is_online_ddl(new_index));
ut_ad(trx_id_col > 0);
ut_ad(trx_id_col != ULINT_UNDEFINED);
@@ -2483,9 +2559,9 @@ row_log_table_apply_ops(
UNIV_MEM_INVALID(&mrec_end, sizeof mrec_end);
- offsets = static_cast<ulint*>(ut_malloc(i * sizeof *offsets));
- offsets[0] = i;
- offsets[1] = dict_index_get_n_fields(index);
+ offsets = static_cast<offset_t*>(ut_malloc_nokey(i * sizeof *offsets));
+ rec_offs_set_n_alloc(offsets, i);
+ rec_offs_set_n_fields(offsets, dict_index_get_n_fields(index));
heap = mem_heap_create(UNIV_PAGE_SIZE);
offsets_heap = mem_heap_create(UNIV_PAGE_SIZE);
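The rec_offs_set_n_alloc()/rec_offs_set_n_fields() calls above replace the raw stores offsets[0] = i and offsets[1] = n_fields that the removed lines show, so the header layout is visible in the diff itself: slot 0 holds the allocated length of the array, slot 1 the field count. A minimal sketch of that convention; the _sketch suffixes and the offset_t width are assumptions, and the real accessors (in rem0rec.h) also maintain debug state.

	#include <cassert>
	#include <cstddef>

	typedef unsigned offset_t;	/* width assumed for this sketch */

	static void rec_offs_set_n_alloc_sketch(offset_t* offsets, size_t n)
	{
		offsets[0] = static_cast<offset_t>(n);	/* allocated length */
	}

	static void rec_offs_set_n_fields_sketch(offset_t* offsets, size_t n)
	{
		offsets[1] = static_cast<offset_t>(n);	/* number of fields */
	}

	int main()
	{
		/* i = 1 + REC_OFFS_HEADER_SIZE + n_fields as in the
		caller; REC_OFFS_HEADER_SIZE is 2 in a non-instrumented
		build. */
		offset_t offsets[1 + 2 + 4];
		rec_offs_set_n_alloc_sketch(offsets, 7);
		rec_offs_set_n_fields_sketch(offsets, 4);
		assert(offsets[0] == 7 && offsets[1] == 4);
		return 0;
	}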
@@ -2493,16 +2569,16 @@ row_log_table_apply_ops(
next_block:
ut_ad(has_index_lock);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_X));
ut_ad(index->online_log->head.bytes == 0);
+ stage->inc(row_log_progress_inc_per_block());
+
if (trx_is_interrupted(trx)) {
goto interrupted;
}
- if (dict_index_is_corrupted(index)) {
+ if (index->is_corrupted()) {
error = DB_INDEX_CORRUPT;
goto func_exit;
}
@@ -2518,8 +2594,8 @@ next_block:
if (UNIV_UNLIKELY(index->online_log->head.blocks
> index->online_log->tail.blocks)) {
unexpected_eof:
- fprintf(stderr, "InnoDB: unexpected end of temporary file"
- " for table %s\n", index->table_name);
+ ib::error() << "Unexpected end of temporary file for table "
+ << index->table->name;
corruption:
error = DB_CORRUPTION;
goto func_exit;
@@ -2530,11 +2606,13 @@ corruption:
if (index->online_log->head.blocks) {
#ifdef HAVE_FTRUNCATE
/* Truncate the file in order to save space. */
- if (index->online_log->fd != -1
+ if (index->online_log->fd > 0
&& ftruncate(index->online_log->fd, 0) == -1) {
- fprintf(stderr, "InnoDB: Error: Truncate of file "
- "\'%s\' failed with error %d:%s\n",
- index->name + 1, errno, strerror(errno));
+ ib::error()
+				<< "Truncate of file \'" << index->name + 1
+ << "\' failed with error "
+ << errno << ":" << strerror(errno);
+
goto corruption;
}
#endif /* HAVE_FTRUNCATE */
@@ -2558,7 +2636,6 @@ all_done:
}
} else {
os_offset_t ofs;
- ibool success;
ofs = (os_offset_t) index->online_log->head.blocks
* srv_sort_buf_size;
@@ -2576,33 +2653,29 @@ all_done:
goto func_exit;
}
- byte * buf = index->online_log->head.block;
+ IORequest request(IORequest::READ);
+ byte* buf = index->online_log->head.block;
- success = os_file_read_no_error_handling_int_fd(
- index->online_log->fd,
- buf, ofs,
- srv_sort_buf_size);
+ if (DB_SUCCESS != os_file_read_no_error_handling_int_fd(
+ request, index->online_log->fd,
+ buf, ofs, srv_sort_buf_size)) {
+ ib::error()
+ << "Unable to read temporary file"
+ " for table " << index->table_name;
+ goto corruption;
+ }
- /* If encryption is enabled decrypt buffer after reading it
- from file system. */
- if (success && log_tmp_is_encrypted()) {
- if (!log_tmp_block_decrypt(buf,
- srv_sort_buf_size,
- index->online_log->crypt_head,
- ofs,
- index->table->space)) {
+ if (log_tmp_is_encrypted()) {
+ if (!log_tmp_block_decrypt(
+ buf, srv_sort_buf_size,
+ index->online_log->crypt_head, ofs)) {
error = DB_DECRYPTION_FAILED;
goto func_exit;
}
srv_stats.n_rowlog_blocks_decrypted.inc();
- memcpy(buf, index->online_log->crypt_head, srv_sort_buf_size);
- }
-
- if (!success) {
- fprintf(stderr, "InnoDB: unable to read temporary file"
- " for table %s\n", index->table_name);
- goto corruption;
+ memcpy(buf, index->online_log->crypt_head,
+ srv_sort_buf_size);
}
#ifdef POSIX_FADV_DONTNEED
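One detail of the rewritten read path that is easy to misread: log_tmp_block_decrypt() does not decrypt in place, it writes the plaintext into the crypt buffer, and the caller then copies it back over the block before parsing records. A condensed standalone sketch of that flow; pread() and the XOR placeholder stand in for the real I/O and crypto helpers, whose signatures differ.

	#include <cstring>
	#include <unistd.h>	/* pread() */

	enum sk_err { SK_SUCCESS, SK_CORRUPTION, SK_DECRYPTION_FAILED };

	/* Placeholder for log_tmp_block_decrypt(): the plaintext goes
	into the separate crypt buffer, not back into src. */
	static bool decrypt_sketch(const unsigned char* src, size_t size,
				   unsigned char* crypt_buf)
	{
		for (size_t i = 0; i < size; i++) {
			crypt_buf[i] = static_cast<unsigned char>(src[i] ^ 0xAA);
		}
		return true;
	}

	static sk_err read_log_block(int fd, unsigned char* buf,
				     unsigned char* crypt_buf,
				     off_t ofs, size_t size, bool encrypted)
	{
		if (pread(fd, buf, size, ofs) != static_cast<ssize_t>(size)) {
			return SK_CORRUPTION;	/* "Unable to read temporary file" */
		}

		if (encrypted) {
			if (!decrypt_sketch(buf, size, crypt_buf)) {
				return SK_DECRYPTION_FAILED;
			}
			/* As in the patch: copy the plaintext back over
			the block buffer before records are parsed. */
			memcpy(buf, crypt_buf, size);
		}

		return SK_SUCCESS;
	}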
@@ -2610,14 +2683,6 @@ all_done:
posix_fadvise(index->online_log->fd,
ofs, srv_sort_buf_size, POSIX_FADV_DONTNEED);
#endif /* POSIX_FADV_DONTNEED */
-#if 0 //def FALLOC_FL_PUNCH_HOLE
- /* Try to deallocate the space for the file on disk.
- This should work on ext4 on Linux 2.6.39 and later,
- and be ignored when the operation is unsupported. */
- fallocate(index->online_log->fd,
- FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
- ofs, srv_sort_buf_size);
-#endif /* FALLOC_FL_PUNCH_HOLE */
next_mrec = index->online_log->head.block;
next_mrec_end = next_mrec + srv_sort_buf_size;
@@ -2810,27 +2875,31 @@ func_exit:
return(error);
}
-/******************************************************//**
-Apply the row_log_table log to a table upon completing rebuild.
+/** Apply the row_log_table log to a table upon completing rebuild.
+@param[in] thr query graph
+@param[in] old_table old table
+@param[in,out] table MySQL table (for reporting duplicates)
+@param[in,out] stage performance schema accounting object, used by
+ALTER TABLE. stage->begin_phase_log_table() will be called initially and then
+stage->inc() will be called for each block of log that is applied.
@return DB_SUCCESS, or error code on failure */
-UNIV_INTERN
dberr_t
row_log_table_apply(
-/*================*/
- que_thr_t* thr, /*!< in: query graph */
- dict_table_t* old_table,
- /*!< in: old table */
- struct TABLE* table) /*!< in/out: MySQL table
- (for reporting duplicates) */
+ que_thr_t* thr,
+ dict_table_t* old_table,
+ struct TABLE* table,
+ ut_stage_alter_t* stage)
{
dberr_t error;
dict_index_t* clust_index;
thr_get_trx(thr)->error_key_num = 0;
+ DBUG_EXECUTE_IF("innodb_trx_duplicates",
+ thr_get_trx(thr)->duplicates = TRX_DUP_REPLACE;);
+
+ stage->begin_phase_log_table();
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_S));
clust_index = dict_table_get_first_index(old_table);
rw_lock_x_lock(dict_index_get_lock(clust_index));
@@ -2849,7 +2918,7 @@ row_log_table_apply(
clust_index->online_log->col_map, 0
};
- error = row_log_table_apply_ops(thr, &dup);
+ error = row_log_table_apply_ops(thr, &dup, stage);
ut_ad(error != DB_SUCCESS
|| clust_index->online_log->head.total
@@ -2857,6 +2926,9 @@ row_log_table_apply(
}
rw_lock_x_unlock(dict_index_get_lock(clust_index));
+ DBUG_EXECUTE_IF("innodb_trx_duplicates",
+ thr_get_trx(thr)->duplicates = 0;);
+
return(error);
}
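The stage contract spelled out above (begin_phase_log_table() once, then inc() per applied block) means the caller simply threads one accounting object through the whole ALTER TABLE. A hedged sketch of a call site under that assumption; the wrapper name is illustrative, not the actual handler0alter.cc code.

	/* Illustrative caller: only the row_log_table_apply() signature
	and the stage methods named in the comment above come from the
	patch. */
	static dberr_t apply_rebuild_log(que_thr_t* thr,
					 dict_table_t* old_table,
					 struct TABLE* mysql_table,
					 ut_stage_alter_t* stage)
	{
		/* row_log_table_apply() switches the stage to the
		log-apply phase itself and calls stage->inc() once per
		log block, so the caller only passes the object through. */
		return(row_log_table_apply(thr, old_table, mysql_table,
					   stage));
	}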
@@ -2864,7 +2936,6 @@ row_log_table_apply(
Allocate the row log for an index and flag the index
for online creation.
@retval true if success, false if not */
-UNIV_INTERN
bool
row_log_allocate(
/*=============*/
@@ -2889,17 +2960,17 @@ row_log_allocate(
ut_ad(same_pk || table);
ut_ad(!table || col_map);
ut_ad(!add_cols || col_map);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- log = (row_log_t*) ut_malloc(sizeof *log);
- if (!log) {
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_X));
+
+ log = static_cast<row_log_t*>(ut_malloc_nokey(sizeof *log));
+
+ if (log == NULL) {
DBUG_RETURN(false);
}
log->fd = -1;
- mutex_create(index_online_log_key, &log->mutex,
- SYNC_INDEX_ONLINE_LOG);
+ mutex_create(LATCH_ID_INDEX_ONLINE_LOG, &log->mutex);
+
log->blobs = NULL;
log->table = table;
log->same_pk = same_pk;
@@ -2910,10 +2981,11 @@ row_log_allocate(
log->tail.blocks = log->tail.bytes = 0;
log->tail.total = 0;
log->tail.block = log->head.block = NULL;
+ log->crypt_tail = log->crypt_head = NULL;
log->head.blocks = log->head.bytes = 0;
log->head.total = 0;
log->path = path;
- log->crypt_tail = log->crypt_head = NULL;
+
dict_index_set_online_status(index, ONLINE_INDEX_CREATION);
index->online_log = log;
@@ -2938,7 +3010,6 @@ row_log_allocate(
/******************************************************//**
Free the row log for an index that was being created online. */
-UNIV_INTERN
void
row_log_free(
/*=========*/
@@ -2946,7 +3017,7 @@ row_log_free(
{
MONITOR_ATOMIC_DEC(MONITOR_ONLINE_CREATE_INDEX);
- delete log->blobs;
+ UT_DELETE(log->blobs);
row_log_block_free(log->tail);
row_log_block_free(log->head);
row_merge_file_destroy_low(log->fd);
@@ -2961,25 +3032,24 @@ row_log_free(
mutex_free(&log->mutex);
ut_free(log);
- log = 0;
+ log = NULL;
}
/******************************************************//**
Get the latest transaction ID that has invoked row_log_online_op()
during online creation.
@return latest transaction ID, or 0 if nothing was logged */
-UNIV_INTERN
trx_id_t
row_log_get_max_trx(
/*================*/
dict_index_t* index) /*!< in: index, must be locked */
{
ut_ad(dict_index_get_online_status(index) == ONLINE_INDEX_CREATION);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad((rw_lock_own(dict_index_get_lock(index), RW_LOCK_SHARED)
+
+ ut_ad((rw_lock_own(dict_index_get_lock(index), RW_LOCK_S)
&& mutex_own(&index->online_log->mutex))
- || rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ || rw_lock_own(dict_index_get_lock(index), RW_LOCK_X));
+
return(index->online_log->max_trx);
}
@@ -3003,17 +3073,24 @@ row_log_apply_op_low(
{
mtr_t mtr;
btr_cur_t cursor;
- ulint* offsets = NULL;
+ offset_t* offsets = NULL;
ut_ad(!dict_index_is_clust(index));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX)
+
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_X)
== has_index_lock);
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(!dict_index_is_corrupted(index));
+
+ ut_ad(!index->is_corrupted());
ut_ad(trx_id != 0 || op == ROW_OP_DELETE);
+ DBUG_LOG("ib_create_index",
+ (op == ROW_OP_INSERT ? "insert " : "delete ")
+ << (has_index_lock ? "locked index " : "unlocked index ")
+ << index->id << ',' << ib::hex(trx_id) << ": "
+ << rec_printer(entry).str());
+
mtr_start(&mtr);
+ mtr.set_named_space(index->space);
/* We perform the pessimistic variant of the operations if we
already hold index->lock exclusively. First, search the
@@ -3070,6 +3147,7 @@ row_log_apply_op_low(
Lock the index tree exclusively. */
mtr_commit(&mtr);
mtr_start(&mtr);
+ mtr.set_named_space(index->space);
btr_cur_search_to_nth_level(
index, 0, entry, PAGE_CUR_LE,
BTR_MODIFY_TREE, &cursor, 0,
@@ -3086,11 +3164,11 @@ row_log_apply_op_low(
/* As there are no externally stored fields in
a secondary index record, the parameter
- rb_ctx = RB_NONE will be ignored. */
+ rollback=false will be ignored. */
btr_cur_pessimistic_delete(
error, FALSE, &cursor,
- BTR_CREATE_FLAG, RB_NONE, &mtr);
+ BTR_CREATE_FLAG, false, &mtr);
break;
case ROW_OP_INSERT:
if (exists) {
@@ -3172,6 +3250,7 @@ insert_the_rec:
Lock the index tree exclusively. */
mtr_commit(&mtr);
mtr_start(&mtr);
+ mtr.set_named_space(index->space);
btr_cur_search_to_nth_level(
index, 0, entry, PAGE_CUR_LE,
BTR_MODIFY_TREE, &cursor, 0,
@@ -3228,25 +3307,23 @@ row_log_apply_op(
in exclusive mode */
const mrec_t* mrec, /*!< in: merge record */
const mrec_t* mrec_end, /*!< in: end of buffer */
- ulint* offsets) /*!< in/out: work area for
+ offset_t* offsets) /*!< in/out: work area for
rec_init_offsets_temp() */
{
enum row_op op;
ulint extra_size;
ulint data_size;
- ulint n_ext;
dtuple_t* entry;
trx_id_t trx_id;
/* Online index creation is only used for secondary indexes. */
ut_ad(!dict_index_is_clust(index));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX)
+
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_X)
== has_index_lock);
-#endif /* UNIV_SYNC_DEBUG */
- if (dict_index_is_corrupted(index)) {
+ if (index->is_corrupted()) {
*error = DB_INDEX_CORRUPT;
return(NULL);
}
@@ -3314,38 +3391,32 @@ corrupted:
}
entry = row_rec_to_index_entry_low(
- mrec - data_size, index, offsets, &n_ext, heap);
+ mrec - data_size, index, offsets, heap);
/* Online index creation is only implemented for secondary
indexes, which never contain off-page columns. */
- ut_ad(n_ext == 0);
-#ifdef ROW_LOG_APPLY_PRINT
- if (row_log_apply_print) {
- fprintf(stderr, "apply " IB_ID_FMT " " TRX_ID_FMT " %u %u ",
- index->id, trx_id,
- unsigned (op), unsigned (has_index_lock));
- for (const byte* m = mrec - data_size; m < mrec; m++) {
- fprintf(stderr, "%02x", *m);
- }
- putc('\n', stderr);
- }
-#endif /* ROW_LOG_APPLY_PRINT */
+ ut_ad(dtuple_get_n_ext(entry) == 0);
+
row_log_apply_op_low(index, dup, error, offsets_heap,
has_index_lock, op, trx_id, entry);
return(mrec);
}
-/******************************************************//**
-Applies operations to a secondary index that was being created.
+/** Applies operations to a secondary index that was being created.
+@param[in] trx transaction (for checking if the operation was
+interrupted)
+@param[in,out] index index
+@param[in,out] dup for reporting duplicate key errors
+@param[in,out] stage performance schema accounting object, used by
+ALTER TABLE. If not NULL, then stage->inc() will be called for each block
+of log that is applied.
@return DB_SUCCESS, or error code on failure */
-static MY_ATTRIBUTE((nonnull))
+static
dberr_t
row_log_apply_ops(
-/*==============*/
- trx_t* trx, /*!< in: transaction (for checking if
- the operation was interrupted) */
- dict_index_t* index, /*!< in/out: index */
- row_merge_dup_t*dup) /*!< in/out: for reporting duplicate key
- errors */
+ const trx_t* trx,
+ dict_index_t* index,
+ row_merge_dup_t* dup,
+ ut_stage_alter_t* stage)
{
dberr_t error;
const mrec_t* mrec = NULL;
@@ -3354,22 +3425,20 @@ row_log_apply_ops(
const mrec_t* next_mrec_end;
mem_heap_t* offsets_heap;
mem_heap_t* heap;
- ulint* offsets;
+ offset_t* offsets;
bool has_index_lock;
const ulint i = 1 + REC_OFFS_HEADER_SIZE
+ dict_index_get_n_fields(index);
ut_ad(dict_index_is_online_ddl(index));
- ut_ad(*index->name == TEMP_INDEX_PREFIX);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!index->is_committed());
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_X));
ut_ad(index->online_log);
UNIV_MEM_INVALID(&mrec_end, sizeof mrec_end);
- offsets = static_cast<ulint*>(ut_malloc(i * sizeof *offsets));
- offsets[0] = i;
- offsets[1] = dict_index_get_n_fields(index);
+ offsets = static_cast<offset_t*>(ut_malloc_nokey(i * sizeof *offsets));
+ rec_offs_set_n_alloc(offsets, i);
+ rec_offs_set_n_fields(offsets, dict_index_get_n_fields(index));
offsets_heap = mem_heap_create(UNIV_PAGE_SIZE);
heap = mem_heap_create(UNIV_PAGE_SIZE);
@@ -3377,11 +3446,11 @@ row_log_apply_ops(
next_block:
ut_ad(has_index_lock);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_X));
ut_ad(index->online_log->head.bytes == 0);
+ stage->inc(row_log_progress_inc_per_block());
+
if (trx_is_interrupted(trx)) {
goto interrupted;
}
@@ -3391,7 +3460,7 @@ next_block:
goto func_exit;
}
- if (dict_index_is_corrupted(index)) {
+ if (index->is_corrupted()) {
error = DB_INDEX_CORRUPT;
goto func_exit;
}
@@ -3399,8 +3468,8 @@ next_block:
if (UNIV_UNLIKELY(index->online_log->head.blocks
> index->online_log->tail.blocks)) {
unexpected_eof:
- fprintf(stderr, "InnoDB: unexpected end of temporary file"
- " for index %s\n", index->name + 1);
+ ib::error() << "Unexpected end of temporary file for index "
+ << index->name;
corruption:
error = DB_CORRUPTION;
goto func_exit;
@@ -3411,11 +3480,13 @@ corruption:
if (index->online_log->head.blocks) {
#ifdef HAVE_FTRUNCATE
/* Truncate the file in order to save space. */
- if (index->online_log->fd != -1
+ if (index->online_log->fd > 0
&& ftruncate(index->online_log->fd, 0) == -1) {
- fprintf(stderr, "InnoDB: Error: Truncate of file "
- "\'%s\' failed with error %d:%s\n",
- index->name + 1, errno, strerror(errno));
+ ib::error()
+				<< "Truncate of file \'" << index->name + 1
+ << "\' failed with error "
+ << errno << ":" << strerror(errno);
+
goto corruption;
}
#endif /* HAVE_FTRUNCATE */
@@ -3436,11 +3507,10 @@ all_done:
goto func_exit;
}
} else {
- os_offset_t ofs;
- ibool success;
-
- ofs = (os_offset_t) index->online_log->head.blocks
+ os_offset_t ofs = static_cast<os_offset_t>(
+ index->online_log->head.blocks)
* srv_sort_buf_size;
+ IORequest request(IORequest::READ);
ut_ad(has_index_lock);
has_index_lock = false;
@@ -3453,21 +3523,21 @@ all_done:
goto func_exit;
}
- byte* buf = index->online_log->head.block;
+ byte* buf = index->online_log->head.block;
- success = os_file_read_no_error_handling_int_fd(
- index->online_log->fd,
- buf, ofs,
- srv_sort_buf_size);
+ if (DB_SUCCESS != os_file_read_no_error_handling_int_fd(
+ request, index->online_log->fd,
+ buf, ofs, srv_sort_buf_size)) {
+ ib::error()
+ << "Unable to read temporary file"
+ " for index " << index->name;
+ goto corruption;
+ }
- /* If encryption is enabled decrypt buffer after reading it
- from file system. */
- if (success && log_tmp_is_encrypted()) {
- if (!log_tmp_block_decrypt(buf,
- srv_sort_buf_size,
- index->online_log->crypt_head,
- ofs,
- index->table->space)) {
+ if (log_tmp_is_encrypted()) {
+ if (!log_tmp_block_decrypt(
+ buf, srv_sort_buf_size,
+ index->online_log->crypt_head, ofs)) {
error = DB_DECRYPTION_FAILED;
goto func_exit;
}
@@ -3476,25 +3546,11 @@ all_done:
memcpy(buf, index->online_log->crypt_head, srv_sort_buf_size);
}
- if (!success) {
- fprintf(stderr, "InnoDB: unable to read temporary file"
- " for index %s\n", index->name + 1);
- goto corruption;
- }
-
#ifdef POSIX_FADV_DONTNEED
/* Each block is read exactly once. Free up the file cache. */
posix_fadvise(index->online_log->fd,
ofs, srv_sort_buf_size, POSIX_FADV_DONTNEED);
#endif /* POSIX_FADV_DONTNEED */
-#if 0 //def FALLOC_FL_PUNCH_HOLE
- /* Try to deallocate the space for the file on disk.
- This should work on ext4 on Linux 2.6.39 and later,
- and be ignored when the operation is unsupported. */
- fallocate(index->online_log->fd,
- FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
- ofs, srv_sort_buf_size);
-#endif /* FALLOC_FL_PUNCH_HOLE */
next_mrec = index->online_log->head.block;
next_mrec_end = next_mrec + srv_sort_buf_size;
@@ -3676,18 +3732,21 @@ func_exit:
return(error);
}
-/******************************************************//**
-Apply the row log to the index upon completing index creation.
+/** Apply the row log to the index upon completing index creation.
+@param[in] trx transaction (for checking if the operation was
+interrupted)
+@param[in,out] index secondary index
+@param[in,out] table MySQL table (for reporting duplicates)
+@param[in,out] stage performance schema accounting object, used by
+ALTER TABLE. stage->begin_phase_log_index() will be called initially and then
+stage->inc() will be called for each block of log that is applied.
@return DB_SUCCESS, or error code on failure */
-UNIV_INTERN
dberr_t
row_log_apply(
-/*==========*/
- trx_t* trx, /*!< in: transaction (for checking if
- the operation was interrupted) */
- dict_index_t* index, /*!< in/out: secondary index */
- struct TABLE* table) /*!< in/out: MySQL table
- (for reporting duplicates) */
+ const trx_t* trx,
+ dict_index_t* index,
+ struct TABLE* table,
+ ut_stage_alter_t* stage)
{
dberr_t error;
row_log_t* log;
@@ -3697,12 +3756,14 @@ row_log_apply(
ut_ad(dict_index_is_online_ddl(index));
ut_ad(!dict_index_is_clust(index));
+ stage->begin_phase_log_index();
+
log_free_check();
rw_lock_x_lock(dict_index_get_lock(index));
if (!dict_table_is_corrupted(index->table)) {
- error = row_log_apply_ops(trx, index, &dup);
+ error = row_log_apply_ops(trx, index, &dup, stage);
} else {
error = DB_SUCCESS;
}
@@ -3723,11 +3784,6 @@ row_log_apply(
log = index->online_log;
index->online_log = NULL;
- /* We could remove the TEMP_INDEX_PREFIX and update the data
- dictionary to say that this index is complete, if we had
- access to the .frm file here. If the server crashes before
- all requested indexes have been created, this completed index
- will be dropped. */
rw_lock_x_unlock(dict_index_get_lock(index));
row_log_free(log);
diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc
index ac7eddfedbb..29f69317a18 100644
--- a/storage/innobase/row/row0merge.cc
+++ b/storage/innobase/row/row0merge.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2005, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2014, 2019, MariaDB Corporation.
+Copyright (c) 2014, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -33,7 +33,9 @@ Completed by Sunny Bains and Marko Makela
#include "row0ext.h"
#include "row0log.h"
#include "row0ins.h"
+#include "row0row.h"
#include "row0sel.h"
+#include "log0crypt.h"
#include "dict0crea.h"
#include "trx0purge.h"
#include "lock0lock.h"
@@ -41,74 +43,251 @@ Completed by Sunny Bains and Marko Makela
#include "ut0sort.h"
#include "row0ftsort.h"
#include "row0import.h"
+#include "row0vers.h"
#include "handler0alter.h"
-#include "ha_prototypes.h"
+#include "btr0bulk.h"
+#include "ut0stage.h"
#include "fil0crypt.h"
/* Ignore posix_fadvise() on those platforms where it does not exist */
-#if defined __WIN__
+#if defined _WIN32
# define posix_fadvise(fd, offset, len, advice) /* nothing */
-#endif /* __WIN__ */
-
-#ifdef UNIV_DEBUG
-/** Set these in order ot enable debug printout. */
-/* @{ */
-/** Log each record read from temporary file. */
-static ibool row_merge_print_read;
-/** Log each record write to temporary file. */
-static ibool row_merge_print_write;
-/** Log each row_merge_blocks() call, merging two blocks of records to
-a bigger one. */
-static ibool row_merge_print_block;
-/** Log each block read from temporary file. */
-static ibool row_merge_print_block_read;
-/** Log each block read from temporary file. */
-static ibool row_merge_print_block_write;
-/* @} */
-#endif /* UNIV_DEBUG */
+#endif /* _WIN32 */
/* Whether to disable file system cache */
-UNIV_INTERN char srv_disable_sort_file_cache;
+char srv_disable_sort_file_cache;
+
+/** Class that caches index row tuples made from a single clustered
+index page scan, and then inserts them into the corresponding index tree */
+class index_tuple_info_t {
+public:
+ /** constructor
+ @param[in] heap memory heap
+ @param[in] index index to be created */
+ index_tuple_info_t(
+ mem_heap_t* heap,
+ dict_index_t* index) UNIV_NOTHROW
+ {
+ m_heap = heap;
+ m_index = index;
+ m_dtuple_vec = UT_NEW_NOKEY(idx_tuple_vec());
+ }
-/* Maximum pending doc memory limit in bytes for a fts tokenization thread */
-#define FTS_PENDING_DOC_MEMORY_LIMIT 1000000
+ /** destructor */
+ ~index_tuple_info_t()
+ {
+ UT_DELETE(m_dtuple_vec);
+ }
-#ifdef UNIV_DEBUG
-/******************************************************//**
-Display a merge tuple. */
-static MY_ATTRIBUTE((nonnull))
-void
-row_merge_tuple_print(
-/*==================*/
- FILE* f, /*!< in: output stream */
- const mtuple_t* entry, /*!< in: tuple to print */
- ulint n_fields)/*!< in: number of fields in the tuple */
-{
- ulint j;
+ /** Get the index object
+ @return the index object */
+ dict_index_t* get_index() UNIV_NOTHROW
+ {
+ return(m_index);
+ }
- for (j = 0; j < n_fields; j++) {
- const dfield_t* field = &entry->fields[j];
+	/** Caches an index row into the index tuple vector
+ @param[in] row table row
+ @param[in] ext externally stored column
+ prefixes, or NULL */
+ void add(
+ const dtuple_t* row,
+ const row_ext_t* ext) UNIV_NOTHROW
+ {
+ dtuple_t* dtuple;
- if (dfield_is_null(field)) {
- fputs("\n NULL;", f);
- } else {
- ulint field_len = dfield_get_len(field);
- ulint len = ut_min(field_len, 20);
- if (dfield_is_ext(field)) {
- fputs("\nE", f);
- } else {
- fputs("\n ", f);
+ dtuple = row_build_index_entry(row, ext, m_index, m_heap);
+
+ ut_ad(dtuple);
+
+ m_dtuple_vec->push_back(dtuple);
+ }
+
+	/** Insert the cached spatial index rows into the spatial index
+ @param[in] trx_id transaction id
+ @param[in,out] row_heap memory heap
+ @param[in] pcur cluster index scanning cursor
+ @param[in,out] scan_mtr mini-transaction for pcur
+ @return DB_SUCCESS if successful, else error number */
+ inline dberr_t insert(
+ trx_id_t trx_id,
+ mem_heap_t* row_heap,
+ btr_pcur_t* pcur,
+ mtr_t* scan_mtr)
+ {
+ big_rec_t* big_rec;
+ rec_t* rec;
+ btr_cur_t ins_cur;
+ mtr_t mtr;
+ rtr_info_t rtr_info;
+ offset_t* ins_offsets = NULL;
+ dberr_t error = DB_SUCCESS;
+ dtuple_t* dtuple;
+ ulint count = 0;
+ const ulint flag = BTR_NO_UNDO_LOG_FLAG
+ | BTR_NO_LOCKING_FLAG
+ | BTR_KEEP_SYS_FLAG | BTR_CREATE_FLAG;
+
+ ut_ad(dict_index_is_spatial(m_index));
+
+ DBUG_EXECUTE_IF("row_merge_instrument_log_check_flush",
+ log_sys->check_flush_or_checkpoint = true;
+ );
+
+ for (idx_tuple_vec::iterator it = m_dtuple_vec->begin();
+ it != m_dtuple_vec->end();
+ ++it) {
+ dtuple = *it;
+ ut_ad(dtuple);
+
+ if (log_sys->check_flush_or_checkpoint) {
+ if (scan_mtr->is_active()) {
+ btr_pcur_move_to_prev_on_page(pcur);
+ btr_pcur_store_position(pcur, scan_mtr);
+ scan_mtr->commit();
+ }
+
+ log_free_check();
}
- ut_print_buf(f, dfield_get_data(field), len);
- if (len != field_len) {
- fprintf(f, " (total " ULINTPF " bytes)",
- field_len);
+
+ mtr.start();
+ mtr.set_named_space(m_index->space);
+
+ ins_cur.index = m_index;
+ rtr_init_rtr_info(&rtr_info, false, &ins_cur, m_index,
+ false);
+ rtr_info_update_btr(&ins_cur, &rtr_info);
+
+ btr_cur_search_to_nth_level(m_index, 0, dtuple,
+ PAGE_CUR_RTREE_INSERT,
+ BTR_MODIFY_LEAF, &ins_cur,
+ 0, __FILE__, __LINE__,
+ &mtr);
+
+		/* The MBR in the parent entry may need to be updated,
+		so change the search mode to BTR_MODIFY_TREE */
+ if (rtr_info.mbr_adj) {
+ mtr_commit(&mtr);
+ rtr_clean_rtr_info(&rtr_info, true);
+ rtr_init_rtr_info(&rtr_info, false, &ins_cur,
+ m_index, false);
+ rtr_info_update_btr(&ins_cur, &rtr_info);
+ mtr_start(&mtr);
+ mtr.set_named_space(m_index->space);
+ btr_cur_search_to_nth_level(
+ m_index, 0, dtuple,
+ PAGE_CUR_RTREE_INSERT,
+ BTR_MODIFY_TREE, &ins_cur, 0,
+ __FILE__, __LINE__, &mtr);
}
+
+ error = btr_cur_optimistic_insert(
+ flag, &ins_cur, &ins_offsets, &row_heap,
+ dtuple, &rec, &big_rec, 0, NULL, &mtr);
+
+ if (error == DB_FAIL) {
+ ut_ad(!big_rec);
+ mtr.commit();
+ mtr.start();
+ mtr.set_named_space(m_index->space);
+
+ rtr_clean_rtr_info(&rtr_info, true);
+ rtr_init_rtr_info(&rtr_info, false,
+ &ins_cur, m_index, false);
+
+ rtr_info_update_btr(&ins_cur, &rtr_info);
+ btr_cur_search_to_nth_level(
+ m_index, 0, dtuple,
+ PAGE_CUR_RTREE_INSERT,
+ BTR_MODIFY_TREE,
+ &ins_cur, 0,
+ __FILE__, __LINE__, &mtr);
+
+
+ error = btr_cur_pessimistic_insert(
+ flag, &ins_cur, &ins_offsets,
+ &row_heap, dtuple, &rec,
+ &big_rec, 0, NULL, &mtr);
+ }
+
+ DBUG_EXECUTE_IF(
+ "row_merge_ins_spatial_fail",
+ error = DB_FAIL;
+ );
+
+ if (error == DB_SUCCESS) {
+ if (rtr_info.mbr_adj) {
+ error = rtr_ins_enlarge_mbr(
+ &ins_cur, NULL, &mtr);
+ }
+
+ if (error == DB_SUCCESS) {
+ page_update_max_trx_id(
+ btr_cur_get_block(&ins_cur),
+ btr_cur_get_page_zip(&ins_cur),
+ trx_id, &mtr);
+ }
+ }
+
+ mtr_commit(&mtr);
+
+ rtr_clean_rtr_info(&rtr_info, true);
+ count++;
}
+
+ m_dtuple_vec->clear();
+
+ return(error);
}
- putc('\n', f);
-}
-#endif /* UNIV_DEBUG */
+
+private:
+	/** Cache of index rows made from a clustered index scan,
+	usually for rows on a single clustered index page */
+ typedef std::vector<dtuple_t*, ut_allocator<dtuple_t*> >
+ idx_tuple_vec;
+
+ /** vector used to cache index rows made from cluster index scan */
+ idx_tuple_vec* m_dtuple_vec;
+
+ /** the index being built */
+ dict_index_t* m_index;
+
+ /** memory heap for creating index tuples */
+ mem_heap_t* m_heap;
+};
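A hedged sketch of how the clustered-index scan presumably drives this cache (the actual call sites are in row_merge_read_clustered_index(), outside this hunk): add() is called per row that carries a spatial key, and insert() flushes the batch at a convenient boundary. The surrounding names and the flush policy are assumptions of the sketch.

	/* Pseudo-usage of index_tuple_info_t; illustrative only. */
	static dberr_t cache_spatial_row(index_tuple_info_t* sp_tuples,
					 const dtuple_t* row,
					 const row_ext_t* ext,
					 trx_id_t trx_id,
					 mem_heap_t* row_heap,
					 btr_pcur_t* pcur,
					 mtr_t* scan_mtr,
					 bool flush)
	{
		/* Cache one spatial index tuple built from the row. */
		sp_tuples->add(row, ext);

		if (!flush) {
			return(DB_SUCCESS);
		}

		/* Batch-insert everything cached so far; insert() may
		pause the scan mini-transaction if a log flush is
		pending. */
		return(sp_tuples->insert(trx_id, row_heap, pcur, scan_mtr));
	}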
+
+/* Maximum pending doc memory limit in bytes for a fts tokenization thread */
+#define FTS_PENDING_DOC_MEMORY_LIMIT 1000000
+
+/** Insert sorted data tuples to the index.
+@param[in] index index to be inserted
+@param[in] old_table old table
+@param[in] fd file descriptor
+@param[in,out] block file buffer
+@param[in]	row_buf		the sorted data tuples, or NULL if fd
+and block are to be used instead
+@param[in,out] btr_bulk btr bulk instance
+@param[in,out] stage performance schema accounting object, used by
+ALTER TABLE. If not NULL stage->begin_phase_insert() will be called initially
+and then stage->inc() will be called for each record that is processed.
+@return DB_SUCCESS or error number */
+static MY_ATTRIBUTE((warn_unused_result))
+dberr_t
+row_merge_insert_index_tuples(
+ dict_index_t* index,
+ const dict_table_t* old_table,
+ int fd,
+ row_merge_block_t* block,
+ const row_merge_buf_t* row_buf,
+ BtrBulk* btr_bulk,
+ const ib_uint64_t table_total_rows, /*!< in: total rows of old table */
+ const double pct_progress, /*!< in: total progress
+ percent until now */
+ const double pct_cost, /*!< in: current progress percent
+ */
+ row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */
+ ulint space, /*!< in: space id */
+ ut_stage_alter_t* stage = NULL);
/******************************************************//**
Encode an index record. */
@@ -148,7 +327,7 @@ row_merge_buf_encode(
/******************************************************//**
Allocate a sort buffer.
-@return own: sort buffer */
+@return own: sort buffer */
static MY_ATTRIBUTE((malloc, nonnull))
row_merge_buf_t*
row_merge_buf_create_low(
@@ -171,7 +350,7 @@ row_merge_buf_create_low(
buf->index = index;
buf->max_tuples = max_tuples;
buf->tuples = static_cast<mtuple_t*>(
- ut_malloc(2 * max_tuples * sizeof *buf->tuples));
+ ut_malloc_nokey(2 * max_tuples * sizeof *buf->tuples));
buf->tmp_tuples = buf->tuples + max_tuples;
return(buf);
@@ -179,8 +358,7 @@ row_merge_buf_create_low(
/******************************************************//**
Allocate a sort buffer.
-@return own: sort buffer */
-UNIV_INTERN
+@return own: sort buffer */
row_merge_buf_t*
row_merge_buf_create(
/*=================*/
@@ -191,8 +369,9 @@ row_merge_buf_create(
ulint buf_size;
mem_heap_t* heap;
- max_tuples = (srv_sort_buf_size)
- / ut_max(1, dict_index_get_min_size(index));
+ max_tuples = srv_sort_buf_size
+ / ut_max(static_cast<ulint>(1),
+ dict_index_get_min_size(index));
buf_size = (sizeof *buf);
@@ -205,8 +384,7 @@ row_merge_buf_create(
/******************************************************//**
Empty a sort buffer.
-@return sort buffer */
-UNIV_INTERN
+@return sort buffer */
row_merge_buf_t*
row_merge_buf_empty(
/*================*/
@@ -232,7 +410,6 @@ row_merge_buf_empty(
/******************************************************//**
Deallocate a sort buffer. */
-UNIV_INTERN
void
row_merge_buf_free(
/*===============*/
@@ -258,7 +435,7 @@ row_merge_buf_redundant_convert(
const dfield_t* row_field,
dfield_t* field,
ulint len,
- ulint zip_size,
+ const page_size_t& page_size,
mem_heap_t* heap)
{
ut_ad(field->type.mbminlen == 1);
@@ -269,7 +446,7 @@ row_merge_buf_redundant_convert(
ut_ad(field_len <= len);
if (row_field->ext) {
- const byte* field_data = static_cast<byte*>(
+ const byte* field_data = static_cast<const byte*>(
dfield_get_data(row_field));
ulint ext_len;
@@ -278,7 +455,7 @@ row_merge_buf_redundant_convert(
field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE));
byte* data = btr_copy_externally_stored_field(
- &ext_len, field_data, zip_size, field_len, heap);
+ &ext_len, field_data, page_size, field_len, heap);
ut_ad(ext_len < len);
@@ -297,8 +474,9 @@ row_merge_buf_redundant_convert(
@param[in,out] buf sort buffer
@param[in] fts_index fts index to be created
@param[in] old_table original table
+@param[in] new_table new table
@param[in,out] psort_info parallel sort info
-@param[in] row table row
+@param[in,out] row table row
@param[in] ext cache of externally stored
column prefixes, or NULL
@param[in,out] doc_id Doc ID if we are creating
@@ -307,8 +485,10 @@ row_merge_buf_redundant_convert(
converting to ROW_FORMAT=REDUNDANT, or NULL
when not to invoke
row_merge_buf_redundant_convert()
-@param[in,out] exceed_page set if the record size exceeds the page size
- when converting to ROW_FORMAT=REDUNDANT
+@param[in,out] err set if error occurs
+@param[in,out] v_heap heap memory to process data for virtual column
+@param[in,out] my_table mysql table object
+@param[in] trx transaction object
@return number of rows added, 0 if out of space */
static
ulint
@@ -316,12 +496,16 @@ row_merge_buf_add(
row_merge_buf_t* buf,
dict_index_t* fts_index,
const dict_table_t* old_table,
+ const dict_table_t* new_table,
fts_psort_t* psort_info,
- const dtuple_t* row,
+ dtuple_t* row,
const row_ext_t* ext,
doc_id_t* doc_id,
mem_heap_t* conv_heap,
- bool* exceed_page)
+ dberr_t* err,
+ mem_heap_t** v_heap,
+ TABLE* my_table,
+ trx_t* trx)
{
ulint i;
const dict_index_t* index;
@@ -334,6 +518,8 @@ row_merge_buf_add(
ulint bucket = 0;
doc_id_t write_doc_id;
ulint n_row_added = 0;
+ VCOL_STORAGE* vcol_storage= 0;
+ byte* record;
DBUG_ENTER("row_merge_buf_add");
if (buf->n_tuples >= buf->max_tuples) {
@@ -351,6 +537,9 @@ row_merge_buf_add(
fts_index */
index = (buf->index->type & DICT_FTS) ? fts_index : buf->index;
+	/* Creation of a spatial index should not come here */
+ ut_ad(!dict_index_is_spatial(index));
+
n_fields = dict_index_get_n_fields(index);
entry = &buf->tuples[buf->n_tuples];
@@ -370,11 +559,17 @@ row_merge_buf_add(
const dfield_t* row_field;
col = ifield->col;
+ const dict_v_col_t* v_col = NULL;
+ if (dict_col_is_virtual(col)) {
+ v_col = reinterpret_cast<const dict_v_col_t*>(col);
+ }
+
col_no = dict_col_get_no(col);
/* Process the Doc ID column */
if (*doc_id > 0
- && col_no == index->table->fts->doc_col) {
+ && col_no == index->table->fts->doc_col
+ && !dict_col_is_virtual(col)) {
fts_write_doc_id((byte*) &write_doc_id, *doc_id);
/* Note: field->data now points to a value on the
@@ -392,9 +587,33 @@ row_merge_buf_add(
field->type.mbmaxlen = 0;
field->type.len = ifield->col->len;
} else {
- row_field = dtuple_get_nth_field(row, col_no);
+ /* Use callback to get the virtual column value */
+ if (dict_col_is_virtual(col)) {
+ dict_index_t* clust_index
+ = dict_table_get_first_index(new_table);
+
+				if (!vcol_storage
+				    && innobase_allocate_row_for_vcol(
+					    trx->mysql_thd, clust_index,
+					    v_heap, &my_table,
+					    &record, &vcol_storage)) {
+ *err = DB_OUT_OF_MEMORY;
+ goto error;
+ }
+
+ row_field = innobase_get_computed_value(
+ row, v_col, clust_index,
+ v_heap, NULL, ifield, trx->mysql_thd,
+ my_table, record, old_table, NULL,
+ NULL);
+
+ if (row_field == NULL) {
+ *err = DB_COMPUTE_VALUE_FAILED;
+ goto error;
+ }
+ dfield_copy(field, row_field);
+ } else {
+ row_field = dtuple_get_nth_field(row, col_no);
+ dfield_copy(field, row_field);
+ }
- dfield_copy(field, row_field);
/* Tokenize and process data for FTS */
if (index->type & DICT_FTS) {
@@ -416,14 +635,14 @@ row_merge_buf_add(
row,
index->table->fts->doc_col);
*doc_id = (doc_id_t) mach_read_from_8(
- static_cast<byte*>(
+ static_cast<const byte*>(
dfield_get_data(doc_field)));
if (*doc_id == 0) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "FTS Doc ID is zero. "
- "Record Skipped");
- DBUG_RETURN(0);
+ ib::warn() << "FTS Doc ID is"
+ " zero. Record"
+ " skipped";
+ goto error;
}
}
@@ -432,8 +651,8 @@ row_merge_buf_add(
continue;
}
- ptr = ut_malloc(sizeof(*doc_item)
- + field->len);
+ ptr = ut_malloc_nokey(sizeof(*doc_item)
+ + field->len);
doc_item = static_cast<fts_doc_item_t*>(ptr);
value = static_cast<byte*>(ptr)
@@ -451,7 +670,6 @@ row_merge_buf_add(
if (psort_info[bucket].error == DB_SUCCESS) {
UT_LIST_ADD_LAST(
- doc_list,
psort_info[bucket].fts_doc_list,
doc_item);
psort_info[bucket].memory_used +=
@@ -476,11 +694,10 @@ row_merge_buf_add(
if (field->len != UNIV_SQL_NULL
&& col->mtype == DATA_MYSQL
&& col->len != field->len) {
-
if (conv_heap != NULL) {
row_merge_buf_redundant_convert(
row_field, field, col->len,
- dict_table_zip_size(old_table),
+ dict_table_page_size(old_table),
conv_heap);
} else {
/* Field length mismatch should not
@@ -510,7 +727,8 @@ row_merge_buf_add(
len = dfield_get_len(field);
}
}
- } else {
+ } else if (!dict_col_is_virtual(col)) {
+			/* Only non-virtual columns are stored externally */
const byte* buf = row_ext_lookup(ext, col_no,
&len);
if (UNIV_LIKELY_NULL(buf)) {
@@ -531,7 +749,8 @@ row_merge_buf_add(
dfield_set_len(field, len);
}
- ut_ad(len <= col->len || col->mtype == DATA_BLOB);
+ ut_ad(len <= col->len
+ || DATA_LARGE_MTYPE(col->mtype));
fixed_len = ifield->fixed_len;
if (fixed_len && !dict_table_is_comp(index->table)
@@ -556,7 +775,7 @@ row_merge_buf_add(
} else if (dfield_is_ext(field)) {
extra_size += 2;
} else if (len < 128
- || (col->len < 256 && col->mtype != DATA_BLOB)) {
+ || (!DATA_BIG_COL(col))) {
extra_size++;
} else {
/* For variable-length columns, we look up the
@@ -571,7 +790,7 @@ row_merge_buf_add(
/* If this is FTS index, we already populated the sort buffer, return
here */
if (index->type & DICT_FTS) {
- DBUG_RETURN(n_row_added);
+ goto end;
}
#ifdef UNIV_DEBUG
@@ -598,14 +817,14 @@ row_merge_buf_add(
ut_ad(size < UNIV_PAGE_SIZE) in rec_offs_data_size().
It may hit the assert before attempting to insert the row. */
if (conv_heap != NULL && data_size > UNIV_PAGE_SIZE) {
- *exceed_page = true;
+ *err = DB_TOO_BIG_RECORD;
}
ut_ad(data_size < srv_sort_buf_size);
/* Reserve bytes for the end marker of row_merge_block_t. */
if (buf->total_size + data_size >= srv_sort_buf_size) {
- DBUG_RETURN(0);
+ goto error;
}
buf->total_size += data_size;
@@ -624,12 +843,19 @@ row_merge_buf_add(
mem_heap_empty(conv_heap);
}
+end:
+ if (vcol_storage)
+ innobase_free_row_for_vcol(vcol_storage);
DBUG_RETURN(n_row_added);
+
+error:
+ if (vcol_storage)
+ innobase_free_row_for_vcol(vcol_storage);
+ DBUG_RETURN(0);
}
/*************************************************************//**
Report a duplicate key. */
-UNIV_INTERN
void
row_merge_dup_report(
/*=================*/
@@ -645,7 +871,8 @@ row_merge_dup_report(
/*************************************************************//**
Compare two tuples.
-@return 1, 0, -1 if a is greater, equal, less, respectively, than b */
+@return positive, 0, or negative if a is greater than, equal to, or
+less than b, respectively */
static MY_ATTRIBUTE((warn_unused_result))
int
row_merge_tuple_cmp(
@@ -709,17 +936,18 @@ no_report:
/** Wrapper for row_merge_tuple_sort() to inject some more context to
UT_SORT_FUNCTION_BODY().
-@param tuples array of tuples that being sorted
-@param aux work area, same size as tuples[]
-@param low lower bound of the sorting area, inclusive
-@param high upper bound of the sorting area, inclusive */
+@param tuples array of tuples that are being sorted
+@param aux work area, same size as tuples[]
+@param low lower bound of the sorting area, inclusive
+@param high upper bound of the sorting area, inclusive */
#define row_merge_tuple_sort_ctx(tuples, aux, low, high) \
row_merge_tuple_sort(n_uniq, n_field, dup, tuples, aux, low, high)
/** Wrapper for row_merge_tuple_cmp() to inject some more context to
UT_SORT_FUNCTION_BODY().
-@param a first tuple to be compared
-@param b second tuple to be compared
-@return 1, 0, -1 if a is greater, equal, less, respectively, than b */
+@param a first tuple to be compared
+@param b second tuple to be compared
+@return positive, 0, or negative if a is greater than, equal to, or less
+than b, respectively */
#define row_merge_tuple_cmp_ctx(a,b) \
row_merge_tuple_cmp(n_uniq, n_field, a, b, dup)
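The relaxed comparator contract above (any positive/zero/negative value instead of exactly 1/0/-1) is the memcmp()/strcmp() convention, so callers must test only the sign of the result. A minimal standalone sketch of the convention:

#include <cstring>

static int cmp_sketch(const char* a, const char* b)
{
	return(std::strcmp(a, b));	/* may return e.g. 2 or -7 */
}

static bool ordered_sketch(const char* a, const char* b)
{
	/* Correct: test the sign.  Wrong: compare against == 1 or
	== -1, which the relaxed contract no longer guarantees. */
	return(cmp_sketch(a, b) <= 0);
}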
@@ -749,7 +977,6 @@ row_merge_tuple_sort(
/******************************************************//**
Sort a buffer. */
-UNIV_INTERN
void
row_merge_buf_sort(
/*===============*/
@@ -757,6 +984,8 @@ row_merge_buf_sort(
row_merge_dup_t* dup) /*!< in/out: reporter of duplicates
(NULL if non-unique index) */
{
+ ut_ad(!dict_index_is_spatial(buf->index));
+
row_merge_tuple_sort(dict_index_get_n_unique(buf->index),
dict_index_get_n_fields(buf->index),
dup,
@@ -765,7 +994,6 @@ row_merge_buf_sort(
/******************************************************//**
Write a buffer to a block. */
-UNIV_INTERN
void
row_merge_buf_write(
/*================*/
@@ -778,19 +1006,19 @@ row_merge_buf_write(
ulint n_fields= dict_index_get_n_fields(index);
byte* b = &block[0];
+ DBUG_ENTER("row_merge_buf_write");
+
for (ulint i = 0; i < buf->n_tuples; i++) {
const mtuple_t* entry = &buf->tuples[i];
row_merge_buf_encode(&b, index, entry, n_fields);
ut_ad(b < &block[srv_sort_buf_size]);
-#ifdef UNIV_DEBUG
- if (row_merge_print_write) {
- fprintf(stderr, "row_merge_buf_write %p,%d,"
- ULINTPF " " ULINTPF,
- (void*) b, of->fd, of->offset, i);
- row_merge_tuple_print(stderr, entry, n_fields);
- }
-#endif /* UNIV_DEBUG */
+
+ DBUG_LOG("ib_merge_sort",
+ reinterpret_cast<const void*>(b) << ','
+ << of->fd << ',' << of->offset << ' ' <<
+ i << ": " <<
+ rec_printer(entry->fields, n_fields).str());
}
/* Write an "end-of-chunk" marker. */
@@ -802,26 +1030,24 @@ row_merge_buf_write(
to avoid bogus warnings. */
memset(b, 0xff, &block[srv_sort_buf_size] - b);
#endif /* UNIV_DEBUG_VALGRIND */
-#ifdef UNIV_DEBUG
- if (row_merge_print_write) {
- fprintf(stderr, "row_merge_buf_write %p,%d," ULINTPF " EOF\n",
- (void*) b, of->fd, of->offset);
- }
-#endif /* UNIV_DEBUG */
+ DBUG_LOG("ib_merge_sort",
+ "write " << reinterpret_cast<const void*>(b) << ','
+ << of->fd << ',' << of->offset << " EOF");
+ DBUG_VOID_RETURN;
}
/******************************************************//**
Create a memory heap and allocate space for row_merge_rec_offsets()
and mrec_buf_t[3].
-@return memory heap */
+@return memory heap */
static
mem_heap_t*
row_merge_heap_create(
/*==================*/
const dict_index_t* index, /*!< in: record descriptor */
mrec_buf_t** buf, /*!< out: 3 buffers */
- ulint** offsets1, /*!< out: offsets */
- ulint** offsets2) /*!< out: offsets */
+ offset_t** offsets1, /*!< out: offsets */
+ offset_t** offsets2) /*!< out: offsets */
{
ulint i = 1 + REC_OFFS_HEADER_SIZE
+ dict_index_get_n_fields(index);
@@ -830,21 +1056,21 @@ row_merge_heap_create(
*buf = static_cast<mrec_buf_t*>(
mem_heap_alloc(heap, 3 * sizeof **buf));
- *offsets1 = static_cast<ulint*>(
+ *offsets1 = static_cast<offset_t*>(
mem_heap_alloc(heap, i * sizeof **offsets1));
- *offsets2 = static_cast<ulint*>(
+ *offsets2 = static_cast<offset_t*>(
mem_heap_alloc(heap, i * sizeof **offsets2));
- (*offsets1)[0] = (*offsets2)[0] = i;
- (*offsets1)[1] = (*offsets2)[1] = dict_index_get_n_fields(index);
+ rec_offs_set_n_alloc(*offsets1, i);
+ rec_offs_set_n_alloc(*offsets2, i);
+ rec_offs_set_n_fields(*offsets1, dict_index_get_n_fields(index));
+ rec_offs_set_n_fields(*offsets2, dict_index_get_n_fields(index));
return(heap);
}
-/********************************************************************//**
-Read a merge block from the file system.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
+/** Read a merge block from the file system.
+@return whether the request was completed successfully */
bool
row_merge_read(
/*===========*/
@@ -857,24 +1083,20 @@ row_merge_read(
ulint space) /*!< in: space id */
{
os_offset_t ofs = ((os_offset_t) offset) * srv_sort_buf_size;
- bool success;
- DBUG_EXECUTE_IF("row_merge_read_failure", return(FALSE););
+ DBUG_ENTER("row_merge_read");
+ DBUG_LOG("ib_merge_sort", "fd=" << fd << " ofs=" << ofs);
+ DBUG_EXECUTE_IF("row_merge_read_failure", DBUG_RETURN(FALSE););
-#ifdef UNIV_DEBUG
- if (row_merge_print_block_read) {
- fprintf(stderr, "row_merge_read fd=%d ofs=" ULINTPF "\n",
- fd, offset);
- }
-#endif /* UNIV_DEBUG */
-
- success = os_file_read_no_error_handling_int_fd(fd, buf,
- ofs, srv_sort_buf_size);
+ IORequest request(IORequest::READ);
+ const bool success = DB_SUCCESS
+ == os_file_read_no_error_handling_int_fd(
+ request, fd, buf, ofs, srv_sort_buf_size);
/* If encryption is enabled decrypt buffer */
if (success && log_tmp_is_encrypted()) {
if (!log_tmp_block_decrypt(buf, srv_sort_buf_size,
- crypt_buf, ofs, space)) {
+ crypt_buf, ofs)) {
return (FALSE);
}
@@ -887,19 +1109,18 @@ row_merge_read(
posix_fadvise(fd, ofs, srv_sort_buf_size, POSIX_FADV_DONTNEED);
#endif /* POSIX_FADV_DONTNEED */
- if (UNIV_UNLIKELY(!success)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: failed to read merge block at " UINT64PF "\n",
- ofs);
+ if (!success) {
+ ib::error() << "Failed to read merge block at " << ofs;
}
- return(UNIV_LIKELY(success));
+ DBUG_RETURN(success);
}
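A standalone sketch of the read path above, assuming a seek-and-read primitive and an optional in-place decryption step; the names are illustrative only, not the InnoDB API:

#include <cstdint>
#include <cstdio>
#include <vector>

static bool read_merge_block_sketch(std::FILE* f, uint64_t block_no,
				    std::vector<unsigned char>& buf)
{
	/* Merge files are addressed in fixed-size blocks:
	byte offset = block number * block size
	(ofs = offset * srv_sort_buf_size above). */
	const uint64_t ofs = block_no * buf.size();

	if (std::fseek(f, long(ofs), SEEK_SET) != 0
	    || std::fread(&buf[0], 1, buf.size(), f) != buf.size()) {
		return(false);	/* "Failed to read merge block at ..." */
	}

	/* For an encrypted temporary file, the block would be
	decrypted in place here (log_tmp_block_decrypt() above). */
	return(true);
}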
/********************************************************************//**
Write a merge block to the file system.
-@return TRUE if request was successful, FALSE if fail */
+@return whether the request was completed successfully */
UNIV_INTERN
bool
row_merge_write(
@@ -913,30 +1134,28 @@ row_merge_write(
{
size_t buf_len = srv_sort_buf_size;
os_offset_t ofs = buf_len * (os_offset_t) offset;
- bool ret;
void* out_buf = (void *)buf;
- DBUG_EXECUTE_IF("row_merge_write_failure", return(FALSE););
+ DBUG_ENTER("row_merge_write");
+ DBUG_LOG("ib_merge_sort", "fd=" << fd << " ofs=" << ofs);
+ DBUG_EXECUTE_IF("row_merge_write_failure", DBUG_RETURN(FALSE););
/* For encrypted tables, encrypt data before writing */
if (log_tmp_is_encrypted()) {
- if (!log_tmp_block_encrypt((const byte *)buf, buf_len,
- (byte *)crypt_buf, ofs, space)) {
- return (FALSE);
+ if (!log_tmp_block_encrypt(static_cast<const byte*>(buf),
+ buf_len,
+ static_cast<byte*>(crypt_buf),
+ ofs)) {
+ return false;
}
srv_stats.n_merge_blocks_encrypted.inc();
out_buf = crypt_buf;
}
- ret = os_file_write_int_fd("(merge)", fd, out_buf, ofs, buf_len);
-
-#ifdef UNIV_DEBUG
- if (row_merge_print_block_write) {
- fprintf(stderr, "row_merge_write fd=%d ofs=" ULINTPF "\n",
- fd, offset);
- }
-#endif /* UNIV_DEBUG */
+ IORequest request(IORequest::WRITE);
+ const bool success = DB_SUCCESS == os_file_write_int_fd(
+ request, "(merge)", fd, out_buf, ofs, buf_len);
#ifdef POSIX_FADV_DONTNEED
/* The block will be needed on the next merge pass,
@@ -944,13 +1163,12 @@ row_merge_write(
posix_fadvise(fd, ofs, buf_len, POSIX_FADV_DONTNEED);
#endif /* POSIX_FADV_DONTNEED */
- return(UNIV_LIKELY(ret));
+ DBUG_RETURN(success);
}
/********************************************************************//**
Read a merge record.
-@return pointer to next record, or NULL on I/O error or end of list */
-UNIV_INTERN
+@return pointer to next record, or NULL on I/O error or end of list */
const byte*
row_merge_read_rec(
/*===============*/
@@ -963,7 +1181,7 @@ row_merge_read_rec(
const mrec_t** mrec, /*!< out: pointer to merge record,
or NULL on end of list
(non-NULL on I/O error) */
- ulint* offsets,/*!< out: offsets of mrec */
+ offset_t* offsets,/*!< out: offsets of mrec */
row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */
ulint space) /*!< in: space id */
{
@@ -974,23 +1192,21 @@ row_merge_read_rec(
ut_ad(b >= &block[0]);
ut_ad(b < &block[srv_sort_buf_size]);
- ut_ad(*offsets == 1 + REC_OFFS_HEADER_SIZE
+ ut_ad(rec_offs_get_n_alloc(offsets) == 1 + REC_OFFS_HEADER_SIZE
+ dict_index_get_n_fields(index));
+ DBUG_ENTER("row_merge_read_rec");
+
extra_size = *b++;
if (UNIV_UNLIKELY(!extra_size)) {
/* End of list */
*mrec = NULL;
-#ifdef UNIV_DEBUG
- if (row_merge_print_read) {
- fprintf(stderr, "row_merge_read %p,%p,%d," ULINTPF
- " EOF\n",
- (const void*) b, (const void*) block,
- fd, *foffs);
- }
-#endif /* UNIV_DEBUG */
- return(NULL);
+ DBUG_LOG("ib_merge_sort",
+ "read " << reinterpret_cast<const void*>(b) << ',' <<
+ reinterpret_cast<const void*>(block) << ',' <<
+ fd << ',' << *foffs << " EOF");
+ DBUG_RETURN(NULL);
}
if (extra_size >= 0x80) {
@@ -1003,7 +1219,7 @@ row_merge_read_rec(
err_exit:
/* Signal I/O error. */
*mrec = b;
- return(NULL);
+ DBUG_RETURN(NULL);
}
/* Wrap around to the beginning of the buffer. */
@@ -1082,14 +1298,8 @@ err_exit:
avail_size = &block[srv_sort_buf_size] - b;
memcpy(*buf, b, avail_size);
*mrec = *buf + extra_size;
-#ifdef UNIV_DEBUG
- /* We cannot invoke rec_offs_make_valid() here, because there
- are no REC_N_NEW_EXTRA_BYTES between extra_size and data_size.
- Similarly, rec_offs_validate() would fail, because it invokes
- rec_get_status(). */
- offsets[2] = (ulint) *mrec;
- offsets[3] = (ulint) index;
-#endif /* UNIV_DEBUG */
+
+ rec_init_offsets_temp(*mrec, index, offsets);
if (!row_merge_read(fd, ++(*foffs), block,
crypt_block,
@@ -1106,17 +1316,12 @@ err_exit:
b += extra_size + data_size - avail_size;
func_exit:
-#ifdef UNIV_DEBUG
- if (row_merge_print_read) {
- fprintf(stderr, "row_merge_read %p,%p,%d," ULINTPF " ",
- (const void*) b, (const void*) block,
- fd, *foffs);
- rec_print_comp(stderr, *mrec, offsets);
- putc('\n', stderr);
- }
-#endif /* UNIV_DEBUG */
-
- return(b);
+ DBUG_LOG("ib_merge_sort",
+ reinterpret_cast<const void*>(b) << ',' <<
+ reinterpret_cast<const void*>(block)
+ << ",fd=" << fd << ',' << *foffs << ": "
+ << rec_printer(*mrec, 0, offsets).str());
+ DBUG_RETURN(b);
}
/********************************************************************//**
@@ -1127,29 +1332,28 @@ row_merge_write_rec_low(
/*====================*/
byte* b, /*!< out: buffer */
ulint e, /*!< in: encoded extra_size */
-#ifdef UNIV_DEBUG
+#ifndef DBUG_OFF
ulint size, /*!< in: total size to write */
int fd, /*!< in: file descriptor */
ulint foffs, /*!< in: file offset */
-#endif /* UNIV_DEBUG */
+#endif /* !DBUG_OFF */
const mrec_t* mrec, /*!< in: record to write */
- const ulint* offsets)/*!< in: offsets of mrec */
-#ifndef UNIV_DEBUG
+ const offset_t* offsets)/*!< in: offsets of mrec */
+#ifdef DBUG_OFF
# define row_merge_write_rec_low(b, e, size, fd, foffs, mrec, offsets) \
row_merge_write_rec_low(b, e, mrec, offsets)
-#endif /* !UNIV_DEBUG */
+#endif /* DBUG_OFF */
{
-#ifdef UNIV_DEBUG
+ DBUG_ENTER("row_merge_write_rec_low");
+
+#ifndef DBUG_OFF
const byte* const end = b + size;
- ut_ad(e == rec_offs_extra_size(offsets) + 1);
+#endif /* DBUG_OFF */
+ DBUG_ASSERT(e == rec_offs_extra_size(offsets) + 1);
- if (row_merge_print_write) {
- fprintf(stderr, "row_merge_write %p,%d," ULINTPF " ",
- (void*) b, fd, foffs);
- rec_print_comp(stderr, mrec, offsets);
- putc('\n', stderr);
- }
-#endif /* UNIV_DEBUG */
+ DBUG_LOG("ib_merge_sort",
+ reinterpret_cast<const void*>(b) << ",fd=" << fd << ','
+ << foffs << ": " << rec_printer(mrec, 0, offsets).str());
if (e < 0x80) {
*b++ = (byte) e;
@@ -1159,12 +1363,13 @@ row_merge_write_rec_low(
}
memcpy(b, mrec - rec_offs_extra_size(offsets), rec_offs_size(offsets));
- ut_ad(b + rec_offs_size(offsets) == end);
+ DBUG_ASSERT(b + rec_offs_size(offsets) == end);
+ DBUG_VOID_RETURN;
}
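Both row_merge_read_rec() and row_merge_write_rec_low() share the same variable-length prefix: the stored value is extra_size + 1, so 0x00 can serve as the end-of-list marker; values below 0x80 take one byte, while larger values set the high bit of the first byte and spill the low eight bits into a second byte. A self-contained sketch of the encoding and its inverse:

#include <cstdint>

static unsigned char* encode_prefix(unsigned char* b, uint16_t e)
{
	if (e < 0x80) {
		*b++ = (unsigned char) e;
	} else {
		/* the high bit flags the two-byte form */
		*b++ = (unsigned char) (0x80 | (e >> 8));
		*b++ = (unsigned char) e;
	}
	return(b);
}

static const unsigned char* decode_prefix(const unsigned char* b,
					  uint16_t* e)
{
	*e = *b++;
	if (*e >= 0x80) {
		*e = (uint16_t) (((*e & 0x7f) << 8) | *b++);
	}
	return(b);
}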
/********************************************************************//**
Write a merge record.
-@return pointer to end of block, or NULL on error */
+@return pointer to end of block, or NULL on error */
static
byte*
row_merge_write_rec(
@@ -1175,7 +1380,7 @@ row_merge_write_rec(
int fd, /*!< in: file descriptor */
ulint* foffs, /*!< in/out: file offset */
const mrec_t* mrec, /*!< in: record to write */
- const ulint* offsets,/*!< in: offsets of mrec */
+ const offset_t* offsets,/*!< in: offsets of mrec */
row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */
ulint space) /*!< in: space id */
{
@@ -1235,7 +1440,7 @@ row_merge_write_rec(
/********************************************************************//**
Write an end-of-list marker.
-@return pointer to end of block, or NULL on error */
+@return pointer to end of block, or NULL on error */
static
byte*
row_merge_write_eof(
@@ -1251,12 +1456,12 @@ row_merge_write_eof(
ut_ad(b >= &block[0]);
ut_ad(b < &block[srv_sort_buf_size]);
ut_ad(foffs);
-#ifdef UNIV_DEBUG
- if (row_merge_print_write) {
- fprintf(stderr, "row_merge_write %p,%p,%d," ULINTPF " EOF\n",
- (void*) b, (void*) block, fd, *foffs);
- }
-#endif /* UNIV_DEBUG */
+
+ DBUG_ENTER("row_merge_write_eof");
+ DBUG_LOG("ib_merge_sort",
+ reinterpret_cast<const void*>(b) << ',' <<
+ reinterpret_cast<const void*>(block) <<
+ ",fd=" << fd << ',' << *foffs);
*b++ = 0;
UNIV_MEM_ASSERT_RW(&block[0], b - &block[0]);
@@ -1268,20 +1473,17 @@ row_merge_write_eof(
memset(b, 0xff, &block[srv_sort_buf_size] - b);
#endif /* UNIV_DEBUG_VALGRIND */
- if (!row_merge_write(fd, (*foffs)++, block,
- crypt_block,
- space)) {
- return(NULL);
+ if (!row_merge_write(fd, (*foffs)++, block, crypt_block, space)) {
+ DBUG_RETURN(NULL);
}
UNIV_MEM_INVALID(&block[0], srv_sort_buf_size);
-
- return(&block[0]);
+ DBUG_RETURN(&block[0]);
}
/** Create a temporary file if it has not been created already.
@param[in,out] tmpfd temporary file handle
-@param[in] path path to create temporary file
+@param[in] path location for creating temporary file
@return file descriptor, or -1 on failure */
static MY_ATTRIBUTE((warn_unused_result))
int
@@ -1291,6 +1493,9 @@ row_merge_tmpfile_if_needed(
{
if (*tmpfd < 0) {
*tmpfd = row_merge_file_create_low(path);
+ if (*tmpfd >= 0) {
+ MONITOR_ATOMIC_INC(MONITOR_ALTER_TABLE_SORT_FILES);
+ }
}
return(*tmpfd);
@@ -1298,9 +1503,8 @@ row_merge_tmpfile_if_needed(
/** Create a temporary file for merge sort if it was not created already.
@param[in,out] file merge file structure
-@param[in,out] tmpfd temporary file structure
@param[in] nrec number of records in the file
-@param[in] path path to create temporary files
+@param[in] path location for creating temporary file
@return file descriptor, or -1 on failure */
static MY_ATTRIBUTE((warn_unused_result))
int
@@ -1312,6 +1516,7 @@ row_merge_file_create_if_needed(
{
ut_ad(file->fd < 0 || *tmpfd >=0);
if (file->fd < 0 && row_merge_file_create(file, path) >= 0) {
+ MONITOR_ATOMIC_INC(MONITOR_ALTER_TABLE_SORT_FILES);
if (row_merge_tmpfile_if_needed(tmpfd, path) < 0) {
return(-1);
}
@@ -1323,6 +1528,117 @@ row_merge_file_create_if_needed(
return(file->fd);
}
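Both helpers above create their file lazily and bump the MONITOR_ALTER_TABLE_SORT_FILES counter only when creation succeeds, so each sort file is counted exactly once. A standalone sketch of the idiom, with stand-in names:

#include <atomic>
#include <cstdio>

static std::atomic<unsigned long> sort_files_created(0);

static std::FILE* tmpfile_if_needed_sketch(std::FILE*& f)
{
	if (f == NULL) {
		f = std::tmpfile();	/* row_merge_file_create_low() */

		if (f != NULL) {
			/* count only successful creations, once */
			++sort_files_created;
		}
	}

	return(f);
}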
+/** Copy the merge data tuple from another merge data tuple.
+@param[in] mtuple source merge data tuple
+@param[in,out] prev_mtuple destination merge data tuple
+@param[in] n_unique number of unique fields in the mtuple
+@param[in,out] heap memory heap where prev_mtuple is allocated */
+static
+void
+row_mtuple_create(
+ const mtuple_t* mtuple,
+ mtuple_t* prev_mtuple,
+ ulint n_unique,
+ mem_heap_t* heap)
+{
+ memcpy(prev_mtuple->fields, mtuple->fields,
+ n_unique * sizeof *mtuple->fields);
+
+ dfield_t* field = prev_mtuple->fields;
+
+ for (ulint i = 0; i < n_unique; i++) {
+ dfield_dup(field++, heap);
+ }
+}
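row_mtuple_create() above first copies the field descriptors wholesale and then deep-copies each payload into the destination's heap, so the saved tuple survives after the source buffer is emptied. A standalone sketch of the same copy-then-deep-dup pattern, assuming a simple (data, len) field type:

#include <cstring>
#include <deque>
#include <string>

struct field_sketch {
	const char*	data;
	size_t		len;
};

static void dup_fields_sketch(const field_sketch* src, field_sketch* dst,
			      size_t n, std::deque<std::string>& heap)
{
	/* shallow copy of the descriptors first ... */
	std::memcpy(dst, src, n * sizeof *src);

	/* ... then give the copy its own payloads (dfield_dup() in the
	real code); a deque keeps earlier pointers valid as it grows */
	for (size_t i = 0; i < n; i++) {
		heap.push_back(std::string(dst[i].data, dst[i].len));
		dst[i].data = heap.back().c_str();
	}
}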
+
+/** Compare two merge data tuples.
+@param[in] prev_mtuple previous merge data tuple
+@param[in] current_mtuple current merge data tuple
+@param[in,out] dup reporter of duplicates
+@retval positive, 0, or negative if current_mtuple is greater than, equal
+to, or less than prev_mtuple. */
+static
+int
+row_mtuple_cmp(
+ const mtuple_t* prev_mtuple,
+ const mtuple_t* current_mtuple,
+ row_merge_dup_t* dup)
+{
+ ut_ad(dict_index_is_clust(dup->index));
+ const ulint n_unique = dict_index_get_n_unique(dup->index);
+
+ return(row_merge_tuple_cmp(
+ n_unique, n_unique, *current_mtuple, *prev_mtuple, dup));
+}
+
+/** Insert cached spatial index rows.
+@param[in] trx_id transaction id
+@param[in] sp_tuples cached spatial rows
+@param[in] num_spatial number of spatial indexes
+@param[in,out] row_heap heap for insert
+@param[in,out] sp_heap heap for tuples
+@param[in,out] pcur clustered index cursor
+@param[in,out] mtr mini-transaction
+@return DB_SUCCESS or error number */
+static
+dberr_t
+row_merge_spatial_rows(
+ trx_id_t trx_id,
+ index_tuple_info_t** sp_tuples,
+ ulint num_spatial,
+ mem_heap_t* row_heap,
+ mem_heap_t* sp_heap,
+ btr_pcur_t* pcur,
+ mtr_t* mtr)
+{
+ dberr_t err = DB_SUCCESS;
+
+ if (sp_tuples == NULL) {
+ return(DB_SUCCESS);
+ }
+
+ ut_ad(sp_heap != NULL);
+
+ for (ulint j = 0; j < num_spatial; j++) {
+ err = sp_tuples[j]->insert(trx_id, row_heap, pcur, mtr);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+ }
+
+ mem_heap_empty(sp_heap);
+
+ return(err);
+}
+
+/** Check if the geometry field is valid.
+@param[in] row the row
+@param[in] index spatial index
+@return true if it's valid, false if it's invalid. */
+static
+bool
+row_geo_field_is_valid(
+ const dtuple_t* row,
+ dict_index_t* index)
+{
+ const dict_field_t* ind_field
+ = dict_index_get_nth_field(index, 0);
+ const dict_col_t* col
+ = ind_field->col;
+ ulint col_no
+ = dict_col_get_no(col);
+ const dfield_t* dfield
+ = dtuple_get_nth_field(row, col_no);
+
+ if (dfield_is_null(dfield)
+ || dfield_get_len(dfield) < GEO_DATA_HEADER_SIZE) {
+ return(false);
+ }
+
+ return(true);
+}
+
 /** Read the clustered index of the table and create temporary files
containing the index entries for the indexes to be built.
@param[in] trx transaction
@@ -1340,16 +1656,24 @@ containing the index entries for the indexes to be built.
@param[in] key_numbers MySQL key numbers to create
@param[in] n_index number of indexes to create
@param[in] add_cols default values of added columns, or NULL
+@param[in] add_v newly added virtual columns along with indexes
@param[in] col_map mapping of old column numbers to new ones, or
- NULL if old_table == new_table
+NULL if old_table == new_table
@param[in] add_autoinc number of added AUTO_INCREMENT columns, or
- ULINT_UNDEFINED if none is added
-@param[in,out] sequence autoinc sequence
+ULINT_UNDEFINED if none is added
+@param[in,out] sequence autoinc sequence
@param[in,out] block file buffer
+@param[in] skip_pk_sort whether the new PRIMARY KEY will follow
+existing order
@param[in,out] tmpfd temporary file handle
+@param[in,out] stage performance schema accounting object, used by
+ALTER TABLE. stage->n_pk_recs_inc() will be called for each record read and
+stage->inc() will be called for each page read.
@param[in] pct_cost percent of task weight out of total alter job
@param[in,out] crypt_block crypted file buffer
-@return DB_SUCCESS or error */
+@param[in] eval_table mysql table used to evaluate virtual column
+ value, see innobase_get_computed_value().
+@return DB_SUCCESS or error */
static MY_ATTRIBUTE((warn_unused_result))
dberr_t
row_merge_read_clustered_index(
@@ -1365,18 +1689,24 @@ row_merge_read_clustered_index(
const ulint* key_numbers,
ulint n_index,
const dtuple_t* add_cols,
+ const dict_add_v_col_t* add_v,
const ulint* col_map,
ulint add_autoinc,
ib_sequence_t& sequence,
row_merge_block_t* block,
+ bool skip_pk_sort,
int* tmpfd,
- float pct_cost,
- row_merge_block_t* crypt_block)
+ ut_stage_alter_t* stage,
+ double pct_cost,
+ row_merge_block_t* crypt_block,
+ struct TABLE* eval_table)
{
dict_index_t* clust_index; /* Clustered index */
- mem_heap_t* row_heap; /* Heap memory to create
+ mem_heap_t* row_heap = NULL;/* Heap memory to create
clustered index tuples */
row_merge_buf_t** merge_buf; /* Temporary list for records*/
+ mem_heap_t* v_heap = NULL; /* Heap memory to process large
+ data for virtual column */
btr_pcur_t pcur; /* Cursor on the clustered
index */
mtr_t mtr; /* Mini transaction */
@@ -1390,12 +1720,18 @@ row_merge_read_clustered_index(
ibool add_doc_id = FALSE;
os_event_t fts_parallel_sort_event = NULL;
ibool fts_pll_sort = FALSE;
- ib_int64_t sig_count = 0;
+ int64_t sig_count = 0;
+ index_tuple_info_t** sp_tuples = NULL;
+ mem_heap_t* sp_heap = NULL;
+ ulint num_spatial = 0;
+ BtrBulk* clust_btr_bulk = NULL;
+ bool clust_temp_file = false;
+ mem_heap_t* mtuple_heap = NULL;
+ mtuple_t prev_mtuple;
mem_heap_t* conv_heap = NULL;
-
- float curr_progress = 0.0;
- ib_int64_t read_rows = 0;
- ib_int64_t table_total_rows = 0;
+ double curr_progress = 0.0;
+ ib_uint64_t read_rows = 0;
+ ib_uint64_t table_total_rows = 0;
DBUG_ENTER("row_merge_read_clustered_index");
@@ -1414,13 +1750,22 @@ row_merge_read_clustered_index(
DEBUG_FTS_SORT_PRINT("FTS_SORT: Start Create Index\n");
#endif
- ut_ad(trx->mysql_thd != NULL);
- const char* path = thd_innodb_tmpdir(trx->mysql_thd);
-
/* Create and initialize memory for record buffers */
merge_buf = static_cast<row_merge_buf_t**>(
- mem_alloc(n_index * sizeof *merge_buf));
+ ut_malloc_nokey(n_index * sizeof *merge_buf));
+
+ row_merge_dup_t clust_dup = {index[0], table, col_map, 0};
+ dfield_t* prev_fields;
+ const ulint n_uniq = dict_index_get_n_unique(index[0]);
+
+ ut_ad(trx->mysql_thd != NULL);
+
+ const char* path = thd_innodb_tmpdir(trx->mysql_thd);
+
+ ut_ad(!skip_pk_sort || dict_index_is_clust(index[0]));
+ /* There is no previous tuple yet. */
+ prev_mtuple.fields = NULL;
for (ulint i = 0; i < n_index; i++) {
if (index[i]->type & DICT_FTS) {
@@ -1450,10 +1795,37 @@ row_merge_read_clustered_index(
fts_parallel_sort_event =
psort_info[0].psort_common->sort_event;
} else {
+ if (dict_index_is_spatial(index[i])) {
+ num_spatial++;
+ }
+
merge_buf[i] = row_merge_buf_create(index[i]);
}
}
+ if (num_spatial > 0) {
+ ulint count = 0;
+
+ sp_heap = mem_heap_create(512);
+
+ sp_tuples = static_cast<index_tuple_info_t**>(
+ ut_malloc_nokey(num_spatial
+ * sizeof(*sp_tuples)));
+
+ for (ulint i = 0; i < n_index; i++) {
+ if (dict_index_is_spatial(index[i])) {
+ sp_tuples[count]
+ = UT_NEW_NOKEY(
+ index_tuple_info_t(
+ sp_heap,
+ index[i]));
+ count++;
+ }
+ }
+
+ ut_ad(count == num_spatial);
+ }
+
mtr_start(&mtr);
/* Find the clustered index and create a persistent cursor
@@ -1471,7 +1843,7 @@ row_merge_read_clustered_index(
do not violate the added NOT NULL constraints. */
nonnull = static_cast<ulint*>(
- mem_alloc(dict_table_get_n_cols(new_table)
+ ut_malloc_nokey(dict_table_get_n_cols(new_table)
* sizeof *nonnull));
for (ulint i = 0; i < dict_table_get_n_cols(old_table); i++) {
@@ -1494,7 +1866,7 @@ row_merge_read_clustered_index(
}
if (!n_nonnull) {
- mem_free(nonnull);
+ ut_free(nonnull);
nonnull = NULL;
}
}
@@ -1506,27 +1878,39 @@ row_merge_read_clustered_index(
conv_heap = mem_heap_create(sizeof(mrec_buf_t));
}
+ if (skip_pk_sort) {
+ prev_fields = static_cast<dfield_t*>(
+ ut_malloc_nokey(n_uniq * sizeof *prev_fields));
+ mtuple_heap = mem_heap_create(sizeof(mrec_buf_t));
+ } else {
+ prev_fields = NULL;
+ }
+
/* Scan the clustered index. */
for (;;) {
- const rec_t* rec;
- ulint* offsets;
- const dtuple_t* row;
- row_ext_t* ext;
- page_cur_t* cur = btr_pcur_get_page_cur(&pcur);
-
- mem_heap_empty(row_heap);
-
/* Do not continue if table pages are still encrypted */
- if (!old_table->is_readable() ||
- !new_table->is_readable()) {
+ if (!old_table->is_readable() || !new_table->is_readable()) {
err = DB_DECRYPTION_FAILED;
trx->error_key_num = 0;
goto func_exit;
}
+ const rec_t* rec;
+ offset_t* offsets;
+ dtuple_t* row;
+ row_ext_t* ext;
+ page_cur_t* cur = btr_pcur_get_page_cur(&pcur);
+
+ mem_heap_empty(row_heap);
+
page_cur_move_to_next(cur);
+ stage->n_pk_recs_inc();
+
if (page_cur_is_after_last(cur)) {
+
+ stage->inc();
+
if (UNIV_UNLIKELY(trx_is_interrupted(trx))) {
err = DB_INTERRUPTED;
trx->error_key_num = 0;
@@ -1540,6 +1924,7 @@ row_merge_read_clustered_index(
goto func_exit;
}
}
+
#ifdef DBUG_OFF
# define dbug_run_purge false
#else /* DBUG_OFF */
@@ -1549,9 +1934,21 @@ row_merge_read_clustered_index(
"ib_purge_on_create_index_page_switch",
dbug_run_purge = true;);
+ /* Insert the cached spatial index rows. */
+ err = row_merge_spatial_rows(
+ trx->id, sp_tuples, num_spatial,
+ row_heap, sp_heap, &pcur, &mtr);
+
+ if (err != DB_SUCCESS) {
+ goto func_exit;
+ }
+
+ if (!mtr.is_active()) {
+ goto scan_next;
+ }
+
if (dbug_run_purge
- || rw_lock_get_waiters(
- dict_index_get_lock(clust_index))) {
+ || dict_index_get_lock(clust_index)->waiters) {
/* There are waiters on the clustered
index tree lock, likely the purge
thread. Store and restore the cursor
@@ -1565,8 +1962,8 @@ row_merge_read_clustered_index(
/* Leaf pages must never be empty, unless
this is the only page in the index tree. */
ut_ad(btr_pcur_is_on_user_rec(&pcur)
- || buf_block_get_page_no(
- btr_pcur_get_block(&pcur))
+ || btr_pcur_get_block(
+ &pcur)->page.id.page_no()
== clust_index->page);
btr_pcur_store_position(&pcur, &mtr);
@@ -1586,7 +1983,7 @@ row_merge_read_clustered_index(
/* Give the waiters a chance to proceed. */
os_thread_yield();
-
+scan_next:
mtr_start(&mtr);
/* Restore position on the record, or its
predecessor if the record was purged
@@ -1601,9 +1998,9 @@ end_of_index:
row = NULL;
mtr_commit(&mtr);
mem_heap_free(row_heap);
- if (nonnull) {
- mem_free(nonnull);
- }
+ row_heap = NULL;
+ ut_free(nonnull);
+ nonnull = NULL;
goto write_buffers;
}
} else {
@@ -1611,7 +2008,7 @@ end_of_index:
buf_block_t* block;
next_page_no = btr_page_get_next(
- page_cur_get_page(cur), &mtr);
+ page_cur_get_page(cur));
if (next_page_no == FIL_NULL) {
goto end_of_index;
@@ -1619,9 +2016,10 @@ end_of_index:
block = page_cur_get_block(cur);
block = btr_block_get(
- buf_block_get_space(block),
- buf_block_get_zip_size(block),
- next_page_no, BTR_SEARCH_LEAF,
+ page_id_t(block->page.id.space(),
+ next_page_no),
+ block->page.size,
+ BTR_SEARCH_LEAF,
clust_index, &mtr);
btr_leaf_page_release(page_cur_get_block(cur),
@@ -1635,10 +2033,10 @@ end_of_index:
rec = page_cur_get_rec(cur);
- offsets = rec_get_offsets(rec, clust_index, NULL,
- ULINT_UNDEFINED, &row_heap);
-
if (online) {
+ offsets = rec_get_offsets(rec, clust_index, NULL, true,
+ ULINT_UNDEFINED, &row_heap);
+
/* Perform a REPEATABLE READ.
When rebuilding the table online,
@@ -1658,18 +2056,18 @@ end_of_index:
ONLINE_INDEX_COMPLETE state between the time
the DML thread has updated the clustered index
but has not yet accessed secondary index. */
- ut_ad(trx->read_view);
+ ut_ad(MVCC::is_view_active(trx->read_view));
- if (!read_view_sees_trx_id(
- trx->read_view,
+ if (!trx->read_view->changes_visible(
row_get_rec_trx_id(
- rec, clust_index, offsets))) {
+ rec, clust_index, offsets),
+ old_table->name)) {
rec_t* old_vers;
row_vers_build_for_consistent_read(
rec, &mtr, clust_index, &offsets,
trx->read_view, &row_heap,
- row_heap, &old_vers);
+ row_heap, &old_vers, NULL);
rec = old_vers;
@@ -1681,6 +2079,10 @@ end_of_index:
if (rec_get_deleted_flag(
rec,
dict_table_is_comp(old_table))) {
+ /* In delete-marked records, DB_TRX_ID must
+ always refer to an existing undo log record. */
+ ut_ad(row_get_rec_trx_id(rec, clust_index,
+ offsets));
/* This record was deleted in the latest
committed version, or it was deleted and
then reinserted-by-update before purge
@@ -1691,6 +2093,9 @@ end_of_index:
ut_ad(!rec_offs_any_null_extern(rec, offsets));
} else if (rec_get_deleted_flag(
rec, dict_table_is_comp(old_table))) {
+ /* In delete-marked records, DB_TRX_ID must
+ always refer to an existing undo log record. */
+ ut_ad(rec_get_trx_id(rec, clust_index));
/* Skip delete-marked records.
Skipping delete-marked records will make the
@@ -1700,6 +2105,9 @@ end_of_index:
would make it tricky to detect duplicate
keys. */
continue;
+ } else {
+ offsets = rec_get_offsets(rec, clust_index, NULL, true,
+ ULINT_UNDEFINED, &row_heap);
}
/* When !online, we are holding a lock on old_table, preventing
@@ -1709,9 +2117,10 @@ end_of_index:
/* Build a row based on the clustered index. */
- row = row_build(ROW_COPY_POINTERS, clust_index,
- rec, offsets, new_table,
- add_cols, col_map, &ext, row_heap);
+ row = row_build_w_add_vcol(ROW_COPY_POINTERS, clust_index,
+ rec, offsets, new_table,
+ add_cols, add_v, col_map, &ext,
+ row_heap);
ut_ad(row);
for (ulint i = 0; i < n_nonnull; i++) {
@@ -1738,9 +2147,8 @@ end_of_index:
ut_ad(add_autoinc
< dict_table_get_n_user_cols(new_table));
- const dfield_t* dfield;
-
- dfield = dtuple_get_nth_field(row, add_autoinc);
+ dfield_t* dfield = dtuple_get_nth_field(row,
+ add_autoinc);
if (dfield_is_null(dfield)) {
goto write_buffers;
}
@@ -1790,25 +2198,60 @@ write_buffers:
/* Build all entries for all the indexes to be created
in a single scan of the clustered index. */
- for (ulint i = 0; i < n_index; i++) {
+ ulint s_idx_cnt = 0;
+ bool skip_sort = skip_pk_sort
+ && dict_index_is_clust(merge_buf[0]->index);
+
+ for (ulint k = 0, i = 0; i < n_index; i++, skip_sort = false) {
row_merge_buf_t* buf = merge_buf[i];
- merge_file_t* file = &files[i];
ulint rows_added = 0;
- bool exceed_page = false;
+
+ if (dict_index_is_spatial(buf->index)) {
+ if (!row) {
+ continue;
+ }
+
+ ut_ad(sp_tuples[s_idx_cnt]->get_index()
+ == buf->index);
+
+ /* If the geometry field is invalid, report an
+ error. */
+ if (!row_geo_field_is_valid(row, buf->index)) {
+ err = DB_CANT_CREATE_GEOMETRY_OBJECT;
+ break;
+ }
+
+ sp_tuples[s_idx_cnt]->add(row, ext);
+ s_idx_cnt++;
+
+ continue;
+ }
+
+ merge_file_t* file = &files[k++];
if (UNIV_LIKELY
(row && (rows_added = row_merge_buf_add(
- buf, fts_index, old_table,
+ buf, fts_index, old_table, new_table,
psort_info, row, ext, &doc_id,
- conv_heap, &exceed_page)))) {
+ conv_heap, &err,
+ &v_heap, eval_table, trx)))) {
+
+ /* Set the page flush observer for the
+ transaction when buffering the very first
+ record for a non-redo-logged operation. */
+ if (file->n_rec == 0 && i == 0
+ && innodb_log_optimize_ddl) {
+ trx->set_flush_observer(
+ new_table->space, stage);
+ }
/* If we are creating FTS index,
a single row can generate more
records for tokenized word */
file->n_rec += rows_added;
- if (exceed_page) {
- err = DB_TOO_BIG_RECORD;
+ if (err != DB_SUCCESS) {
+ ut_ad(err == DB_TOO_BIG_RECORD);
break;
}
@@ -1818,8 +2261,10 @@ write_buffers:
if (buf->index->type & DICT_FTS) {
/* Check if error occurs in child thread */
- for (ulint j = 0; j < fts_sort_pll_degree; j++) {
- if (psort_info[j].error != DB_SUCCESS) {
+ for (ulint j = 0;
+ j < fts_sort_pll_degree; j++) {
+ if (psort_info[j].error
+ != DB_SUCCESS) {
err = psort_info[j].error;
trx->error_key_num = i;
break;
@@ -1831,9 +2276,39 @@ write_buffers:
}
}
+ if (skip_sort) {
+ ut_ad(buf->n_tuples > 0);
+ const mtuple_t* curr =
+ &buf->tuples[buf->n_tuples - 1];
+
+ ut_ad(i == 0);
+ ut_ad(dict_index_is_clust(merge_buf[0]->index));
+ /* Detect duplicates by comparing the
+ current record with previous record.
+ When temp file is not used, records
+ should be in sorted order. */
+ if (prev_mtuple.fields != NULL
+ && (row_mtuple_cmp(
+ &prev_mtuple, curr,
+ &clust_dup) == 0)) {
+
+ err = DB_DUPLICATE_KEY;
+ trx->error_key_num
+ = key_numbers[0];
+ goto func_exit;
+ }
+
+ prev_mtuple.fields = curr->fields;
+ }
+
continue;
}
+ if (err == DB_COMPUTE_VALUE_FAILED) {
+ trx->error_key_num = i;
+ goto func_exit;
+ }
+
if (buf->index->type & DICT_FTS) {
if (!row || !doc_id) {
continue;
@@ -1848,10 +2323,119 @@ write_buffers:
ut_ad(buf->n_tuples || row == NULL);
/* We have enough data tuples to form a block.
- Sort them and write to disk. */
+ Sort them and write them to disk if a temp file
+ is used, or insert them into the index otherwise. */
+ ut_ad(old_table == new_table
+ ? !dict_index_is_clust(buf->index)
+ : (i == 0) == dict_index_is_clust(buf->index));
+
+ /* We have enough data tuples to form a block.
+ Sort them (if !skip_sort) and write to disk. */
if (buf->n_tuples) {
- if (dict_index_is_unique(buf->index)) {
+ if (skip_sort) {
+ /* The temporary file is not used, so
+ insert the sorted block directly into
+ the index. */
+ if (row != NULL) {
+ /* We have to insert the cached
+ spatial index rows, since after
+ mtr_commit() the clustered index
+ page could be updated, making the
+ data in the cached rows invalid. */
+ err = row_merge_spatial_rows(
+ trx->id, sp_tuples,
+ num_spatial,
+ row_heap, sp_heap,
+ &pcur, &mtr);
+
+ if (err != DB_SUCCESS) {
+ goto func_exit;
+ }
+
+ /* We are not at the end of
+ the scan yet. We must
+ mtr_commit() in order to be
+ able to call log_free_check()
+ in row_merge_insert_index_tuples().
+ Due to mtr_commit(), the
+ current row will be invalid, and
+ we must reread it on the next
+ loop iteration. */
+ if (mtr.is_active()) {
+ btr_pcur_move_to_prev_on_page(
+ &pcur);
+ btr_pcur_store_position(
+ &pcur, &mtr);
+
+ mtr.commit();
+ }
+ }
+
+ mem_heap_empty(mtuple_heap);
+ prev_mtuple.fields = prev_fields;
+
+ row_mtuple_create(
+ &buf->tuples[buf->n_tuples - 1],
+ &prev_mtuple, n_uniq,
+ mtuple_heap);
+
+ if (clust_btr_bulk == NULL) {
+ clust_btr_bulk = UT_NEW_NOKEY(
+ BtrBulk(index[i],
+ trx,
+ trx->get_flush_observer()));
+ } else {
+ clust_btr_bulk->latch();
+ }
+
+ err = row_merge_insert_index_tuples(
+ index[i], old_table,
+ -1, NULL, buf, clust_btr_bulk,
+ table_total_rows,
+ curr_progress,
+ pct_cost,
+ crypt_block,
+ new_table->space);
+
+ if (row == NULL) {
+ err = clust_btr_bulk->finish(
+ err);
+ UT_DELETE(clust_btr_bulk);
+ clust_btr_bulk = NULL;
+ } else {
+ /* Release latches for a possible
+ log_free_check() in spatial index
+ build. */
+ clust_btr_bulk->release();
+ }
+
+ if (err != DB_SUCCESS) {
+ break;
+ }
+
+ if (row != NULL) {
+ /* Restore the cursor on the
+ previous clustered index record,
+ and empty the buffer. The next
+ iteration of the outer loop will
+ advance the cursor and read the
+ next record (the one which we
+ had to ignore due to the buffer
+ overflow). */
+ mtr_start(&mtr);
+ btr_pcur_restore_position(
+ BTR_SEARCH_LEAF, &pcur,
+ &mtr);
+ buf = row_merge_buf_empty(buf);
+ merge_buf[i] = buf;
+ /* Restart the outer loop on the
+ record. We did not insert it
+ into any index yet. */
+ ut_ad(i == 0);
+ break;
+ }
+ } else if (dict_index_is_unique(buf->index)) {
row_merge_dup_t dup = {
buf->index, table, col_map, 0};
@@ -1891,30 +2475,83 @@ write_buffers:
dict_index_get_lock(buf->index));
}
- if (buf->n_tuples > 0) {
+ /* A secondary index, or a clustered index that
+ is not in sorted order, can use the temporary
+ file. A fulltext index must not use it. */
+ if (!skip_sort && !(buf->index->type & DICT_FTS)) {
+ /* If all rows fit in the sort buffer, we
+ can insert directly into the index without
+ a temporary file, provided the clustered
+ index does not use a temporary file either. */
+ if (row == NULL && file->fd == -1
+ && !clust_temp_file) {
+ DBUG_EXECUTE_IF(
+ "row_merge_write_failure",
+ err = DB_TEMP_FILE_WRITE_FAIL;
+ trx->error_key_num = i;
+ goto all_done;);
+
+ DBUG_EXECUTE_IF(
+ "row_merge_tmpfile_fail",
+ err = DB_OUT_OF_MEMORY;
+ trx->error_key_num = i;
+ goto all_done;);
+
+ BtrBulk btr_bulk(
+ index[i], trx,
+ trx->get_flush_observer());
+
+ err = row_merge_insert_index_tuples(
+ index[i], old_table,
+ -1, NULL, buf, &btr_bulk,
+ table_total_rows,
+ curr_progress,
+ pct_cost,
+ crypt_block,
+ new_table->space);
+
+ err = btr_bulk.finish(err);
+
+ DBUG_EXECUTE_IF(
+ "row_merge_insert_big_row",
+ err = DB_TOO_BIG_RECORD;);
- if (row_merge_file_create_if_needed(
- file, tmpfd, buf->n_tuples, path) < 0) {
- err = DB_OUT_OF_MEMORY;
- trx->error_key_num = i;
- break;
- }
+ if (err != DB_SUCCESS) {
+ break;
+ }
+ } else {
+ if (row_merge_file_create_if_needed(
+ file, tmpfd,
+ buf->n_tuples, path) < 0) {
+ err = DB_OUT_OF_MEMORY;
+ trx->error_key_num = i;
+ break;
+ }
- ut_ad(file->n_rec > 0);
+ /* Ensure that duplicates in the
+ clustered index will be detected before
+ inserting secondary index records. */
+ if (dict_index_is_clust(buf->index)) {
+ clust_temp_file = true;
+ }
- row_merge_buf_write(buf, file, block);
+ ut_ad(file->n_rec > 0);
- if (!row_merge_write(file->fd, file->offset++,
- block, crypt_block,
- new_table->space)) {
- err = DB_TEMP_FILE_WRITE_FAILURE;
- trx->error_key_num = i;
- break;
- }
- }
+ row_merge_buf_write(buf, file, block);
- UNIV_MEM_INVALID(&block[0], srv_sort_buf_size);
+ if (!row_merge_write(
+ file->fd, file->offset++,
+ block, crypt_block,
+ new_table->space)) {
+ err = DB_TEMP_FILE_WRITE_FAIL;
+ trx->error_key_num = i;
+ break;
+ }
+ UNIV_MEM_INVALID(
+ &block[0], srv_sort_buf_size);
+ }
+ }
merge_buf[i] = row_merge_buf_empty(buf);
buf = merge_buf[i];
@@ -1926,16 +2563,15 @@ write_buffers:
if (UNIV_UNLIKELY
(!(rows_added = row_merge_buf_add(
buf, fts_index, old_table,
- psort_info, row, ext,
+ new_table, psort_info, row, ext,
&doc_id, conv_heap,
- &exceed_page)))) {
+ &err, &v_heap, eval_table, trx)))) {
/* An empty buffer should have enough
room for at least one record. */
ut_error;
}
- if (exceed_page) {
- err = DB_TOO_BIG_RECORD;
+ if (err != DB_SUCCESS) {
break;
}
@@ -1951,6 +2587,10 @@ write_buffers:
goto func_exit;
}
+ if (v_heap) {
+ mem_heap_empty(v_heap);
+ }
+
/* Increment innodb_onlineddl_pct_progress status variable */
read_rows++;
if(read_rows % 1000 == 0) {
@@ -1959,20 +2599,37 @@ write_buffers:
pct_cost :
((pct_cost * read_rows) / table_total_rows);
/* presenting 10.12% as 1012 integer */
- onlineddl_pct_progress = curr_progress * 100;
+ onlineddl_pct_progress = (ulint) (curr_progress * 100);
}
}
func_exit:
- mtr_commit(&mtr);
+ if (mtr.is_active()) {
+ mtr_commit(&mtr);
+ }
+ if (row_heap) {
+ mem_heap_free(row_heap);
+ }
+ ut_free(nonnull);
- mem_heap_free(row_heap);
+all_done:
+ if (clust_btr_bulk != NULL) {
+ ut_ad(err != DB_SUCCESS);
+ clust_btr_bulk->latch();
+ err = clust_btr_bulk->finish(
+ err);
+ UT_DELETE(clust_btr_bulk);
+ }
- if (nonnull) {
- mem_free(nonnull);
+ if (prev_fields != NULL) {
+ ut_free(prev_fields);
+ mem_heap_free(mtuple_heap);
+ }
+
+ if (v_heap) {
+ mem_heap_free(v_heap);
}
-all_done:
if (conv_heap != NULL) {
mem_heap_free(conv_heap);
}
@@ -2035,11 +2692,9 @@ wait_again:
} while (!all_exit && trial_count < max_trial_count);
if (!all_exit) {
- ut_ad(0);
- ib_logf(IB_LOG_LEVEL_FATAL,
- "Not all child sort threads exited"
- " when creating FTS index '%s'",
- fts_sort_idx->name);
+ ib::fatal() << "Not all child sort threads exited"
+ " when creating FTS index '"
+ << fts_sort_idx->name << "'";
}
}
@@ -2052,17 +2707,27 @@ wait_again:
row_fts_free_pll_merge_buf(psort_info);
- mem_free(merge_buf);
+ ut_free(merge_buf);
btr_pcur_close(&pcur);
+ if (sp_tuples != NULL) {
+ for (ulint i = 0; i < num_spatial; i++) {
+ UT_DELETE(sp_tuples[i]);
+ }
+ ut_free(sp_tuples);
+
+ if (sp_heap) {
+ mem_heap_free(sp_heap);
+ }
+ }
+
/* Update the next Doc ID we used. Table should be locked, so
no concurrent DML */
if (max_doc_id && err == DB_SUCCESS) {
/* Sync fts cache for other fts indexes to keep all
fts indexes consistent in sync_doc_id. */
- err = fts_sync_table(const_cast<dict_table_t*>(new_table),
- false, true, false);
+ err = fts_sync_table(const_cast<dict_table_t*>(new_table));
if (err == DB_SUCCESS) {
fts_update_next_doc_id(NULL, new_table, max_doc_id);
@@ -2075,10 +2740,10 @@ wait_again:
}
/** Write a record via buffer 2 and read the next record to buffer N.
-@param N number of the buffer (0 or 1)
-@param INDEX record descriptor
-@param AT_END statement to execute at end of input */
-#define ROW_MERGE_WRITE_GET_NEXT(N, INDEX, AT_END) \
+@param N number of the buffer (0 or 1)
+@param INDEX record descriptor
+@param AT_END statement to execute at end of input */
+#define ROW_MERGE_WRITE_GET_NEXT_LOW(N, INDEX, AT_END) \
do { \
b2 = row_merge_write_rec(&block[2 * srv_sort_buf_size], \
&buf[2], b2, \
@@ -2104,26 +2769,44 @@ wait_again:
} \
} while (0)
-/*************************************************************//**
-Merge two blocks of records on disk and write a bigger block.
-@return DB_SUCCESS or error code */
-static __attribute__((warn_unused_result))
+#ifdef HAVE_PSI_STAGE_INTERFACE
+#define ROW_MERGE_WRITE_GET_NEXT(N, INDEX, AT_END) \
+ do { \
+ if (stage != NULL) { \
+ stage->inc(); \
+ } \
+ ROW_MERGE_WRITE_GET_NEXT_LOW(N, INDEX, AT_END); \
+ } while (0)
+#else /* HAVE_PSI_STAGE_INTERFACE */
+#define ROW_MERGE_WRITE_GET_NEXT(N, INDEX, AT_END) \
+ ROW_MERGE_WRITE_GET_NEXT_LOW(N, INDEX, AT_END)
+#endif /* HAVE_PSI_STAGE_INTERFACE */
+
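The wrapper macro above adds the stage->inc() accounting call only when the performance schema stage interface is compiled in, and otherwise forwards to the _LOW variant unchanged. A minimal standalone sketch of the same conditional-wrapper idiom (the feature macro and names are hypothetical):

#include <cstdio>

struct stage_sketch {
	void inc() { std::puts("one record processed"); }
};

#define DO_STEP_LOW(msg)	std::puts(msg)

#ifdef HAVE_STAGE_SKETCH
# define DO_STEP(stage, msg)			\
	do {					\
		if ((stage) != NULL) {		\
			(stage)->inc();		\
		}				\
		DO_STEP_LOW(msg);		\
	} while (0)
#else
# define DO_STEP(stage, msg)	DO_STEP_LOW(msg)
#endif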
+/** Merge two blocks of records on disk and write a bigger block.
+@param[in] dup descriptor of index being created
+@param[in] file file containing index entries
+@param[in,out] block 3 buffers
+@param[in,out] foffs0 offset of first source list in the file
+@param[in,out] foffs1 offset of second source list in the file
+@param[in,out] of output file
+@param[in,out] stage performance schema accounting object, used by
+ALTER TABLE. If not NULL stage->inc() will be called for each record
+processed.
+@param[in,out] crypt_block encryption buffer
+@param[in] space tablespace ID for encryption
+@return DB_SUCCESS or error code */
+static MY_ATTRIBUTE((warn_unused_result))
dberr_t
row_merge_blocks(
-/*=============*/
- const row_merge_dup_t* dup, /*!< in: descriptor of
- index being created */
- const merge_file_t* file, /*!< in: file containing
- index entries */
- row_merge_block_t* block, /*!< in/out: 3 buffers */
- ulint* foffs0, /*!< in/out: offset of first
- source list in the file */
- ulint* foffs1, /*!< in/out: offset of second
- source list in the file */
- merge_file_t* of, /*!< in/out: output file */
- row_merge_block_t* crypt_block,/*!< in: in/out: crypted file
- buffer */
- ulint space) /*!< in: space id */
+ const row_merge_dup_t* dup,
+ const merge_file_t* file,
+ row_merge_block_t* block,
+ ulint* foffs0,
+ ulint* foffs1,
+ merge_file_t* of,
+ ut_stage_alter_t* stage,
+ row_merge_block_t* crypt_block,
+ ulint space)
{
mem_heap_t* heap; /*!< memory heap for offsets0, offsets1 */
@@ -2135,20 +2818,13 @@ row_merge_blocks(
const mrec_t* mrec0; /*!< merge rec, points to block[0] or buf[0] */
const mrec_t* mrec1; /*!< merge rec, points to
block[srv_sort_buf_size] or buf[1] */
- ulint* offsets0;/* offsets of mrec0 */
- ulint* offsets1;/* offsets of mrec1 */
+ offset_t* offsets0;/* offsets of mrec0 */
+ offset_t* offsets1;/* offsets of mrec1 */
-#ifdef UNIV_DEBUG
- if (row_merge_print_block) {
- fprintf(stderr,
- "row_merge_blocks fd=%d ofs=" ULINTPF
- " + fd=%d ofs=" ULINTPF
- " = fd=%d ofs=" ULINTPF "\n",
- file->fd, *foffs0,
- file->fd, *foffs1,
- of->fd, of->offset);
- }
-#endif /* UNIV_DEBUG */
+ DBUG_ENTER("row_merge_blocks");
+ DBUG_LOG("ib_merge_sort",
+ "fd=" << file->fd << ',' << *foffs0 << '+' << *foffs1
+ << " to fd=" << of->fd << ',' << of->offset);
heap = row_merge_heap_create(dup->index, &buf, &offsets0, &offsets1);
@@ -2163,7 +2839,7 @@ row_merge_blocks(
space)) {
corrupt:
mem_heap_free(heap);
- return(DB_CORRUPTION);
+ DBUG_RETURN(DB_CORRUPTION);
}
b0 = &block[0];
@@ -2190,20 +2866,16 @@ corrupt:
}
while (mrec0 && mrec1) {
- switch (cmp_rec_rec_simple(
- mrec0, mrec1, offsets0, offsets1,
- dup->index, dup->table)) {
- case 0:
- mem_heap_free(heap);
- return(DB_DUPLICATE_KEY);
- case -1:
+ int cmp = cmp_rec_rec_simple(
+ mrec0, mrec1, offsets0, offsets1,
+ dup->index, dup->table);
+ if (cmp < 0) {
ROW_MERGE_WRITE_GET_NEXT(0, dup->index, goto merged);
- break;
- case 1:
+ } else if (cmp) {
ROW_MERGE_WRITE_GET_NEXT(1, dup->index, goto merged);
- break;
- default:
- ut_error;
+ } else {
+ mem_heap_free(heap);
+ DBUG_RETURN(DB_DUPLICATE_KEY);
}
}
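The rewritten loop above is a classic two-way merge driven by a three-way comparator, with equality treated as DB_DUPLICATE_KEY because the index being built is unique. A standalone sketch over sorted integer runs:

#include <vector>

/* returns false on a duplicate (DB_DUPLICATE_KEY in the real code) */
static bool merge_unique_sketch(const std::vector<int>& a,
				const std::vector<int>& b,
				std::vector<int>& out)
{
	size_t	i = 0;
	size_t	j = 0;

	while (i < a.size() && j < b.size()) {
		if (a[i] < b[j]) {		/* cmp < 0 */
			out.push_back(a[i++]);
		} else if (a[i] > b[j]) {	/* cmp > 0 */
			out.push_back(b[j++]);
		} else {			/* cmp == 0: duplicate */
			return(false);
		}
	}

	/* copy the remainder of whichever run is left (done0:/done1:) */
	out.insert(out.end(), a.begin() + i, a.end());
	out.insert(out.end(), b.begin() + j, b.end());
	return(true);
}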
@@ -2225,28 +2897,37 @@ done1:
mem_heap_free(heap);
- b2 = row_merge_write_eof(&block[2 * srv_sort_buf_size],
+ b2 = row_merge_write_eof(
+ &block[2 * srv_sort_buf_size],
b2, of->fd, &of->offset,
crypt_block ? &crypt_block[2 * srv_sort_buf_size] : NULL,
space);
-
- return(b2 ? DB_SUCCESS : DB_CORRUPTION);
+ DBUG_RETURN(b2 ? DB_SUCCESS : DB_CORRUPTION);
}
-/*************************************************************//**
-Copy a block of index entries.
-@return TRUE on success, FALSE on failure */
-static __attribute__((warn_unused_result))
-bool
+/** Copy a block of index entries.
+@param[in] index index being created
+@param[in] file input file
+@param[in,out] block 3 buffers
+@param[in,out] foffs0 input file offset
+@param[in,out] of output file
+@param[in,out] stage performance schema accounting object, used by
+ALTER TABLE. If not NULL stage->inc() will be called for each record
+processed.
+@param[in,out] crypt_block encryption buffer
+@param[in] space tablespace ID for encryption
+@return TRUE on success, FALSE on failure */
+static MY_ATTRIBUTE((warn_unused_result))
+ibool
row_merge_blocks_copy(
-/*==================*/
- const dict_index_t* index, /*!< in: index being created */
- const merge_file_t* file, /*!< in: input file */
- row_merge_block_t* block, /*!< in/out: 3 buffers */
- ulint* foffs0, /*!< in/out: input file offset */
- merge_file_t* of, /*!< in/out: output file */
- row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */
- ulint space) /*!< in: space id */
+ const dict_index_t* index,
+ const merge_file_t* file,
+ row_merge_block_t* block,
+ ulint* foffs0,
+ merge_file_t* of,
+ ut_stage_alter_t* stage,
+ row_merge_block_t* crypt_block,
+ ulint space)
{
mem_heap_t* heap; /*!< memory heap for offsets0, offsets1 */
@@ -2255,18 +2936,13 @@ row_merge_blocks_copy(
const byte* b0; /*!< pointer to block[0] */
byte* b2; /*!< pointer to block[2 * srv_sort_buf_size] */
const mrec_t* mrec0; /*!< merge rec, points to block[0] */
- ulint* offsets0;/* offsets of mrec0 */
- ulint* offsets1;/* dummy offsets */
+ offset_t* offsets0;/* offsets of mrec0 */
+ offset_t* offsets1;/* dummy offsets */
-#ifdef UNIV_DEBUG
- if (row_merge_print_block) {
- fprintf(stderr,
- "row_merge_blocks_copy fd=%d ofs=" ULINTPF
- " = fd=%d ofs=" ULINTPF "\n",
- file->fd, *foffs0,
- of->fd, of->offset);
- }
-#endif /* UNIV_DEBUG */
+ DBUG_ENTER("row_merge_blocks_copy");
+ DBUG_LOG("ib_merge_sort",
+ "fd=" << file->fd << ',' << foffs0
+ << " to fd=" << of->fd << ',' << of->offset);
heap = row_merge_heap_create(index, &buf, &offsets0, &offsets1);
@@ -2278,7 +2954,7 @@ row_merge_blocks_copy(
space)) {
corrupt:
mem_heap_free(heap);
- return(FALSE);
+ DBUG_RETURN(FALSE);
}
b0 = &block[0];
@@ -2309,34 +2985,43 @@ done0:
mem_heap_free(heap);
- return(row_merge_write_eof(&block[2 * srv_sort_buf_size],
- b2, of->fd, &of->offset,
- crypt_block ? &crypt_block[2 * srv_sort_buf_size] : NULL,
- space)
- != NULL);
+ DBUG_RETURN(row_merge_write_eof(
+ &block[2 * srv_sort_buf_size],
+ b2, of->fd, &of->offset,
+ crypt_block
+ ? &crypt_block[2 * srv_sort_buf_size]
+ : NULL, space)
+ != NULL);
}
-/*************************************************************//**
-Merge disk files.
-@return DB_SUCCESS or error code */
+/** Merge disk files.
+@param[in] trx transaction
+@param[in] dup descriptor of index being created
+@param[in,out] file file containing index entries
+@param[in,out] block 3 buffers
+@param[in,out] tmpfd temporary file handle
+@param[in,out] num_run number of runs that remain to be merged
+@param[in,out] run_offset array that contains the first offset number
+for each merge run
+@param[in,out] stage performance schema accounting object, used by
+ALTER TABLE. If not NULL stage->inc() will be called for each record
+processed.
+@param[in,out] crypt_block encryption buffer
+@param[in] space tablespace ID for encryption
+@return DB_SUCCESS or error code */
static
dberr_t
row_merge(
-/*======*/
- trx_t* trx, /*!< in: transaction */
- const row_merge_dup_t* dup, /*!< in: descriptor of
- index being created */
- merge_file_t* file, /*!< in/out: file containing
- index entries */
- row_merge_block_t* block, /*!< in/out: 3 buffers */
- int* tmpfd, /*!< in/out: temporary file handle */
- ulint* num_run,/*!< in/out: Number of runs remain
- to be merged */
- ulint* run_offset, /*!< in/out: Array contains the
- first offset number for each merge
- run */
- row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */
- ulint space) /*!< in: space id */
+ trx_t* trx,
+ const row_merge_dup_t* dup,
+ merge_file_t* file,
+ row_merge_block_t* block,
+ int* tmpfd,
+ ulint* num_run,
+ ulint* run_offset,
+ ut_stage_alter_t* stage,
+ row_merge_block_t* crypt_block,
+ ulint space)
{
ulint foffs0; /*!< first input offset */
ulint foffs1; /*!< second input offset */
@@ -2383,9 +3068,8 @@ row_merge(
run_offset[n_run++] = of.offset;
error = row_merge_blocks(dup, file, block,
- &foffs0, &foffs1, &of,
- crypt_block,
- space);
+ &foffs0, &foffs1, &of, stage,
+ crypt_block, space);
if (error != DB_SUCCESS) {
return(error);
@@ -2405,9 +3089,8 @@ row_merge(
run_offset[n_run++] = of.offset;
if (!row_merge_blocks_copy(dup->index, file, block,
- &foffs0, &of,
- crypt_block,
- space)) {
+ &foffs0, &of, stage,
+ crypt_block, space)) {
return(DB_CORRUPTION);
}
}
@@ -2424,9 +3107,8 @@ row_merge(
run_offset[n_run++] = of.offset;
if (!row_merge_blocks_copy(dup->index, file, block,
- &foffs1, &of,
- crypt_block,
- space)) {
+ &foffs1, &of, stage,
+ crypt_block, space)) {
return(DB_CORRUPTION);
}
}
@@ -2460,30 +3142,33 @@ row_merge(
return(DB_SUCCESS);
}
-/*************************************************************//**
-Merge disk files.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+/** Merge disk files.
+@param[in] trx transaction
+@param[in] dup descriptor of index being created
+@param[in,out] file file containing index entries
+@param[in,out] block 3 buffers
+@param[in,out] tmpfd temporary file handle
+@param[in,out] stage performance schema accounting object, used by
+ALTER TABLE. If not NULL, stage->begin_phase_sort() will be called initially
+and then stage->inc() will be called for each record processed.
+@return DB_SUCCESS or error code */
dberr_t
row_merge_sort(
-/*===========*/
- trx_t* trx, /*!< in: transaction */
- const row_merge_dup_t* dup, /*!< in: descriptor of
- index being created */
- merge_file_t* file, /*!< in/out: file containing
- index entries */
- row_merge_block_t* block, /*!< in/out: 3 buffers */
- int* tmpfd, /*!< in/out: temporary file handle
- */
+ trx_t* trx,
+ const row_merge_dup_t* dup,
+ merge_file_t* file,
+ row_merge_block_t* block,
+ int* tmpfd,
const bool update_progress,
/*!< in: update progress
status variable or not */
- const float pct_progress,
+ const double pct_progress,
/*!< in: total progress percent
until now */
- const float pct_cost, /*!< in: current progress percent */
+ const double pct_cost, /*!< in: current progress percent */
row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */
- ulint space) /*!< in: space id */
+ ulint space, /*!< in: space id */
+ ut_stage_alter_t* stage)
{
const ulint half = file->offset / 2;
ulint num_runs;
@@ -2491,22 +3176,26 @@ row_merge_sort(
dberr_t error = DB_SUCCESS;
ulint merge_count = 0;
ulint total_merge_sort_count;
- float curr_progress = 0;
+ double curr_progress = 0;
DBUG_ENTER("row_merge_sort");
/* Record the number of merge runs we need to perform */
num_runs = file->offset;
+ if (stage != NULL) {
+ stage->begin_phase_sort(log2(num_runs));
+ }
+
/* If num_runs are less than 1, nothing to merge */
if (num_runs <= 1) {
DBUG_RETURN(error);
}
- total_merge_sort_count = ceil(log2f(num_runs));
+ total_merge_sort_count = ulint(ceil(log2(double(num_runs))));
/* "run_offset" records each run's first offset number */
- run_offset = (ulint*) mem_alloc(file->offset * sizeof(ulint));
+ run_offset = (ulint*) ut_malloc_nokey(file->offset * sizeof(ulint));
/* This tells row_merge() where to start for the first round
of merge. */
@@ -2545,9 +3234,8 @@ row_merge_sort(
#endif /* UNIV_SOLARIS */
error = row_merge(trx, dup, file, block, tmpfd,
- &num_runs, run_offset,
- crypt_block,
- space);
+ &num_runs, run_offset, stage,
+ crypt_block, space);
if(update_progress) {
merge_count++;
@@ -2555,7 +3243,7 @@ row_merge_sort(
pct_cost :
((pct_cost * merge_count) / total_merge_sort_count);
/* presenting 10.12% as 1012 integer */;
- onlineddl_pct_progress = (pct_progress + curr_progress) * 100;
+ onlineddl_pct_progress = (ulint) ((pct_progress + curr_progress) * 100);
}
if (error != DB_SUCCESS) {
@@ -2565,7 +3253,7 @@ row_merge_sort(
UNIV_MEM_ASSERT_RW(run_offset, num_runs * sizeof *run_offset);
} while (num_runs > 1);
- mem_free(run_offset);
+ ut_free(run_offset);
/* Progress report only for "normal" indexes. */
#ifndef UNIV_SOLARIS
@@ -2577,24 +3265,30 @@ row_merge_sort(
DBUG_RETURN(error);
}
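Worked example for total_merge_sort_count above: each call to row_merge() roughly halves the number of runs, so ceil(log2(num_runs)) passes suffice; 10 initial runs shrink as 10 -> 5 -> 3 -> 2 -> 1, i.e. four passes. A self-contained check of the formula:

#include <cmath>
#include <cstdio>

int main()
{
	unsigned long num_runs = 10;
	unsigned long passes = (unsigned long)
		std::ceil(std::log2((double) num_runs));

	std::printf("%lu runs -> %lu merge passes\n", num_runs, passes);
	return(0);	/* prints: 10 runs -> 4 merge passes */
}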
-/*************************************************************//**
-Copy externally stored columns to the data tuple. */
-static MY_ATTRIBUTE((nonnull))
+/** Copy externally stored columns to the data tuple.
+@param[in] mrec record containing BLOB pointers,
+or NULL to use tuple instead
+@param[in] offsets offsets of mrec
+@param[in] zip_size compressed page size in bytes, or 0
+@param[in,out] tuple data tuple
+@param[in,out] heap memory heap */
+static
void
row_merge_copy_blobs(
-/*=================*/
- const mrec_t* mrec, /*!< in: merge record */
- const ulint* offsets,/*!< in: offsets of mrec */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- dtuple_t* tuple, /*!< in/out: data tuple */
- mem_heap_t* heap) /*!< in/out: memory heap */
+ const mrec_t* mrec,
+ const offset_t* offsets,
+ const page_size_t& page_size,
+ dtuple_t* tuple,
+ mem_heap_t* heap)
{
- ut_ad(rec_offs_any_extern(offsets));
+ ut_ad(mrec == NULL || rec_offs_any_extern(offsets));
for (ulint i = 0; i < dtuple_get_n_fields(tuple); i++) {
ulint len;
const void* data;
dfield_t* field = dtuple_get_nth_field(tuple, i);
+ ulint field_len;
+ const byte* field_data;
if (!dfield_is_ext(field)) {
continue;
@@ -2608,8 +3302,25 @@ row_merge_copy_blobs(
columns cannot possibly be freed between the time the
BLOB pointers are read (row_merge_read_clustered_index())
and dereferenced (below). */
- data = btr_rec_copy_externally_stored_field(
- mrec, offsets, zip_size, i, &len, heap);
+ if (mrec == NULL) {
+ field_data
+ = static_cast<byte*>(dfield_get_data(field));
+ field_len = dfield_get_len(field);
+
+ ut_a(field_len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+ ut_a(memcmp(field_data + field_len
+ - BTR_EXTERN_FIELD_REF_SIZE,
+ field_ref_zero,
+ BTR_EXTERN_FIELD_REF_SIZE));
+
+ data = btr_copy_externally_stored_field(
+ &len, field_data, page_size, field_len, heap);
+ } else {
+ data = btr_rec_copy_externally_stored_field(
+ mrec, offsets, page_size, i, &len, heap);
+ }
+
/* Because we have locked the table, any records
written by incomplete transactions must have been
rolled back already. There must not be any incomplete
@@ -2620,41 +3331,80 @@ row_merge_copy_blobs(
}
}
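The new mrec == NULL branch above reads the BLOB pointer out of the dtuple itself, after asserting that the trailing field reference was actually written (an all-zero reference would mean the column was never stored). A standalone sketch of that check, assuming the usual 20-byte external field reference:

#include <cstring>

static const size_t FIELD_REF_SIZE = 20;	/* BTR_EXTERN_FIELD_REF_SIZE */

static bool blob_ref_written_sketch(const unsigned char* data, size_t len)
{
	static const unsigned char zero_ref[FIELD_REF_SIZE] = {0};

	/* the reference occupies the last FIELD_REF_SIZE bytes of the
	stored column prefix; all-zero means it was never written */
	return(len >= FIELD_REF_SIZE
	       && std::memcmp(data + len - FIELD_REF_SIZE,
			      zero_ref, FIELD_REF_SIZE) != 0);
}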
-/********************************************************************//**
-Read sorted file containing index data tuples and insert these data
-tuples to the index
-@return DB_SUCCESS or error number */
-static __attribute__((warn_unused_result))
+/** Convert a merge record to a typed data tuple. Note that externally
+stored fields are not copied to heap.
+@param[in,out] index index on the table
+@param[out] dtuple data tuple built from the merge record
+@param[in] mtuple merge record */
+static
+void
+row_merge_mtuple_to_dtuple(
+ dict_index_t* index,
+ dtuple_t* dtuple,
+ const mtuple_t* mtuple)
+{
+ ut_ad(!dict_index_is_ibuf(index));
+
+ memcpy(dtuple->fields, mtuple->fields,
+ dtuple->n_fields * sizeof *mtuple->fields);
+}
+
+/** Insert sorted data tuples to the index.
+@param[in] index index to be inserted
+@param[in] old_table old table
+@param[in] fd file descriptor
+@param[in,out] block file buffer
+@param[in] row_buf row_buf the sorted data tuples,
+or NULL if fd, block will be used instead
+@param[in,out] btr_bulk btr bulk instance
+@param[in,out] stage performance schema accounting object, used by
+ALTER TABLE. If not NULL stage->begin_phase_insert() will be called initially
+and then stage->inc() will be called for each record that is processed.
+@return DB_SUCCESS or error number */
+static MY_ATTRIBUTE((warn_unused_result))
dberr_t
row_merge_insert_index_tuples(
-/*==========================*/
- trx_id_t trx_id, /*!< in: transaction identifier */
- dict_index_t* index, /*!< in: index */
- const dict_table_t* old_table,/*!< in: old table */
- int fd, /*!< in: file descriptor */
- row_merge_block_t* block, /*!< in/out: file buffer */
- const ib_int64_t table_total_rows, /*!< in: total rows of old table */
- const float pct_progress, /*!< in: total progress percent until now */
- const float pct_cost, /*!< in: current progress percent
+ dict_index_t* index,
+ const dict_table_t* old_table,
+ int fd,
+ row_merge_block_t* block,
+ const row_merge_buf_t* row_buf,
+ BtrBulk* btr_bulk,
+ const ib_uint64_t table_total_rows, /*!< in: total rows of old table */
+ const double pct_progress, /*!< in: total progress
+ percent until now */
+ const double pct_cost, /*!< in: current progress percent
*/
row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */
- ulint space) /*!< in: space id */
+ ulint space, /*!< in: space id */
+ ut_stage_alter_t* stage)
{
const byte* b;
mem_heap_t* heap;
mem_heap_t* tuple_heap;
- mem_heap_t* ins_heap;
dberr_t error = DB_SUCCESS;
ulint foffs = 0;
- ulint* offsets;
+ offset_t* offsets;
mrec_buf_t* buf;
- ib_int64_t inserted_rows = 0;
- float curr_progress;
+ ulint n_rows = 0;
+ dtuple_t* dtuple;
+ ib_uint64_t inserted_rows = 0;
+ double curr_progress = 0;
+ dict_index_t* old_index = NULL;
+ const mrec_t* mrec = NULL;
+ mtr_t mtr;
+
DBUG_ENTER("row_merge_insert_index_tuples");
ut_ad(!srv_read_only_mode);
ut_ad(!(index->type & DICT_FTS));
- ut_ad(trx_id);
+ ut_ad(!dict_index_is_spatial(index));
+
+ if (stage != NULL) {
+ stage->begin_phase_insert();
+ }
tuple_heap = mem_heap_create(1000);
@@ -2662,32 +3412,56 @@ row_merge_insert_index_tuples(
ulint i = 1 + REC_OFFS_HEADER_SIZE
+ dict_index_get_n_fields(index);
heap = mem_heap_create(sizeof *buf + i * sizeof *offsets);
- ins_heap = mem_heap_create(sizeof *buf + i * sizeof *offsets);
- offsets = static_cast<ulint*>(
+ offsets = static_cast<offset_t*>(
mem_heap_alloc(heap, i * sizeof *offsets));
- offsets[0] = i;
- offsets[1] = dict_index_get_n_fields(index);
+ rec_offs_set_n_alloc(offsets, i);
+ rec_offs_set_n_fields(offsets, dict_index_get_n_fields(index));
}
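
Here the raw offsets[0]/offsets[1] stores are replaced with the rec_offs_set_n_alloc()/rec_offs_set_n_fields() accessors: the offsets array carries its own capacity and field count in header slots ahead of the per-field end offsets. A simplified standalone model of that layout (an assumption-laden sketch; real InnoDB also packs flag bits into these slots, and the header is larger in debug builds):

#include <cassert>
#include <vector>

typedef unsigned long offset_t;
static const size_t REC_OFFS_HEADER_SIZE = 2;	/* simplified for this sketch */

inline void rec_offs_set_n_alloc(offset_t* offsets, size_t n)  { offsets[0] = n; }
inline void rec_offs_set_n_fields(offset_t* offsets, size_t n) { offsets[1] = n; }

int main()
{
	size_t n_fields = 4;
	size_t n_alloc = 1 + REC_OFFS_HEADER_SIZE + n_fields;	/* as in the patch */
	std::vector<offset_t> offsets(n_alloc);
	rec_offs_set_n_alloc(offsets.data(), n_alloc);
	rec_offs_set_n_fields(offsets.data(), n_fields);
	assert(offsets[0] == n_alloc && offsets[1] == n_fields);
}
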
- b = &block[0];
-
- if (!row_merge_read(fd, foffs, block,
- crypt_block,
- space)) {
- error = DB_CORRUPTION;
+ if (row_buf != NULL) {
+ ut_ad(fd == -1);
+ ut_ad(block == NULL);
+ DBUG_EXECUTE_IF("row_merge_read_failure",
+ error = DB_CORRUPTION;
+ goto err_exit;);
+ buf = NULL;
+ b = NULL;
+ dtuple = dtuple_create(
+ heap, dict_index_get_n_fields(index));
+ dtuple_set_n_fields_cmp(
+ dtuple, dict_index_get_n_unique_in_tree(index));
} else {
- buf = static_cast<mrec_buf_t*>(
- mem_heap_alloc(heap, sizeof *buf));
+ b = block;
+ dtuple = NULL;
- for (;;) {
- const mrec_t* mrec;
- dtuple_t* dtuple;
- ulint n_ext;
- big_rec_t* big_rec;
- rec_t* rec;
- btr_cur_t cursor;
- mtr_t mtr;
+ if (!row_merge_read(fd, foffs, block, crypt_block, space)) {
+ error = DB_CORRUPTION;
+ goto err_exit;
+ } else {
+ buf = static_cast<mrec_buf_t*>(
+ mem_heap_alloc(heap, sizeof *buf));
+ }
+ }
+ for (;;) {
+
+ if (stage != NULL) {
+ stage->inc();
+ }
+
+ if (row_buf != NULL) {
+ if (n_rows >= row_buf->n_tuples) {
+ break;
+ }
+
+ /* Convert merge tuple record from
+ row buffer to data tuple record */
+ row_merge_mtuple_to_dtuple(
+ index, dtuple, &row_buf->tuples[n_rows]);
+ n_rows++;
+ /* BLOB pointers must be copied from dtuple */
+ mrec = NULL;
+ } else {
b = row_merge_read_rec(block, buf, b, index,
fd, &foffs, &mrec, offsets,
crypt_block,
@@ -2701,155 +3475,80 @@ row_merge_insert_index_tuples(
break;
}
- dict_index_t* old_index
- = dict_table_get_first_index(old_table);
-
- if (dict_index_is_clust(index)
- && dict_index_is_online_ddl(old_index)) {
- error = row_log_table_get_error(old_index);
- if (error != DB_SUCCESS) {
- break;
- }
- }
-
dtuple = row_rec_to_index_entry_low(
- mrec, index, offsets, &n_ext, tuple_heap);
-
- if (!n_ext) {
- /* There are no externally stored columns. */
- } else {
- ut_ad(dict_index_is_clust(index));
- /* Off-page columns can be fetched safely
- when concurrent modifications to the table
- are disabled. (Purge can process delete-marked
- records, but row_merge_read_clustered_index()
- would have skipped them.)
-
- When concurrent modifications are enabled,
- row_merge_read_clustered_index() will
- only see rows from transactions that were
- committed before the ALTER TABLE started
- (REPEATABLE READ).
-
- Any modifications after the
- row_merge_read_clustered_index() scan
- will go through row_log_table_apply().
- Any modifications to off-page columns
- will be tracked by
- row_log_table_blob_alloc() and
- row_log_table_blob_free(). */
- row_merge_copy_blobs(
- mrec, offsets,
- dict_table_zip_size(old_table),
- dtuple, tuple_heap);
- }
-
- ut_ad(dtuple_validate(dtuple));
- log_free_check();
-
- mtr_start(&mtr);
- /* Insert after the last user record. */
- btr_cur_open_at_index_side(
- false, index, BTR_MODIFY_LEAF,
- &cursor, 0, &mtr);
- page_cur_position(
- page_rec_get_prev(btr_cur_get_rec(&cursor)),
- btr_cur_get_block(&cursor),
- btr_cur_get_page_cur(&cursor));
- cursor.flag = BTR_CUR_BINARY;
-#ifdef UNIV_DEBUG
- /* Check that the records are inserted in order. */
- rec = btr_cur_get_rec(&cursor);
-
- if (!page_rec_is_infimum(rec)) {
- ulint* rec_offsets = rec_get_offsets(
- rec, index, offsets,
- ULINT_UNDEFINED, &tuple_heap);
- ut_ad(cmp_dtuple_rec(dtuple, rec, rec_offsets)
- > 0);
- }
-#endif /* UNIV_DEBUG */
- ulint* ins_offsets = NULL;
-
- error = btr_cur_optimistic_insert(
- BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
- | BTR_KEEP_SYS_FLAG | BTR_CREATE_FLAG,
- &cursor, &ins_offsets, &ins_heap,
- dtuple, &rec, &big_rec, 0, NULL, &mtr);
+ mrec, index, offsets, tuple_heap);
+ }
- if (error == DB_FAIL) {
- ut_ad(!big_rec);
- mtr_commit(&mtr);
- mtr_start(&mtr);
- btr_cur_open_at_index_side(
- false, index, BTR_MODIFY_TREE,
- &cursor, 0, &mtr);
- page_cur_position(
- page_rec_get_prev(btr_cur_get_rec(
- &cursor)),
- btr_cur_get_block(&cursor),
- btr_cur_get_page_cur(&cursor));
+ old_index = dict_table_get_first_index(old_table);
- error = btr_cur_pessimistic_insert(
- BTR_NO_UNDO_LOG_FLAG
- | BTR_NO_LOCKING_FLAG
- | BTR_KEEP_SYS_FLAG | BTR_CREATE_FLAG,
- &cursor, &ins_offsets, &ins_heap,
- dtuple, &rec, &big_rec, 0, NULL, &mtr);
+ if (dict_index_is_clust(index)
+ && dict_index_is_online_ddl(old_index)) {
+ error = row_log_table_get_error(old_index);
+ if (error != DB_SUCCESS) {
+ break;
}
+ }
- if (!dict_index_is_clust(index)) {
- page_update_max_trx_id(
- btr_cur_get_block(&cursor),
- btr_cur_get_page_zip(&cursor),
- trx_id, &mtr);
- }
+ if (dict_index_is_clust(index) && dtuple_get_n_ext(dtuple)) {
+ /* Off-page columns can be fetched safely
+ when concurrent modifications to the table
+ are disabled. (Purge can process delete-marked
+ records, but row_merge_read_clustered_index()
+ would have skipped them.)
+
+ When concurrent modifications are enabled,
+ row_merge_read_clustered_index() will
+ only see rows from transactions that were
+ committed before the ALTER TABLE started
+ (REPEATABLE READ).
+
+ Any modifications after the
+ row_merge_read_clustered_index() scan
+ will go through row_log_table_apply().
+ Any modifications to off-page columns
+ will be tracked by
+ row_log_table_blob_alloc() and
+ row_log_table_blob_free(). */
+ row_merge_copy_blobs(
+ mrec, offsets,
+ dict_table_page_size(old_table),
+ dtuple, tuple_heap);
+ }
- mtr_commit(&mtr);
+#ifdef UNIV_DEBUG
+ static const latch_level_t latches[] = {
+ SYNC_INDEX_TREE, /* index->lock */
+ SYNC_LEVEL_VARYING /* btr_bulk->m_page_bulks */
+ };
+#endif /* UNIV_DEBUG */
- if (UNIV_LIKELY_NULL(big_rec)) {
- /* If the system crashes at this
- point, the clustered index record will
- contain a null BLOB pointer. This
- should not matter, because the copied
- table will be dropped on crash
- recovery anyway. */
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(error == DB_SUCCESS);
- error = row_ins_index_entry_big_rec(
- dtuple, big_rec,
- ins_offsets, &ins_heap,
- index, NULL, __FILE__, __LINE__);
- dtuple_convert_back_big_rec(
- index, dtuple, big_rec);
- }
+ ut_ad(dtuple_validate(dtuple));
+ ut_ad(!sync_check_iterate(sync_allowed_latches(latches,
+ latches + 2)));
+ error = btr_bulk->insert(dtuple);
- if (error != DB_SUCCESS) {
- goto err_exit;
- }
+ if (error != DB_SUCCESS) {
+ goto err_exit;
+ }
- mem_heap_empty(tuple_heap);
- mem_heap_empty(ins_heap);
+ mem_heap_empty(tuple_heap);
- /* Increment innodb_onlineddl_pct_progress status variable */
- inserted_rows++;
- if(inserted_rows % 1000 == 0) {
- /* Update progress for each 1000 rows */
- curr_progress = (inserted_rows >= table_total_rows ||
- table_total_rows <= 0) ?
- pct_cost :
- ((pct_cost * inserted_rows) / table_total_rows);
+ /* Increment innodb_onlineddl_pct_progress status variable */
+ inserted_rows++;
+		if (inserted_rows % 1000 == 0) {
+ /* Update progress for each 1000 rows */
+ curr_progress = (inserted_rows >= table_total_rows ||
+ table_total_rows <= 0) ?
+ pct_cost :
+ ((pct_cost * inserted_rows) / table_total_rows);
- /* presenting 10.12% as 1012 integer */;
- onlineddl_pct_progress = (pct_progress + curr_progress) * 100;
- }
+			/* presenting 10.12% as 1012 integer */
+ onlineddl_pct_progress = (ulint) ((pct_progress + curr_progress) * 100);
}
}
err_exit:
mem_heap_free(tuple_heap);
- mem_heap_free(ins_heap);
mem_heap_free(heap);
DBUG_RETURN(error);
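
A worked example of the progress encoding in the loop above, where a percentage with two implied decimals is published as an integer (10.12% becomes 1012). The numbers are chosen to be exact in binary floating point, since the final cast truncates:

#include <cstdio>

int main()
{
	double pct_progress = 7.5;	/* progress of completed phases */
	double pct_cost     = 40.0;	/* cost share of this insert phase */
	unsigned long long inserted_rows = 2625, table_total_rows = 40000;

	double curr_progress =
		(pct_cost * inserted_rows) / table_total_rows;	/* 2.625 */
	unsigned long encoded =
		(unsigned long) ((pct_progress + curr_progress) * 100);
	printf("%lu\n", encoded);	/* prints 1012, i.e. 10.12% */
}
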
@@ -2857,8 +3556,7 @@ err_exit:
/*********************************************************************//**
Sets an exclusive lock on a table, for the duration of creating indexes.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
dberr_t
row_merge_lock_table(
/*=================*/
@@ -2866,76 +3564,13 @@ row_merge_lock_table(
dict_table_t* table, /*!< in: table to lock */
enum lock_mode mode) /*!< in: LOCK_X or LOCK_S */
{
- mem_heap_t* heap;
- que_thr_t* thr;
- dberr_t err;
- sel_node_t* node;
-
ut_ad(!srv_read_only_mode);
ut_ad(mode == LOCK_X || mode == LOCK_S);
- heap = mem_heap_create(512);
-
trx->op_info = "setting table lock for creating or dropping index";
+ trx->ddl = true;
- node = sel_node_create(heap);
- thr = pars_complete_graph_for_exec(node, trx, heap);
- thr->graph->state = QUE_FORK_ACTIVE;
-
- /* We use the select query graph as the dummy graph needed
- in the lock module call */
-
- thr = static_cast<que_thr_t*>(
- que_fork_get_first_thr(
- static_cast<que_fork_t*>(que_node_get_parent(thr))));
-
- que_thr_move_to_run_state_for_mysql(thr, trx);
-
-run_again:
- thr->run_node = thr;
- thr->prev_node = thr->common.parent;
-
- err = lock_table(0, table, mode, thr);
-
- trx->error_state = err;
-
- if (UNIV_LIKELY(err == DB_SUCCESS)) {
- que_thr_stop_for_mysql_no_error(thr, trx);
- } else {
- que_thr_stop_for_mysql(thr);
-
- if (err != DB_QUE_THR_SUSPENDED) {
- bool was_lock_wait;
-
- was_lock_wait = row_mysql_handle_errors(
- &err, trx, thr, NULL);
-
- if (was_lock_wait) {
- goto run_again;
- }
- } else {
- que_thr_t* run_thr;
- que_node_t* parent;
-
- parent = que_node_get_parent(thr);
-
- run_thr = que_fork_start_command(
- static_cast<que_fork_t*>(parent));
-
- ut_a(run_thr == thr);
-
- /* There was a lock wait but the thread was not
- in a ready to run or running state. */
- trx->error_state = DB_LOCK_WAIT;
-
- goto run_again;
- }
- }
-
- que_graph_free(thr->graph);
- trx->op_info = "";
-
- return(err);
+ return(lock_table_for_trx(table, trx, mode));
}
/*********************************************************************//**
@@ -2962,9 +3597,7 @@ row_merge_drop_index_dict(
ut_ad(mutex_own(&dict_sys->mutex));
ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
info = pars_info_create();
pars_info_add_ull_literal(info, "indexid", index_id);
@@ -2977,9 +3610,8 @@ row_merge_drop_index_dict(
DB_TOO_MANY_CONCURRENT_TRXS. */
trx->error_state = DB_SUCCESS;
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: row_merge_drop_index_dict "
- "failed with error code: %u.\n", (unsigned) error);
+ ib::error() << "row_merge_drop_index_dict failed with error "
+ << error;
}
trx->op_info = "";
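
row_merge_drop_index_dict() uses the same bind-then-execute idiom as the other dictionary writers in this file: named literals are collected in a pars_info_t, and que_eval_sql() substitutes them into the InnoDB SQL text (the :indexid marker above). A toy model of the binding step, with a std::map standing in for pars_info_t:

#include <cstdio>
#include <map>
#include <string>

int main()
{
	std::map<std::string, unsigned long long> info;	/* pars_info_create() */
	info["indexid"] = 42ULL;			/* pars_info_add_ull_literal() */

	const char* sql = "DELETE FROM SYS_INDEXES WHERE ID = :indexid;";
	printf("executing \"%s\" with :indexid=%llu\n", sql, info["indexid"]);
}
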
@@ -2989,7 +3621,6 @@ row_merge_drop_index_dict(
Drop indexes that were created before an error occurred.
The data dictionary must have been locked exclusively by the caller,
because the transaction will not be committed. */
-UNIV_INTERN
void
row_merge_drop_indexes_dict(
/*========================*/
@@ -3029,9 +3660,7 @@ row_merge_drop_indexes_dict(
ut_ad(mutex_own(&dict_sys->mutex));
ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
/* It is possible that table->n_ref_count > 1 when
locked=TRUE. In this case, all code that should have an open
@@ -3045,15 +3674,18 @@ row_merge_drop_indexes_dict(
trx->op_info = "dropping indexes";
error = que_eval_sql(info, sql, FALSE, trx);
- if (error != DB_SUCCESS) {
+ switch (error) {
+ case DB_SUCCESS:
+ break;
+ default:
/* Even though we ensure that DDL transactions are WAIT
and DEADLOCK free, we could encounter other errors e.g.,
DB_TOO_MANY_CONCURRENT_TRXS. */
+ ib::error() << "row_merge_drop_indexes_dict failed with error "
+ << error;
+ /* fall through */
+ case DB_TOO_MANY_CONCURRENT_TRXS:
trx->error_state = DB_SUCCESS;
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: row_merge_drop_indexes_dict "
- "failed with error code: %u.\n", (unsigned) error);
}
trx->op_info = "";
@@ -3063,7 +3695,6 @@ row_merge_drop_indexes_dict(
Drop indexes that were created before an error occurred.
The data dictionary must have been locked exclusively by the caller,
because the transaction will not be committed. */
-UNIV_INTERN
void
row_merge_drop_indexes(
/*===================*/
@@ -3079,16 +3710,14 @@ row_merge_drop_indexes(
ut_ad(mutex_own(&dict_sys->mutex));
ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
index = dict_table_get_first_index(table);
ut_ad(dict_index_is_clust(index));
ut_ad(dict_index_get_online_status(index) == ONLINE_INDEX_COMPLETE);
/* the caller should have an open handle to the table */
- ut_ad(table->n_ref_count >= 1);
+ ut_ad(table->get_ref_count() >= 1);
/* It is possible that table->n_ref_count > 1 when
locked=TRUE. In this case, all code that should have an open
@@ -3097,7 +3726,7 @@ row_merge_drop_indexes(
A concurrent purge will be prevented by dict_operation_lock. */
- if (!locked && (table->n_ref_count > 1
+ if (!locked && (table->get_ref_count() > 1
|| UT_LIST_GET_FIRST(table->locks))) {
/* We will have to drop the indexes later, when the
table is guaranteed to be no longer in use. Mark the
@@ -3114,7 +3743,7 @@ row_merge_drop_indexes(
case ONLINE_INDEX_ABORTED_DROPPED:
continue;
case ONLINE_INDEX_COMPLETE:
- if (*index->name != TEMP_INDEX_PREFIX) {
+ if (index->is_committed()) {
/* Do nothing to already
published indexes. */
} else if (index->type & DICT_FTS) {
@@ -3134,17 +3763,6 @@ row_merge_drop_indexes(
ut_ad(prev);
ut_a(table->fts);
fts_drop_index(table, index, trx);
- /* Since
- INNOBASE_SHARE::idx_trans_tbl
- is shared between all open
- ha_innobase handles to this
- table, no thread should be
- accessing this dict_index_t
- object. Also, we should be
- holding LOCK=SHARED MDL on the
- table even after the MDL
- upgrade timeout. */
-
/* We can remove a DICT_FTS
index from the cache, because
we do not allow ADD FULLTEXT INDEX
@@ -3167,7 +3785,7 @@ row_merge_drop_indexes(
continue;
case ONLINE_INDEX_CREATION:
rw_lock_x_lock(dict_index_get_lock(index));
- ut_ad(*index->name == TEMP_INDEX_PREFIX);
+ ut_ad(!index->is_committed());
row_log_abort_sec(index);
drop_aborted:
rw_lock_x_unlock(dict_index_get_lock(index));
@@ -3211,7 +3829,7 @@ row_merge_drop_indexes(
ut_ad(!dict_index_is_clust(index));
- if (*index->name == TEMP_INDEX_PREFIX) {
+ if (!index->is_committed()) {
/* If it is FTS index, drop from table->fts
and also drop its auxiliary tables */
if (index->type & DICT_FTS) {
@@ -3250,7 +3868,6 @@ row_merge_drop_indexes(
/*********************************************************************//**
Drop all partially created indexes during crash recovery. */
-UNIV_INTERN
void
row_merge_drop_temp_indexes(void)
/*=============================*/
@@ -3302,9 +3919,8 @@ row_merge_drop_temp_indexes(void)
DB_TOO_MANY_CONCURRENT_TRXS. */
trx->error_state = DB_SUCCESS;
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: row_merge_drop_temp_indexes "
- "failed with error code: %u.\n", (unsigned) error);
+		ib::error() << "row_merge_drop_temp_indexes failed with error "
+ << error;
}
trx_commit_for_mysql(trx);
@@ -3315,9 +3931,8 @@ row_merge_drop_temp_indexes(void)
/** Create temporary merge files in the given parameter path, and if
UNIV_PFS_IO defined, register the file descriptor with Performance Schema.
-@param[in] path location for creating temporary merge files.
+@param[in] path location for creating temporary merge files, or NULL
@return File descriptor */
-UNIV_INTERN
int
row_merge_file_create_low(
const char* path)
@@ -3327,19 +3942,18 @@ row_merge_file_create_low(
/* This temp file open does not go through normal
file APIs, add instrumentation to register with
performance schema */
- struct PSI_file_locker* locker = NULL;
PSI_file_locker_state state;
if (!path) {
path = mysql_tmpdir;
}
static const char label[] = "/Innodb Merge Temp File";
char* name = static_cast<char*>(
- ut_malloc(strlen(path) + sizeof label));
+ ut_malloc_nokey(strlen(path) + sizeof label));
strcpy(name, path);
strcat(name, label);
- locker = PSI_FILE_CALL(get_thread_file_name_locker)(
- &state, innodb_file_temp_key, PSI_FILE_OPEN,
- path ? name : label, &locker);
+ PSI_file_locker* locker = PSI_FILE_CALL(get_thread_file_name_locker)(
+ &state, innodb_temp_file_key, PSI_FILE_OPEN,
+ path ? name : label, &locker);
if (locker != NULL) {
PSI_FILE_CALL(start_file_open_wait)(locker,
__FILE__,
@@ -3356,9 +3970,8 @@ row_merge_file_create_low(
#endif
if (fd < 0) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot create temporary merge file");
- return (-1);
+ ib::error() << "Cannot create temporary merge file";
+ return(-1);
}
return(fd);
}
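
The temporary file name above is plain concatenation of the chosen directory and a fixed label, with mysql_tmpdir as the fallback when no path is given. A standalone sketch of just that step (the tmpdir value is an assumption):

#include <cstdio>
#include <cstdlib>
#include <cstring>

int main()
{
	const char* path = NULL;		/* caller may pass NULL */
	const char* mysql_tmpdir = "/tmp";	/* assumption for this sketch */
	static const char label[] = "/Innodb Merge Temp File";

	if (!path) {
		path = mysql_tmpdir;
	}
	char* name = (char*) malloc(strlen(path) + sizeof label);
	if (!name) {
		return 1;
	}
	strcpy(name, path);
	strcat(name, label);
	puts(name);				/* /tmp/Innodb Merge Temp File */
	free(name);
}
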
@@ -3366,9 +3979,8 @@ row_merge_file_create_low(
/** Create a merge file in the given location.
@param[out] merge_file merge file structure
-@param[in] path location for creating temporary file
+@param[in] path location for creating temporary file, or NULL
@return file descriptor, or -1 on failure */
-UNIV_INTERN
int
row_merge_file_create(
merge_file_t* merge_file,
@@ -3380,7 +3992,7 @@ row_merge_file_create(
if (merge_file->fd >= 0) {
if (srv_disable_sort_file_cache) {
- os_file_set_nocache((os_file_t)merge_file->fd,
+ os_file_set_nocache(merge_file->fd,
"row0merge.cc", "sort");
}
}
@@ -3390,7 +4002,6 @@ row_merge_file_create(
/*********************************************************************//**
Destroy a merge file. And de-register the file from Performance Schema
if UNIV_PFS_IO is defined. */
-UNIV_INTERN
void
row_merge_file_destroy_low(
/*=======================*/
@@ -3417,7 +4028,6 @@ row_merge_file_destroy_low(
}
/*********************************************************************//**
Destroy a merge file. */
-UNIV_INTERN
void
row_merge_file_destroy(
/*===================*/
@@ -3435,8 +4045,7 @@ row_merge_file_destroy(
Rename an index in the dictionary that was created. The data
dictionary must have been locked exclusively by the caller, because
the transaction will not be committed.
-@return DB_SUCCESS if all OK */
-UNIV_INTERN
+@return DB_SUCCESS if all OK */
dberr_t
row_merge_rename_index_to_add(
/*==========================*/
@@ -3473,10 +4082,8 @@ row_merge_rename_index_to_add(
DB_TOO_MANY_CONCURRENT_TRXS. */
trx->error_state = DB_SUCCESS;
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: row_merge_rename_index_to_add "
- "failed with error code: %u.\n", (unsigned) err);
+ ib::error() << "row_merge_rename_index_to_add failed with"
+ " error " << err;
}
trx->op_info = "";
@@ -3488,8 +4095,7 @@ row_merge_rename_index_to_add(
Rename an index in the dictionary that is to be dropped. The data
dictionary must have been locked exclusively by the caller, because
the transaction will not be committed.
-@return DB_SUCCESS if all OK */
-UNIV_INTERN
+@return DB_SUCCESS if all OK */
dberr_t
row_merge_rename_index_to_drop(
/*===========================*/
@@ -3529,10 +4135,8 @@ row_merge_rename_index_to_drop(
DB_TOO_MANY_CONCURRENT_TRXS. */
trx->error_state = DB_SUCCESS;
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: row_merge_rename_index_to_drop "
- "failed with error code: %u.\n", (unsigned) err);
+ ib::error() << "row_merge_rename_index_to_drop failed with"
+ " error " << err;
}
trx->op_info = "";
@@ -3544,8 +4148,8 @@ row_merge_rename_index_to_drop(
Provide a new pathname for a table that is being renamed if it belongs to
a file-per-table tablespace. The caller is responsible for freeing the
memory allocated for the return value.
-@return new pathname of tablespace file, or NULL if space = 0 */
-UNIV_INTERN
+@return new pathname of tablespace file, or NULL if space = 0 */
+static
char*
row_make_new_pathname(
/*==================*/
@@ -3555,14 +4159,14 @@ row_make_new_pathname(
char* new_path;
char* old_path;
- ut_ad(table->space != TRX_SYS_SPACE);
+ ut_ad(!is_system_tablespace(table->space));
old_path = fil_space_get_first_path(table->space);
ut_a(old_path);
new_path = os_file_make_new_pathname(old_path, new_name);
- mem_free(old_path);
+ ut_free(old_path);
return(new_path);
}
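
row_make_new_pathname() delegates to os_file_make_new_pathname(), which keeps the directory part of the old datafile path and splices in the new name. A rough standalone equivalent under simplifying assumptions (the real helper parses the database/table structure of InnoDB names):

#include <cstdio>
#include <string>

std::string make_new_pathname(const std::string& old_path,
			      const std::string& new_name)
{
	std::string::size_type slash = old_path.rfind('/');
	std::string::size_type tbl = new_name.rfind('/');
	std::string table = (tbl == std::string::npos)
		? new_name : new_name.substr(tbl + 1);
	return old_path.substr(0, slash + 1) + table + ".ibd";
}

int main()
{
	/* ./test/t1.ibd renamed for table test/#sql-ib123 */
	printf("%s\n",
	       make_new_pathname("./test/t1.ibd", "test/#sql-ib123").c_str());
}
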
@@ -3571,8 +4175,7 @@ row_make_new_pathname(
Rename the tables in the data dictionary. The data dictionary must
have been locked exclusively by the caller, because the transaction
will not be committed.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
dberr_t
row_merge_rename_tables_dict(
/*=========================*/
@@ -3600,8 +4203,8 @@ row_merge_rename_tables_dict(
info = pars_info_create();
- pars_info_add_str_literal(info, "new_name", new_table->name);
- pars_info_add_str_literal(info, "old_name", old_table->name);
+ pars_info_add_str_literal(info, "new_name", new_table->name.m_name);
+ pars_info_add_str_literal(info, "old_name", old_table->name.m_name);
pars_info_add_str_literal(info, "tmp_name", tmp_name);
err = que_eval_sql(info,
@@ -3613,10 +4216,11 @@ row_merge_rename_tables_dict(
" WHERE NAME = :new_name;\n"
"END;\n", FALSE, trx);
- /* Update SYS_TABLESPACES and SYS_DATAFILES if the old
- table is in a non-system tablespace where space > 0. */
+ /* Update SYS_TABLESPACES and SYS_DATAFILES if the old table being
+ renamed is a single-table tablespace, which must be implicitly
+ renamed along with the table. */
if (err == DB_SUCCESS
- && old_table->space != TRX_SYS_SPACE
+ && dict_table_is_file_per_table(old_table)
&& fil_space_get(old_table->space) != NULL) {
/* Make pathname to update SYS_DATAFILES. */
char* tmp_path = row_make_new_pathname(old_table, tmp_name);
@@ -3639,19 +4243,22 @@ row_merge_rename_tables_dict(
" WHERE SPACE = :old_space;\n"
"END;\n", FALSE, trx);
- mem_free(tmp_path);
+ ut_free(tmp_path);
}
- /* Update SYS_TABLESPACES and SYS_DATAFILES if the new
- table is in a non-system tablespace where space > 0. */
- if (err == DB_SUCCESS && new_table->space != TRX_SYS_SPACE) {
+ /* Update SYS_TABLESPACES and SYS_DATAFILES if the new table being
+ renamed is a single-table tablespace, which must be implicitly
+ renamed along with the table. */
+ if (err == DB_SUCCESS
+ && dict_table_is_file_per_table(new_table)) {
/* Make pathname to update SYS_DATAFILES. */
char* old_path = row_make_new_pathname(
- new_table, old_table->name);
+ new_table, old_table->name.m_name);
info = pars_info_create();
- pars_info_add_str_literal(info, "old_name", old_table->name);
+ pars_info_add_str_literal(info, "old_name",
+ old_table->name.m_name);
pars_info_add_str_literal(info, "old_path", old_path);
pars_info_add_int4_literal(info, "new_space",
(lint) new_table->space);
@@ -3667,7 +4274,7 @@ row_merge_rename_tables_dict(
" WHERE SPACE = :new_space;\n"
"END;\n", FALSE, trx);
- mem_free(old_path);
+ ut_free(old_path);
}
if (err == DB_SUCCESS && dict_table_is_discarded(new_table)) {
@@ -3680,22 +4287,28 @@ row_merge_rename_tables_dict(
return(err);
}
-/*********************************************************************//**
-Create and execute a query graph for creating an index.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
+/** Create and execute a query graph for creating an index.
+@param[in,out] trx trx
+@param[in,out] table table
+@param[in,out] index index
+@param[in] add_v new virtual columns added along with add index call
+@return DB_SUCCESS or error code */
+MY_ATTRIBUTE((nonnull(1,2), warn_unused_result))
+static
dberr_t
row_merge_create_index_graph(
-/*=========================*/
- trx_t* trx, /*!< in: trx */
- dict_table_t* table, /*!< in: table */
- dict_index_t*& index) /*!< in,out: index */
+ trx_t* trx,
+ dict_table_t* table,
+ dict_index_t*& index,
+ const dict_add_v_col_t* add_v)
{
ind_node_t* node; /*!< Index creation node */
mem_heap_t* heap; /*!< Memory heap */
que_thr_t* thr; /*!< Query thread */
dberr_t err;
+ DBUG_ENTER("row_merge_create_index_graph");
+
ut_ad(trx);
ut_ad(table);
ut_ad(index);
@@ -3703,8 +4316,8 @@ row_merge_create_index_graph(
heap = mem_heap_create(512);
index->table = table;
- node = ind_create_graph_create(index, heap, false);
- thr = pars_complete_graph_for_exec(node, trx, heap);
+ node = ind_create_graph_create(index, heap, add_v);
+ thr = pars_complete_graph_for_exec(node, trx, heap, NULL);
ut_a(thr == que_fork_start_command(
static_cast<que_fork_t*>(que_node_get_parent(thr))));
@@ -3717,24 +4330,30 @@ row_merge_create_index_graph(
que_graph_free((que_t*) que_node_get_parent(thr));
- return(err);
+ DBUG_RETURN(err);
}
-/*********************************************************************//**
-Create the index and load in to the dictionary.
-@return index, or NULL on error */
-UNIV_INTERN
+/** Create the index and load in to the dictionary.
+@param[in,out] trx trx (sets error_state)
+@param[in,out] table the index is on this table
+@param[in] index_def the index definition
+@param[in] add_v new virtual columns added along with add
+ index call
+@return index, or NULL on error */
dict_index_t*
row_merge_create_index(
-/*===================*/
- trx_t* trx, /*!< in/out: trx (sets error_state) */
- dict_table_t* table, /*!< in: the index is on this table */
- const index_def_t* index_def) /*!< in: the index definition */
+ trx_t* trx,
+ dict_table_t* table,
+ const index_def_t* index_def,
+ const dict_add_v_col_t* add_v)
{
dict_index_t* index;
dberr_t err;
ulint n_fields = index_def->n_fields;
ulint i;
+ bool has_new_v_col = false;
+
+ DBUG_ENTER("row_merge_create_index");
ut_ad(!srv_read_only_mode);
@@ -3742,26 +4361,44 @@ row_merge_create_index(
a persistent operation. We pass 0 as the space id, and determine at
a lower level the space id where to store the table. */
- index = dict_mem_index_create(table->name, index_def->name,
+ index = dict_mem_index_create(table->name.m_name, index_def->name,
0, index_def->ind_type, n_fields);
ut_a(index);
+ index->set_committed(index_def->rebuild);
+
for (i = 0; i < n_fields; i++) {
+ const char* name;
index_field_t* ifield = &index_def->fields[i];
- dict_mem_index_add_field(
- index,
- dict_table_get_col_name(table, ifield->col_no),
- ifield->prefix_len);
+ if (ifield->is_v_col) {
+ if (ifield->col_no >= table->n_v_def) {
+ ut_ad(ifield->col_no < table->n_v_def
+ + add_v->n_v_col);
+ ut_ad(ifield->col_no >= table->n_v_def);
+ name = add_v->v_col_name[
+ ifield->col_no - table->n_v_def];
+ has_new_v_col = true;
+ } else {
+ name = dict_table_get_v_col_name(
+ table, ifield->col_no);
+ }
+ } else {
+ name = dict_table_get_col_name(table, ifield->col_no);
+ }
+
+ dict_mem_index_add_field(index, name, ifield->prefix_len);
}
ut_d(const dict_index_t* const index_template = index);
/* Add the index to SYS_INDEXES, using the index prototype. */
- err = row_merge_create_index_graph(trx, table, index);
+ err = row_merge_create_index_graph(trx, table, index, add_v);
if (err == DB_SUCCESS) {
ut_ad(index != index_template);
+ index->parser = index_def->parser;
+ index->has_new_v_col = has_new_v_col;
/* Note the id of the transaction that created this
index, we use it to restrict readers from accessing
this index, to ensure read consistency. */
@@ -3774,28 +4411,30 @@ row_merge_create_index(
index = NULL;
}
- return(index);
+ DBUG_RETURN(index);
}
/*********************************************************************//**
Check if a transaction can use an index. */
-UNIV_INTERN
-ibool
+bool
row_merge_is_index_usable(
/*======================*/
const trx_t* trx, /*!< in: transaction */
const dict_index_t* index) /*!< in: index to check */
{
- if (!dict_index_is_clust(index)
+ if (!index->is_primary()
&& dict_index_is_online_ddl(index)) {
/* Indexes that are being created are not useable. */
- return(FALSE);
+ return(false);
}
- return(!dict_index_is_corrupted(index)
+ return(!index->is_corrupted()
&& (dict_table_is_temporary(index->table)
- || !trx->read_view
- || read_view_sees_trx_id(trx->read_view, index->trx_id)));
+ || index->trx_id == 0
+ || !MVCC::is_view_active(trx->read_view)
+ || trx->read_view->changes_visible(
+ index->trx_id,
+ index->table->name)));
}
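
The rewritten visibility test asks the transaction's read view directly whether the index creator's changes would be visible. A simplified standalone model of that decision (the real MVCC check also consults the set of transactions that were active when the view was opened):

#include <cassert>

struct ReadView {
	unsigned long long low_limit_id;	/* ids >= this are invisible */
	bool changes_visible(unsigned long long trx_id) const {
		return trx_id < low_limit_id;
	}
};

bool index_usable(bool is_primary, bool online_ddl, bool corrupted,
		  unsigned long long index_trx_id, const ReadView* view)
{
	if (!is_primary && online_ddl) {
		return false;		/* secondary index still being built */
	}
	return !corrupted
		&& (index_trx_id == 0	/* index predates all open views */
		    || !view
		    || view->changes_visible(index_trx_id));
}

int main()
{
	ReadView v = {100};
	assert(index_usable(false, false, false, 50, &v));
	assert(!index_usable(false, false, false, 150, &v));
}
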
/*********************************************************************//**
@@ -3803,8 +4442,7 @@ Drop a table. The caller must have ensured that the background stats
thread is not processing the table. This can be done by calling
dict_stats_wait_bg_to_stop_using_table() after locking the dictionary and
before calling this function.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
dberr_t
row_merge_drop_table(
/*=================*/
@@ -3814,49 +4452,84 @@ row_merge_drop_table(
ut_ad(!srv_read_only_mode);
/* There must be no open transactions on the table. */
- ut_a(table->n_ref_count == 0);
+ ut_a(table->get_ref_count() == 0);
- return(row_drop_table_for_mysql(table->name, trx, false, false, false));
+ return(row_drop_table_for_mysql(table->name.m_name,
+ trx, SQLCOM_DROP_TABLE, false, false));
}
-/*********************************************************************//**
-Build indexes on a table by reading a clustered index,
-creating a temporary file containing index entries, merge sorting
-these index entries and inserting sorted index entries to indexes.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+/** Write an MLOG_INDEX_LOAD record to indicate in the redo-log
+that redo-logging of individual index pages was disabled, and
+the flushing of such pages to the data files was completed.
+@param[in] index an index tree on which redo logging was disabled */
+void row_merge_write_redo(const dict_index_t* index)
+{
+ ut_ad(!dict_table_is_temporary(index->table));
+ ut_ad(!(index->type & (DICT_SPATIAL | DICT_FTS)));
+
+ mtr_t mtr;
+ mtr.start();
+ byte* log_ptr = mlog_open(&mtr, 11 + 8);
+ log_ptr = mlog_write_initial_log_record_low(
+ MLOG_INDEX_LOAD,
+ index->space, index->page, log_ptr, &mtr);
+ mach_write_to_8(log_ptr, index->id);
+ mlog_close(&mtr, log_ptr + 8);
+ mtr.commit();
+}
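
The 11 + 8 passed to mlog_open() is the worst-case record size: one type byte plus two compressed ids of at most five bytes each for the generic header, then the 8-byte index id that mach_write_to_8() stores big-endian. A standalone model of the id write (sizes per the usual InnoDB redo header encoding):

#include <cassert>
#include <stdint.h>

/* stands in for mach_write_to_8(): most significant byte first */
void write_be64(unsigned char* p, uint64_t v)
{
	for (int i = 7; i >= 0; i--) {
		p[i] = (unsigned char) (v & 0xff);
		v >>= 8;
	}
}

int main()
{
	unsigned char rec[11 + 8] = {0};	/* header + index id */
	write_be64(rec + 11, 0x1234ULL);
	assert(rec[11 + 6] == 0x12 && rec[11 + 7] == 0x34);
}
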
+
+/** Build indexes on a table by reading a clustered index, creating a temporary
+file containing index entries, merge sorting these index entries and inserting
+sorted index entries to indexes.
+@param[in] trx transaction
+@param[in] old_table table where rows are read from
+@param[in] new_table table where indexes are created; identical to
+old_table unless creating a PRIMARY KEY
+@param[in] online true if creating indexes online
+@param[in] indexes indexes to be created
+@param[in] key_numbers MySQL key numbers
+@param[in] n_indexes size of indexes[]
+@param[in,out] table MySQL table, for reporting erroneous key value
+if applicable
+@param[in] add_cols default values of added columns, or NULL
+@param[in] col_map mapping of old column numbers to new ones, or
+NULL if old_table == new_table
+@param[in] add_autoinc number of added AUTO_INCREMENT columns, or
+ULINT_UNDEFINED if none is added
+@param[in,out] sequence autoinc sequence
+@param[in] skip_pk_sort whether the new PRIMARY KEY will follow
+existing order
+@param[in,out] stage performance schema accounting object, used by
+ALTER TABLE. stage->begin_phase_read_pk() will be called at the beginning of
+this function and it will be passed to other functions for further accounting.
+@param[in] add_v new virtual columns added along with indexes
+@param[in] eval_table mysql table used to evaluate virtual column
+ value, see innobase_get_computed_value().
+@return DB_SUCCESS or error code */
dberr_t
row_merge_build_indexes(
-/*====================*/
- trx_t* trx, /*!< in: transaction */
- dict_table_t* old_table, /*!< in: table where rows are
- read from */
- dict_table_t* new_table, /*!< in: table where indexes are
- created; identical to old_table
- unless creating a PRIMARY KEY */
- bool online, /*!< in: true if creating indexes
- online */
- dict_index_t** indexes, /*!< in: indexes to be created */
- const ulint* key_numbers, /*!< in: MySQL key numbers */
- ulint n_indexes, /*!< in: size of indexes[] */
- struct TABLE* table, /*!< in/out: MySQL table, for
- reporting erroneous key value
- if applicable */
- const dtuple_t* add_cols, /*!< in: default values of
- added columns, or NULL */
- const ulint* col_map, /*!< in: mapping of old column
- numbers to new ones, or NULL
- if old_table == new_table */
- ulint add_autoinc, /*!< in: number of added
- AUTO_INCREMENT column, or
- ULINT_UNDEFINED if none is added */
- ib_sequence_t& sequence) /*!< in: autoinc instance if
- add_autoinc != ULINT_UNDEFINED */
+ trx_t* trx,
+ dict_table_t* old_table,
+ dict_table_t* new_table,
+ bool online,
+ dict_index_t** indexes,
+ const ulint* key_numbers,
+ ulint n_indexes,
+ struct TABLE* table,
+ const dtuple_t* add_cols,
+ const ulint* col_map,
+ ulint add_autoinc,
+ ib_sequence_t& sequence,
+ bool skip_pk_sort,
+ ut_stage_alter_t* stage,
+ const dict_add_v_col_t* add_v,
+ struct TABLE* eval_table)
{
merge_file_t* merge_files;
row_merge_block_t* block;
+ ut_new_pfx_t block_pfx;
+ ut_new_pfx_t crypt_pfx;
row_merge_block_t* crypt_block = NULL;
- ulint block_size;
ulint i;
ulint j;
dberr_t error;
@@ -3864,14 +4537,14 @@ row_merge_build_indexes(
dict_index_t* fts_sort_idx = NULL;
fts_psort_t* psort_info = NULL;
fts_psort_t* merge_info = NULL;
- ib_int64_t sig_count = 0;
+ int64_t sig_count = 0;
bool fts_psort_initiated = false;
- float total_static_cost = 0;
- float total_dynamic_cost = 0;
- uint total_index_blocks = 0;
- float pct_cost=0;
- float pct_progress=0;
+ double total_static_cost = 0;
+ double total_dynamic_cost = 0;
+ ulint total_index_blocks = 0;
+ double pct_cost=0;
+ double pct_progress=0;
DBUG_ENTER("row_merge_build_indexes");
@@ -3879,47 +4552,62 @@ row_merge_build_indexes(
ut_ad((old_table == new_table) == !col_map);
ut_ad(!add_cols || col_map);
+ stage->begin_phase_read_pk(skip_pk_sort && new_table != old_table
+ ? n_indexes - 1
+ : n_indexes);
+
/* Allocate memory for merge file data structure and initialize
fields */
- block_size = 3 * srv_sort_buf_size;
- block = static_cast<row_merge_block_t*>(
- os_mem_alloc_large(&block_size));
+ ut_allocator<row_merge_block_t> alloc(mem_key_row_merge_sort);
+
+ /* This will allocate "3 * srv_sort_buf_size" elements of type
+ row_merge_block_t. The latter is defined as byte. */
+ block = alloc.allocate_large(3 * srv_sort_buf_size, &block_pfx);
if (block == NULL) {
DBUG_RETURN(DB_OUT_OF_MEMORY);
}
- /* If temporal log file is encrypted allocate memory for
- encryption/decryption. */
+ crypt_pfx.m_size = 0; /* silence bogus -Wmaybe-uninitialized */
+ TRASH_ALLOC(&crypt_pfx, sizeof crypt_pfx);
+
if (log_tmp_is_encrypted()) {
crypt_block = static_cast<row_merge_block_t*>(
- os_mem_alloc_large(&block_size));
+ alloc.allocate_large(3 * srv_sort_buf_size,
+ &crypt_pfx));
if (crypt_block == NULL) {
DBUG_RETURN(DB_OUT_OF_MEMORY);
}
}
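
The os_mem_alloc_large()/os_mem_free_large() pair is replaced by a ut_allocator that returns the block together with a bookkeeping struct (ut_new_pfx_t) which must be handed back on free. A minimal model of that pairing, with assumed names and sizes:

#include <cstddef>
#include <cstdlib>

struct alloc_pfx { size_t m_size; };	/* stands in for ut_new_pfx_t */

unsigned char* allocate_large(size_t n_bytes, alloc_pfx* pfx)
{
	pfx->m_size = n_bytes;		/* remembered for the matching free */
	return (unsigned char*) malloc(n_bytes);
}

void deallocate_large(unsigned char* block, const alloc_pfx* pfx)
{
	(void) pfx->m_size;		/* real code uses it for accounting */
	free(block);
}

int main()
{
	const size_t srv_sort_buf_size = 1 << 20;	/* assumed value */
	alloc_pfx block_pfx;
	unsigned char* block = allocate_large(3 * srv_sort_buf_size, &block_pfx);
	if (block) {
		deallocate_large(block, &block_pfx);
	}
}
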
- trx_start_if_not_started_xa(trx);
+ trx_start_if_not_started_xa(trx, true);
+ ulint n_merge_files = 0;
+
+	for (ulint i = 0; i < n_indexes; i++) {
+ if (!dict_index_is_spatial(indexes[i])) {
+ n_merge_files++;
+ }
+ }
merge_files = static_cast<merge_file_t*>(
- mem_alloc(n_indexes * sizeof *merge_files));
+ ut_malloc_nokey(n_merge_files * sizeof *merge_files));
/* Initialize all the merge file descriptors, so that we
don't call row_merge_file_destroy() on uninitialized
merge file descriptor */
- for (i = 0; i < n_indexes; i++) {
+ for (i = 0; i < n_merge_files; i++) {
merge_files[i].fd = -1;
merge_files[i].offset = 0;
+ merge_files[i].n_rec = 0;
}
total_static_cost = COST_BUILD_INDEX_STATIC * n_indexes + COST_READ_CLUSTERED_INDEX;
total_dynamic_cost = COST_BUILD_INDEX_DYNAMIC * n_indexes;
-
for (i = 0; i < n_indexes; i++) {
-
if (indexes[i]->type & DICT_FTS) {
ibool opt_doc_id_size = FALSE;
@@ -3930,18 +4618,24 @@ row_merge_build_indexes(
fts_sort_idx = row_merge_create_fts_sort_index(
indexes[i], old_table, &opt_doc_id_size);
- row_merge_dup_t* dup = static_cast<row_merge_dup_t*>(
- ut_malloc(sizeof *dup));
+ row_merge_dup_t* dup
+ = static_cast<row_merge_dup_t*>(
+ ut_malloc_nokey(sizeof *dup));
dup->index = fts_sort_idx;
dup->table = table;
dup->col_map = col_map;
dup->n_dup = 0;
- row_fts_psort_info_init(
- trx, dup, new_table, opt_doc_id_size,
- &psort_info, &merge_info);
+			/* This can fail e.g. if temporary files can't be
+			created */
+ if (!row_fts_psort_info_init(
+ trx, dup, new_table, opt_doc_id_size,
+ &psort_info, &merge_info)) {
+ error = DB_CORRUPTION;
+ goto func_exit;
+ }
- /* "We need to ensure that we free the resources
+ /* We need to ensure that we free the resources
allocated */
fts_psort_initiated = true;
}
@@ -3967,20 +4661,21 @@ row_merge_build_indexes(
"Table %s is encrypted but encryption service or"
" used key_id is not available. "
" Can't continue reading table.",
- !old_table->is_readable() ? old_table->name :
- new_table->name);
+ !old_table->is_readable() ? old_table->name.m_name :
+ new_table->name.m_name);
goto func_exit;
}
/* Read clustered index of the table and create files for
secondary index entries for merge sort */
-
error = row_merge_read_clustered_index(
trx, table, old_table, new_table, online, indexes,
fts_sort_idx, psort_info, merge_files, key_numbers,
- n_indexes, add_cols, col_map,
- add_autoinc, sequence, block, &tmpfd,
- pct_cost, crypt_block);
+ n_indexes, add_cols, add_v, col_map, add_autoinc,
+ sequence, block, skip_pk_sort, &tmpfd, stage,
+ pct_cost, crypt_block, eval_table);
+
+ stage->end_phase_read_pk();
pct_progress += pct_cost;
@@ -3990,7 +4685,7 @@ row_merge_build_indexes(
" and create temporary files");
}
- for (i = 0; i < n_indexes; i++) {
+ for (i = 0; i < n_merge_files; i++) {
total_index_blocks += merge_files[i].offset;
}
@@ -4003,9 +4698,13 @@ row_merge_build_indexes(
/* Now we have files containing index entries ready for
sorting and inserting. */
- for (i = 0; i < n_indexes; i++) {
+ for (ulint k = 0, i = 0; i < n_indexes; i++) {
dict_index_t* sort_idx = indexes[i];
+ if (dict_index_is_spatial(sort_idx)) {
+ continue;
+ }
+
if (indexes[i]->type & DICT_FTS) {
os_event_t fts_parallel_merge_event;
@@ -4055,11 +4754,10 @@ wait_again:
}
if (!all_exit) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Not all child merge threads"
- " exited when creating FTS"
- " index '%s'",
- indexes[i]->name);
+ ib::error() << "Not all child merge"
+ " threads exited when creating"
+ " FTS index '"
+ << indexes[i]->name << "'";
} else {
for (j = 0; j < FTS_NUM_AUX_INDEX;
j++) {
@@ -4079,13 +4777,13 @@ wait_again:
#ifdef FTS_INTERNAL_DIAG_PRINT
DEBUG_FTS_SORT_PRINT("FTS_SORT: Complete Insert\n");
#endif
- } else if (merge_files[i].fd != -1) {
+ } else if (merge_files[k].fd >= 0) {
char buf[NAME_LEN + 1];
row_merge_dup_t dup = {
sort_idx, table, col_map, 0};
pct_cost = (COST_BUILD_INDEX_STATIC +
- (total_dynamic_cost * merge_files[i].offset /
+ (total_dynamic_cost * merge_files[k].offset /
total_index_blocks)) /
(total_static_cost + total_dynamic_cost)
* PCT_COST_MERGESORT_INDEX * 100;
@@ -4093,8 +4791,7 @@ wait_again:
buf, sizeof buf,
indexes[i]->name,
strlen(indexes[i]->name),
- trx->mysql_thd,
- FALSE);
+ trx->mysql_thd);
buf[bufend - buf]='\0';
if (global_system_variables.log_warnings > 2) {
@@ -4110,11 +4807,10 @@ wait_again:
}
error = row_merge_sort(
- trx, &dup, &merge_files[i],
+ trx, &dup, &merge_files[k],
block, &tmpfd, true,
pct_progress, pct_cost,
- crypt_block,
- new_table->space);
+ crypt_block, new_table->space, stage);
pct_progress += pct_cost;
@@ -4132,8 +4828,11 @@ wait_again:
os_thread_sleep(20000000);); /* 20 sec */
if (error == DB_SUCCESS) {
+ BtrBulk btr_bulk(sort_idx, trx,
+ trx->get_flush_observer());
+
pct_cost = (COST_BUILD_INDEX_STATIC +
- (total_dynamic_cost * merge_files[i].offset /
+ (total_dynamic_cost * merge_files[k].offset /
total_index_blocks)) /
(total_static_cost + total_dynamic_cost) *
PCT_COST_INSERT_INDEX * 100;
@@ -4149,10 +4848,13 @@ wait_again:
}
error = row_merge_insert_index_tuples(
- trx->id, sort_idx, old_table,
- merge_files[i].fd, block,
- merge_files[i].n_rec, pct_progress, pct_cost,
- crypt_block, new_table->space);
+ sort_idx, old_table,
+ merge_files[k].fd, block, NULL,
+ &btr_bulk,
+ merge_files[k].n_rec, pct_progress, pct_cost,
+ crypt_block, new_table->space, stage);
+
+ error = btr_bulk.finish(error);
pct_progress += pct_cost;
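
The per-index B-tree loading now goes through BtrBulk: construct it, feed it the sorted tuples, and always fold the running status into finish(), which must run even after an error so the bulk loader can release its resources. A hedged standalone model of that call shape:

#include <cassert>

enum dberr_t { DB_SUCCESS, DB_CORRUPTION };

struct BtrBulk {				/* stand-in for InnoDB's class */
	dberr_t insert(int /*tuple*/) { return DB_SUCCESS; }
	dberr_t finish(dberr_t err) { return err; /* flush, then report */ }
};

int main()
{
	BtrBulk bulk;
	dberr_t err = DB_SUCCESS;
	const int sorted_tuples[] = {1, 2, 3};
	for (int i = 0; i < 3; i++) {
		err = bulk.insert(sorted_tuples[i]);
		if (err != DB_SUCCESS) {
			break;
		}
	}
	err = bulk.finish(err);		/* mirrors btr_bulk.finish(error) */
	assert(err == DB_SUCCESS);
}
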
@@ -4167,25 +4869,41 @@ wait_again:
}
/* Close the temporary file to free up space. */
- row_merge_file_destroy(&merge_files[i]);
+ row_merge_file_destroy(&merge_files[k++]);
if (indexes[i]->type & DICT_FTS) {
row_fts_psort_info_destroy(psort_info, merge_info);
fts_psort_initiated = false;
- } else if (error != DB_SUCCESS || !online) {
- /* Do not apply any online log. */
+ } else if (dict_index_is_spatial(indexes[i])) {
+ /* We never disable redo logging for
+ creating SPATIAL INDEX. Avoid writing any
+ unnecessary MLOG_INDEX_LOAD record. */
} else if (old_table != new_table) {
ut_ad(!sort_idx->online_log);
ut_ad(sort_idx->online_status
== ONLINE_INDEX_COMPLETE);
+ } else if (FlushObserver* flush_observer =
+ trx->get_flush_observer()) {
+ if (error != DB_SUCCESS) {
+ flush_observer->interrupted();
+ }
+ flush_observer->flush();
+ row_merge_write_redo(indexes[i]);
+ }
+
+ if (old_table != new_table
+ || (indexes[i]->type & (DICT_FTS | DICT_SPATIAL))
+ || error != DB_SUCCESS || !online) {
+ /* Do not apply any online log. */
} else {
if (global_system_variables.log_warnings > 2) {
sql_print_information(
"InnoDB: Online DDL : Applying"
" log to index");
}
+
DEBUG_SYNC_C("row_log_apply_before");
- error = row_log_apply(trx, sort_idx, table);
+ error = row_log_apply(trx, sort_idx, table, stage);
DEBUG_SYNC_C("row_log_apply_after");
}
@@ -4195,19 +4913,13 @@ wait_again:
}
if (indexes[i]->type & DICT_FTS && fts_enable_diag_print) {
- char* name = (char*) indexes[i]->name;
-
- if (*name == TEMP_INDEX_PREFIX) {
- name++;
- }
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Finished building "
- "full-text index %s\n", name);
+ ib::info() << "Finished building full-text index "
+ << indexes[i]->name;
}
}
func_exit:
+
DBUG_EXECUTE_IF(
"ib_build_indexes_too_many_concurrent_trxs",
error = DB_TOO_MANY_CONCURRENT_TRXS;
@@ -4221,7 +4933,7 @@ func_exit:
row_merge_file_destroy_low(tmpfd);
- for (i = 0; i < n_indexes; i++) {
+ for (i = 0; i < n_merge_files; i++) {
row_merge_file_destroy(&merge_files[i]);
}
@@ -4229,11 +4941,12 @@ func_exit:
dict_mem_index_free(fts_sort_idx);
}
- mem_free(merge_files);
- os_mem_free_large(block, block_size);
+ ut_free(merge_files);
+
+ alloc.deallocate_large(block, &block_pfx);
if (crypt_block) {
- os_mem_free_large(crypt_block, block_size);
+ alloc.deallocate_large(crypt_block, &crypt_pfx);
}
DICT_TF2_FLAG_UNSET(new_table, DICT_TF2_FTS_ADD_DOC_ID);
@@ -4243,7 +4956,7 @@ func_exit:
as aborted. */
for (i = 0; i < n_indexes; i++) {
ut_ad(!(indexes[i]->type & DICT_FTS));
- ut_ad(*indexes[i]->name == TEMP_INDEX_PREFIX);
+ ut_ad(!indexes[i]->is_committed());
ut_ad(!dict_index_is_clust(indexes[i]));
/* Completed indexes should be dropped as
@@ -4269,12 +4982,44 @@ func_exit:
/* fall through */
case ONLINE_INDEX_ABORTED_DROPPED:
case ONLINE_INDEX_ABORTED:
- MONITOR_MUTEX_INC(
- &dict_sys->mutex,
+ MONITOR_ATOMIC_INC(
MONITOR_BACKGROUND_DROP_INDEX);
}
}
}
+ DBUG_EXECUTE_IF("ib_index_crash_after_bulk_load", DBUG_SUICIDE(););
+
+ if (FlushObserver* flush_observer = trx->get_flush_observer()) {
+
+ DBUG_EXECUTE_IF("ib_index_build_fail_before_flush",
+ error = DB_INTERRUPTED;
+ );
+
+ if (error != DB_SUCCESS) {
+ flush_observer->interrupted();
+ }
+
+ flush_observer->flush();
+
+ if (old_table != new_table) {
+ for (const dict_index_t* index
+ = dict_table_get_first_index(new_table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+ if (!(index->type
+ & (DICT_FTS | DICT_SPATIAL))) {
+ row_merge_write_redo(index);
+ }
+ }
+ }
+
+ trx->remove_flush_observer();
+
+ if (trx_is_interrupted(trx)) {
+ error = DB_INTERRUPTED;
+ }
+ }
+
DBUG_RETURN(error);
}
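
A hedged model of the FlushObserver epilogue above: when the ALTER failed, the observer is told it was interrupted (so dirty pages can be discarded rather than written), flush() is called either way, and MLOG_INDEX_LOAD markers are written only for a successful rebuild and only for index types whose redo logging was actually disabled:

#include <cstdio>

struct FlushObserver {				/* stand-in for InnoDB's class */
	bool failed;
	FlushObserver() : failed(false) {}
	void interrupted() { failed = true; }
	void flush()
	{
		printf("flush pages (%s)\n",
		       failed ? "discarding" : "durably");
	}
};

int main()
{
	bool table_rebuilt = true;
	int error = 0;				/* DB_SUCCESS */
	FlushObserver observer;
	if (error != 0) {
		observer.interrupted();
	}
	observer.flush();
	if (table_rebuilt && error == 0) {
		puts("write MLOG_INDEX_LOAD for each non-FTS, non-spatial index");
	}
}
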
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc
index 999e8d882ec..ac44e32c326 100644
--- a/storage/innobase/row/row0mysql.cc
+++ b/storage/innobase/row/row0mysql.cc
@@ -25,51 +25,52 @@ Contains also create table and other data dictionary operations.
Created 9/17/2000 Heikki Tuuri
*******************************************************/
-#include "row0mysql.h"
-
-#ifdef UNIV_NONINL
-#include "row0mysql.ic"
-#endif
-
+#include "univ.i"
#include <debug_sync.h>
-#include <my_dbug.h>
+#include <gstream.h>
+#include <spatial.h>
+#include "row0mysql.h"
+#include "btr0sea.h"
+#include "dict0boot.h"
+#include "dict0crea.h"
#include <sql_const.h>
-#include "row0ins.h"
-#include "row0merge.h"
-#include "row0sel.h"
-#include "row0upd.h"
-#include "row0row.h"
-#include "que0que.h"
-#include "pars0pars.h"
#include "dict0dict.h"
-#include "dict0crea.h"
#include "dict0load.h"
#include "dict0priv.h"
#include "dict0boot.h"
#include "dict0stats.h"
#include "dict0stats_bg.h"
-#include "trx0roll.h"
-#include "trx0purge.h"
-#include "trx0rec.h"
-#include "trx0undo.h"
-#include "lock0lock.h"
-#include "rem0cmp.h"
-#include "log0log.h"
-#include "btr0sea.h"
+#include "dict0defrag_bg.h"
#include "btr0defragment.h"
#include "fil0fil.h"
#include "fil0crypt.h"
-#include "srv0srv.h"
-#include "ibuf0ibuf.h"
+#include "fsp0file.h"
#include "fts0fts.h"
#include "fts0types.h"
-#include "srv0start.h"
+#include "ibuf0ibuf.h"
+#include "lock0lock.h"
+#include "log0log.h"
+#include "pars0pars.h"
+#include "que0que.h"
+#include "rem0cmp.h"
#include "row0import.h"
-#include "m_string.h"
-#include "my_sys.h"
-#include "ha_prototypes.h"
+#include "row0ins.h"
+#include "row0merge.h"
+#include "row0row.h"
+#include "row0sel.h"
+#include "row0upd.h"
+#include "trx0purge.h"
+#include "trx0rec.h"
+#include "trx0roll.h"
+#include "trx0undo.h"
+#include "srv0start.h"
+#include "row0ext.h"
+#include "srv0start.h"
+
#include <algorithm>
+#include <deque>
+#include <vector>
#ifdef WITH_WSREP
#include "mysql/service_wsrep.h"
@@ -79,7 +80,7 @@ Created 9/17/2000 Heikki Tuuri
#endif
/** Provide optional 4.x backwards compatibility for 5.0 and above */
-UNIV_INTERN ibool row_rollback_on_timeout = FALSE;
+ibool row_rollback_on_timeout = FALSE;
/** Chain node of the list of tables to drop in the background. */
struct row_mysql_drop_t{
@@ -88,11 +89,6 @@ struct row_mysql_drop_t{
/*!< list chain node */
};
-#ifdef UNIV_PFS_MUTEX
-/* Key to register drop list mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t row_drop_list_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
/** @brief List of tables we should drop in background.
ALTER TABLE in MySQL requires that the table handler can drop the
@@ -106,30 +102,9 @@ static ib_mutex_t row_drop_list_mutex;
/** Flag: has row_mysql_drop_list been initialized? */
static ibool row_mysql_drop_list_inited = FALSE;
-/** Magic table names for invoking various monitor threads */
-/* @{ */
-static const char S_innodb_monitor[] = "innodb_monitor";
-static const char S_innodb_lock_monitor[] = "innodb_lock_monitor";
-static const char S_innodb_tablespace_monitor[] = "innodb_tablespace_monitor";
-static const char S_innodb_table_monitor[] = "innodb_table_monitor";
-#ifdef UNIV_MEM_DEBUG
-static const char S_innodb_mem_validate[] = "innodb_mem_validate";
-#endif /* UNIV_MEM_DEBUG */
-/* @} */
-
-/** Evaluates to true if str1 equals str2_onstack, used for comparing
-the magic table names.
-@param str1 in: string to compare
-@param str1_len in: length of str1, in bytes, including terminating NUL
-@param str2_onstack in: char[] array containing a NUL terminated string
-@return TRUE if str1 equals str2_onstack */
-#define STR_EQ(str1, str1_len, str2_onstack) \
- ((str1_len) == sizeof(str2_onstack) \
- && memcmp(str1, str2_onstack, sizeof(str2_onstack)) == 0)
-
/*******************************************************************//**
Determine if the given name is a name reserved for MySQL system tables.
-@return TRUE if name is a MySQL system table name */
+@return TRUE if name is a MySQL system table name */
static
ibool
row_mysql_is_system_table(
@@ -146,6 +121,21 @@ row_mysql_is_system_table(
|| 0 == strcmp(name + 6, "db"));
}
+#ifdef UNIV_DEBUG
+/** Wait for the background drop list to become empty. */
+void
+row_wait_for_background_drop_list_empty()
+{
+ bool empty = false;
+ while (!empty) {
+ mutex_enter(&row_drop_list_mutex);
+ empty = (UT_LIST_GET_LEN(row_mysql_drop_list) == 0);
+ mutex_exit(&row_drop_list_mutex);
+ os_thread_sleep(100000);
+ }
+}
+#endif /* UNIV_DEBUG */
+
/*******************************************************************//**
Delays an INSERT, DELETE or UPDATE operation if the purge is lagging. */
static
@@ -160,15 +150,20 @@ row_mysql_delay_if_needed(void)
/*******************************************************************//**
Frees the blob heap in prebuilt when no longer needed. */
-UNIV_INTERN
void
row_mysql_prebuilt_free_blob_heap(
/*==============================*/
row_prebuilt_t* prebuilt) /*!< in: prebuilt struct of a
ha_innobase:: table handle */
{
+ DBUG_ENTER("row_mysql_prebuilt_free_blob_heap");
+
+ DBUG_PRINT("row_mysql_prebuilt_free_blob_heap",
+ ("blob_heap freeing: %p", prebuilt->blob_heap));
+
mem_heap_free(prebuilt->blob_heap);
prebuilt->blob_heap = NULL;
+ DBUG_VOID_RETURN;
}
/*******************************************************************//**
@@ -176,7 +171,6 @@ Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row
format.
@return pointer to the data, we skip the 1 or 2 bytes at the start
that are used to store the len */
-UNIV_INTERN
byte*
row_mysql_store_true_var_len(
/*=========================*/
@@ -205,7 +199,6 @@ Reads a >= 5.0.3 format true VARCHAR length, in the MySQL row format, and
returns a pointer to the data.
@return pointer to the data, we skip the 1 or 2 bytes at the start
that are used to store the len */
-UNIV_INTERN
const byte*
row_mysql_read_true_varchar(
/*========================*/
@@ -229,7 +222,6 @@ row_mysql_read_true_varchar(
/*******************************************************************//**
Stores a reference to a BLOB in the MySQL format. */
-UNIV_INTERN
void
row_mysql_store_blob_ref(
/*=====================*/
@@ -266,8 +258,7 @@ row_mysql_store_blob_ref(
/*******************************************************************//**
Reads a reference to a BLOB in the MySQL format.
-@return pointer to BLOB data */
-UNIV_INTERN
+@return pointer to BLOB data */
const byte*
row_mysql_read_blob_ref(
/*====================*/
@@ -286,9 +277,69 @@ row_mysql_read_blob_ref(
return(data);
}
+/*******************************************************************//**
+Converting InnoDB geometry data format to MySQL data format. */
+void
+row_mysql_store_geometry(
+/*=====================*/
+ byte* dest, /*!< in/out: where to store */
+ ulint dest_len, /*!< in: dest buffer size: determines
+ into how many bytes the GEOMETRY length
+ is stored, the space for the length
+ may vary from 1 to 4 bytes */
+ const byte* src, /*!< in: GEOMETRY data; if the value to
+ store is SQL NULL this should be NULL
+ pointer */
+ ulint src_len) /*!< in: GEOMETRY length; if the value
+ to store is SQL NULL this should be 0;
+ remember also to set the NULL bit in
+ the MySQL record header! */
+{
+ /* MySQL might assume the field is set to zero except the length and
+ the pointer fields */
+ UNIV_MEM_ASSERT_RW(src, src_len);
+ UNIV_MEM_ASSERT_W(dest, dest_len);
+ UNIV_MEM_INVALID(dest, dest_len);
+
+ memset(dest, '\0', dest_len);
+
+ /* In dest there are 1 - 4 bytes reserved for the BLOB length,
+ and after that 8 bytes reserved for the pointer to the data.
+ In 32-bit architectures we only use the first 4 bytes of the pointer
+ slot. */
+
+ ut_ad(dest_len - 8 > 1 || src_len < 1<<8);
+ ut_ad(dest_len - 8 > 2 || src_len < 1<<16);
+ ut_ad(dest_len - 8 > 3 || src_len < 1<<24);
+
+ mach_write_to_n_little_endian(dest, dest_len - 8, src_len);
+
+ memcpy(dest + dest_len - 8, &src, sizeof src);
+}
+
+/*******************************************************************//**
+Read geometry data in the MySQL format.
+@return pointer to geometry data */
+static
+const byte*
+row_mysql_read_geometry(
+/*====================*/
+ ulint* len, /*!< out: data length */
+ const byte* ref, /*!< in: geometry data in the
+ MySQL format */
+ ulint col_len) /*!< in: MySQL format length */
+{
+ byte* data;
+
+ *len = mach_read_from_n_little_endian(ref, col_len - 8);
+
+ memcpy(&data, ref + col_len - 8, sizeof data);
+
+ return(data);
+}
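
Together, these two functions round-trip the MySQL-side reference format: the first col_len - 8 bytes hold the data length little-endian, the last 8 bytes hold the raw data pointer. A standalone round trip of that layout:

#include <cassert>
#include <cstring>

void store_ref(unsigned char* dest, size_t dest_len,
	       const unsigned char* src, size_t src_len)
{
	memset(dest, 0, dest_len);
	for (size_t i = 0; i < dest_len - 8; i++) {
		dest[i] = (unsigned char) (src_len >> (8 * i));	/* LE length */
	}
	memcpy(dest + dest_len - 8, &src, sizeof src);		/* raw pointer */
}

const unsigned char* read_ref(size_t* len, const unsigned char* ref,
			      size_t col_len)
{
	*len = 0;
	for (size_t i = 0; i < col_len - 8; i++) {
		*len |= (size_t) ref[i] << (8 * i);
	}
	const unsigned char* data;
	memcpy(&data, ref + col_len - 8, sizeof data);
	return data;
}

int main()
{
	unsigned char payload[] = "POINT(1 1)";
	unsigned char ref[4 + 8];		/* 4-byte length + pointer */
	store_ref(ref, sizeof ref, payload, sizeof payload);
	size_t len;
	assert(read_ref(&len, ref, sizeof ref) == payload
	       && len == sizeof payload);
}
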
+
/**************************************************************//**
Pad a column with spaces. */
-UNIV_INTERN
void
row_mysql_pad_col(
/*==============*/
@@ -333,8 +384,7 @@ row_mysql_pad_col(
Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format.
The counterpart of this function is row_sel_field_store_in_mysql_format() in
row0sel.cc.
-@return up to which byte we used buf in the conversion */
-UNIV_INTERN
+@return up to which byte we used buf in the conversion */
byte*
row_mysql_store_col_in_innobase_format(
/*===================================*/
@@ -480,11 +530,11 @@ row_mysql_store_col_in_innobase_format(
We will try to truncate it to n bytes by stripping
space padding. If the field contains single-byte
characters only, it will be truncated to n characters.
- Consider a CHAR(5) field containing the string ".a "
- where "." denotes a 3-byte character represented by
- the bytes "$%&". After our stripping, the string will
- be stored as "$%&a " (5 bytes). The string ".abc "
- will be stored as "$%&abc" (6 bytes).
+ Consider a CHAR(5) field containing the string
+ ".a " where "." denotes a 3-byte character represented
+ by the bytes "$%&". After our stripping, the string will
+ be stored as "$%&a " (5 bytes). The string
+ ".abc " will be stored as "$%&abc" (6 bytes).
The space padding will be restored in row0sel.cc, function
row_sel_field_store_in_mysql_format(). */
@@ -499,9 +549,15 @@ row_mysql_store_col_in_innobase_format(
while (col_len > n_chars && ptr[col_len - 1] == 0x20) {
col_len--;
}
- } else if (type == DATA_BLOB && row_format_col) {
+ } else if (!row_format_col) {
+		/* If the MySQL data comes from a MySQL key value,
+		the length is always stored in 2 bytes, so there is
+		nothing to do here. */
+ } else if (type == DATA_BLOB) {
ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len);
+ } else if (DATA_GEOMETRY_MTYPE(type)) {
+ ptr = row_mysql_read_geometry(&col_len, mysql_data, col_len);
}
dfield_set_data(dfield, ptr, col_len);
@@ -522,14 +578,18 @@ row_mysql_convert_row_to_innobase(
copied there! */
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct where template
must be of type ROW_MYSQL_WHOLE_ROW */
- byte* mysql_rec) /*!< in: row in the MySQL format;
+ const byte* mysql_rec, /*!< in: row in the MySQL format;
NOTE: do not discard as long as
row is used, as row may contain
pointers to this record! */
+ mem_heap_t** blob_heap) /*!< in: FIX_ME, remove this after
+ server fixes its issue */
{
const mysql_row_templ_t*templ;
dfield_t* dfield;
ulint i;
+ ulint n_col = 0;
+ ulint n_v_col = 0;
ut_ad(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
ut_ad(prebuilt->mysql_template);
@@ -537,7 +597,15 @@ row_mysql_convert_row_to_innobase(
for (i = 0; i < prebuilt->n_template; i++) {
templ = prebuilt->mysql_template + i;
- dfield = dtuple_get_nth_field(row, i);
+
+ if (templ->is_virtual) {
+ ut_ad(n_v_col < dtuple_get_n_v_fields(row));
+ dfield = dtuple_get_nth_v_field(row, n_v_col);
+ n_v_col++;
+ } else {
+ dfield = dtuple_get_nth_field(row, n_col);
+ n_col++;
+ }
if (templ->mysql_null_bit_mask != 0) {
/* Column may be SQL NULL */
@@ -560,6 +628,16 @@ row_mysql_convert_row_to_innobase(
mysql_rec + templ->mysql_col_offset,
templ->mysql_col_len,
dict_table_is_comp(prebuilt->table));
+
+	/* The server has an issue with the handling of BLOB virtual
+	fields; we need to duplicate the data into our own memory here */
+ if (templ->is_virtual
+ && DATA_LARGE_MTYPE(dfield_get_type(dfield)->mtype)) {
+ if (*blob_heap == NULL) {
+ *blob_heap = mem_heap_create(dfield->len);
+ }
+ dfield_dup(dfield, *blob_heap);
+ }
next_column:
;
}
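A sketch of the ownership problem the FIX_ME above works around, as an interpretation of the comment rather than part of the patch:

	/* Without dfield_dup(), the dfield would keep pointing into the
	server-owned buffer behind mysql_rec, which for virtual BLOB
	columns may not stay valid for as long as the row is used.
	Copying the value into blob_heap, which row_insert_for_mysql()
	frees once the insert completes, gives the value a lifetime
	that InnoDB controls. */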
@@ -600,7 +678,6 @@ next_column:
Handles user errors and lock waits detected by the database engine.
@return true if it was a lock wait and we should continue running the
query thread and in that case the thr is ALREADY in the running state. */
-UNIV_INTERN
bool
row_mysql_handle_errors(
/*====================*/
@@ -614,6 +691,8 @@ row_mysql_handle_errors(
{
dberr_t err;
+ DBUG_ENTER("row_mysql_handle_errors");
+
handle_new_error:
err = trx->error_state;
@@ -621,6 +700,9 @@ handle_new_error:
trx->error_state = DB_SUCCESS;
+ DBUG_LOG("trx", "handle error: " << ut_strerr(err)
+ << ";id=" << ib::hex(trx->id) << ", " << trx);
+
switch (err) {
case DB_LOCK_WAIT_TIMEOUT:
if (row_rollback_on_timeout) {
@@ -630,7 +712,6 @@ handle_new_error:
case DB_DUPLICATE_KEY:
case DB_FOREIGN_DUPLICATE_KEY:
case DB_TOO_BIG_RECORD:
- case DB_TOO_BIG_FOR_REDO:
case DB_UNDO_RECORD_TOO_BIG:
case DB_ROW_IS_REFERENCED:
case DB_NO_REFERENCED_ROW:
@@ -640,10 +721,14 @@ handle_new_error:
case DB_READ_ONLY:
case DB_FTS_INVALID_DOCID:
case DB_INTERRUPTED:
- case DB_DICT_CHANGED:
+ case DB_CANT_CREATE_GEOMETRY_OBJECT:
case DB_TABLE_NOT_FOUND:
case DB_DECRYPTION_FAILED:
+ case DB_COMPUTE_VALUE_FAILED:
rollback_to_savept:
+ DBUG_EXECUTE_IF("row_mysql_crash_if_error", {
+ log_buffer_flush_to_disk();
+ DBUG_SUICIDE(); });
if (savept) {
/* Roll back the latest, possibly incomplete insertion
or update */
@@ -663,7 +748,7 @@ handle_new_error:
*new_err = err;
- return(true);
+ DBUG_RETURN(true);
case DB_DEADLOCK:
case DB_LOCK_TABLE_FULL:
@@ -675,40 +760,30 @@ handle_new_error:
break;
case DB_MUST_GET_MORE_FILE_SPACE:
- fputs("InnoDB: The database cannot continue"
- " operation because of\n"
- "InnoDB: lack of space. You must add"
- " a new data file to\n"
- "InnoDB: my.cnf and restart the database.\n", stderr);
- abort();
+ ib::fatal() << "The database cannot continue operation because"
+ " of lack of space. You must add a new data file"
+ " to my.cnf and restart the database.";
+ break;
case DB_CORRUPTION:
case DB_PAGE_CORRUPTED:
- fputs("InnoDB: We detected index corruption"
- " in an InnoDB type table.\n"
- "InnoDB: You have to dump + drop + reimport"
- " the table or, in\n"
- "InnoDB: a case of widespread corruption,"
- " dump all InnoDB\n"
- "InnoDB: tables and recreate the"
- " whole InnoDB tablespace.\n"
- "InnoDB: If the mysqld server crashes"
- " after the startup or when\n"
- "InnoDB: you dump the tables, look at\n"
- "InnoDB: " REFMAN "forcing-innodb-recovery.html"
- " for help.\n", stderr);
+ ib::error() << "We detected index corruption in an InnoDB type"
+ " table. You have to dump + drop + reimport the"
+ " table or, in a case of widespread corruption,"
+ " dump all InnoDB tables and recreate the whole"
+			" tablespace. The mysqld server may also crash"
+			" after the startup or while you dump the tables. "
+ << FORCE_RECOVERY_MSG;
goto rollback_to_savept;
case DB_FOREIGN_EXCEED_MAX_CASCADE:
- fprintf(stderr, "InnoDB: Cannot delete/update rows with"
- " cascading foreign key constraints that exceed max"
- " depth of %lu\n"
- "Please drop excessive foreign constraints"
- " and try again\n", (ulong) DICT_FK_MAX_RECURSIVE_LOAD);
+ ib::error() << "Cannot delete/update rows with cascading"
+ " foreign key constraints that exceed max depth of "
+ << FK_MAX_CASCADE_DEL << ". Please drop excessive"
+ " foreign constraints and try again";
goto rollback_to_savept;
default:
- fprintf(stderr, "InnoDB: unknown error code %lu\n",
- (ulong) err);
- ut_error;
+ ib::fatal() << "Unknown error code " << err << ": "
+ << ut_strerr(err);
}
if (trx->error_state != DB_SUCCESS) {
@@ -719,13 +794,12 @@ handle_new_error:
trx->error_state = DB_SUCCESS;
- return(false);
+ DBUG_RETURN(false);
}
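The true/false contract is easiest to see from the caller's side; a minimal sketch of the retry idiom, matching the loop that row_update_for_mysql() adopts later in this patch:

	for (;;) {
		row_upd_step(thr);

		err = trx->error_state;

		if (err == DB_SUCCESS) {
			break;
		}

		que_thr_stop_for_mysql(thr);

		/* true: a lock wait; thr is already back in the
		running state, so simply retry the step */
		if (!row_mysql_handle_errors(&err, trx, thr, &savept)) {
			break;	/* a real error; err holds the code */
		}
	}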
/********************************************************************//**
Create a prebuilt struct for a MySQL table handle.
-@return own: a prebuilt struct */
-UNIV_INTERN
+@return own: a prebuilt struct */
row_prebuilt_t*
row_create_prebuilt(
/*================*/
@@ -733,6 +807,8 @@ row_create_prebuilt(
ulint mysql_row_len) /*!< in: length in bytes of a row in
the MySQL format */
{
+ DBUG_ENTER("row_create_prebuilt");
+
row_prebuilt_t* prebuilt;
mem_heap_t* heap;
dict_index_t* clust_index;
@@ -742,7 +818,8 @@ row_create_prebuilt(
uint srch_key_len = 0;
ulint search_tuple_n_fields;
- search_tuple_n_fields = 2 * dict_table_get_n_cols(table);
+ search_tuple_n_fields = 2 * (dict_table_get_n_cols(table)
+ + dict_table_get_n_v_cols(table));
clust_index = dict_table_get_first_index(table);
@@ -782,9 +859,12 @@ row_create_prebuilt(
sure if this prebuilt instance is going to be \
used in inserts */ \
+ (mysql_row_len < 256 ? mysql_row_len : 0) \
- + DTUPLE_EST_ALLOC(dict_table_get_n_cols(table)) \
+ + DTUPLE_EST_ALLOC(dict_table_get_n_cols(table) \
+ + dict_table_get_n_v_cols(table)) \
+ sizeof(que_fork_t) \
+ sizeof(que_thr_t) \
+ + sizeof(*prebuilt->pcur) \
+ + sizeof(*prebuilt->clust_pcur) \
)
/* Calculate size of key buffer used to store search key in
@@ -802,7 +882,8 @@ row_create_prebuilt(
== MAX_REF_PARTS););
uint temp_len = 0;
for (uint i = 0; i < temp_index->n_uniq; i++) {
- if (temp_index->fields[i].col->mtype == DATA_INT) {
+ ulint type = temp_index->fields[i].col->mtype;
+ if (type == DATA_INT) {
temp_len +=
temp_index->fields[i].fixed_len;
}
@@ -843,8 +924,14 @@ row_create_prebuilt(
prebuilt->srch_key_val2 = NULL;
}
- btr_pcur_reset(&prebuilt->pcur);
- btr_pcur_reset(&prebuilt->clust_pcur);
+ prebuilt->pcur = static_cast<btr_pcur_t*>(
+ mem_heap_zalloc(prebuilt->heap,
+ sizeof(btr_pcur_t)));
+ prebuilt->clust_pcur = static_cast<btr_pcur_t*>(
+ mem_heap_zalloc(prebuilt->heap,
+ sizeof(btr_pcur_t)));
+ btr_pcur_reset(prebuilt->pcur);
+ btr_pcur_reset(prebuilt->clust_pcur);
prebuilt->select_lock_type = LOCK_NONE;
prebuilt->stored_select_lock_type = LOCK_NONE_UNSET;
@@ -871,47 +958,35 @@ row_create_prebuilt(
prebuilt->mysql_row_len = mysql_row_len;
- return(prebuilt);
+ prebuilt->fts_doc_id_in_read_set = 0;
+ prebuilt->blob_heap = NULL;
+
+ prebuilt->m_no_prefetch = false;
+ prebuilt->m_read_virtual_key = false;
+
+ DBUG_RETURN(prebuilt);
}
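The persistent cursors are now heap-allocated rather than embedded; a short note on the lifetime this relies on, restating the code:

	/* prebuilt->pcur and prebuilt->clust_pcur now live in
	prebuilt->heap, so they are released by the single
	mem_heap_free(prebuilt->heap) in row_prebuilt_free(), and the
	allocation-size estimate above accounts for them with the
	sizeof(*prebuilt->pcur) and sizeof(*prebuilt->clust_pcur)
	terms. */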
/********************************************************************//**
Free a prebuilt struct for a MySQL table handle. */
-UNIV_INTERN
void
row_prebuilt_free(
/*==============*/
row_prebuilt_t* prebuilt, /*!< in, own: prebuilt struct */
ibool dict_locked) /*!< in: TRUE=data dictionary locked */
{
- ulint i;
-
- if (UNIV_UNLIKELY
- (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED
- || prebuilt->magic_n2 != ROW_PREBUILT_ALLOCATED)) {
-
- fprintf(stderr,
- "InnoDB: Error: trying to free a corrupt\n"
- "InnoDB: table handle. Magic n %lu,"
- " magic n2 %lu, table name ",
- (ulong) prebuilt->magic_n,
- (ulong) prebuilt->magic_n2);
- ut_print_name(stderr, NULL, TRUE, prebuilt->table->name);
- putc('\n', stderr);
-
- mem_analyze_corruption(prebuilt);
+ DBUG_ENTER("row_prebuilt_free");
- ut_error;
- }
+ ut_a(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED);
+ ut_a(prebuilt->magic_n2 == ROW_PREBUILT_ALLOCATED);
prebuilt->magic_n = ROW_PREBUILT_FREED;
prebuilt->magic_n2 = ROW_PREBUILT_FREED;
- btr_pcur_reset(&prebuilt->pcur);
- btr_pcur_reset(&prebuilt->clust_pcur);
+ btr_pcur_reset(prebuilt->pcur);
+ btr_pcur_reset(prebuilt->clust_pcur);
- if (prebuilt->mysql_template) {
- mem_free(prebuilt->mysql_template);
- }
+ ut_free(prebuilt->mysql_template);
if (prebuilt->ins_graph) {
que_graph_free_recursive(prebuilt->ins_graph);
@@ -926,7 +1001,7 @@ row_prebuilt_free(
}
if (prebuilt->blob_heap) {
- mem_heap_free(prebuilt->blob_heap);
+ row_mysql_prebuilt_free_blob_heap(prebuilt);
}
if (prebuilt->old_vers_heap) {
@@ -937,44 +1012,38 @@ row_prebuilt_free(
byte* base = prebuilt->fetch_cache[0] - 4;
byte* ptr = base;
- for (i = 0; i < MYSQL_FETCH_CACHE_SIZE; i++) {
- byte* row;
- ulint magic1;
- ulint magic2;
-
- magic1 = mach_read_from_4(ptr);
+ for (ulint i = 0; i < MYSQL_FETCH_CACHE_SIZE; i++) {
+ ulint magic1 = mach_read_from_4(ptr);
+ ut_a(magic1 == ROW_PREBUILT_FETCH_MAGIC_N);
ptr += 4;
- row = ptr;
+ byte* row = ptr;
+ ut_a(row == prebuilt->fetch_cache[i]);
ptr += prebuilt->mysql_row_len;
- magic2 = mach_read_from_4(ptr);
+ ulint magic2 = mach_read_from_4(ptr);
+ ut_a(magic2 == ROW_PREBUILT_FETCH_MAGIC_N);
ptr += 4;
-
- if (ROW_PREBUILT_FETCH_MAGIC_N != magic1
- || row != prebuilt->fetch_cache[i]
- || ROW_PREBUILT_FETCH_MAGIC_N != magic2) {
-
- fputs("InnoDB: Error: trying to free"
- " a corrupt fetch buffer.\n", stderr);
-
- mem_analyze_corruption(base);
- ut_error;
- }
}
- mem_free(base);
+ ut_free(base);
}
- dict_table_close(prebuilt->table, dict_locked, TRUE);
+ if (prebuilt->rtr_info) {
+ rtr_clean_rtr_info(prebuilt->rtr_info, true);
+ }
+ if (prebuilt->table) {
+ dict_table_close(prebuilt->table, dict_locked, FALSE);
+ }
mem_heap_free(prebuilt->heap);
+
+ DBUG_VOID_RETURN;
}
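The magic-number assertions above decode the following per-slot layout of the fetch cache, reconstructed from the loop for illustration only:

	/* base = prebuilt->fetch_cache[0] - 4 is one contiguous block:

	   slot i: [ 4-byte magic ][ mysql_row_len bytes ][ 4-byte magic ]

	   Both magics must be ROW_PREBUILT_FETCH_MAGIC_N, and
	   prebuilt->fetch_cache[i] must point at the middle section;
	   ut_free(base) releases the whole block at once. */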
/*********************************************************************//**
Updates the transaction pointers in query graphs stored in the prebuilt
struct. */
-UNIV_INTERN
void
row_update_prebuilt_trx(
/*====================*/
@@ -982,29 +1051,9 @@ row_update_prebuilt_trx(
in MySQL handle */
trx_t* trx) /*!< in: transaction handle */
{
- if (trx->magic_n != TRX_MAGIC_N) {
- fprintf(stderr,
- "InnoDB: Error: trying to use a corrupt\n"
- "InnoDB: trx handle. Magic n %lu\n",
- (ulong) trx->magic_n);
-
- mem_analyze_corruption(trx);
-
- ut_error;
- }
-
- if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) {
- fprintf(stderr,
- "InnoDB: Error: trying to use a corrupt\n"
- "InnoDB: table handle. Magic n %lu, table name ",
- (ulong) prebuilt->magic_n);
- ut_print_name(stderr, trx, TRUE, prebuilt->table->name);
- putc('\n', stderr);
-
- mem_analyze_corruption(prebuilt);
-
- ut_error;
- }
+ ut_a(trx->magic_n == TRX_MAGIC_N);
+ ut_a(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED);
+ ut_a(prebuilt->magic_n2 == ROW_PREBUILT_ALLOCATED);
prebuilt->trx = trx;
@@ -1025,7 +1074,7 @@ row_update_prebuilt_trx(
Gets pointer to a prebuilt dtuple used in insertions. If the insert graph
has not yet been built in the prebuilt struct, then this function first
builds it.
-@return prebuilt dtuple; the column type information is also set in it */
+@return prebuilt dtuple; the column type information is also set in it */
static
dtuple_t*
row_get_prebuilt_insert_row(
@@ -1043,7 +1092,7 @@ row_get_prebuilt_insert_row(
may need to rebuild the row insert template. */
if (prebuilt->trx_id == table->def_trx_id
- && UT_LIST_GET_LEN(prebuilt->ins_node->entry_list)
+ && prebuilt->ins_node->entry_list.size()
== UT_LIST_GET_LEN(table->indexes)) {
return(prebuilt->ins_node->row);
@@ -1076,7 +1125,9 @@ row_get_prebuilt_insert_row(
dtuple_t* row;
- row = dtuple_create(prebuilt->heap, dict_table_get_n_cols(table));
+ row = dtuple_create_with_vcol(
+ prebuilt->heap, dict_table_get_n_cols(table),
+ dict_table_get_n_v_cols(table));
dict_table_copy_types(row, table);
@@ -1086,7 +1137,7 @@ row_get_prebuilt_insert_row(
que_node_get_parent(
pars_complete_graph_for_exec(
node,
- prebuilt->trx, prebuilt->heap)));
+ prebuilt->trx, prebuilt->heap, prebuilt)));
prebuilt->ins_graph->state = QUE_FORK_ACTIVE;
@@ -1096,74 +1147,12 @@ row_get_prebuilt_insert_row(
}
/*********************************************************************//**
-Updates the table modification counter and calculates new estimates
-for table and index statistics if necessary. */
-UNIV_INLINE
-void
-row_update_statistics_if_needed(
-/*============================*/
- trx_t* trx,
- dict_table_t* table) /*!< in: table */
-{
- ib_uint64_t counter;
- ib_uint64_t n_rows;
-
- if (!table->stat_initialized) {
- DBUG_EXECUTE_IF(
- "test_upd_stats_if_needed_not_inited",
- fprintf(stderr, "test_upd_stats_if_needed_not_inited "
- "was executed\n");
- );
- return;
- }
-
- counter = table->stat_modified_counter++;
- n_rows = dict_table_get_n_rows(table);
-
- if (dict_stats_is_persistent_enabled(table)) {
- if (counter > n_rows / 10 /* 10% */
- && dict_stats_auto_recalc_is_enabled(table)) {
-
-#ifdef WITH_WSREP
- if (trx->is_wsrep() &&
- wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
- WSREP_DEBUG("Avoiding background statistics"
- " calculation for table %s",
- table->name);
- return;
- }
-#endif /* WITH_WSREP */
-
- dict_stats_recalc_pool_add(table);
- table->stat_modified_counter = 0;
- }
- return;
- }
-
- /* Calculate new statistics if 1 / 16 of table has been modified
- since the last time a statistics batch was run.
- We calculate statistics at most every 16th round, since we may have
- a counter table which is very small and updated very often. */
- ib_uint64_t threshold= 16 + n_rows / 16; /* 6.25% */
- if (srv_stats_modified_counter)
- threshold= ut_min(srv_stats_modified_counter, threshold);
-
- if (counter > threshold) {
-
- ut_ad(!mutex_own(&dict_sys->mutex));
- /* this will reset table->stat_modified_counter to 0 */
- dict_stats_update(table, DICT_STATS_RECALC_TRANSIENT);
- }
-}
-
-/*********************************************************************//**
Sets an AUTO_INC type lock on the table mentioned in prebuilt. The
AUTO_INC lock gives exclusive access to the auto-inc counter of the
table. The lock is reserved only for the duration of an SQL statement.
It is not compatible with another AUTO_INC or exclusive lock on the
table.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
dberr_t
row_lock_table_autoinc_for_mysql(
/*=============================*/
@@ -1177,11 +1166,9 @@ row_lock_table_autoinc_for_mysql(
dberr_t err;
ibool was_lock_wait;
- ut_ad(trx);
-
/* If we already hold an AUTOINC lock on the table then do nothing.
- Note: We peek at the value of the current owner without acquiring
- the lock mutex. **/
+ Note: We peek at the value of the current owner without acquiring
+ the lock mutex. */
if (trx == table->autoinc_trx) {
return(DB_SUCCESS);
@@ -1206,7 +1193,7 @@ run_again:
/* It may be that the current session has not yet started
its transaction, or it has been committed: */
- trx_start_if_not_started_xa(trx);
+ trx_start_if_not_started_xa(trx, true);
err = lock_table(0, prebuilt->table, LOCK_AUTO_INC, thr);
@@ -1233,29 +1220,17 @@ run_again:
return(err);
}
-/*********************************************************************//**
-Sets a table lock on the table mentioned in prebuilt.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+/** Lock a table.
+@param[in,out] prebuilt table handle
+@return error code or DB_SUCCESS */
dberr_t
-row_lock_table_for_mysql(
-/*=====================*/
- row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in the MySQL
- table handle */
- dict_table_t* table, /*!< in: table to lock, or NULL
- if prebuilt->table should be
- locked as
- prebuilt->select_lock_type */
- ulint mode) /*!< in: lock mode of table
- (ignored if table==NULL) */
+row_lock_table(row_prebuilt_t* prebuilt)
{
trx_t* trx = prebuilt->trx;
que_thr_t* thr;
dberr_t err;
ibool was_lock_wait;
- ut_ad(trx);
-
trx->op_info = "setting table lock";
if (prebuilt->sel_graph == NULL) {
@@ -1277,19 +1252,12 @@ run_again:
/* It may be that the current session has not yet started
its transaction, or it has been committed: */
- trx_start_if_not_started_xa(trx);
+ trx_start_if_not_started_xa(trx, false);
- if (table) {
- err = lock_table(
- 0, table,
- static_cast<enum lock_mode>(mode), thr);
- } else {
- err = lock_table(
- 0, prebuilt->table,
- static_cast<enum lock_mode>(
- prebuilt->select_lock_type),
- thr);
- }
+ err = lock_table(0, prebuilt->table,
+ static_cast<enum lock_mode>(
+ prebuilt->select_lock_type),
+ thr);
trx->error_state = err;
@@ -1314,15 +1282,14 @@ run_again:
return(err);
}
-/*********************************************************************//**
-Determine is tablespace encrypted but decryption failed, is table corrupted
-or is tablespace .ibd file missing.
+/** Determine whether the tablespace is encrypted but decryption failed,
+whether the table is corrupted, or whether the tablespace .ibd file is missing.
@param[in] table Table
@param[in] trx Transaction
@param[in] push_warning true if we should push warning to user
-@return DB_DECRYPTION_FAILED table is encrypted but decryption failed
-DB_CORRUPTION table is corrupted
-DB_TABLESPACE_NOT_FOUND tablespace .ibd file not found */
+@retval DB_DECRYPTION_FAILED table is encrypted but decryption failed
+@retval DB_CORRUPTION table is corrupted
+@retval DB_TABLESPACE_NOT_FOUND tablespace .ibd file not found */
static
dberr_t
row_mysql_get_table_status(
@@ -1330,7 +1297,7 @@ row_mysql_get_table_status(
trx_t* trx,
bool push_warning = true)
{
- dberr_t err = DB_SUCCESS;
+ dberr_t err;
if (fil_space_t* space = fil_space_acquire_silent(table->space)) {
if (space->crypt_data && space->crypt_data->is_encrypted()) {
// maybe we cannot access the table due to failing
@@ -1340,7 +1307,7 @@ row_mysql_get_table_status(
"Table %s in tablespace %lu encrypted."
"However key management plugin or used key_id is not found or"
" used encryption algorithm or method does not match.",
- table->name, table->space);
+ table->name.m_name, table->space);
}
err = DB_DECRYPTION_FAILED;
@@ -1348,7 +1315,7 @@ row_mysql_get_table_status(
if (push_warning) {
ib_push_warning(trx, DB_CORRUPTION,
"Table %s in tablespace %lu corrupted.",
- table->name, table->space);
+ table->name.m_name, table->space);
}
err = DB_CORRUPTION;
@@ -1356,34 +1323,22 @@ row_mysql_get_table_status(
fil_space_release(space);
} else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "InnoDB: MySQL is trying to use a table handle"
- " but the .ibd file for"
- " table %s does not exist."
- " Have you deleted the .ibd file"
- " from the database directory under"
- " the MySQL datadir, or have you"
- " used DISCARD TABLESPACE?"
- " Look from " REFMAN "innodb-troubleshooting.html"
- " how you can resolve the problem.",
- table->name);
-
- err = DB_TABLESPACE_NOT_FOUND;
- }
-
- return (err);
+ ib::error() << ".ibd file is missing for table "
+ << table->name;
+ err = DB_TABLESPACE_NOT_FOUND;
+ }
+
+ return(err);
}
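Typical use of the tri-state result, as seen in row_insert_for_mysql() and row_update_for_mysql() below:

	if (!prebuilt->table->is_readable()) {
		return(row_mysql_get_table_status(prebuilt->table,
						  trx, true));
	}

	/* The caller simply propagates DB_DECRYPTION_FAILED,
	DB_CORRUPTION or DB_TABLESPACE_NOT_FOUND to the handler layer. */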
-/*********************************************************************//**
-Does an insert for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+/** Does an insert for MySQL.
+@param[in] mysql_rec row in the MySQL format
+@param[in,out] prebuilt prebuilt struct in MySQL handle
+@return error code or DB_SUCCESS */
dberr_t
row_insert_for_mysql(
-/*=================*/
- byte* mysql_rec, /*!< in: row in the MySQL format */
- row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
- handle */
+ const byte* mysql_rec,
+ row_prebuilt_t* prebuilt)
{
trx_savept_t savept;
que_thr_t* thr;
@@ -1393,49 +1348,51 @@ row_insert_for_mysql(
ins_node_t* node = prebuilt->ins_node;
dict_table_t* table = prebuilt->table;
+	/* FIX_ME: This blob heap is used to compensate for an issue
+	in the server's handling of virtual column BLOBs */
+ mem_heap_t* blob_heap = NULL;
+
ut_ad(trx);
+ ut_a(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED);
+ ut_a(prebuilt->magic_n2 == ROW_PREBUILT_ALLOCATED);
if (dict_table_is_discarded(prebuilt->table)) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "The table %s doesn't have a corresponding "
- "tablespace, it was discarded.",
- prebuilt->table->name);
+
+ ib::error() << "The table " << prebuilt->table->name
+			<< " doesn't have a corresponding tablespace; it was"
+ " discarded.";
return(DB_TABLESPACE_DELETED);
} else if (!prebuilt->table->is_readable()) {
- return (row_mysql_get_table_status(prebuilt->table, trx, true));
- } else if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) {
- fprintf(stderr,
- "InnoDB: Error: trying to free a corrupt\n"
- "InnoDB: table handle. Magic n %lu, table name ",
- (ulong) prebuilt->magic_n);
- ut_print_name(stderr, trx, TRUE, prebuilt->table->name);
- putc('\n', stderr);
+ return(row_mysql_get_table_status(prebuilt->table, trx, true));
+ } else if (high_level_read_only) {
+ return(DB_READ_ONLY);
+ }
- mem_analyze_corruption(prebuilt);
+ DBUG_EXECUTE_IF("mark_table_corrupted", {
+ /* Mark the table corrupted for the clustered index */
+ dict_index_t* index = dict_table_get_first_index(table);
+ ut_ad(dict_index_is_clust(index));
+ dict_set_corrupted(index, trx, "INSERT TABLE"); });
- ut_error;
- } else if (srv_force_recovery) {
- fputs("InnoDB: innodb_force_recovery is on: we do not allow\n"
- "InnoDB: database modifications by the user. Shut down\n"
- "InnoDB: mysqld and edit my.cnf so that"
- "InnoDB: innodb_force_... is removed.\n",
- stderr);
+ if (dict_table_is_corrupted(table)) {
- return(DB_READ_ONLY);
+ ib::error() << "Table " << table->name << " is corrupt.";
+ return(DB_TABLE_CORRUPT);
}
trx->op_info = "inserting";
row_mysql_delay_if_needed();
- trx_start_if_not_started_xa(trx);
+ trx_start_if_not_started_xa(trx, true);
row_get_prebuilt_insert_row(prebuilt);
node = prebuilt->ins_node;
- row_mysql_convert_row_to_innobase(node->row, prebuilt, mysql_rec);
+ row_mysql_convert_row_to_innobase(node->row, prebuilt, mysql_rec,
+ &blob_heap);
savept = trx_savept_take(trx);
@@ -1480,19 +1437,22 @@ error_exit:
trx->op_info = "";
+ if (blob_heap != NULL) {
+ mem_heap_free(blob_heap);
+ }
+
return(err);
}
if (dict_table_has_fts_index(table)) {
- doc_id_t doc_id;
+ doc_id_t doc_id;
/* Extract the doc id from the hidden FTS column */
doc_id = fts_get_doc_id_from_row(table, node->row);
if (doc_id <= 0) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "FTS_DOC_ID must be larger than 0"
- " for table %s", table->name);
+ ib::error() << "FTS_DOC_ID must be larger than 0 for table "
+ << table->name;
err = DB_FTS_INVALID_DOCID;
trx->error_state = DB_FTS_INVALID_DOCID;
goto error_exit;
@@ -1503,10 +1463,9 @@ error_exit:
= table->fts->cache->next_doc_id;
if (doc_id < next_doc_id) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "FTS_DOC_ID must be larger than "
- UINT64PF " for table %s",
- next_doc_id - 1, table->name);
+ ib::error() << "FTS_DOC_ID must be larger than "
+ << next_doc_id - 1 << " for table "
+ << table->name;
err = DB_FTS_INVALID_DOCID;
trx->error_state = DB_FTS_INVALID_DOCID;
@@ -1519,22 +1478,33 @@ error_exit:
FTS_DOC_ID_MAX_STEP value. */
if (doc_id - next_doc_id >= FTS_DOC_ID_MAX_STEP) {
- fprintf(stderr,
- "InnoDB: Doc ID " UINT64PF " is too"
- " big. Its difference with largest"
- " used Doc ID " UINT64PF " cannot"
- " exceed or equal to %d\n",
- doc_id, next_doc_id - 1,
- FTS_DOC_ID_MAX_STEP);
+			ib::error() << "Doc ID " << doc_id
+				<< " is too big. Its difference from"
+				" the largest used Doc ID "
+				<< next_doc_id - 1 << " must be"
+				" less than "
+				<< FTS_DOC_ID_MAX_STEP;
err = DB_FTS_INVALID_DOCID;
trx->error_state = DB_FTS_INVALID_DOCID;
goto error_exit;
}
}
- /* Pass NULL for the columns affected, since an INSERT affects
- all FTS indexes. */
- fts_trx_add_op(trx, table, doc_id, FTS_INSERT, NULL);
+ if (table->skip_alter_undo) {
+ if (trx->fts_trx == NULL) {
+ trx->fts_trx = fts_trx_create(trx);
+ }
+
+ fts_trx_table_t ftt;
+ ftt.table = table;
+ ftt.fts_trx = trx->fts_trx;
+
+ fts_add_doc_from_tuple(&ftt, doc_id, node->row);
+ } else {
+ /* Pass NULL for the columns affected, since an INSERT affects
+ all FTS indexes. */
+ fts_trx_add_op(trx, table, doc_id, FTS_INSERT, NULL);
+ }
}
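A compact restatement of the FTS_DOC_ID constraints enforced above; a summary sketch, not part of the patch:

	/* With next = table->fts->cache->next_doc_id, a user-supplied
	FTS_DOC_ID must satisfy:

		doc_id >  0
		doc_id >= next			(Doc IDs are never reused)
		doc_id <  next + FTS_DOC_ID_MAX_STEP

	otherwise the insert fails with DB_FTS_INVALID_DOCID. */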
que_thr_stop_for_mysql_no_error(thr, trx);
@@ -1553,19 +1523,21 @@ error_exit:
if (prebuilt->clust_index_was_generated) {
/* set row id to prebuilt */
- ut_memcpy(prebuilt->row_id, node->row_id_buf, DATA_ROW_ID_LEN);
+ memcpy(prebuilt->row_id, node->sys_buf, DATA_ROW_ID_LEN);
}
- row_update_statistics_if_needed(trx, table);
-
+ dict_stats_update_if_needed(table, trx->mysql_thd);
trx->op_info = "";
+ if (blob_heap != NULL) {
+ mem_heap_free(blob_heap);
+ }
+
return(err);
}
/*********************************************************************//**
Builds a dummy query graph used in selects. */
-UNIV_INTERN
void
row_prebuild_sel_graph(
/*===================*/
@@ -1584,7 +1556,8 @@ row_prebuild_sel_graph(
que_node_get_parent(
pars_complete_graph_for_exec(
static_cast<sel_node_t*>(node),
- prebuilt->trx, prebuilt->heap)));
+ prebuilt->trx, prebuilt->heap,
+ prebuilt)));
prebuilt->sel_graph->state = QUE_FORK_ACTIVE;
}
@@ -1593,8 +1566,7 @@ row_prebuild_sel_graph(
/*********************************************************************//**
Creates an query graph node of 'update' type to be used in the MySQL
interface.
-@return own: update node */
-UNIV_INTERN
+@return own: update node */
upd_node_t*
row_create_update_node_for_mysql(
/*=============================*/
@@ -1603,6 +1575,8 @@ row_create_update_node_for_mysql(
{
upd_node_t* node;
+ DBUG_ENTER("row_create_update_node_for_mysql");
+
node = upd_node_create(heap);
node->in_mysql_interface = TRUE;
@@ -1610,53 +1584,52 @@ row_create_update_node_for_mysql(
node->searched_update = FALSE;
node->select = NULL;
node->pcur = btr_pcur_create_for_mysql();
+
+ DBUG_PRINT("info", ("node: %p, pcur: %p", node, node->pcur));
+
node->table = table;
- node->update = upd_create(dict_table_get_n_cols(table), heap);
+ node->update = upd_create(dict_table_get_n_cols(table)
+ + dict_table_get_n_v_cols(table), heap);
node->update_n_fields = dict_table_get_n_cols(table);
- UT_LIST_INIT(node->columns);
+ UT_LIST_INIT(node->columns, &sym_node_t::col_var_list);
+
node->has_clust_rec_x_lock = TRUE;
node->cmpl_info = 0;
node->table_sym = NULL;
node->col_assign_list = NULL;
- return(node);
+ DBUG_RETURN(node);
}
/*********************************************************************//**
Gets pointer to a prebuilt update vector used in updates. If the update
graph has not yet been built in the prebuilt struct, then this function
first builds it.
-@return prebuilt update vector */
-UNIV_INTERN
+@return prebuilt update vector */
upd_t*
row_get_prebuilt_update_vector(
/*===========================*/
row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
handle */
{
- dict_table_t* table = prebuilt->table;
- upd_node_t* node;
-
- ut_ad(prebuilt && table && prebuilt->trx);
-
if (prebuilt->upd_node == NULL) {
/* Not called before for this handle: create an update node
and query graph to the prebuilt struct */
- node = row_create_update_node_for_mysql(table, prebuilt->heap);
-
- prebuilt->upd_node = node;
+ prebuilt->upd_node = row_create_update_node_for_mysql(
+ prebuilt->table, prebuilt->heap);
prebuilt->upd_graph = static_cast<que_fork_t*>(
que_node_get_parent(
pars_complete_graph_for_exec(
- static_cast<upd_node_t*>(node),
- prebuilt->trx, prebuilt->heap)));
+ prebuilt->upd_node,
+ prebuilt->trx, prebuilt->heap,
+ prebuilt)));
prebuilt->upd_graph->state = QUE_FORK_ACTIVE;
}
@@ -1675,8 +1648,9 @@ row_fts_do_update(
doc_id_t old_doc_id, /* in: old document id */
doc_id_t new_doc_id) /* in: new document id */
{
	if (trx->fts_next_doc_id) {
fts_trx_add_op(trx, table, old_doc_id, FTS_DELETE, NULL);
-		fts_trx_add_op(trx, table, new_doc_id, FTS_INSERT, NULL);
+		if (new_doc_id != FTS_NULL_DOC_ID) {
+			fts_trx_add_op(trx, table, new_doc_id,
+				       FTS_INSERT, NULL);
+		}
}
}
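For context: an FTS-indexed UPDATE is modelled as two Doc ID operations, and the new guard skips the second when no new Doc ID exists:

	/* FTS_DELETE of old_doc_id, then FTS_INSERT of new_doc_id.
	If new_doc_id == FTS_NULL_DOC_ID there is nothing to insert,
	so only the delete is queued. */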
@@ -1696,6 +1670,8 @@ row_fts_update_or_delete(
upd_node_t* node = prebuilt->upd_node;
doc_id_t old_doc_id = prebuilt->fts_doc_id;
+ DBUG_ENTER("row_fts_update_or_delete");
+
ut_a(dict_table_has_fts_index(prebuilt->table));
/* Deletes are simple; get them out of the way first. */
@@ -1704,18 +1680,16 @@ row_fts_update_or_delete(
fts_trx_add_op(trx, table, old_doc_id, FTS_DELETE, NULL);
} else {
doc_id_t new_doc_id;
-
new_doc_id = fts_read_doc_id((byte*) &trx->fts_next_doc_id);
if (new_doc_id == 0) {
- fprintf(stderr, " InnoDB FTS: Doc ID cannot be 0 \n");
+ ib::error() << "InnoDB FTS: Doc ID cannot be 0";
-			return(DB_FTS_INVALID_DOCID);
+			DBUG_RETURN(DB_FTS_INVALID_DOCID);
}
-
row_fts_do_update(trx, table, old_doc_id, new_doc_id);
}
- return(DB_SUCCESS);
+ DBUG_RETURN(DB_SUCCESS);
}
/*********************************************************************//**
@@ -1746,9 +1720,7 @@ init_fts_doc_id_for_ref(
foreign = *it;
- if (foreign->foreign_table == NULL) {
- break;
- }
+ ut_ad(foreign->foreign_table != NULL);
if (foreign->foreign_table->fts != NULL) {
fts_init_doc_id(foreign->foreign_table);
@@ -1762,57 +1734,35 @@ init_fts_doc_id_for_ref(
}
}
-/*********************************************************************//**
-Does an update or delete of a row for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+/** Does an update or delete of a row for MySQL.
+@param[in,out] prebuilt prebuilt struct in MySQL handle
+@return error code or DB_SUCCESS */
dberr_t
-row_update_for_mysql(
-/*=================*/
- byte* mysql_rec, /*!< in: the row to be updated, in
- the MySQL format */
- row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
- handle */
+row_update_for_mysql(row_prebuilt_t* prebuilt)
{
trx_savept_t savept;
dberr_t err;
que_thr_t* thr;
- ibool was_lock_wait;
dict_index_t* clust_index;
- /* ulint ref_len; */
upd_node_t* node;
dict_table_t* table = prebuilt->table;
trx_t* trx = prebuilt->trx;
ulint fk_depth = 0;
+ bool got_s_lock = false;
- ut_ad(prebuilt != NULL);
- ut_ad(trx != NULL);
- UT_NOT_USED(mysql_rec);
-
- if (!table->is_readable()) {
- return (row_mysql_get_table_status(table, trx, true));
- }
-
- if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
- fprintf(stderr,
- "InnoDB: Error: trying to free a corrupt\n"
- "InnoDB: table handle. Magic n %lu, table name ",
- (ulong) prebuilt->magic_n);
- ut_print_name(stderr, trx, TRUE, prebuilt->table->name);
- putc('\n', stderr);
+ DBUG_ENTER("row_update_for_mysql");
- mem_analyze_corruption(prebuilt);
+ ut_ad(trx);
+ ut_a(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED);
+ ut_a(prebuilt->magic_n2 == ROW_PREBUILT_ALLOCATED);
+ ut_a(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
+ ut_ad(table->stat_initialized);
- ut_error;
+ if (!table->is_readable()) {
+		DBUG_RETURN(row_mysql_get_table_status(table, trx, true));
}
- if (UNIV_UNLIKELY(srv_force_recovery)) {
- fputs("InnoDB: innodb_force_recovery is on: we do not allow\n"
- "InnoDB: database modifications by the user. Shut down\n"
- "InnoDB: mysqld and edit my.cnf so that"
- "InnoDB: innodb_force_... is removed.\n",
- stderr);
-
+ if (high_level_read_only) {
-		return(DB_READ_ONLY);
+		DBUG_RETURN(DB_READ_ONLY);
}
@@ -1822,7 +1772,9 @@ row_update_for_mysql(
row_mysql_delay_if_needed();
- trx_start_if_not_started_xa(trx);
+ init_fts_doc_id_for_ref(table, &fk_depth);
+
+ trx_start_if_not_started_xa(trx, true);
if (dict_table_is_referenced_by_foreign_key(table)) {
/* Share lock the data dictionary to prevent any
@@ -1837,14 +1789,16 @@ row_update_for_mysql(
}
node = prebuilt->upd_node;
+ const bool is_delete = node->is_delete;
+ ut_ad(node->table == table);
clust_index = dict_table_get_first_index(table);
- if (prebuilt->pcur.btr_cur.index == clust_index) {
- btr_pcur_copy_stored_position(node->pcur, &prebuilt->pcur);
+ if (prebuilt->pcur->btr_cur.index == clust_index) {
+ btr_pcur_copy_stored_position(node->pcur, prebuilt->pcur);
} else {
btr_pcur_copy_stored_position(node->pcur,
- &prebuilt->clust_pcur);
+ prebuilt->clust_pcur);
}
ut_a(node->pcur->rel_pos == BTR_PCUR_ON);
@@ -1866,43 +1820,37 @@ row_update_for_mysql(
que_thr_move_to_run_state_for_mysql(thr, trx);
-run_again:
- thr->run_node = node;
- thr->prev_node = node;
- thr->fk_cascade_depth = 0;
+ for (;;) {
+ thr->run_node = node;
+ thr->prev_node = node;
+ thr->fk_cascade_depth = 0;
- row_upd_step(thr);
+ row_upd_step(thr);
- err = trx->error_state;
+ err = trx->error_state;
- /* Reset fk_cascade_depth back to 0 */
- thr->fk_cascade_depth = 0;
+ if (err == DB_SUCCESS) {
+ break;
+ }
- if (err != DB_SUCCESS) {
que_thr_stop_for_mysql(thr);
if (err == DB_RECORD_NOT_FOUND) {
trx->error_state = DB_SUCCESS;
- trx->op_info = "";
-
- return(err);
+ goto error;
}
thr->lock_state= QUE_THR_LOCK_ROW;
DEBUG_SYNC(trx->mysql_thd, "row_update_for_mysql_error");
- was_lock_wait = row_mysql_handle_errors(&err, trx, thr,
- &savept);
+ bool was_lock_wait = row_mysql_handle_errors(
+ &err, trx, thr, &savept);
thr->lock_state= QUE_THR_LOCK_NOLOCK;
- if (was_lock_wait) {
- goto run_again;
+ if (!was_lock_wait) {
+ goto error;
}
-
- trx->op_info = "";
-
- return(err);
}
que_thr_stop_for_mysql_no_error(thr, trx);
@@ -1910,13 +1858,21 @@ run_again:
if (dict_table_has_fts_index(table)
&& trx->fts_next_doc_id != UINT64_UNDEFINED) {
err = row_fts_update_or_delete(prebuilt);
- if (err != DB_SUCCESS) {
- trx->op_info = "";
- return(err);
+ if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
+ ut_ad(!"unexpected error");
+ goto error;
}
}
- if (node->is_delete) {
+ /* Completed cascading operations (if any) */
+ if (got_s_lock) {
+ row_mysql_unfreeze_data_dictionary(trx);
+ }
+
+ bool update_statistics;
+ ut_ad(node->is_delete == is_delete);
+
+ if (/*node->*/is_delete) {
/* Not protected by dict_table_stats_lock() for performance
reasons, we would rather get garbage in stat_n_rows (which is
just an estimate anyway) than protecting the following code
@@ -1929,34 +1885,39 @@ run_again:
srv_stats.n_rows_deleted.inc(size_t(trx->id));
}
+ update_statistics = !srv_stats_include_delete_marked;
} else {
if (table->is_system_db) {
srv_stats.n_system_rows_updated.inc(size_t(trx->id));
} else {
srv_stats.n_rows_updated.inc(size_t(trx->id));
}
+
+ update_statistics
+ = !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE);
}
- /* We update table statistics only if it is a DELETE or UPDATE
- that changes indexed columns, UPDATEs that change only non-indexed
- columns would not affect statistics. */
- if (node->is_delete || !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
- row_update_statistics_if_needed(trx, prebuilt->table);
+ if (update_statistics) {
+ dict_stats_update_if_needed(prebuilt->table, trx->mysql_thd);
} else {
- /* Update the table modification counter even when
- non-indexed columns change if statistics is initialized. */
- if (prebuilt->table->stat_initialized) {
- prebuilt->table->stat_modified_counter++;
- }
+ /* Always update the table modification counter. */
+ prebuilt->table->stat_modified_counter++;
}
trx->op_info = "";
- return(err);
+ DBUG_RETURN(err);
+
+error:
+ trx->op_info = "";
+ if (got_s_lock) {
+ row_mysql_unfreeze_data_dictionary(trx);
+ }
+
+ DBUG_RETURN(err);
}
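A summary of the statistics decision encoded above, restating the branches rather than adding behavior:

	/* dict_stats_update_if_needed() runs when:

	   DELETE: !srv_stats_include_delete_marked (if delete-marked
		   records are included, the record is still counted
		   and the statistics are unchanged);
	   UPDATE: an ordering (indexed) column changed, i.e.
		   !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE);

	   in all other cases only stat_modified_counter is bumped. */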
-/*********************************************************************//**
-This can only be used when srv_locks_unsafe_for_binlog is TRUE or this
+/** This can only be used when srv_locks_unsafe_for_binlog is TRUE or this
session is using a READ COMMITTED or READ UNCOMMITTED isolation level.
Before calling this function row_search_for_mysql() must have
initialized prebuilt->new_rec_locks to store the information which new
@@ -1964,21 +1925,18 @@ record locks really were set. This function removes a newly set
clustered index record lock under prebuilt->pcur or
prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that
releases the latest clustered index record lock we set.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@param[in,out] prebuilt prebuilt struct in MySQL handle
+@param[in]	has_latches_on_recs	TRUE if the caller already holds
+					the latches on the records under
+					pcur and clust_pcur, so we do not
+					need to reposition the cursors. */
void
row_unlock_for_mysql(
-/*=================*/
- row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct in MySQL
- handle */
- ibool has_latches_on_recs)/*!< in: TRUE if called so
- that we have the latches on
- the records under pcur and
- clust_pcur, and we do not need
- to reposition the cursors. */
+ row_prebuilt_t* prebuilt,
+ ibool has_latches_on_recs)
{
- btr_pcur_t* pcur = &prebuilt->pcur;
- btr_pcur_t* clust_pcur = &prebuilt->clust_pcur;
+ btr_pcur_t* pcur = prebuilt->pcur;
+ btr_pcur_t* clust_pcur = prebuilt->clust_pcur;
trx_t* trx = prebuilt->trx;
ut_ad(prebuilt != NULL);
@@ -1988,11 +1946,13 @@ row_unlock_for_mysql(
(!srv_locks_unsafe_for_binlog
&& trx->isolation_level > TRX_ISO_READ_COMMITTED)) {
- fprintf(stderr,
- "InnoDB: Error: calling row_unlock_for_mysql though\n"
- "InnoDB: innodb_locks_unsafe_for_binlog is FALSE and\n"
- "InnoDB: this session is not using"
- " READ COMMITTED isolation level.\n");
+ ib::error() << "Calling row_unlock_for_mysql though"
+ " innodb_locks_unsafe_for_binlog is FALSE and this"
+ " session is not using READ COMMITTED isolation"
+ " level.";
+ return;
+ }
+ if (dict_index_is_spatial(prebuilt->index)) {
return;
}
@@ -2043,11 +2003,11 @@ row_unlock_for_mysql(
+ index->trx_id_offset);
} else {
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
rec_offs_init(offsets_);
- offsets = rec_get_offsets(rec, index, offsets,
+ offsets = rec_get_offsets(rec, index, offsets, true,
ULINT_UNDEFINED, &heap);
rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
@@ -2087,129 +2047,15 @@ no_unlock:
trx->op_info = "";
}
-/**********************************************************************//**
-Does a cascaded delete or set null in a foreign key operation.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_update_cascade_for_mysql(
-/*=========================*/
- que_thr_t* thr, /*!< in: query thread */
- upd_node_t* node, /*!< in: update node used in the cascade
- or set null operation */
- dict_table_t* table) /*!< in: table where we do the operation */
-{
- dberr_t err;
- trx_t* trx;
-
- trx = thr_get_trx(thr);
-
- /* Increment fk_cascade_depth to record the recursive call depth on
- a single update/delete that affects multiple tables chained
- together with foreign key relations. */
- thr->fk_cascade_depth++;
-
- if (thr->fk_cascade_depth > FK_MAX_CASCADE_DEL) {
- return(DB_FOREIGN_EXCEED_MAX_CASCADE);
- }
-run_again:
- thr->run_node = node;
- thr->prev_node = node;
-
- DEBUG_SYNC_C("foreign_constraint_update_cascade");
-
- row_upd_step(thr);
-
- /* The recursive call for cascading update/delete happens
- in above row_upd_step(), reset the counter once we come
- out of the recursive call, so it does not accumulate for
- different row deletes */
- thr->fk_cascade_depth = 0;
-
- err = trx->error_state;
-
- /* Note that the cascade node is a subnode of another InnoDB
- query graph node. We do a normal lock wait in this node, but
- all errors are handled by the parent node. */
-
- if (err == DB_LOCK_WAIT) {
- /* Handle lock wait here */
-
- que_thr_stop_for_mysql(thr);
-
- lock_wait_suspend_thread(thr);
-
- /* Note that a lock wait may also end in a lock wait timeout,
- or this transaction is picked as a victim in selective
- deadlock resolution */
-
- if (trx->error_state != DB_SUCCESS) {
-
- return(trx->error_state);
- }
-
- /* Retry operation after a normal lock wait */
-
- goto run_again;
- }
-
- if (err != DB_SUCCESS) {
-
- return(err);
- }
-
- if (node->is_delete) {
- /* Not protected by dict_table_stats_lock() for performance
- reasons, we would rather get garbage in stat_n_rows (which is
- just an estimate anyway) than protecting the following code
- with a latch. */
- dict_table_n_rows_dec(table);
-
- if (table->is_system_db) {
- srv_stats.n_system_rows_deleted.inc(size_t(trx->id));
- } else {
- srv_stats.n_rows_deleted.inc(size_t(trx->id));
- }
- } else {
- if (table->is_system_db) {
- srv_stats.n_system_rows_updated.inc(size_t(trx->id));
- } else {
- srv_stats.n_rows_updated.inc(size_t(trx->id));
- }
- }
-
- row_update_statistics_if_needed(trx, table);
-
- return(err);
-}
-
-/*********************************************************************//**
-Checks if a table is such that we automatically created a clustered
-index on it (on row id).
-@return TRUE if the clustered index was generated automatically */
-UNIV_INTERN
-ibool
-row_table_got_default_clust_index(
-/*==============================*/
- const dict_table_t* table) /*!< in: table */
-{
- const dict_index_t* clust_index;
-
- clust_index = dict_table_get_first_index(table);
-
- return(dict_index_get_nth_col(clust_index, 0)->mtype == DATA_SYS);
-}
-
/*********************************************************************//**
Locks the data dictionary in shared mode from modifications, for performing
foreign key check, rollback, or other operation invisible to MySQL. */
-UNIV_INTERN
void
row_mysql_freeze_data_dictionary_func(
/*==================================*/
trx_t* trx, /*!< in/out: transaction */
const char* file, /*!< in: file name */
- ulint line) /*!< in: line number */
+ unsigned line) /*!< in: line number */
{
ut_a(trx->dict_operation_lock_mode == 0);
@@ -2220,7 +2066,6 @@ row_mysql_freeze_data_dictionary_func(
/*********************************************************************//**
Unlocks the data dictionary shared lock. */
-UNIV_INTERN
void
row_mysql_unfreeze_data_dictionary(
/*===============================*/
@@ -2235,16 +2080,99 @@ row_mysql_unfreeze_data_dictionary(
trx->dict_operation_lock_mode = 0;
}
+/**********************************************************************//**
+Does a cascaded delete or set null in a foreign key operation.
+@return error code or DB_SUCCESS */
+dberr_t
+row_update_cascade_for_mysql(
+/*=========================*/
+ que_thr_t* thr, /*!< in: query thread */
+ upd_node_t* node, /*!< in: update node used in the cascade
+ or set null operation */
+ dict_table_t* table) /*!< in: table where we do the operation */
+{
+ /* Increment fk_cascade_depth to record the recursive call depth on
+ a single update/delete that affects multiple tables chained
+ together with foreign key relations. */
+
+ if (++thr->fk_cascade_depth > FK_MAX_CASCADE_DEL) {
+ return(DB_FOREIGN_EXCEED_MAX_CASCADE);
+ }
+
+ const trx_t* trx = thr_get_trx(thr);
+
+ for (;;) {
+ thr->run_node = node;
+ thr->prev_node = node;
+
+ DEBUG_SYNC_C("foreign_constraint_update_cascade");
+ {
+ TABLE *mysql_table = thr->prebuilt->m_mysql_table;
+ thr->prebuilt->m_mysql_table = NULL;
+ row_upd_step(thr);
+ thr->prebuilt->m_mysql_table = mysql_table;
+ }
+
+ switch (trx->error_state) {
+ case DB_LOCK_WAIT:
+ que_thr_stop_for_mysql(thr);
+ lock_wait_suspend_thread(thr);
+
+ if (trx->error_state == DB_SUCCESS) {
+ continue;
+ }
+
+ /* fall through */
+ default:
+ /* Other errors are handled for the parent node. */
+ thr->fk_cascade_depth = 0;
+ return trx->error_state;
+
+ case DB_SUCCESS:
+ thr->fk_cascade_depth = 0;
+ bool stats;
+
+ if (node->is_delete) {
+ /* Not protected by
+ dict_table_stats_lock() for
+ performance reasons, we would rather
+ get garbage in stat_n_rows (which is
+ just an estimate anyway) than
+ protecting the following code with a
+ latch. */
+ dict_table_n_rows_dec(node->table);
+
+ stats = !srv_stats_include_delete_marked;
+ srv_stats.n_rows_deleted.inc(size_t(trx->id));
+ } else {
+ stats = !(node->cmpl_info
+ & UPD_NODE_NO_ORD_CHANGE);
+ srv_stats.n_rows_updated.inc(size_t(trx->id));
+ }
+
+ if (stats) {
+ dict_stats_update_if_needed(node->table,
+ trx->mysql_thd);
+ } else {
+ /* Always update the table
+ modification counter. */
+ node->table->stat_modified_counter++;
+ }
+
+ return(DB_SUCCESS);
+ }
+ }
+}
+
/*********************************************************************//**
Locks the data dictionary exclusively for performing a table create or other
data dictionary modification operation. */
-UNIV_INTERN
void
row_mysql_lock_data_dictionary_func(
/*================================*/
trx_t* trx, /*!< in/out: transaction */
const char* file, /*!< in: file name */
- ulint line) /*!< in: line number */
+ unsigned line) /*!< in: line number */
{
ut_a(trx->dict_operation_lock_mode == 0
|| trx->dict_operation_lock_mode == RW_X_LATCH);
@@ -2255,12 +2183,11 @@ row_mysql_lock_data_dictionary_func(
rw_lock_x_lock_inline(&dict_operation_lock, 0, file, line);
trx->dict_operation_lock_mode = RW_X_LATCH;
- mutex_enter(&(dict_sys->mutex));
+ mutex_enter(&dict_sys->mutex);
}
/*********************************************************************//**
Unlocks the data dictionary exclusive lock. */
-UNIV_INTERN
void
row_mysql_unlock_data_dictionary(
/*=============================*/
@@ -2273,21 +2200,16 @@ row_mysql_unlock_data_dictionary(
/* Serialize data dictionary operations with dictionary mutex:
no deadlocks can occur then in these operations */
- mutex_exit(&(dict_sys->mutex));
+ mutex_exit(&dict_sys->mutex);
rw_lock_x_unlock(&dict_operation_lock);
trx->dict_operation_lock_mode = 0;
}
/*********************************************************************//**
-Creates a table for MySQL. If the name of the table ends in
-one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
-"innodb_table_monitor", then this will also start the printing of monitor
-output by the master thread. If the table name ends in "innodb_mem_validate",
-InnoDB will try to invoke mem_validate(). On failure the transaction will
-be rolled back and the 'table' object will be freed.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+Creates a table for MySQL. On failure the transaction will be rolled back
+and the 'table' object will be freed.
+@return error code or DB_SUCCESS */
dberr_t
row_create_table_for_mysql(
/*=======================*/
@@ -2295,21 +2217,16 @@ row_create_table_for_mysql(
(will be freed, or on DB_SUCCESS
added to the data dictionary cache) */
trx_t* trx, /*!< in/out: transaction */
- bool commit, /*!< in: if true, commit the transaction */
fil_encryption_t mode, /*!< in: encryption mode */
- ulint key_id) /*!< in: encryption key_id */
+ uint32_t key_id) /*!< in: encryption key_id */
{
tab_node_t* node;
mem_heap_t* heap;
que_thr_t* thr;
- const char* table_name;
- ulint table_name_len;
dberr_t err;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
+ ut_ad(mutex_own(&dict_sys->mutex));
ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
DBUG_EXECUTE_IF(
@@ -2319,82 +2236,22 @@ row_create_table_for_mysql(
trx->op_info = "creating table";
- if (row_mysql_is_system_table(table->name)) {
-
- fprintf(stderr,
- "InnoDB: Error: trying to create a MySQL system"
- " table %s of type InnoDB.\n"
- "InnoDB: MySQL system tables must be"
- " of the MyISAM type!\n",
- table->name);
+ if (row_mysql_is_system_table(table->name.m_name)) {
+ ib::error() << "Trying to create a MySQL system table "
+ << table->name << " of type InnoDB. MySQL system"
+ " tables must be of the MyISAM type!";
#ifndef DBUG_OFF
err_exit:
#endif /* !DBUG_OFF */
dict_mem_table_free(table);
- if (commit) {
- trx_commit_for_mysql(trx);
- }
-
trx->op_info = "";
return(DB_ERROR);
}
- trx_start_if_not_started_xa(trx);
-
- /* The table name is prefixed with the database name and a '/'.
- Certain table names starting with 'innodb_' have their special
- meaning regardless of the database name. Thus, we need to
- ignore the database name prefix in the comparisons. */
- table_name = dict_remove_db_name(table->name);
- table_name_len = strlen(table_name) + 1;
-
- if (STR_EQ(table_name, table_name_len, S_innodb_monitor)) {
-
- /* Table equals "innodb_monitor":
- start monitor prints */
-
- srv_print_innodb_monitor = TRUE;
-
- /* The lock timeout monitor thread also takes care
- of InnoDB monitor prints */
-
- os_event_set(srv_monitor_event);
- } else if (STR_EQ(table_name, table_name_len,
- S_innodb_lock_monitor)) {
-
- srv_print_innodb_monitor = TRUE;
- srv_print_innodb_lock_monitor = TRUE;
- os_event_set(srv_monitor_event);
- } else if (STR_EQ(table_name, table_name_len,
- S_innodb_tablespace_monitor)) {
-
- srv_print_innodb_tablespace_monitor = TRUE;
- os_event_set(srv_monitor_event);
- } else if (STR_EQ(table_name, table_name_len,
- S_innodb_table_monitor)) {
-
- srv_print_innodb_table_monitor = TRUE;
- os_event_set(srv_monitor_event);
-#ifdef UNIV_MEM_DEBUG
- } else if (STR_EQ(table_name, table_name_len,
- S_innodb_mem_validate)) {
- /* We define here a debugging feature intended for
- developers */
-
- fputs("Validating InnoDB memory:\n"
- "to use this feature you must compile InnoDB with\n"
- "UNIV_MEM_DEBUG defined in univ.i and"
- " the server must be\n"
- "quiet because allocation from a mem heap"
- " is not protected\n"
- "by any semaphore.\n", stderr);
- ut_a(mem_validate());
- fputs("Memory validated\n", stderr);
-#endif /* UNIV_MEM_DEBUG */
- }
+ trx_start_if_not_started_xa(trx, true);
heap = mem_heap_create(512);
@@ -2407,12 +2264,12 @@ err_exit:
/* If the transaction was previously flagged as
TRX_DICT_OP_INDEX, we should be creating auxiliary
tables for full-text indexes. */
- ut_ad(strstr(table->name, "/FTS_") != NULL);
+ ut_ad(strstr(table->name.m_name, "/FTS_") != NULL);
}
- node = tab_create_graph_create(table, heap, commit, mode, key_id);
+ node = tab_create_graph_create(table, heap, mode, key_id);
- thr = pars_complete_graph_for_exec(node, trx, heap);
+ thr = pars_complete_graph_for_exec(node, trx, heap, NULL);
ut_a(thr == que_fork_start_command(
static_cast<que_fork_t*>(que_node_get_parent(thr))));
@@ -2421,26 +2278,26 @@ err_exit:
err = trx->error_state;
- if (table->space != TRX_SYS_SPACE) {
- ut_a(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_TABLESPACE));
+ /* Update SYS_TABLESPACES and SYS_DATAFILES if a new file-per-table
+ tablespace was created. */
+ if (err == DB_SUCCESS && dict_table_is_file_per_table(table)) {
- /* Update SYS_TABLESPACES and SYS_DATAFILES if a new
- tablespace was created. */
- if (err == DB_SUCCESS) {
- char* path;
- path = fil_space_get_first_path(table->space);
+ ut_ad(dict_table_is_file_per_table(table));
- err = dict_create_add_tablespace_to_dictionary(
- table->space, table->name,
- fil_space_get_flags(table->space),
- path, trx, commit);
+ char* path;
+ path = fil_space_get_first_path(table->space);
- mem_free(path);
- }
+ err = dict_replace_tablespace_in_dictionary(
+ table->space, table->name.m_name,
+ fil_space_get_flags(table->space),
+ path, trx);
+
+ ut_free(path);
if (err != DB_SUCCESS) {
+
/* We must delete the link file. */
- fil_delete_link_file(table->name);
+ RemoteDatafile::delete_link_file(table->name.m_name);
}
}
@@ -2451,47 +2308,30 @@ err_exit:
trx->error_state = DB_SUCCESS;
trx_rollback_to_savepoint(trx, NULL);
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: cannot create table ",
- stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs(" because tablespace full\n", stderr);
+ ib::warn() << "Cannot create table "
+ << table->name
+ << " because tablespace full";
- if (dict_table_open_on_name(table->name, TRUE, FALSE,
+ if (dict_table_open_on_name(table->name.m_name, TRUE, FALSE,
DICT_ERR_IGNORE_NONE)) {
- /* Make things easy for the drop table code. */
-
- if (table->can_be_evicted) {
- dict_table_move_from_lru_to_non_lru(table);
- }
-
- dict_table_close(table, TRUE, FALSE);
-
- row_drop_table_for_mysql(table->name, trx, FALSE, TRUE);
-
- if (commit) {
- trx_commit_for_mysql(trx);
- }
+ dict_table_close_and_drop(trx, table);
} else {
dict_mem_table_free(table);
}
break;
+ case DB_UNSUPPORTED:
case DB_TOO_MANY_CONCURRENT_TRXS:
		/* We already have the .ibd file here. It should be deleted. */
- if (table->space
+ if (dict_table_is_file_per_table(table)
&& fil_delete_tablespace(table->space) != DB_SUCCESS) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: not able to"
- " delete tablespace %lu of table ",
- (ulong) table->space);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs("!\n", stderr);
+ ib::error() << "Not able to delete tablespace "
+ << table->space << " of table "
+ << table->name << "!";
}
/* fall through */
@@ -2512,11 +2352,9 @@ err_exit:
}
/*********************************************************************//**
-Does an index creation operation for MySQL. TODO: currently failure
-to create an index results in dropping the whole table! This is no problem
-currently as all indexes must be created at the same time as the table.
-@return error number or DB_SUCCESS */
-UNIV_INTERN
+Create an index when creating a table.
+On failure, the caller must drop the table!
+@return error number or DB_SUCCESS */
dberr_t
row_create_index_for_mysql(
/*=======================*/
@@ -2538,14 +2376,9 @@ row_create_index_for_mysql(
ulint len;
char* table_name;
char* index_name;
- dict_table_t* table;
+ dict_table_t* table = NULL;
ibool is_fts;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
trx->op_info = "creating index";
/* Copy the table name because we may want to drop the
@@ -2556,10 +2389,15 @@ row_create_index_for_mysql(
is_fts = (index->type == DICT_FTS);
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
+ ut_ad(mutex_own(&dict_sys->mutex));
+
table = dict_table_open_on_name(table_name, TRUE, TRUE,
DICT_ERR_IGNORE_NONE);
- trx_start_if_not_started_xa(trx);
+ if (!dict_table_is_temporary(table)) {
+ trx_start_if_not_started_xa(trx, true);
+ }
for (i = 0; i < index->n_def; i++) {
/* Check that prefix_len and actual length
@@ -2585,25 +2423,48 @@ row_create_index_for_mysql(
}
}
- heap = mem_heap_create(512);
-
trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
- /* Note that the space id where we store the index is inherited from
- the table in dict_build_index_def_step() in dict0crea.cc. */
+	/* For temporary tables we avoid inserting into the system
+	tables, to maintain performance, and so we have a separate
+	path that directly updates the dictionary cache. */
+ if (!dict_table_is_temporary(table)) {
+ /* Note that the space id where we store the index is
+ inherited from the table in dict_build_index_def_step()
+ in dict0crea.cc. */
+
+ heap = mem_heap_create(512);
+ node = ind_create_graph_create(index, heap, NULL);
- node = ind_create_graph_create(index, heap, true);
+ thr = pars_complete_graph_for_exec(node, trx, heap, NULL);
- thr = pars_complete_graph_for_exec(node, trx, heap);
+ ut_a(thr == que_fork_start_command(
+ static_cast<que_fork_t*>(
+ que_node_get_parent(thr))));
- ut_a(thr == que_fork_start_command(
- static_cast<que_fork_t*>(que_node_get_parent(thr))));
+ que_run_threads(thr);
- que_run_threads(thr);
+ err = trx->error_state;
- err = trx->error_state;
+ que_graph_free((que_t*) que_node_get_parent(thr));
+ } else {
+ dict_build_index_def(table, index, trx);
- que_graph_free((que_t*) que_node_get_parent(thr));
+ err = dict_index_add_to_cache(table, index, FIL_NULL);
+ ut_ad((index == NULL) == (err != DB_SUCCESS));
+
+ if (err != DB_SUCCESS) {
+ goto error_handling;
+ }
+
+ index->table = table;
+
+ err = dict_create_index_tree_in_mem(index, trx);
+
+ if (err != DB_SUCCESS) {
+ dict_index_remove_from_cache(table, index);
+ }
+ }
/* Create the index specific FTS auxiliary tables. */
if (err == DB_SUCCESS && is_fts) {
@@ -2612,102 +2473,17 @@ row_create_index_for_mysql(
idx = dict_table_get_index_on_name(table, index_name);
ut_ad(idx);
- err = fts_create_index_tables(trx, idx);
+ err = fts_create_index_tables_low(
+ trx, idx, table->name.m_name, table->id);
}
error_handling:
dict_table_close(table, TRUE, FALSE);
- if (err != DB_SUCCESS) {
- /* We have special error handling here */
-
- trx->error_state = DB_SUCCESS;
-
- trx_rollback_to_savepoint(trx, NULL);
-
- row_drop_table_for_mysql(table_name, trx, FALSE, TRUE);
-
- trx_commit_for_mysql(trx);
-
- trx->error_state = DB_SUCCESS;
- }
-
trx->op_info = "";
- mem_free(table_name);
- mem_free(index_name);
-
- return(err);
-}
-
-/*********************************************************************//**
-Scans a table create SQL string and adds to the data dictionary
-the foreign key constraints declared in the string. This function
-should be called after the indexes for a table have been created.
-Each foreign key constraint must be accompanied with indexes in
-both participating tables. The indexes are allowed to contain more
-fields than mentioned in the constraint. Check also that foreign key
-constraints which reference this table are ok.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_table_add_foreign_constraints(
-/*==============================*/
- trx_t* trx, /*!< in: transaction */
- const char* sql_string, /*!< in: table create statement where
- foreign keys are declared like:
- FOREIGN KEY (a, b) REFERENCES table2(c, d),
- table2 can be written also with the
- database name before it: test.table2 */
- size_t sql_length, /*!< in: length of sql_string */
- const char* name, /*!< in: table full name in the
- normalized form
- database_name/table_name */
- ibool reject_fks) /*!< in: if TRUE, fail with error
- code DB_CANNOT_ADD_CONSTRAINT if
- any foreign keys are found. */
-{
- dberr_t err;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_a(sql_string);
-
- trx->op_info = "adding foreign keys";
-
- trx_start_if_not_started_xa(trx);
-
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
-
- err = dict_create_foreign_constraints(trx, sql_string, sql_length,
- name, reject_fks);
-
- DBUG_EXECUTE_IF("ib_table_add_foreign_fail",
- err = DB_DUPLICATE_KEY;);
-
- DEBUG_SYNC_C("table_add_foreign_constraints");
-
- if (err == DB_SUCCESS) {
- /* Check that also referencing constraints are ok */
- err = dict_load_foreigns(name, NULL, false, true,
- DICT_ERR_IGNORE_NONE);
- }
-
- if (err != DB_SUCCESS) {
- /* We have special error handling here */
-
- trx->error_state = DB_SUCCESS;
-
- trx_rollback_to_savepoint(trx, NULL);
-
- row_drop_table_for_mysql(name, trx, FALSE, TRUE);
-
- trx_commit_for_mysql(trx);
-
- trx->error_state = DB_SUCCESS;
- }
+ ut_free(table_name);
+ ut_free(index_name);
return(err);
}
@@ -2719,7 +2495,7 @@ table before all handles to it have been removed. Furthermore, MySQL's
call to drop table must be non-blocking. Therefore we do the drop table
as a background operation, which is taken care of by the master thread
in srv0srv.cc.
-@return error code or DB_SUCCESS */
+@return error code or DB_SUCCESS */
static
dberr_t
row_drop_table_for_mysql_in_background(
@@ -2735,21 +2511,11 @@ row_drop_table_for_mysql_in_background(
foreign keys, we must set the following to be able to drop the
table: */
- trx->check_foreigns = FALSE;
-
- /* fputs("InnoDB: Error: Dropping table ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs(" in background drop list\n", stderr); */
+ trx->check_foreigns = false;
/* Try to drop the table in InnoDB */
- error = row_drop_table_for_mysql(name, trx, FALSE, FALSE);
-
- /* Flush the log to reduce probability that the .frm files and
- the InnoDB data dictionary get out-of-sync if the user runs
- with innodb_flush_log_at_trx_commit = 0 */
-
- log_buffer_flush_to_disk();
+ error = row_drop_table_for_mysql(name, trx, SQLCOM_TRUNCATE);
trx_commit_for_mysql(trx);
@@ -2762,8 +2528,7 @@ row_drop_table_for_mysql_in_background(
The master thread in srv0srv.cc calls this regularly to drop tables which
we must drop in background after queries to them have ended. Such lazy
dropping of tables is needed in ALTER TABLE on Unix.
-@return how many tables dropped + remaining tables in list */
-UNIV_INTERN
+@return how many tables dropped + remaining tables in list */
ulint
row_drop_tables_for_mysql_in_background(void)
/*=========================================*/
@@ -2789,13 +2554,16 @@ next:
return(n_tables + n_tables_dropped);
}
- table = dict_table_open_on_id(drop->table_id, FALSE,
- DICT_TABLE_OP_NORMAL);
+ /* On fast shutdown, just empty the list without dropping tables. */
+ table = srv_shutdown_state == SRV_SHUTDOWN_NONE || !srv_fast_shutdown
+ ? dict_table_open_on_id(drop->table_id, FALSE,
+ DICT_TABLE_OP_OPEN_ONLY_IF_CACHED)
+ : NULL;
if (!table) {
n_tables_dropped++;
mutex_enter(&row_drop_list_mutex);
- UT_LIST_REMOVE(row_mysql_drop_list, row_mysql_drop_list, drop);
+ UT_LIST_REMOVE(row_mysql_drop_list, drop);
MONITOR_DEC(MONITOR_BACKGROUND_DROP_TABLE);
ut_free(drop);
goto next;
@@ -2807,16 +2575,20 @@ next:
dict_table_close(table, FALSE, FALSE);
mutex_enter(&row_drop_list_mutex);
- UT_LIST_REMOVE(row_mysql_drop_list, row_mysql_drop_list, drop);
- UT_LIST_ADD_LAST(row_mysql_drop_list, row_mysql_drop_list,
- drop);
+ UT_LIST_REMOVE(row_mysql_drop_list, drop);
+ UT_LIST_ADD_LAST(row_mysql_drop_list, drop);
goto next;
}
+ char* name = mem_strdup(table->name.m_name);
+
dict_table_close(table, FALSE, FALSE);
- if (DB_SUCCESS != row_drop_table_for_mysql_in_background(
- table->name)) {
+ dberr_t err = row_drop_table_for_mysql_in_background(name);
+
+ ut_free(name);
+
+ if (err != DB_SUCCESS) {
/* If the DROP fails for some table, we return, and let the
main thread retry later */
return(n_tables + n_tables_dropped);
@@ -2828,8 +2600,7 @@ next:
/*********************************************************************//**
Get the background drop list length. NOTE: the caller must own the
drop list mutex!
-@return how many tables in list */
-UNIV_INTERN
+@return how many tables in list */
ulint
row_get_background_drop_list_len_low(void)
/*======================================*/
@@ -2847,6 +2618,75 @@ row_get_background_drop_list_len_low(void)
return(len);
}
+/** Drop garbage tables (#sql tables left behind by interrupted DDL)
+during recovery. */
+void
+row_mysql_drop_garbage_tables()
+{
+ mem_heap_t* heap = mem_heap_create(FN_REFLEN);
+ btr_pcur_t pcur;
+ mtr_t mtr;
+ trx_t* trx = trx_allocate_for_background();
+ trx->op_info = "dropping garbage tables";
+ row_mysql_lock_data_dictionary(trx);
+
+ mtr.start();
+ btr_pcur_open_at_index_side(
+ true, dict_table_get_first_index(dict_sys->sys_tables),
+ BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
+
+ for (;;) {
+ const rec_t* rec;
+ const byte* field;
+ ulint len;
+ const char* table_name;
+
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+
+ if (!btr_pcur_is_on_user_rec(&pcur)) {
+ break;
+ }
+
+ rec = btr_pcur_get_rec(&pcur);
+ if (rec_get_deleted_flag(rec, 0)) {
+ continue;
+ }
+
+ field = rec_get_nth_field_old(rec, 0/*NAME*/, &len);
+ if (len == UNIV_SQL_NULL || len == 0) {
+ /* Corrupted SYS_TABLES.NAME */
+ continue;
+ }
+
+ table_name = mem_heap_strdupl(
+ heap,
+ reinterpret_cast<const char*>(field), len);
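+
+ /* Names containing "/" TEMP_FILE_PREFIX "-" (that is,
+ "/#sql-") denote intermediate tables left behind by
+ interrupted DDL. */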
+ if (strstr(table_name, "/" TEMP_FILE_PREFIX "-")) {
+ btr_pcur_store_position(&pcur, &mtr);
+ btr_pcur_commit_specify_mtr(&pcur, &mtr);
+
+ if (dict_load_table(table_name,
+ DICT_ERR_IGNORE_DROP)) {
+ row_drop_table_for_mysql(
+ table_name, trx,
+ SQLCOM_DROP_TABLE);
+ trx_commit_for_mysql(trx);
+ }
+
+ mtr.start();
+ btr_pcur_restore_position(BTR_SEARCH_LEAF,
+ &pcur, &mtr);
+ }
+
+ mem_heap_empty(heap);
+ }
+
+ btr_pcur_close(&pcur);
+ mtr.commit();
+ row_mysql_unlock_data_dictionary(trx);
+ trx_free_for_background(trx);
+ mem_heap_free(heap);
+}
+
/*********************************************************************//**
If a table is not yet in the drop list, adds the table to the list of tables
which the master thread drops in background. We need this on Unix because in
@@ -2876,10 +2716,10 @@ row_add_table_to_background_drop_list(table_id_t table_id)
}
}
- drop = static_cast<row_mysql_drop_t*>(ut_malloc(sizeof *drop));
+ drop = static_cast<row_mysql_drop_t*>(ut_malloc_nokey(sizeof *drop));
drop->table_id = table_id;
- UT_LIST_ADD_LAST(row_mysql_drop_list, row_mysql_drop_list, drop);
+ UT_LIST_ADD_LAST(row_mysql_drop_list, drop);
MONITOR_INC(MONITOR_BACKGROUND_DROP_TABLE);
func_exit:
@@ -2887,21 +2727,22 @@ func_exit:
return added;
}
-/*********************************************************************//**
-Reassigns the table identifier of a table.
-@return error code or DB_SUCCESS */
+/** Reassigns the table identifier of a table.
+@param[in,out] table table
+@param[in,out] trx transaction
+@param[out] new_id new table id
+@return error code or DB_SUCCESS */
static
dberr_t
row_mysql_table_id_reassign(
-/*========================*/
- dict_table_t* table, /*!< in/out: table */
- trx_t* trx, /*!< in/out: transaction */
- table_id_t* new_id) /*!< out: new table id */
+ dict_table_t* table,
+ trx_t* trx,
+ table_id_t* new_id)
{
dberr_t err;
pars_info_t* info = pars_info_create();
- dict_hdr_get_new_id(new_id, NULL, NULL);
+ dict_hdr_get_new_id(new_id, NULL, NULL, table, false);
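+ /* Purge and rollback look a table up by its id, so after
+ the reassignment they will treat the old id as dropped. */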
/* Remove all locks except the table-level S and X locks. */
lock_remove_all_on_table(table, FALSE);
@@ -2919,6 +2760,8 @@ row_mysql_table_id_reassign(
" WHERE TABLE_ID = :old_id;\n"
"UPDATE SYS_INDEXES SET TABLE_ID = :new_id\n"
" WHERE TABLE_ID = :old_id;\n"
+ "UPDATE SYS_VIRTUAL SET TABLE_ID = :new_id\n"
+ " WHERE TABLE_ID = :old_id;\n"
"END;\n", FALSE, trx);
return(err);
@@ -2939,7 +2782,7 @@ row_discard_tablespace_begin(
trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
- trx_start_if_not_started_xa(trx);
+ trx_start_if_not_started_xa(trx, true);
/* Serialize data dictionary operations with dictionary mutex:
this is to avoid deadlocks during data dictionary operations */
@@ -2949,11 +2792,11 @@ row_discard_tablespace_begin(
dict_table_t* table;
table = dict_table_open_on_name(
- name, TRUE, FALSE, DICT_ERR_IGNORE_NONE);
+ name, TRUE, FALSE, DICT_ERR_IGNORE_FK_NOKEY);
if (table) {
dict_stats_wait_bg_to_stop_using_table(table, trx);
- ut_a(table->space != TRX_SYS_SPACE);
+ ut_a(!is_system_tablespace(table->space));
ut_a(table->n_foreign_key_checks_running == 0);
}
@@ -3002,10 +2845,10 @@ row_discard_tablespace_foreign_key_checks(
ut_print_timestamp(ef);
fputs(" Cannot DISCARD table ", ef);
- ut_print_name(stderr, trx, TRUE, table->name);
+ ut_print_name(ef, trx, table->name.m_name);
fputs("\n"
"because it is referenced by ", ef);
- ut_print_name(stderr, trx, TRUE, foreign->foreign_table_name);
+ ut_print_name(ef, trx, foreign->foreign_table_name);
putc('\n', ef);
mutex_exit(&dict_foreign_err_mutex);
@@ -3029,13 +2872,13 @@ row_discard_tablespace_end(
}
DBUG_EXECUTE_IF("ib_discard_before_commit_crash",
- log_make_checkpoint_at(LSN_MAX, TRUE);
+ log_write_up_to(LSN_MAX, true);
DBUG_SUICIDE(););
trx_commit_for_mysql(trx);
DBUG_EXECUTE_IF("ib_discard_after_commit_crash",
- log_make_checkpoint_at(LSN_MAX, TRUE);
+ log_write_up_to(LSN_MAX, true);
DBUG_SUICIDE(););
row_mysql_unlock_data_dictionary(trx);
@@ -3082,17 +2925,17 @@ row_discard_tablespace(
table_id_t new_id;
- /* Set the TABLESPACE DISCARD flag in the table definition on disk. */
-
- err = row_import_update_discarded_flag(trx, table->id, true, true);
+ /* Set the TABLESPACE DISCARD flag in the table definition
+ on disk. */
+ err = row_import_update_discarded_flag(
+ trx, table->id, true, true);
if (err != DB_SUCCESS) {
return(err);
}
/* Update the index root pages in the system tables, on disk */
-
- err = row_import_update_index_root(trx, table, true, true);
+ err = row_import_update_index_root(trx, table, true);
if (err != DB_SUCCESS) {
return(err);
@@ -3118,7 +2961,7 @@ row_discard_tablespace(
err = fil_discard_tablespace(table->space);
- switch(err) {
+ switch (err) {
case DB_SUCCESS:
case DB_IO_ERROR:
case DB_TABLESPACE_NOT_FOUND:
@@ -3164,9 +3007,8 @@ row_discard_tablespace(
/*********************************************************************//**
Discards the tablespace of a table which stored in an .ibd file. Discarding
means that this function renames the .ibd file and assigns a new table id for
-the table. Also the flag table->ibd_file_missing is set to TRUE.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+the table. Also the file_unreadable flag is set.
+@return error code or DB_SUCCESS */
dberr_t
row_discard_tablespace_for_mysql(
/*=============================*/
@@ -3182,11 +3024,19 @@ row_discard_tablespace_for_mysql(
if (table == 0) {
err = DB_TABLE_NOT_FOUND;
+ } else if (dict_table_is_temporary(table)) {
+
+ ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_CANNOT_DISCARD_TEMPORARY_TABLE);
+
+ err = DB_ERROR;
+
} else if (table->space == TRX_SYS_SPACE) {
char table_name[MAX_FULL_NAME_LEN + 1];
innobase_format_name(
- table_name, sizeof(table_name), table->name, FALSE);
+ table_name, sizeof(table_name),
+ table->name.m_name);
ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
ER_TABLE_IN_SYSTEM_TABLESPACE, table_name);
@@ -3197,7 +3047,8 @@ row_discard_tablespace_for_mysql(
char table_name[MAX_FULL_NAME_LEN + 1];
innobase_format_name(
- table_name, sizeof(table_name), table->name, FALSE);
+ table_name, sizeof(table_name),
+ table->name.m_name);
ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
ER_DISCARD_FK_CHECKS_RUNNING, table_name);
@@ -3233,8 +3084,7 @@ row_discard_tablespace_for_mysql(
/*********************************************************************//**
Sets an exclusive lock on a table.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
dberr_t
row_mysql_lock_table(
/*=================*/
@@ -3255,7 +3105,7 @@ row_mysql_lock_table(
trx->op_info = op_info;
node = sel_node_create(heap);
- thr = pars_complete_graph_for_exec(node, trx, heap);
+ thr = pars_complete_graph_for_exec(node, trx, heap, NULL);
thr->graph->state = QUE_FORK_ACTIVE;
/* We use the select query graph as the dummy graph needed
@@ -3279,30 +3129,7 @@ run_again:
} else {
que_thr_stop_for_mysql(thr);
- if (err != DB_QUE_THR_SUSPENDED) {
- ibool was_lock_wait;
-
- was_lock_wait = row_mysql_handle_errors(
- &err, trx, thr, NULL);
-
- if (was_lock_wait) {
- goto run_again;
- }
- } else {
- que_thr_t* run_thr;
- que_node_t* parent;
-
- parent = que_node_get_parent(thr);
-
- run_thr = que_fork_start_command(
- static_cast<que_fork_t*>(parent));
-
- ut_a(run_thr == thr);
-
- /* There was a lock wait but the thread was not
- in a ready to run or running state. */
- trx->error_state = DB_LOCK_WAIT;
-
+ if (row_mysql_handle_errors(&err, trx, thr, NULL)) {
goto run_again;
}
}
@@ -3313,671 +3140,161 @@ run_again:
return(err);
}
-static
-void
-fil_wait_crypt_bg_threads(
- dict_table_t* table)
-{
- time_t start = time(0);
- time_t last = start;
-
- while (table->n_ref_count > 0) {
- dict_mutex_exit_for_mysql();
- os_thread_sleep(20000);
- dict_mutex_enter_for_mysql();
- time_t now = time(0);
- if (now >= last + 30) {
- fprintf(stderr,
- "WARNING: waited %ld seconds "
- "for ref-count on table: %s space: %u\n",
- (long)(now - start), table->name, table->space);
- last = now;
- }
-
- if (now >= start + 300) {
- fprintf(stderr,
- "WARNING: after %ld seconds, gave up waiting "
- "for ref-count on table: %s space: %u\n",
- (long)(now - start), table->name, table->space);
- break;
- }
- }
-}
-
-/*********************************************************************//**
-Truncates a table for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+/** Drop ancillary FTS tables as part of dropping a table.
+@param[in,out] table Table cache entry
+@param[in,out] trx Transaction handle
+@return error code or DB_SUCCESS */
+UNIV_INLINE
dberr_t
-row_truncate_table_for_mysql(
-/*=========================*/
- dict_table_t* table, /*!< in: table handle */
- trx_t* trx) /*!< in: transaction handle */
+row_drop_ancillary_fts_tables(
+ dict_table_t* table,
+ trx_t* trx)
{
- dberr_t err;
- mem_heap_t* heap;
- byte* buf;
- dtuple_t* tuple;
- dfield_t* dfield;
- dict_index_t* sys_index;
- btr_pcur_t pcur;
- mtr_t mtr;
- table_id_t new_id;
- ulint recreate_space = 0;
- pars_info_t* info = NULL;
- ibool has_internal_doc_id;
- ulint old_space = table->space;
-
- /* How do we prevent crashes caused by ongoing operations on
- the table? Old operations could try to access non-existent
- pages.
-
- 1) SQL queries, INSERT, SELECT, ...: we must get an exclusive
- InnoDB table lock on the table before we can do TRUNCATE
- TABLE. Then there are no running queries on the table.
-
- 2) Purge and rollback: we assign a new table id for the
- table. Since purge and rollback look for the table based on
- the table id, they see the table as 'dropped' and discard
- their operations.
-
- 3) Insert buffer: TRUNCATE TABLE is analogous to DROP TABLE,
- so we do not have to remove insert buffer records, as the
- insert buffer works at a low level. If a freed page is later
- reallocated, the allocator will remove the ibuf entries for
- it.
-
- When we truncate *.ibd files by recreating them (analogous to
- DISCARD TABLESPACE), we remove all entries for the table in the
- insert buffer tree. This is not strictly necessary, because
- in 6) we will assign a new tablespace identifier, but we can
- free up some space in the system tablespace.
-
- 4) Linear readahead and random readahead: we use the same
- method as in 3) to discard ongoing operations. (This is only
- relevant for TRUNCATE TABLE by DISCARD TABLESPACE.)
-
- 5) FOREIGN KEY operations: if
- table->n_foreign_key_checks_running > 0, we do not allow the
- TRUNCATE. We also reserve the data dictionary latch.
-
- 6) Crash recovery: To prevent the application of pre-truncation
- redo log records on the truncated tablespace, we will assign
- a new tablespace identifier to the truncated tablespace. */
-
- ut_ad(table);
-
- if (dict_table_is_discarded(table)) {
- return(DB_TABLESPACE_DELETED);
- } else if (!table->is_readable()) {
- return (row_mysql_get_table_status(table, trx, true));
- }
-
- if (table->fts) {
- fts_optimize_remove_table(table);
- }
-
- trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
-
- trx->op_info = "truncating table";
-
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations */
-
- ut_a(trx->dict_operation_lock_mode == 0);
- /* Prevent foreign key checks etc. while we are truncating the
- table */
- row_mysql_lock_data_dictionary(trx);
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- dict_stats_wait_bg_to_stop_using_table(table, trx);
-
- /* Check if the table is referenced by foreign key constraints from
- some other table (not the table itself) */
-
- dict_foreign_set::iterator it
- = std::find_if(table->referenced_set.begin(),
- table->referenced_set.end(),
- dict_foreign_different_tables());
-
- if (!srv_read_only_mode
- && it != table->referenced_set.end()
- && trx->check_foreigns) {
-
- FILE* ef = dict_foreign_err_file;
- dict_foreign_t* foreign = *it;
-
- /* We only allow truncating a referenced table if
- FOREIGN_KEY_CHECKS is set to 0 */
-
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef);
- ut_print_timestamp(ef);
-
- fputs(" Cannot truncate table ", ef);
- ut_print_name(ef, trx, TRUE, table->name);
- fputs(" by DROP+CREATE\n"
- "InnoDB: because it is referenced by ", ef);
- ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
- putc('\n', ef);
- mutex_exit(&dict_foreign_err_mutex);
-
- err = DB_ERROR;
- goto funct_exit;
- }
-
- /* TODO: could we replace the counter n_foreign_key_checks_running
- with lock checks on the table? Acquire here an exclusive lock on the
- table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that
- they can cope with the table having been truncated here? Foreign key
- checks take an IS or IX lock on the table. */
-
- if (table->n_foreign_key_checks_running > 0) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Cannot truncate table ", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs(" by DROP+CREATE\n"
- "InnoDB: because there is a foreign key check"
- " running on it.\n",
- stderr);
- err = DB_ERROR;
-
- goto funct_exit;
- }
-
- /* Check if memcached plugin is running on this table. if is, we don't
- allow truncate this table. */
- if (table->memcached_sync_count != 0) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Cannot truncate table ", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs(" by DROP+CREATE\n"
- "InnoDB: because there are memcached operations"
- " running on it.\n",
- stderr);
- err = DB_ERROR;
-
- goto funct_exit;
- } else {
- /* We need to set this counter to -1 for blocking
- memcached operations. */
- table->memcached_sync_count = DICT_TABLE_IN_DDL;
- }
-
- /* Remove all locks except the table-level X lock. */
-
- lock_remove_all_on_table(table, FALSE);
-
- /* Ensure that the table will be dropped by
- trx_rollback_active() in case of a crash. */
-
- trx->table_id = table->id;
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
-
- /* Assign an undo segment for the transaction, so that the
- transaction will be recovered after a crash. */
-
- mutex_enter(&trx->undo_mutex);
-
- err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
-
- mutex_exit(&trx->undo_mutex);
-
- if (err != DB_SUCCESS) {
-
- goto funct_exit;
- }
-
- if (table->space && !DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY)) {
- /* Discard and create the single-table tablespace. */
- ulint space_id = table->space;
- ulint flags = ULINT_UNDEFINED;
- ulint key_id = FIL_DEFAULT_ENCRYPTION_KEY;
- fil_encryption_t mode = FIL_ENCRYPTION_DEFAULT;
-
- dict_get_and_save_data_dir_path(table, true);
-
- if (fil_space_t* space = fil_space_acquire(space_id)) {
- fil_space_crypt_t* crypt_data = space->crypt_data;
-
- if (crypt_data) {
- key_id = crypt_data->key_id;
- mode = crypt_data->encryption;
- }
-
- flags = space->flags;
- fil_space_release(space);
- }
-
- while (buf_LRU_drop_page_hash_for_tablespace(table)) {
- if (trx_is_interrupted(trx)
- || srv_shutdown_state != SRV_SHUTDOWN_NONE) {
- err = DB_INTERRUPTED;
- goto funct_exit;
- }
- }
-
- if (flags != ULINT_UNDEFINED
- && fil_discard_tablespace(space_id) == DB_SUCCESS) {
-
- dict_index_t* index;
-
- dict_hdr_get_new_id(NULL, NULL, &space_id);
-
- /* Lock all index trees for this table. We must
- do so after dict_hdr_get_new_id() to preserve
- the latch order */
- dict_table_x_lock_indexes(table);
-
- if (space_id == ULINT_UNDEFINED
- || fil_create_new_single_table_tablespace(
- space_id, table->name,
- table->data_dir_path,
- flags, table->flags2,
- FIL_IBD_FILE_INITIAL_SIZE,
- mode, key_id)
- != DB_SUCCESS) {
- dict_table_x_unlock_indexes(table);
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "TRUNCATE TABLE %s failed to "
- "create a new tablespace",
- table->name);
-
- table->file_unreadable = true;
- err = DB_ERROR;
- goto funct_exit;
- }
-
- recreate_space = space_id;
-
- /* Replace the space_id in the data dictionary cache.
- The persisent data dictionary (SYS_TABLES.SPACE
- and SYS_INDEXES.SPACE) are updated later in this
- function. */
- table->space = space_id;
- index = dict_table_get_first_index(table);
- do {
- index->space = space_id;
- index = dict_table_get_next_index(index);
- } while (index);
-
- mtr_start(&mtr);
- fsp_header_init(space_id,
- FIL_IBD_FILE_INITIAL_SIZE, &mtr);
- mtr_commit(&mtr);
- }
- } else {
- /* Lock all index trees for this table, as we will
- truncate the table/index and possibly change their metadata.
- All DML/DDL are blocked by table level lock, with
- a few exceptions such as queries into information schema
- about the table, MySQL could try to access index stats
- for this kind of query, we need to use index locks to
- sync up */
- dict_table_x_lock_indexes(table);
- }
-
- /* scan SYS_INDEXES for all indexes of the table */
- heap = mem_heap_create(800);
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
- mach_write_to_8(buf, table->id);
-
- dfield_set_data(dfield, buf, 8);
- sys_index = dict_table_get_first_index(dict_sys->sys_indexes);
- dict_index_copy_types(tuple, sys_index, 1);
-
- mtr_start(&mtr);
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_MODIFY_LEAF, &pcur, &mtr);
- for (;;) {
- rec_t* rec;
- const byte* field;
- ulint len;
- ulint root_page_no;
-
- if (!btr_pcur_is_on_user_rec(&pcur)) {
- /* The end of SYS_INDEXES has been reached. */
- break;
- }
-
- rec = btr_pcur_get_rec(&pcur);
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_INDEXES__TABLE_ID, &len);
- ut_ad(len == 8);
-
- if (memcmp(buf, field, len) != 0) {
- /* End of indexes for the table (TABLE_ID mismatch). */
- break;
- }
-
- if (rec_get_deleted_flag(rec, FALSE)) {
- /* The index has been dropped. */
- goto next_rec;
- }
-
- /* This call may commit and restart mtr
- and reposition pcur. */
- root_page_no = dict_truncate_index_tree(table, recreate_space,
- &pcur, &mtr);
-
- rec = btr_pcur_get_rec(&pcur);
-
- if (root_page_no != FIL_NULL) {
- page_rec_write_field(
- rec, DICT_FLD__SYS_INDEXES__PAGE_NO,
- root_page_no, &mtr);
- /* We will need to commit and restart the
- mini-transaction in order to avoid deadlocks.
- The dict_truncate_index_tree() call has allocated
- a page in this mini-transaction, and the rest of
- this loop could latch another index page. */
- mtr_commit(&mtr);
- mtr_start(&mtr);
- btr_pcur_restore_position(BTR_MODIFY_LEAF,
- &pcur, &mtr);
- }
-
-next_rec:
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- mem_heap_free(heap);
- /* Done with index truncation, release index tree locks,
- subsequent work relates to table level metadata change */
- dict_table_x_unlock_indexes(table);
-
- dict_hdr_get_new_id(&new_id, NULL, NULL);
-
- /* Create new FTS auxiliary tables with the new_id, and
- drop the old index later, only if everything runs successful. */
- has_internal_doc_id = dict_table_has_fts_index(table)
- || DICT_TF2_FLAG_IS_SET(
- table, DICT_TF2_FTS_HAS_DOC_ID);
- if (has_internal_doc_id) {
- dict_table_t fts_table;
- ulint i;
-
- fts_table.name = table->name;
- fts_table.id = new_id;
- fts_table.flags2 = table->flags2;
-
- err = fts_create_common_tables(
- trx, &fts_table, table->name, TRUE);
-
- for (i = 0;
- i < ib_vector_size(table->fts->indexes)
- && err == DB_SUCCESS;
- i++) {
-
- dict_index_t* fts_index;
+ /* Drop ancillary FTS tables */
+ if (dict_table_has_fts_index(table)
+ || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
- fts_index = static_cast<dict_index_t*>(
- ib_vector_getp(table->fts->indexes, i));
+ ut_ad(table->get_ref_count() == 0);
+ ut_ad(trx_is_started(trx));
- err = fts_create_index_tables_low(
- trx, fts_index, table->name, new_id);
- }
+ dberr_t err = fts_drop_tables(trx, table);
if (err != DB_SUCCESS) {
- trx->error_state = DB_SUCCESS;
- trx_rollback_to_savepoint(trx, NULL);
- trx->error_state = DB_SUCCESS;
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Unable to truncate FTS index for"
- " table", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs("\n", stderr);
+ ib::error() << " Unable to remove ancillary FTS"
+ " tables for table "
+ << table->name << " : " << ut_strerr(err);
- goto funct_exit;
- } else {
- ut_ad(trx->state != TRX_STATE_NOT_STARTED);
+ return(err);
}
}
- info = pars_info_create();
+ /* The table->fts flag can be set on a table whose
+ clustered index is being rebuilt. Such a table might not
+ have the DICT_TF2_FTS flag set, so keep this check outside
+ the dict_table_has_fts_index() condition above. */
+ if (table->fts != NULL) {
+ /* fts_que_graph_free_check_lock() would try to
+ acquire the dictionary mutex */
+ table->fts->dict_locked = true;
- pars_info_add_int4_literal(info, "new_space", (lint) table->space);
- pars_info_add_ull_literal(info, "old_id", table->id);
- pars_info_add_ull_literal(info, "new_id", new_id);
-
- err = que_eval_sql(info,
- "PROCEDURE RENUMBER_TABLE_ID_PROC () IS\n"
- "BEGIN\n"
- "UPDATE SYS_TABLES"
- " SET ID = :new_id, SPACE = :new_space\n"
- " WHERE ID = :old_id;\n"
- "UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n"
- " WHERE TABLE_ID = :old_id;\n"
- "UPDATE SYS_INDEXES"
- " SET TABLE_ID = :new_id, SPACE = :new_space\n"
- " WHERE TABLE_ID = :old_id;\n"
- "END;\n"
- , FALSE, trx);
-
- if (err == DB_SUCCESS && old_space != table->space) {
- info = pars_info_create();
-
- pars_info_add_int4_literal(info, "old_space", (lint) old_space);
-
- pars_info_add_int4_literal(
- info, "new_space", (lint) table->space);
-
- err = que_eval_sql(info,
- "PROCEDURE RENUMBER_TABLESPACE_PROC () IS\n"
- "BEGIN\n"
- "UPDATE SYS_TABLESPACES"
- " SET SPACE = :new_space\n"
- " WHERE SPACE = :old_space;\n"
- "UPDATE SYS_DATAFILES"
- " SET SPACE = :new_space"
- " WHERE SPACE = :old_space;\n"
- "END;\n"
- , FALSE, trx);
+ fts_free(table);
}
- DBUG_EXECUTE_IF("ib_ddl_crash_before_fts_truncate", err = DB_ERROR;);
-
- if (err != DB_SUCCESS) {
- trx->error_state = DB_SUCCESS;
- trx_rollback_to_savepoint(trx, NULL);
- trx->error_state = DB_SUCCESS;
-
- /* Update system table failed. Table in memory metadata
- could be in an inconsistent state, mark the in-memory
- table->corrupted to be true. In the long run, this should
- be fixed by atomic truncate table */
- table->corrupted = true;
-
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Unable to assign a new identifier to table ",
- stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs("\n"
- "InnoDB: after truncating it. Background processes"
- " may corrupt the table!\n", stderr);
- /* Failed to update the table id, so drop the new
- FTS auxiliary tables */
- if (has_internal_doc_id) {
- ut_ad(trx->state == TRX_STATE_NOT_STARTED);
-
- table_id_t id = table->id;
+ return(DB_SUCCESS);
+}
- table->id = new_id;
+/** Drop a table from the memory cache as part of dropping a table.
+@param[in] tablename A copy of table->name; remains valid
+ after the dict_table_t object is freed
+@param[in,out] table Table cache entry
+@param[in,out] trx Transaction handle
+@return error code or DB_SUCCESS */
+UNIV_INLINE
+dberr_t
+row_drop_table_from_cache(
+ const char* tablename,
+ dict_table_t* table,
+ trx_t* trx)
+{
+ dberr_t err = DB_SUCCESS;
+ ut_ad(!dict_table_is_temporary(table));
- fts_drop_tables(trx, table);
+ /* Remove the pointer to this table object from the list
+ of modified tables by the transaction because the object
+ is going to be destroyed below. */
+ trx->mod_tables.erase(table);
- table->id = id;
-
- ut_ad(trx->state != TRX_STATE_NOT_STARTED);
- }
+ dict_table_remove_from_cache(table);
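+ /* If the table can still be loaded from SYS_TABLES, the
+ dictionary rows were not deleted and the drop failed. */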
+ if (dict_load_table(tablename, DICT_ERR_IGNORE_FK_NOKEY)) {
+ ib::error() << "Not able to remove table "
+ << ut_get_name(trx, tablename)
+ << " from the dictionary cache!";
err = DB_ERROR;
- } else {
- /* Drop the old FTS index */
- if (has_internal_doc_id) {
- ut_ad(trx->state != TRX_STATE_NOT_STARTED);
- fts_drop_tables(trx, table);
- ut_ad(trx->state != TRX_STATE_NOT_STARTED);
- }
-
- DBUG_EXECUTE_IF("ib_truncate_crash_after_fts_drop",
- DBUG_SUICIDE(););
-
- dict_table_change_id_in_cache(table, new_id);
-
- /* Reset the Doc ID in cache to 0 */
- if (has_internal_doc_id && table->fts->cache) {
- DBUG_EXECUTE_IF("ib_trunc_sleep_before_fts_cache_clear",
- os_thread_sleep(10000000););
-
- table->fts->dict_locked = true;
- fts_update_next_doc_id(trx, table, 0);
- fts_cache_clear(table->fts->cache);
- fts_cache_init(table->fts->cache);
- table->fts->dict_locked = false;
- }
}
- /* Reset auto-increment. */
- dict_table_autoinc_lock(table);
- dict_table_autoinc_initialize(table, 1);
- dict_table_autoinc_unlock(table);
-
- trx_commit_for_mysql(trx);
-
-funct_exit:
+ return(err);
+}
- if (table->memcached_sync_count == DICT_TABLE_IN_DDL) {
- /* We need to set the memcached sync back to 0, unblock
- memcached operationse. */
- table->memcached_sync_count = 0;
- }
+/** Drop a single-table tablespace as part of dropping or renaming a table.
+This deletes the fil_space_t if found and the file on disk.
+@param[in] space_id Tablespace ID
+@param[in] tablename Table name, same as the tablespace name
+@param[in] filepath File path of tablespace to delete
+@param[in] table_flags table flags
+@return error code or DB_SUCCESS */
+UNIV_INLINE
+dberr_t
+row_drop_single_table_tablespace(
+ ulint space_id,
+ const char* tablename,
+ const char* filepath,
+ ulint table_flags)
+{
+ dberr_t err = DB_SUCCESS;
- /* Add the table back to FTS optimize background thread. */
- if (table->fts) {
- fts_optimize_add_table(table);
- }
+ /* If the tablespace is not in the cache, just delete the file. */
+ if (!fil_space_for_table_exists_in_mem(
+ space_id, tablename, table_flags)) {
- row_mysql_unlock_data_dictionary(trx);
+ /* Force a delete of any discarded or temporary files. */
+ fil_delete_file(filepath);
- dict_stats_update(table, DICT_STATS_EMPTY_TABLE);
+ ib::info() << "Removed datafile " << filepath
+ << " for table " << tablename;
+ } else if (fil_delete_tablespace(space_id) != DB_SUCCESS) {
- trx->op_info = "";
+ ib::error() << "We removed the InnoDB internal data"
+ " dictionary entry of table " << tablename
+ << " but we are not able to delete the tablespace "
+ << space_id << " file " << filepath << "!";
- srv_wake_master_thread();
+ err = DB_ERROR;
+ }
return(err);
}
-/*********************************************************************//**
-Drops a table for MySQL. If the name of the dropped table ends in
-one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
-"innodb_table_monitor", then this will also stop the printing of monitor
-output by the master thread. If the data dictionary was not already locked
-by the transaction, the transaction will be committed. Otherwise, the
-data dictionary will remain locked.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+/** Drop a table.
+If the data dictionary was not already locked by the transaction,
+the transaction will be committed. Otherwise, the data dictionary
+will remain locked.
+@param[in] name Table name
+@param[in,out] trx Transaction handle
+@param[in] sqlcom type of SQL operation
+@param[in] create_failed true=create table failed
+ because e.g. foreign key column
+ type mismatch
+@param[in] nonatomic Whether it is permitted to release
+ and reacquire dict_operation_lock
+@return error code or DB_SUCCESS */
dberr_t
row_drop_table_for_mysql(
-/*=====================*/
- const char* name, /*!< in: table name */
- trx_t* trx, /*!< in: transaction handle */
- bool drop_db,/*!< in: true=dropping whole database */
- bool create_failed,/*!<in: TRUE=create table failed
- because e.g. foreign key column
- type mismatch. */
- bool nonatomic)
- /*!< in: whether it is permitted
- to release and reacquire dict_operation_lock */
+ const char* name,
+ trx_t* trx,
+ enum_sql_command sqlcom,
+ bool create_failed,
+ bool nonatomic)
{
dberr_t err;
dict_foreign_t* foreign;
dict_table_t* table;
- ibool print_msg;
- ulint space_id;
- char* filepath = NULL;
- const char* tablename_minus_db;
- char* tablename = NULL;
- bool ibd_file_missing;
- ulint namelen;
+ char* filepath = NULL;
+ char* tablename = NULL;
bool locked_dictionary = false;
pars_info_t* info = NULL;
- mem_heap_t* heap = NULL;
DBUG_ENTER("row_drop_table_for_mysql");
-
- DBUG_PRINT("row_drop_table_for_mysql", ("table: %s", name));
+ DBUG_PRINT("row_drop_table_for_mysql", ("table: '%s'", name));
ut_a(name != NULL);
- /* The table name is prefixed with the database name and a '/'.
- Certain table names starting with 'innodb_' have their special
- meaning regardless of the database name. Thus, we need to
- ignore the database name prefix in the comparisons. */
- tablename_minus_db = strchr(name, '/');
-
- if (tablename_minus_db) {
- tablename_minus_db++;
- } else {
- /* Ancillary FTS tables don't have '/' characters. */
- tablename_minus_db = name;
- }
-
- namelen = strlen(tablename_minus_db) + 1;
-
- if (namelen == sizeof S_innodb_monitor
- && !memcmp(tablename_minus_db, S_innodb_monitor,
- sizeof S_innodb_monitor)) {
-
- /* Table name equals "innodb_monitor":
- stop monitor prints */
-
- srv_print_innodb_monitor = FALSE;
- srv_print_innodb_lock_monitor = FALSE;
- } else if (namelen == sizeof S_innodb_lock_monitor
- && !memcmp(tablename_minus_db, S_innodb_lock_monitor,
- sizeof S_innodb_lock_monitor)) {
- srv_print_innodb_monitor = FALSE;
- srv_print_innodb_lock_monitor = FALSE;
- } else if (namelen == sizeof S_innodb_tablespace_monitor
- && !memcmp(tablename_minus_db, S_innodb_tablespace_monitor,
- sizeof S_innodb_tablespace_monitor)) {
-
- srv_print_innodb_tablespace_monitor = FALSE;
- } else if (namelen == sizeof S_innodb_table_monitor
- && !memcmp(tablename_minus_db, S_innodb_table_monitor,
- sizeof S_innodb_table_monitor)) {
-
- srv_print_innodb_table_monitor = FALSE;
- }
-
/* Serialize data dictionary operations with dictionary mutex:
no deadlocks can occur then in these operations */
trx->op_info = "dropping table";
- /* This function is called recursively via fts_drop_tables(). */
- if (trx->state == TRX_STATE_NOT_STARTED) {
- trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
- }
-
if (trx->dict_operation_lock_mode != RW_X_LATCH) {
- /* Prevent foreign key checks etc. while we are dropping the
- table */
+ /* Prevent foreign key checks etc. while we are
+ dropping the table */
row_mysql_lock_data_dictionary(trx);
@@ -3985,33 +3302,31 @@ row_drop_table_for_mysql(
nonatomic = true;
}
- ut_ad(mutex_own(&(dict_sys->mutex)));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(mutex_own(&dict_sys->mutex));
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
table = dict_table_open_on_name(
name, TRUE, FALSE,
static_cast<dict_err_ignore_t>(
- DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT));
+ DICT_ERR_IGNORE_INDEX_ROOT
+ | DICT_ERR_IGNORE_CORRUPT));
if (!table) {
- err = DB_TABLE_NOT_FOUND;
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs(" does not exist in the InnoDB internal\n"
- "InnoDB: data dictionary though MySQL is"
- " trying to drop it.\n"
- "InnoDB: Have you copied the .frm file"
- " of the table to the\n"
- "InnoDB: MySQL database directory"
- " from another database?\n"
- "InnoDB: You can look for further help from\n"
- "InnoDB: " REFMAN "innodb-troubleshooting.html\n",
- stderr);
- goto funct_exit;
+ if (locked_dictionary) {
+ row_mysql_unlock_data_dictionary(trx);
+ }
+ trx->op_info = "";
+ DBUG_RETURN(DB_TABLE_NOT_FOUND);
+ }
+
+ /* This function is called recursively via fts_drop_tables(). */
+ if (!trx_is_started(trx)) {
+
+ if (!dict_table_is_temporary(table)) {
+ trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
+ } else {
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+ }
}
/* Turn on this drop bit before we could release the dictionary
@@ -4052,17 +3367,54 @@ row_drop_table_for_mysql(
/* make sure background stats thread is not running on the table */
ut_ad(!(table->stats_bg_flag & BG_STAT_IN_PROGRESS));
-
- /* Delete the link file if used. */
- if (DICT_TF_HAS_DATA_DIR(table->flags)) {
- fil_delete_link_file(name);
- }
+ const bool is_temp_name = strstr(table->name.m_name,
+ "/" TEMP_FILE_PREFIX);
+ mem_heap_t* heap = NULL;
if (!dict_table_is_temporary(table)) {
+ if (table->space != TRX_SYS_SPACE) {
+#ifdef BTR_CUR_HASH_ADAPT
+ /* On DISCARD TABLESPACE, we would not drop the
+ adaptive hash index entries. If the tablespace is
+ missing here, delete-marking the record in SYS_INDEXES
+ would not free any pages in the buffer pool. Thus,
+ dict_index_remove_from_cache() would hang due to
+ adaptive hash index entries existing in the buffer
+ pool. To prevent this hang, and also to guarantee
+ that btr_search_drop_page_hash_when_freed() will avoid
+ calling btr_search_drop_page_hash_index() while we
+ hold the InnoDB dictionary lock, we will drop any
+ adaptive hash index entries upfront. */
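+ /* Internal DDL cleanup (temporary #sql names, failed
+ CREATE, FTS auxiliary tables) must not be aborted by a
+ query interrupt; only a server shutdown may stop it. */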
+ const bool immune = is_temp_name
+ || create_failed
+ || sqlcom == SQLCOM_CREATE_TABLE
+ || strstr(table->name.m_name, "/FTS");
+
+ while (buf_LRU_drop_page_hash_for_tablespace(table)) {
+ if ((!immune && trx_is_interrupted(trx))
+ || srv_shutdown_state
+ != SRV_SHUTDOWN_NONE) {
+ err = DB_INTERRUPTED;
+ table->to_be_dropped = false;
+ dict_table_close(table, true, false);
+ goto funct_exit;
+ }
+ }
+#endif /* BTR_CUR_HASH_ADAPT */
+
+ /* Delete the link file if used. */
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ RemoteDatafile::delete_link_file(name);
+ }
+ }
dict_stats_recalc_pool_del(table);
dict_stats_defrag_pool_del(table, NULL);
if (btr_defragment_thread_active) {
+ /* During fts_drop_orphaned_tables() in
+ recv_recovery_rollback_active() the
+ btr_defragment_mutex has not yet been
+ initialized by btr_defragment_init(). */
btr_defragment_remove_table(table);
}
@@ -4074,17 +3426,11 @@ row_drop_table_for_mysql(
err = dict_stats_drop_table(name, errstr, sizeof(errstr));
if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_WARN, "%s", errstr);
+ ib::warn() << errstr;
}
}
- /* Move the table the the non-LRU list so that it isn't
- considered for eviction. */
-
- if (table->can_be_evicted) {
- dict_table_move_from_lru_to_non_lru(table);
- }
-
+ dict_table_prevent_eviction(table);
dict_table_close(table, TRUE, FALSE);
/* Check if the table is referenced by foreign key constraints from
@@ -4099,7 +3445,7 @@ row_drop_table_for_mysql(
foreign = *it;
- const bool ref_ok = drop_db
+ const bool ref_ok = sqlcom == SQLCOM_DROP_DB
&& dict_tables_have_same_db(
name,
foreign->foreign_table_name_lookup);
@@ -4123,10 +3469,10 @@ row_drop_table_for_mysql(
ut_print_timestamp(ef);
fputs(" Cannot drop table ", ef);
- ut_print_name(ef, trx, TRUE, name);
+ ut_print_name(ef, trx, name);
fputs("\n"
"because it is referenced by ", ef);
- ut_print_name(ef, trx, TRUE,
+ ut_print_name(ef, trx,
foreign->foreign_table_name);
putc('\n', ef);
mutex_exit(&dict_foreign_err_mutex);
@@ -4136,12 +3482,7 @@ row_drop_table_for_mysql(
}
}
-
- DBUG_EXECUTE_IF("row_drop_table_add_to_background",
- row_add_table_to_background_drop_list(table->id);
- err = DB_SUCCESS;
- goto funct_exit;
- );
+ DBUG_EXECUTE_IF("row_drop_table_add_to_background", goto defer;);
/* TODO: could we replace the counter n_foreign_key_checks_running
with lock checks on the table? Acquire here an exclusive lock on the
@@ -4150,27 +3491,28 @@ row_drop_table_for_mysql(
checks take an IS or IX lock on the table. */
if (table->n_foreign_key_checks_running > 0) {
- if (row_add_table_to_background_drop_list(table->id)) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: You are trying to drop table ",
- stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs("\n"
- "InnoDB: though there is a"
- " foreign key check running on it.\n"
- "InnoDB: Adding the table to"
- " the background drop queue.\n",
- stderr);
+defer:
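+ /* Hide the table under an internal #sql name right
+ away; the physical drop is retried later from the
+ background drop list. */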
+ if (!is_temp_name) {
+ heap = mem_heap_create(FN_REFLEN);
+ const char* tmp_name
+ = dict_mem_create_temporary_tablename(
+ heap, table->name.m_name, table->id);
+ ib::info() << "Deferring DROP TABLE " << table->name
+ << "; renaming to " << tmp_name;
+ err = row_rename_table_for_mysql(
+ table->name.m_name, tmp_name, trx,
+ false, false);
+ } else {
+ err = DB_SUCCESS;
+ }
+ if (err == DB_SUCCESS) {
+ row_add_table_to_background_drop_list(table->id);
}
-
- /* We return DB_SUCCESS to MySQL though the drop will
- happen lazily later */
- err = DB_SUCCESS;
goto funct_exit;
}
/* Remove all locks that are on the table or its records, if there
- are no refernces to the table but it has record locks, we release
+ are no references to the table but it has record locks, we release
the record locks unconditionally. One use case is:
CREATE TABLE t2 (PRIMARY KEY (a)) SELECT * FROM t1;
@@ -4183,34 +3525,9 @@ row_drop_table_for_mysql(
shouldn't have to. There should never be record locks on a table
that is going to be dropped. */
- /* Wait on background threads to stop using table */
- fil_wait_crypt_bg_threads(table);
-
- if (table->n_ref_count == 0) {
- lock_remove_all_on_table(table, TRUE);
- ut_a(table->n_rec_locks == 0);
- } else if (table->n_ref_count > 0 || table->n_rec_locks > 0) {
- if (row_add_table_to_background_drop_list(table->id)) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: MySQL is"
- " trying to drop table ", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs("\n"
- "InnoDB: though there are still"
- " open handles to it.\n"
- "InnoDB: Adding the table to the"
- " background drop queue.\n",
- stderr);
-
- /* We return DB_SUCCESS to MySQL though the drop will
- happen lazily later */
- err = DB_SUCCESS;
- } else {
- /* The table is already in the background drop list */
- err = DB_ERROR;
- }
-
- goto funct_exit;
+ if (table->get_ref_count() > 0 || table->n_rec_locks > 0
+ || lock_table_has_locks(table)) {
+ goto defer;
}
/* The "to_be_dropped" marks table that is to be dropped, but
@@ -4220,37 +3537,6 @@ row_drop_table_for_mysql(
and it is free to be dropped */
table->to_be_dropped = false;
- /* If we get this far then the table to be dropped must not have
- any table or record locks on it. */
-
- ut_a(!lock_table_has_locks(table));
-
- if (table->space != TRX_SYS_SPACE) {
- /* On DISCARD TABLESPACE, we would not drop the
- adaptive hash index entries. If the tablespace is
- missing here, delete-marking the record in SYS_INDEXES
- would not free any pages in the buffer pool. Thus,
- dict_index_remove_from_cache() would hang due to
- adaptive hash index entries existing in the buffer
- pool. To prevent this hang, and also to guarantee
- that btr_search_drop_page_hash_when_freed() will avoid
- calling btr_search_drop_page_hash_index() while we
- hold the InnoDB dictionary lock, we will drop any
- adaptive hash index entries upfront. */
- const bool immune = create_failed
- || dict_table_is_temporary(table)
- || strncmp(tablename_minus_db, tmp_file_prefix,
- tmp_file_prefix_length)
- || strncmp(tablename_minus_db, "FTS_", 4);
- while (buf_LRU_drop_page_hash_for_tablespace(table)) {
- if ((!immune && trx_is_interrupted(trx))
- || srv_shutdown_state != SRV_SHUTDOWN_NONE) {
- err = DB_INTERRUPTED;
- goto funct_exit;
- }
- }
- }
-
switch (trx_get_dict_operation(trx)) {
case TRX_DICT_OP_NONE:
trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
@@ -4260,8 +3546,9 @@ row_drop_table_for_mysql(
case TRX_DICT_OP_INDEX:
/* If the transaction was previously flagged as
TRX_DICT_OP_INDEX, we should be dropping auxiliary
- tables for full-text indexes. */
- ut_ad(strstr(table->name, "/FTS_") != NULL);
+ tables for full-text indexes or temp tables. */
+ ut_ad(strstr(table->name.m_name, "/FTS_")
+ || strstr(table->name.m_name, TEMP_TABLE_PATH_PREFIX));
}
/* Mark all indexes unavailable in the data dictionary cache
@@ -4290,236 +3577,180 @@ row_drop_table_for_mysql(
rw_lock_x_unlock(dict_index_get_lock(index));
}
- /* We use the private SQL parser of Innobase to generate the
- query graphs needed in deleting the dictionary data from system
- tables in Innobase. Deleting a row from SYS_INDEXES table also
- frees the file segments of the B-tree associated with the index. */
+ if (!table->is_temporary()) {
+ /* We use the private SQL parser of Innobase to generate the
+ query graphs needed in deleting the dictionary data from system
+ tables in Innobase. Deleting a row from SYS_INDEXES table also
+ frees the file segments of the B-tree associated with the
+ index. */
- info = pars_info_create();
+ info = pars_info_create();
- pars_info_add_str_literal(info, "name", name);
+ pars_info_add_str_literal(info, "name", name);
- if (strcmp(name, "SYS_FOREIGN") && strcmp(name, "SYS_FOREIGN_COLS")
- && dict_table_get_low("SYS_FOREIGN")
- && dict_table_get_low("SYS_FOREIGN_COLS")) {
- err = que_eval_sql(
- info,
- "PROCEDURE DROP_FOREIGN_PROC () IS\n"
- "fid CHAR;\n"
+ if (sqlcom != SQLCOM_TRUNCATE
+ && strchr(name, '/')
+ && dict_table_get_low("SYS_FOREIGN")
+ && dict_table_get_low("SYS_FOREIGN_COLS")) {
+ err = que_eval_sql(
+ info,
+ "PROCEDURE DROP_FOREIGN_PROC () IS\n"
+ "fid CHAR;\n"
- "DECLARE CURSOR fk IS\n"
- "SELECT ID FROM SYS_FOREIGN\n"
- "WHERE FOR_NAME = :name\n"
- "AND TO_BINARY(FOR_NAME) = TO_BINARY(:name)\n"
- "FOR UPDATE;\n"
+ "DECLARE CURSOR fk IS\n"
+ "SELECT ID FROM SYS_FOREIGN\n"
+ "WHERE FOR_NAME = :name\n"
+ "AND TO_BINARY(FOR_NAME) = TO_BINARY(:name)\n"
+ "FOR UPDATE;\n"
- "BEGIN\n"
- "OPEN fk;\n"
- "WHILE 1 = 1 LOOP\n"
- " FETCH fk INTO fid;\n"
- " IF (SQL % NOTFOUND) THEN RETURN; END IF;\n"
- " DELETE FROM SYS_FOREIGN_COLS WHERE ID = fid;\n"
- " DELETE FROM SYS_FOREIGN WHERE ID = fid;\n"
- "END LOOP;\n"
- "CLOSE fk;\n"
- "END;\n", FALSE, trx);
- if (err == DB_SUCCESS) {
- info = pars_info_create();
- pars_info_add_str_literal(info, "name", name);
- goto do_drop;
- }
- } else {
+ "BEGIN\n"
+ "OPEN fk;\n"
+ "WHILE 1 = 1 LOOP\n"
+ " FETCH fk INTO fid;\n"
+ " IF (SQL % NOTFOUND) THEN RETURN; END IF;\n"
+ " DELETE FROM SYS_FOREIGN_COLS WHERE ID=fid;\n"
+ " DELETE FROM SYS_FOREIGN WHERE ID=fid;\n"
+ "END LOOP;\n"
+ "CLOSE fk;\n"
+ "END;\n", FALSE, trx);
+ if (err == DB_SUCCESS) {
+ info = pars_info_create();
+ pars_info_add_str_literal(info, "name", name);
+ goto do_drop;
+ }
+ } else {
do_drop:
- err = que_eval_sql(
- info,
- "PROCEDURE DROP_TABLE_PROC () IS\n"
- "table_id CHAR;\n"
- "index_id CHAR;\n"
-
- "DECLARE CURSOR cur_idx IS\n"
- "SELECT ID FROM SYS_INDEXES\n"
- "WHERE TABLE_ID = table_id\n"
- "FOR UPDATE;\n"
-
- "BEGIN\n"
- "SELECT ID INTO table_id\n"
- "FROM SYS_TABLES WHERE NAME = :name FOR UPDATE;\n"
- "IF (SQL % NOTFOUND) THEN RETURN; END IF;\n"
- "OPEN cur_idx;\n"
- "WHILE 1 = 1 LOOP\n"
- " FETCH cur_idx INTO index_id;\n"
- " IF (SQL % NOTFOUND) THEN EXIT; END IF;\n"
- " DELETE FROM SYS_FIELDS\n"
- " WHERE INDEX_ID = index_id;\n"
- " DELETE FROM SYS_INDEXES\n"
- " WHERE ID = index_id AND TABLE_ID = table_id;\n"
- "END LOOP;\n"
- "CLOSE cur_idx;\n"
+ if (dict_table_get_low("SYS_VIRTUAL")) {
+ err = que_eval_sql(
+ info,
+ "PROCEDURE DROP_VIRTUAL_PROC () IS\n"
+ "tid CHAR;\n"
+
+ "BEGIN\n"
+ "SELECT ID INTO tid FROM SYS_TABLES\n"
+ "WHERE NAME = :name FOR UPDATE;\n"
+ "IF (SQL % NOTFOUND) THEN RETURN;"
+ " END IF;\n"
+ "DELETE FROM SYS_VIRTUAL"
+ " WHERE TABLE_ID = tid;\n"
+ "END;\n", FALSE, trx);
+ if (err == DB_SUCCESS) {
+ info = pars_info_create();
+ pars_info_add_str_literal(
+ info, "name", name);
+ }
+ } else {
+ err = DB_SUCCESS;
+ }
- "DELETE FROM SYS_COLUMNS WHERE TABLE_ID = table_id;\n"
- "DELETE FROM SYS_TABLES WHERE NAME = :name;\n"
+ err = err == DB_SUCCESS ? que_eval_sql(
+ info,
+ "PROCEDURE DROP_TABLE_PROC () IS\n"
+ "tid CHAR;\n"
+ "iid CHAR;\n"
- "END;\n", FALSE, trx);
+ "DECLARE CURSOR cur_idx IS\n"
+ "SELECT ID FROM SYS_INDEXES\n"
+ "WHERE TABLE_ID = tid FOR UPDATE;\n"
- if (err == DB_SUCCESS && table->space
- && dict_table_get_low("SYS_TABLESPACES")
- && dict_table_get_low("SYS_DATAFILES")) {
- info = pars_info_create();
- pars_info_add_int4_literal(info, "id",
- lint(table->space));
- err = que_eval_sql(
- info,
- "PROCEDURE DROP_SPACE_PROC () IS\n"
"BEGIN\n"
- "DELETE FROM SYS_TABLESPACES\n"
- "WHERE SPACE = :id;\n"
- "DELETE FROM SYS_DATAFILES\n"
- "WHERE SPACE = :id;\n"
- "END;\n", FALSE, trx);
+ "SELECT ID INTO tid FROM SYS_TABLES\n"
+ "WHERE NAME = :name FOR UPDATE;\n"
+ "IF (SQL % NOTFOUND) THEN RETURN; END IF;\n"
+
+ "OPEN cur_idx;\n"
+ "WHILE 1 = 1 LOOP\n"
+ " FETCH cur_idx INTO iid;\n"
+ " IF (SQL % NOTFOUND) THEN EXIT; END IF;\n"
+ " DELETE FROM SYS_FIELDS\n"
+ " WHERE INDEX_ID = iid;\n"
+ " DELETE FROM SYS_INDEXES\n"
+ " WHERE ID = iid AND TABLE_ID = tid;\n"
+ "END LOOP;\n"
+ "CLOSE cur_idx;\n"
+
+ "DELETE FROM SYS_COLUMNS WHERE TABLE_ID=tid;\n"
+ "DELETE FROM SYS_TABLES WHERE NAME=:name;\n"
+
+ "END;\n", FALSE, trx) : err;
+
+ if (err == DB_SUCCESS && table->space
+ && dict_table_get_low("SYS_TABLESPACES")
+ && dict_table_get_low("SYS_DATAFILES")) {
+ info = pars_info_create();
+ pars_info_add_int4_literal(info, "id",
+ lint(table->space));
+ err = que_eval_sql(
+ info,
+ "PROCEDURE DROP_SPACE_PROC () IS\n"
+ "BEGIN\n"
+ "DELETE FROM SYS_TABLESPACES\n"
+ "WHERE SPACE = :id;\n"
+ "DELETE FROM SYS_DATAFILES\n"
+ "WHERE SPACE = :id;\n"
+ "END;\n", FALSE, trx);
+ }
}
+ } else {
+ page_no = page_nos;
+ for (dict_index_t* index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+ /* remove the index object associated. */
+ dict_drop_index_tree_in_mem(index, *page_no++);
+ }
+ trx->mod_tables.erase(table);
+ dict_table_remove_from_cache(table);
+ err = DB_SUCCESS;
+ goto funct_exit;
}
switch (err) {
- ibool is_temp;
+ ulint space_id;
+ bool is_discarded;
ulint table_flags;
case DB_SUCCESS:
- /* Clone the name, in case it has been allocated
- from table->heap, which will be freed by
- dict_table_remove_from_cache(table) below. */
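+ /* Save these values now; the dict_table_t object is
+ freed by row_drop_table_from_cache() below. */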
space_id = table->space;
- ibd_file_missing = table->file_unreadable;
-
+ is_discarded = dict_table_is_discarded(table);
table_flags = table->flags;
- is_temp = DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY);
-
- /* If there is a temp path then the temp flag is set.
- However, during recovery or reloading the table object
- after eviction from data dictionary cache, we might
- have a temp flag but not know the temp path */
- ut_a(table->dir_path_of_temp_table == NULL || is_temp);
- if (dict_table_is_discarded(table)
- || (!table->is_readable()
- && fil_space_get(table->space) == NULL)) {
- /* Do not attempt to drop known-to-be-missing
- tablespaces. */
- space_id = 0;
- }
+ ut_ad(!dict_table_is_temporary(table));
- /* We do not allow temporary tables with a remote path. */
- ut_a(!(is_temp && DICT_TF_HAS_DATA_DIR(table_flags)));
+ err = row_drop_ancillary_fts_tables(table, trx);
+ if (err != DB_SUCCESS) {
+ break;
+ }
- if (space_id && DICT_TF_HAS_DATA_DIR(table_flags)) {
+ /* Determine the tablespace filename before we drop
+ dict_table_t. Free this memory before returning. */
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
dict_get_and_save_data_dir_path(table, true);
- ut_a(table->data_dir_path);
-
- filepath = os_file_make_remote_pathname(
- table->data_dir_path, table->name, "ibd");
- } else if (table->dir_path_of_temp_table) {
- filepath = fil_make_ibd_name(
- table->dir_path_of_temp_table, true);
+ ut_ad(table->data_dir_path
+ || dict_table_is_discarded(table));
+ filepath = fil_make_filepath(
+ table->data_dir_path,
+ table->name.m_name, IBD,
+ table->data_dir_path != NULL);
} else {
- filepath = fil_make_ibd_name(tablename, false);
- }
-
- if (dict_table_has_fts_index(table)
- || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
- ut_ad(table->n_ref_count == 0);
- ut_ad(trx->state != TRX_STATE_NOT_STARTED);
- err = fts_drop_tables(trx, table);
-
- if (err != DB_SUCCESS) {
- ut_print_timestamp(stderr);
- fprintf(stderr," InnoDB: Error: (%s) not "
- "able to remove ancillary FTS tables "
- "for table ", ut_strerr(err));
- ut_print_name(stderr, trx, TRUE, tablename);
- fputs("\n", stderr);
-
- goto funct_exit;
- }
- }
-
- /* The table->fts flag can be set on the table for which
- the cluster index is being rebuilt. Such table might not have
- DICT_TF2_FTS flag set. So keep this out of above
- dict_table_has_fts_index condition */
- if (table->fts) {
- /* Need to set TABLE_DICT_LOCKED bit, since
- fts_que_graph_free_check_lock would try to acquire
- dict mutex lock */
- table->fts->dict_locked = true;
- fts_free(table);
+ filepath = fil_make_filepath(
+ NULL, table->name.m_name, IBD, false);
}
- dict_table_remove_from_cache(table);
-
- if (dict_load_table(tablename, TRUE,
- DICT_ERR_IGNORE_NONE) != NULL) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: not able to remove table ",
- stderr);
- ut_print_name(stderr, trx, TRUE, tablename);
- fputs(" from the dictionary cache!\n", stderr);
- err = DB_ERROR;
+ /* Free the dict_table_t object. */
+ err = row_drop_table_from_cache(tablename, table, trx);
+ if (err != DB_SUCCESS) {
+ break;
}
- /* Do not drop possible .ibd tablespace if something went
- wrong: we do not want to delete valuable data of the user */
-
- /* Don't spam the log if we can't find the tablespace of
- a temp table or if the tablesace has been discarded. */
- print_msg = !(is_temp || ibd_file_missing);
-
- if (err == DB_SUCCESS && space_id > TRX_SYS_SPACE) {
- if (!is_temp
- && !fil_space_for_table_exists_in_mem(
- space_id, tablename,
- print_msg, false, NULL, 0,
- table_flags)) {
- /* This might happen if we are dropping a
- discarded tablespace */
- err = DB_SUCCESS;
-
- if (print_msg) {
- char msg_tablename[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(
- msg_tablename,
- sizeof msg_tablename,
- tablename, FALSE);
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Removed the table %s from "
- "InnoDB's data dictionary",
- msg_tablename);
- }
-
- /* Force a delete of any discarded
- or temporary files. */
-
- fil_delete_file(filepath);
-
- } else if (fil_delete_tablespace(space_id)
- != DB_SUCCESS) {
- fprintf(stderr,
- "InnoDB: We removed now the InnoDB"
- " internal data dictionary entry\n"
- "InnoDB: of table ");
- ut_print_name(stderr, trx, TRUE, tablename);
- fprintf(stderr, ".\n");
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: not able to"
- " delete tablespace %lu of table ",
- (ulong) space_id);
- ut_print_name(stderr, trx, TRUE, tablename);
- fputs("!\n", stderr);
- err = DB_ERROR;
- }
+ /* Do not attempt to drop known-to-be-missing tablespaces,
+ nor the system tablespace. */
+ if (is_discarded || is_system_tablespace(space_id)) {
+ break;
}
+ /* We can now drop the single-table tablespace. */
+ err = row_drop_single_table_tablespace(
+ space_id, tablename, filepath, table_flags);
break;
case DB_OUT_OF_FILE_SPACE:
@@ -4539,13 +3770,10 @@ do_drop:
default:
/* This is some error we do not expect. Print
- the error number and rollback transaction */
- ut_print_timestamp(stderr);
-
- fprintf(stderr, "InnoDB: unknown error code %lu"
- " while dropping table:", (ulong) err);
- ut_print_name(stderr, trx, TRUE, tablename);
- fprintf(stderr, ".\n");
+ the error number and rollback the transaction */
+ ib::error() << "Unknown error code " << err << " while"
+ " dropping table: "
+ << ut_get_name(trx, tablename) << ".";
trx->error_state = DB_SUCCESS;
trx_rollback_to_savepoint(trx, NULL);
@@ -4566,16 +3794,38 @@ do_drop:
}
}
+ if (err != DB_SUCCESS && table != NULL) {
+ /* Drop table has failed with error but as drop table is not
+ transaction safe we should mark the table as corrupted to avoid
+ unwarranted follow-up action on this table that can result
+ in more serious issues. */
+
+ table->corrupted = true;
+ for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
+ index != NULL;
+ index = UT_LIST_GET_NEXT(indexes, index)) {
+ dict_set_corrupted(index, trx, "DROP TABLE");
+ }
+ }
+
funct_exit:
if (heap) {
mem_heap_free(heap);
}
- if (filepath) {
- mem_free(filepath);
- }
+
+ ut_free(filepath);
if (locked_dictionary) {
- trx_commit_for_mysql(trx);
+
+ if (trx_is_started(trx)) {
+
+ trx_commit_for_mysql(trx);
+ }
+
+ /* If the drop failed, add the table back to the
+ FTS optimize queue */
+ if (err != DB_SUCCESS && table->fts) {
+ fts_optimize_add_table(table);
+ }
row_mysql_unlock_data_dictionary(trx);
}
@@ -4587,103 +3837,17 @@ funct_exit:
DBUG_RETURN(err);
}
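Because DROP TABLE is not transaction-safe, the new error path above poisons the table by marking it and every one of its indexes corrupted before returning. The iteration shape, reduced to a self-contained sketch with hypothetical toy types (not InnoDB structures):

    #include <vector>

    struct ToyIndex { bool corrupted = false; };
    struct ToyTable { bool corrupted = false; std::vector<ToyIndex> indexes; };

    // On a failed drop, fence the table off from further use.
    static void mark_table_corrupted(ToyTable& t)
    {
        t.corrupted = true;
        for (ToyIndex& index : t.indexes) {
            index.corrupted = true;
        }
    }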
-/*********************************************************************//**
-Drop all temporary tables during crash recovery. */
-UNIV_INTERN
-void
-row_mysql_drop_temp_tables(void)
-/*============================*/
+/** Drop a table after failed CREATE TABLE.
+@param[in] name table name in the 'database/table' format
+@param[in,out] trx transaction handle
+@return error code or DB_SUCCESS */
+dberr_t row_drop_table_after_create_fail(const char* name, trx_t* trx)
{
- trx_t* trx;
- btr_pcur_t pcur;
- mtr_t mtr;
- mem_heap_t* heap;
-
- trx = trx_allocate_for_background();
- trx->op_info = "dropping temporary tables";
- row_mysql_lock_data_dictionary(trx);
-
- heap = mem_heap_create(200);
-
- mtr_start(&mtr);
-
- btr_pcur_open_at_index_side(
- true,
- dict_table_get_first_index(dict_sys->sys_tables),
- BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
-
- for (;;) {
- const rec_t* rec;
- const byte* field;
- ulint len;
- const char* table_name;
- dict_table_t* table;
-
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
-
- if (!btr_pcur_is_on_user_rec(&pcur)) {
- break;
- }
-
- /* The high order bit of N_COLS is set unless
- ROW_FORMAT=REDUNDANT. */
- rec = btr_pcur_get_rec(&pcur);
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__NAME, &len);
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__N_COLS, &len);
- if (len != 4
- || !(mach_read_from_4(field) & DICT_N_COLS_COMPACT)) {
- continue;
- }
-
- /* Older versions of InnoDB, which only supported tables
- in ROW_FORMAT=REDUNDANT could write garbage to
- SYS_TABLES.MIX_LEN, where we now store the is_temp flag.
- Above, we assumed is_temp=0 if ROW_FORMAT=REDUNDANT. */
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__MIX_LEN, &len);
- if (len != 4
- || !(mach_read_from_4(field) & DICT_TF2_TEMPORARY)) {
- continue;
- }
-
- /* This is a temporary table. */
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__NAME, &len);
- if (len == UNIV_SQL_NULL || len == 0) {
- /* Corrupted SYS_TABLES.NAME */
- continue;
- }
-
- table_name = mem_heap_strdupl(heap, (const char*) field, len);
-
- btr_pcur_store_position(&pcur, &mtr);
- btr_pcur_commit_specify_mtr(&pcur, &mtr);
-
- table = dict_load_table(table_name, TRUE, DICT_ERR_IGNORE_NONE);
-
- if (table) {
- row_drop_table_for_mysql(table_name, trx, FALSE, FALSE);
- trx_commit_for_mysql(trx);
- }
-
- mtr_start(&mtr);
- btr_pcur_restore_position(BTR_SEARCH_LEAF,
- &pcur, &mtr);
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
- row_mysql_unlock_data_dictionary(trx);
- trx_free_for_background(trx);
+ ib::warn() << "Dropping incompletely created " << name << " table.";
+ return row_drop_table_for_mysql(name, trx, SQLCOM_DROP_DB, true);
}
/*******************************************************************//**
Drop all foreign keys in a database, see Bug#18942.
Called at the end of row_drop_database_for_mysql().
-@return error code or DB_SUCCESS */
+@return error code or DB_SUCCESS */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
drop_all_foreign_keys_in_db(
@@ -4740,29 +3904,46 @@ drop_all_foreign_keys_in_db(
return(err);
}
-/*********************************************************************//**
-Drops a database for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+/** Drop a database for MySQL.
+@param[in] name database name which ends at '/'
+@param[in] trx transaction handle
+@param[out] found number of dropped tables/partitions
+@return error code or DB_SUCCESS */
dberr_t
row_drop_database_for_mysql(
-/*========================*/
- const char* name, /*!< in: database name which ends to '/' */
- trx_t* trx) /*!< in: transaction handle */
+ const char* name,
+ trx_t* trx,
+ ulint* found)
{
dict_table_t* table;
char* table_name;
dberr_t err = DB_SUCCESS;
ulint namelen = strlen(name);
+ bool is_partition = false;
+
+ ut_ad(found != NULL);
+
+ DBUG_ENTER("row_drop_database_for_mysql");
+
+ DBUG_PRINT("row_drop_database_for_mysql", ("db: '%s'", name));
ut_a(name != NULL);
- ut_a(name[namelen - 1] == '/');
+ /* Assert that the name is either a database name (ending
+ in '/') or a partition name (ending in '#'). */
+ if (name[namelen - 1] == '#') {
+ ut_ad(name[namelen - 2] != '/');
+ is_partition = true;
+ trx->op_info = "dropping partitions";
+ } else {
+ ut_a(name[namelen - 1] == '/');
+ trx->op_info = "dropping database";
+ }
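The suffix convention tested above distinguishes the two callers: a whole-database drop passes a name ending in '/', while a partition drop passes a prefix ending in '#' (for example, a name of the form "db/t1#P#"). As an illustrative standalone sketch (is_partition_request is a hypothetical helper, not an InnoDB function):

    #include <cstring>

    // Same trailing-character test as above, reduced to a toy helper.
    static bool is_partition_request(const char* name)
    {
        const size_t len = std::strlen(name);
        return len > 0 && name[len - 1] == '#';
    }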
- trx->op_info = "dropping database";
+ *found = 0;
trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
- trx_start_if_not_started_xa(trx);
+ trx_start_if_not_started_xa(trx, true);
+
loop:
row_mysql_lock_data_dictionary(trx);
@@ -4776,7 +3957,7 @@ loop:
table_name, strlen(table_name));
if (parent_table_name != NULL) {
- mem_free(table_name);
+ ut_free(table_name);
table_name = parent_table_name;
}
@@ -4788,33 +3969,30 @@ loop:
| DICT_ERR_IGNORE_CORRUPT));
if (!table) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot load table %s from InnoDB internal "
- "data dictionary during drop database",
- table_name);
- mem_free(table_name);
+ ib::error() << "Cannot load table " << table_name
+ << " from InnoDB internal data dictionary"
+ " during drop database";
+ ut_free(table_name);
err = DB_TABLE_NOT_FOUND;
break;
}
- if (!row_is_mysql_tmp_table_name(table->name)) {
+ if (!table->name.is_temporary()) {
/* There could be orphan temp tables left from
interrupted alter table. Leave them, and handle
the rest.*/
- if (table->can_be_evicted) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Orphan table encountered during "
- "DROP DATABASE. This is possible if "
- "'%s.frm' was lost.", table->name);
+ if (table->can_be_evicted
+ && (name[namelen - 1] != '#')) {
+ ib::warn() << "Orphan table encountered during"
+ " DROP DATABASE. This is possible if '"
+ << table->name << ".frm' was lost.";
}
if (!table->is_readable()
- && fil_space_get(table->space) == NULL) {
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "Missing %s.ibd file for table %s.",
- table->name, table->name);
+ && !fil_space_get(table->space)) {
+ ib::warn() << "Missing .ibd file for table "
+ << table->name << ".";
}
}
@@ -4825,56 +4003,63 @@ loop:
if we are holding the dict_sys->mutex. */
ut_ad(mutex_own(&dict_sys->mutex));
+ /* Disable statistics on the found table. */
+ if (!dict_stats_stop_bg(table)) {
+ row_mysql_unlock_data_dictionary(trx);
+
+ os_thread_sleep(250000);
+
+ ut_free(table_name);
+
+ goto loop;
+ }
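When background statistics cannot be stopped, the code above releases the dictionary latch, sleeps for 250 ms, and restarts the scan from the top. A minimal sketch of that unlock-sleep-retry shape, with standard C++ stand-ins (the callback replaces dict_stats_stop_bg() and the std::mutex replaces the data dictionary latch; none of these names are InnoDB APIs):

    #include <chrono>
    #include <functional>
    #include <mutex>
    #include <thread>

    static std::mutex dict_lock;  // stand-in for the data dictionary latch

    void stop_bg_then_drop(const std::function<bool()>& try_stop,
                           const std::function<void()>& drop_table)
    {
        for (;;) {
            std::unique_lock<std::mutex> lock(dict_lock);
            if (try_stop()) {          // dict_stats_stop_bg() analogue
                drop_table();          // safe: statistics are disabled
                return;
            }
            lock.unlock();             // release before backing off
            std::this_thread::sleep_for(std::chrono::milliseconds(250));
        }
    }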
+
/* Wait until MySQL does not have any queries running on
the table */
- if (table->n_ref_count > 0) {
+ if (table->get_ref_count() > 0) {
row_mysql_unlock_data_dictionary(trx);
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: MySQL is trying to"
- " drop database ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs("\n"
- "InnoDB: though there are still"
- " open handles to table ", stderr);
- ut_print_name(stderr, trx, TRUE, table_name);
- fputs(".\n", stderr);
+ ib::warn() << "MySQL is trying to drop database "
+ << ut_get_name(trx, name) << " though"
+ " there are still open handles to table "
+ << table->name << ".";
os_thread_sleep(1000000);
- mem_free(table_name);
+ ut_free(table_name);
goto loop;
}
- err = row_drop_table_for_mysql(table_name, trx, TRUE, FALSE);
+ err = row_drop_table_for_mysql(
+ table_name, trx, SQLCOM_DROP_DB);
trx_commit_for_mysql(trx);
if (err != DB_SUCCESS) {
- fputs("InnoDB: DROP DATABASE ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fprintf(stderr, " failed with error (%s) for table ",
- ut_strerr(err));
- ut_print_name(stderr, trx, TRUE, table_name);
- putc('\n', stderr);
- mem_free(table_name);
+ ib::error() << "DROP DATABASE "
+ << ut_get_name(trx, name) << " failed"
+ " with error (" << ut_strerr(err) << ") for"
+ " table " << ut_get_name(trx, table_name);
+ ut_free(table_name);
break;
}
- mem_free(table_name);
+ ut_free(table_name);
+ (*found)++;
}
- if (err == DB_SUCCESS) {
+ /* Partitioning does not yet support foreign keys. */
+ if (err == DB_SUCCESS && !is_partition) {
/* after dropping all tables try to drop all leftover
foreign keys in case orphaned ones exist */
err = drop_all_foreign_keys_in_db(name, trx);
if (err != DB_SUCCESS) {
- fputs("InnoDB: DROP DATABASE ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fprintf(stderr, " failed with error %d while "
- "dropping all foreign keys", err);
+ const std::string& db = ut_get_name(trx, name);
+ ib::error() << "DROP DATABASE " << db << " failed with"
+ " error " << err << " while dropping all"
+ " foreign keys";
}
}
@@ -4884,27 +4069,12 @@ loop:
trx->op_info = "";
- return(err);
-}
-
-/*********************************************************************//**
-Checks if a table name contains the string "/#sql" which denotes temporary
-tables in MySQL.
-@return true if temporary table */
-UNIV_INTERN MY_ATTRIBUTE((warn_unused_result))
-bool
-row_is_mysql_tmp_table_name(
-/*========================*/
- const char* name) /*!< in: table name in the form
- 'database/tablename' */
-{
- return(strstr(name, "/#sql") != NULL);
- /* return(strstr(name, "/@0023sql") != NULL); */
+ DBUG_RETURN(err);
}
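The overall loop shape of the function is: find the first table whose name carries the database (or partition) prefix, drop it, count it in *found, and restart the scan, since dropping invalidates the scan position; leftover foreign keys are handled once at the end. A self-contained sketch of the core loop, with a std::set standing in for SYS_TABLES (hypothetical names, illustrative only):

    #include <set>
    #include <string>

    static unsigned drop_database(std::set<std::string>& tables,
                                  const std::string& prefix)
    {
        unsigned found = 0;
        for (;;) {
            // First name at or after the prefix in dictionary order.
            auto it = tables.lower_bound(prefix);
            if (it == tables.end()
                || it->compare(0, prefix.size(), prefix) != 0) {
                break;  // no more tables in this database
            }
            tables.erase(it);  // row_drop_table_for_mysql() analogue
            ++found;
        }
        return found;
    }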
/****************************************************************//**
Delete a single constraint.
-@return error code or DB_SUCCESS */
+@return error code or DB_SUCCESS */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_delete_constraint_low(
@@ -4927,7 +4097,7 @@ row_delete_constraint_low(
/****************************************************************//**
Delete a single constraint.
-@return error code or DB_SUCCESS */
+@return error code or DB_SUCCESS */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_delete_constraint(
@@ -4960,15 +4130,16 @@ row_delete_constraint(
/*********************************************************************//**
Renames a table for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
dberr_t
row_rename_table_for_mysql(
/*=======================*/
const char* old_name, /*!< in: old table name */
const char* new_name, /*!< in: new table name */
trx_t* trx, /*!< in/out: transaction */
- bool commit) /*!< in: whether to commit trx */
+ bool commit, /*!< in: whether to commit trx */
+ bool use_fk) /*!< in: whether to parse and enforce
+ FOREIGN KEY constraints */
{
dict_table_t* table = NULL;
ibool dict_locked = FALSE;
@@ -4986,37 +4157,27 @@ row_rename_table_for_mysql(
ut_a(new_name != NULL);
ut_ad(trx->state == TRX_STATE_ACTIVE);
- if (srv_force_recovery) {
- fputs("InnoDB: innodb_force_recovery is on: we do not allow\n"
- "InnoDB: database modifications by the user. Shut down\n"
- "InnoDB: mysqld and edit my.cnf so that"
- "InnoDB: innodb_force_... is removed.\n",
- stderr);
-
- err = DB_READ_ONLY;
- goto funct_exit;
+ if (high_level_read_only) {
+ return(DB_READ_ONLY);
} else if (row_mysql_is_system_table(new_name)) {
- fprintf(stderr,
- "InnoDB: Error: trying to create a MySQL"
- " system table %s of type InnoDB.\n"
- "InnoDB: MySQL system tables must be"
- " of the MyISAM type!\n",
- new_name);
+ ib::error() << "Trying to create a MySQL system table "
+ << new_name << " of type InnoDB. MySQL system tables"
+ " must be of the MyISAM type!";
goto funct_exit;
}
trx->op_info = "renaming table";
- old_is_tmp = row_is_mysql_tmp_table_name(old_name);
- new_is_tmp = row_is_mysql_tmp_table_name(new_name);
+ old_is_tmp = dict_table_t::is_temporary_name(old_name);
+ new_is_tmp = dict_table_t::is_temporary_name(new_name);
dict_locked = trx->dict_operation_lock_mode == RW_X_LATCH;
table = dict_table_open_on_name(old_name, dict_locked, FALSE,
- DICT_ERR_IGNORE_NONE);
+ DICT_ERR_IGNORE_FK_NOKEY);
/* We look for the pattern #P# to see if the table is a
partitioned MySQL table. */
@@ -5060,29 +4221,15 @@ row_rename_table_for_mysql(
whether there exists a table name in
the system table whose name is
not normalized to lower case */
- normalize_table_name_low(
+ normalize_table_name_c_low(
par_case_name, old_name, FALSE);
#endif
table = dict_table_open_on_name(par_case_name, dict_locked, FALSE,
- DICT_ERR_IGNORE_NONE);
+ DICT_ERR_IGNORE_FK_NOKEY);
}
if (!table) {
err = DB_TABLE_NOT_FOUND;
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_name(stderr, trx, TRUE, old_name);
- fputs(" does not exist in the InnoDB internal\n"
- "InnoDB: data dictionary though MySQL is"
- " trying to rename the table.\n"
- "InnoDB: Have you copied the .frm file"
- " of the table to the\n"
- "InnoDB: MySQL database directory"
- " from another database?\n"
- "InnoDB: You can look for further help from\n"
- "InnoDB: " REFMAN "innodb-troubleshooting.html\n",
- stderr);
goto funct_exit;
} else if (!table->is_readable()
@@ -5091,14 +4238,13 @@ row_rename_table_for_mysql(
err = DB_TABLE_NOT_FOUND;
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Table %s does not have an .ibd file in the database "
- "directory. See " REFMAN "innodb-troubleshooting.html",
- old_name);
+ ib::error() << "Table " << old_name << " does not have an .ibd"
+ " file in the database directory. "
+ << TROUBLESHOOTING_MSG;
goto funct_exit;
- } else if (new_is_tmp) {
+ } else if (use_fk && !old_is_tmp && new_is_tmp) {
/* MySQL is doing an ALTER TABLE command and it renames the
original table to a temporary table name. We want to preserve
the original foreign key constraint definitions despite the
@@ -5125,16 +4271,22 @@ row_rename_table_for_mysql(
}
if (table->n_foreign_key_checks_running > 0) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: in ALTER TABLE ", stderr);
- ut_print_name(stderr, trx, TRUE, old_name);
- fprintf(stderr, "\n"
- "InnoDB: a FOREIGN KEY check is running.\n"
- "InnoDB: Cannot rename table.\n");
+ ib::error() << "In ALTER TABLE "
+ << ut_get_name(trx, old_name)
+ << " a FOREIGN KEY check is running. Cannot rename"
+ " table.";
err = DB_TABLE_IN_FK_CHECK;
goto funct_exit;
}
+ if (!table->is_temporary() && srv_safe_truncate) {
+ err = trx_undo_report_rename(trx, table);
+
+ if (err != DB_SUCCESS) {
+ goto funct_exit;
+ }
+ }
+
/* We use the private SQL parser of Innobase to generate the query
graphs needed in updating the dictionary data from system tables. */
@@ -5152,13 +4304,19 @@ row_rename_table_for_mysql(
"END;\n"
, FALSE, trx);
- /* SYS_TABLESPACES and SYS_DATAFILES track non-system tablespaces
- which have space IDs > 0. */
- if (err == DB_SUCCESS
- && table->space != TRX_SYS_SPACE
- && table->is_readable()) {
- /* Make a new pathname to update SYS_DATAFILES. */
- char* new_path = row_make_new_pathname(table, new_name);
+ /* SYS_TABLESPACES and SYS_DATAFILES need to be updated if
+ the table is in a single-table tablespace. */
+ if (err != DB_SUCCESS || !dict_table_is_file_per_table(table)) {
+ } else if (char* old_path = fil_space_get_first_path(table->space)) {
+ char* new_path = os_file_make_new_pathname(old_path, new_name);
+
+ /* If the old and new paths are the same, the table name has
+ not changed; only the database holding the table has changed,
+ so we need to rebuild the complete filepath. */
+ if (!dict_tables_have_same_db(old_name, new_name)) {
+ ut_free(new_path);
+ new_path = fil_make_filepath(NULL, new_name, IBD, false);
+ }
info = pars_info_create();
@@ -5178,7 +4336,8 @@ row_rename_table_for_mysql(
"END;\n"
, FALSE, trx);
- mem_free(new_path);
+ ut_free(old_path);
+ ut_free(new_path);
}
if (err != DB_SUCCESS) {
goto end;
@@ -5328,40 +4487,31 @@ row_rename_table_for_mysql(
end:
if (err != DB_SUCCESS) {
if (err == DB_DUPLICATE_KEY) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error; possible reasons:\n"
- "InnoDB: 1) Table rename would cause"
- " two FOREIGN KEY constraints\n"
- "InnoDB: to have the same internal name"
- " in case-insensitive comparison.\n"
- "InnoDB: 2) table ", stderr);
- ut_print_name(stderr, trx, TRUE, new_name);
- fputs(" exists in the InnoDB internal data\n"
- "InnoDB: dictionary though MySQL is"
- " trying to rename table ", stderr);
- ut_print_name(stderr, trx, TRUE, old_name);
- fputs(" to it.\n"
- "InnoDB: Have you deleted the .frm file"
- " and not used DROP TABLE?\n"
- "InnoDB: You can look for further help from\n"
- "InnoDB: " REFMAN "innodb-troubleshooting.html\n"
- "InnoDB: If table ", stderr);
- ut_print_name(stderr, trx, TRUE, new_name);
- fputs(" is a temporary table #sql..., then"
- " it can be that\n"
- "InnoDB: there are still queries running"
- " on the table, and it will be\n"
- "InnoDB: dropped automatically when"
- " the queries end.\n"
- "InnoDB: You can drop the orphaned table"
- " inside InnoDB by\n"
- "InnoDB: creating an InnoDB table with"
- " the same name in another\n"
- "InnoDB: database and copying the .frm file"
- " to the current database.\n"
- "InnoDB: Then MySQL thinks the table exists,"
- " and DROP TABLE will\n"
- "InnoDB: succeed.\n", stderr);
+ ib::error() << "Possible reasons:";
+ ib::error() << "(1) Table rename would cause two"
+ " FOREIGN KEY constraints to have the same"
+ " internal name in case-insensitive"
+ " comparison.";
+ ib::error() << "(2) Table "
+ << ut_get_name(trx, new_name)
+ << " exists in the InnoDB internal data"
+ " dictionary though MySQL is trying to rename"
+ " table " << ut_get_name(trx, old_name)
+ << " to it. Have you deleted the .frm file and"
+ " not used DROP TABLE?";
+ ib::info() << TROUBLESHOOTING_MSG;
+ ib::error() << "If table "
+ << ut_get_name(trx, new_name)
+ << " is a temporary table #sql..., then"
+ " it can be that there are still queries"
+ " running on the table, and it will be dropped"
+ " automatically when the queries end. You can"
+ " drop the orphaned table inside InnoDB by"
+ " creating an InnoDB table with the same name"
+ " in another database and copying the .frm file"
+ " to the current database. Then MySQL thinks"
+ " the table exists, and DROP TABLE will"
+ " succeed.";
}
trx->error_state = DB_SUCCESS;
trx_rollback_to_savepoint(trx, NULL);
@@ -5379,38 +4529,37 @@ end:
goto funct_exit;
}
+ /* In a copy ALTER, the template db_name and
+ table_name should be renamed only for the newly
+ created table. */
+ if (table->vc_templ != NULL && !new_is_tmp) {
+ innobase_rename_vc_templ(table);
+ }
+
/* We only want to switch off some of the type checking in
- an ALTER, not in a RENAME. */
+ an ALTER TABLE...ALGORITHM=COPY, not in a RENAME. */
+ dict_names_t fk_tables;
err = dict_load_foreigns(
new_name, NULL,
false, !old_is_tmp || trx->check_foreigns,
- DICT_ERR_IGNORE_NONE);
+ DICT_ERR_IGNORE_NONE, fk_tables);
if (err != DB_SUCCESS) {
- ut_print_timestamp(stderr);
if (old_is_tmp) {
- fputs(" InnoDB: Error: in ALTER TABLE ",
- stderr);
- ut_print_name(stderr, trx, TRUE, new_name);
- fputs("\n"
- "InnoDB: has or is referenced"
- " in foreign key constraints\n"
- "InnoDB: which are not compatible"
- " with the new table definition.\n",
- stderr);
+ ib::error() << "In ALTER TABLE "
+ << ut_get_name(trx, new_name)
+ << " has or is referenced in foreign"
+ " key constraints which are not"
+ " compatible with the new table"
+ " definition.";
} else {
- fputs(" InnoDB: Error: in RENAME TABLE"
- " table ",
- stderr);
- ut_print_name(stderr, trx, TRUE, new_name);
- fputs("\n"
- "InnoDB: is referenced in"
- " foreign key constraints\n"
- "InnoDB: which are not compatible"
- " with the new table definition.\n",
- stderr);
+ ib::error() << "In RENAME TABLE table "
+ << ut_get_name(trx, new_name)
+ << " is referenced in foreign key"
+ " constraints which are not compatible"
+ " with the new table definition.";
}
ut_a(DB_SUCCESS == dict_table_rename_in_cache(
@@ -5419,6 +4568,31 @@ end:
trx_rollback_to_savepoint(trx, NULL);
trx->error_state = DB_SUCCESS;
}
+
+ /* Check whether a virtual or stored column affects
+ the foreign key constraints of the table. */
+ if (dict_foreigns_has_s_base_col(
+ table->foreign_set, table)) {
+ err = DB_NO_FK_ON_S_BASE_COL;
+ ut_a(DB_SUCCESS == dict_table_rename_in_cache(
+ table, old_name, FALSE));
+ trx->error_state = DB_SUCCESS;
+ trx_rollback_to_savepoint(trx, NULL);
+ trx->error_state = DB_SUCCESS;
+ goto funct_exit;
+ }
+
+ /* Rebuild the virtual column set in the foreign key
+ constraints when the table undergoes a copy ALTER operation. */
+ dict_mem_table_free_foreign_vcol_set(table);
+ dict_mem_table_fill_foreign_vcol_set(table);
+
+ while (!fk_tables.empty()) {
+ dict_load_table(fk_tables.front(),
+ DICT_ERR_IGNORE_NONE);
+ fk_tables.pop_front();
+ }
+
table->data_dir_path= NULL;
}
@@ -5426,7 +4600,7 @@ funct_exit:
if (aux_fts_rename && err != DB_SUCCESS
&& table != NULL && (table->space != 0)) {
- char* orig_name = table->name;
+ char* orig_name = table->name.m_name;
trx_t* trx_bg = trx_allocate_for_background();
/* If the first fts_rename fails, the trx would
@@ -5445,9 +4619,9 @@ funct_exit:
in cache is not changed yet. If the reverting fails,
the ibd data may be left in the new database, which
can be fixed only manually. */
- table->name = const_cast<char*>(new_name);
+ table->name.m_name = const_cast<char*>(new_name);
fts_rename_aux_tables(table, old_name, trx_bg);
- table->name = orig_name;
+ table->name.m_name = orig_name;
trx_bg->dict_operation_lock_mode = 0;
trx_commit_for_mysql(trx_bg);
@@ -5459,6 +4633,7 @@ funct_exit:
}
if (commit) {
+ DEBUG_SYNC(trx->mysql_thd, "before_rename_table_commit");
trx_commit_for_mysql(trx);
}
@@ -5472,14 +4647,14 @@ funct_exit:
}
/*********************************************************************//**
-Checks that the index contains entries in an ascending order, unique
-constraint is not broken, and calculates the number of index entries
+Scans an index for either COUNT(*) or CHECK TABLE.
+If CHECK TABLE, checks that the index contains entries in ascending order,
+that the unique constraint is not broken, and calculates the number of index entries
in the read view of the current transaction.
-@return true if ok */
-UNIV_INTERN
-bool
-row_check_index_for_mysql(
-/*======================*/
+@return DB_SUCCESS or other error */
+dberr_t
+row_scan_index_for_mysql(
+/*=====================*/
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct
in MySQL handle */
const dict_index_t* index, /*!< in: index */
@@ -5488,23 +4663,23 @@ row_check_index_for_mysql(
{
dtuple_t* prev_entry = NULL;
ulint matched_fields;
- ulint matched_bytes;
byte* buf;
- ulint ret;
+ dberr_t ret;
rec_t* rec;
- bool is_ok = true;
int cmp;
ibool contains_null;
ulint i;
ulint cnt;
mem_heap_t* heap = NULL;
- ulint n_ext;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets;
rec_offs_init(offsets_);
*n_rows = 0;
+ /* R-tree leaf-level scans are not supported here. */
+ ut_ad(!dict_index_is_spatial(index));
+
if (dict_index_is_clust(index)) {
/* The clustered index of a table is always available.
During online ALTER TABLE that rebuilds the table, the
@@ -5518,10 +4693,11 @@ row_check_index_for_mysql(
/* Full-text indexes are implemented by auxiliary tables,
not the B-tree. We also skip secondary indexes that are
being created online. */
- return(true);
+ return(DB_SUCCESS);
}
- buf = static_cast<byte*>(mem_alloc(UNIV_PAGE_SIZE));
+ ulint bufsize = ut_max(UNIV_PAGE_SIZE, prebuilt->mysql_row_len);
+ buf = static_cast<byte*>(ut_malloc_nokey(bufsize));
heap = mem_heap_create(100);
cnt = 1000;
@@ -5531,6 +4707,7 @@ loop:
/* Check thd->killed every 1,000 scanned rows */
if (--cnt == 0) {
if (trx_is_interrupted(prebuilt->trx)) {
+ ret = DB_INTERRUPTED;
goto func_exit;
}
cnt = 1000;
@@ -5539,39 +4716,43 @@ loop:
switch (ret) {
case DB_SUCCESS:
break;
+ case DB_DEADLOCK:
+ case DB_LOCK_TABLE_FULL:
+ case DB_LOCK_WAIT_TIMEOUT:
+ case DB_INTERRUPTED:
+ goto func_exit;
default:
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: CHECK TABLE on ", stderr);
- dict_index_name_print(stderr, prebuilt->trx, index);
- fprintf(stderr, " returned %lu\n", ret);
+ ib::warn() << "CHECK TABLE on index " << index->name << " of"
+ " table " << index->table->name << " returned " << ret;
/* (this error is ignored by CHECK TABLE) */
/* fall through */
case DB_END_OF_INDEX:
+ ret = DB_SUCCESS;
func_exit:
- mem_free(buf);
+ ut_free(buf);
mem_heap_free(heap);
- return(is_ok);
+ return(ret);
}
*n_rows = *n_rows + 1;
+ /* else this code is doing handler::check() for CHECK TABLE */
+
/* row_search... returns the index record in buf, with the record origin
offset within buf stored in the first 4 bytes, because we have built a
dummy template. */
rec = buf + mach_read_from_4(buf);
- offsets = rec_get_offsets(rec, index, offsets_,
+ offsets = rec_get_offsets(rec, index, offsets_, true,
ULINT_UNDEFINED, &heap);
if (prev_entry != NULL) {
matched_fields = 0;
- matched_bytes = 0;
cmp = cmp_dtuple_rec_with_match(prev_entry, rec, offsets,
- &matched_fields,
- &matched_bytes);
+ &matched_fields);
contains_null = FALSE;
/* In a unique secondary index we allow equal key values if
@@ -5588,27 +4769,25 @@ func_exit:
}
}
+ const char* msg;
+
if (cmp > 0) {
- fputs("InnoDB: index records in a wrong order in ",
- stderr);
+ ret = DB_INDEX_CORRUPT;
+ msg = "index records in a wrong order in ";
not_ok:
- dict_index_name_print(stderr,
- prebuilt->trx, index);
- fputs("\n"
- "InnoDB: prev record ", stderr);
- dtuple_print(stderr, prev_entry);
- fputs("\n"
- "InnoDB: record ", stderr);
- rec_print_new(stderr, rec, offsets);
- putc('\n', stderr);
- is_ok = false;
+ ib::error()
+ << msg << index->name
+ << " of table " << index->table->name
+ << ": " << *prev_entry << ", "
+ << rec_offsets_print(rec, offsets);
+ /* Continue reading */
} else if (dict_index_is_unique(index)
&& !contains_null
&& matched_fields
>= dict_index_get_n_ordering_defined_by_user(
index)) {
-
- fputs("InnoDB: duplicate key in ", stderr);
+ ret = DB_DUPLICATE_KEY;
+ msg = "duplicate key in ";
goto not_ok;
}
}
@@ -5625,77 +4804,51 @@ not_ok:
tmp_heap = mem_heap_create(size);
- offsets = static_cast<ulint*>(
+ offsets = static_cast<offset_t*>(
mem_heap_dup(tmp_heap, offsets, size));
}
mem_heap_empty(heap);
prev_entry = row_rec_to_index_entry(
- rec, index, offsets, &n_ext, heap);
+ rec, index, offsets, heap);
if (UNIV_LIKELY_NULL(tmp_heap)) {
mem_heap_free(tmp_heap);
}
}
- ret = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0, ROW_SEL_NEXT);
+ ret = row_search_for_mysql(
+ buf, PAGE_CUR_G, prebuilt, 0, ROW_SEL_NEXT);
goto loop;
}
/*********************************************************************//**
-Determines if a table is a magic monitor table.
-@return true if monitor table */
-UNIV_INTERN
-bool
-row_is_magic_monitor_table(
-/*=======================*/
- const char* table_name) /*!< in: name of the table, in the
- form database/table_name */
-{
- const char* name; /* table_name without database/ */
- ulint len;
-
- name = dict_remove_db_name(table_name);
- len = strlen(name) + 1;
-
- return(STR_EQ(name, len, S_innodb_monitor)
- || STR_EQ(name, len, S_innodb_lock_monitor)
- || STR_EQ(name, len, S_innodb_tablespace_monitor)
- || STR_EQ(name, len, S_innodb_table_monitor)
-#ifdef UNIV_MEM_DEBUG
- || STR_EQ(name, len, S_innodb_mem_validate)
-#endif /* UNIV_MEM_DEBUG */
- );
-}
-
-/*********************************************************************//**
Initialize this module */
-UNIV_INTERN
void
row_mysql_init(void)
/*================*/
{
- mutex_create(
- row_drop_list_mutex_key,
- &row_drop_list_mutex, SYNC_NO_ORDER_CHECK);
+ mutex_create(LATCH_ID_ROW_DROP_LIST, &row_drop_list_mutex);
- UT_LIST_INIT(row_mysql_drop_list);
+ UT_LIST_INIT(
+ row_mysql_drop_list,
+ &row_mysql_drop_t::row_mysql_drop_list);
row_mysql_drop_list_inited = TRUE;
}
/*********************************************************************//**
Close this module */
-UNIV_INTERN
void
row_mysql_close(void)
/*================*/
{
ut_a(UT_LIST_GET_LEN(row_mysql_drop_list) == 0);
- mutex_free(&row_drop_list_mutex);
-
- row_mysql_drop_list_inited = FALSE;
+ if (row_mysql_drop_list_inited) {
+ mutex_free(&row_drop_list_mutex);
+ row_mysql_drop_list_inited = FALSE;
+ }
}
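row_mysql_close() is now guarded by the init flag, so shutdown paths may call it unconditionally, even if row_mysql_init() never ran or the module was already closed. The guarded-teardown pattern in isolation (toy names, not InnoDB APIs):

    #include <mutex>

    static std::mutex* drop_list_mutex;
    static bool drop_list_inited;

    void module_init()
    {
        drop_list_mutex = new std::mutex;
        drop_list_inited = true;
    }

    void module_close()  // safe to call twice, or without init
    {
        if (drop_list_inited) {
            delete drop_list_mutex;
            drop_list_mutex = nullptr;
            drop_list_inited = false;
        }
    }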
diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc
index fccfe651ea7..b13bf47ce95 100644
--- a/storage/innobase/row/row0purge.cc
+++ b/storage/innobase/row/row0purge.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
@@ -25,13 +25,9 @@ Created 3/14/1997 Heikki Tuuri
*******************************************************/
#include "row0purge.h"
-
-#ifdef UNIV_NONINL
-#include "row0purge.ic"
-#endif
-
#include "fsp0fsp.h"
#include "mach0data.h"
+#include "dict0stats.h"
#include "trx0rseg.h"
#include "trx0trx.h"
#include "trx0roll.h"
@@ -47,6 +43,10 @@ Created 3/14/1997 Heikki Tuuri
#include "log0log.h"
#include "srv0mon.h"
#include "srv0start.h"
+#include "handler.h"
+#include "ha_innodb.h"
+#include "fil0fil.h"
+#include "debug_sync.h"
/*************************************************************************
IMPORTANT NOTE: Any operation that generates redo MUST check that there
@@ -58,36 +58,10 @@ check.
If you make a change in this module make sure that no codepath is
introduced where a call to log_free_check() is bypassed. */
-/********************************************************************//**
-Creates a purge node to a query graph.
-@return own: purge node */
-UNIV_INTERN
-purge_node_t*
-row_purge_node_create(
-/*==================*/
- que_thr_t* parent, /*!< in: parent node */
- mem_heap_t* heap) /*!< in: memory heap where created */
-{
- purge_node_t* node;
-
- ut_ad(parent != NULL);
- ut_ad(heap != NULL);
-
- node = static_cast<purge_node_t*>(
- mem_heap_zalloc(heap, sizeof(*node)));
-
- node->common.type = QUE_NODE_PURGE;
- node->common.parent = parent;
- node->done = TRUE;
- node->heap = mem_heap_create(256);
-
- return(node);
-}
-
/***********************************************************//**
Repositions the pcur in the purge node on the clustered index record,
if found. If the record is not found, closes the pcur.
-@return TRUE if the record was found */
+@return TRUE if the record was found */
static
ibool
row_purge_reposition_pcur(
@@ -134,18 +108,18 @@ row_purge_remove_clust_if_poss_low(
mtr_t mtr;
rec_t* rec;
mem_heap_t* heap = NULL;
- ulint* offsets;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
rec_offs_init(offsets_);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_S)
+ || node->vcol_info.is_used());
index = dict_table_get_first_index(node->table);
log_free_check();
mtr_start(&mtr);
+ mtr.set_named_space(index->space);
if (!row_purge_reposition_pcur(mode, node, &mtr)) {
/* The record was already removed. */
@@ -155,22 +129,27 @@ row_purge_remove_clust_if_poss_low(
rec = btr_pcur_get_rec(&node->pcur);
offsets = rec_get_offsets(
- rec, index, offsets_, ULINT_UNDEFINED, &heap);
+ rec, index, offsets_, true, ULINT_UNDEFINED, &heap);
if (node->roll_ptr != row_get_rec_roll_ptr(rec, index, offsets)) {
/* Someone else has modified the record later: do not remove */
goto func_exit;
}
+ ut_ad(rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
+ /* In delete-marked records, DB_TRX_ID must
+ always refer to an existing undo log record. */
+ ut_ad(row_get_rec_trx_id(rec, index, offsets));
+
if (mode == BTR_MODIFY_LEAF) {
success = btr_cur_optimistic_delete(
btr_pcur_get_btr_cur(&node->pcur), 0, &mtr);
} else {
dberr_t err;
- ut_ad(mode == BTR_MODIFY_TREE);
+ ut_ad(mode == (BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE));
btr_cur_pessimistic_delete(
&err, FALSE, btr_pcur_get_btr_cur(&node->pcur), 0,
- RB_NONE, &mtr);
+ false, &mtr);
switch (err) {
case DB_SUCCESS:
@@ -218,7 +197,7 @@ row_purge_remove_clust_if_poss(
n_tries < BTR_CUR_RETRY_DELETE_N_TIMES;
n_tries++) {
if (row_purge_remove_clust_if_poss_low(
- node, BTR_MODIFY_TREE)) {
+ node, BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE)) {
return(true);
}
@@ -228,8 +207,55 @@ row_purge_remove_clust_if_poss(
return(false);
}
-/***********************************************************//**
-Determines if it is possible to remove a secondary index entry.
+/** Tries to store the secondary index cursor before opening the MySQL
+table for virtual index condition computation.
+@param[in,out] node row purge node
+@param[in] index secondary index
+@param[in,out] sec_pcur secondary index cursor
+@param[in,out] sec_mtr mini-transaction which holds
+ secondary index entry */
+static void row_purge_store_vsec_cur(
+ purge_node_t* node,
+ dict_index_t* index,
+ btr_pcur_t* sec_pcur,
+ mtr_t* sec_mtr)
+{
+ row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, sec_mtr);
+
+ if (!node->found_clust) {
+ return;
+ }
+
+ node->vcol_info.set_requested();
+
+ btr_pcur_store_position(sec_pcur, sec_mtr);
+
+ btr_pcurs_commit_specify_mtr(&node->pcur, sec_pcur, sec_mtr);
+}
+
+/** Tries to restore the secondary index cursor after opening the MySQL table
+@param[in,out] node row purge node
+@param[in] index secondary index
+@param[in,out] sec_pcur secondary index cursor
+@param[in,out] sec_mtr mini-transaction which holds the secondary index entry
+@param[in] is_tree true=pessimistic purge,
+ false=optimistic (leaf-page only)
+@return false in case of restore failure. */
+static bool row_purge_restore_vsec_cur(
+ purge_node_t* node,
+ dict_index_t* index,
+ btr_pcur_t* sec_pcur,
+ mtr_t* sec_mtr,
+ bool is_tree)
+{
+ sec_mtr->start();
+ sec_mtr->set_named_space(index->space);
+
+ return btr_pcur_restore_position(
+ is_tree ? BTR_PURGE_TREE : BTR_PURGE_LEAF,
+ sec_pcur, sec_mtr);
+}
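Together, these two helpers implement a save-position / release-latch / re-position protocol: the cursor position is persisted before the mini-transaction commits, the MySQL table is opened with no index latches held, and the cursor is then restored, which may fail if the record was purged or moved in the meantime. A standalone sketch of the idea, with a std::map standing in for the secondary index (hypothetical names; the real counterparts are btr_pcur_store_position() and btr_pcur_restore_position()):

    #include <map>
    #include <string>

    using Tree = std::map<std::string, int>;

    struct SavedCursor {
        std::string key;
    };

    static void store_position(SavedCursor& cur, Tree::const_iterator it)
    {
        cur.key = it->first;  // remember the key before releasing latches
    }

    static bool restore_position(const Tree& tree, const SavedCursor& cur,
                                 Tree::const_iterator& it)
    {
        it = tree.find(cur.key);  // may fail if the entry went away
        return it != tree.end();
    }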
+
+/** Determines if it is possible to remove a secondary index entry.
Removal is possible if the secondary index entry does not refer to any
not delete marked version of a clustered index record where DB_TRX_ID
is newer than the purge view.
@@ -242,40 +268,108 @@ inserts a record that the secondary index entry would refer to.
However, in that case, the user transaction would also re-insert the
secondary index entry after purge has removed it and released the leaf
page latch.
-@return true if the secondary index record can be purged */
-UNIV_INTERN
+@param[in,out] node row purge node
+@param[in] index secondary index
+@param[in] entry secondary index entry
+@param[in,out] sec_pcur secondary index cursor or NULL
+ if it is called for purge buffering
+ operation.
+@param[in,out] sec_mtr mini-transaction which holds
+ secondary index entry or NULL if it is
+ called for purge buffering operation.
+@param[in] is_tree true=pessimistic purge,
+ false=optimistic (leaf-page only)
+@return true if the secondary index record can be purged */
bool
row_purge_poss_sec(
-/*===============*/
- purge_node_t* node, /*!< in/out: row purge node */
- dict_index_t* index, /*!< in: secondary index */
- const dtuple_t* entry) /*!< in: secondary index entry */
+ purge_node_t* node,
+ dict_index_t* index,
+ const dtuple_t* entry,
+ btr_pcur_t* sec_pcur,
+ mtr_t* sec_mtr,
+ bool is_tree)
{
bool can_delete;
mtr_t mtr;
ut_ad(!dict_index_is_clust(index));
+
+ const bool store_cur = sec_mtr && !node->vcol_info.is_used()
+ && dict_index_has_virtual(index);
+
+ if (store_cur) {
+ row_purge_store_vsec_cur(node, index, sec_pcur, sec_mtr);
+ ut_ad(sec_mtr->has_committed()
+ == node->vcol_info.is_requested());
+
+ /* The PRIMARY KEY value was not found in the clustered
+ index, but the secondary index record was found. We can
+ purge the secondary index record. */
+ if (!node->vcol_info.is_requested()) {
+ ut_ad(!node->found_clust);
+ return true;
+ }
+ }
+
+retry_purge_sec:
mtr_start(&mtr);
can_delete = !row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, &mtr)
- || !row_vers_old_has_index_entry(TRUE,
+ || !row_vers_old_has_index_entry(true,
btr_pcur_get_rec(&node->pcur),
- &mtr, index, entry);
+ &mtr, index, entry,
+ node->roll_ptr, node->trx_id,
+ &node->vcol_info);
+
+ if (node->vcol_info.is_first_fetch()) {
+ ut_ad(store_cur);
+
+ const TABLE* t= node->vcol_info.table();
+ DBUG_LOG("purge", "retry " << t
+ << (is_tree ? " tree" : " leaf")
+ << index->name << "," << index->table->name
+ << ": " << rec_printer(entry).str());
+
+ ut_ad(mtr.has_committed());
+
+ if (t) {
+ node->vcol_info.set_used();
+ goto retry_purge_sec;
+ }
+
+ node->table = NULL;
+ sec_pcur = NULL;
+ return false;
+ }
/* Persistent cursor is closed if reposition fails. */
if (node->found_clust) {
btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
} else {
- mtr_commit(&mtr);
+ mtr.commit();
+ }
+
+ ut_ad(mtr.has_committed());
+
+ /* Reset the virtual column info if it was requested
+ but ended up unused. */
+ if (node->vcol_info.is_requested()
+ && !node->vcol_info.is_used()) {
+ node->vcol_info.reset();
+ }
+
+ if (store_cur && !row_purge_restore_vsec_cur(
+ node, index, sec_pcur, sec_mtr, is_tree)) {
+ return false;
}
- return(can_delete);
+ return can_delete;
}
/***************************************************************
Removes a secondary index entry if possible, by modifying the
index tree. Does not try to buffer the delete.
-@return TRUE if success or if not found */
+@return TRUE if success or if not found */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
ibool
row_purge_remove_sec_if_poss_tree(
@@ -285,7 +379,6 @@ row_purge_remove_sec_if_poss_tree(
const dtuple_t* entry) /*!< in: index entry */
{
btr_pcur_t pcur;
- btr_cur_t* btr_cur;
ibool success = TRUE;
dberr_t err;
mtr_t mtr;
@@ -293,13 +386,13 @@ row_purge_remove_sec_if_poss_tree(
log_free_check();
mtr_start(&mtr);
+ mtr.set_named_space(index->space);
- if (*index->name == TEMP_INDEX_PREFIX) {
- /* The index->online_status may change if the
- index->name starts with TEMP_INDEX_PREFIX (meaning
- that the index is or was being created online). It is
- protected by index->lock. */
- mtr_x_lock(dict_index_get_lock(index), &mtr);
+ if (!index->is_committed()) {
+ /* The index->online_status may change if the index is
+ or was being created online, but not committed yet. It
+ is protected by index->lock. */
+ mtr_sx_lock(dict_index_get_lock(index), &mtr);
if (dict_index_is_online_ddl(index)) {
/* Online secondary index creation will not
@@ -311,13 +404,15 @@ row_purge_remove_sec_if_poss_tree(
}
} else {
/* For secondary indexes,
- index->online_status==ONLINE_INDEX_CREATION unless
- index->name starts with TEMP_INDEX_PREFIX. */
+ index->online_status==ONLINE_INDEX_COMPLETE if
+ index->is_committed(). */
ut_ad(!dict_index_is_online_ddl(index));
}
- search_result = row_search_index_entry(index, entry, BTR_MODIFY_TREE,
- &pcur, &mtr);
+ search_result = row_search_index_entry(
+ index, entry,
+ BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
+ &pcur, &mtr);
switch (search_result) {
case ROW_NOT_FOUND:
@@ -344,36 +439,35 @@ row_purge_remove_sec_if_poss_tree(
ut_error;
}
- btr_cur = btr_pcur_get_btr_cur(&pcur);
-
/* We should remove the index record if no later version of the row,
which cannot be purged yet, requires its existence. If some version
requires it, we should do nothing. */
- if (row_purge_poss_sec(node, index, entry)) {
+ if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, true)) {
+
/* Remove the index record, which should have been
marked for deletion. */
- if (!rec_get_deleted_flag(btr_cur_get_rec(btr_cur),
+ if (!rec_get_deleted_flag(btr_cur_get_rec(
+ btr_pcur_get_btr_cur(&pcur)),
dict_table_is_comp(index->table))) {
- fputs("InnoDB: tried to purge sec index entry not"
- " marked for deletion in\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, NULL, index);
- fputs("\n"
- "InnoDB: tuple ", stderr);
- dtuple_print(stderr, entry);
- fputs("\n"
- "InnoDB: record ", stderr);
- rec_print(stderr, btr_cur_get_rec(btr_cur), index);
- putc('\n', stderr);
+ ib::error()
+ << "tried to purge non-delete-marked record"
+ " in index " << index->name
+ << " of table " << index->table->name
+ << ": tuple: " << *entry
+ << ", record: " << rec_index_print(
+ btr_cur_get_rec(
+ btr_pcur_get_btr_cur(&pcur)),
+ index);
ut_ad(0);
goto func_exit;
}
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
- RB_NONE, &mtr);
+ btr_cur_pessimistic_delete(&err, FALSE,
+ btr_pcur_get_btr_cur(&pcur),
+ 0, false, &mtr);
switch (UNIV_EXPECT(err, DB_SUCCESS)) {
case DB_SUCCESS:
break;
@@ -385,6 +479,13 @@ row_purge_remove_sec_if_poss_tree(
}
}
+ if (node->vcol_op_failed()) {
+ ut_ad(mtr.has_committed());
+ ut_ad(!pcur.old_rec_buf);
+ ut_ad(pcur.pos_state == BTR_PCUR_NOT_POSITIONED);
+ return false;
+ }
+
func_exit:
btr_pcur_close(&pcur);
func_exit_no_pcur:
@@ -396,8 +497,8 @@ func_exit_no_pcur:
/***************************************************************
Removes a secondary index entry without modifying the index tree,
if possible.
-@retval true if success or if not found
-@retval false if row_purge_remove_sec_if_poss_tree() should be invoked */
+@retval true if success or if not found
+@retval false if row_purge_remove_sec_if_poss_tree() should be invoked */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
bool
row_purge_remove_sec_if_poss_leaf(
@@ -408,19 +509,25 @@ row_purge_remove_sec_if_poss_leaf(
{
mtr_t mtr;
btr_pcur_t pcur;
- ulint mode;
+ enum btr_latch_mode mode;
enum row_search_result search_result;
bool success = true;
log_free_check();
-
+ ut_ad(index->table == node->table);
+ ut_ad(!dict_table_is_temporary(index->table));
mtr_start(&mtr);
+ mtr.set_named_space(index->space);
+
+ if (!index->is_committed()) {
+ /* For uncommitted spatial index, we also skip the purge. */
+ if (dict_index_is_spatial(index)) {
+ goto func_exit_no_pcur;
+ }
- if (*index->name == TEMP_INDEX_PREFIX) {
- /* The index->online_status may change if the
- index->name starts with TEMP_INDEX_PREFIX (meaning
- that the index is or was being created online). It is
- protected by index->lock. */
+ /* The index->online_status may change if the
+ index is or was being created online, but not
+ committed yet. It is protected by index->lock. */
mtr_s_lock(dict_index_get_lock(index), &mtr);
if (dict_index_is_online_ddl(index)) {
@@ -432,30 +539,45 @@ row_purge_remove_sec_if_poss_leaf(
goto func_exit_no_pcur;
}
- mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED | BTR_DELETE;
+ mode = BTR_PURGE_LEAF_ALREADY_S_LATCHED;
} else {
/* For secondary indexes,
- index->online_status==ONLINE_INDEX_CREATION unless
- index->name starts with TEMP_INDEX_PREFIX. */
+ index->online_status==ONLINE_INDEX_COMPLETE if
+ index->is_committed(). */
ut_ad(!dict_index_is_online_ddl(index));
- mode = BTR_MODIFY_LEAF | BTR_DELETE;
+ /* Change buffering is disabled for spatial index and
+ virtual index. */
+ mode = (dict_index_is_spatial(index)
+ || dict_index_has_virtual(index))
+ ? BTR_MODIFY_LEAF
+ : BTR_PURGE_LEAF;
}
/* Set the purge node for the call to row_purge_poss_sec(). */
pcur.btr_cur.purge_node = node;
- /* Set the query thread, so that ibuf_insert_low() will be
- able to invoke thd_get_trx(). */
- pcur.btr_cur.thr = static_cast<que_thr_t*>(que_node_get_parent(node));
+ if (dict_index_is_spatial(index)) {
+ rw_lock_sx_lock(dict_index_get_lock(index));
+ pcur.btr_cur.thr = NULL;
+ } else {
+ /* Set the query thread, so that ibuf_insert_low() will be
+ able to invoke thd_get_trx(). */
+ pcur.btr_cur.thr = static_cast<que_thr_t*>(
+ que_node_get_parent(node));
+ }
search_result = row_search_index_entry(
index, entry, mode, &pcur, &mtr);
+ if (dict_index_is_spatial(index)) {
+ rw_lock_sx_unlock(dict_index_get_lock(index));
+ }
+
switch (search_result) {
case ROW_FOUND:
/* Before attempting to purge a record, check
if it is safe to do so. */
- if (row_purge_poss_sec(node, index, entry)) {
+ if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, false)) {
btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur);
/* Only delete-marked records should be purged. */
@@ -463,19 +585,15 @@ row_purge_remove_sec_if_poss_leaf(
btr_cur_get_rec(btr_cur),
dict_table_is_comp(index->table))) {
- fputs("InnoDB: tried to purge sec index"
- " entry not marked for deletion in\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, NULL, index);
- fputs("\n"
- "InnoDB: tuple ", stderr);
- dtuple_print(stderr, entry);
- fputs("\n"
- "InnoDB: record ", stderr);
- rec_print(stderr, btr_cur_get_rec(btr_cur),
- index);
- putc('\n', stderr);
-
+ ib::error()
+ << "tried to purge non-delete-marked"
+ " record" " in index " << index->name
+ << " of table " << index->table->name
+ << ": tuple: " << *entry
+ << ", record: "
+ << rec_index_print(
+ btr_cur_get_rec(btr_cur),
+ index);
ut_ad(0);
btr_pcur_close(&pcur);
@@ -483,12 +601,54 @@ row_purge_remove_sec_if_poss_leaf(
goto func_exit_no_pcur;
}
+ if (dict_index_is_spatial(index)) {
+ const page_t* page;
+ const trx_t* trx = NULL;
+
+ if (btr_cur->rtr_info != NULL
+ && btr_cur->rtr_info->thr != NULL) {
+ trx = thr_get_trx(
+ btr_cur->rtr_info->thr);
+ }
+
+ page = btr_cur_get_page(btr_cur);
+
+ if (!lock_test_prdt_page_lock(
+ trx,
+ page_get_space_id(page),
+ page_get_page_no(page))
+ && page_get_n_recs(page) < 2
+ && btr_cur_get_block(btr_cur)
+ ->page.id.page_no() !=
+ dict_index_get_page(index)) {
+ /* This is the last record on the page,
+ and it has a "page" lock on it, which
+ means a search still depends on it,
+ so do not delete it. */
+ DBUG_LOG("purge",
+ "skip purging last"
+ " record on page "
+ << btr_cur_get_block(btr_cur)
+ ->page.id);
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ return(success);
+ }
+ }
+
if (!btr_cur_optimistic_delete(btr_cur, 0, &mtr)) {
/* The index entry could not be deleted. */
success = false;
}
}
+
+ if (node->vcol_op_failed()) {
+ btr_pcur_close(&pcur);
+ return false;
+ }
+
/* (The index entry is still needed,
or the deletion succeeded) */
/* fall through */
@@ -499,13 +659,13 @@ row_purge_remove_sec_if_poss_leaf(
case ROW_NOT_FOUND:
/* The index entry does not exist, nothing to do. */
btr_pcur_close(&pcur);
- func_exit_no_pcur:
+func_exit_no_pcur:
mtr_commit(&mtr);
return(success);
}
ut_error;
- return(FALSE);
+ return(false);
}
/***********************************************************//**
@@ -535,6 +695,10 @@ row_purge_remove_sec_if_poss(
return;
}
retry:
+ if (node->vcol_op_failed()) {
+ return;
+ }
+
success = row_purge_remove_sec_if_poss_tree(node, index, entry);
/* The delete operation may fail if we have little
file space left: TODO: easiest to crash the database
@@ -552,6 +716,25 @@ retry:
ut_a(success);
}
+/** Skip uncommitted virtual indexes on newly added virtual columns.
+@param[in,out] index dict index object */
+static
+inline
+void
+row_purge_skip_uncommitted_virtual_index(
+ dict_index_t*& index)
+{
+ /* We need to skip virtual indexes which are not
+ committed yet. This is safe because such indexes are
+ newly created by ALTER TABLE, and because we do
+ not support LOCK=NONE when adding an index on a newly
+ added virtual column. */
+ while (index != NULL && dict_index_has_virtual(index)
+ && !index->is_committed() && index->has_new_v_col) {
+ index = dict_table_get_next_index(index);
+ }
+}
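This helper has the same "advance while the predicate holds" shape as dict_table_skip_corrupt_index(): the caller's pointer is moved past every virtual index that is not yet committed. Reduced to a toy singly linked list (illustrative only):

    struct ToyIndex {
        bool committed;
        bool has_virtual;
        ToyIndex* next;
    };

    // Advance past virtual indexes that are not committed yet.
    static void skip_uncommitted_virtual(ToyIndex*& index)
    {
        while (index && index->has_virtual && !index->committed) {
            index = index->next;
        }
    }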
+
/***********************************************************//**
Purges a delete marking of a record.
@retval true if the row was not found, or it was successfully removed
@@ -571,14 +754,23 @@ row_purge_del_mark(
/* skip corrupted secondary index */
dict_table_skip_corrupt_index(node->index);
+ row_purge_skip_uncommitted_virtual_index(node->index);
+
if (!node->index) {
break;
}
if (node->index->type != DICT_FTS) {
dtuple_t* entry = row_build_index_entry_low(
- node->row, NULL, node->index, heap);
+ node->row, NULL, node->index,
+ heap, ROW_BUILD_FOR_PURGE);
row_purge_remove_sec_if_poss(node, node->index, entry);
+
+ if (node->vcol_op_failed()) {
+ mem_heap_free(heap);
+ return false;
+ }
+
mem_heap_empty(heap);
}
@@ -605,9 +797,9 @@ row_purge_upd_exist_or_extern_func(
{
mem_heap_t* heap;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_S)
+ || node->vcol_info.is_used());
+ ut_ad(!node->table->skip_alter_undo);
if (node->rec_type == TRX_UNDO_UPD_DEL_REC
|| (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
@@ -620,6 +812,8 @@ row_purge_upd_exist_or_extern_func(
while (node->index != NULL) {
dict_table_skip_corrupt_index(node->index);
+ row_purge_skip_uncommitted_virtual_index(node->index);
+
if (!node->index) {
break;
}
@@ -628,8 +822,17 @@ row_purge_upd_exist_or_extern_func(
thr, NULL, NULL)) {
/* Build the older version of the index entry */
dtuple_t* entry = row_build_index_entry_low(
- node->row, NULL, node->index, heap);
+ node->row, NULL, node->index,
+ heap, ROW_BUILD_FOR_PURGE);
row_purge_remove_sec_if_poss(node, node->index, entry);
+
+ if (node->vcol_op_failed()) {
+ ut_ad(!node->table);
+ mem_heap_free(heap);
+ return;
+ }
+ ut_ad(node->table);
+
mem_heap_empty(heap);
}
@@ -674,17 +877,21 @@ skip_secondaries:
&is_insert, &rseg_id,
&page_no, &offset);
- rseg = trx_sys_get_nth_rseg(trx_sys, rseg_id);
+ rseg = trx_sys->rseg_array[rseg_id];
+
ut_a(rseg != NULL);
- ut_a(rseg->id == rseg_id);
+ ut_ad(rseg->id == rseg_id);
+ ut_ad(rseg->is_persistent());
mtr_start(&mtr);
- /* We have to acquire an X-latch to the clustered
- index tree */
+ /* We have to acquire an SX-latch to the clustered
+ index tree (exclude other tree changes) */
index = dict_table_get_first_index(node->table);
- mtr_x_lock(dict_index_get_lock(index), &mtr);
+ mtr_sx_lock(dict_index_get_lock(index), &mtr);
+
+ mtr.set_named_space(index->space);
/* NOTE: we must also acquire an X-latch to the
root page of the tree. We will need it when we
@@ -698,7 +905,8 @@ skip_secondaries:
btr_root_get(index, &mtr);
block = buf_page_get(
- rseg->space, 0, page_no, RW_X_LATCH, &mtr);
+ page_id_t(rseg->space, page_no),
+ univ_page_size, RW_X_LATCH, &mtr);
buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
@@ -711,7 +919,7 @@ skip_secondaries:
index,
data_field + dfield_get_len(&ufield->new_val)
- BTR_EXTERN_FIELD_REF_SIZE,
- NULL, NULL, NULL, 0, RB_NONE, &mtr);
+ NULL, NULL, NULL, 0, false, &mtr);
mtr_commit(&mtr);
}
}
@@ -740,10 +948,8 @@ row_purge_parse_undo_rec(
{
dict_index_t* clust_index;
byte* ptr;
- trx_t* trx;
undo_no_t undo_no;
table_id_t table_id;
- trx_id_t trx_id;
roll_ptr_t roll_ptr;
ulint info_bits;
ulint type;
@@ -758,47 +964,80 @@ row_purge_parse_undo_rec(
node->rec_type = type;
if (type == TRX_UNDO_UPD_DEL_REC && !*updated_extern) {
-
- return(false);
+skip:
+ node->table = NULL;
+ return false;
}
- ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
+ ptr = trx_undo_update_rec_get_sys_cols(ptr, &node->trx_id, &roll_ptr,
&info_bits);
- node->table = NULL;
+
+ if (node->is_skipped(table_id)) {
+ goto skip;
+ }
/* Prevent DROP TABLE etc. from running when we are doing the purge
for this row */
+try_again:
rw_lock_s_lock_inline(&dict_operation_lock, 0, __FILE__, __LINE__);
node->table = dict_table_open_on_id(
table_id, FALSE, DICT_TABLE_OP_NORMAL);
+ trx_id_t trx_id = TRX_ID_MAX;
+
if (node->table == NULL) {
/* The table has been dropped: no need to do purge */
goto err_exit;
}
- if (node->table->file_unreadable) {
- /* We skip purge of missing .ibd files */
+ ut_ad(!dict_table_is_temporary(node->table));
- dict_table_close(node->table, FALSE, FALSE);
+ if (!fil_table_accessible(node->table)) {
+ goto inaccessible;
+ }
- node->table = NULL;
+ if (node->table->n_v_cols && !node->table->vc_templ
+ && dict_table_has_indexed_v_cols(node->table)) {
+ /* Need server fully up for virtual column computation */
+ if (!mysqld_server_started) {
- goto err_exit;
+ dict_table_close(node->table, FALSE, FALSE);
+ rw_lock_s_unlock(&dict_operation_lock);
+ if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
+ return(false);
+ }
+ os_thread_sleep(1000000);
+ goto try_again;
+ }
+
+ node->vcol_info.set_requested();
+ node->vcol_info.set_used();
+ node->vcol_info.set_table(innobase_init_vc_templ(node->table));
+ node->vcol_info.set_used();
}
clust_index = dict_table_get_first_index(node->table);
- if (clust_index == NULL) {
+ if (!clust_index || clust_index->is_corrupted()) {
/* The table was corrupt in the data dictionary.
dict_set_corrupted() works on an index, and
we do not have an index to call it with. */
+inaccessible:
+ DBUG_ASSERT(table_id == node->table->id);
+ trx_id = node->table->def_trx_id;
+ if (!trx_id) {
+ trx_id = TRX_ID_MAX;
+ }
close_exit:
dict_table_close(node->table, FALSE, FALSE);
+ node->table = NULL;
err_exit:
rw_lock_s_unlock(&dict_operation_lock);
+ if (table_id) {
+ node->skip(table_id, trx_id);
+ }
return(false);
}
@@ -807,16 +1046,16 @@ err_exit:
&& !*updated_extern) {
/* Purge requires no changes to indexes: we may return */
+ table_id = 0;
goto close_exit;
}
ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
node->heap);
- trx = thr_get_trx(thr);
-
- ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
- roll_ptr, info_bits, trx,
+ ptr = trx_undo_update_rec_get_update(ptr, clust_index, type,
+ node->trx_id,
+ roll_ptr, info_bits,
node->heap, &(node->update));
/* Read to the partial row the fields that occur in indexes */
@@ -840,9 +1079,9 @@ row_purge_record_func(
/*==================*/
purge_node_t* node, /*!< in: row purge node */
trx_undo_rec_t* undo_rec, /*!< in: record to purge */
-#ifdef UNIV_DEBUG
+#if defined UNIV_DEBUG || defined WITH_WSREP
const que_thr_t*thr, /*!< in: query thread */
-#endif /* UNIV_DEBUG */
+#endif /* UNIV_DEBUG || WITH_WSREP */
bool updated_extern) /*!< in: whether external columns
were updated */
{
@@ -850,18 +1089,25 @@ row_purge_record_func(
bool purged = true;
ut_ad(!node->found_clust);
+ ut_ad(!node->table->skip_alter_undo);
clust_index = dict_table_get_first_index(node->table);
node->index = dict_table_get_next_index(clust_index);
+ ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr));
switch (node->rec_type) {
case TRX_UNDO_DEL_MARK_REC:
purged = row_purge_del_mark(node);
- if (!purged) {
- break;
+ if (purged) {
+ if (node->table->stat_initialized
+ && srv_stats_include_delete_marked) {
+ dict_stats_update_if_needed(
+ node->table,
+ thr->graph->trx->mysql_thd);
+ }
+ MONITOR_INC(MONITOR_N_DEL_ROW_PURGE);
}
- MONITOR_INC(MONITOR_N_DEL_ROW_PURGE);
break;
default:
if (!updated_extern) {
@@ -887,13 +1133,13 @@ row_purge_record_func(
return(purged);
}
-#ifdef UNIV_DEBUG
+#if defined UNIV_DEBUG || defined WITH_WSREP
# define row_purge_record(node,undo_rec,thr,updated_extern) \
row_purge_record_func(node,undo_rec,thr,updated_extern)
-#else /* UNIV_DEBUG */
+#else /* UNIV_DEBUG || WITH_WSREP */
# define row_purge_record(node,undo_rec,thr,updated_extern) \
row_purge_record_func(node,undo_rec,updated_extern)
-#endif /* UNIV_DEBUG */
+#endif /* UNIV_DEBUG || WITH_WSREP */
/***********************************************************//**
Fetches an undo log record and does the purge for the recorded operation.
@@ -916,10 +1162,15 @@ row_purge(
bool purged = row_purge_record(
node, undo_rec, thr, updated_extern);
- rw_lock_s_unlock(&dict_operation_lock);
+ if (!node->vcol_info.is_used()) {
+ rw_lock_s_unlock(&dict_operation_lock);
+ }
+
+ ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_S));
if (purged
- || srv_shutdown_state != SRV_SHUTDOWN_NONE) {
+ || srv_shutdown_state != SRV_SHUTDOWN_NONE
+ || node->vcol_op_failed()) {
return;
}
@@ -937,30 +1188,17 @@ row_purge_end(
/*==========*/
que_thr_t* thr) /*!< in: query thread */
{
- purge_node_t* node;
-
ut_ad(thr);
- node = static_cast<purge_node_t*>(thr->run_node);
-
- ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
-
- thr->run_node = que_node_get_parent(node);
-
- node->undo_recs = NULL;
-
- node->done = TRUE;
+ thr->run_node = static_cast<purge_node_t*>(thr->run_node)->end();
ut_a(thr->run_node != NULL);
-
- mem_heap_empty(node->heap);
}
/***********************************************************//**
Does the purge operation for a single undo log record. This is a high-level
function used in an SQL execution graph.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
row_purge_step(
/*===========*/
@@ -970,18 +1208,27 @@ row_purge_step(
node = static_cast<purge_node_t*>(thr->run_node);
- node->table = NULL;
- node->row = NULL;
- node->ref = NULL;
- node->index = NULL;
- node->update = NULL;
- node->found_clust = FALSE;
- node->rec_type = ULINT_UNDEFINED;
- node->cmpl_info = ULINT_UNDEFINED;
-
- ut_a(!node->done);
+ node->start();
- ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
+#ifdef UNIV_DEBUG
+ srv_slot_t *slot = thr->thread_slot;
+ ut_ad(slot);
+
+ rw_lock_x_lock(&slot->debug_sync_lock);
+ while (UT_LIST_GET_LEN(slot->debug_sync)) {
+ srv_slot_t::debug_sync_t *sync =
+ UT_LIST_GET_FIRST(slot->debug_sync);
+ const char* sync_str = reinterpret_cast<char*>(&sync[1]);
+ bool result = debug_sync_set_action(current_thd,
+ sync_str,
+ strlen(sync_str));
+ ut_a(!result);
+
+ UT_LIST_REMOVE(slot->debug_sync, sync);
+ ut_free(sync);
+ }
+ rw_lock_x_unlock(&slot->debug_sync_lock);
+#endif
if (!(node->undo_recs == NULL || ib_vector_is_empty(node->undo_recs))) {
trx_purge_rec_t*purge_rec;
@@ -997,11 +1244,14 @@ row_purge_step(
row_purge_end(thr);
} else {
thr->run_node = node;
+ node->vcol_info.reset();
}
} else {
row_purge_end(thr);
}
+ innobase_reset_background_thd(thr_get_trx(thr)->mysql_thd);
+
return(thr);
}
@@ -1028,14 +1278,15 @@ purge_node_t::validate_pcur()
return(true);
}
- if (pcur.old_stored != BTR_PCUR_OLD_STORED) {
+ if (!pcur.old_stored) {
return(true);
}
- dict_index_t* clust_index = pcur.btr_cur.index;
+ dict_index_t* clust_index = pcur.btr_cur.index;
- ulint* offsets = rec_get_offsets(
- pcur.old_rec, clust_index, NULL, pcur.old_n_fields, &heap);
+ offset_t* offsets = rec_get_offsets(
+ pcur.old_rec, clust_index, NULL, true,
+ pcur.old_n_fields, &heap);
/* Here we are comparing the purge ref record and the stored initial
part in persistent cursor. Both cases we store n_uniq fields of the
@@ -1044,9 +1295,9 @@ purge_node_t::validate_pcur()
int st = cmp_dtuple_rec(ref, pcur.old_rec, offsets);
if (st != 0) {
- fprintf(stderr, "Purge node pcur validation failed\n");
- dtuple_print(stderr, ref);
- rec_print(stderr, pcur.old_rec, clust_index);
+ ib::error() << "Purge node pcur validation failed";
+ ib::error() << rec_printer(ref).str();
+ ib::error() << rec_printer(pcur.old_rec, offsets).str();
return(false);
}
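
Throughout this patch, offsets arrays change from ulint* to the narrower offset_t*, and rec_get_offsets() gains a bool that tells it whether the record is on a leaf page. A before/after sketch of a typical call site:

	offset_t	offsets_[REC_OFFS_NORMAL_SIZE];
	offset_t*	offsets = offsets_;
	rec_offs_init(offsets_);

	/* before: rec_get_offsets(rec, index, offsets,
				   ULINT_UNDEFINED, &heap); */
	offsets = rec_get_offsets(rec, index, offsets, true /* leaf */,
				  ULINT_UNDEFINED, &heap);
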
diff --git a/storage/innobase/row/row0quiesce.cc b/storage/innobase/row/row0quiesce.cc
index 26352ec6dce..b80b387b0fa 100644
--- a/storage/innobase/row/row0quiesce.cc
+++ b/storage/innobase/row/row0quiesce.cc
@@ -26,15 +26,14 @@ Created 2012-02-08 by Sunny Bains.
#include "row0quiesce.h"
#include "row0mysql.h"
-
-#ifdef UNIV_NONINL
-#include "row0quiesce.ic"
-#endif
-
#include "ibuf0ibuf.h"
#include "srv0start.h"
#include "trx0purge.h"
+#ifdef HAVE_MY_AES_H
+#include <my_aes.h>
+#endif
+
/*********************************************************************//**
Write the meta data (index user fields) config file.
@return DB_SUCCESS or error code. */
@@ -65,7 +64,7 @@ row_quiesce_write_index_fields(
ib_senderrf(
thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while writing index fields.");
return(DB_IO_ERROR);
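
ib_senderrf() is a varargs function, so the int produced by errno is not converted to the width its ER_IO_WRITE_ERROR format consumes; mismatched widths in a varargs call are undefined behavior, hence the explicit widening at every call site below. The same rule in miniature, outside InnoDB:

	#include <cerrno>
	#include <cstdio>
	#include <cstring>

	int main()
	{
		errno = EACCES;
		/* %lu consumes an unsigned long; errno is an int,
		so widen it explicitly before the varargs call. */
		std::printf("error %lu: %s\n",
			    (unsigned long) errno, std::strerror(errno));
	}
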
@@ -85,7 +84,7 @@ row_quiesce_write_index_fields(
ib_senderrf(
thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while writing index column.");
return(DB_IO_ERROR);
@@ -119,7 +118,7 @@ row_quiesce_write_indexes(
if (fwrite(row, 1, sizeof(row), file) != sizeof(row)) {
ib_senderrf(
thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while writing index count.");
return(DB_IO_ERROR);
@@ -173,7 +172,7 @@ row_quiesce_write_indexes(
ib_senderrf(
thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while writing index meta-data.");
return(DB_IO_ERROR);
@@ -194,7 +193,7 @@ row_quiesce_write_indexes(
ib_senderrf(
thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while writing index name.");
return(DB_IO_ERROR);
@@ -258,7 +257,7 @@ row_quiesce_write_table(
if (fwrite(row, 1, sizeof(row), file) != sizeof(row)) {
ib_senderrf(
thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while writing table column data.");
return(DB_IO_ERROR);
@@ -285,7 +284,7 @@ row_quiesce_write_table(
ib_senderrf(
thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while writing column name.");
return(DB_IO_ERROR);
@@ -317,7 +316,7 @@ row_quiesce_write_header(
if (fwrite(&value, 1, sizeof(value), file) != sizeof(value)) {
ib_senderrf(
thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while writing meta-data version number.");
return(DB_IO_ERROR);
@@ -331,8 +330,7 @@ row_quiesce_write_header(
if (hostname == 0) {
static const char NullHostname[] = "Hostname unknown";
- ib_logf(IB_LOG_LEVEL_WARN,
- "Unable to determine server hostname.");
+ ib::warn() << "Unable to determine server hostname.";
hostname = NullHostname;
}
@@ -348,15 +346,15 @@ row_quiesce_write_header(
ib_senderrf(
thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while writing hostname.");
return(DB_IO_ERROR);
}
/* The table name includes the NUL byte. */
- ut_a(table->name != 0);
- len = static_cast<ib_uint32_t>(strlen(table->name) + 1);
+ ut_a(table->name.m_name != NULL);
+ len = static_cast<ib_uint32_t>(strlen(table->name.m_name) + 1);
/* Write the table name. */
mach_write_to_4(value, len);
@@ -364,11 +362,11 @@ row_quiesce_write_header(
DBUG_EXECUTE_IF("ib_export_io_write_failure_6", close(fileno(file)););
if (fwrite(&value, 1, sizeof(value), file) != sizeof(value)
- || fwrite(table->name, 1, len, file) != len) {
+ || fwrite(table->name.m_name, 1, len, file) != len) {
ib_senderrf(
thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while writing table name.");
return(DB_IO_ERROR);
@@ -384,7 +382,7 @@ row_quiesce_write_header(
if (fwrite(row, 1, sizeof(ib_uint64_t), file) != sizeof(ib_uint64_t)) {
ib_senderrf(
thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while writing table autoinc value.");
return(DB_IO_ERROR);
@@ -408,7 +406,7 @@ row_quiesce_write_header(
if (fwrite(row, 1, sizeof(row), file) != sizeof(row)) {
ib_senderrf(
thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno),
+ (ulong) errno, strerror(errno),
"while writing table meta-data.");
return(DB_IO_ERROR);
@@ -433,7 +431,7 @@ row_quiesce_write_cfg(
srv_get_meta_data_filename(table, name, sizeof(name));
- ib_logf(IB_LOG_LEVEL_INFO, "Writing table metadata to '%s'", name);
+ ib::info() << "Writing table metadata to '" << name << "'";
FILE* file = fopen(name, "w+b");
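
The printf-style ib_logf() calls give way to stream-style ib::info()/ib::warn() temporaries that emit a complete line when destroyed. A simplified stand-in for the idiom; the real classes write to the server error log, not stderr:

	#include <iostream>
	#include <sstream>

	namespace ib {
	struct info {
		std::ostringstream msg;
		template<class T> info& operator<<(const T& t)
		{ msg << t; return *this; }
		~info() { std::cerr << "InnoDB: " << msg.str() << '\n'; }
	};
	}

	/* usage: the message is flushed when the temporary dies
	ib::info() << "Writing table metadata to '" << name << "'"; */
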
@@ -457,23 +455,21 @@ row_quiesce_write_cfg(
char msg[BUFSIZ];
- ut_snprintf(msg, sizeof(msg), "%s flush() failed",
- name);
+ snprintf(msg, sizeof(msg), "%s flush() failed", name);
ib_senderrf(
thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno), msg);
+ (ulong) errno, strerror(errno), msg);
}
if (fclose(file) != 0) {
char msg[BUFSIZ];
- ut_snprintf(msg, sizeof(msg), "%s flose() failed",
- name);
+		snprintf(msg, sizeof(msg), "%s fclose() failed", name);
ib_senderrf(
thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno), msg);
+ (ulong) errno, strerror(errno), msg);
}
}
@@ -510,7 +506,6 @@ row_quiesce_table_has_fts_index(
/*********************************************************************//**
Quiesce the tablespace that the table resides in. */
-UNIV_INTERN
void
row_quiesce_table_start(
/*====================*/
@@ -521,17 +516,12 @@ row_quiesce_table_start(
ut_a(srv_n_purge_threads > 0);
ut_ad(!srv_read_only_mode);
- char table_name[MAX_FULL_NAME_LEN + 1];
-
ut_a(trx->mysql_thd != 0);
- innobase_format_name(
- table_name, sizeof(table_name), table->name, FALSE);
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Sync to disk of '%s' started.", table_name);
+ ut_ad(fil_space_get(table->space) != NULL);
+ ib::info() << "Sync to disk of " << table->name << " started.";
- if (trx_purge_state() != PURGE_STATE_DISABLED) {
+ if (srv_undo_sources) {
trx_purge_stop();
}
@@ -540,31 +530,32 @@ row_quiesce_table_start(
&& !trx_is_interrupted(trx);
++count) {
if (!(count % 20)) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Merging change buffer entries for '%s'",
- table_name);
+ ib::info() << "Merging change buffer entries for "
+ << table->name;
}
}
if (!trx_is_interrupted(trx)) {
- buf_LRU_flush_or_remove_pages(table->space, trx);
+ {
+ FlushObserver observer(table->space, trx, NULL);
+ buf_LRU_flush_or_remove_pages(table->space, &observer);
+ }
if (trx_is_interrupted(trx)) {
- ib_logf(IB_LOG_LEVEL_WARN, "Quiesce aborted!");
+ ib::warn() << "Quiesce aborted!";
} else if (row_quiesce_write_cfg(table, trx->mysql_thd)
!= DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "There was an error writing to the "
- "meta data file");
+ ib::warn() << "There was an error writing to the"
+ " meta data file";
} else {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Table '%s' flushed to disk", table_name);
+ ib::info() << "Table " << table->name
+ << " flushed to disk";
}
} else {
- ib_logf(IB_LOG_LEVEL_WARN, "Quiesce aborted!");
+ ib::warn() << "Quiesce aborted!";
}
dberr_t err = row_quiesce_set_state(table, QUIESCE_COMPLETE, trx);
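
The extra braces around the FlushObserver are the point of the hunk above: the observer's destructor runs before trx_is_interrupted() is re-checked and the .cfg file is written, so the tablespace flush completes (or is abandoned) first. An annotated sketch, assuming the destructor is what performs the wait:

	{
		FlushObserver observer(table->space, trx, NULL);
		buf_LRU_flush_or_remove_pages(table->space, &observer);
	}	/* observer destroyed here: tracked writes have finished */
	/* only now is it safe to write the metadata (.cfg) file */
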
@@ -573,7 +564,6 @@ row_quiesce_table_start(
/*********************************************************************//**
Cleanup after table quiesce. */
-UNIV_INTERN
void
row_quiesce_table_complete(
/*=======================*/
@@ -581,13 +571,9 @@ row_quiesce_table_complete(
trx_t* trx) /*!< in/out: transaction/session */
{
ulint count = 0;
- char table_name[MAX_FULL_NAME_LEN + 1];
ut_a(trx->mysql_thd != 0);
- innobase_format_name(
- table_name, sizeof(table_name), table->name, FALSE);
-
/* We need to wait for the operation to complete if the
transaction has been killed. */
@@ -595,9 +581,8 @@ row_quiesce_table_complete(
/* Print a warning after every minute. */
if (!(count % 60)) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Waiting for quiesce of '%s' to complete",
- table_name);
+ ib::warn() << "Waiting for quiesce of " << table->name
+ << " to complete";
}
/* Sleep for a second. */
@@ -606,19 +591,20 @@ row_quiesce_table_complete(
++count;
}
- /* Remove the .cfg file now that the user has resumed
- normal operations. Otherwise it will cause problems when
- the user tries to drop the database (remove directory). */
- char cfg_name[OS_FILE_MAX_PATH];
+ if (!opt_bootstrap) {
+ /* Remove the .cfg file now that the user has resumed
+ normal operations. Otherwise it will cause problems when
+ the user tries to drop the database (remove directory). */
+ char cfg_name[OS_FILE_MAX_PATH];
- srv_get_meta_data_filename(table, cfg_name, sizeof(cfg_name));
+ srv_get_meta_data_filename(table, cfg_name, sizeof(cfg_name));
- os_file_delete_if_exists(innodb_file_data_key, cfg_name);
+ os_file_delete_if_exists(innodb_data_file_key, cfg_name, NULL);
- ib_logf(IB_LOG_LEVEL_INFO,
- "Deleting the meta-data file '%s'", cfg_name);
+ ib::info() << "Deleting the meta-data file '" << cfg_name << "'";
+ }
- if (trx_purge_state() != PURGE_STATE_DISABLED) {
+ if (srv_undo_sources) {
trx_purge_run();
}
@@ -629,7 +615,6 @@ row_quiesce_table_complete(
/*********************************************************************//**
Set a table's quiesce state.
@return DB_SUCCESS or error code. */
-UNIV_INTERN
dberr_t
row_quiesce_set_state(
/*==================*/
@@ -646,12 +631,19 @@ row_quiesce_set_state(
return(DB_UNSUPPORTED);
+ } else if (dict_table_is_temporary(table)) {
+
+ ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_WARN,
+ ER_CANNOT_DISCARD_TEMPORARY_TABLE);
+
+ return(DB_UNSUPPORTED);
} else if (table->space == TRX_SYS_SPACE) {
char table_name[MAX_FULL_NAME_LEN + 1];
innobase_format_name(
- table_name, sizeof(table_name), table->name, FALSE);
+ table_name, sizeof(table_name),
+ table->name.m_name);
ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_WARN,
ER_TABLE_IN_SYSTEM_TABLESPACE, table_name);
@@ -661,8 +653,8 @@ row_quiesce_set_state(
ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_WARN,
ER_NOT_SUPPORTED_YET,
- "FLUSH TABLES on tables that have an FTS index. "
- "FTS auxiliary tables will not be flushed.");
+ "FLUSH TABLES on tables that have an FTS index."
+ " FTS auxiliary tables will not be flushed.");
} else if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
/* If this flag is set then the table may not have any active
@@ -670,10 +662,10 @@ row_quiesce_set_state(
ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_WARN,
ER_NOT_SUPPORTED_YET,
- "FLUSH TABLES on a table that had an FTS index, "
- "created on a hidden column, the "
- "auxiliary tables haven't been dropped as yet. "
- "FTS auxiliary tables will not be flushed.");
+ "FLUSH TABLES on a table that had an FTS index,"
+ " created on a hidden column, the"
+ " auxiliary tables haven't been dropped as yet."
+ " FTS auxiliary tables will not be flushed.");
}
row_mysql_lock_data_dictionary(trx);
diff --git a/storage/innobase/row/row0row.cc b/storage/innobase/row/row0row.cc
index 516dc823812..816e53a8d5e 100644
--- a/storage/innobase/row/row0row.cc
+++ b/storage/innobase/row/row0row.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, 2019, MariaDB Corporation.
+Copyright (c) 1996, 2018, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,15 +25,10 @@ Created 4/20/1996 Heikki Tuuri
*******************************************************/
#include "row0row.h"
-
-#ifdef UNIV_NONINL
-#include "row0row.ic"
-#endif
-
#include "data0type.h"
#include "dict0dict.h"
+#include "dict0boot.h"
#include "btr0btr.h"
-#include "ha_prototypes.h"
#include "mach0data.h"
#include "trx0rseg.h"
#include "trx0trx.h"
@@ -47,6 +42,8 @@ Created 4/20/1996 Heikki Tuuri
#include "rem0cmp.h"
#include "read0read.h"
#include "ut0mem.h"
+#include "gis0geo.h"
+#include "row0mysql.h"
/*****************************************************************//**
When an insert or purge to a table is performed, this function builds
@@ -54,7 +51,6 @@ the entry to be inserted into or purged from an index on the table.
@return index entry which should be inserted or purged
@retval NULL if the externally stored columns in the clustered index record
are unavailable and ext != NULL, or row is missing some needed columns. */
-UNIV_INTERN
dtuple_t*
row_build_index_entry_low(
/*======================*/
@@ -63,18 +59,28 @@ row_build_index_entry_low(
const row_ext_t* ext, /*!< in: externally stored column
prefixes, or NULL */
dict_index_t* index, /*!< in: index on the table */
- mem_heap_t* heap) /*!< in: memory heap from which
+ mem_heap_t* heap, /*!< in: memory heap from which
the memory for the index entry
is allocated */
+	ulint			flag)	/*!< in: ROW_BUILD_NORMAL,
+					ROW_BUILD_FOR_PURGE,
+					ROW_BUILD_FOR_UNDO or
+					ROW_BUILD_FOR_INSERT */
{
dtuple_t* entry;
ulint entry_len;
ulint i;
+ ulint num_v = 0;
entry_len = dict_index_get_n_fields(index);
- entry = dtuple_create(heap, entry_len);
- if (dict_index_is_univ(index)) {
+ if (flag == ROW_BUILD_FOR_INSERT && dict_index_is_clust(index)) {
+ num_v = dict_table_get_n_v_cols(index->table);
+ entry = dtuple_create_with_vcol(heap, entry_len, num_v);
+ } else {
+ entry = dtuple_create(heap, entry_len);
+ }
+
+ if (dict_index_is_ibuf(index)) {
dtuple_set_n_fields_cmp(entry, entry_len);
/* There may only be externally stored columns
in a clustered index B-tree of a user table. */
@@ -84,22 +90,48 @@ row_build_index_entry_low(
entry, dict_index_get_n_unique_in_tree(index));
}
- for (i = 0; i < entry_len; i++) {
- const dict_field_t* ind_field
- = dict_index_get_nth_field(index, i);
- const dict_col_t* col
- = ind_field->col;
- ulint col_no
- = dict_col_get_no(col);
- dfield_t* dfield
- = dtuple_get_nth_field(entry, i);
- const dfield_t* dfield2
- = dtuple_get_nth_field(row, col_no);
+ for (i = 0; i < entry_len + num_v; i++) {
+ const dict_field_t* ind_field = NULL;
+ const dict_col_t* col;
+ ulint col_no = 0;
+ dfield_t* dfield;
+ const dfield_t* dfield2;
ulint len;
+ if (i >= entry_len) {
+			/* This is for inserting new rows into the
+			clustered index */
+ ut_ad(dict_index_is_clust(index)
+ && flag == ROW_BUILD_FOR_INSERT);
+ dfield = dtuple_get_nth_v_field(entry, i - entry_len);
+ col = &dict_table_get_nth_v_col(
+ index->table, i - entry_len)->m_col;
+
+ } else {
+ ind_field = dict_index_get_nth_field(index, i);
+ col = ind_field->col;
+ col_no = dict_col_get_no(col);
+ dfield = dtuple_get_nth_field(entry, i);
+ }
#if DATA_MISSING != 0
# error "DATA_MISSING != 0"
#endif
+
+ if (dict_col_is_virtual(col)) {
+ const dict_v_col_t* v_col
+ = reinterpret_cast<const dict_v_col_t*>(col);
+
+ ut_ad(v_col->v_pos < dtuple_get_n_v_fields(row));
+ dfield2 = dtuple_get_nth_v_field(row, v_col->v_pos);
+
+ ut_ad(dfield_is_null(dfield2) ||
+ dfield_get_len(dfield2) == 0 || dfield2->data);
+ } else {
+ dfield2 = dtuple_get_nth_field(row, col_no);
+ ut_ad(dfield_get_type(dfield2)->mtype == DATA_MISSING
+ || (!(dfield_get_type(dfield2)->prtype
+ & DATA_VIRTUAL)));
+ }
+
if (UNIV_UNLIKELY(dfield_get_type(dfield2)->mtype
== DATA_MISSING)) {
/* The field has not been initialized in the row.
@@ -107,6 +139,132 @@ row_build_index_entry_low(
return(NULL);
}
+#ifdef UNIV_DEBUG
+ if (dfield_get_type(dfield2)->prtype & DATA_VIRTUAL
+ && dict_index_is_clust(index)) {
+ ut_ad(flag == ROW_BUILD_FOR_INSERT);
+ }
+#endif /* UNIV_DEBUG */
+
+		/* Special handling for a spatial index: set the first
+		field, which stores the MBR. */
+ if (dict_index_is_spatial(index) && i == 0) {
+ double* mbr;
+
+ dfield_copy(dfield, dfield2);
+ dfield->type.prtype |= DATA_GIS_MBR;
+
+ /* Allocate memory for mbr field */
+ ulint mbr_len = DATA_MBR_LEN;
+ mbr = static_cast<double*>(mem_heap_alloc(heap, mbr_len));
+
+ /* Set mbr field data. */
+ dfield_set_data(dfield, mbr, mbr_len);
+
+ if (dfield2->data) {
+ const uchar* dptr = NULL;
+ ulint dlen = 0;
+ ulint flen = 0;
+ double tmp_mbr[SPDIMS * 2];
+ mem_heap_t* temp_heap = NULL;
+
+ if (dfield_is_ext(dfield2)) {
+ if (flag == ROW_BUILD_FOR_PURGE) {
+ const byte* ptr = NULL;
+
+ spatial_status_t spatial_status;
+ spatial_status =
+ dfield_get_spatial_status(
+ dfield2);
+
+ switch (spatial_status) {
+ case SPATIAL_ONLY:
+ ptr = static_cast<const byte*>(
+ dfield_get_data(
+ dfield2));
+ ut_ad(dfield_get_len(dfield2)
+ == DATA_MBR_LEN);
+ break;
+
+ case SPATIAL_MIXED:
+ ptr = static_cast<const byte*>(
+ dfield_get_data(
+ dfield2))
+ + dfield_get_len(
+ dfield2);
+ break;
+
+ case SPATIAL_UNKNOWN:
+ ut_ad(0);
+ /* fall through */
+ case SPATIAL_NONE:
+					/* The undo record was logged
+					before the spatial index was
+					created. */
+ return(NULL);
+ }
+
+ memcpy(mbr, ptr, DATA_MBR_LEN);
+ continue;
+ }
+
+ if (flag == ROW_BUILD_FOR_UNDO
+ && dict_table_get_format(index->table)
+ >= UNIV_FORMAT_B) {
+					/* When building an entry for
+					undo and the table format is
+					Barracuda or newer, we need to
+					skip the prefix data. */
+ flen = BTR_EXTERN_FIELD_REF_SIZE;
+ ut_ad(dfield_get_len(dfield2) >=
+ BTR_EXTERN_FIELD_REF_SIZE);
+ dptr = static_cast<const byte*>(
+ dfield_get_data(dfield2))
+ + dfield_get_len(dfield2)
+ - BTR_EXTERN_FIELD_REF_SIZE;
+ } else {
+ flen = dfield_get_len(dfield2);
+ dptr = static_cast<const byte*>(
+ dfield_get_data(dfield2));
+ }
+
+ temp_heap = mem_heap_create(1000);
+
+ const page_size_t page_size
+ = (ext != NULL)
+ ? ext->page_size
+ : dict_table_page_size(
+ index->table);
+
+ dptr = btr_copy_externally_stored_field(
+ &dlen, dptr,
+ page_size,
+ flen,
+ temp_heap);
+ } else {
+ dptr = static_cast<const uchar*>(
+ dfield_get_data(dfield2));
+ dlen = dfield_get_len(dfield2);
+
+ }
+
+ if (dlen <= GEO_DATA_HEADER_SIZE) {
+ for (uint i = 0; i < SPDIMS; ++i) {
+ tmp_mbr[i * 2] = DBL_MAX;
+ tmp_mbr[i * 2 + 1] = -DBL_MAX;
+ }
+ } else {
+ rtree_mbr_from_wkb(dptr + GEO_DATA_HEADER_SIZE,
+ static_cast<uint>(dlen
+ - GEO_DATA_HEADER_SIZE),
+ SPDIMS, tmp_mbr);
+ }
+ dfield_write_mbr(dfield, tmp_mbr);
+ if (temp_heap) {
+ mem_heap_free(temp_heap);
+ }
+ }
+ continue;
+ }
+
len = dfield_get_len(dfield2);
dfield_copy(dfield, dfield2);
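
The new flag argument tells row_build_index_entry_low() what the entry is for, which selects the virtual-column and MBR paths above. A hypothetical purge-side call; note the NULL return, which now also covers an undo record logged before its spatial index existed:

	dtuple_t* entry = row_build_index_entry_low(
		row, ext, index, heap, ROW_BUILD_FOR_PURGE);
	if (entry == NULL) {
		/* nothing to purge: e.g. the undo record predates
		the spatial index (the SPATIAL_NONE case above) */
	}
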
@@ -115,13 +273,14 @@ row_build_index_entry_low(
continue;
}
- if (ind_field->prefix_len == 0
+ if ((!ind_field || ind_field->prefix_len == 0)
&& (!dfield_is_ext(dfield)
|| dict_index_is_clust(index))) {
/* The dfield_copy() above suffices for
columns that are stored in-page, or for
clustered index record columns that are not
- part of a column prefix in the PRIMARY KEY. */
+			part of a column prefix in the PRIMARY KEY,
+			or for virtual columns in a clustered index record. */
continue;
}
@@ -134,7 +293,7 @@ row_build_index_entry_low(
stored off-page. */
ut_ad(col->ord_part);
- if (ext) {
+ if (ext && !col->is_virtual()) {
/* See if the column is stored externally. */
const byte* buf = row_ext_lookup(ext, col_no,
&len);
@@ -184,51 +343,42 @@ row_build_index_entry_low(
return(entry);
}
-/*******************************************************************//**
-An inverse function to row_build_index_entry. Builds a row from a
-record in a clustered index.
-@return own: row built; see the NOTE below! */
-UNIV_INTERN
+/** An inverse function to row_build_index_entry. Builds a row from a
+record in a clustered index, with possible indexing on ongoing
+addition of new virtual columns.
+@param[in] type ROW_COPY_POINTERS or ROW_COPY_DATA;
+@param[in] index clustered index
+@param[in] rec record in the clustered index
+@param[in] offsets rec_get_offsets(rec,index) or NULL
+@param[in] col_table table, to check which
+ externally stored columns
+ occur in the ordering columns
+ of an index, or NULL if
+ index->table should be
+ consulted instead
+@param[in] add_cols default values of added columns, or NULL
+@param[in] add_v new virtual columns added
+ along with new indexes
+@param[in] col_map mapping of old column
+ numbers to new ones, or NULL
+@param[in] ext cache of externally stored column
+ prefixes, or NULL
+@param[in] heap memory heap from which
+ the memory needed is allocated
+@return own: row built; */
+static inline
dtuple_t*
-row_build(
-/*======*/
- ulint type, /*!< in: ROW_COPY_POINTERS or
- ROW_COPY_DATA; the latter
- copies also the data fields to
- heap while the first only
- places pointers to data fields
- on the index page, and thus is
- more efficient */
- const dict_index_t* index, /*!< in: clustered index */
- const rec_t* rec, /*!< in: record in the clustered
- index; NOTE: in the case
- ROW_COPY_POINTERS the data
- fields in the row will point
- directly into this record,
- therefore, the buffer page of
- this record must be at least
- s-latched and the latch held
- as long as the row dtuple is used! */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index)
- or NULL, in which case this function
- will invoke rec_get_offsets() */
+row_build_low(
+ ulint type,
+ const dict_index_t* index,
+ const rec_t* rec,
+ const offset_t* offsets,
const dict_table_t* col_table,
- /*!< in: table, to check which
- externally stored columns
- occur in the ordering columns
- of an index, or NULL if
- index->table should be
- consulted instead */
const dtuple_t* add_cols,
- /*!< in: default values of
- added columns, or NULL */
- const ulint* col_map,/*!< in: mapping of old column
- numbers to new ones, or NULL */
- row_ext_t** ext, /*!< out, own: cache of
- externally stored column
- prefixes, or NULL */
- mem_heap_t* heap) /*!< in: memory heap from which
- the memory needed is allocated */
+ const dict_add_v_col_t* add_v,
+ const ulint* col_map,
+ row_ext_t** ext,
+ mem_heap_t* heap)
{
const byte* copy;
dtuple_t* row;
@@ -238,34 +388,34 @@ row_build(
byte* buf;
ulint j;
mem_heap_t* tmp_heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
rec_offs_init(offsets_);
ut_ad(index != NULL);
ut_ad(rec != NULL);
ut_ad(heap != NULL);
ut_ad(dict_index_is_clust(index));
- ut_ad(!mutex_own(&trx_sys->mutex));
+ ut_ad(!trx_sys_mutex_own());
ut_ad(!col_map || col_table);
if (!offsets) {
- offsets = rec_get_offsets(rec, index, offsets_,
+ offsets = rec_get_offsets(rec, index, offsets_, true,
ULINT_UNDEFINED, &tmp_heap);
} else {
ut_ad(rec_offs_validate(rec, index, offsets));
}
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- if (rec_offs_any_null_extern(rec, offsets)) {
- /* This condition can occur during crash recovery
- before trx_rollback_active() has completed execution,
- or when a concurrently executing
- row_ins_index_entry_low() has committed the B-tree
- mini-transaction but has not yet managed to restore
- the cursor position for writing the big_rec. */
- ut_a(trx_undo_roll_ptr_is_insert(
- row_get_rec_roll_ptr(rec, index, offsets)));
- }
+ /* Some blob refs can be NULL during crash recovery before
+ trx_rollback_active() has completed execution, or when a concurrently
+ executing insert or update has committed the B-tree mini-transaction
+ but has not yet managed to restore the cursor position for writing
+ the big_rec. Note that the mini-transaction can be committed multiple
+ times, and the cursor restore can happen multiple times for single
+ insert or update statement. */
+ ut_a(!rec_offs_any_null_extern(rec, offsets)
+ || trx_rw_is_active(row_get_rec_trx_id(rec, index, offsets),
+ NULL, false));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
if (type != ROW_COPY_POINTERS) {
@@ -285,7 +435,7 @@ row_build(
}
/* Avoid a debug assertion in rec_offs_validate(). */
- rec_offs_make_valid(copy, index, const_cast<ulint*>(offsets));
+ rec_offs_make_valid(copy, index, const_cast<offset_t*>(offsets));
if (!col_table) {
ut_ad(!col_map);
@@ -302,8 +452,22 @@ row_build(
dict_table_get_nth_col(col_table, i),
dfield_get_type(dtuple_get_nth_field(row, i)));
}
+ } else if (add_v != NULL) {
+ row = dtuple_create_with_vcol(
+ heap, dict_table_get_n_cols(col_table),
+ dict_table_get_n_v_cols(col_table) + add_v->n_v_col);
+ dict_table_copy_types(row, col_table);
+
+ for (ulint i = 0; i < add_v->n_v_col; i++) {
+ dict_col_copy_type(
+ &add_v->v_col[i].m_col,
+ dfield_get_type(dtuple_get_nth_v_field(
+ row, i + col_table->n_v_def)));
+ }
} else {
- row = dtuple_create(heap, dict_table_get_n_cols(col_table));
+ row = dtuple_create_with_vcol(
+ heap, dict_table_get_n_cols(col_table),
+ dict_table_get_n_v_cols(col_table));
dict_table_copy_types(row, col_table);
}
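
With virtual columns, the row tuple is now sized for both stored and virtual fields, and the types of virtual columns added by an in-progress ALTER come from add_v rather than the committed dictionary. Condensed from the branches above, assuming dtuple_create_with_vcol() lays the virtual fields out after the stored ones:

	ulint	n_v = dict_table_get_n_v_cols(col_table)
		      + (add_v ? add_v->n_v_col : 0);
	dtuple_t* row = dtuple_create_with_vcol(
		heap, dict_table_get_n_cols(col_table), n_v);
	dict_table_copy_types(row, col_table);
	/* types of not-yet-committed virtual columns come from add_v */
	for (ulint i = 0; add_v && i < add_v->n_v_col; i++) {
		dict_col_copy_type(
			&add_v->v_col[i].m_col,
			dfield_get_type(dtuple_get_nth_v_field(
				row, i + col_table->n_v_def)));
	}
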
@@ -361,7 +525,7 @@ row_build(
}
}
- rec_offs_make_valid(rec, index, const_cast<ulint*>(offsets));
+ rec_offs_make_valid(rec, index, const_cast<offset_t*>(offsets));
ut_ad(dtuple_check_typed(row));
@@ -389,19 +553,106 @@ row_build(
return(row);
}
+
+/*******************************************************************//**
+An inverse function to row_build_index_entry. Builds a row from a
+record in a clustered index.
+@return own: row built; see the NOTE below! */
+dtuple_t*
+row_build(
+/*======*/
+ ulint type, /*!< in: ROW_COPY_POINTERS or
+ ROW_COPY_DATA; the latter
+ copies also the data fields to
+ heap while the first only
+ places pointers to data fields
+ on the index page, and thus is
+ more efficient */
+ const dict_index_t* index, /*!< in: clustered index */
+ const rec_t* rec, /*!< in: record in the clustered
+ index; NOTE: in the case
+ ROW_COPY_POINTERS the data
+ fields in the row will point
+ directly into this record,
+ therefore, the buffer page of
+ this record must be at least
+ s-latched and the latch held
+ as long as the row dtuple is used! */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec,index)
+ or NULL, in which case this function
+ will invoke rec_get_offsets() */
+ const dict_table_t* col_table,
+ /*!< in: table, to check which
+ externally stored columns
+ occur in the ordering columns
+ of an index, or NULL if
+ index->table should be
+ consulted instead */
+ const dtuple_t* add_cols,
+ /*!< in: default values of
+ added columns, or NULL */
+ const ulint* col_map,/*!< in: mapping of old column
+ numbers to new ones, or NULL */
+ row_ext_t** ext, /*!< out, own: cache of
+ externally stored column
+ prefixes, or NULL */
+ mem_heap_t* heap) /*!< in: memory heap from which
+ the memory needed is allocated */
+{
+ return(row_build_low(type, index, rec, offsets, col_table,
+ add_cols, NULL, col_map, ext, heap));
+}
+
+/** An inverse function to row_build_index_entry. Builds a row from a
+record in a clustered index, with possible indexing on ongoing
+addition of new virtual columns.
+@param[in] type ROW_COPY_POINTERS or ROW_COPY_DATA;
+@param[in] index clustered index
+@param[in] rec record in the clustered index
+@param[in] offsets rec_get_offsets(rec,index) or NULL
+@param[in] col_table table, to check which
+ externally stored columns
+ occur in the ordering columns
+ of an index, or NULL if
+ index->table should be
+ consulted instead
+@param[in] add_cols default values of added columns, or NULL
+@param[in] add_v new virtual columns added
+ along with new indexes
+@param[in] col_map mapping of old column
+ numbers to new ones, or NULL
+@param[in] ext cache of externally stored column
+ prefixes, or NULL
+@param[in] heap memory heap from which
+ the memory needed is allocated
+@return own: row built; */
+dtuple_t*
+row_build_w_add_vcol(
+ ulint type,
+ const dict_index_t* index,
+ const rec_t* rec,
+ const offset_t* offsets,
+ const dict_table_t* col_table,
+ const dtuple_t* add_cols,
+ const dict_add_v_col_t* add_v,
+ const ulint* col_map,
+ row_ext_t** ext,
+ mem_heap_t* heap)
+{
+ return(row_build_low(type, index, rec, offsets, col_table,
+ add_cols, add_v, col_map, ext, heap));
+}
+
/*******************************************************************//**
Converts an index record to a typed data tuple.
@return index entry built; does not set info_bits, and the data fields
in the entry will point directly to rec */
-UNIV_INTERN
dtuple_t*
row_rec_to_index_entry_low(
/*=======================*/
const rec_t* rec, /*!< in: record in the index */
const dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- ulint* n_ext, /*!< out: number of externally
- stored columns */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
mem_heap_t* heap) /*!< in: memory heap from which
the memory needed is allocated */
{
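
row_rec_to_index_entry_low() no longer reports the count of externally stored columns through n_ext; the dfields are still flagged via dfield_set_ext(), and a caller that needs the count can recover it from the offsets. A sketch of the migration:

	/* before: row_rec_to_index_entry(rec, index, offsets,
					  &n_ext, heap); */
	dtuple_t* entry = row_rec_to_index_entry(rec, index, offsets, heap);

	ulint	n_ext = 0;	/* recomputed only where still needed */
	for (ulint i = 0; i < rec_offs_n_fields(offsets); i++) {
		if (rec_offs_nth_extern(offsets, i)) {
			n_ext++;
		}
	}
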
@@ -415,11 +666,10 @@ row_rec_to_index_entry_low(
ut_ad(rec != NULL);
ut_ad(heap != NULL);
ut_ad(index != NULL);
+
/* Because this function may be invoked by row0merge.cc
on a record whose header is in different format, the check
rec_offs_validate(rec, index, offsets) must be avoided here. */
- ut_ad(n_ext);
- *n_ext = 0;
rec_len = rec_offs_n_fields(offsets);
@@ -427,7 +677,11 @@ row_rec_to_index_entry_low(
dtuple_set_n_fields_cmp(entry,
dict_index_get_n_unique_in_tree(index));
- ut_ad(rec_len == dict_index_get_n_fields(index));
+ ut_ad(rec_len == dict_index_get_n_fields(index)
+ /* a record for older SYS_INDEXES table
+ (missing merge_threshold column) is acceptable. */
+ || (index->table->id == DICT_INDEXES_ID
+ && rec_len == dict_index_get_n_fields(index) - 1));
dict_index_copy_types(entry, index, rec_len);
@@ -440,7 +694,6 @@ row_rec_to_index_entry_low(
if (rec_offs_nth_extern(offsets, i)) {
dfield_set_ext(dfield);
- (*n_ext)++;
}
}
@@ -452,16 +705,13 @@ row_rec_to_index_entry_low(
/*******************************************************************//**
Converts an index record to a typed data tuple. NOTE that externally
stored (often big) fields are NOT copied to heap.
-@return own: index entry built */
-UNIV_INTERN
+@return own: index entry built */
dtuple_t*
row_rec_to_index_entry(
/*===================*/
const rec_t* rec, /*!< in: record in the index */
const dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec) */
- ulint* n_ext, /*!< out: number of externally
- stored columns */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec) */
mem_heap_t* heap) /*!< in: memory heap from which
the memory needed is allocated */
{
@@ -480,10 +730,10 @@ row_rec_to_index_entry(
copy_rec = rec_copy(buf, rec, offsets);
- rec_offs_make_valid(copy_rec, index, const_cast<ulint*>(offsets));
+ rec_offs_make_valid(copy_rec, index, const_cast<offset_t*>(offsets));
entry = row_rec_to_index_entry_low(
- copy_rec, index, offsets, n_ext, heap);
- rec_offs_make_valid(rec, index, const_cast<ulint*>(offsets));
+ copy_rec, index, offsets, heap);
+ rec_offs_make_valid(rec, index, const_cast<offset_t*>(offsets));
dtuple_set_info_bits(entry,
rec_get_info_bits(rec, rec_offs_comp(offsets)));
@@ -494,8 +744,7 @@ row_rec_to_index_entry(
/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
search the clustered index record.
-@return own: row reference built; see the NOTE below! */
-UNIV_INTERN
+@return own: row reference built; see the NOTE below! */
dtuple_t*
row_build_row_ref(
/*==============*/
@@ -526,8 +775,8 @@ row_build_row_ref(
ulint clust_col_prefix_len;
ulint i;
mem_heap_t* tmp_heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
rec_offs_init(offsets_);
ut_ad(index != NULL);
@@ -535,7 +784,7 @@ row_build_row_ref(
ut_ad(heap != NULL);
ut_ad(!dict_index_is_clust(index));
- offsets = rec_get_offsets(rec, index, offsets,
+ offsets = rec_get_offsets(rec, index, offsets, true,
ULINT_UNDEFINED, &tmp_heap);
/* Secondary indexes must not contain externally stored columns. */
ut_ad(!rec_offs_any_extern(offsets));
@@ -608,7 +857,6 @@ row_build_row_ref(
/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
search the clustered index record. */
-UNIV_INTERN
void
row_build_row_ref_in_tuple(
/*=======================*/
@@ -623,7 +871,7 @@ row_build_row_ref_in_tuple(
held as long as the row
reference is used! */
const dict_index_t* index, /*!< in: secondary index */
- ulint* offsets,/*!< in: rec_get_offsets(rec, index)
+ offset_t* offsets,/*!< in: rec_get_offsets(rec, index)
or NULL */
trx_t* trx) /*!< in: transaction */
{
@@ -636,30 +884,17 @@ row_build_row_ref_in_tuple(
ulint clust_col_prefix_len;
ulint i;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
rec_offs_init(offsets_);
ut_ad(!dict_index_is_clust(index));
-
- if (UNIV_UNLIKELY(!index->table)) {
- fputs("InnoDB: table ", stderr);
-notfound:
- ut_print_name(stderr, trx, TRUE, index->table_name);
- fputs(" for index ", stderr);
- ut_print_name(stderr, trx, FALSE, index->name);
- fputs(" not found\n", stderr);
- ut_error;
- }
+ ut_a(index->table);
clust_index = dict_table_get_first_index(index->table);
-
- if (UNIV_UNLIKELY(!clust_index)) {
- fputs("InnoDB: clust index for table ", stderr);
- goto notfound;
- }
+ ut_ad(clust_index);
if (!offsets) {
- offsets = rec_get_offsets(rec, index, offsets_,
+ offsets = rec_get_offsets(rec, index, offsets_, true,
ULINT_UNDEFINED, &heap);
} else {
ut_ad(rec_offs_validate(rec, index, offsets));
@@ -717,8 +952,7 @@ notfound:
/***************************************************************//**
Searches the clustered index record for a row, if we have the row reference.
-@return TRUE if found */
-UNIV_INTERN
+@return TRUE if found */
ibool
row_search_on_row_ref(
/*==================*/
@@ -739,7 +973,10 @@ row_search_on_row_ref(
ut_a(dtuple_get_n_fields(ref) == dict_index_get_n_unique(index));
- btr_pcur_open(index, ref, PAGE_CUR_LE, mode, pcur, mtr);
+ if (btr_pcur_open(index, ref, PAGE_CUR_LE, mode, pcur, mtr)
+ != DB_SUCCESS) {
+ return FALSE;
+ }
low_match = btr_pcur_get_low_match(pcur);
@@ -761,8 +998,7 @@ row_search_on_row_ref(
/*********************************************************************//**
Fetches the clustered index record for a secondary index record. The latches
on the secondary index record are preserved.
-@return record or NULL, if no record found */
-UNIV_INTERN
+@return record or NULL, if no record found */
rec_t*
row_get_clust_rec(
/*==============*/
@@ -802,8 +1038,7 @@ row_get_clust_rec(
/***************************************************************//**
Searches an index record.
-@return whether the record was found or buffered */
-UNIV_INTERN
+@return whether the record was found or buffered */
enum row_search_result
row_search_index_entry(
/*===================*/
@@ -820,11 +1055,17 @@ row_search_index_entry(
ut_ad(dtuple_check_typed(entry));
- btr_pcur_open(index, entry, PAGE_CUR_LE, mode, pcur, mtr);
+ if (dict_index_is_spatial(index)) {
+ ut_ad(mode & BTR_MODIFY_LEAF || mode & BTR_MODIFY_TREE);
+ rtr_pcur_open(index, entry, PAGE_CUR_RTREE_LOCATE,
+ mode, pcur, mtr);
+ } else {
+ btr_pcur_open(index, entry, PAGE_CUR_LE, mode, pcur, mtr);
+ }
switch (btr_pcur_get_btr_cur(pcur)->flag) {
case BTR_CUR_DELETE_REF:
- ut_a(mode & BTR_DELETE);
+ ut_a(mode & BTR_DELETE && !dict_index_is_spatial(index));
return(ROW_NOT_DELETED_REF);
case BTR_CUR_DEL_MARK_IBUF:
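
Spatial positioning cannot use PAGE_CUR_LE ordering, so row_search_index_entry() now dispatches internally to rtr_pcur_open() with PAGE_CUR_RTREE_LOCATE; callers are unchanged. A hypothetical caller, relying on the tightened assertion that delete-buffering never happens on a spatial index:

	switch (row_search_index_entry(index, entry, BTR_MODIFY_LEAF,
				       &pcur, &mtr)) {
	case ROW_FOUND:
		/* operate on btr_pcur_get_rec(&pcur) */
		break;
	case ROW_NOT_DELETED_REF:	/* B-tree only, needs BTR_DELETE */
	case ROW_BUFFERED:
	case ROW_NOT_FOUND:
		break;
	}
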
@@ -865,7 +1106,7 @@ Not more than "buf_size" bytes are written to "buf".
The result is always '\0'-terminated (provided buf_size > 0) and the
number of bytes that were written to "buf" is returned (including the
terminating '\0').
-@return number of bytes that were written */
+@return number of bytes that were written */
static
ulint
row_raw_format_int(
@@ -890,9 +1131,9 @@ row_raw_format_int(
value = mach_read_int_type(
(const byte*) data, data_len, unsigned_type);
- ret = ut_snprintf(
+ ret = snprintf(
buf, buf_size,
- unsigned_type ? UINT64PF : INT64PF, value) + 1;
+ unsigned_type ? "%llu" : "%lld", (longlong) value)+1;
} else {
*format_in_hex = TRUE;
@@ -913,7 +1154,7 @@ Not more than "buf_size" bytes are written to "buf".
The result is always '\0'-terminated (provided buf_size > 0) and the
number of bytes that were written to "buf" is returned (including the
terminating '\0').
-@return number of bytes that were written */
+@return number of bytes that were written */
static
ulint
row_raw_format_str(
@@ -963,8 +1204,7 @@ Not more than "buf_size" bytes are written to "buf".
The result is always NUL-terminated (provided buf_size is positive) and the
number of bytes that were written to "buf" is returned (including the
terminating NUL).
-@return number of bytes that were written */
-UNIV_INTERN
+@return number of bytes that were written */
ulint
row_raw_format(
/*===========*/
@@ -988,7 +1228,7 @@ row_raw_format(
if (data_len == UNIV_SQL_NULL) {
- ret = ut_snprintf((char*) buf, buf_size, "NULL") + 1;
+ ret = snprintf((char*) buf, buf_size, "NULL") + 1;
return(ut_min(ret, buf_size));
}
@@ -1042,9 +1282,9 @@ row_raw_format(
return(ret);
}
-#ifdef UNIV_COMPILE_TEST_FUNCS
+#ifdef UNIV_ENABLE_UNIT_TEST_ROW_RAW_FORMAT_INT
-#include "ut0dbg.h"
+#ifdef HAVE_UT_CHRONO_T
void
test_row_raw_format_int()
@@ -1052,7 +1292,6 @@ test_row_raw_format_int()
ulint ret;
char buf[128];
ibool format_in_hex;
- speedo_t speedo;
ulint i;
#define CALL_AND_TEST(data, data_len, prtype, buf, buf_size,\
@@ -1236,7 +1475,7 @@ test_row_raw_format_int()
/* speed test */
- speedo_reset(&speedo);
+ ut_chrono_t ch(__func__);
for (i = 0; i < 1000000; i++) {
row_raw_format_int("\x23", 1,
@@ -1253,8 +1492,8 @@ test_row_raw_format_int()
DATA_UNSIGNED, buf, sizeof(buf),
&format_in_hex);
}
-
- speedo_show(&speedo);
}
-#endif /* UNIV_COMPILE_TEST_FUNCS */
+#endif /* HAVE_UT_CHRONO_T */
+
+#endif /* UNIV_ENABLE_UNIT_TEST_ROW_RAW_FORMAT_INT */
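
The speed test drops the bespoke speedo_t in favor of ut_chrono_t, which appears to time a scope RAII-style and report on destruction. A sketch under that assumption:

	void bench()
	{
		ut_chrono_t	ch(__func__);	/* timing starts */
		for (ulint i = 0; i < 1000000; i++) {
			/* workload under test */
		}
	}	/* ch destroyed: elapsed time for "bench" is reported */
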
diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc
index 936b0eccfb7..381a1fb8bc5 100644
--- a/storage/innobase/row/row0sel.cc
+++ b/storage/innobase/row/row0sel.cc
@@ -2,7 +2,7 @@
Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
-Copyright (c) 2015, 2018, MariaDB Corporation.
+Copyright (c) 2015, 2019, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -32,11 +32,6 @@ Created 12/19/1997 Heikki Tuuri
*******************************************************/
#include "row0sel.h"
-
-#ifdef UNIV_NONINL
-#include "row0sel.ic"
-#endif
-
#include "dict0dict.h"
#include "dict0boot.h"
#include "trx0undo.h"
@@ -44,6 +39,7 @@ Created 12/19/1997 Heikki Tuuri
#include "btr0btr.h"
#include "btr0cur.h"
#include "btr0sea.h"
+#include "gis0rtree.h"
#include "mach0data.h"
#include "que0que.h"
#include "row0upd.h"
@@ -58,11 +54,7 @@ Created 12/19/1997 Heikki Tuuri
#include "read0read.h"
#include "buf0lru.h"
#include "srv0srv.h"
-#include "ha_prototypes.h"
-#include "m_string.h" /* for my_sys.h */
-#include "my_sys.h" /* DEBUG_SYNC_C */
-
-#include "my_compare.h" /* enum icp_result */
+#include "srv0mon.h"
/* Maximum number of rows to prefetch; MySQL interface has another parameter */
#define SEL_MAX_N_PREFETCH 16
@@ -87,7 +79,7 @@ is alphabetically the same as the corresponding BLOB column in the clustered
index record.
NOTE: the comparison is NOT done as a binary comparison, but character
fields are compared with collation!
-@return TRUE if the columns are equal */
+@return TRUE if the columns are equal */
static
ibool
row_sel_sec_rec_is_for_blob(
@@ -113,7 +105,6 @@ row_sel_sec_rec_is_for_blob(
{
ulint len;
byte buf[REC_VERSION_56_MAX_INDEX_COL_LEN];
- ulint zip_size = dict_tf_get_zip_size(table->flags);
/* This function should never be invoked on an Antelope format
table, because they should always contain enough prefix in the
@@ -124,9 +115,8 @@ row_sel_sec_rec_is_for_blob(
ut_ad(prefix_len > 0);
ut_a(prefix_len <= sizeof buf);
- if (UNIV_UNLIKELY
- (!memcmp(clust_field + clust_len - BTR_EXTERN_FIELD_REF_SIZE,
- field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) {
+ if (!memcmp(clust_field + clust_len - BTR_EXTERN_FIELD_REF_SIZE,
+ field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE)) {
/* The externally stored field was not written yet.
This record should only be seen by
recv_recovery_rollback_active() or any
@@ -134,11 +124,11 @@ row_sel_sec_rec_is_for_blob(
return(FALSE);
}
- len = btr_copy_externally_stored_field_prefix(buf, prefix_len,
- zip_size,
- clust_field, clust_len);
+ len = btr_copy_externally_stored_field_prefix(
+ buf, prefix_len, dict_tf_get_page_size(table->flags),
+ clust_field, clust_len);
- if (UNIV_UNLIKELY(len == 0)) {
+ if (len == 0) {
/* The BLOB was being deleted as the server crashed.
There should not be any secondary index records
referring to this clustered index record, because
@@ -153,26 +143,28 @@ row_sel_sec_rec_is_for_blob(
return(!cmp_data_data(mtype, prtype, buf, len, sec_field, sec_len));
}
-/********************************************************************//**
-Returns TRUE if the user-defined column values in a secondary index record
+/** Returns TRUE if the user-defined column values in a secondary index record
are alphabetically the same as the corresponding columns in the clustered
index record.
NOTE: the comparison is NOT done as a binary comparison, but character
fields are compared with collation!
+@param[in] sec_rec secondary index record
+@param[in] sec_index secondary index
+@param[in] clust_rec clustered index record;
+ must be protected by a page s-latch
+@param[in] clust_index clustered index
+@param[in] thr query thread
@return TRUE if the secondary record is equal to the corresponding
fields in the clustered record, when compared with collation;
FALSE if not equal or if the clustered record has been marked for deletion */
static
ibool
row_sel_sec_rec_is_for_clust_rec(
-/*=============================*/
- const rec_t* sec_rec, /*!< in: secondary index record */
- dict_index_t* sec_index, /*!< in: secondary index */
- const rec_t* clust_rec, /*!< in: clustered index record;
- must be protected by a lock or
- a page latch against deletion
- in rollback or purge */
- dict_index_t* clust_index) /*!< in: clustered index */
+ const rec_t* sec_rec,
+ dict_index_t* sec_index,
+ const rec_t* clust_rec,
+ dict_index_t* clust_index,
+ que_thr_t* thr)
{
const byte* sec_field;
ulint sec_len;
@@ -180,17 +172,22 @@ row_sel_sec_rec_is_for_clust_rec(
ulint n;
ulint i;
mem_heap_t* heap = NULL;
- ulint clust_offsets_[REC_OFFS_NORMAL_SIZE];
- ulint sec_offsets_[REC_OFFS_SMALL_SIZE];
- ulint* clust_offs = clust_offsets_;
- ulint* sec_offs = sec_offsets_;
+ offset_t clust_offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t sec_offsets_[REC_OFFS_SMALL_SIZE];
+ offset_t* clust_offs = clust_offsets_;
+ offset_t* sec_offs = sec_offsets_;
ibool is_equal = TRUE;
+ VCOL_STORAGE* vcol_storage= 0;
+ byte* record;
rec_offs_init(clust_offsets_);
rec_offs_init(sec_offsets_);
if (rec_get_deleted_flag(clust_rec,
dict_table_is_comp(clust_index->table))) {
+ /* In delete-marked records, DB_TRX_ID must
+ always refer to an existing undo log record. */
+ ut_ad(rec_get_trx_id(clust_rec, clust_index));
/* The clustered index record is delete-marked;
it is not visible in the read view. Besides,
@@ -199,32 +196,76 @@ row_sel_sec_rec_is_for_clust_rec(
return(FALSE);
}
+ heap = mem_heap_create(256);
+
clust_offs = rec_get_offsets(clust_rec, clust_index, clust_offs,
- ULINT_UNDEFINED, &heap);
+ true, ULINT_UNDEFINED, &heap);
sec_offs = rec_get_offsets(sec_rec, sec_index, sec_offs,
- ULINT_UNDEFINED, &heap);
+ true, ULINT_UNDEFINED, &heap);
n = dict_index_get_n_ordering_defined_by_user(sec_index);
for (i = 0; i < n; i++) {
const dict_field_t* ifield;
const dict_col_t* col;
- ulint clust_pos;
- ulint clust_len;
+ ulint clust_pos = 0;
+ ulint clust_len = 0;
ulint len;
+ bool is_virtual;
ifield = dict_index_get_nth_field(sec_index, i);
col = dict_field_get_col(ifield);
- clust_pos = dict_col_get_clust_pos(col, clust_index);
- clust_field = rec_get_nth_field(
- clust_rec, clust_offs, clust_pos, &clust_len);
+ is_virtual = dict_col_is_virtual(col);
+
+		/* For a virtual column, the value must be reconstructed
+		from its base columns in the clustered index */
+ if (is_virtual) {
+ const dict_v_col_t* v_col;
+ dfield_t* vfield;
+ row_ext_t* ext;
+
+ if (!vcol_storage)
+ {
+ TABLE *mysql_table= thr->prebuilt->m_mysql_table;
+ innobase_allocate_row_for_vcol(thr_get_trx(thr)->mysql_thd,
+ clust_index,
+ &heap,
+ &mysql_table,
+ &record,
+ &vcol_storage);
+ }
+
+ v_col = reinterpret_cast<const dict_v_col_t*>(col);
+
+ dtuple_t* row = row_build(
+ ROW_COPY_POINTERS,
+ clust_index, clust_rec,
+ clust_offs,
+ NULL, NULL, NULL, &ext, heap);
+
+ vfield = innobase_get_computed_value(
+ row, v_col, clust_index,
+ &heap, NULL, NULL,
+ thr_get_trx(thr)->mysql_thd,
+ thr->prebuilt->m_mysql_table,
+ record, NULL, NULL, NULL);
+
+ clust_len = vfield->len;
+ clust_field = static_cast<byte*>(vfield->data);
+ } else {
+ clust_pos = dict_col_get_clust_pos(col, clust_index);
+
+ clust_field = rec_get_nth_field(
+ clust_rec, clust_offs, clust_pos, &clust_len);
+ }
+
sec_field = rec_get_nth_field(sec_rec, sec_offs, i, &sec_len);
len = clust_len;
if (ifield->prefix_len > 0 && len != UNIV_SQL_NULL
- && sec_len != UNIV_SQL_NULL) {
+ && sec_len != UNIV_SQL_NULL && !is_virtual) {
if (rec_offs_nth_extern(clust_offs, clust_pos)) {
len -= BTR_EXTERN_FIELD_REF_SIZE;
@@ -250,17 +291,54 @@ row_sel_sec_rec_is_for_clust_rec(
}
}
- if (0 != cmp_data_data(col->mtype, col->prtype,
- clust_field, len,
- sec_field, sec_len)) {
+		/* For a spatial index, the first field is the MBR,
+		so we check whether the MBRs are equal. */
+ if (dict_index_is_spatial(sec_index) && i == 0) {
+ rtr_mbr_t tmp_mbr;
+ rtr_mbr_t sec_mbr;
+ byte* dptr =
+ const_cast<byte*>(clust_field);
+
+ ut_ad(clust_len != UNIV_SQL_NULL);
+
+			/* For an externally stored field, we need the
+			full geometry data to generate the MBR for the
+			comparison. */
+ if (rec_offs_nth_extern(clust_offs, clust_pos)) {
+ dptr = btr_copy_externally_stored_field(
+ &clust_len, dptr,
+ dict_tf_get_page_size(
+ sec_index->table->flags),
+ len, heap);
+ }
+
+ rtree_mbr_from_wkb(dptr + GEO_DATA_HEADER_SIZE,
+ static_cast<uint>(clust_len
+ - GEO_DATA_HEADER_SIZE),
+ SPDIMS,
+ reinterpret_cast<double*>(
+ &tmp_mbr));
+ rtr_read_mbr(sec_field, &sec_mbr);
+
+ if (!MBR_EQUAL_CMP(&sec_mbr, &tmp_mbr)) {
+ is_equal = FALSE;
+ goto func_exit;
+ }
+ } else {
+
+ if (0 != cmp_data_data(col->mtype, col->prtype,
+ clust_field, len,
+ sec_field, sec_len)) {
inequal:
- is_equal = FALSE;
- goto func_exit;
+ is_equal = FALSE;
+ goto func_exit;
+ }
}
}
func_exit:
if (UNIV_LIKELY_NULL(heap)) {
+ if (UNIV_LIKELY_NULL(vcol_storage))
+ innobase_free_row_for_vcol(vcol_storage);
mem_heap_free(heap);
}
return(is_equal);
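
For a virtual column there is nothing to read in the clustered record itself, so the value is recomputed: the clustered row is rebuilt with ROW_COPY_POINTERS and fed to innobase_get_computed_value(). The flow above, condensed, with error handling and the one-time vcol_storage allocation omitted:

	dtuple_t* row = row_build(ROW_COPY_POINTERS, clust_index,
				  clust_rec, clust_offs,
				  NULL, NULL, NULL, &ext, heap);
	dfield_t* vfield = innobase_get_computed_value(
		row, v_col, clust_index, &heap, NULL, NULL,
		thr_get_trx(thr)->mysql_thd,
		thr->prebuilt->m_mysql_table,
		record, NULL, NULL, NULL);
	clust_field = static_cast<byte*>(vfield->data);
	clust_len = vfield->len;
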
@@ -268,8 +346,7 @@ func_exit:
/*********************************************************************//**
Creates a select node struct.
-@return own: select node struct */
-UNIV_INTERN
+@return own: select node struct */
sel_node_t*
sel_node_create(
/*============*/
@@ -291,7 +368,6 @@ sel_node_create(
/*********************************************************************//**
Frees the memory private to a select node when a query graph is freed,
does not free the heap where the node was originally created. */
-UNIV_INTERN
void
sel_node_free_private(
/*==================*/
@@ -418,7 +494,7 @@ row_sel_fetch_columns(
dict_index_t* index, /*!< in: record index */
const rec_t* rec, /*!< in: record in a clustered or non-clustered
index; must be protected by a page latch */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
sym_node_t* column) /*!< in: first column in a column list, or
NULL */
{
@@ -454,7 +530,7 @@ row_sel_fetch_columns(
data = btr_rec_copy_externally_stored_field(
rec, offsets,
- dict_table_zip_size(index->table),
+ dict_table_page_size(index->table),
field_no, &len, heap);
/* data == NULL means that the
@@ -509,7 +585,7 @@ sel_col_prefetch_buf_alloc(
ut_ad(que_node_get_type(column) == QUE_NODE_SYMBOL);
column->prefetch_buf = static_cast<sel_buf_t*>(
- mem_alloc(SEL_MAX_N_PREFETCH * sizeof(sel_buf_t)));
+ ut_malloc_nokey(SEL_MAX_N_PREFETCH * sizeof(sel_buf_t)));
for (i = 0; i < SEL_MAX_N_PREFETCH; i++) {
sel_buf = column->prefetch_buf + i;
@@ -523,7 +599,6 @@ sel_col_prefetch_buf_alloc(
/*********************************************************************//**
Frees a prefetch buffer for a column, including the dynamically allocated
memory for data stored there. */
-UNIV_INTERN
void
sel_col_prefetch_buf_free(
/*======================*/
@@ -537,11 +612,11 @@ sel_col_prefetch_buf_free(
if (sel_buf->val_buf_size > 0) {
- mem_free(sel_buf->data);
+ ut_free(sel_buf->data);
}
}
- mem_free(prefetch_buf);
+ ut_free(prefetch_buf);
}
/*********************************************************************//**
@@ -678,15 +753,15 @@ sel_enqueue_prefetched_row(
/*********************************************************************//**
Builds a previous version of a clustered index record for a consistent read
-@return DB_SUCCESS or error code */
+@return DB_SUCCESS or error code */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_sel_build_prev_vers(
/*====================*/
- read_view_t* read_view, /*!< in: read view */
+ ReadView* read_view, /*!< in: read view */
dict_index_t* index, /*!< in: plan node for table */
rec_t* rec, /*!< in: record in a clustered index */
- ulint** offsets, /*!< in/out: offsets returned by
+ offset_t** offsets, /*!< in/out: offsets returned by
rec_get_offsets(rec, plan->index) */
mem_heap_t** offset_heap, /*!< in/out: memory heap from which
the offsets are allocated */
@@ -707,21 +782,21 @@ row_sel_build_prev_vers(
err = row_vers_build_for_consistent_read(
rec, mtr, index, offsets, read_view, offset_heap,
- *old_vers_heap, old_vers);
+ *old_vers_heap, old_vers, NULL);
return(err);
}
/*********************************************************************//**
Builds the last committed version of a clustered index record for a
semi-consistent read. */
-static MY_ATTRIBUTE((nonnull))
+static
void
row_sel_build_committed_vers_for_mysql(
/*===================================*/
dict_index_t* clust_index, /*!< in: clustered index */
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */
const rec_t* rec, /*!< in: record in a clustered index */
- ulint** offsets, /*!< in/out: offsets returned by
+ offset_t** offsets, /*!< in/out: offsets returned by
rec_get_offsets(rec, clust_index) */
mem_heap_t** offset_heap, /*!< in/out: memory heap from which
the offsets are allocated */
@@ -729,6 +804,8 @@ row_sel_build_committed_vers_for_mysql(
record does not exist in the view:
i.e., it was freshly inserted
afterwards */
+ dtuple_t** vrow, /*!< out: to be filled with old virtual
+ column version if any */
mtr_t* mtr) /*!< in: mtr */
{
if (prebuilt->old_vers_heap) {
@@ -740,13 +817,13 @@ row_sel_build_committed_vers_for_mysql(
row_vers_build_for_semi_consistent_read(
rec, mtr, clust_index, offsets, offset_heap,
- prebuilt->old_vers_heap, old_vers);
+ prebuilt->old_vers_heap, old_vers, vrow);
}
/*********************************************************************//**
Tests the conditions which determine when the index segment we are searching
through has been exhausted.
-@return TRUE if row passed the tests */
+@return TRUE if row passed the tests */
UNIV_INLINE
ibool
row_sel_test_end_conds(
@@ -782,7 +859,7 @@ row_sel_test_end_conds(
/*********************************************************************//**
Tests the other conditions.
-@return TRUE if row passed the tests */
+@return TRUE if row passed the tests */
UNIV_INLINE
ibool
row_sel_test_other_conds(
@@ -811,7 +888,7 @@ row_sel_test_other_conds(
/*********************************************************************//**
Retrieves the clustered index record corresponding to a record in a
non-clustered index. Does the necessary locking.
-@return DB_SUCCESS or error code */
+@return DB_SUCCESS or error code */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_sel_get_clust_rec(
@@ -833,15 +910,15 @@ row_sel_get_clust_rec(
rec_t* old_vers;
dberr_t err;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
rec_offs_init(offsets_);
*out_rec = NULL;
offsets = rec_get_offsets(rec,
btr_pcur_get_btr_cur(&plan->pcur)->index,
- offsets, ULINT_UNDEFINED, &heap);
+ offsets, true, ULINT_UNDEFINED, &heap);
row_build_row_ref_fast(plan->clust_ref, plan->clust_map, rec, offsets);
@@ -876,7 +953,7 @@ row_sel_get_clust_rec(
goto func_exit;
}
- offsets = rec_get_offsets(clust_rec, index, offsets,
+ offsets = rec_get_offsets(clust_rec, index, offsets, true,
ULINT_UNDEFINED, &heap);
if (!node->read_view) {
@@ -900,7 +977,7 @@ row_sel_get_clust_rec(
err = lock_clust_rec_read_check_and_lock(
0, btr_pcur_get_block(&plan->clust_pcur),
clust_rec, index, offsets,
- static_cast<enum lock_mode>(node->row_lock_mode),
+ static_cast<lock_mode>(node->row_lock_mode),
lock_type,
thr);
@@ -957,7 +1034,8 @@ row_sel_get_clust_rec(
|| rec_get_deleted_flag(rec, dict_table_is_comp(
plan->table)))
&& !row_sel_sec_rec_is_for_clust_rec(rec, plan->index,
- clust_rec, index)) {
+ clust_rec, index,
+ thr)) {
goto func_exit;
}
}
@@ -965,7 +1043,7 @@ row_sel_get_clust_rec(
/* Fetch the columns needed in test conditions. The clustered
index record is protected by a page latch that was acquired
when plan->clust_pcur was positioned. The latch will not be
- released until mtr_commit(mtr). */
+ released until mtr->commit(). */
ut_ad(!rec_get_deleted_flag(clust_rec, rec_offs_comp(offsets)));
row_sel_fetch_columns(index, clust_rec, offsets,
@@ -981,23 +1059,194 @@ err_exit:
}
/*********************************************************************//**
+Sets a lock on a page of an R-Tree record. This is an all-or-none
+action, mostly because we cannot reposition a record in an R-Tree
+(given the nature of page splitting).
+@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
+UNIV_INLINE
+dberr_t
+sel_set_rtr_rec_lock(
+/*=================*/
+ btr_pcur_t* pcur, /*!< in: cursor */
+ const rec_t* first_rec,/*!< in: record */
+ dict_index_t* index, /*!< in: index */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
+ ulint mode, /*!< in: lock mode */
+ ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or
+				LOCK_REC_NOT_GAP */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ matched_rec_t* match = pcur->btr_cur.rtr_info->matches;
+ mem_heap_t* heap = NULL;
+ dberr_t err = DB_SUCCESS;
+ trx_t* trx = thr_get_trx(thr);
+ buf_block_t* cur_block = btr_pcur_get_block(pcur);
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* my_offsets = const_cast<offset_t*>(offsets);
+ rec_t* rec = const_cast<rec_t*>(first_rec);
+ rtr_rec_vector* match_rec;
+ rtr_rec_vector::iterator end;
+
+ rec_offs_init(offsets_);
+
+ if (match->locked || page_rec_is_supremum(first_rec)) {
+ return(DB_SUCCESS_LOCKED_REC);
+ }
+
+ ut_ad(page_align(first_rec) == cur_block->frame);
+ ut_ad(match->valid);
+
+ rw_lock_x_lock(&(match->block.lock));
+retry:
+ cur_block = btr_pcur_get_block(pcur);
+ ut_ad(rw_lock_own_flagged(&match->block.lock,
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
+ ut_ad(page_is_leaf(buf_block_get_frame(cur_block)));
+
+ err = lock_sec_rec_read_check_and_lock(
+ 0, cur_block, rec, index, my_offsets,
+ static_cast<lock_mode>(mode), type, thr);
+
+ if (err == DB_LOCK_WAIT) {
+re_scan:
+ mtr->commit();
+ trx->error_state = err;
+ que_thr_stop_for_mysql(thr);
+ thr->lock_state = QUE_THR_LOCK_ROW;
+ if (row_mysql_handle_errors(
+ &err, trx, thr, NULL)) {
+ thr->lock_state = QUE_THR_LOCK_NOLOCK;
+ mtr->start();
+
+ mutex_enter(&match->rtr_match_mutex);
+ if (!match->valid && match->matched_recs->empty()) {
+ mutex_exit(&match->rtr_match_mutex);
+ err = DB_RECORD_NOT_FOUND;
+ goto func_end;
+ }
+ mutex_exit(&match->rtr_match_mutex);
+
+ ulint page_no = page_get_page_no(
+ btr_pcur_get_page(pcur));
+ page_id_t page_id(dict_index_get_space(index),
+ page_no);
+
+ cur_block = buf_page_get_gen(
+ page_id, dict_table_page_size(index->table),
+ RW_X_LATCH, NULL, BUF_GET,
+ __FILE__, __LINE__, mtr, &err);
+ } else {
+ mtr->start();
+ goto func_end;
+ }
+
+ DEBUG_SYNC_C("rtr_set_lock_wait");
+
+ if (!match->valid) {
+ /* Page got deleted */
+ mtr->commit();
+ mtr->start();
+ err = DB_RECORD_NOT_FOUND;
+ goto func_end;
+ }
+
+ match->matched_recs->clear();
+
+ rtr_cur_search_with_match(
+ cur_block, index,
+ pcur->btr_cur.rtr_info->search_tuple,
+ pcur->btr_cur.rtr_info->search_mode,
+ &pcur->btr_cur.page_cur,
+ pcur->btr_cur.rtr_info);
+
+ if (!page_is_leaf(buf_block_get_frame(cur_block))) {
+			/* Page got split and promoted (this is only
+			possible for the root page). Release the
+			page and ask for a re-search */
+ mtr->commit();
+ mtr->start();
+ err = DB_RECORD_NOT_FOUND;
+ goto func_end;
+ }
+
+ rec = btr_pcur_get_rec(pcur);
+ my_offsets = offsets_;
+ my_offsets = rec_get_offsets(rec, index, my_offsets, true,
+ ULINT_UNDEFINED, &heap);
+
+ /* No match record */
+ if (page_rec_is_supremum(rec) || !match->valid) {
+ mtr->commit();
+ mtr->start();
+ err = DB_RECORD_NOT_FOUND;
+ goto func_end;
+ }
+
+ goto retry;
+ }
+
+ my_offsets = offsets_;
+ match_rec = match->matched_recs;
+ end = match_rec->end();
+
+ for (rtr_rec_vector::iterator it = match_rec->begin();
+ it != end; ++it) {
+ rtr_rec_t* rtr_rec = &(*it);
+
+ my_offsets = rec_get_offsets(
+ rtr_rec->r_rec, index, my_offsets, true,
+ ULINT_UNDEFINED, &heap);
+
+ err = lock_sec_rec_read_check_and_lock(
+ 0, &match->block, rtr_rec->r_rec, index,
+ my_offsets, static_cast<lock_mode>(mode),
+ type, thr);
+
+ if (err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC) {
+ rtr_rec->locked = true;
+ } else if (err == DB_LOCK_WAIT) {
+ goto re_scan;
+ } else {
+ goto func_end;
+ }
+ }
+
+ match->locked = true;
+
+func_end:
+ rw_lock_x_unlock(&(match->block.lock));
+ if (heap != NULL) {
+ mem_heap_free(heap);
+ }
+
+ ut_ad(err != DB_LOCK_WAIT);
+
+ return(err);
+}
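A minimal standalone sketch of the all-or-none retry pattern that sel_set_rtr_rec_lock() implements above: try to lock every matched record, and on a (simulated) lock wait drop the partial result and retry after a re-scan. The types and the try_lock() helper are hypothetical stand-ins, not the InnoDB API:

    #include <cstdio>
    #include <vector>

    // Hypothetical stand-ins for the InnoDB types used above.
    struct MatchedRec { int id; bool locked = false; };

    enum LockResult { SUCCESS, LOCK_WAIT };

    // Simulated per-record lock attempt: fails once for record 2.
    static LockResult try_lock(const MatchedRec& r) {
        static bool failed_once = false;
        if (r.id == 2 && !failed_once) { failed_once = true; return LOCK_WAIT; }
        return SUCCESS;
    }

    // All-or-none: either every matched record ends up locked, or we
    // release everything and retry, because an R-tree cursor cannot
    // simply be repositioned after a page split.
    static bool lock_all(std::vector<MatchedRec>& matches) {
        for (;;) {
            bool wait = false;
            for (auto& r : matches) {
                if (try_lock(r) == LOCK_WAIT) { wait = true; break; }
                r.locked = true;
            }
            if (!wait) return true;
            for (auto& r : matches) r.locked = false;  // drop partial locks
            // A real implementation would wait for the lock and re-run
            // rtr_cur_search_with_match() here before retrying.
        }
    }

    int main() {
        std::vector<MatchedRec> m{{1}, {2}, {3}};
        std::printf("locked all: %d\n", lock_all(m));
    }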
+
+/*********************************************************************//**
Sets a lock on a record.
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
+@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
UNIV_INLINE
dberr_t
sel_set_rec_lock(
/*=============*/
- const buf_block_t* block, /*!< in: buffer block of rec */
+ btr_pcur_t* pcur, /*!< in: cursor */
const rec_t* rec, /*!< in: record */
dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
ulint mode, /*!< in: lock mode */
ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or
					LOCK_REC_NOT_GAP */
- que_thr_t* thr) /*!< in: query thread */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr) /*!< in: mtr */
{
- trx_t* trx;
- dberr_t err;
+ trx_t* trx;
+ dberr_t err = DB_SUCCESS;
+ const buf_block_t* block;
+
+ block = btr_pcur_get_block(pcur);
trx = thr_get_trx(thr);
@@ -1011,11 +1260,23 @@ sel_set_rec_lock(
if (dict_index_is_clust(index)) {
err = lock_clust_rec_read_check_and_lock(
0, block, rec, index, offsets,
- static_cast<enum lock_mode>(mode), type, thr);
+ static_cast<lock_mode>(mode), type, thr);
} else {
- err = lock_sec_rec_read_check_and_lock(
- 0, block, rec, index, offsets,
- static_cast<enum lock_mode>(mode), type, thr);
+
+ if (dict_index_is_spatial(index)) {
+ if (type == LOCK_GAP || type == LOCK_ORDINARY) {
+ ut_ad(0);
+			ib::error() << "Incorrectly requested GAP lock "
+				"on R-Tree";
+ return(DB_SUCCESS);
+ }
+ err = sel_set_rtr_rec_lock(pcur, rec, index, offsets,
+ mode, type, thr, mtr);
+ } else {
+ err = lock_sec_rec_read_check_and_lock(
+ 0, block, rec, index, offsets,
+ static_cast<lock_mode>(mode), type, thr);
+ }
}
return(err);
@@ -1205,10 +1466,11 @@ plan_reset_cursor(
plan->n_rows_prefetched = 0;
}
+#ifdef BTR_CUR_HASH_ADAPT
/*********************************************************************//**
Tries to do a shortcut to fetch a clustered index record with a unique key,
using the hash index if possible (not always).
-@return SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
+@return SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
static
ulint
row_sel_try_search_shortcut(
@@ -1217,15 +1479,15 @@ row_sel_try_search_shortcut(
plan_t* plan, /*!< in: plan for a unique search in clustered
index */
ibool search_latch_locked,
- /*!< in: whether the search holds
- btr_search_latch */
+					/*!< in: whether the search holds a
+					latch on the search system */
mtr_t* mtr) /*!< in: mtr */
{
dict_index_t* index;
rec_t* rec;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
ulint ret;
rec_offs_init(offsets_);
@@ -1234,11 +1496,8 @@ row_sel_try_search_shortcut(
ut_ad(node->read_view);
ut_ad(plan->unique_search);
ut_ad(!plan->must_get_clust);
-#ifdef UNIV_SYNC_DEBUG
- if (search_latch_locked) {
- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
- }
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!search_latch_locked
+ || rw_lock_own(btr_get_search_latch(index), RW_LOCK_S));
row_sel_open_pcur(plan, search_latch_locked, mtr);
@@ -1263,7 +1522,8 @@ row_sel_try_search_shortcut(
/* This is a non-locking consistent read: if necessary, fetch
a previous version of the record */
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
+ offsets = rec_get_offsets(rec, index, offsets, true,
+ ULINT_UNDEFINED, &heap);
if (dict_index_is_clust(index)) {
if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
@@ -1271,7 +1531,9 @@ row_sel_try_search_shortcut(
ret = SEL_RETRY;
goto func_exit;
}
- } else if (!lock_sec_rec_cons_read_sees(rec, node->read_view)) {
+ } else if (!srv_read_only_mode
+ && !lock_sec_rec_cons_read_sees(
+ rec, index, node->read_view)) {
ret = SEL_RETRY;
goto func_exit;
@@ -1288,7 +1550,7 @@ row_sel_try_search_shortcut(
/* Fetch the columns needed in test conditions. The index
record is protected by a page latch that was acquired when
plan->pcur was positioned. The latch will not be released
- until mtr_commit(mtr). */
+ until mtr->commit(). */
row_sel_fetch_columns(index, rec, offsets,
UT_LIST_GET_FIRST(plan->columns));
@@ -1311,11 +1573,12 @@ func_exit:
}
return(ret);
}
+#endif /* BTR_CUR_HASH_ADAPT */
/*********************************************************************//**
Performs a select step.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
+@return DB_SUCCESS or error code */
+static MY_ATTRIBUTE((warn_unused_result))
dberr_t
row_sel(
/*====*/
@@ -1329,7 +1592,6 @@ row_sel(
rec_t* rec;
rec_t* old_vers;
rec_t* clust_rec;
- ibool search_latch_locked;
ibool consistent_read;
/* The following flag becomes TRUE when we are doing a
@@ -1348,16 +1610,19 @@ row_sel(
contains a clustered index latch, and
&mtr must be committed before we move
to the next non-clustered record */
- ulint found_flag;
dberr_t err;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
rec_offs_init(offsets_);
ut_ad(thr->run_node == node);
- search_latch_locked = FALSE;
+#ifdef BTR_CUR_HASH_ADAPT
+ ibool search_latch_locked = FALSE;
+#else /* BTR_CUR_HASH_ADAPT */
+# define search_latch_locked false
+#endif /* BTR_CUR_HASH_ADAPT */
if (node->read_view) {
/* In consistent reads, we try to do with the hash index and
@@ -1402,16 +1667,17 @@ table_loop:
/* Open a cursor to index, or restore an open cursor position */
- mtr_start(&mtr);
+ mtr.start();
+#ifdef BTR_CUR_HASH_ADAPT
if (consistent_read && plan->unique_search && !plan->pcur_is_open
- && !plan->must_get_clust
- && !plan->table->big_rows) {
+ && !plan->must_get_clust) {
if (!search_latch_locked) {
- rw_lock_s_lock(&btr_search_latch);
+ btr_search_s_lock(index);
search_latch_locked = TRUE;
- } else if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_WAIT_EX) {
+ } else if (rw_lock_get_writer(btr_get_search_latch(index))
+ == RW_LOCK_X_WAIT) {
/* There is an x-latch request waiting: release the
s-latch for a moment; as an s-latch here is often
@@ -1420,36 +1686,35 @@ table_loop:
from acquiring an s-latch for a long time, lowering
performance significantly in multiprocessors. */
- rw_lock_s_unlock(&btr_search_latch);
- rw_lock_s_lock(&btr_search_latch);
+ btr_search_s_unlock(index);
+ btr_search_s_lock(index);
}
- found_flag = row_sel_try_search_shortcut(node, plan,
- search_latch_locked,
- &mtr);
-
- if (found_flag == SEL_FOUND) {
-
+ switch (row_sel_try_search_shortcut(node, plan,
+ search_latch_locked,
+ &mtr)) {
+ case SEL_FOUND:
goto next_table;
-
- } else if (found_flag == SEL_EXHAUSTED) {
-
+ case SEL_EXHAUSTED:
goto table_exhausted;
+ default:
+ ut_ad(0);
+ case SEL_RETRY:
+ break;
}
- ut_ad(found_flag == SEL_RETRY);
-
plan_reset_cursor(plan);
- mtr_commit(&mtr);
- mtr_start(&mtr);
+ mtr.commit();
+ mtr.start();
}
if (search_latch_locked) {
- rw_lock_s_unlock(&btr_search_latch);
+ btr_search_s_unlock(index);
search_latch_locked = FALSE;
}
+#endif /* BTR_CUR_HASH_ADAPT */
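The s-latch courtesy release above (drop and immediately re-take the shared AHI latch when a writer is queued) can be modelled standalone. Here the writer_waiting flag is a hypothetical stand-in for rw_lock_get_writer() == RW_LOCK_X_WAIT; this is not the InnoDB latch API:

    #include <atomic>
    #include <shared_mutex>

    std::shared_mutex search_latch;            // stand-in for the AHI latch
    std::atomic<bool> writer_waiting{false};   // stand-in for RW_LOCK_X_WAIT

    // Called by a reader that already holds the shared latch.
    void yield_to_waiting_writer() {
        if (writer_waiting.load(std::memory_order_relaxed)) {
            // Give the queued writer a chance to acquire the x-latch,
            // then take the s-latch back for our own short read.
            search_latch.unlock_shared();
            search_latch.lock_shared();
        }
    }

    int main() {
        search_latch.lock_shared();
        yield_to_waiting_writer();
        search_latch.unlock_shared();
    }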
if (!plan->pcur_is_open) {
/* Evaluate the expressions to build the search tuple and
@@ -1498,6 +1763,9 @@ rec_loop:
if (!node->asc && cursor_just_opened
&& !page_rec_is_supremum(rec)) {
+	/* "Descending search" is not supported for spatial indexes */
+ ut_ad(!dict_index_is_spatial(index));
+
/* When we open a cursor for a descending search, we must set
a next-key lock on the successor record: otherwise it would
be possible to insert new records next to the cursor position,
@@ -1512,6 +1780,7 @@ rec_loop:
trx = thr_get_trx(thr);
offsets = rec_get_offsets(next_rec, index, offsets,
+ true,
ULINT_UNDEFINED, &heap);
/* If innodb_locks_unsafe_for_binlog option is used
@@ -1532,10 +1801,10 @@ rec_loop:
lock_type = LOCK_ORDINARY;
}
- err = sel_set_rec_lock(btr_pcur_get_block(&plan->pcur),
+ err = sel_set_rec_lock(&plan->pcur,
next_rec, index, offsets,
node->row_lock_mode,
- lock_type, thr);
+ lock_type, thr, &mtr);
switch (err) {
case DB_SUCCESS_LOCKED_REC:
@@ -1570,7 +1839,7 @@ skip_lock:
ulint lock_type;
trx_t* trx;
- offsets = rec_get_offsets(rec, index, offsets,
+ offsets = rec_get_offsets(rec, index, offsets, true,
ULINT_UNDEFINED, &heap);
trx = thr_get_trx(thr);
@@ -1580,7 +1849,8 @@ skip_lock:
we lock only the record, i.e., next-key locking is
not used. */
if (srv_locks_unsafe_for_binlog
- || trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
+ || trx->isolation_level <= TRX_ISO_READ_COMMITTED
+ || dict_index_is_spatial(index)) {
if (page_rec_is_supremum(rec)) {
@@ -1592,9 +1862,10 @@ skip_lock:
lock_type = LOCK_ORDINARY;
}
- err = sel_set_rec_lock(btr_pcur_get_block(&plan->pcur),
+ err = sel_set_rec_lock(&plan->pcur,
rec, index, offsets,
- node->row_lock_mode, lock_type, thr);
+ node->row_lock_mode, lock_type,
+ thr, &mtr);
switch (err) {
case DB_SUCCESS_LOCKED_REC:
@@ -1655,7 +1926,8 @@ skip_lock:
/* PHASE 3: Get previous version in a consistent read */
cons_read_requires_clust_rec = FALSE;
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
+ offsets = rec_get_offsets(rec, index, offsets, true,
+ ULINT_UNDEFINED, &heap);
if (consistent_read) {
/* This is a non-locking consistent read: if necessary, fetch
@@ -1663,8 +1935,8 @@ skip_lock:
if (dict_index_is_clust(index)) {
- if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
- node->read_view)) {
+ if (!lock_clust_rec_cons_read_sees(
+ rec, index, offsets, node->read_view)) {
err = row_sel_build_prev_vers(
node->read_view, index, rec,
@@ -1685,7 +1957,7 @@ skip_lock:
exhausted. */
offsets = rec_get_offsets(
- rec, index, offsets,
+ rec, index, offsets, true,
ULINT_UNDEFINED, &heap);
/* Fetch the columns needed in
@@ -1695,7 +1967,7 @@ skip_lock:
by row_sel_open_pcur() or
row_sel_restore_pcur_pos().
The latch will not be released
- until mtr_commit(mtr). */
+ until mtr.commit(). */
row_sel_fetch_columns(
index, rec, offsets,
@@ -1712,8 +1984,10 @@ skip_lock:
rec = old_vers;
}
- } else if (!lock_sec_rec_cons_read_sees(rec,
- node->read_view)) {
+ } else if (!srv_read_only_mode
+ && !lock_sec_rec_cons_read_sees(
+ rec, index, node->read_view)) {
+
cons_read_requires_clust_rec = TRUE;
}
}
@@ -1723,7 +1997,7 @@ skip_lock:
/* Fetch the columns needed in test conditions. The record is
protected by a page latch that was acquired by
row_sel_open_pcur() or row_sel_restore_pcur_pos(). The latch
- will not be released until mtr_commit(mtr). */
+ will not be released until mtr.commit(). */
row_sel_fetch_columns(index, rec, offsets,
UT_LIST_GET_FIRST(plan->columns));
@@ -1787,6 +2061,11 @@ skip_lock:
if (rec_get_deleted_flag(clust_rec,
dict_table_is_comp(plan->table))) {
+ /* In delete-marked records, DB_TRX_ID must
+ always refer to an existing update_undo log record. */
+ ut_ad(rec_get_trx_id(clust_rec,
+ dict_table_get_first_index(
+ plan->table)));
/* The record is delete marked: we can skip it */
@@ -1819,8 +2098,7 @@ skip_lock:
ut_ad(plan->pcur.latch_mode == BTR_SEARCH_LEAF);
if ((plan->n_rows_fetched <= SEL_PREFETCH_LIMIT)
- || plan->unique_search || plan->no_prefetch
- || plan->table->big_rows) {
+ || plan->unique_search || plan->no_prefetch) {
/* No prefetch in operation: go to the next table */
@@ -1885,7 +2163,7 @@ next_table:
btr_pcur_store_position(&(plan->pcur), &mtr);
}
- mtr_commit(&mtr);
+ mtr.commit();
mtr_has_extra_clust_latch = FALSE;
@@ -1925,7 +2203,7 @@ table_exhausted:
plan->cursor_at_end = TRUE;
- mtr_commit(&mtr);
+ mtr.commit();
mtr_has_extra_clust_latch = FALSE;
@@ -1975,11 +2253,9 @@ stop_for_a_while:
plan->stored_cursor_rec_processed = FALSE;
btr_pcur_store_position(&(plan->pcur), &mtr);
- mtr_commit(&mtr);
+ mtr.commit();
+ ut_ad(!sync_check_iterate(sync_check()));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(sync_thread_levels_empty_except_dict());
-#endif /* UNIV_SYNC_DEBUG */
err = DB_SUCCESS;
goto func_exit;
@@ -1993,13 +2269,10 @@ commit_mtr_for_a_while:
ut_ad(!search_latch_locked);
btr_pcur_store_position(&(plan->pcur), &mtr);
- mtr_commit(&mtr);
+ mtr.commit();
mtr_has_extra_clust_latch = FALSE;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(sync_thread_levels_empty_except_dict());
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!sync_check_iterate(dict_sync_check()));
goto table_loop;
@@ -2012,17 +2285,17 @@ lock_wait_or_error:
plan->stored_cursor_rec_processed = FALSE;
btr_pcur_store_position(&(plan->pcur), &mtr);
- mtr_commit(&mtr);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(sync_thread_levels_empty_except_dict());
-#endif /* UNIV_SYNC_DEBUG */
+ mtr.commit();
func_exit:
+#ifdef BTR_CUR_HASH_ADAPT
if (search_latch_locked) {
- rw_lock_s_unlock(&btr_search_latch);
+ btr_search_s_unlock(index);
}
- if (UNIV_LIKELY_NULL(heap)) {
+#endif /* BTR_CUR_HASH_ADAPT */
+ ut_ad(!sync_check_iterate(dict_sync_check()));
+
+ if (heap != NULL) {
mem_heap_free(heap);
}
return(err);
@@ -2031,8 +2304,7 @@ func_exit:
/**********************************************************************//**
Performs a select step. This is a high-level function used in SQL execution
graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
row_sel_step(
/*=========*/
@@ -2060,17 +2332,23 @@ row_sel_step(
/* It may be that the current session has not yet started
its transaction, or it has been committed: */
- trx_start_if_not_started_xa(thr_get_trx(thr));
+ trx_start_if_not_started_xa(thr_get_trx(thr), false);
plan_reset_cursor(sel_node_get_nth_plan(node, 0));
if (node->consistent_read) {
/* Assign a read view for the query */
- node->read_view = trx_assign_read_view(
- thr_get_trx(thr));
+ trx_assign_read_view(thr_get_trx(thr));
+
+ if (thr_get_trx(thr)->read_view != NULL) {
+ node->read_view = thr_get_trx(thr)->read_view;
+ } else {
+ node->read_view = NULL;
+ }
+
} else {
sym_node_t* table_node;
- enum lock_mode i_lock_mode;
+ lock_mode i_lock_mode;
if (node->set_x_locks) {
i_lock_mode = LOCK_IX;
@@ -2137,8 +2415,7 @@ row_sel_step(
/**********************************************************************//**
Performs a fetch for a cursor.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
fetch_step(
/*=======*/
@@ -2185,8 +2462,7 @@ fetch_step(
sel_node->common.parent = node;
if (sel_node->state == SEL_NODE_CLOSED) {
- fprintf(stderr,
- "InnoDB: Error: fetch called on a closed cursor\n");
+ ib::error() << "fetch called on a closed cursor";
thr_get_trx(thr)->error_state = DB_ERROR;
@@ -2198,52 +2474,9 @@ fetch_step(
return(thr);
}
-/****************************************************************//**
-Sample callback function for fetch that prints each row.
-@return always returns non-NULL */
-UNIV_INTERN
-void*
-row_fetch_print(
-/*============*/
- void* row, /*!< in: sel_node_t* */
- void* user_arg) /*!< in: not used */
-{
- que_node_t* exp;
- ulint i = 0;
- sel_node_t* node = static_cast<sel_node_t*>(row);
-
- UT_NOT_USED(user_arg);
-
- fprintf(stderr, "row_fetch_print: row %p\n", row);
-
- for (exp = node->select_list;
- exp != 0;
- exp = que_node_get_next(exp), i++) {
-
- dfield_t* dfield = que_node_get_val(exp);
- const dtype_t* type = dfield_get_type(dfield);
-
- fprintf(stderr, " column %lu:\n", (ulong) i);
-
- dtype_print(type);
- putc('\n', stderr);
-
- if (dfield_get_len(dfield) != UNIV_SQL_NULL) {
- ut_print_buf(stderr, dfield_get_data(dfield),
- dfield_get_len(dfield));
- putc('\n', stderr);
- } else {
- fputs(" <NULL>;\n", stderr);
- }
- }
-
- return((void*)42);
-}
-
/***********************************************************//**
Prints a row in a select result.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
row_printf_step(
/*============*/
@@ -2310,7 +2543,6 @@ the parameter key_len. But currently we do not allow search keys where the
last field is only a prefix of the full key field len and print a warning if
such appears. A counterpart of this function is
ha_innobase::store_key_val_for_row() in ha_innodb.cc. */
-UNIV_INTERN
void
row_sel_convert_mysql_key_to_innobase(
/*==================================*/
@@ -2392,30 +2624,42 @@ row_sel_convert_mysql_key_to_innobase(
}
/* Calculate data length and data field total length */
+ if (DATA_LARGE_MTYPE(type) || DATA_GEOMETRY_MTYPE(type)) {
+
+		/* For an R-tree index, the data length should be
+		the total size of the WKB data. */
+ if (dict_index_is_spatial(index)) {
+ ut_ad(DATA_GEOMETRY_MTYPE(type));
+ data_len = key_len;
+ data_field_len = data_offset + data_len;
+ } else {
+ /* The key field is a column prefix of a BLOB
+ or TEXT. */
+
+ ut_a(field->prefix_len > 0);
+
+ /* MySQL stores the actual data length to the
+ first 2 bytes after the optional SQL NULL
+ marker byte. The storage format is
+ little-endian, that is, the most significant
+ byte at a higher address. In UTF-8, MySQL
+ seems to reserve field->prefix_len bytes for
+ storing this field in the key value buffer,
+ even though the actual value only takes data
+			even though the actual value only takes
+			data_len bytes from the start. */
+ data_len = key_ptr[data_offset]
+ + 256 * key_ptr[data_offset + 1];
+ data_field_len = data_offset + 2
+ + field->prefix_len;
+
+ data_offset += 2;
+
+ /* Now that we know the length, we store the
+ column value like it would be a fixed char
+ field */
+ }
- if (type == DATA_BLOB) {
- /* The key field is a column prefix of a BLOB or
- TEXT */
-
- ut_a(field->prefix_len > 0);
-
- /* MySQL stores the actual data length to the first 2
- bytes after the optional SQL NULL marker byte. The
- storage format is little-endian, that is, the most
- significant byte at a higher address. In UTF-8, MySQL
- seems to reserve field->prefix_len bytes for
- storing this field in the key value buffer, even
- though the actual value only takes data_len bytes
- from the start. */
-
- data_len = key_ptr[data_offset]
- + 256 * key_ptr[data_offset + 1];
- data_field_len = data_offset + 2 + field->prefix_len;
-
- data_offset += 2;
-
- /* Now that we know the length, we store the column
- value like it would be a fixed char field */
} else if (field->prefix_len > 0) {
/* Looks like MySQL pads unused end bytes in the
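A worked example of the two-byte length prefix described in the hunk above: for a BLOB/TEXT column prefix (and, as noted below, a true VARCHAR), MySQL stores the actual data length in two little-endian bytes after the optional SQL NULL marker. A self-contained sketch with a hypothetical key buffer:

    #include <cstdio>

    int main() {
        // Hypothetical key buffer fragment: length 0x0105 = 261 bytes,
        // stored least-significant byte first.
        const unsigned char key_ptr[] = {0x05, 0x01 /* , ...data follows */};
        unsigned data_offset = 0;

        // Same arithmetic as in row_sel_convert_mysql_key_to_innobase():
        unsigned data_len = key_ptr[data_offset]
            + 256 * key_ptr[data_offset + 1];

        std::printf("actual data length: %u\n", data_len);  // prints 261
        return 0;
    }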
@@ -2436,10 +2680,9 @@ row_sel_convert_mysql_key_to_innobase(
data_field_len = data_offset + data_len;
}
- if (UNIV_UNLIKELY
- (dtype_get_mysql_type(dfield_get_type(dfield))
+ if ((dtype_get_mysql_type(dfield_get_type(dfield))
== DATA_MYSQL_TRUE_VARCHAR)
- && UNIV_LIKELY(type != DATA_INT)) {
+ && (type != DATA_INT)) {
/* In a MySQL key value format, a true VARCHAR is
always preceded by 2 bytes of a length field.
dfield_get_type(dfield)->len returns the maximum
@@ -2476,19 +2719,14 @@ row_sel_convert_mysql_key_to_innobase(
trick to calculate LIKE 'abc%' type queries there
should never be partial-field prefixes in searches. */
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Warning: using a partial-field"
- " key prefix in search.\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, trx, index);
- fprintf(stderr, ". Last data field length %lu bytes,\n"
- "InnoDB: key ptr now exceeds"
- " key end by %lu bytes.\n"
- "InnoDB: Key value in the MySQL format:\n",
- (ulong) data_field_len,
- (ulong) (key_ptr - key_end));
- fflush(stderr);
+ ib::warn() << "Using a partial-field key prefix in"
+ " search, index " << index->name
+ << " of table " << index->table->name
+ << ". Last data field length "
+ << data_field_len << " bytes, key ptr now"
+ " exceeds key end by " << (key_ptr - key_end)
+ << " bytes. Key value in the MySQL format:";
+
ut_print_buf(stderr, original_key_ptr, key_len);
putc('\n', stderr);
@@ -2497,7 +2735,7 @@ row_sel_convert_mysql_key_to_innobase(
dfield_set_len(dfield, len
- (ulint) (key_ptr - key_end));
}
- ut_ad(0);
+ ut_ad(0);
}
n_fields++;
@@ -2522,7 +2760,7 @@ row_sel_store_row_id_to_prebuilt(
row_prebuilt_t* prebuilt, /*!< in/out: prebuilt */
const rec_t* index_rec, /*!< in: record */
const dict_index_t* index, /*!< in: index of the record */
- const ulint* offsets) /*!< in: rec_get_offsets
+ const offset_t* offsets) /*!< in: rec_get_offsets
(index_rec, index) */
{
const byte* data;
@@ -2535,14 +2773,14 @@ row_sel_store_row_id_to_prebuilt(
dict_index_get_sys_col_pos(index, DATA_ROW_ID), &len);
if (UNIV_UNLIKELY(len != DATA_ROW_ID_LEN)) {
- fprintf(stderr,
- "InnoDB: Error: Row id field is"
- " wrong length %lu in ", (ulong) len);
- dict_index_name_print(stderr, prebuilt->trx, index);
- fprintf(stderr, "\n"
- "InnoDB: Field number %lu, record:\n",
- (ulong) dict_index_get_sys_col_pos(index,
- DATA_ROW_ID));
+
+ ib::error() << "Row id field is wrong length " << len << " in"
+ " index " << index->name
+ << " of table " << index->table->name
+ << ", Field number "
+ << dict_index_get_sys_col_pos(index, DATA_ROW_ID)
+ << ", record:";
+
rec_print_new(stderr, index_rec, offsets);
putc('\n', stderr);
ut_error;
@@ -2551,32 +2789,9 @@ row_sel_store_row_id_to_prebuilt(
ut_memcpy(prebuilt->row_id, data, len);
}
-#ifdef UNIV_DEBUG
-/** Convert a non-SQL-NULL field from Innobase format to MySQL format. */
-# define row_sel_field_store_in_mysql_format(dest,templ,idx,field,src,len) \
- row_sel_field_store_in_mysql_format_func(dest,templ,idx,field,src,len)
-#else /* UNIV_DEBUG */
-/** Convert a non-SQL-NULL field from Innobase format to MySQL format. */
-# define row_sel_field_store_in_mysql_format(dest,templ,idx,field,src,len) \
- row_sel_field_store_in_mysql_format_func(dest,templ,src,len)
-#endif /* UNIV_DEBUG */
-
-/** Stores a non-SQL-NULL field in the MySQL format. The counterpart of this
-function is row_mysql_store_col_in_innobase_format() in row0mysql.cc.
-@param[in,out] dest buffer where to store; NOTE
- that BLOBs are not in themselves stored
- here: the caller must allocate and copy
- the BLOB into buffer before, and pass
- the pointer to the BLOB in 'data'
-@param[in] templ MySQL column template. Its following fields
- are referenced: type, is_unsigned, mysql_col_len,
- mbminlen, mbmaxlen
-@param[in] index InnoDB index
-@param[in] field_no templ->rec_field_no or templ->clust_rec_field_no
- or templ->icp_rec_field_no
-@param[in] data data to store
-@param[in] len length of the data */
-static MY_ATTRIBUTE((nonnull))
+/**************************************************************//**
+Stores a non-SQL-NULL field in the MySQL format. The counterpart of this
+function is row_mysql_store_col_in_innobase_format() in row0mysql.cc. */
void
row_sel_field_store_in_mysql_format_func(
byte* dest,
@@ -2591,7 +2806,8 @@ row_sel_field_store_in_mysql_format_func(
byte* ptr;
#ifdef UNIV_DEBUG
const dict_field_t* field
- = dict_index_get_nth_field(index, field_no);
+ = templ->is_virtual
+ ? NULL : dict_index_get_nth_field(index, field_no);
#endif /* UNIV_DEBUG */
ut_ad(len != UNIV_SQL_NULL);
@@ -2683,6 +2899,11 @@ row_sel_field_store_in_mysql_format_func(
len);
break;
+ case DATA_GEOMETRY:
+	/* We store all geometry data as BLOB data at the server layer. */
+ row_mysql_store_geometry(dest, templ->mysql_col_len, data, len);
+ break;
+
case DATA_MYSQL:
memcpy(dest, data, len);
@@ -2710,7 +2931,8 @@ row_sel_field_store_in_mysql_format_func(
&& field->prefix_len > 0)
|| templ->rec_field_is_prefix);
- ut_ad(!(field->prefix_len % templ->mbmaxlen));
+ ut_ad(templ->is_virtual
+ || !(field->prefix_len % templ->mbmaxlen));
if (templ->mbminlen == 1 && templ->mbmaxlen != 1) {
/* Pad with spaces. This undoes the stripping
@@ -2736,9 +2958,10 @@ row_sel_field_store_in_mysql_format_func(
case DATA_DECIMAL:
/* Above are the valid column types for MySQL data. */
#endif /* UNIV_DEBUG */
- ut_ad(field->prefix_len
- ? field->prefix_len == len
- : templ->mysql_col_len == len);
+ ut_ad((templ->is_virtual && !field)
+ || (field && field->prefix_len
+ ? field->prefix_len == len
+ : templ->mysql_col_len == len));
memcpy(dest, data, len);
}
}
@@ -2773,10 +2996,12 @@ row_sel_store_mysql_field_func(
#ifdef UNIV_DEBUG
const dict_index_t* index,
#endif
- const ulint* offsets,
+ const offset_t* offsets,
ulint field_no,
const mysql_row_templ_t*templ)
{
+ DBUG_ENTER("row_sel_store_mysql_field_func");
+
const byte* data;
ulint len;
@@ -2794,10 +3019,9 @@ row_sel_store_mysql_field_func(
mem_heap_t* heap;
/* Copy an externally stored field to a temporary heap */
- ut_a(!prebuilt->trx->has_search_latch);
ut_ad(field_no == templ->clust_rec_field_no);
- if (UNIV_UNLIKELY(templ->type == DATA_BLOB)) {
+ if (DATA_LARGE_MTYPE(templ->type)) {
if (prebuilt->blob_heap == NULL) {
prebuilt->blob_heap = mem_heap_create(
UNIV_PAGE_SIZE);
@@ -2814,7 +3038,7 @@ row_sel_store_mysql_field_func(
data = btr_rec_copy_externally_stored_field(
rec, offsets,
- dict_table_zip_size(prebuilt->table),
+ dict_table_page_size(prebuilt->table),
field_no, &len, heap);
if (UNIV_UNLIKELY(!data)) {
@@ -2829,7 +3053,7 @@ row_sel_store_mysql_field_func(
ut_a(prebuilt->trx->isolation_level
== TRX_ISO_READ_UNCOMMITTED);
- return(FALSE);
+ DBUG_RETURN(FALSE);
}
ut_a(len != UNIV_SQL_NULL);
@@ -2860,10 +3084,11 @@ row_sel_store_mysql_field_func(
(const byte*) prebuilt->default_rec
+ templ->mysql_col_offset,
templ->mysql_col_len);
- return(TRUE);
+ DBUG_RETURN(TRUE);
}
- if (UNIV_UNLIKELY(templ->type == DATA_BLOB)) {
+ if (DATA_LARGE_MTYPE(templ->type)
+ || DATA_GEOMETRY_MTYPE(templ->type)) {
/* It is a BLOB field locally stored in the
InnoDB record: we MUST copy its contents to
@@ -2878,6 +3103,8 @@ row_sel_store_mysql_field_func(
if (prebuilt->blob_heap == NULL) {
prebuilt->blob_heap = mem_heap_create(
UNIV_PAGE_SIZE);
+ DBUG_PRINT("anna", ("blob_heap allocated: %p",
+ prebuilt->blob_heap));
}
data = static_cast<byte*>(
@@ -2898,45 +3125,115 @@ row_sel_store_mysql_field_func(
&= ~(byte) templ->mysql_null_bit_mask;
}
- return(TRUE);
+ DBUG_RETURN(TRUE);
}
/** Convert a row in the Innobase format to a row in the MySQL format.
Note that the template in prebuilt may advise us to copy only a few
columns to mysql_rec, other columns are left blank. All columns may not
be needed in the query.
-@param[out] mysql_rec row in the MySQL format
-@param[in] prebuilt prebuilt structure
-@param[in] rec Innobase record in the index
- which was described in prebuilt's
- template, or in the clustered index;
- must be protected by a page latch
-@param[in] rec_clust TRUE if the rec in the clustered index
-@param[in] index index of rec
-@param[in] offsets array returned by rec_get_offsets(rec)
-@return TRUE on success, FALSE if not all columns could be retrieved */
-static MY_ATTRIBUTE((warn_unused_result))
-ibool
-row_sel_store_mysql_rec(
+@param[out] mysql_rec row in the MySQL format
+@param[in] prebuilt cursor
+@param[in] rec Innobase record in the index
+ which was described in prebuilt's
+ template, or in the clustered index;
+ must be protected by a page latch
+@param[in] vrow virtual columns
+@param[in] rec_clust whether index must be the clustered index
+@param[in] index index of rec
+@param[in] offsets array returned by rec_get_offsets(rec)
+@retval true on success
+@retval false if not all columns could be retrieved */
+MY_ATTRIBUTE((warn_unused_result))
+static bool row_sel_store_mysql_rec(
byte* mysql_rec,
row_prebuilt_t* prebuilt,
const rec_t* rec,
- ibool rec_clust,
+ const dtuple_t* vrow,
+ bool rec_clust,
const dict_index_t* index,
- const ulint* offsets)
+ const offset_t* offsets)
{
- ulint i;
+ DBUG_ENTER("row_sel_store_mysql_rec");
ut_ad(rec_clust || index == prebuilt->index);
ut_ad(!rec_clust || dict_index_is_clust(index));
if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) {
- mem_heap_free(prebuilt->blob_heap);
- prebuilt->blob_heap = NULL;
+ row_mysql_prebuilt_free_blob_heap(prebuilt);
}
- for (i = 0; i < prebuilt->n_template; i++) {
+ for (ulint i = 0; i < prebuilt->n_template; i++) {
const mysql_row_templ_t*templ = &prebuilt->mysql_template[i];
+
+ if (templ->is_virtual && dict_index_is_clust(index)) {
+ /* Virtual columns are never declared NOT NULL. */
+ ut_ad(templ->mysql_null_bit_mask);
+
+ /* Skip virtual columns if it is not a covered
+ search or virtual key read is not requested. */
+ if (!rec_clust
+ || !prebuilt->index->has_virtual()
+ || (!prebuilt->read_just_key
+ && !prebuilt->m_read_virtual_key)) {
+ /* Initialize the NULL bit. */
+ mysql_rec[templ->mysql_null_byte_offset]
+ |= (byte) templ->mysql_null_bit_mask;
+ continue;
+ }
+
+ dict_v_col_t* col;
+ col = dict_table_get_nth_v_col(
+ index->table, templ->clust_rec_field_no);
+
+ ut_ad(vrow);
+
+ const dfield_t* dfield = dtuple_get_nth_v_field(
+ vrow, col->v_pos);
+
+			/* If this is a partitioned table, it might request
+			InnoDB to fill out virtual column data for search
+			index key values while other non-key columns are
+			also being selected. The non-key virtual columns
+			may not be materialized, and we should skip them. */
+ if (dfield_get_type(dfield)->mtype == DATA_MISSING) {
+#ifdef UNIV_DEBUG
+ ulint prefix;
+#endif /* UNIV_DEBUG */
+ ut_ad(prebuilt->m_read_virtual_key);
+
+ /* If it is part of index key the data should
+ have been materialized. */
+ ut_ad(dict_index_get_nth_col_or_prefix_pos(
+ prebuilt->index, col->v_pos, false,
+ true, &prefix) == ULINT_UNDEFINED);
+
+ continue;
+ }
+
+ if (dfield->len == UNIV_SQL_NULL) {
+ mysql_rec[templ->mysql_null_byte_offset]
+ |= (byte) templ->mysql_null_bit_mask;
+ memcpy(mysql_rec
+ + templ->mysql_col_offset,
+ (const byte*) prebuilt->default_rec
+ + templ->mysql_col_offset,
+ templ->mysql_col_len);
+ } else {
+ row_sel_field_store_in_mysql_format(
+ mysql_rec + templ->mysql_col_offset,
+ templ, index, templ->clust_rec_field_no,
+ (const byte*)dfield->data, dfield->len);
+ if (templ->mysql_null_bit_mask) {
+ mysql_rec[
+ templ->mysql_null_byte_offset]
+ &= ~(byte) templ->mysql_null_bit_mask;
+ }
+ }
+
+ continue;
+ }
+
const ulint field_no
= rec_clust
? templ->clust_rec_field_no
@@ -2954,36 +3251,39 @@ row_sel_store_mysql_rec(
if (!row_sel_store_mysql_field(mysql_rec, prebuilt,
rec, index, offsets,
field_no, templ)) {
- return(FALSE);
+
+ DBUG_RETURN(false);
}
}
/* FIXME: We only need to read the doc_id if an FTS indexed
column is being updated.
- NOTE, the record must be cluster index record. Secondary index
- might not have the Doc ID */
- if (dict_table_has_fts_index(prebuilt->table)
- && dict_index_is_clust(index)) {
-
- prebuilt->fts_doc_id = fts_get_doc_id_from_rec(
- prebuilt->table, rec, NULL);
+	NOTE: the record can be a clustered or secondary index record.
+	If a secondary index is used, then the FTS_DOC_ID column should be
+	part of that index. */
+ if (dict_table_has_fts_index(prebuilt->table)) {
+ if (dict_index_is_clust(index)
+ || prebuilt->fts_doc_id_in_read_set) {
+ prebuilt->fts_doc_id = fts_get_doc_id_from_rec(
+ prebuilt->table, rec, index, NULL);
+ }
}
- return(TRUE);
+ DBUG_RETURN(true);
}
/*********************************************************************//**
Builds a previous version of a clustered index record for a consistent read
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
+@return DB_SUCCESS or error code */
+static MY_ATTRIBUTE((warn_unused_result))
dberr_t
row_sel_build_prev_vers_for_mysql(
/*==============================*/
- read_view_t* read_view, /*!< in: read view */
+ ReadView* read_view, /*!< in: read view */
dict_index_t* clust_index, /*!< in: clustered index */
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */
const rec_t* rec, /*!< in: record in a clustered index */
- ulint** offsets, /*!< in/out: offsets returned by
+ offset_t** offsets, /*!< in/out: offsets returned by
rec_get_offsets(rec, clust_index) */
mem_heap_t** offset_heap, /*!< in/out: memory heap from which
the offsets are allocated */
@@ -2991,6 +3291,8 @@ row_sel_build_prev_vers_for_mysql(
record does not exist in the view:
i.e., it was freshly inserted
afterwards */
+ dtuple_t** vrow, /*!< out: dtuple to hold old virtual
+ column data */
mtr_t* mtr) /*!< in: mtr */
{
dberr_t err;
@@ -3003,18 +3305,33 @@ row_sel_build_prev_vers_for_mysql(
err = row_vers_build_for_consistent_read(
rec, mtr, clust_index, offsets, read_view, offset_heap,
- prebuilt->old_vers_heap, old_vers);
+ prebuilt->old_vers_heap, old_vers, vrow);
return(err);
}
+/** Helper class to cache clust_rec and old_vers */
+class Row_sel_get_clust_rec_for_mysql
+{
+ const rec_t *cached_clust_rec;
+ rec_t *cached_old_vers;
+
+public:
+ Row_sel_get_clust_rec_for_mysql() :
+ cached_clust_rec(NULL), cached_old_vers(NULL) {}
+
+ dberr_t operator()(row_prebuilt_t *prebuilt, dict_index_t *sec_index,
+ const rec_t *rec, que_thr_t *thr, const rec_t **out_rec,
+ offset_t **offsets, mem_heap_t **offset_heap,
+ dtuple_t **vrow, mtr_t *mtr);
+};
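The class above memoizes the last (clust_rec, old_vers) pair so that repeated lookups of the same clustered record during one scan do not rebuild the previous version (only the offsets are recomputed on a hit, as a later hunk shows). A minimal standalone sketch of the same memoizing-functor shape, with hypothetical types rather than the real signature:

    #include <cstdio>

    struct Rec { int id; };

    // Caches the last input record and its computed "old version",
    // mirroring Row_sel_get_clust_rec_for_mysql above.
    class CachedOldVers {
        const Rec* cached_in = nullptr;
        Rec        cached_out{};
    public:
        Rec operator()(const Rec* in) {
            if (in != cached_in) {
                cached_out = Rec{in->id - 1};  // stand-in for building prev vers
                cached_in  = in;
            }
            return cached_out;                 // cache hit: reuse old version
        }
    };

    int main() {
        CachedOldVers get_old_vers;
        Rec r{42};
        std::printf("%d %d\n", get_old_vers(&r).id, get_old_vers(&r).id);
    }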
+
/*********************************************************************//**
Retrieves the clustered index record corresponding to a record in a
non-clustered index. Does the necessary locking. Used in the MySQL
interface.
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
+@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
dberr_t
-row_sel_get_clust_rec_for_mysql(
+Row_sel_get_clust_rec_for_mysql::operator()(
/*============================*/
row_prebuilt_t* prebuilt,/*!< in: prebuilt struct in the handle */
dict_index_t* sec_index,/*!< in: secondary index where rec resides */
@@ -3027,12 +3344,13 @@ row_sel_get_clust_rec_for_mysql(
it, NULL if the old version did not exist
in the read view, i.e., it was a fresh
inserted version */
- ulint** offsets,/*!< in: offsets returned by
+ offset_t** offsets,/*!< in: offsets returned by
rec_get_offsets(rec, sec_index);
out: offsets returned by
rec_get_offsets(out_rec, clust_index) */
mem_heap_t** offset_heap,/*!< in/out: memory heap from which
the offsets are allocated */
+ dtuple_t** vrow, /*!< out: virtual column to fill */
mtr_t* mtr) /*!< in: mtr used to get access to the
non-clustered record; the same mtr is used to
access the clustered index */
@@ -3056,38 +3374,101 @@ row_sel_get_clust_rec_for_mysql(
btr_pcur_open_with_no_init(clust_index, prebuilt->clust_ref,
PAGE_CUR_LE, BTR_SEARCH_LEAF,
- &prebuilt->clust_pcur, 0, mtr);
+ prebuilt->clust_pcur, 0, mtr);
- clust_rec = btr_pcur_get_rec(&prebuilt->clust_pcur);
+ clust_rec = btr_pcur_get_rec(prebuilt->clust_pcur);
- prebuilt->clust_pcur.trx_if_known = trx;
+ prebuilt->clust_pcur->trx_if_known = trx;
/* Note: only if the search ends up on a non-infimum record is the
low_match value the real match to the search tuple */
if (!page_rec_is_user_rec(clust_rec)
- || btr_pcur_get_low_match(&prebuilt->clust_pcur)
+ || btr_pcur_get_low_match(prebuilt->clust_pcur)
< dict_index_get_n_unique(clust_index)) {
+ btr_cur_t* btr_cur = btr_pcur_get_btr_cur(prebuilt->pcur);
+
+		/* If this is a spatial index scan, and we are reading
+		from a shadow buffer, the record could already be
+		deleted (due to rollback etc.), so get the original
+		page and verify it */
+ if (dict_index_is_spatial(sec_index)
+ && btr_cur->rtr_info->matches
+ && (page_align(rec)
+ == btr_cur->rtr_info->matches->block.frame
+ || rec != btr_pcur_get_rec(prebuilt->pcur))) {
+#ifdef UNIV_DEBUG
+ rtr_info_t* rtr_info = btr_cur->rtr_info;
+ mutex_enter(&rtr_info->matches->rtr_match_mutex);
+ /* The page could be deallocated (by rollback etc.) */
+ if (!rtr_info->matches->valid) {
+ mutex_exit(&rtr_info->matches->rtr_match_mutex);
+ clust_rec = NULL;
+
+ err = DB_SUCCESS;
+ goto func_exit;
+ }
+ mutex_exit(&rtr_info->matches->rtr_match_mutex);
- /* In a rare case it is possible that no clust rec is found
- for a delete-marked secondary index record: if in row0umod.cc
- in row_undo_mod_remove_clust_low() we have already removed
- the clust rec, while purge is still cleaning and removing
- secondary index records associated with earlier versions of
- the clustered index record. In that case we know that the
- clustered index record did not exist in the read view of
- trx. */
+ if (rec_get_deleted_flag(rec,
+ dict_table_is_comp(sec_index->table))
+ && prebuilt->select_lock_type == LOCK_NONE) {
- if (!rec_get_deleted_flag(rec,
+ clust_rec = NULL;
+
+ err = DB_SUCCESS;
+ goto func_exit;
+ }
+
+ if (rec != btr_pcur_get_rec(prebuilt->pcur)) {
+ clust_rec = NULL;
+
+ err = DB_SUCCESS;
+ goto func_exit;
+ }
+
+ /* FIXME: Why is this block not the
+ same as btr_pcur_get_block(prebuilt->pcur),
+ and is it not unsafe to use RW_NO_LATCH here? */
+ buf_block_t* block = buf_page_get_gen(
+ btr_pcur_get_block(prebuilt->pcur)->page.id,
+ dict_table_page_size(sec_index->table),
+ RW_NO_LATCH, NULL, BUF_GET,
+ __FILE__, __LINE__, mtr, &err);
+ mem_heap_t* heap = mem_heap_create(256);
+ dtuple_t* tuple = dict_index_build_data_tuple(
+ rec, sec_index, true,
+ sec_index->n_fields, heap);
+ page_cur_t page_cursor;
+
+ ulint low_match = page_cur_search(
+ block, sec_index, tuple,
+ PAGE_CUR_LE, &page_cursor);
+
+ ut_ad(low_match < dtuple_get_n_fields_cmp(tuple));
+ mem_heap_free(heap);
+ clust_rec = NULL;
+
+ err = DB_SUCCESS;
+ goto func_exit;
+#endif /* UNIV_DEBUG */
+ } else if (!rec_get_deleted_flag(rec,
dict_table_is_comp(sec_index->table))
|| prebuilt->select_lock_type != LOCK_NONE) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: error clustered record"
- " for sec rec not found\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, trx, sec_index);
- fputs("\n"
- "InnoDB: sec index record ", stderr);
+ /* In a rare case it is possible that no clust
+ rec is found for a delete-marked secondary index
+ record: if in row0umod.cc in
+ row_undo_mod_remove_clust_low() we have already removed
+ the clust rec, while purge is still cleaning and
+ removing secondary index records associated with
+ earlier versions of the clustered index record.
+ In that case we know that the clustered index
+ record did not exist in the read view of trx. */
+			ib::error() << "Clustered record for sec rec not found"
+				" in index " << sec_index->name
+ << " of table " << sec_index->table->name;
+
+ fputs("InnoDB: sec index record ", stderr);
rec_print(stderr, rec, sec_index);
fputs("\n"
"InnoDB: clust index record ", stderr);
@@ -3106,7 +3487,7 @@ row_sel_get_clust_rec_for_mysql(
goto func_exit;
}
- *offsets = rec_get_offsets(clust_rec, clust_index, *offsets,
+ *offsets = rec_get_offsets(clust_rec, clust_index, *offsets, true,
ULINT_UNDEFINED, offset_heap);
if (prebuilt->select_lock_type != LOCK_NONE) {
@@ -3115,9 +3496,9 @@ row_sel_get_clust_rec_for_mysql(
we set a LOCK_REC_NOT_GAP type lock */
err = lock_clust_rec_read_check_and_lock(
- 0, btr_pcur_get_block(&prebuilt->clust_pcur),
+ 0, btr_pcur_get_block(prebuilt->clust_pcur),
clust_rec, clust_index, *offsets,
- static_cast<enum lock_mode>(prebuilt->select_lock_type),
+ static_cast<lock_mode>(prebuilt->select_lock_type),
LOCK_REC_NOT_GAP,
thr);
@@ -3140,17 +3521,38 @@ row_sel_get_clust_rec_for_mysql(
if (trx->isolation_level > TRX_ISO_READ_UNCOMMITTED
&& !lock_clust_rec_cons_read_sees(
clust_rec, clust_index, *offsets,
- trx->read_view)) {
+ trx_get_read_view(trx))) {
- /* The following call returns 'offsets' associated with
- 'old_vers' */
- err = row_sel_build_prev_vers_for_mysql(
- trx->read_view, clust_index, prebuilt,
- clust_rec, offsets, offset_heap, &old_vers,
- mtr);
+ if (clust_rec != cached_clust_rec) {
+ /* The following call returns 'offsets' associated with
+ 'old_vers' */
+ err = row_sel_build_prev_vers_for_mysql(
+ trx->read_view, clust_index, prebuilt,
+ clust_rec, offsets, offset_heap, &old_vers,
+ vrow, mtr);
- if (err != DB_SUCCESS || old_vers == NULL) {
+ if (err != DB_SUCCESS) {
+
+ goto err_exit;
+ }
+ cached_clust_rec = clust_rec;
+ cached_old_vers = old_vers;
+ } else {
+ err = DB_SUCCESS;
+ old_vers = cached_old_vers;
+
+ /* The offsets need not be same for the latest
+ version of clust_rec and its old version
+ old_vers. Re-calculate the offsets for old_vers. */
+
+ if (old_vers != NULL) {
+ *offsets = rec_get_offsets(
+ old_vers, clust_index, *offsets,
+ true, ULINT_UNDEFINED, offset_heap);
+ }
+ }
+ if (old_vers == NULL) {
goto err_exit;
}
@@ -3170,20 +3572,17 @@ row_sel_get_clust_rec_for_mysql(
visit through secondary index records that would not really
exist in our snapshot. */
+	/* For a spatial index, since the rec may come from the shadow
+	buffer, we need to check whether it exactly matches the clust_rec. */
if (clust_rec
&& (old_vers
|| trx->isolation_level <= TRX_ISO_READ_UNCOMMITTED
+ || dict_index_is_spatial(sec_index)
|| rec_get_deleted_flag(rec, dict_table_is_comp(
sec_index->table)))
&& !row_sel_sec_rec_is_for_clust_rec(
- rec, sec_index, clust_rec, clust_index)) {
+ rec, sec_index, clust_rec, clust_index, thr)) {
clust_rec = NULL;
-#ifdef UNIV_SEARCH_DEBUG
- } else {
- ut_a(clust_rec == NULL
- || row_sel_sec_rec_is_for_clust_rec(
- rec, sec_index, clust_rec, clust_index));
-#endif
}
err = DB_SUCCESS;
@@ -3192,14 +3591,11 @@ row_sel_get_clust_rec_for_mysql(
func_exit:
*out_rec = clust_rec;
- /* Store the current position if select_lock_type is not
- LOCK_NONE or if we are scanning using InnoDB APIs */
- if (prebuilt->select_lock_type != LOCK_NONE
- || prebuilt->innodb_api) {
+ if (prebuilt->select_lock_type != LOCK_NONE) {
/* We may use the cursor in update or in unlock_row():
store its position */
- btr_pcur_store_position(&prebuilt->clust_pcur, mtr);
+ btr_pcur_store_position(prebuilt->clust_pcur, mtr);
}
err_exit:
@@ -3245,7 +3641,7 @@ sel_restore_position_for_mysql(
ut_ad((pcur->rel_pos == BTR_PCUR_ON)
== btr_pcur_is_on_user_rec(pcur));
}
-#endif
+#endif /* UNIV_DEBUG */
/* The position may need be adjusted for rel_pos and moves_up. */
@@ -3322,7 +3718,7 @@ row_sel_copy_cached_field_for_mysql(
UNIV_MEM_ASSERT_W(buf, templ->mysql_col_len);
if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR
- && templ->type != DATA_INT) {
+ && (templ->type != DATA_INT)) {
/* Check for != DATA_INT to make sure we do
not treat MySQL ENUM or SET as a true VARCHAR!
Find the actual length of the true VARCHAR field. */
@@ -3340,31 +3736,36 @@ row_sel_copy_cached_field_for_mysql(
/** Copy used fields from cached row.
Copy cache record field by field, don't touch fields that
are not covered by current key.
-@param[out] buf Where to copy the MySQL row.
-@param[in] cached_rec What to copy (in MySQL row format).
-@param[in] prebuilt prebuilt struct. */
+@param[out] buf Where to copy the MySQL row.
+@param[in] cached_rec What to copy (in MySQL row format).
+@param[in] prebuilt prebuilt struct. */
void
row_sel_copy_cached_fields_for_mysql(
- byte* buf,
- const byte* cached_rec,
- row_prebuilt_t* prebuilt)
+ byte* buf,
+ const byte* cached_rec,
+ row_prebuilt_t* prebuilt)
{
- const mysql_row_templ_t*templ;
- ulint i;
- for (i = 0; i < prebuilt->n_template; i++) {
- templ = prebuilt->mysql_template + i;
-
- row_sel_copy_cached_field_for_mysql(
- buf, cached_rec, templ);
- /* Copy NULL bit of the current field from cached_rec
- to buf */
- if (templ->mysql_null_bit_mask) {
- buf[templ->mysql_null_byte_offset]
- ^= (buf[templ->mysql_null_byte_offset]
- ^ cached_rec[templ->mysql_null_byte_offset])
- & (byte) templ->mysql_null_bit_mask;
- }
- }
+ const mysql_row_templ_t*templ;
+ ulint i;
+ for (i = 0; i < prebuilt->n_template; i++) {
+ templ = prebuilt->mysql_template + i;
+
+ /* Skip virtual columns */
+ if (templ->is_virtual) {
+ continue;
+ }
+
+ row_sel_copy_cached_field_for_mysql(
+ buf, cached_rec, templ);
+ /* Copy NULL bit of the current field from cached_rec
+ to buf */
+ if (templ->mysql_null_bit_mask) {
+ buf[templ->mysql_null_byte_offset]
+ ^= (buf[templ->mysql_null_byte_offset]
+ ^ cached_rec[templ->mysql_null_byte_offset])
+ & (byte) templ->mysql_null_bit_mask;
+ }
+ }
}
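The expression buf ^= (buf ^ cached) & mask used above is the standard masked bit-merge: it copies exactly the bits selected by mask from cached_rec into buf and leaves all other bits untouched. A self-contained demonstration:

    #include <cassert>

    int main() {
        unsigned char buf    = 0b1010'1010;
        unsigned char cached = 0b0101'0101;
        unsigned char mask   = 0b0000'1111;   // e.g. mysql_null_bit_mask

        // Copy the masked bits of 'cached' into 'buf'.
        buf ^= (buf ^ cached) & mask;

        assert(buf == 0b1010'0101);           // high nibble kept, low replaced
        return 0;
    }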
/********************************************************************//**
@@ -3388,22 +3789,7 @@ row_sel_dequeue_cached_row_for_mysql(
cached_rec = prebuilt->fetch_cache[prebuilt->fetch_cache_first];
if (UNIV_UNLIKELY(prebuilt->keep_other_fields_on_keyread)) {
- /* Copy cache record field by field, don't touch fields that
- are not covered by current key */
-
- for (i = 0; i < prebuilt->n_template; i++) {
- templ = prebuilt->mysql_template + i;
- row_sel_copy_cached_field_for_mysql(
- buf, cached_rec, templ);
- /* Copy NULL bit of the current field from cached_rec
- to buf */
- if (templ->mysql_null_bit_mask) {
- buf[templ->mysql_null_byte_offset]
- ^= (buf[templ->mysql_null_byte_offset]
- ^ cached_rec[templ->mysql_null_byte_offset])
- & (byte) templ->mysql_null_bit_mask;
- }
- }
+ row_sel_copy_cached_fields_for_mysql(buf, cached_rec, prebuilt);
} else if (prebuilt->mysql_prefix_len > 63) {
/* The record is long. Copy it field by field, in case
there are some long VARCHAR column of which only a
@@ -3415,8 +3801,17 @@ row_sel_dequeue_cached_row_for_mysql(
/* Then copy the requested fields. */
for (i = 0; i < prebuilt->n_template; i++) {
+ templ = prebuilt->mysql_template + i;
+
+ /* Skip virtual columns */
+ if (templ->is_virtual
+ && !(dict_index_has_virtual(prebuilt->index)
+ && prebuilt->read_just_key)) {
+ continue;
+ }
+
row_sel_copy_cached_field_for_mysql(
- buf, cached_rec, prebuilt->mysql_template + i);
+ buf, cached_rec, templ);
}
} else {
ut_memcpy(buf, cached_rec, prebuilt->mysql_prefix_len);
@@ -3444,7 +3839,7 @@ row_sel_prefetch_cache_init(
/* Reserve space for the magic number. */
sz = UT_ARR_SIZE(prebuilt->fetch_cache) * (prebuilt->mysql_row_len + 8);
- ptr = static_cast<byte*>(mem_alloc(sz));
+ ptr = static_cast<byte*>(ut_malloc_nokey(sz));
for (i = 0; i < UT_ARR_SIZE(prebuilt->fetch_cache); i++) {
@@ -3510,44 +3905,34 @@ row_sel_enqueue_cache_row_for_mysql(
++prebuilt->n_fetch_cached;
}
+#ifdef BTR_CUR_HASH_ADAPT
/*********************************************************************//**
Tries to do a shortcut to fetch a clustered index record with a unique key,
using the hash index if possible (not always). We assume that the search
mode is PAGE_CUR_GE, that it is a consistent read, that there is a read
view in trx, and that the btr search latch has been locked in S-mode if
the AHI is enabled.
-@return SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
+@return SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
static
ulint
row_sel_try_search_shortcut_for_mysql(
/*==================================*/
const rec_t** out_rec,/*!< out: record if found */
row_prebuilt_t* prebuilt,/*!< in: prebuilt struct */
- ulint** offsets,/*!< in/out: for rec_get_offsets(*out_rec) */
+ offset_t** offsets,/*!< in/out: for rec_get_offsets(*out_rec) */
mem_heap_t** heap, /*!< in/out: heap for rec_get_offsets() */
mtr_t* mtr) /*!< in: started mtr */
{
dict_index_t* index = prebuilt->index;
const dtuple_t* search_tuple = prebuilt->search_tuple;
- btr_pcur_t* pcur = &prebuilt->pcur;
+ btr_pcur_t* pcur = prebuilt->pcur;
trx_t* trx = prebuilt->trx;
const rec_t* rec;
ut_ad(dict_index_is_clust(index));
ut_ad(!prebuilt->templ_contains_blob);
-#ifndef UNIV_SEARCH_DEBUG
btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, pcur,
- (trx->has_search_latch)
- ? RW_S_LATCH
- : 0,
- mtr);
-#else /* UNIV_SEARCH_DEBUG */
- btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, pcur,
- 0,
- mtr);
-#endif /* UNIV_SEARCH_DEBUG */
+ BTR_SEARCH_LEAF, pcur, RW_S_LATCH, mtr);
rec = btr_pcur_get_rec(pcur);
if (!page_rec_is_user_rec(rec)) {
@@ -3567,16 +3952,19 @@ row_sel_try_search_shortcut_for_mysql(
/* This is a non-locking consistent read: if necessary, fetch
a previous version of the record */
- *offsets = rec_get_offsets(rec, index, *offsets,
+ *offsets = rec_get_offsets(rec, index, *offsets, true,
ULINT_UNDEFINED, heap);
- if (!lock_clust_rec_cons_read_sees(rec, index,
- *offsets, trx->read_view)) {
+ if (!lock_clust_rec_cons_read_sees(
+ rec, index, *offsets, trx_get_read_view(trx))) {
return(SEL_RETRY);
}
if (rec_get_deleted_flag(rec, dict_table_is_comp(index->table))) {
+ /* In delete-marked records, DB_TRX_ID must
+ always refer to an existing undo log record. */
+ ut_ad(row_get_rec_trx_id(rec, index, *offsets));
return(SEL_EXHAUSTED);
}
@@ -3585,12 +3973,13 @@ row_sel_try_search_shortcut_for_mysql(
return(SEL_FOUND);
}
+#endif /* BTR_CUR_HASH_ADAPT */
/*********************************************************************//**
Check a pushed-down index condition.
@return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */
static
-enum icp_result
+ICP_RESULT
row_search_idx_cond_check(
/*======================*/
byte* mysql_rec, /*!< out: record
@@ -3600,9 +3989,9 @@ row_search_idx_cond_check(
row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct
for the table handle */
const rec_t* rec, /*!< in: InnoDB record */
- const ulint* offsets) /*!< in: rec_get_offsets() */
+ const offset_t* offsets) /*!< in: rec_get_offsets() */
{
- enum icp_result result;
+ ICP_RESULT result;
ulint i;
ut_ad(rec_offs_validate(rec, prebuilt->index, offsets));
@@ -3623,6 +4012,11 @@ row_search_idx_cond_check(
for (i = 0; i < prebuilt->idx_cond_n_cols; i++) {
const mysql_row_templ_t*templ = &prebuilt->mysql_template[i];
+ /* Skip virtual columns */
+ if (templ->is_virtual) {
+ continue;
+ }
+
if (!row_sel_store_mysql_field(mysql_rec, prebuilt,
rec, prebuilt->index, offsets,
templ->icp_rec_field_no,
@@ -3646,7 +4040,7 @@ row_search_idx_cond_check(
if (!prebuilt->need_to_access_clustered
|| dict_index_is_clust(prebuilt->index)) {
if (!row_sel_store_mysql_rec(
- mysql_rec, prebuilt, rec, FALSE,
+ mysql_rec, prebuilt, rec, NULL, false,
prebuilt->index, offsets)) {
ut_ad(dict_index_is_clust(prebuilt->index));
return(ICP_NO_MATCH);
@@ -3669,6 +4063,60 @@ row_search_idx_cond_check(
return(result);
}
+/** Extract virtual column data from a virtual index record and fill a dtuple
+@param[in] rec the virtual (secondary) index record
+@param[in] index the virtual index
+@param[in,out]	vrow	the dtuple to extract the data into
+@param[in]	heap	memory heap for allocations
+*/
+static
+void
+row_sel_fill_vrow(
+ const rec_t* rec,
+ dict_index_t* index,
+ dtuple_t** vrow,
+ mem_heap_t* heap)
+{
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
+ rec_offs_init(offsets_);
+
+ ut_ad(!(*vrow));
+ ut_ad(page_rec_is_leaf(rec));
+
+ offsets = rec_get_offsets(rec, index, offsets, true,
+ ULINT_UNDEFINED, &heap);
+
+ *vrow = dtuple_create_with_vcol(
+ heap, 0, dict_table_get_n_v_cols(index->table));
+
+ /* Initialize all virtual row's mtype to DATA_MISSING */
+ dtuple_init_v_fld(*vrow);
+
+ for (ulint i = 0; i < dict_index_get_n_fields(index); i++) {
+ const dict_field_t* field;
+ const dict_col_t* col;
+
+ field = dict_index_get_nth_field(index, i);
+ col = dict_field_get_col(field);
+
+ if (dict_col_is_virtual(col)) {
+ const byte* data;
+ ulint len;
+
+ data = rec_get_nth_field(rec, offsets, i, &len);
+
+ const dict_v_col_t* vcol = reinterpret_cast<
+ const dict_v_col_t*>(col);
+
+ dfield_t* dfield = dtuple_get_nth_v_field(
+ *vrow, vcol->v_pos);
+ dfield_set_data(dfield, data, len);
+ dict_col_copy_type(col, dfield_get_type(dfield));
+ }
+ }
+}
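row_sel_fill_vrow() above follows a simple shape: allocate a sparse virtual row with every slot marked missing, then walk the index fields and copy only the virtual-column values that the secondary index actually materializes. A standalone model with hypothetical types:

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Hypothetical stand-in: a field either maps to a virtual column
    // slot (v_pos >= 0) or is a regular column (v_pos < 0).
    struct Field { int v_pos; int value; };

    constexpr int DATA_MISSING = -1;

    std::vector<int> fill_vrow(const std::vector<Field>& index_fields,
                               std::size_t n_v_cols) {
        // Every virtual slot starts out "missing", like dtuple_init_v_fld().
        std::vector<int> vrow(n_v_cols, DATA_MISSING);
        for (const Field& f : index_fields) {
            if (f.v_pos >= 0) {            // virtual column: copy its data
                vrow[f.v_pos] = f.value;
            }
        }
        return vrow;
    }

    int main() {
        std::vector<Field> fields{{-1, 10}, {1, 20}, {-1, 30}};
        for (int v : fill_vrow(fields, 3)) std::printf("%d ", v);
        std::printf("\n");                 // prints: -1 20 -1
    }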
+
/** Return the record field length in characters.
@param[in] col table column of the field
@param[in] field_no field number
@@ -3681,7 +4129,7 @@ rec_field_len_in_chars(
const dict_col_t* col,
const ulint field_no,
const rec_t* rec,
- const ulint* offsets)
+ const offset_t* offsets)
{
const ulint cset = dtype_get_charset_coll(col->prtype);
const CHARSET_INFO* cs = all_charsets[cset];
@@ -3691,7 +4139,7 @@ rec_field_len_in_chars(
rec, offsets, field_no, &rec_field_len));
if (UNIV_UNLIKELY(!cs)) {
- ib_logf(IB_LOG_LEVEL_WARN, "Missing collation " ULINTPF, cset);
+ ib::warn() << "Missing collation " << cset;
return SIZE_T_MAX;
}
@@ -3708,7 +4156,7 @@ static
bool row_search_with_covering_prefix(
row_prebuilt_t* prebuilt,
const rec_t* rec,
- const ulint* offsets)
+ const offset_t* offsets)
{
const dict_index_t* index = prebuilt->index;
ut_ad(!dict_index_is_clust(index));
@@ -3781,46 +4229,47 @@ bool row_search_with_covering_prefix(
return true;
}
-/********************************************************************//**
-Searches for rows in the database. This is used in the interface to
-MySQL. This function opens a cursor, and also implements fetch next
-and fetch prev. NOTE that if we do a search with a full key value
-from a unique index (ROW_SEL_EXACT), then we will not store the cursor
-position and fetch next or fetch prev must not be tried to the cursor!
-@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK,
-DB_LOCK_TABLE_FULL, DB_CORRUPTION, or DB_TOO_BIG_RECORD */
-UNIV_INTERN
+/** Searches for rows in the database using a cursor.
+The function is mainly used for tables that are shared across connections,
+so it employs techniques that help re-construct the rows that the
+transaction is supposed to see (multi-version concurrency control).
+It also has optimizations such as prefetching rows and using the AHI.
+
+@param[out]	buf		buffer for the fetched row in MySQL format
+@param[in]	mode		search mode: PAGE_CUR_L, ...
+@param[in,out]	prebuilt	prebuilt struct for the table handler;
+				this contains the info on search_tuple and
+				index; if the search tuple contains 0 fields
+				then we position the cursor at the start or
+				the end of the index, depending on 'mode'
+@param[in]	match_mode	0, or ROW_SEL_EXACT, or ROW_SEL_EXACT_PREFIX
+@param[in]	direction	0 or ROW_SEL_NEXT or ROW_SEL_PREV;
+				note: if this is != 0, then prebuilt must have
+				a pcur with a stored position! When opening a
+				cursor, 'direction' should be 0.
+@return DB_SUCCESS or error code */
dberr_t
-row_search_for_mysql(
-/*=================*/
- byte* buf, /*!< in/out: buffer for the fetched
- row in the MySQL format */
- ulint mode, /*!< in: search mode PAGE_CUR_L, ... */
- row_prebuilt_t* prebuilt, /*!< in: prebuilt struct for the
- table handle; this contains the info
- of search_tuple, index; if search
- tuple contains 0 fields then we
- position the cursor at the start or
- the end of the index, depending on
- 'mode' */
- ulint match_mode, /*!< in: 0 or ROW_SEL_EXACT or
- ROW_SEL_EXACT_PREFIX */
- ulint direction) /*!< in: 0 or ROW_SEL_NEXT or
- ROW_SEL_PREV; NOTE: if this is != 0,
- then prebuilt must have a pcur
- with stored position! In opening of a
- cursor 'direction' should be 0. */
+row_search_mvcc(
+ byte* buf,
+ page_cur_mode_t mode,
+ row_prebuilt_t* prebuilt,
+ ulint match_mode,
+ ulint direction)
{
+ DBUG_ENTER("row_search_mvcc");
+
dict_index_t* index = prebuilt->index;
ibool comp = dict_table_is_comp(index->table);
const dtuple_t* search_tuple = prebuilt->search_tuple;
- btr_pcur_t* pcur = &prebuilt->pcur;
+ btr_pcur_t* pcur = prebuilt->pcur;
trx_t* trx = prebuilt->trx;
dict_index_t* clust_index;
que_thr_t* thr;
- const rec_t* rec = NULL;
+ const rec_t* rec;
+ dtuple_t* vrow = NULL;
const rec_t* result_rec = NULL;
const rec_t* clust_rec;
+ Row_sel_get_clust_rec_for_mysql row_sel_get_clust_rec_for_mysql;
dberr_t err = DB_SUCCESS;
ibool unique_search = FALSE;
ibool mtr_has_extra_clust_latch = FALSE;
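Callers (the ha_innobase handler methods) drive this function in a fetch loop: direction is 0 on the first call, then ROW_SEL_NEXT or ROW_SEL_PREV on subsequent calls. A simplified standalone model of that loop with stub types — search_stub() is a hypothetical stand-in for row_search_mvcc(), which really takes (buf, mode, prebuilt, match_mode, direction):

    #include <cstdio>

    enum dberr_t { DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX };

    // Stub standing in for row_search_mvcc(): yields three rows, then EOF.
    static dberr_t search_stub(int direction) {
        static int n = 0;
        return (direction == 0 ? (n = 1, DB_SUCCESS)
                : ++n <= 3     ? DB_SUCCESS
                               : DB_END_OF_INDEX);
    }

    int main() {
        // First call opens the cursor (direction 0); later calls fetch next.
        for (int dir = 0; ; dir = /* ROW_SEL_NEXT */ 1) {
            dberr_t err = search_stub(dir);
            if (err != DB_SUCCESS) break;    // EOF or not-found ends the scan
            std::printf("fetched a row\n");
        }
    }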
@@ -3832,110 +4281,50 @@ row_search_for_mysql(
/* if the returned record was locked and we did a semi-consistent
read (fetch the newest committed version), then this is set to
TRUE */
-#ifdef UNIV_SEARCH_DEBUG
- ulint cnt = 0;
-#endif /* UNIV_SEARCH_DEBUG */
ulint next_offs;
ibool same_user_rec;
mtr_t mtr;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
ibool table_lock_waited = FALSE;
byte* next_buf = 0;
+ bool spatial_search = false;
rec_offs_init(offsets_);
ut_ad(index && pcur && search_tuple);
+ ut_a(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED);
+ ut_a(prebuilt->magic_n2 == ROW_PREBUILT_ALLOCATED);
/* We don't support FTS queries from the HANDLER interfaces, because
we implemented FTS as reversed inverted index with auxiliary tables.
So anything related to traditional index query would not apply to
it. */
- if (index->type & DICT_FTS) {
- return(DB_END_OF_INDEX);
+ if (prebuilt->index->type & DICT_FTS) {
+ DBUG_RETURN(DB_END_OF_INDEX);
}
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!sync_check_iterate(sync_check()));
if (dict_table_is_discarded(prebuilt->table)) {
-
- return(DB_TABLESPACE_DELETED);
-
+ DBUG_RETURN(DB_TABLESPACE_DELETED);
} else if (!prebuilt->table->is_readable()) {
- if (fil_space_get(prebuilt->table->space) == NULL) {
- return(DB_TABLESPACE_NOT_FOUND);
- } else {
- return(DB_DECRYPTION_FAILED);
- }
+ DBUG_RETURN(fil_space_get(prebuilt->table->space)
+ ? DB_DECRYPTION_FAILED
+ : DB_TABLESPACE_NOT_FOUND);
} else if (!prebuilt->index_usable) {
-
- return(DB_MISSING_HISTORY);
-
- } else if (dict_index_is_corrupted(index)) {
-
- return(DB_CORRUPTION);
-
- } else if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) {
- fprintf(stderr,
- "InnoDB: Error: trying to free a corrupt\n"
- "InnoDB: table handle. Magic n %lu, table name ",
- (ulong) prebuilt->magic_n);
- ut_print_name(stderr, trx, TRUE, prebuilt->table->name);
- putc('\n', stderr);
-
- mem_analyze_corruption(prebuilt);
-
- ut_error;
- }
-
-#if 0
- /* August 19, 2005 by Heikki: temporarily disable this error
- print until the cursor lock count is done correctly.
- See bugs #12263 and #12456!*/
-
- if (trx->n_mysql_tables_in_use == 0
- && UNIV_UNLIKELY(prebuilt->select_lock_type == LOCK_NONE)) {
- /* Note that if MySQL uses an InnoDB temp table that it
- created inside LOCK TABLES, then n_mysql_tables_in_use can
- be zero; in that case select_lock_type is set to LOCK_X in
- ::start_stmt. */
-
- fputs("InnoDB: Error: MySQL is trying to perform a SELECT\n"
- "InnoDB: but it has not locked"
- " any tables in ::external_lock()!\n",
- stderr);
- trx_print(stderr, trx, 600);
- fputc('\n', stderr);
+ DBUG_RETURN(DB_MISSING_HISTORY);
+ } else if (prebuilt->index->is_corrupted()) {
+ DBUG_RETURN(DB_CORRUPTION);
}
-#endif
-
-#if 0
- fprintf(stderr, "Match mode %lu\n search tuple ",
- (ulong) match_mode);
- dtuple_print(search_tuple);
- fprintf(stderr, "N tables locked %lu\n",
- (ulong) trx->mysql_n_tables_locked);
-#endif
- /*-------------------------------------------------------------*/
- /* PHASE 0: Release a possible s-latch we are holding on the
- adaptive hash index latch if there is someone waiting behind */
-
- if (UNIV_UNLIKELY(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_NOT_LOCKED)
- && trx->has_search_latch) {
-
- /* There is an x-latch request on the adaptive hash index:
- release the s-latch to reduce starvation and wait for
- BTR_SEA_TIMEOUT rounds before trying to keep it again over
- calls from MySQL */
-
- rw_lock_s_unlock(&btr_search_latch);
- trx->has_search_latch = FALSE;
- trx->search_latch_timeout = BTR_SEA_TIMEOUT;
- }
+ /* We need to get the virtual column values stored in the secondary
+ index key, if this is a covering index scan or a virtual key read is
+ requested. */
+ bool need_vrow = dict_index_has_virtual(prebuilt->index)
+ && (prebuilt->read_just_key
+ || prebuilt->m_read_virtual_key);
/* Reset the new record lock info if srv_locks_unsafe_for_binlog
 is set or the session is using a READ COMMITTED isolation level. Then
@@ -4044,8 +4433,17 @@ row_search_for_mysql(
}
}
- mtr_start(&mtr);
+ /* We don't support sequential scan for R-tree index, because
+ there is no meaning in doing so. */
+ if (dict_index_is_spatial(index)
+ && !RTREE_SEARCH_MODE(mode)) {
+ err = DB_END_OF_INDEX;
+ goto func_exit;
+ }
+
+ mtr.start();
+#ifdef BTR_CUR_HASH_ADAPT
/*-------------------------------------------------------------*/
/* PHASE 2: Try fast adaptive hash index search if possible */
@@ -4057,18 +4455,18 @@ row_search_for_mysql(
if (UNIV_UNLIKELY(direction == 0)
&& unique_search
+ && btr_search_enabled
&& dict_index_is_clust(index)
&& !prebuilt->templ_contains_blob
&& !prebuilt->used_in_HANDLER
- && (prebuilt->mysql_row_len < UNIV_PAGE_SIZE / 8)
- && !prebuilt->innodb_api) {
+ && (prebuilt->mysql_row_len < UNIV_PAGE_SIZE / 8)) {
mode = PAGE_CUR_GE;
if (trx->mysql_n_tables_locked == 0
&& prebuilt->select_lock_type == LOCK_NONE
&& trx->isolation_level > TRX_ISO_READ_UNCOMMITTED
- && trx->read_view) {
+ && MVCC::is_view_active(trx->read_view)) {
/* This is a SELECT query done as a consistent read,
and the read view has already been allocated:
@@ -4082,25 +4480,17 @@ row_search_for_mysql(
and if we try that, we can deadlock on the adaptive
hash index semaphore! */
-#ifndef UNIV_SEARCH_DEBUG
- if (!trx->has_search_latch) {
- rw_lock_s_lock(&btr_search_latch);
- trx->has_search_latch = TRUE;
- }
-#endif
+ rw_lock_s_lock(btr_get_search_latch(index));
+
switch (row_sel_try_search_shortcut_for_mysql(
&rec, prebuilt, &offsets, &heap,
&mtr)) {
case SEL_FOUND:
-#ifdef UNIV_SEARCH_DEBUG
- ut_a(0 == cmp_dtuple_rec(search_tuple,
- rec, offsets));
-#endif
/* At this point, rec is protected by
a page latch that was acquired by
row_sel_try_search_shortcut_for_mysql().
The latch will not be released until
- mtr_commit(&mtr). */
+ mtr.commit(). */
ut_ad(!rec_get_deleted_flag(rec, comp));
if (prebuilt->idx_cond) {
@@ -4119,7 +4509,7 @@ row_search_for_mysql(
if (!row_sel_store_mysql_rec(
buf, prebuilt,
- rec, FALSE, index,
+ rec, NULL, false, index,
offsets)) {
/* Only fresh inserts may contain
incomplete externally stored
@@ -4137,34 +4527,28 @@ row_search_for_mysql(
}
shortcut_match:
- mtr_commit(&mtr);
+ mtr.commit();
- /* ut_print_name(stderr, index->name);
- fputs(" shortcut\n", stderr); */
+ /* NOTE that we do NOT store the cursor
+ position */
err = DB_SUCCESS;
- goto release_search_latch_if_needed;
+
+ rw_lock_s_unlock(btr_get_search_latch(index));
+
+ goto func_exit;
case SEL_EXHAUSTED:
shortcut_mismatch:
- mtr_commit(&mtr);
-
- /* ut_print_name(stderr, index->name);
- fputs(" record not found 2\n", stderr); */
+ mtr.commit();
err = DB_RECORD_NOT_FOUND;
-release_search_latch_if_needed:
- if (trx->search_latch_timeout > 0
- && trx->has_search_latch) {
- trx->search_latch_timeout--;
-
- rw_lock_s_unlock(&btr_search_latch);
- trx->has_search_latch = FALSE;
- }
+ rw_lock_s_unlock(btr_get_search_latch(index));
/* NOTE that we do NOT store the cursor
position */
+
goto func_exit;
case SEL_RETRY:
@@ -4174,18 +4558,19 @@ release_search_latch_if_needed:
ut_ad(0);
}
- mtr_commit(&mtr);
- mtr_start(&mtr);
+ mtr.commit();
+ mtr.start();
+
+ rw_lock_s_unlock(btr_get_search_latch(index));
}
}
+#endif /* BTR_CUR_HASH_ADAPT */
/*-------------------------------------------------------------*/
/* PHASE 3: Open or restore index cursor position */
- if (trx->has_search_latch) {
- rw_lock_s_unlock(&btr_search_latch);
- trx->has_search_latch = FALSE;
- }
+ spatial_search = dict_index_is_spatial(index)
+ && mode >= PAGE_CUR_CONTAIN;
/* The state of a running trx can only be changed by the
thread that is currently serving the transaction. Because we
@@ -4193,14 +4578,14 @@ release_search_latch_if_needed:
mutex. */
ut_ad(prebuilt->sql_stat_start || trx->state == TRX_STATE_ACTIVE);
- ut_ad(trx->state == TRX_STATE_NOT_STARTED
- || trx->state == TRX_STATE_ACTIVE);
+ ut_ad(!trx_is_started(trx) || trx->state == TRX_STATE_ACTIVE);
ut_ad(prebuilt->sql_stat_start
|| prebuilt->select_lock_type != LOCK_NONE
- || trx->read_view);
+ || MVCC::is_view_active(trx->read_view)
+ || srv_read_only_mode);
- trx_start_if_not_started(trx);
+ trx_start_if_not_started(trx, false);
if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
&& prebuilt->select_lock_type != LOCK_NONE
@@ -4217,7 +4602,8 @@ release_search_latch_if_needed:
otherwise downward */
if (UNIV_UNLIKELY(direction == 0)) {
- if (mode == PAGE_CUR_GE || mode == PAGE_CUR_G) {
+ if (mode == PAGE_CUR_GE || mode == PAGE_CUR_G
+ || mode >= PAGE_CUR_CONTAIN) {
moves_up = TRUE;
}
} else if (direction == ROW_SEL_NEXT) {
@@ -4235,14 +4621,13 @@ release_search_latch_if_needed:
if (!prebuilt->sql_stat_start) {
/* No need to set an intention lock or assign a read view */
- if (UNIV_UNLIKELY
- (trx->read_view == NULL
- && prebuilt->select_lock_type == LOCK_NONE)) {
+ if (!MVCC::is_view_active(trx->read_view)
+ && !srv_read_only_mode
+ && prebuilt->select_lock_type == LOCK_NONE) {
- fputs("InnoDB: Error: MySQL is trying to"
- " perform a consistent read\n"
- "InnoDB: but the read view is not assigned!\n",
- stderr);
+ ib::error() << "MySQL is trying to perform a"
+ " consistent read but the read view is not"
+ " assigned!";
trx_print(stderr, trx, 600);
fputc('\n', stderr);
ut_error;
@@ -4251,7 +4636,10 @@ release_search_latch_if_needed:
/* This is a consistent read */
/* Assign a read view for the query */
- trx_assign_read_view(trx);
+ if (!srv_read_only_mode) {
+ trx_assign_read_view(trx);
+ }
+
prebuilt->sql_stat_start = FALSE;
} else {
wait_table_again:
@@ -4270,6 +4658,12 @@ wait_table_again:
/* Open or restore index cursor position */
if (UNIV_LIKELY(direction != 0)) {
+ if (spatial_search) {
+ /* R-Tree access does not need to do
+ cursor positioning and repositioning */
+ goto next_rec;
+ }
+
ibool need_to_process = sel_restore_position_for_mysql(
&same_user_rec, BTR_SEARCH_LEAF,
pcur, moves_up, &mtr);
@@ -4296,6 +4690,34 @@ wait_table_again:
}
} else if (dtuple_get_n_fields(search_tuple) > 0) {
+ pcur->btr_cur.thr = thr;
+
+ if (dict_index_is_spatial(index)) {
+ bool need_pred_lock;
+
+ need_pred_lock = (set_also_gap_locks
+ && !(srv_locks_unsafe_for_binlog
+ || trx->isolation_level
+ <= TRX_ISO_READ_COMMITTED)
+ && prebuilt->select_lock_type
+ != LOCK_NONE);
+
+ if (!prebuilt->rtr_info) {
+ prebuilt->rtr_info = rtr_create_rtr_info(
+ need_pred_lock, true,
+ btr_pcur_get_btr_cur(pcur), index);
+ prebuilt->rtr_info->search_tuple = search_tuple;
+ prebuilt->rtr_info->search_mode = mode;
+ rtr_info_update_btr(btr_pcur_get_btr_cur(pcur),
+ prebuilt->rtr_info);
+ } else {
+ rtr_info_reinit_in_cursor(
+ btr_pcur_get_btr_cur(pcur),
+ index, need_pred_lock);
+ prebuilt->rtr_info->search_tuple = search_tuple;
+ prebuilt->rtr_info->search_mode = mode;
+ }
+ }
err = btr_pcur_open_with_no_init(index, search_tuple, mode,
BTR_SEARCH_LEAF,
@@ -4303,30 +4725,33 @@ wait_table_again:
if (err != DB_SUCCESS) {
rec = NULL;
- goto lock_wait_or_error;
+ goto page_read_error;
}
pcur->trx_if_known = trx;
rec = btr_pcur_get_rec(pcur);
+ ut_ad(page_rec_is_leaf(rec));
if (!moves_up
&& !page_rec_is_supremum(rec)
&& set_also_gap_locks
&& !(srv_locks_unsafe_for_binlog
|| trx->isolation_level <= TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE) {
+ && prebuilt->select_lock_type != LOCK_NONE
+ && !dict_index_is_spatial(index)) {
/* Try to place a gap lock on the next index record
to prevent phantoms in ORDER BY ... DESC queries */
const rec_t* next_rec = page_rec_get_next_const(rec);
offsets = rec_get_offsets(next_rec, index, offsets,
+ true,
ULINT_UNDEFINED, &heap);
- err = sel_set_rec_lock(btr_pcur_get_block(pcur),
+ err = sel_set_rec_lock(pcur,
next_rec, index, offsets,
prebuilt->select_lock_type,
- LOCK_GAP, thr);
+ LOCK_GAP, thr, &mtr);
switch (err) {
case DB_SUCCESS_LOCKED_REC:
@@ -4350,18 +4775,20 @@ wait_table_again:
"Table %s is encrypted but encryption service or"
" used key_id is not available. "
" Can't continue reading table.",
- prebuilt->table->name);
+ prebuilt->table->name.m_name);
index->table->file_unreadable = true;
}
rec = NULL;
- goto lock_wait_or_error;
+ goto page_read_error;
}
}
rec_loop:
DEBUG_SYNC_C("row_search_rec_loop");
if (trx_is_interrupted(trx)) {
- btr_pcur_store_position(pcur, &mtr);
+ if (!spatial_search) {
+ btr_pcur_store_position(pcur, &mtr);
+ }
err = DB_INTERRUPTED;
goto normal_return;
}
@@ -4373,21 +4800,11 @@ rec_loop:
if (!index->table->is_readable()) {
err = DB_DECRYPTION_FAILED;
- goto lock_wait_or_error;
+ goto page_read_error;
}
ut_ad(!!page_rec_is_comp(rec) == comp);
-#ifdef UNIV_SEARCH_DEBUG
- /*
- fputs("Using ", stderr);
- dict_index_name_print(stderr, trx, index);
- fprintf(stderr, " cnt %lu ; Page no %lu\n", cnt,
- page_get_page_no(page_align(rec)));
- rec_print(stderr, rec, index);
- printf("delete-mark: %lu\n",
- rec_get_deleted_flag(rec, page_rec_is_comp(rec)));
- */
-#endif /* UNIV_SEARCH_DEBUG */
+ ut_ad(page_rec_is_leaf(rec));
if (page_rec_is_infimum(rec)) {
@@ -4403,7 +4820,8 @@ rec_loop:
if (set_also_gap_locks
&& !(srv_locks_unsafe_for_binlog
|| trx->isolation_level <= TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE) {
+ && prebuilt->select_lock_type != LOCK_NONE
+ && !dict_index_is_spatial(index)) {
/* Try to place a lock on the index record */
@@ -4412,12 +4830,12 @@ rec_loop:
level we do not lock gaps. Supremum record is really
a gap and therefore we do not set locks there. */
- offsets = rec_get_offsets(rec, index, offsets,
+ offsets = rec_get_offsets(rec, index, offsets, true,
ULINT_UNDEFINED, &heap);
- err = sel_set_rec_lock(btr_pcur_get_block(pcur),
+ err = sel_set_rec_lock(pcur,
rec, index, offsets,
prebuilt->select_lock_type,
- LOCK_ORDINARY, thr);
+ LOCK_ORDINARY, thr, &mtr);
switch (err) {
case DB_SUCCESS_LOCKED_REC:
@@ -4429,6 +4847,7 @@ rec_loop:
goto lock_wait_or_error;
}
}
+
/* A page supremum record cannot be in the result set: skip
it now that we have placed a possible lock on it */
@@ -4457,49 +4876,41 @@ rec_loop:
wrong_offs:
if (srv_force_recovery == 0 || moves_up == FALSE) {
- ut_print_timestamp(stderr);
- buf_page_print(page_align(rec), 0);
- fprintf(stderr,
- "\nInnoDB: rec address %p,"
- " buf block fix count %lu\n",
- (void*) rec, (ulong)
- btr_cur_get_block(btr_pcur_get_btr_cur(pcur))
- ->page.buf_fix_count);
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Index corruption: rec offs " ULINTPF
- " next offs " ULINTPF
- ", page no " ULINTPF " ,"
- "InnoDB: ",
- page_offset(rec),
- next_offs,
- page_get_page_no(page_align(rec)));
-
- dict_index_name_print(stderr, trx, index);
- fputs(". Run CHECK TABLE. You may need to\n"
- "InnoDB: restore from a backup, or"
- " dump + drop + reimport the table.\n",
- stderr);
+ ib::error() << "Rec address "
+ << static_cast<const void*>(rec)
+ << ", buf block fix count "
+ << btr_cur_get_block(
+ btr_pcur_get_btr_cur(pcur))->page
+ .buf_fix_count;
+
+ ib::error() << "Index corruption: rec offs "
+ << page_offset(rec) << " next offs "
+ << next_offs << ", page no "
+ << page_get_page_no(page_align(rec))
+ << ", index " << index->name
+ << " of table " << index->table->name
+ << ". Run CHECK TABLE. You may need to"
+ " restore from a backup, or dump + drop +"
+ " reimport the table.";
ut_ad(0);
err = DB_CORRUPTION;
- goto lock_wait_or_error;
+ goto page_read_error;
} else {
/* The user may be dumping a corrupt table. Jump
over the corruption to recover as much as possible. */
- fprintf(stderr,
- "InnoDB: Index corruption: rec offs %lu"
- " next offs %lu, page no %lu,\n"
- "InnoDB: ",
- (ulong) page_offset(rec),
- (ulong) next_offs,
- (ulong) page_get_page_no(page_align(rec)));
- dict_index_name_print(stderr, trx, index);
- fputs(". We try to skip the rest of the page.\n",
- stderr);
-
- btr_pcur_move_to_last_on_page(pcur, &mtr);
-
+ ib::info() << "Index corruption: rec offs "
+ << page_offset(rec) << " next offs "
+ << next_offs << ", page no "
+ << page_get_page_no(page_align(rec))
+ << ", index " << index->name
+ << " of table " << index->table->name
+ << ". We try to skip the rest of the page.";
+
+ page_cur_set_after_last(btr_pcur_get_block(pcur),
+ btr_pcur_get_page_cur(pcur));
+ pcur->old_stored = false;
goto next_rec;
}
}
@@ -4507,26 +4918,23 @@ wrong_offs:
/* Calculate the 'offsets' associated with 'rec' */
- ut_ad(fil_page_get_type(btr_pcur_get_page(pcur)) == FIL_PAGE_INDEX);
+ ut_ad(fil_page_index_page_check(btr_pcur_get_page(pcur)));
ut_ad(btr_page_get_index_id(btr_pcur_get_page(pcur)) == index->id);
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
+ offsets = rec_get_offsets(rec, index, offsets, true,
+ ULINT_UNDEFINED, &heap);
if (UNIV_UNLIKELY(srv_force_recovery > 0)) {
if (!rec_validate(rec, offsets)
|| !btr_index_rec_validate(rec, index, FALSE)) {
- char buf[MAX_FULL_NAME_LEN];
- ut_format_name(index->table->name, FALSE, buf, sizeof(buf));
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Index %s corrupted: rec offs " ULINTPF
- " next offs " ULINTPF
- ", page no " ULINTPF " ."
- " We try to skip the record.",
- buf,
- page_offset(rec),
- next_offs,
- page_get_page_no(page_align(rec)));
+
+ ib::error() << "Index corruption: rec offs "
+ << page_offset(rec) << " next offs "
+ << next_offs << ", page no "
+ << page_get_page_no(page_align(rec))
+ << ", index " << index->name
+ << " of table " << index->table->name
+ << ". We try to skip the record.";
goto next_rec;
}
@@ -4549,7 +4957,8 @@ wrong_offs:
&& !(srv_locks_unsafe_for_binlog
|| trx->isolation_level
<= TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE) {
+ && prebuilt->select_lock_type != LOCK_NONE
+ && !dict_index_is_spatial(index)) {
/* Try to place a gap lock on the index
record only if innodb_locks_unsafe_for_binlog
@@ -4557,10 +4966,10 @@ wrong_offs:
using a READ COMMITTED or lower isolation level. */
err = sel_set_rec_lock(
- btr_pcur_get_block(pcur),
+ pcur,
rec, index, offsets,
prebuilt->select_lock_type, LOCK_GAP,
- thr);
+ thr, &mtr);
switch (err) {
case DB_SUCCESS_LOCKED_REC:
@@ -4582,11 +4991,6 @@ wrong_offs:
pcur->rel_pos = BTR_PCUR_BEFORE;
err = DB_RECORD_NOT_FOUND;
-#if 0
- ut_print_name(stderr, trx, FALSE, index->name);
- fputs(" record not found 3\n", stderr);
-#endif
-
goto normal_return;
}
@@ -4598,7 +5002,8 @@ wrong_offs:
&& !(srv_locks_unsafe_for_binlog
|| trx->isolation_level
<= TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE) {
+ && prebuilt->select_lock_type != LOCK_NONE
+ && !dict_index_is_spatial(index)) {
/* Try to place a gap lock on the index
record only if innodb_locks_unsafe_for_binlog
@@ -4606,10 +5011,10 @@ wrong_offs:
using a READ COMMITTED or lower isolation level. */
err = sel_set_rec_lock(
- btr_pcur_get_block(pcur),
+ pcur,
rec, index, offsets,
prebuilt->select_lock_type, LOCK_GAP,
- thr);
+ thr, &mtr);
switch (err) {
case DB_SUCCESS_LOCKED_REC:
@@ -4631,11 +5036,6 @@ wrong_offs:
pcur->rel_pos = BTR_PCUR_BEFORE;
err = DB_RECORD_NOT_FOUND;
-#if 0
- ut_print_name(stderr, trx, FALSE, index->name);
- fputs(" record not found 4\n", stderr);
-#endif
-
goto normal_return;
}
}
@@ -4664,20 +5064,45 @@ wrong_offs:
if (!rec_get_deleted_flag(rec, comp)) {
goto no_gap_lock;
}
- if (trx_id_t trx_id = index == clust_index
- ? row_get_rec_trx_id(rec, index, offsets)
- : row_vers_impl_x_locked(rec, index, offsets)) {
- if (trx_rw_is_active(trx_id, NULL)) {
- /* The record belongs to an active
- transaction. We must acquire a lock. */
- goto no_gap_lock;
+
+ /* At most one transaction can be active
+ for temporary table. */
+ if (dict_table_is_temporary(clust_index->table)) {
+ goto no_gap_lock;
+ }
+
+ if (index == clust_index) {
+ trx_id_t trx_id = row_get_rec_trx_id(
+ rec, index, offsets);
+ /* In delete-marked records, DB_TRX_ID must
+ always refer to an existing undo log record. */
+ ut_ad(trx_id);
+ if (!trx_rw_is_active(trx_id, NULL, false)) {
+ /* The clustered index record
+ was delete-marked in a committed
+ transaction. Ignore the record. */
+ goto locks_ok_del_marked;
}
+ } else if (trx_t* trx = row_vers_impl_x_locked(
+ rec, index, offsets)) {
+ /* The record belongs to an active
+ transaction. We must acquire a lock. */
+ trx->release_reference();
+ } else {
+ /* The secondary index record does not
+ point to a delete-marked clustered index
+ record that belongs to an active transaction.
+ Ignore the secondary index record, because
+ it is not locked. */
+ goto next_rec;
}
- goto locks_ok_del_marked;
+
+ goto no_gap_lock;
}
if (!set_also_gap_locks
- || (unique_search && !rec_get_deleted_flag(rec, comp))) {
+ || (unique_search && !rec_get_deleted_flag(rec, comp))
+ || dict_index_is_spatial(index)) {
goto no_gap_lock;
} else {
@@ -4705,10 +5130,10 @@ no_gap_lock:
lock_type = LOCK_REC_NOT_GAP;
}
- err = sel_set_rec_lock(btr_pcur_get_block(pcur),
+ err = sel_set_rec_lock(pcur,
rec, index, offsets,
prebuilt->select_lock_type,
- lock_type, thr);
+ lock_type, thr, &mtr);
switch (err) {
const rec_t* old_vers;
@@ -4725,6 +5150,9 @@ no_gap_lock:
case DB_SUCCESS:
break;
case DB_LOCK_WAIT:
+ /* Lock wait for R-tree should already
+ be handled in sel_set_rtr_rec_lock() */
+ ut_ad(!dict_index_is_spatial(index));
/* Never unlock rows that were part of a conflict. */
prebuilt->new_rec_locks = 0;
@@ -4740,17 +5168,14 @@ no_gap_lock:
associated with 'old_vers' */
row_sel_build_committed_vers_for_mysql(
clust_index, prebuilt, rec,
- &offsets, &heap, &old_vers, &mtr);
+ &offsets, &heap, &old_vers, need_vrow ? &vrow : NULL,
+ &mtr);
/* Check whether it was a deadlock or not, if not
a deadlock and the transaction had to wait then
release the lock it is waiting on. */
- lock_mutex_enter();
- trx_mutex_enter(trx);
err = lock_trx_handle_wait(trx);
- lock_mutex_exit();
- trx_mutex_exit(trx);
switch (err) {
case DB_SUCCESS:
@@ -4759,12 +5184,13 @@ no_gap_lock:
Do a normal locking read. */
offsets = rec_get_offsets(
- rec, index, offsets, ULINT_UNDEFINED,
- &heap);
+ rec, index, offsets, true,
+ ULINT_UNDEFINED, &heap);
goto locks_ok;
case DB_DEADLOCK:
goto lock_wait_or_error;
case DB_LOCK_WAIT:
+ ut_ad(!dict_index_is_spatial(index));
err = DB_SUCCESS;
break;
default:
@@ -4780,6 +5206,13 @@ no_gap_lock:
did_semi_consistent_read = TRUE;
rec = old_vers;
break;
+ case DB_RECORD_NOT_FOUND:
+ if (dict_index_is_spatial(index)) {
+ goto next_rec;
+ } else {
+ goto lock_wait_or_error;
+ }
+
default:
goto lock_wait_or_error;
@@ -4800,9 +5233,10 @@ no_gap_lock:
high force recovery level set, we try to avoid crashes
by skipping this lookup */
- if (UNIV_LIKELY(srv_force_recovery < 5)
+ if (srv_force_recovery < 5
&& !lock_clust_rec_cons_read_sees(
- rec, index, offsets, trx->read_view)) {
+ rec, index, offsets,
+ trx_get_read_view(trx))) {
rec_t* old_vers;
/* The following call returns 'offsets'
@@ -4810,7 +5244,8 @@ no_gap_lock:
err = row_sel_build_prev_vers_for_mysql(
trx->read_view, clust_index,
prebuilt, rec, &offsets, &heap,
- &old_vers, &mtr);
+ &old_vers, need_vrow ? &vrow : NULL,
+ &mtr);
if (err != DB_SUCCESS) {
@@ -4835,8 +5270,9 @@ no_gap_lock:
ut_ad(!dict_index_is_clust(index));
- if (!lock_sec_rec_cons_read_sees(
- rec, trx->read_view)) {
+ if (!srv_read_only_mode
+ && !lock_sec_rec_cons_read_sees(
+ rec, index, trx->read_view)) {
/* We should look at the clustered index.
However, as this is a non-locking read,
we can skip the clustered index lookup if
@@ -4868,6 +5304,11 @@ locks_ok:
if (rec_get_deleted_flag(rec, comp)) {
locks_ok_del_marked:
+ /* In delete-marked records, DB_TRX_ID must
+ always refer to an existing undo log record. */
+ ut_ad(index != clust_index
+ || row_get_rec_trx_id(rec, index, offsets));
+
/* The record is delete-marked: we can skip it */
/* This is an optimization to skip setting the next key lock
@@ -4927,18 +5368,22 @@ requires_clust_rec:
mtr_has_extra_clust_latch = TRUE;
+ ut_ad(!vrow);
/* The following call returns 'offsets' associated with
'clust_rec'. Note that 'clust_rec' can be an old version
built for a consistent read. */
err = row_sel_get_clust_rec_for_mysql(prebuilt, index, rec,
thr, &clust_rec,
- &offsets, &heap, &mtr);
+ &offsets, &heap,
+ need_vrow ? &vrow : NULL,
+ &mtr);
switch (err) {
case DB_SUCCESS:
if (clust_rec == NULL) {
/* The record did not exist in the read view */
- ut_ad(prebuilt->select_lock_type == LOCK_NONE);
+ ut_ad(prebuilt->select_lock_type == LOCK_NONE
+ || dict_index_is_spatial(index));
goto next_rec;
}
@@ -4955,6 +5400,7 @@ requires_clust_rec:
err = DB_SUCCESS;
break;
default:
+ vrow = NULL;
goto lock_wait_or_error;
}
@@ -4976,6 +5422,13 @@ requires_clust_rec:
goto next_rec;
}
+ if (need_vrow && !vrow) {
+ if (!heap) {
+ heap = mem_heap_create(100);
+ }
+ row_sel_fill_vrow(rec, index, &vrow, heap);
+ }
+
result_rec = clust_rec;
ut_ad(rec_offs_validate(result_rec, clust_index, offsets));
@@ -4993,8 +5446,8 @@ requires_clust_rec:
authoritative case is in result_rec, the
appropriate version of the clustered index record. */
if (!row_sel_store_mysql_rec(
- buf, prebuilt, result_rec,
- TRUE, clust_index, offsets)) {
+ buf, prebuilt, result_rec, vrow,
+ true, clust_index, offsets)) {
goto next_rec;
}
}
@@ -5011,19 +5464,19 @@ use_covering_index:
offsets));
ut_ad(!rec_get_deleted_flag(result_rec, comp));
- /* At this point, the clustered index record is protected
+ /* Decide whether to prefetch extra rows.
+ At this point, the clustered index record is protected
by a page latch that was acquired when pcur was positioned.
- The latch will not be released until mtr_commit(&mtr). */
+ The latch will not be released until mtr.commit(). */
if ((match_mode == ROW_SEL_EXACT
|| prebuilt->n_rows_fetched >= MYSQL_FETCH_CACHE_THRESHOLD)
&& prebuilt->select_lock_type == LOCK_NONE
+ && !prebuilt->m_no_prefetch
&& !prebuilt->templ_contains_blob
&& !prebuilt->clust_index_was_generated
&& !prebuilt->used_in_HANDLER
- && !prebuilt->innodb_api
- && prebuilt->template_type
- != ROW_MYSQL_DUMMY_TEMPLATE
+ && prebuilt->template_type != ROW_MYSQL_DUMMY_TEMPLATE
&& !prebuilt->in_fts_query) {
/* Inside an update, for example, we do not cache rows,
@@ -5060,7 +5513,7 @@ use_covering_index:
? row_sel_fetch_last_buf(prebuilt) : buf;
if (!row_sel_store_mysql_rec(
- next_buf, prebuilt, result_rec,
+ next_buf, prebuilt, result_rec, vrow,
result_rec != rec,
result_rec != rec ? clust_index : index,
offsets)) {
@@ -5102,6 +5555,7 @@ use_covering_index:
/* We used 'offsets' for the clust
rec, recalculate them for 'rec' */
offsets = rec_get_offsets(rec, index, offsets,
+ true,
ULINT_UNDEFINED,
&heap);
result_rec = rec;
@@ -5112,10 +5566,10 @@ use_covering_index:
rec_offs_size(offsets));
mach_write_to_4(buf,
rec_offs_extra_size(offsets) + 4);
- } else if (!prebuilt->idx_cond && !prebuilt->innodb_api) {
+ } else if (!prebuilt->idx_cond) {
/* The record was not yet converted to MySQL format. */
if (!row_sel_store_mysql_rec(
- buf, prebuilt, result_rec,
+ buf, prebuilt, result_rec, vrow,
result_rec != rec,
result_rec != rec ? clust_index : index,
offsets)) {
@@ -5155,15 +5609,12 @@ idx_cond_failed:
|| !dict_index_is_clust(index)
|| direction != 0
|| prebuilt->select_lock_type != LOCK_NONE
- || prebuilt->used_in_HANDLER
- || prebuilt->innodb_api) {
+ || prebuilt->used_in_HANDLER) {
/* Inside an update always store the cursor position */
- btr_pcur_store_position(pcur, &mtr);
-
- if (prebuilt->innodb_api) {
- prebuilt->innodb_api_rec = result_rec;
+ if (!spatial_search) {
+ btr_pcur_store_position(pcur, &mtr);
}
}
@@ -5177,6 +5628,7 @@ next_rec:
}
did_semi_consistent_read = FALSE;
prebuilt->new_rec_locks = 0;
+ vrow = NULL;
/*-------------------------------------------------------------*/
/* PHASE 5: Move the cursor to the next index record */
@@ -5191,57 +5643,84 @@ next_rec:
the cursor. What prevents us from buffer-fixing all leaf pages
within the mini-transaction is the btr_leaf_page_release()
call in btr_pcur_move_to_next_page(). Only the leaf page where
- the cursor is positioned will remain buffer-fixed. */
-
- if (UNIV_UNLIKELY(mtr_has_extra_clust_latch)) {
- /* We must commit mtr if we are moving to the next
- non-clustered index record, because we could break the
- latching order if we would access a different clustered
+ the cursor is positioned will remain buffer-fixed.
+ For an R-tree spatial search, we also commit the mini-transaction
+ on each iteration. */
+
+ if (mtr_has_extra_clust_latch || spatial_search) {
+ /* If we have an extra clustered index latch, we must
+ commit the mtr if we are moving to the next
+ non-clustered index record, because we could break the
+ latching order if we were to access a different clustered
index page right away without releasing the previous. */
- btr_pcur_store_position(pcur, &mtr);
+ /* No need to store/restore the position for R-tree */
+ if (!spatial_search) {
+ btr_pcur_store_position(pcur, &mtr);
+ }
- mtr_commit(&mtr);
+ mtr.commit();
mtr_has_extra_clust_latch = FALSE;
- mtr_start(&mtr);
- if (sel_restore_position_for_mysql(&same_user_rec,
+ mtr.start();
+
+ if (!spatial_search
+ && sel_restore_position_for_mysql(&same_user_rec,
BTR_SEARCH_LEAF,
pcur, moves_up, &mtr)) {
-#ifdef UNIV_SEARCH_DEBUG
- cnt++;
-#endif /* UNIV_SEARCH_DEBUG */
-
goto rec_loop;
}
}
if (moves_up) {
- if (UNIV_UNLIKELY(!btr_pcur_move_to_next(pcur, &mtr))) {
-not_moved:
- btr_pcur_store_position(pcur, &mtr);
-
- if (match_mode != 0) {
- err = DB_RECORD_NOT_FOUND;
+ if (UNIV_UNLIKELY(spatial_search)) {
+ if (rtr_pcur_move_to_next(
+ search_tuple, mode, pcur, 0, &mtr)) {
+ goto rec_loop;
+ }
+ } else {
+ const buf_block_t* block = btr_pcur_get_block(pcur);
+ /* This is based on btr_pcur_move_to_next(),
+ but avoids an infinite read loop on a corrupted page. */
+ ut_ad(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
+ ut_ad(pcur->latch_mode != BTR_NO_LATCHES);
+ pcur->old_stored = false;
+ if (btr_pcur_is_after_last_on_page(pcur)) {
+ if (btr_pcur_is_after_last_in_tree(pcur,
+ &mtr)) {
+ goto not_moved;
+ }
+ btr_pcur_move_to_next_page(pcur, &mtr);
+ if (UNIV_UNLIKELY(btr_pcur_get_block(pcur)
+ == block)) {
+ err = DB_CORRUPTION;
+ goto lock_wait_or_error;
+ }
} else {
- err = DB_END_OF_INDEX;
+ btr_pcur_move_to_next_on_page(pcur);
}
- goto normal_return;
+ goto rec_loop;
}
} else {
- if (UNIV_UNLIKELY(!btr_pcur_move_to_prev(pcur, &mtr))) {
- goto not_moved;
+ if (btr_pcur_move_to_prev(pcur, &mtr)) {
+ goto rec_loop;
}
}
-#ifdef UNIV_SEARCH_DEBUG
- cnt++;
-#endif /* UNIV_SEARCH_DEBUG */
+not_moved:
+ if (!spatial_search) {
+ btr_pcur_store_position(pcur, &mtr);
+ }
- goto rec_loop;
+ err = match_mode ? DB_RECORD_NOT_FOUND : DB_END_OF_INDEX;
+ goto normal_return;
lock_wait_or_error:
+ if (!dict_index_is_spatial(index)) {
+ btr_pcur_store_position(pcur, &mtr);
+ }
+page_read_error:
/* Reset the old and new "did semi-consistent read" flags. */
if (UNIV_UNLIKELY(prebuilt->row_read_type
== ROW_READ_DID_SEMI_CONSISTENT)) {
@@ -5249,21 +5728,17 @@ lock_wait_or_error:
}
did_semi_consistent_read = FALSE;
- /*-------------------------------------------------------------*/
-
- if (rec) {
- btr_pcur_store_position(pcur, &mtr);
- }
-
lock_table_wait:
- mtr_commit(&mtr);
+ mtr.commit();
mtr_has_extra_clust_latch = FALSE;
trx->error_state = err;
/* The following is a patch for MySQL */
- que_thr_stop_for_mysql(thr);
+ if (thr->is_active) {
+ que_thr_stop_for_mysql(thr);
+ }
thr->lock_state = QUE_THR_LOCK_ROW;
@@ -5271,7 +5746,7 @@ lock_table_wait:
/* It was a lock wait, and it ended */
thr->lock_state = QUE_THR_LOCK_NOLOCK;
- mtr_start(&mtr);
+ mtr.start();
/* Table lock waited, go try to obtain table lock
again */
@@ -5281,9 +5756,11 @@ lock_table_wait:
goto wait_table_again;
}
- sel_restore_position_for_mysql(&same_user_rec,
- BTR_SEARCH_LEAF, pcur,
- moves_up, &mtr);
+ if (!dict_index_is_spatial(index)) {
+ sel_restore_position_for_mysql(
+ &same_user_rec, BTR_SEARCH_LEAF, pcur,
+ moves_up, &mtr);
+ }
if ((srv_locks_unsafe_for_binlog
|| trx->isolation_level <= TRX_ISO_READ_COMMITTED)
@@ -5315,18 +5792,15 @@ lock_table_wait:
thr->lock_state = QUE_THR_LOCK_NOLOCK;
-#ifdef UNIV_SEARCH_DEBUG
- /* fputs("Using ", stderr);
- dict_index_name_print(stderr, index);
- fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */
-#endif /* UNIV_SEARCH_DEBUG */
goto func_exit;
normal_return:
/*-------------------------------------------------------------*/
que_thr_stop_for_mysql_no_error(thr, trx);
- mtr_commit(&mtr);
+ mtr.commit();
+
+ DEBUG_SYNC_C("row_search_for_mysql_before_return");
if (prebuilt->idx_cond != 0) {
@@ -5350,15 +5824,18 @@ normal_return:
err = DB_SUCCESS;
}
-#ifdef UNIV_SEARCH_DEBUG
- /* fputs("Using ", stderr);
- dict_index_name_print(stderr, index);
- fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */
-#endif /* UNIV_SEARCH_DEBUG */
+#ifdef UNIV_DEBUG
+ if (dict_index_is_spatial(index) && err != DB_SUCCESS
+ && err != DB_END_OF_INDEX && err != DB_INTERRUPTED) {
+ rtr_node_path_t* path = pcur->btr_cur.rtr_info->path;
+
+ ut_ad(path->empty());
+ }
+#endif
func_exit:
trx->op_info = "";
- if (UNIV_LIKELY_NULL(heap)) {
+ if (heap != NULL) {
mem_heap_free(heap);
}
@@ -5368,96 +5845,142 @@ func_exit:
ut_ad(prebuilt->row_read_type != ROW_READ_WITH_LOCKS
|| !did_semi_consistent_read);
- if (UNIV_UNLIKELY(prebuilt->row_read_type != ROW_READ_WITH_LOCKS)) {
- if (UNIV_UNLIKELY(did_semi_consistent_read)) {
+ if (prebuilt->row_read_type != ROW_READ_WITH_LOCKS) {
+ if (did_semi_consistent_read) {
prebuilt->row_read_type = ROW_READ_DID_SEMI_CONSISTENT;
} else {
prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
}
}
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!sync_check_iterate(sync_check()));
DEBUG_SYNC_C("innodb_row_search_for_mysql_exit");
- return(err);
+ DBUG_RETURN(err);
}
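+
+/* A minimal usage sketch for the function above (hypothetical caller;
+buf and prebuilt are assumed to have been set up by the handler layer):
+
+	dberr_t err = row_search_mvcc(buf, PAGE_CUR_GE, prebuilt, 0, 0);
+
+	while (err == DB_SUCCESS) {
+		err = row_search_mvcc(buf, PAGE_CUR_GE, prebuilt,
+				      0, ROW_SEL_NEXT);
+	}
+
+Note that after a ROW_SEL_EXACT match on a unique index the cursor
+position is not stored, so fetch next must not be attempted. */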
-/*******************************************************************//**
-Checks if MySQL at the moment is allowed for this table to retrieve a
-consistent read result, or store it to the query cache.
-@return TRUE if storing or retrieving from the query cache is permitted */
-UNIV_INTERN
-ibool
-row_search_check_if_query_cache_permitted(
-/*======================================*/
- trx_t* trx, /*!< in: transaction object */
- const char* norm_name) /*!< in: concatenation of database name,
- '/' char, table name */
+/********************************************************************//**
+Count rows in an R-Tree leaf level.
+@return DB_SUCCESS if successful */
+dberr_t
+row_count_rtree_recs(
+/*=================*/
+ row_prebuilt_t* prebuilt, /*!< in: prebuilt struct for the
+ table handle; this contains the info
+ of search_tuple, index; if search
+ tuple contains 0 fields then we
+ position the cursor at the start or
+ the end of the index, depending on
+ 'mode' */
+ ulint* n_rows) /*!< out: number of entries
+ seen in the consistent read */
{
- dict_table_t* table;
- ibool ret = FALSE;
-
- /* Disable query cache altogether for all tables if recovered XA
- transactions in prepared state exist. This is because we do not
- restore the table locks for those transactions and we may wrongly
- set ret=TRUE above if "lock_table_get_n_locks(table) == 0". See
- "Bug#14658648 XA ROLLBACK (DISTRIBUTED DATABASE) NOT WORKING WITH
- QUERY CACHE ENABLED".
- Read trx_sys->n_prepared_recovered_trx without mutex protection,
- not possible to end up with a torn read since n_prepared_recovered_trx
- is word size. */
- if (trx_sys->n_prepared_recovered_trx > 0) {
+ dict_index_t* index = prebuilt->index;
+ dberr_t ret = DB_SUCCESS;
+ mtr_t mtr;
+ mem_heap_t* heap;
+ dtuple_t* entry;
+ dtuple_t* search_entry = prebuilt->search_tuple;
+ ulint entry_len;
+ ulint i;
+ byte* buf;
- return(FALSE);
- }
+ ut_a(dict_index_is_spatial(index));
- table = dict_table_open_on_name(norm_name, FALSE, FALSE,
- DICT_ERR_IGNORE_NONE);
+ *n_rows = 0;
- if (table == NULL) {
+ heap = mem_heap_create(256);
- return(FALSE);
- }
+ /* Build a search tuple. */
+ entry_len = dict_index_get_n_fields(index);
+ entry = dtuple_create(heap, entry_len);
+
+ for (i = 0; i < entry_len; i++) {
+ const dict_field_t* ind_field
+ = dict_index_get_nth_field(index, i);
+ const dict_col_t* col
+ = ind_field->col;
+ dfield_t* dfield
+ = dtuple_get_nth_field(entry, i);
+
+ if (i == 0) {
+ double* mbr;
+ double tmp_mbr[SPDIMS * 2];
+
+ dfield->type.mtype = DATA_GEOMETRY;
+ dfield->type.prtype |= DATA_GIS_MBR;
- /* Start the transaction if it is not started yet */
+ /* Allocate memory for mbr field */
+ mbr = static_cast<double*>
+ (mem_heap_alloc(heap, DATA_MBR_LEN));
- trx_start_if_not_started(trx);
+ /* Set mbr field data. */
+ dfield_set_data(dfield, mbr, DATA_MBR_LEN);
- /* If there are locks on the table or some trx has invalidated the
- cache up to our trx id, then ret = FALSE.
- We do not check what type locks there are on the table, though only
- IX type locks actually would require ret = FALSE. */
+ for (uint j = 0; j < SPDIMS; j++) {
+ tmp_mbr[j * 2] = DBL_MAX;
+ tmp_mbr[j * 2 + 1] = -DBL_MAX;
+ }
+ dfield_write_mbr(dfield, tmp_mbr);
+ continue;
+ }
- if (lock_table_get_n_locks(table) == 0
- && trx->id >= table->query_cache_inv_trx_id) {
+ dfield->type.mtype = col->mtype;
+ dfield->type.prtype = col->prtype;
- ret = TRUE;
+ }
- /* If the isolation level is high, assign a read view for the
- transaction if it does not yet have one */
+ prebuilt->search_tuple = entry;
- if (trx->isolation_level >= TRX_ISO_REPEATABLE_READ
- && !trx->read_view) {
+ ulint bufsize = ut_max(UNIV_PAGE_SIZE, prebuilt->mysql_row_len);
+ buf = static_cast<byte*>(ut_malloc_nokey(bufsize));
- trx->read_view = read_view_open_now(
- trx->id, trx->global_read_view_heap);
+ ulint cnt = 1000;
- trx->global_read_view = trx->read_view;
+ ret = row_search_for_mysql(buf, PAGE_CUR_WITHIN, prebuilt, 0, 0);
+loop:
+ /* Check thd->killed every 1,000 scanned rows */
+ if (--cnt == 0) {
+ if (trx_is_interrupted(prebuilt->trx)) {
+ ret = DB_INTERRUPTED;
+ goto func_exit;
}
+ cnt = 1000;
}
- dict_table_close(table, FALSE, FALSE);
+ switch (ret) {
+ case DB_SUCCESS:
+ break;
+ case DB_DEADLOCK:
+ case DB_LOCK_TABLE_FULL:
+ case DB_LOCK_WAIT_TIMEOUT:
+ case DB_INTERRUPTED:
+ goto func_exit;
+ default:
+ /* fall through (this error is ignored by CHECK TABLE) */
+ case DB_END_OF_INDEX:
+ ret = DB_SUCCESS;
+func_exit:
+ prebuilt->search_tuple = search_entry;
+ ut_free(buf);
+ mem_heap_free(heap);
- return(ret);
+ return(ret);
+ }
+
+ *n_rows = *n_rows + 1;
+
+ ret = row_search_for_mysql(
+ buf, PAGE_CUR_WITHIN, prebuilt, 0, ROW_SEL_NEXT);
+
+ goto loop;
}
/*******************************************************************//**
Read the AUTOINC column from the current row. If the value is less than
0 and the type is not unsigned then we reset the value to 0.
-@return value read from the column */
+@return value read from the column */
static
ib_uint64_t
row_search_autoinc_read_column(
@@ -5472,12 +5995,14 @@ row_search_autoinc_read_column(
const byte* data;
ib_uint64_t value;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
rec_offs_init(offsets_);
+ ut_ad(page_rec_is_leaf(rec));
- offsets = rec_get_offsets(rec, index, offsets, col_no + 1, &heap);
+ offsets = rec_get_offsets(rec, index, offsets, true,
+ col_no + 1, &heap);
if (rec_offs_nth_sql_null(offsets, col_no)) {
/* There is no non-NULL value in the auto-increment column. */
@@ -5487,29 +6012,7 @@ row_search_autoinc_read_column(
data = rec_get_nth_field(rec, offsets, col_no, &len);
- switch (mtype) {
- case DATA_INT:
- ut_a(len <= sizeof value);
- value = mach_read_int_type(data, len, unsigned_type);
- break;
-
- case DATA_FLOAT:
- ut_a(len == sizeof(float));
- value = (ib_uint64_t) mach_float_read(data);
- break;
-
- case DATA_DOUBLE:
- ut_a(len == sizeof(double));
- value = (ib_uint64_t) mach_double_read(data);
- break;
-
- default:
- ut_error;
- }
-
- if (!unsigned_type && (ib_int64_t) value < 0) {
- value = 0;
- }
+ value = row_parse_int(data, len, mtype, unsigned_type);
func_exit:
if (UNIV_LIKELY_NULL(heap)) {
@@ -5555,43 +6058,27 @@ row_search_get_max_rec(
return(rec);
}
-/*******************************************************************//**
-Read the max AUTOINC value from an index.
-@return DB_SUCCESS if all OK else error code, DB_RECORD_NOT_FOUND if
-column name can't be found in index */
-UNIV_INTERN
-dberr_t
-row_search_max_autoinc(
-/*===================*/
- dict_index_t* index, /*!< in: index to search */
- const char* col_name, /*!< in: name of autoinc column */
- ib_uint64_t* value) /*!< out: AUTOINC value read */
+/** Read the max AUTOINC value from an index.
+@param[in] index index starting with an AUTO_INCREMENT column
+@return the largest AUTO_INCREMENT value
+@retval 0 if no records were found */
+ib_uint64_t
+row_search_max_autoinc(dict_index_t* index)
{
- dict_field_t* dfield = dict_index_get_nth_field(index, 0);
- dberr_t error = DB_SUCCESS;
- *value = 0;
+ const dict_field_t* dfield = dict_index_get_nth_field(index, 0);
- if (strcmp(col_name, dfield->name) != 0) {
- error = DB_RECORD_NOT_FOUND;
- } else {
- mtr_t mtr;
- const rec_t* rec;
-
- mtr_start(&mtr);
-
- rec = row_search_get_max_rec(index, &mtr);
-
- if (rec != NULL) {
- ibool unsigned_type = (
- dfield->col->prtype & DATA_UNSIGNED);
+ ib_uint64_t value = 0;
- *value = row_search_autoinc_read_column(
- index, rec, 0,
- dfield->col->mtype, unsigned_type);
- }
+ mtr_t mtr;
+ mtr.start();
- mtr_commit(&mtr);
+ if (const rec_t* rec = row_search_get_max_rec(index, &mtr)) {
+ value = row_search_autoinc_read_column(
+ index, rec, 0,
+ dfield->col->mtype,
+ dfield->col->prtype & DATA_UNSIGNED);
}
- return(error);
+ mtr.commit();
+ return(value);
}
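+
+/* A minimal usage sketch (hypothetical caller; the index is assumed to
+start with the AUTO_INCREMENT column):
+
+	ib_uint64_t max_value = row_search_max_autoinc(index);
+
+A return value of 0 means that no records were found. */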
diff --git a/storage/innobase/row/row0trunc.cc b/storage/innobase/row/row0trunc.cc
new file mode 100644
index 00000000000..5e512c602e6
--- /dev/null
+++ b/storage/innobase/row/row0trunc.cc
@@ -0,0 +1,3013 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2018, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0trunc.cc
+TRUNCATE implementation
+
+Created 2013-04-12 Sunny Bains
+*******************************************************/
+
+#include "row0mysql.h"
+#include "pars0pars.h"
+#include "btr0pcur.h"
+#include "dict0crea.h"
+#include "dict0boot.h"
+#include "dict0load.h"
+#include "dict0stats.h"
+#include "dict0stats_bg.h"
+#include "lock0lock.h"
+#include "fts0fts.h"
+#include "srv0start.h"
+#include "row0trunc.h"
+#include "os0file.h"
+#include "que0que.h"
+#include "trx0undo.h"
+
+/* FIXME: For temporary tables, use a simple approach of btr_free()
+and btr_create() of each index tree. */
+
+/* FIXME: For persistent tables, remove this code in MDEV-11655
+and use a combination of the transactional DDL log to make atomic the
+low-level operations ha_innobase::delete_table(), ha_innobase::create(). */
+
+bool truncate_t::s_fix_up_active = false;
+truncate_t::tables_t truncate_t::s_tables;
+truncate_t::truncated_tables_t truncate_t::s_truncated_tables;
+
+static const byte magic[] = { 0x01, 0xf3, 0xa1, 0x20 };
+
+/**
+Iterator over the raw records in an index; does not support MVCC. */
+class IndexIterator {
+
+public:
+ /**
+ Iterate over an index's records
+ @param index index to iterate over */
+ explicit IndexIterator(dict_index_t* index)
+ :
+ m_index(index)
+ {
+ /* Do nothing */
+ }
+
+ /**
+ Search for key. Position the cursor on a record GE key.
+ @return DB_SUCCESS or error code. */
+ dberr_t search(dtuple_t& key, bool noredo)
+ {
+ mtr_start(&m_mtr);
+
+ if (noredo) {
+ mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO);
+ }
+
+ btr_pcur_open_on_user_rec(
+ m_index,
+ &key,
+ PAGE_CUR_GE,
+ BTR_MODIFY_LEAF,
+ &m_pcur, &m_mtr);
+
+ return(DB_SUCCESS);
+ }
+
+ /**
+ Iterate over all the records
+ @return DB_SUCCESS or error code */
+ template <typename Callback>
+ dberr_t for_each(Callback& callback)
+ {
+ dberr_t err = DB_SUCCESS;
+
+ for (;;) {
+
+ if (!btr_pcur_is_on_user_rec(&m_pcur)
+ || !callback.match(&m_mtr, &m_pcur)) {
+
+ /* The end of the index has been reached. */
+ err = DB_END_OF_INDEX;
+ break;
+ }
+
+ rec_t* rec = btr_pcur_get_rec(&m_pcur);
+
+ if (!rec_get_deleted_flag(rec, FALSE)) {
+
+ err = callback(&m_mtr, &m_pcur);
+
+ if (err != DB_SUCCESS) {
+ break;
+ }
+ }
+
+ btr_pcur_move_to_next_user_rec(&m_pcur, &m_mtr);
+ }
+
+ btr_pcur_close(&m_pcur);
+ mtr_commit(&m_mtr);
+
+ return(err == DB_END_OF_INDEX ? DB_SUCCESS : err);
+ }
+
+private:
+ // Disable copying
+ IndexIterator(const IndexIterator&);
+ IndexIterator& operator=(const IndexIterator&);
+
+private:
+ mtr_t m_mtr;
+ btr_pcur_t m_pcur;
+ dict_index_t* m_index;
+};
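+
+/* A minimal sketch of the callback shape that for_each() above expects
+(hypothetical type, for illustration only):
+
+	struct CountRecs {
+		ulint n_recs;
+
+		bool match(mtr_t*, btr_pcur_t*) const { return(true); }
+
+		dberr_t operator()(mtr_t*, btr_pcur_t*)
+		{
+			++n_recs;
+			return(DB_SUCCESS);
+		}
+	};
+*/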
+
+/** SysIndex table iterator, iterate over records for a table. */
+class SysIndexIterator {
+
+public:
+ /**
+ Iterate over all the records that match the table id.
+ @return DB_SUCCESS or error code */
+ template <typename Callback>
+ dberr_t for_each(Callback& callback) const
+ {
+ dict_index_t* sys_index;
+ byte buf[DTUPLE_EST_ALLOC(1)];
+ dtuple_t* tuple =
+ dtuple_create_from_mem(buf, sizeof(buf), 1, 0);
+ dfield_t* dfield = dtuple_get_nth_field(tuple, 0);
+
+ dfield_set_data(
+ dfield,
+ callback.table_id(),
+ sizeof(*callback.table_id()));
+
+ sys_index = dict_table_get_first_index(dict_sys->sys_indexes);
+
+ dict_index_copy_types(tuple, sys_index, 1);
+
+ IndexIterator iterator(sys_index);
+
+ /* Search on the table id and position the cursor
+ on GE table_id. */
+ iterator.search(*tuple, callback.get_logging_status());
+
+ return(iterator.for_each(callback));
+ }
+};
+
+/** Generic callback abstract class. */
+class Callback
+{
+
+public:
+ /**
+ Constructor
+ @param table_id id of the table being operated.
+ @param noredo if true, turn off logging. */
+ Callback(table_id_t table_id, bool noredo)
+ :
+ m_id(),
+ m_noredo(noredo)
+ {
+ /* Convert to storage byte order. */
+ mach_write_to_8(&m_id, table_id);
+ }
+
+ /**
+ Destructor */
+ virtual ~Callback()
+ {
+ /* Do nothing */
+ }
+
+ /**
+ @param mtr mini-transaction covering the iteration
+ @param pcur persistent cursor used for iteration
+ @return true if the table id column matches. */
+ bool match(mtr_t* mtr, btr_pcur_t* pcur) const
+ {
+ ulint len;
+ const byte* field;
+ rec_t* rec = btr_pcur_get_rec(pcur);
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_INDEXES__TABLE_ID, &len);
+
+ ut_ad(len == 8);
+
+ return(memcmp(&m_id, field, len) == 0);
+ }
+
+ /**
+ @return pointer to table id storage format buffer */
+ const table_id_t* table_id() const
+ {
+ return(&m_id);
+ }
+
+ /**
+ @return whether logging needs to be turned off. */
+ bool get_logging_status() const
+ {
+ return(m_noredo);
+ }
+
+protected:
+ // Disable copying
+ Callback(const Callback&);
+ Callback& operator=(const Callback&);
+
+protected:
+ /** Table id in storage format */
+ table_id_t m_id;
+
+ /** Turn off logging. */
+ const bool m_noredo;
+};
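+
+/* Note on match(): m_id is kept in storage (big-endian) byte order, so
+comparing against the on-page SYS_INDEXES.TABLE_ID column reduces to a
+plain memcmp(). A sketch of the equivalent check:
+
+	byte id_buf[8];
+	mach_write_to_8(id_buf, table_id);
+	bool same = 0 == memcmp(id_buf, field, 8);
+*/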
+
+/**
+Creates a TRUNCATE log record with space id, table name, data directory path,
+tablespace flags, table format, index ids, index types, number of index fields
+and index field information of the table. */
+class TruncateLogger : public Callback {
+
+public:
+ /**
+ Constructor
+
+ @param table Table to truncate
+ @param flags tablespace flags */
+ TruncateLogger(
+ dict_table_t* table,
+ ulint flags,
+ table_id_t new_table_id)
+ :
+ Callback(table->id, false),
+ m_table(table),
+ m_flags(flags),
+ m_truncate(table->id, new_table_id, table->data_dir_path),
+ m_log_file_name()
+ {
+ /* Do nothing */
+ }
+
+ /**
+ Initialize Truncate Logger by constructing Truncate Log File Name.
+
+ @return DB_SUCCESS or error code. */
+ dberr_t init()
+ {
+ /* Construct log file name. */
+ ulint log_file_name_buf_sz =
+ strlen(srv_log_group_home_dir)
+ + (22 + 22 + sizeof "ib_trunc.log");
+
+ m_log_file_name = UT_NEW_ARRAY_NOKEY(char, log_file_name_buf_sz);
+ if (m_log_file_name == NULL) {
+ return(DB_OUT_OF_MEMORY);
+ }
+ memset(m_log_file_name, 0, log_file_name_buf_sz);
+
+ strcpy(m_log_file_name, srv_log_group_home_dir);
+ ulint log_file_name_len = strlen(m_log_file_name);
+ if (m_log_file_name[log_file_name_len - 1]
+ != OS_PATH_SEPARATOR) {
+
+ m_log_file_name[log_file_name_len]
+ = OS_PATH_SEPARATOR;
+ log_file_name_len = strlen(m_log_file_name);
+ }
+
+ snprintf(m_log_file_name + log_file_name_len,
+ log_file_name_buf_sz - log_file_name_len,
+ "ib_%u_" IB_ID_FMT "_trunc.log",
+ m_table->space, m_table->id);
+
+ return(DB_SUCCESS);
+
+ }
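+
+	/* For example, with srv_log_group_home_dir = "./", space id 5
+	and table id 1234, the constructed name would be of the form
+	"./ib_5_1234_trunc.log" (assuming IB_ID_FMT formats the id in
+	decimal; illustrative only). */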
+
+ /**
+ Destructor */
+ ~TruncateLogger()
+ {
+ if (m_log_file_name != NULL) {
+ bool exist;
+ os_file_delete_if_exists(
+ innodb_log_file_key, m_log_file_name, &exist);
+ UT_DELETE_ARRAY(m_log_file_name);
+ m_log_file_name = NULL;
+ }
+ }
+
+ /**
+ @param mtr mini-transaction covering the read
+ @param pcur persistent cursor used for reading
+ @return DB_SUCCESS or error code */
+ dberr_t operator()(mtr_t* mtr, btr_pcur_t* pcur);
+
+ /** Called after iterating over the records.
+ @return true if invariant satisfied. */
+ bool debug() const
+ {
+ /* We must find all the index entries on disk. */
+ return(UT_LIST_GET_LEN(m_table->indexes)
+ == m_truncate.indexes());
+ }
+
+ /**
+ Write the TRUNCATE log
+ @return DB_SUCCESS or error code */
+ dberr_t log() const
+ {
+ dberr_t err = DB_SUCCESS;
+
+ if (m_log_file_name == 0) {
+ return(DB_ERROR);
+ }
+
+ bool ret;
+ os_file_t handle = os_file_create(
+ innodb_log_file_key, m_log_file_name,
+ OS_FILE_CREATE, OS_FILE_NORMAL,
+ OS_LOG_FILE, srv_read_only_mode, &ret);
+ if (!ret) {
+ return(DB_IO_ERROR);
+ }
+
+
+ ulint sz = UNIV_PAGE_SIZE;
+ void* buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE);
+ if (buf == 0) {
+ os_file_close(handle);
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ /* Align the memory for file i/o if we might have O_DIRECT set*/
+ byte* log_buf = static_cast<byte*>(
+ ut_align(buf, UNIV_PAGE_SIZE));
+
+ lsn_t lsn = log_get_lsn();
+
+ /* Generally the loop should exit in a single pass, but
+ for the rare case that the buffer turns out to be too
+ small, we must retry with a larger one. */
+ do {
+ /* First 4 bytes are reserved for magic number
+ which is currently 0. */
+ err = m_truncate.write(
+ log_buf + 4, log_buf + sz - 4,
+ m_table->space, m_table->name.m_name,
+ m_flags, m_table->flags, lsn);
+
+ DBUG_EXECUTE_IF("ib_err_trunc_oom_logging",
+ err = DB_FAIL;);
+
+ if (err != DB_SUCCESS) {
+ ut_ad(err == DB_FAIL);
+ ut_free(buf);
+ sz *= 2;
+ buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE);
+ DBUG_EXECUTE_IF("ib_err_trunc_oom_logging",
+ ut_free(buf);
+ buf = 0;);
+ if (buf == 0) {
+ os_file_close(handle);
+ return(DB_OUT_OF_MEMORY);
+ }
+ log_buf = static_cast<byte*>(
+ ut_align(buf, UNIV_PAGE_SIZE));
+ }
+
+ } while (err != DB_SUCCESS);
+
+ dberr_t io_err;
+
+ IORequest request(IORequest::WRITE);
+
+ io_err = os_file_write(
+ request, m_log_file_name, handle, log_buf, 0, sz);
+
+ if (io_err != DB_SUCCESS) {
+
+ ib::error()
+ << "IO: Failed to write the file size to '"
+ << m_log_file_name << "'";
+
+ /* Preserve the original error code */
+ if (err == DB_SUCCESS) {
+ err = io_err;
+ }
+ }
+
+ os_file_flush(handle);
+ os_file_close(handle);
+
+ ut_free(buf);
+
+ /* Why do we need MLOG_TRUNCATE when we have the truncate log
+ for recovery?
+ - The truncate log can protect us if a crash happens while the
+ truncate is active. Once the truncate is done, the truncate log
+ is removed.
+ - If a crash happens after the truncate and the system has not
+ yet checkpointed, on recovery we would see REDO records from
+ actions before the truncate (unless we explicitly checkpoint
+ before returning from the truncate API; a costly alternative,
+ so rejected).
+ - These REDO records may reference a page that doesn't exist
+ after the truncate, so we need a mechanism to skip all such
+ REDO records. MLOG_TRUNCATE records the space_id and lsn that
+ exactly serve this purpose.
+ - If a checkpoint happens after the truncate and a crash happens
+ after that point, then neither MLOG_TRUNCATE nor the REDO
+ records from actions before the truncate are accessible. */
+ if (!is_system_tablespace(m_table->space)) {
+ mtr_t mtr;
+ byte* log_ptr;
+
+ mtr_start(&mtr);
+
+ log_ptr = mlog_open(&mtr, 11 + 8);
+ log_ptr = mlog_write_initial_log_record_low(
+ MLOG_TRUNCATE, m_table->space, 0,
+ log_ptr, &mtr);
+
+ mach_write_to_8(log_ptr, lsn);
+ log_ptr += 8;
+
+ mlog_close(&mtr, log_ptr);
+ mtr_commit(&mtr);
+ }
+
+ return(err);
+ }
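+
+	/* Recovery-side sketch of how MLOG_TRUNCATE is meant to be
+	consumed (hypothetical; the actual handling lives in the redo
+	log apply code): when applying a redo record for a page of the
+	truncated tablespace,
+
+		if (space_id == truncated_space_id
+		    && rec_lsn < truncate_lsn) {
+			continue;	// page may no longer exist
+		}
+	*/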
+
+ /**
+ Indicate completion of the truncate log by writing the magic number.
+ The file will be removed from the system, but to protect against
+ unlink (file-system) anomalies we make sure to write the magic
+ number first. */
+ void done()
+ {
+ if (m_log_file_name == 0) {
+ return;
+ }
+
+ bool ret;
+ os_file_t handle = os_file_create_simple_no_error_handling(
+ innodb_log_file_key, m_log_file_name,
+ OS_FILE_OPEN, OS_FILE_READ_WRITE,
+ srv_read_only_mode, &ret);
+ DBUG_EXECUTE_IF("ib_err_trunc_writing_magic_number",
+ os_file_close(handle);
+ ret = false;);
+ if (!ret) {
+ ib::error() << "Failed to open truncate log file "
+ << m_log_file_name << "."
+ " If server crashes before truncate log is"
+ " removed make sure it is manually removed"
+ " before restarting server";
+ os_file_delete(innodb_log_file_key, m_log_file_name);
+ return;
+ }
+
+ if (os_file_write(IORequest(IORequest::WRITE),
+ m_log_file_name, handle, magic, 0,
+ sizeof magic) != DB_SUCCESS) {
+ ib::error()
+ << "IO: Failed to write the magic number to '"
+ << m_log_file_name << "'";
+ }
+
+ DBUG_EXECUTE_IF("ib_trunc_crash_after_updating_magic_no",
+ DBUG_SUICIDE(););
+ os_file_flush(handle);
+ os_file_close(handle);
+ DBUG_EXECUTE_IF("ib_trunc_crash_after_logging_complete",
+ log_buffer_flush_to_disk();
+ os_thread_sleep(1000000);
+ DBUG_SUICIDE(););
+ os_file_delete(innodb_log_file_key, m_log_file_name);
+ }
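+
+	/* The overall protocol is thus: create the log file, write the
+	truncate state (log()), perform the truncate, then write the
+	magic number and delete the file (done()). On recovery, a log
+	file that still exists without the magic number means the
+	truncate has to be fixed up; see TruncateLogParser below. */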
+
+private:
+ /** Lookup the index using the index id.
+ @return index instance if found else NULL */
+ const dict_index_t* find(index_id_t id) const
+ {
+ for (const dict_index_t* index = UT_LIST_GET_FIRST(
+ m_table->indexes);
+ index != NULL;
+ index = UT_LIST_GET_NEXT(indexes, index)) {
+
+ if (index->id == id) {
+ return(index);
+ }
+ }
+
+ return(NULL);
+ }
+
+private:
+ /** Table to be truncated */
+ dict_table_t* m_table;
+
+ /** Tablespace flags */
+ ulint m_flags;
+
+ /** Collect table to truncate information */
+ truncate_t m_truncate;
+
+ /** Truncate log file name. */
+ char* m_log_file_name;
+};
+
+/**
+Scan the given directory path for truncate log files.
+
+@param dir_path directory in which to look for truncate log files.
+@param log_files cache to hold the truncate log file names found.
+@return DB_SUCCESS or error code. */
+dberr_t
+TruncateLogParser::scan(
+ const char* dir_path,
+ trunc_log_files_t& log_files)
+{
+ os_file_dir_t dir;
+ os_file_stat_t fileinfo;
+ dberr_t err = DB_SUCCESS;
+ const ulint dir_len = strlen(dir_path);
+
+ /* Scan and look out for the truncate log files. */
+ dir = os_file_opendir(dir_path, true);
+ if (dir == NULL) {
+ return(DB_IO_ERROR);
+ }
+
+ while (fil_file_readdir_next_file(
+ &err, dir_path, dir, &fileinfo) == 0) {
+
+ const size_t nm_len = strlen(fileinfo.name);
+
+ if (fileinfo.type == OS_FILE_TYPE_FILE
+ && nm_len > sizeof "ib_trunc.log"
+ && (0 == strncmp(fileinfo.name + nm_len
+ - ((sizeof "trunc.log") - 1),
+ "trunc.log", (sizeof "trunc.log") - 1))
+ && (0 == strncmp(fileinfo.name, "ib_", 3))) {
+
+ if (fileinfo.size == 0) {
+ /* Truncate log not written. Remove the file. */
+ os_file_delete(
+ innodb_log_file_key, fileinfo.name);
+ continue;
+ }
+
+ /* Construct file name by appending directory path */
+ ulint sz = dir_len + 22 + 22 + sizeof "ib_trunc.log";
+ char* log_file_name = UT_NEW_ARRAY_NOKEY(char, sz);
+ if (log_file_name == NULL) {
+ err = DB_OUT_OF_MEMORY;
+ break;
+ }
+
+ memcpy(log_file_name, dir_path, dir_len);
+ char* e = log_file_name + dir_len;
+ if (e[-1] != OS_PATH_SEPARATOR) {
+ *e++ = OS_PATH_SEPARATOR;
+ }
+ strcpy(e, fileinfo.name);
+ log_files.push_back(log_file_name);
+ }
+ }
+
+ os_file_closedir(dir);
+
+ return(err);
+}
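+
+/* A minimal usage sketch (hypothetical caller, for illustration only):
+
+	trunc_log_files_t log_files;
+	dberr_t err = TruncateLogParser::scan(srv_log_group_home_dir,
+					      log_files);
+
+On DB_SUCCESS, log_files holds the names of all pending truncate logs. */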
+
+/**
+Parse the log file and populate the table-to-truncate information.
+(Add this table's truncate information to the central vector that is then
+ used by the truncate fix-up routine to fix up the truncate action of the
+ table.)
+
+@param log_file_name log file to parse
+@return DB_SUCCESS or error code. */
+dberr_t
+TruncateLogParser::parse(
+ const char* log_file_name)
+{
+ dberr_t err = DB_SUCCESS;
+ truncate_t* truncate = NULL;
+
+ /* Open the file and read the magic number to find out whether
+ the truncate action was completed. */
+ bool ret;
+ os_file_t handle = os_file_create_simple(
+ innodb_log_file_key, log_file_name,
+ OS_FILE_OPEN, OS_FILE_READ_ONLY, srv_read_only_mode, &ret);
+ if (!ret) {
+ ib::error() << "Error opening truncate log file: "
+ << log_file_name;
+ return(DB_IO_ERROR);
+ }
+
+ ulint sz = UNIV_PAGE_SIZE;
+ void* buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE);
+ if (buf == 0) {
+ os_file_close(handle);
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ IORequest request(IORequest::READ);
+
+	/* Align the memory for file i/o if we might have O_DIRECT set */
+ byte* log_buf = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE));
+
+ do {
+ err = os_file_read(request, handle, log_buf, 0, sz);
+
+ if (err != DB_SUCCESS) {
+ os_file_close(handle);
+ break;
+ }
+
+ if (!memcmp(log_buf, magic, sizeof magic)) {
+
+ /* Truncate action completed. Avoid parsing the file. */
+ os_file_close(handle);
+
+ os_file_delete(innodb_log_file_key, log_file_name);
+ break;
+ }
+
+ if (truncate == NULL) {
+ truncate = UT_NEW_NOKEY(truncate_t(log_file_name));
+ if (truncate == NULL) {
+ os_file_close(handle);
+ err = DB_OUT_OF_MEMORY;
+ break;
+ }
+ }
+
+ err = truncate->parse(log_buf + 4, log_buf + sz - 4);
+
+ if (err != DB_SUCCESS) {
+
+ ut_ad(err == DB_FAIL);
+
+ ut_free(buf);
+ buf = 0;
+
+ sz *= 2;
+
+ buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE);
+
+ if (buf == 0) {
+ os_file_close(handle);
+ err = DB_OUT_OF_MEMORY;
+ UT_DELETE(truncate);
+ truncate = NULL;
+ break;
+ }
+
+ log_buf = static_cast<byte*>(
+ ut_align(buf, UNIV_PAGE_SIZE));
+ }
+ } while (err != DB_SUCCESS);
+
+ ut_free(buf);
+
+ if (err == DB_SUCCESS && truncate != NULL) {
+ truncate_t::add(truncate);
+ os_file_close(handle);
+ }
+
+ return(err);
+}
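+
+/* The parse loop above re-reads the file into a doubled, page-aligned
+buffer whenever truncate_t::parse() returns DB_FAIL, i.e. the record did
+not fit. A compiled-out sketch of that grow-and-retry pattern, assuming
+only the C++ standard library; parse_fn is a hypothetical callback that
+returns false while the buffer is too small. */
+#if 0
+#include <cstddef>
+#include <vector>
+
+template <typename ParseFn>
+static bool parse_with_growing_buffer(ParseFn parse_fn, std::size_t start_sz)
+{
+	std::vector<unsigned char>	buf(start_sz);
+
+	while (!parse_fn(&buf[0], buf.size())) {
+		if (buf.size() >= start_sz * 64) {
+			return(false);	/* record unreasonably large */
+		}
+		buf.resize(buf.size() * 2);
+	}
+
+	return(true);
+}
+#endif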
+
+/**
+Scan and Parse truncate log files.
+
+@param dir_path look for log directory in following path
+@return DB_SUCCESS or error code. */
+dberr_t
+TruncateLogParser::scan_and_parse(
+ const char* dir_path)
+{
+ dberr_t err;
+ trunc_log_files_t log_files;
+
+ /* Scan and trace all the truncate log files. */
+ err = TruncateLogParser::scan(dir_path, log_files);
+
+	/* Parse the truncate log files if the scan was successful. */
+ if (err == DB_SUCCESS) {
+
+ for (ulint i = 0;
+ i < log_files.size() && err == DB_SUCCESS;
+ i++) {
+ err = TruncateLogParser::parse(log_files[i]);
+ }
+ }
+
+ trunc_log_files_t::const_iterator end = log_files.end();
+ for (trunc_log_files_t::const_iterator it = log_files.begin();
+ it != end;
+ ++it) {
+ if (*it != NULL) {
+ UT_DELETE_ARRAY(*it);
+ }
+ }
+ log_files.clear();
+
+ return(err);
+}
+
+/** Callback to drop indexes during TRUNCATE */
+class DropIndex : public Callback {
+
+public:
+ /**
+ Constructor
+
+ @param[in,out] table Table to truncate
+ @param[in,out] trx dictionary transaction
+ @param[in] noredo whether to disable redo logging */
+ DropIndex(dict_table_t* table, trx_t* trx, bool noredo)
+ : Callback(table->id, noredo), m_trx(trx), m_table(table) {}
+
+ /**
+ @param mtr mini-transaction covering the read
+ @param pcur persistent cursor used for reading
+ @return DB_SUCCESS or error code */
+ dberr_t operator()(mtr_t* mtr, btr_pcur_t* pcur) const;
+
+private:
+ /** dictionary transaction */
+ trx_t* const m_trx;
+ /** Table to be truncated */
+ dict_table_t* const m_table;
+};
+
+/** Callback to create the indexes during TRUNCATE */
+class CreateIndex : public Callback {
+
+public:
+ /**
+ Constructor
+
+ @param[in,out] table Table to truncate
+ @param[in] noredo whether to disable redo logging */
+ CreateIndex(dict_table_t* table, bool noredo)
+ :
+ Callback(table->id, noredo),
+ m_table(table)
+ {
+ /* No op */
+ }
+
+ /**
+ Create the new index and update the root page number in the
+ SysIndex table.
+
+ @param mtr mini-transaction covering the read
+ @param pcur persistent cursor used for reading
+ @return DB_SUCCESS or error code */
+ dberr_t operator()(mtr_t* mtr, btr_pcur_t* pcur) const;
+
+private:
+	// Disable copying
+ CreateIndex(const CreateIndex&);
+ CreateIndex& operator=(const CreateIndex&);
+
+private:
+ /** Table to be truncated */
+ dict_table_t* m_table;
+};
+
+/** Check for presence of table-id in SYS_XXXX tables. */
+class TableLocator : public Callback {
+
+public:
+ /**
+ Constructor
+ @param table_id table_id to look for */
+ explicit TableLocator(table_id_t table_id)
+ :
+ Callback(table_id, false),
+ m_table_found()
+ {
+ /* No op */
+ }
+
+ /**
+ @return true if table is found */
+ bool is_table_found() const
+ {
+ return(m_table_found);
+ }
+
+ /**
+ Look for table-id in SYS_XXXX tables without loading the table.
+
+ @param mtr mini-transaction covering the read
+ @param pcur persistent cursor used for reading
+ @return DB_SUCCESS or error code */
+ dberr_t operator()(mtr_t* mtr, btr_pcur_t* pcur);
+
+private:
+	// Disable copying
+ TableLocator(const TableLocator&);
+ TableLocator& operator=(const TableLocator&);
+
+private:
+ /** Set to true if table is present */
+ bool m_table_found;
+};
+
+/**
+@param mtr mini-transaction covering the read
+@param pcur persistent cursor used for reading
+@return DB_SUCCESS or error code */
+dberr_t
+TruncateLogger::operator()(mtr_t* mtr, btr_pcur_t* pcur)
+{
+ ulint len;
+ const byte* field;
+ rec_t* rec = btr_pcur_get_rec(pcur);
+ truncate_t::index_t index;
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_INDEXES__TYPE, &len);
+ ut_ad(len == 4);
+ index.m_type = mach_read_from_4(field);
+
+ field = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__ID, &len);
+ ut_ad(len == 8);
+ index.m_id = mach_read_from_8(field);
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len);
+ ut_ad(len == 4);
+ index.m_root_page_no = mach_read_from_4(field);
+
+ /* For compressed tables we need to store extra meta-data
+ required during btr_create(). */
+ if (FSP_FLAGS_GET_ZIP_SSIZE(m_flags)) {
+
+ const dict_index_t* dict_index = find(index.m_id);
+
+ if (dict_index != NULL) {
+
+ dberr_t err = index.set(dict_index);
+
+ if (err != DB_SUCCESS) {
+ m_truncate.clear();
+ return(err);
+ }
+
+ } else {
+ ib::warn() << "Index id " << index.m_id
+ << " not found";
+ }
+ }
+
+ m_truncate.add(index);
+
+ return(DB_SUCCESS);
+}
+
+/**
+Drop an index in the table.
+
+@param mtr mini-transaction covering the read
+@param pcur persistent cursor used for reading
+@return DB_SUCCESS or error code */
+dberr_t
+DropIndex::operator()(mtr_t* mtr, btr_pcur_t* pcur) const
+{
+ rec_t* rec = btr_pcur_get_rec(pcur);
+
+ bool freed = dict_drop_index_tree(rec, pcur, m_trx, mtr);
+
+#ifdef UNIV_DEBUG
+ {
+ ulint len;
+ const byte* field;
+ ulint index_type;
+
+ field = rec_get_nth_field_old(
+ btr_pcur_get_rec(pcur), DICT_FLD__SYS_INDEXES__TYPE,
+ &len);
+ ut_ad(len == 4);
+
+ index_type = mach_read_from_4(field);
+
+ if (index_type & DICT_CLUSTERED) {
+ /* Clustered index */
+ DBUG_EXECUTE_IF("ib_trunc_crash_on_drop_of_clust_index",
+ log_buffer_flush_to_disk();
+ os_thread_sleep(2000000);
+ DBUG_SUICIDE(););
+ } else if (index_type & DICT_UNIQUE) {
+ /* Unique index */
+ DBUG_EXECUTE_IF("ib_trunc_crash_on_drop_of_uniq_index",
+ log_buffer_flush_to_disk();
+ os_thread_sleep(2000000);
+ DBUG_SUICIDE(););
+ } else if (index_type == 0) {
+ /* Secondary index */
+ DBUG_EXECUTE_IF("ib_trunc_crash_on_drop_of_sec_index",
+ log_buffer_flush_to_disk();
+ os_thread_sleep(2000000);
+ DBUG_SUICIDE(););
+ }
+ }
+#endif /* UNIV_DEBUG */
+
+ DBUG_EXECUTE_IF("ib_err_trunc_drop_index",
+ freed = false;);
+
+ if (freed) {
+
+ /* We will need to commit and restart the
+ mini-transaction in order to avoid deadlocks.
+ The dict_drop_index_tree() call has freed
+ a page in this mini-transaction, and the rest
+ of this loop could latch another index page.*/
+ const mtr_log_t log_mode = mtr->get_log_mode();
+ mtr_commit(mtr);
+
+ mtr_start(mtr);
+ mtr->set_log_mode(log_mode);
+
+ btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
+ } else {
+ /* Check if the .ibd file is missing. */
+ bool found;
+
+ fil_space_get_page_size(m_table->space, &found);
+
+ DBUG_EXECUTE_IF("ib_err_trunc_drop_index",
+ found = false;);
+
+ if (!found) {
+ return(DB_ERROR);
+ }
+ }
+
+ return(DB_SUCCESS);
+}
+
+/**
+Create the new index and update the root page number in the
+SysIndex table.
+
+@param mtr mini-transaction covering the read
+@param pcur persistent cursor used for reading
+@return DB_SUCCESS or error code */
+dberr_t
+CreateIndex::operator()(mtr_t* mtr, btr_pcur_t* pcur) const
+{
+ ulint root_page_no;
+
+ root_page_no = dict_recreate_index_tree(m_table, pcur, mtr);
+
+#ifdef UNIV_DEBUG
+ {
+ ulint len;
+ const byte* field;
+ ulint index_type;
+
+ field = rec_get_nth_field_old(
+ btr_pcur_get_rec(pcur), DICT_FLD__SYS_INDEXES__TYPE,
+ &len);
+ ut_ad(len == 4);
+
+ index_type = mach_read_from_4(field);
+
+ if (index_type & DICT_CLUSTERED) {
+ /* Clustered index */
+ DBUG_EXECUTE_IF(
+ "ib_trunc_crash_on_create_of_clust_index",
+ log_buffer_flush_to_disk();
+ os_thread_sleep(2000000);
+ DBUG_SUICIDE(););
+ } else if (index_type & DICT_UNIQUE) {
+ /* Unique index */
+ DBUG_EXECUTE_IF(
+ "ib_trunc_crash_on_create_of_uniq_index",
+ log_buffer_flush_to_disk();
+ os_thread_sleep(2000000);
+ DBUG_SUICIDE(););
+ } else if (index_type == 0) {
+ /* Secondary index */
+ DBUG_EXECUTE_IF(
+ "ib_trunc_crash_on_create_of_sec_index",
+ log_buffer_flush_to_disk();
+ os_thread_sleep(2000000);
+ DBUG_SUICIDE(););
+ }
+ }
+#endif /* UNIV_DEBUG */
+
+ DBUG_EXECUTE_IF("ib_err_trunc_create_index",
+ root_page_no = FIL_NULL;);
+
+ if (root_page_no != FIL_NULL) {
+ ulint len;
+ byte* data = rec_get_nth_field_old(
+ btr_pcur_get_rec(pcur),
+ DICT_FLD__SYS_INDEXES__PAGE_NO, &len);
+ ut_ad(len == 4);
+ mlog_write_ulint(data, root_page_no, MLOG_4BYTES, mtr);
+
+ /* We will need to commit and restart the
+ mini-transaction in order to avoid deadlocks.
+ The dict_create_index_tree() call has allocated
+ a page in this mini-transaction, and the rest of
+ this loop could latch another index page. */
+ mtr_commit(mtr);
+
+ mtr_start(mtr);
+
+ btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
+
+ } else {
+ bool found;
+ fil_space_get_page_size(m_table->space, &found);
+
+ DBUG_EXECUTE_IF("ib_err_trunc_create_index",
+ found = false;);
+
+ if (!found) {
+ return(DB_ERROR);
+ }
+ }
+
+ return(DB_SUCCESS);
+}
+
+/**
+Look for table-id in SYS_XXXX tables without loading the table.
+
+@param mtr mini-transaction covering the read
+@param pcur persistent cursor used for reading
+@return DB_SUCCESS */
+dberr_t
+TableLocator::operator()(mtr_t* mtr, btr_pcur_t* pcur)
+{
+ m_table_found = true;
+
+ return(DB_SUCCESS);
+}
+
+/**
+Rollback the transaction and release the index locks.
+Drop indexes if table is corrupted so that drop/create
+sequence works as expected.
+
+@param table table to truncate
+@param trx transaction covering the TRUNCATE
+@param new_id new table id that was supposed to be assigned
+	to the table had the truncate executed successfully
+@param has_internal_doc_id indicates the existence of an FTS index
+@param no_redo if true, turn off redo logging
+@param corrupted table corrupted status
+@param unlock_index if true then unlock indexes before action */
+static
+void
+row_truncate_rollback(
+ dict_table_t* table,
+ trx_t* trx,
+ table_id_t new_id,
+ bool has_internal_doc_id,
+ bool no_redo,
+ bool corrupted,
+ bool unlock_index)
+{
+ if (unlock_index) {
+ dict_table_x_unlock_indexes(table);
+ }
+
+ trx->error_state = DB_SUCCESS;
+
+ trx_rollback_to_savepoint(trx, NULL);
+
+ trx->error_state = DB_SUCCESS;
+
+ if (corrupted && !dict_table_is_temporary(table)) {
+
+		/* Cleanup action to ensure we don't leave stale entries
+		behind when marking the table as corrupted. This ensures
+		it can be recovered using the drop/create sequence. */
+ dict_table_x_lock_indexes(table);
+
+ DropIndex dropIndex(table, trx, no_redo);
+
+ SysIndexIterator().for_each(dropIndex);
+
+ dict_table_x_unlock_indexes(table);
+
+ for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
+ index != NULL;
+ index = UT_LIST_GET_NEXT(indexes, index)) {
+
+ dict_set_corrupted(index, trx, "TRUNCATE TABLE");
+ }
+
+ if (has_internal_doc_id) {
+
+ ut_ad(!trx_is_started(trx));
+
+ table_id_t id = table->id;
+
+ table->id = new_id;
+
+ fts_drop_tables(trx, table);
+
+ table->id = id;
+
+ ut_ad(trx_is_started(trx));
+
+ trx_commit_for_mysql(trx);
+ }
+
+ } else if (corrupted && dict_table_is_temporary(table)) {
+
+ dict_table_x_lock_indexes(table);
+
+ for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
+ index != NULL;
+ index = UT_LIST_GET_NEXT(indexes, index)) {
+
+ dict_drop_index_tree_in_mem(index, index->page);
+
+ index->page = FIL_NULL;
+ }
+
+ dict_table_x_unlock_indexes(table);
+ }
+
+ table->corrupted = corrupted;
+}
+
+/**
+Finish the TRUNCATE operations for both commit and rollback.
+
+@param table table being truncated
+@param trx transaction covering the truncate
+@param fsp_flags tablespace flags
+@param logger table to truncate information logger
+@param err status of truncate operation
+
+@return DB_SUCCESS or error code */
+static MY_ATTRIBUTE((warn_unused_result))
+dberr_t
+row_truncate_complete(
+ dict_table_t* table,
+ trx_t* trx,
+ ulint fsp_flags,
+ TruncateLogger* &logger,
+ dberr_t err)
+{
+ bool is_file_per_table = dict_table_is_file_per_table(table);
+
+ /* Add the table back to FTS optimize background thread. */
+ if (table->fts) {
+ fts_optimize_add_table(table);
+ }
+
+ row_mysql_unlock_data_dictionary(trx);
+
+ DEBUG_SYNC_C("ib_trunc_table_trunc_completing");
+
+ if (!dict_table_is_temporary(table)) {
+
+ DBUG_EXECUTE_IF("ib_trunc_crash_before_log_removal",
+ log_buffer_flush_to_disk();
+ os_thread_sleep(500000);
+ DBUG_SUICIDE(););
+
+		/* Note: We don't take a log checkpoint; instead we have
+		written a special REDO log record, MLOG_TRUNCATE, that is
+		used to avoid applying REDO records written before the
+		truncate after a crash that happens post successful
+		truncate completion. */
+
+ if (logger != NULL) {
+ logger->done();
+ UT_DELETE(logger);
+ logger = NULL;
+ }
+ }
+
+ /* If non-temp file-per-table tablespace... */
+ if (is_file_per_table
+ && !dict_table_is_temporary(table)
+ && fsp_flags != ULINT_UNDEFINED) {
+
+		/* This function resets the stop_new_ops and
+		is_being_truncated flags so that fil-ops can restart. */
+ dberr_t err2 = truncate_t::truncate(
+ table->space,
+ table->data_dir_path,
+ table->name.m_name, fsp_flags, false);
+
+ if (err2 != DB_SUCCESS) {
+ return(err2);
+ }
+ }
+
+ if (err == DB_SUCCESS) {
+ dict_stats_update(table, DICT_STATS_EMPTY_TABLE);
+ }
+
+ trx->op_info = "";
+
+ /* For temporary tables or if there was an error, we need to reset
+ the dict operation flags. */
+ trx->ddl = false;
+ trx->dict_operation = TRX_DICT_OP_NONE;
+
+ ut_ad(!trx_is_started(trx));
+
+ srv_wake_master_thread();
+
+ DBUG_EXECUTE_IF("ib_trunc_crash_after_truncate_done",
+ DBUG_SUICIDE(););
+
+ return(err);
+}
+
+/**
+Handle FTS truncate issues.
+@param table table being truncated
+@param new_id new id for the table
+@param trx transaction covering the truncate
+@return DB_SUCCESS or error code. */
+static MY_ATTRIBUTE((warn_unused_result))
+dberr_t
+row_truncate_fts(
+ dict_table_t* table,
+ table_id_t new_id,
+ trx_t* trx)
+{
+ dict_table_t fts_table;
+
+ fts_table.id = new_id;
+ fts_table.name = table->name;
+ fts_table.flags2 = table->flags2;
+ fts_table.flags = table->flags;
+ fts_table.space = table->space;
+
+ /* table->data_dir_path is used for FTS AUX table
+ creation. */
+ if (DICT_TF_HAS_DATA_DIR(table->flags)
+ && table->data_dir_path == NULL) {
+ dict_get_and_save_data_dir_path(table, true);
+ ut_ad(table->data_dir_path != NULL);
+ }
+
+ fts_table.data_dir_path = table->data_dir_path;
+
+ dberr_t err;
+
+ err = fts_create_common_tables(
+ trx, &fts_table, table->name.m_name, TRUE);
+
+ for (ulint i = 0;
+ i < ib_vector_size(table->fts->indexes) && err == DB_SUCCESS;
+ i++) {
+
+ dict_index_t* fts_index;
+
+ fts_index = static_cast<dict_index_t*>(
+ ib_vector_getp(table->fts->indexes, i));
+
+ err = fts_create_index_tables_low(
+ trx, fts_index, table->name.m_name, new_id);
+ }
+
+ DBUG_EXECUTE_IF("ib_err_trunc_during_fts_trunc",
+ err = DB_ERROR;);
+
+ if (err != DB_SUCCESS) {
+
+ trx->error_state = DB_SUCCESS;
+ trx_rollback_to_savepoint(trx, NULL);
+ trx->error_state = DB_SUCCESS;
+
+ ib::error() << "Unable to truncate FTS index for table "
+ << table->name;
+ } else {
+
+ ut_ad(trx_is_started(trx));
+ }
+
+ return(err);
+}
+
+/**
+Update system table to reflect new table id.
+@param old_table_id old table id
+@param new_table_id new table id
+@param reserve_dict_mutex if TRUE, acquire/release
+ dict_sys->mutex around call to pars_sql.
+@param trx transaction
+@return error code or DB_SUCCESS */
+static MY_ATTRIBUTE((warn_unused_result))
+dberr_t
+row_truncate_update_table_id(
+ table_id_t old_table_id,
+ table_id_t new_table_id,
+ ibool reserve_dict_mutex,
+ trx_t* trx)
+{
+ pars_info_t* info = NULL;
+ dberr_t err = DB_SUCCESS;
+
+ /* Scan the SYS_XXXX table and update to reflect new table-id. */
+ info = pars_info_create();
+ pars_info_add_ull_literal(info, "old_id", old_table_id);
+ pars_info_add_ull_literal(info, "new_id", new_table_id);
+
+ err = que_eval_sql(
+ info,
+ "PROCEDURE RENUMBER_TABLE_ID_PROC () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_TABLES"
+ " SET ID = :new_id\n"
+ " WHERE ID = :old_id;\n"
+ "UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n"
+ " WHERE TABLE_ID = :old_id;\n"
+ "UPDATE SYS_INDEXES"
+ " SET TABLE_ID = :new_id\n"
+ " WHERE TABLE_ID = :old_id;\n"
+ "UPDATE SYS_VIRTUAL"
+ " SET TABLE_ID = :new_id\n"
+ " WHERE TABLE_ID = :old_id;\n"
+ "END;\n", reserve_dict_mutex, trx);
+
+ return(err);
+}
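+
+/* A compiled-out usage sketch of the binding pattern above: literals are
+bound by name with pars_info_add_ull_literal() and referenced as :old_id
+and :new_id inside the InnoDB SQL procedure passed to que_eval_sql(). The
+procedure here is a hypothetical single-table variant, shown only to
+illustrate the binding. */
+#if 0
+static dberr_t renumber_one_table(
+	table_id_t	old_id,
+	table_id_t	new_id,
+	trx_t*		trx)
+{
+	pars_info_t*	info = pars_info_create();
+
+	pars_info_add_ull_literal(info, "old_id", old_id);
+	pars_info_add_ull_literal(info, "new_id", new_id);
+
+	return(que_eval_sql(
+		info,
+		"PROCEDURE RENUMBER_ONE_PROC () IS\n"
+		"BEGIN\n"
+		"UPDATE SYS_TABLES SET ID = :new_id\n"
+		" WHERE ID = :old_id;\n"
+		"END;\n", TRUE, trx));
+}
+#endif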
+
+/**
+Get the table id to truncate.
+@param truncate old/new table id of the table to truncate
+@return table_id_t table_id to use in SYS_XXXX table update. */
+static MY_ATTRIBUTE((warn_unused_result))
+table_id_t
+row_truncate_get_trunc_table_id(
+ const truncate_t& truncate)
+{
+ TableLocator tableLocator(truncate.old_table_id());
+
+ SysIndexIterator().for_each(tableLocator);
+
+ return(tableLocator.is_table_found() ?
+ truncate.old_table_id(): truncate.new_table_id());
+}
+
+/**
+Update system table to reflect new table id and root page number.
+@param truncate old/new table id of the table to truncate
+	and updated root_page_no of the indexes.
+@param new_table_id new table id
+@param reserve_dict_mutex if TRUE, acquire/release
+ dict_sys->mutex around call to pars_sql.
+@param mark_index_corrupted if true, then mark index corrupted.
+@return error code or DB_SUCCESS */
+static MY_ATTRIBUTE((warn_unused_result))
+dberr_t
+row_truncate_update_sys_tables_during_fix_up(
+ const truncate_t& truncate,
+ table_id_t new_table_id,
+ ibool reserve_dict_mutex,
+ bool mark_index_corrupted)
+{
+ trx_t* trx = trx_allocate_for_background();
+
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+
+ table_id_t table_id = row_truncate_get_trunc_table_id(truncate);
+
+ /* Step-1: Update the root-page-no */
+
+ dberr_t err;
+
+ err = truncate.update_root_page_no(
+ trx, table_id, reserve_dict_mutex, mark_index_corrupted);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ /* Step-2: Update table-id. */
+
+ err = row_truncate_update_table_id(
+ table_id, new_table_id, reserve_dict_mutex, trx);
+
+ if (err == DB_SUCCESS) {
+ dict_mutex_enter_for_mysql();
+
+ /* Remove the table with old table_id from cache. */
+ dict_table_t* old_table = dict_table_open_on_id(
+ table_id, true, DICT_TABLE_OP_NORMAL);
+
+ if (old_table != NULL) {
+ dict_table_close(old_table, true, false);
+ dict_table_remove_from_cache(old_table);
+ }
+
+ /* Open table with new table_id and set table as
+ corrupted if it has FTS index. */
+
+ dict_table_t* table = dict_table_open_on_id(
+ new_table_id, true, DICT_TABLE_OP_NORMAL);
+ ut_ad(table->id == new_table_id);
+
+ bool has_internal_doc_id =
+ dict_table_has_fts_index(table)
+ || DICT_TF2_FLAG_IS_SET(
+ table, DICT_TF2_FTS_HAS_DOC_ID);
+
+ if (has_internal_doc_id) {
+ trx->dict_operation_lock_mode = RW_X_LATCH;
+ fts_check_corrupt(table, trx);
+ trx->dict_operation_lock_mode = 0;
+ }
+
+ dict_table_close(table, true, false);
+ dict_mutex_exit_for_mysql();
+ }
+
+ trx_commit_for_mysql(trx);
+ trx_free_for_background(trx);
+
+ return(err);
+}
+
+/**
+Truncate also results in the assignment of a new table id; update the
+SYS_XXXX system tables with the new id.
+@param table table being truncated
+@param new_id new table id
+@param has_internal_doc_id whether the table has an FTS Doc ID column
+@param no_redo if true, turn off redo logging
+@param trx transaction handle
+@return error code or DB_SUCCESS */
+static MY_ATTRIBUTE((warn_unused_result))
+dberr_t
+row_truncate_update_system_tables(
+ dict_table_t* table,
+ table_id_t new_id,
+ bool has_internal_doc_id,
+ bool no_redo,
+ trx_t* trx)
+{
+ dberr_t err = DB_SUCCESS;
+
+ ut_a(!dict_table_is_temporary(table));
+
+ err = row_truncate_update_table_id(table->id, new_id, FALSE, trx);
+
+ DBUG_EXECUTE_IF("ib_err_trunc_during_sys_table_update",
+ err = DB_ERROR;);
+
+ if (err != DB_SUCCESS) {
+
+ row_truncate_rollback(
+ table, trx, new_id, has_internal_doc_id,
+ no_redo, true, false);
+
+ ib::error() << "Unable to assign a new identifier to table "
+ << table->name << " after truncating it. Marked the"
+ " table as corrupted. In-memory representation is now"
+ " different from the on-disk representation.";
+ err = DB_ERROR;
+ } else {
+ /* Drop the old FTS index */
+ if (has_internal_doc_id) {
+
+ ut_ad(trx_is_started(trx));
+
+ fts_drop_tables(trx, table);
+
+ DBUG_EXECUTE_IF("ib_truncate_crash_while_fts_cleanup",
+ DBUG_SUICIDE(););
+
+ ut_ad(trx_is_started(trx));
+ }
+
+ DBUG_EXECUTE_IF("ib_trunc_crash_after_fts_drop",
+ log_buffer_flush_to_disk();
+ os_thread_sleep(2000000);
+ DBUG_SUICIDE(););
+
+ dict_table_change_id_in_cache(table, new_id);
+
+ /* Reset the Doc ID in cache to 0 */
+ if (has_internal_doc_id && table->fts->cache != NULL) {
+ DBUG_EXECUTE_IF("ib_trunc_sleep_before_fts_cache_clear",
+ os_thread_sleep(10000000););
+
+ table->fts->dict_locked = true;
+ fts_update_next_doc_id(trx, table, 0);
+ fts_cache_clear(table->fts->cache);
+ fts_cache_init(table->fts->cache);
+ table->fts->dict_locked = false;
+ }
+ }
+
+ return(err);
+}
+
+/**
+Prepare for the truncate process. On success all of the table's indexes will
+be locked in X mode.
+@param table table to truncate
+@param flags tablespace flags
+@return error code or DB_SUCCESS */
+static MY_ATTRIBUTE((warn_unused_result))
+dberr_t
+row_truncate_prepare(dict_table_t* table, ulint* flags)
+{
+ ut_ad(!dict_table_is_temporary(table));
+ ut_ad(dict_table_is_file_per_table(table));
+
+ *flags = fil_space_get_flags(table->space);
+
+ ut_ad(!dict_table_is_temporary(table));
+
+ dict_get_and_save_data_dir_path(table, true);
+
+ if (*flags != ULINT_UNDEFINED) {
+
+ dberr_t err = fil_prepare_for_truncate(table->space);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+ }
+
+ return(DB_SUCCESS);
+}
+
+/**
+Do foreign key checks before starting TRUNCATE.
+@param table table being truncated
+@param trx transaction covering the truncate
+@return DB_SUCCESS or error code */
+static MY_ATTRIBUTE((warn_unused_result))
+dberr_t
+row_truncate_foreign_key_checks(
+ const dict_table_t* table,
+ const trx_t* trx)
+{
+ /* Check if the table is referenced by foreign key constraints from
+ some other table (not the table itself) */
+
+ dict_foreign_set::iterator it
+ = std::find_if(table->referenced_set.begin(),
+ table->referenced_set.end(),
+ dict_foreign_different_tables());
+
+ if (!srv_read_only_mode
+ && it != table->referenced_set.end()
+ && trx->check_foreigns) {
+
+ dict_foreign_t* foreign = *it;
+
+ FILE* ef = dict_foreign_err_file;
+
+ /* We only allow truncating a referenced table if
+ FOREIGN_KEY_CHECKS is set to 0 */
+
+ mutex_enter(&dict_foreign_err_mutex);
+
+ rewind(ef);
+
+ ut_print_timestamp(ef);
+
+ fputs(" Cannot truncate table ", ef);
+ ut_print_name(ef, trx, table->name.m_name);
+ fputs(" by DROP+CREATE\n"
+ "InnoDB: because it is referenced by ", ef);
+ ut_print_name(ef, trx, foreign->foreign_table_name);
+ putc('\n', ef);
+
+ mutex_exit(&dict_foreign_err_mutex);
+
+ return(DB_ERROR);
+ }
+
+ /* TODO: could we replace the counter n_foreign_key_checks_running
+ with lock checks on the table? Acquire here an exclusive lock on the
+ table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that
+ they can cope with the table having been truncated here? Foreign key
+ checks take an IS or IX lock on the table. */
+
+ if (table->n_foreign_key_checks_running > 0) {
+ ib::warn() << "Cannot truncate table " << table->name
+ << " because there is a foreign key check running on"
+ " it.";
+
+ return(DB_ERROR);
+ }
+
+ return(DB_SUCCESS);
+}
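+
+/* The reference check above relies on std::find_if with the
+dict_foreign_different_tables predicate. A compiled-out sketch of the same
+pattern with hypothetical stand-in types, assuming only the C++ standard
+library. */
+#if 0
+#include <algorithm>
+#include <string>
+#include <vector>
+
+struct foreign_stub {
+	std::string	foreign_table;
+	std::string	referenced_table;
+};
+
+/* True when the referencing table differs from the referenced one. */
+struct different_tables {
+	bool operator()(const foreign_stub& f) const
+	{
+		return(f.foreign_table != f.referenced_table);
+	}
+};
+
+static bool is_referenced_by_other_table(
+	const std::vector<foreign_stub>&	referenced_set)
+{
+	return(std::find_if(referenced_set.begin(), referenced_set.end(),
+			    different_tables())
+	       != referenced_set.end());
+}
+#endif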
+
+/**
+Do some sanity checks before starting the actual TRUNCATE.
+@param table table being truncated
+@return DB_SUCCESS or error code */
+static MY_ATTRIBUTE((warn_unused_result))
+dberr_t
+row_truncate_sanity_checks(
+ const dict_table_t* table)
+{
+ if (dict_table_is_discarded(table)) {
+
+ return(DB_TABLESPACE_DELETED);
+
+ } else if (!table->is_readable()) {
+ if (fil_space_get(table->space) == NULL) {
+ return(DB_TABLESPACE_NOT_FOUND);
+
+ } else {
+ return(DB_DECRYPTION_FAILED);
+ }
+ } else if (dict_table_is_corrupted(table)) {
+
+ return(DB_TABLE_CORRUPT);
+ }
+
+ return(DB_SUCCESS);
+}
+
+/**
+Truncates a table for MySQL.
+@param table table being truncated
+@param trx transaction covering the truncate
+@return error code or DB_SUCCESS */
+dberr_t row_truncate_table_for_mysql(dict_table_t* table, trx_t* trx)
+{
+ bool is_file_per_table = dict_table_is_file_per_table(table);
+ dberr_t err;
+#ifdef UNIV_DEBUG
+ ulint old_space = table->space;
+#endif /* UNIV_DEBUG */
+ TruncateLogger* logger = NULL;
+
+ /* Understanding the truncate flow.
+
+	Step-1: Perform initial sanity checks to ensure the table can be
+	truncated. This includes checks for tablespace discard status,
+	a missing ibd file, etc.
+
+	Step-2: Start a transaction (only for non-temporary tables, as
+	temporary tables don't modify any data on disk and need no
+	transaction object).
+
+	Step-3: Validate ownership of the needed locks (exclusive lock).
+	Ownership also ensures there are no active SQL queries (INSERT,
+	SELECT, ...).
+
+	Step-4: Stop all the background processes associated with the table.
+
+	Step-5: There are a few foreign-key-related constraints under which
+	we can't truncate the table (referential integrity, unless foreign
+	key checks are turned off). Ensure these conditions are satisfied.
+
+	Step-6: Up to a certain point, the truncate operation can be rolled
+	back in case of error. Associate a rollback segment to record the
+	undo log.
+
+	Step-7: Generate a new table-id.
+	Why do we need a new table-id?
+ Purge and rollback case: we assign a new table id for the table.
+ Since purge and rollback look for the table based on the table id,
+ they see the table as 'dropped' and discard their operations.
+
+	Step-8: Log information about the tablespace, which includes
+	table and index information. If there is a crash in the next step,
+	recovery will attempt to fix up the operation.
+
+	Step-9: Drop all indexes (this includes freeing the pages
+	associated with them).
+
+ Step-10: Re-create new indexes.
+
+	Step-11: Update the new table-id in the in-memory cache
+	(dictionary) and on disk (INNODB_SYS_TABLES). INNODB_SYS_INDEXES
+	also needs to be updated with the root-page-no of each newly
+	created index and the updated table-id.
+
+ Step-12: Cleanup Stage. Reset auto-inc value to 1.
+ Release all the locks.
+ Commit the transaction. Update trx operation state.
+
+ Notes:
+	- On error, a log checkpoint is taken, followed by writing the
+	magic number to the truncate log file. If the server crashes after
+	truncate, the fix-up action will not be applied.
+
+	- A log checkpoint is taken before starting the truncate to ensure
+	that earlier REDO log entries are not applied if the current
+	truncate crashes. Consider the following use-case:
+	- create table .... insert/load table .... truncate table (crash)
+	- on restart table is restored .... truncate table (crash)
+	- on restart (assuming the default log checkpoint was not taken)
+	there would be 2 REDO log entries for the same table. (Note: 2 REDO
+	log entries for different tables are not an issue.)
+	For the system tablespace we can't truncate the tablespace itself,
+	so we need to initiate a local cleanup that involves dropping the
+	indexes and re-creating them. If we applied a stale entry we might
+	end up dropping the wrong indexes.
+
+ - Insert buffer: TRUNCATE TABLE is analogous to DROP TABLE,
+ so we do not have to remove insert buffer records, as the
+ insert buffer works at a low level. If a freed page is later
+ reallocated, the allocator will remove the ibuf entries for
+ it. When we prepare to truncate *.ibd files, we remove all entries
+ for the table in the insert buffer tree. This is not strictly
+ necessary, but we can free up some space in the system tablespace.
+
+ - Linear readahead and random readahead: we use the same
+ method as in 3) to discard ongoing operations. (This is only
+ relevant for TRUNCATE TABLE by TRUNCATE TABLESPACE.)
+ Ensure that the table will be dropped by trx_rollback_active() in
+ case of a crash.
+ */
+
+ /*-----------------------------------------------------------------*/
+	/* Step-1: Perform initial sanity checks to ensure the table can be
+	truncated. This includes checks for tablespace discard status,
+	a missing ibd file, etc. */
+ err = row_truncate_sanity_checks(table);
+	if (err != DB_SUCCESS) {
+		return(err);
+	}
+
+	/* Step-2: Start a transaction (only for non-temporary tables, as
+	temporary tables don't modify any data on disk and need no
+	transaction object). */
+ if (!dict_table_is_temporary(table)) {
+ if (table->fts) {
+ fts_optimize_remove_table(table);
+ }
+
+ /* Avoid transaction overhead for temporary table DDL. */
+ trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
+ }
+
+ DEBUG_SYNC_C("row_trunc_before_dict_lock");
+
+	/* Step-3: Validate ownership of the needed locks (exclusive lock).
+	Ownership also ensures there are no active SQL queries (INSERT,
+	SELECT, ...). */
+ trx->op_info = "truncating table";
+ ut_a(trx->dict_operation_lock_mode == 0);
+ row_mysql_lock_data_dictionary(trx);
+ ut_ad(mutex_own(&dict_sys->mutex));
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
+
+	/* Step-4: Stop all the background processes associated with
+	the table. */
+ dict_stats_wait_bg_to_stop_using_table(table, trx);
+
+	/* Step-5: There are a few foreign-key-related constraints under
+	which we can't truncate the table (referential integrity, unless
+	foreign key checks are turned off). Ensure these conditions are
+	satisfied. */
+ ulint fsp_flags = ULINT_UNDEFINED;
+ err = row_truncate_foreign_key_checks(table, trx);
+ if (err != DB_SUCCESS) {
+ trx_rollback_to_savepoint(trx, NULL);
+ return(row_truncate_complete(
+ table, trx, fsp_flags, logger, err));
+ }
+
+ /* Remove all locks except the table-level X lock. */
+ lock_remove_all_on_table(table, FALSE);
+ trx->table_id = table->id;
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+
+	/* Step-6: Up to a certain point, the truncate operation can be
+	rolled back in case of error. Associate a rollback segment to
+	record the undo log. */
+ if (!dict_table_is_temporary(table)) {
+ mutex_enter(&trx->undo_mutex);
+
+ trx_undo_t** pundo = &trx->rsegs.m_redo.update_undo;
+ err = trx_undo_assign_undo(
+ trx, trx->rsegs.m_redo.rseg, pundo, TRX_UNDO_UPDATE);
+
+ mutex_exit(&trx->undo_mutex);
+
+ DBUG_EXECUTE_IF("ib_err_trunc_assigning_undo_log",
+ err = DB_ERROR;);
+ if (err != DB_SUCCESS) {
+ trx_rollback_to_savepoint(trx, NULL);
+ return(row_truncate_complete(
+ table, trx, fsp_flags, logger, err));
+ }
+ }
+
+	/* Step-7: Generate a new table-id.
+	Why do we need a new table-id?
+ Purge and rollback: we assign a new table id for the
+ table. Since purge and rollback look for the table based on
+ the table id, they see the table as 'dropped' and discard
+ their operations. */
+ table_id_t new_id;
+ dict_hdr_get_new_id(&new_id, NULL, NULL, table, false);
+
+ /* Check if table involves FTS index. */
+ bool has_internal_doc_id =
+ dict_table_has_fts_index(table)
+ || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID);
+
+ bool no_redo = is_file_per_table && !has_internal_doc_id;
+
+	/* Step-8: Log information about the tablespace, which includes
+	table and index information. If there is a crash in the next step,
+	recovery will attempt to fix up the operation. */
+
+	/* Lock all index trees for this table, as we will truncate
+	the table/indexes and possibly change their metadata. All
+	DML/DDL is blocked by the table-level X lock, with a few
+	exceptions such as information-schema queries about the table;
+	MySQL could try to access index stats for such a query, so we
+	need index locks to synchronize. */
+ dict_table_x_lock_indexes(table);
+
+ if (!dict_table_is_temporary(table)) {
+
+ if (is_file_per_table) {
+
+ err = row_truncate_prepare(table, &fsp_flags);
+
+ DBUG_EXECUTE_IF("ib_err_trunc_preparing_for_truncate",
+ err = DB_ERROR;);
+
+ if (err != DB_SUCCESS) {
+ row_truncate_rollback(
+ table, trx, new_id,
+ has_internal_doc_id,
+ no_redo, false, true);
+ return(row_truncate_complete(
+ table, trx, fsp_flags, logger, err));
+ }
+ } else {
+ fsp_flags = fil_space_get_flags(table->space);
+
+ DBUG_EXECUTE_IF("ib_err_trunc_preparing_for_truncate",
+ fsp_flags = ULINT_UNDEFINED;);
+
+ if (fsp_flags == ULINT_UNDEFINED) {
+ row_truncate_rollback(
+ table, trx, new_id,
+ has_internal_doc_id,
+ no_redo, false, true);
+ return(row_truncate_complete(
+ table, trx, fsp_flags,
+ logger, DB_ERROR));
+ }
+ }
+
+ logger = UT_NEW_NOKEY(TruncateLogger(
+ table, fsp_flags, new_id));
+
+ err = logger->init();
+ if (err != DB_SUCCESS) {
+ row_truncate_rollback(
+ table, trx, new_id, has_internal_doc_id,
+ no_redo, false, true);
+			return(row_truncate_complete(
+				table, trx, fsp_flags, logger, DB_ERROR));
+		}
+
+ err = SysIndexIterator().for_each(*logger);
+ if (err != DB_SUCCESS) {
+ row_truncate_rollback(
+ table, trx, new_id, has_internal_doc_id,
+ no_redo, false, true);
+			return(row_truncate_complete(
+				table, trx, fsp_flags, logger, DB_ERROR));
+		}
+
+ ut_ad(logger->debug());
+
+ err = logger->log();
+
+ if (err != DB_SUCCESS) {
+ row_truncate_rollback(
+ table, trx, new_id, has_internal_doc_id,
+ no_redo, false, true);
+ return(row_truncate_complete(
+ table, trx, fsp_flags, logger, DB_ERROR));
+ }
+ }
+
+ DBUG_EXECUTE_IF("ib_trunc_crash_after_redo_log_write_complete",
+ log_buffer_flush_to_disk();
+ os_thread_sleep(3000000);
+ DBUG_SUICIDE(););
+
+ /* Step-9: Drop all indexes (free index pages associated with these
+ indexes) */
+ if (!dict_table_is_temporary(table)) {
+
+ DropIndex dropIndex(table, trx, no_redo);
+
+ err = SysIndexIterator().for_each(dropIndex);
+
+ if (err != DB_SUCCESS) {
+
+ row_truncate_rollback(
+ table, trx, new_id, has_internal_doc_id,
+ no_redo, true, true);
+
+ return(row_truncate_complete(
+ table, trx, fsp_flags, logger, err));
+ }
+ } else {
+		/* For temporary tables we don't have entries in SYSTEM TABLES */
+ ut_ad(fsp_is_system_temporary(table->space));
+ for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
+ index != NULL;
+ index = UT_LIST_GET_NEXT(indexes, index)) {
+
+ err = dict_truncate_index_tree_in_mem(index);
+
+ if (err != DB_SUCCESS) {
+ row_truncate_rollback(
+ table, trx, new_id, has_internal_doc_id,
+ no_redo, true, true);
+ return(row_truncate_complete(
+ table, trx, fsp_flags, logger, err));
+ }
+
+ DBUG_EXECUTE_IF(
+ "ib_trunc_crash_during_drop_index_temp_table",
+ log_buffer_flush_to_disk();
+ os_thread_sleep(2000000);
+ DBUG_SUICIDE(););
+ }
+ }
+
+ if (is_file_per_table && fsp_flags != ULINT_UNDEFINED) {
+		/* A single-table tablespace initially has
+		FIL_IBD_FILE_INITIAL_SIZE pages allocated, plus one extra
+		page for each index present. For the clustered index two
+		pages are allocated; one is covered by table->indexes.count
+		in the calculation, and we account for the other by
+		adding 1. */
+ ulint space_size = table->indexes.count +
+ FIL_IBD_FILE_INITIAL_SIZE + 1;
+
+ if (has_internal_doc_id) {
+			/* Aux tables are created for FTS indexes and
+			use separate tablespaces. */
+ space_size -= ib_vector_size(table->fts->indexes);
+ }
+
+ fil_reinit_space_header_for_table(table, space_size, trx);
+ }
+
+ DBUG_EXECUTE_IF("ib_trunc_crash_with_intermediate_log_checkpoint",
+ log_buffer_flush_to_disk();
+ os_thread_sleep(2000000);
+ log_checkpoint(TRUE);
+ os_thread_sleep(1000000);
+ DBUG_SUICIDE(););
+
+ DBUG_EXECUTE_IF("ib_trunc_crash_drop_reinit_done_create_to_start",
+ log_buffer_flush_to_disk();
+ os_thread_sleep(2000000);
+ DBUG_SUICIDE(););
+
+ /* Step-10: Re-create new indexes. */
+ if (!dict_table_is_temporary(table)) {
+
+ CreateIndex createIndex(table, no_redo);
+
+ err = SysIndexIterator().for_each(createIndex);
+
+ if (err != DB_SUCCESS) {
+
+ row_truncate_rollback(
+ table, trx, new_id, has_internal_doc_id,
+ no_redo, true, true);
+
+ return(row_truncate_complete(
+ table, trx, fsp_flags, logger, err));
+ }
+ }
+
+ /* Done with index truncation, release index tree locks,
+ subsequent work relates to table level metadata change */
+ dict_table_x_unlock_indexes(table);
+
+ if (has_internal_doc_id) {
+
+ err = row_truncate_fts(table, new_id, trx);
+
+ if (err != DB_SUCCESS) {
+
+ row_truncate_rollback(
+ table, trx, new_id, has_internal_doc_id,
+ no_redo, true, false);
+
+ return(row_truncate_complete(
+ table, trx, fsp_flags, logger, err));
+ }
+ }
+
+	/* Step-11: Update the new table-id in the in-memory cache
+	(dictionary) and on disk (INNODB_SYS_TABLES). INNODB_SYS_INDEXES
+	also needs to be updated with the root-page-no of each newly
+	created index and the updated table-id. */
+ if (dict_table_is_temporary(table)) {
+
+ dict_table_change_id_in_cache(table, new_id);
+ err = DB_SUCCESS;
+
+ } else {
+
+ /* If this fails then we are in an inconsistent state and
+ the results are undefined. */
+ ut_ad(old_space == table->space);
+
+ err = row_truncate_update_system_tables(
+ table, new_id, has_internal_doc_id, no_redo, trx);
+
+ if (err != DB_SUCCESS) {
+ return(row_truncate_complete(
+ table, trx, fsp_flags, logger, err));
+ }
+ }
+
+ DBUG_EXECUTE_IF("ib_trunc_crash_on_updating_dict_sys_info",
+ log_buffer_flush_to_disk();
+ os_thread_sleep(2000000);
+ DBUG_SUICIDE(););
+
+ /* Step-12: Cleanup Stage. Reset auto-inc value to 1.
+ Release all the locks.
+ Commit the transaction. Update trx operation state. */
+ dict_table_autoinc_lock(table);
+ dict_table_autoinc_initialize(table, 1);
+ dict_table_autoinc_unlock(table);
+
+ if (trx_is_started(trx)) {
+
+ trx_commit_for_mysql(trx);
+ }
+
+ return(row_truncate_complete(table, trx, fsp_flags, logger, err));
+}
+
+/**
+Fix the table truncate by applying information parsed from TRUNCATE log.
+Fix-up includes re-creating table (drop and re-create indexes)
+@return error code or DB_SUCCESS */
+dberr_t
+truncate_t::fixup_tables_in_system_tablespace()
+{
+ dberr_t err = DB_SUCCESS;
+
+	/* Using the info cached during the REDO log scan phase, fix up
+	the table truncate. */
+
+ for (tables_t::iterator it = s_tables.begin();
+ it != s_tables.end();) {
+
+ if ((*it)->m_space_id == TRX_SYS_SPACE) {
+ /* Step-1: Drop and re-create indexes. */
+ ib::info() << "Completing truncate for table with "
+ "id (" << (*it)->m_old_table_id << ") "
+ "residing in the system tablespace.";
+
+ err = fil_recreate_table(
+ (*it)->m_space_id,
+ (*it)->m_format_flags,
+ (*it)->m_tablespace_flags,
+ (*it)->m_tablename,
+ **it);
+
+ /* Step-2: Update the SYS_XXXX tables to reflect
+ this new table_id and root_page_no. */
+ table_id_t new_id;
+
+ dict_hdr_get_new_id(&new_id, NULL, NULL, NULL, true);
+
+ err = row_truncate_update_sys_tables_during_fix_up(
+ **it, new_id, TRUE,
+ (err == DB_SUCCESS) ? false : true);
+
+ if (err != DB_SUCCESS) {
+ break;
+ }
+
+ os_file_delete(
+ innodb_log_file_key, (*it)->m_log_file_name);
+ UT_DELETE(*it);
+ it = s_tables.erase(it);
+ } else {
+ ++it;
+ }
+ }
+
+	/* Also clear the map used to track truncated tablespaces. */
+ s_truncated_tables.clear();
+
+ return(err);
+}
+
+/**
+Fix the table truncate by applying information parsed from TRUNCATE log.
+Fix-up includes re-creating tablespace.
+@return error code or DB_SUCCESS */
+dberr_t
+truncate_t::fixup_tables_in_non_system_tablespace()
+{
+ dberr_t err = DB_SUCCESS;
+
+	/* Using the info cached during the REDO log scan phase, fix up
+	the table truncate. */
+ tables_t::iterator end = s_tables.end();
+
+ for (tables_t::iterator it = s_tables.begin(); it != end; ++it) {
+
+		/* All tables in the system tablespace have already been
+		processed and erased from this list. */
+ ut_a((*it)->m_space_id != TRX_SYS_SPACE);
+
+ /* Drop tablespace, drop indexes and re-create indexes. */
+
+ ib::info() << "Completing truncate for table with "
+ "id (" << (*it)->m_old_table_id << ") "
+ "residing in file-per-table tablespace with "
+ "id (" << (*it)->m_space_id << ")";
+
+ if (!fil_space_get((*it)->m_space_id)) {
+
+ /* Create the database directory for name,
+ if it does not exist yet */
+ fil_create_directory_for_tablename(
+ (*it)->m_tablename);
+
+ err = fil_ibd_create(
+ (*it)->m_space_id,
+ (*it)->m_tablename,
+ (*it)->m_dir_path,
+ (*it)->m_tablespace_flags,
+ FIL_IBD_FILE_INITIAL_SIZE,
+ (*it)->m_encryption,
+ (*it)->m_key_id);
+
+ if (err != DB_SUCCESS) {
+				/* If the checkpoint was not yet taken
+				and the table was dropped, we might
+				still have REDO entries for this table
+				which are INVALID. Ignore them. */
+ ib::warn() << "Failed to create"
+ " tablespace for "
+ << (*it)->m_space_id
+ << " space-id";
+ err = DB_ERROR;
+ break;
+ }
+ }
+
+ ut_ad(fil_space_get((*it)->m_space_id));
+
+ err = fil_recreate_tablespace(
+ (*it)->m_space_id,
+ (*it)->m_format_flags,
+ (*it)->m_tablespace_flags,
+ (*it)->m_tablename,
+ **it, log_get_lsn());
+
+ /* Step-2: Update the SYS_XXXX tables to reflect new
+ table-id and root_page_no. */
+ table_id_t new_id;
+
+ dict_hdr_get_new_id(&new_id, NULL, NULL, NULL, true);
+
+ err = row_truncate_update_sys_tables_during_fix_up(
+ **it, new_id, TRUE, (err == DB_SUCCESS) ? false : true);
+
+ if (err != DB_SUCCESS) {
+ break;
+ }
+ }
+
+ if (err == DB_SUCCESS && s_tables.size() > 0) {
+
+ log_make_checkpoint();
+ }
+
+ for (ulint i = 0; i < s_tables.size(); ++i) {
+ os_file_delete(
+ innodb_log_file_key, s_tables[i]->m_log_file_name);
+ UT_DELETE(s_tables[i]);
+ }
+
+ s_tables.clear();
+
+ return(err);
+}
+
+/**
+Constructor
+
+@param old_table_id old table id assigned to table before truncate
+@param new_table_id new table id that will be assigned to table
+ after truncate
+@param dir_path directory path */
+
+truncate_t::truncate_t(
+ table_id_t old_table_id,
+ table_id_t new_table_id,
+ const char* dir_path)
+ :
+ m_space_id(),
+ m_old_table_id(old_table_id),
+ m_new_table_id(new_table_id),
+ m_dir_path(),
+ m_tablename(),
+ m_tablespace_flags(),
+ m_format_flags(),
+ m_indexes(),
+ m_log_lsn(),
+ m_log_file_name(),
+ /* JAN: TODO: Encryption */
+ m_encryption(FIL_ENCRYPTION_DEFAULT),
+ m_key_id(FIL_DEFAULT_ENCRYPTION_KEY)
+{
+ if (dir_path != NULL) {
+ m_dir_path = mem_strdup(dir_path);
+ }
+}
+
+/**
+Constructor
+
+@param log_file_name parse the log file during recovery to populate
+ information related to table to truncate */
+truncate_t::truncate_t(
+ const char* log_file_name)
+ :
+ m_space_id(),
+ m_old_table_id(),
+ m_new_table_id(),
+ m_dir_path(),
+ m_tablename(),
+ m_tablespace_flags(),
+ m_format_flags(),
+ m_indexes(),
+ m_log_lsn(),
+ m_log_file_name(),
+ /* JAN: TODO: Encryption */
+ m_encryption(FIL_ENCRYPTION_DEFAULT),
+	m_key_id(FIL_DEFAULT_ENCRYPTION_KEY)
+{
+ m_log_file_name = mem_strdup(log_file_name);
+ if (m_log_file_name == NULL) {
+ ib::fatal() << "Failed creating truncate_t; out of memory";
+ }
+}
+
+/** Constructor */
+
+truncate_t::index_t::index_t()
+ :
+ m_id(),
+ m_type(),
+ m_root_page_no(FIL_NULL),
+ m_new_root_page_no(FIL_NULL),
+ m_n_fields(),
+ m_trx_id_pos(ULINT_UNDEFINED),
+ m_fields()
+{
+ /* Do nothing */
+}
+
+/** Destructor */
+
+truncate_t::~truncate_t()
+{
+ if (m_dir_path != NULL) {
+ ut_free(m_dir_path);
+ m_dir_path = NULL;
+ }
+
+ if (m_tablename != NULL) {
+ ut_free(m_tablename);
+ m_tablename = NULL;
+ }
+
+ if (m_log_file_name != NULL) {
+ ut_free(m_log_file_name);
+ m_log_file_name = NULL;
+ }
+
+ m_indexes.clear();
+}
+
+/**
+@return number of indexes parsed from the log record */
+
+size_t
+truncate_t::indexes() const
+{
+ return(m_indexes.size());
+}
+
+/**
+Update root page number in SYS_XXXX tables.
+
+@param trx transaction object
+@param table_id table id for which information needs to
+ be updated.
+@param reserve_dict_mutex if TRUE, acquire/release
+ dict_sys->mutex around call to pars_sql.
+@param mark_index_corrupted if true, then mark index corrupted.
+@return DB_SUCCESS or error code */
+
+dberr_t
+truncate_t::update_root_page_no(
+ trx_t* trx,
+ table_id_t table_id,
+ ibool reserve_dict_mutex,
+ bool mark_index_corrupted) const
+{
+ indexes_t::const_iterator end = m_indexes.end();
+
+ dberr_t err = DB_SUCCESS;
+
+ for (indexes_t::const_iterator it = m_indexes.begin();
+ it != end;
+ ++it) {
+
+ pars_info_t* info = pars_info_create();
+
+ pars_info_add_int4_literal(
+ info, "page_no", it->m_new_root_page_no);
+
+ pars_info_add_ull_literal(info, "table_id", table_id);
+
+ pars_info_add_ull_literal(
+ info, "index_id",
+ (mark_index_corrupted ? -1 : it->m_id));
+
+ err = que_eval_sql(
+ info,
+ "PROCEDURE RENUMBER_IDX_PAGE_NO_PROC () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_INDEXES"
+ " SET PAGE_NO = :page_no\n"
+ " WHERE TABLE_ID = :table_id"
+ " AND ID = :index_id;\n"
+ "END;\n", reserve_dict_mutex, trx);
+
+ if (err != DB_SUCCESS) {
+ break;
+ }
+ }
+
+ return(err);
+}
+
+/**
+Check whether a tablespace was truncated during recovery
+@param space_id tablespace id to check
+@return true if the tablespace was truncated */
+
+bool
+truncate_t::is_tablespace_truncated(ulint space_id)
+{
+ tables_t::iterator end = s_tables.end();
+
+ for (tables_t::iterator it = s_tables.begin(); it != end; ++it) {
+
+ if ((*it)->m_space_id == space_id) {
+
+ return(true);
+ }
+ }
+
+ return(false);
+}
+
+/** Was tablespace truncated (on crash before checkpoint).
+If the MLOG_TRUNCATE redo-record is still available then tablespace
+was truncated and checkpoint is yet to happen.
+@param[in] space_id tablespace id to check.
+@return true if the tablespace was truncated. */
+bool
+truncate_t::was_tablespace_truncated(ulint space_id)
+{
+ return(s_truncated_tables.find(space_id) != s_truncated_tables.end());
+}
+
+/** Get the lsn associated with space.
+@param[in] space_id tablespace id to check.
+@return associated lsn. */
+lsn_t
+truncate_t::get_truncated_tablespace_init_lsn(ulint space_id)
+{
+ ut_ad(was_tablespace_truncated(space_id));
+
+ return(s_truncated_tables.find(space_id)->second);
+}
+
+/**
+Parses log record during recovery
+@param start_ptr buffer containing log body to parse
+@param end_ptr buffer end
+
+@return DB_SUCCESS or error code */
+
+dberr_t
+truncate_t::parse(
+ byte* start_ptr,
+ const byte* end_ptr)
+{
+ /* Parse lsn, space-id, format-flags and tablespace-flags. */
+ if (end_ptr < start_ptr + (8 + 4 + 4 + 4)) {
+ return(DB_FAIL);
+ }
+
+ m_log_lsn = mach_read_from_8(start_ptr);
+ start_ptr += 8;
+
+ m_space_id = mach_read_from_4(start_ptr);
+ start_ptr += 4;
+
+ m_format_flags = mach_read_from_4(start_ptr);
+ start_ptr += 4;
+
+ m_tablespace_flags = mach_read_from_4(start_ptr);
+ start_ptr += 4;
+
+ /* Parse table-name. */
+ if (end_ptr < start_ptr + (2)) {
+ return(DB_FAIL);
+ }
+
+ ulint n_tablename_len = mach_read_from_2(start_ptr);
+ start_ptr += 2;
+
+ if (n_tablename_len > 0) {
+ if (end_ptr < start_ptr + n_tablename_len) {
+ return(DB_FAIL);
+ }
+ m_tablename = mem_strdup(reinterpret_cast<char*>(start_ptr));
+ ut_ad(m_tablename[n_tablename_len - 1] == 0);
+ start_ptr += n_tablename_len;
+ }
+
+
+ /* Parse and read old/new table-id, number of indexes */
+ if (end_ptr < start_ptr + (8 + 8 + 2 + 2)) {
+ return(DB_FAIL);
+ }
+
+ ut_ad(m_indexes.empty());
+
+ m_old_table_id = mach_read_from_8(start_ptr);
+ start_ptr += 8;
+
+ m_new_table_id = mach_read_from_8(start_ptr);
+ start_ptr += 8;
+
+ ulint n_indexes = mach_read_from_2(start_ptr);
+ start_ptr += 2;
+
+ /* Parse the remote directory from TRUNCATE log record */
+ {
+ ulint n_tabledirpath_len = mach_read_from_2(start_ptr);
+ start_ptr += 2;
+
+ if (end_ptr < start_ptr + n_tabledirpath_len) {
+ return(DB_FAIL);
+ }
+
+ if (n_tabledirpath_len > 0) {
+
+ m_dir_path = mem_strdup(reinterpret_cast<char*>(start_ptr));
+ ut_ad(m_dir_path[n_tabledirpath_len - 1] == 0);
+ start_ptr += n_tabledirpath_len;
+ }
+ }
+
+ /* Parse index ids and types from TRUNCATE log record */
+ for (ulint i = 0; i < n_indexes; ++i) {
+ index_t index;
+
+ if (end_ptr < start_ptr + (8 + 4 + 4 + 4)) {
+ return(DB_FAIL);
+ }
+
+ index.m_id = mach_read_from_8(start_ptr);
+ start_ptr += 8;
+
+ index.m_type = mach_read_from_4(start_ptr);
+ start_ptr += 4;
+
+ index.m_root_page_no = mach_read_from_4(start_ptr);
+ start_ptr += 4;
+
+ index.m_trx_id_pos = mach_read_from_4(start_ptr);
+ start_ptr += 4;
+
+ if (!(index.m_type & DICT_FTS)) {
+ m_indexes.push_back(index);
+ }
+ }
+
+ ut_ad(!m_indexes.empty());
+
+ if (FSP_FLAGS_GET_ZIP_SSIZE(m_tablespace_flags)) {
+
+ /* Parse the number of index fields from TRUNCATE log record */
+ for (ulint i = 0; i < m_indexes.size(); ++i) {
+
+ if (end_ptr < start_ptr + (2 + 2)) {
+ return(DB_FAIL);
+ }
+
+ m_indexes[i].m_n_fields = mach_read_from_2(start_ptr);
+ start_ptr += 2;
+
+ ulint len = mach_read_from_2(start_ptr);
+ start_ptr += 2;
+
+ if (end_ptr < start_ptr + len) {
+ return(DB_FAIL);
+ }
+
+ index_t& index = m_indexes[i];
+
+ /* Should be NUL terminated. */
+ ut_ad((start_ptr)[len - 1] == 0);
+
+ index_t::fields_t::iterator end;
+
+ end = index.m_fields.end();
+
+ index.m_fields.insert(
+ end, start_ptr, &(start_ptr)[len]);
+
+ start_ptr += len;
+ }
+ }
+
+ return(DB_SUCCESS);
+}
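+
+/* Every field read in parse() is guarded by an "end_ptr < start_ptr + n"
+bounds check and decoded with the big-endian mach_read_from_N helpers. A
+compiled-out sketch of that cursor pattern with a hypothetical helper; it
+mirrors the big-endian byte order of mach_read_from_4 but is not the
+InnoDB original. */
+#if 0
+static bool read_be4(
+	const unsigned char*&	ptr,
+	const unsigned char*	end,
+	unsigned long&		out)
+{
+	if (end < ptr + 4) {
+		return(false);	/* truncated record: caller returns DB_FAIL */
+	}
+
+	out = (static_cast<unsigned long>(ptr[0]) << 24)
+	    | (static_cast<unsigned long>(ptr[1]) << 16)
+	    | (static_cast<unsigned long>(ptr[2]) << 8)
+	    | static_cast<unsigned long>(ptr[3]);
+	ptr += 4;
+
+	return(true);
+}
+#endif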
+
+/** Parse log record from REDO log file during recovery.
+@param[in,out] start_ptr buffer containing log body to parse
+@param[in] end_ptr buffer end
+@param[in] space_id tablespace identifier
+@return pointer to the position parsed up to, or NULL. */
+byte*
+truncate_t::parse_redo_entry(
+ byte* start_ptr,
+ const byte* end_ptr,
+ ulint space_id)
+{
+ lsn_t lsn;
+
+ /* Parse space-id, lsn */
+ if (end_ptr < (start_ptr + 8)) {
+ return(NULL);
+ }
+
+ lsn = mach_read_from_8(start_ptr);
+ start_ptr += 8;
+
+	/* A tablespace can't exist in both states
+	(scheduled-for-truncate, was-truncated). */
+ if (!is_tablespace_truncated(space_id)) {
+
+ truncated_tables_t::iterator it =
+ s_truncated_tables.find(space_id);
+
+ if (it == s_truncated_tables.end()) {
+ s_truncated_tables.insert(
+ std::pair<ulint, lsn_t>(space_id, lsn));
+ } else {
+ it->second = lsn;
+ }
+ }
+
+ return(start_ptr);
+}
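+
+/* parse_redo_entry() remembers the most recent MLOG_TRUNCATE lsn per
+tablespace with a find-then-insert-or-overwrite on a map. A compiled-out
+sketch of that bookkeeping with plain integer stand-ins for the
+ulint/lsn_t typedefs. */
+#if 0
+#include <map>
+#include <utility>
+
+static void remember_truncate_lsn(
+	std::map<unsigned long, unsigned long long>&	truncated,
+	unsigned long					space_id,
+	unsigned long long				lsn)
+{
+	std::map<unsigned long, unsigned long long>::iterator	it
+		= truncated.find(space_id);
+
+	if (it == truncated.end()) {
+		truncated.insert(std::make_pair(space_id, lsn));
+	} else {
+		it->second = lsn;	/* keep the most recent lsn */
+	}
+}
+#endif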
+
+/**
+Set the truncate log values for a compressed table.
+@param index index from which recreate information needs to be extracted
+@return DB_SUCCESS or error code */
+
+dberr_t
+truncate_t::index_t::set(
+ const dict_index_t* index)
+{
+ /* Get trx-id column position (set only for clustered index) */
+ if (dict_index_is_clust(index)) {
+ m_trx_id_pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
+ ut_ad(m_trx_id_pos > 0);
+ ut_ad(m_trx_id_pos != ULINT_UNDEFINED);
+ } else {
+ m_trx_id_pos = 0;
+ }
+
+	/* The original logic set this field differently if the page is not
+	a leaf. In the truncate case this is the first page to be created,
+	hence always a leaf page, so we don't need that condition here. */
+ m_n_fields = dict_index_get_n_fields(index);
+
+ /* See requirements of page_zip_fields_encode for size. */
+ ulint encoded_buf_size = (m_n_fields + 1) * 2;
+ byte* encoded_buf = UT_NEW_ARRAY_NOKEY(byte, encoded_buf_size);
+
+ if (encoded_buf == NULL) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ ulint len = page_zip_fields_encode(
+ m_n_fields, index, m_trx_id_pos, encoded_buf);
+ ut_a(len <= encoded_buf_size);
+
+ /* Append the encoded fields data. */
+ m_fields.insert(m_fields.end(), &encoded_buf[0], &encoded_buf[len]);
+
+ /* NUL terminate the encoded data */
+ m_fields.push_back(0);
+
+ UT_DELETE_ARRAY(encoded_buf);
+
+ return(DB_SUCCESS);
+}
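+
+/* index_t::set() encodes into a scratch buffer sized from an upper bound,
+appends only the used prefix to the growable field vector, and then
+NUL-terminates it. A compiled-out sketch of that buffer handling;
+encode_fn is a hypothetical stand-in for page_zip_fields_encode(). */
+#if 0
+#include <cstddef>
+#include <vector>
+
+template <typename EncodeFn>
+static bool append_encoded(
+	std::vector<unsigned char>&	out,
+	EncodeFn			encode_fn,
+	std::size_t			max_size)
+{
+	std::vector<unsigned char>	scratch(max_size);
+
+	const std::size_t	len = encode_fn(&scratch[0], scratch.size());
+
+	if (len > max_size) {
+		return(false);
+	}
+
+	out.insert(out.end(), scratch.begin(),
+		   scratch.begin() + static_cast<std::ptrdiff_t>(len));
+	out.push_back(0);	/* NUL-terminate, as the log format expects */
+
+	return(true);
+}
+#endif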
+
+/** Create an index for a table.
+@param[in] table_name table name, for which to create
+the index
+@param[in] space_id space id where we have to
+create the index
+@param[in] page_size page size of the .ibd file
+@param[in] index_type type of index to truncate
+@param[in] index_id id of index to truncate
+@param[in] btr_redo_create_info control info for ::btr_create()
+@param[in,out] mtr mini-transaction covering the
+create index
+@return root page no or FIL_NULL on failure */
+ulint
+truncate_t::create_index(
+ const char* table_name,
+ ulint space_id,
+ const page_size_t& page_size,
+ ulint index_type,
+ index_id_t index_id,
+ const btr_create_t& btr_redo_create_info,
+ mtr_t* mtr) const
+{
+ ulint root_page_no = btr_create(
+ index_type, space_id, page_size, index_id,
+ NULL, &btr_redo_create_info, mtr);
+
+ if (root_page_no == FIL_NULL) {
+
+ ib::info() << "innodb_force_recovery was set to "
+ << srv_force_recovery << ". Continuing crash recovery"
+ " even though we failed to create index " << index_id
+ << " for compressed table '" << table_name << "' with"
+ " tablespace " << space_id << " during recovery";
+ }
+
+ return(root_page_no);
+}
+
+/** Check if index has been modified since TRUNCATE log snapshot
+was recorded.
+@param space_id space_id where table/indexes resides.
+@param root_page_no root page of index that needs to be verified.
+@return true if modified else false */
+
+bool
+truncate_t::is_index_modified_since_logged(
+ ulint space_id,
+ ulint root_page_no) const
+{
+ mtr_t mtr;
+ bool found;
+ const page_size_t& page_size = fil_space_get_page_size(space_id,
+ &found);
+ dberr_t err = DB_SUCCESS;
+
+ ut_ad(found);
+
+ mtr_start(&mtr);
+
+	/* The root page could be in a free state if the truncate crashed
+	after drop_index and the page was not allocated to any other
+	object. */
+ buf_block_t* block= buf_page_get_gen(
+ page_id_t(space_id, root_page_no), page_size, RW_X_LATCH, NULL,
+ BUF_GET_POSSIBLY_FREED, __FILE__, __LINE__, &mtr, &err);
+
+ page_t* root = buf_block_get_frame(block);
+
+#ifdef UNIV_DEBUG
+	/* If the root page has been freed as part of the truncate
+	drop_index action and not yet allocated to any object, the page
+	lsn is still > the snapshot lsn. */
+ if (block->page.file_page_was_freed) {
+ ut_ad(mach_read_from_8(root + FIL_PAGE_LSN) > m_log_lsn);
+ }
+#endif /* UNIV_DEBUG */
+
+ lsn_t page_lsn = mach_read_from_8(root + FIL_PAGE_LSN);
+
+ mtr_commit(&mtr);
+
+ if (page_lsn > m_log_lsn) {
+ return(true);
+ }
+
+ return(false);
+}
+
+/** Drop indexes for a table.
+@param space_id space_id where table/indexes resides. */
+
+void
+truncate_t::drop_indexes(
+ ulint space_id) const
+{
+ mtr_t mtr;
+ ulint root_page_no = FIL_NULL;
+
+ indexes_t::const_iterator end = m_indexes.end();
+
+ for (indexes_t::const_iterator it = m_indexes.begin();
+ it != end;
+ ++it) {
+
+ root_page_no = it->m_root_page_no;
+
+ bool found;
+ const page_size_t& page_size
+ = fil_space_get_page_size(space_id, &found);
+
+ ut_ad(found);
+
+ if (is_index_modified_since_logged(
+ space_id, root_page_no)) {
+			/* The page has been modified since the TRUNCATE log
+			snapshot was recorded, so it is not safe to drop the
+			index. */
+ continue;
+ }
+
+ mtr_start(&mtr);
+
+ if (space_id != TRX_SYS_SPACE) {
+ /* Do not log changes for single-table
+ tablespaces, we are in recovery mode. */
+ mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
+ }
+
+ if (root_page_no != FIL_NULL) {
+ const page_id_t root_page_id(space_id, root_page_no);
+
+ btr_free_if_exists(
+ root_page_id, page_size, it->m_id, &mtr);
+ }
+
+		/* If the tree is already freed then btr_free_if_exists()
+		may return immediately, in which case we still need to
+		release the lock we acquired on the root page. */
+ mtr_commit(&mtr);
+ }
+}
+
+
+/** Create the indexes for a table
+@param[in] table_name table name, for which to create the indexes
+@param[in] space_id space id where we have to create the indexes
+@param[in] page_size page size of the .ibd file
+@param[in] flags tablespace flags
+@param[in] format_flags page format flags
+@return DB_SUCCESS or error code. */
+dberr_t
+truncate_t::create_indexes(
+ const char* table_name,
+ ulint space_id,
+ const page_size_t& page_size,
+ ulint flags,
+ ulint format_flags)
+{
+ mtr_t mtr;
+
+ mtr_start(&mtr);
+
+ if (space_id != TRX_SYS_SPACE) {
+ /* Do not log changes for single-table tablespaces, we
+ are in recovery mode. */
+ mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
+ }
+
+ /* Create all new index trees with table format, index ids, index
+ types, number of index fields and index field information taken
+ out from the TRUNCATE log record. */
+
+ ulint root_page_no = FIL_NULL;
+ indexes_t::iterator end = m_indexes.end();
+ for (indexes_t::iterator it = m_indexes.begin();
+ it != end;
+ ++it) {
+
+ btr_create_t btr_redo_create_info(
+ FSP_FLAGS_GET_ZIP_SSIZE(flags)
+ ? &it->m_fields[0] : NULL);
+
+ btr_redo_create_info.format_flags = format_flags;
+
+ if (FSP_FLAGS_GET_ZIP_SSIZE(flags)) {
+
+ btr_redo_create_info.n_fields = it->m_n_fields;
+ /* Skip the NUL appended field */
+ btr_redo_create_info.field_len =
+ it->m_fields.size() - 1;
+ btr_redo_create_info.trx_id_pos = it->m_trx_id_pos;
+ }
+
+ root_page_no = create_index(
+ table_name, space_id, page_size, it->m_type, it->m_id,
+ btr_redo_create_info, &mtr);
+
+ if (root_page_no == FIL_NULL) {
+ break;
+ }
+
+ it->m_new_root_page_no = root_page_no;
+ }
+
+ mtr_commit(&mtr);
+
+ return(root_page_no == FIL_NULL ? DB_ERROR : DB_SUCCESS);
+}
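
The loop above aborts on the first failed root-page allocation and maps it to DB_ERROR after the mtr commit. A compact restatement of that error-handling shape (PAGE_NONE assumes the all-ones FIL_NULL sentinel; hypothetical names):

#include <cstdint>
#include <vector>

static const std::uint32_t PAGE_NONE = 0xFFFFFFFF;  /* assumed FIL_NULL value */

template <class CreateIndex>
bool create_all_indexes(std::vector<std::uint32_t>& roots, CreateIndex create)
{
        for (std::uint32_t& root : roots) {
                root = create();
                if (root == PAGE_NONE) {
                        return false;  /* caller reports DB_ERROR */
                }
        }
        return true;
}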
+
+/**
+Write a TRUNCATE log record for fixing up the table if truncate crashes.
+@param start_ptr buffer to write log record
+@param end_ptr buffer end
+@param space_id space id
+@param tablename the table name in the usual databasename/tablename
+ format of InnoDB
+@param flags tablespace flags
+@param format_flags page format
+@param lsn lsn while logging
+@return DB_SUCCESS or error code */
+
+dberr_t
+truncate_t::write(
+ byte* start_ptr,
+ byte* end_ptr,
+ ulint space_id,
+ const char* tablename,
+ ulint flags,
+ ulint format_flags,
+ lsn_t lsn) const
+{
+ if (end_ptr < start_ptr) {
+ return(DB_FAIL);
+ }
+
+ /* LSN, Type, Space-ID, format-flag (also known as log_flag;
+ stored in the page_no field), tablespace flags */
+ if (end_ptr < (start_ptr + (8 + 4 + 4 + 4))) {
+ return(DB_FAIL);
+ }
+
+ mach_write_to_8(start_ptr, lsn);
+ start_ptr += 8;
+
+ mach_write_to_4(start_ptr, space_id);
+ start_ptr += 4;
+
+ mach_write_to_4(start_ptr, format_flags);
+ start_ptr += 4;
+
+ mach_write_to_4(start_ptr, flags);
+ start_ptr += 4;
+
+ /* Name of the table. */
+ /* Include the NUL in the log record. */
+ ulint len = strlen(tablename) + 1;
+ if (end_ptr < (start_ptr + (len + 2))) {
+ return(DB_FAIL);
+ }
+
+ mach_write_to_2(start_ptr, len);
+ start_ptr += 2;
+
+ memcpy(start_ptr, tablename, len - 1);
+ start_ptr += len;
+
+ DBUG_EXECUTE_IF("ib_trunc_crash_while_writing_redo_log",
+ DBUG_SUICIDE(););
+
+ /* Old/New Table-ID, Number of Indexes and Tablespace dir-path-name. */
+ /* Write the remote directory of the table into mtr log */
+ len = m_dir_path != NULL ? strlen(m_dir_path) + 1 : 0;
+ if (end_ptr < (start_ptr + (len + 8 + 8 + 2 + 2))) {
+ return(DB_FAIL);
+ }
+
+ /* Write out old-table-id. */
+ mach_write_to_8(start_ptr, m_old_table_id);
+ start_ptr += 8;
+
+ /* Write out new-table-id. */
+ mach_write_to_8(start_ptr, m_new_table_id);
+ start_ptr += 8;
+
+ /* Write out the number of indexes. */
+ mach_write_to_2(start_ptr, m_indexes.size());
+ start_ptr += 2;
+
+ /* Write the length (NUL included) of the .ibd path. */
+ mach_write_to_2(start_ptr, len);
+ start_ptr += 2;
+
+ if (m_dir_path != NULL) {
+ memcpy(start_ptr, m_dir_path, len - 1);
+ start_ptr += len;
+ }
+
+ /* Indexes information (id, type) */
+ /* Write index ids, type, root-page-no into mtr log */
+ for (ulint i = 0; i < m_indexes.size(); ++i) {
+
+ if (end_ptr < (start_ptr + (8 + 4 + 4 + 4))) {
+ return(DB_FAIL);
+ }
+
+ mach_write_to_8(start_ptr, m_indexes[i].m_id);
+ start_ptr += 8;
+
+ mach_write_to_4(start_ptr, m_indexes[i].m_type);
+ start_ptr += 4;
+
+ mach_write_to_4(start_ptr, m_indexes[i].m_root_page_no);
+ start_ptr += 4;
+
+ mach_write_to_4(start_ptr, m_indexes[i].m_trx_id_pos);
+ start_ptr += 4;
+ }
+
+ /* If the tablespace is compressed, write the field info of each
+ index. */
+ if (FSP_FLAGS_GET_ZIP_SSIZE(flags)) {
+
+ for (ulint i = 0; i < m_indexes.size(); ++i) {
+
+ ulint len = m_indexes[i].m_fields.size();
+ if (end_ptr < (start_ptr + (len + 2 + 2))) {
+ return(DB_FAIL);
+ }
+
+ mach_write_to_2(
+ start_ptr, m_indexes[i].m_n_fields);
+ start_ptr += 2;
+
+ mach_write_to_2(start_ptr, len);
+ start_ptr += 2;
+
+ const byte* ptr = &m_indexes[i].m_fields[0];
+ memcpy(start_ptr, ptr, len - 1);
+ start_ptr += len;
+ }
+ }
+
+ return(DB_SUCCESS);
+}
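
The record layout written above is: an 8-byte LSN, then 4-byte space id, format flags and tablespace flags, then a 2-byte length followed by the NUL-terminated table name, the table ids, index count, directory path and per-index info, with every field write preceded by a buffer-space check. A stand-alone sketch of the header portion of that pattern (big-endian writers standing in for mach_write_to_N(); hypothetical names, and the name's NUL byte is copied explicitly here for simplicity):

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>

static void write_be(unsigned char*& p, std::uint64_t v, int n)
{
        for (int i = n - 1; i >= 0; i--) {
                *p++ = (unsigned char) (v >> (8 * i));
        }
}

/* Returns false, like DB_FAIL above, when the buffer cannot hold the data. */
static bool write_trunc_header(unsigned char* p, const unsigned char* end,
                               std::uint64_t lsn, std::uint32_t space_id,
                               std::uint32_t format_flags, std::uint32_t flags,
                               const std::string& name)
{
        if (end < p + (8 + 4 + 4 + 4)) {
                return false;
        }
        write_be(p, lsn, 8);
        write_be(p, space_id, 4);
        write_be(p, format_flags, 4);
        write_be(p, flags, 4);

        const std::size_t len = name.size() + 1;  /* include the NUL */
        if (end < p + len + 2) {
                return false;
        }
        write_be(p, len, 2);
        std::memcpy(p, name.c_str(), len);
        return true;
}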
diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc
index 25888b65578..d62e730d800 100644
--- a/storage/innobase/row/row0uins.cc
+++ b/storage/innobase/row/row0uins.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,12 +25,8 @@ Created 2/25/1997 Heikki Tuuri
*******************************************************/
#include "row0uins.h"
-
-#ifdef UNIV_NONINL
-#include "row0uins.ic"
-#endif
-
#include "dict0dict.h"
+#include "dict0stats.h"
#include "dict0boot.h"
#include "dict0crea.h"
#include "trx0undo.h"
@@ -47,6 +43,7 @@ Created 2/25/1997 Heikki Tuuri
#include "que0que.h"
#include "ibuf0ibuf.h"
#include "log0log.h"
+#include "fil0fil.h"
/*************************************************************************
IMPORTANT NOTE: Any operation that generates redo MUST check that there
@@ -61,7 +58,7 @@ introduced where a call to log_free_check() is bypassed. */
/***************************************************************//**
Removes a clustered index record. The pcur in node was positioned on the
record, now it is detached.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_undo_ins_remove_clust_rec(
@@ -77,8 +74,14 @@ row_undo_ins_remove_clust_rec(
bool online;
ut_ad(dict_index_is_clust(index));
+ ut_ad(node->trx->in_rollback);
- mtr_start(&mtr);
+ mtr.start();
+ if (index->table->is_temporary()) {
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
+ } else {
+ mtr.set_named_space(index->space);
+ }
/* This is similar to row_undo_mod_clust(). The DDL thread may
already have copied this row from the log to the new table.
@@ -104,28 +107,31 @@ row_undo_ins_remove_clust_rec(
ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur), btr_cur->index)
== node->trx->id);
+ ut_ad(!rec_get_deleted_flag(
+ btr_cur_get_rec(btr_cur),
+ dict_table_is_comp(btr_cur->index->table)));
if (online && dict_index_is_online_ddl(index)) {
const rec_t* rec = btr_cur_get_rec(btr_cur);
mem_heap_t* heap = NULL;
- const ulint* offsets = rec_get_offsets(
- rec, index, NULL, ULINT_UNDEFINED, &heap);
+ const offset_t* offsets = rec_get_offsets(
+ rec, index, NULL, true, ULINT_UNDEFINED, &heap);
row_log_table_delete(rec, index, offsets, NULL);
mem_heap_free(heap);
}
if (node->table->id == DICT_INDEXES_ID) {
+
ut_ad(!online);
ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH);
- /* Drop the index tree associated with the row in
- SYS_INDEXES table: */
+ dict_drop_index_tree(
+ btr_pcur_get_rec(&node->pcur), &node->pcur, node->trx,
+ &mtr);
- dict_drop_index_tree(btr_pcur_get_rec(&(node->pcur)), &mtr);
+ mtr.commit();
- mtr_commit(&mtr);
-
- mtr_start(&mtr);
+ mtr.start();
success = btr_pcur_restore_position(
BTR_MODIFY_LEAF, &node->pcur, &mtr);
@@ -140,16 +146,19 @@ row_undo_ins_remove_clust_rec(
btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
retry:
/* If did not succeed, try pessimistic descent to tree */
- mtr_start(&mtr);
+ mtr.start();
+ if (index->table->is_temporary()) {
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
+ } else {
+ mtr.set_named_space(index->space);
+ }
- success = btr_pcur_restore_position(BTR_MODIFY_TREE,
- &(node->pcur), &mtr);
+ success = btr_pcur_restore_position(
+ BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
+ &node->pcur, &mtr);
ut_a(success);
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
- trx_is_recv(node->trx)
- ? RB_RECOVERY
- : RB_NORMAL, &mtr);
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0, true, &mtr);
/* The delete operation may fail if we have little
file space left: TODO: easiest to crash the database
@@ -169,14 +178,13 @@ retry:
func_exit:
btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
- trx_undo_rec_release(node->trx, node->undo_no);
return(err);
}
/***************************************************************//**
Removes a secondary index entry if found.
-@return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
+@return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_undo_ins_remove_sec_low(
@@ -185,66 +193,70 @@ row_undo_ins_remove_sec_low(
depending on whether we wish optimistic or
pessimistic descent down the index tree */
dict_index_t* index, /*!< in: index */
- dtuple_t* entry) /*!< in: index entry to remove */
+ dtuple_t* entry, /*!< in: index entry to remove */
+ que_thr_t* thr) /*!< in: query thread */
{
btr_pcur_t pcur;
- btr_cur_t* btr_cur;
dberr_t err = DB_SUCCESS;
mtr_t mtr;
- enum row_search_result search_result;
-
- log_free_check();
+ const bool modify_leaf = mode == BTR_MODIFY_LEAF;
- mtr_start(&mtr);
+ row_mtr_start(&mtr, index, !modify_leaf);
- if (mode == BTR_MODIFY_LEAF) {
+ if (modify_leaf) {
mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
mtr_s_lock(dict_index_get_lock(index), &mtr);
} else {
- ut_ad(mode == BTR_MODIFY_TREE);
- if (index->space == IBUF_SPACE_ID
- && !dict_index_is_unique(index)) {
- ibuf_free_excess_pages();
- }
- mtr_x_lock(dict_index_get_lock(index), &mtr);
+ ut_ad(mode == (BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE));
+ mtr_sx_lock(dict_index_get_lock(index), &mtr);
}
if (row_log_online_op_try(index, entry, 0)) {
goto func_exit_no_pcur;
}
- search_result = row_search_index_entry(index, entry, mode,
- &pcur, &mtr);
+ if (dict_index_is_spatial(index)) {
+ if (modify_leaf) {
+ mode |= BTR_RTREE_DELETE_MARK;
+ }
+ btr_pcur_get_btr_cur(&pcur)->thr = thr;
+ mode |= BTR_RTREE_UNDO_INS;
+ }
- switch (search_result) {
- case ROW_NOT_FOUND:
- goto func_exit;
- case ROW_FOUND:
- break;
+ switch (row_search_index_entry(index, entry, mode, &pcur, &mtr)) {
case ROW_BUFFERED:
case ROW_NOT_DELETED_REF:
/* These are invalid outcomes, because the mode passed
to row_search_index_entry() did not include any of the
flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
ut_error;
- }
+ case ROW_NOT_FOUND:
+ break;
+ case ROW_FOUND:
+ if (dict_index_is_spatial(index)
+ && rec_get_deleted_flag(
+ btr_pcur_get_rec(&pcur),
+ dict_table_is_comp(index->table))) {
+ ib::error() << "Record found in index " << index->name
+ << " is deleted marked on insert rollback.";
+ ut_ad(0);
+ }
- btr_cur = btr_pcur_get_btr_cur(&pcur);
+ btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur);
- if (mode != BTR_MODIFY_TREE) {
- err = btr_cur_optimistic_delete(btr_cur, 0, &mtr)
- ? DB_SUCCESS : DB_FAIL;
- } else {
- /* No need to distinguish RB_RECOVERY here, because we
- are deleting a secondary index record: the distinction
- between RB_NORMAL and RB_RECOVERY only matters when
- deleting a record that contains externally stored
- columns. */
- ut_ad(!dict_index_is_clust(index));
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
- RB_NORMAL, &mtr);
+ if (modify_leaf) {
+ err = btr_cur_optimistic_delete(btr_cur, 0, &mtr)
+ ? DB_SUCCESS : DB_FAIL;
+ } else {
+ /* Passing rollback=false here, because we are
+ deleting a secondary index record: the distinction
+ only matters when deleting a record that contains
+ externally stored columns. */
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
+ false, &mtr);
+ }
}
-func_exit:
+
btr_pcur_close(&pcur);
func_exit_no_pcur:
mtr_commit(&mtr);
@@ -255,20 +267,21 @@ func_exit_no_pcur:
/***************************************************************//**
Removes a secondary index entry from the index if found. Tries first
optimistic, then pessimistic descent down the tree.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_undo_ins_remove_sec(
/*====================*/
dict_index_t* index, /*!< in: index */
- dtuple_t* entry) /*!< in: index entry to insert */
+ dtuple_t* entry, /*!< in: index entry to insert */
+ que_thr_t* thr) /*!< in: query thread */
{
dberr_t err;
ulint n_tries = 0;
/* Try first optimistic descent to the B-tree */
- err = row_undo_ins_remove_sec_low(BTR_MODIFY_LEAF, index, entry);
+ err = row_undo_ins_remove_sec_low(BTR_MODIFY_LEAF, index, entry, thr);
if (err == DB_SUCCESS) {
@@ -277,7 +290,9 @@ row_undo_ins_remove_sec(
/* Try then pessimistic descent to the B-tree */
retry:
- err = row_undo_ins_remove_sec_low(BTR_MODIFY_TREE, index, entry);
+ err = row_undo_ins_remove_sec_low(
+ BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
+ index, entry, thr);
/* The delete operation may fail if we have little
file space left: TODO: easiest to crash the database
@@ -308,16 +323,13 @@ row_undo_ins_parse_undo_rec(
byte* ptr;
undo_no_t undo_no;
table_id_t table_id;
- ulint type;
ulint dummy;
bool dummy_extern;
ut_ad(node);
- ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &dummy,
+ ptr = trx_undo_rec_get_pars(node->undo_rec, &node->rec_type, &dummy,
&dummy_extern, &undo_no, &table_id);
- ut_ad(type == TRX_UNDO_INSERT_REC);
- node->rec_type = type;
node->update = NULL;
node->table = dict_table_open_on_id(
@@ -325,28 +337,67 @@ row_undo_ins_parse_undo_rec(
/* Skip the UNDO if we can't find the table or the .ibd file. */
if (UNIV_UNLIKELY(node->table == NULL)) {
- } else if (UNIV_UNLIKELY(node->table->file_unreadable)) {
+ return;
+ }
+
+ switch (node->rec_type) {
+ default:
+ ut_ad(!"wrong undo record type");
+ goto close_table;
+ case TRX_UNDO_INSERT_REC:
+ break;
+ case TRX_UNDO_RENAME_TABLE:
+ dict_table_t* table = node->table;
+ ut_ad(!table->is_temporary());
+ ut_ad(dict_table_is_file_per_table(table)
+ == (table->space != TRX_SYS_SPACE));
+ size_t len = mach_read_from_2(node->undo_rec)
+ + node->undo_rec - ptr - 2;
+ ptr[len] = 0;
+ const char* name = reinterpret_cast<char*>(ptr);
+ if (strcmp(table->name.m_name, name)) {
+ dict_table_rename_in_cache(table, name, false,
+ table_id != 0);
+ }
+ goto close_table;
+ }
+
+ if (UNIV_UNLIKELY(!fil_table_accessible(node->table))) {
close_table:
+ /* Normally, tables should not disappear or become
+ inaccessible during ROLLBACK, because they should be
+ protected by InnoDB table locks. Corruption could be
+ a valid exception.
+
+ FIXME: When running out of temporary tablespace, it
+ would probably be better to just drop all temporary
+ tables (and temporary undo log records) of the current
+ connection, instead of doing this rollback. */
dict_table_close(node->table, dict_locked, FALSE);
node->table = NULL;
} else {
+ ut_ad(!node->table->skip_alter_undo);
clust_index = dict_table_get_first_index(node->table);
if (clust_index != NULL) {
- trx_undo_rec_get_row_ref(
+ ptr = trx_undo_rec_get_row_ref(
ptr, clust_index, &node->ref, node->heap);
if (!row_undo_search_clust_to_pcur(node)) {
+ /* An error probably occurred during
+ an insert into the clustered index,
+ after we wrote the undo log record. */
goto close_table;
}
+ if (node->table->n_v_cols) {
+ trx_undo_read_v_cols(node->table, ptr,
+ node->row, false);
+ }
} else {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: table ");
- ut_print_name(stderr, node->trx, TRUE,
- node->table->name);
- fprintf(stderr, " has no indexes, "
- "ignoring the table\n");
+ ib::warn() << "Table " << node->table->name
+ << " has no indexes,"
+ " ignoring the table";
goto close_table;
}
}
@@ -354,12 +405,13 @@ close_table:
/***************************************************************//**
Removes secondary index records.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_undo_ins_remove_sec_rec(
/*========================*/
- undo_node_t* node) /*!< in/out: row undo node */
+ undo_node_t* node, /*!< in/out: row undo node */
+ que_thr_t* thr) /*!< in: query thread */
{
dberr_t err = DB_SUCCESS;
dict_index_t* index = node->index;
@@ -394,7 +446,7 @@ row_undo_ins_remove_sec_rec(
assume that the secondary index record does
not exist. */
} else {
- err = row_undo_ins_remove_sec(index, entry);
+ err = row_undo_ins_remove_sec(index, entry, thr);
if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
goto func_exit;
@@ -417,25 +469,25 @@ the same clustered index unique key did not have any record, even delete
marked, at the time of the insert. InnoDB is eager in a rollback:
if it figures out that an index record will be removed in the purge
anyway, it will remove it in the rollback.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-UNIV_INTERN
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
dberr_t
row_undo_ins(
/*=========*/
- undo_node_t* node) /*!< in: row undo node */
+ undo_node_t* node, /*!< in: row undo node */
+ que_thr_t* thr) /*!< in: query thread */
{
dberr_t err;
ibool dict_locked;
ut_ad(node->state == UNDO_NODE_INSERT);
+ ut_ad(node->trx->in_rollback);
+ ut_ad(trx_undo_roll_ptr_is_insert(node->roll_ptr));
dict_locked = node->trx->dict_operation_lock_mode == RW_X_LATCH;
row_undo_ins_parse_undo_rec(node, dict_locked);
if (node->table == NULL) {
- trx_undo_rec_release(node->trx, node->undo_no);
-
return(DB_SUCCESS);
}
@@ -448,7 +500,7 @@ row_undo_ins(
dict_table_skip_corrupt_index(node->index);
- err = row_undo_ins_remove_sec_rec(node);
+ err = row_undo_ins_remove_sec_rec(node, thr);
if (err == DB_SUCCESS) {
@@ -470,6 +522,24 @@ row_undo_ins(
mutex_exit(&dict_sys->mutex);
}
+
+ if (err == DB_SUCCESS && node->table->stat_initialized) {
+ /* Not protected by dict_table_stats_lock() for
+ performance reasons, we would rather get garbage
+ in stat_n_rows (which is just an estimate anyway)
+ than protecting the following code with a latch. */
+ dict_table_n_rows_dec(node->table);
+
+ /* Do not attempt to update statistics when
+ executing ROLLBACK in the InnoDB SQL
+ interpreter, because in that case we would
+ already be holding dict_sys->mutex, which
+ would be acquired when updating statistics. */
+ if (!dict_locked) {
+ dict_stats_update_if_needed(
+ node->table, node->trx->mysql_thd);
+ }
+ }
}
dict_table_close(node->table, dict_locked, FALSE);
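
The final hunk above deliberately accepts a racy decrement of the row-count estimate instead of taking dict_table_stats_lock(), and defers the statistics update when the dictionary mutex is already held. A loose stand-alone analogue of that trade-off (hypothetical names, not the InnoDB API):

#include <atomic>
#include <cstdint>

struct table_stats {
        /* An estimate only, so a relaxed atomic update is acceptable. */
        std::atomic<std::int64_t> n_rows{0};
};

void on_insert_rolled_back(table_stats& stats, bool dict_locked)
{
        stats.n_rows.fetch_sub(1, std::memory_order_relaxed);

        if (!dict_locked) {
                /* Recomputing persistent statistics would acquire the
                dictionary mutex, so it is only safe when the caller
                does not already hold it. */
                /* update_stats_if_needed(stats);  hypothetical */
        }
}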
diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc
index 90fd3c79c57..8e2775a050b 100644
--- a/storage/innobase/row/row0umod.cc
+++ b/storage/innobase/row/row0umod.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2019, MariaDB Corporation.
+Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,12 +25,8 @@ Created 2/27/1997 Heikki Tuuri
*******************************************************/
#include "row0umod.h"
-
-#ifdef UNIV_NONINL
-#include "row0umod.ic"
-#endif
-
#include "dict0dict.h"
+#include "dict0stats.h"
#include "dict0boot.h"
#include "trx0undo.h"
#include "trx0roll.h"
@@ -73,13 +69,13 @@ introduced where a call to log_free_check() is bypassed. */
/***********************************************************//**
Undoes a modify in a clustered index record.
-@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
+@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_undo_mod_clust_low(
/*===================*/
undo_node_t* node, /*!< in: row undo node */
- ulint** offsets,/*!< out: rec_get_offsets() on the record */
+ offset_t** offsets,/*!< out: rec_get_offsets() on the record */
mem_heap_t** offsets_heap,
/*!< in/out: memory heap that can be emptied */
mem_heap_t* heap, /*!< in/out: memory heap */
@@ -161,7 +157,6 @@ dberr_t
row_undo_mod_remove_clust_low(
/*==========================*/
undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr, /*!< in/out: mini-transaction */
ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
{
@@ -174,10 +169,15 @@ row_undo_mod_remove_clust_low(
/* Find out if the record has been purged already
or if we can remove it. */
- if (!btr_pcur_restore_position(mode, &node->pcur, mtr)
- || row_vers_must_preserve_del_marked(node->new_trx_id, mtr)) {
+ if (!btr_pcur_restore_position(mode, &node->pcur, mtr)) {
+ return DB_SUCCESS;
+ }
- return(DB_SUCCESS);
+ DEBUG_SYNC_C("rollback_purge_clust");
+
+ if (row_vers_must_preserve_del_marked(node->new_trx_id,
+ node->table->name, mtr)) {
+ return DB_SUCCESS;
}
btr_cur = btr_pcur_get_btr_cur(&node->pcur);
@@ -187,7 +187,7 @@ row_undo_mod_remove_clust_low(
if (!trx_id_offset) {
mem_heap_t* heap = NULL;
ulint trx_id_col;
- const ulint* offsets;
+ const offset_t* offsets;
ulint len;
trx_id_col = dict_index_get_sys_col_pos(
@@ -197,7 +197,7 @@ row_undo_mod_remove_clust_low(
offsets = rec_get_offsets(
btr_cur_get_rec(btr_cur), btr_cur_get_index(btr_cur),
- NULL, trx_id_col + 1, &heap);
+ NULL, true, trx_id_col + 1, &heap);
trx_id_offset = rec_get_nth_field_offs(
offsets, trx_id_col, &len);
@@ -217,21 +217,26 @@ row_undo_mod_remove_clust_low(
than the rolling-back one. */
ut_ad(rec_get_deleted_flag(btr_cur_get_rec(btr_cur),
dict_table_is_comp(node->table)));
+ /* In delete-marked records, DB_TRX_ID must
+ always refer to an existing update_undo log record. */
+ ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur), btr_cur->index));
if (mode == BTR_MODIFY_LEAF) {
err = btr_cur_optimistic_delete(btr_cur, 0, mtr)
? DB_SUCCESS
: DB_FAIL;
} else {
- ut_ad(mode == BTR_MODIFY_TREE);
+ ut_ad(mode == (BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE));
/* This operation is analogous to purge, we can free also
- inherited externally stored fields */
+ inherited externally stored fields.
+ We can also assume that the record was complete
+ (including BLOBs), because it had been delete-marked
+ after it had been completely inserted. Therefore, we
+ are passing rollback=false, just like purge does. */
btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
- thr_is_recv(thr)
- ? RB_RECOVERY_PURGE_REC
- : RB_NONE, mtr);
+ false, mtr);
/* The delete operation may fail if we have little
file space left: TODO: easiest to crash the database
@@ -244,7 +249,7 @@ row_undo_mod_remove_clust_low(
/***********************************************************//**
Undoes a modify in a clustered index record. Sets also the node state for the
next round of undo.
-@return DB_SUCCESS or error code: we may run out of file space */
+@return DB_SUCCESS or error code: we may run out of file space */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_undo_mod_clust(
@@ -260,16 +265,20 @@ row_undo_mod_clust(
ut_ad(thr_get_trx(thr) == node->trx);
ut_ad(node->trx->dict_operation_lock_mode);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED)
- || rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(node->trx->in_rollback);
+ ut_ad(rw_lock_own_flagged(&dict_operation_lock,
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
log_free_check();
pcur = &node->pcur;
index = btr_cur_get_index(btr_pcur_get_btr_cur(pcur));
- mtr_start(&mtr);
+ mtr.start();
+ if (index->table->is_temporary()) {
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
+ } else {
+ mtr.set_named_space(index->space);
+ }
online = dict_index_is_online_ddl(index);
if (online) {
@@ -279,7 +288,7 @@ row_undo_mod_clust(
mem_heap_t* heap = mem_heap_create(1024);
mem_heap_t* offsets_heap = NULL;
- ulint* offsets = NULL;
+ offset_t* offsets = NULL;
const dtuple_t* rebuilt_old_pk;
byte sys[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN];
@@ -298,7 +307,12 @@ row_undo_mod_clust(
/* We may have to modify tree structure: do a pessimistic
descent down the index tree */
- mtr_start(&mtr);
+ mtr.start();
+ if (index->table->is_temporary()) {
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
+ } else {
+ mtr.set_named_space(index->space);
+ }
err = row_undo_mod_clust_low(
node, &offsets, &offsets_heap,
@@ -312,10 +326,12 @@ row_undo_mod_clust(
ut_ad(online || !dict_index_is_online_ddl(index));
if (err == DB_SUCCESS && online) {
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&index->lock, RW_LOCK_SHARED)
- || rw_lock_own(&index->lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+
+ ut_ad(rw_lock_own_flagged(
+ &index->lock,
+ RW_LOCK_FLAG_S | RW_LOCK_FLAG_X
+ | RW_LOCK_FLAG_SX));
+
switch (node->rec_type) {
case TRX_UNDO_DEL_MARK_REC:
row_log_table_insert(
@@ -343,30 +359,43 @@ row_undo_mod_clust(
* it can be reallocated at any time after this mtr-commits
* which is just below
*/
- ut_ad(srv_immediate_scrub_data_uncompressed ||
- rec_get_trx_id(btr_pcur_get_rec(pcur), index) == node->new_trx_id);
+ ut_ad(srv_immediate_scrub_data_uncompressed
+ || row_get_rec_trx_id(btr_pcur_get_rec(pcur), index, offsets)
+ == node->new_trx_id);
btr_pcur_commit_specify_mtr(pcur, &mtr);
+ DEBUG_SYNC_C("rollback_undo_pk");
if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_UPD_DEL_REC) {
- mtr_start(&mtr);
+ mtr.start();
+ if (index->table->is_temporary()) {
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
+ } else {
+ mtr.set_named_space(index->space);
+ }
/* It is not necessary to call row_log_table,
because the record is delete-marked and would thus
be omitted from the rebuilt copy of the table. */
err = row_undo_mod_remove_clust_low(
- node, thr, &mtr, BTR_MODIFY_LEAF);
+ node, &mtr, BTR_MODIFY_LEAF);
if (err != DB_SUCCESS) {
btr_pcur_commit_specify_mtr(pcur, &mtr);
/* We may have to modify tree structure: do a
pessimistic descent down the index tree */
- mtr_start(&mtr);
+ mtr.start();
+ if (index->table->is_temporary()) {
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
+ } else {
+ mtr.set_named_space(index->space);
+ }
- err = row_undo_mod_remove_clust_low(node, thr, &mtr,
- BTR_MODIFY_TREE);
+ err = row_undo_mod_remove_clust_low(
+ node, &mtr,
+ BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE);
ut_ad(err == DB_SUCCESS
|| err == DB_OUT_OF_FILE_SPACE);
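
The hunks above repeat one mini-transaction setup: temporary tables get MTR_LOG_NO_REDO because their contents never need crash recovery, while persistent tables tag the mtr with their tablespace. A compilable sketch of that pattern using stub types (the real mtr_t and dict_index_t are only assumed here):

#include <cstdint>

enum stub_log_mode { LOG_ALL, LOG_NO_REDO };

struct mtr_stub {
        void start() {}
        void set_log_mode(stub_log_mode) {}
        void set_named_space(std::uint32_t) {}
};

struct index_stub {
        bool temporary;
        std::uint32_t space;
};

static void start_undo_mtr(mtr_stub& m, const index_stub& ix)
{
        m.start();
        if (ix.temporary) {
                m.set_log_mode(LOG_NO_REDO);  /* no redo for temp tables */
        } else {
                m.set_named_space(ix.space);  /* tag redo with the space id */
        }
}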
@@ -377,8 +406,6 @@ row_undo_mod_clust(
node->state = UNDO_NODE_FETCH_NEXT;
- trx_undo_rec_release(node->trx, node->undo_no);
-
if (offsets_heap) {
mem_heap_free(offsets_heap);
}
@@ -388,7 +415,7 @@ row_undo_mod_clust(
/***********************************************************//**
Delete marks or removes a secondary index entry if found.
-@return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
+@return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_undo_mod_del_mark_or_remove_sec_low(
@@ -403,31 +430,24 @@ row_undo_mod_del_mark_or_remove_sec_low(
btr_pcur_t pcur;
btr_cur_t* btr_cur;
ibool success;
- ibool old_has;
dberr_t err = DB_SUCCESS;
mtr_t mtr;
mtr_t mtr_vers;
- enum row_search_result search_result;
+ row_search_result search_result;
+ const bool modify_leaf = mode == BTR_MODIFY_LEAF;
- log_free_check();
- mtr_start(&mtr);
- if (mode == BTR_MODIFY_TREE
- && index->space == IBUF_SPACE_ID
- && !dict_index_is_unique(index)) {
- ibuf_free_excess_pages();
- }
+ row_mtr_start(&mtr, index, !modify_leaf);
- if (*index->name == TEMP_INDEX_PREFIX) {
- /* The index->online_status may change if the
- index->name starts with TEMP_INDEX_PREFIX (meaning
- that the index is or was being created online). It is
- protected by index->lock. */
- if (mode == BTR_MODIFY_LEAF) {
+ if (!index->is_committed()) {
+ /* The index->online_status may change if the index is
+ or was being created online, but not committed yet. It
+ is protected by index->lock. */
+ if (modify_leaf) {
mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
mtr_s_lock(dict_index_get_lock(index), &mtr);
} else {
- ut_ad(mode == BTR_MODIFY_TREE);
- mtr_x_lock(dict_index_get_lock(index), &mtr);
+ ut_ad(mode == (BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE));
+ mtr_sx_lock(dict_index_get_lock(index), &mtr);
}
if (row_log_online_op_try(index, entry, 0)) {
@@ -435,13 +455,21 @@ row_undo_mod_del_mark_or_remove_sec_low(
}
} else {
/* For secondary indexes,
- index->online_status==ONLINE_INDEX_CREATION unless
- index->name starts with TEMP_INDEX_PREFIX. */
+ index->online_status==ONLINE_INDEX_COMPLETE if
+ index->is_committed(). */
ut_ad(!dict_index_is_online_ddl(index));
}
btr_cur = btr_pcur_get_btr_cur(&pcur);
+ if (dict_index_is_spatial(index)) {
+ if (modify_leaf) {
+ btr_cur->thr = thr;
+ mode |= BTR_RTREE_DELETE_MARK;
+ }
+ mode |= BTR_RTREE_UNDO_INS;
+ }
+
search_result = row_search_index_entry(index, entry, mode,
&pcur, &mtr);
@@ -471,23 +499,37 @@ row_undo_mod_del_mark_or_remove_sec_low(
which cannot be purged yet, requires its existence. If some requires,
we should delete mark the record. */
- mtr_start(&mtr_vers);
+ mtr_vers.start();
success = btr_pcur_restore_position(BTR_SEARCH_LEAF, &(node->pcur),
&mtr_vers);
ut_a(success);
- old_has = row_vers_old_has_index_entry(FALSE,
- btr_pcur_get_rec(&(node->pcur)),
- &mtr_vers, index, entry);
- if (old_has) {
+ /* For a temporary table, we can skip checking older versions
+ of the clustered index entry, because purge will not process
+ any no-redo rollback segment undo logs. */
+ if (dict_table_is_temporary(node->table)
+ || row_vers_old_has_index_entry(
+ false, btr_pcur_get_rec(&(node->pcur)),
+ &mtr_vers, index, entry, 0, 0)) {
err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG,
btr_cur, TRUE, thr, &mtr);
ut_ad(err == DB_SUCCESS);
} else {
/* Remove the index record */
- if (mode != BTR_MODIFY_TREE) {
+ if (dict_index_is_spatial(index)) {
+ rec_t* rec = btr_pcur_get_rec(&pcur);
+ if (rec_get_deleted_flag(rec,
+ dict_table_is_comp(index->table))) {
+ ib::error() << "Record found in index "
+ << index->name << " is delete-marked"
+ " on update rollback.";
+ ut_ad(0);
+ }
+ }
+
+ if (modify_leaf) {
success = btr_cur_optimistic_delete(btr_cur, 0, &mtr);
if (success) {
err = DB_SUCCESS;
@@ -495,15 +537,13 @@ row_undo_mod_del_mark_or_remove_sec_low(
err = DB_FAIL;
}
} else {
- /* No need to distinguish RB_RECOVERY_PURGE here,
+ /* Passing rollback=false,
because we are deleting a secondary index record:
- the distinction between RB_NORMAL and
- RB_RECOVERY_PURGE only matters when deleting a
- record that contains externally stored
- columns. */
+ the distinction only matters when deleting a
+ record that contains externally stored columns. */
ut_ad(!dict_index_is_clust(index));
btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
- RB_NORMAL, &mtr);
+ false, &mtr);
/* The delete operation may fail if we have little
file space left: TODO: easiest to crash the database
@@ -529,7 +569,7 @@ return to the original values because we do not know them. But this should
not cause problems because in row0sel.cc, in queries we always retrieve the
clustered index record or an earlier version of it, if the secondary index
record through which we do the search is delete-marked.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_undo_mod_del_mark_or_remove_sec(
@@ -549,7 +589,7 @@ row_undo_mod_del_mark_or_remove_sec(
}
err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index,
- entry, BTR_MODIFY_TREE);
+ entry, BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE);
return(err);
}
@@ -558,10 +598,10 @@ Delete unmarks a secondary index entry which must be found. It might not be
delete-marked at the moment, but it does not harm to unmark it anyway. We also
need to update the fields of the secondary index record if we updated its
fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'.
-@retval DB_SUCCESS on success
-@retval DB_FAIL if BTR_MODIFY_TREE should be tried
-@retval DB_OUT_OF_FILE_SPACE when running out of tablespace
-@retval DB_DUPLICATE_KEY if the value was missing
+@retval DB_SUCCESS on success
+@retval DB_FAIL if BTR_MODIFY_TREE should be tried
+@retval DB_OUT_OF_FILE_SPACE when running out of tablespace
+@retval DB_DUPLICATE_KEY if the value was missing
and an insert would lead to a duplicate exists */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
@@ -582,29 +622,33 @@ row_undo_mod_del_unmark_sec_and_undo_update(
trx_t* trx = thr_get_trx(thr);
const ulint flags
= BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG;
- enum row_search_result search_result;
-
- ut_ad(trx->id);
-
- log_free_check();
- mtr_start(&mtr);
- if (mode == BTR_MODIFY_TREE
- && index->space == IBUF_SPACE_ID
- && !dict_index_is_unique(index)) {
- ibuf_free_excess_pages();
+ row_search_result search_result;
+ ulint orig_mode = mode;
+
+ ut_ad(trx->id != 0);
+
+ if (dict_index_is_spatial(index)) {
+ /* FIXME: Currently we do a 2-pass search for the undo,
+ to avoid un-delete-marking a wrong record when rolling
+ back a partial update. Later, we could log some info in
+ secondary index updates to avoid this. */
+ ut_ad(mode & BTR_MODIFY_LEAF);
+ mode |= BTR_RTREE_DELETE_MARK;
}
- if (*index->name == TEMP_INDEX_PREFIX) {
- /* The index->online_status may change if the
- index->name starts with TEMP_INDEX_PREFIX (meaning
- that the index is or was being created online). It is
- protected by index->lock. */
+try_again:
+ row_mtr_start(&mtr, index, !(mode & BTR_MODIFY_LEAF));
+
+ if (!index->is_committed()) {
+ /* The index->online_status may change if the index is
+ or was being created online, but not committed yet. It
+ is protected by index->lock. */
if (mode == BTR_MODIFY_LEAF) {
mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
mtr_s_lock(dict_index_get_lock(index), &mtr);
} else {
ut_ad(mode == BTR_MODIFY_TREE);
- mtr_x_lock(dict_index_get_lock(index), &mtr);
+ mtr_sx_lock(dict_index_get_lock(index), &mtr);
}
if (row_log_online_op_try(index, entry, trx->id)) {
@@ -612,18 +656,20 @@ row_undo_mod_del_unmark_sec_and_undo_update(
}
} else {
/* For secondary indexes,
- index->online_status==ONLINE_INDEX_CREATION unless
- index->name starts with TEMP_INDEX_PREFIX. */
+ index->online_status==ONLINE_INDEX_COMPLETE if
+ index->is_committed(). */
ut_ad(!dict_index_is_online_ddl(index));
}
+ btr_cur->thr = thr;
+
search_result = row_search_index_entry(index, entry, mode,
&pcur, &mtr);
switch (search_result) {
mem_heap_t* heap;
mem_heap_t* offsets_heap;
- ulint* offsets;
+ offset_t* offsets;
case ROW_BUFFERED:
case ROW_NOT_DELETED_REF:
/* These are invalid outcomes, because the mode passed
@@ -631,7 +677,18 @@ row_undo_mod_del_unmark_sec_and_undo_update(
flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
ut_error;
case ROW_NOT_FOUND:
- if (*index->name != TEMP_INDEX_PREFIX) {
+ /* For a spatial index, if the first search did not find an
+ un-delete-marked record, try to find a delete-marked one. */
+ if (dict_index_is_spatial(index) && btr_cur->rtr_info->fd_del) {
+ if (mode != orig_mode) {
+ mode = orig_mode;
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ goto try_again;
+ }
+ }
+
+ if (index->is_committed()) {
/* During online secondary index creation, it
is possible that MySQL is waiting for a
meta-data lock upgrade before invoking
@@ -640,34 +697,20 @@ row_undo_mod_del_unmark_sec_and_undo_update(
finished building the index, but it does not
yet exist in MySQL. In this case, we suppress
the printout to the error log. */
- fputs("InnoDB: error in sec index entry del undo in\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, trx, index);
- fputs("\n"
- "InnoDB: tuple ", stderr);
- dtuple_print(stderr, entry);
- fputs("\n"
- "InnoDB: record ", stderr);
- rec_print(stderr, btr_pcur_get_rec(&pcur), index);
- putc('\n', stderr);
- trx_print(stderr, trx, 0);
- fputs("\n"
- "InnoDB: Submit a detailed bug report"
- " to https://jira.mariadb.org/\n", stderr);
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "record in index %s was not found"
- " on rollback, trying to insert",
- index->name);
+ ib::warn() << "Record in index " << index->name
+ << " of table " << index->table->name
+ << " was not found on rollback, trying to"
+ " insert: " << *entry
+ << " at: " << rec_index_print(
+ btr_cur_get_rec(btr_cur), index);
}
if (btr_cur->up_match >= dict_index_get_n_unique(index)
|| btr_cur->low_match >= dict_index_get_n_unique(index)) {
- if (*index->name != TEMP_INDEX_PREFIX) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "record in index %s was not found on"
- " rollback, and a duplicate exists",
- index->name);
+ if (index->is_committed()) {
+ ib::warn() << "Record in index " << index->name
+ << " was not found on rollback, and"
+ " a duplicate exists";
}
err = DB_DUPLICATE_KEY;
break;
@@ -713,6 +756,7 @@ row_undo_mod_del_unmark_sec_and_undo_update(
err = btr_cur_del_mark_set_sec_rec(
BTR_NO_LOCKING_FLAG,
btr_cur, FALSE, thr, &mtr);
+
ut_a(err == DB_SUCCESS);
heap = mem_heap_create(
sizeof(upd_t)
@@ -720,7 +764,7 @@ row_undo_mod_del_unmark_sec_and_undo_update(
offsets_heap = NULL;
offsets = rec_get_offsets(
btr_cur_get_rec(btr_cur),
- index, NULL, ULINT_UNDEFINED, &offsets_heap);
+ index, NULL, true, ULINT_UNDEFINED, &offsets_heap);
update = row_upd_build_sec_rec_difference_binary(
btr_cur_get_rec(btr_cur), index, offsets, entry, heap);
if (upd_get_n_fields(update) == 0) {
@@ -780,7 +824,7 @@ row_undo_mod_sec_flag_corrupted(
we can only mark the index corrupted in the
data dictionary cache. TODO: fix this somehow.*/
mutex_enter(&dict_sys->mutex);
- dict_set_corrupted_index_cache_only(index, index->table);
+ dict_set_corrupted_index_cache_only(index);
mutex_exit(&dict_sys->mutex);
break;
default:
@@ -795,7 +839,7 @@ row_undo_mod_sec_flag_corrupted(
/***********************************************************//**
Undoes a modify in secondary indexes when undo record type is UPD_DEL.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_undo_mod_upd_del_sec(
@@ -862,7 +906,7 @@ row_undo_mod_upd_del_sec(
/***********************************************************//**
Undoes a modify in secondary indexes when undo record type is DEL_MARK.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_undo_mod_del_mark_sec(
@@ -930,7 +974,7 @@ row_undo_mod_del_mark_sec(
/***********************************************************//**
Undoes a modify in secondary indexes when undo record type is UPD_EXIST.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_undo_mod_upd_exist_sec(
@@ -950,15 +994,31 @@ row_undo_mod_upd_exist_sec(
heap = mem_heap_create(1024);
+
while (node->index != NULL) {
dict_index_t* index = node->index;
dtuple_t* entry;
- if (index->type == DICT_FTS
- || !row_upd_changes_ord_field_binary(
- index, node->update, thr, node->row, node->ext)) {
- dict_table_next_uncorrupted_index(node->index);
- continue;
+ if (dict_index_is_spatial(index)) {
+ if (!row_upd_changes_ord_field_binary_func(
+ index, node->update,
+#ifdef UNIV_DEBUG
+ thr,
+#endif /* UNIV_DEBUG */
+ node->row,
+ node->ext, ROW_BUILD_FOR_UNDO)) {
+ dict_table_next_uncorrupted_index(node->index);
+ continue;
+ }
+ } else {
+ if (index->type == DICT_FTS
+ || !row_upd_changes_ord_field_binary(index,
+ node->update,
+ thr, node->row,
+ node->ext)) {
+ dict_table_next_uncorrupted_index(node->index);
+ continue;
+ }
}
/* Build the newest version of the index entry */
@@ -1016,9 +1076,17 @@ row_undo_mod_upd_exist_sec(
the secondary index record if we updated its fields
but alphabetically they stayed the same, e.g.,
'abc' -> 'aBc'. */
- entry = row_build_index_entry(node->undo_row,
- node->undo_ext,
- index, heap);
+ if (dict_index_is_spatial(index)) {
+ entry = row_build_index_entry_low(node->undo_row,
+ node->undo_ext,
+ index, heap,
+ ROW_BUILD_FOR_UNDO);
+ } else {
+ entry = row_build_index_entry(node->undo_row,
+ node->undo_ext,
+ index, heap);
+ }
+
ut_a(entry);
err = row_undo_mod_del_unmark_sec_and_undo_update(
@@ -1080,12 +1148,21 @@ row_undo_mod_parse_undo_rec(
return;
}
- if (node->table->file_unreadable) {
- dict_table_close(node->table, dict_locked, FALSE);
+ ut_ad(!node->table->skip_alter_undo);
- /* We skip undo operations to missing .ibd files */
- node->table = NULL;
+ if (UNIV_UNLIKELY(!fil_table_accessible(node->table))) {
+close_table:
+ /* Normally, tables should not disappear or become
+ inaccessible during ROLLBACK, because they should be
+ protected by InnoDB table locks. Corruption could be
+ a valid exception.
+ FIXME: When running out of temporary tablespace, it
+ would probably be better to just drop all temporary
+ tables (and temporary undo log records) of the current
+ connection, instead of doing this rollback. */
+ dict_table_close(node->table, dict_locked, FALSE);
+ node->table = NULL;
return;
}
@@ -1097,24 +1174,52 @@ row_undo_mod_parse_undo_rec(
ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
node->heap);
- trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
- roll_ptr, info_bits, node->trx,
+ ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
+ roll_ptr, info_bits,
node->heap, &(node->update));
node->new_trx_id = trx_id;
node->cmpl_info = cmpl_info;
if (!row_undo_search_clust_to_pcur(node)) {
+ /* As long as this rolling-back transaction exists,
+ the PRIMARY KEY value pointed to by the undo log
+ record should exist.
+
+ However, if InnoDB is killed during a rollback, or
+ shut down during the rollback of recovered
+ transactions, then after restart we may try to roll
+ back some of the same undo log records again, because
+ trx_roll_try_truncate() is not being invoked after
+ every undo log record.
+
+ It is also possible that the record
+ was not modified yet (the DB_ROLL_PTR does not match
+ node->roll_ptr) and thus there is nothing to roll back.
+
+ btr_cur_upd_lock_and_undo() only writes the undo log
+ record after successfully acquiring an exclusive lock
+ on the clustered index record. That lock will not
+ be released before the transaction is committed or
+ fully rolled back. (Exception: if the server was
+ killed, restarted, and shut down again before the
+ rollback of the recovered transaction was completed,
+ it is possible that the transaction was partially
+ rolled back and locks released.) */
+ goto close_table;
+ }
- dict_table_close(node->table, dict_locked, FALSE);
-
- node->table = NULL;
+ /* Extract indexed virtual columns from undo log */
+ if (node->table->n_v_cols) {
+ row_upd_replace_vcol(node->row, node->table,
+ node->update, false, node->undo_row,
+ (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)
+ ? NULL : ptr);
}
}
/***********************************************************//**
Undoes a modify operation on a row of a table.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
dberr_t
row_undo_mod(
/*=========*/
@@ -1127,6 +1232,8 @@ row_undo_mod(
ut_ad(node != NULL);
ut_ad(thr != NULL);
ut_ad(node->state == UNDO_NODE_MODIFY);
+ ut_ad(node->trx->in_rollback);
+ ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr));
dict_locked = thr_get_trx(thr)->dict_operation_lock_mode == RW_X_LATCH;
@@ -1138,7 +1245,6 @@ row_undo_mod(
/* It is already undone, or will be undone by another query
thread, or table was dropped */
- trx_undo_rec_release(node->trx, node->undo_no);
node->state = UNDO_NODE_FETCH_NEXT;
return(DB_SUCCESS);
@@ -1168,8 +1274,39 @@ row_undo_mod(
}
if (err == DB_SUCCESS) {
-
err = row_undo_mod_clust(node, thr);
+
+ bool update_statistics
+ = !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE);
+
+ if (err == DB_SUCCESS && node->table->stat_initialized) {
+ switch (node->rec_type) {
+ case TRX_UNDO_UPD_EXIST_REC:
+ break;
+ case TRX_UNDO_DEL_MARK_REC:
+ dict_table_n_rows_inc(node->table);
+ update_statistics = update_statistics
+ || !srv_stats_include_delete_marked;
+ break;
+ case TRX_UNDO_UPD_DEL_REC:
+ dict_table_n_rows_dec(node->table);
+ update_statistics = update_statistics
+ || !srv_stats_include_delete_marked;
+ break;
+ }
+
+ /* Do not attempt to update statistics when
+ executing ROLLBACK in the InnoDB SQL
+ interpreter, because in that case we would
+ already be holding dict_sys->mutex, which
+ would be acquired when updating statistics. */
+ if (update_statistics && !dict_locked) {
+ dict_stats_update_if_needed(
+ node->table, node->trx->mysql_thd);
+ } else {
+ node->table->stat_modified_counter++;
+ }
+ }
}
dict_table_close(node->table, dict_locked, FALSE);
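
The rec_type switch above maps each undo record type to a row-count correction: undoing a delete-mark restores a row, undoing an update of a delete-marked row removes one, and a plain update leaves the count alone. A minimal restatement (hypothetical enum, not the InnoDB TRX_UNDO_* constants):

#include <cstdint>

enum undo_rec_kind { UPD_EXIST, DEL_MARK, UPD_DEL };

/* Correction to the row-count estimate when one record is rolled back. */
static std::int64_t undo_row_delta(undo_rec_kind kind)
{
        switch (kind) {
        case DEL_MARK:  return +1;  /* delete-mark undone: row reappears */
        case UPD_DEL:   return -1;  /* update-of-deleted undone: row gone */
        case UPD_EXIST:
        default:        return 0;   /* ordinary update: count unchanged */
        }
}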
diff --git a/storage/innobase/row/row0undo.cc b/storage/innobase/row/row0undo.cc
index da9f2557397..11b775da376 100644
--- a/storage/innobase/row/row0undo.cc
+++ b/storage/innobase/row/row0undo.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,11 +25,6 @@ Created 1/8/1997 Heikki Tuuri
*******************************************************/
#include "row0undo.h"
-
-#ifdef UNIV_NONINL
-#include "row0undo.ic"
-#endif
-
#include "fsp0fsp.h"
#include "mach0data.h"
#include "trx0rseg.h"
@@ -123,18 +118,20 @@ or if the roll ptr is NULL, i.e., it was a fresh insert. */
/********************************************************************//**
Creates a row undo node to a query graph.
-@return own: undo node */
-UNIV_INTERN
+@return own: undo node */
undo_node_t*
row_undo_node_create(
/*=================*/
- trx_t* trx, /*!< in: transaction */
+ trx_t* trx, /*!< in/out: transaction */
que_thr_t* parent, /*!< in: parent node, i.e., a thr node */
mem_heap_t* heap) /*!< in: memory heap where created */
{
undo_node_t* undo;
- ut_ad(trx && parent && heap);
+ ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE)
+ || trx_state_eq(trx, TRX_STATE_PREPARED_RECOVERED)
+ || trx_state_eq(trx, TRX_STATE_PREPARED));
+ ut_ad(parent);
undo = static_cast<undo_node_t*>(
mem_heap_alloc(heap, sizeof(undo_node_t)));
@@ -157,50 +154,47 @@ Looks for the clustered index record when node has the row reference.
The pcur in node is used in the search. If found, stores the row to node,
and stores the position of pcur, and detaches it. The pcur must be closed
by the caller in any case.
-@return TRUE if found; NOTE the node->pcur must be closed by the
+@return true if found; NOTE the node->pcur must be closed by the
caller, regardless of the return value */
-UNIV_INTERN
-ibool
+bool
row_undo_search_clust_to_pcur(
/*==========================*/
- undo_node_t* node) /*!< in: row undo node */
+ undo_node_t* node) /*!< in/out: row undo node */
{
dict_index_t* clust_index;
- ibool found;
+ bool found;
mtr_t mtr;
- ibool ret;
- rec_t* rec;
+ row_ext_t** ext;
+ const rec_t* rec;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
rec_offs_init(offsets_);
+ ut_ad(!node->table->skip_alter_undo);
+
mtr_start(&mtr);
clust_index = dict_table_get_first_index(node->table);
- found = row_search_on_row_ref(&(node->pcur), BTR_MODIFY_LEAF,
+ found = row_search_on_row_ref(&node->pcur, BTR_MODIFY_LEAF,
node->table, node->ref, &mtr);
- rec = btr_pcur_get_rec(&(node->pcur));
-
- offsets = rec_get_offsets(rec, clust_index, offsets,
- ULINT_UNDEFINED, &heap);
+ if (!found) {
+ goto func_exit;
+ }
- if (!found || node->roll_ptr
- != row_get_rec_roll_ptr(rec, clust_index, offsets)) {
+ rec = btr_pcur_get_rec(&node->pcur);
- /* We must remove the reservation on the undo log record
- BEFORE releasing the latch on the clustered index page: this
- is to make sure that some thread will eventually undo the
- modification corresponding to node->roll_ptr. */
+ offsets = rec_get_offsets(rec, clust_index, offsets, true,
+ ULINT_UNDEFINED, &heap);
- /* fputs("--------------------undoing a previous version\n",
- stderr); */
+ found = row_get_rec_roll_ptr(rec, clust_index, offsets)
+ == node->roll_ptr;
- ret = FALSE;
- } else {
- row_ext_t** ext;
+ if (found) {
+ ut_ad(row_get_rec_trx_id(rec, clust_index, offsets)
+ == node->trx->id);
if (dict_table_get_format(node->table) >= UNIV_FORMAT_B) {
/* In DYNAMIC or COMPRESSED format, there is
@@ -219,6 +213,20 @@ row_undo_search_clust_to_pcur(
node->row = row_build(ROW_COPY_DATA, clust_index, rec,
offsets, NULL,
NULL, NULL, ext, node->heap);
+
+ /* We will need to parse the virtual column info out of the undo
+ log, so first mark the fields DATA_MISSING. Then we will know
+ if a value gets updated. */
+ if (node->table->n_v_cols
+ && node->state != UNDO_NODE_INSERT
+ && !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
+ for (ulint i = 0;
+ i < dict_table_get_n_v_cols(node->table); i++) {
+ dfield_get_type(dtuple_get_nth_v_field(
+ node->row, i))->mtype = DATA_MISSING;
+ }
+ }
+
if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
node->undo_row = dtuple_copy(node->row, node->heap);
row_upd_replace(node->undo_row, &node->undo_ext,
@@ -228,64 +236,47 @@ row_undo_search_clust_to_pcur(
node->undo_ext = NULL;
}
- btr_pcur_store_position(&(node->pcur), &mtr);
-
- ret = TRUE;
+ btr_pcur_store_position(&node->pcur, &mtr);
}
- btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);
-
- if (UNIV_LIKELY_NULL(heap)) {
+ if (heap) {
mem_heap_free(heap);
}
- return(ret);
+
+func_exit:
+ btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
+ return(found);
}
/***********************************************************//**
Fetches an undo log record and does the undo for the recorded operation.
If none left, or a partial rollback completed, returns control to the
parent node, which is always a query thread node.
-@return DB_SUCCESS if operation successfully completed, else error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
+@return DB_SUCCESS if operation successfully completed, else error code */
+static MY_ATTRIBUTE((warn_unused_result))
dberr_t
row_undo(
/*=====*/
undo_node_t* node, /*!< in: row undo node */
que_thr_t* thr) /*!< in: query thread */
{
- dberr_t err;
- trx_t* trx;
- roll_ptr_t roll_ptr;
- ibool locked_data_dict;
-
- ut_ad(node != NULL);
- ut_ad(thr != NULL);
-
- trx = node->trx;
+ trx_t* trx = node->trx;
+ ut_ad(trx->in_rollback);
if (node->state == UNDO_NODE_FETCH_NEXT) {
- node->undo_rec = trx_roll_pop_top_rec_of_trx(trx,
- trx->roll_limit,
- &roll_ptr,
- node->heap);
+ node->undo_rec = trx_roll_pop_top_rec_of_trx(
+ trx, &node->roll_ptr, node->heap);
+
if (!node->undo_rec) {
/* Rollback completed for this query thread */
-
thr->run_node = que_node_get_parent(node);
-
return(DB_SUCCESS);
}
- node->roll_ptr = roll_ptr;
node->undo_no = trx_undo_rec_get_undo_no(node->undo_rec);
-
- if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
-
- node->state = UNDO_NODE_INSERT;
- } else {
- node->state = UNDO_NODE_MODIFY;
- }
+ node->state = trx_undo_roll_ptr_is_insert(node->roll_ptr)
+ ? UNDO_NODE_INSERT : UNDO_NODE_MODIFY;
}
/* Prevent DROP TABLE etc. while we are rolling back this row.
@@ -293,16 +284,18 @@ row_undo(
then we already have dict_operation_lock locked in x-mode. Do not
try to lock again, because that would cause a hang. */
- locked_data_dict = (trx->dict_operation_lock_mode == 0);
+ const bool locked_data_dict = (trx->dict_operation_lock_mode == 0);
if (locked_data_dict) {
row_mysql_freeze_data_dictionary(trx);
}
+ dberr_t err;
+
if (node->state == UNDO_NODE_INSERT) {
- err = row_undo_ins(node);
+ err = row_undo_ins(node, thr);
node->state = UNDO_NODE_FETCH_NEXT;
} else {
@@ -328,8 +321,7 @@ row_undo(
/***********************************************************//**
Undoes a row operation in a table. This is a high-level function used
in SQL execution graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
row_undo_step(
/*==========*/
@@ -358,25 +350,23 @@ row_undo_step(
err = row_undo(node, thr);
+#ifdef ENABLED_DEBUG_SYNC
+ if (trx->mysql_thd) {
+ DEBUG_SYNC_C("trx_after_rollback_row");
+ }
+#endif /* ENABLED_DEBUG_SYNC */
+
trx->error_state = err;
if (err != DB_SUCCESS) {
/* SQL error detected */
- fprintf(stderr, "InnoDB: Fatal error (%s) in rollback.\n",
- ut_strerr(err));
-
if (err == DB_OUT_OF_FILE_SPACE) {
- fprintf(stderr,
- "InnoDB: Out of tablespace.\n"
- "InnoDB: Consider increasing"
- " your tablespace.\n");
- abort();
+ ib::fatal() << "Out of tablespace during rollback."
+ " Consider increasing your tablespace.";
}
- ut_error;
-
- return(NULL);
+ ib::fatal() << "Error (" << ut_strerr(err) << ") in rollback.";
}
return(thr);
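
row_undo() above pops the newest undo record of the transaction and dispatches on whether it was written by an insert or by a modify. A toy version of that newest-first loop (hypothetical record type, not the InnoDB undo structures):

#include <vector>

struct undo_rec {
        bool from_insert;  /* payload elided */
};

template <class UndoIns, class UndoMod>
void roll_back_all(std::vector<undo_rec>& undo_log,
                   UndoIns undo_insert, UndoMod undo_modify)
{
        /* Records are undone newest-first, mirroring the pop above. */
        while (!undo_log.empty()) {
                undo_rec rec = undo_log.back();
                undo_log.pop_back();
                if (rec.from_insert) {
                        undo_insert(rec);
                } else {
                        undo_modify(rec);
                }
        }
}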
diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc
index 6bb97e917bb..31d4d31c1f9 100644
--- a/storage/innobase/row/row0upd.cc
+++ b/storage/innobase/row/row0upd.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, 2020, MariaDB Corporation.
+Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2015, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,16 +25,10 @@ Created 12/27/1996 Heikki Tuuri
*******************************************************/
#include "row0upd.h"
-
-#ifdef UNIV_NONINL
-#include "row0upd.ic"
-#endif
-
-#include "ha_prototypes.h"
#include "dict0dict.h"
+#include "dict0mem.h"
#include "trx0undo.h"
#include "rem0rec.h"
-#ifndef UNIV_HOTBACKUP
#include "dict0boot.h"
#include "dict0crea.h"
#include "mach0data.h"
@@ -52,8 +46,10 @@ Created 12/27/1996 Heikki Tuuri
#include "pars0sym.h"
#include "eval0eval.h"
#include "buf0lru.h"
+#include "trx0rec.h"
+#include "fts0fts.h"
+#include "fts0types.h"
#include <algorithm>
-
#include <mysql/plugin.h>
#include <mysql/service_wsrep.h>
@@ -112,7 +108,7 @@ introduced where a call to log_free_check() is bypassed. */
Checks if an update vector changes some of the first ordering fields of an
index record. This is only used in foreign key checks and we can assume
that index does not contain column prefixes.
-@return TRUE if changes */
+@return TRUE if changes */
static
ibool
row_upd_changes_first_fields_binary(
@@ -122,7 +118,6 @@ row_upd_changes_first_fields_binary(
const upd_t* update, /*!< in: update vector for the row */
ulint n); /*!< in: how many first fields to check */
-
/*********************************************************************//**
Checks if index currently is mentioned as a referenced index in a foreign
key constraint.
@@ -175,12 +170,10 @@ wsrep_row_upd_index_is_foreign(
trx_t* trx) /*!< in: transaction */
{
dict_table_t* table = index->table;
- dict_foreign_t* foreign;
ibool froze_data_dict = FALSE;
ibool is_referenced = FALSE;
if (table->foreign_set.empty()) {
-
return(FALSE);
}
@@ -189,21 +182,13 @@ wsrep_row_upd_index_is_foreign(
froze_data_dict = TRUE;
}
- for (dict_foreign_set::iterator it= table->foreign_set.begin();
- it != table->foreign_set.end();
- ++ it)
- {
- foreign= *it;
-
- if (foreign->foreign_index == index) {
-
- is_referenced = TRUE;
- goto func_exit;
- }
+ dict_foreign_set::iterator it
+ = std::find_if(table->foreign_set.begin(),
+ table->foreign_set.end(),
+ dict_foreign_with_foreign_index(index));
- }
+ is_referenced = (it != table->foreign_set.end());
-func_exit:
if (froze_data_dict) {
row_mysql_unfreeze_data_dictionary(trx);
}
@@ -219,7 +204,7 @@ under pcur.
NOTE that this function will temporarily commit mtr and lose the
pcur position!
-@return DB_SUCCESS or an error code */
+@return DB_SUCCESS or an error code */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_upd_check_references_constraints(
@@ -229,7 +214,7 @@ row_upd_check_references_constraints(
cursor position is lost in this function! */
dict_table_t* table, /*!< in: table in question */
dict_index_t* index, /*!< in: index of the cursor */
- ulint* offsets,/*!< in/out: rec_get_offsets(pcur.rec, index) */
+ offset_t* offsets,/*!< in/out: rec_get_offsets(pcur.rec, index) */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr) /*!< in: mtr */
{
@@ -238,13 +223,13 @@ row_upd_check_references_constraints(
dtuple_t* entry;
trx_t* trx;
const rec_t* rec;
- ulint n_ext;
dberr_t err;
ibool got_s_lock = FALSE;
- if (table->referenced_set.empty()) {
+ DBUG_ENTER("row_upd_check_references_constraints");
- return(DB_SUCCESS);
+ if (table->referenced_set.empty()) {
+ DBUG_RETURN(DB_SUCCESS);
}
trx = thr_get_trx(thr);
@@ -254,13 +239,13 @@ row_upd_check_references_constraints(
heap = mem_heap_create(500);
- entry = row_rec_to_index_entry(rec, index, offsets, &n_ext, heap);
+ entry = row_rec_to_index_entry(rec, index, offsets, heap);
mtr_commit(mtr);
DEBUG_SYNC_C("foreign_constraint_check_for_update");
- mtr_start(mtr);
+ mtr->start();
if (trx->dict_operation_lock_mode == 0) {
got_s_lock = TRUE;
@@ -268,7 +253,8 @@ row_upd_check_references_constraints(
row_mysql_freeze_data_dictionary(trx);
}
-run_again:
+ DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
+ "foreign_constraint_check_for_insert");
for (dict_foreign_set::iterator it = table->referenced_set.begin();
it != table->referenced_set.end();
@@ -297,10 +283,32 @@ run_again:
FALSE, FALSE, DICT_ERR_IGNORE_NONE);
}
+ /* dict_operation_lock is held both here
+ (UPDATE or DELETE with FOREIGN KEY) and by TRUNCATE
+ TABLE operations.
+ If a TRUNCATE TABLE operation is in progress,
+ there can be 2 possible conditions:
+ 1) row_truncate_table_for_mysql() is not yet called.
+ 2) Truncate releases dict_operation_lock
+ during eviction of pages from buffer pool
+ for a file-per-table tablespace.
+
+ In case of (1), truncate will wait for FK operation
+ to complete.
+ In case of (2), truncate will be rolled forward even
+ if it is interrupted. So if the foreign table is
+ undergoing a truncate, ignore the FK check. */
+
if (foreign_table) {
- os_inc_counter(dict_sys->mutex,
- foreign_table
- ->n_foreign_key_checks_running);
+ mutex_enter(&fil_system->mutex);
+ const fil_space_t* space = fil_space_get_by_id(
+ foreign_table->space);
+ const bool being_truncated = space
+ && space->is_being_truncated;
+ mutex_exit(&fil_system->mutex);
+ if (being_truncated) {
+ continue;
+ }
}
/* NOTE that if the thread ends up waiting for a lock
@@ -308,29 +316,32 @@ run_again:
But the counter on the table protects 'foreign' from
being dropped while the check is running. */
+ if (foreign_table) {
+ my_atomic_addlint(
+ &foreign_table->n_foreign_key_checks_running,
+ 1);
+ }
+
err = row_ins_check_foreign_constraint(
FALSE, foreign, table, entry, thr);
if (foreign_table) {
- os_dec_counter(dict_sys->mutex,
- foreign_table
- ->n_foreign_key_checks_running);
+ my_atomic_addlint(
+ &foreign_table->n_foreign_key_checks_running,
+ -1);
}
-
if (ref_table != NULL) {
dict_table_close(ref_table, FALSE, FALSE);
}
- /* Some table foreign key dropped, try again */
- if (err == DB_DICT_CHANGED) {
- goto run_again;
- } else if (err != DB_SUCCESS) {
+ if (err != DB_SUCCESS) {
goto func_exit;
}
}
}
err = DB_SUCCESS;
+
func_exit:
if (got_s_lock) {
row_mysql_unfreeze_data_dictionary(trx);
@@ -338,8 +349,10 @@ func_exit:
mem_heap_free(heap);
- return(err);
+ DEBUG_SYNC_C("foreign_constraint_check_for_update_done");
+ DBUG_RETURN(err);
}
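/* Illustration of the counter change above: n_foreign_key_checks_running
used to be bumped under dict_sys->mutex via os_inc_counter() and
os_dec_counter(); it is now maintained lock-free, so concurrent FK checks
no longer serialize on the dictionary mutex. A minimal sketch: */

/* old: read-modify-write guarded by a global mutex */
os_inc_counter(dict_sys->mutex,
               foreign_table->n_foreign_key_checks_running);

/* new: one atomic add; adding -1 decrements on the way out */
my_atomic_addlint(&foreign_table->n_foreign_key_checks_running, 1);
my_atomic_addlint(&foreign_table->n_foreign_key_checks_running, -1);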
+
#ifdef WITH_WSREP
static
dberr_t
@@ -350,7 +363,7 @@ wsrep_row_upd_check_foreign_constraints(
cursor position is lost in this function! */
dict_table_t* table, /*!< in: table in question */
dict_index_t* index, /*!< in: index of the cursor */
- ulint* offsets,/*!< in/out: rec_get_offsets(pcur.rec, index) */
+ offset_t* offsets,/*!< in/out: rec_get_offsets(pcur.rec, index) */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr) /*!< in: mtr */
{
@@ -359,27 +372,24 @@ wsrep_row_upd_check_foreign_constraints(
dtuple_t* entry;
trx_t* trx;
const rec_t* rec;
- ulint n_ext;
dberr_t err;
ibool got_s_lock = FALSE;
ibool opened = FALSE;
if (table->foreign_set.empty()) {
-
return(DB_SUCCESS);
}
trx = thr_get_trx(thr);
- /* TODO: make native slave thread bail out here */
+ /* TODO: make native slave thread bail out here */
rec = btr_pcur_get_rec(pcur);
ut_ad(rec_offs_validate(rec, index, offsets));
heap = mem_heap_create(500);
- entry = row_rec_to_index_entry(rec, index, offsets,
- &n_ext, heap);
+ entry = row_rec_to_index_entry(rec, index, offsets, heap);
mtr_commit(mtr);
@@ -391,12 +401,11 @@ wsrep_row_upd_check_foreign_constraints(
row_mysql_freeze_data_dictionary(trx);
}
- for (dict_foreign_set::iterator it= table->foreign_set.begin();
+ for (dict_foreign_set::iterator it = table->foreign_set.begin();
it != table->foreign_set.end();
- ++ it)
- {
- foreign= *it;
+ ++it) {
+ foreign = *it;
/* Note that we may have an update which updates the index
record, but does NOT update the first fields which are
referenced in a foreign key constraint. Then the update does
@@ -416,12 +425,6 @@ wsrep_row_upd_check_foreign_constraints(
opened = (foreign->referenced_table) ? TRUE : FALSE;
}
- if (foreign->referenced_table) {
- os_inc_counter(dict_sys->mutex,
- foreign->referenced_table
- ->n_foreign_key_checks_running);
- }
-
/* NOTE that if the thread ends up waiting for a lock
we will release dict_operation_lock temporarily!
But the counter on the table protects 'foreign' from
@@ -431,10 +434,6 @@ wsrep_row_upd_check_foreign_constraints(
TRUE, foreign, table, entry, thr);
if (foreign->referenced_table) {
- os_dec_counter(dict_sys->mutex,
- foreign->referenced_table
- ->n_foreign_key_checks_running);
-
if (opened == TRUE) {
dict_table_close(foreign->referenced_table, FALSE, FALSE);
opened = FALSE;
@@ -442,11 +441,9 @@ wsrep_row_upd_check_foreign_constraints(
}
if (err != DB_SUCCESS) {
-
goto func_exit;
}
}
-
}
err = DB_SUCCESS;
@@ -457,16 +454,28 @@ func_exit:
mem_heap_free(heap);
- DEBUG_SYNC_C("foreign_constraint_check_for_update_done");
-
return(err);
}
+
+/** Determine if a FOREIGN KEY constraint needs to be processed.
+@param[in] node query node
+@param[in] trx transaction
+@return whether the FOREIGN KEY constraint must be processed */
+
+inline bool wsrep_must_process_fk(const upd_node_t* node, const trx_t* trx)
+{
+ if (!trx->is_wsrep()) {
+ return false;
+ }
+ return que_node_get_type(node->common.parent) != QUE_NODE_UPDATE
+ || static_cast<upd_node_t*>(node->common.parent)->cascade_node
+ != node;
+}
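/* For illustration, how this helper is used at its call sites later in
this change (see row_upd_sec_index_entry()): the wsrep FK check runs only
for wsrep transactions, and never for the cascaded child of an UPDATE
node. */

if (!referenced && foreign
    && wsrep_must_process_fk(node, trx)
    && !wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
        err = wsrep_row_upd_check_foreign_constraints(
                node, &pcur, index->table, index, offsets, thr, &mtr);
}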
#endif /* WITH_WSREP */
/*********************************************************************//**
Creates an update node for a query graph.
-@return own: update node */
-UNIV_INTERN
+@return own: update node */
upd_node_t*
upd_node_create(
/*============*/
@@ -475,45 +484,25 @@ upd_node_create(
upd_node_t* node;
node = static_cast<upd_node_t*>(
- mem_heap_alloc(heap, sizeof(upd_node_t)));
+ mem_heap_zalloc(heap, sizeof(upd_node_t)));
node->common.type = QUE_NODE_UPDATE;
-
node->state = UPD_NODE_UPDATE_CLUSTERED;
- node->in_mysql_interface = FALSE;
-
- node->row = NULL;
- node->ext = NULL;
- node->upd_row = NULL;
- node->upd_ext = NULL;
- node->index = NULL;
- node->update = NULL;
-
- node->foreign = NULL;
- node->cascade_heap = NULL;
- node->cascade_node = NULL;
-
- node->select = NULL;
-
node->heap = mem_heap_create(128);
node->magic_n = UPD_NODE_MAGIC_N;
- node->cmpl_info = 0;
-
return(node);
}
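/* Design note, as a sketch: mem_heap_zalloc() returns zero-filled
memory, which is why the long run of explicit NULL/FALSE/0 member
assignments could be deleted above; only the members with non-zero
initial values still have to be set. */

node = static_cast<upd_node_t*>(
        mem_heap_zalloc(heap, sizeof(upd_node_t))); /* all members 0/NULL */
node->common.type = QUE_NODE_UPDATE;    /* then the non-zero members */
node->state = UPD_NODE_UPDATE_CLUSTERED;
node->heap = mem_heap_create(128);
node->magic_n = UPD_NODE_MAGIC_N;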
-#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Updates the trx id and roll ptr field in a clustered index record in database
recovery. */
-UNIV_INTERN
void
row_upd_rec_sys_fields_in_recovery(
/*===============================*/
rec_t* rec, /*!< in/out: record */
page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
ulint pos, /*!< in: TRX_ID position in rec */
trx_id_t trx_id, /*!< in: transaction id */
roll_ptr_t roll_ptr)/*!< in: roll ptr of the undo log record */
@@ -537,10 +526,8 @@ row_upd_rec_sys_fields_in_recovery(
}
}
-#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Sets the trx id or roll ptr field of a clustered index entry. */
-UNIV_INTERN
void
row_upd_index_entry_sys_field(
/*==========================*/
@@ -564,6 +551,7 @@ row_upd_index_entry_sys_field(
field = static_cast<byte*>(dfield_get_data(dfield));
if (type == DATA_TRX_ID) {
+ ut_ad(val > 0);
trx_write_trx_id(field, val);
} else {
ut_ad(type == DATA_ROLL_PTR);
@@ -576,12 +564,11 @@ Returns TRUE if row update changes size of some field in index or if some
field to be updated is stored externally in rec or update.
@return TRUE if the update changes the size of some field in index or
the field is external in rec or update */
-UNIV_INTERN
ibool
row_upd_changes_field_size_or_external(
/*===================================*/
dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
const upd_t* update) /*!< in: update vector */
{
const upd_field_t* upd_field;
@@ -592,11 +579,19 @@ row_upd_changes_field_size_or_external(
ulint i;
ut_ad(rec_offs_validate(NULL, index, offsets));
+ ut_ad(!index->table->skip_alter_undo);
n_fields = upd_get_n_fields(update);
for (i = 0; i < n_fields; i++) {
upd_field = upd_get_nth_field(update, i);
+ /* We should ignore the virtual field if the index is
+ not a virtual index */
+ if (upd_fld_is_virtual_col(upd_field)
+ && !index->has_virtual()) {
+ continue;
+ }
+
new_val = &(upd_field->new_val);
new_len = dfield_get_len(new_val);
@@ -640,7 +635,6 @@ row_upd_changes_field_size_or_external(
/***********************************************************//**
Returns true if row update contains disowned external fields.
@return true if the update contains disowned external fields. */
-UNIV_INTERN
bool
row_upd_changes_disowned_external(
/*==============================*/
@@ -677,7 +671,6 @@ row_upd_changes_disowned_external(
return(false);
}
-#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
Replaces the new column values stored in the update vector to the
@@ -685,13 +678,12 @@ record given. No field size changes are allowed. This function is
usually invoked on a clustered index. The only use case for a
secondary index is row_ins_sec_index_entry_by_modify() or its
counterpart in ibuf_insert_to_index_page(). */
-UNIV_INTERN
void
row_upd_rec_in_place(
/*=================*/
rec_t* rec, /*!< in/out: record where replaced */
dict_index_t* index, /*!< in: the index the record belongs to */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
const upd_t* update, /*!< in: update vector */
page_zip_des_t* page_zip)/*!< in: compressed page with enough space
available, or NULL */
@@ -702,6 +694,7 @@ row_upd_rec_in_place(
ulint i;
ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(!index->table->skip_alter_undo);
if (rec_offs_comp(offsets)) {
rec_set_info_bits_new(rec, update->info_bits);
@@ -712,49 +705,21 @@ row_upd_rec_in_place(
n_fields = upd_get_n_fields(update);
for (i = 0; i < n_fields; i++) {
-#ifdef UNIV_BLOB_DEBUG
- btr_blob_dbg_t b;
- const byte* field_ref = NULL;
-#endif /* UNIV_BLOB_DEBUG */
-
upd_field = upd_get_nth_field(update, i);
+
+ /* No need to update virtual columns for non-virtual index */
+ if (upd_fld_is_virtual_col(upd_field)
+ && !dict_index_has_virtual(index)) {
+ continue;
+ }
+
new_val = &(upd_field->new_val);
ut_ad(!dfield_is_ext(new_val) ==
!rec_offs_nth_extern(offsets, upd_field->field_no));
-#ifdef UNIV_BLOB_DEBUG
- if (dfield_is_ext(new_val)) {
- ulint len;
- field_ref = rec_get_nth_field(rec, offsets, i, &len);
- ut_a(len != UNIV_SQL_NULL);
- ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
- field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;
-
- b.ref_page_no = page_get_page_no(page_align(rec));
- b.ref_heap_no = page_rec_get_heap_no(rec);
- b.ref_field_no = i;
- b.blob_page_no = mach_read_from_4(
- field_ref + BTR_EXTERN_PAGE_NO);
- ut_a(b.ref_field_no >= index->n_uniq);
- btr_blob_dbg_rbt_delete(index, &b, "upd_in_place");
- }
-#endif /* UNIV_BLOB_DEBUG */
rec_set_nth_field(rec, offsets, upd_field->field_no,
dfield_get_data(new_val),
dfield_get_len(new_val));
-
-#ifdef UNIV_BLOB_DEBUG
- if (dfield_is_ext(new_val)) {
- b.blob_page_no = mach_read_from_4(
- field_ref + BTR_EXTERN_PAGE_NO);
- b.always_owner = b.owner = !(field_ref[BTR_EXTERN_LEN]
- & BTR_EXTERN_OWNER_FLAG);
- b.del = rec_get_deleted_flag(
- rec, rec_offs_comp(offsets));
-
- btr_blob_dbg_rbt_insert(index, &b, "upd_in_place");
- }
-#endif /* UNIV_BLOB_DEBUG */
}
if (page_zip) {
@@ -762,12 +727,10 @@ row_upd_rec_in_place(
}
}
-#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Writes into the redo log the values of trx id and roll ptr and enough info
to determine their positions within a clustered index record.
-@return new pointer to mlog */
-UNIV_INTERN
+@return new pointer to mlog */
byte*
row_upd_write_sys_vals_to_log(
/*==========================*/
@@ -788,26 +751,24 @@ row_upd_write_sys_vals_to_log(
trx_write_roll_ptr(log_ptr, roll_ptr);
log_ptr += DATA_ROLL_PTR_LEN;
- log_ptr += mach_ull_write_compressed(log_ptr, trx_id);
+ log_ptr += mach_u64_write_compressed(log_ptr, trx_id);
return(log_ptr);
}
-#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Parses the log data of system field values.
-@return log data end or NULL */
-UNIV_INTERN
+@return log data end or NULL */
byte*
row_upd_parse_sys_vals(
/*===================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
+ const byte* ptr, /*!< in: buffer */
+ const byte* end_ptr,/*!< in: buffer end */
ulint* pos, /*!< out: TRX_ID position in record */
trx_id_t* trx_id, /*!< out: trx id */
roll_ptr_t* roll_ptr)/*!< out: roll ptr */
{
- ptr = mach_parse_compressed(ptr, end_ptr, pos);
+ *pos = mach_parse_compressed(&ptr, end_ptr);
if (ptr == NULL) {
@@ -822,15 +783,13 @@ row_upd_parse_sys_vals(
*roll_ptr = trx_read_roll_ptr(ptr);
ptr += DATA_ROLL_PTR_LEN;
- ptr = mach_ull_parse_compressed(ptr, end_ptr, trx_id);
+ *trx_id = mach_u64_parse_compressed(&ptr, end_ptr);
- return(ptr);
+ return(const_cast<byte*>(ptr));
}
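/* A sketch of the revised parsing convention, assuming the mach0data.h
signatures implied by this change: the old mach_parse_compressed(ptr,
end_ptr, &val) returned the advanced pointer, while the new functions
return the parsed value and advance the caller's pointer in place,
setting it to NULL when the buffer ends too early. */

const byte* p = rec_ptr;  /* rec_ptr: hypothetical redo log position */
ulint       pos = mach_parse_compressed(&p, end_ptr);

if (p == NULL) {
        return(NULL);     /* incomplete record: wait for more log */
}

trx_id_t    id = mach_u64_parse_compressed(&p, end_ptr);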
-#ifndef UNIV_HOTBACKUP
/***********************************************************//**
Writes to the redo log the new values of the fields occurring in the index. */
-UNIV_INTERN
void
row_upd_index_write_log(
/*====================*/
@@ -875,7 +834,13 @@ row_upd_index_write_log(
len = dfield_get_len(new_val);
- log_ptr += mach_write_compressed(log_ptr, upd_field->field_no);
+ /* If this is a virtual column, mark it using a special
+ field_no */
+ ulint field_no = upd_fld_is_virtual_col(upd_field)
+ ? REC_MAX_N_FIELDS + upd_field->field_no
+ : upd_field->field_no;
+
+ log_ptr += mach_write_compressed(log_ptr, field_no);
log_ptr += mach_write_compressed(log_ptr, len);
if (len != UNIV_SQL_NULL) {
@@ -888,7 +853,7 @@ row_upd_index_write_log(
mlog_catenate_string(
mtr,
- static_cast<byte*>(
+ static_cast<const byte*>(
dfield_get_data(new_val)),
len);
@@ -900,17 +865,15 @@ row_upd_index_write_log(
mlog_close(mtr, log_ptr);
}
-#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Parses the log data written by row_upd_index_write_log.
-@return log data end or NULL */
-UNIV_INTERN
+@return log data end or NULL */
byte*
row_upd_index_parse(
/*================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
+ const byte* ptr, /*!< in: buffer */
+ const byte* end_ptr,/*!< in: buffer end */
mem_heap_t* heap, /*!< in: memory heap where update vector is
built */
upd_t** update_out)/*!< out: update vector */
@@ -930,7 +893,7 @@ row_upd_index_parse(
info_bits = mach_read_from_1(ptr);
ptr++;
- ptr = mach_parse_compressed(ptr, end_ptr, &n_fields);
+ n_fields = mach_parse_compressed(&ptr, end_ptr);
if (ptr == NULL) {
@@ -945,16 +908,23 @@ row_upd_index_parse(
upd_field = upd_get_nth_field(update, i);
new_val = &(upd_field->new_val);
- ptr = mach_parse_compressed(ptr, end_ptr, &field_no);
+ field_no = mach_parse_compressed(&ptr, end_ptr);
if (ptr == NULL) {
return(NULL);
}
+ /* Check if this is a virtual column and mark the prtype
+ if so */
+ if (field_no >= REC_MAX_N_FIELDS) {
+ new_val->type.prtype |= DATA_VIRTUAL;
+ field_no -= REC_MAX_N_FIELDS;
+ }
+
upd_field->field_no = field_no;
- ptr = mach_parse_compressed(ptr, end_ptr, &len);
+ len = mach_parse_compressed(&ptr, end_ptr);
if (ptr == NULL) {
@@ -978,22 +948,20 @@ row_upd_index_parse(
*update_out = update;
- return(ptr);
+ return(const_cast<byte*>(ptr));
}
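/* Sketch of the virtual-column tagging used by the write/parse pair
above: ordinary field numbers are always below REC_MAX_N_FIELDS, so a
virtual column can be logged as REC_MAX_N_FIELDS + field_no and decoded
symmetrically on the parse side. */

/* writer, row_upd_index_write_log() */
ulint field_no = upd_fld_is_virtual_col(upd_field)
        ? REC_MAX_N_FIELDS + upd_field->field_no
        : upd_field->field_no;
log_ptr += mach_write_compressed(log_ptr, field_no);

/* reader, row_upd_index_parse() */
field_no = mach_parse_compressed(&ptr, end_ptr);
if (field_no >= REC_MAX_N_FIELDS) {
        new_val->type.prtype |= DATA_VIRTUAL;
        field_no -= REC_MAX_N_FIELDS;
}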
-#ifndef UNIV_HOTBACKUP
/***************************************************************//**
Builds an update vector from those fields which in a secondary index entry
differ from a record that has the equal ordering fields. NOTE: we compare
the fields as binary strings!
-@return own: update vector of differing fields */
-UNIV_INTERN
+@return own: update vector of differing fields */
upd_t*
row_upd_build_sec_rec_difference_binary(
/*====================================*/
const rec_t* rec, /*!< in: secondary index record */
dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec, index) */
const dtuple_t* entry, /*!< in: entry to insert */
mem_heap_t* heap) /*!< in: memory heap from which allocated */
{
@@ -1010,6 +978,7 @@ row_upd_build_sec_rec_difference_binary(
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(rec_offs_n_fields(offsets) == dtuple_get_n_fields(entry));
ut_ad(!rec_offs_any_extern(offsets));
+ ut_ad(!index->table->skip_alter_undo);
update = upd_create(dtuple_get_n_fields(entry), heap);
@@ -1038,7 +1007,7 @@ row_upd_build_sec_rec_difference_binary(
dfield_copy(&(upd_field->new_val), dfield);
- upd_field_set_field_no(upd_field, i, index, NULL);
+ upd_field_set_field_no(upd_field, i, index);
n_diff++;
}
@@ -1049,40 +1018,52 @@ row_upd_build_sec_rec_difference_binary(
return(update);
}
-/***************************************************************//**
-Builds an update vector from those fields, excluding the roll ptr and
+
+/** Builds an update vector from those fields, excluding the roll ptr and
trx id fields, which in an index entry differ from a record that has
the equal ordering fields. NOTE: we compare the fields as binary strings!
+@param[in] index clustered index
+@param[in] entry clustered index entry to insert
+@param[in] rec clustered index record
+@param[in] offsets rec_get_offsets(rec,index), or NULL
+@param[in] no_sys skip the system columns
+ DB_TRX_ID and DB_ROLL_PTR
+@param[in] trx transaction (for diagnostics),
+ or NULL
+@param[in] heap memory heap from which allocated
+@param[in]	mysql_table	NULL, or MySQL table object when
+			the user thread invokes DML
+@param[out] error error number in case of failure
@return own: update vector of differing fields, excluding roll ptr and
-trx id */
-UNIV_INTERN
-const upd_t*
+trx id; if error is not equal to DB_SUCCESS, NULL is returned */
+upd_t*
row_upd_build_difference_binary(
-/*============================*/
- dict_index_t* index, /*!< in: clustered index */
- const dtuple_t* entry, /*!< in: entry to insert */
- const rec_t* rec, /*!< in: clustered index record */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index), or NULL */
- bool no_sys, /*!< in: skip the system columns
- DB_TRX_ID and DB_ROLL_PTR */
- trx_t* trx, /*!< in: transaction */
- mem_heap_t* heap) /*!< in: memory heap from which allocated */
+ dict_index_t* index,
+ const dtuple_t* entry,
+ const rec_t* rec,
+ const offset_t* offsets,
+ bool no_sys,
+ trx_t* trx,
+ mem_heap_t* heap,
+ TABLE* mysql_table,
+ dberr_t* error)
{
upd_field_t* upd_field;
- const dfield_t* dfield;
- const byte* data;
ulint len;
upd_t* update;
ulint n_diff;
ulint trx_id_pos;
ulint i;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint n_fld = dtuple_get_n_fields(entry);
+ ulint n_v_fld = dtuple_get_n_v_fields(entry);
rec_offs_init(offsets_);
/* This function is used only for a clustered index */
ut_a(dict_index_is_clust(index));
+ ut_ad(!index->table->skip_alter_undo);
- update = upd_create(dtuple_get_n_fields(entry), heap);
+ update = upd_create(n_fld + n_v_fld, heap);
n_diff = 0;
@@ -1091,24 +1072,28 @@ row_upd_build_difference_binary(
== trx_id_pos + 1);
if (!offsets) {
- offsets = rec_get_offsets(rec, index, offsets_,
+ offsets = rec_get_offsets(rec, index, offsets_, true,
ULINT_UNDEFINED, &heap);
} else {
ut_ad(rec_offs_validate(rec, index, offsets));
}
- for (i = 0; i < dtuple_get_n_fields(entry); i++) {
-
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- dfield = dtuple_get_nth_field(entry, i);
+ for (i = 0; i < n_fld; i++) {
+ const byte* data = rec_get_nth_field(rec, offsets, i, &len);
+ const dfield_t* dfield = dtuple_get_nth_field(entry, i);
/* NOTE: we compare the fields as binary strings!
(No collation) */
+ if (no_sys) {
+ /* TRX_ID */
+ if (i == trx_id_pos) {
+ continue;
+ }
- if (no_sys && (i == trx_id_pos || i == trx_id_pos + 1)) {
-
- continue;
+ /* DB_ROLL_PTR */
+ if (i == trx_id_pos + 1) {
+ continue;
+ }
}
if (!dfield_is_ext(dfield)
@@ -1119,41 +1104,122 @@ row_upd_build_difference_binary(
dfield_copy(&(upd_field->new_val), dfield);
- upd_field_set_field_no(upd_field, i, index, trx);
+ upd_field_set_field_no(upd_field, i, index);
n_diff++;
}
}
+ /* Check the virtual column updates. Even if no non-virtual
+ (base) column changes, we still need to build the indexed
+ virtual column values so that the undo log can record them
+ (for purge/MVCC purposes) */
+ if (n_v_fld > 0) {
+ row_ext_t* ext;
+ mem_heap_t* v_heap = NULL;
+ byte* record;
+ VCOL_STORAGE* vcol_storage;
+
+ THD* thd;
+
+ if (trx == NULL) {
+ thd = current_thd;
+ } else {
+ thd = trx->mysql_thd;
+ }
+
+ ut_ad(!update->old_vrow);
+
+ innobase_allocate_row_for_vcol(thd, index, &v_heap,
+ &mysql_table,
+ &record, &vcol_storage);
+
+ for (i = 0; i < n_v_fld; i++) {
+ const dict_v_col_t* col
+ = dict_table_get_nth_v_col(index->table, i);
+
+ if (!col->m_col.ord_part) {
+ continue;
+ }
+
+ if (update->old_vrow == NULL) {
+ update->old_vrow = row_build(
+ ROW_COPY_POINTERS, index, rec, offsets,
+ index->table, NULL, NULL, &ext, heap);
+ }
+
+ dfield_t* vfield = innobase_get_computed_value(
+ update->old_vrow, col, index,
+ &v_heap, heap, NULL, thd, mysql_table, record,
+ NULL, NULL, NULL);
+ if (vfield == NULL) {
+ if (v_heap) mem_heap_free(v_heap);
+ *error = DB_COMPUTE_VALUE_FAILED;
+ return(NULL);
+ }
+
+ const dfield_t* dfield = dtuple_get_nth_v_field(
+ entry, i);
+
+ if (!dfield_data_is_binary_equal(
+ dfield, vfield->len,
+ static_cast<byte*>(vfield->data))) {
+ upd_field = upd_get_nth_field(update, n_diff);
+
+ upd_field->old_v_val = static_cast<dfield_t*>(
+ mem_heap_alloc(
+ heap,
+ sizeof *upd_field->old_v_val));
+
+ dfield_copy(upd_field->old_v_val, vfield);
+
+ dfield_copy(&(upd_field->new_val), dfield);
+
+ upd_field_set_v_field_no(
+ upd_field, i, index);
+
+ n_diff++;
+
+ }
+ }
+
+ if (v_heap) {
+ if (vcol_storage)
+ innobase_free_row_for_vcol(vcol_storage);
+ mem_heap_free(v_heap);
+ }
+ }
+
update->n_fields = n_diff;
+ ut_ad(update->validate());
return(update);
}
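/* Sizing note, as a sketch: the update vector is now allocated for the
worst case of every base field plus every virtual field differing, and
trimmed to the real number of differences at the end. */

update = upd_create(n_fld + n_v_fld, heap); /* worst-case capacity */
/* ... collect the n_diff changed base and virtual fields ... */
update->n_fields = n_diff;                  /* shrink to what differed */
ut_ad(update->validate());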
-/***********************************************************//**
-Fetch a prefix of an externally stored column. This is similar
-to row_ext_lookup(), but the row_ext_t holds the old values
+/** Fetch a prefix of an externally stored column.
+This is similar to row_ext_lookup(), but the row_ext_t holds the old values
of the column and must not be poisoned with the new values.
-@return BLOB prefix */
+@param[in] data 'internally' stored part of the field
+containing also the reference to the external part
+@param[in] local_len length of data, in bytes
+@param[in] page_size BLOB page size
+@param[in,out]	len	in: length of the prefix to
+fetch; out: fetched length of the prefix
+@param[in,out] heap heap where to allocate
+@return BLOB prefix */
static
byte*
row_upd_ext_fetch(
-/*==============*/
- const byte* data, /*!< in: 'internally' stored part of the
- field containing also the reference to
- the external part */
- ulint local_len, /*!< in: length of data, in bytes */
- ulint zip_size, /*!< in: nonzero=compressed BLOB
- page size, zero for uncompressed
- BLOBs */
- ulint* len, /*!< in: length of prefix to fetch;
- out: fetched length of the prefix */
- mem_heap_t* heap) /*!< in: heap where to allocate */
+ const byte* data,
+ ulint local_len,
+ const page_size_t& page_size,
+ ulint* len,
+ mem_heap_t* heap)
{
byte* buf = static_cast<byte*>(mem_heap_alloc(heap, *len));
*len = btr_copy_externally_stored_field_prefix(
- buf, *len, zip_size, data, local_len);
+ buf, *len, page_size, data, local_len);
/* We should never update records containing a half-deleted BLOB. */
ut_a(*len);
@@ -1161,22 +1227,24 @@ row_upd_ext_fetch(
return(buf);
}
-/***********************************************************//**
-Replaces the new column value stored in the update vector in
-the given index entry field. */
+/** Replaces the new column value stored in the update vector in
+the given index entry field.
+@param[in,out] dfield data field of the index entry
+@param[in] field index field
+@param[in] col field->col
+@param[in] uf update field
+@param[in,out] heap memory heap for allocating and copying
+the new value
+@param[in] page_size page size */
static
void
row_upd_index_replace_new_col_val(
-/*==============================*/
- dfield_t* dfield, /*!< in/out: data field
- of the index entry */
- const dict_field_t* field, /*!< in: index field */
- const dict_col_t* col, /*!< in: field->col */
- const upd_field_t* uf, /*!< in: update field */
- mem_heap_t* heap, /*!< in: memory heap for allocating
- and copying the new value */
- ulint zip_size)/*!< in: compressed page
- size of the table, or 0 */
+ dfield_t* dfield,
+ const dict_field_t* field,
+ const dict_col_t* col,
+ const upd_field_t* uf,
+ mem_heap_t* heap,
+ const page_size_t& page_size)
{
ulint len;
const byte* data;
@@ -1200,7 +1268,7 @@ row_upd_index_replace_new_col_val(
len = field->prefix_len;
- data = row_upd_ext_fetch(data, l, zip_size,
+ data = row_upd_ext_fetch(data, l, page_size,
&len, heap);
}
@@ -1259,7 +1327,6 @@ row_upd_index_replace_new_col_val(
/***********************************************************//**
Replaces the new column values stored in the update vector to the index entry
given. */
-UNIV_INTERN
void
row_upd_index_replace_new_col_vals_index_pos(
/*=========================================*/
@@ -1279,9 +1346,11 @@ row_upd_index_replace_new_col_vals_index_pos(
mem_heap_t* heap) /*!< in: memory heap for allocating and
copying the new values */
{
+ ut_ad(!index->table->skip_alter_undo);
+
ulint i;
ulint n_fields;
- const ulint zip_size = dict_table_zip_size(index->table);
+ const page_size_t& page_size = dict_table_page_size(index->table);
dtuple_set_info_bits(entry, update->info_bits);
@@ -1298,12 +1367,22 @@ row_upd_index_replace_new_col_vals_index_pos(
field = dict_index_get_nth_field(index, i);
col = dict_field_get_col(field);
- uf = upd_get_field_by_field_no(update, i);
+ if (dict_col_is_virtual(col)) {
+ const dict_v_col_t* vcol = reinterpret_cast<
+ const dict_v_col_t*>(
+ col);
+
+ uf = upd_get_field_by_field_no(
+ update, vcol->v_pos, true);
+ } else {
+ uf = upd_get_field_by_field_no(
+ update, i, false);
+ }
if (uf) {
row_upd_index_replace_new_col_val(
dtuple_get_nth_field(entry, i),
- field, col, uf, heap, zip_size);
+ field, col, uf, heap, page_size);
}
}
}
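/* Illustration of the lookup dispatch introduced above: the new bool
argument of upd_get_field_by_field_no() selects between the virtual and
the ordinary field-number namespaces, which can overlap. */

if (dict_col_is_virtual(col)) {
        const dict_v_col_t* vcol
                = reinterpret_cast<const dict_v_col_t*>(col);
        uf = upd_get_field_by_field_no(update, vcol->v_pos, true);
} else {
        uf = upd_get_field_by_field_no(update, i, false);
}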
@@ -1311,7 +1390,6 @@ row_upd_index_replace_new_col_vals_index_pos(
/***********************************************************//**
Replaces the new column values stored in the update vector to the index entry
given. */
-UNIV_INTERN
void
row_upd_index_replace_new_col_vals(
/*===============================*/
@@ -1330,8 +1408,9 @@ row_upd_index_replace_new_col_vals(
ulint i;
const dict_index_t* clust_index
= dict_table_get_first_index(index->table);
- const ulint zip_size
- = dict_table_zip_size(index->table);
+ const page_size_t& page_size = dict_table_page_size(index->table);
+
+ ut_ad(!index->table->skip_alter_undo);
dtuple_set_info_bits(entry, update->info_bits);
@@ -1342,20 +1421,170 @@ row_upd_index_replace_new_col_vals(
field = dict_index_get_nth_field(index, i);
col = dict_field_get_col(field);
- uf = upd_get_field_by_field_no(
- update, dict_col_get_clust_pos(col, clust_index));
+ if (dict_col_is_virtual(col)) {
+ const dict_v_col_t* vcol = reinterpret_cast<
+ const dict_v_col_t*>(
+ col);
+
+ uf = upd_get_field_by_field_no(
+ update, vcol->v_pos, true);
+ } else {
+ uf = upd_get_field_by_field_no(
+ update,
+ dict_col_get_clust_pos(col, clust_index),
+ false);
+ }
if (uf) {
row_upd_index_replace_new_col_val(
dtuple_get_nth_field(entry, i),
- field, col, uf, heap, zip_size);
+ field, col, uf, heap, page_size);
+ }
+ }
+}
+
+/** Stores into the row a virtual column value read from the undo log.
+@param[in,out]	row	row whose column is to be set
+@param[in] field data to set
+@param[in] len data length
+@param[in] vcol virtual column info */
+static
+void
+row_upd_set_vcol_data(
+ dtuple_t* row,
+ const byte* field,
+ ulint len,
+ dict_v_col_t* vcol)
+{
+ dfield_t* dfield = dtuple_get_nth_v_field(row, vcol->v_pos);
+
+ if (dfield_get_type(dfield)->mtype == DATA_MISSING) {
+ dict_col_copy_type(&vcol->m_col, dfield_get_type(dfield));
+
+ dfield_set_data(dfield, field, len);
+ }
+}
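/* The DATA_MISSING guard above makes the store idempotent: a dtuple's
virtual fields start out as DATA_MISSING, so only the first undo-log
value seen for a column is kept. Sketch: */

if (dfield_get_type(dfield)->mtype == DATA_MISSING) {
        dict_col_copy_type(&vcol->m_col, dfield_get_type(dfield));
        dfield_set_data(dfield, field, len); /* first write wins */
}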
+
+/** Replaces the virtual column values stored in a dtuple with those of
+an update vector.
+@param[in,out]	row	row whose columns are to be updated
+@param[in] table table
+@param[in] update an update vector built for the clustered index
+@param[in] upd_new update to new or old value
+@param[in,out] undo_row undo row (if needs to be updated)
+@param[in] ptr remaining part in update undo log */
+void
+row_upd_replace_vcol(
+ dtuple_t* row,
+ const dict_table_t* table,
+ const upd_t* update,
+ bool upd_new,
+ dtuple_t* undo_row,
+ const byte* ptr)
+{
+ ulint col_no;
+ ulint i;
+ ulint n_cols;
+
+ ut_ad(!table->skip_alter_undo);
+
+ n_cols = dtuple_get_n_v_fields(row);
+ for (col_no = 0; col_no < n_cols; col_no++) {
+ dfield_t* dfield;
+
+ const dict_v_col_t* col
+ = dict_table_get_nth_v_col(table, col_no);
+
+ /* If there is no index on the column, do not bother
+ updating its value */
+ if (!col->m_col.ord_part) {
+ dict_index_t* clust_index
+ = dict_table_get_first_index(table);
+
+ /* Skip the column if there is no online alter
+ table in progress or it is not being indexed
+ in the new table */
+ if (!dict_index_is_online_ddl(clust_index)
+ || !row_log_col_is_indexed(clust_index, col_no)) {
+ continue;
+ }
+ }
+
+ dfield = dtuple_get_nth_v_field(row, col_no);
+
+ for (i = 0; i < upd_get_n_fields(update); i++) {
+ const upd_field_t* upd_field
+ = upd_get_nth_field(update, i);
+ if (!upd_fld_is_virtual_col(upd_field)
+ || upd_field->field_no != col->v_pos) {
+ continue;
+ }
+
+ if (upd_new) {
+ dfield_copy_data(dfield, &upd_field->new_val);
+ } else {
+ dfield_copy_data(dfield, upd_field->old_v_val);
+ }
+
+ dfield->type = upd_field->new_val.type;
+ break;
+ }
+ }
+
+ bool first_v_col = true;
+ bool is_undo_log = true;
+
+ /* We will read in the unchanged (but indexed) virtual columns */
+ if (ptr != NULL) {
+ const byte* end_ptr;
+
+ end_ptr = ptr + mach_read_from_2(ptr);
+ ptr += 2;
+
+ while (ptr != end_ptr) {
+ const byte* field;
+ ulint field_no;
+ ulint len;
+ ulint orig_len;
+ bool is_v;
+
+ field_no = mach_read_next_compressed(&ptr);
+
+ is_v = (field_no >= REC_MAX_N_FIELDS);
+
+ if (is_v) {
+ ptr = trx_undo_read_v_idx(
+ table, ptr, first_v_col, &is_undo_log,
+ &field_no);
+ first_v_col = false;
+ }
+
+ ptr = trx_undo_rec_get_col_val(
+ ptr, &field, &len, &orig_len);
+
+ if (field_no == ULINT_UNDEFINED) {
+ ut_ad(is_v);
+ continue;
+ }
+
+ if (is_v) {
+ dict_v_col_t* vcol = dict_table_get_nth_v_col(
+ table, field_no);
+
+ row_upd_set_vcol_data(row, field, len, vcol);
+
+ if (undo_row) {
+ row_upd_set_vcol_data(
+ undo_row, field, len, vcol);
+ }
+ }
+ ut_ad(ptr <= end_ptr);
}
}
}
/***********************************************************//**
Replaces the new column values stored in the update vector. */
-UNIV_INTERN
void
row_upd_replace(
/*============*/
@@ -1384,6 +1613,7 @@ row_upd_replace(
ut_ad(dict_index_is_clust(index));
ut_ad(update);
ut_ad(heap);
+ ut_ad(update->validate());
n_cols = dtuple_get_n_fields(row);
table = index->table;
@@ -1416,7 +1646,8 @@ row_upd_replace(
const upd_field_t* upd_field
= upd_get_nth_field(update, i);
- if (upd_field->field_no != clust_pos) {
+ if (upd_field->field_no != clust_pos
+ || upd_fld_is_virtual_col(upd_field)) {
continue;
}
@@ -1436,6 +1667,8 @@ row_upd_replace(
} else {
*ext = NULL;
}
+
+ row_upd_replace_vcol(row, table, update, true, NULL, NULL);
}
/***********************************************************//**
@@ -1445,7 +1678,6 @@ This function is fast if the update vector is short or the number of ordering
fields in the index is small. Otherwise, this can be quadratic.
NOTE: we compare the fields as binary strings!
@return TRUE if update vector changes an ordering field in the index record */
-UNIV_INTERN
ibool
row_upd_changes_ord_field_binary_func(
/*==================================*/
@@ -1460,8 +1692,10 @@ row_upd_changes_ord_field_binary_func(
row and the data values in update are not
known when this function is called, e.g., at
compile time */
- const row_ext_t*ext) /*!< NULL, or prefixes of the externally
+ const row_ext_t*ext, /*!< NULL, or prefixes of the externally
stored columns in the old row */
+ ulint flag) /*!< in: ROW_BUILD_NORMAL,
+ ROW_BUILD_FOR_PURGE or ROW_BUILD_FOR_UNDO */
{
ulint n_unique;
ulint i;
@@ -1470,6 +1704,7 @@ row_upd_changes_ord_field_binary_func(
ut_ad(thr);
ut_ad(thr->graph);
ut_ad(thr->graph->trx);
+ ut_ad(!index->table->skip_alter_undo);
n_unique = dict_index_get_n_unique(index);
@@ -1485,13 +1720,25 @@ row_upd_changes_ord_field_binary_func(
dfield_t dfield_ext;
ulint dfield_len= 0;
const byte* buf;
+ bool is_virtual;
+ const dict_v_col_t* vcol = NULL;
ind_field = dict_index_get_nth_field(index, i);
col = dict_field_get_col(ind_field);
col_no = dict_col_get_no(col);
+ is_virtual = dict_col_is_virtual(col);
+
+ if (is_virtual) {
+ vcol = reinterpret_cast<const dict_v_col_t*>(col);
- upd_field = upd_get_field_by_field_no(
- update, dict_col_get_clust_pos(col, clust_index));
+ upd_field = upd_get_field_by_field_no(
+ update, vcol->v_pos, true);
+ } else {
+ upd_field = upd_get_field_by_field_no(
+ update,
+ dict_col_get_clust_pos(col, clust_index),
+ false);
+ }
if (upd_field == NULL) {
continue;
@@ -1502,7 +1749,113 @@ row_upd_changes_ord_field_binary_func(
return(TRUE);
}
- dfield = dtuple_get_nth_field(row, col_no);
+ if (is_virtual) {
+ dfield = dtuple_get_nth_v_field(
+ row, vcol->v_pos);
+ } else {
+ dfield = dtuple_get_nth_field(row, col_no);
+ }
+
+ /* For a spatial index update, different geometry
+ data can generate the same MBR. So, if the new index
+ entry is the same as the old entry, the MBR is unchanged
+ and we do not need to do anything. */
+ if (dict_index_is_spatial(index) && i == 0) {
+ double mbr1[SPDIMS * 2];
+ double mbr2[SPDIMS * 2];
+ rtr_mbr_t* old_mbr;
+ rtr_mbr_t* new_mbr;
+ const uchar* dptr = NULL;
+ ulint flen = 0;
+ ulint dlen = 0;
+ mem_heap_t* temp_heap = NULL;
+ const dfield_t* new_field = &upd_field->new_val;
+
+ const page_size_t page_size
+ = (ext != NULL)
+ ? ext->page_size
+ : dict_table_page_size(
+ index->table);
+
+ ut_ad(dfield->data != NULL
+ && dfield->len > GEO_DATA_HEADER_SIZE);
+ ut_ad(dict_col_get_spatial_status(col) != SPATIAL_NONE);
+
+ /* Get the old mbr. */
+ if (dfield_is_ext(dfield)) {
+ /* For off-page stored data, we
+ need to read the whole field data. */
+ flen = dfield_get_len(dfield);
+ dptr = static_cast<const byte*>(
+ dfield_get_data(dfield));
+ temp_heap = mem_heap_create(1000);
+
+ dptr = btr_copy_externally_stored_field(
+ &dlen, dptr,
+ page_size,
+ flen,
+ temp_heap);
+ } else {
+ dptr = static_cast<const uchar*>(dfield->data);
+ dlen = dfield->len;
+ }
+
+ rtree_mbr_from_wkb(dptr + GEO_DATA_HEADER_SIZE,
+ static_cast<uint>(dlen
+ - GEO_DATA_HEADER_SIZE),
+ SPDIMS, mbr1);
+ old_mbr = reinterpret_cast<rtr_mbr_t*>(mbr1);
+
+ /* Get the new mbr. */
+ if (dfield_is_ext(new_field)) {
+ if (flag == ROW_BUILD_FOR_UNDO
+ && dict_table_get_format(index->table)
+ >= UNIV_FORMAT_B) {
+ /* For undo, if the table is in Barracuda
+ format, we need to skip the prefix data. */
+ flen = BTR_EXTERN_FIELD_REF_SIZE;
+ ut_ad(dfield_get_len(new_field) >=
+ BTR_EXTERN_FIELD_REF_SIZE);
+ dptr = static_cast<const byte*>(
+ dfield_get_data(new_field))
+ + dfield_get_len(new_field)
+ - BTR_EXTERN_FIELD_REF_SIZE;
+ } else {
+ flen = dfield_get_len(new_field);
+ dptr = static_cast<const byte*>(
+ dfield_get_data(new_field));
+ }
+
+ if (temp_heap == NULL) {
+ temp_heap = mem_heap_create(1000);
+ }
+
+ dptr = btr_copy_externally_stored_field(
+ &dlen, dptr,
+ page_size,
+ flen,
+ temp_heap);
+ } else {
+ dptr = static_cast<const byte*>(
+ upd_field->new_val.data);
+ dlen = upd_field->new_val.len;
+ }
+ rtree_mbr_from_wkb(dptr + GEO_DATA_HEADER_SIZE,
+ static_cast<uint>(dlen
+ - GEO_DATA_HEADER_SIZE),
+ SPDIMS, mbr2);
+ new_mbr = reinterpret_cast<rtr_mbr_t*>(mbr2);
+
+ if (temp_heap) {
+ mem_heap_free(temp_heap);
+ }
+
+ if (!MBR_EQUAL_CMP(old_mbr, new_mbr)) {
+ return(TRUE);
+ } else {
+ continue;
+ }
+ }
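/* Sketch of the MBR short-circuit above, reusing the calls from this
hunk; old_ptr/old_len/new_ptr/new_len stand in for the decoded old and
new geometry data. Both WKB values are reduced to minimum bounding
rectangles, and only a changed MBR forces a spatial index update. */

double mbr_old[SPDIMS * 2];
double mbr_new[SPDIMS * 2];

rtree_mbr_from_wkb(old_ptr + GEO_DATA_HEADER_SIZE,
                   static_cast<uint>(old_len - GEO_DATA_HEADER_SIZE),
                   SPDIMS, mbr_old);
rtree_mbr_from_wkb(new_ptr + GEO_DATA_HEADER_SIZE,
                   static_cast<uint>(new_len - GEO_DATA_HEADER_SIZE),
                   SPDIMS, mbr_new);

if (!MBR_EQUAL_CMP(reinterpret_cast<rtr_mbr_t*>(mbr_old),
                   reinterpret_cast<rtr_mbr_t*>(mbr_new))) {
        return(TRUE);   /* the ordering (MBR) changed */
}
continue;               /* same MBR: this field needs no index change */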
/* This treatment of column prefix indexes is loosely
based on row_build_index_entry(). */
@@ -1542,7 +1895,7 @@ row_upd_changes_ord_field_binary_func(
ut_a(dict_index_is_clust(index)
|| ind_field->prefix_len <= dfield_len);
- buf = static_cast<byte*>(dfield_get_data(dfield));
+ buf = static_cast<const byte*>(dfield_get_data(dfield));
copy_dfield:
ut_a(dfield_len > 0);
dfield_copy(&dfield_ext, dfield);
@@ -1566,7 +1919,6 @@ Checks if an update vector changes an ordering field of an index record.
NOTE: we compare the fields as binary strings!
@return TRUE if update vector may change an ordering field in an index
record */
-UNIV_INTERN
ibool
row_upd_changes_some_index_ord_field_binary(
/*========================================*/
@@ -1583,11 +1935,17 @@ row_upd_changes_some_index_ord_field_binary(
upd_field = upd_get_nth_field(update, i);
- if (dict_field_get_col(dict_index_get_nth_field(
- index, upd_field->field_no))
- ->ord_part) {
-
- return(TRUE);
+ if (upd_fld_is_virtual_col(upd_field)) {
+ if (dict_table_get_nth_v_col(index->table,
+ upd_field->field_no)
+ ->m_col.ord_part) {
+ return(TRUE);
+ }
+ } else {
+ if (dict_field_get_col(dict_index_get_nth_field(
+ index, upd_field->field_no))->ord_part) {
+ return(TRUE);
+ }
}
}
@@ -1597,7 +1955,6 @@ row_upd_changes_some_index_ord_field_binary(
/***********************************************************//**
Checks if an FTS Doc ID column is affected by an UPDATE.
@return whether the Doc ID column is changed */
-UNIV_INTERN
bool
row_upd_changes_doc_id(
/*===================*/
@@ -1608,6 +1965,8 @@ row_upd_changes_doc_id(
dict_index_t* clust_index;
fts_t* fts = table->fts;
+ ut_ad(!table->skip_alter_undo);
+
clust_index = dict_table_get_first_index(table);
/* Convert from index-specific column number to table-global
@@ -1620,7 +1979,6 @@ row_upd_changes_doc_id(
Checks if an FTS indexed column is affected by an UPDATE.
@return offset within fts_t::indexes if FTS indexed column updated else
ULINT_UNDEFINED */
-UNIV_INTERN
ulint
row_upd_changes_fts_column(
/*=======================*/
@@ -1631,20 +1989,28 @@ row_upd_changes_fts_column(
dict_index_t* clust_index;
fts_t* fts = table->fts;
- clust_index = dict_table_get_first_index(table);
+ ut_ad(!table->skip_alter_undo);
- /* Convert from index-specific column number to table-global
- column number. */
- col_no = dict_index_get_nth_col_no(clust_index, upd_field->field_no);
+ if (upd_fld_is_virtual_col(upd_field)) {
+ col_no = upd_field->field_no;
+ return(dict_table_is_fts_column(fts->indexes, col_no, true));
+ } else {
+ clust_index = dict_table_get_first_index(table);
+
+ /* Convert from index-specific column number to table-global
+ column number. */
+ col_no = dict_index_get_nth_col_no(clust_index,
+ upd_field->field_no);
+ return(dict_table_is_fts_column(fts->indexes, col_no, false));
+ }
- return(dict_table_is_fts_column(fts->indexes, col_no));
}
/***********************************************************//**
Checks if an update vector changes some of the first ordering fields of an
index record. This is only used in foreign key checks and we can assume
that index does not contain column prefixes.
-@return TRUE if changes */
+@return TRUE if changes */
static
ibool
row_upd_changes_first_fields_binary(
@@ -1701,7 +2067,7 @@ void
row_upd_copy_columns(
/*=================*/
rec_t* rec, /*!< in: record in a clustered index */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const offset_t* offsets,/*!< in: array returned by rec_get_offsets() */
sym_node_t* column) /*!< in: first column in a column list, or
NULL */
{
@@ -1745,20 +2111,114 @@ row_upd_eval_new_vals(
}
}
-/***********************************************************//**
-Stores to the heap the row on which the node->pcur is positioned. */
+/** Stores to the heap the virtual columns that are needed by any index
+@param[in,out]	node	row update node
+@param[in]	update	an update vector if this is an UPDATE, or NULL
+@param[in]	thd	MySQL thread handle
+@param[in,out]	mysql_table	MySQL table object */
+static
+void
+row_upd_store_v_row(
+ upd_node_t* node,
+ const upd_t* update,
+ THD* thd,
+ TABLE* mysql_table)
+{
+ mem_heap_t* heap = NULL;
+ dict_index_t* index = dict_table_get_first_index(node->table);
+ byte* record= 0;
+ VCOL_STORAGE *vcol_storage= 0;
+
+ if (!update)
+ innobase_allocate_row_for_vcol(thd, index, &heap, &mysql_table,
+ &record, &vcol_storage);
+
+ for (ulint col_no = 0; col_no < dict_table_get_n_v_cols(node->table);
+ col_no++) {
+
+ const dict_v_col_t* col
+ = dict_table_get_nth_v_col(node->table, col_no);
+
+ if (col->m_col.ord_part) {
+ dfield_t* dfield
+ = dtuple_get_nth_v_field(node->row, col_no);
+ ulint n_upd
+ = update ? upd_get_n_fields(update) : 0;
+ ulint i = 0;
+
+ /* Check if the value is already in update vector */
+ for (i = 0; i < n_upd; i++) {
+ const upd_field_t* upd_field
+ = upd_get_nth_field(update, i);
+ if (!(upd_field->new_val.type.prtype
+ & DATA_VIRTUAL)
+ || upd_field->field_no != col->v_pos) {
+ continue;
+ }
+
+ dfield_copy_data(dfield, upd_field->old_v_val);
+ dfield_dup(dfield, node->heap);
+ break;
+ }
+
+ /* Not updated */
+ if (i >= n_upd) {
+ /* If this is an update, then the value
+ should be in update->old_vrow */
+ if (update) {
+ if (update->old_vrow == NULL) {
+ /* This only happens in a
+ cascade update. The virtual
+ column cannot be affected,
+ so it is OK to set it to NULL */
+ dfield_set_null(dfield);
+ } else {
+ dfield_t* vfield
+ = dtuple_get_nth_v_field(
+ update->old_vrow,
+ col_no);
+ dfield_copy_data(dfield, vfield);
+ dfield_dup(dfield, node->heap);
+ }
+ } else {
+ /* Need to compute the value; this
+ happens when deleting the row */
+ innobase_get_computed_value(
+ node->row, col, index,
+ &heap, node->heap, NULL,
+ thd, mysql_table, record, NULL,
+ NULL, NULL);
+ }
+ }
+ }
+ }
+
+ if (heap) {
+ if (vcol_storage)
+ innobase_free_row_for_vcol(vcol_storage);
+ mem_heap_free(heap);
+ }
+
+}
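/* The loop above resolves each indexed virtual column from one of three
sources, in order; find_vcol_in_update() below is a hypothetical helper
naming the inline search that the real loop performs. Condensed: */

if (const upd_field_t* uf = find_vcol_in_update(update, col)) {
        dfield_copy_data(dfield, uf->old_v_val); /* 1: update vector */
} else if (update) {
        /* 2: UPDATE path; the value was built into update->old_vrow
        (or is NULL for a cascade, where vcols cannot be affected) */
        dfield_copy_data(dfield,
                         dtuple_get_nth_v_field(update->old_vrow, col_no));
} else {
        /* 3: DELETE path; recompute the value */
        innobase_get_computed_value(node->row, col, index, &heap,
                                    node->heap, NULL, thd, mysql_table,
                                    record, NULL, NULL, NULL);
}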
+
+/** Stores to the heap the row on which the node->pcur is positioned.
+@param[in] node row update node
+@param[in]	thd	MySQL thread handle
+@param[in,out]	mysql_table	NULL, or MySQL table object when
+			the user thread invokes DML */
static
void
row_upd_store_row(
-/*==============*/
- upd_node_t* node) /*!< in: row update node */
+ upd_node_t* node,
+ THD* thd,
+ TABLE* mysql_table)
{
dict_index_t* clust_index;
rec_t* rec;
mem_heap_t* heap = NULL;
row_ext_t** ext;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- const ulint* offsets;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ const offset_t* offsets;
rec_offs_init(offsets_);
ut_ad(node->pcur->latch_mode != BTR_NO_LATCHES);
@@ -1771,7 +2231,7 @@ row_upd_store_row(
rec = btr_pcur_get_rec(node->pcur);
- offsets = rec_get_offsets(rec, clust_index, offsets_,
+ offsets = rec_get_offsets(rec, clust_index, offsets_, true,
ULINT_UNDEFINED, &heap);
if (dict_table_get_format(node->table) >= UNIV_FORMAT_B) {
@@ -1789,6 +2249,12 @@ row_upd_store_row(
node->row = row_build(ROW_COPY_DATA, clust_index, rec, offsets,
NULL, NULL, NULL, ext, node->heap);
+
+ if (node->table->n_v_cols) {
+ row_upd_store_v_row(node, node->is_delete ? NULL : node->update,
+ thd, mysql_table);
+ }
+
if (node->is_delete) {
node->upd_row = NULL;
node->upd_ext = NULL;
@@ -1803,23 +2269,6 @@ row_upd_store_row(
}
}
-#ifdef WITH_WSREP
-/** Determine if a FOREIGN KEY constraint needs to be processed.
-@param[in] node query node
-@param[in] trx transaction
-@return whether the node cannot be ignored */
-
-inline bool wsrep_must_process_fk(const upd_node_t* node, const trx_t* trx)
-{
- if (!trx->is_wsrep()) {
- return false;
- }
- return que_node_get_type(node->common.parent) != QUE_NODE_UPDATE
- || static_cast<upd_node_t*>(node->common.parent)->cascade_node
- != node;
-}
-#endif /* WITH_WSREP */
-
/***********************************************************//**
Updates a secondary index entry of a row.
@return DB_SUCCESS if operation successfully completed, else error
@@ -1842,9 +2291,10 @@ row_upd_sec_index_entry(
dberr_t err = DB_SUCCESS;
trx_t* trx = thr_get_trx(thr);
ulint mode;
+ ulint flags;
enum row_search_result search_result;
- ut_ad(trx->id);
+ ut_ad(trx->id != 0);
index = node->index;
@@ -1861,22 +2311,28 @@ row_upd_sec_index_entry(
log_free_check();
-#ifdef UNIV_DEBUG
- /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC().
- Once it is fixed, remove the 'ifdef', 'if' and this comment. */
- if (!trx->ddl) {
- DEBUG_SYNC_C_IF_THD(trx->mysql_thd,
- "before_row_upd_sec_index_entry");
- }
-#endif /* UNIV_DEBUG */
+ DEBUG_SYNC_C_IF_THD(trx->mysql_thd,
+ "before_row_upd_sec_index_entry");
- mtr_start(&mtr);
+ mtr.start();
+
+ switch (index->space) {
+ case SRV_TMP_SPACE_ID:
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
+ flags = BTR_NO_LOCKING_FLAG;
+ break;
+ default:
+ mtr.set_named_space(index->space);
+ /* fall through */
+ case IBUF_SPACE_ID:
+ flags = 0;
+ break;
+ }
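/* Design note on the switch above, as a sketch: pages of the temporary
tablespace need neither redo logging nor row locks, because temporary
tables are private to one connection and never survive a crash; every
other tablespace is tagged on the mtr so the redo log records which
space the changes belong to. */

switch (index->space) {
case SRV_TMP_SPACE_ID:
        mtr.set_log_mode(MTR_LOG_NO_REDO);  /* no redo for temp tables */
        flags = BTR_NO_LOCKING_FLAG;        /* and no locking */
        break;
default:
        mtr.set_named_space(index->space);  /* persistent: tag the space */
        /* fall through */
case IBUF_SPACE_ID:
        flags = 0;
        break;
}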
- if (*index->name == TEMP_INDEX_PREFIX) {
- /* The index->online_status may change if the
- index->name starts with TEMP_INDEX_PREFIX (meaning
- that the index is or was being created online). It is
- protected by index->lock. */
+ if (!index->is_committed()) {
+ /* The index->online_status may change if the index is
+ or was being created online, but not committed yet. It
+ is protected by index->lock. */
mtr_s_lock(dict_index_get_lock(index), &mtr);
@@ -1905,22 +2361,32 @@ row_upd_sec_index_entry(
}
/* We can only buffer delete-mark operations if there
- are no foreign key constraints referring to the index. */
- mode = referenced
- ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
- : BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
- | BTR_DELETE_MARK;
+ are no foreign key constraints referring to the index.
+ Change buffering is disabled for temporary tables and
+ spatial index. */
+ mode = (referenced || dict_table_is_temporary(index->table)
+ || dict_index_is_spatial(index))
+ ? BTR_MODIFY_LEAF_ALREADY_S_LATCHED
+ : BTR_DELETE_MARK_LEAF_ALREADY_S_LATCHED;
} else {
/* For secondary indexes,
- index->online_status==ONLINE_INDEX_CREATION unless
- index->name starts with TEMP_INDEX_PREFIX. */
+ index->online_status==ONLINE_INDEX_COMPLETE if
+ index->is_committed(). */
ut_ad(!dict_index_is_online_ddl(index));
/* We can only buffer delete-mark operations if there
- are no foreign key constraints referring to the index. */
- mode = referenced
+ are no foreign key constraints referring to the index.
+ Change buffering is disabled for temporary tables and
+ spatial index. */
+ mode = (referenced || dict_table_is_temporary(index->table)
+ || dict_index_is_spatial(index))
? BTR_MODIFY_LEAF
- : BTR_MODIFY_LEAF | BTR_DELETE_MARK;
+ : BTR_DELETE_MARK_LEAF;
+ }
+
+ if (dict_index_is_spatial(index)) {
+ ut_ad(mode & BTR_MODIFY_LEAF);
+ mode |= BTR_RTREE_DELETE_MARK;
}
/* Set the query thread, so that ibuf_insert_low() will be
@@ -1943,56 +2409,57 @@ row_upd_sec_index_entry(
break;
case ROW_NOT_FOUND:
- if (*index->name == TEMP_INDEX_PREFIX) {
+ if (!index->is_committed()) {
/* When online CREATE INDEX copied the update
that we already made to the clustered index,
and completed the secondary index creation
before we got here, the old secondary index
record would not exist. The CREATE INDEX
should be waiting for a MySQL meta-data lock
- upgrade at least until this UPDATE
- returns. After that point, the
- TEMP_INDEX_PREFIX would be dropped from the
- index name in commit_inplace_alter_table(). */
+ upgrade at least until this UPDATE returns.
+ After that point, set_committed(true) would be
+ invoked by commit_inplace_alter_table(). */
break;
}
- fputs("InnoDB: error in sec index entry update in\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, trx, index);
- fputs("\n"
- "InnoDB: tuple ", stderr);
- dtuple_print(stderr, entry);
- fputs("\n"
- "InnoDB: record ", stderr);
- rec_print(stderr, rec, index);
- putc('\n', stderr);
- trx_print(stderr, trx, 0);
- fputs("\n"
- "InnoDB: Submit a detailed bug report"
- " to https://jira.mariadb.org/\n", stderr);
+ if (dict_index_is_spatial(index) && btr_cur->rtr_info->fd_del) {
+ /* We found the record, but it was delete-marked */
+ break;
+ }
+
+ ib::error()
+ << "Record in index " << index->name
+ << " of table " << index->table->name
+ << " was not found on update: " << *entry
+ << " at: " << rec_index_print(rec, index);
+#ifdef UNIV_DEBUG
+ mtr_commit(&mtr);
+ mtr_start(&mtr);
+ ut_ad(btr_validate_index(index, 0, false));
ut_ad(0);
+#endif /* UNIV_DEBUG */
break;
case ROW_FOUND:
+ ut_ad(err == DB_SUCCESS);
+
/* Delete mark the old index record; it can already be
delete marked if we return after a lock wait in
row_ins_sec_index_entry() below */
if (!rec_get_deleted_flag(
rec, dict_table_is_comp(index->table))) {
err = btr_cur_del_mark_set_sec_rec(
- 0, btr_cur, TRUE, thr, &mtr);
-
+ flags, btr_cur, TRUE, thr, &mtr);
if (err != DB_SUCCESS) {
break;
}
-
#ifdef WITH_WSREP
if (!referenced && foreign
&& wsrep_must_process_fk(node, trx)
&& !wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
- ulint* offsets = rec_get_offsets(
- rec, index, NULL, ULINT_UNDEFINED,
- &heap);
+
+ offset_t* offsets = rec_get_offsets(
+ rec, index, NULL, true,
+ ULINT_UNDEFINED, &heap);
err = wsrep_row_upd_check_foreign_constraints(
node, &pcur, index->table,
@@ -2003,29 +2470,42 @@ row_upd_sec_index_entry(
case DB_NO_REFERENCED_ROW:
err = DB_SUCCESS;
break;
+ case DB_LOCK_WAIT:
+ if (wsrep_debug) {
+ ib::warn() << "WSREP: sec index FK lock wait"
+ << " index " << index->name
+ << " table " << index->table->name
+ << " query " << wsrep_thd_query(trx->mysql_thd);
+ }
+ break;
case DB_DEADLOCK:
if (wsrep_debug) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "WSREP: sec index FK check fail for deadlock: "
- " index %s table %s", index->name, index->table->name);
+ ib::warn() << "WSREP: sec index FK check fail for deadlock"
+ << " index " << index->name
+ << " table " << index->table->name
+ << " query " << wsrep_thd_query(trx->mysql_thd);
}
break;
default:
- ib_logf(IB_LOG_LEVEL_ERROR,
- "WSREP: referenced FK check fail: %s index %s table %s",
- ut_strerr(err), index->name, index->table->name);
+ ib::error() << "WSREP: referenced FK check fail: " << ut_strerr(err)
+ << " index " << index->name
+ << " table " << index->table->name
+ << " query " << wsrep_thd_query(trx->mysql_thd);
+
break;
}
}
#endif /* WITH_WSREP */
}
+ ut_ad(err == DB_SUCCESS);
+
if (referenced) {
- ulint* offsets;
+ offset_t* offsets;
offsets = rec_get_offsets(
- rec, index, NULL, ULINT_UNDEFINED,
+ rec, index, NULL, true, ULINT_UNDEFINED,
&heap);
/* NOTE that the following call loses
@@ -2090,27 +2570,27 @@ row_upd_sec_step(
row_upd_clust_rec_by_insert_inherit_func(rec,offsets,entry,update)
#else /* UNIV_DEBUG */
# define row_upd_clust_rec_by_insert_inherit(rec,offsets,entry,update) \
- row_upd_clust_rec_by_insert_inherit_func(entry,update)
+ row_upd_clust_rec_by_insert_inherit_func(rec,entry,update)
#endif /* UNIV_DEBUG */
/*******************************************************************//**
Mark non-updated off-page columns inherited when the primary key is
updated. We must mark them as inherited in entry, so that they are not
freed in a rollback. A limited version of this function used to be
called btr_cur_mark_dtuple_inherited_extern().
-@return TRUE if any columns were inherited */
-static MY_ATTRIBUTE((warn_unused_result))
-ibool
+@return whether any columns were inherited */
+static
+bool
row_upd_clust_rec_by_insert_inherit_func(
/*=====================================*/
-#ifdef UNIV_DEBUG
const rec_t* rec, /*!< in: old record, or NULL */
- const ulint* offsets,/*!< in: rec_get_offsets(rec), or NULL */
+#ifdef UNIV_DEBUG
+ const offset_t* offsets,/*!< in: rec_get_offsets(rec), or NULL */
#endif /* UNIV_DEBUG */
dtuple_t* entry, /*!< in/out: updated entry to be
inserted into the clustered index */
const upd_t* update) /*!< in: update vector */
{
- ibool inherit = FALSE;
+ bool inherit = false;
ulint i;
ut_ad(!rec == !offsets);
@@ -2124,9 +2604,9 @@ row_upd_clust_rec_by_insert_inherit_func(
ut_ad(!offsets
|| !rec_offs_nth_extern(offsets, i)
== !dfield_is_ext(dfield)
- || upd_get_field_by_field_no(update, i));
+ || upd_get_field_by_field_no(update, i, false));
if (!dfield_is_ext(dfield)
- || upd_get_field_by_field_no(update, i)) {
+ || upd_get_field_by_field_no(update, i, false)) {
continue;
}
@@ -2158,13 +2638,19 @@ row_upd_clust_rec_by_insert_inherit_func(
data += len - BTR_EXTERN_FIELD_REF_SIZE;
/* The pointer must not be zero. */
ut_a(memcmp(data, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE));
+
+ /* The BLOB must be owned, unless we are resuming from
+ a lock wait and we already had disowned the BLOB. */
+ ut_a(rec == NULL
+ || !(data[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG));
data[BTR_EXTERN_LEN] &= ~BTR_EXTERN_OWNER_FLAG;
data[BTR_EXTERN_LEN] |= BTR_EXTERN_INHERITED_FLAG;
/* The BTR_EXTERN_INHERITED_FLAG only matters in
- rollback. Purge will always free the extern fields of
- a delete-marked row. */
+ rollback of a fresh insert (insert_undo log).
+ Purge (operating on update_undo log) will always free
+ the extern fields of a delete-marked row. */
- inherit = TRUE;
+ inherit = true;
}
return(inherit);
@@ -2187,8 +2673,8 @@ row_upd_clust_rec_by_insert(
ibool referenced,/*!< in: TRUE if index may be referenced in
a foreign key constraint */
#ifdef WITH_WSREP
- ibool foreign, /*!< in: TRUE if index is foreign key index */
-#endif /* WITH_WSREP */
+ bool foreign,/*!< in: whether this is a foreign key */
+#endif
mtr_t* mtr) /*!< in/out: mtr; gets committed here */
{
mem_heap_t* heap;
@@ -2198,12 +2684,14 @@ row_upd_clust_rec_by_insert(
dict_table_t* table;
dtuple_t* entry;
dberr_t err;
- ibool change_ownership = FALSE;
rec_t* rec;
- ulint* offsets = NULL;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets = offsets_;
ut_ad(dict_index_is_clust(index));
+ rec_offs_init(offsets_);
+
trx = thr_get_trx(thr);
table = node->table;
pcur = node->pcur;
@@ -2211,40 +2699,46 @@ row_upd_clust_rec_by_insert(
heap = mem_heap_create(1000);
- entry = row_build_index_entry(node->upd_row, node->upd_ext,
- index, heap);
- ut_a(entry);
+ entry = row_build_index_entry_low(node->upd_row, node->upd_ext,
+ index, heap, ROW_BUILD_FOR_INSERT);
+ ut_ad(dtuple_get_info_bits(entry) == 0);
row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id);
switch (node->state) {
default:
ut_error;
- case UPD_NODE_INSERT_BLOB:
- /* A lock wait occurred in row_ins_clust_index_entry() in
- the previous invocation of this function. Mark the
- off-page columns in the entry inherited. */
-
- change_ownership = row_upd_clust_rec_by_insert_inherit(
- NULL, NULL, entry, node->update);
- ut_a(change_ownership);
- /* fall through */
case UPD_NODE_INSERT_CLUSTERED:
/* A lock wait occurred in row_ins_clust_index_entry() in
the previous invocation of this function. */
+ row_upd_clust_rec_by_insert_inherit(
+ NULL, NULL, entry, node->update);
break;
case UPD_NODE_UPDATE_CLUSTERED:
/* This is the first invocation of the function where
we update the primary key. Delete-mark the old record
in the clustered index and prepare to insert a new entry. */
rec = btr_cur_get_rec(btr_cur);
- offsets = rec_get_offsets(rec, index, NULL,
+ offsets = rec_get_offsets(rec, index, offsets, true,
ULINT_UNDEFINED, &heap);
ut_ad(page_rec_is_user_rec(rec));
+ if (rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
+ /* If the clustered index record is already delete
+ marked, then we are here after a DB_LOCK_WAIT.
+ Skip delete-marking the clustered index record
+ and disowning its BLOBs. */
+ ut_ad(row_get_rec_trx_id(rec, index, offsets)
+ == trx->id);
+ ut_ad(!trx_undo_roll_ptr_is_insert(
+ row_get_rec_roll_ptr(rec, index,
+ offsets)));
+ goto check_fk;
+ }
+
err = btr_cur_del_mark_set_clust_rec(
btr_cur_get_block(btr_cur), rec, index, offsets,
- thr, mtr);
+ thr, node->row, mtr);
if (err != DB_SUCCESS) {
err_exit:
mtr_commit(mtr);
@@ -2258,20 +2752,19 @@ err_exit:
old record and owned by the new entry. */
if (rec_offs_any_extern(offsets)) {
- change_ownership = row_upd_clust_rec_by_insert_inherit(
- rec, offsets, entry, node->update);
-
- if (change_ownership) {
+ if (row_upd_clust_rec_by_insert_inherit(
+ rec, offsets, entry, node->update)) {
/* The blobs are disowned here, expecting the
insert down below to inherit them. But if the
insert fails, then this disown will be undone
when the operation is rolled back. */
btr_cur_disown_inherited_fields(
btr_cur_get_page_zip(btr_cur),
- rec, index, offsets, node->update, mtr);
+ rec, index, offsets, node->update,
+ mtr);
}
}
-
+check_fk:
if (referenced) {
/* NOTE that the following call loses
the position of pcur ! */
@@ -2283,7 +2776,7 @@ err_exit:
goto err_exit;
}
#ifdef WITH_WSREP
- } else if ((foreign && wsrep_must_process_fk(node, trx))) {
+ } else if (foreign && wsrep_must_process_fk(node, trx)) {
err = wsrep_row_upd_check_foreign_constraints(
node, pcur, table, index, offsets, thr, mtr);
@@ -2294,16 +2787,16 @@ err_exit:
break;
case DB_DEADLOCK:
if (wsrep_debug) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "WSREP: sec index FK check fail for deadlock: "
- " index %s table %s", index->name, index->table->name);
+ ib::warn() << "WSREP: sec index FK check fail for deadlock"
+ << " index " << index->name
+ << " table " << index->table->name;
}
- break;
+ goto err_exit;
default:
- ib_logf(IB_LOG_LEVEL_ERROR,
- "WSREP: referenced FK check fail: %s index %s table %s",
- ut_strerr(err), index->name, index->table->name);
- break;
+ ib::error() << "WSREP: referenced FK check fail: " << ut_strerr(err)
+ << " index " << index->name
+ << " table " << index->table->name;
+ goto err_exit;
}
#endif /* WITH_WSREP */
}
@@ -2314,9 +2807,7 @@ err_exit:
err = row_ins_clust_index_entry(
index, entry, thr,
node->upd_ext ? node->upd_ext->n_ext : 0);
- node->state = change_ownership
- ? UPD_NODE_INSERT_BLOB
- : UPD_NODE_INSERT_CLUSTERED;
+ node->state = UPD_NODE_INSERT_CLUSTERED;
mem_heap_free(heap);
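The removal of UPD_NODE_INSERT_BLOB above simplifies the retry protocol: instead of a dedicated state for re-inheriting off-page columns, the function now detects on re-entry after a lock wait that the old record is already delete-marked and jumps to check_fk. A minimal sketch of that idempotent-retry shape, using stand-in types rather than the real InnoDB API:

    #include <cassert>
    #include <cstdint>

    enum class Err { SUCCESS, LOCK_WAIT };   // stand-in for dberr_t

    struct Rec {
        bool     delete_marked = false;
        uint64_t trx_id        = 0;
    };

    // Re-entrant first step of "update PK by delete-mark + insert":
    // if an earlier invocation already delete-marked the record before
    // waiting on a lock, skip straight to the foreign-key checks.
    Err mark_old_version(Rec& rec, uint64_t trx_id) {
        if (rec.delete_marked) {
            // Only our own earlier invocation can have done this.
            assert(rec.trx_id == trx_id);
            return Err::SUCCESS;             // proceed to check_fk
        }
        rec.delete_marked = true;
        rec.trx_id        = trx_id;
        return Err::SUCCESS;
    }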
@@ -2332,9 +2823,10 @@ static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_upd_clust_rec(
/*==============*/
+ ulint flags, /*!< in: undo logging and locking flags */
upd_node_t* node, /*!< in: row update node */
dict_index_t* index, /*!< in: clustered index */
- ulint* offsets,/*!< in: rec_get_offsets() on node->pcur */
+ offset_t* offsets,/*!< in: rec_get_offsets() on node->pcur */
mem_heap_t** offsets_heap,
/*!< in/out: memory heap, can be emptied */
que_thr_t* thr, /*!< in: query thread */
@@ -2348,6 +2840,8 @@ row_upd_clust_rec(
const dtuple_t* rebuilt_old_pk = NULL;
ut_ad(dict_index_is_clust(index));
+ ut_ad(!thr_get_trx(thr)->in_rollback);
+ ut_ad(!node->table->skip_alter_undo);
pcur = node->pcur;
btr_cur = btr_pcur_get_btr_cur(pcur);
@@ -2368,28 +2862,22 @@ row_upd_clust_rec(
if (node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE) {
err = btr_cur_update_in_place(
- BTR_NO_LOCKING_FLAG, btr_cur,
+ flags | BTR_NO_LOCKING_FLAG, btr_cur,
offsets, node->update,
node->cmpl_info, thr, thr_get_trx(thr)->id, mtr);
} else {
err = btr_cur_optimistic_update(
- BTR_NO_LOCKING_FLAG, btr_cur,
+ flags | BTR_NO_LOCKING_FLAG, btr_cur,
&offsets, offsets_heap, node->update,
node->cmpl_info, thr, thr_get_trx(thr)->id, mtr);
}
- if (err == DB_SUCCESS && dict_index_is_online_ddl(index)) {
- row_log_table_update(btr_cur_get_rec(btr_cur),
- index, offsets, rebuilt_old_pk);
+ if (err == DB_SUCCESS) {
+ goto success;
}
mtr_commit(mtr);
- if (UNIV_LIKELY(err == DB_SUCCESS)) {
-
- goto func_exit;
- }
-
if (buf_LRU_buf_pool_running_out()) {
err = DB_LOCK_TABLE_FULL;
@@ -2398,7 +2886,16 @@ row_upd_clust_rec(
/* We may have to modify the tree structure: do a pessimistic descent
down the index tree */
- mtr_start(mtr);
+ mtr->start();
+
+ if (index->table->is_temporary()) {
+ /* Disable locking, because temporary tables are never
+ shared between transactions or connections. */
+ flags |= BTR_NO_LOCKING_FLAG;
+ mtr->set_log_mode(MTR_LOG_NO_REDO);
+ } else {
+ mtr->set_named_space(index->space);
+ }
/* NOTE: this transaction has an s-lock or x-lock on the record and
therefore other transactions cannot modify the record when we have no
@@ -2416,58 +2913,26 @@ row_upd_clust_rec(
}
err = btr_cur_pessimistic_update(
- BTR_NO_LOCKING_FLAG | BTR_KEEP_POS_FLAG, btr_cur,
+ flags | BTR_NO_LOCKING_FLAG | BTR_KEEP_POS_FLAG, btr_cur,
&offsets, offsets_heap, heap, &big_rec,
node->update, node->cmpl_info,
thr, thr_get_trx(thr)->id, mtr);
if (big_rec) {
ut_a(err == DB_SUCCESS);
- /* Write out the externally stored
- columns while still x-latching
- index->lock and block->lock. Allocate
- pages for big_rec in the mtr that
- modified the B-tree, but be sure to skip
- any pages that were freed in mtr. We will
- write out the big_rec pages before
- committing the B-tree mini-transaction. If
- the system crashes so that crash recovery
- will not replay the mtr_commit(&mtr), the
- big_rec pages will be left orphaned until
- the pages are allocated for something else.
-
- TODO: If the allocation extends the tablespace, it
- will not be redo logged, in either mini-transaction.
- Tablespace extension should be redo-logged in the
- big_rec mini-transaction, so that recovery will not
- fail when the big_rec was written to the extended
- portion of the file, in case the file was somehow
- truncated in the crash. */
DEBUG_SYNC_C("before_row_upd_extern");
err = btr_store_big_rec_extern_fields(
- index, btr_cur_get_block(btr_cur),
- btr_cur_get_rec(btr_cur), offsets,
- big_rec, mtr, BTR_STORE_UPDATE);
+ pcur, offsets, big_rec, mtr, BTR_STORE_UPDATE);
DEBUG_SYNC_C("after_row_upd_extern");
- /* If writing big_rec fails (for example, because of
- DB_OUT_OF_FILE_SPACE), the record will be corrupted.
- Even if we did not update any externally stored
- columns, our update could cause the record to grow so
- that a non-updated column was selected for external
- storage. This non-update would not have been written
- to the undo log, and thus the record cannot be rolled
- back.
-
- However, because we have not executed mtr_commit(mtr)
- yet, the update will not be replayed in crash
- recovery, and the following assertion failure will
- effectively "roll back" the operation. */
- ut_a(err == DB_SUCCESS);
}
- if (err == DB_SUCCESS && dict_index_is_online_ddl(index)) {
- row_log_table_update(btr_cur_get_rec(btr_cur),
- index, offsets, rebuilt_old_pk);
+ if (err == DB_SUCCESS) {
+success:
+ if (dict_index_is_online_ddl(index)) {
+ row_log_table_update(
+ btr_cur_get_rec(btr_cur),
+ index, offsets, rebuilt_old_pk);
+ }
}
mtr_commit(mtr);
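row_upd_clust_rec now takes the undo-logging and locking flags from its caller and, as before, tries the cheap in-place or optimistic update first, restarting the mini-transaction for a pessimistic descent only on failure; for temporary tables it additionally disables redo logging. A rough sketch of that control flow, with mtr_t and the two update paths as stand-ins for the real API:

    #include <functional>

    enum class Err { SUCCESS, FAIL };

    struct mtr_t {                       // stand-in mini-transaction
        void start() {}
        void commit() {}
        void set_log_mode_no_redo() {}   // analogous to MTR_LOG_NO_REDO
    };

    Err update_clust_rec(bool is_temporary,
                         const std::function<Err(mtr_t&)>& optimistic,
                         const std::function<Err(mtr_t&)>& pessimistic) {
        mtr_t mtr;
        mtr.start();
        Err err = optimistic(mtr);       // cheap path: no page reorganization
        mtr.commit();
        if (err == Err::SUCCESS) {
            return err;
        }
        mtr.start();                     // retry with a pessimistic descent
        if (is_temporary) {
            // Temporary tables are never shared between connections,
            // so neither locking nor redo logging is needed.
            mtr.set_log_mode_no_redo();
        }
        err = pessimistic(mtr);          // may split or merge B-tree pages
        mtr.commit();
        return err;
    }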
@@ -2485,31 +2950,29 @@ func_exit:
/***********************************************************//**
Delete marks a clustered index record.
-@return DB_SUCCESS if operation successfully completed, else error code */
+@return DB_SUCCESS if operation successfully completed, else error code */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_upd_del_mark_clust_rec(
/*=======================*/
upd_node_t* node, /*!< in: row update node */
dict_index_t* index, /*!< in: clustered index */
- ulint* offsets,/*!< in/out: rec_get_offsets() for the
+ offset_t* offsets,/*!< in/out: rec_get_offsets() for the
record under the cursor */
que_thr_t* thr, /*!< in: query thread */
ibool referenced,
/*!< in: TRUE if index may be referenced in
a foreign key constraint */
#ifdef WITH_WSREP
- ibool foreign,/*!< in: TRUE if index is foreign key index */
-#endif /* WITH_WSREP */
+ bool foreign,/*!< in: whether this is a foreign key */
+#endif
mtr_t* mtr) /*!< in: mtr; gets committed here */
{
btr_pcur_t* pcur;
btr_cur_t* btr_cur;
dberr_t err;
-#ifdef WITH_WSREP
rec_t* rec;
- trx_t* trx = thr_get_trx(thr) ;
-#endif /* WITH_WSREP */
+ trx_t* trx = thr_get_trx(thr);
ut_ad(dict_index_is_clust(index));
ut_ad(node->is_delete);
@@ -2520,22 +2983,19 @@ row_upd_del_mark_clust_rec(
/* Store row because we have to build also the secondary index
entries */
- row_upd_store_row(node);
+ row_upd_store_row(node, trx->mysql_thd,
+ thr->prebuilt && thr->prebuilt->table == node->table
+ ? thr->prebuilt->m_mysql_table : NULL);
/* Mark the clustered index record deleted; we do not have to check
locks, because we assume that we have an x-lock on the record */
-#ifdef WITH_WSREP
rec = btr_cur_get_rec(btr_cur);
-#endif /* WITH_WSREP */
err = btr_cur_del_mark_set_clust_rec(
-#ifdef WITH_WSREP
btr_cur_get_block(btr_cur), rec,
-#else
- btr_cur_get_block(btr_cur), btr_cur_get_rec(btr_cur),
-#endif /* WITH_WSREP */
- index, offsets, thr, mtr);
+ index, offsets, thr, node->row, mtr);
+
if (err != DB_SUCCESS) {
} else if (referenced) {
/* NOTE that the following call loses the position of pcur ! */
@@ -2554,15 +3014,16 @@ row_upd_del_mark_clust_rec(
break;
case DB_DEADLOCK:
if (wsrep_debug) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "WSREP: sec index FK check fail for deadlock: "
- " index %s table %s", index->name, index->table->name);
+ ib::warn() << "WSREP: sec index FK check fail for deadlock"
+ << " index " << index->name
+ << " table " << index->table->name;
}
break;
default:
- ib_logf(IB_LOG_LEVEL_ERROR,
- "WSREP: referenced FK check fail: %s index %s table %s",
- ut_strerr(err), index->name, index->table->name);
+ ib::error() << "WSREP: referenced FK check fail: " << ut_strerr(err)
+ << " index " << index->name
+ << " table " << index->table->name;
+
break;
}
#endif /* WITH_WSREP */
@@ -2591,24 +3052,34 @@ row_upd_clust_step(
mtr_t mtr;
rec_t* rec;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets;
+ offset_t offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets;
ibool referenced;
+ trx_t* trx = thr_get_trx(thr);
+
rec_offs_init(offsets_);
index = dict_table_get_first_index(node->table);
- referenced = row_upd_index_is_referenced(index, thr_get_trx(thr));
+ referenced = row_upd_index_is_referenced(index, trx);
+
#ifdef WITH_WSREP
- ibool foreign = wsrep_row_upd_index_is_foreign(
- index, thr_get_trx(thr));
-#endif /* WITH_WSREP */
+ const bool foreign = wsrep_row_upd_index_is_foreign(index, trx);
+#endif
pcur = node->pcur;
/* We have to restore the cursor to its position */
- mtr_start(&mtr);
+ mtr.start();
+
+ const ulint flags = index->table->is_temporary()
+ ? BTR_NO_LOCKING_FLAG : 0;
+ if (flags) {
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
+ } else {
+ mtr.set_named_space(index->space);
+ }
/* If the restoration does not succeed, then the same
transaction has deleted the record on which the cursor was,
@@ -2622,15 +3093,7 @@ row_upd_clust_step(
ulint mode;
-#ifdef UNIV_DEBUG
- /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC().
- Once it is fixed, remove the 'ifdef', 'if' and this comment. */
- if (!thr_get_trx(thr)->ddl) {
- DEBUG_SYNC_C_IF_THD(
- thr_get_trx(thr)->mysql_thd,
- "innodb_row_upd_clust_step_enter");
- }
-#endif /* UNIV_DEBUG */
+ DEBUG_SYNC_C_IF_THD(trx->mysql_thd, "innodb_row_upd_clust_step_enter");
if (dict_index_is_online_ddl(index)) {
ut_ad(node->table->id != DICT_INDEXES_ID);
@@ -2658,38 +3121,40 @@ row_upd_clust_step(
ut_ad(!dict_index_is_online_ddl(index));
- dict_drop_index_tree(btr_pcur_get_rec(pcur), &mtr);
+ dict_drop_index_tree(
+ btr_pcur_get_rec(pcur), pcur, trx, &mtr);
- mtr_commit(&mtr);
+ mtr.commit();
- mtr_start(&mtr);
+ mtr.start();
+ mtr.set_named_space(index->space);
success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur,
&mtr);
if (!success) {
err = DB_ERROR;
- mtr_commit(&mtr);
+ mtr.commit();
return(err);
}
}
rec = btr_pcur_get_rec(pcur);
- offsets = rec_get_offsets(rec, index, offsets_,
+ offsets = rec_get_offsets(rec, index, offsets_, true,
ULINT_UNDEFINED, &heap);
- if (!node->has_clust_rec_x_lock) {
+ if (!flags && !node->has_clust_rec_x_lock) {
err = lock_clust_rec_modify_check_and_lock(
0, btr_pcur_get_block(pcur),
rec, index, offsets, thr);
if (err != DB_SUCCESS) {
- mtr_commit(&mtr);
+ mtr.commit();
goto exit_func;
}
}
- ut_ad(lock_trx_has_rec_x_lock(thr_get_trx(thr), index->table,
+ ut_ad(lock_trx_has_rec_x_lock(trx, index->table,
btr_pcur_get_block(pcur),
page_rec_get_heap_no(rec)));
@@ -2697,11 +3162,11 @@ row_upd_clust_step(
if (node->is_delete) {
err = row_upd_del_mark_clust_rec(
+ node, index, offsets, thr, referenced,
#ifdef WITH_WSREP
- node, index, offsets, thr, referenced, foreign, &mtr);
-#else
- node, index, offsets, thr, referenced, &mtr);
-#endif /* WITH_WSREP */
+ foreign,
+#endif
+ &mtr);
if (err == DB_SUCCESS) {
node->state = UPD_NODE_UPDATE_ALL_SEC;
@@ -2725,11 +3190,12 @@ row_upd_clust_step(
if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
err = row_upd_clust_rec(
- node, index, offsets, &heap, thr, &mtr);
+ flags, node, index, offsets, &heap, thr, &mtr);
goto exit_func;
}
- row_upd_store_row(node);
+ row_upd_store_row(node, trx->mysql_thd,
+ thr->prebuilt ? thr->prebuilt->m_mysql_table : NULL);
if (row_upd_changes_ord_field_binary(index, node->update, thr,
node->row, node->ext)) {
@@ -2746,12 +3212,11 @@ row_upd_clust_step(
externally! */
err = row_upd_clust_rec_by_insert(
+ node, index, thr, referenced,
#ifdef WITH_WSREP
- node, index, thr, referenced, foreign, &mtr);
-#else
- node, index, thr, referenced, &mtr);
-#endif /* WITH_WSREP */
-
+ foreign,
+#endif
+ &mtr);
if (err != DB_SUCCESS) {
goto exit_func;
@@ -2760,7 +3225,7 @@ row_upd_clust_step(
node->state = UPD_NODE_UPDATE_ALL_SEC;
} else {
err = row_upd_clust_rec(
- node, index, offsets, &heap, thr, &mtr);
+ flags, node, index, offsets, &heap, thr, &mtr);
if (err != DB_SUCCESS) {
@@ -2785,7 +3250,7 @@ to this node, we assume that we have a persistent cursor which was on a
record, and the position of the cursor is stored in the cursor.
@return DB_SUCCESS if operation successfully completed, else error
code or DB_LOCK_WAIT */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
+static
dberr_t
row_upd(
/*====*/
@@ -2793,6 +3258,15 @@ row_upd(
que_thr_t* thr) /*!< in: query thread */
{
dberr_t err = DB_SUCCESS;
+ DBUG_ENTER("row_upd");
+
+ ut_ad(!thr_get_trx(thr)->in_rollback);
+
+ DBUG_PRINT("row_upd", ("table: %s", node->table->name.m_name));
+ DBUG_PRINT("row_upd", ("info bits in update vector: 0x" ULINTPFx,
+ node->update ? node->update->info_bits: 0));
+ DBUG_PRINT("row_upd", ("foreign_id: %s",
+ node->foreign ? node->foreign->id: "NULL"));
if (UNIV_LIKELY(node->in_mysql_interface)) {
@@ -2811,32 +3285,26 @@ row_upd(
switch (node->state) {
case UPD_NODE_UPDATE_CLUSTERED:
case UPD_NODE_INSERT_CLUSTERED:
- case UPD_NODE_INSERT_BLOB:
log_free_check();
+
err = row_upd_clust_step(node, thr);
if (err != DB_SUCCESS) {
- return(err);
+ DBUG_RETURN(err);
}
}
+ DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
+ "after_row_upd_clust");
+
if (node->index == NULL
|| (!node->is_delete
&& (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE))) {
- return(DB_SUCCESS);
+ DBUG_RETURN(DB_SUCCESS);
}
-#ifdef UNIV_DEBUG
- /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC().
- Once it is fixed, remove the 'ifdef', 'if' and this comment. */
- if (!thr_get_trx(thr)->ddl) {
- DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
- "after_row_upd_clust");
- }
-#endif /* UNIV_DEBUG */
-
DBUG_EXECUTE_IF("row_upd_skip_sec", node->index = NULL;);
do {
@@ -2852,7 +3320,7 @@ row_upd(
if (err != DB_SUCCESS) {
- return(err);
+ DBUG_RETURN(err);
}
}
@@ -2873,14 +3341,13 @@ row_upd(
node->state = UPD_NODE_UPDATE_CLUSTERED;
- return(err);
+ DBUG_RETURN(err);
}
/***********************************************************//**
Updates a row in a table. This is a high-level function used in SQL execution
graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
que_thr_t*
row_upd_step(
/*=========*/
@@ -2891,12 +3358,13 @@ row_upd_step(
que_node_t* parent;
dberr_t err = DB_SUCCESS;
trx_t* trx;
+ DBUG_ENTER("row_upd_step");
ut_ad(thr);
trx = thr_get_trx(thr);
- trx_start_if_not_started_xa(trx);
+ trx_start_if_not_started_xa(trx, true);
node = static_cast<upd_node_t*>(thr->run_node);
@@ -2934,7 +3402,7 @@ row_upd_step(
thr->run_node = sel_node;
- return(thr);
+ DBUG_RETURN(thr);
}
}
@@ -2960,7 +3428,7 @@ row_upd_step(
thr->run_node = parent;
- return(thr);
+ DBUG_RETURN(thr);
}
/* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
@@ -2971,7 +3439,7 @@ error_handling:
trx->error_state = err;
if (err != DB_SUCCESS) {
- return(NULL);
+ DBUG_RETURN(NULL);
}
/* DO THE TRIGGER ACTIONS HERE */
@@ -2988,6 +3456,5 @@ error_handling:
node->state = UPD_NODE_UPDATE_CLUSTERED;
- return(thr);
+ DBUG_RETURN(thr);
}
-#endif /* !UNIV_HOTBACKUP */
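The conversions from return() to DBUG_RETURN() in row_upd() and row_upd_step() follow the dbug tracing discipline: once a function opens a trace frame with DBUG_ENTER, every exit path must go through DBUG_RETURN, or the dbug call stack becomes unbalanced. A minimal illustration, assuming the MariaDB dbug header:

    #include <my_dbug.h>

    static int twice(int x)
    {
        DBUG_ENTER("twice");
        DBUG_PRINT("info", ("x: %d", x));
        if (x < 0) {
            /* A plain `return -1;` here would leave the frame opened
            by DBUG_ENTER on the stack and garble the trace. */
            DBUG_RETURN(-1);
        }
        DBUG_RETURN(2 * x);
    }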
diff --git a/storage/innobase/row/row0vers.cc b/storage/innobase/row/row0vers.cc
index d81f7889430..b3d0e93b732 100644
--- a/storage/innobase/row/row0vers.cc
+++ b/storage/innobase/row/row0vers.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,11 +25,6 @@ Created 2/6/1997 Heikki Tuuri
*******************************************************/
#include "row0vers.h"
-
-#ifdef UNIV_NONINL
-#include "row0vers.ic"
-#endif
-
#include "dict0dict.h"
#include "dict0boot.h"
#include "btr0btr.h"
@@ -45,33 +41,63 @@ Created 2/6/1997 Heikki Tuuri
#include "rem0cmp.h"
#include "read0read.h"
#include "lock0lock.h"
+#include "row0mysql.h"
+
+/** Check whether all non-virtual index fields are equal.
+@param[in] index the secondary index
+@param[in] a first index entry to compare
+@param[in] b second index entry to compare
+@return whether all non-virtual fields are equal */
+static
+bool
+row_vers_non_virtual_fields_equal(
+ const dict_index_t* index,
+ const dfield_t* a,
+ const dfield_t* b)
+{
+ const dict_field_t* end = &index->fields[index->n_fields];
-/*****************************************************************//**
-Finds out if an active transaction has inserted or modified a secondary
+ for (const dict_field_t* ifield = index->fields; ifield != end;
+ ifield++) {
+ if (!dict_col_is_virtual(ifield->col)
+ && cmp_dfield_dfield(a++, b++)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/** Determine if an active transaction has inserted or modified a secondary
index record.
-@return 0 if committed, else the active transaction id;
-NOTE that this function can return false positives but never false
-negatives. The caller must confirm all positive results by calling
-trx_is_active() while holding lock_sys->mutex. */
+@param[in] clust_rec clustered index record
+@param[in] clust_index clustered index
+@param[in] rec secondary index record
+@param[in] index secondary index
+@param[in] offsets rec_get_offsets(rec, index)
+@param[in,out] mtr mini-transaction
+@return the active transaction; state must be rechecked after
+trx_mutex_enter(), and trx->release_reference() must be invoked
+@retval NULL if the record was committed */
UNIV_INLINE
-trx_id_t
+trx_t*
row_vers_impl_x_locked_low(
-/*=======================*/
- const rec_t* clust_rec, /*!< in: clustered index record */
- dict_index_t* clust_index, /*!< in: the clustered index */
- const rec_t* rec, /*!< in: secondary index record */
- dict_index_t* index, /*!< in: the secondary index */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ const rec_t* clust_rec,
+ dict_index_t* clust_index,
+ const rec_t* rec,
+ dict_index_t* index,
+ const offset_t* offsets,
+ mtr_t* mtr)
{
- trx_id_t trx_id;
- ibool corrupt;
- ulint comp;
- ulint rec_del;
- const rec_t* version;
rec_t* prev_version = NULL;
- ulint* clust_offsets;
+ offset_t clust_offsets_[REC_OFFS_NORMAL_SIZE];
+ offset_t* clust_offsets = clust_offsets_;
mem_heap_t* heap;
+ dtuple_t* ientry = NULL;
+ mem_heap_t* v_heap = NULL;
+ dtuple_t* cur_vrow = NULL;
+
+ rec_offs_init(clust_offsets_);
DBUG_ENTER("row_vers_impl_x_locked_low");
@@ -79,13 +105,18 @@ row_vers_impl_x_locked_low(
heap = mem_heap_create(1024);
- clust_offsets = rec_get_offsets(
- clust_rec, clust_index, NULL, ULINT_UNDEFINED, &heap);
+ clust_offsets = rec_get_offsets(clust_rec, clust_index, clust_offsets,
+ true, ULINT_UNDEFINED, &heap);
+
+ const trx_id_t trx_id = row_get_rec_trx_id(
+ clust_rec, clust_index, clust_offsets);
- trx_id = row_get_rec_trx_id(clust_rec, clust_index, clust_offsets);
- corrupt = FALSE;
+ ut_ad(!clust_index->table->is_temporary());
- if (!trx_rw_is_active(trx_id, &corrupt)) {
+ bool corrupt = false;
+ trx_t* trx = trx_rw_is_active(trx_id, &corrupt, true);
+
+ if (trx == 0) {
/* The transaction that modified or inserted clust_rec is no
longer active, or it is corrupt: no implicit lock on rec */
if (corrupt) {
@@ -97,12 +128,22 @@ row_vers_impl_x_locked_low(
DBUG_RETURN(0);
}
- comp = page_rec_is_comp(rec);
+ const ulint comp = page_rec_is_comp(rec);
ut_ad(index->table == clust_index->table);
ut_ad(!!comp == dict_table_is_comp(index->table));
ut_ad(!comp == !page_rec_is_comp(clust_rec));
- rec_del = rec_get_deleted_flag(rec, comp);
+ const ulint rec_del = rec_get_deleted_flag(rec, comp);
+
+ if (dict_index_has_virtual(index)) {
+ ulint est_size = DTUPLE_EST_ALLOC(index->n_fields);
+
+		/* Allocate the dtuple for virtual columns extracted from the
+		undo log with its own heap, so as to avoid it being freed as
+		we iterate in the version loop below. */
+ v_heap = mem_heap_create(est_size);
+ ientry = row_rec_to_index_entry(rec, index, offsets, v_heap);
+ }
/* We look up if some earlier version, which was modified by
the trx_id transaction, of the clustered index record would
@@ -115,13 +156,14 @@ row_vers_impl_x_locked_low(
modify rec, and does not necessarily have an implicit x-lock
on rec. */
- for (version = clust_rec;; version = prev_version) {
+ for (const rec_t* version = clust_rec;; version = prev_version) {
row_ext_t* ext;
- const dtuple_t* row;
+ dtuple_t* row;
dtuple_t* entry;
ulint vers_del;
trx_id_t prev_trx_id;
mem_heap_t* old_heap = heap;
+ dtuple_t* vrow = NULL;
/* We keep the semaphore in mtr on the clust_rec page, so
that no other transaction can update it and get an
@@ -131,18 +173,27 @@ row_vers_impl_x_locked_low(
trx_undo_prev_version_build(
clust_rec, mtr, version, clust_index, clust_offsets,
- heap, &prev_version);
+ heap, &prev_version, NULL,
+ dict_index_has_virtual(index) ? &vrow : NULL, 0);
+
+ trx_mutex_enter(trx);
+ const bool committed = trx_state_eq(
+ trx, TRX_STATE_COMMITTED_IN_MEMORY);
+ trx_mutex_exit(trx);
/* The oldest visible clustered index version must not be
delete-marked, because we never start a transaction by
inserting a delete-marked record. */
- ut_ad(prev_version
- || !rec_get_deleted_flag(version, comp)
- || !trx_rw_is_active(trx_id, NULL));
+ ut_ad(committed || prev_version
+ || !rec_get_deleted_flag(version, comp));
- /* Free version and clust_offsets. */
+ /* Free version. */
mem_heap_free(old_heap);
+ if (committed) {
+ goto not_locked;
+ }
+
if (prev_version == NULL) {
/* We reached the oldest visible version without
@@ -162,15 +213,17 @@ row_vers_impl_x_locked_low(
or updated, the leaf page record always is
created with a clear delete-mark flag.
(We never insert a delete-marked record.) */
- trx_id = 0;
+not_locked:
+ trx->release_reference();
+ trx = 0;
}
break;
}
clust_offsets = rec_get_offsets(
- prev_version, clust_index, NULL, ULINT_UNDEFINED,
- &heap);
+ prev_version, clust_index, clust_offsets, true,
+ ULINT_UNDEFINED, &heap);
vers_del = rec_get_deleted_flag(prev_version, comp);
@@ -185,14 +238,62 @@ row_vers_impl_x_locked_low(
clust_offsets,
NULL, NULL, NULL, &ext, heap);
- entry = row_build_index_entry(row, ext, index, heap);
+ if (dict_index_has_virtual(index)) {
+ if (vrow) {
+ /* Keep the virtual row info for the next
+ version */
+ cur_vrow = dtuple_copy(vrow, v_heap);
+ dtuple_dup_v_fld(cur_vrow, v_heap);
+ }
- /* entry may be NULL if a record was inserted in place
- of a deleted record, and the BLOB pointers of the new
- record were not initialized yet. But in that case,
- prev_version should be NULL. */
+ if (!cur_vrow) {
+ /* Build index entry out of row */
+ entry = row_build_index_entry(row, ext, index,
+ heap);
+
+ /* entry could only be NULL (the
+ clustered index record could contain
+ BLOB pointers that are NULL) if we
+ were accessing a freshly inserted
+ record before it was fully inserted.
+ prev_version cannot possibly be such
+ an incomplete record, because its
+ transaction would have to be committed
+ in order for later versions of the
+ record to be able to exist. */
+ ut_ad(entry);
+
+			/* If the indexed virtual columns have changed,
+			there must be an undo log record to generate
+			vrow. Otherwise, nothing changed, so there is
+			no need to compare. */
+ if (!row_vers_non_virtual_fields_equal(
+ index,
+ ientry->fields, entry->fields)) {
+ if (rec_del != vers_del) {
+ break;
+ }
+ } else if (!rec_del) {
+ break;
+ }
+
+ goto result_check;
+ } else {
+ ut_ad(row->n_v_fields == cur_vrow->n_v_fields);
+ dtuple_copy_v_fields(row, cur_vrow);
+ }
+ }
+
+ entry = row_build_index_entry(row, ext, index, heap);
- ut_a(entry != NULL);
+ /* entry could only be NULL (the clustered index
+ record could contain BLOB pointers that are NULL) if
+ we were accessing a freshly inserted record before it
+ was fully inserted. prev_version cannot possibly be
+ such an incomplete record, because its transaction
+ would have to be committed in order for later versions
+ of the record to be able to exist. */
+ ut_ad(entry);
/* If we get here, we know that the trx_id transaction
modified prev_version. Let us check if prev_version
@@ -205,22 +306,7 @@ row_vers_impl_x_locked_low(
/* We check if entry and rec are identified in the alphabetical
ordering */
-
- if (!trx_rw_is_active(trx_id, &corrupt)) {
- /* Transaction no longer active: no implicit
- x-lock. This situation should only be possible
- because we are not holding lock_sys->mutex. */
- ut_ad(!lock_mutex_own());
- if (corrupt) {
- lock_report_trx_id_insanity(
- trx_id,
- prev_version, clust_index,
- clust_offsets,
- trx_sys_get_max_trx_id());
- }
- trx_id = 0;
- break;
- } else if (0 == cmp_dtuple_rec(entry, rec, offsets)) {
+ if (0 == cmp_dtuple_rec(entry, rec, offsets)) {
/* The delete marks of rec and prev_version should be
equal for rec to be in the state required by
prev_version */
@@ -250,44 +336,48 @@ row_vers_impl_x_locked_low(
break;
}
- if (trx_id != prev_trx_id) {
+result_check:
+ if (trx->id != prev_trx_id) {
/* prev_version was the first version modified by
the trx_id transaction: no implicit x-lock */
-
- trx_id = 0;
- break;
+ goto not_locked;
}
}
- DBUG_PRINT("info", ("Implicit lock is held by trx:%lu",
- static_cast<unsigned long>(trx_id)));
+ if (trx) {
+ DBUG_PRINT("info", ("Implicit lock is held by trx:" TRX_ID_FMT,
+ trx_id));
+ }
+
+ if (v_heap != NULL) {
+ mem_heap_free(v_heap);
+ }
mem_heap_free(heap);
- DBUG_RETURN(trx_id);
+ DBUG_RETURN(trx);
}
-/*****************************************************************//**
-Finds out if an active transaction has inserted or modified a secondary
+/** Determine if an active transaction has inserted or modified a secondary
index record.
-@return 0 if committed, else the active transaction id;
-NOTE that this function can return false positives but never false
-negatives. The caller must confirm all positive results by calling
-trx_is_active() while holding lock_sys->mutex. */
-UNIV_INTERN
-trx_id_t
+@param[in] rec secondary index record
+@param[in] index secondary index
+@param[in] offsets rec_get_offsets(rec, index)
+@return the active transaction; state must be rechecked after
+trx_mutex_enter(), and trx->release_reference() must be invoked
+@retval NULL if the record was committed */
+trx_t*
row_vers_impl_x_locked(
-/*===================*/
- const rec_t* rec, /*!< in: record in a secondary index */
- dict_index_t* index, /*!< in: the secondary index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ const rec_t* rec,
+ dict_index_t* index,
+ const offset_t* offsets)
{
- dict_index_t* clust_index;
- const rec_t* clust_rec;
- trx_id_t trx_id;
mtr_t mtr;
+ trx_t* trx;
+ const rec_t* clust_rec;
+ dict_index_t* clust_index;
ut_ad(!lock_mutex_own());
- ut_ad(!mutex_own(&trx_sys->mutex));
+ ut_ad(!trx_sys_mutex_own());
mtr_start(&mtr);
@@ -301,7 +391,7 @@ row_vers_impl_x_locked(
clust_rec = row_get_clust_rec(
BTR_SEARCH_LEAF, rec, index, &clust_index, &mtr);
- if (UNIV_UNLIKELY(!clust_rec)) {
+ if (!clust_rec) {
/* In a rare case it is possible that no clust rec is found
for a secondary index record: if in row0umod.cc
row_undo_mod_remove_clust_low() we have already removed the
@@ -314,84 +404,525 @@ row_vers_impl_x_locked(
a rollback we always undo the modifications to secondary index
records before the clustered index record. */
- trx_id = 0;
+ trx = 0;
} else {
- trx_id = row_vers_impl_x_locked_low(
+ trx = row_vers_impl_x_locked_low(
clust_rec, clust_index, rec, index, offsets, &mtr);
+
+ ut_ad(trx == 0 || trx->is_referenced());
}
mtr_commit(&mtr);
- return(trx_id);
+ return(trx);
}
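The signature change from trx_id_t to trx_t* shifts the contract onto the caller: instead of re-validating a possibly stale transaction id under lock_sys->mutex, the caller now receives a referenced transaction object, must recheck its state (under trx->mutex in the real code), and must drop the reference with trx->release_reference(). A sketch of the caller side, with stand-in types:

    // Stand-ins for this sketch only; the real trx_t is reference
    // counted and its state must be rechecked under trx->mutex.
    struct trx_t {
        bool committed = false;
        int  n_ref     = 1;
        void release_reference() { --n_ref; }
    };

    bool rec_is_implicitly_locked(trx_t* trx) {
        if (!trx) {
            return false;        // record was committed: no implicit lock
        }
        bool locked = !trx->committed;   // simplified state recheck
        trx->release_reference();        // mandatory, or trx is never freed
        return locked;
    }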
/*****************************************************************//**
Finds out if we must preserve a delete marked earlier version of a clustered
index record, because it is >= the purge view.
-@return TRUE if earlier version should be preserved */
-UNIV_INTERN
+@param[in] trx_id transaction id in the version
+@param[in] name table name
+@param[in,out]	mtr	mini-transaction holding the latch on the
+ clustered index record; it will also hold
+ the latch on purge_view
+@return TRUE if earlier version should be preserved */
ibool
row_vers_must_preserve_del_marked(
/*==============================*/
- trx_id_t trx_id, /*!< in: transaction id in the version */
- mtr_t* mtr) /*!< in: mtr holding the latch on the
- clustered index record; it will also
- hold the latch on purge_view */
+ trx_id_t trx_id,
+ const table_name_t& name,
+ mtr_t* mtr)
{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_S));
- mtr_s_lock(&(purge_sys->latch), mtr);
+ mtr_s_lock(&purge_sys->latch, mtr);
- return(!read_view_sees_trx_id(purge_sys->view, trx_id));
+ return(!purge_sys->view.changes_visible(trx_id, name));
}
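In other words, a delete-marked old version must be kept exactly as long as the purge view cannot yet see the deleting transaction's changes, because some reader may still need it. A greatly simplified sketch of that test (the real ReadView::changes_visible also consults the set of transactions that were active when the view was created):

    #include <cstdint>

    struct PurgeView {
        // ids below this limit were committed before the view was created
        uint64_t up_limit_id;
        bool changes_visible(uint64_t trx_id) const {
            return trx_id < up_limit_id;
        }
    };

    bool must_preserve_del_marked(const PurgeView& v, uint64_t trx_id) {
        return !v.changes_visible(trx_id);   // still needed by a reader
    }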
-/*****************************************************************//**
-Finds out if a version of the record, where the version >= the current
+/** Build virtual column values from the current clustered index record data
+@param[in,out]	row		the clustered index row in dtuple form
+@param[in] clust_index clustered index
+@param[in] index the secondary index
+@param[in] heap heap used to build virtual dtuple
+@param[in,out] vcol_info virtual column information. */
+static
+void
+row_vers_build_clust_v_col(
+ dtuple_t* row,
+ dict_index_t* clust_index,
+ dict_index_t* index,
+ mem_heap_t* heap,
+ purge_vcol_info_t* vcol_info)
+{
+ mem_heap_t* local_heap = NULL;
+ VCOL_STORAGE *vcol_storage= NULL;
+ THD* thd= current_thd;
+ TABLE* maria_table= 0;
+ byte* record= 0;
+
+ ut_ad(dict_index_has_virtual(index));
+ ut_ad(index->table == clust_index->table);
+
+ if (vcol_info != NULL) {
+ vcol_info->set_used();
+ maria_table = vcol_info->table();
+ }
+ DEBUG_SYNC(current_thd, "ib_clust_v_col_before_row_allocated");
+
+ innobase_allocate_row_for_vcol(thd, index,
+ &local_heap,
+ &maria_table,
+ &record,
+ &vcol_storage);
+
+ if (vcol_info && !vcol_info->table()) {
+ vcol_info->set_table(maria_table);
+ goto func_exit;
+ }
+
+ for (ulint i = 0; i < dict_index_get_n_fields(index); i++) {
+ const dict_field_t* ind_field = dict_index_get_nth_field(
+ index, i);
+
+ if (dict_col_is_virtual(ind_field->col)) {
+ const dict_v_col_t* col;
+
+ col = reinterpret_cast<const dict_v_col_t*>(
+ ind_field->col);
+
+ innobase_get_computed_value(
+ row, col, clust_index, &local_heap,
+ heap, NULL, thd, maria_table, record, NULL,
+ NULL, NULL);
+ }
+ }
+
+func_exit:
+ if (local_heap) {
+ if (vcol_storage)
+ innobase_free_row_for_vcol(vcol_storage);
+ mem_heap_free(local_heap);
+ }
+}
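row_vers_build_clust_v_col recomputes indexed virtual columns by calling back into the server layer (innobase_get_computed_value). Stripped of the InnoDB plumbing, the underlying idea is just evaluating each generation expression over the base row; a generic sketch:

    #include <functional>
    #include <vector>

    struct Row {
        std::vector<int> base;   // stored columns
        std::vector<int> virt;   // generated (virtual) columns
    };

    void fill_virtual(Row& row,
                      const std::vector<std::function<int(const Row&)>>& expr) {
        row.virt.resize(expr.size());
        for (size_t i = 0; i < expr.size(); i++) {
            row.virt[i] = expr[i](row);   // evaluate generation expression
        }
    }

For example, expr could hold one lambda per virtual column, such as [](const Row& r) { return r.base[0] + r.base[1]; }.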
+
+/** Build latest virtual column data from undo log
+@param[in] in_purge whether this is the purge thread
+@param[in] rec clustered index record
+@param[in] clust_index clustered index
+@param[in,out] clust_offsets offsets on the clustered index record
+@param[in] index the secondary index
+@param[in] roll_ptr the rollback pointer for the purging record
+@param[in] trx_id trx id for the purging record
+@param[in,out] v_heap heap used to build vrow
+@param[out]	vrow		dtuple holding the virtual rows
+@param[in,out] mtr mtr holding the latch on rec */
+static
+void
+row_vers_build_cur_vrow_low(
+ bool in_purge,
+ const rec_t* rec,
+ dict_index_t* clust_index,
+ offset_t* clust_offsets,
+ dict_index_t* index,
+ roll_ptr_t roll_ptr,
+ trx_id_t trx_id,
+ mem_heap_t* v_heap,
+ dtuple_t** vrow,
+ mtr_t* mtr)
+{
+ const rec_t* version;
+ rec_t* prev_version;
+ mem_heap_t* heap = NULL;
+ ulint num_v = dict_table_get_n_v_cols(index->table);
+ const dfield_t* field;
+ ulint i;
+ bool all_filled = false;
+
+ *vrow = dtuple_create_with_vcol(v_heap, 0, num_v);
+ dtuple_init_v_fld(*vrow);
+
+ for (i = 0; i < num_v; i++) {
+ dfield_get_type(dtuple_get_nth_v_field(*vrow, i))->mtype
+ = DATA_MISSING;
+ }
+
+ version = rec;
+
+	/* If this is called by the purge thread, set the
+	TRX_UNDO_PREV_IN_PURGE bit to search the undo log until we hit
+	the current undo log record with roll_ptr */
+ const ulint status = in_purge
+ ? TRX_UNDO_PREV_IN_PURGE | TRX_UNDO_GET_OLD_V_VALUE
+ : TRX_UNDO_GET_OLD_V_VALUE;
+
+ while (!all_filled) {
+ mem_heap_t* heap2 = heap;
+ heap = mem_heap_create(1024);
+ roll_ptr_t cur_roll_ptr = row_get_rec_roll_ptr(
+ version, clust_index, clust_offsets);
+
+ trx_undo_prev_version_build(
+ rec, mtr, version, clust_index, clust_offsets,
+ heap, &prev_version, NULL, vrow, status);
+
+ if (heap2) {
+ mem_heap_free(heap2);
+ }
+
+ if (!prev_version) {
+ /* Versions end here */
+ break;
+ }
+
+ clust_offsets = rec_get_offsets(prev_version, clust_index,
+ NULL,
+ true, ULINT_UNDEFINED, &heap);
+
+ ulint entry_len = dict_index_get_n_fields(index);
+
+ all_filled = true;
+
+ for (i = 0; i < entry_len; i++) {
+ const dict_field_t* ind_field
+ = dict_index_get_nth_field(index, i);
+ const dict_col_t* col = ind_field->col;
+
+ if (!dict_col_is_virtual(col)) {
+ continue;
+ }
+
+ const dict_v_col_t* v_col
+ = reinterpret_cast<const dict_v_col_t*>(col);
+ field = dtuple_get_nth_v_field(*vrow, v_col->v_pos);
+
+ if (dfield_get_type(field)->mtype == DATA_MISSING) {
+ all_filled = false;
+ break;
+ }
+
+ }
+
+ trx_id_t rec_trx_id = row_get_rec_trx_id(
+ prev_version, clust_index, clust_offsets);
+
+ if (rec_trx_id < trx_id || roll_ptr == cur_roll_ptr) {
+ break;
+ }
+
+ version = prev_version;
+ }
+
+ mem_heap_free(heap);
+}
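The loop above fills in virtual column values from progressively older undo records, stopping once every indexed slot has a value or the version chain ends. The same backfill shape in isolation, with std::nullopt standing in for DATA_MISSING:

    #include <optional>
    #include <vector>

    // One slot per indexed virtual column.
    using VersionVals = std::vector<std::optional<int>>;

    VersionVals build_cur_vrow(const std::vector<VersionVals>& history) {
        VersionVals out(history.empty() ? 0 : history.front().size());
        for (const VersionVals& v : history) {   // newest to oldest
            bool all_filled = true;
            for (size_t i = 0; i < out.size(); i++) {
                if (!out[i] && v[i]) {
                    out[i] = v[i];               // keep the newest value
                }
                all_filled = all_filled && out[i].has_value();
            }
            if (all_filled) {
                break;       // no need to visit older versions
            }
        }
        return out;
    }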
+
+/** Check whether a virtual column value stored in a secondary virtual
+index matches that of the current clustered index record, which is
+recreated from the information stored in the undo log.
+@param[in] rec record in the clustered index
+@param[in] icentry the index entry built from a cluster row
+@param[in] clust_index cluster index
+@param[in] clust_offsets offsets on the cluster record
+@param[in] index the secondary index
+@param[in] ientry the secondary index entry
+@param[in] roll_ptr the rollback pointer for the purging record
+@param[in] trx_id trx id for the purging record
+@param[in,out] v_heap heap used to build virtual dtuple
+@param[in,out]	vrow		dtuple holding the virtual rows (if needed)
+@param[in] mtr mtr holding the latch on rec
+@return true if matches, false otherwise */
+static
+bool
+row_vers_vc_matches_cluster(
+ const rec_t* rec,
+ const dtuple_t* icentry,
+ dict_index_t* clust_index,
+ offset_t* clust_offsets,
+ dict_index_t* index,
+ const dtuple_t* ientry,
+ roll_ptr_t roll_ptr,
+ trx_id_t trx_id,
+ mem_heap_t* v_heap,
+ dtuple_t** vrow,
+ mtr_t* mtr)
+{
+ const rec_t* version;
+ rec_t* prev_version;
+ mem_heap_t* heap2;
+ mem_heap_t* heap = NULL;
+ mem_heap_t* tuple_heap;
+ ulint num_v = dict_table_get_n_v_cols(index->table);
+ bool compare[REC_MAX_N_FIELDS];
+ ulint n_fields = dtuple_get_n_fields(ientry);
+ ulint n_non_v_col = 0;
+ ulint n_cmp_v_col = 0;
+ const dfield_t* field1;
+ dfield_t* field2;
+ ulint i;
+
+ /* First compare non-virtual columns (primary keys) */
+ ut_ad(index->n_fields == n_fields);
+ ut_ad(n_fields == dtuple_get_n_fields(icentry));
+ {
+ const dfield_t* a = ientry->fields;
+ const dfield_t* b = icentry->fields;
+
+ for (const dict_field_t *ifield = index->fields,
+ *const end = &index->fields[index->n_fields];
+ ifield != end; ifield++, a++, b++) {
+ if (!dict_col_is_virtual(ifield->col)) {
+ if (cmp_dfield_dfield(a, b)) {
+ return false;
+ }
+ n_non_v_col++;
+ }
+ }
+ }
+
+ tuple_heap = mem_heap_create(1024);
+
+ ut_ad(n_fields > n_non_v_col);
+
+ *vrow = dtuple_create_with_vcol(v_heap ? v_heap : tuple_heap, 0, num_v);
+ dtuple_init_v_fld(*vrow);
+
+ for (i = 0; i < num_v; i++) {
+ dfield_get_type(dtuple_get_nth_v_field(*vrow, i))->mtype
+ = DATA_MISSING;
+ compare[i] = false;
+ }
+
+ version = rec;
+
+ while (n_cmp_v_col < n_fields - n_non_v_col) {
+ heap2 = heap;
+ heap = mem_heap_create(1024);
+ roll_ptr_t cur_roll_ptr = row_get_rec_roll_ptr(
+ version, clust_index, clust_offsets);
+
+ ut_ad(cur_roll_ptr != 0);
+ ut_ad(roll_ptr != 0);
+
+ trx_undo_prev_version_build(
+ rec, mtr, version, clust_index, clust_offsets,
+ heap, &prev_version, NULL, vrow,
+ TRX_UNDO_PREV_IN_PURGE | TRX_UNDO_GET_OLD_V_VALUE);
+
+ if (heap2) {
+ mem_heap_free(heap2);
+ }
+
+ if (!prev_version) {
+ /* Versions end here */
+ goto func_exit;
+ }
+
+ clust_offsets = rec_get_offsets(prev_version, clust_index,
+ NULL,
+ true, ULINT_UNDEFINED, &heap);
+
+ ulint entry_len = dict_index_get_n_fields(index);
+
+ for (i = 0; i < entry_len; i++) {
+ const dict_field_t* ind_field
+ = dict_index_get_nth_field(index, i);
+ const dict_col_t* col = ind_field->col;
+ field1 = dtuple_get_nth_field(ientry, i);
+
+ if (!dict_col_is_virtual(col)) {
+ continue;
+ }
+
+ const dict_v_col_t* v_col
+ = reinterpret_cast<const dict_v_col_t*>(col);
+ field2
+ = dtuple_get_nth_v_field(*vrow, v_col->v_pos);
+
+ if ((dfield_get_type(field2)->mtype != DATA_MISSING)
+ && (!compare[v_col->v_pos])) {
+
+ if (ind_field->prefix_len != 0
+ && !dfield_is_null(field2)
+ && field2->len > ind_field->prefix_len) {
+ field2->len = ind_field->prefix_len;
+ }
+
+ /* The index field mismatch */
+ if (v_heap
+ || cmp_dfield_dfield(field2, field1) != 0) {
+ if (v_heap) {
+ dtuple_dup_v_fld(*vrow, v_heap);
+ }
+
+ mem_heap_free(tuple_heap);
+ mem_heap_free(heap);
+ return(false);
+ }
+
+ compare[v_col->v_pos] = true;
+ n_cmp_v_col++;
+ }
+ }
+
+ trx_id_t rec_trx_id = row_get_rec_trx_id(
+ prev_version, clust_index, clust_offsets);
+
+ if (rec_trx_id < trx_id || roll_ptr == cur_roll_ptr) {
+ break;
+ }
+
+ version = prev_version;
+ }
+
+func_exit:
+ if (n_cmp_v_col == 0) {
+ *vrow = NULL;
+ }
+
+ mem_heap_free(tuple_heap);
+ mem_heap_free(heap);
+
+	/* FIXME: In the case where n_cmp_v_col is not the same as
+	n_fields - n_non_v_col, a callback is needed to compare the
+	remaining columns. For the time being, we return true. */
+ return (true);
+}
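One detail worth noting in the comparison loop above: when the secondary index stores only a column prefix (ind_field->prefix_len != 0), the freshly computed value is truncated before being compared. The same rule in isolation:

    #include <string>

    // Mirror of the prefix_len handling: truncate the computed value to
    // the index's column prefix length before comparing it with what
    // the secondary index stores (like field2->len = prefix_len above).
    bool prefix_equal(std::string computed, const std::string& stored,
                      size_t prefix_len) {
        if (prefix_len != 0 && computed.size() > prefix_len) {
            computed.resize(prefix_len);
        }
        return computed == stored;
    }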
+
+/** Build a dtuple containing virtual column data for the current clustered index
+@param[in] in_purge called by purge thread
+@param[in] rec cluster index rec
+@param[in] clust_index cluster index
+@param[in] clust_offsets cluster rec offset
+@param[in] index secondary index
+@param[in] ientry secondary index rec
+@param[in] roll_ptr roll_ptr for the purge record
+@param[in] trx_id transaction ID on the purging record
+@param[in,out] heap heap memory
+@param[in,out]	v_heap		heap memory to keep virtual column dtuple
+@param[in] mtr mtr holding the latch on rec
+@param[in,out] vcol_info virtual column information for purge thread
+@return dtuple contains virtual column data */
+static
+dtuple_t*
+row_vers_build_cur_vrow(
+ bool in_purge,
+ const rec_t* rec,
+ dict_index_t* clust_index,
+ offset_t** clust_offsets,
+ dict_index_t* index,
+ const dtuple_t* ientry,
+ roll_ptr_t roll_ptr,
+ trx_id_t trx_id,
+ mem_heap_t* heap,
+ mem_heap_t* v_heap,
+ mtr_t* mtr,
+ purge_vcol_info_t* vcol_info)
+{
+ dtuple_t* cur_vrow = NULL;
+
+ roll_ptr_t t_roll_ptr = row_get_rec_roll_ptr(
+ rec, clust_index, *clust_offsets);
+
+	/* If the row is newly inserted, then the virtual
+	columns need to be computed */
+ if (trx_undo_roll_ptr_is_insert(t_roll_ptr)) {
+
+ ut_ad(!rec_get_deleted_flag(rec, page_rec_is_comp(rec)));
+
+		/* This is a newly inserted record and cannot
+		be deleted, so the externally stored field
+		cannot be freed yet. */
+ dtuple_t* row = row_build(ROW_COPY_POINTERS, clust_index,
+ rec, *clust_offsets,
+ NULL, NULL, NULL, NULL, heap);
+
+ if (vcol_info && !vcol_info->is_used()) {
+ mtr->commit();
+ }
+
+ row_vers_build_clust_v_col(
+ row, clust_index, index, heap, vcol_info);
+
+ if (vcol_info != NULL && vcol_info->is_first_fetch()) {
+ return NULL;
+ }
+
+ cur_vrow = dtuple_copy(row, v_heap);
+ dtuple_dup_v_fld(cur_vrow, v_heap);
+ } else {
+ /* Try to fetch virtual column data from undo log */
+ row_vers_build_cur_vrow_low(
+ in_purge, rec, clust_index, *clust_offsets,
+ index, roll_ptr, trx_id, v_heap, &cur_vrow, mtr);
+ }
+
+ *clust_offsets = rec_get_offsets(rec, clust_index, NULL, true,
+ ULINT_UNDEFINED, &heap);
+ return(cur_vrow);
+}
+
+/** Finds out if a version of the record, where the version >= the current
purge view, should have ientry as its secondary index entry. We check
if there is any not delete marked version of the record where the trx
-id >= purge view, and the secondary index entry and ientry are identified in
-the alphabetical ordering; exactly in this case we return TRUE.
-@return TRUE if earlier version should have */
-UNIV_INTERN
-ibool
+id >= purge view, and the secondary index entry == ientry; exactly in
+this case we return TRUE.
+@param[in] also_curr TRUE if also rec is included in the versions
+ to search; otherwise only versions prior
+ to it are searched
+@param[in] rec record in the clustered index; the caller
+ must have a latch on the page
+@param[in] mtr mtr holding the latch on rec; it will
+ also hold the latch on purge_view
+@param[in] index secondary index
+@param[in] ientry secondary index entry
+@param[in] roll_ptr roll_ptr for the purge record
+@param[in] trx_id transaction ID on the purging record
+@param[in,out] vcol_info virtual column information for purge thread.
+@return TRUE if earlier version should have */
+bool
row_vers_old_has_index_entry(
-/*=========================*/
- ibool also_curr,/*!< in: TRUE if also rec is included in the
- versions to search; otherwise only versions
- prior to it are searched */
- const rec_t* rec, /*!< in: record in the clustered index; the
- caller must have a latch on the page */
- mtr_t* mtr, /*!< in: mtr holding the latch on rec; it will
- also hold the latch on purge_view */
- dict_index_t* index, /*!< in: the secondary index */
- const dtuple_t* ientry) /*!< in: the secondary index entry */
+ bool also_curr,
+ const rec_t* rec,
+ mtr_t* mtr,
+ dict_index_t* index,
+ const dtuple_t* ientry,
+ roll_ptr_t roll_ptr,
+ trx_id_t trx_id,
+ purge_vcol_info_t* vcol_info)
{
const rec_t* version;
rec_t* prev_version;
dict_index_t* clust_index;
- ulint* clust_offsets;
+ offset_t* clust_offsets;
mem_heap_t* heap;
mem_heap_t* heap2;
- const dtuple_t* row;
+ dtuple_t* row;
const dtuple_t* entry;
ulint comp;
+ dtuple_t* vrow = NULL;
+ mem_heap_t* v_heap = NULL;
+ dtuple_t* cur_vrow = NULL;
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(mtr_memo_contains_page_flagged(mtr, rec, MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_S_FIX));
+ ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_S));
+ ut_ad(also_curr || !vcol_info);
clust_index = dict_table_get_first_index(index->table);
comp = page_rec_is_comp(rec);
ut_ad(!dict_table_is_comp(index->table) == !comp);
heap = mem_heap_create(1024);
- clust_offsets = rec_get_offsets(rec, clust_index, NULL,
+ clust_offsets = rec_get_offsets(rec, clust_index, NULL, true,
ULINT_UNDEFINED, &heap);
+ if (dict_index_has_virtual(index)) {
+ v_heap = mem_heap_create(100);
+ }
+
+ DBUG_EXECUTE_IF("ib_purge_virtual_index_crash",
+ DBUG_SUICIDE(););
+
if (also_curr && !rec_get_deleted_flag(rec, comp)) {
row_ext_t* ext;
@@ -405,34 +936,119 @@ row_vers_old_has_index_entry(
row = row_build(ROW_COPY_POINTERS, clust_index,
rec, clust_offsets,
NULL, NULL, NULL, &ext, heap);
- entry = row_build_index_entry(row, ext, index, heap);
- /* If entry == NULL, the record contains unset BLOB
- pointers. This must be a freshly inserted record. If
- this is called from
- row_purge_remove_sec_if_poss_low(), the thread will
- hold latches on the clustered index and the secondary
- index. Because the insert works in three steps:
-
- (1) insert the record to clustered index
- (2) store the BLOBs and update BLOB pointers
- (3) insert records to secondary indexes
-
- the purge thread can safely ignore freshly inserted
- records and delete the secondary index record. The
- thread that inserted the new record will be inserting
- the secondary index records. */
-
- /* NOTE that we cannot do the comparison as binary
- fields because the row is maybe being modified so that
- the clustered index record has already been updated to
- a different binary value in a char field, but the
- collation identifies the old and new value anyway! */
- if (entry && !dtuple_coll_cmp(ientry, entry)) {
+ if (dict_index_has_virtual(index)) {
+
+#ifdef DBUG_OFF
+# define dbug_v_purge false
+#else /* DBUG_OFF */
+ bool dbug_v_purge = false;
+#endif /* DBUG_OFF */
+
+ DBUG_EXECUTE_IF(
+ "ib_purge_virtual_index_callback",
+ dbug_v_purge = true;);
+
+ roll_ptr_t t_roll_ptr = row_get_rec_roll_ptr(
+ rec, clust_index, clust_offsets);
+
+			/* If the row is newly inserted, then the virtual
+			columns need to be computed */
+ if (trx_undo_roll_ptr_is_insert(t_roll_ptr)
+ || dbug_v_purge) {
+
+ if (vcol_info && !vcol_info->is_used()) {
+ mtr->commit();
+ }
+
+ row_vers_build_clust_v_col(
+ row, clust_index, index, heap,
+ vcol_info);
+
+ if (vcol_info && vcol_info->is_first_fetch()) {
+ goto unsafe_to_purge;
+ }
+
+ entry = row_build_index_entry(
+ row, ext, index, heap);
+ if (entry && !dtuple_coll_cmp(ientry, entry)) {
+ goto unsafe_to_purge;
+ }
+ } else {
+ /* Build index entry out of row */
+ entry = row_build_index_entry(row, ext, index, heap);
+ /* entry could only be NULL if
+ the clustered index record is an uncommitted
+ inserted record whose BLOBs have not been
+ written yet. The secondary index record
+ can be safely removed, because it cannot
+ possibly refer to this incomplete
+ clustered index record. (Insert would
+ always first be completed for the
+ clustered index record, then proceed to
+ secondary indexes.) */
+
+ if (entry && row_vers_vc_matches_cluster(
+ rec, entry,
+ clust_index, clust_offsets,
+ index, ientry, roll_ptr,
+ trx_id, NULL, &vrow, mtr)) {
+ goto unsafe_to_purge;
+ }
+ }
+ clust_offsets = rec_get_offsets(rec, clust_index, NULL,
+ true,
+ ULINT_UNDEFINED, &heap);
+ } else {
+
+ entry = row_build_index_entry(
+ row, ext, index, heap);
+
+ /* If entry == NULL, the record contains unset BLOB
+ pointers. This must be a freshly inserted record. If
+ this is called from
+ row_purge_remove_sec_if_poss_low(), the thread will
+ hold latches on the clustered index and the secondary
+ index. Because the insert works in three steps:
+
+ (1) insert the record to clustered index
+ (2) store the BLOBs and update BLOB pointers
+ (3) insert records to secondary indexes
+
+ the purge thread can safely ignore freshly inserted
+ records and delete the secondary index record. The
+ thread that inserted the new record will be inserting
+ the secondary index records. */
- mem_heap_free(heap);
+ /* NOTE that we cannot do the comparison as binary
+ fields because the row is maybe being modified so that
+ the clustered index record has already been updated to
+ a different binary value in a char field, but the
+ collation identifies the old and new value anyway! */
+ if (entry && !dtuple_coll_cmp(ientry, entry)) {
+unsafe_to_purge:
+ mem_heap_free(heap);
- return(TRUE);
+ if (v_heap) {
+ mem_heap_free(v_heap);
+ }
+ return true;
+ }
+ }
+ } else if (dict_index_has_virtual(index)) {
+		/* The current clustered index record could be
+		deleted, but its previous version might not be. We will
+		need to get the virtual column data from the undo record
+		associated with the current clustered index record */
+
+ cur_vrow = row_vers_build_cur_vrow(
+ also_curr, rec, clust_index, &clust_offsets,
+ index, ientry, roll_ptr, trx_id, heap, v_heap, mtr,
+ vcol_info);
+
+ if (vcol_info && vcol_info->is_first_fetch()) {
+ goto unsafe_to_purge;
}
}
@@ -441,21 +1057,46 @@ row_vers_old_has_index_entry(
for (;;) {
heap2 = heap;
heap = mem_heap_create(1024);
+ vrow = NULL;
+
trx_undo_prev_version_build(rec, mtr, version,
clust_index, clust_offsets,
- heap, &prev_version);
+ heap, &prev_version, NULL,
+ dict_index_has_virtual(index)
+ ? &vrow : NULL, 0);
mem_heap_free(heap2); /* free version and clust_offsets */
if (!prev_version) {
/* Versions end here */
-
mem_heap_free(heap);
- return(FALSE);
+ if (v_heap) {
+ mem_heap_free(v_heap);
+ }
+
+ return false;
}
clust_offsets = rec_get_offsets(prev_version, clust_index,
- NULL, ULINT_UNDEFINED, &heap);
+ NULL, true,
+ ULINT_UNDEFINED, &heap);
+
+ if (dict_index_has_virtual(index)) {
+ if (vrow) {
+ /* Keep the virtual row info for the next
+ version, unless it is changed */
+ mem_heap_empty(v_heap);
+ cur_vrow = dtuple_copy(vrow, v_heap);
+ dtuple_dup_v_fld(cur_vrow, v_heap);
+ }
+
+ if (!cur_vrow) {
+ /* Nothing for this index has changed,
+ continue */
+ version = prev_version;
+ continue;
+ }
+ }
if (!rec_get_deleted_flag(prev_version, comp)) {
row_ext_t* ext;
@@ -466,6 +1107,13 @@ row_vers_old_has_index_entry(
row = row_build(ROW_COPY_POINTERS, clust_index,
prev_version, clust_offsets,
NULL, NULL, NULL, &ext, heap);
+
+ if (dict_index_has_virtual(index)) {
+ ut_ad(cur_vrow);
+ ut_ad(row->n_v_fields == cur_vrow->n_v_fields);
+ dtuple_copy_v_fields(row, cur_vrow);
+ }
+
entry = row_build_index_entry(row, ext, index, heap);
/* If entry == NULL, the record contains unset
@@ -481,10 +1129,7 @@ row_vers_old_has_index_entry(
and new value anyway! */
if (entry && !dtuple_coll_cmp(ientry, entry)) {
-
- mem_heap_free(heap);
-
- return(TRUE);
+ goto unsafe_to_purge;
}
}
@@ -496,8 +1141,7 @@ row_vers_old_has_index_entry(
Constructs the version of a clustered index record which a consistent
read should see. We assume that the trx id stored in rec is such that
the consistent read should not see rec in its present version.
-@return DB_SUCCESS or DB_MISSING_HISTORY */
-UNIV_INTERN
+@return DB_SUCCESS or DB_MISSING_HISTORY */
dberr_t
row_vers_build_for_consistent_read(
/*===============================*/
@@ -507,19 +1151,20 @@ row_vers_build_for_consistent_read(
of this records */
mtr_t* mtr, /*!< in: mtr holding the latch on rec */
dict_index_t* index, /*!< in: the clustered index */
- ulint** offsets,/*!< in/out: offsets returned by
+ offset_t** offsets,/*!< in/out: offsets returned by
rec_get_offsets(rec, index) */
- read_view_t* view, /*!< in: the consistent read view */
+ ReadView* view, /*!< in: the consistent read view */
mem_heap_t** offset_heap,/*!< in/out: memory heap from which
the offsets are allocated */
mem_heap_t* in_heap,/*!< in: memory heap from which the memory for
*old_vers is allocated; memory for possible
intermediate versions is allocated and freed
locally within the function */
- rec_t** old_vers)/*!< out, own: old version, or NULL
+ rec_t** old_vers,/*!< out, own: old version, or NULL
if the history is missing or the record
does not exist in the view, that is,
it was freshly inserted afterwards */
+ dtuple_t** vrow) /*!< out: virtual row */
{
const rec_t* version;
rec_t* prev_version;
@@ -529,77 +1174,52 @@ row_vers_build_for_consistent_read(
dberr_t err;
ut_ad(dict_index_is_clust(index));
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(mtr_memo_contains_page_flagged(mtr, rec, MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_S_FIX));
+ ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_S));
ut_ad(rec_offs_validate(rec, index, *offsets));
trx_id = row_get_rec_trx_id(rec, index, *offsets);
- ut_ad(!read_view_sees_trx_id(view, trx_id));
+ ut_ad(!view->changes_visible(trx_id, index->table->name));
+
+ ut_ad(!vrow || !(*vrow));
version = rec;
for (;;) {
- mem_heap_t* heap2 = heap;
- trx_undo_rec_t* undo_rec;
- roll_ptr_t roll_ptr;
- undo_no_t undo_no;
+ mem_heap_t* prev_heap = heap;
+
heap = mem_heap_create(1024);
- /* If we have high-granularity consistent read view and
- creating transaction of the view is the same as trx_id in
- the record we see this record only in the case when
- undo_no of the record is < undo_no in the view. */
+ if (vrow) {
+ *vrow = NULL;
+ }
- if (view->type == VIEW_HIGH_GRANULARITY
- && view->creator_trx_id == trx_id) {
+ /* If purge can't see the record then we can't rely on
+ the UNDO log record. */
- roll_ptr = row_get_rec_roll_ptr(version, index,
- *offsets);
- undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
- undo_no = trx_undo_rec_get_undo_no(undo_rec);
- mem_heap_empty(heap);
+ bool purge_sees = trx_undo_prev_version_build(
+ rec, mtr, version, index, *offsets, heap,
+ &prev_version, NULL, vrow, 0);
- if (view->undo_no > undo_no) {
- /* The view already sees this version: we can
- copy it to in_heap and return */
+ err = (purge_sees) ? DB_SUCCESS : DB_MISSING_HISTORY;
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- ut_a(!rec_offs_any_null_extern(
- version, *offsets));
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
-
- buf = static_cast<byte*>(mem_heap_alloc(
- in_heap, rec_offs_size(*offsets)));
-
- *old_vers = rec_copy(buf, version, *offsets);
- rec_offs_make_valid(*old_vers, index,
- *offsets);
- err = DB_SUCCESS;
- break;
- }
- }
-
- err = trx_undo_prev_version_build(rec, mtr, version, index,
- *offsets, heap,
- &prev_version)
- ? DB_SUCCESS : DB_MISSING_HISTORY;
- if (heap2) {
- mem_heap_free(heap2); /* free version */
+ if (prev_heap != NULL) {
+ mem_heap_free(prev_heap);
}
if (prev_version == NULL) {
/* It was a freshly inserted version */
*old_vers = NULL;
+ ut_ad(!vrow || !(*vrow));
break;
}
- *offsets = rec_get_offsets(prev_version, index, *offsets,
- ULINT_UNDEFINED, offset_heap);
+ *offsets = rec_get_offsets(
+ prev_version, index, *offsets,
+ true, ULINT_UNDEFINED, offset_heap);
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
ut_a(!rec_offs_any_null_extern(prev_version, *offsets));
@@ -607,7 +1227,7 @@ row_vers_build_for_consistent_read(
trx_id = row_get_rec_trx_id(prev_version, index, *offsets);
- if (read_view_sees_trx_id(view, trx_id)) {
+ if (view->changes_visible(trx_id, index->table->name)) {
/* The view already sees this version: we can copy
it to in_heap and return */
@@ -618,11 +1238,16 @@ row_vers_build_for_consistent_read(
*old_vers = rec_copy(buf, prev_version, *offsets);
rec_offs_make_valid(*old_vers, index, *offsets);
+
+ if (vrow && *vrow) {
+ *vrow = dtuple_copy(*vrow, in_heap);
+ dtuple_dup_v_fld(*vrow, in_heap);
+ }
break;
}
version = prev_version;
- }/* for (;;) */
+ }
mem_heap_free(heap);
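The loop that was just simplified implements the classic MVCC walk: follow ever-older versions built from the undo log until one is visible to the read view, or conclude that the record was inserted after the view was created. Reduced to its skeleton, with stand-in types:

    #include <cstdint>
    #include <vector>

    struct Version { uint64_t trx_id; int payload; };

    struct ReadView {                        // simplified visibility test
        uint64_t low_limit;
        bool changes_visible(uint64_t id) const { return id < low_limit; }
    };

    const Version* build_for_consistent_read(
        const std::vector<Version>& chain,   // newest to oldest
        const ReadView& view) {
        for (const Version& v : chain) {
            if (view.changes_visible(v.trx_id)) {
                return &v;                   // first visible version
            }
        }
        return nullptr;  // freshly inserted after the view: invisible
    }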
@@ -632,7 +1257,6 @@ row_vers_build_for_consistent_read(
/*****************************************************************//**
Constructs the last committed version of a clustered index record,
which should be seen by a semi-consistent read. */
-UNIV_INTERN
void
row_vers_build_for_semi_consistent_read(
/*====================================*/
@@ -642,7 +1266,7 @@ row_vers_build_for_semi_consistent_read(
of this record */
mtr_t* mtr, /*!< in: mtr holding the latch on rec */
dict_index_t* index, /*!< in: the clustered index */
- ulint** offsets,/*!< in/out: offsets returned by
+ offset_t** offsets,/*!< in/out: offsets returned by
rec_get_offsets(rec, index) */
mem_heap_t** offset_heap,/*!< in/out: memory heap from which
the offsets are allocated */
@@ -650,9 +1274,11 @@ row_vers_build_for_semi_consistent_read(
*old_vers is allocated; memory for possible
intermediate versions is allocated and freed
locally within the function */
- const rec_t** old_vers)/*!< out: rec, old version, or NULL if the
+ const rec_t** old_vers,/*!< out: rec, old version, or NULL if the
record does not exist in the view, that is,
it was freshly inserted afterwards */
+ dtuple_t** vrow) /*!< out: virtual row, old version, or NULL
+ if it is not updated in the view */
{
const rec_t* version;
mem_heap_t* heap = NULL;
@@ -660,15 +1286,14 @@ row_vers_build_for_semi_consistent_read(
trx_id_t rec_trx_id = 0;
ut_ad(dict_index_is_clust(index));
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(mtr_memo_contains_page_flagged(mtr, rec, MTR_MEMO_PAGE_X_FIX
+ | MTR_MEMO_PAGE_S_FIX));
+ ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_S));
ut_ad(rec_offs_validate(rec, index, *offsets));
version = rec;
+ ut_ad(!vrow || !(*vrow));
for (;;) {
const trx_t* version_trx;
@@ -681,7 +1306,7 @@ row_vers_build_for_semi_consistent_read(
rec_trx_id = version_trx_id;
}
- mutex_enter(&trx_sys->mutex);
+ trx_sys_mutex_enter();
version_trx = trx_get_rw_trx_by_id(version_trx_id);
/* Because version_trx is a read-write transaction,
its state cannot change from or to NOT_STARTED while
@@ -692,7 +1317,7 @@ row_vers_build_for_semi_consistent_read(
TRX_STATE_COMMITTED_IN_MEMORY)) {
version_trx = NULL;
}
- mutex_exit(&trx_sys->mutex);
+ trx_sys_mutex_exit();
if (!version_trx) {
committed_version_trx:
@@ -705,6 +1330,9 @@ committed_version_trx:
if (rec == version) {
*old_vers = rec;
+ if (vrow) {
+ *vrow = NULL;
+ }
break;
}
@@ -722,6 +1350,7 @@ committed_version_trx:
version = rec;
*offsets = rec_get_offsets(version,
index, *offsets,
+ true,
ULINT_UNDEFINED,
offset_heap);
}
@@ -732,6 +1361,10 @@ committed_version_trx:
*old_vers = rec_copy(buf, version, *offsets);
rec_offs_make_valid(*old_vers, index, *offsets);
+ if (vrow && *vrow) {
+ *vrow = dtuple_copy(*vrow, in_heap);
+ dtuple_dup_v_fld(*vrow, in_heap);
+ }
break;
}
@@ -742,7 +1375,8 @@ committed_version_trx:
if (!trx_undo_prev_version_build(rec, mtr, version, index,
*offsets, heap,
- &prev_version)) {
+ &prev_version,
+ in_heap, vrow, 0)) {
mem_heap_free(heap);
heap = heap2;
heap2 = NULL;
@@ -756,11 +1390,12 @@ committed_version_trx:
if (prev_version == NULL) {
/* It was a freshly inserted version */
*old_vers = NULL;
+ ut_ad(!vrow || !(*vrow));
break;
}
version = prev_version;
- *offsets = rec_get_offsets(version, index, *offsets,
+ *offsets = rec_get_offsets(version, index, *offsets, true,
ULINT_UNDEFINED, offset_heap);
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
ut_a(!rec_offs_any_null_extern(version, *offsets));
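Note on the hunks above: both row_vers_build_for_consistent_read() and row_vers_build_for_semi_consistent_read() gain a vrow out-parameter for old virtual-column values, but the core algorithm is unchanged: starting from the newest record, trx_undo_prev_version_build() reconstructs each previous version until one the read view can see (or, for semi-consistent reads, one created by a committed transaction) is found. A minimal, self-contained C++ sketch of that version-chain walk follows; all types here are invented stand-ins, not InnoDB's:

    // Hypothetical model of the walk performed by
    // row_vers_build_for_semi_consistent_read(): step through
    // successively older versions until a committed one is found.
    #include <cstdio>
    #include <vector>

    struct Version {
        int  value;      // stand-in for the clustered index record
        bool committed;  // stand-in for "creating trx has committed"
    };

    // Returns the newest committed version, or nullptr if every version
    // in the chain was created by a still-active transaction.
    static const Version* last_committed(const std::vector<Version>& chain)
    {
        // chain[0] is the newest version; higher indexes are older ones,
        // as they would be rebuilt from the undo log.
        for (const Version& v : chain) {
            if (v.committed) {
                return &v;
            }
        }
        return nullptr; // freshly inserted: the reader sees no row
    }

    int main()
    {
        std::vector<Version> chain = {{3, false}, {2, false}, {1, true}};
        if (const Version* v = last_committed(chain)) {
            std::printf("semi-consistent read sees value %d\n", v->value);
        }
    }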
diff --git a/storage/innobase/snappy.cmake b/storage/innobase/snappy.cmake
new file mode 100644
index 00000000000..2810472cef6
--- /dev/null
+++ b/storage/innobase/snappy.cmake
@@ -0,0 +1,32 @@
+# Copyright (C) 2015, MariaDB Corporation. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
+
+SET(WITH_INNODB_SNAPPY AUTO CACHE STRING
+  "Build with snappy. Possible values are 'ON', 'OFF' and 'AUTO'; the default is 'AUTO'")
+
+MACRO (MYSQL_CHECK_SNAPPY)
+ IF (WITH_INNODB_SNAPPY STREQUAL "ON" OR WITH_INNODB_SNAPPY STREQUAL "AUTO")
+ CHECK_INCLUDE_FILES(snappy-c.h HAVE_SNAPPY_H)
+ CHECK_LIBRARY_EXISTS(snappy snappy_uncompress "" HAVE_SNAPPY_SHARED_LIB)
+
+ IF(HAVE_SNAPPY_SHARED_LIB AND HAVE_SNAPPY_H)
+ ADD_DEFINITIONS(-DHAVE_SNAPPY=1)
+ LINK_LIBRARIES(snappy)
+ ELSE()
+ IF (WITH_INNODB_SNAPPY STREQUAL "ON")
+ MESSAGE(FATAL_ERROR "Required snappy library is not found")
+ ENDIF()
+ ENDIF()
+ ENDIF()
+ENDMACRO()
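The macro above probes for snappy-c.h and libsnappy, defines HAVE_SNAPPY=1 and links the library when both are found, and fails hard only when WITH_INNODB_SNAPPY=ON was requested explicitly. For context, a hedged sketch of the C API this makes available; the real call sites live in the page-compression layer, and everything below is illustrative only:

    // Compile with -DHAVE_SNAPPY=1 and -lsnappy, as the macro arranges.
    #include <cstdio>
    #include <vector>
    #ifdef HAVE_SNAPPY
    # include <snappy-c.h>
    #endif

    int main()
    {
    #ifdef HAVE_SNAPPY
        const char in[] = "hello, page compression";
        // Worst-case output size for this input, per the snappy C API.
        size_t out_len = snappy_max_compressed_length(sizeof in);
        std::vector<char> out(out_len);

        if (snappy_compress(in, sizeof in, out.data(), &out_len)
            == SNAPPY_OK) {
            std::printf("compressed %zu -> %zu bytes\n",
                        sizeof in, out_len);
        }
    #else
        std::puts("built without snappy support");
    #endif
    }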
diff --git a/storage/innobase/srv/srv0conc.cc b/storage/innobase/srv/srv0conc.cc
index bf57c308acf..9936635a055 100644
--- a/storage/innobase/srv/srv0conc.cc
+++ b/storage/innobase/srv/srv0conc.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2020, MariaDB Corporation.
+Copyright (c) 2011, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2015, 2020, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -39,141 +39,50 @@ Created 2011/04/18 Sunny Bains
*******************************************************/
#include "srv0srv.h"
-#include "sync0sync.h"
#include "trx0trx.h"
-
-#include <mysql/plugin.h>
+#include "row0mysql.h"
+#include "dict0dict.h"
+#include <mysql/service_thd_wait.h>
#include <mysql/service_wsrep.h>
/** Number of times a thread is allowed to enter InnoDB within the same
SQL query after it has once got the ticket. */
-UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
+ulong srv_n_free_tickets_to_enter = 500;
-#ifdef HAVE_ATOMIC_BUILTINS
/** Maximum sleep delay (in micro-seconds), value of 0 disables it. */
-UNIV_INTERN ulong srv_adaptive_max_sleep_delay = 150000;
-#endif /* HAVE_ATOMIC_BUILTINS */
+ulong srv_adaptive_max_sleep_delay = 150000;
-UNIV_INTERN ulong srv_thread_sleep_delay = 10000;
+ulong srv_thread_sleep_delay = 10000;
/** We are prepared for a situation that we have this many threads waiting for
a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
value. */
-UNIV_INTERN ulint srv_max_n_threads = 0;
+ulint srv_max_n_threads = 0;
/** The following controls how many threads we let inside InnoDB concurrently:
threads waiting for locks are not counted into the number because otherwise
we could get a deadlock. Value of 0 will disable the concurrency check. */
-UNIV_INTERN ulong srv_thread_concurrency = 0;
-
-#ifndef HAVE_ATOMIC_BUILTINS
-
-/** This mutex protects srv_conc data structures */
-static os_fast_mutex_t srv_conc_mutex;
-
-/** Concurrency list node */
-typedef UT_LIST_NODE_T(struct srv_conc_slot_t) srv_conc_node_t;
-
-/** Slot for a thread waiting in the concurrency control queue. */
-struct srv_conc_slot_t{
- os_event_t event; /*!< event to wait for;
- os_event_set() and os_event_reset()
- are protected by srv_conc_mutex */
- ibool reserved; /*!< TRUE if slot
- reserved */
- ibool wait_ended; /*!< TRUE when another thread has
- already set the event and the thread
- in this slot is free to proceed; but
- reserved may still be TRUE at that
- point */
- srv_conc_node_t srv_conc_queue; /*!< queue node */
-#ifdef WITH_WSREP
- void *thd; /*!< to see priority */
-#endif
-};
-
-/** Queue of threads waiting to get in */
-typedef UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue_t;
-
-static srv_conc_queue_t srv_conc_queue;
-
-/** Array of wait slots */
-static srv_conc_slot_t* srv_conc_slots;
-
-#if defined(UNIV_PFS_MUTEX)
-/* Key to register srv_conc_mutex_key with performance schema */
-UNIV_INTERN mysql_pfs_key_t srv_conc_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-#endif /* !HAVE_ATOMIC_BUILTINS */
+ulong srv_thread_concurrency = 0;
/** Variables tracking the active and waiting threads. */
struct srv_conc_t {
- char pad[64 - (sizeof(ulint) + sizeof(lint))];
-
- /** Number of transactions that have declared_to_be_inside_innodb set.
- It used to be a non-error for this value to drop below zero temporarily.
- This is no longer true. We'll, however, keep the lint datatype to add
- assertions to catch any corner cases that we may have missed. */
+ char pad[CACHE_LINE_SIZE - (sizeof(ulint) + sizeof(lint))];
- volatile lint n_active;
+ /** Number of transactions that have declared_to_be_inside_innodb */
+ ulint n_active;
/** Number of OS threads waiting in the FIFO for permission to
enter InnoDB */
- volatile lint n_waiting;
+ ulint n_waiting;
};
/* Control variables for tracking concurrency. */
static srv_conc_t srv_conc;
/*********************************************************************//**
-Initialise the concurrency management data structures */
-void
-srv_conc_init(void)
-/*===============*/
-{
-#ifndef HAVE_ATOMIC_BUILTINS
- ulint i;
-
- /* Init the server concurrency restriction data structures */
-
- os_fast_mutex_init(srv_conc_mutex_key, &srv_conc_mutex);
-
- UT_LIST_INIT(srv_conc_queue);
-
- srv_conc_slots = static_cast<srv_conc_slot_t*>(
- mem_zalloc(OS_THREAD_MAX_N * sizeof(*srv_conc_slots)));
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
- srv_conc_slot_t* conc_slot = &srv_conc_slots[i];
-
- conc_slot->event = os_event_create();
- ut_a(conc_slot->event);
-#ifdef WITH_WSREP
- conc_slot->thd = NULL;
-#endif /* WITH_WSREP */
- }
-#endif /* !HAVE_ATOMIC_BUILTINS */
-}
-
-/*********************************************************************//**
-Free the concurrency management data structures */
-void
-srv_conc_free(void)
-/*===============*/
-{
-#ifndef HAVE_ATOMIC_BUILTINS
- os_fast_mutex_free(&srv_conc_mutex);
- mem_free(srv_conc_slots);
- srv_conc_slots = NULL;
-#endif /* !HAVE_ATOMIC_BUILTINS */
-}
-
-#ifdef HAVE_ATOMIC_BUILTINS
-/*********************************************************************//**
Note that a user thread is entering InnoDB. */
static
void
@@ -214,20 +123,34 @@ srv_conc_enter_innodb_with_atomics(
ulint sleep_in_us;
#ifdef WITH_WSREP
if (trx->is_wsrep() && wsrep_trx_is_aborting(trx->mysql_thd)) {
- if (wsrep_debug)
- fprintf(stderr,
- "srv_conc_enter due to MUST_ABORT");
+ if (wsrep_debug) {
+ ib::info() <<
+ "srv_conc_enter due to MUST_ABORT";
+ }
srv_conc_force_enter_innodb(trx);
return;
}
#endif /* WITH_WSREP */
- if (srv_conc.n_active < (lint) srv_thread_concurrency) {
+ if (srv_thread_concurrency == 0) {
+
+ if (notified_mysql) {
+
+ (void) my_atomic_addlint(
+ &srv_conc.n_waiting, -1);
+
+ thd_wait_end(trx->mysql_thd);
+ }
+
+ return;
+ }
+
+ if (srv_conc.n_active < srv_thread_concurrency) {
ulint n_active;
/* Check if there are any free tickets. */
- n_active = os_atomic_increment_lint(
- &srv_conc.n_active, 1);
+ n_active = my_atomic_addlint(
+ &srv_conc.n_active, 1) + 1;
if (n_active <= srv_thread_concurrency) {
@@ -235,8 +158,8 @@ srv_conc_enter_innodb_with_atomics(
if (notified_mysql) {
- (void) os_atomic_decrement_lint(
- &srv_conc.n_waiting, 1);
+ (void) my_atomic_addlint(
+ &srv_conc.n_waiting, -1);
thd_wait_end(trx->mysql_thd);
}
@@ -259,26 +182,20 @@ srv_conc_enter_innodb_with_atomics(
/* Since there were no free seats, we relinquish
the overbooked ticket. */
- (void) os_atomic_decrement_lint(
- &srv_conc.n_active, 1);
+ (void) my_atomic_addlint(
+ &srv_conc.n_active, -1);
}
if (!notified_mysql) {
- (void) os_atomic_increment_lint(
+ (void) my_atomic_addlint(
&srv_conc.n_waiting, 1);
- /* Release possible search system latch this
- thread has */
-
- if (trx->has_search_latch) {
- trx_search_latch_release_if_reserved(trx);
- }
-
thd_wait_begin(trx->mysql_thd, THD_WAIT_USER_LOCK);
notified_mysql = TRUE;
}
+ DEBUG_SYNC_C("user_thread_waiting");
trx->op_info = "sleeping before entering InnoDB";
sleep_in_us = srv_thread_sleep_delay;
@@ -315,294 +232,41 @@ srv_conc_exit_innodb_with_atomics(
trx->n_tickets_to_enter_innodb = 0;
trx->declared_to_be_inside_innodb = FALSE;
- (void) os_atomic_decrement_lint(&srv_conc.n_active, 1);
-}
-#else
-/*********************************************************************//**
-Note that a user thread is leaving InnoDB code. */
-static
-void
-srv_conc_exit_innodb_without_atomics(
-/*=================================*/
- trx_t* trx) /*!< in/out: transaction */
-{
- srv_conc_slot_t* slot;
-
- os_fast_mutex_lock(&srv_conc_mutex);
-
- ut_ad(srv_conc.n_active > 0);
- srv_conc.n_active--;
- trx->declared_to_be_inside_innodb = FALSE;
- trx->n_tickets_to_enter_innodb = 0;
-
- slot = NULL;
-
- if (srv_conc.n_active < (lint) srv_thread_concurrency) {
-#ifdef WITH_WSREP
- srv_conc_slot_t* wsrep_slot;
-#endif
- /* Look for a slot where a thread is waiting and no other
- thread has yet released the thread */
-
- for (slot = UT_LIST_GET_FIRST(srv_conc_queue);
- slot != NULL && slot->wait_ended == TRUE;
- slot = UT_LIST_GET_NEXT(srv_conc_queue, slot)) {
-
- /* No op */
- }
-
-#ifdef WITH_WSREP
- /* look for aborting trx, they must be released asap */
- wsrep_slot= slot;
- while (wsrep_slot && (wsrep_slot->wait_ended == TRUE ||
- !wsrep_trx_is_aborting(wsrep_slot->thd))) {
- wsrep_slot = UT_LIST_GET_NEXT(srv_conc_queue, wsrep_slot);
- }
- if (wsrep_slot) {
- slot = wsrep_slot;
- if (wsrep_debug)
- fprintf(stderr, "WSREP: releasing aborting thd\n");
- }
-#endif
- if (slot != NULL) {
- slot->wait_ended = TRUE;
-
- /* We increment the count on behalf of the released
- thread */
-
- srv_conc.n_active++;
- }
- }
-
- if (slot != NULL) {
- os_event_set(slot->event);
- }
-
- os_fast_mutex_unlock(&srv_conc_mutex);
+ (void) my_atomic_addlint(&srv_conc.n_active, -1);
}
/*********************************************************************//**
-Handle the scheduling of a user thread that wants to enter InnoDB. */
-static
-void
-srv_conc_enter_innodb_without_atomics(
-/*==================================*/
- trx_t* trx) /*!< in/out: transaction that wants
- to enter InnoDB */
-{
- ulint i;
- srv_conc_slot_t* slot = NULL;
- ibool has_slept = FALSE;
-
- os_fast_mutex_lock(&srv_conc_mutex);
-retry:
- if (UNIV_UNLIKELY(trx->declared_to_be_inside_innodb)) {
- os_fast_mutex_unlock(&srv_conc_mutex);
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: trying to declare trx"
- " to enter InnoDB, but\n"
- "InnoDB: it already is declared.\n", stderr);
- trx_print(stderr, trx, 0);
- putc('\n', stderr);
- return;
- }
-
- ut_ad(srv_conc.n_active >= 0);
-
- if (srv_conc.n_active < (lint) srv_thread_concurrency) {
-
- srv_conc.n_active++;
- trx->declared_to_be_inside_innodb = TRUE;
- trx->n_tickets_to_enter_innodb = srv_n_free_tickets_to_enter;
-
- os_fast_mutex_unlock(&srv_conc_mutex);
-
- return;
- }
-#ifdef WITH_WSREP
- if (trx->is_wsrep() && wsrep_thd_is_brute_force(trx->mysql_thd)) {
- srv_conc_force_enter_innodb(trx);
- return;
- }
-#endif
-
- /* If the transaction is not holding resources, let it sleep
- for srv_thread_sleep_delay microseconds, and try again then */
-
- if (!has_slept && !trx->has_search_latch
- && NULL == UT_LIST_GET_FIRST(trx->lock.trx_locks)) {
-
- has_slept = TRUE; /* We let it sleep only once to avoid
- starvation */
-
- srv_conc.n_waiting++;
-
- os_fast_mutex_unlock(&srv_conc_mutex);
-
- trx->op_info = "sleeping before joining InnoDB queue";
-
- /* Peter Zaitsev suggested that we take the sleep away
- altogether. But the sleep may be good in pathological
- situations of lots of thread switches. Simply put some
- threads aside for a while to reduce the number of thread
- switches. */
- if (srv_thread_sleep_delay > 0) {
- os_thread_sleep(srv_thread_sleep_delay);
- }
-
- trx->op_info = "";
-
- os_fast_mutex_lock(&srv_conc_mutex);
-
- srv_conc.n_waiting--;
-
- goto retry;
- }
-
- /* Too many threads inside: put the current thread to a queue */
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
- slot = srv_conc_slots + i;
-
- if (!slot->reserved) {
-
- break;
- }
- }
-
- if (i == OS_THREAD_MAX_N) {
- /* Could not find a free wait slot, we must let the
- thread enter */
-
- srv_conc.n_active++;
- trx->declared_to_be_inside_innodb = TRUE;
- trx->n_tickets_to_enter_innodb = 0;
-
- os_fast_mutex_unlock(&srv_conc_mutex);
-
- return;
- }
-
- /* Release possible search system latch this thread has */
- if (trx->has_search_latch) {
- trx_search_latch_release_if_reserved(trx);
- }
-
- /* Add to the queue */
- slot->reserved = TRUE;
- slot->wait_ended = FALSE;
-#ifdef WITH_WSREP
- slot->thd = trx->mysql_thd;
-#endif
-
- UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot);
-
- os_event_reset(slot->event);
-
- srv_conc.n_waiting++;
-
-#ifdef WITH_WSREP
- if (trx->is_wsrep() && wsrep_trx_is_aborting(trx->mysql_thd)) {
- os_fast_mutex_unlock(&srv_conc_mutex);
- if (wsrep_debug)
- fprintf(stderr, "srv_conc_enter due to MUST_ABORT");
- trx->declared_to_be_inside_innodb = TRUE;
- trx->n_tickets_to_enter_innodb = srv_n_free_tickets_to_enter;
- return;
- }
- trx->wsrep_event = slot->event;
-#endif /* WITH_WSREP */
- os_fast_mutex_unlock(&srv_conc_mutex);
-
- /* Go to wait for the event; when a thread leaves InnoDB it will
- release this thread */
-
- ut_ad(!trx->has_search_latch);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
- trx->op_info = "waiting in InnoDB queue";
-
- thd_wait_begin(trx->mysql_thd, THD_WAIT_USER_LOCK);
-
- os_event_wait(slot->event);
- thd_wait_end(trx->mysql_thd);
-#ifdef WITH_WSREP
- trx->wsrep_event = NULL;
-#endif /* WITH_WSREP */
-
- trx->op_info = "";
-
- os_fast_mutex_lock(&srv_conc_mutex);
-
- srv_conc.n_waiting--;
-
- /* NOTE that the thread which released this thread already
- incremented the thread counter on behalf of this thread */
-
- slot->reserved = FALSE;
-#ifdef WITH_WSREP
- slot->thd = NULL;
-#endif
-
- UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot);
-
- trx->declared_to_be_inside_innodb = TRUE;
- trx->n_tickets_to_enter_innodb = srv_n_free_tickets_to_enter;
-
- os_fast_mutex_unlock(&srv_conc_mutex);
-}
-#endif /* HAVE_ATOMIC_BUILTINS */
-
-/*********************************************************************//**
Puts an OS thread to wait if there are too many concurrent threads
-(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
-UNIV_INTERN
+(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue.
+@param[in,out] prebuilt row prebuilt handler */
void
srv_conc_enter_innodb(
-/*==================*/
- trx_t* trx) /*!< in: transaction object associated with the
- thread */
+ row_prebuilt_t* prebuilt)
{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
+ trx_t* trx = prebuilt->trx;
+
+ ut_ad(!sync_check_iterate(sync_check()));
-#ifdef HAVE_ATOMIC_BUILTINS
srv_conc_enter_innodb_with_atomics(trx);
-#else
- srv_conc_enter_innodb_without_atomics(trx);
-#endif /* HAVE_ATOMIC_BUILTINS */
}
/*********************************************************************//**
This lets a thread enter InnoDB regardless of the number of threads inside
InnoDB. This must be called when a thread ends a lock wait. */
-UNIV_INTERN
void
srv_conc_force_enter_innodb(
/*========================*/
trx_t* trx) /*!< in: transaction object associated with the
thread */
{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!sync_check_iterate(sync_check()));
if (!srv_thread_concurrency) {
return;
}
- ut_ad(srv_conc.n_active >= 0);
-
-#ifdef HAVE_ATOMIC_BUILTINS
- (void) os_atomic_increment_lint(&srv_conc.n_active, 1);
-#else
- os_fast_mutex_lock(&srv_conc_mutex);
- ++srv_conc.n_active;
- os_fast_mutex_unlock(&srv_conc_mutex);
-#endif /* HAVE_ATOMIC_BUILTINS */
+ (void) my_atomic_addlint(&srv_conc.n_active, 1);
trx->n_tickets_to_enter_innodb = 1;
trx->declared_to_be_inside_innodb = TRUE;
@@ -611,7 +275,6 @@ srv_conc_force_enter_innodb(
/*********************************************************************//**
This must be called when a thread exits InnoDB in a lock wait or at the
end of an SQL statement. */
-UNIV_INTERN
void
srv_conc_force_exit_innodb(
/*=======================*/
@@ -625,20 +288,13 @@ srv_conc_force_exit_innodb(
return;
}
-#ifdef HAVE_ATOMIC_BUILTINS
srv_conc_exit_innodb_with_atomics(trx);
-#else
- srv_conc_exit_innodb_without_atomics(trx);
-#endif /* HAVE_ATOMIC_BUILTINS */
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!sync_check_iterate(sync_check()));
}
/*********************************************************************//**
Get the count of threads waiting inside InnoDB. */
-UNIV_INTERN
ulint
srv_conc_get_waiting_threads(void)
/*==============================*/
@@ -648,7 +304,6 @@ srv_conc_get_waiting_threads(void)
/*********************************************************************//**
Get the count of threads active inside InnoDB. */
-UNIV_INTERN
ulint
srv_conc_get_active_threads(void)
/*==============================*/
@@ -660,25 +315,28 @@ srv_conc_get_active_threads(void)
UNIV_INTERN
void
wsrep_srv_conc_cancel_wait(
-/*==================*/
+/*=======================*/
trx_t* trx) /*!< in: transaction object associated with the
thread */
{
#ifdef HAVE_ATOMIC_BUILTINS
- /* aborting transactions will enter innodb by force in
+ /* aborting transactions will enter innodb by force in
srv_conc_enter_innodb_with_atomics(). No need to cancel here,
thr will wake up after os_sleep and let to enter innodb
*/
- if (wsrep_debug)
- fprintf(stderr, "WSREP: conc slot cancel, no atomics\n");
+ if (wsrep_debug) {
+ ib::info() << "WSREP: conc slot cancel, no atomics";
+ }
#else
- os_fast_mutex_lock(&srv_conc_mutex);
+ // JAN: TODO: MySQL 5.7
+ //os_fast_mutex_lock(&srv_conc_mutex);
if (trx->wsrep_event) {
- if (wsrep_debug)
- fprintf(stderr, "WSREP: conc slot cancel\n");
+ if (wsrep_debug) {
+ ib::info() << "WSREP: conc slot cancel";
+ }
os_event_set(trx->wsrep_event);
}
- os_fast_mutex_unlock(&srv_conc_mutex);
+ //os_fast_mutex_unlock(&srv_conc_mutex);
#endif
}
#endif /* WITH_WSREP */
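With the non-atomic fallback removed, admission control reduces to the optimistic-ticket pattern in srv_conc_enter_innodb_with_atomics(): atomically increment n_active, keep the ticket if the result is within srv_thread_concurrency, otherwise give it back and sleep before retrying. A minimal, self-contained sketch of the same pattern using std::atomic (illustrative; InnoDB uses my_atomic_addlint and an adaptive sleep delay):

    #include <atomic>
    #include <chrono>
    #include <cstdio>
    #include <thread>

    static std::atomic<unsigned> n_active{0};
    static const unsigned concurrency_limit = 4; // stand-in for srv_thread_concurrency

    static void enter()
    {
        for (;;) {
            // Optimistic increment, like my_atomic_addlint(&srv_conc.n_active, 1).
            if (n_active.fetch_add(1) + 1 <= concurrency_limit) {
                return; // got a seat
            }
            // Overbooked: relinquish the ticket and back off before retrying,
            // mirroring the sleep loop in srv_conc_enter_innodb_with_atomics().
            n_active.fetch_sub(1);
            std::this_thread::sleep_for(std::chrono::microseconds(100));
        }
    }

    static void exit_innodb()
    {
        n_active.fetch_sub(1); // like srv_conc_exit_innodb_with_atomics()
    }

    int main()
    {
        enter();
        std::printf("active threads: %u\n", n_active.load());
        exit_innodb();
    }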
diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc
index d3ba177698e..d204479c1c0 100644
--- a/storage/innobase/srv/srv0mon.cc
+++ b/storage/innobase/srv/srv0mon.cc
@@ -25,19 +25,16 @@ Database monitor counter interfaces
Created 12/9/2009 Jimmy Yang
*******************************************************/
-#ifndef UNIV_HOTBACKUP
-#include "os0file.h"
+#include "buf0buf.h"
+#include "dict0mem.h"
+#include "ibuf0ibuf.h"
+#include "lock0lock.h"
#include "mach0data.h"
+#include "os0file.h"
#include "srv0mon.h"
#include "srv0srv.h"
-#include "buf0buf.h"
-#include "trx0sys.h"
#include "trx0rseg.h"
-#include "lock0lock.h"
-#include "ibuf0ibuf.h"
-#ifdef UNIV_NONINL
-#include "srv0mon.ic"
-#endif
+#include "trx0sys.h"
/* Macro to standardize the counter names for counters in the
"monitor_buf_page" module as they have very structured defines */
@@ -53,7 +50,6 @@ Created 12/9/2009 Jimmy Yang
#define MONITOR_BUF_PAGE_WRITTEN(name, description, code) \
MONITOR_BUF_PAGE(name, description, code, "written", PAGE_WRITTEN)
-
/** This array defines basic static information of monitor counters,
including each monitor's name, module it belongs to, a short
description and its property/type and corresponding monitor_id.
@@ -89,13 +85,6 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_TABLE_REFERENCE},
- {"metadata_mem_pool_size", "metadata",
- "Size of a memory pool InnoDB uses to store data dictionary"
- " and internal data structures in bytes",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON | MONITOR_DISPLAY_CURRENT),
- MONITOR_DEFAULT_START, MONITOR_OVLD_META_MEM_POOL},
-
/* ========== Counters for Lock Module ========== */
{"module_lock", "lock", "Lock Module",
MONITOR_MODULE,
@@ -268,10 +257,10 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGES_DIRTY},
{"buffer_pool_bytes_dirty", "buffer",
- "Buffer bytes currently dirty (innodb_buffer_pool_bytes_dirty)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_BYTES_DIRTY},
+ "Buffer bytes currently dirty (innodb_buffer_pool_bytes_dirty)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_BYTES_DIRTY},
{"buffer_pool_pages_free", "buffer",
"Buffer pages currently free (innodb_buffer_pool_pages_free)",
@@ -356,11 +345,6 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_SCANNED,
MONITOR_FLUSH_BATCH_SCANNED_PER_CALL},
- {"buffer_flush_batch_rescan", "buffer",
- "Number of times rescan of flush list forced",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_FLUSH_HP_RESCAN},
-
/* Cumulative counter for pages flushed in flush batches */
{"buffer_flush_batch_total_pages", "buffer",
"Total pages flushed as part of flush batch",
@@ -398,6 +382,71 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_FLUSH_N_TO_FLUSH_REQUESTED},
+ {"buffer_flush_n_to_flush_by_age", "buffer",
+	 "Number of pages targeted by LSN age for flushing.",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_N_TO_FLUSH_BY_AGE},
+
+ {"buffer_flush_adaptive_avg_time_slot", "buffer",
+ "Avg time (ms) spent for adaptive flushing recently per slot.",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_ADAPTIVE_AVG_TIME_SLOT},
+
+ {"buffer_LRU_batch_flush_avg_time_slot", "buffer",
+ "Avg time (ms) spent for LRU batch flushing recently per slot.",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_LRU_BATCH_FLUSH_AVG_TIME_SLOT},
+
+ {"buffer_flush_adaptive_avg_time_thread", "buffer",
+ "Avg time (ms) spent for adaptive flushing recently per thread.",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_ADAPTIVE_AVG_TIME_THREAD},
+
+ {"buffer_LRU_batch_flush_avg_time_thread", "buffer",
+ "Avg time (ms) spent for LRU batch flushing recently per thread.",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_LRU_BATCH_FLUSH_AVG_TIME_THREAD},
+
+ {"buffer_flush_adaptive_avg_time_est", "buffer",
+ "Estimated time (ms) spent for adaptive flushing recently.",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_ADAPTIVE_AVG_TIME_EST},
+
+ {"buffer_LRU_batch_flush_avg_time_est", "buffer",
+ "Estimated time (ms) spent for LRU batch flushing recently.",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_LRU_BATCH_FLUSH_AVG_TIME_EST},
+
+ {"buffer_flush_avg_time", "buffer",
+ "Avg time (ms) spent for flushing recently.",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_AVG_TIME},
+
+ {"buffer_flush_adaptive_avg_pass", "buffer",
+	 "Number of adaptive flushes passed during the recent Avg period.",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_ADAPTIVE_AVG_PASS},
+
+ {"buffer_LRU_batch_flush_avg_pass", "buffer",
+ "Number of LRU batch flushes passed during the recent Avg period.",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_LRU_BATCH_FLUSH_AVG_PASS},
+
+ {"buffer_flush_avg_pass", "buffer",
+ "Number of flushes passed during the recent Avg period.",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_AVG_PASS},
+
+ {"buffer_LRU_get_free_loops", "buffer",
+ "Total loops in LRU get free.",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_LRU_GET_FREE_LOOPS},
+
+ {"buffer_LRU_get_free_waits", "buffer",
+ "Total sleep waits in LRU get free.",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_LRU_GET_FREE_WAITS},
+
{"buffer_flush_avg_page_rate", "buffer",
"Average number of pages at which flushing is happening",
MONITOR_NONE,
@@ -726,16 +775,16 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_MODULE,
MONITOR_DEFAULT_START, MONITOR_MODULE_TRX},
- {"trx_rw_commits", "transaction", "Number of read-write transactions "
- "committed",
+ {"trx_rw_commits", "transaction",
+ "Number of read-write transactions committed",
MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_TRX_RW_COMMIT},
- {"trx_ro_commits", "transaction", "Number of read-only transactions "
- "committed",
+ {"trx_ro_commits", "transaction",
+ "Number of read-only transactions committed",
MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_TRX_RO_COMMIT},
- {"trx_nl_ro_commits", "transaction", "Number of non-locking "
- "auto-commit read-only transactions committed",
+ {"trx_nl_ro_commits", "transaction",
+ "Number of non-locking auto-commit read-only transactions committed",
MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_TRX_NL_RO_COMMIT},
{"trx_commits_insert_update", "transaction",
@@ -795,7 +844,7 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_DEFAULT_START, MONITOR_N_DEL_ROW_PURGE},
{"purge_upd_exist_or_extern_records", "purge",
- "Number of purges on updates of existing records and "
+ "Number of purges on updates of existing records and"
" updates on delete marked record with externally stored field",
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_N_UPD_EXIST_EXTERN},
@@ -872,10 +921,10 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
MONITOR_DEFAULT_START, MONITOR_OVLD_MAX_AGE_SYNC},
- {"log_pending_log_writes", "recovery", "Pending log writes",
+ {"log_pending_log_flushes", "recovery", "Pending log flushes",
static_cast<monitor_type_t>(
MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
- MONITOR_DEFAULT_START, MONITOR_PENDING_LOG_WRITE},
+ MONITOR_DEFAULT_START, MONITOR_PENDING_LOG_FLUSH},
{"log_pending_checkpoint_writes", "recovery", "Pending checkpoints",
static_cast<monitor_type_t>(
@@ -905,6 +954,12 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_EXISTING | MONITOR_DEFAULT_ON),
MONITOR_DEFAULT_START, MONITOR_OVLD_LOG_WRITES},
+ {"log_padded", "recovery",
+ "Bytes of log padded for log write ahead",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_LOG_PADDED},
+
/* ========== Counters for Page Compression ========== */
{"module_compress", "compression", "Page Compression Info",
MONITOR_MODULE,
@@ -934,41 +989,6 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_SAVED},
- {"compress_trim_sect512", "compression",
- "Number of sect-512 TRIMed by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512},
-
- {"compress_trim_sect1024", "compression",
- "Number of sect-1024 TRIMed by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024},
-
- {"compress_trim_sect2048", "compression",
- "Number of sect-2048 TRIMed by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048},
-
- {"compress_trim_sect4096", "compression",
- "Number of sect-4K TRIMed by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096},
-
- {"compress_trim_sect8192", "compression",
- "Number of sect-8K TRIMed by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192},
-
- {"compress_trim_sect16384", "compression",
- "Number of sect-16K TRIMed by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384},
-
- {"compress_trim_sect32768", "compression",
- "Number of sect-32K TRIMed by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768},
-
{"compress_pages_page_compressed", "compression",
"Number of pages compressed by page compression",
MONITOR_NONE,
@@ -979,11 +999,6 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP},
- {"compress_page_compressed_trim_op_saved", "compression",
- "Number of TRIM operation saved by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED},
-
{"compress_pages_page_decompressed", "compression",
"Number of pages decompressed by page compression",
MONITOR_NONE,
@@ -1037,8 +1052,9 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_INDEX_DISCARD},
+#ifdef BTR_CUR_HASH_ADAPT
/* ========== Counters for Adaptive Hash Index ========== */
- {"module_adaptive_hash", "adaptive_hash_index", "Adpative Hash Index",
+ {"module_adaptive_hash", "adaptive_hash_index", "Adaptive Hash Index",
MONITOR_MODULE,
MONITOR_DEFAULT_START, MONITOR_MODULE_ADAPTIVE_HASH},
@@ -1047,6 +1063,7 @@ static monitor_info_t innodb_counter_info[] =
static_cast<monitor_type_t>(
MONITOR_EXISTING | MONITOR_DEFAULT_ON),
MONITOR_DEFAULT_START, MONITOR_OVLD_ADAPTIVE_HASH_SEARCH},
+#endif /* BTR_CUR_HASH_ADAPT */
{"adaptive_hash_searches_btree", "adaptive_hash_index",
"Number of searches using B-tree on an index search",
@@ -1054,6 +1071,7 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_EXISTING | MONITOR_DEFAULT_ON),
MONITOR_DEFAULT_START, MONITOR_OVLD_ADAPTIVE_HASH_SEARCH_BTREE},
+#ifdef BTR_CUR_HASH_ADAPT
{"adaptive_hash_pages_added", "adaptive_hash_index",
"Number of index pages on which the Adaptive Hash Index is built",
MONITOR_NONE,
@@ -1085,6 +1103,7 @@ static monitor_info_t innodb_counter_info[] =
"Number of Adaptive Hash Index rows updated",
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_UPDATED},
+#endif /* BTR_CUR_HASH_ADAPT */
/* ========== Counters for tablespace ========== */
{"module_file", "file_system", "Tablespace and File System Manager",
@@ -1252,6 +1271,12 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_EXISTING | MONITOR_DEFAULT_ON),
MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_X_SPIN_WAITS},
+ {"innodb_rwlock_sx_spin_waits", "server",
+ "Number of rwlock spin waits due to sx latch request",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_SX_SPIN_WAITS},
+
{"innodb_rwlock_s_spin_rounds", "server",
"Number of rwlock spin loop rounds due to shared latch request",
static_cast<monitor_type_t>(
@@ -1264,6 +1289,12 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_EXISTING | MONITOR_DEFAULT_ON),
MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_X_SPIN_ROUNDS},
+ {"innodb_rwlock_sx_spin_rounds", "server",
+ "Number of rwlock spin loop rounds due to sx latch request",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_SX_SPIN_ROUNDS},
+
{"innodb_rwlock_s_os_waits", "server",
"Number of OS waits due to shared latch request",
static_cast<monitor_type_t>(
@@ -1276,14 +1307,19 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_EXISTING | MONITOR_DEFAULT_ON),
MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_X_OS_WAITS},
+ {"innodb_rwlock_sx_os_waits", "server",
+ "Number of OS waits due to sx latch request",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_SX_OS_WAITS},
+
/* ========== Counters for DML operations ========== */
{"module_dml", "dml", "Statistics for DMLs",
MONITOR_MODULE,
MONITOR_DEFAULT_START, MONITOR_MODULE_DML_STATS},
{"dml_reads", "dml", "Number of rows read",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ static_cast<monitor_type_t>(MONITOR_EXISTING),
MONITOR_DEFAULT_START, MONITOR_OLVD_ROW_READ},
{"dml_inserts", "dml", "Number of rows inserted",
@@ -1346,6 +1382,16 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_PENDING_ALTER_TABLE},
+ {"ddl_sort_file_alter_table", "ddl",
+ "Number of sort files created during alter table",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ALTER_TABLE_SORT_FILES},
+
+ {"ddl_log_file_alter_table", "ddl",
+ "Number of log files created during alter table",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ALTER_TABLE_LOG_FILES},
+
/* ===== Counters for ICP (Index Condition Pushdown) Module ===== */
{"module_icp", "icp", "Index Condition Pushdown",
MONITOR_MODULE,
@@ -1368,6 +1414,16 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_ICP_MATCH},
+ /* ========== Mutex monitoring on/off ========== */
+ {"latch_status", "Latch counters",
+	 "Collect latch counters to display via SHOW ENGINE INNODB MUTEX",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_LATCHES},
+
+ {"latch", "sync", "Latch monitoring control",
+ MONITOR_HIDDEN,
+ MONITOR_DEFAULT_START, MONITOR_LATCHES},
+
/* ========== To turn on/off reset all counters ========== */
{"all", "All Counters", "Turn on/off and reset all counters",
MONITOR_MODULE,
@@ -1375,47 +1431,18 @@ static monitor_info_t innodb_counter_info[] =
};
/* The "innodb_counter_value" array stores actual counter values */
-UNIV_INTERN monitor_value_t innodb_counter_value[NUM_MONITOR];
+monitor_value_t innodb_counter_value[NUM_MONITOR];
/* monitor_set_tbl is used to record and determine whether a monitor
has been turned on/off. */
-UNIV_INTERN ulint monitor_set_tbl[(NUM_MONITOR + NUM_BITS_ULINT
+ulint monitor_set_tbl[(NUM_MONITOR + NUM_BITS_ULINT
- 1) / NUM_BITS_ULINT];
-#ifndef HAVE_ATOMIC_BUILTINS_64
-/** Mutex protecting atomic operations on platforms that lack
-built-in operations for atomic memory access */
-ib_mutex_t monitor_mutex;
-
-/** Key to register monitor_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t monitor_mutex_key;
-
-/****************************************************************//**
-Initialize the monitor subsystem. */
-UNIV_INTERN
-void
-srv_mon_create(void)
-/*================*/
-{
- mutex_create(monitor_mutex_key, &monitor_mutex, SYNC_ANY_LATCH);
-}
-/****************************************************************//**
-Close the monitor subsystem. */
-UNIV_INTERN
-void
-srv_mon_free(void)
-/*==============*/
-{
- mutex_free(&monitor_mutex);
-}
-#endif /* !HAVE_ATOMIC_BUILTINS_64 */
-
/****************************************************************//**
Get a monitor's "monitor_info" by its monitor id (index into the
innodb_counter_info array.
-@return Point to corresponding monitor_info_t, or NULL if no such
+@return Pointer to the corresponding monitor_info_t, or NULL if no such
monitor */
-UNIV_INTERN
monitor_info_t*
srv_mon_get_info(
/*=============*/
@@ -1432,9 +1459,8 @@ srv_mon_get_info(
/****************************************************************//**
Get monitor's name by its monitor id (indexing into the
innodb_counter_info array.
-@return corresponding monitor name, or NULL if no such
+@return corresponding monitor name, or NULL if no such
monitor */
-UNIV_INTERN
const char*
srv_mon_get_name(
/*=============*/
@@ -1452,7 +1478,6 @@ srv_mon_get_name(
Turn on/off, reset monitor counters in a module. If module_id
is MONITOR_ALL_COUNTER then turn on all monitor counters.
turned on because it has already been turned on. */
-UNIV_INTERN
void
srv_mon_set_module_control(
/*=======================*/
@@ -1516,8 +1541,9 @@ srv_mon_set_module_control(
should be aware some counters are already on before
turn them on again (which could reset counter value) */
if (MONITOR_IS_ON(ix) && (set_option == MONITOR_TURN_ON)) {
- fprintf(stderr, "Monitor '%s' is already enabled.\n",
- srv_mon_get_name((monitor_id_t) ix));
+ ib::info() << "Monitor '"
+ << srv_mon_get_name((monitor_id_t) ix)
+ << "' is already enabled.";
continue;
}
@@ -1592,7 +1618,6 @@ corresponding monitors are turned on/off/reset, and do appropriate
mathematics to deduct the actual value. Please also refer to
srv_export_innodb_status() for related global counters used by
the existing status variables.*/
-UNIV_INTERN
void
srv_mon_process_existing_counter(
/*=============================*/
@@ -1617,10 +1642,6 @@ srv_mon_process_existing_counter(
/* Get the value from corresponding global variable */
switch (monitor_id) {
- case MONITOR_OVLD_META_MEM_POOL:
- value = srv_mem_pool_size;
- break;
-
/* export_vars.innodb_buffer_pool_reads. Num Reads from
disk (page not in buffer) */
case MONITOR_OVLD_BUF_POOL_READS:
@@ -1804,6 +1825,10 @@ srv_mon_process_existing_counter(
value = srv_stats.log_writes;
break;
+ case MONITOR_OVLD_LOG_PADDED:
+ value = srv_stats.log_padded;
+ break;
+
/* innodb_dblwr_writes */
case MONITOR_OVLD_SRV_DBLWR_WRITES:
value = srv_stats.dblwr_writes;
@@ -1827,6 +1852,10 @@ srv_mon_process_existing_counter(
value = rw_lock_stats.rw_x_spin_wait_count;
break;
+ case MONITOR_OVLD_RWLOCK_SX_SPIN_WAITS:
+ value = rw_lock_stats.rw_sx_spin_wait_count;
+ break;
+
case MONITOR_OVLD_RWLOCK_S_SPIN_ROUNDS:
value = rw_lock_stats.rw_s_spin_round_count;
break;
@@ -1835,6 +1864,10 @@ srv_mon_process_existing_counter(
value = rw_lock_stats.rw_x_spin_round_count;
break;
+ case MONITOR_OVLD_RWLOCK_SX_SPIN_ROUNDS:
+ value = rw_lock_stats.rw_sx_spin_round_count;
+ break;
+
case MONITOR_OVLD_RWLOCK_S_OS_WAITS:
value = rw_lock_stats.rw_s_os_wait_count;
break;
@@ -1843,6 +1876,10 @@ srv_mon_process_existing_counter(
value = rw_lock_stats.rw_x_os_wait_count;
break;
+ case MONITOR_OVLD_RWLOCK_SX_OS_WAITS:
+ value = rw_lock_stats.rw_sx_os_wait_count;
+ break;
+
case MONITOR_OVLD_BUFFER_POOL_SIZE:
value = srv_buf_pool_size;
break;
@@ -1926,7 +1963,7 @@ srv_mon_process_existing_counter(
break;
case MONITOR_OVLD_N_FILE_OPENED:
- value = fil_n_file_opened;
+ value = fil_system->n_open;
break;
case MONITOR_OVLD_IBUF_MERGE_INSERT:
@@ -1973,9 +2010,9 @@ srv_mon_process_existing_counter(
value = (mon_type_t) log_sys->lsn;
break;
- case MONITOR_PENDING_LOG_WRITE:
+ case MONITOR_PENDING_LOG_FLUSH:
mutex_enter(&log_sys->mutex);
- value = static_cast<mon_type_t>(log_sys->n_pending_writes);
+ value = static_cast<mon_type_t>(log_sys->n_pending_flushes);
mutex_exit(&log_sys->mutex);
break;
@@ -2008,9 +2045,11 @@ srv_mon_process_existing_counter(
value = log_sys->max_modified_age_sync;
break;
+#ifdef BTR_CUR_HASH_ADAPT
case MONITOR_OVLD_ADAPTIVE_HASH_SEARCH:
value = btr_cur_n_sea;
break;
+#endif /* BTR_CUR_HASH_ADAPT */
case MONITOR_OVLD_ADAPTIVE_HASH_SEARCH_BTREE:
value = btr_cur_n_non_sea;
@@ -2019,36 +2058,12 @@ srv_mon_process_existing_counter(
case MONITOR_OVLD_PAGE_COMPRESS_SAVED:
value = srv_stats.page_compression_saved;
break;
- case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512:
- value = srv_stats.page_compression_trim_sect512;
- break;
- case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024:
- value = srv_stats.page_compression_trim_sect1024;
- break;
- case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048:
- value = srv_stats.page_compression_trim_sect2048;
- break;
- case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096:
- value = srv_stats.page_compression_trim_sect4096;
- break;
- case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192:
- value = srv_stats.page_compression_trim_sect8192;
- break;
- case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384:
- value = srv_stats.page_compression_trim_sect16384;
- break;
- case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768:
- value = srv_stats.page_compression_trim_sect32768;
- break;
case MONITOR_OVLD_PAGES_PAGE_COMPRESSED:
value = srv_stats.pages_page_compressed;
break;
case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP:
value = srv_stats.page_compressed_trim_op;
break;
- case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED:
- value = srv_stats.page_compressed_trim_op_saved;
- break;
case MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED:
value = srv_stats.pages_page_decompressed;
break;
@@ -2131,7 +2146,6 @@ srv_mon_process_existing_counter(
/*************************************************************//**
Reset a monitor, create a new base line with the current monitor
value. This baseline is recorded by MONITOR_VALUE_RESET(monitor) */
-UNIV_INTERN
void
srv_mon_reset(
/*==========*/
@@ -2179,7 +2193,6 @@ srv_mon_reset(
/*************************************************************//**
Turn on monitor counters that are marked as default ON. */
-UNIV_INTERN
void
srv_mon_default_on(void)
/*====================*/
@@ -2196,4 +2209,3 @@ srv_mon_default_on(void)
}
}
}
-#endif /* !UNIV_HOTBACKUP */
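The counter table edited above is purely declarative: each row binds a counter name and module to a monitor id, and counters flagged MONITOR_EXISTING are materialized on demand by the switch in srv_mon_process_existing_counter(), which copies the value out of a pre-existing global statistic. A minimal, self-contained model of this table-driven design; every name below is an invented stand-in:

    #include <cstdio>

    enum counter_id { COUNTER_LOG_WRITES, COUNTER_LOG_PADDED, NUM_COUNTERS };

    struct counter_info {
        const char* name;
        const char* module_name;
        const char* description;
        counter_id  id;
    };

    // Static metadata, in the spirit of innodb_counter_info[].
    static const counter_info counters[NUM_COUNTERS] = {
        {"log_writes", "recovery", "Number of log writes",
         COUNTER_LOG_WRITES},
        {"log_padded", "recovery", "Bytes of log padded for log write ahead",
         COUNTER_LOG_PADDED},
    };

    // Stand-ins for the server-wide statistics the monitors read.
    static unsigned long stat_log_writes = 42;
    static unsigned long stat_log_padded = 512;

    static unsigned long read_existing_counter(counter_id id)
    {
        switch (id) { // mirrors srv_mon_process_existing_counter()
        case COUNTER_LOG_WRITES: return stat_log_writes;
        case COUNTER_LOG_PADDED: return stat_log_padded;
        default:                 return 0;
        }
    }

    int main()
    {
        for (const counter_info& c : counters) {
            std::printf("%s.%s = %lu\n", c.module_name, c.name,
                        read_existing_counter(c.id));
        }
    }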
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index f933442f036..44c07693e6a 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -39,39 +39,37 @@ The database server main program
Created 10/8/1995 Heikki Tuuri
*******************************************************/
-/* Dummy comment */
-#include "srv0srv.h"
+#include "my_global.h"
+// JAN: TODO: MySQL 5.7 missing header
+//#include "my_thread.h"
+//
+// #include "mysql/psi/mysql_stage.h"
+// #include "mysql/psi/psi.h"
-#include "ut0mem.h"
-#include "ut0ut.h"
-#include "os0proc.h"
-#include "mem0mem.h"
-#include "mem0pool.h"
-#include "sync0sync.h"
-#include "que0que.h"
-#include "log0recv.h"
-#include "pars0pars.h"
-#include "usr0sess.h"
-#include "lock0lock.h"
-#include "trx0purge.h"
-#include "ibuf0ibuf.h"
+#include "btr0sea.h"
#include "buf0flu.h"
#include "buf0lru.h"
-#include "btr0sea.h"
-#include "dict0load.h"
#include "dict0boot.h"
-#include "srv0start.h"
+#include "dict0load.h"
+#include "ibuf0ibuf.h"
+#include "lock0lock.h"
+#include "log0recv.h"
+#include "mem0mem.h"
+#include "os0proc.h"
+#include "pars0pars.h"
+#include "que0que.h"
#include "row0mysql.h"
+#include "row0trunc.h"
#include "row0log.h"
-#include "ha_prototypes.h"
-#include "trx0i_s.h"
-#include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
#include "srv0mon.h"
+#include "srv0srv.h"
+#include "srv0start.h"
+#include "sync0sync.h"
+#include "trx0i_s.h"
+#include "trx0purge.h"
#include "ut0crc32.h"
#include "btr0defragment.h"
-
-#include "mysql/plugin.h"
-#include "mysql/service_thd_wait.h"
+#include "ut0mem.h"
#include "fil0fil.h"
#include "fil0crypt.h"
#include "fil0pagecompress.h"
@@ -88,18 +86,17 @@ UNIV_INTERN ulong srv_fatal_semaphore_wait_threshold = DEFAULT_SRV_FATAL_SEMAPH
/* How much data manipulation language (DML) statements need to be delayed,
in microseconds, in order to reduce the lagging of the purge thread. */
-UNIV_INTERN ulint srv_dml_needed_delay = 0;
-
-UNIV_INTERN bool srv_monitor_active;
-UNIV_INTERN bool srv_error_monitor_active;
+ulint srv_dml_needed_delay;
-UNIV_INTERN bool srv_buf_dump_thread_active;
+bool srv_monitor_active;
+bool srv_error_monitor_active;
+bool srv_buf_dump_thread_active;
+bool srv_dict_stats_thread_active;
+bool srv_buf_resize_thread_active;
-UNIV_INTERN bool srv_dict_stats_thread_active;
+my_bool srv_scrub_log;
-UNIV_INTERN my_bool srv_scrub_log;
-
-UNIV_INTERN const char* srv_main_thread_op_info = "";
+const char* srv_main_thread_op_info = "";
/** Prefix used by MySQL to indicate pre-5.1 table name encoding */
const char srv_mysql50_table_name_prefix[10] = "#mysql50#";
@@ -109,40 +106,61 @@ const char srv_mysql50_table_name_prefix[10] = "#mysql50#";
/* The following three are dir paths which are catenated before file
names, where the file name itself may also contain a path */
-UNIV_INTERN char* srv_data_home = NULL;
+char* srv_data_home;
/** Rollback files directory, can be absolute. */
-UNIV_INTERN char* srv_undo_dir = NULL;
+char* srv_undo_dir;
/** The number of tablespaces to use for rollback segments. */
-UNIV_INTERN ulong srv_undo_tablespaces = 8;
+ulong srv_undo_tablespaces;
/** The number of UNDO tablespaces that are open and ready to use. */
-UNIV_INTERN ulint srv_undo_tablespaces_open = 8;
+ulint srv_undo_tablespaces_open;
+
+/** The number of UNDO tablespaces that are active (hosting some rollback
+segment). Depending on the configuration used, it is quite possible
+that some of the tablespaces do not host any rollback segments. */
+ulint srv_undo_tablespaces_active;
/* The number of rollback segments to use */
-UNIV_INTERN ulong srv_undo_logs = 1;
+ulong srv_undo_logs;
-#ifdef UNIV_LOG_ARCHIVE
-UNIV_INTERN char* srv_arch_dir = NULL;
-#endif /* UNIV_LOG_ARCHIVE */
+/** Rate at which UNDO records should be purged. */
+ulong srv_purge_rseg_truncate_frequency;
+
+/** Enable or disable truncation of UNDO tablespaces.
+Note: if enabled, UNDO tablespaces are selected for truncation.
+If the user disables this while the server is waiting for an undo
+tablespace to be truncated, the truncation in progress still completes,
+but no new tablespace is marked for truncation (the action is never
+aborted). */
+my_bool srv_undo_log_truncate;
+
+/** Maximum size of undo tablespace. */
+unsigned long long srv_max_undo_log_size;
+
+/** Default undo tablespace size in UNIV_PAGEs count (10MB). */
+const ulint SRV_UNDO_TABLESPACE_SIZE_IN_PAGES =
+ ((1024 * 1024) * 10) / UNIV_PAGE_SIZE_DEF;
/** Set if InnoDB must operate in read-only mode. We don't do any
recovery and open all tables in RO mode instead of RW mode. We don't
sync the max trx id to disk either. */
-UNIV_INTERN my_bool srv_read_only_mode;
+my_bool srv_read_only_mode;
/** store to its own file each table created by an user; data
dictionary tables are in the system tablespace 0 */
-UNIV_INTERN my_bool srv_file_per_table;
+my_bool srv_file_per_table;
+/** whether to use backup-safe TRUNCATE and crash-safe RENAME
+instead of the MySQL 5.7 WL#6501 TRUNCATE TABLE implementation */
+my_bool srv_safe_truncate;
/** The file format to use on new *.ibd files. */
-UNIV_INTERN ulint srv_file_format = 0;
+ulint srv_file_format;
/** Whether to check file format during startup. A value of
UNIV_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to
set it to the highest format we support. */
-UNIV_INTERN ulint srv_max_file_format_at_startup = UNIV_FORMAT_MAX;
+ulint srv_max_file_format_at_startup = UNIV_FORMAT_MAX;
/** Set if InnoDB operates in read-only mode or innodb-force-recovery
is greater than SRV_FORCE_NO_TRX_UNDO. */
-UNIV_INTERN my_bool high_level_read_only;
+my_bool high_level_read_only;
#if UNIV_FORMAT_A
# error "UNIV_FORMAT_A must be 0!"
@@ -150,84 +168,65 @@ UNIV_INTERN my_bool high_level_read_only;
/** Place locks to records only i.e. do not use next-key locking except
on duplicate key checking and foreign key checking */
-UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
+ibool srv_locks_unsafe_for_binlog;
/** Sort buffer size in index creation */
-UNIV_INTERN ulong srv_sort_buf_size = 1048576;
+ulong srv_sort_buf_size;
/** Maximum modification log file size for online index creation */
-UNIV_INTERN unsigned long long srv_online_max_size;
+unsigned long long srv_online_max_size;
/* If this flag is TRUE, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio we build below with threads.
Currently we support native aio on windows and linux */
-UNIV_INTERN my_bool srv_use_native_aio = TRUE;
-UNIV_INTERN my_bool srv_numa_interleave = FALSE;
-
-/* If this flag is TRUE, then we will use fallocate(PUCH_HOLE)
-to the pages */
-UNIV_INTERN my_bool srv_use_trim = FALSE;
-/* If this flag is TRUE, then we will use posix fallocate for file extentsion */
-UNIV_INTERN my_bool srv_use_posix_fallocate = FALSE;
-/* If this flag is TRUE, then we disable doublewrite buffer */
-UNIV_INTERN my_bool srv_use_atomic_writes = FALSE;
-/* If this flag IS TRUE, then we use this algorithm for page compressing the pages */
-UNIV_INTERN ulong innodb_compression_algorithm = PAGE_ZLIB_ALGORITHM;
-/* Number of threads used for multi-threaded flush */
-UNIV_INTERN long srv_mtflush_threads = MTFLUSH_DEFAULT_WORKER;
-/* If this flag is TRUE, then we will use multi threaded flush. */
-UNIV_INTERN my_bool srv_use_mtflush = FALSE;
-
-#ifdef __WIN__
-/* Windows native condition variables. We use runtime loading / function
-pointers, because they are not available on Windows Server 2003 and
-Windows XP/2000.
-
-We use condition for events on Windows if possible, even if os_event
-resembles Windows kernel event object well API-wise. The reason is
-performance, kernel objects are heavyweights and WaitForSingleObject() is a
-performance killer causing calling thread to context switch. Besides, Innodb
-is preallocating large number (often millions) of os_events. With kernel event
-objects it takes a big chunk out of non-paged pool, which is better suited
-for tasks like IO than for storing idle event objects. */
-UNIV_INTERN ibool srv_use_native_conditions = FALSE;
-#endif /* __WIN__ */
-
-UNIV_INTERN ulint srv_n_data_files = 0;
-UNIV_INTERN char** srv_data_file_names = NULL;
-/* size in database pages */
-UNIV_INTERN ulint* srv_data_file_sizes = NULL;
-
-/* if TRUE, then we auto-extend the last data file */
-UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE;
-/* if != 0, this tells the max size auto-extending may increase the
-last data file size */
-UNIV_INTERN ulint srv_last_file_size_max = 0;
-/* If the last data file is auto-extended, we add this
-many pages to it at a time */
-UNIV_INTERN ulong srv_auto_extend_increment = 8;
-UNIV_INTERN ulint* srv_data_file_is_raw_partition = NULL;
-
-/* If the following is TRUE we do not allow inserts etc. This protects
-the user from forgetting the 'newraw' keyword to my.cnf */
-
-UNIV_INTERN ibool srv_created_new_raw = FALSE;
-
-UNIV_INTERN char* srv_log_group_home_dir = NULL;
-
-UNIV_INTERN ulong srv_n_log_files = SRV_N_LOG_FILES_MAX;
-/* size in database pages */
-UNIV_INTERN ib_uint64_t srv_log_file_size = IB_UINT64_MAX;
-UNIV_INTERN ib_uint64_t srv_log_file_size_requested;
-/* size in database pages */
-UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
-UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
-UNIV_INTERN uint srv_flush_log_at_timeout = 1;
-UNIV_INTERN ulong srv_page_size = UNIV_PAGE_SIZE_DEF;
-UNIV_INTERN ulong srv_page_size_shift = UNIV_PAGE_SIZE_SHIFT_DEF;
-
-/* Try to flush dirty pages so as to avoid IO bursts at
-the checkpoints. */
-UNIV_INTERN char srv_adaptive_flushing = TRUE;
+my_bool srv_use_native_aio;
+my_bool srv_numa_interleave;
+/** innodb_use_trim; whether to use fallocate(PUNCH_HOLE) with
+page_compression */
+my_bool srv_use_trim;
+/** copy of innodb_use_atomic_writes; @see innobase_init() */
+my_bool srv_use_atomic_writes;
+/** innodb_compression_algorithm; used with page compression */
+ulong innodb_compression_algorithm;
+/** innodb_mtflush_threads; number of threads used for multi-threaded flush */
+long srv_mtflush_threads;
+/** innodb_use_mtflush; whether to use multi-threaded flush. */
+my_bool srv_use_mtflush;
+
+#ifdef UNIV_DEBUG
+/** Used by SET GLOBAL innodb_master_thread_disabled_debug = X. */
+my_bool srv_master_thread_disabled_debug;
+/** Event used to inform that master thread is disabled. */
+static os_event_t srv_master_thread_disabled_event;
+#endif /* UNIV_DEBUG */
+
+/*------------------------- LOG FILES ------------------------ */
+char* srv_log_group_home_dir;
+
+ulong srv_n_log_files;
+/** The InnoDB redo log file size, or 0 when changing the redo log format
+at startup (while disallowing writes to the redo log). */
+ulonglong srv_log_file_size;
+/** copy of innodb_log_buffer_size, but in database pages */
+ulint srv_log_buffer_size;
+/** innodb_flush_log_at_trx_commit */
+ulong srv_flush_log_at_trx_commit;
+/** innodb_flush_log_at_timeout */
+uint srv_flush_log_at_timeout;
+/** innodb_page_size */
+ulong srv_page_size;
+/** log2 of innodb_page_size; @see innobase_init() */
+ulong srv_page_size_shift;
+/** innodb_log_write_ahead_size */
+ulong srv_log_write_ahead_size;
+
+page_size_t univ_page_size(0, 0, false);
+
+/** innodb_adaptive_flushing; try to flush dirty pages so as to avoid
+IO bursts at the checkpoints. */
+my_bool srv_adaptive_flushing;
+
+/** innodb_flush_sync; whether to ignore io_capacity at log checkpoints */
+my_bool srv_flush_sync;
/** Maximum number of times allowed to conditionally acquire
mutex before switching to blocking wait on the mutex */
@@ -243,271 +242,255 @@ with mutex_enter(), which will wait until it gets the mutex. */
UNIV_INTERN os_event_t srv_allow_writes_event;
#endif /* WITH_INNODB_DISALLOW_WRITES */
-/** The sort order table of the MySQL latin1_swedish_ci character set
-collation */
-UNIV_INTERN const byte* srv_latin1_ordering;
-
-/* use os/external memory allocator */
-UNIV_INTERN my_bool srv_use_sys_malloc = TRUE;
-/* requested size in kilobytes */
-UNIV_INTERN ulint srv_buf_pool_size = ULINT_MAX;
-/* requested number of buffer pool instances */
-UNIV_INTERN ulint srv_buf_pool_instances = 1;
-/* number of locks to protect buf_pool->page_hash */
-UNIV_INTERN ulong srv_n_page_hash_locks = 16;
-/** Scan depth for LRU flush batch i.e.: number of blocks scanned*/
-UNIV_INTERN ulong srv_LRU_scan_depth = 1024;
-/** whether or not to flush neighbors of a block */
-UNIV_INTERN ulong srv_flush_neighbors = 1;
-/* previously requested size */
-UNIV_INTERN ulint srv_buf_pool_old_size;
-/* current size in kilobytes */
-UNIV_INTERN ulint srv_buf_pool_curr_size = 0;
-/* dump that may % of each buffer pool during BP dump */
-UNIV_INTERN ulong srv_buf_pool_dump_pct;
-/* size in bytes */
-UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX;
-UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX;
-
-UNIV_INTERN ulong srv_idle_flush_pct = 100;
-
-/* This parameter is deprecated. Use srv_n_io_[read|write]_threads
-instead. */
-UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX;
-UNIV_INTERN ulint srv_n_read_io_threads = ULINT_MAX;
-UNIV_INTERN ulint srv_n_write_io_threads = ULINT_MAX;
-
-/* Switch to enable random read ahead. */
-UNIV_INTERN my_bool srv_random_read_ahead = FALSE;
-/* User settable value of the number of pages that must be present
+/** copy of innodb_buffer_pool_size */
+ulint srv_buf_pool_size;
+const ulint srv_buf_pool_min_size = 5 * 1024 * 1024;
+/** Default pool size in bytes */
+const ulint srv_buf_pool_def_size = 128 * 1024 * 1024;
+/** Requested buffer pool chunk size. Each buffer pool instance consists
+of one or more chunks. */
+ulong srv_buf_pool_chunk_unit;
+/** innodb_buffer_pool_instances (0 is interpreted as 1) */
+ulong srv_buf_pool_instances;
+/** Default value of innodb_buffer_pool_instances */
+const ulong srv_buf_pool_instances_default = 0;
+/** innodb_page_hash_locks (a debug-only parameter);
+number of locks to protect buf_pool->page_hash */
+ulong srv_n_page_hash_locks = 16;
+/** innodb_lru_scan_depth; number of blocks scanned in LRU flush batch */
+ulong srv_LRU_scan_depth;
+/** innodb_flush_neighbors; whether or not to flush neighbors of a block */
+ulong srv_flush_neighbors;
+/** Previously requested size */
+ulint srv_buf_pool_old_size;
+/** Current size as scaling factor for the other components */
+ulint srv_buf_pool_base_size;
+/** Current size in bytes */
+ulint srv_buf_pool_curr_size;
+/** Dump this % of each buffer pool during BP dump */
+ulong srv_buf_pool_dump_pct;
+/** Lock table size in bytes */
+ulint srv_lock_table_size = ULINT_MAX;
+
+/** innodb_idle_flush_pct */
+ulong srv_idle_flush_pct;
+
+/** copy of innodb_read_io_threads */
+ulint srv_n_read_io_threads;
+/** copy of innodb_write_io_threads */
+ulint srv_n_write_io_threads;
+
+/** innodb_random_read_ahead */
+my_bool srv_random_read_ahead;
+/** innodb_read_ahead_threshold; the number of pages that must be present
in the buffer cache and accessed sequentially for InnoDB to trigger a
readahead request. */
-UNIV_INTERN ulong srv_read_ahead_threshold = 56;
+ulong srv_read_ahead_threshold;
+
+/** innodb_change_buffer_max_size; maximum on-disk size of the
+change buffer, as a percentage of the buffer pool size. */
+uint srv_change_buffer_max_size;
+
+char* srv_file_flush_method_str;
-#ifdef UNIV_LOG_ARCHIVE
-UNIV_INTERN ibool srv_log_archive_on = FALSE;
-UNIV_INTERN ibool srv_archive_recovery = 0;
-UNIV_INTERN ib_uint64_t srv_archive_recovery_limit_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
-/* This parameter is used to throttle the number of insert buffers that are
-merged in a batch. By increasing this parameter on a faster disk you can
-possibly reduce the number of I/O operations performed to complete the
-merge operation. The value of this parameter is used as is by the
-background loop when the system is idle (low load), on a busy system
-the parameter is scaled down by a factor of 4, this is to avoid putting
-a heavier load on the I/O sub system. */
+enum srv_flush_t srv_file_flush_method = IF_WIN(SRV_ALL_O_DIRECT_FSYNC,SRV_FSYNC);
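/* For reference: IF_WIN(A,B) evaluates to A on Windows and to B
elsewhere. An illustrative stand-in follows; the _SKETCH name is an
assumption, the real macro comes from the server headers. */
#ifdef _WIN32
# define IF_WIN_SKETCH(A,B) A	/* Windows: O_DIRECT data files + fsync */
#else
# define IF_WIN_SKETCH(A,B) B	/* elsewhere: plain fsync */
#endif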
-UNIV_INTERN ulong srv_insert_buffer_batch_size = 20;
-UNIV_INTERN char* srv_file_flush_method_str = NULL;
-UNIV_INTERN ulint srv_unix_file_flush_method = SRV_UNIX_FSYNC;
-UNIV_INTERN ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
+/** copy of innodb_open_files, initialized by innobase_init() */
+ulint srv_max_n_open_files;
-UNIV_INTERN ulint srv_max_n_open_files = 300;
+/** innodb_io_capacity */
+ulong srv_io_capacity;
+/** innodb_io_capacity_max */
+ulong srv_max_io_capacity;
-/* Number of IO operations per second the server can do */
-UNIV_INTERN ulong srv_io_capacity = 200;
-UNIV_INTERN ulong srv_max_io_capacity = 400;
+/** innodb_page_cleaners; the number of page cleaner threads */
+ulong srv_n_page_cleaners;
/* The InnoDB main thread tries to keep the ratio of modified pages
in the buffer pool to all database pages in the buffer pool smaller than
the following number. But it is not guaranteed that the value stays below
that during a time of heavy update/insert activity. */
-UNIV_INTERN double srv_max_buf_pool_modified_pct = 75.0;
-UNIV_INTERN double srv_max_dirty_pages_pct_lwm = 50.0;
+/** innodb_max_dirty_pages_pct */
+double srv_max_buf_pool_modified_pct;
+/** innodb_max_dirty_pages_pct_lwm */
+double srv_max_dirty_pages_pct_lwm;
-/* This is the percentage of log capacity at which adaptive flushing,
-if enabled, will kick in. */
-UNIV_INTERN double srv_adaptive_flushing_lwm = 10.0;
+/** innodb_adaptive_flushing_lwm; the percentage of log capacity at
+which adaptive flushing, if enabled, will kick in. */
+double srv_adaptive_flushing_lwm;
-/* Number of iterations over which adaptive flushing is averaged. */
-UNIV_INTERN ulong srv_flushing_avg_loops = 30;
+/** innodb_flushing_avg_loops; number of iterations over which
+adaptive flushing is averaged */
+ulong srv_flushing_avg_loops;
-/* The number of purge threads to use.*/
-UNIV_INTERN ulong srv_n_purge_threads;
+/** innodb_purge_threads; the number of purge threads to use */
+ulong srv_n_purge_threads;
-/* the number of pages to purge in one batch */
-UNIV_INTERN ulong srv_purge_batch_size = 20;
+/** innodb_purge_batch_size, in pages */
+ulong srv_purge_batch_size;
-/* Internal setting for "innodb_stats_method". Decides how InnoDB treats
+/** innodb_stats_method decides how InnoDB treats
NULL values when collecting statistics. By default, it is set to
SRV_STATS_NULLS_EQUAL(0), i.e. all NULL values are treated as equal */
-UNIV_INTERN ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL;
+ulong srv_innodb_stats_method;
-UNIV_INTERN srv_stats_t srv_stats;
+srv_stats_t srv_stats;
/* structure to pass status variables to MySQL */
-UNIV_INTERN export_var_t export_vars;
+export_var_t export_vars;
/** Normally 0. When nonzero, skip some phases of crash recovery,
starting from SRV_FORCE_IGNORE_CORRUPT, so that data can be recovered
by SELECT or mysqldump. When this is nonzero, we do not allow any user
modifications to the data. */
-UNIV_INTERN ulong srv_force_recovery;
+ulong srv_force_recovery;
-/** Print all user-level transactions deadlocks to mysqld stderr */
+/** innodb_print_all_deadlocks; whether to print all user-level
+transactions deadlocks to the error log */
+my_bool srv_print_all_deadlocks;
-UNIV_INTERN my_bool srv_print_all_deadlocks = FALSE;
+/** innodb_cmp_per_index_enabled; enable
+INFORMATION_SCHEMA.innodb_cmp_per_index */
+my_bool srv_cmp_per_index_enabled;
-/** Enable INFORMATION_SCHEMA.innodb_cmp_per_index */
-UNIV_INTERN my_bool srv_cmp_per_index_enabled = FALSE;
-
-/* If the following is set to 1 then we do not run purge and insert buffer
+/** innodb_fast_shutdown; if 1 then we do not run purge and insert buffer
merge to completion before shutdown. If it is set to 2, do not even flush the
buffer pool to data files at the shutdown: we effectively 'crash'
InnoDB (but lose no committed transactions). */
-UNIV_INTERN ulint srv_fast_shutdown = 0;
+uint srv_fast_shutdown;
-/* Generate a innodb_status.<pid> file */
-UNIV_INTERN ibool srv_innodb_status = FALSE;
+/** copy of innodb_status_file; generate an innodb_status.<pid> file */
+ibool srv_innodb_status;
-/* Optimize prefix index queries to skip cluster index lookup when possible */
-/* Enables or disables this prefix optimization. Disabled by default. */
-UNIV_INTERN my_bool srv_prefix_index_cluster_optimization = 0;
+/** innodb_prefix_index_cluster_optimization; whether to optimize
+prefix index queries to skip cluster index lookup when possible */
+my_bool srv_prefix_index_cluster_optimization;
-/* When estimating number of different key values in an index, sample
+/** innodb_stats_transient_sample_pages;
+When estimating the number of different key values in an index, sample
this many index pages. There are two ways to calculate statistics:
* persistent stats that are calculated by ANALYZE TABLE and saved
in the innodb database.
* quick transient stats that are used if persistent stats for the given
table/index are not found in the innodb database */
-UNIV_INTERN unsigned long long srv_stats_transient_sample_pages = 8;
-UNIV_INTERN my_bool srv_stats_persistent = TRUE;
-UNIV_INTERN my_bool srv_stats_include_delete_marked = FALSE;
-UNIV_INTERN unsigned long long srv_stats_persistent_sample_pages = 20;
-UNIV_INTERN my_bool srv_stats_auto_recalc = TRUE;
-
-/* The number of rows modified before we calculate new statistics (default 0
-= current limits) */
-UNIV_INTERN unsigned long long srv_stats_modified_counter = 0;
-
-/* Enable traditional statistic calculation based on number of configured
-pages default true. */
-UNIV_INTERN my_bool srv_stats_sample_traditional = TRUE;
-
-UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE;
-
-/** doublewrite buffer is 1MB is size i.e.: it can hold 128 16K pages.
-The following parameter is the size of the buffer that is used for
-batch flushing i.e.: LRU flushing and flush_list flushing. The rest
-of the pages are used for single page flushing. */
-UNIV_INTERN ulong srv_doublewrite_batch_size = 120;
-
-UNIV_INTERN ulong srv_replication_delay = 0;
-
-/*-------------------------------------------*/
-#ifdef HAVE_MEMORY_BARRIER
-/* No idea to wait long with memory barriers */
-UNIV_INTERN ulong srv_n_spin_wait_rounds = 15;
-#else
-UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
-#endif
-UNIV_INTERN ulong srv_spin_wait_delay = 6;
-UNIV_INTERN ibool srv_priority_boost = TRUE;
-
-#ifdef UNIV_DEBUG
-UNIV_INTERN ibool srv_print_thread_releases = FALSE;
-UNIV_INTERN ibool srv_print_lock_waits = FALSE;
-UNIV_INTERN ibool srv_print_buf_io = FALSE;
-UNIV_INTERN ibool srv_print_log_io = FALSE;
-UNIV_INTERN ibool srv_print_latch_waits = FALSE;
-#endif /* UNIV_DEBUG */
-
-static ulint srv_n_rows_inserted_old = 0;
-static ulint srv_n_rows_updated_old = 0;
-static ulint srv_n_rows_deleted_old = 0;
-static ulint srv_n_rows_read_old = 0;
-static ulint srv_n_system_rows_inserted_old = 0;
-static ulint srv_n_system_rows_updated_old = 0;
-static ulint srv_n_system_rows_deleted_old = 0;
-static ulint srv_n_system_rows_read_old = 0;
-
-UNIV_INTERN ulint srv_truncated_status_writes = 0;
-UNIV_INTERN ulint srv_available_undo_logs = 0;
+unsigned long long srv_stats_transient_sample_pages;
+/** innodb_stats_persistent */
+my_bool srv_stats_persistent;
+/** innodb_stats_include_delete_marked */
+my_bool srv_stats_include_delete_marked;
+/** innodb_stats_persistent_sample_pages */
+unsigned long long srv_stats_persistent_sample_pages;
+/** innodb_stats_auto_recalc */
+my_bool srv_stats_auto_recalc;
+
+/** innodb_stats_modified_counter; The number of rows modified before
+we calculate new statistics (default 0 = current limits) */
+unsigned long long srv_stats_modified_counter;
+
+/** innodb_stats_traditional; enable traditional statistics calculation
+based on the number of configured pages */
+my_bool srv_stats_sample_traditional;
+
+/** copy of innodb_doublewrite */
+ibool srv_use_doublewrite_buf;
+
+/** innodb_doublewrite_batch_size (a debug parameter) specifies the
+number of pages to use in LRU and flush_list batch flushing.
+The rest of the doublewrite buffer is used for single-page flushing. */
+ulong srv_doublewrite_batch_size = 120;
+
+/** innodb_replication_delay */
+ulong srv_replication_delay;
+
+/** innodb_sync_spin_loops */
+ulong srv_n_spin_wait_rounds;
+/** innodb_spin_wait_delay */
+uint srv_spin_wait_delay;
+
+static ulint srv_n_rows_inserted_old;
+static ulint srv_n_rows_updated_old;
+static ulint srv_n_rows_deleted_old;
+static ulint srv_n_rows_read_old;
+static ulint srv_n_system_rows_inserted_old;
+static ulint srv_n_system_rows_updated_old;
+static ulint srv_n_system_rows_deleted_old;
+static ulint srv_n_system_rows_read_old;
+
+ulint srv_truncated_status_writes;
+/** Number of initialized rollback segments for persistent undo log */
+ulong srv_available_undo_logs;
/* Defragmentation */
-UNIV_INTERN my_bool srv_defragment = FALSE;
-UNIV_INTERN uint srv_defragment_n_pages = 7;
-UNIV_INTERN uint srv_defragment_stats_accuracy = 0;
-UNIV_INTERN uint srv_defragment_fill_factor_n_recs = 20;
-UNIV_INTERN double srv_defragment_fill_factor = 0.9;
-UNIV_INTERN uint srv_defragment_frequency =
- SRV_DEFRAGMENT_FREQUENCY_DEFAULT;
-UNIV_INTERN ulonglong srv_defragment_interval = 0;
+UNIV_INTERN my_bool srv_defragment;
+/** innodb_defragment_n_pages */
+UNIV_INTERN uint srv_defragment_n_pages;
+UNIV_INTERN uint srv_defragment_stats_accuracy;
+/** innodb_defragment_fill_factor_n_recs */
+UNIV_INTERN uint srv_defragment_fill_factor_n_recs;
+/** innodb_defragment_fill_factor */
+UNIV_INTERN double srv_defragment_fill_factor;
+/** innodb_defragment_frequency */
+UNIV_INTERN uint srv_defragment_frequency;
+/** derived from innodb_defragment_frequency;
+@see innodb_defragment_frequency_update() */
+UNIV_INTERN ulonglong srv_defragment_interval;
+
+/** Current mode of operation */
+UNIV_INTERN enum srv_operation_mode srv_operation;
/* Set the following to 0 to keep InnoDB from writing messages on
-stderr on startup/shutdown. */
-UNIV_INTERN ibool srv_print_verbose_log = TRUE;
-UNIV_INTERN my_bool srv_print_innodb_monitor = FALSE;
-UNIV_INTERN my_bool srv_print_innodb_lock_monitor = FALSE;
-UNIV_INTERN ibool srv_print_innodb_tablespace_monitor = FALSE;
-UNIV_INTERN ibool srv_print_innodb_table_monitor = FALSE;
+stderr on startup/shutdown. Not enabled on the embedded server. */
+ibool srv_print_verbose_log;
+my_bool srv_print_innodb_monitor;
+my_bool srv_print_innodb_lock_monitor;
+/** innodb_force_primary_key; whether to disallow CREATE TABLE without
+PRIMARY KEY */
+my_bool srv_force_primary_key;
-/** If this flag is set tables without primary key are not allowed */
-UNIV_INTERN my_bool srv_force_primary_key = FALSE;
+/** innodb_encrypt_temporary_tables; whether to encrypt the
+temporary tablespace */
+my_bool innodb_encrypt_temporary_tables;
/* Array of English strings describing the current state of an
i/o handler thread */
-UNIV_INTERN const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
-UNIV_INTERN const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
+const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
+const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
-UNIV_INTERN time_t srv_last_monitor_time;
+static time_t srv_last_monitor_time;
static ib_mutex_t srv_innodb_monitor_mutex;
+/** Mutex protecting page_zip_stat_per_index */
+ib_mutex_t page_zip_stat_per_index_mutex;
+
/* Mutex for locking srv_monitor_file. Not created if srv_read_only_mode */
-UNIV_INTERN ib_mutex_t srv_monitor_file_mutex;
-
-#ifdef UNIV_PFS_MUTEX
-# ifndef HAVE_ATOMIC_BUILTINS
-/* Key to register server_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t server_mutex_key;
-# endif /* !HAVE_ATOMIC_BUILTINS */
-/** Key to register srv_innodb_monitor_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t srv_innodb_monitor_mutex_key;
-/** Key to register srv_monitor_file_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t srv_monitor_file_mutex_key;
-/** Key to register srv_dict_tmpfile_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t srv_dict_tmpfile_mutex_key;
-/** Key to register the mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
-/** Key to register srv_sys_t::mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t srv_sys_mutex_key;
-/** Key to register srv_sys_t::tasks_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t srv_sys_tasks_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
+ib_mutex_t srv_monitor_file_mutex;
/** Temporary file for innodb monitor output */
-UNIV_INTERN FILE* srv_monitor_file;
-/** Mutex for locking srv_dict_tmpfile. Not created if srv_read_only_mode.
-This mutex has a very high rank; threads reserving it should not
-be holding any InnoDB latches. */
-UNIV_INTERN ib_mutex_t srv_dict_tmpfile_mutex;
-/** Temporary file for output from the data dictionary */
-UNIV_INTERN FILE* srv_dict_tmpfile;
+FILE* srv_monitor_file;
/** Mutex for locking srv_misc_tmpfile. Not created if srv_read_only_mode.
This mutex has a very low rank; threads reserving it should not
acquire any further latches or sleep before releasing this one. */
-UNIV_INTERN ib_mutex_t srv_misc_tmpfile_mutex;
+ib_mutex_t srv_misc_tmpfile_mutex;
/** Temporary file for miscellaneous diagnostic output */
-UNIV_INTERN FILE* srv_misc_tmpfile;
+FILE* srv_misc_tmpfile;
-UNIV_INTERN ulint srv_main_thread_process_no = 0;
-UNIV_INTERN ulint srv_main_thread_id = 0;
+static ulint srv_main_thread_process_no;
+static ulint srv_main_thread_id;
/* The following counts are used by the srv_master_thread. */
/** Iterations of the loop bounded by 'srv_active' label. */
-static ulint srv_main_active_loops = 0;
+static ulint srv_main_active_loops;
/** Iterations of the loop bounded by the 'srv_idle' label. */
-static ulint srv_main_idle_loops = 0;
+static ulint srv_main_idle_loops;
/** Iterations of the loop bounded by the 'srv_shutdown' label. */
-static ulint srv_main_shutdown_loops = 0;
+static ulint srv_main_shutdown_loops;
/** Log writes involving flush. */
-static ulint srv_log_writes_and_flush = 0;
+static ulint srv_log_writes_and_flush;
/* This is only ever touched by the master thread. It records the
time when the last flush of log file has happened. The master
@@ -515,9 +498,6 @@ thread ensures that we flush the log files at least once per
second. */
static time_t srv_last_log_flush_time;
-/** Enable semaphore request instrumentation */
-UNIV_INTERN my_bool srv_instrument_semaphores = FALSE;
-
/* Interval in seconds at which various tasks are performed by the
master thread when server is active. In order to balance the workload,
we should try to keep intervals such that they are not multiple of
@@ -533,10 +513,10 @@ current_time % 5 != 0. */
# define SRV_MASTER_DICT_LRU_INTERVAL (47)
/** Simulate compression failures. */
-UNIV_INTERN uint srv_simulate_comp_failures = 0;
+UNIV_INTERN uint srv_simulate_comp_failures;
/** Buffer pool dump status frequency in percentages */
-UNIV_INTERN ulong srv_buf_dump_status_frequency = 0;
+UNIV_INTERN ulong srv_buf_dump_status_frequency;
/** Acquire the system_mutex. */
#define srv_sys_mutex_enter() do { \
@@ -641,7 +621,8 @@ struct srv_sys_t{
ulint n_sys_threads; /*!< size of the sys_threads
array */
- srv_slot_t sys_threads[32 + 1]; /*!< server thread table;
+ srv_slot_t
+ sys_threads[srv_max_purge_threads + 1]; /*!< server thread table;
os_event_set() and
os_event_reset() on
sys_threads[]->event are
@@ -649,45 +630,90 @@ struct srv_sys_t{
ulint n_threads_active[SRV_MASTER + 1];
/*!< number of threads active
- in a thread class */
+ in a thread class; updated
+ atomically with my_atomic_addlint()
+ and also read under the mutex */
srv_stats_t::ulint_ctr_1_t
activity_count; /*!< For tracking server
activity */
};
-#ifndef HAVE_ATOMIC_BUILTINS
-/** Mutex protecting some server global variables. */
-UNIV_INTERN ib_mutex_t server_mutex;
-#endif /* !HAVE_ATOMIC_BUILTINS */
-
static srv_sys_t srv_sys;
/** Event to signal srv_monitor_thread. Not protected by a mutex.
Set after setting srv_print_innodb_monitor. */
-UNIV_INTERN os_event_t srv_monitor_event;
+os_event_t srv_monitor_event;
/** Event to signal the shutdown of srv_error_monitor_thread.
Not protected by a mutex. */
-UNIV_INTERN os_event_t srv_error_event;
+os_event_t srv_error_event;
/** Event for waking up buf_dump_thread. Not protected by a mutex.
Set on shutdown or by buf_dump_start() or buf_load_start(). */
-UNIV_INTERN os_event_t srv_buf_dump_event;
+os_event_t srv_buf_dump_event;
+
+/** Event to signal the buffer pool resize thread */
+os_event_t srv_buf_resize_event;
/** The buffer pool dump/load file name */
-UNIV_INTERN char* srv_buf_dump_filename;
+char* srv_buf_dump_filename;
/** Boolean config knobs that tell InnoDB to dump the buffer pool at shutdown
and/or load it during startup. */
-UNIV_INTERN char srv_buffer_pool_dump_at_shutdown = FALSE;
-UNIV_INTERN char srv_buffer_pool_load_at_startup = FALSE;
-
-/** Slot index in the srv_sys.sys_threads array for the purge thread. */
-static const ulint SRV_PURGE_SLOT = 1;
+char srv_buffer_pool_dump_at_shutdown = TRUE;
+char srv_buffer_pool_load_at_startup = TRUE;
/** Slot index in the srv_sys.sys_threads array for the master thread. */
-static const ulint SRV_MASTER_SLOT = 0;
+#define SRV_MASTER_SLOT 0
+
+/** Slot index in the srv_sys.sys_threads array for the purge thread. */
+#define SRV_PURGE_SLOT 1
+
+/** Slot index in the srv_sys.sys_threads array from which purge workers start.
+ */
+#define SRV_WORKER_SLOTS_START 2
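/* The three defines above fix the following layout; a minimal sketch
of the implied indexing (the accessor name is hypothetical):
	sys_threads[0]    master thread
	sys_threads[1]    purge coordinator
	sys_threads[2..]  purge worker threads */
static inline ulint
srv_worker_slot_sketch(ulint worker_no)
{
	return(SRV_WORKER_SLOTS_START + worker_no);
}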
+
+#ifdef HAVE_PSI_STAGE_INTERFACE
+/** Performance schema stage event for monitoring ALTER TABLE progress:
+everything after the log_make_checkpoint() flush. */
+PSI_stage_info srv_stage_alter_table_end
+ = {0, "alter table (end)", PSI_FLAG_STAGE_PROGRESS};
+
+/** Performance schema stage event for monitoring ALTER TABLE progress:
+log_make_checkpoint(). */
+PSI_stage_info srv_stage_alter_table_flush
+ = {0, "alter table (flush)", PSI_FLAG_STAGE_PROGRESS};
+
+/** Performance schema stage event for monitoring ALTER TABLE progress:
+row_merge_insert_index_tuples(). */
+PSI_stage_info srv_stage_alter_table_insert
+ = {0, "alter table (insert)", PSI_FLAG_STAGE_PROGRESS};
+
+/** Performance schema stage event for monitoring ALTER TABLE progress:
+row_log_apply(). */
+PSI_stage_info srv_stage_alter_table_log_index
+ = {0, "alter table (log apply index)", PSI_FLAG_STAGE_PROGRESS};
+
+/** Performance schema stage event for monitoring ALTER TABLE progress:
+row_log_table_apply(). */
+PSI_stage_info srv_stage_alter_table_log_table
+ = {0, "alter table (log apply table)", PSI_FLAG_STAGE_PROGRESS};
+
+/** Performance schema stage event for monitoring ALTER TABLE progress:
+row_merge_sort(). */
+PSI_stage_info srv_stage_alter_table_merge_sort
+ = {0, "alter table (merge sort)", PSI_FLAG_STAGE_PROGRESS};
+
+/** Performance schema stage event for monitoring ALTER TABLE progress:
+row_merge_read_clustered_index(). */
+PSI_stage_info srv_stage_alter_table_read_pk_internal_sort
+ = {0, "alter table (read PK and internal sort)", PSI_FLAG_STAGE_PROGRESS};
+
+/** Performance schema stage event for monitoring buffer pool load progress. */
+PSI_stage_info srv_stage_buffer_pool_load
+ = {0, "buffer pool load", PSI_FLAG_STAGE_PROGRESS};
+#endif /* HAVE_PSI_STAGE_INTERFACE */
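/* A minimal sketch of how stage events such as these are typically
made visible to the performance schema; the array and function names
are assumptions, only mysql_stage_register() is the standard PSI
registration entry point. */
#ifdef HAVE_PSI_STAGE_INTERFACE
static PSI_stage_info* srv_stages_sketch[] = {
	&srv_stage_alter_table_end,
	&srv_stage_buffer_pool_load,
};

static void
srv_register_stages_sketch()
{
	mysql_stage_register("innodb", srv_stages_sketch,
			     static_cast<int>(array_elements(srv_stages_sketch)));
}
#endif /* HAVE_PSI_STAGE_INTERFACE */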
/*********************************************************************//**
Prints counters for work done by srv_master_thread. */
@@ -697,18 +723,17 @@ srv_print_master_thread_info(
/*=========================*/
FILE *file) /* in: output stream */
{
- fprintf(file, "srv_master_thread loops: %lu srv_active, "
- "%lu srv_shutdown, %lu srv_idle\n",
+ fprintf(file, "srv_master_thread loops: " ULINTPF " srv_active, "
+ ULINTPF " srv_shutdown, " ULINTPF " srv_idle\n"
+ "srv_master_thread log flush and writes: " ULINTPF "\n",
srv_main_active_loops,
srv_main_shutdown_loops,
- srv_main_idle_loops);
- fprintf(file, "srv_master_thread log flush and writes: %lu\n",
+ srv_main_idle_loops,
srv_log_writes_and_flush);
}
/*********************************************************************//**
Sets the info describing an i/o thread current state. */
-UNIV_INTERN
void
srv_set_io_thread_op_info(
/*======================*/
@@ -723,7 +748,6 @@ srv_set_io_thread_op_info(
/*********************************************************************//**
Resets the info describing an i/o thread current state. */
-UNIV_INTERN
void
srv_reset_io_thread_op_info()
/*=========================*/
@@ -772,7 +796,7 @@ srv_slot_get_type(
/*********************************************************************//**
Reserves a slot in the thread table for the current thread.
-@return reserved slot */
+@return reserved slot */
static
srv_slot_t*
srv_reserve_slot(
@@ -796,7 +820,7 @@ srv_reserve_slot(
case SRV_WORKER:
/* Find an empty slot, skip the master and purge slots. */
- for (slot = &srv_sys.sys_threads[2];
+ for (slot = &srv_sys.sys_threads[SRV_WORKER_SLOTS_START];
slot->in_use;
++slot) {
@@ -817,7 +841,7 @@ srv_reserve_slot(
ut_ad(srv_slot_get_type(slot) == type);
- ++srv_sys.n_threads_active[type];
+ my_atomic_addlint(&srv_sys.n_threads_active[type], 1);
srv_sys_mutex_exit();
@@ -828,7 +852,7 @@ srv_reserve_slot(
Suspends the calling thread to wait for the event in its thread slot.
@return the current signal count of the event. */
static
-ib_int64_t
+int64_t
srv_suspend_thread_low(
/*===================*/
srv_slot_t* slot) /*!< in/out: thread slot */
@@ -858,16 +882,15 @@ srv_suspend_thread_low(
case SRV_WORKER:
ut_a(srv_n_purge_threads > 1);
- ut_a(srv_sys.n_threads_active[type] > 0);
break;
}
ut_a(!slot->suspended);
slot->suspended = TRUE;
- ut_a(srv_sys.n_threads_active[type] > 0);
-
- srv_sys.n_threads_active[type]--;
+ if (my_atomic_addlint(&srv_sys.n_threads_active[type], -1) < 0) {
+ ut_error;
+ }
return(os_event_reset(slot->event));
}
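/* The my_atomic_addlint() call above is a fetch-and-add: it returns
the value the counter held before the addition. A self-contained
equivalent with std::atomic, assuming C++11 (names are hypothetical): */
#include <atomic>
#include <cstdlib>

static std::atomic<long> n_active_sketch;

static void enter_sketch() { n_active_sketch.fetch_add(1); }

static void leave_sketch()
{
	/* fetch_sub() likewise returns the old value; zero or less
	here would mean a decrement without a matching increment. */
	if (n_active_sketch.fetch_sub(1) <= 0) {
		std::abort();	/* cf. ut_error above */
	}
}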
@@ -876,14 +899,14 @@ srv_suspend_thread_low(
Suspends the calling thread to wait for the event in its thread slot.
@return the current signal count of the event. */
static
-ib_int64_t
+int64_t
srv_suspend_thread(
/*===============*/
srv_slot_t* slot) /*!< in/out: thread slot */
{
srv_sys_mutex_enter();
- ib_int64_t sig_count = srv_suspend_thread_low(slot);
+ int64_t sig_count = srv_suspend_thread_low(slot);
srv_sys_mutex_exit();
@@ -898,7 +921,7 @@ srv_suspend_thread(
@return whether the wait timed out */
static
bool
-srv_resume_thread(srv_slot_t* slot, ib_int64_t sig_count = 0, bool wait = true,
+srv_resume_thread(srv_slot_t* slot, int64_t sig_count = 0, bool wait = true,
ulint timeout_usec = 0)
{
bool timeout;
@@ -922,7 +945,7 @@ srv_resume_thread(srv_slot_t* slot, ib_int64_t sig_count = 0, bool wait = true,
ut_ad(slot->suspended);
slot->suspended = FALSE;
- ++srv_sys.n_threads_active[slot->type];
+ my_atomic_addlint(&srv_sys.n_threads_active[slot->type], 1);
srv_sys_mutex_exit();
return(timeout);
}
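/* A minimal sketch of the signal-count handshake that
srv_suspend_thread()/srv_resume_thread() rely on, rebuilt on
std::condition_variable (assumes C++11; not the real os_event
implementation). reset() returns a generation number; a later
wait_low() with that number returns immediately if set() ran in
between, so a wakeup posted between "reset" and "wait" is not lost. */
#include <condition_variable>
#include <cstdint>
#include <mutex>

class os_event_sketch {
	std::mutex		mtx;
	std::condition_variable	cond;
	bool			is_set = false;
	std::int64_t		sig_count = 0;
public:
	std::int64_t reset() {		/* cf. os_event_reset() */
		std::lock_guard<std::mutex> g(mtx);
		is_set = false;
		return sig_count;
	}
	void set() {			/* cf. os_event_set() */
		std::lock_guard<std::mutex> g(mtx);
		is_set = true;
		++sig_count;
		cond.notify_all();
	}
	void wait_low(std::int64_t count) {	/* cf. os_event_wait_low() */
		std::unique_lock<std::mutex> g(mtx);
		cond.wait(g, [&]{ return is_set || sig_count != count; });
	}
};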
@@ -1010,46 +1033,45 @@ srv_free_slot(
srv_sys_mutex_exit();
}
-/*********************************************************************//**
-Initializes the server. */
-UNIV_INTERN
+/** Initialize the server. */
+static
void
-srv_init(void)
-/*==========*/
+srv_init()
{
-#ifndef HAVE_ATOMIC_BUILTINS
- mutex_create(server_mutex_key, &server_mutex, SYNC_ANY_LATCH);
-#endif /* !HAVE_ATOMIC_BUILTINS */
-
- mutex_create(srv_innodb_monitor_mutex_key,
- &srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);
+ mutex_create(LATCH_ID_SRV_INNODB_MONITOR, &srv_innodb_monitor_mutex);
srv_sys.n_sys_threads = srv_read_only_mode
? 0
: srv_n_purge_threads + 1/* purge coordinator */;
if (!srv_read_only_mode) {
+ mutex_create(LATCH_ID_SRV_SYS, &srv_sys.mutex);
- mutex_create(srv_sys_mutex_key, &srv_sys.mutex, SYNC_THREADS);
-
- mutex_create(srv_sys_tasks_mutex_key,
- &srv_sys.tasks_mutex, SYNC_ANY_LATCH);
+ mutex_create(LATCH_ID_SRV_SYS_TASKS, &srv_sys.tasks_mutex);
for (ulint i = 0; i < srv_sys.n_sys_threads; ++i) {
srv_slot_t* slot = &srv_sys.sys_threads[i];
- slot->event = os_event_create();
+ slot->event = os_event_create(0);
ut_a(slot->event);
}
- srv_error_event = os_event_create();
+ srv_error_event = os_event_create(0);
- srv_monitor_event = os_event_create();
+ srv_monitor_event = os_event_create(0);
- srv_buf_dump_event = os_event_create();
+ srv_buf_dump_event = os_event_create(0);
+
+ buf_flush_event = os_event_create("buf_flush_event");
+
+ UT_LIST_INIT(srv_sys.tasks, &que_thr_t::queue);
}
+ srv_buf_resize_event = os_event_create(0);
+
+ ut_d(srv_master_thread_disabled_event = os_event_create(0));
+
/* page_zip_stat_per_index_mutex is acquired from:
1. page_zip_compress() (after SYNC_FSP)
2. page_zip_decompress()
@@ -1057,22 +1079,19 @@ srv_init(void)
4. innodb_cmp_per_index_update(), no other latches
since we do not acquire any other latches while holding this mutex,
it can have a very low level. We pick SYNC_ANY_LATCH for it. */
-
- mutex_create(
- page_zip_stat_per_index_mutex_key,
- &page_zip_stat_per_index_mutex, SYNC_ANY_LATCH);
+ mutex_create(LATCH_ID_PAGE_ZIP_STAT_PER_INDEX,
+ &page_zip_stat_per_index_mutex);
/* Create dummy indexes for infimum and supremum records */
dict_ind_init();
- srv_conc_init();
#ifdef WITH_INNODB_DISALLOW_WRITES
/* Writes have to be enabled on init or else we hang. Thus, we
always set the event here regardless of innobase_disallow_writes.
That flag will always be 0 at this point because it isn't settable
via my.cnf or command line arg. */
- srv_allow_writes_event = os_event_create();
+ srv_allow_writes_event = os_event_create(0);
os_event_set(srv_allow_writes_event);
#endif /* WITH_INNODB_DISALLOW_WRITES */
@@ -1086,40 +1105,38 @@ srv_init(void)
/*********************************************************************//**
Frees the data structures created in srv_init(). */
-UNIV_INTERN
void
srv_free(void)
/*==========*/
{
- srv_conc_free();
-
- /* The mutexes srv_sys.mutex and srv_sys.tasks_mutex should have
- been freed by sync_close() already. */
+ if (!srv_buf_resize_event) {
+ return;
+ }
- trx_i_s_cache_free(trx_i_s_cache);
+ mutex_free(&srv_innodb_monitor_mutex);
+ mutex_free(&page_zip_stat_per_index_mutex);
if (!srv_read_only_mode) {
- os_event_free(srv_buf_dump_event);
- srv_buf_dump_event = NULL;
+ mutex_free(&srv_sys.mutex);
+ mutex_free(&srv_sys.tasks_mutex);
+
+ for (ulint i = 0; i < srv_sys.n_sys_threads; ++i) {
+ os_event_destroy(srv_sys.sys_threads[i].event);
+ }
+
+ os_event_destroy(srv_error_event);
+ os_event_destroy(srv_monitor_event);
+ os_event_destroy(srv_buf_dump_event);
+ os_event_destroy(buf_flush_event);
}
-}
-/*********************************************************************//**
-Initializes the synchronization primitives, memory system, and the thread
-local storage. */
-UNIV_INTERN
-void
-srv_general_init(void)
-/*==================*/
-{
- ut_mem_init();
- /* Reset the system variables in the recovery module. */
- recv_sys_var_init();
- os_sync_init();
- sync_init();
- mem_init(srv_mem_pool_size);
- que_init();
- row_mysql_init();
+ os_event_destroy(srv_buf_resize_event);
+
+ ut_d(os_event_destroy(srv_master_thread_disabled_event));
+
+ dict_ind_free();
+
+ trx_i_s_cache_free(trx_i_s_cache);
}
/*********************************************************************//**
@@ -1129,29 +1146,17 @@ void
srv_normalize_init_values(void)
/*===========================*/
{
- ulint n;
- ulint i;
+ srv_sys_space.normalize();
- n = srv_n_data_files;
+ srv_tmp_space.normalize();
- for (i = 0; i < n; i++) {
- srv_data_file_sizes[i] = srv_data_file_sizes[i]
- * ((1024 * 1024) / UNIV_PAGE_SIZE);
- }
-
- srv_last_file_size_max = srv_last_file_size_max
- * ((1024 * 1024) / UNIV_PAGE_SIZE);
-
- srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE;
-
- srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
+ srv_log_buffer_size /= UNIV_PAGE_SIZE;
srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);
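	/* Worked example of the heuristic above, assuming the 128 MiB
	default buffer pool and 16 KiB pages: 134217728 / 16384 = 8192
	pages, so srv_lock_table_size = 5 * 8192 = 40960. */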
}
/*********************************************************************//**
Boots the InnoDB server. */
-UNIV_INTERN
void
srv_boot(void)
/*==========*/
@@ -1161,15 +1166,15 @@ srv_boot(void)
srv_normalize_init_values();
- /* Initialize synchronization primitives, memory management, and thread
- local storage */
-
- srv_general_init();
+ sync_check_init();
+ /* Reset the system variables in the recovery module. */
+ recv_sys_var_init();
+ trx_pool_init();
+ row_mysql_init();
/* Initialize this module */
srv_init();
- srv_mon_create();
}
/******************************************************************//**
@@ -1181,11 +1186,22 @@ srv_refresh_innodb_monitor_stats(void)
{
mutex_enter(&srv_innodb_monitor_mutex);
- srv_last_monitor_time = time(NULL);
+ time_t current_time = time(NULL);
+
+ if (difftime(current_time, srv_last_monitor_time) <= 60) {
+ /* We refresh InnoDB Monitor values so that averages are
+ printed from at most the last 60 seconds */
+ mutex_exit(&srv_innodb_monitor_mutex);
+ return;
+ }
+
+ srv_last_monitor_time = current_time;
os_aio_refresh_stats();
+#ifdef BTR_CUR_HASH_ADAPT
btr_cur_n_sea_old = btr_cur_n_sea;
+#endif /* BTR_CUR_HASH_ADAPT */
btr_cur_n_non_sea_old = btr_cur_n_non_sea;
log_refresh_stats();
@@ -1209,7 +1225,6 @@ srv_refresh_innodb_monitor_stats(void)
Outputs to a file the output of the InnoDB Monitor.
@return FALSE if not all information printed
due to failure to obtain necessary mutex */
-UNIV_INTERN
ibool
srv_printf_innodb_monitor(
/*======================*/
@@ -1256,6 +1271,7 @@ srv_printf_innodb_monitor(
fputs("----------\n"
"SEMAPHORES\n"
"----------\n", file);
+
sync_print(file);
/* Conceptually, srv_innodb_monitor_mutex has a very high latching
@@ -1316,7 +1332,31 @@ srv_printf_innodb_monitor(
"-------------------------------------\n", file);
ibuf_print(file);
- ha_print_info(file, btr_search_sys->hash_index);
+#ifdef BTR_CUR_HASH_ADAPT
+ for (ulint i = 0; i < btr_ahi_parts; ++i) {
+ const hash_table_t* table = btr_search_sys->hash_tables[i];
+
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
+ /* this is only used for buf_pool->page_hash */
+ ut_ad(!table->heaps);
+ /* this is used for the adaptive hash index */
+ ut_ad(table->heap);
+
+ const mem_heap_t* heap = table->heap;
+ /* The heap may change during the following call,
+ so the data displayed may be garbage. We intentionally
+ avoid acquiring btr_search_latches[] so that the
+ diagnostic output will not stop here even in case another
+ thread hangs while holding btr_search_latches[].
+
+ This should be safe from crashes, because
+ table->heap will be pointing to the same object
+ for the full lifetime of the server. Even during
+ btr_search_disable() the heap will stay valid. */
+ fprintf(file, "Hash table size " ULINTPF
+ ", node heap has " ULINTPF " buffer(s)\n",
+ table->n_cells, heap->base.count - !heap->free_block);
+ }
fprintf(file,
"%.2f hash searches/s, %.2f non-hash searches/s\n",
@@ -1325,6 +1365,12 @@ srv_printf_innodb_monitor(
(btr_cur_n_non_sea - btr_cur_n_non_sea_old)
/ time_elapsed);
btr_cur_n_sea_old = btr_cur_n_sea;
+#else /* BTR_CUR_HASH_ADAPT */
+ fprintf(file,
+ "%.2f non-hash searches/s\n",
+ (btr_cur_n_non_sea - btr_cur_n_non_sea_old)
+ / time_elapsed);
+#endif /* BTR_CUR_HASH_ADAPT */
btr_cur_n_non_sea_old = btr_cur_n_non_sea;
fputs("---\n"
@@ -1336,45 +1382,45 @@ srv_printf_innodb_monitor(
"BUFFER POOL AND MEMORY\n"
"----------------------\n", file);
fprintf(file,
- "Total memory allocated " ULINTPF
- "; in additional pool allocated " ULINTPF "\n",
- ut_total_allocated_memory,
- mem_pool_get_reserved(mem_comm_pool));
+ "Total large memory allocated " ULINTPF "\n"
+ "Dictionary memory allocated " ULINTPF "\n",
+ os_total_large_mem_allocated,
+ dict_sys_get_size());
buf_print_io(file);
fputs("--------------\n"
"ROW OPERATIONS\n"
"--------------\n", file);
- fprintf(file, "%ld queries inside InnoDB, %lu queries in queue\n",
- (long) srv_conc_get_active_threads(),
+ fprintf(file,
+ ULINTPF " queries inside InnoDB, "
+ ULINTPF " queries in queue\n",
+ srv_conc_get_active_threads(),
srv_conc_get_waiting_threads());
/* This is a dirty read, without holding trx_sys->mutex. */
- fprintf(file, "%lu read views open inside InnoDB\n",
- UT_LIST_GET_LEN(trx_sys->view_list));
+ fprintf(file, ULINTPF " read views open inside InnoDB\n",
+ trx_sys->mvcc->size());
n_reserved = fil_space_get_n_reserved_extents(0);
if (n_reserved > 0) {
fprintf(file,
- "%lu tablespace extents now reserved for"
+ ULINTPF " tablespace extents now reserved for"
" B-tree split operations\n",
- (ulong) n_reserved);
+ n_reserved);
}
-#ifdef UNIV_LINUX
- fprintf(file, "Main thread process no. %lu, id %lu, state: %s\n",
- (ulong) srv_main_thread_process_no,
- (ulong) srv_main_thread_id,
- srv_main_thread_op_info);
-#else
- fprintf(file, "Main thread id %lu, state: %s\n",
- (ulong) srv_main_thread_id,
+ fprintf(file,
+ "Process ID=" ULINTPF
+ ", Main thread ID=" ULINTPF
+ ", state: %s\n",
+ srv_main_thread_process_no,
+ srv_main_thread_id,
srv_main_thread_op_info);
-#endif
fprintf(file,
"Number of rows inserted " ULINTPF
- ", updated " ULINTPF ", deleted " ULINTPF
+ ", updated " ULINTPF
+ ", deleted " ULINTPF
", read " ULINTPF "\n",
(ulint) srv_stats.n_rows_inserted,
(ulint) srv_stats.n_rows_updated,
@@ -1430,7 +1476,6 @@ srv_printf_innodb_monitor(
/******************************************************************//**
Function to pass InnoDB status variables to MySQL */
-UNIV_INTERN
void
srv_export_innodb_status(void)
/*==========================*/
@@ -1522,6 +1567,7 @@ srv_export_innodb_status(void)
#else
export_vars.innodb_have_atomic_builtins = 0;
#endif
+
export_vars.innodb_page_size = UNIV_PAGE_SIZE;
export_vars.innodb_log_waits = srv_stats.log_waits;
@@ -1590,20 +1636,17 @@ srv_export_innodb_status(void)
export_vars.innodb_system_rows_deleted =
srv_stats.n_system_rows_deleted;
- export_vars.innodb_num_open_files = fil_n_file_opened;
+ export_vars.innodb_num_open_files = fil_system->n_open;
export_vars.innodb_truncated_status_writes =
srv_truncated_status_writes;
export_vars.innodb_available_undo_logs = srv_available_undo_logs;
export_vars.innodb_page_compression_saved = srv_stats.page_compression_saved;
- export_vars.innodb_page_compression_trim_sect512 = srv_stats.page_compression_trim_sect512;
- export_vars.innodb_page_compression_trim_sect4096 = srv_stats.page_compression_trim_sect4096;
export_vars.innodb_index_pages_written = srv_stats.index_pages_written;
export_vars.innodb_non_index_pages_written = srv_stats.non_index_pages_written;
export_vars.innodb_pages_page_compressed = srv_stats.pages_page_compressed;
export_vars.innodb_page_compressed_trim_op = srv_stats.page_compressed_trim_op;
- export_vars.innodb_page_compressed_trim_op_saved = srv_stats.page_compressed_trim_op_saved;
export_vars.innodb_pages_page_decompressed = srv_stats.pages_page_decompressed;
export_vars.innodb_pages_page_compression_error = srv_stats.pages_page_compression_error;
export_vars.innodb_pages_decrypted = srv_stats.pages_decrypted;
@@ -1613,6 +1656,12 @@ srv_export_innodb_status(void)
export_vars.innodb_n_rowlog_blocks_encrypted = srv_stats.n_rowlog_blocks_encrypted;
export_vars.innodb_n_rowlog_blocks_decrypted = srv_stats.n_rowlog_blocks_decrypted;
+ export_vars.innodb_n_temp_blocks_encrypted =
+ srv_stats.n_temp_blocks_encrypted;
+
+ export_vars.innodb_n_temp_blocks_decrypted =
+ srv_stats.n_temp_blocks_decrypted;
+
export_vars.innodb_defragment_compression_failures =
btr_defragment_compression_failures;
export_vars.innodb_defragment_failures = btr_defragment_failures;
@@ -1622,34 +1671,6 @@ srv_export_innodb_status(void)
export_vars.innodb_onlineddl_rowlog_pct_used = onlineddl_rowlog_pct_used;
export_vars.innodb_onlineddl_pct_progress = onlineddl_pct_progress;
-#ifdef UNIV_DEBUG
- rw_lock_s_lock(&purge_sys->latch);
- trx_id_t done_trx_no = purge_sys->done.trx_no;
- trx_id_t up_limit_id = purge_sys->view
- ? purge_sys->view->up_limit_id
- : 0;
- rw_lock_s_unlock(&purge_sys->latch);
-
- mutex_enter(&trx_sys->mutex);
- trx_id_t max_trx_id = trx_sys->rw_max_trx_id;
- mutex_exit(&trx_sys->mutex);
-
- if (!done_trx_no || max_trx_id < done_trx_no - 1) {
- export_vars.innodb_purge_trx_id_age = 0;
- } else {
- export_vars.innodb_purge_trx_id_age =
- (ulint) (max_trx_id - done_trx_no + 1);
- }
-
- if (!up_limit_id
- || max_trx_id < up_limit_id) {
- export_vars.innodb_purge_view_trx_id_age = 0;
- } else {
- export_vars.innodb_purge_view_trx_id_age =
- (ulint) (max_trx_id - up_limit_id);
- }
-#endif /* UNIV_DEBUG */
-
export_vars.innodb_sec_rec_cluster_reads =
srv_stats.n_sec_rec_cluster_reads;
export_vars.innodb_sec_rec_cluster_reads_avoided =
@@ -1683,6 +1704,7 @@ srv_export_innodb_status(void)
scrub_stat.page_split_failures_missing_index;
export_vars.innodb_scrub_page_split_failures_unknown =
scrub_stat.page_split_failures_unknown;
+ export_vars.innodb_scrub_log = srv_stats.n_log_scrubs;
}
mutex_exit(&srv_innodb_monitor_mutex);
@@ -1690,16 +1712,14 @@ srv_export_innodb_status(void)
/*********************************************************************//**
A thread which prints the info output by various InnoDB monitors.
-@return a dummy parameter */
-extern "C" UNIV_INTERN
+@return a dummy parameter */
+extern "C"
os_thread_ret_t
DECLARE_THREAD(srv_monitor_thread)(void*)
{
- ib_int64_t sig_count;
+ int64_t sig_count;
double time_elapsed;
time_t current_time;
- time_t last_table_monitor_time;
- time_t last_tablespace_monitor_time;
time_t last_monitor_time;
ulint mutex_skipped;
ibool last_srv_print_monitor;
@@ -1707,8 +1727,8 @@ DECLARE_THREAD(srv_monitor_thread)(void*)
ut_ad(!srv_read_only_mode);
#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "Lock timeout thread starts, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
+ ib::info() << "Lock timeout thread starts, id "
+ << os_thread_pf(os_thread_get_curr_id());
#endif /* UNIV_DEBUG_THREAD_CREATION */
#ifdef UNIV_PFS_THREAD
@@ -1717,8 +1737,6 @@ DECLARE_THREAD(srv_monitor_thread)(void*)
current_time = time(NULL);
srv_last_monitor_time = current_time;
- last_table_monitor_time = current_time;
- last_tablespace_monitor_time = current_time;
last_monitor_time = current_time;
mutex_skipped = 0;
last_srv_print_monitor = srv_print_innodb_monitor;
@@ -1778,69 +1796,16 @@ loop:
os_file_set_eof(srv_monitor_file);
mutex_exit(&srv_monitor_file_mutex);
}
-
- if (srv_print_innodb_tablespace_monitor
- && difftime(current_time,
- last_tablespace_monitor_time) > 60) {
- last_tablespace_monitor_time = current_time;
-
- fputs("========================"
- "========================\n",
- stderr);
-
- ut_print_timestamp(stderr);
-
- fputs(" INNODB TABLESPACE MONITOR OUTPUT\n"
- "========================"
- "========================\n",
- stderr);
-
- fsp_print(0);
- fputs("Validating tablespace\n", stderr);
- fsp_validate(0);
- fputs("Validation ok\n"
- "---------------------------------------\n"
- "END OF INNODB TABLESPACE MONITOR OUTPUT\n"
- "=======================================\n",
- stderr);
- }
-
- if (srv_print_innodb_table_monitor
- && difftime(current_time, last_table_monitor_time) > 60) {
-
- last_table_monitor_time = current_time;
-
- fprintf(stderr, "Warning: %s\n",
- DEPRECATED_MSG_INNODB_TABLE_MONITOR);
-
- fputs("===========================================\n",
- stderr);
-
- ut_print_timestamp(stderr);
-
- fputs(" INNODB TABLE MONITOR OUTPUT\n"
- "===========================================\n",
- stderr);
- dict_print();
-
- fputs("-----------------------------------\n"
- "END OF INNODB TABLE MONITOR OUTPUT\n"
- "==================================\n",
- stderr);
-
- fprintf(stderr, "Warning: %s\n",
- DEPRECATED_MSG_INNODB_TABLE_MONITOR);
- }
}
+ srv_refresh_innodb_monitor_stats();
+
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
goto exit_func;
}
if (srv_print_innodb_monitor
- || srv_print_innodb_lock_monitor
- || srv_print_innodb_tablespace_monitor
- || srv_print_innodb_table_monitor) {
+ || srv_print_innodb_lock_monitor) {
goto loop;
}
@@ -1852,7 +1817,7 @@ exit_func:
/* We count the number of threads in os_thread_exit(). A created
thread should always use that to exit and not use return() to exit. */
- os_thread_exit(NULL);
+ os_thread_exit();
OS_THREAD_DUMMY_RETURN;
}
@@ -1860,10 +1825,8 @@ exit_func:
/*********************************************************************//**
A thread which prints warnings about semaphore waits which have lasted
too long. These can be used to track bugs which cause hangs.
-Note: In order to make sync_arr_wake_threads_if_sema_free work as expected,
-we should avoid waiting any mutexes in this function!
-@return a dummy parameter */
-extern "C" UNIV_INTERN
+@return a dummy parameter */
+extern "C"
os_thread_ret_t
DECLARE_THREAD(srv_error_monitor_thread)(void*)
{
@@ -1871,7 +1834,7 @@ DECLARE_THREAD(srv_error_monitor_thread)(void*)
ulint fatal_cnt = 0;
lsn_t old_lsn;
lsn_t new_lsn;
- ib_int64_t sig_count;
+ int64_t sig_count;
/* longest waiting thread for a semaphore */
os_thread_id_t waiter = os_thread_get_curr_id();
os_thread_id_t old_waiter = waiter;
@@ -1884,8 +1847,8 @@ DECLARE_THREAD(srv_error_monitor_thread)(void*)
old_lsn = srv_start_lsn;
#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "Error monitor thread starts, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
+ ib::info() << "Error monitor thread starts, id "
+ << os_thread_pf(os_thread_get_curr_id());
#endif /* UNIV_DEBUG_THREAD_CREATION */
#ifdef UNIV_PFS_THREAD
@@ -1898,41 +1861,24 @@ loop:
if (log_peek_lsn(&new_lsn)) {
if (new_lsn < old_lsn) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: old log sequence number " LSN_PF
- " was greater\n"
- "InnoDB: than the new log sequence number " LSN_PF "!\n"
- "InnoDB: Please submit a bug report"
- " to https://jira.mariadb.org\n",
- old_lsn, new_lsn);
+ ib::error() << "Old log sequence number " << old_lsn << " was"
+ << " greater than the new log sequence number "
+ << new_lsn << ". Please submit a bug report to"
+ " https://jira.mariadb.org/";
ut_ad(0);
}
old_lsn = new_lsn;
}
- if (difftime(time(NULL), srv_last_monitor_time) > 60) {
- /* We referesh InnoDB Monitor values so that averages are
- printed from at most 60 last seconds */
-
- srv_refresh_innodb_monitor_stats();
- }
-
/* Update the statistics collected for deciding LRU
eviction policy. */
buf_LRU_stat_update();
- /* In case mutex_exit is not a memory barrier, it is
- theoretically possible some threads are left waiting though
- the semaphore is already released. Wake up those threads: */
-
- sync_arr_wake_threads_if_sema_free();
-
if (sync_array_print_long_waits(&waiter, &sema)
&& sema == old_sema && os_thread_eq(waiter, old_waiter)) {
#if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES)
- if (srv_allow_writes_event->is_set) {
+ if (os_event_is_set(srv_allow_writes_event)) {
#endif /* WITH_WSREP */
fatal_cnt++;
#if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES)
@@ -1941,20 +1887,15 @@ loop:
"WSREP: avoiding InnoDB self crash due to long "
"semaphore wait of > %lu seconds\n"
"Server is processing SST donor operation, "
- "fatal_cnt now: %lu",
- (ulong) srv_fatal_semaphore_wait_threshold, fatal_cnt);
+ "fatal_cnt now: " ULINTPF,
+ srv_fatal_semaphore_wait_threshold, fatal_cnt);
}
#endif /* WITH_WSREP */
if (fatal_cnt > 10) {
-
- fprintf(stderr,
- "InnoDB: Error: semaphore wait has lasted"
- " > %lu seconds\n"
- "InnoDB: We intentionally crash the server,"
- " because it appears to be hung.\n",
- (ulong) srv_fatal_semaphore_wait_threshold);
-
- ut_error;
+ ib::fatal() << "Semaphore wait has lasted > "
+ << srv_fatal_semaphore_wait_threshold
+ << " seconds. We intentionally crash the"
+ " server because it appears to be hung.";
}
} else {
fatal_cnt = 0;
@@ -1981,14 +1922,13 @@ loop:
/* We count the number of threads in os_thread_exit(). A created
thread should always use that to exit and not use return() to exit. */
- os_thread_exit(NULL);
+ os_thread_exit();
OS_THREAD_DUMMY_RETURN;
}
/******************************************************************//**
Increment the server activity count. */
-UNIV_INTERN
void
srv_inc_activity_count(void)
/*========================*/
@@ -2001,7 +1941,6 @@ Check whether any background thread is active. If so return the thread
type.
@return SRV_NONE if all are suspended or have exited, thread
type if any are still active. */
-UNIV_INTERN
srv_thread_type
srv_get_active_thread_type(void)
/*============================*/
@@ -2023,38 +1962,34 @@ srv_get_active_thread_type(void)
srv_sys_mutex_exit();
- /* Check only on shutdown. */
- if (ret == SRV_NONE
- && srv_shutdown_state != SRV_SHUTDOWN_NONE
- && trx_purge_state() != PURGE_STATE_DISABLED
- && trx_purge_state() != PURGE_STATE_EXIT) {
-
- ret = SRV_PURGE;
+ if (ret == SRV_NONE && srv_shutdown_state != SRV_SHUTDOWN_NONE
+ && purge_sys != NULL) {
+ /* Check only on shutdown. */
+ switch (trx_purge_state()) {
+ case PURGE_STATE_RUN:
+ case PURGE_STATE_STOP:
+ ret = SRV_PURGE;
+ break;
+ case PURGE_STATE_INIT:
+ case PURGE_STATE_DISABLED:
+ case PURGE_STATE_EXIT:
+ break;
+ }
}
return(ret);
}
-/*******************************************************************//**
-Tells the InnoDB server that there has been activity in the database
-and wakes up the master thread if it is suspended (not sleeping). Used
-in the MySQL interface. Note that there is a small chance that the master
-thread stays suspended (we do not protect our operation with the
-srv_sys_t->mutex, for performance reasons). */
-UNIV_INTERN
+/** Wake up the InnoDB master thread if it was suspended (not sleeping). */
void
-srv_active_wake_master_thread(void)
-/*===============================*/
+srv_active_wake_master_thread_low()
{
- if (srv_read_only_mode) {
- return;
- }
-
+ ut_ad(!srv_read_only_mode);
ut_ad(!srv_sys_mutex_own());
srv_inc_activity_count();
- if (srv_sys.n_threads_active[SRV_MASTER] == 0) {
+ if (my_atomic_loadlint(&srv_sys.n_threads_active[SRV_MASTER]) == 0) {
srv_slot_t* slot;
srv_sys_mutex_enter();
@@ -2072,37 +2007,25 @@ srv_active_wake_master_thread(void)
}
}
-/*******************************************************************//**
-Tells the purge thread that there has been activity in the database
-and wakes up the purge thread if it is suspended (not sleeping). Note
-that there is a small chance that the purge thread stays suspended
-(we do not protect our check with the srv_sys_t:mutex and the
-purge_sys->latch, for performance reasons). */
-UNIV_INTERN
+/** Wake up the purge threads if there is work to do. */
void
-srv_wake_purge_thread_if_not_active(void)
-/*=====================================*/
+srv_wake_purge_thread_if_not_active()
{
ut_ad(!srv_sys_mutex_own());
if (purge_sys->state == PURGE_STATE_RUN
- && srv_sys.n_threads_active[SRV_PURGE] == 0) {
+ && !my_atomic_loadlint(&srv_sys.n_threads_active[SRV_PURGE])
+ && my_atomic_loadlint(&trx_sys->rseg_history_len)) {
srv_release_threads(SRV_PURGE, 1);
}
}
-/*******************************************************************//**
-Wakes up the master thread if it is suspended or being suspended. */
-UNIV_INTERN
+/** Wake up the master thread if it is suspended or being suspended. */
void
-srv_wake_master_thread(void)
-/*========================*/
+srv_wake_master_thread()
{
- ut_ad(!srv_sys_mutex_own());
-
srv_inc_activity_count();
-
srv_release_threads(SRV_MASTER, 1);
}
@@ -2110,7 +2033,6 @@ srv_wake_master_thread(void)
Get current server activity count. We don't hold srv_sys::mutex while
reading this value as it is only used in heuristics.
@return activity count. */
-UNIV_INTERN
ulint
srv_get_activity_count(void)
/*========================*/
@@ -2121,7 +2043,6 @@ srv_get_activity_count(void)
/*******************************************************************//**
Check if there has been any activity.
@return FALSE if no change in activity counter. */
-UNIV_INTERN
ibool
srv_check_activity(
/*===============*/
@@ -2145,7 +2066,7 @@ srv_sync_log_buffer_in_background(void)
srv_main_thread_op_info = "flushing log";
if (difftime(current_time, srv_last_log_flush_time)
>= srv_flush_log_at_timeout) {
- log_buffer_sync_in_background(TRUE);
+ log_buffer_sync_in_background(true);
srv_last_log_flush_time = current_time;
srv_log_writes_and_flush++;
}
@@ -2196,25 +2117,74 @@ srv_shutdown_print_master_pending(
*last_print_time = current_time;
if (n_tables_to_drop) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Waiting for "
- "%lu table(s) to be dropped\n",
- (ulong) n_tables_to_drop);
+ ib::info() << "Waiting for " << n_tables_to_drop
+ << " table(s) to be dropped";
}
/* Check change buffer merge, we only wait for change buffer
merge if it is a slow shutdown */
if (!srv_fast_shutdown && n_bytes_merged) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Waiting for change "
- "buffer merge to complete\n"
- " InnoDB: number of bytes of change buffer "
- "just merged: %lu\n",
- n_bytes_merged);
+ ib::info() << "Waiting for change buffer merge to"
+ " complete number of bytes of change buffer"
+ " just merged: " << n_bytes_merged;
}
}
}
+#ifdef UNIV_DEBUG
+/** Waits in a loop as long as the master thread is disabled (debug) */
+static
+void
+srv_master_do_disabled_loop(void)
+{
+ if (!srv_master_thread_disabled_debug) {
+ /* We return here to avoid changing op_info. */
+ return;
+ }
+
+ srv_main_thread_op_info = "disabled";
+
+ while (srv_master_thread_disabled_debug) {
+ os_event_set(srv_master_thread_disabled_event);
+ if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
+ break;
+ }
+ os_thread_sleep(100000);
+ }
+
+ srv_main_thread_op_info = "";
+}
+
+/** Enables or disables the master thread (debug). Used by:
+ SET GLOBAL innodb_master_thread_disabled_debug = 1 (or 0).
+@param[in] thd thread handle
+@param[in] var pointer to system variable
+@param[out] var_ptr where the formal string goes
+@param[in] save immediate result from check function */
+void
+srv_master_thread_disabled_debug_update(
+ THD* thd,
+ struct st_mysql_sys_var* var,
+ void* var_ptr,
+ const void* save)
+{
+ /* This method is protected by a mutex, as is every SET GLOBAL update. */
+ ut_ad(srv_master_thread_disabled_event != NULL);
+
+ const bool disable = *static_cast<const my_bool*>(save);
+
+ const int64_t sig_count = os_event_reset(
+ srv_master_thread_disabled_event);
+
+ srv_master_thread_disabled_debug = disable;
+
+ if (disable) {
+ os_event_wait_low(
+ srv_master_thread_disabled_event, sig_count);
+ }
+}
+#endif /* UNIV_DEBUG */
+
/*********************************************************************//**
Perform the tasks that the master thread is supposed to do when the
server is active. There are two types of tasks. The first category is
@@ -2229,7 +2199,6 @@ srv_master_do_active_tasks(void)
{
time_t cur_time = time(NULL);
ulonglong counter_time = microsecond_interval_timer();
- ulint n_evicted = 0;
/* First do the tasks that we are suppose to do at each
invocation of this function. */
@@ -2246,6 +2215,8 @@ srv_master_do_active_tasks(void)
MONITOR_INC_TIME_IN_MICRO_SECS(
MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND, counter_time);
+ ut_d(srv_master_do_disabled_loop());
+
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
return;
}
@@ -2271,28 +2242,31 @@ srv_master_do_active_tasks(void)
/* Now see if various tasks that are performed at defined
intervals need to be performed. */
-#ifdef MEM_PERIODIC_CHECK
- /* Check magic numbers of every allocated mem block once in
- SRV_MASTER_MEM_VALIDATE_INTERVAL seconds */
- if (cur_time % SRV_MASTER_MEM_VALIDATE_INTERVAL == 0) {
- mem_validate_all_blocks();
- MONITOR_INC_TIME_IN_MICRO_SECS(
- MONITOR_SRV_MEM_VALIDATE_MICROSECOND, counter_time);
- }
-#endif
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
return;
}
if (cur_time % SRV_MASTER_DICT_LRU_INTERVAL == 0) {
srv_main_thread_op_info = "enforcing dict cache limit";
- n_evicted = srv_master_evict_from_table_cache(50);
- MONITOR_INC_VALUE(
- MONITOR_SRV_DICT_LRU_EVICT_COUNT_ACTIVE, n_evicted);
+ ulint n_evicted = srv_master_evict_from_table_cache(50);
+ if (n_evicted != 0) {
+ MONITOR_INC_VALUE(
+ MONITOR_SRV_DICT_LRU_EVICT_COUNT_ACTIVE, n_evicted);
+ }
MONITOR_INC_TIME_IN_MICRO_SECS(
MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time);
}
+ /* The periodic log_checkpoint() call here makes it harder to
+ reproduce bugs in crash recovery or mariabackup --prepare, or
+ in code that writes the redo log records. Omitting the call
+ here should not affect correctness, because log_free_check()
+ should still be invoking checkpoints when needed. In a
+ production server, those calls could cause "furious flushing"
+ and stall the server. Normally we want to perform checkpoints
+ early and often to avoid those situations. */
+ DBUG_EXECUTE_IF("ib_log_checkpoint_avoid", return;);
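	/* A minimal sketch of the dbug mechanism used above (debug
	builds only): the keyword is armed per connection, e.g. with
	SET debug_dbug='+d,ib_log_checkpoint_avoid' in a test, and
	DBUG_EXECUTE_IF() runs its statement only while the keyword is
	armed; illustrated stand-alone (hypothetical function name):

		static bool checkpoint_suppressed_sketch()
		{
			DBUG_EXECUTE_IF("ib_log_checkpoint_avoid",
					return(true););
			return(false);
		}
	*/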
+
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
return;
}
@@ -2300,7 +2274,7 @@ srv_master_do_active_tasks(void)
/* Make a new checkpoint */
if (cur_time % SRV_MASTER_CHECKPOINT_INTERVAL == 0) {
srv_main_thread_op_info = "making checkpoint";
- log_checkpoint(TRUE, FALSE);
+ log_checkpoint(true);
MONITOR_INC_TIME_IN_MICRO_SECS(
MONITOR_SRV_CHECKPOINT_MICROSECOND, counter_time);
}
@@ -2319,7 +2293,6 @@ void
srv_master_do_idle_tasks(void)
/*==========================*/
{
- ulint n_evicted = 0;
++srv_main_idle_loops;
MONITOR_INC(MONITOR_MASTER_IDLE_LOOPS);
@@ -2335,6 +2308,8 @@ srv_master_do_idle_tasks(void)
MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND,
counter_time);
+ ut_d(srv_master_do_disabled_loop());
+
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
return;
}
@@ -2356,9 +2331,11 @@ srv_master_do_idle_tasks(void)
}
srv_main_thread_op_info = "enforcing dict cache limit";
- n_evicted = srv_master_evict_from_table_cache(100);
- MONITOR_INC_VALUE(
- MONITOR_SRV_DICT_LRU_EVICT_COUNT_IDLE, n_evicted);
+ ulint n_evicted = srv_master_evict_from_table_cache(100);
+ if (n_evicted != 0) {
+ MONITOR_INC_VALUE(
+ MONITOR_SRV_DICT_LRU_EVICT_COUNT_IDLE, n_evicted);
+ }
MONITOR_INC_TIME_IN_MICRO_SECS(
MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time);
@@ -2367,13 +2344,23 @@ srv_master_do_idle_tasks(void)
MONITOR_INC_TIME_IN_MICRO_SECS(
MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time);
+ /* The periodic log_checkpoint() call here makes it harder to
+ reproduce bugs in crash recovery or mariabackup --prepare, or
+ in code that writes the redo log records. Omitting the call
+ here should not affect correctness, because log_free_check()
+ should still be invoking checkpoints when needed. In a
+ production server, those calls could cause "furious flushing"
+ and stall the server. Normally we want to perform checkpoints
+ early and often to avoid those situations. */
+ DBUG_EXECUTE_IF("ib_log_checkpoint_avoid", return;);
+
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
return;
}
/* Make a new checkpoint */
srv_main_thread_op_info = "making checkpoint";
- log_checkpoint(TRUE, FALSE);
+ log_checkpoint(true);
MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_CHECKPOINT_MICROSECOND,
counter_time);
}
@@ -2432,8 +2419,8 @@ srv_master_sleep(void)
/*********************************************************************//**
The master thread controlling the server.
-@return a dummy parameter */
-extern "C" UNIV_INTERN
+@return a dummy parameter */
+extern "C"
os_thread_ret_t
DECLARE_THREAD(srv_master_thread)(
/*==============================*/
@@ -2442,6 +2429,7 @@ DECLARE_THREAD(srv_master_thread)(
os_thread_create */
{
my_thread_init();
+ DBUG_ENTER("srv_master_thread");
srv_slot_t* slot;
ulint old_activity_count = srv_get_activity_count();
@@ -2449,8 +2437,8 @@ DECLARE_THREAD(srv_master_thread)(
ut_ad(!srv_read_only_mode);
#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "Master thread starts, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
+ ib::info() << "Master thread starts, id "
+ << os_thread_pf(os_thread_get_curr_id());
#endif /* UNIV_DEBUG_THREAD_CREATION */
#ifdef UNIV_PFS_THREAD
@@ -2464,10 +2452,6 @@ DECLARE_THREAD(srv_master_thread)(
ut_a(slot == srv_sys.sys_threads);
loop:
- if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
- goto suspend_thread;
- }
-
while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
srv_master_sleep();
@@ -2482,7 +2466,6 @@ loop:
}
}
-suspend_thread:
switch (srv_shutdown_state) {
case SRV_SHUTDOWN_NONE:
break;
@@ -2499,7 +2482,7 @@ suspend_thread:
}
srv_suspend_thread(slot);
my_thread_end();
- os_thread_exit(NULL);
+ os_thread_exit();
}
srv_main_thread_op_info = "suspending";
@@ -2554,10 +2537,11 @@ srv_purge_should_exit(ulint n_purged)
/*********************************************************************//**
Fetch and execute a task from the work queue.
-@return true if a task was executed */
+@param[in,out]	slot	purge worker thread slot
+@return true if a task was executed */
static
bool
-srv_task_execute(void)
+srv_task_execute(ut_d(srv_slot_t *slot))
/*==================*/
{
que_thr_t* thr = NULL;
@@ -2573,17 +2557,18 @@ srv_task_execute(void)
ut_a(que_node_get_type(thr->child) == QUE_NODE_PURGE);
- UT_LIST_REMOVE(queue, srv_sys.tasks, thr);
+ UT_LIST_REMOVE(srv_sys.tasks, thr);
}
mutex_exit(&srv_sys.tasks_mutex);
if (thr != NULL) {
+ ut_d(thr->thread_slot = slot);
que_run_threads(thr);
- os_atomic_inc_ulint(
- &purge_sys->bh_mutex, &purge_sys->n_completed, 1);
+ my_atomic_addlint(
+ &purge_sys->n_completed, 1);
}
return(thr != NULL);
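
The ut_d() wrapper in the new srv_task_execute() signature compiles its
argument away outside debug builds, so the slot parameter exists only when
UNIV_DEBUG is defined. A self-contained analog (keyed off NDEBUG here; the
real macro lives in the InnoDB headers and keys off UNIV_DEBUG):

	#include <cstdio>

	#ifndef NDEBUG
	# define ut_d(EXPR) EXPR
	#else
	# define ut_d(EXPR)
	#endif

	/* In release builds this declares task_execute() with no
	parameter, and the call below passes no argument. */
	static bool task_execute(ut_d(int slot_id))
	{
		ut_d(std::printf("executing on slot %d\n", slot_id));
		return true;
	}

	int main()
	{
		return task_execute(ut_d(7)) ? 0 : 1;
	}
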
@@ -2591,8 +2576,8 @@ srv_task_execute(void)
/*********************************************************************//**
Worker thread that reads tasks from the work queue and executes them.
-@return a dummy parameter */
-extern "C" UNIV_INTERN
+@return a dummy parameter */
+extern "C"
os_thread_ret_t
DECLARE_THREAD(srv_worker_thread)(
/*==============================*/
@@ -2605,32 +2590,36 @@ DECLARE_THREAD(srv_worker_thread)(
ut_ad(!srv_read_only_mode);
ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
+ my_thread_init();
+ THD* thd = innobase_create_background_thd("InnoDB purge worker");
#ifdef UNIV_DEBUG_THREAD_CREATION
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: worker thread starting, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
+ ib::info() << "Worker thread starting, id "
+ << os_thread_pf(os_thread_get_curr_id());
#endif /* UNIV_DEBUG_THREAD_CREATION */
slot = srv_reserve_slot(SRV_WORKER);
- ut_a(srv_n_purge_threads > 1);
-
- srv_sys_mutex_enter();
-
- ut_a(srv_sys.n_threads_active[SRV_WORKER] < srv_n_purge_threads);
+#ifdef UNIV_DEBUG
+ UT_LIST_INIT(slot->debug_sync,
+ &srv_slot_t::debug_sync_t::debug_sync_list);
+ rw_lock_create(PFS_NOT_INSTRUMENTED, &slot->debug_sync_lock,
+ SYNC_NO_ORDER_CHECK);
+#endif
- srv_sys_mutex_exit();
+ ut_a(srv_n_purge_threads > 1);
+ ut_a(ulong(my_atomic_loadlint(&srv_sys.n_threads_active[SRV_WORKER]))
+ < srv_n_purge_threads);
/* We need to ensure that the worker threads exit after the
- purge coordinator thread. Otherwise the purge coordinaor can
+ purge coordinator thread. Otherwise the purge coordinator can
end up waiting forever in trx_purge_wait_for_workers_to_complete() */
do {
srv_suspend_thread(slot);
srv_resume_thread(slot);
- if (srv_task_execute()) {
+ if (srv_task_execute(ut_d(slot))) {
/* If there are tasks in the queue, wakeup
the purge coordinator thread. */
@@ -2648,33 +2637,34 @@ DECLARE_THREAD(srv_worker_thread)(
ut_a(!purge_sys->running);
ut_a(purge_sys->state == PURGE_STATE_EXIT);
- ut_a(srv_shutdown_state > SRV_SHUTDOWN_NONE);
rw_lock_x_unlock(&purge_sys->latch);
#ifdef UNIV_DEBUG_THREAD_CREATION
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Purge worker thread exiting, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
+ ib::info() << "Purge worker thread exiting, id "
+ << os_thread_pf(os_thread_get_curr_id());
#endif /* UNIV_DEBUG_THREAD_CREATION */
+ innobase_destroy_background_thd(thd);
my_thread_end();
/* We count the number of threads in os_thread_exit(). A created
thread should always use that to exit and not use return() to exit. */
- os_thread_exit(NULL);
+ os_thread_exit();
OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
}
-/*********************************************************************//**
-Do the actual purge operation.
+/** Do the actual purge operation.
+@param[in,out] n_total_purged total number of purged pages
+@param[in,out] slot purge coordinator thread slot
@return length of history list before the last purge batch. */
static
ulint
-srv_do_purge(
-/*=========*/
- ulint n_threads, /*!< in: number of threads to use */
- ulint* n_total_purged) /*!< in/out: total pages purged */
+srv_do_purge(ulint* n_total_purged
+#ifdef UNIV_DEBUG
+ , srv_slot_t *slot
+#endif
+)
{
ulint n_pages_purged;
@@ -2682,6 +2672,7 @@ srv_do_purge(
static ulint n_use_threads = 0;
static ulint rseg_history_len = 0;
ulint old_activity_count = srv_get_activity_count();
+ const ulint n_threads = srv_n_purge_threads;
ut_a(n_threads > 0);
ut_ad(!srv_read_only_mode);
@@ -2730,9 +2721,20 @@ srv_do_purge(
break;
}
+ ulint undo_trunc_freq =
+ purge_sys->undo_trunc.get_rseg_truncate_frequency();
+
+ ulint rseg_truncate_frequency = ut_min(
+ static_cast<ulint>(srv_purge_rseg_truncate_frequency),
+ undo_trunc_freq);
+
n_pages_purged = trx_purge(
n_use_threads, srv_purge_batch_size,
- (++count % TRX_SYS_N_RSEGS) == 0);
+ (++count % rseg_truncate_frequency) == 0
+#ifdef UNIV_DEBUG
+ , slot
+#endif
+ );
*n_total_purged += n_pages_purged;
} while (!srv_purge_should_exit(n_pages_purged)
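
The frequency computation above means undo truncation is evaluated on
every Nth purge batch, where N is the smaller of the global
srv_purge_rseg_truncate_frequency setting and the per-tablespace
frequency. A runnable sketch of the gating, with hypothetical values:

	#include <algorithm>
	#include <cstdio>

	int main()
	{
		unsigned srv_freq = 128;   /* hypothetical global setting */
		unsigned space_freq = 64;  /* hypothetical per-space value */
		unsigned freq = std::min(srv_freq, space_freq); /* ut_min */

		for (unsigned count = 1; count <= 256; count++) {
			if (count % freq == 0) {
				std::printf("batch %u: check truncation\n",
					    count);
			}
		}
		return 0;
	}
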
@@ -2761,7 +2763,7 @@ srv_purge_coordinator_suspend(
/** Maximum wait time on the purge event, in micro-seconds. */
static const ulint SRV_PURGE_MAX_TIMEOUT = 10000;
- ib_int64_t sig_count = srv_suspend_thread(slot);
+ int64_t sig_count = srv_suspend_thread(slot);
do {
rw_lock_x_lock(&purge_sys->latch);
@@ -2816,8 +2818,8 @@ srv_purge_coordinator_suspend(
/*********************************************************************//**
Purge coordinator thread that schedules the purge tasks.
-@return a dummy parameter */
-extern "C" UNIV_INTERN
+@return a dummy parameter */
+extern "C"
os_thread_ret_t
DECLARE_THREAD(srv_purge_coordinator_thread)(
/*=========================================*/
@@ -2825,7 +2827,7 @@ DECLARE_THREAD(srv_purge_coordinator_thread)(
required by os_thread_create */
{
my_thread_init();
-
+ THD* thd = innobase_create_background_thd("InnoDB purge coordinator");
srv_slot_t* slot;
ulint n_total_purged = ULINT_UNDEFINED;
@@ -2846,13 +2848,18 @@ DECLARE_THREAD(srv_purge_coordinator_thread)(
#endif /* UNIV_PFS_THREAD */
#ifdef UNIV_DEBUG_THREAD_CREATION
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Purge coordinator thread created, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
+ ib::info() << "Purge coordinator thread created, id "
+ << os_thread_pf(os_thread_get_curr_id());
#endif /* UNIV_DEBUG_THREAD_CREATION */
slot = srv_reserve_slot(SRV_PURGE);
+#ifdef UNIV_DEBUG
+ UT_LIST_INIT(slot->debug_sync,
+ &srv_slot_t::debug_sync_t::debug_sync_list);
+ rw_lock_create(PFS_NOT_INSTRUMENTED, &slot->debug_sync_lock,
+ SYNC_NO_ORDER_CHECK);
+#endif
ulint rseg_history_len = trx_sys->rseg_history_len;
do {
@@ -2875,8 +2882,11 @@ DECLARE_THREAD(srv_purge_coordinator_thread)(
n_total_purged = 0;
- rseg_history_len = srv_do_purge(
- srv_n_purge_threads, &n_total_purged);
+ rseg_history_len = srv_do_purge(&n_total_purged
+#ifdef UNIV_DEBUG
+ , slot
+#endif
+ );
} while (!srv_purge_should_exit(n_total_purged));
/* The task queue should always be empty, independent of fast
@@ -2890,25 +2900,42 @@ DECLARE_THREAD(srv_purge_coordinator_thread)(
purge_sys->state = PURGE_STATE_EXIT;
+	/* If any undo-tablespace truncate operation is pending, clear
+	it, as we plan to shut down the purge thread. */
+ purge_sys->undo_trunc.clear();
+
purge_sys->running = false;
+ /* Ensure that the wait in trx_purge_stop() will terminate. */
+ os_event_set(purge_sys->event);
+
rw_lock_x_unlock(&purge_sys->latch);
#ifdef UNIV_DEBUG_THREAD_CREATION
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Purge coordinator exiting, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
+ ib::info() << "Purge coordinator exiting, id "
+ << os_thread_pf(os_thread_get_curr_id());
#endif /* UNIV_DEBUG_THREAD_CREATION */
/* Ensure that all the worker threads quit. */
- if (srv_n_purge_threads > 1) {
- srv_release_threads(SRV_WORKER, srv_n_purge_threads - 1);
+ if (ulint n_workers = srv_n_purge_threads - 1) {
+ const srv_slot_t* slot;
+ const srv_slot_t* const end = &srv_sys.sys_threads[
+ srv_sys.n_sys_threads];
+
+ do {
+ srv_release_threads(SRV_WORKER, n_workers);
+ srv_sys_mutex_enter();
+ for (slot = &srv_sys.sys_threads[2];
+ !slot++->in_use && slot < end; );
+ srv_sys_mutex_exit();
+ } while (slot < end);
}
+ innobase_destroy_background_thd(thd);
my_thread_end();
/* We count the number of threads in os_thread_exit(). A created
thread should always use that to exit and not use return() to exit. */
- os_thread_exit(NULL);
+ os_thread_exit();
OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
}
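
The terse scan in the worker-exit wait above ("!slot++->in_use && slot <
end") folds the whole search into the for-loop condition. A plainer
restatement of the intent, over a simplified slot type: keep releasing
workers while any worker slot is still in use.

	struct slot_t { bool in_use; };

	static bool any_worker_in_use(const slot_t* slot,
				      const slot_t* end)
	{
		for (; slot < end; ++slot) {
			if (slot->in_use) {
				return true;
			}
		}
		return false;
	}

	/* do { release_workers(); }
	   while (any_worker_in_use(begin, end)); */
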
@@ -2916,7 +2943,6 @@ DECLARE_THREAD(srv_purge_coordinator_thread)(
/**********************************************************************//**
Enqueues a task to server task queue and releases a worker thread, if there
is a suspended one. */
-UNIV_INTERN
void
srv_que_task_enqueue_low(
/*=====================*/
@@ -2925,7 +2951,7 @@ srv_que_task_enqueue_low(
ut_ad(!srv_read_only_mode);
mutex_enter(&srv_sys.tasks_mutex);
- UT_LIST_ADD_LAST(queue, srv_sys.tasks, thr);
+ UT_LIST_ADD_LAST(srv_sys.tasks, thr);
mutex_exit(&srv_sys.tasks_mutex);
@@ -2934,8 +2960,7 @@ srv_que_task_enqueue_low(
/**********************************************************************//**
Get count of tasks in the queue.
-@return number of tasks in queue */
-UNIV_INTERN
+@return number of tasks in queue */
ulint
srv_get_task_queue_length(void)
/*===========================*/
@@ -2954,14 +2979,17 @@ srv_get_task_queue_length(void)
}
/** Wake up the purge threads. */
-UNIV_INTERN
void
srv_purge_wakeup()
{
ut_ad(!srv_read_only_mode);
+ ut_ad(!sync_check_iterate(sync_check()));
- if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
+ if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
+ return;
+ }
+ do {
srv_release_threads(SRV_PURGE, 1);
if (srv_n_purge_threads > 1) {
@@ -2969,21 +2997,83 @@ srv_purge_wakeup()
srv_release_threads(SRV_WORKER, n_workers);
}
+ } while (!my_atomic_loadptr_explicit(reinterpret_cast<void**>
+ (&srv_running),
+ MY_MEMORY_ORDER_RELAXED)
+ && (srv_sys.n_threads_active[SRV_WORKER]
+ || srv_sys.n_threads_active[SRV_PURGE]));
+}
+
+/** Shut down the purge threads. */
+void srv_purge_shutdown()
+{
+ do {
+ ut_ad(!srv_undo_sources);
+ srv_purge_wakeup();
+ } while (srv_sys.sys_threads[SRV_PURGE_SLOT].in_use);
+}
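
The rewritten srv_purge_wakeup() keeps signalling until either srv_running
is observed non-NULL again or no purge/worker thread remains active; both
reads use relaxed ordering because only eventual visibility matters here.
A std::atomic analog of the loop shape (the actual signalling is elided,
and the fetch_sub stands in for threads observing the wakeup and exiting):

	#include <atomic>

	static std::atomic<void*> running{nullptr};
	static std::atomic<long>  n_active{2};

	static void purge_wakeup_analog()
	{
		do {
			/* release/signal purge and worker threads here */
			n_active.fetch_sub(1, std::memory_order_relaxed);
		} while (!running.load(std::memory_order_relaxed)
			 && n_active.load(std::memory_order_relaxed) > 0);
	}
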
+
+/** Check if tablespace is being truncated.
+(Ignore the system tablespace, as we do not re-create it; hence some
+of the actions that this function suppresses for independent
+tablespaces do not apply to the system tablespace.)
+@param	space_id	tablespace ID to check for a truncate action
+@return true if being truncated, false if not being truncated
+		or if the tablespace is the system tablespace. */
+bool
+srv_is_tablespace_truncated(ulint space_id)
+{
+ if (is_system_tablespace(space_id)) {
+ return(false);
}
+
+ return(truncate_t::is_tablespace_truncated(space_id)
+ || undo::Truncate::is_tablespace_truncated(space_id));
+
}
-/** Check whether given space id is undo tablespace id
-@param[in] space_id space id to check
-@return true if it is undo tablespace else false. */
+/** Check if tablespace was truncated.
+@param[in] space space object to check for truncate action
+@return true if tablespace was truncated and we still have an active
+MLOG_TRUNCATE REDO log record. */
bool
-srv_is_undo_tablespace(
- ulint space_id)
+srv_was_tablespace_truncated(const fil_space_t* space)
{
- if (srv_undo_space_id_start == 0) {
- return (false);
+ if (space == NULL) {
+ ut_ad(0);
+ return(false);
}
- return(space_id >= srv_undo_space_id_start
- && space_id < (srv_undo_space_id_start
- + srv_undo_tablespaces_open));
+ return (!is_system_tablespace(space->id)
+ && truncate_t::was_tablespace_truncated(space->id));
}
+
+#ifdef UNIV_DEBUG
+static ulint get_first_slot(srv_thread_type type)
+{
+ switch (type) {
+ case SRV_MASTER:
+ return SRV_MASTER_SLOT;
+ case SRV_PURGE:
+ return SRV_PURGE_SLOT;
+ case SRV_WORKER:
+		/* Worker slots start after the master and purge slots. */
+ return SRV_WORKER_SLOTS_START;
+ default:
+ ut_error;
+ }
+}
+
+void srv_for_each_thread(srv_thread_type type,
+ srv_slot_callback_t callback,
+ const void *arg)
+{
+ for (ulint slot_idx= get_first_slot(type);
+ slot_idx < srv_sys.n_sys_threads
+ && srv_sys.sys_threads[slot_idx].in_use
+ && srv_sys.sys_threads[slot_idx].type == type;
+ slot_idx++) {
+ callback(&srv_sys.sys_threads[slot_idx], arg);
+ }
+}
+#endif
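
srv_for_each_thread() walks consecutive in-use slots of one thread type
and applies the callback to each; iteration stops at the first free slot
or type mismatch. A self-contained analog of the callback pattern:

	#include <cstdio>

	struct slot_t { bool in_use; int type; };
	typedef void (*slot_callback_t)(slot_t*, const void*);

	static void for_each_slot(slot_t* slots, int n, int type,
				  slot_callback_t callback, const void* arg)
	{
		for (int i = 0;
		     i < n && slots[i].in_use && slots[i].type == type;
		     i++) {
			callback(&slots[i], arg);
		}
	}

	static void count_slot(slot_t*, const void* arg)
	{
		++*static_cast<int*>(const_cast<void*>(arg));
	}

	int main()
	{
		slot_t slots[3] = {{true, 1}, {true, 1}, {false, 1}};
		int n_visited = 0;
		for_each_slot(slots, 3, 1, count_slot, &n_visited);
		std::printf("visited %d slots\n", n_visited); /* prints 2 */
		return 0;
	}
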
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index a73e67e9f5b..41a1d42f649 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -39,8 +39,12 @@ Starts the InnoDB database server
Created 2/16/1996 Heikki Tuuri
*************************************************************************/
+#include "my_global.h"
+
#include "mysqld.h"
-#include "pars0pars.h"
+#include "mysql/psi/mysql_stage.h"
+#include "mysql/psi/psi.h"
+
#include "row0ftsort.h"
#include "ut0mem.h"
#include "mem0mem.h"
@@ -56,7 +60,7 @@ Created 2/16/1996 Heikki Tuuri
#include "fsp0fsp.h"
#include "rem0rec.h"
#include "mtr0mtr.h"
-#include "log0log.h"
+#include "log0crypt.h"
#include "log0recv.h"
#include "page0page.h"
#include "page0cur.h"
@@ -69,65 +73,64 @@ Created 2/16/1996 Heikki Tuuri
#include "srv0start.h"
#include "srv0srv.h"
#include "btr0defragment.h"
+#include "row0trunc.h"
#include "mysql/service_wsrep.h" /* wsrep_recovery */
-
-#ifndef UNIV_HOTBACKUP
-# include "trx0rseg.h"
-# include "os0proc.h"
-# include "sync0sync.h"
-# include "buf0flu.h"
-# include "buf0rea.h"
-# include "buf0mtflu.h"
-# include "dict0boot.h"
-# include "dict0load.h"
-# include "dict0stats_bg.h"
-# include "que0que.h"
-# include "usr0sess.h"
-# include "lock0lock.h"
-# include "trx0roll.h"
-# include "trx0purge.h"
-# include "lock0lock.h"
-# include "pars0pars.h"
-# include "btr0sea.h"
-# include "rem0cmp.h"
-# include "dict0crea.h"
-# include "row0ins.h"
-# include "row0sel.h"
-# include "row0upd.h"
-# include "row0row.h"
-# include "row0mysql.h"
-# include "btr0pcur.h"
-# include "os0sync.h"
-# include "zlib.h"
-# include "ut0crc32.h"
-# include "btr0scrub.h"
+#include "trx0rseg.h"
+#include "os0proc.h"
+#include "buf0flu.h"
+#include "buf0rea.h"
+#include "buf0mtflu.h"
+#include "dict0boot.h"
+#include "dict0load.h"
+#include "dict0stats_bg.h"
+#include "que0que.h"
+#include "lock0lock.h"
+#include "trx0roll.h"
+#include "trx0purge.h"
+#include "lock0lock.h"
+#include "pars0pars.h"
+#include "btr0sea.h"
+#include "rem0cmp.h"
+#include "dict0crea.h"
+#include "row0ins.h"
+#include "row0sel.h"
+#include "row0upd.h"
+#include "row0row.h"
+#include "row0mysql.h"
+#include "row0trunc.h"
+#include "btr0pcur.h"
+#include "os0event.h"
+#include "zlib.h"
+#include "ut0crc32.h"
+#include "btr0scrub.h"
/** Log sequence number immediately after startup */
-UNIV_INTERN lsn_t srv_start_lsn;
+lsn_t srv_start_lsn;
/** Log sequence number at shutdown */
-UNIV_INTERN lsn_t srv_shutdown_lsn;
-
-#ifdef HAVE_DARWIN_THREADS
-# include <sys/utsname.h>
-/** TRUE if the F_FULLFSYNC option is available */
-UNIV_INTERN ibool srv_have_fullfsync = FALSE;
-#endif
+lsn_t srv_shutdown_lsn;
/** TRUE if a raw partition is in use */
-UNIV_INTERN ibool srv_start_raw_disk_in_use = FALSE;
+ibool srv_start_raw_disk_in_use;
+
+/** Number of IO threads to use */
+ulint srv_n_file_io_threads;
/** UNDO tablespaces starts with space id. */
ulint srv_undo_space_id_start;
/** TRUE if the server is being started, before rolling back any
incomplete transactions */
-UNIV_INTERN ibool srv_startup_is_before_trx_rollback_phase = FALSE;
+bool srv_startup_is_before_trx_rollback_phase;
/** TRUE if the server is being started */
-UNIV_INTERN ibool srv_is_being_started = FALSE;
+bool srv_is_being_started;
+/** TRUE if SYS_TABLESPACES is available for lookups */
+bool srv_sys_tablespaces_open;
/** TRUE if the server was successfully started */
-UNIV_INTERN ibool srv_was_started = FALSE;
+bool srv_was_started;
+/** The original value of srv_log_file_size (innodb_log_file_size) */
+static ulonglong srv_log_file_size_requested;
/** TRUE if innobase_start_or_create_for_mysql() has been called */
-static ibool srv_start_has_been_called;
+static bool srv_start_has_been_called;
/** Whether any undo log records can be generated */
UNIV_INTERN bool srv_undo_sources;
@@ -135,14 +138,42 @@ UNIV_INTERN bool srv_undo_sources;
#ifdef UNIV_DEBUG
/** InnoDB system tablespace to set during recovery */
UNIV_INTERN uint srv_sys_space_size_debug;
+/** whether redo log files have been created at startup */
+UNIV_INTERN bool srv_log_files_created;
#endif /* UNIV_DEBUG */
+/** Bit flags for tracking background thread creation. They are used to
+determine which threads need to be stopped if we need to abort during
+the initialisation step. */
+enum srv_start_state_t {
+	/** No thread started */
+	SRV_START_STATE_NONE = 0,
+	/** lock_wait_timeout_thread started */
+	SRV_START_STATE_LOCK_SYS = 1,
+ /** buf_flush_page_cleaner_coordinator,
+ buf_flush_page_cleaner_worker started */
+ SRV_START_STATE_IO = 2,
+ /** srv_error_monitor_thread, srv_monitor_thread started */
+ SRV_START_STATE_MONITOR = 4,
+ /** srv_master_thread started */
+ SRV_START_STATE_MASTER = 8,
+ /** srv_purge_coordinator_thread, srv_worker_thread started */
+ SRV_START_STATE_PURGE = 16,
+ /** fil_crypt_thread, btr_defragment_thread started
+	(all background threads that can generate redo log but not undo log) */
+ SRV_START_STATE_REDO = 32
+};
+
+/** Track server thread starting phases */
+static ulint srv_start_state;
+
/** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to
SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
-UNIV_INTERN enum srv_shutdown_state srv_shutdown_state = SRV_SHUTDOWN_NONE;
+enum srv_shutdown_t srv_shutdown_state = SRV_SHUTDOWN_NONE;
/** Files comprising the system tablespace */
-static pfs_os_file_t files[1000];
+pfs_os_file_t files[1000];
/** io_handler_thread parameters for thread identification */
static ulint n[SRV_MAX_N_IO_THREADS + 6];
@@ -155,115 +186,55 @@ void *mtflush_ctx=NULL;
/** Thead handles */
static os_thread_t thread_handles[SRV_MAX_N_IO_THREADS + 6 + 32];
-static os_thread_t buf_flush_page_cleaner_thread_handle;
static os_thread_t buf_dump_thread_handle;
static os_thread_t dict_stats_thread_handle;
/** Status variables, is thread started ?*/
static bool thread_started[SRV_MAX_N_IO_THREADS + 6 + 32] = {false};
-static bool buf_flush_page_cleaner_thread_started = false;
-static bool buf_dump_thread_started = false;
-static bool dict_stats_thread_started = false;
-
-/** We use this mutex to test the return value of pthread_mutex_trylock
- on successful locking. HP-UX does NOT return 0, though Linux et al do. */
-static os_fast_mutex_t srv_os_test_mutex;
-
/** Name of srv_monitor_file */
static char* srv_monitor_file_name;
-#endif /* !UNIV_HOTBACKUP */
-/** Default undo tablespace size in UNIV_PAGEs count (10MB). */
-static const ulint SRV_UNDO_TABLESPACE_SIZE_IN_PAGES =
- ((1024 * 1024) * 10) / UNIV_PAGE_SIZE_DEF;
+/** Minimum expected tablespace size. (5M) */
+static const ulint MIN_EXPECTED_TABLESPACE_SIZE = 5 * 1024 * 1024;
/** */
-#define SRV_N_PENDING_IOS_PER_THREAD OS_AIO_N_PENDING_IOS_PER_THREAD
#define SRV_MAX_N_PENDING_SYNC_IOS 100
#ifdef UNIV_PFS_THREAD
/* Keys to register InnoDB threads with performance schema */
-UNIV_INTERN mysql_pfs_key_t io_handler_thread_key;
-UNIV_INTERN mysql_pfs_key_t srv_lock_timeout_thread_key;
-UNIV_INTERN mysql_pfs_key_t srv_error_monitor_thread_key;
-UNIV_INTERN mysql_pfs_key_t srv_monitor_thread_key;
-UNIV_INTERN mysql_pfs_key_t srv_master_thread_key;
-UNIV_INTERN mysql_pfs_key_t srv_purge_thread_key;
+mysql_pfs_key_t buf_dump_thread_key;
+mysql_pfs_key_t dict_stats_thread_key;
+mysql_pfs_key_t io_handler_thread_key;
+mysql_pfs_key_t io_ibuf_thread_key;
+mysql_pfs_key_t io_log_thread_key;
+mysql_pfs_key_t io_read_thread_key;
+mysql_pfs_key_t io_write_thread_key;
+mysql_pfs_key_t srv_error_monitor_thread_key;
+mysql_pfs_key_t srv_lock_timeout_thread_key;
+mysql_pfs_key_t srv_master_thread_key;
+mysql_pfs_key_t srv_monitor_thread_key;
+mysql_pfs_key_t srv_purge_thread_key;
+mysql_pfs_key_t srv_worker_thread_key;
#endif /* UNIV_PFS_THREAD */
-/** Innobase start-up aborted. Perform cleanup actions.
-@param[in] create_new_db TRUE if new db is being created
-@param[in] file File name
-@param[in] line Line number
-@param[in] err Reason for aborting InnoDB startup
-@return DB_SUCCESS or error code. */
-static
-dberr_t
-srv_init_abort(
- bool create_new_db,
- const char* file,
- ulint line,
- dberr_t err)
+#ifdef HAVE_PSI_STAGE_INTERFACE
+/** Array of all InnoDB stage events for monitoring activities via
+performance schema. */
+static PSI_stage_info* srv_stages[] =
{
- if (create_new_db) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Database creation was aborted"
- " at %s [" ULINTPF "]"
- " with error %s. You may need"
- " to delete the ibdata1 file before trying to start"
- " up again.",
- file, line, ut_strerr(err));
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Plugin initialization aborted"
- " at %s [" ULINTPF "]"
- " with error %s.",
- file, line, ut_strerr(err));
- }
-
- return(err);
-}
-
-/*********************************************************************//**
-Convert a numeric string that optionally ends in G or M or K, to a number
-containing megabytes.
-@return next character in string */
-static
-char*
-srv_parse_megabytes(
-/*================*/
- char* str, /*!< in: string containing a quantity in bytes */
- ulint* megs) /*!< out: the number in megabytes */
-{
- char* endp;
- ulint size;
-
- size = strtoul(str, &endp, 10);
-
- str = endp;
-
- switch (*str) {
- case 'G': case 'g':
- size *= 1024;
- /* fall through */
- case 'M': case 'm':
- str++;
- break;
- case 'K': case 'k':
- size /= 1024;
- str++;
- break;
- default:
- size /= 1024 * 1024;
- break;
- }
-
- *megs = size;
- return(str);
-}
+ &srv_stage_alter_table_end,
+ &srv_stage_alter_table_flush,
+ &srv_stage_alter_table_insert,
+ &srv_stage_alter_table_log_index,
+ &srv_stage_alter_table_log_table,
+ &srv_stage_alter_table_merge_sort,
+ &srv_stage_alter_table_read_pk_internal_sort,
+ &srv_stage_buffer_pool_load,
+};
+#endif /* HAVE_PSI_STAGE_INTERFACE */
/*********************************************************************//**
Check if a file can be opened in read-write mode.
-@return true if it doesn't exist or can be opened in rw mode. */
+@return true if it doesn't exist or can be opened in rw mode. */
static
bool
srv_file_check_mode(
@@ -274,14 +245,12 @@ srv_file_check_mode(
memset(&stat, 0x0, sizeof(stat));
- dberr_t err = os_file_get_status(name, &stat, true);
+ dberr_t err = os_file_get_status(
+ name, &stat, true, srv_read_only_mode);
if (err == DB_FAIL) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "os_file_get_status() failed on '%s'. Can't determine "
- "file permissions", name);
-
+ ib::error() << "os_file_get_status() failed on '" << name
+ << "'. Can't determine file permissions.";
return(false);
} else if (err == DB_SUCCESS) {
@@ -291,20 +260,15 @@ srv_file_check_mode(
if (stat.type == OS_FILE_TYPE_FILE) {
if (!stat.rw_perm) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "%s can't be opened in %s mode",
- name,
- srv_read_only_mode
- ? "read" : "read-write");
-
+ const char* mode = srv_read_only_mode
+ ? "read" : "read-write";
+ ib::error() << name << " can't be opened in "
+ << mode << " mode.";
return(false);
}
} else {
/* Not a regular file, bail out. */
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "'%s' not a regular file.", name);
+ ib::error() << "'" << name << "' not a regular file.";
return(false);
}
@@ -319,217 +283,10 @@ srv_file_check_mode(
return(true);
}
-/*********************************************************************//**
-Reads the data files and their sizes from a character string given in
-the .cnf file.
-@return TRUE if ok, FALSE on parse error */
-UNIV_INTERN
-ibool
-srv_parse_data_file_paths_and_sizes(
-/*================================*/
- char* str) /*!< in/out: the data file path string */
-{
- char* input_str;
- char* path;
- ulint size;
- ulint i = 0;
-
- srv_auto_extend_last_data_file = FALSE;
- srv_last_file_size_max = 0;
- srv_data_file_names = NULL;
- srv_data_file_sizes = NULL;
- srv_data_file_is_raw_partition = NULL;
-
- input_str = str;
-
- /* First calculate the number of data files and check syntax:
- path:size[M | G];path:size[M | G]... . Note that a Windows path may
- contain a drive name and a ':'. */
-
- while (*str != '\0') {
- path = str;
-
- while ((*str != ':' && *str != '\0')
- || (*str == ':'
- && (*(str + 1) == '\\' || *(str + 1) == '/'
- || *(str + 1) == ':'))) {
- str++;
- }
-
- if (*str == '\0') {
- return(FALSE);
- }
-
- str++;
-
- str = srv_parse_megabytes(str, &size);
-
- if (0 == strncmp(str, ":autoextend",
- (sizeof ":autoextend") - 1)) {
-
- str += (sizeof ":autoextend") - 1;
-
- if (0 == strncmp(str, ":max:",
- (sizeof ":max:") - 1)) {
-
- str += (sizeof ":max:") - 1;
-
- str = srv_parse_megabytes(str, &size);
- }
-
- if (*str != '\0') {
-
- return(FALSE);
- }
- }
-
- if (strlen(str) >= 6
- && *str == 'n'
- && *(str + 1) == 'e'
- && *(str + 2) == 'w') {
- str += 3;
- }
-
- if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
- str += 3;
- }
-
- if (size == 0) {
- return(FALSE);
- }
-
- i++;
-
- if (*str == ';') {
- str++;
- } else if (*str != '\0') {
-
- return(FALSE);
- }
- }
-
- if (i == 0) {
- /* If innodb_data_file_path was defined it must contain
- at least one data file definition */
-
- return(FALSE);
- }
-
- srv_data_file_names = static_cast<char**>(
- malloc(i * sizeof *srv_data_file_names));
-
- srv_data_file_sizes = static_cast<ulint*>(
- malloc(i * sizeof *srv_data_file_sizes));
-
- srv_data_file_is_raw_partition = static_cast<ulint*>(
- malloc(i * sizeof *srv_data_file_is_raw_partition));
-
- srv_n_data_files = i;
-
- /* Then store the actual values to our arrays */
-
- str = input_str;
- i = 0;
-
- while (*str != '\0') {
- path = str;
-
- /* Note that we must step over the ':' in a Windows path;
- a Windows path normally looks like C:\ibdata\ibdata1:1G, but
- a Windows raw partition may have a specification like
- \\.\C::1Gnewraw or \\.\PHYSICALDRIVE2:1Gnewraw */
-
- while ((*str != ':' && *str != '\0')
- || (*str == ':'
- && (*(str + 1) == '\\' || *(str + 1) == '/'
- || *(str + 1) == ':'))) {
- str++;
- }
-
- if (*str == ':') {
- /* Make path a null-terminated string */
- *str = '\0';
- str++;
- }
-
- str = srv_parse_megabytes(str, &size);
-
- srv_data_file_names[i] = path;
- srv_data_file_sizes[i] = size;
-
- if (0 == strncmp(str, ":autoextend",
- (sizeof ":autoextend") - 1)) {
-
- srv_auto_extend_last_data_file = TRUE;
-
- str += (sizeof ":autoextend") - 1;
-
- if (0 == strncmp(str, ":max:",
- (sizeof ":max:") - 1)) {
-
- str += (sizeof ":max:") - 1;
-
- str = srv_parse_megabytes(
- str, &srv_last_file_size_max);
- }
-
- if (*str != '\0') {
-
- return(FALSE);
- }
- }
-
- (srv_data_file_is_raw_partition)[i] = 0;
-
- if (strlen(str) >= 6
- && *str == 'n'
- && *(str + 1) == 'e'
- && *(str + 2) == 'w') {
- str += 3;
- /* Initialize new raw device only during bootstrap */
- (srv_data_file_is_raw_partition)[i] = SRV_NEW_RAW;
- }
-
- if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
- str += 3;
-
- /* Initialize new raw device only during bootstrap */
- if ((srv_data_file_is_raw_partition)[i] == 0) {
- (srv_data_file_is_raw_partition)[i] = SRV_NEW_RAW;
- }
- }
-
- i++;
-
- if (*str == ';') {
- str++;
- }
- }
-
- return(TRUE);
-}
-
-/*********************************************************************//**
-Frees the memory allocated by srv_parse_data_file_paths_and_sizes()
-and srv_parse_log_group_home_dirs(). */
-UNIV_INTERN
-void
-srv_free_paths_and_sizes(void)
-/*==========================*/
-{
- free(srv_data_file_names);
- srv_data_file_names = NULL;
- free(srv_data_file_sizes);
- srv_data_file_sizes = NULL;
- free(srv_data_file_is_raw_partition);
- srv_data_file_is_raw_partition = NULL;
-}
-
-#ifndef UNIV_HOTBACKUP
/********************************************************************//**
I/o-handler thread function.
-@return OS_THREAD_DUMMY_RETURN */
-extern "C" UNIV_INTERN
+@return OS_THREAD_DUMMY_RETURN */
+extern "C"
os_thread_ret_t
DECLARE_THREAD(io_handler_thread)(
/*==============================*/
@@ -541,16 +298,37 @@ DECLARE_THREAD(io_handler_thread)(
segment = *((ulint*) arg);
#ifdef UNIV_DEBUG_THREAD_CREATION
- ib_logf(IB_LOG_LEVEL_INFO,
- "Io handler thread %lu starts, id %lu\n", segment,
- os_thread_pf(os_thread_get_curr_id()));
+ ib::info() << "Io handler thread " << segment << " starts, id "
+ << os_thread_pf(os_thread_get_curr_id());
#endif
-#ifdef UNIV_PFS_THREAD
- pfs_register_thread(io_handler_thread_key);
-#endif /* UNIV_PFS_THREAD */
+	/* In read-only mode we do not need the ibuf and log I/O threads;
+	see innobase_start_or_create_for_mysql(). */
+ ulint start = (srv_read_only_mode) ? 0 : 2;
+
+ if (segment < start) {
+ if (segment == 0) {
+ pfs_register_thread(io_ibuf_thread_key);
+ } else {
+ ut_ad(segment == 1);
+ pfs_register_thread(io_log_thread_key);
+ }
+ } else if (segment >= start
+ && segment < (start + srv_n_read_io_threads)) {
+ pfs_register_thread(io_read_thread_key);
- while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
+ } else if (segment >= (start + srv_n_read_io_threads)
+ && segment < (start + srv_n_read_io_threads
+ + srv_n_write_io_threads)) {
+ pfs_register_thread(io_write_thread_key);
+
+ } else {
+ pfs_register_thread(io_handler_thread_key);
+ }
+
+ while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS
+ || buf_page_cleaner_is_active
+ || !os_aio_all_slots_free()) {
fil_aio_wait(segment);
}
@@ -559,35 +337,14 @@ DECLARE_THREAD(io_handler_thread)(
The thread actually never comes here because it is exited in an
os_event_wait(). */
- os_thread_exit(NULL);
+ os_thread_exit();
OS_THREAD_DUMMY_RETURN;
}
-#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
-Normalizes a directory path for Windows: converts slashes to backslashes. */
-UNIV_INTERN
-void
-srv_normalize_path_for_win(
-/*=======================*/
- char* str MY_ATTRIBUTE((unused))) /*!< in/out: null-terminated
- character string */
-{
-#ifdef __WIN__
- for (; *str; str++) {
-
- if (*str == '/') {
- *str = '\\';
- }
- }
-#endif
-}
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
Creates a log file.
-@return DB_SUCCESS or error code */
+@return DB_SUCCESS or error code */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
create_log_file(
@@ -595,30 +352,25 @@ create_log_file(
pfs_os_file_t* file, /*!< out: file handle */
const char* name) /*!< in: log file name */
{
- ibool ret;
+ bool ret;
*file = os_file_create(
- innodb_file_log_key, name,
+ innodb_log_file_key, name,
OS_FILE_CREATE|OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL,
- OS_LOG_FILE, &ret, FALSE);
+ OS_LOG_FILE, srv_read_only_mode, &ret);
if (!ret) {
- ib_logf(IB_LOG_LEVEL_ERROR, "Cannot create %s", name);
+ ib::error() << "Cannot create " << name;
return(DB_ERROR);
}
- ib_logf(IB_LOG_LEVEL_INFO,
- "Setting log file %s size to %lu MB",
- name, (ulong) srv_log_file_size
- >> (20 - UNIV_PAGE_SIZE_SHIFT));
+ ib::info() << "Setting log file " << name << " size to "
+ << srv_log_file_size << " bytes";
- ret = os_file_set_size(name, *file,
- (os_offset_t) srv_log_file_size
- << UNIV_PAGE_SIZE_SHIFT);
+ ret = os_file_set_size(name, *file, srv_log_file_size);
if (!ret) {
- ib_logf(IB_LOG_LEVEL_ERROR, "Cannot set log file"
- " %s to size %lu MB", name, (ulong) srv_log_file_size
- >> (20 - UNIV_PAGE_SIZE_SHIFT));
+ ib::error() << "Cannot set log file " << name << " size to "
+ << srv_log_file_size << " bytes";
return(DB_ERROR);
}
@@ -631,60 +383,69 @@ create_log_file(
/** Initial number of the first redo log file */
#define INIT_LOG_FILE0 (SRV_N_LOG_FILES_MAX + 1)
+/** Delete all log files.
+@param[in,out] logfilename buffer for log file name
+@param[in] dirnamelen length of the directory path
+@param[in] n_files number of files to delete
+@param[in] i first file to delete */
+static
+void
+delete_log_files(char* logfilename, size_t dirnamelen, uint n_files, uint i=0)
+{
+ /* Remove any old log files. */
+ for (; i < n_files; i++) {
+ sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
+
+ /* Ignore errors about non-existent files or files
+ that cannot be removed. The create_log_file() will
+ return an error when the file exists. */
+#ifdef _WIN32
+ DeleteFile((LPCTSTR) logfilename);
+#else
+ unlink(logfilename);
+#endif
+ }
+}
+
/*********************************************************************//**
Creates all log files.
-@return DB_SUCCESS or error code */
+@return DB_SUCCESS or error code */
static
dberr_t
create_log_files(
/*=============*/
- bool create_new_db, /*!< in: TRUE if new database is being
- created */
char* logfilename, /*!< in/out: buffer for log file name */
size_t dirnamelen, /*!< in: length of the directory path */
lsn_t lsn, /*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
char*& logfile0) /*!< out: name of the first log file */
{
+ dberr_t err;
+
if (srv_read_only_mode) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot create log files in read-only mode");
+ ib::error() << "Cannot create log files in read-only mode";
return(DB_READ_ONLY);
}
- /* We prevent system tablespace creation with existing files in
- data directory. So we do not delete log files when creating new system
- tablespace */
- if (!create_new_db) {
- /* Remove any old log files. */
- for (unsigned i = 0; i <= INIT_LOG_FILE0; i++) {
- sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
+ /* Crashing after deleting the first file should be
+ recoverable. The buffer pool was clean, and we can simply
+	create all log files from scratch. */
+ DBUG_EXECUTE_IF("innodb_log_abort_6",
+ delete_log_files(logfilename, dirnamelen, 1);
+ return(DB_ERROR););
- /* Ignore errors about non-existent files or files
- that cannot be removed. The create_log_file() will
- return an error when the file exists. */
-#ifdef __WIN__
- DeleteFile((LPCTSTR) logfilename);
-#else
- unlink(logfilename);
-#endif
- /* Crashing after deleting the first
- file should be recoverable. The buffer
- pool was clean, and we can simply create
- all log files from the scratch. */
- DBUG_EXECUTE_IF("innodb_log_abort_6",
- return(DB_ERROR););
- }
- }
+ delete_log_files(logfilename, dirnamelen, INIT_LOG_FILE0 + 1);
+ DBUG_PRINT("ib_log", ("After innodb_log_abort_6"));
ut_ad(!buf_pool_check_no_pending_io());
DBUG_EXECUTE_IF("innodb_log_abort_7", return(DB_ERROR););
+ DBUG_PRINT("ib_log", ("After innodb_log_abort_7"));
for (unsigned i = 0; i < srv_n_log_files; i++) {
sprintf(logfilename + dirnamelen,
"ib_logfile%u", i ? i : INIT_LOG_FILE0);
- dberr_t err = create_log_file(&files[i], logfilename);
+ err = create_log_file(&files[i], logfilename);
if (err != DB_SUCCESS) {
return(err);
@@ -692,48 +453,70 @@ create_log_files(
}
DBUG_EXECUTE_IF("innodb_log_abort_8", return(DB_ERROR););
+ DBUG_PRINT("ib_log", ("After innodb_log_abort_8"));
/* We did not create the first log file initially as
ib_logfile0, so that crash recovery cannot find it until it
has been completed and renamed. */
sprintf(logfilename + dirnamelen, "ib_logfile%u", INIT_LOG_FILE0);
- fil_space_create(
- logfilename, SRV_LOG_SPACE_FIRST_ID, 0,
- FIL_LOG,
- NULL /* no encryption yet */,
- true /* this is create */);
+ fil_space_t* log_space = fil_space_create(
+ "innodb_redo_log", SRV_LOG_SPACE_FIRST_ID, 0, FIL_TYPE_LOG,
+ NULL/* innodb_encrypt_log works at a different level */);
ut_a(fil_validate());
+ ut_a(log_space != NULL);
- logfile0 = fil_node_create(
- logfilename, (ulint) srv_log_file_size,
- SRV_LOG_SPACE_FIRST_ID, FALSE);
+ const ulint size = ulint(srv_log_file_size >> srv_page_size_shift);
+
+ logfile0 = log_space->add(logfilename, OS_FILE_CLOSED, size,
+ false, false)->name;
ut_a(logfile0);
for (unsigned i = 1; i < srv_n_log_files; i++) {
+
sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
- if (!fil_node_create(
- logfilename,
- (ulint) srv_log_file_size,
- SRV_LOG_SPACE_FIRST_ID, FALSE)) {
- ut_error;
- }
+ log_space->add(logfilename, OS_FILE_CLOSED, size,
+ false, false);
}
- log_group_init(0, srv_n_log_files,
- srv_log_file_size * UNIV_PAGE_SIZE,
- SRV_LOG_SPACE_FIRST_ID,
- SRV_LOG_SPACE_FIRST_ID + 1);
+ log_init(srv_n_log_files);
+ if (!log_set_capacity(srv_log_file_size_requested)) {
+ return(DB_ERROR);
+ }
fil_open_log_and_system_tablespace_files();
/* Create a log checkpoint. */
- mutex_enter(&log_sys->mutex);
- ut_d(recv_no_log_write = FALSE);
- recv_reset_logs(lsn);
- mutex_exit(&log_sys->mutex);
+ log_mutex_enter();
+ if (log_sys->is_encrypted() && !log_crypt_init()) {
+ return DB_ERROR;
+ }
+ ut_d(recv_no_log_write = false);
+ log_sys->lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
+
+ log_sys->log.lsn = log_sys->lsn;
+ log_sys->log.lsn_offset = LOG_FILE_HDR_SIZE;
+
+ log_sys->buf_next_to_write = 0;
+ log_sys->write_lsn = log_sys->lsn;
+
+ log_sys->next_checkpoint_no = 0;
+ log_sys->last_checkpoint_lsn = 0;
+
+ memset(log_sys->buf, 0, log_sys->buf_size);
+ log_block_init(log_sys->buf, log_sys->lsn);
+ log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
+
+ log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
+ log_sys->lsn += LOG_BLOCK_HDR_SIZE;
+
+ MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
+ (log_sys->lsn - log_sys->last_checkpoint_lsn));
+ log_mutex_exit();
+
+ log_make_checkpoint();
return(DB_SUCCESS);
}
@@ -759,7 +542,11 @@ create_log_files_rename(
we need to explicitly flush the log buffers. */
fil_flush(SRV_LOG_SPACE_FIRST_ID);
+ ut_ad(!srv_log_files_created);
+ ut_d(srv_log_files_created = true);
+
DBUG_EXECUTE_IF("innodb_log_abort_9", return(DB_ERROR););
+ DBUG_PRINT("ib_log", ("After innodb_log_abort_9"));
/* Close the log files, so that we can rename
the first one. */
@@ -769,446 +556,32 @@ create_log_files_rename(
checkpoint has been created. */
sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
- ib_logf(IB_LOG_LEVEL_INFO,
- "Renaming log file %s to %s", logfile0, logfilename);
+ ib::info() << "Renaming log file " << logfile0 << " to "
+ << logfilename;
- mutex_enter(&log_sys->mutex);
+ log_mutex_enter();
ut_ad(strlen(logfile0) == 2 + strlen(logfilename));
dberr_t err = os_file_rename(
- innodb_file_log_key, logfile0, logfilename)
+ innodb_log_file_key, logfile0, logfilename)
? DB_SUCCESS : DB_ERROR;
/* Replace the first file with ib_logfile0. */
strcpy(logfile0, logfilename);
- mutex_exit(&log_sys->mutex);
+ log_mutex_exit();
DBUG_EXECUTE_IF("innodb_log_abort_10", err = DB_ERROR;);
if (err == DB_SUCCESS) {
fil_open_log_and_system_tablespace_files();
- ib_logf(IB_LOG_LEVEL_WARN,
- "New log files created, LSN=" LSN_PF, lsn);
+ ib::info() << "New log files created, LSN=" << lsn;
}
return(err);
}
/*********************************************************************//**
-Opens a log file.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-open_log_file(
-/*==========*/
- pfs_os_file_t* file, /*!< out: file handle */
- const char* name, /*!< in: log file name */
- os_offset_t* size) /*!< out: file size */
-{
- ibool ret;
-
- *file = os_file_create(innodb_file_log_key, name,
- OS_FILE_OPEN, OS_FILE_AIO,
- OS_LOG_FILE, &ret, FALSE);
- if (!ret) {
- ib_logf(IB_LOG_LEVEL_ERROR, "Unable to open '%s'", name);
- return(DB_ERROR);
- }
-
- *size = os_file_get_size(*file);
-
- ret = os_file_close(*file);
- ut_a(ret);
- return(DB_SUCCESS);
-}
-
-/** Creates or opens database data files and closes them.
-@param[out] create_new_db true = create new database
-@param[out] min_arch_log_no min of archived log numbers in
- data files
-@param[out] max_arch_log_no max of archived log numbers in
- data files
-@param[out] flushed_lsn flushed lsn in fist datafile
-@param[out] sum_of_new_sizes sum of sizes of the new files
- added
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-open_or_create_data_files(
- bool* create_new_db,
-#ifdef UNIV_LOG_ARCHIVE
- ulint* min_arch_log_no,
- ulint* max_arch_log_no,
-#endif /* UNIV_LOG_ARCHIVE */
- lsn_t* flushed_lsn,
- ulint* sum_of_new_sizes)
-{
- ibool ret;
- ulint i;
- ibool one_opened = FALSE;
- ibool one_created = FALSE;
- os_offset_t size;
- ulint flags;
- ulint space=0;
- ulint rounded_size_pages;
- char name[10000];
- fil_space_crypt_t* crypt_data=NULL;
-
- if (srv_n_data_files >= 1000) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Can only have < 1000 data files, you have "
- "defined %lu", (ulong) srv_n_data_files);
-
- return(DB_ERROR);
- }
-
- *sum_of_new_sizes = 0;
-
- *create_new_db = false;
-
- srv_normalize_path_for_win(srv_data_home);
-
- for (i = 0; i < srv_n_data_files; i++) {
- ulint dirnamelen;
-
- srv_normalize_path_for_win(srv_data_file_names[i]);
- dirnamelen = strlen(srv_data_home);
-
- ut_a(dirnamelen + strlen(srv_data_file_names[i])
- < (sizeof name) - 1);
-
- memcpy(name, srv_data_home, dirnamelen);
-
- /* Add a path separator if needed. */
- if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
- name[dirnamelen++] = SRV_PATH_SEPARATOR;
- }
-
- strcpy(name + dirnamelen, srv_data_file_names[i]);
-
- /* Note: It will return true if the file doesn' exist. */
-
- if (!srv_file_check_mode(name)) {
-
- return(DB_FAIL);
-
- } else if (srv_data_file_is_raw_partition[i] == 0) {
-
- /* First we try to create the file: if it already
- exists, ret will get value FALSE */
-
- files[i] = os_file_create(
- innodb_file_data_key, name, OS_FILE_CREATE,
- OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE);
-
- if (srv_read_only_mode) {
-
- if (ret) {
- goto size_check;
- }
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Opening %s failed!", name);
-
- return(DB_ERROR);
-
- } else if (!ret
- && os_file_get_last_error(false)
- != OS_FILE_ALREADY_EXISTS
-#ifdef UNIV_AIX
- /* AIX 5.1 after security patch ML7 may have
- errno set to 0 here, which causes our
- function to return 100; work around that
- AIX problem */
- && os_file_get_last_error(false) != 100
-#endif /* UNIV_AIX */
- ) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Creating or opening %s failed!",
- name);
-
- return(DB_ERROR);
- }
-
- } else if (srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) {
-
- ut_a(!srv_read_only_mode);
-
- /* The partition is opened, not created; then it is
- written over */
-
- srv_start_raw_disk_in_use = TRUE;
- srv_created_new_raw = TRUE;
-
- files[i] = os_file_create(
- innodb_file_data_key, name, OS_FILE_OPEN_RAW,
- OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE);
-
- if (!ret) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Error in opening %s", name);
-
- return(DB_ERROR);
- }
-
- const char* check_msg;
-
- check_msg = fil_read_first_page(
- files[i], FALSE, &flags, &space,
-#ifdef UNIV_LOG_ARCHIVE
- min_arch_log_no, max_arch_log_no,
-#endif /* UNIV_LOG_ARCHIVE */
- flushed_lsn, NULL);
-
- /* If first page is valid, don't overwrite DB.
- It prevents overwriting DB when mysql_install_db
- starts mysqld multiple times during bootstrap. */
- if (check_msg == NULL) {
-
- srv_created_new_raw = FALSE;
- ret = FALSE;
- }
-
- } else if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
- srv_start_raw_disk_in_use = TRUE;
-
- ret = FALSE;
- } else {
- ut_a(0);
- }
-
- if (ret == FALSE) {
- const char* check_msg;
- /* We open the data file */
-
- if (one_created) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Data files can only be added at "
- "the end of a tablespace, but "
- "data file %s existed beforehand.",
- name);
- return(DB_ERROR);
- }
-
- if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
- ut_a(!srv_read_only_mode);
- files[i] = os_file_create(
- innodb_file_data_key,
- name, OS_FILE_OPEN_RAW,
- OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE);
- } else if (i == 0) {
- files[i] = os_file_create(
- innodb_file_data_key,
- name, OS_FILE_OPEN_RETRY,
- OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE);
- } else {
- files[i] = os_file_create(
- innodb_file_data_key,
- name, OS_FILE_OPEN, OS_FILE_NORMAL,
- OS_DATA_FILE, &ret, FALSE);
- }
-
- if (!ret) {
- os_file_get_last_error(true);
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Can't open '%s'", name);
-
- return(DB_ERROR);
- }
-
- if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
- goto skip_size_check;
- }
-
-size_check:
- size = os_file_get_size(files[i]);
- ut_a(size != (os_offset_t) -1);
-
- /* If InnoDB encountered an error or was killed
- while extending the data file, the last page
- could be incomplete. */
-
- rounded_size_pages = static_cast<ulint>(
- size >> UNIV_PAGE_SIZE_SHIFT);
-
- if (i == srv_n_data_files - 1
- && srv_auto_extend_last_data_file) {
-
- if (srv_data_file_sizes[i] > rounded_size_pages
- || (srv_last_file_size_max > 0
- && srv_last_file_size_max
- < rounded_size_pages)) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "auto-extending "
- "data file %s is "
- "of a different size "
- ULINTPF " pages (rounded "
- "down to MB) than specified "
- "in the .cnf file: "
- "initial " ULINTPF " pages, "
- "max " ULINTPF " (relevant if "
- "non-zero) pages!",
- name,
- rounded_size_pages,
- srv_data_file_sizes[i],
- srv_last_file_size_max);
-
- return(DB_ERROR);
- }
-
- srv_data_file_sizes[i] = rounded_size_pages;
- }
-
- if (rounded_size_pages != srv_data_file_sizes[i]) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Data file %s is of a different "
- "size " ULINTPF " pages (rounded down to MB) "
- "than specified in the .cnf file "
- ULINTPF " pages!",
- name,
- rounded_size_pages,
- srv_data_file_sizes[i]);
-
- return(DB_ERROR);
- }
-skip_size_check:
-
- /* This is the earliest location where we can load
- the double write buffer. */
- if (i == 0) {
- buf_dblwr_init_or_load_pages(
- files[i], srv_data_file_names[i], true);
- }
-
- bool retry = true;
-check_first_page:
- check_msg = fil_read_first_page(
- files[i], one_opened, &flags, &space,
-#ifdef UNIV_LOG_ARCHIVE
- min_arch_log_no, max_arch_log_no,
-#endif /* UNIV_LOG_ARCHIVE */
- flushed_lsn, &crypt_data);
-
- if (check_msg) {
-
- if (retry) {
- fsp_open_info fsp;
- const ulint page_no = 0;
-
- retry = false;
- fsp.id = 0;
- fsp.filepath = srv_data_file_names[i];
- fsp.file = files[i];
-
- if (fil_user_tablespace_restore_page(
- &fsp, page_no)) {
- goto check_first_page;
- }
- }
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "%s in data file %s",
- check_msg, name);
- return(DB_ERROR);
- }
-
- /* The first file of the system tablespace must
- have space ID = TRX_SYS_SPACE. The FSP_SPACE_ID
- field in files greater than ibdata1 are unreliable. */
- ut_a(one_opened || space == TRX_SYS_SPACE);
-
- /* Check the flags for the first system tablespace
- file only. */
- if (!one_opened
- && UNIV_PAGE_SIZE
- != fsp_flags_get_page_size(flags)) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Data file \"%s\" uses page size " ULINTPF " ,"
- "but the start-up parameter "
- "is --innodb-page-size=" ULINTPF " .",
- name,
- fsp_flags_get_page_size(flags),
- UNIV_PAGE_SIZE);
-
- return(DB_ERROR);
- }
-
- one_opened = TRUE;
- } else if (!srv_read_only_mode) {
- /* We created the data file and now write it full of
- zeros */
-
- one_created = TRUE;
-
- if (i > 0) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Data file %s did not"
- " exist: new to be created",
- name);
- } else {
- ib_logf(IB_LOG_LEVEL_INFO,
- "The first specified "
- "data file %s did not exist: "
- "a new database to be created!",
- name);
-
- *create_new_db = TRUE;
- }
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Setting file %s size to " ULINTPF " MB",
- name,
- (srv_data_file_sizes[i]
- >> (20 - UNIV_PAGE_SIZE_SHIFT)));
-
- ret = os_file_set_size(
- name, files[i],
- (os_offset_t) srv_data_file_sizes[i]
- << UNIV_PAGE_SIZE_SHIFT
- /* TODO: enable page_compression on the
- system tablespace and add
- , FSP_FLAGS_HAS_PAGE_COMPRESSION(flags)*/);
-
- if (!ret) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Error in creating %s: "
- "probably out of disk space",
- name);
-
- return(DB_ERROR);
- }
-
- *sum_of_new_sizes += srv_data_file_sizes[i];
- }
-
- ret = os_file_close(files[i]);
- ut_a(ret);
-
- if (i == 0) {
- flags = FSP_FLAGS_PAGE_SSIZE();
-
- fil_space_create(name, 0, flags, FIL_TABLESPACE,
- crypt_data, (*create_new_db) == true);
- }
-
- ut_a(fil_validate());
-
- if (!fil_node_create(name, srv_data_file_sizes[i], 0,
- srv_data_file_is_raw_partition[i] != 0)) {
- return(DB_ERROR);
- }
- }
-
- return(DB_SUCCESS);
-}
-
-/*********************************************************************//**
Create undo tablespace.
-@return DB_SUCCESS or error code */
+@return DB_SUCCESS or error code */
static
dberr_t
srv_undo_tablespace_create(
@@ -1217,20 +590,21 @@ srv_undo_tablespace_create(
ulint size) /*!< in: tablespace size in pages */
{
pfs_os_file_t fh;
- ibool ret;
+ bool ret;
dberr_t err = DB_SUCCESS;
os_file_create_subdirs_if_needed(name);
fh = os_file_create(
- innodb_file_data_key,
+ innodb_data_file_key,
name,
srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE,
- OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE);
+ OS_FILE_NORMAL, OS_DATA_FILE, srv_read_only_mode, &ret);
if (srv_read_only_mode && ret) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "%s opened in read-only mode", name);
+
+ ib::info() << name << " opened in read-only mode";
+
} else if (ret == FALSE) {
if (os_file_get_last_error(false) != OS_FILE_ALREADY_EXISTS
#ifdef UNIV_AIX
@@ -1240,15 +614,8 @@ srv_undo_tablespace_create(
&& os_file_get_last_error(false) != 100
#endif /* UNIV_AIX */
) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Can't create UNDO tablespace %s", name);
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Creating system tablespace with"
- " existing undo tablespaces is not"
- " supported. Please delete all undo"
- " tablespaces before creating new"
- " system tablespace.");
+ ib::error() << "Can't create UNDO tablespace "
+ << name;
}
err = DB_ERROR;
} else {
@@ -1256,24 +623,21 @@ srv_undo_tablespace_create(
/* We created the data file and now write it full of zeros */
- ib_logf(IB_LOG_LEVEL_INFO,
- "Data file %s did not exist: new to be created",
- name);
+ ib::info() << "Data file " << name << " did not exist: new to"
+ " be created";
- ib_logf(IB_LOG_LEVEL_INFO,
- "Setting file %s size to %lu MB",
- name, size >> (20 - UNIV_PAGE_SIZE_SHIFT));
+ ib::info() << "Setting file " << name << " size to "
+ << (size >> (20 - UNIV_PAGE_SIZE_SHIFT)) << " MB";
- ret = os_file_set_size(name, fh, size << UNIV_PAGE_SIZE_SHIFT
- /* TODO: enable page_compression on the
- system tablespace and add
- FSP_FLAGS_HAS_PAGE_COMPRESSION(flags)
- */);
+ ib::info() << "Database physically writes the file full: "
+ << "wait...";
+
+ ret = os_file_set_size(
+ name, fh, os_offset_t(size) << UNIV_PAGE_SIZE_SHIFT);
if (!ret) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Error in creating %s: probably out of "
- "disk space", name);
+ ib::info() << "Error in creating " << name
+ << ": probably out of disk space";
err = DB_ERROR;
}
@@ -1284,122 +648,167 @@ srv_undo_tablespace_create(
return(err);
}
-/*********************************************************************//**
-Open an undo tablespace.
-@return DB_SUCCESS or error code */
-static
-dberr_t
-srv_undo_tablespace_open(
-/*=====================*/
- const char* name, /*!< in: tablespace name */
- ulint space) /*!< in: tablespace id */
+/** Open an undo tablespace.
+@param[in] name tablespace file name
+@param[in] space_id tablespace ID
+@param[in] create_new_db whether undo tablespaces are being created
+@return whether the tablespace was opened */
+static bool srv_undo_tablespace_open(const char* name, ulint space_id,
+ bool create_new_db)
{
pfs_os_file_t fh;
- dberr_t err = DB_ERROR;
- ibool ret;
- ulint flags;
+ bool success;
+ char undo_name[sizeof "innodb_undo000"];
- if (!srv_file_check_mode(name)) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "UNDO tablespaces must be %s!",
- srv_read_only_mode ? "writable" : "readable");
-
- return(DB_ERROR);
- }
+ snprintf(undo_name, sizeof(undo_name),
+ "innodb_undo%03u", static_cast<unsigned>(space_id));
fh = os_file_create(
- innodb_file_data_key, name,
- OS_FILE_OPEN_RETRY
- | OS_FILE_ON_ERROR_NO_EXIT
- | OS_FILE_ON_ERROR_SILENT,
- OS_FILE_NORMAL,
- OS_DATA_FILE,
- &ret,
- FALSE);
+ innodb_data_file_key, name, OS_FILE_OPEN
+ | OS_FILE_ON_ERROR_NO_EXIT | OS_FILE_ON_ERROR_SILENT,
+ OS_FILE_AIO, OS_DATA_FILE, srv_read_only_mode, &success);
+ if (!success) {
+ return false;
+ }
- /* If the file open was successful then load the tablespace. */
+ os_offset_t size = os_file_get_size(fh);
+ ut_a(size != os_offset_t(-1));
- if (ret) {
- os_offset_t size;
+ /* Load the tablespace into InnoDB's internal data structures. */
- size = os_file_get_size(fh);
- ut_a(size != (os_offset_t) -1);
+ /* We set the biggest space id to the undo tablespace
+ because InnoDB hasn't opened any other tablespace apart
+ from the system tablespace. */
- /* Load the tablespace into InnoDB's internal
- data structures. */
+ fil_set_max_space_id_if_bigger(space_id);
- const char* check_msg;
- fil_space_crypt_t* crypt_data = NULL;
+ fil_space_t* space = fil_space_create(
+ undo_name, space_id, FSP_FLAGS_PAGE_SSIZE(),
+ FIL_TYPE_TABLESPACE, NULL);
- /* Set the compressed page size to 0 (non-compressed) */
- flags = FSP_FLAGS_PAGE_SSIZE();
+ ut_a(fil_validate());
+ ut_a(space);
- /* Read first page to find out does the crypt_info
- exists on undo tablespace. */
- check_msg = fil_read_first_page(
- fh, FALSE, &flags, &space,
- NULL, &crypt_data, false);
+ fil_node_t* file = space->add(name, fh, 0, false, true);
- ret = os_file_close(fh);
- ut_a(ret);
+ mutex_enter(&fil_system->mutex);
- if (check_msg) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "%s in data file %s",
- check_msg, name);
- return (err);
+ if (create_new_db) {
+ space->size = file->size = ulint(size >> srv_page_size_shift);
+ space->size_in_header = SRV_UNDO_TABLESPACE_SIZE_IN_PAGES;
+ } else {
+ success = file->read_page0(true);
+ if (!success) {
+ os_file_close(file->handle);
+ file->handle = OS_FILE_CLOSED;
+ ut_a(fil_system->n_open > 0);
+ fil_system->n_open--;
}
+ }
- /* We set the biggest space id to the undo tablespace
- because InnoDB hasn't opened any other tablespace apart
- from the system tablespace. */
+ mutex_exit(&fil_system->mutex);
- fil_set_max_space_id_if_bigger(space);
+ return success;
+}
- fil_space_create(name, space, flags, FIL_TABLESPACE,
- crypt_data,
- true /* create */);
+/** Check if undo tablespaces and redo log files exist before creating a
+new system tablespace.
+@retval DB_SUCCESS if no undo tablespaces or redo log files are found
+@retval DB_ERROR if any undo tablespace or redo log file is found */
+static
+dberr_t
+srv_check_undo_redo_logs_exists()
+{
+ bool ret;
+ os_file_t fh;
+ char name[OS_FILE_MAX_PATH];
- ut_a(fil_validate());
+ /* Check if any undo tablespaces exist */
+ for (ulint i = 1; i <= srv_undo_tablespaces; ++i) {
- os_offset_t n_pages = size / UNIV_PAGE_SIZE;
+ snprintf(
+ name, sizeof(name),
+ "%s%cundo%03zu",
+ srv_undo_dir, OS_PATH_SEPARATOR,
+ i);
+
+ fh = os_file_create(
+ innodb_data_file_key, name,
+ OS_FILE_OPEN_RETRY
+ | OS_FILE_ON_ERROR_NO_EXIT
+ | OS_FILE_ON_ERROR_SILENT,
+ OS_FILE_NORMAL,
+ OS_DATA_FILE,
+ srv_read_only_mode,
+ &ret);
+
+ if (ret) {
+ os_file_close(fh);
+ ib::error()
+ << "undo tablespace '" << name << "' exists."
+ " Creating system tablespace with existing undo"
+ " tablespaces is not supported. Please delete"
+ " all undo tablespaces before creating new"
+ " system tablespace.";
+ return(DB_ERROR);
+ }
+ }
+
+ /* Check if any redo log files exist */
+ char logfilename[OS_FILE_MAX_PATH];
+ size_t dirnamelen = strlen(srv_log_group_home_dir);
+ memcpy(logfilename, srv_log_group_home_dir, dirnamelen);
- /* On 64 bit Windows ulint can be 32 bit and os_offset_t
- is 64 bit. It is OK to cast the n_pages to ulint because
- the unit has been scaled to pages and they are always
- 32 bit. */
- if (fil_node_create(name, (ulint) n_pages, space, FALSE)) {
- err = DB_SUCCESS;
+ for (unsigned i = 0; i < srv_n_log_files; i++) {
+ sprintf(logfilename + dirnamelen,
+ "ib_logfile%u", i);
+
+ fh = os_file_create(
+ innodb_log_file_key, logfilename,
+ OS_FILE_OPEN_RETRY
+ | OS_FILE_ON_ERROR_NO_EXIT
+ | OS_FILE_ON_ERROR_SILENT,
+ OS_FILE_NORMAL,
+ OS_LOG_FILE,
+ srv_read_only_mode,
+ &ret);
+
+ if (ret) {
+ os_file_close(fh);
+ ib::error() << "redo log file '" << logfilename
+ << "' exists. Creating system tablespace with"
+ " existing redo log files is not recommended."
+ " Please delete all redo log files before"
+ " creating new system tablespace.";
+ return(DB_ERROR);
}
}
- return(err);
+ return(DB_SUCCESS);
}
-/********************************************************************
-Opens the configured number of undo tablespaces.
-@return DB_SUCCESS or error code */
-static
+undo::undo_spaces_t undo::Truncate::s_fix_up_spaces;
+
+/** Open the configured number of dedicated undo tablespaces.
+@param[in] create_new_db whether the database is being initialized
+@return DB_SUCCESS or error code */
dberr_t
-srv_undo_tablespaces_init(
-/*======================*/
- ibool create_new_db, /*!< in: TRUE if new db being
- created */
- const ulint n_conf_tablespaces, /*!< in: configured undo
- tablespaces */
- ulint* n_opened) /*!< out: number of UNDO
- tablespaces successfully
- discovered and opened */
+srv_undo_tablespaces_init(bool create_new_db)
{
- ulint i;
- dberr_t err = DB_SUCCESS;
- ulint prev_space_id = 0;
- ulint n_undo_tablespaces;
- ulint undo_tablespace_ids[TRX_SYS_N_RSEGS + 1];
+ ulint i;
+ dberr_t err = DB_SUCCESS;
+ ulint prev_space_id = 0;
+ ulint n_undo_tablespaces;
+ ulint undo_tablespace_ids[TRX_SYS_N_RSEGS + 1];
+
+ srv_undo_tablespaces_open = 0;
- *n_opened = 0;
+ ut_a(srv_undo_tablespaces <= TRX_SYS_N_RSEGS);
+ ut_a(!create_new_db || srv_operation == SRV_OPERATION_NORMAL);
- ut_a(n_conf_tablespaces <= TRX_SYS_N_RSEGS);
+ if (srv_undo_tablespaces == 1) { /* 1 is not allowed, make it 0 */
+ srv_undo_tablespaces = 0;
+ }
memset(undo_tablespace_ids, 0x0, sizeof(undo_tablespace_ids));
@@ -1411,17 +820,17 @@ srv_undo_tablespaces_init(
the location of the undo tablespaces and their space ids this
restriction will/should be lifted. */
- for (i = 0; create_new_db && i < n_conf_tablespaces; ++i) {
+ for (i = 0; create_new_db && i < srv_undo_tablespaces; ++i) {
char name[OS_FILE_MAX_PATH];
ulint space_id = i + 1;
DBUG_EXECUTE_IF("innodb_undo_upgrade",
space_id = i + 3;);
- ut_snprintf(
+ snprintf(
name, sizeof(name),
- "%s%cundo%03lu",
- srv_undo_dir, SRV_PATH_SEPARATOR, space_id);
+ "%s%cundo%03zu",
+ srv_undo_dir, OS_PATH_SEPARATOR, space_id);
if (i == 0) {
srv_undo_space_id_start = space_id;
@@ -1434,11 +843,8 @@ srv_undo_tablespaces_init(
name, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES);
if (err != DB_SUCCESS) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Could not create undo tablespace '%s'.",
- name);
-
+ ib::error() << "Could not create undo tablespace '"
+ << name << "'.";
return(err);
}
}
@@ -1447,20 +853,65 @@ srv_undo_tablespaces_init(
the system tablespace (0). If we are creating a new instance then
we build the undo_tablespace_ids ourselves since they don't
already exist. */
+ n_undo_tablespaces = create_new_db
+ || srv_operation == SRV_OPERATION_BACKUP
+ || srv_operation == SRV_OPERATION_RESTORE_DELTA
+ ? srv_undo_tablespaces
+ : trx_rseg_get_n_undo_tablespaces(undo_tablespace_ids);
+ srv_undo_tablespaces_active = srv_undo_tablespaces;
- if (!create_new_db) {
- n_undo_tablespaces = trx_rseg_get_n_undo_tablespaces(
- undo_tablespace_ids);
+ switch (srv_operation) {
+ case SRV_OPERATION_RESTORE_DELTA:
+ case SRV_OPERATION_BACKUP:
+ for (i = 0; i < n_undo_tablespaces; i++) {
+ undo_tablespace_ids[i] = i + srv_undo_space_id_start;
+ }
- if (n_undo_tablespaces != 0) {
- srv_undo_space_id_start = undo_tablespace_ids[0];
- prev_space_id = srv_undo_space_id_start - 1;
+ prev_space_id = srv_undo_space_id_start - 1;
+ break;
+ case SRV_OPERATION_NORMAL:
+ if (create_new_db) {
+ break;
}
+ /* fall through */
+ case SRV_OPERATION_RESTORE_ROLLBACK_XA:
+ case SRV_OPERATION_RESTORE:
+ case SRV_OPERATION_RESTORE_EXPORT:
+ ut_ad(!create_new_db);
- } else {
- n_undo_tablespaces = n_conf_tablespaces;
+ /* Check if any of the UNDO tablespaces needs fix-up because
+ the server crashed while truncate was active on that
+ tablespace. */
+ for (i = 0; i < n_undo_tablespaces; ++i) {
+
+ undo::Truncate undo_trunc;
+
+ if (undo_trunc.needs_fix_up(undo_tablespace_ids[i])) {
+
+ char name[OS_FILE_MAX_PATH];
+
+ snprintf(name, sizeof(name),
+ "%s%cundo%03zu",
+ srv_undo_dir, OS_PATH_SEPARATOR,
+ undo_tablespace_ids[i]);
+
+ os_file_delete(innodb_data_file_key, name);
+
+ err = srv_undo_tablespace_create(
+ name,
+ SRV_UNDO_TABLESPACE_SIZE_IN_PAGES);
+
+ if (err != DB_SUCCESS) {
+ ib::error() << "Could not fix-up undo "
+ " tablespace truncate '"
+ << name << "'.";
+ return(err);
+ }
- undo_tablespace_ids[n_conf_tablespaces] = ULINT_UNDEFINED;
+ undo::Truncate::s_fix_up_spaces.push_back(
+ undo_tablespace_ids[i]);
+ }
+ }
+ break;
}
/* Open all the undo tablespaces that are currently in use. If we
@@ -1471,32 +922,33 @@ srv_undo_tablespaces_init(
for (i = 0; i < n_undo_tablespaces; ++i) {
char name[OS_FILE_MAX_PATH];
- ut_snprintf(
+ snprintf(
name, sizeof(name),
- "%s%cundo%03lu",
- srv_undo_dir, SRV_PATH_SEPARATOR,
+ "%s%cundo%03zu",
+ srv_undo_dir, OS_PATH_SEPARATOR,
undo_tablespace_ids[i]);
/* Should be no gaps in undo tablespace ids. */
- ut_a(prev_space_id + 1 == undo_tablespace_ids[i]);
+ ut_a(!i || prev_space_id + 1 == undo_tablespace_ids[i]);
/* The system space id should not be in this array. */
ut_a(undo_tablespace_ids[i] != 0);
ut_a(undo_tablespace_ids[i] != ULINT_UNDEFINED);
- err = srv_undo_tablespace_open(name, undo_tablespace_ids[i]);
-
- if (err != DB_SUCCESS) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unable to open undo tablespace '%s'.", name);
-
- return(err);
+ if (!srv_undo_tablespace_open(name, undo_tablespace_ids[i],
+ create_new_db)) {
+ ib::error() << "Unable to open undo tablespace '"
+ << name << "'.";
+ return DB_ERROR;
}
prev_space_id = undo_tablespace_ids[i];
- ++*n_opened;
+ /* Note the first undo tablespace id, in case there was
+ no active undo tablespace before. */
+ if (0 == srv_undo_tablespaces_open++) {
+ srv_undo_space_id_start = undo_tablespace_ids[i];
+ }
}
/* Open any extra unused undo tablespaces. These must be contiguous.
@@ -1504,33 +956,26 @@ srv_undo_tablespaces_init(
not in use and therefore not required by recovery. We only check
that there are no gaps. */
- for (i = prev_space_id + 1; i < TRX_SYS_N_RSEGS; ++i) {
+ for (i = prev_space_id + 1;
+ i < srv_undo_space_id_start + TRX_SYS_N_RSEGS; ++i) {
char name[OS_FILE_MAX_PATH];
- ut_snprintf(
+ snprintf(
name, sizeof(name),
- "%s%cundo%03lu", srv_undo_dir, SRV_PATH_SEPARATOR, i);
+ "%s%cundo%03zu", srv_undo_dir, OS_PATH_SEPARATOR, i);
- /* Undo space ids start from 1. */
- err = srv_undo_tablespace_open(name, i);
-
- if (err != DB_SUCCESS) {
+ if (!srv_undo_tablespace_open(name, i, create_new_db)) {
+ err = DB_ERROR;
break;
}
- /** Note the first undo tablespace id in case of
- no active undo tablespace. */
- if (n_undo_tablespaces == 0) {
- srv_undo_space_id_start = i;
- }
-
++n_undo_tablespaces;
- ++*n_opened;
+ ++srv_undo_tablespaces_open;
}
- /** Explictly specify the srv_undo_space_id_start
- as zero when there are no undo tablespaces. */
+ /* Initialize srv_undo_space_id_start=0 when there are no
+ dedicated undo tablespaces. */
if (n_undo_tablespaces == 0) {
srv_undo_space_id_start = 0;
}
@@ -1539,37 +984,22 @@ srv_undo_tablespaces_init(
tolerate that discrepancy but not the inverse. Because there could
be unused undo tablespaces for future use. */
- if (n_conf_tablespaces > n_undo_tablespaces) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Expected to open %lu undo "
- "tablespaces but was able\n",
- n_conf_tablespaces);
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: to find only %lu undo "
- "tablespaces.\n", n_undo_tablespaces);
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Set the "
- "innodb_undo_tablespaces parameter to "
- "the\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: correct value and retry. Suggested "
- "value is %lu\n", n_undo_tablespaces);
+ if (srv_undo_tablespaces > n_undo_tablespaces) {
+ ib::error() << "Expected to open innodb_undo_tablespaces="
+ << srv_undo_tablespaces
+ << " but was able to find only "
+ << n_undo_tablespaces;
return(err != DB_SUCCESS ? err : DB_ERROR);
- } else if (n_undo_tablespaces > 0) {
+ } else if (n_undo_tablespaces > 0) {
- ib_logf(IB_LOG_LEVEL_INFO, "Opened %lu undo tablespaces",
- n_undo_tablespaces);
+ ib::info() << "Opened " << n_undo_tablespaces
+ << " undo tablespaces";
- if (n_conf_tablespaces == 0) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Using the system tablespace for all UNDO "
- "logging because innodb_undo_tablespaces=0");
+ if (srv_undo_tablespaces == 0) {
+ ib::warn() << "innodb_undo_tablespaces=0 disables"
+ " dedicated undo log tablespaces";
}
}
@@ -1589,6 +1019,63 @@ srv_undo_tablespaces_init(
mtr_commit(&mtr);
}
+ if (!undo::Truncate::s_fix_up_spaces.empty()) {
+
+ /* Step-1: Initialize the tablespace header and rsegs header. */
+ mtr_t mtr;
+ trx_sysf_t* sys_header;
+
+ mtr_start(&mtr);
+ /* Turn off REDO logging. We are in server start mode, fixing
+ up the UNDO tablespaces even before the REDO log has been
+ read. If we wrote REDO log records here, they would be applied
+ as part of the current recovery process; we do not want that,
+ because this fix-up runs independently of REDO logging. */
+ mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
+ sys_header = trx_sysf_get(&mtr);
+
+ for (undo::undo_spaces_t::const_iterator it
+ = undo::Truncate::s_fix_up_spaces.begin();
+ it != undo::Truncate::s_fix_up_spaces.end();
+ ++it) {
+
+ undo::Truncate::add_space_to_trunc_list(*it);
+
+ fsp_header_init(
+ *it, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr);
+
+ mtr_x_lock(fil_space_get_latch(*it, NULL), &mtr);
+
+ for (ulint i = 0; i < TRX_SYS_N_RSEGS; i++) {
+
+ ulint space_id = trx_sysf_rseg_get_space(
+ sys_header, i, &mtr);
+
+ if (space_id == *it) {
+ trx_rseg_header_create(
+ *it, ULINT_MAX, i, &mtr);
+ }
+ }
+
+ undo::Truncate::clear_trunc_list();
+ }
+ mtr_commit(&mtr);
+
+ /* Step-2: Flush the dirty pages from the buffer pool. */
+ for (undo::undo_spaces_t::const_iterator it
+ = undo::Truncate::s_fix_up_spaces.begin();
+ it != undo::Truncate::s_fix_up_spaces.end();
+ ++it) {
+ FlushObserver dummy(TRX_SYS_SPACE, NULL, NULL);
+ buf_LRU_flush_or_remove_pages(TRX_SYS_SPACE, &dummy);
+ FlushObserver dummy2(*it, NULL, NULL);
+ buf_LRU_flush_or_remove_pages(*it, &dummy2);
+
+ /* Remove the truncate redo log file. */
+ undo::done(*it);
+ }
+ }
+
return(DB_SUCCESS);
}
@@ -1615,8 +1102,7 @@ srv_start_wait_for_purge_to_start()
break;
case PURGE_STATE_INIT:
- ib_logf(IB_LOG_LEVEL_INFO,
- "Waiting for purge to start");
+ ib::info() << "Waiting for purge to start";
os_thread_sleep(50000);
break;
@@ -1628,282 +1114,489 @@ srv_start_wait_for_purge_to_start()
}
}
+/** Create the temporary file tablespace.
+@param[in] create_new_db whether we are creating a new database
+@return DB_SUCCESS or error code. */
+static
+dberr_t
+srv_open_tmp_tablespace(bool create_new_db)
+{
+ ulint sum_of_new_sizes;
+
+ /* Try to remove any existing files left over by the last
+ unclean shutdown. */
+ srv_tmp_space.set_sanity_check_status(true);
+ srv_tmp_space.delete_files();
+ srv_tmp_space.set_ignore_read_only(true);
+
+ ib::info() << "Creating shared tablespace for temporary tables";
+
+ bool create_new_temp_space;
+
+ srv_tmp_space.set_space_id(SRV_TMP_SPACE_ID);
+
+ dberr_t err = srv_tmp_space.check_file_spec(
+ &create_new_temp_space, 12 * 1024 * 1024);
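+
+ /* The 12 * 1024 * 1024 above is a minimum expected file
+ size in bytes (12 MiB), analogous to the
+ MIN_EXPECTED_TABLESPACE_SIZE used for the system
+ tablespace check later in this file. */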
+
+ if (err == DB_FAIL) {
+
+ ib::error() << "The " << srv_tmp_space.name()
+ << " data file must be writable!";
+
+ err = DB_ERROR;
+
+ } else if (err != DB_SUCCESS) {
+ ib::error() << "Could not create the shared "
+ << srv_tmp_space.name() << ".";
+
+ } else if ((err = srv_tmp_space.open_or_create(
+ true, create_new_db, &sum_of_new_sizes, NULL))
+ != DB_SUCCESS) {
+
+ ib::error() << "Unable to create the shared "
+ << srv_tmp_space.name();
+
+ } else {
+
+ mtr_t mtr;
+ ulint size = srv_tmp_space.get_sum_of_sizes();
+
+ /* Open this shared temp tablespace in the fil_system so that
+ it stays open until shutdown. */
+ if (fil_space_open(srv_tmp_space.name())) {
+
+ /* Initialize the header page */
+ mtr_start(&mtr);
+ mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
+
+ fsp_header_init(SRV_TMP_SPACE_ID, size, &mtr);
+
+ mtr_commit(&mtr);
+ } else {
+ /* This file was just opened in the code above! */
+ ib::error() << "The " << srv_tmp_space.name()
+ << " data file cannot be re-opened"
+ " after check_file_spec() succeeded!";
+
+ err = DB_ERROR;
+ }
+ }
+
+ return(err);
+}
+
+/****************************************************************//**
+Set state to indicate start of particular group of threads in InnoDB. */
+UNIV_INLINE
+void
+srv_start_state_set(
+/*================*/
+ srv_start_state_t state) /*!< in: indicate current state of
+ thread startup */
+{
+ srv_start_state |= state;
+}
+
+/****************************************************************//**
+Check if following group of threads is started.
+@return true if started */
+UNIV_INLINE
+bool
+srv_start_state_is_set(
+/*===================*/
+ srv_start_state_t state) /*!< in: state to check for */
+{
+ return(srv_start_state & state);
+}
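+
+/* Usage sketch: startup records progress with, for example,
+srv_start_state_set(SRV_START_STATE_IO) once the I/O threads are
+running, and srv_shutdown_all_bg_threads() below checks
+srv_start_state_is_set(SRV_START_STATE_IO) to decide which threads
+still need to be woken up. */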
+
+/**
+Shutdown all background threads created by InnoDB. */
+static
+void
+srv_shutdown_all_bg_threads()
+{
+ ut_ad(!srv_undo_sources);
+ srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS;
+
+ /* All threads end up waiting for certain events. Set those events
+ to the signaled state; the threads will then exit by themselves
+ after os_event_wait(). */
+ for (uint i = 0; i < 1000; ++i) {
+ /* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM
+ HERE OR EARLIER */
+
+ if (srv_start_state_is_set(SRV_START_STATE_LOCK_SYS)) {
+ /* a. Let the lock timeout thread exit */
+ os_event_set(lock_sys->timeout_event);
+ }
+
+ if (!srv_read_only_mode) {
+ /* b. srv error monitor thread exits automatically,
+ no need to do anything here */
+
+ if (srv_start_state_is_set(SRV_START_STATE_MASTER)) {
+ /* c. We wake the master thread so that
+ it exits */
+ srv_wake_master_thread();
+ }
+
+ if (srv_start_state_is_set(SRV_START_STATE_PURGE)) {
+ /* d. Wakeup purge threads. */
+ srv_purge_wakeup();
+ }
+
+ if (srv_n_fil_crypt_threads_started) {
+ os_event_set(fil_crypt_threads_event);
+ }
+
+ if (log_scrub_thread_active) {
+ os_event_set(log_scrub_event);
+ }
+ }
+
+ if (srv_start_state_is_set(SRV_START_STATE_IO)) {
+ ut_ad(!srv_read_only_mode);
+
+ /* e. Exit the i/o threads */
+ if (recv_sys->flush_start != NULL) {
+ os_event_set(recv_sys->flush_start);
+ }
+ if (recv_sys->flush_end != NULL) {
+ os_event_set(recv_sys->flush_end);
+ }
+
+ os_event_set(buf_flush_event);
+
+ if (srv_use_mtflush) {
+ buf_mtflu_io_thread_exit();
+ }
+ }
+
+ if (!os_thread_count) {
+ return;
+ }
+
+ switch (srv_operation) {
+ case SRV_OPERATION_BACKUP:
+ case SRV_OPERATION_RESTORE_DELTA:
+ break;
+ case SRV_OPERATION_NORMAL:
+ case SRV_OPERATION_RESTORE_ROLLBACK_XA:
+ case SRV_OPERATION_RESTORE:
+ case SRV_OPERATION_RESTORE_EXPORT:
+ if (!buf_page_cleaner_is_active
+ && os_aio_all_slots_free()) {
+ os_aio_wake_all_threads_at_shutdown();
+ }
+ }
+
+ os_thread_sleep(100000);
+ }
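+
+ /* 1000 iterations of a 100,000-microsecond (0.1 s) sleep:
+ we wait up to roughly 100 seconds for the remaining threads
+ to exit before warning below. */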
+
+ ib::warn() << os_thread_count << " threads created by InnoDB"
+ " had not exited at shutdown!";
+ ut_d(os_aio_print_pending_io(stderr));
+ ut_ad(0);
+}
+
+#ifdef UNIV_DEBUG
+# define srv_init_abort(_db_err) \
+ srv_init_abort_low(create_new_db, __FILE__, __LINE__, _db_err)
+#else
+# define srv_init_abort(_db_err) \
+ srv_init_abort_low(create_new_db, _db_err)
+#endif /* UNIV_DEBUG */
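+
+/* Usage sketch: error paths below return srv_init_abort(err); in
+UNIV_DEBUG builds the macro also passes __FILE__ and __LINE__ so that
+srv_init_abort_low() can name the failing call site in the log. */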
+
+/** Innobase start-up aborted. Perform cleanup actions.
+@param[in] create_new_db TRUE if new db is being created
+@param[in] file File name
+@param[in] line Line number
+@param[in] err Reason for aborting InnoDB startup
+@return DB_SUCCESS or error code. */
+MY_ATTRIBUTE((warn_unused_result, nonnull))
+static
+dberr_t
+srv_init_abort_low(
+ bool create_new_db,
+#ifdef UNIV_DEBUG
+ const char* file,
+ unsigned line,
+#endif /* UNIV_DEBUG */
+ dberr_t err)
+{
+ if (create_new_db) {
+ ib::error() << "Database creation was aborted"
+#ifdef UNIV_DEBUG
+ " at " << innobase_basename(file) << "[" << line << "]"
+#endif /* UNIV_DEBUG */
+ " with error " << ut_strerr(err) << ". You may need"
+ " to delete the ibdata1 file before trying to start"
+ " up again.";
+ } else {
+ ib::error() << "Plugin initialization aborted"
+#ifdef UNIV_DEBUG
+ " at " << innobase_basename(file) << "[" << line << "]"
+#endif /* UNIV_DEBUG */
+ " with error " << ut_strerr(err);
+ }
+
+ srv_shutdown_all_bg_threads();
+ return(err);
+}
+
+/** Prepare to delete the redo log files. Flush the dirty pages from all the
+buffer pools. Flush the redo log buffer to the redo log file.
+@param[in] n_files number of old redo log files
+@return lsn upto which data pages have been flushed. */
+static
+lsn_t
+srv_prepare_to_delete_redo_log_files(
+ ulint n_files)
+{
+ DBUG_ENTER("srv_prepare_to_delete_redo_log_files");
+
+ lsn_t flushed_lsn;
+ ulint pending_io = 0;
+ ulint count = 0;
+
+ if (srv_safe_truncate) {
+ if ((log_sys->log.format & ~LOG_HEADER_FORMAT_ENCRYPTED)
+ != LOG_HEADER_FORMAT_10_3
+ || log_sys->log.subformat != 1) {
+ srv_log_file_size = 0;
+ }
+ } else {
+ if ((log_sys->log.format & ~LOG_HEADER_FORMAT_ENCRYPTED)
+ != LOG_HEADER_FORMAT_10_2) {
+ srv_log_file_size = 0;
+ }
+ }
+
+ do {
+ /* Clean the buffer pool. */
+ buf_flush_sync_all_buf_pools();
+
+ DBUG_EXECUTE_IF("innodb_log_abort_1", DBUG_RETURN(0););
+ DBUG_PRINT("ib_log", ("After innodb_log_abort_1"));
+
+ log_mutex_enter();
+
+ fil_names_clear(log_sys->lsn, false);
+
+ flushed_lsn = log_sys->lsn;
+
+ {
+ ib::info info;
+ if (srv_log_file_size == 0) {
+ info << ((log_sys->log.format
+ & ~LOG_HEADER_FORMAT_ENCRYPTED)
+ < LOG_HEADER_FORMAT_10_3
+ ? "Upgrading redo log: "
+ : "Downgrading redo log: ");
+ } else if (n_files != srv_n_log_files
+ || srv_log_file_size
+ != srv_log_file_size_requested) {
+ if (srv_encrypt_log
+ == log_sys->is_encrypted()) {
+ info << (srv_encrypt_log
+ ? "Resizing encrypted"
+ : "Resizing");
+ } else if (srv_encrypt_log) {
+ info << "Encrypting and resizing";
+ } else {
+ info << "Removing encryption"
+ " and resizing";
+ }
+
+ info << " redo log from " << n_files
+ << "*" << srv_log_file_size << " to ";
+ } else if (srv_encrypt_log) {
+ info << "Encrypting redo log: ";
+ } else {
+ info << "Removing redo log encryption: ";
+ }
+
+ info << srv_n_log_files << "*"
+ << srv_log_file_size_requested
+ << " bytes; LSN=" << flushed_lsn;
+ }
+
+ srv_start_lsn = flushed_lsn;
+ /* Flush the old log files. */
+ log_mutex_exit();
+
+ log_write_up_to(flushed_lsn, true);
+
+ /* If innodb_flush_method=O_DSYNC,
+ we need to explicitly flush the log buffers. */
+ fil_flush(SRV_LOG_SPACE_FIRST_ID);
+
+ ut_ad(flushed_lsn == log_get_lsn());
+
+ /* Check whether the buffer pools are clean; if not,
+ retry until they are. Despite its name,
+ buf_pool_check_no_pending_io() returns the number of
+ pending I/O operations, which is why the loop below
+ repeats while it is nonzero. */
+ pending_io = buf_pool_check_no_pending_io();
+
+ if (pending_io > 0) {
+ count++;
+ /* Print a message every 60 seconds if we
+ are waiting to clean the buffer pools */
+ if (srv_print_verbose_log && count > 600) {
+ ib::info() << "Waiting for "
+ << pending_io << " buffer "
+ << "page I/Os to complete";
+ count = 0;
+ }
+ }
+ os_thread_sleep(100000);
+
+ } while (buf_pool_check_no_pending_io());
+
+ DBUG_RETURN(flushed_lsn);
+}
+
/********************************************************************
Starts InnoDB and creates a new database if database files
are not found and the user wants.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
dberr_t
innobase_start_or_create_for_mysql()
{
- bool create_new_db;
+ bool create_new_db = false;
lsn_t flushed_lsn;
-#ifdef UNIV_LOG_ARCHIVE
- ulint min_arch_log_no;
- ulint max_arch_log_no;
-#endif /* UNIV_LOG_ARCHIVE */
- ulint sum_of_new_sizes;
- dberr_t err;
- unsigned i;
+ dberr_t err = DB_SUCCESS;
ulint srv_n_log_files_found = srv_n_log_files;
- ulint io_limit;
mtr_t mtr;
- ib_bh_t* ib_bh;
- ulint n_recovered_trx;
char logfilename[10000];
char* logfile0 = NULL;
size_t dirnamelen;
- bool sys_datafiles_created = false;
-
- /* Check that os_fast_mutexes work as expected */
- os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &srv_os_test_mutex);
-
- ut_a(0 == os_fast_mutex_trylock(&srv_os_test_mutex));
-
- os_fast_mutex_unlock(&srv_os_test_mutex);
-
- os_fast_mutex_lock(&srv_os_test_mutex);
-
- os_fast_mutex_unlock(&srv_os_test_mutex);
+ unsigned i = 0;
- os_fast_mutex_free(&srv_os_test_mutex);
+ ut_ad(srv_operation == SRV_OPERATION_NORMAL
+ || is_mariabackup_restore_or_export());
if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) {
- srv_read_only_mode = 1;
+ srv_read_only_mode = true;
}
high_level_read_only = srv_read_only_mode
- || srv_force_recovery > SRV_FORCE_NO_TRX_UNDO;
+ || srv_force_recovery > SRV_FORCE_NO_TRX_UNDO
+ || srv_sys_space.created_new_raw();
+
+ /* Reset the start state. */
+ srv_start_state = SRV_START_STATE_NONE;
if (srv_read_only_mode) {
- ib_logf(IB_LOG_LEVEL_INFO, "Started in read only mode");
- }
-
-#ifdef HAVE_DARWIN_THREADS
-# ifdef F_FULLFSYNC
- /* This executable has been compiled on Mac OS X 10.3 or later.
- Assume that F_FULLFSYNC is available at run-time. */
- srv_have_fullfsync = TRUE;
-# else /* F_FULLFSYNC */
- /* This executable has been compiled on Mac OS X 10.2
- or earlier. Determine if the executable is running
- on Mac OS X 10.3 or later. */
- struct utsname utsname;
- if (uname(&utsname)) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: cannot determine Mac OS X version!\n", stderr);
- } else {
- srv_have_fullfsync = strcmp(utsname.release, "7.") >= 0;
- }
- if (!srv_have_fullfsync) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: On Mac OS X, fsync() may be "
- "broken on internal drives,\n", stderr);
- ut_print_timestamp(stderr);
- fputs(" InnoDB: making transactions unsafe!\n", stderr);
- }
-# endif /* F_FULLFSYNC */
-#endif /* HAVE_DARWIN_THREADS */
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Using %s to ref count buffer pool pages",
-#ifdef PAGE_ATOMIC_REF_COUNT
- "atomics"
-#else
- "mutexes"
-#endif /* PAGE_ATOMIC_REF_COUNT */
- );
+ ib::info() << "Started in read only mode";
+
+ /* There are no writes to InnoDB tablespaces (not even to
+ temporary ones, because CREATE TEMPORARY TABLE is also
+ refused in read-only mode). */
+ srv_use_doublewrite_buf = FALSE;
+ }
compile_time_assert(sizeof(ulint) == sizeof(void*));
#ifdef UNIV_DEBUG
- ib_logf(IB_LOG_LEVEL_INFO,
- " InnoDB: !!!!!!!! UNIV_DEBUG switched on !!!!!!!!!");
+ ib::info() << "!!!!!!!! UNIV_DEBUG switched on !!!!!!!!!";
#endif
#ifdef UNIV_IBUF_DEBUG
- ib_logf(IB_LOG_LEVEL_INFO,
- " InnoDB: !!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!");
-# ifdef UNIV_IBUF_COUNT_DEBUG
- ib_logf(IB_LOG_LEVEL_INFO,
- " InnoDB: !!!!!!!! UNIV_IBUF_COUNT_DEBUG switched on "
- "!!!!!!!!!");
- ib_logf(IB_LOG_LEVEL_INFO,
- " InnoDB: Crash recovery will fail with UNIV_IBUF_COUNT_DEBUG");
-# endif
-#endif
-
-#ifdef UNIV_BLOB_DEBUG
- ib_logf(IB_LOG_LEVEL_INFO,
- "InnoDB: !!!!!!!! UNIV_BLOB_DEBUG switched on !!!!!!!!!\n"
- "InnoDB: Server restart may fail with UNIV_BLOB_DEBUG");
-#endif /* UNIV_BLOB_DEBUG */
-
-#ifdef UNIV_SYNC_DEBUG
- ib_logf(IB_LOG_LEVEL_INFO,
- " InnoDB: !!!!!!!! UNIV_SYNC_DEBUG switched on !!!!!!!!!");
-#endif
-
-#ifdef UNIV_SEARCH_DEBUG
- ib_logf(IB_LOG_LEVEL_INFO,
- " InnoDB: !!!!!!!! UNIV_SEARCH_DEBUG switched on !!!!!!!!!");
+ ib::info() << "!!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!";
#endif
#ifdef UNIV_LOG_LSN_DEBUG
- ib_logf(IB_LOG_LEVEL_INFO,
- " InnoDB: !!!!!!!! UNIV_LOG_LSN_DEBUG switched on !!!!!!!!!");
+ ib::info() << "!!!!!!!! UNIV_LOG_LSN_DEBUG switched on !!!!!!!!!";
#endif /* UNIV_LOG_LSN_DEBUG */
-#ifdef UNIV_MEM_DEBUG
- ib_logf(IB_LOG_LEVEL_INFO,
- " InnoDB: !!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!");
-#endif
-
- if (srv_use_sys_malloc) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "The InnoDB memory heap is disabled");
- }
#if defined(COMPILER_HINTS_ENABLED)
- ib_logf(IB_LOG_LEVEL_INFO,
- " InnoDB: Compiler hints enabled.");
+ ib::info() << "Compiler hints enabled.";
#endif /* defined(COMPILER_HINTS_ENABLED) */
- ib_logf(IB_LOG_LEVEL_INFO,
- "" IB_ATOMICS_STARTUP_MSG "");
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "" IB_MEMORY_BARRIER_STARTUP_MSG "");
-
-#ifndef HAVE_MEMORY_BARRIER
-#if defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined _M_X64 || defined __WIN__
+#ifdef _WIN32
+ ib::info() << "Mutexes and rw_locks use Windows interlocked functions";
#else
- ib_logf(IB_LOG_LEVEL_WARN,
- "MySQL was built without a memory barrier capability on this"
- " architecture, which might allow a mutex/rw_lock violation"
- " under high thread concurrency. This may cause a hang.");
-#endif /* IA32 or AMD64 */
-#endif /* HAVE_MEMORY_BARRIER */
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Compressed tables use zlib " ZLIB_VERSION
+ ib::info() << "Mutexes and rw_locks use GCC atomic builtins";
+#endif
+ ib::info() << MUTEX_TYPE;
+
+ ib::info() << "Compressed tables use zlib " ZLIB_VERSION
#ifdef UNIV_ZIP_DEBUG
" with validation"
#endif /* UNIV_ZIP_DEBUG */
- );
+ ;
#ifdef UNIV_ZIP_COPY
- ib_logf(IB_LOG_LEVEL_INFO, "and extra copying");
+ ib::info() << "and extra copying";
#endif /* UNIV_ZIP_COPY */
-
/* Since InnoDB does not currently clean up all its internal data
structures in MySQL Embedded Server Library server_end(), we
print an error message if someone tries to start up InnoDB a
second time during the process lifetime. */
if (srv_start_has_been_called) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: startup called second time "
- "during the process\n");
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: lifetime. In the MySQL Embedded "
- "Server Library you\n");
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: cannot call server_init() more "
- "than once during the\n");
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: process lifetime.\n");
+ ib::error() << "Startup called second time"
+ " during the process lifetime."
+ " In the MySQL Embedded Server Library"
+ " you cannot call server_init() more than"
+ " once during the process lifetime.";
}
- srv_start_has_been_called = TRUE;
-
-#ifdef UNIV_DEBUG
- log_do_write = TRUE;
-#endif /* UNIV_DEBUG */
- /* yydebug = TRUE; */
-
- srv_is_being_started = TRUE;
- srv_startup_is_before_trx_rollback_phase = TRUE;
-
-#ifdef __WIN__
- switch (os_get_os_version()) {
- case OS_WIN95:
- case OS_WIN31:
- case OS_WINNT:
- /* On Win 95, 98, ME, Win32 subsystem for Windows 3.1,
- and NT use simulated aio. In NT Windows provides async i/o,
- but when run in conjunction with InnoDB Hot Backup, it seemed
- to corrupt the data files. */
-
- srv_use_native_aio = FALSE;
- break;
+ srv_start_has_been_called = true;
- case OS_WIN2000:
- case OS_WINXP:
- /* On 2000 and XP, async IO is available. */
- srv_use_native_aio = TRUE;
- break;
+ srv_is_being_started = true;
- default:
- /* Vista and later have both async IO and condition variables */
- srv_use_native_aio = TRUE;
- srv_use_native_conditions = TRUE;
- break;
- }
+#ifdef _WIN32
+ srv_use_native_aio = TRUE;
#elif defined(LINUX_NATIVE_AIO)
if (srv_use_native_aio) {
- ib_logf(IB_LOG_LEVEL_INFO, "Using Linux native AIO");
+ ib::info() << "Using Linux native AIO";
}
#else
/* Currently, native AIO is supported only on Windows and Linux,
and only when the support is compiled in. In all other cases,
we ignore the setting of innodb_use_native_aio. */
srv_use_native_aio = FALSE;
-#endif /* __WIN__ */
+#endif /* _WIN32 */
+
+ /* Register performance schema stages before any real work has been
+ started which may need to be instrumented. */
+ mysql_stage_register("innodb", srv_stages, UT_ARR_SIZE(srv_stages));
if (srv_file_flush_method_str == NULL) {
/* These are the default options */
-
- srv_unix_file_flush_method = SRV_UNIX_FSYNC;
-
- srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
-#ifndef __WIN__
+ srv_file_flush_method = IF_WIN(SRV_ALL_O_DIRECT_FSYNC,SRV_FSYNC);
} else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) {
- srv_unix_file_flush_method = SRV_UNIX_FSYNC;
+ srv_file_flush_method = SRV_FSYNC;
} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DSYNC")) {
- srv_unix_file_flush_method = SRV_UNIX_O_DSYNC;
+ srv_file_flush_method = SRV_O_DSYNC;
} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
- srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
+ srv_file_flush_method = SRV_O_DIRECT;
} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT_NO_FSYNC")) {
- srv_unix_file_flush_method = SRV_UNIX_O_DIRECT_NO_FSYNC;
+ srv_file_flush_method = SRV_O_DIRECT_NO_FSYNC;
} else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
- srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
+ srv_file_flush_method = SRV_LITTLESYNC;
} else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) {
- srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
-#else
+ srv_file_flush_method = SRV_NOSYNC;
+#ifdef _WIN32
} else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) {
- srv_win_file_flush_method = SRV_WIN_IO_NORMAL;
- srv_use_native_aio = FALSE;
-
+ srv_file_flush_method = SRV_FSYNC;
} else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
- srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
- srv_use_native_aio = FALSE;
-
} else if (0 == ut_strcmp(srv_file_flush_method_str,
"async_unbuffered")) {
- srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
-#endif /* __WIN__ */
+#endif /* _WIN32 */
} else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unrecognized value %s for innodb_flush_method",
- srv_file_flush_method_str);
- return(DB_ERROR);
+ ib::error() << "Unrecognized value "
+ << srv_file_flush_method_str
+ << " for innodb_flush_method";
+ err = DB_ERROR;
}
/* Note that the call srv_boot() also changes the values of
@@ -1914,7 +1607,6 @@ innobase_start_or_create_for_mysql()
maximum number of threads that can wait in the 'srv_conc array' for
their time to enter InnoDB. */
-#define BUF_POOL_SIZE_THRESHOLD (1024 * 1024 * 1024)
srv_max_n_threads = 1 /* io_ibuf_thread */
+ 1 /* io_log_thread */
+ 1 /* lock_wait_timeout_thread */
@@ -1926,7 +1618,6 @@ innobase_start_or_create_for_mysql()
+ 1 /* dict_stats_thread */
+ 1 /* fts_optimize_thread */
+ 1 /* recv_writer_thread */
- + 1 /* buf_flush_page_cleaner_thread */
+ 1 /* trx_rollback_or_clean_all_recovered */
+ 128 /* added as margin, for use of
InnoDB Memcached etc. */
@@ -1934,127 +1625,146 @@ innobase_start_or_create_for_mysql()
+ srv_n_read_io_threads
+ srv_n_write_io_threads
+ srv_n_purge_threads
+ + srv_n_page_cleaners
/* FTS Parallel Sort */
+ fts_sort_pll_degree * FTS_NUM_AUX_INDEX
* max_connections;
- if (srv_buf_pool_size < BUF_POOL_SIZE_THRESHOLD) {
- /* If buffer pool is less than 1 GB,
- use only one buffer pool instance */
+ if (srv_buf_pool_size >= BUF_POOL_SIZE_THRESHOLD) {
+
+ if (srv_buf_pool_instances == srv_buf_pool_instances_default) {
+#if defined(_WIN32) && !defined(_WIN64)
+ /* Do not allocate too large of a buffer pool on
+ Windows 32-bit systems, which can have trouble
+ allocating larger single contiguous memory blocks. */
+ srv_buf_pool_size = static_cast<ulint>(ut_uint64_align_up(srv_buf_pool_size, srv_buf_pool_chunk_unit));
+ srv_buf_pool_instances = ut_min(
+ static_cast<ulong>(MAX_BUFFER_POOLS),
+ static_cast<ulong>(srv_buf_pool_size / srv_buf_pool_chunk_unit));
+#else /* defined(_WIN32) && !defined(_WIN64) */
+ /* Default to 8 instances when size > 1GB. */
+ srv_buf_pool_instances = 8;
+#endif /* defined(_WIN32) && !defined(_WIN64) */
+ }
+ } else {
+ /* If buffer pool is less than 1 GiB, assume fewer
+ threads. Also use only one buffer pool instance. */
+ if (srv_buf_pool_instances != srv_buf_pool_instances_default
+ && srv_buf_pool_instances != 1) {
+ /* We cannot distinguish whether the user explicitly
+ started mysqld with --innodb-buffer-pool-instances=0
+ (srv_buf_pool_instances_default is 0) or did not
+ specify that option at all. Thus we have the
+ limitation that if the user started with =0, we
+ will not emit a warning here, although we should. */
+ ib::info()
+ << "Adjusting innodb_buffer_pool_instances"
+ " from " << srv_buf_pool_instances << " to 1"
+ " since innodb_buffer_pool_size is less than "
+ << BUF_POOL_SIZE_THRESHOLD / (1024 * 1024)
+ << " MiB";
+ }
+
srv_buf_pool_instances = 1;
}
- srv_boot();
+ if (srv_buf_pool_chunk_unit * srv_buf_pool_instances
+ > srv_buf_pool_size) {
+ /* The buffer pool chunk size would exceed srv_buf_pool_size;
+ adjust srv_buf_pool_chunk_unit to fit srv_buf_pool_size. */
+ srv_buf_pool_chunk_unit
+ = static_cast<ulong>(srv_buf_pool_size)
+ / srv_buf_pool_instances;
+ if (srv_buf_pool_size % srv_buf_pool_instances != 0) {
+ ++srv_buf_pool_chunk_unit;
+ }
+ }
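+
+ /* Worked example: a 2 GiB buffer pool with
+ innodb_buffer_pool_instances = 64 and a 128 MiB chunk unit
+ gives 64 * 128 MiB = 8 GiB > 2 GiB, so the chunk unit is
+ reduced to 2 GiB / 64 = 32 MiB; the division is rounded up
+ when inexact. */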
- if (ut_crc32_sse2_enabled) {
- ib_logf(IB_LOG_LEVEL_INFO, "Using SSE crc32 instructions");
- } else if (ut_crc32_power8_enabled) {
- ib_logf(IB_LOG_LEVEL_INFO, "Using POWER8 crc32 instructions");
- } else {
- ib_logf(IB_LOG_LEVEL_INFO, "Using generic crc32 instructions");
+ srv_buf_pool_size = buf_pool_size_align(srv_buf_pool_size);
+
+ if (srv_n_page_cleaners > srv_buf_pool_instances) {
+ /* The page_cleaner parallelism is limited by the
+ number of buffer pool instances. */
+ srv_n_page_cleaners = srv_buf_pool_instances;
}
+ srv_boot();
+
+ ib::info() << ut_crc32_implementation;
+
if (!srv_read_only_mode) {
- mutex_create(srv_monitor_file_mutex_key,
- &srv_monitor_file_mutex, SYNC_NO_ORDER_CHECK);
+ mutex_create(LATCH_ID_SRV_MONITOR_FILE,
+ &srv_monitor_file_mutex);
if (srv_innodb_status) {
srv_monitor_file_name = static_cast<char*>(
- mem_alloc(
+ ut_malloc_nokey(
strlen(fil_path_to_mysql_datadir)
+ 20 + sizeof "/innodb_status."));
- sprintf(srv_monitor_file_name, "%s/innodb_status.%lu",
+ sprintf(srv_monitor_file_name,
+ "%s/innodb_status." ULINTPF,
fil_path_to_mysql_datadir,
os_proc_get_number());
srv_monitor_file = fopen(srv_monitor_file_name, "w+");
if (!srv_monitor_file) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unable to create %s: %s",
- srv_monitor_file_name,
- strerror(errno));
-
- return(DB_ERROR);
+ ib::error() << "Unable to create "
+ << srv_monitor_file_name << ": "
+ << strerror(errno);
+ if (err == DB_SUCCESS) {
+ err = DB_ERROR;
+ }
}
} else {
+
srv_monitor_file_name = NULL;
srv_monitor_file = os_file_create_tmpfile(NULL);
- if (!srv_monitor_file) {
- return(DB_ERROR);
+ if (!srv_monitor_file && err == DB_SUCCESS) {
+ err = DB_ERROR;
}
}
- mutex_create(srv_dict_tmpfile_mutex_key,
- &srv_dict_tmpfile_mutex, SYNC_DICT_OPERATION);
-
- srv_dict_tmpfile = os_file_create_tmpfile(NULL);
-
- if (!srv_dict_tmpfile) {
- return(DB_ERROR);
- }
-
- mutex_create(srv_misc_tmpfile_mutex_key,
- &srv_misc_tmpfile_mutex, SYNC_ANY_LATCH);
+ mutex_create(LATCH_ID_SRV_MISC_TMPFILE,
+ &srv_misc_tmpfile_mutex);
srv_misc_tmpfile = os_file_create_tmpfile(NULL);
- if (!srv_misc_tmpfile) {
- return(DB_ERROR);
+ if (!srv_misc_tmpfile && err == DB_SUCCESS) {
+ err = DB_ERROR;
}
}
- /* If user has set the value of innodb_file_io_threads then
- we'll emit a message telling the user that this parameter
- is now deprecated. */
- if (srv_n_file_io_threads != 4) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "innodb_file_io_threads is deprecated. Please use "
- "innodb_read_io_threads and innodb_write_io_threads "
- "instead");
+ if (err != DB_SUCCESS) {
+ return(srv_init_abort(err));
}
- /* Now overwrite the value on srv_n_file_io_threads */
srv_n_file_io_threads = srv_n_read_io_threads;
+ srv_n_file_io_threads += srv_n_write_io_threads;
+
if (!srv_read_only_mode) {
/* Add the log and ibuf IO threads. */
srv_n_file_io_threads += 2;
- srv_n_file_io_threads += srv_n_write_io_threads;
} else {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Disabling background IO write threads.");
-
- srv_n_write_io_threads = 0;
+ ib::info() << "Disabling background log and ibuf IO write"
+ << " threads.";
}
ut_a(srv_n_file_io_threads <= SRV_MAX_N_IO_THREADS);
- io_limit = 8 * SRV_N_PENDING_IOS_PER_THREAD;
-
- /* On Windows when using native aio the number of aio requests
- that a thread can handle at a given time is limited to 32
- i.e.: SRV_N_PENDING_IOS_PER_THREAD */
-# ifdef __WIN__
- if (srv_use_native_aio) {
- io_limit = SRV_N_PENDING_IOS_PER_THREAD;
- }
-# endif /* __WIN__ */
-
- if (!os_aio_init(io_limit,
- srv_n_read_io_threads,
+ if (!os_aio_init(srv_n_read_io_threads,
srv_n_write_io_threads,
SRV_MAX_N_PENDING_SYNC_IOS)) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Fatal : Cannot initialize AIO sub-system");
+ ib::error() << "Cannot initialize AIO sub-system";
- return(DB_ERROR);
+ return(srv_init_abort(DB_ERROR));
}
fil_init(srv_file_per_table ? 50000 : 5000, srv_max_n_open_files);
@@ -2070,21 +1780,30 @@ innobase_start_or_create_for_mysql()
unit = 'M';
}
- /* Print time to initialize the buffer pool */
- ib_logf(IB_LOG_LEVEL_INFO,
- "Initializing buffer pool, size = %.1f%c", size, unit);
+ double chunk_size;
+ char chunk_unit;
+
+ if (srv_buf_pool_chunk_unit >= 1024 * 1024 * 1024) {
+ chunk_size = srv_buf_pool_chunk_unit / 1024.0 / 1024 / 1024;
+ chunk_unit = 'G';
+ } else {
+ chunk_size = srv_buf_pool_chunk_unit / 1024.0 / 1024;
+ chunk_unit = 'M';
+ }
+
+ ib::info() << "Initializing buffer pool, total size = "
+ << size << unit << ", instances = " << srv_buf_pool_instances
+ << ", chunk size = " << chunk_size << chunk_unit;
err = buf_pool_init(srv_buf_pool_size, srv_buf_pool_instances);
if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot allocate memory for the buffer pool");
+ ib::error() << "Cannot allocate memory for the buffer pool";
- return(DB_ERROR);
+ return(srv_init_abort(DB_ERROR));
}
- ib_logf(IB_LOG_LEVEL_INFO,
- "Completed initialization of buffer pool");
+ ib::info() << "Completed initialization of buffer pool";
#ifdef UNIV_DEBUG
/* We have observed deadlocks with a 5MB buffer pool but
@@ -2092,217 +1811,149 @@ innobase_start_or_create_for_mysql()
if (srv_buf_pool_size <= 5 * 1024 * 1024) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Small buffer pool size (%luM), the flst_validate() "
- "debug function can cause a deadlock if the "
- "buffer pool fills up.",
- srv_buf_pool_size / 1024 / 1024);
+ ib::info() << "Small buffer pool size ("
+ << srv_buf_pool_size / 1024 / 1024
+ << "M), the flst_validate() debug function can cause a"
+ << " deadlock if the buffer pool fills up.";
}
#endif /* UNIV_DEBUG */
fsp_init();
- log_init();
+ log_sys_init();
+ recv_sys_init();
lock_sys_create(srv_lock_table_size);
/* Create i/o-handler threads: */
- for (i = 0; i < srv_n_file_io_threads; ++i) {
+ for (ulint t = 0; t < srv_n_file_io_threads; ++t) {
- n[i] = i;
+ n[t] = t;
- thread_handles[i] = os_thread_create(io_handler_thread, n + i, thread_ids + i);
- thread_started[i] = true;
+ thread_handles[t] = os_thread_create(io_handler_thread, n + t, thread_ids + t);
+ thread_started[t] = true;
}
-#ifdef UNIV_LOG_ARCHIVE
- if (0 != ut_strcmp(srv_log_group_home_dir, srv_arch_dir)) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: you must set the log group home dir in my.cnf\n");
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: the same as log arch dir.\n");
+ if (!srv_read_only_mode) {
+ buf_flush_page_cleaner_init();
- return(DB_ERROR);
- }
-#endif /* UNIV_LOG_ARCHIVE */
+ buf_page_cleaner_is_active = true;
+ os_thread_create(buf_flush_page_cleaner_coordinator,
+ NULL, NULL);
- if (srv_n_log_files * srv_log_file_size * UNIV_PAGE_SIZE
- >= 512ULL * 1024ULL * 1024ULL * 1024ULL) {
- /* log_block_convert_lsn_to_no() limits the returned block
- number to 1G and given that OS_FILE_LOG_BLOCK_SIZE is 512
- bytes, then we have a limit of 512 GB. If that limit is to
- be raised, then log_block_convert_lsn_to_no() must be
- modified. */
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Combined size of log files must be < 512 GB");
+ for (i = 1; i < srv_n_page_cleaners; ++i) {
+ os_thread_create(buf_flush_page_cleaner_worker,
+ NULL, NULL);
+ }
- return(DB_ERROR);
+#ifdef UNIV_LINUX
+ /* Wait for the setpriority() call to finish. */
+ os_event_wait(recv_sys->flush_end);
+#endif /* UNIV_LINUX */
+ srv_start_state_set(SRV_START_STATE_IO);
}
- if (srv_n_log_files * srv_log_file_size >= ULINT_MAX) {
- /* fil_io() takes ulint as an argument and we are passing
- (next_offset / UNIV_PAGE_SIZE) to it in log_group_write_buf().
- So (next_offset / UNIV_PAGE_SIZE) must be less than ULINT_MAX.
- So next_offset must be < ULINT_MAX * UNIV_PAGE_SIZE. This
- means that we are limited to ULINT_MAX * UNIV_PAGE_SIZE which
- is 64 TB on 32 bit systems. */
- fprintf(stderr,
- " InnoDB: Error: combined size of log files"
- " must be < %lu GB\n",
- ULINT_MAX / 1073741824 * UNIV_PAGE_SIZE);
+ if (srv_n_log_files * srv_log_file_size >= log_group_max_size) {
+ /* The log group size is limited by the width of the page number.
+ Remove this limitation when fil_io() is no longer used for
+ recovery log I/O. */
+ ib::error() << "Combined size of log files must be < "
+ << log_group_max_size;
- return(DB_ERROR);
+ return(srv_init_abort(DB_ERROR));
}
- sum_of_new_sizes = 0;
+ os_normalize_path(srv_data_home);
- for (i = 0; i < srv_n_data_files; i++) {
-#ifndef __WIN__
- if (sizeof(off_t) < 5
- && srv_data_file_sizes[i]
- >= (ulint) (1 << (32 - UNIV_PAGE_SIZE_SHIFT))) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: file size must be < 4 GB"
- " with this MySQL binary\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: and operating system combination,"
- " in some OS's < 2 GB\n");
+ /* Check if the data files exist or not. */
+ err = srv_sys_space.check_file_spec(
+ &create_new_db, MIN_EXPECTED_TABLESPACE_SIZE);
- return(DB_ERROR);
- }
-#endif
- sum_of_new_sizes += srv_data_file_sizes[i];
+ if (err != DB_SUCCESS) {
+ return(srv_init_abort(DB_ERROR));
}
- if (!srv_auto_extend_last_data_file && sum_of_new_sizes < 640) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Combined size in innodb_data_file_path"
- " must be at least %u MiB",
- 640 >> (20 - UNIV_PAGE_SIZE_SHIFT));
+ srv_startup_is_before_trx_rollback_phase = !create_new_db;
- return(DB_ERROR);
+ /* Check if undo tablespaces and redo log files exist before creating
+ a new system tablespace */
+ if (create_new_db) {
+ err = srv_check_undo_redo_logs_exists();
+ if (err != DB_SUCCESS) {
+ return(srv_init_abort(DB_ERROR));
+ }
+ recv_sys_debug_free();
}
- recv_sys_create();
- recv_sys_init(buf_pool_get_curr_size());
+ /* Open or create the data files. */
+ ulint sum_of_new_sizes;
- err = open_or_create_data_files(&create_new_db,
-#ifdef UNIV_LOG_ARCHIVE
- &min_arch_log_no, &max_arch_log_no,
-#endif /* UNIV_LOG_ARCHIVE */
- &flushed_lsn,
- &sum_of_new_sizes);
- if (err == DB_FAIL) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "The system tablespace must be writable!");
-
- return(DB_ERROR);
+ err = srv_sys_space.open_or_create(
+ false, create_new_db, &sum_of_new_sizes, &flushed_lsn);
- } else if (err != DB_SUCCESS) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Could not open or create the system tablespace. If "
- "you tried to add new data files to the system "
- "tablespace, and it failed here, you should now "
- "edit innodb_data_file_path in my.cnf back to what "
- "it was, and remove the new ibdata files InnoDB "
- "created in this failed attempt. InnoDB only wrote "
- "those files full of zeros, but did not yet use "
- "them in any way. But be careful: do not remove "
- "old data files which contain your precious data!");
-
- return(err);
+ switch (err) {
+ case DB_SUCCESS:
+ break;
+ case DB_CANNOT_OPEN_FILE:
+ ib::error()
+ << "Could not open or create the system tablespace. If"
+ " you tried to add new data files to the system"
+ " tablespace, and it failed here, you should now"
+ " edit innodb_data_file_path in my.cnf back to what"
+ " it was, and remove the new ibdata files InnoDB"
+ " created in this failed attempt. InnoDB only wrote"
+ " those files full of zeros, but did not yet use"
+ " them in any way. But be careful: do not remove"
+ " old data files which contain your precious data!";
+ /* fall through */
+ default:
+ /* Other errors might come from Datafile::validate_first_page() */
+ return(srv_init_abort(err));
}
-#ifdef UNIV_LOG_ARCHIVE
- srv_normalize_path_for_win(srv_arch_dir);
- srv_arch_dir = srv_add_path_separator_if_needed(srv_arch_dir);
-#endif /* UNIV_LOG_ARCHIVE */
-
dirnamelen = strlen(srv_log_group_home_dir);
ut_a(dirnamelen < (sizeof logfilename) - 10 - sizeof "ib_logfile");
memcpy(logfilename, srv_log_group_home_dir, dirnamelen);
/* Add a path separator if needed. */
- if (dirnamelen && logfilename[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
- logfilename[dirnamelen++] = SRV_PATH_SEPARATOR;
+ if (dirnamelen && logfilename[dirnamelen - 1] != OS_PATH_SEPARATOR) {
+ logfilename[dirnamelen++] = OS_PATH_SEPARATOR;
}
srv_log_file_size_requested = srv_log_file_size;
+ if (innodb_encrypt_temporary_tables && !log_crypt_init()) {
+ return srv_init_abort(DB_ERROR);
+ }
+
if (create_new_db) {
- bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
- ut_a(success);
- flushed_lsn = log_get_lsn();
+ buf_flush_sync_all_buf_pools();
- buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
+ flushed_lsn = log_get_lsn();
- err = create_log_files(create_new_db, logfilename, dirnamelen,
- flushed_lsn, logfile0);
+ err = create_log_files(
+ logfilename, dirnamelen, flushed_lsn, logfile0);
if (err != DB_SUCCESS) {
- return(err);
+ return(srv_init_abort(err));
}
} else {
- ut_d(fil_space_get(0)->recv_size = srv_sys_space_size_debug);
+ srv_log_file_size = 0;
for (i = 0; i < SRV_N_LOG_FILES_MAX; i++) {
- os_offset_t size;
os_file_stat_t stat_info;
sprintf(logfilename + dirnamelen,
"ib_logfile%u", i);
err = os_file_get_status(
- logfilename, &stat_info, false);
+ logfilename, &stat_info, false,
+ srv_read_only_mode);
if (err == DB_NOT_FOUND) {
- if (i == 0) {
-
- if (flushed_lsn < (lsn_t) 1000) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot create"
- " log files because"
- " data files are"
- " corrupt or the"
- " database was not"
- " shut down cleanly"
- " after creating"
- " the data files.");
- return(DB_ERROR);
- }
-
- err = create_log_files(
- create_new_db, logfilename,
- dirnamelen, flushed_lsn,
- logfile0);
-
- if (err == DB_SUCCESS) {
- err = create_log_files_rename(
- logfilename,
- dirnamelen,
- flushed_lsn,
- logfile0);
- }
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- /* Suppress the message about
- crash recovery. */
- flushed_lsn = log_get_lsn();
- goto files_checked;
- } else if (i < 2) {
- /* must have at least 2 log files */
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Only one log file found.");
- return(err);
- }
+ if (i == 0
+ && is_mariabackup_restore_or_export())
+ return(DB_SUCCESS);
/* opened all files */
break;
@@ -2313,80 +1964,120 @@ innobase_start_or_create_for_mysql()
}
if (!srv_file_check_mode(logfilename)) {
- return(DB_ERROR);
- }
-
- err = open_log_file(&files[i], logfilename, &size);
-
- if (err != DB_SUCCESS) {
- return(err);
+ return(srv_init_abort(DB_ERROR));
}
+ const os_offset_t size = stat_info.size;
ut_a(size != (os_offset_t) -1);
- if (size & ((1 << UNIV_PAGE_SIZE_SHIFT) - 1)) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Log file %s size "
- UINT64PF " is not a multiple of"
- " innodb_page_size",
- logfilename, size);
- return(DB_ERROR);
- }
+ if (size & (OS_FILE_LOG_BLOCK_SIZE - 1)) {
- size >>= UNIV_PAGE_SIZE_SHIFT;
+ ib::error() << "Log file " << logfilename
+ << " size " << size << " is not a"
+ " multiple of 512 bytes";
+ return(srv_init_abort(DB_ERROR));
+ }
if (i == 0) {
+ if (size == 0
+ && is_mariabackup_restore_or_export()) {
+ /* Tolerate an empty ib_logfile0
+ from a previous run of
+ mariabackup --prepare. */
+ return(DB_SUCCESS);
+ }
+ /* The first log file must consist of
+ at least the following 512-byte pages:
+ header, checkpoint page 1, empty,
+ checkpoint page 2, redo log page(s).
+
+ Mariabackup --prepare would create an
+ empty ib_logfile0. Tolerate it if there
+ are no other ib_logfile* files. */
+ if ((size != 0 || i != 0)
+ && size <= OS_FILE_LOG_BLOCK_SIZE * 4) {
+ ib::error() << "Log file "
+ << logfilename << " size "
+ << size << " is too small";
+ return(srv_init_abort(DB_ERROR));
+ }
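+ /* OS_FILE_LOG_BLOCK_SIZE is 512 bytes, so the limit
+ above is 2048 bytes: just the four header and
+ checkpoint blocks, with no room for any redo log
+ block. */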
srv_log_file_size = size;
} else if (size != srv_log_file_size) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Log file %s is"
- " of different size " UINT64PF " bytes"
- " than other log"
- " files " UINT64PF " bytes!",
- logfilename,
- size << UNIV_PAGE_SIZE_SHIFT,
- (os_offset_t) srv_log_file_size
- << UNIV_PAGE_SIZE_SHIFT);
- return(DB_ERROR);
+
+ ib::error() << "Log file " << logfilename
+ << " is of different size " << size
+ << " bytes than other log files "
+ << srv_log_file_size << " bytes!";
+ return(srv_init_abort(DB_ERROR));
}
}
+ if (srv_log_file_size == 0) {
+ if (flushed_lsn < lsn_t(1000)) {
+ ib::error()
+ << "Cannot create log files because"
+ " data files are corrupt or the"
+ " database was not shut down cleanly"
+ " after creating the data files.";
+ return srv_init_abort(DB_ERROR);
+ }
+
+ strcpy(logfilename + dirnamelen, "ib_logfile0");
+ srv_log_file_size = srv_log_file_size_requested;
+
+ err = create_log_files(
+ logfilename, dirnamelen,
+ flushed_lsn, logfile0);
+
+ if (err == DB_SUCCESS) {
+ err = create_log_files_rename(
+ logfilename, dirnamelen,
+ flushed_lsn, logfile0);
+ }
+
+ if (err != DB_SUCCESS) {
+ return(srv_init_abort(err));
+ }
+
+ /* Suppress the message about
+ crash recovery. */
+ flushed_lsn = log_get_lsn();
+ goto files_checked;
+ }
+
srv_n_log_files_found = i;
/* Create the in-memory file space objects. */
sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
- fil_space_create(logfilename,
- SRV_LOG_SPACE_FIRST_ID, 0, FIL_LOG,
- NULL /* no encryption yet */,
- true /* create */);
+ /* Disable the doublewrite buffer for log files. */
+ fil_space_t* log_space = fil_space_create(
+ "innodb_redo_log",
+ SRV_LOG_SPACE_FIRST_ID, 0,
+ FIL_TYPE_LOG,
+ NULL /* no encryption yet */);
ut_a(fil_validate());
+ ut_a(log_space);
+
+ ut_a(srv_log_file_size <= log_group_max_size);
- /* srv_log_file_size is measured in pages; if page size is 16KB,
- then we have a limit of 64TB on 32 bit systems */
- ut_a(srv_log_file_size <= ULINT_MAX);
+ const ulint size = 1 + ulint((srv_log_file_size - 1)
+ >> srv_page_size_shift);
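+
+ /* Ceiling division: for example, a 48 MiB ib_logfile with
+ 16 KiB pages (srv_page_size_shift = 14) gives
+ 1 + ((50331648 - 1) >> 14) = 3072 pages. */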
- for (unsigned j = 0; j < i; j++) {
+ for (unsigned j = 0; j < srv_n_log_files_found; j++) {
sprintf(logfilename + dirnamelen, "ib_logfile%u", j);
- if (!fil_node_create(logfilename,
- (ulint) srv_log_file_size,
- SRV_LOG_SPACE_FIRST_ID, FALSE)) {
- return(DB_ERROR);
- }
+ log_space->add(logfilename, OS_FILE_CLOSED, size,
+ false, false);
}
-#ifdef UNIV_LOG_ARCHIVE
- /* Create the file space object for archived logs. Under
- MySQL, no archiving ever done. */
- fil_space_create("arch_log_space", SRV_LOG_SPACE_FIRST_ID + 1,
- 0, FIL_LOG, NULL, true);
-#endif /* UNIV_LOG_ARCHIVE */
- log_group_init(0, i, srv_log_file_size * UNIV_PAGE_SIZE,
- SRV_LOG_SPACE_FIRST_ID,
- SRV_LOG_SPACE_FIRST_ID + 1);
+ log_init(srv_n_log_files_found);
+
+ if (!log_set_capacity(srv_log_file_size_requested)) {
+ return(srv_init_abort(DB_ERROR));
+ }
}
files_checked:
@@ -2395,11 +2086,9 @@ files_checked:
shutdown */
fil_open_log_and_system_tablespace_files();
+ ut_d(fil_space_get(0)->recv_size = srv_sys_space_size_debug);
- err = srv_undo_tablespaces_init(
- create_new_db,
- srv_undo_tablespaces,
- &srv_undo_tablespaces_open);
+ err = srv_undo_tablespaces_init(create_new_db);
/* If the force recovery is set very high then we carry on regardless
of all errors. Basically this is fingers crossed mode. */
@@ -2407,7 +2096,7 @@ files_checked:
if (err != DB_SUCCESS
&& srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
- return(err);
+ return(srv_init_abort(err));
}
/* Initialize objects used by dict stats gathering thread, which
@@ -2426,19 +2115,19 @@ files_checked:
mtr_start(&mtr);
fsp_header_init(0, sum_of_new_sizes, &mtr);
+
compile_time_assert(TRX_SYS_SPACE == 0);
compile_time_assert(IBUF_SPACE_ID == 0);
ulint ibuf_root = btr_create(
- DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF,
- 0, 0, DICT_IBUF_ID_MIN,
- dict_ind_redundant, &mtr);
+ DICT_CLUSTERED | DICT_IBUF,
+ 0, univ_page_size, DICT_IBUF_ID_MIN,
+ dict_ind_redundant, NULL, &mtr);
mtr_commit(&mtr);
if (ibuf_root == FIL_NULL) {
- return(srv_init_abort(true, __FILE__, __LINE__,
- DB_ERROR));
+ return(srv_init_abort(DB_ERROR));
}
ut_ad(ibuf_root == IBUF_TREE_ROOT_PAGE_NO);
@@ -2448,77 +2137,29 @@ files_checked:
All the remaining rollback segments will be created later,
after the double write buffer has been created. */
trx_sys_create_sys_pages();
-
- ib_bh = trx_sys_init_at_db_start();
- n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
-
- /* The purge system needs to create the purge view and
- therefore requires that the trx_sys is inited. */
-
- trx_purge_sys_create(srv_n_purge_threads, ib_bh);
+ trx_sys_init_at_db_start();
err = dict_create();
if (err != DB_SUCCESS) {
- return(err);
+ return(srv_init_abort(err));
}
- srv_startup_is_before_trx_rollback_phase = FALSE;
-
- bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
- ut_a(success);
+ buf_flush_sync_all_buf_pools();
flushed_lsn = log_get_lsn();
- buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
-
- /* Stamp the LSN to the data files. */
err = fil_write_flushed_lsn(flushed_lsn);
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- err = create_log_files_rename(logfilename, dirnamelen,
- flushed_lsn, logfile0);
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-#ifdef UNIV_LOG_ARCHIVE
- } else if (srv_archive_recovery) {
-
- ib_logf(IB_LOG_LEVEL_INFO,
- " Starting archive recovery from a backup...");
-
- err = recv_recovery_from_archive_start(
- min_flushed_lsn, srv_archive_recovery_limit_lsn,
- min_arch_log_no);
- if (err != DB_SUCCESS) {
-
- return(DB_ERROR);
+ if (err == DB_SUCCESS) {
+ err = create_log_files_rename(
+ logfilename, dirnamelen,
+ flushed_lsn, logfile0);
}
- /* Since ibuf init is in dict_boot, and ibuf is needed
- in any disk i/o, first call dict_boot */
-
- err = dict_boot();
if (err != DB_SUCCESS) {
- return(err);
+ return(srv_init_abort(err));
}
-
- ib_bh = trx_sys_init_at_db_start();
- n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
-
- /* The purge system needs to create the purge view and
- therefore requires that the trx_sys is inited. */
-
- trx_purge_sys_create(srv_n_purge_threads, ib_bh);
-
- srv_startup_is_before_trx_rollback_phase = FALSE;
-
- recv_recovery_from_archive_finish();
-#endif /* UNIV_LOG_ARCHIVE */
} else {
/* Check if we support the max format that is stamped
@@ -2537,7 +2178,7 @@ files_checked:
srv_max_file_format_at_startup);
if (err != DB_SUCCESS) {
- return(err);
+ return(srv_init_abort(err));
}
/* Invalidate the buffer pool to ensure that we reread
@@ -2547,24 +2188,49 @@ files_checked:
and there must be no page in the buf_flush list. */
buf_pool_invalidate();
+ /* Scan for and locate truncate log files. Parse the located
+ files and add the table-to-truncate information to a central
+ vector for the truncate fix-up action after recovery. */
+ err = TruncateLogParser::scan_and_parse(srv_log_group_home_dir);
+ if (err != DB_SUCCESS) {
+
+ return(srv_init_abort(DB_ERROR));
+ }
+
/* We always try to do a recovery, even if the database had
been shut down normally: this is the normal startup path */
- err = recv_recovery_from_checkpoint_start(
- LOG_CHECKPOINT, LSN_MAX,
- flushed_lsn);
+ err = recv_recovery_from_checkpoint_start(flushed_lsn);
- if (err == DB_SUCCESS) {
- /* Initialize the change buffer. */
- err = dict_boot();
- }
+ recv_sys->dblwr.pages.clear();
if (err != DB_SUCCESS) {
- return(err);
+ return(srv_init_abort(err));
}
- /* This must precede recv_apply_hashed_log_recs(true). */
- ib_bh = trx_sys_init_at_db_start();
+ switch (srv_operation) {
+ case SRV_OPERATION_NORMAL:
+ case SRV_OPERATION_RESTORE_ROLLBACK_XA:
+ case SRV_OPERATION_RESTORE_EXPORT:
+ /* Initialize the change buffer. */
+ err = dict_boot();
+ if (err != DB_SUCCESS) {
+ return(srv_init_abort(err));
+ }
+ /* This must precede
+ recv_apply_hashed_log_recs(true). */
+ trx_sys_init_at_db_start();
+ break;
+ case SRV_OPERATION_RESTORE_DELTA:
+ case SRV_OPERATION_BACKUP:
+ ut_ad(!"wrong mariabackup mode");
+ /* fall through */
+ case SRV_OPERATION_RESTORE:
+ /* mariabackup --prepare only deals with
+ the redo log and the data files, not with
+ transactions or the data dictionary. */
+ break;
+ }
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
/* Apply the hashed log records to the
@@ -2573,11 +2239,16 @@ files_checked:
recv_apply_hashed_log_recs(true);
- if (recv_sys->found_corrupt_log) {
- return (DB_CORRUPTION);
+ if (recv_sys->found_corrupt_log
+ || recv_sys->found_corrupt_fs) {
+ return(srv_init_abort(DB_CORRUPTION));
}
DBUG_PRINT("ib_log", ("apply completed"));
+
+ if (recv_needed_recovery) {
+ trx_sys_print_mysql_binlog_offset();
+ }
}
if (!srv_read_only_mode) {
@@ -2590,9 +2261,9 @@ files_checked:
if (sum_of_new_sizes > 0) {
/* New data file(s) were added */
- mtr_start(&mtr);
+ mtr.start();
fsp_header_inc_size(0, sum_of_new_sizes, &mtr);
- mtr_commit(&mtr);
+ mtr.commit();
/* Immediately write the log record about
increased tablespace size to disk, so that it
is durable even if mysqld would crash
@@ -2603,37 +2274,27 @@ files_checked:
const ulint tablespace_size_in_header
= fsp_header_get_tablespace_size();
-
-#ifdef UNIV_DEBUG
- /* buf_debug_prints = TRUE; */
-#endif /* UNIV_DEBUG */
- ulint sum_of_data_file_sizes = 0;
-
- for (ulint d = 0; d < srv_n_data_files; d++) {
- sum_of_data_file_sizes += srv_data_file_sizes[d];
- }
-
+ const ulint sum_of_data_file_sizes
+ = srv_sys_space.get_sum_of_sizes();
/* Compare the system tablespace file size to what is
- stored in FSP_SIZE. In open_or_create_data_files()
+ stored in FSP_SIZE. In srv_sys_space.open_or_create()
we already checked that the file sizes match the
innodb_data_file_path specification. */
if (srv_read_only_mode
|| sum_of_data_file_sizes == tablespace_size_in_header) {
/* Do not complain about the size. */
- } else if (!srv_auto_extend_last_data_file
+ } else if (!srv_sys_space.can_auto_extend_last_file()
|| sum_of_data_file_sizes
< tablespace_size_in_header) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Tablespace size stored in header is " ULINTPF
- " pages, but the sum of data file sizes is "
- ULINTPF " pages",
- tablespace_size_in_header,
- sum_of_data_file_sizes);
+ ib::error() << "Tablespace size stored in header is "
+ << tablespace_size_in_header
+ << " pages, but the sum of data file sizes is "
+ << sum_of_data_file_sizes << " pages";
if (srv_force_recovery == 0
&& sum_of_data_file_sizes
< tablespace_size_in_header) {
- ib_logf(IB_LOG_LEVEL_ERROR,
+ ib::error() <<
"Cannot start InnoDB. The tail of"
" the system tablespace is"
" missing. Have you edited"
@@ -2643,120 +2304,94 @@ files_checked:
" You can set innodb_force_recovery=1"
" in my.cnf to force"
" a startup if you are trying to"
- " recover a badly corrupt database.");
+ " recover a badly corrupt database.";
- return(DB_ERROR);
+ return(srv_init_abort(DB_ERROR));
}
}
- n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
-
- /* The purge system needs to create the purge view and
- therefore requires that the trx_sys is inited. */
-
- trx_purge_sys_create(srv_n_purge_threads, ib_bh);
-
/* recv_recovery_from_checkpoint_finish needs trx lists which
are initialized in trx_sys_init_at_db_start(). */
recv_recovery_from_checkpoint_finish();
- if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
- /* The following call is necessary for the insert
- buffer to work with multiple tablespaces. We must
- know the mapping between space id's and .ibd file
- names.
-
- In a crash recovery, we check that the info in data
- dictionary is consistent with what we already know
- about space id's from the call of
- fil_load_single_table_tablespaces().
-
- In a normal startup, we create the space objects for
- every table in the InnoDB data dictionary that has
- an .ibd file.
-
- We also determine the maximum tablespace id used. */
- dict_check_t dict_check;
-
- if (recv_needed_recovery) {
- dict_check = DICT_CHECK_ALL_LOADED;
- } else if (n_recovered_trx) {
- dict_check = DICT_CHECK_SOME_LOADED;
- } else {
- dict_check = DICT_CHECK_NONE_LOADED;
- }
-
- /* Create the SYS_TABLESPACES and SYS_DATAFILES system table */
- err = dict_create_or_check_sys_tablespace();
- if (err != DB_SUCCESS) {
- return(err);
+ if (is_mariabackup_restore_or_export()) {
+ /* After applying the redo log from
+ SRV_OPERATION_BACKUP, flush the changes
+ to the data files and truncate or delete the log.
+ Unless --export is specified, no further change to
+ InnoDB files is needed. */
+ ut_ad(!srv_force_recovery);
+ ut_ad(srv_n_log_files_found <= 1);
+ ut_ad(recv_no_log_write);
+ buf_flush_sync_all_buf_pools();
+ err = fil_write_flushed_lsn(log_get_lsn());
+ ut_ad(!buf_pool_check_no_pending_io());
+ fil_close_log_files(true);
+ log_group_close_all();
+ if (err == DB_SUCCESS) {
+ bool trunc = is_mariabackup_restore();
+ /* Delete subsequent log files. */
+ delete_log_files(logfilename, dirnamelen,
+ srv_n_log_files_found, trunc);
+ if (trunc) {
+ /* Truncate the first log file. */
+ strcpy(logfilename + dirnamelen,
+ "ib_logfile0");
+				if (FILE* f = fopen(logfilename, "w")) {
+					fclose(f);
+				}
+ }
}
-
- sys_datafiles_created = true;
-
- /* This function assumes that SYS_DATAFILES exists */
- dict_check_tablespaces_and_store_max_id(dict_check);
+ return(err);
}
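
After a restore, the block above flushes all changes, writes the flushed LSN, and then cleans up the redo log files: every ib_logfileN beyond the first is deleted, and for a plain restore ib_logfile0 is truncated to an empty file so that the server rebuilds the log on the next regular startup. A standalone sketch of that housekeeping follows, with an illustrative helper name and paths.

#include <cstdio>
#include <string>

// Delete ib_logfile1..ib_logfile(n-1) and optionally truncate ib_logfile0.
static void cleanup_redo_logs(const std::string& dir, unsigned n_files,
			      bool truncate_first)
{
	for (unsigned i = 1; i < n_files; i++) {
		std::string path = dir + "/ib_logfile" + std::to_string(i);
		std::remove(path.c_str());	// a missing file is fine
	}
	if (truncate_first) {
		std::string path = dir + "/ib_logfile0";
		if (FILE* f = std::fopen(path.c_str(), "w")) {
			std::fclose(f);	// mode "w" truncates to zero length
		}
	}
}

int main()
{
	cleanup_redo_logs("/tmp/datadir", 2, true);
	return 0;
}
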
- if (!srv_force_recovery
- && !recv_sys->found_corrupt_log
- && (srv_log_file_size_requested != srv_log_file_size
- || srv_n_log_files_found != srv_n_log_files)) {
- /* Prepare to replace the redo log files. */
-
- if (srv_read_only_mode) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot resize log files "
- "in read-only mode.");
- return(DB_READ_ONLY);
- }
-
- /* Clean the buffer pool. */
- bool success = buf_flush_list(
- ULINT_MAX, LSN_MAX, NULL);
- ut_a(success);
+	/* Upgrade, resize, or rebuild the redo logs before
+	generating any dirty pages, so that the old redo log
+	files will not be written to. */
+
+ if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) {
+ /* Completely ignore the redo log. */
+ } else if (srv_read_only_mode) {
+ /* Leave the redo log alone. */
+ } else if (srv_log_file_size_requested == srv_log_file_size
+ && srv_n_log_files_found == srv_n_log_files
+ && log_sys->log.format
+ == (srv_safe_truncate
+ ? (srv_encrypt_log
+ ? LOG_HEADER_FORMAT_10_3
+ | LOG_HEADER_FORMAT_ENCRYPTED
+ : LOG_HEADER_FORMAT_10_3)
+ : (srv_encrypt_log
+ ? LOG_HEADER_FORMAT_10_2
+ | LOG_HEADER_FORMAT_ENCRYPTED
+ : LOG_HEADER_FORMAT_10_2))
+ && log_sys->log.subformat == !!srv_safe_truncate) {
+ /* No need to add or remove encryption,
+ upgrade, downgrade, or resize. */
+ } else {
+ /* Prepare to delete the old redo log files */
+ flushed_lsn = srv_prepare_to_delete_redo_log_files(i);
DBUG_EXECUTE_IF("innodb_log_abort_1",
- return(DB_ERROR););
-
- flushed_lsn = log_get_lsn();
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "Resizing redo log from %u*%u to %u*%u pages"
- ", LSN=" LSN_PF,
- (unsigned) i,
- (unsigned) srv_log_file_size,
- (unsigned) srv_n_log_files,
- (unsigned) srv_log_file_size_requested,
- flushed_lsn);
-
- buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
-
- /* Flush the old log files. */
- log_buffer_flush_to_disk();
- /* If innodb_flush_method=O_DSYNC,
- we need to explicitly flush the log buffers. */
- fil_flush(SRV_LOG_SPACE_FIRST_ID);
-
- ut_ad(flushed_lsn == log_get_lsn());
-
+ return(srv_init_abort(DB_ERROR)););
/* Prohibit redo log writes from any other
threads until creating a log checkpoint at the
end of create_log_files(). */
- ut_d(recv_no_log_write = TRUE);
+ ut_d(recv_no_log_write = true);
ut_ad(!buf_pool_check_no_pending_io());
DBUG_EXECUTE_IF("innodb_log_abort_3",
- return(DB_ERROR););
+ return(srv_init_abort(DB_ERROR)););
+ DBUG_PRINT("ib_log", ("After innodb_log_abort_3"));
/* Stamp the LSN to the data files. */
err = fil_write_flushed_lsn(flushed_lsn);
DBUG_EXECUTE_IF("innodb_log_abort_4", err = DB_ERROR;);
+ DBUG_PRINT("ib_log", ("After innodb_log_abort_4"));
if (err != DB_SUCCESS) {
- return(err);
+ return(srv_init_abort(err));
}
/* Close and free the redo log files, so that
@@ -2764,30 +2399,114 @@ files_checked:
fil_close_log_files(true);
DBUG_EXECUTE_IF("innodb_log_abort_5",
- return(DB_ERROR););
+ return(srv_init_abort(DB_ERROR)););
+ DBUG_PRINT("ib_log", ("After innodb_log_abort_5"));
/* Free the old log file space. */
log_group_close_all();
- ib_logf(IB_LOG_LEVEL_WARN,
- "Starting to delete and rewrite log files.");
+ ib::info() << "Starting to delete and rewrite log"
+ " files.";
srv_log_file_size = srv_log_file_size_requested;
- err = create_log_files(create_new_db, logfilename,
- dirnamelen, flushed_lsn,
- logfile0);
+ err = create_log_files(
+ logfilename, dirnamelen, flushed_lsn,
+ logfile0);
+
+ if (err == DB_SUCCESS) {
+ err = create_log_files_rename(
+ logfilename, dirnamelen, flushed_lsn,
+ logfile0);
+ }
if (err != DB_SUCCESS) {
- return(err);
+ return(srv_init_abort(err));
}
+ }
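
The long condition above decides whether the existing redo log can be kept: its size, file count, and header format must all match what this server would create, where the expected format is the 10.3 or 10.2 base value with the encryption bit OR'ed in when innodb_encrypt_log is set. A small sketch of that format computation; the constants are illustrative stand-ins for the LOG_HEADER_FORMAT_* values.

#include <cstdint>
#include <cstdio>

static const std::uint32_t FORMAT_10_2      = 1;
static const std::uint32_t FORMAT_10_3      = 103;
static const std::uint32_t FORMAT_ENCRYPTED = 1U << 31;

// Expected redo log header format: base version plus the encryption bit.
static std::uint32_t expected_log_format(bool safe_truncate, bool encrypt_log)
{
	std::uint32_t format = safe_truncate ? FORMAT_10_3 : FORMAT_10_2;
	if (encrypt_log) {
		format |= FORMAT_ENCRYPTED;
	}
	return format;
}

int main()
{
	// The log must be rebuilt iff the on-disk format differs.
	const std::uint32_t on_disk = FORMAT_10_2;
	std::printf("rebuild: %d\n",
		    on_disk != expected_log_format(true, false));
	return 0;
}
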
- err = create_log_files_rename(logfilename, dirnamelen,
- log_get_lsn(), logfile0);
+ /* Validate a few system page types that were left
+ uninitialized by older versions of MySQL. */
+ if (!high_level_read_only) {
+ mtr_t mtr;
+ buf_block_t* block;
+ mtr.start();
+ mtr.set_sys_modified();
+ /* Bitmap page types will be reset in
+ buf_dblwr_check_block() without redo logging. */
+ block = buf_page_get(
+ page_id_t(IBUF_SPACE_ID,
+ FSP_IBUF_HEADER_PAGE_NO),
+ univ_page_size, RW_X_LATCH, &mtr);
+ fil_block_check_type(*block, FIL_PAGE_TYPE_SYS, &mtr);
+ /* Already MySQL 3.23.53 initialized
+ FSP_IBUF_TREE_ROOT_PAGE_NO to
+ FIL_PAGE_INDEX. No need to reset that one. */
+ block = buf_page_get(
+ page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO),
+ univ_page_size, RW_X_LATCH, &mtr);
+ fil_block_check_type(*block, FIL_PAGE_TYPE_TRX_SYS,
+ &mtr);
+ block = buf_page_get(
+ page_id_t(TRX_SYS_SPACE,
+ FSP_FIRST_RSEG_PAGE_NO),
+ univ_page_size, RW_X_LATCH, &mtr);
+ fil_block_check_type(*block, FIL_PAGE_TYPE_SYS, &mtr);
+ block = buf_page_get(
+ page_id_t(TRX_SYS_SPACE, FSP_DICT_HDR_PAGE_NO),
+ univ_page_size, RW_X_LATCH, &mtr);
+ fil_block_check_type(*block, FIL_PAGE_TYPE_SYS, &mtr);
+ mtr.commit();
+ }
+
+ /* Roll back any recovered data dictionary transactions, so
+ that the data dictionary tables will be free of any locks.
+ The data dictionary latch should guarantee that there is at
+ most one data dictionary transaction active at a time. */
+ if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
+ trx_rollback_or_clean_recovered(FALSE);
+ }
+
+	/* Fix up the truncation of tables in the system tablespace
+	if the server crashed while a truncate was active. The
+	non-system tables are done after tablespace discovery. Do
+	this now because this procedure assumes that no pages have
+	changed since redo recovery. Tablespace discovery can do
+	updates to pages in the system tablespace. */
+ err = truncate_t::fixup_tables_in_system_tablespace();
+ if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
+ /* Open or Create SYS_TABLESPACES and SYS_DATAFILES
+ so that tablespace names and other metadata can be
+ found. */
+ err = dict_create_or_check_sys_tablespace();
if (err != DB_SUCCESS) {
- return(err);
+ return(srv_init_abort(err));
}
+
+ /* The following call is necessary for the insert
+ buffer to work with multiple tablespaces. We must
+ know the mapping between space id's and .ibd file
+ names.
+
+ In a crash recovery, we check that the info in data
+ dictionary is consistent with what we already know
+ about space id's from the calls to fil_ibd_load().
+
+ In a normal startup, we create the space objects for
+ every table in the InnoDB data dictionary that has
+ an .ibd file.
+
+ We also determine the maximum tablespace id used. */
+ dict_check_tablespaces_and_store_max_id();
+ }
+
+	/* Fix up the truncation of a table if the server crashed
+	while the truncate was active. */
+ err = truncate_t::fixup_tables_in_non_system_tablespace();
+
+ if (err != DB_SUCCESS) {
+ return(srv_init_abort(err));
}
recv_recovery_rollback_active();
@@ -2804,33 +2523,10 @@ files_checked:
ut_ad(err == DB_SUCCESS);
ut_a(sum_of_new_sizes != ULINT_UNDEFINED);
-#ifdef UNIV_LOG_ARCHIVE
- /* Archiving is always off under MySQL */
- if (!srv_log_archive_on) {
- ut_a(DB_SUCCESS == log_archive_noarchivelog());
- } else {
- mutex_enter(&(log_sys->mutex));
-
- start_archive = FALSE;
-
- if (log_sys->archiving_state == LOG_ARCH_OFF) {
- start_archive = TRUE;
- }
-
- mutex_exit(&(log_sys->mutex));
-
- if (start_archive) {
- ut_a(DB_SUCCESS == log_archive_archivelog());
- }
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- /* fprintf(stderr, "Max allowed record size %lu\n",
- page_get_free_space_of_empty() / 2); */
-
- if (!buf_dblwr_create()) {
- return(srv_init_abort(create_new_db, __FILE__, __LINE__,
- DB_ERROR));
+	/* Create the doublewrite buffer in a new tablespace */
+ if (!srv_read_only_mode && srv_force_recovery < SRV_FORCE_NO_TRX_UNDO
+ && !buf_dblwr_create()) {
+ return(srv_init_abort(DB_ERROR));
}
/* Here the double write buffer has already been created and so
@@ -2849,22 +2545,12 @@ files_checked:
ut_a(srv_undo_logs > 0);
ut_a(srv_undo_logs <= TRX_SYS_N_RSEGS);
- /* The number of rsegs that exist in InnoDB is given by status
- variable srv_available_undo_logs. The number of rsegs to use can
- be set using the dynamic global variable srv_undo_logs. */
-
- srv_available_undo_logs = trx_sys_create_rsegs(
- srv_undo_tablespaces, srv_undo_logs);
-
- if (srv_available_undo_logs == ULINT_UNDEFINED) {
- /* Can only happen if server is read only. */
- ut_a(srv_read_only_mode);
- srv_undo_logs = ULONG_UNDEFINED;
- } else if (srv_available_undo_logs < srv_undo_logs) {
- /* Should due to out of file space. */
- return (srv_init_abort(create_new_db, __FILE__, __LINE__, DB_ERROR));
+ if (!trx_sys_create_rsegs()) {
+ return(srv_init_abort(DB_ERROR));
}
+ srv_startup_is_before_trx_rollback_phase = false;
+
if (!srv_read_only_mode) {
/* Create the thread which watches the timeouts
for lock waits */
@@ -2887,50 +2573,69 @@ files_checked:
srv_monitor_thread,
NULL, thread_ids + 4 + SRV_MAX_N_IO_THREADS);
thread_started[4 + SRV_MAX_N_IO_THREADS] = true;
+ srv_start_state |= SRV_START_STATE_LOCK_SYS
+ | SRV_START_STATE_MONITOR;
}
/* Create the SYS_FOREIGN and SYS_FOREIGN_COLS system tables */
err = dict_create_or_check_foreign_constraint_tables();
- if (err != DB_SUCCESS) {
- return(err);
+ if (err == DB_SUCCESS) {
+ err = dict_create_or_check_sys_tablespace();
+ if (err == DB_SUCCESS) {
+ err = dict_create_or_check_sys_virtual();
+ }
+ }
+ switch (err) {
+ case DB_SUCCESS:
+ break;
+ case DB_READ_ONLY:
+ if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
+ break;
+ }
+ ib::error() << "Cannot create system tables in read-only mode";
+ /* fall through */
+ default:
+ return(srv_init_abort(err));
}
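
The error handling above is deliberately asymmetric: DB_SUCCESS proceeds, DB_READ_ONLY is tolerated only when innodb_force_recovery already disables undo processing (so the missing system tables cannot matter), and anything else aborts startup. A compact sketch of that policy; the enum values and threshold are illustrative.

#include <cstdio>

enum class Err { Success, ReadOnly, Other };
static const int FORCE_NO_TRX_UNDO = 3;

// Tolerate a read-only failure only when force_recovery is high enough.
static bool may_continue(Err err, int force_recovery)
{
	switch (err) {
	case Err::Success:
		return true;
	case Err::ReadOnly:
		return force_recovery >= FORCE_NO_TRX_UNDO;
	case Err::Other:
		return false;
	}
	return false;
}

int main()
{
	std::printf("%d %d\n",
		    may_continue(Err::ReadOnly, 0),   // abort startup
		    may_continue(Err::ReadOnly, 3));  // keep going
	return 0;
}
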
- /* Create the SYS_TABLESPACES and SYS_DATAFILES system tables if we
- have not done that already on crash recovery. */
- if (sys_datafiles_created == false) {
- err = dict_create_or_check_sys_tablespace();
+ if (!srv_read_only_mode && srv_operation == SRV_OPERATION_NORMAL) {
+ /* Initialize the innodb_temporary tablespace and keep
+ it open until shutdown. */
+ err = srv_open_tmp_tablespace(create_new_db);
+
if (err != DB_SUCCESS) {
- return(err);
+ return(srv_init_abort(err));
}
- }
- srv_is_being_started = FALSE;
+ trx_temp_rseg_create();
+ }
ut_a(trx_purge_state() == PURGE_STATE_INIT);
/* Create the master thread which does purge and other utility
operations */
- if (!srv_read_only_mode) {
-
+ if (!srv_read_only_mode
+ && srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
thread_handles[1 + SRV_MAX_N_IO_THREADS] = os_thread_create(
srv_master_thread,
NULL, thread_ids + (1 + SRV_MAX_N_IO_THREADS));
thread_started[1 + SRV_MAX_N_IO_THREADS] = true;
+ srv_start_state_set(SRV_START_STATE_MASTER);
+ }
+ if (!srv_read_only_mode
+ && (srv_operation == SRV_OPERATION_NORMAL
+ || srv_operation == SRV_OPERATION_RESTORE_ROLLBACK_XA)
+ && srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
srv_undo_sources = true;
/* Create the dict stats gathering thread */
srv_dict_stats_thread_active = true;
dict_stats_thread_handle = os_thread_create(
dict_stats_thread, NULL, NULL);
- dict_stats_thread_started = true;
/* Create the thread that will optimize the FTS sub-system. */
fts_optimize_init();
- }
-
- if (!srv_read_only_mode
- && srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
thread_handles[5 + SRV_MAX_N_IO_THREADS] = os_thread_create(
srv_purge_coordinator_thread,
@@ -2951,11 +2656,16 @@ files_checked:
srv_start_wait_for_purge_to_start();
+ srv_start_state_set(SRV_START_STATE_PURGE);
} else {
purge_sys->state = PURGE_STATE_DISABLED;
}
+ srv_is_being_started = false;
+
if (!srv_read_only_mode) {
+		/* Wake up the main loop of the page cleaner. */
+ os_event_set(buf_flush_event);
if (srv_use_mtflush) {
/* Start multi-threaded flush threads */
@@ -2969,31 +2679,17 @@ files_checked:
mtflush_ctx,
(thread_ids + 6 + 32));
}
-
- buf_page_cleaner_is_active = true;
- buf_flush_page_cleaner_thread_handle = os_thread_create(buf_flush_page_cleaner_thread, NULL, NULL);
- buf_flush_page_cleaner_thread_started = true;
}
if (srv_print_verbose_log) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "%s started; log sequence number " LSN_PF "",
- INNODB_VERSION_STR, srv_start_lsn);
+ ib::info() << INNODB_VERSION_STR
+ << " started; log sequence number "
+ << srv_start_lsn;
}
if (srv_force_recovery > 0) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "!!! innodb_force_recovery is set to %lu !!!",
- (ulong) srv_force_recovery);
- }
-
- if (!srv_read_only_mode) {
- /*
- Create a checkpoint before logging anything new, so that
- the current encryption key in use is definitely logged
- before any log blocks encrypted with that key.
- */
- log_make_checkpoint_at(LSN_MAX, TRUE);
+ ib::info() << "!!! innodb_force_recovery is set to "
+ << srv_force_recovery << " !!!";
}
if (srv_force_recovery == 0) {
@@ -3006,6 +2702,10 @@ files_checked:
}
if (!srv_read_only_mode) {
+ if (create_new_db) {
+ srv_buffer_pool_load_at_startup = FALSE;
+ }
+
#ifdef WITH_WSREP
/*
Create the dump/load thread only when not running with
@@ -3019,16 +2719,18 @@ files_checked:
buf_dump_thread_handle=
os_thread_create(buf_dump_thread, NULL, NULL);
- buf_dump_thread_started = true;
#ifdef WITH_WSREP
} else {
- ib_logf(IB_LOG_LEVEL_WARN,
+ ib::warn() <<
"Skipping buffer pool dump/restore during "
- "wsrep recovery.");
+ "wsrep recovery.";
}
#endif /* WITH_WSREP */
- /* Create thread(s) that handles key rotation */
+		/* Create thread(s) that handle key rotation. This is
+		already needed here, as log_preflush_pool_modified_pages
+		will flush dirty pages and that might need e.g.
+		fil_crypt_threads_event. */
fil_system_enter();
btr_scrub_init();
fil_crypt_threads_init();
@@ -3038,64 +2740,24 @@ files_checked:
btr_defragment_init();
btr_defragment_thread_active = true;
os_thread_create(btr_defragment_thread, NULL, NULL);
- }
-
- srv_was_started = TRUE;
-
- return(DB_SUCCESS);
-}
-
-#if 0
-/********************************************************************
-Sync all FTS cache before shutdown */
-static
-void
-srv_fts_close(void)
-/*===============*/
-{
- dict_table_t* table;
- for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
- table; table = UT_LIST_GET_NEXT(table_LRU, table)) {
- fts_t* fts = table->fts;
-
- if (fts != NULL) {
- fts_sync_table(table);
- }
+ srv_start_state |= SRV_START_STATE_REDO;
}
- for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU);
- table; table = UT_LIST_GET_NEXT(table_LRU, table)) {
- fts_t* fts = table->fts;
+ /* Create the buffer pool resize thread */
+ srv_buf_resize_thread_active = true;
+ os_thread_create(buf_resize_thread, NULL, NULL);
- if (fts != NULL) {
- fts_sync_table(table);
- }
- }
+ return(DB_SUCCESS);
}
-#endif
-/** Shut down InnoDB. */
-UNIV_INTERN
+/** Shut down background threads that can generate undo log. */
void
-innodb_shutdown()
+srv_shutdown_bg_undo_sources()
{
- ulint i;
-
- if (!srv_was_started) {
- if (srv_is_being_started) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Shutting down an improperly started, "
- "or created database!");
- }
- }
-
if (srv_undo_sources) {
ut_ad(!srv_read_only_mode);
- /* Shutdown the FTS optimize sub system. */
- fts_optimize_start_shutdown();
-
- fts_optimize_end();
+ fts_optimize_shutdown();
dict_stats_shutdown();
while (row_get_background_drop_list_len_low()) {
srv_wake_master_thread();
@@ -3103,260 +2765,189 @@ innodb_shutdown()
}
srv_undo_sources = false;
}
+}
- /* 1. Flush the buffer pool to disk, write the current lsn to
- the tablespace header(s), and copy all log data to archive.
- The step 1 is the real InnoDB shutdown. The remaining steps 2 - ...
- just free data structures after the shutdown. */
-
- logs_empty_and_mark_files_at_shutdown();
-
- if (srv_conc_get_active_threads() != 0) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Query counter shows %ld queries still "
- "inside InnoDB at shutdown",
- srv_conc_get_active_threads());
- }
-
- /* 2. Make all threads created by InnoDB to exit */
-
- srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS;
-
- /* All threads end up waiting for certain events. Put those events
- to the signaled state. Then the threads will exit themselves after
- os_event_wait(). */
-
- for (i = 0; i < 1000; i++) {
- /* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM
- HERE OR EARLIER */
-
- if (!srv_read_only_mode) {
- /* a. Let the lock timeout thread exit */
- os_event_set(lock_sys->timeout_event);
-
- /* b. srv error monitor thread exits automatically,
- no need to do anything here */
-
- /* c. We wake the master thread so that it exits */
- srv_wake_master_thread();
-
- /* d. Wakeup purge threads. */
- srv_purge_wakeup();
- }
-
- /* e. Exit the i/o threads */
-
- os_aio_wake_all_threads_at_shutdown();
-
- /* f. dict_stats_thread is signaled from
- logs_empty_and_mark_files_at_shutdown() and should have
- already quit or is quitting right now. */
-
-
- if (srv_use_mtflush) {
- /* g. Exit the multi threaded flush threads */
-
- buf_mtflu_io_thread_exit();
- }
-
- os_mutex_enter(os_sync_mutex);
-
- if (os_thread_count == 0) {
- /* All the threads have exited or are just exiting;
- NOTE that the threads may not have completed their
- exit yet. Should we use pthread_join() to make sure
- they have exited? If we did, we would have to
- remove the pthread_detach() from
- os_thread_exit(). Now we just sleep 0.1
- seconds and hope that is enough! */
-
- os_mutex_exit(os_sync_mutex);
-
- os_thread_sleep(100000);
+/** Shut down InnoDB. */
+void
+innodb_shutdown()
+{
+ ut_ad(!my_atomic_loadptr_explicit(reinterpret_cast<void**>
+ (&srv_running),
+ MY_MEMORY_ORDER_RELAXED));
+ ut_ad(!srv_undo_sources);
+
+ switch (srv_operation) {
+ case SRV_OPERATION_BACKUP:
+ case SRV_OPERATION_RESTORE:
+ case SRV_OPERATION_RESTORE_DELTA:
+ case SRV_OPERATION_RESTORE_EXPORT:
+ case SRV_OPERATION_RESTORE_ROLLBACK_XA:
+ fil_close_all_files();
+ break;
+ case SRV_OPERATION_NORMAL:
+ /* Shut down the persistent files. */
+ logs_empty_and_mark_files_at_shutdown();
- break;
+ if (ulint n_threads = srv_conc_get_active_threads()) {
+ ib::warn() << "Query counter shows "
+ << n_threads << " queries still"
+ " inside InnoDB at shutdown";
}
-
- os_mutex_exit(os_sync_mutex);
-
- os_thread_sleep(100000);
}
- if (i == 1000) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "%lu threads created by InnoDB"
- " had not exited at shutdown!",
- (ulong) os_thread_count);
- }
+ /* Exit any remaining threads. */
+ srv_shutdown_all_bg_threads();
if (srv_monitor_file) {
fclose(srv_monitor_file);
srv_monitor_file = 0;
if (srv_monitor_file_name) {
unlink(srv_monitor_file_name);
- mem_free(srv_monitor_file_name);
+ ut_free(srv_monitor_file_name);
}
}
- if (srv_dict_tmpfile) {
- fclose(srv_dict_tmpfile);
- srv_dict_tmpfile = 0;
- }
-
if (srv_misc_tmpfile) {
fclose(srv_misc_tmpfile);
srv_misc_tmpfile = 0;
}
- if (!srv_read_only_mode) {
+ ut_ad(dict_stats_event || !srv_was_started || srv_read_only_mode);
+ ut_ad(dict_sys || !srv_was_started);
+ ut_ad(trx_sys || !srv_was_started);
+ ut_ad(buf_dblwr || !srv_was_started || srv_read_only_mode
+ || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
+ ut_ad(lock_sys || !srv_was_started);
+#ifdef BTR_CUR_HASH_ADAPT
+ ut_ad(btr_search_sys || !srv_was_started);
+#endif /* BTR_CUR_HASH_ADAPT */
+ ut_ad(ibuf || !srv_was_started);
+ ut_ad(log_sys || !srv_was_started);
+
+ if (dict_stats_event) {
dict_stats_thread_deinit();
- fil_crypt_threads_cleanup();
- btr_scrub_cleanup();
- btr_defragment_shutdown();
}
-#ifdef __WIN__
- /* MDEV-361: ha_innodb.dll leaks handles on Windows
- MDEV-7403: should not pass recv_writer_thread_handle to
- CloseHandle().
-
- On Windows we should call CloseHandle() for all
- open thread handles. */
- if (os_thread_count == 0) {
- for (i = 0; i < SRV_MAX_N_IO_THREADS + 6 + 32; ++i) {
- if (thread_started[i]) {
- CloseHandle(thread_handles[i]);
- }
- }
-
- if (buf_flush_page_cleaner_thread_started) {
- CloseHandle(buf_flush_page_cleaner_thread_handle);
- }
-
- if (buf_dump_thread_started) {
- CloseHandle(buf_dump_thread_handle);
- }
+ if (srv_start_state_is_set(SRV_START_STATE_REDO)) {
+ ut_ad(!srv_read_only_mode);
+ /* srv_shutdown_bg_undo_sources() already invoked
+ fts_optimize_shutdown(); dict_stats_shutdown(); */
- if (dict_stats_thread_started) {
- CloseHandle(dict_stats_thread_handle);
- }
+ fil_crypt_threads_cleanup();
+ btr_scrub_cleanup();
+ btr_defragment_shutdown();
}
-#endif /* __WIN __ */
/* This must be disabled before closing the buffer pool
and closing the data dictionary. */
- btr_search_disable();
- ibuf_close();
- log_shutdown();
- trx_sys_file_format_close();
- trx_sys_close();
- lock_sys_close();
+#ifdef BTR_CUR_HASH_ADAPT
+ if (dict_sys) {
+ btr_search_disable(true);
+ }
+#endif /* BTR_CUR_HASH_ADAPT */
+ if (ibuf) {
+ ibuf_close();
+ }
+ if (log_sys) {
+ log_shutdown();
+ }
+ if (trx_sys) {
+ trx_sys_file_format_close();
+ trx_sys_close();
+ }
+ UT_DELETE(purge_sys);
+ purge_sys = NULL;
+ if (buf_dblwr) {
+ buf_dblwr_free();
+ }
+ if (lock_sys) {
+ lock_sys_close();
+ }
+
+ trx_pool_close();
/* We don't create these mutexes in RO mode because we don't create
	the temp files that they cover. */
if (!srv_read_only_mode) {
mutex_free(&srv_monitor_file_mutex);
- mutex_free(&srv_dict_tmpfile_mutex);
mutex_free(&srv_misc_tmpfile_mutex);
}
- dict_close();
- btr_search_sys_free();
+ if (dict_sys) {
+ dict_close();
+ }
+
+#ifdef BTR_CUR_HASH_ADAPT
+ if (btr_search_sys) {
+ btr_search_sys_free();
+ }
+#endif /* BTR_CUR_HASH_ADAPT */
/* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside
them */
os_aio_free();
- que_close();
row_mysql_close();
- srv_mon_free();
- fil_close();
- sync_close();
srv_free();
+ fil_close();
- /* 4. Free all os_events and os_mutexes */
-
- os_sync_free();
-
- /* 5. Free all allocated memory */
+ /* 4. Free all allocated memory */
pars_lexer_close();
- log_mem_free();
- buf_pool_free(srv_buf_pool_instances);
- mem_close();
-
- /* ut_free_all_mem() frees all allocated memory not freed yet
- in shutdown, and it will also free the ut_list_mutex, so it
- should be the last one for all operation */
- ut_free_all_mem();
-
- if (os_thread_count != 0
- || os_event_count != 0
- || os_mutex_count != 0
- || os_fast_mutex_count != 0) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Some resources were not cleaned up in shutdown: "
- "threads %lu, events %lu, os_mutexes %lu, "
- "os_fast_mutexes %lu",
- (ulong) os_thread_count, (ulong) os_event_count,
- (ulong) os_mutex_count, (ulong) os_fast_mutex_count);
+ recv_sys_close();
+
+ ut_ad(buf_pool_ptr || !srv_was_started);
+ if (buf_pool_ptr) {
+ buf_pool_free(srv_buf_pool_instances);
}
+ sync_check_close();
+
if (dict_foreign_err_file) {
fclose(dict_foreign_err_file);
}
- if (srv_print_verbose_log) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Shutdown completed; log sequence number " LSN_PF "",
- srv_shutdown_lsn);
+ if (srv_was_started && srv_print_verbose_log) {
+ ib::info() << "Shutdown completed; log sequence number "
+ << srv_shutdown_lsn;
}
- srv_was_started = FALSE;
- srv_start_has_been_called = FALSE;
+ srv_start_state = SRV_START_STATE_NONE;
+ srv_was_started = false;
+ srv_start_has_been_called = false;
}
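
A notable property of the rewritten innodb_shutdown() is that every subsystem is null-checked before its close routine runs, so the function is safe to call even after a startup that aborted part-way, and teardown happens in roughly the reverse order of creation. A minimal sketch of that guarded, ordered teardown pattern; the subsystem names are illustrative.

#include <cstdio>

struct Subsys { const char* name; };

static Subsys* log_sys  = nullptr;	// may never have been created
static Subsys* trx_sys  = nullptr;
static Subsys* lock_sys = nullptr;

static void close_subsys(Subsys*& s)
{
	if (s != nullptr) {	// tolerate a partial startup
		std::printf("closing %s\n", s->name);
		delete s;
		s = nullptr;
	}
}

static void shutdown_all()
{
	// Reverse order of creation: log, then transactions, then locks.
	close_subsys(log_sys);
	close_subsys(trx_sys);
	close_subsys(lock_sys);
}

int main()
{
	trx_sys = new Subsys{"trx_sys"};	// log_sys never started
	shutdown_all();				// must not crash
	return 0;
}
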
-#endif /* !UNIV_HOTBACKUP */
-/*****************************************************************//**
-Get the meta-data filename from the table name. */
-UNIV_INTERN
+/** Get the meta-data filename from the table name for a
+single-table tablespace.
+@param[in] table table object
+@param[out] filename filename
+@param[in] max_len filename max length */
void
srv_get_meta_data_filename(
-/*=======================*/
- dict_table_t* table, /*!< in: table */
- char* filename, /*!< out: filename */
- ulint max_len) /*!< in: filename max length */
+ dict_table_t* table,
+ char* filename,
+ ulint max_len)
{
- ulint len;
- char* path;
- char* suffix;
- static const ulint suffix_len = strlen(".cfg");
+ ulint len;
+ char* path;
+
+ /* Make sure the data_dir_path is set. */
+ dict_get_and_save_data_dir_path(table, false);
if (DICT_TF_HAS_DATA_DIR(table->flags)) {
- dict_get_and_save_data_dir_path(table, false);
ut_a(table->data_dir_path);
- path = os_file_make_remote_pathname(
- table->data_dir_path, table->name, "cfg");
+ path = fil_make_filepath(
+ table->data_dir_path, table->name.m_name, CFG, true);
} else {
- path = fil_make_ibd_name(table->name, false);
+ path = fil_make_filepath(NULL, table->name.m_name, CFG, false);
}
ut_a(path);
len = ut_strlen(path);
ut_a(max_len >= len);
- suffix = path + (len - suffix_len);
- if (strncmp(suffix, ".cfg", suffix_len) == 0) {
- strcpy(filename, path);
- } else {
- ut_ad(!strcmp(suffix, ".ibd"));
- memcpy(filename, path, len - suffix_len);
- suffix = filename + (len - suffix_len);
- strcpy(suffix, ".cfg");
- }
-
- mem_free(path);
+ strcpy(filename, path);
- srv_normalize_path_for_win(filename);
+ ut_free(path);
}
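
The rewritten srv_get_meta_data_filename() no longer patches an ".ibd" name in place; it asks fil_make_filepath() for the ".cfg" path directly, rooted either at the table's DATA DIRECTORY or at the datadir. A sketch of that construction follows, using an illustrative helper rather than the actual fil_make_filepath() signature.

#include <cassert>
#include <string>

// Build "<dir>/<table>.<suffix>", or "<table>.<suffix>" when dir is empty.
static std::string make_filepath(const std::string& dir,
				 const std::string& table_name,
				 const std::string& suffix)
{
	std::string path = dir.empty() ? table_name : dir + "/" + table_name;
	return path + "." + suffix;
}

int main()
{
	// DATA DIRECTORY table: rooted at the per-table directory.
	assert(make_filepath("/data/ext", "db1/t1", "cfg")
	       == "/data/ext/db1/t1.cfg");
	// Default: relative to the datadir.
	assert(make_filepath("", "db1/t1", "cfg") == "db1/t1.cfg");
	return 0;
}
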
diff --git a/storage/innobase/sync/sync0arr.cc b/storage/innobase/sync/sync0arr.cc
index 9af6cfe5f0c..0c942ada430 100644
--- a/storage/innobase/sync/sync0arr.cc
+++ b/storage/innobase/sync/sync0arr.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2013, 2019, MariaDB Corporation.
@@ -31,13 +31,7 @@ The wait array used in synchronization primitives
Created 9/5/1995 Heikki Tuuri
*******************************************************/
-#include "univ.i"
-
#include "sync0arr.h"
-#ifdef UNIV_NONINL
-#include "sync0arr.ic"
-#endif
-
#include <mysqld_error.h>
#include <mysql/plugin.h>
#include <hash.h>
@@ -51,71 +45,74 @@ Created 9/5/1995 Heikki Tuuri
#include <sql_plugin.h>
#include <innodb_priv.h>
-#include "sync0sync.h"
-#include "sync0rw.h"
-#include "os0sync.h"
-#include "os0file.h"
#include "lock0lock.h"
-#include "srv0srv.h"
-#include "ha_prototypes.h"
+#include "sync0rw.h"
/*
WAIT ARRAY
==========
-The wait array consists of cells each of which has an
-an operating system event object created for it. The threads
-waiting for a mutex, for example, can reserve a cell
-in the array and suspend themselves to wait for the event
-to become signaled. When using the wait array, remember to make
-sure that some thread holding the synchronization object
-will eventually know that there is a waiter in the array and
-signal the object, to prevent infinite wait.
-Why we chose to implement a wait array? First, to make
-mutexes fast, we had to code our own implementation of them,
-which only in usually uncommon cases resorts to using
-slow operating system primitives. Then we had the choice of
-assigning a unique OS event for each mutex, which would
-be simpler, or using a global wait array. In some operating systems,
-the global wait array solution is more efficient and flexible,
-because we can do with a very small number of OS events,
-say 200. In NT 3.51, allocating events seems to be a quadratic
-algorithm, because 10 000 events are created fast, but
-100 000 events takes a couple of minutes to create.
-
-As of 5.0.30 the above mentioned design is changed. Since now
-OS can handle millions of wait events efficiently, we no longer
-have this concept of each cell of wait array having one event.
-Instead, now the event that a thread wants to wait on is embedded
-in the wait object (mutex or rw_lock). We still keep the global
-wait array for the sake of diagnostics and also to avoid infinite
-wait The error_monitor thread scans the global wait array to signal
+The wait array consists of cells, each of which has an event object created
+for it. The threads waiting for a mutex, for example, can reserve a cell
+in the array and suspend themselves to wait for the event to become signaled.
+When using the wait array, remember to make sure that some thread holding
+the synchronization object will eventually know that there is a waiter in
+the array and signal the object, to prevent an infinite wait. Why did we
+choose to implement a wait array? First, to make mutexes fast, we had to
+code our own implementation of them, which only in rare cases resorts to
+using slow operating system primitives. Then we had the choice of
+assigning a unique OS event for each mutex, which would be simpler, or
+using a global wait array. In some operating systems, the global wait
+array solution is more efficient and flexible, because we can do with
+a very small number of OS events, say 200. In NT 3.51, allocating events
+seems to be a quadratic algorithm, because 10 000 events are created fast,
+but 100 000 events take a couple of minutes to create.
+
+As of 5.0.30 the above mentioned design is changed. Since now OS can handle
+millions of wait events efficiently, we no longer have this concept of each
+cell of wait array having one event. Instead, now the event that a thread
+wants to wait on is embedded in the wait object (mutex or rw_lock). We still
+keep the global wait array for the sake of diagnostics and also to avoid an
+infinite wait. The error_monitor thread scans the global wait array to signal
any waiting threads who have missed the signal. */
-/** A cell where an individual thread may wait suspended
-until a resource is released. The suspending is implemented
-using an operating system event semaphore. */
+typedef SyncArrayMutex::MutexType WaitMutex;
+typedef BlockSyncArrayMutex::MutexType BlockWaitMutex;
+
+/** The latch types that use the sync array. */
+union sync_object_t {
+
+ /** RW lock instance */
+ rw_lock_t* lock;
+
+ /** Mutex instance */
+ WaitMutex* mutex;
+
+ /** Block mutex instance */
+ BlockWaitMutex* bpmutex;
+};
+
+/** A cell where an individual thread may wait suspended until a resource
+is released. The suspending is implemented using an operating system
+event semaphore. */
+
struct sync_cell_t {
- void* wait_object; /*!< pointer to the object the
+ sync_object_t latch; /*!< pointer to the object the
thread is waiting for; if NULL
the cell is free for use */
- ib_mutex_t* old_wait_mutex; /*!< the latest wait mutex in cell */
- rw_lock_t* old_wait_rw_lock;
- /*!< the latest wait rw-lock
- in cell */
ulint request_type; /*!< lock type requested on the
object */
const char* file; /*!< in debug version file where
requested */
ulint line; /*!< in debug version line where
- requested */
- os_thread_id_t thread; /*!< thread id of this waiting
+ requested, or ULINT_UNDEFINED */
+ os_thread_id_t thread_id; /*!< thread id of this waiting
thread */
- ibool waiting; /*!< TRUE if the thread has already
+	bool		waiting;	/*!< true if the thread has already
called sync_array_event_wait
on this cell */
- ib_int64_t signal_count; /*!< We capture the signal_count
- of the wait_object when we
+ int64_t signal_count; /*!< We capture the signal_count
+ of the latch when we
reset the event. This value is
then passed on to os_event_wait
and we wait only if the event
@@ -128,51 +125,66 @@ struct sync_cell_t {
time_t reservation_time;
};
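
The signal_count field above implements the classic lost-wakeup guard: the waiter resets the event, records the generation count observed at reset time, and later waits only if no signal has arrived since. Here is a self-contained sketch of that reset/wait protocol using standard C++ primitives; event_t and its methods are illustrative, not the actual os_event API.

#include <condition_variable>
#include <cstdint>
#include <mutex>

struct event_t {
	std::mutex              m;
	std::condition_variable cv;
	bool                    is_set = false;
	std::int64_t            signal_count = 0;

	// Reset to the nonsignalled state; return the current generation.
	std::int64_t reset() {
		std::lock_guard<std::mutex> g(m);
		is_set = false;
		return signal_count;
	}

	void set() {
		std::lock_guard<std::mutex> g(m);
		is_set = true;
		++signal_count;	// any set() after reset() cancels the wait
		cv.notify_all();
	}

	// Wait only if nothing was signalled since the observed reset.
	void wait_low(std::int64_t observed_count) {
		std::unique_lock<std::mutex> g(m);
		cv.wait(g, [&] {
			return is_set || signal_count != observed_count;
		});
	}
};

int main()
{
	event_t ev;
	std::int64_t gen = ev.reset();
	ev.set();		// the signal arrives after the reset
	ev.wait_low(gen);	// returns at once: the generation moved on
	return 0;
}
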
-/* NOTE: It is allowed for a thread to wait
-for an event allocated for the array without owning the
-protecting mutex (depending on the case: OS or database mutex), but
-all changes (set or reset) to the state of the event must be made
-while owning the mutex. */
+/* NOTE: It is allowed for a thread to wait for an event allocated for
+the array without owning the protecting mutex (depending on the case:
+OS or database mutex), but all changes (set or reset) to the state of
+the event must be made while owning the mutex. */
+
/** Synchronization array */
struct sync_array_t {
+
+ /** Constructor
+ Creates a synchronization wait array. It is protected by a mutex
+ which is automatically reserved when the functions operating on it
+ are called.
+ @param[in] num_cells Number of cells to create */
+ sync_array_t(ulint num_cells)
+ UNIV_NOTHROW;
+
+ /** Destructor */
+ ~sync_array_t()
+ UNIV_NOTHROW;
+
ulint n_reserved; /*!< number of currently reserved
cells in the wait array */
ulint n_cells; /*!< number of cells in the
wait array */
sync_cell_t* array; /*!< pointer to wait array */
- ib_mutex_t mutex; /*!< possible database mutex
- protecting this data structure */
- os_ib_mutex_t os_mutex; /*!< Possible operating system mutex
- protecting the data structure.
- As this data structure is used in
- constructing the database mutex,
- to prevent infinite recursion
- in implementation, we fall back to
- an OS mutex. */
+ SysMutex mutex; /*!< System mutex protecting the
+ data structure. As this data
+ structure is used in constructing
+ the database mutex, to prevent
+ infinite recursion in implementation,
+ we fall back to an OS mutex. */
ulint res_count; /*!< count of cell reservations
since creation of the array */
+ ulint next_free_slot; /*!< the next free cell in the array */
+ ulint first_free_slot;/*!< the last slot that was freed */
};
/** User configured sync array size */
-UNIV_INTERN ulong srv_sync_array_size = 32;
+ulong srv_sync_array_size = 1;
/** Locally stored copy of srv_sync_array_size */
-static ulint sync_array_size;
+ulint sync_array_size;
/** The global array of wait cells for implementation of the database's own
mutexes and read-write locks */
-static sync_array_t** sync_wait_array;
+sync_array_t** sync_wait_array;
/** count of how many times an object has been signalled */
-static ulint sg_count;
+static ulint sg_count;
+
+#define sync_array_exit(a) mutex_exit(&(a)->mutex)
+#define sync_array_enter(a) mutex_enter(&(a)->mutex)
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
/******************************************************************//**
This function is called only in the debug version. Detects a deadlock
of one or more threads because of waits of semaphores.
-@return TRUE if deadlock detected */
+@return TRUE if deadlock detected */
static
-ibool
+bool
sync_array_detect_deadlock(
/*=======================*/
sync_array_t* arr, /*!< in: wait array; NOTE! the caller must
@@ -180,103 +192,87 @@ sync_array_detect_deadlock(
sync_cell_t* start, /*!< in: cell where recursive search started */
sync_cell_t* cell, /*!< in: cell to search */
ulint depth); /*!< in: recursion depth */
-#endif /* UNIV_SYNC_DEBUG */
+#endif /* UNIV_DEBUG */
-/*****************************************************************//**
-Gets the nth cell in array.
-@return cell */
-sync_cell_t*
-sync_array_get_nth_cell(
-/*====================*/
- sync_array_t* arr, /*!< in: sync array */
- ulint n) /*!< in: index */
+/** Constructor
+Creates a synchronization wait array. It is protected by a mutex
+which is automatically reserved when the functions operating on it
+are called.
+@param[in] num_cells Number of cells to create */
+sync_array_t::sync_array_t(ulint num_cells)
+ UNIV_NOTHROW
+ :
+ n_reserved(),
+ n_cells(num_cells),
+ array(UT_NEW_ARRAY_NOKEY(sync_cell_t, num_cells)),
+ mutex(),
+ res_count(),
+ next_free_slot(),
+ first_free_slot(ULINT_UNDEFINED)
{
- ut_a(arr);
- ut_a(n < arr->n_cells);
+ ut_a(num_cells > 0);
- return(arr->array + n);
+ memset(array, 0x0, sizeof(sync_cell_t) * n_cells);
+
+ /* Then create the mutex to protect the wait array */
+ mutex_create(LATCH_ID_SYNC_ARRAY_MUTEX, &mutex);
}
-/******************************************************************//**
-Looks for a cell with the given thread id.
-@return pointer to cell or NULL if not found */
+/** Validate the integrity of the wait array. Check
+that the number of reserved cells equals the count variable.
+@param[in,out] arr sync wait array */
static
-sync_cell_t*
-sync_array_find_thread(
-/*===================*/
- sync_array_t* arr, /*!< in: wait array */
- os_thread_id_t thread) /*!< in: thread id */
+void
+sync_array_validate(sync_array_t* arr)
{
ulint i;
- sync_cell_t* cell;
+ ulint count = 0;
+
+ sync_array_enter(arr);
for (i = 0; i < arr->n_cells; i++) {
+ sync_cell_t* cell;
cell = sync_array_get_nth_cell(arr, i);
- if (cell->wait_object != NULL
- && os_thread_eq(cell->thread, thread)) {
-
- return(cell); /* Found */
+ if (cell->latch.mutex != NULL) {
+ count++;
}
}
- return(NULL); /* Not found */
-}
-
-/******************************************************************//**
-Reserves the mutex semaphore protecting a sync array. */
-static
-void
-sync_array_enter(
-/*=============*/
- sync_array_t* arr) /*!< in: sync wait array */
-{
- os_mutex_enter(arr->os_mutex);
-}
+ ut_a(count == arr->n_reserved);
-/******************************************************************//**
-Releases the mutex semaphore protecting a sync array. */
-static
-void
-sync_array_exit(
-/*============*/
- sync_array_t* arr) /*!< in: sync wait array */
-{
- os_mutex_exit(arr->os_mutex);
+ sync_array_exit(arr);
}
-/*******************************************************************//**
-Creates a synchronization wait array. It is protected by a mutex
-which is automatically reserved when the functions operating on it
-are called.
-@return own: created wait array */
-static
-sync_array_t*
-sync_array_create(
-/*==============*/
- ulint n_cells) /*!< in: number of cells in the array
- to create */
+/** Destructor */
+sync_array_t::~sync_array_t()
+ UNIV_NOTHROW
{
- ulint sz;
- sync_array_t* arr;
+ ut_a(n_reserved == 0);
- ut_a(n_cells > 0);
+ sync_array_validate(this);
- /* Allocate memory for the data structures */
- arr = static_cast<sync_array_t*>(ut_malloc(sizeof(*arr)));
- memset(arr, 0x0, sizeof(*arr));
+ /* Release the mutex protecting the wait array */
- sz = sizeof(sync_cell_t) * n_cells;
- arr->array = static_cast<sync_cell_t*>(ut_malloc(sz));
- memset(arr->array, 0x0, sz);
+ mutex_free(&mutex);
- arr->n_cells = n_cells;
+ UT_DELETE_ARRAY(array);
+}
- /* Then create the mutex to protect the wait array complex */
- arr->os_mutex = os_mutex_create();
+/*****************************************************************//**
+Gets the nth cell in array.
+@return cell */
+UNIV_INTERN
+sync_cell_t*
+sync_array_get_nth_cell(
+/*====================*/
+ sync_array_t* arr, /*!< in: sync array */
+ ulint n) /*!< in: index */
+{
+ ut_a(n < arr->n_cells);
- return(arr);
+ return(arr->array + n);
}
/******************************************************************//**
@@ -287,43 +283,7 @@ sync_array_free(
/*============*/
sync_array_t* arr) /*!< in, own: sync wait array */
{
- ut_a(arr->n_reserved == 0);
-
- sync_array_validate(arr);
-
- /* Release the mutex protecting the wait array complex */
-
- os_mutex_free(arr->os_mutex);
-
- ut_free(arr->array);
- ut_free(arr);
-}
-
-/********************************************************************//**
-Validates the integrity of the wait array. Checks
-that the number of reserved cells equals the count variable. */
-UNIV_INTERN
-void
-sync_array_validate(
-/*================*/
- sync_array_t* arr) /*!< in: sync wait array */
-{
- ulint i;
- sync_cell_t* cell;
- ulint count = 0;
-
- sync_array_enter(arr);
-
- for (i = 0; i < arr->n_cells; i++) {
- cell = sync_array_get_nth_cell(arr, i);
- if (cell->wait_object != NULL) {
- count++;
- }
- }
-
- ut_a(count == arr->n_reserved);
-
- sync_array_exit(arr);
+ UT_DELETE(arr);
}
/*******************************************************************//**
@@ -334,87 +294,140 @@ sync_cell_get_event(
/*================*/
sync_cell_t* cell) /*!< in: non-empty sync array cell */
{
- ulint type = cell->request_type;
+ ulint type = cell->request_type;
if (type == SYNC_MUTEX) {
- return(((ib_mutex_t*) cell->wait_object)->event);
- } else if (type == RW_LOCK_WAIT_EX) {
- return(((rw_lock_t*) cell->wait_object)->wait_ex_event);
- } else { /* RW_LOCK_SHARED and RW_LOCK_EX wait on the same event */
- return(((rw_lock_t*) cell->wait_object)->event);
+
+ return(cell->latch.mutex->event());
+
+ } else if (type == SYNC_BUF_BLOCK) {
+
+ return(cell->latch.bpmutex->event());
+
+ } else if (type == RW_LOCK_X_WAIT) {
+
+ return(cell->latch.lock->wait_ex_event);
+
+ } else { /* RW_LOCK_S and RW_LOCK_X wait on the same event */
+
+ return(cell->latch.lock->event);
}
}
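
sync_cell_get_event() shows why sync_object_t is a union: request_type is the tag that says which member is live, and the dispatch picks the matching event (S and X waiters deliberately share one event, while the wait_ex holder has its own). A reduced sketch of the tag-plus-union pattern; the latch types here are illustrative stand-ins.

#include <cassert>

struct Event {};
struct Mutex  { Event ev; Event* event() { return &ev; } };
struct RwLock { Event ev; Event wait_ex_ev; };

enum ReqType { REQ_MUTEX, REQ_RW_X_WAIT, REQ_RW_OTHER };

union LatchPtr {
	Mutex*  mutex;
	RwLock* lock;
};

struct Cell {
	LatchPtr latch;
	ReqType  request_type;	// the tag selecting the live member
};

static Event* cell_get_event(const Cell& c)
{
	switch (c.request_type) {
	case REQ_MUTEX:
		return c.latch.mutex->event();
	case REQ_RW_X_WAIT:
		return &c.latch.lock->wait_ex_ev;
	case REQ_RW_OTHER:	// S and X waiters share one event
		return &c.latch.lock->ev;
	}
	return nullptr;
}

int main()
{
	Mutex m;
	Cell  c;
	c.latch.mutex  = &m;
	c.request_type = REQ_MUTEX;
	assert(cell_get_event(c) == m.event());
	return 0;
}
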
/******************************************************************//**
Reserves a wait array cell for waiting for an object.
The event of the cell is reset to nonsignalled state.
-@return true if free cell is found, otherwise false */
-UNIV_INTERN
-bool
+@return sync cell to wait on */
+sync_cell_t*
sync_array_reserve_cell(
/*====================*/
sync_array_t* arr, /*!< in: wait array */
void* object, /*!< in: pointer to the object to wait for */
ulint type, /*!< in: lock request type */
const char* file, /*!< in: file where requested */
- ulint line, /*!< in: line where requested */
- ulint* index) /*!< out: index of the reserved cell */
+ unsigned line) /*!< in: line where requested */
{
sync_cell_t* cell;
- os_event_t event;
- ulint i;
-
- ut_a(object);
- ut_a(index);
sync_array_enter(arr);
- arr->res_count++;
+ if (arr->first_free_slot != ULINT_UNDEFINED) {
+ /* Try and find a slot in the free list */
+ ut_ad(arr->first_free_slot < arr->next_free_slot);
+ cell = sync_array_get_nth_cell(arr, arr->first_free_slot);
+ arr->first_free_slot = cell->line;
+ } else if (arr->next_free_slot < arr->n_cells) {
+ /* Try and find a slot after the currently allocated slots */
+ cell = sync_array_get_nth_cell(arr, arr->next_free_slot);
+ ++arr->next_free_slot;
+ } else {
+ sync_array_exit(arr);
- /* Reserve a new cell. */
- for (i = 0; i < arr->n_cells; i++) {
- cell = sync_array_get_nth_cell(arr, i);
+		// Return NULL; if there is more than one sync
+		// array, the caller can try another instance.
+ return(NULL);
+ }
- if (cell->wait_object == NULL) {
+ ++arr->res_count;
- cell->waiting = FALSE;
- cell->wait_object = object;
+ ut_ad(arr->n_reserved < arr->n_cells);
+ ut_ad(arr->next_free_slot <= arr->n_cells);
- if (type == SYNC_MUTEX) {
- cell->old_wait_mutex =
- static_cast<ib_mutex_t*>(object);
- } else {
- cell->old_wait_rw_lock =
- static_cast<rw_lock_t*>(object);
- }
+ ++arr->n_reserved;
- cell->request_type = type;
+ /* Reserve the cell. */
+ ut_ad(cell->latch.mutex == NULL);
- cell->file = file;
- cell->line = line;
+ cell->request_type = type;
- arr->n_reserved++;
+ if (cell->request_type == SYNC_MUTEX) {
+ cell->latch.mutex = reinterpret_cast<WaitMutex*>(object);
+ } else if (cell->request_type == SYNC_BUF_BLOCK) {
+ cell->latch.bpmutex = reinterpret_cast<BlockWaitMutex*>(object);
+ } else {
+ cell->latch.lock = reinterpret_cast<rw_lock_t*>(object);
+ }
+
+ cell->waiting = false;
- *index = i;
+ cell->file = file;
+ cell->line = line;
- sync_array_exit(arr);
+ sync_array_exit(arr);
- /* Make sure the event is reset and also store
- the value of signal_count at which the event
- was reset. */
- event = sync_cell_get_event(cell);
- cell->signal_count = os_event_reset(event);
+ cell->thread_id = os_thread_get_curr_id();
- cell->reservation_time = time(NULL);
+ cell->reservation_time = time(NULL);
- cell->thread = os_thread_get_curr_id();
+ /* Make sure the event is reset and also store the value of
+ signal_count at which the event was reset. */
+ os_event_t event = sync_cell_get_event(cell);
+ cell->signal_count = os_event_reset(event);
- return(true);
+ return(cell);
+}
+
+/******************************************************************//**
+Frees the cell. NOTE! sync_array_wait_event frees the cell
+automatically! */
+void
+sync_array_free_cell(
+/*=================*/
+ sync_array_t* arr, /*!< in: wait array */
+ sync_cell_t*& cell) /*!< in/out: the cell in the array */
+{
+ sync_array_enter(arr);
+
+ ut_a(cell->latch.mutex != NULL);
+
+ cell->waiting = false;
+ cell->signal_count = 0;
+ cell->latch.mutex = NULL;
+
+	/* Set up the list of free slots in the array. */
+ cell->line = arr->first_free_slot;
+
+ arr->first_free_slot = cell - arr->array;
+
+ ut_a(arr->n_reserved > 0);
+ arr->n_reserved--;
+
+ if (arr->next_free_slot > arr->n_cells / 2 && arr->n_reserved == 0) {
+#ifdef UNIV_DEBUG
+ for (ulint i = 0; i < arr->next_free_slot; ++i) {
+ cell = sync_array_get_nth_cell(arr, i);
+
+ ut_ad(!cell->waiting);
+ ut_ad(cell->latch.mutex == 0);
+ ut_ad(cell->signal_count == 0);
}
+#endif /* UNIV_DEBUG */
+ arr->next_free_slot = 0;
+ arr->first_free_slot = ULINT_UNDEFINED;
}
+ sync_array_exit(arr);
- /* No free cell found */
- return false;
+ cell = 0;
}
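
Note the trick in sync_array_free_cell() above: a freed cell's line field is recycled as the link to the next free slot, so the array maintains its free list without any extra storage. A minimal sketch of that intrusive free-list pattern over an array-backed pool; all names are illustrative.

#include <cassert>
#include <cstddef>
#include <vector>

static const std::size_t NPOS = static_cast<std::size_t>(-1);

struct cell {
	void*       payload = nullptr;
	std::size_t link    = 0;	// reused as free-list link when free
};

struct pool {
	std::vector<cell> cells;
	std::size_t next_free  = 0;	// first never-used slot
	std::size_t first_free = NPOS;	// head of the recycled slots

	explicit pool(std::size_t n) : cells(n) {}

	cell* reserve() {
		if (first_free != NPOS) {	// prefer a recycled slot
			cell* c = &cells[first_free];
			first_free = c->link;
			return c;
		}
		if (next_free < cells.size()) {	// otherwise a fresh slot
			return &cells[next_free++];
		}
		return nullptr;			// pool exhausted
	}

	void free_cell(cell* c) {
		c->payload = nullptr;
		c->link    = first_free;	// chain into the free list
		first_free = static_cast<std::size_t>(c - &cells[0]);
	}
};

int main()
{
	pool  p(4);
	cell* a = p.reserve();
	cell* b = p.reserve();
	p.free_cell(a);
	assert(p.reserve() == a);	// the recycled slot is reused first
	(void) b;
	return 0;
}
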
/******************************************************************//**
@@ -422,30 +435,21 @@ This function should be called when a thread starts to wait on
a wait array cell. In the debug version this function checks
if the wait for a semaphore will result in a deadlock, in which
case prints info and asserts. */
-UNIV_INTERN
void
sync_array_wait_event(
/*==================*/
sync_array_t* arr, /*!< in: wait array */
- ulint index) /*!< in: index of the reserved cell */
+ sync_cell_t*& cell) /*!< in: index of the reserved cell */
{
- sync_cell_t* cell;
- os_event_t event;
-
- ut_a(arr);
-
sync_array_enter(arr);
- cell = sync_array_get_nth_cell(arr, index);
+ ut_ad(!cell->waiting);
+ ut_ad(cell->latch.mutex);
+ ut_ad(os_thread_get_curr_id() == cell->thread_id);
- ut_a(cell->wait_object);
- ut_a(!cell->waiting);
- ut_ad(os_thread_get_curr_id() == cell->thread);
+ cell->waiting = true;
- event = sync_cell_get_event(cell);
- cell->waiting = TRUE;
-
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
/* We use simple enter to the mutex below, because if
we cannot acquire it at once, mutex_enter would call
@@ -454,19 +458,21 @@ sync_array_wait_event(
rw_lock_debug_mutex_enter();
- if (TRUE == sync_array_detect_deadlock(arr, cell, cell, 0)) {
+ if (sync_array_detect_deadlock(arr, cell, cell, 0)) {
- fputs("########################################\n", stderr);
- ut_error;
+ ib::fatal() << "########################################"
+ " Deadlock Detected!";
}
rw_lock_debug_mutex_exit();
-#endif
+#endif /* UNIV_DEBUG */
sync_array_exit(arr);
- os_event_wait_low(event, cell->signal_count);
+ os_event_wait_low(sync_cell_get_event(cell), cell->signal_count);
- sync_array_free_cell(arr, index);
+ sync_array_free_cell(arr, cell);
+
+ cell = 0;
}
/******************************************************************//**
@@ -476,11 +482,8 @@ void
sync_array_cell_print(
/*==================*/
FILE* file, /*!< in: file where to print */
- sync_cell_t* cell, /*!< in: sync cell */
- os_thread_id_t* reserver) /*!< out: write reserver or
- 0 */
+ sync_cell_t* cell) /*!< in: sync cell */
{
- ib_mutex_t* mutex;
rw_lock_t* rwlock;
ulint type;
ulint writer;
@@ -490,40 +493,75 @@ sync_array_cell_print(
fprintf(file,
"--Thread %lu has waited at %s line %lu"
" for %.2f seconds the semaphore:\n",
- (ulong) os_thread_pf(cell->thread),
+ (ulong) os_thread_pf(cell->thread_id),
innobase_basename(cell->file), (ulong) cell->line,
difftime(time(NULL), cell->reservation_time));
if (type == SYNC_MUTEX) {
- /* We use old_wait_mutex in case the cell has already
- been freed meanwhile */
- mutex = cell->old_wait_mutex;
+ WaitMutex* mutex = cell->latch.mutex;
+ const WaitMutex::MutexPolicy& policy = mutex->policy();
+#ifdef UNIV_DEBUG
+ const char* name = policy.get_enter_filename();
+ if (name == NULL) {
+ /* The mutex might have been released. */
+ name = "NULL";
+ }
+#endif /* UNIV_DEBUG */
if (mutex) {
- fprintf(file,
- "Mutex at %p created file %s line " ULINTPF
- ", lock var %lu\n"
- "Last time reserved by thread " ULINTPF
- " in file %s line " ULINTPF ", "
- "waiters flag " ULINTPF "\n",
- (void*) mutex,
- innobase_basename(mutex->cfile_name),
- mutex->cline,
- (ulong) mutex->lock_word,
- os_thread_pf(mutex->thread_id),
- innobase_basename(mutex->file_name),
- mutex->line, mutex->waiters);
+ fprintf(file,
+ "Mutex at %p, %s, lock var %x\n"
+#ifdef UNIV_DEBUG
+ "Last time reserved in file %s line %u"
+#endif /* UNIV_DEBUG */
+ "\n",
+ (void*) mutex,
+ policy.to_string().c_str(),
+ mutex->state()
+#ifdef UNIV_DEBUG
+ ,name,
+ policy.get_enter_line()
+#endif /* UNIV_DEBUG */
+ );
}
-
- } else if (type == RW_LOCK_EX
- || type == RW_LOCK_WAIT_EX
- || type == RW_LOCK_SHARED) {
-
- fputs(type == RW_LOCK_EX ? "X-lock on"
- : type == RW_LOCK_WAIT_EX ? "X-lock (wait_ex) on"
+ } else if (type == SYNC_BUF_BLOCK) {
+ BlockWaitMutex* mutex = cell->latch.bpmutex;
+
+ const BlockWaitMutex::MutexPolicy& policy =
+ mutex->policy();
+#ifdef UNIV_DEBUG
+ const char* name = policy.get_enter_filename();
+ if (name == NULL) {
+ /* The mutex might have been released. */
+ name = "NULL";
+ }
+#endif /* UNIV_DEBUG */
+
+ fprintf(file,
+ "Mutex at %p, %s, lock var %lu\n"
+#ifdef UNIV_DEBUG
+ "Last time reserved in file %s line %lu"
+#endif /* UNIV_DEBUG */
+ "\n",
+ (void*) mutex,
+ policy.to_string().c_str(),
+ (ulong) mutex->state()
+#ifdef UNIV_DEBUG
+ ,name,
+ (ulong) policy.get_enter_line()
+#endif /* UNIV_DEBUG */
+ );
+ } else if (type == RW_LOCK_X
+ || type == RW_LOCK_X_WAIT
+ || type == RW_LOCK_SX
+ || type == RW_LOCK_S) {
+
+ fputs(type == RW_LOCK_X ? "X-lock on"
+ : type == RW_LOCK_X_WAIT ? "X-lock (wait_ex) on"
+ : type == RW_LOCK_SX ? "SX-lock on"
: "S-lock on", file);
- rwlock = (rw_lock_t*)cell->old_wait_rw_lock;
+ rwlock = cell->latch.lock;
if (rwlock) {
fprintf(file,
@@ -533,36 +571,43 @@ sync_array_cell_print(
writer = rw_lock_get_writer(rwlock);
- if (writer && writer != RW_LOCK_NOT_LOCKED) {
+ if (writer != RW_LOCK_NOT_LOCKED) {
+
fprintf(file,
"a writer (thread id " ULINTPF ") has"
" reserved it in mode %s",
os_thread_pf(rwlock->writer_thread),
- writer == RW_LOCK_EX
- ? " exclusive\n"
+ writer == RW_LOCK_X ? " exclusive\n"
+ : writer == RW_LOCK_SX ? " SX\n"
: " wait exclusive\n");
- *reserver = rwlock->writer_thread;
}
fprintf(file,
"number of readers " ULINTPF
- ", waiters flag " ULINTPF ", "
- "lock_word: %lx\n"
+ ", waiters flag %u, "
+ "lock_word: " ULINTPFx "\n"
"Last time read locked in file %s line %u\n"
- "Last time write locked in file %s line %u\n"
- "Holder thread " ULINTPF
- " file %s line " ULINTPF "\n",
+ "Last time write locked in file %s line %u"
+#if 0 /* JAN: TODO: FIX LATER */
+ "\nHolder thread " ULINTPF
+ " file %s line " ULINTPF
+#endif
+ "\n",
rw_lock_get_reader_count(rwlock),
rwlock->waiters,
rwlock->lock_word,
innobase_basename(rwlock->last_s_file_name),
rwlock->last_s_line,
innobase_basename(rwlock->last_x_file_name),
- rwlock->last_x_line,
- os_thread_pf(rwlock->thread_id),
+ rwlock->last_x_line
+#if 0 /* JAN: TODO: FIX LATER */
+ , os_thread_pf(rwlock->thread_id),
innobase_basename(rwlock->file_name),
- rwlock->line);
+ rwlock->line
+#endif
+ );
}
+
} else {
ut_error;
}
@@ -572,11 +617,37 @@ sync_array_cell_print(
}
}
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
+/******************************************************************//**
+Looks for a cell with the given thread id.
+@return pointer to cell or NULL if not found */
+static
+sync_cell_t*
+sync_array_find_thread(
+/*===================*/
+ sync_array_t* arr, /*!< in: wait array */
+ os_thread_id_t thread) /*!< in: thread id */
+{
+ ulint i;
+
+ for (i = 0; i < arr->n_cells; i++) {
+ sync_cell_t* cell;
+
+ cell = sync_array_get_nth_cell(arr, i);
+
+ if (cell->latch.mutex != NULL
+ && os_thread_eq(cell->thread_id, thread)) {
+
+ return(cell); /* Found */
+ }
+ }
+
+ return(NULL); /* Not found */
+}
/******************************************************************//**
Recursion step for deadlock detection.
-@return TRUE if deadlock detected */
+@return TRUE if deadlock detected */
static
ibool
sync_array_deadlock_step(
@@ -615,12 +686,29 @@ sync_array_deadlock_step(
return(FALSE);
}
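
sync_array_deadlock_step() is one step of a bounded depth-first walk over the wait-for graph: from the cell's waiting thread, follow the edge to the thread currently holding the latch, and report a deadlock if the walk ever returns to the thread it started from. A reduced sketch of that cycle search; the map-based graph is an illustrative stand-in for the wait array.

#include <cassert>
#include <map>

typedef int ThreadId;

// waits_for[t] = the thread holding the latch that t is waiting on.
static bool detect_cycle(const std::map<ThreadId, ThreadId>& waits_for,
			 ThreadId start, ThreadId current, int depth)
{
	assert(depth < 100);	// mirrors ut_ad(depth < 100) above
	std::map<ThreadId, ThreadId>::const_iterator it
		= waits_for.find(current);
	if (it == waits_for.end()) {
		return false;	// the holder is not waiting: no cycle here
	}
	if (it->second == start) {
		return true;	// the edge closes a cycle: deadlock
	}
	return detect_cycle(waits_for, start, it->second, depth + 1);
}

int main()
{
	std::map<ThreadId, ThreadId> w;
	w[1] = 2;	// thread 1 waits on a latch held by thread 2
	w[2] = 3;
	w[3] = 1;	// and thread 3 waits on thread 1: a cycle
	assert(detect_cycle(w, 1, 1, 0));
	return 0;
}
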
+/**
+Report an error to stderr.
+@param lock rw-lock instance
+@param debug rw-lock debug information
+@param cell thread context */
+static
+void
+sync_array_report_error(
+ rw_lock_t* lock,
+ rw_lock_debug_t* debug,
+ sync_cell_t* cell)
+{
+ fprintf(stderr, "rw-lock %p ", (void*) lock);
+ sync_array_cell_print(stderr, cell);
+ rw_lock_debug_print(stderr, debug);
+}
+
/******************************************************************//**
This function is called only in the debug version. Detects a deadlock
of one or more threads because of waits of semaphores.
-@return TRUE if deadlock detected */
+@return TRUE if deadlock detected */
static
-ibool
+bool
sync_array_detect_deadlock(
/*=======================*/
sync_array_t* arr, /*!< in: wait array; NOTE! the caller must
@@ -629,34 +717,80 @@ sync_array_detect_deadlock(
sync_cell_t* cell, /*!< in: cell to search */
ulint depth) /*!< in: recursion depth */
{
- ib_mutex_t* mutex;
rw_lock_t* lock;
os_thread_id_t thread;
ibool ret;
rw_lock_debug_t*debug;
- os_thread_id_t reserver=0;
ut_a(arr);
ut_a(start);
ut_a(cell);
- ut_ad(cell->wait_object);
- ut_ad(os_thread_get_curr_id() == start->thread);
+ ut_ad(cell->latch.mutex != 0);
+ ut_ad(os_thread_get_curr_id() == start->thread_id);
ut_ad(depth < 100);
depth++;
if (!cell->waiting) {
-
- return(FALSE); /* No deadlock here */
+ /* No deadlock here */
+ return(false);
}
- if (cell->request_type == SYNC_MUTEX) {
+ switch (cell->request_type) {
+ case SYNC_MUTEX: {
+
+ WaitMutex* mutex = cell->latch.mutex;
+ const WaitMutex::MutexPolicy& policy = mutex->policy();
+
+ if (mutex->state() != MUTEX_STATE_UNLOCKED) {
+ thread = policy.get_thread_id();
+
+			/* Note that mutex->thread_id above may also
+			be OS_THREAD_ID_UNDEFINED, because the thread
+			which held the mutex may not have updated the
+			value yet, or it has already released the
+			mutex: in this case no deadlock can occur, as
+			the wait array cannot contain a thread with an
+			ID_UNDEFINED value. */
+ ret = sync_array_deadlock_step(
+ arr, start, thread, 0, depth);
+
+ if (ret) {
+ const char* name;
+
+ name = policy.get_enter_filename();
+
+ if (name == NULL) {
+ /* The mutex might have been
+ released. */
+ name = "NULL";
+ }
- mutex = static_cast<ib_mutex_t*>(cell->wait_object);
+ ib::info()
+ << "Mutex " << mutex << " owned by"
+ " thread " << os_thread_pf(thread)
+ << " file " << name << " line "
+ << policy.get_enter_line();
+
+ sync_array_cell_print(stderr, cell);
+
+ return(true);
+ }
+ }
+
+ /* No deadlock */
+ return(false);
+ }
- if (mutex_get_lock_word(mutex) != 0) {
+ case SYNC_BUF_BLOCK: {
- thread = mutex->thread_id;
+ BlockWaitMutex* mutex = cell->latch.bpmutex;
+
+ const BlockWaitMutex::MutexPolicy& policy =
+ mutex->policy();
+
+ if (mutex->state() != MUTEX_STATE_UNLOCKED) {
+ thread = policy.get_thread_id();
/* Note that mutex->thread_id above may be
also OS_THREAD_ID_UNDEFINED, because the
@@ -665,64 +799,80 @@ sync_array_detect_deadlock(
released the mutex: in this case no deadlock
can occur, as the wait array cannot contain
a thread with ID_UNDEFINED value. */
+ ret = sync_array_deadlock_step(
+ arr, start, thread, 0, depth);
- ret = sync_array_deadlock_step(arr, start, thread, 0,
- depth);
if (ret) {
- fprintf(stderr,
- "Mutex %p owned by thread %lu file %s line %lu\n",
- mutex, (ulong) os_thread_pf(mutex->thread_id),
- mutex->file_name, (ulong) mutex->line);
- sync_array_cell_print(stderr, cell, &reserver);
+ const char* name;
+
+ name = policy.get_enter_filename();
+
+ if (name == NULL) {
+ /* The mutex might have been
+ released. */
+ name = "NULL";
+ }
+
+ ib::info()
+ << "Mutex " << mutex << " owned by"
+ " thread " << os_thread_pf(thread)
+ << " file " << name << " line "
+ << policy.get_enter_line();
+
- return(TRUE);
+ return(true);
}
}
- return(FALSE); /* No deadlock */
+ /* No deadlock */
+ return(false);
+ }
- } else if (cell->request_type == RW_LOCK_EX
- || cell->request_type == RW_LOCK_WAIT_EX) {
+ case RW_LOCK_X:
+ case RW_LOCK_X_WAIT:
- lock = static_cast<rw_lock_t*>(cell->wait_object);
+ lock = cell->latch.lock;
for (debug = UT_LIST_GET_FIRST(lock->debug_list);
- debug != 0;
+ debug != NULL;
debug = UT_LIST_GET_NEXT(list, debug)) {
thread = debug->thread_id;
- if (((debug->lock_type == RW_LOCK_EX)
- && !os_thread_eq(thread, cell->thread))
- || ((debug->lock_type == RW_LOCK_WAIT_EX)
- && !os_thread_eq(thread, cell->thread))
- || (debug->lock_type == RW_LOCK_SHARED)) {
+ switch (debug->lock_type) {
+ case RW_LOCK_X:
+ case RW_LOCK_SX:
+ case RW_LOCK_X_WAIT:
+ if (os_thread_eq(thread, cell->thread_id)) {
+ break;
+ }
+ /* fall through */
+ case RW_LOCK_S:
/* The (wait) x-lock request can block
infinitely only if someone (can be also cell
thread) is holding s-lock, or someone
- (cannot be cell thread) (wait) x-lock, and
- he is blocked by start thread */
+ (cannot be cell thread) (wait) x-lock or
+ sx-lock, and he is blocked by start thread */
ret = sync_array_deadlock_step(
arr, start, thread, debug->pass,
depth);
+
if (ret) {
-print:
- fprintf(stderr, "rw-lock %p ",
- (void*) lock);
- sync_array_cell_print(stderr, cell, &reserver);
+ sync_array_report_error(
+ lock, debug, cell);
rw_lock_debug_print(stderr, debug);
return(TRUE);
}
}
}
- return(FALSE);
+ return(false);
- } else if (cell->request_type == RW_LOCK_SHARED) {
+ case RW_LOCK_SX:
- lock = static_cast<rw_lock_t*>(cell->wait_object);
+ lock = cell->latch.lock;
for (debug = UT_LIST_GET_FIRST(lock->debug_list);
debug != 0;
@@ -730,200 +880,88 @@ print:
thread = debug->thread_id;
- if ((debug->lock_type == RW_LOCK_EX)
- || (debug->lock_type == RW_LOCK_WAIT_EX)) {
+ switch (debug->lock_type) {
+ case RW_LOCK_X:
+ case RW_LOCK_SX:
+ case RW_LOCK_X_WAIT:
- /* The s-lock request can block infinitely
- only if someone (can also be cell thread) is
- holding (wait) x-lock, and he is blocked by
- start thread */
+ if (os_thread_eq(thread, cell->thread_id)) {
+ break;
+ }
+
+ /* The sx-lock request can block infinitely
+ only if someone (can be also cell thread) is
+ holding (wait) x-lock or sx-lock, and he is
+ blocked by start thread */
ret = sync_array_deadlock_step(
arr, start, thread, debug->pass,
depth);
+
if (ret) {
- goto print;
+ sync_array_report_error(
+ lock, debug, cell);
+ return(TRUE);
}
}
}
- return(FALSE);
-
- } else {
- ut_error;
- }
-
- return(TRUE); /* Execution never reaches this line: for compiler
- fooling only */
-}
-#endif /* UNIV_SYNC_DEBUG */
-
-/******************************************************************//**
-Determines if we can wake up the thread waiting for a sempahore. */
-static
-ibool
-sync_arr_cell_can_wake_up(
-/*======================*/
- sync_cell_t* cell) /*!< in: cell to search */
-{
- ib_mutex_t* mutex;
- rw_lock_t* lock;
-
- if (cell->request_type == SYNC_MUTEX) {
-
- mutex = static_cast<ib_mutex_t*>(cell->wait_object);
-
- os_rmb;
- if (mutex_get_lock_word(mutex) == 0) {
-
- return(TRUE);
- }
+ return(false);
- } else if (cell->request_type == RW_LOCK_EX) {
+ case RW_LOCK_S:
- lock = static_cast<rw_lock_t*>(cell->wait_object);
+ lock = cell->latch.lock;
- os_rmb;
- if (lock->lock_word > 0) {
- /* Either unlocked or only read locked. */
+ for (debug = UT_LIST_GET_FIRST(lock->debug_list);
+ debug != 0;
+ debug = UT_LIST_GET_NEXT(list, debug)) {
- return(TRUE);
- }
+ thread = debug->thread_id;
- } else if (cell->request_type == RW_LOCK_WAIT_EX) {
+ if (debug->lock_type == RW_LOCK_X
+ || debug->lock_type == RW_LOCK_X_WAIT) {
- lock = static_cast<rw_lock_t*>(cell->wait_object);
+ /* The s-lock request can block infinitely
+ only if someone (can also be cell thread) is
+ holding (wait) x-lock, and he is blocked by
+ start thread */
- /* lock_word == 0 means all readers have left */
- os_rmb;
- if (lock->lock_word == 0) {
+ ret = sync_array_deadlock_step(
+ arr, start, thread, debug->pass,
+ depth);
- return(TRUE);
+ if (ret) {
+ sync_array_report_error(
+ lock, debug, cell);
+ return(TRUE);
+ }
+ }
}
- } else if (cell->request_type == RW_LOCK_SHARED) {
- lock = static_cast<rw_lock_t*>(cell->wait_object);
- /* lock_word > 0 means no writer or reserved writer */
- os_rmb;
- if (lock->lock_word > 0) {
+ return(false);
- return(TRUE);
- }
+ default:
+ ut_error;
}
- return(FALSE);
-}
-
-/******************************************************************//**
-Frees the cell. NOTE! sync_array_wait_event frees the cell
-automatically! */
-UNIV_INTERN
-void
-sync_array_free_cell(
-/*=================*/
- sync_array_t* arr, /*!< in: wait array */
- ulint index) /*!< in: index of the cell in array */
-{
- sync_cell_t* cell;
-
- sync_array_enter(arr);
-
- cell = sync_array_get_nth_cell(arr, index);
-
- ut_a(cell->wait_object != NULL);
-
- cell->waiting = FALSE;
- cell->wait_object = NULL;
- cell->signal_count = 0;
-
- ut_a(arr->n_reserved > 0);
- arr->n_reserved--;
-
- sync_array_exit(arr);
+ return(true);
}
+#endif /* UNIV_DEBUG */
/**********************************************************************//**
Increments the signalled count. */
-UNIV_INTERN
void
-sync_array_object_signalled(void)
-/*=============================*/
+sync_array_object_signalled()
+/*=========================*/
{
-#ifdef HAVE_ATOMIC_BUILTINS
- (void) os_atomic_increment_ulint(&sg_count, 1);
-#else
++sg_count;
-#endif /* HAVE_ATOMIC_BUILTINS */
-}
-
-/**********************************************************************//**
-If the wakeup algorithm does not work perfectly at semaphore relases,
-this function will do the waking (see the comment in mutex_exit). This
-function should be called about every 1 second in the server.
-
-Note that there's a race condition between this thread and mutex_exit
-changing the lock_word and calling signal_object, so sometimes this finds
-threads to wake up even when nothing has gone wrong. */
-static
-void
-sync_array_wake_threads_if_sema_free_low(
-/*=====================================*/
- sync_array_t* arr) /* in/out: wait array */
-{
- ulint i = 0;
- ulint count;
-
- sync_array_enter(arr);
-
- for (count = 0; count < arr->n_reserved; ++i) {
- sync_cell_t* cell;
-
- cell = sync_array_get_nth_cell(arr, i);
-
- if (cell->wait_object != NULL) {
-
- count++;
-
- if (sync_arr_cell_can_wake_up(cell)) {
- os_event_t event;
-
- event = sync_cell_get_event(cell);
-
- os_event_set(event);
- }
- }
- }
-
- sync_array_exit(arr);
-}
-
-/**********************************************************************//**
-If the wakeup algorithm does not work perfectly at semaphore relases,
-this function will do the waking (see the comment in mutex_exit). This
-function should be called about every 1 second in the server.
-
-Note that there's a race condition between this thread and mutex_exit
-changing the lock_word and calling signal_object, so sometimes this finds
-threads to wake up even when nothing has gone wrong. */
-UNIV_INTERN
-void
-sync_arr_wake_threads_if_sema_free(void)
-/*====================================*/
-{
- ulint i;
-
- for (i = 0; i < sync_array_size; ++i) {
-
- sync_array_wake_threads_if_sema_free_low(
- sync_wait_array[i]);
- }
}
/**********************************************************************//**
Prints warnings of long semaphore waits to stderr.
-@return TRUE if fatal semaphore wait threshold was exceeded */
+@return TRUE if fatal semaphore wait threshold was exceeded */
static
-ibool
+bool
sync_array_print_long_waits_low(
/*============================*/
sync_array_t* arr, /*!< in: sync array instance */
@@ -931,14 +969,14 @@ sync_array_print_long_waits_low(
const void** sema, /*!< out: longest-waited-for semaphore */
ibool* noticed)/*!< out: TRUE if long wait noticed */
{
- ulint i;
ulint fatal_timeout = srv_fatal_semaphore_wait_threshold;
ibool fatal = FALSE;
double longest_diff = 0;
+ ulint i;
/* For huge tables, skip the check during CHECK TABLE etc... */
if (fatal_timeout > SRV_SEMAPHORE_WAIT_EXTENSION) {
- return(FALSE);
+ return(false);
}
#ifdef UNIV_DEBUG_VALGRIND
@@ -953,28 +991,25 @@ sync_array_print_long_waits_low(
# define SYNC_ARRAY_TIMEOUT 240
#endif
- for (i = 0; i < arr->n_cells; i++) {
+ for (ulint i = 0; i < arr->n_cells; i++) {
- double diff;
sync_cell_t* cell;
- void* wait_object;
- os_thread_id_t reserver=0;
+ void* latch;
cell = sync_array_get_nth_cell(arr, i);
- wait_object = cell->wait_object;
+ latch = cell->latch.mutex;
- if (wait_object == NULL || !cell->waiting) {
+ if (latch == NULL || !cell->waiting) {
continue;
}
- diff = difftime(time(NULL), cell->reservation_time);
+ double diff = difftime(time(NULL), cell->reservation_time);
if (diff > SYNC_ARRAY_TIMEOUT) {
- fputs("InnoDB: Warning: a long semaphore wait:\n",
- stderr);
- sync_array_cell_print(stderr, cell, &reserver);
+ ib::warn() << "A long semaphore wait:";
+ sync_array_cell_print(stderr, cell);
*noticed = TRUE;
}
@@ -984,62 +1019,29 @@ sync_array_print_long_waits_low(
if (diff > longest_diff) {
longest_diff = diff;
- *sema = wait_object;
- *waiter = cell->thread;
+ *sema = latch;
+ *waiter = cell->thread_id;
}
}
- /* We found a long semaphore wait, wait all threads that are
+ /* We found a long semaphore wait, print all threads that are
waiting for a semaphore. */
if (*noticed) {
for (i = 0; i < arr->n_cells; i++) {
void* wait_object;
- os_thread_id_t reserver=(os_thread_id_t)ULINT_UNDEFINED;
sync_cell_t* cell;
- ulint loop = 0;
cell = sync_array_get_nth_cell(arr, i);
- wait_object = cell->wait_object;
+ wait_object = cell->latch.mutex;
if (wait_object == NULL || !cell->waiting) {
continue;
}
- fputs("InnoDB: Warning: semaphore wait:\n",
- stderr);
- sync_array_cell_print(stderr, cell, &reserver);
-
- /* Try to output cell information for writer recursive way */
- while (reserver != (os_thread_id_t)ULINT_UNDEFINED) {
- sync_cell_t* reserver_wait;
-
- reserver_wait = sync_array_find_thread(arr, reserver);
-
- if (reserver_wait &&
- reserver_wait->wait_object != NULL &&
- reserver_wait->waiting) {
- fputs("InnoDB: Warning: Writer thread is waiting this semaphore:\n",
- stderr);
- reserver = (os_thread_id_t)ULINT_UNDEFINED;
- sync_array_cell_print(stderr, reserver_wait, &reserver);
- loop++;
-
- if (reserver_wait->thread == reserver) {
- reserver = (os_thread_id_t)ULINT_UNDEFINED;
- }
- } else {
- reserver = (os_thread_id_t)ULINT_UNDEFINED;
- }
-
- /* This is protection against loop */
- if (loop > 100) {
- fputs("InnoDB: Warning: Too many waiting threads.\n", stderr);
- break;
- }
-
- }
+ ib::info() << "A semaphore wait:";
+ sync_array_cell_print(stderr, cell);
}
}
@@ -1050,8 +1052,7 @@ sync_array_print_long_waits_low(
/**********************************************************************//**
Prints warnings of long semaphore waits to stderr.
-@return TRUE if fatal semaphore wait threshold was exceeded */
-UNIV_INTERN
+@return TRUE if fatal semaphore wait threshold was exceeded */
ibool
sync_array_print_long_waits(
/*========================*/
@@ -1099,7 +1100,8 @@ sync_array_print_long_waits(
MONITOR_VALUE(MONITOR_OS_PENDING_WRITES));
srv_print_innodb_monitor = TRUE;
- os_event_set(srv_monitor_event);
+
+ lock_set_timeout_event();
os_thread_sleep(30000000);
@@ -1123,7 +1125,6 @@ sync_array_print_info_low(
{
ulint i;
ulint count = 0;
- os_thread_id_t r = 0;
fprintf(file,
"OS WAIT ARRAY INFO: reservation count " ULINTPF "\n",
@@ -1134,9 +1135,9 @@ sync_array_print_info_low(
cell = sync_array_get_nth_cell(arr, i);
- if (cell->wait_object != NULL) {
+ if (cell->latch.mutex != 0) {
count++;
- sync_array_cell_print(file, cell, &r);
+ sync_array_cell_print(file, cell);
}
}
}
@@ -1159,65 +1160,50 @@ sync_array_print_info(
/**********************************************************************//**
Create the primary system wait array(s), they are protected by an OS mutex */
-UNIV_INTERN
void
sync_array_init(
/*============*/
ulint n_threads) /*!< in: Number of slots to
create in all arrays */
{
- ulint i;
- ulint n_slots;
-
ut_a(sync_wait_array == NULL);
ut_a(srv_sync_array_size > 0);
ut_a(n_threads > 0);
sync_array_size = srv_sync_array_size;
- /* We have to use ut_malloc() because the mutex infrastructure
- hasn't been initialised yet. It is required by mem_alloc() and
- the heap functions. */
-
- sync_wait_array = static_cast<sync_array_t**>(
- ut_malloc(sizeof(*sync_wait_array) * sync_array_size));
+ sync_wait_array = UT_NEW_ARRAY_NOKEY(sync_array_t*, sync_array_size);
- n_slots = 1 + (n_threads - 1) / sync_array_size;
+ ulint n_slots = 1 + (n_threads - 1) / sync_array_size;
- for (i = 0; i < sync_array_size; ++i) {
+ for (ulint i = 0; i < sync_array_size; ++i) {
- sync_wait_array[i] = sync_array_create(n_slots);
+ sync_wait_array[i] = UT_NEW_NOKEY(sync_array_t(n_slots));
}
}
/**********************************************************************//**
Close sync array wait sub-system. */
-UNIV_INTERN
void
sync_array_close(void)
/*==================*/
{
- ulint i;
-
- for (i = 0; i < sync_array_size; ++i) {
+ for (ulint i = 0; i < sync_array_size; ++i) {
sync_array_free(sync_wait_array[i]);
}
- ut_free(sync_wait_array);
+ UT_DELETE_ARRAY(sync_wait_array);
sync_wait_array = NULL;
}
/**********************************************************************//**
Print info about the sync array(s). */
-UNIV_INTERN
void
sync_array_print(
/*=============*/
FILE* file) /*!< in/out: Print to this stream */
{
- ulint i;
-
- for (i = 0; i < sync_array_size; ++i) {
+ for (ulint i = 0; i < sync_array_size; ++i) {
sync_array_print_info(file, sync_wait_array[i]);
}
@@ -1227,25 +1213,6 @@ sync_array_print(
}
/**********************************************************************//**
-Get an instance of the sync wait array. */
-UNIV_INTERN
-sync_array_t*
-sync_array_get(void)
-/*================*/
-{
- ulint i;
- static ulint count;
-
-#ifdef HAVE_ATOMIC_BUILTINS
- i = os_atomic_increment_ulint(&count, 1);
-#else
- i = count++;
-#endif /* HAVE_ATOMIC_BUILTINS */
-
- return(sync_wait_array[i % sync_array_size]);
-}
-
-/**********************************************************************//**
Prints info of the wait array without using any mutexes/semaphores. */
UNIV_INTERN
void
@@ -1260,12 +1227,10 @@ sync_array_print_innodb(void)
for (i = 0; i < arr->n_cells; i++) {
void* wait_object;
sync_cell_t* cell;
- os_thread_id_t reserver=(os_thread_id_t)ULINT_UNDEFINED;
- ulint loop=0;
cell = sync_array_get_nth_cell(arr, i);
- wait_object = cell->wait_object;
+ wait_object = cell->latch.mutex;
if (wait_object == NULL || !cell->waiting) {
@@ -1274,34 +1239,7 @@ sync_array_print_innodb(void)
fputs("InnoDB: Warning: semaphore wait:\n",
stderr);
- sync_array_cell_print(stderr, cell, &reserver);
-
- /* Try to output cell information for writer recursive way */
- while (reserver != (os_thread_id_t)ULINT_UNDEFINED) {
- sync_cell_t* reserver_wait;
-
- reserver_wait = sync_array_find_thread(arr, reserver);
-
- if (reserver_wait &&
- reserver_wait->wait_object != NULL &&
- reserver_wait->waiting) {
- fputs("InnoDB: Warning: Writer thread is waiting this semaphore:\n",
- stderr);
- sync_array_cell_print(stderr, reserver_wait, &reserver);
-
- if (reserver_wait->thread == reserver) {
- reserver = (os_thread_id_t)ULINT_UNDEFINED;
- }
- } else {
- reserver = (os_thread_id_t)ULINT_UNDEFINED;
- }
-
- /* This is protection against loop */
- if (loop > 100) {
- fputs("InnoDB: Warning: Too many waiting threads.\n", stderr);
- break;
- }
- }
+ sync_array_cell_print(stderr, cell);
}
fputs("InnoDB: Semaphore wait debug output ended:\n", stderr);
@@ -1340,7 +1278,7 @@ sync_arr_get_item(
wait_cell = sync_array_get_nth_cell(sync_arr, i);
if (wait_cell) {
- wait_object = wait_cell->wait_object;
+ wait_object = wait_cell->latch.mutex;
if(wait_object != NULL && wait_cell->waiting) {
found = TRUE;
@@ -1382,70 +1320,87 @@ sync_arr_fill_sys_semphore_waits_table(
for(ulint i=0; i < n_items;i++) {
sync_cell_t *cell=NULL;
if (sync_arr_get_item(i, &cell)) {
- ib_mutex_t* mutex;
+ WaitMutex* mutex;
type = cell->request_type;
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_THREAD_ID], (longlong)os_thread_pf(cell->thread)));
+ /* JAN: FIXME
+ OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_THREAD_ID],
+ (longlong)os_thread_pf(cell->thread)));
+ */
OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_FILE], innobase_basename(cell->file)));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LINE], cell->line));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAIT_TIME], (longlong)difftime(time(NULL), cell->reservation_time)));
+ OK(fields[SYS_SEMAPHORE_WAITS_LINE]->store(cell->line, true));
+ fields[SYS_SEMAPHORE_WAITS_LINE]->set_notnull();
+ OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAIT_TIME], (ulint)difftime(time(NULL), cell->reservation_time)));
if (type == SYNC_MUTEX) {
- mutex = static_cast<ib_mutex_t*>(cell->old_wait_mutex);
+ mutex = static_cast<WaitMutex*>(cell->latch.mutex);
if (mutex) {
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_OBJECT_NAME], mutex->cmutex_name));
+ // JAN: FIXME
+ // OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_OBJECT_NAME], mutex->cmutex_name));
OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAIT_OBJECT], (longlong)mutex));
OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "MUTEX"));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID], (longlong)mutex->thread_id));
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_HOLDER_FILE], innobase_basename(mutex->file_name)));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE], mutex->line));
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_CREATED_FILE], innobase_basename(mutex->cfile_name)));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_CREATED_LINE], mutex->cline));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAITERS_FLAG], (longlong)mutex->waiters));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LOCK_WORD], (longlong)mutex->lock_word));
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE], innobase_basename(mutex->file_name)));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE], mutex->line));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT], mutex->count_os_wait));
+ //OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID], (longlong)mutex->thread_id));
+ //OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_HOLDER_FILE], innobase_basename(mutex->file_name)));
+ //OK(fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->store(mutex->line, true));
+ //fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->set_notnull();
+ //OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_CREATED_FILE], innobase_basename(mutex->cfile_name)));
+ //OK(fields[SYS_SEMAPHORE_WAITS_CREATED_LINE]->store(mutex->cline, true));
+ //fields[SYS_SEMAPHORE_WAITS_CREATED_LINE]->set_notnull();
+ //OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAITERS_FLAG], (longlong)mutex->waiters));
+ //OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LOCK_WORD], (longlong)mutex->lock_word));
+ //OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE], innobase_basename(mutex->file_name)));
+ //OK(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->store(mutex->line, true));
+ //fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->set_notnull();
+ //OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT], mutex->count_os_wait));
}
- } else if (type == RW_LOCK_EX
- || type == RW_LOCK_WAIT_EX
- || type == RW_LOCK_SHARED) {
+ } else if (type == RW_LOCK_X_WAIT
+ || type == RW_LOCK_X
+ || type == RW_LOCK_SX
+ || type == RW_LOCK_S) {
rw_lock_t* rwlock=NULL;
- rwlock = static_cast<rw_lock_t *> (cell->old_wait_rw_lock);
+ rwlock = static_cast<rw_lock_t *> (cell->latch.lock);
if (rwlock) {
ulint writer = rw_lock_get_writer(rwlock);
OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAIT_OBJECT], (longlong)rwlock));
- if (type == RW_LOCK_EX) {
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_EX"));
- } else if (type == RW_LOCK_WAIT_EX) {
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_WAIT_EX"));
- } else if (type == RW_LOCK_SHARED) {
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_SHARED"));
+ if (type == RW_LOCK_X) {
+ OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_X"));
+ } else if (type == RW_LOCK_X_WAIT) {
+ OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_X_WAIT"));
+ } else if (type == RW_LOCK_S) {
+ OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_S"));
+ } else if (type == RW_LOCK_SX) {
+ OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_SX"));
}
if (writer != RW_LOCK_NOT_LOCKED) {
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_OBJECT_NAME], rwlock->lock_name));
+ // JAN: FIXME
+ // OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_OBJECT_NAME], rwlock->lock_name));
OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WRITER_THREAD], (longlong)os_thread_pf(rwlock->writer_thread)));
- if (writer == RW_LOCK_EX) {
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_EX"));
- } else if (writer == RW_LOCK_WAIT_EX) {
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_WAIT_EX"));
+ if (writer == RW_LOCK_X) {
+ OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_X"));
+ } else if (writer == RW_LOCK_X_WAIT) {
+ OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_X_WAIT"));
+						} else if (writer == RW_LOCK_SX) {
+ OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_SX"));
}
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID], (longlong)rwlock->thread_id));
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_HOLDER_FILE], innobase_basename(rwlock->file_name)));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE], rwlock->line));
+ //OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID], (longlong)rwlock->thread_id));
+ //OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_HOLDER_FILE], innobase_basename(rwlock->file_name)));
+ //OK(fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->store(rwlock->line, true));
+ //fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->set_notnull();
OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_READERS], rw_lock_get_reader_count(rwlock)));
OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAITERS_FLAG], (longlong)rwlock->waiters));
OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LOCK_WORD], (longlong)rwlock->lock_word));
OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_READER_FILE], innobase_basename(rwlock->last_s_file_name)));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LAST_READER_LINE], rwlock->last_s_line));
+ OK(fields[SYS_SEMAPHORE_WAITS_LAST_READER_LINE]->store(rwlock->last_s_line, true));
+ fields[SYS_SEMAPHORE_WAITS_LAST_READER_LINE]->set_notnull();
OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE], innobase_basename(rwlock->last_x_file_name)));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE], rwlock->last_x_line));
+ OK(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->store(rwlock->last_x_line, true));
+ fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->set_notnull();
OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT], rwlock->count_os_wait));
}
}
diff --git a/storage/innobase/sync/sync0debug.cc b/storage/innobase/sync/sync0debug.cc
new file mode 100644
index 00000000000..c4a2fd90b37
--- /dev/null
+++ b/storage/innobase/sync/sync0debug.cc
@@ -0,0 +1,1764 @@
+/*****************************************************************************
+
+Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2018, MariaDB Corporation.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file sync/sync0debug.cc
+Debug checks for latches.
+
+Created 2012-08-21 Sunny Bains
+*******************************************************/
+
+#include "sync0sync.h"
+#include "sync0debug.h"
+#include "srv0start.h"
+
+#include <vector>
+#include <string>
+#include <algorithm>
+#include <iostream>
+
+#ifdef UNIV_DEBUG
+
+my_bool srv_sync_debug;
+
+/** The global mutex which protects debug info lists of all rw-locks.
+To modify the debug info list of an rw-lock, this mutex has to be
+acquired in addition to the mutex protecting the lock. */
+static SysMutex rw_lock_debug_mutex;
+
+/** The latch held by a thread */
+struct Latched {
+
+ /** Constructor */
+ Latched() : m_latch(), m_level(SYNC_UNKNOWN) { }
+
+ /** Constructor
+ @param[in] latch Latch instance
+ @param[in] level Level of latch held */
+ Latched(const latch_t* latch,
+ latch_level_t level)
+ :
+ m_latch(latch),
+ m_level(level)
+ {
+ /* No op */
+ }
+
+ /** @return the latch level */
+ latch_level_t get_level() const
+ {
+ return(m_level);
+ }
+
+ /** Check if the rhs latch and level match
+ @param[in] rhs instance to compare with
+ @return true on match */
+ bool operator==(const Latched& rhs) const
+ {
+ return(m_latch == rhs.m_latch && m_level == rhs.m_level);
+ }
+
+ /** The latch instance */
+ const latch_t* m_latch;
+
+ /** The latch level. For buffer blocks we can pass a separate latch
+ level to check against, see buf_block_dbg_add_level() */
+ latch_level_t m_level;
+};
+
+/** Thread-specific latches, ordered by level in descending order. */
+typedef std::vector<Latched, ut_allocator<Latched> > Latches;
+
+/** The deadlock detector. */
+struct LatchDebug {
+
+ /** Debug mutex for control structures, should not be tracked
+ by this module. */
+ typedef OSMutex Mutex;
+
+ /** Comparator for the ThreadMap. */
+ struct os_thread_id_less
+ : public std::binary_function<
+ os_thread_id_t,
+ os_thread_id_t,
+ bool>
+ {
+ /** @return true if lhs < rhs */
+ bool operator()(
+ const os_thread_id_t& lhs,
+ const os_thread_id_t& rhs) const
+ UNIV_NOTHROW
+ {
+ return(os_thread_pf(lhs) < os_thread_pf(rhs));
+ }
+ };
+
+ /** For tracking a thread's latches. */
+ typedef std::map<
+ os_thread_id_t,
+ Latches*,
+ os_thread_id_less,
+ ut_allocator<std::pair<const os_thread_id_t, Latches*> > >
+ ThreadMap;
+
+ /** Constructor */
+ LatchDebug()
+ UNIV_NOTHROW;
+
+ /** Destructor */
+ ~LatchDebug()
+ UNIV_NOTHROW
+ {
+ m_mutex.destroy();
+ }
+
+ /** Create a new instance if one doesn't exist else return
+ the existing one.
+ @param[in] add add an empty entry if one is not
+ found (default no)
+ @return pointer to a thread's acquired latches. */
+ Latches* thread_latches(bool add = false)
+ UNIV_NOTHROW;
+
+ /** Check that all the latches already owned by a thread have a lower
+ level than limit.
+ @param[in] latches the thread's existing (acquired) latches
+ @param[in] limit to check against
+	@return latched info if there is one with a level <= limit. */
+ const Latched* less(
+ const Latches* latches,
+ latch_level_t limit) const
+ UNIV_NOTHROW;
+
+ /** Checks if the level value exists in the thread's acquired latches.
+ @param[in] latches the thread's existing (acquired) latches
+ @param[in] level to lookup
+ @return latch if found or 0 */
+ const latch_t* find(
+		const Latches*	latches,
+ latch_level_t level) const
+ UNIV_NOTHROW;
+
+ /**
+ Checks if the level value exists in the thread's acquired latches.
+ @param[in] level to lookup
+ @return latch if found or 0 */
+ const latch_t* find(latch_level_t level)
+ UNIV_NOTHROW;
+
+ /** Report error and abort.
+ @param[in] latches thread's existing latches
+ @param[in] latched The existing latch causing the
+ invariant to fail
+ @param[in] level The new level request that breaks
+ the order */
+ void crash(
+ const Latches* latches,
+ const Latched* latched,
+ latch_level_t level) const
+ UNIV_NOTHROW;
+
+ /** Do a basic ordering check.
+ @param[in] latches thread's existing latches
+ @param[in] requested_level Level requested by latch
+ @param[in] level declared ulint so that we can
+ do level - 1. The level of the
+ latch that the thread is trying
+ to acquire
+ @return true if passes, else crash with error message. */
+ bool basic_check(
+ const Latches* latches,
+ latch_level_t requested_level,
+ ulint level) const
+ UNIV_NOTHROW;
+
+ /** Adds a latch and its level in the thread level array. Allocates
+ the memory for the array if called for the first time for this
+ OS thread. Makes the checks against other latch levels stored
+ in the array for this thread.
+
+	@param[in]	latch	latch that the thread wants to acquire.
+ @param[in] level latch level to check against */
+ void lock_validate(
+ const latch_t* latch,
+ latch_level_t level)
+ UNIV_NOTHROW
+ {
+ /* Ignore diagnostic latches, starting with '.' */
+
+ if (*latch->get_name() != '.'
+ && latch->get_level() != SYNC_LEVEL_VARYING) {
+
+ ut_ad(level != SYNC_LEVEL_VARYING);
+
+ Latches* latches = check_order(latch, level);
+
+ ut_a(latches->empty()
+ || level == SYNC_LEVEL_VARYING
+ || level == SYNC_NO_ORDER_CHECK
+ || latches->back().get_level()
+ == SYNC_NO_ORDER_CHECK
+ || latches->back().m_latch->get_level()
+ == SYNC_LEVEL_VARYING
+ || latches->back().get_level() >= level);
+ }
+ }
+
+ /** Adds a latch and its level in the thread level array. Allocates
+ the memory for the array if called for the first time for this
+ OS thread. Makes the checks against other latch levels stored
+ in the array for this thread.
+
+	@param[in]	latch	latch that the thread wants to acquire.
+ @param[in] level latch level to check against */
+ void lock_granted(
+ const latch_t* latch,
+ latch_level_t level)
+ UNIV_NOTHROW
+ {
+ /* Ignore diagnostic latches, starting with '.' */
+
+ if (*latch->get_name() != '.'
+ && latch->get_level() != SYNC_LEVEL_VARYING) {
+
+ Latches* latches = thread_latches(true);
+
+ latches->push_back(Latched(latch, level));
+ }
+ }
+
+ /** For recursive X rw-locks.
+ @param[in] latch The RW-Lock to relock */
+ void relock(const latch_t* latch)
+ UNIV_NOTHROW
+ {
+ ut_a(latch->m_rw_lock);
+
+ latch_level_t level = latch->get_level();
+
+ /* Ignore diagnostic latches, starting with '.' */
+
+ if (*latch->get_name() != '.'
+ && latch->get_level() != SYNC_LEVEL_VARYING) {
+
+ Latches* latches = thread_latches(true);
+
+ Latches::iterator it = std::find(
+ latches->begin(), latches->end(),
+ Latched(latch, level));
+
+ ut_a(latches->empty()
+ || level == SYNC_LEVEL_VARYING
+ || level == SYNC_NO_ORDER_CHECK
+ || latches->back().m_latch->get_level()
+ == SYNC_LEVEL_VARYING
+ || latches->back().m_latch->get_level()
+ == SYNC_NO_ORDER_CHECK
+ || latches->back().get_level() >= level
+ || it != latches->end());
+
+ if (it == latches->end()) {
+ latches->push_back(Latched(latch, level));
+ } else {
+ latches->insert(it, Latched(latch, level));
+ }
+ }
+ }
+
+ /** Iterate over a thread's latches.
+ @param[in] functor The callback
+ @return true if the functor returns true. */
+ bool for_each(const sync_check_functor_t& functor)
+ UNIV_NOTHROW
+ {
+ if (const Latches* latches = thread_latches()) {
+ Latches::const_iterator end = latches->end();
+ for (Latches::const_iterator it = latches->begin();
+ it != end; ++it) {
+
+ if (functor(it->m_level)) {
+ return(true);
+ }
+ }
+ }
+
+ return(false);
+ }
+
+ /** Removes a latch from the thread level array if it is found there.
+ @param[in] latch The latch that was released
+	It is not an error if the latch is not found, as we presently
+	are not able to determine the level for every latch reservation
+	the program does */
+ void unlock(const latch_t* latch) UNIV_NOTHROW;
+
+ /** Get the level name
+ @param[in] level The level ID to lookup
+ @return level name */
+ const std::string& get_level_name(latch_level_t level) const
+ UNIV_NOTHROW
+ {
+ Levels::const_iterator it = m_levels.find(level);
+
+ ut_ad(it != m_levels.end());
+
+ return(it->second);
+ }
+
+ /** Initialise the debug data structures */
+ static void init()
+ UNIV_NOTHROW;
+
+ /** Shutdown the latch debug checking */
+ static void shutdown()
+ UNIV_NOTHROW;
+
+ /** @return the singleton instance */
+ static LatchDebug* instance()
+ UNIV_NOTHROW
+ {
+ return(s_instance);
+ }
+
+ /** Create the singleton instance */
+ static void create_instance()
+ UNIV_NOTHROW
+ {
+ ut_ad(s_instance == NULL);
+
+ s_instance = UT_NEW_NOKEY(LatchDebug());
+ }
+
+private:
+ /** Disable copying */
+ LatchDebug(const LatchDebug&);
+ LatchDebug& operator=(const LatchDebug&);
+
+ /** Adds a latch and its level in the thread level array. Allocates
+	the memory for the array if called for the first time for this OS thread.
+ Makes the checks against other latch levels stored in the array
+ for this thread.
+
+ @param[in] latch pointer to a mutex or an rw-lock
+ @param[in] level level in the latching order
+ @return the thread's latches */
+ Latches* check_order(
+ const latch_t* latch,
+ latch_level_t level)
+ UNIV_NOTHROW;
+
+ /** Print the latches acquired by a thread
+ @param[in] latches Latches acquired by a thread */
+ void print_latches(const Latches* latches) const
+ UNIV_NOTHROW;
+
+ /** Special handling for the RTR mutexes. We need to add proper
+ levels for them if possible.
+ @param[in] latch Latch to check
+	@return true if it is an _RTR_ mutex */
+ bool is_rtr_mutex(const latch_t* latch) const
+ UNIV_NOTHROW
+ {
+ return(latch->get_id() == LATCH_ID_RTR_ACTIVE_MUTEX
+ || latch->get_id() == LATCH_ID_RTR_PATH_MUTEX
+ || latch->get_id() == LATCH_ID_RTR_MATCH_MUTEX
+ || latch->get_id() == LATCH_ID_RTR_SSN_MUTEX);
+ }
+
+private:
+ /** Comparator for the Levels . */
+ struct latch_level_less
+ : public std::binary_function<
+ latch_level_t,
+ latch_level_t,
+ bool>
+ {
+ /** @return true if lhs < rhs */
+ bool operator()(
+ const latch_level_t& lhs,
+ const latch_level_t& rhs) const
+ UNIV_NOTHROW
+ {
+ return(lhs < rhs);
+ }
+ };
+
+ typedef std::map<
+ latch_level_t,
+ std::string,
+ latch_level_less,
+ ut_allocator<std::pair<const latch_level_t, std::string> > >
+ Levels;
+
+ /** Mutex protecting the deadlock detector data structures. */
+ Mutex m_mutex;
+
+ /** Thread specific data. Protected by m_mutex. */
+ ThreadMap m_threads;
+
+	/** Mapping from latch level to its string representation. */
+ Levels m_levels;
+
+ /** The singleton instance. Must be created in single threaded mode. */
+ static LatchDebug* s_instance;
+
+public:
+ /** For checking whether this module has been initialised or not. */
+ static bool s_initialized;
+};
+
+/** The latch order checking infrastructure */
+LatchDebug* LatchDebug::s_instance = NULL;
+bool LatchDebug::s_initialized = false;
+
+#define LEVEL_MAP_INSERT(T) \
+do { \
+ std::pair<Levels::iterator, bool> result = \
+ m_levels.insert(Levels::value_type(T, #T)); \
+ ut_ad(result.second); \
+} while(0)
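+
+/* Illustrative expansion (a sketch, not part of this patch): for
+example LEVEL_MAP_INSERT(SYNC_DICT) stringizes the level ID and
+inserts the pair into m_levels, asserting that the level was not
+already mapped:
+
+	std::pair<Levels::iterator, bool> result =
+		m_levels.insert(Levels::value_type(SYNC_DICT, "SYNC_DICT"));
+	ut_ad(result.second);
+*/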
+
+/** Setup the mapping from level ID to level name mapping */
+LatchDebug::LatchDebug()
+{
+ m_mutex.init();
+
+ LEVEL_MAP_INSERT(SYNC_UNKNOWN);
+ LEVEL_MAP_INSERT(SYNC_MUTEX);
+ LEVEL_MAP_INSERT(RW_LOCK_SX);
+ LEVEL_MAP_INSERT(RW_LOCK_X_WAIT);
+ LEVEL_MAP_INSERT(RW_LOCK_S);
+ LEVEL_MAP_INSERT(RW_LOCK_X);
+ LEVEL_MAP_INSERT(RW_LOCK_NOT_LOCKED);
+ LEVEL_MAP_INSERT(SYNC_MONITOR_MUTEX);
+ LEVEL_MAP_INSERT(SYNC_ANY_LATCH);
+ LEVEL_MAP_INSERT(SYNC_DOUBLEWRITE);
+ LEVEL_MAP_INSERT(SYNC_BUF_FLUSH_LIST);
+ LEVEL_MAP_INSERT(SYNC_BUF_BLOCK);
+ LEVEL_MAP_INSERT(SYNC_BUF_PAGE_HASH);
+ LEVEL_MAP_INSERT(SYNC_BUF_POOL);
+ LEVEL_MAP_INSERT(SYNC_POOL);
+ LEVEL_MAP_INSERT(SYNC_POOL_MANAGER);
+ LEVEL_MAP_INSERT(SYNC_SEARCH_SYS);
+ LEVEL_MAP_INSERT(SYNC_WORK_QUEUE);
+ LEVEL_MAP_INSERT(SYNC_FTS_TOKENIZE);
+ LEVEL_MAP_INSERT(SYNC_FTS_OPTIMIZE);
+ LEVEL_MAP_INSERT(SYNC_FTS_BG_THREADS);
+ LEVEL_MAP_INSERT(SYNC_FTS_CACHE_INIT);
+ LEVEL_MAP_INSERT(SYNC_RECV);
+ LEVEL_MAP_INSERT(SYNC_LOG_FLUSH_ORDER);
+ LEVEL_MAP_INSERT(SYNC_LOG);
+ LEVEL_MAP_INSERT(SYNC_LOG_WRITE);
+ LEVEL_MAP_INSERT(SYNC_PAGE_CLEANER);
+ LEVEL_MAP_INSERT(SYNC_PURGE_QUEUE);
+ LEVEL_MAP_INSERT(SYNC_TRX_SYS_HEADER);
+ LEVEL_MAP_INSERT(SYNC_REC_LOCK);
+ LEVEL_MAP_INSERT(SYNC_THREADS);
+ LEVEL_MAP_INSERT(SYNC_TRX);
+ LEVEL_MAP_INSERT(SYNC_TRX_SYS);
+ LEVEL_MAP_INSERT(SYNC_LOCK_SYS);
+ LEVEL_MAP_INSERT(SYNC_LOCK_WAIT_SYS);
+ LEVEL_MAP_INSERT(SYNC_INDEX_ONLINE_LOG);
+ LEVEL_MAP_INSERT(SYNC_IBUF_BITMAP);
+ LEVEL_MAP_INSERT(SYNC_IBUF_BITMAP_MUTEX);
+ LEVEL_MAP_INSERT(SYNC_IBUF_TREE_NODE);
+ LEVEL_MAP_INSERT(SYNC_IBUF_TREE_NODE_NEW);
+ LEVEL_MAP_INSERT(SYNC_IBUF_INDEX_TREE);
+ LEVEL_MAP_INSERT(SYNC_IBUF_MUTEX);
+ LEVEL_MAP_INSERT(SYNC_FSP_PAGE);
+ LEVEL_MAP_INSERT(SYNC_FSP);
+ LEVEL_MAP_INSERT(SYNC_EXTERN_STORAGE);
+ LEVEL_MAP_INSERT(SYNC_TRX_UNDO_PAGE);
+ LEVEL_MAP_INSERT(SYNC_RSEG_HEADER);
+ LEVEL_MAP_INSERT(SYNC_RSEG_HEADER_NEW);
+ LEVEL_MAP_INSERT(SYNC_NOREDO_RSEG);
+ LEVEL_MAP_INSERT(SYNC_REDO_RSEG);
+ LEVEL_MAP_INSERT(SYNC_TRX_UNDO);
+ LEVEL_MAP_INSERT(SYNC_PURGE_LATCH);
+ LEVEL_MAP_INSERT(SYNC_TREE_NODE);
+ LEVEL_MAP_INSERT(SYNC_TREE_NODE_FROM_HASH);
+ LEVEL_MAP_INSERT(SYNC_TREE_NODE_NEW);
+ LEVEL_MAP_INSERT(SYNC_INDEX_TREE);
+ LEVEL_MAP_INSERT(SYNC_IBUF_PESS_INSERT_MUTEX);
+ LEVEL_MAP_INSERT(SYNC_IBUF_HEADER);
+ LEVEL_MAP_INSERT(SYNC_DICT_HEADER);
+ LEVEL_MAP_INSERT(SYNC_STATS_AUTO_RECALC);
+ LEVEL_MAP_INSERT(SYNC_DICT_AUTOINC_MUTEX);
+ LEVEL_MAP_INSERT(SYNC_DICT);
+ LEVEL_MAP_INSERT(SYNC_FTS_CACHE);
+ LEVEL_MAP_INSERT(SYNC_DICT_OPERATION);
+ LEVEL_MAP_INSERT(SYNC_FILE_FORMAT_TAG);
+ LEVEL_MAP_INSERT(SYNC_TRX_I_S_LAST_READ);
+ LEVEL_MAP_INSERT(SYNC_TRX_I_S_RWLOCK);
+ LEVEL_MAP_INSERT(SYNC_RECV_WRITER);
+ LEVEL_MAP_INSERT(SYNC_LEVEL_VARYING);
+ LEVEL_MAP_INSERT(SYNC_NO_ORDER_CHECK);
+
+ /* Enum count starts from 0 */
+ ut_ad(m_levels.size() == SYNC_LEVEL_MAX + 1);
+}
+
+/** Print the latches acquired by a thread
+@param[in] latches Latches acquired by a thread */
+void
+LatchDebug::print_latches(const Latches* latches) const
+ UNIV_NOTHROW
+{
+ ib::error() << "Latches already owned by this thread: ";
+
+ Latches::const_iterator end = latches->end();
+
+ for (Latches::const_iterator it = latches->begin();
+ it != end;
+ ++it) {
+
+ ib::error()
+ << sync_latch_get_name(it->m_latch->get_id())
+ << " -> "
+ << it->m_level << " "
+ << "(" << get_level_name(it->m_level) << ")";
+ }
+}
+
+/** Report error and abort
+@param[in] latches thread's existing latches
+@param[in] latched The existing latch causing the invariant to fail
+@param[in] level The new level request that breaks the order */
+void
+LatchDebug::crash(
+ const Latches* latches,
+ const Latched* latched,
+ latch_level_t level) const
+ UNIV_NOTHROW
+{
+ const latch_t* latch = latched->m_latch;
+ const std::string& in_level_name = get_level_name(level);
+
+ const std::string& latch_level_name =
+ get_level_name(latched->m_level);
+
+ ib::error()
+ << "Thread " << os_thread_pf(os_thread_get_curr_id())
+ << " already owns a latch "
+ << sync_latch_get_name(latch->m_id) << " at level"
+ << " " << latched->m_level << " (" << latch_level_name
+ << " ), which is at a lower/same level than the"
+ << " requested latch: "
+ << level << " (" << in_level_name << "). "
+ << latch->to_string();
+
+ print_latches(latches);
+
+ ut_error;
+}
+
+/** Check that all the latches already owned by a thread have a lower
+level than limit.
+@param[in] latches the thread's existing (acquired) latches
+@param[in] limit to check against
+@return latched info if there is one with a level <= limit. */
+const Latched*
+LatchDebug::less(
+ const Latches* latches,
+ latch_level_t limit) const
+ UNIV_NOTHROW
+{
+ Latches::const_iterator end = latches->end();
+
+ for (Latches::const_iterator it = latches->begin(); it != end; ++it) {
+
+ if (it->m_level <= limit) {
+ return(&(*it));
+ }
+ }
+
+ return(NULL);
+}
+
+/** Do a basic ordering check.
+@param[in] latches thread's existing latches
+@param[in] requested_level Level requested by latch
+@param[in] in_level declared ulint so that we can do level - 1.
+ The level of the latch that the thread is
+ trying to acquire
+@return true if passes, else crash with error message. */
+bool
+LatchDebug::basic_check(
+ const Latches* latches,
+ latch_level_t requested_level,
+ ulint in_level) const
+ UNIV_NOTHROW
+{
+ latch_level_t level = latch_level_t(in_level);
+
+ ut_ad(level < SYNC_LEVEL_MAX);
+
+ const Latched* latched = less(latches, level);
+
+ if (latched != NULL) {
+ crash(latches, latched, requested_level);
+ return(false);
+ }
+
+ return(true);
+}
+
+/** Create a new instance if one doesn't exist else return the existing one.
+@param[in] add add an empty entry if one is not found
+ (default no)
+@return pointer to a thread's acquired latches. */
+Latches*
+LatchDebug::thread_latches(bool add)
+ UNIV_NOTHROW
+{
+ m_mutex.enter();
+
+ os_thread_id_t thread_id = os_thread_get_curr_id();
+ ThreadMap::iterator lb = m_threads.lower_bound(thread_id);
+
+ if (lb != m_threads.end()
+ && !(m_threads.key_comp()(thread_id, lb->first))) {
+
+ Latches* latches = lb->second;
+
+ m_mutex.exit();
+
+ return(latches);
+
+ } else if (!add) {
+
+ m_mutex.exit();
+
+ return(NULL);
+
+ } else {
+ typedef ThreadMap::value_type value_type;
+
+ Latches* latches = UT_NEW_NOKEY(Latches());
+
+ ut_a(latches != NULL);
+
+ latches->reserve(32);
+
+ m_threads.insert(lb, value_type(thread_id, latches));
+
+ m_mutex.exit();
+
+ return(latches);
+ }
+}
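+
+/* A note on the pattern above (a sketch, not part of this patch):
+the lower_bound()/key_comp() pair implements the usual single-probe
+"find or hinted insert" idiom for std::map; key, value and use() are
+placeholders here:
+
+	ThreadMap::iterator lb = m_threads.lower_bound(key);
+
+	if (lb != m_threads.end()
+	    && !m_threads.key_comp()(key, lb->first)) {
+		use(lb->second);	// key already present
+	} else {
+		// lb is a valid insertion hint for the new entry
+		m_threads.insert(lb, ThreadMap::value_type(key, value));
+	}
+*/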
+
+/** Checks if the level value exists in the thread's acquired latches.
+@param[in]	latches		the thread's existing (acquired) latches
+@param[in] level to lookup
+@return latch if found or 0 */
+const latch_t*
+LatchDebug::find(
+ const Latches* latches,
+ latch_level_t level) const UNIV_NOTHROW
+{
+ Latches::const_iterator end = latches->end();
+
+ for (Latches::const_iterator it = latches->begin(); it != end; ++it) {
+
+ if (it->m_level == level) {
+
+ return(it->m_latch);
+ }
+ }
+
+ return(0);
+}
+
+/** Checks if the level value exists in the thread's acquired latches.
+@param[in] level The level to lookup
+@return latch if found or NULL */
+const latch_t*
+LatchDebug::find(latch_level_t level)
+ UNIV_NOTHROW
+{
+ return(find(thread_latches(), level));
+}
+
+/**
+Adds a latch and its level in the thread level array. Allocates the memory
+for the array if called for the first time for this OS thread. Makes the checks
+against other latch levels stored in the array for this thread.
+@param[in] latch pointer to a mutex or an rw-lock
+@param[in] level level in the latching order
+@return the thread's latches */
+Latches*
+LatchDebug::check_order(
+ const latch_t* latch,
+ latch_level_t level)
+ UNIV_NOTHROW
+{
+ ut_ad(latch->get_level() != SYNC_LEVEL_VARYING);
+
+ Latches* latches = thread_latches(true);
+
+ /* NOTE that there is a problem with _NODE and _LEAF levels: if the
+ B-tree height changes, then a leaf can change to an internal node
+ or the other way around. We do not know at present if this can cause
+ unnecessary assertion failures below. */
+
+ switch (level) {
+ case SYNC_NO_ORDER_CHECK:
+ case SYNC_EXTERN_STORAGE:
+ case SYNC_TREE_NODE_FROM_HASH:
+ /* Do no order checking */
+ break;
+
+ case SYNC_TRX_SYS_HEADER:
+
+ if (srv_is_being_started) {
+ /* This is violated during trx_sys_create_rsegs()
+ when creating additional rollback segments when
+ upgrading in innobase_start_or_create_for_mysql(). */
+ break;
+ }
+
+ /* Fall through */
+
+ case SYNC_MONITOR_MUTEX:
+ case SYNC_RECV:
+ case SYNC_FTS_BG_THREADS:
+ case SYNC_WORK_QUEUE:
+ case SYNC_FTS_TOKENIZE:
+ case SYNC_FTS_OPTIMIZE:
+ case SYNC_FTS_CACHE:
+ case SYNC_FTS_CACHE_INIT:
+ case SYNC_PAGE_CLEANER:
+ case SYNC_LOG:
+ case SYNC_LOG_WRITE:
+ case SYNC_LOG_FLUSH_ORDER:
+ case SYNC_FILE_FORMAT_TAG:
+ case SYNC_DOUBLEWRITE:
+ case SYNC_SEARCH_SYS:
+ case SYNC_THREADS:
+ case SYNC_LOCK_SYS:
+ case SYNC_LOCK_WAIT_SYS:
+ case SYNC_TRX_SYS:
+ case SYNC_IBUF_BITMAP_MUTEX:
+ case SYNC_REDO_RSEG:
+ case SYNC_NOREDO_RSEG:
+ case SYNC_TRX_UNDO:
+ case SYNC_PURGE_LATCH:
+ case SYNC_PURGE_QUEUE:
+ case SYNC_DICT_AUTOINC_MUTEX:
+ case SYNC_DICT_OPERATION:
+ case SYNC_DICT_HEADER:
+ case SYNC_TRX_I_S_RWLOCK:
+ case SYNC_TRX_I_S_LAST_READ:
+ case SYNC_IBUF_MUTEX:
+ case SYNC_INDEX_ONLINE_LOG:
+ case SYNC_STATS_AUTO_RECALC:
+ case SYNC_POOL:
+ case SYNC_POOL_MANAGER:
+ case SYNC_RECV_WRITER:
+
+ basic_check(latches, level, level);
+ break;
+
+ case SYNC_ANY_LATCH:
+
+ /* Temporary workaround for LATCH_ID_RTR_*_MUTEX */
+ if (is_rtr_mutex(latch)) {
+
+ const Latched* latched = less(latches, level);
+
+ if (latched == NULL
+ || (latched != NULL
+ && is_rtr_mutex(latched->m_latch))) {
+
+ /* No violation */
+ break;
+
+ }
+
+ crash(latches, latched, level);
+
+ } else {
+ basic_check(latches, level, level);
+ }
+
+ break;
+
+ case SYNC_TRX:
+
+ /* Either the thread must own the lock_sys->mutex, or
+ it is allowed to own only ONE trx_t::mutex. */
+
+ if (less(latches, level) != NULL) {
+ basic_check(latches, level, level - 1);
+ ut_a(find(latches, SYNC_LOCK_SYS) != 0);
+ }
+ break;
+
+ case SYNC_BUF_FLUSH_LIST:
+ case SYNC_BUF_POOL:
+
+ /* We can have multiple mutexes of this type therefore we
+ can only check whether the greater than condition holds. */
+
+ basic_check(latches, level, level - 1);
+ break;
+
+ case SYNC_BUF_PAGE_HASH:
+
+ /* Multiple page_hash locks are only allowed during
+ buf_validate and that is where buf_pool mutex is already
+ held. */
+
+ /* Fall through */
+
+ case SYNC_BUF_BLOCK:
+
+ /* Either the thread must own the (buffer pool) buf_pool->mutex
+ or it is allowed to latch only ONE of (buffer block)
+ block->mutex or buf_pool->zip_mutex. */
+
+ if (less(latches, level) != NULL) {
+ basic_check(latches, level, level - 1);
+ ut_a(find(latches, SYNC_BUF_POOL) != 0);
+ }
+ break;
+
+ case SYNC_REC_LOCK:
+
+ if (find(latches, SYNC_LOCK_SYS) != 0) {
+ basic_check(latches, level, SYNC_REC_LOCK - 1);
+ } else {
+ basic_check(latches, level, SYNC_REC_LOCK);
+ }
+ break;
+
+ case SYNC_IBUF_BITMAP:
+
+ /* Either the thread must own the master mutex to all
+ the bitmap pages, or it is allowed to latch only ONE
+ bitmap page. */
+
+ if (find(latches, SYNC_IBUF_BITMAP_MUTEX) != 0) {
+
+ basic_check(latches, level, SYNC_IBUF_BITMAP - 1);
+
+ } else if (!srv_is_being_started) {
+
+ /* This is violated during trx_sys_create_rsegs()
+ when creating additional rollback segments during
+ upgrade. */
+
+ basic_check(latches, level, SYNC_IBUF_BITMAP);
+ }
+ break;
+
+ case SYNC_FSP_PAGE:
+ ut_a(find(latches, SYNC_FSP) != 0);
+ break;
+
+ case SYNC_FSP:
+
+ ut_a(find(latches, SYNC_FSP) != 0
+ || basic_check(latches, level, SYNC_FSP));
+ break;
+
+ case SYNC_TRX_UNDO_PAGE:
+
+ /* Purge is allowed to read in as many UNDO pages as it likes.
+ The purge thread can read the UNDO pages without any covering
+ mutex. */
+
+ ut_a(find(latches, SYNC_TRX_UNDO) != 0
+ || find(latches, SYNC_REDO_RSEG) != 0
+ || find(latches, SYNC_NOREDO_RSEG) != 0
+ || basic_check(latches, level, level - 1));
+ break;
+
+ case SYNC_RSEG_HEADER:
+
+ ut_a(find(latches, SYNC_REDO_RSEG) != 0
+ || find(latches, SYNC_NOREDO_RSEG) != 0);
+ break;
+
+ case SYNC_RSEG_HEADER_NEW:
+
+ ut_a(find(latches, SYNC_FSP_PAGE) != 0);
+ break;
+
+ case SYNC_TREE_NODE:
+
+ {
+ const latch_t* fsp_latch;
+
+ fsp_latch = find(latches, SYNC_FSP);
+
+ ut_a((fsp_latch != NULL
+ && fsp_latch->is_temp_fsp())
+ || find(latches, SYNC_INDEX_TREE) != 0
+ || find(latches, SYNC_DICT_OPERATION)
+ || basic_check(latches,
+ level, SYNC_TREE_NODE - 1));
+ }
+
+ break;
+
+ case SYNC_TREE_NODE_NEW:
+
+ ut_a(find(latches, SYNC_FSP_PAGE) != 0);
+ break;
+
+ case SYNC_INDEX_TREE:
+
+ basic_check(latches, level, SYNC_TREE_NODE - 1);
+ break;
+
+ case SYNC_IBUF_TREE_NODE:
+
+ ut_a(find(latches, SYNC_IBUF_INDEX_TREE) != 0
+ || basic_check(latches, level, SYNC_IBUF_TREE_NODE - 1));
+ break;
+
+ case SYNC_IBUF_TREE_NODE_NEW:
+
+ /* ibuf_add_free_page() allocates new pages for the change
+ buffer while only holding the tablespace x-latch. These
+ pre-allocated new pages may only be used while holding
+ ibuf_mutex, in btr_page_alloc_for_ibuf(). */
+
+ ut_a(find(latches, SYNC_IBUF_MUTEX) != 0
+ || find(latches, SYNC_FSP) != 0);
+ break;
+
+ case SYNC_IBUF_INDEX_TREE:
+
+ if (find(latches, SYNC_FSP) != 0) {
+ basic_check(latches, level, level - 1);
+ } else {
+ basic_check(latches, level, SYNC_IBUF_TREE_NODE - 1);
+ }
+ break;
+
+ case SYNC_IBUF_PESS_INSERT_MUTEX:
+
+ basic_check(latches, level, SYNC_FSP - 1);
+ ut_a(find(latches, SYNC_IBUF_MUTEX) == 0);
+ break;
+
+ case SYNC_IBUF_HEADER:
+
+ basic_check(latches, level, SYNC_FSP - 1);
+ ut_a(find(latches, SYNC_IBUF_MUTEX) == NULL);
+ ut_a(find(latches, SYNC_IBUF_PESS_INSERT_MUTEX) == NULL);
+ break;
+
+ case SYNC_DICT:
+ basic_check(latches, level, SYNC_DICT);
+ break;
+
+ case SYNC_MUTEX:
+ case SYNC_UNKNOWN:
+ case SYNC_LEVEL_VARYING:
+ case RW_LOCK_X:
+ case RW_LOCK_X_WAIT:
+ case RW_LOCK_S:
+ case RW_LOCK_SX:
+ case RW_LOCK_NOT_LOCKED:
+ /* These levels should never be set for a latch. */
+ ut_error;
+ break;
+ }
+
+ return(latches);
+}
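+
+/* Illustrative sketch (not part of this patch): how an ordering
+violation is caught. basic_check() calls less(), which scans the
+thread's held latches for any level <= the requested one, so a
+hypothetical sequence such as
+
+	mutex_enter(&fil_system->mutex);	// level SYNC_ANY_LATCH (low)
+	mutex_enter(&dict_sys->mutex);		// level SYNC_DICT (high)
+
+would make check_order() find the held SYNC_ANY_LATCH latch and call
+crash(), which prints both latches and asserts with ut_error. */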
+
+/** Removes a latch from the thread level array if it is found there.
+@param[in]	latch		The latch that was released/unlocked.
+It is not an error if the latch is not found, as we presently are not
+able to determine the level for every latch reservation the program
+does */
+void
+LatchDebug::unlock(const latch_t* latch)
+ UNIV_NOTHROW
+{
+ if (latch->get_level() == SYNC_LEVEL_VARYING) {
+ // We don't have varying level mutexes
+ ut_ad(latch->m_rw_lock);
+ }
+
+ Latches* latches;
+
+ if (*latch->get_name() == '.') {
+
+ /* Ignore diagnostic latches, starting with '.' */
+
+ } else if ((latches = thread_latches()) != NULL) {
+
+ Latches::reverse_iterator rend = latches->rend();
+
+ for (Latches::reverse_iterator it = latches->rbegin();
+ it != rend;
+ ++it) {
+
+ if (it->m_latch != latch) {
+
+ continue;
+ }
+
+ Latches::iterator i = it.base();
+
+ latches->erase(--i);
+
+ /* If this thread doesn't own any more
+ latches remove from the map.
+
+ FIXME: Perhaps use the master thread
+ to do purge. Or, do it from close connection.
+ This could be expensive. */
+
+ if (latches->empty()) {
+
+ m_mutex.enter();
+
+ os_thread_id_t thread_id;
+
+ thread_id = os_thread_get_curr_id();
+
+ m_threads.erase(thread_id);
+
+ m_mutex.exit();
+
+ UT_DELETE(latches);
+ }
+
+ return;
+ }
+
+ if (latch->get_level() != SYNC_LEVEL_VARYING) {
+ ib::error()
+ << "Couldn't find latch "
+ << sync_latch_get_name(latch->get_id());
+
+ print_latches(latches);
+
+ /** Must find the latch. */
+ ut_error;
+ }
+ }
+}
+
+/** Get the latch id from a latch name.
+@param[in] name Latch name
+@return latch id if found else LATCH_ID_NONE. */
+latch_id_t
+sync_latch_get_id(const char* name)
+{
+ LatchMetaData::const_iterator end = latch_meta.end();
+
+ /* Linear scan should be OK, this should be extremely rare. */
+
+ for (LatchMetaData::const_iterator it = latch_meta.begin();
+ it != end;
+ ++it) {
+
+ if (*it == NULL || (*it)->get_id() == LATCH_ID_NONE) {
+
+ continue;
+
+ } else if (strcmp((*it)->get_name(), name) == 0) {
+
+ return((*it)->get_id());
+ }
+ }
+
+ return(LATCH_ID_NONE);
+}
+
+/** Get the latch name from a sync level
+@param[in] level Latch level to lookup
+@return the latch name, or NULL if not found. */
+const char*
+sync_latch_get_name(latch_level_t level)
+{
+ LatchMetaData::const_iterator end = latch_meta.end();
+
+ /* Linear scan should be OK, this should be extremely rare. */
+
+ for (LatchMetaData::const_iterator it = latch_meta.begin();
+ it != end;
+ ++it) {
+
+ if (*it == NULL || (*it)->get_id() == LATCH_ID_NONE) {
+
+ continue;
+
+ } else if ((*it)->get_level() == level) {
+
+ return((*it)->get_name());
+ }
+ }
+
+ return(0);
+}
+
+/** Check if it is OK to acquire the latch.
+@param[in] latch latch type */
+void
+sync_check_lock_validate(const latch_t* latch)
+{
+ if (LatchDebug::instance() != NULL) {
+ LatchDebug::instance()->lock_validate(
+ latch, latch->get_level());
+ }
+}
+
+/** Note that the lock has been granted
+@param[in] latch latch type */
+void
+sync_check_lock_granted(const latch_t* latch)
+{
+ if (LatchDebug::instance() != NULL) {
+ LatchDebug::instance()->lock_granted(latch, latch->get_level());
+ }
+}
+
+/** Check if it is OK to acquire the latch.
+@param[in] latch latch type
+@param[in] level Latch level */
+void
+sync_check_lock(
+ const latch_t* latch,
+ latch_level_t level)
+{
+ if (LatchDebug::instance() != NULL) {
+
+ ut_ad(latch->get_level() == SYNC_LEVEL_VARYING);
+ ut_ad(latch->get_id() == LATCH_ID_BUF_BLOCK_LOCK);
+
+ LatchDebug::instance()->lock_validate(latch, level);
+ LatchDebug::instance()->lock_granted(latch, level);
+ }
+}
+
+/** Check if it is OK to re-acquire the lock.
+@param[in] latch RW-LOCK to relock (recursive X locks) */
+void
+sync_check_relock(const latch_t* latch)
+{
+ if (LatchDebug::instance() != NULL) {
+ LatchDebug::instance()->relock(latch);
+ }
+}
+
+/** Removes a latch from the thread level array if it is found there.
+@param[in] latch The latch to unlock */
+void
+sync_check_unlock(const latch_t* latch)
+{
+ if (LatchDebug::instance() != NULL) {
+ LatchDebug::instance()->unlock(latch);
+ }
+}
+
+/** Checks if the level array for the current thread contains a
+mutex or rw-latch at the specified level.
+@param[in] level to find
+@return a matching latch, or NULL if not found */
+const latch_t*
+sync_check_find(latch_level_t level)
+{
+ if (LatchDebug::instance() != NULL) {
+ return(LatchDebug::instance()->find(level));
+ }
+
+ return(NULL);
+}
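+
+/* Illustrative usage (a sketch, not part of this patch):
+sync_check_find() supports debug assertions about latches the calling
+thread currently holds, e.g.
+
+	ut_ad(sync_check_find(SYNC_DICT) != NULL);
+*/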
+
+/** Iterate over the thread's latches.
+@param[in,out] functor called for each element.
+@return true if the functor returns true for any element */
+bool
+sync_check_iterate(const sync_check_functor_t& functor)
+{
+ if (LatchDebug* debug = LatchDebug::instance()) {
+ return(debug->for_each(functor));
+ }
+
+ return(false);
+}
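+
+/* Illustrative usage (a sketch, not part of this patch; assumes
+sync_check_functor_t exposes the const bool operator()(latch_level_t)
+that for_each() above invokes). A hypothetical functor that flags a
+held dict_sys mutex:
+
+	struct dict_held_t : public sync_check_functor_t {
+		bool operator()(const latch_level_t level) const
+		{
+			return(level == SYNC_DICT);
+		}
+	};
+
+	ut_ad(!sync_check_iterate(dict_held_t()));
+*/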
+
+/** Enable sync order checking.
+
+Note: We don't enforce any synchronisation checks. The caller must ensure
+that no races can occur */
+void
+sync_check_enable()
+{
+ if (!srv_sync_debug) {
+
+ return;
+ }
+
+ /* We should always call this before we create threads. */
+
+ LatchDebug::create_instance();
+}
+
+/** Initialise the debug data structures */
+void
+LatchDebug::init()
+ UNIV_NOTHROW
+{
+ mutex_create(LATCH_ID_RW_LOCK_DEBUG, &rw_lock_debug_mutex);
+}
+
+/** Shutdown the latch debug checking
+
+Note: We don't enforce any synchronisation checks. The caller must ensure
+that no races can occur */
+void
+LatchDebug::shutdown()
+ UNIV_NOTHROW
+{
+ mutex_free(&rw_lock_debug_mutex);
+
+ ut_a(s_initialized);
+
+ s_initialized = false;
+
+ UT_DELETE(s_instance);
+
+ LatchDebug::s_instance = NULL;
+}
+
+/** Acquires the debug mutex. We cannot use the mutex defined in sync0sync,
+because the debug mutex is also acquired in sync0arr while holding the OS
+mutex protecting the sync array, and the ordinary mutex_enter might
+recursively call routines in sync0arr, leading to a deadlock on the OS
+mutex. */
+void
+rw_lock_debug_mutex_enter()
+{
+ mutex_enter(&rw_lock_debug_mutex);
+}
+
+/** Releases the debug mutex. */
+void
+rw_lock_debug_mutex_exit()
+{
+ mutex_exit(&rw_lock_debug_mutex);
+}
+#endif /* UNIV_DEBUG */
+
+/* Meta data for all the InnoDB latches. If a latch is not recorded
+here then it will not be considered for the deadlock checks. */
+LatchMetaData latch_meta;
+
+/** Load the latch meta data. */
+static
+void
+sync_latch_meta_init()
+ UNIV_NOTHROW
+{
+ latch_meta.resize(LATCH_ID_MAX);
+
+ /* The latches should be ordered on latch_id_t, so that we can
+ index directly into the vector to update and fetch the meta-data. */
+
+ LATCH_ADD_MUTEX(AUTOINC, SYNC_DICT_AUTOINC_MUTEX, autoinc_mutex_key);
+
+#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
+ LATCH_ADD_MUTEX(BUF_BLOCK_MUTEX, SYNC_BUF_BLOCK, PFS_NOT_INSTRUMENTED);
+#else
+ LATCH_ADD_MUTEX(BUF_BLOCK_MUTEX, SYNC_BUF_BLOCK,
+ buffer_block_mutex_key);
+#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
+
+ LATCH_ADD_MUTEX(BUF_POOL, SYNC_BUF_POOL, buf_pool_mutex_key);
+
+ LATCH_ADD_MUTEX(BUF_POOL_ZIP, SYNC_BUF_BLOCK, buf_pool_zip_mutex_key);
+
+ LATCH_ADD_MUTEX(CACHE_LAST_READ, SYNC_TRX_I_S_LAST_READ,
+ cache_last_read_mutex_key);
+
+ LATCH_ADD_MUTEX(DICT_FOREIGN_ERR, SYNC_NO_ORDER_CHECK,
+ dict_foreign_err_mutex_key);
+
+ LATCH_ADD_MUTEX(DICT_SYS, SYNC_DICT, dict_sys_mutex_key);
+
+ LATCH_ADD_MUTEX(FILE_FORMAT_MAX, SYNC_FILE_FORMAT_TAG,
+ file_format_max_mutex_key);
+
+ LATCH_ADD_MUTEX(FIL_SYSTEM, SYNC_ANY_LATCH, fil_system_mutex_key);
+
+ LATCH_ADD_MUTEX(FLUSH_LIST, SYNC_BUF_FLUSH_LIST, flush_list_mutex_key);
+
+ LATCH_ADD_MUTEX(FTS_BG_THREADS, SYNC_FTS_BG_THREADS,
+ fts_bg_threads_mutex_key);
+
+ LATCH_ADD_MUTEX(FTS_DELETE, SYNC_FTS_OPTIMIZE, fts_delete_mutex_key);
+
+ LATCH_ADD_MUTEX(FTS_OPTIMIZE, SYNC_FTS_OPTIMIZE,
+ fts_optimize_mutex_key);
+
+ LATCH_ADD_MUTEX(FTS_DOC_ID, SYNC_FTS_OPTIMIZE, fts_doc_id_mutex_key);
+
+ LATCH_ADD_MUTEX(FTS_PLL_TOKENIZE, SYNC_FTS_TOKENIZE,
+ fts_pll_tokenize_mutex_key);
+
+ LATCH_ADD_MUTEX(HASH_TABLE_MUTEX, SYNC_BUF_PAGE_HASH,
+ hash_table_mutex_key);
+
+ LATCH_ADD_MUTEX(IBUF_BITMAP, SYNC_IBUF_BITMAP_MUTEX,
+ ibuf_bitmap_mutex_key);
+
+ LATCH_ADD_MUTEX(IBUF, SYNC_IBUF_MUTEX, ibuf_mutex_key);
+
+ LATCH_ADD_MUTEX(IBUF_PESSIMISTIC_INSERT, SYNC_IBUF_PESS_INSERT_MUTEX,
+ ibuf_pessimistic_insert_mutex_key);
+
+ LATCH_ADD_MUTEX(LOG_SYS, SYNC_LOG, log_sys_mutex_key);
+
+ LATCH_ADD_MUTEX(LOG_WRITE, SYNC_LOG_WRITE, log_sys_write_mutex_key);
+
+ LATCH_ADD_MUTEX(LOG_FLUSH_ORDER, SYNC_LOG_FLUSH_ORDER,
+ log_flush_order_mutex_key);
+
+ LATCH_ADD_MUTEX(MUTEX_LIST, SYNC_NO_ORDER_CHECK, mutex_list_mutex_key);
+
+ LATCH_ADD_MUTEX(PAGE_CLEANER, SYNC_PAGE_CLEANER,
+ page_cleaner_mutex_key);
+
+ LATCH_ADD_MUTEX(PURGE_SYS_PQ, SYNC_PURGE_QUEUE,
+ purge_sys_pq_mutex_key);
+
+ LATCH_ADD_MUTEX(RECALC_POOL, SYNC_STATS_AUTO_RECALC,
+ recalc_pool_mutex_key);
+
+ LATCH_ADD_MUTEX(RECV_SYS, SYNC_RECV, recv_sys_mutex_key);
+
+ LATCH_ADD_MUTEX(RECV_WRITER, SYNC_RECV_WRITER, recv_writer_mutex_key);
+
+ LATCH_ADD_MUTEX(REDO_RSEG, SYNC_REDO_RSEG, redo_rseg_mutex_key);
+
+ LATCH_ADD_MUTEX(NOREDO_RSEG, SYNC_NOREDO_RSEG, noredo_rseg_mutex_key);
+
+#ifdef UNIV_DEBUG
+ /* Mutex names starting with '.' are not tracked. They are assumed
+ to be diagnostic mutexes used in debugging. */
+ latch_meta[LATCH_ID_RW_LOCK_DEBUG] =
+ LATCH_ADD_MUTEX(RW_LOCK_DEBUG,
+ SYNC_NO_ORDER_CHECK,
+ rw_lock_debug_mutex_key);
+#endif /* UNIV_DEBUG */
+
+ LATCH_ADD_MUTEX(RTR_SSN_MUTEX, SYNC_ANY_LATCH, rtr_ssn_mutex_key);
+
+ LATCH_ADD_MUTEX(RTR_ACTIVE_MUTEX, SYNC_ANY_LATCH,
+ rtr_active_mutex_key);
+
+ LATCH_ADD_MUTEX(RTR_MATCH_MUTEX, SYNC_ANY_LATCH, rtr_match_mutex_key);
+
+ LATCH_ADD_MUTEX(RTR_PATH_MUTEX, SYNC_ANY_LATCH, rtr_path_mutex_key);
+
+ LATCH_ADD_MUTEX(RW_LOCK_LIST, SYNC_NO_ORDER_CHECK,
+ rw_lock_list_mutex_key);
+
+ LATCH_ADD_MUTEX(RW_LOCK_MUTEX, SYNC_NO_ORDER_CHECK, rw_lock_mutex_key);
+
+ LATCH_ADD_MUTEX(SRV_INNODB_MONITOR, SYNC_NO_ORDER_CHECK,
+ srv_innodb_monitor_mutex_key);
+
+ LATCH_ADD_MUTEX(SRV_MISC_TMPFILE, SYNC_ANY_LATCH,
+ srv_misc_tmpfile_mutex_key);
+
+ LATCH_ADD_MUTEX(SRV_MONITOR_FILE, SYNC_NO_ORDER_CHECK,
+ srv_monitor_file_mutex_key);
+
+ LATCH_ADD_MUTEX(BUF_DBLWR, SYNC_DOUBLEWRITE, buf_dblwr_mutex_key);
+
+ LATCH_ADD_MUTEX(TRX_UNDO, SYNC_TRX_UNDO, trx_undo_mutex_key);
+
+ LATCH_ADD_MUTEX(TRX_POOL, SYNC_POOL, trx_pool_mutex_key);
+
+ LATCH_ADD_MUTEX(TRX_POOL_MANAGER, SYNC_POOL_MANAGER,
+ trx_pool_manager_mutex_key);
+
+ LATCH_ADD_MUTEX(TRX, SYNC_TRX, trx_mutex_key);
+
+ LATCH_ADD_MUTEX(LOCK_SYS, SYNC_LOCK_SYS, lock_mutex_key);
+
+ LATCH_ADD_MUTEX(LOCK_SYS_WAIT, SYNC_LOCK_WAIT_SYS,
+ lock_wait_mutex_key);
+
+ LATCH_ADD_MUTEX(TRX_SYS, SYNC_TRX_SYS, trx_sys_mutex_key);
+
+ LATCH_ADD_MUTEX(SRV_SYS, SYNC_THREADS, srv_sys_mutex_key);
+
+ LATCH_ADD_MUTEX(SRV_SYS_TASKS, SYNC_ANY_LATCH, srv_threads_mutex_key);
+
+ LATCH_ADD_MUTEX(PAGE_ZIP_STAT_PER_INDEX, SYNC_ANY_LATCH,
+ page_zip_stat_per_index_mutex_key);
+
+#ifndef PFS_SKIP_EVENT_MUTEX
+ LATCH_ADD_MUTEX(EVENT_MANAGER, SYNC_NO_ORDER_CHECK,
+ event_manager_mutex_key);
+#else
+ LATCH_ADD_MUTEX(EVENT_MANAGER, SYNC_NO_ORDER_CHECK,
+ PFS_NOT_INSTRUMENTED);
+#endif /* !PFS_SKIP_EVENT_MUTEX */
+
+ LATCH_ADD_MUTEX(EVENT_MUTEX, SYNC_NO_ORDER_CHECK, event_mutex_key);
+
+ LATCH_ADD_MUTEX(SYNC_ARRAY_MUTEX, SYNC_NO_ORDER_CHECK,
+ sync_array_mutex_key);
+
+ LATCH_ADD_MUTEX(ZIP_PAD_MUTEX, SYNC_NO_ORDER_CHECK, zip_pad_mutex_key);
+
+ LATCH_ADD_MUTEX(OS_AIO_READ_MUTEX, SYNC_NO_ORDER_CHECK,
+ PFS_NOT_INSTRUMENTED);
+
+ LATCH_ADD_MUTEX(OS_AIO_WRITE_MUTEX, SYNC_NO_ORDER_CHECK,
+ PFS_NOT_INSTRUMENTED);
+
+ LATCH_ADD_MUTEX(OS_AIO_LOG_MUTEX, SYNC_NO_ORDER_CHECK,
+ PFS_NOT_INSTRUMENTED);
+
+ LATCH_ADD_MUTEX(OS_AIO_IBUF_MUTEX, SYNC_NO_ORDER_CHECK,
+ PFS_NOT_INSTRUMENTED);
+
+ LATCH_ADD_MUTEX(OS_AIO_SYNC_MUTEX, SYNC_NO_ORDER_CHECK,
+ PFS_NOT_INSTRUMENTED);
+
+ LATCH_ADD_MUTEX(ROW_DROP_LIST, SYNC_NO_ORDER_CHECK,
+ row_drop_list_mutex_key);
+
+ LATCH_ADD_MUTEX(INDEX_ONLINE_LOG, SYNC_INDEX_ONLINE_LOG,
+ index_online_log_key);
+
+ LATCH_ADD_MUTEX(WORK_QUEUE, SYNC_WORK_QUEUE, PFS_NOT_INSTRUMENTED);
+
+ // Add the RW locks
+ LATCH_ADD_RWLOCK(BTR_SEARCH, SYNC_SEARCH_SYS, btr_search_latch_key);
+
+ LATCH_ADD_RWLOCK(BUF_BLOCK_LOCK, SYNC_LEVEL_VARYING,
+ buf_block_lock_key);
+
+#ifdef UNIV_DEBUG
+ LATCH_ADD_RWLOCK(BUF_BLOCK_DEBUG, SYNC_LEVEL_VARYING,
+ buf_block_debug_latch_key);
+#endif /* UNIV_DEBUG */
+
+ LATCH_ADD_RWLOCK(DICT_OPERATION, SYNC_DICT_OPERATION,
+ dict_operation_lock_key);
+
+ LATCH_ADD_RWLOCK(CHECKPOINT, SYNC_NO_ORDER_CHECK, checkpoint_lock_key);
+
+ LATCH_ADD_RWLOCK(FIL_SPACE, SYNC_FSP, fil_space_latch_key);
+
+ LATCH_ADD_RWLOCK(FTS_CACHE, SYNC_FTS_CACHE, fts_cache_rw_lock_key);
+
+ LATCH_ADD_RWLOCK(FTS_CACHE_INIT, SYNC_FTS_CACHE_INIT,
+ fts_cache_init_rw_lock_key);
+
+ LATCH_ADD_RWLOCK(TRX_I_S_CACHE, SYNC_TRX_I_S_RWLOCK,
+ trx_i_s_cache_lock_key);
+
+ LATCH_ADD_RWLOCK(TRX_PURGE, SYNC_PURGE_LATCH, trx_purge_latch_key);
+
+ LATCH_ADD_RWLOCK(IBUF_INDEX_TREE, SYNC_IBUF_INDEX_TREE,
+ index_tree_rw_lock_key);
+
+ LATCH_ADD_RWLOCK(INDEX_TREE, SYNC_INDEX_TREE, index_tree_rw_lock_key);
+
+ LATCH_ADD_RWLOCK(DICT_TABLE_STATS, SYNC_INDEX_TREE,
+ dict_table_stats_key);
+
+ LATCH_ADD_RWLOCK(HASH_TABLE_RW_LOCK, SYNC_BUF_PAGE_HASH,
+ hash_table_locks_key);
+
+ LATCH_ADD_MUTEX(SYNC_DEBUG_MUTEX, SYNC_NO_ORDER_CHECK,
+ PFS_NOT_INSTRUMENTED);
+
+ /* JAN: TODO: Add PFS instrumentation */
+ LATCH_ADD_MUTEX(SCRUB_STAT_MUTEX, SYNC_NO_ORDER_CHECK,
+ PFS_NOT_INSTRUMENTED);
+ LATCH_ADD_MUTEX(DEFRAGMENT_MUTEX, SYNC_NO_ORDER_CHECK,
+ PFS_NOT_INSTRUMENTED);
+ LATCH_ADD_MUTEX(BTR_DEFRAGMENT_MUTEX, SYNC_NO_ORDER_CHECK,
+ PFS_NOT_INSTRUMENTED);
+ LATCH_ADD_MUTEX(MTFLUSH_THREAD_MUTEX, SYNC_NO_ORDER_CHECK,
+ PFS_NOT_INSTRUMENTED);
+ LATCH_ADD_MUTEX(MTFLUSH_MUTEX, SYNC_NO_ORDER_CHECK,
+ PFS_NOT_INSTRUMENTED);
+ LATCH_ADD_MUTEX(FIL_CRYPT_MUTEX, SYNC_NO_ORDER_CHECK,
+ PFS_NOT_INSTRUMENTED);
+ LATCH_ADD_MUTEX(FIL_CRYPT_STAT_MUTEX, SYNC_NO_ORDER_CHECK,
+ PFS_NOT_INSTRUMENTED);
+ LATCH_ADD_MUTEX(FIL_CRYPT_DATA_MUTEX, SYNC_NO_ORDER_CHECK,
+ PFS_NOT_INSTRUMENTED);
+ LATCH_ADD_MUTEX(FIL_CRYPT_THREADS_MUTEX, SYNC_NO_ORDER_CHECK,
+ PFS_NOT_INSTRUMENTED);
+
+ latch_id_t id = LATCH_ID_NONE;
+
+ /* The array should be ordered on latch ID. We need to
+ index directly into it from the mutex policy to update
+ the counters and access the meta-data. */
+
+ for (LatchMetaData::iterator it = latch_meta.begin();
+ it != latch_meta.end();
+ ++it) {
+
+ const latch_meta_t* meta = *it;
+
+ /* Skip blank entries */
+ if (meta == NULL || meta->get_id() == LATCH_ID_NONE) {
+ continue;
+ }
+
+ ut_a(id < meta->get_id());
+
+ id = meta->get_id();
+ }
+}
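+
+/* Because latch_meta is indexed by latch_id_t, a meta data lookup is a
+plain vector access (illustrative):
+
+	const latch_meta_t*	meta = latch_meta[LATCH_ID_BUF_POOL];
+	ut_ad(meta->get_id() == LATCH_ID_BUF_POOL);
+*/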
+
+/** Destroy the latch meta data */
+static
+void
+sync_latch_meta_destroy()
+{
+ for (LatchMetaData::iterator it = latch_meta.begin();
+ it != latch_meta.end();
+ ++it) {
+
+ UT_DELETE(*it);
+ }
+
+ latch_meta.clear();
+}
+
+/** Track the file name and line number where each mutex is created. This is
+to avoid storing { const char* name; uint16_t line; } in every instance,
+which would push sizeof(Mutex) above 64 bytes. We use a lookup table to
+store the values separately. Fetching them is very rare, being required
+only for diagnostic purposes, and we don't create/destroy mutexes that
+frequently. */
+struct CreateTracker {
+
+ /** Constructor */
+ CreateTracker()
+ UNIV_NOTHROW
+ {
+ m_mutex.init();
+ }
+
+ /** Destructor */
+ ~CreateTracker()
+ UNIV_NOTHROW
+ {
+ ut_ad(m_files.empty());
+
+ m_mutex.destroy();
+ }
+
+ /** Register where the latch was created
+ @param[in] ptr Latch instance
+ @param[in] filename Where created
+ @param[in] line Line number in filename */
+ void register_latch(
+ const void* ptr,
+ const char* filename,
+ uint16_t line)
+ UNIV_NOTHROW
+ {
+ m_mutex.enter();
+
+ Files::iterator lb = m_files.lower_bound(ptr);
+
+ ut_ad(lb == m_files.end()
+ || m_files.key_comp()(ptr, lb->first));
+
+ typedef Files::value_type value_type;
+
+ m_files.insert(lb, value_type(ptr, File(filename, line)));
+
+ m_mutex.exit();
+ }
+
+ /** Deregister a latch - when it is destroyed
+ @param[in] ptr Latch instance being destroyed */
+ void deregister_latch(const void* ptr)
+ UNIV_NOTHROW
+ {
+ m_mutex.enter();
+
+ Files::iterator lb = m_files.lower_bound(ptr);
+
+ ut_ad(lb != m_files.end()
+ && !(m_files.key_comp()(ptr, lb->first)));
+
+ m_files.erase(lb);
+
+ m_mutex.exit();
+ }
+
+ /** Get the create string, format is "name:line"
+ @param[in] ptr Latch instance
+ @return the create string or "" if not found */
+ std::string get(const void* ptr)
+ UNIV_NOTHROW
+ {
+ m_mutex.enter();
+
+ std::string created;
+
+ Files::iterator lb = m_files.lower_bound(ptr);
+
+ if (lb != m_files.end()
+ && !(m_files.key_comp()(ptr, lb->first))) {
+
+ std::ostringstream msg;
+
+ msg << lb->second.m_name << ":" << lb->second.m_line;
+
+ created = msg.str();
+ }
+
+ m_mutex.exit();
+
+ return(created);
+ }
+
+private:
+ /** For tracking the filename and line number */
+ struct File {
+
+ /** Constructor */
+ File() UNIV_NOTHROW : m_name(), m_line() { }
+
+ /** Constructor
+ @param[in] name Filename where created
+ @param[in] line Line number where created */
+ File(const char* name, uint16_t line)
+ UNIV_NOTHROW
+ :
+ m_name(sync_basename(name)),
+ m_line(line)
+ {
+ /* No op */
+ }
+
+ /** Filename where created */
+ std::string m_name;
+
+ /** Line number where created */
+ uint16_t m_line;
+ };
+
+ /** Map the mutex instance to where it was created */
+ typedef std::map<
+ const void*,
+ File,
+ std::less<const void*>,
+ ut_allocator<std::pair<const void* const, File> > >
+ Files;
+
+ typedef OSMutex Mutex;
+
+ /** Mutex protecting m_files */
+ Mutex m_mutex;
+
+ /** Track the latch creation */
+ Files m_files;
+};
+
+/** Track latch creation locations, to reduce the size of the latch objects */
+static CreateTracker create_tracker;
+
+/** Register a latch, called when it is created
+@param[in] ptr Latch instance that was created
+@param[in] filename Filename where it was created
+@param[in] line Line number in filename */
+void
+sync_file_created_register(
+ const void* ptr,
+ const char* filename,
+ uint16_t line)
+{
+ create_tracker.register_latch(ptr, filename, line);
+}
+
+/** Deregister a latch, called when it is destroyed
+@param[in] ptr Latch to be destroyed */
+void
+sync_file_created_deregister(const void* ptr)
+{
+ create_tracker.deregister_latch(ptr);
+}
+
+/** Get the string describing where the latch was created. Its format is "name:line"
+@param[in] ptr Latch instance
+@return created information or "" if can't be found */
+std::string
+sync_file_created_get(const void* ptr)
+{
+ return(create_tracker.get(ptr));
+}
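+
+/* Lifetime sketch (illustrative; "latch" stands for any latch
+instance): register on creation, query for diagnostics, deregister on
+destruction:
+
+	sync_file_created_register(latch, __FILE__, uint16_t(__LINE__));
+	...
+	std::string loc = sync_file_created_get(latch);	// e.g. "buf0buf.cc:410"
+	...
+	sync_file_created_deregister(latch);
+*/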
+
+/** Initializes the synchronization data structures. */
+void
+sync_check_init()
+{
+ ut_ad(!LatchDebug::s_initialized);
+ ut_d(LatchDebug::s_initialized = true);
+
+ sync_latch_meta_init();
+
+ /* Init the rw-lock & mutex list and create the mutex to protect it. */
+
+ UT_LIST_INIT(rw_lock_list, &rw_lock_t::list);
+
+ mutex_create(LATCH_ID_RW_LOCK_LIST, &rw_lock_list_mutex);
+
+ ut_d(LatchDebug::init());
+
+ sync_array_init(OS_THREAD_MAX_N);
+}
+
+/** Free the InnoDB synchronization data structures. */
+void
+sync_check_close()
+{
+ ut_d(LatchDebug::shutdown());
+
+ mutex_free(&rw_lock_list_mutex);
+
+ sync_array_close();
+
+ sync_latch_meta_destroy();
+}
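+
+/* Expected call pairing (illustrative): sync_check_init() runs once at
+startup, before any InnoDB threads are created, and sync_check_close()
+runs at shutdown after they have exited:
+
+	sync_check_init();
+	... create threads, run the server, join threads ...
+	sync_check_close();
+*/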
+
diff --git a/storage/innobase/sync/sync0rw.cc b/storage/innobase/sync/sync0rw.cc
index 46428e266c6..df710a53cf6 100644
--- a/storage/innobase/sync/sync0rw.cc
+++ b/storage/innobase/sync/sync0rw.cc
@@ -1,7 +1,8 @@
/*****************************************************************************
-Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
+Copyright (c) 2017, 2019, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -31,39 +32,58 @@ Created 9/11/1995 Heikki Tuuri
*******************************************************/
#include "sync0rw.h"
-#ifdef UNIV_NONINL
-#include "sync0rw.ic"
-#include "sync0arr.ic"
-#endif
-
-#include "os0thread.h"
-#include "mem0mem.h"
-#include "srv0srv.h"
-#include "os0sync.h" /* for INNODB_RW_LOCKS_USE_ATOMICS */
-#include "ha_prototypes.h"
#include "my_cpu.h"
+#include <my_sys.h>
/*
IMPLEMENTATION OF THE RW_LOCK
=============================
The status of a rw_lock is held in lock_word. The initial value of lock_word is
X_LOCK_DECR. lock_word is decremented by 1 for each s-lock and by X_LOCK_DECR
-for each x-lock. This describes the lock state for each value of lock_word:
-
-lock_word == X_LOCK_DECR: Unlocked.
-0 < lock_word < X_LOCK_DECR: Read locked, no waiting writers.
- (X_LOCK_DECR - lock_word) is the
- number of readers that hold the lock.
-lock_word == 0: Write locked
--X_LOCK_DECR < lock_word < 0: Read locked, with a waiting writer.
- (-lock_word) is the number of readers
- that hold the lock.
-lock_word <= -X_LOCK_DECR: Recursively write locked. lock_word has been
- decremented by X_LOCK_DECR for the first lock
- and the first recursive lock, then by 1 for
- each recursive lock thereafter.
- So the number of locks is:
- (lock_copy == 0) ? 1 : 2 - (lock_copy + X_LOCK_DECR)
+or 1 for each x-lock. This describes the lock state for each value of lock_word:
+
+lock_word == X_LOCK_DECR: Unlocked.
+X_LOCK_HALF_DECR < lock_word < X_LOCK_DECR:
+ S locked, no waiting writers.
+ (X_LOCK_DECR - lock_word) is the number
+ of S locks.
+lock_word == X_LOCK_HALF_DECR: SX locked, no waiting writers.
+0 < lock_word < X_LOCK_HALF_DECR:
+ SX locked AND S locked, no waiting writers.
+ (X_LOCK_HALF_DECR - lock_word) is the number
+ of S locks.
+lock_word == 0: X locked, no waiting writers.
+-X_LOCK_HALF_DECR < lock_word < 0:
+ S locked, with a waiting writer.
+ (-lock_word) is the number of S locks.
+lock_word == -X_LOCK_HALF_DECR: X locked and SX locked, no waiting writers.
+-X_LOCK_DECR < lock_word < -X_LOCK_HALF_DECR:
+ S locked, with a waiting writer
+ which has SX lock.
+ -(lock_word + X_LOCK_HALF_DECR) is the number
+ of S locks.
+lock_word == -X_LOCK_DECR: X locked with recursive X lock (2 X locks).
+-(X_LOCK_DECR + X_LOCK_HALF_DECR) < lock_word < -X_LOCK_DECR:
+ X locked. The number of the X locks is:
+ 2 - (lock_word + X_LOCK_DECR)
+lock_word == -(X_LOCK_DECR + X_LOCK_HALF_DECR):
+ X locked with recursive X lock (2 X locks)
+ and SX locked.
+lock_word < -(X_LOCK_DECR + X_LOCK_HALF_DECR):
+ X locked and SX locked.
+ The number of the X locks is:
+ 2 - (lock_word + X_LOCK_DECR + X_LOCK_HALF_DECR)
+
+ LOCK COMPATIBILITY MATRIX
+
+ | S|SX| X|
+ --+--+--+--+
+ S| +| +| -|
+ --+--+--+--+
+ SX| +| -| -|
+ --+--+--+--+
+ X| -| -| -|
+ --+--+--+--+
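+
+For example, with the customary values X_LOCK_DECR = 0x20000000 and
+X_LOCK_HALF_DECR = 0x10000000 (assumed here only for illustration):
+
+	unlocked		lock_word == 0x20000000
+	2 S locks		lock_word == 0x1FFFFFFE
+	1 X lock		lock_word == 0x00000000
+	2 X locks (recursive)	lock_word == -0x20000000
+	3 X locks (recursive)	lock_word == -0x20000001,
+				and 2 - (lock_word + X_LOCK_DECR) == 3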
The lock_word is always read and updated atomically and consistently, so that
it always represents the state of the lock, and the state of the lock changes
@@ -71,35 +91,17 @@ with a single atomic operation. This lock_word holds all of the information
that a thread needs in order to determine if it is eligible to gain the lock
or if it must spin or sleep. The one exception to this is that writer_thread
must be verified before recursive write locks: to solve this scenario, we make
-writer_thread readable by all threads, but only writeable by the x-lock holder.
+writer_thread readable by all threads, but only writeable by the x-lock or
+sx-lock holder.
The other members of the lock obey the following rules to remain consistent:
-recursive: This and the writer_thread field together control the
- behaviour of recursive x-locking.
- lock->recursive must be FALSE in following states:
- 1) The writer_thread contains garbage i.e.: the
- lock has just been initialized.
- 2) The lock is not x-held and there is no
- x-waiter waiting on WAIT_EX event.
- 3) The lock is x-held or there is an x-waiter
- waiting on WAIT_EX event but the 'pass' value
- is non-zero.
- lock->recursive is TRUE iff:
- 1) The lock is x-held or there is an x-waiter
- waiting on WAIT_EX event and the 'pass' value
- is zero.
- This flag must be set after the writer_thread field
- has been updated with a memory ordering barrier.
- It is unset before the lock_word has been incremented.
-writer_thread: Is used only in recursive x-locking. Can only be safely
- read iff lock->recursive flag is TRUE.
- This field is uninitialized at lock creation time and
- is updated atomically when x-lock is acquired or when
- move_ownership is called. A thread is only allowed to
- set the value of this field to it's thread_id i.e.: a
- thread cannot set writer_thread to some other thread's
- id.
+writer_thread: Is used only in recursive x-locking or sx-locking.
+ This field is 0 at lock creation time and is updated
+ when x-lock is acquired or when move_ownership is called.
+ A thread is only allowed to set the value of this field to
+ its thread_id, i.e. a thread cannot set writer_thread to
+ some other thread's id.
waiters: May be set to 1 anytime, but to avoid unnecessary wake-up
signals, it should only be set to 1 when there are threads
waiting on event. Must be 1 when a writer starts waiting to
@@ -136,28 +138,13 @@ wait_ex_event: A thread may only wait on the wait_ex_event after it has
Verify lock_word == 0 (waiting thread holds x_lock)
*/
-UNIV_INTERN rw_lock_stats_t rw_lock_stats;
+rw_lock_stats_t rw_lock_stats;
/* The global list of rw-locks */
-UNIV_INTERN rw_lock_list_t rw_lock_list;
-UNIV_INTERN ib_mutex_t rw_lock_list_mutex;
-
-#ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t rw_lock_list_mutex_key;
-UNIV_INTERN mysql_pfs_key_t rw_lock_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-#ifdef UNIV_SYNC_DEBUG
-/* The global mutex which protects debug info lists of all rw-locks.
-To modify the debug info list of an rw-lock, this mutex has to be
-acquired in addition to the mutex protecting the lock. */
-
-UNIV_INTERN os_fast_mutex_t rw_lock_debug_mutex;
-
-# ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t rw_lock_debug_mutex_key;
-# endif
+rw_lock_list_t rw_lock_list;
+ib_mutex_t rw_lock_list_mutex;
+#ifdef UNIV_DEBUG
/******************************************************************//**
Creates a debug info struct. */
static
@@ -174,13 +161,13 @@ rw_lock_debug_free(
/******************************************************************//**
Creates a debug info struct.
-@return own: debug info struct */
+@return own: debug info struct */
static
rw_lock_debug_t*
rw_lock_debug_create(void)
/*======================*/
{
- return((rw_lock_debug_t*) mem_alloc(sizeof(rw_lock_debug_t)));
+ return((rw_lock_debug_t*) ut_malloc_nokey(sizeof(rw_lock_debug_t)));
}
/******************************************************************//**
@@ -191,84 +178,70 @@ rw_lock_debug_free(
/*===============*/
rw_lock_debug_t* info)
{
- mem_free(info);
+ ut_free(info);
}
-#endif /* UNIV_SYNC_DEBUG */
+#endif /* UNIV_DEBUG */
/******************************************************************//**
Creates, or rather, initializes an rw-lock object in a specified memory
location (which must be appropriately aligned). The rw-lock is initialized
to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
is necessary only if the memory block containing it is freed. */
-UNIV_INTERN
void
rw_lock_create_func(
/*================*/
rw_lock_t* lock, /*!< in: pointer to memory */
#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
+ latch_level_t level, /*!< in: level */
#endif /* UNIV_DEBUG */
- const char* cmutex_name, /*!< in: mutex name */
const char* cfile_name, /*!< in: file name where created */
- ulint cline) /*!< in: file line where created */
+ unsigned cline) /*!< in: file line where created */
{
+#if defined(UNIV_DEBUG) && !defined(UNIV_PFS_RWLOCK)
+ /* It should have been created in pfs_rw_lock_create_func() */
+ new(lock) rw_lock_t();
+#endif /* UNIV_DEBUG */
+
/* If this is the very first time a synchronization object is
created, then the following call initializes the sync system. */
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- mutex_create(rw_lock_mutex_key, rw_lock_get_mutex(lock),
- SYNC_NO_ORDER_CHECK);
-
- lock->mutex.cfile_name = cfile_name;
- lock->mutex.cline = cline;
- lock->mutex.lock_name = cmutex_name;
- ut_d(lock->mutex.ib_mutex_type = 1);
-#else /* INNODB_RW_LOCKS_USE_ATOMICS */
-# ifdef UNIV_DEBUG
- UT_NOT_USED(cmutex_name);
-# endif
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
-
lock->lock_word = X_LOCK_DECR;
lock->waiters = 0;
- /* We set this value to signify that lock->writer_thread
- contains garbage at initialization and cannot be used for
- recursive x-locking. */
- lock->recursive = FALSE;
- /* Silence Valgrind when UNIV_DEBUG_VALGRIND is not enabled. */
- memset((void*) &lock->writer_thread, 0, sizeof lock->writer_thread);
- UNIV_MEM_INVALID(&lock->writer_thread, sizeof lock->writer_thread);
+ lock->sx_recursive = 0;
+ lock->writer_thread = 0;
-#ifdef UNIV_SYNC_DEBUG
- UT_LIST_INIT(lock->debug_list);
+#ifdef UNIV_DEBUG
+ lock->m_rw_lock = true;
- lock->level = level;
-#endif /* UNIV_SYNC_DEBUG */
+ UT_LIST_INIT(lock->debug_list, &rw_lock_debug_t::list);
+
+ lock->m_id = sync_latch_get_id(sync_latch_get_name(level));
+ ut_a(lock->m_id != LATCH_ID_NONE);
- ut_d(lock->magic_n = RW_LOCK_MAGIC_N);
+ lock->level = level;
+#endif /* UNIV_DEBUG */
lock->cfile_name = cfile_name;
- lock->cline = (unsigned int) cline;
- lock->lock_name = cmutex_name;
+
+ /* This should hold in practice. If it doesn't, then we need to
+ split the source file, or create the locks on lines below 8192;
+ cline is a 13-bit unsigned bitfield. */
+ ut_ad(cline <= 8192);
+ lock->cline = cline;
lock->count_os_wait = 0;
- lock->file_name = "not yet reserved";
- lock->line = 0;
lock->last_s_file_name = "not yet reserved";
lock->last_x_file_name = "not yet reserved";
lock->last_s_line = 0;
lock->last_x_line = 0;
- lock->event = os_event_create();
- lock->wait_ex_event = os_event_create();
+ lock->event = os_event_create(0);
+ lock->wait_ex_event = os_event_create(0);
- mutex_enter(&rw_lock_list_mutex);
+ lock->is_block_lock = 0;
- ut_ad(UT_LIST_GET_FIRST(rw_lock_list) == NULL
- || UT_LIST_GET_FIRST(rw_lock_list)->magic_n == RW_LOCK_MAGIC_N);
+ mutex_enter(&rw_lock_list_mutex);
- UT_LIST_ADD_FIRST(list, rw_lock_list, lock);
+ UT_LIST_ADD_FIRST(rw_lock_list, lock);
mutex_exit(&rw_lock_list_mutex);
}
@@ -277,82 +250,30 @@ rw_lock_create_func(
Calling this function is obligatory only if the memory buffer containing
the rw-lock is freed. Removes an rw-lock object from the global list. The
rw-lock is checked to be in the non-locked state. */
-UNIV_INTERN
void
rw_lock_free_func(
/*==============*/
- rw_lock_t* lock) /*!< in: rw-lock */
+ rw_lock_t* lock) /*!< in/out: rw-lock */
{
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- ib_mutex_t* mutex;
-#endif /* !INNODB_RW_LOCKS_USE_ATOMICS */
-
- os_rmb;
ut_ad(rw_lock_validate(lock));
ut_a(lock->lock_word == X_LOCK_DECR);
mutex_enter(&rw_lock_list_mutex);
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- mutex = rw_lock_get_mutex(lock);
-#endif /* !INNODB_RW_LOCKS_USE_ATOMICS */
-
- os_event_free(lock->event);
-
- os_event_free(lock->wait_ex_event);
+ os_event_destroy(lock->event);
- ut_ad(UT_LIST_GET_PREV(list, lock) == NULL
- || UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N);
- ut_ad(UT_LIST_GET_NEXT(list, lock) == NULL
- || UT_LIST_GET_NEXT(list, lock)->magic_n == RW_LOCK_MAGIC_N);
+ os_event_destroy(lock->wait_ex_event);
- UT_LIST_REMOVE(list, rw_lock_list, lock);
+ UT_LIST_REMOVE(rw_lock_list, lock);
mutex_exit(&rw_lock_list_mutex);
-
- ut_d(lock->magic_n = 0);
-
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- /* We have merely removed the rw_lock from the list, the memory
- has not been freed. Therefore the pointer to mutex is valid. */
- mutex_free(mutex);
-#endif /* !INNODB_RW_LOCKS_USE_ATOMICS */
-}
-
-#ifdef UNIV_DEBUG
-/******************************************************************//**
-Checks that the rw-lock has been initialized and that there are no
-simultaneous shared and exclusive locks.
-@return TRUE */
-UNIV_INTERN
-ibool
-rw_lock_validate(
-/*=============*/
- rw_lock_t* lock) /*!< in: rw-lock */
-{
- ulint waiters;
- lint lock_word;
-
- ut_ad(lock);
-
- waiters = rw_lock_get_waiters(lock);
- lock_word = lock->lock_word;
-
- ut_ad(lock->magic_n == RW_LOCK_MAGIC_N);
- ut_ad(waiters == 0 || waiters == 1);
- ut_ad(lock_word > -(2 * X_LOCK_DECR));
- ut_ad(lock_word <= X_LOCK_DECR);
-
- return(TRUE);
}
-#endif /* UNIV_DEBUG */
/******************************************************************//**
Lock an rw-lock in shared mode for the current thread. If the rw-lock is
locked in exclusive mode, or there is an exclusive lock request waiting,
-the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
+the function spins a preset time (controlled by srv_n_spin_wait_rounds), waiting
for the lock, before suspending the thread. */
-UNIV_INTERN
void
rw_lock_s_lock_spin(
/*================*/
@@ -360,74 +281,96 @@ rw_lock_s_lock_spin(
ulint pass, /*!< in: pass value; != 0, if the lock
will be passed to another thread to unlock */
const char* file_name, /*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
+ unsigned line) /*!< in: line where requested */
{
- ulint index; /* index of the reserved wait cell */
ulint i = 0; /* spin round count */
sync_array_t* sync_arr;
- size_t counter_index;
+ ulint spin_count = 0;
+ uint64_t count_os_wait = 0;
/* We reuse the thread id to index into the counter, cache
it here for efficiency. */
- counter_index = (size_t) os_thread_get_curr_id();
-
ut_ad(rw_lock_validate(lock));
- rw_lock_stats.rw_s_spin_wait_count.add(counter_index, 1);
lock_loop:
/* Spin waiting for the writer field to become free */
- os_rmb;
HMT_low();
- while (i < SYNC_SPIN_ROUNDS && lock->lock_word <= 0) {
- if (srv_spin_wait_delay) {
- ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
- }
-
+ while (i < srv_n_spin_wait_rounds && lock->lock_word <= 0) {
+ ut_delay(srv_spin_wait_delay);
i++;
- os_rmb;
}
HMT_medium();
- if (i >= SYNC_SPIN_ROUNDS) {
+ if (i >= srv_n_spin_wait_rounds) {
os_thread_yield();
}
+ ++spin_count;
+
/* We try once again to obtain the lock */
- if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
- rw_lock_stats.rw_s_spin_round_count.add(counter_index, i);
+ if (rw_lock_s_lock_low(lock, pass, file_name, line)) {
+
+ if (count_os_wait > 0) {
+ lock->count_os_wait +=
+ static_cast<uint32_t>(count_os_wait);
+ rw_lock_stats.rw_s_os_wait_count.add(count_os_wait);
+ }
+
+ rw_lock_stats.rw_s_spin_round_count.add(spin_count);
return; /* Success */
} else {
- if (i < SYNC_SPIN_ROUNDS) {
+ if (i < srv_n_spin_wait_rounds) {
goto lock_loop;
}
- rw_lock_stats.rw_s_spin_round_count.add(counter_index, i);
- sync_arr = sync_array_get_and_reserve_cell(lock,
- RW_LOCK_SHARED,
- file_name,
- line, &index);
+ ++count_os_wait;
+
+ sync_cell_t* cell;
+
+ sync_arr = sync_array_get_and_reserve_cell(
+ lock, RW_LOCK_S, file_name, line, &cell);
/* Set waiters before checking lock_word to ensure wake-up
signal is sent. This may lead to some unnecessary signals. */
- rw_lock_set_waiter_flag(lock);
+ my_atomic_fas32((int32*) &lock->waiters, 1);
+
+ if (rw_lock_s_lock_low(lock, pass, file_name, line)) {
+
+ sync_array_free_cell(sync_arr, cell);
+
+ if (count_os_wait > 0) {
+
+ lock->count_os_wait +=
+ static_cast<uint32_t>(count_os_wait);
+
+ rw_lock_stats.rw_s_os_wait_count.add(
+ count_os_wait);
+ }
+
+ rw_lock_stats.rw_s_spin_round_count.add(spin_count);
- if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
- sync_array_free_cell(sync_arr, index);
return; /* Success */
}
- /* these stats may not be accurate */
- lock->count_os_wait++;
- rw_lock_stats.rw_s_os_wait_count.add(counter_index, 1);
-
- sync_array_wait_event(sync_arr, index);
+ /* See the comments in trx_commit_low() near
+ before_trx_state_committed_in_memory explaining
+ why care is needed when invoking the following sync check. */
+#ifndef DBUG_OFF
+#ifdef UNIV_DEBUG
+ if (lock->get_level() != SYNC_DICT_OPERATION) {
+ DEBUG_SYNC_C("rw_s_lock_waiting");
+ }
+#endif
+#endif
+ sync_array_wait_event(sync_arr, cell);
i = 0;
+
goto lock_loop;
}
}
@@ -440,16 +383,15 @@ read was done. The ownership is moved because we want that the current
thread is able to acquire a second x-latch which is stored in an mtr.
This, in turn, is needed to pass the debug checks of index page
operations. */
-UNIV_INTERN
void
rw_lock_x_lock_move_ownership(
/*==========================*/
rw_lock_t* lock) /*!< in: lock which was x-locked in the
buffer read */
{
- ut_ad(rw_lock_is_locked(lock, RW_LOCK_EX));
+ ut_ad(rw_lock_is_locked(lock, RW_LOCK_X));
- rw_lock_set_writer_id_and_recursion_flag(lock, TRUE);
+ lock->writer_thread = os_thread_get_curr_id();
}
/******************************************************************//**
@@ -457,91 +399,90 @@ Function for the next writer to call. Waits for readers to exit.
The caller must have already decremented lock_word by X_LOCK_DECR. */
UNIV_INLINE
void
-rw_lock_x_lock_wait(
-/*================*/
+rw_lock_x_lock_wait_func(
+/*=====================*/
rw_lock_t* lock, /*!< in: pointer to rw-lock */
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
ulint pass, /*!< in: pass value; != 0, if the lock will
be passed to another thread to unlock */
#endif
+ lint threshold,/*!< in: threshold to wait for */
const char* file_name,/*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
+ unsigned line) /*!< in: line where requested */
{
- ulint index;
ulint i = 0;
+ ulint n_spins = 0;
sync_array_t* sync_arr;
- size_t counter_index;
+ uint64_t count_os_wait = 0;
- /* We reuse the thread id to index into the counter, cache
- it here for efficiency. */
+ ut_ad(lock->lock_word <= threshold);
- counter_index = (size_t) os_thread_get_curr_id();
-
- os_rmb;
- ut_ad(lock->lock_word <= 0);
+ HMT_low();
+ while (lock->lock_word < threshold) {
+ ut_delay(srv_spin_wait_delay);
- HMT_low();
- while (lock->lock_word < 0) {
- if (srv_spin_wait_delay) {
- ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
- }
- if(i < SYNC_SPIN_ROUNDS) {
+ if (i < srv_n_spin_wait_rounds) {
i++;
- os_rmb;
continue;
}
HMT_medium();
/* If there is still a reader, then go to sleep.*/
- rw_lock_stats.rw_x_spin_round_count.add(counter_index, i);
+ ++n_spins;
+
+ sync_cell_t* cell;
- sync_arr = sync_array_get_and_reserve_cell(lock,
- RW_LOCK_WAIT_EX,
- file_name,
- line, &index);
+ sync_arr = sync_array_get_and_reserve_cell(
+ lock, RW_LOCK_X_WAIT, file_name, line, &cell);
i = 0;
/* Check lock_word to ensure wake-up isn't missed.*/
- if (lock->lock_word < 0) {
+ if (lock->lock_word < threshold) {
- /* these stats may not be accurate */
- lock->count_os_wait++;
- rw_lock_stats.rw_x_os_wait_count.add(counter_index, 1);
+ ++count_os_wait;
/* Add debug info as it is needed to detect possible
deadlock. We must add info for WAIT_EX thread for
deadlock detection to work properly. */
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX,
- file_name, line);
-#endif
+ ut_d(rw_lock_add_debug_info(
+ lock, pass, RW_LOCK_X_WAIT,
+ file_name, line));
- if (srv_instrument_semaphores) {
- lock->thread_id = os_thread_get_curr_id();
- lock->file_name = file_name;
- lock->line = line;
- }
+ sync_array_wait_event(sync_arr, cell);
+
+ ut_d(rw_lock_remove_debug_info(
+ lock, pass, RW_LOCK_X_WAIT));
- sync_array_wait_event(sync_arr, index);
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_remove_debug_info(
- lock, pass, RW_LOCK_WAIT_EX);
-#endif
/* It is possible to wake when lock_word < 0.
We must pass the while-loop check to proceed.*/
+
} else {
- sync_array_free_cell(sync_arr, index);
+ sync_array_free_cell(sync_arr, cell);
+ break;
}
HMT_low();
}
HMT_medium();
- rw_lock_stats.rw_x_spin_round_count.add(counter_index, i);
+ rw_lock_stats.rw_x_spin_round_count.add(n_spins);
+
+ if (count_os_wait > 0) {
+ lock->count_os_wait += static_cast<uint32_t>(count_os_wait);
+ rw_lock_stats.rw_x_os_wait_count.add(count_os_wait);
+ }
}
+#ifdef UNIV_DEBUG
+# define rw_lock_x_lock_wait(L, P, T, F, O) \
+ rw_lock_x_lock_wait_func(L, P, T, F, O)
+#else
+# define rw_lock_x_lock_wait(L, P, T, F, O) \
+ rw_lock_x_lock_wait_func(L, T, F, O)
+#endif /* UNIV_DEBUG */
+
/******************************************************************//**
Low-level function for acquiring an exclusive lock.
-@return FALSE if did not succeed, TRUE if success. */
+@return FALSE if did not succeed, TRUE if success. */
UNIV_INLINE
ibool
rw_lock_x_lock_low(
@@ -550,43 +491,53 @@ rw_lock_x_lock_low(
ulint pass, /*!< in: pass value; != 0, if the lock will
be passed to another thread to unlock */
const char* file_name,/*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
+ unsigned line) /*!< in: line where requested */
{
- ibool local_recursive= lock->recursive;
-
- if (rw_lock_lock_word_decr(lock, X_LOCK_DECR)) {
+ if (rw_lock_lock_word_decr(lock, X_LOCK_DECR, X_LOCK_HALF_DECR)) {
- /* lock->recursive also tells us if the writer_thread
- field is stale or active. As we are going to write
- our own thread id in that field it must be that the
- current writer_thread value is not active. */
- ut_a(!lock->recursive);
+ /* As we are going to write our own thread id in that field it
+ must be that the current writer_thread value is not active. */
+ ut_a(!lock->writer_thread);
/* Decrement occurred: we are writer or next-writer. */
- rw_lock_set_writer_id_and_recursion_flag(
- lock, pass ? FALSE : TRUE);
+ if (!pass)
+ {
+ lock->writer_thread = os_thread_get_curr_id();
+ }
- rw_lock_x_lock_wait(lock,
-#ifdef UNIV_SYNC_DEBUG
- pass,
-#endif
- file_name, line);
+ rw_lock_x_lock_wait(lock, pass, 0, file_name, line);
} else {
os_thread_id_t thread_id = os_thread_get_curr_id();
- /* Decrement failed: relock or failed lock
- Note: recursive must be loaded before writer_thread see
- comment for rw_lock_set_writer_id_and_recursion_flag().
- To achieve this we load it before rw_lock_lock_word_decr(),
- which implies full memory barrier in current implementation. */
- if (!pass && local_recursive
- && os_thread_eq(lock->writer_thread, thread_id)) {
- /* Relock */
- if (lock->lock_word == 0) {
- lock->lock_word -= X_LOCK_DECR;
+ /* Decrement failed: An X or SX lock is held by either
+ this thread or another. Try to relock. */
+ if (!pass && os_thread_eq(lock->writer_thread, thread_id)) {
+ /* Other S-locks can still be allowed. If X is requested
+ recursively while holding an SX lock, taking this X lock
+ is consistent with the latching order. */
+
+ /* The existing X or SX lock is from this thread */
+ if (rw_lock_lock_word_decr(lock, X_LOCK_DECR, 0)) {
+ /* There is at least one SX-lock from this
+ thread, but no X-lock. */
+
+ /* Wait for any other S-locks to be
+ released. */
+ rw_lock_x_lock_wait(
+ lock, pass, -X_LOCK_HALF_DECR,
+ file_name, line);
+
} else {
- --lock->lock_word;
+ /* At least one X lock by this thread already
+ exists. Add another. */
+ if (lock->lock_word == 0
+ || lock->lock_word == -X_LOCK_HALF_DECR) {
+ lock->lock_word -= X_LOCK_DECR;
+ } else {
+ ut_ad(lock->lock_word <= -X_LOCK_DECR);
+ --lock->lock_word;
+ }
}
} else {
@@ -594,18 +545,85 @@ rw_lock_x_lock_low(
return(FALSE);
}
}
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, file_name, line);
-#endif
- if (srv_instrument_semaphores) {
- lock->thread_id = os_thread_get_curr_id();
- lock->file_name = file_name;
- lock->line = line;
+ ut_d(rw_lock_add_debug_info(lock, pass, RW_LOCK_X, file_name, line));
+
+ lock->last_x_file_name = file_name;
+ lock->last_x_line = line;
+
+ return(TRUE);
+}
+
+/******************************************************************//**
+Low-level function for acquiring an sx lock.
+@return FALSE if did not succeed, TRUE if success. */
+ibool
+rw_lock_sx_lock_low(
+/*================*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ const char* file_name,/*!< in: file name where lock requested */
+ unsigned line) /*!< in: line where requested */
+{
+ if (rw_lock_lock_word_decr(lock, X_LOCK_HALF_DECR, X_LOCK_HALF_DECR)) {
+
+ /* As we are going to write our own thread id in that field it
+ must be that the current writer_thread value is not active. */
+ ut_a(!lock->writer_thread);
+
+ /* Decrement occurred: we are the SX lock owner. */
+ if (!pass)
+ {
+ lock->writer_thread = os_thread_get_curr_id();
+ }
+
+ lock->sx_recursive = 1;
+ } else {
+ os_thread_id_t thread_id = os_thread_get_curr_id();
+
+ /* Decrement failed: It already has an X or SX lock by this
+ thread or another thread. If it is this thread, relock,
+ else fail. */
+ if (!pass && os_thread_eq(lock->writer_thread, thread_id)) {
+ /* This thread owns an X or SX lock */
+ if (lock->sx_recursive++ == 0) {
+ /* This thread is making its first SX-lock request
+ and it must be holding at least one X-lock here
+ because:
+
+ * There can't be a WAIT_EX thread because we are
+ the thread which has its thread_id written in
+ the writer_thread field and we are not waiting.
+
+ * Any other X-lock thread cannot exist because
+ it must update the recursive flag only after
+ updating the thread_id. Had there been
+ a concurrent X-locking thread which succeeded
+ in decrementing the lock_word, it must have
+ written its thread_id before setting the
+ recursive flag. Since we passed the if()
+ condition above, we must be the only
+ thread working on this lock and it is safe to
+ read and write to the lock_word. */
+
+ ut_ad((lock->lock_word == 0)
+ || ((lock->lock_word <= -X_LOCK_DECR)
+ && (lock->lock_word
+ > -(X_LOCK_DECR
+ + X_LOCK_HALF_DECR))));
+ lock->lock_word -= X_LOCK_HALF_DECR;
+ }
+ } else {
+ /* Another thread locked before us */
+ return(FALSE);
+ }
}
+ ut_d(rw_lock_add_debug_info(lock, pass, RW_LOCK_SX, file_name, line));
+
lock->last_x_file_name = file_name;
- lock->last_x_line = (unsigned int) line;
+ lock->last_x_line = line;
return(TRUE);
}
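+
+/* An illustrative trace of the SX recursion handling above (using the
+symbolic constants from sync0rw.h; exact values do not matter):
+
+	sx_lock		lock_word == X_LOCK_HALF_DECR, sx_recursive == 1
+	sx_lock		lock_word unchanged,	       sx_recursive == 2
+	x_lock		lock_word == -X_LOCK_HALF_DECR (X and SX held)
+
+Only a thread's first SX acquisition moves lock_word; later recursive
+SX requests merely increment sx_recursive. */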
@@ -614,12 +632,11 @@ rw_lock_x_lock_low(
NOTE! Use the corresponding macro, not directly this function! Lock an
rw-lock in exclusive mode for the current thread. If the rw-lock is locked
in shared or exclusive mode, or there is an exclusive lock request waiting,
-the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
+the function spins a preset time (controlled by srv_n_spin_wait_rounds), waiting
for the lock before suspending the thread. If the same thread has an x-lock
on the rw-lock, locking succeeds, with the following exception: if pass != 0,
only a single x-lock may be taken on the lock. NOTE: If the same thread has
an s-lock, locking does not succeed! */
-UNIV_INTERN
void
rw_lock_x_lock_func(
/*================*/
@@ -627,115 +644,245 @@ rw_lock_x_lock_func(
ulint pass, /*!< in: pass value; != 0, if the lock will
be passed to another thread to unlock */
const char* file_name,/*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
+ unsigned line) /*!< in: line where requested */
{
- ulint i; /*!< spin round count */
- ulint index; /*!< index of the reserved wait cell */
+ ulint i = 0;
sync_array_t* sync_arr;
- ibool spinning = FALSE;
- size_t counter_index;
-
- /* We reuse the thread id to index into the counter, cache
- it here for efficiency. */
-
- counter_index = (size_t) os_thread_get_curr_id();
+ ulint spin_count = 0;
+ uint64_t count_os_wait = 0;
ut_ad(rw_lock_validate(lock));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- i = 0;
+ ut_ad(!rw_lock_own(lock, RW_LOCK_S));
lock_loop:
if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
- rw_lock_stats.rw_x_spin_round_count.add(counter_index, i);
- return; /* Locking succeeded */
+ if (count_os_wait > 0) {
+ lock->count_os_wait +=
+ static_cast<uint32_t>(count_os_wait);
+ rw_lock_stats.rw_x_os_wait_count.add(count_os_wait);
+ }
- } else {
+ rw_lock_stats.rw_x_spin_round_count.add(spin_count);
- if (!spinning) {
- spinning = TRUE;
+ /* Locking succeeded */
+ return;
- rw_lock_stats.rw_x_spin_wait_count.add(
- counter_index, 1);
- }
+ } else {
/* Spin waiting for the lock_word to become free */
- os_rmb;
HMT_low();
- while (i < SYNC_SPIN_ROUNDS
- && lock->lock_word <= 0) {
- if (srv_spin_wait_delay) {
- ut_delay(ut_rnd_interval(0,
- srv_spin_wait_delay));
- }
+ while (i < srv_n_spin_wait_rounds
+ && lock->lock_word <= X_LOCK_HALF_DECR) {
+ ut_delay(srv_spin_wait_delay);
i++;
- os_rmb;
}
+
HMT_medium();
- if (i >= SYNC_SPIN_ROUNDS) {
+ spin_count += i;
+
+ if (i >= srv_n_spin_wait_rounds) {
+
os_thread_yield();
+
} else {
+
goto lock_loop;
}
}
- rw_lock_stats.rw_x_spin_round_count.add(counter_index, i);
+ sync_cell_t* cell;
- sync_arr = sync_array_get_and_reserve_cell(lock, RW_LOCK_EX,
- file_name, line, &index);
+ sync_arr = sync_array_get_and_reserve_cell(
+ lock, RW_LOCK_X, file_name, line, &cell);
/* Waiters must be set before checking lock_word, to ensure signal
is sent. This could lead to a few unnecessary wake-up signals. */
- rw_lock_set_waiter_flag(lock);
+ my_atomic_fas32((int32*) &lock->waiters, 1);
if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
- sync_array_free_cell(sync_arr, index);
- return; /* Locking succeeded */
+ sync_array_free_cell(sync_arr, cell);
+
+ if (count_os_wait > 0) {
+ lock->count_os_wait +=
+ static_cast<uint32_t>(count_os_wait);
+ rw_lock_stats.rw_x_os_wait_count.add(count_os_wait);
+ }
+
+ rw_lock_stats.rw_x_spin_round_count.add(spin_count);
+
+ /* Locking succeeded */
+ return;
}
- /* these stats may not be accurate */
- lock->count_os_wait++;
- rw_lock_stats.rw_x_os_wait_count.add(counter_index, 1);
+ ++count_os_wait;
- sync_array_wait_event(sync_arr, index);
+ sync_array_wait_event(sync_arr, cell);
i = 0;
+
goto lock_loop;
}
-#ifdef UNIV_SYNC_DEBUG
/******************************************************************//**
-Acquires the debug mutex. We cannot use the mutex defined in sync0sync,
-because the debug mutex is also acquired in sync0arr while holding the OS
-mutex protecting the sync array, and the ordinary mutex_enter might
-recursively call routines in sync0arr, leading to a deadlock on the OS
-mutex. */
-UNIV_INTERN
+NOTE! Use the corresponding macro, not directly this function! Lock an
+rw-lock in SX mode for the current thread. If the rw-lock is locked
+in exclusive mode, or there is an exclusive lock request waiting,
+the function spins a preset time (controlled by srv_n_spin_wait_rounds), waiting
+for the lock, before suspending the thread. If the same thread has an x-lock
+on the rw-lock, locking succeed, with the following exception: if pass != 0,
+only a single sx-lock may be taken on the lock. NOTE: If the same thread has
+an s-lock, locking does not succeed! */
void
-rw_lock_debug_mutex_enter(void)
-/*===========================*/
+rw_lock_sx_lock_func(
+/*=================*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ const char* file_name,/*!< in: file name where lock requested */
+ unsigned line) /*!< in: line where requested */
+
+{
+ ulint i = 0;
+ sync_array_t* sync_arr;
+ ulint spin_count = 0;
+ uint64_t count_os_wait = 0;
+ ulint spin_wait_count = 0;
+
+ ut_ad(rw_lock_validate(lock));
+ ut_ad(!rw_lock_own(lock, RW_LOCK_S));
+
+lock_loop:
+
+ if (rw_lock_sx_lock_low(lock, pass, file_name, line)) {
+
+ if (count_os_wait > 0) {
+ lock->count_os_wait +=
+ static_cast<uint32_t>(count_os_wait);
+ rw_lock_stats.rw_sx_os_wait_count.add(count_os_wait);
+ }
+
+ rw_lock_stats.rw_sx_spin_round_count.add(spin_count);
+ rw_lock_stats.rw_sx_spin_wait_count.add(spin_wait_count);
+
+ /* Locking succeeded */
+ return;
+
+ } else {
+
+ ++spin_wait_count;
+
+ /* Spin waiting for the lock_word to become free */
+ while (i < srv_n_spin_wait_rounds
+ && lock->lock_word <= X_LOCK_HALF_DECR) {
+
+ ut_delay(srv_spin_wait_delay);
+ i++;
+ }
+
+ spin_count += i;
+
+ if (i >= srv_n_spin_wait_rounds) {
+
+ os_thread_yield();
+
+ } else {
+
+ goto lock_loop;
+ }
+ }
+
+ sync_cell_t* cell;
+
+ sync_arr = sync_array_get_and_reserve_cell(
+ lock, RW_LOCK_SX, file_name, line, &cell);
+
+ /* Waiters must be set before checking lock_word, to ensure signal
+ is sent. This could lead to a few unnecessary wake-up signals. */
+ my_atomic_fas32((int32*) &lock->waiters, 1);
+
+ if (rw_lock_sx_lock_low(lock, pass, file_name, line)) {
+
+ sync_array_free_cell(sync_arr, cell);
+
+ if (count_os_wait > 0) {
+ lock->count_os_wait +=
+ static_cast<uint32_t>(count_os_wait);
+ rw_lock_stats.rw_sx_os_wait_count.add(count_os_wait);
+ }
+
+ rw_lock_stats.rw_sx_spin_round_count.add(spin_count);
+ rw_lock_stats.rw_sx_spin_wait_count.add(spin_wait_count);
+
+ /* Locking succeeded */
+ return;
+ }
+
+ ++count_os_wait;
+
+ sync_array_wait_event(sync_arr, cell);
+
+ i = 0;
+
+ goto lock_loop;
+}
+
+#ifdef UNIV_DEBUG
+
+/******************************************************************//**
+Checks that the rw-lock has been initialized and that there are no
+simultaneous shared and exclusive locks.
+@return true */
+bool
+rw_lock_validate(
+/*=============*/
+ const rw_lock_t* lock) /*!< in: rw-lock */
{
- os_fast_mutex_lock(&rw_lock_debug_mutex);
+ lint lock_word;
+
+ ut_ad(lock);
+
+ lock_word = lock->lock_word;
+
+ ut_ad(lock->waiters < 2);
+ ut_ad(lock_word > -(2 * X_LOCK_DECR));
+ ut_ad(lock_word <= X_LOCK_DECR);
+
+ return(true);
}
/******************************************************************//**
-Releases the debug mutex. */
-UNIV_INTERN
-void
-rw_lock_debug_mutex_exit(void)
-/*==========================*/
+Checks if somebody has locked the rw-lock in the specified mode.
+@return true if locked */
+bool
+rw_lock_is_locked(
+/*==============*/
+ rw_lock_t* lock, /*!< in: rw-lock */
+ ulint lock_type) /*!< in: lock type: RW_LOCK_S,
+ RW_LOCK_X or RW_LOCK_SX */
{
- os_fast_mutex_unlock(&rw_lock_debug_mutex);
+ ut_ad(rw_lock_validate(lock));
+
+ switch (lock_type) {
+ case RW_LOCK_S:
+ return(rw_lock_get_reader_count(lock) > 0);
+
+ case RW_LOCK_X:
+ return(rw_lock_get_writer(lock) == RW_LOCK_X);
+
+ case RW_LOCK_SX:
+ return(rw_lock_get_sx_lock_count(lock) > 0);
+
+ default:
+ ut_error;
+ }
+ return(false); /* avoid compiler warnings */
}
/******************************************************************//**
Inserts the debug information for an rw-lock. */
-UNIV_INTERN
void
rw_lock_add_debug_info(
/*===================*/
@@ -743,37 +890,45 @@ rw_lock_add_debug_info(
ulint pass, /*!< in: pass value */
ulint lock_type, /*!< in: lock type */
const char* file_name, /*!< in: file where requested */
- ulint line) /*!< in: line where requested */
+ unsigned line) /*!< in: line where requested */
{
- rw_lock_debug_t* info;
+ ut_ad(file_name != NULL);
- ut_ad(lock);
- ut_ad(file_name);
-
- info = rw_lock_debug_create();
+ rw_lock_debug_t* info = rw_lock_debug_create();
rw_lock_debug_mutex_enter();
- info->file_name = file_name;
+ info->pass = pass;
info->line = line;
info->lock_type = lock_type;
+ info->file_name = file_name;
info->thread_id = os_thread_get_curr_id();
- info->pass = pass;
- UT_LIST_ADD_FIRST(list, lock->debug_list, info);
+ UT_LIST_ADD_FIRST(lock->debug_list, info);
rw_lock_debug_mutex_exit();
- if ((pass == 0) && (lock_type != RW_LOCK_WAIT_EX)) {
- sync_thread_add_level(lock, lock->level,
- lock_type == RW_LOCK_EX
- && lock->lock_word < 0);
+ if (pass == 0 && lock_type != RW_LOCK_X_WAIT) {
+
+ /* Recursive x while holding SX
+ (lock_type == RW_LOCK_X && lock_word == -X_LOCK_HALF_DECR)
+ is treated as a new lock, not as a relock. */
+
+ if ((lock_type == RW_LOCK_X
+ && lock->lock_word < -X_LOCK_HALF_DECR)
+ || (lock_type == RW_LOCK_SX
+ && (lock->lock_word < 0 || lock->sx_recursive == 1))) {
+
+ sync_check_lock_validate(lock);
+ sync_check_lock_granted(lock);
+ } else {
+ sync_check_relock(lock);
+ }
}
}
/******************************************************************//**
Removes a debug information struct for an rw-lock. */
-UNIV_INTERN
void
rw_lock_remove_debug_info(
/*======================*/
@@ -785,122 +940,147 @@ rw_lock_remove_debug_info(
ut_ad(lock);
- if ((pass == 0) && (lock_type != RW_LOCK_WAIT_EX)) {
- sync_thread_reset_level(lock);
+ if (pass == 0 && lock_type != RW_LOCK_X_WAIT) {
+ sync_check_unlock(lock);
}
rw_lock_debug_mutex_enter();
- info = UT_LIST_GET_FIRST(lock->debug_list);
+ for (info = UT_LIST_GET_FIRST(lock->debug_list);
+ info != 0;
+ info = UT_LIST_GET_NEXT(list, info)) {
- while (info != NULL) {
- if ((pass == info->pass)
- && ((pass != 0)
+ if (pass == info->pass
+ && (pass != 0
|| os_thread_eq(info->thread_id,
os_thread_get_curr_id()))
- && (info->lock_type == lock_type)) {
+ && info->lock_type == lock_type) {
/* Found! */
- UT_LIST_REMOVE(list, lock->debug_list, info);
+ UT_LIST_REMOVE(lock->debug_list, info);
+
rw_lock_debug_mutex_exit();
rw_lock_debug_free(info);
return;
}
-
- info = UT_LIST_GET_NEXT(list, info);
}
ut_error;
}
-#endif /* UNIV_SYNC_DEBUG */
-#ifdef UNIV_SYNC_DEBUG
/******************************************************************//**
Checks if the thread has locked the rw-lock in the specified mode, with
the pass value == 0.
-@return TRUE if locked */
-UNIV_INTERN
+@return TRUE if locked */
ibool
rw_lock_own(
/*========*/
rw_lock_t* lock, /*!< in: rw-lock */
- ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED,
- RW_LOCK_EX */
+ ulint lock_type) /*!< in: lock type: RW_LOCK_S,
+ RW_LOCK_X */
{
- rw_lock_debug_t* info;
-
ut_ad(lock);
ut_ad(rw_lock_validate(lock));
- rw_lock_debug_mutex_enter();
+ const os_thread_id_t thread_id = os_thread_get_curr_id();
+
+ if (!os_thread_eq(lock->writer_thread, thread_id)) {
+ } else if (lock_type == RW_LOCK_X && rw_lock_get_x_lock_count(lock)) {
+ return TRUE;
+ } else if (lock_type == RW_LOCK_SX && rw_lock_get_sx_lock_count(lock)) {
+ return TRUE;
+ }
- info = UT_LIST_GET_FIRST(lock->debug_list);
+ rw_lock_debug_mutex_enter();
- while (info != NULL) {
+ for (const rw_lock_debug_t* info = UT_LIST_GET_FIRST(lock->debug_list);
+ info != NULL;
+ info = UT_LIST_GET_NEXT(list, info)) {
- if (os_thread_eq(info->thread_id, os_thread_get_curr_id())
- && (info->pass == 0)
- && (info->lock_type == lock_type)) {
+ if (os_thread_eq(info->thread_id, thread_id)
+ && info->pass == 0
+ && info->lock_type == lock_type) {
rw_lock_debug_mutex_exit();
/* Found! */
return(TRUE);
}
-
- info = UT_LIST_GET_NEXT(list, info);
}
rw_lock_debug_mutex_exit();
return(FALSE);
}
-#endif /* UNIV_SYNC_DEBUG */
-/******************************************************************//**
-Checks if somebody has locked the rw-lock in the specified mode.
-@return TRUE if locked */
-UNIV_INTERN
-ibool
-rw_lock_is_locked(
-/*==============*/
- rw_lock_t* lock, /*!< in: rw-lock */
- ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED,
- RW_LOCK_EX */
+/** Checks if the thread has locked the rw-lock in the specified mode, with
+the pass value == 0.
+@param[in] lock rw-lock
+@param[in] flags specify lock types with OR of the
+ rw_lock_flag_t values
+@return true if locked */
+bool rw_lock_own_flagged(const rw_lock_t* lock, rw_lock_flags_t flags)
{
- ibool ret = FALSE;
-
- ut_ad(lock);
ut_ad(rw_lock_validate(lock));
- if (lock_type == RW_LOCK_SHARED) {
- if (rw_lock_get_reader_count(lock) > 0) {
- ret = TRUE;
+ const os_thread_id_t thread_id = os_thread_get_curr_id();
+
+ if (!os_thread_eq(lock->writer_thread, thread_id)) {
+ } else if ((flags & RW_LOCK_FLAG_X)
+ && rw_lock_get_x_lock_count(lock)) {
+ return true;
+ } else if ((flags & RW_LOCK_FLAG_SX)
+ && rw_lock_get_sx_lock_count(lock)) {
+ return true;
+ }
+
+ rw_lock_debug_mutex_enter();
+
+ for (rw_lock_debug_t* info = UT_LIST_GET_FIRST(lock->debug_list);
+ info != NULL;
+ info = UT_LIST_GET_NEXT(list, info)) {
+ if (!os_thread_eq(info->thread_id, thread_id)
+ || info->pass) {
+ continue;
}
- } else if (lock_type == RW_LOCK_EX) {
- if (rw_lock_get_writer(lock) == RW_LOCK_EX) {
- ret = TRUE;
+
+ switch (info->lock_type) {
+ case RW_LOCK_S:
+ if (!(flags & RW_LOCK_FLAG_S)) {
+ continue;
+ }
+ break;
+
+ case RW_LOCK_X:
+ if (!(flags & RW_LOCK_FLAG_X)) {
+ continue;
+ }
+ break;
+
+ case RW_LOCK_SX:
+ if (!(flags & RW_LOCK_FLAG_SX)) {
+ continue;
+ }
+ break;
}
- } else {
- ut_error;
+
+ rw_lock_debug_mutex_exit();
+ return true;
}
- return(ret);
+ rw_lock_debug_mutex_exit();
+ return false;
}
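+
+/* Typical debug assertion (illustrative): require that the caller
+holds the latch in either X or SX mode, without caring which:
+
+	ut_ad(rw_lock_own_flagged(lock, RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX));
+*/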
-#ifdef UNIV_SYNC_DEBUG
/***************************************************************//**
Prints debug info of currently locked rw-locks. */
-UNIV_INTERN
void
rw_lock_list_print_info(
/*====================*/
FILE* file) /*!< in: file where to print */
{
- rw_lock_t* lock;
- ulint count = 0;
- rw_lock_debug_t* info;
+ ulint count = 0;
mutex_enter(&rw_lock_list_mutex);
@@ -908,143 +1088,128 @@ rw_lock_list_print_info(
"RW-LATCH INFO\n"
"-------------\n", file);
- lock = UT_LIST_GET_FIRST(rw_lock_list);
-
- while (lock != NULL) {
+ for (const rw_lock_t* lock = UT_LIST_GET_FIRST(rw_lock_list);
+ lock != NULL;
+ lock = UT_LIST_GET_NEXT(list, lock)) {
count++;
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- mutex_enter(&(lock->mutex));
-#endif
if (lock->lock_word != X_LOCK_DECR) {
fprintf(file, "RW-LOCK: %p ", (void*) lock);
- if (rw_lock_get_waiters(lock)) {
+ if (lock->waiters) {
fputs(" Waiters for the lock exist\n", file);
} else {
putc('\n', file);
}
+ rw_lock_debug_t* info;
+
rw_lock_debug_mutex_enter();
- info = UT_LIST_GET_FIRST(lock->debug_list);
- while (info != NULL) {
+
+ for (info = UT_LIST_GET_FIRST(lock->debug_list);
+ info != NULL;
+ info = UT_LIST_GET_NEXT(list, info)) {
+
rw_lock_debug_print(file, info);
- info = UT_LIST_GET_NEXT(list, info);
}
+
rw_lock_debug_mutex_exit();
}
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- mutex_exit(&(lock->mutex));
-#endif
-
- lock = UT_LIST_GET_NEXT(list, lock);
}
- fprintf(file, "Total number of rw-locks %ld\n", count);
+ fprintf(file, "Total number of rw-locks " ULINTPF "\n", count);
mutex_exit(&rw_lock_list_mutex);
}
-/***************************************************************//**
-Prints debug info of an rw-lock. */
-UNIV_INTERN
-void
-rw_lock_print(
-/*==========*/
- rw_lock_t* lock) /*!< in: rw-lock */
-{
- rw_lock_debug_t* info;
-
- fprintf(stderr,
- "-------------\n"
- "RW-LATCH INFO\n"
- "RW-LATCH: %p ", (void*) lock);
-
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- /* We used to acquire lock->mutex here, but it would cause a
- recursive call to sync_thread_add_level() if UNIV_SYNC_DEBUG
- is defined. Since this function is only invoked from
- sync_thread_levels_g(), let us choose the smaller evil:
- performing dirty reads instead of causing bogus deadlocks or
- assertion failures. */
-#endif
- if (lock->lock_word != X_LOCK_DECR) {
-
- if (rw_lock_get_waiters(lock)) {
- fputs(" Waiters for the lock exist\n", stderr);
- } else {
- putc('\n', stderr);
- }
-
- rw_lock_debug_mutex_enter();
- info = UT_LIST_GET_FIRST(lock->debug_list);
- while (info != NULL) {
- rw_lock_debug_print(stderr, info);
- info = UT_LIST_GET_NEXT(list, info);
- }
- rw_lock_debug_mutex_exit();
- }
-}
-
/*********************************************************************//**
Prints info of a debug struct. */
-UNIV_INTERN
void
rw_lock_debug_print(
/*================*/
FILE* f, /*!< in: output stream */
- rw_lock_debug_t* info) /*!< in: debug struct */
+ const rw_lock_debug_t* info) /*!< in: debug struct */
{
- ulint rwt;
-
- rwt = info->lock_type;
+ ulint rwt = info->lock_type;
fprintf(f, "Locked: thread %lu file %s line %lu ",
- (ulong) os_thread_pf(info->thread_id), info->file_name,
- (ulong) info->line);
- if (rwt == RW_LOCK_SHARED) {
+ static_cast<ulong>(os_thread_pf(info->thread_id)),
+ sync_basename(info->file_name),
+ static_cast<ulong>(info->line));
+
+ switch (rwt) {
+ case RW_LOCK_S:
fputs("S-LOCK", f);
- } else if (rwt == RW_LOCK_EX) {
+ break;
+ case RW_LOCK_X:
fputs("X-LOCK", f);
- } else if (rwt == RW_LOCK_WAIT_EX) {
+ break;
+ case RW_LOCK_SX:
+ fputs("SX-LOCK", f);
+ break;
+ case RW_LOCK_X_WAIT:
fputs("WAIT X-LOCK", f);
- } else {
+ break;
+ default:
ut_error;
}
+
if (info->pass != 0) {
fprintf(f, " pass value %lu", (ulong) info->pass);
}
- putc('\n', f);
+
+ fprintf(f, "\n");
}
-/***************************************************************//**
-Returns the number of currently locked rw-locks. Works only in the debug
-version.
-@return number of locked rw-locks */
-UNIV_INTERN
-ulint
-rw_lock_n_locked(void)
-/*==================*/
+/** Print where it was locked from
+@return the string representation */
+std::string
+rw_lock_t::locked_from() const
{
- rw_lock_t* lock;
- ulint count = 0;
+	/* Note: For X locks it can be locked from multiple places because
+ the same thread can call X lock recursively. */
- mutex_enter(&rw_lock_list_mutex);
+ std::ostringstream msg;
+ bool written = false;
- lock = UT_LIST_GET_FIRST(rw_lock_list);
+ ut_ad(rw_lock_validate(this));
- while (lock != NULL) {
+ rw_lock_debug_mutex_enter();
- if (lock->lock_word != X_LOCK_DECR) {
- count++;
+ for (rw_lock_debug_t* info = UT_LIST_GET_FIRST(debug_list);
+ info != NULL;
+ info = UT_LIST_GET_NEXT(list, info)) {
+ if (!os_thread_eq(info->thread_id, os_thread_get_curr_id())) {
+ continue;
+ }
+
+ if (written) {
+ msg << ", ";
}
- lock = UT_LIST_GET_NEXT(list, lock);
+ written = true;
+
+ msg << info->file_name << ":" << info->line;
}
- mutex_exit(&rw_lock_list_mutex);
+ rw_lock_debug_mutex_exit();
+
+ return(msg.str());
+}
+
+/** Print the rw-lock information.
+@return the string representation */
+std::string
+rw_lock_t::to_string() const
+{
+ std::ostringstream msg;
- return(count);
+ msg << "RW-LATCH: "
+ << "thread id " << os_thread_pf(os_thread_get_curr_id())
+ << " addr: " << this
+ << " Locked from: " << locked_from().c_str();
+
+ return(msg.str());
}
-#endif /* UNIV_SYNC_DEBUG */
+#endif /* UNIV_DEBUG */
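
/* A trimmed illustration of the locked_from() pattern introduced
above: joining "file:line" records for the current thread with ", "
via std::ostringstream. Types and data are hypothetical. */
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

struct Acquisition {
    std::string file;
    unsigned    line;
};

std::string locked_from(const std::vector<Acquisition>& acqs)
{
    std::ostringstream msg;
    bool               written = false;

    for (const Acquisition& a : acqs) {
        if (written) {
            msg << ", ";
        }
        written = true;
        msg << a.file << ":" << a.line;
    }
    return msg.str();
}

int main()
{
    /* prints: btr0cur.cc:42, buf0buf.cc:99 */
    std::cout << locked_from({{"btr0cur.cc", 42}, {"buf0buf.cc", 99}})
              << '\n';
}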
diff --git a/storage/innobase/sync/sync0sync.cc b/storage/innobase/sync/sync0sync.cc
index 2c908494550..0a81f9c00e7 100644
--- a/storage/innobase/sync/sync0sync.cc
+++ b/storage/innobase/sync/sync0sync.cc
@@ -2,7 +2,6 @@
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
-Copyright (c) 2017, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -31,1549 +30,240 @@ Mutex, the basic synchronization primitive
Created 9/5/1995 Heikki Tuuri
*******************************************************/
-#include "sync0sync.h"
-#ifdef UNIV_NONINL
-#include "sync0sync.ic"
-#include "sync0arr.ic"
-#endif
-
#include "sync0rw.h"
-#include "buf0buf.h"
-#include "srv0srv.h"
-#include "buf0types.h"
-#include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
-#ifdef UNIV_SYNC_DEBUG
-# include "srv0start.h" /* srv_is_being_started */
-#endif /* UNIV_SYNC_DEBUG */
-#include "ha_prototypes.h"
-#include "my_cpu.h"
-
-#include <vector>
-
-/*
- REASONS FOR IMPLEMENTING THE SPIN LOCK MUTEX
- ============================================
-
-Semaphore operations in operating systems are slow: Solaris on a 1993 Sparc
-takes 3 microseconds (us) for a lock-unlock pair and Windows NT on a 1995
-Pentium takes 20 microseconds for a lock-unlock pair. Therefore, we have to
-implement our own efficient spin lock mutex. Future operating systems may
-provide efficient spin locks, but we cannot count on that.
-
-Another reason for implementing a spin lock is that on multiprocessor systems
-it can be more efficient for a processor to run a loop waiting for the
-semaphore to be released than to switch to a different thread. A thread switch
-takes 25 us on both platforms mentioned above. See Gray and Reuter's book
-Transaction processing for background.
-
-How long should the spin loop last before suspending the thread? On a
-uniprocessor, spinning does not help at all, because if the thread owning the
-mutex is not executing, it cannot be released. Spinning actually wastes
-resources.
-
-On a multiprocessor, we do not know if the thread owning the mutex is
-executing or not. Thus it would make sense to spin as long as the operation
-guarded by the mutex would typically last assuming that the thread is
-executing. If the mutex is not released by that time, we may assume that the
-thread owning the mutex is not executing and suspend the waiting thread.
-
-A typical operation (where no i/o involved) guarded by a mutex or a read-write
-lock may last 1 - 20 us on the current Pentium platform. The longest
-operations are the binary searches on an index node.
-
-We conclude that the best choice is to set the spin time at 20 us. Then the
-system should work well on a multiprocessor. On a uniprocessor we have to
-make sure that thread switches due to mutex collisions are not frequent,
-i.e., they do not happen every 100 us or so, because that wastes too much
-resources. If the thread switches are not frequent, the 20 us wasted in spin
-loop is not too much.
-
-Empirical studies on the effect of spin time should be done for different
-platforms.
-
-
- IMPLEMENTATION OF THE MUTEX
- ===========================
-
-For background, see Curt Schimmel's book on Unix implementation on modern
-architectures. The key points in the implementation are atomicity and
-serialization of memory accesses. The test-and-set instruction (XCHG in
-Pentium) must be atomic. As new processors may have weak memory models, also
-serialization of memory references may be necessary. The successor of Pentium,
-P6, has at least one mode where the memory model is weak. As far as we know,
-in Pentium all memory accesses are serialized in the program order and we do
-not have to worry about the memory model. On other processors there are
-special machine instructions called a fence, memory barrier, or storage
-barrier (STBAR in Sparc), which can be used to serialize the memory accesses
-to happen in program order relative to the fence instruction.
-
-Leslie Lamport has devised a "bakery algorithm" to implement a mutex without
-the atomic test-and-set, but his algorithm should be modified for weak memory
-models. We do not use Lamport's algorithm, because we guess it is slower than
-the atomic test-and-set.
-
-Our mutex implementation works as follows: First, we perform the atomic
-test-and-set instruction on the memory word. If the test returns zero, we
-know we got the lock first. If the test returns not zero, some other thread
-was quicker and got the lock: then we spin in a loop reading the memory word,
-waiting for it to become zero. It is wise to just read the word in the loop, not
-perform numerous test-and-set instructions, because they generate memory
-traffic between the cache and the main memory. The read loop can just access
-the cache, saving bus bandwidth.
-
-If we cannot acquire the mutex lock in the specified time, we reserve a cell
-in the wait array, set the waiters byte in the mutex to 1. To avoid a race
-condition, after setting the waiters byte and before suspending the waiting
-thread, we still have to check that the mutex is reserved, because it may
-have happened that the thread which was holding the mutex has just released
-it and did not see the waiters byte set to 1, a case which would lead the
-other thread to an infinite wait.
-
-LEMMA 1: After a thread resets the event of a mutex (or rw_lock), some
-======
-thread will eventually call os_event_set() on that particular event.
-Thus no infinite wait is possible in this case.
-
-Proof: After making the reservation the thread sets the waiters field in the
-mutex to 1. Then it checks that the mutex is still reserved by some thread,
-or it reserves the mutex for itself. In any case, some thread (which may be
-also some earlier thread, not necessarily the one currently holding the mutex)
-will set the waiters field to 0 in mutex_exit, and then call
-os_event_set() with the mutex as an argument.
-Q.E.D.
-
-LEMMA 2: If an os_event_set() call is made after some thread has called
-======
-the os_event_reset() and before it starts wait on that event, the call
-will not be lost to the second thread. This is true even if there is an
-intervening call to os_event_reset() by another thread.
-Thus no infinite wait is possible in this case.
-
-Proof (non-windows platforms): os_event_reset() returns a monotonically
-increasing value of signal_count. This value is increased at every
-call of os_event_set(). If thread A has called os_event_reset() followed
-by thread B calling os_event_set() and then some other thread C calling
-os_event_reset(), the is_set flag of the event will be set to FALSE;
-but now if thread A calls os_event_wait_low() with the signal_count
-value returned from the earlier call of os_event_reset(), it will
-return immediately without waiting.
-Q.E.D.
-
-Proof (windows): If there is a writer thread which is forced to wait for
-the lock, it may be able to set the state of rw_lock to RW_LOCK_WAIT_EX
-The design of rw_lock ensures that there is one and only one thread
-that is able to change the state to RW_LOCK_WAIT_EX and this thread is
-guaranteed to acquire the lock after it is released by the current
-holders and before any other waiter gets the lock.
-On windows this thread waits on a separate event i.e.: wait_ex_event.
-Since only one thread can wait on this event there is no chance
-of this event getting reset before the writer starts wait on it.
-Therefore, this thread is guaranteed to catch the os_set_event()
-signalled unconditionally at the release of the lock.
-Q.E.D. */
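
/* The comment block removed above describes the classic scheme:
atomic test-and-set to acquire, a bounded read-only spin to wait, and
suspension as the last resort. A minimal C++11 model of the spin
phase (SPIN_ROUNDS is a hypothetical stand-in for SYNC_SPIN_ROUNDS;
the sync-array/waiters machinery is reduced to a yield): */
#include <atomic>
#include <thread>

static const int SPIN_ROUNDS = 30;

struct SpinMutex {
    std::atomic<int> lock_word{0};

    void lock()
    {
        for (;;) {
            /* test-and-set: 0 -> 1 means we got the lock first */
            if (lock_word.exchange(1, std::memory_order_acquire) == 0) {
                return;
            }

            /* Spin reading only, so the cache line stays shared
            instead of bouncing on test-and-set writes. */
            int i = 0;
            while (lock_word.load(std::memory_order_relaxed)
                   && i < SPIN_ROUNDS) {
                i++;
            }

            if (i >= SPIN_ROUNDS) {
                /* The real code reserves a wait-array cell and
                sets the waiters byte here before suspending. */
                std::this_thread::yield();
            }
        }
    }

    void unlock()
    {
        lock_word.store(0, std::memory_order_release);
    }
};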
-
-/* Number of spin waits on mutexes: for performance monitoring */
-
-/** The number of iterations in the mutex_spin_wait() spin loop.
-Intended for performance monitoring. */
-static ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_spin_round_count;
-/** The number of mutex_spin_wait() calls. Intended for
-performance monitoring. */
-static ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_spin_wait_count;
-/** The number of OS waits in mutex_spin_wait(). Intended for
-performance monitoring. */
-static ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_os_wait_count;
-/** The number of mutex_exit() calls. Intended for performance
-monitoring. */
-UNIV_INTERN ib_int64_t mutex_exit_count;
-
-/** This variable is set to TRUE when sync_init is called */
-UNIV_INTERN ibool sync_initialized = FALSE;
-
-#ifdef UNIV_SYNC_DEBUG
-/** An acquired mutex or rw-lock and its level in the latching order */
-struct sync_level_t;
-/** Mutexes or rw-locks held by a thread */
-struct sync_thread_t;
-
-/** The latch levels currently owned by threads are stored in this data
-structure; the size of this array is OS_THREAD_MAX_N */
-
-UNIV_INTERN sync_thread_t* sync_thread_level_arrays;
-
-/** Mutex protecting sync_thread_level_arrays */
-UNIV_INTERN ib_mutex_t sync_thread_mutex;
-
-# ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t sync_thread_mutex_key;
-# endif /* UNIV_PFS_MUTEX */
-#endif /* UNIV_SYNC_DEBUG */
-
-/** Global list of database mutexes (not OS mutexes) created. */
-UNIV_INTERN ut_list_base_node_t mutex_list;
-
-/** Mutex protecting the mutex_list variable */
-UNIV_INTERN ib_mutex_t mutex_list_mutex;
+#include "sync0sync.h"
#ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t mutex_list_mutex_key;
+/* Key to register autoinc_mutex with performance schema */
+mysql_pfs_key_t autoinc_mutex_key;
+mysql_pfs_key_t buffer_block_mutex_key;
+mysql_pfs_key_t buf_pool_mutex_key;
+mysql_pfs_key_t buf_pool_zip_mutex_key;
+mysql_pfs_key_t cache_last_read_mutex_key;
+mysql_pfs_key_t dict_foreign_err_mutex_key;
+mysql_pfs_key_t dict_sys_mutex_key;
+mysql_pfs_key_t file_format_max_mutex_key;
+mysql_pfs_key_t fil_system_mutex_key;
+mysql_pfs_key_t flush_list_mutex_key;
+mysql_pfs_key_t fts_bg_threads_mutex_key;
+mysql_pfs_key_t fts_delete_mutex_key;
+mysql_pfs_key_t fts_optimize_mutex_key;
+mysql_pfs_key_t fts_doc_id_mutex_key;
+mysql_pfs_key_t fts_pll_tokenize_mutex_key;
+mysql_pfs_key_t hash_table_mutex_key;
+mysql_pfs_key_t ibuf_bitmap_mutex_key;
+mysql_pfs_key_t ibuf_mutex_key;
+mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key;
+mysql_pfs_key_t log_sys_mutex_key;
+mysql_pfs_key_t log_sys_write_mutex_key;
+mysql_pfs_key_t log_cmdq_mutex_key;
+mysql_pfs_key_t log_flush_order_mutex_key;
+mysql_pfs_key_t mutex_list_mutex_key;
+mysql_pfs_key_t recalc_pool_mutex_key;
+mysql_pfs_key_t page_cleaner_mutex_key;
+mysql_pfs_key_t purge_sys_pq_mutex_key;
+mysql_pfs_key_t recv_sys_mutex_key;
+mysql_pfs_key_t recv_writer_mutex_key;
+mysql_pfs_key_t redo_rseg_mutex_key;
+mysql_pfs_key_t noredo_rseg_mutex_key;
+mysql_pfs_key_t page_zip_stat_per_index_mutex_key;
+# ifdef UNIV_DEBUG
+mysql_pfs_key_t rw_lock_debug_mutex_key;
+# endif /* UNIV_DEBUG */
+mysql_pfs_key_t rtr_active_mutex_key;
+mysql_pfs_key_t rtr_match_mutex_key;
+mysql_pfs_key_t rtr_path_mutex_key;
+mysql_pfs_key_t rtr_ssn_mutex_key;
+mysql_pfs_key_t rw_lock_list_mutex_key;
+mysql_pfs_key_t rw_lock_mutex_key;
+mysql_pfs_key_t srv_innodb_monitor_mutex_key;
+mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
+mysql_pfs_key_t srv_monitor_file_mutex_key;
+mysql_pfs_key_t buf_dblwr_mutex_key;
+mysql_pfs_key_t trx_undo_mutex_key;
+mysql_pfs_key_t trx_mutex_key;
+mysql_pfs_key_t trx_pool_mutex_key;
+mysql_pfs_key_t trx_pool_manager_mutex_key;
+mysql_pfs_key_t lock_mutex_key;
+mysql_pfs_key_t lock_wait_mutex_key;
+mysql_pfs_key_t trx_sys_mutex_key;
+mysql_pfs_key_t srv_sys_mutex_key;
+mysql_pfs_key_t srv_threads_mutex_key;
+mysql_pfs_key_t event_mutex_key;
+mysql_pfs_key_t event_manager_mutex_key;
+mysql_pfs_key_t sync_array_mutex_key;
+mysql_pfs_key_t thread_mutex_key;
+mysql_pfs_key_t zip_pad_mutex_key;
+mysql_pfs_key_t row_drop_list_mutex_key;
#endif /* UNIV_PFS_MUTEX */
-
-#ifdef UNIV_SYNC_DEBUG
-/** Latching order checks start when this is set TRUE */
-UNIV_INTERN ibool sync_order_checks_on = FALSE;
-
-/** Array for tracking sync levels per thread. */
-typedef std::vector<sync_level_t> sync_arr_t;
-
-
-/** Mutexes or rw-locks held by a thread */
-struct sync_thread_t{
- os_thread_id_t id; /*!< OS thread id */
- sync_arr_t* levels; /*!< level array for this thread; if
- this is NULL this slot is unused */
-};
-
-/** An acquired mutex or rw-lock and its level in the latching order */
-struct sync_level_t{
- void* latch; /*!< pointer to a mutex or an
- rw-lock; NULL means that
- the slot is empty */
- ulint level; /*!< level of the latch in the
- latching order. This field is
- overloaded to serve as a node in a
- linked list of free nodes too. When
- latch == NULL then this will contain
- the ordinal value of the next free
- element */
-};
-#endif /* UNIV_SYNC_DEBUG */
-
-/******************************************************************//**
-Creates, or rather, initializes a mutex object in a specified memory
-location (which must be appropriately aligned). The mutex is initialized
-in the reset state. Explicit freeing of the mutex with mutex_free is
-necessary only if the memory block containing it is freed. */
-UNIV_INTERN
+#ifdef UNIV_PFS_RWLOCK
+mysql_pfs_key_t btr_search_latch_key;
+mysql_pfs_key_t buf_block_lock_key;
+# ifdef UNIV_DEBUG
+mysql_pfs_key_t buf_block_debug_latch_key;
+# endif /* UNIV_DEBUG */
+mysql_pfs_key_t checkpoint_lock_key;
+mysql_pfs_key_t dict_operation_lock_key;
+mysql_pfs_key_t dict_table_stats_key;
+mysql_pfs_key_t hash_table_locks_key;
+mysql_pfs_key_t index_tree_rw_lock_key;
+mysql_pfs_key_t index_online_log_key;
+mysql_pfs_key_t fil_space_latch_key;
+mysql_pfs_key_t fts_cache_rw_lock_key;
+mysql_pfs_key_t fts_cache_init_rw_lock_key;
+mysql_pfs_key_t trx_i_s_cache_lock_key;
+mysql_pfs_key_t trx_purge_latch_key;
+#endif /* UNIV_PFS_RWLOCK */
+
+/** For monitoring active mutexes */
+MutexMonitor mutex_monitor;
+
+/**
+Prints wait info of the sync system.
+@param file - where to print */
+static
void
-mutex_create_func(
-/*==============*/
- ib_mutex_t* mutex, /*!< in: pointer to memory */
- const char* cmutex_name, /*!< in: mutex name */
-#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
-#endif /* UNIV_DEBUG */
- const char* cfile_name, /*!< in: file name where created */
- ulint cline) /*!< in: file line where created */
+sync_print_wait_info(FILE* file)
{
-#if defined(HAVE_ATOMIC_BUILTINS)
- mutex_reset_lock_word(mutex);
-#else
- os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mutex->os_fast_mutex);
- mutex->lock_word = 0;
-#endif
- mutex->event = os_event_create();
- mutex_set_waiters(mutex, 0);
-#ifdef UNIV_DEBUG
- mutex->magic_n = MUTEX_MAGIC_N;
-#endif /* UNIV_DEBUG */
-
- mutex->line = 0;
- mutex->file_name = "not yet reserved";
-#ifdef UNIV_SYNC_DEBUG
- mutex->level = level;
-#endif /* UNIV_SYNC_DEBUG */
- mutex->cfile_name = cfile_name;
- mutex->cline = cline;
- mutex->count_os_wait = 0;
- mutex->cmutex_name = cmutex_name;
-
- /* Check that lock_word is aligned; this is important on Intel */
- ut_ad(((ulint)(&(mutex->lock_word))) % 4 == 0);
-
- /* NOTE! The very first mutexes are not put to the mutex list */
-
- if (mutex == &mutex_list_mutex
-#ifdef UNIV_SYNC_DEBUG
- || mutex == &sync_thread_mutex
-#endif /* UNIV_SYNC_DEBUG */
- ) {
-
- return;
- }
-
- mutex_enter(&mutex_list_mutex);
-
- ut_ad(UT_LIST_GET_LEN(mutex_list) == 0
- || UT_LIST_GET_FIRST(mutex_list)->magic_n == MUTEX_MAGIC_N);
-
- UT_LIST_ADD_FIRST(list, mutex_list, mutex);
+ fprintf(file,
+ "RW-shared spins " UINT64PF ", rounds " UINT64PF ","
+ " OS waits " UINT64PF "\n"
+ "RW-excl spins " UINT64PF ", rounds " UINT64PF ","
+ " OS waits " UINT64PF "\n"
+ "RW-sx spins " UINT64PF ", rounds " UINT64PF ","
+ " OS waits " UINT64PF "\n",
+ (ib_uint64_t) rw_lock_stats.rw_s_spin_wait_count,
+ (ib_uint64_t) rw_lock_stats.rw_s_spin_round_count,
+ (ib_uint64_t) rw_lock_stats.rw_s_os_wait_count,
+ (ib_uint64_t) rw_lock_stats.rw_x_spin_wait_count,
+ (ib_uint64_t) rw_lock_stats.rw_x_spin_round_count,
+ (ib_uint64_t) rw_lock_stats.rw_x_os_wait_count,
+ (ib_uint64_t) rw_lock_stats.rw_sx_spin_wait_count,
+ (ib_uint64_t) rw_lock_stats.rw_sx_spin_round_count,
+ (ib_uint64_t) rw_lock_stats.rw_sx_os_wait_count);
- mutex_exit(&mutex_list_mutex);
+ fprintf(file,
+ "Spin rounds per wait: %.2f RW-shared,"
+ " %.2f RW-excl, %.2f RW-sx\n",
+ (double) rw_lock_stats.rw_s_spin_round_count /
+ (rw_lock_stats.rw_s_spin_wait_count
+ ? rw_lock_stats.rw_s_spin_wait_count : 1),
+ (double) rw_lock_stats.rw_x_spin_round_count /
+ (rw_lock_stats.rw_x_spin_wait_count
+ ? rw_lock_stats.rw_x_spin_wait_count : 1),
+ (double) rw_lock_stats.rw_sx_spin_round_count /
+ (rw_lock_stats.rw_sx_spin_wait_count
+ ? rw_lock_stats.rw_sx_spin_wait_count : 1));
}
-/******************************************************************//**
-NOTE! Use the corresponding macro mutex_free(), not directly this function!
-Calling this function is obligatory only if the memory buffer containing
-the mutex is freed. Removes a mutex object from the mutex list. The mutex
-is checked to be in the reset state. */
-UNIV_INTERN
+/**
+Prints info of the sync system.
+@param file - where to print */
void
-mutex_free_func(
-/*============*/
- ib_mutex_t* mutex) /*!< in: mutex */
+sync_print(FILE* file)
{
- ut_ad(mutex_validate(mutex));
- ut_a(mutex_get_lock_word(mutex) == 0);
- ut_a(mutex_get_waiters(mutex) == 0);
-
-#ifdef UNIV_MEM_DEBUG
- if (mutex == &mem_hash_mutex) {
- ut_ad(UT_LIST_GET_LEN(mutex_list) == 1);
- ut_ad(UT_LIST_GET_FIRST(mutex_list) == &mem_hash_mutex);
- UT_LIST_REMOVE(list, mutex_list, mutex);
- goto func_exit;
- }
-#endif /* UNIV_MEM_DEBUG */
-
- if (mutex != &mutex_list_mutex
-#ifdef UNIV_SYNC_DEBUG
- && mutex != &sync_thread_mutex
-#endif /* UNIV_SYNC_DEBUG */
- ) {
-
- mutex_enter(&mutex_list_mutex);
-
- ut_ad(!UT_LIST_GET_PREV(list, mutex)
- || UT_LIST_GET_PREV(list, mutex)->magic_n
- == MUTEX_MAGIC_N);
- ut_ad(!UT_LIST_GET_NEXT(list, mutex)
- || UT_LIST_GET_NEXT(list, mutex)->magic_n
- == MUTEX_MAGIC_N);
-
- UT_LIST_REMOVE(list, mutex_list, mutex);
-
- mutex_exit(&mutex_list_mutex);
- }
-
- os_event_free(mutex->event);
-#ifdef UNIV_MEM_DEBUG
-func_exit:
-#endif /* UNIV_MEM_DEBUG */
-#if !defined(HAVE_ATOMIC_BUILTINS)
- os_fast_mutex_free(&(mutex->os_fast_mutex));
-#endif
- /* If we free the mutex protecting the mutex list (freeing is
- not necessary), we have to reset the magic number AFTER removing
- it from the list. */
#ifdef UNIV_DEBUG
- mutex->magic_n = 0;
+ rw_lock_list_print_info(file);
#endif /* UNIV_DEBUG */
- return;
-}
-
-/********************************************************************//**
-NOTE! Use the corresponding macro in the header file, not this function
-directly. Tries to lock the mutex for the current thread. If the lock is not
-acquired immediately, returns with return value 1.
-@return 0 if succeed, 1 if not */
-UNIV_INTERN
-ulint
-mutex_enter_nowait_func(
-/*====================*/
- ib_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name MY_ATTRIBUTE((unused)),
- /*!< in: file name where mutex
- requested */
- ulint line MY_ATTRIBUTE((unused)))
- /*!< in: line where requested */
-{
- ut_ad(mutex_validate(mutex));
- if (!ib_mutex_test_and_set(mutex)) {
-
- mutex->thread_id = os_thread_get_curr_id();
-#ifdef UNIV_SYNC_DEBUG
- mutex_set_debug_info(mutex, file_name, line);
-#else
- if (srv_instrument_semaphores) {
- mutex->file_name = file_name;
- mutex->line = line;
- }
-#endif
- return(0); /* Succeeded! */
- }
+ sync_array_print(file);
- return(1);
+ sync_print_wait_info(file);
}
-#ifdef UNIV_DEBUG
-/******************************************************************//**
-Checks that the mutex has been initialized.
-@return TRUE */
-UNIV_INTERN
-ibool
-mutex_validate(
-/*===========*/
- const ib_mutex_t* mutex) /*!< in: mutex */
+/** Return the basename of a filename, e.g., p = "/a/b/c/d/e.cc" -> "e.cc"
+@param[in] filename Name from where to extract the basename
+@return the basename */
+const char*
+sync_basename(const char* filename)
{
- ut_a(mutex);
+ const char* ptr = filename + strlen(filename) - 1;
- if (mutex->magic_n != MUTEX_MAGIC_N) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Mutex %p not initialized file %s line %lu.",
- mutex, mutex->cfile_name, mutex->cline);
+ while (ptr > filename && *ptr != '/' && *ptr != '\\') {
+ --ptr;
}
- ut_ad(mutex->magic_n == MUTEX_MAGIC_N);
- return(TRUE);
-}
+ ++ptr;
-/******************************************************************//**
-Checks that the current thread owns the mutex. Works only in the debug
-version.
-@return TRUE if owns */
-UNIV_INTERN
-ibool
-mutex_own(
-/*======*/
- const ib_mutex_t* mutex) /*!< in: mutex */
-{
- ut_ad(mutex_validate(mutex));
-
- return(mutex_get_lock_word(mutex) == 1
- && os_thread_eq(mutex->thread_id, os_thread_get_curr_id()));
+ return(ptr);
}
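
/* Usage of sync_basename() above. The loop stops either on a path
separator or at the start of the string, and ++ptr then skips one
character, so the function expects __FILE__-style inputs that contain
at least one separator: */
sync_basename("/a/b/c/d/e.cc");           /* -> "e.cc" */
sync_basename("storage\\innobase\\f.cc"); /* -> "f.cc" */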
-#endif /* UNIV_DEBUG */
-/******************************************************************//**
-Sets the waiters field in a mutex. */
-UNIV_INTERN
-void
-mutex_set_waiters(
-/*==============*/
- ib_mutex_t* mutex, /*!< in: mutex */
- ulint n) /*!< in: value to set */
+/** String representation of the filename and line number where the
+latch was created
+@param[in] id Latch ID
+@param[in]	created		Filename and line number where it was created
+@return the string representation */
+std::string
+sync_mutex_to_string(
+ latch_id_t id,
+ const std::string& created)
{
- volatile ulint* ptr; /* declared volatile to ensure that
- the value is stored to memory */
- ut_ad(mutex);
+ std::ostringstream msg;
- ptr = &(mutex->waiters);
+ msg << "Mutex " << sync_latch_get_name(id) << " "
+ << "created " << created;
- *ptr = n; /* Here we assume that the write of a single
- word in memory is atomic */
+ return(msg.str());
}
-/******************************************************************//**
-Reserves a mutex for the current thread. If the mutex is reserved, the
-function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
-for the mutex before suspending the thread. */
-UNIV_INTERN
+/** Enable the mutex monitoring */
void
-mutex_spin_wait(
-/*============*/
- ib_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where mutex
- requested */
- ulint line) /*!< in: line where requested */
+MutexMonitor::enable()
{
- ulint i; /* spin round count */
- ulint index; /* index of the reserved wait cell */
- sync_array_t* sync_arr;
- size_t counter_index;
-
- counter_index = (size_t) os_thread_get_curr_id();
-
- ut_ad(mutex);
+ /** Note: We don't add any latch meta-data after startup. Therefore
+ there is no need to use a mutex here. */
- /* This update is not thread safe, but we don't mind if the count
- isn't exact. Moved out of ifdef that follows because we are willing
- to sacrifice the cost of counting this as the data is valuable.
- Count the number of calls to mutex_spin_wait. */
- mutex_spin_wait_count.add(counter_index, 1);
+ LatchMetaData::iterator end = latch_meta.end();
-mutex_loop:
+ for (LatchMetaData::iterator it = latch_meta.begin(); it != end; ++it) {
- i = 0;
-
- /* Spin waiting for the lock word to become zero. Note that we do
- not have to assume that the read access to the lock word is atomic,
- as the actual locking is always committed with atomic test-and-set.
- In reality, however, all processors probably have an atomic read of
- a memory word. */
-
-spin_loop:
-
- HMT_low();
- os_rmb;
- while (mutex_get_lock_word(mutex) != 0 && i < SYNC_SPIN_ROUNDS) {
- if (srv_spin_wait_delay) {
- ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
+ if (*it != NULL) {
+ (*it)->get_counter()->enable();
}
- i++;
}
- HMT_medium();
-
- if (i >= SYNC_SPIN_ROUNDS) {
- os_thread_yield();
- }
-
- mutex_spin_round_count.add(counter_index, i);
-
- if (ib_mutex_test_and_set(mutex) == 0) {
- /* Succeeded! */
-
- mutex->thread_id = os_thread_get_curr_id();
-#ifdef UNIV_SYNC_DEBUG
- mutex_set_debug_info(mutex, file_name, line);
-#endif
- if (srv_instrument_semaphores) {
- mutex->file_name = file_name;
- mutex->line = line;
- }
-
- return;
- }
-
- /* We may end up with a situation where lock_word is 0 but the OS
- fast mutex is still reserved. On FreeBSD the OS does not seem to
- schedule a thread which is constantly calling pthread_mutex_trylock
- (in ib_mutex_test_and_set implementation). Then we could end up
- spinning here indefinitely. The following 'i++' stops this infinite
- spin. */
-
- i++;
-
- if (i < SYNC_SPIN_ROUNDS) {
- goto spin_loop;
- }
-
- sync_arr = sync_array_get_and_reserve_cell(mutex, SYNC_MUTEX,
- file_name, line, &index);
-
- /* The memory order of the array reservation and the change in the
- waiters field is important: when we suspend a thread, we first
- reserve the cell and then set waiters field to 1. When threads are
- released in mutex_exit, the waiters field is first set to zero and
- then the event is set to the signaled state. */
-
- mutex_set_waiters(mutex, 1);
-
- /* Make sure waiters store won't pass over mutex_test_and_set */
-#ifdef __powerpc__
- os_mb;
-#endif
-
- /* Try to reserve still a few times */
- for (i = 0; i < 4; i++) {
- if (ib_mutex_test_and_set(mutex) == 0) {
- /* Succeeded! Free the reserved wait cell */
-
- sync_array_free_cell(sync_arr, index);
-
- mutex->thread_id = os_thread_get_curr_id();
-#ifdef UNIV_SYNC_DEBUG
- mutex_set_debug_info(mutex, file_name, line);
-#endif
- if (srv_instrument_semaphores) {
- mutex->file_name = file_name;
- mutex->line = line;
- }
-
- return;
-
- /* Note that in this case we leave the waiters field
- set to 1. We cannot reset it to zero, as we do not
- know if there are other waiters. */
- }
- }
-
- /* Now we know that there has been some thread holding the mutex
- after the change in the wait array and the waiters field was made.
- Now there is no risk of infinite wait on the event. */
-
- mutex_os_wait_count.add(counter_index, 1);
-
- mutex->count_os_wait++;
-
- sync_array_wait_event(sync_arr, index);
-
- goto mutex_loop;
-}
-
-/******************************************************************//**
-Releases the threads waiting in the primary wait array for this mutex. */
-UNIV_INTERN
-void
-mutex_signal_object(
-/*================*/
- ib_mutex_t* mutex) /*!< in: mutex */
-{
- mutex_set_waiters(mutex, 0);
-
- /* The memory order of resetting the waiters field and
- signaling the object is important. See LEMMA 1 above. */
- os_event_set(mutex->event);
- sync_array_object_signalled();
-}
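
/* The waiter/releaser handshake that mutex_spin_wait() and
mutex_signal_object() above implement, reduced to its ordering
skeleton. Event here is a toy condition-variable wrapper; the real
os_event additionally carries a signal_count so a set() between
reset() and wait() is never lost (LEMMA 1 and 2). Names are
hypothetical throughout. */
#include <atomic>
#include <condition_variable>
#include <mutex>

class Event {
    std::mutex              m;
    std::condition_variable cv;
    bool                    is_set = false;
public:
    void reset() { std::lock_guard<std::mutex> g(m); is_set = false; }
    void set()
    {
        { std::lock_guard<std::mutex> g(m); is_set = true; }
        cv.notify_all();
    }
    void wait()
    {
        std::unique_lock<std::mutex> l(m);
        cv.wait(l, [this] { return is_set; });
    }
};

struct Mutex {
    std::atomic<int> lock_word{0};
    std::atomic<int> waiters{0};
    Event            event;
};

void slow_path_acquire(Mutex& m)        /* spin phase omitted */
{
    while (m.lock_word.exchange(1) != 0) {
        m.event.reset();
        m.waiters.store(1);             /* 1: announce ourselves */
        if (m.lock_word.exchange(1) == 0) {
            return;                     /* 2: re-test, then...   */
        }
        m.event.wait();                 /* 3: ...safe to sleep   */
    }
}

void release(Mutex& m)
{
    m.lock_word.store(0);
    m.waiters.store(0);  /* clear waiters first,            */
    m.event.set();       /* then signal - never the reverse */
}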
-
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Sets the debug information for a reserved mutex. */
-UNIV_INTERN
-void
-mutex_set_debug_info(
-/*=================*/
- ib_mutex_t* mutex, /*!< in: mutex */
- const char* file_name, /*!< in: file where requested */
- ulint line) /*!< in: line where requested */
-{
- ut_ad(mutex);
- ut_ad(file_name);
-
- sync_thread_add_level(mutex, mutex->level, FALSE);
-
- mutex->file_name = file_name;
- mutex->line = line;
}
-/******************************************************************//**
-Gets the debug information for a reserved mutex. */
-UNIV_INTERN
+/** Disable the mutex monitoring */
void
-mutex_get_debug_info(
-/*=================*/
- ib_mutex_t* mutex, /*!< in: mutex */
- const char** file_name, /*!< out: file where requested */
- ulint* line, /*!< out: line where requested */
- os_thread_id_t* thread_id) /*!< out: id of the thread which owns
- the mutex */
+MutexMonitor::disable()
{
- ut_ad(mutex);
+ /** Note: We don't add any latch meta-data after startup. Therefore
+ there is no need to use a mutex here. */
- *file_name = mutex->file_name;
- *line = mutex->line;
- *thread_id = mutex->thread_id;
-}
+ LatchMetaData::iterator end = latch_meta.end();
-/******************************************************************//**
-Prints debug info of currently reserved mutexes. */
-static
-void
-mutex_list_print_info(
-/*==================*/
- FILE* file) /*!< in: file where to print */
-{
- ib_mutex_t* mutex;
- const char* file_name;
- ulint line;
- os_thread_id_t thread_id;
- ulint count = 0;
+ for (LatchMetaData::iterator it = latch_meta.begin(); it != end; ++it) {
- fputs("----------\n"
- "MUTEX INFO\n"
- "----------\n", file);
-
- mutex_enter(&mutex_list_mutex);
-
- mutex = UT_LIST_GET_FIRST(mutex_list);
-
- while (mutex != NULL) {
- count++;
-
- if (mutex_get_lock_word(mutex) != 0) {
- mutex_get_debug_info(mutex, &file_name, &line,
- &thread_id);
- fprintf(file,
- "Locked mutex: addr %p thread %ld"
- " file %s line %ld\n",
- (void*) mutex, os_thread_pf(thread_id),
- file_name, line);
+ if (*it != NULL) {
+ (*it)->get_counter()->disable();
}
-
- mutex = UT_LIST_GET_NEXT(list, mutex);
}
-
- fprintf(file, "Total number of mutexes %ld\n", count);
-
- mutex_exit(&mutex_list_mutex);
}
-/******************************************************************//**
-Counts currently reserved mutexes. Works only in the debug version.
-@return number of reserved mutexes */
-UNIV_INTERN
-ulint
-mutex_n_reserved(void)
-/*==================*/
-{
- ib_mutex_t* mutex;
- ulint count = 0;
-
- mutex_enter(&mutex_list_mutex);
-
- for (mutex = UT_LIST_GET_FIRST(mutex_list);
- mutex != NULL;
- mutex = UT_LIST_GET_NEXT(list, mutex)) {
-
- if (mutex_get_lock_word(mutex) != 0) {
-
- count++;
- }
- }
-
- mutex_exit(&mutex_list_mutex);
-
- ut_a(count >= 1);
-
- /* Subtract one, because this function itself was holding
- one mutex (mutex_list_mutex) */
-
- return(count - 1);
-}
-
-/******************************************************************//**
-Returns TRUE if no mutex or rw-lock is currently locked. Works only in
-the debug version.
-@return TRUE if no mutexes and rw-locks reserved */
-UNIV_INTERN
-ibool
-sync_all_freed(void)
-/*================*/
-{
- return(mutex_n_reserved() + rw_lock_n_locked() == 0);
-}
-
-/******************************************************************//**
-Looks for the thread slot for the calling thread.
-@return pointer to thread slot, NULL if not found */
-static
-sync_thread_t*
-sync_thread_level_arrays_find_slot(void)
-/*====================================*/
-
-{
- ulint i;
- os_thread_id_t id;
-
- id = os_thread_get_curr_id();
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
- sync_thread_t* slot;
-
- slot = &sync_thread_level_arrays[i];
-
- if (slot->levels && os_thread_eq(slot->id, id)) {
-
- return(slot);
- }
- }
-
- return(NULL);
-}
-
-/******************************************************************//**
-Looks for an unused thread slot.
-@return pointer to thread slot */
-static
-sync_thread_t*
-sync_thread_level_arrays_find_free(void)
-/*====================================*/
-
-{
- ulint i;
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
- sync_thread_t* slot;
-
- slot = &sync_thread_level_arrays[i];
-
- if (slot->levels == NULL) {
-
- return(slot);
- }
- }
-
- return(NULL);
-}
-
-/******************************************************************//**
-Print warning. */
-static
+/** Reset the mutex monitoring counters */
void
-sync_print_warning(
-/*===============*/
- const sync_level_t* slot) /*!< in: slot for which to
- print warning */
+MutexMonitor::reset()
{
- ib_mutex_t* mutex;
+ /** Note: We don't add any latch meta-data after startup. Therefore
+ there is no need to use a mutex here. */
- mutex = static_cast<ib_mutex_t*>(slot->latch);
+ LatchMetaData::iterator end = latch_meta.end();
- if (mutex->magic_n == MUTEX_MAGIC_N) {
- fprintf(stderr,
- "Mutex created at %s %lu\n",
- innobase_basename(mutex->cfile_name),
- (ulong) mutex->cline);
+ for (LatchMetaData::iterator it = latch_meta.begin(); it != end; ++it) {
- if (mutex_get_lock_word(mutex) != 0) {
- ulint line;
- const char* file_name;
- os_thread_id_t thread_id;
-
- mutex_get_debug_info(
- mutex, &file_name, &line, &thread_id);
-
- fprintf(stderr,
- "InnoDB: Locked mutex:"
- " addr %p thread %ld file %s line %ld\n",
- (void*) mutex, os_thread_pf(thread_id),
- file_name, (ulong) line);
- } else {
- fputs("Not locked\n", stderr);
+ if (*it != NULL) {
+ (*it)->get_counter()->reset();
}
- } else {
- rw_lock_t* lock;
-
- lock = static_cast<rw_lock_t*>(slot->latch);
-
- rw_lock_print(lock);
}
-}
-
-/******************************************************************//**
-Checks if all the level values stored in the level array are greater than
-the given limit.
-@return TRUE if all greater */
-static
-ibool
-sync_thread_levels_g(
-/*=================*/
- sync_arr_t* arr, /*!< in: pointer to level array for an OS
- thread */
- ulint limit, /*!< in: level limit */
- ulint warn) /*!< in: TRUE=display a diagnostic message */
-{
- ulint i;
-
- for (i = 0; i < arr->size(); i++) {
- const sync_level_t* slot;
- slot = (const sync_level_t*)&(arr->at(i));
+ mutex_enter(&rw_lock_list_mutex);
- if (slot->latch != NULL && slot->level <= limit) {
- if (warn) {
- fprintf(stderr,
- "InnoDB: sync levels should be"
- " > %lu but a level is %lu\n",
- (ulong) limit, (ulong) slot->level);
+ for (rw_lock_t* rw_lock = UT_LIST_GET_FIRST(rw_lock_list);
+ rw_lock != NULL;
+ rw_lock = UT_LIST_GET_NEXT(list, rw_lock)) {
- sync_print_warning(slot);
- }
-
- return(FALSE);
- }
+ rw_lock->count_os_wait = 0;
}
- return(TRUE);
-}
-
-/******************************************************************//**
-Checks if the level value is stored in the level array.
-@return slot if found or NULL */
-static
-const sync_level_t*
-sync_thread_levels_contain(
-/*=======================*/
- sync_arr_t* arr, /*!< in: pointer to level array for an OS
- thread */
- ulint level) /*!< in: level */
-{
- ulint i;
-
- for (i = 0; i < arr->size(); i++) {
- const sync_level_t* slot;
-
- slot = (const sync_level_t*)&(arr->at(i));
-
- if (slot->latch != NULL && slot->level == level) {
-
- return(slot);
- }
- }
-
- return(NULL);
-}
-
-/******************************************************************//**
-Checks if the level array for the current thread contains a
-mutex or rw-latch at the specified level.
-@return a matching latch, or NULL if not found */
-UNIV_INTERN
-void*
-sync_thread_levels_contains(
-/*========================*/
- ulint level) /*!< in: latching order level
- (SYNC_DICT, ...)*/
-{
- ulint i;
- sync_arr_t* arr;
- sync_thread_t* thread_slot;
-
- if (!sync_order_checks_on) {
-
- return(NULL);
- }
-
- mutex_enter(&sync_thread_mutex);
-
- thread_slot = sync_thread_level_arrays_find_slot();
-
- if (thread_slot == NULL) {
-
- mutex_exit(&sync_thread_mutex);
-
- return(NULL);
- }
-
- arr = thread_slot->levels;
-
- for (i = 0; i < arr->size(); i++) {
- sync_level_t* slot;
-
- slot = (sync_level_t*)&(arr->at(i));
-
- if (slot->latch != NULL && slot->level == level) {
-
- mutex_exit(&sync_thread_mutex);
- return(slot->latch);
- }
- }
-
- mutex_exit(&sync_thread_mutex);
-
- return(NULL);
-}
-
-/******************************************************************//**
-Checks that the level array for the current thread is empty.
-@return a latch, or NULL if empty except the exceptions specified below */
-UNIV_INTERN
-void*
-sync_thread_levels_nonempty_gen(
-/*============================*/
- ibool dict_mutex_allowed) /*!< in: TRUE if dictionary mutex is
- allowed to be owned by the thread */
-{
- ulint i;
- sync_arr_t* arr;
- sync_thread_t* thread_slot;
-
- if (!sync_order_checks_on) {
-
- return(NULL);
- }
-
- mutex_enter(&sync_thread_mutex);
-
- thread_slot = sync_thread_level_arrays_find_slot();
-
- if (thread_slot == NULL) {
-
- mutex_exit(&sync_thread_mutex);
-
- return(NULL);
- }
-
- arr = thread_slot->levels;
-
- for (i = 0; i < arr->size(); ++i) {
- const sync_level_t* slot;
-
- slot = (const sync_level_t*)&(arr->at(i));
-
- if (slot->latch != NULL
- && (!dict_mutex_allowed
- || (slot->level != SYNC_DICT
- && slot->level != SYNC_DICT_OPERATION
- && slot->level != SYNC_FTS_CACHE))) {
-
- mutex_exit(&sync_thread_mutex);
- ut_error;
-
- return(slot->latch);
- }
- }
-
- mutex_exit(&sync_thread_mutex);
-
- return(NULL);
-}
-
-/******************************************************************//**
-Checks if the level array for the current thread is empty,
-except for the btr_search_latch.
-@return a latch, or NULL if empty except the exceptions specified below */
-UNIV_INTERN
-void*
-sync_thread_levels_nonempty_trx(
-/*============================*/
- ibool has_search_latch)
- /*!< in: TRUE if and only if the thread
- is supposed to hold btr_search_latch */
-{
- ulint i;
- sync_arr_t* arr;
- sync_thread_t* thread_slot;
-
- if (!sync_order_checks_on) {
-
- return(NULL);
- }
-
- ut_a(!has_search_latch
- || sync_thread_levels_contains(SYNC_SEARCH_SYS));
-
- mutex_enter(&sync_thread_mutex);
-
- thread_slot = sync_thread_level_arrays_find_slot();
-
- if (thread_slot == NULL) {
-
- mutex_exit(&sync_thread_mutex);
-
- return(NULL);
- }
-
- arr = thread_slot->levels;
-
- for (i = 0; i < arr->size(); ++i) {
- const sync_level_t* slot;
-
- slot = (const sync_level_t*)&(arr->at(i));
-
- if (slot->latch != NULL
- && (!has_search_latch
- || slot->level != SYNC_SEARCH_SYS)) {
-
- mutex_exit(&sync_thread_mutex);
- ut_error;
-
- return(slot->latch);
- }
- }
-
- mutex_exit(&sync_thread_mutex);
-
- return(NULL);
-}
-
-/******************************************************************//**
-Adds a latch and its level in the thread level array. Allocates the memory
-for the array if called first time for this OS thread. Makes the checks
-against other latch levels stored in the array for this thread. */
-UNIV_INTERN
-void
-sync_thread_add_level(
-/*==================*/
- void* latch, /*!< in: pointer to a mutex or an rw-lock */
- ulint level, /*!< in: level in the latching order; if
- SYNC_LEVEL_VARYING, nothing is done */
- ibool relock) /*!< in: TRUE if re-entering an x-lock */
-{
- sync_arr_t* array;
- sync_thread_t* thread_slot;
- sync_level_t sync_level;
-
- if (!sync_order_checks_on) {
-
- return;
- }
-
- if ((latch == (void*) &sync_thread_mutex)
- || (latch == (void*) &mutex_list_mutex)
- || (latch == (void*) &rw_lock_debug_mutex)
- || (latch == (void*) &rw_lock_list_mutex)) {
-
- return;
- }
-
- if (level == SYNC_LEVEL_VARYING) {
-
- return;
- }
-
- mutex_enter(&sync_thread_mutex);
-
- thread_slot = sync_thread_level_arrays_find_slot();
-
- if (thread_slot == NULL) {
-
- /* We have to allocate the level array for a new thread */
- array = new sync_arr_t();
- ut_a(array != NULL);
- thread_slot = sync_thread_level_arrays_find_free();
- thread_slot->levels = array;
- thread_slot->id = os_thread_get_curr_id();
- }
-
- array = thread_slot->levels;
-
- if (relock) {
- goto levels_ok;
- }
-
- /* NOTE that there is a problem with _NODE and _LEAF levels: if the
- B-tree height changes, then a leaf can change to an internal node
- or the other way around. We do not know at present if this can cause
- unnecessary assertion failures below. */
-
- switch (level) {
- case SYNC_NO_ORDER_CHECK:
- case SYNC_EXTERN_STORAGE:
- case SYNC_TREE_NODE_FROM_HASH:
- /* Do no order checking */
- break;
- case SYNC_TRX_SYS_HEADER:
- if (srv_is_being_started) {
- /* This is violated during trx_sys_create_rsegs()
- when creating additional rollback segments when
- upgrading in innobase_start_or_create_for_mysql(). */
- break;
- }
- /* fall through */
- case SYNC_MEM_POOL:
- case SYNC_MEM_HASH:
- case SYNC_RECV:
- case SYNC_FTS_BG_THREADS:
- case SYNC_WORK_QUEUE:
- case SYNC_FTS_TOKENIZE:
- case SYNC_FTS_OPTIMIZE:
- case SYNC_FTS_CACHE:
- case SYNC_FTS_CACHE_INIT:
- case SYNC_LOG:
- case SYNC_LOG_FLUSH_ORDER:
- case SYNC_ANY_LATCH:
- case SYNC_FILE_FORMAT_TAG:
- case SYNC_DOUBLEWRITE:
- case SYNC_SEARCH_SYS:
- case SYNC_THREADS:
- case SYNC_LOCK_SYS:
- case SYNC_LOCK_WAIT_SYS:
- case SYNC_TRX_SYS:
- case SYNC_IBUF_BITMAP_MUTEX:
- case SYNC_RSEG:
- case SYNC_TRX_UNDO:
- case SYNC_PURGE_LATCH:
- case SYNC_PURGE_QUEUE:
- case SYNC_DICT_AUTOINC_MUTEX:
- case SYNC_DICT_OPERATION:
- case SYNC_DICT_HEADER:
- case SYNC_TRX_I_S_RWLOCK:
- case SYNC_TRX_I_S_LAST_READ:
- case SYNC_IBUF_MUTEX:
- case SYNC_INDEX_ONLINE_LOG:
- case SYNC_STATS_AUTO_RECALC:
- case SYNC_STATS_DEFRAG:
- if (!sync_thread_levels_g(array, level, TRUE)) {
- fprintf(stderr,
- "InnoDB: sync_thread_levels_g(array, %lu)"
- " does not hold!\n", level);
- ut_error;
- }
- break;
- case SYNC_TRX:
- /* Either the thread must own the lock_sys->mutex, or
- it is allowed to own only ONE trx->mutex. */
- if (!sync_thread_levels_g(array, level, FALSE)) {
- ut_a(sync_thread_levels_g(array, level - 1, TRUE));
- ut_a(sync_thread_levels_contain(array, SYNC_LOCK_SYS));
- }
- break;
- case SYNC_BUF_FLUSH_LIST:
- case SYNC_BUF_POOL:
- /* We can have multiple mutexes of this type therefore we
- can only check whether the greater than condition holds. */
- if (!sync_thread_levels_g(array, level-1, TRUE)) {
- fprintf(stderr,
- "InnoDB: sync_thread_levels_g(array, %lu)"
- " does not hold!\n", level-1);
- ut_error;
- }
- break;
-
-
- case SYNC_BUF_PAGE_HASH:
- /* Multiple page_hash locks are only allowed during
- buf_validate and that is where buf_pool mutex is already
- held. */
- /* Fall through */
-
- case SYNC_BUF_BLOCK:
- /* Either the thread must own the buffer pool mutex
- (buf_pool->mutex), or it is allowed to latch only ONE
- buffer block (block->mutex or buf_pool->zip_mutex). */
- if (!sync_thread_levels_g(array, level, FALSE)) {
- ut_a(sync_thread_levels_g(array, level - 1, TRUE));
- ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
- }
- break;
- case SYNC_REC_LOCK:
- if (sync_thread_levels_contain(array, SYNC_LOCK_SYS)) {
- ut_a(sync_thread_levels_g(array, SYNC_REC_LOCK - 1,
- TRUE));
- } else {
- ut_a(sync_thread_levels_g(array, SYNC_REC_LOCK, TRUE));
- }
- break;
- case SYNC_IBUF_BITMAP:
- /* Either the thread must own the master mutex to all
- the bitmap pages, or it is allowed to latch only ONE
- bitmap page. */
- if (sync_thread_levels_contain(array,
- SYNC_IBUF_BITMAP_MUTEX)) {
- ut_a(sync_thread_levels_g(array, SYNC_IBUF_BITMAP - 1,
- TRUE));
- } else {
- /* This is violated during trx_sys_create_rsegs()
- when creating additional rollback segments when
- upgrading in innobase_start_or_create_for_mysql(). */
- ut_a(srv_is_being_started
- || sync_thread_levels_g(array, SYNC_IBUF_BITMAP,
- TRUE));
- }
- break;
- case SYNC_FSP_PAGE:
- ut_a(sync_thread_levels_contain(array, SYNC_FSP));
- break;
- case SYNC_FSP:
- ut_a(sync_thread_levels_contain(array, SYNC_FSP)
- || sync_thread_levels_g(array, SYNC_FSP, TRUE));
- break;
- case SYNC_TRX_UNDO_PAGE:
- /* Purge is allowed to read in as many UNDO pages as it likes,
- there was a bogus rule here earlier that forced the caller to
- acquire the trx_purge_t::mutex. The purge mutex did not really
- protect anything because it was only ever acquired by the
- single purge thread. The purge thread can read the UNDO pages
- without any covering mutex. */
-
- ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO)
- || sync_thread_levels_contain(array, SYNC_RSEG)
- || sync_thread_levels_g(array, level - 1, TRUE));
- break;
- case SYNC_RSEG_HEADER:
- ut_a(sync_thread_levels_contain(array, SYNC_RSEG));
- break;
- case SYNC_RSEG_HEADER_NEW:
- ut_a(sync_thread_levels_contain(array, SYNC_FSP_PAGE));
- break;
- case SYNC_TREE_NODE:
- ut_a(sync_thread_levels_contain(array, SYNC_INDEX_TREE)
- || sync_thread_levels_contain(array, SYNC_DICT_OPERATION)
- || sync_thread_levels_g(array, SYNC_TREE_NODE - 1, TRUE));
- break;
- case SYNC_TREE_NODE_NEW:
- ut_a(sync_thread_levels_contain(array, SYNC_FSP_PAGE));
- break;
- case SYNC_INDEX_TREE:
- ut_a(sync_thread_levels_g(array, SYNC_TREE_NODE - 1, TRUE));
- break;
- case SYNC_IBUF_TREE_NODE:
- ut_a(sync_thread_levels_contain(array, SYNC_IBUF_INDEX_TREE)
- || sync_thread_levels_g(array, SYNC_IBUF_TREE_NODE - 1,
- TRUE));
- break;
- case SYNC_IBUF_TREE_NODE_NEW:
- /* ibuf_add_free_page() allocates new pages for the
- change buffer while only holding the tablespace
- x-latch. These pre-allocated new pages may only be
- taken in use while holding ibuf_mutex, in
- btr_page_alloc_for_ibuf(). */
- ut_a(sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
- || sync_thread_levels_contain(array, SYNC_FSP));
- break;
- case SYNC_IBUF_INDEX_TREE:
- if (sync_thread_levels_contain(array, SYNC_FSP)) {
- ut_a(sync_thread_levels_g(array, level - 1, TRUE));
- } else {
- ut_a(sync_thread_levels_g(
- array, SYNC_IBUF_TREE_NODE - 1, TRUE));
- }
- break;
- case SYNC_IBUF_PESS_INSERT_MUTEX:
- ut_a(sync_thread_levels_g(array, SYNC_FSP - 1, TRUE));
- ut_a(!sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
- break;
- case SYNC_IBUF_HEADER:
- ut_a(sync_thread_levels_g(array, SYNC_FSP - 1, TRUE));
- ut_a(!sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
- ut_a(!sync_thread_levels_contain(array,
- SYNC_IBUF_PESS_INSERT_MUTEX));
- break;
- case SYNC_DICT:
-#ifdef UNIV_DEBUG
- ut_a(buf_debug_prints
- || sync_thread_levels_g(array, SYNC_DICT, TRUE));
-#else /* UNIV_DEBUG */
- ut_a(sync_thread_levels_g(array, SYNC_DICT, TRUE));
-#endif /* UNIV_DEBUG */
- break;
- default:
- ut_error;
- }
-
-levels_ok:
-
- sync_level.latch = latch;
- sync_level.level = level;
- array->push_back(sync_level);
-
- mutex_exit(&sync_thread_mutex);
-}
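
/* The latching-order validation removed above reduces to one
invariant: a thread may only acquire a latch whose level is strictly
below every level it already holds (plus the documented per-level
exceptions). A stripped-down model of sync_thread_levels_g() and the
common case of sync_thread_add_level(), hypothetical names: */
#include <cassert>
#include <vector>

struct HeldLatch {
    const void* latch;
    unsigned    level;
};

/* all currently held levels must be greater than the limit */
bool levels_greater(const std::vector<HeldLatch>& held, unsigned limit)
{
    for (const HeldLatch& h : held) {
        if (h.level <= limit) {
            return false;
        }
    }
    return true;
}

void add_level(std::vector<HeldLatch>& held,
               const void* latch, unsigned level)
{
    /* the real code calls ut_error instead of assert() */
    assert(levels_greater(held, level));
    held.push_back({latch, level});
}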
-
-/******************************************************************//**
-Removes a latch from the thread level array if it is found there.
-@return TRUE if found in the array; it is no error if the latch is
-not found, as we presently are not able to determine the level for
-every latch reservation the program does */
-UNIV_INTERN
-ibool
-sync_thread_reset_level(
-/*====================*/
- void* latch) /*!< in: pointer to a mutex or an rw-lock */
-{
- sync_arr_t* array;
- sync_thread_t* thread_slot;
-
- if (!sync_order_checks_on) {
-
- return(FALSE);
- }
-
- if ((latch == (void*) &sync_thread_mutex)
- || (latch == (void*) &mutex_list_mutex)
- || (latch == (void*) &rw_lock_debug_mutex)
- || (latch == (void*) &rw_lock_list_mutex)) {
-
- return(FALSE);
- }
-
- mutex_enter(&sync_thread_mutex);
-
- thread_slot = sync_thread_level_arrays_find_slot();
-
- if (thread_slot == NULL) {
-
- ut_error;
-
- mutex_exit(&sync_thread_mutex);
- return(FALSE);
- }
-
- array = thread_slot->levels;
-
- for (std::vector<sync_level_t>::iterator it = array->begin(); it != array->end(); ++it) {
- sync_level_t level = *it;
-
- if (level.latch != latch) {
- continue;
- }
-
- array->erase(it);
- mutex_exit(&sync_thread_mutex);
- return(TRUE);
- }
-
- if (((ib_mutex_t*) latch)->magic_n != MUTEX_MAGIC_N) {
- rw_lock_t* rw_lock;
-
- rw_lock = (rw_lock_t*) latch;
-
- if (rw_lock->level == SYNC_LEVEL_VARYING) {
- mutex_exit(&sync_thread_mutex);
-
- return(TRUE);
- }
- }
-
- ut_error;
-
- mutex_exit(&sync_thread_mutex);
-
- return(FALSE);
-}
-#endif /* UNIV_SYNC_DEBUG */
-
-/******************************************************************//**
-Initializes the synchronization data structures. */
-UNIV_INTERN
-void
-sync_init(void)
-/*===========*/
-{
- ut_a(sync_initialized == FALSE);
-
- sync_initialized = TRUE;
-
- sync_array_init(OS_THREAD_MAX_N);
-
-#ifdef UNIV_SYNC_DEBUG
- /* Create the thread latch level array where the latch levels
- are stored for each OS thread */
-
- sync_thread_level_arrays = static_cast<sync_thread_t*>(
- calloc(sizeof(sync_thread_t), OS_THREAD_MAX_N));
-
- ut_a(sync_thread_level_arrays != NULL);
-
-#endif /* UNIV_SYNC_DEBUG */
- /* Init the mutex list and create the mutex to protect it. */
-
- UT_LIST_INIT(mutex_list);
- mutex_create(mutex_list_mutex_key, &mutex_list_mutex,
- SYNC_NO_ORDER_CHECK);
-#ifdef UNIV_SYNC_DEBUG
- mutex_create(sync_thread_mutex_key, &sync_thread_mutex,
- SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
-
- /* Init the rw-lock list and create the mutex to protect it. */
-
- UT_LIST_INIT(rw_lock_list);
- mutex_create(rw_lock_list_mutex_key, &rw_lock_list_mutex,
- SYNC_NO_ORDER_CHECK);
-
-#ifdef UNIV_SYNC_DEBUG
- os_fast_mutex_init(rw_lock_debug_mutex_key, &rw_lock_debug_mutex);
-#endif /* UNIV_SYNC_DEBUG */
-}
-
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Frees all debug memory. */
-static
-void
-sync_thread_level_arrays_free(void)
-/*===============================*/
-
-{
- ulint i;
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
- sync_thread_t* slot;
-
- slot = &sync_thread_level_arrays[i];
-
- /* If this slot was allocated then free the slot memory too. */
- if (slot->levels != NULL) {
- delete slot->levels;
- }
- }
-
- free(sync_thread_level_arrays);
- sync_thread_level_arrays = NULL;
-}
-#endif /* UNIV_SYNC_DEBUG */
-
-/******************************************************************//**
-Frees the resources in InnoDB's own synchronization data structures. Use
-os_sync_free() after calling this. */
-UNIV_INTERN
-void
-sync_close(void)
-/*===========*/
-{
- ib_mutex_t* mutex;
-
- sync_array_close();
-
- for (mutex = UT_LIST_GET_FIRST(mutex_list);
- mutex != NULL;
- /* No op */) {
-
-#ifdef UNIV_MEM_DEBUG
- if (mutex == &mem_hash_mutex) {
- mutex = UT_LIST_GET_NEXT(list, mutex);
- continue;
- }
-#endif /* UNIV_MEM_DEBUG */
-
- mutex_free(mutex);
-
- mutex = UT_LIST_GET_FIRST(mutex_list);
- }
-
- mutex_free(&mutex_list_mutex);
-#ifdef UNIV_SYNC_DEBUG
- mutex_free(&sync_thread_mutex);
-
-	/* Switch latching order checks off in sync0sync.cc */
- sync_order_checks_on = FALSE;
-
- sync_thread_level_arrays_free();
- os_fast_mutex_free(&rw_lock_debug_mutex);
-#endif /* UNIV_SYNC_DEBUG */
-
- sync_initialized = FALSE;
-}
-
-/*******************************************************************//**
-Prints wait info of the sync system. */
-UNIV_INTERN
-void
-sync_print_wait_info(
-/*=================*/
- FILE* file) /*!< in: file where to print */
-{
- fprintf(file,
- "Mutex spin waits " UINT64PF ", rounds " UINT64PF ", "
- "OS waits " UINT64PF "\n"
- "RW-shared spins " UINT64PF ", rounds " UINT64PF ", "
- "OS waits " UINT64PF "\n"
- "RW-excl spins " UINT64PF ", rounds " UINT64PF ", "
- "OS waits " UINT64PF "\n",
- (ib_uint64_t) mutex_spin_wait_count,
- (ib_uint64_t) mutex_spin_round_count,
- (ib_uint64_t) mutex_os_wait_count,
- (ib_uint64_t) rw_lock_stats.rw_s_spin_wait_count,
- (ib_uint64_t) rw_lock_stats.rw_s_spin_round_count,
- (ib_uint64_t) rw_lock_stats.rw_s_os_wait_count,
- (ib_uint64_t) rw_lock_stats.rw_x_spin_wait_count,
- (ib_uint64_t) rw_lock_stats.rw_x_spin_round_count,
- (ib_uint64_t) rw_lock_stats.rw_x_os_wait_count);
-
- fprintf(file,
- "Spin rounds per wait: %.2f mutex, %.2f RW-shared, "
- "%.2f RW-excl\n",
- (double) mutex_spin_round_count /
- (mutex_spin_wait_count ? mutex_spin_wait_count : 1),
- (double) rw_lock_stats.rw_s_spin_round_count /
- (rw_lock_stats.rw_s_spin_wait_count
- ? rw_lock_stats.rw_s_spin_wait_count : 1),
- (double) rw_lock_stats.rw_x_spin_round_count /
- (rw_lock_stats.rw_x_spin_wait_count
- ? rw_lock_stats.rw_x_spin_wait_count : 1));
-}
-
-/*******************************************************************//**
-Prints info of the sync system. */
-UNIV_INTERN
-void
-sync_print(
-/*=======*/
- FILE* file) /*!< in: file where to print */
-{
-#ifdef UNIV_SYNC_DEBUG
- mutex_list_print_info(file);
-
- rw_lock_list_print_info(file);
-#endif /* UNIV_SYNC_DEBUG */
-
- sync_array_print(file);
-
- sync_print_wait_info(file);
+ mutex_exit(&rw_lock_list_mutex);
}
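
/* Hypothetical call sites for the MutexMonitor interface defined
above; only enable()/disable()/reset() themselves are taken from the
patch: */
mutex_monitor.enable();   /* start per-latch wait counting            */
mutex_monitor.reset();    /* zero counters and rw-lock count_os_wait  */
mutex_monitor.disable();  /* stop counting                            */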
diff --git a/storage/innobase/trx/trx0i_s.cc b/storage/innobase/trx/trx0i_s.cc
index 57057f2f81e..540194f1dc2 100644
--- a/storage/innobase/trx/trx0i_s.cc
+++ b/storage/innobase/trx/trx0i_s.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2019, MariaDB Corporation.
+Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -29,20 +29,10 @@ table cache" for later retrieval.
Created July 17, 2007 Vasil Dimov
*******************************************************/
-/* Found during the build of 5.5.3 on Linux 2.4 and early 2.6 kernels:
- The includes "univ.i" -> "my_global.h" cause a different path
- to be taken further down with pthread functions and types,
- so they must come first.
- From the symptoms, this is related to bug#46587 in the MySQL bug DB.
-*/
-#include "univ.i"
-
-#include <mysql/plugin.h>
-
+#include "trx0i_s.h"
#include "buf0buf.h"
#include "dict0dict.h"
#include "ha0storage.h"
-#include "ha_prototypes.h"
#include "hash0hash.h"
#include "lock0iter.h"
#include "lock0lock.h"
@@ -53,12 +43,9 @@ Created July 17, 2007 Vasil Dimov
#include "srv0srv.h"
#include "sync0rw.h"
#include "sync0sync.h"
-#include "sync0types.h"
-#include "trx0i_s.h"
#include "trx0sys.h"
-#include "trx0trx.h"
-#include "ut0mem.h"
-#include "ut0ut.h"
+
+#include <sql_class.h>
/** Initial number of rows in the table cache */
#define TABLE_CACHE_INITIAL_ROWSNUM 1024
@@ -115,15 +102,15 @@ noop because it will be empty. */
/* @} */
/** Memory limit passed to ha_storage_put_memlim().
-@param cache hash storage
-@return maximum allowed allocation size */
+@param cache hash storage
+@return maximum allowed allocation size */
#define MAX_ALLOWED_FOR_STORAGE(cache) \
(TRX_I_S_MEM_LIMIT \
- (cache)->mem_allocd)
/** Memory limit in table_cache_create_empty_row().
-@param cache hash storage
-@return maximum allowed allocation size */
+@param cache hash storage
+@return maximum allowed allocation size */
#define MAX_ALLOWED_FOR_ALLOC(cache) \
(TRX_I_S_MEM_LIMIT \
- (cache)->mem_allocd \
@@ -155,6 +142,10 @@ struct trx_i_s_cache_t {
the rest of this structure */
ulonglong last_read; /*!< last time the cache was read;
measured in nanoseconds */
+ ib_mutex_t last_read_mutex;/*!< mutex protecting the
+ last_read member - it is updated
+ inside a shared lock of the
+ rw_lock member */
i_s_table_cache_t innodb_trx; /*!< innodb_trx table */
i_s_table_cache_t innodb_locks; /*!< innodb_locks table */
i_s_table_cache_t innodb_lock_waits;/*!< innodb_lock_waits table */
@@ -185,21 +176,12 @@ static trx_i_s_cache_t trx_i_s_cache_static;
/** This is the intermediate buffer where data needed to fill the
INFORMATION SCHEMA tables is fetched and later retrieved by the C++
code in handler/i_s.cc. */
-UNIV_INTERN trx_i_s_cache_t* trx_i_s_cache = &trx_i_s_cache_static;
-
-/* Key to register the lock/mutex with performance schema */
-#ifdef UNIV_PFS_RWLOCK
-UNIV_INTERN mysql_pfs_key_t trx_i_s_cache_lock_key;
-#endif /* UNIV_PFS_RWLOCK */
-
-#ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t cache_last_read_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
+trx_i_s_cache_t* trx_i_s_cache = &trx_i_s_cache_static;
/*******************************************************************//**
For a record lock that is in waiting state retrieves the only bit that
is set, for a table lock returns ULINT_UNDEFINED.
-@return record number within the heap */
+@return record number within the heap */
static
ulint
wait_lock_get_heap_no(
@@ -262,7 +244,7 @@ table_cache_free(
/* the memory is actually allocated in
table_cache_create_empty_row() */
if (table_cache->chunks[i].base) {
- mem_free(table_cache->chunks[i].base);
+ ut_free(table_cache->chunks[i].base);
table_cache->chunks[i].base = NULL;
}
}
@@ -273,7 +255,7 @@ Returns an empty row from a table cache. The row is allocated if no more
empty rows are available. The number of used rows is incremented.
If the memory limit is hit then NULL is returned and nothing is
allocated.
-@return empty row, or NULL if out of memory */
+@return empty row, or NULL if out of memory */
static
void*
table_cache_create_empty_row(
@@ -343,16 +325,17 @@ table_cache_create_empty_row(
chunk = &table_cache->chunks[i];
- chunk->base = mem_alloc2(req_bytes, &got_bytes);
+ got_bytes = req_bytes;
+ chunk->base = ut_malloc_nokey(req_bytes);
got_rows = got_bytes / table_cache->row_size;
cache->mem_allocd += got_bytes;
#if 0
- printf("allocating chunk %d req bytes=%lu, got bytes=%lu, "
- "row size=%lu, "
- "req rows=%lu, got rows=%lu\n",
+ printf("allocating chunk %d req bytes=%lu, got bytes=%lu,"
+ " row size=%lu,"
+ " req rows=%lu, got rows=%lu\n",
i, req_bytes, got_bytes,
table_cache->row_size,
req_rows, got_rows);
@@ -414,14 +397,13 @@ table_cache_create_empty_row(
#ifdef UNIV_DEBUG
/*******************************************************************//**
Validates a row in the locks cache.
-@return TRUE if valid */
+@return TRUE if valid */
static
ibool
i_s_locks_row_validate(
/*===================*/
const i_s_locks_row_t* row) /*!< in: row to validate */
{
- ut_ad(row->lock_trx_id != 0);
ut_ad(row->lock_mode != NULL);
ut_ad(row->lock_type != NULL);
ut_ad(row->lock_table != NULL);
@@ -450,7 +432,7 @@ i_s_locks_row_validate(
/*******************************************************************//**
Fills i_s_trx_row_t object.
If memory can not be allocated then FALSE is returned.
-@return FALSE if allocation fails */
+@return FALSE if allocation fails */
static
ibool
fill_trx_row(
@@ -468,13 +450,12 @@ fill_trx_row(
which to copy volatile
strings */
{
- const char* stmt;
size_t stmt_len;
const char* s;
ut_ad(lock_mutex_own());
- row->trx_id = trx->id;
+ row->trx_id = trx_get_id_for_print(trx);
row->trx_started = trx->start_time;
row->trx_state = trx_get_que_state_str(trx);
row->requested_lock_row = requested_lock_row;
@@ -490,7 +471,7 @@ fill_trx_row(
row->trx_wait_started = 0;
}
- row->trx_weight = (ullint) TRX_WEIGHT(trx);
+ row->trx_weight = static_cast<uintmax_t>(TRX_WEIGHT(trx));
if (trx->mysql_thd == NULL) {
/* For internal transactions e.g., purge and transactions
@@ -503,19 +484,10 @@ fill_trx_row(
row->trx_mysql_thread_id = thd_get_thread_id(trx->mysql_thd);
- stmt = trx->mysql_thd
- ? innobase_get_stmt(trx->mysql_thd, &stmt_len)
- : NULL;
+ char query[TRX_I_S_TRX_QUERY_MAX_LEN + 1];
+ stmt_len = innobase_get_stmt_safe(trx->mysql_thd, query, sizeof(query));
- if (stmt != NULL) {
- char query[TRX_I_S_TRX_QUERY_MAX_LEN + 1];
-
- if (stmt_len > TRX_I_S_TRX_QUERY_MAX_LEN) {
- stmt_len = TRX_I_S_TRX_QUERY_MAX_LEN;
- }
-
- memcpy(query, stmt, stmt_len);
- query[stmt_len] = '\0';
+ if (stmt_len > 0) {
row->trx_query = static_cast<const char*>(
ha_storage_put_memlim(
@@ -552,7 +524,7 @@ thd_done:
row->trx_tables_in_use = trx->n_mysql_tables_in_use;
- row->trx_tables_locked = trx->mysql_n_tables_locked;
+ row->trx_tables_locked = lock_number_of_tables_locked(&trx->lock);
/* These are protected by both trx->mutex or lock_sys->mutex,
or just lock_sys->mutex. For reading, it suffices to hold
@@ -606,10 +578,6 @@ thd_done:
row->trx_foreign_key_error = NULL;
}
- row->trx_has_search_latch = (ibool) trx->has_search_latch;
-
- row->trx_search_latch_timeout = trx->search_latch_timeout;
-
row->trx_is_read_only = trx->read_only;
row->trx_is_autocommit_non_locking = trx_is_autocommit_non_locking(trx);
@@ -621,7 +589,7 @@ thd_done:
Format the nth field of "rec" and put it in "buf". The result is always
NUL-terminated. Returns the number of bytes that were written to "buf"
(including the terminating NUL).
-@return end of the result */
+@return end of the result */
static
ulint
put_nth_field(
@@ -631,7 +599,7 @@ put_nth_field(
ulint n, /*!< in: number of field */
const dict_index_t* index, /*!< in: index */
const rec_t* rec, /*!< in: record */
- const ulint* offsets)/*!< in: record offsets, returned
+ const offset_t* offsets)/*!< in: record offsets, returned
by rec_get_offsets() */
{
const byte* data;
@@ -679,7 +647,7 @@ put_nth_field(
/*******************************************************************//**
Fills the "lock_data" member of i_s_locks_row_t object.
If memory can not be allocated then FALSE is returned.
-@return FALSE if allocation fails */
+@return FALSE if allocation fails */
static
ibool
fill_lock_data(
@@ -690,18 +658,38 @@ fill_lock_data(
trx_i_s_cache_t* cache) /*!< in/out: cache where to store
volatile data */
{
+ ut_a(lock_get_type(lock) == LOCK_REC);
+
+ switch (heap_no) {
+ case PAGE_HEAP_NO_INFIMUM:
+ case PAGE_HEAP_NO_SUPREMUM:
+ *lock_data = ha_storage_put_str_memlim(
+ cache->storage,
+ heap_no == PAGE_HEAP_NO_INFIMUM
+ ? "infimum pseudo-record"
+ : "supremum pseudo-record",
+ MAX_ALLOWED_FOR_STORAGE(cache));
+ return(*lock_data != NULL);
+ }
+
mtr_t mtr;
const buf_block_t* block;
const page_t* page;
const rec_t* rec;
-
- ut_a(lock_get_type(lock) == LOCK_REC);
+ const dict_index_t* index;
+ ulint n_fields;
+ mem_heap_t* heap;
+ offset_t offsets_onstack[REC_OFFS_NORMAL_SIZE];
+ offset_t* offsets;
+ char buf[TRX_I_S_LOCK_DATA_MAX_LEN];
+ ulint buf_used;
+ ulint i;
mtr_start(&mtr);
- block = buf_page_try_get(lock_rec_get_space_id(lock),
- lock_rec_get_page_no(lock),
+ block = buf_page_try_get(page_id_t(lock_rec_get_space_id(lock),
+ lock_rec_get_page_no(lock)),
&mtr);
if (block == NULL) {
@@ -713,66 +701,43 @@ fill_lock_data(
return(TRUE);
}
- page = (const page_t*) buf_block_get_frame(block);
-
- rec = page_find_rec_with_heap_no(page, heap_no);
-
- if (page_rec_is_infimum(rec)) {
-
- *lock_data = ha_storage_put_str_memlim(
- cache->storage, "infimum pseudo-record",
- MAX_ALLOWED_FOR_STORAGE(cache));
- } else if (page_rec_is_supremum(rec)) {
-
- *lock_data = ha_storage_put_str_memlim(
- cache->storage, "supremum pseudo-record",
- MAX_ALLOWED_FOR_STORAGE(cache));
- } else {
+ page = reinterpret_cast<const page_t*>(buf_block_get_frame(block));
- const dict_index_t* index;
- ulint n_fields;
- mem_heap_t* heap;
- ulint offsets_onstack[REC_OFFS_NORMAL_SIZE];
- ulint* offsets;
- char buf[TRX_I_S_LOCK_DATA_MAX_LEN];
- ulint buf_used;
- ulint i;
+ rec_offs_init(offsets_onstack);
+ offsets = offsets_onstack;
- rec_offs_init(offsets_onstack);
- offsets = offsets_onstack;
+ rec = page_find_rec_with_heap_no(page, heap_no);
- index = lock_rec_get_index(lock);
+ index = lock_rec_get_index(lock);
- n_fields = dict_index_get_n_unique(index);
+ n_fields = dict_index_get_n_unique(index);
- ut_a(n_fields > 0);
+ ut_a(n_fields > 0);
- heap = NULL;
- offsets = rec_get_offsets(rec, index, offsets, n_fields,
- &heap);
+ heap = NULL;
+ offsets = rec_get_offsets(rec, index, offsets, true, n_fields, &heap);
- /* format and store the data */
+ /* format and store the data */
- buf_used = 0;
- for (i = 0; i < n_fields; i++) {
+ buf_used = 0;
+ for (i = 0; i < n_fields; i++) {
- buf_used += put_nth_field(
- buf + buf_used, sizeof(buf) - buf_used,
- i, index, rec, offsets) - 1;
- }
+ buf_used += put_nth_field(
+ buf + buf_used, sizeof(buf) - buf_used,
+ i, index, rec, offsets) - 1;
+ }
- *lock_data = (const char*) ha_storage_put_memlim(
- cache->storage, buf, buf_used + 1,
- MAX_ALLOWED_FOR_STORAGE(cache));
+ *lock_data = (const char*) ha_storage_put_memlim(
+ cache->storage, buf, buf_used + 1,
+ MAX_ALLOWED_FOR_STORAGE(cache));
- if (UNIV_UNLIKELY(heap != NULL)) {
+ if (heap != NULL) {
- /* this means that rec_get_offsets() has created a new
- heap and has stored offsets in it; check that this is
- really the case and free the heap */
- ut_a(offsets != offsets_onstack);
- mem_heap_free(heap);
- }
+ /* this means that rec_get_offsets() has created a new
+ heap and has stored offsets in it; check that this is
+ really the case and free the heap */
+ ut_a(offsets != offsets_onstack);
+ mem_heap_free(heap);
}
mtr_commit(&mtr);
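The restructured fill_lock_data() above handles the infimum/supremum pseudo-records in a switch before fetching any page. A minimal stand-alone sketch of that early-return shape, assuming the conventional values PAGE_HEAP_NO_INFIMUM == 0 and PAGE_HEAP_NO_SUPREMUM == 1:

#include <cstdint>

// Returns the fixed description for a pseudo-record, or nullptr for an
// ordinary record that still requires the page lookup.
const char* pseudo_record_name(std::uint32_t heap_no) {
	switch (heap_no) {
	case 0:  return "infimum pseudo-record";   // PAGE_HEAP_NO_INFIMUM
	case 1:  return "supremum pseudo-record";  // PAGE_HEAP_NO_SUPREMUM
	default: return nullptr;                   // a real user record
	}
}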
@@ -788,7 +753,7 @@ fill_lock_data(
/*******************************************************************//**
Fills i_s_locks_row_t object. Returns its first argument.
If memory can not be allocated then FALSE is returned.
-@return FALSE if allocation fails */
+@return FALSE if allocation fails */
static
ibool
fill_locks_row(
@@ -806,7 +771,7 @@ fill_locks_row(
row->lock_type = lock_get_type_str(lock);
row->lock_table = ha_storage_put_str_memlim(
- cache->storage, lock_get_table_name(lock),
+ cache->storage, lock_get_table_name(lock).m_name,
MAX_ALLOWED_FOR_STORAGE(cache));
/* memory could not be allocated */
@@ -862,7 +827,7 @@ fill_locks_row(
/*******************************************************************//**
Fills i_s_lock_waits_row_t object. Returns its first argument.
-@return result object that's filled */
+@return result object that's filled */
static
i_s_lock_waits_row_t*
fill_lock_waits_row(
@@ -890,7 +855,7 @@ Calculates a hash fold for a lock. For a record lock the fold is
calculated from 4 elements, which uniquely identify a lock at a given
point in time: transaction id, space id, page number, record number.
For a table lock the fold is table's id.
-@return fold */
+@return fold */
static
ulint
fold_lock(
@@ -939,7 +904,7 @@ fold_lock(
/*******************************************************************//**
Checks whether i_s_locks_row_t object represents a lock_t object.
-@return TRUE if they match */
+@return TRUE if they match */
static
ibool
locks_row_eq_lock(
@@ -983,7 +948,7 @@ locks_row_eq_lock(
Searches for a row in the innodb_locks cache that has a specified id.
This happens in O(1) time since a hash table is used. Returns pointer to
the row or NULL if none is found.
-@return row or NULL */
+@return row or NULL */
static
i_s_locks_row_t*
search_innodb_locks(
@@ -1026,7 +991,7 @@ Adds new element to the locks cache, enlarging it if necessary.
Returns a pointer to the added row. If the row is already present then
no row is added and a pointer to the existing row is returned.
If row can not be allocated then NULL is returned.
-@return row */
+@return row */
static
i_s_locks_row_t*
add_lock_to_cache(
@@ -1093,7 +1058,7 @@ add_lock_to_cache(
/*******************************************************************//**
Adds new pair of locks to the lock waits cache.
If memory can not be allocated then FALSE is returned.
-@return FALSE if allocation fails */
+@return FALSE if allocation fails */
static
ibool
add_lock_wait_to_cache(
@@ -1130,7 +1095,7 @@ innodb_locks and a pointer to the added row is returned in
requested_lock_row, otherwise requested_lock_row is set to NULL.
If rows can not be allocated then FALSE is returned and the value of
requested_lock_row is undefined.
-@return FALSE if allocation fails */
+@return FALSE if allocation fails */
static
ibool
add_trx_relevant_locks_to_cache(
@@ -1227,9 +1192,15 @@ Checks if the cache can safely be updated.
@return whether the cache can be updated */
static bool can_cache_be_updated(trx_i_s_cache_t* cache)
{
-#ifdef UNIV_SYNC_DEBUG
- ut_a(rw_lock_own(&cache->rw_lock, RW_LOCK_EX));
-#endif
+ /* Here we read cache->last_read without acquiring its mutex
+ because last_read is only updated when a shared rw lock on the
+ whole cache is being held (see trx_i_s_cache_end_read()) and
+ we are currently holding an exclusive rw lock on the cache.
+ So it is not possible for last_read to be updated while we are
+ reading it. */
+
+ ut_ad(rw_lock_own(&cache->rw_lock, RW_LOCK_X));
+
return my_interval_timer() - cache->last_read > CACHE_MIN_IDLE_TIME_NS;
}
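As a stand-alone illustration of the locking argument in that comment (hypothetical names; std::shared_mutex and steady_clock stand in for the InnoDB rw-lock and my_interval_timer()): a field written only under the shared lock can be read without its dedicated mutex by a thread holding the exclusive lock.

#include <chrono>
#include <cstdint>
#include <shared_mutex>

struct CacheSketch {
	std::shared_mutex rw_lock;          // stand-in for trx_i_s_cache_t::rw_lock
	std::uint64_t     last_read_ns = 0; // written only while rw_lock is held shared
};

constexpr std::uint64_t kMinIdleNs = 100000000; // hypothetical CACHE_MIN_IDLE_TIME_NS

// Caller holds cache.rw_lock exclusively, as can_cache_be_updated() asserts.
bool can_be_updated(const CacheSketch& cache) {
	const std::uint64_t now_ns = static_cast<std::uint64_t>(
		std::chrono::duration_cast<std::chrono::nanoseconds>(
			std::chrono::steady_clock::now().time_since_epoch()).count());
	return now_ns - cache.last_read_ns > kMinIdleNs;
}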
@@ -1259,17 +1230,14 @@ void
fetch_data_into_cache_low(
/*======================*/
trx_i_s_cache_t* cache, /*!< in/out: cache */
- ibool only_ac_nl, /*!< in: only select non-locking
- autocommit transactions */
- trx_list_t* trx_list) /*!< in: trx list */
+ bool read_write, /*!< in: only read-write
+ transactions */
+ trx_ut_list_t* trx_list) /*!< in: trx list */
{
const trx_t* trx;
+ bool rw_trx_list = trx_list == &trx_sys->rw_trx_list;
- ut_ad(trx_list == &trx_sys->rw_trx_list
- || trx_list == &trx_sys->ro_trx_list
- || trx_list == &trx_sys->mysql_trx_list);
-
- ut_ad(only_ac_nl == (trx_list == &trx_sys->mysql_trx_list));
+ ut_ad(rw_trx_list || trx_list == &trx_sys->mysql_trx_list);
/* Iterate over the transaction list and add each one
to innodb_trx's cache. We also add all locks that are relevant
@@ -1279,26 +1247,24 @@ fetch_data_into_cache_low(
for (trx = UT_LIST_GET_FIRST(*trx_list);
trx != NULL;
trx =
- (trx_list == &trx_sys->mysql_trx_list
- ? UT_LIST_GET_NEXT(mysql_trx_list, trx)
- : UT_LIST_GET_NEXT(trx_list, trx))) {
+ (rw_trx_list
+ ? UT_LIST_GET_NEXT(trx_list, trx)
+ : UT_LIST_GET_NEXT(mysql_trx_list, trx))) {
i_s_trx_row_t* trx_row;
i_s_locks_row_t* requested_lock_row;
- if (trx->state == TRX_STATE_NOT_STARTED
- || (only_ac_nl && !trx_is_autocommit_non_locking(trx))) {
+ /* Note: Read-only transactions that modify temporary
+ tables can have a transaction ID */
+ if (!trx_is_started(trx)
+ || (!rw_trx_list && trx->id != 0 && !trx->read_only)) {
continue;
}
assert_trx_nonlocking_or_in_list(trx);
- ut_ad(trx->in_ro_trx_list
- == (trx_list == &trx_sys->ro_trx_list));
-
- ut_ad(trx->in_rw_trx_list
- == (trx_list == &trx_sys->rw_trx_list));
+ ut_ad(trx->in_rw_trx_list == rw_trx_list);
if (!add_trx_relevant_locks_to_cache(cache, trx,
&requested_lock_row)) {
@@ -1307,9 +1273,9 @@ fetch_data_into_cache_low(
return;
}
- trx_row = (i_s_trx_row_t*)
- table_cache_create_empty_row(&cache->innodb_trx,
- cache);
+ trx_row = reinterpret_cast<i_s_trx_row_t*>(
+ table_cache_create_empty_row(
+ &cache->innodb_trx, cache));
/* memory could not be allocated */
if (trx_row == NULL) {
@@ -1321,7 +1287,7 @@ fetch_data_into_cache_low(
if (!fill_trx_row(trx_row, trx, requested_lock_row, cache)) {
/* memory could not be allocated */
- cache->innodb_trx.rows_used--;
+ --cache->innodb_trx.rows_used;
cache->is_truncated = TRUE;
return;
}
@@ -1338,16 +1304,16 @@ fetch_data_into_cache(
trx_i_s_cache_t* cache) /*!< in/out: cache */
{
ut_ad(lock_mutex_own());
- ut_ad(mutex_own(&trx_sys->mutex));
+ ut_ad(trx_sys_mutex_own());
trx_i_s_cache_clear(cache);
- fetch_data_into_cache_low(cache, FALSE, &trx_sys->rw_trx_list);
- fetch_data_into_cache_low(cache, FALSE, &trx_sys->ro_trx_list);
+ /* Capture the state of the read-write transactions. This includes
+ internal transactions too. They are not on mysql_trx_list */
+ fetch_data_into_cache_low(cache, true, &trx_sys->rw_trx_list);
- /* Only select autocommit non-locking selects because they can
- only be on the MySQL transaction list (TRUE). */
- fetch_data_into_cache_low(cache, TRUE, &trx_sys->mysql_trx_list);
+ /* Capture the state of the read-only active transactions */
+ fetch_data_into_cache_low(cache, false, &trx_sys->mysql_trx_list);
cache->is_truncated = FALSE;
}
@@ -1355,17 +1321,12 @@ fetch_data_into_cache(
/*******************************************************************//**
Update the transactions cache if it has not been read for some time.
Called from handler/i_s.cc.
-@return 0 - fetched, 1 - not */
-UNIV_INTERN
+@return 0 - fetched, 1 - not */
int
trx_i_s_possibly_fetch_data_into_cache(
/*===================================*/
trx_i_s_cache_t* cache) /*!< in/out: cache */
{
-#ifdef UNIV_SYNC_DEBUG
- ut_a(rw_lock_own(&cache->rw_lock, RW_LOCK_EX));
-#endif
-
if (!can_cache_be_updated(cache)) {
return(1);
@@ -1375,11 +1336,11 @@ trx_i_s_possibly_fetch_data_into_cache(
lock_mutex_enter();
- mutex_enter(&trx_sys->mutex);
+ trx_sys_mutex_enter();
fetch_data_into_cache(cache);
- mutex_exit(&trx_sys->mutex);
+ trx_sys_mutex_exit();
lock_mutex_exit();
@@ -1392,8 +1353,7 @@ trx_i_s_possibly_fetch_data_into_cache(
/*******************************************************************//**
Returns TRUE if the data in the cache is truncated due to the memory
limit posed by TRX_I_S_MEM_LIMIT.
-@return TRUE if truncated */
-UNIV_INTERN
+@return TRUE if truncated */
ibool
trx_i_s_cache_is_truncated(
/*=======================*/
@@ -1404,7 +1364,6 @@ trx_i_s_cache_is_truncated(
/*******************************************************************//**
Initialize INFORMATION SCHEMA trx related cache. */
-UNIV_INTERN
void
trx_i_s_cache_init(
/*===============*/
@@ -1416,6 +1375,8 @@ trx_i_s_cache_init(
release lock mutex
release trx_i_s_cache_t::rw_lock
acquire trx_i_s_cache_t::rw_lock, S
+ acquire trx_i_s_cache_t::last_read_mutex
+ release trx_i_s_cache_t::last_read_mutex
release trx_i_s_cache_t::rw_lock */
rw_lock_create(trx_i_s_cache_lock_key, &cache->rw_lock,
@@ -1423,6 +1384,8 @@ trx_i_s_cache_init(
cache->last_read = 0;
+ mutex_create(LATCH_ID_CACHE_LAST_READ, &cache->last_read_mutex);
+
table_cache_init(&cache->innodb_trx, sizeof(i_s_trx_row_t));
table_cache_init(&cache->innodb_locks, sizeof(i_s_locks_row_t));
table_cache_init(&cache->innodb_lock_waits,
@@ -1440,23 +1403,23 @@ trx_i_s_cache_init(
/*******************************************************************//**
Free the INFORMATION SCHEMA trx related cache. */
-UNIV_INTERN
void
trx_i_s_cache_free(
/*===============*/
trx_i_s_cache_t* cache) /*!< in, own: cache to free */
{
+ rw_lock_free(&cache->rw_lock);
+ mutex_free(&cache->last_read_mutex);
+
hash_table_free(cache->locks_hash);
ha_storage_free(cache->storage);
table_cache_free(&cache->innodb_trx);
table_cache_free(&cache->innodb_locks);
table_cache_free(&cache->innodb_lock_waits);
- memset(cache, 0, sizeof *cache);
}
/*******************************************************************//**
Issue a shared/read lock on the tables cache. */
-UNIV_INTERN
void
trx_i_s_cache_start_read(
/*=====================*/
@@ -1467,22 +1430,24 @@ trx_i_s_cache_start_read(
/*******************************************************************//**
Release a shared/read lock on the tables cache. */
-UNIV_INTERN
void
trx_i_s_cache_end_read(
/*===================*/
trx_i_s_cache_t* cache) /*!< in: cache */
{
-#ifdef UNIV_SYNC_DEBUG
- ut_a(rw_lock_own(&cache->rw_lock, RW_LOCK_SHARED));
-#endif
+ ut_ad(rw_lock_own(&cache->rw_lock, RW_LOCK_S));
+
+ /* update cache last read time */
+ const ulonglong now = my_interval_timer();
+ mutex_enter(&cache->last_read_mutex);
+ cache->last_read = now;
+ mutex_exit(&cache->last_read_mutex);
rw_lock_s_unlock(&cache->rw_lock);
}
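The matching write side, sketched with standard primitives under the same assumptions: the timestamp is taken first so the dedicated mutex is held only for the assignment, and the shared lock is released last.

#include <chrono>
#include <cstdint>
#include <mutex>
#include <shared_mutex>

struct CacheSketch {
	std::shared_mutex rw_lock;
	std::mutex        last_read_mutex;
	std::uint64_t     last_read_ns = 0;
};

// Mirrors trx_i_s_cache_end_read(): caller holds rw_lock in shared mode.
void end_read(CacheSketch& cache) {
	const std::uint64_t now_ns = static_cast<std::uint64_t>(
		std::chrono::duration_cast<std::chrono::nanoseconds>(
			std::chrono::steady_clock::now().time_since_epoch()).count());
	{
		std::lock_guard<std::mutex> guard(cache.last_read_mutex);
		cache.last_read_ns = now_ns;  // update while the S lock is still held
	}
	cache.rw_lock.unlock_shared();
}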
/*******************************************************************//**
Issue an exclusive/write lock on the tables cache. */
-UNIV_INTERN
void
trx_i_s_cache_start_write(
/*======================*/
@@ -1493,22 +1458,19 @@ trx_i_s_cache_start_write(
/*******************************************************************//**
Release an exclusive/write lock on the tables cache. */
-UNIV_INTERN
void
trx_i_s_cache_end_write(
/*====================*/
trx_i_s_cache_t* cache) /*!< in: cache */
{
-#ifdef UNIV_SYNC_DEBUG
- ut_a(rw_lock_own(&cache->rw_lock, RW_LOCK_EX));
-#endif
+ ut_ad(rw_lock_own(&cache->rw_lock, RW_LOCK_X));
rw_lock_x_unlock(&cache->rw_lock);
}
/*******************************************************************//**
Selects a INFORMATION SCHEMA table cache from the whole cache.
-@return table cache */
+@return table cache */
static
i_s_table_cache_t*
cache_select_table(
@@ -1516,35 +1478,26 @@ cache_select_table(
trx_i_s_cache_t* cache, /*!< in: whole cache */
enum i_s_table table) /*!< in: which table */
{
- i_s_table_cache_t* table_cache;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_a(rw_lock_own(&cache->rw_lock, RW_LOCK_SHARED)
- || rw_lock_own(&cache->rw_lock, RW_LOCK_EX));
-#endif
+ ut_ad(rw_lock_own_flagged(&cache->rw_lock,
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
switch (table) {
case I_S_INNODB_TRX:
- table_cache = &cache->innodb_trx;
- break;
+ return &cache->innodb_trx;
case I_S_INNODB_LOCKS:
- table_cache = &cache->innodb_locks;
- break;
+ return &cache->innodb_locks;
case I_S_INNODB_LOCK_WAITS:
- table_cache = &cache->innodb_lock_waits;
- break;
- default:
- ut_error;
+ return &cache->innodb_lock_waits;
}
- return(table_cache);
+ ut_error;
+ return NULL;
}
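The refactored cache_select_table() returns directly from the switch; with an exhaustive enum the compiler can then warn on a missing case, and the ut_error after the switch catches corrupt values. A hypothetical analogue:

#include <cassert>

enum class IsTable { TRX, LOCKS, LOCK_WAITS };

const char* table_name(IsTable table) {
	switch (table) {
	case IsTable::TRX:        return "innodb_trx";
	case IsTable::LOCKS:      return "innodb_locks";
	case IsTable::LOCK_WAITS: return "innodb_lock_waits";
	}
	assert(!"unknown table");  // analogous to ut_error
	return nullptr;
}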
/*******************************************************************//**
Retrieves the number of used rows in the cache for a given
INFORMATION SCHEMA table.
-@return number of rows */
-UNIV_INTERN
+@return number of rows */
ulint
trx_i_s_cache_get_rows_used(
/*========================*/
@@ -1561,8 +1514,7 @@ trx_i_s_cache_get_rows_used(
/*******************************************************************//**
Retrieves the nth row (zero-based) in the cache for a given
INFORMATION SCHEMA table.
-@return row */
-UNIV_INTERN
+@return row */
void*
trx_i_s_cache_get_nth_row(
/*======================*/
@@ -1602,8 +1554,7 @@ Crafts a lock id string from a i_s_locks_row_t object. Returns its
second argument. This function aborts if there is not enough space in
lock_id. Be sure to provide at least TRX_I_S_LOCK_ID_MAX_LEN + 1 if you
want to be 100% sure that it will not abort.
-@return resulting lock id */
-UNIV_INTERN
+@return resulting lock id */
char*
trx_i_s_create_lock_id(
/*===================*/
@@ -1618,16 +1569,17 @@ trx_i_s_create_lock_id(
if (row->lock_space != ULINT_UNDEFINED) {
/* record lock */
- res_len = ut_snprintf(lock_id, lock_id_size,
- TRX_ID_FMT ":%lu:%lu:%lu",
- row->lock_trx_id, row->lock_space,
- row->lock_page, row->lock_rec);
+ res_len = snprintf(lock_id, lock_id_size,
+ TRX_ID_FMT
+ ":" ULINTPF ":" ULINTPF ":" ULINTPF,
+ row->lock_trx_id, row->lock_space,
+ row->lock_page, row->lock_rec);
} else {
/* table lock */
- res_len = ut_snprintf(lock_id, lock_id_size,
- TRX_ID_FMT ":" UINT64PF,
- row->lock_trx_id,
- row->lock_table_id);
+ res_len = snprintf(lock_id, lock_id_size,
+ TRX_ID_FMT":" UINT64PF,
+ row->lock_trx_id,
+ row->lock_table_id);
}
/* the typecast is safe because snprintf(3) never returns
diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
index bd61bc85961..732435ccefb 100644
--- a/storage/innobase/trx/trx0purge.cc
+++ b/storage/innobase/trx/trx0purge.cc
@@ -25,165 +25,205 @@ Created 3/26/1996 Heikki Tuuri
*******************************************************/
#include "trx0purge.h"
-
-#ifdef UNIV_NONINL
-#include "trx0purge.ic"
-#endif
-
#include "fsp0fsp.h"
-#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "read0read.h"
#include "fut0fut.h"
+#include "mach0data.h"
+#include "mtr0log.h"
+#include "os0thread.h"
#include "que0que.h"
+#include "read0read.h"
#include "row0purge.h"
#include "row0upd.h"
-#include "trx0rec.h"
+#include "srv0mon.h"
#include "srv0srv.h"
#include "srv0start.h"
-#include "os0thread.h"
-#include "srv0mon.h"
-#include "mtr0log.h"
+#include "sync0sync.h"
+#include "trx0rec.h"
+#include "trx0roll.h"
+#include "trx0rseg.h"
+#include "trx0trx.h"
/** Maximum allowable purge history length. <=0 means 'infinite'. */
-UNIV_INTERN ulong srv_max_purge_lag = 0;
+ulong srv_max_purge_lag = 0;
/** Max DML user threads delay in micro-seconds. */
-UNIV_INTERN ulong srv_max_purge_lag_delay = 0;
+ulong srv_max_purge_lag_delay = 0;
/** The global data structure coordinating a purge */
-UNIV_INTERN trx_purge_t* purge_sys = NULL;
+purge_sys_t* purge_sys;
/** A dummy undo record used as a return value when we have a whole undo log
which needs no purge */
-UNIV_INTERN trx_undo_rec_t trx_purge_dummy_rec;
-
-#ifdef UNIV_PFS_RWLOCK
-/* Key to register trx_purge_latch with performance schema */
-UNIV_INTERN mysql_pfs_key_t trx_purge_latch_key;
-#endif /* UNIV_PFS_RWLOCK */
-
-#ifdef UNIV_PFS_MUTEX
-/* Key to register purge_sys_bh_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t purge_sys_bh_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
+trx_undo_rec_t trx_purge_dummy_rec;
#ifdef UNIV_DEBUG
-UNIV_INTERN my_bool srv_purge_view_update_only_debug;
+my_bool srv_purge_view_update_only_debug;
#endif /* UNIV_DEBUG */
-/****************************************************************//**
-Builds a purge 'query' graph. The actual purge is performed by executing
-this query graph.
-@return own: the query graph */
-static
-que_t*
-trx_purge_graph_build(
-/*==================*/
- trx_t* trx, /*!< in: transaction */
- ulint n_purge_threads) /*!< in: number of purge
- threads */
+/** Sentinel value */
+const TrxUndoRsegs TrxUndoRsegsIterator::NullElement(UINT64_UNDEFINED);
+
+/** Constructor */
+TrxUndoRsegsIterator::TrxUndoRsegsIterator()
+ :
+ m_trx_undo_rsegs(NullElement),
+ m_iter(m_trx_undo_rsegs.end())
{
- ulint i;
- mem_heap_t* heap;
- que_fork_t* fork;
+}
- heap = mem_heap_create(512);
- fork = que_fork_create(NULL, NULL, QUE_FORK_PURGE, heap);
- fork->trx = trx;
+/** Sets the next rseg to purge in purge_sys.
+@return whether anything is to be purged */
+inline
+bool
+TrxUndoRsegsIterator::set_next()
+{
+ mutex_enter(&purge_sys->pq_mutex);
- for (i = 0; i < n_purge_threads; ++i) {
- que_thr_t* thr;
+ /* Only purge consumes events from the priority queue, user
+ threads only produce the events. */
- thr = que_thr_create(fork, heap);
+ /* Check if there are more rsegs to process in the
+ current element. */
+ if (m_iter != m_trx_undo_rsegs.end()) {
+
+ /* We are still processing rollback segments from
+ the same transaction, so the expected transaction
+ number should not increase. Undo the increment of
+ the expected trx_no done by the caller, which assumed
+ that the rollback segments of the given transaction
+ were done. */
+ purge_sys->iter.trx_no = (*m_iter)->last_trx_no;
+
+ } else if (!purge_sys->purge_queue.empty()) {
+
+ /* Read the next element from the queue.
+ Combine elements if they have the same transaction number.
+ This can happen if a transaction shares a redo rollback segment
+ with another transaction that has already added it to the purge
+ queue and the former transaction also needs to schedule a
+ non-redo rollback segment for purge. */
+ m_trx_undo_rsegs = NullElement;
+
+ purge_pq_t& purge_queue = purge_sys->purge_queue;
+
+ while (!purge_queue.empty()) {
+
+ if (m_trx_undo_rsegs.get_trx_no() == UINT64_UNDEFINED) {
+ m_trx_undo_rsegs = purge_queue.top();
+ } else if (purge_queue.top().get_trx_no() ==
+ m_trx_undo_rsegs.get_trx_no()) {
+ m_trx_undo_rsegs.append(
+ purge_queue.top());
+ } else {
+ break;
+ }
- thr->child = row_purge_node_create(thr, heap);
- }
+ purge_queue.pop();
+ }
- return(fork);
-}
+ m_iter = m_trx_undo_rsegs.begin();
-/********************************************************************//**
-Creates the global purge system control structure and inits the history
-mutex. */
-UNIV_INTERN
-void
-trx_purge_sys_create(
-/*=================*/
- ulint n_purge_threads, /*!< in: number of purge
- threads */
- ib_bh_t* ib_bh) /*!< in, own: UNDO log min
- binary heap */
-{
- purge_sys = static_cast<trx_purge_t*>(mem_zalloc(sizeof(*purge_sys)));
+ } else {
+ /* Queue is empty, reset iterator. */
+ m_trx_undo_rsegs = NullElement;
+ m_iter = m_trx_undo_rsegs.end();
- purge_sys->state = PURGE_STATE_INIT;
- purge_sys->event = os_event_create();
+ mutex_exit(&purge_sys->pq_mutex);
- /* Take ownership of ib_bh, we are responsible for freeing it. */
- purge_sys->ib_bh = ib_bh;
+ purge_sys->rseg = NULL;
+ return false;
+ }
- rw_lock_create(trx_purge_latch_key,
- &purge_sys->latch, SYNC_PURGE_LATCH);
+ purge_sys->rseg = *m_iter++;
- mutex_create(
- purge_sys_bh_mutex_key, &purge_sys->bh_mutex,
- SYNC_PURGE_QUEUE);
+ mutex_exit(&purge_sys->pq_mutex);
- purge_sys->heap = mem_heap_create(256);
+ ut_a(purge_sys->rseg != NULL);
- ut_a(n_purge_threads > 0);
+ mutex_enter(&purge_sys->rseg->mutex);
- purge_sys->sess = sess_open();
+ ut_a(purge_sys->rseg->last_page_no != FIL_NULL);
+ ut_ad(purge_sys->rseg->last_trx_no == m_trx_undo_rsegs.get_trx_no());
- purge_sys->trx = purge_sys->sess->trx;
+ /* We assume in purge of externally stored fields that space id is
+ in the range of UNDO tablespace space ids */
+ ut_a(purge_sys->rseg->space == TRX_SYS_SPACE
+ || srv_is_undo_tablespace(purge_sys->rseg->space));
- ut_a(purge_sys->trx->sess == purge_sys->sess);
+ ut_a(purge_sys->iter.trx_no <= purge_sys->rseg->last_trx_no);
- /* A purge transaction is not a real transaction, we use a transaction
- here only because the query threads code requires it. It is otherwise
- quite unnecessary. We should get rid of it eventually. */
- purge_sys->trx->id = 0;
- purge_sys->trx->start_time = time(NULL);
- purge_sys->trx->start_time_micro = microsecond_interval_timer();
- purge_sys->trx->state = TRX_STATE_ACTIVE;
- purge_sys->trx->op_info = "purge trx";
+ purge_sys->iter.trx_no = purge_sys->rseg->last_trx_no;
+ purge_sys->hdr_offset = purge_sys->rseg->last_offset;
+ purge_sys->hdr_page_no = purge_sys->rseg->last_page_no;
- purge_sys->query = trx_purge_graph_build(
- purge_sys->trx, n_purge_threads);
+ mutex_exit(&purge_sys->rseg->mutex);
- purge_sys->view = read_view_purge_open(purge_sys->heap);
+ return(true);
}
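A self-contained sketch of the combining loop in set_next(), with a std::priority_queue of hypothetical batches in place of purge_queue: consecutive top elements carrying the same transaction number are merged into one unit of work.

#include <cstdint>
#include <queue>
#include <vector>

struct Batch {
	std::uint64_t    trx_no;
	std::vector<int> rsegs;  // stand-in for the rollback segment pointers
	// Invert the comparison so the queue pops the smallest trx_no first.
	bool operator<(const Batch& other) const { return trx_no > other.trx_no; }
};

// Precondition: !queue.empty().
Batch pop_merged(std::priority_queue<Batch>& queue) {
	Batch merged = queue.top();
	queue.pop();
	while (!queue.empty() && queue.top().trx_no == merged.trx_no) {
		const Batch& next = queue.top();
		merged.rsegs.insert(merged.rsegs.end(),
				    next.rsegs.begin(), next.rsegs.end());
		queue.pop();
	}
	return merged;
}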
-/************************************************************************
-Frees the global purge system control structure. */
-UNIV_INTERN
-void
-trx_purge_sys_close(void)
-/*======================*/
+/** Build a purge 'query' graph. The actual purge is performed by executing
+this query graph.
+@return own: the query graph */
+static
+que_t*
+purge_graph_build()
{
- que_graph_free(purge_sys->query);
-
- ut_a(purge_sys->trx->id == 0);
- ut_a(purge_sys->sess->trx == purge_sys->trx);
-
- purge_sys->trx->state = TRX_STATE_NOT_STARTED;
+ ut_a(srv_n_purge_threads > 0);
- sess_close(purge_sys->sess);
+ trx_t* trx = trx_allocate_for_background();
+ ut_ad(!trx->id);
+ trx->start_time = time(NULL);
+ trx->start_time_micro = microsecond_interval_timer();
+ trx->state = TRX_STATE_ACTIVE;
+ trx->op_info = "purge trx";
- rw_lock_free(&purge_sys->latch);
- mutex_free(&purge_sys->bh_mutex);
+ mem_heap_t* heap = mem_heap_create(512);
+ que_fork_t* fork = que_fork_create(
+ NULL, NULL, QUE_FORK_PURGE, heap);
+ fork->trx = trx;
- mem_heap_free(purge_sys->heap);
+ for (ulint i = 0; i < srv_n_purge_threads; ++i) {
+ que_thr_t* thr = que_thr_create(fork, heap, NULL);
+ thr->child = new(mem_heap_alloc(heap, sizeof(purge_node_t)))
+ purge_node_t(thr);
+ }
- ib_bh_free(purge_sys->ib_bh);
+ return(fork);
+}
- os_event_free(purge_sys->event);
- mem_free(purge_sys);
+/** Construct the purge system. */
+purge_sys_t::purge_sys_t()
+ : latch(), event(os_event_create(0)),
+ n_stop(0), running(false), state(PURGE_STATE_INIT),
+ query(purge_graph_build()),
+ view(), n_submitted(0), n_completed(0),
+ iter(), limit(),
+#ifdef UNIV_DEBUG
+ done(),
+#endif /* UNIV_DEBUG */
+ next_stored(false), rseg(NULL),
+ page_no(0), offset(0), hdr_page_no(0), hdr_offset(0),
+ rseg_iter(), purge_queue(), pq_mutex(), undo_trunc()
+{
+ ut_ad(!purge_sys);
+ rw_lock_create(trx_purge_latch_key, &latch, SYNC_PURGE_LATCH);
+ mutex_create(LATCH_ID_PURGE_SYS_PQ, &pq_mutex);
+}
- purge_sys = NULL;
+/** Destruct the purge system. */
+purge_sys_t::~purge_sys_t()
+{
+ ut_ad(this == purge_sys);
+
+ trx_t* trx = query->trx;
+ que_graph_free(query);
+ ut_ad(!trx->id);
+ ut_ad(trx->state == TRX_STATE_ACTIVE);
+ trx->state = TRX_STATE_NOT_STARTED;
+ trx_free_for_background(trx);
+ view.close();
+ rw_lock_free(&latch);
+ mutex_free(&pq_mutex);
+ os_event_destroy(event);
}
/*================ UNDO LOG HISTORY LIST =============================*/
@@ -191,28 +231,19 @@ trx_purge_sys_close(void)
/********************************************************************//**
Adds the update undo log as the first log in the history list. Removes the
update undo log segment from the rseg slot if it is too big for reuse. */
-UNIV_INTERN
void
trx_purge_add_update_undo_to_history(
/*=================================*/
- trx_t* trx, /*!< in: transaction */
- page_t* undo_page, /*!< in: update undo log header page,
- x-latched */
- mtr_t* mtr) /*!< in: mtr */
+ trx_t* trx, /*!< in: transaction */
+ page_t* undo_page, /*!< in: update undo log header page,
+ x-latched */
+ mtr_t* mtr) /*!< in: mtr */
{
- trx_undo_t* undo;
- trx_rseg_t* rseg;
- trx_rsegf_t* rseg_header;
- trx_ulogf_t* undo_header;
-
- undo = trx->update_undo;
- rseg = undo->rseg;
-
- rseg_header = trx_rsegf_get(
- undo->rseg->space, undo->rseg->zip_size, undo->rseg->page_no,
- mtr);
-
- undo_header = undo_page + undo->hdr_offset;
+ trx_undo_t* undo = trx->rsegs.m_redo.update_undo;
+ trx_rseg_t* rseg = undo->rseg;
+ trx_rsegf_t* rseg_header = trx_rsegf_get(
+ rseg->space, rseg->page_no, mtr);
+ trx_ulogf_t* undo_header = undo_page + undo->hdr_offset;
if (undo->state != TRX_UNDO_CACHED) {
ulint hist_size;
@@ -223,10 +254,7 @@ trx_purge_add_update_undo_to_history(
/* The undo log segment will not be reused */
if (UNIV_UNLIKELY(undo->id >= TRX_RSEG_N_SLOTS)) {
- fprintf(stderr,
- "InnoDB: Error: undo->id is %lu\n",
- (ulong) undo->id);
- ut_error;
+ ib::fatal() << "undo->id is " << undo->id;
}
trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, mtr);
@@ -237,7 +265,7 @@ trx_purge_add_update_undo_to_history(
rseg_header + TRX_RSEG_HISTORY_SIZE, MLOG_4BYTES, mtr);
ut_ad(undo->size == flst_get_len(
- seg_header + TRX_UNDO_PAGE_LIST, mtr));
+ seg_header + TRX_UNDO_PAGE_LIST));
mlog_write_ulint(
rseg_header + TRX_RSEG_HISTORY_SIZE,
@@ -252,26 +280,30 @@ trx_purge_add_update_undo_to_history(
Before any transaction-generating background threads or the
purge have been started, recv_recovery_rollback_active() can
start transactions in row_merge_drop_temp_indexes() and
- fts_drop_orphaned_tables(), and roll back recovered transactions. */
+ fts_drop_orphaned_tables(), and roll back recovered transactions.
+
+ Arbitrary user transactions may be executed when all the undo log
+ related background processes (including purge) are disabled due to
+ innodb_force_recovery=2 or innodb_force_recovery=3.
+ DROP TABLE may be executed at any innodb_force_recovery level.
+
+ During fast shutdown, we may also continue to execute
+ user transactions. */
ut_ad(srv_undo_sources
|| trx->undo_no == 0
|| ((srv_is_being_started
|| trx_rollback_or_clean_is_active)
- && purge_sys->state == PURGE_STATE_INIT));
+ && purge_sys->state == PURGE_STATE_INIT)
+ || (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND
+ && purge_sys->state == PURGE_STATE_DISABLED)
+ || ((trx->in_mysql_trx_list || trx->internal)
+ && srv_fast_shutdown));
/* Add the log as the first in the history list */
flst_add_first(rseg_header + TRX_RSEG_HISTORY,
undo_header + TRX_UNDO_HISTORY_NODE, mtr);
-#ifdef HAVE_ATOMIC_BUILTINS
- os_atomic_increment_ulint(&trx_sys->rseg_history_len, 1);
-#else
- mutex_enter(&trx_sys->mutex);
- ++trx_sys->rseg_history_len;
- mutex_exit(&trx_sys->mutex);
-#endif /* HAVE_ATOMIC_BUILTINS */
-
- srv_wake_purge_thread_if_not_active();
+ my_atomic_addlint(&trx_sys->rseg_history_len, 1);
/* Write the trx number to the undo log header */
mlog_write_ull(undo_header + TRX_UNDO_TRX_NO, trx->no, mtr);
@@ -304,24 +336,15 @@ trx_purge_remove_log_hdr(
{
flst_remove(rseg_hdr + TRX_RSEG_HISTORY,
log_hdr + TRX_UNDO_HISTORY_NODE, mtr);
-#ifdef HAVE_ATOMIC_BUILTINS
- os_atomic_decrement_ulint(&trx_sys->rseg_history_len, 1);
-#else
- mutex_enter(&trx_sys->mutex);
- --trx_sys->rseg_history_len;
- mutex_exit(&trx_sys->mutex);
-#endif
+ my_atomic_addlint(&trx_sys->rseg_history_len, -1);
}
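Both counter updates above collapse the old #ifdef HAVE_ATOMIC_BUILTINS / mutex fallback into a single unconditional atomic add (my_atomic_addlint in the patch). The same shape with std::atomic, as an illustrative sketch:

#include <atomic>

std::atomic<unsigned long> rseg_history_len{0};

void on_history_added()   { rseg_history_len.fetch_add(1, std::memory_order_relaxed); }
void on_history_removed() { rseg_history_len.fetch_sub(1, std::memory_order_relaxed); }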
-/** Frees an undo log segment which is in the history list. Removes the
-undo log hdr from the history list.
+/** Free an undo log segment, and remove the header from the history list.
@param[in,out] rseg rollback segment
@param[in] hdr_addr file address of log_hdr */
static
void
-trx_purge_free_segment(
- trx_rseg_t* rseg,
- fil_addr_t hdr_addr)
+trx_purge_free_segment(trx_rseg_t* rseg, fil_addr_t hdr_addr)
{
mtr_t mtr;
trx_rsegf_t* rseg_hdr;
@@ -329,9 +352,7 @@ trx_purge_free_segment(
trx_usegf_t* seg_hdr;
ulint seg_size;
ulint hist_size;
- ibool marked = FALSE;
-
- /* fputs("Freeing an update undo log segment\n", stderr); */
+ bool marked = false;
for (;;) {
page_t* undo_page;
@@ -340,11 +361,10 @@ trx_purge_free_segment(
mutex_enter(&rseg->mutex);
- rseg_hdr = trx_rsegf_get(
- rseg->space, rseg->zip_size, rseg->page_no, &mtr);
+ rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
undo_page = trx_undo_page_get(
- rseg->space, rseg->zip_size, hdr_addr.page, &mtr);
+ page_id_t(rseg->space, hdr_addr.page), &mtr);
seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
log_hdr = undo_page + hdr_addr.boffset;
@@ -356,15 +376,14 @@ trx_purge_free_segment(
not try to access them again. */
if (!marked) {
+ marked = true;
mlog_write_ulint(
log_hdr + TRX_UNDO_DEL_MARKS, FALSE,
MLOG_2BYTES, &mtr);
-
- marked = TRUE;
}
if (fseg_free_step_not_header(
- seg_hdr + TRX_UNDO_FSEG_HEADER, &mtr)) {
+ seg_hdr + TRX_UNDO_FSEG_HEADER, false, &mtr)) {
break;
}
@@ -378,7 +397,7 @@ trx_purge_free_segment(
stored in the list base node tells us how big it was before we
started the freeing. */
- seg_size = flst_get_len(seg_hdr + TRX_UNDO_PAGE_LIST, &mtr);
+ seg_size = flst_get_len(seg_hdr + TRX_UNDO_PAGE_LIST);
/* We may free the undo log segment header page; it must be freed
within the same mtr as the undo log header is removed from the
@@ -394,7 +413,7 @@ trx_purge_free_segment(
is not flooded with bufferfixed pages: see the note in
fsp0fsp.cc. */
- } while(!fseg_free_step(seg_hdr + TRX_UNDO_FSEG_HEADER, &mtr));
+ } while (!fseg_free_step(seg_hdr + TRX_UNDO_FSEG_HEADER, false, &mtr));
hist_size = mtr_read_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE,
MLOG_4BYTES, &mtr);
@@ -412,14 +431,12 @@ trx_purge_free_segment(
mtr_commit(&mtr);
}
-/********************************************************************//**
-Removes unnecessary history data from a rollback segment. */
+/** Remove unnecessary history data from a rollback segment.
+@param[in,out] rseg rollback segment
+@param[in] limit truncate offset */
static
void
-trx_purge_truncate_rseg_history(
-/*============================*/
- trx_rseg_t* rseg, /*!< in: rollback segment */
- const purge_iter_t* limit) /*!< in: truncate offset */
+trx_purge_truncate_rseg_history(trx_rseg_t* rseg, const purge_iter_t* limit)
{
fil_addr_t hdr_addr;
fil_addr_t prev_hdr_addr;
@@ -431,10 +448,10 @@ trx_purge_truncate_rseg_history(
trx_id_t undo_trx_no;
mtr_start(&mtr);
+ ut_ad(rseg->is_persistent());
mutex_enter(&(rseg->mutex));
- rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size,
- rseg->page_no, &mtr);
+ rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
hdr_addr = trx_purge_get_log_from_hist(
flst_get_last(rseg_hdr + TRX_RSEG_HISTORY, &mtr));
@@ -448,8 +465,8 @@ loop:
return;
}
- undo_page = trx_undo_page_get(rseg->space, rseg->zip_size,
- hdr_addr.page, &mtr);
+ undo_page = trx_undo_page_get(page_id_t(rseg->space, hdr_addr.page),
+ &mtr);
log_hdr = undo_page + hdr_addr.boffset;
@@ -457,10 +474,14 @@ loop:
if (undo_trx_no >= limit->trx_no) {
- if (undo_trx_no == limit->trx_no) {
+ /* limit->undo_rseg_space should match the rollback
+ segment space id, to avoid freeing a page that belongs
+ to a different rollback segment with the same trx_no. */
+ if (undo_trx_no == limit->trx_no
+ && rseg->space == limit->undo_rseg_space) {
trx_undo_truncate_start(
- rseg, rseg->space, hdr_addr.page,
+ rseg, hdr_addr.page,
hdr_addr.boffset, limit->undo_no);
}
@@ -486,7 +507,6 @@ loop:
/* calls the trx_purge_remove_log_hdr()
inside trx_purge_free_segment(). */
trx_purge_free_segment(rseg, hdr_addr);
-
} else {
/* Remove the log hdr from the rseg history. */
trx_purge_remove_log_hdr(rseg_hdr, log_hdr, &mtr);
@@ -498,14 +518,601 @@ loop:
mtr_start(&mtr);
mutex_enter(&(rseg->mutex));
- rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size,
- rseg->page_no, &mtr);
+ rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
hdr_addr = prev_hdr_addr;
goto loop;
}
+/** UNDO log truncate logger. Needed to track state of truncate during crash.
+An auxiliary redo log file undo_<space_id>_trunc.log will be created while the
+truncate of the UNDO is in progress. This file is required during recovery
+to complete the truncate. */
+
+namespace undo {
+ /** Magic Number to indicate truncate action is complete. */
+ static const ib_uint32_t s_magic = 76845412;
+
+ /** Populate log file name based on space_id
+ @param[in] space_id id of the undo tablespace.
+ @return DB_SUCCESS or error code */
+ static dberr_t populate_log_file_name(
+ ulint space_id,
+ char*& log_file_name)
+ {
+ static const char s_log_prefix[] = "undo_";
+ static const char s_log_ext[] = "trunc.log";
+
+ ulint log_file_name_sz = strlen(srv_log_group_home_dir)
+ + (22 - 1 /* NUL */
+ + sizeof s_log_prefix + sizeof s_log_ext);
+
+ log_file_name = new (std::nothrow) char[log_file_name_sz];
+ if (log_file_name == 0) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ memset(log_file_name, 0, log_file_name_sz);
+
+ strcpy(log_file_name, srv_log_group_home_dir);
+ ulint log_file_name_len = strlen(log_file_name);
+
+ if (log_file_name[log_file_name_len - 1]
+ != OS_PATH_SEPARATOR) {
+
+ log_file_name[log_file_name_len]
+ = OS_PATH_SEPARATOR;
+ log_file_name_len = strlen(log_file_name);
+ }
+
+ snprintf(log_file_name + log_file_name_len,
+ log_file_name_sz - log_file_name_len,
+ "%s" ULINTPF "_%s", s_log_prefix,
+ space_id, s_log_ext);
+
+ return(DB_SUCCESS);
+ }
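A rough std::string equivalent of populate_log_file_name() above (the real code sizes and fills a raw buffer and reports a dberr_t): the result is <srv_log_group_home_dir>/undo_<space_id>_trunc.log.

#include <string>

std::string trunc_log_name(std::string dir, unsigned long space_id) {
	if (!dir.empty() && dir.back() != '/') {  // OS_PATH_SEPARATOR on Unix
		dir += '/';
	}
	return dir + "undo_" + std::to_string(space_id) + "_trunc.log";
}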
+
+ /** Mark completion of the undo truncate action by writing the magic
+ number to the log file and then removing it from the disk.
+ If we are going to remove it from disk anyway, why write the magic
+ number first? This safeguards against unlink (file-system) anomalies
+ that can keep a link to the file even after the unlink action has
+ completed successfully with ref-count = 0.
+ @param[in] space_id id of the undo tablespace to truncate. */
+ void done(
+ ulint space_id)
+ {
+ dberr_t err;
+ char* log_file_name;
+
+ /* Step-1: Create the log file name using the pre-decided
+ prefix/suffix and the id of the undo tablespace to truncate. */
+ err = populate_log_file_name(space_id, log_file_name);
+ if (err != DB_SUCCESS) {
+ return;
+ }
+
+ /* Step-2: Open log file and write magic number to
+ indicate done phase. */
+ bool ret;
+ os_file_t handle =
+ os_file_create_simple_no_error_handling(
+ innodb_log_file_key, log_file_name,
+ OS_FILE_OPEN, OS_FILE_READ_WRITE,
+ srv_read_only_mode, &ret);
+
+ if (!ret) {
+ os_file_delete(innodb_log_file_key, log_file_name);
+ delete[] log_file_name;
+ return;
+ }
+
+ ulint sz = UNIV_PAGE_SIZE;
+ void* buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE);
+ if (buf == NULL) {
+ os_file_close(handle);
+ os_file_delete(innodb_log_file_key, log_file_name);
+ delete[] log_file_name;
+ return;
+ }
+
+ byte* log_buf = static_cast<byte*>(
+ ut_align(buf, UNIV_PAGE_SIZE));
+
+ mach_write_to_4(log_buf, undo::s_magic);
+
+ IORequest request(IORequest::WRITE);
+
+ err = os_file_write(
+ request, log_file_name, handle, log_buf, 0, sz);
+
+ ut_ad(err == DB_SUCCESS);
+
+ os_file_flush(handle);
+ os_file_close(handle);
+
+ ut_free(buf);
+ os_file_delete(innodb_log_file_key, log_file_name);
+ delete[] log_file_name;
+ }
+
+ /** Check if the TRUNCATE_DDL_LOG file exists.
+ @param[in] space_id id of the undo tablespace.
+ @return true if it exists, else false. */
+ bool is_log_present(
+ ulint space_id)
+ {
+ dberr_t err;
+ char* log_file_name;
+
+ /* Step-1: Populate log file name. */
+ err = populate_log_file_name(space_id, log_file_name);
+ if (err != DB_SUCCESS) {
+ return(false);
+ }
+
+ /* Step-2: Check for existence of the file. */
+ bool exist;
+ os_file_type_t type;
+ os_file_status(log_file_name, &exist, &type);
+
+ /* Step-3: If the file exists, check it for the presence of the
+ magic number. If found, delete the file and report that it
+ doesn't exist, as the presence of the magic number indicates
+ that the truncate action was complete. */
+
+ if (exist) {
+ bool ret;
+ os_file_t handle =
+ os_file_create_simple_no_error_handling(
+ innodb_log_file_key, log_file_name,
+ OS_FILE_OPEN, OS_FILE_READ_WRITE,
+ srv_read_only_mode, &ret);
+ if (!ret) {
+ os_file_delete(innodb_log_file_key,
+ log_file_name);
+ delete[] log_file_name;
+ return(false);
+ }
+
+ ulint sz = UNIV_PAGE_SIZE;
+ void* buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE);
+ if (buf == NULL) {
+ os_file_close(handle);
+ os_file_delete(innodb_log_file_key,
+ log_file_name);
+ delete[] log_file_name;
+ return(false);
+ }
+
+ byte* log_buf = static_cast<byte*>(
+ ut_align(buf, UNIV_PAGE_SIZE));
+
+ IORequest request(IORequest::READ);
+
+ dberr_t err;
+
+ err = os_file_read(request, handle, log_buf, 0, sz);
+
+ os_file_close(handle);
+
+ if (err != DB_SUCCESS) {
+
+ ib::info()
+ << "Unable to read '"
+ << log_file_name << "' : "
+ << ut_strerr(err);
+
+ os_file_delete(
+ innodb_log_file_key, log_file_name);
+
+ ut_free(buf);
+
+ delete[] log_file_name;
+
+ return(false);
+ }
+
+ ulint magic_no = mach_read_from_4(log_buf);
+
+ ut_free(buf);
+
+ if (magic_no == undo::s_magic) {
+ /* Found magic number. */
+ os_file_delete(innodb_log_file_key,
+ log_file_name);
+ delete[] log_file_name;
+ return(false);
+ }
+ }
+
+ delete[] log_file_name;
+
+ return(exist);
+ }
+};
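The recovery decision that done() and is_log_present() implement together can be summarized as: no log file means no truncate is pending; a log file carrying the magic number means the truncate finished and the file merely survived an unlink anomaly; a log file without the magic number means the truncate was interrupted and must be redone. A hypothetical sketch over plain stdio, ignoring the aligned-buffer and endianness details of the real code:

#include <cstdint>
#include <cstdio>

const std::uint32_t kMagic = 76845412;  // undo::s_magic above

// Returns true when the truncate of this undo space must be redone.
bool truncate_pending(const char* log_file_name) {
	std::FILE* f = std::fopen(log_file_name, "rb");
	if (!f) {
		return false;                // no log file: nothing pending
	}
	std::uint32_t magic = 0;
	const bool got = std::fread(&magic, sizeof magic, 1, f) == 1;
	std::fclose(f);
	if (got && magic == kMagic) {
		std::remove(log_file_name);  // truncate had completed; clean up
		return false;
	}
	return true;                         // file present, no magic: redo
}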
+
+/** Iterate over all the UNDO tablespaces and check if any of the UNDO
+tablespaces qualifies for TRUNCATE (size > threshold).
+@param[in,out] undo_trunc undo truncate tracker */
+static
+void
+trx_purge_mark_undo_for_truncate(
+ undo::Truncate* undo_trunc)
+{
+ /* Step-1: If the UNDO tablespace is
+ - already marked for truncate (OR)
+ - truncate is disabled,
+ return immediately; else search for a qualifying tablespace. */
+ if (undo_trunc->is_marked() || !srv_undo_log_truncate) {
+ return;
+ }
+
+ /* Step-2: Validation/Qualification checks
+ a. At least 2 UNDO tablespaces, so that even while one UNDO
+ tablespace is being truncated the server can continue to operate.
+ b. At least 2 persistent UNDO logs (besides the default rseg-0).
+ c. At least 1 UNDO tablespace whose size exceeds the threshold. */
+ if (srv_undo_tablespaces_active < 2 || srv_undo_logs < 3) {
+ return;
+ }
+
+ /* To avoid biased selection, start the scan from the tablespace
+ immediately after the one last selected for truncate. */
+ ulint space_id = undo_trunc->get_scan_start();
+
+ for (ulint i = 1; i <= srv_undo_tablespaces_active; i++) {
+
+ if (fil_space_get_size(space_id)
+ > (srv_max_undo_log_size / srv_page_size)) {
+ /* Tablespace qualifies for truncate. */
+ undo_trunc->mark(space_id);
+ undo::Truncate::add_space_to_trunc_list(space_id);
+ break;
+ }
+
+ space_id = ((space_id + 1) % (srv_undo_tablespaces_active + 1));
+ if (space_id == 0) {
+ /* Note: UNDO tablespace ids start from 1. */
+ ++space_id;
+ }
+ }
+
+ /* Couldn't make any selection. */
+ if (!undo_trunc->is_marked()) {
+ return;
+ }
+
+ DBUG_LOG("undo",
+ "marking for truncate UNDO tablespace "
+ << undo_trunc->get_marked_space_id());
+
+ /* Step-3: Iterate over all the rsegs of selected UNDO tablespace
+ and mark them temporarily unavailable for allocation.*/
+ for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
+ if (trx_rseg_t* rseg = trx_sys->rseg_array[i]) {
+ ut_ad(rseg->is_persistent());
+ if (rseg->space == undo_trunc->get_marked_space_id()) {
+
+ /* Once set, this rseg will not be allocated to newly
+ starting transactions, but we will wait for existing
+ active transactions to finish. */
+ rseg->skip_allocation = true;
+ undo_trunc->add_rseg_to_trunc(rseg);
+ }
+ }
+ }
+}
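The wrap-around scan from Step-2 above, isolated into a sketch (the qualifies predicate is a stand-in for the fil_space_get_size() threshold test): undo tablespace ids run from 1 to n_spaces, so id 0 is skipped when the increment wraps.

// Returns the first qualifying undo space id at or after `start`,
// or 0 if none qualifies after one full round.
unsigned long pick_space(unsigned long start, unsigned long n_spaces,
			 bool (*qualifies)(unsigned long)) {
	unsigned long space_id = start;
	for (unsigned long i = 1; i <= n_spaces; i++) {
		if (qualifies(space_id)) {
			return space_id;
		}
		space_id = (space_id + 1) % (n_spaces + 1);
		if (space_id == 0) {
			space_id = 1;  // undo tablespace ids start from 1
		}
	}
	return 0;
}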
+
+undo::undo_spaces_t undo::Truncate::s_spaces_to_truncate;
+
+/** Cleanse the purge queue to remove the rsegs that reside in the
+undo tablespace marked for truncate.
+@param[in,out] undo_trunc undo truncate tracker */
+static
+void
+trx_purge_cleanse_purge_queue(
+ undo::Truncate* undo_trunc)
+{
+ mutex_enter(&purge_sys->pq_mutex);
+ typedef std::vector<TrxUndoRsegs> purge_elem_list_t;
+ purge_elem_list_t purge_elem_list;
+
+ /* Remove the rseg instances that are in the purge queue before we
+ start the truncate of the corresponding UNDO tablespace. */
+ while (!purge_sys->purge_queue.empty()) {
+ purge_elem_list.push_back(purge_sys->purge_queue.top());
+ purge_sys->purge_queue.pop();
+ }
+ ut_ad(purge_sys->purge_queue.empty());
+
+ for (purge_elem_list_t::iterator it = purge_elem_list.begin();
+ it != purge_elem_list.end();
+ ++it) {
+
+ for (TrxUndoRsegs::iterator it2 = it->begin();
+ it2 != it->end();
+ ++it2) {
+
+ if ((*it2)->space
+ == undo_trunc->get_marked_space_id()) {
+ it->erase(it2);
+ break;
+ }
+ }
+
+ if (it->size()) {
+ /* size != 0 suggests that there exist other rsegs that
+ need processing, so add this element back to the purge queue.
+ Note: the other rsegs could be non-redo rsegs. */
+ purge_sys->purge_queue.push(*it);
+ }
+ }
+ mutex_exit(&purge_sys->pq_mutex);
+}
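Since a std::priority_queue cannot be filtered in place, the function drains it into a vector, strips the marked space from each element, and re-queues whatever remains. The same shape over standard containers, with a plain int per rseg as a hypothetical stand-in:

#include <algorithm>
#include <queue>
#include <vector>

using Elem = std::vector<int>;  // one entry per rollback segment

void cleanse(std::priority_queue<Elem>& pq, int marked_space) {
	std::vector<Elem> drained;
	while (!pq.empty()) {
		drained.push_back(pq.top());
		pq.pop();
	}
	for (Elem& elem : drained) {
		elem.erase(std::remove(elem.begin(), elem.end(), marked_space),
			   elem.end());
		if (!elem.empty()) {
			pq.push(elem);  // only non-empty batches go back
		}
	}
}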
+
+/** Iterate over the selected UNDO tablespace and check if all the rsegs
+that reside in the tablespace are free.
+@param[in] limit truncate_limit
+@param[in,out] undo_trunc undo truncate tracker */
+static
+void
+trx_purge_initiate_truncate(
+ purge_iter_t* limit,
+ undo::Truncate* undo_trunc)
+{
+ /* Step-1: Early check to find out if any of the UNDO tablespaces
+ is marked for truncate. */
+ if (!undo_trunc->is_marked()) {
+ /* No tablespace marked for truncate yet. */
+ return;
+ }
+
+ /* Step-2: Scan over each rseg and ensure that it doesn't hold any
+ active undo records. */
+ bool all_free = true;
+
+ for (ulint i = 0; i < undo_trunc->rsegs_size() && all_free; ++i) {
+
+ trx_rseg_t* rseg = undo_trunc->get_ith_rseg(i);
+
+ mutex_enter(&rseg->mutex);
+
+ if (rseg->trx_ref_count > 0) {
+ /* This rseg is still being held by an active
+ transaction. */
+ all_free = false;
+ mutex_exit(&rseg->mutex);
+ continue;
+ }
+
+ ut_ad(rseg->trx_ref_count == 0);
+ ut_ad(rseg->skip_allocation);
+
+ ulint size_of_rsegs = rseg->curr_size;
+
+ if (size_of_rsegs == 1) {
+ mutex_exit(&rseg->mutex);
+ continue;
+ } else {
+
+ /* There could be cached undo segments. Check if the records
+ in these segments can be purged. Normal purge of the history
+ list will not touch these cached segments. */
+ ulint cached_undo_size = 0;
+
+ for (trx_undo_t* undo =
+ UT_LIST_GET_FIRST(rseg->update_undo_cached);
+ undo != NULL && all_free;
+ undo = UT_LIST_GET_NEXT(undo_list, undo)) {
+
+ if (limit->trx_no < undo->trx_id) {
+ all_free = false;
+ } else {
+ cached_undo_size += undo->size;
+ }
+ }
+
+ for (trx_undo_t* undo =
+ UT_LIST_GET_FIRST(rseg->insert_undo_cached);
+ undo != NULL && all_free;
+ undo = UT_LIST_GET_NEXT(undo_list, undo)) {
+
+ if (limit->trx_no < undo->trx_id) {
+ all_free = false;
+ } else {
+ cached_undo_size += undo->size;
+ }
+ }
+
+ ut_ad(size_of_rsegs >= (cached_undo_size + 1));
+
+ if (size_of_rsegs > (cached_undo_size + 1)) {
+ /* There are pages besides cached pages that
+ still hold active data. */
+ all_free = false;
+ }
+ }
+
+ mutex_exit(&rseg->mutex);
+ }
+
+ if (!all_free) {
+ /* rseg still holds active data. */
+ return;
+ }
+
+
+ /* Step-3: Start the actual truncate.
+ a. Remove rseg instance if added to purge queue before we
+ initiate truncate.
+ b. Execute actual truncate */
+
+ const ulint space_id = undo_trunc->get_marked_space_id();
+
+ ib::info() << "Truncating UNDO tablespace " << space_id;
+
+ trx_purge_cleanse_purge_queue(undo_trunc);
+
+ ut_a(srv_is_undo_tablespace(space_id));
+
+ /* Flush all to-be-discarded pages of the tablespace.
+
+ During truncation, we do not want any writes to the
+ to-be-discarded area, because we must set the space->size
+ early in order to have deterministic page allocation.
+
+ If a log checkpoint was completed at LSN earlier than our
+ mini-transaction commit and the server was killed, then
+ discarding the to-be-trimmed pages without flushing would
+ break crash recovery. So, we cannot avoid the write. */
+ {
+ FlushObserver observer(
+ space_id,
+ UT_LIST_GET_FIRST(purge_sys->query->thrs)->graph->trx,
+ NULL);
+ buf_LRU_flush_or_remove_pages(space_id, &observer);
+ }
+
+ log_free_check();
+
+ /* Adjust the tablespace metadata. */
+ fil_space_t* space = fil_truncate_prepare(space_id);
+
+ if (!space) {
+ ib::error() << "Failed to find UNDO tablespace " << space_id;
+ return;
+ }
+
+ /* An undo tablespace always consists of a single file. */
+ ut_a(UT_LIST_GET_LEN(space->chain) == 1);
+ fil_node_t* file = UT_LIST_GET_FIRST(space->chain);
+ /* The undo tablespace files are never closed. */
+ ut_ad(file->is_open());
+
+ /* Re-initialize tablespace, in a single mini-transaction. */
+ mtr_t mtr;
+ const ulint size = SRV_UNDO_TABLESPACE_SIZE_IN_PAGES;
+ mtr.start();
+ mtr_x_lock(&space->latch, &mtr);
+ fil_truncate_log(space, size, &mtr);
+ fsp_header_init(space_id, size, &mtr);
+ mutex_enter(&fil_system->mutex);
+ space->size = file->size = size;
+ mutex_exit(&fil_system->mutex);
+
+ for (ulint i = 0; i < undo_trunc->rsegs_size(); ++i) {
+ trx_rseg_t* rseg = undo_trunc->get_ith_rseg(i);
+
+ buf_block_t* rblock = trx_rseg_header_create(
+ space_id, ULINT_MAX, rseg->id, &mtr);
+ ut_ad(rblock);
+ rseg->page_no = rblock ? rblock->page.id.page_no() : FIL_NULL;
+
+ /* Before re-initialization ensure that we free the existing
+ structure. There can't be any active transactions. */
+ ut_a(UT_LIST_GET_LEN(rseg->update_undo_list) == 0);
+ ut_a(UT_LIST_GET_LEN(rseg->insert_undo_list) == 0);
+
+ trx_undo_t* next_undo;
+
+ for (trx_undo_t* undo =
+ UT_LIST_GET_FIRST(rseg->update_undo_cached);
+ undo != NULL;
+ undo = next_undo) {
+
+ next_undo = UT_LIST_GET_NEXT(undo_list, undo);
+ UT_LIST_REMOVE(rseg->update_undo_cached, undo);
+ MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
+ trx_undo_mem_free(undo);
+ }
+
+ for (trx_undo_t* undo =
+ UT_LIST_GET_FIRST(rseg->insert_undo_cached);
+ undo != NULL;
+ undo = next_undo) {
+
+ next_undo = UT_LIST_GET_NEXT(undo_list, undo);
+ UT_LIST_REMOVE(rseg->insert_undo_cached, undo);
+ MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
+ trx_undo_mem_free(undo);
+ }
+
+ UT_LIST_INIT(rseg->update_undo_list, &trx_undo_t::undo_list);
+ UT_LIST_INIT(rseg->update_undo_cached, &trx_undo_t::undo_list);
+ UT_LIST_INIT(rseg->insert_undo_list, &trx_undo_t::undo_list);
+ UT_LIST_INIT(rseg->insert_undo_cached, &trx_undo_t::undo_list);
+
+ /* These were written by trx_rseg_header_create(). */
+ ut_ad(mach_read_from_4(TRX_RSEG + TRX_RSEG_MAX_SIZE
+ + rblock->frame)
+ == uint32_t(rseg->max_size));
+ ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_HISTORY_SIZE
+ + rblock->frame));
+
+ rseg->max_size = ULINT_MAX;
+
+ /* Initialize the undo log lists according to the rseg header */
+ rseg->curr_size = 1;
+ rseg->trx_ref_count = 0;
+ rseg->last_page_no = FIL_NULL;
+ rseg->last_offset = 0;
+ rseg->last_trx_no = 0;
+ rseg->last_del_marks = FALSE;
+ }
+
+ mtr.commit();
+ /* Write-ahead the redo log record. */
+ log_write_up_to(mtr.commit_lsn(), true);
+
+ /* Trim the file size. */
+ os_file_truncate(file->name, file->handle,
+ os_offset_t(size) << srv_page_size_shift, true);
+
+ /* This is only executed by the srv_purge_coordinator_thread. */
+ export_vars.innodb_undo_truncations++;
+
+ /* TODO: PUNCH_HOLE the garbage (with write-ahead logging) */
+
+ mutex_enter(&fil_system->mutex);
+ ut_ad(space->stop_new_ops);
+ ut_ad(space->is_being_truncated);
+ space->stop_new_ops = false;
+ space->is_being_truncated = false;
+ mutex_exit(&fil_system->mutex);
+
+ if (purge_sys->rseg != NULL
+ && purge_sys->rseg->last_page_no == FIL_NULL) {
+ /* If purge_sys->rseg is pointing to a rseg that was recently
+ truncated then move to the next rseg element.
+ Note: Ideally purge_sys->rseg should be NULL because purge
+ should have completed processing all the records, but
+ purge_batch_size can force the purge loop to exit before all
+ the records are purged, in which case purge_sys->rseg could
+ point to a valid rseg waiting for the next purge cycle. */
+ purge_sys->next_stored = false;
+ purge_sys->rseg = NULL;
+ }
+
+ DBUG_EXECUTE_IF("ib_undo_trunc",
+ ib::info() << "ib_undo_trunc";
+ log_write_up_to(LSN_MAX, true);
+ DBUG_SUICIDE(););
+
+ /* Completed truncate. Now it is safe to re-use the tablespace. */
+ for (ulint i = 0; i < undo_trunc->rsegs_size(); ++i) {
+ trx_rseg_t* rseg = undo_trunc->get_ith_rseg(i);
+ rseg->skip_allocation = false;
+ }
+
+ ib::info() << "Truncated UNDO tablespace " << space_id;
+
+ undo_trunc->reset();
+ undo::Truncate::clear_trunc_list();
+}
+
/********************************************************************//**
Removes unnecessary history data from rollback segments. NOTE that when this
function is called, the caller must not have any latches on undo log pages! */
@@ -514,21 +1121,22 @@ void
trx_purge_truncate_history(
/*========================*/
purge_iter_t* limit, /*!< in: truncate limit */
- const read_view_t* view) /*!< in: purge view */
+ const ReadView* view) /*!< in: purge view */
{
- ulint i;
+ ut_ad(trx_purge_check_limit());
/* We play safe and set the truncate limit at most to the purge view
low_limit number, though this is not necessary */
- if (limit->trx_no >= view->low_limit_no) {
- limit->trx_no = view->low_limit_no;
+ if (limit->trx_no >= view->low_limit_no()) {
+ limit->trx_no = view->low_limit_no();
limit->undo_no = 0;
+ limit->undo_rseg_space = ULINT_UNDEFINED;
}
- ut_ad(limit->trx_no <= purge_sys->view->low_limit_no);
+ ut_ad(limit->trx_no <= purge_sys->view.low_limit_no());
- for (i = 0; i < TRX_SYS_N_RSEGS; ++i) {
+ for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
trx_rseg_t* rseg = trx_sys->rseg_array[i];
if (rseg != NULL) {
@@ -536,6 +1144,14 @@ trx_purge_truncate_history(
trx_purge_truncate_rseg_history(rseg, limit);
}
}
+
+	/* UNDO tablespace truncation. We try to truncate as much as we
+	can (greedy approach), so that when the server is idle we
+	eventually truncate all the UNDO tablespaces. */
+ for (ulint i = srv_undo_tablespaces_active; i--; ) {
+ trx_purge_mark_undo_for_truncate(&purge_sys->undo_trunc);
+ trx_purge_initiate_truncate(limit, &purge_sys->undo_trunc);
+ }
}
/***********************************************************************//**
@@ -549,14 +1165,12 @@ trx_purge_rseg_get_next_history_log(
ulint* n_pages_handled)/*!< in/out: number of UNDO pages
handled */
{
- const void* ptr;
page_t* undo_page;
trx_ulogf_t* log_hdr;
fil_addr_t prev_log_addr;
trx_id_t trx_no;
ibool del_marks;
mtr_t mtr;
- rseg_queue_t rseg_queue;
mutex_enter(&(rseg->mutex));
@@ -564,12 +1178,13 @@ trx_purge_rseg_get_next_history_log(
purge_sys->iter.trx_no = rseg->last_trx_no + 1;
purge_sys->iter.undo_no = 0;
- purge_sys->next_stored = FALSE;
+ purge_sys->iter.undo_rseg_space = ULINT_UNDEFINED;
+ purge_sys->next_stored = false;
mtr_start(&mtr);
undo_page = trx_undo_page_get_s_latched(
- rseg->space, rseg->zip_size, rseg->last_page_no, &mtr);
+ page_id_t(rseg->space, rseg->last_page_no), &mtr);
log_hdr = undo_page + rseg->last_offset;
@@ -597,8 +1212,9 @@ trx_purge_rseg_get_next_history_log(
/* Read the trx number and del marks from the previous log header */
mtr_start(&mtr);
- log_hdr = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
- prev_log_addr.page, &mtr)
+ log_hdr = trx_undo_page_get_s_latched(page_id_t(rseg->space,
+ prev_log_addr.page),
+ &mtr)
+ prev_log_addr.boffset;
trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO);
@@ -614,95 +1230,32 @@ trx_purge_rseg_get_next_history_log(
rseg->last_trx_no = trx_no;
rseg->last_del_marks = del_marks;
- rseg_queue.rseg = rseg;
- rseg_queue.trx_no = rseg->last_trx_no;
+ TrxUndoRsegs elem(rseg->last_trx_no);
+ elem.push_back(rseg);
/* Purge can also produce events, however these are already ordered
in the rollback segment and any user generated event will be greater
than the events that Purge produces. ie. Purge can never produce
events from an empty rollback segment. */
- mutex_enter(&purge_sys->bh_mutex);
+ mutex_enter(&purge_sys->pq_mutex);
- ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
- ut_a(ptr != NULL);
+ purge_sys->purge_queue.push(elem);
- mutex_exit(&purge_sys->bh_mutex);
+ mutex_exit(&purge_sys->pq_mutex);
mutex_exit(&rseg->mutex);
}
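/* A standalone sketch (simplified types, not InnoDB's declarations) of
   the purge_queue introduced above: rollback segments are pushed keyed by
   the trx_no of their oldest not-yet-purged history entry, and purge
   always pops the minimum, so undo logs are processed in
   transaction-commit order. */
#include <cstdint>
#include <functional>
#include <iostream>
#include <queue>
#include <vector>

struct RsegElem {
	uint64_t	trx_no;		/* oldest not-yet-purged commit no. */
	int		rseg_id;	/* stands in for trx_rseg_t* */
	bool operator>(const RsegElem& o) const { return trx_no > o.trx_no; }
};

int main()
{
	std::priority_queue<RsegElem, std::vector<RsegElem>,
			    std::greater<RsegElem> > purge_queue;
	purge_queue.push({42, 1});
	purge_queue.push({17, 0});
	purge_queue.push({99, 2});
	while (!purge_queue.empty()) {	/* pops 17, 42, 99 in order */
		std::cout << "purge rseg " << purge_queue.top().rseg_id
			  << " at trx_no " << purge_queue.top().trx_no << '\n';
		purge_queue.pop();
	}
}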
-/***********************************************************************//**
-Chooses the rollback segment with the smallest trx_id.
-@return zip_size if log is for a compressed table, ULINT_UNDEFINED if
- no rollback segments to purge, 0 for non compressed tables. */
-static
-ulint
-trx_purge_get_rseg_with_min_trx_id(
-/*===============================*/
- trx_purge_t* purge_sys) /*!< in/out: purge instance */
-
-{
- ulint zip_size = 0;
-
- mutex_enter(&purge_sys->bh_mutex);
-
- /* Only purge consumes events from the binary heap, user
- threads only produce the events. */
-
- if (!ib_bh_is_empty(purge_sys->ib_bh)) {
- trx_rseg_t* rseg;
-
- rseg = ((rseg_queue_t*) ib_bh_first(purge_sys->ib_bh))->rseg;
- ib_bh_pop(purge_sys->ib_bh);
-
- mutex_exit(&purge_sys->bh_mutex);
-
- purge_sys->rseg = rseg;
- } else {
- mutex_exit(&purge_sys->bh_mutex);
-
- purge_sys->rseg = NULL;
-
- return(ULINT_UNDEFINED);
- }
-
- ut_a(purge_sys->rseg != NULL);
-
- mutex_enter(&purge_sys->rseg->mutex);
-
- ut_a(purge_sys->rseg->last_page_no != FIL_NULL);
-
- /* We assume in purge of externally stored fields that space id is
- in the range of UNDO tablespace space ids */
- ut_a(purge_sys->rseg->space == 0
- || srv_is_undo_tablespace(purge_sys->rseg->space));
-
- zip_size = purge_sys->rseg->zip_size;
-
- ut_a(purge_sys->iter.trx_no <= purge_sys->rseg->last_trx_no);
-
- purge_sys->iter.trx_no = purge_sys->rseg->last_trx_no;
- purge_sys->hdr_offset = purge_sys->rseg->last_offset;
- purge_sys->hdr_page_no = purge_sys->rseg->last_page_no;
-
- mutex_exit(&purge_sys->rseg->mutex);
-
- return(zip_size);
-}
-
-/***********************************************************************//**
-Position the purge sys "iterator" on the undo record to use for purging. */
+/** Position the purge sys "iterator" on the undo record to use for purging. */
static
void
-trx_purge_read_undo_rec(
-/*====================*/
- trx_purge_t* purge_sys, /*!< in/out: purge instance */
- ulint zip_size) /*!< in: block size or 0 */
+trx_purge_read_undo_rec()
{
ulint offset;
ulint page_no;
ib_uint64_t undo_no;
+ ulint undo_rseg_space;
purge_sys->hdr_offset = purge_sys->rseg->last_offset;
page_no = purge_sys->hdr_page_no = purge_sys->rseg->last_page_no;
@@ -715,30 +1268,33 @@ trx_purge_read_undo_rec(
undo_rec = trx_undo_get_first_rec(
purge_sys->rseg->space,
- zip_size,
purge_sys->hdr_page_no,
purge_sys->hdr_offset, RW_S_LATCH, &mtr);
if (undo_rec != NULL) {
offset = page_offset(undo_rec);
undo_no = trx_undo_rec_get_undo_no(undo_rec);
+ undo_rseg_space = purge_sys->rseg->space;
page_no = page_get_page_no(page_align(undo_rec));
} else {
offset = 0;
undo_no = 0;
+ undo_rseg_space = ULINT_UNDEFINED;
}
mtr_commit(&mtr);
} else {
offset = 0;
undo_no = 0;
+ undo_rseg_space = ULINT_UNDEFINED;
}
purge_sys->offset = offset;
purge_sys->page_no = page_no;
purge_sys->iter.undo_no = undo_no;
+ purge_sys->iter.undo_rseg_space = undo_rseg_space;
- purge_sys->next_stored = TRUE;
+ purge_sys->next_stored = true;
}
/***********************************************************************//**
@@ -751,14 +1307,10 @@ void
trx_purge_choose_next_log(void)
/*===========================*/
{
- ulint zip_size;
+ ut_ad(!purge_sys->next_stored);
- ut_ad(purge_sys->next_stored == FALSE);
-
- zip_size = trx_purge_get_rseg_with_min_trx_id(purge_sys);
-
- if (purge_sys->rseg != NULL) {
- trx_purge_read_undo_rec(purge_sys, zip_size);
+ if (purge_sys->rseg_iter.set_next()) {
+ trx_purge_read_undo_rec();
} else {
/* There is nothing to do yet. */
os_thread_yield();
@@ -767,7 +1319,7 @@ trx_purge_choose_next_log(void)
/***********************************************************************//**
Gets the next record to purge and updates the info in the purge system.
-@return copy of an undo log record or pointer to the dummy undo log record */
+@return copy of an undo log record or pointer to the dummy undo log record */
static
trx_undo_rec_t*
trx_purge_get_next_rec(
@@ -784,14 +1336,12 @@ trx_purge_get_next_rec(
ulint offset;
ulint page_no;
ulint space;
- ulint zip_size;
mtr_t mtr;
ut_ad(purge_sys->next_stored);
- ut_ad(purge_sys->iter.trx_no < purge_sys->view->low_limit_no);
+ ut_ad(purge_sys->iter.trx_no < purge_sys->view.low_limit_no());
space = purge_sys->rseg->space;
- zip_size = purge_sys->rseg->zip_size;
page_no = purge_sys->page_no;
offset = purge_sys->offset;
@@ -811,7 +1361,8 @@ trx_purge_get_next_rec(
mtr_start(&mtr);
- undo_page = trx_undo_page_get_s_latched(space, zip_size, page_no, &mtr);
+ undo_page = trx_undo_page_get_s_latched(page_id_t(space, page_no),
+ &mtr);
rec = undo_page + offset;
@@ -869,7 +1420,7 @@ trx_purge_get_next_rec(
mtr_start(&mtr);
undo_page = trx_undo_page_get_s_latched(
- space, zip_size, page_no, &mtr);
+ page_id_t(space, page_no), &mtr);
rec = undo_page + offset;
} else {
@@ -878,6 +1429,7 @@ trx_purge_get_next_rec(
purge_sys->offset = rec2 - page;
purge_sys->page_no = page_get_page_no(page);
purge_sys->iter.undo_no = trx_undo_rec_get_undo_no(rec2);
+ purge_sys->iter.undo_rseg_space = space;
if (undo_page != page) {
/* We advance to a new page of the undo log: */
@@ -897,7 +1449,7 @@ Fetches the next undo log record from the history list to purge. It must be
released with the corresponding release function.
@return copy of an undo log record or pointer to trx_purge_dummy_rec,
if the whole undo log can skipped in purge; NULL if none left */
-static MY_ATTRIBUTE((warn_unused_result, nonnull))
+static MY_ATTRIBUTE((warn_unused_result))
trx_undo_rec_t*
trx_purge_fetch_next_rec(
/*=====================*/
@@ -910,18 +1462,13 @@ trx_purge_fetch_next_rec(
trx_purge_choose_next_log();
if (!purge_sys->next_stored) {
-
- if (srv_print_thread_releases) {
- fprintf(stderr,
- "Purge: No logs left in the"
- " history list\n");
- }
-
+ DBUG_PRINT("ib_purge",
+ ("no logs left in the history list"));
return(NULL);
}
}
- if (purge_sys->iter.trx_no >= purge_sys->view->low_limit_no) {
+ if (purge_sys->iter.trx_no >= purge_sys->view.low_limit_no()) {
return(NULL);
}
@@ -941,25 +1488,26 @@ trx_purge_fetch_next_rec(
/*******************************************************************//**
This function runs a purge batch.
-@return number of undo log pages handled in the batch */
+@return number of undo log pages handled in the batch */
static
ulint
trx_purge_attach_undo_recs(
/*=======================*/
ulint n_purge_threads,/*!< in: number of purge threads */
- trx_purge_t* purge_sys, /*!< in/out: purge instance */
- purge_iter_t* limit, /*!< out: records read up to */
+ purge_sys_t* purge_sys, /*!< in/out: purge instance */
ulint batch_size) /*!< in: no. of pages to purge */
{
que_thr_t* thr;
- ulint i = 0;
+ ulint i;
ulint n_pages_handled = 0;
ulint n_thrs = UT_LIST_GET_LEN(purge_sys->query->thrs);
ut_a(n_purge_threads > 0);
- *limit = purge_sys->iter;
+ purge_sys->limit = purge_sys->iter;
+#ifdef UNIV_DEBUG
+ i = 0;
/* Debug code to validate some pre-requisites and reset done flag. */
for (thr = UT_LIST_GET_FIRST(purge_sys->query->thrs);
thr != NULL && i < n_purge_threads;
@@ -970,16 +1518,16 @@ trx_purge_attach_undo_recs(
/* Get the purge node. */
node = (purge_node_t*) thr->child;
- ut_a(que_node_get_type(node) == QUE_NODE_PURGE);
- ut_a(node->undo_recs == NULL);
- ut_a(node->done);
-
- node->done = FALSE;
+ ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
+ ut_ad(node->undo_recs == NULL);
+ ut_ad(!node->in_progress);
+ ut_d(node->in_progress = true);
}
/* There should never be fewer nodes than threads, the inverse
however is allowed because we only use purge threads as needed. */
- ut_a(i == n_purge_threads);
+ ut_ad(i == n_purge_threads);
+#endif
/* Fetch and parse the UNDO records. The UNDO records are added
to a per purge node vector. */
@@ -1006,11 +1554,8 @@ trx_purge_attach_undo_recs(
/* Track the max {trx_id, undo_no} for truncating the
UNDO logs once we have purged the records. */
- if (purge_sys->iter.trx_no > limit->trx_no
- || (purge_sys->iter.trx_no == limit->trx_no
- && purge_sys->iter.undo_no >= limit->undo_no)) {
-
- *limit = purge_sys->iter;
+ if (trx_purge_check_limit()) {
+ purge_sys->limit = purge_sys->iter;
}
/* Fetch the next record, and advance the purge_sys->iter. */
@@ -1098,39 +1643,20 @@ static
void
trx_purge_wait_for_workers_to_complete(
/*===================================*/
- trx_purge_t* purge_sys) /*!< in: purge instance */
+ purge_sys_t* purge_sys) /*!< in: purge instance */
{
ulint n_submitted = purge_sys->n_submitted;
-#ifdef HAVE_ATOMIC_BUILTINS
/* Ensure that the work queue empties out. */
- while (!os_compare_and_swap_ulint(
- &purge_sys->n_completed, n_submitted, n_submitted)) {
-#else
- mutex_enter(&purge_sys->bh_mutex);
-
- while (purge_sys->n_completed < n_submitted) {
-#endif /* HAVE_ATOMIC_BUILTINS */
-
-#ifndef HAVE_ATOMIC_BUILTINS
- mutex_exit(&purge_sys->bh_mutex);
-#endif /* !HAVE_ATOMIC_BUILTINS */
+ while ((ulint) my_atomic_loadlint(&purge_sys->n_completed) != n_submitted) {
if (srv_get_task_queue_length() > 0) {
srv_release_threads(SRV_WORKER, 1);
}
os_thread_yield();
-
-#ifndef HAVE_ATOMIC_BUILTINS
- mutex_enter(&purge_sys->bh_mutex);
-#endif /* !HAVE_ATOMIC_BUILTINS */
}
-#ifndef HAVE_ATOMIC_BUILTINS
- mutex_exit(&purge_sys->bh_mutex);
-#endif /* !HAVE_ATOMIC_BUILTINS */
-
/* None of the worker threads should be doing any work. */
ut_a(purge_sys->n_submitted == purge_sys->n_completed);
@@ -1139,26 +1665,9 @@ trx_purge_wait_for_workers_to_complete(
ut_a(srv_get_task_queue_length() == 0);
}
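/* A standalone sketch (std::atomic instead of my_atomic_loadlint) of the
   simplified wait loop above: one atomic counter replaces the old
   mutex-guarded fallback, and the coordinator spins, yielding, until the
   workers have completed everything that was submitted. */
#include <atomic>
#include <thread>
#include <vector>

int main()
{
	std::atomic<unsigned long> n_completed{0};
	const unsigned long n_submitted = 4;

	std::vector<std::thread> workers;
	for (unsigned long i = 0; i < n_submitted; i++) {
		workers.emplace_back([&] {
			/* ... purge one batch ... */
			n_completed.fetch_add(1);
		});
	}
	/* Coordinator: wait for the work queue to empty out. */
	while (n_completed.load() != n_submitted) {
		std::this_thread::yield();
	}
	for (auto& w : workers) w.join();
}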
-/******************************************************************//**
-Remove old historical changes from the rollback segments. */
-static
-void
-trx_purge_truncate(void)
-/*====================*/
-{
- ut_ad(trx_purge_check_limit());
-
- if (purge_sys->limit.trx_no == 0) {
- trx_purge_truncate_history(&purge_sys->iter, purge_sys->view);
- } else {
- trx_purge_truncate_history(&purge_sys->limit, purge_sys->view);
- }
-}
-
/*******************************************************************//**
This function runs a purge batch.
-@return number of undo log pages handled in the batch */
-UNIV_INTERN
+@return number of undo log pages handled in the batch */
ulint
trx_purge(
/*======*/
@@ -1166,7 +1675,12 @@ trx_purge(
to submit to the work queue */
ulint batch_size, /*!< in: the maximum number of records
to purge in one batch */
- bool truncate) /*!< in: truncate history if true */
+ bool truncate /*!< in: truncate history if true */
+#ifdef UNIV_DEBUG
+ , srv_slot_t *slot /*!< in/out: purge coordinator
+ thread slot */
+#endif
+)
{
que_thr_t* thr = NULL;
ulint n_pages_handled;
@@ -1179,24 +1693,18 @@ trx_purge(
ut_a(purge_sys->n_submitted == purge_sys->n_completed);
rw_lock_x_lock(&purge_sys->latch);
-
- purge_sys->view = NULL;
-
- mem_heap_empty(purge_sys->heap);
-
- purge_sys->view = read_view_purge_open(purge_sys->heap);
-
+ trx_sys->mvcc->clone_oldest_view(&purge_sys->view);
rw_lock_x_unlock(&purge_sys->latch);
#ifdef UNIV_DEBUG
if (srv_purge_view_update_only_debug) {
return(0);
}
-#endif
+#endif /* UNIV_DEBUG */
/* Fetch the UNDO recs that need to be purged. */
n_pages_handled = trx_purge_attach_undo_recs(
- n_purge_threads, purge_sys, &purge_sys->limit, batch_size);
+ n_purge_threads, purge_sys, batch_size);
/* Do we do an asynchronous purge or not ? */
if (n_purge_threads > 1) {
@@ -1227,10 +1735,11 @@ trx_purge(
run_synchronously:
++purge_sys->n_submitted;
+ ut_d(thr->thread_slot = slot);
que_run_threads(thr);
- os_atomic_inc_ulint(
- &purge_sys->bh_mutex, &purge_sys->n_completed, 1);
+ my_atomic_addlint(
+ &purge_sys->n_completed, 1);
if (n_purge_threads > 1) {
trx_purge_wait_for_workers_to_complete(purge_sys);
@@ -1250,7 +1759,11 @@ run_synchronously:
#endif /* UNIV_DEBUG */
if (truncate) {
- trx_purge_truncate();
+ trx_purge_truncate_history(
+ purge_sys->limit.trx_no
+ ? &purge_sys->limit
+ : &purge_sys->iter,
+ &purge_sys->view);
}
MONITOR_INC_VALUE(MONITOR_PURGE_INVOKED, 1);
@@ -1262,7 +1775,6 @@ run_synchronously:
/*******************************************************************//**
Get the purge state.
@return purge state. */
-UNIV_INTERN
purge_state_t
trx_purge_state(void)
/*=================*/
@@ -1280,58 +1792,52 @@ trx_purge_state(void)
/*******************************************************************//**
Stop purge and wait for it to stop, move to PURGE_STATE_STOP. */
-UNIV_INTERN
void
trx_purge_stop(void)
/*================*/
{
- ut_a(srv_n_purge_threads > 0);
-
rw_lock_x_lock(&purge_sys->latch);
- const ib_int64_t sig_count = os_event_reset(purge_sys->event);
- const purge_state_t state = purge_sys->state;
-
- ut_a(state == PURGE_STATE_RUN || state == PURGE_STATE_STOP);
-
- ++purge_sys->n_stop;
-
- if (state == PURGE_STATE_RUN) {
- ib_logf(IB_LOG_LEVEL_INFO, "Stopping purge");
+ switch (purge_sys->state) {
+ case PURGE_STATE_INIT:
+ case PURGE_STATE_DISABLED:
+ ut_error;
+ case PURGE_STATE_EXIT:
+ /* Shutdown must have been initiated during
+ FLUSH TABLES FOR EXPORT. */
+ ut_ad(!srv_undo_sources);
+unlock:
+ rw_lock_x_unlock(&purge_sys->latch);
+ break;
+ case PURGE_STATE_STOP:
+ ut_ad(srv_n_purge_threads > 0);
+ ++purge_sys->n_stop;
+ purge_sys->state = PURGE_STATE_STOP;
+ if (!purge_sys->running) {
+ goto unlock;
+ }
+ ib::info() << "Waiting for purge to stop";
+ do {
+ rw_lock_x_unlock(&purge_sys->latch);
+ os_thread_sleep(10000);
+ rw_lock_x_lock(&purge_sys->latch);
+ } while (purge_sys->running);
+ goto unlock;
+ case PURGE_STATE_RUN:
+ ut_ad(srv_n_purge_threads > 0);
+ ++purge_sys->n_stop;
+ ib::info() << "Stopping purge";
/* We need to wakeup the purge thread in case it is suspended,
so that it can acknowledge the state change. */
- srv_purge_wakeup();
- }
-
- purge_sys->state = PURGE_STATE_STOP;
-
- if (state != PURGE_STATE_STOP) {
+ const int64_t sig_count = os_event_reset(purge_sys->event);
+ purge_sys->state = PURGE_STATE_STOP;
rw_lock_x_unlock(&purge_sys->latch);
+ srv_purge_wakeup();
/* Wait for purge coordinator to signal that it
is suspended. */
os_event_wait_low(purge_sys->event, sig_count);
- } else {
- bool once = true;
-
- /* Wait for purge to signal that it has actually stopped. */
- while (purge_sys->running) {
-
- if (once) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Waiting for purge to stop");
- once = false;
- }
-
- rw_lock_x_unlock(&purge_sys->latch);
-
- os_thread_sleep(10000);
-
- rw_lock_x_lock(&purge_sys->latch);
- }
-
- rw_lock_x_unlock(&purge_sys->latch);
}
MONITOR_INC_VALUE(MONITOR_PURGE_STOP_COUNT, 1);
@@ -1339,40 +1845,35 @@ trx_purge_stop(void)
/*******************************************************************//**
Resume purge, move to PURGE_STATE_RUN. */
-UNIV_INTERN
void
trx_purge_run(void)
/*===============*/
{
rw_lock_x_lock(&purge_sys->latch);
- switch(purge_sys->state) {
- case PURGE_STATE_INIT:
+ switch (purge_sys->state) {
case PURGE_STATE_EXIT:
+ /* Shutdown must have been initiated during
+ FLUSH TABLES FOR EXPORT. */
+ ut_ad(!srv_undo_sources);
+ break;
+ case PURGE_STATE_INIT:
case PURGE_STATE_DISABLED:
ut_error;
case PURGE_STATE_RUN:
- case PURGE_STATE_STOP:
+ ut_a(!purge_sys->n_stop);
break;
- }
-
- if (purge_sys->n_stop > 0) {
-
- ut_a(purge_sys->state == PURGE_STATE_STOP);
-
- --purge_sys->n_stop;
-
- if (purge_sys->n_stop == 0) {
+ case PURGE_STATE_STOP:
+ ut_a(purge_sys->n_stop);
+ if (--purge_sys->n_stop == 0) {
- ib_logf(IB_LOG_LEVEL_INFO, "Resuming purge");
+ ib::info() << "Resuming purge";
purge_sys->state = PURGE_STATE_RUN;
}
MONITOR_INC_VALUE(MONITOR_PURGE_RESUME_COUNT, 1);
- } else {
- ut_a(purge_sys->state == PURGE_STATE_RUN);
}
rw_lock_x_unlock(&purge_sys->latch);
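/* A standalone sketch (no latching; simplified states) of the n_stop
   pairing visible in trx_purge_stop()/trx_purge_run() above: n_stop acts
   as a nesting counter, and purge only resumes when the last stopper
   calls run(). */
#include <cassert>

struct PurgeControl {
	int	n_stop = 0;
	bool	running = true;
	void stop() { ++n_stop; running = false; }
	void run()
	{
		if (n_stop && --n_stop == 0) {
			running = true;		/* "Resuming purge" */
		}
	}
};

int main()
{
	PurgeControl p;
	p.stop();
	p.stop();	/* nested stop */
	p.run();	/* still stopped: one stopper remains */
	assert(!p.running);
	p.run();	/* last resume actually restarts purge */
	assert(p.running);
}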
diff --git a/storage/innobase/trx/trx0rec.cc b/storage/innobase/trx/trx0rec.cc
index c1cc794ebfe..3e41cb5ad1f 100644
--- a/storage/innobase/trx/trx0rec.cc
+++ b/storage/innobase/trx/trx0rec.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2019, MariaDB Corporation.
+Copyright (c) 1996, 2019, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,16 +25,10 @@ Created 3/26/1996 Heikki Tuuri
*******************************************************/
#include "trx0rec.h"
-
-#ifdef UNIV_NONINL
-#include "trx0rec.ic"
-#endif
-
#include "fsp0fsp.h"
#include "mach0data.h"
#include "trx0undo.h"
#include "mtr0log.h"
-#ifndef UNIV_HOTBACKUP
#include "dict0dict.h"
#include "ut0mem.h"
#include "read0read.h"
@@ -44,6 +38,7 @@ Created 3/26/1996 Heikki Tuuri
#include "trx0purge.h"
#include "trx0rseg.h"
#include "row0row.h"
+#include "row0mysql.h"
/*=========== UNDO LOG RECORD CREATION AND DECODING ====================*/
@@ -86,12 +81,10 @@ trx_undof_page_add_undo_rec_log(
mlog_catenate_string(mtr, undo_page + old_free + 2, len);
}
}
-#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
Parses a redo log record of adding an undo log record.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
trx_undo_parse_add_undo_rec(
/*========================*/
@@ -135,10 +128,9 @@ trx_undo_parse_add_undo_rec(
return(ptr + len);
}
-#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Calculates the free space left for extending an undo log record.
-@return bytes left */
+@return bytes left */
UNIV_INLINE
ulint
trx_undo_left(
@@ -156,7 +148,7 @@ trx_undo_left(
Set the next and previous pointers in the undo page for the undo record
that was written to ptr. Update the first free value by the number of bytes
written for this undo record.
-@return offset of the inserted entry on the page if succeeded, 0 if fail */
+@return offset of the inserted entry on the page if succeeded, 0 if fail */
static
ulint
trx_undo_page_set_next_prev_and_add(
@@ -204,9 +196,260 @@ trx_undo_page_set_next_prev_and_add(
return(first_free);
}
+/** Virtual column undo log version. To distinguish it from a length
+value in the 5.7.8 undo log format, it starts with 0xF1 */
+static const ulint VIRTUAL_COL_UNDO_FORMAT_1 = 0xF1;
+
+/** Write virtual column index info (index id and column position in index)
+to the undo log
+@param[in,out] undo_page undo log page
+@param[in] table the table
+@param[in] pos the virtual column position
+@param[in] ptr undo log record being written
+@param[in] first_v_col whether this is the first virtual column
+ which could start with a version marker
+@return new undo log pointer */
+static
+byte*
+trx_undo_log_v_idx(
+ page_t* undo_page,
+ const dict_table_t* table,
+ ulint pos,
+ byte* ptr,
+ bool first_v_col)
+{
+ ut_ad(pos < table->n_v_def);
+ dict_v_col_t* vcol = dict_table_get_nth_v_col(table, pos);
+
+ ulint n_idx = vcol->v_indexes->size();
+ byte* old_ptr;
+
+ ut_ad(n_idx > 0);
+
+	/* Size to reserve: at most 5 bytes for each index id and position,
+	plus 5 bytes for the number of indexes, 2 bytes for the total
+	length, and 1 byte for the undo log record format version marker */
+ ulint size = n_idx * (5 + 5) + 5 + 2 + (first_v_col ? 1 : 0);
+
+ if (trx_undo_left(undo_page, ptr) < size) {
+ return(NULL);
+ }
+
+ if (first_v_col) {
+ /* write the version marker */
+ mach_write_to_1(ptr, VIRTUAL_COL_UNDO_FORMAT_1);
+
+ ptr += 1;
+ }
+
+ old_ptr = ptr;
+
+ ptr += 2;
+
+ ptr += mach_write_compressed(ptr, n_idx);
+
+ dict_v_idx_list::iterator it;
+
+ for (it = vcol->v_indexes->begin();
+ it != vcol->v_indexes->end(); ++it) {
+ dict_v_idx_t v_index = *it;
+
+ ptr += mach_write_compressed(
+ ptr, static_cast<ulint>(v_index.index->id));
+
+ ptr += mach_write_compressed(ptr, v_index.nth_field);
+ }
+
+ mach_write_to_2(old_ptr, ptr - old_ptr);
+
+ return(ptr);
+}
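/* A standalone sketch of the framing written by trx_undo_log_v_idx()
   above: [2-byte total length][compressed n_idx][compressed index id,
   compressed position]... Here mach_write_compressed() is modeled by a
   LEB128-style varint; the real InnoDB compressed encoding differs, this
   only illustrates the reserve-then-backfill layout. */
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

static size_t write_varint(uint8_t* p, uint64_t v)
{
	size_t n = 0;
	do {
		uint8_t b = v & 0x7F;
		v >>= 7;
		p[n++] = b | (v ? 0x80 : 0);
	} while (v);
	return n;
}

static size_t log_v_idx(uint8_t* ptr,
			const std::vector<std::pair<uint64_t, uint64_t> >& idx)
{
	uint8_t* old_ptr = ptr;
	ptr += 2;				/* reserve the length field */
	ptr += write_varint(ptr, idx.size());	/* number of indexes */
	for (const auto& e : idx) {
		ptr += write_varint(ptr, e.first);	/* index id */
		ptr += write_varint(ptr, e.second);	/* position */
	}
	const size_t len = size_t(ptr - old_ptr);
	old_ptr[0] = uint8_t(len >> 8);		/* back-fill big-endian, */
	old_ptr[1] = uint8_t(len);		/* like mach_write_to_2() */
	return len;
}

int main()
{
	uint8_t buf[64];
	return log_v_idx(buf, {{300, 1}, {301, 0}}) >= 5 ? 0 : 1;
}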
+
+/** Read the virtual column index from the undo log, verify that the
+column is still indexed, and output its position
+@param[in] table the table
+@param[in] ptr undo log pointer
+@param[out] col_pos the column number or ULINT_UNDEFINED
+ if the column is not indexed any more
+@return remaining part of undo log record after reading these values */
+static
+const byte*
+trx_undo_read_v_idx_low(
+ const dict_table_t* table,
+ const byte* ptr,
+ ulint* col_pos)
+{
+ ulint len = mach_read_from_2(ptr);
+ const byte* old_ptr = ptr;
+
+ *col_pos = ULINT_UNDEFINED;
+
+ ptr += 2;
+
+ ulint num_idx = mach_read_next_compressed(&ptr);
+
+ ut_ad(num_idx > 0);
+
+ dict_index_t* clust_index = dict_table_get_first_index(table);
+
+ for (ulint i = 0; i < num_idx; i++) {
+ index_id_t id = mach_read_next_compressed(&ptr);
+ ulint pos = mach_read_next_compressed(&ptr);
+ dict_index_t* index = dict_table_get_next_index(clust_index);
+
+ while (index != NULL) {
+ /* Return if we find a matching index.
+ TODO: in the future, it might be worth to add
+ checks on other indexes */
+ if (index->id == id) {
+ const dict_col_t* col = dict_index_get_nth_col(
+ index, pos);
+ ut_ad(dict_col_is_virtual(col));
+ const dict_v_col_t* vcol = reinterpret_cast<
+ const dict_v_col_t*>(col);
+ *col_pos = vcol->v_pos;
+ return(old_ptr + len);
+ }
+
+ index = dict_table_get_next_index(index);
+ }
+ }
+
+ return(old_ptr + len);
+}
+
+/** Read virtual column index from undo log or online log if the log
+contains such info, and in the undo log case, verify the column is
+still indexed, and output its position
+@param[in] table the table
+@param[in] ptr undo log pointer
+@param[in] first_v_col if this is the first virtual column, which
+ has the version marker
+@param[in,out]	is_undo_log	this function parses both undo log and
+				online log records for virtual columns;
+				this flag tells which one. When first_v_col
+				is true, is_undo_log is an output; when
+				first_v_col is false, it is an input
+@param[in,out] field_no the column number
+@return remaining part of undo log record after reading these values */
+const byte*
+trx_undo_read_v_idx(
+ const dict_table_t* table,
+ const byte* ptr,
+ bool first_v_col,
+ bool* is_undo_log,
+ ulint* field_no)
+{
+	/* The version marker is only put on the first virtual column */
+ if (first_v_col) {
+ /* Undo log has the virtual undo log marker */
+ *is_undo_log = (mach_read_from_1(ptr)
+ == VIRTUAL_COL_UNDO_FORMAT_1);
+
+ if (*is_undo_log) {
+ ptr += 1;
+ }
+ }
+
+ if (*is_undo_log) {
+ ptr = trx_undo_read_v_idx_low(table, ptr, field_no);
+ } else {
+ *field_no -= REC_MAX_N_FIELDS;
+ }
+
+ return(ptr);
+}
+
+/** Reports in the undo log of an insert of virtual columns.
+@param[in] undo_page undo log page
+@param[in] table the table
+@param[in] row dtuple contains the virtual columns
+@param[in,out] ptr log ptr
+@return true if write goes well, false if out of space */
+static
+bool
+trx_undo_report_insert_virtual(
+ page_t* undo_page,
+ dict_table_t* table,
+ const dtuple_t* row,
+ byte** ptr)
+{
+ byte* start = *ptr;
+ bool first_v_col = true;
+
+ if (trx_undo_left(undo_page, *ptr) < 2) {
+ return(false);
+ }
+
+ /* Reserve 2 bytes to write the number
+ of bytes the stored fields take in this
+ undo record */
+ *ptr += 2;
+
+ for (ulint col_no = 0; col_no < dict_table_get_n_v_cols(table);
+ col_no++) {
+ const dict_v_col_t* col
+ = dict_table_get_nth_v_col(table, col_no);
+
+ if (col->m_col.ord_part) {
+
+			/* make sure there is enough space to write the length */
+ if (trx_undo_left(undo_page, *ptr) < 5) {
+ return(false);
+ }
+
+ ulint pos = col_no;
+ pos += REC_MAX_N_FIELDS;
+ *ptr += mach_write_compressed(*ptr, pos);
+
+ *ptr = trx_undo_log_v_idx(undo_page, table,
+ col_no, *ptr, first_v_col);
+ first_v_col = false;
+
+ if (*ptr == NULL) {
+ return(false);
+ }
+
+ const dfield_t* vfield = dtuple_get_nth_v_field(
+ row, col->v_pos);
+ switch (ulint flen = vfield->len) {
+ case 0: case UNIV_SQL_NULL:
+ if (trx_undo_left(undo_page, *ptr) < 5) {
+ return(false);
+ }
+
+ *ptr += mach_write_compressed(*ptr, flen);
+ break;
+ default:
+ ulint max_len
+ = dict_max_v_field_len_store_undo(
+ table, col_no);
+
+ if (flen > max_len) {
+ flen = max_len;
+ }
+
+ if (trx_undo_left(undo_page, *ptr) < flen + 5) {
+
+ return(false);
+ }
+ *ptr += mach_write_compressed(*ptr, flen);
+
+ memcpy(*ptr, vfield->data, flen);
+ *ptr += flen;
+ }
+ }
+ }
+
+	/* Always mark the end of the log with a 2-byte length field */
+ mach_write_to_2(start, *ptr - start);
+
+ return(true);
+}
+
/**********************************************************************//**
Reports in the undo log of an insert of a clustered index record.
-@return offset of the inserted entry on the page if succeed, 0 if fail */
+@return offset of the inserted entry on the page if succeed, 0 if fail */
static
ulint
trx_undo_page_report_insert(
@@ -244,8 +487,8 @@ trx_undo_page_report_insert(
/* Store first some general parameters to the undo log */
*ptr++ = TRX_UNDO_INSERT_REC;
- ptr += mach_ull_write_much_compressed(ptr, trx->undo_no);
- ptr += mach_ull_write_much_compressed(ptr, index->table->id);
+ ptr += mach_u64_write_much_compressed(ptr, trx->undo_no);
+ ptr += mach_u64_write_much_compressed(ptr, index->table->id);
/*----------------------------------------*/
/* Store then the fields required to uniquely determine the record
to be inserted in the clustered index */
@@ -262,24 +505,33 @@ trx_undo_page_report_insert(
ptr += mach_write_compressed(ptr, flen);
- if (flen != UNIV_SQL_NULL) {
+ switch (flen) {
+ case 0: case UNIV_SQL_NULL:
+ break;
+ default:
if (trx_undo_left(undo_page, ptr) < flen) {
return(0);
}
- ut_memcpy(ptr, dfield_get_data(field), flen);
+ memcpy(ptr, dfield_get_data(field), flen);
ptr += flen;
}
}
+ if (index->table->n_v_cols) {
+ if (!trx_undo_report_insert_virtual(
+ undo_page, index->table, clust_entry, &ptr)) {
+ return(0);
+ }
+ }
+
return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr));
}
/**********************************************************************//**
Reads from an undo log record the general parameters.
-@return remaining part of undo log record after reading these values */
-UNIV_INTERN
+@return remaining part of undo log record after reading these values */
byte*
trx_undo_rec_get_pars(
/*==================*/
@@ -293,7 +545,7 @@ trx_undo_rec_get_pars(
undo_no_t* undo_no, /*!< out: undo log record number */
table_id_t* table_id) /*!< out: table id */
{
- byte* ptr;
+ const byte* ptr;
ulint type_cmpl;
ptr = undo_rec + 2;
@@ -307,31 +559,27 @@ trx_undo_rec_get_pars(
*type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1);
*cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT;
- *undo_no = mach_ull_read_much_compressed(ptr);
- ptr += mach_ull_get_much_compressed_size(*undo_no);
-
- *table_id = mach_ull_read_much_compressed(ptr);
- ptr += mach_ull_get_much_compressed_size(*table_id);
+ *undo_no = mach_read_next_much_compressed(&ptr);
+ *table_id = mach_read_next_much_compressed(&ptr);
- return(ptr);
+ return(const_cast<byte*>(ptr));
}
-/**********************************************************************//**
-Reads from an undo log record a stored column value.
-@return remaining part of undo log record after reading these values */
-static
+/** Read from an undo log record a non-virtual column value.
+@param[in,out] ptr pointer to remaining part of the undo record
+@param[in,out] field stored field
+@param[in,out] len length of the field, or UNIV_SQL_NULL
+@param[in,out] orig_len original length of the locally stored part
+of an externally stored column, or 0
+@return remaining part of undo log record after reading these values */
byte*
trx_undo_rec_get_col_val(
-/*=====================*/
- byte* ptr, /*!< in: pointer to remaining part of undo log record */
- byte** field, /*!< out: pointer to stored field */
- ulint* len, /*!< out: length of the field, or UNIV_SQL_NULL */
- ulint* orig_len)/*!< out: original length of the locally
- stored part of an externally stored column, or 0 */
+ const byte* ptr,
+ const byte** field,
+ ulint* len,
+ ulint* orig_len)
{
- *len = mach_read_compressed(ptr);
- ptr += mach_get_compressed_size(*len);
-
+ *len = mach_read_next_compressed(&ptr);
*orig_len = 0;
switch (*len) {
@@ -339,12 +587,10 @@ trx_undo_rec_get_col_val(
*field = NULL;
break;
case UNIV_EXTERN_STORAGE_FIELD:
- *orig_len = mach_read_compressed(ptr);
- ptr += mach_get_compressed_size(*orig_len);
- *len = mach_read_compressed(ptr);
- ptr += mach_get_compressed_size(*len);
+ *orig_len = mach_read_next_compressed(&ptr);
+ *len = mach_read_next_compressed(&ptr);
*field = ptr;
- ptr += *len;
+ ptr += *len & ~SPATIAL_STATUS_MASK;
ut_ad(*orig_len >= BTR_EXTERN_FIELD_REF_SIZE);
ut_ad(*len > *orig_len);
@@ -362,19 +608,19 @@ trx_undo_rec_get_col_val(
default:
*field = ptr;
if (*len >= UNIV_EXTERN_STORAGE_FIELD) {
- ptr += *len - UNIV_EXTERN_STORAGE_FIELD;
+ ptr += (*len - UNIV_EXTERN_STORAGE_FIELD)
+ & ~SPATIAL_STATUS_MASK;
} else {
ptr += *len;
}
}
- return(ptr);
+ return(const_cast<byte*>(ptr));
}
/*******************************************************************//**
Builds a row reference from an undo log record.
-@return pointer to remaining part of undo record */
-UNIV_INTERN
+@return pointer to remaining part of undo record */
byte*
trx_undo_rec_get_row_ref(
/*=====================*/
@@ -403,7 +649,7 @@ trx_undo_rec_get_row_ref(
for (i = 0; i < ref_len; i++) {
dfield_t* dfield;
- byte* field;
+ const byte* field;
ulint len;
ulint orig_len;
@@ -419,8 +665,8 @@ trx_undo_rec_get_row_ref(
/*******************************************************************//**
Skips a row reference from an undo log record.
-@return pointer to remaining part of undo record */
-UNIV_INTERN
+@return pointer to remaining part of undo record */
+static
byte*
trx_undo_rec_skip_row_ref(
/*======================*/
@@ -437,9 +683,9 @@ trx_undo_rec_skip_row_ref(
ref_len = dict_index_get_n_unique(index);
for (i = 0; i < ref_len; i++) {
- byte* field;
- ulint len;
- ulint orig_len;
+ const byte* field;
+ ulint len;
+ ulint orig_len;
ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
}
@@ -447,27 +693,27 @@ trx_undo_rec_skip_row_ref(
return(ptr);
}
-/**********************************************************************//**
-Fetch a prefix of an externally stored column, for writing to the undo log
-of an update or delete marking of a clustered index record.
-@return ext_buf */
+/** Fetch a prefix of an externally stored column, for writing to the undo
+log of an update or delete marking of a clustered index record.
+@param[out] ext_buf buffer to hold the prefix data and BLOB pointer
+@param[in] prefix_len prefix size to store in the undo log
+@param[in] page_size page size
+@param[in] field an externally stored column
+@param[in,out] len input: length of field; output: used length of
+ext_buf
+@return ext_buf */
static
byte*
trx_undo_page_fetch_ext(
-/*====================*/
- byte* ext_buf, /*!< in: buffer to hold the prefix
- data and BLOB pointer */
- ulint prefix_len, /*!< in: prefix size to store
- in the undo log */
- ulint zip_size, /*!< compressed page size in bytes,
- or 0 for uncompressed BLOB */
- const byte* field, /*!< in: an externally stored column */
- ulint* len) /*!< in: length of field;
- out: used length of ext_buf */
+ byte* ext_buf,
+ ulint prefix_len,
+ const page_size_t& page_size,
+ const byte* field,
+ ulint* len)
{
/* Fetch the BLOB. */
ulint ext_len = btr_copy_externally_stored_field_prefix(
- ext_buf, prefix_len, zip_size, field, *len);
+ ext_buf, prefix_len, page_size, field, *len);
/* BLOBs should always be nonempty. */
ut_a(ext_len);
/* Append the BLOB pointer to the prefix. */
@@ -478,27 +724,56 @@ trx_undo_page_fetch_ext(
return(ext_buf);
}
-/**********************************************************************//**
-Writes to the undo log a prefix of an externally stored column.
-@return undo log position */
+/** Writes to the undo log a prefix of an externally stored column.
+@param[out] ptr undo log position, at least 15 bytes must be
+available
+@param[out] ext_buf a buffer of DICT_MAX_FIELD_LEN_BY_FORMAT()
+ size, or NULL when should not fetch a longer
+ prefix
+@param[in] prefix_len prefix size to store in the undo log
+@param[in] page_size page size
+@param[in,out] field the locally stored part of the externally
+stored column
+@param[in,out] len length of field, in bytes
+@param[in] spatial_status whether the column is used by spatial index or
+ regular index
+@return undo log position */
static
byte*
trx_undo_page_report_modify_ext(
-/*============================*/
- byte* ptr, /*!< in: undo log position,
- at least 15 bytes must be available */
- byte* ext_buf, /*!< in: a buffer of
- DICT_MAX_FIELD_LEN_BY_FORMAT() size,
- or NULL when should not fetch
- a longer prefix */
- ulint prefix_len, /*!< prefix size to store in the
- undo log */
- ulint zip_size, /*!< compressed page size in bytes,
- or 0 for uncompressed BLOB */
- const byte** field, /*!< in/out: the locally stored part of
- the externally stored column */
- ulint* len) /*!< in/out: length of field, in bytes */
+ byte* ptr,
+ byte* ext_buf,
+ ulint prefix_len,
+ const page_size_t& page_size,
+ const byte** field,
+ ulint* len,
+ spatial_status_t spatial_status)
{
+ ulint spatial_len= 0;
+
+ switch (spatial_status) {
+ case SPATIAL_UNKNOWN:
+ case SPATIAL_NONE:
+ break;
+
+ case SPATIAL_MIXED:
+ case SPATIAL_ONLY:
+ spatial_len = DATA_MBR_LEN;
+ break;
+ }
+
+ /* Encode spatial status into length. */
+ spatial_len |= spatial_status << SPATIAL_STATUS_SHIFT;
+
+ if (spatial_status == SPATIAL_ONLY) {
+	if (spatial_status == SPATIAL_ONLY) {
+		/* If the column is only used by a spatial index,
+		logging its MBR is enough. */
+ ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD
+ + spatial_len);
+
+ return(ptr);
+ }
+
if (ext_buf) {
ut_a(prefix_len > 0);
@@ -510,18 +785,54 @@ trx_undo_page_report_modify_ext(
ptr += mach_write_compressed(ptr, *len);
- *field = trx_undo_page_fetch_ext(ext_buf, prefix_len, zip_size,
- *field, len);
+ *field = trx_undo_page_fetch_ext(ext_buf, prefix_len,
+ page_size, *field, len);
- ptr += mach_write_compressed(ptr, *len);
+ ptr += mach_write_compressed(ptr, *len + spatial_len);
} else {
ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD
- + *len);
+ + *len + spatial_len);
}
return(ptr);
}
+/** Get MBR from a Geometry column stored externally
+@param[out] mbr MBR to fill
+@param[in]	page_size	table page size
+@param[in]	field		field containing the geometry data
+@param[in,out] len length of field, in bytes
+*/
+static
+void
+trx_undo_get_mbr_from_ext(
+/*======================*/
+ double* mbr,
+ const page_size_t& page_size,
+ const byte* field,
+ ulint* len)
+{
+ uchar* dptr = NULL;
+ ulint dlen;
+ mem_heap_t* heap = mem_heap_create(100);
+
+ dptr = btr_copy_externally_stored_field(
+ &dlen, field, page_size, *len, heap);
+
+ if (dlen <= GEO_DATA_HEADER_SIZE) {
+ for (uint i = 0; i < SPDIMS; ++i) {
+ mbr[i * 2] = DBL_MAX;
+ mbr[i * 2 + 1] = -DBL_MAX;
+ }
+ } else {
+ rtree_mbr_from_wkb(dptr + GEO_DATA_HEADER_SIZE,
+ static_cast<uint>(dlen
+ - GEO_DATA_HEADER_SIZE), SPDIMS, mbr);
+ }
+
+ mem_heap_free(heap);
+}
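/* A standalone sketch of the MBR (minimum bounding rectangle) shape that
   trx_undo_get_mbr_from_ext() fills above: per dimension, the min goes to
   mbr[2*i] and the max to mbr[2*i+1], and an empty geometry degenerates
   to [DBL_MAX, -DBL_MAX]. The WKB parsing itself is omitted; plain
   points stand in for the decoded geometry. */
#include <cfloat>
#include <cstdio>

static const int SPDIMS = 2;	/* two spatial dimensions, as in InnoDB */

static void mbr_from_points(const double (*pts)[SPDIMS], int n, double* mbr)
{
	for (int d = 0; d < SPDIMS; d++) {
		mbr[d * 2] = DBL_MAX;		/* min */
		mbr[d * 2 + 1] = -DBL_MAX;	/* max */
	}
	for (int i = 0; i < n; i++) {
		for (int d = 0; d < SPDIMS; d++) {
			if (pts[i][d] < mbr[d * 2]) mbr[d * 2] = pts[i][d];
			if (pts[i][d] > mbr[d * 2 + 1]) mbr[d * 2 + 1] = pts[i][d];
		}
	}
}

int main()
{
	const double pts[][SPDIMS] = {{1, 4}, {3, 2}, {-1, 5}};
	double mbr[SPDIMS * 2];
	mbr_from_points(pts, 3, mbr);
	std::printf("x:[%g,%g] y:[%g,%g]\n",	/* x:[-1,3] y:[2,5] */
		    mbr[0], mbr[1], mbr[2], mbr[3]);
}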
+
/**********************************************************************//**
Reports in the undo log of an update or delete marking of a clustered index
record.
@@ -537,15 +848,17 @@ trx_undo_page_report_modify(
delete marking is done */
const rec_t* rec, /*!< in: clustered index record which
has NOT yet been modified */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
+ const offset_t* offsets, /*!< in: rec_get_offsets(rec, index) */
const upd_t* update, /*!< in: update vector which tells the
columns to be updated; in the case of
a delete, this should be set to NULL */
ulint cmpl_info, /*!< in: compiler info on secondary
index updates */
+ const dtuple_t* row, /*!< in: clustered index row contains
+ virtual column info */
mtr_t* mtr) /*!< in: mtr */
{
- dict_table_t* table;
+ dict_table_t* table = index->table;
ulint first_free;
byte* ptr;
const byte* field;
@@ -558,12 +871,17 @@ trx_undo_page_report_modify(
ibool ignore_prefix = FALSE;
byte ext_buf[REC_VERSION_56_MAX_INDEX_COL_LEN
+ BTR_EXTERN_FIELD_REF_SIZE];
+ bool first_v_col = true;
ut_a(dict_index_is_clust(index));
ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE);
- table = index->table;
+ ut_ad(mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE
+ + undo_page) == TRX_UNDO_UPDATE
+ || (dict_table_is_temporary(table)
+ && mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE
+ + undo_page) == TRX_UNDO_INSERT));
+ trx_undo_t* update_undo = dict_table_is_temporary(table)
+ ? NULL : trx->rsegs.m_redo.update_undo;
first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+ TRX_UNDO_PAGE_FREE);
@@ -588,6 +906,10 @@ trx_undo_page_report_modify(
ut_ad(!rec_get_deleted_flag(rec, dict_table_is_comp(table)));
type_cmpl = TRX_UNDO_DEL_MARK_REC;
} else if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
+ /* In delete-marked records, DB_TRX_ID must
+ always refer to an existing update_undo log record. */
+ ut_ad(row_get_rec_trx_id(rec, index, offsets));
+
type_cmpl = TRX_UNDO_UPD_DEL_REC;
/* We are about to update a delete marked record.
We don't typically need the prefix in this case unless
@@ -602,9 +924,9 @@ trx_undo_page_report_modify(
type_cmpl_ptr = ptr;
*ptr++ = (byte) type_cmpl;
- ptr += mach_ull_write_much_compressed(ptr, trx->undo_no);
+ ptr += mach_u64_write_much_compressed(ptr, trx->undo_no);
- ptr += mach_ull_write_much_compressed(ptr, table->id);
+ ptr += mach_u64_write_much_compressed(ptr, table->id);
/*----------------------------------------*/
/* Store the state of the info bits */
@@ -626,14 +948,15 @@ trx_undo_page_report_modify(
if (ignore_prefix) {
ignore_prefix = (trx_id != trx->id);
}
- ptr += mach_ull_write_compressed(ptr, trx_id);
+ ptr += mach_u64_write_compressed(ptr, trx_id);
field = rec_get_nth_field(rec, offsets,
dict_index_get_sys_col_pos(
index, DATA_ROLL_PTR), &flen);
ut_ad(flen == DATA_ROLL_PTR_LEN);
+ ut_ad(memcmp(field, field_ref_zero, DATA_ROLL_PTR_LEN));
- ptr += mach_ull_write_compressed(ptr, trx_read_roll_ptr(field));
+ ptr += mach_u64_write_compressed(ptr, trx_read_roll_ptr(field));
/*----------------------------------------*/
/* Store then the fields required to uniquely determine the
@@ -660,7 +983,7 @@ trx_undo_page_report_modify(
return(0);
}
- ut_memcpy(ptr, field, flen);
+ memcpy(ptr, field, flen);
ptr += flen;
}
}
@@ -674,11 +997,38 @@ trx_undo_page_report_modify(
return(0);
}
- ptr += mach_write_compressed(ptr, upd_get_n_fields(update));
+ ulint n_updated = upd_get_n_fields(update);
+
+	/* If this is an online update while an inplace ALTER TABLE
+	is in progress and the table has virtual columns, we need to
+	double-check whether any non-indexed columns are registered
+	in the update vector, in case they will be indexed in the
+	new table */
+ if (dict_index_is_online_ddl(index) && table->n_v_cols > 0) {
+ for (i = 0; i < upd_get_n_fields(update); i++) {
+ upd_field_t* fld = upd_get_nth_field(
+ update, i);
+ ulint pos = fld->field_no;
+
+ /* These columns must not have an index
+ on them */
+ if (upd_fld_is_virtual_col(fld)
+ && dict_table_get_nth_v_col(
+ table, pos)->v_indexes->empty()) {
+ n_updated--;
+ }
+ }
+ }
+
+ ptr += mach_write_compressed(ptr, n_updated);
for (i = 0; i < upd_get_n_fields(update); i++) {
+ upd_field_t* fld = upd_get_nth_field(update, i);
- ulint pos = upd_get_nth_field(update, i)->field_no;
+ bool is_virtual = upd_fld_is_virtual_col(fld);
+ ulint max_v_log_len = 0;
+
+ ulint pos = fld->field_no;
/* Write field number to undo log */
if (trx_undo_left(undo_page, ptr) < 5) {
@@ -686,17 +1036,59 @@ trx_undo_page_report_modify(
return(0);
}
+ if (is_virtual) {
+			/* Skip a non-indexed column during
+			an online ALTER TABLE */
+ if (dict_index_is_online_ddl(index)
+ && dict_table_get_nth_v_col(
+ table, pos)->v_indexes->empty()) {
+ continue;
+ }
+
+			/* add REC_MAX_N_FIELDS to mark that this
+			is a virtual column */
+ pos += REC_MAX_N_FIELDS;
+ }
+
ptr += mach_write_compressed(ptr, pos);
/* Save the old value of field */
- field = rec_get_nth_field(rec, offsets, pos, &flen);
+ if (is_virtual) {
+ ut_ad(fld->field_no < table->n_v_def);
+
+ ptr = trx_undo_log_v_idx(undo_page, table,
+ fld->field_no, ptr,
+ first_v_col);
+ if (ptr == NULL) {
+ return(0);
+ }
+ first_v_col = false;
+
+ max_v_log_len
+ = dict_max_v_field_len_store_undo(
+ table, fld->field_no);
+
+ field = static_cast<byte*>(
+ fld->old_v_val->data);
+ flen = fld->old_v_val->len;
+
+ /* Only log sufficient bytes for index
+ record update */
+ if (flen != UNIV_SQL_NULL) {
+ flen = ut_min(
+ flen, max_v_log_len);
+ }
+ } else {
+ field = rec_get_nth_field(rec, offsets,
+ pos, &flen);
+ }
if (trx_undo_left(undo_page, ptr) < 15) {
return(0);
}
- if (rec_offs_nth_extern(offsets, pos)) {
+ if (!is_virtual && rec_offs_nth_extern(offsets, pos)) {
const dict_col_t* col
= dict_index_get_nth_col(index, pos);
ulint prefix_len
@@ -712,13 +1104,15 @@ trx_undo_page_report_modify(
&& !ignore_prefix
&& flen < REC_ANTELOPE_MAX_INDEX_COL_LEN
? ext_buf : NULL, prefix_len,
- dict_table_zip_size(table),
- &field, &flen);
+ dict_table_page_size(table),
+ &field, &flen, SPATIAL_UNKNOWN);
/* Notify purge that it eventually has to
free the old externally stored field */
- trx->update_undo->del_marks = TRUE;
+ if (update_undo) {
+ update_undo->del_marks = TRUE;
+ }
*type_cmpl_ptr |= TRX_UNDO_UPD_EXTERN;
} else {
@@ -731,12 +1125,43 @@ trx_undo_page_report_modify(
return(0);
}
- ut_memcpy(ptr, field, flen);
+ memcpy(ptr, field, flen);
ptr += flen;
}
+
+ /* Also record the new value for virtual column */
+ if (is_virtual) {
+ field = static_cast<byte*>(fld->new_val.data);
+ flen = fld->new_val.len;
+ if (flen != UNIV_SQL_NULL) {
+ flen = ut_min(
+ flen, max_v_log_len);
+ }
+
+ if (trx_undo_left(undo_page, ptr) < 15) {
+
+ return(0);
+ }
+
+ ptr += mach_write_compressed(ptr, flen);
+
+ if (flen != UNIV_SQL_NULL) {
+ if (trx_undo_left(undo_page, ptr) < flen) {
+
+ return(0);
+ }
+
+ memcpy(ptr, field, flen);
+ ptr += flen;
+ }
+ }
}
}
+	/* Reset first_v_col, so that the virtual column undo version
+	marker is written again when we log all the indexed columns */
+ first_v_col = true;
+
/*----------------------------------------*/
/* In the case of a delete marking, and also in the case of an update
where any ordering field of any index changes, store the values of all
@@ -752,9 +1177,13 @@ trx_undo_page_report_modify(
(including BLOBs) are recovered before anything is rolled back. */
if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
- byte* old_ptr = ptr;
+ byte* old_ptr = ptr;
+ double mbr[SPDIMS * 2];
+ mem_heap_t* row_heap = NULL;
- trx->update_undo->del_marks = TRUE;
+ if (update_undo) {
+ update_undo->del_marks = TRUE;
+ }
if (trx_undo_left(undo_page, ptr) < 5) {
@@ -776,39 +1205,65 @@ trx_undo_page_report_modify(
continue;
}
- if (update) {
+ const ulint pos = dict_index_get_nth_col_pos(
+ index, col_no, NULL);
+ /* All non-virtual columns must be present in
+ the clustered index. */
+ ut_ad(pos != ULINT_UNDEFINED);
+
+ const bool is_ext = rec_offs_nth_extern(offsets, pos);
+ const spatial_status_t spatial_status = is_ext
+ ? dict_col_get_spatial_status(col)
+ : SPATIAL_NONE;
+
+ switch (spatial_status) {
+ case SPATIAL_UNKNOWN:
+ ut_ad(0);
+ /* fall through */
+ case SPATIAL_MIXED:
+ case SPATIAL_ONLY:
+ /* Externally stored spatially indexed
+ columns will be (redundantly) logged
+ again, because we did not write the
+ MBR yet, that is, the previous call to
+ trx_undo_page_report_modify_ext()
+ was with SPATIAL_UNKNOWN. */
+ break;
+ case SPATIAL_NONE:
+ if (!update) {
+ /* This is a DELETE operation. */
+ break;
+ }
+ /* Avoid redundantly logging indexed
+ columns that were updated. */
+
for (i = 0; i < update->n_fields; i++) {
- const dict_field_t* f
- = dict_index_get_nth_field(
- index,
- upd_get_nth_field(
- update, i)
- ->field_no);
- if (f->col == col) {
+ const ulint field_no
+ = upd_get_nth_field(update, i)
+ ->field_no;
+ if (field_no >= index->n_fields
+ || dict_index_get_nth_field(
+ index, field_no)->col
+ == col) {
goto already_logged;
}
}
}
- if (TRUE) {
- ulint pos;
-
+ if (true) {
/* Write field number to undo log */
if (trx_undo_left(undo_page, ptr) < 5 + 15) {
return(0);
}
- pos = dict_index_get_nth_col_pos(index,
- col_no,
- NULL);
ptr += mach_write_compressed(ptr, pos);
/* Save the old value of field */
field = rec_get_nth_field(rec, offsets, pos,
&flen);
- if (rec_offs_nth_extern(offsets, pos)) {
+ if (is_ext) {
const dict_col_t* col =
dict_index_get_nth_col(
index, pos);
@@ -818,35 +1273,147 @@ trx_undo_page_report_modify(
ut_a(prefix_len < sizeof ext_buf);
+ /* If there is a spatial index on it,
+ log its MBR */
+ if (spatial_status != SPATIAL_NONE) {
+ ut_ad(DATA_GEOMETRY_MTYPE(
+ col->mtype));
+
+ trx_undo_get_mbr_from_ext(
+ mbr,
+ dict_table_page_size(
+ table),
+ field, &flen);
+ }
+
ptr = trx_undo_page_report_modify_ext(
ptr,
flen < REC_ANTELOPE_MAX_INDEX_COL_LEN
&& !ignore_prefix
? ext_buf : NULL, prefix_len,
- dict_table_zip_size(table),
- &field, &flen);
+ dict_table_page_size(table),
+ &field, &flen,
+ spatial_status);
} else {
ptr += mach_write_compressed(
ptr, flen);
}
- if (flen != UNIV_SQL_NULL) {
+ if (flen != UNIV_SQL_NULL
+ && spatial_status != SPATIAL_ONLY) {
if (trx_undo_left(undo_page, ptr)
< flen) {
return(0);
}
- ut_memcpy(ptr, field, flen);
+ memcpy(ptr, field, flen);
ptr += flen;
}
+
+ if (spatial_status != SPATIAL_NONE) {
+ if (trx_undo_left(undo_page, ptr)
+ < DATA_MBR_LEN) {
+ return(0);
+ }
+
+ for (int i = 0; i < SPDIMS * 2;
+ i++) {
+ mach_double_write(
+ ptr, mbr[i]);
+ ptr += sizeof(double);
+ }
+ }
}
already_logged:
continue;
}
+ for (col_no = 0; col_no < dict_table_get_n_v_cols(table);
+ col_no++) {
+ const dict_v_col_t* col
+ = dict_table_get_nth_v_col(table, col_no);
+
+ if (col->m_col.ord_part) {
+ ulint pos = col_no;
+ ulint max_v_log_len
+ = dict_max_v_field_len_store_undo(
+ table, pos);
+
+			/* Write the field number to the undo log.
+			Make sure there is enough space in the log */
+ if (trx_undo_left(undo_page, ptr) < 5) {
+
+ return(0);
+ }
+
+ pos += REC_MAX_N_FIELDS;
+ ptr += mach_write_compressed(ptr, pos);
+
+ ut_ad(col_no < table->n_v_def);
+ ptr = trx_undo_log_v_idx(undo_page, table,
+ col_no, ptr,
+ first_v_col);
+ first_v_col = false;
+
+ if (!ptr) {
+ return(0);
+ }
+
+ const dfield_t* vfield = NULL;
+
+ if (update) {
+ ut_ad(!row);
+ if (update->old_vrow == NULL) {
+ flen = UNIV_SQL_NULL;
+ } else {
+ vfield = dtuple_get_nth_v_field(
+ update->old_vrow,
+ col->v_pos);
+ }
+ } else if (row) {
+ vfield = dtuple_get_nth_v_field(
+ row, col->v_pos);
+ } else {
+ ut_ad(0);
+ }
+
+ if (vfield) {
+ field = static_cast<byte*>(vfield->data);
+ flen = vfield->len;
+ } else {
+ ut_ad(flen == UNIV_SQL_NULL);
+ }
+
+ if (flen != UNIV_SQL_NULL) {
+ flen = ut_min(
+ flen, max_v_log_len);
+ }
+
+ ptr += mach_write_compressed(ptr, flen);
+
+ switch (flen) {
+ case 0: case UNIV_SQL_NULL:
+ break;
+ default:
+ if (trx_undo_left(undo_page, ptr)
+ < flen) {
+
+ return(0);
+ }
+
+ memcpy(ptr, field, flen);
+ ptr += flen;
+ }
+ }
+ }
+
mach_write_to_2(old_ptr, ptr - old_ptr);
+
+ if (row_heap) {
+ mem_heap_free(row_heap);
+ }
}
/*----------------------------------------*/
@@ -873,12 +1440,11 @@ already_logged:
/**********************************************************************//**
Reads from an undo log update record the system field values of the old
version.
-@return remaining part of undo log record after reading these values */
-UNIV_INTERN
+@return remaining part of undo log record after reading these values */
byte*
trx_undo_update_rec_get_sys_cols(
/*=============================*/
- byte* ptr, /*!< in: remaining part of undo
+ const byte* ptr, /*!< in: remaining part of undo
log record after reading
general parameters */
trx_id_t* trx_id, /*!< out: trx id */
@@ -891,56 +1457,20 @@ trx_undo_update_rec_get_sys_cols(
/* Read the values of the system columns */
- *trx_id = mach_ull_read_compressed(ptr);
- ptr += mach_ull_get_compressed_size(*trx_id);
+ *trx_id = mach_u64_read_next_compressed(&ptr);
+ *roll_ptr = mach_u64_read_next_compressed(&ptr);
- *roll_ptr = mach_ull_read_compressed(ptr);
- ptr += mach_ull_get_compressed_size(*roll_ptr);
-
- return(ptr);
-}
-
-/**********************************************************************//**
-Reads from an update undo log record the number of updated fields.
-@return remaining part of undo log record after reading this value */
-UNIV_INLINE
-byte*
-trx_undo_update_rec_get_n_upd_fields(
-/*=================================*/
- byte* ptr, /*!< in: pointer to remaining part of undo log record */
- ulint* n) /*!< out: number of fields */
-{
- *n = mach_read_compressed(ptr);
- ptr += mach_get_compressed_size(*n);
-
- return(ptr);
-}
-
-/**********************************************************************//**
-Reads from an update undo log record a stored field number.
-@return remaining part of undo log record after reading this value */
-UNIV_INLINE
-byte*
-trx_undo_update_rec_get_field_no(
-/*=============================*/
- byte* ptr, /*!< in: pointer to remaining part of undo log record */
- ulint* field_no)/*!< out: field number */
-{
- *field_no = mach_read_compressed(ptr);
- ptr += mach_get_compressed_size(*field_no);
-
- return(ptr);
+ return(const_cast<byte*>(ptr));
}
/*******************************************************************//**
Builds an update vector based on a remaining part of an undo log record.
@return remaining part of the record, NULL if an error detected, which
means that the record is corrupted */
-UNIV_INTERN
byte*
trx_undo_update_rec_get_update(
/*===========================*/
- byte* ptr, /*!< in: remaining part in update undo log
+ const byte* ptr, /*!< in: remaining part in update undo log
record, after reading the row reference
NOTE that this copy of the undo log record must
be preserved as long as the update vector is
@@ -955,7 +1485,6 @@ trx_undo_update_rec_get_update(
trx_id_t trx_id, /*!< in: transaction id from this undo record */
roll_ptr_t roll_ptr,/*!< in: roll pointer from this undo record */
ulint info_bits,/*!< in: info bits from this undo record */
- trx_t* trx, /*!< in: transaction */
mem_heap_t* heap, /*!< in: memory heap from which the memory
needed is allocated */
upd_t** upd) /*!< out, own: update vector */
@@ -965,11 +1494,14 @@ trx_undo_update_rec_get_update(
ulint n_fields;
byte* buf;
ulint i;
+ bool first_v_col = true;
+ bool is_undo_log = true;
+ ulint n_skip_field = 0;
ut_a(dict_index_is_clust(index));
if (type != TRX_UNDO_DEL_MARK_REC) {
- ptr = trx_undo_update_rec_get_n_upd_fields(ptr, &n_fields);
+ n_fields = mach_read_next_compressed(&ptr);
} else {
n_fields = 0;
}
@@ -988,7 +1520,7 @@ trx_undo_update_rec_get_update(
upd_field_set_field_no(upd_field,
dict_index_get_sys_col_pos(index, DATA_TRX_ID),
- index, trx);
+ index);
dfield_set_data(&(upd_field->new_val), buf, DATA_TRX_ID_LEN);
upd_field = upd_get_nth_field(update, n_fields + 1);
@@ -999,36 +1531,43 @@ trx_undo_update_rec_get_update(
upd_field_set_field_no(
upd_field, dict_index_get_sys_col_pos(index, DATA_ROLL_PTR),
- index, trx);
+ index);
dfield_set_data(&(upd_field->new_val), buf, DATA_ROLL_PTR_LEN);
/* Store then the updated ordinary columns to the update vector */
for (i = 0; i < n_fields; i++) {
- byte* field;
- ulint len;
- ulint field_no;
- ulint orig_len;
-
- ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
-
- if (field_no >= dict_index_get_n_fields(index)) {
- fprintf(stderr,
- "InnoDB: Error: trying to access"
- " update undo rec field %lu in ",
- (ulong) field_no);
- dict_index_name_print(stderr, trx, index);
- fprintf(stderr, "\n"
- "InnoDB: but index has only %lu fields\n"
- "InnoDB: Submit a detailed bug report"
- " to https://jira.mariadb.org/\n"
- "InnoDB: Run also CHECK TABLE ",
- (ulong) dict_index_get_n_fields(index));
- ut_print_name(stderr, trx, TRUE, index->table_name);
- fprintf(stderr, "\n"
- "InnoDB: n_fields = %lu, i = %lu, ptr %p\n",
- (ulong) n_fields, (ulong) i, ptr);
+ const byte* field;
+ ulint len;
+ ulint field_no;
+ ulint orig_len;
+ bool is_virtual;
+
+ field_no = mach_read_next_compressed(&ptr);
+
+ is_virtual = (field_no >= REC_MAX_N_FIELDS);
+
+ if (is_virtual) {
+			/* If this is the new format, we need to check the
+			index list to figure out the correct virtual
+			column position */
+ ptr = trx_undo_read_v_idx(
+ index->table, ptr, first_v_col, &is_undo_log,
+ &field_no);
+ first_v_col = false;
+ } else if (field_no >= dict_index_get_n_fields(index)) {
+ ib::error() << "Trying to access update undo rec"
+ " field " << field_no
+ << " in index " << index->name
+ << " of table " << index->table->name
+ << " but index has only "
+ << dict_index_get_n_fields(index)
+ << " fields " << BUG_REPORT_MSG
+ << ". Run also CHECK TABLE "
+ << index->table->name << "."
+ " n_fields = " << n_fields << ", i = " << i
+ << ", ptr " << ptr;
+
ut_ad(0);
*upd = NULL;
return(NULL);
@@ -1036,7 +1575,25 @@ trx_undo_update_rec_get_update(
upd_field = upd_get_nth_field(update, i);
- upd_field_set_field_no(upd_field, field_no, index, trx);
+ if (is_virtual) {
+			/* This column may have been dropped or may no
+			longer be indexed */
+			if (field_no == ULINT_UNDEFINED) {
+				/* Mark that this field is no longer needed */
+ upd_field->field_no = REC_MAX_N_FIELDS;
+
+ ptr = trx_undo_rec_get_col_val(
+ ptr, &field, &len, &orig_len);
+ ptr = trx_undo_rec_get_col_val(
+ ptr, &field, &len, &orig_len);
+ n_skip_field++;
+ continue;
+ }
+
+ upd_field_set_v_field_no(
+ upd_field, field_no, index);
+ } else {
+ upd_field_set_field_no(upd_field, field_no, index);
+ }
ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
@@ -1052,23 +1609,63 @@ trx_undo_update_rec_get_update(
dfield_set_data(&upd_field->new_val, field, len);
dfield_set_ext(&upd_field->new_val);
}
+
+ if (is_virtual) {
+ upd_field->old_v_val = static_cast<dfield_t*>(
+ mem_heap_alloc(
+ heap, sizeof *upd_field->old_v_val));
+ ptr = trx_undo_rec_get_col_val(
+ ptr, &field, &len, &orig_len);
+ if (len == UNIV_SQL_NULL) {
+ dfield_set_null(upd_field->old_v_val);
+ } else if (len < UNIV_EXTERN_STORAGE_FIELD) {
+ dfield_set_data(
+ upd_field->old_v_val, field, len);
+ } else {
+ ut_ad(0);
+ }
+ }
}
- *upd = update;
+	/* In a rare scenario, we may have skipped virtual columns (because
+	they were dropped). We regenerate an update vector without them. */
+ if (n_skip_field > 0) {
+ ulint n = 0;
+ ut_ad(n_skip_field <= n_fields);
- return(ptr);
+ upd_t* new_update = upd_create(
+ n_fields + 2 - n_skip_field, heap);
+
+ for (i = 0; i < n_fields + 2; i++) {
+ upd_field = upd_get_nth_field(update, i);
+
+ if (upd_field->field_no == REC_MAX_N_FIELDS) {
+ continue;
+ }
+
+ upd_field_t* new_upd_field
+ = upd_get_nth_field(new_update, n);
+ *new_upd_field = *upd_field;
+ n++;
+ }
+ ut_ad(n == n_fields + 2 - n_skip_field);
+ *upd = new_update;
+ } else {
+ *upd = update;
+ }
+
+ return(const_cast<byte*>(ptr));
}
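
The n_skip_field handling above can be pictured with a small standalone C++ sketch (simplified stand-in types; the sentinel constant is a placeholder for REC_MAX_N_FIELDS from rem0rec.h): entries whose field number was set to the sentinel are filtered out into a freshly allocated, smaller vector.

#include <cassert>
#include <cstdint>
#include <vector>

// Placeholder for the sentinel from rem0rec.h: field numbers at or above
// it mark entries that must be dropped from the regenerated vector.
static const uint32_t SKIP_SENTINEL = 0x400;

struct upd_field_sketch {
	uint32_t	field_no;
};

static std::vector<upd_field_sketch>
compact_update(const std::vector<upd_field_sketch>& update, size_t n_skip)
{
	std::vector<upd_field_sketch> compacted;
	compacted.reserve(update.size() - n_skip);
	for (size_t i = 0; i < update.size(); i++) {
		if (update[i].field_no == SKIP_SENTINEL) {
			continue;	// dropped virtual column
		}
		compacted.push_back(update[i]);
	}
	assert(compacted.size() == update.size() - n_skip);
	return compacted;
}

int main()
{
	std::vector<upd_field_sketch> u = {{0}, {SKIP_SENTINEL}, {2}};
	assert(compact_update(u, 1).size() == 2);
	return 0;
}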
/*******************************************************************//**
Builds a partial row from an update undo log record, for purge.
It contains the columns which are used for ordering in any index of the table.
Any missing columns are indicated by col->mtype == DATA_MISSING.
-@return pointer to remaining part of undo record */
-UNIV_INTERN
+@return pointer to remaining part of undo record */
byte*
trx_undo_rec_get_partial_row(
/*=========================*/
- byte* ptr, /*!< in: remaining part in update undo log
+ const byte* ptr, /*!< in: remaining part in update undo log
record of a suitable type, at the start of
the stored index columns;
NOTE that this copy of the undo log record must
@@ -1085,24 +1682,30 @@ trx_undo_rec_get_partial_row(
needed is allocated */
{
const byte* end_ptr;
- ulint row_len;
+ bool first_v_col = true;
+ bool is_undo_log = true;
ut_ad(dict_index_is_clust(index));
- row_len = dict_table_get_n_cols(index->table);
-
- *row = dtuple_create(heap, row_len);
+ *row = dtuple_create_with_vcol(
+ heap, dict_table_get_n_cols(index->table),
+ dict_table_get_n_v_cols(index->table));
/* Mark all columns in the row uninitialized, so that
we can distinguish missing fields from fields that are SQL NULL. */
- for (ulint i = 0; i < row_len; i++) {
+ for (ulint i = 0; i < dict_table_get_n_cols(index->table); i++) {
dfield_get_type(dtuple_get_nth_field(*row, i))
->mtype = DATA_MISSING;
}
+ dtuple_init_v_fld(*row);
+
for (const upd_field_t* uf = update->fields, * const ue
= update->fields + update->n_fields;
uf != ue; uf++) {
+ if (uf->old_v_val) {
+ continue;
+ }
ulint c = dict_index_get_nth_col(index, uf->field_no)->ind;
*dtuple_get_nth_field(*row, c) = uf->new_val;
}
@@ -1111,40 +1714,112 @@ trx_undo_rec_get_partial_row(
ptr += 2;
while (ptr != end_ptr) {
- dfield_t* dfield;
- byte* field;
- ulint field_no;
- const dict_col_t* col;
- ulint col_no;
- ulint len;
- ulint orig_len;
+ dfield_t* dfield;
+ const byte* field;
+ ulint field_no;
+ const dict_col_t* col;
+ ulint col_no;
+ ulint len;
+ ulint orig_len;
+ bool is_virtual;
+
+ field_no = mach_read_next_compressed(&ptr);
- ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
+ is_virtual = (field_no >= REC_MAX_N_FIELDS);
- col = dict_index_get_nth_col(index, field_no);
- col_no = dict_col_get_no(col);
+ if (is_virtual) {
+ ptr = trx_undo_read_v_idx(
+ index->table, ptr, first_v_col, &is_undo_log,
+ &field_no);
+ first_v_col = false;
+ }
ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
- dfield = dtuple_get_nth_field(*row, col_no);
- ut_ad(dfield->type.mtype == DATA_MISSING
- || dict_col_type_assert_equal(col, &dfield->type));
- ut_ad(dfield->type.mtype == DATA_MISSING
- || dfield->len == len);
- dict_col_copy_type(
- dict_table_get_nth_col(index->table, col_no),
- dfield_get_type(dfield));
+		/* The column may have been dropped or may no longer be indexed */
+ if (field_no == ULINT_UNDEFINED) {
+ ut_ad(is_virtual);
+ continue;
+ }
+
+ if (is_virtual) {
+ dict_v_col_t* vcol = dict_table_get_nth_v_col(
+ index->table, field_no);
+ col = &vcol->m_col;
+ col_no = dict_col_get_no(col);
+ dfield = dtuple_get_nth_v_field(*row, vcol->v_pos);
+ dict_col_copy_type(
+ &vcol->m_col,
+ dfield_get_type(dfield));
+ } else {
+ col = dict_index_get_nth_col(index, field_no);
+ col_no = dict_col_get_no(col);
+ dfield = dtuple_get_nth_field(*row, col_no);
+ ut_ad(dfield->type.mtype == DATA_MISSING
+ || dict_col_type_assert_equal(col,
+ &dfield->type));
+ ut_ad(dfield->type.mtype == DATA_MISSING
+ || dfield->len == len
+ || (len != UNIV_SQL_NULL
+ && len >= UNIV_EXTERN_STORAGE_FIELD));
+ dict_col_copy_type(
+ dict_table_get_nth_col(index->table, col_no),
+ dfield_get_type(dfield));
+ }
+
dfield_set_data(dfield, field, len);
if (len != UNIV_SQL_NULL
&& len >= UNIV_EXTERN_STORAGE_FIELD) {
- dfield_set_len(dfield,
- len - UNIV_EXTERN_STORAGE_FIELD);
+ spatial_status_t spatial_status;
+
+ /* Decode spatial status. */
+ spatial_status = static_cast<spatial_status_t>(
+ (len & SPATIAL_STATUS_MASK)
+ >> SPATIAL_STATUS_SHIFT);
+ len &= ~SPATIAL_STATUS_MASK;
+
+ /* Keep compatible with 5.7.9 format. */
+ if (spatial_status == SPATIAL_UNKNOWN) {
+ spatial_status =
+ dict_col_get_spatial_status(col);
+ }
+
+ switch (spatial_status) {
+ case SPATIAL_ONLY:
+ ut_ad(len - UNIV_EXTERN_STORAGE_FIELD
+ == DATA_MBR_LEN);
+ dfield_set_len(
+ dfield,
+ len - UNIV_EXTERN_STORAGE_FIELD);
+ break;
+
+ case SPATIAL_MIXED:
+ dfield_set_len(
+ dfield,
+ len - UNIV_EXTERN_STORAGE_FIELD
+ - DATA_MBR_LEN);
+ break;
+
+ case SPATIAL_NONE:
+ dfield_set_len(
+ dfield,
+ len - UNIV_EXTERN_STORAGE_FIELD);
+ break;
+
+ case SPATIAL_UNKNOWN:
+ ut_ad(0);
+ break;
+ }
+
dfield_set_ext(dfield);
+ dfield_set_spatial_status(dfield, spatial_status);
+
/* If the prefix of this column is indexed,
ensure that enough prefix is stored in the
undo log record. */
- if (!ignore_prefix && col->ord_part) {
+ if (!ignore_prefix && col->ord_part
+ && spatial_status != SPATIAL_ONLY) {
ut_a(dfield_get_len(dfield)
>= BTR_EXTERN_FIELD_REF_SIZE);
ut_a(dict_table_get_format(index->table)
@@ -1156,9 +1831,8 @@ trx_undo_rec_get_partial_row(
}
}
- return(ptr);
+ return(const_cast<byte*>(ptr));
}
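
The spatial-status decoding above packs a two-bit status into the stored field length. A minimal standalone sketch, assuming placeholder values for SPATIAL_STATUS_SHIFT and SPATIAL_STATUS_MASK (the real constants and the real enum live in data0type.h):

#include <cassert>
#include <cstdint>

enum spatial_status_sketch {
	SPATIAL_UNKNOWN_S = 0,	// toy values for illustration only
	SPATIAL_NONE_S = 1,
	SPATIAL_ONLY_S = 2,
	SPATIAL_MIXED_S = 3
};

static const uint32_t STATUS_SHIFT = 12;		// placeholder
static const uint32_t STATUS_MASK = 3u << STATUS_SHIFT;	// placeholder

static void decode(uint32_t stored, spatial_status_sketch* status,
		   uint32_t* len)
{
	*status = spatial_status_sketch((stored & STATUS_MASK)
					>> STATUS_SHIFT);
	*len = stored & ~STATUS_MASK;	// the plain field length remains
}

int main()
{
	spatial_status_sketch s;
	uint32_t len;
	decode((uint32_t(SPATIAL_MIXED_S) << STATUS_SHIFT) | 100u, &s, &len);
	assert(s == SPATIAL_MIXED_S && len == 100);
	return 0;
}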
-#endif /* !UNIV_HOTBACKUP */
/***********************************************************************//**
Erases the unused undo log page end.
@@ -1183,8 +1857,7 @@ trx_undo_erase_page_end(
/***********************************************************//**
Parses a redo log record of erasing of an undo page end.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
trx_undo_parse_erase_page_end(
/*==========================*/
@@ -1193,7 +1866,8 @@ trx_undo_parse_erase_page_end(
page_t* page, /*!< in: page or NULL */
mtr_t* mtr) /*!< in: mtr or NULL */
{
- ut_ad(ptr && end_ptr);
+ ut_ad(ptr != NULL);
+ ut_ad(end_ptr != NULL);
if (page == NULL) {
@@ -1205,14 +1879,125 @@ trx_undo_parse_erase_page_end(
return(ptr);
}
-#ifndef UNIV_HOTBACKUP
+/** Report a RENAME TABLE operation.
+@param[in,out] trx transaction
+@param[in] table table that is being renamed
+@param[in,out] block undo page
+@param[in,out] mtr mini-transaction
+@return byte offset of the undo log record
+@retval 0 in case of failure */
+static
+ulint
+trx_undo_page_report_rename(trx_t* trx, const dict_table_t* table,
+ buf_block_t* block, mtr_t* mtr)
+{
+ byte* ptr_first_free = TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE
+ + block->frame;
+ ulint first_free = mach_read_from_2(ptr_first_free);
+ ut_ad(first_free >= TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
+ ut_ad(first_free <= UNIV_PAGE_SIZE);
+ byte* start = block->frame + first_free;
+ size_t len = strlen(table->name.m_name);
+ const size_t fixed = 2 + 1 + 11 + 11 + 2;
+ ut_ad(len <= NAME_LEN * 2 + 1);
+ /* The -10 is used in trx_undo_left() */
+ compile_time_assert((NAME_LEN * 1) * 2 + fixed
+ + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE
+ < UNIV_PAGE_SIZE_MIN - 10 - FIL_PAGE_DATA_END);
+
+ if (trx_undo_left(block->frame, start) < fixed + len) {
+ ut_ad(first_free > TRX_UNDO_PAGE_HDR
+ + TRX_UNDO_PAGE_HDR_SIZE);
+ return 0;
+ }
+
+ byte* ptr = start + 2;
+ *ptr++ = TRX_UNDO_RENAME_TABLE;
+ ptr += mach_u64_write_much_compressed(ptr, trx->undo_no);
+ ptr += mach_u64_write_much_compressed(ptr, table->id);
+ memcpy(ptr, table->name.m_name, len);
+ ptr += len;
+ mach_write_to_2(ptr, first_free);
+ ptr += 2;
+ ulint offset = page_offset(ptr);
+ mach_write_to_2(start, offset);
+ mach_write_to_2(ptr_first_free, offset);
+
+ trx_undof_page_add_undo_rec_log(block->frame, first_free, offset, mtr);
+ return first_free;
+}
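
A simplified standalone sketch of the record image that trx_undo_page_report_rename() lays out: a 2-byte next-record offset, the type byte, the undo number and table id, the table name, and a 2-byte back-pointer to the record start. Fixed 8-byte big-endian integers stand in for the variable-length mach_u64_write_much_compressed() encoding, and the type code passed in main() is an assumed value.

#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

// Big-endian helpers standing in for mach_write_to_2() and the
// "much compressed" writers used by the real code.
static void write2_at(std::vector<uint8_t>& b, size_t pos, uint16_t v)
{
	b[pos] = uint8_t(v >> 8);
	b[pos + 1] = uint8_t(v);
}

static void write8(std::vector<uint8_t>& b, uint64_t v)
{
	for (int i = 7; i >= 0; i--) b.push_back(uint8_t(v >> (i * 8)));
}

static std::vector<uint8_t> build_rename_rec(uint16_t first_free,
					     uint8_t type, uint64_t undo_no,
					     uint64_t table_id,
					     const std::string& name)
{
	std::vector<uint8_t> b(2, 0);	// reserve the next-record offset
	b.push_back(type);		// TRX_UNDO_RENAME_TABLE
	write8(b, undo_no);
	write8(b, table_id);
	b.insert(b.end(), name.begin(), name.end());
	b.push_back(0);
	b.push_back(0);
	write2_at(b, b.size() - 2, first_free);		// back-pointer
	write2_at(b, 0, uint16_t(first_free + b.size()));	// next record
	return b;
}

int main()
{
	build_rename_rec(56, 19 /* assumed TRX_UNDO_RENAME_TABLE code */,
			 1, 42, "test/t1");
	return 0;
}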
+
+/** Report a RENAME TABLE operation.
+@param[in,out] trx transaction
+@param[in] table table that is being renamed
+@return DB_SUCCESS or error code */
+dberr_t trx_undo_report_rename(trx_t* trx, const dict_table_t* table)
+{
+ ut_ad(!trx->read_only);
+ ut_ad(trx->id);
+ ut_ad(!table->is_temporary());
+ ut_ad(srv_safe_truncate);
+
+ trx_rseg_t* rseg = trx->rsegs.m_redo.rseg;
+ trx_undo_t** pundo = &trx->rsegs.m_redo.insert_undo;
+ mutex_enter(&trx->undo_mutex);
+ dberr_t err = *pundo
+ ? DB_SUCCESS
+ : trx_undo_assign_undo(trx, rseg, pundo, TRX_UNDO_INSERT);
+ ut_ad((err == DB_SUCCESS) == (*pundo != NULL));
+ if (trx_undo_t* undo = *pundo) {
+ mtr_t mtr;
+ mtr.start();
+
+ buf_block_t* block = buf_page_get_gen(
+ page_id_t(undo->space, undo->last_page_no),
+ univ_page_size, RW_X_LATCH,
+ buf_pool_is_obsolete(undo->withdraw_clock)
+ ? NULL : undo->guess_block,
+ BUF_GET, __FILE__, __LINE__, &mtr, &err);
+ ut_ad((err == DB_SUCCESS) == !!block);
+
+ for (ut_d(int loop_count = 0); block;) {
+ ut_ad(loop_count++ < 2);
+ buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
+ ut_ad(undo->last_page_no == block->page.id.page_no());
+
+ if (ulint offset = trx_undo_page_report_rename(
+ trx, table, block, &mtr)) {
+ undo->withdraw_clock = buf_withdraw_clock;
+ undo->empty = FALSE;
+ undo->top_page_no = undo->last_page_no;
+ undo->top_offset = offset;
+ undo->top_undo_no = trx->undo_no++;
+ undo->guess_block = block;
+
+ trx->undo_rseg_space = rseg->space;
+ err = DB_SUCCESS;
+ break;
+ } else {
+ mtr.commit();
+ mtr.start();
+ block = trx_undo_add_page(trx, undo, &mtr);
+ if (!block) {
+ err = DB_OUT_OF_FILE_SPACE;
+ break;
+ }
+ }
+ }
+
+ mtr.commit();
+ }
+
+ mutex_exit(&trx->undo_mutex);
+ return err;
+}
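
The retry loop in trx_undo_report_rename() follows a bounded append-or-grow pattern: write the record into the last undo page, and if it does not fit, add one page and try again. A generic standalone sketch with invented types:

#include <cassert>
#include <cstddef>

struct page_sketch {
	size_t	used;
	size_t	cap;
};

// Try to append a record of 'len' bytes to the current last page.
static bool append_rec(page_sketch* p, size_t len)
{
	if (p->cap - p->used < len) return false;	// does not fit
	p->used += len;
	return true;
}

// Mirrors the loop above: at most one page extension before giving up.
static bool report(page_sketch* last, size_t len,
		   bool (*add_page)(page_sketch*))
{
	for (int attempt = 0; attempt < 2; attempt++) {
		if (append_rec(last, len)) return true;	// success
		if (!add_page(last)) return false;	// out of file space
	}
	return false;
}

int main()
{
	page_sketch p = {4090, 4096};
	assert(report(&p, 50, [](page_sketch* q) {
		q->used = 0;	// a freshly added, empty page
		return true;
	}));
	return 0;
}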
+
/***********************************************************************//**
Writes information to an undo log about an insert, update, or a delete marking
of a clustered index record. This information is used in a rollback of the
transaction and in consistent reads that must look to the history of this
transaction.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
dberr_t
trx_undo_report_row_operation(
/*==========================*/
@@ -1220,102 +2005,102 @@ trx_undo_report_row_operation(
dict_index_t* index, /*!< in: clustered index */
const dtuple_t* clust_entry, /*!< in: in the case of an insert,
index entry to insert into the
- clustered index, otherwise NULL */
+ clustered index; in updates,
+ may contain a clustered index
+ record tuple that also contains
+ virtual columns of the table;
+ otherwise, NULL */
const upd_t* update, /*!< in: in the case of an update,
the update vector, otherwise NULL */
ulint cmpl_info, /*!< in: compiler info on secondary
index updates */
- const rec_t* rec, /*!< in: in case of an update or delete
+ const rec_t* rec, /*!< in: case of an update or delete
marking, the record in the clustered
- index, otherwise NULL */
- const ulint* offsets, /*!< in: rec_get_offsets(rec) */
+ index; NULL if insert */
+ const offset_t* offsets, /*!< in: rec_get_offsets(rec) */
roll_ptr_t* roll_ptr) /*!< out: DB_ROLL_PTR to the
undo log record */
{
trx_t* trx;
- trx_undo_t* undo;
ulint page_no;
buf_block_t* undo_block;
- trx_rseg_t* rseg;
mtr_t mtr;
- dberr_t err = DB_SUCCESS;
#ifdef UNIV_DEBUG
int loop_count = 0;
#endif /* UNIV_DEBUG */
- ut_ad(!srv_read_only_mode);
ut_a(dict_index_is_clust(index));
+ ut_ad(!update || rec);
ut_ad(!rec || rec_offs_validate(rec, index, offsets));
- ut_ad(thr);
- ut_ad(!clust_entry || (!update && !rec));
+ ut_ad(!srv_read_only_mode);
trx = thr_get_trx(thr);
+ /* This function must not be invoked during rollback
+ (of a TRX_STATE_PREPARE transaction or otherwise). */
+ ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
+ ut_ad(!trx->in_rollback);
- /* This table is visible only to the session that created it. */
- if (trx->read_only) {
- ut_ad(!srv_read_only_mode);
- /* MySQL should block writes to non-temporary tables. */
- ut_a(DICT_TF2_FLAG_IS_SET(index->table, DICT_TF2_TEMPORARY));
- if (trx->rseg == 0) {
- trx_assign_rseg(trx);
- }
- }
-
- rseg = trx->rseg;
-
- mtr_start(&mtr);
- mutex_enter(&trx->undo_mutex);
-
- /* If the undo log is not assigned yet, assign one */
-
- if (clust_entry) {
- undo = trx->insert_undo;
+ mtr.start();
+ trx_undo_t** pundo;
+ trx_rseg_t* rseg;
+ const bool is_temp = dict_table_is_temporary(index->table);
- if (undo == NULL) {
+ if (is_temp) {
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
- err = trx_undo_assign_undo(trx, TRX_UNDO_INSERT);
- undo = trx->insert_undo;
+ rseg = trx->get_temp_rseg();
+ pundo = &trx->rsegs.m_noredo.undo;
+ } else {
+ ut_ad(!trx->read_only);
+ ut_ad(trx->id);
+ /* Keep INFORMATION_SCHEMA.TABLES.UPDATE_TIME
+ up-to-date for persistent tables. Temporary tables are
+ not listed there. */
+ trx->mod_tables.insert(index->table);
+
+ pundo = !rec
+ ? &trx->rsegs.m_redo.insert_undo
+ : &trx->rsegs.m_redo.update_undo;
+ rseg = trx->rsegs.m_redo.rseg;
+ }
- if (undo == NULL) {
- /* Did not succeed */
- ut_ad(err != DB_SUCCESS);
- goto err_exit;
- }
+ mutex_enter(&trx->undo_mutex);
+ dberr_t err;
- ut_ad(err == DB_SUCCESS);
- }
+ if (*pundo) {
+ err = DB_SUCCESS;
+ } else if (!rec || is_temp) {
+ err = trx_undo_assign_undo(trx, rseg, pundo, TRX_UNDO_INSERT);
} else {
- undo = trx->update_undo;
-
- if (undo == NULL) {
- err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
- undo = trx->update_undo;
+ err = trx_undo_assign_undo(trx, rseg, pundo, TRX_UNDO_UPDATE);
+ }
- if (undo == NULL) {
- /* Did not succeed */
- ut_ad(err != DB_SUCCESS);
- goto err_exit;
- }
- }
+ trx_undo_t* undo = *pundo;
- ut_ad(err == DB_SUCCESS);
+ ut_ad((err == DB_SUCCESS) == (undo != NULL));
+ if (undo == NULL) {
+ goto err_exit;
}
page_no = undo->last_page_no;
+
undo_block = buf_page_get_gen(
- undo->space, undo->zip_size, page_no, RW_X_LATCH,
- undo->guess_block, BUF_GET, __FILE__, __LINE__, &mtr);
+ page_id_t(undo->space, page_no), univ_page_size, RW_X_LATCH,
+ buf_pool_is_obsolete(undo->withdraw_clock)
+ ? NULL : undo->guess_block, BUF_GET, __FILE__, __LINE__,
+ &mtr, &err);
+
buf_block_dbg_add_level(undo_block, SYNC_TRX_UNDO_PAGE);
do {
- ut_ad(page_no == buf_block_get_page_no(undo_block));
+ ut_ad(page_no == undo_block->page.id.page_no());
page_t* undo_page = buf_block_get_frame(undo_block);
- ulint offset = clust_entry
+ ulint offset = !rec
? trx_undo_page_report_insert(
undo_page, trx, index, clust_entry, &mtr)
: trx_undo_page_report_modify(
undo_page, trx, index, rec, offsets, update,
- cmpl_info, &mtr);
+ cmpl_info, clust_entry, &mtr);
if (UNIV_UNLIKELY(offset == 0)) {
/* The record did not fit on the page. We erase the
@@ -1339,10 +2124,13 @@ trx_undo_report_row_operation(
latches, such as SYNC_FSP and SYNC_FSP_PAGE. */
mtr_commit(&mtr);
- mtr_start(&mtr);
+ mtr.start();
+ if (is_temp) {
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
+ }
mutex_enter(&rseg->mutex);
- trx_undo_free_last_page(trx, undo, &mtr);
+ trx_undo_free_last_page(undo, &mtr);
mutex_exit(&rseg->mutex);
err = DB_UNDO_RECORD_TOO_BIG;
@@ -1352,22 +2140,21 @@ trx_undo_report_row_operation(
mtr_commit(&mtr);
} else {
/* Success */
-
+ undo->withdraw_clock = buf_withdraw_clock;
mtr_commit(&mtr);
undo->empty = FALSE;
undo->top_page_no = page_no;
undo->top_offset = offset;
- undo->top_undo_no = trx->undo_no;
+ undo->top_undo_no = trx->undo_no++;
undo->guess_block = undo_block;
- trx->undo_no++;
+ trx->undo_rseg_space = rseg->space;
mutex_exit(&trx->undo_mutex);
*roll_ptr = trx_undo_build_roll_ptr(
- clust_entry != NULL,
- rseg->id, page_no, offset);
+ !rec, rseg->id, page_no, offset);
return(DB_SUCCESS);
}
@@ -1376,19 +2163,29 @@ trx_undo_report_row_operation(
/* We have to extend the undo log by one page */
ut_ad(++loop_count < 2);
- mtr_start(&mtr);
+ mtr.start();
- /* When we add a page to an undo log, this is analogous to
- a pessimistic insert in a B-tree, and we must reserve the
- counterpart of the tree latch, which is the rseg mutex. */
+ if (is_temp) {
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
+ }
- mutex_enter(&rseg->mutex);
undo_block = trx_undo_add_page(trx, undo, &mtr);
- mutex_exit(&rseg->mutex);
-
page_no = undo->last_page_no;
+
+ DBUG_EXECUTE_IF("ib_err_ins_undo_page_add_failure",
+ undo_block = NULL;);
} while (undo_block != NULL);
+ ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ DB_OUT_OF_FILE_SPACE,
+ //ER_INNODB_UNDO_LOG_FULL,
+ "No more space left over in %s tablespace for allocating UNDO"
+ " log pages. Please add new data file to the tablespace or"
+ " check if filesystem is full or enable auto-extension for"
+ " the tablespace",
+ undo->space == TRX_SYS_SPACE
+ ? "system" : is_temp ? "temporary" : "undo");
+
/* Did not succeed: out of space */
err = DB_OUT_OF_FILE_SPACE;
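
The undo-log routing near the top of this function condenses to a small decision: temporary tables go to the no-redo rollback segment (and the mini-transaction is switched to MTR_LOG_NO_REDO), persistent inserts to the insert undo log, and persistent updates or delete-markings to the update undo log. A standalone sketch of that routing, with invented names:

#include <cassert>

enum undo_kind { UNDO_NOREDO, UNDO_REDO_INSERT, UNDO_REDO_UPDATE };

// has_rec == false means an insert; otherwise an update or delete-marking.
static undo_kind route(bool is_temp, bool has_rec)
{
	if (is_temp) {
		return UNDO_NOREDO;	// logged with MTR_LOG_NO_REDO
	}
	return has_rec ? UNDO_REDO_UPDATE : UNDO_REDO_INSERT;
}

int main()
{
	assert(route(true, false) == UNDO_NOREDO);
	assert(route(false, false) == UNDO_REDO_INSERT);
	assert(route(false, true) == UNDO_REDO_UPDATE);
	return 0;
}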
@@ -1400,16 +2197,14 @@ err_exit:
/*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/
-/******************************************************************//**
-Copies an undo record to heap. This function can be called if we know that
-the undo log record exists.
-@return own: copy of the record */
-UNIV_INTERN
+/** Copy an undo record to heap.
+@param[in] roll_ptr roll pointer to a record that exists
+@param[in,out] heap memory heap where copied */
+static
trx_undo_rec_t*
trx_undo_get_undo_rec_low(
-/*======================*/
- roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
- mem_heap_t* heap) /*!< in: memory heap where copied */
+ roll_ptr_t roll_ptr,
+ mem_heap_t* heap)
{
trx_undo_rec_t* undo_rec;
ulint rseg_id;
@@ -1422,12 +2217,14 @@ trx_undo_get_undo_rec_low(
trx_undo_decode_roll_ptr(roll_ptr, &is_insert, &rseg_id, &page_no,
&offset);
- rseg = trx_rseg_get_on_id(rseg_id);
+ ut_ad(page_no > FSP_FIRST_INODE_PAGE_NO);
+ ut_ad(offset >= TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
+ rseg = trx_sys->rseg_array[rseg_id];
mtr_start(&mtr);
- undo_page = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
- page_no, &mtr);
+ undo_page = trx_undo_page_get_s_latched(
+ page_id_t(rseg->space, page_no), &mtr);
undo_rec = trx_undo_rec_copy(undo_page + offset, heap);
@@ -1436,30 +2233,32 @@ trx_undo_get_undo_rec_low(
return(undo_rec);
}
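
trx_undo_decode_roll_ptr() above splits a 7-byte DB_ROLL_PTR into its components. A standalone sketch of the layout, assuming the usual bit positions (insert flag at bit 55, compare ROLL_PTR_INSERT_FLAG_POS used later in this patch; a 7-bit rollback segment id; a 32-bit page number; a 16-bit byte offset):

#include <cassert>
#include <cstdint>

static uint64_t build_roll_ptr(bool is_insert, uint32_t rseg_id,
			       uint32_t page_no, uint16_t offset)
{
	return (uint64_t(is_insert) << 55)
		| (uint64_t(rseg_id & 0x7F) << 48)
		| (uint64_t(page_no) << 16)
		| offset;
}

static void decode_roll_ptr(uint64_t roll_ptr, bool* is_insert,
			    uint32_t* rseg_id, uint32_t* page_no,
			    uint16_t* offset)
{
	*is_insert = (roll_ptr >> 55) & 1;
	*rseg_id = uint32_t(roll_ptr >> 48) & 0x7F;
	*page_no = uint32_t(roll_ptr >> 16);
	*offset = uint16_t(roll_ptr);
}

int main()
{
	bool ins;
	uint32_t rseg, page;
	uint16_t off;
	decode_roll_ptr(build_roll_ptr(true, 3, 7, 120),
			&ins, &rseg, &page, &off);
	assert(ins && rseg == 3 && page == 7 && off == 120);
	return 0;
}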
-/******************************************************************//**
-Copies an undo record to heap.
-
-NOTE: the caller must have latches on the clustered index page.
-
+/** Copy an undo record to heap.
+@param[in] roll_ptr roll pointer to record
+@param[in,out] heap memory heap where copied
+@param[in] trx_id id of the trx that generated
+ the roll pointer: it points to an
+ undo log of this transaction
+@param[in] name table name
+@param[out] undo_rec own: copy of the record
@retval true if the undo log has been
truncated and we cannot fetch the old version
-@retval false if the undo log record is available */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
+@retval false if the undo log record is available
+NOTE: the caller must have latches on the clustered index page. */
+static MY_ATTRIBUTE((warn_unused_result))
bool
trx_undo_get_undo_rec(
-/*==================*/
- roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
- trx_id_t trx_id, /*!< in: id of the trx that generated
- the roll pointer: it points to an
- undo log of this transaction */
- trx_undo_rec_t**undo_rec, /*!< out, own: copy of the record */
- mem_heap_t* heap) /*!< in: memory heap where copied */
+ roll_ptr_t roll_ptr,
+ mem_heap_t* heap,
+ trx_id_t trx_id,
+ const table_name_t& name,
+ trx_undo_rec_t** undo_rec)
{
bool missing_history;
rw_lock_s_lock(&purge_sys->latch);
- missing_history = read_view_sees_trx_id(purge_sys->view, trx_id);
+ missing_history = purge_sys->view.changes_visible(trx_id, name);
if (!missing_history) {
*undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
}
@@ -1481,8 +2280,7 @@ hold a latch on the index page of the clustered index record.
@retval true if previous version was built, or if it was an insert
or the table has been rebuilt
@retval false if the previous version is earlier than purge_view,
-which means that it may have been removed */
-UNIV_INTERN
+or being purged, which means that it may have been removed */
bool
trx_undo_prev_version_build(
/*========================*/
@@ -1494,14 +2292,23 @@ trx_undo_prev_version_build(
index_rec page and purge_view */
const rec_t* rec, /*!< in: version of a clustered index record */
dict_index_t* index, /*!< in: clustered index */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
+ offset_t* offsets,/*!< in/out: rec_get_offsets(rec, index) */
mem_heap_t* heap, /*!< in: memory heap from which the memory
needed is allocated */
- rec_t** old_vers)/*!< out, own: previous version, or NULL if
+ rec_t** old_vers,/*!< out, own: previous version, or NULL if
rec is the first inserted version, or if
history data has been deleted (an error),
or if the purge COULD have removed the version
though it has not yet done so */
+	mem_heap_t*	v_heap, /*!< in: memory heap used to create the vrow
+	dtuple if it is not yet created. This heap
+	differs from "heap" above in that it could be
+	prebuilt->old_vers_heap for a SELECT */
+ dtuple_t** vrow, /*!< out: virtual column info, if any */
+ ulint v_status)
+			/*!< in: status flags that determine whether
+			this function is entered by the purge thread,
+			and whether the "after image" of the undo log
+			should be read */
{
trx_undo_rec_t* undo_rec = NULL;
dtuple_t* entry;
@@ -1517,12 +2324,11 @@ trx_undo_prev_version_build(
ulint cmpl_info;
bool dummy_extern;
byte* buf;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&purge_sys->latch, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(mtr_memo_contains_page(index_mtr, index_rec, MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains_page(index_mtr, index_rec,
- MTR_MEMO_PAGE_X_FIX));
+
+ ut_ad(!rw_lock_own(&purge_sys->latch, RW_LOCK_S));
+ ut_ad(mtr_memo_contains_page_flagged(index_mtr, index_rec,
+ MTR_MEMO_PAGE_S_FIX
+ | MTR_MEMO_PAGE_X_FIX));
ut_ad(rec_offs_validate(rec, index, offsets));
ut_a(dict_index_is_clust(index));
@@ -1535,12 +2341,24 @@ trx_undo_prev_version_build(
return(true);
}
+ ut_ad(!dict_table_is_temporary(index->table));
+
rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
- if (trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap)) {
- /* The undo record may already have been purged,
- during purge or semi-consistent read. */
- return(false);
+ ut_ad(!index->table->skip_alter_undo);
+
+ if (trx_undo_get_undo_rec(
+ roll_ptr, heap, rec_trx_id, index->table->name,
+ &undo_rec)) {
+ if (v_status & TRX_UNDO_PREV_IN_PURGE) {
+ /* We are fetching the record being purged */
+ undo_rec = trx_undo_get_undo_rec_low(
+ roll_ptr, heap);
+ } else {
+ /* The undo record may already have been purged,
+ during purge or semi-consistent read. */
+ return(false);
+ }
}
ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info,
@@ -1582,16 +2400,10 @@ trx_undo_prev_version_build(
ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id,
roll_ptr, info_bits,
- NULL, heap, &update);
+ heap, &update);
ut_a(ptr);
-# if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- ut_a(!rec_offs_any_null_extern(rec, offsets));
-# endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
-
if (row_upd_changes_field_size_or_external(index, offsets, update)) {
- ulint n_ext;
-
/* We should confirm the existence of disowned external data,
if the previous version record is delete marked. If the trx_id
of the previous record is seen by purge view, we should treat
@@ -1613,8 +2425,10 @@ trx_undo_prev_version_build(
bool missing_extern;
rw_lock_s_lock(&purge_sys->latch);
- missing_extern = read_view_sees_trx_id(purge_sys->view,
- trx_id);
+
+ missing_extern = purge_sys->view.changes_visible(
+ trx_id, index->table->name);
+
rw_lock_s_unlock(&purge_sys->latch);
if (missing_extern) {
@@ -1630,30 +2444,125 @@ trx_undo_prev_version_build(
those fields that update updates to become externally stored
fields. Store the info: */
- entry = row_rec_to_index_entry(
- rec, index, offsets, &n_ext, heap);
- n_ext += btr_push_update_extern_fields(entry, update, heap);
+ entry = row_rec_to_index_entry(rec, index, offsets, heap);
/* The page containing the clustered index record
corresponding to entry is latched in mtr. Thus the
following call is safe. */
row_upd_index_replace_new_col_vals(entry, index, update, heap);
- buf = static_cast<byte*>(
- mem_heap_alloc(
- heap,
- rec_get_converted_size(index, entry, n_ext)));
+ /* Get number of externally stored columns in updated record */
+ const ulint n_ext = dtuple_get_n_ext(entry);
+
+ buf = static_cast<byte*>(mem_heap_alloc(
+ heap, rec_get_converted_size(index, entry, n_ext)));
*old_vers = rec_convert_dtuple_to_rec(buf, index,
entry, n_ext);
} else {
- buf = static_cast<byte*>(
- mem_heap_alloc(heap, rec_offs_size(offsets)));
+ buf = static_cast<byte*>(mem_heap_alloc(
+ heap, rec_offs_size(offsets)));
*old_vers = rec_copy(buf, rec, offsets);
rec_offs_make_valid(*old_vers, index, offsets);
row_upd_rec_in_place(*old_vers, index, offsets, update, NULL);
}
+	/* Copy the old value (which is the after image of the update) from
+	the update vector into the dtuple vrow */
+ if (v_status & TRX_UNDO_GET_OLD_V_VALUE) {
+ row_upd_replace_vcol((dtuple_t*)*vrow, index->table, update,
+ false, NULL, NULL);
+ }
+
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+ offset_t offsets_dbg[REC_OFFS_NORMAL_SIZE];
+ rec_offs_init(offsets_dbg);
+ ut_a(!rec_offs_any_null_extern(
+ *old_vers, rec_get_offsets(*old_vers, index, offsets_dbg, true,
+ ULINT_UNDEFINED, &heap)));
+#endif // defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+
+ if (vrow && !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
+ if (!(*vrow)) {
+ *vrow = dtuple_create_with_vcol(
+ v_heap ? v_heap : heap,
+ dict_table_get_n_cols(index->table),
+ dict_table_get_n_v_cols(index->table));
+ dtuple_init_v_fld(*vrow);
+ }
+
+ ut_ad(index->table->n_v_cols);
+ trx_undo_read_v_cols(index->table, ptr, *vrow,
+ v_status & TRX_UNDO_PREV_IN_PURGE);
+ }
+
return(true);
}
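
trx_undo_prev_version_build() reconstructs one older version per call; a consistent read keeps calling it until it reaches a version its read view may see. A toy standalone model of that walk (a pointer chain stands in for following DB_ROLL_PTR, and a single id threshold stands in for changes_visible()):

#include <cstdint>

struct version_sketch {
	uint64_t		trx_id;	// DB_TRX_ID of this version
	const version_sketch*	prev;	// stands in for DB_ROLL_PTR
};

static const version_sketch* visible_version(const version_sketch* v,
					     uint64_t view_up_limit_id)
{
	while (v && v->trx_id >= view_up_limit_id) {
		v = v->prev;	// one trx_undo_prev_version_build() step
	}
	return v;	// NULL: first version not visible, or history purged
}

int main()
{
	version_sketch v1 = {10, nullptr};
	version_sketch v2 = {90, &v1};
	return visible_version(&v2, 50) == &v1 ? 0 : 1;
}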
-#endif /* !UNIV_HOTBACKUP */
+
+/** Read virtual column value from undo log
+@param[in] table the table
+@param[in] ptr undo log pointer
+@param[in,out] row the dtuple to fill
+@param[in] in_purge whether this is called by purge */
+void
+trx_undo_read_v_cols(
+ const dict_table_t* table,
+ const byte* ptr,
+ dtuple_t* row,
+ bool in_purge)
+{
+ const byte* end_ptr;
+ bool first_v_col = true;
+ bool is_undo_log = true;
+
+ end_ptr = ptr + mach_read_from_2(ptr);
+ ptr += 2;
+ while (ptr < end_ptr) {
+ dfield_t* dfield;
+ const byte* field;
+ ulint field_no;
+ ulint len;
+ ulint orig_len;
+ bool is_virtual;
+
+ field_no = mach_read_next_compressed(
+ const_cast<const byte**>(&ptr));
+
+ is_virtual = (field_no >= REC_MAX_N_FIELDS);
+
+ if (is_virtual) {
+ ptr = trx_undo_read_v_idx(
+ table, ptr, first_v_col, &is_undo_log,
+ &field_no);
+ first_v_col = false;
+ }
+
+ ptr = trx_undo_rec_get_col_val(
+ ptr, &field, &len, &orig_len);
+
+		/* The virtual column is no longer indexed or does not exist.
+		This check must come after trx_undo_rec_get_col_val() so
+		that the undo ptr advances */
+ if (field_no == ULINT_UNDEFINED) {
+ ut_ad(is_virtual);
+ continue;
+ }
+
+ if (is_virtual) {
+ dict_v_col_t* vcol = dict_table_get_nth_v_col(
+ table, field_no);
+
+ dfield = dtuple_get_nth_v_field(row, vcol->v_pos);
+
+ if (!in_purge
+ || dfield_get_type(dfield)->mtype == DATA_MISSING) {
+ dict_col_copy_type(
+ &vcol->m_col,
+ dfield_get_type(dfield));
+ dfield_set_data(dfield, field, len);
+ }
+ }
+ }
+
+ ut_ad(ptr == end_ptr);
+}
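
trx_undo_read_v_cols() walks a length-prefixed list of (field number, value) pairs, treating field numbers at or above REC_MAX_N_FIELDS as virtual columns. A simplified standalone sketch, with fixed 2-byte integers in place of InnoDB's compressed encoding:

#include <cassert>
#include <cstddef>
#include <cstdint>

static uint16_t read2(const uint8_t*& p)
{
	uint16_t v = uint16_t((p[0] << 8) | p[1]);
	p += 2;
	return v;
}

// Field numbers at or above 'sentinel' denote virtual columns,
// as in the loop above.
static size_t count_virtual(const uint8_t* rec, uint16_t sentinel)
{
	const uint8_t* ptr = rec;
	const uint16_t total = read2(ptr);	// length includes the prefix
	const uint8_t* end = rec + total;
	size_t n_virtual = 0;
	while (ptr < end) {
		uint16_t field_no = read2(ptr);
		read2(ptr);			// skip the column value
		if (field_no >= sentinel) {
			n_virtual++;
		}
	}
	assert(ptr == end);
	return n_virtual;
}

int main()
{
	const uint8_t rec[] = {0, 10, 0, 1, 0, 7, 0xFF, 0xFF, 0, 9};
	assert(count_virtual(rec, 0x8000) == 1);
	return 0;
}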
diff --git a/storage/innobase/trx/trx0roll.cc b/storage/innobase/trx/trx0roll.cc
index de673208d32..b6d7aa9f380 100644
--- a/storage/innobase/trx/trx0roll.cc
+++ b/storage/innobase/trx/trx0roll.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2019, MariaDB Corporation.
+Copyright (c) 2016, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,35 +24,30 @@ Transaction rollback
Created 3/26/1996 Heikki Tuuri
*******************************************************/
-#include "my_config.h"
-#include <my_service_manager.h>
-
#include "trx0roll.h"
-#ifdef UNIV_NONINL
-#include "trx0roll.ic"
-#endif
+#include <my_service_manager.h>
+#include <mysql/service_wsrep.h>
#include "fsp0fsp.h"
+#include "lock0lock.h"
#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "trx0undo.h"
-#include "trx0rec.h"
+#include "pars0pars.h"
#include "que0que.h"
-#include "usr0sess.h"
-#include "srv0start.h"
#include "read0read.h"
-#include "row0undo.h"
#include "row0mysql.h"
-#include "lock0lock.h"
-#include "pars0pars.h"
+#include "row0undo.h"
#include "srv0mon.h"
+#include "srv0start.h"
+#include "trx0rec.h"
+#include "trx0rseg.h"
#include "trx0sys.h"
+#include "trx0trx.h"
+#include "trx0undo.h"
/** This many pages must be undone before a truncate is tried within
rollback */
-#define TRX_ROLL_TRUNC_THRESHOLD 1
+static const ulint TRX_ROLL_TRUNC_THRESHOLD = 1;
/** true if trx_rollback_or_clean_all_recovered() thread is active */
bool trx_rollback_or_clean_is_active;
@@ -88,17 +83,20 @@ trx_rollback_to_savepoint_low(
roll_node = roll_node_create(heap);
if (savept != NULL) {
- roll_node->partial = TRUE;
- roll_node->savept = *savept;
- assert_trx_in_list(trx);
- } else {
+ roll_node->savept = savept;
+ check_trx_state(trx);
+ } else {
assert_trx_nonlocking_or_in_list(trx);
}
trx->error_state = DB_SUCCESS;
- if (trx->insert_undo || trx->update_undo) {
- thr = pars_complete_graph_for_exec(roll_node, trx, heap);
+ if (trx->has_logged()) {
+
+ ut_ad(trx->rsegs.m_redo.rseg != 0
+ || trx->rsegs.m_noredo.rseg != 0);
+
+ thr = pars_complete_graph_for_exec(roll_node, trx, heap, NULL);
ut_a(thr == que_fork_start_command(
static_cast<que_fork_t*>(que_node_get_parent(thr))));
@@ -134,8 +132,7 @@ trx_rollback_to_savepoint_low(
/*******************************************************************//**
Rollback a transaction to a given savepoint or do a complete rollback.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
dberr_t
trx_rollback_to_savepoint(
/*======================*/
@@ -146,7 +143,7 @@ trx_rollback_to_savepoint(
{
ut_ad(!trx_mutex_own(trx));
- trx_start_if_not_started_xa(trx);
+ trx_start_if_not_started_xa(trx, true);
trx_rollback_to_savepoint_low(trx, savept);
@@ -155,7 +152,7 @@ trx_rollback_to_savepoint(
/*******************************************************************//**
Rollback a transaction used in MySQL.
-@return error code or DB_SUCCESS */
+@return error code or DB_SUCCESS */
static
dberr_t
trx_rollback_for_mysql_low(
@@ -178,14 +175,10 @@ trx_rollback_for_mysql_low(
return(trx->error_state);
}
-/*******************************************************************//**
-Rollback a transaction used in MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-trx_rollback_for_mysql(
-/*===================*/
- trx_t* trx) /*!< in/out: transaction */
+/** Rollback a transaction used in MySQL
+@param[in, out] trx transaction
+@return error code or DB_SUCCESS */
+dberr_t trx_rollback_for_mysql(trx_t* trx)
{
/* We are reading trx->state without holding trx_sys->mutex
here, because the rollback should be invoked for a running
@@ -194,6 +187,7 @@ trx_rollback_for_mysql(
switch (trx->state) {
case TRX_STATE_NOT_STARTED:
+ trx->will_lock = 0;
ut_ad(trx->in_mysql_trx_list);
return(DB_SUCCESS);
@@ -205,10 +199,50 @@ trx_rollback_for_mysql(
case TRX_STATE_PREPARED:
case TRX_STATE_PREPARED_RECOVERED:
ut_ad(!trx_is_autocommit_non_locking(trx));
+ if (trx->has_logged_persistent()) {
+ /* The XA ROLLBACK of a XA PREPARE transaction
+ will consist of multiple mini-transactions.
+
+ As the very first step of XA ROLLBACK, we must
+ change the undo log state back from
+ TRX_UNDO_PREPARED to TRX_UNDO_ACTIVE, in order
+ to ensure that recovery will complete the
+ rollback.
+
+ Failure to perform this step could cause a
+ situation where we would roll back part of
+ a XA PREPARE transaction, the server would be
+ killed, and finally, the transaction would be
+ recovered in XA PREPARE state, with some of
+ the actions already having been rolled back. */
+ trx_undo_ptr_t* undo_ptr = &trx->rsegs.m_redo;
+ mtr_t mtr;
+ mtr.start();
+ mutex_enter(&trx->rsegs.m_redo.rseg->mutex);
+ if (undo_ptr->insert_undo != NULL) {
+ trx_undo_set_state_at_prepare(
+ trx, undo_ptr->insert_undo,
+ true, &mtr);
+ }
+ if (undo_ptr->update_undo != NULL) {
+ trx_undo_set_state_at_prepare(
+ trx, undo_ptr->update_undo,
+ true, &mtr);
+ }
+ mutex_exit(&trx->rsegs.m_redo.rseg->mutex);
+ /* Write the redo log for the XA ROLLBACK
+ state change to the global buffer. It is
+ not necessary to flush the redo log. If
+ a durable log write of a later mini-transaction
+ takes place for whatever reason, then this state
+ change will be durable as well. */
+ mtr.commit();
+ ut_ad(mtr.commit_lsn() > 0);
+ }
return(trx_rollback_for_mysql_low(trx));
case TRX_STATE_COMMITTED_IN_MEMORY:
- assert_trx_in_list(trx);
+ check_trx_state(trx);
break;
}
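
The ordering argument in the XA ROLLBACK comment above can be modelled with toy states: the undo log must leave the PREPARED state in a mini-transaction of its own before any row is undone, so that a crash mid-rollback recovers into a rollback rather than into an XA PREPARE with some actions already rolled back.

#include <cassert>

enum undo_state_sketch { UNDO_SK_ACTIVE, UNDO_SK_PREPARED };

struct xa_trx_sketch {
	undo_state_sketch	undo_state;
	int			rows_to_undo;
};

static void xa_rollback(xa_trx_sketch* trx)
{
	// Step 1: leave the PREPARED undo state in its own
	// mini-transaction, before any row is touched.
	trx->undo_state = UNDO_SK_ACTIVE;

	// Step 2: undo the rows. A crash at any point here is safe:
	// recovery sees the ACTIVE state and completes the rollback
	// itself, instead of re-offering a half-rolled-back XA PREPARE.
	while (trx->rows_to_undo > 0) {
		trx->rows_to_undo--;
	}
}

int main()
{
	xa_trx_sketch t = {UNDO_SK_PREPARED, 3};
	xa_rollback(&t);
	assert(t.undo_state == UNDO_SK_ACTIVE && t.rows_to_undo == 0);
	return 0;
}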
@@ -218,8 +252,7 @@ trx_rollback_for_mysql(
/*******************************************************************//**
Rollback the latest SQL statement for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
dberr_t
trx_rollback_last_sql_stat_for_mysql(
/*=================================*/
@@ -236,6 +269,7 @@ trx_rollback_last_sql_stat_for_mysql(
switch (trx->state) {
case TRX_STATE_NOT_STARTED:
return(DB_SUCCESS);
+
case TRX_STATE_ACTIVE:
assert_trx_nonlocking_or_in_list(trx);
@@ -244,7 +278,7 @@ trx_rollback_last_sql_stat_for_mysql(
err = trx_rollback_to_savepoint(
trx, &trx->last_sql_stat_start);
- if (trx->fts_trx) {
+ if (trx->fts_trx != NULL) {
fts_savepoint_rollback_last_stmt(trx);
}
@@ -255,6 +289,7 @@ trx_rollback_last_sql_stat_for_mysql(
trx->op_info = "";
return(err);
+
case TRX_STATE_PREPARED:
case TRX_STATE_PREPARED_RECOVERED:
case TRX_STATE_COMMITTED_IN_MEMORY:
@@ -300,14 +335,14 @@ trx_roll_savepoint_free(
trx_t* trx, /*!< in: transaction handle */
trx_named_savept_t* savep) /*!< in: savepoint to free */
{
- UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep);
- mem_free(savep->name);
- mem_free(savep);
+ UT_LIST_REMOVE(trx->trx_savepoints, savep);
+
+ ut_free(savep->name);
+ ut_free(savep);
}
/*******************************************************************//**
Frees savepoint structs starting from savep. */
-UNIV_INTERN
void
trx_roll_savepoints_free(
/*=====================*/
@@ -341,7 +376,7 @@ trx_rollback_to_savepoint_for_mysql_low(
/*====================================*/
trx_t* trx, /*!< in/out: transaction */
trx_named_savept_t* savep, /*!< in/out: savepoint */
- ib_int64_t* mysql_binlog_cache_pos)
+ int64_t* mysql_binlog_cache_pos)
/*!< out: the MySQL binlog
cache position corresponding
to this savepoint; MySQL needs
@@ -373,6 +408,12 @@ trx_rollback_to_savepoint_for_mysql_low(
trx->op_info = "";
+#ifdef WITH_WSREP
+ if (wsrep_on(trx->mysql_thd) &&
+ trx->lock.was_chosen_as_deadlock_victim) {
+ trx->lock.was_chosen_as_deadlock_victim = FALSE;
+ }
+#endif
return(err);
}
@@ -385,13 +426,12 @@ the row, these locks are naturally released in the rollback. Savepoints which
were set after this savepoint are deleted.
@return if no savepoint of the name found then DB_NO_SAVEPOINT,
otherwise DB_SUCCESS */
-UNIV_INTERN
dberr_t
trx_rollback_to_savepoint_for_mysql(
/*================================*/
trx_t* trx, /*!< in: transaction handle */
const char* savepoint_name, /*!< in: savepoint name */
- ib_int64_t* mysql_binlog_cache_pos) /*!< out: the MySQL binlog cache
+ int64_t* mysql_binlog_cache_pos) /*!< out: the MySQL binlog cache
position corresponding to this
savepoint; MySQL needs this
information to remove the
@@ -414,14 +454,16 @@ trx_rollback_to_savepoint_for_mysql(
switch (trx->state) {
case TRX_STATE_NOT_STARTED:
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: transaction has a savepoint ", stderr);
- ut_print_name(stderr, trx, FALSE, savep->name);
- fputs(" though it is not started\n", stderr);
+ ib::error() << "Transaction has a savepoint "
+ << savep->name
+ << " though it is not started";
return(DB_ERROR);
+
case TRX_STATE_ACTIVE:
+
return(trx_rollback_to_savepoint_for_mysql_low(
trx, savep, mysql_binlog_cache_pos));
+
case TRX_STATE_PREPARED:
case TRX_STATE_PREPARED_RECOVERED:
case TRX_STATE_COMMITTED_IN_MEMORY:
@@ -439,36 +481,36 @@ Creates a named savepoint. If the transaction is not yet started, starts it.
If there is already a savepoint of the same name, this call erases that old
savepoint and replaces it with a new. Savepoints are deleted in a transaction
commit or rollback.
-@return always DB_SUCCESS */
-UNIV_INTERN
+@return always DB_SUCCESS */
dberr_t
trx_savepoint_for_mysql(
/*====================*/
trx_t* trx, /*!< in: transaction handle */
const char* savepoint_name, /*!< in: savepoint name */
- ib_int64_t binlog_cache_pos) /*!< in: MySQL binlog cache
+ int64_t binlog_cache_pos) /*!< in: MySQL binlog cache
position corresponding to this
connection at the time of the
savepoint */
{
trx_named_savept_t* savep;
- trx_start_if_not_started_xa(trx);
+ trx_start_if_not_started_xa(trx, false);
savep = trx_savepoint_find(trx, savepoint_name);
if (savep) {
/* There is a savepoint with the same name: free that */
- UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep);
+ UT_LIST_REMOVE(trx->trx_savepoints, savep);
- mem_free(savep->name);
- mem_free(savep);
+ ut_free(savep->name);
+ ut_free(savep);
}
/* Create a new savepoint and add it as the last in the list */
- savep = static_cast<trx_named_savept_t*>(mem_alloc(sizeof(*savep)));
+ savep = static_cast<trx_named_savept_t*>(
+ ut_malloc_nokey(sizeof(*savep)));
savep->name = mem_strdup(savepoint_name);
@@ -476,7 +518,7 @@ trx_savepoint_for_mysql(
savep->mysql_binlog_cache_pos = binlog_cache_pos;
- UT_LIST_ADD_LAST(trx_savepoints, trx->trx_savepoints, savep);
+ UT_LIST_ADD_LAST(trx->trx_savepoints, savep);
return(DB_SUCCESS);
}
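
The savepoint bookkeeping above reduces to: erase any same-named savepoint, then append the new one at the tail, matching UT_LIST_ADD_LAST(). A standalone sketch with simplified types:

#include <cstdint>
#include <list>
#include <string>

struct savept_sketch {
	std::string	name;
	int64_t		binlog_cache_pos;
};

static void set_savepoint(std::list<savept_sketch>& savepoints,
			  const std::string& name, int64_t binlog_pos)
{
	for (std::list<savept_sketch>::iterator it = savepoints.begin();
	     it != savepoints.end(); ++it) {
		if (it->name == name) {
			savepoints.erase(it);	// replace same-named one
			break;
		}
	}
	savept_sketch s = {name, binlog_pos};
	savepoints.push_back(s);	// always last in the list
}

int main()
{
	std::list<savept_sketch> savepoints;
	set_savepoint(savepoints, "sp1", 100);
	set_savepoint(savepoints, "sp1", 200);	// replaces the old "sp1"
	return savepoints.size() == 1 ? 0 : 1;
}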
@@ -486,7 +528,6 @@ Releases only the named savepoint. Savepoints which were set after this
savepoint are left as is.
@return if no savepoint of the name found then DB_NO_SAVEPOINT,
otherwise DB_SUCCESS */
-UNIV_INTERN
dberr_t
trx_release_savepoint_for_mysql(
/*============================*/
@@ -513,7 +554,6 @@ Determines if this transaction is rolling back an incomplete transaction
in crash recovery.
@return TRUE if trx is an incomplete transaction that is being rolled
back in crash recovery */
-UNIV_INTERN
ibool
trx_is_recv(
/*========*/
@@ -524,8 +564,7 @@ trx_is_recv(
/*******************************************************************//**
Returns a transaction savepoint taken at this point in time.
-@return savepoint */
-UNIV_INTERN
+@return savepoint */
trx_savept_t
trx_savept_take(
/*============*/
@@ -552,13 +591,16 @@ trx_rollback_active(
roll_node_t* roll_node;
dict_table_t* table;
ibool dictionary_locked = FALSE;
+ const trx_id_t trx_id = trx->id;
+
+ ut_ad(trx_id);
heap = mem_heap_create(512);
fork = que_fork_create(NULL, NULL, QUE_FORK_RECOVERY, heap);
fork->trx = trx;
- thr = que_thr_create(fork, heap);
+ thr = que_thr_create(fork, heap, NULL);
roll_node = roll_node_create(heap);
@@ -583,6 +625,7 @@ trx_rollback_active(
if (trx->error_state != DB_SUCCESS) {
ut_ad(trx->error_state == DB_INTERRUPTED);
+ ut_ad(!srv_is_being_started);
ut_ad(!srv_undo_sources);
ut_ad(srv_fast_shutdown);
ut_ad(!dictionary_locked);
@@ -602,41 +645,27 @@ trx_rollback_active(
if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE
&& trx->table_id != 0) {
+ ut_ad(dictionary_locked);
+
/* If the transaction was for a dictionary operation,
we drop the relevant table only if it is not flagged
as DISCARDED. If it still exists. */
table = dict_table_open_on_id(
- trx->table_id, dictionary_locked,
- DICT_TABLE_OP_NORMAL);
+ trx->table_id, TRUE, DICT_TABLE_OP_NORMAL);
if (table && !dict_table_is_discarded(table)) {
+ ib::warn() << "Dropping table '" << table->name
+ << "', with id " << trx->table_id
+ << " in recovery";
- dberr_t err;
-
- /* Ensure that the table doesn't get evicted from the
- cache, keeps things simple for drop. */
-
- if (table->can_be_evicted) {
- dict_table_move_from_lru_to_non_lru(table);
- }
-
- dict_table_close(table, dictionary_locked, FALSE);
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "Dropping table '%s', with id " UINT64PF " "
- "in recovery",
- table->name, trx->table_id);
+ dict_table_close_and_drop(trx, table);
- err = row_drop_table_for_mysql(table->name, trx, TRUE, FALSE);
trx_commit_for_mysql(trx);
-
- ut_a(err == DB_SUCCESS);
}
}
- ib_logf(IB_LOG_LEVEL_INFO,
- "Rollback of trx with id " TRX_ID_FMT " completed", trx->id);
+ ib::info() << "Rolled back recovered transaction " << trx_id;
func_exit:
if (dictionary_locked) {
@@ -662,12 +691,12 @@ trx_rollback_resurrected(
ibool* all) /*!< in/out: FALSE=roll back dictionary transactions;
TRUE=roll back all non-PREPARED transactions */
{
- ut_ad(mutex_own(&trx_sys->mutex));
+ ut_ad(trx_sys_mutex_own());
/* The trx->is_recovered flag and trx->state are set
- atomically under the protection of the trx->mutex (and
- lock_sys->mutex) in lock_trx_release_locks(). We do not want
- to accidentally clean up a non-recovered transaction here. */
+ atomically under the protection of the trx->mutex in
+ trx_t::commit_state(). We do not want to accidentally clean up
+ a non-recovered transaction here. */
trx_mutex_enter(trx);
if (!trx->is_recovered) {
@@ -679,29 +708,28 @@ func_exit:
switch (trx->state) {
case TRX_STATE_COMMITTED_IN_MEMORY:
trx_mutex_exit(trx);
- mutex_exit(&trx_sys->mutex);
- fprintf(stderr,
- "InnoDB: Cleaning up trx with id " TRX_ID_FMT "\n",
- trx->id);
+ trx_sys_mutex_exit();
+ ib::info() << "Cleaning up trx with id " << ib::hex(trx->id);
+
trx_cleanup_at_db_startup(trx);
- trx_free_for_background(trx);
+ trx_free_resurrected(trx);
return(TRUE);
case TRX_STATE_ACTIVE:
- if (!srv_undo_sources && srv_fast_shutdown) {
+ if (!srv_is_being_started
+ && !srv_undo_sources && srv_fast_shutdown) {
fake_prepared:
trx->state = TRX_STATE_PREPARED;
- trx_sys->n_prepared_trx++;
- trx_sys->n_prepared_recovered_trx++;
*all = FALSE;
goto func_exit;
}
trx_mutex_exit(trx);
if (*all || trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
- mutex_exit(&trx_sys->mutex);
+ trx_sys_mutex_exit();
trx_rollback_active(trx);
if (trx->error_state != DB_SUCCESS) {
ut_ad(trx->error_state == DB_INTERRUPTED);
+ trx->error_state = DB_SUCCESS;
ut_ad(!srv_undo_sources);
ut_ad(srv_fast_shutdown);
mutex_enter(&trx_sys->mutex);
@@ -725,7 +753,6 @@ fake_prepared:
/** Report progress when rolling back a row of a recovered transaction.
@return whether the rollback should be aborted due to pending shutdown */
-UNIV_INTERN
bool
trx_roll_must_shutdown()
{
@@ -734,6 +761,7 @@ trx_roll_must_shutdown()
ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE
+ && !srv_is_being_started
&& !srv_undo_sources && srv_fast_shutdown) {
return true;
}
@@ -756,7 +784,6 @@ trx_roll_must_shutdown()
n_rows += t->undo_no;
}
}
-
if (n_rows > 0) {
service_manager_extend_timeout(
INNODB_EXTEND_TIMEOUT_INTERVAL,
@@ -764,9 +791,8 @@ trx_roll_must_shutdown()
"%llu rows", n_trx, n_rows);
}
- ib_logf(IB_LOG_LEVEL_INFO,
- "To roll back: " ULINTPF " transactions, "
- "%llu rows", n_trx, n_rows);
+ ib::info() << "To roll back: " << n_trx << " transactions, "
+ << n_rows << " rows";
}
mutex_exit(&recv_sys->mutex);
@@ -779,7 +805,6 @@ Rollback or clean up any incomplete transactions which were
encountered in crash recovery. If the transaction already was
committed, then we clean up a possible insert undo log. If the
transaction was not yet committed, then we roll it back. */
-UNIV_INTERN
void
trx_rollback_or_clean_recovered(
/*============================*/
@@ -796,9 +821,8 @@ trx_rollback_or_clean_recovered(
}
if (all) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Starting in background the rollback"
- " of recovered transactions");
+ ib::info() << "Starting in background the rollback"
+ " of recovered transactions";
}
/* Note: For XA recovered transactions, we rely on MySQL to
@@ -810,7 +834,7 @@ trx_rollback_or_clean_recovered(
recovered transactions to clean up or recover. */
do {
- mutex_enter(&trx_sys->mutex);
+ trx_sys_mutex_enter();
for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
trx != NULL;
@@ -824,19 +848,19 @@ trx_rollback_or_clean_recovered(
if (trx_rollback_resurrected(trx, &all)) {
- mutex_enter(&trx_sys->mutex);
+ trx_sys_mutex_enter();
break;
}
}
- mutex_exit(&trx_sys->mutex);
+ trx_sys_mutex_exit();
} while (trx != NULL);
if (all) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Rollback of non-prepared transactions completed");
+ ib::info() << "Rollback of non-prepared transactions"
+ " completed";
}
}
@@ -846,8 +870,8 @@ encountered in crash recovery. If the transaction already was
committed, then we clean up a possible insert undo log. If the
transaction was not yet committed, then we roll it back.
Note: this is done in a background thread.
-@return a dummy parameter */
-extern "C" UNIV_INTERN
+@return a dummy parameter */
+extern "C"
os_thread_ret_t
DECLARE_THREAD(trx_rollback_or_clean_all_recovered)(
/*================================================*/
@@ -870,213 +894,56 @@ DECLARE_THREAD(trx_rollback_or_clean_all_recovered)(
/* We count the number of threads in os_thread_exit(). A created
thread should always use that to exit and not use return() to exit. */
- os_thread_exit(NULL);
+ os_thread_exit();
OS_THREAD_DUMMY_RETURN;
}
-/*******************************************************************//**
-Creates an undo number array.
-@return own: undo number array */
+/** Try to truncate the undo logs.
+@param[in,out] trx transaction */
static
-trx_undo_arr_t*
-trx_undo_arr_create(
-/*================*/
- ulint n_cells) /*!< Number of cells */
-{
- trx_undo_arr_t* arr;
- mem_heap_t* heap;
- ulint sz = sizeof(*arr) + sizeof(*arr->infos) * n_cells;
-
- heap = mem_heap_create(sz);
-
- arr = static_cast<trx_undo_arr_t*>(mem_heap_zalloc(heap, sz));
-
- arr->n_cells = n_cells;
-
- arr->infos = (trx_undo_inf_t*) (arr + 1);
-
- arr->heap = heap;
-
- return(arr);
-}
-
-/*******************************************************************//**
-Frees an undo number array. */
-UNIV_INTERN
void
-trx_undo_arr_free(
-/*==============*/
- trx_undo_arr_t* arr) /*!< in: undo number array */
-{
- mem_heap_free(arr->heap);
-}
-
-/*******************************************************************//**
-Stores info of an undo log record to the array if it is not stored yet.
-@return FALSE if the record already existed in the array */
-static
-ibool
-trx_undo_arr_store_info(
-/*====================*/
- trx_t* trx, /*!< in: transaction */
- undo_no_t undo_no)/*!< in: undo number */
+trx_roll_try_truncate(trx_t* trx)
{
- ulint i;
- trx_undo_arr_t* arr;
- ulint n = 0;
- ulint n_used;
- trx_undo_inf_t* stored_here = NULL;
-
- arr = trx->undo_no_arr;
- n_used = arr->n_used;
-
- for (i = 0; i < arr->n_cells; i++) {
- trx_undo_inf_t* cell;
-
- cell = trx_undo_arr_get_nth_info(arr, i);
-
- if (!cell->in_use) {
- if (!stored_here) {
- /* Not in use, we may store here */
- cell->undo_no = undo_no;
- cell->in_use = TRUE;
-
- arr->n_used++;
-
- stored_here = cell;
- }
- } else {
- n++;
-
- if (cell->undo_no == undo_no) {
-
- if (stored_here) {
- stored_here->in_use = FALSE;
- ut_ad(arr->n_used > 0);
- arr->n_used--;
- }
-
- ut_ad(arr->n_used == n_used);
-
- return(FALSE);
- }
- }
-
- if (n == n_used && stored_here) {
-
- ut_ad(arr->n_used == 1 + n_used);
-
- return(TRUE);
- }
- }
-
- ut_error;
-
- return(FALSE);
-}
-
-/*******************************************************************//**
-Removes an undo number from the array. */
-static
-void
-trx_undo_arr_remove_info(
-/*=====================*/
- trx_undo_arr_t* arr, /*!< in: undo number array */
- undo_no_t undo_no)/*!< in: undo number */
-{
- ulint i;
-
- for (i = 0; i < arr->n_cells; i++) {
-
- trx_undo_inf_t* cell;
-
- cell = trx_undo_arr_get_nth_info(arr, i);
-
- if (cell->in_use && cell->undo_no == undo_no) {
- cell->in_use = FALSE;
- ut_ad(arr->n_used > 0);
- --arr->n_used;
- break;
- }
- }
-}
-
-/*******************************************************************//**
-Gets the biggest undo number in an array.
-@return biggest value, 0 if the array is empty */
-static
-undo_no_t
-trx_undo_arr_get_biggest(
-/*=====================*/
- const trx_undo_arr_t* arr) /*!< in: undo number array */
-{
- ulint i;
- undo_no_t biggest = 0;
- ulint n_checked = 0;
-
- for (i = 0; i < arr->n_cells && n_checked < arr->n_used; ++i) {
-
- const trx_undo_inf_t* cell = &arr->infos[i];
-
- if (cell->in_use) {
-
- ++n_checked;
-
- if (cell->undo_no > biggest) {
-
- biggest = cell->undo_no;
- }
- }
- }
-
- return(biggest);
-}
-
-/***********************************************************************//**
-Tries truncate the undo logs. */
-static
-void
-trx_roll_try_truncate(
-/*==================*/
- trx_t* trx) /*!< in/out: transaction */
-{
- undo_no_t limit;
- const trx_undo_arr_t* arr;
-
- ut_ad(mutex_own(&(trx->undo_mutex)));
- ut_ad(mutex_own(&((trx->rseg)->mutex)));
+ ut_ad(mutex_own(&trx->undo_mutex));
trx->pages_undone = 0;
- arr = trx->undo_no_arr;
-
- limit = trx->undo_no;
+ undo_no_t undo_no = trx->undo_no;
+ trx_undo_t* insert_undo = trx->rsegs.m_redo.insert_undo;
+ trx_undo_t* update_undo = trx->rsegs.m_redo.update_undo;
- if (arr->n_used > 0) {
- undo_no_t biggest;
-
- biggest = trx_undo_arr_get_biggest(arr);
-
- if (biggest >= limit) {
-
- limit = biggest + 1;
+ if (insert_undo || update_undo) {
+ mutex_enter(&trx->rsegs.m_redo.rseg->mutex);
+ if (insert_undo) {
+ ut_ad(insert_undo->rseg == trx->rsegs.m_redo.rseg);
+ trx_undo_truncate_end(insert_undo, undo_no, false);
}
+ if (update_undo) {
+ ut_ad(update_undo->rseg == trx->rsegs.m_redo.rseg);
+ trx_undo_truncate_end(update_undo, undo_no, false);
+ }
+ mutex_exit(&trx->rsegs.m_redo.rseg->mutex);
}
- if (trx->insert_undo) {
- trx_undo_truncate_end(trx, trx->insert_undo, limit);
+ if (trx_undo_t* undo = trx->rsegs.m_noredo.undo) {
+ ut_ad(undo->rseg == trx->rsegs.m_noredo.rseg);
+ mutex_enter(&undo->rseg->mutex);
+ trx_undo_truncate_end(undo, undo_no, true);
+ mutex_exit(&undo->rseg->mutex);
}
- if (trx->update_undo) {
- trx_undo_truncate_end(trx, trx->update_undo, limit);
+#ifdef WITH_WSREP_OUT
+ if (wsrep_on(trx->mysql_thd)) {
+ trx->lock.was_chosen_as_deadlock_victim = FALSE;
}
+#endif /* WITH_WSREP_OUT */
}
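
trx_undo_truncate_end() as used above chops off the tail of each undo log that rollback has already processed. A toy standalone model, with the log as an ascending list of undo numbers:

#include <cassert>
#include <cstdint>
#include <vector>

// Discard every record at or above the transaction's current undo_no;
// those records have already been rolled back.
static void truncate_end(std::vector<uint64_t>& undo_recs, uint64_t undo_no)
{
	while (!undo_recs.empty() && undo_recs.back() >= undo_no) {
		undo_recs.pop_back();
	}
}

int main()
{
	std::vector<uint64_t> log = {1, 2, 3, 4};
	truncate_end(log, 3);	// trx->undo_no == 3
	assert(log.size() == 2);
	return 0;
}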
/***********************************************************************//**
Pops the topmost undo log record in a single undo log and updates the info
about the topmost record in the undo log memory struct.
-@return undo log record, the page s-latched */
+@return undo log record, the page s-latched */
static
trx_undo_rec_t*
trx_roll_pop_top_rec(
@@ -1085,23 +952,14 @@ trx_roll_pop_top_rec(
trx_undo_t* undo, /*!< in: undo log */
mtr_t* mtr) /*!< in: mtr */
{
- page_t* undo_page;
- ulint offset;
- trx_undo_rec_t* prev_rec;
- page_t* prev_rec_page;
-
ut_ad(mutex_own(&trx->undo_mutex));
- undo_page = trx_undo_page_get_s_latched(
- undo->space, undo->zip_size, undo->top_page_no, mtr);
-
- offset = undo->top_offset;
+ page_t* undo_page = trx_undo_page_get_s_latched(
+ page_id_t(undo->space, undo->top_page_no), mtr);
- /* fprintf(stderr, "Thread %lu undoing trx " TRX_ID_FMT
- " undo record " TRX_ID_FMT "\n",
- os_thread_get_curr_id(), trx->id, undo->top_undo_no); */
+ ulint offset = undo->top_offset;
- prev_rec = trx_undo_get_prev_rec(
+ trx_undo_rec_t* prev_rec = trx_undo_get_prev_rec(
undo_page + offset, undo->hdr_page_no, undo->hdr_offset,
true, mtr);
@@ -1109,7 +967,7 @@ trx_roll_pop_top_rec(
undo->empty = TRUE;
} else {
- prev_rec_page = page_align(prev_rec);
+ page_t* prev_rec_page = page_align(prev_rec);
if (prev_rec_page != undo_page) {
@@ -1124,163 +982,113 @@ trx_roll_pop_top_rec(
return(undo_page + offset);
}
-/********************************************************************//**
-Pops the topmost record when the two undo logs of a transaction are seen
-as a single stack of records ordered by their undo numbers. Inserts the
-undo number of the popped undo record to the array of currently processed
-undo numbers in the transaction. When the query thread finishes processing
-of this undo record, it must be released with trx_undo_rec_release.
-@return undo log record copied to heap, NULL if none left, or if the
-undo number of the top record would be less than the limit */
-UNIV_INTERN
+/** Get the last undo log record of a transaction (for rollback).
+@param[in,out] trx transaction
+@param[out] roll_ptr DB_ROLL_PTR to the undo record
+@param[in,out] heap memory heap for allocation
+@return undo log record copied to heap
+@retval NULL if none left or the roll_limit (savepoint) was reached */
trx_undo_rec_t*
-trx_roll_pop_top_rec_of_trx(
-/*========================*/
- trx_t* trx, /*!< in: transaction */
- undo_no_t limit, /*!< in: least undo number we need */
- roll_ptr_t* roll_ptr,/*!< out: roll pointer to undo record */
- mem_heap_t* heap) /*!< in: memory heap where copied */
+trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap)
{
- trx_undo_t* undo;
- trx_undo_t* ins_undo;
- trx_undo_t* upd_undo;
- trx_undo_rec_t* undo_rec;
- trx_undo_rec_t* undo_rec_copy;
- undo_no_t undo_no;
- ibool is_insert;
- trx_rseg_t* rseg;
- mtr_t mtr;
-
- rseg = trx->rseg;
-try_again:
- mutex_enter(&(trx->undo_mutex));
+ mutex_enter(&trx->undo_mutex);
if (trx->pages_undone >= TRX_ROLL_TRUNC_THRESHOLD) {
- mutex_enter(&rseg->mutex);
-
trx_roll_try_truncate(trx);
-
- mutex_exit(&rseg->mutex);
}
- ins_undo = trx->insert_undo;
- upd_undo = trx->update_undo;
-
- if (!ins_undo || ins_undo->empty) {
- undo = upd_undo;
- } else if (!upd_undo || upd_undo->empty) {
- undo = ins_undo;
- } else if (upd_undo->top_undo_no > ins_undo->top_undo_no) {
- undo = upd_undo;
- } else {
- undo = ins_undo;
+ trx_undo_t* undo = NULL;
+ trx_undo_t* insert = trx->rsegs.m_redo.insert_undo;
+ trx_undo_t* update = trx->rsegs.m_redo.update_undo;
+ trx_undo_t* temp = trx->rsegs.m_noredo.undo;
+ const undo_no_t limit = trx->roll_limit;
+
+ ut_ad(!insert || !update || insert->empty || update->empty
+ || insert->top_undo_no != update->top_undo_no);
+ ut_ad(!insert || !temp || insert->empty || temp->empty
+ || insert->top_undo_no != temp->top_undo_no);
+ ut_ad(!update || !temp || update->empty || temp->empty
+ || update->top_undo_no != temp->top_undo_no);
+
+ if (insert && !insert->empty && limit <= insert->top_undo_no) {
+ undo = insert;
}
- if (!undo || undo->empty || limit > undo->top_undo_no) {
-
- if ((trx->undo_no_arr)->n_used == 0) {
- /* Rollback is ending */
-
- mutex_enter(&(rseg->mutex));
-
- trx_roll_try_truncate(trx);
-
- mutex_exit(&(rseg->mutex));
+ if (update && !update->empty && update->top_undo_no >= limit) {
+ if (!undo) {
+ undo = update;
+ } else if (undo->top_undo_no < update->top_undo_no) {
+ undo = update;
}
+ }
- mutex_exit(&(trx->undo_mutex));
+ if (temp && !temp->empty && temp->top_undo_no >= limit) {
+ if (!undo) {
+ undo = temp;
+ } else if (undo->top_undo_no < temp->top_undo_no) {
+ undo = temp;
+ }
+ }
+ if (undo == NULL) {
+ trx_roll_try_truncate(trx);
+ /* Mark any ROLLBACK TO SAVEPOINT completed, so that
+ if the transaction object is committed and reused
+ later, we will default to a full ROLLBACK. */
+ trx->roll_limit = 0;
+ ut_d(trx->in_rollback = false);
+ mutex_exit(&trx->undo_mutex);
return(NULL);
}
- is_insert = (undo == ins_undo);
+ ut_ad(!undo->empty);
+ ut_ad(limit <= undo->top_undo_no);
*roll_ptr = trx_undo_build_roll_ptr(
- is_insert, undo->rseg->id, undo->top_page_no, undo->top_offset);
-
- mtr_start(&mtr);
-
- undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr);
-
- undo_no = trx_undo_rec_get_undo_no(undo_rec);
-
- ut_ad(undo_no + 1 == trx->undo_no);
-
- trx->undo_no = undo_no;
-
- if (!trx_undo_arr_store_info(trx, undo_no)) {
- /* A query thread is already processing this undo log record */
-
- mutex_exit(&(trx->undo_mutex));
-
- mtr_commit(&mtr);
-
- goto try_again;
+ false, undo->rseg->id, undo->top_page_no, undo->top_offset);
+
+ mtr_t mtr;
+ mtr.start();
+
+ trx_undo_rec_t* undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr);
+ const undo_no_t undo_no = trx_undo_rec_get_undo_no(undo_rec);
+ switch (trx_undo_rec_get_type(undo_rec)) {
+ case TRX_UNDO_RENAME_TABLE:
+ ut_ad(undo == insert);
+ /* fall through */
+ case TRX_UNDO_INSERT_REC:
+ ut_ad(undo == insert || undo == temp);
+ *roll_ptr |= 1ULL << ROLL_PTR_INSERT_FLAG_POS;
+ break;
+ default:
+ ut_ad(undo == update || undo == temp);
+ break;
}
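+	/* A DB_ROLL_PTR packs, from the most significant bit down: the
+	insert flag (ROLL_PTR_INSERT_FLAG_POS), a 7-bit rollback segment
+	id, a 32-bit page number and a 16-bit byte offset. The call to
+	trx_undo_build_roll_ptr() above passed is_insert=false, so the
+	flag is OR'ed in here once the record type is known. */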
- undo_rec_copy = trx_undo_rec_copy(undo_rec, heap);
+ ut_ad(trx_roll_check_undo_rec_ordering(
+ undo_no, undo->rseg->space, trx));
- mutex_exit(&(trx->undo_mutex));
+ trx->undo_no = undo_no;
+ trx->undo_rseg_space = undo->rseg->space;
+ mutex_exit(&trx->undo_mutex);
- mtr_commit(&mtr);
+ trx_undo_rec_t* undo_rec_copy = trx_undo_rec_copy(undo_rec, heap);
+ mtr.commit();
return(undo_rec_copy);
}
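+
+/* Note: the undo record is copied into the caller's heap before
+mtr.commit() releases the undo page latch, so the returned copy stays
+valid after the mini-transaction ends. */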
-/********************************************************************//**
-Reserves an undo log record for a query thread to undo. This should be
-called if the query thread gets the undo log record not using the pop
-function above.
-@return TRUE if succeeded */
-UNIV_INTERN
-ibool
-trx_undo_rec_reserve(
-/*=================*/
- trx_t* trx, /*!< in/out: transaction */
- undo_no_t undo_no)/*!< in: undo number of the record */
-{
- ibool ret;
-
- mutex_enter(&(trx->undo_mutex));
-
- ret = trx_undo_arr_store_info(trx, undo_no);
-
- mutex_exit(&(trx->undo_mutex));
-
- return(ret);
-}
-
-/*******************************************************************//**
-Releases a reserved undo record. */
-UNIV_INTERN
-void
-trx_undo_rec_release(
-/*=================*/
- trx_t* trx, /*!< in/out: transaction */
- undo_no_t undo_no)/*!< in: undo number */
-{
- trx_undo_arr_t* arr;
-
- mutex_enter(&(trx->undo_mutex));
-
- arr = trx->undo_no_arr;
-
- trx_undo_arr_remove_info(arr, undo_no);
-
- mutex_exit(&(trx->undo_mutex));
-}
-
/****************************************************************//**
Builds an undo 'query' graph for a transaction. The actual rollback is
performed by executing this query graph like a query subprocedure call.
The reply about the completion of the rollback will be sent by this
graph.
-@return own: the query graph */
+@return own: the query graph */
static
que_t*
trx_roll_graph_build(
/*=================*/
- trx_t* trx) /*!< in: trx handle */
+ trx_t* trx) /*!< in/out: transaction */
{
mem_heap_t* heap;
que_fork_t* fork;
@@ -1292,7 +1100,7 @@ trx_roll_graph_build(
fork = que_fork_create(NULL, NULL, QUE_FORK_ROLLBACK, heap);
fork->trx = trx;
- thr = que_thr_create(fork, heap);
+ thr = que_thr_create(fork, heap, NULL);
thr->child = row_undo_node_create(trx, thr, heap);
@@ -1308,32 +1116,27 @@ que_thr_t*
trx_rollback_start(
/*===============*/
trx_t* trx, /*!< in: transaction */
- ib_id_t roll_limit) /*!< in: rollback to undo no (for
+ undo_no_t roll_limit) /*!< in: rollback to undo no (for
partial undo), 0 if we are rolling back
the entire transaction */
{
- que_t* roll_graph;
-
ut_ad(trx_mutex_own(trx));
- ut_ad(trx->undo_no_arr == NULL || trx->undo_no_arr->n_used == 0);
-
/* Initialize the rollback field in the transaction */
+ ut_ad(!trx->roll_limit);
+ ut_ad(!trx->in_rollback);
+
trx->roll_limit = roll_limit;
+ ut_d(trx->in_rollback = true);
ut_a(trx->roll_limit <= trx->undo_no);
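+	/* A nonzero roll_limit comes from ROLLBACK TO SAVEPOINT (see
+	trx_rollback_step(), which passes savept->least_undo_no); undo
+	records with a smaller undo number are preserved. */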
trx->pages_undone = 0;
- if (trx->undo_no_arr == NULL) {
- /* Single query thread -> 1 */
- trx->undo_no_arr = trx_undo_arr_create(1);
- }
-
/* Build a 'query' graph which will perform the undo operations */
- roll_graph = trx_roll_graph_build(trx);
+ que_t* roll_graph = trx_roll_graph_build(trx);
trx->graph = roll_graph;
@@ -1350,17 +1153,16 @@ trx_rollback_finish(
/*================*/
trx_t* trx) /*!< in: transaction */
{
- ut_a(trx->undo_no_arr == NULL || trx->undo_no_arr->n_used == 0);
-
trx_commit(trx);
+ trx->mod_tables.clear();
+
trx->lock.que_state = TRX_QUE_RUNNING;
}
/*********************************************************************//**
Creates a rollback command node struct.
-@return own: rollback node struct */
-UNIV_INTERN
+@return own: rollback node struct */
roll_node_t*
roll_node_create(
/*=============*/
@@ -1379,8 +1181,7 @@ roll_node_create(
/***********************************************************//**
Performs an execution step for a rollback command node in a query graph.
-@return query thread to run next, or NULL */
-UNIV_INTERN
+@return query thread to run next, or NULL */
que_thr_t*
trx_rollback_step(
/*==============*/
@@ -1398,7 +1199,7 @@ trx_rollback_step(
if (node->state == ROLL_NODE_SEND) {
trx_t* trx;
- ib_id_t roll_limit = 0;
+ ib_id_t roll_limit;
trx = thr_get_trx(thr);
@@ -1408,7 +1209,7 @@ trx_rollback_step(
ut_a(node->undo_thr == NULL);
- roll_limit = node->partial ? node->savept.least_undo_no : 0;
+ roll_limit = node->savept ? node->savept->least_undo_no : 0;
trx_commit_or_rollback_prepare(trx);
diff --git a/storage/innobase/trx/trx0rseg.cc b/storage/innobase/trx/trx0rseg.cc
index 944e2353de8..e76695b7e43 100644
--- a/storage/innobase/trx/trx0rseg.cc
+++ b/storage/innobase/trx/trx0rseg.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,42 +25,31 @@ Created 3/26/1996 Heikki Tuuri
*******************************************************/
#include "trx0rseg.h"
-
-#ifdef UNIV_NONINL
-#include "trx0rseg.ic"
-#endif
-
#include "trx0undo.h"
#include "fut0lst.h"
#include "srv0srv.h"
#include "trx0purge.h"
-#include "ut0bh.h"
#include "srv0mon.h"
-#ifdef UNIV_PFS_MUTEX
-/* Key to register rseg_mutex_key with performance schema */
-UNIV_INTERN mysql_pfs_key_t rseg_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
+#include <algorithm>
-/****************************************************************//**
-Creates a rollback segment header. This function is called only when
-a new rollback segment is created in the database.
-@return page number of the created segment, FIL_NULL if fail */
-UNIV_INTERN
-ulint
+/** Creates a rollback segment header.
+This function is called only when a new rollback segment is created in
+the database.
+@param[in] space space id
+@param[in] max_size max size in pages
+@param[in] rseg_slot_no rseg id == slot number in trx sys
+@param[in,out] mtr mini-transaction
+@return the created rollback segment
+@retval NULL on failure */
+buf_block_t*
trx_rseg_header_create(
-/*===================*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint max_size, /*!< in: max size in pages */
- ulint rseg_slot_no, /*!< in: rseg id == slot number in trx sys */
- mtr_t* mtr) /*!< in: mtr */
+ ulint space,
+ ulint max_size,
+ ulint rseg_slot_no,
+ mtr_t* mtr)
{
- ulint page_no;
- trx_rsegf_t* rsegf;
trx_sysf_t* sys_header;
- ulint i;
buf_block_t* block;
ut_ad(mtr);
@@ -72,50 +61,48 @@ trx_rseg_header_create(
if (block == NULL) {
/* No space left */
-
- return(FIL_NULL);
+ return block;
}
buf_block_dbg_add_level(block, SYNC_RSEG_HEADER_NEW);
- page_no = buf_block_get_page_no(block);
-
- /* Get the rollback segment file page */
- rsegf = trx_rsegf_get_new(space, zip_size, page_no, mtr);
-
/* Initialize max size field */
- mlog_write_ulint(rsegf + TRX_RSEG_MAX_SIZE, max_size,
- MLOG_4BYTES, mtr);
+ mlog_write_ulint(TRX_RSEG + TRX_RSEG_MAX_SIZE + block->frame,
+ max_size, MLOG_4BYTES, mtr);
/* Initialize the history list */
- mlog_write_ulint(rsegf + TRX_RSEG_HISTORY_SIZE, 0, MLOG_4BYTES, mtr);
- flst_init(rsegf + TRX_RSEG_HISTORY, mtr);
+ mlog_write_ulint(TRX_RSEG + TRX_RSEG_HISTORY_SIZE + block->frame, 0,
+ MLOG_4BYTES, mtr);
+ flst_init(TRX_RSEG + TRX_RSEG_HISTORY + block->frame, mtr);
+ trx_rsegf_t* rsegf = TRX_RSEG + block->frame;
/* Reset the undo log slots */
- for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
-
+ for (ulint i = 0; i < TRX_RSEG_N_SLOTS; i++) {
+ /* This is generating a lot of redo log. MariaDB 10.4
+ introduced MLOG_MEMSET to reduce the redo log volume. */
trx_rsegf_set_nth_undo(rsegf, i, FIL_NULL, mtr);
}
- /* Add the rollback segment info to the free slot in
- the trx system header */
+ if (space != SRV_TMP_SPACE_ID) {
+ /* Add the rollback segment info to the free slot in
+ the trx system header */
+
+ sys_header = trx_sysf_get(mtr);
- sys_header = trx_sysf_get(mtr);
+ trx_sysf_rseg_set_space(sys_header, rseg_slot_no, space, mtr);
- trx_sysf_rseg_set_space(sys_header, rseg_slot_no, space, mtr);
- trx_sysf_rseg_set_page_no(sys_header, rseg_slot_no, page_no, mtr);
+ trx_sysf_rseg_set_page_no(
+ sys_header, rseg_slot_no,
+ block->page.id.page_no(), mtr);
+ }
- return(page_no);
+ return block;
}
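+
+/* Callers now receive the rollback segment header block itself rather
+than a page number; NULL still means that the file segment could not
+be allocated. */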
-/***********************************************************************//**
-Free's an instance of the rollback segment in memory. */
-UNIV_INTERN
+/** Free a rollback segment in memory. */
void
-trx_rseg_mem_free(
-/*==============*/
- trx_rseg_t* rseg) /* in, own: instance to free */
+trx_rseg_mem_free(trx_rseg_t* rseg)
{
trx_undo_t* undo;
trx_undo_t* next_undo;
@@ -132,7 +119,7 @@ trx_rseg_mem_free(
next_undo = UT_LIST_GET_NEXT(undo_list, undo);
- UT_LIST_REMOVE(undo_list, rseg->update_undo_cached, undo);
+ UT_LIST_REMOVE(rseg->update_undo_cached, undo);
MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
@@ -145,62 +132,58 @@ trx_rseg_mem_free(
next_undo = UT_LIST_GET_NEXT(undo_list, undo);
- UT_LIST_REMOVE(undo_list, rseg->insert_undo_cached, undo);
+ UT_LIST_REMOVE(rseg->insert_undo_cached, undo);
MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
trx_undo_mem_free(undo);
}
- /* const_cast<trx_rseg_t*>() because this function is
- like a destructor. */
-
- *((trx_rseg_t**) trx_sys->rseg_array + rseg->id) = NULL;
-
- mem_free(rseg);
+ ut_free(rseg);
}
-/***************************************************************************
-Creates and initializes a rollback segment object. The values for the
-fields are read from the header. The object is inserted to the rseg
-list of the trx system object and a pointer is inserted in the rseg
-array in the trx system object.
-@return own: rollback segment object */
+/** Create a rollback segment object.
+@param[in] id rollback segment id
+@param[in] space space where the segment is placed
+@param[in]	page_no		page number of the segment header
+@return own: rollback segment object */
static
trx_rseg_t*
-trx_rseg_mem_create(
-/*================*/
- ulint id, /*!< in: rollback segment id */
- ulint space, /*!< in: space where the segment
- placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number of the segment
- header */
- ib_bh_t* ib_bh, /*!< in/out: rseg queue */
- mtr_t* mtr) /*!< in: mtr */
+trx_rseg_mem_create(ulint id, ulint space, ulint page_no)
{
- ulint len;
- trx_rseg_t* rseg;
- fil_addr_t node_addr;
- trx_rsegf_t* rseg_header;
- trx_ulogf_t* undo_log_hdr;
- ulint sum_of_undo_sizes;
-
- rseg = static_cast<trx_rseg_t*>(mem_zalloc(sizeof(trx_rseg_t)));
+ trx_rseg_t* rseg = static_cast<trx_rseg_t*>(
+ ut_zalloc_nokey(sizeof *rseg));
rseg->id = id;
rseg->space = space;
- rseg->zip_size = zip_size;
rseg->page_no = page_no;
+ rseg->last_page_no = FIL_NULL;
+
+ mutex_create(rseg->is_persistent()
+ ? LATCH_ID_REDO_RSEG : LATCH_ID_NOREDO_RSEG,
+ &rseg->mutex);
- mutex_create(rseg_mutex_key, &rseg->mutex, SYNC_RSEG);
+ UT_LIST_INIT(rseg->update_undo_list, &trx_undo_t::undo_list);
+ UT_LIST_INIT(rseg->update_undo_cached, &trx_undo_t::undo_list);
+ UT_LIST_INIT(rseg->insert_undo_list, &trx_undo_t::undo_list);
+ UT_LIST_INIT(rseg->insert_undo_cached, &trx_undo_t::undo_list);
- /* const_cast<trx_rseg_t*>() because this function is
- like a constructor. */
- *((trx_rseg_t**) trx_sys->rseg_array + rseg->id) = rseg;
+ return(rseg);
+}
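+
+/* rseg->is_persistent() distinguishes persistent rollback segments
+from those in the shared temporary tablespace; the latter take
+LATCH_ID_NOREDO_RSEG because changes to their pages are never
+redo-logged. */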
- rseg_header = trx_rsegf_get_new(space, zip_size, page_no, mtr);
+/** Restore the state of a persistent rollback segment.
+@param[in,out] rseg persistent rollback segment
+@param[in,out] mtr mini-transaction */
+static
+void
+trx_rseg_mem_restore(trx_rseg_t* rseg, mtr_t* mtr)
+{
+ ulint len;
+ fil_addr_t node_addr;
+ trx_rsegf_t* rseg_header;
+ trx_ulogf_t* undo_log_hdr;
+ ulint sum_of_undo_sizes;
+
+ rseg_header = trx_rsegf_get_new(rseg->space, rseg->page_no, mtr);
rseg->max_size = mtr_read_ulint(
rseg_header + TRX_RSEG_MAX_SIZE, MLOG_4BYTES, mtr);
@@ -213,12 +196,10 @@ trx_rseg_mem_create(
rseg_header + TRX_RSEG_HISTORY_SIZE, MLOG_4BYTES, mtr)
+ 1 + sum_of_undo_sizes;
- len = flst_get_len(rseg_header + TRX_RSEG_HISTORY, mtr);
+ len = flst_get_len(rseg_header + TRX_RSEG_HISTORY);
if (len > 0) {
- rseg_queue_t rseg_queue;
-
- trx_sys->rseg_history_len += len;
+ my_atomic_addlint(&trx_sys->rseg_history_len, len);
node_addr = trx_purge_get_log_from_hist(
flst_get_last(rseg_header + TRX_RSEG_HISTORY, mtr));
@@ -227,8 +208,8 @@ trx_rseg_mem_create(
rseg->last_offset = node_addr.boffset;
undo_log_hdr = trx_undo_page_get(
- rseg->space, rseg->zip_size, node_addr.page,
- mtr) + node_addr.boffset;
+ page_id_t(rseg->space, node_addr.page), mtr)
+ + node_addr.boffset;
rseg->last_trx_no = mach_read_from_8(
undo_log_hdr + TRX_UNDO_TRX_NO);
@@ -236,125 +217,114 @@ trx_rseg_mem_create(
rseg->last_del_marks = mtr_read_ulint(
undo_log_hdr + TRX_UNDO_DEL_MARKS, MLOG_2BYTES, mtr);
- rseg_queue.rseg = rseg;
- rseg_queue.trx_no = rseg->last_trx_no;
+ TrxUndoRsegs elem(rseg->last_trx_no);
+ elem.push_back(rseg);
if (rseg->last_page_no != FIL_NULL) {
- const void* ptr;
/* There is no need to cover this operation by the purge
mutex because we are still bootstrapping. */
- ptr = ib_bh_push(ib_bh, &rseg_queue);
- ut_a(ptr != NULL);
+ purge_sys->purge_queue.push(elem);
}
- } else {
- rseg->last_page_no = FIL_NULL;
}
-
- return(rseg);
}
-/********************************************************************
-Creates the memory copies for the rollback segments and initializes the
-rseg array in trx_sys at a database startup. */
-static
+/** Initialize the rollback segments in memory at database startup. */
void
-trx_rseg_create_instance(
-/*=====================*/
- trx_sysf_t* sys_header, /*!< in: trx system header */
- ib_bh_t* ib_bh, /*!< in/out: rseg queue */
- mtr_t* mtr) /*!< in: mtr */
+trx_rseg_array_init()
{
- ulint i;
-
- for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
- ulint page_no;
+ mtr_t mtr;
- page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr);
+ for (ulint i = 0; i < TRX_SYS_N_RSEGS; i++) {
+ mtr.start();
+ trx_sysf_t* sys_header = trx_sysf_get(&mtr);
+ ulint page_no = trx_sysf_rseg_get_page_no(
+ sys_header, i, &mtr);
if (page_no != FIL_NULL) {
- ulint space;
- ulint zip_size;
- trx_rseg_t* rseg = NULL;
-
- ut_a(!trx_rseg_get_on_id(i));
-
- space = trx_sysf_rseg_get_space(sys_header, i, mtr);
-
- zip_size = space ? fil_space_get_zip_size(space) : 0;
-
- rseg = trx_rseg_mem_create(
- i, space, zip_size, page_no, ib_bh, mtr);
-
- ut_a(rseg->id == i);
- } else {
- ut_a(trx_sys->rseg_array[i] == NULL);
+ trx_rseg_t* rseg = trx_rseg_mem_create(
+ i,
+ trx_sysf_rseg_get_space(sys_header, i, &mtr),
+ page_no);
+ ut_ad(rseg->is_persistent());
+ ut_ad(!trx_sys->rseg_array[rseg->id]);
+ trx_sys->rseg_array[rseg->id] = rseg;
+ trx_rseg_mem_restore(rseg, &mtr);
}
+
+ mtr.commit();
}
}
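+
+/* Each TRX_SYS slot is scanned in its own mini-transaction, so the
+TRX_SYS page latch is acquired and released once per slot during this
+restoration loop. */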
-/** Create a rollback segment.
-@param[in] space undo tablespace ID
+/** Create a persistent rollback segment.
+@param[in] space_id system or undo tablespace id
@return pointer to new rollback segment
@retval NULL on failure */
-UNIV_INTERN
trx_rseg_t*
-trx_rseg_create(ulint space)
+trx_rseg_create(ulint space_id)
{
- mtr_t mtr;
- ulint slot_no;
- trx_rseg_t* rseg = NULL;
+ trx_rseg_t* rseg = NULL;
+ mtr_t mtr;
- mtr_start(&mtr);
+ mtr.start();
/* To obey the latching order, acquire the file space
x-latch before the trx_sys->mutex. */
- mtr_x_lock(fil_space_get_latch(space, NULL), &mtr);
-
- slot_no = trx_sysf_rseg_find_free(&mtr);
-
- if (slot_no != ULINT_UNDEFINED) {
- ulint id;
- ulint page_no;
- ulint zip_size;
- trx_sysf_t* sys_header;
-
- page_no = trx_rseg_header_create(
- space, 0, ULINT_MAX, slot_no, &mtr);
-
- if (page_no != FIL_NULL) {
- sys_header = trx_sysf_get(&mtr);
-
- id = trx_sysf_rseg_get_space(sys_header, slot_no, &mtr);
- ut_a(id == space);
-
- zip_size = space ? fil_space_get_zip_size(space) : 0;
-
- rseg = trx_rseg_mem_create(
- slot_no, space, zip_size, page_no,
- purge_sys->ib_bh, &mtr);
- }
+#ifdef UNIV_DEBUG
+ const fil_space_t* space =
+#endif /* UNIV_DEBUG */
+ mtr_x_lock_space(space_id, &mtr);
+ ut_ad(space->purpose == FIL_TYPE_TABLESPACE);
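+	/* In non-debug builds the conditional declaration above compiles
+	away and mtr_x_lock_space() is called purely for its locking side
+	effect; the returned fil_space_t* only feeds the assertion. */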
+
+ ulint slot_no = trx_sysf_rseg_find_free(&mtr);
+ if (buf_block_t* block = slot_no == ULINT_UNDEFINED
+ ? NULL
+ : trx_rseg_header_create(space_id, ULINT_MAX, slot_no, &mtr)) {
+ trx_sysf_t* sys_header = trx_sysf_get(&mtr);
+
+ ulint id = trx_sysf_rseg_get_space(
+ sys_header, slot_no, &mtr);
+ ut_a(id == space_id);
+
+ rseg = trx_rseg_mem_create(slot_no, space_id,
+ block->page.id.page_no());
+ ut_ad(rseg->is_persistent());
+ ut_ad(!trx_sys->rseg_array[rseg->id]);
+ trx_sys->rseg_array[rseg->id] = rseg;
+ trx_rseg_mem_restore(rseg, &mtr);
}
- mtr_commit(&mtr);
+ mtr.commit();
+
return(rseg);
}
-/*********************************************************************//**
-Creates the memory copies for rollback segments and initializes the
-rseg array in trx_sys at a database startup. */
-UNIV_INTERN
+/** Create the temporary rollback segments. */
void
-trx_rseg_array_init(
-/*================*/
- trx_sysf_t* sys_header, /* in/out: trx system header */
- ib_bh_t* ib_bh, /*!< in: rseg queue */
- mtr_t* mtr) /*!< in: mtr */
+trx_temp_rseg_create()
{
- trx_sys->rseg_history_len = 0;
+ mtr_t mtr;
- trx_rseg_create_instance(sys_header, ib_bh, mtr);
+ for (ulong i = 0; i < TRX_SYS_N_RSEGS; i++) {
+ mtr.start();
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
+#ifdef UNIV_DEBUG
+ const fil_space_t* space =
+#endif /* UNIV_DEBUG */
+ mtr_x_lock_space(SRV_TMP_SPACE_ID, &mtr);
+ ut_ad(space->purpose == FIL_TYPE_TEMPORARY);
+
+ buf_block_t* block = trx_rseg_header_create(
+ SRV_TMP_SPACE_ID, ULINT_MAX, i, &mtr);
+ trx_rseg_t* rseg = trx_rseg_mem_create(
+ i, SRV_TMP_SPACE_ID, block->page.id.page_no());
+ ut_ad(!rseg->is_persistent());
+ ut_ad(!trx_sys->temp_rsegs[i]);
+ trx_sys->temp_rsegs[i] = rseg;
+ trx_rseg_mem_restore(rseg, &mtr);
+ mtr.commit();
+ }
}
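+
+/* All TRX_SYS_N_RSEGS temporary rollback segments are recreated in the
+shared temporary tablespace on every startup; MTR_LOG_NO_REDO keeps
+this work out of the redo log. */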
/********************************************************************
@@ -363,7 +333,6 @@ The last space id will be the sentinel value ULINT_UNDEFINED. The array
will be sorted on space id. Note: space_ids should have have space for
TRX_SYS_N_RSEGS + 1 elements.
@return number of unique rollback tablespaces in use. */
-UNIV_INTERN
ulint
trx_rseg_get_n_undo_tablespaces(
/*============================*/
@@ -374,7 +343,6 @@ trx_rseg_get_n_undo_tablespaces(
mtr_t mtr;
trx_sysf_t* sys_header;
ulint n_undo_tablespaces = 0;
- ulint space_ids_aux[TRX_SYS_N_RSEGS + 1];
mtr_start(&mtr);
@@ -417,7 +385,7 @@ trx_rseg_get_n_undo_tablespaces(
space_ids[n_undo_tablespaces] = ULINT_UNDEFINED;
if (n_undo_tablespaces > 0) {
- ut_ulint_sort(space_ids, space_ids_aux, 0, n_undo_tablespaces);
+ std::sort(space_ids, space_ids + n_undo_tablespaces);
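+		/* std::sort() sorts in place, so the auxiliary array
+		that ut_ulint_sort() required could be dropped. */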
}
return(n_undo_tablespaces);
diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc
index 13fd37f4e50..415afc4a90b 100644
--- a/storage/innobase/trx/trx0sys.cc
+++ b/storage/innobase/trx/trx0sys.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2018, MariaDB Corporation.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,16 +24,10 @@ Transaction system
Created 3/26/1996 Heikki Tuuri
*******************************************************/
+#include "mysqld.h"
#include "trx0sys.h"
+#include "sql_error.h"
-#ifdef UNIV_NONINL
-#include "trx0sys.ic"
-#endif
-
-#ifdef UNIV_HOTBACKUP
-#include "fsp0types.h"
-
-#else /* !UNIV_HOTBACKUP */
#include "fsp0fsp.h"
#include "mtr0log.h"
@@ -60,29 +54,7 @@ struct file_format_t {
};
/** The transaction system */
-UNIV_INTERN trx_sys_t* trx_sys = NULL;
-
-/** In a MySQL replication slave, in crash recovery we store the master log
-file name and position here. */
-/* @{ */
-/** Master binlog file name */
-UNIV_INTERN char trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
-/** Master binlog file position. We have successfully got the updates
-up to this position. -1 means that no crash recovery was needed, or
-there was no master log position info inside InnoDB.*/
-UNIV_INTERN ib_int64_t trx_sys_mysql_master_log_pos = -1;
-/* @} */
-
-/** If this MySQL server uses binary logging, after InnoDB has been inited
-and if it has done a crash recovery, we store the binlog file name and position
-here. */
-/* @{ */
-/** Binlog file name */
-UNIV_INTERN char trx_sys_mysql_bin_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
-/** Binlog file position, or -1 if unknown */
-UNIV_INTERN ib_int64_t trx_sys_mysql_bin_log_pos = -1;
-/* @} */
-#endif /* !UNIV_HOTBACKUP */
+trx_sys_t* trx_sys;
/** List of animal names representing file format. */
static const char* file_format_name_map[] = {
@@ -118,16 +90,43 @@ static const char* file_format_name_map[] = {
static const ulint FILE_FORMAT_NAME_N
= sizeof(file_format_name_map) / sizeof(file_format_name_map[0]);
-#ifdef UNIV_PFS_MUTEX
-/* Key to register the mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t file_format_max_mutex_key;
-UNIV_INTERN mysql_pfs_key_t trx_sys_mutex_key;
-#endif /* UNIV_PFS_RWLOCK */
+/** Check whether transaction id is valid.
+@param[in] id transaction id to check
+@param[in] name table name */
+void
+ReadView::check_trx_id_sanity(
+ trx_id_t id,
+ const table_name_t& name)
+{
+ if (id >= trx_sys->max_trx_id) {
+
+ ib::warn() << "A transaction id"
+ << " in a record of table "
+ << name
+ << " is newer than the"
+ << " system-wide maximum.";
+ ut_ad(0);
+ THD *thd = current_thd;
+ if (thd != NULL) {
+ char table_name[MAX_FULL_NAME_LEN + 1];
+
+ innobase_format_name(
+ table_name, sizeof(table_name),
+ name.m_name);
+
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_SIGNAL_WARN,
+ "InnoDB: Transaction id"
+ " in a record of table"
+ " %s is newer than system-wide"
+ " maximum.", table_name);
+ }
+ }
+}
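+
+/* This check fires when a record carries a DB_TRX_ID above the
+system-wide maximum, which usually indicates page corruption or a
+datafile copied from another server; ut_ad(0) aborts debug builds,
+while release builds only emit the warnings above. */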
-#ifndef UNIV_HOTBACKUP
#ifdef UNIV_DEBUG
/* Flag to control TRX_RSEG_N_SLOTS behavior debugging. */
-UNIV_INTERN uint trx_rseg_n_slots_debug = 0;
+uint trx_rseg_n_slots_debug = 0;
#endif
/** This is used to track the maximum file format id known to InnoDB. It's
@@ -135,44 +134,8 @@ updated via SET GLOBAL innodb_file_format_max = 'x' or when we open
or create a table. */
static file_format_t file_format_max;
-#ifdef UNIV_DEBUG
-/****************************************************************//**
-Checks whether a trx is in one of rw_trx_list or ro_trx_list.
-@return TRUE if is in */
-UNIV_INTERN
-ibool
-trx_in_trx_list(
-/*============*/
- const trx_t* in_trx) /*!< in: transaction */
-{
- const trx_t* trx;
- trx_list_t* trx_list;
-
- /* Non-locking autocommits should not hold any locks. */
- assert_trx_in_list(in_trx);
-
- trx_list = in_trx->read_only
- ? &trx_sys->ro_trx_list : &trx_sys->rw_trx_list;
-
- ut_ad(mutex_own(&trx_sys->mutex));
-
- ut_ad(trx_assert_started(in_trx));
-
- for (trx = UT_LIST_GET_FIRST(*trx_list);
- trx != NULL && trx != in_trx;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-
- assert_trx_in_list(trx);
- ut_ad(trx->read_only == (trx_list == &trx_sys->ro_trx_list));
- }
-
- return(trx != NULL);
-}
-#endif /* UNIV_DEBUG */
-
/*****************************************************************//**
Writes the value of max_trx_id to the file based trx system header. */
-UNIV_INTERN
void
trx_sys_flush_max_trx_id(void)
/*==========================*/
@@ -180,7 +143,10 @@ trx_sys_flush_max_trx_id(void)
mtr_t mtr;
trx_sysf_t* sys_header;
- ut_ad(mutex_own(&trx_sys->mutex));
+	/* wsrep_fake_trx_id violates this assertion; the note was
+	copied from trx_sys_get_new_trx_id(). */
+ ut_ad(trx_sys_mutex_own());
if (!srv_read_only_mode) {
mtr_start(&mtr);
@@ -200,117 +166,72 @@ Updates the offset information about the end of the MySQL binlog entry
which corresponds to the transaction just being committed. In a MySQL
replication slave updates the latest master binlog position up to which
replication has proceeded. */
-UNIV_INTERN
void
trx_sys_update_mysql_binlog_offset(
/*===============================*/
const char* file_name,/*!< in: MySQL log file name */
- ib_int64_t offset, /*!< in: position in that log file */
- ulint field, /*!< in: offset of the MySQL log info field in
- the trx sys header */
-#ifdef WITH_WSREP
+ int64_t offset, /*!< in: position in that log file */
trx_sysf_t* sys_header, /*!< in: trx sys header */
-#endif /* WITH_WSREP */
mtr_t* mtr) /*!< in: mtr */
{
-#ifndef WITH_WSREP
- trx_sysf_t* sys_header;
-#endif /* !WITH_WSREP */
+ DBUG_PRINT("InnoDB",("trx_mysql_binlog_offset: %lld", (longlong) offset));
+
+ const size_t len = strlen(file_name) + 1;
- if (ut_strlen(file_name) >= TRX_SYS_MYSQL_LOG_NAME_LEN) {
+ if (len > TRX_SYS_MYSQL_LOG_NAME_LEN) {
/* We cannot fit the name to the 512 bytes we have reserved */
return;
}
-#ifndef WITH_WSREP
- sys_header = trx_sysf_get(mtr);
-#endif /* !WITH_WSREP */
-
- if (mach_read_from_4(sys_header + field
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
+ if (mach_read_from_4(TRX_SYS_MYSQL_LOG_MAGIC_N_FLD
+ + TRX_SYS_MYSQL_LOG_INFO + sys_header)
!= TRX_SYS_MYSQL_LOG_MAGIC_N) {
- mlog_write_ulint(sys_header + field
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD,
+ mlog_write_ulint(TRX_SYS_MYSQL_LOG_MAGIC_N_FLD
+ + TRX_SYS_MYSQL_LOG_INFO + sys_header,
TRX_SYS_MYSQL_LOG_MAGIC_N,
MLOG_4BYTES, mtr);
}
- if (0 != strcmp((char*) (sys_header + field + TRX_SYS_MYSQL_LOG_NAME),
- file_name)) {
-
- mlog_write_string(sys_header + field
- + TRX_SYS_MYSQL_LOG_NAME,
- (byte*) file_name, 1 + ut_strlen(file_name),
- mtr);
- }
-
- if (mach_read_from_4(sys_header + field
- + TRX_SYS_MYSQL_LOG_OFFSET_HIGH) > 0
- || (offset >> 32) > 0) {
-
- mlog_write_ulint(sys_header + field
- + TRX_SYS_MYSQL_LOG_OFFSET_HIGH,
- (ulint)(offset >> 32),
- MLOG_4BYTES, mtr);
+ if (memcmp(file_name, TRX_SYS_MYSQL_LOG_NAME + TRX_SYS_MYSQL_LOG_INFO
+ + sys_header, len)) {
+ mlog_write_string(TRX_SYS_MYSQL_LOG_NAME
+ + TRX_SYS_MYSQL_LOG_INFO
+ + sys_header,
+ reinterpret_cast<const byte*>(file_name),
+ len, mtr);
}
- mlog_write_ulint(sys_header + field
- + TRX_SYS_MYSQL_LOG_OFFSET_LOW,
- (ulint)(offset & 0xFFFFFFFFUL),
- MLOG_4BYTES, mtr);
+ mlog_write_ull(TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_OFFSET
+ + sys_header, offset, mtr);
}
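+
+/* The 64-bit binlog offset is now written with a single
+mlog_write_ull() call; the deleted code above shows the previous
+split into 4-byte high and low words. */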
-/*****************************************************************//**
-Stores the MySQL binlog offset info in the trx system header if
-the magic number shows it valid, and print the info to stderr */
-UNIV_INTERN
+/** Display the MySQL binlog offset info if it is present in the trx
+system header. */
void
-trx_sys_print_mysql_binlog_offset(void)
-/*===================================*/
+trx_sys_print_mysql_binlog_offset()
{
- trx_sysf_t* sys_header;
mtr_t mtr;
- ulint trx_sys_mysql_bin_log_pos_high;
- ulint trx_sys_mysql_bin_log_pos_low;
-
- mtr_start(&mtr);
- sys_header = trx_sysf_get(&mtr);
+ mtr.start();
- if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
- != TRX_SYS_MYSQL_LOG_MAGIC_N) {
-
- mtr_commit(&mtr);
+ const trx_sysf_t* sys_header = trx_sysf_get(&mtr);
- return;
+ if (mach_read_from_4(TRX_SYS_MYSQL_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD + sys_header)
+ == TRX_SYS_MYSQL_LOG_MAGIC_N) {
+ ib::info() << "Last binlog file '"
+ << TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_NAME
+ + sys_header
+ << "', position "
+ << mach_read_from_8(TRX_SYS_MYSQL_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_OFFSET
+ + sys_header);
}
- trx_sys_mysql_bin_log_pos_high = mach_read_from_4(
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_HIGH);
- trx_sys_mysql_bin_log_pos_low = mach_read_from_4(
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_LOW);
-
- trx_sys_mysql_bin_log_pos
- = (((ib_int64_t) trx_sys_mysql_bin_log_pos_high) << 32)
- + (ib_int64_t) trx_sys_mysql_bin_log_pos_low;
-
- ut_memcpy(trx_sys_mysql_bin_log_name,
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_NAME, TRX_SYS_MYSQL_LOG_NAME_LEN);
-
- fprintf(stderr,
- "InnoDB: Last MySQL binlog file position %lu %lu,"
- " file name %s\n",
- trx_sys_mysql_bin_log_pos_high, trx_sys_mysql_bin_log_pos_low,
- trx_sys_mysql_bin_log_name);
-
- mtr_commit(&mtr);
+ mtr.commit();
}
#ifdef WITH_WSREP
@@ -346,11 +267,18 @@ trx_sys_update_wsrep_checkpoint(
trx_sysf_t* sys_header,
mtr_t* mtr)
{
+ ut_ad(xid->formatID == 1);
+ ut_ad(wsrep_is_wsrep_xid(xid));
+
+ if (mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_MAGIC_N_FLD)
+ != TRX_SYS_WSREP_XID_MAGIC_N) {
+ mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_MAGIC_N_FLD,
+ TRX_SYS_WSREP_XID_MAGIC_N,
+ MLOG_4BYTES, mtr);
#ifdef UNIV_DEBUG
- if (!xid->is_null()
- && mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_MAGIC_N_FLD)
- == TRX_SYS_WSREP_XID_MAGIC_N) {
+ } else {
/* Check that seqno is monotonically increasing */
unsigned char xid_uuid[16];
long long xid_seqno = read_wsrep_xid_seqno(xid);
@@ -364,19 +292,7 @@ trx_sys_update_wsrep_checkpoint(
}
trx_sys_cur_xid_seqno = xid_seqno;
- }
#endif /* UNIV_DEBUG */
-
- ut_ad(xid && mtr);
- ut_a(xid->is_null() || wsrep_is_wsrep_xid((const XID *)xid));
-
- if (mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_MAGIC_N_FLD)
- != TRX_SYS_WSREP_XID_MAGIC_N) {
- mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_MAGIC_N_FLD,
- TRX_SYS_WSREP_XID_MAGIC_N,
- MLOG_4BYTES, mtr);
}
mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
@@ -397,16 +313,16 @@ trx_sys_update_wsrep_checkpoint(
XIDDATASIZE, mtr);
}
-/** Read WSREP XID from sys_header of TRX_SYS_PAGE_NO = 5.
-@param[out] xid Transaction XID
-@return true on success, false on error. */
+/** Read WSREP checkpoint XID from sys header.
+@param[out] xid WSREP XID
+@return whether the checkpoint was present */
UNIV_INTERN
bool
trx_sys_read_wsrep_checkpoint(XID* xid)
{
- trx_sysf_t* sys_header;
- mtr_t mtr;
- ulint magic;
+ trx_sysf_t* sys_header;
+ mtr_t mtr;
+ ulint magic;
ut_ad(xid);
@@ -415,29 +331,29 @@ trx_sys_read_wsrep_checkpoint(XID* xid)
sys_header = trx_sysf_get(&mtr);
if ((magic = mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_MAGIC_N_FLD))
- != TRX_SYS_WSREP_XID_MAGIC_N) {
+ + TRX_SYS_WSREP_XID_MAGIC_N_FLD))
+ != TRX_SYS_WSREP_XID_MAGIC_N) {
+ mtr.commit();
xid->null();
xid->gtrid_length = 0;
xid->bqual_length = 0;
memset(xid->data, 0, sizeof xid->data);
- trx_sys_update_wsrep_checkpoint(xid, sys_header, &mtr);
- mtr_commit(&mtr);
+ memset(xid->data + 24, 0xff, 8);
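+		/* Presumably this stores -1 (WSREP_SEQNO_UNDEFINED) as
+		the 8-byte seqno at byte offset 24 of the XID data,
+		marking the checkpoint as absent. */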
return false;
}
- xid->formatID = (int)mach_read_from_4(
- sys_header
- + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_FORMAT);
+ xid->formatID = (int)mach_read_from_4(
+ sys_header
+ + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_FORMAT);
xid->gtrid_length = (int)mach_read_from_4(
- sys_header
- + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_GTRID_LEN);
+ sys_header
+ + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_GTRID_LEN);
xid->bqual_length = (int)mach_read_from_4(
- sys_header
- + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_BQUAL_LEN);
+ sys_header
+ + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_BQUAL_LEN);
ut_memcpy(xid->data,
- sys_header + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_DATA,
- XIDDATASIZE);
+ sys_header + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_DATA,
+ XIDDATASIZE);
mtr_commit(&mtr);
return true;
@@ -445,86 +361,41 @@ trx_sys_read_wsrep_checkpoint(XID* xid)
#endif /* WITH_WSREP */
-/*****************************************************************//**
-Prints to stderr the MySQL master log offset info in the trx system header if
-the magic number shows it valid. */
-UNIV_INTERN
-void
-trx_sys_print_mysql_master_log_pos(void)
-/*====================================*/
+/** @return an unallocated rollback segment slot in the TRX_SYS header
+@retval ULINT_UNDEFINED if not found */
+ulint
+trx_sysf_rseg_find_free(mtr_t* mtr)
{
- trx_sysf_t* sys_header;
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- sys_header = trx_sysf_get(&mtr);
-
- if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
- != TRX_SYS_MYSQL_LOG_MAGIC_N) {
+ trx_sysf_t* sys_header = trx_sysf_get(mtr);
- mtr_commit(&mtr);
-
- return;
+ for (ulint i = 0; i < TRX_SYS_N_RSEGS; i++) {
+ if (trx_sysf_rseg_get_page_no(sys_header, i, mtr)
+ == FIL_NULL) {
+ return(i);
+ }
}
- fprintf(stderr,
- "InnoDB: In a MySQL replication slave the last"
- " master binlog file\n"
- "InnoDB: position %lu %lu, file name %s\n",
- (ulong) mach_read_from_4(sys_header
- + TRX_SYS_MYSQL_MASTER_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
- (ulong) mach_read_from_4(sys_header
- + TRX_SYS_MYSQL_MASTER_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
- sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
- + TRX_SYS_MYSQL_LOG_NAME);
- /* Copy the master log position info to global variables we can
- use in ha_innobase.cc to initialize glob_mi to right values */
-
- ut_memcpy(trx_sys_mysql_master_log_name,
- sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
- + TRX_SYS_MYSQL_LOG_NAME,
- TRX_SYS_MYSQL_LOG_NAME_LEN);
-
- trx_sys_mysql_master_log_pos
- = (((ib_int64_t) mach_read_from_4(
- sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_HIGH)) << 32)
- + ((ib_int64_t) mach_read_from_4(
- sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_LOW));
- mtr_commit(&mtr);
+ return(ULINT_UNDEFINED);
}
-/****************************************************************//**
-Looks for a free slot for a rollback segment in the trx system file copy.
-@return slot index or ULINT_UNDEFINED if not found */
-UNIV_INTERN
-ulint
-trx_sysf_rseg_find_free(
-/*====================*/
- mtr_t* mtr) /*!< in: mtr */
+/** Count the number of initialized persistent rollback segment slots
+in the TRX_SYS header, storing the result in srv_available_undo_logs. */
+static
+void
+trx_sysf_get_n_rseg_slots()
{
- ulint i;
- trx_sysf_t* sys_header;
-
- sys_header = trx_sysf_get(mtr);
-
- for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
- ulint page_no;
-
- page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr);
+ mtr_t mtr;
+ mtr.start();
- if (page_no == FIL_NULL) {
+ trx_sysf_t* sys_header = trx_sysf_get(&mtr);
+ srv_available_undo_logs = 0;
- return(i);
- }
+ for (ulint i = 0; i < TRX_SYS_N_RSEGS; i++) {
+ srv_available_undo_logs
+ += trx_sysf_rseg_get_page_no(sys_header, i, &mtr)
+ != FIL_NULL;
}
- return(ULINT_UNDEFINED);
+ mtr.commit();
}
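+
+/* The bool-to-ulint addition above counts every slot whose page
+number is not FIL_NULL, i.e. every persistent rollback segment header
+page that exists. */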
/*****************************************************************//**
@@ -540,9 +411,7 @@ trx_sysf_create(
ulint slot_no;
buf_block_t* block;
page_t* page;
- ulint page_no;
byte* ptr;
- ulint len;
ut_ad(mtr);
@@ -550,14 +419,14 @@ trx_sysf_create(
then enter the kernel: we must do it in this order to conform
to the latching order rules. */
- mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE, NULL), mtr);
+ mtr_x_lock_space(TRX_SYS_SPACE, mtr);
/* Create the trx sys file block in a new allocated file segment */
block = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
mtr);
buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
- ut_a(buf_block_get_page_no(block) == TRX_SYS_PAGE_NO);
+ ut_a(block->page.id.page_no() == TRX_SYS_PAGE_NO);
page = buf_block_get_frame(block);
@@ -577,13 +446,12 @@ trx_sysf_create(
mach_write_to_8(sys_header + TRX_SYS_TRX_ID_STORE, 1);
/* Reset the rollback segment slots. Old versions of InnoDB
- define TRX_SYS_N_RSEGS as 256 (TRX_SYS_OLD_N_RSEGS) and expect
+ (before MySQL 5.5) define TRX_SYS_N_RSEGS as 256 and expect
that the whole array is initialized. */
ptr = TRX_SYS_RSEGS + sys_header;
- len = ut_max(TRX_SYS_OLD_N_RSEGS, TRX_SYS_N_RSEGS)
- * TRX_SYS_RSEG_SLOT_SIZE;
- memset(ptr, 0xff, len);
- ptr += len;
+ compile_time_assert(256 >= TRX_SYS_N_RSEGS);
+ memset(ptr, 0xff, 256 * TRX_SYS_RSEG_SLOT_SIZE);
+ ptr += 256 * TRX_SYS_RSEG_SLOT_SIZE;
ut_a(ptr <= page + (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END));
/* Initialize all of the page. This part used to be uninitialized. */
@@ -594,69 +462,21 @@ trx_sysf_create(
/* Create the first rollback segment in the SYSTEM tablespace */
slot_no = trx_sysf_rseg_find_free(mtr);
- page_no = trx_rseg_header_create(TRX_SYS_SPACE, 0, ULINT_MAX, slot_no,
- mtr);
+ buf_block_t* rblock = trx_rseg_header_create(TRX_SYS_SPACE, ULINT_MAX,
+ slot_no, mtr);
ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
- ut_a(page_no == FSP_FIRST_RSEG_PAGE_NO);
-}
-
-/*****************************************************************//**
-Compare two trx_rseg_t instances on last_trx_no. */
-static
-int
-trx_rseg_compare_last_trx_no(
-/*=========================*/
- const void* p1, /*!< in: elem to compare */
- const void* p2) /*!< in: elem to compare */
-{
- ib_int64_t cmp;
-
- const rseg_queue_t* rseg_q1 = (const rseg_queue_t*) p1;
- const rseg_queue_t* rseg_q2 = (const rseg_queue_t*) p2;
-
- cmp = rseg_q1->trx_no - rseg_q2->trx_no;
-
- if (cmp < 0) {
- return(-1);
- } else if (cmp > 0) {
- return(1);
- }
-
- return(0);
+ ut_a(rblock->page.id.page_no() == FSP_FIRST_RSEG_PAGE_NO);
}
-/*****************************************************************//**
-Creates and initializes the central memory structures for the transaction
-system. This is called when the database is started.
-@return min binary heap of rsegs to purge */
-UNIV_INTERN
-ib_bh_t*
-trx_sys_init_at_db_start(void)
-/*==========================*/
+/** Initialize the transaction system main-memory data structures. */
+void
+trx_sys_init_at_db_start()
{
- mtr_t mtr;
- ib_bh_t* ib_bh;
trx_sysf_t* sys_header;
ib_uint64_t rows_to_undo = 0;
const char* unit = "";
- /* We create the min binary heap here and pass ownership to
- purge when we init the purge sub-system. Purge is responsible
- for freeing the binary heap. */
-
- ib_bh = ib_bh_create(
- trx_rseg_compare_last_trx_no,
- sizeof(rseg_queue_t), TRX_SYS_N_RSEGS);
-
- mtr_start(&mtr);
-
- sys_header = trx_sysf_get(&mtr);
-
- if (srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
- trx_rseg_array_init(sys_header, ib_bh, &mtr);
- }
-
/* VERY important: after the database is started, max_trx_id value is
divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
trx_sys_get_new_trx_id will evaluate to TRUE when the function
@@ -664,26 +484,26 @@ trx_sys_init_at_db_start(void)
to the disk-based header! Thus trx id values will not overlap when
the database is repeatedly started! */
+ mtr_t mtr;
+ mtr.start();
+
+ sys_header = trx_sysf_get(&mtr);
+
trx_sys->max_trx_id = 2 * TRX_SYS_TRX_ID_WRITE_MARGIN
+ ut_uint64_align_up(mach_read_from_8(sys_header
+ TRX_SYS_TRX_ID_STORE),
TRX_SYS_TRX_ID_WRITE_MARGIN);
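+	/* Worked example with illustrative numbers: assuming the usual
+	write margin of 256, a stored TRX_SYS_TRX_ID_STORE value of 1000
+	yields ut_uint64_align_up(1000, 256) == 1024, so max_trx_id
+	becomes 2 * 256 + 1024 == 1536, safely above any id assigned
+	before the previous shutdown or crash. */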
+ mtr.commit();
ut_d(trx_sys->rw_max_trx_id = trx_sys->max_trx_id);
- UT_LIST_INIT(trx_sys->mysql_trx_list);
-
- trx_dummy_sess = sess_open();
-
trx_lists_init_at_db_start();
- /* This S lock is not strictly required, it is here only to satisfy
+	/* This mutex is not strictly required; it is here only to satisfy
the debug code (assertions). We are still running in single threaded
bootstrap mode. */
- mutex_enter(&trx_sys->mutex);
-
- ut_a(UT_LIST_GET_LEN(trx_sys->ro_trx_list) == 0);
+ trx_sys_mutex_enter();
if (UT_LIST_GET_LEN(trx_sys->rw_trx_list) > 0) {
const trx_t* trx;
@@ -705,43 +525,45 @@ trx_sys_init_at_db_start(void)
rows_to_undo = rows_to_undo / 1000000;
}
- fprintf(stderr,
- "InnoDB: %lu transaction(s) which must be"
- " rolled back or cleaned up\n"
- "InnoDB: in total %lu%s row operations to undo\n",
- (ulong) UT_LIST_GET_LEN(trx_sys->rw_trx_list),
- (ulong) rows_to_undo, unit);
+ ib::info() << UT_LIST_GET_LEN(trx_sys->rw_trx_list)
+ << " transaction(s) which must be rolled back or"
+ " cleaned up in total " << rows_to_undo << unit
+ << " row operations to undo";
- fprintf(stderr, "InnoDB: Trx id counter is " TRX_ID_FMT "\n",
- trx_sys->max_trx_id);
+ ib::info() << "Trx id counter is " << trx_sys->max_trx_id;
}
- mutex_exit(&trx_sys->mutex);
-
- UT_LIST_INIT(trx_sys->view_list);
+ trx_sys_mutex_exit();
- mtr_commit(&mtr);
-
- return(ib_bh);
+ trx_sys->mvcc->clone_oldest_view(&purge_sys->view);
}
/*****************************************************************//**
-Creates the trx_sys instance and initializes ib_bh and mutex. */
-UNIV_INTERN
+Creates the trx_sys instance and initializes its mutex, transaction
+lists and MVCC data structures. */
void
trx_sys_create(void)
/*================*/
{
ut_ad(trx_sys == NULL);
- trx_sys = static_cast<trx_sys_t*>(mem_zalloc(sizeof(*trx_sys)));
+ trx_sys = static_cast<trx_sys_t*>(ut_zalloc_nokey(sizeof(*trx_sys)));
+
+ mutex_create(LATCH_ID_TRX_SYS, &trx_sys->mutex);
+
+ UT_LIST_INIT(trx_sys->serialisation_list, &trx_t::no_list);
+ UT_LIST_INIT(trx_sys->rw_trx_list, &trx_t::trx_list);
+ UT_LIST_INIT(trx_sys->mysql_trx_list, &trx_t::mysql_trx_list);
- mutex_create(trx_sys_mutex_key, &trx_sys->mutex, SYNC_TRX_SYS);
+ trx_sys->mvcc = UT_NEW_NOKEY(MVCC(1024));
+
+ new(&trx_sys->rw_trx_ids) trx_ids_t(ut_allocator<trx_id_t>(
+ mem_key_trx_sys_t_rw_trx_ids));
+
+ new(&trx_sys->rw_trx_set) TrxIdSet();
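+
+	/* The placement-new construction of rw_trx_ids and rw_trx_set is
+	paired with the explicit destructor calls in trx_sys_close(),
+	because trx_sys itself is allocated with ut_zalloc_nokey(). */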
}
/*****************************************************************//**
Creates and initializes the transaction system at the database creation. */
-UNIV_INTERN
void
trx_sys_create_sys_pages(void)
/*==========================*/
@@ -757,7 +579,7 @@ trx_sys_create_sys_pages(void)
/*****************************************************************//**
Update the file format tag.
-@return always TRUE */
+@return always TRUE */
static
ibool
trx_sys_file_format_max_write(
@@ -774,7 +596,8 @@ trx_sys_file_format_max_write(
mtr_start(&mtr);
block = buf_page_get(
- TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
+ page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO), univ_page_size,
+ RW_X_LATCH, &mtr);
file_format_max.id = format_id;
file_format_max.name = trx_sys_file_format_id_to_name(format_id);
@@ -795,7 +618,7 @@ trx_sys_file_format_max_write(
/*****************************************************************//**
Read the file format tag.
-@return the file format or ULINT_UNDEFINED if not set. */
+@return the file format or ULINT_UNDEFINED if not set. */
static
ulint
trx_sys_file_format_max_read(void)
@@ -811,7 +634,8 @@ trx_sys_file_format_max_read(void)
mtr_start(&mtr);
block = buf_page_get(
- TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
+ page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO), univ_page_size,
+ RW_X_LATCH, &mtr);
ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
file_format_id = mach_read_from_8(ptr);
@@ -831,8 +655,7 @@ trx_sys_file_format_max_read(void)
/*****************************************************************//**
Get the name representation of the file format from its id.
-@return pointer to the name */
-UNIV_INTERN
+@return pointer to the name */
const char*
trx_sys_file_format_id_to_name(
/*===========================*/
@@ -846,8 +669,7 @@ trx_sys_file_format_id_to_name(
/*****************************************************************//**
Check for the max file format tag stored on disk. Note: If max_format_id
is == UNIV_FORMAT_MAX + 1 then we only print a warning.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
dberr_t
trx_sys_file_format_max_check(
/*==========================*/
@@ -865,19 +687,24 @@ trx_sys_file_format_max_check(
format_id = UNIV_FORMAT_MIN;
}
- ib_logf(IB_LOG_LEVEL_INFO,
- "Highest supported file format is %s.",
- trx_sys_file_format_id_to_name(UNIV_FORMAT_MAX));
+ ib::info() << "Highest supported file format is "
+ << trx_sys_file_format_id_to_name(UNIV_FORMAT_MAX) << ".";
if (format_id > UNIV_FORMAT_MAX) {
ut_a(format_id < FILE_FORMAT_NAME_N);
- ib_logf(max_format_id <= UNIV_FORMAT_MAX
- ? IB_LOG_LEVEL_ERROR : IB_LOG_LEVEL_WARN,
- "The system tablespace is in a file "
- "format that this version doesn't support - %s.",
- trx_sys_file_format_id_to_name(format_id));
+ const std::string msg = std::string("The system"
+ " tablespace is in a file format that this version"
+ " doesn't support - ")
+ + trx_sys_file_format_id_to_name(format_id)
+ + ".";
+
+ if (max_format_id <= UNIV_FORMAT_MAX) {
+ ib::error() << msg;
+ } else {
+ ib::warn() << msg;
+ }
if (max_format_id <= UNIV_FORMAT_MAX) {
return(DB_ERROR);
@@ -897,8 +724,7 @@ trx_sys_file_format_max_check(
/*****************************************************************//**
Set the file format id unconditionally except if it's already the
same value.
-@return TRUE if value updated */
-UNIV_INTERN
+@return TRUE if value updated */
ibool
trx_sys_file_format_max_set(
/*========================*/
@@ -928,7 +754,6 @@ Tags the system table space with minimum format id if it has not been
tagged yet.
WARNING: This function is only called during the startup and AFTER the
redo log application during recovery has finished. */
-UNIV_INTERN
void
trx_sys_file_format_tag_init(void)
/*==============================*/
@@ -946,8 +771,7 @@ trx_sys_file_format_tag_init(void)
/********************************************************************//**
Update the file format tag in the system tablespace only if the given
format id is greater than the known max id.
-@return TRUE if format_id was bigger than the known max id */
-UNIV_INTERN
+@return TRUE if format_id was bigger than the known max id */
ibool
trx_sys_file_format_max_upgrade(
/*============================*/
@@ -974,8 +798,7 @@ trx_sys_file_format_max_upgrade(
/*****************************************************************//**
Get the name representation of the file format from its id.
-@return pointer to the max format name */
-UNIV_INTERN
+@return pointer to the max format name */
const char*
trx_sys_file_format_max_get(void)
/*=============================*/
@@ -985,13 +808,11 @@ trx_sys_file_format_max_get(void)
/*****************************************************************//**
Initializes the tablespace tag system. */
-UNIV_INTERN
void
trx_sys_file_format_init(void)
/*==========================*/
{
- mutex_create(file_format_max_mutex_key,
- &file_format_max.mutex, SYNC_FILE_FORMAT_TAG);
+ mutex_create(LATCH_ID_FILE_FORMAT_MAX, &file_format_max.mutex);
/* We don't need a mutex here, as this function should only
be called once at start up. */
@@ -1003,443 +824,231 @@ trx_sys_file_format_init(void)
/*****************************************************************//**
Closes the tablespace tag system. */
-UNIV_INTERN
void
trx_sys_file_format_close(void)
/*===========================*/
{
- /* Does nothing at the moment */
+ mutex_free(&file_format_max.mutex);
}
-/*********************************************************************
-Creates the rollback segments.
-@return number of rollback segments that are active. */
-UNIV_INTERN
-ulint
-trx_sys_create_rsegs(
-/*=================*/
- ulint n_spaces, /*!< number of tablespaces for UNDO logs */
- ulint n_rsegs) /*!< number of rollback segments to create */
+/** Create the rollback segments.
+@return whether the creation succeeded */
+bool
+trx_sys_create_rsegs()
{
- mtr_t mtr;
- ulint n_used;
+ /* srv_available_undo_logs reflects the number of persistent
+ rollback segments that have been initialized in the
+ transaction system header page.
- ut_a(n_spaces < TRX_SYS_N_RSEGS);
- ut_a(n_rsegs <= TRX_SYS_N_RSEGS);
+ srv_undo_logs determines how many of the
+ srv_available_undo_logs rollback segments may be used for
+ logging new transactions. */
+ ut_ad(srv_undo_tablespaces <= TRX_SYS_MAX_UNDO_SPACES);
+ ut_ad(srv_undo_logs <= TRX_SYS_N_RSEGS);
if (srv_read_only_mode) {
- return(ULINT_UNDEFINED);
+ srv_undo_logs = srv_available_undo_logs = ULONG_UNDEFINED;
+ return(true);
}
/* This is executed in single-threaded mode therefore it is not
necessary to use the same mtr in trx_rseg_create(). n_used cannot
change while the function is executing. */
+ trx_sysf_get_n_rseg_slots();
- mtr_start(&mtr);
- n_used = trx_sysf_rseg_find_free(&mtr);
- mtr_commit(&mtr);
-
- if (n_used == ULINT_UNDEFINED) {
- n_used = TRX_SYS_N_RSEGS;
- }
+ ut_ad(srv_available_undo_logs <= TRX_SYS_N_RSEGS);
- /* Do not create additional rollback segments if innodb_force_recovery
- has been set and the database was not shutdown cleanly. */
+ /* The first persistent rollback segment is always initialized
+ in the system tablespace. */
+ ut_a(srv_available_undo_logs > 0);
- if (!srv_force_recovery && !recv_needed_recovery && n_used < n_rsegs) {
- ulint i;
- ulint new_rsegs = n_rsegs - n_used;
+ if (srv_force_recovery) {
+ /* Do not create additional rollback segments if
+ innodb_force_recovery has been set. */
+ if (srv_undo_logs > srv_available_undo_logs) {
+ srv_undo_logs = srv_available_undo_logs;
+ }
+ } else {
+ for (ulint i = 0; srv_available_undo_logs < srv_undo_logs;
+ i++, srv_available_undo_logs++) {
+ /* Tablespace 0 is the system tablespace.
+ Dedicated undo log tablespaces start from 1. */
+ ulint space = srv_undo_tablespaces > 0
+ ? (i % srv_undo_tablespaces)
+ + srv_undo_space_id_start
+ : TRX_SYS_SPACE;
+
+ if (!trx_rseg_create(space)) {
+ ib::error() << "Unable to allocate the"
+ " requested innodb_undo_logs";
+ return(false);
+ }
- for (i = 0; i < new_rsegs; ++i) {
- ulint space_id;
- space_id = (n_spaces == 0) ? 0
- : (srv_undo_space_id_start + i % n_spaces);
+			/* Increase the number of active undo
+			tablespaces when a new rollback segment is
+			assigned to a new undo tablespace. */
+ if (space > srv_undo_tablespaces_active) {
+ srv_undo_tablespaces_active++;
- /* Tablespace 0 is the system tablespace. */
- if (trx_rseg_create(space_id) != NULL) {
- ++n_used;
- } else {
- break;
+ ut_ad(srv_undo_tablespaces_active == space);
}
}
}
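+	/* Illustrative example: with srv_undo_tablespaces == 2 and
+	srv_undo_space_id_start == 1, the loop above places new rollback
+	segments in tablespaces 1, 2, 1, 2, ... in round-robin order;
+	with srv_undo_tablespaces == 0 they all land in TRX_SYS_SPACE. */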
- ib_logf(IB_LOG_LEVEL_INFO,
- "%lu rollback segment(s) are active.", n_used);
-
- return(n_used);
-}
-
-#else /* !UNIV_HOTBACKUP */
-/*****************************************************************//**
-Prints to stderr the MySQL binlog info in the system header if the
-magic number shows it valid. */
-UNIV_INTERN
-void
-trx_sys_print_mysql_binlog_offset_from_page(
-/*========================================*/
- const byte* page) /*!< in: buffer containing the trx
- system header page, i.e., page number
- TRX_SYS_PAGE_NO in the tablespace */
-{
- const trx_sysf_t* sys_header;
-
- sys_header = page + TRX_SYS;
-
- if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
- == TRX_SYS_MYSQL_LOG_MAGIC_N) {
-
- fprintf(stderr,
- "mysqlbackup: Last MySQL binlog file position %lu %lu,"
- " file name %s\n",
- (ulong) mach_read_from_4(
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
- (ulong) mach_read_from_4(
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_NAME);
- }
-}
-
-/*****************************************************************//**
-Reads the file format id from the first system table space file.
-Even if the call succeeds and returns TRUE, the returned format id
-may be ULINT_UNDEFINED signalling that the format id was not present
-in the data file.
-@return TRUE if call succeeds */
-UNIV_INTERN
-ibool
-trx_sys_read_file_format_id(
-/*========================*/
- const char *pathname, /*!< in: pathname of the first system
- table space file */
- ulint *format_id) /*!< out: file format of the system table
- space */
-{
- os_file_t file;
- ibool success;
- byte buf[UNIV_PAGE_SIZE * 2];
- page_t* page = ut_align(buf, UNIV_PAGE_SIZE);
- const byte* ptr;
- ib_id_t file_format_id;
-
- *format_id = ULINT_UNDEFINED;
-
- file = os_file_create_simple_no_error_handling(
- innodb_file_data_key,
- pathname,
- OS_FILE_OPEN,
- OS_FILE_READ_ONLY,
- &success
- );
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(true);
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " mysqlbackup: Error: trying to read system "
- "tablespace file format,\n"
- " mysqlbackup: but could not open the tablespace "
- "file %s!\n", pathname);
- return(FALSE);
- }
-
- /* Read the page on which file format is stored */
-
- success = os_file_read_no_error_handling(
- file, page, TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE, UNIV_PAGE_SIZE);
-
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(true);
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " mysqlbackup: Error: trying to read system "
- "tablespace file format,\n"
- " mysqlbackup: but failed to read the tablespace "
- "file %s!\n", pathname);
-
- os_file_close(file);
- return(FALSE);
- }
- os_file_close(file);
-
- /* get the file format from the page */
- ptr = page + TRX_SYS_FILE_FORMAT_TAG;
- file_format_id = mach_read_from_8(ptr);
- file_format_id -= TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
-
- if (file_format_id >= FILE_FORMAT_NAME_N) {
-
- /* Either it has never been tagged, or garbage in it. */
- return(TRUE);
- }
-
- *format_id = (ulint) file_format_id;
-
- return(TRUE);
-}
-
-/*****************************************************************//**
-Reads the file format id from the given per-table data file.
-@return TRUE if call succeeds */
-UNIV_INTERN
-ibool
-trx_sys_read_pertable_file_format_id(
-/*=================================*/
- const char *pathname, /*!< in: pathname of a per-table
- datafile */
- ulint *format_id) /*!< out: file format of the per-table
- data file */
-{
- os_file_t file;
- ibool success;
- byte buf[UNIV_PAGE_SIZE * 2];
- page_t* page = ut_align(buf, UNIV_PAGE_SIZE);
- const byte* ptr;
- ib_uint32_t flags;
-
- *format_id = ULINT_UNDEFINED;
-
- file = os_file_create_simple_no_error_handling(
- innodb_file_data_key,
- pathname,
- OS_FILE_OPEN,
- OS_FILE_READ_ONLY,
- &success
- );
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(true);
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " mysqlbackup: Error: trying to read per-table "
- "tablespace format,\n"
- " mysqlbackup: but could not open the tablespace "
- "file %s!\n", pathname);
-
- return(FALSE);
- }
-
- /* Read the first page of the per-table datafile */
+ ut_ad(srv_undo_logs <= srv_available_undo_logs);
- success = os_file_read_no_error_handling(file, page, 0, UNIV_PAGE_SIZE);
-
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(true);
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " mysqlbackup: Error: trying to per-table data file "
- "format,\n"
- " mysqlbackup: but failed to read the tablespace "
- "file %s!\n", pathname);
-
- os_file_close(file);
- return(FALSE);
+ ib::info info;
+ info << srv_undo_logs << " out of " << srv_available_undo_logs;
+ if (srv_undo_tablespaces_active) {
+ info << " rollback segments in " << srv_undo_tablespaces_active
+ << " undo tablespaces are active.";
+ } else {
+ info << " rollback segments are active.";
}
- os_file_close(file);
-
- /* get the file format from the page */
- ptr = page + 54;
- flags = mach_read_from_4(ptr);
-	if (!fsp_flags_is_valid(flags)) {
- /* bad tablespace flags */
- return(FALSE);
- }
-
- *format_id = FSP_FLAGS_GET_POST_ANTELOPE(flags);
-
- return(TRUE);
+ return(true);
}
-
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the name */
-UNIV_INTERN
-const char*
-trx_sys_file_format_id_to_name(
-/*===========================*/
- const ulint id) /*!< in: id of the file format */
-{
- if (!(id < FILE_FORMAT_NAME_N)) {
- /* unknown id */
- return("Unknown");
- }
-
- return(file_format_name_map[id]);
-}
-
-#endif /* !UNIV_HOTBACKUP */
-
-#ifndef UNIV_HOTBACKUP
/*********************************************************************
Shutdown/Close the transaction system. */
-UNIV_INTERN
void
trx_sys_close(void)
/*===============*/
{
- ulint i;
- trx_t* trx;
- read_view_t* view;
-
ut_ad(trx_sys != NULL);
ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
- /* Check that all read views are closed except read view owned
- by a purge. */
-
- mutex_enter(&trx_sys->mutex);
-
- if (UT_LIST_GET_LEN(trx_sys->view_list) > 1) {
- fprintf(stderr,
- "InnoDB: Error: all read views were not closed"
- " before shutdown:\n"
- "InnoDB: %lu read views open \n",
- UT_LIST_GET_LEN(trx_sys->view_list) - 1);
+ if (ulint size = trx_sys->mvcc->size()) {
+ ib::error() << "All read views were not closed before"
+ " shutdown: " << size << " read views open";
}
- mutex_exit(&trx_sys->mutex);
-
- sess_close(trx_dummy_sess);
- trx_dummy_sess = NULL;
-
- trx_purge_sys_close();
-
- /* Free the double write data structures. */
- buf_dblwr_free();
-
- ut_a(UT_LIST_GET_LEN(trx_sys->ro_trx_list) == 0);
-
/* Only prepared transactions may be left in the system. Free them. */
- ut_a(UT_LIST_GET_LEN(trx_sys->rw_trx_list) == trx_sys->n_prepared_trx
- || srv_read_only_mode
- || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
-
- while ((trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list)) != NULL) {
+ while (trx_t* trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list)) {
trx_free_prepared(trx);
}
/* There can't be any active transactions. */
- for (i = 0; i < TRX_SYS_N_RSEGS; ++i) {
- trx_rseg_t* rseg;
- rseg = trx_sys->rseg_array[i];
+ for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
+ if (trx_rseg_t* rseg = trx_sys->rseg_array[i]) {
+ trx_rseg_mem_free(rseg);
+ }
- if (rseg != NULL) {
+ if (trx_rseg_t* rseg = trx_sys->temp_rsegs[i]) {
trx_rseg_mem_free(rseg);
- } else {
- break;
}
}
- view = UT_LIST_GET_FIRST(trx_sys->view_list);
-
- while (view != NULL) {
- read_view_t* prev_view = view;
-
- view = UT_LIST_GET_NEXT(view_list, prev_view);
+ UT_DELETE(trx_sys->mvcc);
- /* Views are allocated from the trx_sys->global_read_view_heap.
- So, we simply remove the element here. */
- UT_LIST_REMOVE(view_list, trx_sys->view_list, prev_view);
- }
-
- ut_a(UT_LIST_GET_LEN(trx_sys->view_list) == 0);
- ut_a(UT_LIST_GET_LEN(trx_sys->ro_trx_list) == 0);
ut_a(UT_LIST_GET_LEN(trx_sys->rw_trx_list) == 0);
ut_a(UT_LIST_GET_LEN(trx_sys->mysql_trx_list) == 0);
+ ut_a(UT_LIST_GET_LEN(trx_sys->serialisation_list) == 0);
+ /* We used placement new to create this mutex. Call the destructor. */
mutex_free(&trx_sys->mutex);
- mem_free(trx_sys);
+ trx_sys->rw_trx_ids.~trx_ids_t();
+
+ trx_sys->rw_trx_set.~TrxIdSet();
+
+ ut_free(trx_sys);
trx_sys = NULL;
}
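The teardown above mixes mutex_free(), explicit destructor calls and ut_free() because trx_sys's container members were constructed by placement new into zero-allocated storage, not by operator new. A minimal standalone sketch of that pattern, with hypothetical sys_t/id_vec names and the standard allocator in place of the InnoDB one:

    #include <cstdlib>
    #include <new>
    #include <vector>

    typedef std::vector<unsigned long> id_vec;

    struct sys_t {
            id_vec  ids;    /* stands in for trx_sys->rw_trx_ids */
    };

    int main()
    {
            /* Zero-allocated raw storage; no constructor has run yet,
            as with ut_zalloc_nokey() in InnoDB. */
            sys_t*  sys = static_cast<sys_t*>(calloc(1, sizeof(sys_t)));

            /* Construct the member in place. */
            new(&sys->ids) id_vec();
            sys->ids.push_back(42);

            /* Tear down the same way as trx_sys_close(): run the
            destructor explicitly, then release the raw buffer. free()
            alone would leak the vector's heap storage, and delete
            would be undefined behaviour for calloc'ed memory. */
            sys->ids.~id_vec();
            free(sys);
            return 0;
    }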
/*********************************************************************
Check if there are any active (non-prepared) transactions.
+This is only used to check if it's safe to shut down.
@return total number of active transactions or 0 if none */
-UNIV_INTERN
ulint
trx_sys_any_active_transactions(void)
/*=================================*/
{
ulint total_trx = 0;
- mutex_enter(&trx_sys->mutex);
+ trx_sys_mutex_enter();
- total_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list)
- + UT_LIST_GET_LEN(trx_sys->mysql_trx_list);
+ for (trx_t* trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
+ trx != NULL;
+ trx = UT_LIST_GET_NEXT(trx_list, trx)) {
+ ut_ad(trx->in_rw_trx_list);
+ trx_mutex_enter(trx);
+ switch (trx->state) {
+ case TRX_STATE_NOT_STARTED:
+ DBUG_ASSERT(!"invalid state");
+ /* fall through */
+ case TRX_STATE_PREPARED:
+ case TRX_STATE_PREPARED_RECOVERED:
+ break;
+ case TRX_STATE_ACTIVE:
+ case TRX_STATE_COMMITTED_IN_MEMORY:
+ total_trx++;
+ }
+ trx_mutex_exit(trx);
+ }
- ut_a(total_trx >= trx_sys->n_prepared_trx);
- total_trx -= trx_sys->n_prepared_trx;
+ for (trx_t* trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
+ trx != NULL;
+ trx = UT_LIST_GET_NEXT(mysql_trx_list, trx)) {
+ ut_ad(trx->in_mysql_trx_list);
+ trx_mutex_enter(trx);
+ /* This may count some ACTIVE transactions twice,
+ both in rw_trx_list and mysql_trx_list. */
+ total_trx += trx->state == TRX_STATE_ACTIVE;
+ /* Any PREPARED or COMMITTED transactions must be
+ in rw_trx_list, so it suffices to count them there. */
+ ut_ad(trx->in_rw_trx_list
+ || trx->state == TRX_STATE_NOT_STARTED
+ || trx->state == TRX_STATE_ACTIVE);
+ trx_mutex_exit(trx);
+ }
- mutex_exit(&trx_sys->mutex);
+ trx_sys_mutex_exit();
return(total_trx);
}
#ifdef UNIV_DEBUG
/*************************************************************//**
-Validate the trx_list_t.
-@return TRUE if valid. */
+Validate the trx_ut_list_t.
+@return true if valid. */
static
-ibool
+bool
trx_sys_validate_trx_list_low(
/*===========================*/
- trx_list_t* trx_list) /*!< in: &trx_sys->ro_trx_list
- or &trx_sys->rw_trx_list */
+ trx_ut_list_t* trx_list) /*!< in: &trx_sys->rw_trx_list */
{
const trx_t* trx;
const trx_t* prev_trx = NULL;
- ut_ad(mutex_own(&trx_sys->mutex));
+ ut_ad(trx_sys_mutex_own());
- ut_ad(trx_list == &trx_sys->ro_trx_list
- || trx_list == &trx_sys->rw_trx_list);
+ ut_ad(trx_list == &trx_sys->rw_trx_list);
for (trx = UT_LIST_GET_FIRST(*trx_list);
trx != NULL;
prev_trx = trx, trx = UT_LIST_GET_NEXT(trx_list, prev_trx)) {
- assert_trx_in_list(trx);
- ut_ad(trx->read_only == (trx_list == &trx_sys->ro_trx_list));
-
+ check_trx_state(trx);
ut_a(prev_trx == NULL || prev_trx->id > trx->id);
}
- return(TRUE);
+ return(true);
}
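The loop above asserts prev_trx->id > trx->id for every adjacent pair, i.e. rw_trx_list is kept sorted in strictly descending id order. The same invariant can be expressed with a standard algorithm; an illustrative sketch with plain ids standing in for trx_t:

    #include <algorithm>
    #include <cassert>
    #include <functional>
    #include <vector>

    int main()
    {
            /* rw_trx_list keeps the newest (largest) id first. */
            std::vector<unsigned long> ids;
            ids.push_back(42);
            ids.push_back(17);
            ids.push_back(9);

            /* "Strictly descending" means there is no adjacent pair
            (a, b) with a <= b, which is exactly what the ut_a() in
            trx_sys_validate_trx_list_low() checks pairwise. */
            assert(std::adjacent_find(ids.begin(), ids.end(),
                                      std::less_equal<unsigned long>())
                   == ids.end());
            return 0;
    }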
/*************************************************************//**
-Validate the trx_sys_t::ro_trx_list and trx_sys_t::rw_trx_list.
-@return TRUE if lists are valid. */
-UNIV_INTERN
-ibool
-trx_sys_validate_trx_list(void)
-/*===========================*/
+Validate the trx_sys_t::rw_trx_list.
+@return true if the list is valid. */
+bool
+trx_sys_validate_trx_list()
+/*=======================*/
{
- ut_ad(mutex_own(&trx_sys->mutex));
+ ut_ad(trx_sys_mutex_own());
- ut_a(trx_sys_validate_trx_list_low(&trx_sys->ro_trx_list));
ut_a(trx_sys_validate_trx_list_low(&trx_sys->rw_trx_list));
- return(TRUE);
+ return(true);
}
#endif /* UNIV_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc
index 53c5d1ca82d..80de62b8b28 100644
--- a/storage/innobase/trx/trx0trx.cc
+++ b/storage/innobase/trx/trx0trx.cc
@@ -26,172 +26,420 @@ Created 3/26/1996 Heikki Tuuri
#include "trx0trx.h"
-#ifdef UNIV_NONINL
-#include "trx0trx.ic"
+#ifdef WITH_WSREP
+#include <mysql/service_wsrep.h>
#endif
-#include <mysql/service_wsrep.h>
+#include <mysql/service_thd_error_context.h>
-#include "trx0undo.h"
-#include "trx0rseg.h"
+#include "btr0sea.h"
+#include "lock0lock.h"
#include "log0log.h"
+#include "os0proc.h"
#include "que0que.h"
-#include "lock0lock.h"
-#include "trx0roll.h"
-#include "usr0sess.h"
#include "read0read.h"
+#include "srv0mon.h"
#include "srv0srv.h"
#include "srv0start.h"
-#include "btr0sea.h"
-#include "os0proc.h"
-#include "trx0xa.h"
-#include "trx0rec.h"
#include "trx0purge.h"
-#include "ha_prototypes.h"
-#include "srv0mon.h"
+#include "trx0rec.h"
+#include "trx0roll.h"
+#include "trx0rseg.h"
+#include "trx0undo.h"
+#include "trx0xa.h"
+#include "ut0pool.h"
#include "ut0vec.h"
-#include<set>
+#include <set>
+#include <new>
extern "C"
int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2);
-/** Set of table_id */
-typedef std::set<table_id_t> table_id_set;
-
-/** Dummy session used currently in MySQL interface */
-UNIV_INTERN sess_t* trx_dummy_sess = NULL;
+static const ulint MAX_DETAILED_ERROR_LEN = 256;
-#ifdef UNIV_PFS_MUTEX
-/* Key to register the mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t trx_mutex_key;
-/* Key to register the mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t trx_undo_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
+/** Set of table_id */
+typedef std::set<
+ table_id_t,
+ std::less<table_id_t>,
+ ut_allocator<table_id_t> > table_id_set;
/*************************************************************//**
Set detailed error message for the transaction. */
-UNIV_INTERN
void
trx_set_detailed_error(
/*===================*/
trx_t* trx, /*!< in: transaction struct */
const char* msg) /*!< in: detailed error message */
{
- ut_strlcpy(trx->detailed_error, msg, sizeof(trx->detailed_error));
+ strncpy(trx->detailed_error, msg, MAX_DETAILED_ERROR_LEN - 1);
+ trx->detailed_error[MAX_DETAILED_ERROR_LEN - 1] = '\0';
}
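The strncpy() call is followed by an explicit terminator because strncpy() does not NUL-terminate the destination when the source is at least as long as the count. A tiny demonstration of why that second line matters:

    #include <cassert>
    #include <cstring>

    int main()
    {
            enum { BUF_LEN = 8 };   /* stands in for MAX_DETAILED_ERROR_LEN */
            char buf[BUF_LEN];
            const char* msg = "a message longer than the buffer";

            /* Copies BUF_LEN - 1 bytes and, because the source is
            longer, writes no '\0'; the terminator must be forced. */
            strncpy(buf, msg, BUF_LEN - 1);
            buf[BUF_LEN - 1] = '\0';

            assert(strlen(buf) == BUF_LEN - 1);
            return 0;
    }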
/*************************************************************//**
Set detailed error message for the transaction from a file. Note that the
file is rewound before reading from it. */
-UNIV_INTERN
void
trx_set_detailed_error_from_file(
/*=============================*/
trx_t* trx, /*!< in: transaction struct */
FILE* file) /*!< in: file to read message from */
{
- os_file_read_string(file, trx->detailed_error,
- sizeof(trx->detailed_error));
+ os_file_read_string(file, trx->detailed_error, MAX_DETAILED_ERROR_LEN);
}
-/****************************************************************//**
-Creates and initializes a transaction object. It must be explicitly
-started with trx_start_if_not_started() before using it. The default
-isolation level is TRX_ISO_REPEATABLE_READ.
-@return transaction instance, should never be NULL */
+/********************************************************************//**
+Initialize transaction object.
+@param trx trx to initialize */
static
-trx_t*
-trx_create(void)
-/*============*/
+void
+trx_init(
+/*=====*/
+ trx_t* trx)
{
- trx_t* trx;
- mem_heap_t* heap;
- ib_alloc_t* heap_alloc;
-
- trx = static_cast<trx_t*>(mem_zalloc(sizeof(*trx)));
+ trx->no = TRX_ID_MAX;
- mutex_create(trx_mutex_key, &trx->mutex, SYNC_TRX);
+ trx->state = TRX_STATE_NOT_STARTED;
- trx->magic_n = TRX_MAGIC_N;
+ trx->is_recovered = false;
+#ifdef WITH_WSREP
+ trx->wsrep = false;
+#endif /* WITH_WSREP */
- trx->state = TRX_STATE_NOT_STARTED;
+ trx->op_info = "";
trx->active_commit_ordered = 0;
+
trx->isolation_level = TRX_ISO_REPEATABLE_READ;
- trx->no = TRX_ID_MAX;
+ trx->check_foreigns = true;
- trx->support_xa = TRUE;
+ trx->check_unique_secondary = true;
- trx->check_foreigns = TRUE;
- trx->check_unique_secondary = TRUE;
+ trx->lock.n_rec_locks = 0;
trx->dict_operation = TRX_DICT_OP_NONE;
- mutex_create(trx_undo_mutex_key, &trx->undo_mutex, SYNC_TRX_UNDO);
+ trx->table_id = 0;
trx->error_state = DB_SUCCESS;
+ trx->error_key_num = ULINT_UNDEFINED;
+
+ trx->undo_no = 0;
+
+ trx->rsegs.m_redo.rseg = NULL;
+
+ trx->rsegs.m_noredo.rseg = NULL;
+
+ trx->read_only = false;
+
+ trx->auto_commit = false;
+
+ trx->will_lock = 0;
+
+ trx->ddl = false;
+
+ trx->internal = false;
+
+ ut_d(trx->start_file = 0);
+
+ ut_d(trx->start_line = 0);
+
+ trx->magic_n = TRX_MAGIC_N;
+
trx->lock.que_state = TRX_QUE_RUNNING;
- trx->lock.lock_heap = mem_heap_create_typed(
- 256, MEM_HEAP_FOR_LOCK_HEAP);
+ trx->last_sql_stat_start.least_undo_no = 0;
- trx->search_latch_timeout = BTR_SEA_TIMEOUT;
+ ut_ad(!MVCC::is_view_active(trx->read_view));
- trx->global_read_view_heap = mem_heap_create(256);
+ trx->lock.rec_cached = 0;
- trx->xid.null();
+ trx->lock.table_cached = 0;
- trx->op_info = "";
+ ut_ad(trx->get_flush_observer() == NULL);
+}
+
+/** For managing the life-cycle of the trx_t instance that we get
+from the pool. */
+struct TrxFactory {
+
+ /** Initializes a transaction object. It must be explicitly started
+ with trx_start_if_not_started() before using it. The default isolation
+ level is TRX_ISO_REPEATABLE_READ.
+ @param trx Transaction instance to initialise */
+ static void init(trx_t* trx)
+ {
+ /* Explicitly call the constructor of the already
+ allocated object. trx_t objects are allocated by
+ ut_zalloc_nokey() in Pool::Pool() which would not call
+ the constructors of the trx_t members. */
+ new(&trx->mod_tables) trx_mod_tables_t();
+
+ new(&trx->lock.table_locks) lock_list();
+
+ trx_init(trx);
+
+ trx->dict_operation_lock_mode = 0;
+
+ trx->xid = UT_NEW_NOKEY(xid_t());
+
+ trx->detailed_error = reinterpret_cast<char*>(
+ ut_zalloc_nokey(MAX_DETAILED_ERROR_LEN));
+
+ trx->lock.lock_heap = mem_heap_create_typed(
+ 1024, MEM_HEAP_FOR_LOCK_HEAP);
+
+ lock_trx_lock_list_init(&trx->lock.trx_locks);
+
+ UT_LIST_INIT(
+ trx->trx_savepoints,
+ &trx_named_savept_t::trx_savepoints);
+
+ mutex_create(LATCH_ID_TRX, &trx->mutex);
+ mutex_create(LATCH_ID_TRX_UNDO, &trx->undo_mutex);
+ }
+
+ /** Release resources held by the transaction object.
+ @param trx the transaction for which to release resources */
+ static void destroy(trx_t* trx)
+ {
+ ut_a(trx->magic_n == TRX_MAGIC_N);
+ ut_ad(!trx->in_rw_trx_list);
+ ut_ad(!trx->in_mysql_trx_list);
+
+ ut_a(trx->lock.wait_lock == NULL);
+ ut_a(trx->lock.wait_thr == NULL);
+ ut_a(trx->dict_operation_lock_mode == 0);
+
+ if (trx->lock.lock_heap != NULL) {
+ mem_heap_free(trx->lock.lock_heap);
+ trx->lock.lock_heap = NULL;
+ }
+
+ ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
+
+ UT_DELETE(trx->xid);
+ ut_free(trx->detailed_error);
+
+ mutex_free(&trx->mutex);
+ mutex_free(&trx->undo_mutex);
+
+ trx->mod_tables.~trx_mod_tables_t();
+
+ ut_ad(trx->read_view == NULL);
+
+ trx->lock.table_locks.~lock_list();
+ }
+
+ /** Enforce any invariants here, this is called before the transaction
+ is added to the pool.
+ @return true if all OK */
+ static bool debug(const trx_t* trx)
+ {
+ ut_a(trx->error_state == DB_SUCCESS);
+
+ ut_a(trx->magic_n == TRX_MAGIC_N);
- trx->api_trx = false;
+ ut_ad(!trx->read_only);
- trx->api_auto_commit = false;
+ ut_ad(trx->state == TRX_STATE_NOT_STARTED);
- trx->read_write = true;
+ ut_ad(trx->dict_operation == TRX_DICT_OP_NONE);
+
+ ut_ad(trx->mysql_thd == 0);
+
+ ut_ad(!trx->in_rw_trx_list);
+ ut_ad(!trx->in_mysql_trx_list);
+
+ ut_a(trx->lock.wait_thr == NULL);
+ ut_a(trx->lock.wait_lock == NULL);
+ ut_a(trx->dict_operation_lock_mode == 0);
+
+ ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
+
+ ut_ad(trx->autoinc_locks == NULL);
+
+ ut_ad(trx->lock.table_locks.empty());
+
+ return(true);
+ }
+};
+
+/** The lock strategy for TrxPool */
+struct TrxPoolLock {
+ TrxPoolLock() { }
+
+ /** Create the mutex */
+ void create()
+ {
+ mutex_create(LATCH_ID_TRX_POOL, &m_mutex);
+ }
+
+ /** Acquire the mutex */
+ void enter() { mutex_enter(&m_mutex); }
+
+ /** Release the mutex */
+ void exit() { mutex_exit(&m_mutex); }
+
+ /** Free the mutex */
+ void destroy() { mutex_free(&m_mutex); }
+
+ /** Mutex to use */
+ ib_mutex_t m_mutex;
+};
+
+/** The lock strategy for the TrxPoolManager */
+struct TrxPoolManagerLock {
+ TrxPoolManagerLock() { }
+
+ /** Create the mutex */
+ void create()
+ {
+ mutex_create(LATCH_ID_TRX_POOL_MANAGER, &m_mutex);
+ }
+
+ /** Acquire the mutex */
+ void enter() { mutex_enter(&m_mutex); }
+
+ /** Release the mutex */
+ void exit() { mutex_exit(&m_mutex); }
+
+ /** Free the mutex */
+ void destroy() { mutex_free(&m_mutex); }
+
+ /** Mutex to use */
+ ib_mutex_t m_mutex;
+};
+
+/** Use explicit mutexes for the trx_t pool and its manager. */
+typedef Pool<trx_t, TrxFactory, TrxPoolLock> trx_pool_t;
+typedef PoolManager<trx_pool_t, TrxPoolManagerLock > trx_pools_t;
+
+/** The trx_t pool manager */
+static trx_pools_t* trx_pools;
+
+/** Size of one trx_t pool in bytes. */
+static const ulint MAX_TRX_BLOCK_SIZE = 1024 * 1024 * 4;
+
+/** Create the trx_t pool */
+void
+trx_pool_init()
+{
+ trx_pools = UT_NEW_NOKEY(trx_pools_t(MAX_TRX_BLOCK_SIZE));
+
+ ut_a(trx_pools != 0);
+}
+
+/** Destroy the trx_t pool */
+void
+trx_pool_close()
+{
+ UT_DELETE(trx_pools);
+
+ trx_pools = 0;
+}
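Pool and PoolManager are parameterised by a factory (init/destroy/debug hooks) and a lock strategy (create/enter/exit/destroy), so the synchronisation policy is chosen at compile time. A heavily stripped-down sketch of that shape, using int elements and std::mutex instead of the real ut0pool/trx_t machinery, purely as an illustration:

    #include <cassert>
    #include <cstddef>
    #include <mutex>
    #include <vector>

    /* Factory policy: static hooks invoked by the pool. */
    struct IntFactory {
            static void init(int* p) { *p = 0; }
            static void destroy(int*) {}
            static bool debug(const int* p) { return *p == 0; }
    };

    /* Lock strategy with the same surface as TrxPoolLock. */
    struct MutexLock {
            void create() {}
            void enter() { m.lock(); }
            void exit() { m.unlock(); }
            void destroy() {}
            std::mutex m;
    };

    template <typename T, typename Factory, typename Lock>
    struct SimplePool {
            explicit SimplePool(size_t n) : storage(n) {
                    lock.create();
                    for (size_t i = 0; i < n; i++) {
                            Factory::init(&storage[i]);
                            free_list.push_back(&storage[i]);
                    }
            }
            ~SimplePool() {
                    for (size_t i = 0; i < storage.size(); i++) {
                            Factory::destroy(&storage[i]);
                    }
                    lock.destroy();
            }
            T* get() {              /* like trx_pools->get() */
                    lock.enter();
                    T* p = NULL;
                    if (!free_list.empty()) {
                            p = free_list.back();
                            free_list.pop_back();
                    }
                    lock.exit();
                    return p;
            }
            void mem_free(T* p) {   /* like trx_pools->mem_free() */
                    assert(Factory::debug(p));      /* invariants hold */
                    lock.enter();
                    free_list.push_back(p);
                    lock.exit();
            }
            std::vector<T>  storage;        /* fixed size: no reallocation */
            std::vector<T*> free_list;
            Lock            lock;
    };

    int main()
    {
            SimplePool<int, IntFactory, MutexLock> pool(4);
            int* p = pool.get();
            *p = 0;         /* reset before returning, as trx_free() resets trx */
            pool.mem_free(p);
            return 0;
    }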
+
+/** @return a trx_t instance from trx_pools. */
+static
+trx_t*
+trx_create_low()
+{
+ trx_t* trx = trx_pools->get();
+
+ assert_trx_is_free(trx);
+
+ mem_heap_t* heap;
+ ib_alloc_t* alloc;
+
+ /* We just got trx from the pool; it should be non-locking. */
+ ut_ad(trx->will_lock == 0);
+ ut_ad(trx->state == TRX_STATE_NOT_STARTED);
+
+ DBUG_LOG("trx", "Create: " << trx);
heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 8);
- heap_alloc = ib_heap_allocator_create(heap);
- /* Remember to free the vector explicitly in trx_free(). */
- trx->autoinc_locks = ib_vector_create(heap_alloc, sizeof(void**), 4);
+ alloc = ib_heap_allocator_create(heap);
/* Remember to free the vector explicitly in trx_free(). */
- heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 128);
- heap_alloc = ib_heap_allocator_create(heap);
+ trx->autoinc_locks = ib_vector_create(alloc, sizeof(void**), 4);
- trx->lock.table_locks = ib_vector_create(
- heap_alloc, sizeof(void**), 32);
+ /* Should have been either just initialized or .clear()ed by
+ trx_free(). */
+ ut_ad(trx->mod_tables.empty());
+ ut_ad(trx->lock.table_locks.empty());
+ ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
+ ut_ad(trx->lock.n_rec_locks == 0);
+ ut_ad(trx->lock.table_cached == 0);
+ ut_ad(trx->lock.rec_cached == 0);
#ifdef WITH_WSREP
trx->wsrep_event = NULL;
#endif /* WITH_WSREP */
+
return(trx);
}
+/**
+Release a trx_t instance back to the pool.
+@param trx the instance to release. */
+static
+void
+trx_free(trx_t*& trx)
+{
+ assert_trx_is_free(trx);
+
+ trx->mysql_thd = 0;
+ trx->mysql_log_file_name = 0;
+
+ // FIXME: We need to avoid this heap free/alloc for each commit.
+ if (trx->autoinc_locks != NULL) {
+ ut_ad(ib_vector_is_empty(trx->autoinc_locks));
+ /* We allocated a dedicated heap for the vector. */
+ ib_vector_free(trx->autoinc_locks);
+ trx->autoinc_locks = NULL;
+ }
+
+ trx->mod_tables.clear();
+
+ ut_ad(trx->read_view == NULL);
+
+ /* trx locking state should have been reset before returning trx
+ to pool */
+ ut_ad(trx->will_lock == 0);
+
+ trx_pools->mem_free(trx);
+ /* Unpoison the memory for innodb_monitor_set_option;
+ it is operating also on the freed transaction objects. */
+ MEM_UNDEFINED(&trx->mutex, sizeof trx->mutex);
+ MEM_UNDEFINED(&trx->undo_mutex, sizeof trx->undo_mutex);
+ /* Declare the contents as initialized for Valgrind;
+ we checked that it was initialized in trx_pools->mem_free(trx). */
+ UNIV_MEM_VALID(&trx->mutex, sizeof trx->mutex);
+ UNIV_MEM_VALID(&trx->undo_mutex, sizeof trx->undo_mutex);
+
+ trx = NULL;
+}
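MEM_UNDEFINED and UNIV_MEM_VALID expand to memory-checker client requests in instrumented builds and to no-ops otherwise. Assuming the Valgrind development headers are available, the underlying poison/unpoison sequence looks roughly like this sketch (not the InnoDB macros themselves):

    /* The client requests only take effect when running under
    Valgrind/Memcheck; <valgrind/memcheck.h> must be installed. */
    #include <valgrind/memcheck.h>

    struct mutex_stub { int word; };

    int main()
    {
            mutex_stub m = { 0 };

            /* Poison: reading m now would be reported, which is how
            use-after-free of the pooled trx mutexes gets caught. */
            VALGRIND_MAKE_MEM_UNDEFINED(&m, sizeof m);

            /* Unpoison: a legitimate late reader (the monitor code,
            in the InnoDB case) is no longer flagged. */
            VALGRIND_MAKE_MEM_DEFINED(&m, sizeof m);

            return m.word;
    }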
+
/********************************************************************//**
Creates a transaction object for background operations by the master thread.
-@return own: transaction object */
-UNIV_INTERN
+@return own: transaction object */
trx_t*
trx_allocate_for_background(void)
/*=============================*/
{
trx_t* trx;
- trx = trx_create();
-
- trx->sess = trx_dummy_sess;
+ trx = trx_create_low();
return(trx);
}
/********************************************************************//**
Creates a transaction object for MySQL.
-@return own: transaction object */
-UNIV_INTERN
+@return own: transaction object */
trx_t*
trx_allocate_for_mysql(void)
/*========================*/
@@ -200,223 +448,235 @@ trx_allocate_for_mysql(void)
trx = trx_allocate_for_background();
- mutex_enter(&trx_sys->mutex);
+ trx_sys_mutex_enter();
ut_d(trx->in_mysql_trx_list = TRUE);
- UT_LIST_ADD_FIRST(mysql_trx_list, trx_sys->mysql_trx_list, trx);
+ UT_LIST_ADD_FIRST(trx_sys->mysql_trx_list, trx);
- mutex_exit(&trx_sys->mutex);
+ trx_sys_mutex_exit();
return(trx);
}
-/********************************************************************//**
-Frees a transaction object. */
+/** Check state of transaction before freeing it.
+@param trx trx object to validate */
static
void
-trx_free(
-/*=====*/
- trx_t* trx) /*!< in, own: trx object */
+trx_validate_state_before_free(trx_t* trx)
{
- ut_a(trx->magic_n == TRX_MAGIC_N);
- ut_ad(!trx->in_ro_trx_list);
- ut_ad(!trx->in_rw_trx_list);
- ut_ad(!trx->in_mysql_trx_list);
-
- mutex_free(&trx->undo_mutex);
-
- if (trx->undo_no_arr != NULL) {
- trx_undo_arr_free(trx->undo_no_arr);
- }
-
- ut_a(trx->lock.wait_lock == NULL);
- ut_a(trx->lock.wait_thr == NULL);
-
- ut_a(!trx->has_search_latch);
-
- ut_a(trx->dict_operation_lock_mode == 0);
-
- if (trx->lock.lock_heap) {
- mem_heap_free(trx->lock.lock_heap);
- }
-
- ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
-
- if (trx->global_read_view_heap) {
- mem_heap_free(trx->global_read_view_heap);
- }
-
- ut_a(ib_vector_is_empty(trx->autoinc_locks));
- /* We allocated a dedicated heap for the vector. */
- ib_vector_free(trx->autoinc_locks);
-
- if (trx->lock.table_locks != NULL) {
- /* We allocated a dedicated heap for the vector. */
- ib_vector_free(trx->lock.table_locks);
- }
+ ut_ad(!trx->declared_to_be_inside_innodb);
+ ut_ad(!trx->n_mysql_tables_in_use);
+ ut_ad(!trx->mysql_n_tables_locked);
+ ut_ad(!trx->internal);
- mutex_free(&trx->mutex);
-
- mem_free(trx);
-}
-
-/********************************************************************//**
-Frees a transaction object of a background operation of the master thread. */
-UNIV_INTERN
-void
-trx_free_for_background(
-/*====================*/
- trx_t* trx) /*!< in, own: trx object */
-{
if (trx->declared_to_be_inside_innodb) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Freeing a trx (%p, " TRX_ID_FMT ") which is declared "
- "to be processing inside InnoDB", trx, trx->id);
+ ib::error() << "Freeing a trx (" << trx << ", "
+ << trx_get_id_for_print(trx) << ") which is declared"
+ " to be processing inside InnoDB";
trx_print(stderr, trx, 600);
putc('\n', stderr);
/* This is an error but not a fatal error. We must keep
- the counters like srv_conc_n_threads accurate. */
+ the counters like srv_conc.n_active accurate. */
srv_conc_force_exit_innodb(trx);
}
if (trx->n_mysql_tables_in_use != 0
|| trx->mysql_n_tables_locked != 0) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "MySQL is freeing a thd though "
- "trx->n_mysql_tables_in_use is %lu and "
- "trx->mysql_n_tables_locked is %lu.",
- (ulong) trx->n_mysql_tables_in_use,
- (ulong) trx->mysql_n_tables_locked);
+ ib::error() << "MySQL is freeing a thd though"
+ " trx->n_mysql_tables_in_use is "
+ << trx->n_mysql_tables_in_use
+ << " and trx->mysql_n_tables_locked is "
+ << trx->mysql_n_tables_locked << ".";
trx_print(stderr, trx, 600);
ut_print_buf(stderr, trx, sizeof(trx_t));
putc('\n', stderr);
}
- ut_a(trx->state == TRX_STATE_NOT_STARTED);
- ut_a(trx->insert_undo == NULL);
- ut_a(trx->update_undo == NULL);
- ut_a(trx->read_view == NULL);
+ trx->dict_operation = TRX_DICT_OP_NONE;
+ assert_trx_is_inactive(trx);
+}
+
+/** Free and initialize a transaction object instantiated during recovery.
+@param trx trx object to free and initialize during recovery */
+void
+trx_free_resurrected(trx_t* trx)
+{
+ trx_validate_state_before_free(trx);
+
+ trx_init(trx);
+
+ trx_free(trx);
+}
+
+/** Free a transaction that was allocated by background or user threads.
+@param trx trx object to free */
+void
+trx_free_for_background(trx_t* trx)
+{
+ trx_validate_state_before_free(trx);
trx_free(trx);
}
+/** Transition to committed state, to release implicit locks. */
+inline void trx_t::commit_state()
+{
+ /* This makes the transaction committed in memory and makes its
+ changes to data visible to other transactions. NOTE that there is a
+ small discrepancy from the strict formal visibility rules here: a
+ user of the database can see modifications made by another
+ transaction T even before the necessary redo log segment has been
+ flushed to the disk. If the database happens to crash before the
+ flush, the user has seen modifications from T which will never be a
+ committed transaction. However, any transaction T2 which sees the
+ modifications of the committing transaction T, and which also itself
+ makes modifications to the database, will get an lsn larger than the
+ committing transaction T. In the case where the log flush fails, and
+ T never gets committed, also T2 will never get committed. */
+ ut_ad(trx_mutex_own(this));
+ ut_ad(state != TRX_STATE_NOT_STARTED);
+ ut_ad(state != TRX_STATE_COMMITTED_IN_MEMORY
+ || (is_recovered && !UT_LIST_GET_LEN(lock.trx_locks)));
+ state= TRX_STATE_COMMITTED_IN_MEMORY;
+
+ /* If the background thread trx_rollback_or_clean_recovered()
+ is still active then there is a chance that the rollback
+ thread may see this trx as COMMITTED_IN_MEMORY and go ahead
+ to clean it up by calling trx_cleanup_at_db_startup(). This can
+ happen if we are committing a trx here that was left in
+ the PREPARED state during the crash. Note that the commit of the
+ rollback of a PREPARED trx happens in the recovery thread,
+ while the rollback of other transactions happens in the
+ background thread. To avoid this race we unconditionally unset
+ the is_recovered flag. */
+ is_recovered= false;
+ ut_ad(id || !is_referenced());
+}
+
+/** Release any explicit locks of a committing transaction. */
+inline void trx_t::release_locks()
+{
+ DBUG_ASSERT(state == TRX_STATE_COMMITTED_IN_MEMORY);
+
+ if (UT_LIST_GET_LEN(lock.trx_locks))
+ lock_trx_release_locks(this);
+ else
+ lock.table_locks.clear();
+}
+
/********************************************************************//**
At shutdown, frees a transaction object that is in the PREPARED state. */
-UNIV_INTERN
void
trx_free_prepared(
/*==============*/
trx_t* trx) /*!< in, own: trx object */
{
+ trx_mutex_enter(trx);
+ ut_ad(trx->state == TRX_STATE_PREPARED
+ || trx->state == TRX_STATE_PREPARED_RECOVERED
+ || !srv_was_started
+ || srv_read_only_mode
+ || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
ut_a(trx_state_eq(trx, TRX_STATE_PREPARED)
|| trx_state_eq(trx, TRX_STATE_PREPARED_RECOVERED)
|| (trx->is_recovered
&& (trx_state_eq(trx, TRX_STATE_ACTIVE)
|| trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY))
- && (srv_read_only_mode
+ && (!srv_was_started || is_mariabackup_restore_or_export()
+ || srv_read_only_mode
|| srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO)));
ut_a(trx->magic_n == TRX_MAGIC_N);
- lock_trx_release_locks(trx);
+ trx->commit_state();
+ trx_mutex_exit(trx);
+ trx->release_locks();
trx_undo_free_prepared(trx);
assert_trx_in_rw_list(trx);
ut_a(!trx->read_only);
- UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
- ut_d(trx->in_rw_trx_list = FALSE);
+ ut_ad(trx->in_rw_trx_list);
+ UT_LIST_REMOVE(trx_sys->rw_trx_list, trx);
+ ut_d(trx->in_rw_trx_list = false);
- /* Undo trx_resurrect_table_locks(). */
- UT_LIST_INIT(trx->lock.trx_locks);
+ DBUG_LOG("trx", "Free prepared: " << trx);
+ trx->state = TRX_STATE_NOT_STARTED;
+ ut_ad(!UT_LIST_GET_LEN(trx->lock.trx_locks));
+ trx->id = 0;
trx_free(trx);
}
-/********************************************************************//**
-Frees a transaction object for MySQL. */
-UNIV_INTERN
+/** Disconnect a transaction from MySQL and optionally mark it as if
+it had been recovered. For the marking, the transaction must be in the
+prepared state. The recovery-marked transaction is going to survive
+"alone", so its association with the MySQL handle is destroyed now
+rather than when the transaction is finally freed.
+@param[in,out] trx transaction
+@param[in] prepared boolean value to specify whether trx is
+ for recovery or not. */
+inline
void
-trx_free_for_mysql(
-/*===============*/
- trx_t* trx) /*!< in, own: trx object */
+trx_disconnect_from_mysql(
+ trx_t* trx,
+ bool prepared)
{
- mutex_enter(&trx_sys->mutex);
+ trx_sys_mutex_enter();
ut_ad(trx->in_mysql_trx_list);
ut_d(trx->in_mysql_trx_list = FALSE);
- UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx);
-
- ut_ad(trx_sys_validate_trx_list());
- mutex_exit(&trx_sys->mutex);
+ UT_LIST_REMOVE(trx_sys->mysql_trx_list, trx);
- trx_free_for_background(trx);
-}
-
-/****************************************************************//**
-Inserts the trx handle in the trx system trx list in the right position.
-The list is sorted on the trx id so that the biggest id is at the list
-start. This function is used at the database startup to insert incomplete
-transactions to the list. */
-static
-void
-trx_list_rw_insert_ordered(
-/*=======================*/
- trx_t* trx) /*!< in: trx handle */
-{
- trx_t* trx2;
-
- ut_ad(!trx->read_only);
-
- ut_d(trx->start_file = __FILE__);
- ut_d(trx->start_line = __LINE__);
-
- ut_a(srv_is_being_started);
- ut_ad(!trx->in_ro_trx_list);
- ut_ad(!trx->in_rw_trx_list);
- ut_ad(trx->state != TRX_STATE_NOT_STARTED);
- ut_ad(trx->is_recovered);
+ if (trx->read_view != NULL) {
+ trx_sys->mvcc->view_close(trx->read_view, true);
+ }
- for (trx2 = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
- trx2 != NULL;
- trx2 = UT_LIST_GET_NEXT(trx_list, trx2)) {
+ ut_ad(trx_sys_validate_trx_list());
- assert_trx_in_rw_list(trx2);
+ if (prepared) {
- if (trx->id >= trx2->id) {
+ ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
- ut_ad(trx->id > trx2->id);
- break;
- }
+ trx->is_recovered = true;
+ trx->mysql_thd = NULL;
+ /* todo/fixme: suggest to do it at innodb prepare */
+ trx->will_lock = 0;
}
- if (trx2 != NULL) {
- trx2 = UT_LIST_GET_PREV(trx_list, trx2);
+ trx_sys_mutex_exit();
+}
- if (trx2 == NULL) {
- UT_LIST_ADD_FIRST(trx_list, trx_sys->rw_trx_list, trx);
- } else {
- UT_LIST_INSERT_AFTER(
- trx_list, trx_sys->rw_trx_list, trx2, trx);
- }
- } else {
- UT_LIST_ADD_LAST(trx_list, trx_sys->rw_trx_list, trx);
- }
+/** Disconnect a transaction from MySQL.
+@param[in,out] trx transaction */
+inline
+void
+trx_disconnect_plain(trx_t* trx)
+{
+ trx_disconnect_from_mysql(trx, false);
+}
-#ifdef UNIV_DEBUG
- if (trx->id > trx_sys->rw_max_trx_id) {
- trx_sys->rw_max_trx_id = trx->id;
- }
-#endif /* UNIV_DEBUG */
+/** Disconnect a prepared transaction from MySQL.
+@param[in,out] trx transaction */
+void
+trx_disconnect_prepared(trx_t* trx)
+{
+ trx_disconnect_from_mysql(trx, true);
+}
- ut_ad(!trx->in_rw_trx_list);
- ut_d(trx->in_rw_trx_list = TRUE);
+/** Free a transaction object for MySQL.
+@param[in,out] trx transaction */
+void
+trx_free_for_mysql(trx_t* trx)
+{
+ trx_disconnect_plain(trx);
+ trx_free_for_background(trx);
}
/****************************************************************//**
@@ -426,6 +686,8 @@ void
trx_resurrect_table_locks(
/*======================*/
trx_t* trx, /*!< in/out: transaction */
+ const trx_undo_ptr_t* undo_ptr,
+ /*!< in: pointer to undo segment. */
const trx_undo_t* undo) /*!< in: undo log */
{
mtr_t mtr;
@@ -433,40 +695,33 @@ trx_resurrect_table_locks(
trx_undo_rec_t* undo_rec;
table_id_set tables;
- ut_ad(undo == trx->insert_undo || undo == trx->update_undo);
+ ut_ad(undo == undo_ptr->insert_undo || undo == undo_ptr->update_undo);
+
+ if (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY) || undo->empty) {
- if (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)
- || undo->empty) {
return;
}
mtr_start(&mtr);
+
/* trx_rseg_mem_create() may have acquired an X-latch on this
page, so we cannot acquire an S-latch. */
undo_page = trx_undo_page_get(
- undo->space, undo->zip_size, undo->top_page_no, &mtr);
+ page_id_t(undo->space, undo->top_page_no), &mtr);
+
undo_rec = undo_page + undo->top_offset;
do {
ulint type;
- ulint cmpl_info;
- bool updated_extern;
undo_no_t undo_no;
table_id_t table_id;
+ ulint cmpl_info;
+ bool updated_extern;
page_t* undo_rec_page = page_align(undo_rec);
if (undo_rec_page != undo_page) {
- if (!mtr_memo_release(&mtr,
- buf_block_align(undo_page),
- MTR_MEMO_PAGE_X_FIX)) {
- /* The page of the previous undo_rec
- should have been latched by
- trx_undo_page_get() or
- trx_undo_get_prev_rec(). */
- ut_ad(0);
- }
-
+ mtr.release_page(undo_page, MTR_MEMO_PAGE_X_FIX);
undo_page = undo_rec_page;
}
@@ -486,8 +741,7 @@ trx_resurrect_table_locks(
i != tables.end(); i++) {
if (dict_table_t* table = dict_table_open_on_id(
*i, FALSE, DICT_TABLE_OP_LOAD_TABLESPACE)) {
- if (table->file_unreadable
- || dict_table_is_temporary(table)) {
+ if (!table->is_readable()) {
mutex_enter(&dict_sys->mutex);
dict_table_close(table, TRUE, FALSE);
dict_table_remove_from_cache(table);
@@ -495,13 +749,17 @@ trx_resurrect_table_locks(
continue;
}
+ if (trx->state == TRX_STATE_PREPARED) {
+ trx->mod_tables.insert(table);
+ }
lock_table_ix_resurrect(table, trx);
DBUG_PRINT("ib_trx",
("resurrect" TRX_ID_FMT
" table '%s' IX lock from %s undo",
- trx->id, table->name,
- undo == trx->insert_undo
+ trx_get_id_for_print(trx),
+ table->name.m_name,
+ undo == undo_ptr->insert_undo
? "insert" : "update"));
dict_table_close(table, FALSE, FALSE);
@@ -512,7 +770,7 @@ trx_resurrect_table_locks(
/****************************************************************//**
Resurrect the transactions that were doing inserts at the time of the
crash; they need to be undone.
-@return trx_t instance */
+@return trx_t instance */
static
trx_t*
trx_resurrect_insert(
@@ -524,11 +782,14 @@ trx_resurrect_insert(
trx = trx_allocate_for_background();
- trx->rseg = rseg;
- trx->xid = undo->xid;
+ ut_d(trx->start_file = __FILE__);
+ ut_d(trx->start_line = __LINE__);
+
+ trx->rsegs.m_redo.rseg = rseg;
+ *trx->xid = undo->xid;
trx->id = undo->trx_id;
- trx->insert_undo = undo;
- trx->is_recovered = TRUE;
+ trx->rsegs.m_redo.insert_undo = undo;
+ trx->is_recovered = true;
/* This is single-threaded startup code, we do not need the
protection of trx->mutex or trx_sys->mutex here. */
@@ -540,13 +801,11 @@ trx_resurrect_insert(
if (undo->state == TRX_UNDO_PREPARED) {
- fprintf(stderr,
- "InnoDB: Transaction " TRX_ID_FMT " was in the"
- " XA prepared state.\n", trx->id);
+ ib::info() << "Transaction "
+ << trx_get_id_for_print(trx)
+ << " was in the XA prepared state.";
trx->state = TRX_STATE_PREPARED;
- trx_sys->n_prepared_trx++;
- trx_sys->n_prepared_recovered_trx++;
} else {
trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
}
@@ -558,6 +817,7 @@ trx_resurrect_insert(
undo log structure */
trx->no = trx->id;
+
} else {
trx->state = TRX_STATE_ACTIVE;
@@ -581,6 +841,7 @@ trx_resurrect_insert(
if (!undo->empty) {
trx->undo_no = undo->top_undo_no + 1;
+ trx->undo_rseg_space = undo->rseg->space;
}
return(trx);
@@ -600,16 +861,10 @@ trx_resurrect_update_in_prepared_state(
protection of trx->mutex or trx_sys->mutex here. */
if (undo->state == TRX_UNDO_PREPARED) {
- fprintf(stderr,
- "InnoDB: Transaction " TRX_ID_FMT
- " was in the XA prepared state.\n", trx->id);
-
- if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
- trx_sys->n_prepared_trx++;
- trx_sys->n_prepared_recovered_trx++;
- } else {
- ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
- }
+ ib::info() << "Transaction " << trx_get_id_for_print(trx)
+ << " was in the XA prepared state.";
+ ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED)
+ || trx_state_eq(trx, TRX_STATE_PREPARED));
trx->state = TRX_STATE_PREPARED;
} else {
@@ -628,11 +883,11 @@ trx_resurrect_update(
trx_undo_t* undo, /*!< in/out: update UNDO record */
trx_rseg_t* rseg) /*!< in/out: rollback segment */
{
- trx->rseg = rseg;
- trx->xid = undo->xid;
+ trx->rsegs.m_redo.rseg = rseg;
+ *trx->xid = undo->xid;
trx->id = undo->trx_id;
- trx->update_undo = undo;
- trx->is_recovered = TRUE;
+ trx->rsegs.m_redo.update_undo = undo;
+ trx->is_recovered = true;
/* This is single-threaded startup code, we do not need the
protection of trx->mutex or trx_sys->mutex here. */
@@ -663,42 +918,44 @@ trx_resurrect_update(
if (undo->dict_operation) {
trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
- trx->table_id = undo->table_id;
+ if (!trx->table_id) {
+ trx->table_id = undo->table_id;
+ }
}
if (!undo->empty && undo->top_undo_no >= trx->undo_no) {
trx->undo_no = undo->top_undo_no + 1;
+ trx->undo_rseg_space = undo->rseg->space;
}
}
-/****************************************************************//**
-Creates trx objects for transactions and initializes the trx list of
-trx_sys at database start. Rollback segment and undo log lists must
-already exist when this function is called, because the lists of
-transactions to be rolled back or cleaned up are built based on the
-undo log lists. */
-UNIV_INTERN
+/** Initialize (resurrect) transactions at startup. */
void
-trx_lists_init_at_db_start(void)
-/*============================*/
+trx_lists_init_at_db_start()
{
- ulint i;
-
ut_a(srv_is_being_started);
+ ut_ad(!srv_was_started);
+ ut_ad(!purge_sys);
+
+ purge_sys = UT_NEW_NOKEY(purge_sys_t());
+
+ if (srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN) {
+ return;
+ }
- UT_LIST_INIT(trx_sys->ro_trx_list);
- UT_LIST_INIT(trx_sys->rw_trx_list);
+ trx_rseg_array_init();
/* Look from the rollback segments if there exist undo logs for
- transactions */
+ transactions. */
- for (i = 0; i < TRX_SYS_N_RSEGS; ++i) {
+ for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
trx_undo_t* undo;
- trx_rseg_t* rseg;
-
- rseg = trx_sys->rseg_array[i];
+ trx_rseg_t* rseg = trx_sys->rseg_array[i];
+ /* Some rollback segment may be unavailable,
+ especially if the server was previously run with a
+ non-default value of innodb_undo_logs. */
if (rseg == NULL) {
continue;
}
@@ -707,113 +964,209 @@ trx_lists_init_at_db_start(void)
for (undo = UT_LIST_GET_FIRST(rseg->insert_undo_list);
undo != NULL;
undo = UT_LIST_GET_NEXT(undo_list, undo)) {
+
+ /* trx_purge() will not run before we return,
+ so we can safely increment this without
+ holding rseg->mutex. */
+ ++rseg->trx_ref_count;
+
trx_t* trx;
trx = trx_resurrect_insert(undo, rseg);
- trx_list_rw_insert_ordered(trx);
+ trx_sys_rw_trx_add(trx);
- trx_resurrect_table_locks(trx, undo);
+ trx_resurrect_table_locks(
+ trx, &trx->rsegs.m_redo, undo);
}
/* Resurrect transactions that were doing updates. */
for (undo = UT_LIST_GET_FIRST(rseg->update_undo_list);
undo != NULL;
undo = UT_LIST_GET_NEXT(undo_list, undo)) {
- trx_t* trx;
- ibool trx_created;
- /* Check the trx_sys->rw_trx_list first. */
- mutex_enter(&trx_sys->mutex);
- trx = trx_get_rw_trx_by_id(undo->trx_id);
- mutex_exit(&trx_sys->mutex);
+ /* Check the trx_sys->rw_trx_set first. */
+ trx_sys_mutex_enter();
+
+ trx_t* trx = trx_get_rw_trx_by_id(undo->trx_id);
+
+ trx_sys_mutex_exit();
if (trx == NULL) {
trx = trx_allocate_for_background();
- trx_created = TRUE;
- } else {
- trx_created = FALSE;
+ ++rseg->trx_ref_count;
+
+ ut_d(trx->start_file = __FILE__);
+ ut_d(trx->start_line = __LINE__);
}
trx_resurrect_update(trx, undo, rseg);
- if (trx_created) {
- trx_list_rw_insert_ordered(trx);
- }
+ trx_sys_rw_trx_add(trx);
- trx_resurrect_table_locks(trx, undo);
+ trx_resurrect_table_locks(
+ trx, &trx->rsegs.m_redo, undo);
}
}
+
+ TrxIdSet::iterator end = trx_sys->rw_trx_set.end();
+
+ for (TrxIdSet::iterator it = trx_sys->rw_trx_set.begin();
+ it != end;
+ ++it) {
+
+ ut_ad(it->m_trx->in_rw_trx_list);
+#ifdef UNIV_DEBUG
+ if (it->m_trx->id > trx_sys->rw_max_trx_id) {
+ trx_sys->rw_max_trx_id = it->m_trx->id;
+ }
+#endif /* UNIV_DEBUG */
+
+ if (it->m_trx->state == TRX_STATE_ACTIVE
+ || it->m_trx->state == TRX_STATE_PREPARED) {
+
+ trx_sys->rw_trx_ids.push_back(it->m_id);
+ }
+
+ UT_LIST_ADD_FIRST(trx_sys->rw_trx_list, it->m_trx);
+ }
}
-/******************************************************************//**
-Assigns a rollback segment to a transaction in a round-robin fashion.
-@return assigned rollback segment instance */
-static
-trx_rseg_t*
-trx_assign_rseg_low(
-/*================*/
- ulong max_undo_logs, /*!< in: maximum number of UNDO logs to use */
- ulint n_tablespaces) /*!< in: number of rollback tablespaces */
+/** Assign a persistent rollback segment in a round-robin fashion,
+evenly distributed between 0 and innodb_undo_logs-1
+@return persistent rollback segment
+@retval NULL if innodb_read_only */
+static trx_rseg_t* trx_assign_rseg_low()
{
- ulint i;
- trx_rseg_t* rseg;
- static ulint latest_rseg = 0;
-
if (srv_read_only_mode) {
- ut_a(max_undo_logs == ULONG_UNDEFINED);
+ ut_ad(srv_undo_logs == ULONG_UNDEFINED);
return(NULL);
}
- /* This breaks true round robin but that should be OK. */
+ /* The first slot is always assigned to the system tablespace. */
+ ut_ad(trx_sys->rseg_array[0]->space == TRX_SYS_SPACE);
- ut_a(max_undo_logs > 0 && max_undo_logs <= TRX_SYS_N_RSEGS);
+ /* Choose a rollback segment evenly distributed between 0 and
+ innodb_undo_logs-1 in a round-robin fashion, skipping those
+ undo tablespaces that are scheduled for truncation.
- i = latest_rseg++;
- i %= max_undo_logs;
-
- /* Note: The assumption here is that there can't be any gaps in
- the array. Once we implement more flexible rollback segment
- management this may not hold. The assertion checks for that case. */
+ Because rseg_slot is not protected by atomics or any mutex, race
+ conditions are possible, meaning that multiple transactions
+ that start modifications concurrently will write their undo
+ log to the same rollback segment. */
+ static ulong rseg_slot;
+ ulint slot = rseg_slot++ % srv_undo_logs;
+ trx_rseg_t* rseg;
- if (trx_sys->rseg_array[0] == NULL) {
- return(NULL);
- }
+#ifdef UNIV_DEBUG
+ ulint start_scan_slot = slot;
+ bool look_for_rollover = false;
+#endif /* UNIV_DEBUG */
- /* Skip the system tablespace if we have more than one tablespace
- defined for rollback segments. We want all UNDO records to be in
- the non-system tablespaces. */
+ bool allocated = false;
do {
- rseg = trx_sys->rseg_array[i];
- ut_a(rseg == NULL || i == rseg->id);
+ for (;;) {
+ rseg = trx_sys->rseg_array[slot];
+
+#ifdef UNIV_DEBUG
+ /* Ensure that we are not revisiting the same
+ slot that we have already inspected. */
+ if (look_for_rollover) {
+ ut_ad(start_scan_slot != slot);
+ }
+ look_for_rollover = true;
+#endif /* UNIV_DEBUG */
+
+ slot = (slot + 1) % srv_undo_logs;
- i = (rseg == NULL) ? 0 : i + 1;
+ if (rseg == NULL) {
+ continue;
+ }
+
+ ut_ad(rseg->is_persistent());
+
+ if (rseg->space != TRX_SYS_SPACE) {
+ if (rseg->skip_allocation
+ || !srv_undo_tablespaces) {
+ continue;
+ }
+ } else if (trx_rseg_t* next
+ = trx_sys->rseg_array[slot]) {
+ if (next->space != TRX_SYS_SPACE
+ && srv_undo_tablespaces > 0) {
+ /* If dedicated
+ innodb_undo_tablespaces have
+ been configured, try to use them
+ instead of the system tablespace. */
+ continue;
+ }
+ }
- } while (rseg == NULL
- || (rseg->space == 0
- && n_tablespaces > 0
- && trx_sys->rseg_array[1] != NULL));
+ break;
+ }
+ /* By now we have only selected the rseg but not marked it
+ allocated. By marking it allocated we are ensuring that it will
+ never be selected for UNDO truncate purge. */
+ mutex_enter(&rseg->mutex);
+ if (!rseg->skip_allocation) {
+ rseg->trx_ref_count++;
+ allocated = true;
+ }
+ mutex_exit(&rseg->mutex);
+ } while (!allocated);
+
+ ut_ad(rseg->trx_ref_count > 0);
+ ut_ad(rseg->is_persistent());
return(rseg);
}
-/****************************************************************//**
-Assign a read-only transaction a rollback-segment, if it is attempting
-to write to a TEMPORARY table. */
-UNIV_INTERN
-void
-trx_assign_rseg(
-/*============*/
- trx_t* trx) /*!< A read-only transaction that
- needs to be assigned a RBS. */
+/** Set the innodb_log_optimize_ddl page flush observer
+@param[in] space_id tablespace id
+@param[in,out] stage performance_schema accounting */
+void trx_t::set_flush_observer(ulint space_id, ut_stage_alter_t* stage)
+{
+ flush_observer = UT_NEW_NOKEY(FlushObserver(space_id, this, stage));
+}
+
+/** Remove the flush observer */
+void trx_t::remove_flush_observer()
{
- ut_a(trx->rseg == 0);
- ut_a(trx->read_only);
- ut_a(!srv_read_only_mode);
- ut_a(!trx_is_autocommit_non_locking(trx));
+ UT_DELETE(flush_observer);
+ flush_observer = NULL;
+}
+
+/** Assign a rollback segment for modifying temporary tables.
+@return the assigned rollback segment */
+trx_rseg_t*
+trx_t::assign_temp_rseg()
+{
+ ut_ad(!rsegs.m_noredo.rseg);
+ ut_ad(!trx_is_autocommit_non_locking(this));
+ compile_time_assert(ut_is_2pow(TRX_SYS_N_RSEGS));
+
+ /* Choose a temporary rollback segment between 0 and 127
+ in a round-robin fashion. Because rseg_slot is not protected by
+ atomics or any mutex, race conditions are possible, meaning that
+ multiple transactions that start modifications concurrently
+ will write their undo log to the same rollback segment. */
+ static ulong rseg_slot;
+ trx_rseg_t* rseg = trx_sys->temp_rsegs[
+ rseg_slot++ & (TRX_SYS_N_RSEGS - 1)];
+ ut_ad(!rseg->is_persistent());
+ rsegs.m_noredo.rseg = rseg;
+
+ if (id == 0) {
+ mutex_enter(&trx_sys->mutex);
+ id = trx_sys_get_new_trx_id();
+ trx_sys->rw_trx_ids.push_back(id);
+ trx_sys->rw_trx_set.insert(TrxTrack(id, this));
+ mutex_exit(&trx_sys->mutex);
+ }
- trx->rseg = trx_assign_rseg_low(srv_undo_logs, srv_undo_tablespaces);
+ ut_ad(!rseg->is_persistent());
+ return(rseg);
}
/****************************************************************//**
@@ -822,38 +1175,35 @@ static
void
trx_start_low(
/*==========*/
- trx_t* trx) /*!< in: transaction */
+ trx_t* trx, /*!< in: transaction */
+ bool read_write) /*!< in: true if read-write transaction */
{
- ut_ad(trx->rseg == NULL);
-
- ut_ad(trx->start_file != 0);
- ut_ad(trx->start_line != 0);
+ ut_ad(!trx->in_rollback);
ut_ad(!trx->is_recovered);
+ ut_ad(trx->start_line != 0);
+ ut_ad(trx->start_file != 0);
+ ut_ad(trx->roll_limit == 0);
+ ut_ad(trx->error_state == DB_SUCCESS);
+ ut_ad(trx->rsegs.m_redo.rseg == NULL);
+ ut_ad(trx->rsegs.m_noredo.rseg == NULL);
ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
/* Check whether it is an AUTOCOMMIT SELECT */
- trx->auto_commit = (trx->api_trx && trx->api_auto_commit)
- || thd_trx_is_auto_commit(trx->mysql_thd);
+ trx->auto_commit = thd_trx_is_auto_commit(trx->mysql_thd);
- trx->read_only =
- (trx->api_trx && !trx->read_write)
- || (!trx->ddl && thd_trx_is_read_only(trx->mysql_thd))
- || srv_read_only_mode;
+ trx->read_only = srv_read_only_mode
+ || (!trx->ddl && !trx->internal
+ && thd_trx_is_read_only(trx->mysql_thd));
if (!trx->auto_commit) {
++trx->will_lock;
} else if (trx->will_lock == 0) {
- trx->read_only = TRUE;
- }
-
- if (!trx->read_only) {
- trx->rseg = trx_assign_rseg_low(
- srv_undo_logs, srv_undo_tablespaces);
+ trx->read_only = true;
}
#ifdef WITH_WSREP
- trx->xid.null();
+ trx->xid->null();
#endif /* WITH_WSREP */
/* The initial value for trx->no: TRX_ID_MAX is used in
@@ -862,173 +1212,222 @@ trx_start_low(
trx->no = TRX_ID_MAX;
ut_a(ib_vector_is_empty(trx->autoinc_locks));
- ut_a(ib_vector_is_empty(trx->lock.table_locks));
-
- mutex_enter(&trx_sys->mutex);
+ ut_a(trx->lock.table_locks.empty());
/* If this transaction came from trx_allocate_for_mysql(),
trx->in_mysql_trx_list would hold. In that case, the trx->state
change must be protected by the trx_sys->mutex, so that
lock_print_info_all_transactions() will have a consistent view. */
- trx->state = TRX_STATE_ACTIVE;
+ ut_ad(!trx->in_rw_trx_list);
- trx->id = trx_sys_get_new_trx_id();
+ /* We tend to over assert and that complicates the code somewhat.
+ e.g., the transaction state can be set earlier but we are forced to
+ set it under the protection of the trx_sys_t::mutex because some
+ trx list assertions are triggered unnecessarily. */
- ut_ad(!trx->in_rw_trx_list);
- ut_ad(!trx->in_ro_trx_list);
+ /* By default all transactions are in the read-only list unless they
+ are non-locking auto-commit read only transactions or background
+ (internal) transactions. Note: Transactions marked explicitly as
+ read only can write to temporary tables, we put those on the RO
+ list too. */
- if (trx->read_only) {
+ if (!trx->read_only
+ && (trx->mysql_thd == 0 || read_write || trx->ddl)) {
- /* Note: The trx_sys_t::ro_trx_list doesn't really need to
- be ordered, we should exploit this using a list type that
- doesn't need a list wide lock to increase concurrency. */
+ trx->rsegs.m_redo.rseg = trx_assign_rseg_low();
- if (!trx_is_autocommit_non_locking(trx)) {
- UT_LIST_ADD_FIRST(trx_list, trx_sys->ro_trx_list, trx);
- ut_d(trx->in_ro_trx_list = TRUE);
- }
- } else {
+ /* Temporary rseg is assigned only if the transaction
+ updates a temporary table */
- ut_ad(trx->rseg != NULL
+ trx_sys_mutex_enter();
+
+ trx->id = trx_sys_get_new_trx_id();
+
+ trx_sys->rw_trx_ids.push_back(trx->id);
+
+ trx_sys_rw_trx_add(trx);
+
+ ut_ad(trx->rsegs.m_redo.rseg != 0
+ || srv_read_only_mode
|| srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
- ut_ad(!trx_is_autocommit_non_locking(trx));
- UT_LIST_ADD_FIRST(trx_list, trx_sys->rw_trx_list, trx);
- ut_d(trx->in_rw_trx_list = TRUE);
+ UT_LIST_ADD_FIRST(trx_sys->rw_trx_list, trx);
+
+ ut_d(trx->in_rw_trx_list = true);
#ifdef UNIV_DEBUG
if (trx->id > trx_sys->rw_max_trx_id) {
trx_sys->rw_max_trx_id = trx->id;
}
#endif /* UNIV_DEBUG */
- }
- ut_ad(trx_sys_validate_trx_list());
+ trx->state = TRX_STATE_ACTIVE;
- mutex_exit(&trx_sys->mutex);
+ ut_ad(trx_sys_validate_trx_list());
- trx->start_time = time(NULL);
+ trx_sys_mutex_exit();
+ } else {
+ if (!trx_is_autocommit_non_locking(trx)) {
+
+ /* If this is a read-only transaction that is writing
+ to a temporary table then it needs a transaction id
+ to write to the temporary table. */
+
+ if (read_write) {
+
+ trx_sys_mutex_enter();
+
+ ut_ad(!srv_read_only_mode);
+
+ trx->id = trx_sys_get_new_trx_id();
+ trx_sys->rw_trx_ids.push_back(trx->id);
+
+ trx_sys->rw_trx_set.insert(
+ TrxTrack(trx->id, trx));
+
+ trx_sys_mutex_exit();
+ }
+
+ trx->state = TRX_STATE_ACTIVE;
+
+ } else {
+ ut_ad(!read_write);
+ trx->state = TRX_STATE_ACTIVE;
+ }
+ }
+
+ trx->start_time = time(NULL);
trx->start_time_micro = trx->mysql_thd
? thd_query_start_micro(trx->mysql_thd)
: microsecond_interval_timer();
+ ut_a(trx->error_state == DB_SUCCESS);
+
MONITOR_INC(MONITOR_TRX_ACTIVE);
}
-/****************************************************************//**
-Set the transaction serialisation number. */
+/** Set the serialisation number for a persistent committed transaction.
+@param[in,out] trx committed transaction with persistent changes
+@param[in,out] rseg rollback segment for update_undo, or NULL */
static
void
-trx_serialisation_number_get(
-/*=========================*/
- trx_t* trx) /*!< in: transaction */
+trx_serialise(trx_t* trx, trx_rseg_t* rseg)
{
- trx_rseg_t* rseg;
-
- rseg = trx->rseg;
-
- ut_ad(mutex_own(&rseg->mutex));
+ ut_ad(!rseg || rseg == trx->rsegs.m_redo.rseg);
- mutex_enter(&trx_sys->mutex);
+ trx_sys_mutex_enter();
trx->no = trx_sys_get_new_trx_id();
+ /* Track the minimum serialisation number. */
+ UT_LIST_ADD_LAST(trx_sys->serialisation_list, trx);
+
/* If the rollback segment is not empty then the
new trx_t::no can't be less than any trx_t::no
already in the rollback segment. User threads only
produce events when a rollback segment is empty. */
+ if (rseg && rseg->last_page_no == FIL_NULL) {
+ TrxUndoRsegs elem(trx->no);
+ elem.push_back(rseg);
- if (rseg->last_page_no == FIL_NULL) {
- void* ptr;
- rseg_queue_t rseg_queue;
-
- rseg_queue.rseg = rseg;
- rseg_queue.trx_no = trx->no;
-
- mutex_enter(&purge_sys->bh_mutex);
+ mutex_enter(&purge_sys->pq_mutex);
/* This is to reduce the pressure on the trx_sys_t::mutex
though in reality it should make very little (read no)
difference because this code path is only taken when the
rbs is empty. */
- mutex_exit(&trx_sys->mutex);
+ trx_sys_mutex_exit();
- ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
- ut_a(ptr);
+ purge_sys->purge_queue.push(elem);
- mutex_exit(&purge_sys->bh_mutex);
+ mutex_exit(&purge_sys->pq_mutex);
} else {
- mutex_exit(&trx_sys->mutex);
+ trx_sys_mutex_exit();
}
}
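When the rollback segment is empty, trx_serialise() pushes it into purge_sys->purge_queue keyed on the freshly assigned trx->no, so the purge coordinator can always pick up the oldest commit first. A min-heap built from the standard library shows the analogous ordering, with plain numbers standing in for TrxUndoRsegs:

    #include <cassert>
    #include <functional>
    #include <queue>
    #include <vector>

    int main()
    {
            /* Min-heap keyed on the serialisation number. */
            std::priority_queue<unsigned long,
                                std::vector<unsigned long>,
                                std::greater<unsigned long> > purge_queue;

            purge_queue.push(42);   /* hypothetical trx->no values */
            purge_queue.push(7);
            purge_queue.push(19);

            /* The smallest (oldest) serialisation number surfaces
            first, which is the order purge must process undo in. */
            assert(purge_queue.top() == 7);
            return 0;
    }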
/****************************************************************//**
Assign the transaction its history serialisation number and write the
-update UNDO log record to the assigned rollback segment. */
-static MY_ATTRIBUTE((nonnull))
-void
+update UNDO log record to the assigned rollback segment.
+@return true if a serialisation log was written */
+static
+bool
trx_write_serialisation_history(
/*============================*/
trx_t* trx, /*!< in/out: transaction */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
-#ifdef WITH_WSREP
- trx_sysf_t* sys_header;
-#endif /* WITH_WSREP */
- trx_rseg_t* rseg;
+ /* Change the undo log segment states from TRX_UNDO_ACTIVE to some
+ other state: these modifications to the file data structure define
+ the transaction as committed in the file based domain, at the
+ serialization point of the log sequence number lsn obtained below. */
- rseg = trx->rseg;
+ /* We have to hold the rseg mutex because update log headers have
+ to be put to the history list in the (serialisation) order of the
+ UNDO trx number. This is required for the purge in-memory data
+ structures too. */
- /* Change the undo log segment states from TRX_UNDO_ACTIVE
- to some other state: these modifications to the file data
- structure define the transaction as committed in the file
- based domain, at the serialization point of the log sequence
- number lsn obtained below. */
+ if (trx_undo_t* undo = trx->rsegs.m_noredo.undo) {
+ /* Undo log for temporary tables is discarded at transaction
+ commit. There is no purge for temporary tables, and also no
+ MVCC, because they are private to a session. */
- if (trx->update_undo != NULL) {
- page_t* undo_hdr_page;
- trx_undo_t* undo = trx->update_undo;
+ mtr_t temp_mtr;
+ temp_mtr.start();
+ temp_mtr.set_log_mode(MTR_LOG_NO_REDO);
- /* We have to hold the rseg mutex because update
- log headers have to be put to the history list in the
- (serialisation) order of the UNDO trx number. This is
- required for the purge in-memory data structures too. */
+ mutex_enter(&trx->rsegs.m_noredo.rseg->mutex);
+ trx_undo_set_state_at_finish(undo, &temp_mtr);
+ mutex_exit(&trx->rsegs.m_noredo.rseg->mutex);
+ temp_mtr.commit();
+ }
- mutex_enter(&rseg->mutex);
+ if (!trx->rsegs.m_redo.rseg) {
+ ut_ad(!trx->rsegs.m_redo.insert_undo);
+ ut_ad(!trx->rsegs.m_redo.update_undo);
+ return false;
+ }
- /* Assign the transaction serialisation number and also
- update the purge min binary heap if this is the first
- UNDO log being written to the assigned rollback segment. */
+ trx_undo_t* insert = trx->rsegs.m_redo.insert_undo;
+ trx_undo_t* update = trx->rsegs.m_redo.update_undo;
- trx_serialisation_number_get(trx);
+ if (!insert && !update) {
+ return false;
+ }
- /* It is not necessary to obtain trx->undo_mutex here
- because only a single OS thread is allowed to do the
- transaction commit for this transaction. */
+ ut_ad(!trx->read_only);
+ trx_rseg_t* update_rseg = update ? trx->rsegs.m_redo.rseg : NULL;
+ mutex_enter(&trx->rsegs.m_redo.rseg->mutex);
- undo_hdr_page = trx_undo_set_state_at_finish(undo, mtr);
+ /* Assign the transaction serialisation number and add any
+ update_undo log to the purge queue. */
+ trx_serialise(trx, update_rseg);
- trx_undo_update_cleanup(trx, undo_hdr_page, mtr);
- } else {
- mutex_enter(&rseg->mutex);
+ /* It is not necessary to acquire trx->undo_mutex here because
+ only a single OS thread is allowed to commit this transaction. */
+ if (insert) {
+ trx_undo_set_state_at_finish(insert, mtr);
}
+ if (update) {
+ /* The undo logs and possible delete-marked records
+ for updates and deletes will be purged later. */
+ page_t* undo_hdr_page = trx_undo_set_state_at_finish(
+ update, mtr);
- if (trx->insert_undo != NULL) {
- trx_undo_set_state_at_finish(trx->insert_undo, mtr);
+ trx_undo_update_cleanup(trx, undo_hdr_page, mtr);
}
- mutex_exit(&rseg->mutex);
+ mutex_exit(&trx->rsegs.m_redo.rseg->mutex);
MONITOR_INC(MONITOR_TRX_COMMIT_UNDO);
+ trx_sysf_t* sys_header = trx_sysf_get(mtr);
#ifdef WITH_WSREP
- sys_header = trx_sysf_get(mtr);
/* Update latest MySQL wsrep XID in trx sys header. */
- if (wsrep_is_wsrep_xid(&trx->xid))
- {
- trx_sys_update_wsrep_checkpoint(&trx->xid, sys_header, mtr);
+ if (wsrep_is_wsrep_xid(trx->xid)) {
+ trx_sys_update_wsrep_checkpoint(trx->xid, sys_header, mtr);
}
#endif /* WITH_WSREP */
@@ -1036,37 +1435,35 @@ trx_write_serialisation_history(
in trx sys header if MySQL binlogging is on or the database
server is a MySQL replication slave */
- if (trx->mysql_log_file_name
+ if (trx->mysql_log_file_name != NULL
&& trx->mysql_log_file_name[0] != '\0') {
trx_sys_update_mysql_binlog_offset(
trx->mysql_log_file_name,
trx->mysql_log_offset,
- TRX_SYS_MYSQL_LOG_INFO,
-#ifdef WITH_WSREP
- sys_header,
-#endif /* WITH_WSREP */
+ sys_header,
mtr);
trx->mysql_log_file_name = NULL;
}
+
+ return(true);
}
/********************************************************************
Finalize a transaction containing updates for a FTS table. */
-static MY_ATTRIBUTE((nonnull))
+static
void
trx_finalize_for_fts_table(
/*=======================*/
- fts_trx_table_t* ftt) /* in: FTS trx table */
+ fts_trx_table_t* ftt) /* in: FTS trx table */
{
- fts_t* fts = ftt->table->fts;
- fts_doc_ids_t* doc_ids = ftt->added_doc_ids;
- mem_heap_t* heap;
+ fts_t* fts = ftt->table->fts;
+ fts_doc_ids_t* doc_ids = ftt->added_doc_ids;
ut_a(fts->add_wq);
- heap = static_cast<mem_heap_t*>(doc_ids->self_heap->arg);
+ mem_heap_t* heap = static_cast<mem_heap_t*>(doc_ids->self_heap->arg);
ib_wqueue_add(fts->add_wq, doc_ids, heap);
@@ -1076,7 +1473,7 @@ trx_finalize_for_fts_table(
/******************************************************************//**
Finalize a transaction containing updates to FTS tables. */
-static MY_ATTRIBUTE((nonnull))
+static
void
trx_finalize_for_fts(
/*=================*/
@@ -1121,30 +1518,30 @@ trx_flush_log_if_needed_low(
lsn_t lsn) /*!< in: lsn up to which logs are to be
flushed. */
{
+ bool flush = srv_file_flush_method != SRV_NOSYNC;
+
switch (srv_flush_log_at_trx_commit) {
- case 0:
- /* Do nothing */
- break;
- case 1:
- case 3:
- /* Write the log and optionally flush it to disk */
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
- srv_unix_file_flush_method != SRV_UNIX_NOSYNC);
- break;
+ case 3:
case 2:
/* Write the log but do not flush it to disk */
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
-
- break;
- default:
- ut_error;
+ flush = false;
+ /* fall through */
+ case 1:
+ /* Write the log and optionally flush it to disk */
+ log_write_up_to(lsn, flush);
+ return;
+ case 0:
+ /* Do nothing */
+ return;
}
+
+ ut_error;
}
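The rewritten switch folds settings 3 and 2 into the write-without-flush path and falls through into the shared log_write_up_to() call for setting 1. A compact sketch of the resulting (write, flush) behaviour per value as this hunk implements it, assuming the file flush method is not SRV_NOSYNC; commit_log_action is a hypothetical helper, not an InnoDB function:

    #include <cassert>
    #include <utility>

    /* Mirrors the control flow of the switch above: returns
    {write log, flush to disk} for innodb_flush_log_at_trx_commit. */
    static std::pair<bool, bool> commit_log_action(unsigned setting)
    {
            bool flush = true;      /* flush method != SRV_NOSYNC */
            switch (setting) {
            case 3:
            case 2:
                    flush = false;
                    /* fall through */
            case 1:
                    return std::make_pair(true, flush);
            case 0:
                    return std::make_pair(false, false);
            }
            return std::make_pair(false, false);    /* ut_error path */
    }

    int main()
    {
            assert(commit_log_action(0) == std::make_pair(false, false));
            assert(commit_log_action(1) == std::make_pair(true, true));
            assert(commit_log_action(2) == std::make_pair(true, false));
            assert(commit_log_action(3) == std::make_pair(true, false));
            return 0;
    }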
/**********************************************************************//**
If required, flushes the log to disk based on the value of
innodb_flush_log_at_trx_commit. */
-static MY_ATTRIBUTE((nonnull))
+static
void
trx_flush_log_if_needed(
/*====================*/
@@ -1157,30 +1554,116 @@ trx_flush_log_if_needed(
trx->op_info = "";
}
+/**********************************************************************//**
+For each table that has been modified by the given transaction: update
+its dict_table_t::update_time with the current timestamp. Clear the list
+of the modified tables at the end. */
+static
+void
+trx_update_mod_tables_timestamp(
+/*============================*/
+ trx_t* trx) /*!< in: transaction */
+{
+
+ ut_ad(trx->id != 0);
+
+ /* consider using trx->start_time if calling time() is too
+ expensive here */
+ const time_t now = time(NULL);
+
+ trx_mod_tables_t::const_iterator end = trx->mod_tables.end();
+
+ for (trx_mod_tables_t::const_iterator it = trx->mod_tables.begin();
+ it != end;
+ ++it) {
+
+ /* This could be executed by multiple threads concurrently
+ on the same table object. This is fine because time_t is
+ word size or less. And _purely_ _theoretically_, even if
+ a time_t write is not atomic, the value of 'now' is likely
+ the same in all threads, and even if it is not, storing
+ garbage in table->update_time is justified because
+ protecting it with a latch here would be too intrusive
+ performance-wise. */
+ (*it)->update_time = now;
+ }
+
+ trx->mod_tables.clear();
+}
+
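
A compilable analogue of trx_update_mod_tables_timestamp() above, with stand-in Table and Trx types; it shows the single time() read shared across all modified tables and the unconditional clear at the end:

// Minimal analogue: stamp every table touched by the transaction with one
// time(NULL) call, then clear the set. Table and Trx are illustrative
// stand-ins for dict_table_t and trx_t, not the InnoDB types.
#include <ctime>
#include <set>

struct Table { time_t update_time = 0; };
struct Trx   { std::set<Table*> mod_tables; };

void update_mod_tables_timestamp(Trx& trx) {
    const time_t now = time(nullptr); // one clock read for the whole set
    for (Table* t : trx.mod_tables) {
        t->update_time = now; // unlatched write; see the rationale above
    }
    trx.mod_tables.clear();
}

int main() {
    Table a, b;
    Trx trx;
    trx.mod_tables = {&a, &b};
    update_mod_tables_timestamp(trx);
    return a.update_time == b.update_time ? 0 : 1;
}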
+/**
+Erase the transaction from the running transaction lists and the
+serialisation list. The active RW transaction list of an MVCC snapshot
+(ReadView::prepare) won't include this transaction after this call. All
+implicit locks are also released by this call as trx is removed from
+rw_trx_list.
+@param[in] trx Transaction to erase, must have an ID > 0
+@param[in] serialised true if serialisation log was written */
+static
+void
+trx_erase_lists(
+ trx_t* trx,
+ bool serialised)
+{
+ ut_ad(trx->id > 0);
+ trx_sys_mutex_enter();
+
+ if (serialised) {
+ UT_LIST_REMOVE(trx_sys->serialisation_list, trx);
+ }
+
+ trx_ids_t::iterator it = std::lower_bound(
+ trx_sys->rw_trx_ids.begin(),
+ trx_sys->rw_trx_ids.end(),
+ trx->id);
+ ut_ad(*it == trx->id);
+ trx_sys->rw_trx_ids.erase(it);
+
+ if (trx->read_only || trx->rsegs.m_redo.rseg == NULL) {
+
+ ut_ad(!trx->in_rw_trx_list);
+ } else {
+
+ UT_LIST_REMOVE(trx_sys->rw_trx_list, trx);
+ ut_d(trx->in_rw_trx_list = false);
+ ut_ad(trx_sys_validate_trx_list());
+
+ if (trx->read_view != NULL) {
+ trx_sys->mvcc->view_close(trx->read_view, true);
+ }
+ }
+
+ trx_sys->rw_trx_set.erase(TrxTrack(trx->id));
+
+ trx_sys_mutex_exit();
+}
+
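
The rw_trx_ids vector is kept sorted by ID, so trx_erase_lists() can locate a departing ID with std::lower_bound in O(log n). A self-contained sketch of that erase pattern:

// Sorted-vector erase, mirroring the rw_trx_ids handling above.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

using trx_id_t = uint64_t;

void erase_trx_id(std::vector<trx_id_t>& rw_trx_ids, trx_id_t id) {
    auto it = std::lower_bound(rw_trx_ids.begin(), rw_trx_ids.end(), id);
    assert(it != rw_trx_ids.end() && *it == id); // mirrors ut_ad(*it == trx->id)
    rw_trx_ids.erase(it);
}

int main() {
    std::vector<trx_id_t> ids = {3, 7, 11};
    erase_trx_id(ids, 7);
    return ids == std::vector<trx_id_t>{3, 11} ? 0 : 1;
}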
/****************************************************************//**
Commits a transaction in memory. */
-static MY_ATTRIBUTE((nonnull))
+static
void
trx_commit_in_memory(
/*=================*/
- trx_t* trx, /*!< in/out: transaction */
- lsn_t lsn) /*!< in: log sequence number of the mini-transaction
- commit of trx_write_serialisation_history(), or 0
- if the transaction did not modify anything */
+ trx_t* trx, /*!< in/out: transaction */
+ const mtr_t* mtr, /*!< in: mini-transaction of
+ trx_write_serialisation_history(), or NULL if
+ the transaction did not modify anything */
+ bool serialised)
+ /*!< in: true if serialisation log was
+ written */
{
- trx->must_flush_log_later = FALSE;
+ trx->must_flush_log_later = false;
if (trx_is_autocommit_non_locking(trx)) {
+ ut_ad(trx->id == 0);
ut_ad(trx->read_only);
ut_a(!trx->is_recovered);
- ut_ad(trx->rseg == NULL);
- ut_ad(!trx->in_ro_trx_list);
+ ut_ad(trx->rsegs.m_redo.rseg == NULL);
ut_ad(!trx->in_rw_trx_list);
/* Note: We are asserting without holding the lock mutex. But
that is OK because this transaction is not waiting and cannot
- be rolled back and no new locks can (or should not) be added
- becuase it is flagged as a non-locking read-only transaction. */
+ be rolled back and no new locks can (or should) be added
+ because it is flagged as a non-locking read-only transaction. */
ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
@@ -1193,65 +1676,82 @@ trx_commit_in_memory(
ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
- trx->state = TRX_STATE_NOT_STARTED;
-
- read_view_remove(trx->global_read_view, false);
+ if (trx->read_view != NULL) {
+ trx_sys->mvcc->view_close(trx->read_view, false);
+ }
MONITOR_INC(MONITOR_TRX_NL_RO_COMMIT);
- } else {
- lock_trx_release_locks(trx);
-
- /* Remove the transaction from the list of active
- transactions now that it no longer holds any user locks. */
- ut_ad(trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
+ DBUG_LOG("trx", "Autocommit in memory: " << trx);
+ trx->state = TRX_STATE_NOT_STARTED;
+ } else {
+#ifdef UNIV_DEBUG
+ if (!UT_LIST_GET_LEN(trx->lock.trx_locks)) {
+ for (lock_list::iterator it
+ = trx->lock.table_locks.begin();
+ it != trx->lock.table_locks.end();
+ it++) {
+ ut_ad(!*it);
+ }
+ }
+#endif /* UNIV_DEBUG */
+ trx_mutex_enter(trx);
+ trx->commit_state();
+ trx_mutex_exit(trx);
+
+ if (trx->id) {
+ trx_erase_lists(trx, serialised);
+
+ /* Wait for any implicit-to-explicit lock
+ conversions to cease, so that there will be no
+ race condition in lock_release(). */
+ while (UNIV_UNLIKELY(trx->is_referenced())) {
+ ut_delay(srv_spin_wait_delay);
+ }
- mutex_enter(&trx_sys->mutex);
+ trx->release_locks();
+ trx->id = 0;
+ } else {
+ ut_ad(trx->read_only || !trx->rsegs.m_redo.rseg);
+ ut_ad(!trx->in_rw_trx_list);
+ trx->release_locks();
+ }
- assert_trx_in_list(trx);
+ DEBUG_SYNC_C("after_trx_committed_in_memory");
- if (trx->read_only) {
- UT_LIST_REMOVE(trx_list, trx_sys->ro_trx_list, trx);
- ut_d(trx->in_ro_trx_list = FALSE);
+ if (trx->read_only || !trx->rsegs.m_redo.rseg) {
MONITOR_INC(MONITOR_TRX_RO_COMMIT);
+ if (trx->read_view) {
+ trx_sys->mvcc->view_close(
+ trx->read_view, false);
+ }
} else {
- UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
- ut_d(trx->in_rw_trx_list = FALSE);
MONITOR_INC(MONITOR_TRX_RW_COMMIT);
}
-
- /* If this transaction came from trx_allocate_for_mysql(),
- trx->in_mysql_trx_list would hold. In that case, the
- trx->state change must be protected by trx_sys->mutex, so that
- lock_print_info_all_transactions() will have a consistent
- view. */
-
- trx->state = TRX_STATE_NOT_STARTED;
-
- /* We already own the trx_sys_t::mutex, by doing it here we
- avoid a potential context switch later. */
- read_view_remove(trx->global_read_view, true);
-
- ut_ad(trx_sys_validate_trx_list());
-
- mutex_exit(&trx_sys->mutex);
}
- if (trx->global_read_view != NULL) {
+ ut_ad(!trx->rsegs.m_redo.update_undo);
- mem_heap_empty(trx->global_read_view_heap);
+ if (trx_rseg_t* rseg = trx->rsegs.m_redo.rseg) {
+ mutex_enter(&rseg->mutex);
+ ut_ad(rseg->trx_ref_count > 0);
+ --rseg->trx_ref_count;
+ mutex_exit(&rseg->mutex);
- trx->global_read_view = NULL;
+ if (trx_undo_t*& insert = trx->rsegs.m_redo.insert_undo) {
+ ut_ad(insert->rseg == rseg);
+ trx_undo_commit_cleanup(insert, false);
+ insert = NULL;
+ }
}
- trx->read_view = NULL;
-
- if (lsn) {
- DEBUG_SYNC_C("after_trx_committed_in_memory");
-
- if (trx->insert_undo != NULL) {
+ ut_ad(!trx->rsegs.m_redo.insert_undo);
- trx_undo_insert_cleanup(trx);
+ if (mtr != NULL) {
+ if (trx_undo_t*& undo = trx->rsegs.m_noredo.undo) {
+ ut_ad(undo->rseg == trx->rsegs.m_noredo.rseg);
+ trx_undo_commit_cleanup(undo, true);
+ undo = NULL;
}
/* NOTE that we could possibly make a group commit more
@@ -1282,9 +1782,13 @@ trx_commit_in_memory(
mutex would serialize all commits and prevent a group of
transactions from gathering. */
- if (trx->flush_log_later) {
+ lsn_t lsn = mtr->commit_lsn();
+
+ if (lsn == 0) {
+ /* Nothing to be done. */
+ } else if (trx->flush_log_later) {
/* Do nothing yet */
- trx->must_flush_log_later = TRUE;
+ trx->must_flush_log_later = true;
} else if (srv_flush_log_at_trx_commit == 0) {
/* Do nothing */
} else {
@@ -1300,50 +1804,39 @@ trx_commit_in_memory(
srv_active_wake_master_thread();
}
- /* undo_no is non-zero if we're doing the final commit. */
- bool not_rollback = trx->undo_no != 0;
+ ut_ad(!trx->rsegs.m_noredo.undo);
+
/* Free all savepoints, starting from the first. */
trx_named_savept_t* savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
- trx_roll_savepoints_free(trx, savep);
-
- trx->rseg = NULL;
- trx->undo_no = 0;
- trx->last_sql_stat_start.least_undo_no = 0;
- trx->ddl = false;
-#ifdef UNIV_DEBUG
- ut_ad(trx->start_file != 0);
- ut_ad(trx->start_line != 0);
- trx->start_file = 0;
- trx->start_line = 0;
-#endif /* UNIV_DEBUG */
-
- trx->will_lock = 0;
- trx->read_only = FALSE;
- trx->auto_commit = FALSE;
+ trx_roll_savepoints_free(trx, savep);
- if (trx->fts_trx) {
- trx_finalize_for_fts(trx, not_rollback);
+ if (trx->fts_trx != NULL) {
+ trx_finalize_for_fts(trx, trx->undo_no != 0);
}
- ut_ad(trx->lock.wait_thr == NULL);
- ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
- ut_ad(!trx->in_ro_trx_list);
- ut_ad(!trx->in_rw_trx_list);
-
- trx->lock.was_chosen_as_deadlock_victim = FALSE;
+ trx_mutex_enter(trx);
trx->dict_operation = TRX_DICT_OP_NONE;
+ trx->lock.was_chosen_as_deadlock_victim = false;
- trx->error_state = DB_SUCCESS;
+ DBUG_LOG("trx", "Commit in memory: " << trx);
+ trx->state = TRX_STATE_NOT_STARTED;
/* trx->in_mysql_trx_list would hold between
trx_allocate_for_mysql() and trx_free_for_mysql(). It does not
hold for recovered transactions or system transactions. */
+ assert_trx_is_free(trx);
+
+ trx_init(trx);
+
+ trx_mutex_exit(trx);
+
+ ut_a(trx->error_state == DB_SUCCESS);
+ srv_wake_purge_thread_if_not_active();
}
/****************************************************************//**
Commits a transaction and a mini-transaction. */
-UNIV_INTERN
void
trx_commit_low(
/*===========*/
@@ -1351,15 +1844,13 @@ trx_commit_low(
mtr_t* mtr) /*!< in/out: mini-transaction (will be committed),
or NULL if trx made no modifications */
{
- lsn_t lsn;
-
assert_trx_nonlocking_or_in_list(trx);
ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
- ut_ad(!mtr || mtr->state == MTR_ACTIVE);
- ut_ad(!mtr == !(trx->insert_undo || trx->update_undo));
+ ut_ad(!mtr || mtr->is_active());
+ ut_ad(!mtr == !trx->has_logged());
/* undo_no is non-zero if we're doing the final commit. */
- if (trx->fts_trx && trx->undo_no != 0) {
+ if (trx->fts_trx != NULL && trx->undo_no != 0) {
dberr_t error;
ut_a(!trx_is_autocommit_non_locking(trx));
@@ -1380,8 +1871,11 @@ trx_commit_low(
}
}
- if (mtr) {
- trx_write_serialisation_history(trx, mtr);
+ bool serialised;
+
+ if (mtr != NULL) {
+ serialised = trx_write_serialisation_history(trx, mtr);
+
/* The following call commits the mini-transaction, making the
whole transaction committed in the file-based world, at this
log sequence number. The transaction becomes 'durable' when
@@ -1401,30 +1895,53 @@ trx_commit_low(
/*--------------*/
mtr_commit(mtr);
+
+ DBUG_EXECUTE_IF("ib_crash_during_trx_commit_in_mem",
+ if (trx->has_logged()) {
+ log_write_up_to(mtr->commit_lsn(),
+ true);
+ DBUG_SUICIDE();
+ });
/*--------------*/
- lsn = mtr->end_lsn;
+
} else {
- lsn = 0;
+ serialised = false;
+ }
+#ifndef DBUG_OFF
+ /* In case this function is called from a stack executing
+ THD::release_resources -> ...
+ innobase_connection_close() ->
+ trx_rollback_for_mysql... -> .
+ MySQL's thd does not seem to have
+ thd->debug_sync_control defined any longer. However, the stack
+ is possible only with a prepared trx that is not updating any data.
+ */
+ if (trx->mysql_thd != NULL && trx->has_logged_persistent()) {
+ DEBUG_SYNC_C("before_trx_state_committed_in_memory");
}
+#endif
- trx_commit_in_memory(trx, lsn);
+ trx_commit_in_memory(trx, mtr, serialised);
}
/****************************************************************//**
Commits a transaction. */
-UNIV_INTERN
void
trx_commit(
/*=======*/
trx_t* trx) /*!< in/out: transaction */
{
- mtr_t local_mtr;
mtr_t* mtr;
+ mtr_t local_mtr;
+
+ DBUG_EXECUTE_IF("ib_trx_commit_crash_before_trx_commit_start",
+ DBUG_SUICIDE(););
- if (trx->insert_undo || trx->update_undo) {
+ if (trx->has_logged()) {
mtr = &local_mtr;
- mtr_start(mtr);
+ mtr->start();
} else {
+
mtr = NULL;
}
@@ -1435,41 +1952,44 @@ trx_commit(
Cleans up a transaction at database startup. The cleanup is needed if
the transaction already got to the middle of a commit when the database
crashed, and we cannot roll it back. */
-UNIV_INTERN
void
trx_cleanup_at_db_startup(
/*======================*/
trx_t* trx) /*!< in: transaction */
{
ut_ad(trx->is_recovered);
+ ut_ad(!trx->rsegs.m_noredo.undo);
+ ut_ad(!trx->rsegs.m_redo.update_undo);
- if (trx->insert_undo != NULL) {
-
- trx_undo_insert_cleanup(trx);
+ if (trx_undo_t*& undo = trx->rsegs.m_redo.insert_undo) {
+ ut_ad(undo->rseg == trx->rsegs.m_redo.rseg);
+ trx_undo_commit_cleanup(undo, false);
+ undo = NULL;
}
- trx->rseg = NULL;
+ memset(&trx->rsegs, 0x0, sizeof(trx->rsegs));
trx->undo_no = 0;
+ trx->undo_rseg_space = 0;
trx->last_sql_stat_start.least_undo_no = 0;
- mutex_enter(&trx_sys->mutex);
+ trx_sys_mutex_enter();
ut_a(!trx->read_only);
- UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
+ UT_LIST_REMOVE(trx_sys->rw_trx_list, trx);
- assert_trx_in_rw_list(trx);
ut_d(trx->in_rw_trx_list = FALSE);
- mutex_exit(&trx_sys->mutex);
+ trx_sys_mutex_exit();
/* Change the transaction state without mutex protection, now
that it no longer is in the trx_list. Recovered transactions
are never placed in the mysql_trx_list. */
ut_ad(trx->is_recovered);
- ut_ad(!trx->in_ro_trx_list);
ut_ad(!trx->in_rw_trx_list);
ut_ad(!trx->in_mysql_trx_list);
+ DBUG_LOG("trx", "Cleanup at startup: " << trx);
+ trx->id = 0;
trx->state = TRX_STATE_NOT_STARTED;
}
@@ -1477,25 +1997,21 @@ trx_cleanup_at_db_startup(
Assigns a read view for a consistent read query. All the consistent reads
within the same transaction will get the same read view, which is created
when this function is first called for a new started transaction.
-@return consistent read view */
-UNIV_INTERN
-read_view_t*
+@return consistent read view */
+ReadView*
trx_assign_read_view(
/*=================*/
- trx_t* trx) /*!< in: active transaction */
+ trx_t* trx) /*!< in/out: active transaction */
{
ut_ad(trx->state == TRX_STATE_ACTIVE);
- if (trx->read_view != NULL) {
- return(trx->read_view);
- }
-
- if (!trx->read_view) {
+ if (srv_read_only_mode) {
- trx->read_view = read_view_open_now(
- trx->id, trx->global_read_view_heap);
+ ut_ad(trx->read_view == NULL);
+ return(NULL);
- trx->global_read_view = trx->read_view;
+ } else if (!MVCC::is_view_active(trx->read_view)) {
+ trx_sys->mvcc->view_open(trx->read_view, trx);
}
return(trx->read_view);
@@ -1503,7 +2019,6 @@ trx_assign_read_view(
/****************************************************************//**
Prepares a transaction for commit/rollback. */
-UNIV_INTERN
void
trx_commit_or_rollback_prepare(
/*===========================*/
@@ -1516,12 +2031,9 @@ trx_commit_or_rollback_prepare(
switch (trx->state) {
case TRX_STATE_NOT_STARTED:
-#ifdef WITH_WSREP
- ut_d(trx->start_file = __FILE__);
- ut_d(trx->start_line = __LINE__);
-#endif /* WITH_WSREP */
- trx_start_low(trx);
+ trx_start_low(trx, true);
/* fall through */
+
case TRX_STATE_ACTIVE:
case TRX_STATE_PREPARED:
case TRX_STATE_PREPARED_RECOVERED:
@@ -1539,6 +2051,7 @@ trx_commit_or_rollback_prepare(
ut_a(trx->lock.n_active_thrs == 1);
return;
+
case TRX_STATE_COMMITTED_IN_MEMORY:
break;
}
@@ -1548,8 +2061,7 @@ trx_commit_or_rollback_prepare(
/*********************************************************************//**
Creates a commit command node struct.
-@return own: commit node struct */
-UNIV_INTERN
+@return own: commit node struct */
commit_node_t*
trx_commit_node_create(
/*===================*/
@@ -1566,8 +2078,7 @@ trx_commit_node_create(
/***********************************************************//**
Performs an execution step for a commit type node in a query graph.
-@return query thread to run next, or NULL */
-UNIV_INTERN
+@return query thread to run next, or NULL */
que_thr_t*
trx_commit_step(
/*============*/
@@ -1617,8 +2128,7 @@ trx_commit_step(
/**********************************************************************//**
Does the transaction commit for MySQL.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
+@return DB_SUCCESS or error number */
dberr_t
trx_commit_for_mysql(
/*=================*/
@@ -1628,31 +2138,24 @@ trx_commit_for_mysql(
sig to the transaction, we must here make sure that trx has been
started. */
- ut_a(trx);
-
switch (trx->state) {
case TRX_STATE_NOT_STARTED:
- /* Update the info whether we should skip XA steps that eat
- CPU time.
-
- For the duration of the transaction trx->support_xa is
- not reread from thd so any changes in the value take
- effect in the next transaction. This is to avoid a
- scenario where some undo log records generated by a
- transaction contain XA information and other undo log
- records, generated by the same transaction do not. */
- trx->support_xa = thd_supports_xa(trx->mysql_thd);
-
ut_d(trx->start_file = __FILE__);
ut_d(trx->start_line = __LINE__);
- trx_start_low(trx);
+ trx_start_low(trx, true);
/* fall through */
case TRX_STATE_ACTIVE:
case TRX_STATE_PREPARED:
case TRX_STATE_PREPARED_RECOVERED:
trx->op_info = "committing";
+
+ if (trx->id != 0) {
+ trx_update_mod_tables_timestamp(trx);
+ }
+
trx_commit(trx);
+
MONITOR_DEC(MONITOR_TRX_ACTIVE);
trx->op_info = "";
return(DB_SUCCESS);
@@ -1666,27 +2169,25 @@ trx_commit_for_mysql(
/**********************************************************************//**
If required, flushes the log to disk if we called trx_commit_for_mysql()
with trx->flush_log_later == TRUE. */
-UNIV_INTERN
void
trx_commit_complete_for_mysql(
/*==========================*/
trx_t* trx) /*!< in/out: transaction */
{
- ut_a(trx);
-
- if (!trx->must_flush_log_later
+ if (trx->id != 0
+ || !trx->must_flush_log_later
|| (srv_flush_log_at_trx_commit == 1 && trx->active_commit_ordered)) {
+
return;
}
trx_flush_log_if_needed(trx->commit_lsn, trx);
- trx->must_flush_log_later = FALSE;
+ trx->must_flush_log_later = false;
}
/**********************************************************************//**
Marks the latest SQL statement ended. */
-UNIV_INTERN
void
trx_mark_sql_stat_end(
/*==================*/
@@ -1701,11 +2202,12 @@ trx_mark_sql_stat_end(
break;
case TRX_STATE_NOT_STARTED:
trx->undo_no = 0;
+ trx->undo_rseg_space = 0;
/* fall through */
case TRX_STATE_ACTIVE:
trx->last_sql_stat_start.least_undo_no = trx->undo_no;
- if (trx->fts_trx) {
+ if (trx->fts_trx != NULL) {
fts_savepoint_laststmt_refresh(trx);
}
@@ -1718,7 +2220,6 @@ trx_mark_sql_stat_end(
/**********************************************************************//**
Prints info about a transaction.
Caller must hold trx_sys->mutex. */
-UNIV_INTERN
void
trx_print_low(
/*==========*/
@@ -1739,9 +2240,9 @@ trx_print_low(
ibool newline;
const char* op_info;
- ut_ad(mutex_own(&trx_sys->mutex));
+ ut_ad(trx_sys_mutex_own());
- fprintf(f, "TRANSACTION " TRX_ID_FMT, trx->id);
+ fprintf(f, "TRANSACTION " TRX_ID_FMT, trx_get_id_for_print(trx));
/* trx->state cannot change from or to NOT_STARTED while we
are holding the trx_sys->mutex. It may change from ACTIVE to
@@ -1821,11 +2322,6 @@ state_ok:
(ulong) n_rec_locks);
}
- if (trx->has_search_latch) {
- newline = TRUE;
- fputs(", holds adaptive hash latch", f);
- }
-
if (trx->undo_no != 0) {
newline = TRUE;
fprintf(f, ", undo log entries " TRX_ID_FMT, trx->undo_no);
@@ -1835,7 +2331,7 @@ state_ok:
putc('\n', f);
}
- if (trx->mysql_thd != NULL) {
+ if (trx->state != TRX_STATE_NOT_STARTED && trx->mysql_thd != NULL) {
innobase_mysql_print_thd(
f, trx->mysql_thd, static_cast<uint>(max_query_len));
}
@@ -1845,7 +2341,6 @@ state_ok:
Prints info about a transaction.
The caller must hold lock_sys->mutex and trx_sys->mutex.
When possible, use trx_print() instead. */
-UNIV_INTERN
void
trx_print_latched(
/*==============*/
@@ -1855,7 +2350,7 @@ trx_print_latched(
or 0 to use the default max length */
{
ut_ad(lock_mutex_own());
- ut_ad(mutex_own(&trx_sys->mutex));
+ ut_ad(trx_sys_mutex_own());
trx_print_low(f, trx, max_query_len,
lock_number_of_rows_locked(&trx->lock),
@@ -1873,7 +2368,6 @@ without locking lock_sys->mutex. */
UNIV_INTERN
void
wsrep_trx_print_locking(
-/*==========*/
FILE* f,
/*!< in: output stream */
const trx_t* trx,
@@ -1956,11 +2450,6 @@ state_ok:
fprintf(f, "que state %lu ", (ulong) trx->lock.que_state);
}
- if (trx->has_search_latch) {
- newline = TRUE;
- fputs(", holds adaptive hash latch", f);
- }
-
if (trx->undo_no != 0) {
newline = TRUE;
fprintf(f, ", undo log entries " TRX_ID_FMT, trx->undo_no);
@@ -1979,7 +2468,6 @@ state_ok:
/**********************************************************************//**
Prints info about a transaction.
Acquires and releases lock_sys->mutex and trx_sys->mutex. */
-UNIV_INTERN
void
trx_print(
/*======*/
@@ -1999,8 +2487,10 @@ trx_print(
lock_mutex_exit();
mutex_enter(&trx_sys->mutex);
+
trx_print_low(f, trx, max_query_len,
n_rec_locks, n_trx_locks, heap_size);
+
mutex_exit(&trx_sys->mutex);
}
@@ -2009,17 +2499,16 @@ trx_print(
Asserts that a transaction has been started.
The caller must hold trx_sys->mutex.
@return TRUE if started */
-UNIV_INTERN
ibool
trx_assert_started(
/*===============*/
const trx_t* trx) /*!< in: transaction */
{
- ut_ad(mutex_own(&trx_sys->mutex));
+ ut_ad(trx_sys_mutex_own());
/* Non-locking autocommits should not hold any locks and this
function is only called from the locking code. */
- assert_trx_in_list(trx);
+ check_trx_state(trx);
/* trx->state can change from or to NOT_STARTED while we are holding
trx_sys->mutex for non-locking autocommit selects but not for other
@@ -2045,103 +2534,124 @@ trx_assert_started(
#endif /* UNIV_DEBUG */
/*******************************************************************//**
-Compares the "weight" (or size) of two transactions. The heavier the weight,
-the more reluctant we will be to choose the transaction as a deadlock victim.
-@return TRUE if weight(a) >= weight(b) */
-UNIV_INTERN
-ibool
+Compares the "weight" (or size) of two transactions. Transactions that
+have edited non-transactional tables are considered heavier than ones
+that have not.
+@return TRUE if weight(a) >= weight(b) */
+bool
trx_weight_ge(
/*==========*/
- const trx_t* a, /*!< in: the first transaction to be compared */
- const trx_t* b) /*!< in: the second transaction to be compared */
+ const trx_t* a, /*!< in: transaction to be compared */
+ const trx_t* b) /*!< in: transaction to be compared */
{
- int pref;
+ ibool a_notrans_edit;
+ ibool b_notrans_edit;
- /* First ask the upper server layer if it has any preference for which
- to prefer as a deadlock victim. */
- pref= thd_deadlock_victim_preference(a->mysql_thd, b->mysql_thd);
- if (pref < 0) {
- return FALSE;
- } else if (pref > 0) {
- return TRUE;
- }
+ /* If mysql_thd is NULL for a transaction we assume that it has
+ not edited non-transactional tables. */
- /* Upper server layer had no preference, we fall back to comparing the
- number of altered/locked rows. */
+ a_notrans_edit = a->mysql_thd != NULL
+ && thd_has_edited_nontrans_tables(a->mysql_thd);
-#if 0
- fprintf(stderr,
- "%s TRX_WEIGHT(a): %lld+%lu, TRX_WEIGHT(b): %lld+%lu\n",
- __func__,
- a->undo_no, UT_LIST_GET_LEN(a->lock.trx_locks),
- b->undo_no, UT_LIST_GET_LEN(b->lock.trx_locks));
-#endif
+ b_notrans_edit = b->mysql_thd != NULL
+ && thd_has_edited_nontrans_tables(b->mysql_thd);
+
+ if (a_notrans_edit != b_notrans_edit) {
+
+ return(a_notrans_edit);
+ }
+
+ /* Either both had edited non-transactional tables or both had
+ not, we fall back to comparing the number of altered/locked
+ rows. */
return(TRX_WEIGHT(a) >= TRX_WEIGHT(b));
}
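
A standalone sketch of the comparison above, using stand-in types: edits to non-transactional tables dominate, and only when both sides agree on that flag does the undo-size-plus-locks weight (the TRX_WEIGHT idea) decide:

// Victim-weight rule sketch; Trx and weight() are illustrative stand-ins.
#include <cstdint>

struct Trx {
    bool     edited_nontrans;
    uint64_t undo_no;
    uint64_t n_locks;
};

static uint64_t weight(const Trx& t) { return t.undo_no + t.n_locks; }

bool trx_weight_ge(const Trx& a, const Trx& b) {
    if (a.edited_nontrans != b.edited_nontrans) {
        return a.edited_nontrans; // non-transactional edits dominate
    }
    return weight(a) >= weight(b);
}

int main() {
    Trx a{false, 100, 5}, b{true, 1, 0};
    return trx_weight_ge(b, a) && !trx_weight_ge(a, b) ? 0 : 1;
}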
-/****************************************************************//**
-Prepares a transaction. */
+/** Prepare a transaction.
+@return log sequence number that makes the XA PREPARE durable
+@retval 0 if no changes needed to be made durable */
static
-void
-trx_prepare(
-/*========*/
- trx_t* trx) /*!< in/out: transaction */
+lsn_t
+trx_prepare_low(trx_t* trx)
{
- trx_rseg_t* rseg;
- lsn_t lsn;
- mtr_t mtr;
+ mtr_t mtr;
- rseg = trx->rseg;
- /* Only fresh user transactions can be prepared.
- Recovered transactions cannot. */
- ut_a(!trx->is_recovered);
+ /* It is not necessary to acquire trx->undo_mutex here because
+ only the owning (connection) thread of the transaction is
+ allowed to perform XA PREPARE. */
- if (trx->insert_undo != NULL || trx->update_undo != NULL) {
+ if (trx_undo_t* undo = trx->rsegs.m_noredo.undo) {
+ ut_ad(undo->rseg == trx->rsegs.m_noredo.rseg);
- mtr_start(&mtr);
+ mtr.start();
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
- /* Change the undo log segment states from TRX_UNDO_ACTIVE
- to TRX_UNDO_PREPARED: these modifications to the file data
- structure define the transaction as prepared in the
- file-based world, at the serialization point of lsn. */
+ mutex_enter(&undo->rseg->mutex);
+ trx_undo_set_state_at_prepare(trx, undo, false, &mtr);
+ mutex_exit(&undo->rseg->mutex);
- mutex_enter(&rseg->mutex);
+ mtr.commit();
+ }
- if (trx->insert_undo != NULL) {
+ trx_undo_t* insert = trx->rsegs.m_redo.insert_undo;
+ trx_undo_t* update = trx->rsegs.m_redo.update_undo;
- /* It is not necessary to obtain trx->undo_mutex here
- because only a single OS thread is allowed to do the
- transaction prepare for this transaction. */
+ if (!insert && !update) {
+ /* There were no changes to persistent tables. */
+ return(0);
+ }
- trx_undo_set_state_at_prepare(trx, trx->insert_undo,
- &mtr);
- }
+ trx_rseg_t* rseg = trx->rsegs.m_redo.rseg;
- if (trx->update_undo) {
- trx_undo_set_state_at_prepare(
- trx, trx->update_undo, &mtr);
- }
+ mtr.start();
- mutex_exit(&rseg->mutex);
+ /* Change the undo log segment states from TRX_UNDO_ACTIVE to
+ TRX_UNDO_PREPARED: these modifications to the file data
+ structure define the transaction as prepared in the file-based
+ world, at the serialization point of lsn. */
- /*--------------*/
- mtr_commit(&mtr); /* This mtr commit makes the
- transaction prepared in the file-based
- world */
- /*--------------*/
- lsn = mtr.end_lsn;
- ut_ad(lsn);
- } else {
- lsn = 0;
+ mutex_enter(&rseg->mutex);
+
+ if (insert) {
+ ut_ad(insert->rseg == rseg);
+ trx_undo_set_state_at_prepare(trx, insert, false, &mtr);
+ }
+
+ if (update) {
+ ut_ad(update->rseg == rseg);
+ trx_undo_set_state_at_prepare(trx, update, false, &mtr);
}
+ mutex_exit(&rseg->mutex);
+
+ /* Make the XA PREPARE durable. */
+ mtr.commit();
+ ut_ad(mtr.commit_lsn() > 0);
+ return(mtr.commit_lsn());
+}
+
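
The shape of the new prepare path: the low-level step returns the LSN that makes the PREPARE durable, or 0 when nothing persistent changed, and the caller flushes the log only for a nonzero LSN. A minimal sketch with stand-in names:

// Two-step XA PREPARE shape; prepare_low() is a stub standing in for the
// mini-transaction that moves undo segments to TRX_UNDO_PREPARED.
#include <cstdint>
#include <iostream>

using lsn_t = uint64_t;

lsn_t prepare_low(bool has_persistent_undo) {
    // The real code returns mtr.commit_lsn() after committing the mtr.
    return has_persistent_undo ? 42 : 0;
}

void prepare(bool has_persistent_undo) {
    if (lsn_t lsn = prepare_low(has_persistent_undo)) {
        std::cout << "flush log up to " << lsn << '\n'; // make PREPARE durable
    } else {
        std::cout << "nothing to flush\n";
    }
}

int main() {
    prepare(true);
    prepare(false);
}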
+/****************************************************************//**
+Prepares a transaction. */
+static
+void
+trx_prepare(
+/*========*/
+ trx_t* trx) /*!< in/out: transaction */
+{
+ /* Only fresh user transactions can be prepared.
+ Recovered transactions cannot. */
+ ut_a(!trx->is_recovered);
+
+ lsn_t lsn = trx_prepare_low(trx);
+
+ DBUG_EXECUTE_IF("ib_trx_crash_during_xa_prepare_step", DBUG_SUICIDE(););
+
/*--------------------------------------*/
ut_a(trx->state == TRX_STATE_ACTIVE);
- mutex_enter(&trx_sys->mutex);
+ trx_mutex_enter(trx);
trx->state = TRX_STATE_PREPARED;
- trx_sys->n_prepared_trx++;
- mutex_exit(&trx_sys->mutex);
+ trx_mutex_exit(trx);
/*--------------------------------------*/
if (lsn) {
@@ -2159,22 +2669,17 @@ trx_prepare(
there are > 2 users in the database. Then at least 2 users can
gather behind one doing the physical log write to disk.
- TODO: find out if MySQL holds some mutex when calling this.
- That would spoil our group prepare algorithm. */
+ We must not be holding any mutexes or latches here. */
trx_flush_log_if_needed(lsn, trx);
}
}
-/**********************************************************************//**
-Does the transaction prepare for MySQL. */
-UNIV_INTERN
-void
-trx_prepare_for_mysql(
-/*==================*/
- trx_t* trx) /*!< in/out: trx handle */
+/** XA PREPARE a transaction.
+@param[in,out] trx transaction to prepare */
+void trx_prepare_for_mysql(trx_t* trx)
{
- trx_start_if_not_started_xa(trx);
+ trx_start_if_not_started_xa(trx, false);
trx->op_info = "preparing";
@@ -2186,8 +2691,7 @@ trx_prepare_for_mysql(
/**********************************************************************//**
This function is used to find number of prepared transactions and
their transaction objects for a recovery.
-@return number of prepared transactions stored in xid_list */
-UNIV_INTERN
+@return number of prepared transactions stored in xid_list */
int
trx_recover_for_mysql(
/*==================*/
@@ -2203,7 +2707,7 @@ trx_recover_for_mysql(
/* We should set those transactions which are in the prepared state
to the xid_list */
- mutex_enter(&trx_sys->mutex);
+ trx_sys_mutex_enter();
for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
trx != NULL;
@@ -2217,26 +2721,19 @@ trx_recover_for_mysql(
trx->is_recovered. It may also change to COMMITTED. */
if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
trx->state = TRX_STATE_PREPARED_RECOVERED;
- xid_list[count] = trx->xid;
+ xid_list[count] = *trx->xid;
if (count == 0) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Starting recovery for"
- " XA transactions...\n");
+ ib::info() << "Starting recovery for"
+ " XA transactions...";
}
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Transaction " TRX_ID_FMT " in"
- " prepared state after recovery\n",
- trx->id);
+ ib::info() << "Transaction "
+ << trx_get_id_for_print(trx)
+ << " in prepared state after recovery";
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Transaction contains changes"
- " to " TRX_ID_FMT " rows\n",
- trx->undo_no);
+ ib::info() << "Transaction contains changes to "
+ << trx->undo_no << " rows";
count++;
@@ -2257,40 +2754,32 @@ trx_recover_for_mysql(
}
partial:
- mutex_exit(&trx_sys->mutex);
+ trx_sys_mutex_exit();
if (count > 0){
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: %d transactions in prepared state"
- " after recovery\n",
- int (count));
+ ib::info() << count << " transactions in prepared state"
+ " after recovery";
}
return(int (count));
}
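
A compilable sketch of the recovery scan above: walk the recovered transactions and copy the XID of each prepared one into the caller's array, stopping at its capacity. Trx and Xid here are illustrative stand-ins:

// XA recovery enumeration sketch, modelled on trx_recover_for_mysql().
#include <cstddef>
#include <vector>

struct Xid { long gtrid_length = 0, bqual_length = 0; };
enum class State { ACTIVE, PREPARED, COMMITTED };
struct Trx { State state; Xid xid; };

int recover(Xid* xid_list, std::size_t len, std::vector<Trx>& trx_list) {
    std::size_t count = 0;
    for (Trx& trx : trx_list) {
        if (trx.state != State::PREPARED) continue;
        if (count == len) break; // caller's buffer is full
        xid_list[count++] = trx.xid;
    }
    return static_cast<int>(count);
}

int main() {
    std::vector<Trx> trxs = {{State::ACTIVE, {}}, {State::PREPARED, {}}};
    Xid out[4];
    return recover(out, 4, trxs) == 1 ? 0 : 1;
}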
-/*******************************************************************//**
-This function is used to find one X/Open XA distributed transaction
-which is in the prepared state
-@return trx on match, the trx->xid will be invalidated;
-note that the trx may have been committed, unless the caller is
-holding lock_sys->mutex */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-trx_t*
-trx_get_trx_by_xid_low(
-/*===================*/
- const XID* xid) /*!< in: X/Open XA transaction
- identifier */
+/** Look up an X/Open distributed transaction in XA PREPARE state.
+@param[in] xid X/Open XA transaction identifier
+@return trx on match, the trx->xid will be invalidated;
+note that the trx may have been committed before the caller
+acquires trx_t::mutex */
+static MY_ATTRIBUTE((warn_unused_result))
+trx_t* trx_get_trx_by_xid_low(const XID* xid)
{
trx_t* trx;
- ut_ad(mutex_own(&trx_sys->mutex));
+ ut_ad(trx_sys_mutex_own());
for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
trx != NULL;
trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-
+ trx_mutex_enter(trx);
assert_trx_in_rw_list(trx);
/* Compare two X/Open XA transaction id's: their
@@ -2301,39 +2790,33 @@ trx_get_trx_by_xid_low(
if (trx->is_recovered
&& (trx_state_eq(trx, TRX_STATE_PREPARED)
|| trx_state_eq(trx, TRX_STATE_PREPARED_RECOVERED))
- && !trx->xid.is_null()
- && xid->gtrid_length == trx->xid.gtrid_length
- && xid->bqual_length == trx->xid.bqual_length
- && memcmp(xid->data, trx->xid.data,
- xid->gtrid_length + xid->bqual_length) == 0) {
-
+ && xid->eq(trx->xid)) {
#ifdef WITH_WSREP
/* The commit of a prepared recovered Galera
transaction needs a valid trx->xid for
invoking trx_sys_update_wsrep_checkpoint(). */
- if (wsrep_is_wsrep_xid(&trx->xid)) break;
+ if (!wsrep_is_wsrep_xid(trx->xid))
#endif
- /* Invalidate the XID, so that subsequent calls
- will not find it. */
- trx->xid.null();
+ /* Invalidate the XID, so that subsequent calls
+ will not find it. */
+ trx->xid->null();
+ trx_mutex_exit(trx);
break;
}
+
+ trx_mutex_exit(trx);
}
return(trx);
}
-/*******************************************************************//**
-This function is used to find one X/Open XA distributed transaction
-which is in the prepared state
-@return trx or NULL; on match, the trx->xid will be invalidated;
-note that the trx may have been committed, unless the caller is
-holding lock_sys->mutex */
-UNIV_INTERN
-trx_t*
-trx_get_trx_by_xid(
-/*===============*/
- const XID* xid) /*!< in: X/Open XA transaction identifier */
+/** Look up an X/Open distributed transaction in XA PREPARE state.
+@param[in] xid X/Open XA transaction identifier
+@return transaction on match (the trx_t::xid will be invalidated);
+note that the trx may have been committed before the caller acquires
+trx_t::mutex
+@retval NULL if no match */
+trx_t* trx_get_trx_by_xid(const XID* xid)
{
trx_t* trx;
@@ -2342,42 +2825,40 @@ trx_get_trx_by_xid(
return(NULL);
}
- mutex_enter(&trx_sys->mutex);
+ trx_sys_mutex_enter();
/* Recovered/Resurrected transactions are always only on the
trx_sys_t::rw_trx_list. */
trx = trx_get_trx_by_xid_low(xid);
- mutex_exit(&trx_sys->mutex);
+ trx_sys_mutex_exit();
return(trx);
}
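
The removed field-by-field comparison (gtrid_length, bqual_length, then a memcmp over the combined data) shows what the new xid->eq() call must amount to. A standalone sketch of that equality, assuming a fixed-size data buffer as with XIDDATASIZE:

// XID equality sketch; Xid is a stand-in, not the server's XID class.
#include <cstring>

enum { XIDDATASIZE = 128 };

struct Xid {
    long gtrid_length;
    long bqual_length;
    char data[XIDDATASIZE];

    bool eq(const Xid* other) const {
        return gtrid_length == other->gtrid_length
            && bqual_length == other->bqual_length
            && memcmp(data, other->data,
                      gtrid_length + bqual_length) == 0;
    }
};

int main() {
    Xid a{3, 2, "abcde"}, b{3, 2, "abcde"}, c{3, 2, "abcdX"};
    return a.eq(&b) && !a.eq(&c) ? 0 : 1;
}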
/*************************************************************//**
Starts the transaction if it is not yet started. */
-UNIV_INTERN
void
trx_start_if_not_started_xa_low(
/*============================*/
- trx_t* trx) /*!< in: transaction */
+ trx_t* trx, /*!< in/out: transaction */
+ bool read_write) /*!< in: true if read write transaction */
{
switch (trx->state) {
case TRX_STATE_NOT_STARTED:
+ trx_start_low(trx, read_write);
+ return;
- /* Update the info whether we should skip XA steps
- that eat CPU time.
-
- For the duration of the transaction trx->support_xa is
- not reread from thd so any changes in the value take
- effect in the next transaction. This is to avoid a
- scenario where some undo generated by a transaction,
- has XA stuff, and other undo, generated by the same
- transaction, doesn't. */
- trx->support_xa = thd_supports_xa(trx->mysql_thd);
-
- trx_start_low(trx);
- /* fall through */
case TRX_STATE_ACTIVE:
+ if (trx->id == 0 && read_write) {
+ /* If the transaction is tagged as read-only then
+ it can only write to temp tables, and such
+ transactions should not be moved to the
+ trx_sys_t::rw_trx_list. */
+ if (!trx->read_only) {
+ trx_set_rw_mode(trx);
+ }
+ }
return;
case TRX_STATE_PREPARED:
case TRX_STATE_PREPARED_RECOVERED:
@@ -2390,18 +2871,23 @@ trx_start_if_not_started_xa_low(
/*************************************************************//**
Starts the transaction if it is not yet started. */
-UNIV_INTERN
void
trx_start_if_not_started_low(
-/*=========================*/
- trx_t* trx) /*!< in: transaction */
+/*==========================*/
+ trx_t* trx, /*!< in: transaction */
+ bool read_write) /*!< in: true if read write transaction */
{
switch (trx->state) {
case TRX_STATE_NOT_STARTED:
- trx_start_low(trx);
- /* fall through */
+ trx_start_low(trx, read_write);
+ return;
+
case TRX_STATE_ACTIVE:
+ if (read_write && trx->id == 0 && !trx->read_only) {
+ trx_set_rw_mode(trx);
+ }
return;
+
case TRX_STATE_PREPARED:
case TRX_STATE_PREPARED_RECOVERED:
case TRX_STATE_COMMITTED_IN_MEMORY:
@@ -2412,8 +2898,40 @@ trx_start_if_not_started_low(
}
/*************************************************************//**
+Starts a transaction for internal processing. */
+void
+trx_start_internal_low(
+/*===================*/
+ trx_t* trx) /*!< in/out: transaction */
+{
+ /* Ensure it is not flagged as an auto-commit-non-locking
+ transaction. */
+
+ trx->will_lock = 1;
+
+ trx->internal = true;
+
+ trx_start_low(trx, true);
+}
+
+/** Starts a read-only transaction for internal processing.
+@param[in,out] trx transaction to be started */
+void
+trx_start_internal_read_only_low(
+ trx_t* trx)
+{
+ /* Ensure it is not flagged as an auto-commit-non-locking
+ transaction. */
+
+ trx->will_lock = 1;
+
+ trx->internal = true;
+
+ trx_start_low(trx, false);
+}
+
+/*************************************************************//**
Starts the transaction for a DDL operation. */
-UNIV_INTERN
void
trx_start_for_ddl_low(
/*==================*/
@@ -2431,12 +2949,13 @@ trx_start_for_ddl_low(
transaction. */
trx->will_lock = 1;
- trx->ddl = true;
+ trx->ddl= true;
- trx_start_low(trx);
+ trx_start_internal_low(trx);
return;
case TRX_STATE_ACTIVE:
+
/* We have this start if not started idiom, therefore we
can't add stronger checks here. */
trx->ddl = true;
@@ -2444,6 +2963,7 @@ trx_start_for_ddl_low(
ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
ut_ad(trx->will_lock > 0);
return;
+
case TRX_STATE_PREPARED:
case TRX_STATE_PREPARED_RECOVERED:
case TRX_STATE_COMMITTED_IN_MEMORY:
@@ -2452,3 +2972,62 @@ trx_start_for_ddl_low(
ut_error;
}
+
+/*************************************************************//**
+Set the transaction as a read-write transaction if it is not already
+tagged as such. Read-only transactions that are writing to temporary
+tables are assigned an ID and a rollback segment but are not added
+to the trx read-write list because their updates should not be visible
+to other transactions and therefore their changes can be ignored
+by MVCC. */
+void
+trx_set_rw_mode(
+/*============*/
+ trx_t* trx) /*!< in/out: transaction that is RW */
+{
+ ut_ad(trx->rsegs.m_redo.rseg == 0);
+ ut_ad(!trx->in_rw_trx_list);
+ ut_ad(!trx_is_autocommit_non_locking(trx));
+ ut_ad(!trx->read_only);
+
+ if (high_level_read_only) {
+ return;
+ }
+
+ /* This function promotes an existing trx from ro mode to rw mode.
+ In the process it acquires trx_sys->mutex, as it plans to move the
+ trx from the ro list to the rw list. If in the future some other
+ thread looks at this trx object while it is being promoted, ensure
+ that both threads are synced by acquiring trx->mutex, to avoid
+ decisions based on an inconsistent view formed during promotion. */
+
+ trx->rsegs.m_redo.rseg = trx_assign_rseg_low();
+
+ ut_ad(trx->rsegs.m_redo.rseg != 0);
+
+ mutex_enter(&trx_sys->mutex);
+
+ ut_ad(trx->id == 0);
+ trx->id = trx_sys_get_new_trx_id();
+
+ trx_sys->rw_trx_ids.push_back(trx->id);
+
+ trx_sys->rw_trx_set.insert(TrxTrack(trx->id, trx));
+
+ /* So that we can see our own changes. */
+ if (MVCC::is_view_active(trx->read_view)) {
+ MVCC::set_view_creator_trx_id(trx->read_view, trx->id);
+ }
+
+#ifdef UNIV_DEBUG
+ if (trx->id > trx_sys->rw_max_trx_id) {
+ trx_sys->rw_max_trx_id = trx->id;
+ }
+#endif /* UNIV_DEBUG */
+
+ UT_LIST_ADD_FIRST(trx_sys->rw_trx_list, trx);
+
+ ut_d(trx->in_rw_trx_list = true);
+
+ mutex_exit(&trx_sys->mutex);
+}
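
A minimal model of the lazy promotion implemented above: a transaction starts with id == 0 and only acquires an ID and joins the read-write bookkeeping on its first write. All names are stand-ins; the mutexes and the MVCC view hook are omitted:

// Lazy read-write promotion sketch, modelled on trx_set_rw_mode().
#include <cstdint>
#include <set>
#include <vector>

using trx_id_t = uint64_t;

struct TrxSys {
    trx_id_t              next_id = 1;
    std::vector<trx_id_t> rw_trx_ids; // stays sorted: IDs grow monotonically
    std::set<trx_id_t>    rw_trx_set;
};

struct Trx { trx_id_t id = 0; bool read_only = false; };

void set_rw_mode(TrxSys& sys, Trx& trx) {
    if (trx.read_only || trx.id != 0) return; // already promoted, or ro
    trx.id = sys.next_id++;
    sys.rw_trx_ids.push_back(trx.id); // append keeps the vector sorted
    sys.rw_trx_set.insert(trx.id);
}

int main() {
    TrxSys sys;
    Trx trx;
    set_rw_mode(sys, trx); // first write: assign an ID
    set_rw_mode(sys, trx); // idempotent afterwards
    return trx.id == 1 && sys.rw_trx_ids.size() == 1 ? 0 : 1;
}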
diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc
index 03acdefd2f4..336506c7b65 100644
--- a/storage/innobase/trx/trx0undo.cc
+++ b/storage/innobase/trx/trx0undo.cc
@@ -25,22 +25,15 @@ Created 3/26/1996 Heikki Tuuri
*******************************************************/
#include "trx0undo.h"
-
-#ifdef UNIV_NONINL
-#include "trx0undo.ic"
-#endif
-
#include "fsp0fsp.h"
-#ifndef UNIV_HOTBACKUP
#include "mach0data.h"
#include "mtr0log.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
+#include "srv0mon.h"
#include "srv0srv.h"
#include "srv0start.h"
-#include "trx0rec.h"
#include "trx0purge.h"
-#include "srv0mon.h"
+#include "trx0rec.h"
+#include "trx0rseg.h"
/* How should the old versions in the history list be managed?
----------------------------------------------------------
@@ -96,7 +89,6 @@ it until a truncate operation occurs, which can remove undo logs from the end
of the list and release undo log segments. In stepping through the list,
s-latches on the undo log pages are enough, but in a truncate, x-latches must
be obtained on the rollback segment and individual pages. */
-#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
Initializes the fields in an undo log segment page. */
@@ -108,10 +100,9 @@ trx_undo_page_init(
ulint type, /*!< in: undo log segment type */
mtr_t* mtr); /*!< in: mtr */
-#ifndef UNIV_HOTBACKUP
/********************************************************************//**
Creates and initializes an undo log memory object.
-@return own: the undo log memory object */
+@return own: the undo log memory object */
static
trx_undo_t*
trx_undo_mem_create(
@@ -125,12 +116,11 @@ trx_undo_mem_create(
const XID* xid, /*!< in: X/Open XA transaction identification*/
ulint page_no,/*!< in: undo log header page number */
ulint offset);/*!< in: undo log header byte offset on page */
-#endif /* !UNIV_HOTBACKUP */
/***************************************************************//**
Initializes a cached insert undo log header page for new use. NOTE that this
function has its own log record type MLOG_UNDO_HDR_REUSE. You must NOT change
the operation of this function!
-@return undo log header byte offset on page */
+@return undo log header byte offset on page */
static
ulint
trx_undo_insert_header_reuse(
@@ -139,20 +129,10 @@ trx_undo_insert_header_reuse(
header page, x-latched */
trx_id_t trx_id, /*!< in: transaction id */
mtr_t* mtr); /*!< in: mtr */
-/**********************************************************************//**
-If an update undo log can be discarded immediately, this function frees the
-space, resetting the page to the proper state for caching. */
-static
-void
-trx_undo_discard_latest_update_undo(
-/*================================*/
- page_t* undo_page, /*!< in: header page of an undo log of size 1 */
- mtr_t* mtr); /*!< in: mtr */
-#ifndef UNIV_HOTBACKUP
/***********************************************************************//**
Gets the previous record in an undo log from the previous page.
-@return undo log record, the page s-latched, NULL if none */
+@return undo log record, the page s-latched, NULL if none */
static
trx_undo_rec_t*
trx_undo_get_prev_rec_from_prev_page(
@@ -164,7 +144,6 @@ trx_undo_get_prev_rec_from_prev_page(
mtr_t* mtr) /*!< in: mtr */
{
ulint space;
- ulint zip_size;
ulint prev_page_no;
page_t* prev_page;
page_t* undo_page;
@@ -181,11 +160,11 @@ trx_undo_get_prev_rec_from_prev_page(
}
space = page_get_space_id(undo_page);
- zip_size = fil_space_get_zip_size(space);
- buf_block_t* block = buf_page_get(space, zip_size, prev_page_no,
- shared ? RW_S_LATCH : RW_X_LATCH,
- mtr);
+ buf_block_t* block = buf_page_get(
+ page_id_t(space, prev_page_no), univ_page_size,
+ shared ? RW_S_LATCH : RW_X_LATCH, mtr);
+
buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
prev_page = buf_block_get_frame(block);
@@ -195,8 +174,7 @@ trx_undo_get_prev_rec_from_prev_page(
/***********************************************************************//**
Gets the previous record in an undo log.
-@return undo log record, the page s-latched, NULL if none */
-UNIV_INTERN
+@return undo log record, the page s-latched, NULL if none */
trx_undo_rec_t*
trx_undo_get_prev_rec(
/*==================*/
@@ -222,26 +200,28 @@ trx_undo_get_prev_rec(
shared, mtr));
}
-/***********************************************************************//**
-Gets the next record in an undo log from the next page.
-@return undo log record, the page latched, NULL if none */
+/** Gets the next record in an undo log from the next page.
+@param[in] space undo log header space
+@param[in] undo_page undo log page
+@param[in] page_no undo log header page number
+@param[in] offset undo log header offset on page
+@param[in] mode latch mode: RW_S_LATCH or RW_X_LATCH
+@param[in,out] mtr mini-transaction
+@return undo log record, the page latched, NULL if none */
static
trx_undo_rec_t*
trx_undo_get_next_rec_from_next_page(
-/*=================================*/
- ulint space, /*!< in: undo log header space */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- page_t* undo_page, /*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset, /*!< in: undo log header offset on page */
- ulint mode, /*!< in: latch mode: RW_S_LATCH or RW_X_LATCH */
- mtr_t* mtr) /*!< in: mtr */
+ ulint space,
+ const page_t* undo_page,
+ ulint page_no,
+ ulint offset,
+ ulint mode,
+ mtr_t* mtr)
{
- trx_ulogf_t* log_hdr;
- ulint next_page_no;
- page_t* next_page;
- ulint next;
+ const trx_ulogf_t* log_hdr;
+ ulint next_page_no;
+ page_t* next_page;
+ ulint next;
if (page_no == page_get_page_no(undo_page)) {
@@ -262,13 +242,14 @@ trx_undo_get_next_rec_from_next_page(
return(NULL);
}
+ const page_id_t next_page_id(space, next_page_no);
+
if (mode == RW_S_LATCH) {
- next_page = trx_undo_page_get_s_latched(space, zip_size,
- next_page_no, mtr);
+ next_page = trx_undo_page_get_s_latched(
+ next_page_id, mtr);
} else {
ut_ad(mode == RW_X_LATCH);
- next_page = trx_undo_page_get(space, zip_size,
- next_page_no, mtr);
+ next_page = trx_undo_page_get(next_page_id, mtr);
}
return(trx_undo_page_get_first_rec(next_page, page_no, offset));
@@ -276,8 +257,7 @@ trx_undo_get_next_rec_from_next_page(
/***********************************************************************//**
Gets the next record in an undo log.
-@return undo log record, the page s-latched, NULL if none */
-UNIV_INTERN
+@return undo log record, the page s-latched, NULL if none */
trx_undo_rec_t*
trx_undo_get_next_rec(
/*==================*/
@@ -287,7 +267,6 @@ trx_undo_get_next_rec(
mtr_t* mtr) /*!< in: mtr */
{
ulint space;
- ulint zip_size;
trx_undo_rec_t* next_rec;
next_rec = trx_undo_page_get_next_rec(rec, page_no, offset);
@@ -297,37 +276,37 @@ trx_undo_get_next_rec(
}
space = page_get_space_id(page_align(rec));
- zip_size = fil_space_get_zip_size(space);
- return(trx_undo_get_next_rec_from_next_page(space, zip_size,
+ return(trx_undo_get_next_rec_from_next_page(space,
page_align(rec),
page_no, offset,
RW_S_LATCH, mtr));
}
-/***********************************************************************//**
-Gets the first record in an undo log.
-@return undo log record, the page latched, NULL if none */
-UNIV_INTERN
+/** Gets the first record in an undo log.
+@param[in] space undo log header space
+@param[in] page_no undo log header page number
+@param[in] offset undo log header offset on page
+@param[in] mode latching mode: RW_S_LATCH or RW_X_LATCH
+@param[in,out] mtr mini-transaction
+@return undo log record, the page latched, NULL if none */
trx_undo_rec_t*
trx_undo_get_first_rec(
-/*===================*/
- ulint space, /*!< in: undo log header space */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset, /*!< in: undo log header offset on page */
- ulint mode, /*!< in: latching mode: RW_S_LATCH or RW_X_LATCH */
- mtr_t* mtr) /*!< in: mtr */
+ ulint space,
+ ulint page_no,
+ ulint offset,
+ ulint mode,
+ mtr_t* mtr)
{
page_t* undo_page;
trx_undo_rec_t* rec;
+ const page_id_t page_id(space, page_no);
+
if (mode == RW_S_LATCH) {
- undo_page = trx_undo_page_get_s_latched(space, zip_size,
- page_no, mtr);
+ undo_page = trx_undo_page_get_s_latched(page_id, mtr);
} else {
- undo_page = trx_undo_page_get(space, zip_size, page_no, mtr);
+ undo_page = trx_undo_page_get(page_id, mtr);
}
rec = trx_undo_page_get_first_rec(undo_page, page_no, offset);
@@ -336,7 +315,7 @@ trx_undo_get_first_rec(
return(rec);
}
- return(trx_undo_get_next_rec_from_next_page(space, zip_size,
+ return(trx_undo_get_next_rec_from_next_page(space,
undo_page, page_no, offset,
mode, mtr));
}
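
The hunks in this file replace (space, zip_size, page_no) argument triples with a single page_id_t value. A stand-in value class shows the effect on call sites: one value object instead of parallel scalar arguments.

// Illustrative page-id value type; page_id here is a sketch, not the
// actual page_id_t class.
#include <cstdint>
#include <iostream>

class page_id {
public:
    page_id(uint32_t space, uint32_t page_no)
        : m_space(space), m_page_no(page_no) {}
    uint32_t space()   const { return m_space; }
    uint32_t page_no() const { return m_page_no; }
private:
    uint32_t m_space;
    uint32_t m_page_no;
};

void fetch_page(const page_id& id) {
    std::cout << "fetch [" << id.space() << ":" << id.page_no() << "]\n";
}

int main() {
    fetch_page(page_id(5, 310)); // was: fetch_page(5, zip_size, 310)
}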
@@ -357,25 +336,21 @@ trx_undo_page_init_log(
mlog_catenate_ulint_compressed(mtr, type);
}
-#else /* !UNIV_HOTBACKUP */
-# define trx_undo_page_init_log(undo_page,type,mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
Parses the redo log entry of an undo log page initialization.
-@return end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
byte*
trx_undo_parse_page_init(
/*=====================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr) /*!< in: mtr or NULL */
+ const byte* ptr, /*!< in: buffer */
+ const byte* end_ptr,/*!< in: buffer end */
+ page_t* page, /*!< in: page or NULL */
+ mtr_t* mtr) /*!< in: mtr or NULL */
{
ulint type;
- ptr = mach_parse_compressed(ptr, end_ptr, &type);
+ type = mach_parse_compressed(&ptr, end_ptr);
if (ptr == NULL) {
@@ -386,7 +361,7 @@ trx_undo_parse_page_init(
trx_undo_page_init(page, type, mtr);
}
- return(ptr);
+ return(const_cast<byte*>(ptr));
}
/********************************************************************//**
@@ -415,12 +390,11 @@ trx_undo_page_init(
trx_undo_page_init_log(undo_page, type, mtr);
}
-#ifndef UNIV_HOTBACKUP
/***************************************************************//**
Creates a new undo log segment in file.
@return DB_SUCCESS if page creation OK possible error codes are:
DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
+static MY_ATTRIBUTE((warn_unused_result))
dberr_t
trx_undo_seg_create(
/*================*/
@@ -441,7 +415,7 @@ trx_undo_seg_create(
trx_upagef_t* page_hdr;
trx_usegf_t* seg_hdr;
ulint n_reserved;
- ibool success;
+ bool success;
dberr_t err = DB_SUCCESS;
ut_ad(mtr != NULL);
@@ -455,12 +429,9 @@ trx_undo_seg_create(
slot_no = trx_rsegf_undo_find_free(rseg_hdr, mtr);
if (slot_no == ULINT_UNDEFINED) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: cannot find a free slot for"
- " an undo log. Do you have too\n"
- "InnoDB: many active transactions"
- " running concurrently?\n");
+ ib::warn() << "Cannot find a free slot for an undo log. Do"
+ " you have too many active transactions running"
+ " concurrently?";
return(DB_TOO_MANY_CONCURRENT_TRXS);
}
@@ -530,15 +501,12 @@ trx_undo_header_create_log(
mlog_catenate_ull_compressed(mtr, trx_id);
}
-#else /* !UNIV_HOTBACKUP */
-# define trx_undo_header_create_log(undo_page,trx_id,mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
/***************************************************************//**
Creates a new undo log header in file. NOTE that this function has its own
log record type MLOG_UNDO_HDR_CREATE. You must NOT change the operation of
this function!
-@return header byte offset on page */
+@return header byte offset on page */
static
ulint
trx_undo_header_create(
@@ -554,7 +522,6 @@ trx_undo_header_create(
trx_upagef_t* page_hdr;
trx_usegf_t* seg_hdr;
trx_ulogf_t* log_hdr;
- trx_ulogf_t* prev_log_hdr;
ulint prev_log;
ulint free;
ulint new_free;
@@ -581,6 +548,8 @@ trx_undo_header_create(
prev_log = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG);
if (prev_log != 0) {
+ trx_ulogf_t* prev_log_hdr;
+
prev_log_hdr = undo_page + prev_log;
mach_write_to_2(prev_log_hdr + TRX_UNDO_NEXT_LOG, free);
@@ -607,7 +576,6 @@ trx_undo_header_create(
return(free);
}
-#ifndef UNIV_HOTBACKUP
/********************************************************************//**
Write X/Open XA Transaction Identification (XID) to undo log header */
static
@@ -619,15 +587,19 @@ trx_undo_write_xid(
mtr_t* mtr) /*!< in: mtr */
{
mlog_write_ulint(log_hdr + TRX_UNDO_XA_FORMAT,
- (ulint) xid->formatID, MLOG_4BYTES, mtr);
+ static_cast<ulint>(xid->formatID),
+ MLOG_4BYTES, mtr);
mlog_write_ulint(log_hdr + TRX_UNDO_XA_TRID_LEN,
- (ulint) xid->gtrid_length, MLOG_4BYTES, mtr);
+ static_cast<ulint>(xid->gtrid_length),
+ MLOG_4BYTES, mtr);
mlog_write_ulint(log_hdr + TRX_UNDO_XA_BQUAL_LEN,
- (ulint) xid->bqual_length, MLOG_4BYTES, mtr);
+ static_cast<ulint>(xid->bqual_length),
+ MLOG_4BYTES, mtr);
- mlog_write_string(log_hdr + TRX_UNDO_XA_XID, (const byte*) xid->data,
+ mlog_write_string(log_hdr + TRX_UNDO_XA_XID,
+ reinterpret_cast<const byte*>(xid->data),
XIDDATASIZE, mtr);
}
@@ -640,12 +612,14 @@ trx_undo_read_xid(
trx_ulogf_t* log_hdr,/*!< in: undo log header */
XID* xid) /*!< out: X/Open XA Transaction Identification */
{
- xid->formatID = (long) mach_read_from_4(log_hdr + TRX_UNDO_XA_FORMAT);
+ xid->formatID=static_cast<long>(mach_read_from_4(
+ log_hdr + TRX_UNDO_XA_FORMAT));
- xid->gtrid_length
- = (long) mach_read_from_4(log_hdr + TRX_UNDO_XA_TRID_LEN);
- xid->bqual_length
- = (long) mach_read_from_4(log_hdr + TRX_UNDO_XA_BQUAL_LEN);
+ xid->gtrid_length=static_cast<long>(mach_read_from_4(
+ log_hdr + TRX_UNDO_XA_TRID_LEN));
+
+ xid->bqual_length=static_cast<long>(mach_read_from_4(
+ log_hdr + TRX_UNDO_XA_BQUAL_LEN));
memcpy(xid->data, log_hdr + TRX_UNDO_XA_XID, XIDDATASIZE);
}
@@ -702,55 +676,46 @@ trx_undo_insert_header_reuse_log(
mlog_catenate_ull_compressed(mtr, trx_id);
}
-#else /* !UNIV_HOTBACKUP */
-# define trx_undo_insert_header_reuse_log(undo_page,trx_id,mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************//**
-Parses the redo log entry of an undo log page header create or reuse.
-@return end of log record or NULL */
-UNIV_INTERN
+/** Parse the redo log entry of an undo log page header create or reuse.
+@param[in] type MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE
+@param[in] ptr redo log record
+@param[in] end_ptr end of log buffer
+@param[in,out] page page frame or NULL
+@param[in,out] mtr mini-transaction or NULL
+@return end of log record or NULL */
byte*
trx_undo_parse_page_header(
-/*=======================*/
- ulint type, /*!< in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr) /*!< in: mtr or NULL */
+ mlog_id_t type,
+ const byte* ptr,
+ const byte* end_ptr,
+ page_t* page,
+ mtr_t* mtr)
{
- trx_id_t trx_id;
- /* Silence a GCC warning about possibly uninitialized variable
- when mach_ull_parse_compressed() is not inlined. */
- ut_d(trx_id = 0);
- /* Declare the variable uninitialized in Valgrind, so that the
- above initialization will not mask any bugs. */
- UNIV_MEM_INVALID(&trx_id, sizeof trx_id);
-
- ptr = mach_ull_parse_compressed(ptr, end_ptr, &trx_id);
-
- if (ptr == NULL) {
+ trx_id_t trx_id = mach_u64_parse_compressed(&ptr, end_ptr);
- return(NULL);
- }
-
- if (page) {
- if (type == MLOG_UNDO_HDR_CREATE) {
+ if (ptr != NULL && page != NULL) {
+ switch (type) {
+ case MLOG_UNDO_HDR_CREATE:
trx_undo_header_create(page, trx_id, mtr);
- } else {
- ut_ad(type == MLOG_UNDO_HDR_REUSE);
+ return(const_cast<byte*>(ptr));
+ case MLOG_UNDO_HDR_REUSE:
trx_undo_insert_header_reuse(page, trx_id, mtr);
+ return(const_cast<byte*>(ptr));
+ default:
+ break;
}
+ ut_ad(0);
}
- return(ptr);
+ return(const_cast<byte*>(ptr));
}
/***************************************************************//**
Initializes a cached insert undo log header page for new use. NOTE that this
function has its own log record type MLOG_UNDO_HDR_REUSE. You must NOT change
the operation of this function!
-@return undo log header byte offset on page */
+@return undo log header byte offset on page */
static
ulint
trx_undo_insert_header_reuse(
@@ -806,122 +771,37 @@ trx_undo_insert_header_reuse(
return(free);
}
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Writes the redo log entry of an update undo log header discard. */
-UNIV_INLINE
-void
-trx_undo_discard_latest_log(
-/*========================*/
- page_t* undo_page, /*!< in: undo log header page */
- mtr_t* mtr) /*!< in: mtr */
-{
- mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_DISCARD, mtr);
-}
-#else /* !UNIV_HOTBACKUP */
-# define trx_undo_discard_latest_log(undo_page, mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Parses the redo log entry of an undo log page header discard.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-trx_undo_parse_discard_latest(
-/*==========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr MY_ATTRIBUTE((unused)), /*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- ut_ad(end_ptr);
-
- if (page) {
- trx_undo_discard_latest_update_undo(page, mtr);
- }
-
- return(ptr);
-}
-
-/**********************************************************************//**
-If an update undo log can be discarded immediately, this function frees the
-space, resetting the page to the proper state for caching. */
-static
-void
-trx_undo_discard_latest_update_undo(
-/*================================*/
- page_t* undo_page, /*!< in: header page of an undo log of size 1 */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_usegf_t* seg_hdr;
- trx_upagef_t* page_hdr;
- trx_ulogf_t* log_hdr;
- trx_ulogf_t* prev_log_hdr;
- ulint free;
- ulint prev_hdr_offset;
-
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
- page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
-
- free = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG);
- log_hdr = undo_page + free;
-
- prev_hdr_offset = mach_read_from_2(log_hdr + TRX_UNDO_PREV_LOG);
-
- if (prev_hdr_offset != 0) {
- prev_log_hdr = undo_page + prev_hdr_offset;
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START,
- mach_read_from_2(prev_log_hdr
- + TRX_UNDO_LOG_START));
- mach_write_to_2(prev_log_hdr + TRX_UNDO_NEXT_LOG, 0);
- }
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, free);
-
- mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_CACHED);
- mach_write_to_2(seg_hdr + TRX_UNDO_LAST_LOG, prev_hdr_offset);
-
- trx_undo_discard_latest_log(undo_page, mtr);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Tries to add a page to the undo log segment where the undo log is placed.
-@return X-latched block if success, else NULL */
-UNIV_INTERN
+/** Allocate an undo log page.
+@param[in,out] trx transaction
+@param[in,out] undo undo log
+@param[in,out] mtr mini-transaction that does not hold any page latch
+@return X-latched block if success
+@retval NULL on failure */
buf_block_t*
-trx_undo_add_page(
-/*==============*/
- trx_t* trx, /*!< in: transaction */
- trx_undo_t* undo, /*!< in: undo log memory object */
- mtr_t* mtr) /*!< in: mtr which does not have a latch to any
- undo log page; the caller must have reserved
- the rollback segment mutex */
+trx_undo_add_page(trx_t* trx, trx_undo_t* undo, mtr_t* mtr)
{
- page_t* header_page;
- buf_block_t* new_block;
- page_t* new_page;
- trx_rseg_t* rseg;
- ulint n_reserved;
+ ut_ad(mutex_own(&trx->undo_mutex));
- ut_ad(mutex_own(&(trx->undo_mutex)));
- ut_ad(mutex_own(&(trx->rseg->mutex)));
+ trx_rseg_t* rseg = undo->rseg;
+ buf_block_t* new_block = NULL;
+ ulint n_reserved;
+ page_t* header_page;
- rseg = trx->rseg;
+ /* When we add a page to an undo log, this is analogous to
+ a pessimistic insert in a B-tree, and we must reserve the
+ counterpart of the tree latch, which is the rseg mutex. */
+ mutex_enter(&rseg->mutex);
if (rseg->curr_size == rseg->max_size) {
-
- return(NULL);
+ goto func_exit;
}
- header_page = trx_undo_page_get(undo->space, undo->zip_size,
- undo->hdr_page_no, mtr);
+ header_page = trx_undo_page_get(
+ page_id_t(undo->space, undo->hdr_page_no), mtr);
if (!fsp_reserve_free_extents(&n_reserved, undo->space, 1,
FSP_UNDO, mtr)) {
-
- return(NULL);
+ goto func_exit;
}
new_block = fseg_alloc_free_page_general(
@@ -931,32 +811,32 @@ trx_undo_add_page(
fil_space_release_free_extents(undo->space, n_reserved);
- if (new_block == NULL) {
-
- /* No space left */
-
- return(NULL);
+ if (!new_block) {
+ goto func_exit;
}
ut_ad(rw_lock_get_x_lock_count(&new_block->lock) == 1);
buf_block_dbg_add_level(new_block, SYNC_TRX_UNDO_PAGE);
- undo->last_page_no = buf_block_get_page_no(new_block);
-
- new_page = buf_block_get_frame(new_block);
+ undo->last_page_no = new_block->page.id.page_no();
- trx_undo_page_init(new_page, undo->type, mtr);
+ trx_undo_page_init(new_block->frame, undo->type, mtr);
- flst_add_last(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST,
- new_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr);
+ flst_add_last(TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST
+ + header_page,
+ TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE
+ + new_block->frame,
+ mtr);
undo->size++;
rseg->curr_size++;
+func_exit:
+ mutex_exit(&rseg->mutex);
return(new_block);
}
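
For context, the reworked function above now takes and releases the rseg mutex itself. A hypothetical caller sketch (a fragment that assumes the surrounding InnoDB context; undo_page_has_room() and write_undo_rec() are illustrative stand-ins, not real functions): when the last undo page cannot hold the next record, the log is grown by one page and the write is retried, and a NULL return means the tablespace is out of space.

    for (;;) {
        if (undo_page_has_room(undo, rec_size)) {   /* stand-in check */
            write_undo_rec(undo, rec, &mtr);        /* stand-in write */
            break;
        }
        /* Grow the log by one page; trx_undo_add_page() takes and
           releases the rseg mutex internally, as shown above. */
        if (trx_undo_add_page(trx, undo, &mtr) == NULL) {
            err = DB_OUT_OF_FILE_SPACE;   /* no space left */
            break;
        }
    }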
/********************************************************************//**
Frees an undo log page that is not the header page.
-@return last page number in remaining log */
+@return last page number in remaining log */
static
ulint
trx_undo_free_page(
@@ -977,30 +857,26 @@ trx_undo_free_page(
fil_addr_t last_addr;
trx_rsegf_t* rseg_header;
ulint hist_size;
- ulint zip_size;
ut_a(hdr_page_no != page_no);
ut_ad(mutex_own(&(rseg->mutex)));
- zip_size = rseg->zip_size;
-
- undo_page = trx_undo_page_get(space, zip_size, page_no, mtr);
+ undo_page = trx_undo_page_get(page_id_t(space, page_no), mtr);
- header_page = trx_undo_page_get(space, zip_size, hdr_page_no, mtr);
+ header_page = trx_undo_page_get(page_id_t(space, hdr_page_no), mtr);
flst_remove(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST,
undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr);
fseg_free_page(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER,
- space, page_no, mtr);
+ space, page_no, false, mtr);
last_addr = flst_get_last(header_page + TRX_UNDO_SEG_HDR
+ TRX_UNDO_PAGE_LIST, mtr);
rseg->curr_size--;
if (in_history) {
- rseg_header = trx_rsegf_get(space, zip_size,
- rseg->page_no, mtr);
+ rseg_header = trx_rsegf_get(space, rseg->page_no, mtr);
hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
MLOG_4BYTES, mtr);
@@ -1012,22 +888,13 @@ trx_undo_free_page(
return(last_addr.page);
}
-/********************************************************************//**
-Frees the last undo log page.
-The caller must hold the rollback segment mutex. */
-UNIV_INTERN
+/** Free the last undo log page. The caller must hold the rseg mutex.
+@param[in,out] undo undo log
+@param[in,out] mtr mini-transaction that does not hold any undo log page
+ or that has allocated the undo log page */
void
-trx_undo_free_last_page_func(
-/*==========================*/
-#ifdef UNIV_DEBUG
- const trx_t* trx, /*!< in: transaction */
-#endif /* UNIV_DEBUG */
- trx_undo_t* undo, /*!< in/out: undo log memory copy */
- mtr_t* mtr) /*!< in/out: mini-transaction which does not
- have a latch to any undo log page or which
- has allocated the undo log page */
+trx_undo_free_last_page(trx_undo_t* undo, mtr_t* mtr)
{
- ut_ad(mutex_own(&trx->undo_mutex));
ut_ad(undo->hdr_page_no != undo->last_page_no);
ut_ad(undo->size > 0);
@@ -1038,25 +905,26 @@ trx_undo_free_last_page_func(
undo->size--;
}
-/********************************************************************//**
-Empties an undo log header page of undo records for that undo log. Other
-undo logs may still have records on that page, if it is an update undo log. */
+/** Empties an undo log header page of undo records for that undo log.
+Other undo logs may still have records on that page, if it is an update
+undo log.
+@param[in] space space
+@param[in] hdr_page_no header page number
+@param[in] hdr_offset header offset
+@param[in,out] mtr mini-transaction */
static
void
trx_undo_empty_header_page(
-/*=======================*/
- ulint space, /*!< in: space */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint hdr_page_no, /*!< in: header page number */
- ulint hdr_offset, /*!< in: header offset */
- mtr_t* mtr) /*!< in: mtr */
+ ulint space,
+ ulint hdr_page_no,
+ ulint hdr_offset,
+ mtr_t* mtr)
{
page_t* header_page;
trx_ulogf_t* log_hdr;
ulint end;
- header_page = trx_undo_page_get(space, zip_size, hdr_page_no, mtr);
+ header_page = trx_undo_page_get(page_id_t(space, hdr_page_no), mtr);
log_hdr = header_page + hdr_offset;
@@ -1065,41 +933,28 @@ trx_undo_empty_header_page(
mlog_write_ulint(log_hdr + TRX_UNDO_LOG_START, end, MLOG_2BYTES, mtr);
}
-/***********************************************************************//**
-Truncates an undo log from the end. This function is used during a rollback
-to free space from an undo log. */
-UNIV_INTERN
+/** Truncate the tail of an undo log during rollback.
+@param[in,out] undo undo log
+@param[in] limit all undo logs after this limit will be discarded
+@param[in] is_temp whether this is temporary undo log */
void
-trx_undo_truncate_end_func(
-/*=======================*/
-#ifdef UNIV_DEBUG
- const trx_t* trx, /*!< in: transaction whose undo log it is */
-#endif /* UNIV_DEBUG */
- trx_undo_t* undo, /*!< in: undo log */
- undo_no_t limit) /*!< in: all undo records with undo number
- >= this value should be truncated */
+trx_undo_truncate_end(trx_undo_t* undo, undo_no_t limit, bool is_temp)
{
- page_t* undo_page;
- ulint last_page_no;
- trx_undo_rec_t* rec;
- trx_undo_rec_t* trunc_here;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(trx->undo_mutex)));
- ut_ad(mutex_own(&(trx->rseg->mutex)));
+ ut_ad(mutex_own(&undo->rseg->mutex));
+ ut_ad(is_temp == !undo->rseg->is_persistent());
for (;;) {
- mtr_start(&mtr);
-
- trunc_here = NULL;
-
- last_page_no = undo->last_page_no;
-
- undo_page = trx_undo_page_get(undo->space, undo->zip_size,
- last_page_no, &mtr);
+ mtr_t mtr;
+ mtr.start();
+ if (is_temp) {
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
+ }
- rec = trx_undo_page_get_last_rec(undo_page, undo->hdr_page_no,
- undo->hdr_offset);
+ trx_undo_rec_t* trunc_here = NULL;
+ page_t* undo_page = trx_undo_page_get(
+ page_id_t(undo->space, undo->last_page_no), &mtr);
+ trx_undo_rec_t* rec = trx_undo_page_get_last_rec(
+ undo_page, undo->hdr_page_no, undo->hdr_offset);
while (rec) {
if (trx_undo_rec_get_undo_no(rec) >= limit) {
/* Truncate at least this record off, maybe
@@ -1114,45 +969,38 @@ trx_undo_truncate_end_func(
undo->hdr_offset);
}
- if (last_page_no == undo->hdr_page_no) {
+ if (undo->last_page_no == undo->hdr_page_no) {
+function_exit:
+ if (trunc_here) {
+ mlog_write_ulint(undo_page + TRX_UNDO_PAGE_HDR
+ + TRX_UNDO_PAGE_FREE,
+ trunc_here - undo_page,
+ MLOG_2BYTES, &mtr);
+ }
- goto function_exit;
+ mtr.commit();
+ return;
}
- ut_ad(last_page_no == undo->last_page_no);
- trx_undo_free_last_page(trx, undo, &mtr);
-
- mtr_commit(&mtr);
+ trx_undo_free_last_page(undo, &mtr);
+ mtr.commit();
}
-
-function_exit:
- if (trunc_here) {
- mlog_write_ulint(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE,
- trunc_here - undo_page, MLOG_2BYTES, &mtr);
- }
-
- mtr_commit(&mtr);
}
-/***********************************************************************//**
-Truncates an undo log from the start. This function is used during a purge
-operation. */
-UNIV_INTERN
+/** Truncate the head of an undo log.
+NOTE that only whole pages are freed; the header page is not
+freed, but emptied, if all the records there are below the limit.
+@param[in,out] rseg rollback segment
+@param[in] hdr_page_no header page number
+@param[in] hdr_offset header offset on the page
+@param[in] limit first undo number to preserve
+(everything below the limit will be truncated) */
void
trx_undo_truncate_start(
-/*====================*/
- trx_rseg_t* rseg, /*!< in: rollback segment */
- ulint space, /*!< in: space id of the log */
- ulint hdr_page_no, /*!< in: header page number */
- ulint hdr_offset, /*!< in: header offset on the page */
- undo_no_t limit) /*!< in: all undo pages with
- undo numbers < this value
- should be truncated; NOTE that
- the function only frees whole
- pages; the header page is not
- freed, but emptied, if all the
- records there are < limit */
+ trx_rseg_t* rseg,
+ ulint hdr_page_no,
+ ulint hdr_offset,
+ undo_no_t limit)
{
page_t* undo_page;
trx_undo_rec_t* rec;
@@ -1163,14 +1011,16 @@ trx_undo_truncate_start(
ut_ad(mutex_own(&(rseg->mutex)));
if (!limit) {
-
return;
}
loop:
mtr_start(&mtr);
- rec = trx_undo_get_first_rec(space, rseg->zip_size,
- hdr_page_no, hdr_offset,
+ if (!rseg->is_persistent()) {
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
+ }
+
+ rec = trx_undo_get_first_rec(rseg->space, hdr_page_no, hdr_offset,
RW_X_LATCH, &mtr);
if (rec == NULL) {
/* Already empty */
@@ -1194,11 +1044,11 @@ loop:
page_no = page_get_page_no(undo_page);
if (page_no == hdr_page_no) {
- trx_undo_empty_header_page(space, rseg->zip_size,
+ trx_undo_empty_header_page(rseg->space,
hdr_page_no, hdr_offset,
&mtr);
} else {
- trx_undo_free_page(rseg, TRUE, space, hdr_page_no,
+ trx_undo_free_page(rseg, TRUE, rseg->space, hdr_page_no,
page_no, &mtr);
}
@@ -1207,13 +1057,14 @@ loop:
goto loop;
}
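
As the new comment above notes, head truncation frees only whole pages, and the header page is merely emptied. A minimal self-contained toy model of that rule (a sketch of the semantics, not InnoDB code):

    #include <stdio.h>
    #include <vector>

    /* Toy model: a page is released only when every record on it is
       below the limit; the header page is emptied, never freed. */
    struct Page {
        bool is_header;
        bool freed;
        std::vector<unsigned> undo_nos;   /* ascending undo numbers */
    };

    static void truncate_start(std::vector<Page>& log, unsigned limit)
    {
        for (Page& p : log) {
            if (p.undo_nos.empty()) {
                continue;
            }
            if (p.undo_nos.back() >= limit) {
                return;      /* page holds a record to keep: stop here */
            }
            if (p.is_header) {
                p.undo_nos.clear();        /* emptied, but never freed */
            } else {
                p.freed = true;            /* whole page given back */
                p.undo_nos.clear();
            }
        }
    }

    int main()
    {
        std::vector<Page> log = {
            {true,  false, {1, 2, 3}},
            {false, false, {4, 5}},
            {false, false, {6, 7, 8}},
        };
        truncate_start(log, 6);            /* preserve undo numbers >= 6 */
        for (const Page& p : log) {
            printf("header=%d freed=%d records=%zu\n",
                   p.is_header, p.freed, p.undo_nos.size());
        }
        return 0;
    }

With limit 6, the header page is emptied, the second page is freed outright, and the third page survives untouched because it carries a record at or above the limit.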
-/**********************************************************************//**
-Frees an undo log segment which is not in the history list. */
+/** Frees an undo log segment which is not in the history list.
+@param[in] undo undo log
+@param[in] noredo whether the undo tablespace is redo logged */
static
void
trx_undo_seg_free(
-/*==============*/
- trx_undo_t* undo) /*!< in: undo log */
+ const trx_undo_t* undo,
+ bool noredo)
{
trx_rseg_t* rseg;
fseg_header_t* file_seg;
@@ -1228,21 +1079,25 @@ trx_undo_seg_free(
mtr_start(&mtr);
+ if (noredo) {
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
+ }
+
mutex_enter(&(rseg->mutex));
- seg_header = trx_undo_page_get(undo->space, undo->zip_size,
- undo->hdr_page_no,
- &mtr) + TRX_UNDO_SEG_HDR;
+ seg_header = trx_undo_page_get(page_id_t(undo->space,
+ undo->hdr_page_no),
+ &mtr)
+ + TRX_UNDO_SEG_HDR;
file_seg = seg_header + TRX_UNDO_FSEG_HEADER;
- finished = fseg_free_step(file_seg, &mtr);
+ finished = fseg_free_step(file_seg, false, &mtr);
if (finished) {
/* Update the rseg header */
rseg_header = trx_rsegf_get(
- rseg->space, rseg->zip_size, rseg->page_no,
- &mtr);
+ rseg->space, rseg->page_no, &mtr);
trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL,
&mtr);
@@ -1260,7 +1115,7 @@ trx_undo_seg_free(
Creates and initializes an undo log memory object according to the values
in the header in file, when the database is started. The memory object is
inserted in the appropriate list of rseg.
-@return own: the undo log memory object */
+@return own: the undo log memory object */
static
trx_undo_t*
trx_undo_mem_create_at_db_start(
@@ -1285,14 +1140,9 @@ trx_undo_mem_create_at_db_start(
XID xid;
ibool xid_exists = FALSE;
- if (id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr,
- "InnoDB: Error: undo->id is %lu\n", (ulong) id);
- ut_error;
- }
+ ut_a(id < TRX_RSEG_N_SLOTS);
- undo_page = trx_undo_page_get(rseg->space, rseg->zip_size,
- page_no, mtr);
+ undo_page = trx_undo_page_get(page_id_t(rseg->space, page_no), mtr);
page_header = undo_page + TRX_UNDO_PAGE_HDR;
@@ -1313,7 +1163,6 @@ trx_undo_mem_create_at_db_start(
/* Read X/Open XA transaction identification if it exists, or
set it to NULL. */
-
xid.null();
if (xid_exists == TRUE) {
@@ -1331,7 +1180,7 @@ trx_undo_mem_create_at_db_start(
undo->table_id = mach_read_from_8(undo_header + TRX_UNDO_TABLE_ID);
undo->state = state;
- undo->size = flst_get_len(seg_header + TRX_UNDO_PAGE_LIST, mtr);
+ undo->size = flst_get_len(seg_header + TRX_UNDO_PAGE_LIST);
/* If the log segment is being freed, the page list is inconsistent! */
if (state == TRX_UNDO_TO_FREE) {
@@ -1344,8 +1193,8 @@ trx_undo_mem_create_at_db_start(
undo->last_page_no = last_addr.page;
undo->top_page_no = last_addr.page;
- last_page = trx_undo_page_get(rseg->space, rseg->zip_size,
- undo->last_page_no, mtr);
+ last_page = trx_undo_page_get(
+ page_id_t(rseg->space, undo->last_page_no), mtr);
rec = trx_undo_page_get_last_rec(last_page, page_no, offset);
@@ -1359,21 +1208,23 @@ trx_undo_mem_create_at_db_start(
add_to_list:
if (type == TRX_UNDO_INSERT) {
if (state != TRX_UNDO_CACHED) {
- UT_LIST_ADD_LAST(undo_list, rseg->insert_undo_list,
- undo);
+
+ UT_LIST_ADD_LAST(rseg->insert_undo_list, undo);
} else {
- UT_LIST_ADD_LAST(undo_list, rseg->insert_undo_cached,
- undo);
+
+ UT_LIST_ADD_LAST(rseg->insert_undo_cached, undo);
+
MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
}
} else {
ut_ad(type == TRX_UNDO_UPDATE);
if (state != TRX_UNDO_CACHED) {
- UT_LIST_ADD_LAST(undo_list, rseg->update_undo_list,
- undo);
+
+ UT_LIST_ADD_LAST(rseg->update_undo_list, undo);
} else {
- UT_LIST_ADD_LAST(undo_list, rseg->update_undo_cached,
- undo);
+
+ UT_LIST_ADD_LAST(rseg->update_undo_cached, undo);
+
MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
}
}
@@ -1385,8 +1236,7 @@ add_to_list:
Initializes the undo log lists for a rollback segment memory copy. This
function is only called when the database is started or a new rollback
segment is created.
-@return the combined size of undo log segments in pages */
-UNIV_INTERN
+@return the combined size of undo log segments in pages */
ulint
trx_undo_lists_init(
/*================*/
@@ -1397,15 +1247,9 @@ trx_undo_lists_init(
ulint i;
mtr_t mtr;
- UT_LIST_INIT(rseg->update_undo_list);
- UT_LIST_INIT(rseg->update_undo_cached);
- UT_LIST_INIT(rseg->insert_undo_list);
- UT_LIST_INIT(rseg->insert_undo_cached);
-
mtr_start(&mtr);
- rseg_header = trx_rsegf_get_new(
- rseg->space, rseg->zip_size, rseg->page_no, &mtr);
+ rseg_header = trx_rsegf_get_new(rseg->space, rseg->page_no, &mtr);
for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
ulint page_no;
@@ -1432,8 +1276,7 @@ trx_undo_lists_init(
mtr_start(&mtr);
rseg_header = trx_rsegf_get(
- rseg->space, rseg->zip_size, rseg->page_no,
- &mtr);
+ rseg->space, rseg->page_no, &mtr);
/* Found a used slot */
MONITOR_INC(MONITOR_NUM_UNDO_SLOT_USED);
@@ -1447,7 +1290,7 @@ trx_undo_lists_init(
/********************************************************************//**
Creates and initializes an undo log memory object.
-@return own: the undo log memory object */
+@return own: the undo log memory object */
static
trx_undo_t*
trx_undo_mem_create(
@@ -1466,13 +1309,9 @@ trx_undo_mem_create(
ut_ad(mutex_own(&(rseg->mutex)));
- if (id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr,
- "InnoDB: Error: undo->id is %lu\n", (ulong) id);
- ut_error;
- }
+ ut_a(id < TRX_RSEG_N_SLOTS);
- undo = static_cast<trx_undo_t*>(mem_alloc(sizeof(*undo)));
+ undo = static_cast<trx_undo_t*>(ut_malloc_nokey(sizeof(*undo)));
if (undo == NULL) {
@@ -1491,7 +1330,6 @@ trx_undo_mem_create(
undo->rseg = rseg;
undo->space = rseg->space;
- undo->zip_size = rseg->zip_size;
undo->hdr_page_no = page_no;
undo->hdr_offset = offset;
undo->last_page_no = page_no;
@@ -1500,6 +1338,7 @@ trx_undo_mem_create(
undo->empty = TRUE;
undo->top_page_no = page_no;
undo->guess_block = NULL;
+ undo->withdraw_clock = 0;
return(undo);
}
@@ -1518,13 +1357,7 @@ trx_undo_mem_init_for_reuse(
{
ut_ad(mutex_own(&((undo->rseg)->mutex)));
- if (UNIV_UNLIKELY(undo->id >= TRX_RSEG_N_SLOTS)) {
- fprintf(stderr, "InnoDB: Error: undo->id is %lu\n",
- (ulong) undo->id);
-
- mem_analyze_corruption(undo);
- ut_error;
- }
+ ut_a(undo->id < TRX_RSEG_N_SLOTS);
undo->state = TRX_UNDO_ACTIVE;
undo->del_marks = FALSE;
@@ -1539,19 +1372,14 @@ trx_undo_mem_init_for_reuse(
/********************************************************************//**
Frees an undo log memory copy. */
-UNIV_INTERN
void
trx_undo_mem_free(
/*==============*/
trx_undo_t* undo) /*!< in: the undo object to be freed */
{
- if (undo->id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr,
- "InnoDB: Error: undo->id is %lu\n", (ulong) undo->id);
- ut_error;
- }
+ ut_a(undo->id < TRX_RSEG_N_SLOTS);
- mem_free(undo);
+ ut_free(undo);
}
/**********************************************************************//**
@@ -1590,8 +1418,7 @@ trx_undo_create(
rseg->curr_size++;
- rseg_header = trx_rsegf_get(rseg->space, rseg->zip_size, rseg->page_no,
- mtr);
+ rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, mtr);
err = trx_undo_seg_create(rseg, rseg_header, type, &id,
&undo_page, mtr);
@@ -1608,10 +1435,7 @@ trx_undo_create(
offset = trx_undo_header_create(undo_page, trx_id, mtr);
- if (trx->support_xa) {
- trx_undo_header_add_space_for_xid(undo_page,
- undo_page + offset, mtr);
- }
+ trx_undo_header_add_space_for_xid(undo_page, undo_page + offset, mtr);
*undo = trx_undo_mem_create(rseg, id, type, trx_id, xid,
page_no, offset);
@@ -1627,7 +1451,7 @@ trx_undo_create(
/********************************************************************//**
Reuses a cached undo log.
-@return the undo log memory object, NULL if none cached */
+@return the undo log memory object, NULL if none cached */
static
trx_undo_t*
trx_undo_reuse_cached(
@@ -1655,7 +1479,7 @@ trx_undo_reuse_cached(
return(NULL);
}
- UT_LIST_REMOVE(undo_list, rseg->insert_undo_cached, undo);
+ UT_LIST_REMOVE(rseg->insert_undo_cached, undo);
MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
} else {
@@ -1667,30 +1491,22 @@ trx_undo_reuse_cached(
return(NULL);
}
- UT_LIST_REMOVE(undo_list, rseg->update_undo_cached, undo);
+ UT_LIST_REMOVE(rseg->update_undo_cached, undo);
MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
}
ut_ad(undo->size == 1);
+ ut_a(undo->id < TRX_RSEG_N_SLOTS);
- if (undo->id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr, "InnoDB: Error: undo->id is %lu\n",
- (ulong) undo->id);
- mem_analyze_corruption(undo);
- ut_error;
- }
-
- undo_page = trx_undo_page_get(undo->space, undo->zip_size,
- undo->hdr_page_no, mtr);
+ undo_page = trx_undo_page_get(
+ page_id_t(undo->space, undo->hdr_page_no), mtr);
if (type == TRX_UNDO_INSERT) {
offset = trx_undo_insert_header_reuse(undo_page, trx_id, mtr);
- if (trx->support_xa) {
- trx_undo_header_add_space_for_xid(
- undo_page, undo_page + offset, mtr);
- }
+ trx_undo_header_add_space_for_xid(
+ undo_page, undo_page + offset, mtr);
} else {
ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+ TRX_UNDO_PAGE_TYPE)
@@ -1698,10 +1514,8 @@ trx_undo_reuse_cached(
offset = trx_undo_header_create(undo_page, trx_id, mtr);
- if (trx->support_xa) {
- trx_undo_header_add_space_for_xid(
- undo_page, undo_page + offset, mtr);
- }
+ trx_undo_header_add_space_for_xid(
+ undo_page, undo_page + offset, mtr);
}
trx_undo_mem_init_for_reuse(undo, trx_id, xid, offset);
@@ -1709,21 +1523,17 @@ trx_undo_reuse_cached(
return(undo);
}
-/**********************************************************************//**
-Marks an undo log header as a header of a data dictionary operation
-transaction. */
-static
-void
-trx_undo_mark_as_dict_operation(
-/*============================*/
- trx_t* trx, /*!< in: dict op transaction */
- trx_undo_t* undo, /*!< in: assigned undo log */
- mtr_t* mtr) /*!< in: mtr */
+/** Mark that an undo log header belongs to a data dictionary transaction.
+@param[in] trx dictionary transaction
+@param[in,out] undo undo log
+@param[in,out] mtr mini-transaction */
+void trx_undo_mark_as_dict(const trx_t* trx, trx_undo_t* undo, mtr_t* mtr)
{
- page_t* hdr_page;
+ ut_ad(undo == trx->rsegs.m_redo.insert_undo
+ || undo == trx->rsegs.m_redo.update_undo);
- hdr_page = trx_undo_page_get(undo->space, undo->zip_size,
- undo->hdr_page_no, mtr);
+ page_t* hdr_page = trx_undo_page_get(
+ page_id_t(undo->space, undo->hdr_page_no), mtr);
switch (trx_get_dict_operation(trx)) {
case TRX_DICT_OP_NONE:
@@ -1747,35 +1557,43 @@ trx_undo_mark_as_dict_operation(
undo->dict_operation = TRUE;
}
-/**********************************************************************//**
-Assigns an undo log for a transaction. A new undo log is created or a cached
-undo log reused.
-@return DB_SUCCESS if undo log assign successful, possible error codes
-are: DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE DB_READ_ONLY
-DB_OUT_OF_MEMORY */
-UNIV_INTERN
+/** Assign an undo log for a transaction.
+A new undo log is created or a cached undo log reused.
+@param[in,out] trx transaction
+@param[in] rseg rollback segment
+@param[out] undo the undo log
+@param[in] type TRX_UNDO_INSERT or TRX_UNDO_UPDATE
+@retval DB_SUCCESS on success
+@retval DB_TOO_MANY_CONCURRENT_TRXS
+@retval DB_OUT_OF_FILE_SPACE
+@retval DB_READ_ONLY
+@retval DB_OUT_OF_MEMORY */
dberr_t
trx_undo_assign_undo(
-/*=================*/
- trx_t* trx, /*!< in: transaction */
- ulint type) /*!< in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
+ trx_t* trx,
+ trx_rseg_t* rseg,
+ trx_undo_t** undo,
+ ulint type)
{
- trx_rseg_t* rseg;
- trx_undo_t* undo;
+ const bool is_temp = rseg == trx->rsegs.m_noredo.rseg;
mtr_t mtr;
dberr_t err = DB_SUCCESS;
- ut_ad(trx);
-
- if (trx->rseg == NULL) {
- return(DB_READ_ONLY);
- }
-
- rseg = trx->rseg;
+ ut_ad(mutex_own(&trx->undo_mutex));
+ ut_ad(rseg == trx->rsegs.m_redo.rseg
+ || rseg == trx->rsegs.m_noredo.rseg);
+ ut_ad(type == TRX_UNDO_INSERT || type == TRX_UNDO_UPDATE);
- ut_ad(mutex_own(&(trx->undo_mutex)));
+ mtr.start();
- mtr_start(&mtr);
+ if (is_temp) {
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
+ ut_ad(undo == &trx->rsegs.m_noredo.undo);
+ } else {
+ ut_ad(undo == (type == TRX_UNDO_INSERT
+ ? &trx->rsegs.m_redo.insert_undo
+ : &trx->rsegs.m_redo.update_undo));
+ }
mutex_enter(&rseg->mutex);
@@ -1785,42 +1603,37 @@ trx_undo_assign_undo(
goto func_exit;
);
- undo = trx_undo_reuse_cached(trx, rseg, type, trx->id, &trx->xid,
+ *undo = trx_undo_reuse_cached(trx, rseg, type, trx->id, trx->xid,
&mtr);
- if (undo == NULL) {
- err = trx_undo_create(trx, rseg, type, trx->id, &trx->xid,
- &undo, &mtr);
+ if (*undo == NULL) {
+ err = trx_undo_create(trx, rseg, type, trx->id, trx->xid,
+ undo, &mtr);
if (err != DB_SUCCESS) {
-
goto func_exit;
}
}
- if (type == TRX_UNDO_INSERT) {
- UT_LIST_ADD_FIRST(undo_list, rseg->insert_undo_list, undo);
- ut_ad(trx->insert_undo == NULL);
- trx->insert_undo = undo;
+ if (is_temp) {
+ UT_LIST_ADD_FIRST(rseg->insert_undo_list, *undo);
} else {
- UT_LIST_ADD_FIRST(undo_list, rseg->update_undo_list, undo);
- ut_ad(trx->update_undo == NULL);
- trx->update_undo = undo;
- }
-
- if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
- trx_undo_mark_as_dict_operation(trx, undo, &mtr);
+ UT_LIST_ADD_FIRST(type == TRX_UNDO_INSERT
+ ? rseg->insert_undo_list
+ : rseg->update_undo_list, *undo);
+ if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
+ trx_undo_mark_as_dict(trx, *undo, &mtr);
+ }
}
func_exit:
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
+ mutex_exit(&rseg->mutex);
+ mtr.commit();
return(err);
}
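
The new signature takes the rollback segment and the undo log slot explicitly instead of reading trx->rseg. A hypothetical caller fragment (assumes the surrounding InnoDB context; per the asserts above, undo_mutex must be held and the undo pointer must point into trx->rsegs):

    mutex_enter(&trx->undo_mutex);
    dberr_t err = trx_undo_assign_undo(trx, trx->rsegs.m_redo.rseg,
                                       &trx->rsegs.m_redo.insert_undo,
                                       TRX_UNDO_INSERT);
    mutex_exit(&trx->undo_mutex);
    if (err != DB_SUCCESS) {
        /* e.g. DB_TOO_MANY_CONCURRENT_TRXS or DB_OUT_OF_FILE_SPACE */
    }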
/******************************************************************//**
Sets the state of the undo log segment at a transaction finish.
-@return undo log segment header page, x-latched */
-UNIV_INTERN
+@return undo log segment header page, x-latched */
page_t*
trx_undo_set_state_at_finish(
/*=========================*/
@@ -1832,15 +1645,10 @@ trx_undo_set_state_at_finish(
page_t* undo_page;
ulint state;
- if (undo->id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr, "InnoDB: Error: undo->id is %lu\n",
- (ulong) undo->id);
- mem_analyze_corruption(undo);
- ut_error;
- }
+ ut_a(undo->id < TRX_RSEG_N_SLOTS);
- undo_page = trx_undo_page_get(undo->space, undo->zip_size,
- undo->hdr_page_no, mtr);
+ undo_page = trx_undo_page_get(
+ page_id_t(undo->space, undo->hdr_page_no), mtr);
seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
@@ -1865,16 +1673,18 @@ trx_undo_set_state_at_finish(
return(undo_page);
}
-/******************************************************************//**
-Sets the state of the undo log segment at a transaction prepare.
-@return undo log segment header page, x-latched */
-UNIV_INTERN
+/** Set the state of the undo log segment at a XA PREPARE or XA ROLLBACK.
+@param[in,out] trx transaction
+@param[in,out] undo insert_undo or update_undo log
+@param[in] rollback false=XA PREPARE, true=XA ROLLBACK
+@param[in,out] mtr mini-transaction
+@return undo log segment header page, x-latched */
page_t*
trx_undo_set_state_at_prepare(
-/*==========================*/
- trx_t* trx, /*!< in: transaction */
- trx_undo_t* undo, /*!< in: undo log memory copy */
- mtr_t* mtr) /*!< in: mtr */
+ trx_t* trx,
+ trx_undo_t* undo,
+ bool rollback,
+ mtr_t* mtr)
{
trx_usegf_t* seg_hdr;
trx_ulogf_t* undo_header;
@@ -1883,21 +1693,24 @@ trx_undo_set_state_at_prepare(
ut_ad(trx && undo && mtr);
- if (undo->id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr, "InnoDB: Error: undo->id is %lu\n",
- (ulong) undo->id);
- mem_analyze_corruption(undo);
- ut_error;
- }
+ ut_a(undo->id < TRX_RSEG_N_SLOTS);
- undo_page = trx_undo_page_get(undo->space, undo->zip_size,
- undo->hdr_page_no, mtr);
+ undo_page = trx_undo_page_get(
+ page_id_t(undo->space, undo->hdr_page_no), mtr);
seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
+ if (rollback) {
+ ut_ad(undo->state == TRX_UNDO_PREPARED);
+ mlog_write_ulint(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_ACTIVE,
+ MLOG_2BYTES, mtr);
+ return(undo_page);
+ }
+
/*------------------------------*/
+ ut_ad(undo->state == TRX_UNDO_ACTIVE);
undo->state = TRX_UNDO_PREPARED;
- undo->xid = trx->xid;
+ undo->xid = *trx->xid;
/*------------------------------*/
mlog_write_ulint(seg_hdr + TRX_UNDO_STATE, undo->state,
@@ -1918,32 +1731,29 @@ trx_undo_set_state_at_prepare(
Adds the update undo log header as the first in the history list, and
frees the memory object, or puts it to the list of cached update undo log
segments. */
-UNIV_INTERN
void
trx_undo_update_cleanup(
/*====================*/
- trx_t* trx, /*!< in: trx owning the update undo log */
- page_t* undo_page, /*!< in: update undo log header page,
- x-latched */
- mtr_t* mtr) /*!< in: mtr */
+ trx_t* trx, /*!< in: trx owning the update
+ undo log */
+ page_t* undo_page, /*!< in: update undo log header page,
+ x-latched */
+ mtr_t* mtr) /*!< in: mtr */
{
- trx_rseg_t* rseg;
- trx_undo_t* undo;
+ trx_undo_t* undo = trx->rsegs.m_redo.update_undo;
+ trx_rseg_t* rseg = undo->rseg;
- undo = trx->update_undo;
- rseg = trx->rseg;
-
- ut_ad(mutex_own(&(rseg->mutex)));
+ ut_ad(mutex_own(&rseg->mutex));
trx_purge_add_update_undo_to_history(trx, undo_page, mtr);
- UT_LIST_REMOVE(undo_list, rseg->update_undo_list, undo);
+ UT_LIST_REMOVE(rseg->update_undo_list, undo);
- trx->update_undo = NULL;
+ trx->rsegs.m_redo.update_undo = NULL;
if (undo->state == TRX_UNDO_CACHED) {
- UT_LIST_ADD_FIRST(undo_list, rseg->update_undo_cached, undo);
+ UT_LIST_ADD_FIRST(rseg->update_undo_cached, undo);
MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
} else {
@@ -1953,60 +1763,45 @@ trx_undo_update_cleanup(
}
}
-/******************************************************************//**
-Frees or caches an insert undo log after a transaction commit or rollback.
-Knowledge of inserts is not needed after a commit or rollback, therefore
-the data can be discarded. */
-UNIV_INTERN
+/** Free an insert or temporary undo log after commit or rollback.
+The information is not needed after a commit or rollback, therefore
+the data can be discarded.
+@param[in,out] undo undo log
+@param[in] is_temp whether this is temporary undo log */
void
-trx_undo_insert_cleanup(
-/*====================*/
- trx_t* trx) /*!< in: transaction handle */
+trx_undo_commit_cleanup(trx_undo_t* undo, bool is_temp)
{
- trx_undo_t* undo;
- trx_rseg_t* rseg;
-
- undo = trx->insert_undo;
- ut_ad(undo);
+ trx_rseg_t* rseg = undo->rseg;
+ ut_ad(is_temp == !rseg->is_persistent());
- rseg = trx->rseg;
-
- mutex_enter(&(rseg->mutex));
+ mutex_enter(&rseg->mutex);
- UT_LIST_REMOVE(undo_list, rseg->insert_undo_list, undo);
- trx->insert_undo = NULL;
+ UT_LIST_REMOVE(rseg->insert_undo_list, undo);
if (undo->state == TRX_UNDO_CACHED) {
-
- UT_LIST_ADD_FIRST(undo_list, rseg->insert_undo_cached, undo);
-
+ UT_LIST_ADD_FIRST(rseg->insert_undo_cached, undo);
MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
} else {
ut_ad(undo->state == TRX_UNDO_TO_FREE);
/* Delete first the undo log segment in the file */
-
- mutex_exit(&(rseg->mutex));
-
+ mutex_exit(&rseg->mutex);
if (!srv_read_only_mode) {
- trx_undo_seg_free(undo);
+ trx_undo_seg_free(undo, is_temp);
}
-
- mutex_enter(&(rseg->mutex));
+ mutex_enter(&rseg->mutex);
ut_ad(rseg->curr_size > undo->size);
-
rseg->curr_size -= undo->size;
trx_undo_mem_free(undo);
}
- mutex_exit(&(rseg->mutex));
+ mutex_exit(&rseg->mutex);
}
/********************************************************************//**
At shutdown, frees the undo logs of a PREPARED transaction. */
-UNIV_INTERN
void
trx_undo_free_prepared(
/*===================*/
@@ -2014,8 +1809,8 @@ trx_undo_free_prepared(
{
ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
- if (trx->update_undo) {
- switch (trx->update_undo->state) {
+ if (trx->rsegs.m_redo.update_undo) {
+ switch (trx->rsegs.m_redo.update_undo->state) {
case TRX_UNDO_PREPARED:
break;
case TRX_UNDO_CACHED:
@@ -2025,12 +1820,13 @@ trx_undo_free_prepared(
TRX_STATE_COMMITTED_IN_MEMORY));
/* fall through */
case TRX_UNDO_ACTIVE:
- /* lock_trx_release_locks() assigns
+ /* trx_t::commit_state() assigns
trx->is_recovered=false and
trx->state = TRX_STATE_COMMITTED_IN_MEMORY,
also for transactions that we faked
to TRX_STATE_PREPARED in trx_rollback_resurrected(). */
- ut_a(srv_read_only_mode
+ ut_a(!srv_was_started
+ || srv_read_only_mode
|| srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
|| srv_fast_shutdown);
break;
@@ -2038,12 +1834,15 @@ trx_undo_free_prepared(
ut_error;
}
- UT_LIST_REMOVE(undo_list, trx->rseg->update_undo_list,
- trx->update_undo);
- trx_undo_mem_free(trx->update_undo);
+ UT_LIST_REMOVE(trx->rsegs.m_redo.rseg->update_undo_list,
+ trx->rsegs.m_redo.update_undo);
+ trx_undo_mem_free(trx->rsegs.m_redo.update_undo);
+
+ trx->rsegs.m_redo.update_undo = NULL;
}
- if (trx->insert_undo) {
- switch (trx->insert_undo->state) {
+
+ if (trx->rsegs.m_redo.insert_undo) {
+ switch (trx->rsegs.m_redo.insert_undo->state) {
case TRX_UNDO_PREPARED:
break;
case TRX_UNDO_CACHED:
@@ -2053,12 +1852,13 @@ trx_undo_free_prepared(
TRX_STATE_COMMITTED_IN_MEMORY));
/* fall through */
case TRX_UNDO_ACTIVE:
- /* lock_trx_release_locks() assigns
+ /* trx_t::commit_state() assigns
trx->is_recovered=false and
trx->state = TRX_STATE_COMMITTED_IN_MEMORY,
also for transactions that we faked
to TRX_STATE_PREPARED in trx_rollback_resurrected(). */
- ut_a(srv_read_only_mode
+ ut_a(!srv_was_started
+ || srv_read_only_mode
|| srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
|| srv_fast_shutdown);
break;
@@ -2066,9 +1866,19 @@ trx_undo_free_prepared(
ut_error;
}
- UT_LIST_REMOVE(undo_list, trx->rseg->insert_undo_list,
- trx->insert_undo);
- trx_undo_mem_free(trx->insert_undo);
+ UT_LIST_REMOVE(trx->rsegs.m_redo.rseg->insert_undo_list,
+ trx->rsegs.m_redo.insert_undo);
+ trx_undo_mem_free(trx->rsegs.m_redo.insert_undo);
+
+ trx->rsegs.m_redo.insert_undo = NULL;
+ }
+
+ if (trx_undo_t*& undo = trx->rsegs.m_noredo.undo) {
+ ut_a(undo->state == TRX_UNDO_PREPARED);
+
+ UT_LIST_REMOVE(trx->rsegs.m_noredo.rseg->insert_undo_list,
+ undo);
+ trx_undo_mem_free(undo);
+ undo = NULL;
}
}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/usr/usr0sess.cc b/storage/innobase/usr/usr0sess.cc
deleted file mode 100644
index 911a3c91ca2..00000000000
--- a/storage/innobase/usr/usr0sess.cc
+++ /dev/null
@@ -1,67 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file usr/usr0sess.cc
-Sessions
-
-Created 6/25/1996 Heikki Tuuri
-*******************************************************/
-
-#include "usr0sess.h"
-
-#ifdef UNIV_NONINL
-#include "usr0sess.ic"
-#endif
-
-#include "trx0trx.h"
-
-/*********************************************************************//**
-Opens a session.
-@return own: session object */
-UNIV_INTERN
-sess_t*
-sess_open(void)
-/*===========*/
-{
- sess_t* sess;
-
- sess = static_cast<sess_t*>(mem_zalloc(sizeof(*sess)));
-
- sess->state = SESS_ACTIVE;
-
- sess->trx = trx_allocate_for_background();
- sess->trx->sess = sess;
-
- return(sess);
-}
-
-/*********************************************************************//**
-Closes a session, freeing the memory occupied by it. */
-UNIV_INTERN
-void
-sess_close(
-/*=======*/
- sess_t* sess) /*!< in, own: session object */
-{
- ut_a(UT_LIST_GET_LEN(sess->graphs) == 0);
-
- trx_free_for_background(sess->trx);
- mem_free(sess);
-}
diff --git a/storage/innobase/ut/crc32_power8/crc32.S b/storage/innobase/ut/crc32_power8/crc32.S
deleted file mode 100644
index b064ce3dc96..00000000000
--- a/storage/innobase/ut/crc32_power8/crc32.S
+++ /dev/null
@@ -1,775 +0,0 @@
-/*
- * Calculate the checksum of data that is 16 byte aligned and a multiple of
- * 16 bytes.
- *
- * The first step is to reduce it to 1024 bits. We do this in 8 parallel
- * chunks in order to mask the latency of the vpmsum instructions. If we
- * have more than 32 kB of data to checksum we repeat this step multiple
- * times, passing in the previous 1024 bits.
- *
- * The next step is to reduce the 1024 bits to 64 bits. This step adds
- * 32 bits of 0s to the end - this matches what a CRC does. We just
- * calculate constants that land the data in this 32 bits.
- *
- * We then use fixed point Barrett reduction to compute a mod n over GF(2)
- * for n = CRC using POWER8 instructions. We use x = 32.
- *
- * http://en.wikipedia.org/wiki/Barrett_reduction
- *
- * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
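
The Barrett step described in the header comment above can be shown in portable scalar code. A minimal sketch, assuming the CRC-32C polynomial from crc32_constants.h and substituting a bit-by-bit carry-less multiply for vpmsumd (non-reflected variant; the file's REFLECT build mirrors the same algebra on bit-reversed data):

    #include <stdint.h>
    #include <stdio.h>

    /* Carry-less (GF(2)) 64x64 -> 128 bit multiply; a scalar
       stand-in for the vpmsumd instruction used in this file. */
    static void clmul64(uint64_t a, uint64_t b, uint64_t* hi, uint64_t* lo)
    {
        uint64_t h = 0, l = 0;
        for (int i = 0; i < 64; i++) {
            if ((b >> i) & 1) {
                l ^= a << i;
                if (i) {
                    h ^= a >> (64 - i);
                }
            }
        }
        *hi = h;
        *lo = l;
    }

    /* floor(x^64 / p) by schoolbook long division; uses the
       GCC/Clang __int128 extension so the x^64 bit fits. */
    static uint64_t poly_div_x64(uint64_t p)
    {
        unsigned __int128 num = (unsigned __int128) 1 << 64;
        uint64_t q = 0;
        for (int i = 64; i >= 32; i--) {
            if ((num >> i) & 1) {
                q |= 1ULL << (i - 32);
                num ^= (unsigned __int128) p << (i - 32);
            }
        }
        return q;
    }

    /* Reference: reduce a 64-bit polynomial mod p bit by bit. */
    static uint32_t poly_mod64(uint64_t a, uint64_t p)
    {
        for (int i = 63; i >= 32; i--) {
            if ((a >> i) & 1) {
                a ^= p << (i - 32);
            }
        }
        return (uint32_t) a;
    }

    /* Barrett reduction: over GF(2), q = floor(a*mu / x^64) is the
       exact quotient floor(a / p), so a xor q*p is the remainder. */
    static uint32_t barrett_mod64(uint64_t a, uint64_t p, uint64_t mu)
    {
        uint64_t hi, lo;
        clmul64(a, mu, &hi, &lo);    /* ma */
        uint64_t q = hi;             /* q = floor(ma / 2^64) */
        clmul64(q, p, &hi, &lo);     /* qn */
        return (uint32_t) (a ^ lo);  /* a - qn; subtraction is xor */
    }

    int main()
    {
        const uint64_t p  = 0x11edc6f41ULL;  /* x^32 + CRC (0x1edc6f41) */
        const uint64_t mu = poly_div_x64(p);
        const uint64_t a  = 0x0123456789abcdefULL;  /* arbitrary input */
        printf("barrett %08x, reference %08x\n",
               (unsigned) barrett_mod64(a, p, mu),
               (unsigned) poly_mod64(a, p));
        return 0;
    }

Unlike integer Barrett reduction, the polynomial version has no correction step: because GF(2) arithmetic carries nothing, the estimated quotient is exact for any 64-bit input.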
-
-#ifdef __powerpc__
-
-#include <ppc-asm.h>
-#include "ppc-opcode.h"
-
-#undef toc
-
-#ifndef r1
-#define r1 1
-#endif
-
-#ifndef r2
-#define r2 2
-#endif
-
- .section .rodata
-.balign 16
-
-.byteswap_constant:
- /* byte reverse permute constant */
- .octa 0x0F0E0D0C0B0A09080706050403020100
-
-#define __ASSEMBLY__
-#include "crc32_constants.h"
-
- .text
-
-#if defined(__BIG_ENDIAN__) && defined(REFLECT)
-#define BYTESWAP_DATA
-#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT)
-#define BYTESWAP_DATA
-#else
-#undef BYTESWAP_DATA
-#endif
-
-#define off16 r25
-#define off32 r26
-#define off48 r27
-#define off64 r28
-#define off80 r29
-#define off96 r30
-#define off112 r31
-
-#define const1 v24
-#define const2 v25
-
-#define byteswap v26
-#define mask_32bit v27
-#define mask_64bit v28
-#define zeroes v29
-
-#ifdef BYTESWAP_DATA
-#define VPERM(A, B, C, D) vperm A, B, C, D
-#else
-#define VPERM(A, B, C, D)
-#endif
-
-/* unsigned int __crc32_vpmsum(unsigned int crc, void *p, unsigned long len) */
-FUNC_START(__crc32_vpmsum)
- std r31,-8(r1)
- std r30,-16(r1)
- std r29,-24(r1)
- std r28,-32(r1)
- std r27,-40(r1)
- std r26,-48(r1)
- std r25,-56(r1)
-
- li off16,16
- li off32,32
- li off48,48
- li off64,64
- li off80,80
- li off96,96
- li off112,112
- li r0,0
-
- /* Enough room for saving 10 non volatile VMX registers */
- subi r6,r1,56+10*16
- subi r7,r1,56+2*16
-
- stvx v20,0,r6
- stvx v21,off16,r6
- stvx v22,off32,r6
- stvx v23,off48,r6
- stvx v24,off64,r6
- stvx v25,off80,r6
- stvx v26,off96,r6
- stvx v27,off112,r6
- stvx v28,0,r7
- stvx v29,off16,r7
-
- mr r10,r3
-
- vxor zeroes,zeroes,zeroes
- vspltisw v0,-1
-
- vsldoi mask_32bit,zeroes,v0,4
- vsldoi mask_64bit,zeroes,v0,8
-
- /* Get the initial value into v8 */
- vxor v8,v8,v8
- MTVRD(v8, r3)
-#ifdef REFLECT
- vsldoi v8,zeroes,v8,8 /* shift into bottom 32 bits */
-#else
- vsldoi v8,v8,zeroes,4 /* shift into top 32 bits */
-#endif
-
-#ifdef BYTESWAP_DATA
- addis r3,r2,.byteswap_constant@toc@ha
- addi r3,r3,.byteswap_constant@toc@l
-
- lvx byteswap,0,r3
- addi r3,r3,16
-#endif
-
- cmpdi r5,256
- blt .Lshort
-
- rldicr r6,r5,0,56
-
- /* Checksum in blocks of MAX_SIZE */
-1: lis r7,MAX_SIZE@h
- ori r7,r7,MAX_SIZE@l
- mr r9,r7
- cmpd r6,r7
- bgt 2f
- mr r7,r6
-2: subf r6,r7,r6
-
- /* our main loop does 128 bytes at a time */
- srdi r7,r7,7
-
- /*
- * Work out the offset into the constants table to start at. Each
- * constant is 16 bytes, and it is used against 128 bytes of input
- * data - 128 / 16 = 8
- */
- sldi r8,r7,4
- srdi r9,r9,3
- subf r8,r8,r9
-
- /* We reduce our final 128 bytes in a separate step */
- addi r7,r7,-1
- mtctr r7
-
- addis r3,r2,.constants@toc@ha
- addi r3,r3,.constants@toc@l
-
- /* Find the start of our constants */
- add r3,r3,r8
-
- /* zero v0-v7 which will contain our checksums */
- vxor v0,v0,v0
- vxor v1,v1,v1
- vxor v2,v2,v2
- vxor v3,v3,v3
- vxor v4,v4,v4
- vxor v5,v5,v5
- vxor v6,v6,v6
- vxor v7,v7,v7
-
- lvx const1,0,r3
-
- /*
- * If we are looping back to consume more data we use the values
- * already in v16-v23.
- */
- cmpdi r0,1
- beq 2f
-
- /* First warm up pass */
- lvx v16,0,r4
- lvx v17,off16,r4
- VPERM(v16,v16,v16,byteswap)
- VPERM(v17,v17,v17,byteswap)
- lvx v18,off32,r4
- lvx v19,off48,r4
- VPERM(v18,v18,v18,byteswap)
- VPERM(v19,v19,v19,byteswap)
- lvx v20,off64,r4
- lvx v21,off80,r4
- VPERM(v20,v20,v20,byteswap)
- VPERM(v21,v21,v21,byteswap)
- lvx v22,off96,r4
- lvx v23,off112,r4
- VPERM(v22,v22,v22,byteswap)
- VPERM(v23,v23,v23,byteswap)
- addi r4,r4,8*16
-
- /* xor in initial value */
- vxor v16,v16,v8
-
-2: bdz .Lfirst_warm_up_done
-
- addi r3,r3,16
- lvx const2,0,r3
-
- /* Second warm up pass */
- VPMSUMD(v8,v16,const1)
- lvx v16,0,r4
- VPERM(v16,v16,v16,byteswap)
- ori r2,r2,0
-
- VPMSUMD(v9,v17,const1)
- lvx v17,off16,r4
- VPERM(v17,v17,v17,byteswap)
- ori r2,r2,0
-
- VPMSUMD(v10,v18,const1)
- lvx v18,off32,r4
- VPERM(v18,v18,v18,byteswap)
- ori r2,r2,0
-
- VPMSUMD(v11,v19,const1)
- lvx v19,off48,r4
- VPERM(v19,v19,v19,byteswap)
- ori r2,r2,0
-
- VPMSUMD(v12,v20,const1)
- lvx v20,off64,r4
- VPERM(v20,v20,v20,byteswap)
- ori r2,r2,0
-
- VPMSUMD(v13,v21,const1)
- lvx v21,off80,r4
- VPERM(v21,v21,v21,byteswap)
- ori r2,r2,0
-
- VPMSUMD(v14,v22,const1)
- lvx v22,off96,r4
- VPERM(v22,v22,v22,byteswap)
- ori r2,r2,0
-
- VPMSUMD(v15,v23,const1)
- lvx v23,off112,r4
- VPERM(v23,v23,v23,byteswap)
-
- addi r4,r4,8*16
-
- bdz .Lfirst_cool_down
-
- /*
- * main loop. We modulo schedule it such that it takes three iterations
- * to complete - first iteration load, second iteration vpmsum, third
- * iteration xor.
- */
- .balign 16
-4: lvx const1,0,r3
- addi r3,r3,16
- ori r2,r2,0
-
- vxor v0,v0,v8
- VPMSUMD(v8,v16,const2)
- lvx v16,0,r4
- VPERM(v16,v16,v16,byteswap)
- ori r2,r2,0
-
- vxor v1,v1,v9
- VPMSUMD(v9,v17,const2)
- lvx v17,off16,r4
- VPERM(v17,v17,v17,byteswap)
- ori r2,r2,0
-
- vxor v2,v2,v10
- VPMSUMD(v10,v18,const2)
- lvx v18,off32,r4
- VPERM(v18,v18,v18,byteswap)
- ori r2,r2,0
-
- vxor v3,v3,v11
- VPMSUMD(v11,v19,const2)
- lvx v19,off48,r4
- VPERM(v19,v19,v19,byteswap)
- lvx const2,0,r3
- ori r2,r2,0
-
- vxor v4,v4,v12
- VPMSUMD(v12,v20,const1)
- lvx v20,off64,r4
- VPERM(v20,v20,v20,byteswap)
- ori r2,r2,0
-
- vxor v5,v5,v13
- VPMSUMD(v13,v21,const1)
- lvx v21,off80,r4
- VPERM(v21,v21,v21,byteswap)
- ori r2,r2,0
-
- vxor v6,v6,v14
- VPMSUMD(v14,v22,const1)
- lvx v22,off96,r4
- VPERM(v22,v22,v22,byteswap)
- ori r2,r2,0
-
- vxor v7,v7,v15
- VPMSUMD(v15,v23,const1)
- lvx v23,off112,r4
- VPERM(v23,v23,v23,byteswap)
-
- addi r4,r4,8*16
-
- bdnz 4b
-
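
The modulo-scheduled loop above keeps three blocks in flight at once. A toy scalar sketch of the same scheduling idea (f() stands in for vpmsum; the point is that each iteration accumulates the result started two iterations earlier, hiding the multiply latency):

    #include <stdio.h>

    static unsigned f(unsigned x) { return x * 2654435761u; }  /* fake "vpmsum" */

    int main()
    {
        unsigned data[16], acc = 0;
        for (unsigned i = 0; i < 16; i++) {
            data[i] = i;
        }

        unsigned loaded  = data[0];    /* prologue: load block 0 */
        unsigned product = f(loaded);  /* prologue: multiply block 0 */
        loaded = data[1];              /* load block 1 */

        for (unsigned i = 2; i < 16; i++) {
            acc ^= product;            /* stage 3: accumulate block i-2 */
            product = f(loaded);       /* stage 2: multiply block i-1 */
            loaded = data[i];          /* stage 1: load block i */
        }

        acc ^= product;                /* epilogue: drain blocks 14 and 15 */
        acc ^= f(loaded);

        printf("%08x\n", acc);
        return 0;
    }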
-.Lfirst_cool_down:
- /* First cool down pass */
- lvx const1,0,r3
- addi r3,r3,16
-
- vxor v0,v0,v8
- VPMSUMD(v8,v16,const1)
- ori r2,r2,0
-
- vxor v1,v1,v9
- VPMSUMD(v9,v17,const1)
- ori r2,r2,0
-
- vxor v2,v2,v10
- VPMSUMD(v10,v18,const1)
- ori r2,r2,0
-
- vxor v3,v3,v11
- VPMSUMD(v11,v19,const1)
- ori r2,r2,0
-
- vxor v4,v4,v12
- VPMSUMD(v12,v20,const1)
- ori r2,r2,0
-
- vxor v5,v5,v13
- VPMSUMD(v13,v21,const1)
- ori r2,r2,0
-
- vxor v6,v6,v14
- VPMSUMD(v14,v22,const1)
- ori r2,r2,0
-
- vxor v7,v7,v15
- VPMSUMD(v15,v23,const1)
- ori r2,r2,0
-
-.Lsecond_cool_down:
- /* Second cool down pass */
- vxor v0,v0,v8
- vxor v1,v1,v9
- vxor v2,v2,v10
- vxor v3,v3,v11
- vxor v4,v4,v12
- vxor v5,v5,v13
- vxor v6,v6,v14
- vxor v7,v7,v15
-
-#ifdef REFLECT
- /*
- * vpmsumd produces a 96 bit result in the least significant bits
- * of the register. Since we are bit reflected we have to shift it
- * left 32 bits so it occupies the least significant bits in the
- * bit reflected domain.
- */
- vsldoi v0,v0,zeroes,4
- vsldoi v1,v1,zeroes,4
- vsldoi v2,v2,zeroes,4
- vsldoi v3,v3,zeroes,4
- vsldoi v4,v4,zeroes,4
- vsldoi v5,v5,zeroes,4
- vsldoi v6,v6,zeroes,4
- vsldoi v7,v7,zeroes,4
-#endif
-
- /* xor with last 1024 bits */
- lvx v8,0,r4
- lvx v9,off16,r4
- VPERM(v8,v8,v8,byteswap)
- VPERM(v9,v9,v9,byteswap)
- lvx v10,off32,r4
- lvx v11,off48,r4
- VPERM(v10,v10,v10,byteswap)
- VPERM(v11,v11,v11,byteswap)
- lvx v12,off64,r4
- lvx v13,off80,r4
- VPERM(v12,v12,v12,byteswap)
- VPERM(v13,v13,v13,byteswap)
- lvx v14,off96,r4
- lvx v15,off112,r4
- VPERM(v14,v14,v14,byteswap)
- VPERM(v15,v15,v15,byteswap)
-
- addi r4,r4,8*16
-
- vxor v16,v0,v8
- vxor v17,v1,v9
- vxor v18,v2,v10
- vxor v19,v3,v11
- vxor v20,v4,v12
- vxor v21,v5,v13
- vxor v22,v6,v14
- vxor v23,v7,v15
-
- li r0,1
- cmpdi r6,0
- addi r6,r6,128
- bne 1b
-
- /* Work out how many bytes we have left */
- andi. r5,r5,127
-
- /* Calculate where in the constant table we need to start */
- subfic r6,r5,128
- add r3,r3,r6
-
- /* How many 16 byte chunks are in the tail */
- srdi r7,r5,4
- mtctr r7
-
- /*
- * Reduce the previously calculated 1024 bits to 64 bits, shifting
- * 32 bits to include the trailing 32 bits of zeros
- */
- lvx v0,0,r3
- lvx v1,off16,r3
- lvx v2,off32,r3
- lvx v3,off48,r3
- lvx v4,off64,r3
- lvx v5,off80,r3
- lvx v6,off96,r3
- lvx v7,off112,r3
- addi r3,r3,8*16
-
- VPMSUMW(v0,v16,v0)
- VPMSUMW(v1,v17,v1)
- VPMSUMW(v2,v18,v2)
- VPMSUMW(v3,v19,v3)
- VPMSUMW(v4,v20,v4)
- VPMSUMW(v5,v21,v5)
- VPMSUMW(v6,v22,v6)
- VPMSUMW(v7,v23,v7)
-
- /* Now reduce the tail (0 - 112 bytes) */
- cmpdi r7,0
- beq 1f
-
- lvx v16,0,r4
- lvx v17,0,r3
- VPERM(v16,v16,v16,byteswap)
- VPMSUMW(v16,v16,v17)
- vxor v0,v0,v16
- bdz 1f
-
- lvx v16,off16,r4
- lvx v17,off16,r3
- VPERM(v16,v16,v16,byteswap)
- VPMSUMW(v16,v16,v17)
- vxor v0,v0,v16
- bdz 1f
-
- lvx v16,off32,r4
- lvx v17,off32,r3
- VPERM(v16,v16,v16,byteswap)
- VPMSUMW(v16,v16,v17)
- vxor v0,v0,v16
- bdz 1f
-
- lvx v16,off48,r4
- lvx v17,off48,r3
- VPERM(v16,v16,v16,byteswap)
- VPMSUMW(v16,v16,v17)
- vxor v0,v0,v16
- bdz 1f
-
- lvx v16,off64,r4
- lvx v17,off64,r3
- VPERM(v16,v16,v16,byteswap)
- VPMSUMW(v16,v16,v17)
- vxor v0,v0,v16
- bdz 1f
-
- lvx v16,off80,r4
- lvx v17,off80,r3
- VPERM(v16,v16,v16,byteswap)
- VPMSUMW(v16,v16,v17)
- vxor v0,v0,v16
- bdz 1f
-
- lvx v16,off96,r4
- lvx v17,off96,r3
- VPERM(v16,v16,v16,byteswap)
- VPMSUMW(v16,v16,v17)
- vxor v0,v0,v16
-
- /* Now xor all the parallel chunks together */
-1: vxor v0,v0,v1
- vxor v2,v2,v3
- vxor v4,v4,v5
- vxor v6,v6,v7
-
- vxor v0,v0,v2
- vxor v4,v4,v6
-
- vxor v0,v0,v4
-
-.Lbarrett_reduction:
- /* Barrett constants */
- addis r3,r2,.barrett_constants@toc@ha
- addi r3,r3,.barrett_constants@toc@l
-
- lvx const1,0,r3
- lvx const2,off16,r3
-
- vsldoi v1,v0,v0,8
- vxor v0,v0,v1 /* xor two 64 bit results together */
-
-#ifdef REFLECT
- /* shift left one bit */
- vspltisb v1,1
- vsl v0,v0,v1
-#endif
-
- vand v0,v0,mask_64bit
-
-#ifndef REFLECT
- /*
- * Now for the Barrett reduction algorithm. The idea is to calculate q,
- * the multiple of our polynomial that we need to subtract. By
- * doing the computation 2x bits higher (ie 64 bits) and shifting the
- * result back down 2x bits, we round down to the nearest multiple.
- */
- VPMSUMD(v1,v0,const1) /* ma */
- vsldoi v1,zeroes,v1,8 /* q = floor(ma/(2^64)) */
- VPMSUMD(v1,v1,const2) /* qn */
- vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */
-
- /*
- * Get the result into r3. We need to shift it left 8 bytes:
- * V0 [ 0 1 2 X ]
- * V0 [ 0 X 2 3 ]
- */
- vsldoi v0,v0,zeroes,8 /* shift result into top 64 bits */
-#else
- /*
- * The reflected version of Barrett reduction. Instead of bit
- * reflecting our data (which is expensive to do), we bit reflect our
- * constants and our algorithm, which means the intermediate data in
- * our vector registers goes from 0-63 instead of 63-0. We can reflect
- * the algorithm because we don't carry in mod 2 arithmetic.
- */
- vand v1,v0,mask_32bit /* bottom 32 bits of a */
- VPMSUMD(v1,v1,const1) /* ma */
- vand v1,v1,mask_32bit /* bottom 32bits of ma */
- VPMSUMD(v1,v1,const2) /* qn */
- vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */
-
- /*
- * Since we are bit reflected, the result (ie the low 32 bits) is in
- * the high 32 bits. We just need to shift it left 4 bytes
- * V0 [ 0 1 X 3 ]
- * V0 [ 0 X 2 3 ]
- */
- vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits of */
-#endif
-
-.Lout:
- subi r6,r1,56+10*16
- subi r7,r1,56+2*16
-
- lvx v20,0,r6
- lvx v21,off16,r6
- lvx v22,off32,r6
- lvx v23,off48,r6
- lvx v24,off64,r6
- lvx v25,off80,r6
- lvx v26,off96,r6
- lvx v27,off112,r6
- lvx v28,0,r7
- lvx v29,off16,r7
-
- /* Get it into r3 */
- MFVRD(r3, v0)
-
- ld r31,-8(r1)
- ld r30,-16(r1)
- ld r29,-24(r1)
- ld r28,-32(r1)
- ld r27,-40(r1)
- ld r26,-48(r1)
- ld r25,-56(r1)
-
- blr
-
-.Lfirst_warm_up_done:
- lvx const1,0,r3
- addi r3,r3,16
-
- VPMSUMD(v8,v16,const1)
- VPMSUMD(v9,v17,const1)
- VPMSUMD(v10,v18,const1)
- VPMSUMD(v11,v19,const1)
- VPMSUMD(v12,v20,const1)
- VPMSUMD(v13,v21,const1)
- VPMSUMD(v14,v22,const1)
- VPMSUMD(v15,v23,const1)
-
- b .Lsecond_cool_down
-
-.Lshort:
- cmpdi r5,0
- beq .Lzero
-
- addis r3,r2,.short_constants@toc@ha
- addi r3,r3,.short_constants@toc@l
-
- /* Calculate where in the constant table we need to start */
- subfic r6,r5,256
- add r3,r3,r6
-
- /* How many 16 byte chunks? */
- srdi r7,r5,4
- mtctr r7
-
- vxor v19,v19,v19
- vxor v20,v20,v20
-
- lvx v0,0,r4
- lvx v16,0,r3
- VPERM(v0,v0,v16,byteswap)
- vxor v0,v0,v8 /* xor in initial value */
- VPMSUMW(v0,v0,v16)
- bdz .Lv0
-
- lvx v1,off16,r4
- lvx v17,off16,r3
- VPERM(v1,v1,v17,byteswap)
- VPMSUMW(v1,v1,v17)
- bdz .Lv1
-
- lvx v2,off32,r4
- lvx v16,off32,r3
- VPERM(v2,v2,v16,byteswap)
- VPMSUMW(v2,v2,v16)
- bdz .Lv2
-
- lvx v3,off48,r4
- lvx v17,off48,r3
- VPERM(v3,v3,v17,byteswap)
- VPMSUMW(v3,v3,v17)
- bdz .Lv3
-
- lvx v4,off64,r4
- lvx v16,off64,r3
- VPERM(v4,v4,v16,byteswap)
- VPMSUMW(v4,v4,v16)
- bdz .Lv4
-
- lvx v5,off80,r4
- lvx v17,off80,r3
- VPERM(v5,v5,v17,byteswap)
- VPMSUMW(v5,v5,v17)
- bdz .Lv5
-
- lvx v6,off96,r4
- lvx v16,off96,r3
- VPERM(v6,v6,v16,byteswap)
- VPMSUMW(v6,v6,v16)
- bdz .Lv6
-
- lvx v7,off112,r4
- lvx v17,off112,r3
- VPERM(v7,v7,v17,byteswap)
- VPMSUMW(v7,v7,v17)
- bdz .Lv7
-
- addi r3,r3,128
- addi r4,r4,128
-
- lvx v8,0,r4
- lvx v16,0,r3
- VPERM(v8,v8,v16,byteswap)
- VPMSUMW(v8,v8,v16)
- bdz .Lv8
-
- lvx v9,off16,r4
- lvx v17,off16,r3
- VPERM(v9,v9,v17,byteswap)
- VPMSUMW(v9,v9,v17)
- bdz .Lv9
-
- lvx v10,off32,r4
- lvx v16,off32,r3
- VPERM(v10,v10,v16,byteswap)
- VPMSUMW(v10,v10,v16)
- bdz .Lv10
-
- lvx v11,off48,r4
- lvx v17,off48,r3
- VPERM(v11,v11,v17,byteswap)
- VPMSUMW(v11,v11,v17)
- bdz .Lv11
-
- lvx v12,off64,r4
- lvx v16,off64,r3
- VPERM(v12,v12,v16,byteswap)
- VPMSUMW(v12,v12,v16)
- bdz .Lv12
-
- lvx v13,off80,r4
- lvx v17,off80,r3
- VPERM(v13,v13,v17,byteswap)
- VPMSUMW(v13,v13,v17)
- bdz .Lv13
-
- lvx v14,off96,r4
- lvx v16,off96,r3
- VPERM(v14,v14,v16,byteswap)
- VPMSUMW(v14,v14,v16)
- bdz .Lv14
-
- lvx v15,off112,r4
- lvx v17,off112,r3
- VPERM(v15,v15,v17,byteswap)
- VPMSUMW(v15,v15,v17)
-
-.Lv15: vxor v19,v19,v15
-.Lv14: vxor v20,v20,v14
-.Lv13: vxor v19,v19,v13
-.Lv12: vxor v20,v20,v12
-.Lv11: vxor v19,v19,v11
-.Lv10: vxor v20,v20,v10
-.Lv9: vxor v19,v19,v9
-.Lv8: vxor v20,v20,v8
-.Lv7: vxor v19,v19,v7
-.Lv6: vxor v20,v20,v6
-.Lv5: vxor v19,v19,v5
-.Lv4: vxor v20,v20,v4
-.Lv3: vxor v19,v19,v3
-.Lv2: vxor v20,v20,v2
-.Lv1: vxor v19,v19,v1
-.Lv0: vxor v20,v20,v0
-
- vxor v0,v19,v20
-
- b .Lbarrett_reduction
-
-.Lzero:
- mr r3,r10
- blr
- b .Lout
-
-FUNC_END(__crc32_vpmsum)
-
-#endif /* __powerpc__ */
diff --git a/storage/innobase/ut/crc32_power8/crc32_constants.h b/storage/innobase/ut/crc32_power8/crc32_constants.h
deleted file mode 100644
index ba2592b829c..00000000000
--- a/storage/innobase/ut/crc32_power8/crc32_constants.h
+++ /dev/null
@@ -1,911 +0,0 @@
-#ifndef CRC32_CONSTANTS_H
-#define CRC32_CONSTANTS_H
-
-#ifdef __powerpc__
-
-
-#define CRC 0x1edc6f41
-#define CRC_XOR
-#define REFLECT
-
-#ifndef __ASSEMBLY__
-#ifdef CRC_TABLE
-static const unsigned int crc_table[] = {
- 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4,
- 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
- 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b,
- 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
- 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b,
- 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
- 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54,
- 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
- 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a,
- 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
- 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5,
- 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
- 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45,
- 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
- 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a,
- 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
- 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48,
- 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
- 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687,
- 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
- 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927,
- 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
- 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8,
- 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
- 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096,
- 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
- 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859,
- 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
- 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9,
- 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
- 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36,
- 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
- 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c,
- 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
- 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043,
- 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
- 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3,
- 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
- 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c,
- 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
- 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652,
- 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
- 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d,
- 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
- 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d,
- 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
- 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2,
- 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
- 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530,
- 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
- 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff,
- 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
- 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f,
- 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
- 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90,
- 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
- 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee,
- 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
- 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321,
- 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
- 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81,
- 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
- 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e,
- 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,};
-
-#endif
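
The crc_table[] above is the standard byte-at-a-time table for this polynomial in bit-reflected form (REFLECT is defined, and 0x1edc6f41 bit-reversed is 0x82f63b78). A short sketch that regenerates it:

    #include <stdint.h>
    #include <stdio.h>

    int main()
    {
        const uint32_t poly = 0x82f63b78u;  /* 0x1edc6f41 bit-reflected */
        for (uint32_t i = 0; i < 256; i++) {
            uint32_t crc = i;
            for (int k = 0; k < 8; k++) {
                crc = (crc >> 1) ^ ((crc & 1) ? poly : 0);
            }
            printf("0x%08x,%s", (unsigned) crc, (i % 4 == 3) ? "\n" : " ");
        }
        return 0;
    }

Its second output value, 0xf26b8303, matches the second table entry above.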
-#else
-#define MAX_SIZE 32768
-.constants:
-
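
Each .octa entry that follows packs two folding constants of the form x^n mod p(x). A sketch of how such residues can be computed, using square-and-multiply over GF(2). Note this prints the plain residues only; the table additionally stores them bit-reflected and shifted left by one, which is what the p(x)` notation in the comments denotes, so the printed values will not match the .octa bytes literally.

    #include <stdint.h>
    #include <stdio.h>

    /* a*b mod p over GF(2), where p = x^32 + p_low and a, b have
       degree < 32. Horner evaluation, reducing one bit at a time. */
    static uint32_t gf2_mulmod(uint32_t a, uint32_t b, uint32_t p_low)
    {
        uint32_t r = 0;
        for (int i = 31; i >= 0; i--) {
            uint32_t carry = r & 0x80000000u;
            r <<= 1;                     /* r *= x */
            if (carry) {
                r ^= p_low;              /* reduce the x^32 term */
            }
            if ((b >> i) & 1) {
                r ^= a;
            }
        }
        return r;
    }

    /* x^n mod p by square-and-multiply. */
    static uint32_t gf2_xpow(uint64_t n, uint32_t p_low)
    {
        uint32_t result = 1;             /* x^0 */
        uint32_t base = 2;               /* x^1 */
        while (n) {
            if (n & 1) {
                result = gf2_mulmod(result, base, p_low);
            }
            base = gf2_mulmod(base, base, p_low);
            n >>= 1;
        }
        return result;
    }

    int main()
    {
        const uint32_t p_low = 0x1edc6f41u;      /* CRC from above */
        printf("x^261120 mod p = %08x\n", (unsigned) gf2_xpow(261120, p_low));
        printf("x^261184 mod p = %08x\n", (unsigned) gf2_xpow(261184, p_low));
        return 0;
    }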
- /* Reduce 262144 kbits to 1024 bits */
- /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */
- .octa 0x00000000b6ca9e20000000009c37c408
-
- /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */
- .octa 0x00000000350249a800000001b51df26c
-
- /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */
- .octa 0x00000001862dac54000000000724b9d0
-
- /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */
- .octa 0x00000001d87fb48c00000001c00532fe
-
- /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */
- .octa 0x00000001f39b699e00000000f05a9362
-
- /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */
- .octa 0x0000000101da11b400000001e1007970
-
- /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */
- .octa 0x00000001cab571e000000000a57366ee
-
- /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */
- .octa 0x00000000c7020cfe0000000192011284
-
- /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */
- .octa 0x00000000cdaed1ae0000000162716d9a
-
- /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */
- .octa 0x00000001e804effc00000000cd97ecde
-
- /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */
- .octa 0x0000000077c3ea3a0000000058812bc0
-
- /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */
- .octa 0x0000000068df31b40000000088b8c12e
-
- /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */
- .octa 0x00000000b059b6c200000001230b234c
-
- /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */
- .octa 0x0000000145fb8ed800000001120b416e
-
- /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */
- .octa 0x00000000cbc0916800000001974aecb0
-
- /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */
- .octa 0x000000005ceeedc2000000008ee3f226
-
- /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */
- .octa 0x0000000047d74e8600000001089aba9a
-
- /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */
- .octa 0x00000001407e9e220000000065113872
-
- /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */
- .octa 0x00000001da967bda000000005c07ec10
-
- /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */
- .octa 0x000000006c8983680000000187590924
-
- /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */
- .octa 0x00000000f2d14c9800000000e35da7c6
-
- /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */
- .octa 0x00000001993c6ad4000000000415855a
-
- /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */
- .octa 0x000000014683d1ac0000000073617758
-
- /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */
- .octa 0x00000001a7c93e6c0000000176021d28
-
- /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */
- .octa 0x000000010211e90a00000001c358fd0a
-
- /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */
- .octa 0x000000001119403e00000001ff7a2c18
-
- /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */
- .octa 0x000000001c3261aa00000000f2d9f7e4
-
- /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */
- .octa 0x000000014e37a634000000016cf1f9c8
-
- /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */
- .octa 0x0000000073786c0c000000010af9279a
-
- /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */
- .octa 0x000000011dc037f80000000004f101e8
-
- /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */
- .octa 0x0000000031433dfc0000000070bcf184
-
- /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */
- .octa 0x000000009cde8348000000000a8de642
-
- /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */
- .octa 0x0000000038d3c2a60000000062ea130c
-
- /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */
- .octa 0x000000011b25f26000000001eb31cbb2
-
- /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */
- .octa 0x000000001629e6f00000000170783448
-
- /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */
- .octa 0x0000000160838b4c00000001a684b4c6
-
- /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */
- .octa 0x000000007a44011c00000000253ca5b4
-
- /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */
- .octa 0x00000000226f417a0000000057b4b1e2
-
- /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */
- .octa 0x0000000045eb2eb400000000b6bd084c
-
- /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */
- .octa 0x000000014459d70c0000000123c2d592
-
- /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */
- .octa 0x00000001d406ed8200000000159dafce
-
- /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */
- .octa 0x0000000160c8e1a80000000127e1a64e
-
- /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */
- .octa 0x0000000027ba80980000000056860754
-
- /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */
- .octa 0x000000006d92d01800000001e661aae8
-
- /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */
- .octa 0x000000012ed7e3f200000000f82c6166
-
- /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */
- .octa 0x000000002dc8778800000000c4f9c7ae
-
- /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */
- .octa 0x0000000018240bb80000000074203d20
-
- /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */
- .octa 0x000000001ad381580000000198173052
-
- /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */
- .octa 0x00000001396b78f200000001ce8aba54
-
- /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */
- .octa 0x000000011a68133400000001850d5d94
-
- /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */
- .octa 0x000000012104732e00000001d609239c
-
- /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */
- .octa 0x00000000a140d90c000000001595f048
-
- /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */
- .octa 0x00000001b7215eda0000000042ccee08
-
- /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */
- .octa 0x00000001aaf1df3c000000010a389d74
-
- /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */
- .octa 0x0000000029d15b8a000000012a840da6
-
- /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */
- .octa 0x00000000f1a96922000000001d181c0c
-
- /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */
- .octa 0x00000001ac80d03c0000000068b7d1f6
-
- /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */
- .octa 0x000000000f11d56a000000005b0f14fc
-
- /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */
- .octa 0x00000001f1c022a20000000179e9e730
-
- /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */
- .octa 0x0000000173d00ae200000001ce1368d6
-
- /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */
- .octa 0x00000001d4ffe4ac0000000112c3a84c
-
- /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */
- .octa 0x000000016edc5ae400000000de940fee
-
- /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */
- .octa 0x00000001f1a0214000000000fe896b7e
-
- /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */
- .octa 0x00000000ca0b28a000000001f797431c
-
- /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */
- .octa 0x00000001928e30a20000000053e989ba
-
- /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */
- .octa 0x0000000097b1b002000000003920cd16
-
- /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */
- .octa 0x00000000b15bf90600000001e6f579b8
-
- /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */
- .octa 0x00000000411c5d52000000007493cb0a
-
- /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */
- .octa 0x00000001c36f330000000001bdd376d8
-
- /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */
- .octa 0x00000001119227e0000000016badfee6
-
- /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */
- .octa 0x00000000114d47020000000071de5c58
-
- /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */
- .octa 0x00000000458b5b9800000000453f317c
-
- /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */
- .octa 0x000000012e31fb8e0000000121675cce
-
- /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */
- .octa 0x000000005cf619d800000001f409ee92
-
- /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */
- .octa 0x0000000063f4d8b200000000f36b9c88
-
- /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */
- .octa 0x000000004138dc8a0000000036b398f4
-
- /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */
- .octa 0x00000001d29ee8e000000001748f9adc
-
- /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */
- .octa 0x000000006a08ace800000001be94ec00
-
- /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */
- .octa 0x0000000127d4201000000000b74370d6
-
- /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */
- .octa 0x0000000019d76b6200000001174d0b98
-
- /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */
- .octa 0x00000001b1471f6e00000000befc06a4
-
- /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */
- .octa 0x00000001f64c19cc00000001ae125288
-
- /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */
- .octa 0x00000000003c0ea00000000095c19b34
-
- /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */
- .octa 0x000000014d73abf600000001a78496f2
-
- /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */
- .octa 0x00000001620eb84400000001ac5390a0
-
- /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */
- .octa 0x0000000147655048000000002a80ed6e
-
- /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */
- .octa 0x0000000067b5077e00000001fa9b0128
-
- /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */
- .octa 0x0000000010ffe20600000001ea94929e
-
- /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */
- .octa 0x000000000fee8f1e0000000125f4305c
-
- /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */
- .octa 0x00000001da26fbae00000001471e2002
-
- /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */
- .octa 0x00000001b3a8bd880000000132d2253a
-
- /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */
- .octa 0x00000000e8f3898e00000000f26b3592
-
- /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */
- .octa 0x00000000b0d0d28c00000000bc8b67b0
-
- /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */
- .octa 0x0000000030f2a798000000013a826ef2
-
- /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */
- .octa 0x000000000fba10020000000081482c84
-
- /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */
- .octa 0x00000000bdb9bd7200000000e77307c2
-
- /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */
- .octa 0x0000000075d3bf5a00000000d4a07ec8
-
- /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */
- .octa 0x00000000ef1f98a00000000017102100
-
- /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */
- .octa 0x00000000689c760200000000db406486
-
- /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */
- .octa 0x000000016d5fa5fe0000000192db7f88
-
- /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */
- .octa 0x00000001d0d2b9ca000000018bf67b1e
-
- /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */
- .octa 0x0000000041e7b470000000007c09163e
-
- /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */
- .octa 0x00000001cbb6495e000000000adac060
-
- /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */
- .octa 0x000000010052a0b000000000bd8316ae
-
- /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */
- .octa 0x00000001d8effb5c000000019f09ab54
-
- /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */
- .octa 0x00000001d969853c0000000125155542
-
- /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */
- .octa 0x00000000523ccce2000000018fdb5882
-
- /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */
- .octa 0x000000001e2436bc00000000e794b3f4
-
- /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */
- .octa 0x00000000ddd1c3a2000000016f9bb022
-
- /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */
- .octa 0x0000000019fcfe3800000000290c9978
-
- /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */
- .octa 0x00000001ce95db640000000083c0f350
-
- /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */
- .octa 0x00000000af5828060000000173ea6628
-
- /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */
- .octa 0x00000001006388f600000001c8b4e00a
-
- /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */
- .octa 0x0000000179eca00a00000000de95d6aa
-
- /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */
- .octa 0x0000000122410a6a000000010b7f7248
-
- /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */
- .octa 0x000000004288e87c00000001326e3a06
-
- /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */
- .octa 0x000000016c5490da00000000bb62c2e6
-
- /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */
- .octa 0x00000000d1c71f6e0000000156a4b2c2
-
- /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */
- .octa 0x00000001b4ce08a6000000011dfe763a
-
- /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */
- .octa 0x00000001466ba60c000000007bcca8e2
-
- /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */
- .octa 0x00000001f6c488a40000000186118faa
-
- /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */
- .octa 0x000000013bfb06820000000111a65a88
-
- /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */
- .octa 0x00000000690e9e54000000003565e1c4
-
- /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */
- .octa 0x00000000281346b6000000012ed02a82
-
- /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */
- .octa 0x000000015646402400000000c486ecfc
-
- /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */
- .octa 0x000000016063a8dc0000000001b951b2
-
- /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */
- .octa 0x0000000116a663620000000048143916
-
- /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */
- .octa 0x000000017e8aa4d200000001dc2ae124
-
- /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */
- .octa 0x00000001728eb10c00000001416c58d6
-
- /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */
- .octa 0x00000001b08fd7fa00000000a479744a
-
- /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */
- .octa 0x00000001092a16e80000000096ca3a26
-
- /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */
- .octa 0x00000000a505637c00000000ff223d4e
-
- /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */
- .octa 0x00000000d94869b2000000010e84da42
-
- /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */
- .octa 0x00000001c8b203ae00000001b61ba3d0
-
- /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */
- .octa 0x000000005704aea000000000680f2de8
-
- /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */
- .octa 0x000000012e295fa2000000008772a9a8
-
- /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */
- .octa 0x000000011d0908bc0000000155f295bc
-
- /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */
- .octa 0x0000000193ed97ea00000000595f9282
-
- /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */
- .octa 0x000000013a0f1c520000000164b1c25a
-
- /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */
- .octa 0x000000010c2c40c000000000fbd67c50
-
- /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */
- .octa 0x00000000ff6fac3e0000000096076268
-
- /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */
- .octa 0x000000017b3609c000000001d288e4cc
-
- /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */
- .octa 0x0000000088c8c92200000001eaac1bdc
-
- /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */
- .octa 0x00000001751baae600000001f1ea39e2
-
- /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */
- .octa 0x000000010795297200000001eb6506fc
-
- /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */
- .octa 0x0000000162b00abe000000010f806ffe
-
- /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */
- .octa 0x000000000d7b404c000000010408481e
-
- /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */
- .octa 0x00000000763b13d40000000188260534
-
- /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */
- .octa 0x00000000f6dc22d80000000058fc73e0
-
- /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */
- .octa 0x000000007daae06000000000391c59b8
-
- /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */
- .octa 0x000000013359ab7c000000018b638400
-
- /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */
- .octa 0x000000008add438a000000011738f5c4
-
- /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */
- .octa 0x00000001edbefdea000000008cf7c6da
-
- /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */
- .octa 0x000000004104e0f800000001ef97fb16
-
- /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */
- .octa 0x00000000b48a82220000000102130e20
-
- /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */
- .octa 0x00000001bcb4684400000000db968898
-
- /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */
- .octa 0x000000013293ce0a00000000b5047b5e
-
- /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */
- .octa 0x00000001710d0844000000010b90fdb2
-
- /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */
- .octa 0x0000000117907f6e000000004834a32e
-
- /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */
- .octa 0x0000000087ddf93e0000000059c8f2b0
-
- /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */
- .octa 0x000000005970e9b00000000122cec508
-
- /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */
- .octa 0x0000000185b2b7d0000000000a330cda
-
- /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */
- .octa 0x00000001dcee0efc000000014a47148c
-
- /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */
- .octa 0x0000000030da27220000000042c61cb8
-
- /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */
- .octa 0x000000012f925a180000000012fe6960
-
- /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */
- .octa 0x00000000dd2e357c00000000dbda2c20
-
- /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */
- .octa 0x00000000071c80de000000011122410c
-
- /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */
- .octa 0x000000011513140a00000000977b2070
-
- /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */
- .octa 0x00000001df876e8e000000014050438e
-
- /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */
- .octa 0x000000015f81d6ce0000000147c840e8
-
- /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */
- .octa 0x000000019dd94dbe00000001cc7c88ce
-
- /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */
- .octa 0x00000001373d206e00000001476b35a4
-
- /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */
- .octa 0x00000000668ccade000000013d52d508
-
- /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */
- .octa 0x00000001b192d268000000008e4be32e
-
- /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */
- .octa 0x00000000e30f3a7800000000024120fe
-
- /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */
- .octa 0x000000010ef1f7bc00000000ddecddb4
-
- /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */
- .octa 0x00000001f5ac738000000000d4d403bc
-
- /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */
- .octa 0x000000011822ea7000000001734b89aa
-
- /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */
- .octa 0x00000000c3a33848000000010e7a58d6
-
- /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */
- .octa 0x00000001bd151c2400000001f9f04e9c
-
- /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */
- .octa 0x0000000056002d7600000000b692225e
-
- /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */
- .octa 0x000000014657c4f4000000019b8d3f3e
-
- /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */
- .octa 0x0000000113742d7c00000001a874f11e
-
- /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */
- .octa 0x000000019c5920ba000000010d5a4254
-
- /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */
- .octa 0x000000005216d2d600000000bbb2f5d6
-
- /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */
- .octa 0x0000000136f5ad8a0000000179cc0e36
-
- /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */
- .octa 0x000000018b07beb600000001dca1da4a
-
- /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */
- .octa 0x00000000db1e93b000000000feb1a192
-
- /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */
- .octa 0x000000000b96fa3a00000000d1eeedd6
-
- /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */
- .octa 0x00000001d9968af0000000008fad9bb4
-
- /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */
- .octa 0x000000000e4a77a200000001884938e4
-
- /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */
- .octa 0x00000000508c2ac800000001bc2e9bc0
-
- /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */
- .octa 0x0000000021572a8000000001f9658a68
-
- /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */
- .octa 0x00000001b859daf2000000001b9224fc
-
- /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */
- .octa 0x000000016f7884740000000055b2fb84
-
- /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */
- .octa 0x00000001b438810e000000018b090348
-
- /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */
- .octa 0x0000000095ddc6f2000000011ccbd5ea
-
- /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */
- .octa 0x00000001d977c20c0000000007ae47f8
-
- /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */
- .octa 0x00000000ebedb99a0000000172acbec0
-
- /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */
- .octa 0x00000001df9e9e9200000001c6e3ff20
-
- /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */
- .octa 0x00000001a4a3f95200000000e1b38744
-
- /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */
- .octa 0x00000000e2f5122000000000791585b2
-
- /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */
- .octa 0x000000004aa01f3e00000000ac53b894
-
- /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */
- .octa 0x00000000b3e90a5800000001ed5f2cf4
-
- /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */
- .octa 0x000000000c9ca2aa00000001df48b2e0
-
- /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */
- .octa 0x000000015168231600000000049c1c62
-
- /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */
- .octa 0x0000000036fce78c000000017c460c12
-
- /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */
- .octa 0x000000009037dc10000000015be4da7e
-
- /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */
- .octa 0x00000000d3298582000000010f38f668
-
- /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */
- .octa 0x00000001b42e8ad60000000039f40a00
-
- /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */
- .octa 0x00000000142a983800000000bd4c10c4
-
- /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */
- .octa 0x0000000109c7f1900000000042db1d98
-
- /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */
- .octa 0x0000000056ff931000000001c905bae6
-
- /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */
- .octa 0x00000001594513aa00000000069d40ea
-
- /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */
- .octa 0x00000001e3b5b1e8000000008e4fbad0
-
- /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */
- .octa 0x000000011dd5fc080000000047bedd46
-
- /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */
- .octa 0x00000001675f0cc20000000026396bf8
-
- /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */
- .octa 0x00000000d1c8dd4400000000379beb92
-
- /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */
- .octa 0x0000000115ebd3d8000000000abae54a
-
- /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */
- .octa 0x00000001ecbd0dac0000000007e6a128
-
- /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */
- .octa 0x00000000cdf67af2000000000ade29d2
-
- /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */
- .octa 0x000000004c01ff4c00000000f974c45c
-
- /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */
- .octa 0x00000000f2d8657e00000000e77ac60a
-
- /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */
- .octa 0x000000006bae74c40000000145895816
-
- /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */
- .octa 0x0000000152af8aa00000000038e362be
-
- /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */
- .octa 0x0000000004663802000000007f991a64
-
- /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */
- .octa 0x00000001ab2f5afc00000000fa366d3a
-
- /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */
- .octa 0x0000000074a4ebd400000001a2bb34f0
-
- /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */
- .octa 0x00000001d7ab3a4c0000000028a9981e
-
- /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */
- .octa 0x00000001a8da60c600000001dbc672be
-
- /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */
- .octa 0x000000013cf6382000000000b04d77f6
-
- /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */
- .octa 0x00000000bec12e1e0000000124400d96
-
- /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */
- .octa 0x00000001c6368010000000014ca4b414
-
- /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */
- .octa 0x00000001e6e78758000000012fe2c938
-
- /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */
- .octa 0x000000008d7f2b3c00000001faed01e6
-
- /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */
- .octa 0x000000016b4a156e000000007e80ecfe
-
- /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */
- .octa 0x00000001c63cfeb60000000098daee94
-
- /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */
- .octa 0x000000015f902670000000010a04edea
-
- /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */
- .octa 0x00000001cd5de11e00000001c00b4524
-
- /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */
- .octa 0x000000001acaec540000000170296550
-
- /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */
- .octa 0x000000002bd0ca780000000181afaa48
-
- /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */
- .octa 0x0000000032d63d5c0000000185a31ffa
-
- /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */
- .octa 0x000000001c6d4e4c000000002469f608
-
- /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */
- .octa 0x0000000106a60b92000000006980102a
-
- /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */
- .octa 0x00000000d3855e120000000111ea9ca8
-
- /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */
- .octa 0x00000000e312563600000001bd1d29ce
-
- /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */
- .octa 0x000000009e8f7ea400000001b34b9580
-
- /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */
- .octa 0x00000001c82e562c000000003076054e
-
- /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */
- .octa 0x00000000ca9f09ce000000012a608ea4
-
- /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */
- .octa 0x00000000c63764e600000000784d05fe
-
- /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */
- .octa 0x0000000168d2e49e000000016ef0d82a
-
- /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */
- .octa 0x00000000e986c1480000000075bda454
-
- /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */
- .octa 0x00000000cfb65894000000003dc0a1c4
-
- /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */
- .octa 0x0000000111cadee400000000e9a5d8be
-
- /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */
- .octa 0x0000000171fb63ce00000001609bc4b4
-
-.short_constants:
-
- /* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */
- /* x^1952 mod p(x)`, x^1984 mod p(x)`, x^2016 mod p(x)`, x^2048 mod p(x)` */
- .octa 0x7fec2963e5bf80485cf015c388e56f72
-
- /* x^1824 mod p(x)`, x^1856 mod p(x)`, x^1888 mod p(x)`, x^1920 mod p(x)` */
- .octa 0x38e888d4844752a9963a18920246e2e6
-
- /* x^1696 mod p(x)`, x^1728 mod p(x)`, x^1760 mod p(x)`, x^1792 mod p(x)` */
- .octa 0x42316c00730206ad419a441956993a31
-
- /* x^1568 mod p(x)`, x^1600 mod p(x)`, x^1632 mod p(x)`, x^1664 mod p(x)` */
- .octa 0x543d5c543e65ddf9924752ba2b830011
-
- /* x^1440 mod p(x)`, x^1472 mod p(x)`, x^1504 mod p(x)`, x^1536 mod p(x)` */
- .octa 0x78e87aaf56767c9255bd7f9518e4a304
-
- /* x^1312 mod p(x)`, x^1344 mod p(x)`, x^1376 mod p(x)`, x^1408 mod p(x)` */
- .octa 0x8f68fcec1903da7f6d76739fe0553f1e
-
- /* x^1184 mod p(x)`, x^1216 mod p(x)`, x^1248 mod p(x)`, x^1280 mod p(x)` */
- .octa 0x3f4840246791d588c133722b1fe0b5c3
-
- /* x^1056 mod p(x)`, x^1088 mod p(x)`, x^1120 mod p(x)`, x^1152 mod p(x)` */
- .octa 0x34c96751b04de25a64b67ee0e55ef1f3
-
- /* x^928 mod p(x)`, x^960 mod p(x)`, x^992 mod p(x)`, x^1024 mod p(x)` */
- .octa 0x156c8e180b4a395b069db049b8fdb1e7
-
- /* x^800 mod p(x)`, x^832 mod p(x)`, x^864 mod p(x)`, x^896 mod p(x)` */
- .octa 0xe0b99ccbe661f7bea11bfaf3c9e90b9e
-
- /* x^672 mod p(x)`, x^704 mod p(x)`, x^736 mod p(x)`, x^768 mod p(x)` */
- .octa 0x041d37768cd75659817cdc5119b29a35
-
- /* x^544 mod p(x)`, x^576 mod p(x)`, x^608 mod p(x)`, x^640 mod p(x)` */
- .octa 0x3a0777818cfaa9651ce9d94b36c41f1c
-
- /* x^416 mod p(x)`, x^448 mod p(x)`, x^480 mod p(x)`, x^512 mod p(x)` */
- .octa 0x0e148e8252377a554f256efcb82be955
-
- /* x^288 mod p(x)`, x^320 mod p(x)`, x^352 mod p(x)`, x^384 mod p(x)` */
- .octa 0x9c25531d19e65ddeec1631edb2dea967
-
- /* x^160 mod p(x)`, x^192 mod p(x)`, x^224 mod p(x)`, x^256 mod p(x)` */
- .octa 0x790606ff9957c0a65d27e147510ac59a
-
- /* x^32 mod p(x)`, x^64 mod p(x)`, x^96 mod p(x)`, x^128 mod p(x)` */
- .octa 0x82f63b786ea2d55ca66805eb18b8ea18
-
-
-.barrett_constants:
- /* 33 bit reflected Barrett constant m - (4^32)/n */
- .octa 0x000000000000000000000000dea713f1 /* x^64 div p(x)` */
- /* 33 bit reflected Barrett constant n */
- .octa 0x00000000000000000000000105ec76f1
-#endif
-
-#endif /* __powerpc__ */
-
-#endif
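
The fold constants deleted above are remainders x^N mod p(x) for the CRC-32C polynomial, stored bit-reflected and shifted left by one (the trailing backtick in the comments marks the reflected form). A minimal sketch of the underlying remainder computation, in plain non-reflected form, so the printed values will not match the reflected table entries verbatim (xn_mod_p is an illustrative name, not part of the deleted file):

#include <cstdint>
#include <cstdio>

// x^n mod p(x) over GF(2) for CRC-32C: p(x) is 0x1EDC6F41 plus the
// implicit x^32 term.
static uint32_t xn_mod_p(unsigned n) {
    const uint64_t poly = 0x11EDC6F41ULL;
    uint64_t r = 1;                         // the polynomial "1", i.e. x^0
    for (unsigned i = 0; i < n; i++) {
        r <<= 1;                            // multiply by x
        if (r & (1ULL << 32))
            r ^= poly;                      // degree reached 32: subtract p(x)
    }
    return (uint32_t) r;
}

int main() {
    printf("x^32   mod p(x) = %08x\n", xn_mod_p(32));    // prints 1edc6f41
    printf("x^1024 mod p(x) = %08x\n", xn_mod_p(1024));
    return 0;
}
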
diff --git a/storage/innobase/ut/crc32_power8/crc32_wrapper.c b/storage/innobase/ut/crc32_power8/crc32_wrapper.c
deleted file mode 100644
index d4c91371fa1..00000000000
--- a/storage/innobase/ut/crc32_power8/crc32_wrapper.c
+++ /dev/null
@@ -1,68 +0,0 @@
-#ifdef __powerpc__
-
-#define CRC_TABLE
-#include "crc32_constants.h"
-
-#define VMX_ALIGN 16
-#define VMX_ALIGN_MASK (VMX_ALIGN-1)
-
-#ifdef REFLECT
-static unsigned int crc32_align(unsigned int crc, unsigned char *p,
- unsigned long len)
-{
- while (len--)
- crc = crc_table[(crc ^ *p++) & 0xff] ^ (crc >> 8);
- return crc;
-}
-#else
-static unsigned int crc32_align(unsigned int crc, unsigned char *p,
- unsigned long len)
-{
- while (len--)
- crc = crc_table[((crc >> 24) ^ *p++) & 0xff] ^ (crc << 8);
- return crc;
-}
-#endif
-
-unsigned int __crc32_vpmsum(unsigned int crc, unsigned char *p,
- unsigned long len);
-
-unsigned int crc32_vpmsum(unsigned int crc, unsigned char *p,
- unsigned long len)
-{
- unsigned int prealign;
- unsigned int tail;
-
-#ifdef CRC_XOR
- crc ^= 0xffffffff;
-#endif
-
- if (len < VMX_ALIGN + VMX_ALIGN_MASK) {
- crc = crc32_align(crc, p, len);
- goto out;
- }
-
- if ((unsigned long)p & VMX_ALIGN_MASK) {
- prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK);
- crc = crc32_align(crc, p, prealign);
- len -= prealign;
- p += prealign;
- }
-
- crc = __crc32_vpmsum(crc, p, len & ~VMX_ALIGN_MASK);
-
- tail = len & VMX_ALIGN_MASK;
- if (tail) {
- p += len & ~VMX_ALIGN_MASK;
- crc = crc32_align(crc, p, tail);
- }
-
-out:
-#ifdef CRC_XOR
- crc ^= 0xffffffff;
-#endif
-
- return crc;
-}
-
-#endif /* __powerpc__ */
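
The deleted wrapper handles buffers of any alignment with the usual SIMD pattern: a byte-wise head until the pointer reaches a 16-byte boundary, whole 16-byte chunks through the vectorized __crc32_vpmsum, then a byte-wise tail. A standalone sketch of just that split arithmetic (buffer and lengths are made up for illustration):

#include <cstdint>
#include <cstdio>

#define VMX_ALIGN      16
#define VMX_ALIGN_MASK (VMX_ALIGN - 1)

int main() {
    alignas(16) unsigned char buf[128];
    unsigned char* p   = buf + 3;           // deliberately misaligned
    unsigned long  len = 100;

    // Bytes consumed one at a time until p is 16-byte aligned...
    unsigned long prealign = VMX_ALIGN - ((uintptr_t) p & VMX_ALIGN_MASK);
    len -= prealign;
    p   += prealign;

    // ...then the vector routine takes whole 16-byte chunks...
    unsigned long bulk = len & ~(unsigned long) VMX_ALIGN_MASK;

    // ...and the remainder is finished byte-by-byte.
    unsigned long tail = len & VMX_ALIGN_MASK;

    printf("prealign=%lu bulk=%lu tail=%lu\n", prealign, bulk, tail);
    return 0;                               // prints prealign=13 bulk=80 tail=7
}
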
diff --git a/storage/innobase/ut/crc32_power8/ppc-opcode.h b/storage/innobase/ut/crc32_power8/ppc-opcode.h
deleted file mode 100644
index 5942bd4923a..00000000000
--- a/storage/innobase/ut/crc32_power8/ppc-opcode.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef __OPCODES_H
-#define __OPCODES_H
-
-#define __PPC_RA(a) (((a) & 0x1f) << 16)
-#define __PPC_RB(b) (((b) & 0x1f) << 11)
-#define __PPC_XA(a) ((((a) & 0x1f) << 16) | (((a) & 0x20) >> 3))
-#define __PPC_XB(b) ((((b) & 0x1f) << 11) | (((b) & 0x20) >> 4))
-#define __PPC_XS(s) ((((s) & 0x1f) << 21) | (((s) & 0x20) >> 5))
-#define __PPC_XT(s) __PPC_XS(s)
-#define VSX_XX3(t, a, b) (__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b))
-#define VSX_XX1(s, a, b) (__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b))
-
-#define PPC_INST_VPMSUMW 0x10000488
-#define PPC_INST_VPMSUMD 0x100004c8
-#define PPC_INST_MFVSRD 0x7c000066
-#define PPC_INST_MTVSRD 0x7c000166
-
-#define VPMSUMW(t, a, b) .long PPC_INST_VPMSUMW | VSX_XX3((t), a, b)
-#define VPMSUMD(t, a, b) .long PPC_INST_VPMSUMD | VSX_XX3((t), a, b)
-#define MFVRD(a, t) .long PPC_INST_MFVSRD | VSX_XX1((t)+32, a, 0)
-#define MTVRD(t, a) .long PPC_INST_MTVSRD | VSX_XX1((t)+32, a, 0)
-
-#endif
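
These macros emit raw 32-bit instruction words via .long, so the assembly builds even with binutils that predate the POWER8 mnemonics. For example, VPMSUMD(0, 1, 2) resolves to the word computed below; a quick host-side check of the encoding using the same macros:

#include <cstdint>
#include <cstdio>

#define __PPC_XA(a) ((((a) & 0x1f) << 16) | (((a) & 0x20) >> 3))
#define __PPC_XB(b) ((((b) & 0x1f) << 11) | (((b) & 0x20) >> 4))
#define __PPC_XS(s) ((((s) & 0x1f) << 21) | (((s) & 0x20) >> 5))
#define __PPC_XT(s) __PPC_XS(s)
#define VSX_XX3(t, a, b) (__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b))

#define PPC_INST_VPMSUMD 0x100004c8

int main() {
    // "vpmsumd v0, v1, v2" as a raw instruction word.
    uint32_t word = PPC_INST_VPMSUMD | VSX_XX3(0, 1, 2);
    printf("0x%08x\n", (unsigned) word);    // prints 0x100114c8
    return 0;
}
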
diff --git a/storage/innobase/ut/ut0bh.cc b/storage/innobase/ut/ut0bh.cc
deleted file mode 100644
index a74d56f5094..00000000000
--- a/storage/innobase/ut/ut0bh.cc
+++ /dev/null
@@ -1,159 +0,0 @@
-/***************************************************************************//**
-
-Copyright (c) 2010, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file ut/ut0bh.cc
-Binary min-heap implementation.
-
-Created 2010-05-28 by Sunny Bains
-*******************************************************/
-
-#include "ut0bh.h"
-#include "ut0mem.h"
-
-#ifdef UNIV_NONINL
-#include "ut0bh.ic"
-#endif
-
-#include <string.h>
-
-/**********************************************************************//**
-Create a binary heap.
-@return a new binary heap */
-UNIV_INTERN
-ib_bh_t*
-ib_bh_create(
-/*=========*/
- ib_bh_cmp_t compare, /*!< in: comparator */
- ulint sizeof_elem, /*!< in: size of one element */
- ulint max_elems) /*!< in: max elements allowed */
-{
- ulint sz;
- ib_bh_t* ib_bh;
-
- sz = sizeof(*ib_bh) + (sizeof_elem * max_elems);
-
- ib_bh = (ib_bh_t*) ut_malloc(sz);
- memset(ib_bh, 0x0, sz);
-
- ib_bh->compare = compare;
- ib_bh->max_elems = max_elems;
- ib_bh->sizeof_elem = sizeof_elem;
-
- return(ib_bh);
-}
-
-/**********************************************************************//**
-Free a binary heap.
-@return a new binary heap */
-UNIV_INTERN
-void
-ib_bh_free(
-/*=======*/
- ib_bh_t* ib_bh) /*!< in/own: instance */
-{
- ut_free(ib_bh);
-}
-
-/**********************************************************************//**
-Add an element to the binary heap. Note: The element is copied.
-@return pointer to added element or NULL if full. */
-UNIV_INTERN
-void*
-ib_bh_push(
-/*=======*/
- ib_bh_t* ib_bh, /*!< in/out: instance */
- const void* elem) /*!< in: element to add */
-{
- void* ptr;
-
- if (ib_bh_is_full(ib_bh)) {
- return(NULL);
- } else if (ib_bh_is_empty(ib_bh)) {
- ++ib_bh->n_elems;
- return(ib_bh_set(ib_bh, 0, elem));
- } else {
- ulint i;
-
- i = ib_bh->n_elems;
-
- ++ib_bh->n_elems;
-
- for (ptr = ib_bh_get(ib_bh, i >> 1);
- i > 0 && ib_bh->compare(ptr, elem) > 0;
- i >>= 1, ptr = ib_bh_get(ib_bh, i >> 1)) {
-
- ib_bh_set(ib_bh, i, ptr);
- }
-
- ptr = ib_bh_set(ib_bh, i, elem);
- }
-
- return(ptr);
-}
-
-/**********************************************************************//**
-Remove the first element from the binary heap. */
-UNIV_INTERN
-void
-ib_bh_pop(
-/*======*/
- ib_bh_t* ib_bh) /*!< in/out: instance */
-{
- byte* ptr;
- byte* last;
- ulint parent = 0;
-
- if (ib_bh_is_empty(ib_bh)) {
- return;
- } else if (ib_bh_size(ib_bh) == 1) {
- --ib_bh->n_elems;
- return;
- }
-
- last = (byte*) ib_bh_last(ib_bh);
-
- /* Start from the child node */
- ptr = (byte*) ib_bh_get(ib_bh, 1);
-
- while (ptr < last) {
- /* If the "right" child node is < "left" child node */
- if (ib_bh->compare(ptr + ib_bh->sizeof_elem, ptr) < 0) {
- ptr += ib_bh->sizeof_elem;
- }
-
- if (ib_bh->compare(last, ptr) <= 0) {
- break;
- }
-
- ib_bh_set(ib_bh, parent, ptr);
-
- parent = (ptr - (byte*) ib_bh_first(ib_bh))
- / ib_bh->sizeof_elem;
-
- if ((parent << 1) >= ib_bh_size(ib_bh)) {
- break;
- }
-
- ptr = (byte*) ib_bh_get(ib_bh, parent << 1);
- }
-
- --ib_bh->n_elems;
-
- ib_bh_set(ib_bh, parent, last);
-}
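
ib_bh_push above is a textbook binary-min-heap sift-up over a flat array, and ib_bh_pop the matching sift-down. A self-contained sketch of the same sift-up idea on a plain vector (illustrative only; heap_push is not the deleted ib_bh_* API):

#include <cstdio>
#include <utility>
#include <vector>

// Standard 0-based min-heap insertion: append, then swap the new element
// up while it is smaller than its parent.
static void heap_push(std::vector<int>& h, int elem) {
    h.push_back(elem);
    size_t i = h.size() - 1;
    while (i > 0 && h[(i - 1) / 2] > h[i]) {
        std::swap(h[i], h[(i - 1) / 2]);
        i = (i - 1) / 2;
    }
}

int main() {
    std::vector<int> h;
    int vals[] = {5, 1, 4, 2, 3};
    for (int v : vals) heap_push(h, v);
    printf("min = %d\n", h[0]);             // prints 1
    return 0;
}
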
diff --git a/storage/innobase/ut/ut0byte.cc b/storage/innobase/ut/ut0byte.cc
deleted file mode 100644
index 2a56bcc0bb4..00000000000
--- a/storage/innobase/ut/ut0byte.cc
+++ /dev/null
@@ -1,30 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/***************************************************************//**
-@file ut/ut0byte.cc
-Byte utilities
-
-Created 5/11/1994 Heikki Tuuri
-********************************************************************/
-
-#include "ut0byte.h"
-
-#ifdef UNIV_NONINL
-#include "ut0byte.ic"
-#endif
diff --git a/storage/innobase/ut/ut0crc32.cc b/storage/innobase/ut/ut0crc32.cc
index 4d2d311ff48..44b1c4b30b4 100644
--- a/storage/innobase/ut/ut0crc32.cc
+++ b/storage/innobase/ut/ut0crc32.cc
@@ -1,7 +1,8 @@
/*****************************************************************************
-Copyright (C) 2009, 2010 Facebook, Inc. All Rights Reserved.
-Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2009, 2010 Facebook, Inc. All Rights Reserved.
+Copyright (c) 2011, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2016, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -63,10 +64,9 @@ mysys/my_perf.c, contributed by Facebook under the following license.
/* The below CRC32 implementation is based on the implementation included with
* zlib with modifications to process 8 bytes at a time and using SSE 4.2
- * extentions when available. The polynomial constant has been changed to
+ * extensions when available. The polynomial constant has been changed to
* match the one used by SSE 4.2 and does not return the same value as the
- * version used by zlib. This implementation only supports 64-bit
- * little-endian processors. The original zlib copyright notice follows. */
+ * version used by zlib. The original zlib copyright notice follows. */
/* crc32.c -- compute the CRC-32 of a buf stream
* Copyright (C) 1995-2005 Mark Adler
@@ -79,27 +79,382 @@ mysys/my_perf.c, contributed by Facebook under the following license.
* factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
*/
-#include "univ.i"
+// First include (the generated) my_config.h, to get correct platform defines.
+#include "my_config.h"
+#include <string.h>
+
#include "ut0crc32.h"
-#if defined(__linux__) && defined(__powerpc__)
-/* Used to detect at runtime if we have vpmsum instructions (PowerISA 2.07) */
-#include <sys/auxv.h>
-#include <bits/hwcap.h>
-#endif /* defined(__linux__) && defined(__powerpc__) */
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+/** Pointer to CRC32 calculation function. */
+ut_crc32_func_t ut_crc32;
+
+#ifdef INNODB_BUG_ENDIAN_CRC32
+/** Pointer to CRC32 calculation function, which uses big-endian byte order
+when converting byte strings to integers internally. */
+ut_crc32_func_t ut_crc32_legacy_big_endian;
+#endif /* INNODB_BUG_ENDIAN_CRC32 */
+
+/** Text description of CRC32 implementation */
+const char* ut_crc32_implementation;
+
+/** Swap the byte order of an 8 byte integer.
+@param[in] i 8-byte integer
+@return 8-byte integer */
+inline
+uint64_t
+ut_crc32_swap_byteorder(
+ uint64_t i)
+{
+ return(i << 56
+ | (i & 0x000000000000FF00ULL) << 40
+ | (i & 0x0000000000FF0000ULL) << 24
+ | (i & 0x00000000FF000000ULL) << 8
+ | (i & 0x000000FF00000000ULL) >> 8
+ | (i & 0x0000FF0000000000ULL) >> 24
+ | (i & 0x00FF000000000000ULL) >> 40
+ | i >> 56);
+}
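
The shift-and-mask swap added above is equivalent to the compiler's byte-swap builtin; a quick standalone check (availability of __builtin_bswap64 on GCC/clang is an assumption of this sketch):

#include <cstdint>
#include <cstdio>

int main() {
    uint64_t v = 0x0102030405060708ULL;
    uint64_t manual = v << 56
        | (v & 0x000000000000FF00ULL) << 40
        | (v & 0x0000000000FF0000ULL) << 24
        | (v & 0x00000000FF000000ULL) << 8
        | (v & 0x000000FF00000000ULL) >> 8
        | (v & 0x0000FF0000000000ULL) >> 24
        | (v & 0x00FF000000000000ULL) >> 40
        | v >> 56;
    // Both lines print 0807060504030201.
    printf("%016llx\n", (unsigned long long) manual);
    printf("%016llx\n", (unsigned long long) __builtin_bswap64(v));
    return 0;
}
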
-#include <string.h>
+/* CRC32 hardware implementation. */
+
+#ifdef HAVE_CRC32_VPMSUM
+extern "C" {
+unsigned int crc32c_vpmsum(unsigned int crc, const unsigned char *p, unsigned long len);
+};
+UNIV_INLINE
+ib_uint32_t
+ut_crc32_power8(
+/*===========*/
+ const byte* buf, /*!< in: data over which to calculate CRC32 */
+ ulint len) /*!< in: data length */
+{
+ return crc32c_vpmsum(0, buf, len);
+}
+#endif
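
crc32c_vpmsum relies on the vpmsum* instructions introduced in PowerISA 2.07, so HAVE_CRC32_VPMSUM must only be set for targets that have them. The detection logic removed further down in this file probed this at runtime through the Linux auxiliary vector; a guarded sketch of that probe:

#include <cstdio>
#if defined(__linux__) && defined(__powerpc__)
# include <sys/auxv.h>
# include <bits/hwcap.h>
#endif

int main() {
#if defined(__linux__) && defined(__powerpc__) && defined(AT_HWCAP2)
    // PPC_FEATURE2_ARCH_2_07 implies the vpmsum* instructions exist.
    const bool have = getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07;
    printf("vpmsum-based crc32: %s\n", have ? "available" : "not available");
#else
    printf("not a Linux/PowerPC build, skipping the probe\n");
#endif
    return 0;
}
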
+
+#if (defined(__GNUC__) && defined(__x86_64__)) || defined(_MSC_VER)
+/********************************************************************//**
+Fetches CPU info */
+static
+void
+ut_cpuid(
+/*=====*/
+ uint32_t vend[3], /*!< out: CPU vendor */
+ uint32_t* model, /*!< out: CPU model */
+ uint32_t* family, /*!< out: CPU family */
+ uint32_t* stepping, /*!< out: CPU stepping */
+ uint32_t* features_ecx, /*!< out: CPU features ecx */
+ uint32_t* features_edx) /*!< out: CPU features edx */
+{
+ uint32_t sig;
+#ifdef _MSC_VER
+ int data[4];
+ __cpuid(data, 0);
+ /* ebx */
+ vend[0] = data[1];
+ /* edx */
+ vend[1] = data[3];
+ /* ecx */
+ vend[2] = data[2];
+
+ __cpuid(data, 1);
+ /* eax */
+ sig = data[0];
+ /* ecx */
+ *features_ecx = data[2];
+ /* edx */
+ *features_edx = data[3];
+#else
+ asm("cpuid" : "=b" (vend[0]), "=c" (vend[2]), "=d" (vend[1]) : "a" (0));
+ asm("cpuid" : "=a" (sig), "=c" (*features_ecx), "=d" (*features_edx)
+ : "a" (1)
+ : "ebx");
+#endif
+
+ *model = ((sig >> 4) & 0xF);
+ *family = ((sig >> 8) & 0xF);
+ *stepping = (sig & 0xF);
+
+ if (memcmp(vend, "GenuineIntel", 12) == 0
+ || (memcmp(vend, "AuthenticAMD", 12) == 0 && *family == 0xF)) {
+
+ *model += (((sig >> 16) & 0xF) << 4);
+ *family += ((sig >> 20) & 0xFF);
+ }
+}
+
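
A minimal standalone version of the same feature probe, using the <cpuid.h> helper that GCC and clang ship instead of raw inline asm (the helper is an assumption of this sketch; the function above is what the code actually uses):

#include <cpuid.h>
#include <cstdio>

int main() {
    unsigned eax, ebx, ecx, edx;
    if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
        // SSE4.2, and with it the crc32 instruction, is ECX bit 20 of leaf 1.
        printf("SSE4.2 crc32: %s\n", (ecx & (1u << 20)) ? "yes" : "no");
    }
    return 0;
}
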
+/** Calculate CRC32 over 8-bit data using a hardware/CPU instruction.
+@param[in,out] crc crc32 checksum so far when this function is called,
+when the function ends it will contain the new checksum
+@param[in,out] data data to be checksummed, the pointer will be advanced
+with 1 byte
+@param[in,out] len remaining bytes, it will be decremented with 1 */
+inline
+void
+ut_crc32_8_hw(
+ uint32_t* crc,
+ const byte** data,
+ ulint* len)
+{
+#ifdef _MSC_VER
+ *crc = _mm_crc32_u8(*crc, (*data)[0]);
+#else
+ asm("crc32b %1, %0"
+ /* output operands */
+ : "+r" (*crc)
+ /* input operands */
+ : "rm" ((*data)[0]));
+#endif
+
+ (*data)++;
+ (*len)--;
+}
+
+/** Calculate CRC32 over a 64-bit integer using a hardware/CPU instruction.
+@param[in] crc crc32 checksum so far
+@param[in] data data to be checksummed
+@return resulting checksum of crc + crc(data) */
+inline
+uint32_t
+ut_crc32_64_low_hw(
+ uint32_t crc,
+ uint64_t data)
+{
+ uint64_t crc_64bit = crc;
+#ifdef _MSC_VER
+#ifdef _M_X64
+ crc_64bit = _mm_crc32_u64(crc_64bit, data);
+#elif defined(_M_IX86)
+ crc = _mm_crc32_u32(crc, static_cast<uint32_t>(data));
+ crc_64bit = _mm_crc32_u32(crc, static_cast<uint32_t>(data >> 32));
+#else
+#error Unsupported processor type.
+#endif
+#else
+ asm("crc32q %1, %0"
+ /* output operands */
+ : "+r" (crc_64bit)
+ /* input operands */
+ : "rm" (data));
+#endif
+
+ return(static_cast<uint32_t>(crc_64bit));
+}
+
+/** Calculate CRC32 over 64-bit byte string using a hardware/CPU instruction.
+@param[in,out] crc crc32 checksum so far when this function is called,
+when the function ends it will contain the new checksum
+@param[in,out] data data to be checksummed, the pointer will be advanced
+with 8 bytes
+@param[in,out] len remaining bytes, it will be decremented with 8 */
+inline
+void
+ut_crc32_64_hw(
+ uint32_t* crc,
+ const byte** data,
+ ulint* len)
+{
+ uint64_t data_int = *reinterpret_cast<const uint64_t*>(*data);
+
+#ifdef WORDS_BIGENDIAN
+ /* Currently we only support x86_64 (little endian) CPUs. In case
+ some big endian CPU supports a CRC32 instruction, then maybe we will
+ need a byte order swap here. */
+#error Dont know how to handle big endian CPUs
+ /*
+ data_int = ut_crc32_swap_byteorder(data_int);
+ */
+#endif /* WORDS_BIGENDIAN */
+
+ *crc = ut_crc32_64_low_hw(*crc, data_int);
+
+ *data += 8;
+ *len -= 8;
+}
+
+#ifdef INNODB_BUG_ENDIAN_CRC32
+/** Calculate CRC32 over 64-bit byte string using a hardware/CPU instruction.
+The byte string is converted to a 64-bit integer using big endian byte order.
+@param[in,out] crc crc32 checksum so far when this function is called,
+when the function ends it will contain the new checksum
+@param[in,out] data data to be checksummed, the pointer will be advanced
+with 8 bytes
+@param[in,out] len remaining bytes, it will be decremented with 8 */
+inline
+void
+ut_crc32_64_legacy_big_endian_hw(
+ uint32_t* crc,
+ const byte** data,
+ ulint* len)
+{
+ uint64_t data_int = *reinterpret_cast<const uint64_t*>(*data);
+
+#ifndef WORDS_BIGENDIAN
+ data_int = ut_crc32_swap_byteorder(data_int);
+#else
+ /* Currently we only support x86_64 (little endian) CPUs. In case
+ some big endian CPU supports a CRC32 instruction, then maybe we will
+ NOT need a byte order swap here. */
+#error Dont know how to handle big endian CPUs
+#endif /* WORDS_BIGENDIAN */
+
+ *crc = ut_crc32_64_low_hw(*crc, data_int);
+
+ *data += 8;
+ *len -= 8;
+}
+#endif /* INNODB_BUG_ENDIAN_CRC32 */
+
+/** Calculates CRC32 using hardware/CPU instructions.
+@param[in] buf data over which to calculate CRC32
+@param[in] len data length
+@return CRC-32C (polynomial 0x11EDC6F41) */
+uint32_t
+ut_crc32_hw(
+ const byte* buf,
+ ulint len)
+{
+ uint32_t crc = 0xFFFFFFFFU;
+
+ /* Calculate byte-by-byte up to an 8-byte aligned address. After
+ this consume the input 8-bytes at a time. */
+ while (len > 0 && (reinterpret_cast<uintptr_t>(buf) & 7) != 0) {
+ ut_crc32_8_hw(&crc, &buf, &len);
+ }
+
+ /* Perf testing
+ ./unittest/gunit/innodb/merge_innodb_tests-t --gtest_filter=ut0crc32.perf
+ on CPU "Intel(R) Core(TM) i7-4770 CPU @ 3.40GHz"
+ with different N in "while (len >= N) {" shows:
+ N=16
+ 2.867254 sec
+ 2.866860 sec
+ 2.867973 sec
+
+ N=32
+ 2.715725 sec
+ 2.713008 sec
+ 2.712520 sec
+ (5.36% speedup over N=16)
+
+ N=64
+ 2.634140 sec
+ 2.636558 sec
+ 2.636488 sec
+ (2.88% speedup over N=32)
+
+ N=128
+ 2.599534 sec
+ 2.599919 sec
+ 2.598035 sec
+ (1.39% speedup over N=64)
+
+ N=256
+ 2.576993 sec
+ 2.576748 sec
+ 2.575700 sec
+ (0.87% speedup over N=128)
+
+ N=512
+ 2.693928 sec
+ 2.691663 sec
+ 2.692142 sec
+ (4.51% slowdown over N=256)
+ */
+ while (len >= 128) {
+ /* This call is repeated 16 times. 16 * 8 = 128. */
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ }
+
+ while (len >= 8) {
+ ut_crc32_64_hw(&crc, &buf, &len);
+ }
+
+ while (len > 0) {
+ ut_crc32_8_hw(&crc, &buf, &len);
+ }
+
+ return(~crc);
+}
+
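
The hardware path can be exercised in isolation with the SSE4.2 intrinsics; a small self-test against the well-known CRC-32C check value (compile with -msse4.2 on GCC/clang; byte-at-a-time only, so it skips the unrolled 64-bit loop above):

#include <nmmintrin.h>
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
    const char* s = "123456789";
    uint32_t crc = 0xFFFFFFFFU;
    for (size_t i = 0; i < strlen(s); i++)
        crc = _mm_crc32_u8(crc, (unsigned char) s[i]);
    crc = ~crc;
    printf("%08x\n", crc);                  // prints e3069283
    return 0;
}
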
+# ifdef INNODB_BUG_ENDIAN_CRC32
+/** Calculates CRC32 using hardware/CPU instructions.
+This function uses big endian byte ordering when converting byte sequence to
+integers.
+@param[in] buf data over which to calculate CRC32
+@param[in] len data length
+@return CRC-32C (polynomial 0x11EDC6F41) */
+uint32_t
+ut_crc32_legacy_big_endian_hw(
+ const byte* buf,
+ ulint len)
+{
+ uint32_t crc = 0xFFFFFFFFU;
-ib_ut_crc32_t ut_crc32;
+ /* Calculate byte-by-byte up to an 8-byte aligned address. After
+ this consume the input 8-bytes at a time. */
+ while (len > 0 && (reinterpret_cast<uintptr_t>(buf) & 7) != 0) {
+ ut_crc32_8_hw(&crc, &buf, &len);
+ }
+
+ while (len >= 128) {
+ /* This call is repeated 16 times. 16 * 8 = 128. */
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ }
+
+ while (len >= 8) {
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ }
+
+ while (len > 0) {
+ ut_crc32_8_hw(&crc, &buf, &len);
+ }
+
+ return(~crc);
+}
+# endif /* INNODB_BUG_ENDIAN_CRC32 */
+#endif /* defined(__GNUC__) && defined(__x86_64__) || (_WIN64) */
+
+/* CRC32 software implementation. */
/* Precalculated table used to generate the CRC32 if the CPU does not
have support for it */
-static ib_uint32_t ut_crc32_slice8_table[8][256];
-static ibool ut_crc32_slice8_table_initialized = FALSE;
-
-/* Flag that tells whether the CPU supports CRC32 or not */
-UNIV_INTERN bool ut_crc32_sse2_enabled = false;
-UNIV_INTERN bool ut_crc32_power8_enabled = false;
+static uint32_t ut_crc32_slice8_table[8][256];
+static bool ut_crc32_slice8_table_initialized = false;
/********************************************************************//**
Initializes the table that is used to generate the CRC32 if the CPU does
@@ -110,10 +465,10 @@ ut_crc32_slice8_table_init()
/*========================*/
{
/* bit-reversed poly 0x1EDC6F41 (from SSE42 crc32 instruction) */
- static const ib_uint32_t poly = 0x82f63b78;
- ib_uint32_t n;
- ib_uint32_t k;
- ib_uint32_t c;
+ static const uint32_t poly = 0x82f63b78;
+ uint32_t n;
+ uint32_t k;
+ uint32_t c;
for (n = 0; n < 256; n++) {
c = n;
@@ -131,206 +486,247 @@ ut_crc32_slice8_table_init()
}
}
- ut_crc32_slice8_table_initialized = TRUE;
+ ut_crc32_slice8_table_initialized = true;
}
-#if defined(__GNUC__) && defined(__x86_64__)
-/********************************************************************//**
-Fetches CPU info */
-static
+/** Calculate CRC32 over 8-bit data using a software implementation.
+@param[in,out] crc crc32 checksum so far when this function is called,
+when the function ends it will contain the new checksum
+@param[in,out] data data to be checksummed, the pointer will be advanced
+with 1 byte
+@param[in,out] len remaining bytes, it will be decremented with 1 */
+inline
void
-ut_cpuid(
-/*=====*/
- ib_uint32_t vend[3], /*!< out: CPU vendor */
- ib_uint32_t* model, /*!< out: CPU model */
- ib_uint32_t* family, /*!< out: CPU family */
- ib_uint32_t* stepping, /*!< out: CPU stepping */
- ib_uint32_t* features_ecx, /*!< out: CPU features ecx */
- ib_uint32_t* features_edx) /*!< out: CPU features edx */
+ut_crc32_8_sw(
+ uint32_t* crc,
+ const byte** data,
+ ulint* len)
{
- ib_uint32_t sig;
- asm("cpuid" : "=b" (vend[0]), "=c" (vend[2]), "=d" (vend[1]) : "a" (0));
- asm("cpuid" : "=a" (sig), "=c" (*features_ecx), "=d" (*features_edx)
- : "a" (1)
- : "ebx");
+ const uint8_t i = (*crc ^ (*data)[0]) & 0xFF;
- *model = ((sig >> 4) & 0xF);
- *family = ((sig >> 8) & 0xF);
- *stepping = (sig & 0xF);
+ *crc = (*crc >> 8) ^ ut_crc32_slice8_table[0][i];
- if (memcmp(vend, "GenuineIntel", 12) == 0
- || (memcmp(vend, "AuthenticAMD", 12) == 0 && *family == 0xF)) {
+ (*data)++;
+ (*len)--;
+}
- *model += (((sig >> 16) & 0xF) << 4);
- *family += ((sig >> 20) & 0xFF);
- }
+/** Calculate CRC32 over a 64-bit integer using a software implementation.
+@param[in] crc crc32 checksum so far
+@param[in] data data to be checksummed
+@return resulting checksum of crc + crc(data) */
+inline
+uint32_t
+ut_crc32_64_low_sw(
+ uint32_t crc,
+ uint64_t data)
+{
+ const uint64_t i = crc ^ data;
+
+ return(
+ ut_crc32_slice8_table[7][(i ) & 0xFF] ^
+ ut_crc32_slice8_table[6][(i >> 8) & 0xFF] ^
+ ut_crc32_slice8_table[5][(i >> 16) & 0xFF] ^
+ ut_crc32_slice8_table[4][(i >> 24) & 0xFF] ^
+ ut_crc32_slice8_table[3][(i >> 32) & 0xFF] ^
+ ut_crc32_slice8_table[2][(i >> 40) & 0xFF] ^
+ ut_crc32_slice8_table[1][(i >> 48) & 0xFF] ^
+ ut_crc32_slice8_table[0][(i >> 56)]
+ );
}
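
The point of ut_crc32_64_low_sw is that one slice-by-8 step over eight bytes equals eight byte-at-a-time steps. A self-contained check of that invariant, rebuilding the same kind of table (little-endian load assumed, as in the surrounding code; names are illustrative):

#include <cstdint>
#include <cstdio>
#include <cstring>

static uint32_t tab[8][256];

static void init() {
    const uint32_t poly = 0x82f63b78;       // reflected CRC-32C polynomial
    for (uint32_t n = 0; n < 256; n++) {
        uint32_t c = n;
        for (int k = 0; k < 8; k++)
            c = (c & 1) ? (c >> 1) ^ poly : c >> 1;
        tab[0][n] = c;
    }
    for (uint32_t n = 0; n < 256; n++) {
        uint32_t c = tab[0][n];
        for (int k = 1; k < 8; k++) {
            c = tab[0][c & 0xFF] ^ (c >> 8);
            tab[k][n] = c;
        }
    }
}

int main() {
    init();
    const unsigned char data[8] = {1, 2, 3, 4, 5, 6, 7, 8};
    uint32_t crc_a = 0xFFFFFFFFU;
    uint32_t crc_b = 0xFFFFFFFFU;

    // Eight byte-at-a-time steps (the ut_crc32_8_sw update rule).
    for (int j = 0; j < 8; j++)
        crc_a = (crc_a >> 8) ^ tab[0][(crc_a ^ data[j]) & 0xFF];

    // One slice-by-8 step over the same bytes (the ut_crc32_64_low_sw rule).
    uint64_t d;
    memcpy(&d, data, 8);
    const uint64_t i = crc_b ^ d;
    crc_b = tab[7][(i      ) & 0xFF] ^ tab[6][(i >>  8) & 0xFF]
          ^ tab[5][(i >> 16) & 0xFF] ^ tab[4][(i >> 24) & 0xFF]
          ^ tab[3][(i >> 32) & 0xFF] ^ tab[2][(i >> 40) & 0xFF]
          ^ tab[1][(i >> 48) & 0xFF] ^ tab[0][(i >> 56)];

    printf("%08x %08x %s\n", crc_a, crc_b,
           crc_a == crc_b ? "match" : "MISMATCH");
    return 0;
}
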
-/* opcodes taken from objdump of "crc32b (%%rdx), %%rcx"
-for RHEL4 support (GCC 3 doesn't support this instruction) */
-#define ut_crc32_sse42_byte \
- asm(".byte 0xf2, 0x48, 0x0f, 0x38, 0xf0, 0x0a" \
- : "=c"(crc) : "c"(crc), "d"(buf)); \
- len--, buf++
-
-/* opcodes taken from objdump of "crc32q (%%rdx), %%rcx"
-for RHEL4 support (GCC 3 doesn't support this instruction) */
-#define ut_crc32_sse42_quadword \
- asm(".byte 0xf2, 0x48, 0x0f, 0x38, 0xf1, 0x0a" \
- : "=c"(crc) : "c"(crc), "d"(buf)); \
- len -= 8, buf += 8
-#endif /* defined(__GNUC__) && defined(__x86_64__) */
-
-#if defined(__powerpc__)
-extern "C" {
-unsigned int crc32_vpmsum(unsigned int crc, const unsigned char *p, unsigned long len);
-};
-#endif /* __powerpc__ */
+/** Calculate CRC32 over 64-bit byte string using a software implementation.
+@param[in,out] crc crc32 checksum so far when this function is called,
+when the function ends it will contain the new checksum
+@param[in,out] data data to be checksummed, the pointer will be advanced
+with 8 bytes
+@param[in,out] len remaining bytes, it will be decremented with 8 */
+inline
+void
+ut_crc32_64_sw(
+ uint32_t* crc,
+ const byte** data,
+ ulint* len)
+{
+ uint64_t data_int = *reinterpret_cast<const uint64_t*>(*data);
-UNIV_INLINE
-ib_uint32_t
-ut_crc32_power8(
-/*===========*/
- const byte* buf, /*!< in: data over which to calculate CRC32 */
- ulint len) /*!< in: data length */
+#ifdef WORDS_BIGENDIAN
+ data_int = ut_crc32_swap_byteorder(data_int);
+#endif /* WORDS_BIGENDIAN */
+
+ *crc = ut_crc32_64_low_sw(*crc, data_int);
+
+ *data += 8;
+ *len -= 8;
+}
+
+#ifdef INNODB_BUG_ENDIAN_CRC32
+/** Calculate CRC32 over 64-bit byte string using a software implementation.
+The byte string is converted to a 64-bit integer using big endian byte order.
+@param[in,out] crc crc32 checksum so far when this function is called,
+when the function ends it will contain the new checksum
+@param[in,out] data data to be checksummed, the pointer will be advanced
+with 8 bytes
+@param[in,out] len remaining bytes, it will be decremented with 8 */
+inline
+void
+ut_crc32_64_legacy_big_endian_sw(
+ uint32_t* crc,
+ const byte** data,
+ ulint* len)
{
-#if defined(__powerpc__) && !defined(WORDS_BIGENDIAN)
- return crc32_vpmsum(0, buf, len);
-#else
- ut_error;
- /* silence compiler warning about unused parameters */
- return((ib_uint32_t) buf[len]);
-#endif /* __powerpc__ */
+ uint64_t data_int = *reinterpret_cast<const uint64_t*>(*data);
+
+#ifndef WORDS_BIGENDIAN
+ data_int = ut_crc32_swap_byteorder(data_int);
+#endif /* WORDS_BIGENDIAN */
+
+ *crc = ut_crc32_64_low_sw(*crc, data_int);
+
+ *data += 8;
+ *len -= 8;
}
+#endif /* INNODB_BUG_ENDIAN_CRC32 */
-/********************************************************************//**
-Calculates CRC32 using CPU instructions.
+/** Calculates CRC32 in software, without using CPU instructions.
+@param[in] buf data over which to calculate CRC32
+@param[in] len data length
@return CRC-32C (polynomial 0x11EDC6F41) */
-UNIV_INLINE
-ib_uint32_t
-ut_crc32_sse42(
-/*===========*/
- const byte* buf, /*!< in: data over which to calculate CRC32 */
- ulint len) /*!< in: data length */
+uint32_t
+ut_crc32_sw(
+ const byte* buf,
+ ulint len)
{
-#if defined(__GNUC__) && defined(__x86_64__)
- ib_uint64_t crc = (ib_uint32_t) (-1);
+ uint32_t crc = 0xFFFFFFFFU;
- ut_a(ut_crc32_sse2_enabled);
+ ut_a(ut_crc32_slice8_table_initialized);
- while (len && ((ulint) buf & 7)) {
- ut_crc32_sse42_byte;
+ /* Calculate byte-by-byte up to an 8-byte aligned address. After
+ this consume the input 8-bytes at a time. */
+ while (len > 0 && (reinterpret_cast<uintptr_t>(buf) & 7) != 0) {
+ ut_crc32_8_sw(&crc, &buf, &len);
}
- while (len >= 32) {
- ut_crc32_sse42_quadword;
- ut_crc32_sse42_quadword;
- ut_crc32_sse42_quadword;
- ut_crc32_sse42_quadword;
+ while (len >= 128) {
+ /* This call is repeated 16 times. 16 * 8 = 128. */
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
}
while (len >= 8) {
- ut_crc32_sse42_quadword;
+ ut_crc32_64_sw(&crc, &buf, &len);
}
- while (len) {
- ut_crc32_sse42_byte;
+ while (len > 0) {
+ ut_crc32_8_sw(&crc, &buf, &len);
}
- return((ib_uint32_t) ((~crc) & 0xFFFFFFFF));
-#else
- ut_error;
- /* silence compiler warning about unused parameters */
- return((ib_uint32_t) buf[len]);
-#endif /* defined(__GNUC__) && defined(__x86_64__) */
+ return(~crc);
}
-#define ut_crc32_slice8_byte \
- crc = (crc >> 8) ^ ut_crc32_slice8_table[0][(crc ^ *buf++) & 0xFF]; \
- len--
-
-#define ut_crc32_slice8_quadword \
- crc ^= *(ib_uint64_t*) buf; \
- crc = ut_crc32_slice8_table[7][(crc ) & 0xFF] ^ \
- ut_crc32_slice8_table[6][(crc >> 8) & 0xFF] ^ \
- ut_crc32_slice8_table[5][(crc >> 16) & 0xFF] ^ \
- ut_crc32_slice8_table[4][(crc >> 24) & 0xFF] ^ \
- ut_crc32_slice8_table[3][(crc >> 32) & 0xFF] ^ \
- ut_crc32_slice8_table[2][(crc >> 40) & 0xFF] ^ \
- ut_crc32_slice8_table[1][(crc >> 48) & 0xFF] ^ \
- ut_crc32_slice8_table[0][(crc >> 56)]; \
- len -= 8, buf += 8
-
-/********************************************************************//**
-Calculates CRC32 manually.
+#ifdef INNODB_BUG_ENDIAN_CRC32
+/** Calculates CRC32 in software, without using CPU instructions.
+This function uses big endian byte ordering when converting byte sequence to
+integers.
+@param[in] buf data over which to calculate CRC32
+@param[in] len data length
@return CRC-32C (polynomial 0x11EDC6F41) */
-UNIV_INLINE
-ib_uint32_t
-ut_crc32_slice8(
-/*============*/
- const byte* buf, /*!< in: data over which to calculate CRC32 */
- ulint len) /*!< in: data length */
+uint32_t
+ut_crc32_legacy_big_endian_sw(
+ const byte* buf,
+ ulint len)
{
- ib_uint64_t crc = (ib_uint32_t) (-1);
+ uint32_t crc = 0xFFFFFFFFU;
ut_a(ut_crc32_slice8_table_initialized);
- while (len && ((ulint) buf & 7)) {
- ut_crc32_slice8_byte;
+ /* Calculate byte-by-byte up to an 8-byte aligned address. After
+ this consume the input 8-bytes at a time. */
+ while (len > 0 && (reinterpret_cast<uintptr_t>(buf) & 7) != 0) {
+ ut_crc32_8_sw(&crc, &buf, &len);
}
- while (len >= 32) {
- ut_crc32_slice8_quadword;
- ut_crc32_slice8_quadword;
- ut_crc32_slice8_quadword;
- ut_crc32_slice8_quadword;
+ while (len >= 128) {
+ /* This call is repeated 16 times. 16 * 8 = 128. */
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
}
while (len >= 8) {
- ut_crc32_slice8_quadword;
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
}
- while (len) {
- ut_crc32_slice8_byte;
+ while (len > 0) {
+ ut_crc32_8_sw(&crc, &buf, &len);
}
- return((ib_uint32_t) ((~crc) & 0xFFFFFFFF));
+ return(~crc);
}
+#endif /* INNODB_BUG_ENDIAN_CRC32 */
/********************************************************************//**
-Initializes the data structures used by ut_crc32(). Does not do any
+Initializes the data structures used by ut_crc32*(). Does not do any
allocations, would not hurt if called twice, but would be pointless. */
-UNIV_INTERN
void
ut_crc32_init()
/*===========*/
{
-#if defined(__GNUC__) && defined(__x86_64__)
- ib_uint32_t vend[3];
- ib_uint32_t model;
- ib_uint32_t family;
- ib_uint32_t stepping;
- ib_uint32_t features_ecx;
- ib_uint32_t features_edx;
+ ut_crc32_slice8_table_init();
+ ut_crc32 = ut_crc32_sw;
+#ifdef INNODB_BUG_ENDIAN_CRC32
+ ut_crc32_legacy_big_endian = ut_crc32_legacy_big_endian_sw;
+#endif /* INNODB_BUG_ENDIAN_CRC32 */
+ ut_crc32_implementation = "Using generic crc32 instructions";
+
+#if (defined(__GNUC__) && defined(__x86_64__)) || defined(_MSC_VER)
+ uint32_t vend[3];
+ uint32_t model;
+ uint32_t family;
+ uint32_t stepping;
+ uint32_t features_ecx;
+ uint32_t features_edx;
ut_cpuid(vend, &model, &family, &stepping,
&features_ecx, &features_edx);
- ut_crc32_sse2_enabled = (features_ecx >> 20) & 1;
-#endif /* defined(__GNUC__) && defined(__x86_64__) */
-
-#if defined(__linux__) && defined(__powerpc__) && defined(AT_HWCAP2) \
- && !defined(WORDS_BIGENDIAN)
- if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07)
- ut_crc32_power8_enabled = true;
-#endif /* defined(__linux__) && defined(__powerpc__) */
-
- if (ut_crc32_sse2_enabled) {
- ut_crc32 = ut_crc32_sse42;
- } else if (ut_crc32_power8_enabled) {
- ut_crc32 = ut_crc32_power8;
- } else {
- ut_crc32_slice8_table_init();
- ut_crc32 = ut_crc32_slice8;
+ if (features_ecx & 1 << 20) {
+ ut_crc32 = ut_crc32_hw;
+#ifdef INNODB_BUG_ENDIAN_CRC32
+ ut_crc32_legacy_big_endian = ut_crc32_legacy_big_endian_hw;
+#endif /* INNODB_BUG_ENDIAN_CRC32 */
+ ut_crc32_implementation = "Using SSE2 crc32 instructions";
}
+
+#elif defined(HAVE_CRC32_VPMSUM)
+ ut_crc32 = ut_crc32_power8;
+ ut_crc32_implementation = "Using POWER8 crc32 instructions";
+#endif
+
}
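The dispatch in ut_crc32_init() probes CPUID leaf 1 and tests bit 20 of ECX, the SSE4.2 feature flag; the CRC32 instruction used by the hardware path is part of SSE4.2. The same selection, sketched with the GCC/Clang builtin instead of a hand-rolled ut_cpuid() (the function-pointer targets are hypothetical):

#include <stddef.h>
#include <stdint.h>

typedef uint32_t (*crc32_func_t)(const unsigned char* buf, size_t len);

extern uint32_t crc32c_sw(const unsigned char* buf, size_t len);
extern uint32_t crc32c_hw(const unsigned char* buf, size_t len);

crc32_func_t crc32_select(void)
{
#if defined(__GNUC__) && defined(__x86_64__)
	if (__builtin_cpu_supports("sse4.2")) {
		return crc32c_hw;	/* hardware CRC32 instruction */
	}
#endif
	return crc32c_sw;		/* table-driven fallback */
}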
diff --git a/storage/innobase/ut/ut0dbg.cc b/storage/innobase/ut/ut0dbg.cc
index fa766de28c0..fc51cce9500 100644
--- a/storage/innobase/ut/ut0dbg.cc
+++ b/storage/innobase/ut/ut0dbg.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,37 +26,20 @@ Created 1/30/1994 Heikki Tuuri
#include "univ.i"
#include "ut0dbg.h"
-#ifndef UNIV_HOTBACKUP
-# include "ha_prototypes.h"
-#endif /* !UNIV_HOTBACKUP */
-
-#if defined(__GNUC__) && (__GNUC__ > 2)
-#else
-/** This is used to eliminate compiler warnings */
-UNIV_INTERN ulint ut_dbg_zero = 0;
-#endif
/*************************************************************//**
Report a failed assertion. */
-UNIV_INTERN
+ATTRIBUTE_NORETURN
void
ut_dbg_assertion_failed(
/*====================*/
const char* expr, /*!< in: the failed assertion (optional) */
const char* file, /*!< in: source file containing the assertion */
- ulint line) /*!< in: line number of the assertion */
+ unsigned line) /*!< in: line number of the assertion */
{
ut_print_timestamp(stderr);
-#ifdef UNIV_HOTBACKUP
- fprintf(stderr, " InnoDB: Assertion failure in file %s line %lu\n",
+ fprintf(stderr, " InnoDB: Assertion failure in file %s line %u\n",
file, line);
-#else /* UNIV_HOTBACKUP */
- fprintf(stderr,
- " InnoDB: Assertion failure in thread %lu"
- " in file %s line %lu\n",
- os_thread_pf(os_thread_get_curr_id()),
- innobase_basename(file), line);
-#endif /* UNIV_HOTBACKUP */
if (expr) {
fprintf(stderr,
"InnoDB: Failing assertion: %s\n", expr);
@@ -68,72 +52,10 @@ ut_dbg_assertion_failed(
" or crashes, even\n"
"InnoDB: immediately after the mysqld startup, there may be\n"
"InnoDB: corruption in the InnoDB tablespace. Please refer to\n"
- "InnoDB: " REFMAN "forcing-innodb-recovery.html\n"
+ "InnoDB: https://mariadb.com/kb/en/library/innodb-recovery-modes/\n"
"InnoDB: about forcing recovery.\n", stderr);
-}
-
-#ifdef UNIV_COMPILE_TEST_FUNCS
-
-#include <sys/types.h>
-#include <sys/time.h>
-#include <sys/resource.h>
-
-#include <unistd.h>
-
-#ifndef timersub
-#define timersub(a, b, r) \
- do { \
- (r)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
- (r)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
- if ((r)->tv_usec < 0) { \
- (r)->tv_sec--; \
- (r)->tv_usec += 1000000; \
- } \
- } while (0)
-#endif /* timersub */
-
-/*******************************************************************//**
-Resets a speedo (records the current time in it). */
-UNIV_INTERN
-void
-speedo_reset(
-/*=========*/
- speedo_t* speedo) /*!< out: speedo */
-{
- gettimeofday(&speedo->tv, NULL);
-
- getrusage(RUSAGE_SELF, &speedo->ru);
-}
-
-/*******************************************************************//**
-Shows the time elapsed and usage statistics since the last reset of a
-speedo. */
-UNIV_INTERN
-void
-speedo_show(
-/*========*/
- const speedo_t* speedo) /*!< in: speedo */
-{
- struct rusage ru_now;
- struct timeval tv_now;
- struct timeval tv_diff;
-
- getrusage(RUSAGE_SELF, &ru_now);
- gettimeofday(&tv_now, NULL);
-
-#define PRINT_TIMEVAL(prefix, tvp) \
- fprintf(stderr, "%s% 5ld.%06ld sec\n", \
- prefix, (tvp)->tv_sec, (tvp)->tv_usec)
-
- timersub(&tv_now, &speedo->tv, &tv_diff);
- PRINT_TIMEVAL("real", &tv_diff);
-
- timersub(&ru_now.ru_utime, &speedo->ru.ru_utime, &tv_diff);
- PRINT_TIMEVAL("user", &tv_diff);
-
- timersub(&ru_now.ru_stime, &speedo->ru.ru_stime, &tv_diff);
- PRINT_TIMEVAL("sys ", &tv_diff);
+ fflush(stderr);
+ fflush(stdout);
+ abort();
}
-
-#endif /* UNIV_COMPILE_TEST_FUNCS */
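With the UNIV_HOTBACKUP branches gone, ut_dbg_assertion_failed() now terminates the process itself: flush both streams, then abort() so that a core dump and stack trace are produced, which is what the new ATTRIBUTE_NORETURN promises. The pattern, reduced to a standalone sketch (names are illustrative, not the InnoDB ones):

#include <stdio.h>
#include <stdlib.h>

[[noreturn]] static void assertion_failed(
	const char*	expr,
	const char*	file,
	unsigned	line)
{
	fprintf(stderr, "Assertion failure in file %s line %u\n", file, line);
	if (expr) {
		fprintf(stderr, "Failing assertion: %s\n", expr);
	}
	fflush(stderr);
	fflush(stdout);
	abort();	/* SIGABRT yields a core dump / stack trace */
}

#define assert_always(EXPR)						\
	do {								\
		if (!(EXPR)) {						\
			assertion_failed(#EXPR, __FILE__, __LINE__);	\
		}							\
	} while (0)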
diff --git a/storage/innobase/ut/ut0list.cc b/storage/innobase/ut/ut0list.cc
index f33b6ed31c2..370c18d4561 100644
--- a/storage/innobase/ut/ut0list.cc
+++ b/storage/innobase/ut/ut0list.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,99 +24,35 @@ Created 4/26/2006 Osku Salerma
************************************************************************/
#include "ut0list.h"
-#ifdef UNIV_NONINL
-#include "ut0list.ic"
-#endif
/****************************************************************//**
Create a new list.
-@return list */
-UNIV_INTERN
+@return list */
ib_list_t*
ib_list_create(void)
/*=================*/
{
- ib_list_t* list;
-
- list = static_cast<ib_list_t*>(mem_alloc(sizeof(*list)));
-
- list->first = NULL;
- list->last = NULL;
- list->is_heap_list = FALSE;
-
- return(list);
-}
-
-/****************************************************************//**
-Create a new list using the given heap. ib_list_free MUST NOT BE CALLED for
-lists created with this function.
-@return list */
-UNIV_INTERN
-ib_list_t*
-ib_list_create_heap(
-/*================*/
- mem_heap_t* heap) /*!< in: memory heap to use */
-{
- ib_list_t* list;
-
- list = static_cast<ib_list_t*>(mem_heap_alloc(heap, sizeof(*list)));
-
- list->first = NULL;
- list->last = NULL;
- list->is_heap_list = TRUE;
-
- return(list);
+ return(static_cast<ib_list_t*>(ut_zalloc_nokey(sizeof(ib_list_t))));
}
/****************************************************************//**
Free a list. */
-UNIV_INTERN
void
ib_list_free(
/*=========*/
ib_list_t* list) /*!< in: list */
{
- ut_a(!list->is_heap_list);
-
/* We don't check that the list is empty because it's entirely valid
to e.g. have all the nodes allocated from a single heap that is then
freed after the list itself is freed. */
- mem_free(list);
-}
-
-/****************************************************************//**
-Add the data to the start of the list.
-@return new list node */
-UNIV_INTERN
-ib_list_node_t*
-ib_list_add_first(
-/*==============*/
- ib_list_t* list, /*!< in: list */
- void* data, /*!< in: data */
- mem_heap_t* heap) /*!< in: memory heap to use */
-{
- return(ib_list_add_after(list, ib_list_get_first(list), data, heap));
-}
-
-/****************************************************************//**
-Add the data to the end of the list.
-@return new list node */
-UNIV_INTERN
-ib_list_node_t*
-ib_list_add_last(
-/*=============*/
- ib_list_t* list, /*!< in: list */
- void* data, /*!< in: data */
- mem_heap_t* heap) /*!< in: memory heap to use */
-{
- return(ib_list_add_after(list, ib_list_get_last(list), data, heap));
+ ut_free(list);
}
/****************************************************************//**
Add the data after the indicated node.
-@return new list node */
-UNIV_INTERN
+@return new list node */
+static
ib_list_node_t*
ib_list_add_after(
/*==============*/
@@ -171,8 +107,20 @@ ib_list_add_after(
}
/****************************************************************//**
+Add the data to the end of the list.
+@return new list node */
+ib_list_node_t*
+ib_list_add_last(
+/*=============*/
+ ib_list_t* list, /*!< in: list */
+ void* data, /*!< in: data */
+ mem_heap_t* heap) /*!< in: memory heap to use */
+{
+ return(ib_list_add_after(list, ib_list_get_last(list), data, heap));
+}
+
+/****************************************************************//**
Remove the node from the list. */
-UNIV_INTERN
void
ib_list_remove(
/*===========*/
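The surviving ib_list_add_after() (now static, with ib_list_add_last() as the public entry point) is a plain doubly-linked-list splice. The core pointer surgery in a standalone sketch, with hypothetical types in place of ib_list_t/ib_list_node_t:

#include <cstddef>

struct node_t {
	node_t*	prev;
	node_t*	next;
	void*	data;
};

struct list_t {
	node_t*	first;
	node_t*	last;
};

/* Link 'node' into 'list' immediately after 'prev' (NULL = at the head). */
node_t* list_add_after(list_t* list, node_t* prev, node_t* node)
{
	node->prev = prev;

	if (prev != NULL) {
		node->next = prev->next;
		prev->next = node;
	} else {
		node->next = list->first;
		list->first = node;
	}

	if (node->next != NULL) {
		node->next->prev = node;
	} else {
		list->last = node;
	}

	return(node);
}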
diff --git a/storage/innobase/ut/ut0mem.cc b/storage/innobase/ut/ut0mem.cc
index e75418fe71a..faade827283 100644
--- a/storage/innobase/ut/ut0mem.cc
+++ b/storage/innobase/ut/ut0mem.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,429 +26,13 @@ Created 5/11/1994 Heikki Tuuri
#include "ut0mem.h"
-#ifdef UNIV_NONINL
-#include "ut0mem.ic"
-#endif
-
-#ifndef UNIV_HOTBACKUP
-# include "os0thread.h"
-# include "srv0srv.h"
-
-#include <stdlib.h>
-
-/** The total amount of memory currently allocated from the operating
-system with os_mem_alloc_large() or malloc(). Does not count malloc()
-if srv_use_sys_malloc is set. Protected by ut_list_mutex. */
-UNIV_INTERN ulint ut_total_allocated_memory = 0;
-
-/** Mutex protecting ut_total_allocated_memory and ut_mem_block_list */
-UNIV_INTERN os_fast_mutex_t ut_list_mutex;
-
-#ifdef UNIV_PFS_MUTEX
-/* Key to register server_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t ut_list_mutex_key;
-#endif
-
-/** Dynamically allocated memory block */
-struct ut_mem_block_t{
- UT_LIST_NODE_T(ut_mem_block_t) mem_block_list;
- /*!< mem block list node */
- ulint size; /*!< size of allocated memory */
- ulint magic_n;/*!< magic number (UT_MEM_MAGIC_N) */
-};
-
-/** The value of ut_mem_block_t::magic_n. Used in detecting
-memory corruption. */
-#define UT_MEM_MAGIC_N 1601650166
-
-/** List of all memory blocks allocated from the operating system
-with malloc. Protected by ut_list_mutex. */
-static UT_LIST_BASE_NODE_T(ut_mem_block_t) ut_mem_block_list;
-
-/** Flag: has ut_mem_block_list been initialized? */
-static ibool ut_mem_block_list_inited = FALSE;
-
-/** A dummy pointer for generating a null pointer exception in
-ut_malloc_low() */
-static ulint* ut_mem_null_ptr = NULL;
-
-/**********************************************************************//**
-Initializes the mem block list at database startup. */
-UNIV_INTERN
-void
-ut_mem_init(void)
-/*=============*/
-{
- ut_a(!ut_mem_block_list_inited);
- os_fast_mutex_init(ut_list_mutex_key, &ut_list_mutex);
- UT_LIST_INIT(ut_mem_block_list);
- ut_mem_block_list_inited = TRUE;
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Allocates memory.
-@return own: allocated memory */
-UNIV_INTERN
-void*
-ut_malloc_low(
-/*==========*/
- ulint n, /*!< in: number of bytes to allocate */
- ibool assert_on_error)/*!< in: if TRUE, we crash mysqld if the
- memory cannot be allocated */
-{
-#ifndef UNIV_HOTBACKUP
- ulint retry_count;
- void* ret;
-
- if (UNIV_LIKELY(srv_use_sys_malloc)) {
- ret = malloc(n);
- ut_a(ret || !assert_on_error);
-
- return(ret);
- }
-
- ut_ad((sizeof(ut_mem_block_t) % 8) == 0); /* check alignment ok */
- ut_a(ut_mem_block_list_inited);
-
- retry_count = 0;
-retry:
- os_fast_mutex_lock(&ut_list_mutex);
-
- ret = malloc(n + sizeof(ut_mem_block_t));
-
- if (ret == NULL && retry_count < 60) {
- if (retry_count == 0) {
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: cannot allocate"
- " %lu bytes of\n"
- "InnoDB: memory with malloc!"
- " Total allocated memory\n"
- "InnoDB: by InnoDB %lu bytes."
- " Operating system errno: %lu\n"
- "InnoDB: Check if you should"
- " increase the swap file or\n"
- "InnoDB: ulimits of your operating system.\n"
- "InnoDB: On FreeBSD check you"
- " have compiled the OS with\n"
- "InnoDB: a big enough maximum process size.\n"
- "InnoDB: Note that in most 32-bit"
- " computers the process\n"
- "InnoDB: memory space is limited"
- " to 2 GB or 4 GB.\n"
- "InnoDB: We keep retrying"
- " the allocation for 60 seconds...\n",
- (ulong) n, (ulong) ut_total_allocated_memory,
-#ifdef __WIN__
- (ulong) GetLastError()
-#else
- (ulong) errno
-#endif
- );
- }
-
- os_fast_mutex_unlock(&ut_list_mutex);
-
- /* Sleep for a second and retry the allocation; maybe this is
- just a temporary shortage of memory */
-
- os_thread_sleep(1000000);
-
- retry_count++;
-
- goto retry;
- }
-
- if (ret == NULL) {
- /* Flush stderr to make more probable that the error
- message gets in the error file before we generate a seg
- fault */
-
- fflush(stderr);
-
- os_fast_mutex_unlock(&ut_list_mutex);
-
- /* Make an intentional seg fault so that we get a stack
- trace */
- if (assert_on_error) {
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: We now intentionally"
- " generate a seg fault so that\n"
- "InnoDB: on Linux we get a stack trace.\n");
-
- if (*ut_mem_null_ptr) ut_mem_null_ptr = 0;
- } else {
- return(NULL);
- }
- }
-
- UNIV_MEM_ALLOC(ret, n + sizeof(ut_mem_block_t));
-
- ((ut_mem_block_t*) ret)->size = n + sizeof(ut_mem_block_t);
- ((ut_mem_block_t*) ret)->magic_n = UT_MEM_MAGIC_N;
-
- ut_total_allocated_memory += n + sizeof(ut_mem_block_t);
-
- UT_LIST_ADD_FIRST(mem_block_list, ut_mem_block_list,
- ((ut_mem_block_t*) ret));
- os_fast_mutex_unlock(&ut_list_mutex);
-
- return((void*)((byte*) ret + sizeof(ut_mem_block_t)));
-#else /* !UNIV_HOTBACKUP */
- void* ret = malloc(n);
- ut_a(ret || !assert_on_error);
-
- return(ret);
-#endif /* !UNIV_HOTBACKUP */
-}
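Most of the deleted ut_malloc_low() is bookkeeping around one idea: when malloc() fails, sleep for a second and retry, for up to 60 attempts, before giving up or crashing deliberately. Stripped of the block-list accounting, the retry loop amounts to this sketch (not the removed code verbatim):

#include <stdlib.h>
#include <unistd.h>

void* malloc_retry(size_t n, unsigned max_retries /* 60 in the old code */)
{
	for (unsigned i = 0; ; i++) {
		void*	p = malloc(n);

		if (p != NULL || i >= max_retries) {
			return(p);
		}

		/* Maybe this is just a temporary shortage of memory. */
		sleep(1);
	}
}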
-
-/**********************************************************************//**
-Frees a memory block allocated with ut_malloc. Freeing a NULL pointer is
-a nop. */
-UNIV_INTERN
-void
-ut_free(
-/*====*/
- void* ptr) /*!< in, own: memory block, can be NULL */
-{
-#ifndef UNIV_HOTBACKUP
- ut_mem_block_t* block;
-
- if (ptr == NULL) {
- return;
- } else if (UNIV_LIKELY(srv_use_sys_malloc)) {
- free(ptr);
- return;
- }
-
- block = (ut_mem_block_t*)((byte*) ptr - sizeof(ut_mem_block_t));
-
- os_fast_mutex_lock(&ut_list_mutex);
-
- ut_a(block->magic_n == UT_MEM_MAGIC_N);
- ut_a(ut_total_allocated_memory >= block->size);
-
- ut_total_allocated_memory -= block->size;
-
- UT_LIST_REMOVE(mem_block_list, ut_mem_block_list, block);
- free(block);
-
- os_fast_mutex_unlock(&ut_list_mutex);
-#else /* !UNIV_HOTBACKUP */
- free(ptr);
-#endif /* !UNIV_HOTBACKUP */
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Implements realloc. This is needed by /pars/lexyy.cc. Otherwise, you should not
-use this function because the allocation functions in mem0mem.h are the
-recommended ones in InnoDB.
-
-man realloc in Linux, 2004:
-
- realloc() changes the size of the memory block pointed to
- by ptr to size bytes. The contents will be unchanged to
- the minimum of the old and new sizes; newly allocated mem-
- ory will be uninitialized. If ptr is NULL, the call is
- equivalent to malloc(size); if size is equal to zero, the
- call is equivalent to free(ptr). Unless ptr is NULL, it
- must have been returned by an earlier call to malloc(),
- calloc() or realloc().
-
-RETURN VALUE
- realloc() returns a pointer to the newly allocated memory,
- which is suitably aligned for any kind of variable and may
- be different from ptr, or NULL if the request fails. If
- size was equal to 0, either NULL or a pointer suitable to
- be passed to free() is returned. If realloc() fails the
- original block is left untouched - it is not freed or
- moved.
-@return own: pointer to new mem block or NULL */
-UNIV_INTERN
-void*
-ut_realloc(
-/*=======*/
- void* ptr, /*!< in: pointer to old block or NULL */
- ulint size) /*!< in: desired size */
-{
- ut_mem_block_t* block;
- ulint old_size;
- ulint min_size;
- void* new_ptr;
-
- if (UNIV_LIKELY(srv_use_sys_malloc)) {
- return(realloc(ptr, size));
- }
-
- if (ptr == NULL) {
-
- return(ut_malloc(size));
- }
-
- if (size == 0) {
- ut_free(ptr);
-
- return(NULL);
- }
-
- block = (ut_mem_block_t*)((byte*) ptr - sizeof(ut_mem_block_t));
-
- ut_a(block->magic_n == UT_MEM_MAGIC_N);
-
- old_size = block->size - sizeof(ut_mem_block_t);
-
- if (size < old_size) {
- min_size = size;
- } else {
- min_size = old_size;
- }
-
- new_ptr = ut_malloc(size);
-
- if (new_ptr == NULL) {
-
- return(NULL);
- }
-
- /* Copy the old data from ptr */
- ut_memcpy(new_ptr, ptr, min_size);
-
- ut_free(ptr);
-
- return(new_ptr);
-}
-
-/**********************************************************************//**
-Frees in shutdown all allocated memory not freed yet. */
-UNIV_INTERN
-void
-ut_free_all_mem(void)
-/*=================*/
-{
- ut_mem_block_t* block;
-
- ut_a(ut_mem_block_list_inited);
- ut_mem_block_list_inited = FALSE;
- os_fast_mutex_free(&ut_list_mutex);
-
- while ((block = UT_LIST_GET_FIRST(ut_mem_block_list))) {
-
- ut_a(block->magic_n == UT_MEM_MAGIC_N);
- ut_a(ut_total_allocated_memory >= block->size);
-
- ut_total_allocated_memory -= block->size;
-
- UT_LIST_REMOVE(mem_block_list, ut_mem_block_list, block);
- free(block);
- }
-
- if (ut_total_allocated_memory != 0) {
- fprintf(stderr,
- "InnoDB: Warning: after shutdown"
- " total allocated memory is %lu\n",
- (ulong) ut_total_allocated_memory);
- }
-
- ut_mem_block_list_inited = FALSE;
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Copies up to size - 1 characters from the NUL-terminated string src to
-dst, NUL-terminating the result. Returns strlen(src), so truncation
-occurred if the return value >= size.
-@return strlen(src) */
-UNIV_INTERN
-ulint
-ut_strlcpy(
-/*=======*/
- char* dst, /*!< in: destination buffer */
- const char* src, /*!< in: source buffer */
- ulint size) /*!< in: size of destination buffer */
-{
- ulint src_size = strlen(src);
-
- if (size != 0) {
- ulint n = ut_min(src_size, size - 1);
-
- memcpy(dst, src, n);
- dst[n] = '\0';
- }
-
- return(src_size);
-}
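The removed ut_strlcpy() implemented the BSD strlcpy() contract: copy at most size - 1 bytes, always NUL-terminate when size > 0, and return strlen(src), so that a return value >= size signals truncation. An equivalent standalone sketch:

#include <stddef.h>
#include <string.h>

size_t my_strlcpy(char* dst, const char* src, size_t size)
{
	size_t	src_len = strlen(src);

	if (size != 0) {
		size_t	n = (src_len < size - 1) ? src_len : size - 1;

		memcpy(dst, src, n);
		dst[n] = '\0';
	}

	return(src_len);
}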
-
-/**********************************************************************//**
-Like ut_strlcpy, but if src doesn't fit in dst completely, copies the last
-(size - 1) bytes of src, not the first.
-@return strlen(src) */
-UNIV_INTERN
-ulint
-ut_strlcpy_rev(
-/*===========*/
- char* dst, /*!< in: destination buffer */
- const char* src, /*!< in: source buffer */
- ulint size) /*!< in: size of destination buffer */
-{
- ulint src_size = strlen(src);
-
- if (size != 0) {
- ulint n = ut_min(src_size, size - 1);
-
- memcpy(dst, src + src_size - n, n + 1);
- }
-
- return(src_size);
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Return the number of times s2 occurs in s1. Overlapping instances of s2
-are only counted once.
-@return the number of times s2 occurs in s1 */
-UNIV_INTERN
-ulint
-ut_strcount(
-/*========*/
- const char* s1, /*!< in: string to search in */
- const char* s2) /*!< in: string to search for */
-{
- ulint count = 0;
- ulint len = strlen(s2);
-
- if (len == 0) {
-
- return(0);
- }
-
- for (;;) {
- s1 = strstr(s1, s2);
-
- if (!s1) {
-
- break;
- }
-
- count++;
- s1 += len;
- }
-
- return(count);
-}
-
/********************************************************************
Concatenate 3 strings.*/
-
char*
ut_str3cat(
/*=======*/
/* out, own: concatenated string, must be
- freed with mem_free() */
+ freed with ut_free() */
const char* s1, /* in: string 1 */
const char* s2, /* in: string 2 */
const char* s3) /* in: string 3 */
@@ -457,7 +42,7 @@ ut_str3cat(
ulint s2_len = strlen(s2);
ulint s3_len = strlen(s3);
- s = static_cast<char*>(mem_alloc(s1_len + s2_len + s3_len + 1));
+ s = static_cast<char*>(ut_malloc_nokey(s1_len + s2_len + s3_len + 1));
memcpy(s, s1, s1_len);
memcpy(s + s1_len, s2, s2_len);
@@ -467,143 +52,3 @@ ut_str3cat(
return(s);
}
-/**********************************************************************//**
-Replace every occurrence of s1 in str with s2. Overlapping instances of s1
-are only replaced once.
-@return own: modified string, must be freed with mem_free() */
-UNIV_INTERN
-char*
-ut_strreplace(
-/*==========*/
- const char* str, /*!< in: string to operate on */
- const char* s1, /*!< in: string to replace */
- const char* s2) /*!< in: string to replace s1 with */
-{
- char* new_str;
- char* ptr;
- const char* str_end;
- ulint str_len = strlen(str);
- ulint s1_len = strlen(s1);
- ulint s2_len = strlen(s2);
- ulint count = 0;
- int len_delta = (int) s2_len - (int) s1_len;
-
- str_end = str + str_len;
-
- if (len_delta <= 0) {
- len_delta = 0;
- } else {
- count = ut_strcount(str, s1);
- }
-
- new_str = static_cast<char*>(
- mem_alloc(str_len + count * len_delta + 1));
-
- ptr = new_str;
-
- while (str) {
- const char* next = strstr(str, s1);
-
- if (!next) {
- next = str_end;
- }
-
- memcpy(ptr, str, next - str);
- ptr += next - str;
-
- if (next == str_end) {
-
- break;
- }
-
- memcpy(ptr, s2, s2_len);
- ptr += s2_len;
-
- str = next + s1_len;
- }
-
- *ptr = '\0';
-
- return(new_str);
-}
-
-#ifdef UNIV_COMPILE_TEST_FUNCS
-
-void
-test_ut_str_sql_format()
-{
- char buf[128];
- ulint ret;
-
-#define CALL_AND_TEST(str, str_len, buf, buf_size, ret_expected, buf_expected)\
- do {\
- ibool ok = TRUE;\
- memset(buf, 'x', 10);\
- buf[10] = '\0';\
- fprintf(stderr, "TESTING \"%s\", %lu, %lu\n",\
- str, (ulint) str_len, (ulint) buf_size);\
- ret = ut_str_sql_format(str, str_len, buf, buf_size);\
- if (ret != ret_expected) {\
- fprintf(stderr, "expected ret %lu, got %lu\n",\
- (ulint) ret_expected, ret);\
- ok = FALSE;\
- }\
- if (strcmp((char*) buf, buf_expected) != 0) {\
- fprintf(stderr, "expected buf \"%s\", got \"%s\"\n",\
- buf_expected, buf);\
- ok = FALSE;\
- }\
- if (ok) {\
- fprintf(stderr, "OK: %lu, \"%s\"\n\n",\
- (ulint) ret, buf);\
- } else {\
- return;\
- }\
- } while (0)
-
- CALL_AND_TEST("abcd", 4, buf, 0, 0, "xxxxxxxxxx");
-
- CALL_AND_TEST("abcd", 4, buf, 1, 1, "");
-
- CALL_AND_TEST("abcd", 4, buf, 2, 1, "");
-
- CALL_AND_TEST("abcd", 0, buf, 3, 3, "''");
- CALL_AND_TEST("abcd", 1, buf, 3, 1, "");
- CALL_AND_TEST("abcd", 2, buf, 3, 1, "");
- CALL_AND_TEST("abcd", 3, buf, 3, 1, "");
- CALL_AND_TEST("abcd", 4, buf, 3, 1, "");
-
- CALL_AND_TEST("abcd", 0, buf, 4, 3, "''");
- CALL_AND_TEST("abcd", 1, buf, 4, 4, "'a'");
- CALL_AND_TEST("abcd", 2, buf, 4, 4, "'a'");
- CALL_AND_TEST("abcd", 3, buf, 4, 4, "'a'");
- CALL_AND_TEST("abcd", 4, buf, 4, 4, "'a'");
- CALL_AND_TEST("abcde", 5, buf, 4, 4, "'a'");
- CALL_AND_TEST("'", 1, buf, 4, 3, "''");
- CALL_AND_TEST("''", 2, buf, 4, 3, "''");
- CALL_AND_TEST("a'", 2, buf, 4, 4, "'a'");
- CALL_AND_TEST("'a", 2, buf, 4, 3, "''");
- CALL_AND_TEST("ab", 2, buf, 4, 4, "'a'");
-
- CALL_AND_TEST("abcdef", 0, buf, 5, 3, "''");
- CALL_AND_TEST("abcdef", 1, buf, 5, 4, "'a'");
- CALL_AND_TEST("abcdef", 2, buf, 5, 5, "'ab'");
- CALL_AND_TEST("abcdef", 3, buf, 5, 5, "'ab'");
- CALL_AND_TEST("abcdef", 4, buf, 5, 5, "'ab'");
- CALL_AND_TEST("abcdef", 5, buf, 5, 5, "'ab'");
- CALL_AND_TEST("abcdef", 6, buf, 5, 5, "'ab'");
- CALL_AND_TEST("'", 1, buf, 5, 5, "''''");
- CALL_AND_TEST("''", 2, buf, 5, 5, "''''");
- CALL_AND_TEST("a'", 2, buf, 5, 4, "'a'");
- CALL_AND_TEST("'a", 2, buf, 5, 5, "''''");
- CALL_AND_TEST("ab", 2, buf, 5, 5, "'ab'");
- CALL_AND_TEST("abc", 3, buf, 5, 5, "'ab'");
-
- CALL_AND_TEST("ab", 2, buf, 6, 5, "'ab'");
-
- CALL_AND_TEST("a'b'c", 5, buf, 32, 10, "'a''b''c'");
- CALL_AND_TEST("a'b'c'", 6, buf, 32, 12, "'a''b''c'''");
-}
-
-#endif /* UNIV_COMPILE_TEST_FUNCS */
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/ut/ut0new.cc b/storage/innobase/ut/ut0new.cc
new file mode 100644
index 00000000000..05c7eac1d83
--- /dev/null
+++ b/storage/innobase/ut/ut0new.cc
@@ -0,0 +1,226 @@
+/*****************************************************************************
+
+Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2019, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file ut/ut0new.cc
+Instrumented memory allocator.
+
+Created May 26, 2014 Vasil Dimov
+*******************************************************/
+
+#include "univ.i"
+
+/** Maximum number of retries to allocate memory. */
+const size_t alloc_max_retries = 60;
+
+/** Keys for registering allocations with performance schema.
+Keep this list alphabetically sorted. */
+#ifdef BTR_CUR_HASH_ADAPT
+PSI_memory_key mem_key_ahi;
+#endif /* BTR_CUR_HASH_ADAPT */
+PSI_memory_key mem_key_buf_buf_pool;
+PSI_memory_key mem_key_dict_stats_bg_recalc_pool_t;
+PSI_memory_key mem_key_dict_stats_index_map_t;
+PSI_memory_key mem_key_dict_stats_n_diff_on_level;
+PSI_memory_key mem_key_other;
+PSI_memory_key mem_key_row_log_buf;
+PSI_memory_key mem_key_row_merge_sort;
+PSI_memory_key mem_key_std;
+PSI_memory_key mem_key_trx_sys_t_rw_trx_ids;
+
+#ifdef UNIV_PFS_MEMORY
+
+/** Auxiliary array of performance schema 'PSI_memory_info'.
+Each allocation appears in
+performance_schema.memory_summary_global_by_event_name (and the like) in the
+form of e.g. 'memory/innodb/NAME', where the last component NAME is picked from
+the list below:
+1. If key is specified, then the respective name is used
+2. Without a specified key, allocations from inside std::* containers use
+ mem_key_std
+3. Without a specified key, allocations from outside std::* pick up the key
+ based on the file name, and if file name is not found in the predefined list
+ (in ut_new_boot()) then mem_key_other is used.
+Keep this list alphabetically sorted. */
+static PSI_memory_info pfs_info[] = {
+#ifdef BTR_CUR_HASH_ADAPT
+ {&mem_key_ahi, "adaptive hash index", 0},
+#endif /* BTR_CUR_HASH_ADAPT */
+ {&mem_key_buf_buf_pool, "buf_buf_pool", 0},
+ {&mem_key_dict_stats_bg_recalc_pool_t, "dict_stats_bg_recalc_pool_t", 0},
+ {&mem_key_dict_stats_index_map_t, "dict_stats_index_map_t", 0},
+ {&mem_key_dict_stats_n_diff_on_level, "dict_stats_n_diff_on_level", 0},
+ {&mem_key_other, "other", 0},
+ {&mem_key_row_log_buf, "row_log_buf", 0},
+ {&mem_key_row_merge_sort, "row_merge_sort", 0},
+ {&mem_key_std, "std", 0},
+ {&mem_key_trx_sys_t_rw_trx_ids, "trx_sys_t::rw_trx_ids", 0},
+};
+
+/** Map used for default performance schema keys, based on file name of the
+caller. The key is the file name of the caller and the value is a pointer
+to a PSI_memory_key variable to be passed to performance schema methods.
+We use ut_strcmp_functor because by default std::map will compare the pointers
+themselves (const char*) and not do strcmp().
+typedef std::map<const char*, PSI_memory_key*, ut_strcmp_functor>
+ mem_keys_auto_t;
+
+/** Map of filename/pfskey, used for tracing allocations that have not
+provided a manually created pfs key. This map is only ever modified (bulk
+insert) at startup in a single-threaded environment by ut_new_boot().
+Later it is only read (only std::map::find() is called) from a multithreaded
+environment, thus it is not protected by any latch.
+static mem_keys_auto_t mem_keys_auto;
+
+#endif /* UNIV_PFS_MEMORY */
+
+/** Setup the internal objects needed for UT_NEW() to operate.
+This must be called before the first call to UT_NEW(). */
+void
+ut_new_boot()
+{
+#ifdef UNIV_PFS_MEMORY
+ static const char* auto_event_names[] = {
+ /* Keep this list alphabetically sorted. */
+ "btr0btr",
+ "btr0bulk",
+ "btr0cur",
+ "btr0pcur",
+ "btr0sea",
+ "buf0buf",
+ "buf0dblwr",
+ "buf0dump",
+ "buf0flu",
+ "buf0lru",
+ "dict0dict",
+ "dict0mem",
+ "dict0stats",
+ "dict0stats_bg",
+ "eval0eval",
+ "fil0fil",
+ "fsp0file",
+ "fsp0space",
+ "fsp0sysspace",
+ "fts0ast",
+ "fts0config",
+ "fts0fts",
+ "fts0opt",
+ "fts0pars",
+ "fts0que",
+ "fts0sql",
+ "gis0sea",
+ "ha0ha",
+ "ha_innodb",
+ "handler0alter",
+ "hash0hash",
+ "i_s",
+ "ibuf0ibuf",
+ "lexyy",
+ "lock0lock",
+ "log0log",
+ "log0recv",
+ "mem0mem",
+ "os0event",
+ "os0file",
+ "page0cur",
+ "page0zip",
+ "pars0lex",
+ "read0read",
+ "rem0rec",
+ "row0ftsort",
+ "row0import",
+ "row0log",
+ "row0merge",
+ "row0mysql",
+ "row0sel",
+ "row0trunc",
+ "srv0conc",
+ "srv0srv",
+ "srv0start",
+ "sync0arr",
+ "sync0debug",
+ "sync0rw",
+ "sync0types",
+ "trx0i_s",
+ "trx0purge",
+ "trx0roll",
+ "trx0rseg",
+ "trx0sys",
+ "trx0trx",
+ "trx0undo",
+ "ut0list",
+ "ut0mem",
+ "ut0mutex",
+ "ut0pool",
+ "ut0rbt",
+ "ut0wqueue",
+ };
+ static const size_t n_auto = UT_ARR_SIZE(auto_event_names);
+ static PSI_memory_key auto_event_keys[n_auto];
+ static PSI_memory_info pfs_info_auto[n_auto];
+
+ for (size_t i = 0; i < n_auto; i++) {
+
+ const std::pair<mem_keys_auto_t::iterator, bool> ret
+ MY_ATTRIBUTE((unused))
+ = mem_keys_auto.insert(
+ mem_keys_auto_t::value_type(auto_event_names[i],
+ &auto_event_keys[i]));
+
+ /* ret.second is true if new element has been inserted */
+ ut_a(ret.second);
+
+ /* e.g. "btr0btr" */
+ pfs_info_auto[i].m_name = auto_event_names[i];
+
+ /* a pointer to the pfs key */
+ pfs_info_auto[i].m_key = &auto_event_keys[i];
+
+ pfs_info_auto[i].m_flags = 0;
+ }
+
+ PSI_MEMORY_CALL(register_memory)("innodb",
+ pfs_info,
+ UT_ARR_SIZE(pfs_info));
+ PSI_MEMORY_CALL(register_memory)("innodb",
+ pfs_info_auto,
+ n_auto);
+#endif /* UNIV_PFS_MEMORY */
+}
+
+#ifdef UNIV_PFS_MEMORY
+
+/** Retrieve a memory key (registered with PFS), given a portion of the file
+name of the caller.
+@param[in] file portion of the filename - basename without an extension
+@return registered memory key or PSI_NOT_INSTRUMENTED if not found */
+PSI_memory_key
+ut_new_get_key_by_file(
+ const char* file)
+{
+ mem_keys_auto_t::const_iterator el = mem_keys_auto.find(file);
+
+ if (el != mem_keys_auto.end()) {
+ return(*(el->second));
+ }
+
+ return(PSI_NOT_INSTRUMENTED);
+}
+
+#endif /* UNIV_PFS_MEMORY */
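The ut_strcmp_functor detail called out in the comment above is easy to get wrong: a std::map keyed on const char* orders by pointer value unless a comparator is supplied. A minimal illustration, where the int value stands in for PSI_memory_key and -1 plays the role of PSI_NOT_INSTRUMENTED:

#include <cstring>
#include <map>

struct strcmp_less {
	bool operator()(const char* a, const char* b) const
	{
		return(std::strcmp(a, b) < 0);
	}
};

typedef std::map<const char*, int, strcmp_less> key_map_t;

int key_by_file(const key_map_t& keys, const char* file)
{
	key_map_t::const_iterator it = keys.find(file);

	return(it != keys.end() ? it->second : -1);
}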
diff --git a/storage/innobase/ut/ut0rbt.cc b/storage/innobase/ut/ut0rbt.cc
index 693f6a809c7..cdd1ef06775 100644
--- a/storage/innobase/ut/ut0rbt.cc
+++ b/storage/innobase/ut/ut0rbt.cc
@@ -1,6 +1,6 @@
/***************************************************************************//**
-Copyright (c) 2007, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -51,27 +51,10 @@ red-black properties:
#define ROOT(t) (t->root->left)
#define SIZEOF_NODE(t) ((sizeof(ib_rbt_node_t) + t->sizeof_value) - 1)
-/**********************************************************************//**
-Print out the sub-tree recursively. */
-static
-void
-rbt_print_subtree(
-/*==============*/
- const ib_rbt_t* tree, /*!< in: tree to traverse */
- const ib_rbt_node_t* node, /*!< in: node to print */
- ib_rbt_print_node print) /*!< in: print key function */
-{
- /* FIXME: Doesn't do anything yet */
- if (node != tree->nil) {
- print(node);
- rbt_print_subtree(tree, node->left, print);
- rbt_print_subtree(tree, node->right, print);
- }
-}
-
+#if defined UNIV_DEBUG || defined IB_RBT_TESTING
/**********************************************************************//**
Verify that the keys are in order.
-@return TRUE of OK. FALSE if not ordered */
+@return TRUE if OK, FALSE if not ordered */
static
ibool
rbt_check_ordering(
@@ -110,7 +93,7 @@ rbt_check_ordering(
/**********************************************************************//**
Check that every path from the root to the leaves has the same count.
Count is expressed in the number of black nodes.
-@return 0 on failure else black height of the subtree */
+@return 0 on failure else black height of the subtree */
static
ibool
rbt_count_black_nodes(
@@ -154,6 +137,7 @@ rbt_count_black_nodes(
return(result);
}
+#endif /* UNIV_DEBUG || IB_RBT_TESTING */
/**********************************************************************//**
Turn the node's right child's left sub-tree into node's right sub-tree.
@@ -388,7 +372,7 @@ rbt_balance_tree(
/**********************************************************************//**
Find the given node's successor.
-@return successor node or NULL if no successor */
+@return successor node or NULL if no successor */
static
ib_rbt_node_t*
rbt_find_successor(
@@ -428,7 +412,7 @@ rbt_find_successor(
/**********************************************************************//**
Find the given node's predecessor.
-@return predecessor node or NULL if no predecesor */
+@return predecessor node or NULL if no predecessor */
static
ib_rbt_node_t*
rbt_find_predecessor(
@@ -519,7 +503,7 @@ rbt_replace_node(
/**********************************************************************//**
Detach node from the tree replacing it with one of it's children.
-@return the child node that now occupies the position of the detached node */
+@return the child node that now occupies the position of the detached node */
static
ib_rbt_node_t*
rbt_detach_node(
@@ -562,7 +546,7 @@ rbt_detach_node(
/**********************************************************************//**
Rebalance the right sub-tree after deletion.
-@return node to rebalance if more rebalancing required else NULL */
+@return node to rebalance if more rebalancing required else NULL */
static
ib_rbt_node_t*
rbt_balance_right(
@@ -622,7 +606,7 @@ rbt_balance_right(
/**********************************************************************//**
Rebalance the left sub-tree after deletion.
-@return node to rebalance if more rebalancing required else NULL */
+@return node to rebalance if more rebalancing required else NULL */
static
ib_rbt_node_t*
rbt_balance_left(
@@ -751,7 +735,6 @@ rbt_free_node(
/**********************************************************************//**
Free all the nodes and free the tree. */
-UNIV_INTERN
void
rbt_free(
/*=====*/
@@ -765,8 +748,7 @@ rbt_free(
/**********************************************************************//**
Create an instance of a red black tree, whose comparison function takes
an argument
-@return an empty rb tree */
-UNIV_INTERN
+@return an empty rb tree */
ib_rbt_t*
rbt_create_arg_cmp(
/*===============*/
@@ -788,8 +770,7 @@ rbt_create_arg_cmp(
/**********************************************************************//**
Create an instance of a red black tree.
-@return an empty rb tree */
-UNIV_INTERN
+@return an empty rb tree */
ib_rbt_t*
rbt_create(
/*=======*/
@@ -799,22 +780,19 @@ rbt_create(
ib_rbt_t* tree;
ib_rbt_node_t* node;
- tree = (ib_rbt_t*) ut_malloc(sizeof(*tree));
- memset(tree, 0, sizeof(*tree));
+ tree = (ib_rbt_t*) ut_zalloc_nokey(sizeof(*tree));
tree->sizeof_value = sizeof_value;
/* Create the sentinel (NIL) node. */
- node = tree->nil = (ib_rbt_node_t*) ut_malloc(sizeof(*node));
- memset(node, 0, sizeof(*node));
+ node = tree->nil = (ib_rbt_node_t*) ut_zalloc_nokey(sizeof(*node));
node->color = IB_RBT_BLACK;
node->parent = node->left = node->right = node;
/* Create the "fake" root, the real root node will be the
left child of this node. */
- node = tree->root = (ib_rbt_node_t*) ut_malloc(sizeof(*node));
- memset(node, 0, sizeof(*node));
+ node = tree->root = (ib_rbt_node_t*) ut_zalloc_nokey(sizeof(*node));
node->color = IB_RBT_BLACK;
node->parent = node->left = node->right = tree->nil;
@@ -826,8 +804,7 @@ rbt_create(
/**********************************************************************//**
Generic insert of a value in the rb tree.
-@return inserted node */
-UNIV_INTERN
+@return inserted node */
const ib_rbt_node_t*
rbt_insert(
/*=======*/
@@ -839,7 +816,7 @@ rbt_insert(
ib_rbt_node_t* node;
/* Create the node that will hold the value data. */
- node = (ib_rbt_node_t*) ut_malloc(SIZEOF_NODE(tree));
+ node = (ib_rbt_node_t*) ut_malloc_nokey(SIZEOF_NODE(tree));
memcpy(node->value, value, tree->sizeof_value);
node->parent = node->left = node->right = tree->nil;
@@ -855,8 +832,7 @@ rbt_insert(
/**********************************************************************//**
Add a new node to the tree, useful for data that is pre-sorted.
-@return appended node */
-UNIV_INTERN
+@return appended node */
const ib_rbt_node_t*
rbt_add_node(
/*=========*/
@@ -868,7 +844,7 @@ rbt_add_node(
ib_rbt_node_t* node;
/* Create the node that will hold the value data */
- node = (ib_rbt_node_t*) ut_malloc(SIZEOF_NODE(tree));
+ node = (ib_rbt_node_t*) ut_malloc_nokey(SIZEOF_NODE(tree));
memcpy(node->value, value, tree->sizeof_value);
node->parent = node->left = node->right = tree->nil;
@@ -885,7 +861,7 @@ rbt_add_node(
++tree->n_nodes;
-#if defined(IB_RBT_TESTING)
+#if defined UNIV_DEBUG || defined IB_RBT_TESTING
ut_a(rbt_validate(tree));
#endif
return(node);
@@ -893,8 +869,8 @@ rbt_add_node(
/**********************************************************************//**
Find a matching node in the rb tree.
-@return NULL if not found else the node where key was found */
-UNIV_INTERN
+@return NULL if not found else the node where key was found */
+static
const ib_rbt_node_t*
rbt_lookup(
/*=======*/
@@ -928,8 +904,7 @@ rbt_lookup(
/**********************************************************************//**
Delete a node identified by key.
-@return TRUE if success FALSE if not found */
-UNIV_INTERN
+@return TRUE if success FALSE if not found */
ibool
rbt_delete(
/*=======*/
@@ -952,8 +927,7 @@ rbt_delete(
/**********************************************************************//**
Remove a node from the rb tree; the node is not freed, that is the
caller's responsibility.
-@return deleted node but without the const */
-UNIV_INTERN
+@return deleted node but without the const */
ib_rbt_node_t*
rbt_remove_node(
/*============*/
@@ -974,91 +948,8 @@ rbt_remove_node(
}
/**********************************************************************//**
-Find the node that has the lowest key that is >= key.
-@return node satisfying the lower bound constraint or NULL */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_lower_bound(
-/*============*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- const void* key) /*!< in: key to search */
-{
- ib_rbt_node_t* lb_node = NULL;
- ib_rbt_node_t* current = ROOT(tree);
-
- while (current != tree->nil) {
- int result;
-
- if (tree->cmp_arg) {
- result = tree->compare_with_arg(
- tree->cmp_arg, key, current->value);
- } else {
- result = tree->compare(key, current->value);
- }
-
- if (result > 0) {
-
- current = current->right;
-
- } else if (result < 0) {
-
- lb_node = current;
- current = current->left;
-
- } else {
- lb_node = current;
- break;
- }
- }
-
- return(lb_node);
-}
-
-/**********************************************************************//**
-Find the node that has the greatest key that is <= key.
-@return node satisfying the upper bound constraint or NULL */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_upper_bound(
-/*============*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- const void* key) /*!< in: key to search */
-{
- ib_rbt_node_t* ub_node = NULL;
- ib_rbt_node_t* current = ROOT(tree);
-
- while (current != tree->nil) {
- int result;
-
- if (tree->cmp_arg) {
- result = tree->compare_with_arg(
- tree->cmp_arg, key, current->value);
- } else {
- result = tree->compare(key, current->value);
- }
-
- if (result > 0) {
-
- ub_node = current;
- current = current->right;
-
- } else if (result < 0) {
-
- current = current->left;
-
- } else {
- ub_node = current;
- break;
- }
- }
-
- return(ub_node);
-}
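The deleted rbt_lower_bound()/rbt_upper_bound() pair followed the textbook binary-search-tree pattern: descend from the root, remembering the best candidate seen so far. The lower-bound half, sketched with a hypothetical node type (the InnoDB version compared opaque values through tree->compare):

#include <cstddef>

struct bst_node {
	int		key;
	bst_node*	left;
	bst_node*	right;
};

/* Return the node with the lowest key that is >= key, or NULL. */
const bst_node* bst_lower_bound(const bst_node* node, int key)
{
	const bst_node*	lb = NULL;

	while (node != NULL) {
		if (node->key < key) {
			node = node->right;	/* too small, go right */
		} else {
			lb = node;		/* candidate: key <= node->key */
			node = node->left;	/* look for a smaller one */
		}
	}

	return(lb);
}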
-
-/**********************************************************************//**
Find the node that has the greatest key that is <= key.
-@return value of result */
-UNIV_INTERN
+@return value of result */
int
rbt_search(
/*=======*/
@@ -1098,8 +989,7 @@ rbt_search(
/**********************************************************************//**
Find the node that has the greatest key that is <= key. But use the
supplied comparison function.
-@return value of result */
-UNIV_INTERN
+@return value of result */
int
rbt_search_cmp(
/*===========*/
@@ -1143,7 +1033,6 @@ rbt_search_cmp(
/**********************************************************************//**
Return the left most node in the tree. */
-UNIV_INTERN
const ib_rbt_node_t*
rbt_first(
/*======*/
@@ -1163,8 +1052,7 @@ rbt_first(
/**********************************************************************//**
Return the right most node in the tree.
-@return the rightmost node or NULL */
-UNIV_INTERN
+@return the rightmost node or NULL */
const ib_rbt_node_t*
rbt_last(
/*=====*/
@@ -1183,8 +1071,7 @@ rbt_last(
/**********************************************************************//**
Return the next node.
-@return node next from current */
-UNIV_INTERN
+@return node next from current */
const ib_rbt_node_t*
rbt_next(
/*=====*/
@@ -1196,8 +1083,7 @@ rbt_next(
/**********************************************************************//**
Return the previous node.
-@return node prev from current */
-UNIV_INTERN
+@return node prev from current */
const ib_rbt_node_t*
rbt_prev(
/*=====*/
@@ -1208,23 +1094,8 @@ rbt_prev(
}
/**********************************************************************//**
-Reset the tree. Delete all the nodes. */
-UNIV_INTERN
-void
-rbt_clear(
-/*======*/
- ib_rbt_t* tree) /*!< in: rb tree */
-{
- rbt_free_node(ROOT(tree), tree->nil);
-
- tree->n_nodes = 0;
- tree->root->left = tree->root->right = tree->nil;
-}
-
-/**********************************************************************//**
Merge the nodes from src into dst. Return the number of nodes merged.
-@return no. of recs merged */
-UNIV_INTERN
+@return no. of recs merged */
ulint
rbt_merge_uniq(
/*===========*/
@@ -1250,59 +1121,11 @@ rbt_merge_uniq(
return(n_merged);
}
-/**********************************************************************//**
-Merge the node from dst into src. Return the number of nodes merged.
-Delete the nodes from src after copying node to dst. As a side effect
-the duplicates will be left untouched in the src.
-@return no. of recs merged */
-UNIV_INTERN
-ulint
-rbt_merge_uniq_destructive(
-/*=======================*/
- ib_rbt_t* dst, /*!< in: dst rb tree */
- ib_rbt_t* src) /*!< in: src rb tree */
-{
- ib_rbt_bound_t parent;
- ib_rbt_node_t* src_node;
- ulint old_size = rbt_size(dst);
-
- if (rbt_empty(src) || dst == src) {
- return(0);
- }
-
- for (src_node = (ib_rbt_node_t*) rbt_first(src); src_node; /* */) {
- ib_rbt_node_t* prev = src_node;
-
- src_node = (ib_rbt_node_t*) rbt_next(src, prev);
-
- /* Skip duplicates. */
- if (rbt_search(dst, &parent, prev->value) != 0) {
-
- /* Remove and reset the node but preserve
- the node (data) value. */
- rbt_remove_node_and_rebalance(src, prev);
-
- /* The nil should be taken from the dst tree. */
- prev->parent = prev->left = prev->right = dst->nil;
- rbt_tree_add_child(dst, &parent, prev);
- rbt_balance_tree(dst, prev);
-
- ++dst->n_nodes;
- }
- }
-
-#if defined(IB_RBT_TESTING)
- ut_a(rbt_validate(dst));
- ut_a(rbt_validate(src));
-#endif
- return(rbt_size(dst) - old_size);
-}
-
+#if defined UNIV_DEBUG || defined IB_RBT_TESTING
/**********************************************************************//**
Check that every path from the root to the leaves has the same count and
the tree nodes are in order.
-@return TRUE if OK FALSE otherwise */
-UNIV_INTERN
+@return TRUE if OK FALSE otherwise */
ibool
rbt_validate(
/*=========*/
@@ -1314,15 +1137,4 @@ rbt_validate(
return(FALSE);
}
-
-/**********************************************************************//**
-Iterate over the tree in depth first order. */
-UNIV_INTERN
-void
-rbt_print(
-/*======*/
- const ib_rbt_t* tree, /*!< in: tree to traverse */
- ib_rbt_print_node print) /*!< in: print function */
-{
- rbt_print_subtree(tree, ROOT(tree), print);
-}
+#endif /* UNIV_DEBUG || IB_RBT_TESTING */
diff --git a/storage/innobase/ut/ut0rnd.cc b/storage/innobase/ut/ut0rnd.cc
index f881636e212..8265121ef2e 100644
--- a/storage/innobase/ut/ut0rnd.cc
+++ b/storage/innobase/ut/ut0rnd.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,9 +26,8 @@ Created 5/11/1994 Heikki Tuuri
#include "ut0rnd.h"
-#ifdef UNIV_NONINL
-#include "ut0rnd.ic"
-#endif
+/** Seed value of ut_rnd_gen() */
+int32 ut_rnd_current;
/** These random numbers are used in ut_find_prime */
/*@{*/
@@ -36,14 +36,10 @@ Created 5/11/1994 Heikki Tuuri
#define UT_RANDOM_3 1.0132677
/*@}*/
-/** Seed value of ut_rnd_gen_ulint(). */
-UNIV_INTERN ulint ut_rnd_ulint_counter = 65654363;
-
/***********************************************************//**
Looks for a prime number slightly greater than the given argument.
The prime is chosen so that it is not near any power of 2.
-@return prime */
-UNIV_INTERN
+@return prime */
ulint
ut_find_prime(
/*==========*/
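ut_find_prime() scans upward from its argument for a prime, and the UT_RANDOM_* constants above are used to steer the result away from the neighbourhood of powers of 2. The upward scan itself can be done by trial division, which is cheap at hash-table sizes; a sketch of that part only, not the InnoDB function:

#include <stddef.h>

static bool is_prime(size_t n)
{
	if (n < 2) {
		return(false);
	}

	for (size_t d = 2; d * d <= n; d++) {
		if (n % d == 0) {
			return(false);
		}
	}

	return(true);
}

size_t next_prime(size_t n)
{
	while (!is_prime(n)) {
		n++;
	}

	return(n);
}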
diff --git a/storage/innobase/ut/ut0ut.cc b/storage/innobase/ut/ut0ut.cc
index 6e4061bc7dd..252f3a50ae1 100644
--- a/storage/innobase/ut/ut0ut.cc
+++ b/storage/innobase/ut/ut0ut.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2019, MariaDB Corporation.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,44 +24,35 @@ Various utilities for Innobase.
Created 5/11/1994 Heikki Tuuri
********************************************************************/
-#include "ut0ut.h"
-
-#ifndef UNIV_INNOCHECKSUM
-
-#include "ut0sort.h"
-#include "os0thread.h" /* thread-ID */
+#include "ha_prototypes.h"
-#ifdef UNIV_NONINL
-#include "ut0ut.ic"
+#if HAVE_SYS_TIME_H
+#include <sys/time.h>
#endif
-#include <stdarg.h>
-#include <string.h>
-#include <ctype.h>
+#ifndef UNIV_INNOCHECKSUM
+#include <mysql_com.h>
+#include "os0thread.h"
+#include "ut0ut.h"
+#include "trx0trx.h"
+#include <string>
+#include "log.h"
-#ifndef UNIV_HOTBACKUP
-# include "trx0trx.h"
-# include "ha_prototypes.h"
-# include "mysql_com.h" /* NAME_LEN */
-# include <string>
/**********************************************************//**
Returns the number of milliseconds since some epoch. The
value may wrap around. It should only be used for heuristic
purposes.
-@return ms since epoch */
-UNIV_INTERN
+@return ms since epoch */
ulint
ut_time_ms(void)
/*============*/
{
return static_cast<ulint>(my_interval_timer() / 1000000);
}
-#endif /* !UNIV_HOTBACKUP */
#endif /* !UNIV_INNOCHECKSUM */
/**********************************************************//**
Prints a timestamp to a file. */
-UNIV_INTERN
void
ut_print_timestamp(
/*===============*/
@@ -71,14 +62,14 @@ ut_print_timestamp(
#ifndef UNIV_INNOCHECKSUM
thread_id = os_thread_pf(os_thread_get_curr_id());
-#endif
+#endif /* !UNIV_INNOCHECKSUM */
-#ifdef __WIN__
+#ifdef _WIN32
SYSTEMTIME cal_tm;
GetLocalTime(&cal_tm);
- fprintf(file, "%d-%02d-%02d %02d:%02d:%02d %lx",
+ fprintf(file, "%d-%02d-%02d %02d:%02d:%02d %#zx",
(int) cal_tm.wYear,
(int) cal_tm.wMonth,
(int) cal_tm.wDay,
@@ -90,16 +81,11 @@ ut_print_timestamp(
struct tm* cal_tm_ptr;
time_t tm;
-#ifdef HAVE_LOCALTIME_R
struct tm cal_tm;
time(&tm);
localtime_r(&tm, &cal_tm);
cal_tm_ptr = &cal_tm;
-#else
- time(&tm);
- cal_tm_ptr = localtime(&tm);
-#endif
- fprintf(file, "%d-%02d-%02d %02d:%02d:%02d %lx",
+ fprintf(file, "%d-%02d-%02d %02d:%02d:%02d %#zx",
cal_tm_ptr->tm_year + 1900,
cal_tm_ptr->tm_mon + 1,
cal_tm_ptr->tm_mday,
@@ -114,13 +100,12 @@ ut_print_timestamp(
/**********************************************************//**
Sprintfs a timestamp to a buffer, 13..14 chars plus terminating NUL. */
-UNIV_INTERN
void
ut_sprintf_timestamp(
/*=================*/
char* buf) /*!< in: buffer where to sprintf */
{
-#ifdef __WIN__
+#ifdef _WIN32
SYSTEMTIME cal_tm;
GetLocalTime(&cal_tm);
@@ -136,15 +121,10 @@ ut_sprintf_timestamp(
struct tm* cal_tm_ptr;
time_t tm;
-#ifdef HAVE_LOCALTIME_R
struct tm cal_tm;
time(&tm);
localtime_r(&tm, &cal_tm);
cal_tm_ptr = &cal_tm;
-#else
- time(&tm);
- cal_tm_ptr = localtime(&tm);
-#endif
sprintf(buf, "%02d%02d%02d %2d:%02d:%02d",
cal_tm_ptr->tm_year % 100,
cal_tm_ptr->tm_mon + 1,
@@ -155,58 +135,10 @@ ut_sprintf_timestamp(
#endif
}
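With the HAVE_LOCALTIME_R fallbacks removed, both timestamp printers rely unconditionally on localtime_r(), which writes into a caller-provided struct tm and is therefore thread-safe, unlike localtime() and its shared static buffer. The pattern in isolation:

#include <stdio.h>
#include <time.h>

void print_local_time(FILE* f)
{
	time_t		t = time(NULL);
	struct tm	tm_buf;

	localtime_r(&t, &tm_buf);

	fprintf(f, "%d-%02d-%02d %02d:%02d:%02d",
		tm_buf.tm_year + 1900,
		tm_buf.tm_mon + 1,
		tm_buf.tm_mday,
		tm_buf.tm_hour,
		tm_buf.tm_min,
		tm_buf.tm_sec);
}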
-#ifdef UNIV_HOTBACKUP
-/**********************************************************//**
-Sprintfs a timestamp to a buffer with no spaces and with ':' characters
-replaced by '_'. */
-UNIV_INTERN
-void
-ut_sprintf_timestamp_without_extra_chars(
-/*=====================================*/
- char* buf) /*!< in: buffer where to sprintf */
-{
-#ifdef __WIN__
- SYSTEMTIME cal_tm;
-
- GetLocalTime(&cal_tm);
-
- sprintf(buf, "%02d%02d%02d_%2d_%02d_%02d",
- (int) cal_tm.wYear % 100,
- (int) cal_tm.wMonth,
- (int) cal_tm.wDay,
- (int) cal_tm.wHour,
- (int) cal_tm.wMinute,
- (int) cal_tm.wSecond);
-#else
- struct tm* cal_tm_ptr;
- time_t tm;
-
-#ifdef HAVE_LOCALTIME_R
- struct tm cal_tm;
- time(&tm);
- localtime_r(&tm, &cal_tm);
- cal_tm_ptr = &cal_tm;
-#else
- time(&tm);
- cal_tm_ptr = localtime(&tm);
-#endif
- sprintf(buf, "%02d%02d%02d_%2d_%02d_%02d",
- cal_tm_ptr->tm_year % 100,
- cal_tm_ptr->tm_mon + 1,
- cal_tm_ptr->tm_mday,
- cal_tm_ptr->tm_hour,
- cal_tm_ptr->tm_min,
- cal_tm_ptr->tm_sec);
-#endif
-}
-#endif /* UNIV_HOTBACKUP */
-
-#ifndef UNIV_HOTBACKUP
/*************************************************************//**
Runs an idle loop on CPU. The argument gives the desired delay
in microseconds on 100 MHz Pentium + Visual C++.
-@return dummy value */
-UNIV_INTERN
+@return dummy value */
void
ut_delay(
/*=====*/
@@ -223,11 +155,9 @@ ut_delay(
UT_RESUME_PRIORITY_CPU();
}
-#endif /* !UNIV_HOTBACKUP */
/*************************************************************//**
Prints the contents of a memory buffer in hex and ascii. */
-UNIV_INTERN
void
ut_print_buf(
/*=========*/
@@ -240,10 +170,10 @@ ut_print_buf(
UNIV_MEM_ASSERT_RW(buf, len);
- fprintf(file, " len %lu; hex ", len);
+ fprintf(file, " len " ULINTPF "; hex ", len);
for (data = (const byte*) buf, i = 0; i < len; i++) {
- fprintf(file, "%02lx", (ulong)*data++);
+ fprintf(file, "%02x", *data++);
}
fputs("; asc ", file);
@@ -258,25 +188,59 @@ ut_print_buf(
putc(';', file);
}
-/**********************************************************************//**
-Sort function for ulint arrays. */
-UNIV_INTERN
+/*************************************************************//**
+Prints the contents of a memory buffer in hex. */
void
-ut_ulint_sort(
-/*==========*/
- ulint* arr, /*!< in/out: array to sort */
- ulint* aux_arr, /*!< in/out: aux array to use in sort */
- ulint low, /*!< in: lower bound */
- ulint high) /*!< in: upper bound */
+ut_print_buf_hex(
+/*=============*/
+ std::ostream& o, /*!< in/out: output stream */
+ const void* buf, /*!< in: memory buffer */
+ ulint len) /*!< in: length of the buffer */
+{
+ const byte* data;
+ ulint i;
+
+ static const char hexdigit[16] = {
+ '0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'
+ };
+
+ UNIV_MEM_ASSERT_RW(buf, len);
+
+ o << "(0x";
+
+ for (data = static_cast<const byte*>(buf), i = 0; i < len; i++) {
+ byte b = *data++;
+ o << hexdigit[int(b) >> 4] << hexdigit[b & 15];
+ }
+
+ o << ")";
+}
+
+/*************************************************************//**
+Prints the contents of a memory buffer in hex and ascii. */
+void
+ut_print_buf(
+/*=========*/
+ std::ostream& o, /*!< in/out: output stream */
+ const void* buf, /*!< in: memory buffer */
+ ulint len) /*!< in: length of the buffer */
{
- UT_SORT_FUNCTION_BODY(ut_ulint_sort, arr, aux_arr, low, high,
- ut_ulint_cmp);
+ const byte* data;
+ ulint i;
+
+ UNIV_MEM_ASSERT_RW(buf, len);
+
+ for (data = static_cast<const byte*>(buf), i = 0; i < len; i++) {
+ int c = static_cast<int>(*data++);
+ o << (isprint(c) ? static_cast<char>(c) : ' ');
+ }
+
+ ut_print_buf_hex(o, buf, len);
}
/*************************************************************//**
Calculates fast the number rounded up to the nearest power of 2.
-@return first power of 2 which is >= n */
-UNIV_INTERN
+@return first power of 2 which is >= n */
ulint
ut_2_power_up(
/*==========*/
@@ -295,65 +259,18 @@ ut_2_power_up(
return(res);
}
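ut_2_power_up() reaches its answer by repeated doubling. For 64-bit values the same rounding is often written branch-free with bit smearing; a sketch, valid for n >= 1 (n = 0 would wrap around to 0):

#include <stdint.h>

uint64_t pow2_round_up(uint64_t n)
{
	n--;		/* so that exact powers of 2 map to themselves */
	n |= n >> 1;
	n |= n >> 2;
	n |= n >> 4;
	n |= n >> 8;
	n |= n >> 16;
	n |= n >> 32;

	return(n + 1);
}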
-/**********************************************************************//**
-Outputs a NUL-terminated file name, quoted with apostrophes. */
-UNIV_INTERN
-void
-ut_print_filename(
-/*==============*/
- FILE* f, /*!< in: output stream */
- const char* name) /*!< in: name to print */
-{
- putc('\'', f);
- for (;;) {
- int c = *name++;
- switch (c) {
- case 0:
- goto done;
- case '\'':
- putc(c, f);
- /* fall through */
- default:
- putc(c, f);
- }
- }
-done:
- putc('\'', f);
-}
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Outputs a fixed-length string, quoted as an SQL identifier.
-If the string contains a slash '/', the string will be
-output as two identifiers separated by a period (.),
-as in SQL database_name.identifier. */
-UNIV_INTERN
-void
-ut_print_name(
-/*==========*/
- FILE* f, /*!< in: output stream */
- const trx_t* trx, /*!< in: transaction */
- ibool table_id,/*!< in: TRUE=print a table name,
- FALSE=print other identifier */
- const char* name) /*!< in: name to print */
-{
- ut_print_namel(f, trx, table_id, name, strlen(name));
-}
-
-/**********************************************************************//**
-Outputs a fixed-length string, quoted as an SQL identifier.
+/** Get a fixed-length string, quoted as an SQL identifier.
If the string contains a slash '/', the string will be
output as two identifiers separated by a period (.),
-as in SQL database_name.identifier. */
-UNIV_INTERN
-void
-ut_print_namel(
-/*===========*/
- FILE* f, /*!< in: output stream */
- const trx_t* trx, /*!< in: transaction (NULL=no quotes) */
- ibool table_id,/*!< in: TRUE=print a table name,
- FALSE=print other identifier */
- const char* name, /*!< in: name to print */
- ulint namelen)/*!< in: length of name */
+as in SQL database_name.identifier.
+@param[in]	trx	transaction (NULL=no quotes)
+@param[in]	name	table name
+@return string quoted as an SQL identifier
+*/
+std::string
+ut_get_name(
+ const trx_t* trx,
+ const char* name)
{
/* 2 * NAME_LEN for database and table name,
and some slack for the #mysql50# prefix and quotes */
@@ -361,11 +278,10 @@ ut_print_namel(
const char* bufend;
bufend = innobase_convert_name(buf, sizeof buf,
- name, namelen,
- trx ? trx->mysql_thd : NULL,
- table_id);
-
- fwrite(buf, 1, bufend - buf, f);
+ name, strlen(name),
+ trx ? trx->mysql_thd : NULL);
+ buf[bufend - buf] = '\0';
+ return(std::string(buf, 0, bufend - buf));
}
/**********************************************************************//**
@@ -373,47 +289,41 @@ Outputs a fixed-length string, quoted as an SQL identifier.
If the string contains a slash '/', the string will be
output as two identifiers separated by a period (.),
as in SQL database_name.identifier. */
-UNIV_INTERN
-std::string
-ut_get_name(
-/*=========*/
- const trx_t* trx, /*!< in: transaction (NULL=no quotes) */
- ibool table_id,/*!< in: TRUE=print a table name,
- FALSE=print other identifier */
+void
+ut_print_name(
+/*==========*/
+ FILE* f, /*!< in: output stream */
+ const trx_t* trx, /*!< in: transaction */
const char* name) /*!< in: name to print */
{
/* 2 * NAME_LEN for database and table name,
and some slack for the #mysql50# prefix and quotes */
char buf[3 * NAME_LEN];
const char* bufend;
- ulint namelen = strlen(name);
bufend = innobase_convert_name(buf, sizeof buf,
- name, namelen,
- trx ? trx->mysql_thd : NULL,
- table_id);
- buf[bufend-buf]='\0';
- std::string str(buf);
- return str;
+ name, strlen(name),
+ trx ? trx->mysql_thd : NULL);
+
+ if (fwrite(buf, 1, bufend - buf, f) != (size_t) (bufend - buf)) {
+ perror("fwrite");
+ }
}
-/**********************************************************************//**
-Formats a table or index name, quoted as an SQL identifier. If the name
-contains a slash '/', the result will contain two identifiers separated by
-a period (.), as in SQL database_name.identifier.
+/** Format a table name, quoted as an SQL identifier.
+If the name contains a slash '/', the result will contain two
+identifiers separated by a period (.), as in SQL
+database_name.table_name.
+@see table_name_t
+@param[in] name table or index name
+@param[out] formatted formatted result, will be NUL-terminated
+@param[in] formatted_size size of the buffer in bytes
@return pointer to 'formatted' */
-UNIV_INTERN
char*
ut_format_name(
-/*===========*/
- const char* name, /*!< in: table or index name, must be
- '\0'-terminated */
- ibool is_table, /*!< in: if TRUE then 'name' is a table
- name */
- char* formatted, /*!< out: formatted result, will be
- '\0'-terminated */
- ulint formatted_size) /*!< out: no more than this number of
- bytes will be written to 'formatted' */
+ const char* name,
+ char* formatted,
+ ulint formatted_size)
{
switch (formatted_size) {
case 1:
@@ -426,7 +336,7 @@ ut_format_name(
char* end;
end = innobase_convert_name(formatted, formatted_size,
- name, strlen(name), NULL, is_table);
+ name, strlen(name), NULL);
/* If the space in 'formatted' was completely used, then sacrifice
the last character in order to write '\0' at the end. */
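
A minimal sketch of the simplified signature (the buffer size is illustrative):

	char	buf[64];

	ut_format_name("test/t1", buf, sizeof buf);
	/* buf now holds something like `test`.`t1`; if the buffer is too
	small, the result is truncated but still NUL-terminated. */
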
@@ -443,7 +353,6 @@ ut_format_name(
/**********************************************************************//**
Catenate files. */
-UNIV_INTERN
void
ut_copy_file(
/*=========*/
@@ -459,85 +368,49 @@ ut_copy_file(
? (size_t) len
: sizeof buf;
size_t size = fread(buf, 1, maxs, src);
- fwrite(buf, 1, size, dest);
+ if (fwrite(buf, 1, size, dest) != size) {
+ perror("fwrite");
+ }
len -= (long) size;
if (size < maxs) {
break;
}
} while (len > 0);
}
-#endif /* !UNIV_HOTBACKUP */
-#ifdef __WIN__
-# include <stdarg.h>
-/**********************************************************************//**
-A substitute for vsnprintf(3), formatted output conversion into
-a limited buffer. Note: this function DOES NOT return the number of
-characters that would have been printed if the buffer was unlimited because
-VC's _vsnprintf() returns -1 in this case and we would need to call
-_vscprintf() in addition to estimate that but we would need another copy
-of "ap" for that and VC does not provide va_copy(). */
-UNIV_INTERN
-void
-ut_vsnprintf(
+/** Get a fixed-length string, quoted as an SQL identifier.
+If the string contains a slash '/', the string will be
+output as two identifiers separated by a period (.),
+as in SQL database_name.identifier.
+@return the name, quoted as an SQL identifier */
+std::string
+ut_get_name(
/*=========*/
- char* str, /*!< out: string */
- size_t size, /*!< in: str size */
- const char* fmt, /*!< in: format */
- va_list ap) /*!< in: format values */
-{
- _vsnprintf(str, size, fmt, ap);
- str[size - 1] = '\0';
-}
-
-/**********************************************************************//**
-A substitute for snprintf(3), formatted output conversion into
-a limited buffer.
-@return number of characters that would have been printed if the size
-were unlimited, not including the terminating '\0'. */
-UNIV_INTERN
-int
-ut_snprintf(
-/*========*/
- char* str, /*!< out: string */
- size_t size, /*!< in: str size */
- const char* fmt, /*!< in: format */
- ...) /*!< in: format values */
+ const trx_t* trx, /*!< in: transaction (NULL=no quotes) */
+ ibool table_id,/*!< in: TRUE=print a table name,
+ FALSE=print other identifier */
+ const char* name) /*!< in: name to print */
{
- int res;
- va_list ap1;
- va_list ap2;
-
- va_start(ap1, fmt);
- va_start(ap2, fmt);
-
- res = _vscprintf(fmt, ap1);
- ut_a(res != -1);
-
- if (size > 0) {
- _vsnprintf(str, size, fmt, ap2);
-
- if ((size_t) res >= size) {
- str[size - 1] = '\0';
- }
- }
-
- va_end(ap1);
- va_end(ap2);
+ /* 2 * NAME_LEN for database and table name,
+ and some slack for the #mysql50# prefix and quotes */
+ char buf[3 * NAME_LEN];
+ const char* bufend;
+ ulint namelen = strlen(name);
- return(res);
+ bufend = innobase_convert_name(buf, sizeof buf,
+ name, namelen,
+ trx ? trx->mysql_thd : NULL);
+ buf[bufend-buf]='\0';
+ std::string str(buf);
+ return str;
}
-#endif /* __WIN__ */
-/*************************************************************//**
-Convert an error number to a human readable text message. The
-returned string is static and should not be freed or modified.
-@return string, describing the error */
-UNIV_INTERN
+/** Convert an error number to a human readable text message.
+The returned string is static and should not be freed or modified.
+@param[in] num InnoDB internal error number
+@return string, describing the error */
const char*
ut_strerr(
-/*======*/
- dberr_t num) /*!< in: error number */
+ dberr_t num)
{
switch (num) {
case DB_SUCCESS:
@@ -562,8 +435,6 @@ ut_strerr(
return("Rollback");
case DB_DUPLICATE_KEY:
return("Duplicate key");
- case DB_QUE_THR_SUSPENDED:
- return("The queue thread has been suspended");
case DB_MISSING_HISTORY:
return("Required history data has been deleted");
case DB_CLUSTER_NOT_FOUND:
@@ -596,6 +467,8 @@ ut_strerr(
return("Tablespace already exists");
case DB_TABLESPACE_DELETED:
return("Tablespace deleted or being deleted");
+ case DB_TABLESPACE_TRUNCATED:
+ return("Tablespace was truncated");
case DB_TABLESPACE_NOT_FOUND:
return("Tablespace not found");
case DB_LOCK_TABLE_FULL:
@@ -640,28 +513,38 @@ ut_strerr(
return("I/O error");
case DB_TABLE_IN_FK_CHECK:
return("Table is being used in foreign key check");
- case DB_DATA_MISMATCH:
- return("data mismatch");
- case DB_SCHEMA_NOT_LOCKED:
- return("schema not locked");
case DB_NOT_FOUND:
return("not found");
case DB_ONLINE_LOG_TOO_BIG:
return("Log size exceeded during online index creation");
- case DB_DICT_CHANGED:
- return("Table dictionary has changed");
case DB_IDENTIFIER_TOO_LONG:
return("Identifier name is too long");
case DB_FTS_EXCEED_RESULT_CACHE_LIMIT:
return("FTS query exceeds result cache limit");
- case DB_TEMP_FILE_WRITE_FAILURE:
+ case DB_TEMP_FILE_WRITE_FAIL:
return("Temp file write failure");
+ case DB_CANT_CREATE_GEOMETRY_OBJECT:
+		return("Can't create specified geometry data object");
+ case DB_CANNOT_OPEN_FILE:
+ return("Cannot open a file");
+ case DB_TABLE_CORRUPT:
+ return("Table is corrupted");
case DB_FTS_TOO_MANY_WORDS_IN_PHRASE:
return("Too many words in a FTS phrase or proximity search");
- case DB_TOO_BIG_FOR_REDO:
- return("BLOB record length is greater than 10%% of redo log");
case DB_DECRYPTION_FAILED:
return("Table is encrypted but decrypt failed.");
+ case DB_IO_PARTIAL_FAILED:
+ return("Partial IO failed");
+ case DB_FORCED_ABORT:
+ return("Transaction aborted by another higher priority "
+ "transaction");
+ case DB_COMPUTE_VALUE_FAILED:
+ return("Compute generated column failed");
+ case DB_NO_FK_ON_S_BASE_COL:
+ return("Cannot add foreign key on the base column "
+ "of stored column");
+ case DB_IO_NO_PUNCH_HOLE:
+		return("File system does not support punch hole (trim) operation.");
case DB_PAGE_CORRUPTED:
return("Page read from tablespace is corrupted.");
@@ -677,4 +560,113 @@ ut_strerr(
/* NOT REACHED */
return("Unknown error");
}
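
A sketch of how the mapping is typically consumed (the error value is illustrative):

	dberr_t	err = DB_DUPLICATE_KEY;

	if (err != DB_SUCCESS) {
		/* ut_strerr() returns a static string; never free it */
		fprintf(stderr, "InnoDB: %s\n", ut_strerr(err));
	}
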
+
+#ifdef UNIV_PFS_MEMORY
+
+/** Extract the basename of a file without its extension.
+For example, extract "foo0bar" out of "/path/to/foo0bar.cc".
+@param[in] file file path, e.g. "/path/to/foo0bar.cc"
+@param[out] base result, e.g. "foo0bar"
+@param[in]	base_size	size of the output buffer 'base'; if there
+is not enough space, the result will be truncated, but always
+'\0'-terminated
+@return number of characters that would have been printed if the size
+were unlimited (not including the final '\0') */
+size_t
+ut_basename_noext(
+ const char* file,
+ char* base,
+ size_t base_size)
+{
+ /* Assuming 'file' contains something like the following,
+	extract the file name without the extension by
+ setting 'beg' and 'len'.
+ ...mysql-trunk/storage/innobase/dict/dict0dict.cc:302
+ ^-- beg, len=9
+ */
+
+ const char* beg = strrchr(file, OS_PATH_SEPARATOR);
+
+ if (beg == NULL) {
+ beg = file;
+ } else {
+ beg++;
+ }
+
+ size_t len = strlen(beg);
+
+ const char* end = strrchr(beg, '.');
+
+ if (end != NULL) {
+ len = end - beg;
+ }
+
+ const size_t copy_len = std::min(len, base_size - 1);
+
+ memcpy(base, beg, copy_len);
+
+ base[copy_len] = '\0';
+
+ return(len);
+}
+
+#endif /* UNIV_PFS_MEMORY */
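
A usage sketch for ut_basename_noext(), assuming a Unix path separator (the function is only compiled under UNIV_PFS_MEMORY):

	char	base[8];
	size_t	len = ut_basename_noext("/path/to/foo0bar.cc", base,
					sizeof base);
	/* base == "foo0bar", len == 7; with a smaller buffer the result
	is truncated but still NUL-terminated, while 'len' still reports
	the untruncated length, as with snprintf() */
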
+
+namespace ib {
+
+info::~info()
+{
+ sql_print_information("InnoDB: %s", m_oss.str().c_str());
+}
+
+warn::~warn()
+{
+ sql_print_warning("InnoDB: %s", m_oss.str().c_str());
+}
+
+error::~error()
+{
+ sql_print_error("InnoDB: %s", m_oss.str().c_str());
+}
+
+#ifdef _MSC_VER
+/* disable warning
+ "ib::fatal::~fatal': destructor never returns, potential memory leak"
+ on Windows.
+*/
+#pragma warning (push)
+#pragma warning (disable : 4722)
+#endif
+
+ATTRIBUTE_NORETURN
+fatal::~fatal()
+{
+ sql_print_error("[FATAL] InnoDB: %s", m_oss.str().c_str());
+ abort();
+}
+
+#ifdef _MSC_VER
+#pragma warning (pop)
+#endif
+
+error_or_warn::~error_or_warn()
+{
+ if (m_error) {
+ sql_print_error("InnoDB: %s", m_oss.str().c_str());
+ } else {
+ sql_print_warning("InnoDB: %s", m_oss.str().c_str());
+ }
+}
+
+fatal_or_error::~fatal_or_error()
+{
+ sql_print_error(m_fatal ? "[FATAL] InnoDB: %s" : "InnoDB: %s",
+ m_oss.str().c_str());
+ if (m_fatal) {
+ abort();
+ }
+}
+
+} // namespace ib
+
#endif /* !UNIV_INNOCHECKSUM */
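
A sketch of how these RAII loggers are used (names such as n_pages and is_fatal are placeholders; the stream operators are assumed to be the ones declared for these classes in ut0ut.h):

	/* the message is buffered in the object's ostringstream and
	flushed to the server log, prefixed with "InnoDB: ", when the
	temporary is destroyed at the end of the statement */
	ib::info() << "read " << n_pages << " pages";

	/* ib::fatal never returns: its destructor logs and calls abort() */
	if (is_fatal) {
		ib::fatal() << "unrecoverable corruption";
	}
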
diff --git a/storage/innobase/ut/ut0vec.cc b/storage/innobase/ut/ut0vec.cc
index 591895049de..c9262bc9e3a 100644
--- a/storage/innobase/ut/ut0vec.cc
+++ b/storage/innobase/ut/ut0vec.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,14 +24,10 @@ Created 4/6/2006 Osku Salerma
************************************************************************/
#include "ut0vec.h"
-#ifdef UNIV_NONINL
-#include "ut0vec.ic"
-#endif
#include "mem0mem.h"
/********************************************************************
Create a new vector with the given initial size. */
-UNIV_INTERN
ib_vector_t*
ib_vector_create(
/*=============*/
@@ -61,7 +57,6 @@ ib_vector_create(
/********************************************************************
Resize the vector. Currently the vector can only grow, and on each
resize we double the number of elements it can hold.
-UNIV_INTERN
void
ib_vector_resize(
/*=============*/
diff --git a/storage/innobase/ut/ut0wqueue.cc b/storage/innobase/ut/ut0wqueue.cc
index e6a27263ac3..ae97009430e 100644
--- a/storage/innobase/ut/ut0wqueue.cc
+++ b/storage/innobase/ut/ut0wqueue.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2006, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
@@ -17,6 +17,8 @@ this program; if not, write to the Free Software Foundation, Inc.,
*****************************************************************************/
+#include "ut0list.h"
+#include "mem0mem.h"
#include "ut0wqueue.h"
/*******************************************************************//**
@@ -28,27 +30,27 @@ Created 4/26/2006 Osku Salerma
/****************************************************************//**
Create a new work queue.
-@return work queue */
-UNIV_INTERN
+@return work queue */
ib_wqueue_t*
ib_wqueue_create(void)
/*===================*/
{
- ib_wqueue_t* wq = static_cast<ib_wqueue_t*>(mem_alloc(sizeof(*wq)));
+ ib_wqueue_t* wq = static_cast<ib_wqueue_t*>(
+ ut_malloc_nokey(sizeof(*wq)));
/* Function ib_wqueue_create() has not been used anywhere,
not necessary to instrument this mutex */
- mutex_create(PFS_NOT_INSTRUMENTED, &wq->mutex, SYNC_WORK_QUEUE);
+
+ mutex_create(LATCH_ID_WORK_QUEUE, &wq->mutex);
wq->items = ib_list_create();
- wq->event = os_event_create();
+ wq->event = os_event_create(0);
return(wq);
}
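
A minimal sketch of the queue API after these changes ('job' is any caller-owned pointer and 'heap' a live mem_heap_t; both are placeholders):

	ib_wqueue_t*	wq = ib_wqueue_create();

	ib_wqueue_add(wq, job, heap, false);

	void*	work = ib_wqueue_wait(wq);	/* blocks until an item arrives */

	ib_wqueue_free(wq);
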
/****************************************************************//**
Free a work queue. */
-UNIV_INTERN
void
ib_wqueue_free(
/*===========*/
@@ -56,9 +58,9 @@ ib_wqueue_free(
{
mutex_free(&wq->mutex);
ib_list_free(wq->items);
- os_event_free(wq->event);
+ os_event_destroy(wq->event);
- mem_free(wq);
+ ut_free(wq);
}
/** Add a work item to the queue.
@@ -66,7 +68,6 @@ ib_wqueue_free(
@param[in] item work item
@param[in,out] heap memory heap to use for allocating list node
@param[in] wq_locked work queue mutex locked */
-UNIV_INTERN
void
ib_wqueue_add(ib_wqueue_t* wq, void* item, mem_heap_t* heap, bool wq_locked)
{
@@ -84,8 +85,7 @@ ib_wqueue_add(ib_wqueue_t* wq, void* item, mem_heap_t* heap, bool wq_locked)
/****************************************************************//**
Wait for a work item to appear in the queue.
-@return work item */
-UNIV_INTERN
+@return work item */
void*
ib_wqueue_wait(
/*===========*/
@@ -123,7 +123,6 @@ ib_wqueue_wait(
/********************************************************************
Wait for a work item to appear in the queue for specified time. */
-
void*
ib_wqueue_timedwait(
/*================*/
@@ -135,7 +134,7 @@ ib_wqueue_timedwait(
for (;;) {
ulint error;
- ib_int64_t sig_count;
+ int64_t sig_count;
mutex_enter(&wq->mutex);
@@ -195,7 +194,6 @@ ib_wqueue_nowait(
return (node ? node->data : NULL);
}
-
/** Check if queue is empty.
@param wq wait queue
@return whether the queue is empty */
diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc
index 6355781e19c..f1cabfd2039 100644
--- a/storage/maria/ha_maria.cc
+++ b/storage/maria/ha_maria.cc
@@ -256,9 +256,9 @@ static MYSQL_SYSVAR_ULONG(pagecache_file_hash_size, pagecache_file_hash_size,
"value is probably 1/10 of number of possible open Aria files.", 0,0,
512, 128, 16384, 1);
-static MYSQL_SYSVAR_SET(recover, maria_recover_options, PLUGIN_VAR_OPCMDARG,
+static MYSQL_SYSVAR_SET(recover_options, maria_recover_options, PLUGIN_VAR_OPCMDARG,
"Specifies how corrupted tables should be automatically repaired",
- NULL, NULL, HA_RECOVER_DEFAULT, &maria_recover_typelib);
+ NULL, NULL, HA_RECOVER_BACKUP|HA_RECOVER_QUICK, &maria_recover_typelib);
static MYSQL_THDVAR_ULONG(repair_threads, PLUGIN_VAR_RQCMDARG,
"Number of threads to use when repairing Aria tables. The value of 1 "
@@ -355,10 +355,12 @@ static PSI_file_info all_aria_files[]=
{ &key_file_control, "control", PSI_FLAG_GLOBAL}
};
+# ifdef HAVE_PSI_STAGE_INTERFACE
static PSI_stage_info *all_aria_stages[]=
{
& stage_waiting_for_a_resource
};
+# endif /* HAVE_PSI_STAGE_INTERFACE */
static void init_aria_psi_keys(void)
{
@@ -379,9 +381,10 @@ static void init_aria_psi_keys(void)
count= array_elements(all_aria_files);
mysql_file_register(category, all_aria_files, count);
-
+# ifdef HAVE_PSI_STAGE_INTERFACE
count= array_elements(all_aria_stages);
mysql_stage_register(category, all_aria_stages, count);
+# endif /* HAVE_PSI_STAGE_INTERFACE */
}
#else
#define init_aria_psi_keys() /* no-op */
@@ -406,7 +409,7 @@ static void _ma_check_print_msg(HA_CHECK *param, const char *msg_type,
{
THD *thd= (THD *) param->thd;
Protocol *protocol= thd->protocol;
- uint length, msg_length;
+ size_t length, msg_length;
char msgbuf[MYSQL_ERRMSG_SIZE];
char name[NAME_LEN * 2 + 2];
@@ -436,17 +439,17 @@ static void _ma_check_print_msg(HA_CHECK *param, const char *msg_type,
NullS) - name);
/*
TODO: switch from protocol to push_warning here. The main reason we didn't
- it yet is parallel repair. Due to following trace:
- ma_check_print_msg/push_warning/sql_alloc/my_pthread_getspecific_ptr.
+    do it yet is parallel repair, whose threads have no THD object accessible
+    via current_thd.
Also we likely need to lock mutex here (in both cases with protocol and
push_warning).
*/
protocol->prepare_for_resend();
- protocol->store(name, length, system_charset_info);
+ protocol->store(name, (uint)length, system_charset_info);
protocol->store(param->op_name, system_charset_info);
protocol->store(msg_type, system_charset_info);
- protocol->store(msgbuf, msg_length, system_charset_info);
+ protocol->store(msgbuf, (uint)msg_length, system_charset_info);
if (protocol->write())
sql_print_error("Failed on my_net_write, writing to stderr instead: %s.%s: %s\n",
param->db_name, param->table_name, msgbuf);
@@ -523,6 +526,14 @@ static int table2maria(TABLE *table_arg, data_file_type row_type,
for (j= 0; j < pos->user_defined_key_parts; j++)
{
Field *field= pos->key_part[j].field;
+
+ if (!table_arg->field[field->field_index]->stored_in_db())
+ {
+ my_free(*recinfo_out);
+ my_error(ER_KEY_BASED_ON_GENERATED_VIRTUAL_COLUMN, MYF(0));
+ DBUG_RETURN(HA_ERR_UNSUPPORTED);
+ }
+
type= field->key_type();
keydef[i].seg[j].flag= pos->key_part[j].key_part_flag;
@@ -613,8 +624,8 @@ static int table2maria(TABLE *table_arg, data_file_type row_type,
}
}
}
- DBUG_PRINT("loop", ("found: 0x%lx recpos: %d minpos: %d length: %d",
- (long) found, recpos, minpos, length));
+ DBUG_PRINT("loop", ("found: %p recpos: %d minpos: %d length: %d",
+ found, recpos, minpos, length));
if (!found)
break;
@@ -827,7 +838,10 @@ extern "C" {
int _ma_killed_ptr(HA_CHECK *param)
{
- return thd_killed((THD*)param->thd);
+  if (likely(thd_killed((THD*)param->thd) == 0))
+ return 0;
+ my_errno= HA_ERR_ABORTED_BY_USER;
+ return 1;
}
@@ -1290,6 +1304,7 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt)
if (!file || !param) return HA_ADMIN_INTERNAL_ERROR;
+ unmap_file(file);
maria_chk_init(param);
param->thd= thd;
param->op_name= "check";
@@ -1510,6 +1525,7 @@ int ha_maria::zerofill(THD * thd, HA_CHECK_OPT *check_opt)
if (!file || !param)
return HA_ADMIN_INTERNAL_ERROR;
+ unmap_file(file);
old_trn= file->trn;
maria_chk_init(param);
param->thd= thd;
@@ -1608,6 +1624,7 @@ int ha_maria::repair(THD *thd, HA_CHECK *param, bool do_optimize)
param->out_flag= 0;
share->state.dupp_key= MI_MAX_KEY;
strmov(fixed_name, share->open_file_name.str);
+ unmap_file(file);
/*
Don't lock tables if we have used LOCK TABLE or if we come from
@@ -1665,8 +1682,11 @@ int ha_maria::repair(THD *thd, HA_CHECK *param, bool do_optimize)
}
if (error && file->create_unique_index_by_sort &&
share->state.dupp_key != MAX_KEY)
+ {
+ my_errno= HA_ERR_FOUND_DUPP_KEY;
print_keydup_error(table, &table->key_info[share->state.dupp_key],
MYF(0));
+ }
}
else
{
@@ -1784,7 +1804,6 @@ int ha_maria::assign_to_keycache(THD * thd, HA_CHECK_OPT *check_opt)
TABLE_LIST *table_list= table->pos_in_table_list;
DBUG_ENTER("ha_maria::assign_to_keycache");
-
table->keys_in_use_for_query.clear_all();
if (table_list->process_index_hints(table))
@@ -2729,7 +2748,8 @@ int ha_maria::external_lock(THD *thd, int lock_type)
}
else
{
- TRN *trn= (file->trn != &dummy_transaction_object ? file->trn : 0);
+ /* We have to test for THD_TRN to protect against implicit commits */
+ TRN *trn= (file->trn != &dummy_transaction_object && THD_TRN ? file->trn : 0);
/* End of transaction */
/*
@@ -2765,7 +2785,7 @@ int ha_maria::external_lock(THD *thd, int lock_type)
changes to commit (rollback shouldn't be tested).
*/
DBUG_ASSERT(!thd->get_stmt_da()->is_sent() ||
- thd->killed == KILL_CONNECTION);
+ thd->killed);
/* autocommit ? rollback a transaction */
#ifdef MARIA_CANNOT_ROLLBACK
if (ma_commit(trn))
@@ -2784,9 +2804,12 @@ int ha_maria::external_lock(THD *thd, int lock_type)
}
}
} /* if transactional table */
- DBUG_RETURN(maria_lock_database(file, !table->s->tmp_table ?
+ int result = maria_lock_database(file, !table->s->tmp_table ?
lock_type : ((lock_type == F_UNLCK) ?
- F_UNLCK : F_EXTRA_LCK)));
+ F_UNLCK : F_EXTRA_LCK));
+ if (!file->s->base.born_transactional)
+ file->state= &file->s->state.state; // Restore state if clone
+ DBUG_RETURN(result);
}
int ha_maria::start_stmt(THD *thd, thr_lock_type lock_type)
@@ -2836,7 +2859,20 @@ static void reset_thd_trn(THD *thd, MARIA_HA *first_table)
THD_TRN= NULL;
for (MARIA_HA *table= first_table; table ;
table= table->trn_next)
+ {
_ma_reset_trn_for_table(table);
+
+ /*
+ If table has changed by this statement, invalidate it from the query
+ cache
+ */
+ if (table->row_changes != table->start_row_changes)
+ {
+ table->start_row_changes= table->row_changes;
+ DBUG_ASSERT(table->s->chst_invalidator != NULL);
+ (*table->s->chst_invalidator)(table->s->data_file_name.str);
+ }
+ }
DBUG_VOID_RETURN;
}
@@ -3302,7 +3338,10 @@ static int maria_commit(handlerton *hton __attribute__ ((unused)),
THD *thd, bool all)
{
TRN *trn= THD_TRN;
+ int res;
+ MARIA_HA *used_instances= (MARIA_HA*) trn->used_instances;
DBUG_ENTER("maria_commit");
+
trnman_reset_locked_tables(trn, 0);
trnman_set_flags(trn, trnman_get_flags(trn) & ~TRN_STATE_INFO_LOGGED);
@@ -3310,8 +3349,9 @@ static int maria_commit(handlerton *hton __attribute__ ((unused)),
if ((thd->variables.option_bits & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) &&
!all)
DBUG_RETURN(0); // end of statement
- reset_thd_trn(thd, (MARIA_HA*) trn->used_instances);
- DBUG_RETURN(ma_commit(trn)); // end of transaction
+ res= ma_commit(trn);
+ reset_thd_trn(thd, used_instances);
+ DBUG_RETURN(res);
}
@@ -3398,7 +3438,7 @@ bool maria_show_status(handlerton *hton,
{
char *file;
const char *status;
- uint length, status_len;
+ size_t length, status_len;
MY_STAT stat_buff, *stat;
const char error[]= "can't stat";
char object[SHOW_MSG_LEN];
@@ -3426,8 +3466,8 @@ bool maria_show_status(handlerton *hton,
status= needed;
status_len= sizeof(needed) - 1;
}
- length= my_snprintf(object, SHOW_MSG_LEN, "Size %12lu ; %s",
- (ulong) stat->st_size, file);
+ length= my_snprintf(object, SHOW_MSG_LEN, "Size %12llu ; %s",
+ (ulonglong) stat->st_size, file);
}
print(thd, engine_name->str, engine_name->length,
@@ -3479,7 +3519,7 @@ static int mark_recovery_start(const char* log_dir)
DBUG_ENTER("mark_recovery_start");
if (!(maria_recover_options & HA_RECOVER_ANY))
ma_message_no_user(ME_JUST_WARNING, "Please consider using option"
- " --aria-recover[=...] to automatically check and"
+ " --aria-recover-options[=...] to automatically check and"
" repair tables when logs are removed by option"
" --aria-force-start-after-recovery-failures=#");
if (recovery_failures >= force_start_after_recovery_failures)
@@ -3682,7 +3722,7 @@ struct st_mysql_sys_var* system_variables[]= {
MYSQL_SYSVAR(pagecache_buffer_size),
MYSQL_SYSVAR(pagecache_division_limit),
MYSQL_SYSVAR(pagecache_file_hash_size),
- MYSQL_SYSVAR(recover),
+ MYSQL_SYSVAR(recover_options),
MYSQL_SYSVAR(repair_threads),
MYSQL_SYSVAR(sort_buffer_size),
MYSQL_SYSVAR(stats_method),
diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c
index f03aef9b228..ad6d085b12a 100644
--- a/storage/maria/ma_bitmap.c
+++ b/storage/maria/ma_bitmap.c
@@ -105,7 +105,7 @@
- On checkpoint
(Ie: When we do a checkpoint, we have to ensure that all bitmaps are
put on disk even if they are not in the page cache).
- - When explicitely requested (for example on backup or after recovery,
+ - When explicitly requested (for example on backup or after recovery,
to simplify things)
The flow of writing a row is that:
@@ -878,8 +878,8 @@ static void _ma_print_bitmap_changes(MARIA_FILE_BITMAP *bitmap)
end= bitmap->map + bitmap->used_size;
DBUG_LOCK_FILE;
- fprintf(DBUG_FILE,"\nBitmap page changes at page: %lu bitmap: 0x%lx\n",
- (ulong) bitmap->page, (long) bitmap->map);
+ fprintf(DBUG_FILE,"\nBitmap page changes at page: %lu bitmap: %p\n",
+ (ulong) bitmap->page, bitmap->map);
page= (ulong) bitmap->page+1;
for (pos= bitmap->map, org_pos= bitmap->map + bitmap->block_size ;
@@ -1318,7 +1318,7 @@ static my_bool allocate_head(MARIA_FILE_BITMAP *bitmap, uint size,
if (first_found)
{
first_found= 0;
- bitmap->full_head_size= (data - bitmap->map);
+ bitmap->full_head_size= (uint)(data - bitmap->map);
}
}
if (pattern <= min_bits)
@@ -1437,7 +1437,7 @@ static my_bool allocate_tail(MARIA_FILE_BITMAP *bitmap, uint size,
if (first_found)
{
first_found= 0;
- bitmap->full_tail_size= (data - bitmap->map);
+ bitmap->full_tail_size= (uint)(data - bitmap->map);
}
}
diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c
index 9effdd7cd2e..acf108f8886 100644
--- a/storage/maria/ma_blockrec.c
+++ b/storage/maria/ma_blockrec.c
@@ -449,9 +449,7 @@ my_bool _ma_once_end_block_record(MARIA_SHARE *share)
if (share->bitmap.file.file >= 0)
{
if (flush_pagecache_blocks(share->pagecache, &share->bitmap.file,
- ((share->temporary || share->deleting) ?
- FLUSH_IGNORE_CHANGED :
- FLUSH_RELEASE)))
+ share->deleting ? FLUSH_IGNORE_CHANGED : FLUSH_RELEASE))
res= 1;
/*
File must be synced as it is going out of the maria_open_list and so
@@ -5246,6 +5244,8 @@ my_bool _ma_scan_init_block_record(MARIA_HA *info)
{
MARIA_SHARE *share= info->s;
DBUG_ENTER("_ma_scan_init_block_record");
+ DBUG_ASSERT(info->dfile.file == share->bitmap.file.file);
+
/*
bitmap_buff may already be allocated if this is the second call to
rnd_init() without a rnd_end() in between, see sql/handler.h
diff --git a/storage/maria/ma_blockrec.h b/storage/maria/ma_blockrec.h
index a8eaeef7bcc..0382eb44006 100644
--- a/storage/maria/ma_blockrec.h
+++ b/storage/maria/ma_blockrec.h
@@ -50,8 +50,8 @@
#define SUB_RANGE_SIZE 2
#define BLOCK_FILLER_SIZE 2
#define ROW_EXTENT_SIZE (ROW_EXTENT_PAGE_SIZE + ROW_EXTENT_COUNT_SIZE)
-#define TAIL_BIT 0x8000 /* Bit in page_count to signify tail */
-#define START_EXTENT_BIT 0x4000 /* Bit in page_count to signify start*/
+#define TAIL_BIT 0x8000U /* Bit in page_count to signify tail */
+#define START_EXTENT_BIT 0x4000U /* Bit in page_count to signify start*/
/* page_count set by bitmap code for tail pages */
#define TAIL_PAGE_COUNT_MARKER 0xffff
/* Number of extents reserved MARIA_BITMAP_BLOCKS to store head part */
@@ -75,7 +75,7 @@
#define PAGE_TYPE_MASK 7
enum en_page_type { UNALLOCATED_PAGE, HEAD_PAGE, TAIL_PAGE, BLOB_PAGE, MAX_PAGE_TYPE };
-#define PAGE_CAN_BE_COMPACTED 128 /* Bit in PAGE_TYPE */
+#define PAGE_CAN_BE_COMPACTED 128U /* Bit in PAGE_TYPE */
#define PAGE_TYPE_OFFSET LSN_SIZE
#define DIR_COUNT_OFFSET (LSN_SIZE+PAGE_TYPE_SIZE)
@@ -86,12 +86,12 @@ enum en_page_type { UNALLOCATED_PAGE, HEAD_PAGE, TAIL_PAGE, BLOB_PAGE, MAX_PAGE_
#define FULL_PAGE_KEY_VERSION_OFFSET (PAGE_TYPE_OFFSET + PAGE_TYPE_SIZE)
/* Bits used for flag uchar (one byte, first in record) */
-#define ROW_FLAG_TRANSID 1
-#define ROW_FLAG_VER_PTR 2
-#define ROW_FLAG_DELETE_TRANSID 4
-#define ROW_FLAG_NULLS_EXTENDED 8
-#define ROW_FLAG_EXTENTS 128
-#define ROW_FLAG_ALL (1+2+4+8+128)
+#define ROW_FLAG_TRANSID 1U
+#define ROW_FLAG_VER_PTR 2U
+#define ROW_FLAG_DELETE_TRANSID 4U
+#define ROW_FLAG_NULLS_EXTENDED 8U
+#define ROW_FLAG_EXTENTS 128U
+#define ROW_FLAG_ALL (1U+2U+4U+8U+128U)
/* Size for buffer to hold information about bitmap */
#define MAX_BITMAP_INFO_LENGTH ((MARIA_MAX_KEY_BLOCK_LENGTH*8/3)*(61*11/60)+10)
@@ -99,8 +99,8 @@ enum en_page_type { UNALLOCATED_PAGE, HEAD_PAGE, TAIL_PAGE, BLOB_PAGE, MAX_PAGE_
/******** Variables that affects how data pages are utilized ********/
-/* Minium size of tail segment */
-#define MIN_TAIL_SIZE 32
+/* Minimum size of tail segment */
+#define MIN_TAIL_SIZE 32U
/*
Fixed length part of Max possible header size; See row data structure
diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c
index 789a4d0d5b6..c6858ff63b8 100644
--- a/storage/maria/ma_check.c
+++ b/storage/maria/ma_check.c
@@ -772,7 +772,7 @@ static
void maria_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull,
const uchar *key)
{
- uint first_null, kp;
+ size_t first_null, kp;
first_null= ha_find_null(keyseg, key) - keyseg;
/*
All prefix tuples that don't include keypart_{first_null} are not-null
@@ -814,7 +814,7 @@ int maria_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull,
const uchar *last_key)
{
uint diffs[2];
- uint first_null_seg, kp;
+ size_t first_null_seg, kp;
HA_KEYSEG *seg;
/*
@@ -1362,6 +1362,7 @@ static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend,
pos=block_info.filepos+block_info.block_len;
if (block_info.rec_len > (uint) share->base.max_pack_length)
{
+ my_errno= HA_ERR_WRONG_IN_RECORD;
_ma_check_print_error(param,"Found too long record (%lu) at %s",
(ulong) block_info.rec_len,
llstr(start_recpos,llbuff));
@@ -2516,8 +2517,8 @@ static int maria_drop_all_indexes(HA_CHECK *param, MARIA_HA *info,
DBUG_PRINT("repair", ("creating missing indexes"));
for (i= 0; i < share->base.keys; i++)
{
- DBUG_PRINT("repair", ("index #: %u key_root: 0x%lx active: %d",
- i, (long) state->key_root[i],
+ DBUG_PRINT("repair", ("index #: %u key_root:%lld active: %d",
+ i, state->key_root[i],
maria_is_key_active(state->key_map, i)));
if ((state->key_root[i] != HA_OFFSET_ERROR) &&
!maria_is_key_active(state->key_map, i))
@@ -4474,8 +4475,8 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info,
*/
sort_param[i].read_cache= ((rep_quick || !i) ? param->read_cache :
new_data_cache);
- DBUG_PRINT("io_cache_share", ("thread: %u read_cache: 0x%lx",
- i, (long) &sort_param[i].read_cache));
+ DBUG_PRINT("io_cache_share", ("thread: %u read_cache: %p",
+ i, &sort_param[i].read_cache));
/*
two approaches: the same amount of memory for each thread
@@ -4996,6 +4997,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
param->error_printed=1;
param->retry_repair=1;
param->testflag|=T_RETRY_WITHOUT_QUICK;
+ my_errno= HA_ERR_WRONG_IN_RECORD;
DBUG_RETURN(1); /* Something wrong with data */
}
b_type= _ma_get_block_info(info, &block_info,-1,pos);
@@ -5269,6 +5271,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
param->error_printed=1;
param->retry_repair=1;
param->testflag|=T_RETRY_WITHOUT_QUICK;
+ my_errno= HA_ERR_WRONG_IN_RECORD;
DBUG_RETURN(1); /* Something wrong with data */
}
sort_param->start_recpos=sort_param->pos;
@@ -5643,7 +5646,7 @@ static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param,
if (ha_compare_text(sort_param->seg->charset,
a+1,a_len-1,
- ft_buf->lastkey+1,val_off-1, 0, 0)==0)
+ ft_buf->lastkey+1,val_off-1, 0)==0)
{
uchar *p;
if (!ft_buf->buf) /* store in second-level tree */
@@ -5666,7 +5669,7 @@ static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param,
key_block++;
sort_info->key_block=key_block;
sort_param->keyinfo= &share->ft2_keyinfo;
- ft_buf->count=(ft_buf->buf - p)/val_len;
+ ft_buf->count=(uint)(ft_buf->buf - p)/val_len;
/* flushing buffer to second-level tree */
for (error=0; !error && p < ft_buf->buf; p+= val_len)
diff --git a/storage/maria/ma_check_standalone.h b/storage/maria/ma_check_standalone.h
index 24c1779fbde..e2e651b43f3 100644
--- a/storage/maria/ma_check_standalone.h
+++ b/storage/maria/ma_check_standalone.h
@@ -23,7 +23,7 @@ void _mi_report_crashed(void *file __attribute__((unused)),
{
}
-static unsigned int no_key()
+static unsigned int no_key(unsigned int not_used __attribute__((unused)))
{
return ENCRYPTION_KEY_VERSION_INVALID;
}
diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c
index 79b33e0b1fc..c2f3fe6dbd0 100644
--- a/storage/maria/ma_checkpoint.c
+++ b/storage/maria/ma_checkpoint.c
@@ -332,11 +332,12 @@ int ma_checkpoint_init(ulong interval)
res= 1;
else if (interval > 0)
{
+ size_t intv= interval;
compile_time_assert(sizeof(void *) >= sizeof(ulong));
if ((res= mysql_thread_create(key_thread_checkpoint,
&checkpoint_control.thread, NULL,
ma_checkpoint_background,
- (void*) interval)))
+ (void*) intv)))
checkpoint_control.killed= TRUE;
}
else
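
The hunk above passes the interval through the thread-start argument itself instead of through heap memory; a self-contained sketch of the same pattern (valid only while a pointer is wide enough to hold the value, which is what the compile_time_assert() guards):

	#include <pthread.h>
	#include <stdio.h>

	static void *worker(void *arg)
	{
	  size_t interval= (size_t) arg;	/* recover the value */
	  printf("checkpoint interval: %zu\n", interval);
	  return NULL;
	}

	int main(void)
	{
	  pthread_t thr;
	  size_t interval= 30;

	  pthread_create(&thr, NULL, worker, (void*) interval);
	  pthread_join(thr, NULL);
	  return 0;
	}
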
@@ -375,7 +376,7 @@ static void flush_all_tables(int what_to_flush)
MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET|
MA_STATE_INFO_WRITE_LOCK);
DBUG_PRINT("maria_flush_states",
- ("is_of_horizon: LSN " LSN_FMT "",
+ ("is_of_horizon: LSN " LSN_FMT,
LSN_IN_PARTS(info->s->state.is_of_horizon)));
break;
case 2:
@@ -546,8 +547,8 @@ pthread_handler_t ma_checkpoint_background(void *arg)
right after "case 0", thus having 'dfile' unset. So the thread cares only
about the interval's value when it started.
*/
- const ulong interval= (ulong)arg;
- uint sleeps, sleep_time;
+ const size_t interval= (size_t)arg;
+ size_t sleeps, sleep_time;
TRANSLOG_ADDRESS log_horizon_at_last_checkpoint=
translog_get_horizon();
ulonglong pagecache_flushes_at_last_checkpoint=
diff --git a/storage/maria/ma_checksum.c b/storage/maria/ma_checksum.c
index 0182dc7fd10..baac18af473 100644
--- a/storage/maria/ma_checksum.c
+++ b/storage/maria/ma_checksum.c
@@ -58,7 +58,7 @@ ha_checksum _ma_checksum(MARIA_HA *info, const uchar *record)
length= _ma_calc_blob_length(blob_size_length, pos);
if (length)
{
- memcpy(&pos, pos + blob_size_length, sizeof(char*));
+ memcpy((char**) &pos, pos + blob_size_length, sizeof(char*));
crc= my_checksum(crc, pos, length);
}
continue;
diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c
index 5e4b286f8ab..7d4271c794c 100644
--- a/storage/maria/ma_close.c
+++ b/storage/maria/ma_close.c
@@ -20,7 +20,7 @@
to open other files during the time we flush the cache and close this file
*/
-#include "maria_def.h"
+#include "ma_ftdefs.h"
#include "ma_crypt.h"
int maria_close(register MARIA_HA *info)
@@ -30,9 +30,9 @@ int maria_close(register MARIA_HA *info)
MARIA_SHARE *share= info->s;
my_bool internal_table= share->internal_table;
DBUG_ENTER("maria_close");
- DBUG_PRINT("enter",("name: '%s' base: 0x%lx reopen: %u locks: %u",
+ DBUG_PRINT("enter",("name: '%s' base: %p reopen: %u locks: %u",
share->open_file_name.str,
- (long) info, (uint) share->reopen,
+ info, (uint) share->reopen,
(uint) share->tot_locks));
/* Check that we have unlocked key delete-links properly */
@@ -47,9 +47,7 @@ int maria_close(register MARIA_HA *info)
a global mutex
*/
if (flush_pagecache_blocks(share->pagecache, &share->kfile,
- ((share->temporary || share->deleting) ?
- FLUSH_IGNORE_CHANGED :
- FLUSH_RELEASE)))
+ share->deleting ? FLUSH_IGNORE_CHANGED : FLUSH_RELEASE))
error= my_errno;
}
@@ -88,6 +86,7 @@ int maria_close(register MARIA_HA *info)
share->open_list= list_delete(share->open_list, &info->share_list);
}
+ maria_ftparser_call_deinitializer(info);
my_free(info->rec_buff);
(*share->end)(info);
@@ -113,23 +112,11 @@ int maria_close(register MARIA_HA *info)
since the start of the function (very unlikely)
*/
if (flush_pagecache_blocks(share->pagecache, &share->kfile,
- ((share->temporary || share->deleting) ?
- FLUSH_IGNORE_CHANGED :
- FLUSH_RELEASE)))
+ share->deleting ? FLUSH_IGNORE_CHANGED : FLUSH_RELEASE))
error= my_errno;
-#ifdef HAVE_MMAP
- if (share->file_map)
- _ma_unmap_file(info);
-#endif
- /*
- If we are crashed, we can safely flush the current state as it will
- not change the crashed state.
- We can NOT write the state in other cases as other threads
- may be using the file at this point
- IF using --external-locking, which does not apply to Maria.
- */
+ unmap_file(info);
if (((share->changed && share->base.born_transactional) ||
- maria_is_crashed(info)))
+ maria_is_crashed(info) || (share->temporary && !share->deleting)))
{
if (save_global_changed)
{
diff --git a/storage/maria/ma_control_file.c b/storage/maria/ma_control_file.c
index c2a8e439d51..4cf0abea9ad 100644
--- a/storage/maria/ma_control_file.c
+++ b/storage/maria/ma_control_file.c
@@ -146,6 +146,8 @@ static CONTROL_FILE_ERROR create_control_file(const char *name,
{
uint32 sum;
uchar buffer[CF_CREATE_TIME_TOTAL_SIZE];
+ ulong rnd1,rnd2;
+
DBUG_ENTER("maria_create_control_file");
if ((control_file_fd= mysql_file_create(key_file_control, name, 0,
@@ -157,7 +159,9 @@ static CONTROL_FILE_ERROR create_control_file(const char *name,
cf_changeable_size= CF_CHANGEABLE_TOTAL_SIZE;
/* Create unique uuid for the control file */
- my_uuid_init((ulong) &buffer, (ulong) &maria_uuid);
+ my_random_bytes((uchar *)&rnd1, sizeof (rnd1));
+ my_random_bytes((uchar *)&rnd2, sizeof (rnd2));
+ my_uuid_init(rnd1, rnd2);
my_uuid(maria_uuid);
/* Prepare and write the file header */
diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c
index 263d8a307b1..2c9c15d8a2a 100644
--- a/storage/maria/ma_create.c
+++ b/storage/maria/ma_create.c
@@ -70,13 +70,13 @@ int maria_create(const char *name, enum data_file_type datafile_type,
myf create_flag;
uint length,max_key_length,packed,pack_bytes,pointer,real_length_diff,
key_length,info_length,key_segs,options,min_key_length,
- base_pos,long_varchar_count,varchar_length,
+ base_pos,long_varchar_count,
unique_key_parts,fulltext_keys,offset, not_block_record_extra_length;
uint max_field_lengths, extra_header_size, column_nr;
uint internal_table= flags & HA_CREATE_INTERNAL_TABLE;
ulong reclength, real_reclength,min_pack_length;
char kfilename[FN_REFLEN], klinkname[FN_REFLEN], *klinkname_ptr;
- char dfilename[FN_REFLEN], dlinkname[FN_REFLEN], *dlinkname_ptr;
+ char dfilename[FN_REFLEN], dlinkname[FN_REFLEN], *dlinkname_ptr= 0;
ulong pack_reclength;
ulonglong tot_length,max_rows, tmp;
enum en_fieldtype type;
@@ -144,9 +144,6 @@ int maria_create(const char *name, enum data_file_type datafile_type,
datafile_type= BLOCK_RECORD;
}
- if (ci->reloc_rows > ci->max_rows)
- ci->reloc_rows=ci->max_rows; /* Check if wrong parameter */
-
if (!(rec_per_key_part=
(double*) my_malloc((keys + uniques)*HA_MAX_KEY_SEG*sizeof(double) +
(keys + uniques)*HA_MAX_KEY_SEG*sizeof(ulong) +
@@ -160,7 +157,7 @@ int maria_create(const char *name, enum data_file_type datafile_type,
/* Start by checking fields and field-types used */
- varchar_length=long_varchar_count=packed= not_block_record_extra_length=
+ long_varchar_count=packed= not_block_record_extra_length=
pack_reclength= max_field_lengths= 0;
reclength= min_pack_length= ci->null_bytes;
forced_packed= 0;
@@ -232,7 +229,6 @@ int maria_create(const char *name, enum data_file_type datafile_type,
}
else if (type == FIELD_VARCHAR)
{
- varchar_length+= column->length-1; /* Used for min_pack_length */
pack_reclength++;
not_block_record_extra_length++;
max_field_lengths++;
@@ -368,6 +364,7 @@ int maria_create(const char *name, enum data_file_type datafile_type,
pack_bytes);
if (!ci->data_file_length && ci->max_rows)
{
+ set_if_bigger(ci->max_rows, ci->reloc_rows);
if (pack_reclength == INT_MAX32 ||
(~(ulonglong) 0)/ci->max_rows < (ulonglong) pack_reclength)
ci->data_file_length= ~(ulonglong) 0;
@@ -396,7 +393,7 @@ int maria_create(const char *name, enum data_file_type datafile_type,
if (rows_per_page > 0)
{
set_if_smaller(rows_per_page, MAX_ROWS_PER_PAGE);
- ci->max_rows= data_file_length / maria_block_size * rows_per_page;
+ ci->max_rows= (data_file_length / maria_block_size+1) * rows_per_page;
}
else
ci->max_rows= data_file_length / (min_pack_length +
@@ -408,6 +405,7 @@ int maria_create(const char *name, enum data_file_type datafile_type,
((options &
HA_OPTION_PACK_RECORD) ?
3 : 0)));
+ set_if_smaller(ci->reloc_rows, ci->max_rows);
}
max_rows= (ulonglong) ci->max_rows;
if (datafile_type == BLOCK_RECORD)
@@ -800,6 +798,7 @@ int maria_create(const char *name, enum data_file_type datafile_type,
share.state.state.data_file_length= maria_block_size;
/* Add length of packed fields + length */
share.base.pack_reclength+= share.base.max_field_lengths+3;
+ share.base.max_pack_length= share.base.pack_reclength;
/* Adjust max_pack_length, to be used if we have short rows */
if (share.base.max_pack_length < maria_block_size)
@@ -1198,7 +1197,6 @@ int maria_create(const char *name, enum data_file_type datafile_type,
{
fn_format(dfilename,name,"", MARIA_NAME_DEXT,
MY_UNPACK_FILENAME | MY_APPEND_EXT);
- dlinkname_ptr= NullS;
create_flag= (flags & HA_CREATE_KEEP_FILES) ? 0 : MY_DELETE_OLD;
}
if ((dfile=
@@ -1218,7 +1216,7 @@ int maria_create(const char *name, enum data_file_type datafile_type,
if (mysql_file_chsize(file,(ulong) share.base.keystart,0,MYF(0)))
goto err;
- if (sync_dir && mysql_file_sync(file, MYF(0)))
+ if (!internal_table && sync_dir && mysql_file_sync(file, MYF(0)))
goto err;
if (! (flags & HA_DONT_TOUCH_DATA))
@@ -1228,7 +1226,7 @@ int maria_create(const char *name, enum data_file_type datafile_type,
share.base.min_pack_length*ci->reloc_rows,0,MYF(0)))
goto err;
#endif
- if (sync_dir && mysql_file_sync(dfile, MYF(0)))
+ if (!internal_table && sync_dir && mysql_file_sync(dfile, MYF(0)))
goto err;
if (mysql_file_close(dfile,MYF(0)))
goto err;
@@ -1252,8 +1250,6 @@ err_no_lock:
switch (errpos) {
case 3:
mysql_file_close(dfile, MYF(0));
- /* fall through */
- case 2:
if (! (flags & HA_DONT_TOUCH_DATA))
{
mysql_file_delete(key_file_dfile, dfilename, MYF(sync_dir));
diff --git a/storage/maria/ma_delete.c b/storage/maria/ma_delete.c
index bb004361dff..2a877e49bba 100644
--- a/storage/maria/ma_delete.c
+++ b/storage/maria/ma_delete.c
@@ -559,9 +559,9 @@ static int del(MARIA_HA *info, MARIA_KEY *key,
MARIA_KEY ret_key;
MARIA_PAGE next_page;
DBUG_ENTER("del");
- DBUG_PRINT("enter",("leaf_page: %lu keypos: 0x%lx",
+ DBUG_PRINT("enter",("leaf_page: %lu keypos: %p",
(ulong) (leaf_page->pos / share->block_size),
- (ulong) keypos));
+ keypos));
DBUG_DUMP("leaf_buff", leaf_page->buff, leaf_page->size);
page_flag= leaf_page->flag;
@@ -771,9 +771,9 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
MARIA_KEY tmp_key, anc_key, leaf_key;
MARIA_PAGE next_page;
DBUG_ENTER("underflow");
- DBUG_PRINT("enter",("leaf_page: %lu keypos: 0x%lx",
+ DBUG_PRINT("enter",("leaf_page: %lu keypos: %p",
(ulong) (leaf_page->pos / share->block_size),
- (ulong) keypos));
+ keypos));
DBUG_DUMP("anc_buff", anc_page->buff, anc_page->size);
DBUG_DUMP("leaf_buff", leaf_page->buff, leaf_page->size);
@@ -916,8 +916,8 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
anc_end_pos= anc_buff + new_anc_length;
- DBUG_PRINT("test",("anc_buff: 0x%lx anc_end_pos: 0x%lx",
- (long) anc_buff, (long) anc_end_pos));
+ DBUG_PRINT("test",("anc_buff:%p anc_end_pos:%p",
+ anc_buff, anc_end_pos));
if (!first_key && !_ma_get_last_key(&anc_key, anc_page, keypos))
goto err;
@@ -1306,8 +1306,8 @@ static uint remove_key(MARIA_KEYDEF *keyinfo, uint page_flag, uint nod_flag,
int s_length;
uchar *start;
DBUG_ENTER("remove_key");
- DBUG_PRINT("enter", ("keypos: 0x%lx page_end: 0x%lx",
- (long) keypos, (long) page_end));
+ DBUG_PRINT("enter", ("keypos:%p page_end: %p",
+ keypos, page_end));
start= s_temp->key_pos= keypos;
s_temp->changed_length= 0;
diff --git a/storage/maria/ma_delete_all.c b/storage/maria/ma_delete_all.c
index e1d04997281..c1019c01c66 100644
--- a/storage/maria/ma_delete_all.c
+++ b/storage/maria/ma_delete_all.c
@@ -38,6 +38,9 @@ int maria_delete_all_rows(MARIA_HA *info)
MARIA_SHARE *share= info->s;
my_bool log_record;
LSN lsn;
+#ifdef HAVE_MMAP
+ my_bool mmap_file= share->file_map != 0;
+#endif
DBUG_ENTER("maria_delete_all_rows");
if (share->options & HA_OPTION_READ_ONLY_DATA)
@@ -95,7 +98,7 @@ int maria_delete_all_rows(MARIA_HA *info)
*/
#ifdef HAVE_MMAP
- if (share->file_map)
+ if (mmap_file)
_ma_unmap_file(info);
#endif
@@ -135,10 +138,13 @@ int maria_delete_all_rows(MARIA_HA *info)
goto err;
}
+ if (info->opt_flag & WRITE_CACHE_USED)
+ reinit_io_cache(&info->rec_cache, WRITE_CACHE, 0, 1, 1);
+
_ma_writeinfo(info, WRITEINFO_UPDATE_KEYFILE);
#ifdef HAVE_MMAP
/* Map again */
- if (share->file_map)
+ if (mmap_file)
_ma_dynmap_file(info, (my_off_t) 0);
#endif
DBUG_RETURN(0);
diff --git a/storage/maria/ma_dynrec.c b/storage/maria/ma_dynrec.c
index db9024ba352..e630ce896ce 100644
--- a/storage/maria/ma_dynrec.c
+++ b/storage/maria/ma_dynrec.c
@@ -1344,8 +1344,8 @@ ulong _ma_rec_unpack(register MARIA_HA *info, register uchar *to, uchar *from,
err:
_ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD);
- DBUG_PRINT("error",("to_end: 0x%lx -> 0x%lx from_end: 0x%lx -> 0x%lx",
- (long) to, (long) to_end, (long) from, (long) from_end));
+ DBUG_PRINT("error",("to_end: %p -> %p from_end: %p -> %p",
+ to, to_end, from, from_end));
DBUG_DUMP("from", info->rec_buff, info->s->base.min_pack_length);
DBUG_RETURN(MY_FILE_ERROR);
} /* _ma_rec_unpack */
diff --git a/storage/maria/ma_extra.c b/storage/maria/ma_extra.c
index 90ad853aaf8..2697c72f2b8 100644
--- a/storage/maria/ma_extra.c
+++ b/storage/maria/ma_extra.c
@@ -286,7 +286,6 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function,
We however do a flush here for additional safety.
*/
/** @todo consider porting these flush-es to MyISAM */
- DBUG_ASSERT(share->reopen == 1);
error= _ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
FLUSH_FORCE_WRITE, FLUSH_FORCE_WRITE);
if (!error && share->changed)
@@ -314,12 +313,15 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function,
share->state.open_count= 1;
share->changed= 1;
_ma_mark_file_changed_now(share);
+ if (share->temporary)
+ break;
/* fall through */
case HA_EXTRA_PREPARE_FOR_RENAME:
{
my_bool do_flush= MY_TEST(function != HA_EXTRA_PREPARE_FOR_DROP);
my_bool save_global_changed;
enum flush_type type;
+ DBUG_ASSERT(!share->temporary);
/*
This share, to have last_version=0, needs to save all its data/index
blocks to disk if this is not for a DROP TABLE. Otherwise they would be
diff --git a/storage/maria/ma_ft_boolean_search.c b/storage/maria/ma_ft_boolean_search.c
index 8db97368c31..2f0fba9a0ad 100644
--- a/storage/maria/ma_ft_boolean_search.c
+++ b/storage/maria/ma_ft_boolean_search.c
@@ -163,7 +163,7 @@ static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b)
{
/* ORDER BY word, ndepth */
int i= ha_compare_text(cs, (uchar*) (*a)->word + 1,(*a)->len - 1,
- (uchar*) (*b)->word + 1,(*b)->len - 1, 0, 0);
+ (uchar*) (*b)->word + 1,(*b)->len - 1, 0);
if (!i)
i=CMP_NUM((*a)->ndepth, (*b)->ndepth);
return i;
@@ -413,7 +413,7 @@ static int _ft2_search_no_lock(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
extra-1,
(uchar*) ftbw->word+1,
ftbw->len-1,
- (my_bool) (ftbw->flags & FTB_FLAG_TRUNC), 0);
+ (my_bool) (ftbw->flags & FTB_FLAG_TRUNC));
}
if (r) /* not found */
@@ -905,7 +905,7 @@ static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param,
ftbw= ftb->list[c];
if (ha_compare_text(ftb->charset, (uchar*)word, len,
(uchar*)ftbw->word+1, ftbw->len-1,
- (my_bool)(ftbw->flags&FTB_FLAG_TRUNC), 0) < 0)
+ (my_bool)(ftbw->flags&FTB_FLAG_TRUNC)) < 0)
b= c;
else
a= c;
@@ -932,7 +932,7 @@ static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param,
ftbw= ftb->list[c];
if (ha_compare_text(ftb->charset, (uchar*)word, len,
(uchar*)ftbw->word + 1,ftbw->len - 1,
- (my_bool)(ftbw->flags & FTB_FLAG_TRUNC), 0))
+ (my_bool)(ftbw->flags & FTB_FLAG_TRUNC)))
{
if (ftb->with_scan & FTB_FLAG_TRUNC)
continue;
diff --git a/storage/maria/ma_ft_nlq_search.c b/storage/maria/ma_ft_nlq_search.c
index 8e13d127888..2afd493b32d 100644
--- a/storage/maria/ma_ft_nlq_search.c
+++ b/storage/maria/ma_ft_nlq_search.c
@@ -83,13 +83,13 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio)
#error
#endif
DBUG_ENTER("walk_and_match");
- LINT_INIT_STRUCT(subkeys);
word->weight=LWS_FOR_QUERY;
_ma_ft_make_key(info, &key, aio->keynr, keybuff, word, 0);
key.data_length-= HA_FT_WLEN;
doc_cnt=0;
+ subkeys.i= 0;
if (share->lock_key_trees)
mysql_rwlock_rdlock(&share->keyinfo[aio->keynr].root_lock);
@@ -119,7 +119,7 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio)
info->last_key.data+1,
info->last_key.data_length +
info->last_key.ref_length - extra - 1,
- key.data+1, key.data_length-1, 0, 0))
+ key.data+1, key.data_length-1, 0))
break;
if (subkeys.i < 0)
diff --git a/storage/maria/ma_ft_parser.c b/storage/maria/ma_ft_parser.c
index 8e997cbb5f5..364c6c3887e 100644
--- a/storage/maria/ma_ft_parser.c
+++ b/storage/maria/ma_ft_parser.c
@@ -34,7 +34,7 @@ typedef struct st_my_maria_ft_parser_param
static int FT_WORD_cmp(CHARSET_INFO* cs, FT_WORD *w1, FT_WORD *w2)
{
return ha_compare_text(cs, (uchar*) w1->pos, w1->len,
- (uchar*) w2->pos, w2->len, 0, 0);
+ (uchar*) w2->pos, w2->len, 0);
}
static int walk_and_copy(FT_WORD *word,uint32 count,FT_DOCSTAT *docstat)
diff --git a/storage/maria/ma_ft_update.c b/storage/maria/ma_ft_update.c
index 212d7d94a19..b2109008676 100644
--- a/storage/maria/ma_ft_update.c
+++ b/storage/maria/ma_ft_update.c
@@ -83,7 +83,7 @@ uint _ma_ft_segiterator(register FT_SEG_ITERATOR *ftsi)
if (ftsi->seg->flag & HA_BLOB_PART)
{
ftsi->len= _ma_calc_blob_length(ftsi->seg->bit_start,ftsi->pos);
- memcpy(&ftsi->pos, ftsi->pos+ftsi->seg->bit_start, sizeof(char*));
+ memcpy((char**) &ftsi->pos, ftsi->pos+ftsi->seg->bit_start, sizeof(char*));
DBUG_RETURN(1);
}
ftsi->len=ftsi->seg->length;
@@ -184,7 +184,7 @@ int _ma_ft_cmp(MARIA_HA *info, uint keynr, const uchar *rec1, const uchar *rec2)
if ((ftsi1.pos != ftsi2.pos) &&
(!ftsi1.pos || !ftsi2.pos ||
ha_compare_text(cs, ftsi1.pos,ftsi1.len,
- ftsi2.pos,ftsi2.len,0,0)))
+ ftsi2.pos,ftsi2.len,0)))
DBUG_RETURN(THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT);
}
DBUG_RETURN(GEE_THEY_ARE_ABSOLUTELY_IDENTICAL);
@@ -212,7 +212,7 @@ int _ma_ft_update(MARIA_HA *info, uint keynr, uchar *keybuf,
while(old_word->pos && new_word->pos)
{
cmp= ha_compare_text(cs, (uchar*) old_word->pos,old_word->len,
- (uchar*) new_word->pos,new_word->len,0,0);
+ (uchar*) new_word->pos,new_word->len,0);
cmp2= cmp ? 0 : (fabs(old_word->weight - new_word->weight) > 1.e-5);
if (cmp < 0 || cmp2)
diff --git a/storage/maria/ma_key.c b/storage/maria/ma_key.c
index aa9efff357e..9e804a1e9dc 100644
--- a/storage/maria/ma_key.c
+++ b/storage/maria/ma_key.c
@@ -279,7 +279,6 @@ MARIA_KEY *_ma_make_key(MARIA_HA *info, MARIA_KEY *int_key, uint keynr,
}
else if (keyseg->flag & HA_SWAP_KEY)
{ /* Numerical column */
-#ifdef HAVE_ISNAN
if (type == HA_KEYTYPE_FLOAT)
{
float nr;
@@ -303,7 +302,6 @@ MARIA_KEY *_ma_make_key(MARIA_HA *info, MARIA_KEY *int_key, uint keynr,
continue;
}
}
-#endif
pos+=length;
while (length--)
{
@@ -318,7 +316,7 @@ MARIA_KEY *_ma_make_key(MARIA_HA *info, MARIA_KEY *int_key, uint keynr,
key+= length;
}
_ma_dpointer(info->s, key, filepos);
- int_key->data_length= (key - int_key->data);
+ int_key->data_length= (uint)(key - int_key->data);
int_key->ref_length= info->s->rec_reflength;
int_key->flag= 0;
if (_ma_have_versioning(info) && trid)
@@ -449,7 +447,7 @@ MARIA_KEY *_ma_pack_key(register MARIA_HA *info, MARIA_KEY *int_key,
/* set flag to SEARCH_PART_KEY if we are not using all key parts */
int_key->flag= keyseg->type ? SEARCH_PART_KEY : 0;
int_key->ref_length= 0;
- int_key->data_length= (key - int_key->data);
+ int_key->data_length= (uint)(key - int_key->data);
DBUG_PRINT("exit", ("length: %u", int_key->data_length));
DBUG_RETURN(int_key);
diff --git a/storage/maria/ma_key_recover.c b/storage/maria/ma_key_recover.c
index 8bfc41684dd..4dcc3cd9047 100644
--- a/storage/maria/ma_key_recover.c
+++ b/storage/maria/ma_key_recover.c
@@ -933,8 +933,6 @@ err:
@retval 1 Error
*/
-long my_counter= 0;
-
uint _ma_apply_redo_index(MARIA_HA *info,
LSN lsn, const uchar *header, uint head_length)
{
diff --git a/storage/maria/ma_keycache.c b/storage/maria/ma_keycache.c
index 38004ca5d78..101e6c54742 100644
--- a/storage/maria/ma_keycache.c
+++ b/storage/maria/ma_keycache.c
@@ -54,8 +54,8 @@ int maria_assign_to_pagecache(MARIA_HA *info,
MARIA_SHARE* share= info->s;
DBUG_ENTER("maria_assign_to_pagecache");
DBUG_PRINT("enter",
- ("old_pagecache_handle: 0x%lx new_pagecache_handle: 0x%lx",
- (long) share->pagecache, (long) pagecache));
+ ("old_pagecache_handle:%p new_pagecache_handle:%p",
+ share->pagecache, pagecache));
/*
Skip operation if we didn't change key cache. This can happen if we
diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c
index beda5f46ff4..9fe746a167b 100644
--- a/storage/maria/ma_loghandler.c
+++ b/storage/maria/ma_loghandler.c
@@ -1,4 +1,5 @@
/* Copyright (C) 2007 MySQL AB & Sanja Belkin. 2010 Monty Program Ab.
+ Copyright (c) 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -957,7 +958,7 @@ char *translog_filename_by_fileno(uint32 file_no, char *path)
length= (uint) (int10_to_str(file_no, buff, 10) - buff);
strmov(end - length +1, buff);
- DBUG_PRINT("info", ("Path: '%s' path: 0x%lx", path, (ulong) path));
+ DBUG_PRINT("info", ("Path: '%s' path: %p", path, path));
DBUG_RETURN(path);
}
@@ -1061,8 +1062,8 @@ static TRANSLOG_FILE *get_logfile_by_number(uint32 file_no)
file= *dynamic_element(&log_descriptor.open_files,
log_descriptor.max_file - file_no, TRANSLOG_FILE **);
mysql_rwlock_unlock(&log_descriptor.open_files_lock);
- DBUG_PRINT("info", ("File 0x%lx File no: %lu, File handler: %d",
- (ulong)file, (ulong)file_no,
+ DBUG_PRINT("info", ("File %p File no: %u, File handler: %d",
+ file, file_no,
(file ? file->handler.file : -1)));
DBUG_ASSERT(!file || file->number == file_no);
DBUG_RETURN(file);
@@ -1530,8 +1531,8 @@ static my_bool translog_buffer_init(struct st_translog_buffer *buffer, int num)
buffer->pre_force_close_horizon=
buffer->prev_last_lsn= buffer->last_lsn=
LSN_IMPOSSIBLE;
- DBUG_PRINT("info", ("last_lsn and prev_last_lsn set to 0 buffer: 0x%lx",
- (ulong) buffer));
+ DBUG_PRINT("info", ("last_lsn and prev_last_lsn set to 0 buffer: %p",
+ buffer));
buffer->buffer_no= (uint8) num;
/* This Buffer File */
@@ -1729,8 +1730,8 @@ static void translog_buffer_lock(struct st_translog_buffer *buffer)
{
DBUG_ENTER("translog_buffer_lock");
DBUG_PRINT("enter",
- ("Lock buffer #%u: (0x%lx)", (uint) buffer->buffer_no,
- (ulong) buffer));
+ ("Lock buffer #%u: %p", buffer->buffer_no,
+ buffer));
mysql_mutex_lock(&buffer->mutex);
DBUG_VOID_RETURN;
}
@@ -1747,8 +1748,8 @@ static void translog_buffer_lock(struct st_translog_buffer *buffer)
static void translog_buffer_unlock(struct st_translog_buffer *buffer)
{
DBUG_ENTER("translog_buffer_unlock");
- DBUG_PRINT("enter", ("Unlock buffer... #%u (0x%lx)",
- (uint) buffer->buffer_no, (ulong) buffer));
+ DBUG_PRINT("enter", ("Unlock buffer... #%u (%p)",
+ (uint) buffer->buffer_no, buffer));
mysql_mutex_unlock(&buffer->mutex);
DBUG_VOID_RETURN;
@@ -1811,16 +1812,16 @@ static void translog_new_page_header(TRANSLOG_ADDRESS *horizon,
ptr+= TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
}
{
- uint len= (ptr - cursor->ptr);
+ size_t len= (ptr - cursor->ptr);
(*horizon)+= len; /* increasing the offset part of the address */
- cursor->current_page_fill= len;
+ cursor->current_page_fill= (uint16)len;
if (!cursor->chaser)
- cursor->buffer->size+= len;
+ cursor->buffer->size+= (translog_size_t)len;
}
cursor->ptr= ptr;
- DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx chaser: %d Size: %lu (%lu) "
+ DBUG_PRINT("info", ("NewP buffer #%u: %p chaser: %d Size: %lu (%lu) "
"Horizon: " LSN_FMT,
- (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
+ (uint) cursor->buffer->buffer_no, cursor->buffer,
cursor->chaser, (ulong) cursor->buffer->size,
(ulong) (cursor->ptr - cursor->buffer->buffer),
LSN_IN_PARTS(*horizon)));
@@ -1923,17 +1924,17 @@ static void translog_finish_page(TRANSLOG_ADDRESS *horizon,
uint16 left= TRANSLOG_PAGE_SIZE - cursor->current_page_fill;
uchar *page= cursor->ptr - cursor->current_page_fill;
DBUG_ENTER("translog_finish_page");
- DBUG_PRINT("enter", ("Buffer: #%u 0x%lx "
+ DBUG_PRINT("enter", ("Buffer: #%u %p "
"Buffer addr: " LSN_FMT " "
"Page addr: " LSN_FMT " "
- "size:%lu (%lu) Pg:%u left:%u",
- (uint) cursor->buffer_no, (ulong) cursor->buffer,
+ "size:%u (%u) Pg:%u left:%u",
+ (uint) cursor->buffer_no, cursor->buffer,
LSN_IN_PARTS(cursor->buffer->offset),
- (uint) LSN_FILE_NO(*horizon),
- (uint) (LSN_OFFSET(*horizon) -
- cursor->current_page_fill),
- (ulong) cursor->buffer->size,
- (ulong) (cursor->ptr -cursor->buffer->buffer),
+ (uint)LSN_FILE_NO(*horizon),
+ (uint)(LSN_OFFSET(*horizon) -
+ cursor->current_page_fill),
+ (uint) cursor->buffer->size,
+ (uint) (cursor->ptr -cursor->buffer->buffer),
(uint) cursor->current_page_fill, (uint) left));
DBUG_ASSERT(LSN_FILE_NO(*horizon) == LSN_FILE_NO(cursor->buffer->offset)
|| translog_status == TRANSLOG_UNINITED);
@@ -1958,10 +1959,10 @@ static void translog_finish_page(TRANSLOG_ADDRESS *horizon,
cursor->buffer->size+= left;
/* We are finishing the page so reset the counter */
cursor->current_page_fill= 0;
- DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx "
+ DBUG_PRINT("info", ("Finish Page buffer #%u: %p "
"chaser: %d Size: %lu (%lu)",
(uint) cursor->buffer->buffer_no,
- (ulong) cursor->buffer, cursor->chaser,
+ cursor->buffer, cursor->chaser,
(ulong) cursor->buffer->size,
(ulong) (cursor->ptr - cursor->buffer->buffer)));
translog_check_cursor(cursor);
@@ -2000,9 +2001,9 @@ static void translog_finish_page(TRANSLOG_ADDRESS *horizon,
static void translog_wait_for_closing(struct st_translog_buffer *buffer)
{
DBUG_ENTER("translog_wait_for_closing");
- DBUG_PRINT("enter", ("Buffer #%u 0x%lx copies in progress: %u "
+ DBUG_PRINT("enter", ("Buffer #%u %p copies in progress: %u "
"is closing %u File: %d size: %lu",
- (uint) buffer->buffer_no, (ulong) buffer,
+ (uint) buffer->buffer_no, buffer,
(uint) buffer->copy_to_buffer_in_progress,
(uint) buffer->is_closing_buffer,
(buffer->file ? buffer->file->handler.file : -1),
@@ -2011,12 +2012,12 @@ static void translog_wait_for_closing(struct st_translog_buffer *buffer)
while (buffer->is_closing_buffer)
{
- DBUG_PRINT("info", ("wait for writers... buffer: #%u 0x%lx",
- (uint) buffer->buffer_no, (ulong) buffer));
+ DBUG_PRINT("info", ("wait for writers... buffer: #%u %p",
+ (uint) buffer->buffer_no, buffer));
DBUG_ASSERT(buffer->file != NULL);
mysql_cond_wait(&buffer->waiting_filling_buffer, &buffer->mutex);
- DBUG_PRINT("info", ("wait for writers done buffer: #%u 0x%lx",
- (uint) buffer->buffer_no, (ulong) buffer));
+ DBUG_PRINT("info", ("wait for writers done buffer: #%u %p",
+ (uint) buffer->buffer_no, buffer));
}
DBUG_VOID_RETURN;
@@ -2032,9 +2033,9 @@ static void translog_wait_for_closing(struct st_translog_buffer *buffer)
static void translog_wait_for_writers(struct st_translog_buffer *buffer)
{
DBUG_ENTER("translog_wait_for_writers");
- DBUG_PRINT("enter", ("Buffer #%u 0x%lx copies in progress: %u "
+ DBUG_PRINT("enter", ("Buffer #%u %p copies in progress: %u "
"is closing %u File: %d size: %lu",
- (uint) buffer->buffer_no, (ulong) buffer,
+ (uint) buffer->buffer_no, buffer,
(uint) buffer->copy_to_buffer_in_progress,
(uint) buffer->is_closing_buffer,
(buffer->file ? buffer->file->handler.file : -1),
@@ -2043,12 +2044,12 @@ static void translog_wait_for_writers(struct st_translog_buffer *buffer)
while (buffer->copy_to_buffer_in_progress)
{
- DBUG_PRINT("info", ("wait for writers... buffer: #%u 0x%lx",
- (uint) buffer->buffer_no, (ulong) buffer));
+ DBUG_PRINT("info", ("wait for writers... buffer: #%u %p",
+ (uint) buffer->buffer_no, buffer));
DBUG_ASSERT(buffer->file != NULL);
mysql_cond_wait(&buffer->waiting_filling_buffer, &buffer->mutex);
- DBUG_PRINT("info", ("wait for writers done buffer: #%u 0x%lx",
- (uint) buffer->buffer_no, (ulong) buffer));
+ DBUG_PRINT("info", ("wait for writers done buffer: #%u %p",
+ (uint) buffer->buffer_no, buffer));
}
DBUG_VOID_RETURN;
@@ -2073,9 +2074,9 @@ static void translog_wait_for_buffer_free(struct st_translog_buffer *buffer)
TRANSLOG_FILE *file= buffer->file;
uint8 ver= buffer->ver;
DBUG_ENTER("translog_wait_for_buffer_free");
- DBUG_PRINT("enter", ("Buffer #%u 0x%lx copies in progress: %u "
+ DBUG_PRINT("enter", ("Buffer #%u %p copies in progress: %u "
"is closing %u File: %d size: %lu",
- (uint) buffer->buffer_no, (ulong) buffer,
+ (uint) buffer->buffer_no, buffer,
(uint) buffer->copy_to_buffer_in_progress,
(uint) buffer->is_closing_buffer,
(buffer->file ? buffer->file->handler.file : -1),
@@ -2088,11 +2089,11 @@ static void translog_wait_for_buffer_free(struct st_translog_buffer *buffer)
while (buffer->file != NULL)
{
- DBUG_PRINT("info", ("wait for writers... buffer: #%u 0x%lx",
- (uint) buffer->buffer_no, (ulong) buffer));
+ DBUG_PRINT("info", ("wait for writers... buffer: #%u %p",
+ (uint) buffer->buffer_no, buffer));
mysql_cond_wait(&buffer->waiting_filling_buffer, &buffer->mutex);
- DBUG_PRINT("info", ("wait for writers done. buffer: #%u 0x%lx",
- (uint) buffer->buffer_no, (ulong) buffer));
+ DBUG_PRINT("info", ("wait for writers done. buffer: #%u %p",
+ (uint) buffer->buffer_no, buffer));
}
DBUG_ASSERT(buffer->copy_to_buffer_in_progress == 0);
DBUG_VOID_RETURN;
@@ -2140,15 +2141,15 @@ static void translog_start_buffer(struct st_translog_buffer *buffer,
{
DBUG_ENTER("translog_start_buffer");
DBUG_PRINT("enter",
- ("Assign buffer: #%u (0x%lx) offset: 0x%lx(%lu)",
- (uint) buffer->buffer_no, (ulong) buffer,
- (ulong) LSN_OFFSET(log_descriptor.horizon),
- (ulong) LSN_OFFSET(log_descriptor.horizon)));
+ ("Assign buffer: #%u (%p) offset: 0x%x(%u)",
+ (uint) buffer->buffer_no, buffer,
+ (uint) LSN_OFFSET(log_descriptor.horizon),
+ (uint) LSN_OFFSET(log_descriptor.horizon)));
DBUG_ASSERT(buffer_no == buffer->buffer_no);
buffer->pre_force_close_horizon=
buffer->prev_last_lsn= buffer->last_lsn= LSN_IMPOSSIBLE;
- DBUG_PRINT("info", ("last_lsn and prev_last_lsn set to 0 buffer: 0x%lx",
- (ulong) buffer));
+ DBUG_PRINT("info", ("last_lsn and prev_last_lsn set to 0 buffer: %p",
+ buffer));
buffer->offset= log_descriptor.horizon;
buffer->next_buffer_offset= LSN_IMPOSSIBLE;
buffer->file= get_current_logfile();
@@ -2156,11 +2157,11 @@ static void translog_start_buffer(struct st_translog_buffer *buffer,
buffer->size= 0;
buffer->skipped_data= 0;
translog_cursor_init(cursor, buffer, buffer_no);
- DBUG_PRINT("info", ("file: #%ld (%d) init cursor #%u: 0x%lx "
+ DBUG_PRINT("info", ("file: #%ld (%d) init cursor #%u: %p "
"chaser: %d Size: %lu (%lu)",
(long) (buffer->file ? buffer->file->number : 0),
(buffer->file ? buffer->file->handler.file : -1),
- (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
+ (uint) cursor->buffer->buffer_no, cursor->buffer,
cursor->chaser, (ulong) cursor->buffer->size,
(ulong) (cursor->ptr - cursor->buffer->buffer)));
translog_check_cursor(cursor);
@@ -2249,9 +2250,9 @@ static my_bool translog_buffer_next(TRANSLOG_ADDRESS *horizon,
BUFFER_MAX_LSN(log_descriptor.buffers + old_buffer_no);
}
log_descriptor.buffers[old_buffer_no].next_buffer_offset= new_buffer->offset;
- DBUG_PRINT("info", ("prev_last_lsn set to " LSN_FMT " buffer: 0x%lx",
+ DBUG_PRINT("info", ("prev_last_lsn set to " LSN_FMT " buffer:%p",
LSN_IN_PARTS(new_buffer->prev_last_lsn),
- (ulong) new_buffer));
+ new_buffer));
translog_new_page_header(horizon, cursor);
DBUG_RETURN(0);
}
@@ -2578,9 +2579,9 @@ my_bool translog_prev_buffer_flush_wait(struct st_translog_buffer *buffer)
TRANSLOG_FILE *file= buffer->file;
uint8 ver= buffer->ver;
DBUG_ENTER("translog_prev_buffer_flush_wait");
- DBUG_PRINT("enter", ("buffer: 0x%lx #%u offset: " LSN_FMT " "
+ DBUG_PRINT("enter", ("buffer: %p #%u offset: " LSN_FMT " "
"prev sent: " LSN_FMT " prev offset: " LSN_FMT,
- (ulong) buffer, (uint) buffer->buffer_no,
+ buffer, (uint) buffer->buffer_no,
LSN_IN_PARTS(buffer->offset),
LSN_IN_PARTS(buffer->prev_sent_to_disk),
LSN_IN_PARTS(buffer->prev_buffer_offset)));
@@ -2682,7 +2683,7 @@ static my_bool translog_buffer_flush(struct st_translog_buffer *buffer)
DBUG_PRINT("error",
("Can't write page " LSN_FMT " to pagecache, error: %d",
buffer->file->number,
- (uint) (LSN_OFFSET(buffer->offset)+ i),
+ (uint)(LSN_OFFSET(buffer->offset)+ i),
my_errno));
translog_stop_writing();
DBUG_RETURN(1);
@@ -3212,9 +3213,9 @@ restart:
PAGECACHE_LOCK_READ :
PAGECACHE_LOCK_LEFT_UNLOCKED),
direct_link);
- DBUG_PRINT("info", ("Direct link is assigned to : 0x%lx * 0x%lx",
- (ulong) direct_link,
- (ulong)(direct_link ? *direct_link : NULL)));
+ DBUG_PRINT("info", ("Direct link is assigned to : %p * %p",
+ direct_link,
+ (direct_link ? *direct_link : NULL)));
data->was_recovered= file->was_recovered;
DBUG_RETURN(buffer);
}
@@ -3230,8 +3231,8 @@ restart:
static void translog_free_link(PAGECACHE_BLOCK_LINK *direct_link)
{
DBUG_ENTER("translog_free_link");
- DBUG_PRINT("info", ("Direct link: 0x%lx",
- (ulong) direct_link));
+ DBUG_PRINT("info", ("Direct link: %p",
+ direct_link));
if (direct_link)
pagecache_unlock_by_link(log_descriptor.pagecache, direct_link,
PAGECACHE_LOCK_READ_UNLOCK, PAGECACHE_UNPIN,
@@ -3724,8 +3725,8 @@ my_bool translog_init_with_table(const char *directory,
{
if (translog_buffer_init(log_descriptor.buffers + i, i))
goto err;
- DBUG_PRINT("info", ("translog_buffer buffer #%u: 0x%lx",
- i, (ulong) log_descriptor.buffers + i));
+ DBUG_PRINT("info", ("translog_buffer buffer #%u:%p",
+ i, log_descriptor.buffers + i));
}
/*
@@ -3980,9 +3981,9 @@ my_bool translog_init_with_table(const char *directory,
log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
(chunk_offset +
LSN_OFFSET(last_valid_page)));
- DBUG_PRINT("info", ("Move Page #%u: 0x%lx chaser: %d Size: %lu (%lu)",
+ DBUG_PRINT("info", ("Move Page #%u: %p chaser: %d Size: %lu (%lu)",
(uint) log_descriptor.bc.buffer_no,
- (ulong) log_descriptor.bc.buffer,
+ log_descriptor.bc.buffer,
log_descriptor.bc.chaser,
(ulong) log_descriptor.bc.buffer->size,
(ulong) (log_descriptor.bc.ptr - log_descriptor.bc.
@@ -4244,8 +4245,8 @@ static void translog_buffer_destroy(struct st_translog_buffer *buffer)
{
DBUG_ENTER("translog_buffer_destroy");
DBUG_PRINT("enter",
- ("Buffer #%u: 0x%lx file: %d offset: " LSN_FMT " size: %lu",
- (uint) buffer->buffer_no, (ulong) buffer,
+ ("Buffer #%u: %p file: %d offset: " LSN_FMT " size: %lu",
+ (uint) buffer->buffer_no, buffer,
(buffer->file ? buffer->file->handler.file : -1),
LSN_IN_PARTS(buffer->offset),
(ulong) buffer->size));
@@ -4263,7 +4264,7 @@ static void translog_buffer_destroy(struct st_translog_buffer *buffer)
translog_buffer_flush(buffer);
translog_buffer_unlock(buffer);
}
- DBUG_PRINT("info", ("Destroy mutex: 0x%lx", (ulong) &buffer->mutex));
+ DBUG_PRINT("info", ("Destroy mutex: %p", &buffer->mutex));
mysql_mutex_destroy(&buffer->mutex);
mysql_cond_destroy(&buffer->waiting_filling_buffer);
DBUG_VOID_RETURN;
@@ -4373,15 +4374,15 @@ static my_bool translog_page_next(TRANSLOG_ADDRESS *horizon,
TRANSLOG_PAGE_SIZE)))
DBUG_RETURN(1);
*prev_buffer= buffer;
- DBUG_PRINT("info", ("Buffer #%u (0x%lu): have to be flushed",
- (uint) buffer->buffer_no, (ulong) buffer));
+ DBUG_PRINT("info", ("Buffer #%u (%p): have to be flushed",
+ (uint) buffer->buffer_no, buffer));
}
else
{
- DBUG_PRINT("info", ("Use the same buffer #%u (0x%lu): "
+ DBUG_PRINT("info", ("Use the same buffer #%u (%p): "
"Buffer Size: %lu (%lu)",
(uint) buffer->buffer_no,
- (ulong) buffer,
+ buffer,
(ulong) cursor->buffer->size,
(ulong) (cursor->ptr - cursor->buffer->buffer)));
translog_finish_page(horizon, cursor);
@@ -4425,9 +4426,9 @@ static my_bool translog_write_data_on_page(TRANSLOG_ADDRESS *horizon,
cursor->current_page_fill+= length;
if (!cursor->chaser)
cursor->buffer->size+= length;
- DBUG_PRINT("info", ("Write data buffer #%u: 0x%lx "
+ DBUG_PRINT("info", ("Write data buffer #%u: %p "
"chaser: %d Size: %lu (%lu)",
- (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
+ (uint) cursor->buffer->buffer_no, cursor->buffer,
cursor->chaser, (ulong) cursor->buffer->size,
(ulong) (cursor->ptr - cursor->buffer->buffer)));
translog_check_cursor(cursor);
@@ -4480,9 +4481,9 @@ static my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon,
DBUG_ASSERT(cur < parts->elements);
part= parts->parts + cur;
buff= part->str;
- DBUG_PRINT("info", ("Part: %u Length: %lu left: %lu buff: 0x%lx",
+ DBUG_PRINT("info", ("Part: %u Length: %lu left: %lu buff: %p",
(uint) (cur + 1), (ulong) part->length, (ulong) left,
- (ulong) buff));
+ buff));
if (part->length > left)
{
@@ -4499,8 +4500,8 @@ static my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon,
cur++;
DBUG_PRINT("info", ("moved to next part (len: %lu)", (ulong) len));
}
- DBUG_PRINT("info", ("copy: 0x%lx <- 0x%lx %u",
- (ulong) cursor->ptr, (ulong)buff, (uint)len));
+ DBUG_PRINT("info", ("copy: %p <- %p %u",
+ cursor->ptr, buff, len));
if (likely(len))
{
memcpy(cursor->ptr, buff, len);
@@ -4509,9 +4510,9 @@ static my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon,
}
} while (left);
- DBUG_PRINT("info", ("Horizon: " LSN_FMT " Length %lu(0x%lx)",
+ DBUG_PRINT("info", ("Horizon: " LSN_FMT " Length %u(0x%x)",
LSN_IN_PARTS(*horizon),
- (ulong) length, (ulong) length));
+ length, length));
parts->current= cur;
(*horizon)+= length; /* offset increasing */
cursor->current_page_fill+= length;
@@ -4521,14 +4522,14 @@ static my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon,
We do not update parts->total_record_length here because it is
needed only before writing the record, to know its total length
*/
- DBUG_PRINT("info", ("Write parts buffer #%u: 0x%lx "
+ DBUG_PRINT("info", ("Write parts buffer #%u: %p "
"chaser: %d Size: %lu (%lu) "
- "Horizon: " LSN_FMT " buff offset: 0x%lx",
- (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
+ "Horizon: " LSN_FMT " buff offset: 0x%x",
+ (uint) cursor->buffer->buffer_no, cursor->buffer,
cursor->chaser, (ulong) cursor->buffer->size,
(ulong) (cursor->ptr - cursor->buffer->buffer),
LSN_IN_PARTS(*horizon),
- (ulong) (LSN_OFFSET(cursor->buffer->offset) +
+ (uint) (LSN_OFFSET(cursor->buffer->offset) +
cursor->buffer->size)));
translog_check_cursor(cursor);
@@ -4586,8 +4587,8 @@ translog_buffer_increase_writers(struct st_translog_buffer *buffer)
DBUG_ENTER("translog_buffer_increase_writers");
translog_buffer_lock_assert_owner(buffer);
buffer->copy_to_buffer_in_progress++;
- DBUG_PRINT("info", ("copy_to_buffer_in_progress. Buffer #%u 0x%lx progress: %d",
- (uint) buffer->buffer_no, (ulong) buffer,
+ DBUG_PRINT("info", ("copy_to_buffer_in_progress. Buffer #%u %p progress: %d",
+ (uint) buffer->buffer_no, buffer,
buffer->copy_to_buffer_in_progress));
DBUG_VOID_RETURN;
}
@@ -4607,8 +4608,8 @@ static void translog_buffer_decrease_writers(struct st_translog_buffer *buffer)
translog_buffer_lock_assert_owner(buffer);
buffer->copy_to_buffer_in_progress--;
DBUG_PRINT("info",
- ("copy_to_buffer_in_progress. Buffer #%u 0x%lx progress: %d",
- (uint) buffer->buffer_no, (ulong) buffer,
+ ("copy_to_buffer_in_progress. Buffer #%u %p progress: %d",
+ (uint) buffer->buffer_no, buffer,
buffer->copy_to_buffer_in_progress));
if (buffer->copy_to_buffer_in_progress == 0)
mysql_cond_broadcast(&buffer->waiting_filling_buffer);
@@ -4818,7 +4819,7 @@ static my_bool translog_advance_pointer(int pages, uint16 last_page_data,
(uint) LSN_OFFSET(log_descriptor.bc.buffer->offset),
log_descriptor.bc.buffer->size,
(uint) (LSN_OFFSET(log_descriptor.bc.buffer->offset) +
- log_descriptor.bc.buffer->size),
+ log_descriptor.bc.buffer->size),
(uint) LSN_OFFSET(log_descriptor.horizon)));
DBUG_ASSERT(LSN_OFFSET(log_descriptor.bc.buffer->offset) +
log_descriptor.bc.buffer->size ==
@@ -5418,8 +5419,8 @@ static uchar *translog_get_LSN_from_diff(LSN base_lsn, uchar *src, uchar *dst)
uint32 file_no, rec_offset;
uint8 code;
DBUG_ENTER("translog_get_LSN_from_diff");
- DBUG_PRINT("enter", ("Base: " LSN_FMT " src: 0x%lx dst 0x%lx",
- LSN_IN_PARTS(base_lsn), (ulong) src, (ulong) dst));
+ DBUG_PRINT("enter", ("Base: " LSN_FMT " src:%p dst %p",
+ LSN_IN_PARTS(base_lsn), src, dst));
first_byte= *((uint8*) src);
code= first_byte >> 6; /* Length is in 2 most significant bits */
first_byte&= 0x3F;
@@ -5436,19 +5437,19 @@ static uchar *translog_get_LSN_from_diff(LSN base_lsn, uchar *src, uchar *dst)
in real life)
*/
memcpy(dst, src + 1, LSN_STORE_SIZE);
- DBUG_PRINT("info", ("Special case of full LSN, new src: 0x%lx",
- (ulong) (src + 1 + LSN_STORE_SIZE)));
+ DBUG_PRINT("info", ("Special case of full LSN, new src:%p",
+ src + 1 + LSN_STORE_SIZE));
DBUG_RETURN(src + 1 + LSN_STORE_SIZE);
}
- rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 8) + *((uint8*)src));
+ rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 8) | *((uint8*)src));
break;
case 1:
diff= uint2korr(src);
- rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 16) + diff);
+ rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 16) | diff);
break;
case 2:
diff= uint3korr(src);
- rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 24) + diff);
+ rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 24) | diff);
break;
case 3:
{
@@ -5472,7 +5473,7 @@ static uchar *translog_get_LSN_from_diff(LSN base_lsn, uchar *src, uchar *dst)
lsn= MAKE_LSN(file_no, rec_offset);
src+= code + 1;
lsn_store(dst, lsn);
- DBUG_PRINT("info", ("new src: 0x%lx", (ulong) src));
+ DBUG_PRINT("info", ("new src:%p", src));
DBUG_RETURN(src);
}
@@ -5506,7 +5507,7 @@ static void translog_relative_LSN_encode(struct st_translog_parts *parts,
{
uint copied= part->length;
LEX_CUSTRING *next_part;
- DBUG_PRINT("info", ("Using buffer: 0x%lx", (ulong) compressed_LSNs));
+ DBUG_PRINT("info", ("Using buffer:%p", compressed_LSNs));
memcpy(buffer, part->str, part->length);
next_part= parts->parts + parts->current + 1;
do
@@ -6724,8 +6725,8 @@ my_bool translog_scanner_init(LSN lsn,
my_bool use_direct)
{
DBUG_ENTER("translog_scanner_init");
- DBUG_PRINT("enter", ("Scanner: 0x%lx LSN: " LSN_FMT,
- (ulong) scanner, LSN_IN_PARTS(lsn)));
+ DBUG_PRINT("enter", ("Scanner: %p LSN: " LSN_FMT,
+ scanner, LSN_IN_PARTS(lsn)));
DBUG_ASSERT(translog_status == TRANSLOG_OK ||
translog_status == TRANSLOG_READONLY);
@@ -6762,7 +6763,7 @@ my_bool translog_scanner_init(LSN lsn,
void translog_destroy_scanner(TRANSLOG_SCANNER_DATA *scanner)
{
DBUG_ENTER("translog_destroy_scanner");
- DBUG_PRINT("enter", ("Scanner: 0x%lx", (ulong)scanner));
+ DBUG_PRINT("enter", ("Scanner: %p", scanner));
translog_free_link(scanner->direct_link);
DBUG_VOID_RETURN;
}
@@ -7297,7 +7298,7 @@ int translog_read_next_record_header(TRANSLOG_SCANNER_DATA *scanner,
DBUG_ENTER("translog_read_next_record_header");
buff->groups_no= 0; /* to be sure that we will free it correctly */
- DBUG_PRINT("enter", ("scanner: 0x%lx", (ulong) scanner));
+ DBUG_PRINT("enter", ("scanner: %p", scanner));
DBUG_PRINT("info", ("Scanner: Cur: " LSN_FMT " Hrz: " LSN_FMT " "
"Lst: " LSN_FMT " Offset: %u(%x) fixed: %d",
LSN_IN_PARTS(scanner->page_addr),
@@ -7626,8 +7627,8 @@ static void translog_force_current_buffer_to_finish()
old_buffer,
LSN_IN_PARTS(old_buffer->offset),
LSN_FILE_NO(log_descriptor.horizon),
- (uint) (LSN_OFFSET(log_descriptor.horizon) -
- log_descriptor.bc.current_page_fill),
+ (uint)(LSN_OFFSET(log_descriptor.horizon) -
+ log_descriptor.bc.current_page_fill),
(ulong) old_buffer->size,
(ulong) (log_descriptor.bc.ptr -log_descriptor.bc.
buffer->buffer),
@@ -8290,7 +8291,7 @@ int translog_assign_id_to_share(MARIA_HA *tbl_info, TRN *trn)
}
i= 1; /* scan the whole array */
} while (id == 0);
- DBUG_PRINT("info", ("id_to_share: 0x%lx -> %u", (ulong)share, id));
+ DBUG_PRINT("info", ("id_to_share: %p -> %u", share, id));
fileid_store(log_data, id);
log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
@@ -8341,8 +8342,8 @@ int translog_assign_id_to_share(MARIA_HA *tbl_info, TRN *trn)
void translog_deassign_id_from_share(MARIA_SHARE *share)
{
- DBUG_PRINT("info", ("id_to_share: 0x%lx id %u -> 0",
- (ulong)share, share->id));
+ DBUG_PRINT("info", ("id_to_share: %p id %u -> 0",
+ share, share->id));
/*
We don't need any mutex as we are called only when closing the last
instance of the table or at the end of REPAIR: no writes can be
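
In translog_get_LSN_from_diff() the offset delta is rebuilt from a 6-bit high part ("first_byte") and one to three stored low bytes. Changing "+" to "|" does not alter the value, because the operands occupy disjoint bit ranges; it documents that no carry into the high part is possible. A small self-contained sketch of the code == 0 case under that disjoint-range assumption (the helper name is invented):

#include <assert.h>
#include <stdint.h>

/* first_byte has already been masked with 0x3F, so its bits cannot
   overlap the low byte once shifted left by 8: '|' == '+' here. */
static uint32_t assemble_delta(uint8_t first_byte, uint8_t low)
{
  return ((uint32_t) first_byte << 8) | low;
}

int main(void)
{
  assert(assemble_delta(0x3F, 0xFF) == 0x3FFFU);
  assert(assemble_delta(0x01, 0x00) == 0x0100U);
  return 0;
}
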
diff --git a/storage/maria/ma_loghandler.h b/storage/maria/ma_loghandler.h
index 62efa5ed107..ef0f46c9465 100644
--- a/storage/maria/ma_loghandler.h
+++ b/storage/maria/ma_loghandler.h
@@ -33,9 +33,9 @@
We allow all kinds of protection to be switched on together for people who
are really unsure about their hardware/OS.
*/
-#define TRANSLOG_PAGE_CRC 1
-#define TRANSLOG_SECTOR_PROTECTION (1<<1)
-#define TRANSLOG_RECORD_CRC (1<<2)
+#define TRANSLOG_PAGE_CRC 1U
+#define TRANSLOG_SECTOR_PROTECTION (1U<<1)
+#define TRANSLOG_RECORD_CRC (1U<<2)
#define TRANSLOG_FLAGS_NUM ((TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION | \
TRANSLOG_RECORD_CRC) + 1)
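
The U suffix makes the protection flags unsigned constants, so combining or testing them against unsigned fields raises no signed/unsigned warnings, and the idiom stays well defined even if a flag ever occupies bit 31 (1 << 31 overflows a 32-bit signed int, which is undefined behaviour, while 1U << 31 is not). A short illustration with made-up flag names:

#include <stdio.h>

#define DEMO_PAGE_CRC     1U
#define DEMO_SECTOR_PROT  (1U << 1)
#define DEMO_RECORD_CRC   (1U << 2)
#define DEMO_HIGH_BIT     (1U << 31)   /* fine; 1 << 31 would not be */

int main(void)
{
  unsigned flags= DEMO_PAGE_CRC | DEMO_RECORD_CRC;
  if (flags & DEMO_RECORD_CRC)
    printf("record CRC enabled, flags=0x%x\n", flags);
  return 0;
}
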
diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c
index 0c4e4f900b9..87bded7ed50 100644
--- a/storage/maria/ma_open.c
+++ b/storage/maria/ma_open.c
@@ -1,4 +1,5 @@
/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+ Copyright (c) 2009, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -249,20 +250,6 @@ err:
} /* maria_clone_internal */
-/* Make a clone of a maria table */
-
-MARIA_HA *maria_clone(MARIA_SHARE *share, int mode)
-{
- MARIA_HA *new_info;
- mysql_mutex_lock(&THR_LOCK_maria);
- new_info= maria_clone_internal(share, mode,
- share->data_file_type == BLOCK_RECORD ?
- share->bitmap.file.file : -1, 0);
- mysql_mutex_unlock(&THR_LOCK_maria);
- return new_info;
-}
-
-
/******************************************************************************
open a MARIA table
@@ -282,7 +269,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
size_t info_length;
char name_buff[FN_REFLEN], org_name[FN_REFLEN], index_name[FN_REFLEN],
data_name[FN_REFLEN];
- uchar *disk_cache, *disk_pos, *end_pos;
+ uchar *UNINIT_VAR(disk_cache), *disk_pos, *end_pos;
MARIA_HA info, *UNINIT_VAR(m_info), *old_info;
MARIA_SHARE share_buff,*share;
double *rec_per_key_part;
@@ -567,7 +554,10 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
set_if_smaller(max_data_file_length, INT_MAX32);
set_if_smaller(max_key_file_length, INT_MAX32);
#endif
- share->base.max_data_file_length=(my_off_t) max_data_file_length;
+ /* For internal temporary tables, max_data_file_length is already set */
+ if (!internal_table || !share->base.max_data_file_length)
+ share->base.max_data_file_length=(my_off_t) max_data_file_length;
+ DBUG_ASSERT(share->base.max_data_file_length);
share->base.max_key_file_length=(my_off_t) max_key_file_length;
if (share->options & HA_OPTION_COMPRESS_RECORD)
@@ -911,8 +901,12 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
share->block_size * keys : 0));
my_free(disk_cache);
_ma_setup_functions(share);
+ max_data_file_length= share->base.max_data_file_length;
if ((*share->once_init)(share, info.dfile.file))
goto err;
+ if (internal_table)
+ set_if_smaller(share->base.max_data_file_length,
+ max_data_file_length);
if (share->now_transactional)
{
/* Setup initial state that is visible for all */
@@ -1367,7 +1361,7 @@ uint _ma_state_info_write(MARIA_SHARE *share, uint pWrite)
if (pWrite & MA_STATE_INFO_WRITE_LOCK)
mysql_mutex_lock(&share->intern_lock);
- else if (maria_multi_threaded)
+ else if (maria_multi_threaded && !share->temporary)
mysql_mutex_assert_owner(&share->intern_lock);
if (share->base.born_transactional && translog_status == TRANSLOG_OK &&
!maria_in_recovery)
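
The maria_open() hunks keep a limit that the caller set before opening an internal temporary table: the computed default is stored only when no limit exists yet, the value is saved across (*share->once_init)(), and set_if_smaller() re-applies the tighter bound afterwards. A hedged sketch of that save-and-clamp pattern, with simplified stand-in types and names:

#include <assert.h>

typedef unsigned long long demo_off_t;

#define demo_set_if_smaller(a, b) do { if ((a) > (b)) (a)= (b); } while (0)

static void demo_once_init(demo_off_t *len)
{
  *len= 1ULL << 40;                       /* engine recomputes the limit */
}

static void demo_open(demo_off_t *max_len, int internal_table,
                      demo_off_t computed_default)
{
  demo_off_t saved;
  if (!internal_table || !*max_len)       /* keep a pre-set limit */
    *max_len= computed_default;
  saved= *max_len;
  demo_once_init(max_len);                /* may overwrite it */
  if (internal_table)
    demo_set_if_smaller(*max_len, saved); /* restore the tighter bound */
}

int main(void)
{
  demo_off_t len= 1ULL << 20;             /* caller's limit */
  demo_open(&len, 1, 1ULL << 32);
  assert(len == 1ULL << 20);              /* the caller's bound survives */
  return 0;
}
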
diff --git a/storage/maria/ma_packrec.c b/storage/maria/ma_packrec.c
index 27c5538e51b..eab079f4fb8 100644
--- a/storage/maria/ma_packrec.c
+++ b/storage/maria/ma_packrec.c
@@ -1,4 +1,5 @@
/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+ Copyright (c) 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -1157,10 +1158,10 @@ static void decode_bytes(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
bit_buff->error=1;
return; /* Can't be right */
}
- bit_buff->current_byte= (bit_buff->current_byte << 32) +
- ((((uint) bit_buff->pos[3])) +
- (((uint) bit_buff->pos[2]) << 8) +
- (((uint) bit_buff->pos[1]) << 16) +
+ bit_buff->current_byte= (bit_buff->current_byte << 32) |
+ ((((uint) bit_buff->pos[3])) |
+ (((uint) bit_buff->pos[2]) << 8) |
+ (((uint) bit_buff->pos[1]) << 16) |
(((uint) bit_buff->pos[0]) << 24));
bit_buff->pos+=4;
bits+=32;
@@ -1251,23 +1252,23 @@ static void decode_bytes(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
return; /* Can't be right */
}
#if BITS_SAVED == 32
- bit_buff->current_byte= (bit_buff->current_byte << 24) +
- (((uint) ((uchar) bit_buff->pos[2]))) +
- (((uint) ((uchar) bit_buff->pos[1])) << 8) +
+ bit_buff->current_byte= (bit_buff->current_byte << 24) |
+ (((uint) ((uchar) bit_buff->pos[2]))) |
+ (((uint) ((uchar) bit_buff->pos[1])) << 8) |
(((uint) ((uchar) bit_buff->pos[0])) << 16);
bit_buff->pos+=3;
bits+=24;
#else
if (bits) /* We must have at least 9 bits */
{
- bit_buff->current_byte= (bit_buff->current_byte << 8) +
+ bit_buff->current_byte= (bit_buff->current_byte << 8) |
(uint) ((uchar) bit_buff->pos[0]);
bit_buff->pos++;
bits+=8;
}
else
{
- bit_buff->current_byte= ((uint) ((uchar) bit_buff->pos[0]) << 8) +
+ bit_buff->current_byte= ((uint) ((uchar) bit_buff->pos[0]) << 8) |
((uint) ((uchar) bit_buff->pos[1]));
bit_buff->pos+=2;
bits+=16;
@@ -1291,14 +1292,14 @@ static void decode_bytes(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
if (bits < 8)
{ /* We don't need to check end */
#if BITS_SAVED == 32
- bit_buff->current_byte= (bit_buff->current_byte << 24) +
- (((uint) ((uchar) bit_buff->pos[2]))) +
- (((uint) ((uchar) bit_buff->pos[1])) << 8) +
+ bit_buff->current_byte= (bit_buff->current_byte << 24) |
+ (((uint) ((uchar) bit_buff->pos[2]))) |
+ (((uint) ((uchar) bit_buff->pos[1])) << 8) |
(((uint) ((uchar) bit_buff->pos[0])) << 16);
bit_buff->pos+=3;
bits+=24;
#else
- bit_buff->current_byte= (bit_buff->current_byte << 8) +
+ bit_buff->current_byte= (bit_buff->current_byte << 8) |
(uint) ((uchar) bit_buff->pos[0]);
bit_buff->pos+=1;
bits+=8;
@@ -1488,25 +1489,25 @@ static void fill_buffer(MARIA_BIT_BUFF *bit_buff)
return;
}
#if BITS_SAVED == 64
- bit_buff->current_byte= ((((uint) ((uchar) bit_buff->pos[7]))) +
- (((uint) ((uchar) bit_buff->pos[6])) << 8) +
- (((uint) ((uchar) bit_buff->pos[5])) << 16) +
- (((uint) ((uchar) bit_buff->pos[4])) << 24) +
+ bit_buff->current_byte= ((((uint) ((uchar) bit_buff->pos[7]))) |
+ (((uint) ((uchar) bit_buff->pos[6])) << 8) |
+ (((uint) ((uchar) bit_buff->pos[5])) << 16) |
+ (((uint) ((uchar) bit_buff->pos[4])) << 24) |
((ulonglong)
- ((((uint) ((uchar) bit_buff->pos[3]))) +
- (((uint) ((uchar) bit_buff->pos[2])) << 8) +
- (((uint) ((uchar) bit_buff->pos[1])) << 16) +
+ ((((uint) ((uchar) bit_buff->pos[3]))) |
+ (((uint) ((uchar) bit_buff->pos[2])) << 8) |
+ (((uint) ((uchar) bit_buff->pos[1])) << 16) |
(((uint) ((uchar) bit_buff->pos[0])) << 24)) << 32));
bit_buff->pos+=8;
#else
#if BITS_SAVED == 32
- bit_buff->current_byte= (((uint) ((uchar) bit_buff->pos[3])) +
- (((uint) ((uchar) bit_buff->pos[2])) << 8) +
- (((uint) ((uchar) bit_buff->pos[1])) << 16) +
+ bit_buff->current_byte= (((uint) ((uchar) bit_buff->pos[3])) |
+ (((uint) ((uchar) bit_buff->pos[2])) << 8) |
+ (((uint) ((uchar) bit_buff->pos[1])) << 16) |
(((uint) ((uchar) bit_buff->pos[0])) << 24));
bit_buff->pos+=4;
#else
- bit_buff->current_byte= (uint) (((uint) ((uchar) bit_buff->pos[1]))+
+ bit_buff->current_byte= (uint) (((uint) ((uchar) bit_buff->pos[1])) |
(((uint) ((uchar) bit_buff->pos[0])) << 8));
bit_buff->pos+=2;
#endif
@@ -1564,8 +1565,13 @@ my_bool _ma_memmap_file(MARIA_HA *info)
void _ma_unmap_file(MARIA_HA *info)
{
- my_munmap((char*) info->s->file_map,
- (size_t) info->s->mmaped_length + MEMMAP_EXTRA_MARGIN);
+ MARIA_SHARE *share= info->s;
+ my_munmap((char*) share->file_map,
+ (size_t) share->mmaped_length + MEMMAP_EXTRA_MARGIN);
+ share->file_map= 0;
+ share->file_read= _ma_nommap_pread;
+ share->file_write= _ma_nommap_pwrite;
+ info->opt_flag&= ~MEMMAP_USED;
}
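
In decode_bytes() and fill_buffer() the bytes packed into bit_buff->current_byte each land in their own 8-bit slot, so replacing "+" with "|" leaves every value unchanged while stating explicitly that the terms cannot carry into one another. The BITS_SAVED == 32 case boils down to a big-endian 32-bit load (the helper name is illustrative):

#include <assert.h>
#include <stdint.h>

/* Big-endian byte-at-a-time load; each shifted byte occupies a
   disjoint 8-bit range, so '|' and '+' are interchangeable here. */
static uint32_t load_be32(const uint8_t *pos)
{
  return ((uint32_t) pos[3])       |
         ((uint32_t) pos[2] << 8)  |
         ((uint32_t) pos[1] << 16) |
         ((uint32_t) pos[0] << 24);
}

int main(void)
{
  const uint8_t buf[4]= { 0x12, 0x34, 0x56, 0x78 };
  assert(load_be32(buf) == 0x12345678U);
  return 0;
}
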
diff --git a/storage/maria/ma_page.c b/storage/maria/ma_page.c
index e1d41d1a1d8..e55c5288d1d 100644
--- a/storage/maria/ma_page.c
+++ b/storage/maria/ma_page.c
@@ -563,8 +563,8 @@ my_bool _ma_compact_keypage(MARIA_PAGE *ma_page, TrID min_read_from)
{
if (!(page= (*ma_page->keyinfo->skip_key)(&key, 0, 0, page)))
{
- DBUG_PRINT("error",("Couldn't find last key: page_pos: 0x%lx",
- (long) page));
+ DBUG_PRINT("error",("Couldn't find last key: page_pos: %p",
+ page));
_ma_set_fatal_error(share, HA_ERR_CRASHED);
DBUG_RETURN(1);
}
diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c
index 4ae2821d532..f8c83e12a18 100644
--- a/storage/maria/ma_pagecache.c
+++ b/storage/maria/ma_pagecache.c
@@ -97,10 +97,10 @@
#define PCBLOCK_INFO(B) \
DBUG_PRINT("info", \
- ("block: 0x%lx fd: %lu page: %lu status: 0x%x " \
- "hshL: 0x%lx requests: %u/%u wrlocks: %u rdlocks: %u " \
+ ("block: %p fd: %lu page: %lu status: 0x%x " \
+ "hshL: %p requests: %u/%u wrlocks: %u rdlocks: %u " \
"rdlocks_q: %u pins: %u type: %s", \
- (ulong)(B), \
+ (B), \
(ulong)((B)->hash_link ? \
(B)->hash_link->file.file : \
0), \
@@ -108,7 +108,7 @@
(B)->hash_link->pageno : \
0), \
(uint) (B)->status, \
- (ulong)(B)->hash_link, \
+ (B)->hash_link, \
(uint) (B)->requests, \
(uint)((B)->hash_link ? \
(B)->hash_link->requests : \
@@ -659,9 +659,9 @@ static my_bool pagecache_fwrite(PAGECACHE *pagecache,
/* Todo: Integrate this with write_callback so we have only one callback */
if ((*filedesc->flush_log_callback)(&args))
DBUG_RETURN(1);
- DBUG_PRINT("info", ("pre_write_hook: 0x%lx data: 0x%lx",
- (ulong) filedesc->pre_write_hook,
- (ulong) filedesc->callback_data));
+ DBUG_PRINT("info", ("pre_write_hook:%p data: %p",
+ filedesc->pre_write_hook,
+ filedesc->callback_data));
if ((*filedesc->pre_write_hook)(&args))
{
DBUG_PRINT("error", ("write callback problem"));
@@ -888,11 +888,11 @@ size_t init_pagecache(PAGECACHE *pagecache, size_t use_mem,
pagecache->waiting_for_hash_link.last_thread= NULL;
pagecache->waiting_for_block.last_thread= NULL;
DBUG_PRINT("exit",
- ("disk_blocks: %zu block_root: 0x%lx hash_entries: %zu\
- hash_root: 0x%lx hash_links: %zu hash_link_root: 0x%lx",
- pagecache->disk_blocks, (long) pagecache->block_root,
- pagecache->hash_entries, (long) pagecache->hash_root,
- pagecache->hash_links, (long) pagecache->hash_link_root));
+ ("disk_blocks: %zu block_root: %p hash_entries: %zu\
+ hash_root: %p hash_links: %zu hash_link_root: %p",
+ pagecache->disk_blocks, pagecache->block_root,
+ pagecache->hash_entries, pagecache->hash_root,
+ pagecache->hash_links, pagecache->hash_link_root));
pagecache->blocks= pagecache->disk_blocks > 0 ? pagecache->disk_blocks : 0;
DBUG_RETURN((size_t)pagecache->disk_blocks);
@@ -1078,7 +1078,7 @@ static inline void dec_counter_for_resize_op(PAGECACHE *pagecache)
{
DBUG_PRINT("signal",
("thread %s %ld", last_thread->next->name,
- last_thread->next->id));
+ (ulong) last_thread->next->id));
pagecache_pthread_cond_signal(&last_thread->next->suspend);
}
}
@@ -1164,7 +1164,7 @@ void check_pagecache_is_cleaned_up(PAGECACHE *pagecache)
void end_pagecache(PAGECACHE *pagecache, my_bool cleanup)
{
DBUG_ENTER("end_pagecache");
- DBUG_PRINT("enter", ("key_cache: 0x%lx", (long) pagecache));
+ DBUG_PRINT("enter", ("key_cache: %p", pagecache));
if (!pagecache->inited)
DBUG_VOID_RETURN;
@@ -1187,14 +1187,14 @@ void end_pagecache(PAGECACHE *pagecache, my_bool cleanup)
pagecache->blocks_changed= 0;
}
- DBUG_PRINT("status", ("used: %zu changed: %zu w_requests: %lu "
- "writes: %lu r_requests: %lu reads: %lu",
- pagecache->blocks_used,
- pagecache->global_blocks_changed,
- (ulong) pagecache->global_cache_w_requests,
- (ulong) pagecache->global_cache_write,
- (ulong) pagecache->global_cache_r_requests,
- (ulong) pagecache->global_cache_read));
+ DBUG_PRINT("status", ("used: %zu changed: %zu w_requests: %llu "
+ "writes: %llu r_requests: %llu reads: %llu",
+ pagecache->blocks_used,
+ pagecache->global_blocks_changed,
+ pagecache->global_cache_w_requests,
+ pagecache->global_cache_write,
+ pagecache->global_cache_r_requests,
+ pagecache->global_cache_read));
if (cleanup)
{
@@ -1342,7 +1342,8 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
*/
if ((PAGECACHE_HASH_LINK *) thread->keycache_link == hash_link)
{
- DBUG_PRINT("signal", ("thread: %s %ld", thread->name, thread->id));
+ DBUG_PRINT("signal", ("thread: %s %ld", thread->name,
+ (ulong) thread->id));
pagecache_pthread_cond_signal(&thread->suspend);
wqueue_unlink_from_queue(&pagecache->waiting_for_block, thread);
block->requests++;
@@ -1411,7 +1412,7 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block)
{
DBUG_ENTER("unlink_block");
- DBUG_PRINT("pagecache", ("unlink 0x%lx", (ulong)block));
+ DBUG_PRINT("pagecache", ("unlink %p", block));
DBUG_ASSERT(block->next_used != NULL);
if (block->next_used == block)
{
@@ -1436,8 +1437,8 @@ static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block)
KEYCACHE_DBUG_ASSERT(pagecache->blocks_available != 0);
pagecache->blocks_available--;
KEYCACHE_DBUG_PRINT("pagecache",
- ("unlinked block: 0x%lx (%u) status: %x #requests: %u #available: %u",
- (ulong)block, PCBLOCK_NUMBER(pagecache, block),
+ ("unlinked block: %p (%u) status: %x #requests: %u #available: %u",
+ block, PCBLOCK_NUMBER(pagecache, block),
block->status,
block->requests, pagecache->blocks_available));
PCBLOCK_INFO(block);
@@ -1505,8 +1506,8 @@ static void unreg_request(PAGECACHE *pagecache,
PAGECACHE_BLOCK_LINK *block, int at_end)
{
DBUG_ENTER("unreg_request");
- DBUG_PRINT("enter", ("block 0x%lx (%u) status: %x requests: %u",
- (ulong)block, PCBLOCK_NUMBER(pagecache, block),
+ DBUG_PRINT("enter", ("block %p (%u) status: %x requests: %u",
+ block, PCBLOCK_NUMBER(pagecache, block),
block->status, block->requests));
PCBLOCK_INFO(block);
DBUG_ASSERT(block->requests > 0);
@@ -1523,7 +1524,7 @@ static void unreg_request(PAGECACHE *pagecache,
pagecache->warm_blocks--;
block->temperature= PCBLOCK_HOT;
KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %zu",
- pagecache->warm_blocks));
+ pagecache->warm_blocks));
}
link_block(pagecache, block, hot, (my_bool)at_end);
block->last_hit_time= pagecache->time;
@@ -1542,7 +1543,7 @@ static void unreg_request(PAGECACHE *pagecache,
block->temperature= PCBLOCK_WARM;
}
KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %zu",
- pagecache->warm_blocks));
+ pagecache->warm_blocks));
}
}
DBUG_VOID_RETURN;
@@ -1580,7 +1581,7 @@ static inline void wait_for_readers(PAGECACHE *pagecache
DBUG_ENTER("wait_for_readers");
DBUG_PRINT("wait",
("suspend thread: %s %ld block: %u",
- thread->name, thread->id,
+ thread->name, (ulong) thread->id,
PCBLOCK_NUMBER(pagecache, block)));
block->condvar= &thread->suspend;
pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock);
@@ -1605,7 +1606,7 @@ static void wait_for_flush(PAGECACHE *pagecache
do
{
DBUG_PRINT("wait",
- ("suspend thread %s %ld", thread->name, thread->id));
+ ("suspend thread %s %ld", thread->name, (ulong) thread->id));
pagecache_pthread_cond_wait(&thread->suspend,
&pagecache->cache_lock);
}
@@ -1672,7 +1673,8 @@ static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link)
if (page->file.file == hash_link->file.file &&
page->pageno == hash_link->pageno)
{
- DBUG_PRINT("signal", ("thread %s %ld", thread->name, thread->id));
+ DBUG_PRINT("signal", ("thread %s %ld", thread->name,
+ (ulong) thread->id));
pagecache_pthread_cond_signal(&thread->suspend);
wqueue_unlink_from_queue(&pagecache->waiting_for_hash_link, thread);
}
@@ -1812,7 +1814,7 @@ restart:
thread->keycache_link= (void *) &page;
wqueue_link_into_queue(&pagecache->waiting_for_hash_link, thread);
DBUG_PRINT("wait",
- ("suspend thread %s %ld", thread->name, thread->id));
+ ("suspend thread %s %ld", thread->name, (ulong) thread->id));
pagecache_pthread_cond_wait(&thread->suspend,
&pagecache->cache_lock);
thread->keycache_link= NULL;
@@ -1997,7 +1999,8 @@ restart:
do
{
DBUG_PRINT("wait",
- ("suspend thread %s %ld", thread->name, thread->id));
+ ("suspend thread %s %ld", thread->name,
+ (ulong) thread->id));
pagecache_pthread_cond_wait(&thread->suspend,
&pagecache->cache_lock);
}
@@ -2054,8 +2057,8 @@ restart:
block->hash_link= hash_link;
hash_link->block= block;
page_status= PAGE_TO_BE_READ;
- DBUG_PRINT("info", ("page to be read set for page 0x%lx (%u)",
- (ulong) block, PCBLOCK_NUMBER(pagecache, block)));
+ DBUG_PRINT("info", ("page to be read set for page %p (%u)",
+ block, PCBLOCK_NUMBER(pagecache, block)));
KEYCACHE_PRINT("find_block",
("got free or never used block %u",
PCBLOCK_NUMBER(pagecache, block)));
@@ -2088,7 +2091,8 @@ restart:
do
{
DBUG_PRINT("wait",
- ("suspend thread %s %ld", thread->name, thread->id));
+ ("suspend thread %s %ld", thread->name,
+ (ulong) thread->id));
pagecache_pthread_cond_wait(&thread->suspend,
&pagecache->cache_lock);
}
@@ -2189,8 +2193,7 @@ restart:
my_debug_put_break_here();
#endif
page_status= PAGE_TO_BE_READ;
- DBUG_PRINT("info", ("page to be read set for page 0x%lx",
- (ulong)block));
+ DBUG_PRINT("info", ("page to be read set for page %p", block));
KEYCACHE_DBUG_ASSERT(block->hash_link->block == block);
KEYCACHE_DBUG_ASSERT(hash_link->block->hash_link == hash_link);
@@ -2237,13 +2240,12 @@ restart:
KEYCACHE_DBUG_ASSERT(page_status != -1);
*page_st= page_status;
DBUG_PRINT("info",
- ("block: 0x%lx fd: %u pos: %lu block->status: %u page_status: %u",
- (ulong) block, (uint) file->file,
+ ("block: %p fd: %u pos: %lu block->status: %u page_status: %u",
+ block, (uint) file->file,
(ulong) pageno, block->status, (uint) page_status));
KEYCACHE_PRINT("find_block",
- ("block: 0x%lx fd: %d pos: %lu block->status: %u page_status: %d",
- (ulong) block,
- file->file, (ulong) pageno, block->status,
+ ("block: %p fd: %d pos: %lu block->status: %u page_status: %d",
+ block, file->file, (ulong) pageno, block->status,
page_status));
#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
@@ -2258,9 +2260,7 @@ restart:
static void add_pin(PAGECACHE_BLOCK_LINK *block)
{
DBUG_ENTER("add_pin");
- DBUG_PRINT("enter", ("block: 0x%lx pins: %u",
- (ulong) block,
- block->pins));
+ DBUG_PRINT("enter", ("block: %p pins: %u", block, block->pins));
PCBLOCK_INFO(block);
block->pins++;
#ifndef DBUG_OFF
@@ -2281,9 +2281,8 @@ static void remove_pin(PAGECACHE_BLOCK_LINK *block, my_bool any
)
{
DBUG_ENTER("remove_pin");
- DBUG_PRINT("enter", ("block: 0x%lx pins: %u any: %d",
- (ulong) block,
- block->pins, (int)any));
+ DBUG_PRINT("enter", ("block: %p pins: %u any: %d", block, block->pins,
+ (int)any));
PCBLOCK_INFO(block);
DBUG_ASSERT(block->pins > 0);
block->pins--;
@@ -2354,14 +2353,14 @@ static my_bool pagecache_wait_lock(PAGECACHE *pagecache,
/* Lock failed we will wait */
struct st_my_thread_var *thread= my_thread_var;
DBUG_ENTER("pagecache_wait_lock");
- DBUG_PRINT("info", ("fail to lock, waiting... 0x%lx", (ulong)block));
+ DBUG_PRINT("info", ("fail to lock, waiting... %p", block));
thread->lock_type= lock_type;
wqueue_add_to_queue(&block->wqueue[COND_FOR_WRLOCK], thread);
dec_counter_for_resize_op(pagecache);
do
{
DBUG_PRINT("wait",
- ("suspend thread %s %ld", thread->name, thread->id));
+ ("suspend thread %s %ld", thread->name, (ulong) thread->id));
pagecache_pthread_cond_wait(&thread->suspend,
&pagecache->cache_lock);
}
@@ -2373,10 +2372,9 @@ static my_bool pagecache_wait_lock(PAGECACHE *pagecache,
file.file != block->hash_link->file.file ||
pageno != block->hash_link->pageno)
{
- DBUG_PRINT("info", ("the block 0x%lx changed => need retry "
+ DBUG_PRINT("info", ("the block %p changed => need retry "
"status: %x files %d != %d or pages %lu != %lu",
- (ulong)block, block->status,
- file.file,
+ block, block->status, file.file,
block->hash_link ? block->hash_link->file.file : -1,
(ulong) pageno,
(ulong) (block->hash_link ? block->hash_link->pageno : 0)));
@@ -2416,10 +2414,9 @@ static my_bool get_wrlock(PAGECACHE *pagecache,
pgcache_page_no_t pageno= block->hash_link->pageno;
pthread_t locker= pthread_self();
DBUG_ENTER("get_wrlock");
- DBUG_PRINT("info", ("the block 0x%lx "
+ DBUG_PRINT("info", ("the block %p "
"files %d(%d) pages %lu(%lu)",
- (ulong) block,
- file.file, block->hash_link->file.file,
+ block, file.file, block->hash_link->file.file,
(ulong) pageno, (ulong) block->hash_link->pageno));
PCBLOCK_INFO(block);
/*
@@ -2437,7 +2434,7 @@ static my_bool get_wrlock(PAGECACHE *pagecache,
/* we are doing it by global cache mutex protection, so it is OK */
block->wlocks++;
block->write_locker= locker;
- DBUG_PRINT("info", ("WR lock set, block 0x%lx", (ulong)block));
+ DBUG_PRINT("info", ("WR lock set, block %p", block));
DBUG_RETURN(0);
}
@@ -2464,10 +2461,9 @@ static my_bool get_rdlock(PAGECACHE *pagecache,
pgcache_page_no_t pageno= block->hash_link->pageno;
pthread_t locker= pthread_self();
DBUG_ENTER("get_rdlock");
- DBUG_PRINT("info", ("the block 0x%lx "
+ DBUG_PRINT("info", ("the block %p "
"files %d(%d) pages %lu(%lu)",
- (ulong) block,
- file.file, block->hash_link->file.file,
+ block, file.file, block->hash_link->file.file,
(ulong) pageno, (ulong) block->hash_link->pageno));
PCBLOCK_INFO(block);
while (block->wlocks && !pthread_equal(block->write_locker, locker))
@@ -2482,12 +2478,12 @@ static my_bool get_rdlock(PAGECACHE *pagecache,
{
DBUG_ASSERT(pthread_equal(block->write_locker, locker));
block->rlocks_queue++;
- DBUG_PRINT("info", ("RD lock put into queue, block 0x%lx", (ulong)block));
+ DBUG_PRINT("info", ("RD lock put into queue, block %p", block));
}
else
{
block->rlocks++;
- DBUG_PRINT("info", ("RD lock set, block 0x%lx", (ulong)block));
+ DBUG_PRINT("info", ("RD lock set, block %p", block));
}
DBUG_RETURN(0);
}
@@ -2520,7 +2516,7 @@ static void release_wrlock(PAGECACHE_BLOCK_LINK *block, my_bool read_lock)
block->wlocks--;
if (block->wlocks > 0)
DBUG_VOID_RETURN; /* Multiple write locked */
- DBUG_PRINT("info", ("WR lock reset, block 0x%lx", (ulong)block));
+ DBUG_PRINT("info", ("WR lock reset, block %p", block));
/* release all threads waiting for read lock or one waiting for write */
if (block->wqueue[COND_FOR_WRLOCK].last_thread)
wqueue_release_one_locktype_from_queue(&block->wqueue[COND_FOR_WRLOCK]);
@@ -2547,16 +2543,16 @@ static void release_rdlock(PAGECACHE_BLOCK_LINK *block)
DBUG_ASSERT(block->rlocks == 0);
DBUG_ASSERT(block->rlocks_queue > 0);
block->rlocks_queue--;
- DBUG_PRINT("info", ("RD lock queue decreased, block 0x%lx", (ulong)block));
+ DBUG_PRINT("info", ("RD lock queue decreased, block %p", block));
DBUG_VOID_RETURN;
}
DBUG_ASSERT(block->rlocks > 0);
DBUG_ASSERT(block->rlocks_queue == 0);
block->rlocks--;
- DBUG_PRINT("info", ("RD lock decreased, block 0x%lx", (ulong)block));
+ DBUG_PRINT("info", ("RD lock decreased, block %p", block));
if (block->rlocks > 0)
DBUG_VOID_RETURN; /* Multiple read locked */
- DBUG_PRINT("info", ("RD lock reset, block 0x%lx", (ulong)block));
+ DBUG_PRINT("info", ("RD lock reset, block %p", block));
/* release all threads waiting for read lock or one waiting for write */
if (block->wqueue[COND_FOR_WRLOCK].last_thread)
wqueue_release_one_locktype_from_queue(&block->wqueue[COND_FOR_WRLOCK]);
@@ -2586,8 +2582,8 @@ static my_bool make_lock_and_pin(PAGECACHE *pagecache,
my_bool any)
{
DBUG_ENTER("make_lock_and_pin");
- DBUG_PRINT("enter", ("block: 0x%lx (%u) lock: %s pin: %s any %d",
- (ulong)block, PCBLOCK_NUMBER(pagecache, block),
+ DBUG_PRINT("enter", ("block: %p (%u) lock: %s pin: %s any %d",
+ block, PCBLOCK_NUMBER(pagecache, block),
page_cache_page_lock_str[lock],
page_cache_page_pin_str[pin], (int)any));
PCBLOCK_INFO(block);
@@ -2664,7 +2660,7 @@ static my_bool make_lock_and_pin(PAGECACHE *pagecache,
PCBLOCK_INFO(block);
DBUG_RETURN(0);
retry:
- DBUG_PRINT("INFO", ("Retry block 0x%lx", (ulong)block));
+ DBUG_PRINT("INFO", ("Retry block %p", block));
PCBLOCK_INFO(block);
DBUG_ASSERT(block->hash_link->requests > 0);
block->hash_link->requests--;
@@ -2700,8 +2696,7 @@ static void read_block(PAGECACHE *pagecache,
my_bool primary)
{
DBUG_ENTER("read_block");
- DBUG_PRINT("enter", ("read block: 0x%lx primary: %d",
- (ulong)block, primary));
+ DBUG_PRINT("enter", ("read block: %p primary: %d", block, primary));
if (primary)
{
size_t error;
@@ -2761,7 +2756,8 @@ static void read_block(PAGECACHE *pagecache,
do
{
DBUG_PRINT("wait",
- ("suspend thread %s %ld", thread->name, thread->id));
+ ("suspend thread %s %ld", thread->name,
+ (ulong) thread->id));
pagecache_pthread_cond_wait(&thread->suspend,
&pagecache->cache_lock);
}
@@ -2895,8 +2891,7 @@ void pagecache_unlock(PAGECACHE *pagecache,
if (!(block->status & PCBLOCK_CHANGED) && was_changed)
link_to_changed_list(pagecache, block);
block->status&= ~PCBLOCK_DIRECT_W;
- DBUG_PRINT("info", ("Drop PCBLOCK_DIRECT_W for block: 0x%lx",
- (ulong) block));
+ DBUG_PRINT("info", ("Drop PCBLOCK_DIRECT_W for block: %p", block));
}
if (make_lock_and_pin(pagecache, block, lock, pin, FALSE))
@@ -3019,9 +3014,8 @@ void pagecache_unlock_by_link(PAGECACHE *pagecache,
my_bool any)
{
DBUG_ENTER("pagecache_unlock_by_link");
- DBUG_PRINT("enter", ("block: 0x%lx fd: %u page: %lu changed: %d %s %s",
- (ulong) block,
- (uint) block->hash_link->file.file,
+ DBUG_PRINT("enter", ("block: %p fd: %u page: %lu changed: %d %s %s",
+ block, (uint) block->hash_link->file.file,
(ulong) block->hash_link->pageno, was_changed,
page_cache_page_lock_str[lock],
page_cache_page_pin_str[pin]));
@@ -3091,8 +3085,7 @@ void pagecache_unlock_by_link(PAGECACHE *pagecache,
if (!(block->status & PCBLOCK_CHANGED) && was_changed)
link_to_changed_list(pagecache, block);
block->status&= ~PCBLOCK_DIRECT_W;
- DBUG_PRINT("info", ("Drop PCBLOCK_DIRECT_W for block: 0x%lx",
- (ulong) block));
+ DBUG_PRINT("info", ("Drop PCBLOCK_DIRECT_W for block: %p", block));
}
if (make_lock_and_pin(pagecache, block, lock, pin, any))
@@ -3132,9 +3125,8 @@ void pagecache_unpin_by_link(PAGECACHE *pagecache,
LSN lsn)
{
DBUG_ENTER("pagecache_unpin_by_link");
- DBUG_PRINT("enter", ("block: 0x%lx fd: %u page: %lu",
- (ulong) block,
- (uint) block->hash_link->file.file,
+ DBUG_PRINT("enter", ("block: %p fd: %u page: %lu",
+ block, (uint) block->hash_link->file.file,
(ulong) block->hash_link->pageno));
pagecache_pthread_mutex_lock(&pagecache->cache_lock);
@@ -3356,10 +3348,10 @@ uchar *pagecache_read(PAGECACHE *pagecache,
#ifndef DBUG_OFF
char llbuf[22];
DBUG_ENTER("pagecache_read");
- DBUG_PRINT("enter", ("fd: %u page: %s buffer: 0x%lx level: %u "
+ DBUG_PRINT("enter", ("fd: %u page: %s buffer: %p level: %u "
"t:%s (%d)%s->%s %s->%s",
(uint) file->file, ullstr(pageno, llbuf),
- (ulong) buff, level,
+ buff, level,
page_cache_page_type_str[type],
lock_to_read[lock].need_lock_change,
page_cache_page_lock_str[lock_to_read[lock].new_lock],
@@ -3453,8 +3445,7 @@ restart:
lock == PAGECACHE_LOCK_LEFT_WRITELOCKED))
{
block->status|= PCBLOCK_DIRECT_W;
- DBUG_PRINT("info", ("Set PCBLOCK_DIRECT_W for block: 0x%lx",
- (ulong) block));
+ DBUG_PRINT("info", ("Set PCBLOCK_DIRECT_W for block: %p", block));
}
}
else
@@ -3473,8 +3464,6 @@ restart:
pagecache_pthread_mutex_lock(&pagecache->cache_lock);
#endif
}
- if (status & PCBLOCK_ERROR)
- my_errno= block->error;
}
remove_reader(block);
@@ -3506,6 +3495,7 @@ restart:
if (status & PCBLOCK_ERROR)
{
+ my_errno= block->error;
DBUG_ASSERT(my_errno != 0);
DBUG_PRINT("error", ("Got error %d when doing page read", my_errno));
DBUG_RETURN((uchar *) 0);
@@ -3550,10 +3540,9 @@ no_key_cache: /* Key cache is not used */
void pagecache_set_write_on_delete_by_link(PAGECACHE_BLOCK_LINK *block)
{
DBUG_ENTER("pagecache_set_write_on_delete_by_link");
- DBUG_PRINT("enter", ("fd: %d block 0x%lx %d -> TRUE",
+ DBUG_PRINT("enter", ("fd: %d block %p %d -> TRUE",
block->hash_link->file.file,
- (ulong) block,
- (int) block->status & PCBLOCK_DEL_WRITE));
+ block, (int) block->status & PCBLOCK_DEL_WRITE));
DBUG_ASSERT(block->pins); /* should be pinned */
DBUG_ASSERT(block->wlocks); /* should be write locked */
@@ -3630,9 +3619,9 @@ static my_bool pagecache_delete_internal(PAGECACHE *pagecache,
args.pageno= block->hash_link->pageno;
args.data= filedesc->callback_data;
/* We are not going to write the page but have to call callbacks */
- DBUG_PRINT("info", ("flush_callback :0x%lx data: 0x%lx",
- (ulong) filedesc->flush_log_callback,
- (ulong) filedesc->callback_data));
+ DBUG_PRINT("info", ("flush_callback: %p data: %p",
+ filedesc->flush_log_callback,
+ filedesc->callback_data));
if ((*filedesc->flush_log_callback)(&args))
{
DBUG_PRINT("error", ("flush or write callback problem"));
@@ -3696,9 +3685,9 @@ my_bool pagecache_delete_by_link(PAGECACHE *pagecache,
my_bool error= 0;
enum pagecache_page_pin pin= PAGECACHE_PIN_LEFT_PINNED;
DBUG_ENTER("pagecache_delete_by_link");
- DBUG_PRINT("enter", ("fd: %d block 0x%lx %s %s",
+ DBUG_PRINT("enter", ("fd: %d block %p %s %s",
block->hash_link->file.file,
- (ulong) block,
+ block,
page_cache_page_lock_str[lock],
page_cache_page_pin_str[pin]));
DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE ||
@@ -3842,8 +3831,8 @@ restart:
block= page_link->block;
if (block->status & (PCBLOCK_REASSIGNED | PCBLOCK_IN_SWITCH))
{
- DBUG_PRINT("info", ("Block 0x%0lx already is %s",
- (ulong) block,
+ DBUG_PRINT("info", ("Block %p already is %s",
+ block,
((block->status & PCBLOCK_REASSIGNED) ?
"reassigned" : "in switch")));
PCBLOCK_INFO(block);
@@ -4071,8 +4060,7 @@ restart:
block->type= type;
/* we write to the page, so it makes no sense to keep the flag */
block->status&= ~PCBLOCK_DIRECT_W;
- DBUG_PRINT("info", ("Drop PCBLOCK_DIRECT_W for block: 0x%lx",
- (ulong) block));
+ DBUG_PRINT("info", ("Drop PCBLOCK_DIRECT_W for block: %p", block));
if (make_lock_and_pin(pagecache, block,
write_lock_change_table[lock].new_lock,
@@ -4243,9 +4231,9 @@ static my_bool free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
uint status= block->status;
KEYCACHE_THREAD_TRACE("free block");
KEYCACHE_DBUG_PRINT("free_block",
- ("block: %u hash_link 0x%lx",
+ ("block: %u hash_link %p",
PCBLOCK_NUMBER(pagecache, block),
- (long) block->hash_link));
+ block->hash_link));
mysql_mutex_assert_owner(&pagecache->cache_lock);
if (block->hash_link)
{
@@ -4392,10 +4380,10 @@ static int flush_cached_blocks(PAGECACHE *pagecache,
if ((type == FLUSH_KEEP_LAZY && block->pins) || block->wlocks)
{
KEYCACHE_DBUG_PRINT("flush_cached_blocks",
- ("block: %u (0x%lx) pinned",
- PCBLOCK_NUMBER(pagecache, block), (ulong)block));
- DBUG_PRINT("info", ("block: %u (0x%lx) pinned",
- PCBLOCK_NUMBER(pagecache, block), (ulong)block));
+ ("block: %u (%p) pinned",
+ PCBLOCK_NUMBER(pagecache, block), block));
+ DBUG_PRINT("info", ("block: %u (%p) pinned",
+ PCBLOCK_NUMBER(pagecache, block), block));
PCBLOCK_INFO(block);
/* undo the mark put by flush_pagecache_blocks_int(): */
block->status&= ~PCBLOCK_IN_FLUSH;
@@ -4411,10 +4399,10 @@ static int flush_cached_blocks(PAGECACHE *pagecache,
DBUG_ASSERT(0);
KEYCACHE_PRINT("flush_cached_blocks",
- ("block: %u (0x%lx) to be flushed",
- PCBLOCK_NUMBER(pagecache, block), (ulong)block));
- DBUG_PRINT("info", ("block: %u (0x%lx) to be flushed",
- PCBLOCK_NUMBER(pagecache, block), (ulong)block));
+ ("block: %u (%p) to be flushed",
+ PCBLOCK_NUMBER(pagecache, block), block));
+ DBUG_PRINT("info", ("block: %u (%p) to be flushed",
+ PCBLOCK_NUMBER(pagecache, block), block));
PCBLOCK_INFO(block);
/**
@@ -4582,7 +4570,7 @@ static int flush_pagecache_blocks_int(PAGECACHE *pagecache,
{
DBUG_PRINT("wait",
("(1) suspend thread %s %ld",
- thread->name, thread->id));
+ thread->name, (ulong) thread->id));
pagecache_pthread_cond_wait(&thread->suspend,
&pagecache->cache_lock);
}
@@ -4743,7 +4731,7 @@ restart:
{
DBUG_PRINT("wait",
("(2) suspend thread %s %ld",
- thread->name, thread->id));
+ thread->name, (ulong) thread->id));
pagecache_pthread_cond_wait(&thread->suspend,
&pagecache->cache_lock);
}
@@ -4835,7 +4823,7 @@ int flush_pagecache_blocks_with_filter(PAGECACHE *pagecache,
{
int res;
DBUG_ENTER("flush_pagecache_blocks_with_filter");
- DBUG_PRINT("enter", ("pagecache: 0x%lx", (long) pagecache));
+ DBUG_PRINT("enter", ("pagecache: %p", pagecache));
if (pagecache->disk_blocks <= 0)
DBUG_RETURN(0);
@@ -4948,7 +4936,8 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache,
do
{
DBUG_PRINT("wait",
- ("suspend thread %s %ld", thread->name, thread->id));
+ ("suspend thread %s %ld", thread->name,
+ (ulong) thread->id));
pagecache_pthread_cond_wait(&thread->suspend,
&pagecache->cache_lock);
}
@@ -5091,7 +5080,8 @@ static void pagecache_dump(PAGECACHE *pagecache)
PAGECACHE_PAGE *page;
uint i;
- fprintf(pagecache_dump_file, "thread: %s %ld\n", thread->name, thread->id);
+ fprintf(pagecache_dump_file, "thread: %s %ld\n", thread->name,
+ (ulong) thread->id);
i=0;
thread=last=waiting_for_hash_link.last_thread;
@@ -5103,7 +5093,7 @@ static void pagecache_dump(PAGECACHE *pagecache)
page= (PAGECACHE_PAGE *) thread->keycache_link;
fprintf(pagecache_dump_file,
"thread: %s %ld, (file,pageno)=(%u,%lu)\n",
- thread->name, thread->id,
+ thread->name, (ulong) thread->id,
(uint) page->file.file,(ulong) page->pageno);
if (++i == MAX_QUEUE_LEN)
break;
@@ -5120,7 +5110,7 @@ static void pagecache_dump(PAGECACHE *pagecache)
hash_link= (PAGECACHE_HASH_LINK *) thread->keycache_link;
fprintf(pagecache_dump_file,
"thread: %s %u hash_link:%u (file,pageno)=(%u,%lu)\n",
- thread->name, thread->id,
+ thread->name, (ulong) thread->id,
(uint) PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link),
(uint) hash_link->file.file,(ulong) hash_link->pageno);
if (++i == MAX_QUEUE_LEN)
@@ -5150,7 +5140,7 @@ static void pagecache_dump(PAGECACHE *pagecache)
{
thread=thread->next;
fprintf(pagecache_dump_file,
- "thread: %s %ld\n", thread->name, thread->id);
+ "thread: %s %ld\n", thread->name, (ulong) thread->id);
if (++i == MAX_QUEUE_LEN)
break;
}
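
A second theme in the ma_pagecache.c hunks is matching each trace argument to its conversion specifier: thread->id is cast to ulong to fit the "%ld" these messages already use, and the 64-bit global cache counters are printed with "%llu" instead of being narrowed through (ulong). Passing a mismatched type through printf-style varargs is undefined behaviour, and narrowing quietly wraps large counters. A compact illustration:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
  uint64_t cache_reads= 5000000000ULL;        /* does not fit in 32 bits */
  /* Narrowing: wraps on platforms where long is 32 bits wide. */
  printf("reads: %lu\n", (unsigned long) cache_reads);
  /* Wide enough everywhere: specifier and cast agree in width. */
  printf("reads: %llu\n", (unsigned long long) cache_reads);
  return 0;
}
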
diff --git a/storage/maria/ma_range.c b/storage/maria/ma_range.c
index 512d827c456..bd434bc48e1 100644
--- a/storage/maria/ma_range.c
+++ b/storage/maria/ma_range.c
@@ -163,7 +163,7 @@ static ha_rows _ma_record_pos(MARIA_HA *info, const uchar *key_data,
operations with a comment like "Not real duplicates", whatever this
means. From the condition above we can see that 'skip_end_space' is
always false for these operations. The result is that trailing space
- counts in key comparison and hence, emtpy strings ('', string length
+ counts in key comparison and hence, empty strings ('', string length
zero, but not NULL) compare less that strings starting with control
characters and these in turn compare less than strings starting with
blanks.
diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c
index 9d3950d588c..c8031030361 100644
--- a/storage/maria/ma_recovery.c
+++ b/storage/maria/ma_recovery.c
@@ -1412,6 +1412,11 @@ static int new_table(uint16 sid, const char *name, LSN lsn_of_file_id)
}
if (cmp_translog_addr(lsn_of_file_id, share->state.create_rename_lsn) <= 0)
{
+ /*
+ This can happen if the table was dropped and re-created since this
+ redo entry or if the table had a bulk insert directly after create,
+ in which case the create_rename_lsn changed.
+ */
tprint(tracef, ", has create_rename_lsn " LSN_FMT " more recent than"
" LOGREC_FILE_ID's LSN " LSN_FMT ", ignoring open request",
LSN_IN_PARTS(share->state.create_rename_lsn),
@@ -3236,7 +3241,7 @@ static LSN parse_checkpoint_record(LSN lsn)
if ((len= translog_read_record_header(lsn, &rec)) == RECHEADER_READ_ERROR ||
rec.type != LOGREC_CHECKPOINT)
{
- eprint(tracef, "Cannot find checkpoint record at LSN " LSN_FMT "",
+ eprint(tracef, "Cannot find checkpoint record at LSN " LSN_FMT,
LSN_IN_PARTS(lsn));
return LSN_ERROR;
}
@@ -3397,7 +3402,7 @@ static LSN parse_checkpoint_record(LSN lsn)
" LSN " LSN_FMT "\n", LSN_IN_PARTS(minimum_rec_lsn_of_dirty_pages));
set_if_smaller(start_address, minimum_rec_lsn_of_dirty_pages);
DBUG_PRINT("info",
- ("checkpoint_start: " LSN_FMT " start_address: " LSN_FMT "",
+ ("checkpoint_start: " LSN_FMT " start_address: " LSN_FMT,
LSN_IN_PARTS(checkpoint_start), LSN_IN_PARTS(start_address)));
return start_address;
}
@@ -3632,8 +3637,16 @@ my_bool _ma_reenable_logging_for_table(MARIA_HA *info, my_bool flush_pages)
{
/* Ensure that recover is not executing any redo before this */
if (!maria_in_recovery)
+ {
+ if (share->id != 0)
+ {
+ mysql_mutex_lock(&share->intern_lock);
+ translog_deassign_id_from_share(share);
+ mysql_mutex_unlock(&share->intern_lock);
+ }
share->state.is_of_horizon= share->state.create_rename_lsn=
share->state.skip_redo_lsn= translog_get_horizon();
+ }
/*
We are going to change callbacks; if a page is flushed at this moment
this can cause race conditions, that's one reason to flush pages
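
The _ma_reenable_logging_for_table() hunk releases the share's log id under share->intern_lock before the recovery LSNs are refreshed, so no stale id-to-share mapping survives the reset. A minimal sketch of that guarded-release order, with illustrative stand-in names:

#include <pthread.h>

struct demo_share
{
  pthread_mutex_t intern_lock;
  unsigned id;                       /* 0 == no id assigned */
};

static void demo_deassign_id(struct demo_share *share)
{
  share->id= 0;                      /* stands in for the real release */
}

static void demo_reenable(struct demo_share *share)
{
  if (share->id != 0)
  {
    pthread_mutex_lock(&share->intern_lock);
    demo_deassign_id(share);
    pthread_mutex_unlock(&share->intern_lock);
  }
  /* ... then refresh create_rename_lsn / skip_redo_lsn ... */
}

int main(void)
{
  struct demo_share s= { PTHREAD_MUTEX_INITIALIZER, 42 };
  demo_reenable(&s);
  return (int) s.id;                 /* 0 after the release */
}
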
diff --git a/storage/maria/ma_rkey.c b/storage/maria/ma_rkey.c
index 1100745553f..6abf23108c1 100644
--- a/storage/maria/ma_rkey.c
+++ b/storage/maria/ma_rkey.c
@@ -36,8 +36,8 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data,
MARIA_KEY key;
ICP_RESULT icp_res= ICP_MATCH;
DBUG_ENTER("maria_rkey");
- DBUG_PRINT("enter", ("base: 0x%lx buf: 0x%lx inx: %d search_flag: %d",
- (long) info, (long) buf, inx, search_flag));
+ DBUG_PRINT("enter", ("base:%p buf:%p inx: %d search_flag: %d",
+ info, buf, inx, search_flag));
if ((inx = _ma_check_index(info,inx)) < 0)
DBUG_RETURN(my_errno);
diff --git a/storage/maria/ma_rt_index.c b/storage/maria/ma_rt_index.c
index a0fb4741813..a90efc4ca38 100644
--- a/storage/maria/ma_rt_index.c
+++ b/storage/maria/ma_rt_index.c
@@ -105,7 +105,7 @@ static int maria_rtree_find_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
level + 1)))
{
case 0: /* found - exit from recursion */
- *saved_key= k - page_buf;
+ *saved_key= (uint) (k - page_buf);
goto ok;
case 1: /* not found - continue searching */
info->maria_rtree_recursion_depth= level;
@@ -140,7 +140,7 @@ static int maria_rtree_find_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
memcpy(info->last_key.data, k,
info->last_key.data_length + info->last_key.ref_length);
info->maria_rtree_recursion_depth= level;
- *saved_key= last - page_buf;
+ *saved_key= (uint) (last - page_buf);
if (after_key < last)
{
@@ -366,7 +366,7 @@ static int maria_rtree_get_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
_ma_kpos(nod_flag, k), level + 1)))
{
case 0: /* found - exit from recursion */
- *saved_key= k - page.buff;
+ *saved_key= (uint) (k - page.buff);
goto ok;
case 1: /* not found - continue searching */
info->maria_rtree_recursion_depth= level;
@@ -398,7 +398,7 @@ static int maria_rtree_get_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
info->last_key.data_length + info->last_key.ref_length);
info->maria_rtree_recursion_depth= level;
- *saved_key= k - page.buff;
+ *saved_key= (uint) (k - page.buff);
if (after_key < last)
{
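The (uint) casts added to the saved_key assignments address a 64-bit conversion warning rather than a behaviour change: pointer subtraction yields ptrdiff_t, which is wider than uint on LP64/LLP64 targets, and a key position inside a page is known to fit. A small sketch of the pattern (names are illustrative, not the Maria API):

    #include <stddef.h>
    #include <stdio.h>

    /* k - page_buf has type ptrdiff_t (64 bits on LP64), while the saved
       offset is a 32-bit uint.  The explicit cast documents that the
       narrowing is intentional: the offset never exceeds the page size. */
    static unsigned int key_offset(const unsigned char *page_buf,
                                   const unsigned char *k)
    {
      ptrdiff_t off = k - page_buf;  /* 0 <= off < page size by construction */
      return (unsigned int) off;
    }

    int main(void)
    {
      unsigned char page[8192];
      printf("offset: %u\n", key_offset(page, page + 100));
      return 0;
    }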
diff --git a/storage/maria/ma_rt_key.c b/storage/maria/ma_rt_key.c
index 88da78edd01..231bd9ba73b 100644
--- a/storage/maria/ma_rt_key.c
+++ b/storage/maria/ma_rt_key.c
@@ -58,7 +58,7 @@ int maria_rtree_add_key(const MARIA_KEY *key, MARIA_PAGE *page,
page->size+= tot_key_length;
page_store_size(share, page);
if (share->now_transactional &&
- _ma_log_add(page, key_pos - page->buff,
+ _ma_log_add(page, (uint)(key_pos - page->buff),
key_pos, tot_key_length, tot_key_length, 0,
KEY_OP_DEBUG_LOG_ADD_1))
DBUG_RETURN(-1);
diff --git a/storage/maria/ma_rt_split.c b/storage/maria/ma_rt_split.c
index c8004cb52b3..1eb0ffb5b89 100644
--- a/storage/maria/ma_rt_split.c
+++ b/storage/maria/ma_rt_split.c
@@ -308,7 +308,7 @@ static my_bool _ma_log_rt_split(MARIA_PAGE *page,
uint translog_parts, extra_length= 0;
my_off_t page_pos;
DBUG_ENTER("_ma_log_rt_split");
- DBUG_PRINT("enter", ("page: %lu", (ulong) page));
+ DBUG_PRINT("enter", ("page: %p", page));
DBUG_ASSERT(share->now_transactional);
page_pos= page->pos / share->block_size;
@@ -477,11 +477,11 @@ int maria_rtree_split_page(const MARIA_KEY *key, MARIA_PAGE *page,
memcpy(to_with_nod_flag, cur_key_with_nod_flag, full_length);
if (log_this_change)
{
- uint to_with_nod_flag_offs= to_with_nod_flag - page->buff;
+ size_t to_with_nod_flag_offs= to_with_nod_flag - page->buff;
if (likely(cur_key != key->data))
{
/* this memcpy() is internal to the page (source in the page) */
- uint cur_key_with_nod_flag_offs= cur_key_with_nod_flag - page->buff;
+ size_t cur_key_with_nod_flag_offs= cur_key_with_nod_flag - page->buff;
int2store(log_internal_copy_ptr, to_with_nod_flag_offs);
log_internal_copy_ptr+= 2;
int2store(log_internal_copy_ptr, cur_key_with_nod_flag_offs);
@@ -526,8 +526,8 @@ int maria_rtree_split_page(const MARIA_KEY *key, MARIA_PAGE *page,
( /* log change to split page */
_ma_log_rt_split(page, key->data - nod_flag,
full_length, log_internal_copy,
- log_internal_copy_ptr - log_internal_copy,
- log_key_copy, org_length - page->size) ||
+ (uint)(log_internal_copy_ptr - log_internal_copy),
+ log_key_copy, (uint)(org_length - page->size)) ||
/* and to new page */
_ma_log_new(&new_page, 0)))
err_code= -1;
diff --git a/storage/maria/ma_search.c b/storage/maria/ma_search.c
index 0a79343c194..9838c84e95d 100644
--- a/storage/maria/ma_search.c
+++ b/storage/maria/ma_search.c
@@ -380,8 +380,8 @@ int _ma_seq_search(const MARIA_KEY *key, const MARIA_PAGE *ma_page,
{
_ma_set_fatal_error(share, HA_ERR_CRASHED);
DBUG_PRINT("error",
- ("Found wrong key: length: %u page: 0x%lx end: 0x%lx",
- length, (long) page, (long) end));
+ ("Found wrong key: length: %u page: %p end: %p",
+ length, page, end));
DBUG_RETURN(MARIA_FOUND_WRONG_KEY);
}
if ((flag= ha_key_cmp(keyinfo->seg, t_buff, key->data,
@@ -389,15 +389,15 @@ int _ma_seq_search(const MARIA_KEY *key, const MARIA_PAGE *ma_page,
comp_flag | tmp_key.flag,
not_used)) >= 0)
break;
- DBUG_PRINT("loop_extra",("page: 0x%lx key: '%s' flag: %d",
- (long) page, t_buff, flag));
+ DBUG_PRINT("loop_extra",("page:%p key: '%s' flag: %d",
+ page, t_buff, flag));
memcpy(buff,t_buff,length);
*ret_pos=page;
}
if (flag == 0)
memcpy(buff,t_buff,length); /* Result is first key */
*last_key= page == end;
- DBUG_PRINT("exit",("flag: %d ret_pos: 0x%lx", flag, (long) *ret_pos));
+ DBUG_PRINT("exit",("flag: %d ret_pos: %p", flag, *ret_pos));
DBUG_RETURN(flag);
} /* _ma_seq_search */
@@ -555,8 +555,8 @@ int _ma_prefix_search(const MARIA_KEY *key, const MARIA_PAGE *ma_page,
{
_ma_set_fatal_error(share, HA_ERR_CRASHED);
DBUG_PRINT("error",
- ("Found wrong key: length: %u page: 0x%lx end: %lx",
- length, (long) page, (long) end));
+ ("Found wrong key: length: %u page: %p end: %p",
+ length, page, end));
DBUG_RETURN(MARIA_FOUND_WRONG_KEY);
}
@@ -692,7 +692,7 @@ int _ma_prefix_search(const MARIA_KEY *key, const MARIA_PAGE *ma_page,
*last_key= page == end;
- DBUG_PRINT("exit",("flag: %d ret_pos: 0x%lx", flag, (long) *ret_pos));
+ DBUG_PRINT("exit",("flag: %d ret_pos: %p", flag, *ret_pos));
DBUG_RETURN(flag);
} /* _ma_prefix_search */
@@ -1047,8 +1047,8 @@ uint _ma_get_pack_key(MARIA_KEY *int_key, uint page_flag,
if (length > keyseg->length)
{
DBUG_PRINT("error",
- ("Found too long null packed key: %u of %u at 0x%lx",
- length, keyseg->length, (long) *page_pos));
+ ("Found too long null packed key: %u of %u at %p",
+ length, keyseg->length, *page_pos));
DBUG_DUMP("key", *page_pos, 16);
_ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED);
return 0;
@@ -1104,8 +1104,8 @@ uint _ma_get_pack_key(MARIA_KEY *int_key, uint page_flag,
}
if (length > (uint) keyseg->length)
{
- DBUG_PRINT("error",("Found too long packed key: %u of %u at 0x%lx",
- length, keyseg->length, (long) *page_pos));
+ DBUG_PRINT("error",("Found too long packed key: %u of %u at %p",
+ length, keyseg->length, *page_pos));
DBUG_DUMP("key", *page_pos, 16);
_ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED);
return 0; /* Error */
@@ -1134,7 +1134,7 @@ uint _ma_get_pack_key(MARIA_KEY *int_key, uint page_flag,
page+=length;
}
- int_key->data_length= (key - int_key->data);
+ int_key->data_length= (uint)(key - int_key->data);
int_key->flag= 0;
length= keyseg->length;
if (page_flag & KEYPAGE_FLAG_HAS_TRANSID)
@@ -1263,8 +1263,8 @@ uint _ma_get_binary_pack_key(MARIA_KEY *int_key, uint page_flag, uint nod_flag,
if (length > keyinfo->maxlength)
{
DBUG_PRINT("error",
- ("Found too long binary packed key: %u of %u at 0x%lx",
- length, keyinfo->maxlength, (long) *page_pos));
+ ("Found too long binary packed key: %u of %u at %p",
+ length, keyinfo->maxlength, *page_pos));
DBUG_DUMP("key", *page_pos, 16);
_ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED);
DBUG_RETURN(0); /* Wrong key */
@@ -1325,8 +1325,8 @@ uint _ma_get_binary_pack_key(MARIA_KEY *int_key, uint page_flag, uint nod_flag,
from=page; from_end=page_end;
}
DBUG_ASSERT((int) length >= 0);
- DBUG_PRINT("info",("key: 0x%lx from: 0x%lx length: %u",
- (long) key, (long) from, length));
+ DBUG_PRINT("info",("key: %p from: %p length: %u",
+ key, from, length));
memmove(key, from, (size_t) length);
key+=length;
from+=length;
@@ -1336,7 +1336,7 @@ uint _ma_get_binary_pack_key(MARIA_KEY *int_key, uint page_flag, uint nod_flag,
If we have mixed key blocks with data pointer and key block pointer,
we have to copy both.
*/
- int_key->data_length= (key - int_key->data);
+ int_key->data_length= (uint)(key - int_key->data);
int_key->ref_length= length= keyseg->length;
int_key->flag= 0;
if ((tmp=(uint) (from_end-from)) <= length)
@@ -1452,7 +1452,7 @@ uchar *_ma_get_key(MARIA_KEY *key, MARIA_PAGE *ma_page, uchar *keypos)
}
}
}
- DBUG_PRINT("exit",("page: 0x%lx length: %u", (long) page,
+ DBUG_PRINT("exit",("page: %p length: %u", page,
key->data_length + key->ref_length));
DBUG_RETURN(page);
} /* _ma_get_key */
@@ -1522,8 +1522,8 @@ uchar *_ma_get_last_key(MARIA_KEY *key, MARIA_PAGE *ma_page, uchar *endpos)
uchar *lastpos, *page;
MARIA_KEYDEF *keyinfo= key->keyinfo;
DBUG_ENTER("_ma_get_last_key");
- DBUG_PRINT("enter",("page: 0x%lx endpos: 0x%lx", (long) ma_page->buff,
- (long) endpos));
+ DBUG_PRINT("enter",("page: %p endpos: %p", ma_page->buff,
+ endpos));
page_flag= ma_page->flag;
nod_flag= ma_page->node;
@@ -1548,14 +1548,14 @@ uchar *_ma_get_last_key(MARIA_KEY *key, MARIA_PAGE *ma_page, uchar *endpos)
lastpos= page;
if (!(*keyinfo->get_key)(key, page_flag, nod_flag, &page))
{
- DBUG_PRINT("error",("Couldn't find last key: page: 0x%lx",
- (long) page));
+ DBUG_PRINT("error",("Couldn't find last key: page: %p",
+ page));
_ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED);
DBUG_RETURN(0);
}
}
}
- DBUG_PRINT("exit",("lastpos: 0x%lx length: %u", (ulong) lastpos,
+ DBUG_PRINT("exit",("lastpos: %p length: %u", lastpos,
key->data_length + key->ref_length));
DBUG_RETURN(lastpos);
} /* _ma_get_last_key */
@@ -1654,9 +1654,9 @@ int _ma_search_next(register MARIA_HA *info, MARIA_KEY *key,
MARIA_KEY tmp_key;
MARIA_PAGE page;
DBUG_ENTER("_ma_search_next");
- DBUG_PRINT("enter",("nextflag: %u lastpos: %lu int_keypos: 0x%lx page_changed %d keyread_buff_used: %d",
+ DBUG_PRINT("enter",("nextflag: %u lastpos: %lu int_keypos:%p page_changed %d keyread_buff_used: %d",
nextflag, (ulong) info->cur_row.lastpos,
- (ulong) info->int_keypos,
+ info->int_keypos,
info->page_changed, info->keyread_buff_used));
DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE, key););
@@ -2142,8 +2142,8 @@ _ma_calc_var_pack_key_length(const MARIA_KEY *int_key, uint nod_flag,
ref_length=0;
next_length_pack=0;
}
- DBUG_PRINT("test",("length: %d next_key: 0x%lx", length,
- (long) next_key));
+ DBUG_PRINT("test",("length: %d next_key: %p", length,
+ next_key));
{
uint tmp_length;
diff --git a/storage/maria/ma_servicethread.c b/storage/maria/ma_servicethread.c
index 5f91a4943c5..f5af1725581 100644
--- a/storage/maria/ma_servicethread.c
+++ b/storage/maria/ma_servicethread.c
@@ -31,7 +31,7 @@ int ma_service_thread_control_init(MA_SERVICE_THREAD_CONTROL *control)
{
int res= 0;
DBUG_ENTER("ma_service_thread_control_init");
- DBUG_PRINT("init", ("control 0x%lx", (ulong) control));
+ DBUG_PRINT("init", ("control %p", control));
control->inited= TRUE;
control->killed= FALSE;
res= (mysql_mutex_init(key_SERVICE_THREAD_CONTROL_lock,
@@ -57,7 +57,7 @@ int ma_service_thread_control_init(MA_SERVICE_THREAD_CONTROL *control)
void ma_service_thread_control_end(MA_SERVICE_THREAD_CONTROL *control)
{
DBUG_ENTER("ma_service_thread_control_end");
- DBUG_PRINT("init", ("control 0x%lx", (ulong) control));
+ DBUG_PRINT("init", ("control %p", control));
DBUG_ASSERT(control->inited);
mysql_mutex_lock(control->LOCK_control);
if (!control->killed)
@@ -95,7 +95,7 @@ my_bool my_service_thread_sleep(MA_SERVICE_THREAD_CONTROL *control,
struct timespec abstime;
my_bool res= FALSE;
DBUG_ENTER("my_service_thread_sleep");
- DBUG_PRINT("init", ("control 0x%lx", (ulong) control));
+ DBUG_PRINT("init", ("control %p", control));
mysql_mutex_lock(control->LOCK_control);
if (control->killed)
{
diff --git a/storage/maria/ma_sort.c b/storage/maria/ma_sort.c
index a01824bc259..e68d099576d 100644
--- a/storage/maria/ma_sort.c
+++ b/storage/maria/ma_sort.c
@@ -195,8 +195,10 @@ int _ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages,
while ((maxbuffer= (uint) (records/(keys-1)+1)) != maxbuffer_org);
}
- if ((sort_keys=(uchar**) my_malloc(keys*(sort_length+sizeof(char*))+
- HA_FT_MAXBYTELEN, MYF(0))))
+ if ((sort_keys= ((uchar**)
+ my_malloc((size_t) (keys*(sort_length+sizeof(char*))+
+ HA_FT_MAXBYTELEN),
+ MYF(0)))))
{
if (my_init_dynamic_array(&buffpek, sizeof(BUFFPEK), maxbuffer,
MY_MIN(maxbuffer/2, 1000), MYF(0)))
@@ -428,9 +430,9 @@ static my_bool _ma_thr_find_all_keys_exec(MARIA_SORT_PARAM* sort_param)
while ((maxbuffer= (uint) (idx/(keys-1)+1)) != maxbuffer_org);
}
if ((sort_keys= (uchar **)
- my_malloc(keys*(sort_length+sizeof(char*))+
+ my_malloc((size_t)(keys*(sort_length+sizeof(char*))+
((sort_param->keyinfo->flag & HA_FULLTEXT) ?
- HA_FT_MAXBYTELEN : 0), MYF(0))))
+ HA_FT_MAXBYTELEN : 0)), MYF(0))))
{
if (my_init_dynamic_array(&sort_param->buffpek, sizeof(BUFFPEK),
maxbuffer, MY_MIN(maxbuffer / 2, 1000), MYF(0)))
@@ -498,10 +500,10 @@ static my_bool _ma_thr_find_all_keys_exec(MARIA_SORT_PARAM* sort_param)
(BUFFPEK *) alloc_dynamic(&sort_param->buffpek),
&sort_param->tempfile))
goto err;
- sort_param->keys= (sort_param->buffpek.elements - 1) * (keys - 1) + idx;
+ sort_param->keys= (uint)((sort_param->buffpek.elements - 1) * (keys - 1) + idx);
}
else
- sort_param->keys= idx;
+ sort_param->keys= (uint)idx;
DBUG_RETURN(FALSE);
@@ -625,10 +627,10 @@ int _ma_thr_write_keys(MARIA_SORT_PARAM *sort_param)
uint maxbuffer=sinfo->buffpek.elements-1;
if (!mergebuf)
{
- length=param->sort_buffer_length;
+ length=(size_t)param->sort_buffer_length;
while (length >= MIN_SORT_MEMORY)
{
- if ((mergebuf= my_malloc(length, MYF(0))))
+ if ((mergebuf= my_malloc((size_t) length, MYF(0))))
break;
length=length*3/4;
}
@@ -732,8 +734,8 @@ static int write_keys(MARIA_SORT_PARAM *info, register uchar **sort_keys,
if (!buffpek)
DBUG_RETURN(1); /* Out of memory */
- my_qsort2((uchar*) sort_keys,count,sizeof(uchar*),(qsort2_cmp) info->key_cmp,
- info);
+ my_qsort2((uchar*) sort_keys,(size_t) count, sizeof(uchar*),
+ (qsort2_cmp) info->key_cmp, info);
if (!my_b_inited(tempfile) &&
open_cached_file(tempfile, my_tmpdir(info->tmpdir), "ST",
DISK_BUFFER_SIZE, info->sort_info->param->myf_rw))
@@ -778,8 +780,8 @@ static int write_keys_varlen(MARIA_SORT_PARAM *info,
if (!buffpek)
DBUG_RETURN(1); /* Out of memory */
- my_qsort2((uchar*) sort_keys,count,sizeof(uchar*),(qsort2_cmp) info->key_cmp,
- info);
+ my_qsort2((uchar*) sort_keys, (size_t) count, sizeof(uchar*),
+ (qsort2_cmp) info->key_cmp, info);
if (!my_b_inited(tempfile) &&
open_cached_file(tempfile, my_tmpdir(info->tmpdir), "ST",
DISK_BUFFER_SIZE, info->sort_info->param->myf_rw))
@@ -894,8 +896,6 @@ cleanup:
{
DBUG_ASSERT(t_file2.type == WRITE_CACHE);
*t_file=t_file2; /* Copy result file */
- t_file->current_pos= &t_file->write_pos;
- t_file->current_end= &t_file->write_end;
}
DBUG_RETURN(*maxbuffer >= MERGEBUFF2); /* Return 1 if interrupted */
@@ -919,12 +919,13 @@ static my_off_t read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek,
uint sort_length)
{
register ha_keys count;
- my_off_t length;
+ size_t length;
- if ((count= (ha_keys) MY_MIN((ha_rows) buffpek->max_keys,buffpek->count)))
+ if ((count= (ha_keys) MY_MIN((ha_rows) buffpek->max_keys,
+ (ha_rows) buffpek->count)))
{
if (my_b_pread(fromfile, (uchar*) buffpek->base,
- (length= sort_length * count), buffpek->file_pos))
+ (length= sort_length * (size_t)count), buffpek->file_pos))
return(HA_OFFSET_ERROR); /* purecov: inspected */
buffpek->key=buffpek->base;
buffpek->file_pos+= length; /* New filepos */
@@ -989,7 +990,7 @@ static int write_merge_key(MARIA_SORT_PARAM *info __attribute__((unused)),
IO_CACHE *to_file, uchar *key,
uint sort_length, ha_keys count)
{
- return my_b_write(to_file, key, ((size_t) sort_length) * count);
+ return my_b_write(to_file, key, (size_t) (sort_length * count));
}
/*
@@ -1049,7 +1050,7 @@ merge_buffers(MARIA_SORT_PARAM *info, ha_keys keys, IO_CACHE *from_file,
if (to_file)
{
if (info->write_key(info,to_file, buffpek->key,
- (uint) sort_length,1))
+ sort_length, 1))
goto err; /* purecov: inspected */
}
else
@@ -1066,7 +1067,7 @@ merge_buffers(MARIA_SORT_PARAM *info, ha_keys keys, IO_CACHE *from_file,
if (!(read_length= info->read_to_buffer(from_file,buffpek,sort_length)))
{
uchar *base= buffpek->base;
- uint max_keys=buffpek->max_keys;
+ ha_keys max_keys=buffpek->max_keys;
queue_remove_top(&queue);
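Several ma_sort.c call sites now cast key-count arithmetic to size_t before handing it to my_malloc() and my_qsort2(). A hedged sketch of the width issue, using plain malloc() as a stand-in: the counts are 64-bit types (ha_keys/ha_rows), the allocator parameter is size_t, and the explicit cast marks the narrowing as intentional, ideally after checking that the value fits.

    #include <stdio.h>
    #include <stdlib.h>

    /* Sketch only: malloc() stands in for my_malloc() and the parameter
       names are invented.  keys is a 64-bit count while size_t may be 32
       bits on 32-bit builds, so the product is narrowed explicitly after
       a fit check. */
    static void *alloc_sort_keys(unsigned long long keys,
                                 unsigned int sort_length)
    {
      unsigned long long bytes = keys * (sort_length + sizeof(char *));
      if (bytes != (size_t) bytes)       /* would not fit in size_t */
        return NULL;
      return malloc((size_t) bytes);
    }

    int main(void)
    {
      void *p = alloc_sort_keys(1000, 16);
      printf("allocation %s\n", p ? "succeeded" : "failed");
      free(p);
      return 0;
    }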
diff --git a/storage/maria/ma_sp_key.c b/storage/maria/ma_sp_key.c
index 0dc7fe1fe46..1a9abc989ed 100644
--- a/storage/maria/ma_sp_key.c
+++ b/storage/maria/ma_sp_key.c
@@ -77,7 +77,6 @@ MARIA_KEY *_ma_sp_make_key(MARIA_HA *info, MARIA_KEY *ret_key, uint keynr,
DBUG_ASSERT(keyseg->type == HA_KEYTYPE_DOUBLE);
val= mbr[start / sizeof (double)];
-#ifdef HAVE_ISNAN
if (isnan(val))
{
bzero(key, length);
@@ -85,7 +84,6 @@ MARIA_KEY *_ma_sp_make_key(MARIA_HA *info, MARIA_KEY *ret_key, uint keynr,
len+= length;
continue;
}
-#endif
if (keyseg->flag & HA_SWAP_KEY)
{
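Dropping the #ifdef HAVE_ISNAN guard in ma_sp_key.c relies on isnan() being part of C99 <math.h>, so the feature test is obsolete. The surviving logic, a NaN coordinate has no defined ordering so its key bytes are zeroed, sketched standalone:

    #include <math.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
      double val = nan("");
      unsigned char key[sizeof(double)];

      if (isnan(val))
        memset(key, 0, sizeof(key));    /* bzero(key, length) in the source */
      else
        memcpy(key, &val, sizeof(key));

      printf("key zeroed: %s\n", key[0] == 0 ? "yes" : "no");
      return 0;
    }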
diff --git a/storage/maria/ma_state.c b/storage/maria/ma_state.c
index 2d30dbcda95..e90822fe500 100644
--- a/storage/maria/ma_state.c
+++ b/storage/maria/ma_state.c
@@ -87,8 +87,8 @@ my_bool _ma_setup_live_state(MARIA_HA *info)
mysql_mutex_lock(&share->intern_lock);
share->in_trans++;
- DBUG_PRINT("info", ("share: 0x%lx in_trans: %d",
- (ulong) share, share->in_trans));
+ DBUG_PRINT("info", ("share: %p in_trans: %d",
+ share, share->in_trans));
history= share->state_history;
@@ -455,7 +455,7 @@ my_bool _ma_trnman_end_trans_hook(TRN *trn, my_bool commit,
MARIA_USED_TABLES *tables, *next;
DBUG_ENTER("_ma_trnman_end_trans_hook");
DBUG_PRINT("enter", ("trn: %p used_tables: %p", trn, trn->used_tables));
-
+
for (tables= (MARIA_USED_TABLES*) trn->used_tables;
tables;
tables= next)
@@ -529,8 +529,8 @@ my_bool _ma_trnman_end_trans_hook(TRN *trn, my_bool commit,
/* Remove not visible states */
share->state_history= _ma_remove_not_visible_states(history, 0, 1);
}
- DBUG_PRINT("info", ("share: 0x%lx in_trans: %d",
- (ulong) share, share->in_trans));
+ DBUG_PRINT("info", ("share: %p in_trans: %d",
+ share, share->in_trans));
}
}
share->in_trans--;
@@ -572,6 +572,7 @@ void _ma_remove_table_from_trnman(MARIA_HA *info)
TRN *trn= info->trn;
MARIA_USED_TABLES *tables, **prev;
MARIA_HA *handler, **prev_file;
+ uint unlinked= 0;
DBUG_ENTER("_ma_remove_table_from_trnman");
DBUG_PRINT("enter", ("trn: %p used_tables: %p share: %p in_trans: %d",
trn, trn->used_tables, share, share->in_trans));
@@ -580,7 +581,7 @@ void _ma_remove_table_from_trnman(MARIA_HA *info)
if (trn == &dummy_transaction_object)
DBUG_VOID_RETURN;
-
+
/* First remove share from used_tables */
for (prev= (MARIA_USED_TABLES**) (char*) &trn->used_tables;
(tables= *prev);
@@ -594,7 +595,7 @@ void _ma_remove_table_from_trnman(MARIA_HA *info)
break;
}
}
- if (tables != 0)
+ if (!tables)
{
/*
This can only happens in case of rename of intermediate table as
@@ -603,18 +604,21 @@ void _ma_remove_table_from_trnman(MARIA_HA *info)
DBUG_PRINT("warning", ("share: %p where not in used_tables_list", share));
}
- /* unlink table from used_instances */
- for (prev_file= (MARIA_HA**) &trn->used_instances;
- (handler= *prev_file);
- prev_file= &handler->trn_next)
+ /* unlink all instances of the table from used_instances */
+ prev_file= (MARIA_HA**) &trn->used_instances;
+ while ((handler= *prev_file))
{
- if (handler == info)
+ if (handler->s == share)
{
- *prev_file= info->trn_next;
- break;
+ unlinked++;
+ *prev_file= handler->trn_next; /* Remove instance */
}
+ else
+ prev_file= &handler->trn_next; /* Continue with next instance */
}
- if (handler != 0)
+
+ DBUG_PRINT("note", ("unlinked tables: %u", unlinked));
+ if (!unlinked)
{
/*
This can only happens in case of rename of intermediate table as
@@ -796,9 +800,18 @@ void maria_versioning(MARIA_HA *info, my_bool versioning)
void _ma_set_share_data_file_length(MARIA_SHARE *share, ulonglong new_length)
{
- mysql_mutex_lock(&share->intern_lock);
+ if (!share->internal_table)
+ mysql_mutex_lock(&share->intern_lock);
if (share->state.state.data_file_length < new_length)
+ {
share->state.state.data_file_length= new_length;
+ if (new_length >= share->base.max_data_file_length)
+ {
+ /* Give an error on next insert */
+ share->state.changed|= STATE_DATA_FILE_FULL;
+ }
+ }
+ if (!share->internal_table)
mysql_mutex_unlock(&share->intern_lock);
}
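The rewritten loop in _ma_remove_table_from_trnman() is the main behaviour change in ma_state.c: instead of unlinking only the first handler that matches (and matching on the handler pointer itself), it walks used_instances with a pointer-to-pointer and splices out every instance whose share matches, counting what it removed. A self-contained sketch of the idiom with simplified types:

    #include <stdio.h>

    struct handler { int share_id; struct handler *trn_next; };

    static unsigned int unlink_share(struct handler **head, int share_id)
    {
      unsigned int unlinked = 0;
      struct handler **prev = head;
      struct handler *h;
      while ((h = *prev))
      {
        if (h->share_id == share_id)
        {
          *prev = h->trn_next;          /* remove this instance */
          unlinked++;
        }
        else
          prev = &h->trn_next;          /* keep it, advance */
      }
      return unlinked;
    }

    int main(void)
    {
      struct handler c = {2, NULL}, b = {1, &c}, a = {1, &b};
      struct handler *head = &a;
      printf("unlinked: %u\n", unlink_share(&head, 1));  /* prints 2 */
      return 0;
    }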
diff --git a/storage/maria/ma_unique.c b/storage/maria/ma_unique.c
index 4e098d55f71..397c7804170 100644
--- a/storage/maria/ma_unique.c
+++ b/storage/maria/ma_unique.c
@@ -238,7 +238,7 @@ my_bool _ma_unique_comp(MARIA_UNIQUEDEF *def, const uchar *a, const uchar *b,
type == HA_KEYTYPE_VARTEXT2)
{
if (ha_compare_text(keyseg->charset, pos_a, a_length,
- pos_b, b_length, 0, 1))
+ pos_b, b_length, 0))
return 1;
}
else
diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c
index cae8ef897be..126554fffdb 100644
--- a/storage/maria/ma_write.c
+++ b/storage/maria/ma_write.c
@@ -107,9 +107,10 @@ int maria_write(MARIA_HA *info, uchar *record)
if (_ma_readinfo(info,F_WRLCK,1))
DBUG_RETURN(my_errno);
- if (share->base.reloc == (ha_rows) 1 &&
- share->base.records == (ha_rows) 1 &&
- share->state.state.records == (ha_rows) 1)
+ if ((share->state.changed & STATE_DATA_FILE_FULL) ||
+ (share->base.reloc == (ha_rows) 1 &&
+ share->base.records == (ha_rows) 1 &&
+ share->state.state.records == (ha_rows) 1))
{ /* System file */
my_errno=HA_ERR_RECORD_FILE_FULL;
goto err2;
@@ -791,7 +792,7 @@ int _ma_insert(register MARIA_HA *info, MARIA_KEY *key,
MARIA_SHARE *share= info->s;
MARIA_KEYDEF *keyinfo= key->keyinfo;
DBUG_ENTER("_ma_insert");
- DBUG_PRINT("enter",("key_pos: 0x%lx", (ulong) key_pos));
+ DBUG_PRINT("enter",("key_pos:%p", key_pos));
DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE, key););
/*
@@ -817,8 +818,8 @@ int _ma_insert(register MARIA_HA *info, MARIA_KEY *key,
{
DBUG_PRINT("test",("t_length: %d ref_len: %d",
t_length,s_temp.ref_length));
- DBUG_PRINT("test",("n_ref_len: %d n_length: %d key_pos: 0x%lx",
- s_temp.n_ref_length, s_temp.n_length, (long) s_temp.key));
+ DBUG_PRINT("test",("n_ref_len: %d n_length: %d key_pos: %p",
+ s_temp.n_ref_length, s_temp.n_length, s_temp.key));
}
#endif
if (t_length > 0)
@@ -884,7 +885,7 @@ ChangeSet@1.2562, 2008-04-09 07:41:40+02:00, serg@janus.mylan +9 -0
DBUG_ASSERT(info->ft1_to_ft2==0);
if (alen == blen &&
ha_compare_text(keyinfo->seg->charset, a, alen,
- b, blen, 0, 0) == 0)
+ b, blen, 0) == 0)
{
/* Yup. converting */
info->ft1_to_ft2=(DYNAMIC_ARRAY *)
@@ -1131,8 +1132,8 @@ uchar *_ma_find_half_pos(MARIA_KEY *key, MARIA_PAGE *ma_page,
DBUG_RETURN(0);
} while (page < end);
*after_key= page;
- DBUG_PRINT("exit",("returns: 0x%lx page: 0x%lx half: 0x%lx",
- (long) lastpos, (long) page, (long) end));
+ DBUG_PRINT("exit",("returns: %p page: %p half: %p",
+ lastpos, page, end));
DBUG_RETURN(lastpos);
} /* _ma_find_half_pos */
@@ -1214,8 +1215,8 @@ static uchar *_ma_find_last_pos(MARIA_KEY *int_key, MARIA_PAGE *ma_page,
} while (page < end);
*after_key=lastpos;
- DBUG_PRINT("exit",("returns: 0x%lx page: 0x%lx end: 0x%lx",
- (long) prevpos,(long) page,(long) end));
+ DBUG_PRINT("exit",("returns: %p page: %p end: %p",
+ prevpos,page,end));
DBUG_RETURN(prevpos);
} /* _ma_find_last_pos */
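The ma_state.c and ma_write.c hunks cooperate: once the data file length reaches base.max_data_file_length, _ma_set_share_data_file_length() sets STATE_DATA_FILE_FULL, and the next maria_write() fails up front with HA_ERR_RECORD_FILE_FULL instead of trying to grow the file further. A condensed sketch of that handshake (structures and the error value are simplified stand-ins):

    #include <stdio.h>

    #define STATE_DATA_FILE_FULL    4096U
    #define HA_ERR_RECORD_FILE_FULL 135     /* value assumed for the sketch */

    struct share { unsigned int changed; unsigned long long len, max_len; };

    static void set_data_file_length(struct share *s, unsigned long long new_len)
    {
      if (s->len < new_len)
      {
        s->len = new_len;
        if (new_len >= s->max_len)
          s->changed |= STATE_DATA_FILE_FULL;   /* error on next insert */
      }
    }

    static int maria_write_stub(struct share *s)
    {
      if (s->changed & STATE_DATA_FILE_FULL)
        return HA_ERR_RECORD_FILE_FULL;
      return 0;                                 /* normal insert path */
    }

    int main(void)
    {
      struct share s = {0, 0, 1024};
      set_data_file_length(&s, 1024);
      printf("write: %d\n", maria_write_stub(&s));   /* 135 */
      return 0;
    }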
diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c
index 058e864f370..defbe79e32a 100644
--- a/storage/maria/maria_chk.c
+++ b/storage/maria/maria_chk.c
@@ -89,7 +89,7 @@ static int sort_record_index(MARIA_SORT_PARAM *sort_param, MARIA_PAGE *page,
uint sortkey, File new_file,
my_bool update_index);
static my_bool write_log_record(HA_CHECK *param);
-static void my_exit(int exit_code) __attribute__ ((noreturn));
+ATTRIBUTE_NORETURN static void my_exit(int exit_code);
HA_CHECK check_param;
@@ -384,13 +384,13 @@ static struct my_option my_long_options[] =
&check_param.read_buffer_length,
&check_param.read_buffer_length, 0, GET_ULONG, REQUIRED_ARG,
(long) READ_BUFFER_INIT, (long) MALLOC_OVERHEAD,
- (long) ~0L, (long) MALLOC_OVERHEAD, (long) 1L, 0},
+ ~0ULL, (long) MALLOC_OVERHEAD, (long) 1L, 0},
{ "write_buffer_size", OPT_WRITE_BUFFER_SIZE,
"Write buffer size for sequential writes during repair of fixed size or dynamic size rows",
&check_param.write_buffer_length,
&check_param.write_buffer_length, 0, GET_ULONG, REQUIRED_ARG,
(long) READ_BUFFER_INIT, (long) MALLOC_OVERHEAD,
- (long) ~0L, (long) MALLOC_OVERHEAD, (long) 1L, 0},
+ ~0UL, (long) MALLOC_OVERHEAD, (long) 1L, 0},
{ "sort_buffer_size", OPT_SORT_BUFFER_SIZE,
"Size of sort buffer. Used by --recover",
&check_param.sort_buffer_length,
@@ -974,6 +974,7 @@ static int maria_chk(HA_CHECK *param, char *filename)
int error,lock_type,recreate;
uint warning_printed_by_chk_status;
my_bool rep_quick= MY_TEST(param->testflag & (T_QUICK | T_FORCE_UNIQUENESS));
+ my_bool born_transactional;
MARIA_HA *info;
File datafile;
char llbuff[22],llbuff2[22];
@@ -1416,6 +1417,7 @@ static int maria_chk(HA_CHECK *param, char *filename)
maria_lock_database(info, F_UNLCK);
end2:
+ born_transactional= share->base.born_transactional;
if (maria_close(info))
{
_ma_check_print_error(param, default_close_errmsg, my_errno, filename);
@@ -1431,7 +1433,7 @@ end2:
MYF(MY_REDEL_MAKE_BACKUP) : MYF(0)));
}
if (opt_transaction_logging &&
- share->base.born_transactional && !error &&
+ born_transactional && !error &&
(param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX |
T_ZEROFILL)))
error= write_log_record(param);
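The born_transactional local in maria_chk() fixes a use-after-free: maria_close() releases the share, so reading share->base.born_transactional afterwards touched freed memory. The general pattern, copy what you still need before closing, in a minimal sketch with invented names:

    #include <stdio.h>
    #include <stdlib.h>

    struct share  { int born_transactional; };
    struct handle { struct share *s; };

    static void table_close(struct handle *h)
    {
      free(h->s);       /* the share is gone after this point */
      h->s = NULL;
    }

    int main(void)
    {
      struct handle h;
      h.s = malloc(sizeof(*h.s));
      h.s->born_transactional = 1;

      int born_transactional = h.s->born_transactional; /* save before close */
      table_close(&h);
      /* Reading h.s->born_transactional here would touch freed memory;
         the saved copy is safe. */
      printf("transactional: %d\n", born_transactional);
      return 0;
    }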
diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h
index 0a58a0a1786..cba35aabeb3 100644
--- a/storage/maria/maria_def.h
+++ b/storage/maria/maria_def.h
@@ -178,7 +178,7 @@ typedef struct st_maria_state_info
uint changed; /* Changed since maria_chk */
/**
Birthday of the table: no record in the log before this LSN should ever
- be applied to the table. Updated when created, renamed, explicitely
+ be applied to the table. Updated when created, renamed, explicitly
repaired (REPAIR|OPTIMIZE TABLE, ALTER TABLE ENABLE KEYS, maria_chk).
*/
LSN create_rename_lsn;
@@ -640,6 +640,7 @@ struct st_maria_handler
invalidator_by_filename invalidator; /* query cache invalidator */
ulonglong last_auto_increment; /* auto value at start of statement */
ulonglong row_changes; /* Incremented for each change */
+ ulonglong start_row_changes; /* Row changes since start trans */
ulong this_unique; /* uniq filenumber or thread */
ulong last_unique; /* last unique number */
ulong this_loop; /* counter for this open */
@@ -717,49 +718,50 @@ struct st_maria_handler
#define F_EXTRA_LCK -1
/* bits in opt_flag */
-#define MEMMAP_USED 32
-#define REMEMBER_OLD_POS 64
+#define MEMMAP_USED 32U
+#define REMEMBER_OLD_POS 64U
-#define WRITEINFO_UPDATE_KEYFILE 1
-#define WRITEINFO_NO_UNLOCK 2
+#define WRITEINFO_UPDATE_KEYFILE 1U
+#define WRITEINFO_NO_UNLOCK 2U
/* once_flags */
-#define USE_PACKED_KEYS 1
-#define RRND_PRESERVE_LASTINX 2
+#define USE_PACKED_KEYS 1U
+#define RRND_PRESERVE_LASTINX 2U
/* bits in state.changed */
-#define STATE_CHANGED 1
-#define STATE_CRASHED 2
-#define STATE_CRASHED_ON_REPAIR 4
-#define STATE_NOT_ANALYZED 8
-#define STATE_NOT_OPTIMIZED_KEYS 16
-#define STATE_NOT_SORTED_PAGES 32
-#define STATE_NOT_OPTIMIZED_ROWS 64
-#define STATE_NOT_ZEROFILLED 128
-#define STATE_NOT_MOVABLE 256
-#define STATE_MOVED 512 /* set if base->uuid != maria_uuid */
-#define STATE_IN_REPAIR 1024 /* We are running repair on table */
-#define STATE_CRASHED_PRINTED 2048
+#define STATE_CHANGED 1U
+#define STATE_CRASHED 2U
+#define STATE_CRASHED_ON_REPAIR 4U
+#define STATE_NOT_ANALYZED 8U
+#define STATE_NOT_OPTIMIZED_KEYS 16U
+#define STATE_NOT_SORTED_PAGES 32U
+#define STATE_NOT_OPTIMIZED_ROWS 64U
+#define STATE_NOT_ZEROFILLED 128U
+#define STATE_NOT_MOVABLE 256U
+#define STATE_MOVED 512U /* set if base->uuid != maria_uuid */
+#define STATE_IN_REPAIR 1024U /* We are running repair on table */
+#define STATE_CRASHED_PRINTED 2048U
+#define STATE_DATA_FILE_FULL 4096U
#define STATE_CRASHED_FLAGS (STATE_CRASHED | STATE_CRASHED_ON_REPAIR | STATE_CRASHED_PRINTED)
/* options to maria_read_cache */
-#define READING_NEXT 1
-#define READING_HEADER 2
+#define READING_NEXT 1U
+#define READING_HEADER 2U
/* Number of bytes on key pages to indicate used size */
-#define KEYPAGE_USED_SIZE 2
-#define KEYPAGE_KEYID_SIZE 1
-#define KEYPAGE_FLAG_SIZE 1
-#define KEYPAGE_KEY_VERSION_SIZE 4 /* encryption */
-#define KEYPAGE_CHECKSUM_SIZE 4
+#define KEYPAGE_USED_SIZE 2U
+#define KEYPAGE_KEYID_SIZE 1U
+#define KEYPAGE_FLAG_SIZE 1U
+#define KEYPAGE_KEY_VERSION_SIZE 4U /* encryption */
+#define KEYPAGE_CHECKSUM_SIZE 4U
#define MAX_KEYPAGE_HEADER_SIZE (LSN_STORE_SIZE + KEYPAGE_USED_SIZE + \
KEYPAGE_KEYID_SIZE + KEYPAGE_FLAG_SIZE + \
TRANSID_SIZE + KEYPAGE_KEY_VERSION_SIZE)
-#define KEYPAGE_FLAG_ISNOD 1
-#define KEYPAGE_FLAG_HAS_TRANSID 2
+#define KEYPAGE_FLAG_ISNOD 1U
+#define KEYPAGE_FLAG_HAS_TRANSID 2U
#define _ma_get_page_used(share,x) \
((uint) mi_uint2korr((x) + (share)->keypage_header - KEYPAGE_USED_SIZE))
@@ -880,10 +882,10 @@ struct st_maria_handler
#define MEMMAP_EXTRA_MARGIN 7 /* Write this as a suffix for file */
-#define PACK_TYPE_SELECTED 1 /* Bits in field->pack_type */
-#define PACK_TYPE_SPACE_FIELDS 2
-#define PACK_TYPE_ZERO_FILL 4
-#define MARIA_FOUND_WRONG_KEY 32768 /* Impossible value from ha_key_cmp */
+#define PACK_TYPE_SELECTED 1U /* Bits in field->pack_type */
+#define PACK_TYPE_SPACE_FIELDS 2U
+#define PACK_TYPE_ZERO_FILL 4U
+#define MARIA_FOUND_WRONG_KEY 32768U /* Impossible value from ha_key_cmp */
#define MARIA_BLOCK_SIZE(key_length,data_pointer,key_pointer,block_size) (((((key_length)+(data_pointer)+(key_pointer))*4+(key_pointer)+2)/(block_size)+1)*(block_size))
#define MARIA_MAX_KEYPTR_SIZE 5 /* For calculating block lengths */
@@ -905,12 +907,12 @@ extern mysql_mutex_t THR_LOCK_maria;
/* Some tuning parameters */
#define MARIA_MIN_KEYBLOCK_LENGTH 50 /* When to split delete blocks */
-#define MARIA_MIN_SIZE_BULK_INSERT_TREE 16384 /* this is per key */
+#define MARIA_MIN_SIZE_BULK_INSERT_TREE 16384U /* this is per key */
#define MARIA_MIN_ROWS_TO_USE_BULK_INSERT 100
#define MARIA_MIN_ROWS_TO_DISABLE_INDEXES 100
#define MARIA_MIN_ROWS_TO_USE_WRITE_CACHE 10
/* Keep a small buffer for tables only using small blobs */
-#define MARIA_SMALL_BLOB_BUFFER 1024
+#define MARIA_SMALL_BLOB_BUFFER 1024U
#define MARIA_MAX_CONTROL_FILE_LOCK_RETRY 30 /* Retry this many times */
/* Some extern variables */
@@ -1241,12 +1243,12 @@ typedef struct st_maria_block_info
/* bits in return from _ma_get_block_info */
-#define BLOCK_FIRST 1
-#define BLOCK_LAST 2
-#define BLOCK_DELETED 4
-#define BLOCK_ERROR 8 /* Wrong data */
-#define BLOCK_SYNC_ERROR 16 /* Right data at wrong place */
-#define BLOCK_FATAL_ERROR 32 /* hardware-error */
+#define BLOCK_FIRST 1U
+#define BLOCK_LAST 2U
+#define BLOCK_DELETED 4U
+#define BLOCK_ERROR 8U /* Wrong data */
+#define BLOCK_SYNC_ERROR 16U /* Right data at wrong place */
+#define BLOCK_FATAL_ERROR 32U /* hardware-error */
#define NEED_MEM ((uint) 10*4*(IO_SIZE+32)+32) /* Nead for recursion */
#define MAXERR 20
@@ -1255,17 +1257,17 @@ typedef struct st_maria_block_info
#define INDEX_TMP_EXT ".TMM"
#define DATA_TMP_EXT ".TMD"
-#define UPDATE_TIME 1
-#define UPDATE_STAT 2
-#define UPDATE_SORT 4
-#define UPDATE_AUTO_INC 8
-#define UPDATE_OPEN_COUNT 16
+#define UPDATE_TIME 1U
+#define UPDATE_STAT 2U
+#define UPDATE_SORT 4U
+#define UPDATE_AUTO_INC 8U
+#define UPDATE_OPEN_COUNT 16U
/* We use MY_ALIGN_DOWN here mainly to ensure that we get stable values for mysqld --help ) */
#define PAGE_BUFFER_INIT MY_ALIGN_DOWN(1024L*1024L*256L-MALLOC_OVERHEAD, 8192)
#define READ_BUFFER_INIT MY_ALIGN_DOWN(1024L*256L-MALLOC_OVERHEAD, 1024)
#define SORT_BUFFER_INIT MY_ALIGN_DOWN(1024L*1024L*256L-MALLOC_OVERHEAD, 1024)
-#define MIN_SORT_BUFFER 4096
+#define MIN_SORT_BUFFER 4096U
#define fast_ma_writeinfo(INFO) if (!(INFO)->s->tot_locks) (void) _ma_writeinfo((INFO),0)
#define fast_ma_readinfo(INFO) ((INFO)->lock_type == F_UNLCK) && _ma_readinfo((INFO),F_RDLCK,1)
@@ -1430,3 +1432,11 @@ extern my_bool ma_yield_and_check_if_killed(MARIA_HA *info, int inx);
extern my_bool ma_killed_standalone(MARIA_HA *);
extern uint _ma_file_callback_to_id(void *callback_data);
+
+static inline void unmap_file(MARIA_HA *info __attribute__((unused)))
+{
+#ifdef HAVE_MMAP
+ if (info->s->file_map)
+ _ma_unmap_file(info);
+#endif
+}
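The bulk of the maria_def.h changes append a U suffix to flag and size macros. Without it, a constant like 1024 is a signed int, so expressions such as state.changed &= ~STATE_IN_REPAIR mix signed and unsigned operands and trip conversion warnings; with the suffix the whole computation stays unsigned. A tiny illustration:

    #include <stdio.h>

    #define STATE_IN_REPAIR_SIGNED   1024    /* old style: signed int */
    #define STATE_IN_REPAIR_UNSIGNED 1024U   /* new style: unsigned */

    int main(void)
    {
      unsigned int changed = 0xFFFFFFFFU;
      /* ~1024 is a signed int, so the &= needs an implicit conversion;
         this is what -Wsign-conversion style warnings flag. */
      changed &= (unsigned int) ~STATE_IN_REPAIR_SIGNED;
      /* With the U suffix everything is unsigned from the start. */
      changed &= ~STATE_IN_REPAIR_UNSIGNED;
      printf("changed: %#x\n", changed);
      return 0;
    }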
diff --git a/storage/maria/maria_pack.c b/storage/maria/maria_pack.c
index 5166ae63758..6f2d9fe8a5d 100644
--- a/storage/maria/maria_pack.c
+++ b/storage/maria/maria_pack.c
@@ -782,27 +782,29 @@ static HUFF_COUNTS *init_huff_count(MARIA_HA *info,my_off_t records)
for (i=0 ; i < info->s->base.fields ; i++)
{
enum en_fieldtype type;
- count[i].field_length=info->s->columndef[i].length;
- type= count[i].field_type= (enum en_fieldtype) info->s->columndef[i].type;
+ uint col_nr = info->s->columndef[i].column_nr;
+ count[col_nr].field_length=info->s->columndef[i].length;
+ type= count[col_nr].field_type=
+ (enum en_fieldtype) info->s->columndef[i].type;
if (type == FIELD_INTERVALL ||
type == FIELD_CONSTANT ||
type == FIELD_ZERO)
type = FIELD_NORMAL;
- if (count[i].field_length <= 8 &&
+ if (count[col_nr].field_length <= 8 &&
(type == FIELD_NORMAL ||
type == FIELD_SKIP_ZERO))
- count[i].max_zero_fill= count[i].field_length;
+ count[col_nr].max_zero_fill= count[col_nr].field_length;
/*
For every column initialize a tree, which is used to detect distinct
column values. 'int_tree' works together with 'tree_buff' and
'tree_pos'. It's keys are implemented by pointers into 'tree_buff'.
This is accomplished by '-1' as the element size.
*/
- init_tree(&count[i].int_tree,0,0,-1,(qsort_cmp2) compare_tree, NULL,
+ init_tree(&count[col_nr].int_tree,0,0,-1,(qsort_cmp2) compare_tree, NULL,
NULL, MYF(0));
if (records && type != FIELD_BLOB && type != FIELD_VARCHAR)
- count[i].tree_pos=count[i].tree_buff =
- my_malloc(count[i].field_length > 1 ? tree_buff_length : 2,
+ count[col_nr].tree_pos=count[col_nr].tree_buff =
+ my_malloc(count[col_nr].field_length > 1 ? tree_buff_length : 2,
MYF(MY_WME));
}
}
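The init_huff_count() change in maria_pack.c is a real fix rather than cleanup: columndef[] is not necessarily in column order, and columndef[i].column_nr gives each entry's actual column position, so indexing the count array by i attributed per-column statistics to the wrong columns whenever the two orders diverge. A toy demonstration:

    #include <stdio.h>

    struct columndef { unsigned int column_nr, length; };

    int main(void)
    {
      /* Definition order differs from column order in this toy setup. */
      struct columndef defs[3] = { {2, 8}, {0, 4}, {1, 16} };
      unsigned int field_length[3];

      for (unsigned int i = 0; i < 3; i++)
      {
        unsigned int col_nr = defs[i].column_nr;  /* was: count[i] */
        field_length[col_nr] = defs[i].length;
      }
      for (unsigned int c = 0; c < 3; c++)
        printf("column %u: length %u\n", c, field_length[c]);
      return 0;
    }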
diff --git a/storage/maria/maria_read_log.c b/storage/maria/maria_read_log.c
index 147d4223997..d9b5be30105 100644
--- a/storage/maria/maria_read_log.c
+++ b/storage/maria/maria_read_log.c
@@ -70,7 +70,7 @@ int main(int argc, char **argv)
fprintf(stderr, "Can't find any log\n");
goto err;
}
- if (init_pagecache(maria_pagecache, opt_page_buffer_size, 0, 0,
+ if (init_pagecache(maria_pagecache, (size_t)opt_page_buffer_size, 0, 0,
maria_block_size, 0, MY_WME) == 0)
{
fprintf(stderr, "Got error in init_pagecache() (errno: %d)\n", errno);
diff --git a/storage/maria/unittest/sequence_storage.c b/storage/maria/unittest/sequence_storage.c
index 1953304e6a1..c6c8caefca1 100644
--- a/storage/maria/unittest/sequence_storage.c
+++ b/storage/maria/unittest/sequence_storage.c
@@ -80,7 +80,7 @@ void seq_storage_destroy(SEQ_STORAGE *seq)
/**
- @brief Starts the sequence from begining
+ @brief Starts the sequence from beginning
@param seq Reference on the sequence storage.
*/
diff --git a/storage/mroonga/CMakeLists.txt b/storage/mroonga/CMakeLists.txt
index ba6c1279c59..9af111baf56 100644
--- a/storage/mroonga/CMakeLists.txt
+++ b/storage/mroonga/CMakeLists.txt
@@ -65,7 +65,7 @@ set(MRN_BUNDLED_GROONGA_DIR
if(EXISTS "${MRN_BUNDLED_GROONGA_DIR}")
set(MRN_GROONGA_BUNDLED TRUE)
if(MSVC)
- message(STATUS "Bundled Mroonga does not support MSVC yet")
+ # Bundled Mroonga does not support MSVC yet
return()
endif()
else()
diff --git a/storage/mroonga/data/install.sql.in b/storage/mroonga/data/install.sql.in
index d7d5f3c4ad6..0a2f308aef4 100644
--- a/storage/mroonga/data/install.sql.in
+++ b/storage/mroonga/data/install.sql.in
@@ -1,6 +1,6 @@
-DELETE IGNORE FROM mysql.plugin WHERE dl = 'ha_mroonga@MRN_PLUGIN_SUFFIX@';
-
-INSTALL PLUGIN Mroonga SONAME 'ha_mroonga@MRN_PLUGIN_SUFFIX@';
+SET @inst=IF(EXISTS(SELECT * FROM mysql.plugin WHERE NAME='mroonga'),'DO 1', "INSTALL PLUGIN mroonga SONAME 'ha_mroonga'");
+PREPARE s FROM @inst;
+EXECUTE s;
DROP FUNCTION IF EXISTS last_insert_grn_id;
CREATE FUNCTION last_insert_grn_id RETURNS INTEGER
diff --git a/storage/mroonga/ha_mroonga.cpp b/storage/mroonga/ha_mroonga.cpp
index cd6dc1a3d55..06ff71c1b5a 100644
--- a/storage/mroonga/ha_mroonga.cpp
+++ b/storage/mroonga/ha_mroonga.cpp
@@ -538,6 +538,15 @@ static const char *mrn_inspect_extra_function(enum ha_extra_function operation)
case HA_EXTRA_DETACH_CHILDREN:
inspected = "HA_EXTRA_DETACH_CHILDREN";
break;
+ case HA_EXTRA_BEGIN_ALTER_COPY:
+ inspected = "HA_EXTRA_BEGIN_ALTER_COPY";
+ break;
+ case HA_EXTRA_END_ALTER_COPY:
+ inspected = "HA_EXTRA_END_ALTER_COPY";
+ break;
+ case HA_EXTRA_FAKE_START_STMT:
+ inspected = "HA_EXTRA_FAKE_START_STMT";
+ break;
#ifdef MRN_HAVE_HA_EXTRA_EXPORT
case HA_EXTRA_EXPORT:
inspected = "HA_EXTRA_EXPORT";
@@ -1284,7 +1293,8 @@ static ST_FIELD_INFO i_s_mrn_stats_fields_info[] =
0,
"Rows read from Groonga",
SKIP_OPEN_TABLE
- }
+ },
+ { 0, 0, MYSQL_TYPE_NULL, 0, 0, 0, 0}
};
static int i_s_mrn_stats_deinit(void* p)
@@ -4716,11 +4726,8 @@ int ha_mroonga::storage_open_columns(void)
if (table_share->blob_fields)
{
- if (blob_buffers)
- {
- delete [] blob_buffers;
- }
- if (!(blob_buffers = new String[n_columns]))
+ DBUG_ASSERT(!blob_buffers);
+ if (!(blob_buffers = new (&table->mem_root) String[n_columns]))
{
DBUG_RETURN(HA_ERR_OUT_OF_MEM);
}
@@ -8987,10 +8994,12 @@ bool ha_mroonga::is_foreign_key_field(const char *table_name,
grn_obj *range = grn_ctx_at(ctx, grn_obj_get_range(ctx, column));
if (!range) {
+ grn_obj_unlink(ctx, column);
DBUG_RETURN(false);
}
if (!mrn::grn::is_table(range)) {
+ grn_obj_unlink(ctx, column);
DBUG_RETURN(false);
}
@@ -9004,6 +9013,7 @@ bool ha_mroonga::is_foreign_key_field(const char *table_name,
DBUG_RETURN(true);
}
+ grn_obj_unlink(ctx, column);
DBUG_RETURN(false);
}
@@ -12852,13 +12862,22 @@ int ha_mroonga::delete_all_rows()
int ha_mroonga::wrapper_truncate()
{
int error = 0;
+ MRN_SHARE *tmp_share;
MRN_DBUG_ENTER_METHOD();
+
+ if (!(tmp_share = mrn_get_share(table->s->table_name.str, table, &error)))
+ DBUG_RETURN(error);
+
MRN_SET_WRAP_SHARE_KEY(share, table->s);
MRN_SET_WRAP_TABLE_KEY(this, table);
- error = wrap_handler->ha_truncate();
+ error = parse_engine_table_options(ha_thd(), tmp_share->hton, table->s)
+ ? MRN_GET_ERROR_NUMBER
+ : wrap_handler->ha_truncate();
MRN_SET_BASE_SHARE_KEY(share, table->s);
MRN_SET_BASE_TABLE_KEY(this, table);
+ mrn_free_share(tmp_share);
+
if (!error && wrapper_have_target_index()) {
error = wrapper_truncate_index();
}
@@ -14506,6 +14525,7 @@ enum_alter_inplace_result ha_mroonga::wrapper_check_if_supported_inplace_alter(
Alter_inplace_info::ALTER_COLUMN_NULLABLE |
Alter_inplace_info::ALTER_COLUMN_NOT_NULLABLE |
Alter_inplace_info::ALTER_COLUMN_STORAGE_TYPE |
+ Alter_inplace_info::ADD_STORED_GENERATED_COLUMN |
Alter_inplace_info::ALTER_COLUMN_COLUMN_FORMAT
)
)
@@ -14624,7 +14644,6 @@ enum_alter_inplace_result ha_mroonga::storage_check_if_supported_inplace_alter(
Alter_inplace_info::DROP_UNIQUE_INDEX |
MRN_ALTER_INPLACE_INFO_ADD_VIRTUAL_COLUMN |
MRN_ALTER_INPLACE_INFO_ADD_STORED_BASE_COLUMN |
- MRN_ALTER_INPLACE_INFO_ADD_STORED_GENERATED_COLUMN |
Alter_inplace_info::DROP_COLUMN |
Alter_inplace_info::ALTER_COLUMN_NAME;
if (ha_alter_info->handler_flags & explicitly_unsupported_flags) {
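The grn_obj_unlink() calls added in is_foreign_key_field() plug reference leaks: the column object obtained from the Groonga context is reference-counted, so each exit path has to release it, not only the one that previously did. A schematic of the pattern in plain C (the struct and helper are stand-ins, not the Groonga API):

    #include <stdio.h>

    struct obj { int refs; };
    static void obj_unlink(struct obj *o) { o->refs--; }

    /* Every path that is done with the column releases it; before the fix
       the early returns each leaked one reference per call. */
    static int is_foreign_key_column(struct obj *column, int has_range,
                                     int range_is_table)
    {
      if (!has_range)
      {
        obj_unlink(column);
        return 0;
      }
      if (!range_is_table)
      {
        obj_unlink(column);
        return 0;
      }
      /* ... the real code compares key fields here and may return 1 ... */
      obj_unlink(column);
      return 0;
    }

    int main(void)
    {
      struct obj column = {1};
      is_foreign_key_column(&column, 1, 0);
      printf("refs after call: %d\n", column.refs);   /* 0: released */
      return 0;
    }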
diff --git a/storage/mroonga/ha_mroonga.hpp b/storage/mroonga/ha_mroonga.hpp
index 29ee48afe23..0494cade0af 100644
--- a/storage/mroonga/ha_mroonga.hpp
+++ b/storage/mroonga/ha_mroonga.hpp
@@ -407,11 +407,11 @@ public:
ha_mroonga(handlerton *hton, TABLE_SHARE *share_arg);
~ha_mroonga();
const char *table_type() const; // required
- const char *index_type(uint inx);
+ const char *index_type(uint inx) mrn_override;
const char **bas_ext() const; // required
- ulonglong table_flags() const; // required
- ulong index_flags(uint idx, uint part, bool all_parts) const; // required
+ ulonglong table_flags() const mrn_override; // required
+ ulong index_flags(uint idx, uint part, bool all_parts) const mrn_override; // required
// required
int create(const char *name, TABLE *form, HA_CREATE_INFO *info
@@ -430,40 +430,40 @@ public:
#ifndef MRN_HANDLER_HAVE_HA_CLOSE
int close(); // required
#endif
- int info(uint flag); // required
+ int info(uint flag) mrn_override; // required
- uint lock_count() const;
+ uint lock_count() const mrn_override;
THR_LOCK_DATA **store_lock(THD *thd, // required
THR_LOCK_DATA **to,
- enum thr_lock_type lock_type);
- int external_lock(THD *thd, int lock_type);
+ enum thr_lock_type lock_type) mrn_override;
+ int external_lock(THD *thd, int lock_type) mrn_override;
- int rnd_init(bool scan); // required
- int rnd_end();
+ int rnd_init(bool scan) mrn_override; // required
+ int rnd_end() mrn_override;
#ifndef MRN_HANDLER_HAVE_HA_RND_NEXT
int rnd_next(uchar *buf); // required
#endif
#ifndef MRN_HANDLER_HAVE_HA_RND_POS
int rnd_pos(uchar *buf, uchar *pos); // required
#endif
- void position(const uchar *record); // required
- int extra(enum ha_extra_function operation);
- int extra_opt(enum ha_extra_function operation, ulong cache_size);
+ void position(const uchar *record) mrn_override; // required
+ int extra(enum ha_extra_function operation) mrn_override;
+ int extra_opt(enum ha_extra_function operation, ulong cache_size) mrn_override;
- int delete_table(const char *name);
- int write_row(uchar *buf);
- int update_row(const uchar *old_data, uchar *new_data);
- int delete_row(const uchar *buf);
+ int delete_table(const char *name) mrn_override;
+ int write_row(uchar *buf) mrn_override;
+ int update_row(const uchar *old_data, uchar *new_data) mrn_override;
+ int delete_row(const uchar *buf) mrn_override;
- uint max_supported_record_length() const;
- uint max_supported_keys() const;
- uint max_supported_key_parts() const;
- uint max_supported_key_length() const;
- uint max_supported_key_part_length() const;
+ uint max_supported_record_length() const mrn_override;
+ uint max_supported_keys() const mrn_override;
+ uint max_supported_key_parts() const mrn_override;
+ uint max_supported_key_length() const mrn_override;
+ uint max_supported_key_part_length() const mrn_override;
- ha_rows records_in_range(uint inx, key_range *min_key, key_range *max_key);
- int index_init(uint idx, bool sorted);
- int index_end();
+ ha_rows records_in_range(uint inx, key_range *min_key, key_range *max_key) mrn_override;
+ int index_init(uint idx, bool sorted) mrn_override;
+ int index_end() mrn_override;
#ifndef MRN_HANDLER_HAVE_HA_INDEX_READ_MAP
int index_read_map(uchar * buf, const uchar * key,
key_part_map keypart_map,
@@ -485,35 +485,35 @@ public:
#ifndef MRN_HANDLER_HAVE_HA_INDEX_LAST
int index_last(uchar *buf);
#endif
- int index_next_same(uchar *buf, const uchar *key, uint keylen);
+ int index_next_same(uchar *buf, const uchar *key, uint keylen) mrn_override;
- int ft_init();
- FT_INFO *ft_init_ext(uint flags, uint inx, String *key);
- int ft_read(uchar *buf);
+ int ft_init() mrn_override;
+ FT_INFO *ft_init_ext(uint flags, uint inx, String *key) mrn_override;
+ int ft_read(uchar *buf) mrn_override;
- const Item *cond_push(const Item *cond);
- void cond_pop();
+ const Item *cond_push(const Item *cond) mrn_override;
+ void cond_pop() mrn_override;
- bool get_error_message(int error, String *buf);
+ bool get_error_message(int error, String *buf) mrn_override;
- int reset();
+ int reset() mrn_override;
- handler *clone(const char *name, MEM_ROOT *mem_root);
- uint8 table_cache_type();
+ handler *clone(const char *name, MEM_ROOT *mem_root) mrn_override;
+ uint8 table_cache_type() mrn_override;
#ifdef MRN_HANDLER_HAVE_MULTI_RANGE_READ
ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
void *seq_init_param,
uint n_ranges, uint *bufsz,
- uint *flags, Cost_estimate *cost);
+ uint *flags, Cost_estimate *cost) mrn_override;
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
#ifdef MRN_HANDLER_HAVE_MULTI_RANGE_READ_INFO_KEY_PARTS
uint key_parts,
#endif
- uint *bufsz, uint *flags, Cost_estimate *cost);
+ uint *bufsz, uint *flags, Cost_estimate *cost) mrn_override;
int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
uint n_ranges, uint mode,
- HANDLER_BUFFER *buf);
- int multi_range_read_next(range_id_t *range_info);
+ HANDLER_BUFFER *buf) mrn_override;
+ int multi_range_read_next(range_id_t *range_info) mrn_override;
#else // MRN_HANDLER_HAVE_MULTI_RANGE_READ
int read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
KEY_MULTI_RANGE *ranges,
@@ -523,38 +523,38 @@ public:
int read_multi_range_next(KEY_MULTI_RANGE **found_range_p);
#endif // MRN_HANDLER_HAVE_MULTI_RANGE_READ
#ifdef MRN_HANDLER_START_BULK_INSERT_HAS_FLAGS
- void start_bulk_insert(ha_rows rows, uint flags);
+ void start_bulk_insert(ha_rows rows, uint flags) mrn_override;
#else
void start_bulk_insert(ha_rows rows);
#endif
- int end_bulk_insert();
- int delete_all_rows();
- int truncate();
- double scan_time();
- double read_time(uint index, uint ranges, ha_rows rows);
+ int end_bulk_insert() mrn_override;
+ int delete_all_rows() mrn_override;
+ int truncate() mrn_override;
+ double scan_time() mrn_override;
+ double read_time(uint index, uint ranges, ha_rows rows) mrn_override;
#ifdef MRN_HANDLER_HAVE_KEYS_TO_USE_FOR_SCANNING
- const key_map *keys_to_use_for_scanning();
+ const key_map *keys_to_use_for_scanning() mrn_override;
#endif
- ha_rows estimate_rows_upper_bound();
- void update_create_info(HA_CREATE_INFO* create_info);
- int rename_table(const char *from, const char *to);
- bool is_crashed() const;
- bool auto_repair(int error) const;
+ ha_rows estimate_rows_upper_bound() mrn_override;
+ void update_create_info(HA_CREATE_INFO* create_info) mrn_override;
+ int rename_table(const char *from, const char *to) mrn_override;
+ bool is_crashed() const mrn_override;
+ bool auto_repair(int error) const mrn_override;
bool auto_repair() const;
- int disable_indexes(uint mode);
- int enable_indexes(uint mode);
- int check(THD* thd, HA_CHECK_OPT* check_opt);
- int repair(THD* thd, HA_CHECK_OPT* check_opt);
- bool check_and_repair(THD *thd);
- int analyze(THD* thd, HA_CHECK_OPT* check_opt);
- int optimize(THD* thd, HA_CHECK_OPT* check_opt);
- bool is_fatal_error(int error_num, uint flags=0);
+ int disable_indexes(uint mode) mrn_override;
+ int enable_indexes(uint mode) mrn_override;
+ int check(THD* thd, HA_CHECK_OPT* check_opt) mrn_override;
+ int repair(THD* thd, HA_CHECK_OPT* check_opt) mrn_override;
+ bool check_and_repair(THD *thd) mrn_override;
+ int analyze(THD* thd, HA_CHECK_OPT* check_opt) mrn_override;
+ int optimize(THD* thd, HA_CHECK_OPT* check_opt) mrn_override;
+ bool is_fatal_error(int error_num, uint flags=0) mrn_override;
bool check_if_incompatible_data(HA_CREATE_INFO *create_info,
- uint table_changes);
+ uint table_changes) mrn_override;
#ifdef MRN_HANDLER_HAVE_CHECK_IF_SUPPORTED_INPLACE_ALTER
enum_alter_inplace_result
check_if_supported_inplace_alter(TABLE *altered_table,
- Alter_inplace_info *ha_alter_info);
+ Alter_inplace_info *ha_alter_info) mrn_override;
#else
uint alter_table_flags(uint flags);
# ifdef MRN_HANDLER_HAVE_FINAL_ADD_INDEX
@@ -570,78 +570,78 @@ public:
int update_auto_increment();
void set_next_insert_id(ulonglong id);
void get_auto_increment(ulonglong offset, ulonglong increment, ulonglong nb_desired_values,
- ulonglong *first_value, ulonglong *nb_reserved_values);
+ ulonglong *first_value, ulonglong *nb_reserved_values) mrn_override;
void restore_auto_increment(ulonglong prev_insert_id);
- void release_auto_increment();
- int check_for_upgrade(HA_CHECK_OPT *check_opt);
+ void release_auto_increment() mrn_override;
+ int check_for_upgrade(HA_CHECK_OPT *check_opt) mrn_override;
#ifdef MRN_HANDLER_HAVE_RESET_AUTO_INCREMENT
- int reset_auto_increment(ulonglong value);
+ int reset_auto_increment(ulonglong value) mrn_override;
#endif
- bool was_semi_consistent_read();
- void try_semi_consistent_read(bool yes);
- void unlock_row();
- int start_stmt(THD *thd, thr_lock_type lock_type);
+ bool was_semi_consistent_read() mrn_override;
+ void try_semi_consistent_read(bool yes) mrn_override;
+ void unlock_row() mrn_override;
+ int start_stmt(THD *thd, thr_lock_type lock_type) mrn_override;
protected:
#ifdef MRN_HANDLER_RECORDS_RETURN_ERROR
int records(ha_rows *num_rows);
#else
- ha_rows records();
+ ha_rows records() mrn_override;
#endif
#ifdef MRN_HANDLER_HAVE_HA_RND_NEXT
- int rnd_next(uchar *buf);
+ int rnd_next(uchar *buf) mrn_override;
#endif
#ifdef MRN_HANDLER_HAVE_HA_RND_POS
- int rnd_pos(uchar *buf, uchar *pos);
+ int rnd_pos(uchar *buf, uchar *pos) mrn_override;
#endif
#ifdef MRN_HANDLER_HAVE_HA_INDEX_READ_MAP
int index_read_map(uchar *buf, const uchar *key,
key_part_map keypart_map,
- enum ha_rkey_function find_flag);
+ enum ha_rkey_function find_flag) mrn_override;
#endif
#ifdef MRN_HANDLER_HAVE_HA_INDEX_NEXT
- int index_next(uchar *buf);
+ int index_next(uchar *buf) mrn_override;
#endif
#ifdef MRN_HANDLER_HAVE_HA_INDEX_PREV
- int index_prev(uchar *buf);
+ int index_prev(uchar *buf) mrn_override;
#endif
#ifdef MRN_HANDLER_HAVE_HA_INDEX_FIRST
- int index_first(uchar *buf);
+ int index_first(uchar *buf) mrn_override;
#endif
#ifdef MRN_HANDLER_HAVE_HA_INDEX_LAST
- int index_last(uchar *buf);
+ int index_last(uchar *buf) mrn_override;
#endif
- void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share_arg);
- bool primary_key_is_clustered();
- bool is_fk_defined_on_table_or_index(uint index);
- char *get_foreign_key_create_info();
+ void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share_arg) mrn_override;
+ bool primary_key_is_clustered() mrn_override;
+ bool is_fk_defined_on_table_or_index(uint index) mrn_override;
+ char *get_foreign_key_create_info() mrn_override;
#ifdef MRN_HANDLER_HAVE_GET_TABLESPACE_NAME
char *get_tablespace_name(THD *thd, char *name, uint name_len);
#endif
- bool can_switch_engines();
- int get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list);
- int get_parent_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list);
- uint referenced_by_foreign_key();
- void init_table_handle_for_HANDLER();
- void free_foreign_key_create_info(char* str);
+ bool can_switch_engines() mrn_override;
+ int get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list) mrn_override;
+ int get_parent_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list) mrn_override;
+ uint referenced_by_foreign_key() mrn_override;
+ void init_table_handle_for_HANDLER() mrn_override;
+ void free_foreign_key_create_info(char* str) mrn_override;
#ifdef MRN_HAVE_HA_REBIND_PSI
- void unbind_psi();
- void rebind_psi();
+ void unbind_psi() mrn_override;
+ void rebind_psi() mrn_override;
#endif
my_bool register_query_cache_table(THD *thd,
char *table_key,
uint key_length,
qc_engine_callback *engine_callback,
- ulonglong *engine_data);
+ ulonglong *engine_data) mrn_override;
#ifdef MRN_HANDLER_HAVE_CHECK_IF_SUPPORTED_INPLACE_ALTER
bool prepare_inplace_alter_table(TABLE *altered_table,
- Alter_inplace_info *ha_alter_info);
+ Alter_inplace_info *ha_alter_info) mrn_override;
bool inplace_alter_table(TABLE *altered_table,
- Alter_inplace_info *ha_alter_info);
+ Alter_inplace_info *ha_alter_info) mrn_override;
bool commit_inplace_alter_table(TABLE *altered_table,
Alter_inplace_info *ha_alter_info,
- bool commit);
- void notify_table_changed();
+ bool commit) mrn_override;
+ void notify_table_changed() mrn_override;
#endif
private:
@@ -692,7 +692,7 @@ private:
int generic_geo_open_cursor(const uchar *key, enum ha_rkey_function find_flag);
#ifdef MRN_HANDLER_HAVE_HA_CLOSE
- int close();
+ int close() mrn_override;
#endif
bool is_dry_write();
bool is_enable_optimization();
diff --git a/storage/mroonga/mrn_table.cpp b/storage/mroonga/mrn_table.cpp
index 8653092e45f..dfa28222de5 100644
--- a/storage/mroonga/mrn_table.cpp
+++ b/storage/mroonga/mrn_table.cpp
@@ -1038,10 +1038,7 @@ TABLE_SHARE *mrn_get_table_share(TABLE_LIST *table_list, int *error)
share = get_table_share(thd, table_list, key, key_length, 0, error,
hash_value);
# elif defined(MRN_HAVE_TDC_ACQUIRE_SHARE)
- share = tdc_acquire_share(thd, table_list->db, table_list->table_name, key,
- key_length,
- table_list->mdl_request.key.tc_hash_value(),
- GTS_TABLE, NULL);
+ share = tdc_acquire_share(thd, table_list, GTS_TABLE);
# else
share = get_table_share(thd, table_list, key, key_length, 0, error);
# endif
diff --git a/storage/mroonga/mysql-test/mroonga/include/mroonga/have_mroonga_deinit.inc b/storage/mroonga/mysql-test/mroonga/include/mroonga/have_mroonga_deinit.inc
index 1b4e8575122..e29300b5db4 100644
--- a/storage/mroonga/mysql-test/mroonga/include/mroonga/have_mroonga_deinit.inc
+++ b/storage/mroonga/mysql-test/mroonga/include/mroonga/have_mroonga_deinit.inc
@@ -33,4 +33,10 @@ if (!$have_mroonga_storage_engine) {
UNINSTALL PLUGIN mroonga;
}
+# Some tests re-create the test database. Put it back in the
+# original character set.
+--source include/default_charset.inc
+disable_query_log;
+drop database test;
+create database test;
enable_query_log;
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/alter_table_change_column_rename_multiple.result b/storage/mroonga/mysql-test/mroonga/storage/r/alter_table_change_column_rename_multiple.result
index 6c87244ba47..f640e8de23b 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/alter_table_change_column_rename_multiple.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/alter_table_change_column_rename_multiple.result
@@ -24,7 +24,7 @@ diaries CREATE TABLE `diaries` (
`subject` varchar(40) DEFAULT NULL,
PRIMARY KEY (`internal_id`)
) ENGINE=Mroonga DEFAULT CHARSET=utf8
-INSERT INTO diaries (subject, description)
+INSERT IGNORE INTO diaries (subject, description)
VALUES ("groonga (1)", "starting groonga.");
SELECT * FROM diaries;
description internal_id subject
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/column_date_zero_date.result b/storage/mroonga/mysql-test/mroonga/storage/r/column_date_zero_date.result
index b2364e1158e..022d6c00715 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/column_date_zero_date.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/column_date_zero_date.result
@@ -10,12 +10,14 @@ timestamps CREATE TABLE `timestamps` (
`create_dt` date DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=Mroonga DEFAULT CHARSET=utf8
+SET sql_mode = '';
INSERT INTO timestamps (create_dt) VALUES ("2012-00-01");
Warnings:
Warning 1265 Data truncated for column 'create_dt' at row 1
INSERT INTO timestamps (create_dt) VALUES ("2012-01-00");
Warnings:
Warning 1265 Data truncated for column 'create_dt' at row 1
+SET sql_mode = DEFAULT;
SELECT * FROM timestamps;
id create_dt
1 2012-01-01
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_32bit_2038.result b/storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_32bit_2038.result
index 70a980e2e8c..712d87ca251 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_32bit_2038.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_32bit_2038.result
@@ -6,7 +6,7 @@ created_at DATETIME
) DEFAULT CHARSET UTF8;
INSERT INTO diaries (title, created_at)
VALUES ('2038-01-18 03:14:07', '2038-01-18 03:14:07');
-INSERT INTO diaries (title, created_at)
+INSERT IGNORE INTO diaries (title, created_at)
VALUES ('2038-01-20 03:14:08', '2038-01-20 03:14:08');
Warnings:
Warning 1265 Data truncated for column 'created_at' at row 1
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_32bit_before_unix_epoch.result b/storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_32bit_before_unix_epoch.result
index a48be4da873..85f091cca86 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_32bit_before_unix_epoch.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_32bit_before_unix_epoch.result
@@ -4,7 +4,7 @@ id INT PRIMARY KEY AUTO_INCREMENT,
title TEXT,
created_at DATETIME
) DEFAULT CHARSET UTF8;
-INSERT INTO diaries (title, created_at)
+INSERT IGNORE INTO diaries (title, created_at)
VALUES ('1000-01-01 00:00:00', '1000-01-01 00:00:00');
Warnings:
Warning 1265 Data truncated for column 'created_at' at row 1
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_32bit_max.result b/storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_32bit_max.result
index b28a1744947..9d9e2f610fa 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_32bit_max.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_32bit_max.result
@@ -4,7 +4,7 @@ id INT PRIMARY KEY AUTO_INCREMENT,
title TEXT,
created_at DATETIME
) DEFAULT CHARSET UTF8;
-INSERT INTO diaries (title, created_at)
+INSERT IGNORE INTO diaries (title, created_at)
VALUES ('9999-12-31 23:59:59', '9999-12-31 23:59:59');
Warnings:
Warning 1265 Data truncated for column 'created_at' at row 1
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_32bit_out_of_range.result b/storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_32bit_out_of_range.result
index 838eaf45f5c..99611268724 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_32bit_out_of_range.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_32bit_out_of_range.result
@@ -4,7 +4,7 @@ id INT PRIMARY KEY AUTO_INCREMENT,
title TEXT,
created_at DATETIME
) DEFAULT CHARSET UTF8;
-INSERT INTO diaries (title, created_at)
+INSERT IGNORE INTO diaries (title, created_at)
VALUES ('2012', '2012');
Warnings:
Warning 1265 Data truncated for column 'created_at' at row 1
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_64bit_strict_sql_mode_out_of_range.result b/storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_64bit_strict_sql_mode_out_of_range.result
index 2d5e5e64147..6617b49d682 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_64bit_strict_sql_mode_out_of_range.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_64bit_strict_sql_mode_out_of_range.result
@@ -6,7 +6,7 @@ created_at DATETIME
) DEFAULT CHARSET UTF8;
INSERT INTO diaries (title, created_at)
VALUES ('2012', '2012');
-ERROR 22007: Incorrect datetime value: '2012' for column 'created_at' at row 1
+ERROR 22007: Incorrect datetime value: '2012' for column `test`.`diaries`.`created_at` at row 1
SELECT * FROM diaries;
id title created_at
DROP TABLE diaries;
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_64bit_version_5_6_or_later_out_of_range.result b/storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_64bit_version_5_6_or_later_out_of_range.result
index 352638031b7..3500d651765 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_64bit_version_5_6_or_later_out_of_range.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/column_datetime_64bit_version_5_6_or_later_out_of_range.result
@@ -9,6 +9,8 @@ VALUES ('2012', '2012');
Warnings:
Warning 1265 Data truncated for column 'created_at' at row 1
Warning 1265 Data truncated for column 'created_at' at row 1
+INSERT INTO diaries (title, created_at) VALUES ('2012', '2012');
+ERROR 22007: Incorrect datetime value: '2012' for column 'created_at' at row 1
SELECT * FROM diaries;
id title created_at
1 2012 0000-01-01 00:00:00
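The 64-bit variant now exercises both paths: the first INSERT (presumably still run under an emptied sql_mode, as in column_date_zero_date above) keeps its truncation warnings, and a second copy of the same statement is expected to fail with SQLSTATE 22007 once the strict default mode is back in effect. Roughly:

SET sql_mode = '';
INSERT INTO diaries (title, created_at) VALUES ('2012', '2012');  -- Warning 1265
SET sql_mode = DEFAULT;
INSERT INTO diaries (title, created_at) VALUES ('2012', '2012');  -- ERROR 22007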
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/column_generated_stored_add_column.result b/storage/mroonga/mysql-test/mroonga/storage/r/column_generated_stored_add_column.result
index 20213f0cbf8..924c3134a3e 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/column_generated_stored_add_column.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/column_generated_stored_add_column.result
@@ -1,9 +1,10 @@
-DROP TABLE IF EXISTS logs;
+set names utf8mb4;
CREATE TABLE logs (
id INT,
record JSON
) ENGINE=Mroonga DEFAULT CHARSET=utf8mb4;
INSERT INTO logs(id, record) VALUES (1, '{"level": "info", "message": "start"}');
+INSERT INTO logs(id, record) VALUES (1, json_object('message', repeat('☹', 253)));
ALTER TABLE logs ADD COLUMN message VARCHAR(255) GENERATED ALWAYS AS (json_extract(`record`, '$.message')) STORED;
ALTER TABLE logs ADD FULLTEXT INDEX(message) comment 'tokenizer "TokenBigramSplitSymbolAlphaDigit"';
INSERT INTO logs(id, record) VALUES (2, '{"level": "info", "message": "restart"}');
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/column_time_fractional_seconds_with_index.result b/storage/mroonga/mysql-test/mroonga/storage/r/column_time_fractional_seconds_with_index.result
index 35434a00160..111fae77a04 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/column_time_fractional_seconds_with_index.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/column_time_fractional_seconds_with_index.result
@@ -25,6 +25,6 @@ id title average max
SELECT * FROM running_records
WHERE average BETWEEN "-838:59:59.000000" AND "01:00:00.000001";
id title average max
-3 record failure -838:59:59.000000 -838:59:59.000000
1 normal condition 01:00:00.000001 01:05:00.000001
+3 record failure -838:59:59.000000 -838:59:59.000000
DROP TABLE running_records;
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/column_time_with_index.result b/storage/mroonga/mysql-test/mroonga/storage/r/column_time_with_index.result
index a0b0350a8e3..326c81958cc 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/column_time_with_index.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/column_time_with_index.result
@@ -25,6 +25,6 @@ id title average max
SELECT * FROM running_records
WHERE average BETWEEN "-838:59:59" AND "01:00:00";
id title average max
-3 record failure -838:59:59 -838:59:59
1 normal condition 01:00:00 01:05:00
+3 record failure -838:59:59 -838:59:59
DROP TABLE running_records;
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/foreign_key_alter_add.result b/storage/mroonga/mysql-test/mroonga/storage/r/foreign_key_alter_add.result
index bfb263f11b5..e2e712af4b6 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/foreign_key_alter_add.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/foreign_key_alter_add.result
@@ -17,5 +17,8 @@ articles CREATE TABLE `articles` (
KEY `comment` (`comment`),
CONSTRAINT `comment` FOREIGN KEY (`comment`) REFERENCES `test`.`comments` (`comment`) ON DELETE RESTRICT ON UPDATE RESTRICT
) ENGINE=Mroonga DEFAULT CHARSET=latin1
+SELECT * FROM information_schema.referential_constraints;
+CONSTRAINT_CATALOG CONSTRAINT_SCHEMA CONSTRAINT_NAME UNIQUE_CONSTRAINT_CATALOG UNIQUE_CONSTRAINT_SCHEMA UNIQUE_CONSTRAINT_NAME MATCH_OPTION UPDATE_RULE DELETE_RULE TABLE_NAME REFERENCED_TABLE_NAME
+def test comment def test PRIMARY NONE RESTRICT RESTRICT articles comments
DROP TABLE articles;
DROP TABLE comments;
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/foreign_key_alter_drop.result b/storage/mroonga/mysql-test/mroonga/storage/r/foreign_key_alter_drop.result
index 101a4a3de4b..fc3cda00499 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/foreign_key_alter_drop.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/foreign_key_alter_drop.result
@@ -17,5 +17,7 @@ articles CREATE TABLE `articles` (
`comment` int(10) unsigned DEFAULT NULL,
KEY `comment` (`comment`)
) ENGINE=Mroonga DEFAULT CHARSET=latin1
+SELECT * FROM information_schema.referential_constraints;
+CONSTRAINT_CATALOG CONSTRAINT_SCHEMA CONSTRAINT_NAME UNIQUE_CONSTRAINT_CATALOG UNIQUE_CONSTRAINT_SCHEMA UNIQUE_CONSTRAINT_NAME MATCH_OPTION UPDATE_RULE DELETE_RULE TABLE_NAME REFERENCED_TABLE_NAME
DROP TABLE articles;
DROP TABLE comments;
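The two foreign-key tests now also check INFORMATION_SCHEMA: after ALTER TABLE ... ADD FOREIGN KEY the constraint is visible in referential_constraints, and after dropping it the view is empty again. The unfiltered SELECT * works only because the test server has no other foreign keys; a more targeted form of the same check would be:

SELECT constraint_name, update_rule, delete_rule, table_name, referenced_table_name
FROM information_schema.referential_constraints
WHERE constraint_schema = 'test';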
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/geometry_bulk_insert_null.result b/storage/mroonga/mysql-test/mroonga/storage/r/geometry_bulk_insert_null.result
index 5e831860c5e..fc16c590dc0 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/geometry_bulk_insert_null.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/geometry_bulk_insert_null.result
@@ -2,7 +2,7 @@ DROP TABLE IF EXISTS shops;
CREATE TABLE shops (
location GEOMETRY NOT NULL
);
-INSERT INTO shops VALUES (NULL), (NULL);
+INSERT IGNORE INTO shops VALUES (NULL), (NULL);
Warnings:
Warning 1048 Column 'location' cannot be null
Warning 1048 Column 'location' cannot be null
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_date_32bit_equal.result b/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_date_32bit_equal.result
index 2174efc1b4f..61ce4406987 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_date_32bit_equal.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_date_32bit_equal.result
@@ -5,17 +5,17 @@ start DATE,
end DATE,
UNIQUE KEY range_key(start, end)
);
-INSERT INTO ranges VALUES (1, "1000-01-01", "2012-10-05");
+INSERT IGNORE INTO ranges VALUES (1, "1000-01-01", "2012-10-05");
Warnings:
Warning 1265 Data truncated for column 'start' at row 1
-INSERT INTO ranges VALUES (2, "1000-01-01", "9999-12-31");
+INSERT IGNORE INTO ranges VALUES (2, "1000-01-01", "9999-12-31");
Warnings:
Warning 1265 Data truncated for column 'start' at row 1
Warning 1265 Data truncated for column 'end' at row 1
-INSERT INTO ranges VALUES (3, "2012-10-25", "9999-12-31");
+INSERT IGNORE INTO ranges VALUES (3, "2012-10-25", "9999-12-31");
Warnings:
Warning 1265 Data truncated for column 'end' at row 1
-INSERT INTO ranges VALUES (4, "9999-12-31", "1000-01-01");
+INSERT IGNORE INTO ranges VALUES (4, "9999-12-31", "1000-01-01");
Warnings:
Warning 1265 Data truncated for column 'start' at row 1
Warning 1265 Data truncated for column 'end' at row 1
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_date_order_32bit_asc.result b/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_date_order_32bit_asc.result
index 0a64a822fb5..bc1c3c57daa 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_date_order_32bit_asc.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_date_order_32bit_asc.result
@@ -5,17 +5,17 @@ start DATE,
end DATE,
UNIQUE KEY range_key(start, end)
);
-INSERT INTO ranges VALUES (1, "2012-10-25", "9999-12-31");
+INSERT IGNORE INTO ranges VALUES (1, "2012-10-25", "9999-12-31");
Warnings:
Warning 1265 Data truncated for column 'end' at row 1
-INSERT INTO ranges VALUES (2, "1000-01-01", "2012-10-05");
+INSERT IGNORE INTO ranges VALUES (2, "1000-01-01", "2012-10-05");
Warnings:
Warning 1265 Data truncated for column 'start' at row 1
-INSERT INTO ranges VALUES (3, "9999-12-31", "1000-01-01");
+INSERT IGNORE INTO ranges VALUES (3, "9999-12-31", "1000-01-01");
Warnings:
Warning 1265 Data truncated for column 'start' at row 1
Warning 1265 Data truncated for column 'end' at row 1
-INSERT INTO ranges VALUES (4, "1000-01-01", "9999-12-31");
+INSERT IGNORE INTO ranges VALUES (4, "1000-01-01", "9999-12-31");
Warnings:
Warning 1265 Data truncated for column 'start' at row 1
Warning 1265 Data truncated for column 'end' at row 1
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_date_order_32bit_desc.result b/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_date_order_32bit_desc.result
index 24439fdf5fa..820ee4f4465 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_date_order_32bit_desc.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_date_order_32bit_desc.result
@@ -5,17 +5,17 @@ start DATE,
end DATE,
UNIQUE KEY range_key(start, end)
);
-INSERT INTO ranges VALUES (1, "2012-10-25", "9999-12-31");
+INSERT IGNORE INTO ranges VALUES (1, "2012-10-25", "9999-12-31");
Warnings:
Warning 1265 Data truncated for column 'end' at row 1
-INSERT INTO ranges VALUES (2, "1000-01-01", "2012-10-05");
+INSERT IGNORE INTO ranges VALUES (2, "1000-01-01", "2012-10-05");
Warnings:
Warning 1265 Data truncated for column 'start' at row 1
-INSERT INTO ranges VALUES (3, "9999-12-31", "1000-01-01");
+INSERT IGNORE INTO ranges VALUES (3, "9999-12-31", "1000-01-01");
Warnings:
Warning 1265 Data truncated for column 'start' at row 1
Warning 1265 Data truncated for column 'end' at row 1
-INSERT INTO ranges VALUES (4, "1000-01-01", "9999-12-31");
+INSERT IGNORE INTO ranges VALUES (4, "1000-01-01", "9999-12-31");
Warnings:
Warning 1265 Data truncated for column 'start' at row 1
Warning 1265 Data truncated for column 'end' at row 1
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_datetime_insert_delete_insert_invalid_value.result b/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_datetime_insert_delete_insert_invalid_value.result
index 155faf85510..130c03edd44 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_datetime_insert_delete_insert_invalid_value.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_datetime_insert_delete_insert_invalid_value.result
@@ -5,14 +5,14 @@ start datetime,
end datetime,
UNIQUE KEY range_key(start, end)
);
-INSERT INTO ranges VALUES (1, "1990-00-00 00:00:00", "2012-10-05 23:59:59");
+INSERT IGNORE INTO ranges VALUES (1, "1990-00-00 00:00:00", "2012-10-05 23:59:59");
Warnings:
Warning 1265 Data truncated for column 'start' at row 1
SELECT * FROM ranges;
id start end
1 1990-01-01 00:00:00 2012-10-05 23:59:59
DELETE FROM ranges WHERE id = 1;
-INSERT INTO ranges VALUES (1, "1990-00-00 00:00:00", "2012-10-05 23:59:59");
+INSERT IGNORE INTO ranges VALUES (1, "1990-00-00 00:00:00", "2012-10-05 23:59:59");
Warnings:
Warning 1265 Data truncated for column 'start' at row 1
SELECT * FROM ranges;
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_decimal.result b/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_decimal.result
index 015afdb5cf6..cb8a1c61306 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_decimal.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_decimal.result
@@ -30,7 +30,7 @@ c2 c3
123.456000000000000000000000000000 0.000000000000000000000000000001
98765432109876543210987654321098765.432109876543210987654321098765 -123.456000000000000000000000000000
insert into t1 values(6,123.456,0.000000000000000000000000000001);
-ERROR 23000: Duplicate entry '123.456000000000000000000000000000-0.000000000000000000000000000' for key 'uk1'
+ERROR 23000: Duplicate entry '123.456000000000000000000000000000-0.000000000000000000000000...' for key 'uk1'
delete from t1 where c1 = 1;
insert into t1 values(1,123.456,0.000000000000000000000000000001);
drop table t1;
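Only the expected message changes here: overlong key values in an ER_DUP_ENTRY message are now explicitly truncated with a trailing '...' instead of being silently cut at the length limit. The conflict itself is unchanged:

insert into t1 values(1,123.456,0.000000000000000000000000000001);  -- original row
insert into t1 values(6,123.456,0.000000000000000000000000000001);  -- duplicate on uk1; message now ends in '...'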
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_year_32bit_equal.result b/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_year_32bit_equal.result
index 04b3ba38395..ad936268b46 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_year_32bit_equal.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_year_32bit_equal.result
@@ -5,17 +5,17 @@ start YEAR,
end YEAR,
UNIQUE KEY range_key(start, end)
);
-INSERT INTO ranges VALUES (1, 1901, 2012);
+INSERT IGNORE INTO ranges VALUES (1, 1901, 2012);
Warnings:
Warning 1265 Data truncated for column 'start' at row 1
-INSERT INTO ranges VALUES (2, 1901, 2155);
+INSERT IGNORE INTO ranges VALUES (2, 1901, 2155);
Warnings:
Warning 1265 Data truncated for column 'start' at row 1
Warning 1265 Data truncated for column 'end' at row 1
-INSERT INTO ranges VALUES (3, 2012, 2155);
+INSERT IGNORE INTO ranges VALUES (3, 2012, 2155);
Warnings:
Warning 1265 Data truncated for column 'end' at row 1
-INSERT INTO ranges VALUES (4, 2155, 1901);
+INSERT IGNORE INTO ranges VALUES (4, 2155, 1901);
Warnings:
Warning 1265 Data truncated for column 'start' at row 1
Warning 1265 Data truncated for column 'end' at row 1
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_year_order_32bit_asc.result b/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_year_order_32bit_asc.result
index 9a84d115a87..8200263a93c 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_year_order_32bit_asc.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_year_order_32bit_asc.result
@@ -5,17 +5,17 @@ start YEAR,
end YEAR,
UNIQUE KEY range_key(start, end)
);
-INSERT INTO ranges VALUES (1, 2012, 2155);
+INSERT IGNORE INTO ranges VALUES (1, 2012, 2155);
Warnings:
Warning 1265 Data truncated for column 'end' at row 1
-INSERT INTO ranges VALUES (2, 1901, 2012);
+INSERT IGNORE INTO ranges VALUES (2, 1901, 2012);
Warnings:
Warning 1265 Data truncated for column 'start' at row 1
-INSERT INTO ranges VALUES (3, 2155, 1901);
+INSERT IGNORE INTO ranges VALUES (3, 2155, 1901);
Warnings:
Warning 1265 Data truncated for column 'start' at row 1
Warning 1265 Data truncated for column 'end' at row 1
-INSERT INTO ranges VALUES (4, 1901, 2155);
+INSERT IGNORE INTO ranges VALUES (4, 1901, 2155);
Warnings:
Warning 1265 Data truncated for column 'start' at row 1
Warning 1265 Data truncated for column 'end' at row 1
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_year_order_32bit_desc.result b/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_year_order_32bit_desc.result
index 3deb7435030..fc85d2a98a1 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_year_order_32bit_desc.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/index_multiple_column_unique_year_order_32bit_desc.result
@@ -5,17 +5,17 @@ start YEAR,
end YEAR,
UNIQUE KEY range_key(start, end)
);
-INSERT INTO ranges VALUES (1, 2012, 2155);
+INSERT IGNORE INTO ranges VALUES (1, 2012, 2155);
Warnings:
Warning 1265 Data truncated for column 'end' at row 1
-INSERT INTO ranges VALUES (2, 1901, 2012);
+INSERT IGNORE INTO ranges VALUES (2, 1901, 2012);
Warnings:
Warning 1265 Data truncated for column 'start' at row 1
-INSERT INTO ranges VALUES (3, 2155, 1901);
+INSERT IGNORE INTO ranges VALUES (3, 2155, 1901);
Warnings:
Warning 1265 Data truncated for column 'start' at row 1
Warning 1265 Data truncated for column 'end' at row 1
-INSERT INTO ranges VALUES (4, 1901, 2155);
+INSERT IGNORE INTO ranges VALUES (4, 1901, 2155);
Warnings:
Warning 1265 Data truncated for column 'start' at row 1
Warning 1265 Data truncated for column 'end' at row 1
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/partition_insert.result b/storage/mroonga/mysql-test/mroonga/storage/r/partition_insert.result
index ea1e63e39d0..0252fd905d8 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/partition_insert.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/partition_insert.result
@@ -16,11 +16,11 @@ logs CREATE TABLE `logs` (
`timestamp` datetime DEFAULT NULL,
`message` text
) ENGINE=Mroonga DEFAULT CHARSET=utf8
-/*!50100 PARTITION BY RANGE (TO_DAYS(timestamp))
+ PARTITION BY RANGE (TO_DAYS(timestamp))
(PARTITION p201501 VALUES LESS THAN (735995) ENGINE = Mroonga,
PARTITION p201502 VALUES LESS THAN (736023) ENGINE = Mroonga,
PARTITION p201503 VALUES LESS THAN (736054) ENGINE = Mroonga,
- PARTITION pfuture VALUES LESS THAN MAXVALUE ENGINE = Mroonga) */
+ PARTITION pfuture VALUES LESS THAN MAXVALUE ENGINE = Mroonga)
INSERT INTO logs VALUES('2015-01-01 00:00:00', 'Start');
INSERT INTO logs VALUES('2015-01-31 23:59:59', 'Shutdown');
INSERT INTO logs VALUES('2015-02-01 00:00:00', 'Start');
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/partition_update.result b/storage/mroonga/mysql-test/mroonga/storage/r/partition_update.result
index 8d8208f81a9..754c4f98402 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/partition_update.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/partition_update.result
@@ -16,11 +16,11 @@ logs CREATE TABLE `logs` (
`timestamp` datetime DEFAULT NULL,
`message` text
) ENGINE=Mroonga DEFAULT CHARSET=utf8
-/*!50100 PARTITION BY RANGE (TO_DAYS(timestamp))
+PARTITION BY RANGE (TO_DAYS(timestamp))
(PARTITION p201501 VALUES LESS THAN (735995) ENGINE = Mroonga,
PARTITION p201502 VALUES LESS THAN (736023) ENGINE = Mroonga,
PARTITION p201503 VALUES LESS THAN (736054) ENGINE = Mroonga,
- PARTITION pfuture VALUES LESS THAN MAXVALUE ENGINE = Mroonga) */
+ PARTITION pfuture VALUES LESS THAN MAXVALUE ENGINE = Mroonga)
INSERT INTO logs VALUES('2015-01-01 00:00:00', 'Start');
INSERT INTO logs VALUES('2015-02-01 00:00:00', 'Start');
INSERT INTO logs VALUES('2015-03-01 00:00:00', 'Start');
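In both partition results the /*!50100 ... */ executable-comment wrapper is gone: SHOW CREATE TABLE now emits the partitioning clause as plain SQL rather than as a version-gated comment for pre-partitioning servers. The statement whose output changed, with the new shape of the tail:

SHOW CREATE TABLE logs;
-- ... ) ENGINE=Mroonga DEFAULT CHARSET=utf8
--  PARTITION BY RANGE (TO_DAYS(timestamp))
-- (PARTITION p201501 VALUES LESS THAN (735995) ENGINE = Mroonga, ...)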
diff --git a/storage/mroonga/mysql-test/mroonga/storage/suite.pm b/storage/mroonga/mysql-test/mroonga/storage/suite.pm
index 528ccc5d693..7e4f8c1776b 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/suite.pm
+++ b/storage/mroonga/mysql-test/mroonga/storage/suite.pm
@@ -5,7 +5,7 @@ package My::Suite::Mroonga;
return "No Mroonga engine" unless $ENV{HA_MROONGA_SO} or
$::mysqld_variables{'mroonga'} eq "ON";
-sub is_default { 1 }
+sub is_default { not $::opt_embedded_server }
my $groonga_normalizer_mysql_dir=$::basedir . '/storage/mroonga/vendor/groonga/vendor/plugins/groonga-normalizer-mysql';
my $groonga_normalizer_mysql_install_dir=$::basedir . '/lib/groonga/plugins';
diff --git a/storage/mroonga/mysql-test/mroonga/storage/t/alter_table_change_column_rename_multiple.test b/storage/mroonga/mysql-test/mroonga/storage/t/alter_table_change_column_rename_multiple.test
index 5de6368b0c4..fa97b30d8a5 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/t/alter_table_change_column_rename_multiple.test
+++ b/storage/mroonga/mysql-test/mroonga/storage/t/alter_table_change_column_rename_multiple.test
@@ -34,7 +34,7 @@ ALTER TABLE diaries
CHANGE id internal_id INT AUTO_INCREMENT;
SHOW CREATE TABLE diaries;
-INSERT INTO diaries (subject, description)
+INSERT IGNORE INTO diaries (subject, description)
VALUES ("groonga (1)", "starting groonga.");
SELECT * FROM diaries;
diff --git a/storage/mroonga/mysql-test/mroonga/storage/t/column_date_zero_date.test b/storage/mroonga/mysql-test/mroonga/storage/t/column_date_zero_date.test
index d8f0dd4476e..8b069cabbfb 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/t/column_date_zero_date.test
+++ b/storage/mroonga/mysql-test/mroonga/storage/t/column_date_zero_date.test
@@ -28,8 +28,10 @@ CREATE TABLE timestamps (
) DEFAULT CHARSET UTF8;
SHOW CREATE TABLE timestamps;
+SET sql_mode = '';
INSERT INTO timestamps (create_dt) VALUES ("2012-00-01");
INSERT INTO timestamps (create_dt) VALUES ("2012-01-00");
+SET sql_mode = DEFAULT;
SELECT * FROM timestamps;
diff --git a/storage/mroonga/mysql-test/mroonga/storage/t/column_datetime_32bit_2038.test b/storage/mroonga/mysql-test/mroonga/storage/t/column_datetime_32bit_2038.test
index bff42d43df4..c9308b95440 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/t/column_datetime_32bit_2038.test
+++ b/storage/mroonga/mysql-test/mroonga/storage/t/column_datetime_32bit_2038.test
@@ -29,7 +29,7 @@ CREATE TABLE diaries (
INSERT INTO diaries (title, created_at)
VALUES ('2038-01-18 03:14:07', '2038-01-18 03:14:07');
-INSERT INTO diaries (title, created_at)
+INSERT IGNORE INTO diaries (title, created_at)
VALUES ('2038-01-20 03:14:08', '2038-01-20 03:14:08');
SELECT * FROM diaries;
diff --git a/storage/mroonga/mysql-test/mroonga/storage/t/column_datetime_32bit_before_unix_epoch.test b/storage/mroonga/mysql-test/mroonga/storage/t/column_datetime_32bit_before_unix_epoch.test
index 3d2b2715904..5a6cee77900 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/t/column_datetime_32bit_before_unix_epoch.test
+++ b/storage/mroonga/mysql-test/mroonga/storage/t/column_datetime_32bit_before_unix_epoch.test
@@ -27,7 +27,7 @@ CREATE TABLE diaries (
created_at DATETIME
) DEFAULT CHARSET UTF8;
-INSERT INTO diaries (title, created_at)
+INSERT IGNORE INTO diaries (title, created_at)
VALUES ('1000-01-01 00:00:00', '1000-01-01 00:00:00');
SELECT * FROM diaries;
diff --git a/storage/mroonga/mysql-test/mroonga/storage/t/column_datetime_32bit_max.test b/storage/mroonga/mysql-test/mroonga/storage/t/column_datetime_32bit_max.test
index c0e7ee98450..502d261c45c 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/t/column_datetime_32bit_max.test
+++ b/storage/mroonga/mysql-test/mroonga/storage/t/column_datetime_32bit_max.test
@@ -27,7 +27,7 @@ CREATE TABLE diaries (
created_at DATETIME
) DEFAULT CHARSET UTF8;
-INSERT INTO diaries (title, created_at)
+INSERT IGNORE INTO diaries (title, created_at)
VALUES ('9999-12-31 23:59:59', '9999-12-31 23:59:59');
SELECT * FROM diaries;
diff --git a/storage/mroonga/mysql-test/mroonga/storage/t/column_datetime_32bit_out_of_range.test b/storage/mroonga/mysql-test/mroonga/storage/t/column_datetime_32bit_out_of_range.test
index 36c7d371eb5..aa0bf65de96 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/t/column_datetime_32bit_out_of_range.test
+++ b/storage/mroonga/mysql-test/mroonga/storage/t/column_datetime_32bit_out_of_range.test
@@ -27,7 +27,7 @@ CREATE TABLE diaries (
created_at DATETIME
) DEFAULT CHARSET UTF8;
-INSERT INTO diaries (title, created_at)
+INSERT IGNORE INTO diaries (title, created_at)
VALUES ('2012', '2012');
SELECT * FROM diaries;
diff --git a/storage/mroonga/mysql-test/mroonga/storage/t/column_generated_stored_add_column.test b/storage/mroonga/mysql-test/mroonga/storage/t/column_generated_stored_add_column.test
index b50debddbdb..5dc58df17c3 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/t/column_generated_stored_add_column.test
+++ b/storage/mroonga/mysql-test/mroonga/storage/t/column_generated_stored_add_column.test
@@ -18,9 +18,7 @@
--source ../../include/mroonga/skip_mariadb_10_1_or_earlier.inc
--source ../../include/mroonga/have_mroonga.inc
---disable_warnings
-DROP TABLE IF EXISTS logs;
---enable_warnings
+set names utf8mb4;
CREATE TABLE logs (
id INT,
@@ -28,6 +26,7 @@ CREATE TABLE logs (
) ENGINE=Mroonga DEFAULT CHARSET=utf8mb4;
INSERT INTO logs(id, record) VALUES (1, '{"level": "info", "message": "start"}');
+INSERT INTO logs(id, record) VALUES (1, json_object('message', repeat('☹', 253)));
ALTER TABLE logs ADD COLUMN message VARCHAR(255) GENERATED ALWAYS AS (json_extract(`record`, '$.message')) STORED;
ALTER TABLE logs ADD FULLTEXT INDEX(message) comment 'tokenizer "TokenBigramSplitSymbolAlphaDigit"';
diff --git a/storage/mroonga/mysql-test/mroonga/storage/t/geometry_bulk_insert_null.test b/storage/mroonga/mysql-test/mroonga/storage/t/geometry_bulk_insert_null.test
index f26aa24ca22..160fbf53bd3 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/t/geometry_bulk_insert_null.test
+++ b/storage/mroonga/mysql-test/mroonga/storage/t/geometry_bulk_insert_null.test
@@ -27,7 +27,7 @@ CREATE TABLE shops (
location GEOMETRY NOT NULL
);
-INSERT INTO shops VALUES (NULL), (NULL);
+INSERT IGNORE INTO shops VALUES (NULL), (NULL);
SELECT ST_AsText(location) FROM shops;
diff --git a/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_date_32bit_equal.test b/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_date_32bit_equal.test
index e6aff9a7895..bf420af0aa4 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_date_32bit_equal.test
+++ b/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_date_32bit_equal.test
@@ -29,10 +29,10 @@ CREATE TABLE ranges (
UNIQUE KEY range_key(start, end)
);
-INSERT INTO ranges VALUES (1, "1000-01-01", "2012-10-05");
-INSERT INTO ranges VALUES (2, "1000-01-01", "9999-12-31");
-INSERT INTO ranges VALUES (3, "2012-10-25", "9999-12-31");
-INSERT INTO ranges VALUES (4, "9999-12-31", "1000-01-01");
+INSERT IGNORE INTO ranges VALUES (1, "1000-01-01", "2012-10-05");
+INSERT IGNORE INTO ranges VALUES (2, "1000-01-01", "9999-12-31");
+INSERT IGNORE INTO ranges VALUES (3, "2012-10-25", "9999-12-31");
+INSERT IGNORE INTO ranges VALUES (4, "9999-12-31", "1000-01-01");
SELECT * FROM ranges FORCE INDEX(range_key)
WHERE start = "1000-01-01" AND end = "9999-12-31";
diff --git a/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_date_order_32bit_asc.test b/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_date_order_32bit_asc.test
index 814da2e88d0..767fcce998f 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_date_order_32bit_asc.test
+++ b/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_date_order_32bit_asc.test
@@ -29,10 +29,10 @@ CREATE TABLE ranges (
UNIQUE KEY range_key(start, end)
);
-INSERT INTO ranges VALUES (1, "2012-10-25", "9999-12-31");
-INSERT INTO ranges VALUES (2, "1000-01-01", "2012-10-05");
-INSERT INTO ranges VALUES (3, "9999-12-31", "1000-01-01");
-INSERT INTO ranges VALUES (4, "1000-01-01", "9999-12-31");
+INSERT IGNORE INTO ranges VALUES (1, "2012-10-25", "9999-12-31");
+INSERT IGNORE INTO ranges VALUES (2, "1000-01-01", "2012-10-05");
+INSERT IGNORE INTO ranges VALUES (3, "9999-12-31", "1000-01-01");
+INSERT IGNORE INTO ranges VALUES (4, "1000-01-01", "9999-12-31");
SELECT * FROM ranges FORCE INDEX(range_key)
ORDER BY start, end;
diff --git a/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_date_order_32bit_desc.test b/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_date_order_32bit_desc.test
index 296c0bb69b1..fa1b841ab52 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_date_order_32bit_desc.test
+++ b/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_date_order_32bit_desc.test
@@ -29,10 +29,10 @@ CREATE TABLE ranges (
UNIQUE KEY range_key(start, end)
);
-INSERT INTO ranges VALUES (1, "2012-10-25", "9999-12-31");
-INSERT INTO ranges VALUES (2, "1000-01-01", "2012-10-05");
-INSERT INTO ranges VALUES (3, "9999-12-31", "1000-01-01");
-INSERT INTO ranges VALUES (4, "1000-01-01", "9999-12-31");
+INSERT IGNORE INTO ranges VALUES (1, "2012-10-25", "9999-12-31");
+INSERT IGNORE INTO ranges VALUES (2, "1000-01-01", "2012-10-05");
+INSERT IGNORE INTO ranges VALUES (3, "9999-12-31", "1000-01-01");
+INSERT IGNORE INTO ranges VALUES (4, "1000-01-01", "9999-12-31");
SELECT * FROM ranges FORCE INDEX(range_key)
ORDER BY start DESC, end DESC;
diff --git a/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_datetime_insert_delete_insert_invalid_value.test b/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_datetime_insert_delete_insert_invalid_value.test
index 362cf4160aa..dc8cebf77bd 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_datetime_insert_delete_insert_invalid_value.test
+++ b/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_datetime_insert_delete_insert_invalid_value.test
@@ -28,11 +28,11 @@ CREATE TABLE ranges (
UNIQUE KEY range_key(start, end)
);
-INSERT INTO ranges VALUES (1, "1990-00-00 00:00:00", "2012-10-05 23:59:59");
+INSERT IGNORE INTO ranges VALUES (1, "1990-00-00 00:00:00", "2012-10-05 23:59:59");
SELECT * FROM ranges;
DELETE FROM ranges WHERE id = 1;
-INSERT INTO ranges VALUES (1, "1990-00-00 00:00:00", "2012-10-05 23:59:59");
+INSERT IGNORE INTO ranges VALUES (1, "1990-00-00 00:00:00", "2012-10-05 23:59:59");
SELECT * FROM ranges;
DROP TABLE ranges;
diff --git a/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_year_32bit_equal.test b/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_year_32bit_equal.test
index b0e0720e2d0..1d93df3684d 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_year_32bit_equal.test
+++ b/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_year_32bit_equal.test
@@ -29,10 +29,10 @@ CREATE TABLE ranges (
UNIQUE KEY range_key(start, end)
);
-INSERT INTO ranges VALUES (1, 1901, 2012);
-INSERT INTO ranges VALUES (2, 1901, 2155);
-INSERT INTO ranges VALUES (3, 2012, 2155);
-INSERT INTO ranges VALUES (4, 2155, 1901);
+INSERT IGNORE INTO ranges VALUES (1, 1901, 2012);
+INSERT IGNORE INTO ranges VALUES (2, 1901, 2155);
+INSERT IGNORE INTO ranges VALUES (3, 2012, 2155);
+INSERT IGNORE INTO ranges VALUES (4, 2155, 1901);
SELECT * FROM ranges FORCE INDEX(range_key)
WHERE start = 1901 AND end = 2155;
diff --git a/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_year_order_32bit_asc.test b/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_year_order_32bit_asc.test
index 12624eb868d..8090ccc593b 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_year_order_32bit_asc.test
+++ b/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_year_order_32bit_asc.test
@@ -29,10 +29,10 @@ CREATE TABLE ranges (
UNIQUE KEY range_key(start, end)
);
-INSERT INTO ranges VALUES (1, 2012, 2155);
-INSERT INTO ranges VALUES (2, 1901, 2012);
-INSERT INTO ranges VALUES (3, 2155, 1901);
-INSERT INTO ranges VALUES (4, 1901, 2155);
+INSERT IGNORE INTO ranges VALUES (1, 2012, 2155);
+INSERT IGNORE INTO ranges VALUES (2, 1901, 2012);
+INSERT IGNORE INTO ranges VALUES (3, 2155, 1901);
+INSERT IGNORE INTO ranges VALUES (4, 1901, 2155);
SELECT * FROM ranges FORCE INDEX(range_key)
ORDER BY start, end;
diff --git a/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_year_order_32bit_desc.test b/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_year_order_32bit_desc.test
index 70b019ca75a..8927d4bd94e 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_year_order_32bit_desc.test
+++ b/storage/mroonga/mysql-test/mroonga/storage/t/index_multiple_column_unique_year_order_32bit_desc.test
@@ -29,10 +29,10 @@ CREATE TABLE ranges (
UNIQUE KEY range_key(start, end)
);
-INSERT INTO ranges VALUES (1, 2012, 2155);
-INSERT INTO ranges VALUES (2, 1901, 2012);
-INSERT INTO ranges VALUES (3, 2155, 1901);
-INSERT INTO ranges VALUES (4, 1901, 2155);
+INSERT IGNORE INTO ranges VALUES (1, 2012, 2155);
+INSERT IGNORE INTO ranges VALUES (2, 1901, 2012);
+INSERT IGNORE INTO ranges VALUES (3, 2155, 1901);
+INSERT IGNORE INTO ranges VALUES (4, 1901, 2155);
SELECT * FROM ranges FORCE INDEX(range_key)
ORDER BY start DESC, end DESC;
diff --git a/storage/mroonga/mysql-test/mroonga/wrapper/suite.pm b/storage/mroonga/mysql-test/mroonga/wrapper/suite.pm
index 528ccc5d693..7e4f8c1776b 100644
--- a/storage/mroonga/mysql-test/mroonga/wrapper/suite.pm
+++ b/storage/mroonga/mysql-test/mroonga/wrapper/suite.pm
@@ -5,7 +5,7 @@ package My::Suite::Mroonga;
return "No Mroonga engine" unless $ENV{HA_MROONGA_SO} or
$::mysqld_variables{'mroonga'} eq "ON";
-sub is_default { 1 }
+sub is_default { not $::opt_embedded_server }
my $groonga_normalizer_mysql_dir=$::basedir . '/storage/mroonga/vendor/groonga/vendor/plugins/groonga-normalizer-mysql';
my $groonga_normalizer_mysql_install_dir=$::basedir . '/lib/groonga/plugins';

diff --git a/storage/mroonga/vendor/groonga/examples/dictionary/html/js/jquery-1.7.2.js b/storage/mroonga/vendor/groonga/examples/dictionary/html/js/jquery-1.7.2.js
index 3774ff98613..75ce2617772 100644
--- a/storage/mroonga/vendor/groonga/examples/dictionary/html/js/jquery-1.7.2.js
+++ b/storage/mroonga/vendor/groonga/examples/dictionary/html/js/jquery-1.7.2.js
@@ -8249,7 +8249,7 @@ if ( jQuery.support.ajax ) {
xml;
// Firefox throws exceptions when accessing properties
- // of an xhr when a network error occured
+ // of an xhr when a network error occurred
// http://helpful.knobs-dials.com/index.php/Component_returned_failure_code:_0x80040111_(NS_ERROR_NOT_AVAILABLE)
try {
diff --git a/storage/mroonga/vendor/groonga/examples/dictionary/html/js/jquery-ui-1.8.18.custom.js b/storage/mroonga/vendor/groonga/examples/dictionary/html/js/jquery-ui-1.8.18.custom.js
index d4444b2bd11..a212450c20a 100644
--- a/storage/mroonga/vendor/groonga/examples/dictionary/html/js/jquery-ui-1.8.18.custom.js
+++ b/storage/mroonga/vendor/groonga/examples/dictionary/html/js/jquery-ui-1.8.18.custom.js
@@ -743,7 +743,7 @@ $.widget("ui.mouse", {
return this.mouseDelayMet;
},
- // These are placeholder methods, to be overriden by extending plugin
+ // These are placeholder methods, to be overridden by extending plugin
_mouseStart: function(event) {},
_mouseDrag: function(event) {},
_mouseStop: function(event) {},
diff --git a/storage/mroonga/vendor/groonga/lib/com.c b/storage/mroonga/vendor/groonga/lib/com.c
index 2def22efd39..cc03d6462c3 100644
--- a/storage/mroonga/vendor/groonga/lib/com.c
+++ b/storage/mroonga/vendor/groonga/lib/com.c
@@ -742,7 +742,7 @@ grn_com_send(grn_ctx *ctx, grn_com *cs,
msg.msg_namelen = 0;
msg.msg_iov = msg_iov;
msg.msg_iovlen = 2;
- msg_iov[0].iov_base = header;
+ msg_iov[0].iov_base = (char*) header;
msg_iov[0].iov_len = sizeof(grn_com_header);
msg_iov[1].iov_base = (char *)body;
msg_iov[1].iov_len = size;
diff --git a/storage/mroonga/vendor/groonga/lib/ctx.c b/storage/mroonga/vendor/groonga/lib/ctx.c
index dca4d1d90ba..1fd912d41e6 100644
--- a/storage/mroonga/vendor/groonga/lib/ctx.c
+++ b/storage/mroonga/vendor/groonga/lib/ctx.c
@@ -55,7 +55,8 @@
#define GRN_CTX_INITIALIZER(enc) \
{ GRN_SUCCESS, 0, enc, 0, GRN_LOG_NOTICE,\
- GRN_CTX_FIN, 0, 0, 0, 0, {0}, NULL, NULL, NULL, NULL, NULL }
+ GRN_CTX_FIN, 0, 0, 0, 0, {0}, NULL, NULL, NULL, NULL, NULL, \
+    {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, ""}
#define GRN_CTX_CLOSED(ctx) ((ctx)->stat == GRN_CTX_FIN)
diff --git a/storage/mroonga/vendor/groonga/lib/db.c b/storage/mroonga/vendor/groonga/lib/db.c
index aeaff66f0f4..cba22aa0e64 100644
--- a/storage/mroonga/vendor/groonga/lib/db.c
+++ b/storage/mroonga/vendor/groonga/lib/db.c
@@ -4782,7 +4782,7 @@ grn_column_create(grn_ctx *ctx, grn_obj *table,
{
grn_db *s;
uint32_t value_size;
- grn_obj *db, *res = NULL;
+ grn_obj *db= NULL, *res = NULL;
grn_id id = GRN_ID_NIL;
grn_id range = GRN_ID_NIL;
grn_id domain = GRN_ID_NIL;
diff --git a/storage/mroonga/vendor/groonga/lib/hash.c b/storage/mroonga/vendor/groonga/lib/hash.c
index 8fe180481c9..3fb372ee222 100644
--- a/storage/mroonga/vendor/groonga/lib/hash.c
+++ b/storage/mroonga/vendor/groonga/lib/hash.c
@@ -89,12 +89,6 @@ grn_tiny_array_at_inline(grn_tiny_array *array, grn_id id)
return id ? grn_tiny_array_put(array, id) : NULL;
}
-inline static void *
-grn_tiny_array_next(grn_tiny_array *array)
-{
- return grn_tiny_array_put(array, array->max + 1);
-}
-
void
grn_tiny_array_init(grn_ctx *ctx, grn_tiny_array *array,
uint16_t element_size, uint16_t flags)
@@ -204,15 +198,6 @@ grn_tiny_bitmap_put_byte(grn_tiny_bitmap *bitmap, grn_id bit_id) {
/* Requirements: bit_id != GRN_ID_NIL. */
/* Return value: 1/0 on success, -1 on failure. */
-inline static int
-grn_tiny_bitmap_get(grn_tiny_bitmap *bitmap, grn_id bit_id)
-{
- uint8_t * const ptr = grn_tiny_bitmap_get_byte(bitmap, bit_id);
- return ptr ? ((*ptr >> (bit_id & 7)) & 1) : -1;
-}
-
-/* Requirements: bit_id != GRN_ID_NIL. */
-/* Return value: 1/0 on success, -1 on failure. */
/* Note: A bitmap is extended if needed. */
inline static int
grn_tiny_bitmap_put(grn_tiny_bitmap *bitmap, grn_id bit_id)
@@ -309,18 +294,6 @@ grn_io_array_bit_off(grn_ctx *ctx, grn_io *io,
return ptr;
}
-inline static void *
-grn_io_array_bit_flip(grn_ctx *ctx, grn_io *io,
- uint32_t segment_id, uint32_t offset)
-{
- uint8_t * const ptr = (uint8_t *)grn_io_array_at_inline(
- ctx, io, segment_id, (offset >> 3) + 1, GRN_TABLE_ADD);
- if (ptr) {
- *ptr ^= 1 << (offset & 7);
- }
- return ptr;
-}
-
/* grn_table_queue */
static void
@@ -1738,13 +1711,6 @@ get_value(grn_ctx *ctx, grn_hash *hash, entry_str *n)
return grn_hash_entry_get_value(ctx, hash, (grn_hash_entry *)n);
}
-inline static grn_rc
-put_key(grn_ctx *ctx, grn_hash *hash, entry_str *n, uint32_t h,
- const char *key, unsigned int len)
-{
- return grn_hash_entry_put_key(ctx, hash, (grn_hash_entry *)n, h, key, len);
-}
-
inline static int
match_key(grn_ctx *ctx, grn_hash *hash, entry_str *ee, uint32_t h,
const char *key, unsigned int len)
diff --git a/storage/mroonga/vendor/groonga/lib/ii.c b/storage/mroonga/vendor/groonga/lib/ii.c
index 3a62d03ab0a..cd5559e6958 100644
--- a/storage/mroonga/vendor/groonga/lib/ii.c
+++ b/storage/mroonga/vendor/groonga/lib/ii.c
@@ -575,7 +575,7 @@ chunk_free(grn_ctx *ctx, grn_ii *ii,
}
*/
grn_io_win iw, iw_;
- grn_ii_ginfo *ginfo;
+ grn_ii_ginfo *ginfo= 0;
uint32_t seg, m, *gseg;
seg = offset >> GRN_II_N_CHUNK_VARIATION;
if (size > S_CHUNK) {
@@ -2194,23 +2194,6 @@ buffer_close(grn_ctx *ctx, grn_ii *ii, uint32_t pseg)
return GRN_SUCCESS;
}
-inline static uint32_t
-buffer_open_if_capable(grn_ctx *ctx, grn_ii *ii, int32_t seg, int size, buffer **b)
-{
- uint32_t pseg, pos = SEG2POS(seg, 0);
- if ((pseg = buffer_open(ctx, ii, pos, NULL, b)) != GRN_II_PSEG_NOT_ASSIGNED) {
- uint16_t nterms = (*b)->header.nterms - (*b)->header.nterms_void;
- if (!((nterms < 4096 ||
- (ii->header->total_chunk_size >> ((nterms >> 8) - 6))
- > (*b)->header.chunk_size) &&
- ((*b)->header.buffer_free >= size + sizeof(buffer_term)))) {
- buffer_close(ctx, ii, pseg);
- return GRN_II_PSEG_NOT_ASSIGNED;
- }
- }
- return pseg;
-}
-
typedef struct {
uint32_t rid;
uint32_t sid;
diff --git a/storage/mroonga/vendor/groonga/lib/pat.c b/storage/mroonga/vendor/groonga/lib/pat.c
index e2f98fba0d2..642173e2fdc 100644
--- a/storage/mroonga/vendor/groonga/lib/pat.c
+++ b/storage/mroonga/vendor/groonga/lib/pat.c
@@ -142,20 +142,6 @@ pat_get(grn_ctx *ctx, grn_pat *pat, grn_id id)
return res;
}
-inline static pat_node *
-pat_node_new(grn_ctx *ctx, grn_pat *pat, grn_id *id)
-{
- uint32_t n = pat->header->curr_rec + 1;
- pat_node *res;
- if (n > GRN_ID_MAX) { return NULL; }
- if ((res = pat_get(ctx, pat, n))) {
- pat->header->curr_rec = n;
- pat->header->n_entries++;
- }
- if (id) { *id = n; }
- return res;
-}
-
/* sis operation */
inline static sis_node *
diff --git a/storage/mroonga/vendor/groonga/lib/ts.c b/storage/mroonga/vendor/groonga/lib/ts.c
index 68e363a27d7..909f4864786 100644
--- a/storage/mroonga/vendor/groonga/lib/ts.c
+++ b/storage/mroonga/vendor/groonga/lib/ts.c
@@ -683,7 +683,7 @@ static grn_rc
grn_ts_select_output(grn_ctx *ctx, grn_obj *table, grn_ts_str str,
const grn_ts_record *in, size_t n_in, size_t n_hits)
{
- grn_ts_writer *writer;
+ grn_ts_writer *writer= 0;
grn_rc rc = grn_ts_writer_open(ctx, table, str, &writer);
if (rc != GRN_SUCCESS) {
return rc;
diff --git a/storage/mroonga/vendor/groonga/lib/ts/ts_expr_node.c b/storage/mroonga/vendor/groonga/lib/ts/ts_expr_node.c
index dc64e802fbb..4ae900034bb 100644
--- a/storage/mroonga/vendor/groonga/lib/ts/ts_expr_node.c
+++ b/storage/mroonga/vendor/groonga/lib/ts/ts_expr_node.c
@@ -187,55 +187,6 @@ grn_ts_ref_zero(void)
return (grn_ts_ref){ 0, 0.0 };
}
-/* grn_ts_bool_vector_zero() returns a zero. */
-inline static grn_ts_bool_vector
-grn_ts_bool_vector_zero(void)
-{
- return (grn_ts_bool_vector){ NULL, 0 };
-}
-
-/* grn_ts_int_vector_zero() returns a zero. */
-inline static grn_ts_int_vector
-grn_ts_int_vector_zero(void)
-{
- return (grn_ts_int_vector){ NULL, 0 };
-}
-
-/* grn_ts_float_vector_zero() returns a zero. */
-inline static grn_ts_float_vector
-grn_ts_float_vector_zero(void)
-{
- return (grn_ts_float_vector){ NULL, 0 };
-}
-
-/* grn_ts_time_vector_zero() returns a zero. */
-inline static grn_ts_time_vector
-grn_ts_time_vector_zero(void)
-{
- return (grn_ts_time_vector){ NULL, 0 };
-}
-
-/* grn_ts_text_vector_zero() returns a zero. */
-inline static grn_ts_text_vector
-grn_ts_text_vector_zero(void)
-{
- return (grn_ts_text_vector){ NULL, 0 };
-}
-
-/* grn_ts_geo_vector_zero() returns a zero. */
-inline static grn_ts_geo_vector
-grn_ts_geo_vector_zero(void)
-{
- return (grn_ts_geo_vector){ NULL, 0 };
-}
-
-/* grn_ts_ref_vector_zero() returns a zero. */
-inline static grn_ts_ref_vector
-grn_ts_ref_vector_zero(void)
-{
- return (grn_ts_ref_vector){ NULL, 0 };
-}
-
/* grn_ts_data_type_to_kind() returns a kind associated with a type. */
static grn_ts_data_kind
grn_ts_data_type_to_kind(grn_ts_data_type type)
@@ -5222,7 +5173,7 @@ grn_ts_expr_node_deref(grn_ctx *ctx, grn_ts_expr_node **node_ptr)
{
grn_ts_expr_node *node = *node_ptr, **in_ptr = NULL;
while ((node->data_kind & ~GRN_TS_VECTOR_FLAG) == GRN_TS_REF) {
- grn_ts_expr_node *new_node;
+ grn_ts_expr_node *new_node= 0;
grn_rc rc = grn_ts_expr_node_deref_once(ctx, node, &new_node);
if (rc != GRN_SUCCESS) {
if (in_ptr) {
diff --git a/storage/myisam/NEWS b/storage/myisam/NEWS
index 302adacf942..942926a0fa2 100644
--- a/storage/myisam/NEWS
+++ b/storage/myisam/NEWS
@@ -62,5 +62,5 @@ New features compared to NISAM:
Interface changes compared to NISAM:
- mi_create()
- - keyinfo->seg must be allocated explicitely.
+ - keyinfo->seg must be allocated explicitly.
- One must put number of key segments in keyinfo
diff --git a/storage/myisam/ft_boolean_search.c b/storage/myisam/ft_boolean_search.c
index 6da55c727ee..6ca48fedeab 100644
--- a/storage/myisam/ft_boolean_search.c
+++ b/storage/myisam/ft_boolean_search.c
@@ -163,7 +163,7 @@ static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b)
{
/* ORDER BY word, ndepth */
int i= ha_compare_text(cs, (uchar*) (*a)->word + 1, (*a)->len - 1,
- (uchar*) (*b)->word + 1, (*b)->len - 1, 0, 0);
+ (uchar*) (*b)->word + 1, (*b)->len - 1, 0);
if (!i)
i= CMP_NUM((*a)->ndepth, (*b)->ndepth);
return i;
@@ -412,7 +412,7 @@ static int _ft2_search_no_lock(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
info->lastkey_length-extra-1,
(uchar*) ftbw->word+1,
ftbw->len-1,
- (my_bool) (ftbw->flags & FTB_FLAG_TRUNC),0);
+ (my_bool) (ftbw->flags & FTB_FLAG_TRUNC));
}
if (r) /* not found */
@@ -909,7 +909,7 @@ static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param,
ftbw= ftb->list[c];
if (ha_compare_text(ftb->charset, (uchar*)word, len,
(uchar*)ftbw->word+1, ftbw->len-1,
- (my_bool) (ftbw->flags & FTB_FLAG_TRUNC), 0) < 0)
+ (my_bool) (ftbw->flags & FTB_FLAG_TRUNC)) < 0)
b= c;
else
a= c;
@@ -936,7 +936,7 @@ static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param,
ftbw= ftb->list[c];
if (ha_compare_text(ftb->charset, (uchar*)word, len,
(uchar*)ftbw->word + 1,ftbw->len - 1,
- (my_bool)(ftbw->flags & FTB_FLAG_TRUNC), 0))
+ (my_bool)(ftbw->flags & FTB_FLAG_TRUNC)))
{
if (ftb->with_scan & FTB_FLAG_TRUNC)
continue;
diff --git a/storage/myisam/ft_nlq_search.c b/storage/myisam/ft_nlq_search.c
index 2add5c5b31b..4f51879ac00 100644
--- a/storage/myisam/ft_nlq_search.c
+++ b/storage/myisam/ft_nlq_search.c
@@ -81,13 +81,13 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio)
#error
#endif
DBUG_ENTER("walk_and_match");
- LINT_INIT_STRUCT(subkeys);
word->weight=LWS_FOR_QUERY;
keylen=_ft_make_key(info,aio->keynr,keybuff,word,0);
keylen-=HA_FT_WLEN;
doc_cnt=0;
+ subkeys.i= 0;
if (share->concurrent_insert)
mysql_rwlock_rdlock(&share->key_root_lock[aio->keynr]);
@@ -114,7 +114,7 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio)
if (keylen &&
ha_compare_text(aio->charset,info->lastkey+1,
- info->lastkey_length-extra-1, keybuff+1,keylen-1,0,0))
+ info->lastkey_length-extra-1, keybuff+1,keylen-1,0))
break;
if (subkeys.i < 0)
diff --git a/storage/myisam/ft_parser.c b/storage/myisam/ft_parser.c
index a85f8cc8c78..f6930e91e6e 100644
--- a/storage/myisam/ft_parser.c
+++ b/storage/myisam/ft_parser.c
@@ -32,7 +32,7 @@ typedef struct st_my_ft_parser_param
static int FT_WORD_cmp(CHARSET_INFO* cs, FT_WORD *w1, FT_WORD *w2)
{
return ha_compare_text(cs, (uchar*) w1->pos, w1->len,
- (uchar*) w2->pos, w2->len, 0, 0);
+ (uchar*) w2->pos, w2->len, 0);
}
static int walk_and_copy(FT_WORD *word,uint32 count,FT_DOCSTAT *docstat)
diff --git a/storage/myisam/ft_stopwords.c b/storage/myisam/ft_stopwords.c
index c5ca8b848ed..788709d3c4b 100644
--- a/storage/myisam/ft_stopwords.c
+++ b/storage/myisam/ft_stopwords.c
@@ -35,7 +35,7 @@ static int FT_STOPWORD_cmp(void* cmp_arg __attribute__((unused)),
{
return ha_compare_text(ft_stopword_cs,
(uchar *)w1->pos,w1->len,
- (uchar *)w2->pos,w2->len,0,0);
+ (uchar *)w2->pos,w2->len,0);
}
static void FT_STOPWORD_free(FT_STOPWORD *w, TREE_FREE action,
diff --git a/storage/myisam/ft_update.c b/storage/myisam/ft_update.c
index a688b6704b3..575ab70fcdd 100644
--- a/storage/myisam/ft_update.c
+++ b/storage/myisam/ft_update.c
@@ -83,7 +83,7 @@ uint _mi_ft_segiterator(register FT_SEG_ITERATOR *ftsi)
if (ftsi->seg->flag & HA_BLOB_PART)
{
ftsi->len=_mi_calc_blob_length(ftsi->seg->bit_start,ftsi->pos);
- memcpy(&ftsi->pos, ftsi->pos+ftsi->seg->bit_start, sizeof(char*));
+ memcpy((char**) &ftsi->pos, ftsi->pos+ftsi->seg->bit_start, sizeof(char*));
DBUG_RETURN(1);
}
ftsi->len=ftsi->seg->length;
@@ -180,7 +180,7 @@ int _mi_ft_cmp(MI_INFO *info, uint keynr, const uchar *rec1, const uchar *rec2)
if ((ftsi1.pos != ftsi2.pos) &&
(!ftsi1.pos || !ftsi2.pos ||
ha_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len,
- (uchar*) ftsi2.pos,ftsi2.len,0,0)))
+ (uchar*) ftsi2.pos,ftsi2.len,0)))
DBUG_RETURN(THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT);
}
DBUG_RETURN(GEE_THEY_ARE_ABSOLUTELY_IDENTICAL);
@@ -209,7 +209,7 @@ int _mi_ft_update(MI_INFO *info, uint keynr, uchar *keybuf,
while(old_word->pos && new_word->pos)
{
cmp= ha_compare_text(cs, (uchar*) old_word->pos,old_word->len,
- (uchar*) new_word->pos,new_word->len,0,0);
+ (uchar*) new_word->pos,new_word->len,0);
cmp2= cmp ? 0 : (fabs(old_word->weight - new_word->weight) > 1.e-5);
if (cmp < 0 || cmp2)
diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
index 886d898521c..ce52f1af828 100644
--- a/storage/myisam/ha_myisam.cc
+++ b/storage/myisam/ha_myisam.cc
@@ -67,7 +67,7 @@ static MYSQL_SYSVAR_ULONGLONG(max_sort_file_size, myisam_max_temp_length,
static MYSQL_SYSVAR_SET(recover_options, myisam_recover_options,
PLUGIN_VAR_OPCMDARG|PLUGIN_VAR_READONLY,
"Specifies how corrupted tables should be automatically repaired",
- NULL, NULL, 1, &myisam_recover_typelib);
+ NULL, NULL, HA_RECOVER_BACKUP|HA_RECOVER_QUICK, &myisam_recover_typelib);
static MYSQL_THDVAR_ULONG(repair_threads, PLUGIN_VAR_RQCMDARG,
"If larger than 1, when repairing a MyISAM table all indexes will be "
@@ -166,8 +166,8 @@ static void mi_check_print_msg(HA_CHECK *param, const char* msg_type,
name);
/*
TODO: switch from protocol to push_warning here. The main reason we didn't
- it yet is parallel repair. Due to following trace:
- mi_check_print_msg/push_warning/sql_alloc/my_pthread_getspecific_ptr.
+    do it yet is parallel repair, whose threads have no THD object accessible via
+ current_thd.
Also we likely need to lock mutex here (in both cases with protocol and
push_warning).
@@ -340,8 +340,8 @@ int table2myisam(TABLE *table_arg, MI_KEYDEF **keydef_out,
}
}
}
- DBUG_PRINT("loop", ("found: 0x%lx recpos: %d minpos: %d length: %d",
- (long) found, recpos, minpos, length));
+ DBUG_PRINT("loop", ("found: %p recpos: %d minpos: %d length: %d",
+ found, recpos, minpos, length));
if (recpos != minpos)
{
/* reserve space for null bits */
@@ -573,12 +573,14 @@ int check_definition(MI_KEYDEF *t1_keyinfo, MI_COLUMNDEF *t1_recinfo,
DBUG_RETURN(0);
}
-
extern "C" {
int killed_ptr(HA_CHECK *param)
{
- return thd_killed((THD*)param->thd);
+  if (likely(thd_killed((THD*)param->thd) == 0))
+ return 0;
+ my_errno= HA_ERR_ABORTED_BY_USER;
+ return 1;
}
void mi_check_print_error(HA_CHECK *param, const char *fmt,...)
@@ -636,7 +638,7 @@ void _mi_report_crashed(MI_INFO *file, const char *message,
char buf[1024];
mysql_mutex_lock(&file->s->intern_lock);
if ((cur_thd= (THD*) file->in_use.data))
- sql_print_error("Got an error from thread_id=%lu, %s:%d", cur_thd->thread_id,
+ sql_print_error("Got an error from thread_id=%lld, %s:%d", cur_thd->thread_id,
sfile, sline);
else
sql_print_error("Got an error from unknown thread, %s:%d", sfile, sline);
@@ -660,6 +662,29 @@ my_bool mi_killed_in_mariadb(MI_INFO *info)
return (((TABLE*) (info->external_ref))->in_use->killed != 0);
}
+static int compute_vcols(MI_INFO *info, uchar *record, int keynum)
+{
+ TABLE *table= (TABLE*)(info->external_ref);
+ table->move_fields(table->field, record, table->field[0]->record_ptr());
+ if (keynum == -1) // update all vcols
+ {
+ int error= table->update_virtual_fields(table->file, VCOL_UPDATE_FOR_READ);
+ if (table->update_virtual_fields(table->file, VCOL_UPDATE_INDEXED))
+ error= 1;
+ return error;
+ }
+ // update only one key
+ KEY *key= table->key_info + keynum;
+ KEY_PART_INFO *kp= key->key_part, *end= kp + key->ext_key_parts;
+ for (; kp < end; kp++)
+ {
+ Field *f= table->field[kp->fieldnr - 1];
+ if (f->vcol_info)
+ table->update_virtual_field(f);
+ }
+ return 0;
+}
+
}
ha_myisam::ha_myisam(handlerton *hton, TABLE_SHARE *table_arg)
@@ -667,6 +692,7 @@ ha_myisam::ha_myisam(handlerton *hton, TABLE_SHARE *table_arg)
int_table_flags(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER |
HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE |
HA_CAN_VIRTUAL_COLUMNS | HA_CAN_EXPORT |
+ HA_REQUIRES_KEY_COLUMNS_FOR_DELETE |
HA_DUPLICATE_POS | HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY |
HA_FILE_BASED | HA_CAN_GEOMETRY | HA_NO_TRANSACTIONS |
HA_CAN_INSERT_DELAYED | HA_CAN_BIT_FIELD | HA_CAN_RTREEKEYS |
@@ -758,7 +784,8 @@ int ha_myisam::open(const char *name, int mode, uint test_if_locked)
/* Set external_ref, mainly for temporary tables */
file->external_ref= (void*) table; // For mi_killed()
- if (!table->s->tmp_table) /* No need to perform a check for tmp table */
+ /* No need to perform a check for tmp table or if it's already checked */
+ if (!table->s->tmp_table && file->s->reopen == 1)
{
if ((my_errno= table2myisam(table, &keyinfo, &recinfo, &recs)))
{
@@ -895,6 +922,44 @@ int ha_myisam::write_row(uchar *buf)
return mi_write(file,buf);
}
+void ha_myisam::setup_vcols_for_repair(HA_CHECK *param)
+{
+ DBUG_ASSERT(file->s->base.reclength <= file->s->vreclength);
+ if (!table->vfield)
+ return;
+
+ if (file->s->base.reclength == file->s->vreclength)
+ {
+ bool indexed_vcols= false;
+ ulong new_vreclength= file->s->vreclength;
+ for (Field **vf= table->vfield; *vf; vf++)
+ {
+ if (!(*vf)->stored_in_db())
+ {
+ uint vf_end= (*vf)->offset(table->record[0]) + (*vf)->pack_length_in_rec();
+ set_if_bigger(new_vreclength, vf_end);
+ indexed_vcols|= (*vf)->flags & PART_KEY_FLAG;
+ }
+ }
+ if (!indexed_vcols)
+ return;
+ file->s->vreclength= new_vreclength;
+ }
+ DBUG_ASSERT(file->s->base.reclength < file->s->vreclength);
+ param->fix_record= compute_vcols;
+ table->use_all_columns();
+ table->vcol_set= &table->s->all_set;
+}
+
+void ha_myisam::restore_vcos_after_repair()
+{
+ if (file->s->base.reclength < file->s->vreclength)
+ {
+ table->move_fields(table->field, table->record[0], table->field[0]->record_ptr());
+ table->default_column_bitmaps();
+ }
+}
+
int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt)
{
if (!file) return HA_ADMIN_INTERNAL_ERROR;
@@ -928,6 +993,8 @@ int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt)
(uint) (share->global_changed ? 1 : 0)))))
return HA_ADMIN_ALREADY_DONE;
+ setup_vcols_for_repair(param);
+
error = chk_status(param, file); // Not fatal
error = chk_size(param, file);
if (!error)
@@ -980,6 +1047,8 @@ int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt)
file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
}
+ restore_vcos_after_repair();
+
thd_proc_info(thd, old_proc_info);
return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK;
}
@@ -1013,6 +1082,8 @@ int ha_myisam::analyze(THD *thd, HA_CHECK_OPT* check_opt)
if (!(share->state.changed & STATE_NOT_ANALYZED))
return HA_ADMIN_ALREADY_DONE;
+ setup_vcols_for_repair(param);
+
error = chk_key(param, file);
if (!error)
{
@@ -1022,6 +1093,9 @@ int ha_myisam::analyze(THD *thd, HA_CHECK_OPT* check_opt)
}
else if (!mi_is_crashed(file) && !thd->killed)
mi_mark_crashed(file);
+
+ restore_vcos_after_repair();
+
return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK;
}
@@ -1044,6 +1118,9 @@ int ha_myisam::repair(THD* thd, HA_CHECK_OPT *check_opt)
param->sort_buffer_length= THDVAR(thd, sort_buffer_size);
param->backup_time= check_opt->start_time;
start_records=file->state->records;
+
+ setup_vcols_for_repair(param);
+
while ((error=repair(thd,*param,0)) && param->retry_repair)
{
param->retry_repair=0;
@@ -1058,15 +1135,18 @@ int ha_myisam::repair(THD* thd, HA_CHECK_OPT *check_opt)
continue;
}
param->testflag&= ~T_QUICK;
- if ((param->testflag & T_REP_BY_SORT))
+ if ((param->testflag & (T_REP_BY_SORT | T_REP_PARALLEL)))
{
- param->testflag= (param->testflag & ~T_REP_BY_SORT) | T_REP;
+ param->testflag= (param->testflag & ~T_REP_ANY) | T_REP;
sql_print_information("Retrying repair of: '%s' with keycache",
table->s->path.str);
continue;
}
break;
}
+
+ restore_vcos_after_repair();
+
if (!error && start_records != file->state->records &&
!(check_opt->flags & T_VERY_SILENT))
{
@@ -1093,6 +1173,9 @@ int ha_myisam::optimize(THD* thd, HA_CHECK_OPT *check_opt)
T_REP_BY_SORT | T_STATISTICS | T_SORT_INDEX);
param->tmpfile_createflag= O_RDWR | O_TRUNC;
param->sort_buffer_length= THDVAR(thd, sort_buffer_size);
+
+ setup_vcols_for_repair(param);
+
if ((error= repair(thd,*param,1)) && param->retry_repair)
{
sql_print_warning("Warning: Optimize table got errno %d on %s.%s, retrying",
@@ -1100,6 +1183,9 @@ int ha_myisam::optimize(THD* thd, HA_CHECK_OPT *check_opt)
param->testflag&= ~T_REP_BY_SORT;
error= repair(thd,*param,1);
}
+
+ restore_vcos_after_repair();
+
return error;
}
@@ -1125,9 +1211,6 @@ int ha_myisam::repair(THD *thd, HA_CHECK &param, bool do_optimize)
share->state.dupp_key= MI_MAX_KEY;
strmov(fixed_name,file->filename);
- // Release latches since this can take a long time
- ha_release_temporary_latches(thd);
-
/*
Don't lock tables if we have used LOCK TABLE or if we come from
enable_index()
@@ -1163,6 +1246,11 @@ int ha_myisam::repair(THD *thd, HA_CHECK &param, bool do_optimize)
if (remap)
mi_munmap_file(file);
#endif
+ /*
+    The following is to catch errors when my_errno is not set properly
+    during repair
+ */
+ my_errno= 0;
if (mi_test_if_sort_rep(file,file->state->records,tmp_key_map,0) &&
(local_testflag & T_REP_BY_SORT))
{
@@ -1185,8 +1273,11 @@ int ha_myisam::repair(THD *thd, HA_CHECK &param, bool do_optimize)
}
if (error && file->create_unique_index_by_sort &&
share->state.dupp_key != MAX_KEY)
+ {
+ my_errno= HA_ERR_FOUND_DUPP_KEY;
print_keydup_error(table, &table->key_info[share->state.dupp_key],
MYF(0));
+ }
}
else
{
@@ -1513,6 +1604,9 @@ int ha_myisam::enable_indexes(uint mode)
param->sort_buffer_length= THDVAR(thd, sort_buffer_size);
param->stats_method= (enum_handler_stats_method)THDVAR(thd, stats_method);
param->tmpdir=&mysql_tmpdir_list;
+
+ setup_vcols_for_repair(param);
+
if ((error= (repair(thd,*param,0) != HA_ADMIN_OK)) && param->retry_repair)
{
sql_print_warning("Warning: Enabling keys got errno %d on %s.%s, retrying",
@@ -1538,6 +1632,8 @@ int ha_myisam::enable_indexes(uint mode)
}
info(HA_STATUS_CONST);
thd_proc_info(thd, save_proc_info);
+
+ restore_vcos_after_repair();
}
else
{
@@ -2028,14 +2124,14 @@ int ha_myisam::create(const char *name, TABLE *table_arg,
TABLE_SHARE *share= table_arg->s;
uint options= share->db_options_in_use;
DBUG_ENTER("ha_myisam::create");
- for (i= 0; i < share->keys; i++)
- {
+
+ for (i= 0; i < share->virtual_fields && !create_flags; i++)
+ if (table_arg->vfield[i]->flags & PART_KEY_FLAG)
+ create_flags|= HA_CREATE_RELIES_ON_SQL_LAYER;
+ for (i= 0; i < share->keys && !create_flags; i++)
if (table_arg->key_info[i].flags & HA_USES_PARSER)
- {
create_flags|= HA_CREATE_RELIES_ON_SQL_LAYER;
- break;
- }
- }
+
if ((error= table2myisam(table_arg, &keydef, &recinfo, &record_count)))
DBUG_RETURN(error); /* purecov: inspected */
bzero((char*) &create_info, sizeof(create_info));
@@ -2206,14 +2302,20 @@ ha_myisam::check_if_supported_inplace_alter(TABLE *new_table,
{
DBUG_ENTER("ha_myisam::check_if_supported_inplace_alter");
- const uint readd_index= Alter_inplace_info::ADD_INDEX |
+ const Alter_inplace_info::HA_ALTER_FLAGS readd_index=
+ Alter_inplace_info::ADD_INDEX |
Alter_inplace_info::DROP_INDEX;
- const uint readd_unique= Alter_inplace_info::ADD_UNIQUE_INDEX |
- Alter_inplace_info::DROP_UNIQUE_INDEX;
- const uint readd_pk= Alter_inplace_info::ADD_PK_INDEX |
- Alter_inplace_info::DROP_PK_INDEX;
+ const Alter_inplace_info::HA_ALTER_FLAGS readd_unique=
+ Alter_inplace_info::ADD_UNIQUE_INDEX |
+ Alter_inplace_info::DROP_UNIQUE_INDEX;
+ const Alter_inplace_info::HA_ALTER_FLAGS readd_pk=
+ Alter_inplace_info::ADD_PK_INDEX |
+ Alter_inplace_info::DROP_PK_INDEX;
+
+ const Alter_inplace_info::HA_ALTER_FLAGS op= alter_info->handler_flags;
- const uint op= alter_info->handler_flags;
+ if (op & Alter_inplace_info::ALTER_COLUMN_VCOL)
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
/*
ha_myisam::open() updates table->key_info->block_size to be the actual
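
Note: the setup_vcols_for_repair()/restore_vcos_after_repair() calls added throughout ha_myisam.cc form a strict bracket around every admin operation (check, analyze, repair, optimize, enable_indexes): setup runs before the first chk_*/repair call and restore runs on every path that reaches the return. A minimal compilable sketch of the idiom, with hypothetical stub bodies (the real helpers, declared in ha_myisam.h below, are not shown in this hunk):

    #include <stdio.h>

    /* Hypothetical stand-ins: the real helpers prepare the record buffer
       for virtual columns before repair and undo that preparation after. */
    static void setup_vcols_for_repair_stub(void)    { puts("setup vcols"); }
    static void restore_vcos_after_repair_stub(void) { puts("restore vcols"); }

    static int admin_op(int (*body)(void))
    {
      int error;
      setup_vcols_for_repair_stub();     /* before any chk_* or mi_repair* call */
      error= body();                     /* the actual check/repair work */
      restore_vcos_after_repair_stub();  /* runs whether body failed or not */
      return error;
    }

    static int fake_repair(void) { return 0; }

    int main(void) { return admin_op(fake_repair); }
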
diff --git a/storage/myisam/ha_myisam.h b/storage/myisam/ha_myisam.h
index 8890a4c9b2a..0fe1592543d 100644
--- a/storage/myisam/ha_myisam.h
+++ b/storage/myisam/ha_myisam.h
@@ -48,6 +48,8 @@ class ha_myisam: public handler
char *data_file_name, *index_file_name;
bool can_enable_indexes;
int repair(THD *thd, HA_CHECK &param, bool optimize);
+ void setup_vcols_for_repair(HA_CHECK *param);
+ void restore_vcos_after_repair();
public:
ha_myisam(handlerton *hton, TABLE_SHARE *table_arg);
diff --git a/storage/myisam/mi_check.c b/storage/myisam/mi_check.c
index 62f72078a9e..b18ffb99a11 100644
--- a/storage/myisam/mi_check.c
+++ b/storage/myisam/mi_check.c
@@ -1190,6 +1190,8 @@ int chk_data_link(HA_CHECK *param, MI_INFO *info, my_bool extend)
DBUG_ASSERT(0); /* Impossible */
break;
} /* switch */
+ if (param->fix_record)
+ param->fix_record(info, record, -1);
if (! got_error)
{
intern_record_checksum+=(ha_checksum) start_recpos;
@@ -2207,7 +2209,7 @@ int mi_repair_by_sort(HA_CHECK *param, register MI_INFO *info,
printf("- recovering (with sort) MyISAM-table '%s'\n",name);
printf("Data records: %s\n", llstr(start_records,llbuff));
}
- param->testflag|=T_REP; /* for easy checking */
+ param->testflag|=T_REP_BY_SORT; /* for easy checking */
param->retry_repair= 0;
param->warning_printed= param->error_printed= param->note_printed= 0;
@@ -2637,7 +2639,7 @@ int mi_repair_parallel(HA_CHECK *param, register MI_INFO *info,
printf("- parallel recovering (with sort) MyISAM-table '%s'\n",name);
printf("Data records: %s\n", llstr(start_records,llbuff));
}
- param->testflag|=T_REP; /* for easy checking */
+ param->testflag|=T_REP_PARALLEL; /* for easy checking */
param->retry_repair= 0;
param->warning_printed= 0;
param->error_printed= 0;
@@ -2781,7 +2783,7 @@ int mi_repair_parallel(HA_CHECK *param, register MI_INFO *info,
del=info->state->del;
param->glob_crc=0;
/* for compressed tables */
- max_pack_reclength= share->base.pack_reclength;
+ max_pack_reclength= MY_MAX(share->base.pack_reclength, share->vreclength);
if (share->options & HA_OPTION_COMPRESS_RECORD)
set_if_bigger(max_pack_reclength, share->max_pack_length);
if (!(sort_param=(MI_SORT_PARAM *)
@@ -2914,8 +2916,8 @@ int mi_repair_parallel(HA_CHECK *param, register MI_INFO *info,
*/
sort_param[i].read_cache= ((rep_quick || !i) ? param->read_cache :
new_data_cache);
- DBUG_PRINT("io_cache_share", ("thread: %u read_cache: 0x%lx",
- i, (long) &sort_param[i].read_cache));
+ DBUG_PRINT("io_cache_share", ("thread: %u read_cache: %p",
+ i, &sort_param[i].read_cache));
/*
two approaches: the same amount of memory for each thread
@@ -3126,6 +3128,7 @@ static int sort_key_read(MI_SORT_PARAM *sort_param, void *key)
}
if (info->state->records == sort_info->max_records)
{
+ my_errno= HA_ERR_WRONG_IN_RECORD;
mi_check_print_error(sort_info->param,
"Key %d - Found too many records; Can't continue",
sort_param->key+1);
@@ -3270,12 +3273,9 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
sort_param->max_pos=(sort_param->pos+=share->base.pack_reclength);
if (*sort_param->record)
{
- if (sort_param->calc_checksum)
- param->glob_crc+= (info->checksum=
- (*info->s->calc_check_checksum)(info,
- sort_param->
- record));
- DBUG_RETURN(0);
+ if (sort_param->calc_checksum)
+ info->checksum= (*info->s->calc_check_checksum)(info, sort_param->record);
+ goto finish;
}
if (!sort_param->fix_datafile && sort_param->master)
{
@@ -3335,6 +3335,7 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
param->error_printed=1;
param->retry_repair=1;
param->testflag|=T_RETRY_WITHOUT_QUICK;
+ my_errno= HA_ERR_WRONG_IN_RECORD;
DBUG_RETURN(1); /* Something wrong with data */
}
b_type=_mi_get_block_info(&block_info,-1,pos);
@@ -3557,7 +3558,7 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
if (sort_param->calc_checksum)
info->checksum= (*info->s->calc_check_checksum)(info,
sort_param->record);
- if ((param->testflag & (T_EXTEND | T_REP)) || searching)
+ if ((param->testflag & (T_EXTEND | T_REP_ANY)) || searching)
{
if (_mi_rec_check(info, sort_param->record, sort_param->rec_buff,
sort_param->find_length,
@@ -3570,9 +3571,7 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
goto try_next;
}
}
- if (sort_param->calc_checksum)
- param->glob_crc+= info->checksum;
- DBUG_RETURN(0);
+ goto finish;
}
if (!searching)
mi_check_print_info(param,"Key %d - Found wrong stored record at %s",
@@ -3595,6 +3594,7 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
param->error_printed=1;
param->retry_repair=1;
param->testflag|=T_RETRY_WITHOUT_QUICK;
+ my_errno= HA_ERR_WRONG_IN_RECORD;
DBUG_RETURN(1); /* Something wrong with data */
}
sort_param->start_recpos=sort_param->pos;
@@ -3641,11 +3641,8 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
block_info.rec_len);
info->packed_length=block_info.rec_len;
if (sort_param->calc_checksum)
- param->glob_crc+= (info->checksum=
- (*info->s->calc_check_checksum)(info,
- sort_param->
- record));
- DBUG_RETURN(0);
+ info->checksum= (*info->s->calc_check_checksum)(info, sort_param->record);
+ goto finish;
}
default:
DBUG_ASSERT(0); /* Impossible */
@@ -3653,6 +3650,14 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
}
DBUG_ASSERT(0); /* Impossible */
DBUG_RETURN(1); /* Impossible */
+finish:
+ if (sort_param->calc_checksum)
+ param->glob_crc+= info->checksum;
+ if (param->fix_record)
+ param->fix_record(info, sort_param->record,
+ param->testflag & T_REP_BY_SORT ? (int)sort_param->key
+ : -1);
+ DBUG_RETURN(0);
}
@@ -3941,7 +3946,7 @@ static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a)
if (ha_compare_text(sort_param->seg->charset,
((uchar *)a)+1,a_len-1,
- (uchar*) ft_buf->lastkey+1,val_off-1, 0, 0)==0)
+ (uchar*) ft_buf->lastkey+1,val_off-1, 0)==0)
{
if (!ft_buf->buf) /* store in second-level tree */
{
@@ -3963,7 +3968,7 @@ static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a)
key_block++;
sort_info->key_block=key_block;
sort_param->keyinfo=& sort_info->info->s->ft2_keyinfo;
- ft_buf->count=((uchar*) ft_buf->buf - p)/val_len;
+ ft_buf->count=(int)((uchar*) ft_buf->buf - p)/val_len;
/* flushing buffer to second-level tree */
for (error=0; !error && p < (uchar*) ft_buf->buf; p+= val_len)
@@ -4515,7 +4520,7 @@ void update_auto_increment_key(HA_CHECK *param, MI_INFO *info,
DBUG_VOID_RETURN;
}
if (!(param->testflag & T_SILENT) &&
- !(param->testflag & T_REP))
+ !(param->testflag & T_REP_ANY))
printf("Updating MyISAM file: %s\n", param->isam_file_name);
/*
We have to use an allocated buffer instead of info->rec_buff as
@@ -4790,8 +4795,7 @@ static int replace_data_file(HA_CHECK *param, MI_INFO *info, File new_file)
*/
if (info->s->file_map)
{
- (void) my_munmap((char*) info->s->file_map,
- (size_t) info->s->mmaped_length);
+ (void) my_munmap((char*) info->s->file_map, info->s->mmaped_length);
info->s->file_map= NULL;
}
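
Note: the new param->fix_record hook used in chk_data_link() and sort_get_next_record() above is called once per scanned record; it receives the key number being built during sort-based repair (T_REP_BY_SORT) and -1 for a plain data scan. A compilable sketch of the calling convention, with simplified, assumed types:

    #include <stdio.h>

    typedef struct st_mi_info MI_INFO;   /* opaque in this sketch */
    typedef unsigned char uchar;

    struct ha_check_sketch {             /* stand-in for HA_CHECK */
      /* keynum: key being sorted during T_REP_BY_SORT, else -1 */
      void (*fix_record)(MI_INFO *info, uchar *record, int keynum);
    };

    static void maybe_fix(struct ha_check_sketch *param, MI_INFO *info,
                          uchar *record, int keynum)
    {
      if (param->fix_record)             /* optional hook; may be NULL */
        param->fix_record(info, record, keynum);
    }

    static void demo_fix(MI_INFO *info, uchar *record, int keynum)
    {
      (void) info; (void) record;
      printf("fix_record(keynum=%d)\n", keynum);
    }

    int main(void)
    {
      struct ha_check_sketch param= { demo_fix };
      maybe_fix(&param, NULL, NULL, -1);  /* data scan, as in chk_data_link */
      maybe_fix(&param, NULL, NULL, 0);   /* repair-by-sort of key 0 */
      return 0;
    }
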
diff --git a/storage/myisam/mi_close.c b/storage/myisam/mi_close.c
index 8e6515f112c..ca5c7355812 100644
--- a/storage/myisam/mi_close.c
+++ b/storage/myisam/mi_close.c
@@ -20,15 +20,15 @@
to open other files during the time we flush the cache and close this file
*/
-#include "myisamdef.h"
+#include "ftdefs.h"
int mi_close(register MI_INFO *info)
{
int error=0,flag;
MYISAM_SHARE *share=info->s;
DBUG_ENTER("mi_close");
- DBUG_PRINT("enter",("base: 0x%lx reopen: %u locks: %u",
- (long) info, (uint) share->reopen,
+ DBUG_PRINT("enter",("base: %p reopen: %u locks: %u",
+ info, (uint) share->reopen,
(uint) share->tot_locks));
if (info->open_list.data)
@@ -60,16 +60,15 @@ int mi_close(register MI_INFO *info)
mysql_mutex_unlock(&share->intern_lock);
my_free(mi_get_rec_buff_ptr(info, info->rec_buff));
+ ftparser_call_deinitializer(info);
+
if (flag)
{
DBUG_EXECUTE_IF("crash_before_flush_keys",
if (share->kfile >= 0) DBUG_ABORT(););
if (share->kfile >= 0 &&
- flush_key_blocks(share->key_cache, share->kfile,
- &share->dirty_part_map,
- ((share->temporary || share->deleting) ?
- FLUSH_IGNORE_CHANGED :
- FLUSH_RELEASE)))
+ flush_key_blocks(share->key_cache, share->kfile, &share->dirty_part_map,
+ share->deleting ? FLUSH_IGNORE_CHANGED : FLUSH_RELEASE))
error=my_errno;
if (share->kfile >= 0)
{
@@ -77,10 +76,14 @@ int mi_close(register MI_INFO *info)
If we are crashed, we can safely flush the current state as it will
not change the crashed state.
We can NOT write the state in other cases as other threads
- may be using the file at this point
- IF using --external-locking.
+ may be using the file at this point IF using --external-locking.
+
+ Also, write the state if a temporary table is not being dropped
+ (the server might want to reopen it, and mi_lock_database() only
+ writes the state for non-temp ones)
*/
- if (share->mode != O_RDONLY && mi_is_crashed(info))
+ if (share->mode != O_RDONLY &&
+ (mi_is_crashed(info) || (share->temporary && !share->deleting)))
mi_state_info_write(share->kfile, &share->state, 1);
/* Decrement open count must be last I/O on this file. */
_mi_decrement_open_count(info);
diff --git a/storage/myisam/mi_create.c b/storage/myisam/mi_create.c
index 855ac8337b9..fd230698acc 100644
--- a/storage/myisam/mi_create.c
+++ b/storage/myisam/mi_create.c
@@ -47,7 +47,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
uint internal_table= flags & HA_CREATE_INTERNAL_TABLE;
ulong reclength, real_reclength,min_pack_length;
char kfilename[FN_REFLEN],klinkname[FN_REFLEN], *klinkname_ptr;
- char dfilename[FN_REFLEN],dlinkname[FN_REFLEN], *dlinkname_ptr;
+ char dfilename[FN_REFLEN],dlinkname[FN_REFLEN], *dlinkname_ptr= 0;
ulong pack_reclength;
ulonglong tot_length,max_rows, tmp;
enum en_fieldtype type;
@@ -274,7 +274,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
so we only need to decrease keydef->keysegs.
(see recreate_table() in mi_check.c)
*/
- keydef->keysegs-=sp_segs-1;
+ keydef->keysegs= 1;
}
for (j=0, keyseg=keydef->seg ; (int) j < keydef->keysegs ;
@@ -288,7 +288,8 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
goto err_no_lock;
}
}
- keydef->keysegs+=sp_segs;
+ DBUG_ASSERT(keydef->keysegs == 1);
+ keydef->keysegs= sp_segs + 1;
key_length+=SPLEN*sp_segs;
length++; /* At least one length byte */
min_key_length_skip+=SPLEN*2*SPDIMS;
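
Note: the spatial-key bookkeeping above is now an explicit invariant rather than arithmetic on an incoming count: a SPATIAL key reaches mi_create() with exactly one real segment, is stored on disk with sp_segs additional MBR segments, and mi_open() (further down) asserts and reverses the same relation. The counts for SPDIMS == 2, as a self-checking example:

    #include <assert.h>
    #include <stdio.h>

    #define SPDIMS 2                      /* dimensions of the bounding box */

    int main(void)
    {
      unsigned sp_segs  = 2 * SPDIMS;     /* 4: min and max per dimension */
      unsigned in_create= 1;              /* only the geometry column itself */
      unsigned on_disk  = sp_segs + 1;    /* 5: what mi_create() writes */
      unsigned in_open  = on_disk - 1;    /* 4: what mi_open() keeps */
      assert(in_open == sp_segs);         /* the DBUG_ASSERTs in both files */
      printf("%u %u %u\n", in_create, on_disk, in_open);
      return 0;
    }
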
diff --git a/storage/myisam/mi_delete.c b/storage/myisam/mi_delete.c
index ca8c58bdc37..2c829fa9860 100644
--- a/storage/myisam/mi_delete.c
+++ b/storage/myisam/mi_delete.c
@@ -410,8 +410,8 @@ static int del(register MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *key,
MYISAM_SHARE *share=info->s;
MI_KEY_PARAM s_temp;
DBUG_ENTER("del");
- DBUG_PRINT("enter",("leaf_page: %ld keypos: 0x%lx", (long) leaf_page,
- (ulong) keypos));
+ DBUG_PRINT("enter",("leaf_page: %lld keypos: %p", leaf_page,
+ keypos));
DBUG_DUMP("leaf_buff",(uchar*) leaf_buff,mi_getint(leaf_buff));
endpos=leaf_buff+mi_getint(leaf_buff);
@@ -516,8 +516,8 @@ static int underflow(register MI_INFO *info, register MI_KEYDEF *keyinfo,
MI_KEY_PARAM s_temp;
MYISAM_SHARE *share=info->s;
DBUG_ENTER("underflow");
- DBUG_PRINT("enter",("leaf_page: %ld keypos: 0x%lx",(long) leaf_page,
- (ulong) keypos));
+ DBUG_PRINT("enter",("leaf_page: %lld keypos: %p",leaf_page,
+ keypos));
DBUG_DUMP("anc_buff",(uchar*) anc_buff,mi_getint(anc_buff));
DBUG_DUMP("leaf_buff",(uchar*) leaf_buff,mi_getint(leaf_buff));
@@ -597,8 +597,8 @@ static int underflow(register MI_INFO *info, register MI_KEYDEF *keyinfo,
else
{ /* Page is full */
endpos=anc_buff+anc_length;
- DBUG_PRINT("test",("anc_buff: 0x%lx endpos: 0x%lx",
- (long) anc_buff, (long) endpos));
+ DBUG_PRINT("test",("anc_buff: %p endpos: %p",
+ anc_buff, endpos));
if (keypos != anc_buff+2+key_reflength &&
!_mi_get_last_key(info,keyinfo,anc_buff,anc_key,keypos,&length))
goto err;
@@ -776,7 +776,7 @@ static uint remove_key(MI_KEYDEF *keyinfo, uint nod_flag,
int s_length;
uchar *start;
DBUG_ENTER("remove_key");
- DBUG_PRINT("enter",("keypos: 0x%lx page_end: 0x%lx",(long) keypos, (long) page_end));
+ DBUG_PRINT("enter",("keypos: %p page_end: %p",keypos, page_end));
start=keypos;
if (!(keyinfo->flag &
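
Note: the DBUG_PRINT rewrites here and throughout the patch replace casts through long with the %p conversion, which prints an object pointer at its full width. The cast form silently drops the upper half of the address on LLP64 targets (64-bit Windows), where long is 32 bits but pointers are 64:

    #include <stdio.h>
    #include <stddef.h>

    int main(void)
    {
      int x= 0;
      void *p= &x;
      printf("full : %p\n", p);                            /* portable */
      printf("long : 0x%lx\n", (unsigned long)(size_t) p); /* truncated on LLP64 */
      return 0;
    }
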
diff --git a/storage/myisam/mi_delete_all.c b/storage/myisam/mi_delete_all.c
index 31801399c7a..4bfe0e8d66c 100644
--- a/storage/myisam/mi_delete_all.c
+++ b/storage/myisam/mi_delete_all.c
@@ -62,6 +62,10 @@ int mi_delete_all_rows(MI_INFO *info)
if (mysql_file_chsize(info->dfile, 0, 0, MYF(MY_WME)) ||
mysql_file_chsize(share->kfile, share->base.keystart, 0, MYF(MY_WME)))
goto err;
+
+ if (info->opt_flag & WRITE_CACHE_USED)
+ reinit_io_cache(&info->rec_cache, WRITE_CACHE, 0, 1, 1);
+
(void) _mi_writeinfo(info,WRITEINFO_UPDATE_KEYFILE);
DBUG_RETURN(0);
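
Note: context for the reinit_io_cache() call added above: after mysql_file_chsize() empties the data file, a WRITE_CACHE on info->rec_cache still carries the pre-truncation file position and possibly buffered bytes, so its next flush would land past the end of the now-empty file. A toy model of the failure mode (the struct and fields here are illustrative only, not the real IO_CACHE layout):

    #include <stdio.h>

    struct wcache { long file_pos; char buf[64]; size_t used; };

    static void wcache_reinit(struct wcache *c, long pos)
    {
      c->file_pos= pos;  /* what reinit_io_cache(..., WRITE_CACHE, 0, ...) does */
      c->used= 0;        /* drop buffered bytes that belong to the old data */
    }

    int main(void)
    {
      struct wcache c= { 4096, "pending", 7 };  /* dirty, mid-file */
      wcache_reinit(&c, 0);                     /* table was just emptied */
      printf("pos=%ld used=%zu\n", c.file_pos, c.used);  /* pos=0 used=0 */
      return 0;
    }
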
diff --git a/storage/myisam/mi_dynrec.c b/storage/myisam/mi_dynrec.c
index af1765b7e86..df5e7a11c01 100644
--- a/storage/myisam/mi_dynrec.c
+++ b/storage/myisam/mi_dynrec.c
@@ -95,7 +95,7 @@ my_bool mi_dynmap_file(MI_INFO *info, my_off_t size)
#if defined(HAVE_MADVISE)
madvise((char*) info->s->file_map, size, MADV_RANDOM);
#endif
- info->s->mmaped_length= size;
+ info->s->mmaped_length= (size_t) size;
info->s->file_read= mi_mmap_pread;
info->s->file_write= mi_mmap_pwrite;
DBUG_RETURN(0);
@@ -118,8 +118,7 @@ int mi_munmap_file(MI_INFO *info)
{
int ret;
DBUG_ENTER("mi_unmap_file");
- if ((ret= my_munmap((void*) info->s->file_map,
- (size_t) info->s->mmaped_length)))
+ if ((ret= my_munmap((void*) info->s->file_map, info->s->mmaped_length)))
DBUG_RETURN(ret);
info->s->file_read= mi_nommap_pread;
info->s->file_write= mi_nommap_pwrite;
@@ -1344,8 +1343,8 @@ ulong _mi_rec_unpack(register MI_INFO *info, register uchar *to, uchar *from,
err:
my_errno= HA_ERR_WRONG_IN_RECORD;
- DBUG_PRINT("error",("to_end: 0x%lx -> 0x%lx from_end: 0x%lx -> 0x%lx",
- (long) to, (long) to_end, (long) from, (long) from_end));
+ DBUG_PRINT("error",("to_end: %p -> %p from_end: %p -> %p",
+ to, to_end, from, from_end));
DBUG_DUMP("from",(uchar*) info->rec_buff,info->s->base.min_pack_length);
DBUG_RETURN(MY_FILE_ERROR);
} /* _mi_rec_unpack */
diff --git a/storage/myisam/mi_extra.c b/storage/myisam/mi_extra.c
index 3514bc5a66e..27e08f093ff 100644
--- a/storage/myisam/mi_extra.c
+++ b/storage/myisam/mi_extra.c
@@ -260,11 +260,14 @@ int mi_extra(MI_INFO *info, enum ha_extra_function function, void *extra_arg)
break;
case HA_EXTRA_PREPARE_FOR_DROP:
/* Signals about intent to delete this table */
- //share->deleting= TRUE;
+ share->deleting= TRUE;
share->global_changed= FALSE; /* force writing changed flag */
_mi_mark_file_changed(info);
+ if (share->temporary)
+ break;
/* fall through */
case HA_EXTRA_PREPARE_FOR_RENAME:
+ DBUG_ASSERT(!share->temporary);
mysql_mutex_lock(&THR_LOCK_myisam);
share->last_version= 0L; /* Impossible version */
mysql_mutex_lock(&share->intern_lock);
diff --git a/storage/myisam/mi_key.c b/storage/myisam/mi_key.c
index c81bc674685..9247fae9e3c 100644
--- a/storage/myisam/mi_key.c
+++ b/storage/myisam/mi_key.c
@@ -150,7 +150,6 @@ uint _mi_make_key(register MI_INFO *info, uint keynr, uchar *key,
}
else if (keyseg->flag & HA_SWAP_KEY)
{ /* Numerical column */
-#ifdef HAVE_ISNAN
if (type == HA_KEYTYPE_FLOAT)
{
float nr;
@@ -174,7 +173,6 @@ uint _mi_make_key(register MI_INFO *info, uint keynr, uchar *key,
continue;
}
}
-#endif
pos+=length;
while (length--)
{
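
Note: the HAVE_ISNAN guard removed here (and in sp_key.c further down) dates from pre-C99 systems; isnan() is a standard <math.h> classification macro for any floating type, so the NaN check on key values can run unconditionally:

    #include <stdio.h>
    #include <math.h>

    int main(void)
    {
      float  f= nanf("");
      double d= nan("");
      printf("%d %d\n", isnan(f) != 0, isnan(d) != 0);   /* 1 1 */
      return 0;
    }
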
diff --git a/storage/myisam/mi_open.c b/storage/myisam/mi_open.c
index c1bf4f15ccb..ce028f3440d 100644
--- a/storage/myisam/mi_open.c
+++ b/storage/myisam/mi_open.c
@@ -346,21 +346,22 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
strmov(share->index_file_name, index_name);
strmov(share->data_file_name, data_name);
+ share->vreclength= share->base.reclength;
share->blocksize=MY_MIN(IO_SIZE,myisam_block_size);
{
HA_KEYSEG *pos=share->keyparts;
uint32 ftkey_nr= 1;
for (i=0 ; i < keys ; i++)
{
- share->keyinfo[i].share= share;
- disk_pos=mi_keydef_read(disk_pos, &share->keyinfo[i]);
- disk_pos_assert(disk_pos + share->keyinfo[i].keysegs * HA_KEYSEG_SIZE,
- end_pos);
- if (share->keyinfo[i].key_alg == HA_KEY_ALG_RTREE)
+ MI_KEYDEF *keyinfo= share->keyinfo + i;
+ keyinfo->share= share;
+ disk_pos=mi_keydef_read(disk_pos, keyinfo);
+ disk_pos_assert(disk_pos + keyinfo->keysegs * HA_KEYSEG_SIZE, end_pos);
+ if (keyinfo->key_alg == HA_KEY_ALG_RTREE)
have_rtree=1;
- set_if_smaller(share->blocksize,share->keyinfo[i].block_length);
- share->keyinfo[i].seg=pos;
- for (j=0 ; j < share->keyinfo[i].keysegs; j++,pos++)
+ set_if_smaller(share->blocksize, keyinfo->block_length);
+ keyinfo->seg= pos;
+ for (j=0 ; j < keyinfo->keysegs; j++,pos++)
{
disk_pos=mi_keyseg_read(disk_pos, pos);
if (pos->flag & HA_BLOB_PART &&
@@ -384,35 +385,30 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
}
else if (pos->type == HA_KEYTYPE_BINARY)
pos->charset= &my_charset_bin;
- if (!(share->keyinfo[i].flag & HA_SPATIAL) &&
- pos->start > share->base.reclength)
- {
- my_errno= HA_ERR_CRASHED;
- goto err;
- }
}
- if (share->keyinfo[i].flag & HA_SPATIAL)
+ if (keyinfo->flag & HA_SPATIAL)
{
#ifdef HAVE_SPATIAL
- uint sp_segs=SPDIMS*2;
- share->keyinfo[i].seg=pos-sp_segs;
- share->keyinfo[i].keysegs--;
+ uint sp_segs= SPDIMS*2;
+ keyinfo->seg= pos - sp_segs;
+ DBUG_ASSERT(keyinfo->keysegs == sp_segs + 1);
+ keyinfo->keysegs= sp_segs;
#else
my_errno=HA_ERR_UNSUPPORTED;
goto err;
#endif
}
- else if (share->keyinfo[i].flag & HA_FULLTEXT)
+ else if (keyinfo->flag & HA_FULLTEXT)
{
if (!fulltext_keys)
{ /* 4.0 compatibility code, to be removed in 5.0 */
- share->keyinfo[i].seg=pos-FT_SEGS;
- share->keyinfo[i].keysegs-=FT_SEGS;
+ keyinfo->seg= pos - FT_SEGS;
+ keyinfo->keysegs-= FT_SEGS;
}
else
{
uint k;
- share->keyinfo[i].seg=pos;
+ keyinfo->seg= pos;
for (k=0; k < FT_SEGS; k++)
{
*pos= ft_keysegs[k];
@@ -427,7 +423,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
}
if (!share->ft2_keyinfo.seg)
{
- memcpy(& share->ft2_keyinfo, & share->keyinfo[i], sizeof(MI_KEYDEF));
+ memcpy(& share->ft2_keyinfo, keyinfo, sizeof(MI_KEYDEF));
share->ft2_keyinfo.keysegs=1;
share->ft2_keyinfo.flag=0;
share->ft2_keyinfo.keylength=
@@ -437,10 +433,10 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
share->ft2_keyinfo.end=pos;
setup_key_functions(& share->ft2_keyinfo);
}
- share->keyinfo[i].ftkey_nr= ftkey_nr++;
+ keyinfo->ftkey_nr= ftkey_nr++;
}
- setup_key_functions(share->keyinfo+i);
- share->keyinfo[i].end=pos;
+ setup_key_functions(keyinfo);
+ keyinfo->end= pos;
pos->type=HA_KEYTYPE_END; /* End */
pos->length=share->base.rec_reflength;
pos->null_bit=0;
@@ -752,6 +748,7 @@ uchar *mi_alloc_rec_buff(MI_INFO *info, ulong length, uchar **buf)
else
length= info->s->base.pack_reclength;
length= MY_MAX(length, info->s->base.max_key_length);
+ length= MY_MAX(length, info->s->vreclength);
/* Avoid unnecessary realloc */
if (newptr && length == old_length)
return newptr;
@@ -764,7 +761,7 @@ uchar *mi_alloc_rec_buff(MI_INFO *info, ulong length, uchar **buf)
newptr-= MI_REC_BUFF_OFFSET;
if (!(newptr=(uchar*) my_realloc((uchar*)newptr, length+extra+8,
MYF(MY_ALLOW_ZERO_PTR))))
- return newptr;
+ return NULL;
*((uint32 *) newptr)= (uint32) length;
*buf= newptr+(extra ? MI_REC_BUFF_OFFSET : 0);
}
@@ -1387,4 +1384,3 @@ int mi_indexes_are_disabled(MI_INFO *info)
*/
return 2;
}
-
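
Note: the extra MY_MAX() in mi_alloc_rec_buff() guarantees the record buffer also covers the vcol-expanded record length (the new share->vreclength, initialised to base.reclength in mi_open() above and presumably enlarged when virtual columns are set up for repair). Worked example with assumed sizes:

    #include <stdio.h>

    #define MY_MAX(a,b) ((a) > (b) ? (a) : (b))

    int main(void)
    {
      unsigned long pack_reclength= 96;    /* assumed demo values */
      unsigned long max_key_length= 128;
      unsigned long vreclength    = 160;   /* record incl. virtual columns */
      unsigned long length= pack_reclength;
      length= MY_MAX(length, max_key_length);
      length= MY_MAX(length, vreclength);  /* the added line */
      printf("alloc %lu bytes\n", length); /* 160 */
      return 0;
    }
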
diff --git a/storage/myisam/mi_packrec.c b/storage/myisam/mi_packrec.c
index d03826b55a3..572fe690da8 100644
--- a/storage/myisam/mi_packrec.c
+++ b/storage/myisam/mi_packrec.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
+ Copyright (c) 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -1111,10 +1112,10 @@ static void decode_bytes(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,uchar *to,
bit_buff->error=1;
return; /* Can't be right */
}
- bit_buff->current_byte= (bit_buff->current_byte << 32) +
- ((((uint) bit_buff->pos[3])) +
- (((uint) bit_buff->pos[2]) << 8) +
- (((uint) bit_buff->pos[1]) << 16) +
+ bit_buff->current_byte= (bit_buff->current_byte << 32) |
+ ((((uint) bit_buff->pos[3])) |
+ (((uint) bit_buff->pos[2]) << 8) |
+ (((uint) bit_buff->pos[1]) << 16) |
(((uint) bit_buff->pos[0]) << 24));
bit_buff->pos+=4;
bits+=32;
@@ -1205,23 +1206,23 @@ static void decode_bytes(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, uchar *to,
return; /* Can't be right */
}
#if BITS_SAVED == 32
- bit_buff->current_byte= (bit_buff->current_byte << 24) +
- (((uint) ((uchar) bit_buff->pos[2]))) +
- (((uint) ((uchar) bit_buff->pos[1])) << 8) +
+ bit_buff->current_byte= (bit_buff->current_byte << 24) |
+ (((uint) ((uchar) bit_buff->pos[2]))) |
+ (((uint) ((uchar) bit_buff->pos[1])) << 8) |
(((uint) ((uchar) bit_buff->pos[0])) << 16);
bit_buff->pos+=3;
bits+=24;
#else
if (bits) /* We must have at leasts 9 bits */
{
- bit_buff->current_byte= (bit_buff->current_byte << 8) +
+ bit_buff->current_byte= (bit_buff->current_byte << 8) |
(uint) ((uchar) bit_buff->pos[0]);
bit_buff->pos++;
bits+=8;
}
else
{
- bit_buff->current_byte= ((uint) ((uchar) bit_buff->pos[0]) << 8) +
+ bit_buff->current_byte= ((uint) ((uchar) bit_buff->pos[0]) << 8) |
((uint) ((uchar) bit_buff->pos[1]));
bit_buff->pos+=2;
bits+=16;
@@ -1245,14 +1246,14 @@ static void decode_bytes(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, uchar *to,
if (bits < 8)
{ /* We don't need to check end */
#if BITS_SAVED == 32
- bit_buff->current_byte= (bit_buff->current_byte << 24) +
- (((uint) ((uchar) bit_buff->pos[2]))) +
- (((uint) ((uchar) bit_buff->pos[1])) << 8) +
+ bit_buff->current_byte= (bit_buff->current_byte << 24) |
+ (((uint) ((uchar) bit_buff->pos[2]))) |
+ (((uint) ((uchar) bit_buff->pos[1])) << 8) |
(((uint) ((uchar) bit_buff->pos[0])) << 16);
bit_buff->pos+=3;
bits+=24;
#else
- bit_buff->current_byte= (bit_buff->current_byte << 8) +
+ bit_buff->current_byte= (bit_buff->current_byte << 8) |
(uint) ((uchar) bit_buff->pos[0]);
bit_buff->pos+=1;
bits+=8;
@@ -1439,25 +1440,25 @@ static void fill_buffer(MI_BIT_BUFF *bit_buff)
}
#if BITS_SAVED == 64
- bit_buff->current_byte= ((((uint) ((uchar) bit_buff->pos[7]))) +
- (((uint) ((uchar) bit_buff->pos[6])) << 8) +
- (((uint) ((uchar) bit_buff->pos[5])) << 16) +
- (((uint) ((uchar) bit_buff->pos[4])) << 24) +
+ bit_buff->current_byte= ((((uint) ((uchar) bit_buff->pos[7]))) |
+ (((uint) ((uchar) bit_buff->pos[6])) << 8) |
+ (((uint) ((uchar) bit_buff->pos[5])) << 16) |
+ (((uint) ((uchar) bit_buff->pos[4])) << 24) |
((ulonglong)
- ((((uint) ((uchar) bit_buff->pos[3]))) +
- (((uint) ((uchar) bit_buff->pos[2])) << 8) +
- (((uint) ((uchar) bit_buff->pos[1])) << 16) +
+ ((((uint) ((uchar) bit_buff->pos[3]))) |
+ (((uint) ((uchar) bit_buff->pos[2])) << 8) |
+ (((uint) ((uchar) bit_buff->pos[1])) << 16) |
(((uint) ((uchar) bit_buff->pos[0])) << 24)) << 32));
bit_buff->pos+=8;
#else
#if BITS_SAVED == 32
- bit_buff->current_byte= (((uint) ((uchar) bit_buff->pos[3])) +
- (((uint) ((uchar) bit_buff->pos[2])) << 8) +
- (((uint) ((uchar) bit_buff->pos[1])) << 16) +
+ bit_buff->current_byte= (((uint) ((uchar) bit_buff->pos[3])) |
+ (((uint) ((uchar) bit_buff->pos[2])) << 8) |
+ (((uint) ((uchar) bit_buff->pos[1])) << 16) |
(((uint) ((uchar) bit_buff->pos[0])) << 24));
bit_buff->pos+=4;
#else
- bit_buff->current_byte= (uint) (((uint) ((uchar) bit_buff->pos[1]))+
+ bit_buff->current_byte= (uint) (((uint) ((uchar) bit_buff->pos[1])) |
(((uint) ((uchar) bit_buff->pos[0])) << 8));
bit_buff->pos+=2;
#endif
@@ -1551,7 +1552,7 @@ void _mi_unmap_file(MI_INFO *info)
{
DBUG_ASSERT(info->s->options & HA_OPTION_COMPRESS_RECORD);
- (void) my_munmap((char*) info->s->file_map, (size_t) info->s->mmaped_length);
+ (void) my_munmap((char*) info->s->file_map, info->s->mmaped_length);
if (myisam_mmap_size != SIZE_T_MAX)
{
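
Note: the '+' to '|' rewrites in decode_bytes() and fill_buffer() assemble a machine word from shifted bytes whose bit ranges never overlap, so addition and bitwise OR yield identical results; OR simply states the intent (pure bit assembly, no carries possible). A quick check:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
      const unsigned char pos[4]= { 0x12, 0x34, 0x56, 0x78 };
      uint32_t with_or = ((uint32_t) pos[3])
                       | ((uint32_t) pos[2] << 8)
                       | ((uint32_t) pos[1] << 16)
                       | ((uint32_t) pos[0] << 24);
      uint32_t with_add= ((uint32_t) pos[3])
                       + ((uint32_t) pos[2] << 8)
                       + ((uint32_t) pos[1] << 16)
                       + ((uint32_t) pos[0] << 24);
      printf("%08x %08x\n", (unsigned) with_or, (unsigned) with_add);
      return 0;                            /* both print 12345678 */
    }
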
diff --git a/storage/myisam/mi_preload.c b/storage/myisam/mi_preload.c
index bd61540c912..1bf4452dccf 100644
--- a/storage/myisam/mi_preload.c
+++ b/storage/myisam/mi_preload.c
@@ -41,7 +41,7 @@
int mi_preload(MI_INFO *info, ulonglong key_map, my_bool ignore_leaves)
{
uint i;
- ulong length, block_length= 0;
+ size_t length, block_length= 0;
uchar *buff= NULL;
MYISAM_SHARE* share= info->s;
uint keys= share->state.header.keys;
@@ -68,7 +68,7 @@ int mi_preload(MI_INFO *info, ulonglong key_map, my_bool ignore_leaves)
}
}
else
- block_length= share->key_cache->param_block_size;
+ block_length= (size_t)share->key_cache->param_block_size;
length= info->preload_buff_size/block_length * block_length;
set_if_bigger(length, block_length);
@@ -84,7 +84,7 @@ int mi_preload(MI_INFO *info, ulonglong key_map, my_bool ignore_leaves)
{
/* Read the next block of index file into the preload buffer */
if ((my_off_t) length > (key_file_length-pos))
- length= (ulong) (key_file_length-pos);
+ length= (size_t) (key_file_length-pos);
if (mysql_file_pread(share->kfile, (uchar*) buff, length, pos,
MYF(MY_FAE|MY_FNABP)))
goto err;
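
Note: the ulong-to-size_t changes in mi_preload() (and the (size_t) casts in sort.c below) matter on LLP64 platforms, where unsigned long remains 32 bits while size_t is 64: buffer-length arithmetic done in ulong can wrap before the value ever reaches my_malloc() or mysql_file_pread(). Illustration with assumed values:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
      uint32_t keys= 70000, sort_length= 65536;        /* demo values */
      uint32_t narrow= keys * (sort_length + 8);       /* 32-bit ulong math */
      uint64_t wide  = (uint64_t) keys * (sort_length + 8);
      printf("narrow=%u wide=%llu\n",
             narrow, (unsigned long long) wide);  /* narrow wrapped past 2^32 */
      return 0;
    }
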
diff --git a/storage/myisam/mi_rkey.c b/storage/myisam/mi_rkey.c
index fa56b811313..897138e4f62 100644
--- a/storage/myisam/mi_rkey.c
+++ b/storage/myisam/mi_rkey.c
@@ -32,8 +32,8 @@ int mi_rkey(MI_INFO *info, uchar *buf, int inx, const uchar *key,
uint pack_key_length, use_key_length, nextflag;
ICP_RESULT res= ICP_NO_MATCH;
DBUG_ENTER("mi_rkey");
- DBUG_PRINT("enter", ("base: 0x%lx buf: 0x%lx inx: %d search_flag: %d",
- (long) info, (long) buf, inx, search_flag));
+ DBUG_PRINT("enter", ("base: %p buf: %p inx: %d search_flag: %d",
+ info, buf, inx, search_flag));
if ((inx = _mi_check_index(info,inx)) < 0)
DBUG_RETURN(my_errno);
diff --git a/storage/myisam/mi_search.c b/storage/myisam/mi_search.c
index 040f9db6e12..14286e3591d 100644
--- a/storage/myisam/mi_search.c
+++ b/storage/myisam/mi_search.c
@@ -267,8 +267,8 @@ int _mi_seq_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
mi_print_error(info->s, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
DBUG_PRINT("error",
- ("Found wrong key: length: %u page: 0x%lx end: 0x%lx",
- length, (long) page, (long) end));
+ ("Found wrong key: length: %u page: %p end: %p",
+ length, page, end));
DBUG_RETURN(MI_FOUND_WRONG_KEY);
}
if ((flag=ha_key_cmp(keyinfo->seg,t_buff,key,key_len,comp_flag,
@@ -284,7 +284,7 @@ int _mi_seq_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
if (flag == 0)
memcpy(buff,t_buff,length); /* Result is first key */
*last_key= page == end;
- DBUG_PRINT("exit",("flag: %d ret_pos: 0x%lx", flag, (long) *ret_pos));
+ DBUG_PRINT("exit",("flag: %d ret_pos: %p", flag, *ret_pos));
DBUG_RETURN(flag);
} /* _mi_seq_search */
@@ -419,8 +419,8 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
mi_print_error(info->s, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
DBUG_PRINT("error",
- ("Found wrong key: length: %u page: 0x%lx end: %lx",
- length, (long) page, (long) end));
+ ("Found wrong key: length: %u page: %p end: %p",
+ length, page, end));
DBUG_RETURN(MI_FOUND_WRONG_KEY);
}
@@ -554,7 +554,7 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
*last_key= page == end;
- DBUG_PRINT("exit",("flag: %d ret_pos: 0x%lx", flag, (long) *ret_pos));
+ DBUG_PRINT("exit",("flag: %d ret_pos: %p", flag, *ret_pos));
DBUG_RETURN(flag);
} /* _mi_prefix_search */
@@ -816,8 +816,8 @@ uint _mi_get_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag,
if (length > keyseg->length)
{
DBUG_PRINT("error",
- ("Found too long null packed key: %u of %u at 0x%lx",
- length, keyseg->length, (long) *page_pos));
+ ("Found too long null packed key: %u of %u at %p",
+ length, keyseg->length, *page_pos));
DBUG_DUMP("key", *page_pos, 16);
mi_print_error(keyinfo->share, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
@@ -873,8 +873,8 @@ uint _mi_get_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag,
}
if (length > (uint) keyseg->length)
{
- DBUG_PRINT("error",("Found too long packed key: %u of %u at 0x%lx",
- length, keyseg->length, (long) *page_pos));
+ DBUG_PRINT("error",("Found too long packed key: %u of %u at %p",
+ length, keyseg->length, *page_pos));
DBUG_DUMP("key", *page_pos, 16);
mi_print_error(keyinfo->share, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
@@ -945,8 +945,8 @@ uint _mi_get_binary_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag,
if (length > keyinfo->maxlength)
{
DBUG_PRINT("error",
- ("Found too long binary packed key: %u of %u at 0x%lx",
- length, keyinfo->maxlength, (long) *page_pos));
+ ("Found too long binary packed key: %u of %u at %p",
+ length, keyinfo->maxlength, *page_pos));
DBUG_DUMP("key", *page_pos, 16);
goto crashed; /* Wrong key */
}
@@ -1003,8 +1003,8 @@ uint _mi_get_binary_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag,
length-=tmp;
from=page; from_end=page_end;
}
- DBUG_PRINT("info",("key: 0x%lx from: 0x%lx length: %u",
- (long) key, (long) from, length));
+ DBUG_PRINT("info",("key: %p from: %p length: %u",
+ key, from, length));
memmove((uchar*) key, (uchar*) from, (size_t) length);
key+=length;
from+=length;
@@ -1077,7 +1077,7 @@ uchar *_mi_get_key(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page,
}
}
}
- DBUG_PRINT("exit",("page: 0x%lx length: %u", (long) page,
+ DBUG_PRINT("exit",("page: %p length: %u", page,
*return_key_length));
DBUG_RETURN(page);
} /* _mi_get_key */
@@ -1130,8 +1130,8 @@ uchar *_mi_get_last_key(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page,
uint nod_flag;
uchar *lastpos;
DBUG_ENTER("_mi_get_last_key");
- DBUG_PRINT("enter",("page: 0x%lx endpos: 0x%lx", (long) page,
- (long) endpos));
+ DBUG_PRINT("enter",("page:%p endpos: %p", page,
+ endpos));
nod_flag=mi_test_if_nod(page);
if (! (keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY)))
@@ -1151,15 +1151,15 @@ uchar *_mi_get_last_key(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page,
*return_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,lastkey);
if (*return_key_length == 0)
{
- DBUG_PRINT("error",("Couldn't find last key: page: 0x%lx",
- (long) page));
+ DBUG_PRINT("error",("Couldn't find last key: page: %p",
+ page));
mi_print_error(info->s, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
DBUG_RETURN(0);
}
}
}
- DBUG_PRINT("exit",("lastpos: 0x%lx length: %u", (long) lastpos,
+ DBUG_PRINT("exit",("lastpos: %p length: %u", lastpos,
*return_key_length));
DBUG_RETURN(lastpos);
} /* _mi_get_last_key */
@@ -1245,9 +1245,9 @@ int _mi_search_next(register MI_INFO *info, register MI_KEYDEF *keyinfo,
uint nod_flag;
uchar lastkey[HA_MAX_KEY_BUFF];
DBUG_ENTER("_mi_search_next");
- DBUG_PRINT("enter",("nextflag: %u lastpos: %lu int_keypos: %lu",
- nextflag, (ulong) info->lastpos,
- (ulong) info->int_keypos));
+ DBUG_PRINT("enter",("nextflag: %u lastpos: %llu int_keypos: %p",
+ nextflag, info->lastpos,
+ info->int_keypos));
DBUG_EXECUTE("key",_mi_print_key(DBUG_FILE,keyinfo->seg,key,key_length););
/* Force full read if we are at last key or if we are not on a leaf
@@ -1697,8 +1697,8 @@ _mi_calc_var_pack_key_length(MI_KEYDEF *keyinfo,uint nod_flag,uchar *next_key,
ref_length=0;
next_length_pack=0;
}
- DBUG_PRINT("test",("length: %d next_key: 0x%lx", length,
- (long) next_key));
+ DBUG_PRINT("test",("length: %d next_key: %p", length,
+ next_key));
{
uint tmp_length;
diff --git a/storage/myisam/mi_unique.c b/storage/myisam/mi_unique.c
index 1f3a35b1965..89f32368b5f 100644
--- a/storage/myisam/mi_unique.c
+++ b/storage/myisam/mi_unique.c
@@ -112,7 +112,7 @@ ha_checksum mi_unique_hash(MI_UNIQUEDEF *def, const uchar *record)
else if (keyseg->flag & HA_BLOB_PART)
{
uint tmp_length=_mi_calc_blob_length(keyseg->bit_start,pos);
- memcpy(&pos, pos+keyseg->bit_start, sizeof(char*));
+ memcpy((char**) &pos, pos+keyseg->bit_start, sizeof(char*));
if (!length || length > tmp_length)
length=tmp_length; /* The whole blob */
}
@@ -207,14 +207,14 @@ int mi_unique_comp(MI_UNIQUEDEF *def, const uchar *a, const uchar *b,
set_if_smaller(a_length, keyseg->length);
set_if_smaller(b_length, keyseg->length);
}
- memcpy(&pos_a, pos_a+keyseg->bit_start, sizeof(char*));
- memcpy(&pos_b, pos_b+keyseg->bit_start, sizeof(char*));
+ memcpy((char**) &pos_a, pos_a+keyseg->bit_start, sizeof(char*));
+ memcpy((char**) &pos_b, pos_b+keyseg->bit_start, sizeof(char*));
}
if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT1 ||
type == HA_KEYTYPE_VARTEXT2)
{
if (ha_compare_text(keyseg->charset, (uchar *) pos_a, a_length,
- (uchar *) pos_b, b_length, 0, 1))
+ (uchar *) pos_b, b_length, 0))
return 1;
}
else
diff --git a/storage/myisam/mi_write.c b/storage/myisam/mi_write.c
index 5ae09b26760..1745f8baaab 100644
--- a/storage/myisam/mi_write.c
+++ b/storage/myisam/mi_write.c
@@ -471,7 +471,7 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo,
uchar *endpos, *prev_key;
MI_KEY_PARAM s_temp;
DBUG_ENTER("_mi_insert");
- DBUG_PRINT("enter",("key_pos: 0x%lx", (long) key_pos));
+ DBUG_PRINT("enter",("key_pos: %p", key_pos));
DBUG_EXECUTE("key",_mi_print_key(DBUG_FILE,keyinfo->seg,key,USE_WHOLE_KEY););
nod_flag=mi_test_if_nod(anc_buff);
@@ -492,8 +492,8 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo,
{
DBUG_PRINT("test",("t_length: %d ref_len: %d",
t_length,s_temp.ref_length));
- DBUG_PRINT("test",("n_ref_len: %d n_length: %d key_pos: 0x%lx",
- s_temp.n_ref_length,s_temp.n_length, (long) s_temp.key));
+ DBUG_PRINT("test",("n_ref_len: %d n_length: %d key_pos: %p",
+ s_temp.n_ref_length,s_temp.n_length, s_temp.key));
}
#endif
if (t_length > 0)
@@ -543,7 +543,7 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo,
get_key_length(alen,a);
DBUG_ASSERT(info->ft1_to_ft2==0);
if (alen == blen &&
- ha_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0, 0)==0)
+ ha_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0)==0)
{
/* yup. converting */
info->ft1_to_ft2=(DYNAMIC_ARRAY *)
@@ -693,8 +693,8 @@ uchar *_mi_find_half_pos(uint nod_flag, MI_KEYDEF *keyinfo, uchar *page,
} while (page < end);
*return_key_length=length;
*after_key=page;
- DBUG_PRINT("exit",("returns: 0x%lx page: 0x%lx half: 0x%lx",
- (long) lastpos, (long) page, (long) end));
+ DBUG_PRINT("exit",("returns: %p page: %p half: %p",
+ lastpos, page, end));
DBUG_RETURN(lastpos);
} /* _mi_find_half_pos */
@@ -750,8 +750,8 @@ static uchar *_mi_find_last_pos(MI_KEYDEF *keyinfo, uchar *page,
*return_key_length=last_length;
*after_key=lastpos;
- DBUG_PRINT("exit",("returns: 0x%lx page: 0x%lx end: 0x%lx",
- (long) prevpos,(long) page,(long) end));
+ DBUG_PRINT("exit",("returns: %p page: %p end: %p",
+ prevpos, page, end));
DBUG_RETURN(prevpos);
} /* _mi_find_last_pos */
diff --git a/storage/myisam/myisamchk.c b/storage/myisam/myisamchk.c
index 136ddbc3117..76c9d0f7828 100644
--- a/storage/myisam/myisamchk.c
+++ b/storage/myisam/myisamchk.c
@@ -28,7 +28,7 @@
static uint decode_bits;
static char **default_argv;
static const char *load_default_groups[]= { "myisamchk", 0 };
-static const char *set_collation_name, *opt_tmpdir;
+static char *set_collation_name, *opt_tmpdir;
static CHARSET_INFO *set_collation;
static long opt_myisam_block_size;
static long opt_key_cache_block_size;
@@ -94,11 +94,10 @@ int main(int argc, char **argv)
(void) fflush(stderr);
if ((check_param.error_printed | check_param.warning_printed) &&
(check_param.testflag & T_FORCE_CREATE) &&
- (!(check_param.testflag & (T_REP | T_REP_BY_SORT | T_SORT_RECORDS |
- T_SORT_INDEX))))
+ (!(check_param.testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX))))
{
ulonglong old_testflag=check_param.testflag;
- if (!(check_param.testflag & T_REP))
+ if (!(check_param.testflag & T_REP_ANY))
check_param.testflag|= T_REP_BY_SORT;
check_param.testflag&= ~T_EXTEND; /* Don't needed */
error|=myisamchk(&check_param, argv[-1]);
@@ -818,20 +817,22 @@ static int myisamchk(HA_CHECK *param, char * filename)
char llbuff[22],llbuff2[22];
my_bool state_updated=0;
MYISAM_SHARE *share;
+ int open_mode;
+ uint open_flags= HA_OPEN_FOR_REPAIR;
DBUG_ENTER("myisamchk");
param->out_flag=error=param->warning_printed=param->error_printed=
recreate=0;
datafile=0;
param->isam_file_name=filename; /* For error messages */
- if (!(info=mi_open(filename,
- (param->testflag & (T_DESCRIPT | T_READONLY)) ?
- O_RDONLY : O_RDWR,
- HA_OPEN_FOR_REPAIR |
- ((param->testflag & T_WAIT_FOREVER) ?
- HA_OPEN_WAIT_IF_LOCKED :
- (param->testflag & T_DESCRIPT) ?
- HA_OPEN_IGNORE_IF_LOCKED : HA_OPEN_ABORT_IF_LOCKED))))
+ open_mode= param->testflag & (T_DESCRIPT | T_READONLY) ? O_RDONLY : O_RDWR;
+ if (param->testflag & T_WAIT_FOREVER)
+ open_flags|= HA_OPEN_WAIT_IF_LOCKED;
+ else if (param->testflag & T_DESCRIPT)
+ open_flags|= HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_FROM_SQL_LAYER;
+ else
+ open_flags|= HA_OPEN_ABORT_IF_LOCKED;
+ if (!(info=mi_open(filename, open_mode, open_flags)))
{
/* Avoid twice printing of isam file name */
param->error_printed=1;
@@ -1065,7 +1066,7 @@ static int myisamchk(HA_CHECK *param, char * filename)
error=mi_sort_records(param,info,filename,param->opt_sort_key,
/* what is the following parameter for ? */
- (my_bool) !(param->testflag & T_REP),
+ (my_bool) !(param->testflag & T_REP_ANY),
update_index);
datafile=info->dfile; /* This is now locked */
if (!error && !update_index)
@@ -1113,7 +1114,7 @@ static int myisamchk(HA_CHECK *param, char * filename)
{
if (param->testflag & (T_EXTEND | T_MEDIUM))
(void) init_key_cache(dflt_key_cache,opt_key_cache_block_size,
- param->use_buffers, 0, 0, 0, 0);
+ (size_t)param->use_buffers, 0, 0, 0, 0);
(void) init_io_cache(&param->read_cache,datafile,
(uint) param->read_buffer_length,
READ_CACHE,
diff --git a/storage/myisam/myisamdef.h b/storage/myisam/myisamdef.h
index 8123c38fb8a..67024ea36aa 100644
--- a/storage/myisam/myisamdef.h
+++ b/storage/myisam/myisamdef.h
@@ -1,5 +1,6 @@
/*
Copyright (c) 2000, 2012, Oracle and/or its affiliates.
+ Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -92,16 +93,16 @@ typedef struct st_mi_state_info
} MI_STATE_INFO;
#define MI_STATE_INFO_SIZE (24+14*8+7*4+2*2+8)
-#define MI_STATE_KEY_SIZE 8
-#define MI_STATE_KEYBLOCK_SIZE 8
-#define MI_STATE_KEYSEG_SIZE 4
+#define MI_STATE_KEY_SIZE 8U
+#define MI_STATE_KEYBLOCK_SIZE 8U
+#define MI_STATE_KEYSEG_SIZE 4U
#define MI_STATE_EXTRA_SIZE ((MI_MAX_KEY+MI_MAX_KEY_BLOCK_SIZE)*MI_STATE_KEY_SIZE + MI_MAX_KEY*HA_MAX_KEY_SEG*MI_STATE_KEYSEG_SIZE)
#define MI_KEYDEF_SIZE (2+ 5*2)
#define MI_UNIQUEDEF_SIZE (2+1+1)
#define HA_KEYSEG_SIZE (6+ 2*2 + 4*2)
#define MI_COLUMNDEF_SIZE (2*3+1)
#define MI_BASE_INFO_SIZE (5*8 + 8*4 + 4 + 4*2 + 16)
-#define MI_INDEX_BLOCK_MARGIN 16 /* Safety margin for .MYI tables */
+#define MI_INDEX_BLOCK_MARGIN 16U /* Safety margin for .MYI tables */
typedef struct st_mi_base_info
{
@@ -199,6 +200,7 @@ typedef struct st_mi_isam_share
ulong max_pack_length;
ulong state_diff_length;
uint rec_reflength; /* rec_reflength in use now */
+ ulong vreclength; /* full reclength, including vcols */
uint unique_name_length;
uint32 ftkeys; /* Number of full-text keys + 1 */
File kfile; /* Shared keyfile */
@@ -222,7 +224,7 @@ typedef struct st_mi_isam_share
THR_LOCK lock;
mysql_mutex_t intern_lock; /* Locking for use with _locking */
mysql_rwlock_t *key_root_lock;
- my_off_t mmaped_length;
+ size_t mmaped_length;
uint nonmmaped_inserts; /* counter of writing in non-mmaped
area */
mysql_rwlock_t mmap_lock;
@@ -310,27 +312,27 @@ struct st_myisam_info
#define USE_WHOLE_KEY (HA_MAX_KEY_BUFF*2) /* Use whole key in _mi_search() */
#define F_EXTRA_LCK -1
/* bits in opt_flag */
-#define MEMMAP_USED 32
-#define REMEMBER_OLD_POS 64
+#define MEMMAP_USED 32U
+#define REMEMBER_OLD_POS 64U
-#define WRITEINFO_UPDATE_KEYFILE 1
-#define WRITEINFO_NO_UNLOCK 2
+#define WRITEINFO_UPDATE_KEYFILE 1U
+#define WRITEINFO_NO_UNLOCK 2U
/* once_flags */
-#define USE_PACKED_KEYS 1
-#define RRND_PRESERVE_LASTINX 2
+#define USE_PACKED_KEYS 1U
+#define RRND_PRESERVE_LASTINX 2U
/* bits in state.changed */
-#define STATE_CHANGED 1
-#define STATE_CRASHED 2
-#define STATE_CRASHED_ON_REPAIR 4
-#define STATE_NOT_ANALYZED 8
-#define STATE_NOT_OPTIMIZED_KEYS 16
-#define STATE_NOT_SORTED_PAGES 32
+#define STATE_CHANGED 1U
+#define STATE_CRASHED 2U
+#define STATE_CRASHED_ON_REPAIR 4U
+#define STATE_NOT_ANALYZED 8U
+#define STATE_NOT_OPTIMIZED_KEYS 16U
+#define STATE_NOT_SORTED_PAGES 32U
/* options to mi_read_cache */
-#define READING_NEXT 1
-#define READING_HEADER 2
+#define READING_NEXT 1U
+#define READING_HEADER 2U
#define mi_getint(x) ((uint) mi_uint2korr(x) & 32767)
#define mi_putint(x,y,nod) { uint16 boh=(nod ? (uint16) 32768 : 0) + (uint16) (y);\
@@ -383,17 +385,17 @@ struct st_myisam_info
#define MI_MAX_DYN_BLOCK_HEADER 20 /* Max prefix of record-block */
#define MI_BLOCK_INFO_HEADER_LENGTH 20
#define MI_DYN_DELETE_BLOCK_HEADER 20 /* length of delete-block-header */
-#define MI_DYN_MAX_BLOCK_LENGTH ((1L << 24)-4L)
+#define MI_DYN_MAX_BLOCK_LENGTH ((1UL << 24)-4UL)
#define MI_DYN_MAX_ROW_LENGTH (MI_DYN_MAX_BLOCK_LENGTH - MI_SPLIT_LENGTH)
-#define MI_DYN_ALIGN_SIZE 4 /* Align blocks on this */
+#define MI_DYN_ALIGN_SIZE 4U /* Align blocks on this */
#define MI_MAX_DYN_HEADER_BYTE 13 /* max header byte for dynamic rows */
-#define MI_MAX_BLOCK_LENGTH ((((ulong) 1 << 24)-1) & (~ (ulong) (MI_DYN_ALIGN_SIZE-1)))
+#define MI_MAX_BLOCK_LENGTH (((1U << 24)-1) & (~(MI_DYN_ALIGN_SIZE-1)))
#define MI_REC_BUFF_OFFSET ALIGN_SIZE(MI_DYN_DELETE_BLOCK_HEADER+sizeof(uint32))
-#define PACK_TYPE_SELECTED 1 /* Bits in field->pack_type */
-#define PACK_TYPE_SPACE_FIELDS 2
-#define PACK_TYPE_ZERO_FILL 4
+#define PACK_TYPE_SELECTED 1U /* Bits in field->pack_type */
+#define PACK_TYPE_SPACE_FIELDS 2U
+#define PACK_TYPE_ZERO_FILL 4U
#define MI_FOUND_WRONG_KEY 0x7FFFFFFF /* Impossible value from ha_key_cmp */
#define MI_MAX_KEY_BLOCK_SIZE (MI_MAX_KEY_BLOCK_LENGTH/MI_MIN_KEY_BLOCK_LENGTH)
@@ -401,7 +403,7 @@ struct st_myisam_info
#define MI_MAX_KEYPTR_SIZE 5 /* For calculating block lengths */
#define MI_MIN_KEYBLOCK_LENGTH 50 /* When to split delete blocks */
-#define MI_MIN_SIZE_BULK_INSERT_TREE 16384 /* this is per key */
+#define MI_MIN_SIZE_BULK_INSERT_TREE 16384U /* this is per key */
#define MI_MIN_ROWS_TO_USE_BULK_INSERT 100
#define MI_MIN_ROWS_TO_DISABLE_INDEXES 100
#define MI_MIN_ROWS_TO_USE_WRITE_CACHE 10
@@ -607,12 +609,12 @@ typedef struct st_mi_block_info /* Parameter to _mi_get_block_info */
/* bits in return from _mi_get_block_info */
-#define BLOCK_FIRST 1
-#define BLOCK_LAST 2
-#define BLOCK_DELETED 4
-#define BLOCK_ERROR 8 /* Wrong data */
-#define BLOCK_SYNC_ERROR 16 /* Right data at wrong place */
-#define BLOCK_FATAL_ERROR 32 /* hardware-error */
+#define BLOCK_FIRST 1U
+#define BLOCK_LAST 2U
+#define BLOCK_DELETED 4U
+#define BLOCK_ERROR 8U /* Wrong data */
+#define BLOCK_SYNC_ERROR 16U /* Right data at wrong place */
+#define BLOCK_FATAL_ERROR 32U /* hardware-error */
#define NEED_MEM ((uint) 10*4*(IO_SIZE+32)+32) /* Nead for recursion */
#define MAXERR 20
@@ -621,17 +623,17 @@ typedef struct st_mi_block_info /* Parameter to _mi_get_block_info */
#define INDEX_TMP_EXT ".TMM"
#define DATA_TMP_EXT ".TMD"
-#define UPDATE_TIME 1
-#define UPDATE_STAT 2
-#define UPDATE_SORT 4
-#define UPDATE_AUTO_INC 8
-#define UPDATE_OPEN_COUNT 16
+#define UPDATE_TIME 1U
+#define UPDATE_STAT 2U
+#define UPDATE_SORT 4U
+#define UPDATE_AUTO_INC 8U
+#define UPDATE_OPEN_COUNT 16U
/* We use MY_ALIGN_DOWN here mainly to ensure that we get stable values for mysqld --help ) */
#define KEY_BUFFER_INIT MY_ALIGN_DOWN(1024L*1024L-MALLOC_OVERHEAD, IO_SIZE)
#define READ_BUFFER_INIT MY_ALIGN_DOWN(1024L*256L-MALLOC_OVERHEAD, 1024)
#define SORT_BUFFER_INIT MY_ALIGN_DOWN(1024L*1024L*128L-MALLOC_OVERHEAD, 1024)
-#define MIN_SORT_BUFFER 4096
+#define MIN_SORT_BUFFER 4096U
enum myisam_log_commands
{
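
Note: the U suffixes added to these flag and size constants make them unsigned, matching the unsigned fields and bitmasks they are combined with, so signed/unsigned-comparison diagnostics stay meaningful. The classic trap the suffixes make visible:

    #include <stdio.h>

    int main(void)
    {
      int length= -1;                 /* e.g. an error value in a size check */
      printf("%d\n", length >= 4096);   /* 0: plain signed comparison */
      printf("%d\n", length >= 4096U);  /* 1: length converts to a huge
                                           unsigned value; compilers warn */
      return 0;
    }
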
diff --git a/storage/myisam/mysql-test/storage_engine/alter_tablespace.rdiff b/storage/myisam/mysql-test/storage_engine/alter_tablespace.rdiff
index 4215af58011..a8c78b117a9 100644
--- a/storage/myisam/mysql-test/storage_engine/alter_tablespace.rdiff
+++ b/storage/myisam/mysql-test/storage_engine/alter_tablespace.rdiff
@@ -13,7 +13,7 @@
-2
-ALTER TABLE t1 DISCARD TABLESPACE;
-SELECT a FROM t1;
--ERROR HY000: Tablespace has been discarded for table 't1'
+-ERROR HY000: Tablespace has been discarded for table `t1`
-ALTER TABLE t1 IMPORT TABLESPACE;
-Warnings:
-Warning 1810 IO Read error: (2, No such file or directory) Error opening './test/t1.cfg', will attempt to import without schema verification
diff --git a/storage/myisam/rt_split.c b/storage/myisam/rt_split.c
index 91465c5a231..1adcdb8e5b0 100644
--- a/storage/myisam/rt_split.c
+++ b/storage/myisam/rt_split.c
@@ -69,8 +69,8 @@ static double mbr_join_square(const double *a, const double *b, int n_dim)
b += 2;
}while (a != end);
- /* Check for infinity or NaN */
- if (my_isinf(square) || isnan(square))
+ /* Check if not finite (i.e. infinity or NaN) */
+ if (!isfinite(square))
square = DBL_MAX;
return square;
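
Note: the rewritten guard uses the single C99 classification macro: !isfinite(x) is true exactly for +infinity, -infinity and NaN, i.e. the union of the old my_isinf() and isnan() tests:

    #include <stdio.h>
    #include <math.h>

    int main(void)
    {
      double vals[4]= { 1.0, INFINITY, -INFINITY, NAN };
      for (int i= 0; i < 4; i++)
        printf("%g -> %d\n", vals[i], !isfinite(vals[i]));  /* 0 1 1 1 */
      return 0;
    }
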
diff --git a/storage/myisam/sort.c b/storage/myisam/sort.c
index 33f3f5ae3f4..13b8f17ea0f 100644
--- a/storage/myisam/sort.c
+++ b/storage/myisam/sort.c
@@ -191,8 +191,9 @@ int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages,
while ((maxbuffer= (uint) (records/(keys-1)+1)) != maxbuffer_org);
}
- if ((sort_keys=(uchar **)my_malloc(keys*(sort_length+sizeof(char*))+
- HA_FT_MAXBYTELEN, MYF(0))))
+ if ((sort_keys= ((uchar **)
+ my_malloc((size_t) (keys*(sort_length+sizeof(char*))+
+ HA_FT_MAXBYTELEN), MYF(0)))))
{
if (my_init_dynamic_array(&buffpek, sizeof(BUFFPEK), maxbuffer,
MY_MIN(maxbuffer/2, 1000), MYF(0)))
@@ -407,9 +408,9 @@ static my_bool thr_find_all_keys_exec(MI_SORT_PARAM *sort_param)
}
while ((maxbuffer= (uint) (idx/(keys-1)+1)) != maxbuffer_org);
}
- if ((sort_keys= (uchar**) my_malloc(keys * (sort_length + sizeof(char*)) +
+ if ((sort_keys= (uchar**) my_malloc((size_t)(keys * (sort_length + sizeof(char*)) +
((sort_param->keyinfo->flag & HA_FULLTEXT) ?
- HA_FT_MAXBYTELEN : 0), MYF(0))))
+ HA_FT_MAXBYTELEN : 0)), MYF(0))))
{
if (my_init_dynamic_array(&sort_param->buffpek, sizeof(BUFFPEK),
maxbuffer, MY_MIN(maxbuffer / 2, 1000), MYF(0)))
@@ -608,7 +609,7 @@ int thr_write_keys(MI_SORT_PARAM *sort_param)
length=param->sort_buffer_length;
while (length >= MIN_SORT_BUFFER)
{
- if ((mergebuf= my_malloc(length, MYF(0))))
+ if ((mergebuf= my_malloc((size_t) length, MYF(0))))
break;
length=length*3/4;
}
@@ -700,8 +701,8 @@ static int write_keys(MI_SORT_PARAM *info, register uchar **sort_keys,
if (!buffpek)
DBUG_RETURN(1); /* Out of memory */
- my_qsort2((uchar*) sort_keys,count,sizeof(uchar*),(qsort2_cmp) info->key_cmp,
- info);
+ my_qsort2((uchar*) sort_keys,(size_t) count, sizeof(uchar*),
+ (qsort2_cmp) info->key_cmp, info);
if (!my_b_inited(tempfile) &&
open_cached_file(tempfile, my_tmpdir(info->tmpdir), "ST",
DISK_BUFFER_SIZE, info->sort_info->param->myf_rw))
@@ -746,8 +747,8 @@ static int write_keys_varlen(MI_SORT_PARAM *info,
if (!buffpek)
DBUG_RETURN(1); /* Out of memory */
- my_qsort2((uchar*) sort_keys,count,sizeof(uchar*),(qsort2_cmp) info->key_cmp,
- info);
+ my_qsort2((uchar*) sort_keys, (size_t) count, sizeof(uchar*),
+ (qsort2_cmp) info->key_cmp, info);
if (!my_b_inited(tempfile) &&
open_cached_file(tempfile, my_tmpdir(info->tmpdir), "ST",
DISK_BUFFER_SIZE, info->sort_info->param->myf_rw))
@@ -843,8 +844,6 @@ cleanup:
{
DBUG_ASSERT(t_file2.type == WRITE_CACHE);
*t_file=t_file2; /* Copy result file */
- t_file->current_pos= &t_file->write_pos;
- t_file->current_end= &t_file->write_end;
}
DBUG_RETURN(*maxbuffer >= MERGEBUFF2); /* Return 1 if interrupted */
@@ -868,12 +867,14 @@ static my_off_t read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek,
uint sort_length)
{
register ha_keys count;
- my_off_t length;
+ size_t length;
- if ((count= (ha_keys) MY_MIN((ha_rows) buffpek->max_keys,buffpek->count)))
+ if ((count= (ha_keys) MY_MIN((ha_rows) buffpek->max_keys,
+ (ha_rows) buffpek->count)))
{
if (my_b_pread(fromfile, (uchar*) buffpek->base,
- (length= sort_length * count), buffpek->file_pos))
+ (length= (size_t) (sort_length * count)),
+ buffpek->file_pos))
return(HA_OFFSET_ERROR);
buffpek->key=buffpek->base;
buffpek->file_pos+= length; /* New filepos */
@@ -938,7 +939,7 @@ static int write_merge_key(MI_SORT_PARAM *info __attribute__((unused)),
IO_CACHE *to_file, uchar *key,
uint sort_length, ha_keys count)
{
- return my_b_write(to_file, key, ((size_t) sort_length) * count);
+ return my_b_write(to_file, key, (size_t) (sort_length * count));
}
/*
@@ -998,7 +999,7 @@ merge_buffers(MI_SORT_PARAM *info, ha_keys keys, IO_CACHE *from_file,
if (to_file)
{
if (info->write_key(info,to_file,(uchar*) buffpek->key,
- (uint) sort_length,1))
+ sort_length, 1))
{
error=1; goto err; /* purecov: inspected */
}
@@ -1021,7 +1022,7 @@ merge_buffers(MI_SORT_PARAM *info, ha_keys keys, IO_CACHE *from_file,
if (!(read_length= info->read_to_buffer(from_file,buffpek,sort_length)))
{
uchar *base= buffpek->base;
- uint max_keys=buffpek->max_keys;
+ ha_keys max_keys=buffpek->max_keys;
queue_remove_top(&queue);
diff --git a/storage/myisam/sp_key.c b/storage/myisam/sp_key.c
index c3aeb7553f2..4c6ef75934e 100644
--- a/storage/myisam/sp_key.c
+++ b/storage/myisam/sp_key.c
@@ -66,7 +66,6 @@ uint sp_make_key(register MI_INFO *info, uint keynr, uchar *key,
DBUG_ASSERT(keyseg->type == HA_KEYTYPE_DOUBLE);
val= mbr[start / sizeof (double)];
-#ifdef HAVE_ISNAN
if (isnan(val))
{
bzero(key, length);
@@ -74,7 +73,6 @@ uint sp_make_key(register MI_INFO *info, uint keynr, uchar *key,
len+= length;
continue;
}
-#endif
if (keyseg->flag & HA_SWAP_KEY)
{
diff --git a/storage/myisammrg/ha_myisammrg.cc b/storage/myisammrg/ha_myisammrg.cc
index 4cc4aca3f90..d88af381df6 100644
--- a/storage/myisammrg/ha_myisammrg.cc
+++ b/storage/myisammrg/ha_myisammrg.cc
@@ -101,7 +101,7 @@
#include "../myisam/ha_myisam.h"
#include "ha_myisammrg.h"
#include "myrg_def.h"
-#include "thr_malloc.h" // int_sql_alloc
+#include "thr_malloc.h" // init_sql_alloc
#include "sql_class.h" // THD
#include "debug_sync.h"
@@ -359,7 +359,7 @@ int ha_myisammrg::open(const char *name, int mode __attribute__((unused)),
uint test_if_locked_arg)
{
DBUG_ENTER("ha_myisammrg::open");
- DBUG_PRINT("myrg", ("name: '%s' table: 0x%lx", name, (long) table));
+ DBUG_PRINT("myrg", ("name: '%s' table: %p", name, table));
DBUG_PRINT("myrg", ("test_if_locked_arg: %u", test_if_locked_arg));
/* Must not be used when table is open. */
@@ -413,8 +413,8 @@ int ha_myisammrg::open(const char *name, int mode __attribute__((unused)),
DBUG_RETURN(my_errno ? my_errno : -1);
/* purecov: end */
}
- DBUG_PRINT("myrg", ("MYRG_INFO: 0x%lx child tables: %u",
- (long) file, file->tables));
+ DBUG_PRINT("myrg", ("MYRG_INFO: %p child tables: %u",
+ file, file->tables));
DBUG_RETURN(0);
}
@@ -440,8 +440,8 @@ int ha_myisammrg::add_children_list(void)
List_iterator_fast<Mrg_child_def> it(child_def_list);
Mrg_child_def *mrg_child_def;
DBUG_ENTER("ha_myisammrg::add_children_list");
- DBUG_PRINT("myrg", ("table: '%s'.'%s' 0x%lx", this->table->s->db.str,
- this->table->s->table_name.str, (long) this->table));
+ DBUG_PRINT("myrg", ("table: '%s'.'%s' %p", this->table->s->db.str,
+ this->table->s->table_name.str, this->table));
/* Must call this with open table. */
DBUG_ASSERT(this->file);
@@ -699,12 +699,12 @@ extern "C" MI_INFO *myisammrg_attach_children_callback(void *callback_param)
if ((child->file->ht->db_type != DB_TYPE_MYISAM) ||
!(myisam= ((ha_myisam*) child->file)->file_ptr()))
{
- DBUG_PRINT("error", ("no MyISAM handle for child table: '%s'.'%s' 0x%lx",
+ DBUG_PRINT("error", ("no MyISAM handle for child table: '%s'.'%s' %p",
child->s->db.str, child->s->table_name.str,
- (long) child));
+ child));
}
- DBUG_PRINT("myrg", ("MyISAM handle: 0x%lx", (long) myisam));
+ DBUG_PRINT("myrg", ("MyISAM handle: %p", myisam));
end:
@@ -810,8 +810,8 @@ int ha_myisammrg::attach_children(void)
int error;
Mrg_attach_children_callback_param param(parent_l, this->children_l, child_def_list);
DBUG_ENTER("ha_myisammrg::attach_children");
- DBUG_PRINT("myrg", ("table: '%s'.'%s' 0x%lx", table->s->db.str,
- table->s->table_name.str, (long) table));
+ DBUG_PRINT("myrg", ("table: '%s'.'%s' %p", table->s->db.str,
+ table->s->table_name.str, table));
DBUG_PRINT("myrg", ("test_if_locked: %u", this->test_if_locked));
/* Must call this with open table. */
@@ -1679,7 +1679,7 @@ uint ha_myisammrg::count_query_cache_dependant_tables(uint8 *tables_type)
(*tables_type)|= HA_CACHE_TBL_NONTRANSACT;
but it has no effect because HA_CACHE_TBL_NONTRANSACT is 0
*/
- return (file->end_table - file->open_tables);
+ return (uint)(file->end_table - file->open_tables);
}
diff --git a/storage/myisammrg/myrg_open.c b/storage/myisammrg/myrg_open.c
index 46a801802a1..bf91213dbb0 100644
--- a/storage/myisammrg/myrg_open.c
+++ b/storage/myisammrg/myrg_open.c
@@ -1,5 +1,6 @@
/*
Copyright (c) 2000, 2011, Oracle and/or its affiliates
+ Copyright (c) 2010, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -38,7 +39,7 @@ MYRG_INFO *myrg_open(const char *name, int mode, int handle_locking)
int save_errno,errpos=0;
uint files= 0, i, dir_length, length, UNINIT_VAR(key_parts), min_keys= 0;
ulonglong file_offset=0;
- char name_buff[FN_REFLEN*2],buff[FN_REFLEN],*end;
+ char name_buff[FN_REFLEN*2],buff[FN_REFLEN];
MYRG_INFO *m_info=0;
File fd;
IO_CACHE file;
@@ -62,8 +63,9 @@ MYRG_INFO *myrg_open(const char *name, int mode, int handle_locking)
dir_length=dirname_part(name_buff, name, &name_buff_length);
while ((length=my_b_gets(&file,buff,FN_REFLEN-1)))
{
- if ((end=buff+length)[-1] == '\n')
- end[-1]='\0';
+ char *end= &buff[length - 1];
+ if (*end == '\n')
+ *end= '\0';
if (buff[0] && buff[0] != '#')
files++;
}
@@ -71,8 +73,9 @@ MYRG_INFO *myrg_open(const char *name, int mode, int handle_locking)
my_b_seek(&file, 0);
while ((length=my_b_gets(&file,buff,FN_REFLEN-1)))
{
- if ((end=buff+length)[-1] == '\n')
- *--end='\0';
+ char *end= &buff[length - 1];
+ if (*end == '\n')
+ *end= '\0';
if (!buff[0])
continue; /* Skip empty lines */
if (buff[0] == '#')
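
Note: the rewritten newline handling in myrg_open() checks and zeroes the same byte as before but names it, relying on my_b_gets() returning a length greater than zero. Standalone equivalent of the new form:

    #include <stdio.h>
    #include <string.h>

    static void chomp(char *buff, size_t length)   /* requires length > 0 */
    {
      char *end= &buff[length - 1];
      if (*end == '\n')
        *end= '\0';
    }

    int main(void)
    {
      char line[]= "t1.MYI\n";
      chomp(line, strlen(line));
      printf("[%s]\n", line);   /* [t1.MYI] */
      return 0;
    }
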
diff --git a/storage/myisammrg/mysql-test/storage_engine/alter_tablespace.rdiff b/storage/myisammrg/mysql-test/storage_engine/alter_tablespace.rdiff
index 19ca1a1b6e1..e5462f8cb1f 100644
--- a/storage/myisammrg/mysql-test/storage_engine/alter_tablespace.rdiff
+++ b/storage/myisammrg/mysql-test/storage_engine/alter_tablespace.rdiff
@@ -13,7 +13,7 @@
-2
-ALTER TABLE t1 DISCARD TABLESPACE;
-SELECT a FROM t1;
--ERROR HY000: Tablespace has been discarded for table 't1'
+-ERROR HY000: Tablespace has been discarded for table `t1`
-ALTER TABLE t1 IMPORT TABLESPACE;
-Warnings:
-Warning 1810 IO Read error: (2, No such file or directory) Error opening './test/t1.cfg', will attempt to import without schema verification
diff --git a/storage/myisammrg/mysql-test/storage_engine/create_table.rdiff b/storage/myisammrg/mysql-test/storage_engine/create_table.rdiff
index 585e5c915ba..5f5c2528a95 100644
--- a/storage/myisammrg/mysql-test/storage_engine/create_table.rdiff
+++ b/storage/myisammrg/mysql-test/storage_engine/create_table.rdiff
@@ -29,7 +29,7 @@
-SHOW CREATE TABLE t1;
-Table Create Table
-t1 CREATE TABLE `t1` (
-- `1` bigint(20) NOT NULL DEFAULT '0'
+- `1` bigint(20) NOT NULL DEFAULT 0
-) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1
-SELECT * FROM t1;
-1
diff --git a/storage/myisammrg/mysql-test/storage_engine/disabled.def b/storage/myisammrg/mysql-test/storage_engine/disabled.def
index ca25a5d331b..55fc952c20e 100644
--- a/storage/myisammrg/mysql-test/storage_engine/disabled.def
+++ b/storage/myisammrg/mysql-test/storage_engine/disabled.def
@@ -1,2 +1,4 @@
+insert_delayed : MDEV-12880 - INSERT DELAYED is not detected as inapplicable to a table under lock
+lock_concurrent : MDEV-12882 - Assertion failure
+select_high_prio : MDEV-12885 - MDL_SHARED_READ_ONLY is taken instead of MDL_SHARED_READ
lock : MDEV-17145 (Unexpected ER_LOCK_WAIT_TIMEOUT)
-select_high_prio : MDEV-17145 (Unexpected ER_LOCK_WAIT_TIMEOUT)
diff --git a/storage/myisammrg/mysql-test/storage_engine/parts/repair_table.rdiff b/storage/myisammrg/mysql-test/storage_engine/parts/repair_table.rdiff
index d7bf99fd674..35d4f6b63d6 100644
--- a/storage/myisammrg/mysql-test/storage_engine/parts/repair_table.rdiff
+++ b/storage/myisammrg/mysql-test/storage_engine/parts/repair_table.rdiff
@@ -1,6 +1,6 @@
---- repair_table.result 2013-01-23 01:35:44.388267080 +0400
-+++ repair_table.reject 2013-01-23 03:16:26.468307847 +0400
-@@ -1,234 +1,114 @@
+--- suite/storage_engine/parts/repair_table.result 2017-08-28 19:29:20.491633306 +0300
++++ suite/storage_engine/parts/repair_table.reject 2017-08-28 19:34:41.723633059 +0300
+@@ -1,235 +1,115 @@
call mtr.add_suppression("Table '.*t1.*' is marked as crashed and should be repaired");
DROP TABLE IF EXISTS t1, t2;
CREATE TABLE t1 (a <INT_COLUMN>, b <CHAR_COLUMN>) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS> PARTITION BY HASH(a) PARTITIONS 2;
@@ -144,6 +144,7 @@
call mtr.add_suppression("MySQL thread id .*, query id .* localhost.*root Checking table");
call mtr.add_suppression(" '\..test.t1'");
call mtr.add_suppression("Couldn't repair table: test.t1");
+ call mtr.add_suppression("Table 't1' is marked as crashed.*");
CREATE TABLE t1 (a <INT_COLUMN>, b <CHAR_COLUMN>, <CUSTOM_INDEX> (a)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS> PARTITION BY HASH(a) PARTITIONS 2;
+ERROR HY000: Engine cannot be used in partitioned tables
+# ERROR: Statement ended with errno 1572, errname ER_PARTITION_MERGE_ERROR (expected to succeed)
diff --git a/storage/myisammrg/mysql-test/storage_engine/parts/truncate_table.rdiff b/storage/myisammrg/mysql-test/storage_engine/parts/truncate_table.rdiff
index 01bf3702a3f..9ba985f7adc 100644
--- a/storage/myisammrg/mysql-test/storage_engine/parts/truncate_table.rdiff
+++ b/storage/myisammrg/mysql-test/storage_engine/parts/truncate_table.rdiff
@@ -27,8 +27,8 @@
- `c` char(8) DEFAULT NULL,
- PRIMARY KEY (`a`)
-) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1
--/*!50100 PARTITION BY HASH (a)
--PARTITIONS 2 */
+- PARTITION BY HASH (`a`)
+-PARTITIONS 2
-INSERT INTO t1 (c) VALUES ('a'),('b'),('c');
-SHOW CREATE TABLE t1;
-Table Create Table
@@ -37,8 +37,8 @@
- `c` char(8) DEFAULT NULL,
- PRIMARY KEY (`a`)
-) ENGINE=<STORAGE_ENGINE> AUTO_INCREMENT=4 DEFAULT CHARSET=latin1
--/*!50100 PARTITION BY HASH (a)
--PARTITIONS 2 */
+- PARTITION BY HASH (`a`)
+-PARTITIONS 2
-TRUNCATE TABLE t1;
-SHOW CREATE TABLE t1;
-Table Create Table
@@ -47,8 +47,8 @@
- `c` char(8) DEFAULT NULL,
- PRIMARY KEY (`a`)
-) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1
--/*!50100 PARTITION BY HASH (a)
--PARTITIONS 2 */
+- PARTITION BY HASH (`a`)
+-PARTITIONS 2
-INSERT INTO t1 (c) VALUES ('d');
-SHOW CREATE TABLE t1;
-Table Create Table
@@ -57,8 +57,8 @@
- `c` char(8) DEFAULT NULL,
- PRIMARY KEY (`a`)
-) ENGINE=<STORAGE_ENGINE> AUTO_INCREMENT=2 DEFAULT CHARSET=latin1
--/*!50100 PARTITION BY HASH (a)
--PARTITIONS 2 */
+- PARTITION BY HASH (`a`)
+-PARTITIONS 2
-SELECT a,c FROM t1;
-a c
-1 d
diff --git a/storage/myisammrg/mysql-test/storage_engine/repair_table.rdiff b/storage/myisammrg/mysql-test/storage_engine/repair_table.rdiff
index 9ff8f906511..79f6c7040e0 100644
--- a/storage/myisammrg/mysql-test/storage_engine/repair_table.rdiff
+++ b/storage/myisammrg/mysql-test/storage_engine/repair_table.rdiff
@@ -1,5 +1,5 @@
---- repair_table.result 2013-01-23 01:26:05.995538460 +0400
-+++ repair_table.reject 2013-01-23 02:50:55.035560564 +0400
+--- suite/storage_engine/repair_table.result 2017-05-24 01:09:07.274213486 +0300
++++ suite/storage_engine/repair_table.reject 2017-05-24 01:10:25.466214949 +0300
@@ -4,56 +4,50 @@
CREATE TABLE t2 (a <INT_COLUMN>, b <CHAR_COLUMN>) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
REPAIR TABLE t1;
@@ -71,7 +71,7 @@
DROP TABLE t1, t2;
call mtr.add_suppression("Got an error from thread_id=.*");
call mtr.add_suppression("MySQL thread id .*, query id .* localhost.*root Checking table");
-@@ -62,45 +56,32 @@
+@@ -63,45 +57,32 @@
CREATE TABLE t1 (a <INT_COLUMN>, b <CHAR_COLUMN>, <CUSTOM_INDEX> (a)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
REPAIR TABLE t1;
Table Op Msg_type Msg_text
@@ -104,7 +104,7 @@
-test.t1 check error Corrupt
+test.t1 check status OK
SELECT a,b FROM t1;
--ERROR HY000: Incorrect key file for table 't1'; try to repair it
+-ERROR HY000: Index for table 't1' is corrupt; try to repair it
-# Statement ended with one of expected results (0,ER_NOT_KEYFILE,144).
-# If you got a difference in error message, just add it to rdiff file
-INSERT INTO t1 (a,b) VALUES (14,'n'),(15,'o');
diff --git a/storage/myisammrg/mysql-test/storage_engine/tbl_opt_data_index_dir.rdiff b/storage/myisammrg/mysql-test/storage_engine/tbl_opt_data_dir.rdiff
index e6055278b3c..671e26ec617 100644
--- a/storage/myisammrg/mysql-test/storage_engine/tbl_opt_data_index_dir.rdiff
+++ b/storage/myisammrg/mysql-test/storage_engine/tbl_opt_data_dir.rdiff
@@ -1,18 +1,18 @@
---- tbl_opt_data_index_dir.result 2013-01-22 22:05:05.246633000 +0400
-+++ tbl_opt_data_index_dir.reject 2013-01-23 02:50:59.951498762 +0400
-@@ -4,7 +4,7 @@
+--- suite/storage_engine/tbl_opt_data_dir.result 2017-05-24 00:21:15.550159778 +0300
++++ ../storage/myisammrg/mysql-test/storage_engine/tbl_opt_data_dir.reject 2017-05-24 00:25:45.506164827 +0300
+@@ -5,7 +5,7 @@
t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` char(8) DEFAULT NULL
--) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 DATA DIRECTORY='<DATA_DIR>' INDEX DIRECTORY='<INDEX_DIR>'
+-) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 DATA DIRECTORY='<DATA_DIR_1>'
+) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 INSERT_METHOD=LAST UNION=(`mrg`.`t1`)
+ # For ALTER TABLE the option is ignored
+ # Running ALTER TABLE .. DATA DIRECTORY = <>
Warnings:
- Warning 1618 <INDEX DIRECTORY> option ignored
- SHOW CREATE TABLE t1;
-@@ -12,5 +12,5 @@
+@@ -15,5 +15,5 @@
t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` char(8) DEFAULT NULL
--) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 DATA DIRECTORY='<DATA_DIR>' INDEX DIRECTORY='<INDEX_DIR>'
+-) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 DATA DIRECTORY='<DATA_DIR_1>'
+) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 INSERT_METHOD=LAST UNION=(`mrg`.`t1`)
DROP TABLE t1;
diff --git a/storage/myisammrg/mysql-test/storage_engine/tbl_opt_index_dir.rdiff b/storage/myisammrg/mysql-test/storage_engine/tbl_opt_index_dir.rdiff
new file mode 100644
index 00000000000..ca025861f68
--- /dev/null
+++ b/storage/myisammrg/mysql-test/storage_engine/tbl_opt_index_dir.rdiff
@@ -0,0 +1,18 @@
+--- suite/storage_engine/tbl_opt_index_dir.result 2017-05-24 00:21:15.550159778 +0300
++++ ../storage/myisammrg/mysql-test/storage_engine/tbl_opt_index_dir.reject 2017-05-24 00:25:45.506164827 +0300
+@@ -5,7 +5,7 @@
+ t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) DEFAULT NULL
+-) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 INDEX DIRECTORY='<INDEX_DIR_1>'
++) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 INSERT_METHOD=LAST UNION=(`mrg`.`t1`)
+ # For ALTER TABLE the option is ignored
+ # Running ALTER TABLE .. INDEX DIRECTORY = <>
+ Warnings:
+@@ -15,5 +15,5 @@
+ t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) DEFAULT NULL
+-) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 INDEX DIRECTORY='<INDEX_DIR_1>'
++) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 INSERT_METHOD=LAST UNION=(`mrg`.`t1`)
+ DROP TABLE t1;
diff --git a/storage/myisammrg/mysql-test/storage_engine/tbl_opt_row_format.rdiff b/storage/myisammrg/mysql-test/storage_engine/tbl_opt_row_format.rdiff
index f7e0905d4e7..6c756e7b8e1 100644
--- a/storage/myisammrg/mysql-test/storage_engine/tbl_opt_row_format.rdiff
+++ b/storage/myisammrg/mysql-test/storage_engine/tbl_opt_row_format.rdiff
@@ -1,17 +1,33 @@
---- tbl_opt_row_format.result 2013-01-22 22:05:05.246633000 +0400
-+++ tbl_opt_row_format.reject 2013-01-23 02:51:04.743438518 +0400
-@@ -5,12 +5,12 @@
+--- ../storage/myisammrg/mysql-test/storage_engine/tbl_opt_row_format.result~ 2017-05-24 00:50:44.254192857 +0300
++++ ../storage/myisammrg/mysql-test/storage_engine/tbl_opt_row_format.reject 2017-05-24 00:50:44.334192859 +0300
+@@ -5,26 +5,26 @@
+ t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) DEFAULT NULL
+-) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC
++) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC INSERT_METHOD=LAST UNION=(`mrg`.`t1`)
+ ALTER TABLE t1 ROW_FORMAT=FIXED;
+ SHOW CREATE TABLE t1;
+ Table Create Table
t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` char(8) DEFAULT NULL
-) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 ROW_FORMAT=FIXED
+) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 ROW_FORMAT=FIXED INSERT_METHOD=LAST UNION=(`mrg`.`t1`)
- ALTER TABLE t1 ROW_FORMAT=DYNAMIC;
+ ALTER TABLE t1 ROW_FORMAT=PAGE;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` char(8) DEFAULT NULL
--) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC
-+) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC INSERT_METHOD=LAST UNION=(`mrg`.`t1`)
+-) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 ROW_FORMAT=PAGE
++) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 ROW_FORMAT=PAGE INSERT_METHOD=LAST UNION=(`mrg`.`t1`)
+ ALTER TABLE t1 ROW_FORMAT=COMPACT;
+ SHOW CREATE TABLE t1;
+ Table Create Table
+ t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) DEFAULT NULL
+-) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT
++) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT INSERT_METHOD=LAST UNION=(`mrg`.`t1`)
DROP TABLE t1;
diff --git a/storage/myisammrg/mysql-test/storage_engine/vcol.rdiff b/storage/myisammrg/mysql-test/storage_engine/vcol.rdiff
index 243bbafe7fa..6c4971c63be 100644
--- a/storage/myisammrg/mysql-test/storage_engine/vcol.rdiff
+++ b/storage/myisammrg/mysql-test/storage_engine/vcol.rdiff
@@ -6,12 +6,12 @@
-SHOW COLUMNS IN t1;
-Field Type Null Key Default Extra
-a int(11) # #
--b int(11) # # VIRTUAL
+-b int(11) # # VIRTUAL GENERATED
-INSERT INTO t1 (a) VALUES (1),(2);
-INSERT INTO t1 (a,b) VALUES (3,3),(4,4);
-Warnings:
--Warning 1906 The value specified for computed column 'b' in table 't1' has been ignored
--Warning 1906 The value specified for computed column 'b' in table 't1' has been ignored
+-Warning 1906 The value specified for generated column 'b' in table 't1' has been ignored
+-Warning 1906 The value specified for generated column 'b' in table 't1' has been ignored
-SELECT a,b FROM t1;
-a b
-1 2
@@ -23,12 +23,12 @@
-SHOW COLUMNS IN t1;
-Field Type Null Key Default Extra
-a int(11) # #
--b int(11) # # PERSISTENT
+-b int(11) # # STORED GENERATED
-INSERT INTO t1 (a) VALUES (1),(2);
-INSERT INTO t1 (a,b) VALUES (3,3),(4,4);
-Warnings:
--Warning 1906 The value specified for computed column 'b' in table 't1' has been ignored
--Warning 1906 The value specified for computed column 'b' in table 't1' has been ignored
+-Warning 1906 The value specified for generated column 'b' in table 't1' has been ignored
+-Warning 1906 The value specified for generated column 'b' in table 't1' has been ignored
-SELECT a,b FROM t1;
-a b
-1 2
@@ -40,12 +40,12 @@
-SHOW COLUMNS IN t1;
-Field Type Null Key Default Extra
-a int(11) # #
--b int(11) # # VIRTUAL
+-b int(11) # # VIRTUAL GENERATED
-INSERT INTO t1 (a) VALUES (1),(2);
-INSERT INTO t1 (a,b) VALUES (3,3),(4,4);
-Warnings:
--Warning 1906 The value specified for computed column 'b' in table 't1' has been ignored
--Warning 1906 The value specified for computed column 'b' in table 't1' has been ignored
+-Warning 1906 The value specified for generated column 'b' in table 't1' has been ignored
+-Warning 1906 The value specified for generated column 'b' in table 't1' has been ignored
-SELECT a,b FROM t1;
-a b
-1 2
@@ -57,12 +57,12 @@
-SHOW COLUMNS IN t1;
-Field Type Null Key Default Extra
-a int(11) # #
--b int(11) # # PERSISTENT
+-b int(11) # # STORED GENERATED
-INSERT INTO t1 (a) VALUES (1),(2);
-INSERT INTO t1 (a,b) VALUES (3,3),(4,4);
-Warnings:
--Warning 1906 The value specified for computed column 'b' in table 't1' has been ignored
--Warning 1906 The value specified for computed column 'b' in table 't1' has been ignored
+-Warning 1906 The value specified for generated column 'b' in table 't1' has been ignored
+-Warning 1906 The value specified for generated column 'b' in table 't1' has been ignored
-SELECT a,b FROM t1;
-a b
-1 2
@@ -70,11 +70,11 @@
-3 4
-4 5
-DROP TABLE t1;
-+ERROR HY000: MRG_MyISAM storage engine does not support computed columns
-+# ERROR: Statement ended with errno 1910, errname ER_UNSUPPORTED_ENGINE_FOR_VIRTUAL_COLUMNS (expected to succeed)
++ERROR HY000: MRG_MyISAM storage engine does not support generated columns
++# ERROR: Statement ended with errno 1910, errname ER_UNSUPPORTED_ENGINE_FOR_GENERATED_COLUMNS (expected to succeed)
+# ------------ UNEXPECTED RESULT ------------
+# [ CREATE TABLE t1 (a INT(11) /*!*/ /*Custom column options*/, b INT(11) /*!*/ /*Custom column options*/ GENERATED ALWAYS AS (a+1)) ENGINE=MRG_MYISAM /*!*/ /*Custom table options*/ UNION(mrg.t1) INSERT_METHOD=LAST ]
-+# The statement|command finished with ER_UNSUPPORTED_ENGINE_FOR_VIRTUAL_COLUMNS.
++# The statement|command finished with ER_UNSUPPORTED_ENGINE_FOR_GENERATED_COLUMNS.
+# Virtual columns or the mix could be unsupported|malfunctioning, or the problem was caused by previous errors.
+# You can change the engine code, or create an rdiff, or disable the test by adding it to disabled.def.
+# Further in this test, the message might sometimes be suppressed; a part of the test might be skipped.
diff --git a/storage/oqgraph/ha_oqgraph.cc b/storage/oqgraph/ha_oqgraph.cc
index 5c244e0b07b..350a4c3b5ff 100644
--- a/storage/oqgraph/ha_oqgraph.cc
+++ b/storage/oqgraph/ha_oqgraph.cc
@@ -623,9 +623,8 @@ int ha_oqgraph::open(const char *name, int mode, uint test_if_locked)
}
if (enum open_frm_error err= open_table_from_share(thd, share, "",
- (uint) (HA_OPEN_KEYFILE | HA_OPEN_RNDFILE |
- HA_GET_INDEX | HA_TRY_READ_ONLY),
- READ_KEYINFO | COMPUTE_TYPES | EXTRA_RECORD,
+ (uint) (HA_OPEN_KEYFILE | HA_TRY_READ_ONLY),
+ EXTRA_RECORD,
thd->open_options, edges, FALSE))
{
open_table_error(share, err, EMFILE); // NOTE - EMFILE is probably bogus, it reports as too many open files (!)
@@ -663,7 +662,7 @@ int ha_oqgraph::open(const char *name, int mode, uint test_if_locked)
{
fprint_error("Column '%s.%s' (origid) is not a not-null integer type",
options->table_name, options->origid);
- closefrm(edges, 0);
+ closefrm(edges);
free_table_share(share);
DBUG_RETURN(-1);
}
@@ -673,7 +672,7 @@ int ha_oqgraph::open(const char *name, int mode, uint test_if_locked)
if (!origid) {
fprint_error("Invalid OQGRAPH backing store ('%s.origid' attribute not set to a valid column of '%s')", p+1, options->table_name);
- closefrm(edges, 0);
+ closefrm(edges);
free_table_share(share);
DBUG_RETURN(-1);
}
@@ -688,7 +687,7 @@ int ha_oqgraph::open(const char *name, int mode, uint test_if_locked)
{
fprint_error("Column '%s.%s' (destid) is not a not-null integer type or is a different type to origid attribute.",
options->table_name, options->destid);
- closefrm(edges, 0);
+ closefrm(edges);
free_table_share(share);
DBUG_RETURN(-1);
}
@@ -698,7 +697,7 @@ int ha_oqgraph::open(const char *name, int mode, uint test_if_locked)
if (!destid) {
fprint_error("Invalid OQGRAPH backing store ('%s.destid' attribute not set to a valid column of '%s')", p+1, options->table_name);
- closefrm(edges, 0);
+ closefrm(edges);
free_table_share(share);
DBUG_RETURN(-1);
}
@@ -706,7 +705,7 @@ int ha_oqgraph::open(const char *name, int mode, uint test_if_locked)
// Make sure origid column != destid column
if (strcmp( origid->field_name, destid->field_name)==0) {
fprint_error("Invalid OQGRAPH backing store ('%s.destid' attribute set to same column as origid attribute)", p+1, options->table_name);
- closefrm(edges, 0);
+ closefrm(edges);
free_table_share(share);
DBUG_RETURN(-1);
}
@@ -720,7 +719,7 @@ int ha_oqgraph::open(const char *name, int mode, uint test_if_locked)
{
fprint_error("Column '%s.%s' (weight) is not a not-null real type",
options->table_name, options->weight);
- closefrm(edges, 0);
+ closefrm(edges);
free_table_share(share);
DBUG_RETURN(-1);
}
@@ -730,7 +729,7 @@ int ha_oqgraph::open(const char *name, int mode, uint test_if_locked)
if (!weight && options->weight) {
fprint_error("Invalid OQGRAPH backing store ('%s.weight' attribute not set to a valid column of '%s')", p+1, options->table_name);
- closefrm(edges, 0);
+ closefrm(edges);
free_table_share(share);
DBUG_RETURN(-1);
}
@@ -738,7 +737,7 @@ int ha_oqgraph::open(const char *name, int mode, uint test_if_locked)
if (!(graph_share = oqgraph::create(edges, origid, destid, weight)))
{
fprint_error("Unable to create graph instance.");
- closefrm(edges, 0);
+ closefrm(edges);
free_table_share(share);
DBUG_RETURN(-1);
}
@@ -763,7 +762,7 @@ int ha_oqgraph::close(void)
if (have_table_share)
{
if (edges->file)
- closefrm(edges, 0);
+ closefrm(edges);
free_table_share(share);
have_table_share = false;
}
diff --git a/storage/oqgraph/mysql-test/oqgraph/connections_mdev5748.result b/storage/oqgraph/mysql-test/oqgraph/connections_mdev5748.result
index 3d9c13bd733..3b71112df90 100644
--- a/storage/oqgraph/mysql-test/oqgraph/connections_mdev5748.result
+++ b/storage/oqgraph/mysql-test/oqgraph/connections_mdev5748.result
@@ -1,3 +1,4 @@
+connect con1,localhost,root,,;
CREATE TABLE oq_backing (
origid INT UNSIGNED NOT NULL,
destid INT UNSIGNED NOT NULL,
@@ -25,6 +26,8 @@ destid bigint(20) unsigned YES NULL
weight double YES NULL
seq bigint(20) unsigned YES NULL
linkid bigint(20) unsigned YES NULL
+disconnect con1;
+connection default;
show tables;
Tables_in_test
oq_backing
diff --git a/storage/oqgraph/mysql-test/oqgraph/regression_mdev6282.result b/storage/oqgraph/mysql-test/oqgraph/regression_mdev6282.result
index 6b1d0a1854d..3ef61cc3e37 100644
--- a/storage/oqgraph/mysql-test/oqgraph/regression_mdev6282.result
+++ b/storage/oqgraph/mysql-test/oqgraph/regression_mdev6282.result
@@ -1,3 +1,4 @@
+connect con1,localhost,root,,test;
CREATE TABLE `db_history` (
`version` VARCHAR(10) NOT NULL,
`updateJSON` MEDIUMTEXT,
@@ -28,10 +29,15 @@ FROM `version_history` AS `v` INNER JOIN `db_history` AS `db` ON `db`.`nodeID` =
WHERE `latch` = 'breadth_first' AND `origid` = '1' ORDER BY `weight` DESC LIMIT 1;
version nodeID
0.0.3 3
+disconnect con1;
+connect con2,localhost,root,,test;
SELECT `db`.`version`, `db`.`nodeID`
FROM `version_history` AS `v` INNER JOIN `db_history` AS `db` ON `db`.`nodeID` = `v`.`linkid`
WHERE `latch` = 'breadth_first' AND `origid` = '1' ORDER BY `weight` DESC LIMIT 1;
version nodeID
0.0.3 3
+disconnect con2;
+connect con3,localhost,root,,test;
DROP TABLE version_history;
DROP TABLE db_history;
+disconnect con3;
diff --git a/storage/oqgraph/oqgraph_thunk.cc b/storage/oqgraph/oqgraph_thunk.cc
index 2bb87737aab..44cc124b01e 100644
--- a/storage/oqgraph/oqgraph_thunk.cc
+++ b/storage/oqgraph/oqgraph_thunk.cc
@@ -193,9 +193,6 @@ int oqgraph3::cursor::restore_position()
return rc;
}
- if (table.vfield)
- update_virtual_fields(table.in_use, &table);
-
table.file->position(table.record[0]);
while (memcmp(table.file->ref, _position.data(), table.file->ref_length))
@@ -206,9 +203,6 @@ int oqgraph3::cursor::restore_position()
return rc;
}
- if (table.vfield)
- update_virtual_fields(table.in_use, &table);
-
if ((_origid && vertex_id(_graph->_source->val_int()) != *_origid) ||
(_destid && vertex_id(_graph->_target->val_int()) != *_destid))
{
@@ -230,9 +224,6 @@ int oqgraph3::cursor::restore_position()
table.file->ha_rnd_end();
return rc;
}
-
- if (table.vfield)
- update_virtual_fields(table.in_use, &table);
}
_graph->_cursor= this;
@@ -310,8 +301,6 @@ int oqgraph3::cursor::seek_next()
return clear_position(rc);
}
- if (table.vfield)
- update_virtual_fields(table.in_use, &table);
_graph->_stale= true;
if ((_origid && vertex_id(_graph->_source->val_int()) != *_origid) ||
@@ -345,8 +334,6 @@ int oqgraph3::cursor::seek_prev()
return clear_position(rc);
}
- if (table.vfield)
- update_virtual_fields(table.in_use, &table);
_graph->_stale= true;
if ((_origid && vertex_id(_graph->_source->val_int()) != *_origid) ||
@@ -507,9 +494,6 @@ int oqgraph3::cursor::seek_to(
return clear_position(rc);
}
- if (table.vfield)
- update_virtual_fields(table.in_use, &table);
-
if ((_origid && vertex_id(_graph->_source->val_int()) != *_origid) ||
(_destid && vertex_id(_graph->_target->val_int()) != *_destid))
{
diff --git a/storage/perfschema/ha_perfschema.cc b/storage/perfschema/ha_perfschema.cc
index 2591ec2a568..17d33aeadb7 100644
--- a/storage/perfschema/ha_perfschema.cc
+++ b/storage/perfschema/ha_perfschema.cc
@@ -278,7 +278,6 @@ int ha_perfschema::write_row(uchar *buf)
DBUG_RETURN(HA_ERR_WRONG_COMMAND);
DBUG_ASSERT(m_table_share);
- ha_statistic_increment(&SSV::ha_write_count);
result= m_table_share->write_row(table, buf, table->field);
DBUG_RETURN(result);
}
@@ -304,7 +303,6 @@ int ha_perfschema::update_row(const uchar *old_data, uchar *new_data)
DBUG_RETURN(0);
DBUG_ASSERT(m_table);
- ha_statistic_increment(&SSV::ha_update_count);
int result= m_table->update_row(table, old_data, new_data, table->field);
DBUG_RETURN(result);
}
@@ -316,7 +314,6 @@ int ha_perfschema::delete_row(const uchar *buf)
DBUG_RETURN(HA_ERR_WRONG_COMMAND);
DBUG_ASSERT(m_table);
- ha_statistic_increment(&SSV::ha_delete_count);
int result= m_table->delete_row(table, buf, table->field);
DBUG_RETURN(result);
}
@@ -361,7 +358,6 @@ int ha_perfschema::rnd_next(uchar *buf)
}
DBUG_ASSERT(m_table);
- ha_statistic_increment(&SSV::ha_read_rnd_next_count);
int result= m_table->rnd_next();
if (result == 0)
@@ -393,7 +389,6 @@ int ha_perfschema::rnd_pos(uchar *buf, uchar *pos)
}
DBUG_ASSERT(m_table);
- ha_statistic_increment(&SSV::ha_read_rnd_count);
int result= m_table->rnd_pos(pos);
if (result == 0)
result= m_table->read_row(table, buf, table->field);
diff --git a/storage/perfschema/pfs.cc b/storage/perfschema/pfs.cc
index 295a7f8c665..dbad7e966a5 100644
--- a/storage/perfschema/pfs.cc
+++ b/storage/perfschema/pfs.cc
@@ -48,7 +48,6 @@
#include "sp_head.h"
#include "pfs_digest.h"
-using std::min;
/**
@page PAGE_PERFORMANCE_SCHEMA The Performance Schema main page
MySQL PERFORMANCE_SCHEMA implementation.
@@ -1273,7 +1272,7 @@ static int build_prefix(const LEX_STRING *prefix, const char *category,
out_ptr+= len;
*out_ptr= '/';
out_ptr++;
- *output_length= out_ptr - output;
+ *output_length= (int)(out_ptr - output);
return 0;
}
@@ -1452,7 +1451,9 @@ static void register_statement_v1(const char *category,
for (; count>0; count--, info++)
{
- DBUG_ASSERT(info->m_name != NULL);
+ if (info->m_name == NULL)
+ continue;
+
len= strlen(info->m_name);
full_length= prefix_length + len;
if (likely(full_length <= PFS_MAX_INFO_NAME_LENGTH))
@@ -1948,7 +1949,7 @@ static void set_thread_id_v1(PSI_thread *thread, ulonglong processlist_id)
PFS_thread *pfs= reinterpret_cast<PFS_thread*> (thread);
if (unlikely(pfs == NULL))
return;
- pfs->m_processlist_id= processlist_id;
+ pfs->m_processlist_id= (ulong)processlist_id;
}
/**
@@ -2027,7 +2028,7 @@ static void set_thread_account_v1(const char *user, int user_len,
DBUG_ASSERT((host != NULL) || (host_len == 0));
DBUG_ASSERT(host_len >= 0);
- host_len= min<size_t>(host_len, sizeof(pfs->m_hostname));
+ host_len= MY_MIN(host_len, static_cast<int>(sizeof(pfs->m_hostname)));
if (unlikely(pfs == NULL))
return;
@@ -4853,6 +4854,7 @@ static void end_statement_v1(PSI_statement_locker *locker, void *stmt_da)
switch(da->status())
{
+ case Diagnostics_area::DA_OK_BULK:
case Diagnostics_area::DA_EMPTY:
break;
case Diagnostics_area::DA_OK:
@@ -4986,6 +4988,7 @@ static void end_statement_v1(PSI_statement_locker *locker, void *stmt_da)
switch (da->status())
{
+ case Diagnostics_area::DA_OK_BULK:
case Diagnostics_area::DA_EMPTY:
break;
case Diagnostics_area::DA_OK:
@@ -5126,7 +5129,7 @@ static void set_socket_info_v1(PSI_socket *socket,
/** Set socket descriptor */
if (fd != NULL)
- pfs->m_fd= *fd;
+ pfs->m_fd= (uint)*fd;
/** Set raw socket address and length */
if (likely(addr != NULL && addr_len > 0))
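
The min<size_t> → MY_MIN change deals with mixed signedness: host_len is an int (asserted non-negative just above), while sizeof(...) is a size_t, so plain std::min fails template deduction, and forcing size_t would turn a buggy negative length into a huge unsigned value. A sketch of the trade-off (hypothetical clamp_len, not perfschema code):

    #include <algorithm>
    #include <cstddef>

    // len is asserted non-negative by the caller, as in the code above.
    int clamp_len(int len, std::size_t cap)
    {
      // std::min(len, cap) does not compile: deduction fails for int vs
      // size_t. Forcing size_t would wrap a negative len to a huge value,
      // so cast the capacity down to the signed type instead.
      return std::min(len, static_cast<int>(cap));
    }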
diff --git a/storage/perfschema/pfs_account.cc b/storage/perfschema/pfs_account.cc
index 6dcf44bc994..be2153e84ae 100644
--- a/storage/perfschema/pfs_account.cc
+++ b/storage/perfschema/pfs_account.cc
@@ -209,7 +209,7 @@ static void set_account_key(PFS_account_key *key,
}
ptr[0]= 0;
ptr++;
- key->m_key_length= ptr - &key->m_hash_key[0];
+ key->m_key_length= (uint)(ptr - &key->m_hash_key[0]);
}
PFS_account *
diff --git a/storage/perfschema/pfs_autosize.cc b/storage/perfschema/pfs_autosize.cc
index 2219d43b8c7..43c754939a4 100644
--- a/storage/perfschema/pfs_autosize.cc
+++ b/storage/perfschema/pfs_autosize.cc
@@ -139,7 +139,7 @@ PFS_sizing_data small_data=
/* Min tables */
200,
/* Load factors */
- 0.90, 0.90, 0.90
+ 0.90f, 0.90f, 0.90f
};
PFS_sizing_data medium_data=
@@ -155,7 +155,7 @@ PFS_sizing_data medium_data=
/* Min tables */
500,
/* Load factors */
- 0.70, 0.80, 0.90
+ 0.70f, 0.80f, 0.90f
};
PFS_sizing_data large_data=
@@ -171,7 +171,7 @@ PFS_sizing_data large_data=
/* Min tables */
10000,
/* Load factors */
- 0.50, 0.65, 0.80
+ 0.50f, 0.65f, 0.80f
};
static inline ulong apply_load_factor(ulong raw_value, float factor)
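
The f suffixes matter because 0.90 without one is a double literal: initializing a float member from it is a narrowing conversion that compilers flag (e.g. MSVC C4305, truncation from double to float). A minimal sketch:

    // 0.90 is a double literal; 0.50f etc. are already floats, so the
    // aggregate initializer below involves no narrowing at all.
    struct Load_factors
    {
      float low, medium, high;
    };

    static const Load_factors factors = {0.50f, 0.65f, 0.80f};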
diff --git a/storage/perfschema/pfs_engine_table.cc b/storage/perfschema/pfs_engine_table.cc
index 38fc44efff1..08c0fe8ecfe 100644
--- a/storage/perfschema/pfs_engine_table.cc
+++ b/storage/perfschema/pfs_engine_table.cc
@@ -1366,7 +1366,7 @@ bool pfs_show_status(handlerton *hton, THD *thd,
break;
}
- buflen= longlong10_to_str(size, buf, 10) - buf;
+ buflen= (uint)(longlong10_to_str(size, buf, 10) - buf);
if (print(thd,
PERFORMANCE_SCHEMA_str.str, PERFORMANCE_SCHEMA_str.length,
name, strlen(name),
diff --git a/storage/perfschema/pfs_events_waits.h b/storage/perfschema/pfs_events_waits.h
index be4fb36619a..702f7e3ce07 100644
--- a/storage/perfschema/pfs_events_waits.h
+++ b/storage/perfschema/pfs_events_waits.h
@@ -1,4 +1,5 @@
/* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+ Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2.0,
@@ -104,13 +105,13 @@ struct PFS_events_waits : public PFS_events
};
/** TIMED bit in the state flags bitfield. */
-#define STATE_FLAG_TIMED (1<<0)
+#define STATE_FLAG_TIMED (1U<<0)
/** THREAD bit in the state flags bitfield. */
-#define STATE_FLAG_THREAD (1<<1)
+#define STATE_FLAG_THREAD (1U<<1)
/** EVENT bit in the state flags bitfield. */
-#define STATE_FLAG_EVENT (1<<2)
+#define STATE_FLAG_EVENT (1U<<2)
/** DIGEST bit in the state flags bitfield. */
-#define STATE_FLAG_DIGEST (1<<3)
+#define STATE_FLAG_DIGEST (1U<<3)
void insert_events_waits_history(PFS_thread *thread, PFS_events_waits *wait);
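
1<<0 has type int, so OR-ing such masks into unsigned state words draws sign-conversion warnings, and shifting a signed 1 into the sign bit (1<<31) is undefined in C; starting from 1U keeps the whole expression unsigned from the outset. Sketch with hypothetical flags:

    // Flag set mirroring the STATE_FLAG_* pattern above.
    #define FLAG_TIMED  (1U << 0)  /* unsigned from the start */
    #define FLAG_THREAD (1U << 1)

    static unsigned int make_state(bool timed, bool threaded)
    {
      unsigned int state = 0;
      if (timed)    state |= FLAG_TIMED;
      if (threaded) state |= FLAG_THREAD;
      return state;
    }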
diff --git a/storage/perfschema/pfs_global.h b/storage/perfschema/pfs_global.h
index d4451cd112e..9c85506d7c2 100644
--- a/storage/perfschema/pfs_global.h
+++ b/storage/perfschema/pfs_global.h
@@ -38,7 +38,7 @@ extern bool pfs_initialized;
extern size_t pfs_allocated_memory;
#if defined(HAVE_POSIX_MEMALIGN) || defined(HAVE_MEMALIGN) || defined(HAVE_ALIGNED_MALLOC)
-#define PFS_ALIGNEMENT 64
+#define PFS_ALIGNEMENT CPU_LEVEL1_DCACHE_LINESIZE
#define PFS_ALIGNED MY_ALIGNED(PFS_ALIGNEMENT)
#else
/*
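
Replacing the hardcoded 64 with the build-time CPU_LEVEL1_DCACHE_LINESIZE lets the padding track the actual L1 data-cache line (128 bytes on some POWER and Apple ARM cores). The point of the alignment is to keep independently updated hot data on separate lines, so concurrent writers do not invalidate each other's cache (false sharing). A sketch with a stand-in constant:

    #include <cstddef>
    #include <cstdint>

    // CACHE_LINE stands in for the detected CPU_LEVEL1_DCACHE_LINESIZE.
    constexpr std::size_t CACHE_LINE = 64;

    // Each counter gets its own cache line, so two threads bumping
    // different counters never contend on the same line.
    struct alignas(CACHE_LINE) Padded_counter
    {
      std::uint64_t value;
    };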
diff --git a/storage/perfschema/pfs_host.cc b/storage/perfschema/pfs_host.cc
index 10eb21125d0..d6461ef3851 100644
--- a/storage/perfschema/pfs_host.cc
+++ b/storage/perfschema/pfs_host.cc
@@ -197,7 +197,7 @@ static void set_host_key(PFS_host_key *key,
}
ptr[0]= 0;
ptr++;
- key->m_key_length= ptr - &key->m_hash_key[0];
+ key->m_key_length= (uint)(ptr - &key->m_hash_key[0]);
}
PFS_host *find_or_create_host(PFS_thread *thread,
diff --git a/storage/perfschema/pfs_instr.cc b/storage/perfschema/pfs_instr.cc
index e4ce1ff8e13..675679f9b4d 100644
--- a/storage/perfschema/pfs_instr.cc
+++ b/storage/perfschema/pfs_instr.cc
@@ -959,7 +959,7 @@ PFS_thread* create_thread(PFS_thread_class *klass, const void *identity,
pfs->m_thread_internal_id=
PFS_atomic::add_u64(&thread_internal_id_counter, 1);
pfs->m_parent_thread_internal_id= 0;
- pfs->m_processlist_id= processlist_id;
+ pfs->m_processlist_id= (ulong)processlist_id;
pfs->m_event_id= 1;
pfs->m_stmt_lock.set_allocated();
pfs->m_session_lock.set_allocated();
@@ -1610,7 +1610,7 @@ PFS_socket* create_socket(PFS_socket_class *klass, const my_socket *fd,
uint addr_len_used= addr_len;
if (fd != NULL)
- fd_used= *fd;
+ fd_used= (int)*fd;
if (addr_len_used > sizeof(sockaddr_storage))
addr_len_used= sizeof(sockaddr_storage);
diff --git a/storage/perfschema/pfs_instr_class.cc b/storage/perfschema/pfs_instr_class.cc
index 8209f6e0db2..c75418b28e9 100644
--- a/storage/perfschema/pfs_instr_class.cc
+++ b/storage/perfschema/pfs_instr_class.cc
@@ -1,4 +1,5 @@
/* Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ Copyright (c) 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2.0,
@@ -456,7 +457,7 @@ static void set_table_share_key(PFS_table_share_key *key,
ptr+= table_name_length;
ptr[0]= 0;
ptr++;
- key->m_key_length= ptr - &key->m_hash_key[0];
+ key->m_key_length= (uint)(ptr - &key->m_hash_key[0]);
if (lower_case_table_names)
{
@@ -619,6 +620,7 @@ static void init_instr_class(PFS_instr_class *klass,
DBUG_ASSERT(name_length <= PFS_MAX_INFO_NAME_LENGTH);
memset(klass, 0, sizeof(PFS_instr_class));
strncpy(klass->m_name, name, name_length);
+ klass->m_name[PFS_MAX_INFO_NAME_LENGTH - 1]= '\0';
klass->m_name_length= name_length;
klass->m_flags= flags;
klass->m_enabled= true;
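
The added line closes a classic strncpy() hole: when the source is at least as long as the count, strncpy() copies no terminator at all. Pinning a NUL into the last byte makes the buffer safe for any string consumer. Sketch (hypothetical copy_name over a fixed-size buffer):

    #include <cstring>

    // strncpy() leaves dst unterminated when src fills it completely;
    // always force a NUL into the last byte afterwards.
    static void copy_name(char dst[64], const char *src)
    {
      std::strncpy(dst, src, 64);
      dst[63] = '\0';
    }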
diff --git a/storage/perfschema/pfs_setup_actor.cc b/storage/perfschema/pfs_setup_actor.cc
index 9b2e282a08d..c4cec6c9ff8 100644
--- a/storage/perfschema/pfs_setup_actor.cc
+++ b/storage/perfschema/pfs_setup_actor.cc
@@ -159,7 +159,7 @@ static void set_setup_actor_key(PFS_setup_actor_key *key,
ptr+= role_length;
ptr[0]= 0;
ptr++;
- key->m_key_length= ptr - &key->m_hash_key[0];
+ key->m_key_length= (uint)(ptr - &key->m_hash_key[0]);
}
int insert_setup_actor(const String *user, const String *host, const String *role)
diff --git a/storage/perfschema/pfs_setup_object.cc b/storage/perfschema/pfs_setup_object.cc
index 78234966647..f33030e927f 100644
--- a/storage/perfschema/pfs_setup_object.cc
+++ b/storage/perfschema/pfs_setup_object.cc
@@ -152,7 +152,7 @@ static void set_setup_object_key(PFS_setup_object_key *key,
ptr+= object_length;
ptr[0]= 0;
ptr++;
- key->m_key_length= ptr - &key->m_hash_key[0];
+ key->m_key_length= (uint)(ptr - &key->m_hash_key[0]);
}
int insert_setup_object(enum_object_type object_type, const String *schema,
diff --git a/storage/perfschema/pfs_user.cc b/storage/perfschema/pfs_user.cc
index 099b7eba4d7..14b86e1478e 100644
--- a/storage/perfschema/pfs_user.cc
+++ b/storage/perfschema/pfs_user.cc
@@ -197,7 +197,7 @@ static void set_user_key(PFS_user_key *key,
}
ptr[0]= 0;
ptr++;
- key->m_key_length= ptr - &key->m_hash_key[0];
+ key->m_key_length= (uint)(ptr - &key->m_hash_key[0]);
}
PFS_user *
diff --git a/storage/perfschema/table_events_statements.cc b/storage/perfschema/table_events_statements.cc
index 2a238e6aff5..392f1f747f2 100644
--- a/storage/perfschema/table_events_statements.cc
+++ b/storage/perfschema/table_events_statements.cc
@@ -260,12 +260,9 @@ void table_events_statements_common::make_row_part_1(PFS_events_statements *stat
{
if (cs->mbmaxlen > 1)
{
- int well_formed_error;
- valid_length= cs->cset->well_formed_len(cs,
- statement->m_sqltext,
- statement->m_sqltext + valid_length,
- valid_length,
- &well_formed_error);
+ valid_length= Well_formed_prefix(cs,
+ statement->m_sqltext,
+ valid_length).length();
}
}
diff --git a/storage/perfschema/table_events_waits.cc b/storage/perfschema/table_events_waits.cc
index 385382b2208..d6955a46984 100644
--- a/storage/perfschema/table_events_waits.cc
+++ b/storage/perfschema/table_events_waits.cc
@@ -286,7 +286,7 @@ int table_events_waits_common::make_socket_object_columns(volatile PFS_events_wa
safe_socket->m_addr_len);
/* Convert port number to a string (length includes ':') */
- int port_len= int10_to_str(port, (port_str+1), 10) - port_str + 1;
+ int port_len= (int)(int10_to_str(port, (port_str+1), 10) - port_str + 1);
/* OBJECT NAME */
m_row.m_object_name_length= ip_length + port_len;
diff --git a/storage/perfschema/table_threads.cc b/storage/perfschema/table_threads.cc
index fc29d711812..eccf41db971 100644
--- a/storage/perfschema/table_threads.cc
+++ b/storage/perfschema/table_threads.cc
@@ -264,8 +264,8 @@ int table_threads::read_row_values(TABLE *table,
changed to less than or equal to 64 characters.
*/
set_field_varchar_utf8(f, m_row.m_processlist_state_ptr,
- std::min<uint>(m_row.m_processlist_state_length,
- f->char_length()));
+ MY_MIN(m_row.m_processlist_state_length,
+ f->char_length()));
}
else
f->set_null();
diff --git a/storage/rocksdb/.clang-format b/storage/rocksdb/.clang-format
new file mode 100644
index 00000000000..b1df76bdf2d
--- /dev/null
+++ b/storage/rocksdb/.clang-format
@@ -0,0 +1,137 @@
+# Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License, version 2.0,
+# as published by the Free Software Foundation.
+#
+# This program is also distributed with certain software (including
+# but not limited to OpenSSL) that is licensed under separate terms,
+# as designated in a particular file or component or in included license
+# documentation. The authors of MySQL hereby grant you an additional
+# permission to link the program and your derivative works with the
+# separately licensed software that they have included with MySQL.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License, version 2.0, for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+# This is the output of clang-format-5.0 --style=google --dump-config,
+# except for changes mentioned below. We lock the style so that any newer
+# version of clang-format will give the same result; as time goes on, we may

+# update this list, requiring newer versions of clang-format.
+
+Language: Cpp
+# BasedOnStyle: Google
+AccessModifierOffset: -1
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlines: Left
+AlignOperands: true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: All
+AllowShortIfStatementsOnASingleLine: true
+AllowShortLoopsOnASingleLine: true
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: true
+AlwaysBreakTemplateDeclarations: true
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+ AfterClass: false
+ AfterControlStatement: false
+ AfterEnum: false
+ AfterFunction: false
+ AfterNamespace: false
+ AfterObjCDeclaration: false
+ AfterStruct: false
+ AfterUnion: false
+ BeforeCatch: false
+ BeforeElse: false
+ IndentBraces: false
+ SplitEmptyFunction: true
+ SplitEmptyRecord: true
+ SplitEmptyNamespace: true
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Attach
+BreakBeforeInheritanceComma: false
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+BreakConstructorInitializers: BeforeColon
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: true
+ColumnLimit: 80
+CommentPragmas: '^ IWYU pragma:'
+CompactNamespaces: false
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DisableFormat: false
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: true
+ForEachMacros:
+ - foreach
+ - Q_FOREACH
+ - BOOST_FOREACH
+IncludeCategories:
+ - Regex: '^<.*\.h>'
+ Priority: 1
+ - Regex: '^<.*'
+ Priority: 2
+ - Regex: '.*'
+ Priority: 3
+IncludeIsMainRegex: '([-_](test|unittest))?$'
+IndentCaseLabels: true
+IndentWidth: 2
+IndentWrappedFunctionNames: false
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: false
+PenaltyBreakAssignment: 2
+PenaltyBreakBeforeFirstCallParameter: 1
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 200
+ReflowComments: true
+SortIncludes: true
+SortUsingDeclarations: true
+SpaceAfterCStyleCast: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 2
+SpacesInAngles: false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+TabWidth: 8
+UseTab: Never
+
+# We declare one specific pointer style since right alignment is dominant in
+# the MySQL code base (default --style=google has DerivePointerAlignment true).
+DerivePointerAlignment: false
+PointerAlignment: Right
+
+# MySQL source code is allowed to use C++11 features.
+Standard: Cpp11
diff --git a/storage/rocksdb/.gitignore b/storage/rocksdb/.gitignore
new file mode 100644
index 00000000000..adf3e154c36
--- /dev/null
+++ b/storage/rocksdb/.gitignore
@@ -0,0 +1,2 @@
+build_version.cc
+.*
diff --git a/storage/rocksdb/CMakeLists.txt b/storage/rocksdb/CMakeLists.txt
new file mode 100644
index 00000000000..cef5a8b2517
--- /dev/null
+++ b/storage/rocksdb/CMakeLists.txt
@@ -0,0 +1,281 @@
+# TODO: Copyrights
+
+MACRO(SKIP_ROCKSDB_PLUGIN msg)
+ MESSAGE_ONCE(SKIP_ROCKSDB_PLUGIN "Can't build rocksdb engine - ${msg}")
+ RETURN()
+ENDMACRO()
+
+IF (NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/rocksdb/CMakeLists.txt")
+ SKIP_ROCKSDB_PLUGIN("Missing CMakeLists.txt in rocksdb directory. Try \"git submodule update\".")
+ENDIF()
+
+CHECK_LIBRARY_EXISTS(rt timer_delete "" HAVE_TIMER_DELETE)
+IF (HAVE_TIMER_DELETE)
+ ADD_DEFINITIONS(-DHAVE_TIMER_DELETE)
+ENDIF(HAVE_TIMER_DELETE)
+
+CHECK_FUNCTION_EXISTS(sched_getcpu HAVE_SCHED_GETCPU)
+IF(HAVE_SCHED_GETCPU)
+ ADD_DEFINITIONS(-DHAVE_SCHED_GETCPU=1 -DROCKSDB_SCHED_GETCPU_PRESENT)
+ENDIF()
+
+IF(WITH_VALGRIND)
+ ADD_DEFINITIONS(-DROCKSDB_VALGRIND_RUN=1)
+ENDIF()
+
+# We've had our builders hang during the build process. This prevents
+# MariaRocks from being built on 32-bit Intel OS kernels.
+IF(CMAKE_SYSTEM_PROCESSOR MATCHES "i[36]86")
+ SKIP_ROCKSDB_PLUGIN("Intel 32 bit not supported.")
+ENDIF()
+
+# Due to retrieved data having incorrect endianness
+include(TestBigEndian)
+test_big_endian(BIG_ENDIAN)
+if(BIG_ENDIAN)
+ SKIP_ROCKSDB_PLUGIN("Big Endian not supported.")
+endif()
+
+#
+# Also, disable building on 32-bit Windows
+#
+IF (WIN32 AND CMAKE_SIZEOF_VOID_P EQUAL 4)
+  SKIP_ROCKSDB_PLUGIN("32-bit Windows is temporarily disabled")
+ENDIF()
+
+# This plugin needs a recent C++ compiler (it uses C++11 features).
+# Skip the build for older compilers.
+SET(CXX11_FLAGS)
+SET(OLD_COMPILER_MSG "requires a C++11-capable compiler (minimum supported versions are g++ 4.8, clang 3.3, VS2015)")
+
+IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
+ EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
+ IF (GCC_VERSION VERSION_LESS 4.8)
+ SKIP_ROCKSDB_PLUGIN("${OLD_COMPILER_MSG}")
+ ENDIF()
+ SET(CXX11_FLAGS "-std=c++11")
+ELSEIF (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ IF ((CMAKE_CXX_COMPILER_VERSION AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 3.3) OR
+ (CLANG_VERSION_STRING AND CLANG_VERSION_STRING VERSION_LESS 3.3))
+ SKIP_ROCKSDB_PLUGIN("${OLD_COMPILER_MSG}")
+ ENDIF()
+ SET(CXX11_FLAGS "-std=c++11 -stdlib=libstdc++")
+ELSEIF(MSVC)
+ IF (MSVC_VERSION LESS 1900)
+ SKIP_ROCKSDB_PLUGIN("${OLD_COMPILER_MSG}")
+ ENDIF()
+ELSE()
+ SKIP_ROCKSDB_PLUGIN("Compiler not supported")
+ENDIF()
+
+IF(CMAKE_VERSION GREATER 3.0)
+ SET(CMAKE_CXX_STANDARD 11)
+ELSEIF(CXX11_FLAGS)
+ ADD_DEFINITIONS(${CXX11_FLAGS})
+ENDIF()
+
+SET(ROCKSDB_SE_SOURCES
+ rdb_mariadb_server_port.cc
+ rdb_mariadb_server_port.h
+ ha_rocksdb.cc
+ ha_rocksdb.h
+ rdb_i_s.cc
+ rdb_i_s.h
+ rdb_io_watchdog.h
+ rdb_io_watchdog.cc
+ rdb_mutex_wrapper.cc
+ rdb_mutex_wrapper.h
+ rdb_index_merge.cc
+ rdb_index_merge.h
+ properties_collector.cc
+ properties_collector.h
+ rdb_datadic.cc
+ rdb_datadic.h
+ rdb_cf_manager.cc
+ rdb_cf_manager.h
+ rdb_utils.cc rdb_utils.h
+ rdb_threads.cc
+ rdb_threads.h
+ rdb_psi.h
+ rdb_psi.cc
+ rdb_sst_info.cc
+ rdb_sst_info.h
+ rdb_converter.cc
+ rdb_converter.h
+)
+
+# MariaDB: the following is added in build_rocksdb.cmake, when appropriate:
+# This is a strong requirement coming from RocksDB. No conditional checks here.
+#ADD_DEFINITIONS(-DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX
+#)
+
+MYSQL_ADD_PLUGIN(rocksdb ${ROCKSDB_SE_SOURCES} MODULE_ONLY STORAGE_ENGINE
+ MODULE_OUTPUT_NAME ha_rocksdb
+ COMPONENT rocksdb-engine)
+
+IF(NOT TARGET rocksdb)
+ # Bail out if compilation with rocksdb engine is not requested
+ RETURN()
+ENDIF()
+
+
+
+CHECK_CXX_SOURCE_COMPILES("
+#if defined(_MSC_VER) && !defined(__thread)
+#define __thread __declspec(thread)
+#endif
+int main() {
+ static __thread int tls;
+ tls=0;
+ return tls;
+}
+" HAVE_THREAD_LOCAL)
+if(HAVE_THREAD_LOCAL)
+ ADD_DEFINITIONS(-DROCKSDB_SUPPORT_THREAD_LOCAL)
+else()
+ MESSAGE(SEND_ERROR "The compiler failed the check for ROCKSDB_SUPPORT_THREAD_LOCAL. "
+ "MyRocks requires that feature.")
+endif()
+
+INCLUDE(build_rocksdb.cmake)
+
+ADD_CONVENIENCE_LIBRARY(rocksdb_aux_lib
+ ha_rocksdb_proto.h
+ logger.h
+ rdb_comparator.h
+ rdb_cf_options.cc
+ rdb_cf_options.h
+ event_listener.cc
+ event_listener.h
+ rdb_perf_context.cc
+ rdb_perf_context.h
+ rdb_buff.h
+ rdb_mariadb_port.h
+ nosql_access.cc nosql_access.h
+)
+
+ADD_DEPENDENCIES(rocksdb_aux_lib GenError)
+
+# MARIAROCKS-TODO: how to properly depend on -lrt?
+TARGET_LINK_LIBRARIES(rocksdb_aux_lib rocksdblib ${ZLIB_LIBRARY})
+if (UNIX AND NOT APPLE)
+ TARGET_LINK_LIBRARIES(rocksdb_aux_lib -lrt)
+endif()
+
+# IF (WITH_JEMALLOC)
+# FIND_LIBRARY(JEMALLOC_LIBRARY
+# NAMES libjemalloc${PIC_EXT}.a jemalloc
+# HINTS ${WITH_JEMALLOC}/lib)
+# SET(rocksdb_static_libs ${rocksdb_static_libs}
+# ${JEMALLOC_LIBRARY})
+# ADD_DEFINITIONS(-DROCKSDB_JEMALLOC)
+# ADD_DEFINITIONS(-DROCKSDB_MALLOC_USABLE_SIZE)
+# ENDIF()
+
+# MariaDB: Q: why does the upstream add libunwind for a particular
+# storage engine?
+#IF (WITH_UNWIND)
+# FIND_LIBRARY(UNWIND_LIBRARY
+# NAMES libunwind${PIC_EXT}.a unwind
+# HINTS ${WITH_UNWIND}/lib)
+# SET(rocksdb_static_libs ${rocksdb_static_libs}
+# ${UNWIND_LIBRARY})
+#ENDIF()
+
+
+TARGET_LINK_LIBRARIES(rocksdb rocksdb_aux_lib)
+ FIND_LIBRARY(LZ4_LIBRARY
+ NAMES liblz4${PIC_EXT}.a lz4
+ HINTS ${WITH_LZ4}/lib)
+
+IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+
+ # MARIAROCKS_NOT_YET: Add -frtti flag when compiling RocksDB files.
+ # TODO: is this the right way to do this?
+ # - SQL layer and storage/rocksdb/*.cc are compiled with -fnortti
+ # - RocksDB files are compiled with "-fnortti ... -frtti"
+ # - This causes RocksDB headers to be compiled with different settings:
+ # = with RTTI when compiling RocksDB
+ # = without RTTI when compiling storage/rocksdb/*.cc
+ #
+ # (facebook/mysql-5.6 just compiles everything without -f*rtti, which means
+ # everything is compiled with -frtti)
+ #
+ # (also had to add -frtti above, because something that event_listener.cc
+ # includes requires it. So, now everything in MariaRocks is compiled with
+ # -frtti)
+ set_source_files_properties(event_listener.cc rdb_cf_options.cc rdb_sst_info.cc
+ PROPERTIES COMPILE_FLAGS -frtti)
+ENDIF()
+
+CHECK_FUNCTION_EXISTS(sched_getcpu HAVE_SCHED_GETCPU)
+IF(HAVE_SCHED_GETCPU)
+ ADD_DEFINITIONS(-DHAVE_SCHED_GETCPU=1)
+# MariaDB: don't do this:
+# ADD_DEFINITIONS(-DZSTD_STATIC_LINKING_ONLY)
+ENDIF()
+
+IF (WITH_TBB)
+ FIND_LIBRARY(TBB_LIBRARY
+ NAMES libtbb${PIC_EXT}.a tbb
+ HINTS ${WITH_TBB}/lib)
+ SET(rocksdb_static_libs ${rocksdb_static_libs}
+ ${TBB_LIBRARY})
+ ADD_DEFINITIONS(-DTBB)
+ENDIF()
+
+#
+# MariaDB: a dynamic plugin build is not suitable for unit tests ATM
+#
+#IF(WITH_UNIT_TESTS AND WITH_EMBEDDED_SERVER)
+# ADD_SUBDIRECTORY(unittest)
+#ENDIF()
+
+if (UNIX AND NOT APPLE)
+ SET(rocksdb_static_libs ${rocksdb_static_libs} "-lrt")
+endif()
+
+
+ADD_LIBRARY(rocksdb_tools STATIC
+ rocksdb/tools/ldb_tool.cc
+ rocksdb/tools/ldb_cmd.cc
+ rocksdb/tools/sst_dump_tool.cc
+)
+
+MYSQL_ADD_EXECUTABLE(sst_dump rocksdb/tools/sst_dump.cc COMPONENT rocksdb-engine)
+TARGET_LINK_LIBRARIES(sst_dump rocksdblib)
+
+MYSQL_ADD_EXECUTABLE(mysql_ldb tools/mysql_ldb.cc COMPONENT rocksdb-engine)
+TARGET_LINK_LIBRARIES(mysql_ldb rocksdb_tools rocksdb_aux_lib)
+
+CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/myrocks_hotbackup.py
+ ${CMAKE_CURRENT_BINARY_DIR}/myrocks_hotbackup @ONLY)
+INSTALL_SCRIPT(${CMAKE_CURRENT_BINARY_DIR}/myrocks_hotbackup COMPONENT rocksdb-engine)
+
+IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ SET_TARGET_PROPERTIES(rocksdb_tools sst_dump mysql_ldb PROPERTIES COMPILE_FLAGS "-frtti -Wno-error")
+ENDIF()
+IF(MSVC)
+  # RocksDB, the storage engine, overdoes "const" by adding additional
+  # const qualifiers to parameters of the overridden virtual functions.
+  # This creates a lot of warnings, which we silence here.
+ ADD_DEFINITIONS(/wd4373)
+
+ # Some checks in C++ runtime that make debug build much slower
+ ADD_DEFINITIONS(-D_ITERATOR_DEBUG_LEVEL=0)
+ENDIF()
+
+IF(GIT_EXECUTABLE)
+ EXECUTE_PROCESS(
+ COMMAND ${GIT_EXECUTABLE} rev-parse HEAD
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/rocksdb
+ OUTPUT_VARIABLE OUT RESULT_VARIABLE RES)
+ IF(RES EQUAL 0)
+ STRING(REGEX REPLACE "\n$" "" ROCKSDB_GIT_HASH "${OUT}")
+ ENDIF()
+ENDIF()
+IF(ROCKSDB_GIT_HASH OR
+ (NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/rdb_source_revision.h))
+ CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/rdb_source_revision.h.in
+ ${CMAKE_CURRENT_BINARY_DIR}/rdb_source_revision.h )
+ENDIF()
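
The CHECK_CXX_SOURCE_COMPILES probe earlier in this file tests exactly this shim: GCC and Clang spell thread-local storage __thread, MSVC spells it __declspec(thread), and the #define papers over the difference. The probed pattern, extracted as a sketch:

    // On MSVC, map the GCC/Clang '__thread' keyword to its
    // __declspec(thread) equivalent, as in the compile check above.
    #if defined(_MSC_VER) && !defined(__thread)
    #define __thread __declspec(thread)
    #endif

    static __thread int tls_counter;  // one independent copy per thread

    int bump_tls() { return ++tls_counter; }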
diff --git a/storage/rocksdb/README b/storage/rocksdb/README
new file mode 100644
index 00000000000..3af455924a4
--- /dev/null
+++ b/storage/rocksdb/README
@@ -0,0 +1,50 @@
+== Summary ==
+This directory contains the RocksDB-based Storage Engine (RDBSE) for MySQL,
+also known as "MyRocks".
+
+== Resources ==
+https://github.com/facebook/mysql-5.6/wiki/Getting-Started-with-MyRocks
+https://www.facebook.com/groups/MyRocks/
+
+== Coding Conventions ==
+The baseline for MyRocks coding conventions for the code in storage/rocksdb/
+is based on the default clang format with a few minor changes. The file
+storage/rocksdb/.clang-format describes conventions and can be integrated
+with Vim or Emacs as described here:
+http://releases.llvm.org/3.6.0/tools/clang/docs/ClangFormat.html#vim-integration
+
+All code outside of storage/rocksdb/ should conform to the MySQL coding
+conventions:
+http://dev.mysql.com/doc/internals/en/coding-guidelines.html.
+
+Several refinements:
+ 0. There is an umbrella C++ namespace named "myrocks" for all MyRocks code.
+ 1. We introduced "RDB" as the super-short abbreviation for "RocksDB". We will
+    use it as a name prefix, with different capitalization (see below), to
+    ease code navigation with ctags and grep.
+ N.B. For ease of matching, we'll keep the variables and functions dealing
+ with sysvars as close as possible to the outside visible names of
+ sysvars, which start with "rocksdb_" prefix, the outward storage
+ engine name.
+ 2. The names for classes, interfaces, and C++ structures (which act as
+ classes), start with prefix "Rdb_".
+ NB: For historical reasons, we'll keep the "ha_<storage_engine_name>" class
+ name for ha_rocksdb class, which is an exception to the rule.
+ 3. The names for global objects and functions start with prefix "rdb_".
+ 4. The names for macros and constants start with prefix "RDB_".
+ 5. Regular class member names start with "m_".
+ 6. Static class member names start with "s_".
+ 7. Given the 80-character-per-line limit, we'll not always use full English
+    words in names when a well-known or easily recognizable abbreviation
+    exists (like "tx" for "transaction" or "param" for "parameter", etc).
+ 8. When needing to disambiguate, we use different suffixes for that, like
+ "_arg" for a function argument/parameter, "_arr" for a C style array, and
+ "_vect" for a std::vector etc.
+
+== Running Tests ==
+To run tests from rocksdb, rocksdb_rpl or other rocksdb_* packages, use the
+following parameters:
+ --default-storage-engine=rocksdb
+ --skip-innodb
+ --default-tmp-storage-engine=MyISAM
+ --rocksdb
diff --git a/storage/rocksdb/atomic_stat.h b/storage/rocksdb/atomic_stat.h
new file mode 100644
index 00000000000..04e59bd9a8a
--- /dev/null
+++ b/storage/rocksdb/atomic_stat.h
@@ -0,0 +1,94 @@
+/* This is an atomic integer abstract data type, for high-performance
+ tracking of a single stat. It intentionally permits inconsistent
+ atomic operations and reads, for better performance. This means
+ that, though no data should ever be lost by this stat, reads of it
+ at any time may not include all changes up to any particular point.
+
+ So, values read from these may only be approximately correct.
+
+ If your use-case will fail under these conditions, do not use this.
+
+ Copyright (C) 2012 - 2014 Steaphan Greene <steaphan@gmail.com>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the
+ Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor
+ Boston, MA 02110-1301, USA.
+*/
+
+#ifndef _atomic_stat_h_
+#define _atomic_stat_h_
+
+#include <atomic>
+
+template < typename TYPE >
+class atomic_stat {
+public:
+ // Initialize value to the default for the type
+ atomic_stat() : value_(TYPE()) {};
+
+ // This enforces a strict order, as all absolute sets should
+ void clear() {
+ value_.store(TYPE(), std::memory_order_seq_cst);
+ };
+
+ // Reads can get any valid value, it doesn't matter which, exactly
+ TYPE load() const {
+ return value_.load(std::memory_order_relaxed);
+ };
+
+ // This only supplies relative arithmetic operations
+ // These are all done atomically, and so can show up in any order
+ void inc(const TYPE &other) {
+ value_.fetch_add(other, std::memory_order_relaxed);
+ };
+
+ void dec(const TYPE &other) {
+ value_.fetch_sub(other, std::memory_order_relaxed);
+ };
+
+ void inc() {
+ value_.fetch_add(1, std::memory_order_relaxed);
+ };
+
+ void dec() {
+ value_.fetch_sub(1, std::memory_order_relaxed);
+ };
+
+ // This will make one attempt to set the value to the max of
+ // the current value, and the passed-in value. It can fail
+ // for any reason, and we only try it once.
+ void set_max_maybe(const TYPE &new_val) {
+ TYPE old_val = value_;
+ if (new_val > old_val) {
+ value_.compare_exchange_weak(old_val, new_val,
+ std::memory_order_relaxed,
+ std::memory_order_relaxed);
+ }
+ };
+
+ // This will make one attempt to assign the value to the passed-in
+ // value. It can fail for any reason, and we only try it once.
+ void set_maybe(const TYPE &new_val) {
+ TYPE old_val = value_;
+ value_.compare_exchange_weak(old_val, new_val,
+ std::memory_order_relaxed,
+ std::memory_order_relaxed);
+ };
+
+private:
+ std::atomic<TYPE> value_;
+};
+
+#endif // _atomic_stat_h_
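
A short usage sketch of the class above (hypothetical counter names): relaxed ordering means increments are never lost, but readers may observe slightly stale totals, which is exactly the contract the header comment states.

    #include "atomic_stat.h"

    // Stats updated concurrently by many threads.
    static atomic_stat<long> rows_read;
    static atomic_stat<long> max_batch;

    void on_batch(long n)
    {
      rows_read.inc(n);            // relaxed fetch_add: never lost
      max_batch.set_max_maybe(n);  // one best-effort CAS toward the max
    }

    long report() { return rows_read.load(); }  // approximate by design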
diff --git a/storage/rocksdb/build_rocksdb.cmake b/storage/rocksdb/build_rocksdb.cmake
new file mode 100644
index 00000000000..ca61842fbb8
--- /dev/null
+++ b/storage/rocksdb/build_rocksdb.cmake
@@ -0,0 +1,495 @@
+
+if(POLICY CMP0042)
+ cmake_policy(SET CMP0042 NEW)
+endif()
+
+SET(ROCKSDB_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/rocksdb)
+
+INCLUDE_DIRECTORIES(
+ ${CMAKE_CURRENT_BINARY_DIR}
+ ${ROCKSDB_SOURCE_DIR}
+ ${ROCKSDB_SOURCE_DIR}/include
+ ${ROCKSDB_SOURCE_DIR}/third-party/gtest-1.7.0/fused-src
+)
+
+list(APPEND CMAKE_MODULE_PATH "${ROCKSDB_SOURCE_DIR}/cmake/modules/")
+
+if(WIN32)
+ # include(${ROCKSDB_SOURCE_DIR}/thirdparty.inc)
+else()
+ option(WITH_ROCKSDB_JEMALLOC "build RocksDB with JeMalloc" OFF)
+ if(WITH_ROCKSDB_JEMALLOC)
+ find_package(JeMalloc REQUIRED)
+ add_definitions(-DROCKSDB_JEMALLOC)
+ ADD_DEFINITIONS(-DROCKSDB_MALLOC_USABLE_SIZE)
+ include_directories(${JEMALLOC_INCLUDE_DIR})
+ endif()
+ if(CMAKE_SYSTEM_NAME STREQUAL "FreeBSD")
+    # FreeBSD has jemalloc as the default malloc
+ add_definitions(-DROCKSDB_JEMALLOC)
+ ADD_DEFINITIONS(-DROCKSDB_MALLOC_USABLE_SIZE)
+ set(WITH_JEMALLOC ON)
+ endif()
+endif()
+
+
+# Optional compression libraries.
+
+foreach(compression_lib LZ4 BZip2 ZSTD snappy)
+ FIND_PACKAGE(${compression_lib})
+
+ SET(WITH_ROCKSDB_${compression_lib} AUTO CACHE STRING
+ "Build RocksDB with ${compression_lib} compression. Possible values are 'ON', 'OFF', 'AUTO' and default is 'AUTO'")
+
+ if(${WITH_ROCKSDB_${compression_lib}} STREQUAL "ON" AND NOT ${${compression_lib}_FOUND})
+    MESSAGE(FATAL_ERROR
+      "${compression_lib} library was not found, but WITH_ROCKSDB_${compression_lib} option is ON.\
+      Either set WITH_ROCKSDB_${compression_lib} to OFF, or make sure ${compression_lib} is installed")
+ endif()
+endforeach()
+
+if(LZ4_FOUND AND (NOT WITH_ROCKSDB_LZ4 STREQUAL "OFF"))
+ add_definitions(-DLZ4)
+ include_directories(${LZ4_INCLUDE_DIR})
+ list(APPEND THIRDPARTY_LIBS ${LZ4_LIBRARY})
+endif()
+
+if(BZIP2_FOUND AND (NOT WITH_ROCKSDB_BZip2 STREQUAL "OFF"))
+ add_definitions(-DBZIP2)
+ include_directories(${BZIP2_INCLUDE_DIR})
+ list(APPEND THIRDPARTY_LIBS ${BZIP2_LIBRARIES})
+endif()
+
+if(SNAPPY_FOUND AND (NOT WITH_ROCKSDB_snappy STREQUAL "OFF"))
+ add_definitions(-DSNAPPY)
+ include_directories(${snappy_INCLUDE_DIR})
+ list(APPEND THIRDPARTY_LIBS ${snappy_LIBRARIES})
+endif()
+
+include(CheckFunctionExists)
+if(ZSTD_FOUND AND (NOT WITH_ROCKSDB_ZSTD STREQUAL "OFF"))
+ SET(CMAKE_REQUIRED_LIBRARIES zstd)
+ CHECK_FUNCTION_EXISTS(ZDICT_trainFromBuffer ZSTD_VALID)
+ UNSET(CMAKE_REQUIRED_LIBRARIES)
+ if (WITH_ROCKSDB_ZSTD STREQUAL "ON" AND NOT ZSTD_VALID)
+ MESSAGE(FATAL_ERROR
+ "WITH_ROCKSDB_ZSTD is ON and ZSTD library was found, but the version needs to be >= 1.1.3")
+ endif()
+ if (ZSTD_VALID)
+ add_definitions(-DZSTD)
+ include_directories(${ZSTD_INCLUDE_DIR})
+ list(APPEND THIRDPARTY_LIBS ${ZSTD_LIBRARY})
+ endif()
+endif()
+
+add_definitions(-DZLIB)
+list(APPEND THIRDPARTY_LIBS ${ZLIB_LIBRARY})
+
+if(CMAKE_SYSTEM_NAME MATCHES "Cygwin")
+ add_definitions(-fno-builtin-memcmp -DCYGWIN)
+elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin")
+ add_definitions(-DOS_MACOSX)
+elseif(CMAKE_SYSTEM_NAME MATCHES "Linux")
+ add_definitions(-DOS_LINUX)
+elseif(CMAKE_SYSTEM_NAME MATCHES "SunOS")
+ add_definitions(-DOS_SOLARIS)
+elseif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
+ add_definitions(-DOS_FREEBSD)
+elseif(CMAKE_SYSTEM_NAME MATCHES "NetBSD")
+ add_definitions(-DOS_NETBSD)
+elseif(CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
+ add_definitions(-DOS_OPENBSD)
+elseif(CMAKE_SYSTEM_NAME MATCHES "DragonFly")
+ add_definitions(-DOS_DRAGONFLYBSD)
+elseif(CMAKE_SYSTEM_NAME MATCHES "Android")
+ add_definitions(-DOS_ANDROID)
+elseif(CMAKE_SYSTEM_NAME MATCHES "Windows")
+ add_definitions(-DOS_WIN)
+endif()
+
+IF(MSVC)
+ add_definitions(/wd4244)
+ENDIF()
+if(NOT WIN32)
+ add_definitions(-DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX)
+endif()
+
+include(CheckCCompilerFlag)
+# ppc64 or ppc64le
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64")
+ CHECK_C_COMPILER_FLAG("-maltivec" HAS_ALTIVEC)
+ if(HAS_ALTIVEC)
+ message(STATUS " HAS_ALTIVEC yes")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maltivec")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec")
+ endif(HAS_ALTIVEC)
+ if(NOT CMAKE_C_FLAGS MATCHES "m(cpu|tune)")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mcpu=power8")
+ endif()
+ if(NOT CMAKE_CXX_FLAGS MATCHES "m(cpu|tune)")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=power8")
+ endif()
+ ADD_DEFINITIONS(-DHAVE_POWER8 -DHAS_ALTIVEC)
+endif(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64")
+
+option(WITH_FALLOCATE "build with fallocate" ON)
+
+if(WITH_FALLOCATE AND UNIX)
+ include(CheckCSourceCompiles)
+ CHECK_C_SOURCE_COMPILES("
+#include <fcntl.h>
+#include <linux/falloc.h>
+int main() {
+ int fd = open(\"/dev/null\", 0);
+ fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, 1024);
+}
+" HAVE_FALLOCATE)
+ if(HAVE_FALLOCATE)
+ add_definitions(-DROCKSDB_FALLOCATE_PRESENT)
+ endif()
+endif()
+
+CHECK_FUNCTION_EXISTS(malloc_usable_size HAVE_MALLOC_USABLE_SIZE)
+if(HAVE_MALLOC_USABLE_SIZE)
+ add_definitions(-DROCKSDB_MALLOC_USABLE_SIZE)
+endif()
+
+include_directories(${ROCKSDB_SOURCE_DIR})
+include_directories(${ROCKSDB_SOURCE_DIR}/include)
+include_directories(SYSTEM ${ROCKSDB_SOURCE_DIR}/third-party/gtest-1.7.0/fused-src)
+
+find_package(Threads REQUIRED)
+if(WIN32)
+ set(SYSTEM_LIBS ${SYSTEM_LIBS} Shlwapi.lib Rpcrt4.lib)
+else()
+ set(SYSTEM_LIBS ${CMAKE_THREAD_LIBS_INIT} ${LIBRT} ${LIBDL})
+endif()
+
+set(ROCKSDB_LIBS rocksdblib)
+set(LIBS ${ROCKSDB_LIBS} ${THIRDPARTY_LIBS} ${SYSTEM_LIBS})
+
+#add_subdirectory(${ROCKSDB_SOURCE_DIR}/tools)
+
+# Main library source code
+# Note: RocksDB ships a lot of unit tests. We must not include those files
+# in the build, because (1) they are not needed and (2) gtest causes warnings
+# in the Windows build, which are treated as errors and make the build fail.
+#
+# Unit tests themselves:
+# - *_test.cc
+# - *_bench.cc
+#
+# Test-only support files:
+# - table/mock_table.cc
+# - utilities/cassandra/cassandra_compaction_filter.cc
+# - utilities/cassandra/format.cc
+# - utilities/cassandra/merge_operator.cc
+# - utilities/cassandra/test_utils.cc
+#
+set(ROCKSDB_SOURCES
+ cache/clock_cache.cc
+ cache/lru_cache.cc
+ cache/sharded_cache.cc
+ db/arena_wrapped_db_iter.cc
+ db/builder.cc
+ db/c.cc
+ db/column_family.cc
+ db/compacted_db_impl.cc
+ db/compaction/compaction.cc
+ db/compaction/compaction_iterator.cc
+ db/compaction/compaction_picker.cc
+ db/compaction/compaction_job.cc
+ db/compaction/compaction_picker_fifo.cc
+ db/compaction/compaction_picker_level.cc
+ db/compaction/compaction_picker_universal.cc
+ db/convenience.cc
+ db/db_filesnapshot.cc
+ db/db_impl/db_impl.cc
+ db/db_impl/db_impl_write.cc
+ db/db_impl/db_impl_compaction_flush.cc
+ db/db_impl/db_impl_files.cc
+ db/db_impl/db_impl_open.cc
+ db/db_impl/db_impl_debug.cc
+ db/db_impl/db_impl_experimental.cc
+ db/db_impl/db_impl_readonly.cc
+ db/db_impl/db_impl_secondary.cc
+ db/db_info_dumper.cc
+ db/db_iter.cc
+ db/dbformat.cc
+ db/error_handler.cc
+ db/event_helpers.cc
+ db/experimental.cc
+ db/external_sst_file_ingestion_job.cc
+ db/file_indexer.cc
+ db/flush_job.cc
+ db/flush_scheduler.cc
+ db/forward_iterator.cc
+ db/import_column_family_job.cc
+ db/internal_stats.cc
+ db/logs_with_prep_tracker.cc
+ db/log_reader.cc
+ db/log_writer.cc
+ db/malloc_stats.cc
+ db/memtable.cc
+ db/memtable_list.cc
+ db/merge_helper.cc
+ db/merge_operator.cc
+ db/range_del_aggregator.cc
+ db/range_tombstone_fragmenter.cc
+ db/repair.cc
+ db/snapshot_impl.cc
+ db/table_cache.cc
+ db/table_properties_collector.cc
+ db/transaction_log_impl.cc
+ db/trim_history_scheduler.cc
+ db/version_builder.cc
+ db/version_edit.cc
+ db/version_set.cc
+ db/wal_manager.cc
+ db/write_batch.cc
+ db/write_batch_base.cc
+ db/write_controller.cc
+ db/write_thread.cc
+ env/env.cc
+ env/env_chroot.cc
+ env/env_encryption.cc
+ env/env_hdfs.cc
+ env/file_system.cc
+ env/mock_env.cc
+ file/delete_scheduler.cc
+ file/file_prefetch_buffer.cc
+ file/file_util.cc
+ file/filename.cc
+ file/random_access_file_reader.cc
+ file/read_write_util.cc
+ file/readahead_raf.cc
+ file/sequence_file_reader.cc
+ file/sst_file_manager_impl.cc
+ file/writable_file_writer.cc
+ logging/auto_roll_logger.cc
+ logging/event_logger.cc
+ logging/log_buffer.cc
+ memory/arena.cc
+ memory/concurrent_arena.cc
+ memory/jemalloc_nodump_allocator.cc
+ memtable/alloc_tracker.cc
+ memtable/hash_linklist_rep.cc
+ memtable/hash_skiplist_rep.cc
+ memtable/skiplistrep.cc
+ memtable/vectorrep.cc
+ memtable/write_buffer_manager.cc
+ monitoring/histogram.cc
+ monitoring/histogram_windowing.cc
+ monitoring/in_memory_stats_history.cc
+ monitoring/instrumented_mutex.cc
+ monitoring/iostats_context.cc
+ monitoring/perf_context.cc
+ monitoring/perf_level.cc
+ monitoring/persistent_stats_history.cc
+ monitoring/statistics.cc
+ monitoring/thread_status_impl.cc
+ monitoring/thread_status_updater.cc
+ monitoring/thread_status_util.cc
+ monitoring/thread_status_util_debug.cc
+ options/cf_options.cc
+ options/db_options.cc
+ options/options.cc
+ options/options_helper.cc
+ options/options_parser.cc
+ options/options_sanity_check.cc
+ port/stack_trace.cc
+ table/adaptive/adaptive_table_factory.cc
+ table/block_based/block.cc
+ table/block_based/block_based_filter_block.cc
+ table/block_based/block_based_table_builder.cc
+ table/block_based/block_based_table_factory.cc
+ table/block_based/block_based_table_reader.cc
+ table/block_based/block_builder.cc
+ table/block_based/block_prefix_index.cc
+ table/block_based/data_block_hash_index.cc
+ table/block_based/data_block_footer.cc
+ table/block_based/filter_block_reader_common.cc
+ table/block_based/filter_policy.cc
+ table/block_based/flush_block_policy.cc
+ table/block_based/full_filter_block.cc
+ table/block_based/index_builder.cc
+ table/block_based/parsed_full_filter_block.cc
+ table/block_based/partitioned_filter_block.cc
+ table/block_based/uncompression_dict_reader.cc
+ table/block_fetcher.cc
+ table/cuckoo/cuckoo_table_builder.cc
+ table/cuckoo/cuckoo_table_factory.cc
+ table/cuckoo/cuckoo_table_reader.cc
+ table/format.cc
+ table/get_context.cc
+ table/iterator.cc
+ table/merging_iterator.cc
+ table/meta_blocks.cc
+ table/persistent_cache_helper.cc
+ table/plain/plain_table_bloom.cc
+ table/plain/plain_table_builder.cc
+ table/plain/plain_table_factory.cc
+ table/plain/plain_table_index.cc
+ table/plain/plain_table_key_coding.cc
+ table/plain/plain_table_reader.cc
+ table/sst_file_reader.cc
+ table/sst_file_writer.cc
+ table/table_properties.cc
+ table/two_level_iterator.cc
+ test_util/sync_point.cc
+ test_util/sync_point_impl.cc
+ test_util/testutil.cc
+ test_util/transaction_test_util.cc
+ tools/block_cache_analyzer/block_cache_trace_analyzer.cc
+ tools/dump/db_dump_tool.cc
+ tools/ldb_cmd.cc
+ tools/ldb_tool.cc
+ tools/sst_dump_tool.cc
+ tools/trace_analyzer_tool.cc
+ trace_replay/trace_replay.cc
+ trace_replay/block_cache_tracer.cc
+ util/coding.cc
+ util/compaction_job_stats_impl.cc
+ util/comparator.cc
+ util/compression_context_cache.cc
+ util/concurrent_task_limiter_impl.cc
+ util/crc32c.cc
+ util/dynamic_bloom.cc
+ util/hash.cc
+ util/murmurhash.cc
+ util/random.cc
+ util/rate_limiter.cc
+ util/slice.cc
+ util/file_checksum_helper.cc
+ util/status.cc
+ util/string_util.cc
+ util/thread_local.cc
+ util/threadpool_imp.cc
+ util/xxhash.cc
+ utilities/backupable/backupable_db.cc
+ utilities/blob_db/blob_compaction_filter.cc
+ utilities/blob_db/blob_db.cc
+ utilities/blob_db/blob_db_impl.cc
+ utilities/blob_db/blob_db_impl_filesnapshot.cc
+ utilities/blob_db/blob_dump_tool.cc
+ utilities/blob_db/blob_file.cc
+ utilities/blob_db/blob_log_reader.cc
+ utilities/blob_db/blob_log_writer.cc
+ utilities/blob_db/blob_log_format.cc
+ utilities/checkpoint/checkpoint_impl.cc
+ utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc
+ utilities/debug.cc
+ utilities/env_mirror.cc
+ utilities/env_timed.cc
+ utilities/leveldb_options/leveldb_options.cc
+ utilities/memory/memory_util.cc
+ utilities/merge_operators/bytesxor.cc
+ utilities/merge_operators/max.cc
+ utilities/merge_operators/put.cc
+ utilities/merge_operators/sortlist.cc
+ utilities/merge_operators/string_append/stringappend.cc
+ utilities/merge_operators/string_append/stringappend2.cc
+ utilities/merge_operators/uint64add.cc
+ utilities/object_registry.cc
+ utilities/option_change_migration/option_change_migration.cc
+ utilities/options/options_util.cc
+ utilities/persistent_cache/block_cache_tier.cc
+ utilities/persistent_cache/block_cache_tier_file.cc
+ utilities/persistent_cache/block_cache_tier_metadata.cc
+ utilities/persistent_cache/persistent_cache_tier.cc
+ utilities/persistent_cache/volatile_tier_impl.cc
+ utilities/simulator_cache/cache_simulator.cc
+ utilities/simulator_cache/sim_cache.cc
+ utilities/table_properties_collectors/compact_on_deletion_collector.cc
+ utilities/trace/file_trace_reader_writer.cc
+ utilities/transactions/optimistic_transaction_db_impl.cc
+ utilities/transactions/optimistic_transaction.cc
+ utilities/transactions/pessimistic_transaction.cc
+ utilities/transactions/pessimistic_transaction_db.cc
+ utilities/transactions/snapshot_checker.cc
+ utilities/transactions/transaction_base.cc
+ utilities/transactions/transaction_db_mutex_impl.cc
+ utilities/transactions/transaction_lock_mgr.cc
+ utilities/transactions/transaction_util.cc
+ utilities/transactions/write_prepared_txn.cc
+ utilities/transactions/write_prepared_txn_db.cc
+ utilities/transactions/write_unprepared_txn.cc
+ utilities/transactions/write_unprepared_txn_db.cc
+ utilities/ttl/db_ttl_impl.cc
+ utilities/write_batch_with_index/write_batch_with_index.cc
+ utilities/write_batch_with_index/write_batch_with_index_internal.cc
+)
+
+
+if(WIN32)
+ list(APPEND ROCKSDB_SOURCES
+ port/win/io_win.cc
+ port/win/env_win.cc
+ port/win/env_default.cc
+ port/win/port_win.cc
+ port/win/win_logger.cc
+ port/win/win_thread.cc
+ port/win/xpress_win.cc)
+else()
+ list(APPEND ROCKSDB_SOURCES
+ port/port_posix.cc
+ env/env_posix.cc
+ env/io_posix.cc
+ env/fs_posix.cc)
+ # ppc64 or ppc64le
+ if(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64")
+ enable_language(ASM)
+ list(APPEND ROCKSDB_SOURCES
+ util/crc32c_ppc.c
+ util/crc32c_ppc_asm.S)
+ endif(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64")
+endif()
+SET(SOURCES)
+FOREACH(s ${ROCKSDB_SOURCES})
+ list(APPEND SOURCES ${ROCKSDB_SOURCE_DIR}/${s})
+ENDFOREACH()
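+
+# A minimal alternative sketch (not used here; assumes CMake >= 3.6): the
+# test exclusions described above could also be expressed by globbing and
+# then filtering, at the cost of new files not being tracked at generate time:
+#   file(GLOB_RECURSE ALL_CC RELATIVE ${ROCKSDB_SOURCE_DIR} ${ROCKSDB_SOURCE_DIR}/*.cc)
+#   list(FILTER ALL_CC EXCLUDE REGEX "(_test|_bench)\\.cc$")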
+
+if(MSVC)
+ add_definitions(-DHAVE_SSE42 -DHAVE_PCLMUL)
+ # Workaround broken compilation with -DWIN32_LEAN_AND_MEAN
+ # (https://github.com/facebook/rocksdb/issues/4344)
+ set_source_files_properties(${ROCKSDB_SOURCE_DIR}/port/win/env_win.cc
+ PROPERTIES COMPILE_FLAGS "/FI\"windows.h\" /FI\"winioctl.h\"")
+
+ # Workaround Win8.1 SDK bug, that breaks /permissive-
+ string(REPLACE "/permissive-" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+else()
+ include(CheckCXXSourceCompiles)
+ set(CMAKE_REQUIRED_FLAGS "-msse4.2 -mpclmul ${CXX11_FLAGS}")
+
+ CHECK_CXX_SOURCE_COMPILES("
+#include <cstdint>
+#include <nmmintrin.h>
+#include <wmmintrin.h>
+int main() {
+ volatile uint32_t x = _mm_crc32_u32(0, 0);
+ const auto a = _mm_set_epi64x(0, 0);
+ const auto b = _mm_set_epi64x(0, 0);
+ const auto c = _mm_clmulepi64_si128(a, b, 0x00);
+ auto d = _mm_cvtsi128_si64(c);
+}
+" HAVE_SSE42)
+ if(HAVE_SSE42)
+ set_source_files_properties(${ROCKSDB_SOURCE_DIR}/util/crc32c.cc
+ PROPERTIES COMPILE_FLAGS "-DHAVE_SSE42 -DHAVE_PCLMUL -msse4.2 -mpclmul")
+ endif()
+ unset(CMAKE_REQUIRED_FLAGS)
+endif()
+
+IF(CMAKE_VERSION VERSION_GREATER "2.8.10")
+ STRING(TIMESTAMP GIT_DATE_TIME "%Y-%m-%d %H:%M:%S")
+ENDIF()
+
+CONFIGURE_FILE(${ROCKSDB_SOURCE_DIR}/util/build_version.cc.in build_version.cc @ONLY)
+INCLUDE_DIRECTORIES(${ROCKSDB_SOURCE_DIR}/util)
+list(APPEND SOURCES ${CMAKE_CURRENT_BINARY_DIR}/build_version.cc)
+
+ADD_CONVENIENCE_LIBRARY(rocksdblib ${SOURCES})
+target_link_libraries(rocksdblib ${THIRDPARTY_LIBS} ${SYSTEM_LIBS})
+IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ set_target_properties(rocksdblib PROPERTIES COMPILE_FLAGS "-fPIC -fno-builtin-memcmp -frtti -Wno-error")
+endif()
+
diff --git a/storage/rocksdb/event_listener.cc b/storage/rocksdb/event_listener.cc
new file mode 100644
index 00000000000..7c4eed8fd7e
--- /dev/null
+++ b/storage/rocksdb/event_listener.cc
@@ -0,0 +1,97 @@
+/*
+ Copyright (c) 2015, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+
+#include <my_config.h>
+
+/* The C++ file's header */
+#include "./event_listener.h"
+
+/* C++ standard header files */
+#include <string>
+#include <vector>
+
+/* MySQL includes */
+#include <my_global.h>
+#include <mysql/plugin.h>
+
+/* MyRocks includes */
+#include "./ha_rocksdb.h"
+#include "./properties_collector.h"
+#include "./rdb_datadic.h"
+
+namespace myrocks {
+
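+// Collect the Rdb_index_stats that Rdb_tbl_prop_coll stored in the table
+// properties of each of the given SST files, concatenated into one vector.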
+static std::vector<Rdb_index_stats> extract_index_stats(
+ const std::vector<std::string> &files,
+ const rocksdb::TablePropertiesCollection &props) {
+ std::vector<Rdb_index_stats> ret;
+ for (const auto &fn : files) {
+ const auto it = props.find(fn);
+ DBUG_ASSERT(it != props.end());
+ std::vector<Rdb_index_stats> stats;
+ Rdb_tbl_prop_coll::read_stats_from_tbl_props(it->second, &stats);
+ ret.insert(ret.end(), stats.begin(), stats.end());
+ }
+ return ret;
+}
+
+void Rdb_event_listener::update_index_stats(
+ const rocksdb::TableProperties &props) {
+ DBUG_ASSERT(m_ddl_manager != nullptr);
+ const auto tbl_props =
+ std::make_shared<const rocksdb::TableProperties>(props);
+
+ std::vector<Rdb_index_stats> stats;
+ Rdb_tbl_prop_coll::read_stats_from_tbl_props(tbl_props, &stats);
+
+ m_ddl_manager->adjust_stats(stats);
+}
+
+void Rdb_event_listener::OnCompactionCompleted(
+ rocksdb::DB *db, const rocksdb::CompactionJobInfo &ci) {
+ DBUG_ASSERT(db != nullptr);
+ DBUG_ASSERT(m_ddl_manager != nullptr);
+
+ if (ci.status.ok()) {
+ m_ddl_manager->adjust_stats(
+ extract_index_stats(ci.output_files, ci.table_properties),
+ extract_index_stats(ci.input_files, ci.table_properties));
+ }
+}
+
+void Rdb_event_listener::OnFlushCompleted(
+ rocksdb::DB *db, const rocksdb::FlushJobInfo &flush_job_info) {
+ DBUG_ASSERT(db != nullptr);
+ update_index_stats(flush_job_info.table_properties);
+}
+
+void Rdb_event_listener::OnExternalFileIngested(
+ rocksdb::DB *db, const rocksdb::ExternalFileIngestionInfo &info) {
+ DBUG_ASSERT(db != nullptr);
+ update_index_stats(info.table_properties);
+}
+
+void Rdb_event_listener::OnBackgroundError(
+ rocksdb::BackgroundErrorReason reason, rocksdb::Status *status) {
+ rdb_log_status_error(*status, "Error detected in background");
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: BackgroundErrorReason: %d", (int)reason);
+ if (status->IsCorruption()) {
+ rdb_persist_corruption_marker();
+ abort();
+ }
+}
+} // namespace myrocks
diff --git a/storage/rocksdb/event_listener.h b/storage/rocksdb/event_listener.h
new file mode 100644
index 00000000000..737973eb9da
--- /dev/null
+++ b/storage/rocksdb/event_listener.h
@@ -0,0 +1,49 @@
+/*
+ Copyright (c) 2015, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+#pragma once
+
+#include "rocksdb/listener.h"
+
+namespace myrocks {
+
+class Rdb_ddl_manager;
+
+class Rdb_event_listener : public rocksdb::EventListener {
+ public:
+ Rdb_event_listener(const Rdb_event_listener &) = delete;
+ Rdb_event_listener &operator=(const Rdb_event_listener &) = delete;
+
+ explicit Rdb_event_listener(Rdb_ddl_manager *const ddl_manager)
+ : m_ddl_manager(ddl_manager) {}
+
+ void OnCompactionCompleted(rocksdb::DB *db,
+ const rocksdb::CompactionJobInfo &ci) override;
+ void OnFlushCompleted(rocksdb::DB *db,
+ const rocksdb::FlushJobInfo &flush_job_info) override;
+ void OnExternalFileIngested(
+ rocksdb::DB *db,
+ const rocksdb::ExternalFileIngestionInfo &ingestion_info) override;
+
+ void OnBackgroundError(rocksdb::BackgroundErrorReason reason,
+ rocksdb::Status *status) override;
+
+ private:
+ Rdb_ddl_manager *m_ddl_manager;
+
+ void update_index_stats(const rocksdb::TableProperties &props);
+};
+
+} // namespace myrocks
diff --git a/storage/rocksdb/get_rocksdb_files.sh b/storage/rocksdb/get_rocksdb_files.sh
new file mode 100755
index 00000000000..bd5128a8609
--- /dev/null
+++ b/storage/rocksdb/get_rocksdb_files.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+MKFILE=$(mktemp)
+# Create and run a simple makefile that includes RocksDB's src.mk,
+# relative to the path of this script.
+echo "include ./storage/rocksdb/rocksdb/src.mk
+all:
+ @echo \$(LIB_SOURCES)" > $MKFILE
+for f in $(make --makefile "$MKFILE")
+do
+ echo ./rocksdb/$f
+done
+rm "$MKFILE"
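+# The generated makefile merely expands RocksDB's LIB_SOURCES, so the loop
+# above prints one relative path per library source file, e.g. (illustrative):
+#   ./rocksdb/db/db_impl/db_impl.cc
+#   ./rocksdb/util/coding.cc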
+
+# Create the build_version.cc file. Only create one if it doesn't exist or if
+# it differs, so that we don't rebuild mysqld every time.
+bv=storage/rocksdb/rocksdb/util/build_version.cc
+date=$(date +%F)
+git_sha=$(pushd storage/rocksdb/rocksdb >/dev/null && git rev-parse HEAD 2>/dev/null && popd >/dev/null)
+if [ ! -f "$bv" ] || [ -z "$git_sha" ] || ! grep -q "$git_sha" "$bv"
+then
+echo "#include \"build_version.h\"
+const char* rocksdb_build_git_sha =
+\"rocksdb_build_git_sha:$git_sha\";
+const char* rocksdb_build_git_date =
+\"rocksdb_build_git_date:$date\";
+const char* rocksdb_build_compile_date = __DATE__;" > $bv
+fi
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
new file mode 100644
index 00000000000..bf9183d7dea
--- /dev/null
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -0,0 +1,14580 @@
+/*
+ Copyright (c) 2012, Monty Program Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation // gcc: Class implementation
+#endif
+
+#define MYSQL_SERVER 1
+
+/* For use of 'PRIu64': */
+#define __STDC_FORMAT_MACROS
+
+#include <my_config.h>
+
+#include <inttypes.h>
+
+/* The C++ file's header */
+#include "./ha_rocksdb.h"
+
+/* C++ standard header files */
+#include <inttypes.h>
+#include <algorithm>
+#include <limits>
+#include <map>
+#include <queue>
+#include <set>
+#include <string>
+#include <vector>
+
+/* MySQL includes */
+#include <mysql/psi/mysql_table.h>
+// #include <thread_pool_priv.h>
+#include <mysys_err.h>
+#include "./debug_sync.h"
+#include "./my_bit.h"
+#include "./my_stacktrace.h"
+#include "./my_sys.h"
+#include "./sql_audit.h"
+#include "./sql_table.h"
+#include "./sql_hset.h"
+#ifdef MARIAROCKS_NOT_YET
+#endif
+
+/* RocksDB includes */
+#include "monitoring/histogram.h"
+#include "rocksdb/compaction_filter.h"
+#include "rocksdb/env.h"
+#include "rocksdb/memory_allocator.h"
+#include "rocksdb/persistent_cache.h"
+#include "rocksdb/rate_limiter.h"
+#include "rocksdb/slice_transform.h"
+#include "rocksdb/thread_status.h"
+#include "rocksdb/utilities/checkpoint.h"
+#include "rocksdb/utilities/convenience.h"
+#include "rocksdb/utilities/memory_util.h"
+#include "rocksdb/utilities/sim_cache.h"
+#include "rocksdb/utilities/write_batch_with_index.h"
+#include "util/stop_watch.h"
+#include "./rdb_source_revision.h"
+
+// MariaRocks: this is needed to access RocksDB debug syncpoints:
+#include "test_util/sync_point.h"
+
+/* MyRocks includes */
+#include "./event_listener.h"
+#include "./ha_rocksdb_proto.h"
+#include "./logger.h"
+#include "./nosql_access.h"
+#include "./rdb_cf_manager.h"
+#include "./rdb_cf_options.h"
+#include "./rdb_converter.h"
+#include "./rdb_datadic.h"
+#include "./rdb_i_s.h"
+#include "./rdb_index_merge.h"
+#include "./rdb_mutex_wrapper.h"
+#include "./rdb_psi.h"
+#include "./rdb_threads.h"
+#include "./rdb_mariadb_server_port.h"
+
+// Internal MySQL APIs not exposed in any header.
+extern "C" {
+/**
+ Mark transaction to rollback and mark error as fatal to a sub-statement.
+ @param thd Thread handle
+ @param all TRUE <=> rollback main transaction.
+*/
+void thd_mark_transaction_to_rollback(MYSQL_THD thd, bool all);
+
+/**
+ * Get the user thread's binary logging format
+ * @param thd user thread
+ * @return Value to be used as index into the binlog_format_names array
+ */
+int thd_binlog_format(const MYSQL_THD thd);
+
+/**
+ * Check if binary logging is filtered for thread's current db.
+ * @param thd Thread handle
+ * @retval 1 the query is not filtered, 0 otherwise.
+ */
+bool thd_binlog_filter_ok(const MYSQL_THD thd);
+}
+
+MYSQL_PLUGIN_IMPORT bool my_disable_leak_check;
+extern my_bool opt_core_file;
+
+// Needed in rocksdb_init_func
+void ignore_db_dirs_append(const char *dirname_arg);
+
+
+namespace myrocks {
+
+static st_global_stats global_stats;
+static st_export_stats export_stats;
+static st_memory_stats memory_stats;
+static st_io_stall_stats io_stall_stats;
+
+const std::string DEFAULT_CF_NAME("default");
+const std::string DEFAULT_SYSTEM_CF_NAME("__system__");
+const std::string PER_INDEX_CF_NAME("$per_index_cf");
+
+static std::vector<GL_INDEX_ID> rdb_indexes_to_recalc;
+
+#ifdef MARIADB_NOT_YET
+class Rdb_explicit_snapshot : public explicit_snapshot {
+ public:
+ static std::shared_ptr<Rdb_explicit_snapshot> create(
+ snapshot_info_st *ss_info, rocksdb::DB *db,
+ const rocksdb::Snapshot *snapshot) {
+ std::lock_guard<std::mutex> lock(explicit_snapshot_mutex);
+ auto s = std::unique_ptr<rocksdb::ManagedSnapshot>(
+ new rocksdb::ManagedSnapshot(db, snapshot));
+ if (!s) {
+ return nullptr;
+ }
+ ss_info->snapshot_id = ++explicit_snapshot_counter;
+ auto ret = std::make_shared<Rdb_explicit_snapshot>(*ss_info, std::move(s));
+ if (!ret) {
+ return nullptr;
+ }
+ explicit_snapshots[ss_info->snapshot_id] = ret;
+ return ret;
+ }
+
+ static std::string dump_snapshots() {
+ std::string str;
+ std::lock_guard<std::mutex> lock(explicit_snapshot_mutex);
+ for (const auto &elem : explicit_snapshots) {
+ const auto &ss = elem.second.lock();
+ DBUG_ASSERT(ss != nullptr);
+ const auto &info = ss->ss_info;
+ str += "\nSnapshot ID: " + std::to_string(info.snapshot_id) +
+ "\nBinlog File: " + info.binlog_file +
+ "\nBinlog Pos: " + std::to_string(info.binlog_pos) +
+ "\nGtid Executed: " + info.gtid_executed + "\n";
+ }
+
+ return str;
+ }
+
+ static std::shared_ptr<Rdb_explicit_snapshot> get(
+ const ulonglong snapshot_id) {
+ std::lock_guard<std::mutex> lock(explicit_snapshot_mutex);
+ auto elem = explicit_snapshots.find(snapshot_id);
+ if (elem == explicit_snapshots.end()) {
+ return nullptr;
+ }
+ return elem->second.lock();
+ }
+
+ rocksdb::ManagedSnapshot *get_snapshot() { return snapshot.get(); }
+
+ Rdb_explicit_snapshot(snapshot_info_st ss_info,
+ std::unique_ptr<rocksdb::ManagedSnapshot> &&snapshot)
+ : explicit_snapshot(ss_info), snapshot(std::move(snapshot)) {}
+
+ virtual ~Rdb_explicit_snapshot() {
+ std::lock_guard<std::mutex> lock(explicit_snapshot_mutex);
+ explicit_snapshots.erase(ss_info.snapshot_id);
+ }
+
+ private:
+ std::unique_ptr<rocksdb::ManagedSnapshot> snapshot;
+
+ static std::mutex explicit_snapshot_mutex;
+ static ulonglong explicit_snapshot_counter;
+ static std::unordered_map<ulonglong, std::weak_ptr<Rdb_explicit_snapshot>>
+ explicit_snapshots;
+};
+
+std::mutex Rdb_explicit_snapshot::explicit_snapshot_mutex;
+ulonglong Rdb_explicit_snapshot::explicit_snapshot_counter = 0;
+std::unordered_map<ulonglong, std::weak_ptr<Rdb_explicit_snapshot>>
+ Rdb_explicit_snapshot::explicit_snapshots;
+#endif
+
+/**
+ Updates row counters based on the table type and operation type.
+*/
+void ha_rocksdb::update_row_stats(const operation_type &type) {
+ DBUG_ASSERT(type < ROWS_MAX);
+ // Check whether we are modifying a MySQL system table.
+ if (table->s && m_tbl_def->m_is_mysql_system_table) {
+ global_stats.system_rows[type].inc();
+ } else {
+ global_stats.rows[type].inc();
+ }
+}
+
+void dbug_dump_database(rocksdb::DB *db);
+static handler *rocksdb_create_handler(my_core::handlerton *hton,
+ my_core::TABLE_SHARE *table_arg,
+ my_core::MEM_ROOT *mem_root);
+
+static rocksdb::CompactRangeOptions getCompactRangeOptions(
+ int concurrency = 0) {
+ rocksdb::CompactRangeOptions compact_range_options;
+ compact_range_options.bottommost_level_compaction =
+ rocksdb::BottommostLevelCompaction::kForce;
+ compact_range_options.exclusive_manual_compaction = false;
+ if (concurrency > 0) {
+ compact_range_options.max_subcompactions = concurrency;
+ }
+ return compact_range_options;
+}
+
+///////////////////////////////////////////////////////////
+// Parameters and settings
+///////////////////////////////////////////////////////////
+static char *rocksdb_default_cf_options = nullptr;
+static char *rocksdb_override_cf_options = nullptr;
+static char *rocksdb_update_cf_options = nullptr;
+
+///////////////////////////////////////////////////////////
+// Globals
+///////////////////////////////////////////////////////////
+handlerton *rocksdb_hton;
+
+rocksdb::TransactionDB *rdb = nullptr;
+rocksdb::HistogramImpl *commit_latency_stats = nullptr;
+
+static std::shared_ptr<rocksdb::Statistics> rocksdb_stats;
+static std::unique_ptr<rocksdb::Env> flashcache_aware_env;
+static std::shared_ptr<Rdb_tbl_prop_coll_factory> properties_collector_factory;
+
+Rdb_dict_manager dict_manager;
+Rdb_cf_manager cf_manager;
+Rdb_ddl_manager ddl_manager;
+Rdb_binlog_manager binlog_manager;
+
+#if !defined(_WIN32) && !defined(__APPLE__)
+Rdb_io_watchdog *io_watchdog = nullptr;
+#endif
+/**
+ MyRocks background thread control.
+ N.B. These threads exist in addition to RocksDB's own background threads
+ (@see rocksdb::CancelAllBackgroundWork())
+*/
+
+static Rdb_background_thread rdb_bg_thread;
+
+static Rdb_manual_compaction_thread rdb_mc_thread;
+
+// List of table names (using regex) that are exceptions to the strict
+// collation check requirement.
+Regex_list_handler *rdb_collation_exceptions;
+
+static const char **rdb_get_error_messages(int nr);
+
+static void rocksdb_flush_all_memtables() {
+ const Rdb_cf_manager &cf_manager = rdb_get_cf_manager();
+ for (const auto &cf_handle : cf_manager.get_all_cf()) {
+ rdb->Flush(rocksdb::FlushOptions(), cf_handle);
+ }
+}
+
+static void rocksdb_delete_column_family_stub(
+ THD *const /* thd */, struct st_mysql_sys_var *const /* var */,
+ void *const /* var_ptr */, const void *const /* save */) {}
+
+static int rocksdb_delete_column_family(
+ THD *const /* thd */, struct st_mysql_sys_var *const /* var */,
+ void *const /* var_ptr */, struct st_mysql_value *const value) {
+ // Return failure for now until the race condition between creating
+ // CF and deleting CF is resolved
+ return HA_EXIT_FAILURE;
+
+ char buff[STRING_BUFFER_USUAL_SIZE];
+ int len = sizeof(buff);
+
+ DBUG_ASSERT(value != nullptr);
+
+ if (const char *const cf = value->val_str(value, buff, &len)) {
+ auto &cf_manager = rdb_get_cf_manager();
+ auto ret = cf_manager.drop_cf(cf);
+ if (ret == HA_EXIT_SUCCESS) {
+ // NO_LINT_DEBUG
+ sql_print_information("RocksDB: Dropped column family: %s\n", cf);
+ } else {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Failed to drop column family: %s, error: %d\n",
+ cf, ret);
+ }
+
+ return ret;
+ }
+
+ return HA_EXIT_SUCCESS;
+}
+
+///////////////////////////////////////////////////////////
+// Hash map: table name => open table handler
+///////////////////////////////////////////////////////////
+
+namespace // anonymous namespace = not visible outside this source file
+{
+
+typedef Hash_set<Rdb_table_handler> Rdb_table_set;
+
+class Rdb_open_tables_map {
+ private:
+ /* Hash table used to track the handlers of open tables */
+ std::unordered_map<std::string, Rdb_table_handler *> m_table_map;
+
+ /* The mutex used to protect the hash table */
+ mutable mysql_mutex_t m_mutex;
+
+ public:
+ void init() {
+ m_table_map.clear();
+ mysql_mutex_init(rdb_psi_open_tbls_mutex_key, &m_mutex, MY_MUTEX_INIT_FAST);
+ }
+
+ void free() {
+ m_table_map.clear();
+ mysql_mutex_destroy(&m_mutex);
+ }
+ size_t count() { return m_table_map.size(); }
+
+ Rdb_table_handler *get_table_handler(const char *const table_name);
+ void release_table_handler(Rdb_table_handler *const table_handler);
+
+ std::vector<std::string> get_table_names(void) const;
+};
+
+} // anonymous namespace
+
+static Rdb_open_tables_map rdb_open_tables;
+
+static std::string rdb_normalize_dir(std::string dir) {
+ while (dir.size() > 0 && dir.back() == '/') {
+ dir.resize(dir.size() - 1);
+ }
+ return dir;
+}
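+
+// For example, rdb_normalize_dir("/data/checkpoint///") returns
+// "/data/checkpoint", and an all-slash input such as "///" collapses to "".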
+
+static int rocksdb_create_checkpoint(
+ THD *const thd MY_ATTRIBUTE((__unused__)),
+ struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
+ void *const save MY_ATTRIBUTE((__unused__)),
+ struct st_mysql_value *const value) {
+ char buf[FN_REFLEN];
+ int len = sizeof(buf);
+ const char *const checkpoint_dir_raw = value->val_str(value, buf, &len);
+ if (checkpoint_dir_raw) {
+ if (rdb != nullptr) {
+ std::string checkpoint_dir = rdb_normalize_dir(checkpoint_dir_raw);
+ // NO_LINT_DEBUG
+ sql_print_information("RocksDB: creating checkpoint in directory : %s\n",
+ checkpoint_dir.c_str());
+ rocksdb::Checkpoint *checkpoint;
+ auto status = rocksdb::Checkpoint::Create(rdb, &checkpoint);
+ // We can only return HA_EXIT_FAILURE/HA_EXIT_SUCCESS here, which is why
+ // the return code is ignored. Calling into rdb_error_to_mysql will call
+ // my_error for us, and the error will propagate up to the client.
+ int rc __attribute__((__unused__));
+ if (status.ok()) {
+ status = checkpoint->CreateCheckpoint(checkpoint_dir.c_str());
+ delete checkpoint;
+ if (status.ok()) {
+ // NO_LINT_DEBUG
+ sql_print_information(
+ "RocksDB: created checkpoint in directory : %s\n",
+ checkpoint_dir.c_str());
+ return HA_EXIT_SUCCESS;
+ } else {
+ rc = ha_rocksdb::rdb_error_to_mysql(status);
+ }
+ } else {
+ rc = ha_rocksdb::rdb_error_to_mysql(status);
+ }
+ }
+ }
+ return HA_EXIT_FAILURE;
+}
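+
+// A usage sketch, assuming the variable is exposed to SQL as
+// rocksdb_create_checkpoint (see the ROCKSDB_CREATE_CHECKPOINT note below):
+//   SET GLOBAL rocksdb_create_checkpoint = '/path/to/checkpoint_dir';
+// On success RocksDB materializes a consistent checkpoint (hard links where
+// possible) in that directory via rocksdb::Checkpoint::CreateCheckpoint().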
+
+/* This method is needed to indicate that the
+ ROCKSDB_CREATE_CHECKPOINT command is not read-only */
+static void rocksdb_create_checkpoint_stub(THD *const thd,
+ struct st_mysql_sys_var *const var,
+ void *const var_ptr,
+ const void *const save) {}
+
+static void rocksdb_force_flush_memtable_now_stub(
+ THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr,
+ const void *const save) {}
+
+static int rocksdb_force_flush_memtable_now(
+ THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr,
+ struct st_mysql_value *const value) {
+ // NO_LINT_DEBUG
+ sql_print_information("RocksDB: Manual memtable flush.");
+ rocksdb_flush_all_memtables();
+ return HA_EXIT_SUCCESS;
+}
+
+static void rocksdb_force_flush_memtable_and_lzero_now_stub(
+ THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr,
+ const void *const save) {}
+
+static int rocksdb_force_flush_memtable_and_lzero_now(
+ THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr,
+ struct st_mysql_value *const value) {
+ // NO_LINT_DEBUG
+ sql_print_information("RocksDB: Manual memtable and L0 flush.");
+ rocksdb_flush_all_memtables();
+
+ const Rdb_cf_manager &cf_manager = rdb_get_cf_manager();
+ rocksdb::CompactionOptions c_options = rocksdb::CompactionOptions();
+ rocksdb::ColumnFamilyMetaData metadata;
+ rocksdb::ColumnFamilyDescriptor cf_descr;
+
+ int i, max_attempts = 3, num_errors = 0;
+
+ for (const auto &cf_handle : cf_manager.get_all_cf()) {
+ for (i = 0; i < max_attempts; i++) {
+ rdb->GetColumnFamilyMetaData(cf_handle, &metadata);
+ cf_handle->GetDescriptor(&cf_descr);
+ c_options.output_file_size_limit = cf_descr.options.target_file_size_base;
+
+ DBUG_ASSERT(metadata.levels[0].level == 0);
+ std::vector<std::string> file_names;
+ for (auto &file : metadata.levels[0].files) {
+ file_names.emplace_back(file.db_path + file.name);
+ }
+
+ if (file_names.empty()) {
+ break;
+ }
+
+ rocksdb::Status s;
+ s = rdb->CompactFiles(c_options, cf_handle, file_names, 1);
+
+ // Due to a race, it's possible for CompactFiles to collide with an
+ // auto compaction and return a file-not-found error. In that case,
+ // retry.
+ if (s.IsInvalidArgument()) {
+ continue;
+ }
+
+ if (!s.ok() && !s.IsAborted()) {
+ rdb_handle_io_error(s, RDB_IO_ERROR_GENERAL);
+ return HA_EXIT_FAILURE;
+ }
+ break;
+ }
+ if (i == max_attempts) {
+ num_errors++;
+ }
+ }
+
+ return num_errors == 0 ? HA_EXIT_SUCCESS : HA_EXIT_FAILURE;
+}
+
+static void rocksdb_drop_index_wakeup_thread(
+ my_core::THD *const thd MY_ATTRIBUTE((__unused__)),
+ struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
+ void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save);
+
+static my_bool rocksdb_pause_background_work = 0;
+static mysql_mutex_t rdb_sysvars_mutex;
+static mysql_mutex_t rdb_block_cache_resize_mutex;
+
+static void rocksdb_set_pause_background_work(
+ my_core::THD *const thd MY_ATTRIBUTE((__unused__)),
+ struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
+ void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) {
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+ const bool pause_requested = *static_cast<const bool *>(save);
+ if (rocksdb_pause_background_work != pause_requested) {
+ if (pause_requested) {
+ rdb->PauseBackgroundWork();
+ } else {
+ rdb->ContinueBackgroundWork();
+ }
+ rocksdb_pause_background_work = pause_requested;
+ }
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
+static void rocksdb_set_compaction_options(THD *thd,
+ struct st_mysql_sys_var *var,
+ void *var_ptr, const void *save);
+
+static void rocksdb_set_table_stats_sampling_pct(THD *thd,
+ struct st_mysql_sys_var *var,
+ void *var_ptr,
+ const void *save);
+
+static void rocksdb_set_rate_limiter_bytes_per_sec(THD *thd,
+ struct st_mysql_sys_var *var,
+ void *var_ptr,
+ const void *save);
+
+static void rocksdb_set_sst_mgr_rate_bytes_per_sec(THD *thd,
+ struct st_mysql_sys_var *var,
+ void *var_ptr,
+ const void *save);
+
+static void rocksdb_set_delayed_write_rate(THD *thd,
+ struct st_mysql_sys_var *var,
+ void *var_ptr, const void *save);
+
+static void rocksdb_set_max_latest_deadlocks(THD *thd,
+ struct st_mysql_sys_var *var,
+ void *var_ptr, const void *save);
+
+static void rdb_set_collation_exception_list(const char *exception_list);
+static void rocksdb_set_collation_exception_list(THD *thd,
+ struct st_mysql_sys_var *var,
+ void *var_ptr,
+ const void *save);
+
+static int rocksdb_validate_update_cf_options(THD *thd,
+ struct st_mysql_sys_var *var,
+ void *save,
+ st_mysql_value *value);
+
+static void rocksdb_set_update_cf_options(THD *thd,
+ struct st_mysql_sys_var *var,
+ void *var_ptr, const void *save);
+
+static int rocksdb_check_bulk_load(
+ THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)),
+ void *save, struct st_mysql_value *value);
+
+static int rocksdb_check_bulk_load_allow_unsorted(
+ THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)),
+ void *save, struct st_mysql_value *value);
+
+static void rocksdb_set_max_background_jobs(THD *thd,
+ struct st_mysql_sys_var *const var,
+ void *const var_ptr,
+ const void *const save);
+static void rocksdb_set_bytes_per_sync(THD *thd,
+ struct st_mysql_sys_var *const var,
+ void *const var_ptr,
+ const void *const save);
+static void rocksdb_set_wal_bytes_per_sync(THD *thd,
+ struct st_mysql_sys_var *const var,
+ void *const var_ptr,
+ const void *const save);
+static int rocksdb_validate_set_block_cache_size(
+ THD *thd, struct st_mysql_sys_var *const var, void *var_ptr,
+ struct st_mysql_value *value);
+//////////////////////////////////////////////////////////////////////////////
+// Options definitions
+//////////////////////////////////////////////////////////////////////////////
+static long long rocksdb_block_cache_size;
+static long long rocksdb_sim_cache_size;
+static my_bool rocksdb_use_clock_cache;
+static double rocksdb_cache_high_pri_pool_ratio;
+static my_bool rocksdb_cache_dump;
+/* Use unsigned long long instead of uint64_t for MySQL compatibility */
+static unsigned long long // NOLINT(runtime/int)
+ rocksdb_rate_limiter_bytes_per_sec;
+static unsigned long long // NOLINT(runtime/int)
+ rocksdb_sst_mgr_rate_bytes_per_sec;
+static unsigned long long rocksdb_delayed_write_rate;
+static uint32_t rocksdb_max_latest_deadlocks;
+static unsigned long // NOLINT(runtime/int)
+ rocksdb_persistent_cache_size_mb;
+static ulong rocksdb_info_log_level;
+static char *rocksdb_wal_dir;
+static char *rocksdb_persistent_cache_path;
+static ulong rocksdb_index_type;
+static uint32_t rocksdb_flush_log_at_trx_commit;
+static uint32_t rocksdb_debug_optimizer_n_rows;
+static my_bool rocksdb_force_compute_memtable_stats;
+static uint32_t rocksdb_force_compute_memtable_stats_cachetime;
+static my_bool rocksdb_debug_optimizer_no_zero_cardinality;
+static uint32_t rocksdb_wal_recovery_mode;
+static uint32_t rocksdb_stats_level;
+static uint32_t rocksdb_access_hint_on_compaction_start;
+static char *rocksdb_compact_cf_name;
+static char *rocksdb_delete_cf_name;
+static char *rocksdb_checkpoint_name;
+static my_bool rocksdb_signal_drop_index_thread;
+static my_bool rocksdb_signal_remove_mariabackup_checkpoint;
+static my_bool rocksdb_strict_collation_check = 1;
+static my_bool rocksdb_ignore_unknown_options = 1;
+static my_bool rocksdb_enable_2pc = 0;
+static char *rocksdb_strict_collation_exceptions;
+static my_bool rocksdb_collect_sst_properties = 1;
+static my_bool rocksdb_force_flush_memtable_now_var = 0;
+static my_bool rocksdb_force_flush_memtable_and_lzero_now_var = 0;
+static my_bool rocksdb_enable_ttl = 1;
+static my_bool rocksdb_enable_ttl_read_filtering = 1;
+static int rocksdb_debug_ttl_rec_ts = 0;
+static int rocksdb_debug_ttl_snapshot_ts = 0;
+static int rocksdb_debug_ttl_read_filter_ts = 0;
+static my_bool rocksdb_debug_ttl_ignore_pk = 0;
+static my_bool rocksdb_reset_stats = 0;
+static uint32_t rocksdb_io_write_timeout_secs = 0;
+static uint32_t rocksdb_seconds_between_stat_computes = 3600;
+static long long rocksdb_compaction_sequential_deletes = 0l;
+static long long rocksdb_compaction_sequential_deletes_window = 0l;
+static long long rocksdb_compaction_sequential_deletes_file_size = 0l;
+static uint32_t rocksdb_validate_tables = 1;
+static char *rocksdb_datadir;
+static uint32_t rocksdb_table_stats_sampling_pct;
+static my_bool rocksdb_enable_bulk_load_api = 1;
+static my_bool rocksdb_print_snapshot_conflict_queries = 0;
+static my_bool rocksdb_large_prefix = 0;
+static my_bool rocksdb_allow_to_start_after_corruption = 0;
+static char* rocksdb_git_hash;
+
+char *compression_types_val=
+ const_cast<char*>(get_rocksdb_supported_compression_types());
+static unsigned long rocksdb_write_policy =
+ rocksdb::TxnDBWritePolicy::WRITE_COMMITTED;
+
+#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported
+char *rocksdb_read_free_rpl_tables;
+std::mutex rocksdb_read_free_rpl_tables_mutex;
+#if defined(HAVE_PSI_INTERFACE)
+Regex_list_handler rdb_read_free_regex_handler(key_rwlock_read_free_rpl_tables);
+#else
+Regex_list_handler rdb_read_free_regex_handler;
+#endif
+enum read_free_rpl_type { OFF = 0, PK_ONLY, PK_SK };
+static unsigned long rocksdb_read_free_rpl = read_free_rpl_type::OFF;
+#endif
+
+static my_bool rocksdb_error_on_suboptimal_collation = 1;
+static uint32_t rocksdb_stats_recalc_rate = 0;
+static uint32_t rocksdb_debug_manual_compaction_delay = 0;
+static uint32_t rocksdb_max_manual_compactions = 0;
+static my_bool rocksdb_rollback_on_timeout = FALSE;
+static my_bool rocksdb_enable_insert_with_update_caching = TRUE;
+
+std::atomic<uint64_t> rocksdb_row_lock_deadlocks(0);
+std::atomic<uint64_t> rocksdb_row_lock_wait_timeouts(0);
+std::atomic<uint64_t> rocksdb_snapshot_conflict_errors(0);
+std::atomic<uint64_t> rocksdb_wal_group_syncs(0);
+std::atomic<uint64_t> rocksdb_manual_compactions_processed(0);
+std::atomic<uint64_t> rocksdb_manual_compactions_running(0);
+#ifndef DBUG_OFF
+std::atomic<uint64_t> rocksdb_num_get_for_update_calls(0);
+#endif
+
+
+
+/*
+ Remove directory with files in it.
+ Used to remove checkpoint created by mariabackup.
+*/
+#ifdef _WIN32
+#include <direct.h> /* unlink*/
+#ifndef F_OK
+#define F_OK 0
+#endif
+#endif
+
+static int rmdir_force(const char *dir) {
+ if (access(dir, F_OK))
+ return HA_EXIT_FAILURE;
+
+ char path[FN_REFLEN];
+ char sep[] = {FN_LIBCHAR, 0};
+ int err = 0;
+
+ MY_DIR *dir_info = my_dir(dir, MYF(MY_DONT_SORT | MY_WANT_STAT));
+ if (!dir_info)
+ return 1;
+
+ for (uint i = 0; i < dir_info->number_of_files; i++) {
+ FILEINFO *file = dir_info->dir_entry + i;
+
+ strxnmov(path, sizeof(path), dir, sep, file->name, NULL);
+
+ err = my_delete(path, 0);
+
+ if (err) {
+ break;
+ }
+ }
+
+ my_dirend(dir_info);
+
+ if (!err)
+ err = rmdir(dir);
+
+ return (err == 0) ? HA_EXIT_SUCCESS : HA_EXIT_FAILURE;
+}
+
+
+static void rocksdb_remove_mariabackup_checkpoint(
+ my_core::THD *const,
+ struct st_mysql_sys_var *const ,
+ void *const var_ptr, const void *const) {
+ std::string mariabackup_checkpoint_dir(rocksdb_datadir);
+
+ mariabackup_checkpoint_dir.append("/mariabackup-checkpoint");
+
+ if (unlink(mariabackup_checkpoint_dir.c_str()) == 0)
+ return;
+
+ rmdir_force(mariabackup_checkpoint_dir.c_str());
+}
+
+
+static std::unique_ptr<rocksdb::DBOptions> rdb_init_rocksdb_db_options(void) {
+ auto o = std::unique_ptr<rocksdb::DBOptions>(new rocksdb::DBOptions());
+
+ o->create_if_missing = true;
+ o->listeners.push_back(std::make_shared<Rdb_event_listener>(&ddl_manager));
+ o->info_log_level = rocksdb::InfoLogLevel::INFO_LEVEL;
+ o->max_subcompactions = DEFAULT_SUBCOMPACTIONS;
+ o->max_open_files = -2; // auto-tune to 50% open_files_limit
+
+ o->two_write_queues = true;
+ o->manual_wal_flush = true;
+ return o;
+}
+
+/* DBOptions contains Statistics and needs to be destroyed last */
+static std::unique_ptr<rocksdb::BlockBasedTableOptions> rocksdb_tbl_options =
+ std::unique_ptr<rocksdb::BlockBasedTableOptions>(
+ new rocksdb::BlockBasedTableOptions());
+static std::unique_ptr<rocksdb::DBOptions> rocksdb_db_options =
+ rdb_init_rocksdb_db_options();
+
+static std::shared_ptr<rocksdb::RateLimiter> rocksdb_rate_limiter;
+
+/* This enum needs to be kept up to date with rocksdb::TxnDBWritePolicy */
+static const char *write_policy_names[] = {"write_committed", "write_prepared",
+ "write_unprepared", NullS};
+
+static TYPELIB write_policy_typelib = {array_elements(write_policy_names) - 1,
+ "write_policy_typelib",
+ write_policy_names, nullptr};
+
+#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported
+/* This array needs to be kept up to date with myrocks::read_free_rpl_type */
+static const char *read_free_rpl_names[] = {"OFF", "PK_ONLY", "PK_SK", NullS};
+
+static TYPELIB read_free_rpl_typelib = {array_elements(read_free_rpl_names) - 1,
+ "read_free_rpl_typelib",
+ read_free_rpl_names, nullptr};
+#endif
+
+/* This enum needs to be kept up to date with rocksdb::InfoLogLevel */
+static const char *info_log_level_names[] = {"debug_level", "info_level",
+ "warn_level", "error_level",
+ "fatal_level", NullS};
+
+static TYPELIB info_log_level_typelib = {
+ array_elements(info_log_level_names) - 1, "info_log_level_typelib",
+ info_log_level_names, nullptr};
+
+static void rocksdb_set_rocksdb_info_log_level(
+ THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr,
+ const void *const save) {
+ DBUG_ASSERT(save != nullptr);
+
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+ rocksdb_info_log_level = *static_cast<const uint64_t *>(save);
+ rocksdb_db_options->info_log->SetInfoLogLevel(
+ static_cast<rocksdb::InfoLogLevel>(rocksdb_info_log_level));
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
+static void rocksdb_set_rocksdb_stats_level(THD *const thd,
+ struct st_mysql_sys_var *const var,
+ void *const var_ptr,
+ const void *const save) {
+ DBUG_ASSERT(save != nullptr);
+
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+ rocksdb_db_options->statistics->set_stats_level(
+ static_cast<rocksdb::StatsLevel>(
+ *static_cast<const uint64_t *>(save)));
+ // Actual stats level is defined at rocksdb dbopt::statistics::stats_level_
+ // so adjusting rocksdb_stats_level here to make sure it points to
+ // the correct stats level.
+ rocksdb_stats_level = rocksdb_db_options->statistics->get_stats_level();
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
+static void rocksdb_set_reset_stats(
+ my_core::THD *const /* unused */,
+ my_core::st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
+ void *const var_ptr, const void *const save) {
+ DBUG_ASSERT(save != nullptr);
+ DBUG_ASSERT(rdb != nullptr);
+ DBUG_ASSERT(rocksdb_stats != nullptr);
+
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+
+ *static_cast<bool *>(var_ptr) = *static_cast<const bool *>(save);
+
+ if (rocksdb_reset_stats) {
+ rocksdb::Status s = rdb->ResetStats();
+
+ // RocksDB will always return success. Let's document this assumption here
+ // as well so that we'll be notified immediately if the contract changes.
+ DBUG_ASSERT(s == rocksdb::Status::OK());
+
+ s = rocksdb_stats->Reset();
+ DBUG_ASSERT(s == rocksdb::Status::OK());
+ }
+
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
+static void rocksdb_set_io_write_timeout(
+ my_core::THD *const thd MY_ATTRIBUTE((__unused__)),
+ my_core::st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
+ void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) {
+ DBUG_ASSERT(save != nullptr);
+ DBUG_ASSERT(rdb != nullptr);
+#if !defined(_WIN32) && !defined(__APPLE__)
+ DBUG_ASSERT(io_watchdog != nullptr);
+#endif
+
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+
+ const uint32_t new_val = *static_cast<const uint32_t *>(save);
+
+ rocksdb_io_write_timeout_secs = new_val;
+#if !defined(_WIN32) && !defined(__APPLE__)
+ io_watchdog->reset_timeout(rocksdb_io_write_timeout_secs);
+#endif
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
+enum rocksdb_flush_log_at_trx_commit_type : unsigned int {
+ FLUSH_LOG_NEVER = 0,
+ FLUSH_LOG_SYNC,
+ FLUSH_LOG_BACKGROUND,
+ FLUSH_LOG_MAX /* must be last */
+};
+
+static int rocksdb_validate_flush_log_at_trx_commit(
+ THD *const thd,
+ struct st_mysql_sys_var *const var, /* in: pointer to system variable */
+ void *var_ptr, /* out: immediate result for update function */
+ struct st_mysql_value *const value /* in: incoming value */) {
+ long long new_value;
+
+ /* value is NULL */
+ if (value->val_int(value, &new_value)) {
+ return HA_EXIT_FAILURE;
+ }
+
+ if (rocksdb_db_options->allow_mmap_writes && new_value != FLUSH_LOG_NEVER) {
+ return HA_EXIT_FAILURE;
+ }
+
+ *static_cast<uint32_t *>(var_ptr) = static_cast<uint32_t>(new_value);
+ return HA_EXIT_SUCCESS;
+}
+static void rocksdb_compact_column_family_stub(
+ THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr,
+ const void *const save) {}
+
+static int rocksdb_compact_column_family(THD *const thd,
+ struct st_mysql_sys_var *const var,
+ void *const var_ptr,
+ struct st_mysql_value *const value);
+
+static const char *index_type_names[] = {"kBinarySearch", "kHashSearch", NullS};
+
+static TYPELIB index_type_typelib = {array_elements(index_type_names) - 1,
+ "index_type_typelib", index_type_names,
+ nullptr};
+
+const ulong RDB_MAX_LOCK_WAIT_SECONDS = 1024 * 1024 * 1024;
+const ulong RDB_DEFAULT_MAX_ROW_LOCKS = 1024 * 1024;
+const ulong RDB_MAX_ROW_LOCKS = 1024 * 1024 * 1024;
+const ulong RDB_DEFAULT_BULK_LOAD_SIZE = 1000;
+const ulong RDB_MAX_BULK_LOAD_SIZE = 1024 * 1024 * 1024;
+const size_t RDB_DEFAULT_MERGE_BUF_SIZE = 64 * 1024 * 1024;
+const size_t RDB_MIN_MERGE_BUF_SIZE = 100;
+const size_t RDB_DEFAULT_MERGE_COMBINE_READ_SIZE = 1024 * 1024 * 1024;
+const size_t RDB_MIN_MERGE_COMBINE_READ_SIZE = 100;
+const size_t RDB_DEFAULT_MERGE_TMP_FILE_REMOVAL_DELAY = 0;
+const size_t RDB_MIN_MERGE_TMP_FILE_REMOVAL_DELAY = 0;
+const int64 RDB_DEFAULT_BLOCK_CACHE_SIZE = 512 * 1024 * 1024;
+const int64 RDB_MIN_BLOCK_CACHE_SIZE = 1024;
+const int RDB_MAX_CHECKSUMS_PCT = 100;
+const ulong RDB_DEADLOCK_DETECT_DEPTH = 50;
+
+// TODO: 0 means don't wait at all, and we don't support it yet?
+static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG,
+ "Number of seconds to wait for lock", nullptr,
+ nullptr, /*default*/ 1, /*min*/ 1,
+ /*max*/ RDB_MAX_LOCK_WAIT_SECONDS, 0);
+
+static MYSQL_THDVAR_BOOL(deadlock_detect, PLUGIN_VAR_RQCMDARG,
+ "Enables deadlock detection", nullptr, nullptr, FALSE);
+
+static MYSQL_THDVAR_ULONG(deadlock_detect_depth, PLUGIN_VAR_RQCMDARG,
+ "Number of transactions deadlock detection will "
+ "traverse through before assuming deadlock",
+ nullptr, nullptr,
+ /*default*/ RDB_DEADLOCK_DETECT_DEPTH,
+ /*min*/ 2,
+ /*max*/ ULONG_MAX, 0);
+
+static MYSQL_THDVAR_BOOL(
+ commit_time_batch_for_recovery, PLUGIN_VAR_RQCMDARG,
+ "TransactionOptions::commit_time_batch_for_recovery for RocksDB", nullptr,
+ nullptr, TRUE);
+
+static MYSQL_THDVAR_BOOL(
+ trace_sst_api, PLUGIN_VAR_RQCMDARG,
+ "Generate trace output in the log for each call to the SstFileWriter",
+ nullptr, nullptr, FALSE);
+
+static MYSQL_THDVAR_BOOL(
+ bulk_load, PLUGIN_VAR_RQCMDARG,
+ "Use bulk-load mode for inserts. This disables "
+ "unique_checks and enables rocksdb_commit_in_the_middle.",
+ rocksdb_check_bulk_load, nullptr, FALSE);
+
+static MYSQL_THDVAR_BOOL(bulk_load_allow_sk, PLUGIN_VAR_RQCMDARG,
+ "Allow bulk loading of sk keys during bulk-load. "
+ "Can be changed only when bulk load is disabled.",
+ /* Intentionally reuse unsorted's check function */
+ rocksdb_check_bulk_load_allow_unsorted, nullptr,
+ FALSE);
+
+static MYSQL_THDVAR_BOOL(bulk_load_allow_unsorted, PLUGIN_VAR_RQCMDARG,
+ "Allow unsorted input during bulk-load. "
+ "Can be changed only when bulk load is disabled.",
+ rocksdb_check_bulk_load_allow_unsorted, nullptr,
+ FALSE);
+
+static MYSQL_SYSVAR_BOOL(enable_bulk_load_api, rocksdb_enable_bulk_load_api,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Enables using SstFileWriter for bulk loading",
+ nullptr, nullptr, rocksdb_enable_bulk_load_api);
+
+static MYSQL_SYSVAR_STR(git_hash, rocksdb_git_hash,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Git revision of the RocksDB library used by MyRocks",
+ nullptr, nullptr, ROCKSDB_GIT_HASH);
+
+static MYSQL_THDVAR_STR(tmpdir, PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_MEMALLOC,
+ "Directory for temporary files during DDL operations.",
+ nullptr, nullptr, "");
+
+#define DEFAULT_SKIP_UNIQUE_CHECK_TABLES ".*"
+static MYSQL_THDVAR_STR(
+ skip_unique_check_tables, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC,
+ "Skip unique constraint checking for the specified tables", nullptr,
+ nullptr, DEFAULT_SKIP_UNIQUE_CHECK_TABLES);
+
+static MYSQL_THDVAR_BOOL(
+ commit_in_the_middle, PLUGIN_VAR_RQCMDARG,
+ "Commit rows implicitly every rocksdb_bulk_load_size, on bulk load/insert, "
+ "update and delete",
+ nullptr, nullptr, FALSE);
+
+static MYSQL_THDVAR_BOOL(
+ blind_delete_primary_key, PLUGIN_VAR_RQCMDARG,
+ "Deleting rows by primary key lookup, without reading rows (Blind Deletes)."
+ " Blind delete is disabled if the table has secondary key",
+ nullptr, nullptr, FALSE);
+
+#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported
+
+static const char *DEFAULT_READ_FREE_RPL_TABLES = ".*";
+
+static int rocksdb_validate_read_free_rpl_tables(
+ THD *thd MY_ATTRIBUTE((__unused__)),
+ struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), void *save,
+ struct st_mysql_value *value) {
+ char buff[STRING_BUFFER_USUAL_SIZE];
+ int length = sizeof(buff);
+ const char *wlist_buf = value->val_str(value, buff, &length);
+ const auto wlist = wlist_buf ? wlist_buf : DEFAULT_READ_FREE_RPL_TABLES;
+
+#if defined(HAVE_PSI_INTERFACE)
+ Regex_list_handler regex_handler(key_rwlock_read_free_rpl_tables);
+#else
+ Regex_list_handler regex_handler;
+#endif
+
+ if (!regex_handler.set_patterns(wlist)) {
+ warn_about_bad_patterns(&regex_handler, "rocksdb_read_free_rpl_tables");
+ return HA_EXIT_FAILURE;
+ }
+
+ *static_cast<const char **>(save) = my_strdup(wlist, MYF(MY_WME));
+ return HA_EXIT_SUCCESS;
+}
+
+static void rocksdb_update_read_free_rpl_tables(
+ THD *thd MY_ATTRIBUTE((__unused__)),
+ struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), void *var_ptr,
+ const void *save) {
+ const auto wlist = *static_cast<const char *const *>(save);
+ DBUG_ASSERT(wlist != nullptr);
+
+ // This is bound to succeed since we've already checked for bad patterns in
+ // rocksdb_validate_read_free_rpl_tables
+ rdb_read_free_regex_handler.set_patterns(wlist);
+
+ // update all table defs
+ struct Rdb_read_free_rpl_updater : public Rdb_tables_scanner {
+ int add_table(Rdb_tbl_def *tdef) override {
+ tdef->check_and_set_read_free_rpl_table();
+ return HA_EXIT_SUCCESS;
+ }
+ } updater;
+ ddl_manager.scan_for_tables(&updater);
+
+ if (wlist == DEFAULT_READ_FREE_RPL_TABLES) {
+ // If running SET var = DEFAULT, then rocksdb_validate_read_free_rpl_tables
+ // isn't called, and memory is never allocated for the value. Allocate it
+ // here.
+ *static_cast<const char **>(var_ptr) = my_strdup(wlist, MYF(MY_WME));
+ } else {
+ // Otherwise, we just reuse the value allocated from
+ // rocksdb_validate_read_free_rpl_tables.
+ *static_cast<const char **>(var_ptr) = wlist;
+ }
+}
+
+static MYSQL_SYSVAR_STR(
+ read_free_rpl_tables, rocksdb_read_free_rpl_tables,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC /*| PLUGIN_VAR_ALLOCATED*/,
+ "List of tables that will use read-free replication on the slave "
+ "(i.e. not lookup a row during replication)",
+ rocksdb_validate_read_free_rpl_tables, rocksdb_update_read_free_rpl_tables,
+ DEFAULT_READ_FREE_RPL_TABLES);
+
+static MYSQL_SYSVAR_ENUM(
+ read_free_rpl, rocksdb_read_free_rpl,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC,
+ "Use read-free replication on the slave (i.e. no row lookup during "
+ "replication). Default is OFF, PK_SK will enable it on all tables with "
+ "primary key. PK_ONLY will enable it on tables where the only key is the "
+ "primary key (i.e. no secondary keys).",
+ nullptr, nullptr, read_free_rpl_type::OFF, &read_free_rpl_typelib);
+#endif
+
+static MYSQL_THDVAR_BOOL(skip_bloom_filter_on_read, PLUGIN_VAR_RQCMDARG,
+ "Skip using bloom filter for reads", nullptr, nullptr,
+ FALSE);
+
+static MYSQL_THDVAR_ULONG(max_row_locks, PLUGIN_VAR_RQCMDARG,
+ "Maximum number of locks a transaction can have",
+ nullptr, nullptr,
+ /*default*/ RDB_DEFAULT_MAX_ROW_LOCKS,
+ /*min*/ 1,
+ /*max*/ RDB_MAX_ROW_LOCKS, 0);
+
+static MYSQL_THDVAR_ULONGLONG(
+ write_batch_max_bytes, PLUGIN_VAR_RQCMDARG,
+ "Maximum size of write batch in bytes. 0 means no limit.", nullptr, nullptr,
+ /* default */ 0, /* min */ 0, /* max */ SIZE_T_MAX, 1);
+
+static MYSQL_THDVAR_BOOL(
+ lock_scanned_rows, PLUGIN_VAR_RQCMDARG,
+ "Take and hold locks on rows that are scanned but not updated", nullptr,
+ nullptr, FALSE);
+
+static MYSQL_THDVAR_ULONG(bulk_load_size, PLUGIN_VAR_RQCMDARG,
+ "Max #records in a batch for bulk-load mode", nullptr,
+ nullptr,
+ /*default*/ RDB_DEFAULT_BULK_LOAD_SIZE,
+ /*min*/ 1,
+ /*max*/ RDB_MAX_BULK_LOAD_SIZE, 0);
+
+static MYSQL_THDVAR_ULONGLONG(
+ merge_buf_size, PLUGIN_VAR_RQCMDARG,
+ "Size to allocate for merge sort buffers written out to disk "
+ "during inplace index creation.",
+ nullptr, nullptr,
+ /* default (64MB) */ RDB_DEFAULT_MERGE_BUF_SIZE,
+ /* min (100B) */ RDB_MIN_MERGE_BUF_SIZE,
+ /* max */ SIZE_T_MAX, 1);
+
+static MYSQL_THDVAR_ULONGLONG(
+    merge_combine_read_size, PLUGIN_VAR_RQCMDARG,
+    "Size that we have to work with during the combine (reading from disk) "
+    "phase of the external sort during fast index creation.",
+    nullptr, nullptr,
+    /* default (1GB) */ RDB_DEFAULT_MERGE_COMBINE_READ_SIZE,
+    /* min (100B) */ RDB_MIN_MERGE_COMBINE_READ_SIZE,
+    /* max */ SIZE_T_MAX, 1);
+
+static MYSQL_THDVAR_ULONGLONG(
+ merge_tmp_file_removal_delay_ms, PLUGIN_VAR_RQCMDARG,
+ "Fast index creation creates a large tmp file on disk during index "
+ "creation. Removing this large file all at once when index creation is "
+ "complete can cause trim stalls on Flash. This variable specifies a "
+ "duration to sleep (in milliseconds) between calling chsize() to truncate "
+ "the file in chunks. The chunk size is the same as merge_buf_size.",
+ nullptr, nullptr,
+ /* default (0ms) */ RDB_DEFAULT_MERGE_TMP_FILE_REMOVAL_DELAY,
+ /* min (0ms) */ RDB_MIN_MERGE_TMP_FILE_REMOVAL_DELAY,
+ /* max */ SIZE_T_MAX, 1);
+
+static MYSQL_THDVAR_INT(
+ manual_compaction_threads, PLUGIN_VAR_RQCMDARG,
+ "How many rocksdb threads to run for manual compactions", nullptr, nullptr,
+ /* default rocksdb.dboption max_subcompactions */ 0,
+ /* min */ 0, /* max */ 128, 0);
+
+static MYSQL_SYSVAR_BOOL(
+ create_if_missing,
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->create_if_missing),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::create_if_missing for RocksDB", nullptr, nullptr,
+ rocksdb_db_options->create_if_missing);
+
+static MYSQL_SYSVAR_BOOL(
+ two_write_queues,
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->two_write_queues),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::two_write_queues for RocksDB", nullptr, nullptr,
+ rocksdb_db_options->two_write_queues);
+
+static MYSQL_SYSVAR_BOOL(
+ manual_wal_flush,
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->manual_wal_flush),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::manual_wal_flush for RocksDB", nullptr, nullptr,
+ rocksdb_db_options->manual_wal_flush);
+
+static MYSQL_SYSVAR_ENUM(write_policy, rocksdb_write_policy,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::write_policy for RocksDB", nullptr,
+ nullptr, rocksdb::TxnDBWritePolicy::WRITE_COMMITTED,
+ &write_policy_typelib);
+
+static MYSQL_SYSVAR_BOOL(
+ create_missing_column_families,
+ *reinterpret_cast<my_bool *>(
+ &rocksdb_db_options->create_missing_column_families),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::create_missing_column_families for RocksDB", nullptr, nullptr,
+ rocksdb_db_options->create_missing_column_families);
+
+static MYSQL_SYSVAR_BOOL(
+ error_if_exists,
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->error_if_exists),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::error_if_exists for RocksDB", nullptr, nullptr,
+ rocksdb_db_options->error_if_exists);
+
+static MYSQL_SYSVAR_BOOL(
+ paranoid_checks,
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->paranoid_checks),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::paranoid_checks for RocksDB", nullptr, nullptr,
+ rocksdb_db_options->paranoid_checks);
+
+static MYSQL_SYSVAR_ULONGLONG(
+ rate_limiter_bytes_per_sec, rocksdb_rate_limiter_bytes_per_sec,
+ PLUGIN_VAR_RQCMDARG, "DBOptions::rate_limiter bytes_per_sec for RocksDB",
+ nullptr, rocksdb_set_rate_limiter_bytes_per_sec, /* default */ 0L,
+ /* min */ 0L, /* max */ MAX_RATE_LIMITER_BYTES_PER_SEC, 0);
+
+static MYSQL_SYSVAR_ULONGLONG(
+ sst_mgr_rate_bytes_per_sec, rocksdb_sst_mgr_rate_bytes_per_sec,
+ PLUGIN_VAR_RQCMDARG,
+ "DBOptions::sst_file_manager rate_bytes_per_sec for RocksDB", nullptr,
+ rocksdb_set_sst_mgr_rate_bytes_per_sec,
+ /* default */ DEFAULT_SST_MGR_RATE_BYTES_PER_SEC,
+ /* min */ 0L, /* max */ UINT64_MAX, 0);
+
+static MYSQL_SYSVAR_ULONGLONG(delayed_write_rate, rocksdb_delayed_write_rate,
+ PLUGIN_VAR_RQCMDARG,
+ "DBOptions::delayed_write_rate", nullptr,
+ rocksdb_set_delayed_write_rate,
+ rocksdb_db_options->delayed_write_rate, 0,
+ UINT64_MAX, 0);
+
+static MYSQL_SYSVAR_UINT(max_latest_deadlocks, rocksdb_max_latest_deadlocks,
+ PLUGIN_VAR_RQCMDARG,
+ "Maximum number of recent "
+ "deadlocks to store",
+ nullptr, rocksdb_set_max_latest_deadlocks,
+ rocksdb::kInitialMaxDeadlocks, 0, UINT32_MAX, 0);
+
+static MYSQL_SYSVAR_ENUM(
+    info_log_level, rocksdb_info_log_level, PLUGIN_VAR_RQCMDARG,
+    "Filter level for info logs to be written to the mysqld error log. "
+    "Valid values include 'debug_level', 'info_level', 'warn_level', "
+    "'error_level' and 'fatal_level'.",
+    nullptr, rocksdb_set_rocksdb_info_log_level,
+    rocksdb::InfoLogLevel::ERROR_LEVEL, &info_log_level_typelib);
+
+static MYSQL_THDVAR_INT(
+ perf_context_level, PLUGIN_VAR_RQCMDARG,
+ "Perf Context Level for rocksdb internal timer stat collection", nullptr,
+ nullptr,
+ /* default */ rocksdb::PerfLevel::kUninitialized,
+ /* min */ rocksdb::PerfLevel::kUninitialized,
+ /* max */ rocksdb::PerfLevel::kOutOfBounds - 1, 0);
+
+static MYSQL_SYSVAR_UINT(
+ wal_recovery_mode, rocksdb_wal_recovery_mode, PLUGIN_VAR_RQCMDARG,
+ "DBOptions::wal_recovery_mode for RocksDB. Default is kAbsoluteConsistency",
+ nullptr, nullptr,
+ /* default */ (uint)rocksdb::WALRecoveryMode::kAbsoluteConsistency,
+ /* min */ (uint)rocksdb::WALRecoveryMode::kTolerateCorruptedTailRecords,
+ /* max */ (uint)rocksdb::WALRecoveryMode::kSkipAnyCorruptedRecords, 0);
+
+static MYSQL_SYSVAR_UINT(
+ stats_level, rocksdb_stats_level, PLUGIN_VAR_RQCMDARG,
+ "Statistics Level for RocksDB. Default is 0 (kExceptHistogramOrTimers)",
+ nullptr, rocksdb_set_rocksdb_stats_level,
+ /* default */ (uint)rocksdb::StatsLevel::kExceptHistogramOrTimers,
+ /* min */ (uint)rocksdb::StatsLevel::kExceptHistogramOrTimers,
+ /* max */ (uint)rocksdb::StatsLevel::kAll, 0);
+
+static MYSQL_SYSVAR_SIZE_T(compaction_readahead_size,
+ rocksdb_db_options->compaction_readahead_size,
+ PLUGIN_VAR_RQCMDARG,
+ "DBOptions::compaction_readahead_size for RocksDB",
+ nullptr, nullptr,
+ rocksdb_db_options->compaction_readahead_size,
+ /* min */ 0L, /* max */ SIZE_T_MAX, 0);
+
+static MYSQL_SYSVAR_BOOL(
+ new_table_reader_for_compaction_inputs,
+ *reinterpret_cast<my_bool *>(
+ &rocksdb_db_options->new_table_reader_for_compaction_inputs),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::new_table_reader_for_compaction_inputs for RocksDB", nullptr,
+ nullptr, rocksdb_db_options->new_table_reader_for_compaction_inputs);
+
+static MYSQL_SYSVAR_UINT(
+ access_hint_on_compaction_start, rocksdb_access_hint_on_compaction_start,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::access_hint_on_compaction_start for RocksDB", nullptr, nullptr,
+ /* default */ (uint)rocksdb::Options::AccessHint::NORMAL,
+ /* min */ (uint)rocksdb::Options::AccessHint::NONE,
+ /* max */ (uint)rocksdb::Options::AccessHint::WILLNEED, 0);
+
+static MYSQL_SYSVAR_BOOL(
+ allow_concurrent_memtable_write,
+ *reinterpret_cast<my_bool *>(
+ &rocksdb_db_options->allow_concurrent_memtable_write),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::allow_concurrent_memtable_write for RocksDB", nullptr, nullptr,
+ false);
+
+static MYSQL_SYSVAR_BOOL(
+ enable_write_thread_adaptive_yield,
+ *reinterpret_cast<my_bool *>(
+ &rocksdb_db_options->enable_write_thread_adaptive_yield),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::enable_write_thread_adaptive_yield for RocksDB", nullptr,
+ nullptr, false);
+
+static MYSQL_SYSVAR_INT(max_open_files, rocksdb_db_options->max_open_files,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::max_open_files for RocksDB", nullptr,
+ nullptr, rocksdb_db_options->max_open_files,
+ /* min */ -2, /* max */ INT_MAX, 0);
+
+static MYSQL_SYSVAR_UINT64_T(max_total_wal_size,
+ rocksdb_db_options->max_total_wal_size,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::max_total_wal_size for RocksDB", nullptr,
+ nullptr, rocksdb_db_options->max_total_wal_size,
+ /* min */ 0, /* max */ LONGLONG_MAX, 0);
+
+static MYSQL_SYSVAR_BOOL(
+ use_fsync, *reinterpret_cast<my_bool *>(&rocksdb_db_options->use_fsync),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::use_fsync for RocksDB", nullptr, nullptr,
+ rocksdb_db_options->use_fsync);
+
+static MYSQL_SYSVAR_STR(wal_dir, rocksdb_wal_dir,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::wal_dir for RocksDB", nullptr, nullptr,
+ rocksdb_db_options->wal_dir.c_str());
+
+static MYSQL_SYSVAR_STR(
+ persistent_cache_path, rocksdb_persistent_cache_path,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Path for BlockBasedTableOptions::persistent_cache for RocksDB", nullptr,
+ nullptr, "");
+
+static MYSQL_SYSVAR_ULONG(
+ persistent_cache_size_mb, rocksdb_persistent_cache_size_mb,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Size of cache in MB for BlockBasedTableOptions::persistent_cache "
+ "for RocksDB",
+ nullptr, nullptr, rocksdb_persistent_cache_size_mb,
+ /* min */ 0L, /* max */ ULONG_MAX, 0);
+
+static MYSQL_SYSVAR_UINT64_T(
+ delete_obsolete_files_period_micros,
+ rocksdb_db_options->delete_obsolete_files_period_micros,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::delete_obsolete_files_period_micros for RocksDB", nullptr,
+ nullptr, rocksdb_db_options->delete_obsolete_files_period_micros,
+ /* min */ 0, /* max */ LONGLONG_MAX, 0);
+
+static MYSQL_SYSVAR_INT(max_background_jobs,
+ rocksdb_db_options->max_background_jobs,
+ PLUGIN_VAR_RQCMDARG,
+ "DBOptions::max_background_jobs for RocksDB", nullptr,
+ rocksdb_set_max_background_jobs,
+ rocksdb_db_options->max_background_jobs,
+ /* min */ -1, /* max */ MAX_BACKGROUND_JOBS, 0);
+
+static MYSQL_SYSVAR_UINT(max_subcompactions,
+ rocksdb_db_options->max_subcompactions,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::max_subcompactions for RocksDB", nullptr,
+ nullptr, rocksdb_db_options->max_subcompactions,
+ /* min */ 1, /* max */ MAX_SUBCOMPACTIONS, 0);
+
+static MYSQL_SYSVAR_SIZE_T(max_log_file_size,
+ rocksdb_db_options->max_log_file_size,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::max_log_file_size for RocksDB", nullptr,
+ nullptr, rocksdb_db_options->max_log_file_size,
+ /* min */ 0L, /* max */ SIZE_T_MAX, 0);
+
+static MYSQL_SYSVAR_SIZE_T(log_file_time_to_roll,
+ rocksdb_db_options->log_file_time_to_roll,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::log_file_time_to_roll for RocksDB",
+ nullptr, nullptr,
+ rocksdb_db_options->log_file_time_to_roll,
+ /* min */ 0L, /* max */ SIZE_T_MAX, 0);
+
+static MYSQL_SYSVAR_SIZE_T(keep_log_file_num,
+ rocksdb_db_options->keep_log_file_num,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::keep_log_file_num for RocksDB", nullptr,
+ nullptr, rocksdb_db_options->keep_log_file_num,
+ /* min */ 0L, /* max */ SIZE_T_MAX, 0);
+
+static MYSQL_SYSVAR_UINT64_T(max_manifest_file_size,
+ rocksdb_db_options->max_manifest_file_size,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::max_manifest_file_size for RocksDB",
+ nullptr, nullptr,
+ rocksdb_db_options->max_manifest_file_size,
+ /* min */ 0L, /* max */ ULONGLONG_MAX, 0);
+
+static MYSQL_SYSVAR_INT(table_cache_numshardbits,
+ rocksdb_db_options->table_cache_numshardbits,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::table_cache_numshardbits for RocksDB",
+ nullptr, nullptr,
+ rocksdb_db_options->table_cache_numshardbits,
+ // LRUCache limits this to 19 bits, anything greater
+ // fails to create a cache and returns a nullptr
+ /* min */ 0, /* max */ 19, 0);
+
+static MYSQL_SYSVAR_UINT64_T(wal_ttl_seconds, rocksdb_db_options->WAL_ttl_seconds,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::WAL_ttl_seconds for RocksDB", nullptr,
+ nullptr, rocksdb_db_options->WAL_ttl_seconds,
+ /* min */ 0L, /* max */ LONGLONG_MAX, 0);
+
+static MYSQL_SYSVAR_UINT64_T(wal_size_limit_mb,
+ rocksdb_db_options->WAL_size_limit_MB,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::WAL_size_limit_MB for RocksDB", nullptr,
+ nullptr, rocksdb_db_options->WAL_size_limit_MB,
+ /* min */ 0L, /* max */ LONGLONG_MAX, 0);
+
+static MYSQL_SYSVAR_SIZE_T(manifest_preallocation_size,
+ rocksdb_db_options->manifest_preallocation_size,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::manifest_preallocation_size for RocksDB",
+ nullptr, nullptr,
+ rocksdb_db_options->manifest_preallocation_size,
+ /* min */ 0L, /* max */ SIZE_T_MAX, 0);
+
+static MYSQL_SYSVAR_BOOL(
+ use_direct_reads,
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->use_direct_reads),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::use_direct_reads for RocksDB", nullptr, nullptr,
+ rocksdb_db_options->use_direct_reads);
+
+static MYSQL_SYSVAR_BOOL(
+ use_direct_io_for_flush_and_compaction,
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->use_direct_io_for_flush_and_compaction),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::use_direct_io_for_flush_and_compaction for RocksDB", nullptr, nullptr,
+ rocksdb_db_options->use_direct_io_for_flush_and_compaction);
+
+static MYSQL_SYSVAR_BOOL(
+ allow_mmap_reads,
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->allow_mmap_reads),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::allow_mmap_reads for RocksDB", nullptr, nullptr,
+ rocksdb_db_options->allow_mmap_reads);
+
+static MYSQL_SYSVAR_BOOL(
+ allow_mmap_writes,
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->allow_mmap_writes),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::allow_mmap_writes for RocksDB", nullptr, nullptr,
+ rocksdb_db_options->allow_mmap_writes);
+
+static MYSQL_SYSVAR_BOOL(
+ is_fd_close_on_exec,
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->is_fd_close_on_exec),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::is_fd_close_on_exec for RocksDB", nullptr, nullptr,
+ rocksdb_db_options->is_fd_close_on_exec);
+
+static MYSQL_SYSVAR_UINT(stats_dump_period_sec,
+ rocksdb_db_options->stats_dump_period_sec,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::stats_dump_period_sec for RocksDB",
+ nullptr, nullptr,
+ rocksdb_db_options->stats_dump_period_sec,
+ /* min */ 0, /* max */ INT_MAX, 0);
+
+static MYSQL_SYSVAR_BOOL(
+ advise_random_on_open,
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->advise_random_on_open),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::advise_random_on_open for RocksDB", nullptr, nullptr,
+ rocksdb_db_options->advise_random_on_open);
+
+static MYSQL_SYSVAR_SIZE_T(db_write_buffer_size,
+ rocksdb_db_options->db_write_buffer_size,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::db_write_buffer_size for RocksDB",
+ nullptr, nullptr,
+ rocksdb_db_options->db_write_buffer_size,
+ /* min */ 0L, /* max */ SIZE_T_MAX, 0);
+
+static MYSQL_SYSVAR_BOOL(
+ use_adaptive_mutex,
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->use_adaptive_mutex),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::use_adaptive_mutex for RocksDB", nullptr, nullptr,
+ rocksdb_db_options->use_adaptive_mutex);
+
+static MYSQL_SYSVAR_UINT64_T(bytes_per_sync, rocksdb_db_options->bytes_per_sync,
+ PLUGIN_VAR_RQCMDARG,
+ "DBOptions::bytes_per_sync for RocksDB", nullptr,
+ rocksdb_set_bytes_per_sync,
+ rocksdb_db_options->bytes_per_sync,
+ /* min */ 0L, /* max */ ULONGLONG_MAX, 0);
+
+static MYSQL_SYSVAR_UINT64_T(wal_bytes_per_sync,
+ rocksdb_db_options->wal_bytes_per_sync,
+ PLUGIN_VAR_RQCMDARG,
+ "DBOptions::wal_bytes_per_sync for RocksDB", nullptr,
+ rocksdb_set_wal_bytes_per_sync,
+ rocksdb_db_options->wal_bytes_per_sync,
+ /* min */ 0L, /* max */ ULONGLONG_MAX, 0);
+
+static MYSQL_SYSVAR_BOOL(
+ enable_thread_tracking,
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->enable_thread_tracking),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::enable_thread_tracking for RocksDB", nullptr, nullptr, true);
+
+static MYSQL_SYSVAR_LONGLONG(block_cache_size, rocksdb_block_cache_size,
+ PLUGIN_VAR_RQCMDARG,
+ "block_cache size for RocksDB",
+ rocksdb_validate_set_block_cache_size, nullptr,
+ /* default */ RDB_DEFAULT_BLOCK_CACHE_SIZE,
+ /* min */ RDB_MIN_BLOCK_CACHE_SIZE,
+ /* max */ LLONG_MAX,
+ /* Block size */ RDB_MIN_BLOCK_CACHE_SIZE);
+
+static MYSQL_SYSVAR_LONGLONG(sim_cache_size, rocksdb_sim_cache_size,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Simulated cache size for RocksDB", nullptr,
+ nullptr,
+ /* default */ 0,
+ /* min */ 0,
+ /* max */ LLONG_MAX,
+ /* Block size */ 0);
+
+static MYSQL_SYSVAR_BOOL(
+ use_clock_cache, rocksdb_use_clock_cache,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Use ClockCache instead of default LRUCache for RocksDB", nullptr, nullptr,
+ false);
+
+static MYSQL_SYSVAR_BOOL(cache_dump, rocksdb_cache_dump,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Include RocksDB block cache content in core dump.",
+ nullptr, nullptr, true);
+
+static MYSQL_SYSVAR_DOUBLE(cache_high_pri_pool_ratio,
+                           rocksdb_cache_high_pri_pool_ratio,
+                           PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+                           "Ratio of the block cache to reserve for the "
+                           "high-priority pool",
+                           nullptr, nullptr, /* default */ 0.0, /* min */ 0.0,
+                           /* max */ 1.0, 0);
+
+static MYSQL_SYSVAR_BOOL(
+ cache_index_and_filter_blocks,
+ *reinterpret_cast<my_bool *>(
+ &rocksdb_tbl_options->cache_index_and_filter_blocks),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "BlockBasedTableOptions::cache_index_and_filter_blocks for RocksDB",
+ nullptr, nullptr, true);
+
+static MYSQL_SYSVAR_BOOL(
+ cache_index_and_filter_with_high_priority,
+ *reinterpret_cast<my_bool *>(
+ &rocksdb_tbl_options->cache_index_and_filter_blocks_with_high_priority),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "cache_index_and_filter_blocks_with_high_priority for RocksDB", nullptr,
+ nullptr, true);
+
+// When pin_l0_filter_and_index_blocks_in_cache is true, RocksDB will use the
+// LRU cache, but will always keep the filter & index blocks' handles checked
+// out (i.e. it won't call ShardedLRUCache::Release). Because the parsed-out
+// objects stay checked out, the LRU cache can never flush them, hence they
+// are pinned.
+//
+// This fixes the mutex contention between ShardedLRUCache::Lookup and
+// ShardedLRUCache::Release which reduced the QPS ratio (QPS using secondary
+// index / QPS using PK).
+static MYSQL_SYSVAR_BOOL(
+ pin_l0_filter_and_index_blocks_in_cache,
+ *reinterpret_cast<my_bool *>(
+ &rocksdb_tbl_options->pin_l0_filter_and_index_blocks_in_cache),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "pin_l0_filter_and_index_blocks_in_cache for RocksDB", nullptr, nullptr,
+ true);
+
+static MYSQL_SYSVAR_ENUM(index_type, rocksdb_index_type,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "BlockBasedTableOptions::index_type for RocksDB",
+ nullptr, nullptr,
+ (ulong)rocksdb_tbl_options->index_type,
+ &index_type_typelib);
+
+static MYSQL_SYSVAR_BOOL(
+ hash_index_allow_collision,
+ *reinterpret_cast<my_bool *>(
+ &rocksdb_tbl_options->hash_index_allow_collision),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "BlockBasedTableOptions::hash_index_allow_collision for RocksDB", nullptr,
+ nullptr, rocksdb_tbl_options->hash_index_allow_collision);
+
+static MYSQL_SYSVAR_BOOL(
+ no_block_cache,
+ *reinterpret_cast<my_bool *>(&rocksdb_tbl_options->no_block_cache),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "BlockBasedTableOptions::no_block_cache for RocksDB", nullptr, nullptr,
+ rocksdb_tbl_options->no_block_cache);
+
+static MYSQL_SYSVAR_SIZE_T(block_size, rocksdb_tbl_options->block_size,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "BlockBasedTableOptions::block_size for RocksDB",
+ nullptr, nullptr, rocksdb_tbl_options->block_size,
+ /* min */ 1L, /* max */ SIZE_T_MAX, 0);
+
+static MYSQL_SYSVAR_INT(
+ block_size_deviation, rocksdb_tbl_options->block_size_deviation,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "BlockBasedTableOptions::block_size_deviation for RocksDB", nullptr,
+ nullptr, rocksdb_tbl_options->block_size_deviation,
+ /* min */ 0, /* max */ INT_MAX, 0);
+
+static MYSQL_SYSVAR_INT(
+ block_restart_interval, rocksdb_tbl_options->block_restart_interval,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "BlockBasedTableOptions::block_restart_interval for RocksDB", nullptr,
+ nullptr, rocksdb_tbl_options->block_restart_interval,
+ /* min */ 1, /* max */ INT_MAX, 0);
+
+static MYSQL_SYSVAR_BOOL(
+ whole_key_filtering,
+ *reinterpret_cast<my_bool *>(&rocksdb_tbl_options->whole_key_filtering),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "BlockBasedTableOptions::whole_key_filtering for RocksDB", nullptr, nullptr,
+ rocksdb_tbl_options->whole_key_filtering);
+
+static MYSQL_SYSVAR_STR(default_cf_options, rocksdb_default_cf_options,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "default cf options for RocksDB", nullptr, nullptr, "");
+
+static MYSQL_SYSVAR_STR(override_cf_options, rocksdb_override_cf_options,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "option overrides per cf for RocksDB", nullptr, nullptr,
+ "");
+
+static MYSQL_SYSVAR_STR(update_cf_options, rocksdb_update_cf_options,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC
+ /* psergey-merge: need this? : PLUGIN_VAR_ALLOCATED*/,
+ "Option updates per column family for RocksDB",
+ rocksdb_validate_update_cf_options,
+ rocksdb_set_update_cf_options, nullptr);
+
+static MYSQL_SYSVAR_UINT(flush_log_at_trx_commit,
+                         rocksdb_flush_log_at_trx_commit, PLUGIN_VAR_RQCMDARG,
+                         "Sync on transaction commit. Similar to "
+                         "innodb_flush_log_at_trx_commit. 1: sync on commit; "
+                         "0,2: no sync on commit",
+                         rocksdb_validate_flush_log_at_trx_commit, nullptr,
+                         /* default */ FLUSH_LOG_SYNC,
+                         /* min */ FLUSH_LOG_NEVER,
+                         /* max */ FLUSH_LOG_BACKGROUND, 0);
+
+static MYSQL_THDVAR_BOOL(write_disable_wal, PLUGIN_VAR_RQCMDARG,
+ "WriteOptions::disableWAL for RocksDB", nullptr,
+ nullptr, rocksdb::WriteOptions().disableWAL);
+
+static MYSQL_THDVAR_BOOL(
+ write_ignore_missing_column_families, PLUGIN_VAR_RQCMDARG,
+ "WriteOptions::ignore_missing_column_families for RocksDB", nullptr,
+ nullptr, rocksdb::WriteOptions().ignore_missing_column_families);
+
+static MYSQL_THDVAR_BOOL(skip_fill_cache, PLUGIN_VAR_RQCMDARG,
+ "Skip filling block cache on read requests", nullptr,
+ nullptr, FALSE);
+
+static MYSQL_THDVAR_BOOL(
+    unsafe_for_binlog, PLUGIN_VAR_RQCMDARG,
+    "Allow statement-based binary logging, which may break consistency",
+    nullptr, nullptr, FALSE);
+
+static MYSQL_THDVAR_UINT(records_in_range, PLUGIN_VAR_RQCMDARG,
+ "Used to override the result of records_in_range(). "
+ "Set to a positive number to override",
+ nullptr, nullptr, 0,
+ /* min */ 0, /* max */ INT_MAX, 0);
+
+static MYSQL_THDVAR_UINT(force_index_records_in_range, PLUGIN_VAR_RQCMDARG,
+ "Used to override the result of records_in_range() "
+ "when FORCE INDEX is used.",
+ nullptr, nullptr, 0,
+ /* min */ 0, /* max */ INT_MAX, 0);
+
+static MYSQL_SYSVAR_UINT(
+    debug_optimizer_n_rows, rocksdb_debug_optimizer_n_rows,
+    PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY | PLUGIN_VAR_NOSYSVAR,
+    "Test-only variable to override RocksDB estimates of table size in a "
+    "memtable",
+    nullptr, nullptr, 0, /* min */ 0, /* max */ INT_MAX, 0);
+
+static MYSQL_SYSVAR_BOOL(force_compute_memtable_stats,
+                         rocksdb_force_compute_memtable_stats,
+                         PLUGIN_VAR_RQCMDARG,
+                         "Force memtable stats to always be computed", nullptr,
+                         nullptr, TRUE);
+
+static MYSQL_SYSVAR_UINT(force_compute_memtable_stats_cachetime,
+ rocksdb_force_compute_memtable_stats_cachetime,
+ PLUGIN_VAR_RQCMDARG,
+ "Time in usecs to cache memtable estimates", nullptr,
+ nullptr, /* default */ 60 * 1000 * 1000,
+ /* min */ 0, /* max */ INT_MAX, 0);
+
+static MYSQL_SYSVAR_BOOL(
+    debug_optimizer_no_zero_cardinality,
+    rocksdb_debug_optimizer_no_zero_cardinality, PLUGIN_VAR_RQCMDARG,
+    "If the cardinality is zero, override it with some value", nullptr,
+    nullptr, TRUE);
+
+static MYSQL_SYSVAR_STR(compact_cf, rocksdb_compact_cf_name,
+ PLUGIN_VAR_RQCMDARG, "Compact column family",
+ rocksdb_compact_column_family,
+ rocksdb_compact_column_family_stub, "");
+
+static MYSQL_SYSVAR_STR(delete_cf, rocksdb_delete_cf_name, PLUGIN_VAR_RQCMDARG,
+ "Delete column family", rocksdb_delete_column_family,
+ rocksdb_delete_column_family_stub, "");
+
+static MYSQL_SYSVAR_STR(create_checkpoint, rocksdb_checkpoint_name,
+ PLUGIN_VAR_RQCMDARG, "Checkpoint directory",
+ rocksdb_create_checkpoint,
+ rocksdb_create_checkpoint_stub, "");
+
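+// Illustrative usage of the sysvar above (the path is an example): issuing
+//   SET GLOBAL rocksdb_create_checkpoint = '/backups/ckpt1';
+// invokes the rocksdb_create_checkpoint() check function and materializes an
+// on-disk checkpoint of the running instance in that directory.
+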
+static MYSQL_SYSVAR_BOOL(remove_mariabackup_checkpoint,
+ rocksdb_signal_remove_mariabackup_checkpoint,
+ PLUGIN_VAR_RQCMDARG, "Remove mariabackup checkpoint",
+ nullptr, rocksdb_remove_mariabackup_checkpoint, FALSE);
+
+static MYSQL_SYSVAR_BOOL(signal_drop_index_thread,
+ rocksdb_signal_drop_index_thread, PLUGIN_VAR_RQCMDARG,
+ "Wake up drop index thread", nullptr,
+ rocksdb_drop_index_wakeup_thread, FALSE);
+
+static MYSQL_SYSVAR_BOOL(pause_background_work, rocksdb_pause_background_work,
+ PLUGIN_VAR_RQCMDARG,
+ "Disable all rocksdb background operations", nullptr,
+ rocksdb_set_pause_background_work, FALSE);
+
+static MYSQL_SYSVAR_BOOL(
+ enable_ttl, rocksdb_enable_ttl, PLUGIN_VAR_RQCMDARG,
+ "Enable expired TTL records to be dropped during compaction.", nullptr,
+ nullptr, TRUE);
+
+static MYSQL_SYSVAR_BOOL(
+ enable_ttl_read_filtering, rocksdb_enable_ttl_read_filtering,
+ PLUGIN_VAR_RQCMDARG,
+ "For tables with TTL, expired records are skipped/filtered out during "
+ "processing and in query results. Disabling this will allow these records "
+ "to be seen, but as a result rows may disappear in the middle of "
+ "transactions as they are dropped during compaction. Use with caution.",
+ nullptr, nullptr, TRUE);
+
+static MYSQL_SYSVAR_INT(
+ debug_ttl_rec_ts, rocksdb_debug_ttl_rec_ts, PLUGIN_VAR_RQCMDARG,
+ "For debugging purposes only. Overrides the TTL of records to "
+ "now() + debug_ttl_rec_ts. The value can be +/- to simulate "
+ "a record inserted in the past vs a record inserted in the 'future'. "
+ "A value of 0 denotes that the variable is not set. This variable is a "
+ "no-op in non-debug builds.",
+ nullptr, nullptr, 0, /* min */ -3600, /* max */ 3600, 0);
+
+static MYSQL_SYSVAR_INT(
+ debug_ttl_snapshot_ts, rocksdb_debug_ttl_snapshot_ts, PLUGIN_VAR_RQCMDARG,
+ "For debugging purposes only. Sets the snapshot during compaction to "
+ "now() + debug_set_ttl_snapshot_ts. The value can be +/- to simulate "
+ "a snapshot in the past vs a snapshot created in the 'future'. "
+ "A value of 0 denotes that the variable is not set. This variable is a "
+ "no-op in non-debug builds.",
+ nullptr, nullptr, 0, /* min */ -3600, /* max */ 3600, 0);
+
+static MYSQL_SYSVAR_INT(
+ debug_ttl_read_filter_ts, rocksdb_debug_ttl_read_filter_ts,
+ PLUGIN_VAR_RQCMDARG,
+ "For debugging purposes only. Overrides the TTL read filtering time to "
+ "time + debug_ttl_read_filter_ts. A value of 0 denotes that the variable "
+ "is not set. This variable is a no-op in non-debug builds.",
+ nullptr, nullptr, 0, /* min */ -3600, /* max */ 3600, 0);
+
+static MYSQL_SYSVAR_BOOL(
+ debug_ttl_ignore_pk, rocksdb_debug_ttl_ignore_pk, PLUGIN_VAR_RQCMDARG,
+ "For debugging purposes only. If true, compaction filtering will not occur "
+ "on PK TTL data. This variable is a no-op in non-debug builds.",
+ nullptr, nullptr, FALSE);
+
+static MYSQL_SYSVAR_UINT(
+    max_manual_compactions, rocksdb_max_manual_compactions, PLUGIN_VAR_RQCMDARG,
+    "Maximum number of pending plus ongoing manual compactions.",
+    nullptr, nullptr, /* default */ 10, /* min */ 0, /* max */ UINT_MAX, 0);
+
+static MYSQL_SYSVAR_BOOL(
+ rollback_on_timeout, rocksdb_rollback_on_timeout, PLUGIN_VAR_OPCMDARG,
+ "Whether to roll back the complete transaction or a single statement on "
+ "lock wait timeout (a single statement by default)",
+ NULL, NULL, FALSE);
+
+static MYSQL_SYSVAR_UINT(
+    debug_manual_compaction_delay, rocksdb_debug_manual_compaction_delay,
+    PLUGIN_VAR_RQCMDARG,
+    "For debugging purposes only. Sleep the specified number of seconds "
+    "to simulate long-running compactions.",
+    nullptr, nullptr, 0, /* min */ 0, /* max */ UINT_MAX, 0);
+
+static MYSQL_SYSVAR_BOOL(
+ reset_stats, rocksdb_reset_stats, PLUGIN_VAR_RQCMDARG,
+ "Reset the RocksDB internal statistics without restarting the DB.", nullptr,
+ rocksdb_set_reset_stats, FALSE);
+
+static MYSQL_SYSVAR_UINT(io_write_timeout, rocksdb_io_write_timeout_secs,
+ PLUGIN_VAR_RQCMDARG,
+ "Timeout for experimental I/O watchdog.", nullptr,
+ rocksdb_set_io_write_timeout, /* default */ 0,
+ /* min */ 0L,
+ /* max */ UINT_MAX, 0);
+
+static MYSQL_SYSVAR_BOOL(enable_2pc, rocksdb_enable_2pc, PLUGIN_VAR_RQCMDARG,
+ "Enable two phase commit for MyRocks", nullptr,
+ nullptr, TRUE);
+
+static MYSQL_SYSVAR_BOOL(ignore_unknown_options, rocksdb_ignore_unknown_options,
+ PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
+ "Enable ignoring unknown options passed to RocksDB",
+ nullptr, nullptr, TRUE);
+
+static MYSQL_SYSVAR_BOOL(strict_collation_check, rocksdb_strict_collation_check,
+                         PLUGIN_VAR_RQCMDARG,
+                         "Enforce case-sensitive collation for MyRocks indexes",
+                         nullptr, nullptr, TRUE);
+
+static MYSQL_SYSVAR_STR(strict_collation_exceptions,
+                        rocksdb_strict_collation_exceptions,
+                        PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC,
+                        "List of tables (using regex) that are excluded "
+                        "from the case-sensitive collation enforcement",
+                        nullptr, rocksdb_set_collation_exception_list, "");
+
+static MYSQL_SYSVAR_BOOL(collect_sst_properties, rocksdb_collect_sst_properties,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Enables collecting SST file properties on each flush",
+ nullptr, nullptr, rocksdb_collect_sst_properties);
+
+static MYSQL_SYSVAR_BOOL(
+    force_flush_memtable_now, rocksdb_force_flush_memtable_now_var,
+    PLUGIN_VAR_RQCMDARG,
+    "Forces a memtable flush, which may block all write requests, so be "
+    "careful",
+    rocksdb_force_flush_memtable_now, rocksdb_force_flush_memtable_now_stub,
+    FALSE);
+
+static MYSQL_SYSVAR_BOOL(
+    force_flush_memtable_and_lzero_now,
+    rocksdb_force_flush_memtable_and_lzero_now_var, PLUGIN_VAR_RQCMDARG,
+    "Acts similarly to force_flush_memtable_now, but also compacts all L0 "
+    "files.",
+    rocksdb_force_flush_memtable_and_lzero_now,
+    rocksdb_force_flush_memtable_and_lzero_now_stub, FALSE);
+
+static MYSQL_SYSVAR_UINT(
+    seconds_between_stat_computes, rocksdb_seconds_between_stat_computes,
+    PLUGIN_VAR_RQCMDARG,
+    "Number of seconds to wait between optimizer stats recomputations. "
+    "Only changed indexes will be refreshed.",
+    nullptr, nullptr, rocksdb_seconds_between_stat_computes,
+    /* min */ 0L, /* max */ UINT_MAX, 0);
+
+static MYSQL_SYSVAR_LONGLONG(compaction_sequential_deletes,
+                             rocksdb_compaction_sequential_deletes,
+                             PLUGIN_VAR_RQCMDARG,
+                             "RocksDB will trigger compaction for the file if "
+                             "it has more than this number of sequential "
+                             "deletes per window",
+                             nullptr, rocksdb_set_compaction_options,
+                             DEFAULT_COMPACTION_SEQUENTIAL_DELETES,
+                             /* min */ 0L,
+                             /* max */ MAX_COMPACTION_SEQUENTIAL_DELETES, 0);
+
+static MYSQL_SYSVAR_LONGLONG(
+ compaction_sequential_deletes_window,
+ rocksdb_compaction_sequential_deletes_window, PLUGIN_VAR_RQCMDARG,
+ "Size of the window for counting rocksdb_compaction_sequential_deletes",
+ nullptr, rocksdb_set_compaction_options,
+ DEFAULT_COMPACTION_SEQUENTIAL_DELETES_WINDOW,
+ /* min */ 0L, /* max */ MAX_COMPACTION_SEQUENTIAL_DELETES_WINDOW, 0);
+
+static MYSQL_SYSVAR_LONGLONG(
+ compaction_sequential_deletes_file_size,
+ rocksdb_compaction_sequential_deletes_file_size, PLUGIN_VAR_RQCMDARG,
+ "Minimum file size required for compaction_sequential_deletes", nullptr,
+ rocksdb_set_compaction_options, 0L,
+ /* min */ -1L, /* max */ LLONG_MAX, 0);
+
+static MYSQL_SYSVAR_BOOL(
+    compaction_sequential_deletes_count_sd,
+    rocksdb_compaction_sequential_deletes_count_sd, PLUGIN_VAR_RQCMDARG,
+    "Count SingleDelete as rocksdb_compaction_sequential_deletes", nullptr,
+    nullptr, rocksdb_compaction_sequential_deletes_count_sd);
+
+static MYSQL_SYSVAR_BOOL(
+    print_snapshot_conflict_queries, rocksdb_print_snapshot_conflict_queries,
+    PLUGIN_VAR_RQCMDARG,
+    "Log queries that got snapshot conflict errors into the *.err log",
+    nullptr, nullptr, rocksdb_print_snapshot_conflict_queries);
+
+static MYSQL_THDVAR_INT(checksums_pct, PLUGIN_VAR_RQCMDARG,
+                        "Percentage of rows to be checksummed",
+                        nullptr, nullptr, RDB_MAX_CHECKSUMS_PCT,
+                        /* min */ 0, /* max */ RDB_MAX_CHECKSUMS_PCT, 0);
+
+static MYSQL_THDVAR_BOOL(store_row_debug_checksums, PLUGIN_VAR_RQCMDARG,
+ "Include checksums when writing index/table records",
+ nullptr, nullptr, false /* default value */);
+
+static MYSQL_THDVAR_BOOL(verify_row_debug_checksums, PLUGIN_VAR_RQCMDARG,
+ "Verify checksums when reading index/table records",
+ nullptr, nullptr, false /* default value */);
+
+static MYSQL_THDVAR_BOOL(master_skip_tx_api, PLUGIN_VAR_RQCMDARG,
+                         "Skip holding any lock on row access. "
+                         "Not effective on the slave.",
+                         nullptr, nullptr, false);
+
+static MYSQL_SYSVAR_UINT(
+    validate_tables, rocksdb_validate_tables,
+    PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+    "Verify all .frm files match all RocksDB tables (0 means no verification, "
+    "1 means verify and fail on error, and 2 means verify but continue)",
+    nullptr, nullptr, 1 /* default value */, 0 /* min value */,
+    2 /* max value */, 0);
+
+static MYSQL_SYSVAR_STR(datadir, rocksdb_datadir,
+ PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
+ "RocksDB data directory", nullptr, nullptr,
+ "./#rocksdb");
+
+static MYSQL_SYSVAR_STR(supported_compression_types,
+ compression_types_val,
+ PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY,
+ "Compression algorithms supported by RocksDB",
+ nullptr, nullptr,
+ compression_types_val);
+
+static MYSQL_SYSVAR_UINT(
+    table_stats_sampling_pct, rocksdb_table_stats_sampling_pct,
+    PLUGIN_VAR_RQCMDARG,
+    "Percentage of entries to sample when collecting statistics about table "
+    "properties. Specify either 0 to sample everything or percentage "
+    "[" STRINGIFY_ARG(RDB_TBL_STATS_SAMPLE_PCT_MIN) ".."
+    STRINGIFY_ARG(RDB_TBL_STATS_SAMPLE_PCT_MAX) "]. "
+    "By default " STRINGIFY_ARG(RDB_DEFAULT_TBL_STATS_SAMPLE_PCT)
+    "% of entries are sampled.",
+    nullptr, rocksdb_set_table_stats_sampling_pct, /* default */
+    RDB_DEFAULT_TBL_STATS_SAMPLE_PCT, /* everything */ 0,
+    /* max */ RDB_TBL_STATS_SAMPLE_PCT_MAX, 0);
+
+static MYSQL_SYSVAR_UINT(
+ stats_recalc_rate, rocksdb_stats_recalc_rate, PLUGIN_VAR_RQCMDARG,
+ "The number of indexes per second to recalculate statistics for. 0 to "
+ "disable background recalculation.",
+ nullptr, nullptr, 0 /* default value */, 0 /* min value */,
+ UINT_MAX /* max value */, 0);
+
+static MYSQL_SYSVAR_BOOL(
+ large_prefix, rocksdb_large_prefix, PLUGIN_VAR_RQCMDARG,
+ "Support large index prefix length of 3072 bytes. If off, the maximum "
+ "index prefix length is 767.",
+ nullptr, nullptr, FALSE);
+
+static MYSQL_SYSVAR_BOOL(
+    allow_to_start_after_corruption, rocksdb_allow_to_start_after_corruption,
+    PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
+    "Allow the server to start successfully even if RocksDB corruption is "
+    "detected.",
+    nullptr, nullptr, FALSE);
+
+static MYSQL_SYSVAR_BOOL(error_on_suboptimal_collation,
+ rocksdb_error_on_suboptimal_collation,
+ PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
+ "Raise an error instead of warning if a sub-optimal "
+ "collation is used",
+ nullptr, nullptr, TRUE);
+
+static MYSQL_SYSVAR_BOOL(
+    enable_insert_with_update_caching,
+    rocksdb_enable_insert_with_update_caching, PLUGIN_VAR_OPCMDARG,
+    "Whether to enable the optimization where we cache the read from a failed "
+    "insertion attempt in INSERT ON DUPLICATE KEY UPDATE",
+    nullptr, nullptr, TRUE);
+
+static const int ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE = 100;
+
+static struct st_mysql_sys_var *rocksdb_system_variables[] = {
+ MYSQL_SYSVAR(lock_wait_timeout),
+ MYSQL_SYSVAR(deadlock_detect),
+ MYSQL_SYSVAR(deadlock_detect_depth),
+ MYSQL_SYSVAR(commit_time_batch_for_recovery),
+ MYSQL_SYSVAR(max_row_locks),
+ MYSQL_SYSVAR(write_batch_max_bytes),
+ MYSQL_SYSVAR(lock_scanned_rows),
+ MYSQL_SYSVAR(bulk_load),
+ MYSQL_SYSVAR(bulk_load_allow_sk),
+ MYSQL_SYSVAR(bulk_load_allow_unsorted),
+ MYSQL_SYSVAR(skip_unique_check_tables),
+ MYSQL_SYSVAR(trace_sst_api),
+ MYSQL_SYSVAR(commit_in_the_middle),
+ MYSQL_SYSVAR(blind_delete_primary_key),
+#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported
+ MYSQL_SYSVAR(read_free_rpl_tables),
+ MYSQL_SYSVAR(read_free_rpl),
+#endif
+ MYSQL_SYSVAR(bulk_load_size),
+ MYSQL_SYSVAR(merge_buf_size),
+ MYSQL_SYSVAR(enable_bulk_load_api),
+ MYSQL_SYSVAR(tmpdir),
+ MYSQL_SYSVAR(merge_combine_read_size),
+ MYSQL_SYSVAR(merge_tmp_file_removal_delay_ms),
+ MYSQL_SYSVAR(skip_bloom_filter_on_read),
+
+ MYSQL_SYSVAR(create_if_missing),
+ MYSQL_SYSVAR(two_write_queues),
+ MYSQL_SYSVAR(manual_wal_flush),
+ MYSQL_SYSVAR(write_policy),
+ MYSQL_SYSVAR(create_missing_column_families),
+ MYSQL_SYSVAR(error_if_exists),
+ MYSQL_SYSVAR(paranoid_checks),
+ MYSQL_SYSVAR(rate_limiter_bytes_per_sec),
+ MYSQL_SYSVAR(sst_mgr_rate_bytes_per_sec),
+ MYSQL_SYSVAR(delayed_write_rate),
+ MYSQL_SYSVAR(max_latest_deadlocks),
+ MYSQL_SYSVAR(info_log_level),
+ MYSQL_SYSVAR(max_open_files),
+ MYSQL_SYSVAR(max_total_wal_size),
+ MYSQL_SYSVAR(use_fsync),
+ MYSQL_SYSVAR(wal_dir),
+ MYSQL_SYSVAR(persistent_cache_path),
+ MYSQL_SYSVAR(persistent_cache_size_mb),
+ MYSQL_SYSVAR(delete_obsolete_files_period_micros),
+ MYSQL_SYSVAR(max_background_jobs),
+ MYSQL_SYSVAR(max_log_file_size),
+ MYSQL_SYSVAR(max_subcompactions),
+ MYSQL_SYSVAR(log_file_time_to_roll),
+ MYSQL_SYSVAR(keep_log_file_num),
+ MYSQL_SYSVAR(max_manifest_file_size),
+ MYSQL_SYSVAR(table_cache_numshardbits),
+ MYSQL_SYSVAR(wal_ttl_seconds),
+ MYSQL_SYSVAR(wal_size_limit_mb),
+ MYSQL_SYSVAR(manifest_preallocation_size),
+ MYSQL_SYSVAR(use_direct_reads),
+ MYSQL_SYSVAR(use_direct_io_for_flush_and_compaction),
+ MYSQL_SYSVAR(allow_mmap_reads),
+ MYSQL_SYSVAR(allow_mmap_writes),
+ MYSQL_SYSVAR(is_fd_close_on_exec),
+ MYSQL_SYSVAR(stats_dump_period_sec),
+ MYSQL_SYSVAR(advise_random_on_open),
+ MYSQL_SYSVAR(db_write_buffer_size),
+ MYSQL_SYSVAR(use_adaptive_mutex),
+ MYSQL_SYSVAR(bytes_per_sync),
+ MYSQL_SYSVAR(wal_bytes_per_sync),
+ MYSQL_SYSVAR(enable_thread_tracking),
+ MYSQL_SYSVAR(perf_context_level),
+ MYSQL_SYSVAR(wal_recovery_mode),
+ MYSQL_SYSVAR(stats_level),
+ MYSQL_SYSVAR(access_hint_on_compaction_start),
+ MYSQL_SYSVAR(new_table_reader_for_compaction_inputs),
+ MYSQL_SYSVAR(compaction_readahead_size),
+ MYSQL_SYSVAR(allow_concurrent_memtable_write),
+ MYSQL_SYSVAR(enable_write_thread_adaptive_yield),
+
+ MYSQL_SYSVAR(block_cache_size),
+ MYSQL_SYSVAR(sim_cache_size),
+ MYSQL_SYSVAR(use_clock_cache),
+ MYSQL_SYSVAR(cache_high_pri_pool_ratio),
+ MYSQL_SYSVAR(cache_dump),
+ MYSQL_SYSVAR(cache_index_and_filter_blocks),
+ MYSQL_SYSVAR(cache_index_and_filter_with_high_priority),
+ MYSQL_SYSVAR(pin_l0_filter_and_index_blocks_in_cache),
+ MYSQL_SYSVAR(index_type),
+ MYSQL_SYSVAR(hash_index_allow_collision),
+ MYSQL_SYSVAR(no_block_cache),
+ MYSQL_SYSVAR(block_size),
+ MYSQL_SYSVAR(block_size_deviation),
+ MYSQL_SYSVAR(block_restart_interval),
+ MYSQL_SYSVAR(whole_key_filtering),
+
+ MYSQL_SYSVAR(default_cf_options),
+ MYSQL_SYSVAR(override_cf_options),
+ MYSQL_SYSVAR(update_cf_options),
+
+ MYSQL_SYSVAR(flush_log_at_trx_commit),
+ MYSQL_SYSVAR(write_disable_wal),
+ MYSQL_SYSVAR(write_ignore_missing_column_families),
+
+ MYSQL_SYSVAR(skip_fill_cache),
+ MYSQL_SYSVAR(unsafe_for_binlog),
+
+ MYSQL_SYSVAR(records_in_range),
+ MYSQL_SYSVAR(force_index_records_in_range),
+ MYSQL_SYSVAR(debug_optimizer_n_rows),
+ MYSQL_SYSVAR(force_compute_memtable_stats),
+ MYSQL_SYSVAR(force_compute_memtable_stats_cachetime),
+ MYSQL_SYSVAR(debug_optimizer_no_zero_cardinality),
+
+ MYSQL_SYSVAR(compact_cf),
+ MYSQL_SYSVAR(delete_cf),
+ MYSQL_SYSVAR(signal_drop_index_thread),
+ MYSQL_SYSVAR(pause_background_work),
+ MYSQL_SYSVAR(enable_2pc),
+ MYSQL_SYSVAR(ignore_unknown_options),
+ MYSQL_SYSVAR(strict_collation_check),
+ MYSQL_SYSVAR(strict_collation_exceptions),
+ MYSQL_SYSVAR(collect_sst_properties),
+ MYSQL_SYSVAR(force_flush_memtable_now),
+ MYSQL_SYSVAR(force_flush_memtable_and_lzero_now),
+ MYSQL_SYSVAR(enable_ttl),
+ MYSQL_SYSVAR(enable_ttl_read_filtering),
+ MYSQL_SYSVAR(debug_ttl_rec_ts),
+ MYSQL_SYSVAR(debug_ttl_snapshot_ts),
+ MYSQL_SYSVAR(debug_ttl_read_filter_ts),
+ MYSQL_SYSVAR(debug_ttl_ignore_pk),
+ MYSQL_SYSVAR(reset_stats),
+ MYSQL_SYSVAR(io_write_timeout),
+ MYSQL_SYSVAR(seconds_between_stat_computes),
+
+ MYSQL_SYSVAR(compaction_sequential_deletes),
+ MYSQL_SYSVAR(compaction_sequential_deletes_window),
+ MYSQL_SYSVAR(compaction_sequential_deletes_file_size),
+ MYSQL_SYSVAR(compaction_sequential_deletes_count_sd),
+ MYSQL_SYSVAR(print_snapshot_conflict_queries),
+
+ MYSQL_SYSVAR(datadir),
+ MYSQL_SYSVAR(supported_compression_types),
+ MYSQL_SYSVAR(create_checkpoint),
+ MYSQL_SYSVAR(remove_mariabackup_checkpoint),
+ MYSQL_SYSVAR(checksums_pct),
+ MYSQL_SYSVAR(store_row_debug_checksums),
+ MYSQL_SYSVAR(verify_row_debug_checksums),
+ MYSQL_SYSVAR(master_skip_tx_api),
+
+ MYSQL_SYSVAR(validate_tables),
+ MYSQL_SYSVAR(table_stats_sampling_pct),
+
+ MYSQL_SYSVAR(large_prefix),
+ MYSQL_SYSVAR(allow_to_start_after_corruption),
+ MYSQL_SYSVAR(git_hash),
+ MYSQL_SYSVAR(error_on_suboptimal_collation),
+ MYSQL_SYSVAR(stats_recalc_rate),
+ MYSQL_SYSVAR(debug_manual_compaction_delay),
+ MYSQL_SYSVAR(max_manual_compactions),
+ MYSQL_SYSVAR(manual_compaction_threads),
+ MYSQL_SYSVAR(rollback_on_timeout),
+
+ MYSQL_SYSVAR(enable_insert_with_update_caching),
+ nullptr};
+
+static rocksdb::WriteOptions rdb_get_rocksdb_write_options(
+ my_core::THD *const thd) {
+ rocksdb::WriteOptions opt;
+
+ opt.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
+ opt.disableWAL = THDVAR(thd, write_disable_wal);
+ opt.ignore_missing_column_families =
+ THDVAR(thd, write_ignore_missing_column_families);
+
+ return opt;
+}
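+
+// For instance, with the default rocksdb_flush_log_at_trx_commit = 1
+// (FLUSH_LOG_SYNC), the WriteOptions built above request a synced WAL write
+// on every commit, while values 0 and 2 skip the per-commit sync.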
+
+static int rocksdb_compact_column_family(THD *const thd,
+ struct st_mysql_sys_var *const var,
+ void *const var_ptr,
+ struct st_mysql_value *const value) {
+ char buff[STRING_BUFFER_USUAL_SIZE];
+ int len = sizeof(buff);
+
+ DBUG_ASSERT(value != nullptr);
+
+ if (const char *const cf = value->val_str(value, buff, &len)) {
+ auto cfh = cf_manager.get_cf(cf);
+ if (cfh != nullptr && rdb != nullptr) {
+ int mc_id = rdb_mc_thread.request_manual_compaction(
+ cfh, nullptr, nullptr, THDVAR(thd, manual_compaction_threads));
+ if (mc_id == -1) {
+ my_error(ER_INTERNAL_ERROR, MYF(0),
+ "Can't schedule more manual compactions. "
+ "Increase rocksdb_max_manual_compactions or stop issuing "
+ "more manual compactions.");
+ return HA_EXIT_FAILURE;
+ } else if (mc_id < 0) {
+ return HA_EXIT_FAILURE;
+ }
+ // NO_LINT_DEBUG
+ sql_print_information("RocksDB: Manual compaction of column family: %s\n",
+ cf);
+      // Check the thd state every short cycle (100ms) so that this function
+      // can exit without waiting for CompactRange to finish.
+ do {
+ my_sleep(100000);
+ } while (!thd->killed &&
+ !rdb_mc_thread.is_manual_compaction_finished(mc_id));
+
+ if (thd->killed) {
+ // This cancels if requested compaction state is INITED.
+ // TODO(yoshinorim): Cancel running compaction as well once
+ // it is supported in RocksDB.
+ rdb_mc_thread.clear_manual_compaction_request(mc_id, true);
+ }
+ }
+ }
+ return HA_EXIT_SUCCESS;
+}
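+
+// Illustrative usage (the column family name is an example): issuing
+//   SET GLOBAL rocksdb_compact_cf = 'default';
+// runs the check function above, which schedules a manual compaction of the
+// named column family and polls every 100ms until it finishes or the client
+// thread is killed.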
+
+///////////////////////////////////////////////////////////////////////////////////////////
+
+/*
+ Drop index thread's control
+*/
+
+static Rdb_drop_index_thread rdb_drop_idx_thread;
+
+static void rocksdb_drop_index_wakeup_thread(
+ my_core::THD *const thd MY_ATTRIBUTE((__unused__)),
+ struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
+ void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) {
+ if (*static_cast<const bool *>(save)) {
+ rdb_drop_idx_thread.signal();
+ }
+}
+
+static inline uint32_t rocksdb_perf_context_level(THD *const thd) {
+ DBUG_ASSERT(thd != nullptr);
+
+ const int session_perf_context_level = THDVAR(thd, perf_context_level);
+ if (session_perf_context_level > rocksdb::PerfLevel::kUninitialized) {
+ return session_perf_context_level;
+ }
+
+  /*
+    Fall back to the global thdvar if the session-specific one was not set to
+    a valid value.
+  */
+
+ const int global_perf_context_level = THDVAR(nullptr, perf_context_level);
+ if (global_perf_context_level > rocksdb::PerfLevel::kUninitialized) {
+ return global_perf_context_level;
+ }
+
+ return rocksdb::PerfLevel::kDisable;
+}
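+
+// Illustrative example: after "SET SESSION rocksdb_perf_context_level = 2",
+// this helper returns 2 for that session; sessions that leave the variable at
+// kUninitialized fall back to the global value, and to kDisable when neither
+// has been set.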
+
+/*
+ Very short (functor-like) interface to be passed to
+ Rdb_transaction::walk_tx_list()
+*/
+
+interface Rdb_tx_list_walker {
+ virtual ~Rdb_tx_list_walker() {}
+ virtual void process_tran(const Rdb_transaction *const) = 0;
+};
+
+/*
+ This is a helper class that is passed to RocksDB to get notifications when
+ a snapshot gets created.
+*/
+
+class Rdb_snapshot_notifier : public rocksdb::TransactionNotifier {
+ Rdb_transaction *m_owning_tx;
+
+ void SnapshotCreated(const rocksdb::Snapshot *snapshot) override;
+
+ public:
+ Rdb_snapshot_notifier(const Rdb_snapshot_notifier &) = delete;
+ Rdb_snapshot_notifier &operator=(const Rdb_snapshot_notifier &) = delete;
+
+ explicit Rdb_snapshot_notifier(Rdb_transaction *const owning_tx)
+ : m_owning_tx(owning_tx) {}
+
+  // If the owning Rdb_transaction gets destroyed, we must stop referencing
+  // it.
+ void detach() { m_owning_tx = nullptr; }
+};
+
+
+#ifdef MARIAROCKS_NOT_YET
+// ER_LOCK_WAIT_TIMEOUT error also has a reason in facebook/mysql-5.6
+#endif
+String timeout_message(const char *command, const char *name1,
+ const char *name2)
+{
+ String msg;
+ msg.append("Timeout on ");
+ msg.append(command);
+ msg.append(": ");
+ msg.append(name1);
+ if (name2 && name2[0])
+ {
+ msg.append(".");
+ msg.append(name2);
+ }
+ return msg;
+}
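+
+// For example (illustrative values): timeout_message("index", "db.t1",
+// "PRIMARY") yields "Timeout on index: db.t1.PRIMARY", which is how
+// set_status_error() below reports lock wait timeouts.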
+
+
+/* This is the base class for transactions when interacting with rocksdb.
+ */
+class Rdb_transaction {
+ protected:
+ ulonglong m_write_count = 0;
+ ulonglong m_insert_count = 0;
+ ulonglong m_update_count = 0;
+ ulonglong m_delete_count = 0;
+ ulonglong m_lock_count = 0;
+ std::unordered_map<GL_INDEX_ID, ulonglong> m_auto_incr_map;
+
+ bool m_is_delayed_snapshot = false;
+ bool m_is_two_phase = false;
+
+ private:
+ /*
+ Number of write operations this transaction had when we took the last
+ savepoint (the idea is not to take another savepoint if we haven't made
+ any changes)
+ */
+ ulonglong m_writes_at_last_savepoint;
+
+ protected:
+ THD *m_thd = nullptr;
+
+ static std::multiset<Rdb_transaction *> s_tx_list;
+ static mysql_mutex_t s_tx_list_mutex;
+
+ Rdb_io_perf *m_tbl_io_perf;
+
+ bool m_tx_read_only = false;
+
+ int m_timeout_sec; /* Cached value of @@rocksdb_lock_wait_timeout */
+
+ /* Maximum number of locks the transaction can have */
+ ulonglong m_max_row_locks;
+
+ bool m_is_tx_failed = false;
+ bool m_rollback_only = false;
+
+ std::shared_ptr<Rdb_snapshot_notifier> m_notifier;
+
+ // This should be used only when updating binlog information.
+ virtual rocksdb::WriteBatchBase *get_write_batch() = 0;
+ virtual bool commit_no_binlog() = 0;
+ virtual rocksdb::Iterator *get_iterator(
+ const rocksdb::ReadOptions &options,
+ rocksdb::ColumnFamilyHandle *column_family) = 0;
+
+protected:
+ /*
+ The following two are helper functions to be overloaded by child classes.
+ They should provide RocksDB's savepoint semantics.
+ */
+ virtual void do_set_savepoint() = 0;
+ virtual void do_rollback_to_savepoint() = 0;
+
+ /*
+ @detail
+ This function takes in the WriteBatch of the transaction to add
+ all the AUTO_INCREMENT merges. It does so by iterating through
+ m_auto_incr_map and then constructing key/value pairs to call merge upon.
+
+    @param wb  The write batch of the transaction
+ */
+ rocksdb::Status merge_auto_incr_map(rocksdb::WriteBatchBase *const wb) {
+ DBUG_EXECUTE_IF("myrocks_autoinc_upgrade", return rocksdb::Status::OK(););
+
+ // Iterate through the merge map merging all keys into data dictionary.
+ rocksdb::Status s;
+ for (auto &it : m_auto_incr_map) {
+ s = dict_manager.put_auto_incr_val(wb, it.first, it.second);
+ if (!s.ok()) {
+ return s;
+ }
+ }
+ m_auto_incr_map.clear();
+ return s;
+ }
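+
+  // For example, a transaction that inserted into two tables with
+  // AUTO_INCREMENT columns ends up with two entries in m_auto_incr_map; the
+  // loop above merges both into the data dictionary and clears the map.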
+
+ public:
+ rocksdb::ReadOptions m_read_opts;
+ const char *m_mysql_log_file_name;
+ my_off_t m_mysql_log_offset;
+#ifdef MARIAROCKS_NOT_YET
+ // TODO: MariaDB probably doesn't need these at all:
+ const char *m_mysql_gtid;
+ const char *m_mysql_max_gtid;
+#endif
+ String m_detailed_error;
+ int64_t m_snapshot_timestamp = 0;
+ bool m_ddl_transaction;
+#ifdef MARIAROCKS_NOT_YET
+ std::shared_ptr<Rdb_explicit_snapshot> m_explicit_snapshot;
+#endif
+
+ /*
+ Tracks the number of tables in use through external_lock.
+ This should not be reset during start_tx().
+ */
+ int64_t m_n_mysql_tables_in_use = 0;
+
+ /*
+ MariaDB's group commit:
+ */
+ bool commit_ordered_done;
+ bool commit_ordered_res;
+
+  /*
+    For distinguishing between Rdb_transaction_impl and Rdb_writebatch_impl
+    when walking the tx list.
+  */
+ virtual bool is_writebatch_trx() const = 0;
+
+ static void init_mutex() {
+ mysql_mutex_init(key_mutex_tx_list, &s_tx_list_mutex, MY_MUTEX_INIT_FAST);
+ }
+
+ static void term_mutex() {
+ DBUG_ASSERT(s_tx_list.size() == 0);
+ mysql_mutex_destroy(&s_tx_list_mutex);
+ }
+
+ static void walk_tx_list(Rdb_tx_list_walker *walker) {
+ DBUG_ASSERT(walker != nullptr);
+
+ RDB_MUTEX_LOCK_CHECK(s_tx_list_mutex);
+
+ for (auto it : s_tx_list) {
+ walker->process_tran(it);
+ }
+
+ RDB_MUTEX_UNLOCK_CHECK(s_tx_list_mutex);
+ }
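+
+  /*
+    A minimal sketch of a walker (illustrative only; Rdb_tx_counter is a
+    hypothetical name):
+
+      struct Rdb_tx_counter : public Rdb_tx_list_walker {
+        size_t m_count = 0;
+        void process_tran(const Rdb_transaction *) override { ++m_count; }
+      };
+
+    Passing its address to walk_tx_list() invokes process_tran() for every
+    live transaction while s_tx_list_mutex is held.
+  */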
+
+ int set_status_error(THD *const thd, const rocksdb::Status &s,
+ const Rdb_key_def &kd, Rdb_tbl_def *const tbl_def,
+ Rdb_table_handler *const table_handler) {
+ DBUG_ASSERT(!s.ok());
+ DBUG_ASSERT(tbl_def != nullptr);
+
+ if (s.IsTimedOut()) {
+ /*
+        The SQL layer has weird expectations. If we return an error when
+        doing a read in DELETE IGNORE, it will ignore the error (because it's
+        an IGNORE command!) but then will fail an assert, because "error code
+ was returned, but no error happened". Do what InnoDB's
+ convert_error_code_to_mysql() does: force a statement
+ rollback before returning HA_ERR_LOCK_WAIT_TIMEOUT:
+ */
+ my_core::thd_mark_transaction_to_rollback(
+ thd, static_cast<bool>(rocksdb_rollback_on_timeout));
+ m_detailed_error.copy(timeout_message(
+ "index", tbl_def->full_tablename().c_str(), kd.get_name().c_str()));
+ table_handler->m_lock_wait_timeout_counter.inc();
+ rocksdb_row_lock_wait_timeouts++;
+
+ return HA_ERR_LOCK_WAIT_TIMEOUT;
+ }
+
+ if (s.IsDeadlock()) {
+ my_core::thd_mark_transaction_to_rollback(thd,
+ true /* whole transaction */);
+ m_detailed_error = String();
+ table_handler->m_deadlock_counter.inc();
+ rocksdb_row_lock_deadlocks++;
+ return HA_ERR_LOCK_DEADLOCK;
+ } else if (s.IsBusy()) {
+ rocksdb_snapshot_conflict_errors++;
+ if (rocksdb_print_snapshot_conflict_queries) {
+ char user_host_buff[MAX_USER_HOST_SIZE + 1];
+ make_user_name(thd, user_host_buff);
+ // NO_LINT_DEBUG
+ sql_print_warning(
+ "Got snapshot conflict errors: User: %s "
+ "Query: %s",
+ user_host_buff, thd->query());
+ }
+ m_detailed_error = String(" (snapshot conflict)", system_charset_info);
+ table_handler->m_deadlock_counter.inc();
+ return HA_ERR_ROCKSDB_STATUS_BUSY;
+ }
+
+ if (s.IsIOError() || s.IsCorruption()) {
+ rdb_handle_io_error(s, RDB_IO_ERROR_GENERAL);
+ }
+
+ return ha_rocksdb::rdb_error_to_mysql(s);
+ }
+
+ THD *get_thd() const { return m_thd; }
+
+ /* Used for tracking io_perf counters */
+ void io_perf_start(Rdb_io_perf *const io_perf) {
+ /*
+ Since perf_context is tracked per thread, it is difficult and expensive
+ to maintain perf_context on a per table basis. Therefore, roll all
+ perf_context data into the first table used in a query. This works well
+ for single table queries and is probably good enough for queries that hit
+ multiple tables.
+
+ perf_context stats gathering is started when the table lock is acquired
+ or when ha_rocksdb::start_stmt is called in case of LOCK TABLES. They
+ are recorded when the table lock is released, or when commit/rollback
+ is called on the transaction, whichever comes first. Table lock release
+ and commit/rollback can happen in different orders. In the case where
+ the lock is released before commit/rollback is called, an extra step to
+ gather stats during commit/rollback is needed.
+ */
+ if (m_tbl_io_perf == nullptr &&
+ io_perf->start(rocksdb_perf_context_level(m_thd))) {
+ m_tbl_io_perf = io_perf;
+ }
+ }
+
+ void io_perf_end_and_record(void) {
+ if (m_tbl_io_perf != nullptr) {
+ m_tbl_io_perf->end_and_record(rocksdb_perf_context_level(m_thd));
+ m_tbl_io_perf = nullptr;
+ }
+ }
+
+ void io_perf_end_and_record(Rdb_io_perf *const io_perf) {
+ if (m_tbl_io_perf == io_perf) {
+ io_perf_end_and_record();
+ }
+ }
+
+ void update_bytes_written(ulonglong bytes_written) {
+ if (m_tbl_io_perf != nullptr) {
+ m_tbl_io_perf->update_bytes_written(rocksdb_perf_context_level(m_thd),
+ bytes_written);
+ }
+ }
+
+ void set_params(int timeout_sec_arg, int max_row_locks_arg) {
+ m_timeout_sec = timeout_sec_arg;
+ m_max_row_locks = max_row_locks_arg;
+ set_lock_timeout(timeout_sec_arg);
+ }
+
+ virtual void set_lock_timeout(int timeout_sec_arg) = 0;
+
+ ulonglong get_write_count() const { return m_write_count; }
+
+ ulonglong get_insert_count() const { return m_insert_count; }
+
+ ulonglong get_update_count() const { return m_update_count; }
+
+ ulonglong get_delete_count() const { return m_delete_count; }
+
+ void incr_insert_count() { ++m_insert_count; }
+
+ void incr_update_count() { ++m_update_count; }
+
+ void incr_delete_count() { ++m_delete_count; }
+
+ int get_timeout_sec() const { return m_timeout_sec; }
+
+ ulonglong get_lock_count() const { return m_lock_count; }
+
+ virtual void set_sync(bool sync) = 0;
+
+ virtual void release_lock(rocksdb::ColumnFamilyHandle *const column_family,
+ const std::string &rowkey) = 0;
+
+ virtual bool prepare(const rocksdb::TransactionName &name) = 0;
+
+ bool commit_or_rollback() {
+ bool res;
+ if (m_is_tx_failed) {
+ rollback();
+ res = false;
+ } else {
+ res = commit();
+ }
+ return res;
+ }
+
+ bool commit() {
+ if (get_write_count() == 0) {
+ rollback();
+ return false;
+ } else if (m_rollback_only) {
+ /*
+        Transactions marked as rollback_only are expected to be rolled back at
+        prepare(). But there are some exceptions, like the cases below, where
+        prepare() is never called and commit() is called instead:
+        1. Binlog is disabled
+        2. No modification exists in the binlog cache for the transaction (#195)
+        In both cases, rolling back the transaction is safe. Nothing is written
+        to the binlog.
+ */
+ my_error(ER_ROLLBACK_ONLY, MYF(0));
+ rollback();
+ return true;
+ } else {
+#ifdef MARIAROCKS_NOT_YET
+ /*
+ Storing binlog position inside MyRocks is needed only for restoring
+ MyRocks from backups. This feature is not supported yet.
+ */
+ mysql_bin_log_commit_pos(m_thd, &m_mysql_log_offset,
+ &m_mysql_log_file_name);
+ binlog_manager.update(m_mysql_log_file_name, m_mysql_log_offset,
+ get_write_batch());
+#endif
+ return commit_no_binlog();
+ }
+ }
+
+ virtual void rollback() = 0;
+
+ void snapshot_created(const rocksdb::Snapshot *const snapshot) {
+ DBUG_ASSERT(snapshot != nullptr);
+
+ m_read_opts.snapshot = snapshot;
+ rdb->GetEnv()->GetCurrentTime(&m_snapshot_timestamp);
+ m_is_delayed_snapshot = false;
+ }
+
+ virtual void acquire_snapshot(bool acquire_now) = 0;
+ virtual void release_snapshot() = 0;
+
+ bool has_snapshot() const { return m_read_opts.snapshot != nullptr; }
+
+ private:
+ // The Rdb_sst_info structures we are currently loading. In a partitioned
+ // table this can have more than one entry
+ std::vector<std::shared_ptr<Rdb_sst_info>> m_curr_bulk_load;
+ std::string m_curr_bulk_load_tablename;
+
+ /* External merge sorts for bulk load: key ID -> merge sort instance */
+ std::unordered_map<GL_INDEX_ID, Rdb_index_merge> m_key_merge;
+
+ public:
+ int get_key_merge(GL_INDEX_ID kd_gl_id, rocksdb::ColumnFamilyHandle *cf,
+ Rdb_index_merge **key_merge) {
+ int res;
+ auto it = m_key_merge.find(kd_gl_id);
+ if (it == m_key_merge.end()) {
+ m_key_merge.emplace(
+ std::piecewise_construct, std::make_tuple(kd_gl_id),
+ std::make_tuple(
+ get_rocksdb_tmpdir(), THDVAR(get_thd(), merge_buf_size),
+ THDVAR(get_thd(), merge_combine_read_size),
+ THDVAR(get_thd(), merge_tmp_file_removal_delay_ms), cf));
+ it = m_key_merge.find(kd_gl_id);
+ if ((res = it->second.init()) != 0) {
+ return res;
+ }
+ }
+ *key_merge = &it->second;
+ return HA_EXIT_SUCCESS;
+ }
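+
+  // Note: the merge-sort parameters above come from session THDVARs
+  // (rocksdb_merge_buf_size and friends), so two connections bulk loading
+  // the same index may use differently sized buffers.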
+
+  /* Finish bulk loading for all table handlers belonging to one connection */
+ int finish_bulk_load(bool *is_critical_error = nullptr,
+ int print_client_error = true) {
+ Ensure_cleanup cleanup([&]() {
+ // Always clear everything regardless of success/failure
+ m_curr_bulk_load.clear();
+ m_curr_bulk_load_tablename.clear();
+ m_key_merge.clear();
+ });
+
+ int rc = 0;
+ if (is_critical_error) {
+ *is_critical_error = true;
+ }
+
+ // PREPARE phase: finish all on-going bulk loading Rdb_sst_info and
+ // collect all Rdb_sst_commit_info containing (SST files, cf)
+ int rc2 = 0;
+ std::vector<Rdb_sst_info::Rdb_sst_commit_info> sst_commit_list;
+ sst_commit_list.reserve(m_curr_bulk_load.size());
+
+ for (auto &sst_info : m_curr_bulk_load) {
+ Rdb_sst_info::Rdb_sst_commit_info commit_info;
+
+      // Commit the list of SST files and move it to the end of
+      // sst_commit_list, effectively transferring ownership over it
+ rc2 = sst_info->finish(&commit_info, print_client_error);
+ if (rc2 && rc == 0) {
+ // Don't return yet - make sure we finish all the SST infos
+ rc = rc2;
+ }
+
+ // Make sure we have work to do - we might be losing the race
+ if (rc2 == 0 && commit_info.has_work()) {
+ sst_commit_list.emplace_back(std::move(commit_info));
+ DBUG_ASSERT(!commit_info.has_work());
+ }
+ }
+
+ if (rc) {
+ return rc;
+ }
+
+ // MERGING Phase: Flush the index_merge sort buffers into SST files in
+ // Rdb_sst_info and collect all Rdb_sst_commit_info containing
+ // (SST files, cf)
+ if (!m_key_merge.empty()) {
+ Ensure_cleanup malloc_cleanup([]() {
+ /*
+ Explicitly tell jemalloc to clean up any unused dirty pages at this
+ point.
+ See https://reviews.facebook.net/D63723 for more details.
+ */
+ purge_all_jemalloc_arenas();
+ });
+
+ rocksdb::Slice merge_key;
+ rocksdb::Slice merge_val;
+ for (auto it = m_key_merge.begin(); it != m_key_merge.end(); it++) {
+ GL_INDEX_ID index_id = it->first;
+ std::shared_ptr<const Rdb_key_def> keydef =
+ ddl_manager.safe_find(index_id);
+ std::string table_name = ddl_manager.safe_get_table_name(index_id);
+
+        // The key definition or table name may be unavailable because the
+        // table could have been dropped.
+ // TODO(herman): there is a race here between dropping the table
+ // and detecting a drop here. If the table is dropped while bulk
+ // loading is finishing, these keys being added here may
+ // be missed by the compaction filter and not be marked for
+ // removal. It is unclear how to lock the sql table from the storage
+ // engine to prevent modifications to it while bulk load is occurring.
+ if (keydef == nullptr) {
+ if (is_critical_error) {
+            // We used to set the error but simply ignore it. This preserves
+            // the current behavior and should be revisited later.
+ *is_critical_error = false;
+ }
+ return HA_ERR_KEY_NOT_FOUND;
+ } else if (table_name.empty()) {
+ if (is_critical_error) {
+            // We used to set the error but simply ignore it. This preserves
+            // the current behavior and should be revisited later.
+ *is_critical_error = false;
+ }
+ return HA_ERR_NO_SUCH_TABLE;
+ }
+ const std::string &index_name = keydef->get_name();
+ Rdb_index_merge &rdb_merge = it->second;
+
+ // Rdb_sst_info expects a denormalized table name in the form of
+ // "./database/table"
+ std::replace(table_name.begin(), table_name.end(), '.', '/');
+ table_name = "./" + table_name;
+ auto sst_info = std::make_shared<Rdb_sst_info>(
+ rdb, table_name, index_name, rdb_merge.get_cf(),
+ *rocksdb_db_options, THDVAR(get_thd(), trace_sst_api));
+
+ while ((rc2 = rdb_merge.next(&merge_key, &merge_val)) == 0) {
+ if ((rc2 = sst_info->put(merge_key, merge_val)) != 0) {
+ rc = rc2;
+
+ // Don't return yet - make sure we finish the sst_info
+ break;
+ }
+ }
+
+        // rc2 == -1 means the merge is exhausted; any other non-zero value
+        // is an error from next(). Record it unless an earlier error was
+        // already captured.
+        if (rc2 != -1 && rc == 0) {
+          rc = rc2;
+        }
+
+ Rdb_sst_info::Rdb_sst_commit_info commit_info;
+ rc2 = sst_info->finish(&commit_info, print_client_error);
+ if (rc2 != 0 && rc == 0) {
+          // Only set the error from sst_info->finish if finish failed and we
+          // didn't fail before. In other words, don't let finish's success
+          // mask earlier failures.
+ rc = rc2;
+ }
+
+ if (rc) {
+ return rc;
+ }
+
+ if (commit_info.has_work()) {
+ sst_commit_list.emplace_back(std::move(commit_info));
+ DBUG_ASSERT(!commit_info.has_work());
+ }
+ }
+ }
+
+    // Early return in case we lost the race completely and ended up with no
+    // work at all.
+ if (sst_commit_list.size() == 0) {
+ return rc;
+ }
+
+ // INGEST phase: Group all Rdb_sst_commit_info by cf (as they might
+ // have the same cf across different indexes) and call out to RocksDB
+ // to ingest all SST files in one atomic operation
+ rocksdb::IngestExternalFileOptions options;
+ options.move_files = true;
+ options.snapshot_consistency = false;
+ options.allow_global_seqno = false;
+ options.allow_blocking_flush = false;
+
+ std::map<rocksdb::ColumnFamilyHandle *, rocksdb::IngestExternalFileArg>
+ arg_map;
+
+ // Group by column_family
+ for (auto &commit_info : sst_commit_list) {
+ if (arg_map.find(commit_info.get_cf()) == arg_map.end()) {
+ rocksdb::IngestExternalFileArg arg;
+        arg.column_family = commit_info.get_cf();
+        arg.external_files = commit_info.get_committed_files();
+        arg.options = options;
+
+ arg_map.emplace(commit_info.get_cf(), arg);
+ } else {
+ auto &files = arg_map[commit_info.get_cf()].external_files;
+ files.insert(files.end(), commit_info.get_committed_files().begin(),
+ commit_info.get_committed_files().end());
+ }
+ }
+
+ std::vector<rocksdb::IngestExternalFileArg> args;
+ size_t file_count = 0;
+ for (auto &cf_files_pair : arg_map) {
+ args.push_back(cf_files_pair.second);
+ file_count += cf_files_pair.second.external_files.size();
+ }
+
+ const rocksdb::Status s = rdb->IngestExternalFiles(args);
+ if (THDVAR(m_thd, trace_sst_api)) {
+ // NO_LINT_DEBUG
+ sql_print_information(
+ "SST Tracing: IngestExternalFile '%zu' files returned %s", file_count,
+ s.ok() ? "ok" : "not ok");
+ }
+
+ if (!s.ok()) {
+ if (print_client_error) {
+ Rdb_sst_info::report_error_msg(s, nullptr);
+ }
+ return HA_ERR_ROCKSDB_BULK_LOAD;
+ }
+
+ // COMMIT phase: mark everything as completed. This avoids SST file
+ // deletion kicking in. Otherwise SST files would get deleted if this
+ // entire operation is aborted
+ for (auto &commit_info : sst_commit_list) {
+ commit_info.commit();
+ }
+
+ return rc;
+ }
+
+ int start_bulk_load(ha_rocksdb *const bulk_load,
+ std::shared_ptr<Rdb_sst_info> sst_info) {
+ /*
+ If we already have an open bulk load of a table and the name doesn't
+ match the current one, close out the currently running one. This allows
+ multiple bulk loads to occur on a partitioned table, but then closes
+ them all out when we switch to another table.
+ */
+ DBUG_ASSERT(bulk_load != nullptr);
+
+ if (!m_curr_bulk_load.empty() &&
+ bulk_load->get_table_basename() != m_curr_bulk_load_tablename) {
+ const auto res = finish_bulk_load();
+ if (res != HA_EXIT_SUCCESS) {
+ return res;
+ }
+ }
+
+ /*
+ This used to track ha_rocksdb handler objects, but those can be
+ freed by the table cache while this was referencing them. Instead
+ of tracking ha_rocksdb handler objects, this now tracks the
+      Rdb_sst_info allocated, and both the ha_rocksdb handler and the
+      Rdb_transaction have shared pointers to them.
+
+      On transaction completion, each Rdb_sst_info structure found is
+      committed. If the ha_rocksdb object is freed, etc., it also commits
+      the Rdb_sst_info. The Rdb_sst_info commit path needs to be idempotent.
+ */
+ m_curr_bulk_load.push_back(sst_info);
+ m_curr_bulk_load_tablename = bulk_load->get_table_basename();
+ return HA_EXIT_SUCCESS;
+ }
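+
+  /*
+    Hedged usage sketch (an assumption based on how rocksdb_close_connection()
+    below drives these methods, not a documented contract): a bulk load
+    typically runs as
+
+      auto sst_info = std::make_shared<Rdb_sst_info>(...);  // one per index
+      tx->start_bulk_load(handler, sst_info);
+      // ... rows are written through the handler ...
+      bool is_critical_error;
+      int rc = tx->finish_bulk_load(&is_critical_error);
+
+    finish_bulk_load() then performs the PREPARE/MERGE/INGEST/COMMIT phases
+    described above.
+  */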
+
+ int num_ongoing_bulk_load() const { return m_curr_bulk_load.size(); }
+
+ const char *get_rocksdb_tmpdir() const {
+ const char *tmp_dir = THDVAR(get_thd(), tmpdir);
+
+ /*
+      We want to treat an empty string as nullptr; in these cases DDL
+      operations will use the default --tmpdir passed to mysql instead.
+ */
+ if (tmp_dir != nullptr && *tmp_dir == '\0') {
+ tmp_dir = nullptr;
+ }
+ return (tmp_dir);
+ }
+
+ /*
+ Flush the data accumulated so far. This assumes we're doing a bulk insert.
+
+ @detail
+    This should work like transaction commit, except that we don't
+    synchronize with the binlog (there is no API that would let the binlog
+    flush the changes accumulated so far and return its current position).
+
+ @todo
+ Add test coverage for what happens when somebody attempts to do bulk
+ inserts while inside a multi-statement transaction.
+ */
+ bool flush_batch() {
+ if (get_write_count() == 0) return false;
+
+ /* Commit the current transaction */
+ if (commit_no_binlog()) return true;
+
+ /* Start another one */
+ start_tx();
+ return false;
+ }
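+
+  /*
+    Illustrative call pattern (hypothetical, not taken from the callers):
+    during a large bulk insert the caller may periodically do
+
+      if (tx->flush_batch())
+        return HA_ERR_INTERNAL_ERROR;  // committing the batch failed
+
+    flush_batch() commits what has accumulated and start_tx() immediately
+    begins a fresh transaction, so the insert loop can continue unchanged.
+  */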
+
+ void set_auto_incr(const GL_INDEX_ID &gl_index_id, ulonglong curr_id) {
+ m_auto_incr_map[gl_index_id] =
+ std::max(m_auto_incr_map[gl_index_id], curr_id);
+ }
+
+#ifndef DBUG_OFF
+ ulonglong get_auto_incr(const GL_INDEX_ID &gl_index_id) {
+ if (m_auto_incr_map.count(gl_index_id) > 0) {
+ return m_auto_incr_map[gl_index_id];
+ }
+ return 0;
+ }
+#endif
+
+ virtual rocksdb::Status put(rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key,
+ const rocksdb::Slice &value,
+ const bool assume_tracked) = 0;
+ virtual rocksdb::Status delete_key(
+ rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key, const bool assume_tracked) = 0;
+ virtual rocksdb::Status single_delete(
+ rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key, const bool assume_tracked) = 0;
+
+ virtual bool has_modifications() const = 0;
+
+ virtual rocksdb::WriteBatchBase *get_indexed_write_batch() = 0;
+ /*
+ Return a WriteBatch that one can write to. The writes will skip any
+ transaction locking. The writes will NOT be visible to the transaction.
+ */
+ rocksdb::WriteBatchBase *get_blind_write_batch() {
+ return get_indexed_write_batch()->GetWriteBatch();
+ }
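+
+  /*
+    Hedged sketch of a "blind" write; the column family and key below are
+    hypothetical. The write bypasses transaction locking and is not visible
+    to this transaction's own reads:
+
+      rocksdb::WriteBatchBase *wb = tx->get_blind_write_batch();
+      wb->Put(system_cf, rocksdb::Slice("meta-key"), rocksdb::Slice("v1"));
+  */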
+
+ virtual rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key,
+ rocksdb::PinnableSlice *const value) const = 0;
+ virtual rocksdb::Status get_for_update(
+ rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key, rocksdb::PinnableSlice *const value,
+ bool exclusive, const bool do_validate) = 0;
+
+ rocksdb::Iterator *get_iterator(
+ rocksdb::ColumnFamilyHandle *const column_family, bool skip_bloom_filter,
+ bool fill_cache, const rocksdb::Slice &eq_cond_lower_bound,
+ const rocksdb::Slice &eq_cond_upper_bound, bool read_current = false,
+ bool create_snapshot = true) {
+    // read_current (which implies we don't want a snapshot) and
+    // create_snapshot (which guarantees we create one) are mutually
+    // exclusive; assert that both are never requested together.
+ DBUG_ASSERT(column_family != nullptr);
+ DBUG_ASSERT(!read_current || !create_snapshot);
+
+ if (create_snapshot) acquire_snapshot(true);
+
+ rocksdb::ReadOptions options = m_read_opts;
+
+ if (skip_bloom_filter) {
+ options.total_order_seek = true;
+ options.iterate_lower_bound = &eq_cond_lower_bound;
+ options.iterate_upper_bound = &eq_cond_upper_bound;
+ } else {
+ // With this option, Iterator::Valid() returns false if key
+ // is outside of the prefix bloom filter range set at Seek().
+ // Must not be set to true if not using bloom filter.
+ options.prefix_same_as_start = true;
+ }
+ options.fill_cache = fill_cache;
+ if (read_current) {
+ options.snapshot = nullptr;
+ }
+ return get_iterator(options, column_family);
+ }
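+
+  /*
+    Hedged usage sketch (names are illustrative): a bounded range scan that
+    relies on the equal-condition bounds instead of the prefix bloom filter:
+
+      std::unique_ptr<rocksdb::Iterator> it(tx->get_iterator(
+          cf, true,  // skip_bloom_filter
+          true,      // fill_cache
+          lower_bound_slice, upper_bound_slice));
+      for (it->Seek(lower_bound_slice); it->Valid(); it->Next()) {
+        // consume it->key() / it->value()
+      }
+
+    Note that the bound slices must outlive the iterator: ReadOptions stores
+    pointers to them, not copies.
+  */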
+
+ virtual bool is_tx_started() const = 0;
+ virtual void start_tx() = 0;
+ virtual void start_stmt() = 0;
+
+ void set_initial_savepoint() {
+ /*
+ Set the initial savepoint. If the first statement in the transaction
+ fails, we need something to roll back to, without rolling back the
+ entire transaction.
+ */
+ do_set_savepoint();
+ m_writes_at_last_savepoint = m_write_count;
+ }
+
+ /*
+ Called when a "top-level" statement inside a transaction completes
+ successfully and its changes become part of the transaction's changes.
+ */
+ int make_stmt_savepoint_permanent() {
+ // Take another RocksDB savepoint only if we had changes since the last
+ // one. This is very important for long transactions doing lots of
+ // SELECTs.
+ if (m_writes_at_last_savepoint != m_write_count) {
+ rocksdb::WriteBatchBase *batch = get_write_batch();
+ rocksdb::Status status = rocksdb::Status::NotFound();
+ while ((status = batch->PopSavePoint()) == rocksdb::Status::OK()) {
+ }
+
+ if (status != rocksdb::Status::NotFound()) {
+ return HA_EXIT_FAILURE;
+ }
+
+ do_set_savepoint();
+ m_writes_at_last_savepoint = m_write_count;
+ }
+
+ return HA_EXIT_SUCCESS;
+ }
+
+ /*
+ Rollback to the savepoint we've set before the last statement
+ */
+ void rollback_to_stmt_savepoint() {
+ if (m_writes_at_last_savepoint != m_write_count) {
+ do_rollback_to_savepoint();
+ /*
+ RollbackToSavePoint "removes the most recent SetSavePoint()", so
+ we need to set it again so that next statement can roll back to this
+ stage.
+ It's ok to do it here at statement end (instead of doing it at next
+ statement start) because setting a savepoint is cheap.
+ */
+ do_set_savepoint();
+ m_writes_at_last_savepoint = m_write_count;
+ }
+ }
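+
+  /*
+    A sketch of the per-statement savepoint lifecycle implemented by the two
+    methods above (statement boundaries are driven by the server layer, which
+    is an assumption here):
+
+      start_tx()             -> set_initial_savepoint()
+      statement succeeds     -> make_stmt_savepoint_permanent()
+      statement fails        -> rollback_to_stmt_savepoint()
+      COMMIT / ROLLBACK      -> commit() / rollback()
+
+    Savepoints are only re-taken when m_write_count has changed, so read-only
+    statements inside a long transaction remain cheap.
+  */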
+
+ virtual void rollback_stmt() = 0;
+
+ void set_tx_failed(bool failed_arg) { m_is_tx_failed = failed_arg; }
+
+ bool can_prepare() const {
+ if (m_rollback_only) {
+ my_error(ER_ROLLBACK_ONLY, MYF(0));
+ return false;
+ }
+ return true;
+ }
+
+ int rollback_to_savepoint(void *const savepoint) {
+ if (has_modifications()) {
+ my_error(ER_ROLLBACK_TO_SAVEPOINT, MYF(0));
+ m_rollback_only = true;
+ return HA_EXIT_FAILURE;
+ }
+ return HA_EXIT_SUCCESS;
+ }
+
+ /*
+ This is used by transactions started with "START TRANSACTION WITH "
+ "CONSISTENT [ROCKSDB] SNAPSHOT". When tx_read_only is turned on,
+ snapshot has to be created via DB::GetSnapshot(), not via Transaction
+ API.
+ */
+ bool is_tx_read_only() const { return m_tx_read_only; }
+
+ bool is_two_phase() const { return m_is_two_phase; }
+
+ void set_tx_read_only(bool val) { m_tx_read_only = val; }
+
+ explicit Rdb_transaction(THD *const thd)
+ : m_thd(thd), m_tbl_io_perf(nullptr) {
+ RDB_MUTEX_LOCK_CHECK(s_tx_list_mutex);
+ s_tx_list.insert(this);
+ RDB_MUTEX_UNLOCK_CHECK(s_tx_list_mutex);
+ }
+
+ virtual ~Rdb_transaction() {
+ RDB_MUTEX_LOCK_CHECK(s_tx_list_mutex);
+ s_tx_list.erase(this);
+ RDB_MUTEX_UNLOCK_CHECK(s_tx_list_mutex);
+ }
+};
+
+/*
+ This is a rocksdb transaction. Its members represent the current transaction,
+ which consists of:
+ - the snapshot
+ - the changes we've made but are not seeing yet.
+
+ The changes are made to individual tables, which store them here and then
+ this object commits them on commit.
+*/
+class Rdb_transaction_impl : public Rdb_transaction {
+ rocksdb::Transaction *m_rocksdb_tx = nullptr;
+ rocksdb::Transaction *m_rocksdb_reuse_tx = nullptr;
+
+ public:
+ void set_lock_timeout(int timeout_sec_arg) override {
+ if (m_rocksdb_tx) {
+ m_rocksdb_tx->SetLockTimeout(rdb_convert_sec_to_ms(m_timeout_sec));
+ }
+ }
+
+ void set_sync(bool sync) override {
+ if (m_rocksdb_tx)
+ m_rocksdb_tx->GetWriteOptions()->sync = sync;
+ }
+
+ void release_lock(rocksdb::ColumnFamilyHandle *const column_family,
+ const std::string &rowkey) override {
+ if (!THDVAR(m_thd, lock_scanned_rows)) {
+ m_rocksdb_tx->UndoGetForUpdate(column_family, rocksdb::Slice(rowkey));
+ }
+ }
+
+ virtual bool is_writebatch_trx() const override { return false; }
+
+ private:
+ void release_tx(void) {
+ // We are done with the current active transaction object. Preserve it
+ // for later reuse.
+ DBUG_ASSERT(m_rocksdb_reuse_tx == nullptr);
+ m_rocksdb_reuse_tx = m_rocksdb_tx;
+ m_rocksdb_tx = nullptr;
+ }
+
+ bool prepare(const rocksdb::TransactionName &name) override {
+ rocksdb::Status s;
+ s = m_rocksdb_tx->SetName(name);
+ if (!s.ok()) {
+ rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT);
+ return false;
+ }
+
+ s = merge_auto_incr_map(m_rocksdb_tx->GetWriteBatch()->GetWriteBatch());
+ if (!s.ok()) {
+ rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT);
+ return false;
+ }
+
+ s = m_rocksdb_tx->Prepare();
+ if (!s.ok()) {
+ rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT);
+ return false;
+ }
+ return true;
+ }
+
+ bool commit_no_binlog() override {
+ bool res = false;
+ rocksdb::Status s;
+
+ s = merge_auto_incr_map(m_rocksdb_tx->GetWriteBatch()->GetWriteBatch());
+ if (!s.ok()) {
+ rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT);
+ res = true;
+ goto error;
+ }
+
+ release_snapshot();
+ s = m_rocksdb_tx->Commit();
+ if (!s.ok()) {
+ rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT);
+ res = true;
+ goto error;
+ }
+
+ error:
+ /* Save the transaction object to be reused */
+ release_tx();
+
+ m_write_count = 0;
+ m_insert_count = 0;
+ m_update_count = 0;
+ m_delete_count = 0;
+ m_lock_count = 0;
+ set_tx_read_only(false);
+ m_rollback_only = false;
+ return res;
+ }
+
+ public:
+ void rollback() override {
+ m_write_count = 0;
+ m_insert_count = 0;
+ m_update_count = 0;
+ m_delete_count = 0;
+ m_lock_count = 0;
+ m_auto_incr_map.clear();
+ m_ddl_transaction = false;
+ if (m_rocksdb_tx) {
+ release_snapshot();
+ /* This will also release all of the locks: */
+ m_rocksdb_tx->Rollback();
+
+ /* Save the transaction object to be reused */
+ release_tx();
+
+ set_tx_read_only(false);
+ m_rollback_only = false;
+ }
+ }
+
+ void acquire_snapshot(bool acquire_now) override {
+ if (m_read_opts.snapshot == nullptr) {
+#ifdef MARIAROCKS_NOT_YET
+ const auto thd_ss = std::static_pointer_cast<Rdb_explicit_snapshot>(
+ m_thd->get_explicit_snapshot());
+ if (thd_ss) {
+ m_explicit_snapshot = thd_ss;
+ }
+ if (m_explicit_snapshot) {
+ auto snapshot = m_explicit_snapshot->get_snapshot()->snapshot();
+ snapshot_created(snapshot);
+ } else
+#endif
+ if (is_tx_read_only()) {
+ snapshot_created(rdb->GetSnapshot());
+ } else if (acquire_now) {
+ m_rocksdb_tx->SetSnapshot();
+ snapshot_created(m_rocksdb_tx->GetSnapshot());
+ } else if (!m_is_delayed_snapshot) {
+ m_rocksdb_tx->SetSnapshotOnNextOperation(m_notifier);
+ m_is_delayed_snapshot = true;
+ }
+ }
+ }
+
+ void release_snapshot() override {
+ bool need_clear = m_is_delayed_snapshot;
+
+ if (m_read_opts.snapshot != nullptr) {
+ m_snapshot_timestamp = 0;
+#ifdef MARIAROCKS_NOT_YET
+ if (m_explicit_snapshot) {
+ m_explicit_snapshot.reset();
+ need_clear = false;
+ } else
+#endif
+ if (is_tx_read_only()) {
+ rdb->ReleaseSnapshot(m_read_opts.snapshot);
+ need_clear = false;
+ } else {
+ need_clear = true;
+ }
+ m_read_opts.snapshot = nullptr;
+ }
+
+ if (need_clear && m_rocksdb_tx != nullptr) m_rocksdb_tx->ClearSnapshot();
+ }
+
+ bool has_snapshot() { return m_read_opts.snapshot != nullptr; }
+
+ rocksdb::Status put(rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key, const rocksdb::Slice &value,
+ const bool assume_tracked) override {
+ ++m_write_count;
+ ++m_lock_count;
+ if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks) {
+ return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit);
+ }
+ return m_rocksdb_tx->Put(column_family, key, value, assume_tracked);
+ }
+
+ rocksdb::Status delete_key(rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key,
+ const bool assume_tracked) override {
+ ++m_write_count;
+ ++m_lock_count;
+ if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks) {
+ return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit);
+ }
+ return m_rocksdb_tx->Delete(column_family, key, assume_tracked);
+ }
+
+ rocksdb::Status single_delete(
+ rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key, const bool assume_tracked) override {
+ ++m_write_count;
+ ++m_lock_count;
+ if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks) {
+ return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit);
+ }
+ return m_rocksdb_tx->SingleDelete(column_family, key, assume_tracked);
+ }
+
+ bool has_modifications() const override {
+ return m_rocksdb_tx->GetWriteBatch() &&
+ m_rocksdb_tx->GetWriteBatch()->GetWriteBatch() &&
+ m_rocksdb_tx->GetWriteBatch()->GetWriteBatch()->Count() > 0;
+ }
+
+ rocksdb::WriteBatchBase *get_write_batch() override {
+ if (is_two_phase()) {
+ return m_rocksdb_tx->GetCommitTimeWriteBatch();
+ }
+ return m_rocksdb_tx->GetWriteBatch()->GetWriteBatch();
+ }
+
+ /*
+ Return a WriteBatch that one can write to. The writes will skip any
+ transaction locking. The writes WILL be visible to the transaction.
+ */
+ rocksdb::WriteBatchBase *get_indexed_write_batch() override {
+ ++m_write_count;
+ return m_rocksdb_tx->GetWriteBatch();
+ }
+
+ rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key,
+ rocksdb::PinnableSlice *const value) const override {
+    // Clean the PinnableSlice right before Get(), as there can be multiple
+    // gets per statement; the resources held after the last Get in a
+    // statement are cleared in the handler::reset call.
+ value->Reset();
+ global_stats.queries[QUERIES_POINT].inc();
+ return m_rocksdb_tx->Get(m_read_opts, column_family, key, value);
+ }
+
+ rocksdb::Status get_for_update(
+ rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key, rocksdb::PinnableSlice *const value,
+ bool exclusive, const bool do_validate) override {
+ if (++m_lock_count > m_max_row_locks) {
+ return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit);
+ }
+
+ if (value != nullptr) {
+ value->Reset();
+ }
+ rocksdb::Status s;
+ // If snapshot is null, pass it to GetForUpdate and snapshot is
+ // initialized there. Snapshot validation is skipped in that case.
+ if (m_read_opts.snapshot == nullptr || do_validate) {
+ s = m_rocksdb_tx->GetForUpdate(
+ m_read_opts, column_family, key, value, exclusive,
+ m_read_opts.snapshot ? do_validate : false);
+ } else {
+ // If snapshot is set, and if skipping validation,
+ // call GetForUpdate without validation and set back old snapshot
+ auto saved_snapshot = m_read_opts.snapshot;
+ m_read_opts.snapshot = nullptr;
+ s = m_rocksdb_tx->GetForUpdate(m_read_opts, column_family, key, value,
+ exclusive, false);
+ m_read_opts.snapshot = saved_snapshot;
+ }
+ return s;
+ }
+
+ rocksdb::Iterator *get_iterator(
+ const rocksdb::ReadOptions &options,
+ rocksdb::ColumnFamilyHandle *const column_family) override {
+ global_stats.queries[QUERIES_RANGE].inc();
+ return m_rocksdb_tx->GetIterator(options, column_family);
+ }
+
+ const rocksdb::Transaction *get_rdb_trx() const { return m_rocksdb_tx; }
+
+ bool is_tx_started() const override { return (m_rocksdb_tx != nullptr); }
+
+ void start_tx() override {
+ rocksdb::TransactionOptions tx_opts;
+ rocksdb::WriteOptions write_opts;
+ tx_opts.set_snapshot = false;
+ tx_opts.lock_timeout = rdb_convert_sec_to_ms(m_timeout_sec);
+ tx_opts.deadlock_detect = THDVAR(m_thd, deadlock_detect);
+ tx_opts.deadlock_detect_depth = THDVAR(m_thd, deadlock_detect_depth);
+ // If this variable is set, this will write commit time write batch
+ // information on recovery or memtable flush.
+ tx_opts.use_only_the_last_commit_time_batch_for_recovery =
+ THDVAR(m_thd, commit_time_batch_for_recovery);
+ tx_opts.max_write_batch_size = THDVAR(m_thd, write_batch_max_bytes);
+
+ write_opts.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
+ write_opts.disableWAL = THDVAR(m_thd, write_disable_wal);
+ write_opts.ignore_missing_column_families =
+ THDVAR(m_thd, write_ignore_missing_column_families);
+ m_is_two_phase = rocksdb_enable_2pc;
+
+ commit_ordered_done= false;
+
+ /*
+ If m_rocksdb_reuse_tx is null this will create a new transaction object.
+ Otherwise it will reuse the existing one.
+ */
+ m_rocksdb_tx =
+ rdb->BeginTransaction(write_opts, tx_opts, m_rocksdb_reuse_tx);
+ m_rocksdb_reuse_tx = nullptr;
+
+ m_read_opts = rocksdb::ReadOptions();
+
+ set_initial_savepoint();
+
+ m_ddl_transaction = false;
+ }
+
+  /* Implementations of do_*savepoint based on rocksdb::Transaction savepoints
+ */
+ void do_set_savepoint() override { m_rocksdb_tx->SetSavePoint(); }
+
+ void do_rollback_to_savepoint() override {
+ m_rocksdb_tx->RollbackToSavePoint();
+ }
+
+ /*
+ Start a statement inside a multi-statement transaction.
+
+ @todo: are we sure this is called once (and not several times) per
+ statement start?
+
+ For hooking to start of statement that is its own transaction, see
+ ha_rocksdb::external_lock().
+ */
+ void start_stmt() override {
+ // Set the snapshot to delayed acquisition (SetSnapshotOnNextOperation)
+ acquire_snapshot(false);
+ }
+
+ /*
+ This must be called when last statement is rolled back, but the transaction
+ continues
+ */
+ void rollback_stmt() override {
+ /* TODO: here we must release the locks taken since the start_stmt() call */
+ if (m_rocksdb_tx) {
+ const rocksdb::Snapshot *const org_snapshot = m_rocksdb_tx->GetSnapshot();
+ rollback_to_stmt_savepoint();
+
+ const rocksdb::Snapshot *const cur_snapshot = m_rocksdb_tx->GetSnapshot();
+ if (org_snapshot != cur_snapshot) {
+ if (org_snapshot != nullptr) m_snapshot_timestamp = 0;
+
+ m_read_opts.snapshot = cur_snapshot;
+ if (cur_snapshot != nullptr) {
+ rdb->GetEnv()->GetCurrentTime(&m_snapshot_timestamp);
+ } else {
+ m_is_delayed_snapshot = true;
+ }
+ }
+ }
+ }
+
+ explicit Rdb_transaction_impl(THD *const thd)
+ : Rdb_transaction(thd), m_rocksdb_tx(nullptr) {
+ // Create a notifier that can be called when a snapshot gets generated.
+ m_notifier = std::make_shared<Rdb_snapshot_notifier>(this);
+ }
+
+ virtual ~Rdb_transaction_impl() override {
+ rollback();
+
+ // Theoretically the notifier could outlive the Rdb_transaction_impl
+ // (because of the shared_ptr), so let it know it can't reference
+ // the transaction anymore.
+ m_notifier->detach();
+
+ // Free any transaction memory that is still hanging around.
+ delete m_rocksdb_reuse_tx;
+ DBUG_ASSERT(m_rocksdb_tx == nullptr);
+ }
+};
+
+/* This is a rocksdb write batch. This class doesn't hold or wait on any
+   transaction locks (it skips the rocksdb transaction API), thus giving
+   better performance.
+
+   Currently this is only used for replication threads, which are guaranteed
+   to be non-conflicting. Any further usage of this class should be thought
+   through carefully.
+*/
+class Rdb_writebatch_impl : public Rdb_transaction {
+ rocksdb::WriteBatchWithIndex *m_batch;
+ rocksdb::WriteOptions write_opts;
+ // Called after commit/rollback.
+ void reset() {
+ m_batch->Clear();
+ m_read_opts = rocksdb::ReadOptions();
+ m_ddl_transaction = false;
+ }
+
+ private:
+ bool prepare(const rocksdb::TransactionName &name) override { return true; }
+
+ bool commit_no_binlog() override {
+ bool res = false;
+ rocksdb::Status s;
+ rocksdb::TransactionDBWriteOptimizations optimize;
+ optimize.skip_concurrency_control = true;
+
+ s = merge_auto_incr_map(m_batch->GetWriteBatch());
+ if (!s.ok()) {
+ rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT);
+ res = true;
+ goto error;
+ }
+
+ release_snapshot();
+
+ s = rdb->Write(write_opts, optimize, m_batch->GetWriteBatch());
+ if (!s.ok()) {
+ rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT);
+ res = true;
+ goto error;
+ }
+ error:
+ reset();
+
+ m_write_count = 0;
+ m_insert_count = 0;
+ m_update_count = 0;
+ m_delete_count = 0;
+ set_tx_read_only(false);
+ m_rollback_only = false;
+ return res;
+ }
+
+  /* Implementations of do_*savepoint based on rocksdb::WriteBatch savepoints */
+ void do_set_savepoint() override { m_batch->SetSavePoint(); }
+
+ void do_rollback_to_savepoint() override { m_batch->RollbackToSavePoint(); }
+
+
+ public:
+ bool is_writebatch_trx() const override { return true; }
+
+ void set_lock_timeout(int timeout_sec_arg) override {
+ // Nothing to do here.
+ }
+
+ void set_sync(bool sync) override { write_opts.sync = sync; }
+
+ void release_lock(rocksdb::ColumnFamilyHandle *const column_family,
+ const std::string &rowkey) override {
+ // Nothing to do here since we don't hold any row locks.
+ }
+
+ void rollback() override {
+ m_write_count = 0;
+ m_insert_count = 0;
+ m_update_count = 0;
+ m_delete_count = 0;
+ m_lock_count = 0;
+ release_snapshot();
+
+ reset();
+ set_tx_read_only(false);
+ m_rollback_only = false;
+ }
+
+ void acquire_snapshot(bool acquire_now) override {
+ if (m_read_opts.snapshot == nullptr) snapshot_created(rdb->GetSnapshot());
+ }
+
+ void release_snapshot() override {
+ if (m_read_opts.snapshot != nullptr) {
+ rdb->ReleaseSnapshot(m_read_opts.snapshot);
+ m_read_opts.snapshot = nullptr;
+ }
+ }
+
+ rocksdb::Status put(rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key, const rocksdb::Slice &value,
+ const bool assume_tracked) override {
+ ++m_write_count;
+ m_batch->Put(column_family, key, value);
+    // Note: Put/Delete in a write batch doesn't return any error code, so we
+    // simply return OK here.
+ return rocksdb::Status::OK();
+ }
+
+ rocksdb::Status delete_key(rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key,
+ const bool assume_tracked) override {
+ ++m_write_count;
+ m_batch->Delete(column_family, key);
+ return rocksdb::Status::OK();
+ }
+
+ rocksdb::Status single_delete(
+ rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key, const bool /* assume_tracked */) override {
+ ++m_write_count;
+ m_batch->SingleDelete(column_family, key);
+ return rocksdb::Status::OK();
+ }
+
+ bool has_modifications() const override {
+ return m_batch->GetWriteBatch()->Count() > 0;
+ }
+
+ rocksdb::WriteBatchBase *get_write_batch() override { return m_batch; }
+
+ rocksdb::WriteBatchBase *get_indexed_write_batch() override {
+ ++m_write_count;
+ return m_batch;
+ }
+
+ rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key,
+ rocksdb::PinnableSlice *const value) const override {
+ value->Reset();
+ return m_batch->GetFromBatchAndDB(rdb, m_read_opts, column_family, key,
+ value);
+ }
+
+ rocksdb::Status get_for_update(
+ rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key, rocksdb::PinnableSlice *const value,
+ bool /* exclusive */, const bool /* do_validate */) override {
+ if (value == nullptr) {
+ rocksdb::PinnableSlice pin_val;
+ rocksdb::Status s = get(column_family, key, &pin_val);
+ pin_val.Reset();
+ return s;
+ }
+
+ return get(column_family, key, value);
+ }
+
+ rocksdb::Iterator *get_iterator(
+ const rocksdb::ReadOptions &options,
+ rocksdb::ColumnFamilyHandle *const /* column_family */) override {
+ const auto it = rdb->NewIterator(options);
+ return m_batch->NewIteratorWithBase(it);
+ }
+
+ bool is_tx_started() const override { return (m_batch != nullptr); }
+
+ void start_tx() override {
+ commit_ordered_done= false; // Do we need this here?
+ reset();
+ write_opts.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
+ write_opts.disableWAL = THDVAR(m_thd, write_disable_wal);
+ write_opts.ignore_missing_column_families =
+ THDVAR(m_thd, write_ignore_missing_column_families);
+
+ set_initial_savepoint();
+ }
+
+ void start_stmt() override {}
+
+ void rollback_stmt() override {
+ if (m_batch) rollback_to_stmt_savepoint();
+ }
+
+ explicit Rdb_writebatch_impl(THD *const thd)
+ : Rdb_transaction(thd), m_batch(nullptr) {
+ m_batch = new rocksdb::WriteBatchWithIndex(rocksdb::BytewiseComparator(), 0,
+ true);
+ }
+
+ virtual ~Rdb_writebatch_impl() override {
+ rollback();
+ delete m_batch;
+ }
+};
+
+void Rdb_snapshot_notifier::SnapshotCreated(
+ const rocksdb::Snapshot *const snapshot) {
+ if (m_owning_tx != nullptr) {
+ m_owning_tx->snapshot_created(snapshot);
+ }
+}
+
+std::multiset<Rdb_transaction *> Rdb_transaction::s_tx_list;
+mysql_mutex_t Rdb_transaction::s_tx_list_mutex;
+
+static Rdb_transaction *get_tx_from_thd(THD *const thd) {
+ return reinterpret_cast<Rdb_transaction *>(
+ my_core::thd_get_ha_data(thd, rocksdb_hton));
+}
+
+namespace {
+
+class Rdb_perf_context_guard {
+ Rdb_io_perf m_io_perf;
+ Rdb_io_perf *m_io_perf_ptr;
+ Rdb_transaction *m_tx;
+ uint m_level;
+
+ public:
+ Rdb_perf_context_guard(const Rdb_perf_context_guard &) = delete;
+ Rdb_perf_context_guard &operator=(const Rdb_perf_context_guard &) = delete;
+
+ explicit Rdb_perf_context_guard(Rdb_io_perf *io_perf, uint level)
+ : m_io_perf_ptr(io_perf), m_tx(nullptr), m_level(level) {
+ m_io_perf_ptr->start(m_level);
+ }
+
+ explicit Rdb_perf_context_guard(Rdb_transaction *tx, uint level)
+ : m_io_perf_ptr(nullptr), m_tx(tx), m_level(level) {
+ /*
+ if perf_context information is already being recorded, this becomes a
+ no-op
+ */
+ if (tx != nullptr) {
+ tx->io_perf_start(&m_io_perf);
+ }
+ }
+
+ ~Rdb_perf_context_guard() {
+ if (m_tx != nullptr) {
+ m_tx->io_perf_end_and_record();
+ } else if (m_io_perf_ptr != nullptr) {
+ m_io_perf_ptr->end_and_record(m_level);
+ }
+ }
+};
+
+} // anonymous namespace
+
+/*
+ TODO: maybe, call this in external_lock() and store in ha_rocksdb..
+*/
+
+static Rdb_transaction *get_or_create_tx(THD *const thd) {
+ Rdb_transaction *tx = get_tx_from_thd(thd);
+ // TODO: this is called too many times.. O(#rows)
+ if (tx == nullptr) {
+ bool rpl_skip_tx_api= false; // MARIAROCKS_NOT_YET.
+ if ((rpl_skip_tx_api && thd->rgi_slave) ||
+ (THDVAR(thd, master_skip_tx_api) && !thd->rgi_slave))
+ {
+ tx = new Rdb_writebatch_impl(thd);
+ } else {
+ tx = new Rdb_transaction_impl(thd);
+ }
+ tx->set_params(THDVAR(thd, lock_wait_timeout), THDVAR(thd, max_row_locks));
+ tx->start_tx();
+ my_core::thd_set_ha_data(thd, rocksdb_hton, tx);
+ } else {
+ tx->set_params(THDVAR(thd, lock_wait_timeout), THDVAR(thd, max_row_locks));
+ if (!tx->is_tx_started()) {
+ tx->start_tx();
+ }
+ }
+
+ return tx;
+}
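+
+/*
+  The transaction object is a per-connection singleton kept in the THD's
+  ha_data slot. An illustrative sketch of the lookup/creation contract:
+
+    Rdb_transaction *tx = get_tx_from_thd(thd);  // may return nullptr
+    tx = get_or_create_tx(thd);                  // never nullptr, tx started
+
+  rocksdb_close_connection() below deletes the object when the connection
+  goes away.
+*/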
+
+static int rocksdb_close_connection(handlerton *const hton, THD *const thd) {
+ Rdb_transaction *tx = get_tx_from_thd(thd);
+ if (tx != nullptr) {
+ bool is_critical_error;
+ int rc = tx->finish_bulk_load(&is_critical_error, false);
+ if (rc != 0 && is_critical_error) {
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Error %d finalizing last SST file while "
+ "disconnecting",
+ rc);
+ }
+
+ delete tx;
+ }
+ return HA_EXIT_SUCCESS;
+}
+
+/*
+ * Serializes an xid to a string so that it can
+ * be used as a rocksdb transaction name
+ */
+static std::string rdb_xid_to_string(const XID &src) {
+ DBUG_ASSERT(src.gtrid_length >= 0 && src.gtrid_length <= MAXGTRIDSIZE);
+ DBUG_ASSERT(src.bqual_length >= 0 && src.bqual_length <= MAXBQUALSIZE);
+
+ std::string buf;
+ buf.reserve(RDB_XIDHDR_LEN + src.gtrid_length + src.bqual_length);
+
+ /*
+   * expand formatID to fill 8 bytes if it doesn't already, then
+   * reinterpret the bit pattern as unsigned and store it in network order
+ */
+ uchar fidbuf[RDB_FORMATID_SZ];
+ int64 signed_fid8 = src.formatID;
+ const uint64 raw_fid8 = *reinterpret_cast<uint64 *>(&signed_fid8);
+ rdb_netbuf_store_uint64(fidbuf, raw_fid8);
+ buf.append(reinterpret_cast<const char *>(fidbuf), RDB_FORMATID_SZ);
+
+ buf.push_back(src.gtrid_length);
+ buf.push_back(src.bqual_length);
+ buf.append(src.data, (src.gtrid_length) + (src.bqual_length));
+ return buf;
+}
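+
+/*
+  The serialized layout produced above (assuming RDB_FORMATID_SZ == 8 and
+  one byte for each length field, which the code implies):
+
+    bytes [0..7]  formatID, bit pattern stored big-endian as unsigned 64-bit
+    byte  [8]     gtrid_length
+    byte  [9]     bqual_length
+    bytes [10..]  gtrid data immediately followed by bqual data
+
+  rdb_xid_from_string() below reverses this layout.
+*/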
+
+#if 0
+// MARIAROCKS: MariaDB doesn't have flush_wal method
+/**
+ Called by hton->flush_logs after MySQL group commit prepares a set of
+ transactions.
+*/
+static bool rocksdb_flush_wal(handlerton* hton __attribute__((__unused__)))
+ DBUG_ASSERT(rdb != nullptr);
+
+ rocksdb::Status s;
+ /*
+ target_lsn is set to 0 when MySQL wants to sync the wal files
+ */
+ if ((target_lsn == 0 && !rocksdb_db_options->allow_mmap_writes) ||
+ rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) {
+ rocksdb_wal_group_syncs++;
+ s = rdb->FlushWAL(target_lsn == 0 ||
+ rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
+ }
+
+ if (!s.ok()) {
+ rdb_log_status_error(s);
+ return HA_EXIT_FAILURE;
+ }
+ return HA_EXIT_SUCCESS;
+}
+#endif
+
+/**
+ For a slave, prepare() updates the slave_gtid_info table which tracks the
+ replication progress.
+*/
+static int rocksdb_prepare(handlerton* hton, THD* thd, bool prepare_tx)
+{
+  bool async=false; // The "ASYNC_COMMIT" feature, which is only present in webscalesql
+
+ Rdb_transaction *tx = get_tx_from_thd(thd);
+ if (!tx->can_prepare()) {
+ return HA_EXIT_FAILURE;
+ }
+ if (prepare_tx ||
+ (!my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
+ /* We were instructed to prepare the whole transaction, or
+ this is an SQL statement end and autocommit is on */
+
+#ifdef MARIAROCKS_NOT_YET
+ /*
+ Storing binlog position inside MyRocks is needed only for restoring
+ MyRocks from backups. This feature is not supported yet.
+ */
+ std::vector<st_slave_gtid_info> slave_gtid_info;
+ my_core::thd_slave_gtid_info(thd, &slave_gtid_info);
+ for (const auto &it : slave_gtid_info) {
+ rocksdb::WriteBatchBase *const write_batch = tx->get_blind_write_batch();
+ binlog_manager.update_slave_gtid_info(it.id, it.db, it.gtid, write_batch);
+ }
+#endif
+
+ if (tx->is_two_phase()) {
+
+ /*
+ MariaDB: the following branch is never taken.
+ We always flush at Prepare and rely on RocksDB's internal Group Commit
+ to do some grouping.
+ */
+ if (thd->durability_property == HA_IGNORE_DURABILITY || async) {
+ tx->set_sync(false);
+ }
+
+ /*
+ MariaDB: do not flush logs if we are running in a non-crash-safe mode.
+ */
+ if (!rocksdb_flush_log_at_trx_commit)
+ tx->set_sync(false);
+
+ XID xid;
+ thd_get_xid(thd, reinterpret_cast<MYSQL_XID *>(&xid));
+ if (!tx->prepare(rdb_xid_to_string(xid))) {
+ return HA_EXIT_FAILURE;
+ }
+
+ /*
+ MariaDB: our Group Commit implementation does not use the
+ hton->flush_logs call (at least currently) so the following is not
+ needed (TODO: will we need this for binlog rotation?)
+ */
+#ifdef MARIAROCKS_NOT_YET
+ if (thd->durability_property == HA_IGNORE_DURABILITY )
+ (rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER))
+ &&
+ THDVAR(thd, flush_log_at_trx_commit))
+#endif
+#ifdef MARIAROCKS_NOT_YET
+ {
+ // MariaRocks: disable the
+ // "write/sync redo log before flushing binlog cache to file"
+ // feature. See a869c56d361bb44f46c0efeb11a8f03561676247
+ /**
+ we set the log sequence as '1' just to trigger hton->flush_logs
+ */
+ thd_store_lsn(thd, 1, DB_TYPE_ROCKSDB);
+ }
+#endif
+ }
+
+ DEBUG_SYNC(thd, "rocksdb.prepared");
+ } else {
+ tx->make_stmt_savepoint_permanent();
+ }
+ return HA_EXIT_SUCCESS;
+}
+
+/**
+ do nothing for prepare/commit by xid
+ this is needed to avoid crashes in XA scenarios
+*/
+static int rocksdb_commit_by_xid(handlerton *const hton, XID *const xid) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(hton != nullptr);
+ DBUG_ASSERT(xid != nullptr);
+ DBUG_ASSERT(commit_latency_stats != nullptr);
+
+ rocksdb::StopWatchNano timer(rocksdb::Env::Default(), true);
+
+ const auto name = rdb_xid_to_string(*xid);
+ DBUG_ASSERT(!name.empty());
+
+ rocksdb::Transaction *const trx = rdb->GetTransactionByName(name);
+
+ if (trx == nullptr) {
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ const rocksdb::Status s = trx->Commit();
+
+ if (!s.ok()) {
+ rdb_log_status_error(s);
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ delete trx;
+
+ // `Add()` is implemented in a thread-safe manner.
+ commit_latency_stats->Add(timer.ElapsedNanos() / 1000);
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+static int rocksdb_rollback_by_xid(
+ handlerton *const hton MY_ATTRIBUTE((__unused__)), XID *const xid) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(hton != nullptr);
+ DBUG_ASSERT(xid != nullptr);
+ DBUG_ASSERT(rdb != nullptr);
+
+ const auto name = rdb_xid_to_string(*xid);
+
+ rocksdb::Transaction *const trx = rdb->GetTransactionByName(name);
+
+ if (trx == nullptr) {
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ const rocksdb::Status s = trx->Rollback();
+
+ if (!s.ok()) {
+ rdb_log_status_error(s);
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ delete trx;
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+/**
+ Rebuilds an XID from a serialized version stored in a string.
+*/
+static void rdb_xid_from_string(const std::string &src, XID *const dst) {
+ DBUG_ASSERT(dst != nullptr);
+ uint offset = 0;
+ uint64 raw_fid8 =
+ rdb_netbuf_to_uint64(reinterpret_cast<const uchar *>(src.data()));
+ const int64 signed_fid8 = *reinterpret_cast<int64 *>(&raw_fid8);
+ dst->formatID = signed_fid8;
+ offset += RDB_FORMATID_SZ;
+ dst->gtrid_length = src.at(offset);
+ offset += RDB_GTRID_SZ;
+ dst->bqual_length = src.at(offset);
+ offset += RDB_BQUAL_SZ;
+
+ DBUG_ASSERT(dst->gtrid_length >= 0 && dst->gtrid_length <= MAXGTRIDSIZE);
+ DBUG_ASSERT(dst->bqual_length >= 0 && dst->bqual_length <= MAXBQUALSIZE);
+
+ memset(dst->data, 0, XIDDATASIZE);
+ src.copy(dst->data, (dst->gtrid_length) + (dst->bqual_length),
+ RDB_XIDHDR_LEN);
+}
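+
+/*
+  rdb_xid_to_string() and rdb_xid_from_string() are intended to be inverses;
+  a quick illustrative check (hypothetical test code, not part of the server):
+
+    XID a, b;
+    // ... populate a ...
+    rdb_xid_from_string(rdb_xid_to_string(a), &b);
+    DBUG_ASSERT(a.formatID == b.formatID &&
+                a.gtrid_length == b.gtrid_length &&
+                a.bqual_length == b.bqual_length);
+*/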
+
+/**
+  Reading the last committed binary log info from the RocksDB system row.
+  The info is needed for crash-safe slave/master operation.
+*/
+static int rocksdb_recover(handlerton* hton, XID* xid_list, uint len)
+#ifdef MARIAROCKS_NOT_YET
+ char* const binlog_file,
+ my_off_t *const binlog_pos,
+ Gtid *const binlog_max_gtid) {
+#endif
+{
+#ifdef MARIAROCKS_NOT_YET
+ if (binlog_file && binlog_pos) {
+ char file_buf[FN_REFLEN + 1] = {0};
+ my_off_t pos;
+ char gtid_buf[FN_REFLEN + 1] = {0};
+ if (binlog_manager.read(file_buf, &pos, gtid_buf)) {
+ if (is_binlog_advanced(binlog_file, *binlog_pos, file_buf, pos)) {
+ memcpy(binlog_file, file_buf, FN_REFLEN + 1);
+ *binlog_pos = pos;
+ // NO_LINT_DEBUG
+ fprintf(stderr,
+ "RocksDB: Last binlog file position %llu,"
+ " file name %s\n",
+ pos, file_buf);
+ if (*gtid_buf) {
+ global_sid_lock->rdlock();
+ binlog_max_gtid->parse(global_sid_map, gtid_buf);
+ global_sid_lock->unlock();
+ // NO_LINT_DEBUG
+ fprintf(stderr, "RocksDB: Last MySQL Gtid %s\n", gtid_buf);
+ }
+ }
+ }
+ }
+#endif
+
+ if (len == 0 || xid_list == nullptr) {
+ return HA_EXIT_SUCCESS;
+ }
+
+ std::vector<rocksdb::Transaction *> trans_list;
+ rdb->GetAllPreparedTransactions(&trans_list);
+
+ uint count = 0;
+ for (auto &trans : trans_list) {
+ if (count >= len) {
+ break;
+ }
+ auto name = trans->GetName();
+ rdb_xid_from_string(name, &xid_list[count]);
+ count++;
+ }
+ return count;
+}
+
+
+/*
+ Handle a commit checkpoint request from server layer.
+
+ InnoDB does this:
+ We put the request in a queue, so that we can notify upper layer about
+ checkpoint complete when we have flushed the redo log.
+ If we have already flushed all relevant redo log, we notify immediately.
+
+ MariaRocks just flushes everything right away ATM
+*/
+
+static void rocksdb_checkpoint_request(handlerton *hton,
+ void *cookie)
+{
+ const rocksdb::Status s= rdb->SyncWAL();
+ //TODO: what to do on error?
+ if (s.ok())
+ {
+ rocksdb_wal_group_syncs++;
+ commit_checkpoint_notify_ha(hton, cookie);
+ }
+}
+
+/*
+ @param all: TRUE - commit the transaction
+ FALSE - SQL statement ended
+*/
+static void rocksdb_commit_ordered(handlerton *hton, THD* thd, bool all)
+{
+ // Same assert as InnoDB has
+ DBUG_ASSERT(all || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT |
+ OPTION_BEGIN)));
+ Rdb_transaction *tx = get_tx_from_thd(thd);
+ if (!tx->is_two_phase()) {
+ /*
+ ordered_commit is supposedly slower as it is done sequentially
+ in order to preserve commit order.
+
+      If we are not required to do a two-phase commit with the binlog, do
+      not do anything here.
+ */
+ return;
+ }
+
+ tx->set_sync(false);
+
+ /* This will note the master position also */
+ tx->commit_ordered_res= tx->commit();
+ tx->commit_ordered_done= true;
+
+}
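+
+/*
+  Hedged overview of how the commit entry points above and below fit together
+  when two-phase commit is enabled (the ordering is driven by the server
+  layer, which is an assumption here):
+
+    rocksdb_prepare()        -> tx->prepare(xid)  (writes the prepare marker)
+    rocksdb_commit_ordered() -> tx->commit()      (establishes commit order)
+    rocksdb_commit()         -> wakes subsequent commits, returns the result
+
+  rocksdb_commit() checks tx->commit_ordered_done so the transaction is not
+  committed twice.
+*/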
+
+
+static int rocksdb_commit(handlerton* hton, THD* thd, bool commit_tx)
+{
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(hton != nullptr);
+ DBUG_ASSERT(thd != nullptr);
+ DBUG_ASSERT(commit_latency_stats != nullptr);
+
+ rocksdb::StopWatchNano timer(rocksdb::Env::Default(), true);
+
+  /* note: h->external_lock(F_UNLCK) is called after this function is called */
+ Rdb_transaction *tx = get_tx_from_thd(thd);
+
+ /* this will trigger saving of perf_context information */
+ Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd));
+
+ if (tx != nullptr) {
+ if (commit_tx || (!my_core::thd_test_options(
+ thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
+ /*
+        This early-return path will not add anything to
+        commit_latency_stats; is that correct?
+ */
+ if (tx->commit_ordered_done)
+ {
+ thd_wakeup_subsequent_commits(thd, 0);
+ DBUG_RETURN((tx->commit_ordered_res? HA_ERR_INTERNAL_ERROR: 0));
+ }
+
+ /*
+ We get here
+ - For a COMMIT statement that finishes a multi-statement transaction
+ - For a statement that has its own transaction
+ */
+ if (thd->slave_thread)
+ {
+ // An attempt to make parallel slave performant (not fully successful,
+ // see MDEV-15372):
+
+ // First, commit without syncing. This establishes the commit order
+ tx->set_sync(false);
+      bool tx_had_writes = tx->get_write_count() != 0;
+ if (tx->commit()) {
+ DBUG_RETURN(HA_ERR_ROCKSDB_COMMIT_FAILED);
+ }
+ thd_wakeup_subsequent_commits(thd, 0);
+
+ if (tx_had_writes && rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC)
+ {
+ rocksdb::Status s= rdb->FlushWAL(true);
+ if (!s.ok())
+ DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ }
+ }
+ else
+ {
+ /* Not a slave thread */
+ if (tx->commit()) {
+ DBUG_RETURN(HA_ERR_ROCKSDB_COMMIT_FAILED);
+ }
+ }
+ } else {
+ /*
+ We get here when committing a statement within a transaction.
+ */
+ tx->make_stmt_savepoint_permanent();
+ }
+
+ if (my_core::thd_tx_isolation(thd) <= ISO_READ_COMMITTED) {
+ // For READ_COMMITTED, we release any existing snapshot so that we will
+ // see any changes that occurred since the last statement.
+ tx->release_snapshot();
+ }
+ }
+
+ // `Add()` is implemented in a thread-safe manner.
+ commit_latency_stats->Add(timer.ElapsedNanos() / 1000);
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+
+static int rocksdb_rollback(handlerton *const hton, THD *const thd,
+ bool rollback_tx) {
+ Rdb_transaction *tx = get_tx_from_thd(thd);
+ Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd));
+
+ if (tx != nullptr) {
+ if (rollback_tx) {
+ /*
+ We get here, when
+ - ROLLBACK statement is issued.
+
+ Discard the changes made by the transaction
+ */
+ tx->rollback();
+ } else {
+ /*
+ We get here when
+ - a statement with AUTOCOMMIT=1 is being rolled back (because of some
+ error)
+ - a statement inside a transaction is rolled back
+ */
+
+ tx->rollback_stmt();
+ tx->set_tx_failed(true);
+ }
+
+ if (my_core::thd_tx_isolation(thd) <= ISO_READ_COMMITTED) {
+ // For READ_COMMITTED, we release any existing snapshot so that we will
+ // see any changes that occurred since the last statement.
+ tx->release_snapshot();
+ }
+ }
+ return HA_EXIT_SUCCESS;
+}
+
+static bool print_stats(THD *const thd, std::string const &type,
+ std::string const &name, std::string const &status,
+ stat_print_fn *stat_print) {
+ return stat_print(thd, type.c_str(), type.size(), name.c_str(), name.size(),
+ status.c_str(), status.size());
+}
+
+static std::string format_string(const char *const format, ...) {
+ std::string res;
+ va_list args;
+ va_list args_copy;
+ char static_buff[256];
+
+ DBUG_ASSERT(format != nullptr);
+
+ va_start(args, format);
+ va_copy(args_copy, args);
+
+ // Calculate how much space we will need
+ int len = vsnprintf(nullptr, 0, format, args);
+ va_end(args);
+
+ if (len < 0) {
+ res = std::string("<format error>");
+ } else if (len == 0) {
+ // Shortcut for an empty string
+ res = std::string("");
+ } else {
+ // For short enough output use a static buffer
+ char *buff = static_buff;
+ std::unique_ptr<char[]> dynamic_buff = nullptr;
+
+ len++; // Add one for null terminator
+
+ // for longer output use an allocated buffer
+ if (static_cast<uint>(len) > sizeof(static_buff)) {
+ dynamic_buff.reset(new char[len]);
+ buff = dynamic_buff.get();
+ }
+
+ // Now re-do the vsnprintf with the buffer which is now large enough
+ (void)vsnprintf(buff, len, format, args_copy);
+
+ // Convert to a std::string. Note we could have created a std::string
+ // large enough and then converted the buffer to a 'char*' and created
+ // the output in place. This would probably work but feels like a hack.
+ // Since this isn't code that needs to be super-performant we are going
+ // with this 'safer' method.
+ res = std::string(buff);
+ }
+
+ va_end(args_copy);
+
+ return res;
+}
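+
+/*
+  format_string() is printf-style; for example (illustrative):
+
+    std::string s = format_string("cf %s: %llu files", cf_name.c_str(), n);
+
+  Output longer than the 256-byte static buffer transparently falls back to
+  a heap-allocated buffer.
+*/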
+
+class Rdb_snapshot_status : public Rdb_tx_list_walker {
+ private:
+ std::string m_data;
+
+ static std::string current_timestamp(void) {
+ static const char *const format = "%d-%02d-%02d %02d:%02d:%02d";
+ time_t currtime;
+ struct tm currtm;
+
+ time(&currtime);
+
+ localtime_r(&currtime, &currtm);
+
+ return format_string(format, currtm.tm_year + 1900, currtm.tm_mon + 1,
+ currtm.tm_mday, currtm.tm_hour, currtm.tm_min,
+ currtm.tm_sec);
+ }
+
+ static std::string get_header(void) {
+ return "\n============================================================\n" +
+ current_timestamp() +
+ " ROCKSDB TRANSACTION MONITOR OUTPUT\n"
+ "============================================================\n"
+ "---------\n"
+ "SNAPSHOTS\n"
+ "---------\n"
+ "LIST OF SNAPSHOTS FOR EACH SESSION:\n";
+ }
+
+ static std::string get_footer(void) {
+ return "-----------------------------------------\n"
+ "END OF ROCKSDB TRANSACTION MONITOR OUTPUT\n"
+ "=========================================\n";
+ }
+
+ static Rdb_deadlock_info::Rdb_dl_trx_info get_dl_txn_info(
+ const rocksdb::DeadlockInfo &txn, const GL_INDEX_ID &gl_index_id) {
+ Rdb_deadlock_info::Rdb_dl_trx_info txn_data;
+
+ txn_data.trx_id = txn.m_txn_id;
+
+ txn_data.table_name = ddl_manager.safe_get_table_name(gl_index_id);
+ if (txn_data.table_name.empty()) {
+ txn_data.table_name =
+ "NOT FOUND; INDEX_ID: " + std::to_string(gl_index_id.index_id);
+ }
+
+ auto kd = ddl_manager.safe_find(gl_index_id);
+ txn_data.index_name =
+ (kd) ? kd->get_name()
+ : "NOT FOUND; INDEX_ID: " + std::to_string(gl_index_id.index_id);
+
+ rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(txn.m_cf_id);
+ txn_data.cf_name = cfh->GetName();
+
+ txn_data.waiting_key =
+ rdb_hexdump(txn.m_waiting_key.c_str(), txn.m_waiting_key.length());
+
+ txn_data.exclusive_lock = txn.m_exclusive;
+
+ return txn_data;
+ }
+
+ static Rdb_deadlock_info get_dl_path_trx_info(
+ const rocksdb::DeadlockPath &path_entry) {
+ Rdb_deadlock_info deadlock_info;
+
+ for (auto it = path_entry.path.begin(); it != path_entry.path.end(); it++) {
+ const auto &txn = *it;
+ const GL_INDEX_ID gl_index_id = {
+ txn.m_cf_id, rdb_netbuf_to_uint32(reinterpret_cast<const uchar *>(
+ txn.m_waiting_key.c_str()))};
+ deadlock_info.path.push_back(get_dl_txn_info(txn, gl_index_id));
+ }
+ DBUG_ASSERT_IFF(path_entry.limit_exceeded, path_entry.path.empty());
+    /* record the last txn in the path (the deadlock victim) so the full
+       deadlock cycle can be displayed */
+ if (!path_entry.path.empty() && !path_entry.limit_exceeded) {
+ const auto &deadlocking_txn = *(path_entry.path.end() - 1);
+ deadlock_info.victim_trx_id = deadlocking_txn.m_txn_id;
+ deadlock_info.deadlock_time = path_entry.deadlock_time;
+ }
+ return deadlock_info;
+ }
+
+ public:
+ Rdb_snapshot_status() : m_data(get_header()) {}
+
+ std::string getResult() { return m_data + get_footer(); }
+
+ /* Implement Rdb_transaction interface */
+ /* Create one row in the snapshot status table */
+ void process_tran(const Rdb_transaction *const tx) override {
+ DBUG_ASSERT(tx != nullptr);
+
+ /* Calculate the duration the snapshot has existed */
+ int64_t snapshot_timestamp = tx->m_snapshot_timestamp;
+ if (snapshot_timestamp != 0) {
+ int64_t curr_time;
+ rdb->GetEnv()->GetCurrentTime(&curr_time);
+
+      char buffer[1024] = {0};
+#ifdef MARIAROCKS_NOT_YET
+ thd_security_context(tx->get_thd(), buffer, sizeof buffer, 0);
+#endif
+ m_data += format_string(
+ "---SNAPSHOT, ACTIVE %lld sec\n"
+ "%s\n"
+ "lock count %llu, write count %llu\n"
+ "insert count %llu, update count %llu, delete count %llu\n",
+ (longlong)(curr_time - snapshot_timestamp), buffer, tx->get_lock_count(),
+ tx->get_write_count(), tx->get_insert_count(), tx->get_update_count(),
+ tx->get_delete_count());
+ }
+ }
+
+ void populate_deadlock_buffer() {
+ auto dlock_buffer = rdb->GetDeadlockInfoBuffer();
+ m_data += "----------LATEST DETECTED DEADLOCKS----------\n";
+
+ for (const auto &path_entry : dlock_buffer) {
+ std::string path_data;
+ if (path_entry.limit_exceeded) {
+ path_data += "\n-------DEADLOCK EXCEEDED MAX DEPTH-------\n";
+ } else {
+ path_data +=
+ "\n*** DEADLOCK PATH\n"
+ "=========================================\n";
+ const auto dl_info = get_dl_path_trx_info(path_entry);
+ const auto deadlock_time = dl_info.deadlock_time;
+ for (auto it = dl_info.path.begin(); it != dl_info.path.end(); it++) {
+ const auto &trx_info = *it;
+ path_data += format_string(
+ "TIMESTAMP: %" PRId64
+ "\n"
+ "TRANSACTION ID: %u\n"
+ "COLUMN FAMILY NAME: %s\n"
+ "WAITING KEY: %s\n"
+ "LOCK TYPE: %s\n"
+ "INDEX NAME: %s\n"
+ "TABLE NAME: %s\n",
+ deadlock_time, trx_info.trx_id, trx_info.cf_name.c_str(),
+ trx_info.waiting_key.c_str(),
+ trx_info.exclusive_lock ? "EXCLUSIVE" : "SHARED",
+ trx_info.index_name.c_str(), trx_info.table_name.c_str());
+ if (it != dl_info.path.end() - 1) {
+ path_data += "---------------WAITING FOR---------------\n";
+ }
+ }
+ path_data += format_string(
+ "\n--------TRANSACTION ID: %u GOT DEADLOCK---------\n",
+ dl_info.victim_trx_id);
+ }
+ m_data += path_data;
+ }
+ }
+
+ std::vector<Rdb_deadlock_info> get_deadlock_info() {
+ std::vector<Rdb_deadlock_info> deadlock_info;
+ auto dlock_buffer = rdb->GetDeadlockInfoBuffer();
+ for (const auto &path_entry : dlock_buffer) {
+ if (!path_entry.limit_exceeded) {
+ deadlock_info.push_back(get_dl_path_trx_info(path_entry));
+ }
+ }
+ return deadlock_info;
+ }
+};
+
+/**
+ * @brief
+ * walks through all non-replication transactions and copies
+ * out relevant information for information_schema.rocksdb_trx
+ */
+class Rdb_trx_info_aggregator : public Rdb_tx_list_walker {
+ private:
+ std::vector<Rdb_trx_info> *m_trx_info;
+
+ public:
+ explicit Rdb_trx_info_aggregator(std::vector<Rdb_trx_info> *const trx_info)
+ : m_trx_info(trx_info) {}
+
+ void process_tran(const Rdb_transaction *const tx) override {
+ static const std::map<int, std::string> state_map = {
+ {rocksdb::Transaction::STARTED, "STARTED"},
+ {rocksdb::Transaction::AWAITING_PREPARE, "AWAITING_PREPARE"},
+ {rocksdb::Transaction::PREPARED, "PREPARED"},
+ {rocksdb::Transaction::AWAITING_COMMIT, "AWAITING_COMMIT"},
+ {rocksdb::Transaction::COMMITED, "COMMITED"},
+ {rocksdb::Transaction::AWAITING_ROLLBACK, "AWAITING_ROLLBACK"},
+ {rocksdb::Transaction::ROLLEDBACK, "ROLLEDBACK"},
+ };
+
+ DBUG_ASSERT(tx != nullptr);
+
+ THD *const thd = tx->get_thd();
+ ulong thread_id = thd_get_thread_id(thd);
+
+ if (tx->is_writebatch_trx()) {
+ const auto wb_impl = static_cast<const Rdb_writebatch_impl *>(tx);
+ DBUG_ASSERT(wb_impl);
+ m_trx_info->push_back(
+ {"", /* name */
+ 0, /* trx_id */
+ wb_impl->get_write_count(), 0, /* lock_count */
+ 0, /* timeout_sec */
+ "", /* state */
+ "", /* waiting_key */
+ 0, /* waiting_cf_id */
+ 1, /*is_replication */
+ 1, /* skip_trx_api */
+ wb_impl->is_tx_read_only(), 0, /* deadlock detection */
+ wb_impl->num_ongoing_bulk_load(), thread_id, "" /* query string */});
+ } else {
+ const auto tx_impl = static_cast<const Rdb_transaction_impl *>(tx);
+ DBUG_ASSERT(tx_impl);
+ const rocksdb::Transaction *rdb_trx = tx_impl->get_rdb_trx();
+
+ if (rdb_trx == nullptr) {
+ return;
+ }
+
+ char query_buf[NAME_LEN+1];
+ thd_query_safe(thd, query_buf, sizeof(query_buf));
+ std::string query_str(query_buf);
+
+ const auto state_it = state_map.find(rdb_trx->GetState());
+ DBUG_ASSERT(state_it != state_map.end());
+ const int is_replication = (thd->rgi_slave != nullptr);
+ uint32_t waiting_cf_id;
+ std::string waiting_key;
+    rdb_trx->GetWaitingTxns(&waiting_cf_id, &waiting_key);
+
+ m_trx_info->push_back(
+ {rdb_trx->GetName(), rdb_trx->GetID(), tx_impl->get_write_count(),
+ tx_impl->get_lock_count(), tx_impl->get_timeout_sec(),
+ state_it->second, waiting_key, waiting_cf_id, is_replication,
+ 0, /* skip_trx_api */
+ tx_impl->is_tx_read_only(), rdb_trx->IsDeadlockDetect(),
+ tx_impl->num_ongoing_bulk_load(), thread_id, query_str});
+ }
+ }
+};
+
+/*
+ returns a vector of info for all non-replication threads
+ for use by information_schema.rocksdb_trx
+*/
+std::vector<Rdb_trx_info> rdb_get_all_trx_info() {
+ std::vector<Rdb_trx_info> trx_info;
+ Rdb_trx_info_aggregator trx_info_agg(&trx_info);
+ Rdb_transaction::walk_tx_list(&trx_info_agg);
+ return trx_info;
+}
+
+
+/*
+ returns a vector of info of recent deadlocks
+ for use by information_schema.rocksdb_deadlock
+*/
+std::vector<Rdb_deadlock_info> rdb_get_deadlock_info() {
+ Rdb_snapshot_status showStatus;
+ Rdb_transaction::walk_tx_list(&showStatus);
+ return showStatus.get_deadlock_info();
+}
+
+#ifdef MARIAROCKS_NOT_YET
+/* Generate the snapshot status table */
+static bool rocksdb_show_snapshot_status(handlerton *const hton, THD *const thd,
+ stat_print_fn *const stat_print) {
+ Rdb_snapshot_status showStatus;
+
+ Rdb_transaction::walk_tx_list(&showStatus);
+ showStatus.populate_deadlock_buffer();
+
+ /* Send the result data back to MySQL */
+ return print_stats(thd, "rocksdb", "", showStatus.getResult(), stat_print);
+}
+#endif
+
+/*
+ This is called for SHOW ENGINE ROCKSDB STATUS | LOGS | etc.
+
+ For now, produce info about live files (which gives an imprecise idea about
+ what column families are there).
+*/
+static bool rocksdb_show_status(handlerton *const hton, THD *const thd,
+ stat_print_fn *const stat_print,
+ enum ha_stat_type stat_type) {
+ DBUG_ASSERT(hton != nullptr);
+ DBUG_ASSERT(thd != nullptr);
+ DBUG_ASSERT(stat_print != nullptr);
+
+ bool res = false;
+ char buf[100] = {'\0'};
+
+ if (stat_type == HA_ENGINE_STATUS) {
+ DBUG_ASSERT(rdb != nullptr);
+
+ std::string str;
+
+ /* Global DB Statistics */
+ if (rocksdb_stats) {
+ str = rocksdb_stats->ToString();
+
+ // Use the same format as internal RocksDB statistics entries to make
+ // sure that output will look unified.
+ DBUG_ASSERT(commit_latency_stats != nullptr);
+
+ snprintf(buf, sizeof(buf),
+ "rocksdb.commit_latency statistics "
+ "Percentiles :=> 50 : %.2f 95 : %.2f "
+ "99 : %.2f 100 : %.2f\n",
+ commit_latency_stats->Percentile(50),
+ commit_latency_stats->Percentile(95),
+ commit_latency_stats->Percentile(99),
+ commit_latency_stats->Percentile(100));
+ str.append(buf);
+
+ uint64_t v = 0;
+
+ // Retrieve additional stalling related numbers from RocksDB and append
+ // them to the buffer meant for displaying detailed statistics. The intent
+ // here is to avoid adding another row to the query output because of
+ // just two numbers.
+ //
+ // NB! We're replacing hyphens with underscores in output to better match
+ // the existing naming convention.
+ if (rdb->GetIntProperty("rocksdb.is-write-stopped", &v)) {
+ snprintf(buf, sizeof(buf), "rocksdb.is_write_stopped COUNT : %llu\n", (ulonglong)v);
+ str.append(buf);
+ }
+
+ if (rdb->GetIntProperty("rocksdb.actual-delayed-write-rate", &v)) {
+      snprintf(buf, sizeof(buf),
+               "rocksdb.actual_delayed_write_rate COUNT : %llu\n",
+               (ulonglong)v);
+ str.append(buf);
+ }
+
+ res |= print_stats(thd, "STATISTICS", "rocksdb", str, stat_print);
+ }
+
+ /* Per DB stats */
+ if (rdb->GetProperty("rocksdb.dbstats", &str)) {
+ res |= print_stats(thd, "DBSTATS", "rocksdb", str, stat_print);
+ }
+
+ /* Per column family stats */
+ for (const auto &cf_name : cf_manager.get_cf_names()) {
+ rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(cf_name);
+ if (cfh == nullptr) {
+ continue;
+ }
+
+ if (!rdb->GetProperty(cfh, "rocksdb.cfstats", &str)) {
+ continue;
+ }
+
+ res |= print_stats(thd, "CF_COMPACTION", cf_name, str, stat_print);
+ }
+
+ /* Memory Statistics */
+ std::vector<rocksdb::DB *> dbs;
+ std::unordered_set<const rocksdb::Cache *> cache_set;
+ size_t internal_cache_count = 0;
+    const size_t kDefaultInternalCacheSize = 8 * 1024 * 1024;
+
+ dbs.push_back(rdb);
+ cache_set.insert(rocksdb_tbl_options->block_cache.get());
+
+ for (const auto &cf_handle : cf_manager.get_all_cf()) {
+ rocksdb::ColumnFamilyDescriptor cf_desc;
+ cf_handle->GetDescriptor(&cf_desc);
+ auto *const table_factory = cf_desc.options.table_factory.get();
+
+ if (table_factory != nullptr) {
+ std::string tf_name = table_factory->Name();
+
+ if (tf_name.find("BlockBasedTable") != std::string::npos) {
+ const rocksdb::BlockBasedTableOptions *const bbt_opt =
+ reinterpret_cast<rocksdb::BlockBasedTableOptions *>(
+ table_factory->GetOptions());
+
+ if (bbt_opt != nullptr) {
+ if (bbt_opt->block_cache.get() != nullptr) {
+ cache_set.insert(bbt_opt->block_cache.get());
+ } else {
+ internal_cache_count++;
+ }
+ cache_set.insert(bbt_opt->block_cache_compressed.get());
+ }
+ }
+ }
+ }
+
+ std::map<rocksdb::MemoryUtil::UsageType, uint64_t> temp_usage_by_type;
+ str.clear();
+ rocksdb::MemoryUtil::GetApproximateMemoryUsageByType(dbs, cache_set,
+ &temp_usage_by_type);
+ snprintf(buf, sizeof(buf), "\nMemTable Total: %llu",
+ (ulonglong)temp_usage_by_type[rocksdb::MemoryUtil::kMemTableTotal]);
+ str.append(buf);
+ snprintf(buf, sizeof(buf), "\nMemTable Unflushed: %llu",
+ (ulonglong)temp_usage_by_type[rocksdb::MemoryUtil::kMemTableUnFlushed]);
+ str.append(buf);
+ snprintf(buf, sizeof(buf), "\nTable Readers Total: %llu",
+ (ulonglong)temp_usage_by_type[rocksdb::MemoryUtil::kTableReadersTotal]);
+ str.append(buf);
+ snprintf(buf, sizeof(buf), "\nCache Total: %llu",
+ (ulonglong)temp_usage_by_type[rocksdb::MemoryUtil::kCacheTotal]);
+ str.append(buf);
+ snprintf(buf, sizeof(buf), "\nDefault Cache Capacity: %llu",
+ (ulonglong)internal_cache_count * kDefaultInternalCacheSize);
+ str.append(buf);
+ res |= print_stats(thd, "MEMORY_STATS", "rocksdb", str, stat_print);
+
+ /* Show the background thread status */
+ std::vector<rocksdb::ThreadStatus> thread_list;
+ rocksdb::Status s = rdb->GetEnv()->GetThreadList(&thread_list);
+
+ if (!s.ok()) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Returned error (%s) from GetThreadList.\n",
+ s.ToString().c_str());
+ res |= true;
+ } else {
+ /* For each background thread retrieved, print out its information */
+ for (auto &it : thread_list) {
+ /* Only look at background threads. Ignore user threads, if any. */
+ if (it.thread_type > rocksdb::ThreadStatus::LOW_PRIORITY) {
+ continue;
+ }
+
+ str = "\nthread_type: " + it.GetThreadTypeName(it.thread_type) +
+ "\ncf_name: " + it.cf_name +
+ "\noperation_type: " + it.GetOperationName(it.operation_type) +
+ "\noperation_stage: " +
+ it.GetOperationStageName(it.operation_stage) +
+ "\nelapsed_time_ms: " + it.MicrosToString(it.op_elapsed_micros);
+
+ for (auto &it_props : it.InterpretOperationProperties(
+ it.operation_type, it.op_properties)) {
+ str += "\n" + it_props.first + ": " + std::to_string(it_props.second);
+ }
+
+ str += "\nstate_type: " + it.GetStateName(it.state_type);
+
+ res |= print_stats(thd, "BG_THREADS", std::to_string(it.thread_id), str,
+ stat_print);
+ }
+ }
+
+#ifdef MARIAROCKS_NOT_YET
+ /* Explicit snapshot information */
+ str = Rdb_explicit_snapshot::dump_snapshots();
+#endif
+
+ if (!str.empty()) {
+ res |= print_stats(thd, "EXPLICIT_SNAPSHOTS", "rocksdb", str, stat_print);
+ }
+#ifdef MARIAROCKS_NOT_YET
+ } else if (stat_type == HA_ENGINE_TRX) {
+ /* Handle the SHOW ENGINE ROCKSDB TRANSACTION STATUS command */
+ res |= rocksdb_show_snapshot_status(hton, thd, stat_print);
+#endif
+ }
+ return res;
+}
+
+static inline void rocksdb_register_tx(handlerton *const hton, THD *const thd,
+ Rdb_transaction *const tx) {
+ DBUG_ASSERT(tx != nullptr);
+
+ trans_register_ha(thd, FALSE, rocksdb_hton);
+ if (my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
+ tx->start_stmt();
+ trans_register_ha(thd, TRUE, rocksdb_hton);
+ }
+}
+
+static const char *ha_rocksdb_exts[] = {NullS};
+
+#ifdef MARIAROCKS_NOT_YET
+static bool rocksdb_explicit_snapshot(
+ handlerton *const /* hton */, /*!< in: RocksDB handlerton */
+ THD *const thd, /*!< in: MySQL thread handle */
+ snapshot_info_st *ss_info) /*!< out: Snapshot information */
+{
+ switch (ss_info->op) {
+ case snapshot_operation::SNAPSHOT_CREATE: {
+ if (mysql_bin_log_is_open()) {
+ mysql_bin_log_lock_commits(ss_info);
+ }
+ auto s = Rdb_explicit_snapshot::create(ss_info, rdb, rdb->GetSnapshot());
+ if (mysql_bin_log_is_open()) {
+ mysql_bin_log_unlock_commits(ss_info);
+ }
+
+ thd->set_explicit_snapshot(s);
+ return s == nullptr;
+ }
+ case snapshot_operation::SNAPSHOT_ATTACH: {
+ auto s = Rdb_explicit_snapshot::get(ss_info->snapshot_id);
+ if (!s) {
+ return true;
+ }
+ *ss_info = s->ss_info;
+ thd->set_explicit_snapshot(s);
+ return false;
+ }
+ case snapshot_operation::SNAPSHOT_RELEASE: {
+ if (!thd->get_explicit_snapshot()) {
+ return true;
+ }
+ *ss_info = thd->get_explicit_snapshot()->ss_info;
+ thd->set_explicit_snapshot(nullptr);
+ return false;
+ }
+ default:
+ DBUG_ASSERT(false);
+ return true;
+ }
+ return true;
+}
+#endif
+
+/*
+ Supporting START TRANSACTION WITH CONSISTENT [ROCKSDB] SNAPSHOT
+
+ Features:
+ 1. Supporting START TRANSACTION WITH CONSISTENT SNAPSHOT
+ 2. Getting current binlog position in addition to #1.
+
+ The second feature is done by START TRANSACTION WITH
+ CONSISTENT ROCKSDB SNAPSHOT. This is Facebook's extension, and
+ it works like existing START TRANSACTION WITH CONSISTENT INNODB SNAPSHOT.
+
+  - When no engine is specified, START TRANSACTION WITH CONSISTENT SNAPSHOT
+  takes both InnoDB and RocksDB snapshots, and both InnoDB and RocksDB
+  participate in the transaction. When executing COMMIT, both InnoDB and
+  RocksDB modifications are committed. Remember that XA is not supported yet,
+  so mixing engines is not recommended anyway.
+
+  - When an engine is specified, START TRANSACTION WITH CONSISTENT ... takes
+  a snapshot for the specified engine only, but it still starts both
+  InnoDB and RocksDB transactions.
+*/
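+
+/*
+  For illustration, the two SQL forms described above (the second one is the
+  Facebook extension):
+
+    START TRANSACTION WITH CONSISTENT SNAPSHOT;
+    START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+*/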
+static int rocksdb_start_tx_and_assign_read_view(
+ handlerton *const hton, /*!< in: RocksDB handlerton */
+ THD *const thd /*!< in: MySQL thread handle of the
+ user for whom the transaction should
+ be committed */
+)
+#ifdef MARIAROCKS_NOT_YET
+ snapshot_info_st *ss_info) /*!< in/out: Snapshot info like binlog file, pos,
+ gtid executed and snapshot ID */
+#endif
+{
+ ulong const tx_isolation = my_core::thd_tx_isolation(thd);
+
+ if (tx_isolation != ISO_REPEATABLE_READ) {
+ my_error(ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT, MYF(0));
+ return HA_EXIT_FAILURE;
+ }
+
+#ifdef MARIADB_NOT_YET
+ if (ss_info) {
+ if (mysql_bin_log_is_open()) {
+ mysql_bin_log_lock_commits(ss_info);
+ } else {
+ return HA_EXIT_FAILURE;
+ }
+#endif
+
+ /*
+ MariaDB: there is no need to call mysql_bin_log_lock_commits and then
+ unlock back.
+ SQL layer calls start_consistent_snapshot() for all engines, including the
+ binlog under LOCK_commit_ordered mutex.
+
+    The mutex is expected to prevent binlog commits from happening while the
+    storage engine(s) allocate read snapshots. That way, each storage engine
+    is synchronized with the current binlog position.
+ */
+ mysql_mutex_assert_owner(&LOCK_commit_ordered);
+
+ Rdb_transaction *const tx = get_or_create_tx(thd);
+ Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd));
+
+ DBUG_ASSERT(!tx->has_snapshot());
+ tx->set_tx_read_only(true);
+ rocksdb_register_tx(hton, thd, tx);
+ tx->acquire_snapshot(true);
+
+#ifdef MARIADB_NOT_YET
+ if (ss_info) {
+ mysql_bin_log_unlock_commits(ss_info);
+ }
+#endif
+ return HA_EXIT_SUCCESS;
+}
+
+#ifdef MARIADB_NOT_YET
+static int rocksdb_start_tx_with_shared_read_view(
+ handlerton *const hton, /*!< in: RocksDB handlerton */
+ THD *const thd) /*!< in: MySQL thread handle of the
+ user for whom the transaction should
+ be committed */
+#ifdef MARIADB_NOT_YET
+ snapshot_info_st *ss_info) /*!< out: Snapshot info like binlog file, pos,
+ gtid executed and snapshot ID */
+#endif
+{
+ DBUG_ASSERT(thd != nullptr);
+
+ int error = HA_EXIT_SUCCESS;
+
+ ulong const tx_isolation = my_core::thd_tx_isolation(thd);
+ if (tx_isolation != ISO_REPEATABLE_READ) {
+ my_error(ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT, MYF(0));
+ return HA_EXIT_FAILURE;
+ }
+
+ Rdb_transaction *tx = nullptr;
+#ifdef MARIADB_NOT_YET
+ std::shared_ptr<Rdb_explicit_snapshot> explicit_snapshot;
+ const auto op = ss_info->op;
+
+ DBUG_ASSERT(op == snapshot_operation::SNAPSHOT_CREATE ||
+ op == snapshot_operation::SNAPSHOT_ATTACH);
+
+ // case: if binlogs are available get binlog file/pos and gtid info
+ if (op == snapshot_operation::SNAPSHOT_CREATE && mysql_bin_log_is_open()) {
+ mysql_bin_log_lock_commits(ss_info);
+ }
+
+ if (op == snapshot_operation::SNAPSHOT_ATTACH) {
+ explicit_snapshot = Rdb_explicit_snapshot::get(ss_info->snapshot_id);
+ if (!explicit_snapshot) {
+ my_printf_error(ER_UNKNOWN_ERROR, "Snapshot %llu does not exist", MYF(0),
+ ss_info->snapshot_id);
+ error = HA_EXIT_FAILURE;
+ }
+ }
+#endif
+
+ // case: all good till now
+ if (error == HA_EXIT_SUCCESS) {
+ tx = get_or_create_tx(thd);
+ Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd));
+
+#ifdef MARIADB_NOT_YET
+ if (explicit_snapshot) {
+ tx->m_explicit_snapshot = explicit_snapshot;
+ }
+#endif
+
+ DBUG_ASSERT(!tx->has_snapshot());
+ tx->set_tx_read_only(true);
+ rocksdb_register_tx(hton, thd, tx);
+ tx->acquire_snapshot(true);
+
+#ifdef MARIADB_NOT_YET
+ // case: an explicit snapshot was not assigned to this transaction
+ if (!tx->m_explicit_snapshot) {
+ tx->m_explicit_snapshot =
+ Rdb_explicit_snapshot::create(ss_info, rdb, tx->m_read_opts.snapshot);
+ if (!tx->m_explicit_snapshot) {
+ my_printf_error(ER_UNKNOWN_ERROR, "Could not create snapshot", MYF(0));
+ error = HA_EXIT_FAILURE;
+ }
+ }
+#endif
+ }
+
+#ifdef MARIADB_NOT_YET
+ // case: unlock the binlog
+ if (op == snapshot_operation::SNAPSHOT_CREATE && mysql_bin_log_is_open()) {
+ mysql_bin_log_unlock_commits(ss_info);
+ }
+
+ DBUG_ASSERT(error == HA_EXIT_FAILURE || tx->m_explicit_snapshot);
+
+ // copy over the snapshot details to pass to the upper layers
+ if (tx->m_explicit_snapshot) {
+ *ss_info = tx->m_explicit_snapshot->ss_info;
+ ss_info->op = op;
+ }
+#endif
+
+ return error;
+}
+#endif
+
+/* Dummy SAVEPOINT support. This is needed for long running transactions
+ * like mysqldump (https://bugs.mysql.com/bug.php?id=71017).
+ * Current SAVEPOINT does not correctly handle ROLLBACK and does not return
+ * errors. This needs to be addressed in future versions (Issue#96).
+ */
+static int rocksdb_savepoint(handlerton *const hton, THD *const thd,
+ void *const savepoint) {
+ return HA_EXIT_SUCCESS;
+}
+
+static int rocksdb_rollback_to_savepoint(handlerton *const hton, THD *const thd,
+ void *const savepoint) {
+ Rdb_transaction *tx = get_tx_from_thd(thd);
+ return tx->rollback_to_savepoint(savepoint);
+}
+
+static bool rocksdb_rollback_to_savepoint_can_release_mdl(
+ handlerton *const /* hton */, THD *const /* thd */) {
+ return true;
+}
+
+#ifdef MARIAROCKS_NOT_YET
+/*
+ This is called for INFORMATION_SCHEMA
+*/
+static void rocksdb_update_table_stats(
+ /* per-table stats callback */
+ void (*cb)(const char *db, const char *tbl, bool is_partition,
+ my_io_perf_t *r, my_io_perf_t *w, my_io_perf_t *r_blob,
+ my_io_perf_t *r_primary, my_io_perf_t *r_secondary,
+ page_stats_t *page_stats, comp_stats_t *comp_stats,
+ int n_lock_wait, int n_lock_wait_timeout, int n_lock_deadlock,
+ const char *engine)) {
+ my_io_perf_t io_perf_read;
+ my_io_perf_t io_perf_write;
+ my_io_perf_t io_perf;
+ page_stats_t page_stats;
+ comp_stats_t comp_stats;
+ uint lock_wait_timeout_stats;
+ uint deadlock_stats;
+ uint lock_wait_stats;
+ std::vector<std::string> tablenames;
+
+ /*
+ Most of these are for innodb, so setting them to 0.
+ TODO: possibly separate out primary vs. secondary index reads
+ */
+ memset(&io_perf, 0, sizeof(io_perf));
+ memset(&page_stats, 0, sizeof(page_stats));
+ memset(&comp_stats, 0, sizeof(comp_stats));
+ memset(&io_perf_write, 0, sizeof(io_perf_write));
+
+ tablenames = rdb_open_tables.get_table_names();
+
+ for (const auto &it : tablenames) {
+ Rdb_table_handler *table_handler;
+ std::string str, dbname, tablename, partname;
+ char dbname_sys[NAME_LEN + 1];
+ char tablename_sys[NAME_LEN + 1];
+ bool is_partition;
+
+ if (rdb_normalize_tablename(it, &str) != HA_EXIT_SUCCESS) {
+      /* The function needs to return void because of the interface, yet we've
+       * detected an error which shouldn't happen. There's no way to let the
+       * caller know that something failed.
+ */
+ SHIP_ASSERT(false);
+ return;
+ }
+
+ if (rdb_split_normalized_tablename(str, &dbname, &tablename, &partname)) {
+ continue;
+ }
+
+ is_partition = (partname.size() != 0);
+
+ table_handler = rdb_open_tables.get_table_handler(it.c_str());
+ if (table_handler == nullptr) {
+ continue;
+ }
+
+ io_perf_read.bytes = table_handler->m_io_perf_read.bytes.load();
+ io_perf_read.requests = table_handler->m_io_perf_read.requests.load();
+ io_perf_write.bytes = table_handler->m_io_perf_write.bytes.load();
+ io_perf_write.requests = table_handler->m_io_perf_write.requests.load();
+ lock_wait_timeout_stats = table_handler->m_lock_wait_timeout_counter.load();
+ deadlock_stats = table_handler->m_deadlock_counter.load();
+ lock_wait_stats =
+ table_handler->m_table_perf_context.m_value[PC_KEY_LOCK_WAIT_COUNT]
+ .load();
+
+ /*
+ Convert from rocksdb timer to mysql timer. RocksDB values are
+ in nanoseconds, but table statistics expect the value to be
+ in my_timer format.
+ */
+ io_perf_read.svc_time = my_core::microseconds_to_my_timer(
+ table_handler->m_io_perf_read.svc_time.load() / 1000);
+ io_perf_read.svc_time_max = my_core::microseconds_to_my_timer(
+ table_handler->m_io_perf_read.svc_time_max.load() / 1000);
+ io_perf_read.wait_time = my_core::microseconds_to_my_timer(
+ table_handler->m_io_perf_read.wait_time.load() / 1000);
+ io_perf_read.wait_time_max = my_core::microseconds_to_my_timer(
+ table_handler->m_io_perf_read.wait_time_max.load() / 1000);
+ io_perf_read.slow_ios = table_handler->m_io_perf_read.slow_ios.load();
+ rdb_open_tables.release_table_handler(table_handler);
+
+ /*
+ Table stats expects our database and table name to be in system encoding,
+ not filename format. Convert before calling callback.
+ */
+ my_core::filename_to_tablename(dbname.c_str(), dbname_sys,
+ sizeof(dbname_sys));
+ my_core::filename_to_tablename(tablename.c_str(), tablename_sys,
+ sizeof(tablename_sys));
+ (*cb)(dbname_sys, tablename_sys, is_partition, &io_perf_read,
+ &io_perf_write, &io_perf, &io_perf, &io_perf, &page_stats,
+ &comp_stats, lock_wait_stats, lock_wait_timeout_stats, deadlock_stats,
+ rocksdb_hton_name);
+ }
+}
+#endif
+static rocksdb::Status check_rocksdb_options_compatibility(
+ const char *const dbpath, const rocksdb::Options &main_opts,
+ const std::vector<rocksdb::ColumnFamilyDescriptor> &cf_descr) {
+ DBUG_ASSERT(rocksdb_datadir != nullptr);
+
+ rocksdb::DBOptions loaded_db_opt;
+ std::vector<rocksdb::ColumnFamilyDescriptor> loaded_cf_descs;
+ rocksdb::Status status =
+ LoadLatestOptions(dbpath, rocksdb::Env::Default(), &loaded_db_opt,
+ &loaded_cf_descs, rocksdb_ignore_unknown_options);
+
+  // If we're starting from scratch, no options have been saved yet; that is a
+  // valid case, and there is simply nothing to compare the current set of
+  // options against.
+ if (status.IsNotFound()) {
+ return rocksdb::Status::OK();
+ }
+
+ if (!status.ok()) {
+ return status;
+ }
+
+ if (loaded_cf_descs.size() != cf_descr.size()) {
+ return rocksdb::Status::NotSupported(
+ "Mismatched size of column family "
+ "descriptors.");
+ }
+
+ // Please see RocksDB documentation for more context about why we need to set
+ // user-defined functions and pointer-typed options manually.
+ for (size_t i = 0; i < loaded_cf_descs.size(); i++) {
+ loaded_cf_descs[i].options.compaction_filter =
+ cf_descr[i].options.compaction_filter;
+ loaded_cf_descs[i].options.compaction_filter_factory =
+ cf_descr[i].options.compaction_filter_factory;
+ loaded_cf_descs[i].options.comparator = cf_descr[i].options.comparator;
+ loaded_cf_descs[i].options.memtable_factory =
+ cf_descr[i].options.memtable_factory;
+ loaded_cf_descs[i].options.merge_operator =
+ cf_descr[i].options.merge_operator;
+ loaded_cf_descs[i].options.prefix_extractor =
+ cf_descr[i].options.prefix_extractor;
+ loaded_cf_descs[i].options.table_factory =
+ cf_descr[i].options.table_factory;
+ }
+
+ // This is the essence of the function - determine if it's safe to open the
+ // database or not.
+ status = CheckOptionsCompatibility(dbpath, rocksdb::Env::Default(), main_opts,
+ loaded_cf_descs,
+ rocksdb_ignore_unknown_options);
+
+ return status;
+}
+
+bool prevent_myrocks_loading= false;
+
+
+/*
+ Storage Engine initialization function, invoked when plugin is loaded.
+*/
+
+static int rocksdb_init_func(void *const p) {
+
+ DBUG_ENTER_FUNC();
+
+ if (prevent_myrocks_loading)
+ {
+ my_error(ER_INTERNAL_ERROR, MYF(0),
+ "Loading MyRocks plugin after it has been unloaded is not "
+ "supported. Please restart mysqld");
+ DBUG_RETURN(1);
+ }
+
+ if (rdb_check_rocksdb_corruption()) {
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: There was a corruption detected in RockDB files. "
+ "Check error log emitted earlier for more details.");
+ if (rocksdb_allow_to_start_after_corruption) {
+ // NO_LINT_DEBUG
+ sql_print_information(
+ "RocksDB: Remove rocksdb_allow_to_start_after_corruption to prevent "
+ "server operating if RocksDB corruption is detected.");
+ } else {
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: The server will exit normally and stop restart "
+ "attempts. Remove %s file from data directory and "
+ "start mysqld manually.",
+ rdb_corruption_marker_file_name().c_str());
+ exit(0);
+ }
+ }
+
+ // Validate the assumption about the size of ROCKSDB_SIZEOF_HIDDEN_PK_COLUMN.
+ static_assert(sizeof(longlong) == 8, "Assuming that longlong is 8 bytes.");
+
+ init_rocksdb_psi_keys();
+
+ rocksdb_hton = (handlerton *)p;
+
+ rdb_open_tables.init();
+ Ensure_cleanup rdb_open_tables_cleanup([]() { rdb_open_tables.free(); });
+
+#ifdef HAVE_PSI_INTERFACE
+ rdb_bg_thread.init(rdb_signal_bg_psi_mutex_key, rdb_signal_bg_psi_cond_key);
+ rdb_drop_idx_thread.init(rdb_signal_drop_idx_psi_mutex_key,
+ rdb_signal_drop_idx_psi_cond_key);
+ rdb_mc_thread.init(rdb_signal_mc_psi_mutex_key, rdb_signal_mc_psi_cond_key);
+#else
+ rdb_bg_thread.init();
+ rdb_drop_idx_thread.init();
+ rdb_mc_thread.init();
+#endif
+ mysql_mutex_init(rdb_collation_data_mutex_key, &rdb_collation_data_mutex,
+ MY_MUTEX_INIT_FAST);
+ mysql_mutex_init(rdb_mem_cmp_space_mutex_key, &rdb_mem_cmp_space_mutex,
+ MY_MUTEX_INIT_FAST);
+
+ const char* initial_rocksdb_datadir_for_ignore_dirs= rocksdb_datadir;
+ if (!strncmp(rocksdb_datadir, "./", 2))
+ initial_rocksdb_datadir_for_ignore_dirs += 2;
+ ignore_db_dirs_append(initial_rocksdb_datadir_for_ignore_dirs);
+
+#if defined(HAVE_PSI_INTERFACE)
+ rdb_collation_exceptions =
+ new Regex_list_handler(key_rwlock_collation_exception_list);
+#else
+ rdb_collation_exceptions = new Regex_list_handler();
+#endif
+
+ mysql_mutex_init(rdb_sysvars_psi_mutex_key, &rdb_sysvars_mutex,
+ MY_MUTEX_INIT_FAST);
+ mysql_mutex_init(rdb_block_cache_resize_mutex_key,
+ &rdb_block_cache_resize_mutex, MY_MUTEX_INIT_FAST);
+ Rdb_transaction::init_mutex();
+
+ rocksdb_hton->state = SHOW_OPTION_YES;
+ rocksdb_hton->create = rocksdb_create_handler;
+ rocksdb_hton->close_connection = rocksdb_close_connection;
+
+ rocksdb_hton->prepare = rocksdb_prepare;
+ rocksdb_hton->prepare_ordered = NULL; // Do not need it
+
+ rocksdb_hton->commit_by_xid = rocksdb_commit_by_xid;
+ rocksdb_hton->rollback_by_xid = rocksdb_rollback_by_xid;
+ rocksdb_hton->recover = rocksdb_recover;
+
+ rocksdb_hton->commit_ordered= rocksdb_commit_ordered;
+ rocksdb_hton->commit = rocksdb_commit;
+
+ rocksdb_hton->commit_checkpoint_request= rocksdb_checkpoint_request;
+
+ rocksdb_hton->rollback = rocksdb_rollback;
+ rocksdb_hton->show_status = rocksdb_show_status;
+#ifdef MARIADB_NOT_YET
+ rocksdb_hton->explicit_snapshot = rocksdb_explicit_snapshot;
+#endif
+ rocksdb_hton->start_consistent_snapshot =
+ rocksdb_start_tx_and_assign_read_view;
+#ifdef MARIADB_NOT_YET
+ rocksdb_hton->start_shared_snapshot = rocksdb_start_tx_with_shared_read_view;
+#endif
+ rocksdb_hton->savepoint_set = rocksdb_savepoint;
+ rocksdb_hton->savepoint_rollback = rocksdb_rollback_to_savepoint;
+ rocksdb_hton->savepoint_rollback_can_release_mdl =
+ rocksdb_rollback_to_savepoint_can_release_mdl;
+#ifdef MARIAROCKS_NOT_YET
+ rocksdb_hton->update_table_stats = rocksdb_update_table_stats;
+#endif // MARIAROCKS_NOT_YET
+
+ /*
+ Not needed in MariaDB:
+ rocksdb_hton->flush_logs = rocksdb_flush_wal;
+  rocksdb_hton->handle_single_table_select = rocksdb_handle_single_table_select;
+  */
+
+ rocksdb_hton->flags = HTON_TEMPORARY_NOT_SUPPORTED |
+ HTON_SUPPORTS_EXTENDED_KEYS | HTON_CAN_RECREATE;
+
+ rocksdb_hton->tablefile_extensions= ha_rocksdb_exts;
+ DBUG_ASSERT(!mysqld_embedded);
+
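+  /*
+    Clamp rocksdb_max_open_files to half of the server-wide open_files_limit.
+    Worked example (illustrative numbers): with open_files_limit=1024, a
+    setting of rocksdb_max_open_files=2000, or the special value -2, becomes
+    1024 / 2 = 512.
+  */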
+ if (rocksdb_db_options->max_open_files > (long)open_files_limit) {
+ // NO_LINT_DEBUG
+ sql_print_information(
+ "RocksDB: rocksdb_max_open_files should not be "
+ "greater than the open_files_limit, effective value "
+ "of rocksdb_max_open_files is being set to "
+ "open_files_limit / 2.");
+ rocksdb_db_options->max_open_files = open_files_limit / 2;
+ } else if (rocksdb_db_options->max_open_files == -2) {
+ rocksdb_db_options->max_open_files = open_files_limit / 2;
+ }
+
+#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported
+ rdb_read_free_regex_handler.set_patterns(DEFAULT_READ_FREE_RPL_TABLES);
+#endif
+
+ rocksdb_stats = rocksdb::CreateDBStatistics();
+ rocksdb_stats->set_stats_level(
+ static_cast<rocksdb::StatsLevel>(rocksdb_stats_level));
+ rocksdb_stats_level = rocksdb_stats->get_stats_level();
+ rocksdb_db_options->statistics = rocksdb_stats;
+
+ if (rocksdb_rate_limiter_bytes_per_sec != 0) {
+ rocksdb_rate_limiter.reset(
+ rocksdb::NewGenericRateLimiter(rocksdb_rate_limiter_bytes_per_sec));
+ rocksdb_db_options->rate_limiter = rocksdb_rate_limiter;
+ }
+
+ rocksdb_db_options->delayed_write_rate = rocksdb_delayed_write_rate;
+
+ std::shared_ptr<Rdb_logger> myrocks_logger = std::make_shared<Rdb_logger>();
+ rocksdb::Status s = rocksdb::CreateLoggerFromOptions(
+ rocksdb_datadir, *rocksdb_db_options, &rocksdb_db_options->info_log);
+ if (s.ok()) {
+ myrocks_logger->SetRocksDBLogger(rocksdb_db_options->info_log);
+ }
+
+ rocksdb_db_options->info_log = myrocks_logger;
+ myrocks_logger->SetInfoLogLevel(
+ static_cast<rocksdb::InfoLogLevel>(rocksdb_info_log_level));
+ rocksdb_db_options->wal_dir = rocksdb_wal_dir;
+
+ rocksdb_db_options->wal_recovery_mode =
+ static_cast<rocksdb::WALRecoveryMode>(rocksdb_wal_recovery_mode);
+
+ rocksdb_db_options->access_hint_on_compaction_start =
+ static_cast<rocksdb::Options::AccessHint>(
+ rocksdb_access_hint_on_compaction_start);
+
+ if (rocksdb_db_options->allow_mmap_reads &&
+ rocksdb_db_options->use_direct_reads) {
+ // allow_mmap_reads implies !use_direct_reads and RocksDB will not open if
+ // mmap_reads and direct_reads are both on. (NO_LINT_DEBUG)
+ sql_print_error(
+ "RocksDB: Can't enable both use_direct_reads "
+ "and allow_mmap_reads\n");
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ // Check whether the filesystem backing rocksdb_datadir allows O_DIRECT
+ if (rocksdb_db_options->use_direct_reads ||
+ rocksdb_db_options->use_direct_io_for_flush_and_compaction) {
+ rocksdb::EnvOptions soptions;
+ rocksdb::Status check_status;
+ rocksdb::Env *const env = rocksdb_db_options->env;
+
+ std::string fname = format_string("%s/DIRECT_CHECK", rocksdb_datadir);
+ if (env->FileExists(fname).ok()) {
+ std::unique_ptr<rocksdb::SequentialFile> file;
+ soptions.use_direct_reads = true;
+ check_status = env->NewSequentialFile(fname, &file, soptions);
+ } else {
+ std::unique_ptr<rocksdb::WritableFile> file;
+ soptions.use_direct_writes = true;
+ check_status = env->ReopenWritableFile(fname, &file, soptions);
+ if (file != nullptr) {
+ file->Close();
+ }
+ env->DeleteFile(fname);
+ }
+
+ if (!check_status.ok()) {
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Unable to use direct io in rocksdb-datadir:"
+ "(%s)",
+ check_status.getState());
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+ }
+
+ if (rocksdb_db_options->allow_mmap_writes &&
+ rocksdb_db_options->use_direct_io_for_flush_and_compaction) {
+ // See above comment for allow_mmap_reads. (NO_LINT_DEBUG)
+ sql_print_error(
+ "RocksDB: Can't enable both "
+ "use_direct_io_for_flush_and_compaction and "
+ "allow_mmap_writes\n");
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ if (rocksdb_db_options->allow_mmap_writes &&
+ rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) {
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: rocksdb_flush_log_at_trx_commit needs to be 0 "
+ "to use allow_mmap_writes");
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ // sst_file_manager will move deleted rocksdb sst files to trash_dir
+ // to be deleted in a background thread.
+ std::string trash_dir = std::string(rocksdb_datadir) + "/trash";
+ rocksdb_db_options->sst_file_manager.reset(NewSstFileManager(
+ rocksdb_db_options->env, myrocks_logger, trash_dir,
+ rocksdb_sst_mgr_rate_bytes_per_sec, true /* delete_existing_trash */));
+
+ std::vector<std::string> cf_names;
+ rocksdb::Status status;
+ status = rocksdb::DB::ListColumnFamilies(*rocksdb_db_options, rocksdb_datadir,
+ &cf_names);
+ if (!status.ok()) {
+ /*
+ When we start on an empty datadir, ListColumnFamilies returns IOError,
+ and RocksDB doesn't provide any way to check what kind of error it was.
+ Checking system errno happens to work right now.
+ */
+ if (status.IsIOError()
+#ifndef _WIN32
+ && errno == ENOENT
+#endif
+ ) {
+ sql_print_information("RocksDB: Got ENOENT when listing column families");
+
+ // NO_LINT_DEBUG
+ sql_print_information(
+ "RocksDB: assuming that we're creating a new database");
+ } else {
+ rdb_log_status_error(status, "Error listing column families");
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+ } else {
+ // NO_LINT_DEBUG
+ sql_print_information("RocksDB: %ld column families found",
+ cf_names.size());
+ }
+
+ std::vector<rocksdb::ColumnFamilyDescriptor> cf_descr;
+ std::vector<rocksdb::ColumnFamilyHandle *> cf_handles;
+
+ rocksdb_tbl_options->index_type =
+ (rocksdb::BlockBasedTableOptions::IndexType)rocksdb_index_type;
+
+ if (!rocksdb_tbl_options->no_block_cache) {
+ std::shared_ptr<rocksdb::MemoryAllocator> memory_allocator;
+ if (!rocksdb_cache_dump) {
+ size_t block_size = rocksdb_tbl_options->block_size;
+ rocksdb::JemallocAllocatorOptions alloc_opt;
+ // Limit jemalloc tcache memory usage. The range
+ // [block_size/4, block_size] should be enough to cover most of
+ // block cache allocation sizes.
+ alloc_opt.limit_tcache_size = true;
+ alloc_opt.tcache_size_lower_bound = block_size / 4;
+ alloc_opt.tcache_size_upper_bound = block_size;
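+      // Illustrative numbers: with a 16 KB block_size, the tcache is limited
+      // to allocations in the [4 KB, 16 KB] range, which covers typical block
+      // cache allocations while bounding per-thread cached memory.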
+ rocksdb::Status new_alloc_status =
+ rocksdb::NewJemallocNodumpAllocator(alloc_opt, &memory_allocator);
+ if (!new_alloc_status.ok()) {
+ // Fallback to use default malloc/free.
+ rdb_log_status_error(new_alloc_status,
+ "Error excluding block cache from core dump");
+ memory_allocator = nullptr;
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+ }
+ std::shared_ptr<rocksdb::Cache> block_cache =
+ rocksdb_use_clock_cache
+ ? rocksdb::NewClockCache(rocksdb_block_cache_size)
+ : rocksdb::NewLRUCache(
+ rocksdb_block_cache_size, -1 /*num_shard_bits*/,
+                      false /*strict_capacity_limit*/,
+ rocksdb_cache_high_pri_pool_ratio, memory_allocator);
+ if (rocksdb_sim_cache_size > 0) {
+ // Simulated cache enabled
+ // Wrap block cache inside a simulated cache and pass it to RocksDB
+ rocksdb_tbl_options->block_cache =
+ rocksdb::NewSimCache(block_cache, rocksdb_sim_cache_size, 6);
+ } else {
+ // Pass block cache to RocksDB
+ rocksdb_tbl_options->block_cache = block_cache;
+ }
+ }
+ // Using newer BlockBasedTable format version for better compression
+ // and better memory allocation.
+ // See:
+ // https://github.com/facebook/rocksdb/commit/9ab5adfc59a621d12357580c94451d9f7320c2dd
+ rocksdb_tbl_options->format_version = 2;
+
+ if (rocksdb_collect_sst_properties) {
+ properties_collector_factory =
+ std::make_shared<Rdb_tbl_prop_coll_factory>(&ddl_manager);
+
+ rocksdb_set_compaction_options(nullptr, nullptr, nullptr, nullptr);
+
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+
+ DBUG_ASSERT(rocksdb_table_stats_sampling_pct <=
+ RDB_TBL_STATS_SAMPLE_PCT_MAX);
+ properties_collector_factory->SetTableStatsSamplingPct(
+ rocksdb_table_stats_sampling_pct);
+
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+ }
+
+ if (rocksdb_persistent_cache_size_mb > 0) {
+ std::shared_ptr<rocksdb::PersistentCache> pcache;
+ uint64_t cache_size_bytes = rocksdb_persistent_cache_size_mb * 1024 * 1024;
+ status = rocksdb::NewPersistentCache(
+ rocksdb::Env::Default(), std::string(rocksdb_persistent_cache_path),
+ cache_size_bytes, myrocks_logger, true, &pcache);
+ if (!status.ok()) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Persistent cache returned error: (%s)",
+ status.getState());
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+ rocksdb_tbl_options->persistent_cache = pcache;
+ } else if (strlen(rocksdb_persistent_cache_path)) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Must specify rocksdb_persistent_cache_size_mb");
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ std::unique_ptr<Rdb_cf_options> cf_options_map(new Rdb_cf_options());
+ if (!cf_options_map->init(*rocksdb_tbl_options, properties_collector_factory,
+ rocksdb_default_cf_options,
+ rocksdb_override_cf_options)) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Failed to initialize CF options map.");
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ /*
+ If there are no column families, we're creating the new database.
+ Create one column family named "default".
+ */
+ if (cf_names.size() == 0) cf_names.push_back(DEFAULT_CF_NAME);
+
+ std::vector<int> compaction_enabled_cf_indices;
+
+ // NO_LINT_DEBUG
+ sql_print_information("RocksDB: Column Families at start:");
+ for (size_t i = 0; i < cf_names.size(); ++i) {
+ rocksdb::ColumnFamilyOptions opts;
+ cf_options_map->get_cf_options(cf_names[i], &opts);
+
+ // NO_LINT_DEBUG
+ sql_print_information(" cf=%s", cf_names[i].c_str());
+
+ // NO_LINT_DEBUG
+ sql_print_information(" write_buffer_size=%ld", opts.write_buffer_size);
+
+ // NO_LINT_DEBUG
+ sql_print_information(" target_file_size_base=%" PRIu64,
+ opts.target_file_size_base);
+
+ /*
+ Temporarily disable compactions to prevent a race condition where
+ compaction starts before compaction filter is ready.
+ */
+ if (!opts.disable_auto_compactions) {
+ compaction_enabled_cf_indices.push_back(i);
+ opts.disable_auto_compactions = true;
+ }
+ cf_descr.push_back(rocksdb::ColumnFamilyDescriptor(cf_names[i], opts));
+ }
+
+ rocksdb::Options main_opts(*rocksdb_db_options,
+ cf_options_map->get_defaults());
+
+ rocksdb::TransactionDBOptions tx_db_options;
+ tx_db_options.transaction_lock_timeout = 2000; // 2 seconds
+ tx_db_options.custom_mutex_factory = std::make_shared<Rdb_mutex_factory>();
+ tx_db_options.write_policy =
+ static_cast<rocksdb::TxnDBWritePolicy>(rocksdb_write_policy);
+
+ status =
+ check_rocksdb_options_compatibility(rocksdb_datadir, main_opts, cf_descr);
+
+ // We won't start if we'll determine that there's a chance of data corruption
+ // because of incompatible options.
+ if (!status.ok()) {
+ rdb_log_status_error(
+ status, "Compatibility check against existing database options failed");
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ status = rocksdb::TransactionDB::Open(
+ main_opts, tx_db_options, rocksdb_datadir, cf_descr, &cf_handles, &rdb);
+
+ if (!status.ok()) {
+ rdb_log_status_error(status, "Error opening instance");
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+ cf_manager.init(std::move(cf_options_map), &cf_handles);
+
+ if (dict_manager.init(rdb, &cf_manager)) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Failed to initialize data dictionary.");
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ if (binlog_manager.init(&dict_manager)) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Failed to initialize binlog manager.");
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ if (ddl_manager.init(&dict_manager, &cf_manager, rocksdb_validate_tables)) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Failed to initialize DDL manager.");
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ Rdb_sst_info::init(rdb);
+
+ /*
+    Enable auto compaction: everything the compaction filter depends on has
+    finished initializing.
+ */
+ std::vector<rocksdb::ColumnFamilyHandle *> compaction_enabled_cf_handles;
+ compaction_enabled_cf_handles.reserve(compaction_enabled_cf_indices.size());
+ for (const auto &index : compaction_enabled_cf_indices) {
+ compaction_enabled_cf_handles.push_back(cf_handles[index]);
+ }
+
+ status = rdb->EnableAutoCompaction(compaction_enabled_cf_handles);
+
+ if (!status.ok()) {
+ rdb_log_status_error(status, "Error enabling compaction");
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+#ifndef HAVE_PSI_INTERFACE
+ auto err = rdb_bg_thread.create_thread(BG_THREAD_NAME);
+#else
+ auto err = rdb_bg_thread.create_thread(BG_THREAD_NAME,
+ rdb_background_psi_thread_key);
+#endif
+ if (err != 0) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Couldn't start the background thread: (errno=%d)",
+ err);
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+#ifndef HAVE_PSI_INTERFACE
+ err = rdb_drop_idx_thread.create_thread(INDEX_THREAD_NAME);
+#else
+ err = rdb_drop_idx_thread.create_thread(INDEX_THREAD_NAME,
+ rdb_drop_idx_psi_thread_key);
+#endif
+ if (err != 0) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Couldn't start the drop index thread: (errno=%d)",
+ err);
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ err = rdb_mc_thread.create_thread(MANUAL_COMPACTION_THREAD_NAME
+#ifdef HAVE_PSI_INTERFACE
+ ,
+ rdb_mc_psi_thread_key
+#endif
+ );
+ if (err != 0) {
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Couldn't start the manual compaction thread: (errno=%d)",
+ err);
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ rdb_set_collation_exception_list(rocksdb_strict_collation_exceptions);
+
+ if (rocksdb_pause_background_work) {
+ rdb->PauseBackgroundWork();
+ }
+
+ // NO_LINT_DEBUG
+ sql_print_information("RocksDB: global statistics using %s indexer",
+ STRINGIFY_ARG(RDB_INDEXER));
+#if defined(HAVE_SCHED_GETCPU)
+ if (sched_getcpu() == -1) {
+ // NO_LINT_DEBUG
+ sql_print_information(
+ "RocksDB: sched_getcpu() failed - "
+ "global statistics will use thread_id_indexer_t instead");
+ }
+#endif
+
+ /**
+    RocksDB does not always shut down its threads when the
+    plugin is shut down. Disable the server's leak check
+    at exit to avoid a crash.
+ */
+ my_disable_leak_check = true;
+
+ err = my_error_register(rdb_get_error_messages, HA_ERR_ROCKSDB_FIRST,
+ HA_ERR_ROCKSDB_LAST);
+ if (err != 0) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Couldn't initialize error messages");
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ // Creating an instance of HistogramImpl should only happen after RocksDB
+ // has been successfully initialized.
+ commit_latency_stats = new rocksdb::HistogramImpl();
+
+  // Construct a list of directories which will be monitored by the I/O
+  // watchdog
+ // to make sure that we won't lose write access to them.
+ std::vector<std::string> directories;
+
+ // 1. Data directory.
+ directories.push_back(mysql_real_data_home);
+
+ // 2. Transaction logs.
+ if (myrocks::rocksdb_wal_dir && *myrocks::rocksdb_wal_dir) {
+ directories.push_back(myrocks::rocksdb_wal_dir);
+ }
+
+#if !defined(_WIN32) && !defined(__APPLE__)
+ io_watchdog = new Rdb_io_watchdog(std::move(directories));
+ io_watchdog->reset_timeout(rocksdb_io_write_timeout_secs);
+#endif
+
+ // NO_LINT_DEBUG
+ sql_print_information(
+ "MyRocks storage engine plugin has been successfully "
+ "initialized.");
+
+ // Skip cleaning up rdb_open_tables as we've succeeded
+ rdb_open_tables_cleanup.skip();
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+/*
+ Storage Engine deinitialization function, invoked when plugin is unloaded.
+*/
+
+static int rocksdb_done_func(void *const p) {
+ DBUG_ENTER_FUNC();
+
+ int error = 0;
+
+ // signal the drop index thread to stop
+ rdb_drop_idx_thread.signal(true);
+
+  // Flush all memtables to avoid losing data, even if the WAL is disabled.
+ rocksdb_flush_all_memtables();
+
+ // Stop all rocksdb background work
+ CancelAllBackgroundWork(rdb->GetBaseDB(), true);
+
+ // Signal the background thread to stop and to persist all stats collected
+ // from background flushes and compactions. This will add more keys to a new
+ // memtable, but since the memtables were just flushed, it should not trigger
+ // a flush that can stall due to background threads being stopped. As long
+ // as these keys are stored in a WAL file, they can be retrieved on restart.
+ rdb_bg_thread.signal(true);
+
+ // Wait for the background thread to finish.
+ auto err = rdb_bg_thread.join();
+ if (err != 0) {
+ // We'll log the message and continue because we're shutting down and
+ // continuation is the optimal strategy.
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Couldn't stop the background thread: (errno=%d)",
+ err);
+ }
+
+ // Wait for the drop index thread to finish.
+ err = rdb_drop_idx_thread.join();
+ if (err != 0) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Couldn't stop the index thread: (errno=%d)", err);
+ }
+
+ // signal the manual compaction thread to stop
+ rdb_mc_thread.signal(true);
+ // Wait for the manual compaction thread to finish.
+ err = rdb_mc_thread.join();
+ if (err != 0) {
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Couldn't stop the manual compaction thread: (errno=%d)", err);
+ }
+
+ if (rdb_open_tables.count()) {
+ // Looks like we are getting unloaded and yet we have some open tables
+ // left behind.
+ error = 1;
+ }
+
+ rdb_open_tables.free();
+ /*
+ destructors for static objects can be called at _exit(),
+ but we want to free the memory at dlclose()
+ */
+ // MARIADB_MERGE_2019: rdb_open_tables.m_hash.~Rdb_table_set();
+ mysql_mutex_destroy(&rdb_sysvars_mutex);
+ mysql_mutex_destroy(&rdb_block_cache_resize_mutex);
+
+
+ delete rdb_collation_exceptions;
+
+ mysql_mutex_destroy(&rdb_collation_data_mutex);
+ mysql_mutex_destroy(&rdb_mem_cmp_space_mutex);
+
+ Rdb_transaction::term_mutex();
+
+ for (auto &it : rdb_collation_data) {
+ delete it;
+ it = nullptr;
+ }
+
+ ddl_manager.cleanup();
+ binlog_manager.cleanup();
+ dict_manager.cleanup();
+ cf_manager.cleanup();
+
+ delete rdb;
+ rdb = nullptr;
+
+ delete commit_latency_stats;
+ commit_latency_stats = nullptr;
+
+#if !defined(_WIN32) && !defined(__APPLE__)
+ delete io_watchdog;
+ io_watchdog = nullptr;
+#endif
+
+// Disown the cache data since we're shutting down.
+// This results in memory leaks but it improves the shutdown time.
+// Don't disown when running under valgrind.
+#ifndef HAVE_valgrind
+ if (rocksdb_tbl_options->block_cache) {
+ rocksdb_tbl_options->block_cache->DisownData();
+ }
+#endif /* HAVE_valgrind */
+
+ /*
+ MariaDB: don't clear rocksdb_db_options and rocksdb_tbl_options.
+ MyRocks' plugin variables refer to them.
+
+ The plugin cannot be loaded again (see prevent_myrocks_loading) but plugin
+ variables are processed before myrocks::rocksdb_init_func is invoked, so
+ they must point to valid memory.
+ */
+ //rocksdb_db_options = nullptr;
+ rocksdb_db_options->statistics = nullptr;
+ //rocksdb_tbl_options = nullptr;
+ rocksdb_stats = nullptr;
+
+ my_error_unregister(HA_ERR_ROCKSDB_FIRST, HA_ERR_ROCKSDB_LAST);
+
+ /*
+    Prevent the plugin from being loaded again after it has been unloaded;
+    reloading does not currently work.
+ */
+ prevent_myrocks_loading= true;
+
+ DBUG_RETURN(error);
+}
+
+static inline void rocksdb_smart_seek(bool seek_backward,
+ rocksdb::Iterator *const iter,
+ const rocksdb::Slice &key_slice) {
+ if (seek_backward) {
+ iter->SeekForPrev(key_slice);
+ } else {
+ iter->Seek(key_slice);
+ }
+}
+
+static inline void rocksdb_smart_next(bool seek_backward,
+ rocksdb::Iterator *const iter) {
+ if (seek_backward) {
+ iter->Prev();
+ } else {
+ iter->Next();
+ }
+}
+
+#ifndef DBUG_OFF
+// simulate that RocksDB has reported corrupted data
+static void dbug_change_status_to_corrupted(rocksdb::Status *status) {
+ *status = rocksdb::Status::Corruption();
+}
+#endif
+
+// If the iterator is not valid, it might be because of EOF, but it might also
+// be due to an IOError or corruption. Good practice is to always check it.
+// https://github.com/facebook/rocksdb/wiki/Iterator#error-handling
+static inline bool is_valid(rocksdb::Iterator *scan_it) {
+ if (scan_it->Valid()) {
+ return true;
+ } else {
+ rocksdb::Status s = scan_it->status();
+ DBUG_EXECUTE_IF("rocksdb_return_status_corrupted",
+ dbug_change_status_to_corrupted(&s););
+ if (s.IsIOError() || s.IsCorruption()) {
+ if (s.IsCorruption()) {
+ rdb_persist_corruption_marker();
+ }
+ rdb_handle_io_error(s, RDB_IO_ERROR_GENERAL);
+ }
+ return false;
+ }
+}
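+
+/*
+  A minimal sketch of the idiom that is_valid() supports, using only the
+  standard RocksDB iterator API (the error-handling helpers above are
+  MyRocks-specific):
+
+    void scan_prefix(rocksdb::DB *db, const rocksdb::Slice &prefix) {
+      std::unique_ptr<rocksdb::Iterator> it(
+          db->NewIterator(rocksdb::ReadOptions()));
+      for (it->Seek(prefix); it->Valid() && it->key().starts_with(prefix);
+           it->Next()) {
+        // consume it->key() / it->value()
+      }
+      // !Valid() alone does not distinguish EOF from failure:
+      if (!it->status().ok()) {
+        // handle IOError / Corruption here
+      }
+    }
+*/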
+
+/**
+ @brief
+  Simple lock controls. The "table_handler" created here is a structure we
+  pass to each ha_rocksdb handler; it holds the pieces used for locking,
+  which every handler needs in order to function.
+*/
+
+Rdb_table_handler *Rdb_open_tables_map::get_table_handler(
+ const char *const table_name) {
+ DBUG_ASSERT(table_name != nullptr);
+
+ Rdb_table_handler *table_handler;
+
+ std::string table_name_str(table_name);
+
+ // First, look up the table in the hash map.
+ RDB_MUTEX_LOCK_CHECK(m_mutex);
+ const auto it = m_table_map.find(table_name_str);
+ if (it != m_table_map.end()) {
+ // Found it
+ table_handler = it->second;
+ } else {
+ char *tmp_name;
+
+ // Since we did not find it in the hash map, attempt to create and add it
+ // to the hash map.
+ if (!(table_handler = reinterpret_cast<Rdb_table_handler *>(my_multi_malloc(
+ MYF(MY_WME | MY_ZEROFILL), &table_handler, sizeof(*table_handler),
+ &tmp_name, table_name_str.length() + 1, NullS)))) {
+ // Allocating a new Rdb_table_handler and a new table name failed.
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+ return nullptr;
+ }
+
+ table_handler->m_ref_count = 0;
+ table_handler->m_table_name_length = table_name_str.length();
+ table_handler->m_table_name = tmp_name;
+ strmov(table_handler->m_table_name, table_name);
+
+ m_table_map.emplace(table_name_str, table_handler);
+
+ thr_lock_init(&table_handler->m_thr_lock);
+#ifdef MARIAROCKS_NOT_YET
+ table_handler->m_io_perf_read.init();
+ table_handler->m_io_perf_write.init();
+#endif
+ }
+ DBUG_ASSERT(table_handler->m_ref_count >= 0);
+ table_handler->m_ref_count++;
+
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+
+ return table_handler;
+}
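+
+/*
+  Note on the allocation above: my_multi_malloc() performs one underlying
+  malloc and carves it into the requested buffers, so the Rdb_table_handler
+  and its copy of the table name live in a single contiguous block:
+
+      [ Rdb_table_handler | table name + '\0' ]
+
+  This is why release_table_handler() below can free both with a single
+  my_free(table_handler) call.
+*/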
+
+std::vector<std::string> rdb_get_open_table_names(void) {
+ return rdb_open_tables.get_table_names();
+}
+
+std::vector<std::string> Rdb_open_tables_map::get_table_names(void) const {
+ const Rdb_table_handler *table_handler;
+ std::vector<std::string> names;
+
+ RDB_MUTEX_LOCK_CHECK(m_mutex);
+ for (const auto &kv : m_table_map) {
+ table_handler = kv.second;
+ DBUG_ASSERT(table_handler != nullptr);
+ names.push_back(table_handler->m_table_name);
+ }
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+
+ return names;
+}
+
+/*
+ Inspired by innobase_get_int_col_max_value from InnoDB. This returns the
+ maximum value a type can take on.
+*/
+static ulonglong rdb_get_int_col_max_value(const Field *field) {
+ ulonglong max_value = 0;
+ switch (field->key_type()) {
+ case HA_KEYTYPE_BINARY:
+ max_value = 0xFFULL;
+ break;
+ case HA_KEYTYPE_INT8:
+ max_value = 0x7FULL;
+ break;
+ case HA_KEYTYPE_USHORT_INT:
+ max_value = 0xFFFFULL;
+ break;
+ case HA_KEYTYPE_SHORT_INT:
+ max_value = 0x7FFFULL;
+ break;
+ case HA_KEYTYPE_UINT24:
+ max_value = 0xFFFFFFULL;
+ break;
+ case HA_KEYTYPE_INT24:
+ max_value = 0x7FFFFFULL;
+ break;
+ case HA_KEYTYPE_ULONG_INT:
+ max_value = 0xFFFFFFFFULL;
+ break;
+ case HA_KEYTYPE_LONG_INT:
+ max_value = 0x7FFFFFFFULL;
+ break;
+ case HA_KEYTYPE_ULONGLONG:
+ max_value = 0xFFFFFFFFFFFFFFFFULL;
+ break;
+ case HA_KEYTYPE_LONGLONG:
+ max_value = 0x7FFFFFFFFFFFFFFFULL;
+ break;
+ case HA_KEYTYPE_FLOAT:
+ max_value = 0x1000000ULL;
+ break;
+ case HA_KEYTYPE_DOUBLE:
+ max_value = 0x20000000000000ULL;
+ break;
+ default:
+ abort();
+ }
+
+ return max_value;
+}
+
+void ha_rocksdb::load_auto_incr_value() {
+ ulonglong auto_incr = 0;
+ bool validate_last = false, use_datadic = true;
+#ifndef DBUG_OFF
+ DBUG_EXECUTE_IF("myrocks_autoinc_upgrade", use_datadic = false;);
+ validate_last = true;
+#endif
+
+ if (use_datadic && dict_manager.get_auto_incr_val(
+ m_tbl_def->get_autoincr_gl_index_id(), &auto_incr)) {
+ update_auto_incr_val(auto_incr);
+ }
+
+ // If we find nothing in the data dictionary, or if we are in debug mode,
+ // then call index_last to get the last value.
+ //
+ // This is needed when upgrading from a server that did not support
+  // persistent auto_increment, or if the table is empty.
+ //
+ // For debug mode, we are just verifying that the data dictionary value is
+ // greater than or equal to the maximum value in the table.
+ if (auto_incr == 0 || validate_last) {
+ auto_incr = load_auto_incr_value_from_index();
+ update_auto_incr_val(auto_incr);
+ }
+
+ // If we failed to find anything from the data dictionary and index, then
+ // initialize auto_increment to 1.
+ if (m_tbl_def->m_auto_incr_val == 0) {
+ update_auto_incr_val(1);
+ }
+}
+
+ulonglong ha_rocksdb::load_auto_incr_value_from_index() {
+ const int save_active_index = active_index;
+ active_index = table->s->next_number_index;
+ const uint8 save_table_status = table->status;
+ ulonglong last_val = 0;
+
+ Rdb_transaction *const tx = get_or_create_tx(table->in_use);
+ const bool is_new_snapshot = !tx->has_snapshot();
+ if (is_new_snapshot) {
+ tx->acquire_snapshot(true);
+ }
+
+  // Do a lookup. We only need the index column, so it should be index-only.
+  // (Another reason to make it index-only is that table->read_set is not set
+  // appropriately, and a non-index-only lookup will not read the value.)
+ const bool save_keyread_only = m_keyread_only;
+ m_keyread_only = true;
+ m_converter->set_is_key_requested(true);
+
+ if (!index_last(table->record[0])) {
+ Field *field =
+ table->key_info[table->s->next_number_index].key_part[0].field;
+ ulonglong max_val = rdb_get_int_col_max_value(field);
+ my_bitmap_map *const old_map =
+ dbug_tmp_use_all_columns(table, table->read_set);
+ last_val = field->val_int();
+ if (last_val != max_val) {
+ last_val++;
+ }
+#ifndef DBUG_OFF
+ ulonglong dd_val;
+ if (last_val <= max_val) {
+ const auto &gl_index_id = m_tbl_def->get_autoincr_gl_index_id();
+ if (dict_manager.get_auto_incr_val(gl_index_id, &dd_val) &&
+ tx->get_auto_incr(gl_index_id) == 0) {
+ DBUG_ASSERT(dd_val >= last_val);
+ }
+ }
+#endif
+ dbug_tmp_restore_column_map(table->read_set, old_map);
+ }
+
+ m_keyread_only = save_keyread_only;
+ if (is_new_snapshot) {
+ tx->release_snapshot();
+ }
+
+ table->status = save_table_status;
+ active_index = save_active_index;
+
+ /*
+ Do what ha_rocksdb::index_end() does.
+ (Why don't we use index_init/index_end? class handler defines index_init
+ as private, for some reason).
+ */
+ release_scan_iterator();
+
+ return last_val;
+}
+
+void ha_rocksdb::update_auto_incr_val(ulonglong val) {
+ ulonglong auto_incr_val = m_tbl_def->m_auto_incr_val;
+ while (
+ auto_incr_val < val &&
+ !m_tbl_def->m_auto_incr_val.compare_exchange_weak(auto_incr_val, val)) {
+ // Do nothing - just loop until auto_incr_val is >= val or we successfully
+ // set it
+ }
+}
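+
+/*
+  The loop above is the standard lock-free "atomic fetch-max" pattern. A
+  self-contained sketch of the same idea using std::atomic:
+
+    #include <atomic>
+
+    void atomic_fetch_max(std::atomic<unsigned long long> &target,
+                          unsigned long long val) {
+      unsigned long long cur = target.load();
+      // On failure compare_exchange_weak reloads `cur`, so the loop exits as
+      // soon as either the store succeeds or another thread has already
+      // stored a value >= val.
+      while (cur < val && !target.compare_exchange_weak(cur, val)) {
+      }
+    }
+*/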
+
+void ha_rocksdb::update_auto_incr_val_from_field() {
+ Field *field;
+ ulonglong new_val, max_val;
+ field = table->key_info[table->s->next_number_index].key_part[0].field;
+ max_val = rdb_get_int_col_max_value(field);
+
+ my_bitmap_map *const old_map =
+ dbug_tmp_use_all_columns(table, table->read_set);
+ new_val = field->val_int();
+ // don't increment if we would wrap around
+ if (new_val != max_val) {
+ new_val++;
+ }
+
+ dbug_tmp_restore_column_map(table->read_set, old_map);
+
+ // Only update if positive value was set for auto_incr column.
+ if (new_val <= max_val) {
+ Rdb_transaction *const tx = get_or_create_tx(table->in_use);
+ tx->set_auto_incr(m_tbl_def->get_autoincr_gl_index_id(), new_val);
+
+ // Update the in memory auto_incr value in m_tbl_def.
+ update_auto_incr_val(new_val);
+ }
+}
+
+int ha_rocksdb::load_hidden_pk_value() {
+ const int save_active_index = active_index;
+ active_index = m_tbl_def->m_key_count - 1;
+ const uint8 save_table_status = table->status;
+
+ Rdb_transaction *const tx = get_or_create_tx(table->in_use);
+ const bool is_new_snapshot = !tx->has_snapshot();
+
+ longlong hidden_pk_id = 1;
+ // Do a lookup.
+ if (!index_last(table->record[0])) {
+ /*
+ Decode PK field from the key
+ */
+ auto err = read_hidden_pk_id_from_rowkey(&hidden_pk_id);
+ if (err) {
+ if (is_new_snapshot) {
+ tx->release_snapshot();
+ }
+ return err;
+ }
+
+ hidden_pk_id++;
+ }
+
+ longlong old = m_tbl_def->m_hidden_pk_val;
+ while (old < hidden_pk_id &&
+ !m_tbl_def->m_hidden_pk_val.compare_exchange_weak(old, hidden_pk_id)) {
+ }
+
+ if (is_new_snapshot) {
+ tx->release_snapshot();
+ }
+
+ table->status = save_table_status;
+ active_index = save_active_index;
+
+ release_scan_iterator();
+
+ return HA_EXIT_SUCCESS;
+}
+
+/* Get PK value from m_tbl_def->m_hidden_pk_info. */
+longlong ha_rocksdb::update_hidden_pk_val() {
+ DBUG_ASSERT(has_hidden_pk(table));
+ const longlong new_val = m_tbl_def->m_hidden_pk_val++;
+ return new_val;
+}
+
+/* Get the id of the hidden pk id from m_last_rowkey */
+int ha_rocksdb::read_hidden_pk_id_from_rowkey(longlong *const hidden_pk_id) {
+ DBUG_ASSERT(table != nullptr);
+ DBUG_ASSERT(has_hidden_pk(table));
+
+ rocksdb::Slice rowkey_slice(m_last_rowkey.ptr(), m_last_rowkey.length());
+
+ // Get hidden primary key from old key slice
+ Rdb_string_reader reader(&rowkey_slice);
+ if ((!reader.read(Rdb_key_def::INDEX_NUMBER_SIZE))) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+
+ const int length= 8; /* was Field_longlong::PACK_LENGTH in FB MySQL tree */
+ const uchar *from = reinterpret_cast<const uchar *>(reader.read(length));
+ if (from == nullptr) {
+ /* Mem-comparable image doesn't have enough bytes */
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+
+ *hidden_pk_id = rdb_netbuf_read_uint64(&from);
+ return HA_EXIT_SUCCESS;
+}
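+
+/*
+  Layout of the hidden-PK rowkey decoded above. The index-number prefix is
+  Rdb_key_def::INDEX_NUMBER_SIZE bytes wide (4 in MyRocks; stated here for
+  illustration):
+
+      [ index number (4 bytes) | hidden PK id (8 bytes, big-endian) ]
+
+  rdb_netbuf_read_uint64() performs the big-endian decode of the second field.
+*/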
+
+/**
+ @brief
+  Free lock controls. We call this whenever we close a table. If this was the
+  last reference to the table_handler, we free the memory associated with it.
+*/
+
+void Rdb_open_tables_map::release_table_handler(
+ Rdb_table_handler *const table_handler) {
+ RDB_MUTEX_LOCK_CHECK(m_mutex);
+
+ DBUG_ASSERT(table_handler != nullptr);
+ DBUG_ASSERT(table_handler->m_ref_count > 0);
+ if (!--table_handler->m_ref_count) {
+ // Last reference was released. Tear down the hash entry.
+ const auto ret MY_ATTRIBUTE((__unused__)) =
+ m_table_map.erase(std::string(table_handler->m_table_name));
+ DBUG_ASSERT(ret == 1); // the hash entry must actually be found and deleted
+ my_core::thr_lock_delete(&table_handler->m_thr_lock);
+ my_free(table_handler);
+ }
+
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+}
+
+static handler *rocksdb_create_handler(my_core::handlerton *const hton,
+ my_core::TABLE_SHARE *const table_arg,
+ my_core::MEM_ROOT *const mem_root) {
+ return new (mem_root) ha_rocksdb(hton, table_arg);
+}
+
+ha_rocksdb::ha_rocksdb(my_core::handlerton *const hton,
+ my_core::TABLE_SHARE *const table_arg)
+ : handler(hton, table_arg),
+ m_table_handler(nullptr),
+ m_scan_it(nullptr),
+ m_scan_it_skips_bloom(false),
+ m_scan_it_snapshot(nullptr),
+ m_scan_it_lower_bound(nullptr),
+ m_scan_it_upper_bound(nullptr),
+ m_tbl_def(nullptr),
+ m_pk_descr(nullptr),
+ m_key_descr_arr(nullptr),
+ m_pk_can_be_decoded(false),
+ m_pk_tuple(nullptr),
+ m_pk_packed_tuple(nullptr),
+ m_sk_packed_tuple(nullptr),
+ m_end_key_packed_tuple(nullptr),
+ m_sk_match_prefix(nullptr),
+ m_sk_match_prefix_buf(nullptr),
+ m_sk_packed_tuple_old(nullptr),
+ m_dup_sk_packed_tuple(nullptr),
+ m_dup_sk_packed_tuple_old(nullptr),
+ m_pack_buffer(nullptr),
+ m_lock_rows(RDB_LOCK_NONE),
+ m_keyread_only(false),
+ m_insert_with_update(false),
+ m_dup_pk_found(false),
+ m_in_rpl_delete_rows(false),
+ m_in_rpl_update_rows(false),
+ m_force_skip_unique_check(false) {}
+
+
+const std::string &ha_rocksdb::get_table_basename() const {
+ return m_tbl_def->base_tablename();
+}
+
+/**
+ @return
+ false OK
+    true   Error
+*/
+bool ha_rocksdb::init_with_fields() {
+ DBUG_ENTER_FUNC();
+
+ const uint pk = table_share->primary_key;
+ if (pk != MAX_KEY) {
+ const uint key_parts = table_share->key_info[pk].user_defined_key_parts;
+ check_keyread_allowed(pk /*PK*/, key_parts - 1, true);
+ } else {
+ m_pk_can_be_decoded = false;
+ }
+ cached_table_flags = table_flags();
+
+ DBUG_RETURN(false); /* Ok */
+}
+
+/*
+ If the key is a TTL key, we may need to filter it out.
+
+  The purpose of read filtering for tables with TTL is to ensure that,
+  during a transaction, a key which has already expired but has not yet been
+  removed by compaction is not returned to the user.
+
+  Without this, the user might be hit with problems such as disappearing
+  rows within a transaction, etc., because the compaction filter ignores
+  snapshots when filtering keys.
+*/
+bool ha_rocksdb::should_hide_ttl_rec(const Rdb_key_def &kd,
+ const rocksdb::Slice &ttl_rec_val,
+ const int64_t curr_ts) {
+ DBUG_ASSERT(kd.has_ttl());
+ DBUG_ASSERT(kd.m_ttl_rec_offset != UINT_MAX);
+
+ /*
+    curr_ts can only be 0 if there are no snapshots open.
+    should_hide_ttl_rec can only be called when there is at least one snapshot
+    open, unless we are filtering on the write path (single INSERT/UPDATE), in
+    which case we are passed the current time as curr_ts.
+
+    In the event curr_ts is 0, we always decide not to filter the record, and
+    we increment a diagnostic counter.
+ */
+ if (curr_ts == 0) {
+ update_row_stats(ROWS_HIDDEN_NO_SNAPSHOT);
+ return false;
+ }
+
+ if (!rdb_is_ttl_read_filtering_enabled() || !rdb_is_ttl_enabled()) {
+ return false;
+ }
+
+ Rdb_string_reader reader(&ttl_rec_val);
+
+ /*
+    Find the position of the 8-byte TTL value within each record in this index.
+ */
+ uint64 ts;
+ if (!reader.read(kd.m_ttl_rec_offset) || reader.read_uint64(&ts)) {
+ /*
+      This condition should never be reached since all TTL records have an
+      8-byte TTL field in front. Don't filter the record out; log an error.
+ */
+ std::string buf;
+ buf = rdb_hexdump(ttl_rec_val.data(), ttl_rec_val.size(),
+ RDB_MAX_HEXDUMP_LEN);
+ const GL_INDEX_ID gl_index_id = kd.get_gl_index_id();
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "Decoding ttl from PK value failed, "
+ "for index (%u,%u), val: %s",
+ gl_index_id.cf_id, gl_index_id.index_id, buf.c_str());
+ DBUG_ASSERT(0);
+ return false;
+ }
+
+ /* Hide record if it has expired before the current snapshot time. */
+ uint64 read_filter_ts = 0;
+#ifndef DBUG_OFF
+ read_filter_ts += rdb_dbug_set_ttl_read_filter_ts();
+#endif
+ bool is_hide_ttl =
+ ts + kd.m_ttl_duration + read_filter_ts <= static_cast<uint64>(curr_ts);
+ if (is_hide_ttl) {
+ update_row_stats(ROWS_FILTERED);
+
+ /* increment examined row count when rows are skipped */
+ THD *thd = ha_thd();
+ thd->inc_examined_row_count(1);
+ DEBUG_SYNC(thd, "rocksdb.ttl_rows_examined");
+ }
+ return is_hide_ttl;
+}
+
+int ha_rocksdb::rocksdb_skip_expired_records(const Rdb_key_def &kd,
+ rocksdb::Iterator *const iter,
+ bool seek_backward) {
+ if (kd.has_ttl()) {
+ THD *thd = ha_thd();
+ while (iter->Valid() &&
+ should_hide_ttl_rec(
+ kd, iter->value(),
+ get_or_create_tx(table->in_use)->m_snapshot_timestamp)) {
+ DEBUG_SYNC(thd, "rocksdb.check_flags_ser");
+ if (thd && thd->killed) {
+ return HA_ERR_QUERY_INTERRUPTED;
+ }
+ rocksdb_smart_next(seek_backward, iter);
+ }
+ }
+ return HA_EXIT_SUCCESS;
+}
+
+#ifndef DBUG_OFF
+void dbug_append_garbage_at_end(rocksdb::PinnableSlice *on_disk_rec) {
+ std::string str(on_disk_rec->data(), on_disk_rec->size());
+ on_disk_rec->Reset();
+ str.append("abc");
+ on_disk_rec->PinSelf(rocksdb::Slice(str));
+}
+
+void dbug_truncate_record(rocksdb::PinnableSlice *on_disk_rec) {
+ on_disk_rec->remove_suffix(on_disk_rec->size());
+}
+
+void dbug_modify_rec_varchar12(rocksdb::PinnableSlice *on_disk_rec) {
+ std::string res;
+ // The record format is a NULL byte followed by a VARCHAR(10) field.
+ // Put the NULL byte first.
+ res.append("\0", 1);
+ // Then add a 12-byte value (length byte 0x0C), which does not fit the
+ // declared VARCHAR(10) and thus simulates a bad record.
+ res.append("\xC", 1);
+ res.append("123456789ab", 12);
+
+ on_disk_rec->Reset();
+ on_disk_rec->PinSelf(rocksdb::Slice(res));
+}
+
+void dbug_create_err_inplace_alter() {
+ my_printf_error(ER_UNKNOWN_ERROR,
+ "Intentional failure in inplace alter occurred.", MYF(0));
+}
+#endif
+
+int ha_rocksdb::convert_record_from_storage_format(
+ const rocksdb::Slice *const key, uchar *const buf) {
+ DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read1",
+ dbug_append_garbage_at_end(&m_retrieved_record););
+ DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read2",
+ dbug_truncate_record(&m_retrieved_record););
+ DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read3",
+ dbug_modify_rec_varchar12(&m_retrieved_record););
+
+ return convert_record_from_storage_format(key, &m_retrieved_record, buf);
+}
+
+/*
+ @brief
+ Unpack the record in this->m_retrieved_record and this->m_last_rowkey from
+ storage format into buf (which can be table->record[0] or table->record[1]).
+
+ @param key Table record's key in mem-comparable form.
+ @param buf Store record in table->record[0] format here
+
+ @detail
+ If the table has blobs, the unpacked data in buf may keep pointers to the
+ data in this->m_retrieved_record.
+
+ The key is only needed to check its checksum value (the checksum is in
+ m_retrieved_record).
+
+ @seealso
+ rdb_converter::setup_read_decoders() Sets up data structures which tell
+ which columns to decode.
+
+ @return
+ 0 OK
+ other Error unpacking the data
+*/
+
+int ha_rocksdb::convert_record_from_storage_format(
+ const rocksdb::Slice *const key, const rocksdb::Slice *const value,
+ uchar *const buf) {
+ return m_converter->decode(m_pk_descr, buf, key, value);
+}
+
+int ha_rocksdb::alloc_key_buffers(const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg,
+ bool alloc_alter_buffers) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(m_pk_tuple == nullptr);
+
+ std::shared_ptr<Rdb_key_def> *const kd_arr = tbl_def_arg->m_key_descr_arr;
+
+ uint key_len = 0;
+ uint max_packed_sk_len = 0;
+ uint pack_key_len = 0;
+
+ m_pk_descr = kd_arr[pk_index(table_arg, tbl_def_arg)];
+ if (has_hidden_pk(table_arg)) {
+ m_pk_key_parts = 1;
+ } else {
+ m_pk_key_parts =
+ table->key_info[table->s->primary_key].user_defined_key_parts;
+ key_len = table->key_info[table->s->primary_key].key_length;
+ }
+
+ // TODO: move this into get_table_handler() ??
+ m_pk_descr->setup(table_arg, tbl_def_arg);
+
+ m_pk_tuple = reinterpret_cast<uchar *>(my_malloc(key_len, MYF(0)));
+
+ pack_key_len = m_pk_descr->max_storage_fmt_length();
+ m_pk_packed_tuple =
+ reinterpret_cast<uchar *>(my_malloc(pack_key_len, MYF(0)));
+
+ /* Sometimes, we may use m_sk_packed_tuple for storing packed PK */
+ max_packed_sk_len = pack_key_len;
+ for (uint i = 0; i < table_arg->s->keys; i++) {
+ /* Primary key was processed above */
+ if (i == table_arg->s->primary_key) continue;
+
+ // TODO: move this into get_table_handler() ??
+ kd_arr[i]->setup(table_arg, tbl_def_arg);
+
+ const uint packed_len = kd_arr[i]->max_storage_fmt_length();
+ if (packed_len > max_packed_sk_len) {
+ max_packed_sk_len = packed_len;
+ }
+ }
+
+ m_sk_packed_tuple =
+ reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)));
+ m_sk_match_prefix_buf =
+ reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)));
+ m_sk_packed_tuple_old =
+ reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)));
+ m_end_key_packed_tuple =
+ reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)));
+ m_pack_buffer =
+ reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)));
+
+ m_scan_it_lower_bound =
+ reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)));
+ m_scan_it_upper_bound =
+ reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)));
+
+ /*
+ If inplace alter is happening, allocate special buffers for unique
+ secondary index duplicate checking.
+ */
+ if (alloc_alter_buffers) {
+ m_dup_sk_packed_tuple =
+ reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)));
+ m_dup_sk_packed_tuple_old =
+ reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)));
+ }
+
+ if (m_pk_tuple == nullptr || m_pk_packed_tuple == nullptr ||
+ m_sk_packed_tuple == nullptr || m_sk_packed_tuple_old == nullptr ||
+ m_end_key_packed_tuple == nullptr || m_pack_buffer == nullptr ||
+ m_scan_it_upper_bound == nullptr || m_scan_it_lower_bound == nullptr ||
+ (alloc_alter_buffers && (m_dup_sk_packed_tuple == nullptr ||
+ m_dup_sk_packed_tuple_old == nullptr))) {
+ // One or more of the above allocations failed. Clean up and exit
+ free_key_buffers();
+
+ DBUG_RETURN(HA_ERR_OUT_OF_MEM);
+ }
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+void ha_rocksdb::free_key_buffers() {
+ my_free(m_pk_tuple);
+ m_pk_tuple = nullptr;
+
+ my_free(m_pk_packed_tuple);
+ m_pk_packed_tuple = nullptr;
+
+ my_free(m_sk_packed_tuple);
+ m_sk_packed_tuple = nullptr;
+
+ my_free(m_sk_match_prefix_buf);
+ m_sk_match_prefix_buf = nullptr;
+
+ my_free(m_sk_packed_tuple_old);
+ m_sk_packed_tuple_old = nullptr;
+
+ my_free(m_end_key_packed_tuple);
+ m_end_key_packed_tuple = nullptr;
+
+ my_free(m_pack_buffer);
+ m_pack_buffer = nullptr;
+
+ my_free(m_dup_sk_packed_tuple);
+ m_dup_sk_packed_tuple = nullptr;
+
+ my_free(m_dup_sk_packed_tuple_old);
+ m_dup_sk_packed_tuple_old = nullptr;
+
+ my_free(m_scan_it_lower_bound);
+ m_scan_it_lower_bound = nullptr;
+
+ my_free(m_scan_it_upper_bound);
+ m_scan_it_upper_bound = nullptr;
+}
+
+#ifdef MARIAROCKS_NOT_YET
+#endif
+void ha_rocksdb::set_skip_unique_check_tables(const char *const whitelist) {
+ const char *const wl =
+ whitelist ? whitelist : DEFAULT_SKIP_UNIQUE_CHECK_TABLES;
+
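+ // The value is interpreted as regex pattern(s) matched against the table's
+ // base name; e.g. a pattern like "tmp_.*" would skip unique checks for
+ // every table whose name starts with "tmp_".
+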
+#if defined(HAVE_PSI_INTERFACE)
+ Regex_list_handler regex_handler(key_rwlock_skip_unique_check_tables);
+#else
+ Regex_list_handler regex_handler;
+#endif
+
+ if (!regex_handler.set_patterns(wl)) {
+ warn_about_bad_patterns(&regex_handler, "skip_unique_check_tables");
+ }
+
+ m_skip_unique_check = regex_handler.matches(m_tbl_def->base_tablename());
+}
+
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) {
+ DBUG_ENTER_FUNC();
+
+ int err = close();
+ if (err) {
+ DBUG_RETURN(err);
+ }
+
+ m_table_handler = rdb_open_tables.get_table_handler(name);
+
+ if (m_table_handler == nullptr) {
+ DBUG_RETURN(HA_ERR_OUT_OF_MEM);
+ }
+
+ my_core::thr_lock_data_init(&m_table_handler->m_thr_lock, &m_db_lock,
+ nullptr);
+ m_io_perf.init(&m_table_handler->m_table_perf_context,
+ &m_table_handler->m_io_perf_read,
+ &m_table_handler->m_io_perf_write, &stats);
+ Rdb_perf_context_guard guard(&m_io_perf,
+ rocksdb_perf_context_level(ha_thd()));
+
+ std::string fullname;
+ err = rdb_normalize_tablename(name, &fullname);
+ if (err != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(err);
+ }
+
+ m_tbl_def = ddl_manager.find(fullname);
+ if (m_tbl_def == nullptr) {
+ my_error(ER_INTERNAL_ERROR, MYF(0),
+ "Attempt to open a table that is not present in RocksDB-SE data "
+ "dictionary");
+ DBUG_RETURN(HA_ERR_ROCKSDB_INVALID_TABLE);
+ }
+
+ m_lock_rows = RDB_LOCK_NONE;
+ m_key_descr_arr = m_tbl_def->m_key_descr_arr;
+
+ /*
+ Full table scan actually uses primary key
+ (UPDATE needs to know this, otherwise it will go into infinite loop on
+ queries like "UPDATE tbl SET pk=pk+100")
+ */
+ key_used_on_scan = table->s->primary_key;
+
+ // close() above has already called free_key_buffers(). No need to do it here.
+ err = alloc_key_buffers(table, m_tbl_def);
+
+ if (err) {
+ DBUG_RETURN(err);
+ }
+
+ /*
+ init_with_fields() is used to initialize table flags based on the field
+ definitions in table->field[].
+ It is called by open_binary_frm(), but that function calls the method for
+ a temporary ha_rocksdb object which is later destroyed.
+
+ If we are here in ::open(), then init_with_fields() has not been called
+ for this object. Call it ourselves, we want all member variables to be
+ properly initialized.
+ */
+ init_with_fields();
+
+ /* Initialize decoder */
+ m_converter = std::make_shared<Rdb_converter>(ha_thd(), m_tbl_def, table);
+
+ /*
+ Update m_ttl_bytes address to same as Rdb_converter's m_ttl_bytes.
+ Remove this code after moving convert_record_to_storage_format() into
+ Rdb_converter class.
+ */
+ m_ttl_bytes = m_converter->get_ttl_bytes_buffer();
+
+ /*
+ MariaDB: adjust field->part_of_key for PK columns. We can only do it here
+ because the SE API relies solely on HA_PRIMARY_KEY_IN_READ_INDEX, which
+ does not allow distinguishing between unpackable and non-unpackable
+ columns.
+ Upstream uses handler->init_with_fields() but we don't have that call.
+ */
+ {
+ if (!has_hidden_pk(table)) {
+ KEY *const pk_info = &table->key_info[table->s->primary_key];
+ for (uint kp = 0; kp < pk_info->user_defined_key_parts; kp++) {
+ if (!m_pk_descr->can_unpack(kp)) {
+ uint field_index= pk_info->key_part[kp].field->field_index;
+ table->field[field_index]->part_of_key.clear_all();
+ table->field[field_index]->part_of_key.set_bit(table->s->primary_key);
+ }
+ }
+ }
+
+ for (uint key= 0; key < table->s->keys; key++) {
+ KEY *const key_info = &table->key_info[key];
+ if (key == table->s->primary_key)
+ continue;
+ for (uint kp = 0; kp < key_info->usable_key_parts; kp++) {
+ uint field_index= key_info->key_part[kp].field->field_index;
+ if (m_key_descr_arr[key]->can_unpack(kp)) {
+ table->field[field_index]->part_of_key.set_bit(key);
+ } else {
+ table->field[field_index]->part_of_key.clear_bit(key);
+ }
+ }
+ }
+ }
+
+ info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
+
+ /*
+ The following load_XXX code calls row decode functions, and they do
+ that without having done ::external_lock() or index_init()/rnd_init().
+ (Note: this also means we're doing a read when there was no
+ rdb_converter::setup_field_encoders() call)
+
+ Initialize the necessary variables for them:
+ */
+
+ /* Load auto_increment value only once on first use. */
+ if (table->found_next_number_field && m_tbl_def->m_auto_incr_val == 0) {
+ load_auto_incr_value();
+ }
+
+ /* Load hidden pk only once on first use. */
+ if (has_hidden_pk(table) && m_tbl_def->m_hidden_pk_val == 0 &&
+ (err = load_hidden_pk_value()) != HA_EXIT_SUCCESS) {
+ free_key_buffers();
+ DBUG_RETURN(err);
+ }
+
+ /* Index block size in MyRocks: used by MySQL in query optimization */
+ stats.block_size = rocksdb_tbl_options->block_size;
+
+#ifdef MARIAROCKS_NOT_YET // MDEV-10976
+#endif
+ /* Determine at open whether we should skip unique checks for this table */
+ set_skip_unique_check_tables(THDVAR(ha_thd(), skip_unique_check_tables));
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+int ha_rocksdb::close(void) {
+ DBUG_ENTER_FUNC();
+
+ m_pk_descr = nullptr;
+ m_key_descr_arr = nullptr;
+ m_converter = nullptr;
+ free_key_buffers();
+
+ if (m_table_handler != nullptr) {
+ rdb_open_tables.release_table_handler(m_table_handler);
+ m_table_handler = nullptr;
+ }
+
+ // These are needed to suppress valgrind errors in rocksdb.partition
+ m_last_rowkey.free();
+ m_sk_tails.free();
+ m_sk_tails_old.free();
+ m_pk_unpack_info.free();
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+static const char *rdb_error_messages[] = {
+ "Table must have a PRIMARY KEY.",
+ "Specifying DATA DIRECTORY for an individual table is not supported.",
+ "Specifying INDEX DIRECTORY for an individual table is not supported.",
+ "RocksDB commit failed.",
+ "Failure during bulk load operation.",
+ "Found data corruption.",
+ "CRC checksum mismatch.",
+ "Invalid table.",
+ "Could not access RocksDB properties.",
+ "File I/O error during merge/sort operation.",
+ "RocksDB status: not found.",
+ "RocksDB status: corruption.",
+ "RocksDB status: invalid argument.",
+ "RocksDB status: io error.",
+ "RocksDB status: no space.",
+ "RocksDB status: merge in progress.",
+ "RocksDB status: incomplete.",
+ "RocksDB status: shutdown in progress.",
+ "RocksDB status: timed out.",
+ "RocksDB status: aborted.",
+ "RocksDB status: lock limit reached.",
+ "RocksDB status: busy.",
+ "RocksDB status: deadlock.",
+ "RocksDB status: expired.",
+ "RocksDB status: try again.",
+};
+
+static_assert((sizeof(rdb_error_messages) / sizeof(rdb_error_messages[0])) ==
+ ((HA_ERR_ROCKSDB_LAST - HA_ERR_ROCKSDB_FIRST) + 1),
+ "Number of error messages doesn't match number of error codes");
+
+// psergey-merge: do we need this in MariaDB? We have get_error_messages
+// below...
+#if 0
+static const char *rdb_get_error_message(int nr) {
+ return rdb_error_messages[nr - HA_ERR_ROCKSDB_FIRST];
+}
+#endif
+
+static const char **rdb_get_error_messages(int nr) { return rdb_error_messages; }
+
+bool ha_rocksdb::get_error_message(const int error, String *const buf) {
+ DBUG_ENTER_FUNC();
+
+ static_assert(HA_ERR_ROCKSDB_LAST > HA_ERR_FIRST,
+ "HA_ERR_ROCKSDB_LAST > HA_ERR_FIRST");
+ static_assert(HA_ERR_ROCKSDB_LAST > HA_ERR_LAST,
+ "HA_ERR_ROCKSDB_LAST > HA_ERR_LAST");
+
+ if (error == HA_ERR_LOCK_WAIT_TIMEOUT || error == HA_ERR_LOCK_DEADLOCK ||
+ error == HA_ERR_ROCKSDB_STATUS_BUSY) {
+ Rdb_transaction *const tx = get_tx_from_thd(ha_thd());
+ DBUG_ASSERT(tx != nullptr);
+ buf->append(tx->m_detailed_error);
+ DBUG_RETURN(true);
+ }
+
+ if (error >= HA_ERR_ROCKSDB_FIRST && error <= HA_ERR_ROCKSDB_LAST) {
+ buf->append(rdb_error_messages[error - HA_ERR_ROCKSDB_FIRST]);
+ }
+
+ // We can be called with values that are < HA_ERR_FIRST because most
+ // MySQL internal functions will just return HA_EXIT_FAILURE in case of
+ // an error.
+
+ DBUG_RETURN(false);
+}
+
+/*
+ Generalized way to convert RocksDB status errors into MySQL error code, and
+ print error message.
+
+ Each error code below maps to a RocksDB status code found in:
+ rocksdb/include/rocksdb/status.h
+*/
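+// Illustrative call pattern (a sketch, not code from this patch):
+//   if (!s.ok()) return rdb_error_to_mysql(s, "while reading row");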
+int ha_rocksdb::rdb_error_to_mysql(const rocksdb::Status &s,
+ const char *opt_msg) {
+ DBUG_ASSERT(!s.ok());
+
+ int err;
+ switch (s.code()) {
+ case rocksdb::Status::Code::kOk:
+ err = HA_EXIT_SUCCESS;
+ break;
+ case rocksdb::Status::Code::kNotFound:
+ err = HA_ERR_ROCKSDB_STATUS_NOT_FOUND;
+ break;
+ case rocksdb::Status::Code::kCorruption:
+ err = HA_ERR_ROCKSDB_STATUS_CORRUPTION;
+ break;
+ case rocksdb::Status::Code::kNotSupported:
+ err = HA_ERR_ROCKSDB_STATUS_NOT_SUPPORTED;
+ break;
+ case rocksdb::Status::Code::kInvalidArgument:
+ err = HA_ERR_ROCKSDB_STATUS_INVALID_ARGUMENT;
+ break;
+ case rocksdb::Status::Code::kIOError:
+ err = (s.IsNoSpace()) ? HA_ERR_ROCKSDB_STATUS_NO_SPACE
+ : HA_ERR_ROCKSDB_STATUS_IO_ERROR;
+ break;
+ case rocksdb::Status::Code::kMergeInProgress:
+ err = HA_ERR_ROCKSDB_STATUS_MERGE_IN_PROGRESS;
+ break;
+ case rocksdb::Status::Code::kIncomplete:
+ err = HA_ERR_ROCKSDB_STATUS_INCOMPLETE;
+ break;
+ case rocksdb::Status::Code::kShutdownInProgress:
+ err = HA_ERR_ROCKSDB_STATUS_SHUTDOWN_IN_PROGRESS;
+ break;
+ case rocksdb::Status::Code::kTimedOut:
+ err = HA_ERR_ROCKSDB_STATUS_TIMED_OUT;
+ break;
+ case rocksdb::Status::Code::kAborted:
+ err = (s.IsLockLimit()) ? HA_ERR_ROCKSDB_STATUS_LOCK_LIMIT
+ : HA_ERR_ROCKSDB_STATUS_ABORTED;
+ break;
+ case rocksdb::Status::Code::kBusy:
+ err = (s.IsDeadlock()) ? HA_ERR_ROCKSDB_STATUS_DEADLOCK
+ : HA_ERR_ROCKSDB_STATUS_BUSY;
+ break;
+ case rocksdb::Status::Code::kExpired:
+ err = HA_ERR_ROCKSDB_STATUS_EXPIRED;
+ break;
+ case rocksdb::Status::Code::kTryAgain:
+ err = HA_ERR_ROCKSDB_STATUS_TRY_AGAIN;
+ break;
+ default:
+ DBUG_ASSERT(0);
+ return -1;
+ }
+
+ std::string errMsg;
+ if (s.IsLockLimit()) {
+ errMsg =
+ "Operation aborted: Failed to acquire lock due to "
+ "rocksdb_max_row_locks limit";
+ } else {
+ errMsg = s.ToString();
+ }
+
+ if (opt_msg) {
+ std::string concatenated_error = errMsg + " (" + std::string(opt_msg) + ")";
+ my_error(ER_GET_ERRMSG, MYF(0), s.code(), concatenated_error.c_str(),
+ rocksdb_hton_name);
+ } else {
+ my_error(ER_GET_ERRMSG, MYF(0), s.code(), errMsg.c_str(),
+ rocksdb_hton_name);
+ }
+
+ return err;
+}
+
+/* MyRocks supports only the following collations for indexed columns */
+static const std::set<uint> RDB_INDEX_COLLATIONS = {
+ COLLATION_BINARY, COLLATION_UTF8_BIN, COLLATION_LATIN1_BIN};
+
+static bool rdb_is_index_collation_supported(
+ const my_core::Field *const field) {
+ const my_core::enum_field_types type = field->real_type();
+ /* Handle [VAR](CHAR|BINARY) or TEXT|BLOB */
+ if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_STRING ||
+ type == MYSQL_TYPE_BLOB) {
+
+ return (RDB_INDEX_COLLATIONS.find(field->charset()->number) !=
+ RDB_INDEX_COLLATIONS.end()) ||
+ rdb_is_collation_supported(field->charset());
+ }
+ return true;
+}
+
+
+static bool
+rdb_field_uses_nopad_collation(const my_core::Field *const field) {
+ const my_core::enum_field_types type = field->real_type();
+ /* Handle [VAR](CHAR|BINARY) or TEXT|BLOB */
+ if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_STRING ||
+ type == MYSQL_TYPE_BLOB) {
+
+ /*
+ This is technically a NOPAD collation but it's a binary collation
+ that we can handle.
+ */
+ if (RDB_INDEX_COLLATIONS.find(field->charset()->number) !=
+ RDB_INDEX_COLLATIONS.end())
+ return false;
+
+ return (field->charset()->state & MY_CS_NOPAD);
+ }
+ return false;
+}
+
+
+/*
+ Create structures needed for storing data in rocksdb. This is called when the
+ table is created. The structures will be shared by all TABLE* objects.
+
+ @param
+ table_arg Table with definition
+ db_table "dbname.tablename"
+ len strlen of the above
+ tbl_def_arg tbl_def whose key_descr is being created/populated
+ old_tbl_def_arg tbl_def from which keys are being copied over from
+ (for use during inplace alter)
+
+ @return
+ 0 - Ok
+ other - error, either given table ddl is not supported by rocksdb or OOM.
+*/
+int ha_rocksdb::create_key_defs(
+ const TABLE *const table_arg, Rdb_tbl_def *const tbl_def_arg,
+ const TABLE *const old_table_arg /* = nullptr */,
+ const Rdb_tbl_def *const old_tbl_def_arg
+ /* = nullptr */) const {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(table_arg->s != nullptr);
+
+ /*
+ These need to be one greater than MAX_INDEXES since the user can create
+ MAX_INDEXES secondary keys and no primary key, which would cause us
+ to generate a hidden one.
+ */
+ std::array<key_def_cf_info, MAX_INDEXES + 1> cfs;
+
+ /*
+ NOTE: All new column families must be created before new index numbers are
+ allocated to each key definition. See below for more details.
+ http://github.com/MySQLOnRocksDB/mysql-5.6/issues/86#issuecomment-138515501
+ */
+ if (create_cfs(table_arg, tbl_def_arg, &cfs)) {
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ uint64 ttl_duration = 0;
+ std::string ttl_column;
+ uint ttl_field_offset;
+
+ uint err;
+ if ((err = Rdb_key_def::extract_ttl_duration(table_arg, tbl_def_arg,
+ &ttl_duration))) {
+ DBUG_RETURN(err);
+ }
+
+ if ((err = Rdb_key_def::extract_ttl_col(table_arg, tbl_def_arg, &ttl_column,
+ &ttl_field_offset))) {
+ DBUG_RETURN(err);
+ }
+
+ /* We don't currently support TTL on tables with hidden primary keys. */
+ if (ttl_duration > 0 && has_hidden_pk(table_arg)) {
+ my_error(ER_RDB_TTL_UNSUPPORTED, MYF(0));
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ /*
+ If TTL duration is not specified but TTL column was specified, throw an
+ error because TTL column requires duration.
+ */
+ if (ttl_duration == 0 && !ttl_column.empty()) {
+ my_error(ER_RDB_TTL_COL_FORMAT, MYF(0), ttl_column.c_str());
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ if (!old_tbl_def_arg) {
+ /*
+ old_tbl_def doesn't exist. This means we are in the process of creating
+ a new table.
+
+ Get the index numbers (this will update the next_index_number)
+ and create Rdb_key_def structures.
+ */
+ for (uint i = 0; i < tbl_def_arg->m_key_count; i++) {
+ if (create_key_def(table_arg, i, tbl_def_arg, &m_key_descr_arr[i], cfs[i],
+ ttl_duration, ttl_column)) {
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+ }
+ } else {
+ /*
+ old_tbl_def exists. This means we are creating a new tbl_def as part of
+ in-place alter table. Copy over existing keys from the old_tbl_def and
+ generate the necessary new key definitions if any.
+ */
+ if (create_inplace_key_defs(table_arg, tbl_def_arg, old_table_arg,
+ old_tbl_def_arg, cfs, ttl_duration,
+ ttl_column)) {
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+ }
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+/*
+ Checks index parameters and creates column families needed for storing data
+ in rocksdb if necessary.
+
+ @param in
+ table_arg Table with definition
+ db_table Table name
+ tbl_def_arg Table def structure being populated
+
+ @param out
+ cfs CF info for each key definition in 'key_info' order
+
+ @return
+ 0 - Ok
+ other - error
+*/
+int ha_rocksdb::create_cfs(
+ const TABLE *const table_arg, Rdb_tbl_def *const tbl_def_arg,
+ std::array<struct key_def_cf_info, MAX_INDEXES + 1> *const cfs) const {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(table_arg->s != nullptr);
+
+ char tablename_sys[NAME_LEN + 1];
+ bool tsys_set= false;
+
+ /*
+ The following loop checks the index parameters and creates
+ column families if necessary.
+ */
+ for (uint i = 0; i < tbl_def_arg->m_key_count; i++) {
+ rocksdb::ColumnFamilyHandle *cf_handle;
+
+ if (!is_hidden_pk(i, table_arg, tbl_def_arg) &&
+ tbl_def_arg->base_tablename().find(tmp_file_prefix) != 0) {
+ if (!tsys_set)
+ {
+ tsys_set= true;
+ my_core::filename_to_tablename(tbl_def_arg->base_tablename().c_str(),
+ tablename_sys, sizeof(tablename_sys));
+ }
+
+ for (uint part = 0; part < table_arg->key_info[i].ext_key_parts;
+ part++)
+ {
+ /* MariaDB: disallow NOPAD collations */
+ if (rdb_field_uses_nopad_collation(
+ table_arg->key_info[i].key_part[part].field))
+ {
+ my_error(ER_MYROCKS_CANT_NOPAD_COLLATION, MYF(0));
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ if (rocksdb_strict_collation_check &&
+ !rdb_is_index_collation_supported(
+ table_arg->key_info[i].key_part[part].field) &&
+ !rdb_collation_exceptions->matches(tablename_sys)) {
+
+ char buf[1024];
+ my_snprintf(buf, sizeof(buf),
+ "Indexed column %s.%s uses a collation that does not "
+ "allow index-only access in secondary key and has "
+ "reduced disk space efficiency in primary key.",
+ tbl_def_arg->full_tablename().c_str(),
+ table_arg->key_info[i].key_part[part].field->field_name);
+
+ my_error(ER_INTERNAL_ERROR, MYF(ME_JUST_WARNING), buf);
+ }
+ }
+ }
+
+ // Internal consistency check to make sure that data in TABLE and
+ // Rdb_tbl_def structures matches. Either both are missing or both are
+ // specified. Yes, this is critical enough to make it into SHIP_ASSERT.
+ SHIP_ASSERT(IF_PARTITIONING(!table_arg->part_info, true) ==
+ tbl_def_arg->base_partition().empty());
+
+ // Generate the name for the column family to use.
+ bool per_part_match_found = false;
+ std::string cf_name =
+ generate_cf_name(i, table_arg, tbl_def_arg, &per_part_match_found);
+
+ // Prevent create from using the system column family.
+ if (cf_name == DEFAULT_SYSTEM_CF_NAME) {
+ my_error(ER_WRONG_ARGUMENTS, MYF(0),
+ "column family not valid for storing index data.");
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ // `get_or_create_cf` uses `cf_name` as the name of the column family,
+ // creating the CF if it does not already exist.
+ cf_handle = cf_manager.get_or_create_cf(rdb, cf_name);
+
+ if (!cf_handle) {
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ auto &cf = (*cfs)[i];
+
+ cf.cf_handle = cf_handle;
+ cf.is_reverse_cf = Rdb_cf_manager::is_cf_name_reverse(cf_name.c_str());
+ cf.is_per_partition_cf = per_part_match_found;
+ }
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+/*
+ Create key definition needed for storing data in rocksdb during ADD index
+ inplace operations.
+
+ @param in
+ table_arg Table with definition
+ tbl_def_arg New table def structure being populated
+ old_tbl_def_arg Old(current) table def structure
+ cfs Struct array which contains column family information
+
+ @return
+ 0 - Ok
+ other - error, either given table ddl is not supported by rocksdb or OOM.
+*/
+int ha_rocksdb::create_inplace_key_defs(
+ const TABLE *const table_arg, Rdb_tbl_def *const tbl_def_arg,
+ const TABLE *const old_table_arg, const Rdb_tbl_def *const old_tbl_def_arg,
+ const std::array<key_def_cf_info, MAX_INDEXES + 1> &cfs,
+ uint64 ttl_duration, const std::string &ttl_column) const {
+ DBUG_ENTER_FUNC();
+
+ std::shared_ptr<Rdb_key_def> *const old_key_descr =
+ old_tbl_def_arg->m_key_descr_arr;
+ std::shared_ptr<Rdb_key_def> *const new_key_descr =
+ tbl_def_arg->m_key_descr_arr;
+ const std::unordered_map<std::string, uint> old_key_pos =
+ get_old_key_positions(table_arg, tbl_def_arg, old_table_arg,
+ old_tbl_def_arg);
+
+ uint i;
+ for (i = 0; i < tbl_def_arg->m_key_count; i++) {
+ const auto &it = old_key_pos.find(get_key_name(i, table_arg, tbl_def_arg));
+
+ if (it != old_key_pos.end()) {
+ /*
+ Found matching index in the old table definition, so copy it over to the
+ newly created one.
+ */
+ const Rdb_key_def &okd = *old_key_descr[it->second];
+
+ const GL_INDEX_ID gl_index_id = okd.get_gl_index_id();
+ struct Rdb_index_info index_info;
+ if (!dict_manager.get_index_info(gl_index_id, &index_info)) {
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Could not get index information "
+ "for Index Number (%u,%u), table %s",
+ gl_index_id.cf_id, gl_index_id.index_id,
+ old_tbl_def_arg->full_tablename().c_str());
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ uint32 ttl_rec_offset =
+ Rdb_key_def::has_index_flag(index_info.m_index_flags,
+ Rdb_key_def::TTL_FLAG)
+ ? Rdb_key_def::calculate_index_flag_offset(
+ index_info.m_index_flags, Rdb_key_def::TTL_FLAG)
+ : UINT_MAX;
+
+ /*
+ We can't use the copy constructor because we need to update the
+ keynr within the pack_info for each field and the keyno of the keydef
+ itself.
+ */
+ new_key_descr[i] = std::make_shared<Rdb_key_def>(
+ okd.get_index_number(), i, okd.get_cf(),
+ index_info.m_index_dict_version, index_info.m_index_type,
+ index_info.m_kv_version, okd.m_is_reverse_cf,
+ okd.m_is_per_partition_cf, okd.m_name.c_str(),
+ dict_manager.get_stats(gl_index_id), index_info.m_index_flags,
+ ttl_rec_offset, index_info.m_ttl_duration);
+ } else if (create_key_def(table_arg, i, tbl_def_arg, &new_key_descr[i],
+ cfs[i], ttl_duration, ttl_column)) {
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ DBUG_ASSERT(new_key_descr[i] != nullptr);
+ new_key_descr[i]->setup(table_arg, tbl_def_arg);
+ }
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+std::unordered_map<std::string, uint> ha_rocksdb::get_old_key_positions(
+ const TABLE *const table_arg, const Rdb_tbl_def *const tbl_def_arg,
+ const TABLE *const old_table_arg,
+ const Rdb_tbl_def *const old_tbl_def_arg) const {
+ DBUG_ENTER_FUNC();
+
+ std::shared_ptr<Rdb_key_def> *const old_key_descr =
+ old_tbl_def_arg->m_key_descr_arr;
+ std::unordered_map<std::string, uint> old_key_pos;
+ std::unordered_map<std::string, uint> new_key_pos;
+ uint i;
+
+ for (i = 0; i < tbl_def_arg->m_key_count; i++) {
+ new_key_pos[get_key_name(i, table_arg, tbl_def_arg)] = i;
+ }
+
+ for (i = 0; i < old_tbl_def_arg->m_key_count; i++) {
+ if (is_hidden_pk(i, old_table_arg, old_tbl_def_arg)) {
+ old_key_pos[old_key_descr[i]->m_name] = i;
+ continue;
+ }
+
+ /*
+ In case of matching key name, need to check key parts of keys as well,
+ in case a simultaneous drop + add is performed, where the key name is the
+ same but the key parts are different.
+
+ Example:
+ CREATE TABLE t1 (a INT, b INT, KEY ka(a)) ENGINE=RocksDB;
+ ALTER TABLE t1 DROP INDEX ka, ADD INDEX ka(b), ALGORITHM=INPLACE;
+ */
+ const KEY *const old_key = &old_table_arg->key_info[i];
+ const auto &it = new_key_pos.find(old_key->name);
+ if (it == new_key_pos.end()) {
+ continue;
+ }
+
+ KEY *const new_key = &table_arg->key_info[it->second];
+
+ /*
+ Check that the key is identical between old and new tables.
+ If not, we still need to create a new index.
+
+ The exception is an index changed from unique to non-unique; in that case
+ we don't need to rebuild, since unique and non-unique indexes are stored
+ the same way in RocksDB.
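+
+ Concretely, the bit test below accepts the change only when HA_NOSAME is
+ the single flag bit that differs and the old key is the one that had it
+ set.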
+ */
+ bool unique_to_non_unique =
+ ((old_key->flags ^ new_key->flags) == HA_NOSAME) &&
+ (old_key->flags & HA_NOSAME);
+
+ if (compare_keys(old_key, new_key) && !unique_to_non_unique) {
+ continue;
+ }
+
+ /* Check to make sure key parts match. */
+ if (compare_key_parts(old_key, new_key)) {
+ continue;
+ }
+
+ old_key_pos[old_key->name] = i;
+ }
+
+ DBUG_RETURN(old_key_pos);
+}
+
+/* Check to see if two keys are identical. */
+int ha_rocksdb::compare_keys(const KEY *const old_key,
+ const KEY *const new_key) const {
+ DBUG_ENTER_FUNC();
+
+ /* Check index name. */
+ if (strcmp(old_key->name, new_key->name) != 0) {
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ /* If index algorithms are different then keys are different. */
+ if (old_key->algorithm != new_key->algorithm) {
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ /* Check that the key is identical between old and new tables. */
+ if ((old_key->flags ^ new_key->flags) & HA_KEYFLAG_MASK) {
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ /* Check index comment. (for column family changes) */
+ std::string old_comment(old_key->comment.str, old_key->comment.length);
+ std::string new_comment(new_key->comment.str, new_key->comment.length);
+ if (old_comment.compare(new_comment) != 0) {
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+/* Check two keys to ensure that key parts within keys match */
+int ha_rocksdb::compare_key_parts(const KEY *const old_key,
+ const KEY *const new_key) const {
+ DBUG_ENTER_FUNC();
+
+ /* Skip if key parts do not match, as it is a different key */
+ if (new_key->user_defined_key_parts != old_key->user_defined_key_parts) {
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ /* Check to see that key parts themselves match */
+ for (uint i = 0; i < old_key->user_defined_key_parts; i++) {
+ if (strcmp(old_key->key_part[i].field->field_name,
+ new_key->key_part[i].field->field_name) != 0) {
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ /* Check if prefix index key part length has changed */
+ if (old_key->key_part[i].length != new_key->key_part[i].length) {
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+ }
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+/*
+ Create key definition needed for storing data in rocksdb.
+ This can be called either during CREATE table or doing ADD index operations.
+
+ @param in
+ table_arg Table with definition
+ i Position of index being created inside table_arg->key_info
+ tbl_def_arg Table def structure being populated
+ cf_info Struct which contains column family information
+
+ @param out
+ new_key_def Newly created index definition.
+
+ @return
+ 0 - Ok
+ other - error, either given table ddl is not supported by rocksdb or OOM.
+*/
+int ha_rocksdb::create_key_def(const TABLE *const table_arg, const uint i,
+ const Rdb_tbl_def *const tbl_def_arg,
+ std::shared_ptr<Rdb_key_def> *const new_key_def,
+ const struct key_def_cf_info &cf_info,
+ uint64 ttl_duration,
+ const std::string &ttl_column) const {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(*new_key_def == nullptr);
+
+ const uint index_id = ddl_manager.get_and_update_next_number(&dict_manager);
+ const uint16_t index_dict_version = Rdb_key_def::INDEX_INFO_VERSION_LATEST;
+ uchar index_type;
+ uint16_t kv_version;
+
+ if (is_hidden_pk(i, table_arg, tbl_def_arg)) {
+ index_type = Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY;
+ kv_version = Rdb_key_def::PRIMARY_FORMAT_VERSION_LATEST;
+ } else if (i == table_arg->s->primary_key) {
+ index_type = Rdb_key_def::INDEX_TYPE_PRIMARY;
+ uint16 pk_latest_version = Rdb_key_def::PRIMARY_FORMAT_VERSION_LATEST;
+ kv_version = pk_latest_version;
+ } else {
+ index_type = Rdb_key_def::INDEX_TYPE_SECONDARY;
+ uint16 sk_latest_version = Rdb_key_def::SECONDARY_FORMAT_VERSION_LATEST;
+ kv_version = sk_latest_version;
+ }
+
+ // Use PRIMARY_FORMAT_VERSION_UPDATE1 here since it is the same value as
+ // SECONDARY_FORMAT_VERSION_UPDATE1 so it doesn't matter if this is a
+ // primary key or secondary key.
+ DBUG_EXECUTE_IF("MYROCKS_LEGACY_VARBINARY_FORMAT", {
+ kv_version = Rdb_key_def::PRIMARY_FORMAT_VERSION_UPDATE1;
+ });
+
+ DBUG_EXECUTE_IF("MYROCKS_NO_COVERED_BITMAP_FORMAT", {
+ if (index_type == Rdb_key_def::INDEX_TYPE_SECONDARY) {
+ kv_version = Rdb_key_def::SECONDARY_FORMAT_VERSION_UPDATE2;
+ }
+ });
+
+ uint32 index_flags = (ttl_duration > 0 ? Rdb_key_def::TTL_FLAG : 0);
+
+ uint32 ttl_rec_offset =
+ Rdb_key_def::has_index_flag(index_flags, Rdb_key_def::TTL_FLAG)
+ ? Rdb_key_def::calculate_index_flag_offset(index_flags,
+ Rdb_key_def::TTL_FLAG)
+ : UINT_MAX;
+
+ const char *const key_name = get_key_name(i, table_arg, m_tbl_def);
+ *new_key_def = std::make_shared<Rdb_key_def>(
+ index_id, i, cf_info.cf_handle, index_dict_version, index_type,
+ kv_version, cf_info.is_reverse_cf, cf_info.is_per_partition_cf, key_name,
+ Rdb_index_stats(), index_flags, ttl_rec_offset, ttl_duration);
+
+ if (!ttl_column.empty()) {
+ (*new_key_def)->m_ttl_column = ttl_column;
+ }
+ // initialize key_def
+ (*new_key_def)->setup(table_arg, tbl_def_arg);
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+int rdb_normalize_tablename(const std::string &tablename,
+ std::string *const strbuf) {
+ if (tablename.size() < 2 || tablename[0] != '.' ||
+ (tablename[1] != FN_LIBCHAR && tablename[1] != FN_LIBCHAR2)) {
+ DBUG_ASSERT(0); // We were not passed a table name?
+ return HA_ERR_ROCKSDB_INVALID_TABLE;
+ }
+
+ size_t pos = tablename.find_first_of(FN_LIBCHAR, 2);
+ if (pos == std::string::npos) {
+ pos = tablename.find_first_of(FN_LIBCHAR2, 2);
+ }
+
+ if (pos == std::string::npos) {
+ DBUG_ASSERT(0); // We were not passed a table name?
+ return HA_ERR_ROCKSDB_INVALID_TABLE;
+ }
+
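+ // For example, the SQL layer passes "./test/t1", which normalizes to
+ // "test.t1".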
+ *strbuf = tablename.substr(2, pos - 2) + "." + tablename.substr(pos + 1);
+
+ return HA_EXIT_SUCCESS;
+}
+
+/*
+ Check to see if the user's original statement includes foreign key
+ references
+*/
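+/*
+ For example, both "FOREIGN KEY (a) REFERENCES t2 (a)" and the named form
+ "FOREIGN KEY fk1 (a) REFERENCES t2 (a)" are detected by the token scan
+ below.
+*/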
+bool ha_rocksdb::contains_foreign_key(THD *const thd) {
+ bool success;
+ const char *str = thd_query_string(thd)->str;
+
+ DBUG_ASSERT(str != nullptr);
+
+ while (*str != '\0') {
+ // Scan from our current pos looking for 'FOREIGN'
+ str = rdb_find_in_string(str, "FOREIGN", &success);
+ if (!success) {
+ return false;
+ }
+
+ // Skip past the found "FOREIGN"
+ str = rdb_check_next_token(&my_charset_bin, str, "FOREIGN", &success);
+ DBUG_ASSERT(success);
+
+ if (!my_isspace(&my_charset_bin, *str)) {
+ return false;
+ }
+
+ // See if the next token is 'KEY'
+ str = rdb_check_next_token(&my_charset_bin, str, "KEY", &success);
+ if (!success) {
+ continue;
+ }
+
+ // See if the next token is '('
+ str = rdb_check_next_token(&my_charset_bin, str, "(", &success);
+ if (!success) {
+ // There is an optional index id after 'FOREIGN KEY', skip it
+ str = rdb_skip_id(&my_charset_bin, str);
+
+ // Now check for '(' again
+ str = rdb_check_next_token(&my_charset_bin, str, "(", &success);
+ }
+
+ // If we have found 'FOREIGN KEY [<word>] (' we can be confident we have
+ // a foreign key clause.
+ return success;
+ }
+
+ // We never found a valid foreign key clause
+ return false;
+}
+
+/**
+ @brief
+ splits the normalized table name of <dbname>.<tablename>#P#<part_no> into
+ the <dbname>, <tablename> and <part_no> components.
+
+ @param dbbuf returns database name/table_schema
+ @param tablebuf returns tablename
+ @param partitionbuf returns partition suffix if there is one
+ @return HA_EXIT_SUCCESS on success, non-zero on failure to split
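+
+ Example: "test.t1#P#p0" yields db="test", table="t1", partition="p0".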
+*/
+int rdb_split_normalized_tablename(const std::string &fullname,
+ std::string *const db,
+ std::string *const table,
+ std::string *const partition) {
+ DBUG_ASSERT(!fullname.empty());
+
+#define RDB_PARTITION_STR "#P#"
+
+ /* Normalize returns dbname.tablename. */
+ size_t dotpos = fullname.find('.');
+
+ /* Invalid table name? */
+ if (dotpos == std::string::npos) {
+ return HA_ERR_ROCKSDB_INVALID_TABLE;
+ }
+
+ // Table must have a database name associated with it.
+ DBUG_ASSERT(dotpos > 0);
+
+ if (db != nullptr) {
+ *db = fullname.substr(0, dotpos);
+ }
+
+ dotpos++;
+
+ const size_t partpos =
+ fullname.find(RDB_PARTITION_STR, dotpos, strlen(RDB_PARTITION_STR));
+
+ if (partpos != std::string::npos) {
+ DBUG_ASSERT(partpos >= dotpos);
+
+ if (table != nullptr) {
+ *table = fullname.substr(dotpos, partpos - dotpos);
+ }
+
+ if (partition != nullptr) {
+ *partition = fullname.substr(partpos + strlen(RDB_PARTITION_STR));
+ }
+ } else if (table != nullptr) {
+ *table = fullname.substr(dotpos);
+ }
+
+ return HA_EXIT_SUCCESS;
+}
+
+/*
+ Create a table's Rdb_tbl_def and its Rdb_key_defs and store table information
+ into MyRocks Data Dictionary
+ The method is called during create table/partition, truncate table/partition
+
+ @param table_name IN table's name formatted as
+ 'dbname.tablename'
+ @param table_arg IN sql table
+ @param auto_increment_value IN specified table's auto increment value
+
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::create_table(const std::string &table_name,
+ const TABLE *table_arg,
+ ulonglong auto_increment_value) {
+ DBUG_ENTER_FUNC();
+
+ int err;
+
+ const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin();
+ rocksdb::WriteBatch *const batch = wb.get();
+
+ /* Create table/key descriptions and put them into the data dictionary */
+ m_tbl_def = new Rdb_tbl_def(table_name);
+
+ uint n_keys = table_arg->s->keys;
+
+ /*
+ If no primary key is found, create a hidden PK and place it inside the
+ table definition.
+ */
+ if (has_hidden_pk(table_arg)) {
+ n_keys += 1;
+ // Reset the hidden pk id; the starting valid value for a hidden pk is 1.
+ m_tbl_def->m_hidden_pk_val = 1;
+ }
+
+ m_key_descr_arr = new std::shared_ptr<Rdb_key_def>[n_keys];
+ m_tbl_def->m_key_count = n_keys;
+ m_tbl_def->m_key_descr_arr = m_key_descr_arr;
+
+ err = create_key_defs(table_arg, m_tbl_def);
+ if (err != HA_EXIT_SUCCESS) {
+ goto error;
+ }
+
+ m_pk_descr = m_key_descr_arr[pk_index(table_arg, m_tbl_def)];
+
+ if (auto_increment_value) {
+ bool autoinc_upgrade_test = false;
+ m_tbl_def->m_auto_incr_val = auto_increment_value;
+ DBUG_EXECUTE_IF("myrocks_autoinc_upgrade", autoinc_upgrade_test = true;);
+ if (!autoinc_upgrade_test) {
+ auto s = dict_manager.put_auto_incr_val(
+ batch, m_tbl_def->get_autoincr_gl_index_id(),
+ m_tbl_def->m_auto_incr_val);
+ if (!s.ok()) {
+ goto error;
+ }
+ }
+ }
+
+ dict_manager.lock();
+ err = ddl_manager.put_and_write(m_tbl_def, batch);
+ if (err != HA_EXIT_SUCCESS) {
+ dict_manager.unlock();
+ goto error;
+ }
+
+ err = dict_manager.commit(batch);
+ if (err != HA_EXIT_SUCCESS) {
+ dict_manager.unlock();
+ goto error;
+ }
+
+ dict_manager.unlock();
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+
+error:
+ /* Delete what we have allocated so far */
+ delete m_tbl_def;
+ m_tbl_def = nullptr;
+ m_key_descr_arr = nullptr;
+
+ DBUG_RETURN(err);
+}
+
+/**
+ @brief
+ create() is called to create a table. The variable `name` will have the
+ name of the table.
+
+ @details
+ When create() is called you do not need to worry about
+ opening the table. Also, the .frm file will have already been
+ created, so adjusting create_info is not necessary. You can overwrite
+ the .frm file at this point if you wish to change the table
+ definition, but there are no methods currently provided for doing
+ so.
+
+ Called from handler.cc by ha_create_table().
+
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+
+ @see
+ ha_create_table() in handler.cc
+*/
+
+int ha_rocksdb::create(const char *const name, TABLE *const table_arg,
+ HA_CREATE_INFO *const create_info) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(table_arg != nullptr);
+ DBUG_ASSERT(create_info != nullptr);
+
+ if (create_info->data_file_name) {
+ // DATA DIRECTORY is used to create tables under a specific location
+ // outside the MySQL data directory. We don't support this for MyRocks.
+ // The `rocksdb_datadir` setting should be used to configure RocksDB data
+ // directory.
+ DBUG_RETURN(HA_ERR_ROCKSDB_TABLE_DATA_DIRECTORY_NOT_SUPPORTED);
+ }
+
+ if (create_info->index_file_name) {
+ // Similar check for INDEX DIRECTORY as well.
+ DBUG_RETURN(HA_ERR_ROCKSDB_TABLE_INDEX_DIRECTORY_NOT_SUPPORTED);
+ }
+
+ int err;
+ /*
+ Construct dbname.tablename ourselves, because partitioning
+ passes strings like "./test/t14#P#p0" for individual partitions,
+ while table_arg->s->table_name has none of that.
+ */
+ std::string str;
+ err = rdb_normalize_tablename(name, &str);
+ if (err != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(err);
+ }
+
+ // FOREIGN KEY isn't supported yet
+ THD *const thd = my_core::thd_get_current_thd();
+ if (contains_foreign_key(thd)) {
+ my_error(ER_NOT_SUPPORTED_YET, MYF(0),
+ "FOREIGN KEY for the RocksDB storage engine");
+ DBUG_RETURN(HA_ERR_UNSUPPORTED);
+ }
+
+ // Check whether the data dictionary already contains this table
+ Rdb_tbl_def *tbl = ddl_manager.find(str);
+ if (tbl != nullptr) {
+ if (thd->lex->sql_command == SQLCOM_TRUNCATE) {
+ err = delete_table(tbl);
+ if (err != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(err);
+ }
+ } else {
+ my_error(ER_METADATA_INCONSISTENCY, MYF(0), str.c_str(), name);
+ DBUG_RETURN(HA_ERR_ROCKSDB_CORRUPT_DATA);
+ }
+ }
+
+ // The below adds/clears hooks in RocksDB sync points. There's no particular
+ // reason for this code to be in ::create(); it just needs to live somewhere
+ // away from tight loops and be invocable from mtr:
+ DBUG_EXECUTE_IF("rocksdb_enable_delay_commits",
+ {
+ auto syncpoint= rocksdb::SyncPoint::GetInstance();
+ syncpoint->SetCallBack("DBImpl::WriteImpl:BeforeLeaderEnters",
+ [&](void* /*arg*/) {my_sleep(500);} );
+ syncpoint->EnableProcessing();
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_WRONG_ARGUMENTS,
+ "enable_delay_commits_mode ON");
+
+ });
+ DBUG_EXECUTE_IF("rocksdb_disable_delay_commits",
+ {
+ auto syncpoint= rocksdb::SyncPoint::GetInstance();
+ syncpoint->ClearCallBack("DBImpl::WriteImpl:BeforeLeaderEnters");
+ syncpoint->DisableProcessing();
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_WRONG_ARGUMENTS,
+ "enable_delay_commits_mode OFF");
+ });
+
+ DBUG_RETURN(create_table(str, table_arg, create_info->auto_increment_value));
+}
+
+/**
+ @note
+ This function is used only when the table has not yet been opened, and
+ keyread_allowed bitmap doesn't have the correct values yet.
+
+ See comment in ha_rocksdb::index_flags() for details.
+*/
+
+bool ha_rocksdb::check_keyread_allowed(uint inx, uint part,
+ bool all_parts) const {
+ bool res = true;
+ KEY *const key_info = &table_share->key_info[inx];
+
+ Rdb_field_packing dummy1;
+ res = dummy1.setup(nullptr, key_info->key_part[part].field, inx, part,
+ key_info->key_part[part].length);
+
+ if (res && all_parts) {
+ for (uint i = 0; i < part; i++) {
+ Field *field;
+ if ((field = key_info->key_part[i].field)) {
+ Rdb_field_packing dummy;
+ if (!dummy.setup(nullptr, field, inx, i,
+ key_info->key_part[i].length)) {
+ /* Cannot do index-only reads for this column */
+ res = false;
+ break;
+ }
+ }
+ }
+ }
+
+ const uint pk = table_share->primary_key;
+ if (inx == pk && all_parts &&
+ part + 1 == table_share->key_info[pk].user_defined_key_parts) {
+ m_pk_can_be_decoded = res;
+ }
+
+ return res;
+}
+
+int ha_rocksdb::read_key_exact(const Rdb_key_def &kd,
+ rocksdb::Iterator *const iter,
+ const bool /* unused */,
+ const rocksdb::Slice &key_slice,
+ const int64_t ttl_filter_ts) {
+ THD *thd = ha_thd();
+ /*
+ We are looking for the first record such that
+ index_tuple= lookup_tuple.
+ lookup_tuple may be a prefix of the index.
+ */
+ rocksdb_smart_seek(kd.m_is_reverse_cf, iter, key_slice);
+
+ while (iter->Valid() && kd.value_matches_prefix(iter->key(), key_slice)) {
+ if (thd && thd->killed) {
+ return HA_ERR_QUERY_INTERRUPTED;
+ }
+ /*
+ If TTL is enabled we need to check if the given key has already expired
+ from the POV of the current transaction. If it has, try going to the next
+ key.
+ */
+ if (kd.has_ttl() && should_hide_ttl_rec(kd, iter->value(), ttl_filter_ts)) {
+ rocksdb_smart_next(kd.m_is_reverse_cf, iter);
+ continue;
+ }
+
+ return HA_EXIT_SUCCESS;
+ }
+
+ /*
+ Got a record that is not equal to the lookup value, or even a record
+ from another table or index.
+ */
+ return HA_ERR_KEY_NOT_FOUND;
+}
+
+int ha_rocksdb::read_before_key(const Rdb_key_def &kd,
+ const bool full_key_match,
+ const rocksdb::Slice &key_slice,
+ const int64_t ttl_filter_ts) {
+ THD *thd = ha_thd();
+ /*
+ We are looking for the record with the biggest t.key such that
+ t.key < lookup_tuple.
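+
+ For example, with index keys {1, 3, 5} and lookup_tuple = 4, this
+ positions the iterator on key 3.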
+ */
+ rocksdb_smart_seek(!kd.m_is_reverse_cf, m_scan_it, key_slice);
+
+ while (is_valid(m_scan_it)) {
+ if (thd && thd->killed) {
+ return HA_ERR_QUERY_INTERRUPTED;
+ }
+ /*
+ We are using full key and we've hit an exact match, or...
+
+ If TTL is enabled we need to check if the given key has already expired
+ from the POV of the current transaction. If it has, try going to the next
+ key.
+ */
+ if ((full_key_match &&
+ kd.value_matches_prefix(m_scan_it->key(), key_slice)) ||
+ (kd.has_ttl() &&
+ should_hide_ttl_rec(kd, m_scan_it->value(), ttl_filter_ts))) {
+ rocksdb_smart_next(!kd.m_is_reverse_cf, m_scan_it);
+ continue;
+ }
+
+ return HA_EXIT_SUCCESS;
+ }
+
+ return HA_ERR_KEY_NOT_FOUND;
+}
+
+int ha_rocksdb::read_after_key(const Rdb_key_def &kd,
+ const rocksdb::Slice &key_slice,
+ const int64_t ttl_filter_ts) {
+ THD *thd = ha_thd();
+ /*
+ We are looking for the first record such that
+
+ index_tuple $GT lookup_tuple
+
+ with HA_READ_AFTER_KEY, $GT = '>',
+ with HA_READ_KEY_OR_NEXT, $GT = '>='
+ */
+ rocksdb_smart_seek(kd.m_is_reverse_cf, m_scan_it, key_slice);
+
+ /*
+ If TTL is enabled we need to check if the given key has already expired
+ from the POV of the current transaction. If it has, try going to the next
+ key.
+ */
+ while (is_valid(m_scan_it) && kd.has_ttl() &&
+ should_hide_ttl_rec(kd, m_scan_it->value(), ttl_filter_ts)) {
+ if (thd && thd->killed) {
+ return HA_ERR_QUERY_INTERRUPTED;
+ }
+ rocksdb_smart_next(kd.m_is_reverse_cf, m_scan_it);
+ }
+
+ return is_valid(m_scan_it) ? HA_EXIT_SUCCESS : HA_ERR_KEY_NOT_FOUND;
+}
+
+int ha_rocksdb::position_to_correct_key(
+ const Rdb_key_def &kd, const enum ha_rkey_function &find_flag,
+ const bool full_key_match, const uchar *const key,
+ const key_part_map &keypart_map, const rocksdb::Slice &key_slice,
+ bool *const move_forward, const int64_t ttl_filter_ts) {
+ int rc = 0;
+
+ *move_forward = true;
+
+ switch (find_flag) {
+ case HA_READ_KEY_EXACT:
+ rc = read_key_exact(kd, m_scan_it, full_key_match, key_slice,
+ ttl_filter_ts);
+ break;
+ case HA_READ_BEFORE_KEY:
+ *move_forward = false;
+ rc = read_before_key(kd, full_key_match, key_slice, ttl_filter_ts);
+ if (rc == 0 && !kd.covers_key(m_scan_it->key())) {
+ /* The record we've got is not from this index */
+ rc = HA_ERR_KEY_NOT_FOUND;
+ }
+ break;
+ case HA_READ_AFTER_KEY:
+ case HA_READ_KEY_OR_NEXT:
+ rc = read_after_key(kd, key_slice, ttl_filter_ts);
+ if (rc == 0 && !kd.covers_key(m_scan_it->key())) {
+ /* The record we've got is not from this index */
+ rc = HA_ERR_KEY_NOT_FOUND;
+ }
+ break;
+ case HA_READ_KEY_OR_PREV:
+ case HA_READ_PREFIX:
+ /* These flags are not used by the SQL layer, so we don't support them yet. */
+ rc = HA_ERR_UNSUPPORTED;
+ break;
+ case HA_READ_PREFIX_LAST:
+ case HA_READ_PREFIX_LAST_OR_PREV:
+ *move_forward = false;
+ /*
+ Find the last record with the specified index prefix lookup.
+ - HA_READ_PREFIX_LAST requires that the record has the
+ prefix=lookup (if there are no such records,
+ HA_ERR_KEY_NOT_FOUND should be returned).
+ - HA_READ_PREFIX_LAST_OR_PREV has no such requirement. If there are no
+ records with prefix=lookup, we should return the last record
+ before that.
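+
+ Example: index (a,b) with entries (1,1),(1,2),(2,1): HA_READ_PREFIX_LAST
+ with prefix a=1 must position on (1,2), while HA_READ_PREFIX_LAST_OR_PREV
+ with prefix a=3 positions on (2,1).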
+ */
+ rc = read_before_key(kd, full_key_match, key_slice, ttl_filter_ts);
+ if (rc == 0) {
+ const rocksdb::Slice &rkey = m_scan_it->key();
+ if (!kd.covers_key(rkey)) {
+ /* The record we've got is not from this index */
+ rc = HA_ERR_KEY_NOT_FOUND;
+ } else if (find_flag == HA_READ_PREFIX_LAST) {
+ uint size = kd.pack_index_tuple(table, m_pack_buffer,
+ m_sk_packed_tuple, key, keypart_map);
+ rocksdb::Slice lookup_tuple(
+ reinterpret_cast<char *>(m_sk_packed_tuple), size);
+
+ // We need to compare the key we've got with the original search
+ // prefix.
+ if (!kd.value_matches_prefix(rkey, lookup_tuple)) {
+ rc = HA_ERR_KEY_NOT_FOUND;
+ }
+ }
+ }
+ break;
+ default:
+ DBUG_ASSERT(0);
+ break;
+ }
+
+ return rc;
+}
+
+int ha_rocksdb::calc_eq_cond_len(const Rdb_key_def &kd,
+ const enum ha_rkey_function &find_flag,
+ const rocksdb::Slice &slice,
+ const int bytes_changed_by_succ,
+ const key_range *const end_key,
+ uint *const end_key_packed_size) {
+ if (find_flag == HA_READ_KEY_EXACT) return slice.size();
+
+ if (find_flag == HA_READ_PREFIX_LAST) {
+ /*
+ We have made the kd.successor(m_sk_packed_tuple) call above.
+
+ The slice is at least Rdb_key_def::INDEX_NUMBER_SIZE bytes long.
+ */
+ return slice.size() - bytes_changed_by_succ;
+ }
+
+ if (end_key) {
+ *end_key_packed_size =
+ kd.pack_index_tuple(table, m_pack_buffer, m_end_key_packed_tuple,
+ end_key->key, end_key->keypart_map);
+
+ /*
+ Calculating length of the equal conditions here. 4 byte index id is
+ included.
+ Example1: id1 BIGINT, id2 INT, id3 BIGINT, PRIMARY KEY (id1, id2, id3)
+ WHERE id1=1 AND id2=1 AND id3>=2 => eq_cond_len= 4+8+4= 16
+ WHERE id1=1 AND id2>=1 AND id3>=2 => eq_cond_len= 4+8= 12
+ Example2: id1 VARCHAR(30), id2 INT, PRIMARY KEY (id1, id2)
+ WHERE id1 = 'AAA' and id2 < 3; => eq_cond_len=13 (varchar used 9 bytes)
+ */
+ rocksdb::Slice end_slice(reinterpret_cast<char *>(m_end_key_packed_tuple),
+ *end_key_packed_size);
+ return slice.difference_offset(end_slice);
+ }
+
+ /*
+ On range scan without any end key condition, there is no
+ eq cond, and eq cond length is the same as index_id size (4 bytes).
+ Example1: id1 BIGINT, id2 INT, id3 BIGINT, PRIMARY KEY (id1, id2, id3)
+ WHERE id1>=1 AND id2 >= 2 and id2 <= 5 => eq_cond_len= 4
+ */
+ return Rdb_key_def::INDEX_NUMBER_SIZE;
+}
+
+int ha_rocksdb::read_row_from_primary_key(uchar *const buf) {
+ int rc;
+ const rocksdb::Slice &rkey = m_scan_it->key();
+ const uint pk_size = rkey.size();
+ const char *pk_data = rkey.data();
+
+ memcpy(m_pk_packed_tuple, pk_data, pk_size);
+ m_last_rowkey.copy(pk_data, pk_size, &my_charset_bin);
+
+ if (m_lock_rows != RDB_LOCK_NONE) {
+ /* We need to put a lock and re-read */
+ rc = get_row_by_rowid(buf, m_pk_packed_tuple, pk_size);
+ } else {
+ /* Unpack from the row we've read */
+ const rocksdb::Slice &value = m_scan_it->value();
+ rc = convert_record_from_storage_format(&rkey, &value, buf);
+ }
+
+ return rc;
+}
+
+int ha_rocksdb::read_row_from_secondary_key(uchar *const buf,
+ const Rdb_key_def &kd,
+ bool move_forward) {
+ int rc = 0;
+ uint pk_size;
+
+ /* Get the key columns and primary key value */
+ const rocksdb::Slice &rkey = m_scan_it->key();
+ const rocksdb::Slice &value = m_scan_it->value();
+
+#ifndef DBUG_OFF
+ bool save_keyread_only = m_keyread_only;
+#endif
+ DBUG_EXECUTE_IF("dbug.rocksdb.HA_EXTRA_KEYREAD", { m_keyread_only = true; });
+
+ bool covered_lookup = (m_keyread_only && kd.can_cover_lookup()) ||
+ kd.covers_lookup(&value, &m_lookup_bitmap);
+
+#ifndef DBUG_OFF
+ m_keyread_only = save_keyread_only;
+#endif
+
+ if (covered_lookup && m_lock_rows == RDB_LOCK_NONE) {
+ pk_size =
+ kd.get_primary_key_tuple(table, *m_pk_descr, &rkey, m_pk_packed_tuple);
+ if (pk_size == RDB_INVALID_KEY_LEN) {
+ rc = HA_ERR_ROCKSDB_CORRUPT_DATA;
+ } else {
+ rc = kd.unpack_record(table, buf, &rkey, &value,
+ m_converter->get_verify_row_debug_checksums());
+ global_stats.covered_secondary_key_lookups.inc();
+ }
+ } else {
+ if (kd.m_is_reverse_cf) move_forward = !move_forward;
+
+ rc = find_icp_matching_index_rec(move_forward, buf);
+ if (!rc) {
+ const rocksdb::Slice &rkey = m_scan_it->key();
+ pk_size = kd.get_primary_key_tuple(table, *m_pk_descr, &rkey,
+ m_pk_packed_tuple);
+ if (pk_size == RDB_INVALID_KEY_LEN) {
+ rc = HA_ERR_ROCKSDB_CORRUPT_DATA;
+ } else {
+ rc = get_row_by_rowid(buf, m_pk_packed_tuple, pk_size);
+ }
+ }
+ }
+
+ if (!rc) {
+ m_last_rowkey.copy((const char *)m_pk_packed_tuple, pk_size,
+ &my_charset_bin);
+ }
+
+ return rc;
+}
+
+/**
+ @note
+ The problem with this function is that the SQL layer calls it when
+ - the table has not yet been opened (no ::open() call done)
+ - this->table_share already exists, but it is in the process of being
+ filled, so some of the fields are still NULL.
+ - In particular, table_share->key_info[inx].key_part[] is filled only up
+ to part #part. Subsequent key parts are not yet filled.
+
+ To complicate things further, the SQL layer will call index_flags() with
+ all_parts=TRUE. Essentially, we're asked to provide flags for reading
+ keyparts whose datatype is not yet known.
+
+ We work around this problem by using check_keyread_allowed(), which uses
+ the table_share object and is careful not to step on uninitialized data.
+
+ When we get a call with all_parts=TRUE, we try to analyze all parts but
+ ignore those that have key_part->field==nullptr (these are not initialized
+ yet).
+*/
+
+ulong ha_rocksdb::index_flags(uint inx, uint part, bool all_parts) const {
+ DBUG_ENTER_FUNC();
+
+ ulong base_flags = HA_READ_NEXT | // doesn't seem to be used
+ HA_READ_ORDER | HA_READ_RANGE | HA_READ_PREV;
+
+ if (check_keyread_allowed(inx, part, all_parts)) {
+ base_flags |= HA_KEYREAD_ONLY;
+ }
+
+ if (inx == table_share->primary_key) {
+ /*
+ Index-only reads on the primary key are the same as a table scan for us.
+ Still, we need to explicitly "allow" them, otherwise the SQL layer will
+ miss some plans.
+ */
+ base_flags |= HA_KEYREAD_ONLY | HA_CLUSTERED_INDEX;
+ } else {
+ /*
+ We can use Index Condition Pushdown on any key except the primary. With
+ the primary key, we get the (pk, record) pair immediately; there is no
+ place to put the ICP check.
+ */
+ base_flags |= HA_DO_INDEX_COND_PUSHDOWN;
+ }
+
+ DBUG_RETURN(base_flags);
+}
+
+/**
+ @brief
+ Read next index tuple through the secondary index.
+
+ @details
+ m_scan_it points at the index key-value pair that we should read the (pk,row)
+ pair for.
+*/
+int ha_rocksdb::secondary_index_read(const int keyno, uchar *const buf) {
+ DBUG_ASSERT(table != nullptr);
+#ifdef MARIAROCKS_NOT_YET
+ stats.rows_requested++;
+#endif
+ /* Use STATUS_NOT_FOUND when record not found or some error occurred */
+ table->status = STATUS_NOT_FOUND;
+
+ if (is_valid(m_scan_it)) {
+ rocksdb::Slice key = m_scan_it->key();
+
+ /* Check if we've run out of records of this index */
+ if (m_key_descr_arr[keyno]->covers_key(key)) {
+ int rc = 0;
+
+ // TODO: We could check here whether we have run out of the range we're scanning
+ const uint size = m_key_descr_arr[keyno]->get_primary_key_tuple(
+ table, *m_pk_descr, &key, m_pk_packed_tuple);
+ if (size == RDB_INVALID_KEY_LEN) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+
+ m_last_rowkey.copy((const char *)m_pk_packed_tuple, size,
+ &my_charset_bin);
+
+ rocksdb::Slice value = m_scan_it->value();
+ bool covered_lookup =
+ (m_keyread_only && m_key_descr_arr[keyno]->can_cover_lookup()) ||
+ m_key_descr_arr[keyno]->covers_lookup(&value, &m_lookup_bitmap);
+ if (covered_lookup && m_lock_rows == RDB_LOCK_NONE) {
+ rc = m_key_descr_arr[keyno]->unpack_record(
+ table, buf, &key, &value,
+ m_converter->get_verify_row_debug_checksums());
+ global_stats.covered_secondary_key_lookups.inc();
+ } else {
+ DEBUG_SYNC(ha_thd(), "rocksdb_concurrent_delete_sk");
+ rc = get_row_by_rowid(buf, m_pk_packed_tuple, size);
+ }
+
+ if (!rc) {
+ table->status = 0;
+#ifdef MARIAROCKS_NOT_YET
+ stats.rows_read++;
+ stats.rows_index_next++;
+#endif
+ update_row_stats(ROWS_READ);
+ }
+ return rc;
+ }
+ }
+ return HA_ERR_END_OF_FILE;
+}
+
+bool ha_rocksdb::is_using_full_key(key_part_map keypart_map,
+ uint actual_key_parts)
+{
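+ /* e.g. for a 3-part key, keypart_map == 0b111 (all parts bound) is a full
+ key. */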
+ return (keypart_map == HA_WHOLE_KEY) ||
+ (keypart_map == ((key_part_map(1) << actual_key_parts)
+ - 1));
+}
+
+/*
+ ha_rocksdb::read_range_first overrides handler::read_range_first.
+ The only difference from handler::read_range_first is that
+ ha_rocksdb::read_range_first passes end_key to
+ ha_rocksdb::index_read_map_impl function.
+
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::read_range_first(const key_range *const start_key,
+ const key_range *const end_key,
+ bool eq_range_arg, bool sorted) {
+ DBUG_ENTER_FUNC();
+
+ int result;
+
+ eq_range = eq_range_arg;
+
+ /* MariaDB: Pass RANGE_SCAN_ASC when we support ICP on backwards scans */
+ set_end_range(end_key);
+
+ range_key_part = table->key_info[active_index].key_part;
+
+ if (!start_key) {
+ // Read first record
+ result = ha_index_first(table->record[0]);
+ } else {
+#ifdef MARIAROCKS_NOT_YET
+ if (is_using_prohibited_gap_locks(
+ is_using_full_unique_key(active_index, start_key->keypart_map,
+ start_key->flag))) {
+ DBUG_RETURN(HA_ERR_LOCK_DEADLOCK);
+ }
+#endif
+ increment_statistics(&SSV::ha_read_key_count);
+
+ MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0, {
+ result =
+ index_read_map_impl(table->record[0], start_key->key,
+ start_key->keypart_map, start_key->flag, end_key);
+ })
+ }
+ if (result) {
+ DBUG_RETURN((result == HA_ERR_KEY_NOT_FOUND) ? HA_ERR_END_OF_FILE : result);
+ }
+
+ if (compare_key(end_range) <= 0) {
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+ } else {
+ /*
+ The last read row does not fall in the range. So request
+ storage engine to release row lock if possible.
+ */
+ unlock_row();
+ DBUG_RETURN(HA_ERR_END_OF_FILE);
+ }
+}
+
+int ha_rocksdb::prepare_index_scan()
+{
+ range_key_part= table->key_info[active_index].key_part;
+ set_end_range(NULL);
+ return 0;
+}
+
+
+int ha_rocksdb::prepare_range_scan(const key_range *start_key,
+ const key_range *end_key)
+{
+ range_key_part= table->key_info[active_index].key_part;
+
+ if (start_key)
+ {
+ m_save_start_range= *start_key;
+ m_start_range= &m_save_start_range;
+ }
+ else
+ m_start_range= NULL;
+
+ set_end_range(end_key);
+ return 0;
+}
+
+
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+
+int ha_rocksdb::index_read_map(uchar *const buf, const uchar *const key,
+ key_part_map keypart_map,
+ enum ha_rkey_function find_flag) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_RETURN(index_read_map_impl(buf, key, keypart_map, find_flag, nullptr));
+}
+
+/*
+ See storage/rocksdb/rocksdb-range-access.txt for description of how MySQL
+ index navigation commands are converted into RocksDB lookup commands.
+
+ This function takes end_key as an argument, and it is set on range scans.
+ MyRocks needs to decide whether the prefix bloom filter can be used or
+ not. To make that decision, the equal-condition length must be
+ calculated. On equality lookups (find_flag == HA_READ_KEY_EXACT), the
+ equal-condition length is the same as rocksdb::Slice.size() of the start
+ key. On range scans, it is the shorter of the start_key and end_key
+ rocksdb::Slice values.
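+
+ For example (hypothetical sizes): if the start key packs into a 12-byte
+ slice and the end key into an 8-byte slice, a range scan can use at most
+ an 8-byte equal-condition prefix for the bloom filter check.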
+
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key,
+ key_part_map keypart_map,
+ enum ha_rkey_function find_flag,
+ const key_range *end_key) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_EXECUTE_IF("myrocks_busy_loop_on_row_read", int debug_i = 0;
+ while (1) { debug_i++; });
+
+ int rc = 0;
+
+ THD *thd = ha_thd();
+ DEBUG_SYNC(thd, "rocksdb.check_flags_rmi");
+ if (thd && thd->killed) {
+ rc = HA_ERR_QUERY_INTERRUPTED;
+ DBUG_RETURN(rc);
+ }
+
+ const Rdb_key_def &kd = *m_key_descr_arr[active_index];
+ const uint actual_key_parts = kd.get_key_parts();
+ bool using_full_key = is_using_full_key(keypart_map, actual_key_parts);
+
+ if (!end_key) end_key = end_range;
+
+ /* By default, we don't need the retrieved records to match the prefix */
+ m_sk_match_prefix = nullptr;
+#ifdef MARIAROCKS_NOT_YET
+ stats.rows_requested++;
+#endif
+ if (active_index == table->s->primary_key && find_flag == HA_READ_KEY_EXACT &&
+ using_full_key) {
+ /*
+ Equality lookup over primary key, using full tuple.
+ This is a special case, use DB::Get.
+ */
+ const uint size = kd.pack_index_tuple(table, m_pack_buffer,
+ m_pk_packed_tuple, key, keypart_map);
+ bool skip_lookup = is_blind_delete_enabled();
+
+ rc = get_row_by_rowid(buf, m_pk_packed_tuple, size, skip_lookup, false);
+
+ if (!rc && !skip_lookup) {
+#ifdef MARIAROCKS_NOT_YET
+ stats.rows_read++;
+ stats.rows_index_first++;
+#endif
+ update_row_stats(ROWS_READ);
+ }
+ DBUG_RETURN(rc);
+ }
+
+ /*
+ Unique secondary indexes perform lookups without the extended key fields
+ */
+ uint packed_size;
+ if (active_index != table->s->primary_key &&
+ table->key_info[active_index].flags & HA_NOSAME &&
+ find_flag == HA_READ_KEY_EXACT && using_full_key) {
+ key_part_map tmp_map = (key_part_map(1) << table->key_info[active_index]
+ .user_defined_key_parts) -
+ 1;
+ packed_size = kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple,
+ key, tmp_map);
+ if (table->key_info[active_index].user_defined_key_parts !=
+ kd.get_key_parts()) {
+ using_full_key = false;
+ }
+ } else {
+ packed_size = kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple,
+ key, keypart_map);
+ }
+
+ if ((pushed_idx_cond && pushed_idx_cond_keyno == active_index) &&
+ (find_flag == HA_READ_KEY_EXACT || find_flag == HA_READ_PREFIX_LAST)) {
+ /*
+ We are doing a point index lookup, and ICP is enabled. It is possible
+ that this call will be followed by ha_rocksdb->index_next_same() call.
+
+ Do what InnoDB does: save the lookup tuple now. We will need it in
+ index_next_same/find_icp_matching_index_rec in order to stop scanning
+ as soon as index record doesn't match the lookup tuple.
+
+ When not using ICP, handler::index_next_same() will make sure that rows
+ that don't match the lookup prefix are not returned.
+ */
+ m_sk_match_prefix = m_sk_match_prefix_buf;
+ m_sk_match_length = packed_size;
+ memcpy(m_sk_match_prefix, m_sk_packed_tuple, packed_size);
+ }
+
+ int bytes_changed_by_succ = 0;
+ if (find_flag == HA_READ_PREFIX_LAST_OR_PREV ||
+ find_flag == HA_READ_PREFIX_LAST || find_flag == HA_READ_AFTER_KEY) {
+ /* See below */
+ bytes_changed_by_succ = kd.successor(m_sk_packed_tuple, packed_size);
+ }
+
+ rocksdb::Slice slice(reinterpret_cast<const char *>(m_sk_packed_tuple),
+ packed_size);
+
+ uint end_key_packed_size = 0;
+ /*
+ In MariaDB, the end_key is always the bigger end of the range.
+ If we are doing a reverse-ordered scan (that is, walking from the bigger
+ key values to smaller), we should use the smaller end of range as end_key.
+ */
+ const key_range *cur_end_key= end_key;
+ if (find_flag == HA_READ_PREFIX_LAST_OR_PREV ||
+ find_flag == HA_READ_BEFORE_KEY)
+ {
+ cur_end_key= m_start_range;
+ }
+
+ const uint eq_cond_len =
+ calc_eq_cond_len(kd, find_flag, slice, bytes_changed_by_succ, cur_end_key,
+ &end_key_packed_size);
+
+ bool use_all_keys = false;
+ if (find_flag == HA_READ_KEY_EXACT &&
+ my_count_bits(keypart_map) == kd.get_key_parts()) {
+ use_all_keys = true;
+ }
+
+ Rdb_transaction *const tx = get_or_create_tx(table->in_use);
+ const bool is_new_snapshot = !tx->has_snapshot();
+ // Loop as long as we get a deadlock error AND we end up creating the
+ // snapshot here (i.e. it did not exist prior to this)
+ for (;;) {
+ DEBUG_SYNC(thd, "rocksdb.check_flags_rmi_scan");
+ if (thd && thd->killed) {
+ rc = HA_ERR_QUERY_INTERRUPTED;
+ break;
+ }
+ /*
+ This will open the iterator and position it at a record that's equal or
+ greater than the lookup tuple.
+ */
+ setup_scan_iterator(kd, &slice, use_all_keys, eq_cond_len);
+
+ /*
+ Once we are positioned by the Seek above, move to the position we really
+ want: see storage/rocksdb/rocksdb-range-access.txt
+ */
+ bool move_forward;
+ rc =
+ position_to_correct_key(kd, find_flag, using_full_key, key, keypart_map,
+ slice, &move_forward, tx->m_snapshot_timestamp);
+
+ if (rc) {
+ break;
+ }
+
+ m_skip_scan_it_next_call = false;
+
+ /*
+ Now get the data for the row into 'buf'. If we were using a primary key
+ then we have all the rows we need. For a secondary key we now need to
+ lookup the primary key.
+ */
+ if (active_index == table->s->primary_key) {
+ rc = read_row_from_primary_key(buf);
+ } else {
+ rc = read_row_from_secondary_key(buf, kd, move_forward);
+ }
+
+ if (!should_recreate_snapshot(rc, is_new_snapshot)) {
+ break; /* Exit the loop */
+ }
+
+ // release the snapshot and iterator so they will be regenerated
+ tx->release_snapshot();
+ release_scan_iterator();
+ }
+
+ if (rc) {
+ /*
+ This status is returned on any error;
+ the only possible error condition is record-not-found.
+ */
+ table->status = STATUS_NOT_FOUND;
+ } else {
+ table->status = 0;
+#ifdef MARIAROCKS_NOT_YET
+ stats.rows_read++;
+ stats.rows_index_first++;
+#endif
+ update_row_stats(ROWS_READ);
+ }
+
+ DBUG_RETURN(rc);
+}
+
+/*
+ @brief
+ Scan the secondary index until we find an index record that satisfies ICP
+
+ @param move_forward TRUE <=> move m_scan_it forward
+ FALSE <=> move m_scan_it backward
+ @param buf Record buffer (must be the same buffer that
+ pushed index condition points to, in practice
+ it is table->record[0])
+
+ @detail
+ Move the current iterator m_scan_it until we get an index tuple that
+ satisfies the pushed Index Condition.
+ (if there is no pushed index condition, return right away)
+
+ @return
+ 0 - Index tuple satisfies ICP, can do index read.
+ other - error code
+*/
+
+int ha_rocksdb::find_icp_matching_index_rec(const bool move_forward,
+ uchar *const buf) {
+ if (pushed_idx_cond && pushed_idx_cond_keyno == active_index) {
+ const Rdb_key_def &kd = *m_key_descr_arr[active_index];
+ THD *thd = ha_thd();
+
+ while (1) {
+ int rc = rocksdb_skip_expired_records(kd, m_scan_it, !move_forward);
+ if (rc != HA_EXIT_SUCCESS) {
+ return rc;
+ }
+
+ if (thd && thd->killed) {
+ return HA_ERR_QUERY_INTERRUPTED;
+ }
+
+ if (!is_valid(m_scan_it)) {
+ table->status = STATUS_NOT_FOUND;
+ return HA_ERR_END_OF_FILE;
+ }
+ const rocksdb::Slice rkey = m_scan_it->key();
+
+ if (!kd.covers_key(rkey)) {
+ table->status = STATUS_NOT_FOUND;
+ return HA_ERR_END_OF_FILE;
+ }
+
+ if (m_sk_match_prefix) {
+ const rocksdb::Slice prefix((const char *)m_sk_match_prefix,
+ m_sk_match_length);
+ if (!kd.value_matches_prefix(rkey, prefix)) {
+ table->status = STATUS_NOT_FOUND;
+ return HA_ERR_END_OF_FILE;
+ }
+ }
+
+ const rocksdb::Slice value = m_scan_it->value();
+ int err = kd.unpack_record(table, buf, &rkey, &value,
+ m_converter->get_verify_row_debug_checksums());
+ if (err != HA_EXIT_SUCCESS) {
+ return err;
+ }
+
+ const enum icp_result icp_status= handler_index_cond_check(this);
+ if (icp_status == ICP_NO_MATCH) {
+ rocksdb_smart_next(!move_forward, m_scan_it);
+ continue; /* Get the next (or prev) index tuple */
+ }
+ else if (icp_status == ICP_OUT_OF_RANGE || icp_status == ICP_ABORTED_BY_USER) {
+ /* We have walked out of the range we are scanning */
+ table->status = STATUS_NOT_FOUND;
+ return HA_ERR_END_OF_FILE;
+ }
+ else /* icp_status == ICP_MATCH */
+ {
+ /* Index Condition is satisfied. We have rc==0, proceed to fetch the
+ * row. */
+ break;
+ }
+ }
+ }
+ return HA_EXIT_SUCCESS;
+}
+
+/**
+ @return
+ HA_ADMIN_OK OK
+ other HA_ADMIN error code
+*/
+int ha_rocksdb::check(THD *const thd, HA_CHECK_OPT *const check_opt) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(thd != nullptr);
+ DBUG_ASSERT(check_opt != nullptr);
+
+ const uint pk = pk_index(table, m_tbl_def);
+ String rowkey_copy;
+ String sec_key_copy;
+ const char *const table_name = table->s->table_name.str;
+
+ // Only when debugging: don't use snapshot when reading
+ // Rdb_transaction *tx= get_or_create_tx(table->in_use);
+ // tx->snapshot= nullptr;
+
+ bool save_verify_row_debug_checksums =
+ m_converter->get_verify_row_debug_checksums();
+ m_converter->set_verify_row_debug_checksums(true);
+ /* For each secondary index, check that we can get a PK value from it */
+ // NO_LINT_DEBUG
+ sql_print_verbose_info("CHECKTABLE %s: Checking table %s", table_name,
+ table_name);
+ ha_rows UNINIT_VAR(row_checksums_at_start); // set/used iff first_index==true
+ ha_rows row_checksums = ha_rows(-1);
+ bool first_index = true;
+
+ for (uint keyno = 0; keyno < table->s->keys; keyno++) {
+ if (keyno != pk) {
+ extra(HA_EXTRA_KEYREAD);
+ ha_index_init(keyno, true);
+ ha_rows rows = 0;
+ ha_rows checksums = 0;
+ if (first_index) {
+ row_checksums_at_start = m_converter->get_row_checksums_checked();
+ }
+ int res;
+ // NO_LINT_DEBUG
+ sql_print_verbose_info("CHECKTABLE %s: Checking index %s", table_name,
+ table->key_info[keyno].name);
+ while (1) {
+ if (!rows) {
+ res = index_first(table->record[0]);
+ } else {
+ res = index_next(table->record[0]);
+ }
+
+ if (res == HA_ERR_END_OF_FILE) break;
+ if (res) {
+ // error
+ // NO_LINT_DEBUG
+ sql_print_error("CHECKTABLE %s: .. row %lld: index scan error %d",
+ table_name, rows, res);
+ goto error;
+ }
+ rocksdb::Slice key = m_scan_it->key();
+ sec_key_copy.copy(key.data(), key.size(), &my_charset_bin);
+ rowkey_copy.copy(m_last_rowkey.ptr(), m_last_rowkey.length(),
+ &my_charset_bin);
+
+ if (m_key_descr_arr[keyno]->unpack_info_has_checksum(
+ m_scan_it->value())) {
+ checksums++;
+ }
+
+ if ((res = get_row_by_rowid(table->record[0], rowkey_copy.ptr(),
+ rowkey_copy.length()))) {
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "CHECKTABLE %s: .. row %lld: "
+ "failed to fetch row by rowid",
+ table_name, rows);
+ goto error;
+ }
+
+ longlong hidden_pk_id = 0;
+ if (has_hidden_pk(table) &&
+ read_hidden_pk_id_from_rowkey(&hidden_pk_id)) {
+ goto error;
+ }
+
+ /* Check if we get the same PK value */
+ uint packed_size = m_pk_descr->pack_record(
+ table, m_pack_buffer, table->record[0], m_pk_packed_tuple, nullptr,
+ false, hidden_pk_id);
+ if (packed_size != rowkey_copy.length() ||
+ memcmp(m_pk_packed_tuple, rowkey_copy.ptr(), packed_size)) {
+ // NO_LINT_DEBUG
+ sql_print_error("CHECKTABLE %s: .. row %lld: PK value mismatch",
+ table_name, rows);
+ goto print_and_error;
+ }
+
+ /* Check if we get the same secondary key value */
+ packed_size = m_key_descr_arr[keyno]->pack_record(
+ table, m_pack_buffer, table->record[0], m_sk_packed_tuple,
+ &m_sk_tails, false, hidden_pk_id);
+ if (packed_size != sec_key_copy.length() ||
+ memcmp(m_sk_packed_tuple, sec_key_copy.ptr(), packed_size)) {
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "CHECKTABLE %s: .. row %lld: "
+ "secondary index value mismatch",
+ table_name, rows);
+ goto print_and_error;
+ }
+ rows++;
+ continue;
+
+ print_and_error : {
+ std::string buf;
+ buf = rdb_hexdump(rowkey_copy.ptr(), rowkey_copy.length(),
+ RDB_MAX_HEXDUMP_LEN);
+ // NO_LINT_DEBUG
+ sql_print_error("CHECKTABLE %s: rowkey: %s", table_name, buf.c_str());
+
+ buf = rdb_hexdump(m_retrieved_record.data(), m_retrieved_record.size(),
+ RDB_MAX_HEXDUMP_LEN);
+ // NO_LINT_DEBUG
+ sql_print_error("CHECKTABLE %s: record: %s", table_name, buf.c_str());
+
+ buf = rdb_hexdump(sec_key_copy.ptr(), sec_key_copy.length(),
+ RDB_MAX_HEXDUMP_LEN);
+ // NO_LINT_DEBUG
+ sql_print_error("CHECKTABLE %s: index: %s", table_name, buf.c_str());
+
+ goto error;
+ }
+ }
+ // NO_LINT_DEBUG
+ sql_print_verbose_info("CHECKTABLE %s: ... %lld index entries checked "
+ "(%lld had checksums)",
+ table_name, rows, checksums);
+
+ if (first_index) {
+ row_checksums =
+ m_converter->get_row_checksums_checked() - row_checksums_at_start;
+ first_index = false;
+ }
+ ha_index_end();
+ }
+ }
+ if (row_checksums != ha_rows(-1)) {
+ // NO_LINT_DEBUG
+ sql_print_verbose_info("CHECKTABLE %s: %lld table records had checksums",
+ table_name, row_checksums);
+ }
+ extra(HA_EXTRA_NO_KEYREAD);
+
+ m_converter->set_verify_row_debug_checksums(save_verify_row_debug_checksums);
+ /*
+ TODO(sergiy): we should also check for PK records that are missing in
+ the secondary indexes.
+ For that, need to walk through the PK and check that every PK record has a
+ proper counterpart in each secondary index.
+ */
+ DBUG_RETURN(HA_ADMIN_OK);
+error:
+ m_converter->set_verify_row_debug_checksums(save_verify_row_debug_checksums);
+ ha_index_or_rnd_end();
+ extra(HA_EXTRA_NO_KEYREAD);
+
+ DBUG_RETURN(HA_ADMIN_CORRUPT);
+}
+
+static void dbug_dump_str(FILE *const out, const char *const str, int len) {
+ fprintf(out, "\"");
+ for (int i = 0; i < len; i++) {
+ if (str[i] > 32) {
+ fprintf(out, "%c", str[i]);
+ } else {
+ fprintf(out, "\\%d", str[i]);
+ }
+ }
+ fprintf(out, "\"");
+}
+
+/*
+ Debugging help: dump the whole database into a human-readable file.
+ Usage:
+ dbug_dump_database(rdb);
+*/
+
+void dbug_dump_database(rocksdb::DB *const db) {
+ FILE *const out = fopen("/tmp/rocksdb.dump", "wt");
+ if (!out) return;
+
+ rocksdb::Iterator *it = db->NewIterator(rocksdb::ReadOptions());
+ for (it->SeekToFirst(); it->Valid(); it->Next()) {
+ rocksdb::Slice key = it->key();
+ rocksdb::Slice val = it->value();
+ dbug_dump_str(out, key.data(), key.size());
+ fprintf(out, " -> ");
+ dbug_dump_str(out, val.data(), val.size());
+ fprintf(out, "\n");
+ }
+
+ delete it;
+ fclose(out);
+}
+
+rocksdb::Status ha_rocksdb::get_for_update(
+ Rdb_transaction *const tx, rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key, rocksdb::PinnableSlice *const value) const {
+ DBUG_ASSERT(m_lock_rows != RDB_LOCK_NONE);
+ const bool exclusive = m_lock_rows != RDB_LOCK_READ;
+
+ const bool do_validate =
+ my_core::thd_tx_isolation(ha_thd()) > ISO_READ_COMMITTED;
+ rocksdb::Status s =
+ tx->get_for_update(column_family, key, value, exclusive, do_validate);
+
+#ifndef DBUG_OFF
+ ++rocksdb_num_get_for_update_calls;
+#endif
+ return s;
+}
+
+bool ha_rocksdb::is_blind_delete_enabled() {
+ THD *thd = ha_thd();
+ /*
+ Note: in MariaDB, thd->lex->table_count is only set for multi-table DELETE,
+ not for single-table DELETE. So we check thd->lex->query_tables instead.
+ */
+ return (THDVAR(thd, blind_delete_primary_key) &&
+ thd->lex->sql_command == SQLCOM_DELETE &&
+ thd->lex->query_tables && !thd->lex->query_tables->next_global &&
+ table->s->keys == 1 &&
+ !has_hidden_pk(table) && !thd->rgi_slave);
+}
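+
+/*
+ For illustration (hypothetical table): with the session variable
+ rocksdb_blind_delete_primary_key enabled, a statement like
+
+   DELETE FROM t WHERE pk = 5;
+
+ qualifies when the table's only index is a non-hidden PRIMARY KEY and we
+ are not a replication slave thread; multi-table DELETEs do not qualify.
+*/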
+
+/*
+ Given a rowid (i.e. packed PK) as a parameter, get the record.
+
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+
+int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid,
+ const uint rowid_size, const bool skip_lookup,
+ const bool skip_ttl_check) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(table != nullptr);
+
+ int rc;
+
+ rocksdb::Slice key_slice(rowid, rowid_size);
+
+ Rdb_transaction *const tx = get_or_create_tx(table->in_use);
+ DBUG_ASSERT(tx != nullptr);
+
+ DEBUG_SYNC(ha_thd(), "rocksdb.get_row_by_rowid");
+ DBUG_EXECUTE_IF("dbug.rocksdb.get_row_by_rowid", {
+ THD *thd = ha_thd();
+ const char act[] =
+ "now signal Reached "
+ "wait_for signal.rocksdb.get_row_by_rowid_let_running";
+ DBUG_ASSERT(opt_debug_sync_timeout > 0);
+ DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(act)));
+ };);
+
+ bool found;
+ rocksdb::Status s;
+
+ /* Pretend the row was found without looking it up */
+ if (skip_lookup) {
+#ifdef MARIAROCKS_NOT_YET
+ stats.rows_deleted_blind++;
+#endif
+ update_row_stats(ROWS_DELETED_BLIND);
+ m_last_rowkey.copy((const char *)rowid, rowid_size, &my_charset_bin);
+ table->status = 0;
+ DBUG_RETURN(0);
+ }
+
+ if (m_lock_rows == RDB_LOCK_NONE) {
+ tx->acquire_snapshot(true);
+ s = tx->get(m_pk_descr->get_cf(), key_slice, &m_retrieved_record);
+ } else if (m_insert_with_update && m_dup_pk_found) {
+ DBUG_ASSERT(m_pk_descr->get_keyno() == m_dupp_errkey);
+ DBUG_ASSERT(m_dup_pk_retrieved_record.length() ==
+ m_retrieved_record.size());
+ DBUG_ASSERT(memcmp(m_dup_pk_retrieved_record.ptr(),
+ m_retrieved_record.data(),
+ m_retrieved_record.size()) == 0);
+
+ // do nothing - we already have the result in m_retrieved_record and
+ // have already taken the lock
+ s = rocksdb::Status::OK();
+ } else {
+ s = get_for_update(tx, m_pk_descr->get_cf(), key_slice,
+ &m_retrieved_record);
+ }
+
+ DBUG_EXECUTE_IF("rocksdb_return_status_corrupted",
+ dbug_change_status_to_corrupted(&s););
+
+ if (!s.IsNotFound() && !s.ok()) {
+ DBUG_RETURN(tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def,
+ m_table_handler));
+ }
+ found = !s.IsNotFound();
+
+ table->status = STATUS_NOT_FOUND;
+ if (found) {
+ /* If we found the record, but it's expired, pretend we didn't find it. */
+ if (!skip_ttl_check && m_pk_descr->has_ttl() &&
+ should_hide_ttl_rec(*m_pk_descr, m_retrieved_record,
+ tx->m_snapshot_timestamp)) {
+ DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
+ }
+
+ m_last_rowkey.copy((const char *)rowid, rowid_size, &my_charset_bin);
+ rc = convert_record_from_storage_format(&key_slice, buf);
+
+ if (!rc) {
+ table->status = 0;
+ }
+ } else {
+ /*
+ Note: we don't need to unlock the row. It is intentional that we keep
+ locks on rows that don't exist.
+ */
+ rc = HA_ERR_KEY_NOT_FOUND;
+ }
+
+ DBUG_RETURN(rc);
+}
+
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::index_next(uchar *const buf) {
+ DBUG_ENTER_FUNC();
+
+ bool moves_forward = true;
+ if (m_key_descr_arr[active_index]->m_is_reverse_cf) {
+ moves_forward = false;
+ }
+
+ int rc = index_next_with_direction(buf, moves_forward);
+ if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE;
+
+ DBUG_RETURN(rc);
+}
+
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::index_prev(uchar *const buf) {
+ DBUG_ENTER_FUNC();
+
+ bool moves_forward = false;
+ if (m_key_descr_arr[active_index]->m_is_reverse_cf) {
+ moves_forward = true;
+ }
+
+ int rc = index_next_with_direction(buf, moves_forward);
+ if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE;
+
+ DBUG_RETURN(rc);
+}
+
+int ha_rocksdb::index_next_with_direction(uchar *const buf, bool move_forward) {
+ DBUG_ENTER_FUNC();
+
+ int rc;
+
+ if (active_index == pk_index(table, m_tbl_def)) {
+ rc = rnd_next_with_direction(buf, move_forward);
+ } else {
+ THD *thd = ha_thd();
+ for (;;) {
+ DEBUG_SYNC(thd, "rocksdb.check_flags_inwd");
+ if (thd && thd->killed) {
+ rc = HA_ERR_QUERY_INTERRUPTED;
+ break;
+ }
+ if (m_skip_scan_it_next_call) {
+ m_skip_scan_it_next_call = false;
+ } else {
+ if (move_forward) {
+ m_scan_it->Next(); /* this call cannot fail */
+ } else {
+ m_scan_it->Prev();
+ }
+ }
+ rc = rocksdb_skip_expired_records(*m_key_descr_arr[active_index],
+ m_scan_it, !move_forward);
+ if (rc != HA_EXIT_SUCCESS) {
+ break;
+ }
+ rc = find_icp_matching_index_rec(move_forward, buf);
+ if (!rc) rc = secondary_index_read(active_index, buf);
+ if (!should_skip_invalidated_record(rc)) {
+ break;
+ }
+ }
+ }
+
+ DBUG_RETURN(rc);
+}
+
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::index_first(uchar *const buf) {
+ DBUG_ENTER_FUNC();
+
+ m_sk_match_prefix = nullptr;
+ int rc = m_key_descr_arr[active_index]->m_is_reverse_cf
+ ? index_last_intern(buf)
+ : index_first_intern(buf);
+ if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE;
+
+ DBUG_RETURN(rc);
+}
+
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::index_last(uchar *const buf) {
+ DBUG_ENTER_FUNC();
+
+ m_sk_match_prefix = nullptr;
+ int rc = m_key_descr_arr[active_index]->m_is_reverse_cf
+ ? index_first_intern(buf)
+ : index_last_intern(buf);
+ if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE;
+
+ DBUG_RETURN(rc);
+}
+
+/*
+ Start scanning from the "first" value.
+
+ The 'first' here means "the first from the start of the key space".
+ For reverse-ordered key spaces, the first value will be the biggest, 'zzzz'.
+
+ A picture of a forward-ordered keyspace (remember, the keys have the form
+ 'indexnr-keyval'; suppose the index we are at has number n):
+
+ (n-1) - ...
+ ( n ) <--- 1. (n) doesn't exist in the db but it would be here.
+ ( n ) - aaa <--- 2. Seek("n") will put us here on the first index
+ ( n ) - bbb record.
+ ( n ) - cc
+
+ So, need to do: Seek(n);
+
+ A backward-ordered keyspace:
+
+ (n+1) - bbb
+ (n+1) - aaa
+ (n+1) <--- (n+1) doesn't exist in the db but would be here.
+ ( n ) - ccc <--- 1. We need to be here.
+ ( n ) - bbb
+ ( n ) - aaa
+ ( n )
+
+ So, need to: Seek(n+1);
+
+*/
+
+int ha_rocksdb::index_first_intern(uchar *const buf) {
+ DBUG_ENTER_FUNC();
+
+ uchar *key;
+ uint key_size;
+ int rc;
+
+ if (is_pk(active_index, table, m_tbl_def)) {
+ key = m_pk_packed_tuple;
+ } else {
+ key = m_sk_packed_tuple;
+ }
+
+ DBUG_ASSERT(key != nullptr);
+
+ const Rdb_key_def &kd = *m_key_descr_arr[active_index];
+ int key_start_matching_bytes = kd.get_first_key(key, &key_size);
+
+ rocksdb::Slice index_key((const char *)key, key_size);
+
+ Rdb_transaction *const tx = get_or_create_tx(table->in_use);
+ DBUG_ASSERT(tx != nullptr);
+
+ const bool is_new_snapshot = !tx->has_snapshot();
+ // Loop as long as we get a deadlock error AND we end up creating the
+ // snapshot here (i.e. it did not exist prior to this)
+ for (;;) {
+ setup_scan_iterator(kd, &index_key, false, key_start_matching_bytes);
+ m_scan_it->Seek(index_key);
+ m_skip_scan_it_next_call = true;
+
+ rc = index_next_with_direction(buf, true);
+ if (!should_recreate_snapshot(rc, is_new_snapshot)) {
+ break; /* exit the loop */
+ }
+
+ // release the snapshot and iterator so they will be regenerated
+ tx->release_snapshot();
+ release_scan_iterator();
+ }
+
+ if (!rc) {
+ /*
+ rows_index_next is always incremented on success, so decrement it
+ here since this is really index_first
+ */
+#ifdef MARIAROCKS_NOT_YET
+ stats.rows_index_first++;
+ stats.rows_index_next--;
+#endif
+ }
+
+ DBUG_RETURN(rc);
+}
+
+/**
+ @details
+ Start scanning from the "last" value.
+
+ The 'last' here means "the last from the start of the key space".
+ For reverse-ordered key spaces, we will actually read the smallest value.
+
+ A picture of a forward-ordered keyspace (remember, the keys have the form
+ 'indexnr-keyval'; suppose the index we are at has number n):
+
+ (n-1)-something
+ ( n )-aaa
+ ( n )-bbb
+ ( n )-ccc <----------- Need to seek to here.
+ (n+1) <---- Doesn't exist, but would be here.
+ (n+1)-smth, or no value at all
+
+ RocksDB's Iterator::SeekForPrev($val) seeks to "at $val or last value that's
+ smaller". We can't seek to "(n)-ccc" directly, because we don't know what
+ is the value of 'ccc' (the biggest record with prefix (n)). Instead, we seek
+ to "(n+1)", which is the least possible value that's greater than any value
+ in index #n.
+
+ So, need to: it->SeekForPrev(n+1)
+
+ A backward-ordered keyspace:
+
+ (n+1)-something
+ ( n ) - ccc
+ ( n ) - bbb
+ ( n ) - aaa <---------------- (*) Need to seek here.
+ ( n ) <--- Doesn't exist, but would be here.
+ (n-1)-smth, or no value at all
+
+ So, need to: it->SeekForPrev(n)
+*/
+
+int ha_rocksdb::index_last_intern(uchar *const buf) {
+ DBUG_ENTER_FUNC();
+
+ uchar *key;
+ uint key_size;
+ int rc;
+
+ if (is_pk(active_index, table, m_tbl_def)) {
+ key = m_pk_packed_tuple;
+ } else {
+ key = m_sk_packed_tuple;
+ }
+
+ DBUG_ASSERT(key != nullptr);
+
+ const Rdb_key_def &kd = *m_key_descr_arr[active_index];
+ int key_end_matching_bytes = kd.get_last_key(key, &key_size);
+
+ rocksdb::Slice index_key((const char *)key, key_size);
+
+ Rdb_transaction *const tx = get_or_create_tx(table->in_use);
+ DBUG_ASSERT(tx != nullptr);
+
+ bool is_new_snapshot = !tx->has_snapshot();
+ // Loop as long as we get a deadlock error AND we end up creating the
+ // snapshot here (i.e. it did not exist prior to this)
+ for (;;) {
+ setup_scan_iterator(kd, &index_key, false, key_end_matching_bytes);
+ m_scan_it->SeekForPrev(index_key);
+ m_skip_scan_it_next_call = false;
+
+ if (is_pk(active_index, table, m_tbl_def)) {
+ m_skip_scan_it_next_call = true;
+ rc = rnd_next_with_direction(buf, false);
+ } else {
+ rc = find_icp_matching_index_rec(false /*move_forward*/, buf);
+ if (!rc) rc = secondary_index_read(active_index, buf);
+ }
+
+ if (!should_recreate_snapshot(rc, is_new_snapshot)) {
+ break; /* exit the loop */
+ }
+
+ // release the snapshot and iterator so they will be regenerated
+ tx->release_snapshot();
+ release_scan_iterator();
+ }
+
+ if (!rc) {
+ /*
+ rows_index_next is always incremented on success, so decrement it
+ here since this is really index_first
+ */
+#ifdef MARIAROCKS_NOT_YET
+ stats.rows_index_first++;
+ stats.rows_index_next--;
+#endif
+ }
+
+ DBUG_RETURN(rc);
+}
+
+void ha_rocksdb::unlock_row() {
+ DBUG_ENTER_FUNC();
+
+ if (m_lock_rows != RDB_LOCK_NONE) {
+ Rdb_transaction *const tx = get_or_create_tx(table->in_use);
+ tx->release_lock(m_pk_descr->get_cf(),
+ std::string(m_last_rowkey.ptr(), m_last_rowkey.length()));
+ }
+
+ DBUG_VOID_RETURN;
+}
+
+/*
+ Returns true if SingleDelete can be used.
+ - Secondary Indexes can always use SingleDelete.
+ - If the index is PRIMARY KEY, and if all of the columns of the table
+ are covered by the PRIMARY KEY, SingleDelete can be used.
+*/
+bool ha_rocksdb::can_use_single_delete(const uint index) const {
+ return (index != pk_index(table, m_tbl_def) ||
+ (!has_hidden_pk(table) &&
+ table->key_info[index].ext_key_parts == table->s->fields));
+}
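+
+/*
+ For example (hypothetical table): with
+
+   CREATE TABLE t (a INT, b INT, PRIMARY KEY (a, b)) ENGINE=ROCKSDB;
+
+ every column is covered by the PRIMARY KEY, so SingleDelete can be used
+ for the PK as well as for any secondary index.
+*/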
+
+bool ha_rocksdb::skip_unique_check() const {
+ /*
+ We want to skip unique checks if:
+ 1) bulk_load is on
+ 2) this table is in the whitelist of tables to skip and the replication
+ lag has reached a large enough value (see unique_check_lag_threshold
+ and unique_check_lag_reset_threshold)
+ 3) the user set the unique_checks option to 0 and the table does not have
+ any secondary indexes (if the table has secondary keys, those might
+ become inconsistent/corrupted)
+ 4) we're using read-free replication
+ */
+ return THDVAR(table->in_use, bulk_load) ||
+ (m_force_skip_unique_check && m_skip_unique_check) ||
+ (my_core::thd_test_options(table->in_use,
+ OPTION_RELAXED_UNIQUE_CHECKS) &&
+ m_tbl_def->m_key_count == 1) ||
+#ifdef MARIAROCKS_NOT_YET
+ use_read_free_rpl();
+#else
+ FALSE;
+#endif
+}
+
+#ifdef MARIAROCKS_NOT_YET // MDEV-10975
+void ha_rocksdb::set_force_skip_unique_check(bool skip) {
+ DBUG_ENTER_FUNC();
+
+ m_force_skip_unique_check = skip;
+
+ DBUG_VOID_RETURN;
+}
+#endif
+
+bool ha_rocksdb::commit_in_the_middle() {
+ return THDVAR(table->in_use, bulk_load) ||
+ THDVAR(table->in_use, commit_in_the_middle);
+}
+
+/*
+ Execute a bulk commit if one is due.
+ @retval true if bulk commit failed
+ @retval false if bulk commit was skipped or succeeded
+*/
+bool ha_rocksdb::do_bulk_commit(Rdb_transaction *const tx) {
+ return commit_in_the_middle() &&
+ tx->get_write_count() >= THDVAR(table->in_use, bulk_load_size) &&
+ tx->flush_batch();
+}
+
+/*
+ If the table was created without a primary key, the SQL layer represents
+ the primary key number as MAX_INDEXES. Hence, this function returns true
+ if the table does not contain a primary key. (In that case we generate a
+ hidden 'auto-incremented' pk.)
+*/
+bool ha_rocksdb::has_hidden_pk(const TABLE *const table) const {
+ return Rdb_key_def::table_has_hidden_pk(table);
+}
+
+/*
+ Returns true if given index number is a hidden_pk.
+ - This is used when a table is created with no primary key.
+*/
+bool ha_rocksdb::is_hidden_pk(const uint index, const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg) {
+ DBUG_ASSERT(table_arg->s != nullptr);
+
+ return (table_arg->s->primary_key == MAX_INDEXES &&
+ index == tbl_def_arg->m_key_count - 1);
+}
+
+/* Returns index of primary key */
+uint ha_rocksdb::pk_index(const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg) {
+ DBUG_ASSERT(table_arg->s != nullptr);
+
+ return table_arg->s->primary_key == MAX_INDEXES ? tbl_def_arg->m_key_count - 1
+ : table_arg->s->primary_key;
+}
+
+/* Returns true if given index number is a primary key */
+bool ha_rocksdb::is_pk(const uint index, const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg) {
+ DBUG_ASSERT(table_arg->s != nullptr);
+
+ return index == table_arg->s->primary_key ||
+ is_hidden_pk(index, table_arg, tbl_def_arg);
+}
+
+uint ha_rocksdb::max_supported_key_part_length() const {
+ DBUG_ENTER_FUNC();
+ DBUG_RETURN(rocksdb_large_prefix ? MAX_INDEX_COL_LEN_LARGE
+ : MAX_INDEX_COL_LEN_SMALL);
+}
+
+const char *ha_rocksdb::get_key_name(const uint index,
+ const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg) {
+ if (is_hidden_pk(index, table_arg, tbl_def_arg)) {
+ return HIDDEN_PK_NAME;
+ }
+
+ DBUG_ASSERT(table_arg->key_info != nullptr);
+ DBUG_ASSERT(table_arg->key_info[index].name != nullptr);
+
+ return table_arg->key_info[index].name;
+}
+
+const char *ha_rocksdb::get_key_comment(const uint index,
+ const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg) {
+ if (is_hidden_pk(index, table_arg, tbl_def_arg)) {
+ return nullptr;
+ }
+
+ DBUG_ASSERT(table_arg->key_info != nullptr);
+
+ return table_arg->key_info[index].comment.str;
+}
+
+const std::string ha_rocksdb::generate_cf_name(
+ const uint index, const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg, bool *per_part_match_found) {
+ DBUG_ASSERT(table_arg != nullptr);
+ DBUG_ASSERT(tbl_def_arg != nullptr);
+ DBUG_ASSERT(per_part_match_found != nullptr);
+
+ // When creating CFs the caller needs to know if there was a custom CF name
+ // specified for a given partition.
+ *per_part_match_found = false;
+
+ // The index comment is used to define the column family name
+ // specification(s). If there was no comment, we get an empty string, and
+ // it means "use the default column family".
+ const char *const comment = get_key_comment(index, table_arg, tbl_def_arg);
+
+ // `get_key_comment` can return `nullptr`, hence the fallback to "".
+ std::string key_comment = comment ? comment : "";
+
+ std::string cf_name = Rdb_key_def::parse_comment_for_qualifier(
+ key_comment, table_arg, tbl_def_arg, per_part_match_found,
+ RDB_CF_NAME_QUALIFIER);
+
+ if (IF_PARTITIONING(table_arg->part_info, nullptr) != nullptr &&
+ !*per_part_match_found) {
+ // At this point we tried to search for a custom CF name for a partition,
+ // but none was specified. Therefore default one will be used.
+ return "";
+ }
+
+ // If we didn't find any partitioned/non-partitioned qualifiers, return the
+ // comment itself. NOTE: this currently handles returning the cf name
+ // specified in the index comment in the case of no partitions, which doesn't
+ // use any qualifiers at the moment (i.e. it is a special case).
+ if (cf_name.empty() && !key_comment.empty()) {
+ return key_comment;
+ }
+
+ return cf_name;
+}
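+
+/*
+ A sketch of the index-comment convention parsed above, assuming the usual
+ MyRocks qualifier string "cfname" (the value of RDB_CF_NAME_QUALIFIER):
+
+   CREATE TABLE t (
+     id INT,
+     a INT,
+     PRIMARY KEY (id) COMMENT 'cfname=cf_primary',
+     KEY k_a (a) COMMENT 'cfname=cf_k_a'
+   ) ENGINE=ROCKSDB;
+
+ An index with no comment (or no qualifier match) uses the default column
+ family.
+*/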
+
+const std::string ha_rocksdb::get_table_comment(const TABLE *const table_arg) {
+ DBUG_ASSERT(table_arg->s != nullptr);
+
+ return table_arg->s->comment.str;
+}
+
+/**
+ Write a new row
+
+ @param[in] buf new row data to write
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::write_row(uchar *const buf) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(buf != nullptr);
+ DBUG_ASSERT(buf == table->record[0]);
+ DBUG_ASSERT(m_lock_rows == RDB_LOCK_WRITE);
+
+ /*
+ Note: "buf == table->record[0]" is copied from innodb. I am not aware of
+ any use cases where this condition is not true.
+ */
+ if (table->next_number_field && buf == table->record[0]) {
+ int err;
+ if ((err = update_auto_increment())) {
+ DBUG_RETURN(err);
+ }
+ }
+
+ // clear cache at beginning of write for INSERT ON DUPLICATE
+ // we may get multiple write->fail->read->update if there are multiple
+ // values from INSERT
+ m_dup_pk_found = false;
+
+ const int rv = update_write_row(nullptr, buf, skip_unique_check());
+
+ if (rv == 0) {
+#ifdef MARIAROCKS_NOT_YET
+ stats.rows_inserted++;
+#endif
+ update_row_stats(ROWS_INSERTED);
+ }
+
+ DBUG_RETURN(rv);
+}
+
+/**
+ Construct m_last_rowkey (the MyRocks key expression) from the
+ before-update/delete image (the MySQL row expression).
+ m_last_rowkey is normally set during lookup phase, such as
+ rnd_next_with_direction() and rnd_pos(). With Read Free Replication,
+ these read functions are skipped and update_rows(), delete_rows() are
+ called without setting m_last_rowkey. This function sets m_last_rowkey
+ for Read Free Replication.
+*/
+void ha_rocksdb::set_last_rowkey(const uchar *const old_data) {
+#ifdef MARIAROCKS_NOT_YET
+ if (old_data && use_read_free_rpl()) {
+ const int old_pk_size = m_pk_descr->pack_record(
+ table, m_pack_buffer, old_data, m_pk_packed_tuple, nullptr, false);
+ m_last_rowkey.copy((const char *)m_pk_packed_tuple, old_pk_size,
+ &my_charset_bin);
+ }
+#endif
+}
+
+/**
+ Collect update data for primary key
+
+ @param[in, out] row_info hold all data for update row, such as
+ new row data/old row data
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::get_pk_for_update(struct update_row_info *const row_info) {
+ int size;
+
+ /*
+ Get new row key for any insert, and any update where the pk is not hidden.
+ Row key for updates with hidden pk is handled below.
+ */
+ if (!has_hidden_pk(table)) {
+ row_info->hidden_pk_id = 0;
+
+ row_info->new_pk_unpack_info = &m_pk_unpack_info;
+
+ size = m_pk_descr->pack_record(
+ table, m_pack_buffer, row_info->new_data, m_pk_packed_tuple,
+ row_info->new_pk_unpack_info, false, 0, 0, nullptr);
+ } else if (row_info->old_data == nullptr) {
+ row_info->hidden_pk_id = update_hidden_pk_val();
+ size =
+ m_pk_descr->pack_hidden_pk(row_info->hidden_pk_id, m_pk_packed_tuple);
+ } else {
+ /*
+ If hidden primary key, rowkey for new record will always be the same as
+ before
+ */
+ size = row_info->old_pk_slice.size();
+ memcpy(m_pk_packed_tuple, row_info->old_pk_slice.data(), size);
+ int err = read_hidden_pk_id_from_rowkey(&row_info->hidden_pk_id);
+ if (err) {
+ return err;
+ }
+ }
+
+ row_info->new_pk_slice =
+ rocksdb::Slice((const char *)m_pk_packed_tuple, size);
+
+ return HA_EXIT_SUCCESS;
+}
+
+/**
+ Check that the specified primary key value is unique, and lock the row
+
+ @param[in] key_id key index
+ @param[in] row_info hold all data for update row, such as old row
+ data and new row data
+ @param[out] found whether the primary key exists before.
+ @param[out] pk_changed whether primary key is changed
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::check_and_lock_unique_pk(const uint key_id,
+ const struct update_row_info &row_info,
+ bool *const found) {
+ DBUG_ASSERT(found != nullptr);
+
+ DBUG_ASSERT(row_info.old_pk_slice.size() == 0 ||
+ row_info.new_pk_slice.compare(row_info.old_pk_slice) != 0);
+
+ /* Ignore PK violations if this is an optimized 'replace into' */
+#ifdef MARIAROCKS_NOT_YET
+ const bool ignore_pk_unique_check = ha_thd()->lex->blind_replace_into;
+#else
+ const bool ignore_pk_unique_check= false;
+#endif
+
+ /*
+ Perform a read to determine if a duplicate entry exists. For primary
+ keys, a point lookup will be sufficient.
+
+ note: we intentionally don't set options.snapshot here. We want to read
+ the latest committed data.
+ */
+
+ /*
+ To prevent race conditions like the ones below, it is necessary to
+ take a lock on the target row. get_for_update() holds a gap lock if
+ the target key does not exist, so the conditions below should never
+ happen.
+
+ 1) T1 Get(empty) -> T2 Get(empty) -> T1 Put(insert) -> T1 commit
+ -> T2 Put(overwrite) -> T2 commit
+ 2) T1 Get(empty) -> T1 Put(insert, not committed yet) -> T2 Get(empty)
+ -> T2 Put(insert, blocked) -> T1 commit -> T2 commit(overwrite)
+ */
+ const rocksdb::Status s =
+ get_for_update(row_info.tx, m_pk_descr->get_cf(), row_info.new_pk_slice,
+ ignore_pk_unique_check ? nullptr : &m_retrieved_record);
+ if (!s.ok() && !s.IsNotFound()) {
+ return row_info.tx->set_status_error(
+ table->in_use, s, *m_key_descr_arr[key_id], m_tbl_def, m_table_handler);
+ }
+
+ bool key_found = ignore_pk_unique_check ? false : !s.IsNotFound();
+
+ /*
+ If the pk has a TTL, we may need to pretend the row wasn't
+ found if it is already expired.
+ */
+ if (key_found && m_pk_descr->has_ttl() &&
+ should_hide_ttl_rec(*m_pk_descr, m_retrieved_record,
+ (row_info.tx->m_snapshot_timestamp
+ ? row_info.tx->m_snapshot_timestamp
+ : static_cast<int64_t>(std::time(nullptr))))) {
+ key_found = false;
+ }
+
+ if (key_found && row_info.old_data == nullptr && m_insert_with_update) {
+ // In INSERT ON DUPLICATE KEY UPDATE ... case, if the insert failed
+ // due to a duplicate key, remember the last key and skip the check
+ // next time
+ m_dup_pk_found = true;
+
+#ifndef DBUG_OFF
+ // save it for sanity checking later
+ m_dup_pk_retrieved_record.copy(m_retrieved_record.data(),
+ m_retrieved_record.size(), &my_charset_bin);
+#endif
+ }
+
+ *found = key_found;
+
+ return HA_EXIT_SUCCESS;
+}
+
+/**
+ Check that the specified secondary key value is unique, and lock the row
+
+ @param[in] key_id key index
+ @param[in] row_info hold all data for update row, such as old row
+ data and new row data
+ @param[out] found whether specified key value exists before.
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::check_and_lock_sk(const uint key_id,
+ const struct update_row_info &row_info,
+ bool *const found) {
+ DBUG_ASSERT(found != nullptr);
+ *found = false;
+
+ /*
+ Can skip checking this key if none of the key fields have changed.
+ */
+ if (row_info.old_data != nullptr && !m_update_scope.is_set(key_id)) {
+ return HA_EXIT_SUCCESS;
+ }
+
+ KEY *key_info = nullptr;
+ uint n_null_fields = 0;
+ uint user_defined_key_parts = 1;
+
+ key_info = &table->key_info[key_id];
+ user_defined_key_parts = key_info->user_defined_key_parts;
+ /*
+ If there are no uniqueness requirements, there's no need to obtain a
+ lock for this key.
+ */
+ if (!(key_info->flags & HA_NOSAME)) {
+ return HA_EXIT_SUCCESS;
+ }
+
+ const Rdb_key_def &kd = *m_key_descr_arr[key_id];
+
+ /*
+ Calculate the new key for obtaining the lock
+
+ For unique secondary indexes, the key used for locking does not
+ include the extended fields.
+ */
+ int size =
+ kd.pack_record(table, m_pack_buffer, row_info.new_data, m_sk_packed_tuple,
+ nullptr, false, 0, user_defined_key_parts, &n_null_fields);
+ if (n_null_fields > 0) {
+ /*
+ If any fields are marked as NULL this will never match another row,
+ since NULL never matches anything else, including another NULL.
+ */
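+ /*
+ For example (hypothetical schema): with UNIQUE KEY (a, b), two rows that
+ both have (a, b) = (1, NULL) can coexist; the NULL in 'b' rules out any
+ uniqueness conflict.
+ */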
+ return HA_EXIT_SUCCESS;
+ }
+
+ const rocksdb::Slice new_slice =
+ rocksdb::Slice((const char *)m_sk_packed_tuple, size);
+
+ /*
+ Acquire lock on the old key in case of UPDATE
+ */
+ if (row_info.old_data != nullptr) {
+ size = kd.pack_record(table, m_pack_buffer, row_info.old_data,
+ m_sk_packed_tuple_old, nullptr, false, 0,
+ user_defined_key_parts);
+ const rocksdb::Slice old_slice =
+ rocksdb::Slice((const char *)m_sk_packed_tuple_old, size);
+
+ const rocksdb::Status s =
+ get_for_update(row_info.tx, kd.get_cf(), old_slice, nullptr);
+ if (!s.ok()) {
+ return row_info.tx->set_status_error(table->in_use, s, kd, m_tbl_def,
+ m_table_handler);
+ }
+
+ /*
+ If the old and new keys are the same we're done since we've already taken
+ the lock on the old key
+ */
+ if (!new_slice.compare(old_slice)) {
+ return HA_EXIT_SUCCESS;
+ }
+ }
+
+ /*
+ Perform a read to determine if a duplicate entry exists - since this is
+ a secondary index, a range scan is needed.
+
+ note: we intentionally don't set options.snapshot here. We want to read
+ the latest committed data.
+ */
+
+ const bool all_parts_used = (user_defined_key_parts == kd.get_key_parts());
+
+ /*
+ This iterator seems expensive since we need to allocate and free
+ memory for each unique index.
+
+ If this needs to be optimized, for keys without NULL fields, the
+ extended primary key fields can be migrated to the value portion of the
+ key. This enables using Get() instead of Seek() as in the primary key
+ case.
+
+ The bloom filter may need to be disabled for this lookup.
+ */
+ uchar lower_bound_buf[Rdb_key_def::INDEX_NUMBER_SIZE];
+ uchar upper_bound_buf[Rdb_key_def::INDEX_NUMBER_SIZE];
+ rocksdb::Slice lower_bound_slice;
+ rocksdb::Slice upper_bound_slice;
+
+ const bool total_order_seek = !check_bloom_and_set_bounds(
+ ha_thd(), kd, new_slice, all_parts_used, Rdb_key_def::INDEX_NUMBER_SIZE,
+ lower_bound_buf, upper_bound_buf, &lower_bound_slice, &upper_bound_slice);
+ const bool fill_cache = !THDVAR(ha_thd(), skip_fill_cache);
+
+ const rocksdb::Status s =
+ get_for_update(row_info.tx, kd.get_cf(), new_slice, nullptr);
+ if (!s.ok() && !s.IsNotFound()) {
+ return row_info.tx->set_status_error(table->in_use, s, kd, m_tbl_def,
+ m_table_handler);
+ }
+
+ rocksdb::Iterator *const iter = row_info.tx->get_iterator(
+ kd.get_cf(), total_order_seek, fill_cache, lower_bound_slice,
+ upper_bound_slice, true /* read current data */,
+ false /* acquire snapshot */);
+ /*
+ Need to scan the transaction to see if there is a duplicate key.
+ Also need to scan RocksDB and verify the key has not been deleted
+ in the transaction.
+ */
+ iter->Seek(new_slice);
+ *found = !read_key_exact(kd, iter, all_parts_used, new_slice,
+ row_info.tx->m_snapshot_timestamp);
+ delete iter;
+
+ return HA_EXIT_SUCCESS;
+}
+
+/**
+ Enumerate all keys to check their uniqueness and lock the corresponding rows
+
+ @param[in] row_info hold all data for update row, such as old row
+ data and new row data
+ @param[out] pk_changed whether primary key is changed
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::check_uniqueness_and_lock(
+ const struct update_row_info &row_info, bool pk_changed) {
+ /*
+ Go through each index and determine if the index has uniqueness
+ requirements. If it does, then try to obtain a row lock on the new values.
+ Once all locks have been obtained, then perform the changes needed to
+ update/insert the row.
+ */
+ for (uint key_id = 0; key_id < m_tbl_def->m_key_count; key_id++) {
+ bool found;
+ int rc;
+
+ if (is_pk(key_id, table, m_tbl_def)) {
+ if (row_info.old_pk_slice.size() > 0 && !pk_changed) {
+ found = false;
+ rc = HA_EXIT_SUCCESS;
+ } else {
+ rc = check_and_lock_unique_pk(key_id, row_info, &found);
+ }
+ } else {
+ rc = check_and_lock_sk(key_id, row_info, &found);
+ }
+
+ if (rc != HA_EXIT_SUCCESS) {
+ return rc;
+ }
+
+ if (found) {
+ /* There is a row with this key already, so error out. */
+ errkey = key_id;
+ m_dupp_errkey = errkey;
+
+ return HA_ERR_FOUND_DUPP_KEY;
+ }
+ }
+
+ return HA_EXIT_SUCCESS;
+}
+
+/**
+ Check whether a secondary key value is a duplicate or not
+
+ @param[in] table_arg the table currently being worked on
+ @param[in] key_def the key_def being checked
+ @param[in] key secondary key storage data
+ @param[out] sk_info holds secondary key memcmp data (new/old)
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+
+int ha_rocksdb::check_duplicate_sk(const TABLE *table_arg,
+ const Rdb_key_def &key_def,
+ const rocksdb::Slice *key,
+ struct unique_sk_buf_info *sk_info) {
+ uint n_null_fields = 0;
+ const rocksdb::Comparator *index_comp = key_def.get_cf()->GetComparator();
+
+ /* Get proper SK buffer. */
+ uchar *sk_buf = sk_info->swap_and_get_sk_buf();
+
+ /* Get memcmp form of sk without extended pk tail */
+ uint sk_memcmp_size =
+ key_def.get_memcmp_sk_parts(table_arg, *key, sk_buf, &n_null_fields);
+
+ sk_info->sk_memcmp_key =
+ rocksdb::Slice(reinterpret_cast<char *>(sk_buf), sk_memcmp_size);
+
+ if (sk_info->sk_memcmp_key_old.size() > 0 && n_null_fields == 0 &&
+ index_comp->Compare(sk_info->sk_memcmp_key, sk_info->sk_memcmp_key_old) ==
+ 0) {
+ return 1;
+ }
+
+ sk_info->sk_memcmp_key_old = sk_info->sk_memcmp_key;
+ return 0;
+}
+
+int ha_rocksdb::bulk_load_key(Rdb_transaction *const tx, const Rdb_key_def &kd,
+ const rocksdb::Slice &key,
+ const rocksdb::Slice &value, bool sort) {
+ DBUG_ENTER_FUNC();
+ int res;
+ THD *thd = ha_thd();
+ if (thd && thd->killed) {
+ DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED);
+ }
+
+ rocksdb::ColumnFamilyHandle *cf = kd.get_cf();
+
+ // In the case of unsorted inserts, m_sst_info allocated here is not
+ // used to store the keys. It is still used to indicate when tables
+ // are switched.
+ if (m_sst_info == nullptr || m_sst_info->is_done()) {
+ m_sst_info.reset(new Rdb_sst_info(rdb, m_table_handler->m_table_name,
+ kd.get_name(), cf, *rocksdb_db_options,
+ THDVAR(ha_thd(), trace_sst_api)));
+ res = tx->start_bulk_load(this, m_sst_info);
+ if (res != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(res);
+ }
+ }
+ DBUG_ASSERT(m_sst_info);
+
+ if (sort) {
+ Rdb_index_merge *key_merge;
+ DBUG_ASSERT(cf != nullptr);
+
+ res = tx->get_key_merge(kd.get_gl_index_id(), cf, &key_merge);
+ if (res == HA_EXIT_SUCCESS) {
+ res = key_merge->add(key, value);
+ }
+ } else {
+ res = m_sst_info->put(key, value);
+ }
+
+ DBUG_RETURN(res);
+}
+
+int ha_rocksdb::finalize_bulk_load(bool print_client_error) {
+ DBUG_ENTER_FUNC();
+
+ int res = HA_EXIT_SUCCESS;
+
+ /* Skip if there are no possible ongoing bulk loads */
+ if (m_sst_info) {
+ if (m_sst_info->is_done()) {
+ m_sst_info.reset();
+ DBUG_RETURN(res);
+ }
+
+ Rdb_sst_info::Rdb_sst_commit_info commit_info;
+
+ // Wrap up the current work in m_sst_info and get ready to commit
+ // This transfers responsibility for the commit over to commit_info
+ res = m_sst_info->finish(&commit_info, print_client_error);
+ if (res == 0) {
+ // Make sure we have work to do - under a race condition we could lose
+ // to another thread and end up with no work
+ if (commit_info.has_work()) {
+ rocksdb::IngestExternalFileOptions opts;
+ opts.move_files = true;
+ opts.snapshot_consistency = false;
+ opts.allow_global_seqno = false;
+ opts.allow_blocking_flush = false;
+
+ const rocksdb::Status s = rdb->IngestExternalFile(
+ commit_info.get_cf(), commit_info.get_committed_files(), opts);
+ if (!s.ok()) {
+ if (print_client_error) {
+ Rdb_sst_info::report_error_msg(s, nullptr);
+ }
+ res = HA_ERR_ROCKSDB_BULK_LOAD;
+ } else {
+ // Mark the list of SST files as committed, otherwise they'll get
+ // cleaned up when commit_info destructs
+ commit_info.commit();
+ }
+ }
+ }
+ m_sst_info.reset();
+ }
+ DBUG_RETURN(res);
+}
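+
+/*
+ For illustration, a typical session flow that ends up here (a sketch;
+ rocksdb_bulk_load is the MyRocks session variable backing
+ THDVAR(thd, bulk_load), and exact trigger points may vary):
+
+   SET SESSION rocksdb_bulk_load = 1;
+   -- bulk INSERT / LOAD DATA in key order ...
+   SET SESSION rocksdb_bulk_load = 0;  -- bulk load is finalized here
+*/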
+
+/**
+ Update an existing primary key record or write a new primary key record
+
+ @param[in] kd the primary key being updated/written
+ @param[in] row_info holds all row data, such as old row data and
+ new row data
+ @param[in] pk_changed whether primary key is changed
+ @return
+ HA_EXIT_SUCCESS OK
+ Other HA_ERR error code (can be SE-specific)
+ */
+int ha_rocksdb::update_write_pk(const Rdb_key_def &kd,
+ const struct update_row_info &row_info,
+ bool pk_changed) {
+ uint key_id = kd.get_keyno();
+ bool hidden_pk = is_hidden_pk(key_id, table, m_tbl_def);
+ ulonglong bytes_written = 0;
+
+ /*
+ If the PK has changed, or if this PK uses single deletes and this is an
+ update, the old key needs to be deleted. In the single delete case, it
+ might be possible to have this sequence of keys: PUT(X), PUT(X), SD(X),
+ resulting in the first PUT(X) showing up.
+ */
+ if (!hidden_pk && (pk_changed || ((row_info.old_pk_slice.size() > 0) &&
+ can_use_single_delete(key_id)))) {
+ const rocksdb::Status s = delete_or_singledelete(
+ key_id, row_info.tx, kd.get_cf(), row_info.old_pk_slice);
+ if (!s.ok()) {
+ return row_info.tx->set_status_error(table->in_use, s, kd, m_tbl_def,
+ m_table_handler);
+ } else {
+ bytes_written = row_info.old_pk_slice.size();
+ }
+ }
+
+ if (table->found_next_number_field) {
+ update_auto_incr_val_from_field();
+ }
+
+ int rc = HA_EXIT_SUCCESS;
+ rocksdb::Slice value_slice;
+ /* Prepare the new record to be written into RocksDB */
+ if ((rc = m_converter->encode_value_slice(
+ m_pk_descr, row_info.new_pk_slice, row_info.new_pk_unpack_info,
+ !row_info.old_pk_slice.empty(), should_store_row_debug_checksums(),
+ m_ttl_bytes, &m_ttl_bytes_updated, &value_slice))) {
+ return rc;
+ }
+
+ const auto cf = m_pk_descr->get_cf();
+ if (rocksdb_enable_bulk_load_api && THDVAR(table->in_use, bulk_load) &&
+ !hidden_pk) {
+ /*
+ Write the primary key directly to an SST file using an SstFileWriter
+ */
+ rc = bulk_load_key(row_info.tx, kd, row_info.new_pk_slice, value_slice,
+ THDVAR(table->in_use, bulk_load_allow_unsorted));
+ } else if (row_info.skip_unique_check || row_info.tx->m_ddl_transaction) {
+ /*
+ It is the responsibility of the user to make sure that the data being
+ inserted doesn't violate any unique keys.
+ */
+ row_info.tx->get_indexed_write_batch()->Put(cf, row_info.new_pk_slice,
+ value_slice);
+ } else {
+ const bool assume_tracked = can_assume_tracked(ha_thd());
+ const auto s = row_info.tx->put(cf, row_info.new_pk_slice, value_slice,
+ assume_tracked);
+ if (!s.ok()) {
+ if (s.IsBusy()) {
+ errkey = table->s->primary_key;
+ m_dupp_errkey = errkey;
+ rc = HA_ERR_FOUND_DUPP_KEY;
+ } else {
+ rc = row_info.tx->set_status_error(table->in_use, s, *m_pk_descr,
+ m_tbl_def, m_table_handler);
+ }
+ }
+ }
+
+ if (rc == HA_EXIT_SUCCESS) {
+ row_info.tx->update_bytes_written(
+ bytes_written + row_info.new_pk_slice.size() + value_slice.size());
+ }
+ return rc;
+}
+
+/**
+ Update an existing secondary key record or write a new secondary key record
+
+ @param[in] table_arg Table we're working on
+ @param[in] kd The secondary key being updated/written
+ @param[in] row_info data structure containing old and new row data
+ @param[in] bulk_load_sk whether to use bulk load. Currently it is only
+ supported for writes
+ @return
+ HA_EXIT_SUCCESS OK
+ Other HA_ERR error code (can be SE-specific)
+ */
+int ha_rocksdb::update_write_sk(const TABLE *const table_arg,
+ const Rdb_key_def &kd,
+ const struct update_row_info &row_info,
+ const bool bulk_load_sk) {
+ int new_packed_size;
+ int old_packed_size;
+ int rc = HA_EXIT_SUCCESS;
+
+ rocksdb::Slice new_key_slice;
+ rocksdb::Slice new_value_slice;
+ rocksdb::Slice old_key_slice;
+
+ const uint key_id = kd.get_keyno();
+
+ ulonglong bytes_written = 0;
+
+ /*
+ Can skip updating this key if none of the key fields have changed and, if
+ this table has TTL, the TTL timestamp has not changed.
+ */
+ if (row_info.old_data != nullptr && !m_update_scope.is_set(key_id) &&
+ (!kd.has_ttl() || !m_ttl_bytes_updated)) {
+ return HA_EXIT_SUCCESS;
+ }
+
+ bool store_row_debug_checksums = should_store_row_debug_checksums();
+ new_packed_size =
+ kd.pack_record(table_arg, m_pack_buffer, row_info.new_data,
+ m_sk_packed_tuple, &m_sk_tails, store_row_debug_checksums,
+ row_info.hidden_pk_id, 0, nullptr, m_ttl_bytes);
+
+ if (row_info.old_data != nullptr) {
+ // The old value
+ old_packed_size = kd.pack_record(
+ table_arg, m_pack_buffer, row_info.old_data, m_sk_packed_tuple_old,
+ &m_sk_tails_old, store_row_debug_checksums, row_info.hidden_pk_id, 0,
+ nullptr, m_ttl_bytes);
+
+ /*
+ Check if we are going to write the same value. This can happen when
+ one does
+ UPDATE tbl SET col='foo'
+ and we are looking at the row that already has col='foo'.
+
+ We also need to compare the unpack info. Suppose, the collation is
+ case-insensitive, and unpack info contains information about whether
+ the letters were uppercase and lowercase. Then, both 'foo' and 'FOO'
+ will have the same key value, but different data in unpack_info.
+
+ (note: anyone changing bytewise_compare should take this code into
+ account)
+ */
+ if (old_packed_size == new_packed_size &&
+ m_sk_tails_old.get_current_pos() == m_sk_tails.get_current_pos() &&
+ !(kd.has_ttl() && m_ttl_bytes_updated) &&
+ memcmp(m_sk_packed_tuple_old, m_sk_packed_tuple, old_packed_size) ==
+ 0 &&
+ memcmp(m_sk_tails_old.ptr(), m_sk_tails.ptr(),
+ m_sk_tails.get_current_pos()) == 0) {
+ return HA_EXIT_SUCCESS;
+ }
+
+ /*
+ Deleting entries from secondary index should skip locking, but
+ be visible to the transaction.
+ (also note that DDL statements do not delete rows, so this is not a DDL
+ statement)
+ */
+ old_key_slice = rocksdb::Slice(
+ reinterpret_cast<const char *>(m_sk_packed_tuple_old), old_packed_size);
+
+ row_info.tx->get_indexed_write_batch()->SingleDelete(kd.get_cf(),
+ old_key_slice);
+
+ bytes_written = old_key_slice.size();
+ }
+
+ new_key_slice = rocksdb::Slice(
+ reinterpret_cast<const char *>(m_sk_packed_tuple), new_packed_size);
+ new_value_slice =
+ rocksdb::Slice(reinterpret_cast<const char *>(m_sk_tails.ptr()),
+ m_sk_tails.get_current_pos());
+
+ if (bulk_load_sk && row_info.old_data == nullptr) {
+ rc = bulk_load_key(row_info.tx, kd, new_key_slice, new_value_slice, true);
+ } else {
+ row_info.tx->get_indexed_write_batch()->Put(kd.get_cf(), new_key_slice,
+ new_value_slice);
+ }
+
+ row_info.tx->update_bytes_written(bytes_written + new_key_slice.size() +
+ new_value_slice.size());
+
+ return rc;
+}
+
+/**
+ Update existing indexes (PK/SKs) or write new indexes (PK/SKs)
+
+ @param[in] row_info hold all row data, such as old key/new key
+ @param[in] pk_changed whether primary key is changed
+ @return
+ HA_EXIT_SUCCESS OK
+ Other HA_ERR error code (can be SE-specific)
+ */
+int ha_rocksdb::update_write_indexes(const struct update_row_info &row_info,
+ const bool pk_changed) {
+ int rc;
+ bool bulk_load_sk;
+
+ // The PK must be updated first to pull out the TTL value.
+ rc = update_write_pk(*m_pk_descr, row_info, pk_changed);
+ if (rc != HA_EXIT_SUCCESS) {
+ return rc;
+ }
+
+  // Update the remaining indexes. Allow bulk loading only if
+  // bulk_load_allow_sk is enabled
+ bulk_load_sk = rocksdb_enable_bulk_load_api &&
+ THDVAR(table->in_use, bulk_load) &&
+ THDVAR(table->in_use, bulk_load_allow_sk);
+ for (uint key_id = 0; key_id < m_tbl_def->m_key_count; key_id++) {
+ if (is_pk(key_id, table, m_tbl_def)) {
+ continue;
+ }
+
+ rc = update_write_sk(table, *m_key_descr_arr[key_id], row_info,
+ bulk_load_sk);
+ if (rc != HA_EXIT_SUCCESS) {
+ return rc;
+ }
+ }
+
+ return HA_EXIT_SUCCESS;
+}
+
+/**
+ Update an existing row or write a new row
+
+ @param[in] old_data nullptr for write, non-null for update
+ @param[in] new_data non-null for write/update
+  @param[in] skip_unique_check whether to skip the uniqueness check
+ @return
+ HA_EXIT_SUCCESS OK
+ Other HA_ERR error code (can be SE-specific)
+ */
+int ha_rocksdb::update_write_row(const uchar *const old_data,
+ const uchar *const new_data,
+ const bool skip_unique_check) {
+ DBUG_ENTER_FUNC();
+
+ THD *thd = ha_thd();
+ if (thd && thd->killed) {
+ DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED);
+ }
+
+ bool pk_changed = false;
+ struct update_row_info row_info;
+
+ row_info.old_data = old_data;
+ row_info.new_data = new_data;
+ row_info.skip_unique_check = skip_unique_check;
+ row_info.new_pk_unpack_info = nullptr;
+ set_last_rowkey(old_data);
+
+ row_info.tx = get_or_create_tx(table->in_use);
+
+ if (old_data != nullptr) {
+ row_info.old_pk_slice =
+ rocksdb::Slice(m_last_rowkey.ptr(), m_last_rowkey.length());
+
+ /* Determine which indexes need updating. */
+ calc_updated_indexes();
+ }
+
+ /*
+ Get the new row key into row_info.new_pk_slice
+ */
+ int rc = get_pk_for_update(&row_info);
+ if (rc != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(rc);
+ }
+
+ /*
+ For UPDATEs, if the key has changed, we need to obtain a lock. INSERTs
+ always require locking.
+ */
+ if (row_info.old_pk_slice.size() > 0) {
+ pk_changed = row_info.new_pk_slice.compare(row_info.old_pk_slice) != 0;
+ }
+
+ if (!skip_unique_check) {
+ /*
+ Check to see if we are going to have failures because of unique
+ keys. Also lock the appropriate key values.
+ */
+ rc = check_uniqueness_and_lock(row_info, pk_changed);
+ if (rc != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(rc);
+ }
+ }
+
+ DEBUG_SYNC(ha_thd(), "rocksdb.update_write_row_after_unique_check");
+
+ /*
+ At this point, all locks have been obtained, and all checks for duplicate
+ keys have been performed. No further errors can be allowed to occur from
+ here because updates to the transaction will be made and those updates
+ cannot be easily removed without rolling back the entire transaction.
+ */
+ rc = update_write_indexes(row_info, pk_changed);
+ if (rc != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(rc);
+ }
+
+ if (old_data != nullptr) {
+ row_info.tx->incr_update_count();
+ } else {
+ row_info.tx->incr_insert_count();
+ }
+
+ if (do_bulk_commit(row_info.tx)) {
+ DBUG_RETURN(HA_ERR_ROCKSDB_BULK_LOAD);
+ }
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+/*
+  Set iterator upper/lower bounds for Seek/SeekForPrev.
+  This makes RocksDB avoid scanning tombstones outside of the given key
+  range when prefix_same_as_start=true was not passed (i.e. when the
+  prefix bloom filter cannot be used).
+  Inverting the upper/lower bounds is necessary on a reverse order CF.
+  This covers the HA_READ_PREFIX_LAST* cases as well. For example, if the
+  query's eq condition was the 12 bytes 0x0000b3eb003f65c5e78858b8 and we
+  are doing HA_READ_PREFIX_LAST, eq_cond_len was 11 (see calc_eq_cond_len()
+  for details). If the index was reverse order, the upper bound would be
+  0x0000b3eb003f65c5e78857 and the lower bound would be
+  0x0000b3eb003f65c5e78859. These cover the given eq condition range.
+
+  @param kd                 IN  Key definition the bounds are computed for
+  @param eq_cond            IN  Equality condition prefix
+  @param bound_len          IN  Length of the bounds to produce
+  @param lower_bound        IN  Buffer for the lower bound
+  @param upper_bound        IN  Buffer for the upper bound
+  @param lower_bound_slice  OUT Slice over the lower bound
+  @param upper_bound_slice  OUT Slice over the upper bound
+*/
+void ha_rocksdb::setup_iterator_bounds(
+ const Rdb_key_def &kd, const rocksdb::Slice &eq_cond, size_t bound_len,
+ uchar *const lower_bound, uchar *const upper_bound,
+ rocksdb::Slice *lower_bound_slice, rocksdb::Slice *upper_bound_slice) {
+  // If eq_cond is no longer than Rdb_key_def::INDEX_NUMBER_SIZE, we should
+  // be able to get better bounds just by using the index id directly.
+ if (eq_cond.size() <= Rdb_key_def::INDEX_NUMBER_SIZE) {
+ DBUG_ASSERT(bound_len == Rdb_key_def::INDEX_NUMBER_SIZE);
+ uint size;
+ kd.get_infimum_key(lower_bound, &size);
+ DBUG_ASSERT(size == Rdb_key_def::INDEX_NUMBER_SIZE);
+ kd.get_supremum_key(upper_bound, &size);
+ DBUG_ASSERT(size == Rdb_key_def::INDEX_NUMBER_SIZE);
+ } else {
+ DBUG_ASSERT(bound_len <= eq_cond.size());
+ memcpy(upper_bound, eq_cond.data(), bound_len);
+ kd.successor(upper_bound, bound_len);
+ memcpy(lower_bound, eq_cond.data(), bound_len);
+ kd.predecessor(lower_bound, bound_len);
+ }
+
+ if (kd.m_is_reverse_cf) {
+ *upper_bound_slice = rocksdb::Slice((const char *)lower_bound, bound_len);
+ *lower_bound_slice = rocksdb::Slice((const char *)upper_bound, bound_len);
+ } else {
+ *upper_bound_slice = rocksdb::Slice((const char *)upper_bound, bound_len);
+ *lower_bound_slice = rocksdb::Slice((const char *)lower_bound, bound_len);
+ }
+}
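+
+/*
+  A minimal standalone sketch of the successor computation relied on above,
+  assuming it behaves like a big-endian increment (the authoritative
+  version is Rdb_key_def::successor(); the name and shape below are
+  illustrative only): produce the smallest byte string strictly greater
+  than every key sharing the given prefix. E.g. the successor of the
+  11-byte prefix ending in 0x58 is the same prefix ending in 0x59,
+  matching the bounds in the comment above.
+*/
+static bool MY_ATTRIBUTE((__unused__))
+rdb_successor_sketch(uchar *const buf, const size_t len) {
+  // Scan from the last (least significant) byte, carrying past 0xff.
+  for (size_t i = len; i > 0; i--) {
+    if (buf[i - 1] != 0xff) {
+      buf[i - 1]++;  // incremented without carry: done
+      return true;
+    }
+    buf[i - 1] = 0x00;  // 0xff wraps to 0x00; the carry continues leftward
+  }
+  return false;  // the prefix was all 0xff bytes: no successor exists
+}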
+
+/*
+ Open a cursor
+*/
+
+void ha_rocksdb::setup_scan_iterator(const Rdb_key_def &kd,
+ rocksdb::Slice *const slice,
+ const bool use_all_keys,
+ const uint eq_cond_len) {
+ DBUG_ASSERT(slice->size() >= eq_cond_len);
+
+ Rdb_transaction *const tx = get_or_create_tx(table->in_use);
+
+ bool skip_bloom = true;
+
+ const rocksdb::Slice eq_cond(slice->data(), eq_cond_len);
+ // The size of m_scan_it_lower_bound (and upper) is technically
+ // max_packed_sk_len as calculated in ha_rocksdb::alloc_key_buffers. Rather
+ // than recalculating that number, we pass in the max of eq_cond_len and
+ // Rdb_key_def::INDEX_NUMBER_SIZE which is guaranteed to be smaller than
+ // max_packed_sk_len, hence ensuring no buffer overrun.
+ //
+ // See ha_rocksdb::setup_iterator_bounds on how the bound_len parameter is
+ // used.
+ if (check_bloom_and_set_bounds(
+ ha_thd(), kd, eq_cond, use_all_keys,
+ std::max(eq_cond_len, (uint)Rdb_key_def::INDEX_NUMBER_SIZE),
+ m_scan_it_lower_bound, m_scan_it_upper_bound,
+ &m_scan_it_lower_bound_slice, &m_scan_it_upper_bound_slice)) {
+ skip_bloom = false;
+ }
+
+ /*
+    In some cases, setup_scan_iterator() is called multiple times from
+    the same query, but the bloom filter cannot always be used.
+    Consider the following example, where id2 is VARCHAR(30) and the
+    PRIMARY KEY is (id1, id2):
+      SELECT count(*) FROM t2 WHERE id1=100
+        AND id2 IN ('00000000000000000000', '100');
+    Here setup_scan_iterator() is called twice: the first time for
+    (id1, id2)=(100, '00000000000000000000') and the second time for
+    (100, '100').
+    If the prefix bloom filter length is 24 bytes, the filter can be used
+    for the first condition but not for the second.
+    When the bloom filter condition changes, it is currently necessary to
+    destroy and re-create the Iterator.
+ */
+ if (m_scan_it_skips_bloom != skip_bloom) {
+ release_scan_iterator();
+ }
+
+ /*
+ SQL layer can call rnd_init() multiple times in a row.
+ In that case, re-use the iterator, but re-position it at the table start.
+ */
+ if (!m_scan_it) {
+ const bool fill_cache = !THDVAR(ha_thd(), skip_fill_cache);
+ if (commit_in_the_middle()) {
+ DBUG_ASSERT(m_scan_it_snapshot == nullptr);
+ m_scan_it_snapshot = rdb->GetSnapshot();
+
+ auto read_opts = rocksdb::ReadOptions();
+ // TODO(mung): set based on WHERE conditions
+ read_opts.total_order_seek = true;
+ read_opts.snapshot = m_scan_it_snapshot;
+ m_scan_it = rdb->NewIterator(read_opts, kd.get_cf());
+ } else {
+ m_scan_it = tx->get_iterator(kd.get_cf(), skip_bloom, fill_cache,
+ m_scan_it_lower_bound_slice,
+ m_scan_it_upper_bound_slice);
+ }
+ m_scan_it_skips_bloom = skip_bloom;
+ }
+}
+
+void ha_rocksdb::release_scan_iterator() {
+ delete m_scan_it;
+ m_scan_it = nullptr;
+
+ if (m_scan_it_snapshot) {
+ rdb->ReleaseSnapshot(m_scan_it_snapshot);
+ m_scan_it_snapshot = nullptr;
+ }
+}
+
+void ha_rocksdb::setup_iterator_for_rnd_scan() {
+ uint key_size;
+
+ int key_start_matching_bytes =
+ m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size);
+
+ rocksdb::Slice table_key((const char *)m_pk_packed_tuple, key_size);
+
+ setup_scan_iterator(*m_pk_descr, &table_key, false, key_start_matching_bytes);
+ m_scan_it->Seek(table_key);
+ m_skip_scan_it_next_call = true;
+}
+
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::rnd_init(bool scan) {
+ DBUG_ENTER_FUNC();
+
+ THD *thd = ha_thd();
+ if (thd && thd->killed) {
+ DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED);
+ }
+
+ Rdb_transaction *const tx = get_or_create_tx(table->in_use);
+
+ // when this table is being updated, decode all fields
+ m_converter->setup_field_decoders(table->read_set,
+ m_lock_rows == RDB_LOCK_WRITE);
+
+ if (scan) {
+ m_rnd_scan_is_new_snapshot = !tx->has_snapshot();
+ setup_iterator_for_rnd_scan();
+ } else {
+ /* We don't need any preparations for rnd_pos() calls. */
+ }
+
+ // If m_lock_rows is on then we will be doing a get_for_update when accessing
+ // the index, so don't acquire the snapshot right away. Otherwise acquire
+ // the snapshot immediately.
+ tx->acquire_snapshot(m_lock_rows == RDB_LOCK_NONE);
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::rnd_next(uchar *const buf) {
+ DBUG_ENTER_FUNC();
+
+ int rc;
+ for (;;) {
+ rc = rnd_next_with_direction(buf, true);
+ if (!should_recreate_snapshot(rc, m_rnd_scan_is_new_snapshot)) {
+ break; /* exit the loop */
+ }
+ // release the snapshot and iterator and then regenerate them
+ Rdb_transaction *tx = get_or_create_tx(table->in_use);
+ tx->release_snapshot();
+ release_scan_iterator();
+ setup_iterator_for_rnd_scan();
+ }
+
+ m_rnd_scan_is_new_snapshot = false;
+
+ if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE;
+
+ DBUG_RETURN(rc);
+}
+
+/*
+ See also secondary_index_read().
+*/
+int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) {
+ DBUG_ENTER_FUNC();
+
+ int rc;
+ THD *thd = ha_thd();
+
+ table->status = STATUS_NOT_FOUND;
+#ifdef MARIAROCKS_NOT_YET
+ stats.rows_requested++;
+#endif
+ if (!m_scan_it || !is_valid(m_scan_it)) {
+ /*
+ We can get here when SQL layer has called
+
+ h->index_init(PRIMARY);
+ h->index_read_map(full index tuple, HA_READ_KEY_EXACT);
+
+ In this case, we should return EOF.
+ */
+ DBUG_RETURN(HA_ERR_END_OF_FILE);
+ }
+
+ for (;;) {
+ DEBUG_SYNC(thd, "rocksdb.check_flags_rnwd");
+ if (thd && thd->killed) {
+ rc = HA_ERR_QUERY_INTERRUPTED;
+ break;
+ }
+
+ if (m_skip_scan_it_next_call) {
+ m_skip_scan_it_next_call = false;
+ } else {
+ if (move_forward) {
+ m_scan_it->Next(); /* this call cannot fail */
+ } else {
+ m_scan_it->Prev(); /* this call cannot fail */
+ }
+ }
+
+ if (!is_valid(m_scan_it)) {
+ rc = HA_ERR_END_OF_FILE;
+ break;
+ }
+
+ /* check if we're out of this table */
+ const rocksdb::Slice key = m_scan_it->key();
+ if (!m_pk_descr->covers_key(key)) {
+ rc = HA_ERR_END_OF_FILE;
+ break;
+ }
+
+ if (m_lock_rows != RDB_LOCK_NONE) {
+ /*
+ Lock the row we've just read.
+
+        Now we call get_for_update(), which will 1) take a lock and 2) fail
+        if the row was deleted since the snapshot was taken.
+ */
+ Rdb_transaction *const tx = get_or_create_tx(table->in_use);
+ DEBUG_SYNC(ha_thd(), "rocksdb_concurrent_delete");
+
+ if (m_pk_descr->has_ttl() &&
+ should_hide_ttl_rec(*m_pk_descr, m_scan_it->value(),
+ tx->m_snapshot_timestamp)) {
+ continue;
+ }
+
+ const rocksdb::Status s =
+ get_for_update(tx, m_pk_descr->get_cf(), key, &m_retrieved_record);
+ if (s.IsNotFound() &&
+ should_skip_invalidated_record(HA_ERR_KEY_NOT_FOUND)) {
+ continue;
+ }
+
+ if (!s.ok()) {
+ DBUG_RETURN(tx->set_status_error(table->in_use, s, *m_pk_descr,
+ m_tbl_def, m_table_handler));
+ }
+
+      // If we called get_for_update(), use the value from that call, not
+      // from the iterator, as the latter may be stale since we don't have
+      // a snapshot when m_lock_rows is not RDB_LOCK_NONE.
+ m_last_rowkey.copy(key.data(), key.size(), &my_charset_bin);
+ rc = convert_record_from_storage_format(&key, buf);
+ } else {
+ // Use the value from the iterator
+ rocksdb::Slice value = m_scan_it->value();
+
+ if (m_pk_descr->has_ttl() &&
+ should_hide_ttl_rec(
+ *m_pk_descr, value,
+ get_or_create_tx(table->in_use)->m_snapshot_timestamp)) {
+ continue;
+ }
+
+ m_last_rowkey.copy(key.data(), key.size(), &my_charset_bin);
+ rc = convert_record_from_storage_format(&key, &value, buf);
+ }
+
+ table->status = 0;
+ break;
+ }
+
+ if (!rc) {
+#ifdef MARIAROCKS_NOT_YET
+ stats.rows_read++;
+ stats.rows_index_next++;
+#endif
+ update_row_stats(ROWS_READ);
+ }
+
+ DBUG_RETURN(rc);
+}
+
+int ha_rocksdb::rnd_end() {
+ DBUG_ENTER_FUNC();
+
+ release_scan_iterator();
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::index_init(uint idx, bool sorted) {
+ DBUG_ENTER_FUNC();
+
+ THD *thd = ha_thd();
+ if (thd && thd->killed) {
+ DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED);
+ }
+
+ Rdb_transaction *const tx = get_or_create_tx(table->in_use);
+ DBUG_ASSERT(tx != nullptr);
+
+ m_start_range= NULL;
+ // when this table is being updated, decode all fields
+ m_converter->setup_field_decoders(table->read_set,
+ m_lock_rows == RDB_LOCK_WRITE);
+
+ if (!m_keyread_only) {
+ m_key_descr_arr[idx]->get_lookup_bitmap(table, &m_lookup_bitmap);
+ }
+
+ // If m_lock_rows is not RDB_LOCK_NONE then we will be doing a get_for_update
+ // when accessing the index, so don't acquire the snapshot right away.
+ // Otherwise acquire the snapshot immediately.
+ tx->acquire_snapshot(m_lock_rows == RDB_LOCK_NONE);
+
+ active_index = idx;
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+*/
+int ha_rocksdb::index_end() {
+ DBUG_ENTER_FUNC();
+
+ release_scan_iterator();
+
+ bitmap_free(&m_lookup_bitmap);
+
+ active_index = MAX_KEY;
+ in_range_check_pushed_down = FALSE;
+ m_start_range= NULL;
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::truncate() {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(m_tbl_def != nullptr);
+
+ // Save table name to use later
+ std::string table_name = m_tbl_def->full_tablename();
+
+ // Delete current table
+ int err = delete_table(m_tbl_def);
+ if (err != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(err);
+ }
+
+ // Reset auto_increment_value to 1 if auto-increment feature is enabled
+ // By default, the starting valid value for auto_increment_value is 1
+ DBUG_RETURN(create_table(
+ table_name, table,
+ table->found_next_number_field ? 1 : 0 /* auto_increment_value */));
+}
+
+/*
+  Delete the row we've last read. The row is also passed as a parameter.
+
+  @detail
+    The caller guarantees that buf points to the row that was just read.
+    The row is either table->record[0] or table->record[1].
+    (Compare with InnoDB: row_update_for_mysql() has "UT_NOT_USED(mysql_rec)".)
+
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::delete_row(const uchar *const buf) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(buf != nullptr);
+
+ set_last_rowkey(buf);
+
+ rocksdb::Slice key_slice(m_last_rowkey.ptr(), m_last_rowkey.length());
+ Rdb_transaction *const tx = get_or_create_tx(table->in_use);
+ ulonglong bytes_written = 0;
+
+ const uint index = pk_index(table, m_tbl_def);
+ rocksdb::Status s =
+ delete_or_singledelete(index, tx, m_pk_descr->get_cf(), key_slice);
+ if (!s.ok()) {
+ DBUG_RETURN(tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def,
+ m_table_handler));
+ } else {
+ bytes_written = key_slice.size();
+ }
+
+ longlong hidden_pk_id = 0;
+ if (m_tbl_def->m_key_count > 1 && has_hidden_pk(table)) {
+ int err = read_hidden_pk_id_from_rowkey(&hidden_pk_id);
+ if (err) {
+ DBUG_RETURN(err);
+ }
+ }
+
+ // Delete the record for every secondary index
+ for (uint i = 0; i < m_tbl_def->m_key_count; i++) {
+ if (!is_pk(i, table, m_tbl_def)) {
+ int packed_size;
+ const Rdb_key_def &kd = *m_key_descr_arr[i];
+ packed_size = kd.pack_record(table, m_pack_buffer, buf, m_sk_packed_tuple,
+ nullptr, false, hidden_pk_id);
+ rocksdb::Slice secondary_key_slice(
+ reinterpret_cast<const char *>(m_sk_packed_tuple), packed_size);
+ /* Deleting on secondary key doesn't need any locks: */
+ tx->get_indexed_write_batch()->SingleDelete(kd.get_cf(),
+ secondary_key_slice);
+ bytes_written += secondary_key_slice.size();
+ }
+ }
+
+ tx->incr_delete_count();
+
+ if (do_bulk_commit(tx)) {
+ DBUG_RETURN(HA_ERR_ROCKSDB_BULK_LOAD);
+ }
+#ifdef MARIAROCKS_NOT_YET
+ stats.rows_deleted++;
+#endif
+ update_row_stats(ROWS_DELETED);
+ tx->update_bytes_written(bytes_written);
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+rocksdb::Status ha_rocksdb::delete_or_singledelete(
+ uint index, Rdb_transaction *const tx,
+ rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key) {
+ const bool assume_tracked = can_assume_tracked(ha_thd());
+ if (can_use_single_delete(index)) {
+ return tx->single_delete(column_family, key, assume_tracked);
+ }
+ return tx->delete_key(column_family, key, assume_tracked);
+}
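+
+/*
+  Illustrative RocksDB usage (a standalone sketch with a hypothetical
+  helper name, independent of the handler state above): SingleDelete() is
+  only correct when the key has been Put() at most once since the last
+  deletion -- the guarantee can_use_single_delete() is checking for. A
+  plain Delete() has no such precondition but leaves a heavier tombstone.
+*/
+static void MY_ATTRIBUTE((__unused__))
+rdb_single_delete_sketch(rocksdb::DB *const db,
+                         rocksdb::ColumnFamilyHandle *const cf) {
+  const rocksdb::WriteOptions wo;
+  db->Put(wo, cf, "key", "value");  // key written exactly once ...
+  db->SingleDelete(wo, cf, "key");  // ... so SingleDelete may remove it
+}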
+
+void ha_rocksdb::update_stats(void) {
+ DBUG_ENTER_FUNC();
+
+ stats.records = 0;
+ stats.index_file_length = 0ul;
+ stats.data_file_length = 0ul;
+ stats.mean_rec_length = 0;
+
+ for (uint i = 0; i < m_tbl_def->m_key_count; i++) {
+ if (is_pk(i, table, m_tbl_def)) {
+ stats.data_file_length = m_pk_descr->m_stats.m_actual_disk_size;
+ stats.records = m_pk_descr->m_stats.m_rows;
+ } else {
+ stats.index_file_length += m_key_descr_arr[i]->m_stats.m_actual_disk_size;
+ }
+ }
+
+ DBUG_VOID_RETURN;
+}
+
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ HA_EXIT_FAILURE Error
+*/
+int ha_rocksdb::info(uint flag) {
+ DBUG_ENTER_FUNC();
+
+ if (!table) {
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ if (flag & HA_STATUS_VARIABLE) {
+ /*
+ Test only to simulate corrupted stats
+ */
+ DBUG_EXECUTE_IF("myrocks_simulate_negative_stats",
+ m_pk_descr->m_stats.m_actual_disk_size =
+ -m_pk_descr->m_stats.m_actual_disk_size;);
+
+ update_stats();
+
+ /*
+ If any stats are negative due to bad cached stats, re-run analyze table
+ and re-retrieve the stats.
+ */
+ if (static_cast<longlong>(stats.data_file_length) < 0 ||
+ static_cast<longlong>(stats.index_file_length) < 0 ||
+ static_cast<longlong>(stats.records) < 0) {
+ if (calculate_stats_for_table()) {
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ update_stats();
+ }
+
+ // if number of records is hardcoded, we do not want to force computation
+ // of memtable cardinalities
+ if (stats.records == 0 || (rocksdb_force_compute_memtable_stats &&
+ rocksdb_debug_optimizer_n_rows == 0)) {
+ // First, compute SST files stats
+ uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2];
+ auto r = get_range(pk_index(table, m_tbl_def), buf);
+ uint64_t sz = 0;
+ uint8_t include_flags = rocksdb::DB::INCLUDE_FILES;
+ // recompute SST files stats only if records count is 0
+ if (stats.records == 0) {
+ rdb->GetApproximateSizes(m_pk_descr->get_cf(), &r, 1, &sz,
+ include_flags);
+ stats.records += sz / ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE;
+ stats.data_file_length += sz;
+ }
+      // Second, compute memtable stats. This call is expensive, so cache
+      // the computed values for some time.
+ uint64_t cachetime = rocksdb_force_compute_memtable_stats_cachetime;
+ uint64_t time = (cachetime == 0) ? 0 : my_interval_timer() / 1000;
+ if (cachetime == 0 ||
+ time > m_table_handler->m_mtcache_last_update + cachetime) {
+ uint64_t memtableCount;
+ uint64_t memtableSize;
+
+        // The stats below are calculated from the skiplist, which is a
+        // probabilistic data structure, so the results vary between test
+        // runs. It can also return 0 for quite large tables, which means
+        // the cardinality of memtable-only indexes will be reported as 0.
+ rdb->GetApproximateMemTableStats(m_pk_descr->get_cf(), r,
+ &memtableCount, &memtableSize);
+
+ // Atomically update all of these fields at the same time
+ if (cachetime > 0) {
+ if (m_table_handler->m_mtcache_lock.fetch_add(
+ 1, std::memory_order_acquire) == 0) {
+ m_table_handler->m_mtcache_count = memtableCount;
+ m_table_handler->m_mtcache_size = memtableSize;
+ m_table_handler->m_mtcache_last_update = time;
+ }
+ m_table_handler->m_mtcache_lock.fetch_sub(1,
+ std::memory_order_release);
+ }
+
+ stats.records += memtableCount;
+ stats.data_file_length += memtableSize;
+ } else {
+ // Cached data is still valid, so use it instead
+ stats.records += m_table_handler->m_mtcache_count;
+ stats.data_file_length += m_table_handler->m_mtcache_size;
+ }
+ }
+
+ if (rocksdb_debug_optimizer_n_rows > 0) {
+ stats.records = rocksdb_debug_optimizer_n_rows;
+ }
+
+ if (stats.records != 0) {
+ stats.mean_rec_length = stats.data_file_length / stats.records;
+ }
+ }
+
+ if (flag & HA_STATUS_CONST) {
+ ref_length = m_pk_descr->max_storage_fmt_length();
+
+ for (uint i = 0; i < m_tbl_def->m_key_count; i++) {
+ if (is_hidden_pk(i, table, m_tbl_def)) {
+ continue;
+ }
+ KEY *const k = &table->key_info[i];
+ for (uint j = 0; j < k->ext_key_parts; j++) {
+ const Rdb_index_stats &k_stats = m_key_descr_arr[i]->m_stats;
+ uint x;
+
+ if (k_stats.m_distinct_keys_per_prefix.size() > j &&
+ k_stats.m_distinct_keys_per_prefix[j] > 0) {
+ x = k_stats.m_rows / k_stats.m_distinct_keys_per_prefix[j];
+ /*
+ If the number of rows is less than the number of prefixes (due to
+ sampling), the average number of rows with the same prefix is 1.
+ */
+ if (x == 0) {
+ x = 1;
+ }
+ } else {
+ x = 0;
+ }
+ if (x > stats.records) x = stats.records;
+ if ((x == 0 && rocksdb_debug_optimizer_no_zero_cardinality) ||
+ rocksdb_debug_optimizer_n_rows > 0) {
+        /*
+          Fake cardinality implementation. For example, an (idx1, idx2, idx3)
+          index will have rec_per_key for (idx1)=4, (idx1,2)=2, and
+          (idx1,2,3)=1.
+          rec_per_key for the whole index is 1, and multiplied by 2^n if
+          n suffix columns of the index are not used.
+
+          Make MariaRocks behave the same way as MyRocks does:
+          1. SQL layer thinks that unique secondary indexes are not extended
+             with PK columns (both in MySQL and MariaDB)
+          2. MariaDB also thinks that indexes with partially-covered columns
+             are not extended with PK columns. Use the same number of
+             keyparts that MyRocks would use.
+        */
+        uint ext_key_parts2;
+        if (k->flags & HA_NOSAME)
+          ext_key_parts2= k->ext_key_parts; // This is #1
+        else
+          ext_key_parts2= m_key_descr_arr[i]->get_key_parts(); // This is #2.
+
+        x = 1 << (ext_key_parts2 - j - 1);
+ }
+ k->rec_per_key[j] = x;
+ }
+ }
+ }
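+
+  /*
+    Worked derivation (hypothetical numbers) of the fake-cardinality
+    branch above: for a non-unique 3-part index, ext_key_parts2 == 3 gives
+      j = 0: x = 1 << 2 = 4
+      j = 1: x = 1 << 1 = 2
+      j = 2: x = 1 << 0 = 1
+    i.e. rec_per_key = (4, 2, 1), matching the (idx1)=4, (idx1,2)=2,
+    (idx1,2,3)=1 figures in the comment there.
+  */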
+
+ if (flag & HA_STATUS_ERRKEY) {
+ /*
+ Currently we support only primary keys so we know which key had a
+ uniqueness violation.
+ */
+ errkey = m_dupp_errkey;
+ dup_ref = m_pk_tuple; // TODO(?): this should store packed PK.
+ }
+
+ if (flag & HA_STATUS_AUTO) {
+ stats.auto_increment_value = m_tbl_def->m_auto_incr_val;
+ }
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+void ha_rocksdb::position(const uchar *const record) {
+ DBUG_ENTER_FUNC();
+
+ longlong hidden_pk_id = 0;
+ if (has_hidden_pk(table) && read_hidden_pk_id_from_rowkey(&hidden_pk_id)) {
+ DBUG_ASSERT(false); // should never reach here
+ }
+
+ /*
+ Get packed primary key value from the record.
+
+    (Note: m_last_rowkey has the packed PK of the last-read row, which
+    allows us to handle most cases, but there is an exception: when the
+    slave applies RBR events, it fills the record and then calls
+    position(); rnd_pos())
+
+ Also note that we *can* rely on PK column values being available. This is
+ because table_flags() includes HA_PRIMARY_KEY_REQUIRED_FOR_POSITION bit.
+ When that is true, table->prepare_for_position() adds PK columns into the
+ read set (this may potentially disable index-only access when PK column
+ cannot be restored from its mem-comparable form in the secondary indexes).
+ */
+ const uint packed_size = m_pk_descr->pack_record(
+ table, m_pack_buffer, record, ref, nullptr, false, hidden_pk_id);
+
+ /*
+ It could be that mem-comparable form of PK occupies less than ref_length
+ bytes. Fill the remainder with zeros.
+ */
+ if (ref_length > packed_size) {
+ memset(ref + packed_size, 0, ref_length - packed_size);
+ }
+
+ DBUG_VOID_RETURN;
+}
+
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::rnd_pos(uchar *const buf, uchar *const pos) {
+ DBUG_ENTER_FUNC();
+
+ int rc;
+ size_t len;
+
+#ifdef MARIAROCKS_NOT_YET
+ stats.rows_requested++;
+#endif
+ len = m_pk_descr->key_length(table,
+ rocksdb::Slice((const char *)pos, ref_length));
+ if (len == size_t(-1)) {
+ DBUG_RETURN(HA_ERR_ROCKSDB_CORRUPT_DATA); /* Data corruption? */
+ }
+
+ rc = get_row_by_rowid(buf, pos, len);
+
+ if (!rc) {
+#ifdef MARIAROCKS_NOT_YET
+ stats.rows_read++;
+#endif
+ update_row_stats(ROWS_READ);
+ }
+
+ DBUG_RETURN(rc);
+}
+
+/*
+ @brief
+ Calculate (if needed) the bitmap of indexes that are modified by the
+ current query
+
+ @detail
+  The calculation is done by checking index definitions against the
+  table->write_set column bitmap.
+*/
+
+void ha_rocksdb::calc_updated_indexes() {
+ if (!m_update_scope_is_valid) {
+ m_update_scope_is_valid = true;
+ m_update_scope.clear_all();
+
+ for (uint keynr = 0; keynr < table->s->keys; keynr++) {
+ const Rdb_key_def &kd = *m_key_descr_arr[keynr];
+ /* Walk over all key parts, including the "extended key" suffix */
+ const uint key_parts = kd.get_key_parts();
+ for (uint kp = 0; kp < key_parts; kp++) {
+ if (has_hidden_pk(table) && kp + 1 == key_parts) break;
+
+ Field *const field = kd.get_table_field_for_part_no(table, kp);
+ if (bitmap_is_set(table->write_set, field->field_index)) {
+ m_update_scope.set_bit(keynr);
+ break;
+ }
+ }
+ }
+ }
+}
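+
+/*
+  Worked example (hypothetical schema): for UPDATE t SET b=... on a table
+  with KEY k1(a) and KEY k2(b), only b's bit is in table->write_set, so
+  m_update_scope gets only k2's bit set, and update_write_sk() returns
+  early for k1 via its m_update_scope.is_set() check.
+*/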
+
+/**
+ Update an existing row
+  @param[in] old_data the previous contents of the row (never nullptr here)
+  @param[in] new_data the new contents of the row
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::update_row(const uchar *const old_data, uchar *const new_data) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(old_data != nullptr);
+ DBUG_ASSERT(new_data != nullptr);
+ DBUG_ASSERT(m_lock_rows == RDB_LOCK_WRITE);
+ /*
+    old_data points to the record we're updating. It is the same as the
+    record we've just read (for multi-table UPDATE, too, because the SQL
+    layer will make an rnd_pos() call to re-read the record before calling
+    update_row())
+ */
+ DBUG_ASSERT(new_data == table->record[0]);
+
+ const int rv = update_write_row(old_data, new_data, skip_unique_check());
+
+ if (rv == 0) {
+#ifdef MARIAROCKS_NOT_YET
+ stats.rows_updated++;
+#endif
+ update_row_stats(ROWS_UPDATED);
+ }
+
+ DBUG_RETURN(rv);
+}
+
+/*
+  MariaDB's temporary copy: in MyRocks, this function lives in sql/handler.cc:
+*/
+
+bool can_hold_read_locks_on_select(THD *thd, thr_lock_type lock_type)
+{
+ return (lock_type == TL_READ_WITH_SHARED_LOCKS
+ || lock_type == TL_READ_NO_INSERT
+ || (lock_type != TL_IGNORE
+ && thd->lex->sql_command != SQLCOM_SELECT));
+}
+
+
+/* The following function was copied from ha_blackhole::store_lock: */
+THR_LOCK_DATA **ha_rocksdb::store_lock(THD *const thd, THR_LOCK_DATA **to,
+ enum thr_lock_type lock_type) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(thd != nullptr);
+ DBUG_ASSERT(to != nullptr);
+
+ bool in_lock_tables = my_core::thd_in_lock_tables(thd);
+
+ /* First, make a decision about MyRocks's internal locking */
+ if (lock_type >= TL_WRITE_ALLOW_WRITE) {
+ m_lock_rows = RDB_LOCK_WRITE;
+ } else if (lock_type == TL_READ_WITH_SHARED_LOCKS) {
+ m_lock_rows = RDB_LOCK_READ;
+ } else if (lock_type != TL_IGNORE) {
+ m_lock_rows = RDB_LOCK_NONE;
+ if (THDVAR(thd, lock_scanned_rows)) {
+ /*
+ The following logic was copied directly from
+ ha_innobase::store_lock_with_x_type() in
+ storage/innobase/handler/ha_innodb.cc and causes MyRocks to leave
+ locks in place on rows that are in a table that is not being updated.
+ */
+ const uint sql_command = my_core::thd_sql_command(thd);
+ if ((lock_type == TL_READ && in_lock_tables) ||
+ (lock_type == TL_READ_HIGH_PRIORITY && in_lock_tables) ||
+ can_hold_read_locks_on_select(thd, lock_type)) {
+ ulong tx_isolation = my_core::thd_tx_isolation(thd);
+ if (sql_command != SQLCOM_CHECKSUM &&
+ ((my_core::thd_test_options(thd, OPTION_BIN_LOG) &&
+ tx_isolation > ISO_READ_COMMITTED) ||
+ tx_isolation == ISO_SERIALIZABLE ||
+ (lock_type != TL_READ && lock_type != TL_READ_NO_INSERT) ||
+ (sql_command != SQLCOM_INSERT_SELECT &&
+ sql_command != SQLCOM_REPLACE_SELECT &&
+ sql_command != SQLCOM_UPDATE && sql_command != SQLCOM_DELETE &&
+ sql_command != SQLCOM_CREATE_TABLE))) {
+ m_lock_rows = RDB_LOCK_READ;
+ }
+ }
+ }
+ }
+
+ /* Then, tell the SQL layer what kind of locking it should use: */
+ if (lock_type != TL_IGNORE && m_db_lock.type == TL_UNLOCK) {
+ /*
+      Here is where we get into the guts of a row level lock.
+      If TL_UNLOCK is set and we are not doing a LOCK TABLE or
+      DISCARD/IMPORT TABLESPACE, then allow multiple writers.
+ */
+
+ if ((lock_type >= TL_WRITE_CONCURRENT_INSERT && lock_type <= TL_WRITE) &&
+ !in_lock_tables && !my_core::thd_tablespace_op(thd)) {
+ lock_type = TL_WRITE_ALLOW_WRITE;
+ }
+
+ /*
+ In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
+ MySQL would use the lock TL_READ_NO_INSERT on t2, and that
+ would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
+ to t2. Convert the lock to a normal read lock to allow
+ concurrent inserts to t2.
+ */
+
+ if (lock_type == TL_READ_NO_INSERT && !in_lock_tables) {
+ lock_type = TL_READ;
+ }
+
+ m_db_lock.type = lock_type;
+ }
+
+ *to++ = &m_db_lock;
+
+ DBUG_RETURN(to);
+}
+
+void ha_rocksdb::read_thd_vars(THD *const thd) {
+ m_store_row_debug_checksums = THDVAR(thd, store_row_debug_checksums);
+ m_converter->set_verify_row_debug_checksums(
+ THDVAR(thd, verify_row_debug_checksums));
+ m_checksums_pct = THDVAR(thd, checksums_pct);
+}
+
+ulonglong ha_rocksdb::table_flags() const
+{
+ DBUG_ENTER_FUNC();
+
+ /*
+ HA_BINLOG_STMT_CAPABLE
+    Upstream: MyRocks advertises itself as supporting SBR, but has
+    additional checks in ha_rocksdb::external_lock()/start_stmt() which
+    will return an error if one tries to run such a statement.
+ Exceptions: @@rocksdb_unsafe_for_binlog or we are an SQL slave thread.
+
+ MariaDB: Inform the upper layer we don't support SBR, so it switches to RBR
+ if possible. The exceptions are the same as with the upstream.
+
+ HA_REC_NOT_IN_SEQ
+ If we don't set it, filesort crashes, because it assumes rowids are
+ 1..8 byte numbers
+ HA_PRIMARY_KEY_IN_READ_INDEX
+ This flag is always set, even for tables that:
+ - have no PK
+ - have some (or all) of PK that can't be decoded from the secondary
+ index.
+ */
+ THD *thd= ha_thd();
+ DBUG_RETURN(HA_BINLOG_ROW_CAPABLE |
+ ((thd && (THDVAR(thd, unsafe_for_binlog) ||thd->rgi_slave))?
+ HA_BINLOG_STMT_CAPABLE : 0) |
+ HA_REC_NOT_IN_SEQ | HA_CAN_INDEX_BLOBS |
+ HA_PRIMARY_KEY_IN_READ_INDEX |
+ HA_PRIMARY_KEY_REQUIRED_FOR_POSITION | HA_NULL_IN_KEY |
+ HA_PARTIAL_COLUMN_READ |
+ HA_TABLE_SCAN_ON_INDEX);
+}
+
+
+
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (cannot be SE-specific)
+*/
+int ha_rocksdb::external_lock(THD *const thd, int lock_type) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(thd != nullptr);
+
+ int res = HA_EXIT_SUCCESS;
+#if 0
+ // MariaDB uses a different way to implement this, see ha_rocksdb::table_flags
+
+ int binlog_format = my_core::thd_binlog_format(thd);
+ bool unsafe_for_binlog = THDVAR(ha_thd(), unsafe_for_binlog);
+
+ /*
+ A note about (*) below: In fb/mysql, LOCK TABLE myrocks_table WRITE
+ - produces an error when @@binlog_format='ROW'.
+ - does not produce an error with @@binlog_format='STATEMENT'
+
+ We cannot easily provide this behavior in MariaDB, because LOCK ROWS
+ changes @@binlog_format for the duration of the statement.
+
+ Instead we will make the check in ha_rocksdb::start_stmt and will produce
+ an error in the first DML statement done under LOCK TABLE
+
+ (**) - The same happens for other "DDL-type" commands, so allow them here
+ as well.
+ */
+ if (lock_type == F_WRLCK && !thd->rgi_slave && !unsafe_for_binlog &&
+ binlog_format != BINLOG_FORMAT_ROW &&
+ binlog_format != BINLOG_FORMAT_UNSPEC &&
+ thd->lex->sql_command != SQLCOM_LOCK_TABLES && // (*)
+ thd->lex->sql_command != SQLCOM_ANALYZE && // (**)
+ thd->lex->sql_command != SQLCOM_OPTIMIZE && // (**)
+ thd->lex->sql_command != SQLCOM_FLUSH && // (**)
+ my_core::thd_binlog_filter_ok(thd)) {
+ my_error(ER_REQUIRE_ROW_BINLOG_FORMAT, MYF(0));
+ DBUG_RETURN(HA_ERR_UNSUPPORTED);
+ }
+#endif
+
+ if (lock_type == F_UNLCK) {
+ Rdb_transaction *const tx = get_tx_from_thd(thd);
+
+ if (tx) {
+ tx->io_perf_end_and_record(&m_io_perf);
+ tx->m_n_mysql_tables_in_use--;
+ if (tx->m_n_mysql_tables_in_use == 0 &&
+ !my_core::thd_test_options(thd,
+ OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
+ /*
+ Do like InnoDB: when we get here, it's time to commit a
+ single-statement transaction.
+
+ If the statement involved multiple tables, this code will be executed
+ for each of them, but that's ok because non-first tx->commit() calls
+ will be no-ops.
+ */
+ if (tx->commit_or_rollback()) {
+ res = HA_ERR_INTERNAL_ERROR;
+ }
+ }
+ }
+ } else {
+ if (my_core::thd_tx_isolation(thd) < ISO_READ_COMMITTED ||
+ my_core::thd_tx_isolation(thd) > ISO_REPEATABLE_READ) {
+ my_error(ER_ISOLATION_MODE_NOT_SUPPORTED, MYF(0),
+ tx_isolation_names[my_core::thd_tx_isolation(thd)]);
+ DBUG_RETURN(HA_ERR_UNSUPPORTED);
+ }
+ /*
+ It's nice to do the following on start of every statement. The problem
+ is, handler->start_stmt() is not called for INSERTs.
+ So, we put this code here.
+ */
+ Rdb_transaction *const tx = get_or_create_tx(thd);
+ read_thd_vars(thd);
+
+ m_update_scope_is_valid = false;
+
+ if (skip_unique_check()) {
+ if ((thd->lex->sql_command == SQLCOM_INSERT ||
+ thd->lex->sql_command == SQLCOM_LOAD ||
+ thd->lex->sql_command == SQLCOM_REPLACE) &&
+ (thd->lex->duplicates == DUP_REPLACE ||
+ thd->lex->duplicates == DUP_UPDATE)) {
+ my_error(ER_ON_DUPLICATE_DISABLED, MYF(0), thd->query());
+ DBUG_RETURN(HA_ERR_UNSUPPORTED);
+ }
+ }
+
+ if (lock_type == F_WRLCK) {
+ if (tx->is_tx_read_only()) {
+ my_error(ER_UPDATES_WITH_CONSISTENT_SNAPSHOT, MYF(0));
+ DBUG_RETURN(HA_ERR_UNSUPPORTED);
+ }
+
+#ifdef MARIADB_NOT_YET
+ if (thd->get_explicit_snapshot()) {
+ my_error(ER_UPDATES_WITH_EXPLICIT_SNAPSHOT, MYF(0));
+ DBUG_RETURN(HA_ERR_UNSUPPORTED);
+ }
+#endif
+
+ /*
+ SQL layer signals us to take a write lock. It does so when starting DML
+ statement. We should put locks on the rows we're reading.
+
+ Note: sometimes, external_lock() can be called without a prior
+ ::store_lock call. That's why we need to set lock_* members here, too.
+ */
+ m_lock_rows = RDB_LOCK_WRITE;
+
+ if (thd->lex->sql_command == SQLCOM_CREATE_INDEX ||
+ thd->lex->sql_command == SQLCOM_DROP_INDEX ||
+ thd->lex->sql_command == SQLCOM_ALTER_TABLE) {
+ tx->m_ddl_transaction = true;
+ }
+ }
+ tx->m_n_mysql_tables_in_use++;
+ rocksdb_register_tx(rocksdb_hton, thd, tx);
+ tx->io_perf_start(&m_io_perf);
+ }
+
+ DBUG_RETURN(res);
+}
+
+/**
+ @note
+ A quote from ha_innobase::start_stmt():
+ <quote>
+ MySQL calls this function at the start of each SQL statement inside LOCK
+ TABLES. Inside LOCK TABLES the ::external_lock method does not work to
+ mark SQL statement borders.
+ </quote>
+
+ @return
+ HA_EXIT_SUCCESS OK
+*/
+
+int ha_rocksdb::start_stmt(THD *const thd, thr_lock_type lock_type) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(thd != nullptr);
+
+ Rdb_transaction *const tx = get_or_create_tx(thd);
+ read_thd_vars(thd);
+ rocksdb_register_tx(ht, thd, tx);
+ tx->io_perf_start(&m_io_perf);
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+rocksdb::Range get_range(uint32_t i,
+ uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2],
+ int offset1, int offset2) {
+ uchar *buf_begin = buf;
+ uchar *buf_end = buf + Rdb_key_def::INDEX_NUMBER_SIZE;
+ rdb_netbuf_store_index(buf_begin, i + offset1);
+ rdb_netbuf_store_index(buf_end, i + offset2);
+
+ return rocksdb::Range(
+ rocksdb::Slice((const char *)buf_begin, Rdb_key_def::INDEX_NUMBER_SIZE),
+ rocksdb::Slice((const char *)buf_end, Rdb_key_def::INDEX_NUMBER_SIZE));
+}
+
+static rocksdb::Range get_range(const Rdb_key_def &kd,
+ uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2],
+ int offset1, int offset2) {
+ return get_range(kd.get_index_number(), buf, offset1, offset2);
+}
+
+rocksdb::Range get_range(const Rdb_key_def &kd,
+ uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]) {
+ if (kd.m_is_reverse_cf) {
+ return myrocks::get_range(kd, buf, 1, 0);
+ } else {
+ return myrocks::get_range(kd, buf, 0, 1);
+ }
+}
+
+rocksdb::Range ha_rocksdb::get_range(
+ const int i, uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]) const {
+ return myrocks::get_range(*m_key_descr_arr[i], buf);
+}
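+
+/*
+  Illustrative example (hypothetical index number, assuming
+  rdb_netbuf_store_index() writes the id big-endian): for index number
+  0x0107 in a forward column family, offsets (0, 1) give the range
+    start = 0x00000107, limit = 0x00000108,
+  i.e. exactly the keys whose 4-byte prefix is the index number. A reverse
+  column family swaps the offsets to (1, 0) so that start < limit still
+  holds in the CF's internal ordering.
+*/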
+
+/*
+  This function is called with total_order_seek=true, but setting the
+  upper/lower bounds is not necessary.
+  Setting boundaries is useful when there is no matching key, but in
+  drop_index_thread's case that means the index is marked as removed,
+  so no further seek will happen for that index id.
+*/
+static bool is_myrocks_index_empty(rocksdb::ColumnFamilyHandle *cfh,
+ const bool is_reverse_cf,
+ const rocksdb::ReadOptions &read_opts,
+ const uint index_id) {
+ bool index_removed = false;
+ uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE] = {0};
+ rdb_netbuf_store_uint32(key_buf, index_id);
+ const rocksdb::Slice key =
+ rocksdb::Slice(reinterpret_cast<char *>(key_buf), sizeof(key_buf));
+ std::unique_ptr<rocksdb::Iterator> it(rdb->NewIterator(read_opts, cfh));
+ rocksdb_smart_seek(is_reverse_cf, it.get(), key);
+ if (!it->Valid()) {
+ index_removed = true;
+ } else {
+ if (memcmp(it->key().data(), key_buf, Rdb_key_def::INDEX_NUMBER_SIZE)) {
+ // Key does not have same prefix
+ index_removed = true;
+ }
+ }
+ return index_removed;
+}
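+
+/*
+  Illustrative note: after seeking to index id N, the iterator may land on
+  a key belonging to a neighboring index when no keys of N remain, so the
+  prefix memcmp above is what distinguishes "index is empty" from
+  "iterator positioned on another index".
+*/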
+
+/*
+ Drop index thread's main logic
+*/
+
+void Rdb_drop_index_thread::run() {
+ RDB_MUTEX_LOCK_CHECK(m_signal_mutex);
+
+ for (;;) {
+    // The stop flag might be set by the shutdown command after
+    // drop_index_thread releases m_signal_mutex (i.e. while executing an
+    // expensive Seek()). To prevent drop_index_thread from entering a long
+    // cond_timedwait, we need to check whether the stop flag is set, with
+    // m_signal_mutex held.
+ if (m_stop) {
+ break;
+ }
+
+ timespec ts;
+ int sec= dict_manager.is_drop_index_empty()
+ ? 24 * 60 * 60 // no filtering
+ : 60; // filtering
+ set_timespec(ts,sec);
+
+ const auto ret MY_ATTRIBUTE((__unused__)) =
+ mysql_cond_timedwait(&m_signal_cond, &m_signal_mutex, &ts);
+ if (m_stop) {
+ break;
+ }
+    // make sure no program error is returned
+ DBUG_ASSERT(ret == 0 || ret == ETIMEDOUT);
+ RDB_MUTEX_UNLOCK_CHECK(m_signal_mutex);
+
+ std::unordered_set<GL_INDEX_ID> indices;
+ dict_manager.get_ongoing_drop_indexes(&indices);
+ if (!indices.empty()) {
+ std::unordered_set<GL_INDEX_ID> finished;
+ rocksdb::ReadOptions read_opts;
+ read_opts.total_order_seek = true; // disable bloom filter
+
+ for (const auto d : indices) {
+ uint32 cf_flags = 0;
+ if (!dict_manager.get_cf_flags(d.cf_id, &cf_flags)) {
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Failed to get column family flags "
+ "from cf id %u. MyRocks data dictionary may "
+ "get corrupted.",
+ d.cf_id);
+ abort();
+ }
+ rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(d.cf_id);
+ DBUG_ASSERT(cfh);
+ const bool is_reverse_cf = cf_flags & Rdb_key_def::REVERSE_CF_FLAG;
+
+ uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2];
+ rocksdb::Range range = get_range(d.index_id, buf, is_reverse_cf ? 1 : 0,
+ is_reverse_cf ? 0 : 1);
+ rocksdb::Status status = DeleteFilesInRange(rdb->GetBaseDB(), cfh,
+ &range.start, &range.limit);
+ if (!status.ok()) {
+ if (status.IsShutdownInProgress()) {
+ break;
+ }
+ rdb_handle_io_error(status, RDB_IO_ERROR_BG_THREAD);
+ }
+ status = rdb->CompactRange(getCompactRangeOptions(), cfh, &range.start,
+ &range.limit);
+ if (!status.ok()) {
+ if (status.IsShutdownInProgress()) {
+ break;
+ }
+ rdb_handle_io_error(status, RDB_IO_ERROR_BG_THREAD);
+ }
+ if (is_myrocks_index_empty(cfh, is_reverse_cf, read_opts, d.index_id)) {
+ finished.insert(d);
+ }
+ }
+
+ if (!finished.empty()) {
+ dict_manager.finish_drop_indexes(finished);
+ }
+ }
+ RDB_MUTEX_LOCK_CHECK(m_signal_mutex);
+ }
+
+ RDB_MUTEX_UNLOCK_CHECK(m_signal_mutex);
+}
+
+Rdb_tbl_def *ha_rocksdb::get_table_if_exists(const char *const tablename) {
+ std::string str;
+ if (rdb_normalize_tablename(tablename, &str) != HA_EXIT_SUCCESS) {
+    // We were not passed a table name?
+ DBUG_ASSERT(0);
+ return nullptr;
+ }
+
+ return ddl_manager.find(str);
+}
+
+/*
+  Overloaded function for deleting a table --- it deletes the table's
+  metadata from the data dictionary immediately and deletes the real data
+  in a background thread (asynchronously)
+
+ @param tbl IN MyRocks table definition
+
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::delete_table(Rdb_tbl_def *const tbl) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(tbl != nullptr);
+ DBUG_ASSERT(m_tbl_def == nullptr || m_tbl_def == tbl);
+
+ const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin();
+ rocksdb::WriteBatch *const batch = wb.get();
+
+ dict_manager.add_drop_table(tbl->m_key_descr_arr, tbl->m_key_count, batch);
+
+ /*
+ Remove the table entry in data dictionary (this will also remove it from
+ the persistent data dictionary).
+ */
+ ddl_manager.remove(tbl, batch, true);
+
+ int err = dict_manager.commit(batch);
+ if (err) {
+ DBUG_RETURN(err);
+ }
+
+ rdb_drop_idx_thread.signal();
+ // avoid dangling pointer
+ m_tbl_def = nullptr;
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+/*
+ Note: the following function is called when the table is not open. That is,
+ this->table==nullptr, pk_key_descr==nullptr, etc.
+
+  tablename is a string of the form "./dbname/tablename".
+
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+
+int ha_rocksdb::delete_table(const char *const tablename) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(tablename != nullptr);
+
+ /* Find the table in the hash */
+ Rdb_tbl_def *const tbl = get_table_if_exists(tablename);
+ if (!tbl) {
+ DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
+ }
+
+ DBUG_RETURN(delete_table(tbl));
+}
+
+int ha_rocksdb::remove_rows(Rdb_tbl_def *const tbl) {
+ const rocksdb::WriteOptions wo =
+ rdb_get_rocksdb_write_options(handler::ha_thd());
+
+ rocksdb::ReadOptions opts;
+ opts.total_order_seek = true;
+ Rdb_transaction *const tx = get_or_create_tx(table->in_use);
+
+ char key_buf[MAX_KEY_LENGTH];
+ uint key_len;
+ ulonglong bytes_written = 0;
+
+ uchar lower_bound_buf[Rdb_key_def::INDEX_NUMBER_SIZE];
+ uchar upper_bound_buf[Rdb_key_def::INDEX_NUMBER_SIZE];
+ rocksdb::Slice lower_bound_slice;
+ rocksdb::Slice upper_bound_slice;
+
+ /*
+ Remove all records in each index.
+    (This is not crash-safe, but it doesn't matter, because bulk row
+ deletion will be handled on rocksdb side)
+ */
+ for (uint i = 0; i < tbl->m_key_count; i++) {
+ const Rdb_key_def &kd = *tbl->m_key_descr_arr[i];
+ kd.get_infimum_key(reinterpret_cast<uchar *>(key_buf), &key_len);
+ rocksdb::ColumnFamilyHandle *cf = kd.get_cf();
+ const rocksdb::Slice table_key(key_buf, key_len);
+ setup_iterator_bounds(kd, table_key, Rdb_key_def::INDEX_NUMBER_SIZE,
+ lower_bound_buf, upper_bound_buf, &lower_bound_slice,
+ &upper_bound_slice);
+ DBUG_ASSERT(key_len == Rdb_key_def::INDEX_NUMBER_SIZE);
+ opts.iterate_lower_bound = &lower_bound_slice;
+ opts.iterate_upper_bound = &upper_bound_slice;
+ std::unique_ptr<rocksdb::Iterator> it(rdb->NewIterator(opts, cf));
+
+ it->Seek(table_key);
+ while (it->Valid()) {
+ const rocksdb::Slice key = it->key();
+ if (!kd.covers_key(key)) {
+ break;
+ }
+
+ rocksdb::Status s;
+ if (can_use_single_delete(i)) {
+ s = rdb->SingleDelete(wo, cf, key);
+ } else {
+ s = rdb->Delete(wo, cf, key);
+ }
+
+ if (!s.ok()) {
+ return tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def,
+ m_table_handler);
+ }
+ bytes_written += key.size();
+ it->Next();
+ }
+ }
+
+ tx->update_bytes_written(bytes_written);
+
+ return HA_EXIT_SUCCESS;
+}
+
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (cannot be SE-specific)
+*/
+int ha_rocksdb::rename_table(const char *const from, const char *const to) {
+ DBUG_ENTER_FUNC();
+
+ std::string from_str;
+ std::string to_str;
+ std::string from_db;
+ std::string to_db;
+
+ int rc = rdb_normalize_tablename(from, &from_str);
+ if (rc != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(rc);
+ }
+
+ rc = rdb_split_normalized_tablename(from_str, &from_db);
+ if (rc != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(rc);
+ }
+
+ rc = rdb_normalize_tablename(to, &to_str);
+ if (rc != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(rc);
+ }
+
+ rc = rdb_split_normalized_tablename(to_str, &to_db);
+ if (rc != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(rc);
+ }
+
+ // If the user changed the database part of the name then validate that the
+ // 'to' database exists.
+ if (from_db != to_db && !rdb_database_exists(to_db)) {
+ // If we return a RocksDB specific error code here we get
+ // "error: 206 - Unknown error 206". InnoDB gets
+ // "error -1 - Unknown error -1" so let's match them.
+ DBUG_RETURN(-1);
+ }
+
+ DBUG_EXECUTE_IF("gen_sql_table_name", to_str = to_str + "#sql-test";);
+
+ const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin();
+ rocksdb::WriteBatch *const batch = wb.get();
+ dict_manager.lock();
+
+ if (ddl_manager.rename(from_str, to_str, batch)) {
+ rc = HA_ERR_NO_SUCH_TABLE;
+ } else {
+ rc = dict_manager.commit(batch);
+ }
+ dict_manager.unlock();
+
+ DBUG_RETURN(rc);
+}
+
+/**
+  check_if_incompatible_data() is called if ALTER TABLE cannot otherwise
+  detect whether the new and old definitions are compatible
+
+  @details If there are no other explicit signs, like a changed number of
+  fields, this function will be called by compare_tables()
+  (sql/sql_tables.cc) to decide whether we should rewrite the whole table
+  or only the .frm file.
+
+*/
+
+bool ha_rocksdb::check_if_incompatible_data(HA_CREATE_INFO *const info,
+ uint table_changes) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(info != nullptr);
+
+ // this function is needed only for online alter-table
+ DBUG_RETURN(COMPATIBLE_DATA_NO);
+}
+
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+*/
+int ha_rocksdb::extra(enum ha_extra_function operation) {
+ DBUG_ENTER_FUNC();
+
+ switch (operation) {
+ case HA_EXTRA_KEYREAD:
+ m_keyread_only = true;
+ break;
+ case HA_EXTRA_NO_KEYREAD:
+ m_keyread_only = false;
+ break;
+ case HA_EXTRA_FLUSH:
+ /*
+ If the table has blobs, then they are part of m_retrieved_record.
+ This call invalidates them.
+ */
+ m_retrieved_record.Reset();
+ break;
+ case HA_EXTRA_INSERT_WITH_UPDATE:
+ // INSERT ON DUPLICATE KEY UPDATE
+ if (rocksdb_enable_insert_with_update_caching) {
+ m_insert_with_update = true;
+ }
+ break;
+ case HA_EXTRA_NO_IGNORE_DUP_KEY:
+ // PAIRED with HA_EXTRA_INSERT_WITH_UPDATE or HA_EXTRA_WRITE_CAN_REPLACE
+ // that indicates the end of REPLACE / INSERT ON DUPLICATE KEY
+ m_insert_with_update = false;
+ break;
+
+ default:
+ break;
+ }
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+/*
+ Given a starting key and an ending key, estimate the number of rows that
+ will exist between the two keys.
+*/
+ha_rows ha_rocksdb::records_in_range(uint inx, key_range *const min_key,
+ key_range *const max_key) {
+ DBUG_ENTER_FUNC();
+
+ ha_rows ret = THDVAR(ha_thd(), records_in_range);
+ if (ret) {
+ DBUG_RETURN(ret);
+ }
+ if (table->force_index) {
+ const ha_rows force_rows = THDVAR(ha_thd(), force_index_records_in_range);
+ if (force_rows) {
+ DBUG_RETURN(force_rows);
+ }
+ }
+
+ const Rdb_key_def &kd = *m_key_descr_arr[inx];
+
+ uint size1 = 0;
+ if (min_key) {
+ size1 = kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple,
+ min_key->key, min_key->keypart_map);
+ if (min_key->flag == HA_READ_PREFIX_LAST_OR_PREV ||
+ min_key->flag == HA_READ_PREFIX_LAST ||
+ min_key->flag == HA_READ_AFTER_KEY) {
+ kd.successor(m_sk_packed_tuple, size1);
+ }
+ } else {
+ kd.get_infimum_key(m_sk_packed_tuple, &size1);
+ }
+
+ uint size2 = 0;
+ if (max_key) {
+ size2 = kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple_old,
+ max_key->key, max_key->keypart_map);
+ if (max_key->flag == HA_READ_PREFIX_LAST_OR_PREV ||
+ max_key->flag == HA_READ_PREFIX_LAST ||
+ max_key->flag == HA_READ_AFTER_KEY) {
+ kd.successor(m_sk_packed_tuple_old, size2);
+ }
+    // pad the upper key with 0xFF bytes to make sure it is greater than the lower
+ if (size1 > size2) {
+ memset(m_sk_packed_tuple_old + size2, 0xff, size1 - size2);
+ size2 = size1;
+ }
+ } else {
+ kd.get_supremum_key(m_sk_packed_tuple_old, &size2);
+ }
+
+ const rocksdb::Slice slice1((const char *)m_sk_packed_tuple, size1);
+ const rocksdb::Slice slice2((const char *)m_sk_packed_tuple_old, size2);
+
+ // slice1 >= slice2 means no row will match
+ if (slice1.compare(slice2) >= 0) {
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+ }
+
+ rocksdb::Range r(kd.m_is_reverse_cf ? slice2 : slice1,
+ kd.m_is_reverse_cf ? slice1 : slice2);
+
+ uint64_t sz = 0;
+ auto disk_size = kd.m_stats.m_actual_disk_size;
+ if (disk_size == 0) disk_size = kd.m_stats.m_data_size;
+ auto rows = kd.m_stats.m_rows;
+ if (rows == 0 || disk_size == 0) {
+ rows = 1;
+ disk_size = ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE;
+ }
+
+ // Getting statistics, including from Memtables
+ uint8_t include_flags = rocksdb::DB::INCLUDE_FILES;
+ rdb->GetApproximateSizes(kd.get_cf(), &r, 1, &sz, include_flags);
+ ret = rows * sz / disk_size;
+ uint64_t memTableCount;
+ rdb->GetApproximateMemTableStats(kd.get_cf(), r, &memTableCount, &sz);
+ ret += memTableCount;
+
+ /*
+    GetApproximateSizes() gives estimates, so ret might exceed stats.records.
+    MySQL would then decide to use a full index scan rather than a range
+    scan, which is not efficient in most cases.
+    To prevent this, cap the estimated record count slightly below
+    stats.records.
+ */
+ if (ret >= stats.records) {
+ ret = stats.records * 0.99;
+ }
+
+ if (rocksdb_debug_optimizer_n_rows > 0) {
+ ret = rocksdb_debug_optimizer_n_rows;
+ } else if (ret == 0) {
+ ret = 1;
+ }
+
+ DBUG_RETURN(ret);
+}
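+
+/*
+  Worked example (hypothetical statistics): with rows = 1,000,000 entries
+  over disk_size = 100 MB, and GetApproximateSizes() reporting sz = 1 MB
+  for the requested range, the SST-based estimate above is
+    ret = 1,000,000 * 1 MB / 100 MB = 10,000 rows,
+  after which the approximate memtable entry count for the same range is
+  added, and the total is capped just below stats.records.
+*/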
+
+void ha_rocksdb::update_create_info(HA_CREATE_INFO *const create_info) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(create_info != nullptr);
+
+ if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) {
+ create_info->auto_increment_value = m_tbl_def->m_auto_incr_val;
+ }
+
+ DBUG_VOID_RETURN;
+}
+
+/**
+ @brief
+  Run a manual compaction on OPTIMIZE TABLE in RocksDB.
+  Compaction itself is executed by a background thread in RocksDB, but
+  CompactRange() waits until the compaction completes, so this function
+  may take a long time.
+  Since the RocksDB dataset is allocated per index id, OPTIMIZE TABLE
+  triggers a manual compaction for all indexes of the table.
+  @details
+  The compaction range is from the beginning of the index id to
+  the first row of the next index id. When using a reverse order
+  column family, the first row of the next index id should be
+  the last row of the previous index id.
+
+ @return
+ HA_ADMIN_OK OK
+ other HA_ADMIN error code
+*/
+int ha_rocksdb::optimize(THD *const thd, HA_CHECK_OPT *const check_opt) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(thd != nullptr);
+ DBUG_ASSERT(check_opt != nullptr);
+
+ for (uint i = 0; i < table->s->keys; i++) {
+ uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2];
+ auto range = get_range(i, buf);
+ const rocksdb::Status s = rdb->CompactRange(getCompactRangeOptions(),
+ m_key_descr_arr[i]->get_cf(),
+ &range.start, &range.limit);
+ if (!s.ok()) {
+ DBUG_RETURN(rdb_error_to_mysql(s));
+ }
+ }
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+static int calculate_stats(
+ const std::unordered_map<GL_INDEX_ID, std::shared_ptr<const Rdb_key_def>>
+ &to_recalc,
+ bool include_memtables) {
+ DBUG_ENTER_FUNC();
+
+ // find per column family key ranges which need to be queried
+ std::unordered_map<rocksdb::ColumnFamilyHandle *, std::vector<rocksdb::Range>>
+ ranges;
+ std::unordered_map<GL_INDEX_ID, Rdb_index_stats> stats;
+ std::vector<uchar> buf(to_recalc.size() * 2 * Rdb_key_def::INDEX_NUMBER_SIZE);
+
+ uchar *bufp = buf.data();
+ for (const auto &it : to_recalc) {
+ const GL_INDEX_ID index_id = it.first;
+ auto &kd = it.second;
+ ranges[kd->get_cf()].push_back(myrocks::get_range(*kd, bufp));
+ bufp += 2 * Rdb_key_def::INDEX_NUMBER_SIZE;
+
+ stats[index_id] = Rdb_index_stats(index_id);
+ DBUG_ASSERT(kd->get_key_parts() > 0);
+ stats[index_id].m_distinct_keys_per_prefix.resize(kd->get_key_parts());
+ }
+
+ // get RocksDB table properties for these ranges
+ rocksdb::TablePropertiesCollection props;
+ for (const auto &it : ranges) {
+ const auto old_size MY_ATTRIBUTE((__unused__)) = props.size();
+ const auto status = rdb->GetPropertiesOfTablesInRange(
+ it.first, &it.second[0], it.second.size(), &props);
+ DBUG_ASSERT(props.size() >= old_size);
+ if (!status.ok()) {
+ DBUG_RETURN(ha_rocksdb::rdb_error_to_mysql(
+ status, "Could not access RocksDB properties"));
+ }
+ }
+
+ int num_sst = 0;
+ for (const auto &it : props) {
+ std::vector<Rdb_index_stats> sst_stats;
+ Rdb_tbl_prop_coll::read_stats_from_tbl_props(it.second, &sst_stats);
+ /*
+ sst_stats is a list of index statistics for indexes that have entries
+ in the current SST file.
+ */
+ for (const auto &it1 : sst_stats) {
+ /*
+ Only update statistics for indexes that belong to this SQL table.
+
+ The reason is: We are walking through all SST files that have
+ entries from this table (and so can compute good statistics). For
+ other SQL tables, it can be that we're only seeing a small fraction
+        of the table's entries (and so we can't update statistics based on that).
+ */
+ if (stats.find(it1.m_gl_index_id) == stats.end()) {
+ continue;
+ }
+
+ auto it_index = to_recalc.find(it1.m_gl_index_id);
+ DBUG_ASSERT(it_index != to_recalc.end());
+ if (it_index == to_recalc.end()) {
+ continue;
+ }
+ stats[it1.m_gl_index_id].merge(
+ it1, true, it_index->second->max_storage_fmt_length());
+ }
+ num_sst++;
+ }
+
+ if (include_memtables) {
+ // calculate memtable cardinality
+ Rdb_tbl_card_coll cardinality_collector(rocksdb_table_stats_sampling_pct);
+ auto read_opts = rocksdb::ReadOptions();
+ read_opts.read_tier = rocksdb::ReadTier::kMemtableTier;
+ for (const auto &it_kd : to_recalc) {
+ const std::shared_ptr<const Rdb_key_def> &kd = it_kd.second;
+ Rdb_index_stats &stat = stats[kd->get_gl_index_id()];
+
+ uchar r_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2];
+ auto r = myrocks::get_range(*kd, r_buf);
+ uint64_t memtableCount;
+ uint64_t memtableSize;
+ rdb->GetApproximateMemTableStats(kd->get_cf(), r, &memtableCount,
+ &memtableSize);
+ if (memtableCount < (uint64_t)stat.m_rows / 10) {
+        // Skip indexes that already have enough stats from SST files, to
+        // reduce overhead and avoid degrading the stats of big tables by
+        // sampling from a relatively tiny (less than 10% of the full data
+        // set) memtable dataset
+ continue;
+ }
+
+ std::unique_ptr<rocksdb::Iterator> it =
+ std::unique_ptr<rocksdb::Iterator>(
+ rdb->NewIterator(read_opts, kd->get_cf()));
+
+ rocksdb::Slice first_index_key((const char *)r_buf,
+ Rdb_key_def::INDEX_NUMBER_SIZE);
+
+ cardinality_collector.Reset();
+ for (it->Seek(first_index_key); is_valid(it.get()); it->Next()) {
+ const rocksdb::Slice key = it->key();
+ if (!kd->covers_key(key)) {
+ break; // end of this index
+ }
+ stat.m_rows++;
+
+ cardinality_collector.ProcessKey(key, kd.get(), &stat);
+ }
+ cardinality_collector.AdjustStats(&stat);
+ }
+ }
+
+ // set and persist new stats
+ ddl_manager.set_stats(stats);
+ ddl_manager.persist_stats(true);
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+int ha_rocksdb::calculate_stats_for_table() {
+ DBUG_ENTER_FUNC();
+
+ std::unordered_map<GL_INDEX_ID, std::shared_ptr<const Rdb_key_def>>
+ ids_to_check;
+ for (uint i = 0; i < table->s->keys; i++) {
+ ids_to_check.insert(std::make_pair(m_key_descr_arr[i]->get_gl_index_id(),
+ m_key_descr_arr[i]));
+ }
+
+ DBUG_RETURN(calculate_stats(ids_to_check, true));
+}
+
+/*
+ @return
+ HA_ADMIN_OK OK
+ other HA_ADMIN error code
+*/
+int ha_rocksdb::analyze(THD *const thd, HA_CHECK_OPT *const check_opt) {
+ DBUG_ENTER_FUNC();
+
+ if (table) {
+ if (calculate_stats_for_table() != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(HA_ADMIN_FAILED);
+ }
+ }
+
+ // A call to ::info is needed to repopulate some SQL level structs. This is
+ // necessary for online analyze because we cannot rely on another ::open
+ // call to call info for us.
+ if (info(HA_STATUS_CONST | HA_STATUS_VARIABLE) != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(HA_ADMIN_FAILED);
+ }
+
+ DBUG_RETURN(HA_ADMIN_OK);
+}
+
+void ha_rocksdb::get_auto_increment(ulonglong off, ulonglong inc,
+ ulonglong nb_desired_values,
+ ulonglong *const first_value,
+ ulonglong *const nb_reserved_values) {
+ /*
+ MySQL has a somewhat complicated way of handling the auto-increment value.
+ The first time get_auto_increment is called for a statement,
+ nb_desired_values is the estimate for how many values will be needed. The
+ engine can then reserve some values, and those will be automatically used
+ by MySQL, until a hard-coded value shows up in the insert statement, after
+ which MySQL again calls this function to reset its starting value.
+
+ For simplicity we will just ignore nb_desired_values - we aren't going to
+ reserve any extra values for a multi-insert statement. Each row will
+ simply acquire the next value as needed and we will always tell MySQL that
+ we only reserved 1 value. Since we are using an atomic value for
+ m_auto_incr_val this should be safe - if we had to grab a mutex, doing
+ an actual reserve of some values might be a better solution.
+ */
+ DEBUG_SYNC(ha_thd(), "rocksdb.autoinc_vars");
+ DEBUG_SYNC(ha_thd(), "rocksdb.autoinc_vars2");
+
+ if (off > inc) {
+ off = 1;
+ }
+
+ Field *field;
+ ulonglong new_val, max_val;
+ field = table->key_info[table->s->next_number_index].key_part[0].field;
+ max_val = rdb_get_int_col_max_value(field);
+
+ // Local variable reference to simplify code below
+ auto &auto_incr = m_tbl_def->m_auto_incr_val;
+
+ if (inc == 1) {
+ DBUG_ASSERT(off == 1);
+ // Optimization for the standard case where we are always simply
+ // incrementing from the last position
+
+ // Use a CAS operation in a loop to atomically fetch the next auto
+ // increment value while ensuring that we don't wrap around to a negative
+ // number.
+ //
+ // We set auto_incr to the min of max_val and new_val + 1. This means that
+ // if we're at the maximum, we should be returning the same value for
+ // multiple rows, resulting in duplicate key errors (as expected).
+ //
+ // If we return values greater than the max, the SQL layer will "truncate"
+ // the value anyway, but it means that we store invalid values into
+ // auto_incr that will be visible in SHOW CREATE TABLE.
+ new_val = auto_incr;
+ while (new_val != std::numeric_limits<ulonglong>::max()) {
+ if (auto_incr.compare_exchange_weak(new_val,
+ std::min(new_val + 1, max_val))) {
+ break;
+ }
+ }
+ } else {
+ // The next value can be more complicated if either 'inc' or 'off' is not 1
+ ulonglong last_val = auto_incr;
+
+ if (last_val > max_val) {
+ new_val = std::numeric_limits<ulonglong>::max();
+ } else {
+ // Loop until we can correctly update the atomic value
+ do {
+ DBUG_ASSERT(last_val > 0);
+ // Calculate the next value in the auto increment series: offset
+ // + N * increment where N is 0, 1, 2, ...
+ //
+ // For further information please visit:
+ // http://dev.mysql.com/doc/refman/5.7/en/replication-options-master.html
+ //
+ // The following is confusing so here is an explanation:
+ // To get the next number in the sequence above you subtract out the
+ // offset, calculate the next sequence (N * increment) and then add the
+ // offset back in.
+ //
+ // The additions are rearranged to avoid overflow. The following is
+ // equivalent to (last_val - 1 + inc - off) / inc. This uses the fact
+ // that (a+b)/c = a/c + b/c + (a%c + b%c)/c. To show why:
+ //
+ // (a+b)/c
+ // = (a - a%c + a%c + b - b%c + b%c) / c
+ // = (a - a%c) / c + (b - b%c) / c + (a%c + b%c) / c
+ // = a/c + b/c + (a%c + b%c) / c
+ //
+ // Now, substitute a = last_val - 1, b = inc - off, c = inc to get the
+ // following statement.
+ ulonglong n =
+ (last_val - 1) / inc + ((last_val - 1) % inc + inc - off) / inc;
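+
+ // Worked example of the formula above: off = 2, inc = 3 (sequence
+ // 2, 5, 8, 11, ...) and last_val = 9 gives
+ //   n = 8/3 + (8%3 + 3 - 2)/3 = 2 + 1 = 3,
+ // so new_val = n * inc + off = 11, the first sequence value >= 9.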
+
+ // Check if n * inc + off will overflow. This can only happen if we have
+ // an UNSIGNED BIGINT field.
+ if (n > (std::numeric_limits<ulonglong>::max() - off) / inc) {
+ DBUG_ASSERT(max_val == std::numeric_limits<ulonglong>::max());
+ // The 'last_val' value is already equal to or larger than the largest
+ // value in the sequence. Continuing would wrap around (the unsigned
+ // arithmetic would silently wrap, yielding a wrong value). What should
+ // we do?
+ // We could:
+ // 1) set the new value to the last possible number in our sequence
+ // as described above. The problem with this is that this
+ // number could be smaller than a value in an existing row.
+ // 2) set the new value to the largest possible number. This number
+ // may not be in our sequence, but it is guaranteed to be equal
+ // to or larger than any other value already inserted.
+ //
+ // For now I'm going to take option 2.
+ //
+ // Returning ULLONG_MAX from get_auto_increment will cause the SQL
+ // layer to fail with ER_AUTOINC_READ_FAILED. This means that due to
+ // the SE API for get_auto_increment, inserts will fail with
+ // ER_AUTOINC_READ_FAILED if the column is UNSIGNED BIGINT, but
+ // inserts will fail with ER_DUP_ENTRY for other types (or no failure
+ // if the column is in a non-unique SK).
+ new_val = std::numeric_limits<ulonglong>::max();
+ auto_incr = new_val; // Store the largest value into auto_incr
+ break;
+ }
+
+ new_val = n * inc + off;
+
+ // Attempt to store the new value (plus 1 since m_auto_incr_val contains
+ // the next available value) into the atomic value. If the current
+ // value no longer matches what we have in 'last_val' this will fail and
+ // we will repeat the loop (`last_val` will automatically get updated
+ // with the current value).
+ //
+ // See above explanation for inc == 1 for why we use std::min.
+ } while (!auto_incr.compare_exchange_weak(
+ last_val, std::min(new_val + 1, max_val)));
+ }
+ }
+
+ *first_value = new_val;
+ *nb_reserved_values = 1;
+}
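+
+/*
+  A minimal standalone sketch (illustrative only, not part of the engine)
+  of the CAS clamp loop used above for the inc == 1 case, assuming only
+  <atomic>, <algorithm> and <limits>:
+
+    std::atomic<unsigned long long> auto_incr{1};
+
+    // Returns the reserved value; saturates at max_val instead of wrapping.
+    unsigned long long next_val(unsigned long long max_val) {
+      unsigned long long new_val = auto_incr.load();
+      // On CAS failure, compare_exchange_weak reloads new_val with the
+      // current value and the loop retries; on success new_val is reserved
+      // and auto_incr now holds min(new_val + 1, max_val).
+      while (new_val != std::numeric_limits<unsigned long long>::max() &&
+             !auto_incr.compare_exchange_weak(
+                 new_val, std::min(new_val + 1, max_val))) {
+      }
+      return new_val;
+    }
+*/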
+
+#ifndef DBUG_OFF
+
+/* Debugger help function */
+static char dbug_item_print_buf[512];
+
+const char *dbug_print_item(Item *const item) {
+ char *const buf = dbug_item_print_buf;
+ String str(buf, sizeof(dbug_item_print_buf), &my_charset_bin);
+ str.length(0);
+ if (!item) return "(Item*)nullptr";
+ item->print(&str, QT_ORDINARY);
+ if (str.c_ptr() == buf) {
+ return buf;
+ } else {
+ return "Couldn't fit into buffer";
+ }
+}
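+
+/*
+  Meant to be invoked from a debugger rather than called from code, e.g.
+  in gdb, on some Item pointer `cond`: (gdb) print dbug_print_item(cond)
+*/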
+
+#endif /*DBUG_OFF*/
+
+/**
+ SQL layer calls this function to push an index condition.
+
+ @details
+ The condition is for index keyno (only one condition can be pushed at a
+ time).
+  The caller guarantees that the condition refers only to index fields; in
+  addition, the fields must have
+
+ $field->part_of_key.set_bit(keyno)
+
+ which means that
+
+ (handler->index_flags(keyno, $keypart, 0) & HA_KEYREAD_ONLY) == 1
+
+  which means that the field value can be restored from the index tuple.
+
+ @return
+ Part of condition we couldn't check (always nullptr).
+*/
+
+class Item *ha_rocksdb::idx_cond_push(uint keyno, class Item *const idx_cond) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(keyno != MAX_KEY);
+ DBUG_ASSERT(idx_cond != nullptr);
+
+ pushed_idx_cond = idx_cond;
+ pushed_idx_cond_keyno = keyno;
+ in_range_check_pushed_down = TRUE;
+
+ /* We will check the whole condition */
+ DBUG_RETURN(nullptr);
+}
+
+/*
+ Checks if inplace alter is supported for a given operation.
+*/
+
+my_core::enum_alter_inplace_result ha_rocksdb::check_if_supported_inplace_alter(
+ TABLE *altered_table, my_core::Alter_inplace_info *const ha_alter_info) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(ha_alter_info != nullptr);
+
+ if (ha_alter_info->handler_flags &
+ ~(my_core::Alter_inplace_info::DROP_INDEX |
+ my_core::Alter_inplace_info::DROP_UNIQUE_INDEX |
+ my_core::Alter_inplace_info::ADD_INDEX |
+ my_core::Alter_inplace_info::ALTER_PARTITIONED |
+ my_core::Alter_inplace_info::ADD_UNIQUE_INDEX |
+ my_core::Alter_inplace_info::CHANGE_CREATE_OPTION)) {
+ DBUG_RETURN(my_core::HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+ /* We don't support unique keys on tables with no primary key */
+ if ((ha_alter_info->handler_flags &
+ my_core::Alter_inplace_info::ADD_UNIQUE_INDEX) &&
+ has_hidden_pk(altered_table)) {
+ DBUG_RETURN(my_core::HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+ /* Among the table options, we only support changing auto_increment. */
+ if ((ha_alter_info->handler_flags &
+ my_core::Alter_inplace_info::CHANGE_CREATE_OPTION) &&
+ !(ha_alter_info->create_info->used_fields & HA_CREATE_USED_AUTO)) {
+ DBUG_RETURN(my_core::HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+ DBUG_RETURN(my_core::HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE);
+}
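+
+/*
+  Illustrative consequences of the checks above (not an exhaustive list):
+
+    ALTER TABLE t ADD INDEX k(c)        -- inplace (ADD_INDEX)
+    ALTER TABLE t DROP INDEX k          -- inplace (DROP_INDEX)
+    ALTER TABLE t AUTO_INCREMENT = 100  -- inplace (CHANGE_CREATE_OPTION
+                                           with HA_CREATE_USED_AUTO)
+    ALTER TABLE t ADD UNIQUE INDEX u(c) -- rejected if t has a hidden PK,
+                                           so the SQL layer falls back to
+                                           a table copy
+    ALTER TABLE t ADD COLUMN c2 INT     -- its handler flag is outside the
+                                           set accepted above, so it is not
+                                           done inplace
+*/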
+
+/**
+ Allows the storage engine to update internal structures with concurrent
+ writes blocked. If check_if_supported_inplace_alter() returns
+ HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE or
+  HA_ALTER_INPLACE_SHARED_AFTER_PREPARE, this function is called with an
+  exclusive lock; otherwise the same level of locking as for
+  inplace_alter_table() will be used.
+
+ @note Storage engines are responsible for reporting any errors by
+ calling my_error()/print_error()
+
+ @note If this function reports error, commit_inplace_alter_table()
+ will be called with commit= false.
+
+  @note For partitioning, failing to prepare one partition means that
+ commit_inplace_alter_table() will be called to roll back changes for
+ all partitions. This means that commit_inplace_alter_table() might be
+ called without prepare_inplace_alter_table() having been called first
+ for a given partition.
+
+ @param altered_table TABLE object for new version of table.
+ @param ha_alter_info Structure describing changes to be done
+ by ALTER TABLE and holding data used
+ during in-place alter.
+
+ @retval true Error
+ @retval false Success
+*/
+bool ha_rocksdb::prepare_inplace_alter_table(
+ TABLE *const altered_table,
+ my_core::Alter_inplace_info *const ha_alter_info) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(altered_table != nullptr);
+ DBUG_ASSERT(ha_alter_info != nullptr);
+
+ Rdb_tbl_def *new_tdef = nullptr;
+ std::shared_ptr<Rdb_key_def> *old_key_descr = nullptr;
+ std::shared_ptr<Rdb_key_def> *new_key_descr = nullptr;
+ uint old_n_keys = m_tbl_def->m_key_count;
+ uint new_n_keys = altered_table->s->keys;
+ std::unordered_set<std::shared_ptr<Rdb_key_def>> added_indexes;
+ std::unordered_set<GL_INDEX_ID> dropped_index_ids;
+ uint n_dropped_keys = 0;
+ uint n_added_keys = 0;
+ ulonglong max_auto_incr = 0;
+
+ if (ha_alter_info->handler_flags &
+ (my_core::Alter_inplace_info::DROP_INDEX |
+ my_core::Alter_inplace_info::DROP_UNIQUE_INDEX |
+ my_core::Alter_inplace_info::ADD_INDEX |
+ my_core::Alter_inplace_info::ADD_UNIQUE_INDEX)) {
+ if (has_hidden_pk(altered_table)) {
+ new_n_keys += 1;
+ }
+
+ const TABLE *const old_table = table;
+ old_key_descr = m_tbl_def->m_key_descr_arr;
+ new_key_descr = new std::shared_ptr<Rdb_key_def>[new_n_keys];
+
+ new_tdef = new Rdb_tbl_def(m_tbl_def->full_tablename());
+ new_tdef->m_key_descr_arr = new_key_descr;
+ new_tdef->m_key_count = new_n_keys;
+ new_tdef->m_auto_incr_val =
+ m_tbl_def->m_auto_incr_val.load(std::memory_order_relaxed);
+ new_tdef->m_hidden_pk_val =
+ m_tbl_def->m_hidden_pk_val.load(std::memory_order_relaxed);
+
+ if (create_key_defs(altered_table, new_tdef, table, m_tbl_def)) {
+ /* Delete the new key descriptors */
+ delete[] new_key_descr;
+
+ /*
+ Explicitly mark as nullptr so we don't accidentally remove entries
+ from data dictionary on cleanup (or cause double delete[]).
+ */
+ new_tdef->m_key_descr_arr = nullptr;
+ delete new_tdef;
+
+ my_error(ER_KEY_CREATE_DURING_ALTER, MYF(0));
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ uint i;
+ uint j;
+
+ /* Determine which (if any) key definition(s) need to be dropped */
+ for (i = 0; i < ha_alter_info->index_drop_count; i++) {
+ const KEY *const dropped_key = ha_alter_info->index_drop_buffer[i];
+ for (j = 0; j < old_n_keys; j++) {
+ const KEY *const old_key =
+ &old_table->key_info[old_key_descr[j]->get_keyno()];
+
+ if (!compare_keys(old_key, dropped_key)) {
+ dropped_index_ids.insert(old_key_descr[j]->get_gl_index_id());
+ break;
+ }
+ }
+ }
+
+ /* Determine which (if any) key definition(s) need to be added */
+ int identical_indexes_found = 0;
+ for (i = 0; i < ha_alter_info->index_add_count; i++) {
+ const KEY *const added_key =
+ &ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[i]];
+ for (j = 0; j < new_n_keys; j++) {
+ const KEY *const new_key =
+ &altered_table->key_info[new_key_descr[j]->get_keyno()];
+ if (!compare_keys(new_key, added_key)) {
+ /*
+ Check for cases where an 'identical' index is being dropped and
+ re-added in a single ALTER statement. Turn this into a no-op as the
+ index has not changed.
+
+ E.g. unique index -> non-unique index requires no change.
+
+ Note that cases where the index name remains the same but the
+ key-parts are changed are already handled in create_inplace_key_defs.
+ In these cases the index needs to be rebuilt.
+ */
+ if (dropped_index_ids.count(new_key_descr[j]->get_gl_index_id())) {
+ dropped_index_ids.erase(new_key_descr[j]->get_gl_index_id());
+ identical_indexes_found++;
+ } else {
+ added_indexes.insert(new_key_descr[j]);
+ }
+
+ break;
+ }
+ }
+ }
+
+ n_dropped_keys = ha_alter_info->index_drop_count - identical_indexes_found;
+ n_added_keys = ha_alter_info->index_add_count - identical_indexes_found;
+ DBUG_ASSERT(dropped_index_ids.size() == n_dropped_keys);
+ DBUG_ASSERT(added_indexes.size() == n_added_keys);
+ DBUG_ASSERT(new_n_keys == (old_n_keys - n_dropped_keys + n_added_keys));
+ }
+ if (ha_alter_info->handler_flags &
+ my_core::Alter_inplace_info::CHANGE_CREATE_OPTION) {
+ if (!new_tdef) {
+ new_tdef = m_tbl_def;
+ }
+ if (table->found_next_number_field) {
+ max_auto_incr = load_auto_incr_value_from_index();
+ }
+ }
+
+ ha_alter_info->handler_ctx = new Rdb_inplace_alter_ctx(
+ new_tdef, old_key_descr, new_key_descr, old_n_keys, new_n_keys,
+ added_indexes, dropped_index_ids, n_added_keys, n_dropped_keys,
+ max_auto_incr);
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+/**
+ Alter the table structure in-place with operations specified using
+ HA_ALTER_FLAGS and Alter_inplace_info. The level of concurrency allowed
+ during this operation depends on the return value from
+ check_if_supported_inplace_alter().
+
+ @note Storage engines are responsible for reporting any errors by
+ calling my_error()/print_error()
+
+ @note If this function reports error, commit_inplace_alter_table()
+ will be called with commit= false.
+
+ @param altered_table TABLE object for new version of table.
+ @param ha_alter_info Structure describing changes to be done
+ by ALTER TABLE and holding data used
+ during in-place alter.
+
+ @retval true Error
+ @retval false Success
+*/
+bool ha_rocksdb::inplace_alter_table(
+ TABLE *const altered_table,
+ my_core::Alter_inplace_info *const ha_alter_info) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(altered_table != nullptr);
+ DBUG_ASSERT(ha_alter_info != nullptr);
+ DBUG_ASSERT(ha_alter_info->handler_ctx != nullptr);
+
+ Rdb_inplace_alter_ctx *const ctx =
+ static_cast<Rdb_inplace_alter_ctx *>(ha_alter_info->handler_ctx);
+
+ if (ha_alter_info->handler_flags &
+ (my_core::Alter_inplace_info::ADD_INDEX |
+ my_core::Alter_inplace_info::ADD_UNIQUE_INDEX)) {
+ /*
+ Buffers need to be set up again to account for new, possibly longer
+ secondary keys.
+ */
+ free_key_buffers();
+
+ DBUG_ASSERT(ctx != nullptr);
+
+ /*
+ If adding unique index, allocate special buffers for duplicate checking.
+ */
+ int err;
+ if ((err = alloc_key_buffers(
+ altered_table, ctx->m_new_tdef,
+ ha_alter_info->handler_flags &
+ my_core::Alter_inplace_info::ADD_UNIQUE_INDEX))) {
+ my_error(ER_OUT_OF_RESOURCES, MYF(0));
+ DBUG_RETURN(err);
+ }
+
+ /* Populate all new secondary keys by scanning the primary key. */
+ if ((err = inplace_populate_sk(altered_table, ctx->m_added_indexes))) {
+ my_error(ER_SK_POPULATE_DURING_ALTER, MYF(0));
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+ }
+
+ DBUG_EXECUTE_IF("myrocks_simulate_index_create_rollback", {
+ dbug_create_err_inplace_alter();
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ };);
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+/**
+ Scan the Primary Key index entries and populate the new secondary keys.
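+
+ At a high level the flow below is: (1) register the new index ids as
+ ongoing creations in the data dictionary, (2) scan the primary key in
+ order, packing one new secondary key entry per row into an external
+ merge-sort buffer, (3) merge the sorted chunks, checking for duplicates
+ when the index is unique, and (4) bulk load the merged output into
+ RocksDB via the SSTFileWriter API.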
+*/
+int ha_rocksdb::inplace_populate_sk(
+ TABLE *const new_table_arg,
+ const std::unordered_set<std::shared_ptr<Rdb_key_def>> &indexes) {
+ DBUG_ENTER_FUNC();
+ int res = HA_EXIT_SUCCESS;
+ const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin();
+ rocksdb::WriteBatch *const batch = wb.get();
+
+ /* Update the data dictionary */
+ std::unordered_set<GL_INDEX_ID> create_index_ids;
+ for (const auto &index : indexes) {
+ create_index_ids.insert(index->get_gl_index_id());
+ }
+ dict_manager.add_create_index(create_index_ids, batch);
+ res = dict_manager.commit(batch);
+ if (res != HA_EXIT_SUCCESS) {
+ return res;
+ }
+
+ /*
+ Add uncommitted key definitions to ddl_manager. We need to do this
+ so that the property collector can find this keydef when it needs to
+ update stats. The property collector looks for the keydef in the
+ data dictionary, but it won't be there yet since this key definition
+ is still in the creation process.
+ */
+ ddl_manager.add_uncommitted_keydefs(indexes);
+
+ const bool hidden_pk_exists = has_hidden_pk(table);
+
+ Rdb_transaction *tx = get_or_create_tx(table->in_use);
+
+ /*
+ There is one specific scenario where m_sst_info may not be nullptr. This
+ happens if the handler we're using happens to be the handler where the PK
+ bulk load was done on. The sequence of events that lead to this is as
+ follows (T1 is PK bulk load, T2 is SK alter table):
+
+ T1: Execute last INSERT statement
+ T1: Return TABLE and handler object back to Table_cache_manager
+ T1: Close connection
+ T2: Execute ALTER statement
+ T2: Take same TABLE/handler from Table_cache_manager
+ T2: Call closefrm which will call finalize_bulk_load on every other open
+ table/handler *except* the one it's on.
+ T2: Acquire stale snapshot of PK
+ T1: Call finalize_bulk_load
+
+ This is rare because usually closefrm will call the destructor (and thus
+ finalize_bulk_load) on the handler where PK bulk load is done. However, if
+ the thread ids of the bulk load thread and the alter thread differ by a
+ multiple of table_cache_instances (8 by default), then they hash to the
+ same bucket in Table_cache_manager and the alter thread will not call
+ the destructor on the handler it is holding. Thus, its m_sst_info will not
+ be nullptr.
+
+ At this point, it is safe to refresh the snapshot because we know all other
+ open handlers have been closed at this point, and the one we're on is the
+ only one left.
+ */
+ if (m_sst_info) {
+ if ((res = finalize_bulk_load())) {
+ DBUG_RETURN(res);
+ }
+ tx->commit();
+ }
+
+ const ulonglong rdb_merge_buf_size = THDVAR(ha_thd(), merge_buf_size);
+ const ulonglong rdb_merge_combine_read_size =
+ THDVAR(ha_thd(), merge_combine_read_size);
+ const ulonglong rdb_merge_tmp_file_removal_delay =
+ THDVAR(ha_thd(), merge_tmp_file_removal_delay_ms);
+
+ for (const auto &index : indexes) {
+ bool is_unique_index =
+ new_table_arg->key_info[index->get_keyno()].flags & HA_NOSAME;
+
+ Rdb_index_merge rdb_merge(tx->get_rocksdb_tmpdir(), rdb_merge_buf_size,
+ rdb_merge_combine_read_size,
+ rdb_merge_tmp_file_removal_delay,
+ index->get_cf());
+
+ if ((res = rdb_merge.init())) {
+ DBUG_RETURN(res);
+ }
+
+ /*
+ Note: We pass in the currently existing table + tbl_def object here,
+ as the pk index position may have changed in the case of hidden primary
+ keys.
+ */
+ const uint pk = pk_index(table, m_tbl_def);
+ ha_index_init(pk, true);
+
+ /* Scan each record in the primary key in order */
+ for (res = index_first(table->record[0]); res == 0;
+ res = index_next(table->record[0])) {
+ longlong hidden_pk_id = 0;
+ if (hidden_pk_exists &&
+ (res = read_hidden_pk_id_from_rowkey(&hidden_pk_id))) {
+ // NO_LINT_DEBUG
+ sql_print_error("Error retrieving hidden pk id.");
+ ha_index_end();
+ DBUG_RETURN(res);
+ }
+
+ /* Create new secondary index entry */
+ const int new_packed_size = index->pack_record(
+ new_table_arg, m_pack_buffer, table->record[0], m_sk_packed_tuple,
+ &m_sk_tails, should_store_row_debug_checksums(), hidden_pk_id, 0,
+ nullptr, m_ttl_bytes);
+
+ const rocksdb::Slice key = rocksdb::Slice(
+ reinterpret_cast<const char *>(m_sk_packed_tuple), new_packed_size);
+ const rocksdb::Slice val =
+ rocksdb::Slice(reinterpret_cast<const char *>(m_sk_tails.ptr()),
+ m_sk_tails.get_current_pos());
+
+ /*
+ Add record to offset tree in preparation for writing out to
+ disk in sorted chunks.
+ */
+ if ((res = rdb_merge.add(key, val))) {
+ ha_index_end();
+ DBUG_RETURN(res);
+ }
+ }
+
+ if (res != HA_ERR_END_OF_FILE) {
+ // NO_LINT_DEBUG
+ sql_print_error("Error retrieving index entry from primary key.");
+ ha_index_end();
+ DBUG_RETURN(res);
+ }
+
+ ha_index_end();
+
+ /*
+ Perform an n-way merge of the n sorted buffers on disk, then write all
+ results to RocksDB via the SSTFileWriter API.
+ */
+ rocksdb::Slice merge_key;
+ rocksdb::Slice merge_val;
+
+ struct unique_sk_buf_info sk_info;
+ sk_info.dup_sk_buf = m_dup_sk_packed_tuple;
+ sk_info.dup_sk_buf_old = m_dup_sk_packed_tuple_old;
+
+ while ((res = rdb_merge.next(&merge_key, &merge_val)) == 0) {
+ /* Perform uniqueness check if needed */
+ if (is_unique_index) {
+ if (check_duplicate_sk(new_table_arg, *index, &merge_key, &sk_info)) {
+ /*
+ Duplicate entry found when trying to create unique secondary key.
+ We need to unpack the record into new_table_arg->record[0] as it
+ is used inside print_keydup_error so that the error message shows
+ the duplicate record.
+ */
+ if (index->unpack_record(
+ new_table_arg, new_table_arg->record[0], &merge_key,
+ &merge_val, m_converter->get_verify_row_debug_checksums())) {
+ /* Should never reach here */
+ DBUG_ASSERT(0);
+ }
+
+ print_keydup_error(new_table_arg,
+ &new_table_arg->key_info[index->get_keyno()],
+ MYF(0));
+ DBUG_RETURN(ER_DUP_ENTRY);
+ }
+ }
+
+ /*
+ Insert key and slice to SST via SSTFileWriter API.
+ */
+ if ((res = bulk_load_key(tx, *index, merge_key, merge_val, false))) {
+ break;
+ }
+ }
+
+ /*
+ Here, res == -1 means that we are finished, while > 0 means an error
+ occurred.
+ */
+ if (res > 0) {
+ // NO_LINT_DEBUG
+ sql_print_error("Error while bulk loading keys in external merge sort.");
+ DBUG_RETURN(res);
+ }
+
+ bool is_critical_error;
+ res = tx->finish_bulk_load(&is_critical_error);
+ if (res && is_critical_error) {
+ // NO_LINT_DEBUG
+ sql_print_error("Error finishing bulk load.");
+ DBUG_RETURN(res);
+ }
+ }
+
+ /*
+ Explicitly tell jemalloc to clean up any unused dirty pages at this point.
+ See https://reviews.facebook.net/D63723 for more details.
+ */
+ purge_all_jemalloc_arenas();
+
+ DBUG_EXECUTE_IF("crash_during_online_index_creation", DBUG_SUICIDE(););
+ DBUG_RETURN(res);
+}
+
+/**
+ Commit or rollback the changes made during prepare_inplace_alter_table()
+ and inplace_alter_table() inside the storage engine.
+ Note that in case of rollback the allowed level of concurrency during
+ this operation will be the same as for inplace_alter_table() and thus
+ might be higher than during prepare_inplace_alter_table(). (For example,
+ concurrent writes were blocked during prepare, but might not be during
+ rollback).
+
+ @note Storage engines are responsible for reporting any errors by
+ calling my_error()/print_error()
+
+ @note If this function with commit= true reports error, it will be called
+ again with commit= false.
+
+ @note In case of partitioning, this function might be called for rollback
+ without prepare_inplace_alter_table() having been called first.
+  Also, partitioned tables set ha_alter_info->group_commit_ctx to a
+  NULL-terminated array of the partitions' handlers; if all of them are
+  committed as one, then group_commit_ctx should be set to NULL to indicate
+  to the partitioning handler that all partition handlers are committed.
+ @see prepare_inplace_alter_table().
+
+ @param altered_table TABLE object for new version of table.
+ @param ha_alter_info Structure describing changes to be done
+ by ALTER TABLE and holding data used
+ during in-place alter.
+ @param commit True => Commit, False => Rollback.
+
+ @retval true Error
+ @retval false Success
+*/
+bool ha_rocksdb::commit_inplace_alter_table(
+ my_core::TABLE *const altered_table,
+ my_core::Alter_inplace_info *const ha_alter_info, bool commit) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(altered_table != nullptr);
+ DBUG_ASSERT(ha_alter_info != nullptr);
+
+ Rdb_inplace_alter_ctx *const ctx0 =
+ static_cast<Rdb_inplace_alter_ctx *>(ha_alter_info->handler_ctx);
+
+ DEBUG_SYNC(ha_thd(), "rocksdb.commit_in_place_alter_table");
+
+ /*
+ IMPORTANT: When rollback is requested, mysql will abort with
+ an assertion failure. That means every failed commit during inplace alter
+  table will result in a fatal error on the server. Indexes whose creation
+  was still ongoing will be detected when the server restarts, and dropped.
+
+ For partitioned tables, a rollback call to this function (commit == false)
+ is done for each partition. A successful commit call only executes once
+ for all partitions.
+ */
+ if (!commit) {
+ /* If ctx has not been created yet, nothing to do here */
+ if (!ctx0) {
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+ }
+
+ /*
+ Cannot call destructor for Rdb_tbl_def directly because we don't want to
+ erase the mappings inside the ddl_manager, as the old_key_descr is still
+ using them.
+ */
+ if (ctx0->m_new_key_descr) {
+ /* Delete the new key descriptors */
+ for (uint i = 0; i < ctx0->m_new_tdef->m_key_count; i++) {
+ ctx0->m_new_key_descr[i] = nullptr;
+ }
+
+ delete[] ctx0->m_new_key_descr;
+ ctx0->m_new_key_descr = nullptr;
+ ctx0->m_new_tdef->m_key_descr_arr = nullptr;
+
+ delete ctx0->m_new_tdef;
+ }
+
+ /* Remove uncommitted key definitions from ddl_manager */
+ ddl_manager.remove_uncommitted_keydefs(ctx0->m_added_indexes);
+
+ /* Rollback any partially created indexes */
+ dict_manager.rollback_ongoing_index_creation();
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+ }
+
+ DBUG_ASSERT(ctx0);
+
+ /*
+ For partitioned tables, we need to commit all changes to all tables at
+ once, unlike in the other inplace alter API methods.
+ */
+ inplace_alter_handler_ctx **ctx_array;
+ inplace_alter_handler_ctx *ctx_single[2];
+
+ if (ha_alter_info->group_commit_ctx) {
+ DBUG_EXECUTE_IF("crash_during_index_creation_partition", DBUG_SUICIDE(););
+ ctx_array = ha_alter_info->group_commit_ctx;
+ } else {
+ ctx_single[0] = ctx0;
+ ctx_single[1] = nullptr;
+ ctx_array = ctx_single;
+ }
+
+ DBUG_ASSERT(ctx0 == ctx_array[0]);
+ ha_alter_info->group_commit_ctx = nullptr;
+
+ if (ha_alter_info->handler_flags &
+ (my_core::Alter_inplace_info::DROP_INDEX |
+ my_core::Alter_inplace_info::DROP_UNIQUE_INDEX |
+ my_core::Alter_inplace_info::ADD_INDEX |
+ my_core::Alter_inplace_info::ADD_UNIQUE_INDEX)) {
+ const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin();
+ rocksdb::WriteBatch *const batch = wb.get();
+ std::unordered_set<GL_INDEX_ID> create_index_ids;
+
+ m_tbl_def = ctx0->m_new_tdef;
+ m_key_descr_arr = m_tbl_def->m_key_descr_arr;
+ m_pk_descr = m_key_descr_arr[pk_index(altered_table, m_tbl_def)];
+
+ dict_manager.lock();
+ for (inplace_alter_handler_ctx **pctx = ctx_array; *pctx; pctx++) {
+ Rdb_inplace_alter_ctx *const ctx =
+ static_cast<Rdb_inplace_alter_ctx *>(*pctx);
+
+ /* Mark indexes to be dropped */
+ dict_manager.add_drop_index(ctx->m_dropped_index_ids, batch);
+
+ for (const auto &index : ctx->m_added_indexes) {
+ create_index_ids.insert(index->get_gl_index_id());
+ }
+
+ if (ddl_manager.put_and_write(ctx->m_new_tdef, batch)) {
+ /*
+ Failed to write the new entry into the data dictionary; this should
+ never happen.
+ */
+ DBUG_ASSERT(0);
+ }
+
+ /*
+ Remove uncommitted key definitions from ddl_manager, as they are now
+ committed into the data dictionary.
+ */
+ ddl_manager.remove_uncommitted_keydefs(ctx->m_added_indexes);
+ }
+
+ if (dict_manager.commit(batch)) {
+ /*
+ Should never reach here. We assume MyRocks will abort if commit fails.
+ */
+ DBUG_ASSERT(0);
+ }
+
+ dict_manager.unlock();
+
+ /* Mark ongoing index creations as finished / remove them from the data dictionary */
+ dict_manager.finish_indexes_operation(
+ create_index_ids, Rdb_key_def::DDL_CREATE_INDEX_ONGOING);
+
+ rdb_drop_idx_thread.signal();
+ }
+
+ if (ha_alter_info->handler_flags &
+ (my_core::Alter_inplace_info::CHANGE_CREATE_OPTION)) {
+ const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin();
+ rocksdb::WriteBatch *const batch = wb.get();
+ std::unordered_set<GL_INDEX_ID> create_index_ids;
+
+ ulonglong auto_incr_val = ha_alter_info->create_info->auto_increment_value;
+
+ for (inplace_alter_handler_ctx **pctx = ctx_array; *pctx; pctx++) {
+ Rdb_inplace_alter_ctx *const ctx =
+ static_cast<Rdb_inplace_alter_ctx *>(*pctx);
+ auto_incr_val = std::max(auto_incr_val, ctx->m_max_auto_incr);
+ dict_manager.put_auto_incr_val(
+ batch, ctx->m_new_tdef->get_autoincr_gl_index_id(), auto_incr_val,
+ true /* overwrite */);
+ ctx->m_new_tdef->m_auto_incr_val = auto_incr_val;
+ }
+
+ if (dict_manager.commit(batch)) {
+ DBUG_ASSERT(0);
+ }
+ }
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+#define SHOW_FNAME(name) rocksdb_show_##name
+
+#define DEF_SHOW_FUNC(name, key) \
+ static int SHOW_FNAME(name)(MYSQL_THD thd, SHOW_VAR * var, char *buff) { \
+ rocksdb_status_counters.name = \
+ rocksdb_stats->getTickerCount(rocksdb::key); \
+ var->type = SHOW_LONGLONG; \
+ var->value = reinterpret_cast<char *>(&rocksdb_status_counters.name); \
+ return HA_EXIT_SUCCESS; \
+ }
+
+#define DEF_STATUS_VAR(name) \
+ { "rocksdb_" #name, (char *)&SHOW_FNAME(name), SHOW_FUNC }
+
+#define DEF_STATUS_VAR_PTR(name, ptr, option) \
+ { "rocksdb_" name, (char *)ptr, option }
+
+#define DEF_STATUS_VAR_FUNC(name, ptr, option) \
+ { name, reinterpret_cast<char *>(ptr), option }
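+
+/*
+  For clarity, DEF_SHOW_FUNC(block_cache_miss, BLOCK_CACHE_MISS) expands
+  (modulo whitespace) to:
+
+    static int rocksdb_show_block_cache_miss(MYSQL_THD thd, SHOW_VAR *var,
+                                             char *buff) {
+      rocksdb_status_counters.block_cache_miss =
+          rocksdb_stats->getTickerCount(rocksdb::BLOCK_CACHE_MISS);
+      var->type = SHOW_LONGLONG;
+      var->value =
+          reinterpret_cast<char *>(&rocksdb_status_counters.block_cache_miss);
+      return HA_EXIT_SUCCESS;
+    }
+
+  and DEF_STATUS_VAR(block_cache_miss) expands to the SHOW_VAR entry
+  { "rocksdb_block_cache_miss", (char *)&rocksdb_show_block_cache_miss,
+    SHOW_FUNC }.
+*/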
+
+struct rocksdb_status_counters_t {
+ uint64_t block_cache_miss;
+ uint64_t block_cache_hit;
+ uint64_t block_cache_add;
+ uint64_t block_cache_add_failures;
+ uint64_t block_cache_index_miss;
+ uint64_t block_cache_index_hit;
+ uint64_t block_cache_index_add;
+ uint64_t block_cache_index_bytes_insert;
+ uint64_t block_cache_index_bytes_evict;
+ uint64_t block_cache_filter_miss;
+ uint64_t block_cache_filter_hit;
+ uint64_t block_cache_filter_add;
+ uint64_t block_cache_filter_bytes_insert;
+ uint64_t block_cache_filter_bytes_evict;
+ uint64_t block_cache_bytes_read;
+ uint64_t block_cache_bytes_write;
+ uint64_t block_cache_data_bytes_insert;
+ uint64_t block_cache_data_miss;
+ uint64_t block_cache_data_hit;
+ uint64_t block_cache_data_add;
+ uint64_t bloom_filter_useful;
+ uint64_t bloom_filter_full_positive;
+ uint64_t bloom_filter_full_true_positive;
+ uint64_t memtable_hit;
+ uint64_t memtable_miss;
+ uint64_t get_hit_l0;
+ uint64_t get_hit_l1;
+ uint64_t get_hit_l2_and_up;
+ uint64_t compaction_key_drop_new;
+ uint64_t compaction_key_drop_obsolete;
+ uint64_t compaction_key_drop_user;
+ uint64_t number_keys_written;
+ uint64_t number_keys_read;
+ uint64_t number_keys_updated;
+ uint64_t bytes_written;
+ uint64_t bytes_read;
+ uint64_t number_db_seek;
+ uint64_t number_db_seek_found;
+ uint64_t number_db_next;
+ uint64_t number_db_next_found;
+ uint64_t number_db_prev;
+ uint64_t number_db_prev_found;
+ uint64_t iter_bytes_read;
+ uint64_t no_file_closes;
+ uint64_t no_file_opens;
+ uint64_t no_file_errors;
+ uint64_t stall_micros;
+ uint64_t num_iterators;
+ uint64_t number_multiget_get;
+ uint64_t number_multiget_keys_read;
+ uint64_t number_multiget_bytes_read;
+ uint64_t number_deletes_filtered;
+ uint64_t number_merge_failures;
+ uint64_t bloom_filter_prefix_checked;
+ uint64_t bloom_filter_prefix_useful;
+ uint64_t number_reseeks_iteration;
+ uint64_t getupdatessince_calls;
+ uint64_t block_cachecompressed_miss;
+ uint64_t block_cachecompressed_hit;
+ uint64_t wal_synced;
+ uint64_t wal_bytes;
+ uint64_t write_self;
+ uint64_t write_other;
+ uint64_t write_timedout;
+ uint64_t write_wal;
+ uint64_t flush_write_bytes;
+ uint64_t compact_read_bytes;
+ uint64_t compact_write_bytes;
+ uint64_t number_superversion_acquires;
+ uint64_t number_superversion_releases;
+ uint64_t number_superversion_cleanups;
+ uint64_t number_block_not_compressed;
+};
+
+static rocksdb_status_counters_t rocksdb_status_counters;
+
+DEF_SHOW_FUNC(block_cache_miss, BLOCK_CACHE_MISS)
+DEF_SHOW_FUNC(block_cache_hit, BLOCK_CACHE_HIT)
+DEF_SHOW_FUNC(block_cache_add, BLOCK_CACHE_ADD)
+DEF_SHOW_FUNC(block_cache_add_failures, BLOCK_CACHE_ADD_FAILURES)
+DEF_SHOW_FUNC(block_cache_index_miss, BLOCK_CACHE_INDEX_MISS)
+DEF_SHOW_FUNC(block_cache_index_hit, BLOCK_CACHE_INDEX_HIT)
+DEF_SHOW_FUNC(block_cache_index_add, BLOCK_CACHE_INDEX_ADD)
+DEF_SHOW_FUNC(block_cache_index_bytes_insert, BLOCK_CACHE_INDEX_BYTES_INSERT)
+DEF_SHOW_FUNC(block_cache_index_bytes_evict, BLOCK_CACHE_INDEX_BYTES_EVICT)
+DEF_SHOW_FUNC(block_cache_filter_miss, BLOCK_CACHE_FILTER_MISS)
+DEF_SHOW_FUNC(block_cache_filter_hit, BLOCK_CACHE_FILTER_HIT)
+DEF_SHOW_FUNC(block_cache_filter_add, BLOCK_CACHE_FILTER_ADD)
+DEF_SHOW_FUNC(block_cache_filter_bytes_insert, BLOCK_CACHE_FILTER_BYTES_INSERT)
+DEF_SHOW_FUNC(block_cache_filter_bytes_evict, BLOCK_CACHE_FILTER_BYTES_EVICT)
+DEF_SHOW_FUNC(block_cache_bytes_read, BLOCK_CACHE_BYTES_READ)
+DEF_SHOW_FUNC(block_cache_bytes_write, BLOCK_CACHE_BYTES_WRITE)
+DEF_SHOW_FUNC(block_cache_data_bytes_insert, BLOCK_CACHE_DATA_BYTES_INSERT)
+DEF_SHOW_FUNC(block_cache_data_miss, BLOCK_CACHE_DATA_MISS)
+DEF_SHOW_FUNC(block_cache_data_hit, BLOCK_CACHE_DATA_HIT)
+DEF_SHOW_FUNC(block_cache_data_add, BLOCK_CACHE_DATA_ADD)
+DEF_SHOW_FUNC(bloom_filter_useful, BLOOM_FILTER_USEFUL)
+DEF_SHOW_FUNC(bloom_filter_full_positive, BLOOM_FILTER_FULL_POSITIVE)
+DEF_SHOW_FUNC(bloom_filter_full_true_positive, BLOOM_FILTER_FULL_TRUE_POSITIVE)
+DEF_SHOW_FUNC(memtable_hit, MEMTABLE_HIT)
+DEF_SHOW_FUNC(memtable_miss, MEMTABLE_MISS)
+DEF_SHOW_FUNC(get_hit_l0, GET_HIT_L0)
+DEF_SHOW_FUNC(get_hit_l1, GET_HIT_L1)
+DEF_SHOW_FUNC(get_hit_l2_and_up, GET_HIT_L2_AND_UP)
+DEF_SHOW_FUNC(compaction_key_drop_new, COMPACTION_KEY_DROP_NEWER_ENTRY)
+DEF_SHOW_FUNC(compaction_key_drop_obsolete, COMPACTION_KEY_DROP_OBSOLETE)
+DEF_SHOW_FUNC(compaction_key_drop_user, COMPACTION_KEY_DROP_USER)
+DEF_SHOW_FUNC(number_keys_written, NUMBER_KEYS_WRITTEN)
+DEF_SHOW_FUNC(number_keys_read, NUMBER_KEYS_READ)
+DEF_SHOW_FUNC(number_keys_updated, NUMBER_KEYS_UPDATED)
+DEF_SHOW_FUNC(bytes_written, BYTES_WRITTEN)
+DEF_SHOW_FUNC(bytes_read, BYTES_READ)
+DEF_SHOW_FUNC(number_db_seek, NUMBER_DB_SEEK)
+DEF_SHOW_FUNC(number_db_seek_found, NUMBER_DB_SEEK_FOUND)
+DEF_SHOW_FUNC(number_db_next, NUMBER_DB_NEXT)
+DEF_SHOW_FUNC(number_db_next_found, NUMBER_DB_NEXT_FOUND)
+DEF_SHOW_FUNC(number_db_prev, NUMBER_DB_PREV)
+DEF_SHOW_FUNC(number_db_prev_found, NUMBER_DB_PREV_FOUND)
+DEF_SHOW_FUNC(iter_bytes_read, ITER_BYTES_READ)
+DEF_SHOW_FUNC(no_file_closes, NO_FILE_CLOSES)
+DEF_SHOW_FUNC(no_file_opens, NO_FILE_OPENS)
+DEF_SHOW_FUNC(no_file_errors, NO_FILE_ERRORS)
+DEF_SHOW_FUNC(stall_micros, STALL_MICROS)
+DEF_SHOW_FUNC(num_iterators, NO_ITERATORS)
+DEF_SHOW_FUNC(number_multiget_get, NUMBER_MULTIGET_CALLS)
+DEF_SHOW_FUNC(number_multiget_keys_read, NUMBER_MULTIGET_KEYS_READ)
+DEF_SHOW_FUNC(number_multiget_bytes_read, NUMBER_MULTIGET_BYTES_READ)
+DEF_SHOW_FUNC(number_deletes_filtered, NUMBER_FILTERED_DELETES)
+DEF_SHOW_FUNC(number_merge_failures, NUMBER_MERGE_FAILURES)
+DEF_SHOW_FUNC(bloom_filter_prefix_checked, BLOOM_FILTER_PREFIX_CHECKED)
+DEF_SHOW_FUNC(bloom_filter_prefix_useful, BLOOM_FILTER_PREFIX_USEFUL)
+DEF_SHOW_FUNC(number_reseeks_iteration, NUMBER_OF_RESEEKS_IN_ITERATION)
+DEF_SHOW_FUNC(getupdatessince_calls, GET_UPDATES_SINCE_CALLS)
+DEF_SHOW_FUNC(block_cachecompressed_miss, BLOCK_CACHE_COMPRESSED_MISS)
+DEF_SHOW_FUNC(block_cachecompressed_hit, BLOCK_CACHE_COMPRESSED_HIT)
+DEF_SHOW_FUNC(wal_synced, WAL_FILE_SYNCED)
+DEF_SHOW_FUNC(wal_bytes, WAL_FILE_BYTES)
+DEF_SHOW_FUNC(write_self, WRITE_DONE_BY_SELF)
+DEF_SHOW_FUNC(write_other, WRITE_DONE_BY_OTHER)
+DEF_SHOW_FUNC(write_timedout, WRITE_TIMEDOUT)
+DEF_SHOW_FUNC(write_wal, WRITE_WITH_WAL)
+DEF_SHOW_FUNC(flush_write_bytes, FLUSH_WRITE_BYTES)
+DEF_SHOW_FUNC(compact_read_bytes, COMPACT_READ_BYTES)
+DEF_SHOW_FUNC(compact_write_bytes, COMPACT_WRITE_BYTES)
+DEF_SHOW_FUNC(number_superversion_acquires, NUMBER_SUPERVERSION_ACQUIRES)
+DEF_SHOW_FUNC(number_superversion_releases, NUMBER_SUPERVERSION_RELEASES)
+DEF_SHOW_FUNC(number_superversion_cleanups, NUMBER_SUPERVERSION_CLEANUPS)
+DEF_SHOW_FUNC(number_block_not_compressed, NUMBER_BLOCK_NOT_COMPRESSED)
+
+static void myrocks_update_status() {
+ export_stats.rows_deleted = global_stats.rows[ROWS_DELETED];
+ export_stats.rows_inserted = global_stats.rows[ROWS_INSERTED];
+ export_stats.rows_read = global_stats.rows[ROWS_READ];
+ export_stats.rows_updated = global_stats.rows[ROWS_UPDATED];
+ export_stats.rows_deleted_blind = global_stats.rows[ROWS_DELETED_BLIND];
+ export_stats.rows_expired = global_stats.rows[ROWS_EXPIRED];
+ export_stats.rows_filtered = global_stats.rows[ROWS_FILTERED];
+
+ export_stats.system_rows_deleted = global_stats.system_rows[ROWS_DELETED];
+ export_stats.system_rows_inserted = global_stats.system_rows[ROWS_INSERTED];
+ export_stats.system_rows_read = global_stats.system_rows[ROWS_READ];
+ export_stats.system_rows_updated = global_stats.system_rows[ROWS_UPDATED];
+
+ export_stats.queries_point = global_stats.queries[QUERIES_POINT];
+ export_stats.queries_range = global_stats.queries[QUERIES_RANGE];
+
+ export_stats.covered_secondary_key_lookups =
+ global_stats.covered_secondary_key_lookups;
+}
+
+static void myrocks_update_memory_status() {
+ std::vector<rocksdb::DB *> dbs;
+ std::unordered_set<const rocksdb::Cache *> cache_set;
+ dbs.push_back(rdb);
+ std::map<rocksdb::MemoryUtil::UsageType, uint64_t> temp_usage_by_type;
+ rocksdb::MemoryUtil::GetApproximateMemoryUsageByType(dbs, cache_set,
+ &temp_usage_by_type);
+ memory_stats.memtable_total =
+ temp_usage_by_type[rocksdb::MemoryUtil::kMemTableTotal];
+ memory_stats.memtable_unflushed =
+ temp_usage_by_type[rocksdb::MemoryUtil::kMemTableUnFlushed];
+}
+
+static SHOW_VAR myrocks_status_variables[] = {
+ DEF_STATUS_VAR_FUNC("rows_deleted", &export_stats.rows_deleted,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("rows_inserted", &export_stats.rows_inserted,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("rows_read", &export_stats.rows_read, SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("rows_updated", &export_stats.rows_updated,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("rows_deleted_blind", &export_stats.rows_deleted_blind,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("rows_expired", &export_stats.rows_expired,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("rows_filtered", &export_stats.rows_filtered,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("system_rows_deleted",
+ &export_stats.system_rows_deleted, SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("system_rows_inserted",
+ &export_stats.system_rows_inserted, SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("system_rows_read", &export_stats.system_rows_read,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("system_rows_updated",
+ &export_stats.system_rows_updated, SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("memtable_total", &memory_stats.memtable_total,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("memtable_unflushed", &memory_stats.memtable_unflushed,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("queries_point", &export_stats.queries_point,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("queries_range", &export_stats.queries_range,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("covered_secondary_key_lookups",
+ &export_stats.covered_secondary_key_lookups,
+ SHOW_LONGLONG),
+
+ {NullS, NullS, SHOW_LONG}};
+
+static void show_myrocks_vars(THD *thd, SHOW_VAR *var, char *buff) {
+ myrocks_update_status();
+ myrocks_update_memory_status();
+ var->type = SHOW_ARRAY;
+ var->value = reinterpret_cast<char *>(&myrocks_status_variables);
+}
+
+static ulonglong io_stall_prop_value(
+ const std::map<std::string, std::string> &props, const std::string &key) {
+ std::map<std::string, std::string>::const_iterator iter =
+ props.find("io_stalls." + key);
+ if (iter != props.end()) {
+ return std::stoull(iter->second);
+ } else {
+ DBUG_PRINT("warning",
+ ("RocksDB GetMapPropery hasn't returned key=%s", key.c_str()));
+ DBUG_ASSERT(0);
+ return 0;
+ }
+}
+
+static void update_rocksdb_stall_status() {
+ st_io_stall_stats local_io_stall_stats;
+ for (const auto &cf_name : cf_manager.get_cf_names()) {
+ rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(cf_name);
+ if (cfh == nullptr) {
+ continue;
+ }
+
+ std::map<std::string, std::string> props;
+ if (!rdb->GetMapProperty(cfh, "rocksdb.cfstats", &props)) {
+ continue;
+ }
+
+ local_io_stall_stats.level0_slowdown +=
+ io_stall_prop_value(props, "level0_slowdown");
+ local_io_stall_stats.level0_slowdown_with_compaction +=
+ io_stall_prop_value(props, "level0_slowdown_with_compaction");
+ local_io_stall_stats.level0_numfiles +=
+ io_stall_prop_value(props, "level0_numfiles");
+ local_io_stall_stats.level0_numfiles_with_compaction +=
+ io_stall_prop_value(props, "level0_numfiles_with_compaction");
+ local_io_stall_stats.stop_for_pending_compaction_bytes +=
+ io_stall_prop_value(props, "stop_for_pending_compaction_bytes");
+ local_io_stall_stats.slowdown_for_pending_compaction_bytes +=
+ io_stall_prop_value(props, "slowdown_for_pending_compaction_bytes");
+ local_io_stall_stats.memtable_compaction +=
+ io_stall_prop_value(props, "memtable_compaction");
+ local_io_stall_stats.memtable_slowdown +=
+ io_stall_prop_value(props, "memtable_slowdown");
+ local_io_stall_stats.total_stop += io_stall_prop_value(props, "total_stop");
+ local_io_stall_stats.total_slowdown +=
+ io_stall_prop_value(props, "total_slowdown");
+ }
+ io_stall_stats = local_io_stall_stats;
+}
+
+static SHOW_VAR rocksdb_stall_status_variables[] = {
+ DEF_STATUS_VAR_FUNC("l0_file_count_limit_slowdowns",
+ &io_stall_stats.level0_slowdown, SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("locked_l0_file_count_limit_slowdowns",
+ &io_stall_stats.level0_slowdown_with_compaction,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("l0_file_count_limit_stops",
+ &io_stall_stats.level0_numfiles, SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("locked_l0_file_count_limit_stops",
+ &io_stall_stats.level0_numfiles_with_compaction,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("pending_compaction_limit_stops",
+ &io_stall_stats.stop_for_pending_compaction_bytes,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("pending_compaction_limit_slowdowns",
+ &io_stall_stats.slowdown_for_pending_compaction_bytes,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("memtable_limit_stops",
+ &io_stall_stats.memtable_compaction, SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("memtable_limit_slowdowns",
+ &io_stall_stats.memtable_slowdown, SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("total_stops", &io_stall_stats.total_stop,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("total_slowdowns", &io_stall_stats.total_slowdown,
+ SHOW_LONGLONG),
+ // end of the array marker
+ {NullS, NullS, SHOW_LONG}};
+
+static void show_rocksdb_stall_vars(THD *thd, SHOW_VAR *var, char *buff) {
+ update_rocksdb_stall_status();
+ var->type = SHOW_ARRAY;
+ var->value = reinterpret_cast<char *>(&rocksdb_stall_status_variables);
+}
+
+static SHOW_VAR rocksdb_status_vars[] = {
+ DEF_STATUS_VAR(block_cache_miss),
+ DEF_STATUS_VAR(block_cache_hit),
+ DEF_STATUS_VAR(block_cache_add),
+ DEF_STATUS_VAR(block_cache_add_failures),
+ DEF_STATUS_VAR(block_cache_index_miss),
+ DEF_STATUS_VAR(block_cache_index_hit),
+ DEF_STATUS_VAR(block_cache_index_add),
+ DEF_STATUS_VAR(block_cache_index_bytes_insert),
+ DEF_STATUS_VAR(block_cache_index_bytes_evict),
+ DEF_STATUS_VAR(block_cache_filter_miss),
+ DEF_STATUS_VAR(block_cache_filter_hit),
+ DEF_STATUS_VAR(block_cache_filter_add),
+ DEF_STATUS_VAR(block_cache_filter_bytes_insert),
+ DEF_STATUS_VAR(block_cache_filter_bytes_evict),
+ DEF_STATUS_VAR(block_cache_bytes_read),
+ DEF_STATUS_VAR(block_cache_bytes_write),
+ DEF_STATUS_VAR(block_cache_data_bytes_insert),
+ DEF_STATUS_VAR(block_cache_data_miss),
+ DEF_STATUS_VAR(block_cache_data_hit),
+ DEF_STATUS_VAR(block_cache_data_add),
+ DEF_STATUS_VAR(bloom_filter_useful),
+ DEF_STATUS_VAR(bloom_filter_full_positive),
+ DEF_STATUS_VAR(bloom_filter_full_true_positive),
+ DEF_STATUS_VAR(memtable_hit),
+ DEF_STATUS_VAR(memtable_miss),
+ DEF_STATUS_VAR(get_hit_l0),
+ DEF_STATUS_VAR(get_hit_l1),
+ DEF_STATUS_VAR(get_hit_l2_and_up),
+ DEF_STATUS_VAR(compaction_key_drop_new),
+ DEF_STATUS_VAR(compaction_key_drop_obsolete),
+ DEF_STATUS_VAR(compaction_key_drop_user),
+ DEF_STATUS_VAR(number_keys_written),
+ DEF_STATUS_VAR(number_keys_read),
+ DEF_STATUS_VAR(number_keys_updated),
+ DEF_STATUS_VAR(bytes_written),
+ DEF_STATUS_VAR(bytes_read),
+ DEF_STATUS_VAR(number_db_seek),
+ DEF_STATUS_VAR(number_db_seek_found),
+ DEF_STATUS_VAR(number_db_next),
+ DEF_STATUS_VAR(number_db_next_found),
+ DEF_STATUS_VAR(number_db_prev),
+ DEF_STATUS_VAR(number_db_prev_found),
+ DEF_STATUS_VAR(iter_bytes_read),
+ DEF_STATUS_VAR(no_file_closes),
+ DEF_STATUS_VAR(no_file_opens),
+ DEF_STATUS_VAR(no_file_errors),
+ DEF_STATUS_VAR(stall_micros),
+ DEF_STATUS_VAR(num_iterators),
+ DEF_STATUS_VAR(number_multiget_get),
+ DEF_STATUS_VAR(number_multiget_keys_read),
+ DEF_STATUS_VAR(number_multiget_bytes_read),
+ DEF_STATUS_VAR(number_deletes_filtered),
+ DEF_STATUS_VAR(number_merge_failures),
+ DEF_STATUS_VAR(bloom_filter_prefix_checked),
+ DEF_STATUS_VAR(bloom_filter_prefix_useful),
+ DEF_STATUS_VAR(number_reseeks_iteration),
+ DEF_STATUS_VAR(getupdatessince_calls),
+ DEF_STATUS_VAR(block_cachecompressed_miss),
+ DEF_STATUS_VAR(block_cachecompressed_hit),
+ DEF_STATUS_VAR(wal_synced),
+ DEF_STATUS_VAR(wal_bytes),
+ DEF_STATUS_VAR(write_self),
+ DEF_STATUS_VAR(write_other),
+ DEF_STATUS_VAR(write_timedout),
+ DEF_STATUS_VAR(write_wal),
+ DEF_STATUS_VAR(flush_write_bytes),
+ DEF_STATUS_VAR(compact_read_bytes),
+ DEF_STATUS_VAR(compact_write_bytes),
+ DEF_STATUS_VAR(number_superversion_acquires),
+ DEF_STATUS_VAR(number_superversion_releases),
+ DEF_STATUS_VAR(number_superversion_cleanups),
+ DEF_STATUS_VAR(number_block_not_compressed),
+ DEF_STATUS_VAR_PTR("row_lock_deadlocks", &rocksdb_row_lock_deadlocks,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_PTR("row_lock_wait_timeouts",
+ &rocksdb_row_lock_wait_timeouts, SHOW_LONGLONG),
+ DEF_STATUS_VAR_PTR("snapshot_conflict_errors",
+ &rocksdb_snapshot_conflict_errors, SHOW_LONGLONG),
+ DEF_STATUS_VAR_PTR("wal_group_syncs", &rocksdb_wal_group_syncs,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_PTR("manual_compactions_processed",
+ &rocksdb_manual_compactions_processed, SHOW_LONGLONG),
+ DEF_STATUS_VAR_PTR("manual_compactions_running",
+ &rocksdb_manual_compactions_running, SHOW_LONGLONG),
+ DEF_STATUS_VAR_PTR("number_sst_entry_put", &rocksdb_num_sst_entry_put,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_PTR("number_sst_entry_delete", &rocksdb_num_sst_entry_delete,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_PTR("number_sst_entry_singledelete",
+ &rocksdb_num_sst_entry_singledelete, SHOW_LONGLONG),
+ DEF_STATUS_VAR_PTR("number_sst_entry_merge", &rocksdb_num_sst_entry_merge,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_PTR("number_sst_entry_other", &rocksdb_num_sst_entry_other,
+ SHOW_LONGLONG),
+#ifndef DBUG_OFF
+ DEF_STATUS_VAR_PTR("num_get_for_update_calls",
+ &rocksdb_num_get_for_update_calls, SHOW_LONGLONG),
+#endif
+ // the variables generated by SHOW_FUNC are sorted only by prefix (first
+ // arg in the tuple below), so make sure it is unique to make sorting
+ // deterministic as quick sort is not stable
+ {"rocksdb", reinterpret_cast<char *>(&show_myrocks_vars), SHOW_FUNC},
+ {"rocksdb_stall", reinterpret_cast<char *>(&show_rocksdb_stall_vars),
+ SHOW_FUNC},
+ {NullS, NullS, SHOW_LONG}};
+
+/*
+ Background thread's main logic
+*/
+
+void Rdb_background_thread::run() {
+ // How many seconds to wait before flushing the WAL next time.
+ const int WAKE_UP_INTERVAL = 1;
+
+ timespec ts_next_sync;
+ set_timespec(ts_next_sync, WAKE_UP_INTERVAL);
+
+ for (;;) {
+ // Wait until the next timeout or until we receive a signal to stop the
+ // thread. A request to stop the thread should only be triggered when the
+ // storage engine is being unloaded.
+ RDB_MUTEX_LOCK_CHECK(m_signal_mutex);
+ const auto ret MY_ATTRIBUTE((__unused__)) =
+ mysql_cond_timedwait(&m_signal_cond, &m_signal_mutex, &ts_next_sync);
+
+ // Check that we receive only the expected error codes.
+ DBUG_ASSERT(ret == 0 || ret == ETIMEDOUT);
+ const bool local_stop = m_stop;
+ const bool local_save_stats = m_save_stats;
+ reset();
+ RDB_MUTEX_UNLOCK_CHECK(m_signal_mutex);
+
+ if (local_stop) {
+ // If we're here, it's because the condition variable was signaled by
+ // another thread and we're shutting down. Break out of the loop to make
+ // sure that the shutdown thread can proceed.
+ break;
+ }
+
+ // This path should be taken only when the timer expired.
+ DBUG_ASSERT(ret == ETIMEDOUT);
+
+ if (local_save_stats) {
+ ddl_manager.persist_stats();
+ }
+
+ // Set the next timestamp for mysql_cond_timedwait() (which ends up calling
+ // pthread_cond_timedwait()) to wait on.
+ set_timespec(ts_next_sync, WAKE_UP_INTERVAL);
+
+ // Flush the WAL. Sync it for both background and never modes, copying
+ // InnoDB's behavior. For mode never, the WAL file isn't even written,
+ // whereas mode background writes to the WAL file but issues the syncs
+ // from a background thread.
+ if (rdb && (rocksdb_flush_log_at_trx_commit != FLUSH_LOG_SYNC) &&
+ !rocksdb_db_options->allow_mmap_writes) {
+ const rocksdb::Status s = rdb->FlushWAL(true);
+ if (!s.ok()) {
+ rdb_handle_io_error(s, RDB_IO_ERROR_BG_THREAD);
+ }
+ }
+ // Recalculate statistics for indexes.
+ if (rocksdb_stats_recalc_rate) {
+ std::unordered_map<GL_INDEX_ID, std::shared_ptr<const Rdb_key_def>>
+ to_recalc;
+
+ if (rdb_indexes_to_recalc.empty()) {
+ struct Rdb_index_collector : public Rdb_tables_scanner {
+ int add_table(Rdb_tbl_def *tdef) override {
+ for (uint i = 0; i < tdef->m_key_count; i++) {
+ rdb_indexes_to_recalc.push_back(
+ tdef->m_key_descr_arr[i]->get_gl_index_id());
+ }
+ return HA_EXIT_SUCCESS;
+ }
+ } collector;
+ ddl_manager.scan_for_tables(&collector);
+ }
+
+ while (to_recalc.size() < rocksdb_stats_recalc_rate &&
+ !rdb_indexes_to_recalc.empty()) {
+ const auto index_id = rdb_indexes_to_recalc.back();
+ rdb_indexes_to_recalc.pop_back();
+
+ std::shared_ptr<const Rdb_key_def> keydef =
+ ddl_manager.safe_find(index_id);
+
+ if (keydef) {
+ to_recalc.insert(std::make_pair(keydef->get_gl_index_id(), keydef));
+ }
+ }
+
+ if (!to_recalc.empty()) {
+ calculate_stats(to_recalc, false);
+ }
+ }
+
+ }
+
+ // Save any remaining stats which might've been left unsaved.
+ ddl_manager.persist_stats();
+}
+
+/*
+ A background thread to handle manual compactions,
+  except for dropping indexes/tables. Every second, it checks for
+  pending manual compactions and calls CompactRange if any exist.
+*/
+void Rdb_manual_compaction_thread::run() {
+ mysql_mutex_init(0, &m_mc_mutex, MY_MUTEX_INIT_FAST);
+ RDB_MUTEX_LOCK_CHECK(m_signal_mutex);
+ for (;;) {
+ if (m_stop) {
+ break;
+ }
+ timespec ts;
+ set_timespec(ts, 1);
+
+ const auto ret MY_ATTRIBUTE((__unused__)) =
+ mysql_cond_timedwait(&m_signal_cond, &m_signal_mutex, &ts);
+ if (m_stop) {
+ break;
+ }
+ // Make sure no program error is returned.
+ DBUG_ASSERT(ret == 0 || ret == ETIMEDOUT);
+ RDB_MUTEX_UNLOCK_CHECK(m_signal_mutex);
+
+ RDB_MUTEX_LOCK_CHECK(m_mc_mutex);
+ // Grab the first request and proceed, if the queue is not empty.
+ if (m_requests.empty()) {
+ RDB_MUTEX_UNLOCK_CHECK(m_mc_mutex);
+ RDB_MUTEX_LOCK_CHECK(m_signal_mutex);
+ continue;
+ }
+ Manual_compaction_request &mcr = m_requests.begin()->second;
+ DBUG_ASSERT(mcr.cf != nullptr);
+ DBUG_ASSERT(mcr.state == Manual_compaction_request::INITED);
+ mcr.state = Manual_compaction_request::RUNNING;
+ RDB_MUTEX_UNLOCK_CHECK(m_mc_mutex);
+
+ DBUG_ASSERT(mcr.state == Manual_compaction_request::RUNNING);
+ // NO_LINT_DEBUG
+ sql_print_information("Manual Compaction id %d cf %s started.", mcr.mc_id,
+ mcr.cf->GetName().c_str());
+ rocksdb_manual_compactions_running++;
+ if (rocksdb_debug_manual_compaction_delay > 0) {
+ my_sleep(rocksdb_debug_manual_compaction_delay * 1000000);
+ }
+ // CompactRange may take a very long time. On clean shutdown,
+ // it is cancelled by CancelAllBackgroundWork, and the status is then
+ // set to ShutdownInProgress.
+ const rocksdb::Status s = rdb->CompactRange(
+ getCompactRangeOptions(mcr.concurrency), mcr.cf, mcr.start, mcr.limit);
+ rocksdb_manual_compactions_running--;
+ if (s.ok()) {
+ // NO_LINT_DEBUG
+ sql_print_information("Manual Compaction id %d cf %s ended.", mcr.mc_id,
+ mcr.cf->GetName().c_str());
+ } else {
+ // NO_LINT_DEBUG
+ sql_print_information("Manual Compaction id %d cf %s aborted. %s",
+ mcr.mc_id, mcr.cf->GetName().c_str(), s.getState());
+ if (!s.IsShutdownInProgress()) {
+ rdb_handle_io_error(s, RDB_IO_ERROR_BG_THREAD);
+ } else {
+ DBUG_ASSERT(m_requests.size() == 1);
+ }
+ }
+ rocksdb_manual_compactions_processed++;
+ clear_manual_compaction_request(mcr.mc_id, false);
+ RDB_MUTEX_LOCK_CHECK(m_signal_mutex);
+ }
+ clear_all_manual_compaction_requests();
+ DBUG_ASSERT(m_requests.empty());
+ RDB_MUTEX_UNLOCK_CHECK(m_signal_mutex);
+ mysql_mutex_destroy(&m_mc_mutex);
+}
+
+void Rdb_manual_compaction_thread::clear_all_manual_compaction_requests() {
+ RDB_MUTEX_LOCK_CHECK(m_mc_mutex);
+ m_requests.clear();
+ RDB_MUTEX_UNLOCK_CHECK(m_mc_mutex);
+}
+
+void Rdb_manual_compaction_thread::clear_manual_compaction_request(
+ int mc_id, bool init_only) {
+ bool erase = true;
+ RDB_MUTEX_LOCK_CHECK(m_mc_mutex);
+ auto it = m_requests.find(mc_id);
+ if (it != m_requests.end()) {
+ if (init_only) {
+ Manual_compaction_request mcr = it->second;
+ if (mcr.state != Manual_compaction_request::INITED) {
+ erase = false;
+ }
+ }
+ if (erase) {
+ m_requests.erase(it);
+ }
+ } else {
+ // The current code path guarantees that erasing the same mc_id happens
+ // at most once. A request in INITED state may be erased by the thread
+ // that requested the compaction; one in RUNNING state is erased only by
+ // the manual compaction thread.
+ DBUG_ASSERT(0);
+ }
+ RDB_MUTEX_UNLOCK_CHECK(m_mc_mutex);
+}
+
+int Rdb_manual_compaction_thread::request_manual_compaction(
+ rocksdb::ColumnFamilyHandle *cf, rocksdb::Slice *start,
+ rocksdb::Slice *limit, int concurrency) {
+ int mc_id = -1;
+ RDB_MUTEX_LOCK_CHECK(m_mc_mutex);
+ if (m_requests.size() >= rocksdb_max_manual_compactions) {
+ RDB_MUTEX_UNLOCK_CHECK(m_mc_mutex);
+ return mc_id;
+ }
+ Manual_compaction_request mcr;
+ mc_id = mcr.mc_id = ++m_latest_mc_id;
+ mcr.state = Manual_compaction_request::INITED;
+ mcr.cf = cf;
+ mcr.start = start;
+ mcr.limit = limit;
+ mcr.concurrency = concurrency;
+ m_requests.insert(std::make_pair(mcr.mc_id, mcr));
+ RDB_MUTEX_UNLOCK_CHECK(m_mc_mutex);
+ return mc_id;
+}
+
+bool Rdb_manual_compaction_thread::is_manual_compaction_finished(int mc_id) {
+ bool finished = false;
+ RDB_MUTEX_LOCK_CHECK(m_mc_mutex);
+ if (m_requests.count(mc_id) == 0) {
+ finished = true;
+ }
+ RDB_MUTEX_UNLOCK_CHECK(m_mc_mutex);
+ return finished;
+}
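+
+/*
+  Hypothetical caller-side sketch (the thread instance name `mc_thread` is
+  assumed, not taken from this file): request a compaction, then poll until
+  the background thread erases the request:
+
+    int mc_id = mc_thread.request_manual_compaction(
+        cfh, &start_slice, &limit_slice, 1 /* concurrency */);
+    if (mc_id == -1) {
+      // rocksdb_max_manual_compactions requests are already queued
+    }
+    while (mc_id != -1 && !mc_thread.is_manual_compaction_finished(mc_id)) {
+      my_sleep(100000);  // poll every 100ms
+    }
+*/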
+
+/**
+ * Locking read + Not Found + Read Committed occurs if we accessed
+ * a row by Seek, tried to lock it, failed, released and reacquired the
+ * snapshot (because of READ COMMITTED mode) and the row was deleted by
+ * someone else in the meantime.
+ * If so, we either just skip the row, or re-create a snapshot and seek
+ * again. In both cases, the Read Committed constraint is not broken.
+ */
+bool ha_rocksdb::should_skip_invalidated_record(const int rc) {
+ if ((m_lock_rows != RDB_LOCK_NONE && rc == HA_ERR_KEY_NOT_FOUND &&
+ my_core::thd_tx_isolation(ha_thd()) == ISO_READ_COMMITTED)) {
+ return true;
+ }
+ return false;
+}
+/**
+ * Indicates that the snapshot needs to be re-created and the seek retried,
+ * instead of returning an error or an empty result set. This normally
+ * applies when hitting kBusy while locking the first row of a transaction,
+ * under the Repeatable Read isolation level.
+ */
+bool ha_rocksdb::should_recreate_snapshot(const int rc,
+ const bool is_new_snapshot) {
+ if (should_skip_invalidated_record(rc) ||
+ (rc == HA_ERR_ROCKSDB_STATUS_BUSY && is_new_snapshot)) {
+ return true;
+ }
+ return false;
+}
+
+/**
+ * When calling put/delete/singledelete without locking the row,
+ * it is necessary to pass assume_tracked=false to the RocksDB TX API.
+ * Read Free Replication and Blind Deletes are the cases where the TX API
+ * is used while row locking is skipped.
+ */
+bool ha_rocksdb::can_assume_tracked(THD *thd) {
+ if (/* MARIAROCKS_NOT_YET use_read_free_rpl() ||*/ (THDVAR(thd, blind_delete_primary_key))) {
+ return false;
+ }
+ return true;
+}
+
+bool ha_rocksdb::check_bloom_and_set_bounds(
+ THD *thd, const Rdb_key_def &kd, const rocksdb::Slice &eq_cond,
+ const bool use_all_keys, size_t bound_len, uchar *const lower_bound,
+ uchar *const upper_bound, rocksdb::Slice *lower_bound_slice,
+ rocksdb::Slice *upper_bound_slice) {
+ bool can_use_bloom = can_use_bloom_filter(thd, kd, eq_cond, use_all_keys);
+ if (!can_use_bloom) {
+ setup_iterator_bounds(kd, eq_cond, bound_len, lower_bound, upper_bound,
+ lower_bound_slice, upper_bound_slice);
+ }
+ return can_use_bloom;
+}
+
+/**
+  Decides whether it is possible to use the bloom filter.
+
+  @detail
+  Even if a bloom filter exists, it is not always possible
+  to use it. Using the bloom filter when you shouldn't can cause
+  false negatives -- fewer rows than expected may be returned.
+  It is the user's responsibility to use the bloom filter correctly.
+
+  If no bloom filter exists, the return value does not matter because
+  RocksDB does not use a bloom filter internally.
+
+ @param kd
+ @param eq_cond Equal condition part of the key. This always includes
+ system index id (4 bytes).
+ @param use_all_keys True if all key parts are set with equal conditions.
+ This is aware of extended keys.
+*/
+bool ha_rocksdb::can_use_bloom_filter(THD *thd, const Rdb_key_def &kd,
+ const rocksdb::Slice &eq_cond,
+ const bool use_all_keys) {
+ bool can_use = false;
+
+ if (THDVAR(thd, skip_bloom_filter_on_read)) {
+ return can_use;
+ }
+
+ const rocksdb::SliceTransform *prefix_extractor = kd.get_extractor();
+ if (prefix_extractor) {
+ /*
+      This is an optimized use case for CappedPrefixTransform.
+      If the eq_cond length >= the prefix extractor length, and all key
+      parts are used for an equal lookup, it is always possible to use
+      the bloom filter.
+
+      The prefix bloom filter can't be used on a descending scan with a
+      prefix lookup (i.e. WHERE id1=1 ORDER BY id2 DESC) because of a
+      RocksDB limitation. On an ascending (or unsorted) scan, keys longer
+      than the capped prefix length are truncated down to the capped
+      length, and the resulting key is added to the bloom filter.
+
+      Keys shorter than the capped prefix length are added to the bloom
+      filter as-is. At lookup time, key conditions longer than the capped
+      length can be used directly; shorter key conditions require all
+      parts of the key to be available for the short-key match.
+ */
+    can_use = (use_all_keys && prefix_extractor->InRange(eq_cond)) ||
+              prefix_extractor->SameResultWhenAppended(eq_cond);
+  } else {
+    /*
+      If the prefix extractor is not defined, all key parts have to be
+      used by eq_cond.
+    */
+    can_use = use_all_keys;
+  }
+
+ return can_use;
+}
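+
+/*
+  Worked example (illustrative, not from upstream): with a
+  CappedPrefixTransform of 20 bytes, a 24-byte eq_cond (4-byte index id
+  plus key parts) satisfies SameResultWhenAppended(), so the bloom filter
+  can be used regardless of use_all_keys. A 12-byte eq_cond is InRange()
+  but not SameResultWhenAppended(), so it can use the bloom filter only
+  when use_all_keys is true. The 20-byte cap is an assumed configuration.
+*/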
+
+/* For modules that need access to the global data structures */
+rocksdb::TransactionDB *rdb_get_rocksdb_db() { return rdb; }
+
+Rdb_cf_manager &rdb_get_cf_manager() { return cf_manager; }
+
+const rocksdb::BlockBasedTableOptions &rdb_get_table_options() {
+ return *rocksdb_tbl_options;
+}
+
+bool rdb_is_ttl_enabled() { return rocksdb_enable_ttl; }
+bool rdb_is_ttl_read_filtering_enabled() {
+ return rocksdb_enable_ttl_read_filtering;
+}
+#ifndef DBUG_OFF
+int rdb_dbug_set_ttl_rec_ts() { return rocksdb_debug_ttl_rec_ts; }
+int rdb_dbug_set_ttl_snapshot_ts() { return rocksdb_debug_ttl_snapshot_ts; }
+int rdb_dbug_set_ttl_read_filter_ts() {
+ return rocksdb_debug_ttl_read_filter_ts;
+}
+bool rdb_dbug_set_ttl_ignore_pk() { return rocksdb_debug_ttl_ignore_pk; }
+#endif
+
+void rdb_update_global_stats(const operation_type &type, uint count,
+ bool is_system_table) {
+ DBUG_ASSERT(type < ROWS_MAX);
+
+ if (count == 0) {
+ return;
+ }
+
+ if (is_system_table) {
+ global_stats.system_rows[type].add(count);
+ } else {
+ global_stats.rows[type].add(count);
+ }
+}
+
+int rdb_get_table_perf_counters(const char *const tablename,
+ Rdb_perf_counters *const counters) {
+ DBUG_ASSERT(tablename != nullptr);
+
+ Rdb_table_handler *table_handler;
+ table_handler = rdb_open_tables.get_table_handler(tablename);
+ if (table_handler == nullptr) {
+ return HA_ERR_ROCKSDB_INVALID_TABLE;
+ }
+
+ counters->load(table_handler->m_table_perf_context);
+
+ rdb_open_tables.release_table_handler(table_handler);
+ return HA_EXIT_SUCCESS;
+}
+
+const char *get_rdb_io_error_string(const RDB_IO_ERROR_TYPE err_type) {
+  // If this assertion fails, a member has been either added to or removed
+  // from the RDB_IO_ERROR_TYPE enum, and this function needs to be changed
+  // to return the appropriate value.
+ static_assert(RDB_IO_ERROR_LAST == 4, "Please handle all the error types.");
+
+ switch (err_type) {
+ case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_TX_COMMIT:
+ return "RDB_IO_ERROR_TX_COMMIT";
+ case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_DICT_COMMIT:
+ return "RDB_IO_ERROR_DICT_COMMIT";
+ case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_BG_THREAD:
+ return "RDB_IO_ERROR_BG_THREAD";
+ case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_GENERAL:
+ return "RDB_IO_ERROR_GENERAL";
+ default:
+ DBUG_ASSERT(false);
+ return "(unknown)";
+ }
+}
+
+// In case of core dump generation we want this function NOT to be optimized
+// so that we can capture as much data as possible to debug the root cause
+// more efficiently.
+#ifdef __GNUC__
+#endif
+void rdb_handle_io_error(const rocksdb::Status status,
+ const RDB_IO_ERROR_TYPE err_type) {
+ if (status.IsIOError()) {
+ /* skip dumping core if write failed and we are allowed to do so */
+#ifdef MARIAROCKS_NOT_YET
+ if (skip_core_dump_on_error) {
+ opt_core_file = false;
+ }
+#endif
+ switch (err_type) {
+ case RDB_IO_ERROR_TX_COMMIT:
+ case RDB_IO_ERROR_DICT_COMMIT: {
+ rdb_log_status_error(status, "failed to write to WAL");
+ /* NO_LINT_DEBUG */
+ sql_print_error("MyRocks: aborting on WAL write error.");
+ abort();
+ break;
+ }
+ case RDB_IO_ERROR_BG_THREAD: {
+ rdb_log_status_error(status, "BG thread failed to write to RocksDB");
+ /* NO_LINT_DEBUG */
+ sql_print_error("MyRocks: aborting on BG write error.");
+ abort();
+ break;
+ }
+ case RDB_IO_ERROR_GENERAL: {
+ rdb_log_status_error(status, "failed on I/O");
+ /* NO_LINT_DEBUG */
+ sql_print_error("MyRocks: aborting on I/O error.");
+ abort();
+ break;
+ }
+ default:
+ DBUG_ASSERT(0);
+ break;
+ }
+ } else if (status.IsCorruption()) {
+ rdb_log_status_error(status, "data corruption detected!");
+ rdb_persist_corruption_marker();
+ /* NO_LINT_DEBUG */
+ sql_print_error("MyRocks: aborting because of data corruption.");
+ abort();
+ } else if (!status.ok()) {
+ switch (err_type) {
+ case RDB_IO_ERROR_DICT_COMMIT: {
+ rdb_log_status_error(status, "Failed to write to WAL (dictionary)");
+ /* NO_LINT_DEBUG */
+ sql_print_error("MyRocks: aborting on WAL write error.");
+ abort();
+ break;
+ }
+ default:
+ rdb_log_status_error(status, "Failed to read/write in RocksDB");
+ break;
+ }
+ }
+}
+#ifdef __GNUC__
+#endif
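+
+/*
+  Summary of the policy implemented above (descriptive note, not upstream
+  text): I/O errors on WAL/dictionary commits, background threads, and
+  general I/O all abort the server; corruption persists a marker (see
+  rdb_persist_corruption_marker()) before aborting so the problem can be
+  detected on the next startup; any other non-OK status is only logged,
+  except dictionary commit failures, which also abort.
+*/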
+Rdb_dict_manager *rdb_get_dict_manager(void) { return &dict_manager; }
+
+Rdb_ddl_manager *rdb_get_ddl_manager(void) { return &ddl_manager; }
+
+Rdb_binlog_manager *rdb_get_binlog_manager(void) { return &binlog_manager; }
+
+void rocksdb_set_compaction_options(
+ my_core::THD *const thd MY_ATTRIBUTE((__unused__)),
+ my_core::st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
+ void *const var_ptr, const void *const save) {
+ if (var_ptr && save) {
+ *(uint64_t *)var_ptr = *(const uint64_t *)save;
+ }
+ const Rdb_compact_params params = {
+ (uint64_t)rocksdb_compaction_sequential_deletes,
+ (uint64_t)rocksdb_compaction_sequential_deletes_window,
+ (uint64_t)rocksdb_compaction_sequential_deletes_file_size};
+ if (properties_collector_factory) {
+ properties_collector_factory->SetCompactionParams(params);
+ }
+}
+
+void rocksdb_set_table_stats_sampling_pct(
+ my_core::THD *const thd MY_ATTRIBUTE((__unused__)),
+ my_core::st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
+ void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) {
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+
+ const uint32_t new_val = *static_cast<const uint32_t *>(save);
+
+ if (new_val != rocksdb_table_stats_sampling_pct) {
+ rocksdb_table_stats_sampling_pct = new_val;
+
+ if (properties_collector_factory) {
+ properties_collector_factory->SetTableStatsSamplingPct(
+ rocksdb_table_stats_sampling_pct);
+ }
+ }
+
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
+/*
+  This function allows setting the rate limiter's bytes-per-second value,
+  but only if the rate limiter was turned on, which has to be done at
+  startup. If the rate is already 0 (turned off), or we are changing it to
+  0 (trying to turn it off), this function pushes a warning to the client
+  and does nothing.
+ This is similar to the code in innodb_doublewrite_update (found in
+ storage/innobase/handler/ha_innodb.cc).
+*/
+void rocksdb_set_rate_limiter_bytes_per_sec(
+ my_core::THD *const thd,
+ my_core::st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
+ void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) {
+ const uint64_t new_val = *static_cast<const uint64_t *>(save);
+ if (new_val == 0 || rocksdb_rate_limiter_bytes_per_sec == 0) {
+ /*
+ If a rate_limiter was not enabled at startup we can't change it nor
+ can we disable it if one was created at startup
+ */
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_WRONG_ARGUMENTS,
+ "RocksDB: rocksdb_rate_limiter_bytes_per_sec cannot "
+ "be dynamically changed to or from 0. Do a clean "
+ "shutdown if you want to change it from or to 0.");
+ } else if (new_val != rocksdb_rate_limiter_bytes_per_sec) {
+ /* Apply the new value to the rate limiter and store it locally */
+ DBUG_ASSERT(rocksdb_rate_limiter != nullptr);
+ rocksdb_rate_limiter_bytes_per_sec = new_val;
+ rocksdb_rate_limiter->SetBytesPerSecond(new_val);
+ }
+}
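+
+/*
+  Illustrative usage (assuming the limiter was enabled at startup, e.g.
+  with --rocksdb-rate-limiter-bytes-per-sec=104857600; the values below
+  are examples only):
+
+    SET GLOBAL rocksdb_rate_limiter_bytes_per_sec = 52428800;  -- applied
+    SET GLOBAL rocksdb_rate_limiter_bytes_per_sec = 0;         -- warning only
+*/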
+
+void rocksdb_set_sst_mgr_rate_bytes_per_sec(
+ my_core::THD *const thd,
+ my_core::st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
+ void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) {
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+
+ const uint64_t new_val = *static_cast<const uint64_t *>(save);
+
+ if (new_val != rocksdb_sst_mgr_rate_bytes_per_sec) {
+ rocksdb_sst_mgr_rate_bytes_per_sec = new_val;
+
+ rocksdb_db_options->sst_file_manager->SetDeleteRateBytesPerSecond(
+ rocksdb_sst_mgr_rate_bytes_per_sec);
+ }
+
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
+void rocksdb_set_delayed_write_rate(THD *thd, struct st_mysql_sys_var *var,
+ void *var_ptr, const void *save) {
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+ const uint64_t new_val = *static_cast<const uint64_t *>(save);
+ if (rocksdb_delayed_write_rate != new_val) {
+ rocksdb_delayed_write_rate = new_val;
+ rocksdb::Status s =
+ rdb->SetDBOptions({{"delayed_write_rate", std::to_string(new_val)}});
+
+ if (!s.ok()) {
+ /* NO_LINT_DEBUG */
+ sql_print_warning(
+ "MyRocks: failed to update delayed_write_rate. "
+ "status code = %d, status = %s",
+ s.code(), s.ToString().c_str());
+ }
+ }
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
+void rocksdb_set_max_latest_deadlocks(THD *thd, struct st_mysql_sys_var *var,
+ void *var_ptr, const void *save) {
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+ const uint32_t new_val = *static_cast<const uint32_t *>(save);
+ if (rocksdb_max_latest_deadlocks != new_val) {
+ rocksdb_max_latest_deadlocks = new_val;
+ rdb->SetDeadlockInfoBufferSize(rocksdb_max_latest_deadlocks);
+ }
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
+void rdb_set_collation_exception_list(const char *const exception_list) {
+ DBUG_ASSERT(rdb_collation_exceptions != nullptr);
+
+ if (!rdb_collation_exceptions->set_patterns(exception_list)) {
+ my_core::warn_about_bad_patterns(rdb_collation_exceptions,
+ "strict_collation_exceptions");
+ }
+}
+
+void rocksdb_set_collation_exception_list(THD *const thd,
+ struct st_mysql_sys_var *const var,
+ void *const var_ptr,
+ const void *const save) {
+ const char *const val = *static_cast<const char *const *>(save);
+
+ rdb_set_collation_exception_list(val == nullptr ? "" : val);
+
+  // Store a heap-allocated copy of the new value in the variable itself,
+  // freeing the previous value: string sysvars own their storage, so the
+  // pointer in var_ptr must outlive this call.
+  const char *val_copy = val ? my_strdup(val, MYF(0)) : nullptr;
+  my_free(*static_cast<char **>(var_ptr));
+  *static_cast<const char **>(var_ptr) = val_copy;
+}
+
+int mysql_value_to_bool(struct st_mysql_value *value, my_bool *return_value) {
+ int new_value_type = value->value_type(value);
+ if (new_value_type == MYSQL_VALUE_TYPE_STRING) {
+ char buf[16];
+ int len = sizeof(buf);
+ const char *str = value->val_str(value, buf, &len);
+ if (str && (my_strcasecmp(system_charset_info, "true", str) == 0 ||
+ my_strcasecmp(system_charset_info, "on", str) == 0)) {
+ *return_value = TRUE;
+ } else if (str && (my_strcasecmp(system_charset_info, "false", str) == 0 ||
+ my_strcasecmp(system_charset_info, "off", str) == 0)) {
+ *return_value = FALSE;
+ } else {
+ return 1;
+ }
+ } else if (new_value_type == MYSQL_VALUE_TYPE_INT) {
+ long long intbuf;
+ value->val_int(value, &intbuf);
+ if (intbuf > 1) return 1;
+ *return_value = intbuf > 0 ? TRUE : FALSE;
+ } else {
+ return 1;
+ }
+
+ return 0;
+}
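+
+/*
+  Per the logic above: the accepted inputs are the case-insensitive strings
+  "true"/"on" and "false"/"off", and the integers 0 and 1; anything else is
+  rejected with a non-zero return value.
+*/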
+
+int rocksdb_check_bulk_load(
+ THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)),
+ void *save, struct st_mysql_value *value) {
+ my_bool new_value;
+ if (mysql_value_to_bool(value, &new_value) != 0) {
+ return 1;
+ }
+
+ Rdb_transaction *tx = get_tx_from_thd(thd);
+ if (tx != nullptr) {
+ bool is_critical_error;
+ const int rc = tx->finish_bulk_load(&is_critical_error);
+ if (rc != 0 && is_critical_error) {
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Error %d finalizing last SST file while "
+ "setting bulk loading variable",
+ rc);
+ THDVAR(thd, bulk_load) = 0;
+ return 1;
+ }
+ }
+
+ *static_cast<bool *>(save) = new_value;
+ return 0;
+}
+
+int rocksdb_check_bulk_load_allow_unsorted(
+ THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)),
+ void *save, struct st_mysql_value *value) {
+ my_bool new_value;
+ if (mysql_value_to_bool(value, &new_value) != 0) {
+ return 1;
+ }
+
+ if (THDVAR(thd, bulk_load)) {
+ my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0), "SET",
+ "Cannot change this setting while bulk load is enabled");
+
+ return 1;
+ }
+
+ *static_cast<bool *>(save) = new_value;
+ return 0;
+}
+
+static void rocksdb_set_max_background_jobs(THD *thd,
+ struct st_mysql_sys_var *const var,
+ void *const var_ptr,
+ const void *const save) {
+ DBUG_ASSERT(save != nullptr);
+ DBUG_ASSERT(rocksdb_db_options != nullptr);
+ DBUG_ASSERT(rocksdb_db_options->env != nullptr);
+
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+
+ const int new_val = *static_cast<const int *>(save);
+
+ if (rocksdb_db_options->max_background_jobs != new_val) {
+ rocksdb_db_options->max_background_jobs = new_val;
+ rocksdb::Status s =
+ rdb->SetDBOptions({{"max_background_jobs", std::to_string(new_val)}});
+
+ if (!s.ok()) {
+ /* NO_LINT_DEBUG */
+ sql_print_warning(
+ "MyRocks: failed to update max_background_jobs. "
+ "Status code = %d, status = %s.",
+ s.code(), s.ToString().c_str());
+ }
+ }
+
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
+static void rocksdb_set_bytes_per_sync(
+ THD *thd MY_ATTRIBUTE((__unused__)),
+ struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
+ void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) {
+ DBUG_ASSERT(save != nullptr);
+ DBUG_ASSERT(rocksdb_db_options != nullptr);
+ DBUG_ASSERT(rocksdb_db_options->env != nullptr);
+
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+
+ const ulonglong new_val = *static_cast<const ulonglong *>(save);
+
+ if (rocksdb_db_options->bytes_per_sync != new_val) {
+ rocksdb_db_options->bytes_per_sync = new_val;
+ rocksdb::Status s =
+ rdb->SetDBOptions({{"bytes_per_sync", std::to_string(new_val)}});
+
+ if (!s.ok()) {
+ /* NO_LINT_DEBUG */
+      sql_print_warning(
+          "MyRocks: failed to update bytes_per_sync. "
+          "Status code = %d, status = %s.",
+          s.code(), s.ToString().c_str());
+ }
+ }
+
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
+static void rocksdb_set_wal_bytes_per_sync(
+ THD *thd MY_ATTRIBUTE((__unused__)),
+ struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
+ void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) {
+ DBUG_ASSERT(save != nullptr);
+ DBUG_ASSERT(rocksdb_db_options != nullptr);
+ DBUG_ASSERT(rocksdb_db_options->env != nullptr);
+
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+
+ const ulonglong new_val = *static_cast<const ulonglong *>(save);
+
+ if (rocksdb_db_options->wal_bytes_per_sync != new_val) {
+ rocksdb_db_options->wal_bytes_per_sync = new_val;
+ rocksdb::Status s =
+ rdb->SetDBOptions({{"wal_bytes_per_sync", std::to_string(new_val)}});
+
+ if (!s.ok()) {
+ /* NO_LINT_DEBUG */
+      sql_print_warning(
+          "MyRocks: failed to update wal_bytes_per_sync. "
+          "Status code = %d, status = %s.",
+          s.code(), s.ToString().c_str());
+ }
+ }
+
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
+/*
+  Validates and updates the block cache size via the sys_var::check path.
+  SetCapacity may take seconds when reducing the block cache, and
+  sys_var::update holds the LOCK_global_system_variables mutex, so the
+  block cache size is updated in the check path instead.
+*/
+static int rocksdb_validate_set_block_cache_size(
+ THD *thd MY_ATTRIBUTE((__unused__)),
+ struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
+ void *var_ptr, struct st_mysql_value *value) {
+ DBUG_ASSERT(value != nullptr);
+
+ long long new_value;
+
+  /* val_int() returns non-zero when the value is NULL */
+ if (value->val_int(value, &new_value)) {
+ return HA_EXIT_FAILURE;
+ }
+
+ if (new_value < RDB_MIN_BLOCK_CACHE_SIZE ||
+ (uint64_t)new_value > (uint64_t)LLONG_MAX) {
+ return HA_EXIT_FAILURE;
+ }
+
+ RDB_MUTEX_LOCK_CHECK(rdb_block_cache_resize_mutex);
+ const rocksdb::BlockBasedTableOptions &table_options =
+ rdb_get_table_options();
+
+ if (rocksdb_block_cache_size != new_value && table_options.block_cache) {
+ table_options.block_cache->SetCapacity(new_value);
+ }
+ *static_cast<int64_t *>(var_ptr) = static_cast<int64_t>(new_value);
+ RDB_MUTEX_UNLOCK_CHECK(rdb_block_cache_resize_mutex);
+ return HA_EXIT_SUCCESS;
+}
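+
+/*
+  Illustrative usage (the value is an example only). Shrinking the cache
+  may block for seconds inside SetCapacity(), which is why the work is
+  done here in the check path rather than in the update path:
+
+    SET GLOBAL rocksdb_block_cache_size = 536870912;  -- 512MB
+*/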
+
+static int rocksdb_validate_update_cf_options(
+ THD * /* unused */, struct st_mysql_sys_var * /*unused*/, void *save,
+ struct st_mysql_value *value) {
+ char buff[STRING_BUFFER_USUAL_SIZE];
+ const char *str;
+ int length;
+ length = sizeof(buff);
+ str = value->val_str(value, buff, &length);
+  // In some cases, str can point to buff on the stack.
+  // This can cause invalid memory access after validation is finished.
+  // To avoid this kind of case, let's always duplicate str if it is not
+  // nullptr.
+ *(const char **)save = (str == nullptr) ? nullptr : my_strdup(str, MYF(0));
+
+ if (str == nullptr) {
+ return HA_EXIT_SUCCESS;
+ }
+
+ Rdb_cf_options::Name_to_config_t option_map;
+
+  // Basic sanity checking and parsing of the options into a map. If this
+  // fails then there's no point in proceeding.
+ if (!Rdb_cf_options::parse_cf_options(str, &option_map)) {
+ my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), "rocksdb_update_cf_options", str);
+ // Free what we've copied with my_strdup above.
+ my_free((void*)(*(const char **)save));
+ return HA_EXIT_FAILURE;
+ }
+ // Loop through option_map and create missing column families
+ for (Rdb_cf_options::Name_to_config_t::iterator it = option_map.begin();
+ it != option_map.end(); ++it) {
+ cf_manager.get_or_create_cf(rdb, it->first);
+ }
+ return HA_EXIT_SUCCESS;
+}
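+
+/*
+  Sketch of the option string format accepted above, as parsed by
+  Rdb_cf_options::parse_cf_options(); the CF names and values are
+  hypothetical:
+
+    SET GLOBAL rocksdb_update_cf_options =
+      'cf1={write_buffer_size=8m;target_file_size_base=2m};'
+      'cf2={write_buffer_size=16m}';
+*/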
+
+static void rocksdb_set_update_cf_options(
+ THD *const /* unused */, struct st_mysql_sys_var *const /* unused */,
+ void *const var_ptr, const void *const save) {
+ const char *const val = *static_cast<const char *const *>(save);
+
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+
+ my_free(*reinterpret_cast<void **>(var_ptr));
+
+ if (!val) {
+ *reinterpret_cast<char **>(var_ptr) = nullptr;
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+ return;
+ }
+
+ DBUG_ASSERT(val != nullptr);
+
+  // Reset the pointers regardless of how much success we had with updating
+  // the CF options. This will result in consistent behavior and avoids
+  // dealing with cases when only a subset of CFs was successfully updated.
+ *reinterpret_cast<const char **>(var_ptr) = val;
+
+ // Do the real work of applying the changes.
+ Rdb_cf_options::Name_to_config_t option_map;
+
+ // This should never fail, because of rocksdb_validate_update_cf_options
+ if (!Rdb_cf_options::parse_cf_options(val, &option_map)) {
+ my_free(*reinterpret_cast<char**>(var_ptr));
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+ return;
+ }
+
+ // For each CF we have, see if we need to update any settings.
+ for (const auto &cf_name : cf_manager.get_cf_names()) {
+ DBUG_ASSERT(!cf_name.empty());
+
+ rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(cf_name);
+ DBUG_ASSERT(cfh != nullptr);
+
+ const auto it = option_map.find(cf_name);
+ std::string per_cf_options = (it != option_map.end()) ? it->second : "";
+
+ if (!per_cf_options.empty()) {
+ Rdb_cf_options::Name_to_config_t opt_map;
+ rocksdb::Status s = rocksdb::StringToMap(per_cf_options, &opt_map);
+
+ if (s != rocksdb::Status::OK()) {
+ // NO_LINT_DEBUG
+ sql_print_warning(
+ "MyRocks: failed to convert the options for column "
+ "family '%s' to a map. %s",
+ cf_name.c_str(), s.ToString().c_str());
+ } else {
+ DBUG_ASSERT(rdb != nullptr);
+
+ // Finally we can apply the options.
+ s = rdb->SetOptions(cfh, opt_map);
+
+ if (s != rocksdb::Status::OK()) {
+ // NO_LINT_DEBUG
+ sql_print_warning(
+ "MyRocks: failed to apply the options for column "
+ "family '%s'. %s",
+ cf_name.c_str(), s.ToString().c_str());
+ } else {
+ // NO_LINT_DEBUG
+ sql_print_information(
+ "MyRocks: options for column family '%s' "
+ "have been successfully updated.",
+ cf_name.c_str());
+
+            // Make sure that the data is internally consistent as well, and
+            // update the CF options. This is also necessary to make sure
+            // that the CF options will be correctly reflected in the
+            // relevant table: ROCKSDB_CF_OPTIONS in INFORMATION_SCHEMA.
+ rocksdb::ColumnFamilyOptions cf_options = rdb->GetOptions(cfh);
+ std::string updated_options;
+
+ s = rocksdb::GetStringFromColumnFamilyOptions(&updated_options,
+ cf_options);
+
+ DBUG_ASSERT(s == rocksdb::Status::OK());
+ DBUG_ASSERT(!updated_options.empty());
+
+ cf_manager.update_options_map(cf_name, updated_options);
+ }
+ }
+ }
+ }
+
+ // Our caller (`plugin_var_memalloc_global_update`) will call `my_free` to
+ // free up resources used before.
+
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
+void rdb_queue_save_stats_request() { rdb_bg_thread.request_save_stats(); }
+
+#ifdef MARIAROCKS_NOT_YET // MDEV-10976
+
+void ha_rocksdb::rpl_before_delete_rows() {
+ DBUG_ENTER_FUNC();
+
+ m_in_rpl_delete_rows = true;
+
+ DBUG_VOID_RETURN;
+}
+
+void ha_rocksdb::rpl_after_delete_rows() {
+ DBUG_ENTER_FUNC();
+
+ m_in_rpl_delete_rows = false;
+
+ DBUG_VOID_RETURN;
+}
+
+void ha_rocksdb::rpl_before_update_rows() {
+ DBUG_ENTER_FUNC();
+
+ m_in_rpl_update_rows = true;
+
+ DBUG_VOID_RETURN;
+}
+
+void ha_rocksdb::rpl_after_update_rows() {
+ DBUG_ENTER_FUNC();
+
+ m_in_rpl_update_rows = false;
+
+ DBUG_VOID_RETURN;
+}
+
+#if 0
+bool ha_rocksdb::is_read_free_rpl_table() const {
+ return table->s && m_tbl_def->m_is_read_free_rpl_table;
+}
+#endif
+
+/**
+  @brief
+  Whether Read Free Replication can be used or not. Returning true means
+  Read Free Replication can be used.
+*/
+bool ha_rocksdb::use_read_free_rpl() const {
+ DBUG_ENTER_FUNC();
+
+  if (!ha_thd()->rli_slave || table->triggers /* || !is_read_free_rpl_table() */) {
+ DBUG_RETURN(false);
+ }
+
+#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported
+ switch (rocksdb_read_free_rpl) {
+ case read_free_rpl_type::OFF:
+ DBUG_RETURN(false);
+ case read_free_rpl_type::PK_ONLY:
+ DBUG_RETURN(!has_hidden_pk(table) && table->s->keys == 1);
+ case read_free_rpl_type::PK_SK:
+ DBUG_RETURN(!has_hidden_pk(table));
+ }
+#else
+ DBUG_RETURN(false);
+#endif
+
+ DBUG_ASSERT(false);
+ DBUG_RETURN(false);
+}
+#endif // MARIAROCKS_NOT_YET
+
+double ha_rocksdb::read_time(uint index, uint ranges, ha_rows rows) {
+ DBUG_ENTER_FUNC();
+
+ if (index != table->s->primary_key) {
+    /* Non-covering index range scan */
+ DBUG_RETURN(handler::read_time(index, ranges, rows));
+ }
+
+ DBUG_RETURN((rows / 20.0) + 1);
+}
+
+void ha_rocksdb::print_error(int error, myf errflag) {
+ if (error == HA_ERR_ROCKSDB_STATUS_BUSY) {
+ error = HA_ERR_LOCK_DEADLOCK;
+ }
+ handler::print_error(error, errflag);
+}
+
+std::string rdb_corruption_marker_file_name() {
+ std::string ret(rocksdb_datadir);
+ ret.append("/ROCKSDB_CORRUPTED");
+ return ret;
+}
+
+void sql_print_verbose_info(const char *format, ...)
+{
+ va_list args;
+
+ if (global_system_variables.log_warnings > 2) {
+ va_start(args, format);
+ sql_print_information_v(format, args);
+ va_end(args);
+ }
+}
+
+} // namespace myrocks
+
+
+/**
+ Construct and emit duplicate key error message using information
+ from table's record buffer.
+
+ @sa print_keydup_error(table, key, msg, errflag, thd, org_table_name).
+*/
+
+void print_keydup_error(TABLE *table, KEY *key, myf errflag,
+ const THD *thd, const char *org_table_name)
+{
+ print_keydup_error(table, key, ER(ER_DUP_ENTRY_WITH_KEY_NAME), errflag);
+}
+
+/*
+ Register the storage engine plugin outside of myrocks namespace
+ so that mysql_declare_plugin does not get confused when it does
+ its name generation.
+*/
+
+
+struct st_mysql_storage_engine rocksdb_storage_engine = {
+ MYSQL_HANDLERTON_INTERFACE_VERSION};
+
+maria_declare_plugin(rocksdb_se){
+ MYSQL_STORAGE_ENGINE_PLUGIN, /* Plugin Type */
+ &rocksdb_storage_engine, /* Plugin Descriptor */
+ "ROCKSDB", /* Plugin Name */
+ "Monty Program Ab", /* Plugin Author */
+ "RocksDB storage engine", /* Plugin Description */
+ PLUGIN_LICENSE_GPL, /* Plugin Licence */
+ myrocks::rocksdb_init_func, /* Plugin Entry Point */
+ myrocks::rocksdb_done_func, /* Plugin Deinitializer */
+ 0x0001, /* version number (0.1) */
+ myrocks::rocksdb_status_vars, /* status variables */
+ myrocks::rocksdb_system_variables, /* system variables */
+ "1.0", /* string version */
+ myrocks::MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
+},
+ myrocks::rdb_i_s_cfstats, myrocks::rdb_i_s_dbstats,
+ myrocks::rdb_i_s_perf_context, myrocks::rdb_i_s_perf_context_global,
+ myrocks::rdb_i_s_cfoptions, myrocks::rdb_i_s_compact_stats,
+ myrocks::rdb_i_s_global_info, myrocks::rdb_i_s_ddl,
+ myrocks::rdb_i_s_sst_props, myrocks::rdb_i_s_index_file_map,
+ myrocks::rdb_i_s_lock_info, myrocks::rdb_i_s_trx_info,
+ myrocks::rdb_i_s_deadlock_info
+maria_declare_plugin_end;
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
new file mode 100644
index 00000000000..437c8667994
--- /dev/null
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -0,0 +1,1067 @@
+/*
+ Copyright (c) 2012,2013 Monty Program Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+#pragma once
+
+#ifdef USE_PRAGMA_INTERFACE
+#pragma interface /* gcc class implementation */
+#endif
+
+/* C++ standard header files */
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+/* MySQL header files */
+#include "./handler.h" /* handler */
+#include "./my_global.h" /* ulonglong */
+#include "./sql_string.h"
+
+#ifdef _WIN32
+#undef pthread_key_create
+#undef pthread_key_delete
+#undef pthread_setspecific
+#undef pthread_getspecific
+#endif
+
+
+/* RocksDB header files */
+#include "rocksdb/cache.h"
+#include "rocksdb/merge_operator.h"
+#include "rocksdb/perf_context.h"
+#include "rocksdb/sst_file_manager.h"
+#include "rocksdb/statistics.h"
+#include "rocksdb/utilities/options_util.h"
+#include "rocksdb/utilities/transaction_db.h"
+#include "rocksdb/utilities/write_batch_with_index.h"
+
+/* MyRocks header files */
+#include "./rdb_buff.h"
+#include "./rdb_comparator.h"
+#include "./rdb_global.h"
+#include "./rdb_index_merge.h"
+#include "./rdb_io_watchdog.h"
+#include "./rdb_perf_context.h"
+#include "./rdb_sst_info.h"
+#include "./rdb_utils.h"
+#include "./ut0counter.h"
+
+#include "rdb_mariadb_server_port.h"
+
+class Field_blob;
+class Field_varstring;
+/**
+ @note MyRocks Coding Conventions:
+ MyRocks code follows the baseline MySQL coding conventions, available at
+ http://dev.mysql.com/doc/internals/en/coding-guidelines.html, with several
+ refinements (@see /storage/rocksdb/README file).
+*/
+
+namespace myrocks {
+
+class Rdb_converter;
+class Rdb_key_def;
+class Rdb_tbl_def;
+class Rdb_transaction;
+class Rdb_transaction_impl;
+class Rdb_writebatch_impl;
+class Rdb_field_encoder;
+/* collations, used in MariaRocks */
+enum collations_used {
+ COLLATION_UTF8MB4_BIN = 46,
+ COLLATION_LATIN1_BIN = 47,
+ COLLATION_UTF16LE_BIN = 55,
+ COLLATION_UTF32_BIN = 61,
+ COLLATION_UTF16_BIN = 62,
+ COLLATION_BINARY = 63,
+ COLLATION_UTF8_BIN = 83
+};
+
+#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported
+extern char *rocksdb_read_free_rpl_tables;
+#if defined(HAVE_PSI_INTERFACE)
+extern PSI_rwlock_key key_rwlock_read_free_rpl_tables;
+#endif
+extern Regex_list_handler rdb_read_free_regex_handler;
+#endif
+
+/**
+ @brief
+ Rdb_table_handler is a reference-counted structure storing information for
+ each open table. All the objects are stored in a global hash map.
+
+ //TODO: join this with Rdb_tbl_def ?
+*/
+struct Rdb_table_handler {
+ char *m_table_name;
+ uint m_table_name_length;
+ int m_ref_count;
+ atomic_stat<int> m_lock_wait_timeout_counter;
+ atomic_stat<int> m_deadlock_counter;
+
+ my_core::THR_LOCK m_thr_lock; ///< MySQL latch needed by m_db_lock
+
+ /* Stores cumulative table statistics */
+ my_io_perf_atomic_t m_io_perf_read;
+ my_io_perf_atomic_t m_io_perf_write;
+ Rdb_atomic_perf_counters m_table_perf_context;
+
+ /* Stores cached memtable estimate statistics */
+ std::atomic_uint m_mtcache_lock;
+ uint64_t m_mtcache_count;
+ uint64_t m_mtcache_size;
+ uint64_t m_mtcache_last_update;
+};
+
+} // namespace myrocks
+
+/* Provide hash function for GL_INDEX_ID so we can include it in sets */
+namespace std {
+template <>
+struct hash<myrocks::GL_INDEX_ID> {
+ std::size_t operator()(const myrocks::GL_INDEX_ID &gl_index_id) const {
+ const uint64_t val =
+ ((uint64_t)gl_index_id.cf_id << 32 | (uint64_t)gl_index_id.index_id);
+ return std::hash<uint64_t>()(val);
+ }
+};
+} // namespace std
+
+namespace myrocks {
+
+/**
+ @brief
+ Class definition for ROCKSDB storage engine plugin handler
+*/
+
+class ha_rocksdb : public my_core::handler {
+ my_core::THR_LOCK_DATA m_db_lock; ///< MySQL database lock
+
+ Rdb_table_handler *m_table_handler; ///< Open table handler
+
+ /* Iterator used for range scans and for full table/index scans */
+ rocksdb::Iterator *m_scan_it;
+
+ /* Same as handler::end_key but for start. Reverse-ordered scans need it */
+ key_range m_save_start_range;
+ const key_range *m_start_range;
+
+ /* Whether m_scan_it was created with skip_bloom=true */
+ bool m_scan_it_skips_bloom;
+
+ const rocksdb::Snapshot *m_scan_it_snapshot;
+
+ /* Buffers used for upper/lower bounds for m_scan_it. */
+ uchar *m_scan_it_lower_bound;
+ uchar *m_scan_it_upper_bound;
+ rocksdb::Slice m_scan_it_lower_bound_slice;
+ rocksdb::Slice m_scan_it_upper_bound_slice;
+
+ Rdb_tbl_def *m_tbl_def;
+
+ /* Primary Key encoder from KeyTupleFormat to StorageFormat */
+ std::shared_ptr<Rdb_key_def> m_pk_descr;
+
+ /* Array of index descriptors */
+ std::shared_ptr<Rdb_key_def> *m_key_descr_arr;
+
+ bool check_keyread_allowed(uint inx, uint part, bool all_parts) const;
+
+ /*
+ Number of key parts in PK. This is the same as
+ table->key_info[table->s->primary_key].keyparts
+ */
+ uint m_pk_key_parts;
+
+ /*
+ TRUE <=> Primary Key columns can be decoded from the index
+ */
+ mutable bool m_pk_can_be_decoded;
+
+ uchar *m_pk_tuple; /* Buffer for storing PK in KeyTupleFormat */
+ uchar *m_pk_packed_tuple; /* Buffer for storing PK in StorageFormat */
+ // ^^ todo: change it to 'char*'? TODO: ^ can we join this with last_rowkey?
+
+ /*
+ Temporary buffers for storing the key part of the Key/Value pair
+ for secondary indexes.
+ */
+ uchar *m_sk_packed_tuple;
+
+ /*
+ Temporary buffers for storing end key part of the Key/Value pair.
+ This is used for range scan only.
+ */
+ uchar *m_end_key_packed_tuple;
+
+ Rdb_string_writer m_sk_tails;
+ Rdb_string_writer m_pk_unpack_info;
+
+ /*
+    ha_rocksdb->index_read_map(.. HA_READ_KEY_EXACT or similar) saves here
+    the mem-comparable form of the index lookup tuple.
+ */
+ uchar *m_sk_match_prefix;
+ uint m_sk_match_length;
+
+ /* Buffer space for the above */
+ uchar *m_sk_match_prefix_buf;
+
+ /* Second buffers, used by UPDATE. */
+ uchar *m_sk_packed_tuple_old;
+ Rdb_string_writer m_sk_tails_old;
+
+ /* Buffers used for duplicate checking during unique_index_creation */
+ uchar *m_dup_sk_packed_tuple;
+ uchar *m_dup_sk_packed_tuple_old;
+
+ /*
+ Temporary space for packing VARCHARs (we provide it to
+ pack_record()/pack_index_tuple() calls).
+ */
+ uchar *m_pack_buffer;
+
+  /* Class to convert between MySQL format and RocksDB format */
+ std::shared_ptr<Rdb_converter> m_converter;
+
+ /*
+ Pointer to the original TTL timestamp value (8 bytes) during UPDATE.
+ */
+ char *m_ttl_bytes;
+ /*
+ The TTL timestamp value can change if the explicit TTL column is
+ updated. If we detect this when updating the PK, we indicate it here so
+ we know we must always update any SK's.
+ */
+ bool m_ttl_bytes_updated;
+
+ /* rowkey of the last record we've read, in StorageFormat. */
+ String m_last_rowkey;
+
+ /*
+ Last retrieved record, in table->record[0] data format.
+
+ This is used only when we get the record with rocksdb's Get() call (The
+ other option is when we get a rocksdb::Slice from an iterator)
+ */
+ rocksdb::PinnableSlice m_retrieved_record;
+
+ /* Type of locking to apply to rows */
+ enum { RDB_LOCK_NONE, RDB_LOCK_READ, RDB_LOCK_WRITE } m_lock_rows;
+
+ /* TRUE means we're doing an index-only read. FALSE means otherwise. */
+ bool m_keyread_only;
+
+ bool m_skip_scan_it_next_call;
+
+ /* TRUE means we are accessing the first row after a snapshot was created */
+ bool m_rnd_scan_is_new_snapshot;
+
+ /*
+ TRUE means we should skip unique key checks for this table if the
+ replication lag gets too large
+ */
+ bool m_skip_unique_check;
+
+ /*
+    TRUE means INSERT ON DUPLICATE KEY UPDATE. In such a case we can optimize
+    by remembering the failed attempt (if there is one that violates the
+    uniqueness check) in write_row, and in the following index_read we skip
+    the lock check and the read entirely.
+ */
+ bool m_insert_with_update;
+
+ /* TRUE if last time the insertion failed due to duplicated PK */
+ bool m_dup_pk_found;
+
+#ifndef DBUG_OFF
+  /* Last retrieved record, for sanity checking */
+ String m_dup_pk_retrieved_record;
+#endif
+
+ /**
+ @brief
+ This is a bitmap of indexes (i.e. a set) whose keys (in future, values) may
+ be changed by this statement. Indexes that are not in the bitmap do not need
+ to be updated.
+    @note Valid inside UPDATE statements, IFF (m_update_scope_is_valid == true).
+ */
+ my_core::key_map m_update_scope;
+ bool m_update_scope_is_valid;
+
+ /* SST information used for bulk loading the primary key */
+ std::shared_ptr<Rdb_sst_info> m_sst_info;
+
+ /*
+ MySQL index number for duplicate key error
+ */
+ uint m_dupp_errkey;
+
+ int create_key_defs(const TABLE *const table_arg,
+ Rdb_tbl_def *const tbl_def_arg,
+ const TABLE *const old_table_arg = nullptr,
+ const Rdb_tbl_def *const old_tbl_def_arg = nullptr) const
+ MY_ATTRIBUTE((__nonnull__(2, 3), __warn_unused_result__));
+ int secondary_index_read(const int keyno, uchar *const buf)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+ void setup_iterator_for_rnd_scan();
+ bool is_ascending(const Rdb_key_def &keydef,
+ enum ha_rkey_function find_flag) const
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+ void setup_iterator_bounds(const Rdb_key_def &kd,
+ const rocksdb::Slice &eq_cond, size_t bound_len,
+ uchar *const lower_bound, uchar *const upper_bound,
+ rocksdb::Slice *lower_bound_slice,
+ rocksdb::Slice *upper_bound_slice);
+ bool can_use_bloom_filter(THD *thd, const Rdb_key_def &kd,
+ const rocksdb::Slice &eq_cond,
+ const bool use_all_keys);
+ bool check_bloom_and_set_bounds(THD *thd, const Rdb_key_def &kd,
+ const rocksdb::Slice &eq_cond,
+ const bool use_all_keys, size_t bound_len,
+ uchar *const lower_bound,
+ uchar *const upper_bound,
+ rocksdb::Slice *lower_bound_slice,
+ rocksdb::Slice *upper_bound_slice);
+ void setup_scan_iterator(const Rdb_key_def &kd, rocksdb::Slice *slice,
+ const bool use_all_keys, const uint eq_cond_len)
+ MY_ATTRIBUTE((__nonnull__));
+ void release_scan_iterator(void);
+
+ rocksdb::Status get_for_update(
+ Rdb_transaction *const tx,
+ rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key, rocksdb::PinnableSlice *value) const;
+
+ int get_row_by_rowid(uchar *const buf, const char *const rowid,
+ const uint rowid_size, const bool skip_lookup = false,
+ const bool skip_ttl_check = true)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+ int get_row_by_rowid(uchar *const buf, const uchar *const rowid,
+ const uint rowid_size, const bool skip_lookup = false,
+ const bool skip_ttl_check = true)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)) {
+ return get_row_by_rowid(buf, reinterpret_cast<const char *>(rowid),
+ rowid_size, skip_lookup, skip_ttl_check);
+ }
+
+ void load_auto_incr_value();
+ ulonglong load_auto_incr_value_from_index();
+ void update_auto_incr_val(ulonglong val);
+ void update_auto_incr_val_from_field();
+ rocksdb::Status get_datadic_auto_incr(Rdb_transaction *const tx,
+ const GL_INDEX_ID &gl_index_id,
+ ulonglong *new_val) const;
+ longlong update_hidden_pk_val();
+ int load_hidden_pk_value() MY_ATTRIBUTE((__warn_unused_result__));
+ int read_hidden_pk_id_from_rowkey(longlong *const hidden_pk_id)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+ bool can_use_single_delete(const uint index) const
+ MY_ATTRIBUTE((__warn_unused_result__));
+ bool is_blind_delete_enabled();
+ bool skip_unique_check() const MY_ATTRIBUTE((__warn_unused_result__));
+#ifdef MARIAROCKS_NOT_YET // MDEV-10975
+ void set_force_skip_unique_check(bool skip) override;
+#endif
+ bool commit_in_the_middle() MY_ATTRIBUTE((__warn_unused_result__));
+ bool do_bulk_commit(Rdb_transaction *const tx)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+ bool has_hidden_pk(const TABLE *const table) const
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ void update_row_stats(const operation_type &type);
+
+ void set_last_rowkey(const uchar *const old_data);
+
+ /*
+ For the active index, indicates which columns must be covered for the
+ current lookup to be covered. If the bitmap field is null, that means this
+ index does not cover the current lookup for any record.
+ */
+ MY_BITMAP m_lookup_bitmap = {nullptr, nullptr, nullptr, 0, 0};
+
+ int alloc_key_buffers(const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg,
+ bool alloc_alter_buffers = false)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+ void free_key_buffers();
+
+ // the buffer size should be at least 2*Rdb_key_def::INDEX_NUMBER_SIZE
+ rocksdb::Range get_range(const int i, uchar buf[]) const;
+
+ /*
+ Perf timers for data reads
+ */
+ Rdb_io_perf m_io_perf;
+
+ /*
+ Update stats
+ */
+ void update_stats(void);
+
+ public:
+ /*
+ The following two are currently only used for getting the range bounds
+ from QUICK_SELECT_DESC.
+ We don't need to implement prepare_index_key_scan[_map] because it is
+ only used with HA_READ_KEY_EXACT and HA_READ_PREFIX_LAST where one
+ can infer the bounds of the range being scanned, anyway.
+ */
+ int prepare_index_scan() override;
+ int prepare_range_scan(const key_range *start_key,
+ const key_range *end_key) override;
+
+ /*
+    Controls whether writes include checksums. This is updated from the
+    session variable at the start of each query.
+ */
+ bool m_store_row_debug_checksums;
+
+ int m_checksums_pct;
+
+ ha_rocksdb(my_core::handlerton *const hton,
+ my_core::TABLE_SHARE *const table_arg);
+ virtual ~ha_rocksdb() override {
+ int err MY_ATTRIBUTE((__unused__));
+ err = finalize_bulk_load(false);
+ if (err != 0) {
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Error %d finalizing bulk load while closing "
+ "handler.",
+ err);
+ }
+ }
+
+ /** @brief
+ The name that will be used for display purposes.
+ */
+ const char *table_type() const /*override*/ {
+ DBUG_ENTER_FUNC();
+ // MariaDB: this function is not virtual, however ha_innodb
+    // declares it (and then never uses it!). psergey-merge-todo.
+ DBUG_RETURN(rocksdb_hton_name);
+ }
+
+ /* The following is only used by SHOW KEYS: */
+ const char *index_type(uint inx) override {
+ DBUG_ENTER_FUNC();
+
+ DBUG_RETURN("LSMTREE");
+ }
+
+ /*
+ Not present in MariaDB:
+ const char **bas_ext() const override;
+ */
+
+ /*
+    Returns the table's base name.
+ */
+ const std::string &get_table_basename() const;
+
+ /** @brief
+ This is a list of flags that indicate what functionality the storage engine
+ implements. The current table flags are documented in handler.h
+ */
+  ulonglong table_flags() const override;
+private:
+ bool init_with_fields(); /* no 'override' in MariaDB */
+public:
+ /** @brief
+ This is a bitmap of flags that indicates how the storage engine
+ implements indexes. The current index flags are documented in
+ handler.h. If you do not implement indexes, just return zero here.
+
+ @details
+ part is the key part to check. First key part is 0.
+ If all_parts is set, MySQL wants to know the flags for the combined
+ index, up to and including 'part'.
+ */
+ ulong index_flags(uint inx, uint part, bool all_parts) const override;
+
+ const key_map *keys_to_use_for_scanning() override {
+ DBUG_ENTER_FUNC();
+
+ DBUG_RETURN(&key_map_full);
+ }
+
+ bool primary_key_is_clustered() override {
+ DBUG_ENTER_FUNC();
+
+ DBUG_RETURN(true);
+ }
+
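+  /*
+    Note (descriptive, not upstream text): row checksums are sampled; they
+    are written for roughly m_checksums_pct percent of rows, chosen at
+    random by the check below.
+  */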
+ bool should_store_row_debug_checksums() const {
+ return m_store_row_debug_checksums && (rand() % 100 < m_checksums_pct);
+ }
+
+ int rename_table(const char *const from, const char *const to) override
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ int convert_record_from_storage_format(const rocksdb::Slice *const key,
+ const rocksdb::Slice *const value,
+ uchar *const buf)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ int convert_record_from_storage_format(const rocksdb::Slice *const key,
+ uchar *const buf)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ static const std::vector<std::string> parse_into_tokens(const std::string &s,
+ const char delim);
+
+ static const std::string generate_cf_name(
+ const uint index, const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg, bool *per_part_match_found);
+
+ static const char *get_key_name(const uint index,
+ const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ static const char *get_key_comment(const uint index,
+ const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ static const std::string get_table_comment(const TABLE *const table_arg)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ static bool is_hidden_pk(const uint index, const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ static uint pk_index(const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ static bool is_pk(const uint index, const TABLE *table_arg,
+ const Rdb_tbl_def *tbl_def_arg)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+ /** @brief
+ unireg.cc will call max_supported_record_length(), max_supported_keys(),
+    max_supported_key_parts(), and max_supported_key_length()
+ to make sure that the storage engine can handle the data it is about to
+ send. Return *real* limits of your storage engine here; MySQL will do
+ min(your_limits, MySQL_limits) automatically.
+ */
+ uint max_supported_record_length() const override {
+ DBUG_ENTER_FUNC();
+
+ DBUG_RETURN(HA_MAX_REC_LENGTH);
+ }
+
+ uint max_supported_keys() const override {
+ DBUG_ENTER_FUNC();
+
+ DBUG_RETURN(MAX_INDEXES);
+ }
+
+ uint max_supported_key_parts() const override {
+ DBUG_ENTER_FUNC();
+
+ DBUG_RETURN(MAX_REF_PARTS);
+ }
+
+ uint max_supported_key_part_length() const override;
+
+ /** @brief
+ unireg.cc will call this to make sure that the storage engine can handle
+ the data it is about to send. Return *real* limits of your storage engine
+ here; MySQL will do min(your_limits, MySQL_limits) automatically.
+
+ @details
+ There is no need to implement ..._key_... methods if your engine doesn't
+ support indexes.
+ */
+ uint max_supported_key_length() const override {
+ DBUG_ENTER_FUNC();
+
+ DBUG_RETURN(16 * 1024); /* just to return something*/
+ }
+
+ /**
+ TODO: return actual upper bound of number of records in the table.
+ (e.g. save number of records seen on full table scan and/or use file size
+ as upper bound)
+ */
+ ha_rows estimate_rows_upper_bound() override {
+ DBUG_ENTER_FUNC();
+
+ DBUG_RETURN(HA_POS_ERROR);
+ }
+
+ /* At the moment, we're ok with default handler::index_init() implementation.
+ */
+ int index_read_map(uchar *const buf, const uchar *const key,
+ key_part_map keypart_map,
+ enum ha_rkey_function find_flag) override
+ MY_ATTRIBUTE((__warn_unused_result__));
+
+ int index_read_map_impl(uchar *const buf, const uchar *const key,
+ key_part_map keypart_map,
+ enum ha_rkey_function find_flag,
+ const key_range *end_key)
+ MY_ATTRIBUTE((__warn_unused_result__));
+
+ bool is_using_full_key(key_part_map keypart_map, uint actual_key_parts);
+ int read_range_first(const key_range *const start_key,
+ const key_range *const end_key, bool eq_range,
+ bool sorted) override
+ MY_ATTRIBUTE((__warn_unused_result__));
+
+ virtual double scan_time() override {
+ DBUG_ENTER_FUNC();
+
+ DBUG_RETURN(
+ static_cast<double>((stats.records + stats.deleted) / 20.0 + 10));
+ }
+
+ virtual double read_time(uint, uint, ha_rows rows) override;
+ virtual void print_error(int error, myf errflag) override;
+
+ int open(const char *const name, int mode, uint test_if_locked) override
+ MY_ATTRIBUTE((__warn_unused_result__));
+ int close(void) override MY_ATTRIBUTE((__warn_unused_result__));
+
+ int write_row(uchar *const buf) override
+ MY_ATTRIBUTE((__warn_unused_result__));
+ int update_row(const uchar *const old_data, uchar *const new_data) override
+ MY_ATTRIBUTE((__warn_unused_result__));
+ int delete_row(const uchar *const buf) override
+ MY_ATTRIBUTE((__warn_unused_result__));
+ rocksdb::Status delete_or_singledelete(uint index, Rdb_transaction *const tx,
+ rocksdb::ColumnFamilyHandle *const cf,
+ const rocksdb::Slice &key)
+ MY_ATTRIBUTE((__warn_unused_result__));
+
+ int index_next(uchar *const buf) override
+ MY_ATTRIBUTE((__warn_unused_result__));
+ int index_next_with_direction(uchar *const buf, bool move_forward)
+ MY_ATTRIBUTE((__warn_unused_result__));
+ int index_prev(uchar *const buf) override
+ MY_ATTRIBUTE((__warn_unused_result__));
+
+ int index_first(uchar *const buf) override
+ MY_ATTRIBUTE((__warn_unused_result__));
+ int index_last(uchar *const buf) override
+ MY_ATTRIBUTE((__warn_unused_result__));
+
+ class Item *idx_cond_push(uint keyno, class Item *const idx_cond) override;
+ /*
+ Default implementation from cancel_pushed_idx_cond() suits us
+ */
+ private:
+ struct key_def_cf_info {
+ rocksdb::ColumnFamilyHandle *cf_handle;
+ bool is_reverse_cf;
+ bool is_per_partition_cf;
+ };
+
+ struct update_row_info {
+ Rdb_transaction *tx;
+ const uchar *new_data;
+ const uchar *old_data;
+ rocksdb::Slice new_pk_slice;
+ rocksdb::Slice old_pk_slice;
+ rocksdb::Slice old_pk_rec;
+
+ // "unpack_info" data for the new PK value
+ Rdb_string_writer *new_pk_unpack_info;
+
+ longlong hidden_pk_id;
+ bool skip_unique_check;
+ };
+
+ /*
+ Used to check for duplicate entries during fast unique secondary index
+ creation.
+ */
+ struct unique_sk_buf_info {
+ bool sk_buf_switch = false;
+ rocksdb::Slice sk_memcmp_key;
+ rocksdb::Slice sk_memcmp_key_old;
+ uchar *dup_sk_buf;
+ uchar *dup_sk_buf_old;
+
+ /*
+ This method is meant to be called back to back during inplace creation
+ of unique indexes. It will switch between two buffers, which
+ will each store the memcmp form of secondary keys, which are then
+ converted to slices in sk_memcmp_key or sk_memcmp_key_old.
+
+ Switching buffers on each iteration allows us to retain the
+ sk_memcmp_key_old value for duplicate comparison.
+ */
+ inline uchar *swap_and_get_sk_buf() {
+ sk_buf_switch = !sk_buf_switch;
+ return sk_buf_switch ? dup_sk_buf : dup_sk_buf_old;
+ }
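+
+    /*
+      Illustrative call pattern (the packing steps are hypothetical): on
+      each iteration the caller packs the current key into the buffer
+      returned here, while the previous key survives in the other buffer
+      for the duplicate comparison.
+
+        uchar *cur = info.swap_and_get_sk_buf();   // iteration N
+        // ... pack key N into cur, point sk_memcmp_key at it ...
+        uchar *nxt = info.swap_and_get_sk_buf();   // iteration N+1
+        // key N is still intact and reachable via sk_memcmp_key_old
+    */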
+ };
+
+ int create_cfs(const TABLE *const table_arg, Rdb_tbl_def *const tbl_def_arg,
+ std::array<struct key_def_cf_info, MAX_INDEXES + 1> *const cfs)
+ const MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ int create_key_def(const TABLE *const table_arg, const uint i,
+ const Rdb_tbl_def *const tbl_def_arg,
+ std::shared_ptr<Rdb_key_def> *const new_key_def,
+ const struct key_def_cf_info &cf_info, uint64 ttl_duration,
+ const std::string &ttl_column) const
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ int create_inplace_key_defs(
+ const TABLE *const table_arg, Rdb_tbl_def *vtbl_def_arg,
+ const TABLE *const old_table_arg,
+ const Rdb_tbl_def *const old_tbl_def_arg,
+ const std::array<key_def_cf_info, MAX_INDEXES + 1> &cf,
+ uint64 ttl_duration, const std::string &ttl_column) const
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ std::unordered_map<std::string, uint> get_old_key_positions(
+ const TABLE *table_arg, const Rdb_tbl_def *tbl_def_arg,
+ const TABLE *old_table_arg, const Rdb_tbl_def *old_tbl_def_arg) const
+ MY_ATTRIBUTE((__nonnull__));
+
+ int compare_key_parts(const KEY *const old_key,
+ const KEY *const new_key) const
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ int compare_keys(const KEY *const old_key, const KEY *const new_key) const
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ bool should_hide_ttl_rec(const Rdb_key_def &kd,
+ const rocksdb::Slice &ttl_rec_val,
+ const int64_t curr_ts)
+ MY_ATTRIBUTE((__warn_unused_result__));
+ int rocksdb_skip_expired_records(const Rdb_key_def &kd,
+ rocksdb::Iterator *const iter,
+ bool seek_backward);
+
+ int index_first_intern(uchar *buf)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+ int index_last_intern(uchar *buf)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ int find_icp_matching_index_rec(const bool move_forward, uchar *const buf)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ void calc_updated_indexes();
+ int update_write_row(const uchar *const old_data, const uchar *const new_data,
+ const bool skip_unique_check)
+ MY_ATTRIBUTE((__warn_unused_result__));
+ int get_pk_for_update(struct update_row_info *const row_info);
+ int check_and_lock_unique_pk(const uint key_id,
+ const struct update_row_info &row_info,
+ bool *const found)
+ MY_ATTRIBUTE((__warn_unused_result__));
+ int check_and_lock_sk(const uint key_id,
+ const struct update_row_info &row_info,
+ bool *const found)
+ MY_ATTRIBUTE((__warn_unused_result__));
+ int check_uniqueness_and_lock(const struct update_row_info &row_info,
+ bool pk_changed)
+ MY_ATTRIBUTE((__warn_unused_result__));
+ bool over_bulk_load_threshold(int *err)
+ MY_ATTRIBUTE((__warn_unused_result__));
+ int check_duplicate_sk(const TABLE *table_arg, const Rdb_key_def &key_def,
+ const rocksdb::Slice *key,
+ struct unique_sk_buf_info *sk_info)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+ int bulk_load_key(Rdb_transaction *const tx, const Rdb_key_def &kd,
+ const rocksdb::Slice &key, const rocksdb::Slice &value,
+ bool sort)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+ void update_bytes_written(ulonglong bytes_written);
+ int update_write_pk(const Rdb_key_def &kd,
+ const struct update_row_info &row_info,
+ const bool pk_changed)
+ MY_ATTRIBUTE((__warn_unused_result__));
+ int update_write_sk(const TABLE *const table_arg, const Rdb_key_def &kd,
+ const struct update_row_info &row_info,
+ const bool bulk_load_sk)
+ MY_ATTRIBUTE((__warn_unused_result__));
+ int update_write_indexes(const struct update_row_info &row_info,
+ const bool pk_changed)
+ MY_ATTRIBUTE((__warn_unused_result__));
+
+ int read_key_exact(const Rdb_key_def &kd, rocksdb::Iterator *const iter,
+ const bool using_full_key, const rocksdb::Slice &key_slice,
+ const int64_t ttl_filter_ts)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+ int read_before_key(const Rdb_key_def &kd, const bool using_full_key,
+ const rocksdb::Slice &key_slice,
+ const int64_t ttl_filter_ts)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+ int read_after_key(const Rdb_key_def &kd, const rocksdb::Slice &key_slice,
+ const int64_t ttl_filter_ts)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+ int position_to_correct_key(const Rdb_key_def &kd,
+ const enum ha_rkey_function &find_flag,
+ const bool full_key_match, const uchar *const key,
+ const key_part_map &keypart_map,
+ const rocksdb::Slice &key_slice,
+ bool *const move_forward,
+ const int64_t ttl_filter_ts)
+ MY_ATTRIBUTE((__warn_unused_result__));
+
+ int read_row_from_primary_key(uchar *const buf)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+ int read_row_from_secondary_key(uchar *const buf, const Rdb_key_def &kd,
+ bool move_forward)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ int calc_eq_cond_len(const Rdb_key_def &kd,
+ const enum ha_rkey_function &find_flag,
+ const rocksdb::Slice &slice,
+ const int bytes_changed_by_succ,
+ const key_range *const end_key,
+ uint *const end_key_packed_size)
+ MY_ATTRIBUTE((__warn_unused_result__));
+
+ Rdb_tbl_def *get_table_if_exists(const char *const tablename)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+ void read_thd_vars(THD *const thd) MY_ATTRIBUTE((__nonnull__));
+
+ bool contains_foreign_key(THD *const thd)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ int inplace_populate_sk(
+ TABLE *const table_arg,
+ const std::unordered_set<std::shared_ptr<Rdb_key_def>> &indexes)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ int finalize_bulk_load(bool print_client_error = true)
+ MY_ATTRIBUTE((__warn_unused_result__));
+
+ int calculate_stats_for_table() MY_ATTRIBUTE((__warn_unused_result__));
+
+ bool should_skip_invalidated_record(const int rc);
+ bool should_recreate_snapshot(const int rc, const bool is_new_snapshot);
+ bool can_assume_tracked(THD *thd);
+
+ public:
+ int index_init(uint idx, bool sorted) override
+ MY_ATTRIBUTE((__warn_unused_result__));
+ int index_end() override MY_ATTRIBUTE((__warn_unused_result__));
+
+ void unlock_row() override;
+
+ /** @brief
+ Unlike index_init(), rnd_init() can be called two consecutive times
+ without rnd_end() in between (it only makes sense if scan=1). In this
+    case, the second call should prepare for the new table scan (e.g. if
+    rnd_init() allocates the cursor, the second call should position the
+    cursor to the start of the table; there is no need to deallocate and
+    allocate it again). This is a required method.
+ */
+ int rnd_init(bool scan) override MY_ATTRIBUTE((__warn_unused_result__));
+ int rnd_end() override MY_ATTRIBUTE((__warn_unused_result__));
+
+ int rnd_next(uchar *const buf) override
+ MY_ATTRIBUTE((__warn_unused_result__));
+ int rnd_next_with_direction(uchar *const buf, bool move_forward)
+ MY_ATTRIBUTE((__warn_unused_result__));
+
+ int rnd_pos(uchar *const buf, uchar *const pos) override
+ MY_ATTRIBUTE((__warn_unused_result__));
+ void position(const uchar *const record) override;
+ int info(uint) override;
+
+ /* This function will always return success, therefore no annotation related
+ * to checking the return value. Can't change the signature because it's
+ * required by the interface. */
+ int extra(enum ha_extra_function operation) override;
+
+ int start_stmt(THD *const thd, thr_lock_type lock_type) override
+ MY_ATTRIBUTE((__warn_unused_result__));
+ int external_lock(THD *const thd, int lock_type) override
+ MY_ATTRIBUTE((__warn_unused_result__));
+ int truncate() override MY_ATTRIBUTE((__warn_unused_result__));
+
+ int reset() override {
+ DBUG_ENTER_FUNC();
+
+ /* Free blob data */
+ m_retrieved_record.Reset();
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+ }
+
+ int check(THD *const thd, HA_CHECK_OPT *const check_opt) override
+ MY_ATTRIBUTE((__warn_unused_result__));
+ int remove_rows(Rdb_tbl_def *const tbl);
+ ha_rows records_in_range(uint inx, key_range *const min_key,
+ key_range *const max_key) override
+ MY_ATTRIBUTE((__warn_unused_result__));
+
+ int delete_table(Rdb_tbl_def *const tbl);
+ int delete_table(const char *const from) override
+ MY_ATTRIBUTE((__warn_unused_result__));
+ int create(const char *const name, TABLE *const form,
+ HA_CREATE_INFO *const create_info) override
+ MY_ATTRIBUTE((__warn_unused_result__));
+ int create_table(const std::string &table_name, const TABLE *table_arg,
+ ulonglong auto_increment_value);
+ bool check_if_incompatible_data(HA_CREATE_INFO *const info,
+ uint table_changes) override
+ MY_ATTRIBUTE((__warn_unused_result__));
+
+ THR_LOCK_DATA **store_lock(THD *const thd, THR_LOCK_DATA **to,
+ enum thr_lock_type lock_type) override
+ MY_ATTRIBUTE((__warn_unused_result__));
+
+ my_bool register_query_cache_table(THD *const thd, char *const table_key,
+ uint key_length,
+ qc_engine_callback *const engine_callback,
+ ulonglong *const engine_data) override {
+ DBUG_ENTER_FUNC();
+
+ /* Currently, we don't support query cache */
+ DBUG_RETURN(FALSE);
+ }
+
+ bool get_error_message(const int error, String *const buf) override
+ MY_ATTRIBUTE((__nonnull__));
+
+ static int rdb_error_to_mysql(const rocksdb::Status &s,
+ const char *msg = nullptr)
+ MY_ATTRIBUTE((__warn_unused_result__));
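+
+  /* Illustrative call pattern for rdb_error_to_mysql() (the variable names
+     here are hypothetical; only the conversion helper itself is declared
+     above):
+
+       const rocksdb::Status s = tx->Put(cf_handle, key_slice, value_slice);
+       if (!s.ok()) {
+         DBUG_RETURN(rdb_error_to_mysql(s, "failed to write row"));
+       }
+  */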
+
+ void get_auto_increment(ulonglong offset, ulonglong increment,
+ ulonglong nb_desired_values,
+ ulonglong *const first_value,
+ ulonglong *const nb_reserved_values) override;
+ void update_create_info(HA_CREATE_INFO *const create_info) override;
+ int optimize(THD *const thd, HA_CHECK_OPT *const check_opt) override
+ MY_ATTRIBUTE((__warn_unused_result__));
+ int analyze(THD *const thd, HA_CHECK_OPT *const check_opt) override
+ MY_ATTRIBUTE((__warn_unused_result__));
+
+ enum_alter_inplace_result check_if_supported_inplace_alter(
+ TABLE *altered_table,
+ my_core::Alter_inplace_info *const ha_alter_info) override;
+
+ bool prepare_inplace_alter_table(
+ TABLE *const altered_table,
+ my_core::Alter_inplace_info *const ha_alter_info) override;
+
+ bool inplace_alter_table(
+ TABLE *const altered_table,
+ my_core::Alter_inplace_info *const ha_alter_info) override;
+
+ bool commit_inplace_alter_table(
+ TABLE *const altered_table,
+ my_core::Alter_inplace_info *const ha_alter_info, bool commit) override;
+
+ void set_skip_unique_check_tables(const char *const whitelist);
+#ifdef MARIAROCKS_NOT_YET // MDEV-10976
+ bool is_read_free_rpl_table() const;
+#endif
+
+#ifdef MARIAROCKS_NOT_YET // MDEV-10976
+ public:
+ virtual void rpl_before_delete_rows() override;
+ virtual void rpl_after_delete_rows() override;
+ virtual void rpl_before_update_rows() override;
+ virtual void rpl_after_update_rows() override;
+ virtual bool use_read_free_rpl() const override;
+#endif // MARIAROCKS_NOT_YET
+
+ private:
+ /* Flags tracking if we are inside different replication operation */
+ bool m_in_rpl_delete_rows;
+ bool m_in_rpl_update_rows;
+
+ bool m_force_skip_unique_check;
+};
+
+/*
+  Helper class for in-place ALTER TABLE, storing handler context between
+  the in-place alter callbacks
+*/
+struct Rdb_inplace_alter_ctx : public my_core::inplace_alter_handler_ctx {
+ /* The new table definition */
+ Rdb_tbl_def *const m_new_tdef;
+
+ /* Stores the original key definitions */
+ std::shared_ptr<Rdb_key_def> *const m_old_key_descr;
+
+ /* Stores the new key definitions */
+ std::shared_ptr<Rdb_key_def> *m_new_key_descr;
+
+ /* Stores the old number of key definitions */
+ const uint m_old_n_keys;
+
+ /* Stores the new number of key definitions */
+ const uint m_new_n_keys;
+
+  /* Stores the key definitions of the added indexes */
+ const std::unordered_set<std::shared_ptr<Rdb_key_def>> m_added_indexes;
+
+  /* Stores the GL_INDEX_IDs of the dropped indexes */
+ const std::unordered_set<GL_INDEX_ID> m_dropped_index_ids;
+
+ /* Stores number of keys to add */
+ const uint m_n_added_keys;
+
+ /* Stores number of keys to drop */
+ const uint m_n_dropped_keys;
+
+ /* Stores the largest current auto increment value in the index */
+ const ulonglong m_max_auto_incr;
+
+ Rdb_inplace_alter_ctx(
+ Rdb_tbl_def *new_tdef, std::shared_ptr<Rdb_key_def> *old_key_descr,
+ std::shared_ptr<Rdb_key_def> *new_key_descr, uint old_n_keys,
+ uint new_n_keys,
+ std::unordered_set<std::shared_ptr<Rdb_key_def>> added_indexes,
+ std::unordered_set<GL_INDEX_ID> dropped_index_ids, uint n_added_keys,
+ uint n_dropped_keys, ulonglong max_auto_incr)
+ : my_core::inplace_alter_handler_ctx(),
+ m_new_tdef(new_tdef),
+ m_old_key_descr(old_key_descr),
+ m_new_key_descr(new_key_descr),
+ m_old_n_keys(old_n_keys),
+ m_new_n_keys(new_n_keys),
+ m_added_indexes(added_indexes),
+ m_dropped_index_ids(dropped_index_ids),
+ m_n_added_keys(n_added_keys),
+ m_n_dropped_keys(n_dropped_keys),
+ m_max_auto_incr(max_auto_incr) {}
+
+ ~Rdb_inplace_alter_ctx() {}
+
+ private:
+ /* Disable Copying */
+ Rdb_inplace_alter_ctx(const Rdb_inplace_alter_ctx &);
+ Rdb_inplace_alter_ctx &operator=(const Rdb_inplace_alter_ctx &);
+};
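+
+/*
+  Sketch (under the usual server conventions, not verbatim from this patch)
+  of how this context is threaded through the in-place ALTER callbacks: the
+  handler allocates it in prepare_inplace_alter_table() and parks it in
+  ha_alter_info->handler_ctx, which the server hands back to the later
+  phases and eventually frees:
+
+    ha_alter_info->handler_ctx = new Rdb_inplace_alter_ctx(
+        new_tdef, old_key_descr, new_key_descr, old_n_keys, new_n_keys,
+        added_indexes, dropped_index_ids, n_added_keys, n_dropped_keys,
+        max_auto_incr);
+
+  inplace_alter_table() and commit_inplace_alter_table() then recover it via
+  static_cast<Rdb_inplace_alter_ctx *>(ha_alter_info->handler_ctx).
+*/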
+
+// file name indicating RocksDB data corruption
+std::string rdb_corruption_marker_file_name();
+
+const int MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL= MariaDB_PLUGIN_MATURITY_STABLE;
+
+extern bool prevent_myrocks_loading;
+
+void sql_print_verbose_info(const char *format, ...);
+
+} // namespace myrocks
+
diff --git a/storage/rocksdb/ha_rocksdb_proto.h b/storage/rocksdb/ha_rocksdb_proto.h
new file mode 100644
index 00000000000..03d24957a23
--- /dev/null
+++ b/storage/rocksdb/ha_rocksdb_proto.h
@@ -0,0 +1,103 @@
+/*
+ Copyright (c) 2012,2013 Monty Program Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+#pragma once
+
+/* C++ standard header files */
+#include <string>
+#include <vector>
+
+/* MySQL header files */
+#include "./sql_string.h"
+
+/* RocksDB includes */
+#include "rocksdb/table.h"
+#include "rocksdb/utilities/transaction_db.h"
+
+namespace myrocks {
+
+enum RDB_IO_ERROR_TYPE {
+ RDB_IO_ERROR_TX_COMMIT,
+ RDB_IO_ERROR_DICT_COMMIT,
+ RDB_IO_ERROR_BG_THREAD,
+ RDB_IO_ERROR_GENERAL,
+ RDB_IO_ERROR_LAST
+};
+
+const char *get_rdb_io_error_string(const RDB_IO_ERROR_TYPE err_type);
+
+void rdb_handle_io_error(const rocksdb::Status status,
+ const RDB_IO_ERROR_TYPE err_type)
+#if defined(__clang__)
+ MY_ATTRIBUTE((optnone));
+#else
+ MY_ATTRIBUTE((noinline,noclone));
+#endif
+
+int rdb_normalize_tablename(const std::string &tablename, std::string *str)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+int rdb_split_normalized_tablename(const std::string &fullname, std::string *db,
+ std::string *table = nullptr,
+ std::string *partition = nullptr)
+ MY_ATTRIBUTE((__warn_unused_result__));
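+
+/*
+  Illustrative (assumed) behaviour of the two helpers above: a table path
+  such as "./mydb/mytable" normalizes to "mydb.mytable", which
+  rdb_split_normalized_tablename() splits into db="mydb", table="mytable",
+  with any trailing partition component (per the server's partition naming
+  convention) returned separately via *partition.
+*/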
+
+std::vector<std::string> rdb_get_open_table_names(void);
+
+class Rdb_perf_counters;
+int rdb_get_table_perf_counters(const char *tablename,
+ Rdb_perf_counters *counters)
+ MY_ATTRIBUTE((__nonnull__(2)));
+
+void rdb_get_global_perf_counters(Rdb_perf_counters *counters)
+ MY_ATTRIBUTE((__nonnull__(1)));
+
+void rdb_queue_save_stats_request();
+
+/*
+ Access to singleton objects.
+*/
+
+rocksdb::TransactionDB *rdb_get_rocksdb_db();
+
+class Rdb_cf_manager;
+Rdb_cf_manager &rdb_get_cf_manager();
+
+const rocksdb::BlockBasedTableOptions &rdb_get_table_options();
+bool rdb_is_ttl_enabled();
+bool rdb_is_ttl_read_filtering_enabled();
+#ifndef DBUG_OFF
+int rdb_dbug_set_ttl_rec_ts();
+int rdb_dbug_set_ttl_snapshot_ts();
+int rdb_dbug_set_ttl_read_filter_ts();
+bool rdb_dbug_set_ttl_ignore_pk();
+#endif
+
+enum operation_type : int;
+void rdb_update_global_stats(const operation_type &type, uint count,
+ bool is_system_table = false);
+
+class Rdb_dict_manager;
+Rdb_dict_manager *rdb_get_dict_manager(void)
+ MY_ATTRIBUTE((__warn_unused_result__));
+
+class Rdb_ddl_manager;
+Rdb_ddl_manager *rdb_get_ddl_manager(void)
+ MY_ATTRIBUTE((__warn_unused_result__));
+
+class Rdb_binlog_manager;
+Rdb_binlog_manager *rdb_get_binlog_manager(void)
+ MY_ATTRIBUTE((__warn_unused_result__));
+} // namespace myrocks
diff --git a/storage/rocksdb/logger.h b/storage/rocksdb/logger.h
new file mode 100644
index 00000000000..8902bc18893
--- /dev/null
+++ b/storage/rocksdb/logger.h
@@ -0,0 +1,85 @@
+/*
+ Copyright (c) 2015, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+#pragma once
+
+#include <log.h>
+#include <sstream>
+#include <string>
+
+namespace myrocks {
+
+class Rdb_logger : public rocksdb::Logger {
+ public:
+ explicit Rdb_logger(const rocksdb::InfoLogLevel log_level =
+ rocksdb::InfoLogLevel::ERROR_LEVEL)
+ : m_mysql_log_level(log_level) {}
+
+ void Logv(const rocksdb::InfoLogLevel log_level, const char *format,
+ va_list ap) override {
+ DBUG_ASSERT(format != nullptr);
+
+ enum loglevel mysql_log_level;
+
+ if (m_logger) {
+ m_logger->Logv(log_level, format, ap);
+ }
+
+ if (log_level < m_mysql_log_level) {
+ return;
+ }
+
+ if (log_level >= rocksdb::InfoLogLevel::ERROR_LEVEL) {
+ mysql_log_level = ERROR_LEVEL;
+ } else if (log_level >= rocksdb::InfoLogLevel::WARN_LEVEL) {
+ mysql_log_level = WARNING_LEVEL;
+ } else {
+ mysql_log_level = INFORMATION_LEVEL;
+ }
+
+ // log to MySQL
+ std::string f("LibRocksDB:");
+ f.append(format);
+ error_log_print(mysql_log_level, f.c_str(), ap);
+ }
+
+ void Logv(const char *format, va_list ap) override {
+ DBUG_ASSERT(format != nullptr);
+    // If no level is specified, log at the information level by default
+ Logv(rocksdb::InfoLogLevel::INFO_LEVEL, format, ap);
+ }
+
+ void SetRocksDBLogger(const std::shared_ptr<rocksdb::Logger> logger) {
+ m_logger = logger;
+ }
+
+ void SetInfoLogLevel(const rocksdb::InfoLogLevel log_level) override {
+    // The InfoLogLevel for the logger is used by rocksdb to filter
+    // messages, so it needs to be the lower of the two log levels
+ rocksdb::InfoLogLevel base_level = log_level;
+
+ if (m_logger && m_logger->GetInfoLogLevel() < base_level) {
+ base_level = m_logger->GetInfoLogLevel();
+ }
+ rocksdb::Logger::SetInfoLogLevel(base_level);
+ m_mysql_log_level = log_level;
+ }
+
+ private:
+ std::shared_ptr<rocksdb::Logger> m_logger;
+ rocksdb::InfoLogLevel m_mysql_log_level;
+};
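+
+/*
+  Minimal usage sketch (assuming a rocksdb::Options instance named "opts"):
+  install the bridge logger so RocksDB messages reach the MySQL error log,
+  optionally chaining RocksDB's own logger:
+
+    auto log = std::make_shared<Rdb_logger>(rocksdb::InfoLogLevel::WARN_LEVEL);
+    log->SetRocksDBLogger(rocksdb_file_logger);  // optional; hypothetical name
+    opts.info_log = log;
+*/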
+
+} // namespace myrocks
diff --git a/storage/rocksdb/myrocks_hotbackup.py b/storage/rocksdb/myrocks_hotbackup.py
new file mode 100755
index 00000000000..fcb3e7088da
--- /dev/null
+++ b/storage/rocksdb/myrocks_hotbackup.py
@@ -0,0 +1,698 @@
+#!@PYTHON_SHEBANG@
+
+from __future__ import division
+from optparse import OptionParser
+import collections
+import signal
+import os
+import stat
+import sys
+import re
+import subprocess
+import logging
+import logging.handlers
+import time
+import datetime
+import shutil
+import traceback
+import tempfile
+
+import MySQLdb
+import MySQLdb.connections
+from MySQLdb import OperationalError, ProgrammingError
+
+logger = None
+opts = None
+rocksdb_files = ['MANIFEST', 'CURRENT', 'OPTIONS']
+rocksdb_data_suffix = '.sst'
+rocksdb_wal_suffix = '.log'
+exclude_files = ['master.info', 'relay-log.info', 'worker-relay-log.info',
+ 'auto.cnf', 'gaplock.log', 'ibdata', 'ib_logfile', '.trash']
+wdt_bin = 'wdt'
+
+def is_manifest(fname):
+ for m in rocksdb_files:
+ if fname.startswith(m):
+ return True
+ return False
+
+class Writer(object):
+  """Base class for objects that stream backup files to a destination."""
+  def __init__(self):
+    pass
+
+class StreamWriter(Writer):
+ stream_cmd= ''
+
+ def __init__(self, stream_option, direct = 0):
+ super(StreamWriter, self).__init__()
+ if stream_option == 'tar':
+ self.stream_cmd= 'tar chf -'
+ elif stream_option == 'xbstream':
+ self.stream_cmd= 'xbstream -c'
+ if direct:
+ self.stream_cmd = self.stream_cmd + ' -d'
+ else:
+ raise Exception("Only tar or xbstream is supported as streaming option.")
+
+ def write(self, file_name):
+ rc= os.system(self.stream_cmd + " " + file_name)
+ if (rc != 0):
+ raise Exception("Got error on stream write: " + str(rc) + " " + file_name)
+
+
+class MiscFilesProcessor():
+ datadir = None
+  wildcard = r'.*\.(frm|MYD|MYI|MAD|MAI|MRG|TRG|TRN|ARM|ARZ|CSM|CSV|opt|par)$'
+ regex = None
+ start_backup_time = None
+ skip_check_frm_timestamp = None
+
+ def __init__(self, datadir, skip_check_frm_timestamp, start_backup_time):
+ self.datadir = datadir
+ self.regex = re.compile(self.wildcard)
+ self.skip_check_frm_timestamp = skip_check_frm_timestamp
+ self.start_backup_time = start_backup_time
+
+ def process_db(self, db):
+ # do nothing
+ pass
+
+ def process_file(self, path):
+ # do nothing
+ pass
+
+  def check_frm_timestamp(self, fname, path):
+    if not self.skip_check_frm_timestamp and fname.endswith('.frm'):
+      if os.path.getmtime(path) > self.start_backup_time:
+        logger.error('FRM file %s was updated after starting backups. '
+                     'Schema could have changed and the resulting copy may '
+                     'not be valid. Aborting. '
+                     '(backup time: %s, file modified time: %s)',
+                     path, datetime.datetime.fromtimestamp(self.start_backup_time).strftime('%Y-%m-%d %H:%M:%S'),
+                     datetime.datetime.fromtimestamp(os.path.getmtime(path)).strftime('%Y-%m-%d %H:%M:%S'))
+        raise Exception("Inconsistent frm file timestamp")
+
+ def process(self):
+ os.chdir(self.datadir)
+ for db in self.get_databases():
+ logger.info("Starting MySQL misc file traversal from database %s..", db)
+ self.process_db(db)
+ for f in self.get_files(db):
+ if self.match(f):
+ rel_path = os.path.join(db, f)
+ self.check_frm_timestamp(f, rel_path)
+ self.process_file(rel_path)
+ logger.info("Traversing misc files from data directory..")
+ for f in self.get_files(""):
+ should_skip = False
+ for e in exclude_files:
+ if f.startswith(e) or f.endswith(e):
+ logger.info("Skipping %s", f)
+ should_skip = True
+ break
+ if not should_skip:
+ self.process_file(f)
+
+  def match(self, filename):
+    return self.regex.match(filename) is not None
+
+  def get_databases(self):
+    dbs = []
+    dirs = [ d for d in os.listdir(self.datadir) \
+            if not os.path.isfile(os.path.join(self.datadir,d))]
+    for db in dirs:
+      if not db.startswith('.') and not self._is_socket(db) and db != "#rocksdb":
+        dbs.append(db)
+    return dbs
+
+ def get_files(self, db):
+ dbdir = self.datadir + "/" + db
+ return [ f for f in os.listdir(dbdir) \
+ if os.path.isfile(os.path.join(dbdir,f))]
+
+  def _is_socket(self, item):
+    mode = os.stat(os.path.join(self.datadir, item)).st_mode
+    return stat.S_ISSOCK(mode)
+
+
+class MySQLBackup(MiscFilesProcessor):
+ writer = None
+
+ def __init__(self, datadir, writer, skip_check_frm_timestamp, start_backup_time):
+ MiscFilesProcessor.__init__(self, datadir, skip_check_frm_timestamp, start_backup_time)
+ self.writer = writer
+
+ def process_file(self, fname): # overriding base class
+ self.writer.write(fname)
+
+
+class MiscFilesLinkCreator(MiscFilesProcessor):
+ snapshot_dir = None
+
+ def __init__(self, datadir, snapshot_dir, skip_check_frm_timestamp, start_backup_time):
+ MiscFilesProcessor.__init__(self, datadir, skip_check_frm_timestamp, start_backup_time)
+ self.snapshot_dir = snapshot_dir
+
+ def process_db(self, db):
+ snapshot_sub_dir = os.path.join(self.snapshot_dir, db)
+ os.makedirs(snapshot_sub_dir)
+
+ def process_file(self, path):
+ dst_path = os.path.join(self.snapshot_dir, path)
+ os.link(path, dst_path)
+
+
+# RocksDB backup
+class RocksDBBackup():
+ source_dir = None
+ writer = None
+ # sst files sent in this backup round
+ sent_sst = {}
+ # target sst files in this backup round
+ target_sst = {}
+ # sst files sent in all backup rounds
+ total_sent_sst= {}
+ # sum of sst file size sent in this backup round
+ sent_sst_size = 0
+ # sum of target sst file size in this backup round
+ # if sent_sst_size becomes equal to target_sst_size,
+ # it means the backup round finished backing up all sst files
+ target_sst_size = 0
+ # sum of all sst file size sent all backup rounds
+ total_sent_sst_size= 0
+ # sum of all target sst file size from all backup rounds
+ total_target_sst_size = 0
+ show_progress_size_interval= 1073741824 # 1GB
+ wal_files= []
+ manifest_files= []
+ finished= False
+
+ def __init__(self, source_dir, writer, prev):
+ self.source_dir = source_dir
+ self.writer = writer
+ os.chdir(self.source_dir)
+ self.init_target_files(prev)
+
+ def init_target_files(self, prev):
+ sst = {}
+ self.sent_sst = {}
+ self.target_sst= {}
+ self.total_sent_sst = {}
+ self.sent_sst_size = 0
+ self.target_sst_size = 0
+ self.total_sent_sst_size= 0
+ self.total_target_sst_size= 0
+ self.wal_files= []
+ self.manifest_files= []
+
+ for f in os.listdir(self.source_dir):
+ if f.endswith(rocksdb_data_suffix):
+ # exactly the same file (same size) was sent in previous backup rounds
+ if prev is not None and f in prev.total_sent_sst and int(os.stat(f).st_size) == prev.total_sent_sst[f]:
+ continue
+ sst[f]= int(os.stat(f).st_size)
+ self.target_sst_size = self.target_sst_size + os.stat(f).st_size
+ elif is_manifest(f):
+ self.manifest_files.append(f)
+ elif f.endswith(rocksdb_wal_suffix):
+ self.wal_files.append(f)
+ self.target_sst= collections.OrderedDict(sorted(sst.items()))
+
+ if prev is not None:
+ self.total_sent_sst = prev.total_sent_sst
+ self.total_sent_sst_size = prev.total_sent_sst_size
+ self.total_target_sst_size = self.target_sst_size + prev.total_sent_sst_size
+ else:
+ self.total_target_sst_size = self.target_sst_size
+
+ def do_backup_single(self, fname):
+ self.writer.write(fname)
+ os.remove(fname)
+
+ def do_backup_sst(self, fname, size):
+ self.do_backup_single(fname)
+ self.sent_sst[fname]= size
+ self.total_sent_sst[fname]= size
+ self.sent_sst_size = self.sent_sst_size + size
+ self.total_sent_sst_size = self.total_sent_sst_size + size
+
+ def do_backup_manifest(self):
+ for f in self.manifest_files:
+ self.do_backup_single(f)
+
+ def do_backup_wal(self):
+ for f in self.wal_files:
+ self.do_backup_single(f)
+
+  # this is the last snapshot round: back up all remaining files
+ def do_backup_final(self):
+ logger.info("Backup WAL..")
+ self.do_backup_wal()
+ logger.info("Backup Manifest..")
+ self.do_backup_manifest()
+ self.do_cleanup()
+ self.finished= True
+
+ def do_cleanup(self):
+ shutil.rmtree(self.source_dir)
+ logger.info("Cleaned up checkpoint from %s", self.source_dir)
+
+ def do_backup_until(self, time_limit):
+ logger.info("Starting backup from snapshot: target files %d", len(self.target_sst))
+ start_time= time.time()
+ last_progress_time= start_time
+ progress_size= 0
+ for fname, size in self.target_sst.iteritems():
+ self.do_backup_sst(fname, size)
+ progress_size= progress_size + size
+ elapsed_seconds = time.time() - start_time
+ progress_seconds = time.time() - last_progress_time
+
+ if self.should_show_progress(size):
+ self.show_progress(progress_size, progress_seconds)
+ progress_size=0
+ last_progress_time= time.time()
+
+      if elapsed_seconds > time_limit and not self.has_sent_all_sst():
+        logger.info("Snapshot round finished. Elapsed Time: %5.2f. Remaining sst files: %d",
+                    elapsed_seconds, len(self.target_sst) - len(self.sent_sst))
+        self.do_cleanup()
+        break
+ if self.has_sent_all_sst():
+ self.do_backup_final()
+
+ return self
+
+  def should_show_progress(self, size):
+    interval = self.show_progress_size_interval
+    return int(self.total_sent_sst_size/interval) > int((self.total_sent_sst_size-size)/interval)
+
+ def show_progress(self, size, seconds):
+ logger.info("Backup Progress: %5.2f%% Sent %6.2f GB of %6.2f GB data, Transfer Speed: %6.2f MB/s",
+ self.total_sent_sst_size*100/self.total_target_sst_size,
+ self.total_sent_sst_size/1024/1024/1024,
+ self.total_target_sst_size/1024/1024/1024,
+ size/seconds/1024/1024)
+
+ def print_backup_report(self):
+ logger.info("Sent %6.2f GB of sst files, %d files in total.",
+ self.total_sent_sst_size/1024/1024/1024,
+ len(self.total_sent_sst))
+
+  def has_sent_all_sst(self):
+    return self.sent_sst_size == self.target_sst_size
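+
+# Worked example of the accounting above (numbers are illustrative): if round
+# 1 has target_sst_size = 30GB but the time limit expires after sending 20GB,
+# those files stay recorded in total_sent_sst, and round 2's
+# init_target_files() skips any sst file of identical size already sent. Once
+# sent_sst_size equals target_sst_size in some round, has_sent_all_sst() is
+# true and do_backup_final() ships the WAL and MANIFEST files and removes the
+# checkpoint directory.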
+
+
+class MySQLUtil:
+ @staticmethod
+ def connect(user, password, port, socket=None):
+ if socket:
+ dbh = MySQLdb.Connect(user=user,
+ passwd=password,
+ unix_socket=socket)
+ else:
+ dbh = MySQLdb.Connect(user=user,
+ passwd=password,
+ port=port,
+ host="127.0.0.1")
+ return dbh
+
+ @staticmethod
+ def create_checkpoint(dbh, checkpoint_dir):
+ sql = ("SET GLOBAL rocksdb_create_checkpoint='{0}'"
+ .format(checkpoint_dir))
+ cur= dbh.cursor()
+ cur.execute(sql)
+ cur.close()
+
+ @staticmethod
+ def get_datadir(dbh):
+ sql = "SELECT @@datadir"
+ cur = dbh.cursor()
+ cur.execute(sql)
+ row = cur.fetchone()
+ return row[0]
+
+ @staticmethod
+ def is_directio_enabled(dbh):
+ sql = "SELECT @@global.rocksdb_use_direct_reads"
+ cur = dbh.cursor()
+ cur.execute(sql)
+ row = cur.fetchone()
+ return row[0]
+
+class BackupRunner:
+ datadir = None
+ start_backup_time = None
+
+ def __init__(self, datadir):
+ self.datadir = datadir
+ self.start_backup_time = time.time()
+
+ def start_backup_round(self, backup_round, prev_backup):
+ def signal_handler(*args):
+ logger.info("Got signal. Exit")
+ if b is not None:
+ logger.info("Cleaning up snapshot directory..")
+ b.do_cleanup()
+ sys.exit(1)
+
+ b = None
+ try:
+ signal.signal(signal.SIGINT, signal_handler)
+ w = None
+ if not opts.output_stream:
+ raise Exception("Currently only streaming backup is supported.")
+
+ snapshot_dir = opts.checkpoint_directory + '/' + str(backup_round)
+ dbh = MySQLUtil.connect(opts.mysql_user,
+ opts.mysql_password,
+ opts.mysql_port,
+ opts.mysql_socket)
+ direct = MySQLUtil.is_directio_enabled(dbh)
+ logger.info("Direct I/O: %d", direct)
+
+ w = StreamWriter(opts.output_stream, direct)
+
+ if not self.datadir:
+ self.datadir = MySQLUtil.get_datadir(dbh)
+ logger.info("Set datadir: %s", self.datadir)
+ logger.info("Creating checkpoint at %s", snapshot_dir)
+ MySQLUtil.create_checkpoint(dbh, snapshot_dir)
+ logger.info("Created checkpoint at %s", snapshot_dir)
+ b = RocksDBBackup(snapshot_dir, w, prev_backup)
+ return b.do_backup_until(opts.checkpoint_interval)
+ except Exception as e:
+ logger.error(e)
+ logger.error(traceback.format_exc())
+ if b is not None:
+ logger.info("Cleaning up snapshot directory.")
+ b.do_cleanup()
+ sys.exit(1)
+
+ def backup_mysql(self):
+ try:
+ w = None
+ if opts.output_stream:
+ w = StreamWriter(opts.output_stream)
+ else:
+ raise Exception("Currently only streaming backup is supported.")
+ b = MySQLBackup(self.datadir, w, opts.skip_check_frm_timestamp,
+ self.start_backup_time)
+ logger.info("Taking MySQL misc backups..")
+ b.process()
+ logger.info("MySQL misc backups done.")
+ except Exception as e:
+ logger.error(e)
+ logger.error(traceback.format_exc())
+ sys.exit(1)
+
+
+class WDTBackup:
+ datadir = None
+ start_backup_time = None
+
+ def __init__(self, datadir):
+ self.datadir = datadir
+ self.start_backup_time = time.time()
+
+ def cleanup(self, snapshot_dir, server_log):
+ if server_log:
+ server_log.seek(0)
+ logger.info("WDT server log:")
+ logger.info(server_log.read())
+ server_log.close()
+ if snapshot_dir:
+ logger.info("Cleaning up snapshot dir %s", snapshot_dir)
+ shutil.rmtree(snapshot_dir)
+
+ def backup_with_timeout(self, backup_round):
+ def signal_handler(*args):
+ logger.info("Got signal. Exit")
+ self.cleanup(snapshot_dir, server_log)
+ sys.exit(1)
+
+ logger.info("Starting backup round %d", backup_round)
+ snapshot_dir = None
+ server_log = None
+ try:
+ signal.signal(signal.SIGINT, signal_handler)
+ # create rocksdb snapshot
+ snapshot_dir = os.path.join(opts.checkpoint_directory, str(backup_round))
+ dbh = MySQLUtil.connect(opts.mysql_user,
+ opts.mysql_password,
+ opts.mysql_port,
+ opts.mysql_socket)
+ logger.info("Creating checkpoint at %s", snapshot_dir)
+ MySQLUtil.create_checkpoint(dbh, snapshot_dir)
+ logger.info("Created checkpoint at %s", snapshot_dir)
+
+ # get datadir if not provided
+ if not self.datadir:
+ self.datadir = MySQLUtil.get_datadir(dbh)
+ logger.info("Set datadir: %s", self.datadir)
+
+ # create links for misc files
+ link_creator = MiscFilesLinkCreator(self.datadir, snapshot_dir,
+ opts.skip_check_frm_timestamp,
+ self.start_backup_time)
+ link_creator.process()
+
+ current_path = os.path.join(opts.backupdir, "CURRENT")
+
+      # Construct the receiver cmd, using the data directory as recovery-id.
+      # We delete the CURRENT file first because it is not append-only and
+      # therefore not resumable.
+ remote_cmd = (
+ "ssh {0} rm -f {1}; "
+ "{2} -directory {3} -enable_download_resumption "
+ "-recovery_id {4} -start_port 0 -abort_after_seconds {5} {6}"
+ ).format(opts.destination,
+ current_path,
+ wdt_bin,
+ opts.backupdir,
+ self.datadir,
+ opts.checkpoint_interval,
+ opts.extra_wdt_receiver_options)
+ logger.info("WDT remote cmd %s", remote_cmd)
+ server_log = tempfile.TemporaryFile()
+ remote_process = subprocess.Popen(remote_cmd.split(),
+ stdout=subprocess.PIPE,
+ stderr=server_log)
+ wdt_url = remote_process.stdout.readline().strip()
+ if not wdt_url:
+ raise Exception("Unable to get connection url from wdt receiver")
+ sender_cmd = (
+ "{0} -connection_url \'{1}\' -directory {2} -app_name=myrocks "
+ "-avg_mbytes_per_sec {3} "
+ "-enable_download_resumption -abort_after_seconds {4} {5}"
+ ).format(wdt_bin,
+ wdt_url,
+ snapshot_dir,
+ opts.avg_mbytes_per_sec,
+ opts.checkpoint_interval,
+ opts.extra_wdt_sender_options)
+ sender_status = os.system(sender_cmd) >> 8
+ remote_status = remote_process.wait()
+ self.cleanup(snapshot_dir, server_log)
+      # TODO: handle retryable and non-retryable errors differently
+ return (sender_status == 0 and remote_status == 0)
+
+ except Exception as e:
+ logger.error(e)
+ logger.error(traceback.format_exc())
+ self.cleanup(snapshot_dir, server_log)
+ sys.exit(1)
+
+
+def backup_using_wdt():
+ if not opts.destination:
+ logger.error("Must provide remote destination when using WDT")
+ sys.exit(1)
+
+ # TODO: detect whether WDT is installed
+ logger.info("Backing up myrocks to %s using WDT", opts.destination)
+ wdt_backup = WDTBackup(opts.datadir)
+ finished = False
+ backup_round = 1
+ while not finished:
+ start_time = time.time()
+ finished = wdt_backup.backup_with_timeout(backup_round)
+ end_time = time.time()
+ duration_seconds = end_time - start_time
+ if (not finished) and (duration_seconds < opts.checkpoint_interval):
+      # this round completed before the checkpoint interval elapsed; wait out the remainder
+ sleep_duration = (opts.checkpoint_interval - duration_seconds)
+ logger.info("Sleeping for %f seconds", sleep_duration)
+ time.sleep(sleep_duration)
+
+ backup_round = backup_round + 1
+ logger.info("Finished myrocks backup using WDT")
+
+
+def init_logger():
+ global logger
+ logger = logging.getLogger('myrocks_hotbackup')
+ logger.setLevel(logging.INFO)
+ h1= logging.StreamHandler(sys.stderr)
+ f = logging.Formatter("%(asctime)s.%(msecs)03d %(levelname)s %(message)s",
+ "%Y-%m-%d %H:%M:%S")
+ h1.setFormatter(f)
+ logger.addHandler(h1)
+
+backup_wdt_usage = ("Backup using WDT: myrocks_hotbackup "
+ "--user=root --password=pw --stream=wdt "
+ "--checkpoint_dir=<directory where temporary backup hard links "
+ "are created> --destination=<remote host name> --backup_dir="
+ "<remote directory name>. This has to be executed at the src "
+ "host.")
+backup_usage= "Backup: set -o pipefail; myrocks_hotbackup --user=root --password=pw --port=3306 --checkpoint_dir=<directory where temporary backup hard links are created> | ssh -o NoneEnabled=yes remote_server 'tar -xi -C <directory on remote server where backups will be sent>' . You need to execute backup command on a server where you take backups."
+move_back_usage= "Move-Back: myrocks_hotbackup --move_back --datadir=<dest mysql datadir> --rocksdb_datadir=<dest rocksdb datadir> --rocksdb_waldir=<dest rocksdb wal dir> --backup_dir=<where backup files are stored> . You need to execute move-back command on a server where backup files are sent."
+
+
+def parse_options():
+ global opts
+ parser = OptionParser(usage = "\n\n" + backup_usage + "\n\n" + \
+ backup_wdt_usage + "\n\n" + move_back_usage)
+  parser.add_option('-i', '--interval', type='int', dest='checkpoint_interval',
+                    default=300,
+                    help='Number of seconds between checkpoint renewals')
+ parser.add_option('-c', '--checkpoint_dir', type='string', dest='checkpoint_directory',
+ default='/data/mysql/backup/snapshot',
+ help='Local directory name where checkpoints will be created.')
+ parser.add_option('-d', '--datadir', type='string', dest='datadir',
+ default=None,
+ help='backup mode: src MySQL datadir. move_back mode: dest MySQL datadir')
+ parser.add_option('-s', '--stream', type='string', dest='output_stream',
+ default='tar',
+ help='Setting streaming backup options. Currently tar, WDT '
+ 'and xbstream are supported. Default is tar')
+ parser.add_option('--destination', type='string', dest='destination',
+ default='',
+ help='Remote server name. Only used for WDT mode so far.')
+ parser.add_option('--avg_mbytes_per_sec', type='int',
+ dest='avg_mbytes_per_sec',
+ default=500,
+ help='Average backup rate in MBytes/sec. WDT only.')
+ parser.add_option('--extra_wdt_sender_options', type='string',
+ dest='extra_wdt_sender_options',
+ default='',
+ help='Extra options for WDT sender')
+ parser.add_option('--extra_wdt_receiver_options', type='string',
+ dest='extra_wdt_receiver_options',
+ default='',
+ help='Extra options for WDT receiver')
+ parser.add_option('-u', '--user', type='string', dest='mysql_user',
+ default='root',
+ help='MySQL user name')
+  parser.add_option('-p', '--password', type='string', dest='mysql_password',
+                    default='',
+                    help='MySQL password')
+ parser.add_option('-P', '--port', type='int', dest='mysql_port',
+ default=3306,
+ help='MySQL port number')
+ parser.add_option('-S', '--socket', type='string', dest='mysql_socket',
+ default=None,
+ help='MySQL socket path. Takes precedence over --port.')
+ parser.add_option('-m', '--move_back', action='store_true', dest='move_back',
+ default=False,
+ help='Moving MyRocks backup files to proper locations.')
+ parser.add_option('-r', '--rocksdb_datadir', type='string', dest='rocksdb_datadir',
+ default=None,
+ help='RocksDB target data directory where backup data files will be moved. Must be empty.')
+  parser.add_option('-w', '--rocksdb_waldir', type='string', dest='rocksdb_waldir',
+                    default=None,
+                    help='RocksDB target WAL directory where backup wal files will be moved. Must be empty.')
+ parser.add_option('-b', '--backup_dir', type='string', dest='backupdir',
+ default=None,
+ help='backup mode for WDT: Remote directory to store '
+ 'backup. move_back mode: Locations where backup '
+ 'files are stored.')
+  parser.add_option('-f', '--skip_check_frm_timestamp',
+                    dest='skip_check_frm_timestamp',
+                    action='store_true', default=False,
+                    help='Skip checking whether frm files were updated after the backup started.')
+  parser.add_option('-D', '--debug_signal_file', type='string', dest='debug_signal_file',
+                    default=None,
+                    help='Debugging only: wait until the specified file is created before taking MySQL misc backups.')
+
+ opts, args = parser.parse_args()
+
+
+def create_moveback_dir(directory):
+  if not os.path.exists(directory):
+    os.makedirs(directory)
+  else:
+    for f in os.listdir(directory):
+      logger.error("Directory %s has file or directory %s!", directory, f)
+      raise Exception("Move-back target directory %s is not empty" % directory)
+
+def print_move_back_usage():
+ logger.warning(move_back_usage)
+
+def move_back():
+ if opts.rocksdb_datadir is None or opts.rocksdb_waldir is None or opts.backupdir is None or opts.datadir is None:
+ print_move_back_usage()
+ sys.exit()
+ create_moveback_dir(opts.datadir)
+ create_moveback_dir(opts.rocksdb_datadir)
+ create_moveback_dir(opts.rocksdb_waldir)
+
+ os.chdir(opts.backupdir)
+ for f in os.listdir(opts.backupdir):
+ if os.path.isfile(os.path.join(opts.backupdir,f)):
+ if f.endswith(rocksdb_wal_suffix):
+ shutil.move(f, opts.rocksdb_waldir)
+ elif f.endswith(rocksdb_data_suffix) or is_manifest(f):
+ shutil.move(f, opts.rocksdb_datadir)
+ else:
+ shutil.move(f, opts.datadir)
+ else: #directory
+ if f.endswith('.rocksdb'):
+ continue
+ shutil.move(f, opts.datadir)
+
+def start_backup():
+ logger.info("Starting backup.")
+ runner = BackupRunner(opts.datadir)
+ b = None
+ backup_round= 1
+ while True:
+ b = runner.start_backup_round(backup_round, b)
+ backup_round = backup_round + 1
+ if b.finished is True:
+ b.print_backup_report()
+ logger.info("RocksDB Backup Done.")
+ break
+ if opts.debug_signal_file:
+ while not os.path.exists(opts.debug_signal_file):
+ logger.info("Waiting until %s is created..", opts.debug_signal_file)
+ time.sleep(1)
+ runner.backup_mysql()
+ logger.info("All Backups Done.")
+
+
+def main():
+ parse_options()
+ init_logger()
+
+ if opts.move_back is True:
+ move_back()
+ elif opts.output_stream == 'wdt':
+ backup_using_wdt()
+ else:
+ start_backup()
+
+if __name__ == "__main__":
+ main()
diff --git a/storage/rocksdb/mysql-test/rocksdb/combinations b/storage/rocksdb/mysql-test/rocksdb/combinations
new file mode 100644
index 00000000000..be8080d4b9b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/combinations
@@ -0,0 +1,5 @@
+[write_committed]
+loose-rocksdb_write_policy=write_committed
+
+[write_prepared]
+loose-rocksdb_write_policy=write_prepared
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/autoinc_crash_safe.inc b/storage/rocksdb/mysql-test/rocksdb/include/autoinc_crash_safe.inc
new file mode 100644
index 00000000000..ba2e7ace0c5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/autoinc_crash_safe.inc
@@ -0,0 +1,150 @@
+--echo #
+--echo # Testing concurrent transactions.
+--echo #
+
+--source include/count_sessions.inc
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+connect (con3,localhost,root,,);
+
+connection con1;
+begin;
+insert into t values (); # 1
+
+connection con2;
+begin;
+insert into t values (); # 2
+
+connection con3;
+begin;
+insert into t values (); # 3
+
+connection con1;
+insert into t values (); # 4
+
+connection con2;
+insert into t values (); # 5
+
+connection con3;
+insert into t values (); # 6
+
+connection con2;
+commit;
+
+connection con3;
+rollback;
+
+connection con1;
+commit;
+
+delete from t;
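+
+# Six inserts ran across the three connections above, reserving values 1..6;
+# auto_increment values consumed by the rolled-back transaction are not
+# returned, so the expected next value is 7 and it should survive the
+# restarts below (the point of this crash-safety test).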
+
+--echo # Master value before restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+--echo # Slave value before restart
+sync_slave_with_master;
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+connection slave;
+--source include/stop_slave.inc
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+
+connection default;
+--echo # Master value after restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+--let $rpl_server_number = 2
+--source include/rpl_restart_server.inc
+
+connection slave;
+--source include/start_slave.inc
+--echo # Slave value after restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+disconnect con1;
+disconnect con2;
+disconnect con3;
+--source include/wait_until_count_sessions.inc
+
+--echo #
+--echo # Testing interaction of merge markers with various DDL statements.
+--echo #
+connection slave;
+--source include/stop_slave.inc
+
+connection default;
+
+--echo # Drop and add primary key.
+alter table t modify i int;
+alter table t drop primary key;
+alter table t add primary key (i);
+alter table t modify i int auto_increment;
+
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+--echo # Remove auto_increment property.
+alter table t modify i int;
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+--echo # Add auto_increment property.
+insert into t values (123);
+alter table t modify i int auto_increment;
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+--echo # Add column j.
+alter table t add column j int;
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+--echo # Rename tables.
+rename table t to t2;
+rename table t2 to t;
+
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+--echo # Change auto_increment property
+alter table t auto_increment = 1000;
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+alter table t auto_increment = 1;
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+alter table t drop primary key, add key (i), auto_increment = 1;
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+alter table t add key (j), auto_increment = 1;
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+alter table t modify i int;
+alter table t add column (k int auto_increment), add key(k), auto_increment=15;
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+--echo # Drop table.
+drop table t;
+
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+
+connection slave;
+--source include/start_slave.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc
new file mode 100644
index 00000000000..29ec94188a2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc
@@ -0,0 +1,165 @@
+--source include/count_sessions.inc
+
+if ($data_order_desc)
+{
+ --echo Data will be ordered in descending order
+}
+
+if (!$data_order_desc)
+{
+ --echo Data will be ordered in ascending order
+}
+
+# Create a table with a primary key and one secondary key as well as one
+# more column
+eval CREATE TABLE t1(
+ pk CHAR(5),
+ a CHAR(30),
+ b CHAR(30),
+ PRIMARY KEY(pk) COMMENT "$pk_cf",
+ KEY(a)
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
+
+# Create a second identical table to validate that bulk loading different
+# tables in the same session works
+eval CREATE TABLE t2(
+ pk CHAR(5),
+ a CHAR(30),
+ b CHAR(30),
+ PRIMARY KEY(pk) COMMENT "$pk_cf",
+ KEY(a)
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
+
+# Create a third table using partitions to validate that bulk loading works
+# across a partitioned table
+eval CREATE TABLE t3(
+ pk CHAR(5),
+ a CHAR(30),
+ b CHAR(30),
+ PRIMARY KEY(pk) COMMENT "$pk_cf",
+ KEY(a)
+) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
+
+--let $file = `SELECT CONCAT(@@datadir, "test_loadfile.txt")`
+
+--let MTR_DATA_ORDER_DESC = $data_order_desc
+
+# Create a text file with data to import into the table.
+# The primary key is in sorted order and the secondary keys are randomly generated
+--let ROCKSDB_INFILE = $file
+perl;
+my $fn = $ENV{'ROCKSDB_INFILE'};
+open(my $fh, '>', $fn) || die "perl open($fn): $!";
+my $max = 2500000;
+my $desc = $ENV{'MTR_DATA_ORDER_DESC'};
+my @chars = ("A".."Z", "a".."z", "0".."9");
+my @powers_of_26 = (26 * 26 * 26 * 26, 26 * 26 * 26, 26 * 26, 26, 1);
+for (my $ii = 0; $ii < $max; $ii++)
+{
+ my $pk;
+ my $tmp = $ii;
+ foreach (@powers_of_26)
+ {
+ if ($desc == 1)
+ {
+ $pk .= chr(ord('z') - int($tmp / $_));
+ }
+ else
+ {
+ $pk .= chr(ord('a') + int($tmp / $_));
+ }
+
+ $tmp = $tmp % $_;
+ }
+
+ my $num = int(rand(25)) + 6;
+ my $a;
+ $a .= $chars[rand(@chars)] for 1..$num;
+
+ $num = int(rand(25)) + 6;
+ my $b;
+ $b .= $chars[rand(@chars)] for 1..$num;
+ print $fh "$pk\t$a\t$b\n";
+}
+close($fh);
+EOF
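+
+# Worked example of the key generation above: the base-26 expansion maps row
+# 0 to pk "aaaaa", row 1 to "aaaab", and row 27 to "aaabb"; in descending
+# mode the same rows map from "zzzzz" downward. Either way the file is
+# emitted in primary-key order.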
+
+--file_exists $file
+
+# Make sure a snapshot held by another user doesn't block the bulk load
+connect (other,localhost,root,,);
+set session transaction isolation level repeatable read;
+start transaction with consistent snapshot;
+
+# Assert that there is a pending snapshot
+select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+
+connection default;
+
+# Update CF to smaller value to create multiple SST in ingestion
+eval SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=
+ '$pk_cf_name={write_buffer_size=8m;target_file_size_base=1m};';
+
+set rocksdb_bulk_load=1;
+set rocksdb_bulk_load_size=100000;
+--disable_query_log
+--echo LOAD DATA INFILE <input_file> INTO TABLE t1;
+eval LOAD DATA INFILE '$file' INTO TABLE t1;
+# There should be no SST being ingested
+select * from t1;
+--echo LOAD DATA INFILE <input_file> INTO TABLE t2;
+eval LOAD DATA INFILE '$file' INTO TABLE t2;
+# There should be no SST being ingested
+select * from t2;
+--echo LOAD DATA INFILE <input_file> INTO TABLE t3;
+eval LOAD DATA INFILE '$file' INTO TABLE t3;
+# There should be no SST being ingested
+select * from t3;
+--enable_query_log
+set rocksdb_bulk_load=0;
+
+--remove_file $file
+
+# Make sure row count index stats are correct
+--replace_column 6 # 7 # 8 # 9 #
+SHOW TABLE STATUS WHERE name LIKE 't%';
+
+ANALYZE TABLE t1, t2, t3;
+
+--replace_column 6 # 7 # 8 # 9 #
+SHOW TABLE STATUS WHERE name LIKE 't%';
+
+# Make sure all the data is there.
+select count(pk),count(a) from t1;
+select count(b) from t1;
+select count(pk),count(a) from t2;
+select count(b) from t2;
+select count(pk),count(a) from t3;
+select count(b) from t3;
+
+# Create a dummy file with a bulk load extension. It should be removed when
+# the server starts
+--let $tmpext = .bulk_load.tmp
+--let $MYSQLD_DATADIR= `SELECT @@datadir`
+--let $datadir = $MYSQLD_DATADIR/#rocksdb
+--write_file $datadir/test$tmpext
+dummy data
+EOF
+--write_file $datadir/longfilenamethatvalidatesthatthiswillgetdeleted$tmpext
+dummy data
+EOF
+
+# Show that the files exist
+--list_files $datadir *$tmpext
+
+# Now restart the server and make sure it automatically removes these test files
+--source include/restart_mysqld.inc
+
+# Show that the files no longer exist
+--list_files $datadir *$tmpext
+
+# Cleanup
+disconnect other;
+DROP TABLE t1, t2, t3;
+
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc
new file mode 100644
index 00000000000..46aea8f23b5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc
@@ -0,0 +1,143 @@
+--source include/have_partition.inc
+--source include/count_sessions.inc
+
+SET rocksdb_bulk_load_size=3;
+SET rocksdb_bulk_load_allow_unsorted=1;
+
+### Test individual INSERTs ###
+
+# A table with only a PK won't have rows until the bulk load is finished
+eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf")
+ ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+--disable_query_log
+let $sign = 1;
+let $max = 5;
+let $i = 1;
+while ($i <= $max) {
+ let $a = 1 + $sign * $i;
+ let $b = 1 - $sign * $i;
+ let $sign = -$sign;
+ let $insert = INSERT INTO t1 VALUES ($a, $b);
+ eval $insert;
+ inc $i;
+}
+--enable_query_log
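+# The loop above inserts rows in deliberately unsorted PK order:
+# (2,0), (-1,3), (4,-2), (-3,5), (6,-4). With only a PK and
+# rocksdb_bulk_load=1 the rows are still buffered, so the SELECT below
+# returns nothing; clearing rocksdb_bulk_load makes all five rows visible.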
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+DROP TABLE t1;
+
+# A table with a PK and a SK shows rows immediately
+eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf", KEY(b))
+ ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+--disable_query_log
+let $sign = 1;
+let $max = 5;
+let $i = 1;
+while ($i <= $max) {
+ let $a = 1 + $sign * $i;
+ let $b = 1 - $sign * $i;
+ let $sign = -$sign;
+ let $insert = INSERT INTO t1 VALUES ($a, $b);
+ eval $insert;
+ inc $i;
+}
+--enable_query_log
+
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+DROP TABLE t1;
+
+# Inserting into another table finishes the bulk load on the previous table
+eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf")
+ ENGINE=ROCKSDB;
+eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf")
+ ENGINE=ROCKSDB;
+
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t2 VALUES (1,1);
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+INSERT INTO t1 VALUES (2,2);
+SELECT * FROM t2 FORCE INDEX (PRIMARY);
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+DROP TABLE t1, t2;
+
+### Test bulk load from a file ###
+eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf")
+ ENGINE=ROCKSDB;
+eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "$pk_cf")
+ ENGINE=ROCKSDB;
+eval CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf")
+ ENGINE=ROCKSDB PARTITION BY KEY() PARTITIONS 4;
+
+--let $file = `SELECT CONCAT(@@datadir, "test_loadfile.txt")`
+# Create a text file with data to import into the table.
+# PK and SK are not in any order
+--let ROCKSDB_INFILE = $file
+perl;
+my $fn = $ENV{'ROCKSDB_INFILE'};
+open(my $fh, '>', $fn) || die "perl open($fn): $!";
+binmode $fh;
+my $max = 2500000;
+my $sign = 1;
+for (my $ii = 0; $ii < $max; $ii++)
+{
+ my $a = 1 + $sign * $ii;
+ my $b = 1 - $sign * $ii;
+ $sign = -$sign;
+ print $fh "$a\t$b\n";
+}
+close($fh);
+EOF
+--file_exists $file
+
+# Make sure a snapshot held by another user doesn't block the bulk load
+connect (other,localhost,root,,);
+set session transaction isolation level repeatable read;
+start transaction with consistent snapshot;
+
+# Assert that there is a pending snapshot
+select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+
+connection default;
+set rocksdb_bulk_load=1;
+set rocksdb_bulk_load_size=100000;
+--disable_query_log
+--echo LOAD DATA INFILE <input_file> INTO TABLE t1;
+eval LOAD DATA INFILE '$file' INTO TABLE t1;
+--echo LOAD DATA INFILE <input_file> INTO TABLE t2;
+eval LOAD DATA INFILE '$file' INTO TABLE t2;
+--echo LOAD DATA INFILE <input_file> INTO TABLE t3;
+eval LOAD DATA INFILE '$file' INTO TABLE t3;
+--enable_query_log
+set rocksdb_bulk_load=0;
+
+--remove_file $file
+
+# Make sure row count index stats are correct
+--replace_column 6 # 7 # 8 # 9 #
+SHOW TABLE STATUS WHERE name LIKE 't%';
+
+ANALYZE TABLE t1, t2, t3;
+
+--replace_column 6 # 7 # 8 # 9 #
+SHOW TABLE STATUS WHERE name LIKE 't%';
+
+# Make sure all the data is there.
+select count(a),count(b) from t1;
+select count(a),count(b) from t2;
+select count(a),count(b) from t3;
+
+SELECT * FROM t1 FORCE INDEX (PRIMARY) LIMIT 3;
+SELECT * FROM t2 FORCE INDEX (PRIMARY) LIMIT 3;
+
+disconnect other;
+DROP TABLE t1, t2, t3;
+
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/bypass_create_table.inc b/storage/rocksdb/mysql-test/rocksdb/include/bypass_create_table.inc
new file mode 100644
index 00000000000..233635b369e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/bypass_create_table.inc
@@ -0,0 +1,298 @@
+CREATE TABLE `link_table` (
+ `id1` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+ `id1_type` int(10) unsigned NOT NULL DEFAULT '0' ,
+ `id2` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+ `id2_type` int(10) unsigned NOT NULL DEFAULT '0' ,
+ `link_type` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+ `visibility` tinyint(3) NOT NULL DEFAULT '0' ,
+ `data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' ,
+ `time` int(10) unsigned NOT NULL DEFAULT '0' ,
+ `version` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+ PRIMARY KEY (`link_type` , `id1` , `id2`) COMMENT 'cf_link' ,
+ KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` ,
+ `version` , `data`) COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+
+CREATE TABLE `link_table2` (
+ `id1` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+ `id1_type` int(10) unsigned NOT NULL DEFAULT '0' ,
+ `id2` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+ `id2_type` int(10) unsigned NOT NULL DEFAULT '0' ,
+ `link_type` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+ `visibility` tinyint(3) NOT NULL DEFAULT '0' ,
+ `data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' ,
+ `time` int(10) unsigned NOT NULL DEFAULT '0' ,
+ `version` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+ PRIMARY KEY (`link_type` , `id1` , `id2`)
+ COMMENT 'cf_link' ,
+ KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` ,
+ `version` , `data`) COMMENT 'cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=9;
+
+insert into link_table values (1, 1, 1, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (1, 1, 2, 2, 3, 3, 'a10', 10, 125);
+insert into link_table values (1, 1, 3, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (1, 1, 4, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (1, 1, 5, 2, 3, 3, 'a12', 12, 125);
+insert into link_table values (1, 1, 6, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (1, 1, 7, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (1, 1, 8, 2, 3, 4, 'a13', 13, 125);
+insert into link_table values (1, 1, 9, 2, 3, 4, 'a14', 14, 125);
+insert into link_table values (1, 1, 10, 2, 3, 4, 'a15', 15, 125);
+insert into link_table values (2, 1, 1, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (2, 1, 2, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (2, 1, 3, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (2, 1, 4, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (2, 1, 5, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 6, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 7, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 8, 2, 3, 4, 'a13', 13, 125);
+insert into link_table values (2, 1, 9, 2, 3, 4, 'a14', 14, 125);
+insert into link_table values (2, 1, 10, 2, 3, 4, 'a15', 15, 125);
+insert into link_table values (2, 1, 1, 2, 4, 4, 'a10', 10, 125);
+insert into link_table values (2, 1, 2, 2, 4, 4, 'a10', 10, 125);
+insert into link_table values (2, 1, 3, 2, 4, 4, 'a11', 11, 125);
+insert into link_table values (2, 1, 4, 2, 4, 4, 'a11', 11, 125);
+insert into link_table values (2, 1, 5, 2, 4, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 6, 2, 4, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 7, 2, 4, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 8, 2, 4, 4, 'a13', 13, 125);
+insert into link_table values (2, 1, 9, 2, 4, 4, 'a14', 14, 125);
+insert into link_table values (2, 1, 10, 2, 4, 4, 'a15', 15, 125);
+insert into link_table values (3, 1, 10, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (3, 1, 9, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (3, 1, 8, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (3, 1, 7, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (3, 1, 6, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (3, 1, 5, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (3, 1, 4, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (3, 1, 3, 2, 3, 4, 'a13', 13, 125);
+insert into link_table values (3, 1, 2, 2, 3, 4, 'a14', 14, 125);
+insert into link_table values (3, 1, 1, 2, 3, 4, 'a15', 15, 125);
+insert into link_table values (9, 1, 9, 2, 5, 6, '0 ', 10, 125);
+insert into link_table values (9, 1, 8, 2, 5, 6, '01 ', 11, 125);
+insert into link_table values (9, 1, 7, 2, 5, 6, '012 ', 11, 125);
+insert into link_table values (9, 1, 6, 2, 5, 6, '0123 ', 12, 125);
+insert into link_table values (9, 1, 5, 2, 5, 6, '01234 ', 12, 125);
+insert into link_table values (9, 1, 4, 2, 5, 6, '012345 ', 12, 125);
+insert into link_table values (9, 1, 3, 2, 5, 6, '0123456 ', 13, 125);
+insert into link_table values (9, 1, 2, 2, 5, 6, '01234567 ', 14, 125);
+insert into link_table values (9, 1, 1, 2, 5, 6, '012345678 ', 15, 125);
+insert into link_table values (9, 1, 0, 2, 5, 6, '0123456789 ', 15, 125);
+
+insert into link_table2 select * from link_table;
+
+CREATE TABLE `id_table` (
+ `id` bigint(20) NOT NULL DEFAULT '0',
+ `type` int(11) NOT NULL DEFAULT '0',
+ `row_created_time` int(11) NOT NULL DEFAULT '0',
+ `hash_key` varchar(255) NOT NULL DEFAULT '',
+ `is_deleted` tinyint(4) DEFAULT '0',
+ PRIMARY KEY (`id`),
+ KEY `type_id` (`type`,`id`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED
+KEY_BLOCK_SIZE=8;
+
+insert into id_table values (1, 1, 10, '111', 0);
+insert into id_table values (2, 1, 10, '111', 1);
+insert into id_table values (3, 1, 10, '111', 0);
+insert into id_table values (4, 1, 10, '111', 1);
+insert into id_table values (5, 1, 10, '111', 0);
+insert into id_table values (6, 1, 10, '111', 1);
+insert into id_table values (7, 1, 10, '111', 0);
+insert into id_table values (8, 1, 10, '111', 1);
+insert into id_table values (9, 1, 10, '111', 0);
+insert into id_table values (10, 1, 10, '111', 1);
+
+CREATE TABLE `node_table` (
+ `id` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `type` int(10) unsigned NOT NULL DEFAULT '0',
+ `version` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `update_time` int(10) unsigned NOT NULL DEFAULT '0',
+ `data` mediumtext COLLATE latin1_bin NOT NULL,
+ PRIMARY KEY (`type`,`id`) COMMENT 'cf_node_type_id',
+ KEY `id` (`id`) COMMENT 'cf_node'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+
+insert into node_table values (1, 1, 1, 10, 'data');
+
+insert into node_table values (2, 1, 1, 10, 'data');
+
+insert into node_table values (3, 1, 1, 10, 'data');
+
+insert into node_table values (4, 1, 1, 10, 'data');
+
+insert into node_table values (5, 1, 1, 10, 'data');
+
+insert into node_table values (6, 1, 1, 10, 'data');
+
+insert into node_table values (7, 1, 1, 10, 'data');
+
+insert into node_table values (8, 1, 1, 10, 'data');
+
+insert into node_table values (9, 1, 1, 10, 'data');
+
+insert into node_table values (10, 1, 1, 10, 'data');
+
+CREATE TABLE `count_table` (
+ `id` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `type` int(10) unsigned NOT NULL DEFAULT '0',
+ `link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `count` int(10) unsigned NOT NULL DEFAULT '0',
+ `time` int(10) unsigned NOT NULL DEFAULT '0',
+ `version` bigint(20) unsigned NOT NULL DEFAULT '0',
+ PRIMARY KEY (`id`,`link_type`) COMMENT 'cf_count_table'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+
+
+insert into count_table values (2, 1, 1, 1, 10, 20);
+
+insert into count_table values (3, 1, 1, 1, 10, 20);
+
+insert into count_table values (4, 1, 1, 1, 10, 20);
+
+insert into count_table values (5, 1, 1, 1, 10, 20);
+
+insert into count_table values (6, 1, 1, 1, 10, 20);
+
+insert into count_table values (7, 1, 1, 1, 10, 20);
+
+insert into count_table values (8, 1, 1, 1, 10, 20);
+
+insert into count_table values (9, 1, 1, 1, 10, 20);
+
+insert into count_table values (10, 1, 1, 1, 10, 20);
+
+CREATE TABLE `link_table5` (
+ `id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+ `id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+ `link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `visibility` tinyint(3) NOT NULL DEFAULT '0',
+ `data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '',
+ `time` int(10) unsigned NOT NULL DEFAULT '0',
+ `version` bigint(20) unsigned NOT NULL DEFAULT '0',
+ PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+
+insert into link_table5 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1);
+insert into link_table5 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1);
+insert into link_table5 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1);
+insert into link_table5 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1);
+insert into link_table5 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1);
+insert into link_table5 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1);
+insert into link_table5 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1);
+
+
+CREATE TABLE `link_table3` (
+ `id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+ `id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+ `link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `visibility` tinyint(4) NOT NULL DEFAULT '0',
+ `data` text COLLATE latin1_bin NOT NULL,
+ `time` int(10) unsigned NOT NULL DEFAULT '0',
+ `version` bigint(20) unsigned NOT NULL DEFAULT '0',
+ PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link',
+ KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`)
+ COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4;
+
+insert into link_table3 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1);
+insert into link_table3 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1);
+insert into link_table3 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1);
+insert into link_table3 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1);
+insert into link_table3 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1);
+insert into link_table3 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1);
+insert into link_table3 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1);
+
+CREATE TABLE `link_table6` (
+ `id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+ `id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+ `link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `visibility` tinyint(4) NOT NULL DEFAULT '0',
+ `data` text COLLATE latin1_bin NOT NULL,
+ `time` int(10) unsigned NOT NULL DEFAULT '0',
+ `version` bigint(20) unsigned NOT NULL DEFAULT '0',
+ PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link',
+ KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`,
+ `data`(255)) COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4;
+
+insert into link_table6 values (1, 1, 2, 2, 1, 1,
+ 'data12_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (1, 1, 3, 2, 1, 2,
+ 'data13_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (1, 1, 4, 2, 1, 2,
+ 'data14_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (1, 1, 5, 2, 1, 1,
+ 'data15_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (2, 1, 1, 2, 1, 1,
+ 'data21_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (2, 1, 2, 2, 1, 1,
+ 'data22_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (2, 1, 3, 2, 1, 1,
+ 'data32_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+
+CREATE TABLE `link_table4` (
+ `id1` binary(16) NOT NULL DEFAULT '\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0',
+ `raw_key` text COLLATE latin1_bin,
+ `id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+ `link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `visibility` tinyint(3) NOT NULL DEFAULT '0',
+ `data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '',
+ `time` int(10) unsigned NOT NULL DEFAULT '0',
+ `version` bigint(20) unsigned NOT NULL DEFAULT '0',
+ PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link',
+ KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`,`data`)
+ COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+
+insert into link_table4 values ('a1', "rk1", 2, 2, 1, 1, 'data12', 1, 1);
+insert into link_table4 values ('a1', "rk2", 3, 2, 1, 2, 'data13', 1, 1);
+insert into link_table4 values ('a1', "rk3", 4, 2, 1, 2, 'data14', 1, 1);
+insert into link_table4 values ('a1', "rk4", 5, 2, 1, 1, 'data15', 1, 1);
+insert into link_table4 values ('b1', "rk5", 1, 2, 1, 1, 'data21', 1, 1);
+insert into link_table4 values ('b1', "rk6", 2, 2, 1, 1, 'data22', 1, 1);
+insert into link_table4 values ('b1', "rk7", 3, 2, 1, 1, 'data32', 1, 1);
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority.inc b/storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority.inc
new file mode 100644
index 00000000000..7adca5d7cf2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority.inc
@@ -0,0 +1,174 @@
+###############################################################################
+# Common test file for high priority DDL
+###############################################################################
+
+
+create user test_user1@localhost;
+grant all on test to test_user1@localhost;
+create user test_user2@localhost;
+grant all on test to test_user2@localhost;
+
+# Default values
+--let $con_block = con1
+--let $con_kill = default
+--let $should_kill = 1
+--let $recreate_table = 1
+--let $throw_error = 1
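+# (see include/ddl_high_priority_module.inc for what each parameter controls)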
+
+##
+## killing conflicting shared locks by alter table
+##
+
+--let $blocking_sql = lock tables t1 read;
+--let $cmd = alter table t1 modify i bigint;
+--let $high_priority_cmd = alter high_priority table t1 modify i bigint;
+
+--source include/ddl_high_priority_module.inc
+
+##
+## killing conflicting shared lock in a transaction
+## transaction will rollback
+##
+
+--let $blocking_sql = begin; insert into t1 values (4); select i from t1;
+--let $cmd = alter table t1 rename t1_new;
+--let $high_priority_cmd = alter high_priority table t1 rename t1_new;
+
+--source include/ddl_high_priority_module.inc
+
+select * from t1_new;
+drop table t1_new;
+
+##
+## simulate conflicting DDL which will not be killed
+##
+
+# Simulate conflicting DDL.
+# LOCK TABLES ... WRITE below holds MDL_SHARED_NO_READ_WRITE, which may be
+# upgraded to an exclusive lock to run DDL such as ALTER TABLE;
+# the upgradable/exclusive lock should not be killed.
+
+--let $should_kill = 0
+
+--let $blocking_sql = lock tables t1 write;
+--let $cmd = drop table t1;
+--let $high_priority_cmd = drop high_priority table t1;
+
+--source include/ddl_high_priority_module.inc
+
+# restore $should_kill
+--let $should_kill = 1
+
+##
+## killing conflicting transaction by drop table DDL
+##
+
+--let $blocking_sql = lock tables t1 read; begin; insert into t1 values (4);
+--let $cmd = drop table t1;
+--let $high_priority_cmd = drop high_priority table t1;
+
+--source include/ddl_high_priority_module.inc
+
+##
+## no effect for regular users
+##
+
+connect (con2,localhost,test_user2,,test,,);
+# $con_kill is a regular user connection
+--let $con_kill = con2
+--let $should_kill = 0
+
+--let $blocking_sql = lock tables t1 read;
+--let $cmd = alter table t1 modify i bigint;
+--let $high_priority_cmd = alter high_priority table t1 modify i bigint;
+
+--source include/ddl_high_priority_module.inc
+
+disconnect con2;
+
+# restore $con_kill
+--let $con_kill = default
+# restore $should_kill
+--let $should_kill = 1
+
+##
+## create/drop index
+##
+
+# create index
+
+--let $blocking_sql = lock tables t1 read;
+--let $cmd = create index idx1 on t1 (i);
+--let $high_priority_cmd = create high_priority index idx1 on t1 (i);
+
+--source include/ddl_high_priority_module.inc
+
+# drop index (use the previously created table)
+--let $recreate_table = 0
+
+--let $cmd = drop index idx1 on t1;
+--let $high_priority_cmd = drop high_priority index idx1 on t1;
+
+--source include/ddl_high_priority_module.inc
+
+# restore $recreate_table
+--let $recreate_table = 1
+
+##
+## high_priority truncate table
+##
+
+--let $blocking_sql = lock tables t1 read;
+--let $cmd = truncate t1;
+--let $high_priority_cmd = truncate high_priority t1;
+
+--source include/ddl_high_priority_module.inc
+
+##
+## high_priority create/drop trigger
+##
+
+--let $blocking_sql = lock tables t1 read;
+--let $cmd = create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;
+--let $high_priority_cmd = create high_priority trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;
+
+--source include/ddl_high_priority_module.inc
+
+# drop trigger (use the previously created table)
+--let $recreate_table = 0
+
+--let $cmd = drop trigger ins_sum;
+--let $high_priority_cmd = drop high_priority trigger ins_sum;
+
+--source include/ddl_high_priority_module.inc
+
+# restore $recreate_table
+--let $recreate_table = 1
+
+##
+## high_priority optimize table
+##
+## "optimize table" doesn't throw errors. It catches all errors, and
+## returns a result set in a table
+##
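+## For example, when blocked, optimize is expected to report the failure in
+## its result set rather than raising an error (illustrative shape only, not
+## verbatim test output):
+##   Table    Op        Msg_type  Msg_text
+##   test.t1  optimize  Error     Lock wait timeout exceeded; ...
+##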
+
+--let $throw_error = 0
+
+--let $blocking_sql = lock tables t1 read;
+--let $cmd = optimize table t1;
+--let $high_priority_cmd = optimize high_priority table t1;
+
+--source include/ddl_high_priority_module.inc
+
+# restore throw_error
+--let $throw_error = 1
+
+##
+## clean up
+##
+
+drop user test_user1@localhost;
+drop user test_user2@localhost;
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority_module.inc b/storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority_module.inc
new file mode 100644
index 00000000000..ffbdc306455
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority_module.inc
@@ -0,0 +1,141 @@
+###############################################################################
+# This file acts as a function/module for the ddl_high_priority test
+#
+# Usage: set the following variables before including
+#
+# $use_sys_var: whether to use the sys var or SQL syntax to trigger high_priority
+# value: 0/1
+#
+# $con_block: a blocking connection
+# value: con1/con2/default
+#
+# $con_kill: a connection that will attempt to kill $con_block
+# value: con1/con2/default
+#
+# $cmd: a regular command to evaluate (to use with sys var)
+# value: sql command
+#
+# $high_priority_cmd: a high_priority command to evaluate
+# value: sql command
+#
+# $should_kill: whether $con_block is expected to be killed
+# value: 0/1
+#
+# $recreate_table: whether to recreate the test table
+# value: 0/1
+#
+# $throw_error: whether the command will throw a lock_wait_timeout error.
+# Note: optimize table catches all errors.
+# value: 0/1
+###############################################################################
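+#
+# A minimal usage sketch (illustrative values only, mirroring the calls made
+# in ddl_high_priority.inc; not an additional test case):
+#
+#   --let $con_block = con1
+#   --let $con_kill = default
+#   --let $should_kill = 1
+#   --let $recreate_table = 1
+#   --let $throw_error = 1
+#   --let $blocking_sql = lock tables t1 read;
+#   --let $cmd = alter table t1 modify i bigint;
+#   --let $high_priority_cmd = alter high_priority table t1 modify i bigint;
+#   --source include/ddl_high_priority_module.inc
+#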
+
+##
+## Print out the parameters of the test set
+## (useful for debugging)
+##
+--echo
+--echo ## Test parameters:
+--echo ## use_sys_var = $use_sys_var
+--echo ## con_block = $con_block
+--echo ## con_kill = $con_kill
+--echo ## cmd = $cmd
+--echo ## high_priority_cmd = $high_priority_cmd
+--echo ## should_kill = $should_kill
+--echo ## recreate_table = $recreate_table
+--echo ## throw_error = $throw_error
+--echo
+
+
+##
+## Setup
+##
+
+connection default;
+
+# create con1
+connect (con1,localhost,test_user1,,test,,);
+
+if ($recreate_table) {
+ # create t1
+ --disable_warnings
+ drop table if exists t1;
+ --enable_warnings
+ create table t1 (i int);
+ show create table t1;
+ insert into t1 values (1), (2), (3);
+}
+
+##
+## Testing
+##
+
+--echo connection: $con_block
+--connection $con_block
+--eval $blocking_sql
+
+--echo connection: $con_kill
+--connection $con_kill
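+# Deliberately tiny timeouts: a lock wait that is not resolved by the
+# high-priority kill should fail fast instead of stalling the test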
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+
+describe t1;
+
+--echo connection: default (for show processlist)
+connection default;
+--echo # both $con_block and $con_kill exist
+--replace_column 1 <Id> 3 <Host> 5 <Command> 6 <Time> 7 <State> 8 <Info> 9 <RExam> 10 <RSent> 11 <TID>
+show processlist;
+
+--echo connection: $con_kill
+--connection $con_kill
+
+# command will fail without high_priority
+if ($throw_error) {
+ --error ER_LOCK_WAIT_TIMEOUT
+ --eval $cmd
+}
+
+if (!$throw_error) {
+ --eval $cmd
+}
+
+if ($use_sys_var) {
+ set high_priority_ddl = 1;
+ select @@high_priority_ddl;
+
+ # a non-supported command will time out
+ --error ER_LOCK_WAIT_TIMEOUT
+ lock tables t1 write;
+
+ if (!$should_kill) {
+ # a regular user's DDL will fail regardless of high_priority_ddl being on
+ --error ER_LOCK_WAIT_TIMEOUT
+ --eval $cmd
+ }
+
+ if ($should_kill) {
+ --eval $cmd
+ }
+
+ # reset high_priority_ddl
+ set high_priority_ddl = 0;
+}
+
+if (!$use_sys_var) {
+ if (!$should_kill) {
+ # a regular user's DDL will fail regardless of high_priority being used
+ --error ER_LOCK_WAIT_TIMEOUT
+ --eval $high_priority_cmd
+ }
+
+ if ($should_kill) {
+ --eval $high_priority_cmd
+ }
+}
+
+--echo connection: default (for show processlist)
+connection default;
+--replace_column 1 <Id> 3 <Host> 5 <Command> 6 <Time> 7 <State> 8 <Info> 9 <RExam> 10 <RSent> 11 <TID>
+show processlist;
+
+disconnect con1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/deadlock_stats.inc b/storage/rocksdb/mysql-test/rocksdb/include/deadlock_stats.inc
new file mode 100644
index 00000000000..48ef6f816bd
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/deadlock_stats.inc
@@ -0,0 +1,52 @@
+let $prior_set_lwt = `select concat('set @prior_lock_wait_timeout = @@',
+ '$engine', '_lock_wait_timeout;')`;
+let $prior_set_dld = `select concat('set @prior_deadlock_detect = @@',
+ '$engine', '_deadlock_detect;')`;
+let $global_dld = `select concat('set global ', '$engine',
+ '_deadlock_detect = on;')`;
+let $global_lwt = `select concat('set global ', '$engine',
+ '_lock_wait_timeout = 100000;')`;
+eval $prior_set_lwt $prior_set_dld $global_dld $global_lwt;
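+# For $engine = rocksdb the concat() calls above expand to, e.g.
+# (illustrative expansion; the engine name is supplied by the including test):
+#   set @prior_lock_wait_timeout = @@rocksdb_lock_wait_timeout;
+#   set @prior_deadlock_detect = @@rocksdb_deadlock_detect;
+#   set global rocksdb_deadlock_detect = on;
+#   set global rocksdb_lock_wait_timeout = 100000;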
+
+--source include/count_sessions.inc
+connect (con1,localhost,root,,);
+let $con1= `SELECT CONNECTION_ID()`;
+
+connect (con2,localhost,root,,);
+let $con2= `SELECT CONNECTION_ID()`;
+
+connection default;
+eval create table t (i int primary key) engine=$engine;
+insert into t values (1), (2), (3);
+
+--echo #
+--echo # The following is disabled due to:
+--echo # MDEV-13404: MyRocks upstream uses I_S.table_statistics.row_lock_deadlocks, should we import?
+--echo #
+--disable_parsing
+--source include/simple_deadlock.inc
+connection default;
+select row_lock_deadlocks from information_schema.table_statistics where
+table_name = "t";
+
+select row_lock_deadlocks from information_schema.table_statistics where
+table_name = "t";
+--source include/simple_deadlock.inc
+connection default;
+select row_lock_deadlocks from information_schema.table_statistics where
+table_name = "t";
+
+select row_lock_deadlocks from information_schema.table_statistics where
+table_name = "t";
+--enable_parsing
+
+disconnect con1;
+disconnect con2;
+
+let $restore_lwt = `select concat('set global ', '$engine',
+ '_lock_wait_timeout = @prior_lock_wait_timeout;')`;
+let $restore_dld = `select concat('set global ', '$engine',
+ '_deadlock_detect = @prior_deadlock_detect;')`;
+eval $restore_lwt $restore_dld;
+drop table t;
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/dup_key_update.inc b/storage/rocksdb/mysql-test/rocksdb/include/dup_key_update.inc
new file mode 100644
index 00000000000..82ceda1914d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/dup_key_update.inc
@@ -0,0 +1,69 @@
+let $max_table = 2;
+
+--disable_query_log
+let $table = 1;
+while ($table <= $max_table) {
+ let $max = 9;
+ let $i = 2;
+ while ($i <= $max) {
+ let $insert = INSERT INTO t$table VALUES ($i, $i, $i);
+ eval $insert;
+ inc $i;
+ }
+ inc $table;
+}
+--enable_query_log
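+
+# The loop below exercises INSERT ... ON DUPLICATE KEY UPDATE; for
+# $table = 1, $i = 1, $j = 9 one iteration expands to statements such as
+# (illustrative expansion):
+#   INSERT INTO t1 VALUES (1, 1, 1) ON DUPLICATE KEY UPDATE id2 = 9;
+#   SELECT * FROM t1 WHERE id1 = 1;
+#   SELECT * FROM t1 FORCE INDEX (id3) WHERE id3 = 1;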
+
+let $table = 1;
+while ($table <= $max_table) {
+ let $i = 1;
+ let $j = 9;
+ while ($i <= $max) {
+
+ let $insert = INSERT INTO t$table VALUES ($i, $i, $i) ON DUPLICATE KEY UPDATE id2 = $j;
+ eval $insert;
+
+ let $select = SELECT * FROM t$table WHERE id1 = $i;
+ eval $select;
+
+ let $select = SELECT * FROM t$table FORCE INDEX (id3) WHERE id3 = $i;
+ eval $select;
+
+ inc $j;
+
+ let $insert = INSERT INTO t$table VALUES ($i, $i, $i) ON DUPLICATE KEY UPDATE id2 = $j;
+ eval $insert;
+
+ let $select = SELECT * FROM t$table WHERE id1 = $i;
+ eval $select;
+
+ let $select = SELECT * FROM t$table FORCE INDEX (id3) WHERE id3 = $i;
+ eval $select;
+
+ inc $j;
+
+ let $insert = INSERT INTO t$table VALUES ($i, $i, $i) ON DUPLICATE KEY UPDATE id2 = $j;
+ eval $insert;
+
+ let $select = SELECT * FROM t$table WHERE id1 = $i;
+ eval $select;
+
+ let $select = SELECT * FROM t$table FORCE INDEX (id3) WHERE id3 = $i;
+ eval $select;
+
+ inc $j;
+
+ inc $i;
+ inc $i;
+ inc $i;
+ inc $i;
+ }
+
+ let $select = SELECT * FROM t$table;
+ eval $select;
+
+ let $select = SELECT * FROM t$table FORCE INDEX (id3);
+ eval $select;
+
+ inc $table;
+}
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/group_min_max.inc b/storage/rocksdb/mysql-test/rocksdb/include/group_min_max.inc
new file mode 100644
index 00000000000..40fabce0517
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/group_min_max.inc
@@ -0,0 +1,1438 @@
+#
+# Test file for WL#1724 (Min/Max Optimization for Queries with Group By Clause).
+# The queries in this file test query execution via QUICK_GROUP_MIN_MAX_SELECT.
+#
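+# When that quick-select is chosen, the EXPLAIN output of the queries below
+# should show "Using index for group-by" in the Extra column (a general
+# optimizer behaviour, noted here for orientation only).
+#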
+
+#
+# TODO:
+# Add queries with:
+# - C != const
+# - C IS NOT NULL
+# - HAVING clause
+
+--source include/default_optimizer_switch.inc
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+eval create table t1 (
+ a1 char(64), a2 char(64), b char(16), c char(16) not null, d char(16), dummy char(248) default ' '
+) engine=$engine;
+
+insert into t1 (a1, a2, b, c, d) values
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'),
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'),
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'),
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'),
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'),
+('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'),
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'),
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'),
+('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'),
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'),
+('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'),
+('d','a','a','a411','xy1'),('d','a','a','b411','xy2'),('d','a','a','c411','xy3'),('d','a','a','d411','xy4'),
+('d','a','b','e412','xy1'),('d','a','b','f412','xy2'),('d','a','b','g412','xy3'),('d','a','b','h412','xy4'),
+('d','b','a','i421','xy1'),('d','b','a','j421','xy2'),('d','b','a','k421','xy3'),('d','b','a','l421','xy4'),
+('d','b','b','m422','xy1'),('d','b','b','n422','xy2'),('d','b','b','o422','xy3'),('d','b','b','p422','xy4'),
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'),
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'),
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'),
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'),
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'),
+('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'),
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'),
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'),
+('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'),
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'),
+('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'),
+('d','a','a','a411','xy1'),('d','a','a','b411','xy2'),('d','a','a','c411','xy3'),('d','a','a','d411','xy4'),
+('d','a','b','e412','xy1'),('d','a','b','f412','xy2'),('d','a','b','g412','xy3'),('d','a','b','h412','xy4'),
+('d','b','a','i421','xy1'),('d','b','a','j421','xy2'),('d','b','a','k421','xy3'),('d','b','a','l421','xy4'),
+('d','b','b','m422','xy1'),('d','b','b','n422','xy2'),('d','b','b','o422','xy3'),('d','b','b','p422','xy4');
+
+create index idx_t1_0 on t1 (a1);
+create index idx_t1_1 on t1 (a1,a2,b,c);
+create index idx_t1_2 on t1 (a1,a2,b);
+analyze table t1;
+
+# t2 is the same as t1, but with some NULLs in the MIN/MAX column, and
+# one more nullable attribute
+
+--disable_warnings
+drop table if exists t2;
+--enable_warnings
+
+eval create table t2 (
+ a1 char(64), a2 char(64) not null, b char(16), c char(16), d char(16), dummy char(248) default ' '
+) engine=$engine;
+insert into t2 select * from t1;
+# add a few rows with NULLs in the MIN/MAX column
+insert into t2 (a1, a2, b, c, d) values
+('a','a',NULL,'a777','xyz'),('a','a',NULL,'a888','xyz'),('a','a',NULL,'a999','xyz'),
+('a','a','a',NULL,'xyz'),
+('a','a','b',NULL,'xyz'),
+('a','b','a',NULL,'xyz'),
+('c','a',NULL,'c777','xyz'),('c','a',NULL,'c888','xyz'),('c','a',NULL,'c999','xyz'),
+('d','b','b',NULL,'xyz'),
+('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),
+('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),
+('a','a',NULL,'a777','xyz'),('a','a',NULL,'a888','xyz'),('a','a',NULL,'a999','xyz'),
+('a','a','a',NULL,'xyz'),
+('a','a','b',NULL,'xyz'),
+('a','b','a',NULL,'xyz'),
+('c','a',NULL,'c777','xyz'),('c','a',NULL,'c888','xyz'),('c','a',NULL,'c999','xyz'),
+('d','b','b',NULL,'xyz'),
+('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),
+('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz');
+
+create index idx_t2_0 on t2 (a1);
+create index idx_t2_1 on t2 (a1,a2,b,c);
+create index idx_t2_2 on t2 (a1,a2,b);
+analyze table t2;
+
+# Table t3 is the same as t1, but with smaller column lengths.
+# This makes it possible to test different branches of the cost computation
+# procedure when the number of keys per block is less than the number of keys
+# in the sub-groups formed by predicates over non-group attributes.
+
+--disable_warnings
+drop table if exists t3;
+--enable_warnings
+
+eval create table t3 (
+ a1 char(1), a2 char(1), b char(1), c char(4) not null, d char(3), dummy char(1) default ' '
+) engine=$engine;
+
+insert into t3 (a1, a2, b, c, d) values
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'),
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'),
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'),
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'),
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'),
+('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'),
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'),
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'),
+('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'),
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'),
+('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4');
+insert into t3 (a1, a2, b, c, d) values
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'),
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'),
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'),
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'),
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'),
+('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'),
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'),
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'),
+('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'),
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'),
+('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4');
+insert into t3 (a1, a2, b, c, d) values
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'),
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'),
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'),
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'),
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'),
+('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'),
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'),
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'),
+('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'),
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'),
+('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4');
+insert into t3 (a1, a2, b, c, d) values
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'),
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'),
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'),
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'),
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'),
+('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'),
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'),
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'),
+('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'),
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'),
+('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4');
+
+create index idx_t3_0 on t3 (a1);
+create index idx_t3_1 on t3 (a1,a2,b,c);
+create index idx_t3_2 on t3 (a1,a2,b);
+analyze table t3;
+
+
+#
+# Queries without a WHERE clause. These queries do not use ranges.
+#
+
+# plans
+explain select a1, min(a2) from t1 group by a1;
+explain select a1, max(a2) from t1 group by a1;
+explain select a1, min(a2), max(a2) from t1 group by a1;
+explain select a1, a2, b, min(c), max(c) from t1 group by a1,a2,b;
+explain select a1,a2,b,max(c),min(c) from t1 group by a1,a2,b;
+--replace_column 7 # 9 #
+explain select a1,a2,b,max(c),min(c) from t2 group by a1,a2,b;
+# Select fields in different order
+explain select min(a2), a1, max(a2), min(a2), a1 from t1 group by a1;
+explain select a1, b, min(c), a1, max(c), b, a2, max(c), max(c) from t1 group by a1, a2, b;
+explain select min(a2) from t1 group by a1;
+explain select a2, min(c), max(c) from t1 group by a1,a2,b;
+
+# queries
+select a1, min(a2) from t1 group by a1;
+select a1, max(a2) from t1 group by a1;
+select a1, min(a2), max(a2) from t1 group by a1;
+select a1, a2, b, min(c), max(c) from t1 group by a1,a2,b;
+select a1,a2,b,max(c),min(c) from t1 group by a1,a2,b;
+select a1,a2,b,max(c),min(c) from t2 group by a1,a2,b;
+# Select fields in different order
+select min(a2), a1, max(a2), min(a2), a1 from t1 group by a1;
+select a1, b, min(c), a1, max(c), b, a2, max(c), max(c) from t1 group by a1, a2, b;
+select min(a2) from t1 group by a1;
+select a2, min(c), max(c) from t1 group by a1,a2,b;
+
+#
+# Queries with a where clause
+#
+
+# A) Preds only over the group 'A' attributes
+# plans
+explain select a1,a2,b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b;
+explain select a1,a2,b, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+explain select a1, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where a1 >= 'c' or a2 < 'b' group by a1,a2,b;
+explain select a1,a2,b, max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+explain select a1,a2,b, max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+explain select a1,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b;
+explain select a1, max(c) from t1 where a1 in ('a','b','d') group by a1,a2,b;
+
+--replace_column 9 #
+explain select a1,a2,b, max(c) from t2 where a1 < 'd' group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where a1 < 'd' group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+--replace_column 9 #
+explain select a1, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where a1 >= 'c' or a2 < 'b' group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b, max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b, max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b;
+--replace_column 9 #
+explain select a1, max(c) from t2 where a1 in ('a','b','d') group by a1,a2,b;
+
+# queries
+select a1,a2,b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b;
+select a1,a2,b, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+select a1, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where a1 >= 'c' or a2 < 'b' group by a1,a2,b;
+select a1,a2,b, max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+select a1,a2,b, max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+select a1,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b;
+select a1, max(c) from t1 where a1 in ('a','b','d') group by a1,a2,b;
+
+select a1,a2,b, max(c) from t2 where a1 < 'd' group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where a1 < 'd' group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b;
+select a1,a2,b, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+select a1, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where a1 >= 'c' or a2 < 'b' group by a1,a2,b;
+select a1,a2,b, max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+select a1,a2,b, max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+select a1,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b;
+select a1, max(c) from t2 where a1 in ('a','b','d') group by a1,a2,b;
+
+# B) Equalities only over the non-group 'B' attributes
+# plans
+--echo #
+--echo # MariaDB: we don't have the following patch:
+--echo #
+--echo # commit 60a92a79a3b7fde3c6efe91799e344b977c8e5c3
+--echo # Author: Manuel Ung <mung@fb.com>
+--echo # Date: Thu Apr 19 23:06:27 2018 -0700
+--echo #
+--echo # Enhance group-by loose index scan
+--echo #
+--echo # So the following results are not very meaningful, but are still kept here
+
+explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1;
+explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b' or b = 'a') group by a1;
+
+explain select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1;
+explain select a1,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+explain select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+
+explain select a1,a2,b, max(c) from t1 where (b = 'b') group by a1,a2;
+explain select a1,a2,b, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2;
+
+explain select a1,a2,b,min(c),max(c) from t1 where (b = 'b') group by a1,a2;
+explain select a1,a2,b,min(c),max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2;
+
+explain select a1,a2, max(c) from t1 where (b = 'b') group by a1,a2;
+explain select a1,a2, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2;
+
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1;
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+
+explain select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1;
+explain select a1,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+explain select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+
+explain select a1,a2,b, max(c) from t2 where (b = 'b') group by a1,a2;
+explain select a1,a2,b, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2;
+
+explain select a1,a2,b,min(c),max(c) from t2 where (b = 'b') group by a1,a2;
+explain select a1,a2,b,min(c),max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2;
+
+explain select a1,a2, max(c) from t2 where (b = 'b') group by a1,a2;
+explain select a1,a2, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2;
+
+# these queries test case 2) in TRP_GROUP_MIN_MAX::update_cost()
+explain select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1;
+explain select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+explain select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+
+explain select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1;
+explain select a1,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+explain select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+
+# queries
+select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1;
+select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b' or b = 'a') group by a1;
+
+select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1;
+select a1,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+
+select a1,a2,b, max(c) from t1 where (b = 'b') group by a1,a2;
+select a1,a2,b, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2;
+
+select a1,a2,b,min(c),max(c) from t1 where (b = 'b') group by a1,a2;
+select a1,a2,b,min(c),max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2;
+
+select a1,a2, max(c) from t1 where (b = 'b') group by a1,a2;
+select a1,a2, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2;
+
+select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1;
+select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+
+select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1;
+select a1,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+
+select a1,a2,b, max(c) from t2 where (b = 'b') group by a1,a2;
+select a1,a2,b, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2;
+
+select a1,a2,b,min(c),max(c) from t2 where (b = 'b') group by a1,a2;
+select a1,a2,b,min(c),max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2;
+
+select a1,a2, max(c) from t2 where (b = 'b') group by a1,a2;
+select a1,a2, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2;
+
+# these queries test case 2) in TRP_GROUP_MIN_MAX::update_cost()
+select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1;
+select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+
+select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1;
+select a1,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+
+
+# IS NULL (makes sense for t2 only)
+# plans
+explain select a1,a2,b,min(c) from t2 where (a2 = 'a') and b is NULL group by a1;
+explain select a1,a2,b,min(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1;
+
+explain select a1,a2,b,max(c) from t2 where (a2 = 'a') and b is NULL group by a1;
+explain select a1,a2,b,max(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1;
+
+explain select a1,a2,b,min(c) from t2 where b is NULL group by a1,a2;
+explain select a1,a2,b,max(c) from t2 where b is NULL group by a1,a2;
+explain select a1,a2,b,min(c),max(c) from t2 where b is NULL group by a1,a2;
+
+# queries
+select a1,a2,b,min(c) from t2 where (a2 = 'a') and b is NULL group by a1;
+select a1,a2,b,min(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1;
+select a1,a2,b,max(c) from t2 where (a2 = 'a') and b is NULL group by a1;
+select a1,a2,b,max(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1;
+select a1,a2,b,min(c) from t2 where b is NULL group by a1,a2;
+select a1,a2,b,max(c) from t2 where b is NULL group by a1,a2;
+select a1,a2,b,min(c),max(c) from t2 where b is NULL group by a1,a2;
+select a1,a2,b,min(c),max(c) from t2 where b is NULL group by a1,a2;
+
+# C) Range predicates for the MIN/MAX attribute
+# plans
+--replace_column 9 #
+explain select a1,a2,b, max(c) from t1 where (c > 'b1') group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') group by a1,a2,b;
+explain select a1,a2,b, max(c) from t1 where (c > 'f123') group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where (c > 'f123') group by a1,a2,b;
+explain select a1,a2,b, max(c) from t1 where (c < 'a0') group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') group by a1,a2,b;
+explain select a1,a2,b, max(c) from t1 where (c < 'k321') group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where (c < 'k321') group by a1,a2,b;
+explain select a1,a2,b, max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+explain select a1,a2,b, max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b111') and (c <= 'g112') group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where (c between 'b111' and 'g112') or (c between 'd000' and 'i110') group by a1,a2,b;
+
+--replace_column 9 #
+explain select a1,a2,b, max(c) from t2 where (c > 'b1') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b, max(c) from t2 where (c > 'f123') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where (c > 'f123') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b, max(c) from t2 where (c < 'a0') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b, max(c) from t2 where (c < 'k321') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where (c < 'k321') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b, max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b, max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where (c > 'b111') and (c <= 'g112') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b;
+
+# queries
+select a1,a2,b, max(c) from t1 where (c > 'b1') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') group by a1,a2,b;
+select a1,a2,b, max(c) from t1 where (c > 'f123') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where (c > 'f123') group by a1,a2,b;
+select a1,a2,b, max(c) from t1 where (c < 'a0') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') group by a1,a2,b;
+select a1,a2,b, max(c) from t1 where (c < 'k321') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where (c < 'k321') group by a1,a2,b;
+select a1,a2,b, max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+select a1,a2,b, max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where (c > 'b111') and (c <= 'g112') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where (c between 'b111' and 'g112') or (c between 'd000' and 'i110') group by a1,a2,b;
+
+select a1,a2,b, max(c) from t2 where (c > 'b1') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') group by a1,a2,b;
+select a1,a2,b, max(c) from t2 where (c > 'f123') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where (c > 'f123') group by a1,a2,b;
+select a1,a2,b, max(c) from t2 where (c < 'a0') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') group by a1,a2,b;
+select a1,a2,b, max(c) from t2 where (c < 'k321') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where (c < 'k321') group by a1,a2,b;
+select a1,a2,b, max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+select a1,a2,b, max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where (c > 'b111') and (c <= 'g112') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b;
+
+# analyze the sub-select
+explain select a1,a2,b,min(c),max(c) from t1
+where exists ( select * from t2 where t2.c = t1.c )
+group by a1,a2,b;
+
+# the sub-select is unrelated to MIN/MAX
+explain select a1,a2,b,min(c),max(c) from t1
+where exists ( select * from t2 where t2.c > 'b1' )
+group by a1,a2,b;
+
+
+# A,B,C) Predicates referencing mixed classes of attributes
+# plans
+explain select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b;
+explain select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b;
+explain select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b;
+explain select a1,a2,b,min(c) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+explain select a1,a2,b,min(c) from t1 where (ord(a1) > 97) and (ord(a2) + ord(a1) > 194) and (b = 'c') group by a1,a2,b;
+
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c) from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+
+# queries
+select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b;
+select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b;
+select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b;
+select a1,a2,b,min(c) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+select a1,a2,b,min(c) from t1 where (ord(a1) > 97) and (ord(a2) + ord(a1) > 194) and (b = 'c') group by a1,a2,b;
+
+select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b;
+select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b;
+select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b;
+select a1,a2,b,min(c) from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+
+
+#
+# GROUP BY queries without MIN/MAX
+#
+
+# plans
+explain select a1,a2,b from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+explain select a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+explain select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+explain select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b;
+explain select a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+
+--replace_column 9 #
+explain select a1,a2,b from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+
+# queries
+select a1,a2,b from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+select a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b;
+select a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+
+select a1,a2,b from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+select a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b;
+select a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+
+#
+# DISTINCT queries
+#
+
+# plans
+explain select distinct a1,a2,b from t1;
+explain select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a');
+explain extended select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
+explain select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
+explain select distinct b from t1 where (a2 >= 'b') and (b = 'a');
+explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b';
+explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e';
+
+--replace_column 9 #
+explain select distinct a1,a2,b from t2;
+--replace_column 9 #
+explain select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a');
+explain extended select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
+--replace_column 9 #
+explain select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
+explain select distinct b from t2 where (a2 >= 'b') and (b = 'a');
+explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b';
+explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e';
+
+# queries
+select distinct a1,a2,b from t1;
+select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a');
+select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
+select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
+select distinct b from t1 where (a2 >= 'b') and (b = 'a');
+select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b';
+select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e';
+
+select distinct a1,a2,b from t2;
+select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a');
+select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
+select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
+select distinct b from t2 where (a2 >= 'b') and (b = 'a');
+select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b';
+select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e';
+
+# BUG #6303
+select distinct t_00.a1
+from t1 t_00
+where exists ( select * from t2 where a1 = t_00.a1 );
+
+# BUG #8532 - SELECT DISTINCT a, a causes server to crash
+select distinct a1,a1 from t1;
+select distinct a2,a1,a2,a1 from t1;
+select distinct t1.a1,t2.a1 from t1,t2;
+
+
+#
+# DISTINCT queries with GROUP-BY
+#
+
+# plans
+explain select distinct a1,a2,b from t1;
+explain select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+explain select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+explain select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+explain select distinct b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b' group by a1;
+explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e' group by a1;
+
+--replace_column 9 #
+explain select distinct a1,a2,b from t2;
+--replace_column 9 #
+explain select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+--replace_column 9 #
+explain select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+--replace_column 9 #
+explain select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+--replace_column 9 #
+explain select distinct b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+--replace_column 9 #
+explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b' group by a1;
+--replace_column 9 #
+explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e' group by a1;
+
+# queries
+select distinct a1,a2,b from t1;
+select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+select distinct b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b' group by a1;
+select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e' group by a1;
+
+select distinct a1,a2,b from t2;
+select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+select distinct b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b' group by a1;
+select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e' group by a1;
+
+
+#
+# COUNT (DISTINCT cols) queries
+#
+
+explain select count(distinct a1,a2,b) from t1 where (a2 >= 'b') and (b = 'a');
+explain select count(distinct a1,a2,b,c) from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
+explain extended select count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
+explain select count(distinct b) from t1 where (a2 >= 'b') and (b = 'a');
+explain extended select 98 + count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a');
+
+select count(distinct a1,a2,b) from t1 where (a2 >= 'b') and (b = 'a');
+select count(distinct a1,a2,b,c) from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
+select count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
+select count(distinct b) from t1 where (a2 >= 'b') and (b = 'a');
+select 98 + count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a');
+
+#
+# Queries with expressions in the select clause
+#
+
+explain select a1,a2,b, concat(min(c), max(c)) from t1 where a1 < 'd' group by a1,a2,b;
+explain select concat(a1,min(c)),b from t1 where a1 < 'd' group by a1,a2,b;
+explain select concat(a1,min(c)),b,max(c) from t1 where a1 < 'd' group by a1,a2,b;
+explain select concat(a1,a2),b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b;
+explain select concat(ord(min(b)),ord(max(b))),min(b),max(b) from t1 group by a1,a2;
+
+select a1,a2,b, concat(min(c), max(c)) from t1 where a1 < 'd' group by a1,a2,b;
+select concat(a1,min(c)),b from t1 where a1 < 'd' group by a1,a2,b;
+select concat(a1,min(c)),b,max(c) from t1 where a1 < 'd' group by a1,a2,b;
+select concat(a1,a2),b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b;
+select concat(ord(min(b)),ord(max(b))),min(b),max(b) from t1 group by a1,a2;
+
+
+#
+# Negative examples: queries that should NOT be treated as optimizable by
+# QUICK_GROUP_MIN_MAX_SELECT
+#
+
+# select a non-indexed attribute
+explain select a1,a2,b,d,min(c),max(c) from t1 group by a1,a2,b;
+
+explain select a1,a2,b,d from t1 group by a1,a2,b;
+
+# predicate that references an attribute that is after the MIN/MAX argument
+# in the index
+explain extended select a1,a2,min(b),max(b) from t1
+where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (c > 'a111') group by a1,a2;
+
+# predicate that references a non-indexed attribute
+explain extended select a1,a2,b,min(c),max(c) from t1
+where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (d > 'xy2') group by a1,a2,b;
+
+explain extended select a1,a2,b,c from t1
+where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (d > 'xy2') group by a1,a2,b,c;
+
+# non-equality predicate for a non-group select attribute
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') or (b < 'b') group by a1;
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b < 'b') group by a1;
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b <= 'b') group by a1;
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b <= 'b' and b >= 'a') group by a1;
+explain extended select a1,a2,b from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (c > 'a111') group by a1,a2,b;
+
+# non-group field with an equality predicate that references a keypart after the
+# MIN/MAX argument
+explain select a1,a2,min(b),c from t2 where (a2 = 'a') and (c = 'a111') group by a1;
+select a1,a2,min(b),c from t2 where (a2 = 'a') and (c = 'a111') group by a1;
+
+# disjunction for a non-group select attribute
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') or (b = 'a') group by a1;
+
+# non-range predicate for the MIN/MAX attribute
+explain select a1,a2,b,min(c),max(c) from t2
+where (c > 'a000') and (c <= 'd999') and (c like '_8__') group by a1,a2,b;
+
+# not all attributes are indexed by one index
+explain select a1, a2, b, c, min(d), max(d) from t1 group by a1,a2,b,c;
+
+# other aggregate functions than MIN/MAX
+explain select a1,a2,count(a2) from t1 group by a1,a2,b;
+explain extended select a1,a2,count(a2) from t1 where (a1 > 'a') group by a1,a2,b;
+explain extended select sum(ord(a1)) from t1 where (a1 > 'a') group by a1,a2,b;
+
+# test multi_range_groupby flag
+#MariaDB: no support: set optimizer_switch = 'multi_range_groupby=off';
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'a' or b = 'b') group by a1;
+#set optimizer_switch = 'default';
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'a' or b = 'b') group by a1;
+
+
+#
+# Bug #16710: select distinct doesn't return all it should
+#
+
+explain select distinct(a1) from t1 where ord(a2) = 98;
+select distinct(a1) from t1 where ord(a2) = 98;
+
+#
+# BUG#11044: DISTINCT or GROUP BY queries with equality predicates instead of MIN/MAX.
+#
+
+explain select a1 from t1 where a2 = 'b' group by a1;
+select a1 from t1 where a2 = 'b' group by a1;
+
+explain select distinct a1 from t1 where a2 = 'b';
+select distinct a1 from t1 where a2 = 'b';
+
+#
+# Bug #12672: primary key implicitly included in every innodb index
+#
+# Test case moved to group_min_max_innodb
+
+
+#
+# Bug #6142: a problem with the empty innodb table
+#
+# Test case moved to group_min_max_innodb
+
+
+#
+# Bug #9798: group by with rollup
+#
+# Test case moved to group_min_max_innodb
+
+
+#
+# Bug #13293 Wrongly used index results in endless loop.
+#
+# Test case moved to group_min_max_innodb
+
+
+drop table t1,t2,t3;
+
+#
+# Bug #14920 Ordering aggregated result sets with composite primary keys
+# corrupts resultset
+#
+eval create table t1 (c1 int not null,c2 int not null, primary key(c1,c2)) engine=$engine;
+insert into t1 (c1,c2) values
+(10,1),(10,2),(10,3),(20,4),(20,5),(20,6),(30,7),(30,8),(30,9);
+select distinct c1, c2 from t1 order by c2;
+select c1,min(c2) as c2 from t1 group by c1 order by c2;
+select c1,c2 from t1 group by c1,c2 order by c2;
+drop table t1;
+
+#
+# Bug #16203: Analysis for possible min/max optimization erroneously
+# returns impossible range
+#
+
+eval CREATE TABLE t1 (a varchar(5), b int(11), PRIMARY KEY (a,b)) engine=$engine;
+INSERT INTO t1 VALUES ('AA',1), ('AA',2), ('AA',3), ('BB',1), ('AA',4);
+OPTIMIZE TABLE t1;
+
+SELECT a FROM t1 WHERE a='AA' GROUP BY a;
+SELECT a FROM t1 WHERE a='BB' GROUP BY a;
+
+EXPLAIN SELECT a FROM t1 WHERE a='AA' GROUP BY a;
+EXPLAIN SELECT a FROM t1 WHERE a='BB' GROUP BY a;
+
+SELECT DISTINCT a FROM t1 WHERE a='BB';
+SELECT DISTINCT a FROM t1 WHERE a LIKE 'B%';
+SELECT a FROM t1 WHERE a LIKE 'B%' GROUP BY a;
+
+DROP TABLE t1;
+
+
+#
+# Bug #15102: select distinct returns empty result, select count
+# distinct > 0 (correct)
+#
+
+CREATE TABLE t1 (
+ a int(11) NOT NULL DEFAULT '0',
+ b varchar(16) COLLATE latin1_general_ci NOT NULL DEFAULT '',
+ PRIMARY KEY (a,b)
+ ) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_general_ci;
+
+delimiter |;
+
+CREATE PROCEDURE a(x INT)
+BEGIN
+ DECLARE rnd INT;
+ DECLARE cnt INT;
+
+ WHILE x > 0 DO
+ SET rnd= x % 100;
+ SET cnt = (SELECT COUNT(*) FROM t1 WHERE a = rnd);
+ INSERT INTO t1(a,b) VALUES (rnd, CAST(cnt AS CHAR));
+ SET x= x - 1;
+ END WHILE;
+END|
+
+DELIMITER ;|
+
+CALL a(1000);
+
+SELECT a FROM t1 WHERE a=0;
+SELECT DISTINCT a FROM t1 WHERE a=0;
+SELECT COUNT(DISTINCT a) FROM t1 WHERE a=0;
+
+DROP TABLE t1;
+DROP PROCEDURE a;
+
+#
+# Bug #18068: SELECT DISTINCT
+#
+
+eval CREATE TABLE t1 (a varchar(64) NOT NULL default '', PRIMARY KEY(a)) engine=$engine;
+
+INSERT INTO t1 (a) VALUES
+ (''), ('CENTRAL'), ('EASTERN'), ('GREATER LONDON'),
+ ('NORTH CENTRAL'), ('NORTH EAST'), ('NORTH WEST'), ('SCOTLAND'),
+ ('SOUTH EAST'), ('SOUTH WEST'), ('WESTERN');
+
+EXPLAIN SELECT DISTINCT a,a FROM t1 ORDER BY a;
+SELECT DISTINCT a,a FROM t1 ORDER BY a;
+
+DROP TABLE t1;
+
+#
+# Bug #21007: NATURAL JOIN (any JOIN (2 x NATURAL JOIN)) crashes the server
+#
+
+eval CREATE TABLE t1 (id1 INT, id2 INT) engine=$engine;
+eval CREATE TABLE t2 (id2 INT, id3 INT, id5 INT) engine=$engine;
+eval CREATE TABLE t3 (id3 INT, id4 INT) engine=$engine;
+eval CREATE TABLE t4 (id4 INT) engine=$engine;
+eval CREATE TABLE t5 (id5 INT, id6 INT) engine=$engine;
+eval CREATE TABLE t6 (id6 INT) engine=$engine;
+
+INSERT INTO t1 VALUES(1,1);
+INSERT INTO t2 VALUES(1,1,1);
+INSERT INTO t3 VALUES(1,1);
+INSERT INTO t4 VALUES(1);
+INSERT INTO t5 VALUES(1,1);
+INSERT INTO t6 VALUES(1);
+
+# original bug query
+SELECT * FROM
+t1
+ NATURAL JOIN
+(t2 JOIN (t3 NATURAL JOIN t4, t5 NATURAL JOIN t6)
+ ON (t3.id3 = t2.id3 AND t5.id5 = t2.id5));
+
+# inner join swapped
+SELECT * FROM
+t1
+ NATURAL JOIN
+(((t3 NATURAL JOIN t4) join (t5 NATURAL JOIN t6) on t3.id4 = t5.id5) JOIN t2
+ ON (t3.id3 = t2.id3 AND t5.id5 = t2.id5));
+
+# one join less, no ON cond
+SELECT * FROM t1 NATURAL JOIN ((t3 join (t5 NATURAL JOIN t6)) JOIN t2);
+
+# wrong error message: 'id2' - ambiguous column
+SELECT * FROM
+(t2 JOIN (t3 NATURAL JOIN t4, t5 NATURAL JOIN t6)
+ ON (t3.id3 = t2.id3 AND t5.id5 = t2.id5))
+ NATURAL JOIN
+t1;
+SELECT * FROM
+(t2 JOIN ((t3 NATURAL JOIN t4) join (t5 NATURAL JOIN t6)))
+ NATURAL JOIN
+t1;
+
+DROP TABLE t1,t2,t3,t4,t5,t6;
+
+#
+# Bug#22342: No results returned for query using max and group by
+#
+eval CREATE TABLE t1 (a int, b int, PRIMARY KEY (a,b), KEY b (b)) engine=$engine;
+INSERT INTO t1 VALUES (1,1),(1,2),(1,0),(1,3);
+ANALYZE TABLE t1;
+
+explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a;
+SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a;
+SELECT MIN(b), a FROM t1 WHERE b > 1 AND a = 1 GROUP BY a;
+eval CREATE TABLE t2 (a int, b int, c int, PRIMARY KEY (a,b,c)) engine=$engine;
+INSERT INTO t2 SELECT a,b,b FROM t1;
+ANALYZE TABLE t2;
+explain SELECT MIN(c) FROM t2 WHERE b = 2 and a = 1 and c > 1 GROUP BY a;
+SELECT MIN(c) FROM t2 WHERE b = 2 and a = 1 and c > 1 GROUP BY a;
+
+DROP TABLE t1,t2;
+
+#
+# Bug#24156: Loose index scan not used with CREATE TABLE ...SELECT and similar statements
+#
+
+eval CREATE TABLE t1 (a INT, b INT, INDEX (a,b)) engine=$engine;
+INSERT INTO t1 (a, b) VALUES (1,1), (1,2), (1,3), (1,4), (1,5),
+ (2,2), (2,3), (2,1), (3,1), (4,1), (4,2), (4,3), (4,4), (4,5), (4,6);
+ANALYZE TABLE t1;
+EXPLAIN SELECT max(b), a FROM t1 GROUP BY a;
+FLUSH STATUS;
+SELECT max(b), a FROM t1 GROUP BY a;
+SHOW STATUS LIKE 'handler_read__e%';
+EXPLAIN SELECT max(b), a FROM t1 GROUP BY a;
+FLUSH STATUS;
+eval CREATE TABLE t2 engine=$engine SELECT max(b), a FROM t1 GROUP BY a;
+SHOW STATUS LIKE 'handler_read__e%';
+FLUSH STATUS;
+SELECT * FROM (SELECT max(b), a FROM t1 GROUP BY a) b;
+SHOW STATUS LIKE 'handler_read__e%';
+FLUSH STATUS;
+(SELECT max(b), a FROM t1 GROUP BY a) UNION
+ (SELECT max(b), a FROM t1 GROUP BY a);
+SHOW STATUS LIKE 'handler_read__e%';
+EXPLAIN (SELECT max(b), a FROM t1 GROUP BY a) UNION
+ (SELECT max(b), a FROM t1 GROUP BY a);
+
+EXPLAIN SELECT (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2) x
+ FROM t1 AS t1_outer;
+EXPLAIN SELECT 1 FROM t1 AS t1_outer WHERE EXISTS
+ (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2);
+EXPLAIN SELECT 1 FROM t1 AS t1_outer WHERE
+ (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2) > 12;
+EXPLAIN SELECT 1 FROM t1 AS t1_outer WHERE
+ a IN (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2);
+EXPLAIN SELECT 1 FROM t1 AS t1_outer GROUP BY a HAVING
+ a > (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2);
+EXPLAIN SELECT 1 FROM t1 AS t1_outer1 JOIN t1 AS t1_outer2
+ ON t1_outer1.a = (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2)
+ AND t1_outer1.b = t1_outer2.b;
+EXPLAIN SELECT (SELECT (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2) x
+ FROM t1 AS t1_outer) x2 FROM t1 AS t1_outer2;
+
+CREATE TABLE t3 LIKE t1;
+FLUSH STATUS;
+INSERT INTO t3 SELECT a,MAX(b) FROM t1 GROUP BY a;
+SHOW STATUS LIKE 'handler_read__e%';
+DELETE FROM t3;
+FLUSH STATUS;
+INSERT INTO t3 SELECT 1, (SELECT MAX(b) FROM t1 GROUP BY a HAVING a < 2)
+ FROM t1 LIMIT 1;
+SHOW STATUS LIKE 'handler_read__e%';
+FLUSH STATUS;
+DELETE FROM t3 WHERE (SELECT MAX(b) FROM t1 GROUP BY a HAVING a < 2) > 10000;
+SHOW STATUS LIKE 'handler_read__e%';
+FLUSH STATUS;
+--error ER_SUBQUERY_NO_1_ROW
+DELETE FROM t3 WHERE (SELECT (SELECT MAX(b) FROM t1 GROUP BY a HAVING a < 2) x
+ FROM t1) > 10000;
+SHOW STATUS LIKE 'handler_read__e%';
+
+DROP TABLE t1,t2,t3;
+
+#
+# Bug#25602: queries with DISTINCT and SQL_BIG_RESULT hint
+# for which loose scan optimization is applied
+#
+
+eval CREATE TABLE t1 (a int, INDEX idx(a)) engine=$engine;
+INSERT INTO t1 VALUES
+ (4), (2), (1), (2), (4), (2), (1), (4),
+ (4), (2), (1), (2), (2), (4), (1), (4);
+ANALYZE TABLE t1;
+
+EXPLAIN SELECT DISTINCT(a) FROM t1;
+SELECT DISTINCT(a) FROM t1;
+EXPLAIN SELECT SQL_BIG_RESULT DISTINCT(a) FROM t1;
+SELECT SQL_BIG_RESULT DISTINCT(a) FROM t1;
+
+DROP TABLE t1;
+
+#
+# Bug #32268: Indexed queries give bogus MIN and MAX results
+#
+
+eval CREATE TABLE t1 (a INT, b INT) engine=$engine;
+INSERT INTO t1 (a, b) VALUES (1,1), (1,2), (1,3);
+INSERT INTO t1 SELECT a + 1, b FROM t1;
+INSERT INTO t1 SELECT a + 2, b FROM t1;
+ANALYZE TABLE t1;
+
+EXPLAIN
+SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC;
+SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC;
+
+CREATE INDEX break_it ON t1 (a, b);
+
+EXPLAIN
+SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a;
+SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a;
+
+EXPLAIN
+SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC;
+SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC;
+
+EXPLAIN
+SELECT a, MIN(b), MAX(b), AVG(b) FROM t1 GROUP BY a ORDER BY a DESC;
+SELECT a, MIN(b), MAX(b), AVG(b) FROM t1 GROUP BY a ORDER BY a DESC;
+
+DROP TABLE t1;
+
+#
+# Bug#38195: Incorrect handling of aggregate functions when loose index scan is
+# used causes server crash.
+#
+create table t1 (a int, b int, primary key (a,b), key `index` (a,b)) engine=MyISAM;
+insert into t1 (a,b) values
+(0,0),(0,1),(0,2),(0,3),(0,4),(0,5),(0,6),
+ (0,7),(0,8),(0,9),(0,10),(0,11),(0,12),(0,13),
+(1,0),(1,1),(1,2),(1,3),(1,4),(1,5),(1,6),
+ (1,7),(1,8),(1,9),(1,10),(1,11),(1,12),(1,13),
+(2,0),(2,1),(2,2),(2,3),(2,4),(2,5),(2,6),
+ (2,7),(2,8),(2,9),(2,10),(2,11),(2,12),(2,13),
+(3,0),(3,1),(3,2),(3,3),(3,4),(3,5),(3,6),
+ (3,7),(3,8),(3,9),(3,10),(3,11),(3,12),(3,13);
+insert into t1 (a,b) select a, max(b)+1 from t1 where a = 0 group by a;
+ANALYZE TABLE t1;
+select * from t1;
+explain extended select sql_buffer_result a, max(b)+1 from t1 where a = 0 group by a;
+drop table t1;
+
+
+#
+# Bug #41610: key_infix_len can be overwritten causing some group by queries
+# to return no rows
+#
+
+eval CREATE TABLE t1 (a int, b int, c int, d int,
+ KEY foo (c,d,a,b), KEY bar (c,a,b,d)) engine=$engine;
+
+INSERT INTO t1 VALUES (1, 1, 1, 1), (1, 1, 1, 2), (1, 1, 1, 3), (1, 1, 1, 4);
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT a,b,c+1,d FROM t1;
+ANALYZE TABLE t1;
+
+#Should be non-empty
+EXPLAIN SELECT DISTINCT c FROM t1 WHERE d=4;
+SELECT DISTINCT c FROM t1 WHERE d=4;
+
+DROP TABLE t1;
+
+--echo #
+--echo # Bug #45386: Wrong query result with MIN function in field list,
+--echo # WHERE and GROUP BY clause
+--echo #
+
+eval CREATE TABLE t (a INT, b INT, INDEX (a,b)) engine=$engine;
+INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1);
+INSERT INTO t SELECT * FROM t;
+INSERT INTO t SELECT * FROM t;
+ANALYZE TABLE t;
+
+--echo # test MIN
+--echo #should use range with index for group by
+EXPLAIN
+SELECT a, MIN(b) FROM t WHERE b <> 0 GROUP BY a;
+--echo #should return 1 row
+SELECT a, MIN(b) FROM t WHERE b <> 0 GROUP BY a;
+
+--echo # test MAX
+--echo #should use range with index for group by
+EXPLAIN
+SELECT a, MAX(b) FROM t WHERE b <> 1 GROUP BY a;
+--echo #should return 1 row
+SELECT a, MAX(b) FROM t WHERE b <> 1 GROUP BY a;
+
+--echo # test 3 ranges and use the middle one
+INSERT INTO t SELECT a, 2 FROM t;
+
+--echo #should use range with index for group by
+EXPLAIN
+SELECT a, MAX(b) FROM t WHERE b > 0 AND b < 2 GROUP BY a;
+--echo #should return 1 row
+SELECT a, MAX(b) FROM t WHERE b > 0 AND b < 2 GROUP BY a;
+
+DROP TABLE t;
+
+--echo #
+--echo # Bug #48472: Loose index scan inappropriately chosen for some WHERE
+--echo # conditions
+--echo #
+
+eval CREATE TABLE t (a INT, b INT, INDEX (a,b)) engine=$engine;
+INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1);
+INSERT INTO t SELECT * FROM t;
+ANALYZE TABLE t;
+
+SELECT a, MAX(b) FROM t WHERE 0=b+0 GROUP BY a;
+
+DROP TABLE t;
+
+--echo End of 5.0 tests
+
+--echo #
+--echo # Bug #46607: Assertion failed: (cond_type == Item::FUNC_ITEM) results in
+--echo # server crash
+--echo #
+
+eval CREATE TABLE t (a INT, b INT, INDEX (a,b)) engine=$engine;
+INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1);
+INSERT INTO t SELECT * FROM t;
+
+SELECT a, MAX(b) FROM t WHERE b GROUP BY a;
+
+DROP TABLE t;
+
+#
+# BUG#49902 - SELECT returns incorrect results
+#
+eval CREATE TABLE t1(a INT NOT NULL, b INT NOT NULL, KEY (b)) engine=$engine;
+INSERT INTO t1 VALUES(1,1),(2,1);
+ANALYZE TABLE t1;
+SELECT 1 AS c, b FROM t1 WHERE b IN (1,2) GROUP BY c, b;
+SELECT a FROM t1 WHERE b=1;
+DROP TABLE t1;
+
+--echo #
+--echo # Bug#47762: Incorrect result from MIN() when WHERE tests NOT NULL column
+--echo # for NULL
+--echo #
+
+--echo ## Test for NULLs allowed
+eval CREATE TABLE t1 ( a INT, KEY (a) ) engine=$engine;
+INSERT INTO t1 VALUES (1), (2), (3);
+ANALYZE TABLE t1;
+--source include/min_null_cond.inc
+INSERT INTO t1 VALUES (NULL), (NULL);
+ANALYZE TABLE t1;
+--source include/min_null_cond.inc
+DROP TABLE t1;
+
+--echo ## Test for NOT NULLs
+eval CREATE TABLE t1 ( a INT NOT NULL PRIMARY KEY) engine=$engine;
+INSERT INTO t1 VALUES (1), (2), (3);
+ANALYZE TABLE t1;
+--echo #
+--echo # NULL-safe operator test disabled for non-NULL indexed columns.
+--echo #
+--echo # See bugs
+--echo #
+--echo # - Bug#52173: Reading NULL value from non-NULL index gives
+--echo # wrong result in embedded server
+--echo #
+--echo # - Bug#52174: Sometimes wrong plan when reading a MAX value from
+--echo # non-NULL index
+--echo #
+--let $skip_null_safe_test= 1
+--source include/min_null_cond.inc
+DROP TABLE t1;
+
+--echo #
+--echo # Bug#53859: Valgrind: opt_sum_query(TABLE_LIST*, List<Item>&, Item*) at
+--echo # opt_sum.cc:305
+--echo #
+eval CREATE TABLE t1 ( a INT, KEY (a) ) engine=$engine;
+INSERT INTO t1 VALUES (1), (2), (3);
+
+SELECT MIN( a ) AS min_a
+FROM t1
+WHERE a > 1 AND a IS NULL
+ORDER BY min_a;
+
+DROP TABLE t1;
+
+
+--echo End of 5.1 tests
+
+
+--echo #
+--echo # WL#3220 (Loose index scan for COUNT DISTINCT)
+--echo #
+
+eval CREATE TABLE t1 (a INT, b INT, c INT, KEY (a,b)) engine=$engine;
+INSERT INTO t1 VALUES (1,1,1), (1,2,1), (1,3,1), (1,4,1);
+INSERT INTO t1 SELECT a, b + 4, 1 FROM t1;
+INSERT INTO t1 SELECT a + 1, b, 1 FROM t1;
+ANALYZE TABLE t1;
+eval CREATE TABLE t2 (a INT, b INT, c INT, d INT, e INT, f INT, KEY (a,b,c)) engine=$engine;
+INSERT INTO t2 VALUES (1,1,1,1,1,1), (1,2,1,1,1,1), (1,3,1,1,1,1),
+ (1,4,1,1,1,1);
+INSERT INTO t2 SELECT a, b + 4, c,d,e,f FROM t2;
+INSERT INTO t2 SELECT a + 1, b, c,d,e,f FROM t2;
+ANALYZE TABLE t2;
+
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1;
+SELECT COUNT(DISTINCT a) FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT a,b) FROM t1;
+SELECT COUNT(DISTINCT a,b) FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT b,a) FROM t1;
+SELECT COUNT(DISTINCT b,a) FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT b) FROM t1;
+SELECT COUNT(DISTINCT b) FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 GROUP BY a;
+SELECT COUNT(DISTINCT a) FROM t1 GROUP BY a;
+
+EXPLAIN SELECT COUNT(DISTINCT b) FROM t1 GROUP BY a;
+SELECT COUNT(DISTINCT b) FROM t1 GROUP BY a;
+
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 GROUP BY b;
+SELECT COUNT(DISTINCT a) FROM t1 GROUP BY b;
+
+EXPLAIN SELECT DISTINCT COUNT(DISTINCT a) FROM t1;
+SELECT DISTINCT COUNT(DISTINCT a) FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT a, b + 0) FROM t1;
+SELECT COUNT(DISTINCT a, b + 0) FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT b) < 10;
+SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT b) < 10;
+
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT c) < 10;
+SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT c) < 10;
+
+EXPLAIN SELECT 1 FROM t1 HAVING COUNT(DISTINCT a) < 10;
+SELECT 1 FROM t1 HAVING COUNT(DISTINCT a) < 10;
+
+EXPLAIN SELECT 1 FROM t1 GROUP BY a HAVING COUNT(DISTINCT b) > 1;
+SELECT 1 FROM t1 GROUP BY a HAVING COUNT(DISTINCT b) > 1;
+
+EXPLAIN SELECT COUNT(DISTINCT t1_1.a) FROM t1 t1_1, t1 t1_2 GROUP BY t1_1.a;
+SELECT COUNT(DISTINCT t1_1.a) FROM t1 t1_1, t1 t1_2 GROUP BY t1_1.a;
+
+EXPLAIN SELECT COUNT(DISTINCT a), 12 FROM t1;
+SELECT COUNT(DISTINCT a), 12 FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT a, b, c) FROM t2;
+SELECT COUNT(DISTINCT a, b, c) FROM t2;
+
+EXPLAIN SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT a) FROM t2;
+SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT a) FROM t2;
+
+EXPLAIN SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT f) FROM t2;
+SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT f) FROM t2;
+
+EXPLAIN SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, a) FROM t2;
+SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, a) FROM t2;
+
+EXPLAIN SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, f) FROM t2;
+SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, f) FROM t2;
+
+EXPLAIN SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, d) FROM t2;
+SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, d) FROM t2;
+
+EXPLAIN SELECT a, c, COUNT(DISTINCT c, a, b) FROM t2 GROUP BY a, b, c;
+SELECT a, c, COUNT(DISTINCT c, a, b) FROM t2 GROUP BY a, b, c;
+
+EXPLAIN SELECT COUNT(DISTINCT c, a, b) FROM t2
+ WHERE a > 5 AND b BETWEEN 10 AND 20 GROUP BY a, b, c;
+SELECT COUNT(DISTINCT c, a, b) FROM t2
+ WHERE a > 5 AND b BETWEEN 10 AND 20 GROUP BY a, b, c;
+
+EXPLAIN SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 WHERE a = 5
+ GROUP BY b;
+SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 WHERE a = 5
+ GROUP BY b;
+
+EXPLAIN SELECT a, COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a;
+SELECT a, COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a;
+
+EXPLAIN SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a;
+SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a;
+
+EXPLAIN SELECT COUNT(DISTINCT a, b) FROM t2 WHERE c = 13 AND d = 42;
+SELECT COUNT(DISTINCT a, b) FROM t2 WHERE c = 13 AND d = 42;
+
+EXPLAIN SELECT a, COUNT(DISTINCT a), SUM(DISTINCT a) FROM t2
+ WHERE b = 13 AND c = 42 GROUP BY a;
+SELECT a, COUNT(DISTINCT a), SUM(DISTINCT a) FROM t2
+ WHERE b = 13 AND c = 42 GROUP BY a;
+
+--echo # This query could have been resolved using loose index scan since
+--echo # the second part of count(..) is defined by a constant predicate
+EXPLAIN SELECT COUNT(DISTINCT a, b), SUM(DISTINCT a) FROM t2 WHERE b = 42;
+SELECT COUNT(DISTINCT a, b), SUM(DISTINCT a) FROM t2 WHERE b = 42;
+
+EXPLAIN SELECT SUM(DISTINCT a), MAX(b) FROM t2 GROUP BY a;
+SELECT SUM(DISTINCT a), MAX(b) FROM t2 GROUP BY a;
+
+EXPLAIN SELECT 42 * (a + c + COUNT(DISTINCT c, a, b)) FROM t2 GROUP BY a, b, c;
+SELECT 42 * (a + c + COUNT(DISTINCT c, a, b)) FROM t2 GROUP BY a, b, c;
+
+EXPLAIN SELECT (SUM(DISTINCT a) + MAX(b)) FROM t2 GROUP BY a;
+SELECT (SUM(DISTINCT a) + MAX(b)) FROM t2 GROUP BY a;
+
+DROP TABLE t1,t2;
+
+--echo # end of WL#3220 tests
+
+--echo #
+--echo # Bug#50539: Wrong result when loose index scan is used for an aggregate
+--echo # function with distinct
+--echo #
+eval CREATE TABLE t1 (
+ f1 int(11) NOT NULL DEFAULT '0',
+ f2 char(1) NOT NULL DEFAULT '',
+ PRIMARY KEY (f1,f2)
+) engine=$engine;
+insert into t1 values (1, 'A'), (1, 'B'), (1, 'C'), (2, 'A'),
+(3, 'A'), (3, 'B'), (3, 'C'), (3, 'D');
+ANALYZE TABLE t1;
+
+SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
+explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
+
+drop table t1;
+--echo # End of test#50539.
+
+--echo #
+--echo # Bug#17217128 - BAD INTERACTION BETWEEN MIN/MAX AND
+--echo # "HAVING SUM(DISTINCT)": WRONG RESULTS.
+--echo #
+
+eval CREATE TABLE t (a INT, b INT, KEY(a,b)) engine=$engine;
+INSERT INTO t VALUES (1,1), (2,2), (3,3), (4,4), (1,0), (3,2), (4,5);
+ANALYZE TABLE t;
+# MariaDB 10.2 doesn't have the optimizer trace yet: let $DEFAULT_TRACE_MEM_SIZE=1048576; # 1MB
+# eval set optimizer_trace_max_mem_size=$DEFAULT_TRACE_MEM_SIZE;
+# set @@session.optimizer_trace='enabled=on';
+# set end_markers_in_json=on;
+
+ANALYZE TABLE t;
+
+SELECT a, SUM(DISTINCT a), MIN(b) FROM t GROUP BY a;
+EXPLAIN SELECT a, SUM(DISTINCT a), MIN(b) FROM t GROUP BY a;
+#SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK
+# FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+SELECT a, SUM(DISTINCT a), MAX(b) FROM t GROUP BY a;
+EXPLAIN SELECT a, SUM(DISTINCT a), MAX(b) FROM t GROUP BY a;
+#SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK
+# FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+SELECT a, MAX(b) FROM t GROUP BY a HAVING SUM(DISTINCT a);
+EXPLAIN SELECT a, MAX(b) FROM t GROUP BY a HAVING SUM(DISTINCT a);
+#SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK
+# FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+SELECT SUM(DISTINCT a), MIN(b), MAX(b) FROM t;
+EXPLAIN SELECT SUM(DISTINCT a), MIN(b), MAX(b) FROM t;
+#SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK
+# FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+SELECT a, SUM(DISTINCT a), MIN(b), MAX(b) FROM t GROUP BY a;
+EXPLAIN SELECT a, SUM(DISTINCT a), MIN(b), MAX(b) FROM t GROUP BY a;
+#SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK
+# FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+#SET optimizer_trace_max_mem_size=DEFAULT;
+#SET optimizer_trace=DEFAULT;
+#SET end_markers_in_json=DEFAULT;
+
+DROP TABLE t;
+
+--echo #
+--echo # Bug#18109609: LOOSE INDEX SCAN IS NOT USED WHEN IT SHOULD
+--echo #
+
+eval CREATE TABLE t1 (
+id INT AUTO_INCREMENT PRIMARY KEY,
+c1 INT,
+c2 INT,
+KEY(c1,c2)) engine=$engine;
+
+INSERT INTO t1(c1,c2) VALUES
+(1, 1), (1,2), (2,1), (2,2), (3,1), (3,2), (3,3), (4,1), (4,2), (4,3),
+(4,4), (4,5), (4,6), (4,7), (4,8), (4,9), (4,10), (4,11), (4,12), (4,13),
+(4,14), (4,15), (4,16), (4,17), (4,18), (4,19), (4,20),(5,5);
+ANALYZE TABLE t1;
+
+EXPLAIN SELECT MAX(c2), c1 FROM t1 WHERE c1 = 4 GROUP BY c1;
+FLUSH STATUS;
+SELECT MAX(c2), c1 FROM t1 WHERE c1 = 4 GROUP BY c1;
+SHOW SESSION STATUS LIKE 'Handler_read%';
+
+DROP TABLE t1;
+
+--echo # End of test for Bug#18109609
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/have_direct_io.inc b/storage/rocksdb/mysql-test/rocksdb/include/have_direct_io.inc
new file mode 100644
index 00000000000..d9b4b46b25a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/have_direct_io.inc
@@ -0,0 +1,23 @@
+# Common test pattern for options that control direct i/o
+#
+# Required input:
+# $io_option - name and assignment to enable on server command line
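+#
+# Typical usage (a sketch; the option name and value are illustrative
+# assumptions, not prescribed by this file):
+#   let $io_option= --rocksdb-use-direct-reads=1;
+#   --source include/have_direct_io.inc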
+
+--perl
+use Cwd 'abs_path';
+
+open(FILE, ">", "$ENV{MYSQL_TMP_DIR}/data_in_shm.inc") or die;
+my $real_path= abs_path($ENV{'MYSQLTEST_VARDIR'});
+my $in_shm= (index($real_path, "/dev/shm") != -1) ||
+ (index($real_path, "/run/shm") != -1);
+print FILE "let \$DATA_IN_SHM= $in_shm;\n";
+close FILE;
+EOF
+
+--source $MYSQL_TMP_DIR/data_in_shm.inc
+--remove_file $MYSQL_TMP_DIR/data_in_shm.inc
+
+if ($DATA_IN_SHM)
+{
+ --skip DATADIR is in /{dev|run}/shm, possibly due to --mem
+}
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/have_rocksdb.inc b/storage/rocksdb/mysql-test/rocksdb/include/have_rocksdb.inc
new file mode 100644
index 00000000000..1f762d38c64
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/have_rocksdb.inc
@@ -0,0 +1,10 @@
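+# Guard include: skip the test when the RocksDB engine is not built in
+# or not enabled. A test would typically begin with (a sketch):
+#   --source include/have_rocksdb.inc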
+if (`SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'rocksdb' AND support IN ('YES', 'DEFAULT', 'ENABLED')`)
+{
+ --skip Test requires engine RocksDB.
+}
+
+--disable_query_log
+# Table statistics can vary depending on when the memtables are flushed, so
+# flush them at the beginning of the test to ensure the test runs consistently.
+set global rocksdb_force_flush_memtable_now = true;
+--enable_query_log
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/have_rocksdb.opt b/storage/rocksdb/mysql-test/rocksdb/include/have_rocksdb.opt
new file mode 100644
index 00000000000..36d7dda1609
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/have_rocksdb.opt
@@ -0,0 +1,12 @@
+--loose-enable-rocksdb
+--loose-enable-rocksdb_global_info
+--loose-enable-rocksdb_ddl
+--loose-enable-rocksdb_cf_options
+--loose-enable-rocksdb_perf_context
+--loose-enable-rocksdb_perf_context_global
+--loose-enable-rocksdb_index_file_map
+--loose-enable-rocksdb_dbstats
+--loose-enable-rocksdb_cfstats
+--loose-enable-rocksdb_lock_info
+--loose-enable-rocksdb_trx
+--loose-enable-rocksdb_locks
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/have_rocksdb_default.inc b/storage/rocksdb/mysql-test/rocksdb/include/have_rocksdb_default.inc
new file mode 100644
index 00000000000..2c50afd5014
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/have_rocksdb_default.inc
@@ -0,0 +1,10 @@
+if (`SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'rocksdb' AND support IN ('DEFAULT')`)
+{
+ --skip Test requires engine RocksDB as default.
+}
+
+--disable_query_log
+# Table statistics can vary depending on when the memtables are flushed, so
+# flush them at the beginning of the test to ensure the test runs consistently.
+set global rocksdb_force_flush_memtable_now = true;
+--enable_query_log
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/have_rocksdb_replication.inc b/storage/rocksdb/mysql-test/rocksdb/include/have_rocksdb_replication.inc
new file mode 100644
index 00000000000..92261211bf5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/have_rocksdb_replication.inc
@@ -0,0 +1,11 @@
+# MARIAROCKS_NOT_YET: replication doesn't work yet:
+#if (`select count(*) = 0 from information_schema.tables where engine='rocksdb' and table_name='slave_gtid_info'`)
+#{
+# --skip Test requires default engine RocksDB
+#}
+
+--disable_query_log
+# Table statistics can vary depending on when the memtables are flushed, so
+# flush them at the beginning of the test to ensure the test runs consistently.
+set global rocksdb_force_flush_memtable_now = true;
+--enable_query_log
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/have_write_committed.inc b/storage/rocksdb/mysql-test/rocksdb/include/have_write_committed.inc
new file mode 100644
index 00000000000..681b966f680
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/have_write_committed.inc
@@ -0,0 +1,3 @@
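+# Guard include: skip the test unless the server runs with
+# rocksdb_write_policy = 'write_committed'. Typical usage (a sketch):
+#   --source include/have_write_committed.inc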
+if (`select count(*) = 0 from information_schema.session_variables where variable_name = 'rocksdb_write_policy' and variable_value = 'write_committed';`) {
+ --skip Test requires write_committed policy
+}
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/have_write_prepared.inc b/storage/rocksdb/mysql-test/rocksdb/include/have_write_prepared.inc
new file mode 100644
index 00000000000..df088ea047d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/have_write_prepared.inc
@@ -0,0 +1,3 @@
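+# Guard include: skip the test unless the server runs with
+# rocksdb_write_policy = 'write_prepared'. Typical usage (a sketch):
+#   --source include/have_write_prepared.inc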
+if (`select count(*) = 0 from information_schema.session_variables where variable_name = 'rocksdb_write_policy' and variable_value = 'write_prepared';`) {
+ --skip Test requires write_prepared policy
+}
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/index_merge1.inc b/storage/rocksdb/mysql-test/rocksdb/include/index_merge1.inc
new file mode 100644
index 00000000000..b5cf7bff763
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/index_merge1.inc
@@ -0,0 +1,910 @@
+# include/index_merge1.inc
+#
+# Index merge tests
+#
+# The variables
+# $engine_type -- storage engine to be tested
+# $merge_table_support -- 1 storage engine supports merge tables
+# -- 0 storage engine does not support merge tables
+# $index_merge_random_rows_in_EXPLAIN -- 1 row estimates in EXPLAIN are
+# unstable for the engine, so the rows column is masked
+# have to be set before sourcing this script.
+#
+# Note: The comments/expectations refer to MyISAM.
+# They might not be valid for other storage engines.
+#
+# Last update:
+# 2006-08-02 ML test refactored
+# old name was t/index_merge.test
+# main code went into include/index_merge1.inc
+#
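+# Example of sourcing this script (a sketch; the values shown are
+# illustrative assumptions, not requirements):
+#   let $engine_type= RocksDB;
+#   let $merge_table_support= 0;
+#   let $index_merge_random_rows_in_EXPLAIN= 1;
+#   --source include/index_merge1.inc
+#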
+
+--echo #---------------- Index merge test 1 -------------------------------------------
+
+eval SET SESSION DEFAULT_STORAGE_ENGINE = $engine_type;
+
+--disable_warnings
+drop table if exists t0, t1, t2, t3, t4;
+--enable_warnings
+
+# Create and fill a table with simple keys
+create table t0
+(
+ key1 int not null,
+ key2 int not null,
+ key3 int not null,
+ key4 int not null,
+ key5 int not null,
+ key6 int not null,
+ key7 int not null,
+ key8 int not null,
+ INDEX i1(key1),
+ INDEX i2(key2),
+ INDEX i3(key3),
+ INDEX i4(key4),
+ INDEX i5(key5),
+ INDEX i6(key6),
+ INDEX i7(key7),
+ INDEX i8(key8)
+);
+
+--disable_query_log
+insert into t0 values (1,1,1,1,1,1,1,1023),(2,2,2,2,2,2,2,1022);
+
+let $1=9;
+set @d=2;
+while ($1)
+{
+ eval insert into t0 select key1+@d, key2+@d, key3+@d, key4+@d, key5+@d,
+ key6+@d, key7+@d, key8-@d from t0;
+ eval set @d=@d*2;
+ dec $1;
+}
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+--enable_query_log
+
+analyze table t0;
+
+# 1. One index
+explain select * from t0 where key1 < 3 or key1 > 1020;
+
+# 2. Simple cases
+explain
+select * from t0 where key1 < 3 or key2 > 1020;
+select * from t0 where key1 < 3 or key2 > 1020;
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 where key1 < 2 or key2 <3;
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain
+select * from t0 where (key1 > 30 and key1<35) or (key2 >32 and key2 < 40);
+# Bug#21277: InnoDB, wrong result set, index_merge strategy, second index not evaluated
+select * from t0 where (key1 > 30 and key1<35) or (key2 >32 and key2 < 40);
+
+# 3. Check that index_merge doesn't break "ignore/force/use index"
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 ignore index (i2) where key1 < 3 or key2 <4;
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 where (key1 < 3 or key2 <4) and key3 = 50;
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 use index (i1,i2) where (key1 < 2 or key2 <3) and key3 = 50;
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 where (key1 > 1 or key2 > 2);
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 force index (i1,i2) where (key1 > 1 or key2 > 2);
+
+
+# 4. Check if conjuncts are grouped by keyuse
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain
+ select * from t0 where key1<2 or key2<3 or (key1>5 and key1<7) or
+ (key1>10 and key1<12) or (key2>100 and key2<102);
+
+# 5. Check index_merge with conjuncts that are always true/false
+# verify fallback to "range" if there is only one non-confluent condition
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 where key2 = 45 or key1 <=> null;
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 where key2 = 45 or key1 is not null;
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 where key2 = 45 or key1 is null;
+
+# the last conj. is always false and will be discarded
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 where key2=10 or key3=3 or key4 <=> null;
+
+# the last conj. is always true and will cause 'all' scan
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 where key2=10 or key3=3 or key4 is null;
+
+# some more complicated cases
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select key1 from t0 where (key1 <=> null) or (key2 < 2) or
+ (key3=10) or (key4 <=> null);
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select key1 from t0 where (key1 <=> null) or (key1 < 5) or
+ (key3=10) or (key4 <=> null);
+
+# 6. Several ways to do index_merge, (ignored) index_merge vs. range
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 where
+ (key1 < 2 or key2 < 2) and (key3 < 3 or key4 < 3) and (key5 < 5 or key6 < 5);
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain
+select * from t0 where (key1 < 2 or key2 < 4) and (key1 < 5 or key3 < 3);
+
+select * from t0 where (key1 < 2 or key2 < 4) and (key1 < 5 or key3 < 3);
+
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 where
+ (key1 < 3 or key2 < 2) and (key3 < 3 or key4 < 3) and (key5 < 2 or key6 < 2);
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 where
+ (key1 < 3 or key2 < 3) and (key3 < 70);
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 where
+ (key1 < 3 or key2 < 3) and (key3 < 1000);
+
+
+# 7. Complex cases
+# tree_or(List<SEL_IMERGE>, range SEL_TREE).
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 where
+ ((key1 < 3 or key2 < 3) and (key2 <4 or key3 < 3))
+ or
+ key2 > 4;
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 where
+ ((key1 < 4 or key2 < 4) and (key2 <4 or key3 < 3))
+ or
+ key1 < 5;
+
+select * from t0 where
+ ((key1 < 4 or key2 < 4) and (key2 <4 or key3 < 3))
+ or
+ key1 < 5;
+
+# tree_or(List<SEL_IMERGE>, List<SEL_IMERGE>).
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 where
+ ((key1 < 2 or key2 < 2) and (key3 <4 or key5 < 3))
+ or
+ ((key5 < 3 or key6 < 3) and (key7 <3 or key8 < 3));
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 where
+ ((key3 <3 or key5 < 4) and (key1 < 3 or key2 < 3))
+ or
+ ((key7 <5 or key8 < 3) and (key5 < 4 or key6 < 4));
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 where
+ ((key3 <3 or key5 < 4) and (key1 < 3 or key2 < 4))
+ or
+ ((key3 <4 or key5 < 2) and (key5 < 5 or key6 < 3));
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 where
+ ((key3 <4 or key5 < 3) and (key1 < 3 or key2 < 3))
+ or
+ (((key3 <5 and key7 < 5) or key5 < 2) and (key5 < 4 or key6 < 4));
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 where
+ ((key3 <5 or key5 < 4) and (key1 < 4 or key2 < 4))
+ or
+ ((key3 >5 or key5 < 2) and (key5 < 5 or key6 < 6));
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 force index(i1, i2, i3, i4, i5, i6 ) where
+ ((key3 <3 or key5 < 4) and (key1 < 3 or key2 < 3))
+ or
+ ((key3 >4 or key5 < 2) and (key5 < 5 or key6 < 4));
+
+# Can't merge any indexes here (predicate on key3 is always true)
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 force index(i1, i2, i3, i4, i5, i6 ) where
+ ((key3 <5 or key5 < 4) and (key1 < 4 or key2 < 4))
+ or
+ ((key3 >=5 or key5 < 2) and (key5 < 5 or key6 < 6));
+
+# 8. Verify that "order by" after index merge uses filesort
+select * from t0 where key1 < 3 or key8 < 2 order by key1;
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain
+select * from t0 where key1 < 3 or key8 < 2 order by key1;
+
+# 9. Check that index_merge cost is compared to 'index' where possible
+create table t2 like t0;
+insert into t2 select * from t0;
+
+alter table t2 add index i1_3(key1, key3);
+alter table t2 add index i2_3(key2, key3);
+alter table t2 drop index i1;
+alter table t2 drop index i2;
+alter table t2 add index i321(key3, key2, key1);
+
+-- disable_query_log
+-- disable_result_log
+analyze table t2;
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+-- enable_result_log
+-- enable_query_log
+
+# index_merge vs 'index', index_merge is better.
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select key3 from t2 where key1 = 100 or key2 = 100;
+
+# index_merge vs 'index', 'index' is better.
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select key3 from t2 where key1 <100 or key2 < 100;
+
+# index_merge vs 'all', index_merge is better.
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select key7 from t2 where key1 <100 or key2 < 100;
+
+# 10. Multipart keys.
+create table t4 (
+ key1a int not null,
+ key1b int not null,
+ key2 int not null,
+ key2_1 int not null,
+ key2_2 int not null,
+ key3 int not null,
+ index i1a (key1a, key1b),
+ index i1b (key1b, key1a),
+ index i2_1(key2, key2_1),
+ index i2_2(key2, key2_1)
+);
+
+insert into t4 select key1,key1,key1 div 10, key1 % 10, key1 % 10, key1 from t0;
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t4;
+-- enable_result_log
+-- enable_query_log
+
+# the following will be handled by index_merge:
+select * from t4 where key1a = 3 or key1b = 4;
+explain select * from t4 where key1a = 3 or key1b = 4;
+
+# and the following will not
+explain select * from t4 where key2 = 1 and (key2_1 = 1 or key3 = 5);
+
+explain select * from t4 where key2 = 1 and (key2_1 = 1 or key2_2 = 5);
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t4 where key2_1 = 1 or key2_2 = 5;
+
+
+# 11. Multitable selects
+create table t1 like t0;
+insert into t1 select * from t0;
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t1;
+-- enable_result_log
+-- enable_query_log
+
+# index_merge on first table in join
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0 left join t1 on (t0.key1=t1.key1)
+ where t0.key1=3 or t0.key2=4;
+
+select * from t0 left join t1 on (t0.key1=t1.key1)
+ where t0.key1=3 or t0.key2=4;
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain
+select * from t0,t1 where (t0.key1=t1.key1) and ( t0.key1=3 or t0.key2=4);
+
+# index_merge vs. ref
+if (!$index_merge_random_rows_in_EXPLAIN)
+{
+#this plan varies too much for InnoDB
+explain
+select * from t0,t1 where (t0.key1=t1.key1) and
+ (t0.key1=3 or t0.key2=4) and t1.key1<200;
+}
+
+# index_merge vs. ref
+explain
+select * from t0,t1 where (t0.key1=t1.key1) and
+ (t0.key1=3 or t0.key2<4) and t1.key1=2;
+
+# index_merge on second table in join
+explain select * from t0,t1 where t0.key1 = 5 and
+ (t1.key1 = t0.key1 or t1.key8 = t0.key1);
+
+# Fix for bug#1974
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t0,t1 where t0.key1 < 3 and
+ (t1.key1 = t0.key1 or t1.key8 = t0.key1);
+
+# index_merge inside union
+explain select * from t1 where key1=3 or key2=4
+ union select * from t1 where key1<4 or key3=5;
+
+# index merge in subselect
+explain select * from (select * from t1 where key1 = 3 or key2 =3) as Z where key8 >5;
+
+# 12. Check for long index_merges.
+create table t3 like t0;
+insert into t3 select * from t0;
+alter table t3 add key9 int not null, add index i9(key9);
+alter table t3 add keyA int not null, add index iA(keyA);
+alter table t3 add keyB int not null, add index iB(keyB);
+alter table t3 add keyC int not null, add index iC(keyC);
+update t3 set key9=key1,keyA=key1,keyB=key1,keyC=key1;
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t3;
+-- enable_result_log
+-- enable_query_log
+
+explain select * from t3 where
+ key1=1 or key2=2 or key3=3 or key4=4 or
+ key5=5 or key6=6 or key7=7 or key8=8 or
+ key9=9 or keyA=10 or keyB=11 or keyC=12;
+
+select * from t3 where
+ key1=1 or key2=2 or key3=3 or key4=4 or
+ key5=5 or key6=6 or key7=7 or key8=8 or
+ key9=9 or keyA=10 or keyB=11 or keyC=12;
+
+# Test for Bug#3183
+explain select * from t0 where key1 < 3 or key2 < 4;
+# Bug#21277: InnoDB, wrong result set, index_merge strategy, second index not evaluated
+select * from t0 where key1 < 3 or key2 < 4;
+
+update t0 set key8=123 where key1 < 3 or key2 < 4;
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t0;
+-- enable_result_log
+-- enable_query_log
+
+# Bug#21277: InnoDB, wrong result set, index_merge strategy, second index not evaluated
+select * from t0 where key1 < 3 or key2 < 4;
+
+delete from t0 where key1 < 3 or key2 < 4;
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t0;
+-- enable_result_log
+-- enable_query_log
+
+select * from t0 where key1 < 3 or key2 < 4;
+select count(*) from t0;
+
+# Test for BUG#4177
+drop table t4;
+create table t4 (a int);
+insert into t4 values (1),(4),(3);
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t4;
+-- enable_result_log
+-- enable_query_log
+
+set @save_join_buffer_size=@@join_buffer_size;
+set join_buffer_size= 4096;
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select max(A.key1 + B.key1 + A.key2 + B.key2 + A.key3 + B.key3 + A.key4 + B.key4 + A.key5 + B.key5)
+ from t0 as A force index(i1,i2), t0 as B force index (i1,i2)
+ where (A.key1 < 500000 or A.key2 < 3)
+ and (B.key1 < 500000 or B.key2 < 3);
+
+select max(A.key1 + B.key1 + A.key2 + B.key2 + A.key3 + B.key3 + A.key4 + B.key4 + A.key5 + B.key5)
+ from t0 as A force index(i1,i2), t0 as B force index (i1,i2)
+ where (A.key1 < 500000 or A.key2 < 3)
+ and (B.key1 < 500000 or B.key2 < 3);
+
+update t0 set key1=1;
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t0;
+-- enable_result_log
+-- enable_query_log
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select max(A.key1 + B.key1 + A.key2 + B.key2 + A.key3 + B.key3 + A.key4 + B.key4 + A.key5 + B.key5)
+ from t0 as A force index(i1,i2), t0 as B force index (i1,i2)
+ where (A.key1 = 1 or A.key2 = 1)
+ and (B.key1 = 1 or B.key2 = 1);
+
+select max(A.key1 + B.key1 + A.key2 + B.key2 + A.key3 + B.key3 + A.key4 + B.key4 + A.key5 + B.key5)
+ from t0 as A force index(i1,i2), t0 as B force index (i1,i2)
+ where (A.key1 = 1 or A.key2 = 1)
+ and (B.key1 = 1 or B.key2 = 1);
+
+alter table t0 add filler1 char(200), add filler2 char(200), add filler3 char(200);
+update t0 set key2=1, key3=1, key4=1, key5=1,key6=1,key7=1 where key7 < 500;
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t0;
+-- enable_result_log
+-- enable_query_log
+
+# The next query will not use index i7 in intersection if the OS doesn't
+# support file sizes > 2GB. (ha_myisam::ref_length depends on this and index
+# scan cost estimates depend on ha_myisam::ref_length)
+if (!$index_merge_random_rows_in_EXPLAIN)
+{
+ # Too unstable for innodb
+ --replace_column 9 #
+ --replace_result "4,4,4,4,4,4,4" X "4,4,4,4,4,4" X "i6,i7" "i6,i7?" "i6" "i6,i7?"
+ explain select max(A.key1 + B.key1 + A.key2 + B.key2 + A.key3 + B.key3 + A.key4 + B.key4 + A.key5 + B.key5)
+ from t0 as A, t0 as B
+ where (A.key1 = 1 and A.key2 = 1 and A.key3 = 1 and A.key4=1 and A.key5=1 and A.key6=1 and A.key7 = 1 or A.key8=1)
+ and (B.key1 = 1 and B.key2 = 1 and B.key3 = 1 and B.key4=1 and B.key5=1 and B.key6=1 and B.key7 = 1 or B.key8=1);
+}
+select max(A.key1 + B.key1 + A.key2 + B.key2 + A.key3 + B.key3 + A.key4 + B.key4 + A.key5 + B.key5)
+ from t0 as A, t0 as B
+ where (A.key1 = 1 and A.key2 = 1 and A.key3 = 1 and A.key4=1 and A.key5=1 and A.key6=1 and A.key7 = 1 or A.key8=1)
+ and (B.key1 = 1 and B.key2 = 1 and B.key3 = 1 and B.key4=1 and B.key5=1 and B.key6=1 and B.key7 = 1 or B.key8=1);
+
+set join_buffer_size= @save_join_buffer_size;
+# Test for BUG#4177 ends
+
+drop table t0, t1, t2, t3, t4;
+
+# BUG#16166
+CREATE TABLE t1 (
+ cola char(3) not null, colb char(3) not null, filler char(200),
+ key(cola), key(colb)
+);
+INSERT INTO t1 VALUES ('foo','bar', 'ZZ'),('fuz','baz', 'ZZ');
+
+--disable_query_log
+let $1=9;
+while ($1)
+{
+ eval INSERT INTO t1 SELECT * from t1 WHERE cola = 'foo';
+ dec $1;
+}
+
+let $1=13;
+while ($1)
+{
+ eval INSERT INTO t1 SELECT * from t1 WHERE cola <> 'foo';
+ dec $1;
+}
+
+--enable_query_log
+
+OPTIMIZE TABLE t1;
+select count(*) from t1;
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t1;
+-- enable_result_log
+-- enable_query_log
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t1 WHERE cola = 'foo' AND colb = 'bar';
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t1 force index(cola,colb) WHERE cola = 'foo' AND colb = 'bar';
+drop table t1;
+
+if ($merge_table_support)
+{
+#
+# BUG#17314: Index_merge/intersection not chosen by the optimizer for MERGE tables
+#
+create table t0 (a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1 (
+ a int, b int,
+ filler1 char(200), filler2 char(200),
+ key(a),key(b)
+);
+insert into t1 select @v:= A.a, @v, 't1', 'filler2' from t0 A, t0 B, t0 C;
+create table t2 like t1;
+
+create table t3 (
+ a int, b int,
+ filler1 char(200), filler2 char(200),
+ key(a),key(b)
+) engine=merge union=(t1,t2);
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t0;
+analyze table t1;
+analyze table t2;
+analyze table t3;
+-- enable_result_log
+-- enable_query_log
+
+--replace_column 9 #
+explain select * from t1 where a=1 and b=1;
+--replace_column 9 #
+explain select * from t3 where a=1 and b=1;
+
+drop table t3;
+drop table t0, t1, t2;
+}
+
+#
+# BUG#20256 - LOCK WRITE - MyISAM
+#
+CREATE TABLE t1(a INT);
+INSERT INTO t1 VALUES(1);
+CREATE TABLE t2(a INT, b INT, dummy CHAR(16) DEFAULT '', KEY(a), KEY(b));
+INSERT INTO t2(a,b) VALUES
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(1,2);
+LOCK TABLES t1 WRITE, t2 WRITE;
+INSERT INTO t2(a,b) VALUES(1,2);
+SELECT t2.a FROM t1,t2 WHERE t2.b=2 AND t2.a=1;
+UNLOCK TABLES;
+DROP TABLE t1, t2;
+
+#
+# BUG#29740: HA_KEY_SCAN_NOT_ROR wasn't set for HEAP engine
+#
+CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `filler` char(200) DEFAULT NULL,
+ `b` int(11) DEFAULT NULL,
+ KEY `a` (`a`),
+ KEY `b` (`b`)
+) ENGINE=MEMORY DEFAULT CHARSET=latin1;
+
+insert into t1 values
+(0, 'filler', 0), (1, 'filler', 1), (2, 'filler', 2), (3, 'filler', 3),
+(4, 'filler', 4), (5, 'filler', 5), (6, 'filler', 6), (7, 'filler', 7),
+(8, 'filler', 8), (9, 'filler', 9), (0, 'filler', 0), (1, 'filler', 1),
+(2, 'filler', 2), (3, 'filler', 3), (4, 'filler', 4), (5, 'filler', 5),
+(6, 'filler', 6), (7, 'filler', 7), (8, 'filler', 8), (9, 'filler', 9),
+(10, 'filler', 10), (11, 'filler', 11), (12, 'filler', 12), (13, 'filler', 13),
+(14, 'filler', 14), (15, 'filler', 15), (16, 'filler', 16), (17, 'filler', 17),
+(18, 'filler', 18), (19, 'filler', 19), (4, '5 ', 0), (5, '4 ', 0),
+(4, '4 ', 0), (4, 'qq ', 5), (5, 'qq ', 4), (4, 'zz ', 4);
+
+create table t2(
+ `a` int(11) DEFAULT NULL,
+ `filler` char(200) DEFAULT NULL,
+ `b` int(11) DEFAULT NULL,
+ KEY USING BTREE (`a`),
+ KEY USING BTREE (`b`)
+) ENGINE=MEMORY DEFAULT CHARSET=latin1;
+insert into t2 select * from t1;
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t1;
+analyze table t2;
+-- enable_result_log
+-- enable_query_log
+
+--echo must use sort-union rather than union:
+--replace_column 9 #
+explain select * from t1 where a=4 or b=4;
+--sorted_result
+select * from t1 where a=4 or b=4;
+--sorted_result
+select * from t1 ignore index(a,b) where a=4 or b=4;
+
+--echo must use union, not sort-union:
+--replace_column 9 #
+explain select * from t2 where a=4 or b=4;
+--sorted_result
+select * from t2 where a=4 or b=4;
+
+drop table t1, t2;
+
+#
+# Bug #37943: Reproducible mysqld crash/sigsegv in sel_trees_can_be_ored
+#
+
+CREATE TABLE t1 (a varchar(8), b set('a','b','c','d','e','f','g','h'),
+ KEY b(b), KEY a(a));
+INSERT INTO t1 VALUES ('y',''), ('z','');
+
+# should not crash
+SELECT b,a from t1 WHERE (b!='c' AND b!='f' && b!='h') OR
+ (a='pure-S') OR (a='DE80337a') OR (a='DE80799');
+
+DROP TABLE t1;
+
+--echo #
+--echo # BUG#40974: Incorrect query results when using clause evaluated using range check
+--echo #
+create table t0 (a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t1 (a int);
+insert into t1 values (1),(2);
+create table t2(a int, b int);
+insert into t2 values (1,1), (2, 1000);
+create table t3 (a int, b int, filler char(100), key(a), key(b));
+
+insert into t3 select 1000, 1000,'filler' from t0 A, t0 B, t0 C;
+insert into t3 values (1,1,'data');
+insert into t3 values (1,1,'data');
+-- echo The plan should be ALL/ALL/ALL(Range checked for each record (index map: 0x3)
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t0;
+analyze table t1;
+analyze table t2;
+analyze table t3;
+-- enable_result_log
+-- enable_query_log
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t1
+where exists (select 1 from t2, t3
+ where t2.a=t1.a and (t3.a=t2.b or t3.b=t2.b or t3.b=t2.b+1));
+
+select * from t1
+where exists (select 1 from t2, t3
+ where t2.a=t1.a and (t3.a=t2.b or t3.b=t2.b or t3.b=t2.b+1));
+
+drop table t0, t1, t2, t3;
+
+--echo #
+--echo # BUG#44810: index merge and order by with low sort_buffer_size
+--echo # crashes server!
+--echo #
+CREATE TABLE t1(a VARCHAR(128),b VARCHAR(128),KEY(A),KEY(B));
+INSERT INTO t1 VALUES (REPEAT('a',128),REPEAT('b',128));
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t1;
+-- enable_result_log
+-- enable_query_log
+
+# Causes "out of sort memory" error in MariaDB:
+#SET SESSION sort_buffer_size=1;
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+EXPLAIN
+SELECT * FROM t1 FORCE INDEX(a,b) WHERE a LIKE 'a%' OR b LIKE 'b%'
+ ORDER BY a,b;
+
+# We don't actually care about the result: we only check that the query doesn't crash
+--disable_result_log
+SELECT * FROM t1 FORCE INDEX(a,b) WHERE a LIKE 'a%' OR b LIKE 'b%'
+ ORDER BY a,b;
+--enable_result_log
+
+SET SESSION sort_buffer_size=DEFAULT;
+DROP TABLE t1;
+
+
+--echo End of 5.0 tests
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/index_merge2.inc b/storage/rocksdb/mysql-test/rocksdb/include/index_merge2.inc
new file mode 100644
index 00000000000..7e5cec40a80
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/index_merge2.inc
@@ -0,0 +1,520 @@
+# include/index_merge2.inc
+#
+# Index merge tests
+#
+# The variable
+# $engine_type -- storage engine to be tested
+# has to be set before sourcing this script.
+#
+# Note: The comments/expectations refer to InnoDB.
+# They might not be valid for other storage engines.
+#
+# Last update:
+# 2006-08-02 ML test refactored
+# old name was t/index_merge_innodb.test
+# main code went into include/index_merge2.inc
+#
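+# Example invocation (illustrative; the caller's source path may differ):
+# let $engine_type = RocksDB;
+# --source suite/rocksdb/include/index_merge2.inc
+#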
+
+--echo #---------------- Index merge test 2 -------------------------------------------
+
+eval SET SESSION DEFAULT_STORAGE_ENGINE = $engine_type;
+
+--disable_warnings
+drop table if exists t1,t2;
+--enable_warnings
+
+create table t1
+(
+ key1 int not null,
+ key2 int not null,
+
+ INDEX i1(key1),
+ INDEX i2(key2)
+);
+
+--disable_query_log
+let $1=200;
+while ($1)
+{
+ eval insert into t1 values (200-$1, $1);
+ dec $1;
+}
+--enable_query_log
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t1;
+-- enable_result_log
+-- enable_query_log
+
+# No primary key
+explain select * from t1 where key1 < 5 or key2 > 197;
+
+select * from t1 where key1 < 5 or key2 > 197;
+
+explain select * from t1 where key1 < 3 or key2 > 195;
+select * from t1 where key1 < 3 or key2 > 195;
+
+# Primary key as case-sensitive string with \0s.
+# Also make the primary key longer than the maximum index length of MyISAM.
+alter table t1 add str1 char (255) not null,
+ add zeroval int not null default 0,
+ add str2 char (255) not null,
+ add str3 char (255) not null;
+
+update t1 set str1='aaa', str2='bbb', str3=concat(key2, '-', key1 div 2, '_' ,if(key1 mod 2 = 0, 'a', 'A'));
+
+alter table t1 add primary key (str1, zeroval, str2, str3);
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t1;
+-- enable_result_log
+-- enable_query_log
+
+explain select * from t1 where key1 < 5 or key2 > 197;
+
+select * from t1 where key1 < 5 or key2 > 197;
+
+explain select * from t1 where key1 < 3 or key2 > 195;
+select * from t1 where key1 < 3 or key2 > 195;
+
+# Test for BUG#5401
+drop table t1;
+create table t1 (
+ pk integer not null auto_increment primary key,
+ key1 integer,
+ key2 integer not null,
+ filler char (200),
+ index (key1),
+ index (key2)
+);
+show warnings;
+--disable_query_log
+let $1=30;
+while ($1)
+{
+ eval insert into t1 (key1, key2, filler) values ($1/4, $1/8, 'filler-data');
+ dec $1;
+}
+--enable_query_log
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t1;
+-- enable_result_log
+-- enable_query_log
+
+explain select pk from t1 where key1 = 1 and key2 = 1;
+select pk from t1 where key2 = 1 and key1 = 1;
+select pk from t1 ignore index(key1,key2) where key2 = 1 and key1 = 1;
+
+# More tests for BUG#5401.
+drop table t1;
+create table t1 (
+ pk int primary key auto_increment,
+ key1a int,
+ key2a int,
+ key1b int,
+ key2b int,
+ dummy1 int,
+ dummy2 int,
+ dummy3 int,
+ dummy4 int,
+ key3a int,
+ key3b int,
+ filler1 char (200),
+ index i1(key1a, key1b),
+ index i2(key2a, key2b),
+ index i3(key3a, key3b)
+);
+
+create table t2 (a int);
+insert into t2 values (0),(1),(2),(3),(4),(NULL);
+
+insert into t1 (key1a, key1b, key2a, key2b, key3a, key3b)
+ select A.a, B.a, C.a, D.a, C.a, D.a from t2 A,t2 B,t2 C, t2 D;
+insert into t1 (key1a, key1b, key2a, key2b, key3a, key3b)
+ select key1a, key1b, key2a, key2b, key3a, key3b from t1;
+insert into t1 (key1a, key1b, key2a, key2b, key3a, key3b)
+ select key1a, key1b, key2a, key2b, key3a, key3b from t1;
+analyze table t1;
+select count(*) from t1;
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t2;
+-- enable_result_log
+-- enable_query_log
+
+if (!$skip_ror_EXPLAIN_for_MyRocks)
+{
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select count(*) from t1 where
+ key1a = 2 and key1b is null and key2a = 2 and key2b is null;
+}
+
+select count(*) from t1 where
+ key1a = 2 and key1b is null and key2a = 2 and key2b is null;
+
+if (!$skip_ror_EXPLAIN_for_MyRocks)
+{
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select count(*) from t1 where
+ key1a = 2 and key1b is null and key3a = 2 and key3b is null;
+}
+
+select count(*) from t1 where
+ key1a = 2 and key1b is null and key3a = 2 and key3b is null;
+
+drop table t1,t2;
+
+# Test for BUG#8441
+create table t1 (
+ id1 int,
+ id2 date ,
+ index idx2 (id1,id2),
+ index idx1 (id2)
+);
+insert into t1 values(1,'20040101'), (2,'20040102');
+select * from t1 where id1 = 1 and id2= '20040101';
+drop table t1;
+
+# Test for BUG#12720
+--disable_warnings
+drop view if exists v1;
+--enable_warnings
+CREATE TABLE t1 (
+ `oid` int(11) unsigned NOT NULL auto_increment,
+ `fk_bbk_niederlassung` int(11) unsigned NOT NULL,
+ `fk_wochentag` int(11) unsigned NOT NULL,
+ `uhrzeit_von` time NOT NULL COMMENT 'HH:MM',
+ `uhrzeit_bis` time NOT NULL COMMENT 'HH:MM',
+ `geloescht` tinyint(4) NOT NULL,
+ `version` int(5) NOT NULL,
+ PRIMARY KEY (`oid`),
+ KEY `fk_bbk_niederlassung` (`fk_bbk_niederlassung`),
+ KEY `fk_wochentag` (`fk_wochentag`),
+ KEY `ix_version` (`version`)
+) DEFAULT CHARSET=latin1;
+
+insert into t1 values
+(1, 38, 1, '08:00:00', '13:00:00', 0, 1),
+(2, 38, 2, '08:00:00', '13:00:00', 0, 1),
+(3, 38, 3, '08:00:00', '13:00:00', 0, 1),
+(4, 38, 4, '08:00:00', '13:00:00', 0, 1),
+(5, 38, 5, '08:00:00', '13:00:00', 0, 1),
+(6, 38, 5, '08:00:00', '13:00:00', 1, 2),
+(7, 38, 3, '08:00:00', '13:00:00', 1, 2),
+(8, 38, 1, '08:00:00', '13:00:00', 1, 2),
+(9, 38, 2, '08:00:00', '13:00:00', 1, 2),
+(10, 38, 4, '08:00:00', '13:00:00', 1, 2),
+(11, 38, 1, '08:00:00', '13:00:00', 0, 3),
+(12, 38, 2, '08:00:00', '13:00:00', 0, 3),
+(13, 38, 3, '08:00:00', '13:00:00', 0, 3),
+(14, 38, 4, '08:00:00', '13:00:00', 0, 3),
+(15, 38, 5, '08:00:00', '13:00:00', 0, 3),
+(16, 38, 4, '08:00:00', '13:00:00', 0, 4),
+(17, 38, 5, '08:00:00', '13:00:00', 0, 4),
+(18, 38, 1, '08:00:00', '13:00:00', 0, 4),
+(19, 38, 2, '08:00:00', '13:00:00', 0, 4),
+(20, 38, 3, '08:00:00', '13:00:00', 0, 4),
+(21, 7, 1, '08:00:00', '13:00:00', 0, 1),
+(22, 7, 2, '08:00:00', '13:00:00', 0, 1),
+(23, 7, 3, '08:00:00', '13:00:00', 0, 1),
+(24, 7, 4, '08:00:00', '13:00:00', 0, 1),
+(25, 7, 5, '08:00:00', '13:00:00', 0, 1);
+
+create view v1 as
+select
+ zeit1.oid AS oid,
+ zeit1.fk_bbk_niederlassung AS fk_bbk_niederlassung,
+ zeit1.fk_wochentag AS fk_wochentag,
+ zeit1.uhrzeit_von AS uhrzeit_von,
+ zeit1.uhrzeit_bis AS uhrzeit_bis,
+ zeit1.geloescht AS geloescht,
+ zeit1.version AS version
+from
+ t1 zeit1
+where
+(zeit1.version =
+ (select max(zeit2.version) AS `max(version)`
+ from t1 zeit2
+ where
+ ((zeit1.fk_bbk_niederlassung = zeit2.fk_bbk_niederlassung) and
+ (zeit1.fk_wochentag = zeit2.fk_wochentag) and
+ (zeit1.uhrzeit_von = zeit2.uhrzeit_von) and
+ (zeit1.uhrzeit_bis = zeit2.uhrzeit_bis)
+ )
+ )
+)
+and (zeit1.geloescht = 0);
+
+select * from v1 where oid = 21;
+drop view v1;
+drop table t1;
+##
+CREATE TABLE t1(
+ t_cpac varchar(2) NOT NULL,
+ t_vers varchar(4) NOT NULL,
+ t_rele varchar(2) NOT NULL,
+ t_cust varchar(4) NOT NULL,
+ filler1 char(250) default NULL,
+ filler2 char(250) default NULL,
+ PRIMARY KEY (t_cpac,t_vers,t_rele,t_cust),
+ UNIQUE KEY IX_4 (t_cust,t_cpac,t_vers,t_rele),
+ KEY IX_5 (t_vers,t_rele,t_cust)
+);
+
+insert into t1 values
+('tm','2.5 ','a ',' ','',''), ('tm','2.5U','a ','stnd','',''),
+('da','3.3 ','b ',' ','',''), ('da','3.3U','b ','stnd','',''),
+('tl','7.6 ','a ',' ','',''), ('tt','7.6 ','a ',' ','',''),
+('bc','B61 ','a ',' ','',''), ('bp','B61 ','a ',' ','',''),
+('ca','B61 ','a ',' ','',''), ('ci','B61 ','a ',' ','',''),
+('cp','B61 ','a ',' ','',''), ('dm','B61 ','a ',' ','',''),
+('ec','B61 ','a ',' ','',''), ('ed','B61 ','a ',' ','',''),
+('fm','B61 ','a ',' ','',''), ('nt','B61 ','a ',' ','',''),
+('qm','B61 ','a ',' ','',''), ('tc','B61 ','a ',' ','',''),
+('td','B61 ','a ',' ','',''), ('tf','B61 ','a ',' ','',''),
+('tg','B61 ','a ',' ','',''), ('ti','B61 ','a ',' ','',''),
+('tp','B61 ','a ',' ','',''), ('ts','B61 ','a ',' ','',''),
+('wh','B61 ','a ',' ','',''), ('bc','B61U','a ','stnd','',''),
+('bp','B61U','a ','stnd','',''), ('ca','B61U','a ','stnd','',''),
+('ci','B61U','a ','stnd','',''), ('cp','B61U','a ','stnd','',''),
+('dm','B61U','a ','stnd','',''), ('ec','B61U','a ','stnd','',''),
+('fm','B61U','a ','stnd','',''), ('nt','B61U','a ','stnd','',''),
+('qm','B61U','a ','stnd','',''), ('tc','B61U','a ','stnd','',''),
+('td','B61U','a ','stnd','',''), ('tf','B61U','a ','stnd','',''),
+('tg','B61U','a ','stnd','',''), ('ti','B61U','a ','stnd','',''),
+('tp','B61U','a ','stnd','',''), ('ts','B61U','a ','stnd','',''),
+('wh','B61U','a ','stnd','','');
+show create table t1;
+
+select t_vers,t_rele,t_cust,filler1 from t1 where t_vers = '7.6';
+select t_vers,t_rele,t_cust,filler1 from t1 where t_vers = '7.6'
+ and t_rele='a' and t_cust = ' ';
+
+drop table t1;
+
+# BUG#19021: Crash in index_merge/ROR-intersection optimizer under
+# specific circumstances.
+create table t1 (
+ pk int(11) not null auto_increment,
+ a int(11) not null default '0',
+ b int(11) not null default '0',
+ c int(11) not null default '0',
+
+ filler1 datetime, filler2 varchar(15),
+ filler3 longtext,
+
+ kp1 varchar(4), kp2 varchar(7),
+ kp3 varchar(2), kp4 varchar(4),
+ kp5 varchar(7),
+ filler4 char(1),
+
+ primary key (pk),
+ key idx1(a,b,c),
+ key idx2(c),
+ key idx3(kp1,kp2,kp3,kp4,kp5)
+) default charset=latin1;
+--disable_query_log
+set @fill= uncompress(unhex(concat(
+'F91D0000789CDD993D6FDB301086F7FE0A6D4E0105B8E3F1335D5BA028DA0EEDE28E1D320408',
+'52A0713BF4D7571FB62C51A475924839080307B603E77DEE787C8FA41F9E9EEF7F1F8A87A7C3',
+'AFE280C5DF9F8F7FEE9F8B1B2CB114D6902E918455245DB91300FA16E42D5201FA4EE29DA05D',
+'B9FB3718A33718A3FA8C30AEFAFDE1F317D016AA67BA7A60FDE45BF5F8BA7B5BDE8812AA9F1A',
+'069DB03C9804346644F3A3A6A1338DB572756A3C4D1BCC804CABF912C654AE9BB855A2B85962',
+'3A479259CAE6A86C0411D01AE5483581EDCBD9A39C45252D532E533979EB9F82E971D979BDB4',
+'8531105670740AFBFD1E34AAB0029E4AD0A1D46A6D0946A21A16038A5CD965CD2D524673F712',
+'20C304477315CE18405EAF9BD0AFFEAC74FDA14F1FBF5BD34C769D73FBBEDF4750ADD4E5A99C',
+'5C8DC04934AFA275D483D536D174C11B12AF27F8F888B41B6FC9DBA569E1FD7BD72D698130B7',
+'91B23A98803512B3D31881E8DCDA2AC1754E3644C4BB3A8466750B911681274A39E35E8624B7',
+'444A42AC1213F354758E3CF1A4CDD5A688C767CF1B11ABC5867CB15D8A18E0B91E9EC275BB94',
+'58F33C2936F64690D55BC29E4A293D95A798D84217736CEAAA538CE1354269EE2162053FBC66',
+'496D90CB53323CB279D3A6AF651B4B22B9E430743D83BE48E995A09D4FC9871C22D8D189B945',
+'706911BCB8C3C774B9C08D2FC6ED853ADACA37A14A4CB2E027630E5B80ECACD939431B1CDF62',
+'7D71487536EA2C678F59685E91F4B6C144BCCB94C1EBA9FA6F5552DDCA4E4539BE326A2720CB',
+'45ED028EB3616AC93C46E775FEA9FA6DA7CFCEC6DEBA5FCD1F915EED4D983BDDB881528AD9AB',
+'43C1576F29AAB35BDFBC21D422F52B307D350589D45225A887AC46C8EDD72D99EC3ED2E1BCEF',
+'7AF26FC4C74097B6768A5EDAFA660CC64278F7E63F99AC954B')));
+prepare x from @fill;
+execute x;
+deallocate prepare x;
+--enable_query_log
+set @fill=NULL;
+SELECT COUNT(*) FROM t1 WHERE b = 0 AND a = 0 AND c = 13286427 AND
+ kp1='279' AND kp2='ELM0678' AND kp3='6' AND kp4='10' AND kp5 = 'R ';
+
+drop table t1;
+
+# BUG#21277: Index Merge/sort_union: wrong query results
+create table t1
+(
+ key1 int not null,
+ key2 int not null default 0,
+ key3 int not null default 0
+);
+
+insert into t1(key1) values (1),(2),(3),(4),(5),(6),(7),(8);
+
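+# Doubling loop: each pass appends a copy of the table shifted by @d,
+# growing the 8 seed rows to 8*2^7 = 1024 rows with distinct key1 values.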
+let $1=7;
+set @d=8;
+while ($1)
+{
+ eval insert into t1 (key1) select key1+@d from t1;
+ eval set @d=@d*2;
+ dec $1;
+}
+
+alter table t1 add index i2(key2);
+alter table t1 add index i3(key3);
+update t1 set key2=key1,key3=key1;
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t1;
+-- enable_result_log
+-- enable_query_log
+
+if (!$skip_ror_EXPLAIN_for_MyRocks)
+{
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+# to test the bug, the following must use "sort_union":
+explain select * from t1 where (key3 > 30 and key3<35) or (key2 >32 and key2 < 40);
+}
+select * from t1 where (key3 > 30 and key3<35) or (key2 >32 and key2 < 40);
+drop table t1;
+
+--echo #
+--echo # Bug#56423: Different count with SELECT and CREATE SELECT queries
+--echo #
+
+CREATE TABLE t1 (
+ a INT,
+ b INT,
+ c INT,
+ d INT,
+ PRIMARY KEY (a),
+ KEY (c),
+ KEY bd (b,d)
+);
+
+INSERT INTO t1 VALUES
+(1, 0, 1, 0),
+(2, 1, 1, 1),
+(3, 1, 1, 1),
+(4, 0, 1, 1);
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t1;
+-- enable_result_log
+-- enable_query_log
+
+EXPLAIN
+SELECT a
+FROM t1
+WHERE c = 1 AND b = 1 AND d = 1;
+
+CREATE TABLE t2 ( a INT )
+SELECT a
+FROM t1
+WHERE c = 1 AND b = 1 AND d = 1;
+
+SELECT * FROM t2;
+
+DROP TABLE t1, t2;
+
+CREATE TABLE t1( a INT, b INT, KEY(a), KEY(b) );
+INSERT INTO t1 VALUES (1, 2), (1, 2), (1, 2), (1, 2);
+SELECT * FROM t1 FORCE INDEX(a, b) WHERE a = 1 AND b = 2;
+
+DROP TABLE t1;
+
+--echo # Code coverage of fix.
+CREATE TABLE t1 ( a INT NOT NULL AUTO_INCREMENT PRIMARY KEY, b INT);
+INSERT INTO t1 (b) VALUES (1);
+UPDATE t1 SET b = 2 WHERE a = 1;
+SELECT * FROM t1;
+
+CREATE TABLE t2 ( a INT NOT NULL AUTO_INCREMENT PRIMARY KEY, b VARCHAR(1) );
+INSERT INTO t2 (b) VALUES ('a');
+UPDATE t2 SET b = 'b' WHERE a = 1;
+SELECT * FROM t2;
+
+DROP TABLE t1, t2;
+
+--echo #
+--echo # BUG#13970015: ASSERT `MIN_ENDP || MAX_ENDP' FAILED IN
+--echo # HANDLER::MULTI_RANGE_READ_INFO_CONST
+--echo #
+
+CREATE TABLE t1 (
+ pk INT NOT NULL,
+ col_int_key INT NOT NULL,
+ col_varchar_key VARCHAR(1) NOT NULL,
+ PRIMARY KEY (pk),
+ KEY col_int_key (col_int_key),
+ KEY col_varchar_key (col_varchar_key,col_int_key)
+);
+
+INSERT INTO t1 VALUES (1,1,'a'), (2,2,'b');
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t1;
+-- enable_result_log
+-- enable_query_log
+
+EXPLAIN
+SELECT col_int_key
+FROM t1
+WHERE col_varchar_key >= 'l' OR
+ (((pk BETWEEN 141 AND 141) OR col_varchar_key <> 'l')
+ AND ((pk BETWEEN 141 AND 141) OR (col_int_key > 141)));
+
+SELECT col_int_key
+FROM t1
+WHERE col_varchar_key >= 'l' OR
+ (((pk BETWEEN 141 AND 141) OR col_varchar_key <> 'l')
+ AND ((pk BETWEEN 141 AND 141) OR (col_int_key > 141)));
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/index_merge_2sweeps.inc b/storage/rocksdb/mysql-test/rocksdb/include/index_merge_2sweeps.inc
new file mode 100644
index 00000000000..4f195051dd7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/index_merge_2sweeps.inc
@@ -0,0 +1,80 @@
+# include/index_merge_2sweeps.inc
+#
+# 2-sweeps read Index_merge test
+#
+# The variable
+# $engine_type -- storage engine to be tested
+# has to be set before sourcing this script.
+#
+# Last update:
+# 2006-08-02 ML test refactored
+# old name was index_merge_innodb2.test
+# main code went into include/index_merge_2sweeps.inc
+#
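+# Example invocation (illustrative; $sorted_result is optional and makes
+# the SELECT output order-independent):
+# let $engine_type = RocksDB;
+# let $sorted_result = 1;
+# --source suite/rocksdb/include/index_merge_2sweeps.inc
+#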
+
+--echo #---------------- 2-sweeps read Index merge test 2 -------------------------------
+
+eval SET SESSION DEFAULT_STORAGE_ENGINE = $engine_type;
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+create table t1 (
+ pk int primary key,
+ key1 int,
+ key2 int,
+ filler char(200),
+ filler2 char(200),
+ index(key1),
+ index(key2)
+);
+
+
+--disable_query_log
+let $1=1000;
+while ($1)
+{
+ eval insert into t1 values($1, $1, $1, 'filler-data','filler-data-2');
+ dec $1;
+}
+--enable_query_log
+
+if ($sorted_result) {
+ --sorted_result
+}
+select * from t1 where (key1 >= 2 and key1 <= 10) or (pk >= 4 and pk <=8 );
+
+set @maxv=1000;
+
+if ($sorted_result) {
+ --sorted_result
+}
+select * from t1 where
+ (pk < 5) or (pk > 10 and pk < 15) or (pk >= 50 and pk < 55 ) or (pk > @maxv-10)
+ or key1=18 or key1=60;
+
+if ($sorted_result) {
+ --sorted_result
+}
+select * from t1 where
+ (pk < 5) or (pk > 10 and pk < 15) or (pk >= 50 and pk < 55 ) or (pk > @maxv-10)
+ or key1 < 3 or key1 > @maxv-11;
+
+if ($sorted_result) {
+ --sorted_result
+}
+select * from t1 where
+ (pk < 5) or (pk > 10 and pk < 15) or (pk >= 50 and pk < 55 ) or (pk > @maxv-10)
+ or
+ (key1 < 5) or (key1 > 10 and key1 < 15) or (key1 >= 50 and key1 < 55 ) or (key1 > @maxv-10);
+
+if ($sorted_result) {
+ --sorted_result
+}
+select * from t1 where
+ (pk > 10 and pk < 15) or (pk >= 50 and pk < 55 )
+ or
+ (key1 < 5) or (key1 > @maxv-10);
+
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/index_merge_ror.inc b/storage/rocksdb/mysql-test/rocksdb/include/index_merge_ror.inc
new file mode 100644
index 00000000000..21219d1aa95
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/index_merge_ror.inc
@@ -0,0 +1,463 @@
+# include/index_merge_ror.inc
+#
+# ROR-index_merge tests.
+#
+# The variable
+# $engine_type -- storage engine to be tested
+# has to be set before sourcing this script.
+#
+# Note: The comments/expectations refer to MyISAM.
+# They might not be valid for other storage engines.
+#
+# Last update:
+# 2006-08-02 ML test refactored
+# old name was t/index_merge_ror.test
+# main code went into include/index_merge_ror.inc
+#
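+# Example invocation (illustrative; the two optional flags gate EXPLAIN
+# output that is unstable on some engines):
+# let $engine_type = RocksDB;
+# let $skip_ror_EXPLAIN_for_MyRocks = 1;
+# let $index_merge_random_rows_in_EXPLAIN = 1;
+# --source suite/rocksdb/include/index_merge_ror.inc
+#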
+
+--echo #---------------- ROR-index_merge tests -----------------------
+
+eval SET SESSION DEFAULT_STORAGE_ENGINE = $engine_type;
+
+--disable_warnings
+drop table if exists t0,t1,t2;
+--enable_warnings
+create table t1
+(
+  /* Field names reflect value(rowid) distribution, st=STairs, swt=SaWTooth */
+ st_a int not null default 0,
+ swt1a int not null default 0,
+ swt2a int not null default 0,
+
+ st_b int not null default 0,
+ swt1b int not null default 0,
+ swt2b int not null default 0,
+
+ /* fields/keys for row retrieval tests */
+ key1 int,
+ key2 int,
+ key3 int,
+ key4 int,
+
+  /* make rows much bigger than keys */
+ filler1 char (200),
+ filler2 char (200),
+ filler3 char (200),
+ filler4 char (200),
+ filler5 char (200),
+ filler6 char (200),
+
+ /* order of keys is important */
+ key sta_swt12a(st_a,swt1a,swt2a),
+ key sta_swt1a(st_a,swt1a),
+ key sta_swt2a(st_a,swt2a),
+ key sta_swt21a(st_a,swt2a,swt1a),
+
+ key st_a(st_a),
+ key stb_swt1a_2b(st_b,swt1b,swt2a),
+ key stb_swt1b(st_b,swt1b),
+ key st_b(st_b),
+
+ key(key1),
+ key(key2),
+ key(key3),
+ key(key4)
+) ;
+
+# Fill table
+create table t0 as select * from t1;
+--disable_query_log
+--echo # Printing of many insert into t0 values (....) disabled.
+let $cnt=1000;
+while ($cnt)
+{
+ eval insert into t0 values (1, 2, 3, 1, 2, 3, 0, 0, 0, 0, 'data1', 'data2', 'data3', 'data4', 'data5', 'data6');
+ dec $cnt;
+}
+--enable_query_log
+
+alter table t1 disable keys;
+--disable_query_log
+--echo # Printing of many insert into t1 select .... from t0 disabled.
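+# Nested 4x4x4 loop: 64 INSERT ... SELECT passes over the 1000-row t0,
+# loading 64,000 rows before the special-purpose rows below are added.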
+let $1=4;
+while ($1)
+{
+ let $2=4;
+ while ($2)
+ {
+ let $3=4;
+ while ($3)
+ {
+ eval insert into t1 select $1, $2, $3, $1 ,$2, $3, key1, key2, key3, key4, filler1, filler2, filler3, filler4, filler5, filler6 from t0;
+ dec $3;
+ }
+ dec $2;
+ }
+ dec $1;
+}
+
+--echo # Printing of many insert into t1 (...) values (....) disabled.
+# Row retrieval tests
+# -1 is used for values 'out of any range we are using'
+# insert enough rows for index intersection to be used for (key1,key2)
+insert into t1 (key1, key2, key3, key4, filler1) values (100, 100, 100, 100,'key1-key2-key3-key4');
+let $cnt=400;
+while ($cnt)
+{
+ eval insert into t1 (key1, key2, key3, key4, filler1) values (100, -1, 100, -1,'key1-key3');
+ dec $cnt;
+}
+let $cnt=400;
+while ($cnt)
+{
+ eval insert into t1 (key1, key2, key3, key4, filler1) values (-1, 100, -1, 100,'key2-key4');
+ dec $cnt;
+}
+--enable_query_log
+alter table t1 enable keys;
+select count(*) from t1;
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t0;
+analyze table t1;
+-- enable_result_log
+-- enable_query_log
+
+# One-row result tests for cases where a single row matches all conditions
+--replace_column 9 #
+explain select key1,key2 from t1 where key1=100 and key2=100;
+select key1,key2 from t1 where key1=100 and key2=100;
+if (!$skip_ror_EXPLAIN_for_MyRocks)
+{
+ explain select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100;
+ explain format=json select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100;
+}
+select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100;
+
+# Several-row results
+insert into t1 (key1, key2, key3, key4, filler1) values (100, 100, -1, -1, 'key1-key2');
+insert into t1 (key1, key2, key3, key4, filler1) values (-1, -1, 100, 100, 'key4-key3');
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t1;
+-- enable_result_log
+-- enable_query_log
+
+# ROR-intersection, not covering
+if (!$skip_ror_EXPLAIN_for_MyRocks)
+{
+ explain select key1,key2,filler1 from t1 where key1=100 and key2=100;
+}
+select key1,key2,filler1 from t1 where key1=100 and key2=100;
+
+# ROR-intersection, covering
+if (!$skip_ror_EXPLAIN_for_MyRocks)
+{
+ explain select key1,key2 from t1 where key1=100 and key2=100;
+}
+select key1,key2 from t1 where key1=100 and key2=100;
+
+# ROR-union of ROR-intersections
+if (!$skip_ror_EXPLAIN_for_MyRocks)
+{
+ explain select key1,key2,key3,key4 from t1 where key1=100 and key2=100 or key3=100 and key4=100;
+}
+select key1,key2,key3,key4 from t1 where key1=100 and key2=100 or key3=100 and key4=100;
+if (!$skip_ror_EXPLAIN_for_MyRocks)
+{
+ explain select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100;
+}
+select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100;
+
+# 3-way ROR-intersection
+if (!$skip_ror_EXPLAIN_for_MyRocks)
+{
+ explain select key1,key2,key3 from t1 where key1=100 and key2=100 and key3=100;
+}
+select key1,key2,key3 from t1 where key1=100 and key2=100 and key3=100;
+
+# ROR-union(ROR-intersection, ROR-range)
+insert into t1 (key1,key2,key3,key4,filler1) values (101,101,101,101, 'key1234-101');
+if (!$skip_ror_EXPLAIN_for_MyRocks)
+{
+ explain select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=101;
+}
+select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=101;
+
+# Run some ROR updates/deletes
+select key1,key2, filler1 from t1 where key1=100 and key2=100;
+update t1 set filler1='to be deleted' where key1=100 and key2=100;
+update t1 set key1=200,key2=200 where key1=100 and key2=100;
+delete from t1 where key1=200 and key2=200;
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t1;
+-- enable_result_log
+-- enable_query_log
+select key1,key2,filler1 from t1 where key2=100 and key2=200;
+
+# ROR-union(ROR-intersection) with one of the ROR-intersections giving
+# empty results
+if (!$skip_ror_EXPLAIN_for_MyRocks)
+{
+ explain select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100;
+}
+select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100;
+
+delete from t1 where key3=100 and key4=100;
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t1;
+-- enable_result_log
+-- enable_query_log
+
+# ROR-union with all ROR-intersections giving empty results
+if (!$skip_ror_EXPLAIN_for_MyRocks)
+{
+ explain select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100;
+}
+select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100;
+
+# ROR-intersection with empty result
+if (!$skip_ror_EXPLAIN_for_MyRocks)
+{
+ explain select key1,key2 from t1 where key1=100 and key2=100;
+}
+select key1,key2 from t1 where key1=100 and key2=100;
+
+# ROR-union tests with various cases.
+# All scans returning duplicate rows:
+insert into t1 (key1, key2, key3, key4, filler1) values (100, 100, 200, 200,'key1-key2-key3-key4-1');
+insert into t1 (key1, key2, key3, key4, filler1) values (100, 100, 200, 200,'key1-key2-key3-key4-2');
+insert into t1 (key1, key2, key3, key4, filler1) values (100, 100, 200, 200,'key1-key2-key3-key4-3');
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t1;
+-- enable_result_log
+-- enable_query_log
+
+if (!$skip_ror_EXPLAIN_for_MyRocks)
+{
+ explain select key1,key2,key3,key4,filler1 from t1 where key3=200 or (key1=100 and key2=100) or key4=200;
+}
+select key1,key2,key3,key4,filler1 from t1 where key3=200 or (key1=100 and key2=100) or key4=200;
+
+insert into t1 (key1, key2, key3, key4, filler1) values (-1, -1, -1, 200,'key4');
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t1;
+-- enable_result_log
+-- enable_query_log
+
+if (!$skip_ror_EXPLAIN_for_MyRocks)
+{
+ explain select key1,key2,key3,key4,filler1 from t1 where key3=200 or (key1=100 and key2=100) or key4=200;
+}
+select key1,key2,key3,key4,filler1 from t1 where key3=200 or (key1=100 and key2=100) or key4=200;
+
+insert into t1 (key1, key2, key3, key4, filler1) values (-1, -1, 200, -1,'key3');
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t1;
+-- enable_result_log
+-- enable_query_log
+
+if (!$skip_ror_EXPLAIN_for_MyRocks)
+{
+ explain select key1,key2,key3,key4,filler1 from t1 where key3=200 or (key1=100 and key2=100) or key4=200;
+}
+select key1,key2,key3,key4,filler1 from t1 where key3=200 or (key1=100 and key2=100) or key4=200;
+
+##
+## Optimizer tests
+##
+
+# Check that the shortest key is used for ROR-intersection, covering and non-covering.
+if (!$index_merge_random_rows_in_EXPLAIN)
+{
+ # Too unstable on InnoDB
+ explain select * from t1 where st_a=1 and st_b=1;
+ explain select st_a,st_b from t1 where st_a=1 and st_b=1;
+ explain select st_a from t1 ignore index (st_a) where st_a=1 and st_b=1;
+}
+
+# Run a series of EXPLAINs to check that keys that don't improve
+# selectivity are skipped.
+#
+if (!$skip_ror_EXPLAIN_for_MyRocks)
+{
+# The row estimate (column 9) differs between 32- and 64-bit builds
+if ($random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+--replace_result sta_swt12a sta_swt21a sta_swt12a, sta_swt12a,
+explain select * from t1 where st_a=1 and swt1a=1 and swt2a=1;
+
+if ($random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t1 where st_b=1 and swt1b=1 and swt2b=1;
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t1 where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1 and swt2b=1;
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t1 ignore index (sta_swt21a, stb_swt1a_2b)
+ where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1 and swt2b=1;
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t1 ignore index (sta_swt21a, sta_swt12a, stb_swt1a_2b)
+ where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1 and swt2b=1;
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t1 ignore index (sta_swt21a, sta_swt12a, stb_swt1a_2b, stb_swt1b)
+ where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1 and swt2b=1;
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t1
+ where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1;
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select * from t1
+ where st_a=1 and swt1a=1 and st_b=1 and swt1b=1 and swt1b=1;
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select st_a from t1
+ where st_a=1 and swt1a=1 and st_b=1 and swt1b=1 and swt1b=1;
+
+if ($index_merge_random_rows_in_EXPLAIN)
+{
+ --replace_column 9 #
+}
+explain select st_a from t1
+ where st_a=1 and swt1a=1 and st_b=1 and swt1b=1 and swt1b=1;
+}
+
+drop table t0,t1;
+
+# 'Partially' covered fields test
+
+create table t2 (
+ a char(10),
+ b char(10),
+ filler1 char(255),
+ filler2 char(255),
+ key(a(5)),
+ key(b(5))
+);
+
+--disable_query_log
+let $1=8;
+while ($1)
+{
+ eval insert into t2 values (repeat(char($1+64), 8),repeat(char($1+64), 8),'filler1', 'filler2');
+ dec $1;
+}
+insert into t2 select * from t2;
+insert into t2 select * from t2;
+--enable_query_log
+
+# The table row buffer is reused. Fill it with rows that don't match.
+select count(a) from t2 where a='BBBBBBBB';
+select count(a) from t2 where b='BBBBBBBB';
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t2;
+-- enable_result_log
+-- enable_query_log
+
+# BUG#1:
+--replace_result a a_or_b b a_or_b
+explain select count(a) from t2 where a='AAAAAAAA' and b='AAAAAAAA';
+select count(a) from t2 where a='AAAAAAAA' and b='AAAAAAAA';
+select count(a) from t2 ignore index(a,b) where a='AAAAAAAA' and b='AAAAAAAA';
+
+insert into t2 values ('ab', 'ab', 'uh', 'oh');
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t2;
+-- enable_result_log
+-- enable_query_log
+explain select a from t2 where a='ab';
+drop table t2;
+
+#
+# BUG#25048 - ERROR 126 : Incorrect key file for table '.XXXX.MYI'; try to
+# repair it
+#
+CREATE TABLE t1(c1 INT, c2 INT DEFAULT 0, c3 CHAR(255) DEFAULT '',
+KEY(c1), KEY(c2), KEY(c3));
+INSERT INTO t1(c1) VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),
+(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0);
+INSERT INTO t1 VALUES(0,0,0);
+CREATE TABLE t2(c1 int);
+INSERT INTO t2 VALUES(1);
+DELETE t1 FROM t1,t2 WHERE t1.c1=0 AND t1.c2=0;
+SELECT * FROM t1;
+DROP TABLE t1,t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/index_merge_ror_cpk.inc b/storage/rocksdb/mysql-test/rocksdb/include/index_merge_ror_cpk.inc
new file mode 100644
index 00000000000..f0d18a50bff
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/index_merge_ror_cpk.inc
@@ -0,0 +1,178 @@
+# include/index_merge_ror_cpk.inc
+#
+# Clustered PK ROR-index_merge tests
+#
+# The variable
+# $engine_type -- storage engine to be tested
+# has to be set before sourcing this script.
+#
+# Note: The comments/expectations refer to InnoDB.
+# They might not be valid for other storage engines.
+#
+# Last update:
+# 2006-08-02 ML test refactored
+# old name was t/index_merge_ror_cpk.test
+# main code went into include/index_merge_ror_cpk.inc
+#
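+# Example invocation (illustrative):
+# let $engine_type = RocksDB;
+# --source suite/rocksdb/include/index_merge_ror_cpk.inc
+#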
+
+--echo #---------------- Clustered PK ROR-index_merge tests -----------------------------
+
+eval SET SESSION DEFAULT_STORAGE_ENGINE = $engine_type;
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+create table t1
+(
+ pk1 int not null,
+ pk2 int not null,
+
+ key1 int not null,
+ key2 int not null,
+
+ pktail1ok int not null,
+ pktail2ok int not null,
+ pktail3bad int not null,
+ pktail4bad int not null,
+ pktail5bad int not null,
+
+ pk2copy int not null,
+ badkey int not null,
+
+ filler1 char (200),
+ filler2 char (200),
+ key (key1),
+ key (key2),
+
+ /* keys with tails from CPK members */
+ key (pktail1ok, pk1),
+ key (pktail2ok, pk1, pk2),
+ key (pktail3bad, pk2, pk1),
+ key (pktail4bad, pk1, pk2copy),
+ key (pktail5bad, pk1, pk2, pk2copy),
+
+ primary key (pk1, pk2)
+);
+
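+# Load 10,000 rows: pk1 = $1 div 10 stays fixed over runs of consecutive $1
+# values while pk2 = $1 mod 100 differs within each run, so (pk1, pk2) is
+# unique for every row.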
+--disable_query_log
+set autocommit=0;
+let $1=10000;
+while ($1)
+{
+ eval insert into t1 values ($1 div 10,$1 mod 100, $1/100,$1/100, $1/100,$1/100,$1/100,$1/100,$1/100, $1 mod 100, $1/1000,'filler-data-$1','filler2');
+ dec $1;
+}
+set autocommit=1;
+--enable_query_log
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t1;
+-- enable_result_log
+-- enable_query_log
+
+# Verify that range scan on CPK is ROR
+# (use index_intersection because it is impossible to check that for index union)
+# Column 9 (rows) can change depending on innodb-page-size.
+--replace_column 9 ROWS
+explain select * from t1 where pk1 = 1 and pk2 < 80 and key1=0;
+# CPK scan + 1 ROR range scan is a special case
+--sorted_result
+select * from t1 where pk1 = 1 and pk2 < 80 and key1=0;
+
+# Verify that CPK fields are considered to be covered by index scans
+explain select pk1,pk2 from t1 where key1 = 10 and key2=10 and 2*pk1+1 < 2*96+1;
+select pk1,pk2 from t1 where key1 = 10 and key2=10 and 2*pk1+1 < 2*96+1;
+
+# Verify that CPK is always used for index intersection scans
+# (this is because it is used as a filter, not for retrieval)
+# The expected number of rows can vary depending on page size
+--replace_column 9 ROWS
+explain select * from t1 where badkey=1 and key1=10;
+# The expected number of rows can vary depending on page size
+--replace_column 9 ROWS
+explain select * from t1 where pk1 < 7500 and key1 = 10;
+
+# Verify that keys with 'tails' of PK members are ok.
+explain select * from t1 where pktail1ok=1 and key1=10;
+explain select * from t1 where pktail2ok=1 and key1=10;
+
+# Note: The following is actually a deficiency: it currently uses sort_union.
+# This comment refers to InnoDB and is probably not valid for other engines.
+# The expected number of rows can vary depending on page size
+--replace_column 9 ROWS
+explain select * from t1 where (pktail2ok=1 and pk1< 50000) or key1=10;
+
+# The expected column used for KEY varies depending on page size
+# The expected number of rows can vary depending on page size and platform
+--replace_column 6 EITHER_KEY 9 ROWS
+explain select * from t1 where pktail3bad=1 and key1=10;
+# The expected column used for KEY varies depending on page size
+--replace_column 9 ROWS
+explain select * from t1 where pktail4bad=1 and key1=10;
+# The expected column used for KEY varies depending on page size
+--replace_column 9 ROWS
+explain select * from t1 where pktail5bad=1 and key1=10;
+
+# Test for a problem with the InnoDB key value prefetch buffer:
+explain select pk1,pk2,key1,key2 from t1 where key1 = 10 and key2=10 limit 10;
+select pk1,pk2,key1,key2 from t1 where key1 = 10 and key2=10 limit 10;
+
+drop table t1;
+# Testcase for BUG#4984
+create table t1
+(
+ RUNID varchar(22),
+ SUBMITNR varchar(5),
+ ORDERNR char(1),
+ PROGRAMM varchar(8),
+ TESTID varchar(4),
+ UCCHECK char(1),
+ ETEXT varchar(80),
+ ETEXT_TYPE char(1),
+ INFO char(1),
+ SEVERITY tinyint(3),
+ TADIRFLAG char(1),
+ PRIMARY KEY (RUNID,SUBMITNR,ORDERNR,PROGRAMM,TESTID,UCCHECK),
+ KEY `TVERM~KEY` (PROGRAMM,TESTID,UCCHECK)
+) DEFAULT CHARSET=latin1;
+
+update t1 set `ETEXT` = '', `ETEXT_TYPE`='', `INFO`='', `SEVERITY`='', `TADIRFLAG`=''
+WHERE
+ `RUNID`= '' AND `SUBMITNR`= '' AND `ORDERNR`='' AND `PROGRAMM`='' AND
+ `TESTID`='' AND `UCCHECK`='';
+
+drop table t1;
+
+--echo #
+--echo # Bug#50402 Optimizer producing wrong results when using Index Merge on InnoDB
+--echo #
+CREATE TABLE t1 (f1 INT, PRIMARY KEY (f1));
+INSERT INTO t1 VALUES (2);
+CREATE TABLE t2 (f1 INT, f2 INT, f3 char(1),
+ PRIMARY KEY (f1), KEY (f2), KEY (f3) );
+INSERT INTO t2 VALUES (1, 1, 'h'), (2, 3, 'h'), (3, 2, ''), (4, 2, '');
+
+SELECT t1.f1 FROM t1
+WHERE (SELECT COUNT(*) FROM t2 WHERE t2.f3 = 'h' AND t2.f2 = t1.f1) = 0 AND t1.f1 = 2;
+
+-- disable_query_log
+-- disable_result_log
+if ($engine_type == RocksDB)
+{
+ set global rocksdb_force_flush_memtable_now=1;
+}
+analyze table t1;
+analyze table t2;
+-- enable_result_log
+-- enable_query_log
+
+EXPLAIN SELECT t1.f1 FROM t1
+WHERE (SELECT COUNT(*) FROM t2 WHERE t2.f3 = 'h' AND t2.f2 = t1.f1) = 0 AND t1.f1 = 2;
+
+DROP TABLE t1,t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case1_1.inc b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case1_1.inc
new file mode 100644
index 00000000000..6dc5a78e3a0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case1_1.inc
@@ -0,0 +1,51 @@
+#
+# Check concurrent locking issues:
+# Locking rows that do not exist when using all primary key columns in a
+# WHERE clause
+#
+# To call this, set $isolation_level and call this file
+#
+# let $isolation_level = REPEATABLE READ;
+# --source suite/rocksdb/include/locking_issues_case1_1.inc
+#
+
+--echo
+--echo -----------------------------------------------------------------------
+--echo - Locking issues case 1.1:
+--echo - Locking rows that do not exist when using all primary key columns in
+--echo - a WHERE clause
+--echo - using $isolation_level transaction isolation level
+--echo -----------------------------------------------------------------------
+
+--disable_warnings
+DROP TABLE IF EXISTS t0;
+--enable_warnings
+
+CREATE TABLE t0(id1 INT, id2 INT, value INT, PRIMARY KEY(id1, id2));
+INSERT INTO t0 VALUES (1,1,0), (3,3,0), (4,4,0), (6,6,0);
+
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+connection con1;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+BEGIN;
+SELECT * FROM t0 WHERE id1=1 AND id2=5 FOR UPDATE;
+
+connection con2;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+BEGIN;
+--error ER_LOCK_WAIT_TIMEOUT
+INSERT INTO t0 VALUES (1,5,0);
+
+--error ER_LOCK_WAIT_TIMEOUT
+SELECT * FROM t0 WHERE id1=1 AND id2=5 FOR UPDATE;
+
+connection con1;
+COMMIT;
+
+connection default;
+disconnect con1;
+disconnect con2;
+
+DROP TABLE t0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case1_2.inc b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case1_2.inc
new file mode 100644
index 00000000000..13083bf82d9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case1_2.inc
@@ -0,0 +1,48 @@
+#
+# Check concurrent locking issues:
+# Locking rows that do not exist without using all primary key columns in a
+# WHERE clause
+#
+# To call this, set $isolation_level and call this file
+#
+# let $isolation_level = REPEATABLE READ;
+# --source suite/rocksdb/include/locking_issues_case1_2.inc
+#
+
+--echo
+--echo -----------------------------------------------------------------------
+--echo - Locking issues case 1.2:
+--echo - Locking rows that do not exist without using all primary key
+--echo - columns in a WHERE clause
+--echo - using $isolation_level transaction isolation level
+--echo -----------------------------------------------------------------------
+
+--disable_warnings
+DROP TABLE IF EXISTS t0;
+--enable_warnings
+
+CREATE TABLE t0(id1 INT, id2 INT, value INT, PRIMARY KEY(id1, id2));
+INSERT INTO t0 VALUES (1,1,0), (3,3,0), (4,4,0), (6,6,0);
+
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+connection con1;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+BEGIN;
+SELECT * FROM t0 WHERE id1=1 FOR UPDATE;
+
+connection con2;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+BEGIN;
+SELECT * FROM t0 WHERE id1=1 AND id2=4 FOR UPDATE;
+INSERT INTO t0 VALUES (1,5,0);
+
+connection con1;
+COMMIT;
+
+connection default;
+disconnect con1;
+disconnect con2;
+
+DROP TABLE t0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case2.inc b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case2.inc
new file mode 100644
index 00000000000..61c604dd6d3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case2.inc
@@ -0,0 +1,97 @@
+#
+# Check concurrent locking issues:
+# Rows that are scanned but do not match the WHERE clause are not locked.
+#
+# To call this, set $isolation_level and call this file
+# If you want to enable rocksdb_lock_scanned_rows set $lock_scanned_rows=1
+#
+# let $isolation_level = REPEATABLE READ;
+# let $lock_scanned_rows = 1 (optional)
+# --source suite/rocksdb/include/locking_issues_case2.inc
+#
+
+--echo
+--echo -----------------------------------------------------------------------
+--echo - Locking issues case 2:
+--echo - Rows that are scanned but do not match the WHERE are not locked
+--echo - using $isolation_level transaction isolation level unless
+--echo - rocksdb_lock_scanned_rows is on
+--echo -----------------------------------------------------------------------
+
+--disable_warnings
+DROP TABLE IF EXISTS t0;
+--enable_warnings
+
+SELECT @@global.rocksdb_lock_scanned_rows;
+
+if ($lock_scanned_rows)
+{
+ let $original_val=query_get_value(
+ select @@global.rocksdb_lock_scanned_rows as val, val, 1);
+ SET GLOBAL rocksdb_lock_scanned_rows=ON;
+}
+
+CREATE TABLE t0(id INT PRIMARY KEY, value INT);
+INSERT INTO t0 VALUES (1,0), (2,1), (3,0), (4,0), (5,1);
+
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+connection con1;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+BEGIN;
+
+connection con2;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+BEGIN;
+
+if ($lock_scanned_rows == 1)
+{
+ connection con1;
+ # This is expected to leave locks on all the rows in t0
+ SELECT * FROM t0 WHERE value > 0 FOR UPDATE;
+
+ connection con2;
+ --error ER_LOCK_WAIT_TIMEOUT
+ UPDATE t0 SET VALUE=10 WHERE id=1;
+}
+
+if ($lock_scanned_rows == 0)
+{
+ connection con1;
+ # This is expected to release locks on rows with value=0
+ SELECT * FROM t0 WHERE value > 0 FOR UPDATE;
+
+ connection con2;
+ # This should succeed as con1 should have released the lock on row (1,0)
+ UPDATE t0 SET VALUE=10 WHERE id=1;
+
+ # This should fail because lock on row (5,1) is still held.
+ --error ER_LOCK_WAIT_TIMEOUT
+ UPDATE t0 SET VALUE=10 WHERE id=5;
+
+ connection con1;
+  # Do another operation: only row (5,1) matches, row (4,0) is filtered out
+ UPDATE t0 SET value=100 WHERE id in (4,5) and value>0;
+
+ connection con2;
+ # Check that row (4,0) is still not locked
+ SELECT * FROM t0 WHERE id=4 FOR UPDATE;
+
+ COMMIT;
+ SELECT * FROM t0;
+}
+
+connection con1;
+COMMIT;
+
+connection default;
+disconnect con1;
+disconnect con2;
+
+DROP TABLE t0;
+
+if ($lock_scanned_rows == 1)
+{
+ eval SET GLOBAL rocksdb_lock_scanned_rows=$original_val;
+}
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case3.inc b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case3.inc
new file mode 100644
index 00000000000..34947cb0ecb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case3.inc
@@ -0,0 +1,71 @@
+#
+# Check concurrent locking issues:
+# After creating a snapshot, other clients updating rows
+#
+# To call this, set $isolation_level and call this file
+#
+# let $isolation_level = REPEATABLE READ;
+# --source suite/rocksdb/include/locking_issues_case3.inc
+#
+
+--echo
+--echo -----------------------------------------------------------------------
+--echo - Locking issues case 3:
+--echo - After creating a snapshot, other clients updating rows
+--echo - using $isolation_level transaction isolation level
+--echo -----------------------------------------------------------------------
+
+--disable_warnings
+DROP TABLE IF EXISTS t0;
+--enable_warnings
+
+CREATE TABLE t0(id INT AUTO_INCREMENT PRIMARY KEY, value INT);
+
+# Insert 200,000 rows in 200 transactions of 1,000 rows each (100 ten-row inserts per transaction)
+--echo Inserting 200,000 rows
+--disable_query_log
+SET @save_rocksdb_bulk_load=@@rocksdb_bulk_load;
+SET rocksdb_bulk_load=1;
+SET @save_rocksdb_write_disable_wal=@@rocksdb_write_disable_wal;
+SET GLOBAL rocksdb_write_disable_wal=1;
+let $i = 1;
+while ($i <= 200) {
+ eval BEGIN;
+ let $j = 1;
+ while ($j <= 100) {
+ eval INSERT INTO t0(value) VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0);
+ inc $j;
+ }
+ eval COMMIT;
+ inc $i;
+}
+SET rocksdb_bulk_load=@save_rocksdb_bulk_load;
+SET GLOBAL rocksdb_write_disable_wal=@save_rocksdb_write_disable_wal;
+--enable_query_log
+
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+connection con1;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+let $ID = `SELECT connection_id()`;
+send SELECT * FROM t0 WHERE value > 0 FOR UPDATE;
+
+connection con2;
+let $wait_condition =
+ SELECT 1 FROM information_schema.processlist
+ WHERE (id = $ID/* OR srv_id = $ID*/) AND state = "Sending data";
+--source include/wait_condition.inc
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+UPDATE t0 SET VALUE=VALUE+1 WHERE id=190000;
+
+connection con1;
+--error 0,ER_LOCK_DEADLOCK
+reap;
+--echo ERROR: $mysql_errno
+
+connection default;
+disconnect con1;
+disconnect con2;
+
+DROP TABLE t0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case4.inc b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case4.inc
new file mode 100644
index 00000000000..8140b81a95e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case4.inc
@@ -0,0 +1,69 @@
+#
+# Check concurrent locking issues:
+# Phantom rows
+#
+# To call this, set $isolation_level and call this file
+#
+# let $isolation_level = REPEATABLE READ;
+# --source suite/rocksdb/include/locking_issues_case4.inc
+#
+
+--echo
+--echo -----------------------------------------------------------------------
+--echo - Locking issues case 4:
+--echo - Phantom rows
+--echo - using $isolation_level transaction isolation level
+--echo -----------------------------------------------------------------------
+
+--disable_warnings
+DROP TABLE IF EXISTS t0;
+--enable_warnings
+
+CREATE TABLE t0(id INT AUTO_INCREMENT PRIMARY KEY, value INT);
+
+# Insert 200,000 rows in 200 transactions of 1,000 rows each (100 ten-row inserts per transaction)
+--echo Inserting 200,000 rows
+--disable_query_log
+SET @save_rocksdb_bulk_load=@@rocksdb_bulk_load;
+SET rocksdb_bulk_load=1;
+SET @save_rocksdb_write_disable_wal=@@rocksdb_write_disable_wal;
+SET GLOBAL rocksdb_write_disable_wal=1;
+let $i = 1;
+while ($i <= 200) {
+ eval BEGIN;
+ let $j = 1;
+ while ($j <= 100) {
+ eval INSERT INTO t0(value) VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0);
+ inc $j;
+ }
+ eval COMMIT;
+ inc $i;
+}
+SET rocksdb_bulk_load=@save_rocksdb_bulk_load;
+SET GLOBAL rocksdb_write_disable_wal=@save_rocksdb_write_disable_wal;
+--enable_query_log
+
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+connection con1;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+let $ID = `SELECT connection_id()`;
+send SELECT * FROM t0 WHERE value > 0 FOR UPDATE;
+
+connection con2;
+let $wait_condition =
+ SELECT 1 FROM information_schema.processlist
+ WHERE (id = $ID/* OR srv_id = $ID*/) AND state = "Sending data";
+--source include/wait_condition.inc
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+INSERT INTO t0 VALUES(200001,1), (-1,1);
+
+connection con1;
+reap;
+
+connection default;
+disconnect con1;
+disconnect con2;
+
+DROP TABLE t0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case5.inc b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case5.inc
new file mode 100644
index 00000000000..e28f1c90b3a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case5.inc
@@ -0,0 +1,77 @@
+#
+# Check concurrent locking issues:
+# Deleting primary key
+#
+# To call this, set $isolation_level and call this file
+#
+# let $isolation_level = REPEATABLE READ;
+# --source suite/rocksdb/include/locking_issues_case5.inc
+#
+
+--echo
+--echo -----------------------------------------------------------------------
+--echo - Locking issues case 5:
+--echo - Deleting primary key
+--echo - using $isolation_level transaction isolation level
+--echo -----------------------------------------------------------------------
+
+--disable_warnings
+DROP TABLE IF EXISTS t0;
+--enable_warnings
+
+CREATE TABLE t0(id INT AUTO_INCREMENT PRIMARY KEY, value INT);
+
+# Insert 200,000 rows in 200 transactions of 1,000 rows each (100 ten-row inserts per transaction)
+--echo Inserting 200,000 rows
+--disable_query_log
+SET @save_rocksdb_bulk_load=@@rocksdb_bulk_load;
+SET rocksdb_bulk_load=1;
+SET @save_rocksdb_write_disable_wal=@@rocksdb_write_disable_wal;
+SET GLOBAL rocksdb_write_disable_wal=1;
+let $i = 1;
+while ($i <= 200) {
+ eval BEGIN;
+ let $j = 1;
+ while ($j <= 100) {
+ eval INSERT INTO t0(value) VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0);
+ inc $j;
+ }
+ eval COMMIT;
+ inc $i;
+}
+SET rocksdb_bulk_load=@save_rocksdb_bulk_load;
+SET GLOBAL rocksdb_write_disable_wal=@save_rocksdb_write_disable_wal;
+--enable_query_log
+
+UPDATE t0 SET value=100 WHERE id=190000;
+
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+connection con1;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+BEGIN;
+let $ID = `SELECT connection_id()`;
+send SELECT * FROM t0 WHERE value > 0 FOR UPDATE;
+
+connection con2;
+let $wait_condition =
+ SELECT 1 FROM information_schema.processlist
+ WHERE (id = $ID /* OR srv_id = $ID*/) AND state = "Sending data";
+--source include/wait_condition.inc
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+BEGIN;
+DELETE FROM t0 WHERE id=190000;
+COMMIT;
+
+connection con1;
+--error 0,ER_LOCK_DEADLOCK
+reap;
+--echo ERROR: $mysql_errno
+COMMIT;
+
+connection default;
+disconnect con1;
+disconnect con2;
+
+DROP TABLE t0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case6.inc b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case6.inc
new file mode 100644
index 00000000000..13ceca07913
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case6.inc
@@ -0,0 +1,77 @@
+#
+# Check concurrent locking issues:
+# Changing primary key
+#
+# To call this, set $isolation_level and call this file
+#
+# let $isolation_level = REPEATABLE READ;
+# --source suite/rocksdb/include/locking_issues_case6.inc
+#
+
+--echo
+--echo -----------------------------------------------------------------------
+--echo - Locking issues case 6:
+--echo - Changing primary key
+--echo - using $isolation_level transaction isolation level
+--echo -----------------------------------------------------------------------
+
+--disable_warnings
+DROP TABLE IF EXISTS t0;
+--enable_warnings
+
+CREATE TABLE t0(id INT AUTO_INCREMENT PRIMARY KEY, value INT);
+
+# Insert 200,000 rows in 200 transactions of 1,000 rows each (100 ten-row inserts per transaction)
+--echo Inserting 200,000 rows
+--disable_query_log
+SET @save_rocksdb_bulk_load=@@rocksdb_bulk_load;
+SET rocksdb_bulk_load=1;
+SET @save_rocksdb_write_disable_wal=@@rocksdb_write_disable_wal;
+SET GLOBAL rocksdb_write_disable_wal=1;
+let $i = 1;
+while ($i <= 200) {
+ eval BEGIN;
+ let $j = 1;
+ while ($j <= 100) {
+ eval INSERT INTO t0(value) VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0);
+ inc $j;
+ }
+ eval COMMIT;
+ inc $i;
+}
+SET rocksdb_bulk_load=@save_rocksdb_bulk_load;
+SET GLOBAL rocksdb_write_disable_wal=@save_rocksdb_write_disable_wal;
+--enable_query_log
+
+UPDATE t0 SET value=100 WHERE id=190000;
+
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+connection con1;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+BEGIN;
+let $ID = `SELECT connection_id()`;
+send SELECT * FROM t0 WHERE value > 0 FOR UPDATE;
+
+connection con2;
+let $wait_condition =
+ SELECT 1 FROM information_schema.processlist
+ WHERE (id = $ID/* OR srv_id = $ID*/) AND state = "Sending data";
+--source include/wait_condition.inc
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+BEGIN;
+UPDATE t0 SET id=200001 WHERE id=190000;
+COMMIT;
+
+connection con1;
+--error 0,ER_LOCK_DEADLOCK
+reap;
+--echo ERROR: $mysql_errno
+COMMIT;
+
+connection default;
+disconnect con1;
+disconnect con2;
+
+DROP TABLE t0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case7.inc b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case7.inc
new file mode 100644
index 00000000000..d71d398982e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case7.inc
@@ -0,0 +1,89 @@
+#
+# Check concurrent locking issues:
+# Rows that are scanned but are not in the table being updated should be
+# locked when rocksdb_lock_scanned_rows is on, and left unlocked otherwise.
+#
+# To call this, set $isolation_level and $lock_scanned_rows and call this file
+#
+# let $isolation_level = REPEATABLE READ;
+# let $lock_scanned_rows = 0 (or 1)
+# --source suite/rocksdb/include/locking_issues_case7.inc
+#
+
+--echo
+--echo -----------------------------------------------------------------------
+--echo - Locking issues case 7:
+--echo - Rows that are scanned as part of a query but not in the table being
+--echo - updated should not be locked unless rocksdb_lock_scanned_rows is on
+--echo -----------------------------------------------------------------------
+
+--disable_warnings
+DROP TABLE IF EXISTS t1, t2;
+--enable_warnings
+
+SELECT @@global.rocksdb_lock_scanned_rows;
+
+if ($lock_scanned_rows)
+{
+ let $original_val=query_get_value(
+ select @@global.rocksdb_lock_scanned_rows as val, val, 1);
+ SET GLOBAL rocksdb_lock_scanned_rows=ON;
+}
+
+CREATE TABLE t1(id INT PRIMARY KEY, value INT);
+CREATE TABLE t2(id INT PRIMARY KEY, value INT);
+INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
+INSERT INTO t2 VALUES (1,1), (2,2), (3,3), (4,4), (5,5);
+
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+connection con1;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+BEGIN;
+
+connection con2;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+BEGIN;
+
+--echo lock_scanned_rows is $lock_scanned_rows
+if ($lock_scanned_rows == 1)
+{
+ connection con1;
+  # This is expected to leave a lock on the row with id=3 in t2;
+ UPDATE t1 JOIN t2 ON t1.id = t2.id SET t1.value=t1.value+100 WHERE t2.id=3;
+
+ connection con2;
+ --error ER_LOCK_WAIT_TIMEOUT
+ UPDATE t2 SET value=value+100 WHERE id=3;
+
+ # No other row in t2 should be locked;
+ UPDATE t2 SET value=value+100 WHERE id IN (1,2,4,5);
+ SELECT * FROM t2;
+}
+
+if ($lock_scanned_rows == 0)
+{
+ connection con1;
+ # This should leave no locks on any row in t2;
+ UPDATE t1 JOIN t2 ON t1.id = t2.id SET t1.value=t1.value+100 WHERE t2.id=3;
+
+ connection con2;
+ UPDATE t2 SET value=value+100;
+ SELECT * FROM t2;
+}
+
+connection con1;
+COMMIT;
+
+connection default;
+disconnect con1;
+disconnect con2;
+
+DROP TABLE t1;
+DROP TABLE t2;
+
+if ($lock_scanned_rows == 1)
+{
+ eval SET GLOBAL rocksdb_lock_scanned_rows=$original_val;
+}
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/prefix_index_only_query_check.inc b/storage/rocksdb/mysql-test/rocksdb/include/prefix_index_only_query_check.inc
new file mode 100644
index 00000000000..e96eb573c1f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/prefix_index_only_query_check.inc
@@ -0,0 +1,21 @@
+#
+# A helper include file for prefix index index-only query tests
+#
+# Parameters:
+# $prefix_index_check_title - title of the test
+# $prefix_index_check_query - test query
+# $prefix_index_check_read_avoided_delta - expected change of
+# 'rocksdb_covered_secondary_key_lookups' status variable
+# value after running the query
+
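+# A hedged usage sketch; the title, query, and delta below are illustrative
+# placeholders, not values defined by this file:
+#
+# --let $prefix_index_check_title= Covered lookup via prefix index
+# --let $prefix_index_check_query= SELECT b FROM t1 WHERE b = 'abcd'
+# --let $prefix_index_check_read_avoided_delta= 1
+# --source suite/rocksdb/include/prefix_index_only_query_check.inc
+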
+--let $show_count_statement = show status like 'rocksdb_covered_secondary_key_lookups'
+
+--echo # $prefix_index_check_title
+--let $base_count = query_get_value($show_count_statement, Value, 1)
+
+--eval $prefix_index_check_query
+
+--let $count = query_get_value($show_count_statement, Value, 1)
+--let $assert_text= $prefix_index_check_title: $prefix_index_check_read_avoided_delta rocksdb_covered_secondary_key_lookups
+--let $assert_cond= $count - $base_count = $prefix_index_check_read_avoided_delta
+--source include/assert.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/restart_mysqld_with_invalid_option.inc b/storage/rocksdb/mysql-test/rocksdb/include/restart_mysqld_with_invalid_option.inc
new file mode 100644
index 00000000000..8eef7ed2162
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/restart_mysqld_with_invalid_option.inc
@@ -0,0 +1,8 @@
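+# Verify that the server refuses to start when given the command-line options
+# in $_mysqld_option, then restart it with the default options.
+#
+# A hedged usage sketch (an unknown option, shown purely as an illustration,
+# is one way to make startup fail):
+# --let $_mysqld_option=--not_a_real_option=1
+# --source suite/rocksdb/include/restart_mysqld_with_invalid_option.inc
+#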
+--source include/shutdown_mysqld.inc
+
+# Expect the server to fail to come up with these options
+--error 1
+--exec $MYSQLD_CMD --plugin_load=$HA_ROCKSDB_SO $_mysqld_option
+
+# Restart the server with the default options
+--source include/start_mysqld.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/restart_mysqld_with_option.inc b/storage/rocksdb/mysql-test/rocksdb/include/restart_mysqld_with_option.inc
new file mode 100644
index 00000000000..4250b368b1a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/restart_mysqld_with_option.inc
@@ -0,0 +1,31 @@
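+# Restart the currently connected server, passing the extra command-line
+# options from $_mysqld_option. A hedged usage sketch (the option shown is
+# only an illustration):
+#
+# --let $_mysqld_option=--rocksdb_flush_log_at_trx_commit=0
+# --source include/restart_mysqld_with_option.inc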
+
+if ($rpl_inited)
+{
+ if (!$allow_rpl_inited)
+ {
+ --die ERROR IN TEST: This script does not support replication
+ }
+}
+
+# Write file to make mysql-test-run.pl expect the "crash", but don't start
+# it until it's told to
+--let $_server_id= `SELECT @@server_id`
+--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$_server_id.expect
+--exec echo "wait" > $_expect_file_name
+
+# Send shutdown to the connected server and give
+# it 10 seconds to die before zapping it
+shutdown_server 10;
+
+# Write file to make mysql-test-run.pl start up the server again
+--exec echo "restart:$_mysqld_option" > $_expect_file_name
+
+# Turn on reconnect
+--enable_reconnect
+
+# Call script that will poll the server waiting for it to be back online again
+--source include/wait_until_connected_again.inc
+
+# Turn off reconnect again
+--disable_reconnect
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/rocksdb_icp.inc b/storage/rocksdb/mysql-test/rocksdb/include/rocksdb_icp.inc
new file mode 100644
index 00000000000..c76b52d4cc1
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/rocksdb_icp.inc
@@ -0,0 +1,199 @@
+#
+# Testing Index Condition Pushdown for MyRocks
+# Test file parameter: $cf_name specifies the CF to store test data in.
+# It can be a forward or reverse-ordered CF.
+#
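+# A minimal usage sketch; 'rev:cf1' stands in for any reverse-ordered CF:
+#
+# let $cf_name = rev:cf1;
+# --source suite/rocksdb/include/rocksdb_icp.inc
+#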
+select * from information_schema.engines where engine = 'rocksdb';
+
+--disable_warnings
+drop table if exists t0,t1,t2,t3;
+--enable_warnings
+create table t0 (a int) engine=myisam;
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t1(a int) engine=myisam;
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+
+eval
+create table t2 (
+ pk int primary key,
+ kp1 int,
+ kp2 int,
+ col1 int,
+ key (kp1,kp2) comment '$cf_name'
+) engine=rocksdb;
+
+insert into t2 select a,a,a,a from t1;
+
+--echo # Try a basic case:
+--replace_column 9 #
+explain
+select * from t2 where kp1 between 1 and 10 and mod(kp2,2)=0;
+select * from t2 where kp1 between 1 and 10 and mod(kp2,2)=0;
+
+--echo # Check that ICP doesn't work for columns where the column value
+--echo # can't be restored from its mem-comparable form:
+
+eval
+create table t3 (
+ pk int primary key,
+ kp1 int,
+ kp2 varchar(10) collate utf8_general_ci,
+ col1 int,
+ key (kp1,kp2) comment '$cf_name'
+) engine=rocksdb;
+
+insert into t3 select a,a/10,a,a from t1;
+--echo # This must not use ICP:
+--replace_column 9 #
+explain
+select * from t3 where kp1=3 and kp2 like '%foo%';
+
+--replace_column 9 #
+explain format=json
+select * from t3 where kp1 between 2 and 4 and mod(kp1,3)=0 and kp2 like '%foo%';
+
+--echo # Check that we handle the case where out-of-range is encountered sooner
+--echo # than a matching index condition
+--replace_column 9 #
+explain
+select * from t2 where kp1< 3 and kp2+1>50000;
+select * from t2 where kp1< 3 and kp2+1>50000;
+
+--replace_column 9 #
+explain
+select * from t2 where kp1< 3 and kp2+1>50000;
+select * from t2 where kp1< 3 and kp2+1>50000;
+
+--echo # Try doing backwards scans
+--echo # MariaDB: ICP is not supported for reverse scans.
+
+--replace_column 9 #
+explain
+select * from t2 where kp1 between 1 and 10 and mod(kp2,2)=0 order by kp1 desc;
+select * from t2 where kp1 between 1 and 10 and mod(kp2,2)=0 order by kp1 desc;
+
+--replace_column 9 #
+explain
+select * from t2 where kp1 >990 and mod(kp2,2)=0 order by kp1 desc;
+select * from t2 where kp1 >990 and mod(kp2,2)=0 order by kp1 desc;
+
+--replace_column 9 #
+explain
+select * from t2 where kp1< 3 and kp2+1>50000 order by kp1 desc;
+select * from t2 where kp1< 3 and kp2+1>50000 order by kp1 desc;
+
+drop table t0,t1,t2,t3;
+
+--echo #
+--echo # Check how ICP affects counters
+--echo #
+--echo # First, some preparations
+--echo #
+--echo # in facebook/mysql-5.6, it was:
+--echo # select ROWS_READ, ROWS_REQUESTED, ROWS_INDEX_FIRST, ROWS_INDEX_NEXT
+--echo #
+--echo # In MariaDB, we do:
+delimiter |;
+create procedure save_read_stats()
+begin
+ set @rr=(select ROWS_READ
+ from information_schema.table_statistics
+ where table_name='t4' and table_schema=database());
+
+ set @rif= (select VARIABLE_VALUE
+ from information_schema.session_status
+ where VARIABLE_NAME='Handler_read_first');
+
+ set @rin=(select VARIABLE_VALUE
+ from information_schema.session_status
+ where VARIABLE_NAME='Handler_read_next');
+
+ set @icp_attempts=(select VARIABLE_VALUE
+ from information_schema.session_status
+ where VARIABLE_NAME='Handler_icp_attempts');
+
+ set @icp_matches=(select VARIABLE_VALUE
+ from information_schema.session_status
+ where VARIABLE_NAME='Handler_icp_match');
+end|
+
+create procedure get_read_stats()
+begin
+ select
+ (select ROWS_READ
+ from information_schema.table_statistics
+ where table_name='t4' and table_schema=database()
+ ) - @rr as ROWS_READ_DIFF,
+
+ (select VARIABLE_VALUE - @rif
+ from information_schema.session_status
+ where VARIABLE_NAME='Handler_read_first') as ROWS_INDEX_FIRST,
+
+ (select VARIABLE_VALUE - @rin
+ from information_schema.session_status
+ where VARIABLE_NAME='Handler_read_next') as ROWS_INDEX_NEXT,
+
+ (select VARIABLE_VALUE - @icp_attempts
+ from information_schema.session_status
+ where VARIABLE_NAME='Handler_icp_attempts') as ICP_ATTEMPTS,
+
+ (select VARIABLE_VALUE - @icp_matches
+ from information_schema.session_status
+ where VARIABLE_NAME='Handler_icp_match') as ICP_MATCHES;
+end|
+
+delimiter ;|
+
+eval
+create table t4 (
+ id int,
+ id1 int,
+ id2 int,
+ value int,
+ value2 varchar(100),
+ primary key (id),
+ key id1_id2 (id1, id2) comment '$cf_name'
+) engine=rocksdb charset=latin1 collate latin1_bin;
+
+insert into t4 values
+(1,1,1,1,1), (2,1,2,2,2), (3,1,3,3,3),(4,1,4,4,4),(5,1,5,5,5),
+(6,1,6,6,6), (7,1,7,7,7), (8,1,8,8,8),(9,1,9,9,9),(10,1,10,10,10);
+
+--echo #
+--echo # Now, the test itself
+--echo #
+call save_read_stats();
+call get_read_stats();
+
+
+--echo # ============== index-only query ==============
+--replace_column 9 #
+explain
+select id1,id2 from t4 force index (id1_id2) where id1=1 and id2 % 10 = 1;
+call save_read_stats();
+select id1,id2 from t4 force index (id1_id2) where id1=1 and id2 % 10 = 1;
+query_vertical call get_read_stats();
+
+--echo # ============== Query without ICP ==============
+set optimizer_switch='index_condition_pushdown=off';
+--replace_column 9 #
+explain
+select * from t4 force index (id1_id2) where id1=1 and id2 % 10 = 1;
+call save_read_stats();
+select * from t4 force index (id1_id2) where id1=1 and id2 % 10 = 1;
+query_vertical call get_read_stats();
+
+--echo # ============== Query with ICP ==============
+set optimizer_switch='index_condition_pushdown=on';
+--replace_column 9 #
+explain
+select * from t4 force index (id1_id2) where id1=1 and id2 % 10 = 1;
+call save_read_stats();
+select * from t4 force index (id1_id2) where id1=1 and id2 % 10 = 1;
+query_vertical call get_read_stats();
+
+drop table t4;
+drop procedure save_read_stats;
+drop procedure get_read_stats;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/simple_deadlock.inc b/storage/rocksdb/mysql-test/rocksdb/include/simple_deadlock.inc
new file mode 100644
index 00000000000..0afdfea76db
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/simple_deadlock.inc
@@ -0,0 +1,29 @@
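+# Simple two-connection deadlock scenario. Assumed to be set up by the caller
+# (not defined in this file): connections con1 and con2, a table t containing
+# rows with i=1 and i=2, $con1 holding con1's connection/thread id, and
+# $engine set to "rocksdb" or "innodb".
+#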
+connection con1;
+begin;
+select * from t where i=1 for update;
+
+connection con2;
+begin;
+select * from t where i=2 for update;
+
+connection con1;
+--send select * from t where i=2 for update
+
+connection con2;
+if ($engine == "rocksdb"){
+ let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+ where thread_id = $con1 and waiting_key != "";
+}
+if ($engine == "innodb"){
+ let $wait_condition = select count(*) = 1 from information_schema.innodb_trx
+ where trx_mysql_thread_id = $con1 and trx_state="LOCK WAIT";
+}
+--source include/wait_condition.inc
+
+--error ER_LOCK_DEADLOCK
+select * from t where i=1 for update;
+rollback;
+
+connection con1;
+--reap
+rollback;
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/start_mysqld_with_option.inc b/storage/rocksdb/mysql-test/rocksdb/include/start_mysqld_with_option.inc
new file mode 100644
index 00000000000..73e30b3e46c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/start_mysqld_with_option.inc
@@ -0,0 +1,14 @@
+# Include this script only after using shutdown_mysqld.inc
+# where $_expect_file_name was initialized.
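+#
+# A hedged usage sketch (the option shown is only an illustration):
+# --source include/shutdown_mysqld.inc
+# --let $_mysqld_option=--rocksdb_enable_2pc=0
+# --source include/start_mysqld_with_option.inc
+#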
+# Write file to make mysql-test-run.pl start up the server again
+--exec echo "restart:$_mysqld_option" > $_expect_file_name
+
+# Turn on reconnect
+--enable_reconnect
+
+# Call script that will poll the server waiting for it to be back online again
+--source include/wait_until_connected_again.inc
+
+# Turn off reconnect again
+--disable_reconnect
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/use_direct_io_option.inc b/storage/rocksdb/mysql-test/rocksdb/include/use_direct_io_option.inc
new file mode 100644
index 00000000000..da16e1c9c3b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/use_direct_io_option.inc
@@ -0,0 +1,23 @@
+# Common test pattern for options that control direct I/O
+#
+# Required input:
+# $io_option - name and assignment to enable on server command line
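+#
+# A hedged usage sketch (the specific option is an example only):
+# --let $io_option=--rocksdb_use_direct_reads=1
+# --source suite/rocksdb/include/use_direct_io_option.inc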
+
+--source include/have_direct_io.inc
+
+--echo Checking direct reads
+--let $_mysqld_option=$io_option
+--source include/restart_mysqld_with_option.inc
+
+CREATE TABLE t1 (pk INT PRIMARY KEY DEFAULT '0', a INT(11), b CHAR(8)) ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+INSERT INTO t1 VALUES (1, 1,'a');
+INSERT INTO t1 (a,b) VALUES (2,'b');
+set global rocksdb_force_flush_memtable_now=1;
+--sorted_result
+SELECT a,b FROM t1;
+DROP TABLE t1;
+
+# cleanup
+--let $_mysqld_option=
+--source include/restart_mysqld.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/my.cnf b/storage/rocksdb/mysql-test/rocksdb/my.cnf
new file mode 100644
index 00000000000..ab89713cc58
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/my.cnf
@@ -0,0 +1,19 @@
+!include include/default_my.cnf
+
+[server]
+skip-innodb
+default-storage-engine=rocksdb
+
+sql-mode=NO_ENGINE_SUBSTITUTION
+explicit-defaults-for-timestamp=1
+loose-rocksdb_lock_wait_timeout=1
+loose-rocksdb_strict_collation_check=0
+
+# MariaDB: speed up the tests:
+loose-rocksdb-flush-log-at-trx-commit=0
+
+loose-rocksdb_force_compute_memtable_stats_cachetime=0
+
+# The following is to get rid of the harmless
+# "Deadlock found when trying to get lock" errors, see MDEV-12285.
+log-warnings=1
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/1st.result b/storage/rocksdb/mysql-test/rocksdb/r/1st.result
new file mode 100644
index 00000000000..323b614ea36
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/1st.result
@@ -0,0 +1,22 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (pk INT PRIMARY KEY DEFAULT '0', a INT(11), b CHAR(8)) ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` int(11) NOT NULL DEFAULT 0,
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`pk`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI 0
+a int(11) YES NULL
+b char(8) YES NULL
+INSERT INTO t1 VALUES (1, 1,'a');
+INSERT INTO t1 (a,b) VALUES (2,'b');
+SELECT a,b FROM t1;
+a b
+1 a
+2 b
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result b/storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result
new file mode 100644
index 00000000000..a6bb9eb64c7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result
@@ -0,0 +1,101 @@
+# Disable for valgrind because this takes too long
+DROP DATABASE IF EXISTS mysqlslap;
+CREATE DATABASE mysqlslap;
+USE mysqlslap;
+CREATE TABLE t1(id BIGINT AUTO_INCREMENT, value BIGINT, PRIMARY KEY(id)) ENGINE=rocksdb;
+SET @save_rocksdb_enable_2pc= @@rocksdb_enable_2pc;
+SET @save_rocksdb_flush_log_at_trx_commit= @@rocksdb_flush_log_at_trx_commit;
+# 2PC enabled, MyRocks durability enabled
+SET GLOBAL rocksdb_enable_2pc=1;
+SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
+##
+## 2PC + durability + single thread
+##
+select variable_value into @b1 from information_schema.global_status where variable_name='Binlog_commits';
+select variable_value into @b2 from information_schema.global_status where variable_name='Binlog_group_commits';
+select variable_value into @b3 from information_schema.global_status where variable_name='Rocksdb_wal_synced';
+select IF(variable_value - @b1 = 1000, 'OK', variable_value - @b1) as Binlog_commits
+from information_schema.global_status where variable_name='Binlog_commits';
+Binlog_commits
+OK
+select IF(variable_value - @b2 = 1000, 'OK', variable_value - @b2) as Binlog_group_commits
+from information_schema.global_status where variable_name='Binlog_group_commits';
+Binlog_group_commits
+OK
+# Prepare operations sync, commits don't. We expect slightly more than 1K syncs:
+select IF(variable_value - @b3 between 1000 and 1500, 'OK', variable_value - @b3) as Rocksdb_wal_synced
+from information_schema.global_status where variable_name='Rocksdb_wal_synced';
+Rocksdb_wal_synced
+OK
+set debug_dbug='+d,rocksdb_enable_delay_commits';
+create table dummy10(a int) engine=rocksdb;
+Warnings:
+Warning 1210 enable_delay_commits_mode ON
+drop table dummy10;
+set debug_dbug='-d,rocksdb_enable_delay_commits';
+##
+## 2PC + durability + group commit
+##
+select variable_value into @b1 from information_schema.global_status where variable_name='Binlog_commits';
+select variable_value into @b2 from information_schema.global_status where variable_name='Binlog_group_commits';
+select variable_value into @b3 from information_schema.global_status where variable_name='Rocksdb_wal_synced';
+select IF(variable_value - @b1 = 10000, 'OK', variable_value - @b1) as Binlog_commits
+from information_schema.global_status where variable_name='Binlog_commits';
+Binlog_commits
+OK
+select IF(variable_value - @b2 between 100 and 5000, 'OK', variable_value - @b2) as Binlog_group_commits
+from information_schema.global_status where variable_name='Binlog_group_commits';
+Binlog_group_commits
+OK
+select IF(variable_value - @b3 between 1 and 9000, 'OK', variable_value - @b3)
+from information_schema.global_status where variable_name='Rocksdb_wal_synced';
+IF(variable_value - @b3 between 1 and 9000, 'OK', variable_value - @b3)
+OK
+set debug_dbug='+d,rocksdb_disable_delay_commits';
+create table dummy10(a int) engine=rocksdb;
+Warnings:
+Warning 1210 enable_delay_commits_mode OFF
+drop table dummy10;
+set debug_dbug='-d,rocksdb_disable_delay_commits';
+##
+# 2PC enabled, MyRocks durability disabled, single thread
+##
+SET GLOBAL rocksdb_enable_2pc=1;
+SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
+select variable_value into @b1 from information_schema.global_status where variable_name='Binlog_commits';
+select variable_value into @b2 from information_schema.global_status where variable_name='Binlog_group_commits';
+select variable_value into @b3 from information_schema.global_status where variable_name='Rocksdb_wal_synced';
+select IF(variable_value - @b1 = 1000, 'OK', variable_value - @b1) as Binlog_commits
+from information_schema.global_status where variable_name='Binlog_commits';
+Binlog_commits
+OK
+select IF(variable_value - @b2 = 1000, 'OK', variable_value - @b2) as Binlog_group_commits
+from information_schema.global_status where variable_name='Binlog_group_commits';
+Binlog_group_commits
+OK
+select IF(variable_value - @b3 < 10, 'OK', variable_value - @b3)
+from information_schema.global_status where variable_name='Rocksdb_wal_synced';
+IF(variable_value - @b3 < 10, 'OK', variable_value - @b3)
+OK
+##
+# 2PC enabled, MyRocks durability disabled, concurrent workload
+##
+select variable_value into @b1 from information_schema.global_status where variable_name='Binlog_commits';
+select variable_value into @b2 from information_schema.global_status where variable_name='Binlog_group_commits';
+select variable_value into @b3 from information_schema.global_status where variable_name='Rocksdb_wal_synced';
+select IF(variable_value - @b1 = 10000, 'OK', variable_value - @b1) as Binlog_commits
+from information_schema.global_status where variable_name='Binlog_commits';
+Binlog_commits
+OK
+select IF(variable_value - @b2 < 8000, 'OK', variable_value - @b2) as Binlog_group_commits
+from information_schema.global_status where variable_name='Binlog_group_commits';
+Binlog_group_commits
+OK
+select IF(variable_value - @b3 < 10, 'OK', variable_value - @b3)
+from information_schema.global_status where variable_name='Rocksdb_wal_synced';
+IF(variable_value - @b3 < 10, 'OK', variable_value - @b3)
+OK
+SET GLOBAL rocksdb_enable_2pc= @save_rocksdb_enable_2pc;
+SET GLOBAL rocksdb_flush_log_at_trx_commit= @save_rocksdb_flush_log_at_trx_commit;
+DROP TABLE t1;
+DROP DATABASE mysqlslap;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result
new file mode 100644
index 00000000000..32c0537c780
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result
@@ -0,0 +1,489 @@
+drop table if exists t1;
+CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1, 5);
+INSERT INTO t1 (a, b) VALUES (2, 6);
+INSERT INTO t1 (a, b) VALUES (3, 7);
+ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` int(11) DEFAULT NULL,
+ KEY `ka` (`a`),
+ KEY `kab` (`a`,`b`),
+ KEY `kb` (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+SELECT * FROM t1 FORCE INDEX(kb) WHERE b > 5;
+a b
+2 6
+3 7
+SELECT * FROM t1 FORCE INDEX(kab) WHERE a > 2;
+a b
+3 7
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1, 5);
+INSERT INTO t1 (a, b) VALUES (2, 6);
+INSERT INTO t1 (a, b) VALUES (3, 7);
+ALTER TABLE t1 ADD INDEX kb(b), DROP INDEX ka, ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` int(11) DEFAULT NULL,
+ KEY `kab` (`a`,`b`),
+ KEY `kb` (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+SELECT * FROM t1 FORCE INDEX(kb) WHERE b > 5;
+a b
+2 6
+3 7
+SELECT * FROM t1 FORCE INDEX(kab) WHERE a > 2;
+a b
+3 7
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1, 5);
+INSERT INTO t1 (a, b) VALUES (2, 6);
+INSERT INTO t1 (a, b) VALUES (3, 7);
+ALTER TABLE t1 DROP INDEX ka, DROP INDEX kab, ALGORITHM=INPLACE;
+ALTER TABLE t1 ADD INDEX kb(b), ADD INDEX kab(a,b), ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` int(11) DEFAULT NULL,
+ KEY `kb` (`b`),
+ KEY `kab` (`a`,`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+SELECT * FROM t1 FORCE INDEX(kb) WHERE b > 5;
+a b
+2 6
+3 7
+SELECT * FROM t1 FORCE INDEX(kab) WHERE a > 2;
+a b
+3 7
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1, 5);
+INSERT INTO t1 (a, b) VALUES (2, 6);
+INSERT INTO t1 (a, b) VALUES (3, 7);
+ALTER TABLE t1 ADD INDEX kb(b), DROP INDEX ka, ADD INDEX kba(b,a), DROP INDEX kab, ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` int(11) DEFAULT NULL,
+ KEY `kb` (`b`),
+ KEY `kba` (`b`,`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+SELECT * FROM t1 FORCE INDEX(kb) WHERE b > 5;
+a b
+2 6
+3 7
+SELECT * FROM t1 FORCE INDEX(kba) WHERE a > 2;
+a b
+3 7
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB;
+ALTER TABLE t1 DROP INDEX ka, ADD INDEX ka(b), ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` int(11) DEFAULT NULL,
+ KEY `kab` (`a`,`b`),
+ KEY `ka` (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+SELECT * FROM t1 FORCE INDEX(ka) WHERE b > 5;
+a b
+SELECT * FROM t1 FORCE INDEX(kab) WHERE a > 2;
+a b
+DROP TABLE t1;
+CREATE TABLE t1 (pk CHAR(8) PRIMARY KEY, a VARCHAR(11), b INT UNSIGNED) ENGINE=rocksdb charset utf8 collate utf8_bin;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` char(8) COLLATE utf8_bin NOT NULL,
+ `a` varchar(11) COLLATE utf8_bin DEFAULT NULL,
+ `b` int(10) unsigned DEFAULT NULL,
+ PRIMARY KEY (`pk`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk char(8) NO PRI NULL
+a varchar(11) YES NULL
+b int(10) unsigned YES NULL
+INSERT INTO t1 VALUES ('aaa', '1111', 1);
+INSERT INTO t1 VALUES ('bbb', '2222', 2);
+INSERT INTO t1 VALUES ('ccc', '3333', 3);
+ALTER TABLE t1 ADD INDEX kab(a,b), ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` char(8) COLLATE utf8_bin NOT NULL,
+ `a` varchar(11) COLLATE utf8_bin DEFAULT NULL,
+ `b` int(10) unsigned DEFAULT NULL,
+ PRIMARY KEY (`pk`),
+ KEY `kab` (`a`,`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+SELECT * FROM t1 FORCE INDEX(kab) WHERE a > '2' AND b < 3;
+pk a b
+bbb 2222 2
+DROP TABLE t1;
+CREATE TABLE t1 (pk CHAR(8) PRIMARY KEY, a VARCHAR(11), b INT UNSIGNED) ENGINE=rocksdb charset utf8 collate utf8_bin;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` char(8) COLLATE utf8_bin NOT NULL,
+ `a` varchar(11) COLLATE utf8_bin DEFAULT NULL,
+ `b` int(10) unsigned DEFAULT NULL,
+ PRIMARY KEY (`pk`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk char(8) NO PRI NULL
+a varchar(11) YES NULL
+b int(10) unsigned YES NULL
+INSERT INTO t1 VALUES ('aaa', '1111', 1);
+INSERT INTO t1 VALUES ('bbb', '2222', 2);
+INSERT INTO t1 VALUES ('ccc', '3333', 3);
+ALTER TABLE t1 ADD INDEX kab(a,b), ALGORITHM=INPLACE;
+ALTER TABLE t1 ADD INDEX ka(a), DROP INDEX kab, ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` char(8) COLLATE utf8_bin NOT NULL,
+ `a` varchar(11) COLLATE utf8_bin DEFAULT NULL,
+ `b` int(10) unsigned DEFAULT NULL,
+ PRIMARY KEY (`pk`),
+ KEY `ka` (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+SELECT * FROM t1 FORCE INDEX(ka) WHERE a > '2' AND b < 3;
+pk a b
+bbb 2222 2
+DROP TABLE t1;
+CREATE TABLE t1 (pk CHAR(8) PRIMARY KEY, a VARCHAR(11), b INT UNSIGNED) ENGINE=rocksdb charset utf8 collate utf8_bin;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` char(8) COLLATE utf8_bin NOT NULL,
+ `a` varchar(11) COLLATE utf8_bin DEFAULT NULL,
+ `b` int(10) unsigned DEFAULT NULL,
+ PRIMARY KEY (`pk`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk char(8) NO PRI NULL
+a varchar(11) YES NULL
+b int(10) unsigned YES NULL
+INSERT INTO t1 VALUES ('aaa', '1111', 1);
+INSERT INTO t1 VALUES ('bbb', '2222', 2);
+INSERT INTO t1 VALUES ('ccc', '3333', 3);
+ALTER TABLE t1 ADD INDEX kab(a,b), ADD INDEX ka(a), ADD INDEX kb(b), ALGORITHM=INPLACE;
+ALTER TABLE t1 DROP INDEX ka, DROP INDEX kb, ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` char(8) COLLATE utf8_bin NOT NULL,
+ `a` varchar(11) COLLATE utf8_bin DEFAULT NULL,
+ `b` int(10) unsigned DEFAULT NULL,
+ PRIMARY KEY (`pk`),
+ KEY `kab` (`a`,`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+SELECT * FROM t1 FORCE INDEX(kab) WHERE a > '2' AND b < 3;
+pk a b
+bbb 2222 2
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1, 5);
+INSERT INTO t1 (a, b) VALUES (2, 6);
+INSERT INTO t1 (a, b) VALUES (3, 7);
+CREATE INDEX kb on t1 (b);
+CREATE INDEX kba on t1 (b,a);
+DROP INDEX ka on t1;
+DROP INDEX kab on t1;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` int(11) DEFAULT NULL,
+ KEY `kb` (`b`),
+ KEY `kba` (`b`,`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+SELECT * FROM t1 FORCE INDEX(kb) WHERE b > 5;
+a b
+2 6
+3 7
+SELECT * FROM t1 FORCE INDEX(kba) WHERE a > 2;
+a b
+3 7
+DROP TABLE t1;
+CREATE TABLE t1 (i INT, j INT, k INT, PRIMARY KEY (i), KEY(j)) ENGINE = ROCKSDB PARTITION BY KEY(i) PARTITIONS 4;
+ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE;
+DROP INDEX kij ON t1;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) NOT NULL,
+ `j` int(11) DEFAULT NULL,
+ `k` int(11) DEFAULT NULL,
+ PRIMARY KEY (`i`),
+ KEY `j` (`j`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ PARTITION BY KEY (`i`)
+PARTITIONS 4
+SELECT * FROM t1 ORDER BY i LIMIT 10;
+i j k
+1 1 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+7 7 7
+8 8 8
+9 9 9
+10 10 10
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+100
+DROP TABLE t1;
+set @tmp_rocksdb_strict_collation_check= @@rocksdb_strict_collation_check;
+set global rocksdb_strict_collation_check=1;
+CREATE TABLE t1 (a INT, b TEXT);
+# MariaDB no longer gives ER_UNSUPPORTED_COLLATION
+ALTER TABLE t1 ADD KEY kb(b(10));
+ALTER TABLE t1 ADD PRIMARY KEY(a);
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b TEXT collate utf8_general_ci);
+# MariaDB no longer gives ER_UNSUPPORTED_COLLATION
+ALTER TABLE t1 ADD KEY kb(b(10));
+Warnings:
+Warning 1815 Internal error: Indexed column test.t1.b uses a collation that does not allow index-only access in secondary key and has reduced disk space efficiency in primary key.
+ALTER TABLE t1 ADD PRIMARY KEY(a);
+DROP TABLE t1;
+set global rocksdb_strict_collation_check= @tmp_rocksdb_strict_collation_check;
+set global rocksdb_bulk_load=1;
+# Establish connection con1 (user=root)
+connect con1,localhost,root,,;
+# Switch to connection con1
+connection con1;
+show global variables like 'rocksdb_bulk_load%';
+Variable_name Value
+rocksdb_bulk_load ON
+rocksdb_bulk_load_allow_sk OFF
+rocksdb_bulk_load_allow_unsorted OFF
+rocksdb_bulk_load_size 1000
+show session variables like 'rocksdb_bulk_load%';
+Variable_name Value
+rocksdb_bulk_load ON
+rocksdb_bulk_load_allow_sk OFF
+rocksdb_bulk_load_allow_unsorted OFF
+rocksdb_bulk_load_size 1000
+CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
+INSERT INTO t1 VALUES (1,1);
+# Disconnecting on con1
+disconnect con1;
+# Establish connection con2 (user=root)
+connect con2,localhost,root,,;
+# Switch to connection con2
+connection con2;
+ALTER TABLE t1 ADD INDEX kj(j), ALGORITHM=INPLACE;
+SELECT COUNT(*) FROM t1 FORCE INDEX(PRIMARY);
+COUNT(*)
+1
+SELECT COUNT(*) FROM t1 FORCE INDEX(kj);
+COUNT(*)
+1
+DROP TABLE t1;
+disconnect con2;
+# Establish connection con1 (user=root)
+connect con1,localhost,root,,;
+# Establish connection con2 (user=root)
+connect con2,localhost,root,,;
+# Switch to connection con1
+connection con1;
+CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
+set rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (1,1);
+# Switch to connection con2
+connection con2;
+SELECT COUNT(*) FROM t1 FORCE INDEX(PRIMARY);
+COUNT(*)
+0
+ALTER TABLE t1 ADD INDEX kj(j), ALGORITHM=INPLACE;
+SELECT COUNT(*) FROM t1 FORCE INDEX(PRIMARY);
+COUNT(*)
+1
+SELECT COUNT(*) FROM t1 FORCE INDEX(kj);
+COUNT(*)
+1
+set global rocksdb_bulk_load=0;
+DROP TABLE t1;
+connection default;
+SET @prior_rocksdb_merge_combine_read_size= @@rocksdb_merge_combine_read_size;
+SET @prior_rocksdb_strict_collation_check= @@rocksdb_strict_collation_check;
+SET @prior_rocksdb_merge_buf_size = @@rocksdb_merge_buf_size;
+SET global rocksdb_strict_collation_check = off;
+SET session rocksdb_merge_combine_read_size = 566;
+SET session rocksdb_merge_buf_size = 340;
+show variables like 'rocksdb_bulk_load%';
+Variable_name Value
+rocksdb_bulk_load OFF
+rocksdb_bulk_load_allow_sk OFF
+rocksdb_bulk_load_allow_unsorted OFF
+rocksdb_bulk_load_size 1000
+CREATE TABLE t1 (a VARCHAR(80)) ENGINE=RocksDB;
+INSERT INTO t1 (a) VALUES (REPEAT("a", 80));
+INSERT INTO t1 (a) VALUES (REPEAT("a", 80));
+INSERT INTO t1 (a) VALUES (REPEAT("a", 80));
+INSERT INTO t1 (a) VALUES (REPEAT("a", 80));
+ALTER TABLE t1 ADD INDEX ka(a), ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` varchar(80) DEFAULT NULL,
+ KEY `ka` (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+SELECT * FROM t1 FORCE INDEX(ka) WHERE a > "";
+a
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+DROP TABLE t1;
+SET session rocksdb_merge_buf_size = @prior_rocksdb_merge_buf_size;
+SET session rocksdb_merge_combine_read_size = @prior_rocksdb_merge_combine_read_size;
+SET global rocksdb_strict_collation_check = @prior_rocksdb_strict_collation_check;
+CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
+set global rocksdb_force_flush_memtable_now=1;
+ALTER TABLE t1 ADD INDEX kj(j), ALGORITHM=INPLACE;
+larger
+1
+larger
+1
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+larger
+1
+larger
+1
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+select 1300 < 1300 * 1.5 as "same";
+same
+1
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b INT,
+c INT,
+KEY kbc(b,c)) ENGINE = ROCKSDB;
+INSERT INTO t1 (a,b,c) VALUES (1,1,1);
+INSERT INTO t1 (a,b,c) VALUES (2,2,2);
+INSERT INTO t1 (a,b,c) VALUES (3,3,3);
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` int(11) DEFAULT NULL,
+ `c` int(11) DEFAULT NULL,
+ PRIMARY KEY (`a`),
+ KEY `kbc` (`b`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ALTER TABLE t1 DROP INDEX kbc, ADD INDEX kbc(b,c), ALGORITHM=INPLACE;
+ALTER TABLE t1 DROP INDEX kbc;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b varchar(10),
+index kb(b(5))
+) ENGINE = ROCKSDB charset utf8 collate utf8_bin;
+INSERT INTO t1 (a,b) VALUES (1,'1111122222');
+INSERT INTO t1 (a,b) VALUES (2,'2222233333');
+INSERT INTO t1 (a,b) VALUES (3,'3333344444');
+ALTER TABLE t1 DROP INDEX kb, ADD INDEX kb(b(8)), ALGORITHM=INPLACE;
+SELECT * FROM t1 FORCE INDEX(kb);
+a b
+1 1111122222
+2 2222233333
+3 3333344444
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` varchar(10) COLLATE utf8_bin DEFAULT NULL,
+ PRIMARY KEY (`a`),
+ KEY `kb` (`b`(8))
+) ENGINE=ROCKSDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin
+DROP TABLE t1;
+SET @prior_rocksdb_table_stats_sampling_pct = @@rocksdb_table_stats_sampling_pct;
+set global rocksdb_table_stats_sampling_pct = 100;
+CREATE TABLE t1 (a INT, b INT, PRIMARY KEY ka(a)) ENGINE=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1, 10);
+INSERT INTO t1 (a, b) VALUES (2, 10);
+INSERT INTO t1 (a, b) VALUES (3, 20);
+INSERT INTO t1 (a, b) VALUES (4, 20);
+set global rocksdb_force_flush_memtable_now=1;
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+SHOW INDEX in t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 a A 4 NULL NULL LSMTREE
+ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE;
+SHOW INDEX in t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 a A 4 NULL NULL LSMTREE
+t1 1 kb 1 b A 2 NULL NULL YES LSMTREE
+DROP TABLE t1;
+SET global rocksdb_table_stats_sampling_pct = @prior_rocksdb_table_stats_sampling_pct;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_cardinality.result b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_cardinality.result
new file mode 100644
index 00000000000..61105fa1ba2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_cardinality.result
@@ -0,0 +1,24 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (i INT PRIMARY KEY, j INT) ENGINE = ROCKSDB;
+INSERT INTO t1 VALUES (1,2), (2,4), (3,6), (4,8), (5,10);
+SET debug_sync= 'rocksdb.commit_in_place_alter_table WAIT_FOR flushed';
+ALTER TABLE t1 ADD INDEX kj(j), ALGORITHM=INPLACE;
+connect con1,localhost,root,,;
+SET GLOBAL rocksdb_force_flush_memtable_now = 1;
+SET debug_sync= 'now SIGNAL flushed';
+connection default;
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_INDEX_FILE_MAP
+WHERE INDEX_NUMBER =
+(SELECT INDEX_NUMBER FROM INFORMATION_SCHEMA.ROCKSDB_DDL
+WHERE TABLE_NAME = 't1' AND INDEX_NAME = "PRIMARY");
+COLUMN_FAMILY INDEX_NUMBER SST_NAME NUM_ROWS DATA_SIZE ENTRY_DELETES ENTRY_SINGLEDELETES ENTRY_MERGES ENTRY_OTHERS DISTINCT_KEYS_PREFIX
+# # SSTNAME 5 # # # # # 5
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_INDEX_FILE_MAP
+WHERE INDEX_NUMBER =
+(SELECT INDEX_NUMBER FROM INFORMATION_SCHEMA.ROCKSDB_DDL
+WHERE TABLE_NAME = 't1' AND INDEX_NAME = "kj");
+COLUMN_FAMILY INDEX_NUMBER SST_NAME NUM_ROWS DATA_SIZE ENTRY_DELETES ENTRY_SINGLEDELETES ENTRY_MERGES ENTRY_OTHERS DISTINCT_KEYS_PREFIX
+# # SSTNAME 5 # # # # # 5,5
+disconnect con1;
+SET debug_sync='RESET';
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_crash.result b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_crash.result
new file mode 100644
index 00000000000..d3801258f0c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_crash.result
@@ -0,0 +1,93 @@
+drop table if exists t1;
+CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1, 5);
+INSERT INTO t1 (a, b) VALUES (2, 6);
+INSERT INTO t1 (a, b) VALUES (3, 7);
+# crash_during_online_index_creation
+flush logs;
+SET SESSION debug_dbug="+d,crash_during_online_index_creation";
+ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE;
+ERROR HY000: Lost connection to MySQL server during query
+SET SESSION debug_dbug="-d,crash_during_online_index_creation";
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` int(11) DEFAULT NULL,
+ KEY `ka` (`a`),
+ KEY `kab` (`a`,`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+DROP TABLE t1;
+CREATE TABLE t1 (i INT, j INT, k INT, PRIMARY KEY (i), KEY(j)) ENGINE = ROCKSDB PARTITION BY KEY(i) PARTITIONS 4;
+# crash_during_index_creation_partition
+flush logs;
+SET SESSION debug_dbug="+d,crash_during_index_creation_partition";
+ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE;
+ERROR HY000: Lost connection to MySQL server during query
+SET SESSION debug_dbug="-d,crash_during_index_creation_partition";
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) NOT NULL,
+ `j` int(11) DEFAULT NULL,
+ `k` int(11) DEFAULT NULL,
+ PRIMARY KEY (`i`),
+ KEY `j` (`j`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ PARTITION BY KEY (`i`)
+PARTITIONS 4
+ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE;
+SELECT * FROM t1 ORDER BY i LIMIT 10;
+i j k
+1 1 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+7 7 7
+8 8 8
+9 9 9
+10 10 10
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+100
+DROP TABLE t1;
+CREATE TABLE t1 (i INT, j INT, k INT, PRIMARY KEY (i), KEY(j)) ENGINE = ROCKSDB PARTITION BY KEY(i) PARTITIONS 4;
+# crash_during_index_creation_partition
+flush logs;
+SET SESSION debug_dbug="+d,myrocks_simulate_index_create_rollback";
+ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE;
+ERROR HY000: Intentional failure in inplace alter occurred.
+SET SESSION debug_dbug="-d,myrocks_simulate_index_create_rollback";
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) NOT NULL,
+ `j` int(11) DEFAULT NULL,
+ `k` int(11) DEFAULT NULL,
+ PRIMARY KEY (`i`),
+ KEY `j` (`j`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ PARTITION BY KEY (`i`)
+PARTITIONS 4
+ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) NOT NULL,
+ `j` int(11) DEFAULT NULL,
+ `k` int(11) DEFAULT NULL,
+ PRIMARY KEY (`i`),
+ KEY `j` (`j`),
+ KEY `kij` (`i`,`j`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ PARTITION BY KEY (`i`)
+PARTITIONS 4
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+100
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result
new file mode 100644
index 00000000000..e7883f7e03e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result
@@ -0,0 +1,79 @@
+drop table if exists t1;
+CREATE TABLE t1(pk CHAR(5) PRIMARY KEY, a char(30), b char(30)) COLLATE 'latin1_bin';
+set rocksdb_bulk_load=1;
+set rocksdb_bulk_load_size=10000;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+set rocksdb_bulk_load=0;
+select count(pk) from t1;
+count(pk)
+300000
+select count(a) from t1;
+count(a)
+300000
+select count(b) from t1;
+count(b)
+300000
+ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE;
+set @tmp= @@rocksdb_max_row_locks;
+set session rocksdb_max_row_locks=1000;
+ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY;
+ERROR HY000: Got error 10 'Operation aborted: Failed to acquire lock due to rocksdb_max_row_locks limit' from ROCKSDB
+set session rocksdb_bulk_load=1;
+ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY;
+set session rocksdb_bulk_load=0;
+set session rocksdb_max_row_locks=@tmp;
+SELECT COUNT(*) as c FROM
+(SELECT COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', `b`, CONCAT(ISNULL(`b`)))) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `t1` FORCE INDEX(`kb`)
+UNION DISTINCT
+SELECT COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#',
+`b`, CONCAT(ISNULL(`b`)))) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `t1` FORCE
+INDEX(`kb_copy`)) as temp;
+c
+1
+select count(*) from t1 FORCE INDEX(kb);
+count(*)
+300000
+select count(*) from t1 FORCE INDEX(kb_copy);
+count(*)
+300000
+select count(*) from t1 FORCE INDEX(PRIMARY);
+count(*)
+300000
+ALTER TABLE t1 DROP INDEX kb, ALGORITHM=INPLACE;
+ALTER TABLE t1 DROP INDEX kb_copy, ALGORITHM=INPLACE;
+ALTER TABLE t1 ADD INDEX kb(b), ADD INDEX kab(a,b), ALGORITHM=INPLACE;
+SELECT COUNT(*) FROM t1 FORCE INDEX(kab);
+COUNT(*)
+300000
+SELECT COUNT(*) FROM t1 FORCE INDEX(kb);
+COUNT(*)
+300000
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` char(5) COLLATE latin1_bin NOT NULL,
+ `a` char(30) COLLATE latin1_bin DEFAULT NULL,
+ `b` char(30) COLLATE latin1_bin DEFAULT NULL,
+ PRIMARY KEY (`pk`),
+ KEY `kb` (`b`),
+ KEY `kab` (`a`,`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+DROP TABLE t1;
+CREATE TABLE t1 (a INT PRIMARY KEY, b INT, KEY kab(a,b)) ENGINE=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1, 5);
+INSERT INTO t1 (a, b) VALUES (2, 6);
+INSERT INTO t1 (a, b) VALUES (3, 7);
+ALTER TABLE t1 DROP INDEX kab, ALGORITHM=INPLACE;
+ALTER TABLE t1 ADD INDEX kb(b) comment 'rev:cf1', ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` int(11) DEFAULT NULL,
+ PRIMARY KEY (`a`),
+ KEY `kb` (`b`) COMMENT 'rev:cf1'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+SELECT COUNT(*) FROM t1 FORCE INDEX(kb);
+COUNT(*)
+3
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/add_unique_index_inplace.result b/storage/rocksdb/mysql-test/rocksdb/r/add_unique_index_inplace.result
new file mode 100644
index 00000000000..f7c4bab685d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/add_unique_index_inplace.result
@@ -0,0 +1,103 @@
+drop table if exists t1;
+CREATE TABLE t1 (a INT, b INT, PRIMARY KEY ka(a)) ENGINE=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1, 5);
+INSERT INTO t1 (a, b) VALUES (2, 6);
+INSERT INTO t1 (a, b) VALUES (3, 7);
+INSERT INTO t1 (a,b) VALUES (4,5);
+ALTER TABLE t1 ADD UNIQUE INDEX kb(b), ALGORITHM=INPLACE;
+ERROR 23000: Duplicate entry '5' for key 'kb'
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` int(11) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b INT, PRIMARY KEY ka(a)) ENGINE=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1, 5);
+INSERT INTO t1 (a, b) VALUES (2, 6);
+INSERT INTO t1 (a, b) VALUES (3, 7);
+ALTER TABLE t1 ADD UNIQUE INDEX kb(b), ALGORITHM=INPLACE;
+INSERT INTO t1 (a,b) VALUES (4,5);
+ERROR 23000: Duplicate entry '5' for key 'kb'
+INSERT INTO t1 (a,b) VALUES (5,8);
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` int(11) DEFAULT NULL,
+ PRIMARY KEY (`a`),
+ UNIQUE KEY `kb` (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+SELECT * FROM t1 FORCE INDEX(kb);
+a b
+1 5
+2 6
+3 7
+5 8
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b INT, PRIMARY KEY ka(a)) ENGINE=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1, 5);
+INSERT INTO t1 (a, b) VALUES (2, NULL);
+INSERT INTO t1 (a, b) VALUES (3, NULL);
+ALTER TABLE t1 ADD UNIQUE INDEX kb(b), ALGORITHM=INPLACE;
+INSERT INTO t1 (a, b) VALUES (4, NULL);
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` int(11) DEFAULT NULL,
+ PRIMARY KEY (`a`),
+ UNIQUE KEY `kb` (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+SELECT COUNT(*) FROM t1 FORCE INDEX(kb);
+COUNT(*)
+4
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b INT, c INT, PRIMARY KEY ka(a)) ENGINE=RocksDB;
+INSERT INTO t1 (a,b,c) VALUES (1,1,NULL);
+INSERT INTO t1 (a,b,c) VALUES (2,1,NULL);
+INSERT INTO t1 (a,b,c) VALUES (3,1,NULL);
+INSERT INTO t1 (a,b,c) VALUES (4,1,5);
+ALTER TABLE t1 ADD UNIQUE INDEX kbc(b,c), ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` int(11) DEFAULT NULL,
+ `c` int(11) DEFAULT NULL,
+ PRIMARY KEY (`a`),
+ UNIQUE KEY `kbc` (`b`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+SELECT COUNT(*) FROM t1 FORCE INDEX(kbc);
+COUNT(*)
+4
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b INT) ENGINE=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1, 5);
+INSERT INTO t1 (a, b) VALUES (2, 6);
+INSERT INTO t1 (a, b) VALUES (3, 7);
+ALTER TABLE t1 ADD UNIQUE INDEX kb(b);
+INSERT INTO t1 (a, b) VALUES (4, 8);
+INSERT INTO t1 (a, b) VALUES (5, 5);
+ERROR 23000: Duplicate entry '5' for key 'kb'
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` int(11) DEFAULT NULL,
+ UNIQUE KEY `kb` (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b INT,
+c INT,
+KEY kbc(b,c)) ENGINE = ROCKSDB;
+INSERT INTO t1 (a,b,c) VALUES (1,1,1);
+INSERT INTO t1 (a,b,c) VALUES (2,2,2);
+INSERT INTO t1 (a,b,c) VALUES (3,2,2);
+ALTER TABLE t1 DROP INDEX kbc, ADD UNIQUE INDEX kbc(b,c), ALGORITHM=INPLACE;
+ERROR 23000: Duplicate entry '2-2' for key 'kbc'
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/allow_no_pk_concurrent_insert.result b/storage/rocksdb/mysql-test/rocksdb/r/allow_no_pk_concurrent_insert.result
new file mode 100644
index 00000000000..4fef9bce405
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/allow_no_pk_concurrent_insert.result
@@ -0,0 +1,7 @@
+drop table if exists t1;
+# Binary must be compiled with debug for this test
+CREATE TABLE t1 (a INT) ENGINE=rocksdb;
+SELECT COUNT(*) from t1;
+COUNT(*)
+400
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key.result b/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key.result
new file mode 100644
index 00000000000..a8d5c07072c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key.result
@@ -0,0 +1,295 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT, b CHAR(8)) ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a int(11) YES NULL
+b char(8) YES NULL
+INSERT INTO t1 (a,b) VALUES (76,'bar');
+INSERT INTO t1 (a,b) VALUES (35,'foo');
+INSERT INTO t1 (a,b) VALUES (77,'baz');
+SELECT * FROM t1 WHERE a = 35;
+a b
+35 foo
+SELECT * FROM t1 WHERE a = 35 AND b = 'foo';
+a b
+35 foo
+SELECT * FROM t1 WHERE a = 77 OR b = 'bar';
+a b
+76 bar
+77 baz
+SELECT * FROM t1 WHERE a > 35;
+a b
+76 bar
+77 baz
+SELECT * FROM t1;
+a b
+35 foo
+76 bar
+77 baz
+UPDATE t1 SET a=a+100;
+SELECT * FROM t1;
+a b
+135 foo
+176 bar
+177 baz
+UPDATE t1 SET a=a-100, b='bbb' WHERE a>100;
+SELECT * FROM t1;
+a b
+35 bbb
+76 bbb
+77 bbb
+UPDATE t1 SET a=300, b='ccc' WHERE a>70;
+SELECT * FROM t1;
+a b
+300 ccc
+300 ccc
+35 bbb
+UPDATE t1 SET a=123 WHERE a=35;
+SELECT * FROM t1;
+a b
+123 bbb
+300 ccc
+300 ccc
+UPDATE t1 SET a=321 WHERE b='ccc';
+SELECT * FROM t1;
+a b
+123 bbb
+321 ccc
+321 ccc
+INSERT INTO t1 (a,b) VALUES (45,'bob');
+SELECT * FROM t1;
+a b
+123 bbb
+321 ccc
+321 ccc
+45 bob
+DELETE FROM t1 WHERE a=123;
+SELECT * FROM t1;
+a b
+321 ccc
+321 ccc
+45 bob
+DELETE FROM t1 WHERE b > 'bbb' AND a > 100;
+SELECT * FROM t1;
+a b
+45 bob
+TRUNCATE TABLE t1;
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, c CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (1,'a'),(5,'z');
+ALTER TABLE t1 ADD COLUMN b INT;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `c` char(8) DEFAULT NULL,
+ `b` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+SELECT * FROM t1;
+a c b
+1 a NULL
+5 z NULL
+ALTER TABLE t1 DROP COLUMN b;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `c` char(8) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+SELECT * FROM t1;
+a c
+1 a
+5 z
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+ALTER TABLE t1 DROP COLUMN pk;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a int(11) YES NULL
+b char(8) YES NULL
+INSERT INTO t1 (a,b) VALUES (76,'bar');
+INSERT INTO t1 (a,b) VALUES (35,'foo');
+INSERT INTO t1 (a,b) VALUES (77,'baz');
+SELECT * FROM t1 WHERE a = 35;
+a b
+35 foo
+SELECT * FROM t1 WHERE a = 35 AND b = 'foo';
+a b
+35 foo
+SELECT * FROM t1 WHERE a = 77 OR b = 'bar';
+a b
+76 bar
+77 baz
+SELECT * FROM t1 WHERE a > 35;
+a b
+76 bar
+77 baz
+SELECT * FROM t1;
+a b
+35 foo
+76 bar
+77 baz
+UPDATE t1 SET a=a+100;
+SELECT * FROM t1;
+a b
+135 foo
+176 bar
+177 baz
+UPDATE t1 SET a=a-100, b='bbb' WHERE a>100;
+SELECT * FROM t1;
+a b
+35 bbb
+76 bbb
+77 bbb
+UPDATE t1 SET a=300, b='ccc' WHERE a>70;
+SELECT * FROM t1;
+a b
+300 ccc
+300 ccc
+35 bbb
+UPDATE t1 SET a=123 WHERE a=35;
+SELECT * FROM t1;
+a b
+123 bbb
+300 ccc
+300 ccc
+UPDATE t1 SET a=321 WHERE b='ccc';
+SELECT * FROM t1;
+a b
+123 bbb
+321 ccc
+321 ccc
+INSERT INTO t1 (a,b) VALUES (45,'bob');
+SELECT * FROM t1;
+a b
+123 bbb
+321 ccc
+321 ccc
+45 bob
+DELETE FROM t1 WHERE a=123;
+SELECT * FROM t1;
+a b
+321 ccc
+321 ccc
+45 bob
+DELETE FROM t1 WHERE b > 'bbb' AND a > 100;
+SELECT * FROM t1;
+a b
+45 bob
+TRUNCATE TABLE t1;
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1,t2;
+CREATE TABLE t1 (a INT, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+CREATE TABLE t2 (a INT, b CHAR(8)) ENGINE=rocksdb;
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+INSERT INTO t1 (a,b) VALUES (3,'c');
+INSERT INTO t2 (a,b) VALUES (4,'d');
+CHECK TABLE t1, t2 FOR UPGRADE;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+test.t2 check status OK
+INSERT INTO t2 (a,b) VALUES (5,'e');
+CHECK TABLE t2 QUICK;
+Table Op Msg_type Msg_text
+test.t2 check status OK
+INSERT INTO t1 (a,b) VALUES (6,'f');
+CHECK TABLE t1 FAST;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+INSERT INTO t1 (a,b) VALUES (7,'g');
+INSERT INTO t2 (a,b) VALUES (8,'h');
+CHECK TABLE t2, t1 MEDIUM;
+Table Op Msg_type Msg_text
+test.t2 check status OK
+test.t1 check status OK
+INSERT INTO t1 (a,b) VALUES (9,'i');
+INSERT INTO t2 (a,b) VALUES (10,'j');
+CHECK TABLE t1, t2 EXTENDED;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+test.t2 check status OK
+INSERT INTO t1 (a,b) VALUES (11,'k');
+CHECK TABLE t1 CHANGED;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+DROP TABLE t1, t2;
+CREATE TABLE t1 (a INT, b CHAR(8), UNIQUE INDEX(a)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+INSERT INTO t1 (a,b) VALUES (1,'c');
+ERROR 23000: Duplicate entry '1' for key 'a'
+SELECT * FROM t1;
+a b
+1 a
+2 b
+SELECT * FROM t1 WHERE a = 2;
+a b
+2 b
+EXPLAIN SELECT * FROM t1 WHERE a = 2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 const a a 5 const 1
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b CHAR(8)) ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a int(11) YES NULL
+b char(8) YES NULL
+INSERT INTO t1 (a,b) VALUES (35,'foo');
+INSERT INTO t1 (a,b) VALUES (35,'foo');
+INSERT INTO t1 (a,b) VALUES (36,'foo');
+DELETE FROM t1 WHERE a = 35 AND b = 'foo';
+SELECT * FROM t1;
+a b
+36 foo
+DROP TABLE t1;
+#
+# Issue #834/MDEV-15304 ALTER TABLE table_with_hidden_pk causes Can't
+# write; duplicate key in table error and/or crash
+#
+CREATE TABLE t1 (a INT, KEY(a)) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (1),(1+1);
+create table t2 (a int);
+insert into t2 values (10),(20),(30);
+BEGIN;
+select * from t2;
+a
+10
+20
+30
+connect con1,localhost,root,,;
+connection con1;
+alter table t1 force;
+connection default;
+select * from t1;
+a
+connection con1;
+insert into t1 values (100);
+select * from t1;
+a
+1
+2
+100
+disconnect con1;
+connection default;
+rollback;
+drop table t1,t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key_with_sk.result b/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key_with_sk.result
new file mode 100644
index 00000000000..5d947603ec5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key_with_sk.result
@@ -0,0 +1,797 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT, b CHAR(8), KEY(a)) ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) DEFAULT NULL,
+ KEY `a` (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a int(11) YES MUL NULL
+b char(8) YES NULL
+INSERT INTO t1 (a,b) VALUES (76,'bar');
+INSERT INTO t1 (a,b) VALUES (35,'foo');
+INSERT INTO t1 (a,b) VALUES (77,'baz');
+SELECT * FROM t1 WHERE a = 35;
+a b
+35 foo
+SELECT * FROM t1 WHERE a = 35 AND b = 'foo';
+a b
+35 foo
+SELECT * FROM t1 WHERE a = 77 OR b = 'bar';
+a b
+76 bar
+77 baz
+SELECT * FROM t1 WHERE a > 35;
+a b
+76 bar
+77 baz
+SELECT * FROM t1;
+a b
+35 foo
+76 bar
+77 baz
+UPDATE t1 SET a=a+100;
+SELECT * FROM t1;
+a b
+135 foo
+176 bar
+177 baz
+UPDATE t1 SET a=a-100, b='bbb' WHERE a>100;
+SELECT * FROM t1;
+a b
+35 bbb
+76 bbb
+77 bbb
+UPDATE t1 SET a=300, b='ccc' WHERE a>70;
+SELECT * FROM t1;
+a b
+300 ccc
+300 ccc
+35 bbb
+UPDATE t1 SET a=123 WHERE a=35;
+SELECT * FROM t1;
+a b
+123 bbb
+300 ccc
+300 ccc
+UPDATE t1 SET a=321 WHERE b='ccc';
+SELECT * FROM t1;
+a b
+123 bbb
+321 ccc
+321 ccc
+INSERT INTO t1 (a,b) VALUES (45,'bob');
+SELECT * FROM t1;
+a b
+123 bbb
+321 ccc
+321 ccc
+45 bob
+DELETE FROM t1 WHERE a=123;
+SELECT * FROM t1;
+a b
+321 ccc
+321 ccc
+45 bob
+DELETE FROM t1 WHERE b > 'bbb' AND a > 100;
+SELECT * FROM t1;
+a b
+45 bob
+TRUNCATE TABLE t1;
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b CHAR(8)) ENGINE=rocksdb;
+ALTER TABLE t1 ADD INDEX (b);
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) DEFAULT NULL,
+ KEY `b` (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a int(11) YES NULL
+b char(8) YES MUL NULL
+INSERT INTO t1 (a,b) VALUES (76,'bar');
+INSERT INTO t1 (a,b) VALUES (35,'foo');
+INSERT INTO t1 (a,b) VALUES (77,'baz');
+SELECT * FROM t1 WHERE a = 35;
+a b
+35 foo
+SELECT * FROM t1 WHERE a = 35 AND b = 'foo';
+a b
+35 foo
+SELECT * FROM t1 WHERE a = 77 OR b = 'bar';
+a b
+76 bar
+77 baz
+SELECT * FROM t1 WHERE a > 35;
+a b
+76 bar
+77 baz
+SELECT * FROM t1;
+a b
+35 foo
+76 bar
+77 baz
+UPDATE t1 SET a=a+100;
+SELECT * FROM t1;
+a b
+135 foo
+176 bar
+177 baz
+UPDATE t1 SET a=a-100, b='bbb' WHERE a>100;
+SELECT * FROM t1;
+a b
+35 bbb
+76 bbb
+77 bbb
+UPDATE t1 SET a=300, b='ccc' WHERE a>70;
+SELECT * FROM t1;
+a b
+300 ccc
+300 ccc
+35 bbb
+UPDATE t1 SET a=123 WHERE a=35;
+SELECT * FROM t1;
+a b
+123 bbb
+300 ccc
+300 ccc
+UPDATE t1 SET a=321 WHERE b='ccc';
+SELECT * FROM t1;
+a b
+123 bbb
+321 ccc
+321 ccc
+INSERT INTO t1 (a,b) VALUES (45,'bob');
+SELECT * FROM t1;
+a b
+123 bbb
+321 ccc
+321 ccc
+45 bob
+DELETE FROM t1 WHERE a=123;
+SELECT * FROM t1;
+a b
+321 ccc
+321 ccc
+45 bob
+DELETE FROM t1 WHERE b > 'bbb' AND a > 100;
+SELECT * FROM t1;
+a b
+45 bob
+TRUNCATE TABLE t1;
+ALTER TABLE t1 DROP INDEX b;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a int(11) YES NULL
+b char(8) YES NULL
+INSERT INTO t1 (a,b) VALUES (76,'bar');
+INSERT INTO t1 (a,b) VALUES (35,'foo');
+INSERT INTO t1 (a,b) VALUES (77,'baz');
+SELECT * FROM t1 WHERE a = 35;
+a b
+35 foo
+SELECT * FROM t1 WHERE a = 35 AND b = 'foo';
+a b
+35 foo
+SELECT * FROM t1 WHERE a = 77 OR b = 'bar';
+a b
+76 bar
+77 baz
+SELECT * FROM t1 WHERE a > 35;
+a b
+76 bar
+77 baz
+SELECT * FROM t1;
+a b
+35 foo
+76 bar
+77 baz
+UPDATE t1 SET a=a+100;
+SELECT * FROM t1;
+a b
+135 foo
+176 bar
+177 baz
+UPDATE t1 SET a=a-100, b='bbb' WHERE a>100;
+SELECT * FROM t1;
+a b
+35 bbb
+76 bbb
+77 bbb
+UPDATE t1 SET a=300, b='ccc' WHERE a>70;
+SELECT * FROM t1;
+a b
+300 ccc
+300 ccc
+35 bbb
+UPDATE t1 SET a=123 WHERE a=35;
+SELECT * FROM t1;
+a b
+123 bbb
+300 ccc
+300 ccc
+UPDATE t1 SET a=321 WHERE b='ccc';
+SELECT * FROM t1;
+a b
+123 bbb
+321 ccc
+321 ccc
+INSERT INTO t1 (a,b) VALUES (45,'bob');
+SELECT * FROM t1;
+a b
+123 bbb
+321 ccc
+321 ccc
+45 bob
+DELETE FROM t1 WHERE a=123;
+SELECT * FROM t1;
+a b
+321 ccc
+321 ccc
+45 bob
+DELETE FROM t1 WHERE b > 'bbb' AND a > 100;
+SELECT * FROM t1;
+a b
+45 bob
+TRUNCATE TABLE t1;
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+ALTER TABLE t1 DROP COLUMN pk;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a int(11) YES NULL
+b char(8) YES NULL
+INSERT INTO t1 (a,b) VALUES (76,'bar');
+INSERT INTO t1 (a,b) VALUES (35,'foo');
+INSERT INTO t1 (a,b) VALUES (77,'baz');
+SELECT * FROM t1 WHERE a = 35;
+a b
+35 foo
+SELECT * FROM t1 WHERE a = 35 AND b = 'foo';
+a b
+35 foo
+SELECT * FROM t1 WHERE a = 77 OR b = 'bar';
+a b
+76 bar
+77 baz
+SELECT * FROM t1 WHERE a > 35;
+a b
+76 bar
+77 baz
+SELECT * FROM t1;
+a b
+35 foo
+76 bar
+77 baz
+UPDATE t1 SET a=a+100;
+SELECT * FROM t1;
+a b
+135 foo
+176 bar
+177 baz
+UPDATE t1 SET a=a-100, b='bbb' WHERE a>100;
+SELECT * FROM t1;
+a b
+35 bbb
+76 bbb
+77 bbb
+UPDATE t1 SET a=300, b='ccc' WHERE a>70;
+SELECT * FROM t1;
+a b
+300 ccc
+300 ccc
+35 bbb
+UPDATE t1 SET a=123 WHERE a=35;
+SELECT * FROM t1;
+a b
+123 bbb
+300 ccc
+300 ccc
+UPDATE t1 SET a=321 WHERE b='ccc';
+SELECT * FROM t1;
+a b
+123 bbb
+321 ccc
+321 ccc
+INSERT INTO t1 (a,b) VALUES (45,'bob');
+SELECT * FROM t1;
+a b
+123 bbb
+321 ccc
+321 ccc
+45 bob
+DELETE FROM t1 WHERE a=123;
+SELECT * FROM t1;
+a b
+321 ccc
+321 ccc
+45 bob
+DELETE FROM t1 WHERE b > 'bbb' AND a > 100;
+SELECT * FROM t1;
+a b
+45 bob
+TRUNCATE TABLE t1;
+DROP TABLE t1;
+#
+# MDEV-4313: RocksDB: Server crashes in Rdb_key_def::setup on dropping the primary key column
+#
+CREATE TABLE t1 (pk INT PRIMARY KEY, i INT NOT NULL, KEY(i)) ENGINE=RocksDB;
+ALTER TABLE t1 DROP COLUMN `pk`;
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b CHAR(8), KEY(a), KEY(b)) ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) DEFAULT NULL,
+ KEY `a` (`a`),
+ KEY `b` (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a int(11) YES MUL NULL
+b char(8) YES MUL NULL
+INSERT INTO t1 (a,b) VALUES (76,'bar');
+INSERT INTO t1 (a,b) VALUES (35,'foo');
+INSERT INTO t1 (a,b) VALUES (77,'baz');
+SELECT * FROM t1 WHERE a = 35;
+a b
+35 foo
+SELECT * FROM t1 WHERE a = 35 AND b = 'foo';
+a b
+35 foo
+SELECT * FROM t1 WHERE a = 77 OR b = 'bar';
+a b
+76 bar
+77 baz
+SELECT * FROM t1 WHERE a > 35;
+a b
+76 bar
+77 baz
+SELECT * FROM t1;
+a b
+35 foo
+76 bar
+77 baz
+UPDATE t1 SET a=a+100;
+SELECT * FROM t1;
+a b
+135 foo
+176 bar
+177 baz
+UPDATE t1 SET a=a-100, b='bbb' WHERE a>100;
+SELECT * FROM t1;
+a b
+35 bbb
+76 bbb
+77 bbb
+UPDATE t1 SET a=300, b='ccc' WHERE a>70;
+SELECT * FROM t1;
+a b
+300 ccc
+300 ccc
+35 bbb
+UPDATE t1 SET a=123 WHERE a=35;
+SELECT * FROM t1;
+a b
+123 bbb
+300 ccc
+300 ccc
+UPDATE t1 SET a=321 WHERE b='ccc';
+SELECT * FROM t1;
+a b
+123 bbb
+321 ccc
+321 ccc
+INSERT INTO t1 (a,b) VALUES (45,'bob');
+SELECT * FROM t1;
+a b
+123 bbb
+321 ccc
+321 ccc
+45 bob
+DELETE FROM t1 WHERE a=123;
+SELECT * FROM t1;
+a b
+321 ccc
+321 ccc
+45 bob
+DELETE FROM t1 WHERE b > 'bbb' AND a > 100;
+SELECT * FROM t1;
+a b
+45 bob
+TRUNCATE TABLE t1;
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b CHAR(8), KEY(a, b)) ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) DEFAULT NULL,
+ KEY `a` (`a`,`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a int(11) YES MUL NULL
+b char(8) YES NULL
+INSERT INTO t1 (a,b) VALUES (76,'bar');
+INSERT INTO t1 (a,b) VALUES (35,'foo');
+INSERT INTO t1 (a,b) VALUES (77,'baz');
+SELECT * FROM t1 WHERE a = 35;
+a b
+35 foo
+SELECT * FROM t1 WHERE a = 35 AND b = 'foo';
+a b
+35 foo
+SELECT * FROM t1 WHERE a = 77 OR b = 'bar';
+a b
+76 bar
+77 baz
+SELECT * FROM t1 WHERE a > 35;
+a b
+76 bar
+77 baz
+SELECT * FROM t1;
+a b
+35 foo
+76 bar
+77 baz
+UPDATE t1 SET a=a+100;
+SELECT * FROM t1;
+a b
+135 foo
+176 bar
+177 baz
+UPDATE t1 SET a=a-100, b='bbb' WHERE a>100;
+SELECT * FROM t1;
+a b
+35 bbb
+76 bbb
+77 bbb
+UPDATE t1 SET a=300, b='ccc' WHERE a>70;
+SELECT * FROM t1;
+a b
+300 ccc
+300 ccc
+35 bbb
+UPDATE t1 SET a=123 WHERE a=35;
+SELECT * FROM t1;
+a b
+123 bbb
+300 ccc
+300 ccc
+UPDATE t1 SET a=321 WHERE b='ccc';
+SELECT * FROM t1;
+a b
+123 bbb
+321 ccc
+321 ccc
+INSERT INTO t1 (a,b) VALUES (45,'bob');
+SELECT * FROM t1;
+a b
+123 bbb
+321 ccc
+321 ccc
+45 bob
+DELETE FROM t1 WHERE a=123;
+SELECT * FROM t1;
+a b
+321 ccc
+321 ccc
+45 bob
+DELETE FROM t1 WHERE b > 'bbb' AND a > 100;
+SELECT * FROM t1;
+a b
+45 bob
+TRUNCATE TABLE t1;
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b CHAR(8), KEY(a), KEY(b)) ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) DEFAULT NULL,
+ KEY `a` (`a`),
+ KEY `b` (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a int(11) YES MUL NULL
+b char(8) YES MUL NULL
+INSERT INTO t1 (a,b) VALUES (76,'bar');
+INSERT INTO t1 (a,b) VALUES (35,'foo');
+INSERT INTO t1 (a,b) VALUES (77,'baz');
+SELECT * FROM t1 WHERE a = 35;
+a b
+35 foo
+SELECT * FROM t1 WHERE a = 35 AND b = 'foo';
+a b
+35 foo
+SELECT * FROM t1 WHERE a = 77 OR b = 'bar';
+a b
+76 bar
+77 baz
+SELECT * FROM t1 WHERE a > 35;
+a b
+76 bar
+77 baz
+SELECT * FROM t1;
+a b
+35 foo
+76 bar
+77 baz
+UPDATE t1 SET a=a+100;
+SELECT * FROM t1;
+a b
+135 foo
+176 bar
+177 baz
+UPDATE t1 SET a=a-100, b='bbb' WHERE a>100;
+SELECT * FROM t1;
+a b
+35 bbb
+76 bbb
+77 bbb
+UPDATE t1 SET a=300, b='ccc' WHERE a>70;
+SELECT * FROM t1;
+a b
+300 ccc
+300 ccc
+35 bbb
+UPDATE t1 SET a=123 WHERE a=35;
+SELECT * FROM t1;
+a b
+123 bbb
+300 ccc
+300 ccc
+UPDATE t1 SET a=321 WHERE b='ccc';
+SELECT * FROM t1;
+a b
+123 bbb
+321 ccc
+321 ccc
+INSERT INTO t1 (a,b) VALUES (45,'bob');
+SELECT * FROM t1;
+a b
+123 bbb
+321 ccc
+321 ccc
+45 bob
+DELETE FROM t1 WHERE a=123;
+SELECT * FROM t1;
+a b
+321 ccc
+321 ccc
+45 bob
+DELETE FROM t1 WHERE b > 'bbb' AND a > 100;
+SELECT * FROM t1;
+a b
+45 bob
+TRUNCATE TABLE t1;
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b CHAR(8), KEY(a)) ENGINE=rocksdb;
+INSERT INTO t1 (a) VALUES (1),(2),(5);
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+INSERT INTO t1 (a) VALUES (6),(8),(12);
+CHECK TABLE t1 FOR UPGRADE;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+INSERT INTO t1 (a) VALUES (13),(15),(16);
+CHECK TABLE t1 QUICK;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+INSERT INTO t1 (a) VALUES (17),(120),(132);
+CHECK TABLE t1 FAST;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+INSERT INTO t1 (a) VALUES (801),(900),(7714);
+CHECK TABLE t1 MEDIUM;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+INSERT INTO t1 (a) VALUES (8760),(10023),(12000);
+CHECK TABLE t1 EXTENDED;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+INSERT INTO t1 (a) VALUES (13345),(24456),(78302),(143028);
+CHECK TABLE t1 CHANGED;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b INT, c INT, d INT, KEY kab(a, b), KEY kbc(b, c), KEY kabc(a,b,c)) ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` int(11) DEFAULT NULL,
+ `c` int(11) DEFAULT NULL,
+ `d` int(11) DEFAULT NULL,
+ KEY `kab` (`a`,`b`),
+ KEY `kbc` (`b`,`c`),
+ KEY `kabc` (`a`,`b`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a int(11) YES MUL NULL
+b int(11) YES MUL NULL
+c int(11) YES NULL
+d int(11) YES NULL
+INSERT INTO t1 (a,b,c,d) VALUES (1,2,3,4);
+INSERT INTO t1 (a,b,c,d) VALUES (5,6,7,8);
+INSERT INTO t1 (a,b,c,d) VALUES (10,11,12,13);
+INSERT INTO t1 (a,b,c,d) VALUES (14,15,16,17);
+SELECT * FROM t1;
+a b c d
+1 2 3 4
+10 11 12 13
+14 15 16 17
+5 6 7 8
+SELECT * FROM t1 WHERE a = 1 OR a = 10;
+a b c d
+1 2 3 4
+10 11 12 13
+SELECT * FROM t1 WHERE c = 3 OR d = 17;
+a b c d
+1 2 3 4
+14 15 16 17
+SELECT * FROM t1 WHERE a > 5 OR d > 5;
+a b c d
+10 11 12 13
+14 15 16 17
+5 6 7 8
+SELECT a, b, c FROM t1 FORCE INDEX (kabc) WHERE a=1 OR b=11;
+a b c
+1 2 3
+10 11 12
+SELECT d FROM t1 FORCE INDEX (kbc) WHERE b > 6 AND c > 12;
+d
+17
+UPDATE t1 SET a=a+100;
+UPDATE t1 SET a=a-100, b=99 WHERE a>100;
+SELECT * FROM t1;
+a b c d
+1 99 3 4
+10 99 12 13
+14 99 16 17
+5 99 7 8
+DELETE FROM t1 WHERE a>5;
+DELETE FROM t1 WHERE b=99 AND d>4;
+SELECT * FROM t1;
+a b c d
+1 99 3 4
+TRUNCATE TABLE t1;
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b CHAR(8), KEY ka(a) comment 'rev:cf1', KEY kb(b)
+comment 'rev:cf1', KEY kab(a,b) comment 'rev:cf2') ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) DEFAULT NULL,
+ KEY `ka` (`a`) COMMENT 'rev:cf1',
+ KEY `kb` (`b`) COMMENT 'rev:cf1',
+ KEY `kab` (`a`,`b`) COMMENT 'rev:cf2'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a int(11) YES MUL NULL
+b char(8) YES MUL NULL
+INSERT INTO t1 (a,b) VALUES (76,'bar');
+INSERT INTO t1 (a,b) VALUES (35,'foo');
+INSERT INTO t1 (a,b) VALUES (77,'baz');
+SELECT * FROM t1 WHERE a = 35;
+a b
+35 foo
+SELECT * FROM t1 WHERE a = 35 AND b = 'foo';
+a b
+35 foo
+SELECT * FROM t1 WHERE a = 77 OR b = 'bar';
+a b
+76 bar
+77 baz
+SELECT * FROM t1 WHERE a > 35;
+a b
+76 bar
+77 baz
+SELECT * FROM t1;
+a b
+35 foo
+76 bar
+77 baz
+UPDATE t1 SET a=a+100;
+SELECT * FROM t1;
+a b
+135 foo
+176 bar
+177 baz
+UPDATE t1 SET a=a-100, b='bbb' WHERE a>100;
+SELECT * FROM t1;
+a b
+35 bbb
+76 bbb
+77 bbb
+UPDATE t1 SET a=300, b='ccc' WHERE a>70;
+SELECT * FROM t1;
+a b
+300 ccc
+300 ccc
+35 bbb
+UPDATE t1 SET a=123 WHERE a=35;
+SELECT * FROM t1;
+a b
+123 bbb
+300 ccc
+300 ccc
+UPDATE t1 SET a=321 WHERE b='ccc';
+SELECT * FROM t1;
+a b
+123 bbb
+321 ccc
+321 ccc
+INSERT INTO t1 (a,b) VALUES (45,'bob');
+SELECT * FROM t1;
+a b
+123 bbb
+321 ccc
+321 ccc
+45 bob
+DELETE FROM t1 WHERE a=123;
+SELECT * FROM t1;
+a b
+321 ccc
+321 ccc
+45 bob
+DELETE FROM t1 WHERE b > 'bbb' AND a > 100;
+SELECT * FROM t1;
+a b
+45 bob
+TRUNCATE TABLE t1;
+DROP TABLE t1;
+CREATE TABLE t1 (col1 int, col2 int, KEY kcol1(col1)) ENGINE=ROCKSDB;
+INSERT INTO t1 (col1, col2) values (2,2);
+ALTER TABLE t1 ADD COLUMN extra INT;
+UPDATE t1 SET col2 = 1;
+select * from t1;
+col1 col2 extra
+2 1 NULL
+DELETE FROM t1 WHERE col1 = 2;
+set global rocksdb_force_flush_memtable_now = true;
+select * from t1;
+col1 col2 extra
+DROP TABLE t1;
+create table t1 (i int auto_increment, key(i)) engine=rocksdb;
+insert into t1 values();
+insert into t1 values();
+insert into t1 values();
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) NOT NULL AUTO_INCREMENT,
+ KEY `i` (`i`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=4 DEFAULT CHARSET=latin1
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) NOT NULL AUTO_INCREMENT,
+ KEY `i` (`i`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=4 DEFAULT CHARSET=latin1
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/allow_to_start_after_corruption.result b/storage/rocksdb/mysql-test/rocksdb/r/allow_to_start_after_corruption.result
new file mode 100644
index 00000000000..9b5a335b6f8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/allow_to_start_after_corruption.result
@@ -0,0 +1,38 @@
+#
+# Test how MyRocks behaves when RocksDB reports corrupted data.
+#
+#
+# Test that the server crashes on corrupted data and restarts
+#
+create table t1 (
+pk int not null primary key,
+col1 varchar(10)
+) engine=rocksdb;
+insert into t1 values (1,1),(2,2),(3,3);
+select * from t1 where pk=1;
+pk col1
+1 1
+set session debug_dbug= "+d,rocksdb_return_status_corrupted";
+select * from t1 where pk=1;
+ERROR HY000: Lost connection to MySQL server during query
+FOUND 1 /data corruption detected/ in allow_to_start_after_corruption_debug.err
+#
+# The same for scan queries
+#
+select * from t1;
+pk col1
+1 1
+2 2
+3 3
+set session debug_dbug= "+d,rocksdb_return_status_corrupted";
+select * from t1;
+ERROR HY000: Lost connection to MySQL server during query
+FOUND 1 /data corruption detected/ in allow_to_start_after_corruption_debug.err
+#
+# Test restart failure. The server is shut down at this point.
+#
+FOUND 1 /The server will exit normally and stop restart attempts/ in allow_to_start_after_corruption_debug.err
+#
+# Remove corruption file and restart cleanly
+#
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/alter_table.result b/storage/rocksdb/mysql-test/rocksdb/r/alter_table.result
new file mode 100644
index 00000000000..a4e00626122
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/alter_table.result
@@ -0,0 +1,183 @@
+DROP TABLE IF EXISTS t1, t2;
+CREATE TABLE t1 (pk INT PRIMARY KEY, a INT, c CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (1,1,'a'),(2,5,'z');
+ALTER TABLE t1 ADD COLUMN b INT;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` int(11) NOT NULL,
+ `a` int(11) DEFAULT NULL,
+ `c` char(8) DEFAULT NULL,
+ `b` int(11) DEFAULT NULL,
+ PRIMARY KEY (`pk`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ALTER TABLE t1 ALTER COLUMN a SET DEFAULT '0';
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` int(11) NOT NULL,
+ `a` int(11) DEFAULT 0,
+ `c` char(8) DEFAULT NULL,
+ `b` int(11) DEFAULT NULL,
+ PRIMARY KEY (`pk`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ALTER TABLE t1 ALTER a DROP DEFAULT;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` int(11) NOT NULL,
+ `a` int(11),
+ `c` char(8) DEFAULT NULL,
+ `b` int(11) DEFAULT NULL,
+ PRIMARY KEY (`pk`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ALTER TABLE t1 CHANGE COLUMN b b1 CHAR(8) FIRST;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `b1` char(8) DEFAULT NULL,
+ `pk` int(11) NOT NULL,
+ `a` int(11),
+ `c` char(8) DEFAULT NULL,
+ PRIMARY KEY (`pk`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ALTER TABLE t1 CHANGE b1 b INT AFTER c;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` int(11) NOT NULL,
+ `a` int(11),
+ `c` char(8) DEFAULT NULL,
+ `b` int(11) DEFAULT NULL,
+ PRIMARY KEY (`pk`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ALTER TABLE t1 CHANGE b b CHAR(8);
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` int(11) NOT NULL,
+ `a` int(11),
+ `c` char(8) DEFAULT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`pk`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ALTER TABLE t1 MODIFY COLUMN b INT;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` int(11) NOT NULL,
+ `a` int(11),
+ `c` char(8) DEFAULT NULL,
+ `b` int(11) DEFAULT NULL,
+ PRIMARY KEY (`pk`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ALTER TABLE t1 MODIFY COLUMN b CHAR(8) FIRST;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `b` char(8) DEFAULT NULL,
+ `pk` int(11) NOT NULL,
+ `a` int(11),
+ `c` char(8) DEFAULT NULL,
+ PRIMARY KEY (`pk`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ALTER TABLE t1 MODIFY COLUMN b INT AFTER a;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` int(11) NOT NULL,
+ `a` int(11),
+ `b` int(11) DEFAULT NULL,
+ `c` char(8) DEFAULT NULL,
+ PRIMARY KEY (`pk`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ALTER TABLE t1 DROP COLUMN b;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` int(11) NOT NULL,
+ `a` int(11),
+ `c` char(8) DEFAULT NULL,
+ PRIMARY KEY (`pk`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ALTER TABLE t1 RENAME TO t2;
+SHOW CREATE TABLE t1;
+ERROR 42S02: Table 'test.t1' doesn't exist
+SHOW CREATE TABLE t2;
+Table Create Table
+t2 CREATE TABLE `t2` (
+ `pk` int(11) NOT NULL,
+ `a` int(11),
+ `c` char(8) DEFAULT NULL,
+ PRIMARY KEY (`pk`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+DROP TABLE t2;
+CREATE TABLE t1 (pk INT PRIMARY KEY, a INT, b INT) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (1,1,5),(2,2,2),(3,4,3);
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` int(11) NOT NULL,
+ `a` int(11) DEFAULT NULL,
+ `b` int(11) DEFAULT NULL,
+ PRIMARY KEY (`pk`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ALTER TABLE t1 ORDER BY b ASC, a DESC, pk DESC;
+Warnings:
+Warning 1105 ORDER BY ignored as there is a user-defined clustered index in the table 't1'
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` int(11) NOT NULL,
+ `a` int(11) DEFAULT NULL,
+ `b` int(11) DEFAULT NULL,
+ PRIMARY KEY (`pk`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+SELECT * FROM t1;
+pk a b
+1 1 5
+2 2 2
+3 4 3
+DROP TABLE t1;
+CREATE TABLE t1 (pk INT PRIMARY KEY, a INT, b CHAR(8), c CHAR(8)) ENGINE=rocksdb CHARACTER SET latin1 COLLATE latin1_general_cs;
+INSERT INTO t1 VALUES (1,5,'z','t');
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` int(11) NOT NULL,
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) COLLATE latin1_general_cs DEFAULT NULL,
+ `c` char(8) COLLATE latin1_general_cs DEFAULT NULL,
+ PRIMARY KEY (`pk`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs
+ALTER TABLE t1 CONVERT TO CHARACTER SET utf8;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` int(11) NOT NULL,
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) DEFAULT NULL,
+ `c` char(8) DEFAULT NULL,
+ PRIMARY KEY (`pk`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=utf8
+ALTER TABLE t1 DEFAULT CHARACTER SET = latin1 COLLATE latin1_general_ci;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` int(11) NOT NULL,
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) CHARACTER SET utf8 DEFAULT NULL,
+ `c` char(8) CHARACTER SET utf8 DEFAULT NULL,
+ PRIMARY KEY (`pk`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_general_ci
+ALTER TABLE t1 FORCE;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` int(11) NOT NULL,
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) CHARACTER SET utf8 DEFAULT NULL,
+ `c` char(8) CHARACTER SET utf8 DEFAULT NULL,
+ PRIMARY KEY (`pk`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_general_ci
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/analyze_table.result b/storage/rocksdb/mysql-test/rocksdb/r/analyze_table.result
new file mode 100644
index 00000000000..b666a17c81c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/analyze_table.result
@@ -0,0 +1,55 @@
+DROP TABLE IF EXISTS t1,t2;
+CREATE TABLE t1 (pk INT PRIMARY KEY, a INT(11), b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (1,1,'a'),(2,2,'b');
+CREATE TABLE t2 (pk INT PRIMARY KEY, a INT(11), b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (3,3,'c');
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+INSERT INTO t2 VALUES (1,4,'d');
+ANALYZE NO_WRITE_TO_BINLOG TABLE t2;
+Table Op Msg_type Msg_text
+test.t2 analyze status OK
+INSERT INTO t1 VALUES (4,5,'e');
+INSERT INTO t2 VALUES (2,6,'f');
+ANALYZE LOCAL TABLE t1, t2;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+test.t2 analyze status OK
+DROP TABLE t1, t2;
+CREATE TABLE t1 (pk INT PRIMARY KEY, a INT(11), KEY(a)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (1,1),(2,2),(3,4),(4,7);
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+INSERT INTO t1 VALUES (5,8),(6,10),(7,11),(8,12);
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+DROP TABLE t1;
+#
+# MDEV-12465: Server crashes in my_scan_weight_utf8_bin upon
+# collecting stats for RocksDB table
+#
+CREATE TABLE t1 (
+pk INT,
+f1 CHAR(255),
+f2 TEXT,
+f3 VARCHAR(255),
+f4 TEXT,
+PRIMARY KEY (pk),
+KEY (f4(255))
+) ENGINE=RocksDB
+CHARSET utf8
+COLLATE utf8_bin
+PARTITION BY KEY (pk) PARTITIONS 2;
+INSERT INTO t1 VALUES
+(1,'foo','bar','foo','bar'), (2,'bar','foo','bar','foo');
+ANALYZE TABLE t1 PERSISTENT FOR ALL;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze Warning Engine-independent statistics are not collected for column 'f2'
+test.t1 analyze Warning Engine-independent statistics are not collected for column 'f4'
+test.t1 analyze status OK
+drop table t1;
+# End of 10.2 tests
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/apply_changes_iter.result b/storage/rocksdb/mysql-test/rocksdb/r/apply_changes_iter.result
new file mode 100644
index 00000000000..a5d81031cd2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/apply_changes_iter.result
@@ -0,0 +1,64 @@
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+CREATE TABLE t1 (
+pk INT NOT NULL PRIMARY KEY,
+key1 INT NOT NULL,
+KEY (key1)
+) ENGINE=ROCKSDB;
+INSERT INTO t1 VALUES (12,12);
+INSERT INTO t1 VALUES (6,6);
+BEGIN;
+INSERT INTO t1 VALUES (8,8), (10,10);
+SELECT * FROM t1 WHERE key1 BETWEEN 4 and 11 ORDER BY KEY1 DESC;
+pk key1
+10 10
+8 8
+6 6
+SELECT * FROM t1 WHERE key1 BETWEEN 4 and 11 ORDER BY KEY1 ASC;
+pk key1
+6 6
+8 8
+10 10
+SELECT * FROM t1 IGNORE INDEX(key1) WHERE key1 BETWEEN 4 and 11 ORDER BY key1 DESC;
+pk key1
+10 10
+8 8
+6 6
+SELECT * FROM t1 IGNORE INDEX(key1) WHERE key1 BETWEEN 4 and 11 ORDER BY key1 ASC;
+pk key1
+6 6
+8 8
+10 10
+ROLLBACK;
+CREATE TABLE t2 (
+pk INT NOT NULL PRIMARY KEY,
+key1 INT NOT NULL,
+KEY (key1) COMMENT 'rev:cf'
+) ENGINE=ROCKSDB;
+INSERT INTO t2 VALUES (12,12);
+INSERT INTO t2 VALUES (6,6);
+BEGIN;
+INSERT INTO t2 VALUES (8,8), (10,10);
+SELECT * FROM t2 WHERE key1 BETWEEN 4 and 11 ORDER BY KEY1 DESC;
+pk key1
+10 10
+8 8
+6 6
+SELECT * FROM t2 WHERE key1 BETWEEN 4 and 11 ORDER BY KEY1 ASC;
+pk key1
+6 6
+8 8
+10 10
+SELECT * FROM t2 IGNORE INDEX(key1) WHERE key1 BETWEEN 4 and 11 ORDER BY key1 DESC;
+pk key1
+10 10
+8 8
+6 6
+SELECT * FROM t2 IGNORE INDEX(key1) WHERE key1 BETWEEN 4 and 11 ORDER BY key1 ASC;
+pk key1
+6 6
+8 8
+10 10
+ROLLBACK;
+DROP TABLE t1;
+DROP TABLE t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe.result
new file mode 100644
index 00000000000..60395eced7e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe.result
@@ -0,0 +1,132 @@
+include/master-slave.inc
+[connection master]
+create table t (i int primary key auto_increment) engine=rocksdb;
+#
+# Testing concurrent transactions.
+#
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connect con3,localhost,root,,;
+connection con1;
+begin;
+insert into t values ();
+connection con2;
+begin;
+insert into t values ();
+connection con3;
+begin;
+insert into t values ();
+connection con1;
+insert into t values ();
+connection con2;
+insert into t values ();
+connection con3;
+insert into t values ();
+connection con2;
+commit;
+connection con3;
+rollback;
+connection con1;
+commit;
+delete from t;
+# Master value before restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 7
+# Slave value before restart
+connection slave;
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 6
+connection slave;
+include/stop_slave.inc
+include/rpl_restart_server.inc [server_number=1]
+connection default;
+# Master value after restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 6
+include/rpl_restart_server.inc [server_number=2]
+connection slave;
+include/start_slave.inc
+# Slave value after restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 6
+disconnect con1;
+disconnect con2;
+disconnect con3;
+#
+# Testing interaction of merge markers with various DDL statements.
+#
+connection slave;
+include/stop_slave.inc
+connection default;
+# Drop and add primary key.
+alter table t modify i int;
+alter table t drop primary key;
+alter table t add primary key (i);
+alter table t modify i int auto_increment;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 6
+# Remove auto_increment property.
+alter table t modify i int;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t NULL
+# Add auto_increment property.
+insert into t values (123);
+alter table t modify i int auto_increment;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+# Add column j.
+alter table t add column j int;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+# Rename tables.
+rename table t to t2;
+rename table t2 to t;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+# Change auto_increment property.
+alter table t auto_increment = 1000;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 1000
+alter table t auto_increment = 1;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+alter table t drop primary key, add key (i), auto_increment = 1;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+alter table t add key (j), auto_increment = 1;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+alter table t modify i int;
+alter table t add column (k int auto_increment), add key(k), auto_increment=15;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 16
+# Drop table.
+drop table t;
+include/rpl_restart_server.inc [server_number=1]
+connection slave;
+include/start_slave.inc
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe_partition.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe_partition.result
new file mode 100644
index 00000000000..c837fb7c77d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe_partition.result
@@ -0,0 +1,132 @@
+include/master-slave.inc
+[connection master]
+create table t (i int primary key auto_increment) engine=rocksdb partition by key (i) partitions 3;
+#
+# Testing concurrent transactions.
+#
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connect con3,localhost,root,,;
+connection con1;
+begin;
+insert into t values ();
+connection con2;
+begin;
+insert into t values ();
+connection con3;
+begin;
+insert into t values ();
+connection con1;
+insert into t values ();
+connection con2;
+insert into t values ();
+connection con3;
+insert into t values ();
+connection con2;
+commit;
+connection con3;
+rollback;
+connection con1;
+commit;
+delete from t;
+# Master value before restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 7
+# Slave value before restart
+connection slave;
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 6
+connection slave;
+include/stop_slave.inc
+include/rpl_restart_server.inc [server_number=1]
+connection default;
+# Master value after restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 6
+include/rpl_restart_server.inc [server_number=2]
+connection slave;
+include/start_slave.inc
+# Slave value after restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 6
+disconnect con1;
+disconnect con2;
+disconnect con3;
+#
+# Testing interaction of merge markers with various DDL statements.
+#
+connection slave;
+include/stop_slave.inc
+connection default;
+# Drop and add primary key.
+alter table t modify i int;
+alter table t drop primary key;
+alter table t add primary key (i);
+alter table t modify i int auto_increment;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 1
+# Remove auto_increment property.
+alter table t modify i int;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t NULL
+# Add auto_increment property.
+insert into t values (123);
+alter table t modify i int auto_increment;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+# Add column j.
+alter table t add column j int;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+# Rename tables.
+rename table t to t2;
+rename table t2 to t;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+# Change auto_increment property.
+alter table t auto_increment = 1000;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 1000
+alter table t auto_increment = 1;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+alter table t drop primary key, add key (i), auto_increment = 1;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+alter table t add key (j), auto_increment = 1;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+alter table t modify i int;
+alter table t add column (k int auto_increment), add key(k), auto_increment=15;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 16
+# Drop table.
+drop table t;
+include/rpl_restart_server.inc [server_number=1]
+connection slave;
+include/start_slave.inc
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_debug.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_debug.result
new file mode 100644
index 00000000000..604e5572eab
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_debug.result
@@ -0,0 +1,99 @@
+#
+# Testing upgrade from a server without auto_increment merge support
+# to a new server with such support.
+#
+set debug_dbug='+d,myrocks_autoinc_upgrade';
+create table t (i int primary key auto_increment);
+insert into t values ();
+insert into t values ();
+insert into t values ();
+select * from t;
+i
+1
+2
+3
+delete from t where i > 1;
+select * from t;
+i
+1
+select table_name, index_name, auto_increment
+from information_schema.rocksdb_ddl where table_name = 't';
+table_name index_name auto_increment
+t PRIMARY NULL
+set debug_dbug='-d,myrocks_autoinc_upgrade';
+insert into t values ();
+insert into t values ();
+insert into t values ();
+select * from t;
+i
+1
+2
+3
+4
+select table_name, index_name, auto_increment
+from information_schema.rocksdb_ddl where table_name = 't';
+table_name index_name auto_increment
+t PRIMARY 5
+delete from t where i > 1;
+insert into t values ();
+insert into t values ();
+insert into t values ();
+select * from t;
+i
+1
+5
+6
+7
+drop table t;
+#
+# Testing crash safety of transactions.
+#
+create table t (i int primary key auto_increment);
+insert into t values ();
+insert into t values ();
+insert into t values ();
+# Before anything
+begin;
+insert into t values ();
+insert into t values ();
+set debug_dbug="+d,crash_commit_before";
+commit;
+ERROR HY000: Lost connection to MySQL server during query
+select max(i) into @row_max from t;
+select table_schema, table_name, auto_increment > @row_max from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment > @row_max
+test t 1
+# After engine prepare
+begin;
+insert into t values ();
+insert into t values ();
+set debug_dbug="+d,crash_commit_after_prepare";
+commit;
+ERROR HY000: Lost connection to MySQL server during query
+select max(i) into @row_max from t;
+select table_schema, table_name, auto_increment > @row_max from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment > @row_max
+test t 1
+# After binlog
+begin;
+insert into t values ();
+insert into t values ();
+set debug_dbug="+d,crash_commit_after_log";
+commit;
+ERROR HY000: Lost connection to MySQL server during query
+select max(i) into @row_max from t;
+select table_schema, table_name, auto_increment > @row_max from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment > @row_max
+test t 1
+# After everything
+begin;
+insert into t values ();
+insert into t values ();
+set debug_dbug="+d,crash_commit_after";
+commit;
+ERROR HY000: Lost connection to MySQL server during query
+select max(i) into @row_max from t;
+select table_schema, table_name, auto_increment > @row_max from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment > @row_max
+test t 1
+drop table t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_secondary.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_secondary.result
new file mode 100644
index 00000000000..100bc5fd638
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_secondary.result
@@ -0,0 +1,16 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (pk INT PRIMARY KEY, a INT AUTO_INCREMENT, KEY(a)) ENGINE=rocksdb;
+INSERT INTO t1 (pk) VALUES (3), (2), (1);
+SELECT * FROM t1;
+pk a
+3 1
+2 2
+1 3
+INSERT INTO t1 (pk) VALUES (4);
+SELECT * FROM t1;
+pk a
+3 1
+2 2
+1 3
+4 4
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result
new file mode 100644
index 00000000000..cc47ceff7ca
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result
@@ -0,0 +1,199 @@
+DROP TABLE IF EXISTS t1;
+#---------------------------
+# auto_increment_offset
+#---------------------------
+SET auto_increment_offset = 200;
+CREATE TABLE t1 (a INT AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (NULL,'a'),(NULL,'b'),(NULL,'c');
+SELECT LAST_INSERT_ID();
+LAST_INSERT_ID()
+1
+SELECT a,b FROM t1 ORDER BY a;
+a b
+1 a
+2 b
+3 c
+#---------------------------
+# auto_increment_increment
+#---------------------------
+SET auto_increment_increment = 300;
+INSERT INTO t1 (a,b) VALUES (NULL,'d'),(NULL,'e'),(NULL,'f');
+SELECT LAST_INSERT_ID();
+LAST_INSERT_ID()
+200
+SELECT a,b FROM t1 ORDER BY a;
+a b
+1 a
+2 b
+3 c
+200 d
+500 e
+800 f
+SET auto_increment_increment = 50;
+INSERT INTO t1 (a,b) VALUES (NULL,'g'),(NULL,'h'),(NULL,'i');
+SELECT LAST_INSERT_ID();
+LAST_INSERT_ID()
+850
+SELECT a,b FROM t1 ORDER BY a;
+a b
+1 a
+2 b
+3 c
+200 d
+500 e
+800 f
+850 g
+900 h
+950 i
+DROP TABLE t1;
+#---------------------------
+# offset is greater than the max value
+#---------------------------
+SET auto_increment_increment = 500;
+SET auto_increment_offset = 300;
+CREATE TABLE t1 (a TINYINT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+# In MariaDB, this is an error:
+INSERT INTO t1 (a) VALUES (NULL);
+ERROR 22003: Out of range value for column 'a' at row 1
+SELECT LAST_INSERT_ID();
+LAST_INSERT_ID()
+850
+SELECT a FROM t1 ORDER BY a;
+a
+DROP TABLE t1;
+#---------------------------
+# test large autoincrement values
+#---------------------------
+SET auto_increment_increment = 1;
+SET auto_increment_offset = 1;
+CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (18446744073709551613, 'a');
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551614 DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (NULL, 'b');
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551615 DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (NULL, 'c');
+ERROR HY000: Failed to read auto-increment value from storage engine
+SELECT * FROM t1;
+a b
+18446744073709551613 a
+18446744073709551614 b
+DROP TABLE t1;
+SET auto_increment_increment = 300;
+CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (18446744073709551613, 'a');
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551614 DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (NULL, 'b');
+ERROR HY000: Failed to read auto-increment value from storage engine
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551615 DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (NULL, 'c');
+ERROR HY000: Failed to read auto-increment value from storage engine
+SELECT * FROM t1;
+a b
+18446744073709551613 a
+DROP TABLE t1;
+SET auto_increment_offset = 200;
+CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (18446744073709551613, 'a');
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551614 DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (NULL, 'b');
+ERROR HY000: Failed to read auto-increment value from storage engine
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551615 DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (NULL, 'c');
+ERROR HY000: Failed to read auto-increment value from storage engine
+SELECT * FROM t1;
+a b
+18446744073709551613 a
+DROP TABLE t1;
+#----------------------------------
+# Issue #792 Crash in autoincrement
+#----------------------------------
+CREATE TABLE t1(C1 DOUBLE AUTO_INCREMENT KEY,C2 CHAR) ENGINE=ROCKSDB;
+INSERT INTO t1 VALUES(2177,0);
+DROP TABLE t1;
+CREATE TABLE t0(c0 BLOB) ENGINE=ROCKSDB;
+INSERT INTO t0 VALUES(0);
+ALTER TABLE t0 AUTO_INCREMENT=0;
+DROP TABLE t0;
+#---------------------------------------------------------------
+# MDEV-16703 Assertion failed in load_auto_incr_value_from_index
+#---------------------------------------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT, a INT, PRIMARY KEY(pk)) ENGINE=RocksDB;
+INSERT INTO t1 (a) VALUES (1);
+UPDATE t1 SET pk = 3;
+ALTER TABLE t1 AUTO_INCREMENT 2;
+DROP TABLE t1;
+#----------------------------------
+# Issue #792 Crash in autoincrement
+#----------------------------------
+CREATE TABLE t1(C1 DOUBLE AUTO_INCREMENT KEY,C2 CHAR) ENGINE=ROCKSDB;
+INSERT INTO t1 VALUES(2177,0);
+DROP TABLE t1;
+CREATE TABLE t0(c0 BLOB) ENGINE=ROCKSDB;
+INSERT INTO t0 VALUES(0);
+ALTER TABLE t0 AUTO_INCREMENT=0;
+DROP TABLE t0;
+#----------------------------------
+# Issue #869 Crash in autoincrement
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT, a INT, PRIMARY KEY(pk)) ENGINE=RocksDB;
+INSERT INTO t1 (a) VALUES (1);
+UPDATE t1 SET pk = 3;
+ALTER TABLE t1 AUTO_INCREMENT 2;
+DROP TABLE t1;
+#----------------------------------
+# Issue #902 Debug assert in autoincrement with small field type
+#----------------------------------
+SET auto_increment_increment=100, auto_increment_offset=10;
+CREATE TABLE t1(i INT AUTO_INCREMENT PRIMARY KEY) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551615;
+INSERT INTO t1 VALUES (NULL);
+ERROR HY000: Failed to read auto-increment value from storage engine
+SELECT * FROM t1;
+i
+ALTER TABLE t1 AUTO_INCREMENT=1;
+INSERT INTO t1 VALUES (NULL);
+SELECT * FROM t1;
+i
+10
+ALTER TABLE t1 AUTO_INCREMENT=18446744073709551615;
+INSERT INTO t1 VALUES (NULL);
+ERROR HY000: Failed to read auto-increment value from storage engine
+SELECT * FROM t1;
+i
+10
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars_thread.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars_thread.result
new file mode 100644
index 00000000000..9331b4e22a5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars_thread.result
@@ -0,0 +1,39 @@
+#---------------------------
+# two threads inserting simultaneously with increment > 1
+# Issue #390
+#---------------------------
+CREATE TABLE t1 (a INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+connect con1, localhost, root,,;
+SET auto_increment_increment = 2;
+SET auto_increment_offset = 1;
+INSERT INTO t1 VALUES(NULL);
+connect con2, localhost, root,,;
+SET auto_increment_increment = 2;
+SET auto_increment_offset = 1;
+connect con3, localhost, root,,;
+connection con1;
+SET debug_sync='rocksdb.autoinc_vars2 SIGNAL go2';
+SET debug_sync='rocksdb.autoinc_vars SIGNAL parked1 WAIT_FOR go1';
+INSERT INTO t1 VALUES(NULL);
+connection default;
+SET debug_sync='now WAIT_FOR parked1';
+connection con2;
+SET debug_sync='rocksdb.autoinc_vars SIGNAL parked2 WAIT_FOR go2';
+INSERT INTO t1 VALUES(NULL);
+connection default;
+SET debug_sync='now WAIT_FOR parked2';
+SET debug_sync='now SIGNAL go1';
+connection con3;
+connection default;
+connection con1;
+connection con2;
+connection default;
+SET debug_sync='RESET';
+disconnect con1;
+disconnect con2;
+SELECT * FROM t1;
+a
+1
+3
+5
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars_thread_2.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars_thread_2.result
new file mode 100644
index 00000000000..a14ffdec2e3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars_thread_2.result
@@ -0,0 +1,96 @@
+#---------------------------
+# ten threads inserting simultaneously with increment > 1
+# Issue #390
+#---------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, thr INT) ENGINE=rocksdb;
+connect con$i, localhost, root,,;
+SET auto_increment_increment = 100;
+SET auto_increment_offset = 9 + 1;
+connect con$i, localhost, root,,;
+SET auto_increment_increment = 100;
+SET auto_increment_offset = 8 + 1;
+connect con$i, localhost, root,,;
+SET auto_increment_increment = 100;
+SET auto_increment_offset = 7 + 1;
+connect con$i, localhost, root,,;
+SET auto_increment_increment = 100;
+SET auto_increment_offset = 6 + 1;
+connect con$i, localhost, root,,;
+SET auto_increment_increment = 100;
+SET auto_increment_offset = 5 + 1;
+connect con$i, localhost, root,,;
+SET auto_increment_increment = 100;
+SET auto_increment_offset = 4 + 1;
+connect con$i, localhost, root,,;
+SET auto_increment_increment = 100;
+SET auto_increment_offset = 3 + 1;
+connect con$i, localhost, root,,;
+SET auto_increment_increment = 100;
+SET auto_increment_offset = 2 + 1;
+connect con$i, localhost, root,,;
+SET auto_increment_increment = 100;
+SET auto_increment_offset = 1 + 1;
+connect con$i, localhost, root,,;
+SET auto_increment_increment = 100;
+SET auto_increment_offset = 0 + 1;
+connection default;
+connection con9;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+connection con8;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+connection con7;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+connection con6;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+connection con5;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+connection con4;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+connection con3;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+connection con2;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+connection con1;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+connection con0;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+connection default;
+connection con9;
+connection con8;
+connection con7;
+connection con6;
+connection con5;
+connection con4;
+connection con3;
+connection con2;
+connection con1;
+connection con0;
+connection default;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+1000000
+SELECT thr, COUNT(pk) FROM t1 GROUP BY thr;
+thr COUNT(pk)
+0 100000
+1 100000
+2 100000
+3 100000
+4 100000
+5 100000
+6 100000
+7 100000
+8 100000
+9 100000
+disconnect con9;
+disconnect con8;
+disconnect con7;
+disconnect con6;
+disconnect con5;
+disconnect con4;
+disconnect con3;
+disconnect con2;
+disconnect con1;
+disconnect con0;
+SELECT * FROM t1 ORDER BY pk INTO OUTFILE <output_file>;
+All pk values matched their expected values
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rc.result b/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rc.result
new file mode 100644
index 00000000000..973d1876fa0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rc.result
@@ -0,0 +1,87 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+set @save_rocksdb_blind_delete_primary_key=@@session.rocksdb_blind_delete_primary_key;
+set @save_rocksdb_master_skip_tx_api=@@session.rocksdb_master_skip_tx_api;
+DROP TABLE IF EXISTS t1,t2;
+create table t1 (id int primary key, value int, value2 varchar(200)) engine=rocksdb;
+create table t2 (id int primary key, value int, value2 varchar(200), index(value)) engine=rocksdb;
+SET session rocksdb_blind_delete_primary_key=1;
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+variable_value-@c
+1000
+SELECT count(*) FROM t1;
+count(*)
+9000
+include/sync_slave_sql_with_master.inc
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SELECT count(*) FROM t1;
+count(*)
+9000
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+variable_value-@c
+0
+SELECT count(*) FROM t2;
+count(*)
+9000
+SET session rocksdb_master_skip_tx_api=1;
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+variable_value-@c
+1000
+SELECT count(*) FROM t1;
+count(*)
+8000
+SELECT count(*) FROM t2;
+count(*)
+8000
+include/sync_slave_sql_with_master.inc
+SELECT count(*) FROM t1;
+count(*)
+8000
+SELECT count(*) FROM t2;
+count(*)
+8000
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+DELETE FROM t1 WHERE id BETWEEN 3001 AND 4000;
+DELETE FROM t2 WHERE id BETWEEN 3001 AND 4000;
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+variable_value-@c
+0
+SELECT count(*) FROM t1;
+count(*)
+7000
+SELECT count(*) FROM t2;
+count(*)
+7000
+include/sync_slave_sql_with_master.inc
+SELECT count(*) FROM t1;
+count(*)
+7000
+SELECT count(*) FROM t2;
+count(*)
+7000
+DELETE FROM t1 WHERE id = 10;
+SELECT count(*) FROM t1;
+count(*)
+7000
+call mtr.add_suppression("Slave SQL.*Could not execute Delete_rows event on table test.t1.*Error_code.*");
+call mtr.add_suppression("Slave: Can't find record in 't1'.*");
+include/wait_for_slave_sql_error.inc [errno=1032]
+set @save_rocksdb_read_free_rpl=@@global.rocksdb_read_free_rpl;
+set global rocksdb_read_free_rpl=PK_SK;
+START SLAVE;
+include/sync_slave_sql_with_master.inc
+SELECT count(*) FROM t1;
+count(*)
+7000
+set global rocksdb_read_free_rpl=@save_rocksdb_read_free_rpl;
+SET session rocksdb_blind_delete_primary_key=@save_rocksdb_blind_delete_primary_key;
+SET session rocksdb_master_skip_tx_api=@save_rocksdb_master_skip_tx_api;
+DROP TABLE t1, t2;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rr.result b/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rr.result
new file mode 100644
index 00000000000..683b672e360
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rr.result
@@ -0,0 +1,87 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+set @save_rocksdb_blind_delete_primary_key=@@session.rocksdb_blind_delete_primary_key;
+set @save_rocksdb_master_skip_tx_api=@@session.rocksdb_master_skip_tx_api;
+DROP TABLE IF EXISTS t1,t2;
+create table t1 (id int primary key, value int, value2 varchar(200)) engine=rocksdb;
+create table t2 (id int primary key, value int, value2 varchar(200), index(value)) engine=rocksdb;
+SET session rocksdb_blind_delete_primary_key=1;
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+variable_value-@c
+1000
+SELECT count(*) FROM t1;
+count(*)
+9000
+include/sync_slave_sql_with_master.inc
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+SELECT count(*) FROM t1;
+count(*)
+9000
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+variable_value-@c
+0
+SELECT count(*) FROM t2;
+count(*)
+9000
+SET session rocksdb_master_skip_tx_api=1;
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+variable_value-@c
+1000
+SELECT count(*) FROM t1;
+count(*)
+8000
+SELECT count(*) FROM t2;
+count(*)
+8000
+include/sync_slave_sql_with_master.inc
+SELECT count(*) FROM t1;
+count(*)
+8000
+SELECT count(*) FROM t2;
+count(*)
+8000
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+DELETE FROM t1 WHERE id BETWEEN 3001 AND 4000;
+DELETE FROM t2 WHERE id BETWEEN 3001 AND 4000;
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+variable_value-@c
+0
+SELECT count(*) FROM t1;
+count(*)
+7000
+SELECT count(*) FROM t2;
+count(*)
+7000
+include/sync_slave_sql_with_master.inc
+SELECT count(*) FROM t1;
+count(*)
+7000
+SELECT count(*) FROM t2;
+count(*)
+7000
+DELETE FROM t1 WHERE id = 10;
+SELECT count(*) FROM t1;
+count(*)
+7000
+call mtr.add_suppression("Slave SQL.*Could not execute Delete_rows event on table test.t1.*Error_code.*");
+call mtr.add_suppression("Slave: Can't find record in 't1'.*");
+include/wait_for_slave_sql_error.inc [errno=1032]
+set @save_rocksdb_read_free_rpl=@@global.rocksdb_read_free_rpl;
+set global rocksdb_read_free_rpl=PK_SK;
+START SLAVE;
+include/sync_slave_sql_with_master.inc
+SELECT count(*) FROM t1;
+count(*)
+7000
+set global rocksdb_read_free_rpl=@save_rocksdb_read_free_rpl;
+SET session rocksdb_blind_delete_primary_key=@save_rocksdb_blind_delete_primary_key;
+SET session rocksdb_master_skip_tx_api=@save_rocksdb_master_skip_tx_api;
+DROP TABLE t1, t2;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter.result b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter.result
new file mode 100644
index 00000000000..bc5d685f89b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter.result
@@ -0,0 +1,2042 @@
+CREATE PROCEDURE bloom_start()
+BEGIN
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+END//
+CREATE PROCEDURE bloom_end()
+BEGIN
+select case when variable_value-@c > 0 then 'true' else 'false' end as checked from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+END//
+create or replace table t1 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id1, id2, id3, id4) ,
+index id2 (id2) ,
+index id2_id1 (id2, id1) ,
+index id2_id3 (id2, id3) ,
+index id2_id4 (id2, id4) ,
+index id2_id3_id1_id4 (id2, id3, id1, id4) ,
+index id3_id2 (id3, id2)
+) engine=ROCKSDB;
+create or replace table t2 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id4) ,
+index id2 (id2) ,
+index id2_id3 (id2, id3) ,
+index id2_id4 (id2, id4) ,
+index id2_id4_id5 (id2, id4, id5) ,
+index id3_id4 (id3, id4) ,
+index id3_id5 (id3, id5)
+) engine=ROCKSDB;
+insert t1
+select (seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc"
+ from seq_1_to_10000;
+insert t2 select * from t1;
+set global rocksdb_force_flush_memtable_now=1;
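+# Flushing the memtable materializes the rows into SST files; prefix bloom
+# filters are built per SST, so reads can only consult them after a flush.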
+call bloom_start();
+select count(*) from t1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(PRIMARY) where id1 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index(id3_id4) where id3 >= '1';
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=2 and id1=1;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=24 and id1=12;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=88 and id1=44;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=100 and id1=50;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=428 and id1=214;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=1 and id4=1 and id5=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=23 and id4=115 and id5=115;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=500 and id4=2500 and id5=2500;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=601 and id4=3005 and id5=3005;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=1 and id3='1' and id1=1 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36 and id3='36' and id1=18 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124' and id1=62 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=888 and id3='888' and id1=444 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124';
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1 and id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=12 and id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=1;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=12;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=23;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=100;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=234;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=234;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=1 and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=23 and id4=115;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=500 and id4=2500;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=601 and id4=3005;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='100' and id5=500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='240' and id5=1200;
+count(*)
+1
+call bloom_end();
+checked
+false
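+# Repeat the workload in the cf_short_prefix column family (presumably
+# configured with a shorter prefix extractor in the test's server options):
+# a short prefix is covered by virtually any lookup, so the filter is
+# checked for every query below.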
+create or replace table t1 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id1, id2, id3, id4) COMMENT 'cf_short_prefix',
+index id2 (id2) COMMENT 'cf_short_prefix',
+index id2_id1 (id2, id1) COMMENT 'cf_short_prefix',
+index id2_id3 (id2, id3) COMMENT 'cf_short_prefix',
+index id2_id4 (id2, id4) COMMENT 'cf_short_prefix',
+index id2_id3_id1_id4 (id2, id3, id1, id4) COMMENT 'cf_short_prefix',
+index id3_id2 (id3, id2) COMMENT 'cf_short_prefix'
+) engine=ROCKSDB;
+create or replace table t2 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id4) COMMENT 'cf_short_prefix',
+index id2 (id2) COMMENT 'cf_short_prefix',
+index id2_id3 (id2, id3) COMMENT 'cf_short_prefix',
+index id2_id4 (id2, id4) COMMENT 'cf_short_prefix',
+index id2_id4_id5 (id2, id4, id5) COMMENT 'cf_short_prefix',
+index id3_id4 (id3, id4) COMMENT 'cf_short_prefix',
+index id3_id5 (id3, id5) COMMENT 'cf_short_prefix'
+) engine=ROCKSDB;
+insert t1
+select (seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc"
+ from seq_1_to_10000;
+insert t2 select * from t1;
+set global rocksdb_force_flush_memtable_now=1;
+call bloom_start();
+select count(*) from t1;
+count(*)
+10000
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2;
+count(*)
+10000
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(PRIMARY) where id1 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index(id3_id4) where id3 >= '1';
+count(*)
+10000
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=2 and id1=1;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=24 and id1=12;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=88 and id1=44;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=100 and id1=50;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=428 and id1=214;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=1 and id4=1 and id5=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=23 and id4=115 and id5=115;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=500 and id4=2500 and id5=2500;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=601 and id4=3005 and id5=3005;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=1 and id3='1' and id1=1 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36 and id3='36' and id1=18 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124' and id1=62 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=888 and id3='888' and id1=444 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124';
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1 and id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=12 and id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=1;
+count(*)
+10
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=12;
+count(*)
+10
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=23;
+count(*)
+10
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=100;
+count(*)
+10
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=234;
+count(*)
+10
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=234;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=1 and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=23 and id4=115;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=500 and id4=2500;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=601 and id4=3005;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='100' and id5=500;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='240' and id5=1200;
+count(*)
+1
+call bloom_end();
+checked
+true
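+# Same again in a reverse-ordered column family: the 'rev:' prefix in the
+# column family name makes RocksDB store keys in descending order.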
+create or replace table t1 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id1, id2, id3, id4) COMMENT 'rev:cf_short_prefix',
+index id2 (id2) COMMENT 'rev:cf_short_prefix',
+index id2_id1 (id2, id1) COMMENT 'rev:cf_short_prefix',
+index id2_id3 (id2, id3) COMMENT 'rev:cf_short_prefix',
+index id2_id4 (id2, id4) COMMENT 'rev:cf_short_prefix',
+index id2_id3_id1_id4 (id2, id3, id1, id4) COMMENT 'rev:cf_short_prefix',
+index id3_id2 (id3, id2) COMMENT 'rev:cf_short_prefix'
+) engine=ROCKSDB;
+create or replace table t2 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id4) COMMENT 'rev:cf_short_prefix',
+index id2 (id2) COMMENT 'rev:cf_short_prefix',
+index id2_id3 (id2, id3) COMMENT 'rev:cf_short_prefix',
+index id2_id4 (id2, id4) COMMENT 'rev:cf_short_prefix',
+index id2_id4_id5 (id2, id4, id5) COMMENT 'rev:cf_short_prefix',
+index id3_id4 (id3, id4) COMMENT 'rev:cf_short_prefix',
+index id3_id5 (id3, id5) COMMENT 'rev:cf_short_prefix'
+) engine=ROCKSDB;
+insert t1
+select (seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc"
+ from seq_1_to_10000;
+insert t2 select * from t1;
+set global rocksdb_force_flush_memtable_now=1;
+call bloom_start();
+select count(*) from t1;
+count(*)
+10000
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2;
+count(*)
+10000
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(PRIMARY) where id1 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index(id3_id4) where id3 >= '1';
+count(*)
+10000
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=2 and id1=1;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=24 and id1=12;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=88 and id1=44;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=100 and id1=50;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=428 and id1=214;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=1 and id4=1 and id5=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=23 and id4=115 and id5=115;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=500 and id4=2500 and id5=2500;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=601 and id4=3005 and id5=3005;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=1 and id3='1' and id1=1 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36 and id3='36' and id1=18 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124' and id1=62 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=888 and id3='888' and id1=444 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124';
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1 and id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=12 and id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=1;
+count(*)
+10
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=12;
+count(*)
+10
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=23;
+count(*)
+10
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=100;
+count(*)
+10
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=234;
+count(*)
+10
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=234;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=1 and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=23 and id4=115;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=500 and id4=2500;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=601 and id4=3005;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='100' and id5=500;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='240' and id5=1200;
+count(*)
+1
+call bloom_end();
+checked
+true
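+# Repeat in cf_long_prefix (presumably a longer prefix extractor): the
+# filter can only be checked when the equality condition covers the whole
+# prefix, so most lookups below show checked=false again.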
+create or replace table t1 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id1, id2, id3, id4) COMMENT 'cf_long_prefix',
+index id2 (id2) COMMENT 'cf_long_prefix',
+index id2_id1 (id2, id1) COMMENT 'cf_long_prefix',
+index id2_id3 (id2, id3) COMMENT 'cf_long_prefix',
+index id2_id4 (id2, id4) COMMENT 'cf_long_prefix',
+index id2_id3_id1_id4 (id2, id3, id1, id4) COMMENT 'cf_long_prefix',
+index id3_id2 (id3, id2) COMMENT 'cf_long_prefix'
+) engine=ROCKSDB;
+create or replace table t2 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id4) COMMENT 'cf_long_prefix',
+index id2 (id2) COMMENT 'cf_long_prefix',
+index id2_id3 (id2, id3) COMMENT 'cf_long_prefix',
+index id2_id4 (id2, id4) COMMENT 'cf_long_prefix',
+index id2_id4_id5 (id2, id4, id5) COMMENT 'cf_long_prefix',
+index id3_id4 (id3, id4) COMMENT 'cf_long_prefix',
+index id3_id5 (id3, id5) COMMENT 'cf_long_prefix'
+) engine=ROCKSDB;
+insert t1
+select (seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc"
+ from seq_1_to_10000;
+insert t2 select * from t1;
+set global rocksdb_force_flush_memtable_now=1;
+call bloom_start();
+select count(*) from t1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(PRIMARY) where id1 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index(id3_id4) where id3 >= '1';
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=2 and id1=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=24 and id1=12;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=88 and id1=44;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=100 and id1=50;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=428 and id1=214;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=1 and id4=1 and id5=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=23 and id4=115 and id5=115;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=500 and id4=2500 and id5=2500;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=601 and id4=3005 and id5=3005;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=1 and id3='1' and id1=1 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36 and id3='36' and id1=18 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124' and id1=62 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=888 and id3='888' and id1=444 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1 and id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=12 and id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=1;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=12;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=23;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=100;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=234;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=234;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=1 and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=23 and id4=115;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=500 and id4=2500;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=601 and id4=3005;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='100' and id5=500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='240' and id5=1200;
+count(*)
+1
+call bloom_end();
+checked
+false
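+# And the reverse-ordered variant of the long-prefix column family.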
+create or replace table t1 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id1, id2, id3, id4) COMMENT 'rev:cf_long_prefix',
+index id2 (id2) COMMENT 'rev:cf_long_prefix',
+index id2_id1 (id2, id1) COMMENT 'rev:cf_long_prefix',
+index id2_id3 (id2, id3) COMMENT 'rev:cf_long_prefix',
+index id2_id4 (id2, id4) COMMENT 'rev:cf_long_prefix',
+index id2_id3_id1_id4 (id2, id3, id1, id4) COMMENT 'rev:cf_long_prefix',
+index id3_id2 (id3, id2) COMMENT 'rev:cf_long_prefix'
+) engine=ROCKSDB;
+create or replace table t2 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id4) COMMENT 'rev:cf_long_prefix',
+index id2 (id2) COMMENT 'rev:cf_long_prefix',
+index id2_id3 (id2, id3) COMMENT 'rev:cf_long_prefix',
+index id2_id4 (id2, id4) COMMENT 'rev:cf_long_prefix',
+index id2_id4_id5 (id2, id4, id5) COMMENT 'rev:cf_long_prefix',
+index id3_id4 (id3, id4) COMMENT 'rev:cf_long_prefix',
+index id3_id5 (id3, id5) COMMENT 'rev:cf_long_prefix'
+) engine=ROCKSDB;
+insert t1
+select (seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc"
+ from seq_1_to_10000;
+insert t2 select * from t1;
+set global rocksdb_force_flush_memtable_now=1;
+call bloom_start();
+select count(*) from t1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(PRIMARY) where id1 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index(id3_id4) where id3 >= '1';
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=2 and id1=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=24 and id1=12;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=88 and id1=44;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=100 and id1=50;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=428 and id1=214;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=1 and id4=1 and id5=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=23 and id4=115 and id5=115;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=500 and id4=2500 and id5=2500;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=601 and id4=3005 and id5=3005;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=1 and id3='1' and id1=1 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36 and id3='36' and id1=18 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124' and id1=62 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=888 and id3='888' and id1=444 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1 and id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=12 and id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=1;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=12;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=23;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=100;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=234;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=234;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=1 and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=23 and id4=115;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=500 and id4=2500;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=601 and id4=3005;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='100' and id5=500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='240' and id5=1200;
+count(*)
+1
+call bloom_end();
+checked
+false
+create table r1 (id1 bigint, id2 bigint, id3 bigint, v1 int, v2 text, primary key (id1, id2, id3)) engine=rocksdb DEFAULT CHARSET=latin1 collate latin1_bin;
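+# Forward vs. backward iteration over the same prefix: on this forward
+# (non-rev) column family the prefix bloom filter is consulted for the
+# ascending scan below but not for the descending one.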
+call bloom_start();
+select * from r1 where id1=1 and id2 in (1) order by id3 asc;
+id1 id2 id3 v1 v2
+1 1 1 1 1
+call bloom_end();
+checked
+true
+call bloom_start();
+select * from r1 where id1=1 and id2 in (1) order by id3 desc;
+id1 id2 id3 v1 v2
+1 1 1 1 1
+call bloom_end();
+checked
+false
+DROP PROCEDURE bloom_start;
+DROP PROCEDURE bloom_end;
+truncate table t1;
+optimize table t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+truncate table t2;
+optimize table t2;
+Table Op Msg_type Msg_text
+test.t2 optimize status OK
+drop table if exists t1;
+drop table if exists t2;
+drop table if exists r1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter2.result b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter2.result
new file mode 100644
index 00000000000..d5369e2dbed
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter2.result
@@ -0,0 +1,71 @@
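+# Each block below snapshots rocksdb_bloom_filter_prefix_useful into @u,
+# runs a lookup, and checks the counter delta: some cases expect the
+# prefix bloom filter to have been useful (delta > 0), others expect it
+# to have been bypassed (delta = 0).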
+CREATE TABLE t0 (id1 VARCHAR(30), id2 INT, value INT, PRIMARY KEY (id1, id2)) ENGINE=rocksdb collate latin1_bin;
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+SELECT COUNT(*) FROM t0 WHERE id1='X' AND id2>=1;
+COUNT(*)
+10000
+select case when variable_value-@u = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+case when variable_value-@u = 0 then 'true' else 'false' end
+true
+DROP TABLE t0;
+CREATE TABLE t1 (id1 BIGINT, id2 INT, id3 BIGINT, value INT, PRIMARY KEY (id1, id2, id3)) ENGINE=rocksdb;
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2=1 AND id3>=2;
+COUNT(*)
+9999
+select case when variable_value-@u = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+case when variable_value-@u = 0 then 'true' else 'false' end
+true
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2>=1 AND id3>=2;
+COUNT(*)
+9999
+select case when variable_value-@u = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+case when variable_value-@u = 0 then 'true' else 'false' end
+true
+DROP TABLE t1;
+CREATE TABLE t2 (id1 INT, id2 VARCHAR(100), id3 BIGINT, value INT, PRIMARY KEY (id1, id2, id3)) ENGINE=rocksdb collate latin1_bin;
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+select count(*) from t2 WHERE id1=100 and id2 IN ('00000000000000000000', '100');
+count(*)
+1
+select case when variable_value-@u > 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+case when variable_value-@u > 0 then 'true' else 'false' end
+true
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+select count(*) from t2 WHERE id1=200 and id2 IN ('00000000000000000000', '200');
+count(*)
+1
+select case when variable_value-@u > 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+case when variable_value-@u > 0 then 'true' else 'false' end
+true
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+select count(*) from t2 WHERE id1=200 and id2 IN ('3', '200');
+count(*)
+1
+select case when variable_value-@u = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+case when variable_value-@u = 0 then 'true' else 'false' end
+true
+DROP TABLE t2;
+CREATE TABLE t3 (id1 BIGINT, id2 BIGINT, id3 BIGINT, id4 BIGINT, PRIMARY KEY (id1, id2, id3, id4)) ENGINE=rocksdb collate latin1_bin;
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_useful';
+SELECT COUNT(*) FROM t3 WHERE id1=1 AND id2=5000 AND id3=1 AND id4=1;
+COUNT(*)
+0
+select case when variable_value-@u > 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_useful';
+case when variable_value-@u > 0 then 'true' else 'false' end
+true
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+SELECT COUNT(*) FROM t3 WHERE id1=1 AND id2=1 AND id3=1;
+COUNT(*)
+1
+select case when variable_value-@u > 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+case when variable_value-@u > 0 then 'true' else 'false' end
+true
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+SELECT COUNT(*) FROM t3 WHERE id1=1 AND id2=1 AND id3=1 AND id4 <= 500;
+COUNT(*)
+1
+select case when variable_value-@u > 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+case when variable_value-@u > 0 then 'true' else 'false' end
+true
+DROP TABLE t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter3.result b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter3.result
new file mode 100644
index 00000000000..f3c4fdf1040
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter3.result
@@ -0,0 +1,140 @@
+CREATE TABLE `linktable` (
+`id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+`visibility` tinyint(3) NOT NULL DEFAULT '0',
+`data` varchar(255) NOT NULL DEFAULT '',
+`time` bigint(20) unsigned NOT NULL DEFAULT '0',
+`version` int(11) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (link_type, `id1`,`id2`) COMMENT 'cf_link_pk',
+KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`version`,`data`) COMMENT 'rev:cf_link_id1_type',
+KEY `id1_type2` (`id1`,`link_type`,`time`,`version`,`data`,`visibility`) COMMENT 'rev:cf_link_id1_type2',
+KEY `id1_type3` (`id1`,`visibility`,`time`,`version`,`data`,`link_type`) COMMENT 'rev:cf_link_id1_type3'
+) ENGINE=RocksDB DEFAULT COLLATE=latin1_bin;
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+select id1, id2, link_type, visibility, data, time, version from linktable FORCE INDEX(`id1_type`) where id1 = 100 and link_type = 1 and time >= 0 and time <= 9223372036854775807 and visibility = 1 order by time desc;
+id1 id2 link_type visibility data time version
+100 100 1 1 100 100 100
+select case when variable_value-@c > 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+case when variable_value-@c > 0 then 'true' else 'false' end
+true
+# MariaDB: we don't have optimizer_force_index_for_range, but we can use EITS
+# (Engine-Independent Table Statistics) to get the query plan we want.
+set @tmp_use_stat_tables= @@use_stat_tables;
+set use_stat_tables='preferably';
+analyze table linktable persistent for all;
+Table Op Msg_type Msg_text
+test.linktable analyze status Engine-independent statistics collected
+test.linktable analyze status OK
+flush tables;
+explain select * from linktable;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE linktable ALL NULL NULL NULL NULL 10000
+# This must use range(id1_type2), key_len=24
+explain
+select id1, id2, link_type, visibility, data, time, version from linktable
+FORCE INDEX(`id1_type2`) where id1 = 100 and link_type = 1 and time >= 0 and time <= 9223372036854775807 order by time desc;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE linktable range id1_type2 id1_type2 24 NULL 1000 Using where; Using index
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+select id1, id2, link_type, visibility, data, time, version from linktable FORCE INDEX(`id1_type2`) where id1 = 100 and link_type = 1 and time >= 0 and time <= 9223372036854775807 order by time desc;
+id1 id2 link_type visibility data time version
+100 100 1 1 100 100 100
+select case when variable_value-@c > 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+case when variable_value-@c > 0 then 'true' else 'false' end
+true
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+select id1, id2, link_type, visibility, data, time, version from linktable FORCE INDEX(`id1_type3`) where id1 = 100 and time >= 0 and time <= 9223372036854775807 and visibility = 1 order by time desc;
+id1 id2 link_type visibility data time version
+100 100 1 1 100 100 100
+select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+case when variable_value-@c = 0 then 'true' else 'false' end
+true
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+select id1, id2, link_type, visibility, data, time, version from linktable FORCE INDEX(`id1_type`) where id1 = 100 and link_type = 1 and visibility = 1 and time >= 0 order by time desc;
+id1 id2 link_type visibility data time version
+100 100 1 1 100 100 100
+select case when variable_value-@c > 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+case when variable_value-@c > 0 then 'true' else 'false' end
+true
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+select id1, id2, link_type, visibility, data, time, version from linktable FORCE INDEX(`id1_type2`) where id1 = 100 and link_type = 1 and time >= 0 order by time desc;
+id1 id2 link_type visibility data time version
+100 100 1 1 100 100 100
+select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+case when variable_value-@c = 0 then 'true' else 'false' end
+true
+## HA_READ_PREFIX_LAST
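+# HA_READ_PREFIX_LAST: ORDER BY ... DESC over a key prefix reads the last
+# row of that prefix. 'BF len' below is the equality-prefix length in bytes
+# seen by the bloom filter check; presumably the filter can only be
+# consulted when this covers the whole 20-byte prefix extractor.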
+# BF len 20
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+select id1, id2, link_type, visibility, data, time, version from linktable FORCE INDEX(`id1_type`) where id1 = 100 and link_type = 1 and visibility = 1 order by time desc;
+id1 id2 link_type visibility data time version
+100 100 1 1 100 100 100
+select case when variable_value-@c > 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+case when variable_value-@c > 0 then 'true' else 'false' end
+true
+# BF len 19
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+select id1, id2, link_type, visibility, data, time, version from linktable FORCE INDEX(`id1_type2`) where id1 = 100 and link_type = 1 order by time desc;
+id1 id2 link_type visibility data time version
+100 100 1 1 100 100 100
+select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+case when variable_value-@c = 0 then 'true' else 'false' end
+true
+# BF len 12
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+select id1, id2, link_type, visibility, data, time, version from linktable FORCE INDEX(`id1_type3`) where id1 = 100 and visibility = 1 order by time desc;
+id1 id2 link_type visibility data time version
+100 100 1 1 100 100 100
+select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+case when variable_value-@c = 0 then 'true' else 'false' end
+true
+DROP TABLE linktable;
+#
+# The bloom filter prefix is 20 bytes.
+# Create a key which is longer than that, so that we can see that
+# eq_cond_len= slice.size() - 1;
+# doesn't work.
+#
+# indexnr 4
+# kp0 + 4 = 8
+# kp1 + 8 = 16
+# kp2 + 8 = 24  (24 > 20-byte prefix length)
+# kp3 + 8 = 32
+create table t1 (
+pk int primary key,
+kp0 int not null,
+kp1 bigint not null,
+kp2 bigint not null,
+kp3 bigint not null,
+key kp12(kp0, kp1, kp2, kp3) comment 'rev:x1'
+) engine=rocksdb;
+insert into t1 values (1, 1,1, 1,1);
+insert into t1 values (10,1,1,0x12FFFFFFFFFF,1);
+insert into t1 values (11,1,1,0x12FFFFFFFFFF,1);
+insert into t1 values (20,2,2,0x12FFFFFFFFFF,1);
+insert into t1 values (21,2,2,0x12FFFFFFFFFF,1);
+explain
+select * from t1 where kp0=1 and kp1=1 and kp2=0x12FFFFFFFFFF order by kp3 desc;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref kp12 kp12 20 const,const,const # Using where; Using index
+show status like '%rocksdb_bloom_filter_prefix%';
+Variable_name Value
+Rocksdb_bloom_filter_prefix_checked 0
+Rocksdb_bloom_filter_prefix_useful 0
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+select * from t1 where kp0=1 and kp1=1 and kp2=0x12FFFFFFFFFF order by kp3 desc;
+pk kp0 kp1 kp2 kp3
+11 1 1 20890720927743 1
+10 1 1 20890720927743 1
+show status like '%rocksdb_bloom_filter_prefix%';
+Variable_name Value
+Rocksdb_bloom_filter_prefix_checked 0
+Rocksdb_bloom_filter_prefix_useful 0
+# The following MUST show TRUE:
+select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+case when variable_value-@c = 0 then 'true' else 'false' end
+true
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter4.result b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter4.result
new file mode 100644
index 00000000000..1f4d1a641a2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter4.result
@@ -0,0 +1,30 @@
+CREATE TABLE t1 (
+`id1` int unsigned NOT NULL DEFAULT '0',
+`id2` int unsigned NOT NULL DEFAULT '0',
+`link_type` int unsigned NOT NULL DEFAULT '0',
+`visibility` tinyint NOT NULL DEFAULT '0',
+`data` varchar(255) NOT NULL DEFAULT '',
+`time` int unsigned NOT NULL DEFAULT '0',
+`version` int unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (id1, link_type, visibility, id2) COMMENT 'rev:cf_link_pk'
+) ENGINE=RocksDB DEFAULT COLLATE=latin1_bin;
+CREATE PROCEDURE select_test()
+BEGIN
+DECLARE id1_cond INT;
+SET id1_cond = 1;
+WHILE id1_cond <= 20000 DO
+SELECT count(*) AS cnt FROM (SELECT id1 FROM t1 FORCE INDEX (PRIMARY) WHERE id1 = id1_cond AND link_type = 1 AND visibility = 1 ORDER BY id2 DESC) AS t INTO @cnt;
+IF @cnt < 1 THEN
+SELECT id1_cond, @cnt;
+END IF;
+SET id1_cond = id1_cond + 1;
+END WHILE;
+END//
+"Skipping bloom filter"
+SET session rocksdb_skip_bloom_filter_on_read=1;
+CALL select_test();
+"Using bloom filter"
+SET session rocksdb_skip_bloom_filter_on_read=0;
+CALL select_test();
+DROP PROCEDURE select_test;
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result
new file mode 100644
index 00000000000..daf4f5e30ba
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result
@@ -0,0 +1,85 @@
+#
+# Issue #809: Wrong query result with bloom filters
+#
+create table t1 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id1, id2, id3, id4) COMMENT 'rev:bf5_1'
+) engine=ROCKSDB;
+create table t2(a int);
+insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t3(seq int);
+insert into t3
+select
+1+ A.a + B.a* 10 + C.a * 100 + D.a * 1000
+from t2 A, t2 B, t2 C, t2 D;
+insert t1
+select
+(seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc"
+from t3;
+set global rocksdb_force_flush_memtable_now=1;
+# Full table scan
+explain
+select * from t1 limit 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10000
+select * from t1 limit 10;
+id1 id2 id3 id4 id5 value value2
+1000 2000 2000 10000 10000 1000 aaabbbccc
+1000 2000 2000 9999 9999 1000 aaabbbccc
+1000 2000 2000 9998 9998 1000 aaabbbccc
+1000 2000 2000 9997 9997 1000 aaabbbccc
+1000 2000 2000 9996 9996 1000 aaabbbccc
+1000 1999 1999 9995 9995 1000 aaabbbccc
+1000 1999 1999 9994 9994 1000 aaabbbccc
+1000 1999 1999 9993 9993 1000 aaabbbccc
+1000 1999 1999 9992 9992 1000 aaabbbccc
+1000 1999 1999 9991 9991 1000 aaabbbccc
+# An index scan starting from the end of the table:
+explain
+select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL PRIMARY 122 NULL 1
+select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
+id1 id2 id3 id4 id5 value value2
+1000 2000 2000 10000 10000 1000 aaabbbccc
+create table t4 (
+pk int unsigned not null primary key,
+kp1 int unsigned not null,
+kp2 int unsigned not null,
+col1 int unsigned,
+key(kp1, kp2) comment 'rev:bf5_2'
+) engine=rocksdb;
+insert into t4 values (1, 0xFFFF, 0xFFF, 12345);
+# This must not fail an assert:
+select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc;
+pk kp1 kp2 col1
+#
+# Issue #881: Issue #809 still occurs for reverse scans on forward cfs
+#
+create table t5 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id1, id2, id3, id4) COMMENT 'bf5_1'
+) engine=ROCKSDB;
+insert into t5 select * from t1;
+set global rocksdb_force_flush_memtable_now=1;
+# An index scan starting from the end of the table:
+explain
+select * from t5 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t5 index NULL PRIMARY 122 NULL 1
+select * from t5 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
+id1 id2 id3 id4 id5 value value2
+1000 2000 2000 10000 10000 1000 aaabbbccc
+drop table t1,t2,t3,t4,t5;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter_bulk_load.result b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter_bulk_load.result
new file mode 100644
index 00000000000..4b02d1103cf
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter_bulk_load.result
@@ -0,0 +1,15 @@
+create table r1 (id bigint primary key, value bigint) engine=rocksdb;
+create table r2 (id bigint, value bigint, primary key (id) comment 'cf2') engine=rocksdb;
+set session rocksdb_bulk_load=1;
+set session rocksdb_bulk_load=0;
+select variable_value into @h from information_schema.global_status where variable_name='rocksdb_block_cache_filter_hit';
+insert into r1 values (100, 100);
+select variable_value-@h from information_schema.global_status where variable_name='rocksdb_block_cache_filter_hit';
+variable_value-@h
+1
+select variable_value into @h from information_schema.global_status where variable_name='rocksdb_block_cache_filter_hit';
+insert into r2 values (100, 100);
+select variable_value-@h from information_schema.global_status where variable_name='rocksdb_block_cache_filter_hit';
+variable_value-@h
+0
+DROP TABLE r1, r2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter_skip.result b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter_skip.result
new file mode 100644
index 00000000000..28475630564
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter_skip.result
@@ -0,0 +1,2042 @@
+CREATE PROCEDURE bloom_start()
+BEGIN
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+END//
+CREATE PROCEDURE bloom_end()
+BEGIN
+select case when variable_value-@c > 0 then 'true' else 'false' end as checked from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+END//
+create or replace table t1 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id1, id2, id3, id4) ,
+index id2 (id2) ,
+index id2_id1 (id2, id1) ,
+index id2_id3 (id2, id3) ,
+index id2_id4 (id2, id4) ,
+index id2_id3_id1_id4 (id2, id3, id1, id4) ,
+index id3_id2 (id3, id2)
+) engine=ROCKSDB;
+create or replace table t2 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id4) ,
+index id2 (id2) ,
+index id2_id3 (id2, id3) ,
+index id2_id4 (id2, id4) ,
+index id2_id4_id5 (id2, id4, id5) ,
+index id3_id4 (id3, id4) ,
+index id3_id5 (id3, id5)
+) engine=ROCKSDB;
+insert t1
+select (seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc"
+ from seq_1_to_10000;
+insert t2 select * from t1;
+set global rocksdb_force_flush_memtable_now=1;
+call bloom_start();
+select count(*) from t1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(PRIMARY) where id1 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index(id3_id4) where id3 >= '1';
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=2 and id1=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=24 and id1=12;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=88 and id1=44;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=100 and id1=50;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=428 and id1=214;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=1 and id4=1 and id5=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=23 and id4=115 and id5=115;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=500 and id4=2500 and id5=2500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=601 and id4=3005 and id5=3005;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=1 and id3='1' and id1=1 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36 and id3='36' and id1=18 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124' and id1=62 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=888 and id3='888' and id1=444 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1 and id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=12 and id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=1;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=12;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=23;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=100;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=234;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=234;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=1 and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=23 and id4=115;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=500 and id4=2500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=601 and id4=3005;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='100' and id5=500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='240' and id5=1200;
+count(*)
+1
+call bloom_end();
+checked
+false
+create or replace table t1 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id1, id2, id3, id4) COMMENT 'cf_short_prefix',
+index id2 (id2) COMMENT 'cf_short_prefix',
+index id2_id1 (id2, id1) COMMENT 'cf_short_prefix',
+index id2_id3 (id2, id3) COMMENT 'cf_short_prefix',
+index id2_id4 (id2, id4) COMMENT 'cf_short_prefix',
+index id2_id3_id1_id4 (id2, id3, id1, id4) COMMENT 'cf_short_prefix',
+index id3_id2 (id3, id2) COMMENT 'cf_short_prefix'
+) engine=ROCKSDB;
+create or replace table t2 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id4) COMMENT 'cf_short_prefix',
+index id2 (id2) COMMENT 'cf_short_prefix',
+index id2_id3 (id2, id3) COMMENT 'cf_short_prefix',
+index id2_id4 (id2, id4) COMMENT 'cf_short_prefix',
+index id2_id4_id5 (id2, id4, id5) COMMENT 'cf_short_prefix',
+index id3_id4 (id3, id4) COMMENT 'cf_short_prefix',
+index id3_id5 (id3, id5) COMMENT 'cf_short_prefix'
+) engine=ROCKSDB;
+insert t1
+select (seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc"
+ from seq_1_to_10000;
+insert t2 select * from t1;
+set global rocksdb_force_flush_memtable_now=1;
+call bloom_start();
+select count(*) from t1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(PRIMARY) where id1 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index(id3_id4) where id3 >= '1';
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=2 and id1=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=24 and id1=12;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=88 and id1=44;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=100 and id1=50;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=428 and id1=214;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=1 and id4=1 and id5=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=23 and id4=115 and id5=115;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=500 and id4=2500 and id5=2500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=601 and id4=3005 and id5=3005;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=1 and id3='1' and id1=1 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36 and id3='36' and id1=18 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124' and id1=62 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=888 and id3='888' and id1=444 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1 and id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=12 and id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=1;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=12;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=23;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=100;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=234;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=234;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=1 and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=23 and id4=115;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=500 and id4=2500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=601 and id4=3005;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='100' and id5=500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='240' and id5=1200;
+count(*)
+1
+call bloom_end();
+checked
+false
+create or replace table t1 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id1, id2, id3, id4) COMMENT 'rev:cf_short_prefix',
+index id2 (id2) COMMENT 'rev:cf_short_prefix',
+index id2_id1 (id2, id1) COMMENT 'rev:cf_short_prefix',
+index id2_id3 (id2, id3) COMMENT 'rev:cf_short_prefix',
+index id2_id4 (id2, id4) COMMENT 'rev:cf_short_prefix',
+index id2_id3_id1_id4 (id2, id3, id1, id4) COMMENT 'rev:cf_short_prefix',
+index id3_id2 (id3, id2) COMMENT 'rev:cf_short_prefix'
+) engine=ROCKSDB;
+create or replace table t2 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id4) COMMENT 'rev:cf_short_prefix',
+index id2 (id2) COMMENT 'rev:cf_short_prefix',
+index id2_id3 (id2, id3) COMMENT 'rev:cf_short_prefix',
+index id2_id4 (id2, id4) COMMENT 'rev:cf_short_prefix',
+index id2_id4_id5 (id2, id4, id5) COMMENT 'rev:cf_short_prefix',
+index id3_id4 (id3, id4) COMMENT 'rev:cf_short_prefix',
+index id3_id5 (id3, id5) COMMENT 'rev:cf_short_prefix'
+) engine=ROCKSDB;
+insert t1
+select (seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc"
+ from seq_1_to_10000;
+insert t2 select * from t1;
+set global rocksdb_force_flush_memtable_now=1;
+call bloom_start();
+select count(*) from t1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(PRIMARY) where id1 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index(id3_id4) where id3 >= '1';
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=2 and id1=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=24 and id1=12;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=88 and id1=44;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=100 and id1=50;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=428 and id1=214;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=1 and id4=1 and id5=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=23 and id4=115 and id5=115;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=500 and id4=2500 and id5=2500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=601 and id4=3005 and id5=3005;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=1 and id3='1' and id1=1 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36 and id3='36' and id1=18 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124' and id1=62 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=888 and id3='888' and id1=444 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1 and id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=12 and id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=1;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=12;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=23;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=100;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=234;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=234;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=1 and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=23 and id4=115;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=500 and id4=2500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=601 and id4=3005;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='100' and id5=500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='240' and id5=1200;
+count(*)
+1
+call bloom_end();
+checked
+false
+create or replace table t1 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id1, id2, id3, id4) COMMENT 'cf_long_prefix',
+index id2 (id2) COMMENT 'cf_long_prefix',
+index id2_id1 (id2, id1) COMMENT 'cf_long_prefix',
+index id2_id3 (id2, id3) COMMENT 'cf_long_prefix',
+index id2_id4 (id2, id4) COMMENT 'cf_long_prefix',
+index id2_id3_id1_id4 (id2, id3, id1, id4) COMMENT 'cf_long_prefix',
+index id3_id2 (id3, id2) COMMENT 'cf_long_prefix'
+) engine=ROCKSDB;
+create or replace table t2 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id4) COMMENT 'cf_long_prefix',
+index id2 (id2) COMMENT 'cf_long_prefix',
+index id2_id3 (id2, id3) COMMENT 'cf_long_prefix',
+index id2_id4 (id2, id4) COMMENT 'cf_long_prefix',
+index id2_id4_id5 (id2, id4, id5) COMMENT 'cf_long_prefix',
+index id3_id4 (id3, id4) COMMENT 'cf_long_prefix',
+index id3_id5 (id3, id5) COMMENT 'cf_long_prefix'
+) engine=ROCKSDB;
+insert t1
+select (seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc"
+ from seq_1_to_10000;
+insert t2 select * from t1;
+set global rocksdb_force_flush_memtable_now=1;
+call bloom_start();
+select count(*) from t1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(PRIMARY) where id1 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index(id3_id4) where id3 >= '1';
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=2 and id1=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=24 and id1=12;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=88 and id1=44;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=100 and id1=50;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=428 and id1=214;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=1 and id4=1 and id5=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=23 and id4=115 and id5=115;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=500 and id4=2500 and id5=2500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=601 and id4=3005 and id5=3005;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=1 and id3='1' and id1=1 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36 and id3='36' and id1=18 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124' and id1=62 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=888 and id3='888' and id1=444 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1 and id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=12 and id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=1;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=12;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=23;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=100;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=234;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=234;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=1 and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=23 and id4=115;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=500 and id4=2500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=601 and id4=3005;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='100' and id5=500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='240' and id5=1200;
+count(*)
+1
+call bloom_end();
+checked
+false
+create or replace table t1 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id1, id2, id3, id4) COMMENT 'rev:cf_long_prefix',
+index id2 (id2) COMMENT 'rev:cf_long_prefix',
+index id2_id1 (id2, id1) COMMENT 'rev:cf_long_prefix',
+index id2_id3 (id2, id3) COMMENT 'rev:cf_long_prefix',
+index id2_id4 (id2, id4) COMMENT 'rev:cf_long_prefix',
+index id2_id3_id1_id4 (id2, id3, id1, id4) COMMENT 'rev:cf_long_prefix',
+index id3_id2 (id3, id2) COMMENT 'rev:cf_long_prefix'
+) engine=ROCKSDB;
+create or replace table t2 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id4) COMMENT 'rev:cf_long_prefix',
+index id2 (id2) COMMENT 'rev:cf_long_prefix',
+index id2_id3 (id2, id3) COMMENT 'rev:cf_long_prefix',
+index id2_id4 (id2, id4) COMMENT 'rev:cf_long_prefix',
+index id2_id4_id5 (id2, id4, id5) COMMENT 'rev:cf_long_prefix',
+index id3_id4 (id3, id4) COMMENT 'rev:cf_long_prefix',
+index id3_id5 (id3, id5) COMMENT 'rev:cf_long_prefix'
+) engine=ROCKSDB;
+insert t1
+select (seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc"
+ from seq_1_to_10000;
+insert t2 select * from t1;
+set global rocksdb_force_flush_memtable_now=1;
+call bloom_start();
+select count(*) from t1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(PRIMARY) where id1 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index(id3_id4) where id3 >= '1';
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=2 and id1=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=24 and id1=12;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=88 and id1=44;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=100 and id1=50;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=428 and id1=214;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=1 and id4=1 and id5=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=23 and id4=115 and id5=115;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=500 and id4=2500 and id5=2500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=601 and id4=3005 and id5=3005;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=1 and id3='1' and id1=1 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36 and id3='36' and id1=18 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124' and id1=62 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=888 and id3='888' and id1=444 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1 and id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=12 and id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=1;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=12;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=23;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=100;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=234;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=234;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=1 and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=23 and id4=115;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=500 and id4=2500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=601 and id4=3005;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='100' and id5=500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='240' and id5=1200;
+count(*)
+1
+call bloom_end();
+checked
+false
+create table r1 (id1 bigint, id2 bigint, id3 bigint, v1 int, v2 text, primary key (id1, id2, id3)) engine=rocksdb DEFAULT CHARSET=latin1 collate latin1_bin;
+call bloom_start();
+select * from r1 where id1=1 and id2 in (1) order by id3 asc;
+id1 id2 id3 v1 v2
+1 1 1 1 1
+call bloom_end();
+checked
+false
+call bloom_start();
+select * from r1 where id1=1 and id2 in (1) order by id3 desc;
+id1 id2 id3 v1 v2
+1 1 1 1 1
+call bloom_end();
+checked
+false
+DROP PROCEDURE bloom_start;
+DROP PROCEDURE bloom_end;
+truncate table t1;
+optimize table t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+truncate table t2;
+optimize table t2;
+Table Op Msg_type Msg_text
+test.t2 optimize status OK
+drop table if exists t1;
+drop table if exists t2;
+drop table if exists r1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result
new file mode 100644
index 00000000000..e532ee6cf47
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result
@@ -0,0 +1,77 @@
+Data will be ordered in ascending order
+CREATE TABLE t1(
+pk CHAR(5),
+a CHAR(30),
+b CHAR(30),
+PRIMARY KEY(pk) COMMENT "cf1",
+KEY(a)
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
+CREATE TABLE t2(
+pk CHAR(5),
+a CHAR(30),
+b CHAR(30),
+PRIMARY KEY(pk) COMMENT "cf1",
+KEY(a)
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
+CREATE TABLE t3(
+pk CHAR(5),
+a CHAR(30),
+b CHAR(30),
+PRIMARY KEY(pk) COMMENT "cf1",
+KEY(a)
+) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
+connect other,localhost,root,,;
+set session transaction isolation level repeatable read;
+start transaction with consistent snapshot;
+select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+Has opened snapshots
+1
+connection default;
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=
+'cf1={write_buffer_size=8m;target_file_size_base=1m};';
+set rocksdb_bulk_load=1;
+set rocksdb_bulk_load_size=100000;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+pk a b
+LOAD DATA INFILE <input_file> INTO TABLE t2;
+pk a b
+LOAD DATA INFILE <input_file> INTO TABLE t3;
+pk a b
+set rocksdb_bulk_load=0;
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+ANALYZE TABLE t1, t2, t3;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+test.t2 analyze status OK
+test.t3 analyze status OK
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+select count(pk),count(a) from t1;
+count(pk) count(a)
+2500000 2500000
+select count(b) from t1;
+count(b)
+2500000
+select count(pk),count(a) from t2;
+count(pk) count(a)
+2500000 2500000
+select count(b) from t2;
+count(b)
+2500000
+select count(pk),count(a) from t3;
+count(pk) count(a)
+2500000 2500000
+select count(b) from t3;
+count(b)
+2500000
+longfilenamethatvalidatesthatthiswillgetdeleted.bulk_load.tmp
+test.bulk_load.tmp
+disconnect other;
+DROP TABLE t1, t2, t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result
new file mode 100644
index 00000000000..4e79d82810e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result
@@ -0,0 +1,11 @@
+CREATE TABLE t1 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB;
+SET rocksdb_bulk_load_allow_unsorted=1;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (1);
+connect con1,localhost,root,,;
+DROP TABLE t1;
+connection default;
+disconnect con1;
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+ERROR 42S02: Table 'test.t1' doesn't exist
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result
new file mode 100644
index 00000000000..4ea8cbccc1e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result
@@ -0,0 +1,99 @@
+CREATE TABLE t1(pk INT, PRIMARY KEY(pk)) ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(10);
+INSERT INTO t1 VALUES(11);
+INSERT INTO t1 VALUES(9);
+ERROR HY000: Rows must be inserted in primary key order during bulk load operation
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+pk
+10
+11
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(1);
+INSERT INTO t1 VALUES(2);
+INSERT INTO t1 VALUES(20);
+INSERT INTO t1 VALUES(21);
+SET rocksdb_bulk_load=0;
+ERROR HY000: Rows inserted during bulk load must not overlap existing rows
+SHOW VARIABLES LIKE 'rocksdb_bulk_load';
+Variable_name Value
+rocksdb_bulk_load OFF
+call mtr.add_suppression('finalizing last SST file while setting bulk loading variable');
+SELECT * FROM t1;
+pk
+10
+11
+FOUND 1 /RocksDB: Error [0-9]+ finalizing last SST file while setting bulk loading variable/ in rocksdb.bulk_load_errors.1.err
+connect con1,localhost,root,,;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(1);
+INSERT INTO t1 VALUES(2);
+INSERT INTO t1 VALUES(20);
+INSERT INTO t1 VALUES(21);
+connection default;
+disconnect con1;
+SELECT * FROM t1;
+pk
+10
+11
+FOUND 1 /RocksDB: Error [0-9]+ finalizing last SST file while disconnecting/ in rocksdb.bulk_load_errors.2.err
+TRUNCATE TABLE t1;
+SET rocksdb_bulk_load_allow_unsorted=1;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(100);
+INSERT INTO t1 VALUES(101);
+INSERT INTO t1 VALUES(99);
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+pk
+99
+100
+101
+TRUNCATE TABLE t1;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(201);
+INSERT INTO t1 VALUES(200);
+INSERT INTO t1 VALUES(202);
+INSERT INTO t1 VALUES(201);
+ERROR 23000: Failed to insert the record: the key already exists
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+pk
+200
+201
+202
+SET rocksdb_bulk_load_allow_unsorted=DEFAULT;
+DROP TABLE t1;
+CREATE TABLE t1(c1 INT KEY) ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (),(),();
+ERROR HY000: Rows must be inserted in primary key order during bulk load operation
+SET rocksdb_bulk_load=0;
+DROP TABLE t1;
+SET @orig_table_open_cache=@@global.table_open_cache;
+CREATE TABLE t1(a INT AUTO_INCREMENT, b INT, PRIMARY KEY (a)) ENGINE=ROCKSDB DEFAULT CHARSET=latin1;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(13, 0);
+INSERT INTO t1 VALUES(2, 'test 2');
+Warnings:
+Warning 1366 Incorrect integer value: 'test 2' for column `test`.`t1`.`b` at row 1
+INSERT INTO t1 VALUES(@id, @arg04);
+SET @@global.table_open_cache=FALSE;
+Warnings:
+Warning 1292 Truncated incorrect table_open_cache value: '0'
+INSERT INTO t1 VALUES(51479+0.333333333,1);
+DROP TABLE t1;
+SET @@global.table_open_cache=@orig_table_open_cache;
+FOUND 1 /RocksDB: Error [0-9]+ finalizing bulk load while closing handler/ in rocksdb.bulk_load_errors.3.err
+CREATE TABLE t1 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB;
+CREATE TABLE t2 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (1), (2);
+INSERT INTO t2 VALUES (1), (2);
+INSERT INTO t1 VALUES (1);
+INSERT INTO t2 VALUES (3);
+ERROR HY000: Rows inserted during bulk load must not overlap existing rows
+SET rocksdb_bulk_load=0;
+DROP TABLE t1;
+DROP TABLE t2;
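
bulk_load_errors exercises the two failure modes separately: rows that arrive out of primary-key order are rejected immediately, while overlap with rows already in the table is only detected when the load is finalized (or when the connection closes or the handler is evicted, in which case the error goes to the server log and is checked with the FOUND pattern lines above). A minimal sketch of the two paths (illustrative table name t_demo):

CREATE TABLE t_demo (pk INT PRIMARY KEY) ENGINE=ROCKSDB;
SET rocksdb_bulk_load=1;
INSERT INTO t_demo VALUES (10);
INSERT INTO t_demo VALUES (9);   -- fails at once: not in PK order
SET rocksdb_bulk_load=0;         -- t_demo now holds pk=10
SET rocksdb_bulk_load=1;
INSERT INTO t_demo VALUES (1);
INSERT INTO t_demo VALUES (20);  -- buffered; range [1,20] spans existing pk=10
SET rocksdb_bulk_load=0;         -- fails: bulk-loaded range overlaps existing rows

The multi-row INSERT of empty value lists near the end fails the same way: the implicit defaults produce three rows with pk=0, and duplicate keys count as an ordering violation during bulk load.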
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result
new file mode 100644
index 00000000000..a00cbef5f76
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result
@@ -0,0 +1,77 @@
+Data will be ordered in ascending order
+CREATE TABLE t1(
+pk CHAR(5),
+a CHAR(30),
+b CHAR(30),
+PRIMARY KEY(pk) COMMENT "rev:cf1",
+KEY(a)
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
+CREATE TABLE t2(
+pk CHAR(5),
+a CHAR(30),
+b CHAR(30),
+PRIMARY KEY(pk) COMMENT "rev:cf1",
+KEY(a)
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
+CREATE TABLE t3(
+pk CHAR(5),
+a CHAR(30),
+b CHAR(30),
+PRIMARY KEY(pk) COMMENT "rev:cf1",
+KEY(a)
+) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
+connect other,localhost,root,,;
+set session transaction isolation level repeatable read;
+start transaction with consistent snapshot;
+select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+Has opened snapshots
+1
+connection default;
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=
+'cf1={write_buffer_size=8m;target_file_size_base=1m};';
+set rocksdb_bulk_load=1;
+set rocksdb_bulk_load_size=100000;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+pk a b
+LOAD DATA INFILE <input_file> INTO TABLE t2;
+pk a b
+LOAD DATA INFILE <input_file> INTO TABLE t3;
+pk a b
+set rocksdb_bulk_load=0;
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+ANALYZE TABLE t1, t2, t3;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+test.t2 analyze status OK
+test.t3 analyze status OK
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+select count(pk),count(a) from t1;
+count(pk) count(a)
+2500000 2500000
+select count(b) from t1;
+count(b)
+2500000
+select count(pk),count(a) from t2;
+count(pk) count(a)
+2500000 2500000
+select count(b) from t2;
+count(b)
+2500000
+select count(pk),count(a) from t3;
+count(pk) count(a)
+2500000 2500000
+select count(b) from t3;
+count(b)
+2500000
+longfilenamethatvalidatesthatthiswillgetdeleted.bulk_load.tmp
+test.bulk_load.tmp
+disconnect other;
+DROP TABLE t1, t2, t3;
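
In these CREATE TABLEs the index comment selects the RocksDB column family: a plain name ("cf1") maps the index into a forward-ordered CF, while the "rev:" prefix ("rev:cf1") maps it into a reverse-ordered one. The first line of each result file records how the generated <input_file> is sorted, so the bulk_load_rev_* variants between them cover both CF orderings against both input orderings. A sketch of the comment syntax (hypothetical table name t_rev):

CREATE TABLE t_rev (
  pk CHAR(5),
  PRIMARY KEY (pk) COMMENT "rev:cf1"  -- "rev:" = reverse-ordered column family
) ENGINE=ROCKSDB;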
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result
new file mode 100644
index 00000000000..edefadeadef
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result
@@ -0,0 +1,77 @@
+Data will be ordered in descending order
+CREATE TABLE t1(
+pk CHAR(5),
+a CHAR(30),
+b CHAR(30),
+PRIMARY KEY(pk) COMMENT "rev:cf1",
+KEY(a)
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
+CREATE TABLE t2(
+pk CHAR(5),
+a CHAR(30),
+b CHAR(30),
+PRIMARY KEY(pk) COMMENT "rev:cf1",
+KEY(a)
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
+CREATE TABLE t3(
+pk CHAR(5),
+a CHAR(30),
+b CHAR(30),
+PRIMARY KEY(pk) COMMENT "rev:cf1",
+KEY(a)
+) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
+connect other,localhost,root,,;
+set session transaction isolation level repeatable read;
+start transaction with consistent snapshot;
+select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+Has opened snapshots
+1
+connection default;
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=
+'cf1={write_buffer_size=8m;target_file_size_base=1m};';
+set rocksdb_bulk_load=1;
+set rocksdb_bulk_load_size=100000;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+pk a b
+LOAD DATA INFILE <input_file> INTO TABLE t2;
+pk a b
+LOAD DATA INFILE <input_file> INTO TABLE t3;
+pk a b
+set rocksdb_bulk_load=0;
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+ANALYZE TABLE t1, t2, t3;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+test.t2 analyze status OK
+test.t3 analyze status OK
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+select count(pk),count(a) from t1;
+count(pk) count(a)
+2500000 2500000
+select count(b) from t1;
+count(b)
+2500000
+select count(pk),count(a) from t2;
+count(pk) count(a)
+2500000 2500000
+select count(b) from t2;
+count(b)
+2500000
+select count(pk),count(a) from t3;
+count(pk) count(a)
+2500000 2500000
+select count(b) from t3;
+count(b)
+2500000
+longfilenamethatvalidatesthatthiswillgetdeleted.bulk_load.tmp
+test.bulk_load.tmp
+disconnect other;
+DROP TABLE t1, t2, t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result
new file mode 100644
index 00000000000..96de6edf9c8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result
@@ -0,0 +1,77 @@
+Data will be ordered in descending order
+CREATE TABLE t1(
+pk CHAR(5),
+a CHAR(30),
+b CHAR(30),
+PRIMARY KEY(pk) COMMENT "cf1",
+KEY(a)
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
+CREATE TABLE t2(
+pk CHAR(5),
+a CHAR(30),
+b CHAR(30),
+PRIMARY KEY(pk) COMMENT "cf1",
+KEY(a)
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
+CREATE TABLE t3(
+pk CHAR(5),
+a CHAR(30),
+b CHAR(30),
+PRIMARY KEY(pk) COMMENT "cf1",
+KEY(a)
+) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
+connect other,localhost,root,,;
+set session transaction isolation level repeatable read;
+start transaction with consistent snapshot;
+select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+Has opened snapshots
+1
+connection default;
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=
+'cf1={write_buffer_size=8m;target_file_size_base=1m};';
+set rocksdb_bulk_load=1;
+set rocksdb_bulk_load_size=100000;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+pk a b
+LOAD DATA INFILE <input_file> INTO TABLE t2;
+pk a b
+LOAD DATA INFILE <input_file> INTO TABLE t3;
+pk a b
+set rocksdb_bulk_load=0;
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+ANALYZE TABLE t1, t2, t3;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+test.t2 analyze status OK
+test.t3 analyze status OK
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+select count(pk),count(a) from t1;
+count(pk) count(a)
+2500000 2500000
+select count(b) from t1;
+count(b)
+2500000
+select count(pk),count(a) from t2;
+count(pk) count(a)
+2500000 2500000
+select count(b) from t2;
+count(b)
+2500000
+select count(pk),count(a) from t3;
+count(pk) count(a)
+2500000 2500000
+select count(b) from t3;
+count(b)
+2500000
+longfilenamethatvalidatesthatthiswillgetdeleted.bulk_load.tmp
+test.bulk_load.tmp
+disconnect other;
+DROP TABLE t1, t2, t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_sk.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_sk.result
new file mode 100644
index 00000000000..42f820a2a42
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_sk.result
@@ -0,0 +1,229 @@
+SET rocksdb_bulk_load_size=15;
+CREATE TABLE t4 (a INT, b INT, c INT,
+PRIMARY KEY (a),
+KEY (b),
+KEY (c) COMMENT "rev:cf") ENGINE=ROCKSDB;
+CREATE TABLE t3 (a INT, b INT, c INT,
+PRIMARY KEY (a),
+KEY (b),
+KEY (c) COMMENT "rev:cf") ENGINE=ROCKSDB;
+CREATE TABLE t2 (a INT, b INT, c INT,
+PRIMARY KEY (a),
+KEY (b),
+KEY (c) COMMENT "rev:cf") ENGINE=ROCKSDB;
+CREATE TABLE t1 (a INT, b INT, c INT,
+PRIMARY KEY (a),
+KEY (b),
+KEY (c) COMMENT "rev:cf") ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 SELECT * FROM t3 FORCE INDEX (PRIMARY) ORDER BY a;
+SELECT count(*) FROM t1 FORCE INDEX (PRIMARY);
+count(*)
+0
+SELECT count(*) FROM t1 FORCE INDEX (b);
+count(*)
+10
+SELECT count(*) FROM t1 FORCE INDEX (c);
+count(*)
+10
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b c
+-9 11 11
+-7 9 9
+-5 7 7
+-3 5 5
+-1 3 3
+2 0 0
+4 -2 -2
+6 -4 -4
+8 -6 -6
+10 -8 -8
+SELECT b FROM t1 FORCE INDEX (b);
+b
+-8
+-6
+-4
+-2
+0
+3
+5
+7
+9
+11
+SELECT c FROM t1 FORCE INDEX (c);
+c
+-8
+-6
+-4
+-2
+0
+3
+5
+7
+9
+11
+Checksums should match
+CHECKSUM TABLE t3;
+Table Checksum
+test.t3 3862424802
+CHECKSUM TABLE t1;
+Table Checksum
+test.t1 3862424802
+SET rocksdb_bulk_load_allow_sk=1;
+SET rocksdb_bulk_load=1;
+INSERT INTO t4 SELECT * FROM t3 FORCE INDEX (PRIMARY) ORDER BY a;
+SELECT count(*) FROM t4 FORCE INDEX (PRIMARY);
+count(*)
+0
+SELECT count(*) FROM t4 FORCE INDEX (b);
+count(*)
+0
+SELECT count(*) FROM t4 FORCE INDEX (c);
+count(*)
+0
+SET rocksdb_bulk_load=0;
+SELECT * FROM t4 FORCE INDEX (PRIMARY);
+a b c
+-9 11 11
+-7 9 9
+-5 7 7
+-3 5 5
+-1 3 3
+2 0 0
+4 -2 -2
+6 -4 -4
+8 -6 -6
+10 -8 -8
+SELECT b FROM t4 FORCE INDEX (b);
+b
+-8
+-6
+-4
+-2
+0
+3
+5
+7
+9
+11
+SELECT c FROM t4 FORCE INDEX (c);
+c
+-8
+-6
+-4
+-2
+0
+3
+5
+7
+9
+11
+Checksums should match
+CHECKSUM TABLE t3;
+Table Checksum
+test.t3 3862424802
+CHECKSUM TABLE t4;
+Table Checksum
+test.t4 3862424802
+SET rocksdb_bulk_load_allow_unsorted=1;
+SET rocksdb_bulk_load_allow_sk=1;
+SET rocksdb_bulk_load=1;
+INSERT INTO t2 SELECT * FROM t3 WHERE b >= 0 ORDER BY b;
+INSERT INTO t2 SELECT * FROM t3 WHERE b < 0 ORDER BY b;
+SELECT count(*) FROM t2 FORCE INDEX (PRIMARY);
+count(*)
+0
+SELECT count(*) FROM t2 FORCE INDEX (b);
+count(*)
+0
+SELECT count(*) FROM t2 FORCE INDEX (c);
+count(*)
+0
+SELECT count(*) FROM t2 FORCE INDEX (PRIMARY);
+count(*)
+0
+SELECT count(*) FROM t2 FORCE INDEX (b);
+count(*)
+0
+SELECT count(*) FROM t2 FORCE INDEX (c);
+count(*)
+0
+SET rocksdb_bulk_load=0;
+SELECT * FROM t2 FORCE INDEX (PRIMARY);
+a b c
+-19 21 21
+-17 19 19
+-15 17 17
+-13 15 15
+-11 13 13
+-9 11 11
+-7 9 9
+-5 7 7
+-3 5 5
+-1 3 3
+2 0 0
+4 -2 -2
+6 -4 -4
+8 -6 -6
+10 -8 -8
+12 -10 -10
+14 -12 -12
+16 -14 -14
+18 -16 -16
+20 -18 -18
+SELECT b FROM t2 FORCE INDEX (b);
+b
+-18
+-16
+-14
+-12
+-10
+-8
+-6
+-4
+-2
+0
+3
+5
+7
+9
+11
+13
+15
+17
+19
+21
+SELECT c FROM t2 FORCE INDEX (c);
+c
+-18
+-16
+-14
+-12
+-10
+-8
+-6
+-4
+-2
+0
+3
+5
+7
+9
+11
+13
+15
+17
+19
+21
+Checksums should match
+CHECKSUM TABLE t3;
+Table Checksum
+test.t3 1495594118
+CHECKSUM TABLE t2;
+Table Checksum
+test.t2 1495594118
+DROP TABLE t1;
+DROP TABLE t2;
+DROP TABLE t3;
+DROP TABLE t4;
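
The counts above make the secondary-key behaviour visible: with plain rocksdb_bulk_load=1 the primary key is buffered in SST files (count 0 until finalize) while secondary keys go through the normal write path (count 10 during the load); with rocksdb_bulk_load_allow_sk=1 the secondary keys are buffered as well, so every index reads as empty until the load is finalized. A minimal sketch (illustrative names t_src/t_dst):

SET rocksdb_bulk_load_allow_sk=1;  -- buffer secondary keys in SSTs too
SET rocksdb_bulk_load=1;
INSERT INTO t_dst SELECT * FROM t_src ORDER BY a;
-- at this point both PK and SK scans of t_dst return nothing
SET rocksdb_bulk_load=0;           -- finalize: all indexes become visible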
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result
new file mode 100644
index 00000000000..1041e96b802
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result
@@ -0,0 +1,108 @@
+SET rocksdb_bulk_load_size=3;
+SET rocksdb_bulk_load_allow_unsorted=1;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1")
+ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+-3 5
+-1 3
+2 0
+4 -2
+6 -4
+DROP TABLE t1;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1", KEY(b))
+ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+-3 5
+-1 3
+2 0
+4 -2
+6 -4
+DROP TABLE t1;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1")
+ENGINE=ROCKSDB;
+CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1")
+ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t2 VALUES (1,1);
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+INSERT INTO t1 VALUES (2,2);
+SELECT * FROM t2 FORCE INDEX (PRIMARY);
+a b
+1 1
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+2 2
+DROP TABLE t1, t2;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1")
+ENGINE=ROCKSDB;
+CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "cf1")
+ENGINE=ROCKSDB;
+CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1")
+ENGINE=ROCKSDB PARTITION BY KEY() PARTITIONS 4;
+connect other,localhost,root,,;
+set session transaction isolation level repeatable read;
+start transaction with consistent snapshot;
+select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+Has opened snapshots
+1
+connection default;
+set rocksdb_bulk_load=1;
+set rocksdb_bulk_load_size=100000;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+LOAD DATA INFILE <input_file> INTO TABLE t2;
+LOAD DATA INFILE <input_file> INTO TABLE t3;
+set rocksdb_bulk_load=0;
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+ANALYZE TABLE t1, t2, t3;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+test.t2 analyze status OK
+test.t3 analyze status OK
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+select count(a),count(b) from t1;
+count(a) count(b)
+2500000 2500000
+select count(a),count(b) from t2;
+count(a) count(b)
+2500000 2500000
+select count(a),count(b) from t3;
+count(a) count(b)
+2500000 2500000
+SELECT * FROM t1 FORCE INDEX (PRIMARY) LIMIT 3;
+a b
+-2499998 2500000
+-2499996 2499998
+-2499994 2499996
+SELECT * FROM t2 FORCE INDEX (PRIMARY) LIMIT 3;
+a b
+2499999 -2499997
+2499997 -2499995
+2499995 -2499993
+disconnect other;
+DROP TABLE t1, t2, t3;
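
With rocksdb_bulk_load_allow_unsorted=1 rows may arrive in any key order and are sorted when each batch is written out. As the middle block above shows, switching the target table mid-load flushes the previous table's batch (its rows become visible) while the current table's batch stays buffered until SET rocksdb_bulk_load=0. A minimal sketch (illustrative name t_demo):

SET rocksdb_bulk_load_allow_unsorted=1;
SET rocksdb_bulk_load=1;
INSERT INTO t_demo VALUES (3, 30);
INSERT INTO t_demo VALUES (1, 10);  -- out of order is fine in unsorted mode
SET rocksdb_bulk_load=0;            -- rows are sorted and made visible here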
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_errors.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_errors.result
new file mode 100644
index 00000000000..d8e5b92e897
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_errors.result
@@ -0,0 +1,4 @@
+SET rocksdb_bulk_load=1;
+SET rocksdb_bulk_load_allow_unsorted=1;
+ERROR HY000: Error when executing command SET: Cannot change this setting while bulk load is enabled
+SET rocksdb_bulk_load=0;
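
bulk_load_unsorted_errors pins down the ordering of the two settings: rocksdb_bulk_load_allow_unsorted cannot be changed while a bulk load is active, so a driver has to configure it before enabling the load and may only restore it afterwards. Sketch:

SET rocksdb_bulk_load_allow_unsorted=1;  -- must precede enabling the load
SET rocksdb_bulk_load=1;
-- ... inserts ...
SET rocksdb_bulk_load=0;
SET rocksdb_bulk_load_allow_unsorted=DEFAULT;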
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result
new file mode 100644
index 00000000000..34b14e9e5de
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result
@@ -0,0 +1,108 @@
+SET rocksdb_bulk_load_size=3;
+SET rocksdb_bulk_load_allow_unsorted=1;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1")
+ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+6 -4
+4 -2
+2 0
+-1 3
+-3 5
+DROP TABLE t1;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1", KEY(b))
+ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+6 -4
+4 -2
+2 0
+-1 3
+-3 5
+DROP TABLE t1;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1")
+ENGINE=ROCKSDB;
+CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1")
+ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t2 VALUES (1,1);
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+INSERT INTO t1 VALUES (2,2);
+SELECT * FROM t2 FORCE INDEX (PRIMARY);
+a b
+1 1
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+2 2
+1 1
+DROP TABLE t1, t2;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1")
+ENGINE=ROCKSDB;
+CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "rev:cf1")
+ENGINE=ROCKSDB;
+CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1")
+ENGINE=ROCKSDB PARTITION BY KEY() PARTITIONS 4;
+connect other,localhost,root,,;
+set session transaction isolation level repeatable read;
+start transaction with consistent snapshot;
+select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+Has opened snapshots
+1
+connection default;
+set rocksdb_bulk_load=1;
+set rocksdb_bulk_load_size=100000;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+LOAD DATA INFILE <input_file> INTO TABLE t2;
+LOAD DATA INFILE <input_file> INTO TABLE t3;
+set rocksdb_bulk_load=0;
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+ANALYZE TABLE t1, t2, t3;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+test.t2 analyze status OK
+test.t3 analyze status OK
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 2500000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+select count(a),count(b) from t1;
+count(a) count(b)
+2500000 2500000
+select count(a),count(b) from t2;
+count(a) count(b)
+2500000 2500000
+select count(a),count(b) from t3;
+count(a) count(b)
+2500000 2500000
+SELECT * FROM t1 FORCE INDEX (PRIMARY) LIMIT 3;
+a b
+2499999 -2499997
+2499997 -2499995
+2499995 -2499993
+SELECT * FROM t2 FORCE INDEX (PRIMARY) LIMIT 3;
+a b
+-2499998 2500000
+-2499996 2499998
+-2499994 2499996
+disconnect other;
+DROP TABLE t1, t2, t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic.result b/storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic.result
new file mode 100644
index 00000000000..1f687dfec53
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic.result
@@ -0,0 +1,693 @@
+CREATE TABLE `link_table` (
+`id1` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+`id1_type` int(10) unsigned NOT NULL DEFAULT '0' ,
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0' ,
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+`visibility` tinyint(3) NOT NULL DEFAULT '0' ,
+`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' ,
+`time` int(10) unsigned NOT NULL DEFAULT '0' ,
+`version` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+PRIMARY KEY (`link_type` , `id1` , `id2`) COMMENT 'cf_link' ,
+KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` ,
+`version` , `data`) COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+CREATE TABLE `link_table2` (
+`id1` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+`id1_type` int(10) unsigned NOT NULL DEFAULT '0' ,
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0' ,
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+`visibility` tinyint(3) NOT NULL DEFAULT '0' ,
+`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' ,
+`time` int(10) unsigned NOT NULL DEFAULT '0' ,
+`version` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+PRIMARY KEY (`link_type` , `id1` , `id2`)
+COMMENT 'cf_link' ,
+KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` ,
+`version` , `data`) COMMENT 'cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=9;
+insert into link_table values (1, 1, 1, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (1, 1, 2, 2, 3, 3, 'a10', 10, 125);
+insert into link_table values (1, 1, 3, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (1, 1, 4, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (1, 1, 5, 2, 3, 3, 'a12', 12, 125);
+insert into link_table values (1, 1, 6, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (1, 1, 7, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (1, 1, 8, 2, 3, 4, 'a13', 13, 125);
+insert into link_table values (1, 1, 9, 2, 3, 4, 'a14', 14, 125);
+insert into link_table values (1, 1, 10, 2, 3, 4, 'a15', 15, 125);
+insert into link_table values (2, 1, 1, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (2, 1, 2, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (2, 1, 3, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (2, 1, 4, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (2, 1, 5, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 6, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 7, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 8, 2, 3, 4, 'a13', 13, 125);
+insert into link_table values (2, 1, 9, 2, 3, 4, 'a14', 14, 125);
+insert into link_table values (2, 1, 10, 2, 3, 4, 'a15', 15, 125);
+insert into link_table values (2, 1, 1, 2, 4, 4, 'a10', 10, 125);
+insert into link_table values (2, 1, 2, 2, 4, 4, 'a10', 10, 125);
+insert into link_table values (2, 1, 3, 2, 4, 4, 'a11', 11, 125);
+insert into link_table values (2, 1, 4, 2, 4, 4, 'a11', 11, 125);
+insert into link_table values (2, 1, 5, 2, 4, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 6, 2, 4, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 7, 2, 4, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 8, 2, 4, 4, 'a13', 13, 125);
+insert into link_table values (2, 1, 9, 2, 4, 4, 'a14', 14, 125);
+insert into link_table values (2, 1, 10, 2, 4, 4, 'a15', 15, 125);
+insert into link_table values (3, 1, 10, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (3, 1, 9, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (3, 1, 8, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (3, 1, 7, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (3, 1, 6, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (3, 1, 5, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (3, 1, 4, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (3, 1, 3, 2, 3, 4, 'a13', 13, 125);
+insert into link_table values (3, 1, 2, 2, 3, 4, 'a14', 14, 125);
+insert into link_table values (3, 1, 1, 2, 3, 4, 'a15', 15, 125);
+insert into link_table values (9, 1, 9, 2, 5, 6, '0 ', 10, 125);
+insert into link_table values (9, 1, 8, 2, 5, 6, '01 ', 11, 125);
+insert into link_table values (9, 1, 7, 2, 5, 6, '012 ', 11, 125);
+insert into link_table values (9, 1, 6, 2, 5, 6, '0123 ', 12, 125);
+insert into link_table values (9, 1, 5, 2, 5, 6, '01234 ', 12, 125);
+insert into link_table values (9, 1, 4, 2, 5, 6, '012345 ', 12, 125);
+insert into link_table values (9, 1, 3, 2, 5, 6, '0123456 ', 13, 125);
+insert into link_table values (9, 1, 2, 2, 5, 6, '01234567 ', 14, 125);
+insert into link_table values (9, 1, 1, 2, 5, 6, '012345678 ', 15, 125);
+insert into link_table values (9, 1, 0, 2, 5, 6, '0123456789 ', 15, 125);
+insert into link_table2 select * from link_table;
+CREATE TABLE `id_table` (
+`id` bigint(20) NOT NULL DEFAULT '0',
+`type` int(11) NOT NULL DEFAULT '0',
+`row_created_time` int(11) NOT NULL DEFAULT '0',
+`hash_key` varchar(255) NOT NULL DEFAULT '',
+`is_deleted` tinyint(4) DEFAULT '0',
+PRIMARY KEY (`id`),
+KEY `type_id` (`type`,`id`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED
+KEY_BLOCK_SIZE=8;
+insert into id_table values (1, 1, 10, '111', 0);
+insert into id_table values (2, 1, 10, '111', 1);
+insert into id_table values (3, 1, 10, '111', 0);
+insert into id_table values (4, 1, 10, '111', 1);
+insert into id_table values (5, 1, 10, '111', 0);
+insert into id_table values (6, 1, 10, '111', 1);
+insert into id_table values (7, 1, 10, '111', 0);
+insert into id_table values (8, 1, 10, '111', 1);
+insert into id_table values (9, 1, 10, '111', 0);
+insert into id_table values (10, 1, 10, '111', 1);
+CREATE TABLE `node_table` (
+`id` bigint(20) unsigned NOT NULL DEFAULT '0',
+`type` int(10) unsigned NOT NULL DEFAULT '0',
+`version` bigint(20) unsigned NOT NULL DEFAULT '0',
+`update_time` int(10) unsigned NOT NULL DEFAULT '0',
+`data` mediumtext COLLATE latin1_bin NOT NULL,
+PRIMARY KEY (`type`,`id`) COMMENT 'cf_node_type_id',
+KEY `id` (`id`) COMMENT 'cf_node'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+insert into node_table values (1, 1, 1, 10, 'data');
+insert into node_table values (2, 1, 1, 10, 'data');
+insert into node_table values (3, 1, 1, 10, 'data');
+insert into node_table values (4, 1, 1, 10, 'data');
+insert into node_table values (5, 1, 1, 10, 'data');
+insert into node_table values (6, 1, 1, 10, 'data');
+insert into node_table values (7, 1, 1, 10, 'data');
+insert into node_table values (8, 1, 1, 10, 'data');
+insert into node_table values (9, 1, 1, 10, 'data');
+insert into node_table values (10, 1, 1, 10, 'data');
+CREATE TABLE `count_table` (
+`id` bigint(20) unsigned NOT NULL DEFAULT '0',
+`type` int(10) unsigned NOT NULL DEFAULT '0',
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+`count` int(10) unsigned NOT NULL DEFAULT '0',
+`time` int(10) unsigned NOT NULL DEFAULT '0',
+`version` bigint(20) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`id`,`link_type`) COMMENT 'cf_count_table'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+insert into count_table values (2, 1, 1, 1, 10, 20);
+insert into count_table values (3, 1, 1, 1, 10, 20);
+insert into count_table values (4, 1, 1, 1, 10, 20);
+insert into count_table values (5, 1, 1, 1, 10, 20);
+insert into count_table values (6, 1, 1, 1, 10, 20);
+insert into count_table values (7, 1, 1, 1, 10, 20);
+insert into count_table values (8, 1, 1, 1, 10, 20);
+insert into count_table values (9, 1, 1, 1, 10, 20);
+insert into count_table values (10, 1, 1, 1, 10, 20);
+CREATE TABLE `link_table5` (
+`id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+`visibility` tinyint(3) NOT NULL DEFAULT '0',
+`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '',
+`time` int(10) unsigned NOT NULL DEFAULT '0',
+`version` bigint(20) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+insert into link_table5 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1);
+insert into link_table5 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1);
+insert into link_table5 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1);
+insert into link_table5 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1);
+insert into link_table5 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1);
+insert into link_table5 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1);
+insert into link_table5 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1);
+CREATE TABLE `link_table3` (
+`id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+`visibility` tinyint(4) NOT NULL DEFAULT '0',
+`data` text COLLATE latin1_bin NOT NULL,
+`time` int(10) unsigned NOT NULL DEFAULT '0',
+`version` bigint(20) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link',
+KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`)
+COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4;
+insert into link_table3 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1);
+insert into link_table3 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1);
+insert into link_table3 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1);
+insert into link_table3 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1);
+insert into link_table3 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1);
+insert into link_table3 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1);
+insert into link_table3 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1);
+CREATE TABLE `link_table6` (
+`id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+`visibility` tinyint(4) NOT NULL DEFAULT '0',
+`data` text COLLATE latin1_bin NOT NULL,
+`time` int(10) unsigned NOT NULL DEFAULT '0',
+`version` bigint(20) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link',
+KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`,
+`data`(255)) COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4;
+insert into link_table6 values (1, 1, 2, 2, 1, 1,
+'data12_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (1, 1, 3, 2, 1, 2,
+'data13_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (1, 1, 4, 2, 1, 2,
+'data14_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (1, 1, 5, 2, 1, 1,
+'data15_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (2, 1, 1, 2, 1, 1,
+'data21_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (2, 1, 2, 2, 1, 1,
+'data22_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (2, 1, 3, 2, 1, 1,
+'data32_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+CREATE TABLE `link_table4` (
+`id1` binary(16) NOT NULL DEFAULT '\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0',
+`raw_key` text COLLATE latin1_bin,
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+`visibility` tinyint(3) NOT NULL DEFAULT '0',
+`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '',
+`time` int(10) unsigned NOT NULL DEFAULT '0',
+`version` bigint(20) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link',
+KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`,`data`)
+COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+insert into link_table4 values ('a1', "rk1", 2, 2, 1, 1, 'data12', 1, 1);
+insert into link_table4 values ('a1', "rk2", 3, 2, 1, 2, 'data13', 1, 1);
+insert into link_table4 values ('a1', "rk3", 4, 2, 1, 2, 'data14', 1, 1);
+insert into link_table4 values ('a1', "rk4", 5, 2, 1, 1, 'data15', 1, 1);
+insert into link_table4 values ('b1', "rk5", 1, 2, 1, 1, 'data21', 1, 1);
+insert into link_table4 values ('b1', "rk6", 2, 2, 1, 1, 'data22', 1, 1);
+insert into link_table4 values ('b1', "rk7", 3, 2, 1, 1, 'data32', 1, 1);
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+ no_bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+bypassabc*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /* +bypassabc*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*aaaaaaaaabbbbbbbbb*/ id1,id2,id1_type,id2_type,data,version
+from link_table WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+b*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+byp*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+bypw*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*-b*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /**/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+# Point query
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2 IN (2, 3, 4) and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+1 3 1 2 a11 125
+1 4 1 2 a11 125
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2 IN (2) and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1 IN (1) and id2 IN (2) and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1 IN (1, 2) and id2 IN (2, 3, 4) and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+1 3 1 2 a11 125
+1 4 1 2 a11 125
+2 2 1 2 a10 125
+2 3 1 2 a11 125
+2 4 1 2 a11 125
+# Prefix range query
+# Prefix range query with SK
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 3 AND time = 10
+ORDER BY TIME DESC LIMIT 10;
+id1 id2 link_type visibility data time version
+1 2 3 3 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 3 AND time = 10
+ORDER BY TIME ASC LIMIT 10;
+id1 id2 link_type visibility data time version
+1 2 3 3 a10 10 125
+# Prefix range query with SK with limits
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 0;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 0,10;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 0,5;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 0,1;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,0;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,10;
+id1 id2 link_type visibility data time version
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,5;
+id1 id2 link_type visibility data time version
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+1 4 3 4 a11 11 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,1;
+id1 id2 link_type visibility data time version
+1 9 3 4 a14 14 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,0;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5,10;
+id1 id2 link_type visibility data time version
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5,5;
+id1 id2 link_type visibility data time version
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5,1;
+id1 id2 link_type visibility data time version
+1 4 3 4 a11 11 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5,0;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10,10;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10,5;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10,1;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10,0;
+id1 id2 link_type visibility data time version
+# Prefix range query with PK
+SELECT /*+ bypass */ id1, id2, link_type FROM link_table FORCE INDEX (PRIMARY)
+WHERE link_type=3 and id1=1 ORDER BY id2 DESC;
+id1 id2 link_type
+1 10 3
+1 9 3
+1 8 3
+1 7 3
+1 6 3
+1 5 3
+1 4 3
+1 3 3
+1 2 3
+1 1 3
+SELECT /*+ bypass */ id1, id2, link_type FROM link_table FORCE INDEX (PRIMARY)
+WHERE link_type=3 and id1=1 ORDER BY id2 ASC;
+id1 id2 link_type
+1 1 3
+1 2 3
+1 3 3
+1 4 3
+1 5 3
+1 6 3
+1 7 3
+1 8 3
+1 9 3
+1 10 3
+# Prefix range query with PK + value
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (PRIMARY)
+WHERE link_type=3 and id1=1 ORDER BY id2 DESC;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+1 5 3 3 a12 12 125
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 2 3 3 a10 10 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (PRIMARY)
+WHERE link_type=3 and id1=1 ORDER BY id2 ASC;
+id1 id2 link_type visibility data time version
+1 1 3 4 a10 10 125
+1 2 3 3 a10 10 125
+1 3 3 4 a11 11 125
+1 4 3 4 a11 11 125
+1 5 3 3 a12 12 125
+1 6 3 4 a12 12 125
+1 7 3 4 a12 12 125
+1 8 3 4 a13 13 125
+1 9 3 4 a14 14 125
+1 10 3 4 a15 15 125
+# Transaction
+BEGIN;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+UPDATE link_table set data="bcd" WHERE id1=1 and id2=2 and link_type = 3;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 bcd 125
+COMMIT;
+BEGIN;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 bcd 125
+UPDATE link_table set data="cde" WHERE id1=1 and id2=2 and link_type = 3;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 cde 125
+ROLLBACK;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 bcd 125
+# Data types
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3";
+id1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+3
+3
+3
+3
+3
+3
+3
+3
+3
+3
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1="1";
+id1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=True;
+id1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=b'1';
+id1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=x'01';
+id1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=NULL;
+id1
+DROP TABLE count_table;
+DROP TABLE link_table;
+DROP TABLE link_table3;
+DROP TABLE link_table2;
+DROP TABLE id_table;
+DROP TABLE node_table;
+DROP TABLE link_table5;
+DROP TABLE link_table6;
+DROP TABLE link_table4;
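
bypass_select_basic checks the hint parser as much as the executor: whether a comment such as /*+bypassabc*/ or /*+byp*/ engages the bypass path or falls back to the regular one, the result set must be identical, which is why each variant repeats the same point query. The bypass path itself is then exercised with point lookups, IN lists, prefix-range scans under every LIMIT/OFFSET combination, transactions, and mixed-type comparisons. A representative query shape (tables as defined above):

SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
FROM link_table FORCE INDEX (id1_type)
WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
ORDER BY time DESC LIMIT 1, 5;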
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic_bloom.result b/storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic_bloom.result
new file mode 100644
index 00000000000..1f687dfec53
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic_bloom.result
@@ -0,0 +1,693 @@
+CREATE TABLE `link_table` (
+`id1` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+`id1_type` int(10) unsigned NOT NULL DEFAULT '0' ,
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0' ,
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+`visibility` tinyint(3) NOT NULL DEFAULT '0' ,
+`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' ,
+`time` int(10) unsigned NOT NULL DEFAULT '0' ,
+`version` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+PRIMARY KEY (`link_type` , `id1` , `id2`) COMMENT 'cf_link' ,
+KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` ,
+`version` , `data`) COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+CREATE TABLE `link_table2` (
+`id1` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+`id1_type` int(10) unsigned NOT NULL DEFAULT '0' ,
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0' ,
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+`visibility` tinyint(3) NOT NULL DEFAULT '0' ,
+`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' ,
+`time` int(10) unsigned NOT NULL DEFAULT '0' ,
+`version` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+PRIMARY KEY (`link_type` , `id1` , `id2`)
+COMMENT 'cf_link' ,
+KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` ,
+`version` , `data`) COMMENT 'cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=9;
+insert into link_table values (1, 1, 1, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (1, 1, 2, 2, 3, 3, 'a10', 10, 125);
+insert into link_table values (1, 1, 3, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (1, 1, 4, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (1, 1, 5, 2, 3, 3, 'a12', 12, 125);
+insert into link_table values (1, 1, 6, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (1, 1, 7, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (1, 1, 8, 2, 3, 4, 'a13', 13, 125);
+insert into link_table values (1, 1, 9, 2, 3, 4, 'a14', 14, 125);
+insert into link_table values (1, 1, 10, 2, 3, 4, 'a15', 15, 125);
+insert into link_table values (2, 1, 1, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (2, 1, 2, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (2, 1, 3, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (2, 1, 4, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (2, 1, 5, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 6, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 7, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 8, 2, 3, 4, 'a13', 13, 125);
+insert into link_table values (2, 1, 9, 2, 3, 4, 'a14', 14, 125);
+insert into link_table values (2, 1, 10, 2, 3, 4, 'a15', 15, 125);
+insert into link_table values (2, 1, 1, 2, 4, 4, 'a10', 10, 125);
+insert into link_table values (2, 1, 2, 2, 4, 4, 'a10', 10, 125);
+insert into link_table values (2, 1, 3, 2, 4, 4, 'a11', 11, 125);
+insert into link_table values (2, 1, 4, 2, 4, 4, 'a11', 11, 125);
+insert into link_table values (2, 1, 5, 2, 4, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 6, 2, 4, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 7, 2, 4, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 8, 2, 4, 4, 'a13', 13, 125);
+insert into link_table values (2, 1, 9, 2, 4, 4, 'a14', 14, 125);
+insert into link_table values (2, 1, 10, 2, 4, 4, 'a15', 15, 125);
+insert into link_table values (3, 1, 10, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (3, 1, 9, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (3, 1, 8, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (3, 1, 7, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (3, 1, 6, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (3, 1, 5, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (3, 1, 4, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (3, 1, 3, 2, 3, 4, 'a13', 13, 125);
+insert into link_table values (3, 1, 2, 2, 3, 4, 'a14', 14, 125);
+insert into link_table values (3, 1, 1, 2, 3, 4, 'a15', 15, 125);
+insert into link_table values (9, 1, 9, 2, 5, 6, '0 ', 10, 125);
+insert into link_table values (9, 1, 8, 2, 5, 6, '01 ', 11, 125);
+insert into link_table values (9, 1, 7, 2, 5, 6, '012 ', 11, 125);
+insert into link_table values (9, 1, 6, 2, 5, 6, '0123 ', 12, 125);
+insert into link_table values (9, 1, 5, 2, 5, 6, '01234 ', 12, 125);
+insert into link_table values (9, 1, 4, 2, 5, 6, '012345 ', 12, 125);
+insert into link_table values (9, 1, 3, 2, 5, 6, '0123456 ', 13, 125);
+insert into link_table values (9, 1, 2, 2, 5, 6, '01234567 ', 14, 125);
+insert into link_table values (9, 1, 1, 2, 5, 6, '012345678 ', 15, 125);
+insert into link_table values (9, 1, 0, 2, 5, 6, '0123456789 ', 15, 125);
+insert into link_table2 select * from link_table;
+CREATE TABLE `id_table` (
+`id` bigint(20) NOT NULL DEFAULT '0',
+`type` int(11) NOT NULL DEFAULT '0',
+`row_created_time` int(11) NOT NULL DEFAULT '0',
+`hash_key` varchar(255) NOT NULL DEFAULT '',
+`is_deleted` tinyint(4) DEFAULT '0',
+PRIMARY KEY (`id`),
+KEY `type_id` (`type`,`id`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED
+KEY_BLOCK_SIZE=8;
+insert into id_table values (1, 1, 10, '111', 0);
+insert into id_table values (2, 1, 10, '111', 1);
+insert into id_table values (3, 1, 10, '111', 0);
+insert into id_table values (4, 1, 10, '111', 1);
+insert into id_table values (5, 1, 10, '111', 0);
+insert into id_table values (6, 1, 10, '111', 1);
+insert into id_table values (7, 1, 10, '111', 0);
+insert into id_table values (8, 1, 10, '111', 1);
+insert into id_table values (9, 1, 10, '111', 0);
+insert into id_table values (10, 1, 10, '111', 1);
+CREATE TABLE `node_table` (
+`id` bigint(20) unsigned NOT NULL DEFAULT '0',
+`type` int(10) unsigned NOT NULL DEFAULT '0',
+`version` bigint(20) unsigned NOT NULL DEFAULT '0',
+`update_time` int(10) unsigned NOT NULL DEFAULT '0',
+`data` mediumtext COLLATE latin1_bin NOT NULL,
+PRIMARY KEY (`type`,`id`) COMMENT 'cf_node_type_id',
+KEY `id` (`id`) COMMENT 'cf_node'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+insert into node_table values (1, 1, 1, 10, 'data');
+insert into node_table values (2, 1, 1, 10, 'data');
+insert into node_table values (3, 1, 1, 10, 'data');
+insert into node_table values (4, 1, 1, 10, 'data');
+insert into node_table values (5, 1, 1, 10, 'data');
+insert into node_table values (6, 1, 1, 10, 'data');
+insert into node_table values (7, 1, 1, 10, 'data');
+insert into node_table values (8, 1, 1, 10, 'data');
+insert into node_table values (9, 1, 1, 10, 'data');
+insert into node_table values (10, 1, 1, 10, 'data');
+CREATE TABLE `count_table` (
+`id` bigint(20) unsigned NOT NULL DEFAULT '0',
+`type` int(10) unsigned NOT NULL DEFAULT '0',
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+`count` int(10) unsigned NOT NULL DEFAULT '0',
+`time` int(10) unsigned NOT NULL DEFAULT '0',
+`version` bigint(20) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`id`,`link_type`) COMMENT 'cf_count_table'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+insert into count_table values (2, 1, 1, 1, 10, 20);
+insert into count_table values (3, 1, 1, 1, 10, 20);
+insert into count_table values (4, 1, 1, 1, 10, 20);
+insert into count_table values (5, 1, 1, 1, 10, 20);
+insert into count_table values (6, 1, 1, 1, 10, 20);
+insert into count_table values (7, 1, 1, 1, 10, 20);
+insert into count_table values (8, 1, 1, 1, 10, 20);
+insert into count_table values (9, 1, 1, 1, 10, 20);
+insert into count_table values (10, 1, 1, 1, 10, 20);
+CREATE TABLE `link_table5` (
+`id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+`visibility` tinyint(3) NOT NULL DEFAULT '0',
+`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '',
+`time` int(10) unsigned NOT NULL DEFAULT '0',
+`version` bigint(20) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+insert into link_table5 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1);
+insert into link_table5 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1);
+insert into link_table5 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1);
+insert into link_table5 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1);
+insert into link_table5 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1);
+insert into link_table5 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1);
+insert into link_table5 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1);
+CREATE TABLE `link_table3` (
+`id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+`visibility` tinyint(4) NOT NULL DEFAULT '0',
+`data` text COLLATE latin1_bin NOT NULL,
+`time` int(10) unsigned NOT NULL DEFAULT '0',
+`version` bigint(20) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link',
+KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`)
+COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4;
+insert into link_table3 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1);
+insert into link_table3 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1);
+insert into link_table3 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1);
+insert into link_table3 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1);
+insert into link_table3 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1);
+insert into link_table3 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1);
+insert into link_table3 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1);
+CREATE TABLE `link_table6` (
+`id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+`visibility` tinyint(4) NOT NULL DEFAULT '0',
+`data` text COLLATE latin1_bin NOT NULL,
+`time` int(10) unsigned NOT NULL DEFAULT '0',
+`version` bigint(20) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link',
+KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`,
+`data`(255)) COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4;
+insert into link_table6 values (1, 1, 2, 2, 1, 1,
+'data12_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (1, 1, 3, 2, 1, 2,
+'data13_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (1, 1, 4, 2, 1, 2,
+'data14_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (1, 1, 5, 2, 1, 1,
+'data15_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (2, 1, 1, 2, 1, 1,
+'data21_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (2, 1, 2, 2, 1, 1,
+'data22_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (2, 1, 3, 2, 1, 1,
+'data32_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+CREATE TABLE `link_table4` (
+`id1` binary(16) NOT NULL DEFAULT '\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0',
+`raw_key` text COLLATE latin1_bin,
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+`visibility` tinyint(3) NOT NULL DEFAULT '0',
+`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '',
+`time` int(10) unsigned NOT NULL DEFAULT '0',
+`version` bigint(20) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link',
+KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`,`data`)
+COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+insert into link_table4 values ('a1', "rk1", 2, 2, 1, 1, 'data12', 1, 1);
+insert into link_table4 values ('a1', "rk2", 3, 2, 1, 2, 'data13', 1, 1);
+insert into link_table4 values ('a1', "rk3", 4, 2, 1, 2, 'data14', 1, 1);
+insert into link_table4 values ('a1', "rk4", 5, 2, 1, 1, 'data15', 1, 1);
+insert into link_table4 values ('b1', "rk5", 1, 2, 1, 1, 'data21', 1, 1);
+insert into link_table4 values ('b1', "rk6", 2, 2, 1, 1, 'data22', 1, 1);
+insert into link_table4 values ('b1', "rk7", 3, 2, 1, 1, 'data32', 1, 1);
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+ no_bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+bypassabc*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /* +bypassabc*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*aaaaaaaaabbbbbbbbb*/ id1,id2,id1_type,id2_type,data,version
+from link_table WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+b*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+byp*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+bypw*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*-b*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /**/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
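+# All of the comment variants above return identical rows: only a
+# well-formed /*+ bypass */ or /*+ no_bypass */ comment is treated as a
+# hint, and every malformed or unknown variant simply falls back to the
+# regular execution path. A sketch of how the chosen path could be
+# observed (the bypass status counters below are an assumption about
+# this build, not recorded output):
+# SHOW GLOBAL STATUS LIKE 'rocksdb_select_bypass%';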
+# Point query
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2 IN (2, 3, 4) and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+1 3 1 2 a11 125
+1 4 1 2 a11 125
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2 IN (2) and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1 IN (1) and id2 IN (2) and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1 IN (1, 2) and id2 IN (2, 3, 4) and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+1 3 1 2 a11 125
+1 4 1 2 a11 125
+2 2 1 2 a10 125
+2 3 1 2 a11 125
+2 4 1 2 a11 125
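+# The IN-list query above is executed as a cross product of point
+# lookups: id1 IN (1, 2) times id2 IN (2, 3, 4) with a fixed link_type
+# gives 2 x 3 = 6 key combinations, matching the 6 rows returned. An
+# equivalent hand-expanded form (illustrative sketch only):
+# SELECT id1,id2,id1_type,id2_type,data,version FROM link_table
+# WHERE (id1=1 AND id2=2 AND link_type=3) OR (id1=1 AND id2=3 AND link_type=3)
+# OR (id1=1 AND id2=4 AND link_type=3) OR (id1=2 AND id2=2 AND link_type=3)
+# OR (id1=2 AND id2=3 AND link_type=3) OR (id1=2 AND id2=4 AND link_type=3);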
+# Prefix range query
+# Prefix range query with SK
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 3 AND time = 10
+ORDER BY TIME DESC LIMIT 10;
+id1 id2 link_type visibility data time version
+1 2 3 3 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 3 AND time = 10
+ORDER BY TIME ASC LIMIT 10;
+id1 id2 link_type visibility data time version
+1 2 3 3 a10 10 125
+# Prefix range query with SK with limits
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 0;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 0,10;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 0,5;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 0,1;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,0;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,10;
+id1 id2 link_type visibility data time version
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,5;
+id1 id2 link_type visibility data time version
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+1 4 3 4 a11 11 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,1;
+id1 id2 link_type visibility data time version
+1 9 3 4 a14 14 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,0;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5,10;
+id1 id2 link_type visibility data time version
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5,5;
+id1 id2 link_type visibility data time version
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5,1;
+id1 id2 link_type visibility data time version
+1 4 3 4 a11 11 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5,0;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10,10;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10,5;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10,1;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10,0;
+id1 id2 link_type visibility data time version
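+# LIMIT offset,count over the 8 rows matching this predicate behaves as
+# expected under bypass: LIMIT 1,10 skips 1 row and returns the
+# remaining 7, LIMIT 5,5 skips 5 and returns the last 3 (8 - 5 = 3),
+# and any offset at or past the result size (LIMIT 10,...) returns
+# nothing, exactly as in the empty result sets above.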
+# Prefix range query with PK
+SELECT /*+ bypass */ id1, id2, link_type FROM link_table FORCE INDEX (PRIMARY)
+WHERE link_type=3 and id1=1 ORDER BY id2 DESC;
+id1 id2 link_type
+1 10 3
+1 9 3
+1 8 3
+1 7 3
+1 6 3
+1 5 3
+1 4 3
+1 3 3
+1 2 3
+1 1 3
+SELECT /*+ bypass */ id1, id2, link_type FROM link_table FORCE INDEX (PRIMARY)
+WHERE link_type=3 and id1=1 ORDER BY id2 ASC;
+id1 id2 link_type
+1 1 3
+1 2 3
+1 3 3
+1 4 3
+1 5 3
+1 6 3
+1 7 3
+1 8 3
+1 9 3
+1 10 3
+# Prefix range query with PK + value
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (PRIMARY)
+WHERE link_type=3 and id1=1 ORDER BY id2 DESC;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+1 5 3 3 a12 12 125
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 2 3 3 a10 10 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (PRIMARY)
+WHERE link_type=3 and id1=1 ORDER BY id2 ASC;
+id1 id2 link_type visibility data time version
+1 1 3 4 a10 10 125
+1 2 3 3 a10 10 125
+1 3 3 4 a11 11 125
+1 4 3 4 a11 11 125
+1 5 3 3 a12 12 125
+1 6 3 4 a12 12 125
+1 7 3 4 a12 12 125
+1 8 3 4 a13 13 125
+1 9 3 4 a14 14 125
+1 10 3 4 a15 15 125
+# Transaction
+BEGIN;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+UPDATE link_table set data="bcd" WHERE id1=1 and id2=2 and link_type = 3;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 bcd 125
+COMMIT;
+BEGIN;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 bcd 125
+UPDATE link_table set data="cde" WHERE id1=1 and id2=2 and link_type = 3;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 cde 125
+ROLLBACK;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 bcd 125
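+# Within a transaction the bypass read sees the transaction's own
+# uncommitted UPDATE (read-your-own-writes), and after ROLLBACK the
+# last committed value ('bcd') is visible again, matching what a
+# regular, non-bypass read would return.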
+# Data types
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3";
+id1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+3
+3
+3
+3
+3
+3
+3
+3
+3
+3
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1="1";
+id1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=True;
+id1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=b'1';
+id1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=x'01';
+id1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=NULL;
+id1
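+# The string "3", TRUE, b'1' and x'01' above are all coerced to
+# integers and match id1=1, but id1=NULL returns no rows: a comparison
+# with NULL evaluates to NULL, never true. Matching NULLs would require
+# an IS NULL predicate instead (also empty here, as no NULL id1 values
+# were ever inserted):
+# SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1 IS NULL;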
+DROP TABLE count_table;
+DROP TABLE link_table;
+DROP TABLE link_table3;
+DROP TABLE link_table2;
+DROP TABLE id_table;
+DROP TABLE node_table;
+DROP TABLE link_table5;
+DROP TABLE link_table6;
+DROP TABLE link_table4;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bytes_written.result b/storage/rocksdb/mysql-test/rocksdb/r/bytes_written.result
new file mode 100644
index 00000000000..d9d29e6ac69
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bytes_written.result
@@ -0,0 +1,10 @@
+DROP TABLE IF EXISTS stats_test_table;
+CREATE TABLE stats_test_table (a INT, b INT, PRIMARY KEY (a)) ENGINE=ROCKSDB;
+SET GLOBAL rocksdb_perf_context_level=3;
+INSERT INTO stats_test_table VALUES (7,1);
+INSERT INTO stats_test_table VALUES (2,2);
+SELECT io_write_bytes > 0 FROM INFORMATION_SCHEMA.TABLE_STATISTICS WHERE TABLE_NAME = "stats_test_table";
+io_write_bytes > 0
+1
+DROP TABLE stats_test_table;
+SET GLOBAL rocksdb_perf_context_level=DEFAULT;
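+# The io_write_bytes counter only accumulates while the perf context is
+# enabled, which is presumably why the test raises
+# rocksdb_perf_context_level to 3 before the INSERTs and restores the
+# DEFAULT afterwards. A sketch for inspecting the remaining per-table
+# counters (assuming the same INFORMATION_SCHEMA.TABLE_STATISTICS table
+# queried above):
+# SELECT * FROM INFORMATION_SCHEMA.TABLE_STATISTICS WHERE TABLE_NAME = "stats_test_table";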
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/cardinality.result b/storage/rocksdb/mysql-test/rocksdb/r/cardinality.result
new file mode 100644
index 00000000000..d037c636a16
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/cardinality.result
@@ -0,0 +1,100 @@
+CREATE TABLE t0 (id int PRIMARY KEY, a int, INDEX ix_a (a)) engine=rocksdb;
+insert into t0 values (0, 0),(1, 1),(2, 2),(3, 3),(4, 4),
+(5, 4),(6, 4),(7, 4),(8, 4),(9, 4);
+SELECT cardinality FROM information_schema.statistics where table_name="t0" and
+column_name="id";
+cardinality
+NULL
+SELECT cardinality FROM information_schema.statistics where table_name="t0" and
+column_name="a";
+cardinality
+NULL
+ANALYZE TABLE t0;
+SELECT table_rows into @N FROM information_schema.tables
+WHERE table_name = "t0";
+SELECT FLOOR(@N/cardinality) FROM
+information_schema.statistics where table_name="t0" and column_name="id";
+FLOOR(@N/cardinality)
+1
+SELECT FLOOR(@N/cardinality) FROM
+information_schema.statistics where table_name="t0" and column_name="a";
+FLOOR(@N/cardinality)
+2
+SET GLOBAL rocksdb_force_flush_memtable_now = 1;
+ANALYZE TABLE t0;
+SELECT table_rows into @N FROM information_schema.tables
+WHERE table_name = "t0";
+SELECT FLOOR(@N/cardinality) FROM
+information_schema.statistics where table_name="t0" and column_name="id";
+FLOOR(@N/cardinality)
+1
+SELECT FLOOR(@N/cardinality) FROM
+information_schema.statistics where table_name="t0" and column_name="a";
+FLOOR(@N/cardinality)
+2
+drop table t0;
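+# The FLOOR(@N/cardinality) checks above work out as follows: id is
+# unique, so its estimated cardinality is ~10 for 10 rows and the ratio
+# is 1; column a holds only the 5 distinct values 0..4 (with 4 repeated
+# six times), so its cardinality is ~5 and FLOOR(10/5) = 2.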
+DROP TABLE IF EXISTS t1,t10,t11;
+create table t1(
+id bigint not null primary key,
+i1 bigint, #unique
+i2 bigint, #repeating
+c1 varchar(20), #unique
+c2 varchar(20), #repeating
+index t1_1(id, i1),
+index t1_2(i1, i2),
+index t1_3(i2, i1),
+index t1_4(c1, c2),
+index t1_5(c2, c1)
+) engine=rocksdb;
+optimize table t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+show index in t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 id A 100000 NULL NULL LSMTREE
+t1 1 t1_1 1 id A 100000 NULL NULL LSMTREE
+t1 1 t1_1 2 i1 A 100000 NULL NULL YES LSMTREE
+t1 1 t1_2 1 i1 A 100000 NULL NULL YES LSMTREE
+t1 1 t1_2 2 i2 A 100000 NULL NULL YES LSMTREE
+t1 1 t1_3 1 i2 A 11111 NULL NULL YES LSMTREE
+t1 1 t1_3 2 i1 A 100000 NULL NULL YES LSMTREE
+t1 1 t1_4 1 c1 A 100000 NULL NULL YES LSMTREE
+t1 1 t1_4 2 c2 A 100000 NULL NULL YES LSMTREE
+t1 1 t1_5 1 c2 A 11111 NULL NULL YES LSMTREE
+t1 1 t1_5 2 c1 A 100000 NULL NULL YES LSMTREE
+SELECT table_name, table_rows FROM information_schema.tables WHERE table_schema = DATABASE();
+table_name table_rows
+t1 100000
+restarting...
+show index in t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 id A 100000 NULL NULL LSMTREE
+t1 1 t1_1 1 id A 100000 NULL NULL LSMTREE
+t1 1 t1_1 2 i1 A 100000 NULL NULL YES LSMTREE
+t1 1 t1_2 1 i1 A 100000 NULL NULL YES LSMTREE
+t1 1 t1_2 2 i2 A 100000 NULL NULL YES LSMTREE
+t1 1 t1_3 1 i2 A 11111 NULL NULL YES LSMTREE
+t1 1 t1_3 2 i1 A 100000 NULL NULL YES LSMTREE
+t1 1 t1_4 1 c1 A 100000 NULL NULL YES LSMTREE
+t1 1 t1_4 2 c2 A 100000 NULL NULL YES LSMTREE
+t1 1 t1_5 1 c2 A 11111 NULL NULL YES LSMTREE
+t1 1 t1_5 2 c1 A 100000 NULL NULL YES LSMTREE
+SELECT table_name, table_rows FROM information_schema.tables WHERE table_schema = DATABASE();
+table_name table_rows
+t1 100000
+CREATE TABLE t2 (a INT, b INT, c INT, d INT, e INT, f INT, g INT,
+PRIMARY KEY (a), KEY (c, b, a, d, e, f, g))
+ENGINE=ROCKSDB;
+SET GLOBAL rocksdb_force_flush_memtable_now = 1;
+ANALYZE TABLE t2;
+Table Op Msg_type Msg_text
+test.t2 analyze status OK
+# The cardinality of the columns after 'a' must be equal to the cardinality of column 'a'
+SELECT CARDINALITY INTO @c FROM information_schema.statistics WHERE TABLE_NAME='t2' AND INDEX_NAME='c' AND COLUMN_NAME='a';
+SELECT COLUMN_NAME, CARDINALITY = @c FROM information_schema.statistics WHERE TABLE_NAME='t2' AND INDEX_NAME='c' AND SEQ_IN_INDEX > 3;
+COLUMN_NAME CARDINALITY = @c
+d 1
+e 1
+f 1
+g 1
+drop table t1, t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/check_flags.result b/storage/rocksdb/mysql-test/rocksdb/r/check_flags.result
new file mode 100644
index 00000000000..12c5bc4f85c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/check_flags.result
@@ -0,0 +1,66 @@
+set debug_sync='RESET';
+set global rocksdb_debug_ttl_read_filter_ts = -10;
+connect conn1, localhost, root,,;
+connection default;
+CREATE TABLE t1 (id INT, value INT, KEY (id), KEY (value)) ENGINE=ROCKSDB;
+CREATE TABLE t2 (id INT, value INT) ENGINE=ROCKSDB;
+CREATE TABLE t3 (id INT, kp1 INT, PRIMARY KEY (id), KEY(kp1)) ENGINE=ROCKSDB COMMENT='ttl_duration=1';
+INSERT INTO t1 VALUES (1,1), (2,2), (3,3), (4,4), (5,5);
+INSERT INTO t2 SELECT * FROM t1;
+INSERT INTO t3 SELECT * FROM t1;
+connection conn1;
+set debug_sync='rocksdb.check_flags_rmi SIGNAL parked WAIT_FOR go';
+SELECT value FROM t1 WHERE value = 3;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+KILL QUERY $conn1_id;
+set debug_sync='now SIGNAL go';
+connection conn1;
+ERROR 70100: Query execution was interrupted
+set debug_sync='RESET';
+connection conn1;
+set debug_sync='rocksdb.check_flags_rmi_scan SIGNAL parked WAIT_FOR go';
+SELECT DISTINCT(id) FROM t1 WHERE value = 5 AND id IN (1, 3, 5);
+connection default;
+set debug_sync='now WAIT_FOR parked';
+KILL QUERY $conn1_id;
+set debug_sync='now SIGNAL go';
+connection conn1;
+ERROR 70100: Query execution was interrupted
+set debug_sync='RESET';
+connection conn1;
+set debug_sync='rocksdb.check_flags_inwd SIGNAL parked WAIT_FOR go';
+SELECT value FROM t1 WHERE value > 3;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+KILL QUERY $conn1_id;
+set debug_sync='now SIGNAL go';
+connection conn1;
+ERROR 70100: Query execution was interrupted
+set debug_sync='RESET';
+connection conn1;
+set debug_sync='rocksdb.check_flags_rnwd SIGNAL parked WAIT_FOR go';
+SELECT id FROM t2;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+KILL QUERY $conn1_id;
+set debug_sync='now SIGNAL go';
+connection conn1;
+ERROR 70100: Query execution was interrupted
+set debug_sync='RESET';
+connection conn1;
+set debug_sync='rocksdb.check_flags_ser SIGNAL parked WAIT_FOR go';
+SELECT kp1 FROM t3 ORDER BY kp1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+KILL QUERY $conn1_id;
+set debug_sync='now SIGNAL go';
+connection conn1;
+ERROR 70100: Query execution was interrupted
+connection default;
+disconnect conn1;
+set debug_sync='RESET';
+set global rocksdb_debug_ttl_read_filter_ts = DEFAULT;
+DROP TABLE t1;
+DROP TABLE t2;
+DROP TABLE t3;
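+# Every block above follows the same kill-during-scan pattern: conn1
+# parks its scan at a named debug_sync point, the default connection
+# kills that query and then signals it to continue, and the resumed
+# scan notices the kill flag and aborts with ER_QUERY_INTERRUPTED
+# (error 70100). Generic shape of one block (sketch only; $conn1_id is
+# captured in the .test file):
+# set debug_sync='<sync_point_name> SIGNAL parked WAIT_FOR go';
+# SELECT ...;                          # on conn1, will be interrupted
+# set debug_sync='now WAIT_FOR parked';
+# KILL QUERY $conn1_id;
+# set debug_sync='now SIGNAL go';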
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/check_ignore_unknown_options.result b/storage/rocksdb/mysql-test/rocksdb/r/check_ignore_unknown_options.result
new file mode 100644
index 00000000000..6ff49908a51
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/check_ignore_unknown_options.result
@@ -0,0 +1,7 @@
+select variable_name, variable_value from information_schema.global_variables where variable_name="rocksdb_ignore_unknown_options";
+variable_name variable_value
+ROCKSDB_IGNORE_UNKNOWN_OPTIONS ON
+FOUND 1 /RocksDB: Compatibility check against existing database options failed/ in my_restart.err
+select variable_name, variable_value from information_schema.global_variables where variable_name="rocksdb_ignore_unknown_options";
+variable_name variable_value
+ROCKSDB_IGNORE_UNKNOWN_OPTIONS ON
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/check_table.result b/storage/rocksdb/mysql-test/rocksdb/r/check_table.result
new file mode 100644
index 00000000000..116c168c4da
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/check_table.result
@@ -0,0 +1,68 @@
+DROP TABLE IF EXISTS t1,t2;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+CREATE TABLE t2 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+INSERT INTO t1 (a,b) VALUES (3,'c');
+INSERT INTO t2 (a,b) VALUES (4,'d');
+CHECK TABLE t1, t2 FOR UPGRADE;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+test.t2 check status OK
+INSERT INTO t2 (a,b) VALUES (5,'e');
+CHECK TABLE t2 QUICK;
+Table Op Msg_type Msg_text
+test.t2 check status OK
+INSERT INTO t1 (a,b) VALUES (6,'f');
+CHECK TABLE t1 FAST;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+INSERT INTO t1 (a,b) VALUES (7,'g');
+INSERT INTO t2 (a,b) VALUES (8,'h');
+CHECK TABLE t2, t1 MEDIUM;
+Table Op Msg_type Msg_text
+test.t2 check status OK
+test.t1 check status OK
+INSERT INTO t1 (a,b) VALUES (9,'i');
+INSERT INTO t2 (a,b) VALUES (10,'j');
+CHECK TABLE t1, t2 EXTENDED;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+test.t2 check status OK
+INSERT INTO t1 (a,b) VALUES (11,'k');
+CHECK TABLE t1 CHANGED;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+DROP TABLE t1, t2;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY, KEY(a)) ENGINE=rocksdb;
+INSERT INTO t1 (a) VALUES (1),(2),(5);
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+INSERT INTO t1 (a) VALUES (6),(8),(12);
+CHECK TABLE t1 FOR UPGRADE;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+INSERT INTO t1 (a) VALUES (13),(15),(16);
+CHECK TABLE t1 QUICK;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+INSERT INTO t1 (a) VALUES (17),(120),(132);
+CHECK TABLE t1 FAST;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+INSERT INTO t1 (a) VALUES (801),(900),(7714);
+CHECK TABLE t1 MEDIUM;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+INSERT INTO t1 (a) VALUES (8760),(10023),(12000);
+CHECK TABLE t1 EXTENDED;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+INSERT INTO t1 (a) VALUES (13345),(24456),(78302),(143028);
+CHECK TABLE t1 CHANGED;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/checkpoint.result b/storage/rocksdb/mysql-test/rocksdb/r/checkpoint.result
new file mode 100644
index 00000000000..fd1ac63629f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/checkpoint.result
@@ -0,0 +1,59 @@
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+DROP TABLE IF EXISTS t3;
+DROP TABLE IF EXISTS t4;
+DROP TABLE IF EXISTS t5;
+CREATE TABLE t1 (
+a int not null,
+b int not null,
+primary key (a,b) comment 'cf1',
+key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+CREATE TABLE t2 (
+a int not null,
+b int not null,
+primary key (a,b) comment 'cf1',
+key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+CREATE TABLE t3 (
+a int not null,
+b int not null,
+primary key (a,b) comment 'cf1',
+key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+CREATE TABLE t4 (
+a int not null,
+b int not null,
+primary key (a,b) comment 'cf1',
+key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+DELETE FROM t1;
+DELETE FROM t2;
+DELETE FROM t3;
+DELETE FROM t4;
+CREATE TABLE t5 (
+a int not null,
+b int not null,
+primary key (a,b) comment 'cf1',
+key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+DELETE FROM t5;
+SET GLOBAL ROCKSDB_CREATE_CHECKPOINT = '[CHECKPOINT]';
+CURRENT
+SET GLOBAL ROCKSDB_CREATE_CHECKPOINT = '[CHECKPOINT]';
+CURRENT
+truncate table t1;
+optimize table t1;
+truncate table t2;
+optimize table t2;
+truncate table t3;
+optimize table t3;
+truncate table t4;
+optimize table t4;
+truncate table t5;
+optimize table t5;
+drop table if exists t1;
+drop table if exists t2;
+drop table if exists t3;
+drop table if exists t4;
+drop table if exists t5;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/checksum_table.result b/storage/rocksdb/mysql-test/rocksdb/r/checksum_table.result
new file mode 100644
index 00000000000..bb209856a97
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/checksum_table.result
@@ -0,0 +1,92 @@
+DROP TABLE IF EXISTS t1,t2;
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb CHECKSUM=0;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+CREATE TABLE t2 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb CHECKSUM=0;
+CHECKSUM TABLE t1;
+Table Checksum
+test.t1 4259194219
+CHECKSUM TABLE t2, t1;
+Table Checksum
+test.t2 0
+test.t1 4259194219
+CHECKSUM TABLE t1, t2 QUICK;
+Table Checksum
+test.t1 NULL
+test.t2 NULL
+CHECKSUM TABLE t1, t2 EXTENDED;
+Table Checksum
+test.t1 4259194219
+test.t2 0
+DROP TABLE t1, t2;
+#
+# Issue #110: SQL command checksum returns inconsistent result
+#
+create table t1 (pk int primary key, col1 varchar(10)) engine=rocksdb;
+insert into t1 values (2,'fooo');
+insert into t1 values (1,NULL);
+checksum table t1;
+Table Checksum
+test.t1 1303411884
+checksum table t1;
+Table Checksum
+test.t1 1303411884
+select * from t1 where pk=2;
+pk col1
+2 fooo
+checksum table t1;
+Table Checksum
+test.t1 1303411884
+checksum table t1;
+Table Checksum
+test.t1 1303411884
+flush tables;
+checksum table t1;
+Table Checksum
+test.t1 1303411884
+checksum table t1;
+Table Checksum
+test.t1 1303411884
+drop table t1;
+#
+# The following test is about making sure MyRocks CHECKSUM TABLE
+# values are the same as with InnoDB.
+# If these checksum values change, make sure their counterparts
+# in suite/innodb/r/checksum-matches-myrocks.result are updated to match.
+#
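+# A sketch of the InnoDB counterpart (assuming the innodb suite test
+# runs the same statements with only the engine swapped, so it should
+# report the same checksum, 1303411884):
+# create table t1 (pk int primary key, col1 varchar(10)) engine=innodb;
+# insert into t1 values (2,'fooo');
+# insert into t1 values (1,NULL);
+# checksum table t1;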
+create table t1 (pk int primary key, col1 varchar(10)) engine=rocksdb;
+insert into t1 values (2,'fooo');
+insert into t1 values (1,NULL);
+checksum table t1;
+Table Checksum
+test.t1 1303411884
+drop table t1;
+create table t1 (
+pk bigint unsigned primary key,
+col1 varchar(10),
+col2 tinyint,
+col3 double
+) engine=rocksdb;
+# MariaDB has changed the checksumming algorithm
+# Enable the old algorithm:
+set @tmp_old=@@old;
+set old=1;
+checksum table t1;
+Table Checksum
+test.t1 0
+insert into t1 values (1, NULL, NULL, NULL);
+insert into t1 values (2, 'foo', NULL, NULL);
+checksum table t1;
+Table Checksum
+test.t1 3633741545
+insert into t1 values (3, NULL, 123, NULL);
+insert into t1 values (4, NULL, NULL, 2.78);
+checksum table t1;
+Table Checksum
+test.t1 390004011
+insert into t1 values (5, 'xxxYYYzzzT', NULL, 2.78);
+insert into t1 values (6, '', NULL, 2.78);
+checksum table t1;
+Table Checksum
+test.t1 3183101003
+set old=@tmp_old;
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/checksum_table_live.result b/storage/rocksdb/mysql-test/rocksdb/r/checksum_table_live.result
new file mode 100644
index 00000000000..fb86c0af260
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/checksum_table_live.result
@@ -0,0 +1,20 @@
+DROP TABLE IF EXISTS t1,t2;
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb CHECKSUM=1;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+CREATE TABLE t2 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb CHECKSUM=1;
+CHECKSUM TABLE t1;
+Table Checksum
+test.t1 4259194219
+CHECKSUM TABLE t2, t1;
+Table Checksum
+test.t2 0
+test.t1 4259194219
+CHECKSUM TABLE t1, t2 QUICK;
+Table Checksum
+test.t1 NULL
+test.t2 NULL
+CHECKSUM TABLE t1, t2 EXTENDED;
+Table Checksum
+test.t1 4259194219
+test.t2 0
+DROP TABLE t1, t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/col_opt_default.result b/storage/rocksdb/mysql-test/rocksdb/r/col_opt_default.result
new file mode 100644
index 00000000000..af53f061753
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/col_opt_default.result
@@ -0,0 +1,20 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT PRIMARY KEY DEFAULT '0') ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a int(11) NO PRI 0
+INSERT INTO t1 (a) VALUES (1);
+SELECT a FROM t1;
+a
+1
+ALTER TABLE t1 ADD COLUMN b CHAR(8) DEFAULT '';
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a int(11) NO PRI 0
+b char(8) YES
+INSERT INTO t1 (b) VALUES ('a');
+SELECT a,b FROM t1 ORDER BY a,b;
+a b
+0 a
+1
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/col_opt_not_null.result b/storage/rocksdb/mysql-test/rocksdb/r/col_opt_not_null.result
new file mode 100644
index 00000000000..64d87b7116d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/col_opt_not_null.result
@@ -0,0 +1,2613 @@
+########################
+# BINARY columns
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+b BINARY NOT NULL,
+b0 BINARY(0) NOT NULL,
+b1 BINARY(1) NOT NULL,
+b20 BINARY(20) NOT NULL,
+b255 BINARY(255) NOT NULL,
+pk BINARY PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+b binary(1) NO NULL
+b0 binary(0) NO NULL
+b1 binary(1) NO NULL
+b20 binary(20) NO NULL
+b255 binary(255) NO NULL
+pk binary(1) NO PRI NULL
+INSERT INTO t1 VALUES ('','','','','','');
+INSERT INTO t1 VALUES ('a','','b','abcdefghi klmnopqrst', 'Creating an article for the Knowledgebase is similar to asking questions. First, navigate to the category where you feel the article should be. Once there, double check that an article doesn\'t already exist which would work.','a');
+SELECT HEX(b), HEX(b0), HEX(b1), HEX(b20), HEX(b255), HEX(pk) FROM t1 ORDER BY pk;
+HEX(b) HEX(b0) HEX(b1) HEX(b20) HEX(b255) HEX(pk)
+00 00 0000000000000000000000000000000000000000 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 00
+61 62 616263646566676869206B6C6D6E6F7071727374 4372656174696E6720616E2061727469636C6520666F7220746865204B6E6F776C65646765626173652069732073696D696C617220746F2061736B696E67207175657374696F6E732E2046697273742C206E6176696761746520746F207468652063617465676F727920776865726520796F75206665656C207468652061727469636C652073686F756C642062652E204F6E63652074686572652C20646F75626C6520636865636B207468617420616E2061727469636C6520646F65736E277420616C726561647920657869737420776869636820776F756C6420776F726B2E00000000000000000000000000000000000000000000000000000000000000 61
+INSERT INTO t1 VALUES ('abc', 'a', 'abc', REPEAT('a',21), REPEAT('x',256),'b');
+Warnings:
+Warning 1265 Data truncated for column 'b' at row 1
+Warning 1265 Data truncated for column 'b0' at row 1
+Warning 1265 Data truncated for column 'b1' at row 1
+Warning 1265 Data truncated for column 'b20' at row 1
+Warning 1265 Data truncated for column 'b255' at row 1
+INSERT INTO t1 SELECT b255, b255, b255, b255, CONCAT('a',b255,b255), 'c' FROM t1;
+ERROR 23000: Duplicate entry 'c' for key 'PRIMARY'
+SELECT HEX(b), HEX(b0), HEX(b1), HEX(b20), HEX(b255), HEX(pk) FROM t1 ORDER BY pk;
+HEX(b) HEX(b0) HEX(b1) HEX(b20) HEX(b255) HEX(pk)
+00 00 0000000000000000000000000000000000000000 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 00
+61 61 6161616161616161616161616161616161616161 787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878 62
+61 62 616263646566676869206B6C6D6E6F7071727374 4372656174696E6720616E2061727469636C6520666F7220746865204B6E6F776C65646765626173652069732073696D696C617220746F2061736B696E67207175657374696F6E732E2046697273742C206E6176696761746520746F207468652063617465676F727920776865726520796F75206665656C207468652061727469636C652073686F756C642062652E204F6E63652074686572652C20646F75626C6520636865636B207468617420616E2061727469636C6520646F65736E277420616C726561647920657869737420776869636820776F756C6420776F726B2E00000000000000000000000000000000000000000000000000000000000000 61
+ALTER TABLE t1 ADD COLUMN b257 BINARY(257) NOT NULL;
+ERROR 42000: Column length too big for column 'b257' (max = 255); use BLOB or TEXT instead
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+b binary(1) NO NULL
+b0 binary(0) NO NULL
+b1 binary(1) NO NULL
+b20 binary(20) NO NULL
+b255 binary(255) NO NULL
+pk binary(1) NO PRI NULL
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# BINARY NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c BINARY NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c binary(1) NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES (0);
+SELECT HEX(c) FROM t1;
+HEX(c)
+30
+DROP TABLE t1;
+#----------------------------------
+# BINARY NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c BINARY NOT NULL DEFAULT 0
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c binary(1) NO 0
+ALTER TABLE t1 ADD COLUMN err BINARY NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES (0);
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1 30
+2 30
+DROP TABLE t1;
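+# Note that DEFAULT 0 on a BINARY(1) column stores the character '0'
+# (byte 0x30), not a zero byte, which is why HEX(c) reports 30 both for
+# the explicit INSERT of 0 and for the defaulted row.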
+########################
+# VARBINARY columns
+########################
+DROP TABLE IF EXISTS t1, t2;
+CREATE TABLE t1 (
+v0 VARBINARY(0) NOT NULL,
+v1 VARBINARY(1) NOT NULL,
+v64 VARBINARY(64) NOT NULL,
+v65000 VARBINARY(65000) NOT NULL,
+PRIMARY KEY (v64)
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+v0 varbinary(0) NO NULL
+v1 varbinary(1) NO NULL
+v64 varbinary(64) NO PRI NULL
+v65000 varbinary(65000) NO NULL
+CREATE TABLE t2 (v VARBINARY(65532) NOT NULL, PRIMARY KEY(v(255))) ENGINE=rocksdb;
+SHOW COLUMNS IN t2;
+Field Type Null Key Default Extra
+v varbinary(65532) NO PRI NULL
+INSERT INTO t1 (v0,v1,v64,v65000) VALUES ('','','','');
+INSERT INTO t1 (v0,v1,v64,v65000) VALUES ('','y','Once there, double check that an article doesn\'t already exist','Here is a list of recommended books on MariaDB and MySQL. We\'ve provided links to Amazon.com here for convenience, but they can be found at many other bookstores, both online and off.
+
+ If you want to have your favorite MySQL / MariaDB book listed here, please leave a comment.
+ For developers who want to code on MariaDB or MySQL
+
+ * Understanding MySQL Internals by Sasha Pachev, former MySQL developer at MySQL AB.
+ o This is the only book we know about that describes the internals of MariaDB / MySQL. A must have for anyone who wants to understand and develop on MariaDB!
+ o Not all topics are covered and some parts are slightly outdated, but still the best book on this topic.
+ * MySQL 5.1 Plugin Development by Sergei Golubchik and Andrew Hutchings
+ o A must read for anyone wanting to write a plugin for MariaDB, written by the Sergei who designed the plugin interface for MySQL and MariaDB!
+
+ For MariaDB / MySQL end users
+
+ * MariaDB Crash Course by Ben Forta
+ o First MariaDB book!
+ o For people who want to learn SQL and the basics of MariaDB.
+ o Now shipping. Purchase at Amazon.com or your favorite bookseller.
+
+ * SQL-99 Complete, Really by Peter Gulutzan & Trudy Pelzer.
+ o Everything you wanted to know about the SQL 99 standard. Excellent reference book!
+ o Free to read in the Knowledgebase!
+
+ * MySQL (4th Edition) by Paul DuBois
+ o The \'default\' book to read if you wont to learn to use MySQL / MariaDB.
+
+ * MySQL Cookbook by Paul DuBois
+ o A lot of examples of how to use MySQL. As with all of Paul\'s books, it\'s worth its weight in gold and even enjoyable reading for such a \'dry\' subject.
+
+ * High Performance MySQL, Second Edition, By Baron Schwartz, Peter Zaitsev, Vadim Tkachenko, Jeremy D. Zawodny, Arjen Lentz, Derek J. Balling, et al.
+ o \"High Performance MySQL is the definitive guide to building fast, reliable systems with MySQL. Written by noted experts with years of real-world experience building very large systems, this book covers every aspect of MySQL performance in detail, and focuses on robustness, security, and data integrity. Learn advanced techniques in depth so you can bring out MySQL\'s full power.\" (From the book description at O\'Reilly)
+
+ * MySQL Admin Cookbook
+ o A quick step-by-step guide for MySQL users and database administrators to tackle real-world challenges with MySQL configuration and administration
+
+ * MySQL 5.0 Certification Study Guide, By Paul DuBois, Stefan Hinz, Carsten Pedersen
+ o This is the official guide to cover the passing of the two MySQL Certification examinations. It is valid till version 5.0 of the server, so while it misses all the features available in MySQL 5.1 and greater (including MariaDB 5.1 and greater), it provides a good basic understanding of MySQL for the end-user. ');
+SELECT HEX(v0), HEX(v1), HEX(v64), HEX(v65000) FROM t1;
+HEX(v0) HEX(v1) HEX(v64) HEX(v65000)
+
+ 79 4F6E63652074686572652C20646F75626C6520636865636B207468617420616E2061727469636C6520646F65736E277420616C7265616479206578697374 486572652069732061206C697374206F66207265636F6D6D656E64656420626F6F6B73206F6E204D61726961444220616E64204D7953514C2E2057652776652070726F7669646564206C696E6B7320746F20416D617A6F6E2E636F6D206865726520666F7220636F6E76656E69656E63652C2062757420746865792063616E20626520666F756E64206174206D616E79206F7468657220626F6F6B73746F7265732C20626F7468206F6E6C696E6520616E64206F66662E0A0A2020496620796F752077616E7420746F206861766520796F7572206661766F72697465204D7953514C202F204D61726961444220626F6F6B206C697374656420686572652C20706C65617365206C65617665206120636F6D6D656E742E0A2020466F7220646576656C6F706572732077686F2077616E7420746F20636F6465206F6E204D617269614442206F72204D7953514C0A0A2020202020202A20556E6465727374616E64696E67204D7953514C20496E7465726E616C73206279205361736861205061636865762C20666F726D6572204D7953514C20646576656C6F706572206174204D7953514C2041422E0A2020202020202020202020206F205468697320697320746865206F6E6C7920626F6F6B207765206B6E6F772061626F75742074686174206465736372696265732074686520696E7465726E616C73206F66204D617269614442202F204D7953514C2E2041206D757374206861766520666F7220616E796F6E652077686F2077616E747320746F20756E6465727374616E6420616E6420646576656C6F70206F6E204D617269614442210A2020202020202020202020206F204E6F7420616C6C20746F706963732061726520636F766572656420616E6420736F6D652070617274732061726520736C696768746C79206F757464617465642C20627574207374696C6C20746865206265737420626F6F6B206F6E207468697320746F7069632E200A2020202020202A204D7953514C20352E3120506C7567696E20446576656C6F706D656E742062792053657267656920476F6C75626368696B20616E6420416E64726577204875746368696E67730A2020202020202020202020206F2041206D757374207265616420666F7220616E796F6E652077616E74696E6720746F207772697465206120706C7567696E20666F72204D6172696144422C207772697474656E20627920746865205365726765692077686F2064657369676E65642074686520706C7567696E20696E7465726661636520666F72204D7953514C20616E64204D61726961444221200A0A2020466F72204D617269614442202F204D7953514C20656E642075736572730A0A2020202020202A204D61726961444220437261736820436F757273652062792042656E20466F7274610A2020202020202020202020206F204669727374204D61726961444220626F6F6B210A2020202020202020202020206F20466F722070656F706C652077686F2077616E7420746F206C6561726E2053514C20616E642074686520626173696373206F66204D6172696144422E0A2020202020202020202020206F204E6F77207368697070696E672E20507572636861736520617420416D617A6F6E2E636F6D206F7220796F7572206661766F7269746520626F6F6B73656C6C65722E200A0A2020202020202A2053514C2D393920436F6D706C6574652C205265616C6C792062792050657465722047756C75747A616E20262054727564792050656C7A65722E0A2020202020202020202020206F2045766572797468696E6720796F752077616E74656420746F206B6E6F772061626F7574207468652053514C203939207374616E646172642E20457863656C6C656E74207265666572656E636520626F6F6B210A2020202020202020202020206F204672656520746F207265616420696E20746865204B6E6F776C656467656261736521200A0A2020202020202A204D7953514C20283474682045646974696F6E29206279205061756C204475426F69730A2020202020202020202020206F20546865202764656661756C742720626F6F6B20746F207265616420696620796F7520776F6E7420746F206C6561726E20746F20757365204D7953514C202F204D6172696144422E200A0A2020202020202A204D7953514C20436F6F6B626F6F6B206279205061756C204475426F69730A2020202020202020202020206F2041206C6F74206F66206578616D706C6573206F6620686F7720746F20757365204D7953514C2E204173207769746820616C6C206F66205061756C277320626F6F6B732C206974277320776F727468206974732077656967687420696E20676F6C6420616E64206576656E20656E6A6F7961626C652072656164696E6720666F7220737563682061202764727927207375626A6563742E200A0A2020202020202A204869676820506572666F726D616E6365204D7953514C2C205365636F6E642045646974696F6E2C204279204261726F6E20536368776172747A2C205065746572205A6169747365762C20566164696D20546B616368656E6B6F2C204A6572656D7920442E205A61776F646E792C2041726A656E204C656E747A2C20446572656B204A2E2042616C6C696E672C20657420616C2E0A2020202020202020202020206F20224869676820506572666F726D616E6365204D7953514C2069732074686520646566696E697469766520677569646520746F206275696C64696E6720666173742C2072656C6961626C652073797374656D732077697468204D7953514C2E205772697474656E206279206E6F74656420657870657274732077697468207965617273206F66207265616C2D776F726C6420657870657269656E6365206275696C64696E672076657279206C617267652073797374656D732C207468697320626F6F6B20636F7665727320657665727920617370656374206F66204D7953514C20706572666F726D616E636520696E2064657461696C2C20616E6420666F6375736573206F6E20726F627573746E6573732C2073656375726974792C20616E64206461746120696E746567726974792E204C6561726E20616476616E63656420746563686E697175657320696E20646570746820736F20796F752063616E206272696E67206F7574204D7953514C27732066756C6C20706F7765722E22202846726F6D2074686520626F6F6B206465736372697074696F6E206174204F275265696C6C7929200A0A2020202020202A204D7953514C2041646D696E20436F6F6B626F6F6B0A2020202020202020202020206F204120717569636B20737465702D62792D7374657020677569646520666F72204D7953514C20757365727320616E642064617461626173652061646D696E6973747261746F727320746F207461636B6C65207265616C2D776F726C64206368616C6C656E6765732077697468204D7953514C20636F6E66696775726174696F6E20616E642061646D696E697374726174696F6E200A0A2020202020202A204D7953514C20352E302043657274696669636174696F6E2053747564792047756964652C204279205061756C204475426F69732C2053746566616E2048696E7A2C204361727374656E20506564657273656E0A2020202020202020202020206F205468697320697320746865206F6666696369616C20677569646520746F20636F766572207468652070617373696E67206F66207468652074776F204D7953514C2043657274696669636174696F6E206578616D696E6174696F6E732E2049742069732076616C69642074696C6C2076657273696F6E20352E30206F6620746865207365727665722C20736F207768696C65206974206D697373657320616C6C2074686520666561747572657320617661696C61626C6520696E204D7953514C20352E3120616E6420677265617465722028696E636C7564696E67204D61726961444220352E3120616E642067726561746572292C2069742070726F7669646573206120676F6F6420626173696320756E6465727374616E64696E67206F66204D7953514C20666F722074686520656E642D757365722E20
+INSERT INTO t1 (v0,v1,v64,v65000) VALUES ('y', 'yy', REPEAT('c',65), REPEAT('abcdefghi ',6501));
+Warnings:
+Warning 1265 Data truncated for column 'v0' at row 1
+Warning 1265 Data truncated for column 'v1' at row 1
+Warning 1265 Data truncated for column 'v64' at row 1
+Warning 1265 Data truncated for column 'v65000' at row 1
+INSERT INTO t1 (v0,v1,v64,v65000) SELECT v65000, v65000, CONCAT('a',v65000), CONCAT(v65000,v1) FROM t1;
+Warnings:
+Warning 1265 Data truncated for column 'v0' at row 5
+Warning 1265 Data truncated for column 'v1' at row 5
+Warning 1265 Data truncated for column 'v64' at row 5
+Warning 1265 Data truncated for column 'v0' at row 6
+Warning 1265 Data truncated for column 'v1' at row 6
+Warning 1265 Data truncated for column 'v64' at row 6
+Warning 1265 Data truncated for column 'v65000' at row 6
+SELECT HEX(v0), HEX(v1), HEX(v64), LENGTH(HEX(v65000)) FROM t1;
+HEX(v0) HEX(v1) HEX(v64) LENGTH(HEX(v65000))
+ 0
+ 61 0
+ 48 61486572652069732061206C697374206F66207265636F6D6D656E64656420626F6F6B73206F6E204D61726961444220616E64204D7953514C2E205765277665 5932
+ 61 61616263646566676869206162636465666768692061626364656667686920616263646566676869206162636465666768692061626364656667686920616263 130000
+ 79 4F6E63652074686572652C20646F75626C6520636865636B207468617420616E2061727469636C6520646F65736E277420616C7265616479206578697374 5930
+ 79 63636363636363636363636363636363636363636363636363636363636363636363636363636363636363636363636363636363636363636363636363636363 130000
+ALTER TABLE t1 ADD COLUMN v65536 VARBINARY(65536) NOT NULL;
+Warnings:
+Note 1246 Converting column 'v65536' from VARBINARY to BLOB
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+v0 varbinary(0) NO NULL
+v1 varbinary(1) NO NULL
+v64 varbinary(64) NO PRI NULL
+v65000 varbinary(65000) NO NULL
+v65536 mediumblob NO NULL
+DROP TABLE t1, t2;
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# VARBINARY(64) NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c VARBINARY(64) NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c varbinary(64) NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('test');
+SELECT HEX(c) FROM t1;
+HEX(c)
+74657374
+DROP TABLE t1;
+#----------------------------------
+# VARBINARY(64) NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c VARBINARY(64) NOT NULL DEFAULT 'test'
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c varbinary(64) NO test
+ALTER TABLE t1 ADD COLUMN err VARBINARY(64) NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('test');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1 74657374
+2 74657374
+DROP TABLE t1;
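+# The two blocks above establish a pattern that repeats for every type in
+# this file: a NOT NULL column rejects an explicit NULL with error 1048
+# (SQLSTATE 23000), rejects DEFAULT NULL already at DDL time, and an
+# omitted value falls back to the declared default. A condensed sketch
+# (hypothetical table `probe`, not from the recorded run):
+CREATE TABLE probe (pk INT AUTO_INCREMENT PRIMARY KEY,
+c VARBINARY(64) NOT NULL DEFAULT 'x') ENGINE=rocksdb;
+INSERT INTO probe () VALUES ();
+# stores 'x' via the declared default
+INSERT INTO probe (c) VALUES (NULL);
+# fails: ERROR 23000: Column 'c' cannot be null
+DROP TABLE probe;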
+########################
+# BIT columns
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+a BIT NOT NULL,
+b BIT(20) NOT NULL,
+c BIT(64) NOT NULL,
+d BIT(1) NOT NULL,
+PRIMARY KEY (c)
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a bit(1) NO NULL
+b bit(20) NO NULL
+c bit(64) NO PRI NULL
+d bit(1) NO NULL
+ALTER TABLE t1 DROP COLUMN d;
+ALTER TABLE t1 ADD COLUMN d BIT(0) NOT NULL;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a bit(1) NO NULL
+b bit(20) NO NULL
+c bit(64) NO PRI NULL
+d bit(1) NO NULL
+INSERT INTO t1 (a,b,c,d) VALUES (0,POW(2,20)-1,b'1111111111111111111111111111111111111111111111111111111111111111',1);
+SELECT BIN(a), HEX(b), c+0 FROM t1 WHERE d>0;
+BIN(a) HEX(b) c+0
+0 FFFFF 18446744073709551615
+INSERT INTO t1 (a,b,c,d) VALUES (1,0,-2,0);
+SELECT a+0, b+0, c+0 FROM t1 WHERE d<100;
+a+0 b+0 c+0
+0 1048575 18446744073709551615
+1 0 18446744073709551614
+INSERT INTO t1 (a,b,c,d) VALUES (b'1', 'f', 0xFF, 0x0);
+SELECT a+0, b+0, c+0 FROM t1 WHERE d IN (0, 2);
+a+0 b+0 c+0
+1 0 18446744073709551614
+1 102 255
+DELETE FROM t1;
+INSERT INTO t1 (a,b,c,d) VALUES (0x10,0,0,1);
+Warnings:
+Warning 1264 Out of range value for column 'a' at row 1
+SELECT a+0,b+0,c+0,d+0 FROM t1;
+a+0 b+0 c+0 d+0
+1 0 0 1
+INSERT INTO t1 (a,b,c,d) VALUES (0x01,0,0x10000000000000000,0);
+Warnings:
+Warning 1264 Out of range value for column 'c' at row 1
+SELECT a+0,b+0,c+0,d+0 FROM t1;
+a+0 b+0 c+0 d+0
+1 0 0 1
+1 0 18446744073709551615 0
+DROP TABLE t1;
+CREATE TABLE t1 (pk INT PRIMARY KEY, a BIT(65) NOT NULL) ENGINE=rocksdb;
+ERROR 42000: Display width out of range for 'a' (max = 64)
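+# BIT(M) stores M bits with 1 <= M <= 64; bare BIT defaults to BIT(1) and
+# BIT(0) is normalized to BIT(1), both visible in SHOW COLUMNS above, while
+# BIT(65) is rejected outright. In non-strict mode an oversized value is
+# clamped to the all-ones pattern with warning 1264. Sketch (hypothetical
+# `probe` table, not from the recorded run):
+CREATE TABLE probe (b BIT(8)) ENGINE=rocksdb;
+INSERT INTO probe VALUES (b'11111111'), (300);
+# 300 does not fit 8 bits and clamps to 255 with warning 1264
+SELECT b+0 FROM probe;
+DROP TABLE probe;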
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# BIT NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c BIT NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c bit(1) NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES (1);
+SELECT HEX(c) FROM t1;
+HEX(c)
+1
+DROP TABLE t1;
+#----------------------------------
+# BIT NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c BIT NOT NULL DEFAULT 1
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c bit(1) NO b'1'
+ALTER TABLE t1 ADD COLUMN err BIT NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES (1);
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1 1
+2 1
+DROP TABLE t1;
+########################
+# BLOB columns
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+b BLOB NOT NULL,
+b0 BLOB(0) NOT NULL,
+b1 BLOB(1) NOT NULL,
+b300 BLOB(300) NOT NULL,
+bm BLOB(65535) NOT NULL,
+b70k BLOB(70000) NOT NULL,
+b17m BLOB(17000000) NOT NULL,
+t TINYBLOB NOT NULL,
+m MEDIUMBLOB NOT NULL,
+l LONGBLOB NOT NULL
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+b blob NO NULL
+b0 blob NO NULL
+b1 tinyblob NO NULL
+b300 blob NO NULL
+bm blob NO NULL
+b70k mediumblob NO NULL
+b17m longblob NO NULL
+t tinyblob NO NULL
+m mediumblob NO NULL
+l longblob NO NULL
+INSERT INTO t1 (b,b0,b1,b300,bm,b70k,b17m,t,m,l) VALUES
+('','','','','','','','','',''),
+('a','b','c','d','e','f','g','h','i','j'),
+('test1','test2','test3','test4','test5','test6','test7','test8','test9','test10'),
+( REPEAT('a',65535), REPEAT('b',65535), REPEAT('c',255), REPEAT('d',65535), REPEAT('e',65535), REPEAT('f',1048576), HEX(REPEAT('g',1048576)), REPEAT('h',255), REPEAT('i',1048576), HEX(REPEAT('j',1048576)) );
+SELECT LENGTH(b), LENGTH(b0), LENGTH(b1), LENGTH(b300), LENGTH(bm), LENGTH(b70k), LENGTH(b17m), LENGTH(t), LENGTH(m), LENGTH(l) FROM t1;
+LENGTH(b) LENGTH(b0) LENGTH(b1) LENGTH(b300) LENGTH(bm) LENGTH(b70k) LENGTH(b17m) LENGTH(t) LENGTH(m) LENGTH(l)
+0 0 0 0 0 0 0 0 0 0
+1 1 1 1 1 1 1 1 1 1
+5 5 5 5 5 5 5 5 5 6
+65535 65535 255 65535 65535 1048576 2097152 255 1048576 2097152
+INSERT INTO t1 (b,b0,b1,b300,bm,b70k,b17m,t,m,l) VALUES
+( REPEAT('a',65536), REPEAT('b',65536), REPEAT('c',256), REPEAT('d',65536), REPEAT('e',65536), REPEAT('f',1048576), REPEAT('g',1048576), REPEAT('h',256), REPEAT('i',1048576), REPEAT('j',1048576) );
+Warnings:
+Warning 1265 Data truncated for column 'b' at row 1
+Warning 1265 Data truncated for column 'b0' at row 1
+Warning 1265 Data truncated for column 'b1' at row 1
+Warning 1265 Data truncated for column 'b300' at row 1
+Warning 1265 Data truncated for column 'bm' at row 1
+Warning 1265 Data truncated for column 't' at row 1
+SELECT LENGTH(b), LENGTH(b0), LENGTH(b1), LENGTH(b300), LENGTH(bm), LENGTH(b70k), LENGTH(b17m), LENGTH(t), LENGTH(m), LENGTH(l) FROM t1;
+LENGTH(b) LENGTH(b0) LENGTH(b1) LENGTH(b300) LENGTH(bm) LENGTH(b70k) LENGTH(b17m) LENGTH(t) LENGTH(m) LENGTH(l)
+0 0 0 0 0 0 0 0 0 0
+1 1 1 1 1 1 1 1 1 1
+5 5 5 5 5 5 5 5 5 6
+65535 65535 255 65535 65535 1048576 1048576 255 1048576 1048576
+65535 65535 255 65535 65535 1048576 2097152 255 1048576 2097152
+ALTER TABLE t1 ADD COLUMN bbb BLOB(4294967296);
+ERROR 42000: Display width out of range for 'bbb' (max = 4294967295)
+DROP TABLE t1;
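+# BLOB(L) is mapped to the smallest type able to hold L bytes, which is why
+# b1 became TINYBLOB, b70k MEDIUMBLOB and b17m LONGBLOB above; the tiers
+# are TINYBLOB (255), BLOB (65535), MEDIUMBLOB (16777215) and LONGBLOB
+# (4294967295 bytes). Sketch (hypothetical `probe` table, not from the
+# recorded run):
+CREATE TABLE probe (x BLOB(255), y BLOB(66000)) ENGINE=rocksdb;
+SHOW COLUMNS IN probe;
+# x is reported as tinyblob, y as mediumblob
+DROP TABLE probe;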
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# BLOB NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c BLOB NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c blob NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('');
+SELECT HEX(c) FROM t1;
+HEX(c)
+
+DROP TABLE t1;
+#----------------------------------
+# BLOB NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c BLOB NOT NULL DEFAULT ''
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c blob NO ''
+ALTER TABLE t1 ADD COLUMN err BLOB NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1
+2
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# TINYBLOB NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c TINYBLOB NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c tinyblob NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('');
+SELECT HEX(c) FROM t1;
+HEX(c)
+
+DROP TABLE t1;
+#----------------------------------
+# TINYBLOB NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c TINYBLOB NOT NULL DEFAULT ''
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c tinyblob NO ''
+ALTER TABLE t1 ADD COLUMN err TINYBLOB NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1
+2
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# MEDIUMBLOB NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c MEDIUMBLOB NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c mediumblob NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('');
+SELECT HEX(c) FROM t1;
+HEX(c)
+
+DROP TABLE t1;
+#----------------------------------
+# MEDIUMBLOB NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c MEDIUMBLOB NOT NULL DEFAULT ''
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c mediumblob NO ''
+ALTER TABLE t1 ADD COLUMN err MEDIUMBLOB NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1
+2
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# LONGBLOB NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c LONGBLOB NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c longblob NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('');
+SELECT HEX(c) FROM t1;
+HEX(c)
+
+DROP TABLE t1;
+#----------------------------------
+# LONGBLOB NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c LONGBLOB NOT NULL DEFAULT ''
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c longblob NO ''
+ALTER TABLE t1 ADD COLUMN err LONGBLOB NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1
+2
+DROP TABLE t1;
+########################
+# BOOL columns
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+b1 BOOL NOT NULL,
+b2 BOOLEAN NOT NULL
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+b1 tinyint(1) NO NULL
+b2 tinyint(1) NO NULL
+INSERT INTO t1 (b1,b2) VALUES (1,TRUE);
+SELECT b1,b2 FROM t1;
+b1 b2
+1 1
+INSERT INTO t1 (b1,b2) VALUES (FALSE,0);
+SELECT b1,b2 FROM t1;
+b1 b2
+0 0
+1 1
+INSERT INTO t1 (b1,b2) VALUES (2,3);
+SELECT b1,b2 FROM t1;
+b1 b2
+0 0
+1 1
+2 3
+INSERT INTO t1 (b1,b2) VALUES (-1,-2);
+SELECT b1,b2 FROM t1;
+b1 b2
+-1 -2
+0 0
+1 1
+2 3
+SELECT IF(b1,'true','false') AS a, IF(b2,'true','false') AS b FROM t1;
+a b
+false false
+true true
+true true
+true true
+SELECT b1,b2 FROM t1 WHERE b1 = TRUE;
+b1 b2
+1 1
+SELECT b1,b2 FROM t1 WHERE b2 = FALSE;
+b1 b2
+0 0
+INSERT INTO t1 (b1,b2) VALUES ('a','b');
+Warnings:
+Warning 1366 Incorrect integer value: 'a' for column `test`.`t1`.`b1` at row 1
+Warning 1366 Incorrect integer value: 'b' for column `test`.`t1`.`b2` at row 1
+SELECT b1,b2 FROM t1;
+b1 b2
+-1 -2
+0 0
+0 0
+1 1
+2 3
+INSERT INTO t1 (b1,b2) VALUES (128,-129);
+Warnings:
+Warning 1264 Out of range value for column 'b1' at row 1
+Warning 1264 Out of range value for column 'b2' at row 1
+SELECT b1,b2 FROM t1;
+b1 b2
+-1 -2
+0 0
+0 0
+1 1
+127 -128
+2 3
+ALTER TABLE t1 ADD COLUMN b3 BOOLEAN UNSIGNED NOT NULL;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'UNSIGNED NOT NULL' at line 1
+ALTER TABLE ADD COLUMN b3 BOOL ZEROFILL NOT NULL;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'ADD COLUMN b3 BOOL ZEROFILL NOT NULL' at line 1
+DROP TABLE t1;
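+# BOOL/BOOLEAN is just an alias for TINYINT(1): it stores -128..127, TRUE
+# and FALSE are literals for 1 and 0, and any non-zero value is truthy,
+# which is why 2, 3, -1 and -2 all printed 'true' above, while BOOLEAN
+# UNSIGNED is a parse error. Sketch (hypothetical `probe` table, not from
+# the recorded run):
+CREATE TABLE probe (b BOOL) ENGINE=rocksdb;
+INSERT INTO probe VALUES (TRUE), (2), (-1), (0);
+SELECT b, IF(b,'true','false') FROM probe;
+# only the 0 row prints 'false'
+DROP TABLE probe;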
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# BOOL NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c BOOL NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c tinyint(1) NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('0');
+SELECT HEX(c) FROM t1;
+HEX(c)
+0
+DROP TABLE t1;
+#----------------------------------
+# BOOL NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c BOOL NOT NULL DEFAULT '0'
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c tinyint(1) NO 0
+ALTER TABLE t1 ADD COLUMN err BOOL NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('0');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1 0
+2 0
+DROP TABLE t1;
+########################
+# CHAR columns
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c CHAR NOT NULL,
+c0 CHAR(0) NOT NULL,
+c1 CHAR(1) NOT NULL,
+c20 CHAR(20) NOT NULL,
+c255 CHAR(255) NOT NULL,
+PRIMARY KEY (c255)
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c char(1) NO NULL
+c0 char(0) NO NULL
+c1 char(1) NO NULL
+c20 char(20) NO NULL
+c255 char(255) NO PRI NULL
+INSERT INTO t1 (c,c0,c1,c20,c255) VALUES ('','','','','');
+INSERT INTO t1 (c,c0,c1,c20,c255) VALUES ('a','','b','abcdefghi klmnopqrst', 'Creating an article for the Knowledgebase is similar to asking questions. First, navigate to the category where you feel the article should be. Once there, double check that an article doesn\'t already exist which would work.');
+SELECT c,c0,c1,c20,c255 FROM t1;
+c c0 c1 c20 c255
+
+a b abcdefghi klmnopqrst Creating an article for the Knowledgebase is similar to asking questions. First, navigate to the category where you feel the article should be. Once there, double check that an article doesn't already exist which would work.
+INSERT INTO t1 (c,c0,c1,c20,c255) VALUES ('abc', 'a', 'abc', REPEAT('a',21), REPEAT('x',256));
+Warnings:
+Warning 1265 Data truncated for column 'c' at row 1
+Warning 1265 Data truncated for column 'c0' at row 1
+Warning 1265 Data truncated for column 'c1' at row 1
+Warning 1265 Data truncated for column 'c20' at row 1
+Warning 1265 Data truncated for column 'c255' at row 1
+INSERT INTO t1 (c,c0,c1,c20,c255) SELECT c255, c255, c255, c255, CONCAT('a',c255,c1) FROM t1;
+Warnings:
+Warning 1265 Data truncated for column 'c' at row 5
+Warning 1265 Data truncated for column 'c0' at row 5
+Warning 1265 Data truncated for column 'c1' at row 5
+Warning 1265 Data truncated for column 'c20' at row 5
+Warning 1265 Data truncated for column 'c' at row 6
+Warning 1265 Data truncated for column 'c0' at row 6
+Warning 1265 Data truncated for column 'c1' at row 6
+Warning 1265 Data truncated for column 'c20' at row 6
+Warning 1265 Data truncated for column 'c255' at row 6
+SELECT c,c0,c1,c20,c255 FROM t1;
+c c0 c1 c20 c255
+
+ a
+C C Creating an article aCreating an article for the Knowledgebase is similar to asking questions. First, navigate to the category where you feel the article should be. Once there, double check that an article doesn't already exist which would work.b
+a a aaaaaaaaaaaaaaaaaaaa xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+a b abcdefghi klmnopqrst Creating an article for the Knowledgebase is similar to asking questions. First, navigate to the category where you feel the article should be. Once there, double check that an article doesn't already exist which would work.
+x x xxxxxxxxxxxxxxxxxxxx axxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+SELECT DISTINCT c20, REPEAT('a',LENGTH(c20)), COUNT(*) FROM t1 GROUP BY c1, c20;
+c20 REPEAT('a',LENGTH(c20)) COUNT(*)
+ 2
+Creating an article aaaaaaaaaaaaaaaaaaa 1
+aaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaa 1
+abcdefghi klmnopqrst aaaaaaaaaaaaaaaaaaaa 1
+xxxxxxxxxxxxxxxxxxxx aaaaaaaaaaaaaaaaaaaa 1
+ALTER TABLE t1 ADD COLUMN c257 CHAR(257) NOT NULL;
+ERROR 42000: Column length too big for column 'c257' (max = 255); use BLOB or TEXT instead
+DROP TABLE t1;
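+# CHAR(N) is fixed-length with N <= 255: stored values are space-padded to
+# N and trailing spaces are stripped again on retrieval by default, longer
+# values are truncated with warning 1265 in non-strict mode, and CHAR(0)
+# keeps only the empty string. Sketch (hypothetical `probe` table, not
+# from the recorded run):
+CREATE TABLE probe (c CHAR(5)) ENGINE=rocksdb;
+INSERT INTO probe VALUES ('ab'), ('abcdefg');
+# the second value truncates to 'abcde' with warning 1265
+SELECT c, CHAR_LENGTH(c) FROM probe;
+DROP TABLE probe;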
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# CHAR NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c CHAR NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c char(1) NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('_');
+SELECT HEX(c) FROM t1;
+HEX(c)
+5F
+DROP TABLE t1;
+#----------------------------------
+# CHAR NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c CHAR NOT NULL DEFAULT '_'
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c char(1) NO _
+ALTER TABLE t1 ADD COLUMN err CHAR NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('_');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1 5F
+2 5F
+DROP TABLE t1;
+########################
+# VARCHAR columns
+########################
+DROP TABLE IF EXISTS t1, t2;
+CREATE TABLE t1 (
+v0 VARCHAR(0) NOT NULL,
+v1 VARCHAR(1) NOT NULL,
+v64 VARCHAR(64) NOT NULL,
+v65000 VARCHAR(65000) NOT NULL,
+PRIMARY KEY (v64)
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+v0 varchar(0) NO NULL
+v1 varchar(1) NO NULL
+v64 varchar(64) NO PRI NULL
+v65000 varchar(65000) NO NULL
+CREATE TABLE t2 (v VARCHAR(65532), PRIMARY KEY (v(255))) ENGINE=rocksdb;
+SHOW COLUMNS IN t2;
+Field Type Null Key Default Extra
+v varchar(65532) NO PRI NULL
+INSERT INTO t1 (v0,v1,v64,v65000) VALUES ('','','','');
+INSERT INTO t1 (v0,v1,v64,v65000) VALUES ('','y','Once there, double check that an article doesn\'t already exist','Here is a list of recommended books on MariaDB and MySQL. We\'ve provided links to Amazon.com here for convenience, but they can be found at many other bookstores, both online and off.
+
+ If you want to have your favorite MySQL / MariaDB book listed here, please leave a comment.
+ For developers who want to code on MariaDB or MySQL
+
+ * Understanding MySQL Internals by Sasha Pachev, former MySQL developer at MySQL AB.
+ o This is the only book we know about that describes the internals of MariaDB / MySQL. A must have for anyone who wants to understand and develop on MariaDB!
+ o Not all topics are covered and some parts are slightly outdated, but still the best book on this topic.
+ * MySQL 5.1 Plugin Development by Sergei Golubchik and Andrew Hutchings
+ o A must read for anyone wanting to write a plugin for MariaDB, written by the Sergei who designed the plugin interface for MySQL and MariaDB!
+
+ For MariaDB / MySQL end users
+
+ * MariaDB Crash Course by Ben Forta
+ o First MariaDB book!
+ o For people who want to learn SQL and the basics of MariaDB.
+ o Now shipping. Purchase at Amazon.com or your favorite bookseller.
+
+ * SQL-99 Complete, Really by Peter Gulutzan & Trudy Pelzer.
+ o Everything you wanted to know about the SQL 99 standard. Excellent reference book!
+ o Free to read in the Knowledgebase!
+
+ * MySQL (4th Edition) by Paul DuBois
+ o The \'default\' book to read if you wont to learn to use MySQL / MariaDB.
+
+ * MySQL Cookbook by Paul DuBois
+ o A lot of examples of how to use MySQL. As with all of Paul\'s books, it\'s worth its weight in gold and even enjoyable reading for such a \'dry\' subject.
+
+ * High Performance MySQL, Second Edition, By Baron Schwartz, Peter Zaitsev, Vadim Tkachenko, Jeremy D. Zawodny, Arjen Lentz, Derek J. Balling, et al.
+ o \"High Performance MySQL is the definitive guide to building fast, reliable systems with MySQL. Written by noted experts with years of real-world experience building very large systems, this book covers every aspect of MySQL performance in detail, and focuses on robustness, security, and data integrity. Learn advanced techniques in depth so you can bring out MySQL\'s full power.\" (From the book description at O\'Reilly)
+
+ * MySQL Admin Cookbook
+ o A quick step-by-step guide for MySQL users and database administrators to tackle real-world challenges with MySQL configuration and administration
+
+ * MySQL 5.0 Certification Study Guide, By Paul DuBois, Stefan Hinz, Carsten Pedersen
+ o This is the official guide to cover the passing of the two MySQL Certification examinations. It is valid till version 5.0 of the server, so while it misses all the features available in MySQL 5.1 and greater (including MariaDB 5.1 and greater), it provides a good basic understanding of MySQL for the end-user. ');
+SELECT v0,v1,v64,v65000 FROM t1;
+v0 v1 v64 v65000
+
+
+
+
+
+
+
+
+
+
+
+ y Once there, double check that an article doesn't already exist Here is a list of recommended books on MariaDB and MySQL. We've provided links to Amazon.com here for convenience, but they can be found at many other bookstores, both online and off.
+ o "High Performance MySQL is the definitive guide to building fast, reliable systems with MySQL. Written by noted experts with years of real-world experience building very large systems, this book covers every aspect of MySQL performance in detail, and focuses on robustness, security, and data integrity. Learn advanced techniques in depth so you can bring out MySQL's full power." (From the book description at O'Reilly)
+ o A lot of examples of how to use MySQL. As with all of Paul's books, it's worth its weight in gold and even enjoyable reading for such a 'dry' subject.
+ o A must read for anyone wanting to write a plugin for MariaDB, written by the Sergei who designed the plugin interface for MySQL and MariaDB!
+ o A quick step-by-step guide for MySQL users and database administrators to tackle real-world challenges with MySQL configuration and administration
+ o Everything you wanted to know about the SQL 99 standard. Excellent reference book!
+ o First MariaDB book!
+ o For people who want to learn SQL and the basics of MariaDB.
+ o Free to read in the Knowledgebase!
+ o Not all topics are covered and some parts are slightly outdated, but still the best book on this topic.
+ o Now shipping. Purchase at Amazon.com or your favorite bookseller.
+ o The 'default' book to read if you wont to learn to use MySQL / MariaDB.
+ o This is the official guide to cover the passing of the two MySQL Certification examinations. It is valid till version 5.0 of the server, so while it misses all the features available in MySQL 5.1 and greater (including MariaDB 5.1 and greater), it provides a good basic understanding of MySQL for the end-user.
+ o This is the only book we know about that describes the internals of MariaDB / MySQL. A must have for anyone who wants to understand and develop on MariaDB!
+ * High Performance MySQL, Second Edition, By Baron Schwartz, Peter Zaitsev, Vadim Tkachenko, Jeremy D. Zawodny, Arjen Lentz, Derek J. Balling, et al.
+ * MariaDB Crash Course by Ben Forta
+ * MySQL (4th Edition) by Paul DuBois
+ * MySQL 5.0 Certification Study Guide, By Paul DuBois, Stefan Hinz, Carsten Pedersen
+ * MySQL 5.1 Plugin Development by Sergei Golubchik and Andrew Hutchings
+ * MySQL Admin Cookbook
+ * MySQL Cookbook by Paul DuBois
+ * SQL-99 Complete, Really by Peter Gulutzan & Trudy Pelzer.
+ * Understanding MySQL Internals by Sasha Pachev, former MySQL developer at MySQL AB.
+ For MariaDB / MySQL end users
+ For developers who want to code on MariaDB or MySQL
+ If you want to have your favorite MySQL / MariaDB book listed here, please leave a comment.
+INSERT INTO t1 (v0,v1,v64,v65000) VALUES ('y', 'yy', REPEAT('c',65), REPEAT('abcdefghi ',6501));
+Warnings:
+Warning 1265 Data truncated for column 'v0' at row 1
+Warning 1265 Data truncated for column 'v1' at row 1
+Warning 1265 Data truncated for column 'v64' at row 1
+Warning 1265 Data truncated for column 'v65000' at row 1
+INSERT INTO t1 (v0,v1,v64,v65000) SELECT v65000, v65000, CONCAT('a',v65000), CONCAT(v65000,v1) FROM t1;
+Warnings:
+Warning 1265 Data truncated for column 'v0' at row 5
+Warning 1265 Data truncated for column 'v1' at row 5
+Warning 1265 Data truncated for column 'v64' at row 5
+Warning 1265 Data truncated for column 'v65000' at row 5
+Warning 1265 Data truncated for column 'v0' at row 6
+Warning 1265 Data truncated for column 'v1' at row 6
+Warning 1265 Data truncated for column 'v64' at row 6
+SELECT v0, v1, v64, LENGTH(v65000) FROM t1;
+v0 v1 v64 LENGTH(v65000)
+ 0
+ a 0
+ H aHere is a list of recommended books on MariaDB and MySQL. We've 2966
+ a aabcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abc 65000
+ y Once there, double check that an article doesn't already exist 2965
+ y cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc 65000
+ALTER TABLE t1 ADD COLUMN v65536 VARCHAR(65536) NOT NULL;
+Warnings:
+Note 1246 Converting column 'v65536' from VARCHAR to TEXT
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+v0 varchar(0) NO NULL
+v1 varchar(1) NO NULL
+v64 varchar(64) NO PRI NULL
+v65000 varchar(65000) NO NULL
+v65536 mediumtext NO NULL
+DROP TABLE t1, t2;
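+# A VARCHAR is limited by the 65535-byte row size, so VARCHAR(65532) in t2
+# is roughly the widest that still fits once the length bytes are counted,
+# an index on it needs a prefix (v(255) above), and VARCHAR(65536) is
+# promoted to MEDIUMTEXT with Note 1246, mirroring the VARBINARY-to-BLOB
+# case earlier. Sketch (hypothetical `probe` table, non-strict sql_mode
+# assumed):
+CREATE TABLE probe (v VARCHAR(65536)) ENGINE=rocksdb;
+SHOW COLUMNS IN probe;
+# v is reported as mediumtext
+DROP TABLE probe;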
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# VARCHAR(64) NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c VARCHAR(64) NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c varchar(64) NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('test default');
+SELECT HEX(c) FROM t1;
+HEX(c)
+746573742064656661756C74
+DROP TABLE t1;
+#----------------------------------
+# VARCHAR(64) NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c VARCHAR(64) NOT NULL DEFAULT 'test default'
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c varchar(64) NO test default
+ALTER TABLE t1 ADD COLUMN err VARCHAR(64) NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('test default');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1 746573742064656661756C74
+2 746573742064656661756C74
+DROP TABLE t1;
+########################
+# date and time columns
+########################
+set @col_opt_not_nullsave_time_zone=@@time_zone;
+set time_zone='UTC';
+DROP TABLE IF EXISTS t1;
+set @save_time_zone=@@time_zone;
+set time_zone='UTC';
+CREATE TABLE t1 (
+d DATE NOT NULL,
+dt DATETIME NOT NULL,
+ts TIMESTAMP NOT NULL,
+t TIME NOT NULL,
+y YEAR NOT NULL,
+y4 YEAR(4) NOT NULL,
+y2 YEAR(2) NOT NULL,
+pk DATETIME PRIMARY KEY
+) ENGINE=rocksdb;
+Warnings:
+Note 1287 'YEAR(2)' is deprecated and will be removed in a future release. Please use YEAR(4) instead
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+d date NO NULL
+dt datetime NO NULL
+ts timestamp NO NULL
+t time NO NULL
+y year(4) NO NULL
+y4 year(4) NO NULL
+y2 year(2) NO NULL
+pk datetime NO PRI NULL
+SET @tm = '2012-04-09 05:27:00';
+INSERT INTO t1 (d,dt,ts,t,y,y4,y2,pk) VALUES
+('1000-01-01', '1000-01-01 00:00:00', FROM_UNIXTIME(1), '-838:59:59', '1901', '1901', '00','2012-12-12 12:12:12'),
+('9999-12-31', '9999-12-31 23:59:59', FROM_UNIXTIME(2147483647), '838:59:59', '2155', '2155', '99','2012-12-12 12:12:13'),
+('0000-00-00', '0000-00-00 00:00:00', '0000-00-00 00:00:00', '00:00:00', '0', '0', '0','2012-12-12 12:12:14'),
+(DATE(@tm),@tm,TIMESTAMP(@tm),TIME(@tm),YEAR(@tm),YEAR(@tm),YEAR(@tm),'2012-12-12 12:12:15');
+SELECT d,dt,ts,t,y,y4,y2 FROM t1;
+d dt ts t y y4 y2
+0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 00:00:00 2000 2000 00
+1000-01-01 1000-01-01 00:00:00 1970-01-01 00:00:01 -838:59:59 1901 1901 00
+2012-04-09 2012-04-09 05:27:00 2012-04-09 05:27:00 05:27:00 2012 2012 12
+9999-12-31 9999-12-31 23:59:59 2038-01-19 03:14:07 838:59:59 2155 2155 99
+INSERT INTO t1 (d,dt,ts,t,y,y4,y2,pk) VALUES
+('999-13-32', '999-11-31 00:00:00', '0', '-839:00:00', '1900', '1900', '-1','2012-12-12 12:12:16');
+Warnings:
+Warning 1265 Data truncated for column 'd' at row 1
+Warning 1265 Data truncated for column 'dt' at row 1
+Warning 1265 Data truncated for column 'ts' at row 1
+Warning 1264 Out of range value for column 't' at row 1
+Warning 1264 Out of range value for column 'y' at row 1
+Warning 1264 Out of range value for column 'y4' at row 1
+Warning 1264 Out of range value for column 'y2' at row 1
+SELECT d,dt,ts,t,y,y4,y2 FROM t1;
+d dt ts t y y4 y2
+1000-01-01 1000-01-01 00:00:00 1970-01-01 00:00:01 -838:59:59 1901 1901 00
+9999-12-31 9999-12-31 23:59:59 2038-01-19 03:14:07 838:59:59 2155 2155 99
+0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 00:00:00 2000 2000 00
+2012-04-09 2012-04-09 05:27:00 2012-04-09 05:27:00 05:27:00 2012 2012 12
+0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 -838:59:59 0000 0000 00
+set time_zone=@save_time_zone;
+DROP TABLE t1;
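+# The recorded rows trace out the documented ranges: DATE spans
+# 1000-01-01..9999-12-31, DATETIME 1000-01-01 00:00:00..9999-12-31 23:59:59,
+# TIMESTAMP 1970-01-01 00:00:01..2038-01-19 03:14:07 UTC, TIME
+# -838:59:59..838:59:59 and YEAR 1901..2155; in non-strict mode invalid
+# input is stored as the zero value with a truncation or range warning, as
+# in the last INSERT. Sketch (hypothetical `probe` table, time_zone='UTC'
+# assumed):
+CREATE TABLE probe (ts TIMESTAMP NOT NULL) ENGINE=rocksdb;
+INSERT INTO probe VALUES (FROM_UNIXTIME(2147483647));
+SELECT ts FROM probe;
+# returns the upper bound 2038-01-19 03:14:07
+DROP TABLE probe;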
+SET TIMESTAMP=UNIX_TIMESTAMP('2013-12-12 12:12:12');
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# DATE NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c DATE NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c date NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('2012-12-21');
+SELECT HEX(c) FROM t1;
+HEX(c)
+323031322D31322D3231
+DROP TABLE t1;
+#----------------------------------
+# DATE NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c DATE NOT NULL DEFAULT '2012-12-21'
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c date NO 2012-12-21
+ALTER TABLE t1 ADD COLUMN err DATE NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('2012-12-21');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1 323031322D31322D3231
+2 323031322D31322D3231
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# DATETIME NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c DATETIME NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c datetime NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('2012-12-21 12:21:12');
+SELECT HEX(c) FROM t1;
+HEX(c)
+323031322D31322D32312031323A32313A3132
+DROP TABLE t1;
+#----------------------------------
+# DATETIME NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c DATETIME NOT NULL DEFAULT '2012-12-21 12:21:12'
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c datetime NO 2012-12-21 12:21:12
+ALTER TABLE t1 ADD COLUMN err DATETIME NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('2012-12-21 12:21:12');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1 323031322D31322D32312031323A32313A3132
+2 323031322D31322D32312031323A32313A3132
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# TIMESTAMP NOT NULL columns without a default

+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c TIMESTAMP NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c timestamp NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+INSERT INTO t1 (c) VALUES ('2012-12-21 12:21:12');
+SELECT HEX(c) FROM t1;
+HEX(c)
+323031332D31322D31322031323A31323A3132
+323031322D31322D32312031323A32313A3132
+DROP TABLE t1;
+#----------------------------------
+# TIMESTAMP NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c TIMESTAMP NOT NULL DEFAULT '2012-12-21 12:21:12'
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c timestamp NO 2012-12-21 12:21:12
+ALTER TABLE t1 ADD COLUMN err TIMESTAMP NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+set @save_ts=@@timestamp;
+set timestamp=1478923914;
+INSERT INTO t1 (c) VALUES (NULL);
+set timestamp=@save_ts;
+INSERT INTO t1 (c) VALUES ('2012-12-21 12:21:12');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1 323031362D31312D31322030343A31313A3534
+2 323031322D31322D32312031323A32313A3132
+3 323031322D31322D32312031323A32313A3132
+DROP TABLE t1;
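+# Unlike every other type in this file, inserting NULL into a NOT NULL
+# TIMESTAMP does not fail under the historical TIMESTAMP semantics: it
+# stores CURRENT_TIMESTAMP instead, which is why the test pins @@timestamp
+# around that insert. Sketch (hypothetical `probe` table, assuming the
+# same semantics and UTC time zone as the recorded run):
+CREATE TABLE probe (ts TIMESTAMP NOT NULL) ENGINE=rocksdb;
+SET @save_ts=@@timestamp;
+SET timestamp=1478923914;
+INSERT INTO probe VALUES (NULL);
+# accepted: stores 2016-11-12 04:11:54
+SET timestamp=@save_ts;
+SELECT ts FROM probe;
+DROP TABLE probe;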
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# TIME NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c TIME NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c time NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('12:21:12');
+SELECT HEX(c) FROM t1;
+HEX(c)
+31323A32313A3132
+DROP TABLE t1;
+#----------------------------------
+# TIME NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c TIME NOT NULL DEFAULT '12:21:12'
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c time NO 12:21:12
+ALTER TABLE t1 ADD COLUMN err TIME NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('12:21:12');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1 31323A32313A3132
+2 31323A32313A3132
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# YEAR NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c YEAR NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c year(4) NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('2012');
+SELECT HEX(c) FROM t1;
+HEX(c)
+7DC
+DROP TABLE t1;
+#----------------------------------
+# YEAR NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c YEAR NOT NULL DEFAULT '2012'
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c year(4) NO 2012
+ALTER TABLE t1 ADD COLUMN err YEAR NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('2012');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1 7DC
+2 7DC
+DROP TABLE t1;
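+# YEAR input is interpreted differently for strings and numbers: the
+# string '0' in the date/time block above became 2000 while an
+# out-of-range value became the zero year 0000, and two-digit input maps
+# 70..99 to 1970..1999 and 00..69 to 2000..2069. Sketch (hypothetical
+# `probe` table, not from the recorded run):
+CREATE TABLE probe (y YEAR) ENGINE=rocksdb;
+INSERT INTO probe VALUES ('0'), (0), (99), (12);
+SELECT y FROM probe;
+# returns 2000, 0000, 1999, 2012
+DROP TABLE probe;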
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# YEAR(2) NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c YEAR(2) NOT NULL) ENGINE=rocksdb;
+Warnings:
+Note 1287 'YEAR(2)' is deprecated and will be removed in a future release. Please use YEAR(4) instead
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c year(2) NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('12');
+SELECT HEX(c) FROM t1;
+HEX(c)
+C
+DROP TABLE t1;
+#----------------------------------
+# YEAR(2) NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c YEAR(2) NOT NULL DEFAULT '12'
+) ENGINE=rocksdb;
+Warnings:
+Note 1287 'YEAR(2)' is deprecated and will be removed in a future release. Please use YEAR(4) instead
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c year(2) NO 12
+ALTER TABLE t1 ADD COLUMN err YEAR(2) NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('12');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1 C
+2 C
+DROP TABLE t1;
+set time_zone= @col_opt_not_nullsave_time_zone;
+########################
+# ENUM columns
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+a ENUM('') NOT NULL,
+b ENUM('test1','test2','test3','test4','test5') NOT NULL,
+c ENUM('1','2','3','4','5','6','7','8','9','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z',' ','11','12','13','14','15','16','17','18','19','1a','1b','1c','1d','1e','1f','1g','1h','1i','1j','1k','1l','1m','1n','1o','1p','1q','1r','1s','1t','1u','1v','1w','1x','1y','1z','20','21','22','23','24','25','26','27','28','29','2a','2b','2c','2d','2e','2f','2g','2h','2i','2j','2k','2l','2m','2n','2o','2p','2q','2r','2s','2t','2u','2v','2w','2x','2y','2z','30','31','32','33','34','35','36','37','38','39','3a','3b','3c','3d','3e','3f','3g','3h','3i','3j','3k','3l','3m','3n','3o','3p','3q','3r','3s','3t','3u','3v','3w','3x','3y','3z','40','41','42','43','44','45','46','47','48','49','4a','4b','4c','4d','4e','4f','4g','4h','4i','4j','4k','4l','4m','4n','4o','4p','4q','4r','4s','4t','4u','4v','4w','4x','4y','4z','50','51','52','53','54','55','56','57','58','59','5a','5b','5c','5d','5e','5f','5g','5h','5i','5j','5k','5l','5m','5n','5o','5p','5q','5r','5s','5t','5u','5v','5w','5x','5y','5z','60','61','62','63','64','65','66','67','68','69','6a','6b','6c','6d','6e','6f','6g','6h','6i','6j','6k','6l','6m','6n','6o','6p','6q','6r','6s','6t','6u','6v','6w','6x','6y','6z','70','71','72','73','74','75') NOT NULL,
+PRIMARY KEY (b)
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a enum('') NO NULL
+b enum('test1','test2','test3','test4','test5') NO PRI NULL
+c enum('1','2','3','4','5','6','7','8','9','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','','11','12','13','14','15','16','17','18','19','1a','1b','1c','1d','1e','1f','1g','1h','1i','1j','1k','1l','1m','1n','1o','1p','1q','1r','1s','1t','1u','1v','1w','1x','1y','1z','20','21','22','23','24','25','26','27','28','29','2a','2b','2c','2d','2e','2f','2g','2h','2i','2j','2k','2l','2m','2n','2o','2p','2q','2r','2s','2t','2u','2v','2w','2x','2y','2z','30','31','32','33','34','35','36','37','38','39','3a','3b','3c','3d','3e','3f','3g','3h','3i','3j','3k','3l','3m','3n','3o','3p','3q','3r','3s','3t','3u','3v','3w','3x','3y','3z','40','41','42','43','44','45','46','47','48','49','4a','4b','4c','4d','4e','4f','4g','4h','4i','4j','4k','4l','4m','4n','4o','4p','4q','4r','4s','4t','4u','4v','4w','4x','4y','4z','50','51','52','53','54','55','56','57','58','59','5a','5b','5c','5d','5e','5f','5g','5h','5i','5j','5k','5l','5m','5n','5o','5p','5q','5r','5s','5t','5u','5v','5w','5x','5y','5z','60','61','62','63','64','65','66','67','68','69','6a','6b','6c','6d','6e','6f','6g','6h','6i','6j','6k','6l','6m','6n','6o','6p','6q','6r','6s','6t','6u','6v','6w','6x','6y','6z','70','71','72','73','74','75') NO NULL
+INSERT INTO t1 (a,b,c) VALUES ('','test2','4'),('',5,2);
+SELECT a,b,c FROM t1;
+a b c
+ test2 4
+ test5 2
+INSERT INTO t1 (a,b,c) VALUES (0,'test6',-1);
+Warnings:
+Warning 1265 Data truncated for column 'a' at row 1
+Warning 1265 Data truncated for column 'b' at row 1
+Warning 1265 Data truncated for column 'c' at row 1
+SELECT a,b,c FROM t1;
+a b c
+
+ test2 4
+ test5 2
+ALTER TABLE t1 ADD COLUMN e ENUM('a','A') NOT NULL;
+Warnings:
+Note 1291 Column 'e' has duplicated value 'a' in ENUM
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a enum('') NO NULL
+b enum('test1','test2','test3','test4','test5') NO PRI NULL
+c enum('1','2','3','4','5','6','7','8','9','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','','11','12','13','14','15','16','17','18','19','1a','1b','1c','1d','1e','1f','1g','1h','1i','1j','1k','1l','1m','1n','1o','1p','1q','1r','1s','1t','1u','1v','1w','1x','1y','1z','20','21','22','23','24','25','26','27','28','29','2a','2b','2c','2d','2e','2f','2g','2h','2i','2j','2k','2l','2m','2n','2o','2p','2q','2r','2s','2t','2u','2v','2w','2x','2y','2z','30','31','32','33','34','35','36','37','38','39','3a','3b','3c','3d','3e','3f','3g','3h','3i','3j','3k','3l','3m','3n','3o','3p','3q','3r','3s','3t','3u','3v','3w','3x','3y','3z','40','41','42','43','44','45','46','47','48','49','4a','4b','4c','4d','4e','4f','4g','4h','4i','4j','4k','4l','4m','4n','4o','4p','4q','4r','4s','4t','4u','4v','4w','4x','4y','4z','50','51','52','53','54','55','56','57','58','59','5a','5b','5c','5d','5e','5f','5g','5h','5i','5j','5k','5l','5m','5n','5o','5p','5q','5r','5s','5t','5u','5v','5w','5x','5y','5z','60','61','62','63','64','65','66','67','68','69','6a','6b','6c','6d','6e','6f','6g','6h','6i','6j','6k','6l','6m','6n','6o','6p','6q','6r','6s','6t','6u','6v','6w','6x','6y','6z','70','71','72','73','74','75') NO NULL
+e enum('a','A') NO NULL
+INSERT INTO t1 (a,b,c,e) VALUES ('','test3','75','A');
+SELECT a,b,c,e FROM t1;
+a b c e
+ a
+ test2 4 a
+ test3 75 a
+ test5 2 a
+SELECT a,b,c,e FROM t1 WHERE b='test2' OR a != '';
+a b c e
+ test2 4 a
+DROP TABLE t1;
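+# ENUM members are stored as 1-based index numbers, so they can be set by
+# string or by index (b=5 selected 'test5' above); in non-strict mode an
+# illegal value is truncated to the special empty member with index 0 and
+# warning 1265, and trailing spaces in member definitions are stripped,
+# which is why ' ' reads back as '' in SHOW COLUMNS. Sketch (hypothetical
+# `probe` table, not from the recorded run):
+CREATE TABLE probe (e ENUM('a','b','c')) ENGINE=rocksdb;
+INSERT INTO probe VALUES ('b'), (3), ('nope');
+# 'nope' is stored as the empty member with warning 1265
+SELECT e, e+0 FROM probe;
+# returns b/2, c/3 and the empty string with index 0
+DROP TABLE probe;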
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# ENUM('test1','test2','test3') NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c ENUM('test1','test2','test3') NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c enum('test1','test2','test3') NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('test2');
+SELECT HEX(c) FROM t1;
+HEX(c)
+7465737432
+DROP TABLE t1;
+#----------------------------------
+# ENUM('test1','test2','test3') NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c ENUM('test1','test2','test3') NOT NULL DEFAULT 'test2'
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c enum('test1','test2','test3') NO test2
+ALTER TABLE t1 ADD COLUMN err ENUM('test1','test2','test3') NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('test2');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1 7465737432
+2 7465737432
+DROP TABLE t1;
+########################
+# Fixed point columns (NUMERIC, DECIMAL)
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+d DECIMAL NOT NULL,
+d0 DECIMAL(0) NOT NULL,
+d1_1 DECIMAL(1,1) NOT NULL,
+d10_2 DECIMAL(10,2) NOT NULL,
+d60_10 DECIMAL(60,10) NOT NULL,
+n NUMERIC NOT NULL,
+n0_0 NUMERIC(0,0) NOT NULL,
+n1 NUMERIC(1) NOT NULL,
+n20_4 NUMERIC(20,4) NOT NULL,
+n65_4 NUMERIC(65,4) NOT NULL,
+pk NUMERIC NOT NULL PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+d decimal(10,0) NO NULL
+d0 decimal(10,0) NO NULL
+d1_1 decimal(1,1) NO NULL
+d10_2 decimal(10,2) NO NULL
+d60_10 decimal(60,10) NO NULL
+n decimal(10,0) NO NULL
+n0_0 decimal(10,0) NO NULL
+n1 decimal(1,0) NO NULL
+n20_4 decimal(20,4) NO NULL
+n65_4 decimal(65,4) NO NULL
+pk decimal(10,0) NO PRI NULL
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (100,123456,0.3,40000.25,123456789123456789.10001,1024,7000.0,8.0,999999.9,9223372036854775807,1);
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (0,0,0,0,0,0,0,0,0,0,2);
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (9999999999.0,9999999999.0,0.9,99999999.99,99999999999999999999999999999999999999999999999999.9999999999,9999999999.0,9999999999.0,9.0,9999999999999999.9999,9999999999999999999999999999999999999999999999999999999999999.9999,3);
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+100 123456 0.3 40000.25 123456789123456789.1000100000 1024 7000 8 999999.9000 9223372036854775807.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (-100,-123456,-0.3,-40000.25,-123456789123456789.10001,-1024,-7000.0,-8.0,-999999.9,-9223372036854775807,4);
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (-9999999999.0,-9999999999.0,-0.9,-99999999.99,-99999999999999999999999999999999999999999999999999.9999999999,-9999999999.0,-9999999999.0,-9.0,-9999999999999999.9999,-9999999999999999999999999999999999999999999999999999999999999.9999,5);
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+-100 -123456 -0.3 -40000.25 -123456789123456789.1000100000 -1024 -7000 -8 -999999.9000 -9223372036854775807.0000
+-9999999999 -9999999999 -0.9 -99999999.99 -99999999999999999999999999999999999999999999999999.9999999999 -9999999999 -9999999999 -9 -9999999999999999.9999 -9999999999999999999999999999999999999999999999999999999999999.9999
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+100 123456 0.3 40000.25 123456789123456789.1000100000 1024 7000 8 999999.9000 9223372036854775807.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1 WHERE n20_4 = 9999999999999999.9999 OR d < 100;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+-100 -123456 -0.3 -40000.25 -123456789123456789.1000100000 -1024 -7000 -8 -999999.9000 -9223372036854775807.0000
+-9999999999 -9999999999 -0.9 -99999999.99 -99999999999999999999999999999999999999999999999999.9999999999 -9999999999 -9999999999 -9 -9999999999999999.9999 -9999999999999999999999999999999999999999999999999999999999999.9999
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+6
+);
+Warnings:
+Warning 1264 Out of range value for column 'd' at row 1
+Warning 1264 Out of range value for column 'd0' at row 1
+Warning 1264 Out of range value for column 'd1_1' at row 1
+Warning 1264 Out of range value for column 'd10_2' at row 1
+Warning 1264 Out of range value for column 'd60_10' at row 1
+Warning 1264 Out of range value for column 'n' at row 1
+Warning 1264 Out of range value for column 'n0_0' at row 1
+Warning 1264 Out of range value for column 'n1' at row 1
+Warning 1264 Out of range value for column 'n20_4' at row 1
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+-100 -123456 -0.3 -40000.25 -123456789123456789.1000100000 -1024 -7000 -8 -999999.9000 -9223372036854775807.0000
+-9999999999 -9999999999 -0.9 -99999999.99 -99999999999999999999999999999999999999999999999999.9999999999 -9999999999 -9999999999 -9 -9999999999999999.9999 -9999999999999999999999999999999999999999999999999999999999999.9999
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+100 123456 0.3 40000.25 123456789123456789.1000100000 1024 7000 8 999999.9000 9223372036854775807.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (10000000000.0,10000000000.0,1.1,100000000.99,100000000000000000000000000000000000000000000000000.0,10000000000.0,10000000000.0,10.0,10000000000000000.9999,10000000000000000000000000000000000000000000000000000000000000.9999,7);
+Warnings:
+Warning 1264 Out of range value for column 'd' at row 1
+Warning 1264 Out of range value for column 'd0' at row 1
+Warning 1264 Out of range value for column 'd1_1' at row 1
+Warning 1264 Out of range value for column 'd10_2' at row 1
+Warning 1264 Out of range value for column 'd60_10' at row 1
+Warning 1264 Out of range value for column 'n' at row 1
+Warning 1264 Out of range value for column 'n0_0' at row 1
+Warning 1264 Out of range value for column 'n1' at row 1
+Warning 1264 Out of range value for column 'n20_4' at row 1
+Warning 1264 Out of range value for column 'n65_4' at row 1
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+-100 -123456 -0.3 -40000.25 -123456789123456789.1000100000 -1024 -7000 -8 -999999.9000 -9223372036854775807.0000
+-9999999999 -9999999999 -0.9 -99999999.99 -99999999999999999999999999999999999999999999999999.9999999999 -9999999999 -9999999999 -9 -9999999999999999.9999 -9999999999999999999999999999999999999999999999999999999999999.9999
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+100 123456 0.3 40000.25 123456789123456789.1000100000 1024 7000 8 999999.9000 9223372036854775807.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (9999999999.1,9999999999.1,1.9,99999999.001,99999999999999999999999999999999999999999999999999.99999999991,9999999999.1,9999999999.1,9.1,9999999999999999.00001,9999999999999999999999999999999999999999999999999999999999999.11111,8);
+Warnings:
+Note 1265 Data truncated for column 'd' at row 1
+Note 1265 Data truncated for column 'd0' at row 1
+Warning 1264 Out of range value for column 'd1_1' at row 1
+Note 1265 Data truncated for column 'd10_2' at row 1
+Note 1265 Data truncated for column 'd60_10' at row 1
+Note 1265 Data truncated for column 'n' at row 1
+Note 1265 Data truncated for column 'n0_0' at row 1
+Note 1265 Data truncated for column 'n1' at row 1
+Note 1265 Data truncated for column 'n20_4' at row 1
+Note 1265 Data truncated for column 'n65_4' at row 1
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+-100 -123456 -0.3 -40000.25 -123456789123456789.1000100000 -1024 -7000 -8 -999999.9000 -9223372036854775807.0000
+-9999999999 -9999999999 -0.9 -99999999.99 -99999999999999999999999999999999999999999999999999.9999999999 -9999999999 -9999999999 -9 -9999999999999999.9999 -9999999999999999999999999999999999999999999999999999999999999.9999
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+100 123456 0.3 40000.25 123456789123456789.1000100000 1024 7000 8 999999.9000 9223372036854775807.0000
+9999999999 9999999999 0.9 99999999.00 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.0000 9999999999999999999999999999999999999999999999999999999999999.1111
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+ALTER TABLE t1 ADD COLUMN n66 NUMERIC(66) NOT NULL;
+ERROR 42000: Too big precision 66 specified for 'n66'. Maximum is 65
+ALTER TABLE t1 ADD COLUMN n66_6 DECIMAL(66,6) NOT NULL;
+ERROR 42000: Too big precision 66 specified for 'n66_6'. Maximum is 65
+ALTER TABLE t1 ADD COLUMN n66_66 DECIMAL(66,66) NOT NULL;
+ERROR 42000: Too big scale 66 specified for 'n66_66'. Maximum is 38
+DROP TABLE t1;
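+# DECIMAL and NUMERIC are the same fixed-point type: DECIMAL(M,D) defaults
+# to M=10 and D=0, precision is capped at 65 and scale at 38 here, values
+# out of range clamp to the nearest representable bound with warning 1264,
+# and surplus fractional digits are rounded away with note 1265, all
+# recorded above. Sketch (hypothetical `probe` table, non-strict sql_mode
+# assumed):
+CREATE TABLE probe (d DECIMAL(4,1)) ENGINE=rocksdb;
+INSERT INTO probe VALUES (123.45), (100000);
+# 123.45 rounds to 123.5 (note 1265); 100000 clamps to 999.9 (warning 1264)
+SELECT d FROM probe;
+DROP TABLE probe;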
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# DECIMAL NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c DECIMAL NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c decimal(10,0) NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES (1.1);
+Warnings:
+Note 1265 Data truncated for column 'c' at row 1
+SELECT HEX(c) FROM t1;
+HEX(c)
+1
+DROP TABLE t1;
+#----------------------------------
+# DECIMAL NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c DECIMAL NOT NULL DEFAULT 1.1
+) ENGINE=rocksdb;
+Warnings:
+Note 1265 Data truncated for column 'c' at row 1
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c decimal(10,0) NO 1
+ALTER TABLE t1 ADD COLUMN err DECIMAL NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES (1.1);
+Warnings:
+Note 1265 Data truncated for column 'c' at row 1
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1 1
+2 1
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# NUMERIC NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c NUMERIC NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c decimal(10,0) NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES (0);
+SELECT HEX(c) FROM t1;
+HEX(c)
+0
+DROP TABLE t1;
+#----------------------------------
+# NUMERIC NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c NUMERIC NOT NULL DEFAULT 0
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c decimal(10,0) NO 0
+ALTER TABLE t1 ADD COLUMN err NUMERIC NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES (0);
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1 0
+2 0
+DROP TABLE t1;
+########################
+# Floating point columns (FLOAT, DOUBLE)
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+f FLOAT NOT NULL,
+f0 FLOAT(0) NOT NULL,
+r1_1 REAL(1,1) NOT NULL,
+f23_0 FLOAT(23) NOT NULL,
+f20_3 FLOAT(20,3) NOT NULL,
+d DOUBLE NOT NULL,
+d1_0 DOUBLE(1,0) NOT NULL,
+d10_10 DOUBLE PRECISION (10,10) NOT NULL,
+d53 DOUBLE(53,0) NOT NULL,
+d53_10 DOUBLE(53,10) NOT NULL,
+pk DOUBLE NOT NULL PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+f float NO NULL
+f0 float NO NULL
+r1_1 double(1,1) NO NULL
+f23_0 float NO NULL
+f20_3 float(20,3) NO NULL
+d double NO NULL
+d1_0 double(1,0) NO NULL
+d10_10 double(10,10) NO NULL
+d53 double(53,0) NO NULL
+d53_10 double(53,10) NO NULL
+pk double NO PRI NULL
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (12345.12345,12345.12345,0.9,123456789.123,56789.987,11111111.111,8.0,0.0123456789,1234566789123456789,99999999999999999.99999999,1);
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 12345.1
+d 11111111.111
+d10_10 0.0123456789
+d1_0 8
+d53 1234566789123456800
+d53_10 100000000000000000.0000000000
+f0 12345.1
+f20_3 56789.988
+f23_0 123457000
+r1_1 0.9
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (0,0,0,0,0,0,0,0,0,0,2);
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (
+99999999999999999999999999999999999999,
+99999999999999999999999999999999999999.9999999999999999,
+0.9,
+99999999999999999999999999999999999999.9,
+99999999999999999.999,
+999999999999999999999999999999999999999999999999999999999999999999999999999999999,
+9,
+0.9999999999,
+1999999999999999999999999999999999999999999999999999999,
+19999999999999999999999999999999999999999999.9999999999,
+3
+);
+Warnings:
+Warning 1264 Out of range value for column 'd53' at row 1
+Warning 1264 Out of range value for column 'd53_10' at row 1
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 12345.1
+d 0
+d 11111111.111
+d 1e81
+d10_10 0.0000000000
+d10_10 0.0123456789
+d10_10 0.9999999999
+d1_0 0
+d1_0 8
+d1_0 9
+d53 0
+d53 100000000000000000000000000000000000000000000000000000
+d53 1234566789123456800
+d53_10 0.0000000000
+d53_10 100000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+f 0
+f 1e38
+f0 0
+f0 12345.1
+f0 1e38
+f20_3 0.000
+f20_3 56789.988
+f20_3 99999998430674940.000
+f23_0 0
+f23_0 123457000
+f23_0 1e38
+r1_1 0.0
+r1_1 0.9
+r1_1 0.9
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (-999999999999999999999999,-99999999999.999999999999,-0.9,-999.99999999999999999999,-99999999999999999.999,-999999999999999999999999999999999999999999999999999999999999-0.999,-9,-.9999999999,-999999999999999999999999999999.99999999999999999999999,-9999999999999999999999999999999999999999999.9999999999,4);
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 12345.1
+d -1e60
+d 0
+d 11111111.111
+d 1e81
+d10_10 -0.9999999999
+d10_10 0.0000000000
+d10_10 0.0123456789
+d10_10 0.9999999999
+d1_0 -9
+d1_0 0
+d1_0 8
+d1_0 9
+d53 -1000000000000000000000000000000
+d53 0
+d53 100000000000000000000000000000000000000000000000000000
+d53 1234566789123456800
+d53_10 -10000000000000000000000000000000000000000000.0000000000
+d53_10 0.0000000000
+d53_10 100000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+f -1e24
+f 0
+f 1e38
+f0 -100000000000
+f0 0
+f0 12345.1
+f0 1e38
+f20_3 -99999998430674940.000
+f20_3 0.000
+f20_3 56789.988
+f20_3 99999998430674940.000
+f23_0 -1000
+f23_0 0
+f23_0 123457000
+f23_0 1e38
+r1_1 -0.9
+r1_1 0.0
+r1_1 0.9
+r1_1 0.9
+SELECT
+CONCAT('', MAX(f)),
+CONCAT('', MAX(f0)),
+CONCAT('', MAX(r1_1)),
+CONCAT('', MAX(f23_0)),
+CONCAT('', MAX(f20_3)),
+CONCAT('', MAX(d)),
+CONCAT('', MAX(d1_0)),
+CONCAT('', MAX(d10_10)),
+CONCAT('', MAX(d53)),
+CONCAT('', MAX(d53_10)) FROM t1;
+CONCAT('', MAX(f)) 9.999999680285692e37
+CONCAT('', MAX(d)) 1e81
+CONCAT('', MAX(d10_10)) 0.9999999999
+CONCAT('', MAX(d1_0)) 9
+CONCAT('', MAX(d53)) 100000000000000000000000000000000000000000000000000000
+CONCAT('', MAX(d53_10)) 10000000000000000000000000000000000000000000.0000000000
+CONCAT('', MAX(f0)) 9.999999680285692e37
+CONCAT('', MAX(f20_3)) 99999998430674940.000
+CONCAT('', MAX(f23_0)) 9.999999680285692e37
+CONCAT('', MAX(r1_1)) 0.9
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+5
+);
+Warnings:
+Warning 1264 Out of range value for column 'f' at row 1
+Warning 1264 Out of range value for column 'f0' at row 1
+Warning 1264 Out of range value for column 'r1_1' at row 1
+Warning 1264 Out of range value for column 'f23_0' at row 1
+Warning 1264 Out of range value for column 'f20_3' at row 1
+Warning 1264 Out of range value for column 'd1_0' at row 1
+Warning 1264 Out of range value for column 'd10_10' at row 1
+Warning 1264 Out of range value for column 'd53' at row 1
+Warning 1264 Out of range value for column 'd53_10' at row 1
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 12345.1
+d -1e60
+d 0
+d 11111111.111
+d 1e61
+d 1e81
+d10_10 -0.9999999999
+d10_10 0.0000000000
+d10_10 0.0123456789
+d10_10 0.9999999999
+d10_10 0.9999999999
+d1_0 -9
+d1_0 0
+d1_0 8
+d1_0 9
+d1_0 9
+d53 -1000000000000000000000000000000
+d53 0
+d53 100000000000000000000000000000000000000000000000000000
+d53 100000000000000000000000000000000000000000000000000000
+d53 1234566789123456800
+d53_10 -10000000000000000000000000000000000000000000.0000000000
+d53_10 0.0000000000
+d53_10 100000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+f -1e24
+f 0
+f 1e38
+f 3.40282e38
+f0 -100000000000
+f0 0
+f0 12345.1
+f0 1e38
+f0 3.40282e38
+f20_3 -99999998430674940.000
+f20_3 0.000
+f20_3 56789.988
+f20_3 99999998430674940.000
+f20_3 99999998430674940.000
+f23_0 -1000
+f23_0 0
+f23_0 123457000
+f23_0 1e38
+f23_0 3.40282e38
+r1_1 -0.9
+r1_1 0.0
+r1_1 0.9
+r1_1 0.9
+r1_1 0.9
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (
+999999999999999999999999999999999999999,
+999999999999999999999999999999999999999.9999999999999999,
+1.9,
+999999999999999999999999999999999999999.9,
+999999999999999999.999,
+9999999999999999999999999999999999999999999999999999999999999999999999999999999999,
+99,
+1.9999999999,
+1999999999999999999999999999999999999999999999999999999,
+19999999999999999999999999999999999999999999.9999999999,
+6
+);
+Warnings:
+Warning 1916 Got overflow when converting '' to DECIMAL. Value truncated
+Warning 1264 Out of range value for column 'f' at row 1
+Warning 1264 Out of range value for column 'f0' at row 1
+Warning 1264 Out of range value for column 'r1_1' at row 1
+Warning 1264 Out of range value for column 'f23_0' at row 1
+Warning 1264 Out of range value for column 'f20_3' at row 1
+Warning 1264 Out of range value for column 'd1_0' at row 1
+Warning 1264 Out of range value for column 'd10_10' at row 1
+Warning 1264 Out of range value for column 'd53' at row 1
+Warning 1264 Out of range value for column 'd53_10' at row 1
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 12345.1
+d -1e60
+d 0
+d 11111111.111
+d 1e61
+d 1e65
+d 1e81
+d10_10 -0.9999999999
+d10_10 0.0000000000
+d10_10 0.0123456789
+d10_10 0.9999999999
+d10_10 0.9999999999
+d10_10 0.9999999999
+d1_0 -9
+d1_0 0
+d1_0 8
+d1_0 9
+d1_0 9
+d1_0 9
+d53 -1000000000000000000000000000000
+d53 0
+d53 100000000000000000000000000000000000000000000000000000
+d53 100000000000000000000000000000000000000000000000000000
+d53 100000000000000000000000000000000000000000000000000000
+d53 1234566789123456800
+d53_10 -10000000000000000000000000000000000000000000.0000000000
+d53_10 0.0000000000
+d53_10 100000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+f -1e24
+f 0
+f 1e38
+f 3.40282e38
+f 3.40282e38
+f0 -100000000000
+f0 0
+f0 12345.1
+f0 1e38
+f0 3.40282e38
+f0 3.40282e38
+f20_3 -99999998430674940.000
+f20_3 0.000
+f20_3 56789.988
+f20_3 99999998430674940.000
+f20_3 99999998430674940.000
+f20_3 99999998430674940.000
+f23_0 -1000
+f23_0 0
+f23_0 123457000
+f23_0 1e38
+f23_0 3.40282e38
+f23_0 3.40282e38
+r1_1 -0.9
+r1_1 0.0
+r1_1 0.9
+r1_1 0.9
+r1_1 0.9
+r1_1 0.9
+ALTER TABLE t1 ADD COLUMN d0_0 DOUBLE(0,0) NOT NULL;
+ALTER TABLE t1 ADD COLUMN n66_6 DECIMAL(256,1) NOT NULL;
+ERROR 42000: Too big precision 256 specified for 'n66_6'. Maximum is 65
+ALTER TABLE t1 ADD COLUMN n66_66 DECIMAL(40,35) NOT NULL;
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# FLOAT NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c FLOAT NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c float NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES (1.1);
+SELECT HEX(c) FROM t1;
+HEX(c)
+1
+DROP TABLE t1;
+#----------------------------------
+# FLOAT NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c FLOAT NOT NULL DEFAULT 1.1
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c float NO 1.1
+ALTER TABLE t1 ADD COLUMN err FLOAT NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES (1.1);
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1 1
+2 1
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# DOUBLE NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c DOUBLE NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c double NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES (0);
+SELECT HEX(c) FROM t1;
+HEX(c)
+0
+DROP TABLE t1;
+#----------------------------------
+# DOUBLE NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c DOUBLE NOT NULL DEFAULT 0
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c double NO 0
+ALTER TABLE t1 ADD COLUMN err DOUBLE NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES (0);
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1 0
+2 0
+DROP TABLE t1;
+########################
+# INT columns
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+i INT NOT NULL,
+i0 INT(0) NOT NULL,
+i1 INT(1) NOT NULL,
+i20 INT(20) NOT NULL,
+t TINYINT NOT NULL,
+t0 TINYINT(0) NOT NULL,
+t1 TINYINT(1) NOT NULL,
+t20 TINYINT(20) NOT NULL,
+s SMALLINT NOT NULL,
+s0 SMALLINT(0) NOT NULL,
+s1 SMALLINT(1) NOT NULL,
+s20 SMALLINT(20) NOT NULL,
+m MEDIUMINT NOT NULL,
+m0 MEDIUMINT(0) NOT NULL,
+m1 MEDIUMINT(1) NOT NULL,
+m20 MEDIUMINT(20) NOT NULL,
+b BIGINT NOT NULL,
+b0 BIGINT(0) NOT NULL,
+b1 BIGINT(1) NOT NULL,
+b20 BIGINT(20) NOT NULL,
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+i int(11) NO NULL
+i0 int(11) NO NULL
+i1 int(1) NO NULL
+i20 int(20) NO NULL
+t tinyint(4) NO NULL
+t0 tinyint(4) NO NULL
+t1 tinyint(1) NO NULL
+t20 tinyint(20) NO NULL
+s smallint(6) NO NULL
+s0 smallint(6) NO NULL
+s1 smallint(1) NO NULL
+s20 smallint(20) NO NULL
+m mediumint(9) NO NULL
+m0 mediumint(9) NO NULL
+m1 mediumint(1) NO NULL
+m20 mediumint(20) NO NULL
+b bigint(20) NO NULL
+b0 bigint(20) NO NULL
+b1 bigint(1) NO NULL
+b20 bigint(20) NO NULL
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20);
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (2147483647,2147483647,2147483647,2147483647,127,127,127,127,32767,32767,32767,32767,8388607,8388607,8388607,8388607,9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807);
+SELECT i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20 FROM t1;
+i i0 i1 i20 t t0 t1 t20 s s0 s1 s20 m m0 m1 m20 b b0 b1 b20
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (-2147483648,-2147483648,-2147483648,-2147483648,-128,-128,-128,-128,-32768,-32768,-32768,-32768,-8388608,-8388608,-8388608,-8388608,-9223372036854775808,-9223372036854775808,-9223372036854775808,-9223372036854775808);
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (4294967295,4294967295,4294967295,4294967295,255,255,255,255,65535,65535,65535,65535,16777215,16777215,16777215,16777215,18446744073709551615,18446744073709551615,18446744073709551615,18446744073709551615);
+Warnings:
+Warning 1264 Out of range value for column 'i' at row 1
+Warning 1264 Out of range value for column 'i0' at row 1
+Warning 1264 Out of range value for column 'i1' at row 1
+Warning 1264 Out of range value for column 'i20' at row 1
+Warning 1264 Out of range value for column 't' at row 1
+Warning 1264 Out of range value for column 't0' at row 1
+Warning 1264 Out of range value for column 't1' at row 1
+Warning 1264 Out of range value for column 't20' at row 1
+Warning 1264 Out of range value for column 's' at row 1
+Warning 1264 Out of range value for column 's0' at row 1
+Warning 1264 Out of range value for column 's1' at row 1
+Warning 1264 Out of range value for column 's20' at row 1
+Warning 1264 Out of range value for column 'm' at row 1
+Warning 1264 Out of range value for column 'm0' at row 1
+Warning 1264 Out of range value for column 'm1' at row 1
+Warning 1264 Out of range value for column 'm20' at row 1
+Warning 1264 Out of range value for column 'b' at row 1
+Warning 1264 Out of range value for column 'b0' at row 1
+Warning 1264 Out of range value for column 'b1' at row 1
+Warning 1264 Out of range value for column 'b20' at row 1
+SELECT i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20 FROM t1;
+i i0 i1 i20 t t0 t1 t20 s s0 s1 s20 m m0 m1 m20 b b0 b1 b20
+-2147483648 -2147483648 -2147483648 -2147483648 -128 -128 -128 -128 -32768 -32768 -32768 -32768 -8388608 -8388608 -8388608 -8388608 -9223372036854775808 -9223372036854775808 -9223372036854775808 -9223372036854775808
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (-2147483649,-2147483649,-2147483649,-2147483649,-129,-129,-129,-129,-32769,-32769,-32769,-32769,-8388609,-8388609,-8388609,-8388609,-9223372036854775809,-9223372036854775809,-9223372036854775809,-9223372036854775809);
+Warnings:
+Warning 1264 Out of range value for column 'i' at row 1
+Warning 1264 Out of range value for column 'i0' at row 1
+Warning 1264 Out of range value for column 'i1' at row 1
+Warning 1264 Out of range value for column 'i20' at row 1
+Warning 1264 Out of range value for column 't' at row 1
+Warning 1264 Out of range value for column 't0' at row 1
+Warning 1264 Out of range value for column 't1' at row 1
+Warning 1264 Out of range value for column 't20' at row 1
+Warning 1264 Out of range value for column 's' at row 1
+Warning 1264 Out of range value for column 's0' at row 1
+Warning 1264 Out of range value for column 's1' at row 1
+Warning 1264 Out of range value for column 's20' at row 1
+Warning 1264 Out of range value for column 'm' at row 1
+Warning 1264 Out of range value for column 'm0' at row 1
+Warning 1264 Out of range value for column 'm1' at row 1
+Warning 1264 Out of range value for column 'm20' at row 1
+Warning 1264 Out of range value for column 'b' at row 1
+Warning 1264 Out of range value for column 'b0' at row 1
+Warning 1264 Out of range value for column 'b1' at row 1
+Warning 1264 Out of range value for column 'b20' at row 1
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (4294967296,4294967296,4294967296,4294967296,256,256,256,256,65536,65536,65536,65536,16777216,16777216,16777216,16777216,18446744073709551616,18446744073709551616,18446744073709551616,18446744073709551616);
+Warnings:
+Warning 1264 Out of range value for column 'i' at row 1
+Warning 1264 Out of range value for column 'i0' at row 1
+Warning 1264 Out of range value for column 'i1' at row 1
+Warning 1264 Out of range value for column 'i20' at row 1
+Warning 1264 Out of range value for column 't' at row 1
+Warning 1264 Out of range value for column 't0' at row 1
+Warning 1264 Out of range value for column 't1' at row 1
+Warning 1264 Out of range value for column 't20' at row 1
+Warning 1264 Out of range value for column 's' at row 1
+Warning 1264 Out of range value for column 's0' at row 1
+Warning 1264 Out of range value for column 's1' at row 1
+Warning 1264 Out of range value for column 's20' at row 1
+Warning 1264 Out of range value for column 'm' at row 1
+Warning 1264 Out of range value for column 'm0' at row 1
+Warning 1264 Out of range value for column 'm1' at row 1
+Warning 1264 Out of range value for column 'm20' at row 1
+Warning 1264 Out of range value for column 'b' at row 1
+Warning 1264 Out of range value for column 'b0' at row 1
+Warning 1264 Out of range value for column 'b1' at row 1
+Warning 1264 Out of range value for column 'b20' at row 1
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) SELECT b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b FROM t1 WHERE b IN (-9223372036854775808,9223372036854775807,18446744073709551615);
+Warnings:
+Warning 1264 Out of range value for column 'i' at row 8
+Warning 1264 Out of range value for column 'i0' at row 8
+Warning 1264 Out of range value for column 'i1' at row 8
+Warning 1264 Out of range value for column 'i20' at row 8
+Warning 1264 Out of range value for column 't' at row 8
+Warning 1264 Out of range value for column 't0' at row 8
+Warning 1264 Out of range value for column 't1' at row 8
+Warning 1264 Out of range value for column 't20' at row 8
+Warning 1264 Out of range value for column 's' at row 8
+Warning 1264 Out of range value for column 's0' at row 8
+Warning 1264 Out of range value for column 's1' at row 8
+Warning 1264 Out of range value for column 's20' at row 8
+Warning 1264 Out of range value for column 'm' at row 8
+Warning 1264 Out of range value for column 'm0' at row 8
+Warning 1264 Out of range value for column 'm1' at row 8
+Warning 1264 Out of range value for column 'm20' at row 8
+Warning 1264 Out of range value for column 'i' at row 9
+Warning 1264 Out of range value for column 'i0' at row 9
+Warning 1264 Out of range value for column 'i1' at row 9
+Warning 1264 Out of range value for column 'i20' at row 9
+Warning 1264 Out of range value for column 't' at row 9
+Warning 1264 Out of range value for column 't0' at row 9
+Warning 1264 Out of range value for column 't1' at row 9
+Warning 1264 Out of range value for column 't20' at row 9
+Warning 1264 Out of range value for column 's' at row 9
+Warning 1264 Out of range value for column 's0' at row 9
+Warning 1264 Out of range value for column 's1' at row 9
+Warning 1264 Out of range value for column 's20' at row 9
+Warning 1264 Out of range value for column 'm' at row 9
+Warning 1264 Out of range value for column 'm0' at row 9
+Warning 1264 Out of range value for column 'm1' at row 9
+Warning 1264 Out of range value for column 'm20' at row 9
+Warning 1264 Out of range value for column 'i' at row 10
+Warning 1264 Out of range value for column 'i0' at row 10
+Warning 1264 Out of range value for column 'i1' at row 10
+Warning 1264 Out of range value for column 'i20' at row 10
+Warning 1264 Out of range value for column 't' at row 10
+Warning 1264 Out of range value for column 't0' at row 10
+Warning 1264 Out of range value for column 't1' at row 10
+Warning 1264 Out of range value for column 't20' at row 10
+Warning 1264 Out of range value for column 's' at row 10
+Warning 1264 Out of range value for column 's0' at row 10
+Warning 1264 Out of range value for column 's1' at row 10
+Warning 1264 Out of range value for column 's20' at row 10
+Warning 1264 Out of range value for column 'm' at row 10
+Warning 1264 Out of range value for column 'm0' at row 10
+Warning 1264 Out of range value for column 'm1' at row 10
+Warning 1264 Out of range value for column 'm20' at row 10
+Warning 1264 Out of range value for column 'i' at row 11
+Warning 1264 Out of range value for column 'i0' at row 11
+Warning 1264 Out of range value for column 'i1' at row 11
+Warning 1264 Out of range value for column 'i20' at row 11
+Warning 1264 Out of range value for column 't' at row 11
+Warning 1264 Out of range value for column 't0' at row 11
+Warning 1264 Out of range value for column 't1' at row 11
+Warning 1264 Out of range value for column 't20' at row 11
+Warning 1264 Out of range value for column 's' at row 11
+Warning 1264 Out of range value for column 's0' at row 11
+Warning 1264 Out of range value for column 's1' at row 11
+Warning 1264 Out of range value for column 's20' at row 11
+Warning 1264 Out of range value for column 'm' at row 11
+Warning 1264 Out of range value for column 'm0' at row 11
+Warning 1264 Out of range value for column 'm1' at row 11
+Warning 1264 Out of range value for column 'm20' at row 11
+SELECT i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20 FROM t1;
+i i0 i1 i20 t t0 t1 t20 s s0 s1 s20 m m0 m1 m20 b b0 b1 b20
+-2147483648 -2147483648 -2147483648 -2147483648 -128 -128 -128 -128 -32768 -32768 -32768 -32768 -8388608 -8388608 -8388608 -8388608 -9223372036854775808 -9223372036854775808 -9223372036854775808 -9223372036854775808
+-2147483648 -2147483648 -2147483648 -2147483648 -128 -128 -128 -128 -32768 -32768 -32768 -32768 -8388608 -8388608 -8388608 -8388608 -9223372036854775808 -9223372036854775808 -9223372036854775808 -9223372036854775808
+-2147483648 -2147483648 -2147483648 -2147483648 -128 -128 -128 -128 -32768 -32768 -32768 -32768 -8388608 -8388608 -8388608 -8388608 -9223372036854775808 -9223372036854775808 -9223372036854775808 -9223372036854775808
+-2147483648 -2147483648 -2147483648 -2147483648 -128 -128 -128 -128 -32768 -32768 -32768 -32768 -8388608 -8388608 -8388608 -8388608 -9223372036854775808 -9223372036854775808 -9223372036854775808 -9223372036854775808
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+ALTER TABLE t1 ADD COLUMN i257 INT(257) NOT NULL;
+ERROR 42000: Display width out of range for 'i257' (max = 255)
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# INT NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c INT NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c int(11) NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES (2147483647);
+SELECT HEX(c) FROM t1;
+HEX(c)
+7FFFFFFF
+DROP TABLE t1;
+#----------------------------------
+# INT NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c INT NOT NULL DEFAULT 2147483647
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c int(11) NO 2147483647
+ALTER TABLE t1 ADD COLUMN err INT NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES (2147483647);
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1 7FFFFFFF
+2 7FFFFFFF
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# TINYINT NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c TINYINT NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c tinyint(4) NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES (127);
+SELECT HEX(c) FROM t1;
+HEX(c)
+7F
+DROP TABLE t1;
+#----------------------------------
+# TINYINT NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c TINYINT NOT NULL DEFAULT 127
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c tinyint(4) NO 127
+ALTER TABLE t1 ADD COLUMN err TINYINT NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES (127);
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1 7F
+2 7F
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# SMALLINT NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c SMALLINT NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c smallint(6) NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES (0);
+SELECT HEX(c) FROM t1;
+HEX(c)
+0
+DROP TABLE t1;
+#----------------------------------
+# SMALLINT NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c SMALLINT NOT NULL DEFAULT 0
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c smallint(6) NO 0
+ALTER TABLE t1 ADD COLUMN err SMALLINT NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES (0);
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1 0
+2 0
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# MEDIUMINT NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c MEDIUMINT NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c mediumint(9) NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES (1);
+SELECT HEX(c) FROM t1;
+HEX(c)
+1
+DROP TABLE t1;
+#----------------------------------
+# MEDIUMINT NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c MEDIUMINT NOT NULL DEFAULT 1
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c mediumint(9) NO 1
+ALTER TABLE t1 ADD COLUMN err MEDIUMINT NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES (1);
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1 1
+2 1
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# BIGINT NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c BIGINT NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c bigint(20) NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES (9223372036854775807);
+SELECT HEX(c) FROM t1;
+HEX(c)
+7FFFFFFFFFFFFFFF
+DROP TABLE t1;
+#----------------------------------
+# BIGINT NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c BIGINT NOT NULL DEFAULT 9223372036854775807
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c bigint(20) NO 9223372036854775807
+ALTER TABLE t1 ADD COLUMN err BIGINT NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES (9223372036854775807);
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1 7FFFFFFFFFFFFFFF
+2 7FFFFFFFFFFFFFFF
+DROP TABLE t1;
+########################
+# SET columns
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+a SET('') NOT NULL,
+b SET('test1','test2','test3','test4','test5') NOT NULL,
+c SET('01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36','37','38','39','40','41','42','43','44','45','46','47','48','49','50''51','52','53','54','55','56','57','58','59','60','61','62','63','64') NOT NULL,
+PRIMARY KEY (c)
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a set('') NO NULL
+b set('test1','test2','test3','test4','test5') NO NULL
+c set('01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36','37','38','39','40','41','42','43','44','45','46','47','48','49','50''51','52','53','54','55','56','57','58','59','60','61','62','63','64') NO PRI NULL
+INSERT INTO t1 (a,b,c) VALUES
+('','test2,test3','01,34,44,,23'),
+('',5,2),
+(',','test4,test2','');
+Warnings:
+Warning 1265 Data truncated for column 'c' at row 1
+SELECT a,b,c FROM t1;
+a b c
+ test1,test3 02
+ test2,test3 01,23,34,44
+ test2,test4
+INSERT INTO t1 (a,b,c) VALUES (0,'test6',-1);
+Warnings:
+Warning 1265 Data truncated for column 'b' at row 1
+Warning 1265 Data truncated for column 'c' at row 1
+SELECT a,b,c FROM t1;
+a b c
+ 01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50'51,52,53,54,55,56,57,58,59,60,61,62,63,64
+ test1,test3 02
+ test2,test3 01,23,34,44
+ test2,test4
+ALTER TABLE t1 ADD COLUMN e SET('a','A') NOT NULL;
+Warnings:
+Note 1291 Column 'e' has duplicated value 'a' in SET
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a set('') NO NULL
+b set('test1','test2','test3','test4','test5') NO NULL
+c set('01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36','37','38','39','40','41','42','43','44','45','46','47','48','49','50''51','52','53','54','55','56','57','58','59','60','61','62','63','64') NO PRI NULL
+e set('a','A') NO NULL
+ALTER TABLE t1 ADD COLUMN f SET('1','2','3','4','5','6','7','8','9','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z',' ','11','12','13','14','15','16','17','18','19','1a','1b','1c','1d','1e','1f','1g','1h','1i','1j','1k','1l','1m','1n','1o','1p','1q','1r','1s','1t','1u','1v','1w','1x','1y','1z','20','21','22','23','24','25','26','27','28','29','2a','2b','2c','2d','2e','2f','2g','2h','2i','2j','2k','2l','2m','2n','2o','2p','2q','2r','2s','2t','2u','2v','2w','2x','2y','2z','30','31','32','33','34','35','36','37','38','39','3a','3b','3c','3d','3e','3f','3g','3h','3i') NOT NULL;
+ERROR HY000: Too many strings for column f and SET
+SELECT a,b,c,e FROM t1 WHERE FIND_IN_SET('test2',b)>0 OR a != '';
+a b c e
+ test2,test3 01,23,34,44
+ test2,test4
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# SET('test1','test2','test3') NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c SET('test1','test2','test3') NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c set('test1','test2','test3') NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('test2,test3');
+SELECT HEX(c) FROM t1;
+HEX(c)
+74657374322C7465737433
+DROP TABLE t1;
+#----------------------------------
+# SET('test1','test2','test3') NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c SET('test1','test2','test3') NOT NULL DEFAULT 'test2,test3'
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c set('test1','test2','test3') NO test2,test3
+ALTER TABLE t1 ADD COLUMN err SET('test1','test2','test3') NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('test2,test3');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1 74657374322C7465737433
+2 74657374322C7465737433
+DROP TABLE t1;
+########################
+# TEXT columns
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+t TEXT NOT NULL,
+t0 TEXT(0) NOT NULL,
+t1 TEXT(1) NOT NULL,
+t300 TEXT(300) NOT NULL,
+tm TEXT(65535) NOT NULL,
+t70k TEXT(70000) NOT NULL,
+t17m TEXT(17000000) NOT NULL,
+tt TINYTEXT NOT NULL,
+m MEDIUMTEXT NOT NULL,
+l LONGTEXT NOT NULL
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+t text NO NULL
+t0 text NO NULL
+t1 tinytext NO NULL
+t300 text NO NULL
+tm text NO NULL
+t70k mediumtext NO NULL
+t17m longtext NO NULL
+tt tinytext NO NULL
+m mediumtext NO NULL
+l longtext NO NULL
+INSERT INTO t1 (t,t0,t1,t300,tm,t70k,t17m,tt,m,l) VALUES
+('','','','','','','','','',''),
+('a','b','c','d','e','f','g','h','i','j'),
+('test1','test2','test3','test4','test5','test6','test7','test8','test9','test10'),
+( REPEAT('a',65535), REPEAT('b',65535), REPEAT('c',255), REPEAT('d',65535), REPEAT('e',65535), REPEAT('f',1048576), REPEAT('g',1048576), REPEAT('h',255), REPEAT('i',1048576), REPEAT('j',1048576) );
+SELECT LENGTH(t), LENGTH(t0), LENGTH(t1), LENGTH(t300), LENGTH(tm), LENGTH(t70k), LENGTH(t17m), LENGTH(tt), LENGTH(m), LENGTH(l) FROM t1;
+LENGTH(t) LENGTH(t0) LENGTH(t1) LENGTH(t300) LENGTH(tm) LENGTH(t70k) LENGTH(t17m) LENGTH(tt) LENGTH(m) LENGTH(l)
+0 0 0 0 0 0 0 0 0 0
+1 1 1 1 1 1 1 1 1 1
+5 5 5 5 5 5 5 5 5 6
+65535 65535 255 65535 65535 1048576 1048576 255 1048576 1048576
+INSERT INTO t1 (t,t0,t1,t300,tm,t70k,t17m,tt,m,l) VALUES
+( REPEAT('a',65536), REPEAT('b',65536), REPEAT('c',256), REPEAT('d',65536), REPEAT('e',65536), REPEAT('f',1048576), REPEAT('g',1048576), REPEAT('h',256), REPEAT('i',1048576), REPEAT('j',1048576) );
+Warnings:
+Warning 1265 Data truncated for column 't' at row 1
+Warning 1265 Data truncated for column 't0' at row 1
+Warning 1265 Data truncated for column 't1' at row 1
+Warning 1265 Data truncated for column 't300' at row 1
+Warning 1265 Data truncated for column 'tm' at row 1
+Warning 1265 Data truncated for column 'tt' at row 1
+SELECT LENGTH(t), LENGTH(t0), LENGTH(t1), LENGTH(t300), LENGTH(tm), LENGTH(t70k), LENGTH(t17m), LENGTH(tt), LENGTH(m), LENGTH(l) FROM t1;
+LENGTH(t) LENGTH(t0) LENGTH(t1) LENGTH(t300) LENGTH(tm) LENGTH(t70k) LENGTH(t17m) LENGTH(tt) LENGTH(m) LENGTH(l)
+0 0 0 0 0 0 0 0 0 0
+1 1 1 1 1 1 1 1 1 1
+5 5 5 5 5 5 5 5 5 6
+65535 65535 255 65535 65535 1048576 1048576 255 1048576 1048576
+65535 65535 255 65535 65535 1048576 1048576 255 1048576 1048576
+ALTER TABLE t1 ADD COLUMN ttt TEXT(4294967296) NOT NULL;
+ERROR 42000: Display width out of range for 'ttt' (max = 4294967295)
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# TEXT NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c TEXT NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c text NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('');
+SELECT HEX(c) FROM t1;
+HEX(c)
+
+DROP TABLE t1;
+#----------------------------------
+# TEXT NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c TEXT NOT NULL DEFAULT ''
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c text NO ''
+ALTER TABLE t1 ADD COLUMN err TEXT NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1
+2
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# TINYTEXT NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c TINYTEXT NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c tinytext NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('');
+SELECT HEX(c) FROM t1;
+HEX(c)
+
+DROP TABLE t1;
+#----------------------------------
+# TINYTEXT NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c TINYTEXT NOT NULL DEFAULT ''
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c tinytext NO ''
+ALTER TABLE t1 ADD COLUMN err TINYTEXT NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1
+2
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# MEDIUMTEXT NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c MEDIUMTEXT NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c mediumtext NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('');
+SELECT HEX(c) FROM t1;
+HEX(c)
+
+DROP TABLE t1;
+#----------------------------------
+# MEDIUMTEXT NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c MEDIUMTEXT NOT NULL DEFAULT ''
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c mediumtext NO ''
+ALTER TABLE t1 ADD COLUMN err MEDIUMTEXT NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1
+2
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+#----------------------------------
+# LONGTEXT NOT NULL columns without a default
+#----------------------------------
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c LONGTEXT NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c longtext NO NULL
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('');
+SELECT HEX(c) FROM t1;
+HEX(c)
+
+DROP TABLE t1;
+#----------------------------------
+# LONGTEXT NOT NULL columns with a default
+#----------------------------------
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+c LONGTEXT NOT NULL DEFAULT ''
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+c longtext NO ''
+ALTER TABLE t1 ADD COLUMN err LONGTEXT NOT NULL DEFAULT NULL;
+ERROR 42000: Invalid default value for 'err'
+INSERT INTO t1 (c) VALUES (NULL);
+ERROR 23000: Column 'c' cannot be null
+INSERT INTO t1 (c) VALUES ('');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+pk HEX(c)
+1
+2
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/col_opt_null.result b/storage/rocksdb/mysql-test/rocksdb/r/col_opt_null.result
new file mode 100644
index 00000000000..ba651fcbb14
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/col_opt_null.result
@@ -0,0 +1,2268 @@
+########################
+# BINARY columns
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+b BINARY NULL,
+b0 BINARY(0) NULL,
+b1 BINARY(1) NULL,
+b20 BINARY(20) NULL,
+b255 BINARY(255) NULL,
+pk BINARY PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+b binary(1) YES NULL
+b0 binary(0) YES NULL
+b1 binary(1) YES NULL
+b20 binary(20) YES NULL
+b255 binary(255) YES NULL
+pk binary(1) NO PRI NULL
+INSERT INTO t1 VALUES ('','','','','','');
+INSERT INTO t1 VALUES ('a','','b','abcdefghi klmnopqrst', 'Creating an article for the Knowledgebase is similar to asking questions. First, navigate to the category where you feel the article should be. Once there, double check that an article doesn\'t already exist which would work.','a');
+SELECT HEX(b), HEX(b0), HEX(b1), HEX(b20), HEX(b255), HEX(pk) FROM t1 ORDER BY pk;
+HEX(b) HEX(b0) HEX(b1) HEX(b20) HEX(b255) HEX(pk)
+00 00 0000000000000000000000000000000000000000 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 00
+61 62 616263646566676869206B6C6D6E6F7071727374 4372656174696E6720616E2061727469636C6520666F7220746865204B6E6F776C65646765626173652069732073696D696C617220746F2061736B696E67207175657374696F6E732E2046697273742C206E6176696761746520746F207468652063617465676F727920776865726520796F75206665656C207468652061727469636C652073686F756C642062652E204F6E63652074686572652C20646F75626C6520636865636B207468617420616E2061727469636C6520646F65736E277420616C726561647920657869737420776869636820776F756C6420776F726B2E00000000000000000000000000000000000000000000000000000000000000 61
+INSERT INTO t1 VALUES ('abc', 'a', 'abc', REPEAT('a',21), REPEAT('x',256),'b');
+Warnings:
+Warning 1265 Data truncated for column 'b' at row 1
+Warning 1265 Data truncated for column 'b0' at row 1
+Warning 1265 Data truncated for column 'b1' at row 1
+Warning 1265 Data truncated for column 'b20' at row 1
+Warning 1265 Data truncated for column 'b255' at row 1
+INSERT INTO t1 SELECT b255, b255, b255, b255, CONCAT('a',b255,b255), 'c' FROM t1;
+ERROR 23000: Duplicate entry 'c' for key 'PRIMARY'
+SELECT HEX(b), HEX(b0), HEX(b1), HEX(b20), HEX(b255), HEX(pk) FROM t1 ORDER BY pk;
+HEX(b) HEX(b0) HEX(b1) HEX(b20) HEX(b255) HEX(pk)
+00 00 0000000000000000000000000000000000000000 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 00
+61 61 6161616161616161616161616161616161616161 787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878 62
+61 62 616263646566676869206B6C6D6E6F7071727374 4372656174696E6720616E2061727469636C6520666F7220746865204B6E6F776C65646765626173652069732073696D696C617220746F2061736B696E67207175657374696F6E732E2046697273742C206E6176696761746520746F207468652063617465676F727920776865726520796F75206665656C207468652061727469636C652073686F756C642062652E204F6E63652074686572652C20646F75626C6520636865636B207468617420616E2061727469636C6520646F65736E277420616C726561647920657869737420776869636820776F756C6420776F726B2E00000000000000000000000000000000000000000000000000000000000000 61
+ALTER TABLE t1 ADD COLUMN b257 BINARY(257) NULL;
+ERROR 42000: Column length too big for column 'b257' (max = 255); use BLOB or TEXT instead
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+b binary(1) YES NULL
+b0 binary(0) YES NULL
+b1 binary(1) YES NULL
+b20 binary(20) YES NULL
+b255 binary(255) YES NULL
+pk binary(1) NO PRI NULL
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c BINARY NULL,
+c1 BINARY NULL DEFAULT NULL,
+c2 BINARY NULL DEFAULT 0,
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c binary(1) YES NULL
+c1 binary(1) YES NULL
+c2 binary(1) YES 0
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES (0,0,0);
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2 30 30 30
+3 NULL NULL 30
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2 30
+3 30
+DROP TABLE t1;
+########################
+# VARBINARY columns
+########################
+DROP TABLE IF EXISTS t1, t2;
+CREATE TABLE t1 (
+v0 VARBINARY(0) NULL,
+v1 VARBINARY(1) NULL,
+v64 VARBINARY(64) NULL,
+v65000 VARBINARY(65000) NULL,
+PRIMARY KEY (v64)
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+v0 varbinary(0) YES NULL
+v1 varbinary(1) YES NULL
+v64 varbinary(64) NO PRI NULL
+v65000 varbinary(65000) YES NULL
+CREATE TABLE t2 (v VARBINARY(65532) NULL, PRIMARY KEY(v(255))) ENGINE=rocksdb;
+SHOW COLUMNS IN t2;
+Field Type Null Key Default Extra
+v varbinary(65532) NO PRI NULL
+INSERT INTO t1 (v0,v1,v64,v65000) VALUES ('','','','');
+INSERT INTO t1 (v0,v1,v64,v65000) VALUES ('','y','Once there, double check that an article doesn\'t already exist','Here is a list of recommended books on MariaDB and MySQL. We\'ve provided links to Amazon.com here for convenience, but they can be found at many other bookstores, both online and off.
+
+ If you want to have your favorite MySQL / MariaDB book listed here, please leave a comment.
+ For developers who want to code on MariaDB or MySQL
+
+ * Understanding MySQL Internals by Sasha Pachev, former MySQL developer at MySQL AB.
+ o This is the only book we know about that describes the internals of MariaDB / MySQL. A must have for anyone who wants to understand and develop on MariaDB!
+ o Not all topics are covered and some parts are slightly outdated, but still the best book on this topic.
+ * MySQL 5.1 Plugin Development by Sergei Golubchik and Andrew Hutchings
+ o A must read for anyone wanting to write a plugin for MariaDB, written by the Sergei who designed the plugin interface for MySQL and MariaDB!
+
+ For MariaDB / MySQL end users
+
+ * MariaDB Crash Course by Ben Forta
+ o First MariaDB book!
+ o For people who want to learn SQL and the basics of MariaDB.
+ o Now shipping. Purchase at Amazon.com or your favorite bookseller.
+
+ * SQL-99 Complete, Really by Peter Gulutzan & Trudy Pelzer.
+ o Everything you wanted to know about the SQL 99 standard. Excellent reference book!
+ o Free to read in the Knowledgebase!
+
+ * MySQL (4th Edition) by Paul DuBois
+ o The \'default\' book to read if you wont to learn to use MySQL / MariaDB.
+
+ * MySQL Cookbook by Paul DuBois
+ o A lot of examples of how to use MySQL. As with all of Paul\'s books, it\'s worth its weight in gold and even enjoyable reading for such a \'dry\' subject.
+
+ * High Performance MySQL, Second Edition, By Baron Schwartz, Peter Zaitsev, Vadim Tkachenko, Jeremy D. Zawodny, Arjen Lentz, Derek J. Balling, et al.
+ o \"High Performance MySQL is the definitive guide to building fast, reliable systems with MySQL. Written by noted experts with years of real-world experience building very large systems, this book covers every aspect of MySQL performance in detail, and focuses on robustness, security, and data integrity. Learn advanced techniques in depth so you can bring out MySQL\'s full power.\" (From the book description at O\'Reilly)
+
+ * MySQL Admin Cookbook
+ o A quick step-by-step guide for MySQL users and database administrators to tackle real-world challenges with MySQL configuration and administration
+
+ * MySQL 5.0 Certification Study Guide, By Paul DuBois, Stefan Hinz, Carsten Pedersen
+ o This is the official guide to cover the passing of the two MySQL Certification examinations. It is valid till version 5.0 of the server, so while it misses all the features available in MySQL 5.1 and greater (including MariaDB 5.1 and greater), it provides a good basic understanding of MySQL for the end-user. ');
+SELECT HEX(v0), HEX(v1), HEX(v64), HEX(v65000) FROM t1;
+HEX(v0) HEX(v1) HEX(v64) HEX(v65000)
+
+ 79 4F6E63652074686572652C20646F75626C6520636865636B207468617420616E2061727469636C6520646F65736E277420616C7265616479206578697374 486572652069732061206C697374206F66207265636F6D6D656E64656420626F6F6B73206F6E204D61726961444220616E64204D7953514C2E2057652776652070726F7669646564206C696E6B7320746F20416D617A6F6E2E636F6D206865726520666F7220636F6E76656E69656E63652C2062757420746865792063616E20626520666F756E64206174206D616E79206F7468657220626F6F6B73746F7265732C20626F7468206F6E6C696E6520616E64206F66662E0A0A2020496620796F752077616E7420746F206861766520796F7572206661766F72697465204D7953514C202F204D61726961444220626F6F6B206C697374656420686572652C20706C65617365206C65617665206120636F6D6D656E742E0A2020466F7220646576656C6F706572732077686F2077616E7420746F20636F6465206F6E204D617269614442206F72204D7953514C0A0A2020202020202A20556E6465727374616E64696E67204D7953514C20496E7465726E616C73206279205361736861205061636865762C20666F726D6572204D7953514C20646576656C6F706572206174204D7953514C2041422E0A2020202020202020202020206F205468697320697320746865206F6E6C7920626F6F6B207765206B6E6F772061626F75742074686174206465736372696265732074686520696E7465726E616C73206F66204D617269614442202F204D7953514C2E2041206D757374206861766520666F7220616E796F6E652077686F2077616E747320746F20756E6465727374616E6420616E6420646576656C6F70206F6E204D617269614442210A2020202020202020202020206F204E6F7420616C6C20746F706963732061726520636F766572656420616E6420736F6D652070617274732061726520736C696768746C79206F757464617465642C20627574207374696C6C20746865206265737420626F6F6B206F6E207468697320746F7069632E200A2020202020202A204D7953514C20352E3120506C7567696E20446576656C6F706D656E742062792053657267656920476F6C75626368696B20616E6420416E64726577204875746368696E67730A2020202020202020202020206F2041206D757374207265616420666F7220616E796F6E652077616E74696E6720746F207772697465206120706C7567696E20666F72204D6172696144422C207772697474656E20627920746865205365726765692077686F2064657369676E65642074686520706C7567696E20696E7465726661636520666F72204D7953514C20616E64204D61726961444221200A0A2020466F72204D617269614442202F204D7953514C20656E642075736572730A0A2020202020202A204D61726961444220437261736820436F757273652062792042656E20466F7274610A2020202020202020202020206F204669727374204D61726961444220626F6F6B210A2020202020202020202020206F20466F722070656F706C652077686F2077616E7420746F206C6561726E2053514C20616E642074686520626173696373206F66204D6172696144422E0A2020202020202020202020206F204E6F77207368697070696E672E20507572636861736520617420416D617A6F6E2E636F6D206F7220796F7572206661766F7269746520626F6F6B73656C6C65722E200A0A2020202020202A2053514C2D393920436F6D706C6574652C205265616C6C792062792050657465722047756C75747A616E20262054727564792050656C7A65722E0A2020202020202020202020206F2045766572797468696E6720796F752077616E74656420746F206B6E6F772061626F7574207468652053514C203939207374616E646172642E20457863656C6C656E74207265666572656E636520626F6F6B210A2020202020202020202020206F204672656520746F207265616420696E20746865204B6E6F776C656467656261736521200A0A2020202020202A204D7953514C20283474682045646974696F6E29206279205061756C204475426F69730A2020202020202020202020206F20546865202764656661756C742720626F6F6B20746F207265616420696620796F7520776F6E7420746F206C6561726E20746F20757365204D7953514C202F204D6172696144422E200A0A2020202020202A204D7953514C20436F6F6B626F6F6B206279205061756C204475426F69730A2020202020202020202020206F2041206C6F74206F66206578616D706C6573206F6620686F7720746F20757365204D7953514C2E204173207769746820616C6C206F66205061756C277320626F6F6B732C206974277320776F727468206974732077656967687420696E20676F6C6420616E64206576656E20656E6A6F7961626C652072656164696E6720666F7220737563682061202764727927207375626A6563742E200A0A2020202020202A204869676820506572666F726D616E6365204D7953514C2C205365636F6E642045646974696F6E2C204279204261726F6E20536368776172747A2C205065746572205A6169747365762C20566164696D20546B616368656E6B6F2C204A6572656D7920442E205A61776F646E792C2041726A656E204C656E747A2C20446572656B204A2E2042616C6C696E672C20657420616C2E0A2020202020202020202020206F20224869676820506572666F726D616E6365204D7953514C2069732074686520646566696E697469766520677569646520746F206275696C64696E6720666173742C2072656C6961626C652073797374656D732077697468204D7953514C2E205772697474656E206279206E6F74656420657870657274732077697468207965617273206F66207265616C2D776F726C6420657870657269656E6365206275696C64696E672076657279206C617267652073797374656D732C207468697320626F6F6B20636F7665727320657665727920617370656374206F66204D7953514C20706572666F726D616E636520696E2064657461696C2C20616E6420666F6375736573206F6E20726F627573746E6573732C2073656375726974792C20616E64206461746120696E746567726974792E204C6561726E20616476616E63656420746563686E697175657320696E20646570746820736F20796F752063616E206272696E67206F7574204D7953514C27732066756C6C20706F7765722E22202846726F6D2074686520626F6F6B206465736372697074696F6E206174204F275265696C6C7929200A0A2020202020202A204D7953514C2041646D696E20436F6F6B626F6F6B0A2020202020202020202020206F204120717569636B20737465702D62792D7374657020677569646520666F72204D7953514C20757365727320616E642064617461626173652061646D696E6973747261746F727320746F207461636B6C65207265616C2D776F726C64206368616C6C656E6765732077697468204D7953514C20636F6E66696775726174696F6E20616E642061646D696E697374726174696F6E200A0A2020202020202A204D7953514C20352E302043657274696669636174696F6E2053747564792047756964652C204279205061756C204475426F69732C2053746566616E2048696E7A2C204361727374656E20506564657273656E0A2020202020202020202020206F205468697320697320746865206F6666696369616C20677569646520746F20636F766572207468652070617373696E67206F66207468652074776F204D7953514C2043657274696669636174696F6E206578616D696E6174696F6E732E2049742069732076616C69642074696C6C2076657273696F6E20352E30206F6620746865207365727665722C20736F207768696C65206974206D697373657320616C6C2074686520666561747572657320617661696C61626C6520696E204D7953514C20352E3120616E6420677265617465722028696E636C7564696E67204D61726961444220352E3120616E642067726561746572292C2069742070726F7669646573206120676F6F6420626173696320756E6465727374616E64696E67206F66204D7953514C20666F722074686520656E642D757365722E20
+INSERT INTO t1 (v0,v1,v64,v65000) VALUES ('y', 'yy', REPEAT('c',65), REPEAT('abcdefghi ',6501));
+Warnings:
+Warning 1265 Data truncated for column 'v0' at row 1
+Warning 1265 Data truncated for column 'v1' at row 1
+Warning 1265 Data truncated for column 'v64' at row 1
+Warning 1265 Data truncated for column 'v65000' at row 1
+INSERT INTO t1 (v0,v1,v64,v65000) SELECT v65000, v65000, CONCAT('a',v65000), CONCAT(v65000,v1) FROM t1;
+Warnings:
+Warning 1265 Data truncated for column 'v0' at row 5
+Warning 1265 Data truncated for column 'v1' at row 5
+Warning 1265 Data truncated for column 'v64' at row 5
+Warning 1265 Data truncated for column 'v0' at row 6
+Warning 1265 Data truncated for column 'v1' at row 6
+Warning 1265 Data truncated for column 'v64' at row 6
+Warning 1265 Data truncated for column 'v65000' at row 6
+SELECT HEX(v0), HEX(v1), HEX(v64), LENGTH(HEX(v65000)) FROM t1;
+HEX(v0) HEX(v1) HEX(v64) LENGTH(HEX(v65000))
+ 0
+ 61 0
+ 48 61486572652069732061206C697374206F66207265636F6D6D656E64656420626F6F6B73206F6E204D61726961444220616E64204D7953514C2E205765277665 5932
+ 61 61616263646566676869206162636465666768692061626364656667686920616263646566676869206162636465666768692061626364656667686920616263 130000
+ 79 4F6E63652074686572652C20646F75626C6520636865636B207468617420616E2061727469636C6520646F65736E277420616C7265616479206578697374 5930
+ 79 63636363636363636363636363636363636363636363636363636363636363636363636363636363636363636363636363636363636363636363636363636363 130000
+ALTER TABLE t1 ADD COLUMN v65536 VARBINARY(65536) NULL;
+Warnings:
+Note 1246 Converting column 'v65536' from VARBINARY to BLOB
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+v0 varbinary(0) YES NULL
+v1 varbinary(1) YES NULL
+v64 varbinary(64) NO PRI NULL
+v65000 varbinary(65000) YES NULL
+v65536 mediumblob YES NULL
+DROP TABLE t1, t2;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c VARBINARY(64) NULL,
+c1 VARBINARY(64) NULL DEFAULT NULL,
+c2 VARBINARY(64) NULL DEFAULT 'test',
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c varbinary(64) YES NULL
+c1 varbinary(64) YES NULL
+c2 varbinary(64) YES test
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES ('test','test','test');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2 74657374 74657374 74657374
+3 NULL NULL 74657374
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2 74657374
+3 74657374
+DROP TABLE t1;
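+# A minimal sketch (not from the recorded run) of the width-promotion rule
+# shown above, assuming a RocksDB-enabled server and the non-strict sql_mode
+# these tests use; expected effects are noted as comments. A VARBINARY wider
+# than 65535 is silently promoted to the smallest BLOB type that fits.
+CREATE TABLE vb_sketch (pk INT PRIMARY KEY, v VARBINARY(70000)) ENGINE=rocksdb;
+# expect Note 1246 "Converting column 'v' from VARBINARY to BLOB";
+# SHOW COLUMNS would report v as mediumblob
+DROP TABLE vb_sketch;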
+########################
+# BIT columns
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+a BIT NULL,
+b BIT(20) NULL,
+c BIT(64) NULL,
+d BIT(1) NULL,
+PRIMARY KEY (c)
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a bit(1) YES NULL
+b bit(20) YES NULL
+c bit(64) NO PRI NULL
+d bit(1) YES NULL
+ALTER TABLE t1 DROP COLUMN d;
+ALTER TABLE t1 ADD COLUMN d BIT(0) NULL;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a bit(1) YES NULL
+b bit(20) YES NULL
+c bit(64) NO PRI NULL
+d bit(1) YES NULL
+INSERT INTO t1 (a,b,c,d) VALUES (0,POW(2,20)-1,b'1111111111111111111111111111111111111111111111111111111111111111',1);
+SELECT BIN(a), HEX(b), c+0 FROM t1 WHERE d>0;
+BIN(a) HEX(b) c+0
+0 FFFFF 18446744073709551615
+INSERT INTO t1 (a,b,c,d) VALUES (1,0,-2,0);
+SELECT a+0, b+0, c+0 FROM t1 WHERE d<100;
+a+0 b+0 c+0
+0 1048575 18446744073709551615
+1 0 18446744073709551614
+INSERT INTO t1 (a,b,c,d) VALUES (b'1', 'f', 0xFF, 0x0);
+SELECT a+0, b+0, c+0 FROM t1 WHERE d IN (0, 2);
+a+0 b+0 c+0
+1 0 18446744073709551614
+1 102 255
+DELETE FROM t1;
+INSERT INTO t1 (a,b,c,d) VALUES (0x10,0,0,1);
+Warnings:
+Warning 1264 Out of range value for column 'a' at row 1
+SELECT a+0,b+0,c+0,d+0 FROM t1;
+a+0 b+0 c+0 d+0
+1 0 0 1
+INSERT INTO t1 (a,b,c,d) VALUES (0x01,0,0x10000000000000000,0);
+Warnings:
+Warning 1264 Out of range value for column 'c' at row 1
+SELECT a+0,b+0,c+0,d+0 FROM t1;
+a+0 b+0 c+0 d+0
+1 0 0 1
+1 0 18446744073709551615 0
+DROP TABLE t1;
+CREATE TABLE t1 (pk INT PRIMARY KEY, a BIT(65) NULL) ENGINE=rocksdb;
+ERROR 42000: Display width out of range for 'a' (max = 64)
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c BIT NULL,
+c1 BIT NULL DEFAULT NULL,
+c2 BIT NULL DEFAULT 1,
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c bit(1) YES NULL
+c1 bit(1) YES NULL
+c2 bit(1) YES b'1'
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES (1,1,1);
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2 1 1 1
+3 NULL NULL 1
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2 1
+3 1
+DROP TABLE t1;
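+# A short sketch of the BIT clamping above, same assumptions: BIT(n) holds at
+# most n bits, and an out-of-range value is clamped to all ones with warning
+# 1264 rather than rejected.
+CREATE TABLE bit_sketch (pk INT PRIMARY KEY, b BIT(3)) ENGINE=rocksdb;
+INSERT INTO bit_sketch VALUES (1, 8);
+# expect Warning 1264; 8 needs four bits, so b is clamped to b'111'
+SELECT b+0 FROM bit_sketch;
+# expect 7
+DROP TABLE bit_sketch;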
+########################
+# BLOB columns
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+b BLOB NULL,
+b0 BLOB(0) NULL,
+b1 BLOB(1) NULL,
+b300 BLOB(300) NULL,
+bm BLOB(65535) NULL,
+b70k BLOB(70000) NULL,
+b17m BLOB(17000000) NULL,
+t TINYBLOB NULL,
+m MEDIUMBLOB NULL,
+l LONGBLOB NULL
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+b blob YES NULL
+b0 blob YES NULL
+b1 tinyblob YES NULL
+b300 blob YES NULL
+bm blob YES NULL
+b70k mediumblob YES NULL
+b17m longblob YES NULL
+t tinyblob YES NULL
+m mediumblob YES NULL
+l longblob YES NULL
+INSERT INTO t1 (b,b0,b1,b300,bm,b70k,b17m,t,m,l) VALUES
+('','','','','','','','','',''),
+('a','b','c','d','e','f','g','h','i','j'),
+('test1','test2','test3','test4','test5','test6','test7','test8','test9','test10'),
+( REPEAT('a',65535), REPEAT('b',65535), REPEAT('c',255), REPEAT('d',65535), REPEAT('e',65535), REPEAT('f',1048576), HEX(REPEAT('g',1048576)), REPEAT('h',255), REPEAT('i',1048576), HEX(REPEAT('j',1048576)) );
+SELECT LENGTH(b), LENGTH(b0), LENGTH(b1), LENGTH(b300), LENGTH(bm), LENGTH(b70k), LENGTH(b17m), LENGTH(t), LENGTH(m), LENGTH(l) FROM t1;
+LENGTH(b) LENGTH(b0) LENGTH(b1) LENGTH(b300) LENGTH(bm) LENGTH(b70k) LENGTH(b17m) LENGTH(t) LENGTH(m) LENGTH(l)
+0 0 0 0 0 0 0 0 0 0
+1 1 1 1 1 1 1 1 1 1
+5 5 5 5 5 5 5 5 5 6
+65535 65535 255 65535 65535 1048576 2097152 255 1048576 2097152
+INSERT INTO t1 (b,b0,b1,b300,bm,b70k,b17m,t,m,l) VALUES
+( REPEAT('a',65536), REPEAT('b',65536), REPEAT('c',256), REPEAT('d',65536), REPEAT('e',65536), REPEAT('f',1048576), REPEAT('g',1048576), REPEAT('h',256), REPEAT('i',1048576), REPEAT('j',1048576) );
+Warnings:
+Warning 1265 Data truncated for column 'b' at row 1
+Warning 1265 Data truncated for column 'b0' at row 1
+Warning 1265 Data truncated for column 'b1' at row 1
+Warning 1265 Data truncated for column 'b300' at row 1
+Warning 1265 Data truncated for column 'bm' at row 1
+Warning 1265 Data truncated for column 't' at row 1
+SELECT LENGTH(b), LENGTH(b0), LENGTH(b1), LENGTH(b300), LENGTH(bm), LENGTH(b70k), LENGTH(b17m), LENGTH(t), LENGTH(m), LENGTH(l) FROM t1;
+LENGTH(b) LENGTH(b0) LENGTH(b1) LENGTH(b300) LENGTH(bm) LENGTH(b70k) LENGTH(b17m) LENGTH(t) LENGTH(m) LENGTH(l)
+0 0 0 0 0 0 0 0 0 0
+1 1 1 1 1 1 1 1 1 1
+5 5 5 5 5 5 5 5 5 6
+65535 65535 255 65535 65535 1048576 1048576 255 1048576 1048576
+65535 65535 255 65535 65535 1048576 2097152 255 1048576 2097152
+ALTER TABLE t1 ADD COLUMN bbb BLOB(4294967296);
+ERROR 42000: Display width out of range for 'bbb' (max = 4294967295)
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c BLOB NULL,
+c1 BLOB NULL DEFAULT NULL,
+c2 BLOB NULL DEFAULT '',
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c blob YES NULL
+c1 blob YES NULL
+c2 blob YES ''
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES ('','','');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2
+3 NULL NULL
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2
+3
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c TINYBLOB NULL,
+c1 TINYBLOB NULL DEFAULT NULL,
+c2 TINYBLOB NULL DEFAULT '',
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c tinyblob YES NULL
+c1 tinyblob YES NULL
+c2 tinyblob YES ''
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES ('','','');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2
+3 NULL NULL
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2
+3
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c MEDIUMBLOB NULL,
+c1 MEDIUMBLOB NULL DEFAULT NULL,
+c2 MEDIUMBLOB NULL DEFAULT '',
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c mediumblob YES NULL
+c1 mediumblob YES NULL
+c2 mediumblob YES ''
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES ('','','');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2
+3 NULL NULL
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2
+3
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c LONGBLOB NULL,
+c1 LONGBLOB NULL DEFAULT NULL,
+c2 LONGBLOB NULL DEFAULT '',
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c longblob YES NULL
+c1 longblob YES NULL
+c2 longblob YES ''
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES ('','','');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2
+3 NULL NULL
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2
+3
+DROP TABLE t1;
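+# A sketch of the mapping exercised above, same assumptions: BLOB(n) resolves
+# to the smallest binary type able to hold n bytes (tinyblob up to 255, blob
+# up to 65535, mediumblob up to 16777215, longblob beyond that).
+CREATE TABLE blob_sketch (
+pk INT PRIMARY KEY,
+b200 BLOB(200),
+b60k BLOB(60000),
+b20m BLOB(20000000)
+) ENGINE=rocksdb;
+# SHOW COLUMNS would report b200 tinyblob, b60k blob, b20m longblob
+DROP TABLE blob_sketch;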
+########################
+# BOOL columns
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+b1 BOOL NULL,
+b2 BOOLEAN NULL
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+b1 tinyint(1) YES NULL
+b2 tinyint(1) YES NULL
+INSERT INTO t1 (b1,b2) VALUES (1,TRUE);
+SELECT b1,b2 FROM t1;
+b1 b2
+1 1
+INSERT INTO t1 (b1,b2) VALUES (FALSE,0);
+SELECT b1,b2 FROM t1;
+b1 b2
+0 0
+1 1
+INSERT INTO t1 (b1,b2) VALUES (2,3);
+SELECT b1,b2 FROM t1;
+b1 b2
+0 0
+1 1
+2 3
+INSERT INTO t1 (b1,b2) VALUES (-1,-2);
+SELECT b1,b2 FROM t1;
+b1 b2
+-1 -2
+0 0
+1 1
+2 3
+SELECT IF(b1,'true','false') AS a, IF(b2,'true','false') AS b FROM t1;
+a b
+false false
+true true
+true true
+true true
+SELECT b1,b2 FROM t1 WHERE b1 = TRUE;
+b1 b2
+1 1
+SELECT b1,b2 FROM t1 WHERE b2 = FALSE;
+b1 b2
+0 0
+INSERT INTO t1 (b1,b2) VALUES ('a','b');
+Warnings:
+Warning 1366 Incorrect integer value: 'a' for column `test`.`t1`.`b1` at row 1
+Warning 1366 Incorrect integer value: 'b' for column `test`.`t1`.`b2` at row 1
+SELECT b1,b2 FROM t1;
+b1 b2
+-1 -2
+0 0
+0 0
+1 1
+2 3
+INSERT INTO t1 (b1,b2) VALUES (128,-129);
+Warnings:
+Warning 1264 Out of range value for column 'b1' at row 1
+Warning 1264 Out of range value for column 'b2' at row 1
+SELECT b1,b2 FROM t1;
+b1 b2
+-1 -2
+0 0
+0 0
+1 1
+127 -128
+2 3
+ALTER TABLE t1 ADD COLUMN b3 BOOLEAN UNSIGNED NULL;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'UNSIGNED NULL' at line 1
+ALTER TABLE t1 ADD COLUMN b3 BOOL ZEROFILL NULL;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'ZEROFILL NULL' at line 1
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c BOOL NULL,
+c1 BOOL NULL DEFAULT NULL,
+c2 BOOL NULL DEFAULT '0',
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c tinyint(1) YES NULL
+c1 tinyint(1) YES NULL
+c2 tinyint(1) YES 0
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES ('0','0','0');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2 0 0 0
+3 NULL NULL 0
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2 0
+3 0
+DROP TABLE t1;
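+# A sketch of the BOOL behaviour above, same assumptions: BOOL/BOOLEAN is a
+# synonym for TINYINT(1), so any value in [-128,127] is stored as-is, larger
+# values are clamped with warning 1264, and only 0 tests as false.
+CREATE TABLE bool_sketch (pk INT PRIMARY KEY, b BOOL) ENGINE=rocksdb;
+INSERT INTO bool_sketch VALUES (1, 200);
+# expect Warning 1264; b stored as 127
+SELECT b, IF(b,'true','false') FROM bool_sketch;
+# expect 127, true
+DROP TABLE bool_sketch;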
+########################
+# CHAR columns
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c CHAR NULL,
+c0 CHAR(0) NULL,
+c1 CHAR(1) NULL,
+c20 CHAR(20) NULL,
+c255 CHAR(255) NULL,
+PRIMARY KEY (c255)
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c char(1) YES NULL
+c0 char(0) YES NULL
+c1 char(1) YES NULL
+c20 char(20) YES NULL
+c255 char(255) NO PRI NULL
+INSERT INTO t1 (c,c0,c1,c20,c255) VALUES ('','','','','');
+INSERT INTO t1 (c,c0,c1,c20,c255) VALUES ('a','','b','abcdefghi klmnopqrst', 'Creating an article for the Knowledgebase is similar to asking questions. First, navigate to the category where you feel the article should be. Once there, double check that an article doesn\'t already exist which would work.');
+SELECT c,c0,c1,c20,c255 FROM t1;
+c c0 c1 c20 c255
+
+a b abcdefghi klmnopqrst Creating an article for the Knowledgebase is similar to asking questions. First, navigate to the category where you feel the article should be. Once there, double check that an article doesn't already exist which would work.
+INSERT INTO t1 (c,c0,c1,c20,c255) VALUES ('abc', 'a', 'abc', REPEAT('a',21), REPEAT('x',256));
+Warnings:
+Warning 1265 Data truncated for column 'c' at row 1
+Warning 1265 Data truncated for column 'c0' at row 1
+Warning 1265 Data truncated for column 'c1' at row 1
+Warning 1265 Data truncated for column 'c20' at row 1
+Warning 1265 Data truncated for column 'c255' at row 1
+INSERT INTO t1 (c,c0,c1,c20,c255) SELECT c255, c255, c255, c255, CONCAT('a',c255,c1) FROM t1;
+Warnings:
+Warning 1265 Data truncated for column 'c' at row 5
+Warning 1265 Data truncated for column 'c0' at row 5
+Warning 1265 Data truncated for column 'c1' at row 5
+Warning 1265 Data truncated for column 'c20' at row 5
+Warning 1265 Data truncated for column 'c' at row 6
+Warning 1265 Data truncated for column 'c0' at row 6
+Warning 1265 Data truncated for column 'c1' at row 6
+Warning 1265 Data truncated for column 'c20' at row 6
+Warning 1265 Data truncated for column 'c255' at row 6
+SELECT c,c0,c1,c20,c255 FROM t1;
+c c0 c1 c20 c255
+
+ a
+C C Creating an article aCreating an article for the Knowledgebase is similar to asking questions. First, navigate to the category where you feel the article should be. Once there, double check that an article doesn't already exist which would work.b
+a a aaaaaaaaaaaaaaaaaaaa xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+a b abcdefghi klmnopqrst Creating an article for the Knowledgebase is similar to asking questions. First, navigate to the category where you feel the article should be. Once there, double check that an article doesn't already exist which would work.
+x x xxxxxxxxxxxxxxxxxxxx axxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+SELECT DISTINCT c20, REPEAT('a',LENGTH(c20)), COUNT(*) FROM t1 GROUP BY c1, c20;
+c20 REPEAT('a',LENGTH(c20)) COUNT(*)
+ 2
+Creating an article aaaaaaaaaaaaaaaaaaa 1
+aaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaa 1
+abcdefghi klmnopqrst aaaaaaaaaaaaaaaaaaaa 1
+xxxxxxxxxxxxxxxxxxxx aaaaaaaaaaaaaaaaaaaa 1
+ALTER TABLE t1 ADD COLUMN c257 CHAR(257) NULL;
+ERROR 42000: Column length too big for column 'c257' (max = 255); use BLOB or TEXT instead
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c CHAR NULL,
+c1 CHAR NULL DEFAULT NULL,
+c2 CHAR NULL DEFAULT '_',
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c char(1) YES NULL
+c1 char(1) YES NULL
+c2 char(1) YES _
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES ('_','_','_');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2 5F 5F 5F
+3 NULL NULL 5F
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2 5F
+3 5F
+DROP TABLE t1;
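+# A sketch of the CHAR truncation above, same assumptions: under the
+# non-strict sql_mode a too-long value is cut to the declared width with
+# warning 1265, and CHAR(0) keeps only the empty string.
+CREATE TABLE char_sketch (pk INT PRIMARY KEY, c CHAR(3)) ENGINE=rocksdb;
+INSERT INTO char_sketch VALUES (1, 'abcdef');
+# expect Warning 1265; c stored as 'abc'
+DROP TABLE char_sketch;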
+########################
+# VARCHAR columns
+########################
+DROP TABLE IF EXISTS t1, t2;
+CREATE TABLE t1 (
+v0 VARCHAR(0) NULL,
+v1 VARCHAR(1) NULL,
+v64 VARCHAR(64) NULL,
+v65000 VARCHAR(65000) NULL,
+PRIMARY KEY (v64)
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+v0 varchar(0) YES NULL
+v1 varchar(1) YES NULL
+v64 varchar(64) NO PRI NULL
+v65000 varchar(65000) YES NULL
+CREATE TABLE t2 (v VARCHAR(65532), PRIMARY KEY (v(255))) ENGINE=rocksdb;
+SHOW COLUMNS IN t2;
+Field Type Null Key Default Extra
+v varchar(65532) NO PRI NULL
+INSERT INTO t1 (v0,v1,v64,v65000) VALUES ('','','','');
+INSERT INTO t1 (v0,v1,v64,v65000) VALUES ('','y','Once there, double check that an article doesn\'t already exist','Here is a list of recommended books on MariaDB and MySQL. We\'ve provided links to Amazon.com here for convenience, but they can be found at many other bookstores, both online and off.
+
+ If you want to have your favorite MySQL / MariaDB book listed here, please leave a comment.
+ For developers who want to code on MariaDB or MySQL
+
+ * Understanding MySQL Internals by Sasha Pachev, former MySQL developer at MySQL AB.
+ o This is the only book we know about that describes the internals of MariaDB / MySQL. A must have for anyone who wants to understand and develop on MariaDB!
+ o Not all topics are covered and some parts are slightly outdated, but still the best book on this topic.
+ * MySQL 5.1 Plugin Development by Sergei Golubchik and Andrew Hutchings
+ o A must read for anyone wanting to write a plugin for MariaDB, written by the Sergei who designed the plugin interface for MySQL and MariaDB!
+
+ For MariaDB / MySQL end users
+
+ * MariaDB Crash Course by Ben Forta
+ o First MariaDB book!
+ o For people who want to learn SQL and the basics of MariaDB.
+ o Now shipping. Purchase at Amazon.com or your favorite bookseller.
+
+ * SQL-99 Complete, Really by Peter Gulutzan & Trudy Pelzer.
+ o Everything you wanted to know about the SQL 99 standard. Excellent reference book!
+ o Free to read in the Knowledgebase!
+
+ * MySQL (4th Edition) by Paul DuBois
+ o The \'default\' book to read if you wont to learn to use MySQL / MariaDB.
+
+ * MySQL Cookbook by Paul DuBois
+ o A lot of examples of how to use MySQL. As with all of Paul\'s books, it\'s worth its weight in gold and even enjoyable reading for such a \'dry\' subject.
+
+ * High Performance MySQL, Second Edition, By Baron Schwartz, Peter Zaitsev, Vadim Tkachenko, Jeremy D. Zawodny, Arjen Lentz, Derek J. Balling, et al.
+ o \"High Performance MySQL is the definitive guide to building fast, reliable systems with MySQL. Written by noted experts with years of real-world experience building very large systems, this book covers every aspect of MySQL performance in detail, and focuses on robustness, security, and data integrity. Learn advanced techniques in depth so you can bring out MySQL\'s full power.\" (From the book description at O\'Reilly)
+
+ * MySQL Admin Cookbook
+ o A quick step-by-step guide for MySQL users and database administrators to tackle real-world challenges with MySQL configuration and administration
+
+ * MySQL 5.0 Certification Study Guide, By Paul DuBois, Stefan Hinz, Carsten Pedersen
+ o This is the official guide to cover the passing of the two MySQL Certification examinations. It is valid till version 5.0 of the server, so while it misses all the features available in MySQL 5.1 and greater (including MariaDB 5.1 and greater), it provides a good basic understanding of MySQL for the end-user. ');
+SELECT v0,v1,v64,v65000 FROM t1;
+v0 v1 v64 v65000
+
+
+
+
+
+
+
+
+
+
+
+ y Once there, double check that an article doesn't already exist Here is a list of recommended books on MariaDB and MySQL. We've provided links to Amazon.com here for convenience, but they can be found at many other bookstores, both online and off.
+ o "High Performance MySQL is the definitive guide to building fast, reliable systems with MySQL. Written by noted experts with years of real-world experience building very large systems, this book covers every aspect of MySQL performance in detail, and focuses on robustness, security, and data integrity. Learn advanced techniques in depth so you can bring out MySQL's full power." (From the book description at O'Reilly)
+ o A lot of examples of how to use MySQL. As with all of Paul's books, it's worth its weight in gold and even enjoyable reading for such a 'dry' subject.
+ o A must read for anyone wanting to write a plugin for MariaDB, written by the Sergei who designed the plugin interface for MySQL and MariaDB!
+ o A quick step-by-step guide for MySQL users and database administrators to tackle real-world challenges with MySQL configuration and administration
+ o Everything you wanted to know about the SQL 99 standard. Excellent reference book!
+ o First MariaDB book!
+ o For people who want to learn SQL and the basics of MariaDB.
+ o Free to read in the Knowledgebase!
+ o Not all topics are covered and some parts are slightly outdated, but still the best book on this topic.
+ o Now shipping. Purchase at Amazon.com or your favorite bookseller.
+ o The 'default' book to read if you wont to learn to use MySQL / MariaDB.
+ o This is the official guide to cover the passing of the two MySQL Certification examinations. It is valid till version 5.0 of the server, so while it misses all the features available in MySQL 5.1 and greater (including MariaDB 5.1 and greater), it provides a good basic understanding of MySQL for the end-user.
+ o This is the only book we know about that describes the internals of MariaDB / MySQL. A must have for anyone who wants to understand and develop on MariaDB!
+ * High Performance MySQL, Second Edition, By Baron Schwartz, Peter Zaitsev, Vadim Tkachenko, Jeremy D. Zawodny, Arjen Lentz, Derek J. Balling, et al.
+ * MariaDB Crash Course by Ben Forta
+ * MySQL (4th Edition) by Paul DuBois
+ * MySQL 5.0 Certification Study Guide, By Paul DuBois, Stefan Hinz, Carsten Pedersen
+ * MySQL 5.1 Plugin Development by Sergei Golubchik and Andrew Hutchings
+ * MySQL Admin Cookbook
+ * MySQL Cookbook by Paul DuBois
+ * SQL-99 Complete, Really by Peter Gulutzan & Trudy Pelzer.
+ * Understanding MySQL Internals by Sasha Pachev, former MySQL developer at MySQL AB.
+ For MariaDB / MySQL end users
+ For developers who want to code on MariaDB or MySQL
+ If you want to have your favorite MySQL / MariaDB book listed here, please leave a comment.
+INSERT INTO t1 (v0,v1,v64,v65000) VALUES ('y', 'yy', REPEAT('c',65), REPEAT('abcdefghi ',6501));
+Warnings:
+Warning 1265 Data truncated for column 'v0' at row 1
+Warning 1265 Data truncated for column 'v1' at row 1
+Warning 1265 Data truncated for column 'v64' at row 1
+Warning 1265 Data truncated for column 'v65000' at row 1
+INSERT INTO t1 (v0,v1,v64,v65000) SELECT v65000, v65000, CONCAT('a',v65000), CONCAT(v65000,v1) FROM t1;
+Warnings:
+Warning 1265 Data truncated for column 'v0' at row 5
+Warning 1265 Data truncated for column 'v1' at row 5
+Warning 1265 Data truncated for column 'v64' at row 5
+Warning 1265 Data truncated for column 'v65000' at row 5
+Warning 1265 Data truncated for column 'v0' at row 6
+Warning 1265 Data truncated for column 'v1' at row 6
+Warning 1265 Data truncated for column 'v64' at row 6
+SELECT v0, v1, v64, LENGTH(v65000) FROM t1;
+v0 v1 v64 LENGTH(v65000)
+ 0
+ a 0
+ H aHere is a list of recommended books on MariaDB and MySQL. We've 2966
+ a aabcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abc 65000
+ y Once there, double check that an article doesn't already exist 2965
+ y cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc 65000
+ALTER TABLE t1 ADD COLUMN v65536 VARCHAR(65536) NULL;
+Warnings:
+Note 1246 Converting column 'v65536' from VARCHAR to TEXT
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+v0 varchar(0) YES NULL
+v1 varchar(1) YES NULL
+v64 varchar(64) NO PRI NULL
+v65000 varchar(65000) YES NULL
+v65536 mediumtext YES NULL
+DROP TABLE t1, t2;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c VARCHAR(64) NULL,
+c1 VARCHAR(64) NULL DEFAULT NULL,
+c2 VARCHAR(64) NULL DEFAULT 'test default',
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c varchar(64) YES NULL
+c1 varchar(64) YES NULL
+c2 varchar(64) YES test default
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES ('test default','test default','test default');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2 746573742064656661756C74 746573742064656661756C74 746573742064656661756C74
+3 NULL NULL 746573742064656661756C74
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2 746573742064656661756C74
+3 746573742064656661756C74
+DROP TABLE t1;
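+# The VARCHAR analogue of the VARBINARY sketch earlier, same assumptions:
+# a declared width past 65535 is converted to the matching TEXT type with
+# Note 1246, while widths that fit stay VARCHAR (subject to the 65535-byte
+# row limit).
+CREATE TABLE vc_sketch (pk INT PRIMARY KEY, v VARCHAR(70000)) ENGINE=rocksdb;
+# expect Note 1246 "Converting column 'v' from VARCHAR to TEXT";
+# SHOW COLUMNS would report v as mediumtext
+DROP TABLE vc_sketch;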
+########################
+# date and time columns
+########################
+set @col_opt_nullsave_time_zone=@@time_zone;
+set time_zone='UTC';
+DROP TABLE IF EXISTS t1;
+set @save_time_zone=@@time_zone;
+set time_zone='UTC';
+CREATE TABLE t1 (
+d DATE NULL,
+dt DATETIME NULL,
+ts TIMESTAMP NULL,
+t TIME NULL,
+y YEAR NULL,
+y4 YEAR(4) NULL,
+y2 YEAR(2) NULL,
+pk DATETIME PRIMARY KEY
+) ENGINE=rocksdb;
+Warnings:
+Note 1287 'YEAR(2)' is deprecated and will be removed in a future release. Please use YEAR(4) instead
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+d date YES NULL
+dt datetime YES NULL
+ts timestamp YES NULL
+t time YES NULL
+y year(4) YES NULL
+y4 year(4) YES NULL
+y2 year(2) YES NULL
+pk datetime NO PRI NULL
+SET @tm = '2012-04-09 05:27:00';
+INSERT INTO t1 (d,dt,ts,t,y,y4,y2,pk) VALUES
+('1000-01-01', '1000-01-01 00:00:00', FROM_UNIXTIME(1), '-838:59:59', '1901', '1901', '00','2012-12-12 12:12:12'),
+('9999-12-31', '9999-12-31 23:59:59', FROM_UNIXTIME(2147483647), '838:59:59', '2155', '2155', '99','2012-12-12 12:12:13'),
+('0000-00-00', '0000-00-00 00:00:00', '0000-00-00 00:00:00', '00:00:00', '0', '0', '0','2012-12-12 12:12:14'),
+(DATE(@tm),@tm,TIMESTAMP(@tm),TIME(@tm),YEAR(@tm),YEAR(@tm),YEAR(@tm),'2012-12-12 12:12:15');
+SELECT d,dt,ts,t,y,y4,y2 FROM t1;
+d dt ts t y y4 y2
+0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 00:00:00 2000 2000 00
+1000-01-01 1000-01-01 00:00:00 1970-01-01 00:00:01 -838:59:59 1901 1901 00
+2012-04-09 2012-04-09 05:27:00 2012-04-09 05:27:00 05:27:00 2012 2012 12
+9999-12-31 9999-12-31 23:59:59 2038-01-19 03:14:07 838:59:59 2155 2155 99
+INSERT INTO t1 (d,dt,ts,t,y,y4,y2,pk) VALUES
+('999-13-32', '999-11-31 00:00:00', '0', '-839:00:00', '1900', '1900', '-1','2012-12-12 12:12:16');
+Warnings:
+Warning 1265 Data truncated for column 'd' at row 1
+Warning 1265 Data truncated for column 'dt' at row 1
+Warning 1265 Data truncated for column 'ts' at row 1
+Warning 1264 Out of range value for column 't' at row 1
+Warning 1264 Out of range value for column 'y' at row 1
+Warning 1264 Out of range value for column 'y4' at row 1
+Warning 1264 Out of range value for column 'y2' at row 1
+SELECT d,dt,ts,t,y,y4,y2 FROM t1;
+d dt ts t y y4 y2
+1000-01-01 1000-01-01 00:00:00 1970-01-01 00:00:01 -838:59:59 1901 1901 00
+9999-12-31 9999-12-31 23:59:59 2038-01-19 03:14:07 838:59:59 2155 2155 99
+0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 00:00:00 2000 2000 00
+2012-04-09 2012-04-09 05:27:00 2012-04-09 05:27:00 05:27:00 2012 2012 12
+0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 -838:59:59 0000 0000 00
+set time_zone=@save_time_zone;
+DROP TABLE t1;
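+# A sketch of the TIMESTAMP range visible above, same assumptions: TIMESTAMP
+# stores seconds since the Unix epoch, so it spans 1970-01-01 00:00:01 to
+# 2038-01-19 03:14:07 UTC, while DATETIME reaches 9999-12-31.
+SET @sketch_tz=@@time_zone;
+SET time_zone='UTC';
+CREATE TABLE ts_sketch (pk INT PRIMARY KEY, ts TIMESTAMP NULL) ENGINE=rocksdb;
+INSERT INTO ts_sketch VALUES (1, FROM_UNIXTIME(2147483647));
+SELECT ts FROM ts_sketch;
+# expect 2038-01-19 03:14:07
+DROP TABLE ts_sketch;
+SET time_zone=@sketch_tz;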
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c DATE NULL,
+c1 DATE NULL DEFAULT NULL,
+c2 DATE NULL DEFAULT '2012-12-21',
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c date YES NULL
+c1 date YES NULL
+c2 date YES 2012-12-21
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES ('2012-12-21','2012-12-21','2012-12-21');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2 323031322D31322D3231 323031322D31322D3231 323031322D31322D3231
+3 NULL NULL 323031322D31322D3231
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2 323031322D31322D3231
+3 323031322D31322D3231
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c DATETIME NULL,
+c1 DATETIME NULL DEFAULT NULL,
+c2 DATETIME NULL DEFAULT '2012-12-21 12:21:12',
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c datetime YES NULL
+c1 datetime YES NULL
+c2 datetime YES 2012-12-21 12:21:12
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES ('2012-12-21 12:21:12','2012-12-21 12:21:12','2012-12-21 12:21:12');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2 323031322D31322D32312031323A32313A3132 323031322D31322D32312031323A32313A3132 323031322D31322D32312031323A32313A3132
+3 NULL NULL 323031322D31322D32312031323A32313A3132
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2 323031322D31322D32312031323A32313A3132
+3 323031322D31322D32312031323A32313A3132
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c TIMESTAMP NULL,
+c1 TIMESTAMP NULL DEFAULT NULL,
+c2 TIMESTAMP NULL DEFAULT '2012-12-21 12:21:12',
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c timestamp YES NULL
+c1 timestamp YES NULL
+c2 timestamp YES 2012-12-21 12:21:12
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES ('2012-12-21 12:21:12','2012-12-21 12:21:12','2012-12-21 12:21:12');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2 323031322D31322D32312031323A32313A3132 323031322D31322D32312031323A32313A3132 323031322D31322D32312031323A32313A3132
+3 NULL NULL 323031322D31322D32312031323A32313A3132
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2 323031322D31322D32312031323A32313A3132
+3 323031322D31322D32312031323A32313A3132
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c TIME NULL,
+c1 TIME NULL DEFAULT NULL,
+c2 TIME NULL DEFAULT '12:21:12',
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c time YES NULL
+c1 time YES NULL
+c2 time YES 12:21:12
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES ('12:21:12','12:21:12','12:21:12');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2 31323A32313A3132 31323A32313A3132 31323A32313A3132
+3 NULL NULL 31323A32313A3132
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2 31323A32313A3132
+3 31323A32313A3132
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c YEAR NULL,
+c1 YEAR NULL DEFAULT NULL,
+c2 YEAR NULL DEFAULT '2012',
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c year(4) YES NULL
+c1 year(4) YES NULL
+c2 year(4) YES 2012
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES ('2012','2012','2012');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2 7DC 7DC 7DC
+3 NULL NULL 7DC
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2 7DC
+3 7DC
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c YEAR(2) NULL,
+c1 YEAR(2) NULL DEFAULT NULL,
+c2 YEAR(2) NULL DEFAULT '12',
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+Warnings:
+Note 1287 'YEAR(2)' is deprecated and will be removed in a future release. Please use YEAR(4) instead
+Note 1287 'YEAR(2)' is deprecated and will be removed in a future release. Please use YEAR(4) instead
+Note 1287 'YEAR(2)' is deprecated and will be removed in a future release. Please use YEAR(4) instead
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c year(2) YES NULL
+c1 year(2) YES NULL
+c2 year(2) YES 12
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES ('12','12','12');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2 C C C
+3 NULL NULL C
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2 C
+3 C
+DROP TABLE t1;
+set time_zone=@col_opt_nullsave_time_zone;
+########################
+# ENUM columns
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+a ENUM('') NULL,
+b ENUM('test1','test2','test3','test4','test5') NULL,
+c ENUM('1','2','3','4','5','6','7','8','9','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z',' ','11','12','13','14','15','16','17','18','19','1a','1b','1c','1d','1e','1f','1g','1h','1i','1j','1k','1l','1m','1n','1o','1p','1q','1r','1s','1t','1u','1v','1w','1x','1y','1z','20','21','22','23','24','25','26','27','28','29','2a','2b','2c','2d','2e','2f','2g','2h','2i','2j','2k','2l','2m','2n','2o','2p','2q','2r','2s','2t','2u','2v','2w','2x','2y','2z','30','31','32','33','34','35','36','37','38','39','3a','3b','3c','3d','3e','3f','3g','3h','3i','3j','3k','3l','3m','3n','3o','3p','3q','3r','3s','3t','3u','3v','3w','3x','3y','3z','40','41','42','43','44','45','46','47','48','49','4a','4b','4c','4d','4e','4f','4g','4h','4i','4j','4k','4l','4m','4n','4o','4p','4q','4r','4s','4t','4u','4v','4w','4x','4y','4z','50','51','52','53','54','55','56','57','58','59','5a','5b','5c','5d','5e','5f','5g','5h','5i','5j','5k','5l','5m','5n','5o','5p','5q','5r','5s','5t','5u','5v','5w','5x','5y','5z','60','61','62','63','64','65','66','67','68','69','6a','6b','6c','6d','6e','6f','6g','6h','6i','6j','6k','6l','6m','6n','6o','6p','6q','6r','6s','6t','6u','6v','6w','6x','6y','6z','70','71','72','73','74','75') NULL,
+PRIMARY KEY (b)
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a enum('') YES NULL
+b enum('test1','test2','test3','test4','test5') NO PRI NULL
+c enum('1','2','3','4','5','6','7','8','9','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','','11','12','13','14','15','16','17','18','19','1a','1b','1c','1d','1e','1f','1g','1h','1i','1j','1k','1l','1m','1n','1o','1p','1q','1r','1s','1t','1u','1v','1w','1x','1y','1z','20','21','22','23','24','25','26','27','28','29','2a','2b','2c','2d','2e','2f','2g','2h','2i','2j','2k','2l','2m','2n','2o','2p','2q','2r','2s','2t','2u','2v','2w','2x','2y','2z','30','31','32','33','34','35','36','37','38','39','3a','3b','3c','3d','3e','3f','3g','3h','3i','3j','3k','3l','3m','3n','3o','3p','3q','3r','3s','3t','3u','3v','3w','3x','3y','3z','40','41','42','43','44','45','46','47','48','49','4a','4b','4c','4d','4e','4f','4g','4h','4i','4j','4k','4l','4m','4n','4o','4p','4q','4r','4s','4t','4u','4v','4w','4x','4y','4z','50','51','52','53','54','55','56','57','58','59','5a','5b','5c','5d','5e','5f','5g','5h','5i','5j','5k','5l','5m','5n','5o','5p','5q','5r','5s','5t','5u','5v','5w','5x','5y','5z','60','61','62','63','64','65','66','67','68','69','6a','6b','6c','6d','6e','6f','6g','6h','6i','6j','6k','6l','6m','6n','6o','6p','6q','6r','6s','6t','6u','6v','6w','6x','6y','6z','70','71','72','73','74','75') YES NULL
+INSERT INTO t1 (a,b,c) VALUES ('','test2','4'),('',5,2);
+SELECT a,b,c FROM t1;
+a b c
+ test2 4
+ test5 2
+INSERT INTO t1 (a,b,c) VALUES (0,'test6',-1);
+Warnings:
+Warning 1265 Data truncated for column 'a' at row 1
+Warning 1265 Data truncated for column 'b' at row 1
+Warning 1265 Data truncated for column 'c' at row 1
+SELECT a,b,c FROM t1;
+a b c
+
+ test2 4
+ test5 2
+ALTER TABLE t1 ADD COLUMN e ENUM('a','A') NULL;
+Warnings:
+Note 1291 Column 'e' has duplicated value 'a' in ENUM
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a enum('') YES NULL
+b enum('test1','test2','test3','test4','test5') NO PRI NULL
+c enum('1','2','3','4','5','6','7','8','9','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','','11','12','13','14','15','16','17','18','19','1a','1b','1c','1d','1e','1f','1g','1h','1i','1j','1k','1l','1m','1n','1o','1p','1q','1r','1s','1t','1u','1v','1w','1x','1y','1z','20','21','22','23','24','25','26','27','28','29','2a','2b','2c','2d','2e','2f','2g','2h','2i','2j','2k','2l','2m','2n','2o','2p','2q','2r','2s','2t','2u','2v','2w','2x','2y','2z','30','31','32','33','34','35','36','37','38','39','3a','3b','3c','3d','3e','3f','3g','3h','3i','3j','3k','3l','3m','3n','3o','3p','3q','3r','3s','3t','3u','3v','3w','3x','3y','3z','40','41','42','43','44','45','46','47','48','49','4a','4b','4c','4d','4e','4f','4g','4h','4i','4j','4k','4l','4m','4n','4o','4p','4q','4r','4s','4t','4u','4v','4w','4x','4y','4z','50','51','52','53','54','55','56','57','58','59','5a','5b','5c','5d','5e','5f','5g','5h','5i','5j','5k','5l','5m','5n','5o','5p','5q','5r','5s','5t','5u','5v','5w','5x','5y','5z','60','61','62','63','64','65','66','67','68','69','6a','6b','6c','6d','6e','6f','6g','6h','6i','6j','6k','6l','6m','6n','6o','6p','6q','6r','6s','6t','6u','6v','6w','6x','6y','6z','70','71','72','73','74','75') YES NULL
+e enum('a','A') YES NULL
+INSERT INTO t1 (a,b,c,e) VALUES ('','test3','75','A');
+SELECT a,b,c,e FROM t1;
+a b c e
+ NULL
+ test2 4 NULL
+ test3 75 a
+ test5 2 NULL
+SELECT a,b,c,e FROM t1 WHERE b='test2' OR a != '';
+a b c e
+ test2 4 NULL
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c ENUM('test1','test2','test3') NULL,
+c1 ENUM('test1','test2','test3') NULL DEFAULT NULL,
+c2 ENUM('test1','test2','test3') NULL DEFAULT 'test2',
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c enum('test1','test2','test3') YES NULL
+c1 enum('test1','test2','test3') YES NULL
+c2 enum('test1','test2','test3') YES test2
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES ('test2','test2','test2');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2 7465737432 7465737432 7465737432
+3 NULL NULL 7465737432
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2 7465737432
+3 7465737432
+DROP TABLE t1;
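+# A sketch of the numeric-index behaviour used above (the b=5 insert), same
+# assumptions: a number stored into an ENUM is interpreted as a 1-based index
+# into the value list, not as a literal string.
+CREATE TABLE enum_sketch (pk INT PRIMARY KEY, e ENUM('a','b','c')) ENGINE=rocksdb;
+INSERT INTO enum_sketch VALUES (1, 2);
+SELECT e FROM enum_sketch;
+# expect b (the second listed value), not the string '2'
+DROP TABLE enum_sketch;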
+########################
+# Fixed point columns (NUMERIC, DECIMAL)
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+d DECIMAL NULL,
+d0 DECIMAL(0) NULL,
+d1_1 DECIMAL(1,1) NULL,
+d10_2 DECIMAL(10,2) NULL,
+d60_10 DECIMAL(60,10) NULL,
+n NUMERIC NULL,
+n0_0 NUMERIC(0,0) NULL,
+n1 NUMERIC(1) NULL,
+n20_4 NUMERIC(20,4) NULL,
+n65_4 NUMERIC(65,4) NULL,
+pk NUMERIC NULL PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+d decimal(10,0) YES NULL
+d0 decimal(10,0) YES NULL
+d1_1 decimal(1,1) YES NULL
+d10_2 decimal(10,2) YES NULL
+d60_10 decimal(60,10) YES NULL
+n decimal(10,0) YES NULL
+n0_0 decimal(10,0) YES NULL
+n1 decimal(1,0) YES NULL
+n20_4 decimal(20,4) YES NULL
+n65_4 decimal(65,4) YES NULL
+pk decimal(10,0) NO PRI NULL
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (100,123456,0.3,40000.25,123456789123456789.10001,1024,7000.0,8.0,999999.9,9223372036854775807,1);
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (0,0,0,0,0,0,0,0,0,0,2);
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (9999999999.0,9999999999.0,0.9,99999999.99,99999999999999999999999999999999999999999999999999.9999999999,9999999999.0,9999999999.0,9.0,9999999999999999.9999,9999999999999999999999999999999999999999999999999999999999999.9999,3);
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+100 123456 0.3 40000.25 123456789123456789.1000100000 1024 7000 8 999999.9000 9223372036854775807.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (-100,-123456,-0.3,-40000.25,-123456789123456789.10001,-1024,-7000.0,-8.0,-999999.9,-9223372036854775807,4);
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (-9999999999.0,-9999999999.0,-0.9,-99999999.99,-99999999999999999999999999999999999999999999999999.9999999999,-9999999999.0,-9999999999.0,-9.0,-9999999999999999.9999,-9999999999999999999999999999999999999999999999999999999999999.9999,5);
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+-100 -123456 -0.3 -40000.25 -123456789123456789.1000100000 -1024 -7000 -8 -999999.9000 -9223372036854775807.0000
+-9999999999 -9999999999 -0.9 -99999999.99 -99999999999999999999999999999999999999999999999999.9999999999 -9999999999 -9999999999 -9 -9999999999999999.9999 -9999999999999999999999999999999999999999999999999999999999999.9999
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+100 123456 0.3 40000.25 123456789123456789.1000100000 1024 7000 8 999999.9000 9223372036854775807.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1 WHERE n20_4 = 9999999999999999.9999 OR d < 100;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+-100 -123456 -0.3 -40000.25 -123456789123456789.1000100000 -1024 -7000 -8 -999999.9000 -9223372036854775807.0000
+-9999999999 -9999999999 -0.9 -99999999.99 -99999999999999999999999999999999999999999999999999.9999999999 -9999999999 -9999999999 -9 -9999999999999999.9999 -9999999999999999999999999999999999999999999999999999999999999.9999
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+6
+);
+Warnings:
+Warning 1264 Out of range value for column 'd' at row 1
+Warning 1264 Out of range value for column 'd0' at row 1
+Warning 1264 Out of range value for column 'd1_1' at row 1
+Warning 1264 Out of range value for column 'd10_2' at row 1
+Warning 1264 Out of range value for column 'd60_10' at row 1
+Warning 1264 Out of range value for column 'n' at row 1
+Warning 1264 Out of range value for column 'n0_0' at row 1
+Warning 1264 Out of range value for column 'n1' at row 1
+Warning 1264 Out of range value for column 'n20_4' at row 1
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+-100 -123456 -0.3 -40000.25 -123456789123456789.1000100000 -1024 -7000 -8 -999999.9000 -9223372036854775807.0000
+-9999999999 -9999999999 -0.9 -99999999.99 -99999999999999999999999999999999999999999999999999.9999999999 -9999999999 -9999999999 -9 -9999999999999999.9999 -9999999999999999999999999999999999999999999999999999999999999.9999
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+100 123456 0.3 40000.25 123456789123456789.1000100000 1024 7000 8 999999.9000 9223372036854775807.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (10000000000.0,10000000000.0,1.1,100000000.99,100000000000000000000000000000000000000000000000000.0,10000000000.0,10000000000.0,10.0,10000000000000000.9999,10000000000000000000000000000000000000000000000000000000000000.9999,7);
+Warnings:
+Warning 1264 Out of range value for column 'd' at row 1
+Warning 1264 Out of range value for column 'd0' at row 1
+Warning 1264 Out of range value for column 'd1_1' at row 1
+Warning 1264 Out of range value for column 'd10_2' at row 1
+Warning 1264 Out of range value for column 'd60_10' at row 1
+Warning 1264 Out of range value for column 'n' at row 1
+Warning 1264 Out of range value for column 'n0_0' at row 1
+Warning 1264 Out of range value for column 'n1' at row 1
+Warning 1264 Out of range value for column 'n20_4' at row 1
+Warning 1264 Out of range value for column 'n65_4' at row 1
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+-100 -123456 -0.3 -40000.25 -123456789123456789.1000100000 -1024 -7000 -8 -999999.9000 -9223372036854775807.0000
+-9999999999 -9999999999 -0.9 -99999999.99 -99999999999999999999999999999999999999999999999999.9999999999 -9999999999 -9999999999 -9 -9999999999999999.9999 -9999999999999999999999999999999999999999999999999999999999999.9999
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+100 123456 0.3 40000.25 123456789123456789.1000100000 1024 7000 8 999999.9000 9223372036854775807.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (9999999999.1,9999999999.1,1.9,99999999.001,99999999999999999999999999999999999999999999999999.99999999991,9999999999.1,9999999999.1,9.1,9999999999999999.00001,9999999999999999999999999999999999999999999999999999999999999.11111,8);
+Warnings:
+Note 1265 Data truncated for column 'd' at row 1
+Note 1265 Data truncated for column 'd0' at row 1
+Warning 1264 Out of range value for column 'd1_1' at row 1
+Note 1265 Data truncated for column 'd10_2' at row 1
+Note 1265 Data truncated for column 'd60_10' at row 1
+Note 1265 Data truncated for column 'n' at row 1
+Note 1265 Data truncated for column 'n0_0' at row 1
+Note 1265 Data truncated for column 'n1' at row 1
+Note 1265 Data truncated for column 'n20_4' at row 1
+Note 1265 Data truncated for column 'n65_4' at row 1
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+-100 -123456 -0.3 -40000.25 -123456789123456789.1000100000 -1024 -7000 -8 -999999.9000 -9223372036854775807.0000
+-9999999999 -9999999999 -0.9 -99999999.99 -99999999999999999999999999999999999999999999999999.9999999999 -9999999999 -9999999999 -9 -9999999999999999.9999 -9999999999999999999999999999999999999999999999999999999999999.9999
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+100 123456 0.3 40000.25 123456789123456789.1000100000 1024 7000 8 999999.9000 9223372036854775807.0000
+9999999999 9999999999 0.9 99999999.00 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.0000 9999999999999999999999999999999999999999999999999999999999999.1111
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+ALTER TABLE t1 ADD COLUMN n66 NUMERIC(66) NULL;
+ERROR 42000: Too big precision 66 specified for 'n66'. Maximum is 65
+ALTER TABLE t1 ADD COLUMN n66_6 DECIMAL(66,6) NULL;
+ERROR 42000: Too big precision 66 specified for 'n66_6'. Maximum is 65
+ALTER TABLE t1 ADD COLUMN n66_66 DECIMAL(66,66) NULL;
+ERROR 42000: Too big scale 66 specified for 'n66_66'. Maximum is 38
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c DECIMAL NULL,
+c1 DECIMAL NULL DEFAULT NULL,
+c2 DECIMAL NULL DEFAULT 1.1,
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+Warnings:
+Note 1265 Data truncated for column 'c2' at row 1
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c decimal(10,0) YES NULL
+c1 decimal(10,0) YES NULL
+c2 decimal(10,0) YES 1
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES (1.1,1.1,1.1);
+Warnings:
+Note 1265 Data truncated for column 'c' at row 1
+Note 1265 Data truncated for column 'c1' at row 1
+Note 1265 Data truncated for column 'c2' at row 1
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2 1 1 1
+3 NULL NULL 1
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2 1
+3 1
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c NUMERIC NULL,
+c1 NUMERIC NULL DEFAULT NULL,
+c2 NUMERIC NULL DEFAULT 0 ,
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c decimal(10,0) YES NULL
+c1 decimal(10,0) YES NULL
+c2 decimal(10,0) YES 0
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES (0 ,0 ,0 );
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2 0 0 0
+3 NULL NULL 0
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2 0
+3 0
+DROP TABLE t1;
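+# A sketch separating the two behaviours above, same assumptions: a value too
+# large for DECIMAL(p,s) is clamped to the maximum with Warning 1264, whereas
+# excess fractional digits are only rounded away with Note 1265.
+CREATE TABLE dec_sketch (pk INT PRIMARY KEY, d DECIMAL(4,1)) ENGINE=rocksdb;
+INSERT INTO dec_sketch VALUES (1, 12345);
+# expect Warning 1264; d stored as 999.9
+INSERT INTO dec_sketch VALUES (2, 12.34);
+# expect Note 1265; d stored as 12.3
+DROP TABLE dec_sketch;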
+########################
+# Floating point columns (FLOAT, DOUBLE)
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+f FLOAT NULL,
+f0 FLOAT(0) NULL,
+r1_1 REAL(1,1) NULL,
+f23_0 FLOAT(23) NULL,
+f20_3 FLOAT(20,3) NULL,
+d DOUBLE NULL,
+d1_0 DOUBLE(1,0) NULL,
+d10_10 DOUBLE PRECISION (10,10) NULL,
+d53 DOUBLE(53,0) NULL,
+d53_10 DOUBLE(53,10) NULL,
+pk DOUBLE NULL PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+f float YES NULL
+f0 float YES NULL
+r1_1 double(1,1) YES NULL
+f23_0 float YES NULL
+f20_3 float(20,3) YES NULL
+d double YES NULL
+d1_0 double(1,0) YES NULL
+d10_10 double(10,10) YES NULL
+d53 double(53,0) YES NULL
+d53_10 double(53,10) YES NULL
+pk double NO PRI NULL
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (12345.12345,12345.12345,0.9,123456789.123,56789.987,11111111.111,8.0,0.0123456789,1234566789123456789,99999999999999999.99999999,1);
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 12345.1
+d 11111111.111
+d10_10 0.0123456789
+d1_0 8
+d53 1234566789123456800
+d53_10 100000000000000000.0000000000
+f0 12345.1
+f20_3 56789.988
+f23_0 123457000
+r1_1 0.9
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (0,0,0,0,0,0,0,0,0,0,2);
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (
+99999999999999999999999999999999999999,
+99999999999999999999999999999999999999.9999999999999999,
+0.9,
+99999999999999999999999999999999999999.9,
+99999999999999999.999,
+999999999999999999999999999999999999999999999999999999999999999999999999999999999,
+9,
+0.9999999999,
+1999999999999999999999999999999999999999999999999999999,
+19999999999999999999999999999999999999999999.9999999999,
+3
+);
+Warnings:
+Warning 1264 Out of range value for column 'd53' at row 1
+Warning 1264 Out of range value for column 'd53_10' at row 1
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 12345.1
+d 0
+d 11111111.111
+d 1e81
+d10_10 0.0000000000
+d10_10 0.0123456789
+d10_10 0.9999999999
+d1_0 0
+d1_0 8
+d1_0 9
+d53 0
+d53 100000000000000000000000000000000000000000000000000000
+d53 1234566789123456800
+d53_10 0.0000000000
+d53_10 100000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+f 0
+f 1e38
+f0 0
+f0 12345.1
+f0 1e38
+f20_3 0.000
+f20_3 56789.988
+f20_3 99999998430674940.000
+f23_0 0
+f23_0 123457000
+f23_0 1e38
+r1_1 0.0
+r1_1 0.9
+r1_1 0.9
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (-999999999999999999999999,-99999999999.999999999999,-0.9,-999.99999999999999999999,-99999999999999999.999,-999999999999999999999999999999999999999999999999999999999999-0.999,-9,-.9999999999,-999999999999999999999999999999.99999999999999999999999,-9999999999999999999999999999999999999999999.9999999999,4);
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 12345.1
+d -1e60
+d 0
+d 11111111.111
+d 1e81
+d10_10 -0.9999999999
+d10_10 0.0000000000
+d10_10 0.0123456789
+d10_10 0.9999999999
+d1_0 -9
+d1_0 0
+d1_0 8
+d1_0 9
+d53 -1000000000000000000000000000000
+d53 0
+d53 100000000000000000000000000000000000000000000000000000
+d53 1234566789123456800
+d53_10 -10000000000000000000000000000000000000000000.0000000000
+d53_10 0.0000000000
+d53_10 100000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+f -1e24
+f 0
+f 1e38
+f0 -100000000000
+f0 0
+f0 12345.1
+f0 1e38
+f20_3 -99999998430674940.000
+f20_3 0.000
+f20_3 56789.988
+f20_3 99999998430674940.000
+f23_0 -1000
+f23_0 0
+f23_0 123457000
+f23_0 1e38
+r1_1 -0.9
+r1_1 0.0
+r1_1 0.9
+r1_1 0.9
+SELECT
+CONCAT('', MAX(f)),
+CONCAT('', MAX(f0)),
+CONCAT('', MAX(r1_1)),
+CONCAT('', MAX(f23_0)),
+CONCAT('', MAX(f20_3)),
+CONCAT('', MAX(d)),
+CONCAT('', MAX(d1_0)),
+CONCAT('', MAX(d10_10)),
+CONCAT('', MAX(d53)),
+CONCAT('', MAX(d53_10)) FROM t1;
+CONCAT('', MAX(f)) 9.999999680285692e37
+CONCAT('', MAX(d)) 1e81
+CONCAT('', MAX(d10_10)) 0.9999999999
+CONCAT('', MAX(d1_0)) 9
+CONCAT('', MAX(d53)) 100000000000000000000000000000000000000000000000000000
+CONCAT('', MAX(d53_10)) 10000000000000000000000000000000000000000000.0000000000
+CONCAT('', MAX(f0)) 9.999999680285692e37
+CONCAT('', MAX(f20_3)) 99999998430674940.000
+CONCAT('', MAX(f23_0)) 9.999999680285692e37
+CONCAT('', MAX(r1_1)) 0.9
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+5
+);
+Warnings:
+Warning 1264 Out of range value for column 'f' at row 1
+Warning 1264 Out of range value for column 'f0' at row 1
+Warning 1264 Out of range value for column 'r1_1' at row 1
+Warning 1264 Out of range value for column 'f23_0' at row 1
+Warning 1264 Out of range value for column 'f20_3' at row 1
+Warning 1264 Out of range value for column 'd1_0' at row 1
+Warning 1264 Out of range value for column 'd10_10' at row 1
+Warning 1264 Out of range value for column 'd53' at row 1
+Warning 1264 Out of range value for column 'd53_10' at row 1
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 12345.1
+d -1e60
+d 0
+d 11111111.111
+d 1e61
+d 1e81
+d10_10 -0.9999999999
+d10_10 0.0000000000
+d10_10 0.0123456789
+d10_10 0.9999999999
+d10_10 0.9999999999
+d1_0 -9
+d1_0 0
+d1_0 8
+d1_0 9
+d1_0 9
+d53 -1000000000000000000000000000000
+d53 0
+d53 100000000000000000000000000000000000000000000000000000
+d53 100000000000000000000000000000000000000000000000000000
+d53 1234566789123456800
+d53_10 -10000000000000000000000000000000000000000000.0000000000
+d53_10 0.0000000000
+d53_10 100000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+f -1e24
+f 0
+f 1e38
+f 3.40282e38
+f0 -100000000000
+f0 0
+f0 12345.1
+f0 1e38
+f0 3.40282e38
+f20_3 -99999998430674940.000
+f20_3 0.000
+f20_3 56789.988
+f20_3 99999998430674940.000
+f20_3 99999998430674940.000
+f23_0 -1000
+f23_0 0
+f23_0 123457000
+f23_0 1e38
+f23_0 3.40282e38
+r1_1 -0.9
+r1_1 0.0
+r1_1 0.9
+r1_1 0.9
+r1_1 0.9
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (
+999999999999999999999999999999999999999,
+999999999999999999999999999999999999999.9999999999999999,
+1.9,
+999999999999999999999999999999999999999.9,
+999999999999999999.999,
+9999999999999999999999999999999999999999999999999999999999999999999999999999999999,
+99,
+1.9999999999,
+1999999999999999999999999999999999999999999999999999999,
+19999999999999999999999999999999999999999999.9999999999,
+6
+);
+Warnings:
+Warning 1916 Got overflow when converting '' to DECIMAL. Value truncated
+Warning 1264 Out of range value for column 'f' at row 1
+Warning 1264 Out of range value for column 'f0' at row 1
+Warning 1264 Out of range value for column 'r1_1' at row 1
+Warning 1264 Out of range value for column 'f23_0' at row 1
+Warning 1264 Out of range value for column 'f20_3' at row 1
+Warning 1264 Out of range value for column 'd1_0' at row 1
+Warning 1264 Out of range value for column 'd10_10' at row 1
+Warning 1264 Out of range value for column 'd53' at row 1
+Warning 1264 Out of range value for column 'd53_10' at row 1
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 12345.1
+d -1e60
+d 0
+d 11111111.111
+d 1e61
+d 1e65
+d 1e81
+d10_10 -0.9999999999
+d10_10 0.0000000000
+d10_10 0.0123456789
+d10_10 0.9999999999
+d10_10 0.9999999999
+d10_10 0.9999999999
+d1_0 -9
+d1_0 0
+d1_0 8
+d1_0 9
+d1_0 9
+d1_0 9
+d53 -1000000000000000000000000000000
+d53 0
+d53 100000000000000000000000000000000000000000000000000000
+d53 100000000000000000000000000000000000000000000000000000
+d53 100000000000000000000000000000000000000000000000000000
+d53 1234566789123456800
+d53_10 -10000000000000000000000000000000000000000000.0000000000
+d53_10 0.0000000000
+d53_10 100000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+f -1e24
+f 0
+f 1e38
+f 3.40282e38
+f 3.40282e38
+f0 -100000000000
+f0 0
+f0 12345.1
+f0 1e38
+f0 3.40282e38
+f0 3.40282e38
+f20_3 -99999998430674940.000
+f20_3 0.000
+f20_3 56789.988
+f20_3 99999998430674940.000
+f20_3 99999998430674940.000
+f20_3 99999998430674940.000
+f23_0 -1000
+f23_0 0
+f23_0 123457000
+f23_0 1e38
+f23_0 3.40282e38
+f23_0 3.40282e38
+r1_1 -0.9
+r1_1 0.0
+r1_1 0.9
+r1_1 0.9
+r1_1 0.9
+r1_1 0.9
+ALTER TABLE t1 ADD COLUMN d0_0 DOUBLE(0,0) NULL;
+ALTER TABLE t1 ADD COLUMN n66_6 DECIMAL(256,1) NULL;
+ERROR 42000: Too big precision 256 specified for 'n66_6'. Maximum is 65
+ALTER TABLE t1 ADD COLUMN n66_66 DECIMAL(40,35) NULL;
+DROP TABLE t1;
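+# FLOAT(p) chooses storage from the precision alone: p <= 24 stays single
+# precision (hence f23_0 FLOAT(23) reports plain 'float' above), while
+# 25..53 is promoted to DOUBLE. REAL maps to DOUBLE unless the REAL_AS_FLOAT
+# sql_mode is set, which is why r1_1 REAL(1,1) shows as double(1,1).
+# Sketch (hypothetical table f_demo, default sql_mode assumed):
+#   CREATE TABLE f_demo (a FLOAT(24), b FLOAT(25));  -- a: float, b: double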
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c FLOAT NULL,
+c1 FLOAT NULL DEFAULT NULL,
+c2 FLOAT NULL DEFAULT 1.1 ,
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c float YES NULL
+c1 float YES NULL
+c2 float YES 1.1
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES (1.1 ,1.1 ,1.1 );
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2 1 1 1
+3 NULL NULL 1
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2 1
+3 1
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c DOUBLE NULL,
+c1 DOUBLE NULL DEFAULT NULL,
+c2 DOUBLE NULL DEFAULT 0 ,
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c double YES NULL
+c1 double YES NULL
+c2 double YES 0
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES (0 ,0 ,0 );
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2 0 0 0
+3 NULL NULL 0
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2 0
+3 0
+DROP TABLE t1;
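+# In non-strict mode, out-of-range FLOAT/DOUBLE values are clamped to the
+# type's limit with warning 1264; the repeated 3.40282e38 rows above are the
+# single-precision maximum (~3.402823466e38). Reusing the hypothetical
+# f_demo from the sketch above:
+#   INSERT INTO f_demo (a) VALUES (1e100);  -- stored as 3.40282e38, warning 1264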
+########################
+# INT columns
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+i INT NULL,
+i0 INT(0) NULL,
+i1 INT(1) NULL,
+i20 INT(20) NULL,
+t TINYINT NULL,
+t0 TINYINT(0) NULL,
+t1 TINYINT(1) NULL,
+t20 TINYINT(20) NULL,
+s SMALLINT NULL,
+s0 SMALLINT(0) NULL,
+s1 SMALLINT(1) NULL,
+s20 SMALLINT(20) NULL,
+m MEDIUMINT NULL,
+m0 MEDIUMINT(0) NULL,
+m1 MEDIUMINT(1) NULL,
+m20 MEDIUMINT(20) NULL,
+b BIGINT NULL,
+b0 BIGINT(0) NULL,
+b1 BIGINT(1) NULL,
+b20 BIGINT(20) NULL,
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+i0 int(11) YES NULL
+i1 int(1) YES NULL
+i20 int(20) YES NULL
+t tinyint(4) YES NULL
+t0 tinyint(4) YES NULL
+t1 tinyint(1) YES NULL
+t20 tinyint(20) YES NULL
+s smallint(6) YES NULL
+s0 smallint(6) YES NULL
+s1 smallint(1) YES NULL
+s20 smallint(20) YES NULL
+m mediumint(9) YES NULL
+m0 mediumint(9) YES NULL
+m1 mediumint(1) YES NULL
+m20 mediumint(20) YES NULL
+b bigint(20) YES NULL
+b0 bigint(20) YES NULL
+b1 bigint(1) YES NULL
+b20 bigint(20) YES NULL
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20);
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (2147483647,2147483647,2147483647,2147483647,127,127,127,127,32767,32767,32767,32767,8388607,8388607,8388607,8388607,9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807);
+SELECT i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20 FROM t1;
+i i0 i1 i20 t t0 t1 t20 s s0 s1 s20 m m0 m1 m20 b b0 b1 b20
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (-2147483648,-2147483648,-2147483648,-2147483648,-128,-128,-128,-128,-32768,-32768,-32768,-32768,-8388608,-8388608,-8388608,-8388608,-9223372036854775808,-9223372036854775808,-9223372036854775808,-9223372036854775808);
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (4294967295,4294967295,4294967295,4294967295,255,255,255,255,65535,65535,65535,65535,16777215,16777215,16777215,16777215,18446744073709551615,18446744073709551615,18446744073709551615,18446744073709551615);
+Warnings:
+Warning 1264 Out of range value for column 'i' at row 1
+Warning 1264 Out of range value for column 'i0' at row 1
+Warning 1264 Out of range value for column 'i1' at row 1
+Warning 1264 Out of range value for column 'i20' at row 1
+Warning 1264 Out of range value for column 't' at row 1
+Warning 1264 Out of range value for column 't0' at row 1
+Warning 1264 Out of range value for column 't1' at row 1
+Warning 1264 Out of range value for column 't20' at row 1
+Warning 1264 Out of range value for column 's' at row 1
+Warning 1264 Out of range value for column 's0' at row 1
+Warning 1264 Out of range value for column 's1' at row 1
+Warning 1264 Out of range value for column 's20' at row 1
+Warning 1264 Out of range value for column 'm' at row 1
+Warning 1264 Out of range value for column 'm0' at row 1
+Warning 1264 Out of range value for column 'm1' at row 1
+Warning 1264 Out of range value for column 'm20' at row 1
+Warning 1264 Out of range value for column 'b' at row 1
+Warning 1264 Out of range value for column 'b0' at row 1
+Warning 1264 Out of range value for column 'b1' at row 1
+Warning 1264 Out of range value for column 'b20' at row 1
+SELECT i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20 FROM t1;
+i i0 i1 i20 t t0 t1 t20 s s0 s1 s20 m m0 m1 m20 b b0 b1 b20
+-2147483648 -2147483648 -2147483648 -2147483648 -128 -128 -128 -128 -32768 -32768 -32768 -32768 -8388608 -8388608 -8388608 -8388608 -9223372036854775808 -9223372036854775808 -9223372036854775808 -9223372036854775808
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (-2147483649,-2147483649,-2147483649,-2147483649,-129,-129,-129,-129,-32769,-32769,-32769,-32769,-8388609,-8388609,-8388609,-8388609,-9223372036854775809,-9223372036854775809,-9223372036854775809,-9223372036854775809);
+Warnings:
+Warning 1264 Out of range value for column 'i' at row 1
+Warning 1264 Out of range value for column 'i0' at row 1
+Warning 1264 Out of range value for column 'i1' at row 1
+Warning 1264 Out of range value for column 'i20' at row 1
+Warning 1264 Out of range value for column 't' at row 1
+Warning 1264 Out of range value for column 't0' at row 1
+Warning 1264 Out of range value for column 't1' at row 1
+Warning 1264 Out of range value for column 't20' at row 1
+Warning 1264 Out of range value for column 's' at row 1
+Warning 1264 Out of range value for column 's0' at row 1
+Warning 1264 Out of range value for column 's1' at row 1
+Warning 1264 Out of range value for column 's20' at row 1
+Warning 1264 Out of range value for column 'm' at row 1
+Warning 1264 Out of range value for column 'm0' at row 1
+Warning 1264 Out of range value for column 'm1' at row 1
+Warning 1264 Out of range value for column 'm20' at row 1
+Warning 1264 Out of range value for column 'b' at row 1
+Warning 1264 Out of range value for column 'b0' at row 1
+Warning 1264 Out of range value for column 'b1' at row 1
+Warning 1264 Out of range value for column 'b20' at row 1
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (4294967296,4294967296,4294967296,4294967296,256,256,256,256,65536,65536,65536,65536,16777216,16777216,16777216,16777216,18446744073709551616,18446744073709551616,18446744073709551616,18446744073709551616);
+Warnings:
+Warning 1264 Out of range value for column 'i' at row 1
+Warning 1264 Out of range value for column 'i0' at row 1
+Warning 1264 Out of range value for column 'i1' at row 1
+Warning 1264 Out of range value for column 'i20' at row 1
+Warning 1264 Out of range value for column 't' at row 1
+Warning 1264 Out of range value for column 't0' at row 1
+Warning 1264 Out of range value for column 't1' at row 1
+Warning 1264 Out of range value for column 't20' at row 1
+Warning 1264 Out of range value for column 's' at row 1
+Warning 1264 Out of range value for column 's0' at row 1
+Warning 1264 Out of range value for column 's1' at row 1
+Warning 1264 Out of range value for column 's20' at row 1
+Warning 1264 Out of range value for column 'm' at row 1
+Warning 1264 Out of range value for column 'm0' at row 1
+Warning 1264 Out of range value for column 'm1' at row 1
+Warning 1264 Out of range value for column 'm20' at row 1
+Warning 1264 Out of range value for column 'b' at row 1
+Warning 1264 Out of range value for column 'b0' at row 1
+Warning 1264 Out of range value for column 'b1' at row 1
+Warning 1264 Out of range value for column 'b20' at row 1
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) SELECT b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b FROM t1 WHERE b IN (-9223372036854775808,9223372036854775807,18446744073709551615);
+Warnings:
+Warning 1264 Out of range value for column 'i' at row 8
+Warning 1264 Out of range value for column 'i0' at row 8
+Warning 1264 Out of range value for column 'i1' at row 8
+Warning 1264 Out of range value for column 'i20' at row 8
+Warning 1264 Out of range value for column 't' at row 8
+Warning 1264 Out of range value for column 't0' at row 8
+Warning 1264 Out of range value for column 't1' at row 8
+Warning 1264 Out of range value for column 't20' at row 8
+Warning 1264 Out of range value for column 's' at row 8
+Warning 1264 Out of range value for column 's0' at row 8
+Warning 1264 Out of range value for column 's1' at row 8
+Warning 1264 Out of range value for column 's20' at row 8
+Warning 1264 Out of range value for column 'm' at row 8
+Warning 1264 Out of range value for column 'm0' at row 8
+Warning 1264 Out of range value for column 'm1' at row 8
+Warning 1264 Out of range value for column 'm20' at row 8
+Warning 1264 Out of range value for column 'i' at row 9
+Warning 1264 Out of range value for column 'i0' at row 9
+Warning 1264 Out of range value for column 'i1' at row 9
+Warning 1264 Out of range value for column 'i20' at row 9
+Warning 1264 Out of range value for column 't' at row 9
+Warning 1264 Out of range value for column 't0' at row 9
+Warning 1264 Out of range value for column 't1' at row 9
+Warning 1264 Out of range value for column 't20' at row 9
+Warning 1264 Out of range value for column 's' at row 9
+Warning 1264 Out of range value for column 's0' at row 9
+Warning 1264 Out of range value for column 's1' at row 9
+Warning 1264 Out of range value for column 's20' at row 9
+Warning 1264 Out of range value for column 'm' at row 9
+Warning 1264 Out of range value for column 'm0' at row 9
+Warning 1264 Out of range value for column 'm1' at row 9
+Warning 1264 Out of range value for column 'm20' at row 9
+Warning 1264 Out of range value for column 'i' at row 10
+Warning 1264 Out of range value for column 'i0' at row 10
+Warning 1264 Out of range value for column 'i1' at row 10
+Warning 1264 Out of range value for column 'i20' at row 10
+Warning 1264 Out of range value for column 't' at row 10
+Warning 1264 Out of range value for column 't0' at row 10
+Warning 1264 Out of range value for column 't1' at row 10
+Warning 1264 Out of range value for column 't20' at row 10
+Warning 1264 Out of range value for column 's' at row 10
+Warning 1264 Out of range value for column 's0' at row 10
+Warning 1264 Out of range value for column 's1' at row 10
+Warning 1264 Out of range value for column 's20' at row 10
+Warning 1264 Out of range value for column 'm' at row 10
+Warning 1264 Out of range value for column 'm0' at row 10
+Warning 1264 Out of range value for column 'm1' at row 10
+Warning 1264 Out of range value for column 'm20' at row 10
+Warning 1264 Out of range value for column 'i' at row 11
+Warning 1264 Out of range value for column 'i0' at row 11
+Warning 1264 Out of range value for column 'i1' at row 11
+Warning 1264 Out of range value for column 'i20' at row 11
+Warning 1264 Out of range value for column 't' at row 11
+Warning 1264 Out of range value for column 't0' at row 11
+Warning 1264 Out of range value for column 't1' at row 11
+Warning 1264 Out of range value for column 't20' at row 11
+Warning 1264 Out of range value for column 's' at row 11
+Warning 1264 Out of range value for column 's0' at row 11
+Warning 1264 Out of range value for column 's1' at row 11
+Warning 1264 Out of range value for column 's20' at row 11
+Warning 1264 Out of range value for column 'm' at row 11
+Warning 1264 Out of range value for column 'm0' at row 11
+Warning 1264 Out of range value for column 'm1' at row 11
+Warning 1264 Out of range value for column 'm20' at row 11
+SELECT i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20 FROM t1;
+i i0 i1 i20 t t0 t1 t20 s s0 s1 s20 m m0 m1 m20 b b0 b1 b20
+-2147483648 -2147483648 -2147483648 -2147483648 -128 -128 -128 -128 -32768 -32768 -32768 -32768 -8388608 -8388608 -8388608 -8388608 -9223372036854775808 -9223372036854775808 -9223372036854775808 -9223372036854775808
+-2147483648 -2147483648 -2147483648 -2147483648 -128 -128 -128 -128 -32768 -32768 -32768 -32768 -8388608 -8388608 -8388608 -8388608 -9223372036854775808 -9223372036854775808 -9223372036854775808 -9223372036854775808
+-2147483648 -2147483648 -2147483648 -2147483648 -128 -128 -128 -128 -32768 -32768 -32768 -32768 -8388608 -8388608 -8388608 -8388608 -9223372036854775808 -9223372036854775808 -9223372036854775808 -9223372036854775808
+-2147483648 -2147483648 -2147483648 -2147483648 -128 -128 -128 -128 -32768 -32768 -32768 -32768 -8388608 -8388608 -8388608 -8388608 -9223372036854775808 -9223372036854775808 -9223372036854775808 -9223372036854775808
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+ALTER TABLE t1 ADD COLUMN i257 INT(257) NULL;
+ERROR 42000: Display width out of range for 'i257' (max = 255)
+DROP TABLE t1;
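+# The (N) on integer types is only a display width used for ZEROFILL
+# padding; it changes neither storage nor range, and it is capped at 255,
+# which is what the error above exercises. Sketch (hypothetical tables):
+#   CREATE TABLE i_demo (x INT(255));  -- accepted, still a 4-byte INT
+#   CREATE TABLE i_bad  (x INT(256));  -- rejected: display width out of range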
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c INT NULL,
+c1 INT NULL DEFAULT NULL,
+c2 INT NULL DEFAULT 2147483647,
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c int(11) YES NULL
+c1 int(11) YES NULL
+c2 int(11) YES 2147483647
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES (2147483647,2147483647,2147483647);
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2 7FFFFFFF 7FFFFFFF 7FFFFFFF
+3 NULL NULL 7FFFFFFF
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2 7FFFFFFF
+3 7FFFFFFF
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c TINYINT NULL,
+c1 TINYINT NULL DEFAULT NULL,
+c2 TINYINT NULL DEFAULT 127 ,
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c tinyint(4) YES NULL
+c1 tinyint(4) YES NULL
+c2 tinyint(4) YES 127
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES (127 ,127 ,127 );
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2 7F 7F 7F
+3 NULL NULL 7F
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2 7F
+3 7F
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c SMALLINT NULL,
+c1 SMALLINT NULL DEFAULT NULL,
+c2 SMALLINT NULL DEFAULT 0,
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c smallint(6) YES NULL
+c1 smallint(6) YES NULL
+c2 smallint(6) YES 0
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES (0,0,0);
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2 0 0 0
+3 NULL NULL 0
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2 0
+3 0
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c MEDIUMINT NULL,
+c1 MEDIUMINT NULL DEFAULT NULL,
+c2 MEDIUMINT NULL DEFAULT 1,
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c mediumint(9) YES NULL
+c1 mediumint(9) YES NULL
+c2 mediumint(9) YES 1
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES (1,1,1);
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2 1 1 1
+3 NULL NULL 1
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2 1
+3 1
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c BIGINT NULL,
+c1 BIGINT NULL DEFAULT NULL,
+c2 BIGINT NULL DEFAULT 9223372036854775807,
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c bigint(20) YES NULL
+c1 bigint(20) YES NULL
+c2 bigint(20) YES 9223372036854775807
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES (9223372036854775807,9223372036854775807,9223372036854775807);
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2 7FFFFFFFFFFFFFFF 7FFFFFFFFFFFFFFF 7FFFFFFFFFFFFFFF
+3 NULL NULL 7FFFFFFFFFFFFFFF
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2 7FFFFFFFFFFFFFFF
+3 7FFFFFFFFFFFFFFF
+DROP TABLE t1;
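+# For reference, the signed ranges exercised above are:
+#   TINYINT   -128 .. 127
+#   SMALLINT  -32768 .. 32767
+#   MEDIUMINT -8388608 .. 8388607
+#   INT       -2147483648 .. 2147483647
+#   BIGINT    -9223372036854775808 .. 9223372036854775807
+# Anything outside is clamped to the nearer bound with warning 1264.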
+########################
+# SET columns
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+a SET('') NULL,
+b SET('test1','test2','test3','test4','test5') NULL,
+c SET('01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36','37','38','39','40','41','42','43','44','45','46','47','48','49','50''51','52','53','54','55','56','57','58','59','60','61','62','63','64') NULL,
+PRIMARY KEY (c)
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a set('') YES NULL
+b set('test1','test2','test3','test4','test5') YES NULL
+c set('01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36','37','38','39','40','41','42','43','44','45','46','47','48','49','50''51','52','53','54','55','56','57','58','59','60','61','62','63','64') NO PRI NULL
+INSERT INTO t1 (a,b,c) VALUES
+('','test2,test3','01,34,44,,23'),
+('',5,2),
+(',','test4,test2','');
+Warnings:
+Warning 1265 Data truncated for column 'c' at row 1
+SELECT a,b,c FROM t1;
+a b c
+ test1,test3 02
+ test2,test3 01,23,34,44
+ test2,test4
+INSERT INTO t1 (a,b,c) VALUES (0,'test6',-1);
+Warnings:
+Warning 1265 Data truncated for column 'b' at row 1
+Warning 1265 Data truncated for column 'c' at row 1
+SELECT a,b,c FROM t1;
+a b c
+ 01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50'51,52,53,54,55,56,57,58,59,60,61,62,63,64
+ test1,test3 02
+ test2,test3 01,23,34,44
+ test2,test4
+ALTER TABLE t1 ADD COLUMN e SET('a','A') NULL;
+Warnings:
+Note 1291 Column 'e' has duplicated value 'a' in SET
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a set('') YES NULL
+b set('test1','test2','test3','test4','test5') YES NULL
+c set('01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36','37','38','39','40','41','42','43','44','45','46','47','48','49','50''51','52','53','54','55','56','57','58','59','60','61','62','63','64') NO PRI NULL
+e set('a','A') YES NULL
+ALTER TABLE t1 ADD COLUMN f SET('1','2','3','4','5','6','7','8','9','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z',' ','11','12','13','14','15','16','17','18','19','1a','1b','1c','1d','1e','1f','1g','1h','1i','1j','1k','1l','1m','1n','1o','1p','1q','1r','1s','1t','1u','1v','1w','1x','1y','1z','20','21','22','23','24','25','26','27','28','29','2a','2b','2c','2d','2e','2f','2g','2h','2i','2j','2k','2l','2m','2n','2o','2p','2q','2r','2s','2t','2u','2v','2w','2x','2y','2z','30','31','32','33','34','35','36','37','38','39','3a','3b','3c','3d','3e','3f','3g','3h','3i') NULL;
+ERROR HY000: Too many strings for column f and SET
+SELECT a,b,c,e FROM t1 WHERE FIND_IN_SET('test2',b)>0 OR a != '';
+a b c e
+ test2,test3 01,23,34,44 NULL
+ test2,test4 NULL
+DROP TABLE t1;
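+# A SET column can hold at most 64 distinct members, hence the 'Too many
+# strings' error above; members that collate equal, like 'a' and 'A' under a
+# case-insensitive collation, are collapsed with Note 1291. SET also accepts
+# a numeric value whose bits select members, which is how the 5 inserted
+# into b above became 'test1,test3'. Sketch (hypothetical table s_demo):
+#   CREATE TABLE s_demo (x SET('a','b'));
+#   INSERT INTO s_demo VALUES (3);  -- bits 1|2 select both members: 'a,b'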
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c SET('test1','test2','test3') NULL,
+c1 SET('test1','test2','test3') NULL DEFAULT NULL,
+c2 SET('test1','test2','test3') NULL DEFAULT 'test2,test3',
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c set('test1','test2','test3') YES NULL
+c1 set('test1','test2','test3') YES NULL
+c2 set('test1','test2','test3') YES test2,test3
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES ('test2,test3','test2,test3','test2,test3');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2 74657374322C7465737433 74657374322C7465737433 74657374322C7465737433
+3 NULL NULL 74657374322C7465737433
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2 74657374322C7465737433
+3 74657374322C7465737433
+DROP TABLE t1;
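+# Unlike the numeric columns earlier, HEX() on a SET operates on its string
+# form: 74657374322C7465737433 above is simply 'test2,test3' in ASCII.
+#   SELECT HEX('test2,test3');  -- returns 74657374322C7465737433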
+########################
+# TEXT columns
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+t TEXT NULL,
+t0 TEXT(0) NULL,
+t1 TEXT(1) NULL,
+t300 TEXT(300) NULL,
+tm TEXT(65535) NULL,
+t70k TEXT(70000) NULL,
+t17m TEXT(17000000) NULL,
+tt TINYTEXT NULL,
+m MEDIUMTEXT NULL,
+l LONGTEXT NULL
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+t text YES NULL
+t0 text YES NULL
+t1 tinytext YES NULL
+t300 text YES NULL
+tm text YES NULL
+t70k mediumtext YES NULL
+t17m longtext YES NULL
+tt tinytext YES NULL
+m mediumtext YES NULL
+l longtext YES NULL
+INSERT INTO t1 (t,t0,t1,t300,tm,t70k,t17m,tt,m,l) VALUES
+('','','','','','','','','',''),
+('a','b','c','d','e','f','g','h','i','j'),
+('test1','test2','test3','test4','test5','test6','test7','test8','test9','test10'),
+( REPEAT('a',65535), REPEAT('b',65535), REPEAT('c',255), REPEAT('d',65535), REPEAT('e',65535), REPEAT('f',1048576), REPEAT('g',1048576), REPEAT('h',255), REPEAT('i',1048576), REPEAT('j',1048576) );
+SELECT LENGTH(t), LENGTH(t0), LENGTH(t1), LENGTH(t300), LENGTH(tm), LENGTH(t70k), LENGTH(t17m), LENGTH(tt), LENGTH(m), LENGTH(l) FROM t1;
+LENGTH(t) LENGTH(t0) LENGTH(t1) LENGTH(t300) LENGTH(tm) LENGTH(t70k) LENGTH(t17m) LENGTH(tt) LENGTH(m) LENGTH(l)
+0 0 0 0 0 0 0 0 0 0
+1 1 1 1 1 1 1 1 1 1
+5 5 5 5 5 5 5 5 5 6
+65535 65535 255 65535 65535 1048576 1048576 255 1048576 1048576
+INSERT INTO t1 (t,t0,t1,t300,tm,t70k,t17m,tt,m,l) VALUES
+( REPEAT('a',65536), REPEAT('b',65536), REPEAT('c',256), REPEAT('d',65536), REPEAT('e',65536), REPEAT('f',1048576), REPEAT('g',1048576), REPEAT('h',256), REPEAT('i',1048576), REPEAT('j',1048576) );
+Warnings:
+Warning 1265 Data truncated for column 't' at row 1
+Warning 1265 Data truncated for column 't0' at row 1
+Warning 1265 Data truncated for column 't1' at row 1
+Warning 1265 Data truncated for column 't300' at row 1
+Warning 1265 Data truncated for column 'tm' at row 1
+Warning 1265 Data truncated for column 'tt' at row 1
+SELECT LENGTH(t), LENGTH(t0), LENGTH(t1), LENGTH(t300), LENGTH(tm), LENGTH(t70k), LENGTH(t17m), LENGTH(tt), LENGTH(m), LENGTH(l) FROM t1;
+LENGTH(t) LENGTH(t0) LENGTH(t1) LENGTH(t300) LENGTH(tm) LENGTH(t70k) LENGTH(t17m) LENGTH(tt) LENGTH(m) LENGTH(l)
+0 0 0 0 0 0 0 0 0 0
+1 1 1 1 1 1 1 1 1 1
+5 5 5 5 5 5 5 5 5 6
+65535 65535 255 65535 65535 1048576 1048576 255 1048576 1048576
+65535 65535 255 65535 65535 1048576 1048576 255 1048576 1048576
+ALTER TABLE t1 ADD COLUMN ttt TEXT(4294967296) NULL;
+ERROR 42000: Display width out of range for 'ttt' (max = 4294967295)
+DROP TABLE t1;
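+# TEXT(n) is promoted to the smallest TEXT type that can hold n bytes,
+# matching the SHOW COLUMNS output above: n <= 255 gives TINYTEXT, <= 65535
+# TEXT, <= 16777215 MEDIUMTEXT, and anything larger LONGTEXT (up to the
+# 4294967295 cap the error above enforces). Sketch (hypothetical t_demo):
+#   CREATE TABLE t_demo (a TEXT(255), b TEXT(70000));  -- tinytext, mediumtext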
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c TEXT NULL,
+c1 TEXT NULL DEFAULT NULL,
+c2 TEXT NULL DEFAULT '',
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c text YES NULL
+c1 text YES NULL
+c2 text YES ''
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES ('','','');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2
+3 NULL NULL
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2
+3
+DROP TABLE t1;
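+# HEX('') is the empty string, so the blank cells above are the expected
+# rendering of the empty-string default rather than missing output:
+#   SELECT HEX('');  -- returns ''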
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c TINYTEXT NULL,
+c1 TINYTEXT NULL DEFAULT NULL,
+c2 TINYTEXT NULL DEFAULT '',
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c tinytext YES NULL
+c1 tinytext YES NULL
+c2 tinytext YES ''
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES ('','','');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2
+3 NULL NULL
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2
+3
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c MEDIUMTEXT NULL,
+c1 MEDIUMTEXT NULL DEFAULT NULL,
+c2 MEDIUMTEXT NULL DEFAULT '',
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c mediumtext YES NULL
+c1 mediumtext YES NULL
+c2 mediumtext YES ''
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES ('','','');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2
+3 NULL NULL
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2
+3
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c LONGTEXT NULL,
+c1 LONGTEXT NULL DEFAULT NULL,
+c2 LONGTEXT NULL DEFAULT '',
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c longtext YES NULL
+c1 longtext YES NULL
+c2 longtext YES ''
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+INSERT INTO t1 (c,c1,c2) VALUES ('','','');
+INSERT INTO t1 () VALUES ();
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c) HEX(c1) HEX(c2)
+1 NULL NULL NULL
+2
+3 NULL NULL
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+pk HEX(c2)
+1 NULL
+2
+3
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/col_opt_unsigned.result b/storage/rocksdb/mysql-test/rocksdb/r/col_opt_unsigned.result
new file mode 100644
index 00000000000..b931743d59a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/col_opt_unsigned.result
@@ -0,0 +1,749 @@
+########################
+# Fixed point columns (NUMERIC, DECIMAL)
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+d DECIMAL UNSIGNED,
+d0 DECIMAL(0) UNSIGNED,
+d1_1 DECIMAL(1,1) UNSIGNED,
+d10_2 DECIMAL(10,2) UNSIGNED,
+d60_10 DECIMAL(60,10) UNSIGNED,
+n NUMERIC UNSIGNED,
+n0_0 NUMERIC(0,0) UNSIGNED,
+n1 NUMERIC(1) UNSIGNED,
+n20_4 NUMERIC(20,4) UNSIGNED,
+n65_4 NUMERIC(65,4) UNSIGNED,
+pk NUMERIC UNSIGNED PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+d decimal(10,0) unsigned YES NULL
+d0 decimal(10,0) unsigned YES NULL
+d1_1 decimal(1,1) unsigned YES NULL
+d10_2 decimal(10,2) unsigned YES NULL
+d60_10 decimal(60,10) unsigned YES NULL
+n decimal(10,0) unsigned YES NULL
+n0_0 decimal(10,0) unsigned YES NULL
+n1 decimal(1,0) unsigned YES NULL
+n20_4 decimal(20,4) unsigned YES NULL
+n65_4 decimal(65,4) unsigned YES NULL
+pk decimal(10,0) unsigned NO PRI NULL
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (100,123456,0.3,40000.25,123456789123456789.10001,1024,7000.0,8.0,999999.9,9223372036854775807,1);
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (0,0,0,0,0,0,0,0,0,0,2);
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (9999999999.0,9999999999.0,0.9,99999999.99,99999999999999999999999999999999999999999999999999.9999999999,9999999999.0,9999999999.0,9.0,9999999999999999.9999,9999999999999999999999999999999999999999999999999999999999999.9999,3);
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+100 123456 0.3 40000.25 123456789123456789.1000100000 1024 7000 8 999999.9000 9223372036854775807.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (-100,-123456,-0.3,-40000.25,-123456789123456789.10001,-1024,-7000.0,-8.0,-999999.9,-9223372036854775807,4);
+Warnings:
+Warning 1264 Out of range value for column 'd' at row 1
+Warning 1264 Out of range value for column 'd0' at row 1
+Warning 1264 Out of range value for column 'd1_1' at row 1
+Warning 1264 Out of range value for column 'd10_2' at row 1
+Warning 1264 Out of range value for column 'd60_10' at row 1
+Warning 1264 Out of range value for column 'n' at row 1
+Warning 1264 Out of range value for column 'n0_0' at row 1
+Warning 1264 Out of range value for column 'n1' at row 1
+Warning 1264 Out of range value for column 'n20_4' at row 1
+Warning 1264 Out of range value for column 'n65_4' at row 1
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (-9999999999.0,-9999999999.0,-0.9,-99999999.99,-99999999999999999999999999999999999999999999999999.9999999999,-9999999999.0,-9999999999.0,-9.0,-9999999999999999.9999,-9999999999999999999999999999999999999999999999999999999999999.9999,5);
+Warnings:
+Warning 1264 Out of range value for column 'd' at row 1
+Warning 1264 Out of range value for column 'd0' at row 1
+Warning 1264 Out of range value for column 'd1_1' at row 1
+Warning 1264 Out of range value for column 'd10_2' at row 1
+Warning 1264 Out of range value for column 'd60_10' at row 1
+Warning 1264 Out of range value for column 'n' at row 1
+Warning 1264 Out of range value for column 'n0_0' at row 1
+Warning 1264 Out of range value for column 'n1' at row 1
+Warning 1264 Out of range value for column 'n20_4' at row 1
+Warning 1264 Out of range value for column 'n65_4' at row 1
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+100 123456 0.3 40000.25 123456789123456789.1000100000 1024 7000 8 999999.9000 9223372036854775807.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1 WHERE n20_4 = 9999999999999999.9999 OR d < 100;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+6
+);
+Warnings:
+Warning 1264 Out of range value for column 'd' at row 1
+Warning 1264 Out of range value for column 'd0' at row 1
+Warning 1264 Out of range value for column 'd1_1' at row 1
+Warning 1264 Out of range value for column 'd10_2' at row 1
+Warning 1264 Out of range value for column 'd60_10' at row 1
+Warning 1264 Out of range value for column 'n' at row 1
+Warning 1264 Out of range value for column 'n0_0' at row 1
+Warning 1264 Out of range value for column 'n1' at row 1
+Warning 1264 Out of range value for column 'n20_4' at row 1
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+100 123456 0.3 40000.25 123456789123456789.1000100000 1024 7000 8 999999.9000 9223372036854775807.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (10000000000.0,10000000000.0,1.1,100000000.99,100000000000000000000000000000000000000000000000000.0,10000000000.0,10000000000.0,10.0,10000000000000000.9999,10000000000000000000000000000000000000000000000000000000000000.9999,7);
+Warnings:
+Warning 1264 Out of range value for column 'd' at row 1
+Warning 1264 Out of range value for column 'd0' at row 1
+Warning 1264 Out of range value for column 'd1_1' at row 1
+Warning 1264 Out of range value for column 'd10_2' at row 1
+Warning 1264 Out of range value for column 'd60_10' at row 1
+Warning 1264 Out of range value for column 'n' at row 1
+Warning 1264 Out of range value for column 'n0_0' at row 1
+Warning 1264 Out of range value for column 'n1' at row 1
+Warning 1264 Out of range value for column 'n20_4' at row 1
+Warning 1264 Out of range value for column 'n65_4' at row 1
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+100 123456 0.3 40000.25 123456789123456789.1000100000 1024 7000 8 999999.9000 9223372036854775807.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (9999999999.1,9999999999.1,1.9,99999999.001,99999999999999999999999999999999999999999999999999.99999999991,9999999999.1,9999999999.1,9.1,9999999999999999.00001,9999999999999999999999999999999999999999999999999999999999999.11111,8);
+Warnings:
+Note 1265 Data truncated for column 'd' at row 1
+Note 1265 Data truncated for column 'd0' at row 1
+Warning 1264 Out of range value for column 'd1_1' at row 1
+Note 1265 Data truncated for column 'd10_2' at row 1
+Note 1265 Data truncated for column 'd60_10' at row 1
+Note 1265 Data truncated for column 'n' at row 1
+Note 1265 Data truncated for column 'n0_0' at row 1
+Note 1265 Data truncated for column 'n1' at row 1
+Note 1265 Data truncated for column 'n20_4' at row 1
+Note 1265 Data truncated for column 'n65_4' at row 1
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+100 123456 0.3 40000.25 123456789123456789.1000100000 1024 7000 8 999999.9000 9223372036854775807.0000
+9999999999 9999999999 0.9 99999999.00 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.0000 9999999999999999999999999999999999999999999999999999999999999.1111
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+ALTER TABLE t1 ADD COLUMN n66 NUMERIC(66) UNSIGNED;
+ERROR 42000: Too big precision 66 specified for 'n66'. Maximum is 65
+ALTER TABLE t1 ADD COLUMN n66_6 DECIMAL(66,6) UNSIGNED;
+ERROR 42000: Too big precision 66 specified for 'n66_6'. Maximum is 65
+ALTER TABLE t1 ADD COLUMN n66_66 DECIMAL(66,66) UNSIGNED;
+ERROR 42000: Too big scale 66 specified for 'n66_66'. Maximum is 38
+DROP TABLE t1;
+CREATE TABLE t1 (
+a DECIMAL UNSIGNED,
+b NUMERIC UNSIGNED,
+PRIMARY KEY (a)
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a decimal(10,0) unsigned NO PRI NULL
+b decimal(10,0) unsigned YES NULL
+INSERT INTO t1 (a,b) VALUES (1.0,-1.0);
+Warnings:
+Warning 1264 Out of range value for column 'b' at row 1
+INSERT INTO t1 (a,b) VALUES (-100,100);
+Warnings:
+Warning 1264 Out of range value for column 'a' at row 1
+SELECT a,b FROM t1;
+a b
+0 100
+1 0
+DROP TABLE t1;
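+# UNSIGNED does not extend the positive range of DECIMAL; it only forbids
+# negatives, which in non-strict mode are clamped to 0 with warning 1264,
+# as both inserts above show. Sketch (hypothetical table u_demo):
+#   CREATE TABLE u_demo (x DECIMAL UNSIGNED);
+#   INSERT INTO u_demo VALUES (-5);  -- stored as 0, warning 1264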
+########################
+# Floating point columns (FLOAT, DOUBLE)
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+f FLOAT UNSIGNED,
+f0 FLOAT(0) UNSIGNED,
+r1_1 REAL(1,1) UNSIGNED,
+f23_0 FLOAT(23) UNSIGNED,
+f20_3 FLOAT(20,3) UNSIGNED,
+d DOUBLE UNSIGNED,
+d1_0 DOUBLE(1,0) UNSIGNED,
+d10_10 DOUBLE PRECISION (10,10) UNSIGNED,
+d53 DOUBLE(53,0) UNSIGNED,
+d53_10 DOUBLE(53,10) UNSIGNED,
+pk DOUBLE UNSIGNED PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+f float unsigned YES NULL
+f0 float unsigned YES NULL
+r1_1 double(1,1) unsigned YES NULL
+f23_0 float unsigned YES NULL
+f20_3 float(20,3) unsigned YES NULL
+d double unsigned YES NULL
+d1_0 double(1,0) unsigned YES NULL
+d10_10 double(10,10) unsigned YES NULL
+d53 double(53,0) unsigned YES NULL
+d53_10 double(53,10) unsigned YES NULL
+pk double unsigned NO PRI NULL
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (12345.12345,12345.12345,0.9,123456789.123,56789.987,11111111.111,8.0,0.0123456789,1234566789123456789,99999999999999999.99999999,1);
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 12345.1
+d 11111111.111
+d10_10 0.0123456789
+d1_0 8
+d53 1234566789123456800
+d53_10 100000000000000000.0000000000
+f0 12345.1
+f20_3 56789.988
+f23_0 123457000
+r1_1 0.9
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (0,0,0,0,0,0,0,0,0,0,2);
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (
+99999999999999999999999999999999999999,
+99999999999999999999999999999999999999.9999999999999999,
+0.9,
+99999999999999999999999999999999999999.9,
+99999999999999999.999,
+999999999999999999999999999999999999999999999999999999999999999999999999999999999,
+9,
+0.9999999999,
+1999999999999999999999999999999999999999999999999999999,
+19999999999999999999999999999999999999999999.9999999999,
+3
+);
+Warnings:
+Warning 1264 Out of range value for column 'd53' at row 1
+Warning 1264 Out of range value for column 'd53_10' at row 1
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 12345.1
+d 0
+d 11111111.111
+d 1e81
+d10_10 0.0000000000
+d10_10 0.0123456789
+d10_10 0.9999999999
+d1_0 0
+d1_0 8
+d1_0 9
+d53 0
+d53 100000000000000000000000000000000000000000000000000000
+d53 1234566789123456800
+d53_10 0.0000000000
+d53_10 100000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+f 0
+f 1e38
+f0 0
+f0 12345.1
+f0 1e38
+f20_3 0.000
+f20_3 56789.988
+f20_3 99999998430674940.000
+f23_0 0
+f23_0 123457000
+f23_0 1e38
+r1_1 0.0
+r1_1 0.9
+r1_1 0.9
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (-999999999999999999999999,-99999999999.999999999999,-0.9,-999.99999999999999999999,-99999999999999999.999,-999999999999999999999999999999999999999999999999999999999999-0.999,-9,-.9999999999,-999999999999999999999999999999.99999999999999999999999,-9999999999999999999999999999999999999999999.9999999999,4);
+Warnings:
+Warning 1264 Out of range value for column 'f' at row 1
+Warning 1264 Out of range value for column 'f0' at row 1
+Warning 1264 Out of range value for column 'r1_1' at row 1
+Warning 1264 Out of range value for column 'f23_0' at row 1
+Warning 1264 Out of range value for column 'f20_3' at row 1
+Warning 1264 Out of range value for column 'd' at row 1
+Warning 1264 Out of range value for column 'd1_0' at row 1
+Warning 1264 Out of range value for column 'd10_10' at row 1
+Warning 1264 Out of range value for column 'd53' at row 1
+Warning 1264 Out of range value for column 'd53_10' at row 1
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 12345.1
+d 0
+d 0
+d 11111111.111
+d 1e81
+d10_10 0.0000000000
+d10_10 0.0000000000
+d10_10 0.0123456789
+d10_10 0.9999999999
+d1_0 0
+d1_0 0
+d1_0 8
+d1_0 9
+d53 0
+d53 0
+d53 100000000000000000000000000000000000000000000000000000
+d53 1234566789123456800
+d53_10 0.0000000000
+d53_10 0.0000000000
+d53_10 100000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+f 0
+f 0
+f 1e38
+f0 0
+f0 0
+f0 12345.1
+f0 1e38
+f20_3 0.000
+f20_3 0.000
+f20_3 56789.988
+f20_3 99999998430674940.000
+f23_0 0
+f23_0 0
+f23_0 123457000
+f23_0 1e38
+r1_1 0.0
+r1_1 0.0
+r1_1 0.9
+r1_1 0.9
+SELECT
+CONCAT('', MAX(f)),
+CONCAT('', MAX(f0)),
+CONCAT('', MAX(r1_1)),
+CONCAT('', MAX(f23_0)),
+CONCAT('', MAX(f20_3)),
+CONCAT('', MAX(d)),
+CONCAT('', MAX(d1_0)),
+CONCAT('', MAX(d10_10)),
+CONCAT('', MAX(d53)),
+CONCAT('', MAX(d53_10)) FROM t1;
+CONCAT('', MAX(f)) 9.999999680285692e37
+CONCAT('', MAX(d)) 1e81
+CONCAT('', MAX(d10_10)) 0.9999999999
+CONCAT('', MAX(d1_0)) 9
+CONCAT('', MAX(d53)) 100000000000000000000000000000000000000000000000000000
+CONCAT('', MAX(d53_10)) 10000000000000000000000000000000000000000000.0000000000
+CONCAT('', MAX(f0)) 9.999999680285692e37
+CONCAT('', MAX(f20_3)) 99999998430674940.000
+CONCAT('', MAX(f23_0)) 9.999999680285692e37
+CONCAT('', MAX(r1_1)) 0.9
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+5
+);
+Warnings:
+Warning 1264 Out of range value for column 'f' at row 1
+Warning 1264 Out of range value for column 'f0' at row 1
+Warning 1264 Out of range value for column 'r1_1' at row 1
+Warning 1264 Out of range value for column 'f23_0' at row 1
+Warning 1264 Out of range value for column 'f20_3' at row 1
+Warning 1264 Out of range value for column 'd1_0' at row 1
+Warning 1264 Out of range value for column 'd10_10' at row 1
+Warning 1264 Out of range value for column 'd53' at row 1
+Warning 1264 Out of range value for column 'd53_10' at row 1
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 12345.1
+d 0
+d 0
+d 11111111.111
+d 1e61
+d 1e81
+d10_10 0.0000000000
+d10_10 0.0000000000
+d10_10 0.0123456789
+d10_10 0.9999999999
+d10_10 0.9999999999
+d1_0 0
+d1_0 0
+d1_0 8
+d1_0 9
+d1_0 9
+d53 0
+d53 0
+d53 100000000000000000000000000000000000000000000000000000
+d53 100000000000000000000000000000000000000000000000000000
+d53 1234566789123456800
+d53_10 0.0000000000
+d53_10 0.0000000000
+d53_10 100000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+f 0
+f 0
+f 1e38
+f 3.40282e38
+f0 0
+f0 0
+f0 12345.1
+f0 1e38
+f0 3.40282e38
+f20_3 0.000
+f20_3 0.000
+f20_3 56789.988
+f20_3 99999998430674940.000
+f20_3 99999998430674940.000
+f23_0 0
+f23_0 0
+f23_0 123457000
+f23_0 1e38
+f23_0 3.40282e38
+r1_1 0.0
+r1_1 0.0
+r1_1 0.9
+r1_1 0.9
+r1_1 0.9
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (
+999999999999999999999999999999999999999,
+999999999999999999999999999999999999999.9999999999999999,
+1.9,
+999999999999999999999999999999999999999.9,
+999999999999999999.999,
+9999999999999999999999999999999999999999999999999999999999999999999999999999999999,
+99,
+1.9999999999,
+1999999999999999999999999999999999999999999999999999999,
+19999999999999999999999999999999999999999999.9999999999,
+6
+);
+Warnings:
+Warning 1916 Got overflow when converting '' to DECIMAL. Value truncated
+Warning 1264 Out of range value for column 'f' at row 1
+Warning 1264 Out of range value for column 'f0' at row 1
+Warning 1264 Out of range value for column 'r1_1' at row 1
+Warning 1264 Out of range value for column 'f23_0' at row 1
+Warning 1264 Out of range value for column 'f20_3' at row 1
+Warning 1264 Out of range value for column 'd1_0' at row 1
+Warning 1264 Out of range value for column 'd10_10' at row 1
+Warning 1264 Out of range value for column 'd53' at row 1
+Warning 1264 Out of range value for column 'd53_10' at row 1
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 12345.1
+d 0
+d 0
+d 11111111.111
+d 1e61
+d 1e65
+d 1e81
+d10_10 0.0000000000
+d10_10 0.0000000000
+d10_10 0.0123456789
+d10_10 0.9999999999
+d10_10 0.9999999999
+d10_10 0.9999999999
+d1_0 0
+d1_0 0
+d1_0 8
+d1_0 9
+d1_0 9
+d1_0 9
+d53 0
+d53 0
+d53 100000000000000000000000000000000000000000000000000000
+d53 100000000000000000000000000000000000000000000000000000
+d53 100000000000000000000000000000000000000000000000000000
+d53 1234566789123456800
+d53_10 0.0000000000
+d53_10 0.0000000000
+d53_10 100000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+f 0
+f 0
+f 1e38
+f 3.40282e38
+f 3.40282e38
+f0 0
+f0 0
+f0 12345.1
+f0 1e38
+f0 3.40282e38
+f0 3.40282e38
+f20_3 0.000
+f20_3 0.000
+f20_3 56789.988
+f20_3 99999998430674940.000
+f20_3 99999998430674940.000
+f20_3 99999998430674940.000
+f23_0 0
+f23_0 0
+f23_0 123457000
+f23_0 1e38
+f23_0 3.40282e38
+f23_0 3.40282e38
+r1_1 0.0
+r1_1 0.0
+r1_1 0.9
+r1_1 0.9
+r1_1 0.9
+r1_1 0.9
+ALTER TABLE t1 ADD COLUMN d0_0 DOUBLE(0,0) UNSIGNED;
+ALTER TABLE t1 ADD COLUMN n66_6 DECIMAL(256,1) UNSIGNED;
+ERROR 42000: Too big precision 256 specified for 'n66_6'. Maximum is 65
+ALTER TABLE t1 ADD COLUMN n66_66 DECIMAL(40,35) UNSIGNED;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a DOUBLE UNSIGNED,
+b FLOAT UNSIGNED,
+PRIMARY KEY (b)
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a double unsigned YES NULL
+b float unsigned NO PRI NULL
+INSERT INTO t1 (a,b) VALUES (1.0,-1.0);
+Warnings:
+Warning 1264 Out of range value for column 'b' at row 1
+INSERT INTO t1 (a,b) VALUES (-100,100);
+Warnings:
+Warning 1264 Out of range value for column 'a' at row 1
+SELECT a,b FROM t1;
+a b
+0 100
+1 0
+DROP TABLE t1;
+########################
+# INT columns
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+i INT UNSIGNED,
+i0 INT(0) UNSIGNED,
+i1 INT(1) UNSIGNED,
+i20 INT(20) UNSIGNED,
+t TINYINT UNSIGNED,
+t0 TINYINT(0) UNSIGNED,
+t1 TINYINT(1) UNSIGNED,
+t20 TINYINT(20) UNSIGNED,
+s SMALLINT UNSIGNED,
+s0 SMALLINT(0) UNSIGNED,
+s1 SMALLINT(1) UNSIGNED,
+s20 SMALLINT(20) UNSIGNED,
+m MEDIUMINT UNSIGNED,
+m0 MEDIUMINT(0) UNSIGNED,
+m1 MEDIUMINT(1) UNSIGNED,
+m20 MEDIUMINT(20) UNSIGNED,
+b BIGINT UNSIGNED,
+b0 BIGINT(0) UNSIGNED,
+b1 BIGINT(1) UNSIGNED,
+b20 BIGINT(20) UNSIGNED,
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+i int(10) unsigned YES NULL
+i0 int(10) unsigned YES NULL
+i1 int(1) unsigned YES NULL
+i20 int(20) unsigned YES NULL
+t tinyint(3) unsigned YES NULL
+t0 tinyint(3) unsigned YES NULL
+t1 tinyint(1) unsigned YES NULL
+t20 tinyint(20) unsigned YES NULL
+s smallint(5) unsigned YES NULL
+s0 smallint(5) unsigned YES NULL
+s1 smallint(1) unsigned YES NULL
+s20 smallint(20) unsigned YES NULL
+m mediumint(8) unsigned YES NULL
+m0 mediumint(8) unsigned YES NULL
+m1 mediumint(1) unsigned YES NULL
+m20 mediumint(20) unsigned YES NULL
+b bigint(20) unsigned YES NULL
+b0 bigint(20) unsigned YES NULL
+b1 bigint(1) unsigned YES NULL
+b20 bigint(20) unsigned YES NULL
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20);
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (2147483647,2147483647,2147483647,2147483647,127,127,127,127,32767,32767,32767,32767,8388607,8388607,8388607,8388607,9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807);
+SELECT i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20 FROM t1;
+i i0 i1 i20 t t0 t1 t20 s s0 s1 s20 m m0 m1 m20 b b0 b1 b20
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (-2147483648,-2147483648,-2147483648,-2147483648,-128,-128,-128,-128,-32768,-32768,-32768,-32768,-8388608,-8388608,-8388608,-8388608,-9223372036854775808,-9223372036854775808,-9223372036854775808,-9223372036854775808);
+Warnings:
+Warning 1264 Out of range value for column 'i' at row 1
+Warning 1264 Out of range value for column 'i0' at row 1
+Warning 1264 Out of range value for column 'i1' at row 1
+Warning 1264 Out of range value for column 'i20' at row 1
+Warning 1264 Out of range value for column 't' at row 1
+Warning 1264 Out of range value for column 't0' at row 1
+Warning 1264 Out of range value for column 't1' at row 1
+Warning 1264 Out of range value for column 't20' at row 1
+Warning 1264 Out of range value for column 's' at row 1
+Warning 1264 Out of range value for column 's0' at row 1
+Warning 1264 Out of range value for column 's1' at row 1
+Warning 1264 Out of range value for column 's20' at row 1
+Warning 1264 Out of range value for column 'm' at row 1
+Warning 1264 Out of range value for column 'm0' at row 1
+Warning 1264 Out of range value for column 'm1' at row 1
+Warning 1264 Out of range value for column 'm20' at row 1
+Warning 1264 Out of range value for column 'b' at row 1
+Warning 1264 Out of range value for column 'b0' at row 1
+Warning 1264 Out of range value for column 'b1' at row 1
+Warning 1264 Out of range value for column 'b20' at row 1
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (4294967295,4294967295,4294967295,4294967295,255,255,255,255,65535,65535,65535,65535,16777215,16777215,16777215,16777215,18446744073709551615,18446744073709551615,18446744073709551615,18446744073709551615);
+SELECT i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20 FROM t1;
+i i0 i1 i20 t t0 t1 t20 s s0 s1 s20 m m0 m1 m20 b b0 b1 b20
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+4294967295 4294967295 4294967295 4294967295 255 255 255 255 65535 65535 65535 65535 16777215 16777215 16777215 16777215 18446744073709551615 18446744073709551615 18446744073709551615 18446744073709551615
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (-2147483649,-2147483649,-2147483649,-2147483649,-129,-129,-129,-129,-32769,-32769,-32769,-32769,-8388609,-8388609,-8388609,-8388609,-9223372036854775809,-9223372036854775809,-9223372036854775809,-9223372036854775809);
+Warnings:
+Warning 1264 Out of range value for column 'i' at row 1
+Warning 1264 Out of range value for column 'i0' at row 1
+Warning 1264 Out of range value for column 'i1' at row 1
+Warning 1264 Out of range value for column 'i20' at row 1
+Warning 1264 Out of range value for column 't' at row 1
+Warning 1264 Out of range value for column 't0' at row 1
+Warning 1264 Out of range value for column 't1' at row 1
+Warning 1264 Out of range value for column 't20' at row 1
+Warning 1264 Out of range value for column 's' at row 1
+Warning 1264 Out of range value for column 's0' at row 1
+Warning 1264 Out of range value for column 's1' at row 1
+Warning 1264 Out of range value for column 's20' at row 1
+Warning 1264 Out of range value for column 'm' at row 1
+Warning 1264 Out of range value for column 'm0' at row 1
+Warning 1264 Out of range value for column 'm1' at row 1
+Warning 1264 Out of range value for column 'm20' at row 1
+Warning 1264 Out of range value for column 'b' at row 1
+Warning 1264 Out of range value for column 'b0' at row 1
+Warning 1264 Out of range value for column 'b1' at row 1
+Warning 1264 Out of range value for column 'b20' at row 1
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (4294967296,4294967296,4294967296,4294967296,256,256,256,256,65536,65536,65536,65536,16777216,16777216,16777216,16777216,18446744073709551616,18446744073709551616,18446744073709551616,18446744073709551616);
+Warnings:
+Warning 1264 Out of range value for column 'i' at row 1
+Warning 1264 Out of range value for column 'i0' at row 1
+Warning 1264 Out of range value for column 'i1' at row 1
+Warning 1264 Out of range value for column 'i20' at row 1
+Warning 1264 Out of range value for column 't' at row 1
+Warning 1264 Out of range value for column 't0' at row 1
+Warning 1264 Out of range value for column 't1' at row 1
+Warning 1264 Out of range value for column 't20' at row 1
+Warning 1264 Out of range value for column 's' at row 1
+Warning 1264 Out of range value for column 's0' at row 1
+Warning 1264 Out of range value for column 's1' at row 1
+Warning 1264 Out of range value for column 's20' at row 1
+Warning 1264 Out of range value for column 'm' at row 1
+Warning 1264 Out of range value for column 'm0' at row 1
+Warning 1264 Out of range value for column 'm1' at row 1
+Warning 1264 Out of range value for column 'm20' at row 1
+Warning 1264 Out of range value for column 'b' at row 1
+Warning 1264 Out of range value for column 'b0' at row 1
+Warning 1264 Out of range value for column 'b1' at row 1
+Warning 1264 Out of range value for column 'b20' at row 1
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) SELECT b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b FROM t1 WHERE b IN (-9223372036854775808,9223372036854775807,18446744073709551615);
+Warnings:
+Warning 1264 Out of range value for column 'i' at row 8
+Warning 1264 Out of range value for column 'i0' at row 8
+Warning 1264 Out of range value for column 'i1' at row 8
+Warning 1264 Out of range value for column 'i20' at row 8
+Warning 1264 Out of range value for column 't' at row 8
+Warning 1264 Out of range value for column 't0' at row 8
+Warning 1264 Out of range value for column 't1' at row 8
+Warning 1264 Out of range value for column 't20' at row 8
+Warning 1264 Out of range value for column 's' at row 8
+Warning 1264 Out of range value for column 's0' at row 8
+Warning 1264 Out of range value for column 's1' at row 8
+Warning 1264 Out of range value for column 's20' at row 8
+Warning 1264 Out of range value for column 'm' at row 8
+Warning 1264 Out of range value for column 'm0' at row 8
+Warning 1264 Out of range value for column 'm1' at row 8
+Warning 1264 Out of range value for column 'm20' at row 8
+Warning 1264 Out of range value for column 'i' at row 9
+Warning 1264 Out of range value for column 'i0' at row 9
+Warning 1264 Out of range value for column 'i1' at row 9
+Warning 1264 Out of range value for column 'i20' at row 9
+Warning 1264 Out of range value for column 't' at row 9
+Warning 1264 Out of range value for column 't0' at row 9
+Warning 1264 Out of range value for column 't1' at row 9
+Warning 1264 Out of range value for column 't20' at row 9
+Warning 1264 Out of range value for column 's' at row 9
+Warning 1264 Out of range value for column 's0' at row 9
+Warning 1264 Out of range value for column 's1' at row 9
+Warning 1264 Out of range value for column 's20' at row 9
+Warning 1264 Out of range value for column 'm' at row 9
+Warning 1264 Out of range value for column 'm0' at row 9
+Warning 1264 Out of range value for column 'm1' at row 9
+Warning 1264 Out of range value for column 'm20' at row 9
+Warning 1264 Out of range value for column 'i' at row 10
+Warning 1264 Out of range value for column 'i0' at row 10
+Warning 1264 Out of range value for column 'i1' at row 10
+Warning 1264 Out of range value for column 'i20' at row 10
+Warning 1264 Out of range value for column 't' at row 10
+Warning 1264 Out of range value for column 't0' at row 10
+Warning 1264 Out of range value for column 't1' at row 10
+Warning 1264 Out of range value for column 't20' at row 10
+Warning 1264 Out of range value for column 's' at row 10
+Warning 1264 Out of range value for column 's0' at row 10
+Warning 1264 Out of range value for column 's1' at row 10
+Warning 1264 Out of range value for column 's20' at row 10
+Warning 1264 Out of range value for column 'm' at row 10
+Warning 1264 Out of range value for column 'm0' at row 10
+Warning 1264 Out of range value for column 'm1' at row 10
+Warning 1264 Out of range value for column 'm20' at row 10
+SELECT i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20 FROM t1;
+i i0 i1 i20 t t0 t1 t20 s s0 s1 s20 m m0 m1 m20 b b0 b1 b20
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+4294967295 4294967295 4294967295 4294967295 255 255 255 255 65535 65535 65535 65535 16777215 16777215 16777215 16777215 18446744073709551615 18446744073709551615 18446744073709551615 18446744073709551615
+4294967295 4294967295 4294967295 4294967295 255 255 255 255 65535 65535 65535 65535 16777215 16777215 16777215 16777215 18446744073709551615 18446744073709551615 18446744073709551615 18446744073709551615
+4294967295 4294967295 4294967295 4294967295 255 255 255 255 65535 65535 65535 65535 16777215 16777215 16777215 16777215 18446744073709551615 18446744073709551615 18446744073709551615 18446744073709551615
+4294967295 4294967295 4294967295 4294967295 255 255 255 255 65535 65535 65535 65535 16777215 16777215 16777215 16777215 18446744073709551615 18446744073709551615 18446744073709551615 18446744073709551615
+4294967295 4294967295 4294967295 4294967295 255 255 255 255 65535 65535 65535 65535 16777215 16777215 16777215 16777215 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+ALTER TABLE t1 ADD COLUMN i257 INT(257) UNSIGNED;
+ERROR 42000: Display width out of range for 'i257' (max = 255)
+DROP TABLE t1;
+CREATE TABLE t1 (
+t TINYINT UNSIGNED,
+s SMALLINT UNSIGNED,
+m MEDIUMINT UNSIGNED,
+i INT UNSIGNED,
+b BIGINT UNSIGNED,
+PRIMARY KEY (b)
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+t tinyint(3) unsigned YES NULL
+s smallint(5) unsigned YES NULL
+m mediumint(8) unsigned YES NULL
+i int(10) unsigned YES NULL
+b bigint(20) unsigned NO PRI NULL
+INSERT INTO t1 (t,s,m,i,b) VALUES (255,65535,16777215,4294967295,18446744073709551615);
+INSERT INTO t1 (t,s,m,i,b) VALUES (-1,-1,-1,-1,-1);
+Warnings:
+Warning 1264 Out of range value for column 't' at row 1
+Warning 1264 Out of range value for column 's' at row 1
+Warning 1264 Out of range value for column 'm' at row 1
+Warning 1264 Out of range value for column 'i' at row 1
+Warning 1264 Out of range value for column 'b' at row 1
+SELECT t,s,m,i,b FROM t1;
+t s m i b
+0 0 0 0 0
+255 65535 16777215 4294967295 18446744073709551615
+DROP TABLE t1;
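The expected output above pins down the server-level UNSIGNED semantics as seen through the RocksDB engine: with the default non-strict sql_mode, a negative value is clamped to 0 and an over-large value to the type maximum, each raising warning 1264 rather than an error. The following minimal sketch reproduces that behaviour; the table and column names are illustrative only (not part of the test suite), and a server built with the RocksDB plugin and non-strict sql_mode is assumed.

-- Sketch only: non-strict sql_mode assumed (under strict mode these inserts error out).
CREATE TABLE unsigned_demo (
  t TINYINT UNSIGNED,
  b BIGINT UNSIGNED,
  PRIMARY KEY (b)
) ENGINE=rocksdb;
INSERT INTO unsigned_demo (t,b) VALUES (-1, -1);
-- both columns clamp to 0; each produces "Warning 1264 Out of range value"
INSERT INTO unsigned_demo (t,b) VALUES (256, 18446744073709551616);
-- clamps to 255 and 18446744073709551615, the TINYINT/BIGINT UNSIGNED maxima
SELECT t, b FROM unsigned_demo;
DROP TABLE unsigned_demo;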
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/col_opt_zerofill.result b/storage/rocksdb/mysql-test/rocksdb/r/col_opt_zerofill.result
new file mode 100644
index 00000000000..f8fdfed86e7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/col_opt_zerofill.result
@@ -0,0 +1,731 @@
+########################
+# Fixed point columns (NUMERIC, DECIMAL)
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+d DECIMAL ZEROFILL,
+d0 DECIMAL(0) ZEROFILL,
+d1_1 DECIMAL(1,1) ZEROFILL,
+d10_2 DECIMAL(10,2) ZEROFILL,
+d60_10 DECIMAL(60,10) ZEROFILL,
+n NUMERIC ZEROFILL,
+n0_0 NUMERIC(0,0) ZEROFILL,
+n1 NUMERIC(1) ZEROFILL,
+n20_4 NUMERIC(20,4) ZEROFILL,
+n65_4 NUMERIC(65,4) ZEROFILL,
+pk NUMERIC ZEROFILL PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+d decimal(10,0) unsigned zerofill YES NULL
+d0 decimal(10,0) unsigned zerofill YES NULL
+d1_1 decimal(1,1) unsigned zerofill YES NULL
+d10_2 decimal(10,2) unsigned zerofill YES NULL
+d60_10 decimal(60,10) unsigned zerofill YES NULL
+n decimal(10,0) unsigned zerofill YES NULL
+n0_0 decimal(10,0) unsigned zerofill YES NULL
+n1 decimal(1,0) unsigned zerofill YES NULL
+n20_4 decimal(20,4) unsigned zerofill YES NULL
+n65_4 decimal(65,4) unsigned zerofill YES NULL
+pk decimal(10,0) unsigned zerofill NO PRI NULL
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (100,123456,0.3,40000.25,123456789123456789.10001,1024,7000.0,8.0,999999.9,9223372036854775807,1);
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (0,0,0,0,0,0,0,0,0,0,2);
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (9999999999.0,9999999999.0,0.9,99999999.99,99999999999999999999999999999999999999999999999999.9999999999,9999999999.0,9999999999.0,9.0,9999999999999999.9999,9999999999999999999999999999999999999999999999999999999999999.9999,3);
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+0000000000 0000000000 0.0 00000000.00 00000000000000000000000000000000000000000000000000.0000000000 0000000000 0000000000 0 0000000000000000.0000 0000000000000000000000000000000000000000000000000000000000000.0000
+0000000100 0000123456 0.3 00040000.25 00000000000000000000000000000000123456789123456789.1000100000 0000001024 0000007000 8 0000000000999999.9000 0000000000000000000000000000000000000000009223372036854775807.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (-100,-123456,-0.3,-40000.25,-123456789123456789.10001,-1024,-7000.0,-8.0,-999999.9,-9223372036854775807,4);
+Warnings:
+Warning 1264 Out of range value for column 'd' at row 1
+Warning 1264 Out of range value for column 'd0' at row 1
+Warning 1264 Out of range value for column 'd1_1' at row 1
+Warning 1264 Out of range value for column 'd10_2' at row 1
+Warning 1264 Out of range value for column 'd60_10' at row 1
+Warning 1264 Out of range value for column 'n' at row 1
+Warning 1264 Out of range value for column 'n0_0' at row 1
+Warning 1264 Out of range value for column 'n1' at row 1
+Warning 1264 Out of range value for column 'n20_4' at row 1
+Warning 1264 Out of range value for column 'n65_4' at row 1
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (-9999999999.0,-9999999999.0,-0.9,-99999999.99,-99999999999999999999999999999999999999999999999999.9999999999,-9999999999.0,-9999999999.0,-9.0,-9999999999999999.9999,-9999999999999999999999999999999999999999999999999999999999999.9999,5);
+Warnings:
+Warning 1264 Out of range value for column 'd' at row 1
+Warning 1264 Out of range value for column 'd0' at row 1
+Warning 1264 Out of range value for column 'd1_1' at row 1
+Warning 1264 Out of range value for column 'd10_2' at row 1
+Warning 1264 Out of range value for column 'd60_10' at row 1
+Warning 1264 Out of range value for column 'n' at row 1
+Warning 1264 Out of range value for column 'n0_0' at row 1
+Warning 1264 Out of range value for column 'n1' at row 1
+Warning 1264 Out of range value for column 'n20_4' at row 1
+Warning 1264 Out of range value for column 'n65_4' at row 1
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+0000000000 0000000000 0.0 00000000.00 00000000000000000000000000000000000000000000000000.0000000000 0000000000 0000000000 0 0000000000000000.0000 0000000000000000000000000000000000000000000000000000000000000.0000
+0000000000 0000000000 0.0 00000000.00 00000000000000000000000000000000000000000000000000.0000000000 0000000000 0000000000 0 0000000000000000.0000 0000000000000000000000000000000000000000000000000000000000000.0000
+0000000000 0000000000 0.0 00000000.00 00000000000000000000000000000000000000000000000000.0000000000 0000000000 0000000000 0 0000000000000000.0000 0000000000000000000000000000000000000000000000000000000000000.0000
+0000000100 0000123456 0.3 00040000.25 00000000000000000000000000000000123456789123456789.1000100000 0000001024 0000007000 8 0000000000999999.9000 0000000000000000000000000000000000000000009223372036854775807.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1 WHERE n20_4 = 9999999999999999.9999 OR d < 100;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+0000000000 0000000000 0.0 00000000.00 00000000000000000000000000000000000000000000000000.0000000000 0000000000 0000000000 0 0000000000000000.0000 0000000000000000000000000000000000000000000000000000000000000.0000
+0000000000 0000000000 0.0 00000000.00 00000000000000000000000000000000000000000000000000.0000000000 0000000000 0000000000 0 0000000000000000.0000 0000000000000000000000000000000000000000000000000000000000000.0000
+0000000000 0000000000 0.0 00000000.00 00000000000000000000000000000000000000000000000000.0000000000 0000000000 0000000000 0 0000000000000000.0000 0000000000000000000000000000000000000000000000000000000000000.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+6
+);
+Warnings:
+Warning 1264 Out of range value for column 'd' at row 1
+Warning 1264 Out of range value for column 'd0' at row 1
+Warning 1264 Out of range value for column 'd1_1' at row 1
+Warning 1264 Out of range value for column 'd10_2' at row 1
+Warning 1264 Out of range value for column 'd60_10' at row 1
+Warning 1264 Out of range value for column 'n' at row 1
+Warning 1264 Out of range value for column 'n0_0' at row 1
+Warning 1264 Out of range value for column 'n1' at row 1
+Warning 1264 Out of range value for column 'n20_4' at row 1
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+0000000000 0000000000 0.0 00000000.00 00000000000000000000000000000000000000000000000000.0000000000 0000000000 0000000000 0 0000000000000000.0000 0000000000000000000000000000000000000000000000000000000000000.0000
+0000000000 0000000000 0.0 00000000.00 00000000000000000000000000000000000000000000000000.0000000000 0000000000 0000000000 0 0000000000000000.0000 0000000000000000000000000000000000000000000000000000000000000.0000
+0000000000 0000000000 0.0 00000000.00 00000000000000000000000000000000000000000000000000.0000000000 0000000000 0000000000 0 0000000000000000.0000 0000000000000000000000000000000000000000000000000000000000000.0000
+0000000100 0000123456 0.3 00040000.25 00000000000000000000000000000000123456789123456789.1000100000 0000001024 0000007000 8 0000000000999999.9000 0000000000000000000000000000000000000000009223372036854775807.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (10000000000.0,10000000000.0,1.1,100000000.99,100000000000000000000000000000000000000000000000000.0,10000000000.0,10000000000.0,10.0,10000000000000000.9999,10000000000000000000000000000000000000000000000000000000000000.9999,7);
+Warnings:
+Warning 1264 Out of range value for column 'd' at row 1
+Warning 1264 Out of range value for column 'd0' at row 1
+Warning 1264 Out of range value for column 'd1_1' at row 1
+Warning 1264 Out of range value for column 'd10_2' at row 1
+Warning 1264 Out of range value for column 'd60_10' at row 1
+Warning 1264 Out of range value for column 'n' at row 1
+Warning 1264 Out of range value for column 'n0_0' at row 1
+Warning 1264 Out of range value for column 'n1' at row 1
+Warning 1264 Out of range value for column 'n20_4' at row 1
+Warning 1264 Out of range value for column 'n65_4' at row 1
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+0000000000 0000000000 0.0 00000000.00 00000000000000000000000000000000000000000000000000.0000000000 0000000000 0000000000 0 0000000000000000.0000 0000000000000000000000000000000000000000000000000000000000000.0000
+0000000000 0000000000 0.0 00000000.00 00000000000000000000000000000000000000000000000000.0000000000 0000000000 0000000000 0 0000000000000000.0000 0000000000000000000000000000000000000000000000000000000000000.0000
+0000000000 0000000000 0.0 00000000.00 00000000000000000000000000000000000000000000000000.0000000000 0000000000 0000000000 0 0000000000000000.0000 0000000000000000000000000000000000000000000000000000000000000.0000
+0000000100 0000123456 0.3 00040000.25 00000000000000000000000000000000123456789123456789.1000100000 0000001024 0000007000 8 0000000000999999.9000 0000000000000000000000000000000000000000009223372036854775807.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (9999999999.1,9999999999.1,1.9,99999999.001,99999999999999999999999999999999999999999999999999.99999999991,9999999999.1,9999999999.1,9.1,9999999999999999.00001,9999999999999999999999999999999999999999999999999999999999999.11111,8);
+Warnings:
+Note 1265 Data truncated for column 'd' at row 1
+Note 1265 Data truncated for column 'd0' at row 1
+Warning 1264 Out of range value for column 'd1_1' at row 1
+Note 1265 Data truncated for column 'd10_2' at row 1
+Note 1265 Data truncated for column 'd60_10' at row 1
+Note 1265 Data truncated for column 'n' at row 1
+Note 1265 Data truncated for column 'n0_0' at row 1
+Note 1265 Data truncated for column 'n1' at row 1
+Note 1265 Data truncated for column 'n20_4' at row 1
+Note 1265 Data truncated for column 'n65_4' at row 1
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+0000000000 0000000000 0.0 00000000.00 00000000000000000000000000000000000000000000000000.0000000000 0000000000 0000000000 0 0000000000000000.0000 0000000000000000000000000000000000000000000000000000000000000.0000
+0000000000 0000000000 0.0 00000000.00 00000000000000000000000000000000000000000000000000.0000000000 0000000000 0000000000 0 0000000000000000.0000 0000000000000000000000000000000000000000000000000000000000000.0000
+0000000000 0000000000 0.0 00000000.00 00000000000000000000000000000000000000000000000000.0000000000 0000000000 0000000000 0 0000000000000000.0000 0000000000000000000000000000000000000000000000000000000000000.0000
+0000000100 0000123456 0.3 00040000.25 00000000000000000000000000000000123456789123456789.1000100000 0000001024 0000007000 8 0000000000999999.9000 0000000000000000000000000000000000000000009223372036854775807.0000
+9999999999 9999999999 0.9 99999999.00 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.0000 9999999999999999999999999999999999999999999999999999999999999.1111
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+ALTER TABLE t1 ADD COLUMN n66 NUMERIC(66) ZEROFILL;
+ERROR 42000: Too big precision 66 specified for 'n66'. Maximum is 65
+ALTER TABLE t1 ADD COLUMN n66_6 DECIMAL(66,6) ZEROFILL;
+ERROR 42000: Too big precision 66 specified for 'n66_6'. Maximum is 65
+ALTER TABLE t1 ADD COLUMN n66_66 DECIMAL(66,66) ZEROFILL;
+ERROR 42000: Too big scale 66 specified for 'n66_66'. Maximum is 38
+DROP TABLE t1;
+CREATE TABLE t1 (
+a DECIMAL ZEROFILL,
+b NUMERIC ZEROFILL,
+PRIMARY KEY (a)
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a decimal(10,0) unsigned zerofill NO PRI NULL
+b decimal(10,0) unsigned zerofill YES NULL
+INSERT INTO t1 (a,b) VALUES (1.1,1234);
+Warnings:
+Note 1265 Data truncated for column 'a' at row 1
+SELECT a,b FROM t1;
+a b
+0000000001 0000001234
+DROP TABLE t1;
+########################
+# Floating point columns (FLOAT, DOUBLE)
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+f FLOAT ZEROFILL,
+f0 FLOAT(0) ZEROFILL,
+r1_1 REAL(1,1) ZEROFILL,
+f23_0 FLOAT(23) ZEROFILL,
+f20_3 FLOAT(20,3) ZEROFILL,
+d DOUBLE ZEROFILL,
+d1_0 DOUBLE(1,0) ZEROFILL,
+d10_10 DOUBLE PRECISION (10,10) ZEROFILL,
+d53 DOUBLE(53,0) ZEROFILL,
+d53_10 DOUBLE(53,10) ZEROFILL,
+pk DOUBLE ZEROFILL PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+f float unsigned zerofill YES NULL
+f0 float unsigned zerofill YES NULL
+r1_1 double(1,1) unsigned zerofill YES NULL
+f23_0 float unsigned zerofill YES NULL
+f20_3 float(20,3) unsigned zerofill YES NULL
+d double unsigned zerofill YES NULL
+d1_0 double(1,0) unsigned zerofill YES NULL
+d10_10 double(10,10) unsigned zerofill YES NULL
+d53 double(53,0) unsigned zerofill YES NULL
+d53_10 double(53,10) unsigned zerofill YES NULL
+pk double unsigned zerofill NO PRI NULL
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (12345.12345,12345.12345,0.9,123456789.123,56789.987,11111111.111,8.0,0.0123456789,1234566789123456789,99999999999999999.99999999,1);
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 0000012345.1
+d 000000000011111111.111
+d10_10 0.0123456789
+d1_0 8
+d53 00000000000000000000000000000000001234566789123456800
+d53_10 000000000000000000000000100000000000000000.0000000000
+f0 0000012345.1
+f20_3 0000000000056789.988
+f23_0 000123457000
+r1_1 0.9
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (0,0,0,0,0,0,0,0,0,0,2);
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (
+99999999999999999999999999999999999999,
+99999999999999999999999999999999999999.9999999999999999,
+0.9,
+99999999999999999999999999999999999999.9,
+99999999999999999.999,
+999999999999999999999999999999999999999999999999999999999999999999999999999999999,
+9,
+0.9999999999,
+1999999999999999999999999999999999999999999999999999999,
+19999999999999999999999999999999999999999999.9999999999,
+3
+);
+Warnings:
+Warning 1264 Out of range value for column 'd53' at row 1
+Warning 1264 Out of range value for column 'd53_10' at row 1
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 0000012345.1
+d 0000000000000000000000
+d 0000000000000000001e81
+d 000000000011111111.111
+d10_10 0.0000000000
+d10_10 0.0123456789
+d10_10 0.9999999999
+d1_0 0
+d1_0 8
+d1_0 9
+d53 00000000000000000000000000000000000000000000000000000
+d53 00000000000000000000000000000000001234566789123456800
+d53 100000000000000000000000000000000000000000000000000000
+d53_10 000000000000000000000000000000000000000000.0000000000
+d53_10 000000000000000000000000100000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+f 000000000000
+f 000000001e38
+f0 000000000000
+f0 000000001e38
+f0 0000012345.1
+f20_3 0000000000000000.000
+f20_3 0000000000056789.988
+f20_3 99999998430674940.000
+f23_0 000000000000
+f23_0 000000001e38
+f23_0 000123457000
+r1_1 0.0
+r1_1 0.9
+r1_1 0.9
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (-999999999999999999999999,-99999999999.999999999999,-0.9,-999.99999999999999999999,-99999999999999999.999,-999999999999999999999999999999999999999999999999999999999999-0.999,-9,-.9999999999,-999999999999999999999999999999.99999999999999999999999,-9999999999999999999999999999999999999999999.9999999999,4);
+Warnings:
+Warning 1264 Out of range value for column 'f' at row 1
+Warning 1264 Out of range value for column 'f0' at row 1
+Warning 1264 Out of range value for column 'r1_1' at row 1
+Warning 1264 Out of range value for column 'f23_0' at row 1
+Warning 1264 Out of range value for column 'f20_3' at row 1
+Warning 1264 Out of range value for column 'd' at row 1
+Warning 1264 Out of range value for column 'd1_0' at row 1
+Warning 1264 Out of range value for column 'd10_10' at row 1
+Warning 1264 Out of range value for column 'd53' at row 1
+Warning 1264 Out of range value for column 'd53_10' at row 1
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 0000012345.1
+d 0000000000000000000000
+d 0000000000000000000000
+d 0000000000000000001e81
+d 000000000011111111.111
+d10_10 0.0000000000
+d10_10 0.0000000000
+d10_10 0.0123456789
+d10_10 0.9999999999
+d1_0 0
+d1_0 0
+d1_0 8
+d1_0 9
+d53 00000000000000000000000000000000000000000000000000000
+d53 00000000000000000000000000000000000000000000000000000
+d53 00000000000000000000000000000000001234566789123456800
+d53 100000000000000000000000000000000000000000000000000000
+d53_10 000000000000000000000000000000000000000000.0000000000
+d53_10 000000000000000000000000000000000000000000.0000000000
+d53_10 000000000000000000000000100000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+f 000000000000
+f 000000000000
+f 000000001e38
+f0 000000000000
+f0 000000000000
+f0 000000001e38
+f0 0000012345.1
+f20_3 0000000000000000.000
+f20_3 0000000000000000.000
+f20_3 0000000000056789.988
+f20_3 99999998430674940.000
+f23_0 000000000000
+f23_0 000000000000
+f23_0 000000001e38
+f23_0 000123457000
+r1_1 0.0
+r1_1 0.0
+r1_1 0.9
+r1_1 0.9
+SELECT
+CONCAT('', MAX(f)),
+CONCAT('', MAX(f0)),
+CONCAT('', MAX(r1_1)),
+CONCAT('', MAX(f23_0)),
+CONCAT('', MAX(f20_3)),
+CONCAT('', MAX(d)),
+CONCAT('', MAX(d1_0)),
+CONCAT('', MAX(d10_10)),
+CONCAT('', MAX(d53)),
+CONCAT('', MAX(d53_10)) FROM t1;
+CONCAT('', MAX(f)) 9.999999680285692e37
+CONCAT('', MAX(d)) 1e81
+CONCAT('', MAX(d10_10)) 0.9999999999
+CONCAT('', MAX(d1_0)) 9
+CONCAT('', MAX(d53)) 100000000000000000000000000000000000000000000000000000
+CONCAT('', MAX(d53_10)) 10000000000000000000000000000000000000000000.0000000000
+CONCAT('', MAX(f0)) 9.999999680285692e37
+CONCAT('', MAX(f20_3)) 99999998430674940.000
+CONCAT('', MAX(f23_0)) 9.999999680285692e37
+CONCAT('', MAX(r1_1)) 0.9
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+5
+);
+Warnings:
+Warning 1264 Out of range value for column 'f' at row 1
+Warning 1264 Out of range value for column 'f0' at row 1
+Warning 1264 Out of range value for column 'r1_1' at row 1
+Warning 1264 Out of range value for column 'f23_0' at row 1
+Warning 1264 Out of range value for column 'f20_3' at row 1
+Warning 1264 Out of range value for column 'd1_0' at row 1
+Warning 1264 Out of range value for column 'd10_10' at row 1
+Warning 1264 Out of range value for column 'd53' at row 1
+Warning 1264 Out of range value for column 'd53_10' at row 1
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 0000012345.1
+d 0000000000000000000000
+d 0000000000000000000000
+d 0000000000000000001e61
+d 0000000000000000001e81
+d 000000000011111111.111
+d10_10 0.0000000000
+d10_10 0.0000000000
+d10_10 0.0123456789
+d10_10 0.9999999999
+d10_10 0.9999999999
+d1_0 0
+d1_0 0
+d1_0 8
+d1_0 9
+d1_0 9
+d53 00000000000000000000000000000000000000000000000000000
+d53 00000000000000000000000000000000000000000000000000000
+d53 00000000000000000000000000000000001234566789123456800
+d53 100000000000000000000000000000000000000000000000000000
+d53 100000000000000000000000000000000000000000000000000000
+d53_10 000000000000000000000000000000000000000000.0000000000
+d53_10 000000000000000000000000000000000000000000.0000000000
+d53_10 000000000000000000000000100000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+f 000000000000
+f 000000000000
+f 000000001e38
+f 003.40282e38
+f0 000000000000
+f0 000000000000
+f0 000000001e38
+f0 0000012345.1
+f0 003.40282e38
+f20_3 0000000000000000.000
+f20_3 0000000000000000.000
+f20_3 0000000000056789.988
+f20_3 99999998430674940.000
+f20_3 99999998430674940.000
+f23_0 000000000000
+f23_0 000000000000
+f23_0 000000001e38
+f23_0 000123457000
+f23_0 003.40282e38
+r1_1 0.0
+r1_1 0.0
+r1_1 0.9
+r1_1 0.9
+r1_1 0.9
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (
+999999999999999999999999999999999999999,
+999999999999999999999999999999999999999.9999999999999999,
+1.9,
+999999999999999999999999999999999999999.9,
+999999999999999999.999,
+9999999999999999999999999999999999999999999999999999999999999999999999999999999999,
+99,
+1.9999999999,
+1999999999999999999999999999999999999999999999999999999,
+19999999999999999999999999999999999999999999.9999999999,
+6
+);
+Warnings:
+Warning 1916 Got overflow when converting '' to DECIMAL. Value truncated
+Warning 1264 Out of range value for column 'f' at row 1
+Warning 1264 Out of range value for column 'f0' at row 1
+Warning 1264 Out of range value for column 'r1_1' at row 1
+Warning 1264 Out of range value for column 'f23_0' at row 1
+Warning 1264 Out of range value for column 'f20_3' at row 1
+Warning 1264 Out of range value for column 'd1_0' at row 1
+Warning 1264 Out of range value for column 'd10_10' at row 1
+Warning 1264 Out of range value for column 'd53' at row 1
+Warning 1264 Out of range value for column 'd53_10' at row 1
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 0000012345.1
+d 0000000000000000000000
+d 0000000000000000000000
+d 0000000000000000001e61
+d 0000000000000000001e65
+d 0000000000000000001e81
+d 000000000011111111.111
+d10_10 0.0000000000
+d10_10 0.0000000000
+d10_10 0.0123456789
+d10_10 0.9999999999
+d10_10 0.9999999999
+d10_10 0.9999999999
+d1_0 0
+d1_0 0
+d1_0 8
+d1_0 9
+d1_0 9
+d1_0 9
+d53 00000000000000000000000000000000000000000000000000000
+d53 00000000000000000000000000000000000000000000000000000
+d53 00000000000000000000000000000000001234566789123456800
+d53 100000000000000000000000000000000000000000000000000000
+d53 100000000000000000000000000000000000000000000000000000
+d53 100000000000000000000000000000000000000000000000000000
+d53_10 000000000000000000000000000000000000000000.0000000000
+d53_10 000000000000000000000000000000000000000000.0000000000
+d53_10 000000000000000000000000100000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+f 000000000000
+f 000000000000
+f 000000001e38
+f 003.40282e38
+f 003.40282e38
+f0 000000000000
+f0 000000000000
+f0 000000001e38
+f0 0000012345.1
+f0 003.40282e38
+f0 003.40282e38
+f20_3 0000000000000000.000
+f20_3 0000000000000000.000
+f20_3 0000000000056789.988
+f20_3 99999998430674940.000
+f20_3 99999998430674940.000
+f20_3 99999998430674940.000
+f23_0 000000000000
+f23_0 000000000000
+f23_0 000000001e38
+f23_0 000123457000
+f23_0 003.40282e38
+f23_0 003.40282e38
+r1_1 0.0
+r1_1 0.0
+r1_1 0.9
+r1_1 0.9
+r1_1 0.9
+r1_1 0.9
+ALTER TABLE t1 ADD COLUMN d0_0 DOUBLE(0,0) ZEROFILL;
+ALTER TABLE t1 ADD COLUMN n66_6 DECIMAL(256,1) ZEROFILL;
+ERROR 42000: Too big precision 256 specified for 'n66_6'. Maximum is 65
+ALTER TABLE t1 ADD COLUMN n66_66 DECIMAL(40,35) ZEROFILL;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a DOUBLE ZEROFILL,
+b FLOAT ZEROFILL,
+PRIMARY KEY (b)
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a double unsigned zerofill YES NULL
+b float unsigned zerofill NO PRI NULL
+INSERT INTO t1 (a,b) VALUES (1,1234.5);
+SELECT a,b FROM t1;
+a b
+0000000000000000000001 0000001234.5
+DROP TABLE t1;
+########################
+# INT columns
+########################
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+i INT ZEROFILL,
+i0 INT(0) ZEROFILL,
+i1 INT(1) ZEROFILL,
+i20 INT(20) ZEROFILL,
+t TINYINT ZEROFILL,
+t0 TINYINT(0) ZEROFILL,
+t1 TINYINT(1) ZEROFILL,
+t20 TINYINT(20) ZEROFILL,
+s SMALLINT ZEROFILL,
+s0 SMALLINT(0) ZEROFILL,
+s1 SMALLINT(1) ZEROFILL,
+s20 SMALLINT(20) ZEROFILL,
+m MEDIUMINT ZEROFILL,
+m0 MEDIUMINT(0) ZEROFILL,
+m1 MEDIUMINT(1) ZEROFILL,
+m20 MEDIUMINT(20) ZEROFILL,
+b BIGINT ZEROFILL,
+b0 BIGINT(0) ZEROFILL,
+b1 BIGINT(1) ZEROFILL,
+b20 BIGINT(20) ZEROFILL,
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+i int(10) unsigned zerofill YES NULL
+i0 int(10) unsigned zerofill YES NULL
+i1 int(1) unsigned zerofill YES NULL
+i20 int(20) unsigned zerofill YES NULL
+t tinyint(3) unsigned zerofill YES NULL
+t0 tinyint(3) unsigned zerofill YES NULL
+t1 tinyint(1) unsigned zerofill YES NULL
+t20 tinyint(20) unsigned zerofill YES NULL
+s smallint(5) unsigned zerofill YES NULL
+s0 smallint(5) unsigned zerofill YES NULL
+s1 smallint(1) unsigned zerofill YES NULL
+s20 smallint(20) unsigned zerofill YES NULL
+m mediumint(8) unsigned zerofill YES NULL
+m0 mediumint(8) unsigned zerofill YES NULL
+m1 mediumint(1) unsigned zerofill YES NULL
+m20 mediumint(20) unsigned zerofill YES NULL
+b bigint(20) unsigned zerofill YES NULL
+b0 bigint(20) unsigned zerofill YES NULL
+b1 bigint(1) unsigned zerofill YES NULL
+b20 bigint(20) unsigned zerofill YES NULL
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20);
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (2147483647,2147483647,2147483647,2147483647,127,127,127,127,32767,32767,32767,32767,8388607,8388607,8388607,8388607,9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807);
+SELECT i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20 FROM t1;
+i i0 i1 i20 t t0 t1 t20 s s0 s1 s20 m m0 m1 m20 b b0 b1 b20
+0000000000 0000000000 0 00000000000000000000 000 000 0 00000000000000000000 00000 00000 0 00000000000000000000 00000000 00000000 0 00000000000000000000 00000000000000000000 00000000000000000000 0 00000000000000000000
+0000000001 0000000002 3 00000000000000000004 005 006 7 00000000000000000008 00009 00010 11 00000000000000000012 00000013 00000014 15 00000000000000000016 00000000000000000017 00000000000000000018 19 00000000000000000020
+2147483647 2147483647 2147483647 00000000002147483647 127 127 127 00000000000000000127 32767 32767 32767 00000000000000032767 08388607 08388607 8388607 00000000000008388607 09223372036854775807 09223372036854775807 9223372036854775807 09223372036854775807
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (-2147483648,-2147483648,-2147483648,-2147483648,-128,-128,-128,-128,-32768,-32768,-32768,-32768,-8388608,-8388608,-8388608,-8388608,-9223372036854775808,-9223372036854775808,-9223372036854775808,-9223372036854775808);
+Warnings:
+Warning 1264 Out of range value for column 'i' at row 1
+Warning 1264 Out of range value for column 'i0' at row 1
+Warning 1264 Out of range value for column 'i1' at row 1
+Warning 1264 Out of range value for column 'i20' at row 1
+Warning 1264 Out of range value for column 't' at row 1
+Warning 1264 Out of range value for column 't0' at row 1
+Warning 1264 Out of range value for column 't1' at row 1
+Warning 1264 Out of range value for column 't20' at row 1
+Warning 1264 Out of range value for column 's' at row 1
+Warning 1264 Out of range value for column 's0' at row 1
+Warning 1264 Out of range value for column 's1' at row 1
+Warning 1264 Out of range value for column 's20' at row 1
+Warning 1264 Out of range value for column 'm' at row 1
+Warning 1264 Out of range value for column 'm0' at row 1
+Warning 1264 Out of range value for column 'm1' at row 1
+Warning 1264 Out of range value for column 'm20' at row 1
+Warning 1264 Out of range value for column 'b' at row 1
+Warning 1264 Out of range value for column 'b0' at row 1
+Warning 1264 Out of range value for column 'b1' at row 1
+Warning 1264 Out of range value for column 'b20' at row 1
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (4294967295,4294967295,4294967295,4294967295,255,255,255,255,65535,65535,65535,65535,16777215,16777215,16777215,16777215,18446744073709551615,18446744073709551615,18446744073709551615,18446744073709551615);
+SELECT i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20 FROM t1;
+i i0 i1 i20 t t0 t1 t20 s s0 s1 s20 m m0 m1 m20 b b0 b1 b20
+0000000000 0000000000 0 00000000000000000000 000 000 0 00000000000000000000 00000 00000 0 00000000000000000000 00000000 00000000 0 00000000000000000000 00000000000000000000 00000000000000000000 0 00000000000000000000
+0000000000 0000000000 0 00000000000000000000 000 000 0 00000000000000000000 00000 00000 0 00000000000000000000 00000000 00000000 0 00000000000000000000 00000000000000000000 00000000000000000000 0 00000000000000000000
+0000000001 0000000002 3 00000000000000000004 005 006 7 00000000000000000008 00009 00010 11 00000000000000000012 00000013 00000014 15 00000000000000000016 00000000000000000017 00000000000000000018 19 00000000000000000020
+2147483647 2147483647 2147483647 00000000002147483647 127 127 127 00000000000000000127 32767 32767 32767 00000000000000032767 08388607 08388607 8388607 00000000000008388607 09223372036854775807 09223372036854775807 9223372036854775807 09223372036854775807
+4294967295 4294967295 4294967295 00000000004294967295 255 255 255 00000000000000000255 65535 65535 65535 00000000000000065535 16777215 16777215 16777215 00000000000016777215 18446744073709551615 18446744073709551615 18446744073709551615 18446744073709551615
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (-2147483649,-2147483649,-2147483649,-2147483649,-129,-129,-129,-129,-32769,-32769,-32769,-32769,-8388609,-8388609,-8388609,-8388609,-9223372036854775809,-9223372036854775809,-9223372036854775809,-9223372036854775809);
+Warnings:
+Warning 1264 Out of range value for column 'i' at row 1
+Warning 1264 Out of range value for column 'i0' at row 1
+Warning 1264 Out of range value for column 'i1' at row 1
+Warning 1264 Out of range value for column 'i20' at row 1
+Warning 1264 Out of range value for column 't' at row 1
+Warning 1264 Out of range value for column 't0' at row 1
+Warning 1264 Out of range value for column 't1' at row 1
+Warning 1264 Out of range value for column 't20' at row 1
+Warning 1264 Out of range value for column 's' at row 1
+Warning 1264 Out of range value for column 's0' at row 1
+Warning 1264 Out of range value for column 's1' at row 1
+Warning 1264 Out of range value for column 's20' at row 1
+Warning 1264 Out of range value for column 'm' at row 1
+Warning 1264 Out of range value for column 'm0' at row 1
+Warning 1264 Out of range value for column 'm1' at row 1
+Warning 1264 Out of range value for column 'm20' at row 1
+Warning 1264 Out of range value for column 'b' at row 1
+Warning 1264 Out of range value for column 'b0' at row 1
+Warning 1264 Out of range value for column 'b1' at row 1
+Warning 1264 Out of range value for column 'b20' at row 1
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (4294967296,4294967296,4294967296,4294967296,256,256,256,256,65536,65536,65536,65536,16777216,16777216,16777216,16777216,18446744073709551616,18446744073709551616,18446744073709551616,18446744073709551616);
+Warnings:
+Warning 1264 Out of range value for column 'i' at row 1
+Warning 1264 Out of range value for column 'i0' at row 1
+Warning 1264 Out of range value for column 'i1' at row 1
+Warning 1264 Out of range value for column 'i20' at row 1
+Warning 1264 Out of range value for column 't' at row 1
+Warning 1264 Out of range value for column 't0' at row 1
+Warning 1264 Out of range value for column 't1' at row 1
+Warning 1264 Out of range value for column 't20' at row 1
+Warning 1264 Out of range value for column 's' at row 1
+Warning 1264 Out of range value for column 's0' at row 1
+Warning 1264 Out of range value for column 's1' at row 1
+Warning 1264 Out of range value for column 's20' at row 1
+Warning 1264 Out of range value for column 'm' at row 1
+Warning 1264 Out of range value for column 'm0' at row 1
+Warning 1264 Out of range value for column 'm1' at row 1
+Warning 1264 Out of range value for column 'm20' at row 1
+Warning 1264 Out of range value for column 'b' at row 1
+Warning 1264 Out of range value for column 'b0' at row 1
+Warning 1264 Out of range value for column 'b1' at row 1
+Warning 1264 Out of range value for column 'b20' at row 1
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) SELECT b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b FROM t1 WHERE b IN (-9223372036854775808,9223372036854775807,18446744073709551615);
+Warnings:
+Warning 1264 Out of range value for column 'i' at row 8
+Warning 1264 Out of range value for column 'i0' at row 8
+Warning 1264 Out of range value for column 'i1' at row 8
+Warning 1264 Out of range value for column 'i20' at row 8
+Warning 1264 Out of range value for column 't' at row 8
+Warning 1264 Out of range value for column 't0' at row 8
+Warning 1264 Out of range value for column 't1' at row 8
+Warning 1264 Out of range value for column 't20' at row 8
+Warning 1264 Out of range value for column 's' at row 8
+Warning 1264 Out of range value for column 's0' at row 8
+Warning 1264 Out of range value for column 's1' at row 8
+Warning 1264 Out of range value for column 's20' at row 8
+Warning 1264 Out of range value for column 'm' at row 8
+Warning 1264 Out of range value for column 'm0' at row 8
+Warning 1264 Out of range value for column 'm1' at row 8
+Warning 1264 Out of range value for column 'm20' at row 8
+Warning 1264 Out of range value for column 'i' at row 9
+Warning 1264 Out of range value for column 'i0' at row 9
+Warning 1264 Out of range value for column 'i1' at row 9
+Warning 1264 Out of range value for column 'i20' at row 9
+Warning 1264 Out of range value for column 't' at row 9
+Warning 1264 Out of range value for column 't0' at row 9
+Warning 1264 Out of range value for column 't1' at row 9
+Warning 1264 Out of range value for column 't20' at row 9
+Warning 1264 Out of range value for column 's' at row 9
+Warning 1264 Out of range value for column 's0' at row 9
+Warning 1264 Out of range value for column 's1' at row 9
+Warning 1264 Out of range value for column 's20' at row 9
+Warning 1264 Out of range value for column 'm' at row 9
+Warning 1264 Out of range value for column 'm0' at row 9
+Warning 1264 Out of range value for column 'm1' at row 9
+Warning 1264 Out of range value for column 'm20' at row 9
+Warning 1264 Out of range value for column 'i' at row 10
+Warning 1264 Out of range value for column 'i0' at row 10
+Warning 1264 Out of range value for column 'i1' at row 10
+Warning 1264 Out of range value for column 'i20' at row 10
+Warning 1264 Out of range value for column 't' at row 10
+Warning 1264 Out of range value for column 't0' at row 10
+Warning 1264 Out of range value for column 't1' at row 10
+Warning 1264 Out of range value for column 't20' at row 10
+Warning 1264 Out of range value for column 's' at row 10
+Warning 1264 Out of range value for column 's0' at row 10
+Warning 1264 Out of range value for column 's1' at row 10
+Warning 1264 Out of range value for column 's20' at row 10
+Warning 1264 Out of range value for column 'm' at row 10
+Warning 1264 Out of range value for column 'm0' at row 10
+Warning 1264 Out of range value for column 'm1' at row 10
+Warning 1264 Out of range value for column 'm20' at row 10
+SELECT i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20 FROM t1;
+i i0 i1 i20 t t0 t1 t20 s s0 s1 s20 m m0 m1 m20 b b0 b1 b20
+0000000000 0000000000 0 00000000000000000000 000 000 0 00000000000000000000 00000 00000 0 00000000000000000000 00000000 00000000 0 00000000000000000000 00000000000000000000 00000000000000000000 0 00000000000000000000
+0000000000 0000000000 0 00000000000000000000 000 000 0 00000000000000000000 00000 00000 0 00000000000000000000 00000000 00000000 0 00000000000000000000 00000000000000000000 00000000000000000000 0 00000000000000000000
+0000000000 0000000000 0 00000000000000000000 000 000 0 00000000000000000000 00000 00000 0 00000000000000000000 00000000 00000000 0 00000000000000000000 00000000000000000000 00000000000000000000 0 00000000000000000000
+0000000001 0000000002 3 00000000000000000004 005 006 7 00000000000000000008 00009 00010 11 00000000000000000012 00000013 00000014 15 00000000000000000016 00000000000000000017 00000000000000000018 19 00000000000000000020
+2147483647 2147483647 2147483647 00000000002147483647 127 127 127 00000000000000000127 32767 32767 32767 00000000000000032767 08388607 08388607 8388607 00000000000008388607 09223372036854775807 09223372036854775807 9223372036854775807 09223372036854775807
+4294967295 4294967295 4294967295 00000000004294967295 255 255 255 00000000000000000255 65535 65535 65535 00000000000000065535 16777215 16777215 16777215 00000000000016777215 09223372036854775807 09223372036854775807 9223372036854775807 09223372036854775807
+4294967295 4294967295 4294967295 00000000004294967295 255 255 255 00000000000000000255 65535 65535 65535 00000000000000065535 16777215 16777215 16777215 00000000000016777215 18446744073709551615 18446744073709551615 18446744073709551615 18446744073709551615
+4294967295 4294967295 4294967295 00000000004294967295 255 255 255 00000000000000000255 65535 65535 65535 00000000000000065535 16777215 16777215 16777215 00000000000016777215 18446744073709551615 18446744073709551615 18446744073709551615 18446744073709551615
+4294967295 4294967295 4294967295 00000000004294967295 255 255 255 00000000000000000255 65535 65535 65535 00000000000000065535 16777215 16777215 16777215 00000000000016777215 18446744073709551615 18446744073709551615 18446744073709551615 18446744073709551615
+4294967295 4294967295 4294967295 00000000004294967295 255 255 255 00000000000000000255 65535 65535 65535 00000000000000065535 16777215 16777215 16777215 00000000000016777215 18446744073709551615 18446744073709551615 18446744073709551615 18446744073709551615
+ALTER TABLE t1 ADD COLUMN i257 INT(257) ZEROFILL;
+ERROR 42000: Display width out of range for 'i257' (max = 255)
+DROP TABLE t1;
+CREATE TABLE t1 (
+t TINYINT ZEROFILL,
+s SMALLINT ZEROFILL,
+m MEDIUMINT ZEROFILL,
+i INT ZEROFILL,
+b BIGINT ZEROFILL,
+PRIMARY KEY (b)
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+t tinyint(3) unsigned zerofill YES NULL
+s smallint(5) unsigned zerofill YES NULL
+m mediumint(8) unsigned zerofill YES NULL
+i int(10) unsigned zerofill YES NULL
+b bigint(20) unsigned zerofill NO PRI NULL
+INSERT INTO t1 (t,s,m,i,b) VALUES (1,10,100,1000,0);
+SELECT t,s,m,i,b FROM t1;
+t s m i b
+001 00010 00000100 0000001000 00000000000000000000
+DROP TABLE t1;
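Note on the ZEROFILL output above: ZEROFILL implies UNSIGNED and left-pads the displayed value with zeros up to the column's display width, and display widths above 255 are rejected (hence the INT(257) error). A minimal sketch of the same behaviour, with an illustrative table name:
  CREATE TABLE zf_demo (v INT(6) ZEROFILL, PRIMARY KEY (v)) ENGINE=rocksdb;
  INSERT INTO zf_demo VALUES (42);
  SELECT v FROM zf_demo;   -- displayed as 000042
  DROP TABLE zf_demo;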
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/collation.result b/storage/rocksdb/mysql-test/rocksdb/r/collation.result
new file mode 100644
index 00000000000..10e0d9b0002
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/collation.result
@@ -0,0 +1,144 @@
+call mtr.add_suppression("Invalid pattern");
+CREATE TABLE t1 (id INT primary key, value varchar(50), value2 varbinary(50), value3 text) engine=rocksdb charset utf8;
+ALTER TABLE t1 ADD INDEX (value);
+ERROR HY000: Unsupported collation on string indexed column test.t1.value Use binary collation (binary, latin1_bin, utf8_bin).
+DROP TABLE t1;
+CREATE TABLE t1 (id INT primary key, value varchar(50), value2 varbinary(50), value3 text, index(value)) engine=rocksdb charset utf8;
+ERROR HY000: Unsupported collation on string indexed column test.t1.value Use binary collation (latin1_bin, binary, utf8_bin).
+CREATE TABLE t1 (id INT primary key, value varchar(50), value2 varbinary(50), value3 text, index(value3(50))) engine=rocksdb charset utf8;
+ERROR HY000: Unsupported collation on string indexed column test.t1.value3 Use binary collation (latin1_bin, binary, utf8_bin).
+SET GLOBAL rocksdb_strict_collation_check=0;
+CREATE TABLE t1 (id INT primary key, value varchar(50), value2 varbinary(50), value3 text, index(value3(50))) engine=rocksdb charset utf8;
+DROP TABLE t1;
+SET GLOBAL rocksdb_strict_collation_check=1;
+CREATE TABLE t1 (id INT primary key, value varchar(50), value2 varbinary(50), value3 text, index(value2)) engine=rocksdb charset utf8;
+DROP TABLE t1;
+CREATE TABLE t1 (id varchar(20), value varchar(50), value2 varchar(50), value3 text, primary key (id), index(value, value2)) engine=rocksdb charset latin1 collate latin1_bin;
+ALTER TABLE t1 collate=latin1_general_ci;
+DROP TABLE t1;
+CREATE TABLE t1 (id varchar(20), value varchar(50), value2 varchar(50), value3 text, primary key (id), index(value, value2)) engine=rocksdb charset utf8 collate utf8_bin;
+DROP TABLE t1;
+CREATE TABLE t1 (id varchar(20) collate latin1_bin, value varchar(50) collate utf8_bin, value2 varchar(50) collate latin1_bin, value3 text, primary key (id), index(value, value2)) engine=rocksdb;
+DROP TABLE t1;
+SET GLOBAL rocksdb_strict_collation_exceptions=t1;
+CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE t1;
+CREATE TABLE t2 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+ERROR HY000: Unsupported collation on string indexed column test.t2.value Use binary collation (latin1_bin, binary, utf8_bin).
+SET GLOBAL rocksdb_strict_collation_exceptions="t.*";
+CREATE TABLE t123 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE t123;
+CREATE TABLE s123 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+ERROR HY000: Unsupported collation on string indexed column test.s123.value Use binary collation (latin1_bin, binary, utf8_bin).
+SET GLOBAL rocksdb_strict_collation_exceptions=".t.*";
+CREATE TABLE xt123 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE xt123;
+CREATE TABLE t123 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+ERROR HY000: Unsupported collation on string indexed column test.t123.value Use binary collation (latin1_bin, binary, utf8_bin).
+SET GLOBAL rocksdb_strict_collation_exceptions="s.*,t.*";
+CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE s1;
+CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE t1;
+CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+ERROR HY000: Unsupported collation on string indexed column test.u1.value Use binary collation (latin1_bin, binary, utf8_bin).
+SET GLOBAL rocksdb_strict_collation_exceptions="s.*|t.*";
+CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE s1;
+CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE t1;
+CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+ERROR HY000: Unsupported collation on string indexed column test.u1.value Use binary collation (latin1_bin, binary, utf8_bin).
+SET GLOBAL rocksdb_strict_collation_exceptions=",s.*,t.*";
+CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE s1;
+CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE t1;
+CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+ERROR HY000: Unsupported collation on string indexed column test.u1.value Use binary collation (latin1_bin, binary, utf8_bin).
+SET GLOBAL rocksdb_strict_collation_exceptions="|s.*|t.*";
+CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE s1;
+CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE t1;
+CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+ERROR HY000: Unsupported collation on string indexed column test.u1.value Use binary collation (latin1_bin, binary, utf8_bin).
+SET GLOBAL rocksdb_strict_collation_exceptions="s.*,,t.*";
+CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE s1;
+CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE t1;
+CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+ERROR HY000: Unsupported collation on string indexed column test.u1.value Use binary collation (latin1_bin, binary, utf8_bin).
+SET GLOBAL rocksdb_strict_collation_exceptions="s.*||t.*";
+CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE s1;
+CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE t1;
+CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+ERROR HY000: Unsupported collation on string indexed column test.u1.value Use binary collation (latin1_bin, binary, utf8_bin).
+SET GLOBAL rocksdb_strict_collation_exceptions="s.*,t.*,";
+CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE s1;
+CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE t1;
+CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+ERROR HY000: Unsupported collation on string indexed column test.u1.value Use binary collation (latin1_bin, binary, utf8_bin).
+SET GLOBAL rocksdb_strict_collation_exceptions="s.*|t.*|";
+CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE s1;
+CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE t1;
+CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+ERROR HY000: Unsupported collation on string indexed column test.u1.value Use binary collation (latin1_bin, binary, utf8_bin).
+SET GLOBAL rocksdb_strict_collation_exceptions="||||,,,,s.*,,|,,||,t.*,,|||,,,";
+CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE s1;
+CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE t1;
+CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+ERROR HY000: Unsupported collation on string indexed column test.u1.value Use binary collation (latin1_bin, binary, utf8_bin).
+SET GLOBAL rocksdb_strict_collation_exceptions='t1';
+CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb;
+ALTER TABLE t1 AUTO_INCREMENT=1;
+DROP TABLE t1;
+CREATE TABLE t2 (id INT primary key, value varchar(50), index(value)) engine=rocksdb;
+ERROR HY000: Unsupported collation on string indexed column test.t2.value Use binary collation (latin1_bin, binary, utf8_bin).
+CREATE TABLE t2 (id INT primary key, value varchar(50)) engine=rocksdb;
+ALTER TABLE t2 ADD INDEX(value);
+ERROR HY000: Unsupported collation on string indexed column test.t2.value Use binary collation (latin1_bin, binary, utf8_bin).
+DROP TABLE t2;
+SET GLOBAL rocksdb_strict_collation_exceptions="[a-b";
+FOUND 1 /Invalid pattern in strict_collation_exceptions: \[a-b/ in mysqld.1.err
+CREATE TABLE a (id INT PRIMARY KEY, value varchar(50), index(value)) engine=rocksdb charset utf8;
+ERROR HY000: Unsupported collation on string indexed column test.a.value Use binary collation (latin1_bin, binary, utf8_bin).
+SET GLOBAL rocksdb_strict_collation_exceptions="[a-b]";
+CREATE TABLE a (id INT PRIMARY KEY, value varchar(50), index(value)) engine=rocksdb charset utf8;
+CREATE TABLE b (id INT PRIMARY KEY, value varchar(50), index(value)) engine=rocksdb charset utf8;
+CREATE TABLE c (id INT PRIMARY KEY, value varchar(50), index(value)) engine=rocksdb charset utf8;
+ERROR HY000: Unsupported collation on string indexed column test.c.value Use binary collation (latin1_bin, binary, utf8_bin).
+DROP TABLE a, b;
+call mtr.add_suppression("Invalid pattern in strict_collation_exceptions:");
+SET GLOBAL rocksdb_strict_collation_exceptions="abc\\";
+FOUND 1 /Invalid pattern in strict_collation_exceptions: abc/ in mysqld.1.err
+CREATE TABLE abc (id INT PRIMARY KEY, value varchar(50), index(value)) engine=rocksdb charset utf8;
+ERROR HY000: Unsupported collation on string indexed column test.abc.value Use binary collation (latin1_bin, binary, utf8_bin).
+SET GLOBAL rocksdb_strict_collation_exceptions="abc";
+CREATE TABLE abc (id INT PRIMARY KEY, value varchar(50), index(value)) engine=rocksdb charset utf8;
+CREATE TABLE abcd (id INT PRIMARY KEY, value varchar(50), index(value)) engine=rocksdb charset utf8;
+ERROR HY000: Unsupported collation on string indexed column test.abcd.value Use binary collation (latin1_bin, binary, utf8_bin).
+DROP TABLE abc;
+SET GLOBAL rocksdb_strict_collation_exceptions=null;
+SET GLOBAL rocksdb_strict_collation_check=1;
+CREATE TABLE t1 (id INT primary key, value varchar(50), value2 varbinary(50), value3 text, index(value)) engine=rocksdb charset utf8;
+Warnings:
+Warning 1210 Unsupported collation on string indexed column test.t1.value Use binary collation (binary, latin1_bin, utf8_bin).
+DROP TABLE t1;
+CREATE TABLE t1 (id INT primary key, value varchar(50), value2 varbinary(50), value3 text) engine=rocksdb charset utf8;
+ALTER TABLE t1 ADD INDEX (value);
+Warnings:
+Warning 1210 Unsupported collation on string indexed column test.t1.value Use binary collation (binary, latin1_bin, utf8_bin).
+DROP TABLE t1;
+CREATE TABLE t1 (id varchar(20), value varchar(50), value2 varchar(50), value3 text, primary key (id), index(value, value2)) engine=rocksdb charset latin1 collate latin1_bin;
+ALTER TABLE t1 collate=latin1_general_ci;
+DROP TABLE t1;
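Note on the exception-list cases above: rocksdb_strict_collation_exceptions is split on both ',' and '|', empty entries are ignored, each surviving entry is a regular expression that must match the whole table name (abc matches abc but not abcd), and an invalid pattern is logged and skipped rather than failing the SET. A sketch under those observed semantics, with illustrative table names:
  SET GLOBAL rocksdb_strict_collation_check = 1;
  SET GLOBAL rocksdb_strict_collation_exceptions = 'logs_.*|tmp[0-9]+';
  -- allowed: "logs_2024" fully matches logs_.*
  CREATE TABLE logs_2024 (id INT PRIMARY KEY, v VARCHAR(50), INDEX(v))
    ENGINE=rocksdb CHARSET utf8;
  -- a table named "payments" with the same definition would still fail the check
  DROP TABLE logs_2024;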
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/collation_exception.result b/storage/rocksdb/mysql-test/rocksdb/r/collation_exception.result
new file mode 100644
index 00000000000..83d72d6c449
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/collation_exception.result
@@ -0,0 +1,25 @@
+CREATE TABLE `r1.lol` (
+`c1` int(10) NOT NULL DEFAULT '0',
+`c2` int(11) NOT NULL DEFAULT '0',
+`c3` int(1) NOT NULL DEFAULT '0',
+`c4` int(11) NOT NULL DEFAULT '0',
+`c5` int(11) NOT NULL DEFAULT '0',
+`c6` varchar(100) NOT NULL DEFAULT '',
+`c7` varchar(100) NOT NULL DEFAULT '',
+`c8` varchar(255) NOT NULL DEFAULT '',
+`c9` int(10) NOT NULL DEFAULT '125',
+`c10` int(10) NOT NULL DEFAULT '125',
+`c11` text NOT NULL,
+`c12` int(11) NOT NULL DEFAULT '0',
+`c13` int(10) NOT NULL DEFAULT '0',
+`c14` text NOT NULL,
+`c15` blob NOT NULL,
+`c16` int(11) NOT NULL DEFAULT '0',
+`c17` int(11) NOT NULL DEFAULT '0',
+`c18` int(11) NOT NULL DEFAULT '0',
+PRIMARY KEY (`c1`),
+KEY i1 (`c4`),
+KEY i2 (`c7`),
+KEY i3 (`c2`)) ENGINE=RocksDB DEFAULT CHARSET=latin1;
+DROP INDEX i1 ON `r1.lol`;
+DROP TABLE `r1.lol`;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/com_rpc_tx.result b/storage/rocksdb/mysql-test/rocksdb/r/com_rpc_tx.result
new file mode 100644
index 00000000000..789ce12e900
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/com_rpc_tx.result
@@ -0,0 +1,21 @@
+CREATE DATABASE db_rpc;
+USE db_rpc;
+CREATE TABLE t1(pk INT PRIMARY KEY) ENGINE=rocksdb;
+SET GLOBAL rocksdb_enable_2pc=1;
+SET autocommit = 0;
+SET autocommit = 0;
+BEGIN;
+BEGIN;
+SELECT * from t1;
+pk
+SELECT * from t1;
+pk
+INSERT INTO t1 VALUES(1);
+INSERT INTO t1 VALUES(2);
+COMMIT;
+COMMIT;
+SELECT * from db_rpc.t1;
+pk
+1
+2
+DROP DATABASE db_rpc;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/commit_in_the_middle_ddl.result b/storage/rocksdb/mysql-test/rocksdb/r/commit_in_the_middle_ddl.result
new file mode 100644
index 00000000000..4d64d12816f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/commit_in_the_middle_ddl.result
@@ -0,0 +1,14 @@
+DROP TABLE IF EXISTS a;
+create table a (id int, value int, primary key (id) comment 'cf_a') engine=rocksdb;
+set rocksdb_bulk_load=1;
+set rocksdb_commit_in_the_middle=1;
+alter table a add index v (value) COMMENT 'cf_a';
+set rocksdb_bulk_load=0;
+set rocksdb_commit_in_the_middle=0;
+select count(*) from a force index(primary);
+count(*)
+100000
+select count(*) from a force index(v);
+count(*)
+100000
+DROP TABLE a;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/compact_deletes.result b/storage/rocksdb/mysql-test/rocksdb/r/compact_deletes.result
new file mode 100644
index 00000000000..5b3cfaf7839
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/compact_deletes.result
@@ -0,0 +1,78 @@
+DROP TABLE IF EXISTS r1;
+create table r1 (
+id1 int,
+id2 int,
+type int,
+value varchar(100),
+value2 int,
+value3 int,
+primary key (type, id1, id2),
+index id1_type (id1, type, value2, value, id2)
+) engine=rocksdb collate latin1_bin;
+select 'loading data';
+loading data
+loading data
+set global rocksdb_force_flush_memtable_now=1;
+optimize table r1;
+Table Op Msg_type Msg_text
+test.r1 optimize status OK
+Test 1: Do a bunch of updates without setting the compaction sysvar
+Expect: no compaction
+set global rocksdb_compaction_sequential_deletes_window=0;
+set global rocksdb_compaction_sequential_deletes= 0;
+set global rocksdb_compaction_sequential_deletes_file_size=0;
+set global rocksdb_force_flush_memtable_now=1;
+wait_for_delete: 0
+There are deletes left
+SET GLOBAL rocksdb_compaction_sequential_deletes= 0;
+SET GLOBAL rocksdb_compaction_sequential_deletes_file_size= 0;
+SET GLOBAL rocksdb_compaction_sequential_deletes_window= 0;
+Test 2: Do a bunch of updates and set the compaction sysvar
+Expect: compaction
+set global rocksdb_compaction_sequential_deletes_window=1000;
+set global rocksdb_compaction_sequential_deletes= 990;
+set global rocksdb_compaction_sequential_deletes_file_size=0;
+set global rocksdb_force_flush_memtable_now=1;
+wait_for_delete: 1
+No more deletes left
+SET GLOBAL rocksdb_compaction_sequential_deletes= 0;
+SET GLOBAL rocksdb_compaction_sequential_deletes_file_size= 0;
+SET GLOBAL rocksdb_compaction_sequential_deletes_window= 0;
+Test 3: Do a bunch of updates and set the compaction sysvar and a file size to something large
+Expect: no compaction
+set global rocksdb_compaction_sequential_deletes_window=1000;
+set global rocksdb_compaction_sequential_deletes= 1000;
+set global rocksdb_compaction_sequential_deletes_file_size=1000000;
+set global rocksdb_force_flush_memtable_now=1;
+wait_for_delete: 0
+There are deletes left
+SET GLOBAL rocksdb_compaction_sequential_deletes= 0;
+SET GLOBAL rocksdb_compaction_sequential_deletes_file_size= 0;
+SET GLOBAL rocksdb_compaction_sequential_deletes_window= 0;
+Test 4: Do a bunch of secondary key updates and set the compaction sysvar
+Expect: compaction
+set global rocksdb_compaction_sequential_deletes_window=1000;
+set global rocksdb_compaction_sequential_deletes= 50;
+set global rocksdb_compaction_sequential_deletes_file_size=0;
+set global rocksdb_force_flush_memtable_now=1;
+wait_for_delete: 1
+No more deletes left
+SET GLOBAL rocksdb_compaction_sequential_deletes= 0;
+SET GLOBAL rocksdb_compaction_sequential_deletes_file_size= 0;
+SET GLOBAL rocksdb_compaction_sequential_deletes_window= 0;
+Test 5: Do a bunch of secondary key updates and set the compaction sysvar,
+and rocksdb_compaction_sequential_deletes_count_sd turned on
+Expect: compaction
+SET @save_rocksdb_compaction_sequential_deletes_count_sd = @@global.rocksdb_compaction_sequential_deletes_count_sd;
+SET GLOBAL rocksdb_compaction_sequential_deletes_count_sd= ON;
+set global rocksdb_compaction_sequential_deletes_window=1000;
+set global rocksdb_compaction_sequential_deletes= 50;
+set global rocksdb_compaction_sequential_deletes_file_size=0;
+set global rocksdb_force_flush_memtable_now=1;
+wait_for_delete: 1
+No more deletes left
+SET GLOBAL rocksdb_compaction_sequential_deletes= 0;
+SET GLOBAL rocksdb_compaction_sequential_deletes_file_size= 0;
+SET GLOBAL rocksdb_compaction_sequential_deletes_window= 0;
+SET GLOBAL rocksdb_compaction_sequential_deletes_count_sd= @save_rocksdb_compaction_sequential_deletes_count_sd;
+drop table r1;
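Note on the sysvars above, roughly as these tests exercise them: a file becomes a compaction candidate when, within a sliding window of rocksdb_compaction_sequential_deletes_window index entries, at least rocksdb_compaction_sequential_deletes of them are delete markers, provided the file is at least rocksdb_compaction_sequential_deletes_file_size bytes (0 disables the size filter); the _count_sd switch also counts SingleDelete markers. Test 2's triggering configuration, restated:
  SET GLOBAL rocksdb_compaction_sequential_deletes_window    = 1000;
  SET GLOBAL rocksdb_compaction_sequential_deletes           = 990;  -- >= 990 deletes per window
  SET GLOBAL rocksdb_compaction_sequential_deletes_file_size = 0;    -- any file size qualifies
  SET GLOBAL rocksdb_force_flush_memtable_now = 1;  -- flush so the delete markers reach an SST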
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/compression_zstd.result b/storage/rocksdb/mysql-test/rocksdb/r/compression_zstd.result
new file mode 100644
index 00000000000..62a6dbbdaca
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/compression_zstd.result
@@ -0,0 +1,2 @@
+create table t (id int primary key) engine=rocksdb;
+drop table t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/concurrent_alter.result b/storage/rocksdb/mysql-test/rocksdb/r/concurrent_alter.result
new file mode 100644
index 00000000000..b8c73b2418e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/concurrent_alter.result
@@ -0,0 +1,12 @@
+DROP DATABASE IF EXISTS mysqlslap;
+CREATE DATABASE mysqlslap;
+use mysqlslap;
+CREATE TABLE a1 (a int, b int) ENGINE=ROCKSDB;
+INSERT INTO a1 VALUES (1, 1);
+SHOW CREATE TABLE a1;
+Table Create Table
+a1 CREATE TABLE `a1` (
+ `a` int(11) DEFAULT NULL,
+ `b` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=DEFAULT_CHARSET
+DROP DATABASE mysqlslap;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_read_committed.result b/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_read_committed.result
new file mode 100644
index 00000000000..637354e013e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_read_committed.result
@@ -0,0 +1,151 @@
+DROP TABLE IF EXISTS t1;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+CREATE TABLE t1 (a INT, pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=ROCKSDB;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+ERROR: 4062
+connection con2;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+STAT_TYPE VALUE
+DB_NUM_SNAPSHOTS 0
+connection con1;
+COMMIT;
+connection con2;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+STAT_TYPE VALUE
+DB_NUM_SNAPSHOTS 0
+connection con1;
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+ERROR: 4062
+connection con2;
+INSERT INTO t1 (a) VALUES (1);
+connection con1;
+# If consistent read works on this isolation level (READ COMMITTED), the following SELECT should not return the value we inserted (1)
+SELECT a FROM t1;
+a
+1
+COMMIT;
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t1;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+CREATE TABLE r1 (id int primary key, value int, value2 int) engine=ROCKSDB;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+insert into r1 values (1,1,1),(2,2,2),(3,3,3),(4,4,4);
+BEGIN;
+connection con2;
+INSERT INTO r1 values (5,5,5);
+connection con1;
+SELECT * FROM r1;
+id value value2
+1 1 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+connection con2;
+INSERT INTO r1 values (6,6,6);
+connection con1;
+SELECT * FROM r1;
+id value value2
+1 1 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+COMMIT;
+SELECT * FROM r1;
+id value value2
+1 1 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+ERROR: 4062
+connection con2;
+INSERT INTO r1 values (7,7,7);
+connection con1;
+SELECT * FROM r1;
+id value value2
+1 1 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+7 7 7
+connection con2;
+INSERT INTO r1 values (8,8,8);
+connection con1;
+SELECT * FROM r1;
+id value value2
+1 1 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+7 7 7
+8 8 8
+COMMIT;
+SELECT * FROM r1;
+id value value2
+1 1 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+7 7 7
+8 8 8
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+ERROR: 4062
+connection con2;
+INSERT INTO r1 values (9,9,9);
+connection con1;
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+ERROR: 4062
+connection con2;
+INSERT INTO r1 values (10,10,10);
+connection con1;
+SELECT * FROM r1;
+id value value2
+1 1 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+7 7 7
+8 8 8
+9 9 9
+10 10 10
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+ERROR: 4062
+INSERT INTO r1 values (11,11,11);
+ERROR: 0
+SELECT * FROM r1;
+id value value2
+1 1 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+7 7 7
+8 8 8
+9 9 9
+10 10 10
+11 11 11
+drop table r1;
+connection default;
+disconnect con1;
+disconnect con2;
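Note on the repeated "ERROR: 4062" above: MyRocks takes a consistent snapshot only under REPEATABLE READ, so under READ COMMITTED the START TRANSACTION WITH CONSISTENT SNAPSHOT fails (error 4062 in this build), DB_NUM_SNAPSHOTS stays 0, every SELECT sees rows committed by the other connection, and the transaction stays writable (the final INSERT reports ERROR: 0). Minimal reproduction:
  SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
  START TRANSACTION WITH CONSISTENT SNAPSHOT;  -- rejected: snapshot needs REPEATABLE READ
  SELECT a FROM t1;                            -- sees other sessions' committed inserts
  COMMIT;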
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_repeatable_read.result b/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_repeatable_read.result
new file mode 100644
index 00000000000..d9be37ee18d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_repeatable_read.result
@@ -0,0 +1,144 @@
+DROP TABLE IF EXISTS t1;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+CREATE TABLE t1 (a INT, pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=ROCKSDB;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+ERROR: 0
+connection con2;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+STAT_TYPE VALUE
+DB_NUM_SNAPSHOTS 1
+connection con1;
+COMMIT;
+connection con2;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+STAT_TYPE VALUE
+DB_NUM_SNAPSHOTS 0
+connection con1;
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+ERROR: 0
+connection con2;
+INSERT INTO t1 (a) VALUES (1);
+connection con1;
+# If consistent read works on this isolation level (REPEATABLE READ), the following SELECT should not return the value we inserted (1)
+SELECT a FROM t1;
+a
+COMMIT;
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t1;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+CREATE TABLE r1 (id int primary key, value int, value2 int) engine=ROCKSDB;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+insert into r1 values (1,1,1),(2,2,2),(3,3,3),(4,4,4);
+BEGIN;
+connection con2;
+INSERT INTO r1 values (5,5,5);
+connection con1;
+SELECT * FROM r1;
+id value value2
+1 1 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+connection con2;
+INSERT INTO r1 values (6,6,6);
+connection con1;
+SELECT * FROM r1;
+id value value2
+1 1 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+COMMIT;
+SELECT * FROM r1;
+id value value2
+1 1 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+ERROR: 0
+connection con2;
+INSERT INTO r1 values (7,7,7);
+connection con1;
+SELECT * FROM r1;
+id value value2
+1 1 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+connection con2;
+INSERT INTO r1 values (8,8,8);
+connection con1;
+SELECT * FROM r1;
+id value value2
+1 1 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+COMMIT;
+SELECT * FROM r1;
+id value value2
+1 1 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+7 7 7
+8 8 8
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+ERROR: 0
+connection con2;
+INSERT INTO r1 values (9,9,9);
+connection con1;
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+ERROR: 0
+connection con2;
+INSERT INTO r1 values (10,10,10);
+connection con1;
+SELECT * FROM r1;
+id value value2
+1 1 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+7 7 7
+8 8 8
+9 9 9
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+ERROR: 0
+INSERT INTO r1 values (11,11,11);
+ERROR: 4059
+SELECT * FROM r1;
+id value value2
+1 1 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+7 7 7
+8 8 8
+9 9 9
+10 10 10
+drop table r1;
+connection default;
+disconnect con1;
+disconnect con2;
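Note on the REPEATABLE READ variant above: here the snapshot is taken (ERROR: 0, and DB_NUM_SNAPSHOTS rises to 1), reads are frozen at snapshot time, but the snapshot transaction is read-only, so the INSERT fails with error 4059. In outline:
  SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
  START TRANSACTION WITH CONSISTENT SNAPSHOT;  -- snapshot taken
  SELECT * FROM r1;                            -- later commits by others stay invisible
  INSERT INTO r1 VALUES (11,11,11);            -- fails with 4059: no writes in this snapshot
  COMMIT;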
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_serializable.result b/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_serializable.result
new file mode 100644
index 00000000000..9c55b0dd689
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_serializable.result
@@ -0,0 +1,24 @@
+# -- WARNING ----------------------------------------------------------------
+# According to I_S.ENGINES, this engine does not support transactions.
+# If it is true, the test will most likely fail; you can
+# either create an rdiff file, or add the test to disabled.def.
+# If transactions should be supported, check the data in Information Schema.
+# ---------------------------------------------------------------------------
+DROP TABLE IF EXISTS t1;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+CREATE TABLE t1 (a INT, pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=ROCKSDB;
+SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+connection con2;
+INSERT INTO t1 (a) VALUES (1);
+connection con1;
+# If consistent read works on this isolation level (SERIALIZABLE), the following SELECT should not return the value we inserted (1)
+SELECT a FROM t1;
+a
+COMMIT;
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/corrupted_data_reads_debug.result b/storage/rocksdb/mysql-test/rocksdb/r/corrupted_data_reads_debug.result
new file mode 100644
index 00000000000..5cf9c620341
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/corrupted_data_reads_debug.result
@@ -0,0 +1,74 @@
+#
+# Test how MyRocks handles reading corrupted data from disk.
+# Data corruption is simulated at source-code level.
+#
+#
+# A test for case when data in the table *record* is longer
+# than table DDL expects it to be
+#
+create table t1 (
+pk int not null primary key,
+col1 varchar(10)
+) engine=rocksdb;
+insert into t1 values (1,1),(2,2),(3,3);
+select * from t1;
+pk col1
+1 1
+2 2
+3 3
+set @tmp1=@@rocksdb_verify_row_debug_checksums;
+set rocksdb_verify_row_debug_checksums=1;
+set session debug_dbug= "+d,myrocks_simulate_bad_row_read1";
+select * from t1 where pk=1;
+ERROR HY000: Got error 200 'Found data corruption.' from ROCKSDB
+set session debug_dbug= "-d,myrocks_simulate_bad_row_read1";
+set rocksdb_verify_row_debug_checksums=@tmp1;
+select * from t1 where pk=1;
+pk col1
+1 1
+set session debug_dbug= "+d,myrocks_simulate_bad_row_read2";
+select * from t1 where pk=1;
+ERROR HY000: Got error 200 'Found data corruption.' from ROCKSDB
+set session debug_dbug= "-d,myrocks_simulate_bad_row_read2";
+set session debug_dbug= "+d,myrocks_simulate_bad_row_read3";
+select * from t1 where pk=1;
+ERROR HY000: Got error 200 'Found data corruption.' from ROCKSDB
+set session debug_dbug= "-d,myrocks_simulate_bad_row_read3";
+insert into t1 values(4,'0123456789');
+select * from t1;
+pk col1
+1 1
+2 2
+3 3
+4 0123456789
+drop table t1;
+#
+# A test for case when index data is longer than table DDL
+# expects it to be
+#
+create table t2 (
+pk varchar(4) not null primary key,
+col1 int not null
+) engine=rocksdb collate latin1_bin;
+insert into t2 values ('ABCD',1);
+select * from t2;
+pk col1
+ABCD 1
+set session debug_dbug= "+d,myrocks_simulate_bad_pk_read1";
+select * from t2;
+ERROR HY000: Got error 200 'Found data corruption.' from ROCKSDB
+set session debug_dbug= "-d,myrocks_simulate_bad_pk_read1";
+drop table t2;
+create table t2 (
+pk varchar(4) not null primary key,
+col1 int not null
+) engine=rocksdb;
+insert into t2 values ('ABCD',1);
+select * from t2;
+pk col1
+ABCD 1
+set session debug_dbug= "+d,myrocks_simulate_bad_pk_read1";
+select * from t2;
+ERROR HY000: Got error 200 'Found data corruption.' from ROCKSDB
+set session debug_dbug= "-d,myrocks_simulate_bad_pk_read1";
+drop table t2;
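Note on the fault-injection pattern above: a named debug point is switched on with debug_dbug = '+d,<name>' and back off with '-d,<name>'; for the first case, reads must also validate per-row checksums (rocksdb_verify_row_debug_checksums=1) before the simulated corruption is reported as error 200. Each case follows the same shape:
  SET @saved = @@rocksdb_verify_row_debug_checksums;
  SET rocksdb_verify_row_debug_checksums = 1;
  SET SESSION debug_dbug = '+d,myrocks_simulate_bad_row_read1';
  SELECT * FROM t1 WHERE pk = 1;   -- now fails: Got error 200 'Found data corruption.'
  SET SESSION debug_dbug = '-d,myrocks_simulate_bad_row_read1';
  SET rocksdb_verify_row_debug_checksums = @saved;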
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/covered_unpack_info_format.result b/storage/rocksdb/mysql-test/rocksdb/r/covered_unpack_info_format.result
new file mode 100644
index 00000000000..195215331b8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/covered_unpack_info_format.result
@@ -0,0 +1,73 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+id INT,
+fake_id INT,
+bigfield VARCHAR(4096),
+PRIMARY KEY (id),
+KEY bf (bigfield(32)),
+KEY fid (fake_id, bigfield(32))
+) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (1, 1001, REPEAT('a', 1)),
+(8, 1008, REPEAT('b', 8)),
+(24, 1024, REPEAT('c', 24)),
+(31, 1031, REPEAT('d', 31)),
+(32, 1032, REPEAT('x', 32)),
+(33, 1033, REPEAT('y', 33)),
+(128, 1128, REPEAT('z', 128));
+SELECT * FROM t1;
+id fake_id bigfield
+1 1001 a
+8 1008 bbbbbbbb
+24 1024 cccccccccccccccccccccccc
+31 1031 ddddddddddddddddddddddddddddddd
+32 1032 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+33 1033 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+128 1128 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+# Eligible for optimization, access via fake_id only
+SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1031;
+id bigfield
+31 ddddddddddddddddddddddddddddddd
+include/assert.inc [Eligible for optimization, access via fake_id only: 2 rocksdb_covered_secondary_key_lookups]
+# Not eligible for optimization, access via fake_id of big row.
+SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1033;
+id bigfield
+33 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+include/assert.inc [Not eligible for optimization, access via fake_id of big row.: 0 rocksdb_covered_secondary_key_lookups]
+DROP TABLE t1;
+set session debug= '+d,MYROCKS_NO_COVERED_BITMAP_FORMAT';
+CREATE TABLE t1 (
+id INT,
+fake_id INT,
+bigfield VARCHAR(4096),
+PRIMARY KEY (id),
+KEY bf (bigfield(32)),
+KEY fid (fake_id, bigfield(32))
+) ENGINE=rocksdb;
+set session debug= '-d,MYROCKS_NO_COVERED_BITMAP_FORMAT';
+INSERT INTO t1 VALUES (1, 1001, REPEAT('a', 1)),
+(8, 1008, REPEAT('b', 8)),
+(24, 1024, REPEAT('c', 24)),
+(31, 1031, REPEAT('d', 31)),
+(32, 1032, REPEAT('x', 32)),
+(33, 1033, REPEAT('y', 33)),
+(128, 1128, REPEAT('z', 128));
+SELECT * FROM t1;
+id fake_id bigfield
+1 1001 a
+8 1008 bbbbbbbb
+24 1024 cccccccccccccccccccccccc
+31 1031 ddddddddddddddddddddddddddddddd
+32 1032 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+33 1033 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+128 1128 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+# No longer eligible for optimization since no covered bitmap was stored.
+SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1031;
+id bigfield
+31 ddddddddddddddddddddddddddddddd
+include/assert.inc [No longer eligible for optimization since no covered bitmap was stored.: 0 rocksdb_covered_secondary_key_lookups]
+# Not eligible for optimization.
+SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1033;
+id bigfield
+33 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+include/assert.inc [Not eligible for optimization.: 0 rocksdb_covered_secondary_key_lookups]
+DROP TABLE t1;
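Note on the assertions above: they diff the rocksdb_covered_secondary_key_lookups status counter around each SELECT; the prefix index fid (fake_id, bigfield(32)) can satisfy the whole row only when the stored covered bitmap proves the value fits inside the prefix, which is why the 33-byte row and the no-bitmap format both count 0. A hand-run equivalent of the check (a sketch; I_S.GLOBAL_STATUS is the MariaDB spelling):
  SELECT VARIABLE_VALUE INTO @before FROM INFORMATION_SCHEMA.GLOBAL_STATUS
  WHERE VARIABLE_NAME = 'rocksdb_covered_secondary_key_lookups';
  SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1031;
  SELECT VARIABLE_VALUE INTO @after FROM INFORMATION_SCHEMA.GLOBAL_STATUS
  WHERE VARIABLE_NAME = 'rocksdb_covered_secondary_key_lookups';
  SELECT @after - @before AS covered_lookups;  -- 2 in the covered case, 0 otherwise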
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/create_no_primary_key_table.result b/storage/rocksdb/mysql-test/rocksdb/r/create_no_primary_key_table.result
new file mode 100644
index 00000000000..1c45cfd09fe
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/create_no_primary_key_table.result
@@ -0,0 +1,52 @@
+USE mysql;
+CREATE TABLE mysql_table (a INT) ENGINE=ROCKSDB;
+CREATE TABLE test.mysql_table (a INT) ENGINE=ROCKSDB;
+ERROR HY000: Table without primary key cannot be created outside mysql schema.
+USE test;
+CREATE TABLE mysql_table (a INT) ENGINE=ROCKSDB;
+ERROR HY000: Table without primary key cannot be created outside mysql schema.
+CREATE TABLE IF NOT EXISTS mysql_table_2 (a INT) ENGINE=ROCKSDB;
+ERROR HY000: Table without primary key cannot be created outside mysql schema.
+CREATE TABLE mysql_table_no_cols ENGINE=ROCKSDB;
+ERROR 42000: A table must have at least 1 column
+CREATE TABLE mysql.mysql_table_2 (a INT) ENGINE=ROCKSDB;
+CREATE TABLE mysql_primkey (a INT PRIMARY KEY, b INT, c INT, d INT, INDEX (c)) ENGINE=ROCKSDB;
+ALTER TABLE mysql_primkey DROP b, DROP a, ADD (f INT PRIMARY KEY);
+ALTER TABLE mysql_primkey DROP PRIMARY KEY;
+ERROR HY000: Table without primary key cannot be created outside mysql schema.
+CREATE TABLE mysql_primkey2 (a INT PRIMARY KEY, b INT, c INT) ENGINE=ROCKSDB;
+ALTER TABLE mysql_primkey2 DROP b;
+ALTER TABLE mysql_primkey2 ADD (b INT);
+ALTER TABLE mysql_primkey2 DROP c, DROP A;
+ERROR HY000: Table without primary key cannot be created outside mysql schema.
+CREATE TABLE mysql_primkey3 (a INT PRIMARY KEY, b INT, c INT, INDEX indexonb (b), INDEX indexonc (c)) ENGINE=ROCKSDB;
+ALTER TABLE mysql_primkey3 DROP INDEX indexonb;
+ALTER TABLE mysql_primkey3 DROP c;
+ALTER TABLE mysql_primkey3 DROP PRIMARY KEY, ADD PRIMARY KEY(b);
+CREATE TABLE mysql_primkey4(a INT, b INT, PRIMARY KEY(a), INDEX si (a, b)) ENGINE=ROCKSDB;
+DROP INDEX si ON mysql_primkey4;
+DROP INDEX `PRIMARY` ON mysql_primkey4;
+ERROR HY000: Table without primary key cannot be created outside mysql schema.
+ALTER TABLE mysql.mysql_table ADD PRIMARY KEY (a);
+ALTER TABLE mysql.mysql_table DROP PRIMARY KEY;
+SET default_storage_engine=ROCKSDB;
+CREATE TABLE mysql_noeng(a INT, b INT);
+ERROR HY000: Table without primary key cannot be created outside mysql schema.
+SET sql_mode="";
+CREATE TABLE mysql_noeng_sub(a INT, b INT) ENGINE=BOGUS_ENGINE;
+ERROR HY000: Table without primary key cannot be created outside mysql schema.
+CREATE TABLE mysql_primkey5 LIKE mysql_primkey;
+SET @@global.block_create_no_primary_key = false;
+CREATE TABLE mysql_no_primkey (a INT) ENGINE=ROCKSDB;
+SET @@global.block_create_no_primary_key = true;
+CREATE TABLE mysql_block_no_primkey LIKE mysql_no_primkey;
+ERROR HY000: Table without primary key cannot be created outside mysql schema.
+DROP TABLE mysql_primkey;
+DROP TABLE mysql_primkey2;
+DROP TABLE mysql_primkey3;
+DROP TABLE mysql_primkey4;
+DROP TABLE mysql_primkey5;
+DROP TABLE mysql_no_primkey;
+USE mysql;
+DROP TABLE mysql_table;
+DROP TABLE mysql_table_2;
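Note on the guard above: with the block_create_no_primary_key global enabled, a RocksDB table outside the mysql schema must have a primary key for its whole lifetime; CREATE without one, DROP PRIMARY KEY, dropping the last key column, and CREATE ... LIKE a keyless table are all rejected, while the mysql schema itself is exempt. In brief (no_pk_demo is an illustrative name):
  SET @@global.block_create_no_primary_key = true;
  CREATE TABLE no_pk_demo (a INT) ENGINE=ROCKSDB;  -- rejected outside `mysql`
  SET @@global.block_create_no_primary_key = false;
  CREATE TABLE no_pk_demo (a INT) ENGINE=ROCKSDB;  -- allowed while the guard is off
  DROP TABLE no_pk_demo;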
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/create_table.result b/storage/rocksdb/mysql-test/rocksdb/r/create_table.result
new file mode 100644
index 00000000000..8c879d82611
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/create_table.result
@@ -0,0 +1,165 @@
+DROP TABLE IF EXISTS t1,t2;
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+CREATE TABLE IF NOT EXISTS t1 (a INT PRIMARY KEY) ENGINE=rocksdb;
+Warnings:
+Note 1050 Table 't1' already exists
+CREATE TABLE t2 LIKE t1;
+SHOW CREATE TABLE t2;
+Table Create Table
+t2 CREATE TABLE `t2` (
+ `a` int(11) NOT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+CREATE TEMPORARY TABLE t2 (a INT PRIMARY KEY) ENGINE=rocksdb;
+ERROR HY000: Table storage engine 'ROCKSDB' does not support the create option 'TEMPORARY'
+DROP TABLE t2;
+DROP TABLE IF EXISTS t1;
+SET default_storage_engine = rocksdb;
+CREATE TABLE t1 (a INT PRIMARY KEY);
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+DROP TABLE t1;
+CREATE TABLE t1 (a INT PRIMARY KEY) AS SELECT 1 AS a UNION SELECT 2 AS a;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+SELECT * FROM t1;
+a
+1
+2
+FLUSH LOGS;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1(c1 INT,c2 INT,c3 INT,c4 INT,c5 INT,c6 INT,c7 INT,c8 INT,c9 INT,
+c10 INT,c11 INT,c12 INT,c13 INT,c14 INT,c15 INT,c16 INT,c17 INT,
+c18 INT,c19 INT,c20 INT,c21 INT,c22 INT,c23 INT,c24 INT,c25 INT,
+c26 INT,c27 INT,c28 INT,c29 INT,c30 INT,c31 INT,c32 INT,c33 INT,
+c34 INT,c35 INT,c36 INT,c37 INT,c38 INT,c39 INT,c40 INT,c41 INT,
+c42 INT,c43 INT,c44 INT,c45 INT,c46 INT,c47 INT,c48 INT,c49 INT,
+c50 INT,c51 INT,c52 INT,c53 INT,c54 INT,c55 INT,c56 INT,c57 INT,
+c58 INT,c59 INT,c60 INT,c61 INT,c62 INT,c63 INT,c64 INT,c65 INT,
+c66 INT,c67 INT,c68 INT,c69 INT,c70 INT,c71 INT,c72 INT,c73 INT,
+c74 INT,c75 INT,c76 INT,c77 INT,c78 INT,c79 INT,c80 INT,c81 INT,
+c82 INT,c83 INT,c84 INT,c85 INT,c86 INT,c87 INT,c88 INT,c89 INT,
+c90 INT,c91 INT,c92 INT,c93 INT,c94 INT,c95 INT,c96 INT,c97 INT,
+c98 INT,c99 INT,c100 INT,c101 INT,c102 INT,c103 INT,c104 INT,
+c105 INT,c106 INT,c107 INT,c108 INT,c109 INT,c110 INT,c111 INT,
+c112 INT,c113 INT,c114 INT,c115 INT,c116 INT,c117 INT,c118 INT,
+c119 INT,c120 INT,c121 INT,c122 INT,c123 INT,c124 INT,c125 INT,
+c126 INT,c127 INT,c128 INT,c129 INT,c130 INT,c131 INT,c132 INT,
+c133 INT,c134 INT,c135 INT,c136 INT,c137 INT,c138 INT,c139 INT,
+c140 INT,c141 INT,c142 INT,c143 INT,c144 INT,c145 INT,c146 INT,
+c147 INT,c148 INT,c149 INT,c150 INT,c151 INT,c152 INT,c153 INT,
+c154 INT,c155 INT,c156 INT,c157 INT,c158 INT,c159 INT,c160 INT,
+c161 INT,c162 INT,c163 INT,c164 INT,c165 INT,c166 INT,c167 INT,
+c168 INT,c169 INT,c170 INT,c171 INT,c172 INT,c173 INT,c174 INT,
+c175 INT,c176 INT,c177 INT,c178 INT,c179 INT,c180 INT,c181 INT,
+c182 INT,c183 INT,c184 INT,c185 INT,c186 INT,c187 INT,c188 INT,
+c189 INT,c190 INT,c191 INT,c192 INT,c193 INT,c194 INT,c195 INT,
+c196 INT,c197 INT,c198 INT,c199 INT,c200 INT,c201 INT,c202 INT,
+c203 INT,c204 INT,c205 INT,c206 INT,c207 INT,c208 INT,c209 INT,
+c210 INT,c211 INT,c212 INT,c213 INT,c214 INT,c215 INT,c216 INT,
+c217 INT,c218 INT,c219 INT,c220 INT,c221 INT,c222 INT,c223 INT,
+c224 INT,c225 INT,c226 INT,c227 INT,c228 INT,c229 INT,c230 INT,
+c231 INT,c232 INT,c233 INT,c234 INT,c235 INT,c236 INT,c237 INT,
+c238 INT,c239 INT,c240 INT,c241 INT,c242 INT,c243 INT,c244 INT,
+c245 INT,c246 INT,c247 INT,c248 INT,c249 INT,c250 INT,c251 INT,
+c252 INT,c253 INT,c254 INT,c255 INT,c256 INT,c257 INT,c258 INT,
+c259 INT,c260 INT,c261 INT,c262 INT,c263 INT,c264 INT,c265 INT,
+c266 INT,c267 INT,c268 INT,c269 INT,c270 INT,c271 INT,c272 INT,
+c273 INT,c274 INT,c275 INT,c276 INT,c277 INT,c278 INT,c279 INT,
+c280 INT,c281 INT,c282 INT,c283 INT,c284 INT,c285 INT,c286 INT,
+c287 INT,c288 INT,c289 INT,c290 INT,c291 INT,c292 INT,c293 INT,
+c294 INT,c295 INT,c296 INT,c297 INT,c298 INT,c299 INT,c300 INT,
+c301 INT,c302 INT,c303 INT,c304 INT,c305 INT,c306 INT,c307 INT,
+c308 INT,c309 INT,c310 INT,c311 INT,c312 INT,c313 INT,c314 INT,
+c315 INT,c316 INT,c317 INT,c318 INT,c319 INT,c320 INT,c321 INT,
+c322 INT,c323 INT,c324 INT,c325 INT,c326 INT,c327 INT,c328 INT,
+c329 INT,c330 INT,c331 INT,c332 INT,c333 INT,c334 INT,c335 INT,
+c336 INT,c337 INT,c338 INT,c339 INT,c340 INT,c341 INT,c342 INT,
+c343 INT,c344 INT,c345 INT,c346 INT,c347 INT,c348 INT,c349 INT,
+c350 INT,c351 INT,c352 INT,c353 INT,c354 INT,c355 INT,c356 INT,
+c357 INT,c358 INT,c359 INT,c360 INT,c361 INT,c362 INT,c363 INT,
+c364 INT,c365 INT,c366 INT,c367 INT,c368 INT,c369 INT,c370 INT,
+c371 INT,c372 INT,c373 INT,c374 INT,c375 INT,c376 INT,c377 INT,
+c378 INT,c379 INT,c380 INT,c381 INT,c382 INT,c383 INT,c384 INT,
+c385 INT,c386 INT,c387 INT,c388 INT,c389 INT,c390 INT,c391 INT,
+c392 INT,c393 INT,c394 INT,c395 INT,c396 INT,c397 INT,c398 INT,
+c399 INT,c400 INT,c401 INT,c402 INT,c403 INT,c404 INT,c405 INT,
+c406 INT,c407 INT,c408 INT,c409 INT,c410 INT,c411 INT,c412 INT,
+c413 INT,c414 INT,c415 INT,c416 INT,c417 INT,c418 INT,c419 INT,
+c420 INT,c421 INT,c422 INT,c423 INT,c424 INT,c425 INT,c426 INT,
+c427 INT,c428 INT,c429 INT,c430 INT,c431 INT,c432 INT,c433 INT,
+c434 INT,c435 INT,c436 INT,c437 INT,c438 INT,c439 INT,c440 INT,
+c441 INT,c442 INT,c443 INT,c444 INT,c445 INT,c446 INT,c447 INT,
+c448 INT,
+KEY (c1,c2,c3,c4,c5,c6,c7),KEY (c8,c9,c10,c11,c12,c13,c14),
+KEY (c15,c16,c17,c18,c19,c20,c21),KEY (c22,c23,c24,c25,c26,c27,c28),
+KEY (c29,c30,c31,c32,c33,c34,c35),KEY (c36,c37,c38,c39,c40,c41,c42),
+KEY (c43,c44,c45,c46,c47,c48,c49),KEY (c50,c51,c52,c53,c54,c55,c56),
+KEY (c57,c58,c59,c60,c61,c62,c63),KEY (c64,c65,c66,c67,c68,c69,c70),
+KEY (c71,c72,c73,c74,c75,c76,c77),KEY (c78,c79,c80,c81,c82,c83,c84),
+KEY (c85,c86,c87,c88,c89,c90,c91),KEY (c92,c93,c94,c95,c96,c97,c98),
+KEY (c99,c100,c101,c102,c103,c104,c105),
+KEY (c106,c107,c108,c109,c110,c111,c112),
+KEY (c113,c114,c115,c116,c117,c118,c119),
+KEY (c120,c121,c122,c123,c124,c125,c126),
+KEY (c127,c128,c129,c130,c131,c132,c133),
+KEY (c134,c135,c136,c137,c138,c139,c140),
+KEY (c141,c142,c143,c144,c145,c146,c147),
+KEY (c148,c149,c150,c151,c152,c153,c154),
+KEY (c155,c156,c157,c158,c159,c160,c161),
+KEY (c162,c163,c164,c165,c166,c167,c168),
+KEY (c169,c170,c171,c172,c173,c174,c175),
+KEY (c176,c177,c178,c179,c180,c181,c182),
+KEY (c183,c184,c185,c186,c187,c188,c189),
+KEY (c190,c191,c192,c193,c194,c195,c196),
+KEY (c197,c198,c199,c200,c201,c202,c203),
+KEY (c204,c205,c206,c207,c208,c209,c210),
+KEY (c211,c212,c213,c214,c215,c216,c217),
+KEY (c218,c219,c220,c221,c222,c223,c224),
+KEY (c225,c226,c227,c228,c229,c230,c231),
+KEY (c232,c233,c234,c235,c236,c237,c238),
+KEY (c239,c240,c241,c242,c243,c244,c245),
+KEY (c246,c247,c248,c249,c250,c251,c252),
+KEY (c253,c254,c255,c256,c257,c258,c259),
+KEY (c260,c261,c262,c263,c264,c265,c266),
+KEY (c267,c268,c269,c270,c271,c272,c273),
+KEY (c274,c275,c276,c277,c278,c279,c280),
+KEY (c281,c282,c283,c284,c285,c286,c287),
+KEY (c288,c289,c290,c291,c292,c293,c294),
+KEY (c295,c296,c297,c298,c299,c300,c301),
+KEY (c302,c303,c304,c305,c306,c307,c308),
+KEY (c309,c310,c311,c312,c313,c314,c315),
+KEY (c316,c317,c318,c319,c320,c321,c322),
+KEY (c323,c324,c325,c326,c327,c328,c329),
+KEY (c330,c331,c332,c333,c334,c335,c336),
+KEY (c337,c338,c339,c340,c341,c342,c343),
+KEY (c344,c345,c346,c347,c348,c349,c350),
+KEY (c351,c352,c353,c354,c355,c356,c357),
+KEY (c358,c359,c360,c361,c362,c363,c364),
+KEY (c365,c366,c367,c368,c369,c370,c371),
+KEY (c372,c373,c374,c375,c376,c377,c378),
+KEY (c379,c380,c381,c382,c383,c384,c385),
+KEY (c386,c387,c388,c389,c390,c391,c392),
+KEY (c393,c394,c395,c396,c397,c398,c399),
+KEY (c400,c401,c402,c403,c404,c405,c406),
+KEY (c407,c408,c409,c410,c411,c412,c413),
+KEY (c414,c415,c416,c417,c418,c419,c420),
+KEY (c421,c422,c423,c424,c425,c426,c427),
+KEY (c428,c429,c430,c431,c432,c433,c434),
+KEY (c435,c436,c437,c438,c439,c440,c441),
+KEY (c442,c443,c444,c445,c446,c447,c448));
+DROP TABLE t1;
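Note on the cases above: the engine refuses the TEMPORARY option outright, while plain CREATE, IF NOT EXISTS, CREATE ... LIKE, and CREATE ... AS SELECT behave as with any engine; the 448-column table carries 64 indexes, which is the usual server-wide ceiling on keys per table. Restated briefly (tmp_demo and ctas_demo are illustrative names):
  CREATE TEMPORARY TABLE tmp_demo (a INT PRIMARY KEY) ENGINE=rocksdb;  -- fails: 'TEMPORARY' unsupported
  CREATE TABLE ctas_demo (a INT PRIMARY KEY) AS SELECT 1 AS a;         -- populated from the SELECT
  DROP TABLE ctas_demo;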
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result b/storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result
new file mode 100644
index 00000000000..1e2636c873a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result
@@ -0,0 +1,1058 @@
+##
+## Using the system variable high_priority_ddl
+##
+create user test_user1@localhost;
+grant all on test to test_user1@localhost;
+create user test_user2@localhost;
+grant all on test to test_user2@localhost;
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = alter table t1 modify i bigint;
+## high_priority_cmd = alter high_priority table t1 modify i bigint;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+connection: default
+alter table t1 modify i bigint;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+rename table t1 to t2;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+alter table t1 modify i bigint;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = alter table t1 rename t1_new;
+## high_priority_cmd = alter high_priority table t1 rename t1_new;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+begin; insert into t1 values (4); select i from t1;;
+i
+1
+2
+3
+4
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+connection: default
+alter table t1 rename t1_new;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+rename table t1 to t2;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+alter table t1 rename t1_new;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+select * from t1_new;
+i
+1
+2
+3
+drop table t1_new;
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = drop table t1;
+## high_priority_cmd = drop high_priority table t1;
+## should_kill = 0
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 write;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+connection: default
+drop table t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+rename table t1 to t2;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop table t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = drop table t1;
+## high_priority_cmd = drop high_priority table t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read; begin; insert into t1 values (4);;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+connection: default
+drop table t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+rename table t1 to t2;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop table t1;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = con2
+## cmd = alter table t1 modify i bigint;
+## high_priority_cmd = alter high_priority table t1 modify i bigint;
+## should_kill = 0
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: con2
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and con2 exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+connection: con2
+alter table t1 modify i bigint;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+rename table t1 to t2;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+alter table t1 modify i bigint;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = create index idx1 on t1 (i);
+## high_priority_cmd = create high_priority index idx1 on t1 (i);
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+connection: default
+create index idx1 on t1 (i);;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+rename table t1 to t2;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+create index idx1 on t1 (i);;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = drop index idx1 on t1;
+## high_priority_cmd = drop high_priority index idx1 on t1;
+## should_kill = 1
+## recreate_table = 0
+## throw_error = 1
+
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES MUL NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+connection: default
+drop index idx1 on t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+rename table t1 to t2;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop index idx1 on t1;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = truncate t1;
+## high_priority_cmd = truncate high_priority t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+connection: default
+truncate t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+rename table t1 to t2;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+truncate t1;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;
+## high_priority_cmd = create high_priority trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+connection: default
+create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+rename table t1 to t2;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = drop trigger ins_sum;
+## high_priority_cmd = drop high_priority trigger ins_sum;
+## should_kill = 1
+## recreate_table = 0
+## throw_error = 1
+
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+connection: default
+drop trigger ins_sum;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+rename table t1 to t2;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop trigger ins_sum;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = optimize table t1;
+## high_priority_cmd = optimize high_priority table t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 0
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+connection: default
+optimize table t1;;
+Table Op Msg_type Msg_text
+test.t1 optimize Error Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+test.t1 optimize status Operation failed
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+rename table t1 to t2;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+optimize table t1;;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = lock tables t1 write;
+## high_priority_cmd = optimize high_priority table t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+connection: default
+lock tables t1 write;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+rename table t1 to t2;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+lock tables t1 write;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+unlock tables;
+drop user test_user1@localhost;
+drop user test_user2@localhost;
+drop table if exists t1;
+##
+## Using HIGH_PRIORITY syntax
+##
+create user test_user1@localhost;
+grant all on test to test_user1@localhost;
+create user test_user2@localhost;
+grant all on test to test_user2@localhost;
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = alter table t1 modify i bigint;
+## high_priority_cmd = alter high_priority table t1 modify i bigint;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+connection: default
+alter table t1 modify i bigint;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+alter high_priority table t1 modify i bigint;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = alter table t1 rename t1_new;
+## high_priority_cmd = alter high_priority table t1 rename t1_new;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+begin; insert into t1 values (4); select i from t1;;
+i
+1
+2
+3
+4
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+connection: default
+alter table t1 rename t1_new;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+alter high_priority table t1 rename t1_new;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+select * from t1_new;
+i
+1
+2
+3
+drop table t1_new;
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = drop table t1;
+## high_priority_cmd = drop high_priority table t1;
+## should_kill = 0
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 write;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+connection: default
+drop table t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop high_priority table t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = drop table t1;
+## high_priority_cmd = drop high_priority table t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read; begin; insert into t1 values (4);;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+connection: default
+drop table t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop high_priority table t1;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = con2
+## cmd = alter table t1 modify i bigint;
+## high_priority_cmd = alter high_priority table t1 modify i bigint;
+## should_kill = 0
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: con2
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and con2 exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+connection: con2
+alter table t1 modify i bigint;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+alter high_priority table t1 modify i bigint;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = create index idx1 on t1 (i);
+## high_priority_cmd = create high_priority index idx1 on t1 (i);
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+connection: default
+create index idx1 on t1 (i);;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+create high_priority index idx1 on t1 (i);;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = drop index idx1 on t1;
+## high_priority_cmd = drop high_priority index idx1 on t1;
+## should_kill = 1
+## recreate_table = 0
+## throw_error = 1
+
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES MUL NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+connection: default
+drop index idx1 on t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop high_priority index idx1 on t1;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = truncate t1;
+## high_priority_cmd = truncate high_priority t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+connection: default
+truncate t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+truncate high_priority t1;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;
+## high_priority_cmd = create high_priority trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+connection: default
+create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+create high_priority trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = drop trigger ins_sum;
+## high_priority_cmd = drop high_priority trigger ins_sum;
+## should_kill = 1
+## recreate_table = 0
+## throw_error = 1
+
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+connection: default
+drop trigger ins_sum;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop high_priority trigger ins_sum;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = optimize table t1;
+## high_priority_cmd = optimize high_priority table t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 0
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+connection: default
+optimize table t1;;
+Table Op Msg_type Msg_text
+test.t1 optimize Error Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+test.t1 optimize status Operation failed
+optimize high_priority table t1;;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+drop user test_user1@localhost;
+drop user test_user2@localhost;
+drop table if exists t1;
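The block above closes the two passes over the same DDL list: one driven by the high_priority_ddl session variable, one by the inline HIGH_PRIORITY keyword. The two entry points, condensed to statements that already appear in these results:

  # Variable form: every DDL issued while the flag is set runs at high
  # priority; clear the flag when done.
  set high_priority_ddl = 1;
  alter table t1 modify i bigint;
  set high_priority_ddl = 0;

  # Inline form: only the tagged statement is high priority.
  alter high_priority table t1 modify i bigint;

As the should_kill parameters record, the high-priority retry succeeds by killing the blocking connection only where the issuer is allowed to kill it; in the run where the blocker holds LOCK TABLES ... WRITE, and in the con2 run (one unprivileged user blocking another), the retry times out exactly like the plain statement.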
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/deadlock.result b/storage/rocksdb/mysql-test/rocksdb/r/deadlock.result
new file mode 100644
index 00000000000..3e2f5709ca0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/deadlock.result
@@ -0,0 +1,37 @@
+#
+# Validate that deadlock errors don't occur with a high level of concurrency
+#
+# Disabled under valgrind because this takes too long
+DROP DATABASE IF EXISTS mysqlslap;
+CREATE DATABASE mysqlslap;
+USE mysqlslap;
+CREATE TABLE t1(id1 BIGINT, id2 BIGINT, count INT, PRIMARY KEY(id1, id2), KEY(id2)) ENGINE=rocksdb;
+CREATE TABLE t1rev(id1 BIGINT, id2 BIGINT, count INT, PRIMARY KEY(id1, id2) COMMENT "rev:cf2", KEY(id2) COMMENT "rev:cf2") ENGINE=rocksdb;
+SET @save = @@global.rocksdb_lock_wait_timeout;
+SET GLOBAL rocksdb_lock_wait_timeout = 60;
+SELECT count from t1;
+count
+50000
+SELECT count from t1;
+count
+100000
+SELECT count from t1;
+count
+150000
+SELECT count from t1;
+count
+200000
+SELECT count from t1rev;
+count
+50000
+SELECT count from t1rev;
+count
+100000
+SELECT count from t1rev;
+count
+150000
+SELECT count from t1rev;
+count
+200000
+SET GLOBAL rocksdb_lock_wait_timeout = @save;
+DROP DATABASE mysqlslap;
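The counts stepping up by 50000 between the SELECTs are produced by external mysqlslap batches that the .test file runs; mysqlslap invocations are not echoed into a .result, so only their effect is visible. Presumably each batch is a swarm of concurrent upserts against the (id1, id2) key; a hypothetical shape, for orientation only:

  insert into t1 values (1, 1, 1)
    on duplicate key update count = count + 1;

with rocksdb_lock_wait_timeout raised to 60 so contending writers wait instead of erroring; any deadlock error would leave a final count short of the expected multiple of 50000.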
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/deadlock_stats.result b/storage/rocksdb/mysql-test/rocksdb/r/deadlock_stats.result
new file mode 100644
index 00000000000..79cb6bb0f61
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/deadlock_stats.result
@@ -0,0 +1,14 @@
+set @prior_lock_wait_timeout = @@rocksdb_lock_wait_timeout; set @prior_deadlock_detect = @@rocksdb_deadlock_detect; set global rocksdb_deadlock_detect = on; set global rocksdb_lock_wait_timeout = 100000;;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection default;
+create table t (i int primary key) engine=rocksdb;
+insert into t values (1), (2), (3);
+#
+# The following is disabled due to:
+# MDEV-13404: MyRocks upstream uses I_S.table_statistics.row_lock_deadlocks, should we import?
+#
+disconnect con1;
+disconnect con2;
+set global rocksdb_lock_wait_timeout = @prior_lock_wait_timeout; set global rocksdb_deadlock_detect = @prior_deadlock_detect;;
+drop table t;
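The deadlock itself happens off-screen between con1 and con2, and the counter check that would prove it is commented out pending MDEV-13404, so only setup and teardown are echoed. The pattern being provoked is the classic two-lock cycle; a sketch, assuming the same shape as Deadlock #1 in deadlock_tracking.result below:

  # on con1
  begin;
  select * from t where i = 1 for update;
  # on con2
  begin;
  select * from t where i = 2 for update;
  select * from t where i = 1 for update;    -- blocks, waiting on con1
  # back on con1
  select * from t where i = 2 for update;    -- ERROR 40001, picked as the victim
  rollback;                                  -- both sides then roll back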
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result b/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result
new file mode 100644
index 00000000000..fffae916c12
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result
@@ -0,0 +1,488 @@
+set @prior_lock_wait_timeout = @@rocksdb_lock_wait_timeout;
+set @prior_deadlock_detect = @@rocksdb_deadlock_detect;
+set @prior_max_latest_deadlocks = @@rocksdb_max_latest_deadlocks;
+set global rocksdb_deadlock_detect = on;
+set global rocksdb_lock_wait_timeout = 10000;
+# Clears deadlock buffer of any prior deadlocks.
+set global rocksdb_max_latest_deadlocks = 0;
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+create table t (i int primary key) engine=rocksdb;
+insert into t values (1), (2), (3);
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+Deadlock #1
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+select * from t where i=2 for update;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+*** DEADLOCK PATH
+=========================================
+TSTAMP
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TSTAMP
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+Deadlock #2
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+select * from t where i=2 for update;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+*** DEADLOCK PATH
+=========================================
+TSTAMP
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TSTAMP
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+
+*** DEADLOCK PATH
+=========================================
+TSTAMP
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TSTAMP
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+set global rocksdb_max_latest_deadlocks = 10;
+Deadlock #3
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+select * from t where i=2 for update;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+*** DEADLOCK PATH
+=========================================
+TSTAMP
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TSTAMP
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+
+*** DEADLOCK PATH
+=========================================
+TSTAMP
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TSTAMP
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+
+*** DEADLOCK PATH
+=========================================
+TSTAMP
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TSTAMP
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+set global rocksdb_max_latest_deadlocks = 1;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+*** DEADLOCK PATH
+=========================================
+TSTAMP
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TSTAMP
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+set rocksdb_deadlock_detect_depth = 2;
+Deadlock #4
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+begin;
+select * from t where i=3 for update;
+i
+3
+select * from t where i=2 for update;
+select * from t where i=3 for update;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_deadlocks';
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+select case when variable_value-@a = 1 then 'true' else 'false' end as deadlocks from information_schema.global_status where variable_name='rocksdb_row_lock_deadlocks';
+deadlocks
+true
+rollback;
+i
+3
+rollback;
+i
+2
+rollback;
+set global rocksdb_max_latest_deadlocks = 5;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+-------DEADLOCK EXCEEDED MAX DEPTH-------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+Deadlock #5
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+begin;
+select * from t where i=3 lock in share mode;
+i
+3
+select * from t where i=100 for update;
+i
+select * from t where i=101 for update;
+i
+select * from t where i=2 for update;
+select * from t where i=3 lock in share mode;
+i
+3
+select * from t where i=200 for update;
+i
+select * from t where i=201 for update;
+i
+select * from t where i=1 lock in share mode;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+rollback;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+*** DEADLOCK PATH
+=========================================
+TSTAMP
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TSTAMP
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: SHARED
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+
+-------DEADLOCK EXCEEDED MAX DEPTH-------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+Deadlock #6
+create table t1 (id int primary key, value int) engine=rocksdb;
+insert into t1 values (1,1),(2,2),(3,3),(4,4),(5,5);
+begin;
+update t1 set value=value+100 where id=1;
+update t1 set value=value+100 where id=2;
+begin;
+update t1 set value=value+200 where id=3;
+update t1 set value=value+100 where id=3;
+update t1 set value=value+200 where id=1;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+select * from t1;
+id value
+1 101
+2 102
+3 103
+4 4
+5 5
+drop table t1;
+set global rocksdb_lock_wait_timeout = @prior_lock_wait_timeout;
+set global rocksdb_deadlock_detect = @prior_deadlock_detect;
+drop table t;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+*** DEADLOCK PATH
+=========================================
+TSTAMP
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: NOT FOUND; IDX_ID
+TABLE NAME: NOT FOUND; IDX_ID
+---------------WAITING FOR---------------
+TSTAMP
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: NOT FOUND; IDX_ID
+TABLE NAME: NOT FOUND; IDX_ID
+
+--------TXN_ID GOT DEADLOCK---------
+
+*** DEADLOCK PATH
+=========================================
+TSTAMP
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: NOT FOUND; IDX_ID
+TABLE NAME: NOT FOUND; IDX_ID
+---------------WAITING FOR---------------
+TSTAMP
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: SHARED
+INDEX NAME: NOT FOUND; IDX_ID
+TABLE NAME: NOT FOUND; IDX_ID
+
+--------TXN_ID GOT DEADLOCK---------
+
+-------DEADLOCK EXCEEDED MAX DEPTH-------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+set global rocksdb_max_latest_deadlocks = 0;
+# Clears deadlock buffer of any existing deadlocks.
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
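Two knobs govern the buffer behind LATEST DETECTED DEADLOCKS. rocksdb_max_latest_deadlocks caps how many recent paths are retained (setting it to 0 discards everything, which is how this file clears state between phases), and rocksdb_deadlock_detect_depth bounds the cycle search, with longer cycles reported only as DEADLOCK EXCEEDED MAX DEPTH. The recurring idiom, condensed:

  set global rocksdb_max_latest_deadlocks = 0;    -- flush buffered paths
  set global rocksdb_max_latest_deadlocks = 10;   -- retain up to 10 from now on
  set rocksdb_deadlock_detect_depth = 2;          -- cycles deeper than 2 are truncated
  show engine rocksdb transaction status;         -- paths print under LATEST DETECTED DEADLOCKS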
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/delete.result b/storage/rocksdb/mysql-test/rocksdb/r/delete.result
new file mode 100644
index 00000000000..8ec3c50f466
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/delete.result
@@ -0,0 +1,166 @@
+DROP TABLE IF EXISTS t1,t2;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (10000,'foobar'),(1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e');
+INSERT INTO t1 (a,b) SELECT a, b FROM t1;
+DELETE FROM t1 WHERE b IN ('c');
+SELECT a,b FROM t1;
+a b
+1 a
+1 a
+10000 foobar
+10000 foobar
+2 b
+2 b
+4 d
+4 d
+5 e
+5 e
+DELETE FROM t1 WHERE a < 0 OR b = 'a';
+SELECT a,b FROM t1;
+a b
+10000 foobar
+10000 foobar
+2 b
+2 b
+4 d
+4 d
+5 e
+5 e
+DELETE FROM t1 WHERE a <= 4 ORDER BY b DESC LIMIT 1;
+SELECT a,b FROM t1;
+a b
+10000 foobar
+10000 foobar
+2 b
+2 b
+4 d
+5 e
+5 e
+CREATE TABLE t2 (c CHAR(8), d INT, pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t2 (c,d) SELECT b, a FROM t1;
+SELECT c,d FROM t2;
+c d
+b 2
+b 2
+d 4
+e 5
+e 5
+foobar 10000
+foobar 10000
+DELETE t2.* FROM t1, t2 WHERE c < b AND a + d != 1;
+SELECT a,b FROM t1;
+a b
+10000 foobar
+10000 foobar
+2 b
+2 b
+4 d
+5 e
+5 e
+SELECT c,d FROM t2;
+c d
+foobar 10000
+foobar 10000
+DELETE FROM t2, t1.* USING t2, t1 WHERE c = 'foobar' and b = c;
+SELECT a,b FROM t1;
+a b
+2 b
+2 b
+4 d
+5 e
+5 e
+SELECT c,d FROM t2;
+c d
+DELETE FROM t1;
+SELECT a,b FROM t1;
+a b
+DROP TABLE t1, t2;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(6,'f'),(7,'g'),(8,'h'),(10000,'foobar');
+INSERT INTO t1 (a,b) SELECT a, b FROM t1;
+BEGIN;
+DELETE FROM t1 WHERE b IN ('c');
+SELECT a,b FROM t1;
+a b
+1 a
+1 a
+10000 foobar
+10000 foobar
+2 b
+2 b
+4 d
+4 d
+5 e
+5 e
+6 f
+6 f
+7 g
+7 g
+8 h
+8 h
+DELETE FROM t1 WHERE a < 0 OR b = 'a';
+COMMIT;
+SELECT a,b FROM t1;
+a b
+10000 foobar
+10000 foobar
+2 b
+2 b
+4 d
+4 d
+5 e
+5 e
+6 f
+6 f
+7 g
+7 g
+8 h
+8 h
+BEGIN;
+DELETE FROM t1 WHERE a <= 4 ORDER BY b DESC LIMIT 1;
+SAVEPOINT spt1;
+DELETE FROM t1;
+RELEASE SAVEPOINT spt1;
+ROLLBACK;
+SELECT a,b FROM t1;
+a b
+10000 foobar
+10000 foobar
+2 b
+2 b
+4 d
+4 d
+5 e
+5 e
+6 f
+6 f
+7 g
+7 g
+8 h
+8 h
+BEGIN;
+DELETE FROM t1 WHERE a <= 4 ORDER BY b DESC LIMIT 1;
+SAVEPOINT spt1;
+DELETE FROM t1;
+INSERT INTO t1 (a,b) VALUES (1,'a');
+ROLLBACK TO SAVEPOINT spt1;
+ERROR HY000: MyRocks currently does not support ROLLBACK TO SAVEPOINT if modifying rows.
+COMMIT;
+ERROR HY000: This transaction was rolled back and cannot be committed. Only supported operation is to roll it back, so all pending changes will be discarded. Please restart another transaction.
+SELECT a,b FROM t1;
+a b
+10000 foobar
+10000 foobar
+2 b
+2 b
+4 d
+4 d
+5 e
+5 e
+6 f
+6 f
+7 g
+7 g
+8 h
+8 h
+DROP TABLE t1;
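The two transactions at the end pin down the MyRocks savepoint rule: SAVEPOINT and RELEASE SAVEPOINT are accepted, and a full ROLLBACK always works, but ROLLBACK TO SAVEPOINT fails once any row was modified after the savepoint was set, and the failure poisons the transaction so a subsequent COMMIT is refused too. Condensed from the output above:

  begin;
  delete from t1 where a <= 4 order by b desc limit 1;
  savepoint spt1;
  delete from t1;                   -- rows modified after the savepoint
  rollback to savepoint spt1;       -- ERROR HY000: not supported if modifying rows
  commit;                           -- ERROR HY000: transaction was rolled back
  rollback;                         -- the only way out; pending changes discarded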
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/delete_ignore.result b/storage/rocksdb/mysql-test/rocksdb/r/delete_ignore.result
new file mode 100644
index 00000000000..f8ac42c4e72
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/delete_ignore.result
@@ -0,0 +1,59 @@
+DROP TABLE IF EXISTS t1,t2;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (10000,'foobar'),(1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e');
+INSERT INTO t1 (a,b) SELECT a, b FROM t1;
+CREATE TABLE t2 (pk INT AUTO_INCREMENT PRIMARY KEY, c CHAR(8), d INT) ENGINE=rocksdb;
+INSERT INTO t2 (c,d) SELECT b, a FROM t1;
+SELECT a,b FROM t1;
+a b
+1 a
+1 a
+10000 foobar
+10000 foobar
+2 b
+2 b
+3 c
+3 c
+4 d
+4 d
+5 e
+5 e
+SELECT c,d FROM t2;
+c d
+a 1
+a 1
+b 2
+b 2
+c 3
+c 3
+d 4
+d 4
+e 5
+e 5
+foobar 10000
+foobar 10000
+DELETE IGNORE FROM t1 WHERE b IS NOT NULL ORDER BY a LIMIT 1;
+SELECT a,b FROM t1;
+a b
+1 a
+10000 foobar
+10000 foobar
+2 b
+2 b
+3 c
+3 c
+4 d
+4 d
+5 e
+5 e
+DELETE IGNORE t1.*, t2.* FROM t1, t2 WHERE c < b OR a != ( SELECT 1 UNION SELECT 2 );
+Warnings:
+Warning 1242 Subquery returns more than 1 row
+SELECT a,b FROM t1;
+a b
+1 a
+SELECT c,d FROM t2;
+c d
+foobar 10000
+foobar 10000
+DROP TABLE t1, t2;
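The warning is the point of this file: comparing against ( SELECT 1 UNION SELECT 2 ) raises error 1242, which would abort the multi-table delete; under IGNORE it is demoted to the warning shown and the statement runs to completion, deleting whatever the predicate still matched. Condensed:

  delete ignore t1.*, t2.* from t1, t2
    where c < b or a != ( select 1 union select 2 );
  show warnings;    -- Warning 1242  Subquery returns more than 1 row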
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/delete_quick.result b/storage/rocksdb/mysql-test/rocksdb/r/delete_quick.result
new file mode 100644
index 00000000000..4173d875a82
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/delete_quick.result
@@ -0,0 +1,24 @@
+DROP TABLE IF EXISTS t1,t2;
+CREATE TABLE t1 (a INT, b CHAR(8), PRIMARY KEY (a)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e');
+DELETE QUICK FROM t1 WHERE a = 1 OR b > 'foo';
+SELECT a,b FROM t1;
+a b
+2 b
+3 c
+4 d
+5 e
+CREATE TABLE t2 (c CHAR(8), d INT, PRIMARY KEY (c)) ENGINE=rocksdb;
+INSERT INTO t2 (c,d) SELECT b, a FROM t1;
+SELECT c,d FROM t2;
+c d
+b 2
+c 3
+d 4
+e 5
+DELETE QUICK FROM t2, t1.* USING t2, t1 WHERE c IS NULL OR a = d;
+SELECT a,b FROM t1;
+a b
+SELECT c,d FROM t2;
+c d
+DROP TABLE t1, t2;
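QUICK is a legacy hint: in MyISAM it suppresses index-leaf merging during deletes, and engines that do not implement it, MyRocks included as far as this result shows, simply accept the keyword and behave as a plain DELETE, which is what the file asserts:

  delete quick from t1 where a = 1 or b > 'foo';   -- same rows removed as without QUICK
  delete quick from t2, t1.* using t2, t1
    where c is null or a = d;                      -- multi-table form is accepted too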
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/delete_with_keys.result b/storage/rocksdb/mysql-test/rocksdb/r/delete_with_keys.result
new file mode 100644
index 00000000000..c94708b872f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/delete_with_keys.result
@@ -0,0 +1,38 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY, KEY(b)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(6,'x'),(7,'y'),(8,'z');
+DELETE FROM t1 WHERE b > 'y';
+DELETE FROM t1 WHERE a=2;
+SELECT a,b FROM t1;
+a b
+1 a
+3 c
+4 d
+5 e
+6 x
+7 y
+DELETE FROM t1;
+DROP TABLE t1;
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(6,'x'),(7,'y'),(8,'z');
+DELETE FROM t1 WHERE b > 'y';
+DELETE FROM t1 WHERE a=2;
+SELECT a,b FROM t1;
+a b
+1 a
+3 c
+4 d
+5 e
+6 x
+7 y
+DELETE FROM t1;
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b INT, c INT, pk INT AUTO_INCREMENT PRIMARY KEY, KEY(a), KEY (b)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b,c) VALUES (1,2,3),(4,5,6),(7,8,9);
+DELETE FROM t1 WHERE a = 10 OR b = 20 ORDER BY c LIMIT 1;
+SELECT a,b,c FROM t1;
+a b c
+1 2 3
+4 5 6
+7 8 9
+DROP TABLE t1;
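Three passes over equivalent deletes with different index sets (KEY(b) plus the PK, a bare PK, then KEY(a) and KEY(b) with an ORDER BY ... LIMIT whose predicate matches nothing) must all leave the same rows behind; the chosen access path may differ, the visible effect may not. The pair being repeated:

  delete from t1 where b > 'y';    -- can be driven by KEY(b) where one exists
  delete from t1 where a = 2;      -- PK point delete or scan; same outcome either way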
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/describe.result b/storage/rocksdb/mysql-test/rocksdb/r/describe.result
new file mode 100644
index 00000000000..6d43f89c9bd
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/describe.result
@@ -0,0 +1,19 @@
+DROP TABLE IF EXISTS t1, t2, t3;
+CREATE TABLE t1 (a INT, b CHAR(8), PRIMARY KEY (a)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (100,'foo'),(2, 'b');
+CREATE TABLE t2 (a INT, b CHAR(8), PRIMARY KEY (b)) ENGINE=rocksdb CHARACTER SET utf8;
+INSERT INTO t2 (a,b) VALUES (1, 'bar');
+CREATE TABLE t3 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb CHARACTER SET utf8;
+DESCRIBE t1;
+Field Type Null Key Default Extra
+a int(11) NO PRI NULL
+b char(8) YES NULL
+DESC t2 a;
+Field Type Null Key Default Extra
+a int(11) YES NULL
+DESCRIBE t3 '%';
+Field Type Null Key Default Extra
+a int(11) YES NULL
+b char(8) YES NULL
+pk int(11) NO PRI NULL auto_increment
+DROP TABLE t1, t2, t3;
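DESCRIBE takes an optional second argument: a literal column name restricts output to that column (DESC t2 a), and a quoted string is treated as a LIKE pattern (DESCRIBE t3 '%' matches every column). A pattern matching a subset, hypothetical and not part of the test, would work the same way:

  describe t3 'p%';    -- would list only pk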
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/drop_database.result b/storage/rocksdb/mysql-test/rocksdb/r/drop_database.result
new file mode 100644
index 00000000000..f220efe9f38
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/drop_database.result
@@ -0,0 +1,6 @@
+DROP DATABASE IF EXISTS test_drop_database;
+CREATE DATABASE test_drop_database;
+CREATE TABLE t1 (a int, b int, c int, primary key (a), unique key (b)) ENGINE=ROCKSDB;
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY (a);
+DROP TABLE t1;
+DROP DATABASE test_drop_database;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/drop_index_inplace.result b/storage/rocksdb/mysql-test/rocksdb/r/drop_index_inplace.result
new file mode 100644
index 00000000000..dfa5c5b2590
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/drop_index_inplace.result
@@ -0,0 +1,154 @@
+drop table if exists t1;
+CREATE TABLE t1 (a INT, b INT AUTO_INCREMENT, KEY ka(a), KEY kb(a,b), PRIMARY KEY(b)) ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` int(11) NOT NULL AUTO_INCREMENT,
+ PRIMARY KEY (`b`),
+ KEY `ka` (`a`),
+ KEY `kb` (`a`,`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+INSERT INTO t1 (a) VALUES (1);
+INSERT INTO t1 (a) VALUES (3);
+INSERT INTO t1 (a) VALUES (5);
+ALTER TABLE t1 DROP INDEX ka, ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` int(11) NOT NULL AUTO_INCREMENT,
+ PRIMARY KEY (`b`),
+ KEY `kb` (`a`,`b`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=4 DEFAULT CHARSET=latin1
+SELECT * FROM t1 FORCE INDEX(ka) where a > 1;
+ERROR 42000: Key 'ka' doesn't exist in table 't1'
+SELECT * FROM t1 FORCE INDEX(kb) where a > 1;
+a b
+3 2
+5 3
+SELECT * FROM t1 where b > 1;
+a b
+3 2
+5 3
+DROP TABLE t1;
+CREATE TABLE t1 (a INT AUTO_INCREMENT, b INT, c INT, KEY kb(b), KEY kbc(b,c), KEY kc(c), PRIMARY KEY(a)) ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL AUTO_INCREMENT,
+ `b` int(11) DEFAULT NULL,
+ `c` int(11) DEFAULT NULL,
+ PRIMARY KEY (`a`),
+ KEY `kb` (`b`),
+ KEY `kbc` (`b`,`c`),
+ KEY `kc` (`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+INSERT INTO t1 (b,c) VALUES (1,2);
+INSERT INTO t1 (b,c) VALUES (3,4);
+INSERT INTO t1 (b,c) VALUES (5,6);
+ALTER TABLE t1 DROP INDEX kb, DROP INDEX kbc, ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL AUTO_INCREMENT,
+ `b` int(11) DEFAULT NULL,
+ `c` int(11) DEFAULT NULL,
+ PRIMARY KEY (`a`),
+ KEY `kc` (`c`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=4 DEFAULT CHARSET=latin1
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL AUTO_INCREMENT,
+ `b` int(11) DEFAULT NULL,
+ `c` int(11) DEFAULT NULL,
+ PRIMARY KEY (`a`),
+ KEY `kc` (`c`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=4 DEFAULT CHARSET=latin1
+INSERT INTO t1 (b,c) VALUES (1,2);
+INSERT INTO t1 (b,c) VALUES (3,4);
+INSERT INTO t1 (b,c) VALUES (5,6);
+SELECT * FROM t1 FORCE INDEX(kc) where c > 3;
+a b c
+2 3 4
+3 5 6
+5 3 4
+6 5 6
+SELECT * FROM t1 where b > 3;
+a b c
+3 5 6
+6 5 6
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b INT, c INT, KEY kb(b), KEY kbc(b,c), KEY kc(c), PRIMARY KEY(a)) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 a A 0 NULL NULL LSMTREE
+t1 1 kb 1 b A 0 NULL NULL YES LSMTREE
+t1 1 kbc 1 b A 0 NULL NULL YES LSMTREE
+t1 1 kbc 2 c A 0 NULL NULL YES LSMTREE
+t1 1 kc 1 c A 0 NULL NULL YES LSMTREE
+ALTER TABLE t1 DROP INDEX kb, DROP INDEX kbc, ALGORITHM=INPLACE;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 a A 0 NULL NULL LSMTREE
+t1 1 kc 1 c A 0 NULL NULL YES LSMTREE
+ALTER TABLE t1 DROP PRIMARY KEY;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 1 kc 1 c A 0 NULL NULL YES LSMTREE
+ALTER TABLE t1 DROP INDEX kc, ALGORITHM=INPLACE;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+DROP TABLE t1;
+CREATE TABLE t1 (a INT AUTO_INCREMENT, b INT, c INT, PRIMARY KEY(a)) ENGINE=rocksdb;
+ALTER TABLE t1 ADD UNIQUE INDEX kb(b);
+ALTER TABLE t1 ADD UNIQUE INDEX kbc(b,c);
+ALTER TABLE t1 ADD UNIQUE INDEX kc(c);
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 a A 0 NULL NULL LSMTREE
+t1 0 kb 1 b A 0 NULL NULL YES LSMTREE
+t1 0 kbc 1 b A 0 NULL NULL YES LSMTREE
+t1 0 kbc 2 c A 0 NULL NULL YES LSMTREE
+t1 0 kc 1 c A 0 NULL NULL YES LSMTREE
+ALTER TABLE t1 DROP INDEX kb, DROP INDEX kbc;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 a A 0 NULL NULL LSMTREE
+t1 0 kc 1 c A 0 NULL NULL YES LSMTREE
+INSERT INTO t1 (b,c) VALUES (1,2);
+INSERT INTO t1 (b,c) VALUES (3,4);
+INSERT INTO t1 (b,c) VALUES (5,6);
+SELECT * FROM t1 FORCE INDEX(kc) where c > 3;
+a b c
+2 3 4
+3 5 6
+ALTER TABLE t1 DROP INDEX kc, ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL AUTO_INCREMENT,
+ `b` int(11) DEFAULT NULL,
+ `c` int(11) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=4 DEFAULT CHARSET=latin1
+DROP TABLE t1;
+CREATE TABLE IF NOT EXISTS t1 (col1 INT, col2 INT, col3 INT);
+INSERT INTO t1 (col1,col2,col3) VALUES (1,2,3);
+ALTER TABLE t1 ADD KEY idx ( col1, col2 );
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+ALTER TABLE t1 DROP COLUMN col2;
+ALTER TABLE t1 DROP COLUMN col3;
+DROP TABLE t1;
+CREATE TABLE IF NOT EXISTS t1 (col1 INT, col2 INT, col3 INT);
+INSERT INTO t1 (col1,col2,col3) VALUES (1,2,3);
+ALTER TABLE t1 ADD KEY idx ( col1, col2 );
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+ALTER TABLE t1 DROP COLUMN col2;
+ALTER TABLE t1 DROP COLUMN col3;
+DROP TABLE t1;
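The file walks DROP INDEX through ALGORITHM=INPLACE in every supported combination: several secondary keys in one statement, the primary key itself (after which the table presumably continues on MyRocks' implicit hidden key and SHOW INDEX lists nothing), and unique keys added after creation. The load-bearing statements, collected:

  alter table t1 drop index kb, drop index kbc, algorithm=inplace;
  alter table t1 drop primary key;                  -- falls back to the hidden key
  alter table t1 drop index kc, algorithm=inplace;  -- last index gone; data remains

The closing ANALYZE / DROP COLUMN block appears twice by design of the test; the repetition presumably brackets a step (such as a server restart) that leaves no trace in a .result file.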
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/drop_table.result b/storage/rocksdb/mysql-test/rocksdb/r/drop_table.result
new file mode 100644
index 00000000000..4d20242f43e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/drop_table.result
@@ -0,0 +1,79 @@
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+DROP TABLE IF EXISTS t3;
+DROP TABLE IF EXISTS t4;
+DROP TABLE IF EXISTS t5;
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+call mtr.add_suppression("LibRocksDB");
+set global rocksdb_compact_cf = 'cf1';
+set global rocksdb_compact_cf = 'rev:cf2';
+set global rocksdb_signal_drop_index_thread = 1;
+CREATE TABLE t1 (
+a int not null,
+b int not null,
+primary key (a,b) comment 'cf1',
+key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+CREATE TABLE t2 (
+a int not null,
+b int not null,
+primary key (a,b) comment 'cf1',
+key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+CREATE TABLE t3 (
+a int not null,
+b int not null,
+primary key (a,b) comment 'cf1',
+key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+CREATE TABLE t4 (
+a int not null,
+b int not null,
+primary key (a,b) comment 'cf1',
+key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+DELETE FROM t1;
+DELETE FROM t2;
+DELETE FROM t3;
+DELETE FROM t4;
+drop table t2;
+DELETE FROM t1;
+DELETE FROM t4;
+drop table t3;
+DELETE FROM t1;
+DELETE FROM t4;
+SET GLOBAL rocksdb_max_manual_compactions = 2;
+SET GLOBAL rocksdb_debug_manual_compaction_delay = 3600;
+connect con1, localhost, root,,;
+connect con2, localhost, root,,;
+connect con3, localhost, root,,;
+connection con1;
+SET GLOBAL rocksdb_compact_cf='cf1';
+connection con2;
+SET GLOBAL rocksdb_compact_cf='rev:cf2';
+connection default;
+select * from information_schema.global_status where variable_name='rocksdb_manual_compactions_running';
+VARIABLE_NAME VARIABLE_VALUE
+ROCKSDB_MANUAL_COMPACTIONS_RUNNING 1
+connection con3;
+SET GLOBAL rocksdb_compact_cf='cf1';
+ERROR HY000: Internal error: Can't schedule more manual compactions. Increase rocksdb_max_manual_compactions or stop issuing more manual compactions.
+SET GLOBAL rocksdb_compact_cf='rev:cf2';
+ERROR HY000: Internal error: Can't schedule more manual compactions. Increase rocksdb_max_manual_compactions or stop issuing more manual compactions.
+connection default;
+drop table t4;
+CREATE TABLE t5 (
+a int not null,
+b int not null,
+primary key (a,b) comment 'cf1',
+key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+DELETE FROM t5;
+drop table t5;
+set global rocksdb_compact_cf = 'cf1';
+set global rocksdb_compact_cf = 'rev:cf2';
+set global rocksdb_signal_drop_index_thread = 1;
+drop table t1;
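Besides drop-index cleanup, this file checks the manual-compaction throttle. Each assignment to rocksdb_compact_cf schedules one manual compaction; with the debug delay holding the first two open, a third request trips rocksdb_max_manual_compactions and is refused outright. Condensed:

  set global rocksdb_max_manual_compactions = 2;
  set global rocksdb_debug_manual_compaction_delay = 3600;  -- debug-only: park compactions
  set global rocksdb_compact_cf = 'cf1';       -- slot 1 (issued from con1)
  set global rocksdb_compact_cf = 'rev:cf2';   -- slot 2 (issued from con2)
  set global rocksdb_compact_cf = 'cf1';       -- refused: can't schedule more manual compactions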
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/drop_table2.result b/storage/rocksdb/mysql-test/rocksdb/r/drop_table2.result
new file mode 100644
index 00000000000..fad2939d206
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/drop_table2.result
@@ -0,0 +1,63 @@
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+DROP TABLE IF EXISTS t3;
+DROP TABLE IF EXISTS t4;
+DROP TABLE IF EXISTS t5;
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+set global rocksdb_compact_cf = 'cf1';
+set global rocksdb_compact_cf = 'rev:cf2';
+set global rocksdb_signal_drop_index_thread = 1;
+CREATE TABLE t1 (
+a int not null,
+b int not null,
+primary key (a,b) comment 'cf1',
+key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+CREATE TABLE t2 (
+a int not null,
+b int not null,
+primary key (a,b) comment 'cf1',
+key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+CREATE TABLE t3 (
+a int not null,
+b int not null,
+primary key (a,b) comment 'cf1',
+key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+CREATE TABLE t4 (
+a int not null,
+b int not null,
+primary key (a,b) comment 'cf1',
+key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+DELETE FROM t1;
+DELETE FROM t2;
+DELETE FROM t3;
+DELETE FROM t4;
+DELETE FROM t1;
+DELETE FROM t4;
+DELETE FROM t1;
+DELETE FROM t4;
+CREATE TABLE t5 (
+a int not null,
+b int not null,
+primary key (a,b) comment 'cf1',
+key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+DELETE FROM t5;
+set @@global.rocksdb_compact_cf = 'cf1';
+set @@global.rocksdb_compact_cf = 'rev:cf2';
+set @@global.rocksdb_compact_cf = 'default';
+drop table t1;
+drop table t2;
+drop table t3;
+drop table t4;
+drop table t5;
+set @@global.rocksdb_compact_cf = 'cf1';
+set @@global.rocksdb_compact_cf = 'rev:cf2';
+set @@global.rocksdb_compact_cf = 'default';
+Compacted
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result b/storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result
new file mode 100644
index 00000000000..7a33fa83cb4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result
@@ -0,0 +1,24 @@
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+DROP TABLE IF EXISTS t1;
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+set global rocksdb_compact_cf = 'cf1';
+set global rocksdb_compact_cf = 'rev:cf2';
+set global rocksdb_signal_drop_index_thread = 1;
+CREATE TABLE t1 (
+a int not null,
+b int not null,
+c varchar(500) not null,
+primary key (a,b) comment 'cf1',
+key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+DELETE FROM t1;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_compact_read_bytes';
+drop table t1;
+select case when variable_value-@a < 500000 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_compact_read_bytes';
+case when variable_value-@a < 500000 then 'true' else 'false' end
+true
+DROP TABLE IF EXISTS t1;
+Warnings:
+Note 1051 Unknown table 'test.t1'
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/dup_key_update.result b/storage/rocksdb/mysql-test/rocksdb/r/dup_key_update.result
new file mode 100644
index 00000000000..b4cebb08bb1
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/dup_key_update.result
@@ -0,0 +1,366 @@
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+CREATE TABLE t1 (id1 INT, id2 INT, id3 INT,
+PRIMARY KEY (id1, id2, id3),
+UNIQUE KEY (id3, id1)) ENGINE=ROCKSDB;
+CREATE TABLE t2 (id1 INT, id2 INT, id3 INT,
+PRIMARY KEY (id1, id2, id3),
+UNIQUE KEY (id3, id1) COMMENT 'rev:cf') ENGINE=ROCKSDB;
+INSERT INTO t1 VALUES (1, 1, 1) ON DUPLICATE KEY UPDATE id2 = 9;
+SELECT * FROM t1 WHERE id1 = 1;
+id1 id2 id3
+1 1 1
+SELECT * FROM t1 FORCE INDEX (id3) WHERE id3 = 1;
+id1 id2 id3
+1 1 1
+INSERT INTO t1 VALUES (1, 1, 1) ON DUPLICATE KEY UPDATE id2 = 10;
+SELECT * FROM t1 WHERE id1 = 1;
+id1 id2 id3
+1 10 1
+SELECT * FROM t1 FORCE INDEX (id3) WHERE id3 = 1;
+id1 id2 id3
+1 10 1
+INSERT INTO t1 VALUES (1, 1, 1) ON DUPLICATE KEY UPDATE id2 = 11;
+SELECT * FROM t1 WHERE id1 = 1;
+id1 id2 id3
+1 11 1
+SELECT * FROM t1 FORCE INDEX (id3) WHERE id3 = 1;
+id1 id2 id3
+1 11 1
+INSERT INTO t1 VALUES (5, 5, 5) ON DUPLICATE KEY UPDATE id2 = 12;
+SELECT * FROM t1 WHERE id1 = 5;
+id1 id2 id3
+5 12 5
+SELECT * FROM t1 FORCE INDEX (id3) WHERE id3 = 5;
+id1 id2 id3
+5 12 5
+INSERT INTO t1 VALUES (5, 5, 5) ON DUPLICATE KEY UPDATE id2 = 13;
+SELECT * FROM t1 WHERE id1 = 5;
+id1 id2 id3
+5 13 5
+SELECT * FROM t1 FORCE INDEX (id3) WHERE id3 = 5;
+id1 id2 id3
+5 13 5
+INSERT INTO t1 VALUES (5, 5, 5) ON DUPLICATE KEY UPDATE id2 = 14;
+SELECT * FROM t1 WHERE id1 = 5;
+id1 id2 id3
+5 14 5
+SELECT * FROM t1 FORCE INDEX (id3) WHERE id3 = 5;
+id1 id2 id3
+5 14 5
+INSERT INTO t1 VALUES (9, 9, 9) ON DUPLICATE KEY UPDATE id2 = 15;
+SELECT * FROM t1 WHERE id1 = 9;
+id1 id2 id3
+9 15 9
+SELECT * FROM t1 FORCE INDEX (id3) WHERE id3 = 9;
+id1 id2 id3
+9 15 9
+INSERT INTO t1 VALUES (9, 9, 9) ON DUPLICATE KEY UPDATE id2 = 16;
+SELECT * FROM t1 WHERE id1 = 9;
+id1 id2 id3
+9 16 9
+SELECT * FROM t1 FORCE INDEX (id3) WHERE id3 = 9;
+id1 id2 id3
+9 16 9
+INSERT INTO t1 VALUES (9, 9, 9) ON DUPLICATE KEY UPDATE id2 = 17;
+SELECT * FROM t1 WHERE id1 = 9;
+id1 id2 id3
+9 17 9
+SELECT * FROM t1 FORCE INDEX (id3) WHERE id3 = 9;
+id1 id2 id3
+9 17 9
+SELECT * FROM t1;
+id1 id2 id3
+1 11 1
+2 2 2
+3 3 3
+4 4 4
+5 14 5
+6 6 6
+7 7 7
+8 8 8
+9 17 9
+SELECT * FROM t1 FORCE INDEX (id3);
+id1 id2 id3
+1 11 1
+2 2 2
+3 3 3
+4 4 4
+5 14 5
+6 6 6
+7 7 7
+8 8 8
+9 17 9
+INSERT INTO t2 VALUES (1, 1, 1) ON DUPLICATE KEY UPDATE id2 = 9;
+SELECT * FROM t2 WHERE id1 = 1;
+id1 id2 id3
+1 1 1
+SELECT * FROM t2 FORCE INDEX (id3) WHERE id3 = 1;
+id1 id2 id3
+1 1 1
+INSERT INTO t2 VALUES (1, 1, 1) ON DUPLICATE KEY UPDATE id2 = 10;
+SELECT * FROM t2 WHERE id1 = 1;
+id1 id2 id3
+1 10 1
+SELECT * FROM t2 FORCE INDEX (id3) WHERE id3 = 1;
+id1 id2 id3
+1 10 1
+INSERT INTO t2 VALUES (1, 1, 1) ON DUPLICATE KEY UPDATE id2 = 11;
+SELECT * FROM t2 WHERE id1 = 1;
+id1 id2 id3
+1 11 1
+SELECT * FROM t2 FORCE INDEX (id3) WHERE id3 = 1;
+id1 id2 id3
+1 11 1
+INSERT INTO t2 VALUES (5, 5, 5) ON DUPLICATE KEY UPDATE id2 = 12;
+SELECT * FROM t2 WHERE id1 = 5;
+id1 id2 id3
+5 12 5
+SELECT * FROM t2 FORCE INDEX (id3) WHERE id3 = 5;
+id1 id2 id3
+5 12 5
+INSERT INTO t2 VALUES (5, 5, 5) ON DUPLICATE KEY UPDATE id2 = 13;
+SELECT * FROM t2 WHERE id1 = 5;
+id1 id2 id3
+5 13 5
+SELECT * FROM t2 FORCE INDEX (id3) WHERE id3 = 5;
+id1 id2 id3
+5 13 5
+INSERT INTO t2 VALUES (5, 5, 5) ON DUPLICATE KEY UPDATE id2 = 14;
+SELECT * FROM t2 WHERE id1 = 5;
+id1 id2 id3
+5 14 5
+SELECT * FROM t2 FORCE INDEX (id3) WHERE id3 = 5;
+id1 id2 id3
+5 14 5
+INSERT INTO t2 VALUES (9, 9, 9) ON DUPLICATE KEY UPDATE id2 = 15;
+SELECT * FROM t2 WHERE id1 = 9;
+id1 id2 id3
+9 15 9
+SELECT * FROM t2 FORCE INDEX (id3) WHERE id3 = 9;
+id1 id2 id3
+9 15 9
+INSERT INTO t2 VALUES (9, 9, 9) ON DUPLICATE KEY UPDATE id2 = 16;
+SELECT * FROM t2 WHERE id1 = 9;
+id1 id2 id3
+9 16 9
+SELECT * FROM t2 FORCE INDEX (id3) WHERE id3 = 9;
+id1 id2 id3
+9 16 9
+INSERT INTO t2 VALUES (9, 9, 9) ON DUPLICATE KEY UPDATE id2 = 17;
+SELECT * FROM t2 WHERE id1 = 9;
+id1 id2 id3
+9 17 9
+SELECT * FROM t2 FORCE INDEX (id3) WHERE id3 = 9;
+id1 id2 id3
+9 17 9
+SELECT * FROM t2;
+id1 id2 id3
+1 11 1
+2 2 2
+3 3 3
+4 4 4
+5 14 5
+6 6 6
+7 7 7
+8 8 8
+9 17 9
+SELECT * FROM t2 FORCE INDEX (id3);
+id1 id2 id3
+1 11 1
+2 2 2
+3 3 3
+4 4 4
+5 14 5
+6 6 6
+7 7 7
+8 8 8
+9 17 9
+DROP TABLE t1;
+DROP TABLE t2;
+set global rocksdb_large_prefix=1;
+CREATE TABLE t1 (id1 varchar(128) CHARACTER SET latin1 COLLATE latin1_bin,
+id2 varchar(256) CHARACTER SET utf8 COLLATE utf8_bin,
+id3 varchar(200) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
+PRIMARY KEY (id1, id2, id3),
+UNIQUE KEY (id3, id1)) ENGINE=ROCKSDB;
+set global rocksdb_large_prefix=DEFAULT;
+set global rocksdb_large_prefix=1;
+CREATE TABLE t2 (id1 varchar(128) CHARACTER SET latin1 COLLATE latin1_bin,
+id2 varchar(256) CHARACTER SET utf8 COLLATE utf8_bin,
+id3 varchar(200) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
+PRIMARY KEY (id1, id2, id3),
+UNIQUE KEY (id3, id1) COMMENT 'rev:cf') ENGINE=ROCKSDB;
+set global rocksdb_large_prefix=DEFAULT;
+INSERT INTO t1 VALUES (1, 1, 1) ON DUPLICATE KEY UPDATE id2 = 9;
+SELECT * FROM t1 WHERE id1 = 1;
+id1 id2 id3
+1 1 1
+SELECT * FROM t1 FORCE INDEX (id3) WHERE id3 = 1;
+id1 id2 id3
+1 1 1
+INSERT INTO t1 VALUES (1, 1, 1) ON DUPLICATE KEY UPDATE id2 = 10;
+SELECT * FROM t1 WHERE id1 = 1;
+id1 id2 id3
+1 10 1
+SELECT * FROM t1 FORCE INDEX (id3) WHERE id3 = 1;
+id1 id2 id3
+1 10 1
+INSERT INTO t1 VALUES (1, 1, 1) ON DUPLICATE KEY UPDATE id2 = 11;
+SELECT * FROM t1 WHERE id1 = 1;
+id1 id2 id3
+1 11 1
+SELECT * FROM t1 FORCE INDEX (id3) WHERE id3 = 1;
+id1 id2 id3
+1 11 1
+INSERT INTO t1 VALUES (5, 5, 5) ON DUPLICATE KEY UPDATE id2 = 12;
+SELECT * FROM t1 WHERE id1 = 5;
+id1 id2 id3
+5 12 5
+SELECT * FROM t1 FORCE INDEX (id3) WHERE id3 = 5;
+id1 id2 id3
+5 12 5
+INSERT INTO t1 VALUES (5, 5, 5) ON DUPLICATE KEY UPDATE id2 = 13;
+SELECT * FROM t1 WHERE id1 = 5;
+id1 id2 id3
+5 13 5
+SELECT * FROM t1 FORCE INDEX (id3) WHERE id3 = 5;
+id1 id2 id3
+5 13 5
+INSERT INTO t1 VALUES (5, 5, 5) ON DUPLICATE KEY UPDATE id2 = 14;
+SELECT * FROM t1 WHERE id1 = 5;
+id1 id2 id3
+5 14 5
+SELECT * FROM t1 FORCE INDEX (id3) WHERE id3 = 5;
+id1 id2 id3
+5 14 5
+INSERT INTO t1 VALUES (9, 9, 9) ON DUPLICATE KEY UPDATE id2 = 15;
+SELECT * FROM t1 WHERE id1 = 9;
+id1 id2 id3
+9 15 9
+SELECT * FROM t1 FORCE INDEX (id3) WHERE id3 = 9;
+id1 id2 id3
+9 15 9
+INSERT INTO t1 VALUES (9, 9, 9) ON DUPLICATE KEY UPDATE id2 = 16;
+SELECT * FROM t1 WHERE id1 = 9;
+id1 id2 id3
+9 16 9
+SELECT * FROM t1 FORCE INDEX (id3) WHERE id3 = 9;
+id1 id2 id3
+9 16 9
+INSERT INTO t1 VALUES (9, 9, 9) ON DUPLICATE KEY UPDATE id2 = 17;
+SELECT * FROM t1 WHERE id1 = 9;
+id1 id2 id3
+9 17 9
+SELECT * FROM t1 FORCE INDEX (id3) WHERE id3 = 9;
+id1 id2 id3
+9 17 9
+SELECT * FROM t1;
+id1 id2 id3
+1 11 1
+2 2 2
+3 3 3
+4 4 4
+5 14 5
+6 6 6
+7 7 7
+8 8 8
+9 17 9
+SELECT * FROM t1 FORCE INDEX (id3);
+id1 id2 id3
+1 11 1
+2 2 2
+3 3 3
+4 4 4
+5 14 5
+6 6 6
+7 7 7
+8 8 8
+9 17 9
+INSERT INTO t2 VALUES (1, 1, 1) ON DUPLICATE KEY UPDATE id2 = 9;
+SELECT * FROM t2 WHERE id1 = 1;
+id1 id2 id3
+1 1 1
+SELECT * FROM t2 FORCE INDEX (id3) WHERE id3 = 1;
+id1 id2 id3
+1 1 1
+INSERT INTO t2 VALUES (1, 1, 1) ON DUPLICATE KEY UPDATE id2 = 10;
+SELECT * FROM t2 WHERE id1 = 1;
+id1 id2 id3
+1 10 1
+SELECT * FROM t2 FORCE INDEX (id3) WHERE id3 = 1;
+id1 id2 id3
+1 10 1
+INSERT INTO t2 VALUES (1, 1, 1) ON DUPLICATE KEY UPDATE id2 = 11;
+SELECT * FROM t2 WHERE id1 = 1;
+id1 id2 id3
+1 11 1
+SELECT * FROM t2 FORCE INDEX (id3) WHERE id3 = 1;
+id1 id2 id3
+1 11 1
+INSERT INTO t2 VALUES (5, 5, 5) ON DUPLICATE KEY UPDATE id2 = 12;
+SELECT * FROM t2 WHERE id1 = 5;
+id1 id2 id3
+5 12 5
+SELECT * FROM t2 FORCE INDEX (id3) WHERE id3 = 5;
+id1 id2 id3
+5 12 5
+INSERT INTO t2 VALUES (5, 5, 5) ON DUPLICATE KEY UPDATE id2 = 13;
+SELECT * FROM t2 WHERE id1 = 5;
+id1 id2 id3
+5 13 5
+SELECT * FROM t2 FORCE INDEX (id3) WHERE id3 = 5;
+id1 id2 id3
+5 13 5
+INSERT INTO t2 VALUES (5, 5, 5) ON DUPLICATE KEY UPDATE id2 = 14;
+SELECT * FROM t2 WHERE id1 = 5;
+id1 id2 id3
+5 14 5
+SELECT * FROM t2 FORCE INDEX (id3) WHERE id3 = 5;
+id1 id2 id3
+5 14 5
+INSERT INTO t2 VALUES (9, 9, 9) ON DUPLICATE KEY UPDATE id2 = 15;
+SELECT * FROM t2 WHERE id1 = 9;
+id1 id2 id3
+9 15 9
+SELECT * FROM t2 FORCE INDEX (id3) WHERE id3 = 9;
+id1 id2 id3
+9 15 9
+INSERT INTO t2 VALUES (9, 9, 9) ON DUPLICATE KEY UPDATE id2 = 16;
+SELECT * FROM t2 WHERE id1 = 9;
+id1 id2 id3
+9 16 9
+SELECT * FROM t2 FORCE INDEX (id3) WHERE id3 = 9;
+id1 id2 id3
+9 16 9
+INSERT INTO t2 VALUES (9, 9, 9) ON DUPLICATE KEY UPDATE id2 = 17;
+SELECT * FROM t2 WHERE id1 = 9;
+id1 id2 id3
+9 17 9
+SELECT * FROM t2 FORCE INDEX (id3) WHERE id3 = 9;
+id1 id2 id3
+9 17 9
+SELECT * FROM t2;
+id1 id2 id3
+1 11 1
+2 2 2
+3 3 3
+4 4 4
+5 14 5
+6 6 6
+7 7 7
+8 8 8
+9 17 9
+SELECT * FROM t2 FORCE INDEX (id3);
+id1 id2 id3
+1 11 1
+2 2 2
+3 3 3
+4 4 4
+5 14 5
+6 6 6
+7 7 7
+8 8 8
+9 17 9
+DROP TABLE t1;
+DROP TABLE t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/duplicate_table.result b/storage/rocksdb/mysql-test/rocksdb/r/duplicate_table.result
new file mode 100644
index 00000000000..ba16aaa6d35
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/duplicate_table.result
@@ -0,0 +1,15 @@
+DROP TABLE IF EXISTS t;
+CREATE TABLE t(id int primary key) engine=rocksdb;
+INSERT INTO t values (1), (2), (3);
+CREATE TABLE t(id int primary key) engine=rocksdb;
+ERROR 42S01: Table 't' already exists
+FLUSH TABLES;
+CREATE TABLE t(id int primary key) engine=rocksdb;
+ERROR HY000: Table 'test.t' does not exist, but metadata information exists inside MyRocks. This is a sign of data inconsistency. Please check if './test/t.frm' exists, and try to restore it if it does not exist.
+FLUSH TABLES;
+SELECT * FROM t;
+id
+1
+2
+3
+DROP TABLE t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/explicit_snapshot.result b/storage/rocksdb/mysql-test/rocksdb/r/explicit_snapshot.result
new file mode 100644
index 00000000000..14f5ef65c59
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/explicit_snapshot.result
@@ -0,0 +1,265 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE T1 (a INT PRIMARY KEY AUTO_INCREMENT) ENGINE=ROCKSDB;
+INSERT INTO T1 VALUES();
+"con1: Creating explict snapshot"
+SELECT * FROM T1;
+a
+1
+"con2: Inserting a row"
+INSERT INTO T1 VALUES();
+SELECT * FROM T1;
+a
+1
+2
+"con2: Attaching snapshot id 1"
+ATTACH EXPLICIT ROCKSDB SNAPSHOT 1;
+"con2: New row should not be visible"
+SELECT * FROM T1;
+a
+1
+"con2: Releasing snapshot"
+RELEASE EXPLICIT ROCKSDB SNAPSHOT;
+"con2: New row should be visible"
+SELECT * FROM T1;
+a
+1
+2
+"con1: New row should not be visible"
+SELECT * FROM T1;
+a
+1
+"con1: Releasing snapshot"
+RELEASE EXPLICIT ROCKSDB SNAPSHOT;
+"con1: New row should be visible"
+SELECT * FROM T1;
+a
+1
+2
+"con1: Starting shared snapshot"
+SELECT * FROM T1;
+a
+1
+2
+"con2: Inserting a row"
+INSERT INTO T1 VALUES();
+SELECT * FROM T1;
+a
+1
+2
+3
+"con2: Starting existing snapshot"
+START TRANSACTION WITH EXISTING ROCKSDB SNAPSHOT 2;
+"con2: New row should not be visible"
+SELECT * FROM T1;
+a
+1
+2
+COMMIT;
+"con2: New row should be visible"
+SELECT * FROM T1;
+a
+1
+2
+3
+COMMIT;
+"con1: New row should be visible"
+SELECT * FROM T1;
+a
+1
+2
+3
+"con1: Creating explict snapshot"
+"con2: Trying to insert row"
+INSERT INTO T1 VALUES();
+ERROR HY000: Can't execute updates when an explicit snapshot is associated with the connection using CREATE|ATTACH EXPLICIT [ENGINE] SNAPSHOT
+"con2: Attaching existing snapshot"
+ATTACH EXPLICIT ROCKSDB SNAPSHOT 3;
+"con2: Trying to insert row"
+INSERT INTO T1 VALUES();
+ERROR HY000: Can't execute updates when an explicit snapshot is associated with the connection using CREATE|ATTACH EXPLICIT [ENGINE] SNAPSHOT
+RELEASE EXPLICIT ROCKSDB SNAPSHOT;
+RELEASE EXPLICIT ROCKSDB SNAPSHOT;
+"con1: Starting shared snapshot"
+"con1: Trying to insert row"
+INSERT INTO T1 VALUES();
+ERROR HY000: Can't execute updates when you started a transaction with START TRANSACTION WITH CONSISTENT|SHARED|EXISTING [ROCKSDB] SNAPSHOT.
+"con2: Starting existing snapshot"
+START TRANSACTION WITH EXISTING ROCKSDB SNAPSHOT 4;
+"con2: Trying to insert row"
+INSERT INTO T1 VALUES();
+ERROR HY000: Can't execute updates when you started a transaction with START TRANSACTION WITH CONSISTENT|SHARED|EXISTING [ROCKSDB] SNAPSHOT.
+COMMIT;
+COMMIT;
+"con1: Creating explicit snapshot"
+CREATE EXPLICIT ROCKSDB SNAPSHOT;
+SELECT * FROM T1;
+a
+1
+2
+3
+"con2: Inserting a row"
+INSERT INTO T1 VALUES();
+SELECT * FROM T1;
+a
+1
+2
+3
+4
+"con1: New row should not be seen"
+SELECT * FROM T1;
+a
+1
+2
+3
+"con1: Creating another explicit snapshot"
+CREATE EXPLICIT ROCKSDB SNAPSHOT;
+"con1: Now the new row should be seen"
+SELECT * FROM T1;
+a
+1
+2
+3
+4
+"con1: Starting transaction with consistent snapshot"
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+SELECT * FROM T1;
+a
+1
+2
+3
+4
+"con2: Inserting a row"
+INSERT INTO T1 VALUES();
+SELECT * FROM T1;
+a
+1
+2
+3
+4
+5
+"con1: The new row should not be seen"
+SELECT * FROM T1;
+a
+1
+2
+3
+4
+"con1: Creating another explicit snapshot"
+CREATE EXPLICIT ROCKSDB SNAPSHOT;
+"con1: The new row should still not be seen"
+SELECT * FROM T1;
+a
+1
+2
+3
+4
+"con1: Committing trx"
+COMMIT;
+"con1: The new row should now be seen because of the new explicit snapshot created above"
+SELECT * FROM T1;
+a
+1
+2
+3
+4
+5
+"con1: Releasing explicit snapshot"
+RELEASE EXPLICIT ROCKSDB SNAPSHOT;
+"con1: Starting transaction with shared snapshot"
+START TRANSACTION WITH SHARED ROCKSDB SNAPSHOT;
+SELECT * FROM T1;
+a
+1
+2
+3
+4
+5
+"con2: Inserting a row"
+INSERT INTO T1 VALUES();
+SELECT * FROM T1;
+a
+1
+2
+3
+4
+5
+6
+"con1: The new row should not be seen"
+SELECT * FROM T1;
+a
+1
+2
+3
+4
+5
+"con1: Starting another transaction with shared snapshot"
+START TRANSACTION WITH SHARED ROCKSDB SNAPSHOT;
+"con1: The new row should now be seen"
+SELECT * FROM T1;
+a
+1
+2
+3
+4
+5
+6
+COMMIT;
+"con1: Creating explicit snapshot"
+CREATE EXPLICIT ROCKSDB SNAPSHOT;
+SELECT * FROM T1;
+a
+1
+2
+3
+4
+5
+6
+"con1: Releasing explicit snapshot"
+RELEASE EXPLICIT ROCKSDB SNAPSHOT;
+"con1: Releasing explicit snapshot again"
+RELEASE EXPLICIT ROCKSDB SNAPSHOT;
+ERROR HY000: Cannot process explicit snapshot
+"con1: Starting transaction with shared snapshot"
+START TRANSACTION WITH SHARED ROCKSDB SNAPSHOT;
+SELECT * FROM T1;
+a
+1
+2
+3
+4
+5
+6
+"con2: Inserting a row"
+INSERT INTO T1 VALUES();
+SELECT * FROM T1;
+a
+1
+2
+3
+4
+5
+6
+7
+"con1: Creating explicit snapshot"
+CREATE EXPLICIT ROCKSDB SNAPSHOT;
+SELECT * FROM T1;
+a
+1
+2
+3
+4
+5
+6
+"con1: Releasing explicit snapshot"
+RELEASE EXPLICIT ROCKSDB SNAPSHOT;
+"con1: The new row should not be seen"
+SELECT * FROM T1;
+a
+1
+2
+3
+4
+5
+6
+COMMIT;
+DROP TABLE T1;
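For readers unfamiliar with the statement family exercised above, a minimal sketch of sharing one read view between two sessions (assuming the MyRocks explicit-snapshot syntax shown in these results, and that the creating session was reported snapshot id 1):

CREATE EXPLICIT ROCKSDB SNAPSHOT;    # session A: pin a consistent read view; an id is reported
ATTACH EXPLICIT ROCKSDB SNAPSHOT 1;  # session B: attach to the same read view by id
SELECT * FROM T1;                    # both sessions now see identical, frozen data
RELEASE EXPLICIT ROCKSDB SNAPSHOT;   # release the association; newer writes become visible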
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/fail_system_cf.result b/storage/rocksdb/mysql-test/rocksdb/r/fail_system_cf.result
new file mode 100644
index 00000000000..df90f2b3670
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/fail_system_cf.result
@@ -0,0 +1,4 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (i INT, PRIMARY KEY (i) COMMENT '__system__') ENGINE = ROCKSDB;
+ERROR HY000: Incorrect arguments to column family not valid for storing index data.
+DROP TABLE IF EXISTS t1;
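The failure above is expected: '__system__' is the column family MyRocks reserves for its internal data dictionary, so user indexes may not be placed there. Any non-reserved name in the index comment is accepted, roughly:

CREATE TABLE t1 (i INT, PRIMARY KEY (i) COMMENT 'cf_user') ENGINE = ROCKSDB;  # 'cf_user' is an illustrative name
DROP TABLE t1;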
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/fast_prefix_index_fetch.result b/storage/rocksdb/mysql-test/rocksdb/r/fast_prefix_index_fetch.result
new file mode 100644
index 00000000000..963f9706ee8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/fast_prefix_index_fetch.result
@@ -0,0 +1,80 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+id INT,
+fake_id INT,
+bigfield VARCHAR(4096),
+PRIMARY KEY (id),
+KEY bf (bigfield(32)),
+KEY fid (fake_id, bigfield(32))
+) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (1, 1001, REPEAT('a', 1)),
+(8, 1008, REPEAT('b', 8)),
+(24, 1024, REPEAT('c', 24)),
+(31, 1031, REPEAT('d', 31)),
+(32, 1032, REPEAT('x', 32)),
+(33, 1033, REPEAT('y', 33)),
+(128, 1128, REPEAT('z', 128));
+SELECT * FROM t1;
+id fake_id bigfield
+1 1001 a
+8 1008 bbbbbbbb
+24 1024 cccccccccccccccccccccccc
+31 1031 ddddddddddddddddddddddddddddddd
+32 1032 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+33 1033 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+128 1128 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+# Baseline sanity check
+no-op query
+no-op query
+include/assert.inc [Baseline sanity check: 0 rocksdb_covered_secondary_key_lookups]
+# Eligible for optimization.
+id bigfield
+31 ddddddddddddddddddddddddddddddd
+include/assert.inc [Eligible for optimization.: 2 rocksdb_covered_secondary_key_lookups]
+# Eligible for optimization, access via fake_id only
+id bigfield
+31 ddddddddddddddddddddddddddddddd
+include/assert.inc [Eligible for optimization, access via fake_id only: 2 rocksdb_covered_secondary_key_lookups]
+# Not eligible for optimization, access via fake_id of big row.
+id bigfield
+33 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+include/assert.inc [Not eligible for optimization, access via fake_id of big row.: 0 rocksdb_covered_secondary_key_lookups]
+# Eligible for optimization.
+id bigfield
+32 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+include/assert.inc [Eligible for optimization.: 1 rocksdb_covered_secondary_key_lookups]
+# Not eligible for optimization.
+id bigfield
+33 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+include/assert.inc [Not eligible for optimization.: 0 rocksdb_covered_secondary_key_lookups]
+# Eligible for optimization.
+id bigfield
+8 bbbbbbbb
+include/assert.inc [Eligible for optimization.: 2 rocksdb_covered_secondary_key_lookups]
+# Eligible for optimization.
+id bigfield
+24 cccccccccccccccccccccccc
+include/assert.inc [Eligible for optimization.: 2 rocksdb_covered_secondary_key_lookups]
+# Not eligible for optimization.
+id bigfield
+128 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+include/assert.inc [Not eligible for optimization.: 0 rocksdb_covered_secondary_key_lookups]
+#
+# Test that multi-byte charsets are handled correctly
+#
+# Charset record obviously shorter than the prefix
+a b
+1 a
+include/assert.inc [Charset record obviously shorter than the prefix: 2 rocksdb_covered_secondary_key_lookups]
+# Charset record shorter than prefix
+a b
+2 cc
+include/assert.inc [Charset record shorter than prefix: 2 rocksdb_covered_secondary_key_lookups]
+# Charset record with glyphs shorter than prefix
+a b
+3 ŽŽ
+include/assert.inc [Charset record with glyphs shorter than prefix: 1 rocksdb_covered_secondary_key_lookups]
+# Charset record longer than prefix
+a b
+4 žžžž
+include/assert.inc [Charset record longer than prefix: 0 rocksdb_covered_secondary_key_lookups]
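A sketch of what the rocksdb_covered_secondary_key_lookups counter above measures: with a prefix index, a secondary-key read can skip the primary-key fetch only when the stored value provably fits inside the prefix (lengths here mirror the 32-byte prefix used by the test):

CREATE TABLE t (id INT PRIMARY KEY, v VARCHAR(4096), KEY pv (v(32))) ENGINE = ROCKSDB;
INSERT INTO t VALUES (1, REPEAT('a', 8)), (2, REPEAT('b', 33));
SELECT id, v FROM t FORCE INDEX (pv) WHERE v = REPEAT('a', 8);   # fits in the prefix: covered lookup
SELECT id, v FROM t FORCE INDEX (pv) WHERE v = REPEAT('b', 33);  # exceeds the prefix: full-row fetch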
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/force_shutdown.result b/storage/rocksdb/mysql-test/rocksdb/r/force_shutdown.result
new file mode 100644
index 00000000000..4386ad590ae
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/force_shutdown.result
@@ -0,0 +1,38 @@
+create table t1 (
+pk int not null primary key,
+col1 varchar(10)
+) engine=rocksdb;
+insert into t1 values (1,1),(2,2),(3,3);
+set session debug= "+d,myrocks_busy_loop_on_row_read";
+select * from t1 where pk=1;
+# testing unclean shutdown on a stuck instance
+# Run the SHUTDOWN SQL command, forcing a kill (exit code 127)
+shutdown 1;
+Got one of the listed errors
+# verifying exit code is printed
+# restart the server
+shutdown 230;
+Got one of the listed errors
+# restart the server
+# verifying SHUTDOWN is refused if exit code > 255
+SHUTDOWN 256;
+ERROR HY000: exit code must be 0..255
+SHUTDOWN 10000;
+ERROR HY000: exit code must be 0..255
+# verifying SHUTDOWN is refused if instances are not read only
+SHUTDOWN 0 read_only;
+ERROR HY000: Only read_only instance can be killed.
+SHUTDOWN 127 read_only;
+ERROR HY000: Only read_only instance can be killed.
+SHUTDOWN 127;
+Got one of the listed errors
+# restart the server
+set session debug= "+d,myrocks_busy_loop_on_row_read";
+select * from t1 where pk=1;
+SET GLOBAL read_only=1;
+# verifying SHUTDOWN read_only works with read_only instance
+# Run the SHUTDOWN SQL command, forcing a kill (exit code 255)
+shutdown 255 read_only;
+Got one of the listed errors
+# restart the server
+drop table t1;
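The extended SHUTDOWN statement tested above takes an exit code in 0..255 and an optional read_only guard; in sketch form (assuming the Facebook/MyRocks extension whose error messages appear in these results):

SHUTDOWN 0;            # exit with code 0
SHUTDOWN 127;          # exit with code 127 (the forced-kill path in the test)
SHUTDOWN 256;          # refused: exit code must be 0..255
SHUTDOWN 0 read_only;  # refused unless the instance is running with read_only=1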
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/foreign_key.result b/storage/rocksdb/mysql-test/rocksdb/r/foreign_key.result
new file mode 100644
index 00000000000..fa3809e9758
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/foreign_key.result
@@ -0,0 +1,25 @@
+DROP TABLE IF EXISTS t1, t2;
+CREATE TABLE t1 (b INT PRIMARY KEY);
+CREATE TABLE t2 (a INT NOT NULL, b INT NOT NULL, FOREIGN KEY (b) REFERENCES t1(b));
+ERROR 42000: This version of MariaDB doesn't yet support 'FOREIGN KEY for the RocksDB storage engine'
+CREATE TABLE t2 (a INT NOT NULL, bforeign INT NOT NULL);
+DROP TABLE t2;
+CREATE TABLE t2 (a INT NOT NULL, foreignkey INT NOT NULL);
+DROP TABLE t2;
+CREATE TABLE t2 (a INT NOT NULL, bforeign INT not null, FOREIGN KEY (bforeign) REFERENCES t1(b));
+ERROR 42000: This version of MariaDB doesn't yet support 'FOREIGN KEY for the RocksDB storage engine'
+CREATE TABLE t2 (a INT NOT NULL, b INT NOT NULL);
+ALTER TABLE t2 ADD FOREIGN KEY (b) REFERENCES t1(b);
+ERROR 42000: This version of MariaDB doesn't yet support 'FOREIGN KEY for the RocksDB storage engine'
+DROP TABLE t2;
+CREATE TABLE t2 (a INT NOT NULL);
+ALTER TABLE t2 ADD bforeign INT NOT NULL;
+DROP TABLE t2;
+CREATE TABLE t2 (a INT NOT NULL);
+ALTER TABLE t2 ADD foreignkey INT NOT NULL;
+DROP TABLE t2;
+CREATE TABLE t2 (a INT NOT NULL);
+ALTER TABLE t2 ADD bforeign INT NOT NULL, ADD FOREIGN KEY (bforeign) REFERENCES t1(b);
+ERROR 42000: This version of MariaDB doesn't yet support 'FOREIGN KEY for the RocksDB storage engine'
+DROP TABLE t2;
+DROP TABLE t1;
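The identifier cases above confirm that only genuine FOREIGN KEY clauses are rejected; column names that merely contain the word 'foreign' parse fine. Until the engine supports referential constraints, the usual fallback is a plain index plus application-level checks, e.g.:

CREATE TABLE t2 (a INT NOT NULL, b INT NOT NULL, KEY (b)) ENGINE = ROCKSDB;  # index only, no constraint enforced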
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/gap_lock_issue254.result b/storage/rocksdb/mysql-test/rocksdb/r/gap_lock_issue254.result
new file mode 100644
index 00000000000..d42041183c8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/gap_lock_issue254.result
@@ -0,0 +1,9 @@
+create table t (id int primary key, value int);
+begin;
+update t set value=100 where id in (1, 2);
+commit;
+begin;
+select * from t for update;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from t for update
+commit;
+drop table t;
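As the error text suggests, the same statement is allowed once the WHERE clause pins every column of a unique key, or once the check is disabled; a sketch:

BEGIN;
SELECT * FROM t WHERE id = 1 FOR UPDATE;  # full primary-key equality: no gap lock required
COMMIT;
SET SESSION gap_lock_raise_error = 0;     # alternatively, permit gap locks for this session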
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/gap_lock_raise_error.result b/storage/rocksdb/mysql-test/rocksdb/r/gap_lock_raise_error.result
new file mode 100644
index 00000000000..c1cf1e77ecf
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/gap_lock_raise_error.result
@@ -0,0 +1,504 @@
+drop table if exists gap1,gap2,gap3;
+CREATE DATABASE mysqlslap;
+CREATE TABLE gap1 (id1 INT, id2 INT, id3 INT, c1 INT, value INT,
+PRIMARY KEY (id1, id2, id3),
+INDEX i (c1)) ENGINE=rocksdb;
+CREATE TABLE gap2 like gap1;
+CREATE TABLE gap3 (id INT, value INT,
+PRIMARY KEY (id),
+UNIQUE KEY ui(value)) ENGINE=rocksdb;
+insert into gap3 values (1,1), (2,2),(3,3),(4,4),(5,5);
+create table gap4 (
+pk int primary key,
+a int,
+b int,
+key(a)
+) ENGINE=rocksdb;
+insert into gap4 values (1,1,1), (2,2,2), (3,3,3), (4,4,4);
+create table gap5 like gap4;
+insert into gap5 values (1,1,1), (2,2,2), (3,3,3), (4,4,4);
+set session gap_lock_raise_error=1;
+set session gap_lock_write_log=1;
+set @save_gap_lock_write_log = @@gap_lock_write_log;
+set @save_gap_lock_raise_error = @@gap_lock_raise_error;
+set gap_lock_write_log = 1;
+set gap_lock_raise_error = 0;
+begin;
+update gap4 set a= (select 1+max(a) from gap5 where gap5.pk between 1 and 3 and gap5.b=gap4.b);
+1
+update gap4 set a= (select 2+max(a) from gap5 where gap5.pk between 1 and 3 and gap5.b=gap4.b);
+update gap4 set a= (select 3+max(a) from gap5 where gap5.pk between 1 and 3 and gap5.b=gap4.b);
+1
+1
+0
+flush logs;
+0
+rollback;
+set gap_lock_write_log = @save_gap_lock_write_log;
+set gap_lock_raise_error = @save_gap_lock_raise_error;
+set global gap_lock_write_log = 1;
+set global gap_lock_write_log = 0;
+1000
+set session autocommit=0;
+select * from gap1 limit 1 for update;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 limit 1 for update
+select * from gap1 where value != 100 limit 1 for update;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 where value != 100 limit 1 for update
+select * from gap1 where id1=1 for update;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 where id1=1 for update
+select * from gap1 where id1=1 and id2= 1 for update;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 where id1=1 and id2= 1 for update
+select * from gap1 where id1=1 and id2= 1 and id3 != 1 for update;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 where id1=1 and id2= 1 and id3 != 1 for update
+select * from gap1 where id1=1 and id2= 1 and id3
+between 1 and 3 for update;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 where id1=1 and id2= 1 and id3
+between 1 and 3 for update
+select * from gap1 where id1=1 and id2= 1 order by id3 asc
+limit 1 for update;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 where id1=1 and id2= 1 order by id3 asc
+limit 1 for update
+select * from gap1 where id1=1 and id2= 1 order by id3 desc
+limit 1 for update;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 where id1=1 and id2= 1 order by id3 desc
+limit 1 for update
+select * from gap1 order by id1 asc limit 1 for update;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 order by id1 asc limit 1 for update
+select * from gap1 order by id1 asc, id2 asc, id3 asc limit 1 for update;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 order by id1 asc, id2 asc, id3 asc limit 1 for update
+select * from gap1 order by id1 desc limit 1 for update;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 order by id1 desc limit 1 for update
+select * from gap1 order by id1 desc, id2 desc, id3 desc
+limit 1 for update;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 order by id1 desc, id2 desc, id3 desc
+limit 1 for update
+select * from gap1 force index(i) where c1=1 for update;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 force index(i) where c1=1 for update
+select * from gap3 force index(ui) where value=1 for update;
+id value
+1 1
+select * from gap1 where id1=1 and id2=1 and id3=1 for update;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2=1 and id3 in (1, 2, 3) for update;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2=1 and id3=1 and value=1
+order by c1 for update;
+id1 id2 id3 c1 value
+select * from gap3 where id=1 for update;
+id value
+1 1
+set session autocommit=1;
+select * from gap1 limit 1 for update;
+id1 id2 id3 c1 value
+0 0 1 1 1
+select * from gap1 where value != 100 limit 1 for update;
+id1 id2 id3 c1 value
+0 0 1 1 1
+select * from gap1 where id1=1 for update;
+id1 id2 id3 c1 value
+1 0 2 2 2
+1 0 3 3 3
+select * from gap1 where id1=1 and id2= 1 for update;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2= 1 and id3 != 1 for update;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2= 1 and id3
+between 1 and 3 for update;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2= 1 order by id3 asc
+limit 1 for update;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2= 1 order by id3 desc
+limit 1 for update;
+id1 id2 id3 c1 value
+select * from gap1 order by id1 asc limit 1 for update;
+id1 id2 id3 c1 value
+0 0 1 1 1
+select * from gap1 order by id1 asc, id2 asc, id3 asc limit 1 for update;
+id1 id2 id3 c1 value
+0 0 1 1 1
+select * from gap1 order by id1 desc limit 1 for update;
+id1 id2 id3 c1 value
+500 100 1000 1000 1000
+select * from gap1 order by id1 desc, id2 desc, id3 desc
+limit 1 for update;
+id1 id2 id3 c1 value
+500 100 1000 1000 1000
+select * from gap1 force index(i) where c1=1 for update;
+id1 id2 id3 c1 value
+0 0 1 1 1
+select * from gap3 force index(ui) where value=1 for update;
+id value
+1 1
+select * from gap1 where id1=1 and id2=1 and id3=1 for update;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2=1 and id3 in (1, 2, 3) for update;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2=1 and id3=1 and value=1
+order by c1 for update;
+id1 id2 id3 c1 value
+select * from gap3 where id=1 for update;
+id value
+1 1
+set session autocommit=0;
+select * from gap1 limit 1 lock in share mode;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 limit 1 lock in share mode
+select * from gap1 where value != 100 limit 1 lock in share mode;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 where value != 100 limit 1 lock in share mode
+select * from gap1 where id1=1 lock in share mode;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 where id1=1 lock in share mode
+select * from gap1 where id1=1 and id2= 1 lock in share mode;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 where id1=1 and id2= 1 lock in share mode
+select * from gap1 where id1=1 and id2= 1 and id3 != 1 lock in share mode;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 where id1=1 and id2= 1 and id3 != 1 lock in share mode
+select * from gap1 where id1=1 and id2= 1 and id3
+between 1 and 3 lock in share mode;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 where id1=1 and id2= 1 and id3
+between 1 and 3 lock in share mode
+select * from gap1 where id1=1 and id2= 1 order by id3 asc
+limit 1 lock in share mode;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 where id1=1 and id2= 1 order by id3 asc
+limit 1 lock in share mode
+select * from gap1 where id1=1 and id2= 1 order by id3 desc
+limit 1 lock in share mode;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 where id1=1 and id2= 1 order by id3 desc
+limit 1 lock in share mode
+select * from gap1 order by id1 asc limit 1 lock in share mode;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 order by id1 asc limit 1 lock in share mode
+select * from gap1 order by id1 asc, id2 asc, id3 asc limit 1 lock in share mode;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 order by id1 asc, id2 asc, id3 asc limit 1 lock in share mode
+select * from gap1 order by id1 desc limit 1 lock in share mode;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 order by id1 desc limit 1 lock in share mode
+select * from gap1 order by id1 desc, id2 desc, id3 desc
+limit 1 lock in share mode;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 order by id1 desc, id2 desc, id3 desc
+limit 1 lock in share mode
+select * from gap1 force index(i) where c1=1 lock in share mode;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 force index(i) where c1=1 lock in share mode
+select * from gap3 force index(ui) where value=1 lock in share mode;
+id value
+1 1
+select * from gap1 where id1=1 and id2=1 and id3=1 lock in share mode;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2=1 and id3 in (1, 2, 3) lock in share mode;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2=1 and id3=1 and value=1
+order by c1 lock in share mode;
+id1 id2 id3 c1 value
+select * from gap3 where id=1 lock in share mode;
+id value
+1 1
+set session autocommit=1;
+select * from gap1 limit 1 lock in share mode;
+id1 id2 id3 c1 value
+0 0 1 1 1
+select * from gap1 where value != 100 limit 1 lock in share mode;
+id1 id2 id3 c1 value
+0 0 1 1 1
+select * from gap1 where id1=1 lock in share mode;
+id1 id2 id3 c1 value
+1 0 2 2 2
+1 0 3 3 3
+select * from gap1 where id1=1 and id2= 1 lock in share mode;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2= 1 and id3 != 1 lock in share mode;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2= 1 and id3
+between 1 and 3 lock in share mode;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2= 1 order by id3 asc
+limit 1 lock in share mode;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2= 1 order by id3 desc
+limit 1 lock in share mode;
+id1 id2 id3 c1 value
+select * from gap1 order by id1 asc limit 1 lock in share mode;
+id1 id2 id3 c1 value
+0 0 1 1 1
+select * from gap1 order by id1 asc, id2 asc, id3 asc limit 1 lock in share mode;
+id1 id2 id3 c1 value
+0 0 1 1 1
+select * from gap1 order by id1 desc limit 1 lock in share mode;
+id1 id2 id3 c1 value
+500 100 1000 1000 1000
+select * from gap1 order by id1 desc, id2 desc, id3 desc
+limit 1 lock in share mode;
+id1 id2 id3 c1 value
+500 100 1000 1000 1000
+select * from gap1 force index(i) where c1=1 lock in share mode;
+id1 id2 id3 c1 value
+0 0 1 1 1
+select * from gap3 force index(ui) where value=1 lock in share mode;
+id value
+1 1
+select * from gap1 where id1=1 and id2=1 and id3=1 lock in share mode;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2=1 and id3 in (1, 2, 3) lock in share mode;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2=1 and id3=1 and value=1
+order by c1 lock in share mode;
+id1 id2 id3 c1 value
+select * from gap3 where id=1 lock in share mode;
+id value
+1 1
+set session autocommit=0;
+select * from gap1 limit 1 ;
+id1 id2 id3 c1 value
+0 0 1 1 1
+select * from gap1 where value != 100 limit 1 ;
+id1 id2 id3 c1 value
+0 0 1 1 1
+select * from gap1 where id1=1 ;
+id1 id2 id3 c1 value
+1 0 2 2 2
+1 0 3 3 3
+select * from gap1 where id1=1 and id2= 1 ;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2= 1 and id3 != 1 ;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2= 1 and id3
+between 1 and 3 ;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2= 1 order by id3 asc
+limit 1 ;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2= 1 order by id3 desc
+limit 1 ;
+id1 id2 id3 c1 value
+select * from gap1 order by id1 asc limit 1 ;
+id1 id2 id3 c1 value
+0 0 1 1 1
+select * from gap1 order by id1 asc, id2 asc, id3 asc limit 1 ;
+id1 id2 id3 c1 value
+0 0 1 1 1
+select * from gap1 order by id1 desc limit 1 ;
+id1 id2 id3 c1 value
+500 100 1000 1000 1000
+select * from gap1 order by id1 desc, id2 desc, id3 desc
+limit 1 ;
+id1 id2 id3 c1 value
+500 100 1000 1000 1000
+select * from gap1 force index(i) where c1=1 ;
+id1 id2 id3 c1 value
+0 0 1 1 1
+select * from gap3 force index(ui) where value=1 ;
+id value
+1 1
+select * from gap1 where id1=1 and id2=1 and id3=1 ;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2=1 and id3 in (1, 2, 3) ;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2=1 and id3=1 and value=1
+order by c1 ;
+id1 id2 id3 c1 value
+select * from gap3 where id=1 ;
+id value
+1 1
+set session autocommit=1;
+select * from gap1 limit 1 ;
+id1 id2 id3 c1 value
+0 0 1 1 1
+select * from gap1 where value != 100 limit 1 ;
+id1 id2 id3 c1 value
+0 0 1 1 1
+select * from gap1 where id1=1 ;
+id1 id2 id3 c1 value
+1 0 2 2 2
+1 0 3 3 3
+select * from gap1 where id1=1 and id2= 1 ;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2= 1 and id3 != 1 ;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2= 1 and id3
+between 1 and 3 ;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2= 1 order by id3 asc
+limit 1 ;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2= 1 order by id3 desc
+limit 1 ;
+id1 id2 id3 c1 value
+select * from gap1 order by id1 asc limit 1 ;
+id1 id2 id3 c1 value
+0 0 1 1 1
+select * from gap1 order by id1 asc, id2 asc, id3 asc limit 1 ;
+id1 id2 id3 c1 value
+0 0 1 1 1
+select * from gap1 order by id1 desc limit 1 ;
+id1 id2 id3 c1 value
+500 100 1000 1000 1000
+select * from gap1 order by id1 desc, id2 desc, id3 desc
+limit 1 ;
+id1 id2 id3 c1 value
+500 100 1000 1000 1000
+select * from gap1 force index(i) where c1=1 ;
+id1 id2 id3 c1 value
+0 0 1 1 1
+select * from gap3 force index(ui) where value=1 ;
+id value
+1 1
+select * from gap1 where id1=1 and id2=1 and id3=1 ;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2=1 and id3 in (1, 2, 3) ;
+id1 id2 id3 c1 value
+select * from gap1 where id1=1 and id2=1 and id3=1 and value=1
+order by c1 ;
+id1 id2 id3 c1 value
+select * from gap3 where id=1 ;
+id value
+1 1
+set session autocommit=0;
+insert into gap1 (id1, id2, id3) values (-1,-1,-1);
+insert into gap1 (id1, id2, id3) values (-1,-1,-1)
+on duplicate key update value=100;
+update gap1 set value=100 where id1=1;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: update gap1 set value=100 where id1=1
+update gap1 set value=100 where id1=1 and id2=1 and id3=1;
+delete from gap1 where id1=2;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: delete from gap1 where id1=2
+delete from gap1 where id1=-1 and id2=-1 and id3=-1;
+commit;
+set session autocommit=1;
+insert into gap1 (id1, id2, id3) values (-1,-1,-1);
+insert into gap1 (id1, id2, id3) values (-1,-1,-1)
+on duplicate key update value=100;
+update gap1 set value=100 where id1=1;
+update gap1 set value=100 where id1=1 and id2=1 and id3=1;
+delete from gap1 where id1=2;
+delete from gap1 where id1=-1 and id2=-1 and id3=-1;
+commit;
+set session autocommit=1;
+insert into gap2 select * from gap1;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: insert into gap2 select * from gap1
+insert into gap2 select * from gap1 where id1=1;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: insert into gap2 select * from gap1 where id1=1
+insert into gap2 select * from gap1 where id1=1 and id2=1 and id3=1;
+create table t4 select * from gap1 where id1=1 and id2=1 and id3=1;
+drop table t4;
+create table t4 select * from gap1;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: create table t4 select * from gap1
+create table t4 select * from gap1 where id1=1;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: create table t4 select * from gap1 where id1=1
+update gap1 join gap2 on gap1.id1 and gap1.id2=gap2.id2 set gap1.value=100 where gap2.id1=3
+and gap2.id2=3 and gap2.id3=3;
+update gap1 join gap2 on gap1.id1 and gap1.id2=gap2.id2 set gap1.value=100 where gap2.id1=3;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: update gap1 join gap2 on gap1.id1 and gap1.id2=gap2.id2 set gap1.value=100 where gap2.id1=3
+update gap1 join gap2 on gap1.id1 and gap1.id2=gap2.id2 join gap3 on gap1.id1=gap3.id
+set gap1.value=100 where gap2.id1=3;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: update gap1 join gap2 on gap1.id1 and gap1.id2=gap2.id2 join gap3 on gap1.id1=gap3.id
+set gap1.value=100 where gap2.id1=3
+update gap1 set gap1.value= (select count(*) from gap2);
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: update gap1 set gap1.value= (select count(*) from gap2)
+delete gap1 from gap1 join gap2 on gap1.id1 and gap1.id2=gap2.id2 where gap2.id1=3
+and gap2.id2=3 and gap2.id3=3;
+delete gap1 from gap1 join gap2 on gap1.id1 and gap1.id2=gap2.id2 where gap2.id1=3;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: delete gap1 from gap1 join gap2 on gap1.id1 and gap1.id2=gap2.id2 where gap2.id1=3
+select * from gap1, gap2 limit 1 for update;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1, gap2 limit 1 for update
+select * from gap1 a, gap1 b limit 1 for update;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 a, gap1 b limit 1 for update
+create table u1(
+c1 int,
+c2 int,
+c3 int,
+c4 int,
+primary key (c1, c2, c3),
+unique key (c3, c1)
+);
+set session gap_lock_raise_error=1;
+begin;
+insert into u1 values (1,1,1,1);
+commit;
+begin;
+insert into u1 values (1,2,1,1) on duplicate key update c4=10;
+commit;
+begin;
+select * from u1 where c3=1 and c1 = 1 for update;
+c1 c2 c3 c4
+1 1 1 10
+select * from u1 where c3=1 for update;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from u1 where c3=1 for update
+commit;
+drop table u1;
+set global gap_lock_write_log= 0;
+set global gap_lock_raise_error= 0;
+drop table if exists gap1, gap2, gap3, gap4, gap5;
+DROP DATABASE mysqlslap;
+0
+SET GLOBAL gap_lock_log_file='<GAP_LOCK_ORIG>';
+SET GLOBAL gap_lock_log_file='<GAP_LOCK>';
+flush general logs;
+SET @save_gap_lock_exceptions = @@global.gap_lock_exceptions;
+SET GLOBAL gap_lock_exceptions="t.*";
+drop table if exists gap1,gap2,gap3;
+CREATE DATABASE mysqlslap;
+CREATE TABLE gap1 (id1 INT, id2 INT, id3 INT, c1 INT, value INT,
+PRIMARY KEY (id1, id2, id3),
+INDEX i (c1)) ENGINE=rocksdb;
+CREATE TABLE gap2 like gap1;
+CREATE TABLE gap3 (id INT, value INT,
+PRIMARY KEY (id),
+UNIQUE KEY ui(value)) ENGINE=rocksdb;
+insert into gap3 values (1,1), (2,2),(3,3),(4,4),(5,5);
+create table gap4 (
+pk int primary key,
+a int,
+b int,
+key(a)
+) ENGINE=rocksdb;
+insert into gap4 values (1,1,1), (2,2,2), (3,3,3), (4,4,4);
+create table gap5 like gap4;
+insert into gap5 values (1,1,1), (2,2,2), (3,3,3), (4,4,4);
+set session gap_lock_raise_error=1;
+set session gap_lock_write_log=1;
+set session autocommit=0;
+select * from gap1 limit 1 for update;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 limit 1 for update
+select * from gap1 where value != 100 limit 1 for update;
+ERROR HY000: Using Gap Lock without full unique key in multi-table or multi-statement transactions is not allowed. You need either 1: Execute 'SET SESSION gap_lock_raise_error=0' if you are sure that your application does not rely on Gap Lock. 2: Rewrite queries to use all unique key columns in WHERE equal conditions. 3: Rewrite to single-table, single-statement transaction. Query: select * from gap1 where value != 100 limit 1 for update
+set global gap_lock_write_log= 0;
+set global gap_lock_raise_error= 0;
+drop table if exists gap1, gap2, gap3, gap4, gap5;
+DROP DATABASE mysqlslap;
+0
+SET GLOBAL gap_lock_log_file='<GAP_LOCK_ORIG>';
+SET GLOBAL gap_lock_log_file='<GAP_LOCK>';
+flush general logs;
+SET GLOBAL gap_lock_exceptions="gap.*";
+drop table if exists gap1,gap2,gap3;
+CREATE DATABASE mysqlslap;
+CREATE TABLE gap1 (id1 INT, id2 INT, id3 INT, c1 INT, value INT,
+PRIMARY KEY (id1, id2, id3),
+INDEX i (c1)) ENGINE=rocksdb;
+CREATE TABLE gap2 like gap1;
+CREATE TABLE gap3 (id INT, value INT,
+PRIMARY KEY (id),
+UNIQUE KEY ui(value)) ENGINE=rocksdb;
+insert into gap3 values (1,1), (2,2),(3,3),(4,4),(5,5);
+create table gap4 (
+pk int primary key,
+a int,
+b int,
+key(a)
+) ENGINE=rocksdb;
+insert into gap4 values (1,1,1), (2,2,2), (3,3,3), (4,4,4);
+create table gap5 like gap4;
+insert into gap5 values (1,1,1), (2,2,2), (3,3,3), (4,4,4);
+set session gap_lock_raise_error=1;
+set session gap_lock_write_log=1;
+set session autocommit=0;
+select * from gap1 limit 1 for update;
+id1 id2 id3 c1 value
+0 0 1 1 1
+select * from gap1 where value != 100 limit 1 for update;
+id1 id2 id3 c1 value
+0 0 1 1 1
+set global gap_lock_write_log= 0;
+set global gap_lock_raise_error= 0;
+drop table if exists gap1, gap2, gap3, gap4, gap5;
+DROP DATABASE mysqlslap;
+0
+SET GLOBAL gap_lock_log_file='<GAP_LOCK_ORIG>';
+SET GLOBAL gap_lock_log_file='<GAP_LOCK>';
+flush general logs;
+SET GLOBAL gap_lock_exceptions=@save_gap_lock_exceptions;
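+# Taken together, the two runs above sketch how gap_lock_exceptions works:
+# it is a pattern list matched against table names, so with "t.*" the gap1
+# queries still raise the gap-lock error, while with "gap.*" the very same
+# statements run and return rows.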
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/get_error_message.result b/storage/rocksdb/mysql-test/rocksdb/r/get_error_message.result
new file mode 100644
index 00000000000..04dcac1fcb4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/get_error_message.result
@@ -0,0 +1,8 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1(a INT,b INT,KEY (a)) PARTITION BY HASH (a) PARTITIONS 3;
+SHOW TABLES;
+Tables_in_test
+t1
+ALTER TABLE t1 ADD PARTITION(PARTITION p3 DATA DIRECTORY='G:/mysqltest/p3Data' INDEX DIRECTORY='H:/mysqltest/p3Index');
+ERROR 42000: Incorrect table name 'H:/mysqltest/p3Index'
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/group_min_max.result b/storage/rocksdb/mysql-test/rocksdb/r/group_min_max.result
new file mode 100644
index 00000000000..e6a3dee961c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/group_min_max.result
@@ -0,0 +1,3504 @@
+set @debug_tmp= @@debug_dbug;
+set global debug_dbug="+d,force_group_by";
+drop table if exists t1;
+create table t1 (
+a1 char(64), a2 char(64), b char(16), c char(16) not null, d char(16), dummy char(248) default ' '
+) engine=RocksDB;
+insert into t1 (a1, a2, b, c, d) values
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'),
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'),
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'),
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'),
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'),
+('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'),
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'),
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'),
+('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'),
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'),
+('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'),
+('d','a','a','a411','xy1'),('d','a','a','b411','xy2'),('d','a','a','c411','xy3'),('d','a','a','d411','xy4'),
+('d','a','b','e412','xy1'),('d','a','b','f412','xy2'),('d','a','b','g412','xy3'),('d','a','b','h412','xy4'),
+('d','b','a','i421','xy1'),('d','b','a','j421','xy2'),('d','b','a','k421','xy3'),('d','b','a','l421','xy4'),
+('d','b','b','m422','xy1'),('d','b','b','n422','xy2'),('d','b','b','o422','xy3'),('d','b','b','p422','xy4'),
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'),
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'),
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'),
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'),
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'),
+('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'),
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'),
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'),
+('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'),
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'),
+('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'),
+('d','a','a','a411','xy1'),('d','a','a','b411','xy2'),('d','a','a','c411','xy3'),('d','a','a','d411','xy4'),
+('d','a','b','e412','xy1'),('d','a','b','f412','xy2'),('d','a','b','g412','xy3'),('d','a','b','h412','xy4'),
+('d','b','a','i421','xy1'),('d','b','a','j421','xy2'),('d','b','a','k421','xy3'),('d','b','a','l421','xy4'),
+('d','b','b','m422','xy1'),('d','b','b','n422','xy2'),('d','b','b','o422','xy3'),('d','b','b','p422','xy4');
+create index idx_t1_0 on t1 (a1);
+create index idx_t1_1 on t1 (a1,a2,b,c);
+create index idx_t1_2 on t1 (a1,a2,b);
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+drop table if exists t2;
+create table t2 (
+a1 char(64), a2 char(64) not null, b char(16), c char(16), d char(16), dummy char(248) default ' '
+) engine=RocksDB;
+insert into t2 select * from t1;
+insert into t2 (a1, a2, b, c, d) values
+('a','a',NULL,'a777','xyz'),('a','a',NULL,'a888','xyz'),('a','a',NULL,'a999','xyz'),
+('a','a','a',NULL,'xyz'),
+('a','a','b',NULL,'xyz'),
+('a','b','a',NULL,'xyz'),
+('c','a',NULL,'c777','xyz'),('c','a',NULL,'c888','xyz'),('c','a',NULL,'c999','xyz'),
+('d','b','b',NULL,'xyz'),
+('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),
+('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),
+('a','a',NULL,'a777','xyz'),('a','a',NULL,'a888','xyz'),('a','a',NULL,'a999','xyz'),
+('a','a','a',NULL,'xyz'),
+('a','a','b',NULL,'xyz'),
+('a','b','a',NULL,'xyz'),
+('c','a',NULL,'c777','xyz'),('c','a',NULL,'c888','xyz'),('c','a',NULL,'c999','xyz'),
+('d','b','b',NULL,'xyz'),
+('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),
+('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz');
+create index idx_t2_0 on t2 (a1);
+create index idx_t2_1 on t2 (a1,a2,b,c);
+create index idx_t2_2 on t2 (a1,a2,b);
+analyze table t2;
+Table Op Msg_type Msg_text
+test.t2 analyze status OK
+drop table if exists t3;
+create table t3 (
+a1 char(1), a2 char(1), b char(1), c char(4) not null, d char(3), dummy char(1) default ' '
+) engine=RocksDB;
+insert into t3 (a1, a2, b, c, d) values
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'),
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'),
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'),
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'),
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'),
+('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'),
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'),
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'),
+('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'),
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'),
+('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4');
+insert into t3 (a1, a2, b, c, d) values
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'),
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'),
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'),
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'),
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'),
+('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'),
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'),
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'),
+('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'),
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'),
+('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4');
+insert into t3 (a1, a2, b, c, d) values
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'),
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'),
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'),
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'),
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'),
+('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'),
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'),
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'),
+('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'),
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'),
+('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4');
+insert into t3 (a1, a2, b, c, d) values
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'),
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'),
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'),
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'),
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'),
+('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'),
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'),
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'),
+('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'),
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'),
+('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4');
+create index idx_t3_0 on t3 (a1);
+create index idx_t3_1 on t3 (a1,a2,b,c);
+create index idx_t3_2 on t3 (a1,a2,b);
+analyze table t3;
+Table Op Msg_type Msg_text
+test.t3 analyze status OK
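+# A quick sanity check on the key_len values below (assuming a 1-byte
+# charset): a1 and a2 are nullable char(64) -> 65 bytes each, b is nullable
+# char(16) -> 17, and c is char(16) not null -> 16; hence 65 for the (a1)
+# prefix, 130 for (a1,a2), 147 for (a1,a2,b) and 163 for the full
+# (a1,a2,b,c) key of idx_t1_1.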
+explain select a1, min(a2) from t1 group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 130 NULL 63 Using index for group-by
+explain select a1, max(a2) from t1 group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 65 NULL 63 Using index for group-by
+explain select a1, min(a2), max(a2) from t1 group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 130 NULL 63 Using index for group-by
+explain select a1, a2, b, min(c), max(c) from t1 group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 147 NULL 251 Using index for group-by
+explain select a1,a2,b,max(c),min(c) from t1 group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 147 NULL 251 Using index for group-by
+explain select a1,a2,b,max(c),min(c) from t2 group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 # NULL # Using index for group-by
+explain select min(a2), a1, max(a2), min(a2), a1 from t1 group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 130 NULL 63 Using index for group-by
+explain select a1, b, min(c), a1, max(c), b, a2, max(c), max(c) from t1 group by a1, a2, b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 147 NULL 251 Using index for group-by
+explain select min(a2) from t1 group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 130 NULL 63 Using index for group-by
+explain select a2, min(c), max(c) from t1 group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 147 NULL 251 Using index for group-by
+select a1, min(a2) from t1 group by a1;
+a1 min(a2)
+a a
+b a
+c a
+d a
+select a1, max(a2) from t1 group by a1;
+a1 max(a2)
+a b
+b b
+c b
+d b
+select a1, min(a2), max(a2) from t1 group by a1;
+a1 min(a2) max(a2)
+a a b
+b a b
+c a b
+d a b
+select a1, a2, b, min(c), max(c) from t1 group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a a111 d111
+a a b e112 h112
+a b a i121 l121
+a b b m122 p122
+b a a a211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+c a a a311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a a411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b,max(c),min(c) from t1 group by a1,a2,b;
+a1 a2 b max(c) min(c)
+a a a d111 a111
+a a b h112 e112
+a b a l121 i121
+a b b p122 m122
+b a a d211 a211
+b a b h212 e212
+b b a l221 i221
+b b b p222 m222
+c a a d311 a311
+c a b h312 e312
+c b a l321 i321
+c b b p322 m322
+d a a d411 a411
+d a b h412 e412
+d b a l421 i421
+d b b p422 m422
+select a1,a2,b,max(c),min(c) from t2 group by a1,a2,b;
+a1 a2 b max(c) min(c)
+a a NULL a999 a777
+a a a d111 a111
+a a b h112 e112
+a b a l121 i121
+a b b p122 m122
+b a a d211 a211
+b a b h212 e212
+b b a l221 i221
+b b b p222 m222
+c a NULL c999 c777
+c a a d311 a311
+c a b h312 e312
+c b a l321 i321
+c b b p322 m322
+d a a d411 a411
+d a b h412 e412
+d b a l421 i421
+d b b p422 m422
+e a a NULL NULL
+e a b NULL NULL
+select min(a2), a1, max(a2), min(a2), a1 from t1 group by a1;
+min(a2) a1 max(a2) min(a2) a1
+a a b a a
+a b b a b
+a c b a c
+a d b a d
+select a1, b, min(c), a1, max(c), b, a2, max(c), max(c) from t1 group by a1, a2, b;
+a1 b min(c) a1 max(c) b a2 max(c) max(c)
+a a a111 a d111 a a d111 d111
+a b e112 a h112 b a h112 h112
+a a i121 a l121 a b l121 l121
+a b m122 a p122 b b p122 p122
+b a a211 b d211 a a d211 d211
+b b e212 b h212 b a h212 h212
+b a i221 b l221 a b l221 l221
+b b m222 b p222 b b p222 p222
+c a a311 c d311 a a d311 d311
+c b e312 c h312 b a h312 h312
+c a i321 c l321 a b l321 l321
+c b m322 c p322 b b p322 p322
+d a a411 d d411 a a d411 d411
+d b e412 d h412 b a h412 h412
+d a i421 d l421 a b l421 l421
+d b m422 d p422 b b p422 p422
+select min(a2) from t1 group by a1;
+min(a2)
+a
+a
+a
+a
+select a2, min(c), max(c) from t1 group by a1,a2,b;
+a2 min(c) max(c)
+a a111 d111
+a e112 h112
+b i121 l121
+b m122 p122
+a a211 d211
+a e212 h212
+b i221 l221
+b m222 p222
+a a311 d311
+a e312 h312
+b i321 l321
+b m322 p322
+a a411 d411
+a e412 h412
+b i421 l421
+b m422 p422
+explain select a1,a2,b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where a1 >= 'c' or a2 < 'b' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1, max(c) from t1 where a1 in ('a','b','d') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t2 where a1 < 'd' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where a1 < 'd' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select a1, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where a1 >= 'c' or a2 < 'b' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1, max(c) from t2 where a1 in ('a','b','d') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+select a1,a2,b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a a111 d111
+a a b e112 h112
+a b a i121 l121
+a b b m122 p122
+b a a a211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+c a a a311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+select a1,a2,b,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b;
+a1 a2 b min(c) max(c)
+b a a a211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+c a a a311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a a411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+a1 a2 b max(c)
+a a a d111
+a a b h112
+a b a l121
+a b b p122
+c a a d311
+c a b h312
+c b a l321
+c b b p322
+d a a d411
+d a b h412
+d b a l421
+d b b p422
+select a1, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+a1 max(c)
+a d111
+a h112
+a l121
+a p122
+c d311
+c h312
+c l321
+c p322
+d d411
+d h412
+d l421
+d p422
+select a1,a2,b,min(c),max(c) from t1 where a1 >= 'c' or a2 < 'b' group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a a111 d111
+a a b e112 h112
+b a a a211 d211
+b a b e212 h212
+c a a a311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a a411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b, max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+a1 a2 b max(c)
+b a a d211
+b a b h212
+b b a l221
+b b b p222
+d a a d411
+d a b h412
+d b a l421
+d b b p422
+select a1,a2,b,min(c),max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+a1 a2 b min(c) max(c)
+b a a a211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+d a a a411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b, max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+a1 a2 b max(c)
+a b a l121
+a b b p122
+b b a l221
+b b b p222
+c b a l321
+c b b p322
+d b a l421
+d b b p422
+select a1,a2,b,min(c),max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a b a i121 l121
+a b b m122 p122
+b b a i221 l221
+b b b m222 p222
+c b a i321 l321
+c b b m322 p322
+d b a i421 l421
+d b b m422 p422
+select a1,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b;
+a1 min(c) max(c)
+b a211 d211
+b e212 h212
+b i221 l221
+b m222 p222
+c a311 d311
+c e312 h312
+c i321 l321
+c m322 p322
+d a411 d411
+d e412 h412
+d i421 l421
+d m422 p422
+select a1, max(c) from t1 where a1 in ('a','b','d') group by a1,a2,b;
+a1 max(c)
+a d111
+a h112
+a l121
+a p122
+b d211
+b h212
+b l221
+b p222
+d d411
+d h412
+d l421
+d p422
+select a1,a2,b, max(c) from t2 where a1 < 'd' group by a1,a2,b;
+a1 a2 b max(c)
+a a NULL a999
+a a a d111
+a a b h112
+a b a l121
+a b b p122
+b a a d211
+b a b h212
+b b a l221
+b b b p222
+c a NULL c999
+c a a d311
+c a b h312
+c b a l321
+c b b p322
+select a1,a2,b,min(c),max(c) from t2 where a1 < 'd' group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a NULL a777 a999
+a a a a111 d111
+a a b e112 h112
+a b a i121 l121
+a b b m122 p122
+b a a a211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+c a NULL c777 c999
+c a a a311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+select a1,a2,b,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b;
+a1 a2 b min(c) max(c)
+b a a a211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+c a NULL c777 c999
+c a a a311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a a411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+e a a NULL NULL
+e a b NULL NULL
+select a1,a2,b, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+a1 a2 b max(c)
+a a NULL a999
+a a a d111
+a a b h112
+a b a l121
+a b b p122
+c a NULL c999
+c a a d311
+c a b h312
+c b a l321
+c b b p322
+d a a d411
+d a b h412
+d b a l421
+d b b p422
+e a a NULL
+e a b NULL
+select a1, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+a1 max(c)
+a a999
+a d111
+a h112
+a l121
+a p122
+c c999
+c d311
+c h312
+c l321
+c p322
+d d411
+d h412
+d l421
+d p422
+e NULL
+e NULL
+select a1,a2,b,min(c),max(c) from t2 where a1 >= 'c' or a2 < 'b' group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a NULL a777 a999
+a a a a111 d111
+a a b e112 h112
+b a a a211 d211
+b a b e212 h212
+c a NULL c777 c999
+c a a a311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a a411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+e a a NULL NULL
+e a b NULL NULL
+select a1,a2,b, max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+a1 a2 b max(c)
+b a a d211
+b a b h212
+b b a l221
+b b b p222
+d a a d411
+d a b h412
+d b a l421
+d b b p422
+select a1,a2,b,min(c),max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+a1 a2 b min(c) max(c)
+b a a a211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+d a a a411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b, max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+a1 a2 b max(c)
+a b a l121
+a b b p122
+b b a l221
+b b b p222
+c b a l321
+c b b p322
+d b a l421
+d b b p422
+select a1,a2,b,min(c),max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a b a i121 l121
+a b b m122 p122
+b b a i221 l221
+b b b m222 p222
+c b a i321 l321
+c b b m322 p322
+d b a i421 l421
+d b b m422 p422
+select a1,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b;
+a1 min(c) max(c)
+b a211 d211
+b e212 h212
+b i221 l221
+b m222 p222
+c c777 c999
+c a311 d311
+c e312 h312
+c i321 l321
+c m322 p322
+d a411 d411
+d e412 h412
+d i421 l421
+d m422 p422
+e NULL NULL
+e NULL NULL
+select a1, max(c) from t2 where a1 in ('a','b','d') group by a1,a2,b;
+a1 max(c)
+a a999
+a d111
+a h112
+a l121
+a p122
+b d211
+b h212
+b l221
+b p222
+d d411
+d h412
+d l421
+d p422
+#
+# MariaDB: we don't have the following patch:
+#
+# commit 60a92a79a3b7fde3c6efe91799e344b977c8e5c3
+# Author: Manuel Ung <mung@fb.com>
+# Date: Thu Apr 19 23:06:27 2018 -0700
+#
+# Enhance group-by loose index scan
+#
+# So the following results are not very meaningful, but are still kept here
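+# (For context: "Using index for group-by" in the EXPLAINs below is the
+# loose index scan, which jumps between distinct key prefixes instead of
+# reading every row; the missing patch roughly extends it to predicates
+# like the OR'ed a2/b conditions, so those queries fall back to a full
+# 'index' scan here.)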
+explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 147 NULL 63 Using where; Using index for group-by
+explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL idx_t1_1 163 NULL 1000 Using where; Using index
+explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL idx_t1_1 163 NULL 1000 Using where; Using index
+explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b' or b = 'a') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL idx_t1_1 163 NULL 1000 Using where; Using index
+explain select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 147 NULL 63 Using where; Using index for group-by
+explain select a1,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL idx_t1_1 163 NULL 1000 Using where; Using index
+explain select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL idx_t1_1 163 NULL 1000 Using where; Using index
+explain select a1,a2,b, max(c) from t1 where (b = 'b') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 147 NULL 126 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL idx_t1_1 163 NULL 1000 Using where; Using index
+explain select a1,a2,b,min(c),max(c) from t1 where (b = 'b') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 147 NULL 126 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL idx_t1_1 163 NULL 1000 Using where; Using index
+explain select a1,a2, max(c) from t1 where (b = 'b') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 147 NULL 126 Using where; Using index for group-by
+explain select a1,a2, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL idx_t1_1 163 NULL 1000 Using where; Using index
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 163 NULL 63 Using where; Using index for group-by
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index NULL idx_t2_1 163 NULL 1000 Using where; Using index
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index NULL idx_t2_1 163 NULL 1000 Using where; Using index
+explain select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 163 NULL 63 Using where; Using index for group-by
+explain select a1,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index NULL idx_t2_1 163 NULL 1000 Using where; Using index
+explain select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index NULL idx_t2_1 163 NULL 1000 Using where; Using index
+explain select a1,a2,b, max(c) from t2 where (b = 'b') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 146 NULL 126 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index NULL idx_t2_1 163 NULL 1000 Using where; Using index
+explain select a1,a2,b,min(c),max(c) from t2 where (b = 'b') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 163 NULL 126 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index NULL idx_t2_1 163 NULL 1000 Using where; Using index
+explain select a1,a2, max(c) from t2 where (b = 'b') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 146 NULL 126 Using where; Using index for group-by
+explain select a1,a2, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index NULL idx_t2_1 163 NULL 1000 Using where; Using index
+explain select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t3 range NULL idx_t3_1 6 NULL 63 Using where; Using index for group-by
+explain select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t3 index NULL idx_t3_1 10 NULL 1000 Using where; Using index
+explain select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t3 index NULL idx_t3_1 10 NULL 1000 Using where; Using index
+explain select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t3 range NULL idx_t3_1 6 NULL 63 Using where; Using index for group-by
+explain select a1,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t3 index NULL idx_t3_1 10 NULL 1000 Using where; Using index
+explain select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t3 index NULL idx_t3_1 10 NULL 1000 Using where; Using index
+select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1;
+a1 a2 b max(c) min(c)
+a a b h112 e112
+b a b h212 e212
+c a b h312 e312
+d a b h412 e412
+select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+a1 a2 b max(c) min(c)
+a a b p122 e112
+b a b p222 e212
+c a b p322 e312
+d a b p422 e412
+select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+a1 a2 b max(c) min(c)
+a a a h112 a111
+b a a h212 a211
+c a a h312 a311
+d a a h412 a411
+select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b' or b = 'a') group by a1;
+a1 a2 b max(c) min(c)
+a a a p122 a111
+b a a p222 a211
+c a a p322 a311
+d a a p422 a411
+select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1;
+a1 max(c) min(c)
+a h112 e112
+b h212 e212
+c h312 e312
+d h412 e412
+select a1,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+a1 max(c) min(c)
+a p122 e112
+b p222 e212
+c p322 e312
+d p422 e412
+select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+a1 max(c) min(c)
+a h112 a111
+b h212 a211
+c h312 a311
+d h412 a411
+select a1,a2,b, max(c) from t1 where (b = 'b') group by a1,a2;
+a1 a2 b max(c)
+a a b h112
+a b b p122
+b a b h212
+b b b p222
+c a b h312
+c b b p322
+d a b h412
+d b b p422
+select a1,a2,b, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2;
+a1 a2 b max(c)
+a a a h112
+a b a p122
+b a a h212
+b b a p222
+c a a h312
+c b a p322
+d a a h412
+d b a p422
+select a1,a2,b,min(c),max(c) from t1 where (b = 'b') group by a1,a2;
+a1 a2 b min(c) max(c)
+a a b e112 h112
+a b b m122 p122
+b a b e212 h212
+b b b m222 p222
+c a b e312 h312
+c b b m322 p322
+d a b e412 h412
+d b b m422 p422
+select a1,a2,b,min(c),max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2;
+a1 a2 b min(c) max(c)
+a a a a111 h112
+a b a i121 p122
+b a a a211 h212
+b b a i221 p222
+c a a a311 h312
+c b a i321 p322
+d a a a411 h412
+d b a i421 p422
+select a1,a2, max(c) from t1 where (b = 'b') group by a1,a2;
+a1 a2 max(c)
+a a h112
+a b p122
+b a h212
+b b p222
+c a h312
+c b p322
+d a h412
+d b p422
+select a1,a2, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2;
+a1 a2 max(c)
+a a h112
+a b p122
+b a h212
+b b p222
+c a h312
+c b p322
+d a h412
+d b p422
+select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1;
+a1 a2 b max(c) min(c)
+a a b h112 e112
+b a b h212 e212
+c a b h312 e312
+d a b h412 e412
+e a b NULL NULL
+select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+a1 a2 b max(c) min(c)
+a a b p122 e112
+b a b p222 e212
+c a b p322 e312
+d a b p422 e412
+e a b NULL NULL
+select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+a1 a2 b max(c) min(c)
+a a a h112 a111
+b a a h212 a211
+c a a h312 a311
+d a a h412 a411
+e a a NULL NULL
+select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1;
+a1 max(c) min(c)
+a h112 e112
+b h212 e212
+c h312 e312
+d h412 e412
+e NULL NULL
+select a1,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+a1 max(c) min(c)
+a p122 e112
+b p222 e212
+c p322 e312
+d p422 e412
+e NULL NULL
+select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+a1 max(c) min(c)
+a h112 a111
+b h212 a211
+c h312 a311
+d h412 a411
+e NULL NULL
+select a1,a2,b, max(c) from t2 where (b = 'b') group by a1,a2;
+a1 a2 b max(c)
+a a b h112
+a b b p122
+b a b h212
+b b b p222
+c a b h312
+c b b p322
+d a b h412
+d b b p422
+e a b NULL
+select a1,a2,b, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2;
+a1 a2 b max(c)
+a a a h112
+a b a p122
+b a a h212
+b b a p222
+c a a h312
+c b a p322
+d a a h412
+d b a p422
+e a a NULL
+select a1,a2,b,min(c),max(c) from t2 where (b = 'b') group by a1,a2;
+a1 a2 b min(c) max(c)
+a a b e112 h112
+a b b m122 p122
+b a b e212 h212
+b b b m222 p222
+c a b e312 h312
+c b b m322 p322
+d a b e412 h412
+d b b m422 p422
+e a b NULL NULL
+select a1,a2,b,min(c),max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2;
+a1 a2 b min(c) max(c)
+a a a a111 h112
+a b a i121 p122
+b a a a211 h212
+b b a i221 p222
+c a a a311 h312
+c b a i321 p322
+d a a a411 h412
+d b a i421 p422
+e a a NULL NULL
+select a1,a2, max(c) from t2 where (b = 'b') group by a1,a2;
+a1 a2 max(c)
+a a h112
+a b p122
+b a h212
+b b p222
+c a h312
+c b p322
+d a h412
+d b p422
+e a NULL
+select a1,a2, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2;
+a1 a2 max(c)
+a a h112
+a b p122
+b a h212
+b b p222
+c a h312
+c b p322
+d a h412
+d b p422
+e a NULL
+select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1;
+a1 a2 b max(c) min(c)
+a a b h112 e112
+b a b h212 e212
+c a b h312 e312
+select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+a1 a2 b max(c) min(c)
+a a b p122 e112
+b a b p222 e212
+c a b p322 e312
+select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+a1 a2 b max(c) min(c)
+a a a h112 a111
+b a a h212 a211
+c a a h312 a311
+select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1;
+a1 max(c) min(c)
+a h112 e112
+b h212 e212
+c h312 e312
+select a1,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+a1 max(c) min(c)
+a p122 e112
+b p222 e212
+c p322 e312
+select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+a1 max(c) min(c)
+a h112 a111
+b h212 a211
+c h312 a311
+explain select a1,a2,b,min(c) from t2 where (a2 = 'a') and b is NULL group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 163 NULL 63 Using where; Using index for group-by
+explain select a1,a2,b,min(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index NULL idx_t2_1 163 NULL 1000 Using where; Using index
+explain select a1,a2,b,max(c) from t2 where (a2 = 'a') and b is NULL group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 146 NULL 63 Using where; Using index for group-by
+explain select a1,a2,b,max(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index NULL idx_t2_1 163 NULL 1000 Using where; Using index
+explain select a1,a2,b,min(c) from t2 where b is NULL group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 163 NULL 126 Using where; Using index for group-by
+explain select a1,a2,b,max(c) from t2 where b is NULL group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 146 NULL 126 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where b is NULL group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 163 NULL 126 Using where; Using index for group-by
+select a1,a2,b,min(c) from t2 where (a2 = 'a') and b is NULL group by a1;
+a1 a2 b min(c)
+a a NULL a777
+c a NULL c777
+select a1,a2,b,min(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1;
+a1 a2 b min(c)
+a a NULL a777
+c a NULL c777
+select a1,a2,b,max(c) from t2 where (a2 = 'a') and b is NULL group by a1;
+a1 a2 b max(c)
+a a NULL a999
+c a NULL c999
+select a1,a2,b,max(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1;
+a1 a2 b max(c)
+a a NULL a999
+c a NULL c999
+select a1,a2,b,min(c) from t2 where b is NULL group by a1,a2;
+a1 a2 b min(c)
+a a NULL a777
+c a NULL c777
+select a1,a2,b,max(c) from t2 where b is NULL group by a1,a2;
+a1 a2 b max(c)
+a a NULL a999
+c a NULL c999
+select a1,a2,b,min(c),max(c) from t2 where b is NULL group by a1,a2;
+a1 a2 b min(c) max(c)
+a a NULL a777 a999
+c a NULL c777 c999
+select a1,a2,b,min(c),max(c) from t2 where b is NULL group by a1,a2;
+a1 a2 b min(c) max(c)
+a a NULL a777 a999
+c a NULL c777 c999
+explain select a1,a2,b, max(c) from t1 where (c > 'b1') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 147 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 163 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t1 where (c > 'f123') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (c > 'f123') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 163 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t1 where (c < 'a0') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 163 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 163 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t1 where (c < 'k321') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 163 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (c < 'k321') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 163 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 163 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 163 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b111') and (c <= 'g112') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 163 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 163 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 163 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (c between 'b111' and 'g112') or (c between 'd000' and 'i110') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 163 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t2 where (c > 'b1') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t2 where (c > 'f123') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (c > 'f123') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t2 where (c < 'a0') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t2 where (c < 'k321') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (c < 'k321') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (c > 'b111') and (c <= 'g112') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 163 NULL # Using where; Using index for group-by
+select a1,a2,b, max(c) from t1 where (c > 'b1') group by a1,a2,b;
+a1 a2 b max(c)
+a a a d111
+a a b h112
+a b a l121
+a b b p122
+b a a d211
+b a b h212
+b b a l221
+b b b p222
+c a a d311
+c a b h312
+c b a l321
+c b b p322
+d a a d411
+d a b h412
+d b a l421
+d b b p422
+select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a b111 d111
+a a b e112 h112
+a b a i121 l121
+a b b m122 p122
+b a a b211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+c a a b311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a b411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b, max(c) from t1 where (c > 'f123') group by a1,a2,b;
+a1 a2 b max(c)
+a a b h112
+a b a l121
+a b b p122
+b a b h212
+b b a l221
+b b b p222
+c a b h312
+c b a l321
+c b b p322
+d a b h412
+d b a l421
+d b b p422
+select a1,a2,b,min(c),max(c) from t1 where (c > 'f123') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a b g112 h112
+a b a i121 l121
+a b b m122 p122
+b a b f212 h212
+b b a i221 l221
+b b b m222 p222
+c a b f312 h312
+c b a i321 l321
+c b b m322 p322
+d a b f412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b, max(c) from t1 where (c < 'a0') group by a1,a2,b;
+a1 a2 b max(c)
+select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+select a1,a2,b, max(c) from t1 where (c < 'k321') group by a1,a2,b;
+a1 a2 b max(c)
+a a a d111
+a a b h112
+a b a k121
+b a a d211
+b a b h212
+b b a k221
+c a a d311
+c a b h312
+c b a j321
+d a a d411
+d a b h412
+d b a j421
+select a1,a2,b,min(c),max(c) from t1 where (c < 'k321') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a a111 d111
+a a b e112 h112
+a b a i121 k121
+b a a a211 d211
+b a b e212 h212
+b b a i221 k221
+c a a a311 d311
+c a b e312 h312
+c b a i321 j321
+d a a a411 d411
+d a b e412 h412
+d b a i421 j421
+select a1,a2,b, max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+a1 a2 b max(c)
+a a a d111
+a a b h112
+a b a l121
+a b b p122
+b a a d211
+b a b h212
+b b a l221
+b b b p222
+c a a d311
+c a b h312
+c b a l321
+c b b p322
+d a a d411
+d a b h412
+d b a l421
+d b b p422
+select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a b111 d111
+a a b e112 h112
+a b a i121 l121
+a b b m122 p122
+b a a b211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+c a a b311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a b411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b, max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+a1 a2 b max(c)
+a a a d111
+a a b h112
+a b a l121
+a b b p122
+b a a d211
+b a b h212
+b b a l221
+b b b p222
+c a a d311
+c a b h312
+c b a l321
+c b b p322
+d a a d411
+d a b h412
+d b a l421
+d b b p422
+select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a a111 d111
+a a b e112 h112
+a b a i121 l121
+a b b m122 p122
+b a a a211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+c a a a311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a a411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b,min(c),max(c) from t1 where (c > 'b111') and (c <= 'g112') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a c111 d111
+a a b e112 g112
+b a a b211 d211
+b a b e212 f212
+c a a b311 d311
+c a b e312 f312
+d a a b411 d411
+d a b e412 f412
+select a1,a2,b,min(c),max(c) from t1 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a a111 c111
+b a a a211 c211
+c a a a311 c311
+d a a a411 c411
+d a b g412 g412
+d b a k421 k421
+select a1,a2,b,min(c),max(c) from t1 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a c111 d111
+a a b e112 h112
+b a a b211 d211
+b a b e212 h212
+c a a b311 d311
+c a b e312 h312
+d a a b411 d411
+d a b e412 h412
+select a1,a2,b,min(c),max(c) from t1 where (c between 'b111' and 'g112') or (c between 'd000' and 'i110') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a b111 d111
+a a b e112 h112
+b a a b211 d211
+b a b e212 h212
+c a a b311 d311
+c a b e312 h312
+d a a b411 d411
+d a b e412 h412
+select a1,a2,b, max(c) from t2 where (c > 'b1') group by a1,a2,b;
+a1 a2 b max(c)
+a a a d111
+a a b h112
+a b a l121
+a b b p122
+b a a d211
+b a b h212
+b b a l221
+b b b p222
+c a NULL c999
+c a a d311
+c a b h312
+c b a l321
+c b b p322
+d a a d411
+d a b h412
+d b a l421
+d b b p422
+select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a b111 d111
+a a b e112 h112
+a b a i121 l121
+a b b m122 p122
+b a a b211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+c a NULL c777 c999
+c a a b311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a b411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b, max(c) from t2 where (c > 'f123') group by a1,a2,b;
+a1 a2 b max(c)
+a a b h112
+a b a l121
+a b b p122
+b a b h212
+b b a l221
+b b b p222
+c a b h312
+c b a l321
+c b b p322
+d a b h412
+d b a l421
+d b b p422
+select a1,a2,b,min(c),max(c) from t2 where (c > 'f123') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a b g112 h112
+a b a i121 l121
+a b b m122 p122
+b a b f212 h212
+b b a i221 l221
+b b b m222 p222
+c a b f312 h312
+c b a i321 l321
+c b b m322 p322
+d a b f412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b, max(c) from t2 where (c < 'a0') group by a1,a2,b;
+a1 a2 b max(c)
+select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+select a1,a2,b, max(c) from t2 where (c < 'k321') group by a1,a2,b;
+a1 a2 b max(c)
+a a NULL a999
+a a a d111
+a a b h112
+a b a k121
+b a a d211
+b a b h212
+b b a k221
+c a NULL c999
+c a a d311
+c a b h312
+c b a j321
+d a a d411
+d a b h412
+d b a j421
+select a1,a2,b,min(c),max(c) from t2 where (c < 'k321') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a NULL a777 a999
+a a a a111 d111
+a a b e112 h112
+a b a i121 k121
+b a a a211 d211
+b a b e212 h212
+b b a i221 k221
+c a NULL c777 c999
+c a a a311 d311
+c a b e312 h312
+c b a i321 j321
+d a a a411 d411
+d a b e412 h412
+d b a i421 j421
+select a1,a2,b, max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+a1 a2 b max(c)
+a a a d111
+a a b h112
+a b a l121
+a b b p122
+b a a d211
+b a b h212
+b b a l221
+b b b p222
+c a NULL c999
+c a a d311
+c a b h312
+c b a l321
+c b b p322
+d a a d411
+d a b h412
+d b a l421
+d b b p422
+select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a b111 d111
+a a b e112 h112
+a b a i121 l121
+a b b m122 p122
+b a a b211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+c a NULL c777 c999
+c a a b311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a b411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b, max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+a1 a2 b max(c)
+a a NULL a999
+a a a d111
+a a b h112
+a b a l121
+a b b p122
+b a a d211
+b a b h212
+b b a l221
+b b b p222
+c a NULL c999
+c a a d311
+c a b h312
+c b a l321
+c b b p322
+d a a d411
+d a b h412
+d b a l421
+d b b p422
+select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a NULL a777 a999
+a a a a111 d111
+a a b e112 h112
+a b a i121 l121
+a b b m122 p122
+b a a a211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+c a NULL c777 c999
+c a a a311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a a411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b,min(c),max(c) from t2 where (c > 'b111') and (c <= 'g112') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a c111 d111
+a a b e112 g112
+b a a b211 d211
+b a b e212 f212
+c a NULL c777 c999
+c a a b311 d311
+c a b e312 f312
+d a a b411 d411
+d a b e412 f412
+select a1,a2,b,min(c),max(c) from t2 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a NULL a777 a999
+a a a a111 c111
+b a a a211 c211
+c a a a311 c311
+d a a a411 c411
+d a b g412 g412
+d b a k421 k421
+select a1,a2,b,min(c),max(c) from t2 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a c111 d111
+a a b e112 h112
+b a a b211 d211
+b a b e212 h212
+c a NULL c777 c999
+c a a b311 d311
+c a b e312 h312
+d a a b411 d411
+d a b e412 h412
+explain select a1,a2,b,min(c),max(c) from t1
+where exists ( select * from t2 where t2.c = t1.c )
+group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 index NULL idx_t1_1 163 NULL 1000 Using index
+1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 16 func 1
+2 MATERIALIZED t2 index NULL idx_t2_1 163 NULL 1000 Using index
+explain select a1,a2,b,min(c),max(c) from t1
+where exists ( select * from t2 where t2.c > 'b1' )
+group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 index NULL idx_t1_1 163 NULL 1000 Using index
+2 SUBQUERY t2 index NULL idx_t2_1 163 NULL 1000 Using where; Using index
+explain select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 163 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c) from t1 where (ord(a1) > 97) and (ord(a2) + ord(a1) > 194) and (b = 'c') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c) from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a b e112 h112
+b a b e212 h212
+c a b e312 h312
+c b b m322 p322
+d a b e412 h412
+d b b m422 p422
+select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a c111 d111
+a a b e112 h112
+b a a b211 d211
+b a b e212 h212
+c a a b311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a b411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b,min(c),max(c) from t1 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a b a i121 l121
+b b a i221 l221
+c b a i321 l321
+d b a i421 l421
+select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b;
+a1 a2 b min(c)
+b b a k221
+c b a k321
+d b a k421
+select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b;
+a1 a2 b min(c)
+b b a k221
+c b a k321
+d b a k421
+select a1,a2,b,min(c) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+a1 a2 b min(c)
+select a1,a2,b,min(c) from t1 where (ord(a1) > 97) and (ord(a2) + ord(a1) > 194) and (b = 'c') group by a1,a2,b;
+a1 a2 b min(c)
+select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a b e112 h112
+b a b e212 h212
+c a b e312 h312
+c b b m322 p322
+d a b e412 h412
+d b b m422 p422
+e a b NULL NULL
+select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a c111 d111
+a a b e112 h112
+b a a b211 d211
+b a b e212 h212
+c a NULL c777 c999
+c a a b311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a b411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b,min(c),max(c) from t2 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a b a i121 l121
+b b a i221 l221
+c b a i321 l321
+d b a i421 l421
+select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b;
+a1 a2 b min(c)
+b b a k221
+c b a k321
+d b a k421
+select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b;
+a1 a2 b min(c)
+b b a k221
+c b a k321
+d b a k421
+select a1,a2,b,min(c) from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+a1 a2 b min(c)
+explain select a1,a2,b from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 163 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 163 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+select a1,a2,b from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+a1 a2 b
+a a b
+b a b
+c a b
+c b b
+d a b
+d b b
+select a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+a1 a2 b
+a b a
+b b a
+c b a
+d b a
+select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+a1 a2 b c
+a b a i121
+select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b;
+a1 a2 b c
+a b a i121
+select a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+a1 a2 b
+select a1,a2,b from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+a1 a2 b
+a a b
+b a b
+c a b
+c b b
+d a b
+d b b
+e a b
+select a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+a1 a2 b
+a b a
+b b a
+c b a
+d b a
+select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+a1 a2 b c
+a b a i121
+select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b;
+a1 a2 b c
+a b a i121
+select a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+a1 a2 b
+explain select distinct a1,a2,b from t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 147 NULL 251 Using index for group-by
+explain select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain extended select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range NULL idx_t1_1 163 NULL 501 100.00 Using where; Using index for group-by
+Warnings:
+Note 1003 select distinct `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c` from `test`.`t1` where `test`.`t1`.`b` = 'a' and `test`.`t1`.`c` = 'i121' and `test`.`t1`.`a2` >= 'b'
+explain select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select distinct b from t1 where (a2 >= 'b') and (b = 'a');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL idx_t1_2 147 NULL 1000 Using where; Using index
+explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 63 Using where; Using index for group-by
+explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 63 Using where; Using index for group-by
+explain select distinct a1,a2,b from t2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 146 NULL # Using index for group-by
+explain select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 146 NULL # Using where; Using index for group-by
+explain extended select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 range NULL idx_t2_1 163 NULL 501 100.00 Using where; Using index for group-by
+Warnings:
+Note 1003 select distinct `test`.`t2`.`a1` AS `a1`,`test`.`t2`.`a2` AS `a2`,`test`.`t2`.`b` AS `b`,`test`.`t2`.`c` AS `c` from `test`.`t2` where `test`.`t2`.`b` = 'a' and `test`.`t2`.`c` = 'i121' and `test`.`t2`.`a2` >= 'b'
+explain select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select distinct b from t2 where (a2 >= 'b') and (b = 'a');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index NULL idx_t2_2 146 NULL 1000 Using where; Using index
+explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 129 NULL 63 Using where; Using index for group-by
+explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 129 NULL 63 Using where; Using index for group-by
+select distinct a1,a2,b from t1;
+a1 a2 b
+a a a
+a a b
+a b a
+a b b
+b a a
+b a b
+b b a
+b b b
+c a a
+c a b
+c b a
+c b b
+d a a
+d a b
+d b a
+d b b
+select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a');
+a1 a2 b
+a b a
+b b a
+c b a
+d b a
+select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
+a1 a2 b c
+a b a i121
+select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
+a1 a2 b
+select distinct b from t1 where (a2 >= 'b') and (b = 'a');
+b
+a
+select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b';
+a1
+a
+d
+select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e';
+a1
+select distinct a1,a2,b from t2;
+a1 a2 b
+a a NULL
+a a a
+a a b
+a b a
+a b b
+b a a
+b a b
+b b a
+b b b
+c a NULL
+c a a
+c a b
+c b a
+c b b
+d a a
+d a b
+d b a
+d b b
+e a a
+e a b
+select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a');
+a1 a2 b
+a b a
+b b a
+c b a
+d b a
+select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
+a1 a2 b c
+a b a i121
+select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
+a1 a2 b
+select distinct b from t2 where (a2 >= 'b') and (b = 'a');
+b
+a
+select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b';
+a1
+a
+d
+select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e';
+a1
+select distinct t_00.a1
+from t1 t_00
+where exists ( select * from t2 where a1 = t_00.a1 );
+a1
+a
+b
+c
+d
+select distinct a1,a1 from t1;
+a1 a1
+a a
+b b
+c c
+d d
+select distinct a2,a1,a2,a1 from t1;
+a2 a1 a2 a1
+a a a a
+b a b a
+a b a b
+b b b b
+a c a c
+b c b c
+a d a d
+b d b d
+select distinct t1.a1,t2.a1 from t1,t2;
+a1 a1
+a a
+b a
+c a
+d a
+a b
+b b
+c b
+d b
+a c
+b c
+c c
+d c
+a d
+b d
+c d
+d d
+a e
+b e
+c e
+d e
+explain select distinct a1,a2,b from t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 147 NULL 251 Using index for group-by
+explain select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 163 NULL 251 Using where; Using index for group-by
+explain select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select distinct b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 147 NULL 251 Using where; Using index for group-by; Using temporary; Using filesort
+explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b' group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 63 Using where; Using index for group-by
+explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e' group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 63 Using where; Using index for group-by
+explain select distinct a1,a2,b from t2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 146 NULL # Using index for group-by
+explain select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select distinct b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL idx_t2_1 146 NULL # Using where; Using index for group-by; Using temporary; Using filesort
+explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b' group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 129 NULL # Using where; Using index for group-by
+explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e' group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 129 NULL # Using where; Using index for group-by
+select distinct a1,a2,b from t1;
+a1 a2 b
+a a a
+a a b
+a b a
+a b b
+b a a
+b a b
+b b a
+b b b
+c a a
+c a b
+c b a
+c b b
+d a a
+d a b
+d b a
+d b b
+select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+a1 a2 b
+a b a
+b b a
+c b a
+d b a
+select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+a1 a2 b c
+a b a i121
+select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+a1 a2 b
+select distinct b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+b
+a
+select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b' group by a1;
+a1
+a
+d
+select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e' group by a1;
+a1
+select distinct a1,a2,b from t2;
+a1 a2 b
+a a NULL
+a a a
+a a b
+a b a
+a b b
+b a a
+b a b
+b b a
+b b b
+c a NULL
+c a a
+c a b
+c b a
+c b b
+d a a
+d a b
+d b a
+d b b
+e a a
+e a b
+select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+a1 a2 b
+a b a
+b b a
+c b a
+d b a
+select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+a1 a2 b c
+a b a i121
+select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+a1 a2 b
+select distinct b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+b
+a
+select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b' group by a1;
+a1
+a
+d
+select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e' group by a1;
+a1
+explain select count(distinct a1,a2,b) from t1 where (a2 >= 'b') and (b = 'a');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select count(distinct a1,a2,b,c) from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 163 NULL 501 Using where; Using index for group-by (scanning)
+explain extended select count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 100.00 Using where; Using index for group-by
+Warnings:
+Note 1003 select count(distinct `test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b`) AS `count(distinct a1,a2,b)` from `test`.`t1` where `test`.`t1`.`b` = 'c' and `test`.`t1`.`a1` > 'a' and `test`.`t1`.`a2` > 'a'
+explain select count(distinct b) from t1 where (a2 >= 'b') and (b = 'a');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL idx_t1_2 147 NULL 1000 Using where; Using index
+explain extended select 98 + count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a');
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 100.00 Using where; Using index for group-by
+Warnings:
+Note 1003 select 98 + count(distinct `test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b`) AS `98 + count(distinct a1,a2,b)` from `test`.`t1` where `test`.`t1`.`a1` > 'a' and `test`.`t1`.`a2` > 'a'
+select count(distinct a1,a2,b) from t1 where (a2 >= 'b') and (b = 'a');
+count(distinct a1,a2,b)
+4
+select count(distinct a1,a2,b,c) from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
+count(distinct a1,a2,b,c)
+1
+select count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
+count(distinct a1,a2,b)
+0
+select count(distinct b) from t1 where (a2 >= 'b') and (b = 'a');
+count(distinct b)
+1
+select 98 + count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a');
+98 + count(distinct a1,a2,b)
+104
+explain select a1,a2,b, concat(min(c), max(c)) from t1 where a1 < 'd' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select concat(a1,min(c)),b from t1 where a1 < 'd' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select concat(a1,min(c)),b,max(c) from t1 where a1 < 'd' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select concat(a1,a2),b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select concat(ord(min(b)),ord(max(b))),min(b),max(b) from t1 group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 147 NULL 126 Using index for group-by
+select a1,a2,b, concat(min(c), max(c)) from t1 where a1 < 'd' group by a1,a2,b;
+a1 a2 b concat(min(c), max(c))
+a a a a111d111
+a a b e112h112
+a b a i121l121
+a b b m122p122
+b a a a211d211
+b a b e212h212
+b b a i221l221
+b b b m222p222
+c a a a311d311
+c a b e312h312
+c b a i321l321
+c b b m322p322
+select concat(a1,min(c)),b from t1 where a1 < 'd' group by a1,a2,b;
+concat(a1,min(c)) b
+aa111 a
+ae112 b
+ai121 a
+am122 b
+ba211 a
+be212 b
+bi221 a
+bm222 b
+ca311 a
+ce312 b
+ci321 a
+cm322 b
+select concat(a1,min(c)),b,max(c) from t1 where a1 < 'd' group by a1,a2,b;
+concat(a1,min(c)) b max(c)
+aa111 a d111
+ae112 b h112
+ai121 a l121
+am122 b p122
+ba211 a d211
+be212 b h212
+bi221 a l221
+bm222 b p222
+ca311 a d311
+ce312 b h312
+ci321 a l321
+cm322 b p322
+select concat(a1,a2),b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b;
+concat(a1,a2) b min(c) max(c)
+aa a a111 d111
+aa b e112 h112
+ab a i121 l121
+ab b m122 p122
+ba a a211 d211
+ba b e212 h212
+bb a i221 l221
+bb b m222 p222
+ca a a311 d311
+ca b e312 h312
+cb a i321 l321
+cb b m322 p322
+select concat(ord(min(b)),ord(max(b))),min(b),max(b) from t1 group by a1,a2;
+concat(ord(min(b)),ord(max(b))) min(b) max(b)
+9798 a b
+9798 a b
+9798 a b
+9798 a b
+9798 a b
+9798 a b
+9798 a b
+9798 a b
+explain select a1,a2,b,d,min(c),max(c) from t1 group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL idx_t1_2 147 NULL 1000
+explain select a1,a2,b,d from t1 group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL idx_t1_2 147 NULL 1000
+explain extended select a1,a2,min(b),max(b) from t1
+where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (c > 'a111') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 1000 100.00 Using where; Using index
+Warnings:
+Note 1003 select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2`,min(`test`.`t1`.`b`) AS `min(b)`,max(`test`.`t1`.`b`) AS `max(b)` from `test`.`t1` where (`test`.`t1`.`a1` = 'b' or `test`.`t1`.`a1` = 'd' or `test`.`t1`.`a1` = 'a' or `test`.`t1`.`a1` = 'c') and `test`.`t1`.`a2` > 'a' and `test`.`t1`.`c` > 'a111' group by `test`.`t1`.`a1`,`test`.`t1`.`a2`
+explain extended select a1,a2,b,min(c),max(c) from t1
+where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (d > 'xy2') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_2 130 NULL 1000 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2`,`test`.`t1`.`b` AS `b`,min(`test`.`t1`.`c`) AS `min(c)`,max(`test`.`t1`.`c`) AS `max(c)` from `test`.`t1` where (`test`.`t1`.`a1` = 'b' or `test`.`t1`.`a1` = 'd' or `test`.`t1`.`a1` = 'a' or `test`.`t1`.`a1` = 'c') and `test`.`t1`.`a2` > 'a' and `test`.`t1`.`d` > 'xy2' group by `test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b`
+explain extended select a1,a2,b,c from t1
+where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (d > 'xy2') group by a1,a2,b,c;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 1000 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c` from `test`.`t1` where (`test`.`t1`.`a1` = 'b' or `test`.`t1`.`a1` = 'd' or `test`.`t1`.`a1` = 'a' or `test`.`t1`.`a1` = 'c') and `test`.`t1`.`a2` > 'a' and `test`.`t1`.`d` > 'xy2' group by `test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b`,`test`.`t1`.`c`
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') or (b < 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index NULL idx_t2_1 163 NULL 1000 Using where; Using index
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b < 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index NULL idx_t2_1 163 NULL 1000 Using where; Using index
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b <= 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index NULL idx_t2_1 163 NULL 1000 Using where; Using index
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b <= 'b' and b >= 'a') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index NULL idx_t2_1 163 NULL 1000 Using where; Using index
+explain extended select a1,a2,b from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (c > 'a111') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 1000 100.00 Using where; Using index
+Warnings:
+Note 1003 select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2`,`test`.`t1`.`b` AS `b` from `test`.`t1` where (`test`.`t1`.`a1` = 'b' or `test`.`t1`.`a1` = 'd' or `test`.`t1`.`a1` = 'a' or `test`.`t1`.`a1` = 'c') and `test`.`t1`.`a2` > 'a' and `test`.`t1`.`c` > 'a111' group by `test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b`
+explain select a1,a2,min(b),c from t2 where (a2 = 'a') and (c = 'a111') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index NULL idx_t2_1 163 NULL 1000 Using where; Using index
+select a1,a2,min(b),c from t2 where (a2 = 'a') and (c = 'a111') group by a1;
+a1 a2 min(b) c
+a a a a111
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') or (b = 'a') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index NULL idx_t2_1 163 NULL 1000 Using where; Using index
+explain select a1,a2,b,min(c),max(c) from t2
+where (c > 'a000') and (c <= 'd999') and (c like '_8__') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index NULL idx_t2_1 163 NULL 1000 Using where; Using index
+explain select a1, a2, b, c, min(d), max(d) from t1 group by a1,a2,b,c;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL idx_t1_1 163 NULL 1000
+explain select a1,a2,count(a2) from t1 group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL idx_t1_2 147 NULL 1000 Using index
+explain extended select a1,a2,count(a2) from t1 where (a1 > 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_2 65 NULL 1000 100.00 Using where; Using index
+Warnings:
+Note 1003 select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2`,count(`test`.`t1`.`a2`) AS `count(a2)` from `test`.`t1` where `test`.`t1`.`a1` > 'a' group by `test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b`
+explain extended select sum(ord(a1)) from t1 where (a1 > 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_2 65 NULL 1000 100.00 Using where; Using index
+Warnings:
+Note 1003 select sum(ord(`test`.`t1`.`a1`)) AS `sum(ord(a1))` from `test`.`t1` where `test`.`t1`.`a1` > 'a' group by `test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b`
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'a' or b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index NULL idx_t2_1 163 NULL 1000 Using where; Using index
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'a' or b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index NULL idx_t2_1 163 NULL 1000 Using where; Using index
+explain select distinct(a1) from t1 where ord(a2) = 98;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL idx_t1_2 147 NULL 1000 Using where; Using index
+select distinct(a1) from t1 where ord(a2) = 98;
+a1
+a
+b
+c
+d
+explain select a1 from t1 where a2 = 'b' group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 130 NULL 63 Using where; Using index for group-by
+select a1 from t1 where a2 = 'b' group by a1;
+a1
+a
+b
+c
+d
+explain select distinct a1 from t1 where a2 = 'b';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx_t1_1 130 NULL 63 Using where; Using index for group-by
+select distinct a1 from t1 where a2 = 'b';
+a1
+a
+b
+c
+d
+drop table t1,t2,t3;
+create table t1 (c1 int not null,c2 int not null, primary key(c1,c2)) engine=RocksDB;
+insert into t1 (c1,c2) values
+(10,1),(10,2),(10,3),(20,4),(20,5),(20,6),(30,7),(30,8),(30,9);
+select distinct c1, c2 from t1 order by c2;
+c1 c2
+10 1
+10 2
+10 3
+20 4
+20 5
+20 6
+30 7
+30 8
+30 9
+select c1,min(c2) as c2 from t1 group by c1 order by c2;
+c1 c2
+10 1
+20 4
+30 7
+select c1,c2 from t1 group by c1,c2 order by c2;
+c1 c2
+10 1
+10 2
+10 3
+20 4
+20 5
+20 6
+30 7
+30 8
+30 9
+drop table t1;
+CREATE TABLE t1 (a varchar(5), b int(11), PRIMARY KEY (a,b)) engine=RocksDB;
+INSERT INTO t1 VALUES ('AA',1), ('AA',2), ('AA',3), ('BB',1), ('AA',4);
+OPTIMIZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+SELECT a FROM t1 WHERE a='AA' GROUP BY a;
+a
+AA
+SELECT a FROM t1 WHERE a='BB' GROUP BY a;
+a
+BB
+EXPLAIN SELECT a FROM t1 WHERE a='AA' GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref PRIMARY PRIMARY 7 const 1000 Using where; Using index
+EXPLAIN SELECT a FROM t1 WHERE a='BB' GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref PRIMARY PRIMARY 7 const 1000 Using where; Using index
+SELECT DISTINCT a FROM t1 WHERE a='BB';
+a
+BB
+SELECT DISTINCT a FROM t1 WHERE a LIKE 'B%';
+a
+BB
+SELECT a FROM t1 WHERE a LIKE 'B%' GROUP BY a;
+a
+BB
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int(11) NOT NULL DEFAULT '0',
+b varchar(16) COLLATE latin1_general_ci NOT NULL DEFAULT '',
+PRIMARY KEY (a,b)
+) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_general_ci;
+CREATE PROCEDURE a(x INT)
+BEGIN
+DECLARE rnd INT;
+DECLARE cnt INT;
+WHILE x > 0 DO
+SET rnd= x % 100;
+SET cnt = (SELECT COUNT(*) FROM t1 WHERE a = rnd);
+INSERT INTO t1(a,b) VALUES (rnd, CAST(cnt AS CHAR));
+SET x= x - 1;
+END WHILE;
+END|
+CALL a(1000);
+SELECT a FROM t1 WHERE a=0;
+a
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+SELECT DISTINCT a FROM t1 WHERE a=0;
+a
+0
+SELECT COUNT(DISTINCT a) FROM t1 WHERE a=0;
+COUNT(DISTINCT a)
+1
+DROP TABLE t1;
+DROP PROCEDURE a;
+CREATE TABLE t1 (a varchar(64) NOT NULL default '', PRIMARY KEY(a)) engine=RocksDB;
+INSERT INTO t1 (a) VALUES
+(''), ('CENTRAL'), ('EASTERN'), ('GREATER LONDON'),
+('NORTH CENTRAL'), ('NORTH EAST'), ('NORTH WEST'), ('SCOTLAND'),
+('SOUTH EAST'), ('SOUTH WEST'), ('WESTERN');
+EXPLAIN SELECT DISTINCT a,a FROM t1 ORDER BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL PRIMARY 66 NULL 1000 Using index
+SELECT DISTINCT a,a FROM t1 ORDER BY a;
+a a
+
+CENTRAL CENTRAL
+EASTERN EASTERN
+GREATER LONDON GREATER LONDON
+NORTH CENTRAL NORTH CENTRAL
+NORTH EAST NORTH EAST
+NORTH WEST NORTH WEST
+SCOTLAND SCOTLAND
+SOUTH EAST SOUTH EAST
+SOUTH WEST SOUTH WEST
+WESTERN WESTERN
+DROP TABLE t1;
+CREATE TABLE t1 (id1 INT, id2 INT) engine=RocksDB;
+CREATE TABLE t2 (id2 INT, id3 INT, id5 INT) engine=RocksDB;
+CREATE TABLE t3 (id3 INT, id4 INT) engine=RocksDB;
+CREATE TABLE t4 (id4 INT) engine=RocksDB;
+CREATE TABLE t5 (id5 INT, id6 INT) engine=RocksDB;
+CREATE TABLE t6 (id6 INT) engine=RocksDB;
+INSERT INTO t1 VALUES(1,1);
+INSERT INTO t2 VALUES(1,1,1);
+INSERT INTO t3 VALUES(1,1);
+INSERT INTO t4 VALUES(1);
+INSERT INTO t5 VALUES(1,1);
+INSERT INTO t6 VALUES(1);
+SELECT * FROM
+t1
+NATURAL JOIN
+(t2 JOIN (t3 NATURAL JOIN t4, t5 NATURAL JOIN t6)
+ON (t3.id3 = t2.id3 AND t5.id5 = t2.id5));
+id2 id1 id3 id5 id4 id3 id6 id5
+1 1 1 1 1 1 1 1
+SELECT * FROM
+t1
+NATURAL JOIN
+(((t3 NATURAL JOIN t4) join (t5 NATURAL JOIN t6) on t3.id4 = t5.id5) JOIN t2
+ON (t3.id3 = t2.id3 AND t5.id5 = t2.id5));
+id2 id1 id4 id3 id6 id5 id3 id5
+1 1 1 1 1 1 1 1
+SELECT * FROM t1 NATURAL JOIN ((t3 join (t5 NATURAL JOIN t6)) JOIN t2);
+id2 id1 id3 id4 id6 id5 id3 id5
+1 1 1 1 1 1 1 1
+SELECT * FROM
+(t2 JOIN (t3 NATURAL JOIN t4, t5 NATURAL JOIN t6)
+ON (t3.id3 = t2.id3 AND t5.id5 = t2.id5))
+NATURAL JOIN
+t1;
+id2 id3 id5 id4 id3 id6 id5 id1
+1 1 1 1 1 1 1 1
+SELECT * FROM
+(t2 JOIN ((t3 NATURAL JOIN t4) join (t5 NATURAL JOIN t6)))
+NATURAL JOIN
+t1;
+id2 id3 id5 id4 id3 id6 id5 id1
+1 1 1 1 1 1 1 1
+DROP TABLE t1,t2,t3,t4,t5,t6;
+CREATE TABLE t1 (a int, b int, PRIMARY KEY (a,b), KEY b (b)) engine=RocksDB;
+INSERT INTO t1 VALUES (1,1),(1,2),(1,0),(1,3);
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range PRIMARY,b PRIMARY 8 NULL 501 Using where; Using index for group-by
+SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a;
+MAX(b) a
+1 1
+SELECT MIN(b), a FROM t1 WHERE b > 1 AND a = 1 GROUP BY a;
+MIN(b) a
+2 1
+CREATE TABLE t2 (a int, b int, c int, PRIMARY KEY (a,b,c)) engine=RocksDB;
+INSERT INTO t2 SELECT a,b,b FROM t1;
+ANALYZE TABLE t2;
+Table Op Msg_type Msg_text
+test.t2 analyze status OK
+explain SELECT MIN(c) FROM t2 WHERE b = 2 and a = 1 and c > 1 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range PRIMARY PRIMARY 12 NULL 251 Using where; Using index for group-by
+SELECT MIN(c) FROM t2 WHERE b = 2 and a = 1 and c > 1 GROUP BY a;
+MIN(c)
+2
+DROP TABLE t1,t2;
+CREATE TABLE t1 (a INT, b INT, INDEX (a,b)) engine=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1,1), (1,2), (1,3), (1,4), (1,5),
+(2,2), (2,3), (2,1), (3,1), (4,1), (4,2), (4,3), (4,4), (4,5), (4,6);
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+EXPLAIN SELECT max(b), a FROM t1 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL a 5 NULL 251 Using index for group-by
+FLUSH STATUS;
+SELECT max(b), a FROM t1 GROUP BY a;
+max(b) a
+5 1
+3 2
+1 3
+6 4
+SHOW STATUS LIKE 'handler_read__e%';
+Variable_name Value
+Handler_read_key 8
+Handler_read_next 0
+Handler_read_retry 0
+EXPLAIN SELECT max(b), a FROM t1 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL a 5 NULL 251 Using index for group-by
+FLUSH STATUS;
+CREATE TABLE t2 engine=RocksDB SELECT max(b), a FROM t1 GROUP BY a;
+SHOW STATUS LIKE 'handler_read__e%';
+Variable_name Value
+Handler_read_key 8
+Handler_read_next 0
+Handler_read_retry 0
+FLUSH STATUS;
+SELECT * FROM (SELECT max(b), a FROM t1 GROUP BY a) b;
+max(b) a
+5 1
+3 2
+1 3
+6 4
+SHOW STATUS LIKE 'handler_read__e%';
+Variable_name Value
+Handler_read_key 8
+Handler_read_next 0
+Handler_read_retry 0
+FLUSH STATUS;
+(SELECT max(b), a FROM t1 GROUP BY a) UNION
+(SELECT max(b), a FROM t1 GROUP BY a);
+max(b) a
+5 1
+3 2
+1 3
+6 4
+SHOW STATUS LIKE 'handler_read__e%';
+Variable_name Value
+Handler_read_key 16
+Handler_read_next 0
+Handler_read_retry 0
+EXPLAIN (SELECT max(b), a FROM t1 GROUP BY a) UNION
+(SELECT max(b), a FROM t1 GROUP BY a);
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 range NULL a 5 NULL 251 Using index for group-by
+2 UNION t1 range NULL a 5 NULL 251 Using index for group-by
+NULL UNION RESULT <union1,2> ALL NULL NULL NULL NULL NULL
+EXPLAIN SELECT (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2) x
+FROM t1 AS t1_outer;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1_outer index NULL a 10 NULL 1000 Using index
+2 SUBQUERY t1 range NULL a 5 NULL 251 Using index for group-by
+EXPLAIN SELECT 1 FROM t1 AS t1_outer WHERE EXISTS
+(SELECT max(b) FROM t1 GROUP BY a HAVING a < 2);
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1_outer index NULL a 10 NULL 1000 Using index
+2 SUBQUERY t1 index NULL a 10 NULL 1000 Using index
+EXPLAIN SELECT 1 FROM t1 AS t1_outer WHERE
+(SELECT max(b) FROM t1 GROUP BY a HAVING a < 2) > 12;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1_outer index NULL a 10 NULL 1000 Using index
+2 SUBQUERY t1 range NULL a 5 NULL 251 Using index for group-by
+EXPLAIN SELECT 1 FROM t1 AS t1_outer WHERE
+a IN (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2);
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 251
+1 PRIMARY t1_outer ref a a 5 <subquery2>.max(b) 4 Using index
+2 MATERIALIZED t1 range NULL a 5 NULL 251 Using index for group-by
+EXPLAIN SELECT 1 FROM t1 AS t1_outer GROUP BY a HAVING
+a > (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2);
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1_outer range NULL a 5 NULL 251 Using index for group-by
+2 SUBQUERY t1 range NULL a 5 NULL 251 Using index for group-by
+EXPLAIN SELECT 1 FROM t1 AS t1_outer1 JOIN t1 AS t1_outer2
+ON t1_outer1.a = (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2)
+AND t1_outer1.b = t1_outer2.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1_outer1 ref a a 5 const 4 Using where; Using index
+1 PRIMARY t1_outer2 index NULL a 10 NULL 1000 Using where; Using index; Using join buffer (flat, BNL join)
+2 SUBQUERY t1 range NULL a 5 NULL 251 Using index for group-by
+EXPLAIN SELECT (SELECT (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2) x
+FROM t1 AS t1_outer) x2 FROM t1 AS t1_outer2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1_outer2 index NULL a 10 NULL 1000 Using index
+2 SUBQUERY t1_outer index NULL a 10 NULL 1000 Using index
+3 SUBQUERY t1 range NULL a 5 NULL 251 Using index for group-by
+CREATE TABLE t3 LIKE t1;
+FLUSH STATUS;
+INSERT INTO t3 SELECT a,MAX(b) FROM t1 GROUP BY a;
+SHOW STATUS LIKE 'handler_read__e%';
+Variable_name Value
+Handler_read_key 8
+Handler_read_next 0
+Handler_read_retry 0
+DELETE FROM t3;
+FLUSH STATUS;
+INSERT INTO t3 SELECT 1, (SELECT MAX(b) FROM t1 GROUP BY a HAVING a < 2)
+FROM t1 LIMIT 1;
+SHOW STATUS LIKE 'handler_read__e%';
+Variable_name Value
+Handler_read_key 8
+Handler_read_next 0
+Handler_read_retry 0
+FLUSH STATUS;
+DELETE FROM t3 WHERE (SELECT MAX(b) FROM t1 GROUP BY a HAVING a < 2) > 10000;
+SHOW STATUS LIKE 'handler_read__e%';
+Variable_name Value
+Handler_read_key 8
+Handler_read_next 0
+Handler_read_retry 0
+FLUSH STATUS;
+DELETE FROM t3 WHERE (SELECT (SELECT MAX(b) FROM t1 GROUP BY a HAVING a < 2) x
+FROM t1) > 10000;
+ERROR 21000: Subquery returns more than 1 row
+SHOW STATUS LIKE 'handler_read__e%';
+Variable_name Value
+Handler_read_key 8
+Handler_read_next 1
+Handler_read_retry 0
+DROP TABLE t1,t2,t3;
+CREATE TABLE t1 (a int, INDEX idx(a)) engine=RocksDB;
+INSERT INTO t1 VALUES
+(4), (2), (1), (2), (4), (2), (1), (4),
+(4), (2), (1), (2), (2), (4), (1), (4);
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+EXPLAIN SELECT DISTINCT(a) FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx 5 NULL 501 Using index for group-by
+SELECT DISTINCT(a) FROM t1;
+a
+1
+2
+4
+EXPLAIN SELECT SQL_BIG_RESULT DISTINCT(a) FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL idx 5 NULL 501 Using index for group-by
+SELECT SQL_BIG_RESULT DISTINCT(a) FROM t1;
+a
+1
+2
+4
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b INT) engine=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1,1), (1,2), (1,3);
+INSERT INTO t1 SELECT a + 1, b FROM t1;
+INSERT INTO t1 SELECT a + 2, b FROM t1;
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+EXPLAIN
+SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 Using temporary; Using filesort
+SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC;
+a MIN(b) MAX(b)
+4 1 3
+3 1 3
+2 1 3
+1 1 3
+CREATE INDEX break_it ON t1 (a, b);
+EXPLAIN
+SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL break_it 10 NULL 251 Using index for group-by
+SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a;
+a MIN(b) MAX(b)
+1 1 3
+2 1 3
+3 1 3
+4 1 3
+EXPLAIN
+SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL break_it 10 NULL 251 Using index for group-by; Using temporary; Using filesort
+SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC;
+a MIN(b) MAX(b)
+4 1 3
+3 1 3
+2 1 3
+1 1 3
+EXPLAIN
+SELECT a, MIN(b), MAX(b), AVG(b) FROM t1 GROUP BY a ORDER BY a DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL break_it 10 NULL 1000 Using index
+SELECT a, MIN(b), MAX(b), AVG(b) FROM t1 GROUP BY a ORDER BY a DESC;
+a MIN(b) MAX(b) AVG(b)
+4 1 3 2.0000
+3 1 3 2.0000
+2 1 3 2.0000
+1 1 3 2.0000
+DROP TABLE t1;
+create table t1 (a int, b int, primary key (a,b), key `index` (a,b)) engine=MyISAM;
+insert into t1 (a,b) values
+(0,0),(0,1),(0,2),(0,3),(0,4),(0,5),(0,6),
+(0,7),(0,8),(0,9),(0,10),(0,11),(0,12),(0,13),
+(1,0),(1,1),(1,2),(1,3),(1,4),(1,5),(1,6),
+(1,7),(1,8),(1,9),(1,10),(1,11),(1,12),(1,13),
+(2,0),(2,1),(2,2),(2,3),(2,4),(2,5),(2,6),
+(2,7),(2,8),(2,9),(2,10),(2,11),(2,12),(2,13),
+(3,0),(3,1),(3,2),(3,3),(3,4),(3,5),(3,6),
+(3,7),(3,8),(3,9),(3,10),(3,11),(3,12),(3,13);
+insert into t1 (a,b) select a, max(b)+1 from t1 where a = 0 group by a;
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+select * from t1;
+a b
+0 0
+0 1
+0 2
+0 3
+0 4
+0 5
+0 6
+0 7
+0 8
+0 9
+0 10
+0 11
+0 12
+0 13
+0 14
+1 0
+1 1
+1 2
+1 3
+1 4
+1 5
+1 6
+1 7
+1 8
+1 9
+1 10
+1 11
+1 12
+1 13
+2 0
+2 1
+2 2
+2 3
+2 4
+2 5
+2 6
+2 7
+2 8
+2 9
+2 10
+2 11
+2 12
+2 13
+3 0
+3 1
+3 2
+3 3
+3 4
+3 5
+3 6
+3 7
+3 8
+3 9
+3 10
+3 11
+3 12
+3 13
+explain extended select sql_buffer_result a, max(b)+1 from t1 where a = 0 group by a;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ref PRIMARY,index PRIMARY 4 const 15 100.00 Using index; Using temporary
+Warnings:
+Note 1003 select sql_buffer_result `test`.`t1`.`a` AS `a`,max(`test`.`t1`.`b`) + 1 AS `max(b)+1` from `test`.`t1` where `test`.`t1`.`a` = 0 group by `test`.`t1`.`a`
+drop table t1;
+CREATE TABLE t1 (a int, b int, c int, d int,
+KEY foo (c,d,a,b), KEY bar (c,a,b,d)) engine=RocksDB;
+INSERT INTO t1 VALUES (1, 1, 1, 1), (1, 1, 1, 2), (1, 1, 1, 3), (1, 1, 1, 4);
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT a,b,c+1,d FROM t1;
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+EXPLAIN SELECT DISTINCT c FROM t1 WHERE d=4;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL foo 10 NULL 63 Using where; Using index for group-by
+SELECT DISTINCT c FROM t1 WHERE d=4;
+c
+1
+2
+DROP TABLE t1;
+#
+# Bug #45386: Wrong query result with MIN function in field list,
+# WHERE and GROUP BY clause
+#
+CREATE TABLE t (a INT, b INT, INDEX (a,b)) engine=RocksDB;
+INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1);
+INSERT INTO t SELECT * FROM t;
+INSERT INTO t SELECT * FROM t;
+ANALYZE TABLE t;
+Table Op Msg_type Msg_text
+test.t analyze status OK
+# test MIN
+#should use range with index for group by
+EXPLAIN
+SELECT a, MIN(b) FROM t WHERE b <> 0 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range NULL a 10 NULL 251 Using where; Using index for group-by
+#should return 1 row
+SELECT a, MIN(b) FROM t WHERE b <> 0 GROUP BY a;
+a MIN(b)
+2 1
+# test MAX
+#should use range with index for group by
+EXPLAIN
+SELECT a, MAX(b) FROM t WHERE b <> 1 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range NULL a 10 NULL 251 Using where; Using index for group-by
+#should return 1 row
+SELECT a, MAX(b) FROM t WHERE b <> 1 GROUP BY a;
+a MAX(b)
+2 0
+# test 3 ranges and use the middle one
+INSERT INTO t SELECT a, 2 FROM t;
+#should use range with index for group by
+EXPLAIN
+SELECT a, MAX(b) FROM t WHERE b > 0 AND b < 2 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range NULL a 10 NULL 251 Using where; Using index for group-by
+#should return 1 row
+SELECT a, MAX(b) FROM t WHERE b > 0 AND b < 2 GROUP BY a;
+a MAX(b)
+2 1
+DROP TABLE t;
+#
+# Bug #48472: Loose index scan inappropriately chosen for some WHERE
+# conditions
+#
+CREATE TABLE t (a INT, b INT, INDEX (a,b)) engine=RocksDB;
+INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1);
+INSERT INTO t SELECT * FROM t;
+ANALYZE TABLE t;
+Table Op Msg_type Msg_text
+test.t analyze status OK
+SELECT a, MAX(b) FROM t WHERE 0=b+0 GROUP BY a;
+a MAX(b)
+2 0
+DROP TABLE t;
+End of 5.0 tests
+#
+# Bug #46607: Assertion failed: (cond_type == Item::FUNC_ITEM) results in
+# server crash
+#
+CREATE TABLE t (a INT, b INT, INDEX (a,b)) engine=RocksDB;
+INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1);
+INSERT INTO t SELECT * FROM t;
+SELECT a, MAX(b) FROM t WHERE b GROUP BY a;
+a MAX(b)
+2 1
+DROP TABLE t;
+CREATE TABLE t1(a INT NOT NULL, b INT NOT NULL, KEY (b)) engine=RocksDB;
+INSERT INTO t1 VALUES(1,1),(2,1);
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+SELECT 1 AS c, b FROM t1 WHERE b IN (1,2) GROUP BY c, b;
+c b
+1 1
+SELECT a FROM t1 WHERE b=1;
+a
+1
+2
+DROP TABLE t1;
+#
+# Bug#47762: Incorrect result from MIN() when WHERE tests NOT NULL column
+# for NULL
+#
+## Test for NULLs allowed
+CREATE TABLE t1 ( a INT, KEY (a) ) engine=RocksDB;
+INSERT INTO t1 VALUES (1), (2), (3);
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a = NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a = NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a <> NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a <> NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a > NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a > NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a < NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a < NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a <=> NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x No matching min/max row
+SELECT MIN( a ) FROM t1 WHERE a <=> NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND 10;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND 10;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN 10 AND NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN 10 AND NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a = (SELECT a FROM t1 WHERE a < 0);
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Using where; Using index
+x x x x x x x x x Using where; Using index
+SELECT MIN( a ) FROM t1 WHERE a = (SELECT a FROM t1 WHERE a < 0);
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a IS NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x No matching min/max row
+SELECT MIN( a ) FROM t1 WHERE a IS NULL;
+MIN( a )
+NULL
+INSERT INTO t1 VALUES (NULL), (NULL);
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a = NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a = NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a <> NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a <> NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a > NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a > NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a < NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a < NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a <=> NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Select tables optimized away
+SELECT MIN( a ) FROM t1 WHERE a <=> NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND 10;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND 10;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN 10 AND NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN 10 AND NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a = (SELECT a FROM t1 WHERE a < 0);
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Using where; Using index
+x x x x x x x x x Using where; Using index
+SELECT MIN( a ) FROM t1 WHERE a = (SELECT a FROM t1 WHERE a < 0);
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a IS NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Select tables optimized away
+SELECT MIN( a ) FROM t1 WHERE a IS NULL;
+MIN( a )
+NULL
+DROP TABLE t1;
+## Test for NOT NULLs
+CREATE TABLE t1 ( a INT NOT NULL PRIMARY KEY) engine=RocksDB;
+INSERT INTO t1 VALUES (1), (2), (3);
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+#
+# NULL-safe operator test disabled for non-NULL indexed columns.
+#
+# See bugs
+#
+# - Bug#52173: Reading NULL value from non-NULL index gives
+# wrong result in embedded server
+#
+# - Bug#52174: Sometimes wrong plan when reading a MAX value from
+# non-NULL index
+#
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a = NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a = NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a <> NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a <> NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a > NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a > NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a < NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a < NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND 10;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND 10;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN 10 AND NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN 10 AND NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a = (SELECT a FROM t1 WHERE a < 0);
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Using where; Using index
+x x x x x x x x x Using where; Using index
+SELECT MIN( a ) FROM t1 WHERE a = (SELECT a FROM t1 WHERE a < 0);
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a IS NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE
+SELECT MIN( a ) FROM t1 WHERE a IS NULL;
+MIN( a )
+NULL
+DROP TABLE t1;
+#
+# Bug#53859: Valgrind: opt_sum_query(TABLE_LIST*, List<Item>&, Item*) at
+# opt_sum.cc:305
+#
+CREATE TABLE t1 ( a INT, KEY (a) ) engine=RocksDB;
+INSERT INTO t1 VALUES (1), (2), (3);
+SELECT MIN( a ) AS min_a
+FROM t1
+WHERE a > 1 AND a IS NULL
+ORDER BY min_a;
+min_a
+NULL
+DROP TABLE t1;
+End of 5.1 tests
+#
+# WL#3220 (Loose index scan for COUNT DISTINCT)
+#
+CREATE TABLE t1 (a INT, b INT, c INT, KEY (a,b)) engine=RocksDB;
+INSERT INTO t1 VALUES (1,1,1), (1,2,1), (1,3,1), (1,4,1);
+INSERT INTO t1 SELECT a, b + 4, 1 FROM t1;
+INSERT INTO t1 SELECT a + 1, b, 1 FROM t1;
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+CREATE TABLE t2 (a INT, b INT, c INT, d INT, e INT, f INT, KEY (a,b,c)) engine=RocksDB;
+INSERT INTO t2 VALUES (1,1,1,1,1,1), (1,2,1,1,1,1), (1,3,1,1,1,1),
+(1,4,1,1,1,1);
+INSERT INTO t2 SELECT a, b + 4, c,d,e,f FROM t2;
+INSERT INTO t2 SELECT a + 1, b, c,d,e,f FROM t2;
+ANALYZE TABLE t2;
+Table Op Msg_type Msg_text
+test.t2 analyze status OK
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL a 5 NULL 251 Using index for group-by
+SELECT COUNT(DISTINCT a) FROM t1;
+COUNT(DISTINCT a)
+2
+EXPLAIN SELECT COUNT(DISTINCT a,b) FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL a 10 NULL 501 Using index for group-by
+SELECT COUNT(DISTINCT a,b) FROM t1;
+COUNT(DISTINCT a,b)
+16
+EXPLAIN SELECT COUNT(DISTINCT b,a) FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL a 10 NULL 501 Using index for group-by
+SELECT COUNT(DISTINCT b,a) FROM t1;
+COUNT(DISTINCT b,a)
+16
+EXPLAIN SELECT COUNT(DISTINCT b) FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL a 10 NULL 1000 Using index
+SELECT COUNT(DISTINCT b) FROM t1;
+COUNT(DISTINCT b)
+8
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL a 5 NULL 251 Using index for group-by
+SELECT COUNT(DISTINCT a) FROM t1 GROUP BY a;
+COUNT(DISTINCT a)
+1
+1
+EXPLAIN SELECT COUNT(DISTINCT b) FROM t1 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL a 10 NULL 501 Using index for group-by
+SELECT COUNT(DISTINCT b) FROM t1 GROUP BY a;
+COUNT(DISTINCT b)
+8
+8
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 GROUP BY b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL a 10 NULL 1000 Using index; Using filesort
+SELECT COUNT(DISTINCT a) FROM t1 GROUP BY b;
+COUNT(DISTINCT a)
+2
+2
+2
+2
+2
+2
+2
+2
+EXPLAIN SELECT DISTINCT COUNT(DISTINCT a) FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL a 10 NULL 1000 Using index
+SELECT DISTINCT COUNT(DISTINCT a) FROM t1;
+COUNT(DISTINCT a)
+2
+EXPLAIN SELECT COUNT(DISTINCT a, b + 0) FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL a 10 NULL 1000 Using index
+SELECT COUNT(DISTINCT a, b + 0) FROM t1;
+COUNT(DISTINCT a, b + 0)
+16
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT b) < 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL a 10 NULL 1000 Using index
+SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT b) < 10;
+COUNT(DISTINCT a)
+2
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT c) < 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 1000
+SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT c) < 10;
+COUNT(DISTINCT a)
+2
+EXPLAIN SELECT 1 FROM t1 HAVING COUNT(DISTINCT a) < 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL a 5 NULL 251 Using index for group-by
+SELECT 1 FROM t1 HAVING COUNT(DISTINCT a) < 10;
+1
+1
+EXPLAIN SELECT 1 FROM t1 GROUP BY a HAVING COUNT(DISTINCT b) > 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL a 10 NULL 501 Using index for group-by
+SELECT 1 FROM t1 GROUP BY a HAVING COUNT(DISTINCT b) > 1;
+1
+1
+1
+EXPLAIN SELECT COUNT(DISTINCT t1_1.a) FROM t1 t1_1, t1 t1_2 GROUP BY t1_1.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1_1 index NULL a 10 NULL 1000 Using index; Using temporary; Using filesort
+1 SIMPLE t1_2 index NULL a 10 NULL 1000 Using index; Using join buffer (flat, BNL join)
+SELECT COUNT(DISTINCT t1_1.a) FROM t1 t1_1, t1 t1_2 GROUP BY t1_1.a;
+COUNT(DISTINCT t1_1.a)
+1
+1
+EXPLAIN SELECT COUNT(DISTINCT a), 12 FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL a 5 NULL 251 Using index for group-by
+SELECT COUNT(DISTINCT a), 12 FROM t1;
+COUNT(DISTINCT a) 12
+2 12
+EXPLAIN SELECT COUNT(DISTINCT a, b, c) FROM t2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL a 15 NULL 501 Using index for group-by
+SELECT COUNT(DISTINCT a, b, c) FROM t2;
+COUNT(DISTINCT a, b, c)
+16
+EXPLAIN SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT a) FROM t2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL a 5 NULL 126 Using index for group-by
+SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT a) FROM t2;
+COUNT(DISTINCT a) SUM(DISTINCT a) AVG(DISTINCT a)
+2 3 1.5000
+EXPLAIN SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT f) FROM t2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 1000
+SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT f) FROM t2;
+COUNT(DISTINCT a) SUM(DISTINCT a) AVG(DISTINCT f)
+2 3 1.0000
+EXPLAIN SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, a) FROM t2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL a 10 NULL 251 Using index for group-by
+SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, a) FROM t2;
+COUNT(DISTINCT a, b) COUNT(DISTINCT b, a)
+16 16
+EXPLAIN SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, f) FROM t2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 1000
+SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, f) FROM t2;
+COUNT(DISTINCT a, b) COUNT(DISTINCT b, f)
+16 8
+EXPLAIN SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, d) FROM t2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 1000
+SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, d) FROM t2;
+COUNT(DISTINCT a, b) COUNT(DISTINCT b, d)
+16 8
+EXPLAIN SELECT a, c, COUNT(DISTINCT c, a, b) FROM t2 GROUP BY a, b, c;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL a 15 NULL 501 Using index for group-by
+SELECT a, c, COUNT(DISTINCT c, a, b) FROM t2 GROUP BY a, b, c;
+a c COUNT(DISTINCT c, a, b)
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+EXPLAIN SELECT COUNT(DISTINCT c, a, b) FROM t2
+WHERE a > 5 AND b BETWEEN 10 AND 20 GROUP BY a, b, c;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 15 NULL 501 Using where; Using index for group-by
+SELECT COUNT(DISTINCT c, a, b) FROM t2
+WHERE a > 5 AND b BETWEEN 10 AND 20 GROUP BY a, b, c;
+COUNT(DISTINCT c, a, b)
+EXPLAIN SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 WHERE a = 5
+GROUP BY b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ref a a 5 const 1000 Using where; Using index
+SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 WHERE a = 5
+GROUP BY b;
+COUNT(DISTINCT b) SUM(DISTINCT b)
+EXPLAIN SELECT a, COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL a 10 NULL 251 Using index for group-by
+SELECT a, COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a;
+a COUNT(DISTINCT b) SUM(DISTINCT b)
+1 8 36
+2 8 36
+EXPLAIN SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL a 10 NULL 251 Using index for group-by
+SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a;
+COUNT(DISTINCT b) SUM(DISTINCT b)
+8 36
+8 36
+EXPLAIN SELECT COUNT(DISTINCT a, b) FROM t2 WHERE c = 13 AND d = 42;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 1000 Using where
+SELECT COUNT(DISTINCT a, b) FROM t2 WHERE c = 13 AND d = 42;
+COUNT(DISTINCT a, b)
+0
+EXPLAIN SELECT a, COUNT(DISTINCT a), SUM(DISTINCT a) FROM t2
+WHERE b = 13 AND c = 42 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL a 15 NULL 126 Using where; Using index for group-by
+SELECT a, COUNT(DISTINCT a), SUM(DISTINCT a) FROM t2
+WHERE b = 13 AND c = 42 GROUP BY a;
+a COUNT(DISTINCT a) SUM(DISTINCT a)
+# This query could have been resolved using a loose index scan since
+# the second part of count(..) is defined by a constant predicate
+EXPLAIN SELECT COUNT(DISTINCT a, b), SUM(DISTINCT a) FROM t2 WHERE b = 42;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index NULL a 15 NULL 1000 Using where; Using index
+SELECT COUNT(DISTINCT a, b), SUM(DISTINCT a) FROM t2 WHERE b = 42;
+COUNT(DISTINCT a, b) SUM(DISTINCT a)
+0 NULL
+EXPLAIN SELECT SUM(DISTINCT a), MAX(b) FROM t2 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index NULL a 15 NULL 1000 Using index
+SELECT SUM(DISTINCT a), MAX(b) FROM t2 GROUP BY a;
+SUM(DISTINCT a) MAX(b)
+1 8
+2 8
+EXPLAIN SELECT 42 * (a + c + COUNT(DISTINCT c, a, b)) FROM t2 GROUP BY a, b, c;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range NULL a 15 NULL 501 Using index for group-by
+SELECT 42 * (a + c + COUNT(DISTINCT c, a, b)) FROM t2 GROUP BY a, b, c;
+42 * (a + c + COUNT(DISTINCT c, a, b))
+126
+126
+126
+126
+126
+126
+126
+126
+168
+168
+168
+168
+168
+168
+168
+168
+EXPLAIN SELECT (SUM(DISTINCT a) + MAX(b)) FROM t2 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index NULL a 15 NULL 1000 Using index
+SELECT (SUM(DISTINCT a) + MAX(b)) FROM t2 GROUP BY a;
+(SUM(DISTINCT a) + MAX(b))
+9
+10
+DROP TABLE t1,t2;
+# end of WL#3220 tests
+#
+# Bug#50539: Wrong result when loose index scan is used for an aggregate
+# function with distinct
+#
+CREATE TABLE t1 (
+f1 int(11) NOT NULL DEFAULT '0',
+f2 char(1) NOT NULL DEFAULT '',
+PRIMARY KEY (f1,f2)
+) engine=RocksDB;
+insert into t1 values(1,'A'),(1 , 'B'), (1, 'C'), (2, 'A'),
+(3, 'A'), (3, 'B'), (3, 'C'), (3, 'D');
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
+f1 COUNT(DISTINCT f2)
+1 3
+2 1
+3 4
+explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL PRIMARY 5 NULL 1000 Using index
+drop table t1;
+# End of test#50539.
+#
+# Bug#17217128 - BAD INTERACTION BETWEEN MIN/MAX AND
+# "HAVING SUM(DISTINCT)": WRONG RESULTS.
+#
+CREATE TABLE t (a INT, b INT, KEY(a,b)) engine=RocksDB;
+INSERT INTO t VALUES (1,1), (2,2), (3,3), (4,4), (1,0), (3,2), (4,5);
+ANALYZE TABLE t;
+Table Op Msg_type Msg_text
+test.t analyze status OK
+ANALYZE TABLE t;
+Table Op Msg_type Msg_text
+test.t analyze status OK
+SELECT a, SUM(DISTINCT a), MIN(b) FROM t GROUP BY a;
+a SUM(DISTINCT a) MIN(b)
+1 1 0
+2 2 2
+3 3 2
+4 4 4
+EXPLAIN SELECT a, SUM(DISTINCT a), MIN(b) FROM t GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t index NULL a 10 NULL 1000 Using index
+SELECT a, SUM(DISTINCT a), MAX(b) FROM t GROUP BY a;
+a SUM(DISTINCT a) MAX(b)
+1 1 1
+2 2 2
+3 3 3
+4 4 5
+EXPLAIN SELECT a, SUM(DISTINCT a), MAX(b) FROM t GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t index NULL a 10 NULL 1000 Using index
+SELECT a, MAX(b) FROM t GROUP BY a HAVING SUM(DISTINCT a);
+a MAX(b)
+1 1
+2 2
+3 3
+4 5
+EXPLAIN SELECT a, MAX(b) FROM t GROUP BY a HAVING SUM(DISTINCT a);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t index NULL a 10 NULL 1000 Using index
+SELECT SUM(DISTINCT a), MIN(b), MAX(b) FROM t;
+SUM(DISTINCT a) MIN(b) MAX(b)
+10 0 5
+EXPLAIN SELECT SUM(DISTINCT a), MIN(b), MAX(b) FROM t;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t index NULL a 10 NULL 1000 Using index
+SELECT a, SUM(DISTINCT a), MIN(b), MAX(b) FROM t GROUP BY a;
+a SUM(DISTINCT a) MIN(b) MAX(b)
+1 1 0 1
+2 2 2 2
+3 3 2 3
+4 4 4 5
+EXPLAIN SELECT a, SUM(DISTINCT a), MIN(b), MAX(b) FROM t GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t index NULL a 10 NULL 1000 Using index
+DROP TABLE t;
+#
+# Bug#18109609: LOOSE INDEX SCAN IS NOT USED WHEN IT SHOULD
+#
+CREATE TABLE t1 (
+id INT AUTO_INCREMENT PRIMARY KEY,
+c1 INT,
+c2 INT,
+KEY(c1,c2)) engine=RocksDB;
+INSERT INTO t1(c1,c2) VALUES
+(1, 1), (1,2), (2,1), (2,2), (3,1), (3,2), (3,3), (4,1), (4,2), (4,3),
+(4,4), (4,5), (4,6), (4,7), (4,8), (4,9), (4,10), (4,11), (4,12), (4,13),
+(4,14), (4,15), (4,16), (4,17), (4,18), (4,19), (4,20),(5,5);
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+EXPLAIN SELECT MAX(c2), c1 FROM t1 WHERE c1 = 4 GROUP BY c1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref c1 c1 5 const 1000 Using index
+FLUSH STATUS;
+SELECT MAX(c2), c1 FROM t1 WHERE c1 = 4 GROUP BY c1;
+MAX(c2) c1
+20 4
+SHOW SESSION STATUS LIKE 'Handler_read%';
+Variable_name Value
+Handler_read_first 0
+Handler_read_key 1
+Handler_read_last 0
+Handler_read_next 20
+Handler_read_prev 0
+Handler_read_retry 0
+Handler_read_rnd 0
+Handler_read_rnd_deleted 0
+Handler_read_rnd_next 0
+DROP TABLE t1;
+# End of test for Bug#18109609
+set global debug_dbug=@debug_tmp;
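The group_min_max results above all hinge on one EXPLAIN marker: 'Using index for group-by' in the Extra column signals a loose index scan, while plain 'Using index' means the optimizer fell back to scanning the whole index. A minimal sketch of how to check which plan a GROUP BY/MIN/MAX query gets — table name and data here are illustrative, not from the test, and with this few rows the optimizer may still prefer a full index scan:

CREATE TABLE demo (a INT, b INT, KEY ab (a, b)) ENGINE=RocksDB;
INSERT INTO demo VALUES (1, 1), (1, 2), (2, 1), (2, 2);
ANALYZE TABLE demo;
-- 'Using index for group-by' in Extra marks a loose index scan;
-- plain 'Using index' means the whole index was scanned instead.
EXPLAIN SELECT a, MIN(b) FROM demo GROUP BY a;
DROP TABLE demo;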
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ha_extra_keyread.result b/storage/rocksdb/mysql-test/rocksdb/r/ha_extra_keyread.result
new file mode 100644
index 00000000000..93c8a464577
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ha_extra_keyread.result
@@ -0,0 +1,10 @@
+CREATE TABLE t1 (a INT, b CHAR(8), KEY ab(a, b)) ENGINE=rocksdb DEFAULT CHARSET utf8mb4 COLLATE utf8mb4_bin;
+INSERT INTO t1 (a,b) VALUES (76,'bar');
+INSERT INTO t1 (a,b) VALUES (35,'foo');
+INSERT INTO t1 (a,b) VALUES (77,'baz');
+SET debug_dbug="+d,dbug.rocksdb.HA_EXTRA_KEYREAD";
+SELECT b FROM t1 FORCE INDEX(ab) WHERE a=35;
+b
+foo
+SET debug_dbug="-d,dbug.rocksdb.HA_EXTRA_KEYREAD";
+DROP TABLE t1;
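The HA_EXTRA_KEYREAD test above relies on MariaDB's DBUG facility: SET debug_dbug="+d,<keyword>" arms a named injection point inside the server and "-d,<keyword>" disarms it again. The debug_dbug variable exists only on debug builds. A generic sketch of the pattern — the keyword below is hypothetical, only the one used in the test file is real:

SET debug_dbug = "+d,dbug.example.keyword";  -- arm a named DBUG injection point (hypothetical keyword, debug builds only)
SELECT 1;                                    -- the statement exercised under the injected behaviour
SET debug_dbug = "-d,dbug.example.keyword";  -- disarm it again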
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/handler_basic.result b/storage/rocksdb/mysql-test/rocksdb/r/handler_basic.result
new file mode 100644
index 00000000000..efc6ccde500
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/handler_basic.result
@@ -0,0 +1,127 @@
+DROP TABLE IF EXISTS t1;
+FLUSH STATUS;
+CREATE TABLE t1 (id INT PRIMARY KEY, a VARCHAR(100), b INT,
+INDEX b(b)) ENGINE=rocksdb;
+INSERT INTO t1 (id,a,b) VALUES (1,'foobar',100),(2,'z',0),(3,'bar',50);
+SHOW SESSION STATUS LIKE 'Handler_write%';
+Variable_name Value
+Handler_write 3
+UPDATE t1 SET b=1000 WHERE id=1;
+SHOW SESSION STATUS LIKE 'Handler_update%';
+Variable_name Value
+Handler_update 1
+DELETE FROM t1 WHERE id=2;
+SHOW SESSION STATUS LIKE 'Handler_delete%';
+Variable_name Value
+Handler_delete 1
+INSERT INTO t1 (id,b) VALUES(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10);
+SHOW SESSION STATUS LIKE 'Handler_write%';
+Variable_name Value
+Handler_write 10
+FLUSH STATUS;
+SELECT * FROM t1 WHERE id=8;
+id a b
+8 NULL 8
+SHOW SESSION STATUS LIKE 'Handler_read%';
+Variable_name Value
+Handler_read_first 0
+Handler_read_key 1
+Handler_read_last 0
+Handler_read_next 0
+Handler_read_prev 0
+Handler_read_retry 0
+Handler_read_rnd 0
+Handler_read_rnd_deleted 0
+Handler_read_rnd_next 0
+FLUSH STATUS;
+SELECT * FROM t1 WHERE b=6;
+id a b
+6 NULL 6
+SHOW SESSION STATUS LIKE 'Handler_read%';
+Variable_name Value
+Handler_read_first 0
+Handler_read_key 1
+Handler_read_last 0
+Handler_read_next 1
+Handler_read_prev 0
+Handler_read_retry 0
+Handler_read_rnd 0
+Handler_read_rnd_deleted 0
+Handler_read_rnd_next 0
+FLUSH STATUS;
+SELECT * FROM t1;
+id a b
+1 foobar 1000
+10 NULL 10
+3 bar 50
+4 NULL 4
+5 NULL 5
+6 NULL 6
+7 NULL 7
+8 NULL 8
+9 NULL 9
+SHOW SESSION STATUS LIKE 'Handler_read%';
+Variable_name Value
+Handler_read_first 0
+Handler_read_key 0
+Handler_read_last 0
+Handler_read_next 0
+Handler_read_prev 0
+Handler_read_retry 0
+Handler_read_rnd 0
+Handler_read_rnd_deleted 0
+Handler_read_rnd_next 10
+FLUSH STATUS;
+SELECT * FROM t1 FORCE INDEX(b) WHERE b <=5 ORDER BY b;
+id a b
+4 NULL 4
+5 NULL 5
+SHOW SESSION STATUS LIKE 'Handler_read%';
+Variable_name Value
+Handler_read_first 0
+Handler_read_key 1
+Handler_read_last 0
+Handler_read_next 2
+Handler_read_prev 0
+Handler_read_retry 0
+Handler_read_rnd 0
+Handler_read_rnd_deleted 0
+Handler_read_rnd_next 0
+FLUSH STATUS;
+SELECT * FROM t1 WHERE id >=8 ORDER BY id;
+id a b
+8 NULL 8
+9 NULL 9
+10 NULL 10
+SHOW SESSION STATUS LIKE 'Handler_read%';
+Variable_name Value
+Handler_read_first 0
+Handler_read_key 1
+Handler_read_last 0
+Handler_read_next 3
+Handler_read_prev 0
+Handler_read_retry 0
+Handler_read_rnd 0
+Handler_read_rnd_deleted 0
+Handler_read_rnd_next 0
+FLUSH STATUS;
+SELECT * FROM t1 WHERE id < 8 ORDER BY id;
+id a b
+1 foobar 1000
+3 bar 50
+4 NULL 4
+5 NULL 5
+6 NULL 6
+7 NULL 7
+SHOW SESSION STATUS LIKE 'Handler_read%';
+Variable_name Value
+Handler_read_first 1
+Handler_read_key 0
+Handler_read_last 0
+Handler_read_next 6
+Handler_read_prev 0
+Handler_read_retry 0
+Handler_read_rnd 0
+Handler_read_rnd_deleted 0
+Handler_read_rnd_next 0
+DROP TABLE t1;
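handler_basic.result repeats one measurement idiom throughout: FLUSH STATUS zeroes the session counters, the query under test runs, and SHOW SESSION STATUS LIKE 'Handler_read%' shows how the rows were fetched (Handler_read_key counts index lookups, Handler_read_next forward index steps, Handler_read_rnd_next full-scan row reads). A self-contained sketch of the same probe on a hypothetical table:

CREATE TABLE probe (id INT PRIMARY KEY, v INT) ENGINE=RocksDB;
INSERT INTO probe VALUES (1, 10), (2, 20), (3, 30);
FLUSH STATUS;                              -- zero the session's handler counters
SELECT * FROM probe WHERE id = 2;          -- the statement being measured
SHOW SESSION STATUS LIKE 'Handler_read%';  -- a PK point lookup should show Handler_read_key=1, all else 0
DROP TABLE probe;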
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/hermitage.result b/storage/rocksdb/mysql-test/rocksdb/r/hermitage.result
new file mode 100644
index 00000000000..8bf2416aa78
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/hermitage.result
@@ -0,0 +1,648 @@
+DROP TABLE IF EXISTS test;
+connect con1,localhost,root,,;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+connect con2,localhost,root,,;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+connect con3,localhost,root,,;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+connection con1;
+create table test (id int primary key, value int) engine=rocksdb;
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
+connection con1;
+select * from test;
+id value
+1 10
+2 20
+update test set value = 101 where id = 1;
+connection con2;
+select * from test;
+id value
+1 10
+2 20
+connection con1;
+rollback;
+connection con2;
+select * from test;
+id value
+1 10
+2 20
+commit;
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
+connection con1;
+update test set value = 101 where id = 1;
+connection con2;
+select * from test;
+id value
+1 10
+2 20
+connection con1;
+update test set value = 11 where id = 1;
+commit;
+connection con2;
+select * from test;
+id value
+1 11
+2 20
+commit;
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
+connection con1;
+update test set value = 11 where id = 1;
+connection con2;
+update test set value = 22 where id = 2;
+connection con1;
+select * from test where id = 2;
+id value
+2 20
+connection con2;
+select * from test where id = 1;
+id value
+1 10
+connection con1;
+commit;
+connection con2;
+commit;
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
+connection con1;
+update test set value = 11 where id = 1;
+update test set value = 19 where id = 2;
+connection con2;
+update test set value = 12 where id = 1;
+connection con1;
+commit;
+connection con2;
+connection con3;
+select * from test;
+id value
+1 11
+2 19
+connection con2;
+update test set value = 18 where id = 2;
+connection con3;
+select * from test;
+id value
+1 11
+2 19
+connection con2;
+commit;
+connection con3;
+select * from test;
+id value
+1 12
+2 18
+commit;
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
+connection con1;
+select * from test where value = 30;
+id value
+connection con2;
+insert into test (id, value) values(3, 30);
+commit;
+connection con1;
+select * from test where value % 3 = 0;
+id value
+3 30
+commit;
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
+connection con1;
+update test set value = value + 10;
+connection con2;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_snapshot_conflict_errors';
+select * from test;
+id value
+1 10
+2 20
+delete from test where value = 20;
+connection con1;
+commit;
+connection con2;
+select * from test;
+id value
+2 30
+commit;
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
+connection con1;
+select * from test where id = 1;
+id value
+1 10
+connection con2;
+select * from test where id = 1;
+id value
+1 10
+connection con1;
+update test set value = 11 where id = 1;
+connection con2;
+update test set value = 12 where id = 1;
+connection con1;
+commit;
+connection con2;
+select * from test;
+id value
+1 12
+2 20
+commit;
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
+connection con1;
+select * from test where id = 1;
+id value
+1 10
+connection con2;
+select * from test where id = 1;
+id value
+1 10
+select * from test where id = 2;
+id value
+2 20
+update test set value = 12 where id = 1;
+update test set value = 18 where id = 2;
+commit;
+connection con1;
+select * from test where id = 2;
+id value
+2 18
+commit;
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
+connection con1;
+select * from test where value % 5 = 0;
+id value
+1 10
+2 20
+connection con2;
+update test set value = 12 where value = 10;
+commit;
+connection con1;
+select * from test where value % 3 = 0;
+id value
+1 12
+commit;
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
+connection con1;
+select * from test where id = 1;
+id value
+1 10
+connection con2;
+select * from test;
+id value
+1 10
+2 20
+update test set value = 12 where id = 1;
+update test set value = 18 where id = 2;
+commit;
+connection con1;
+delete from test where value = 20;
+select * from test where id = 2;
+id value
+2 18
+commit;
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
+connection con1;
+select * from test where id in (1,2);
+id value
+1 10
+2 20
+connection con2;
+select * from test where id in (1,2);
+id value
+1 10
+2 20
+connection con1;
+update test set value = 11 where id = 1;
+connection con2;
+update test set value = 21 where id = 2;
+connection con1;
+commit;
+connection con2;
+commit;
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
+connection con1;
+select * from test where value % 3 = 0;
+id value
+connection con2;
+select * from test where value % 3 = 0;
+id value
+connection con1;
+insert into test (id, value) values(3, 30);
+connection con2;
+insert into test (id, value) values(4, 42);
+connection con1;
+commit;
+connection con2;
+commit;
+select * from test where value % 3 = 0;
+id value
+3 30
+4 42
+connection con1;
+select * from test where value % 3 = 0;
+id value
+3 30
+4 42
+connection default;
+drop table test;
+disconnect con1;
+disconnect con2;
+disconnect con3;
+DROP TABLE IF EXISTS test;
+connect con1,localhost,root,,;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+connect con2,localhost,root,,;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+connect con3,localhost,root,,;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+connection con1;
+create table test (id int primary key, value int) engine=rocksdb;
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
+connection con1;
+select * from test;
+id value
+1 10
+2 20
+update test set value = 101 where id = 1;
+connection con2;
+select * from test;
+id value
+1 10
+2 20
+connection con1;
+rollback;
+connection con2;
+select * from test;
+id value
+1 10
+2 20
+commit;
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
+connection con1;
+update test set value = 101 where id = 1;
+connection con2;
+select * from test;
+id value
+1 10
+2 20
+connection con1;
+update test set value = 11 where id = 1;
+commit;
+connection con2;
+select * from test;
+id value
+1 10
+2 20
+commit;
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
+connection con1;
+update test set value = 11 where id = 1;
+connection con2;
+update test set value = 22 where id = 2;
+connection con1;
+select * from test where id = 2;
+id value
+2 20
+connection con2;
+select * from test where id = 1;
+id value
+1 10
+connection con1;
+commit;
+connection con2;
+commit;
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
+connection con1;
+update test set value = 11 where id = 1;
+update test set value = 19 where id = 2;
+connection con2;
+update test set value = 12 where id = 1;
+connection con1;
+commit;
+connection con2;
+connection con3;
+select * from test;
+id value
+1 11
+2 19
+connection con2;
+update test set value = 18 where id = 2;
+connection con3;
+select * from test;
+id value
+1 11
+2 19
+connection con2;
+commit;
+connection con3;
+select * from test;
+id value
+1 11
+2 19
+commit;
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
+connection con1;
+select * from test where value = 30;
+id value
+connection con2;
+insert into test (id, value) values(3, 30);
+commit;
+connection con1;
+select * from test where value % 3 = 0;
+id value
+commit;
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
+connection con1;
+update test set value = value + 10;
+connection con2;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_snapshot_conflict_errors';
+select * from test;
+id value
+1 10
+2 20
+delete from test where value = 20;
+connection con1;
+commit;
+connection con2;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_snapshot_conflict_errors';
+variable_value-@a
+1
+commit;
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
+connection con1;
+select * from test where id = 1;
+id value
+1 10
+connection con2;
+select * from test where id = 1;
+id value
+1 10
+connection con1;
+update test set value = 11 where id = 1;
+connection con2;
+update test set value = 12 where id = 1;
+connection con1;
+commit;
+connection con2;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+commit;
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
+connection con1;
+select * from test where id = 1;
+id value
+1 10
+connection con2;
+select * from test where id = 1;
+id value
+1 10
+select * from test where id = 2;
+id value
+2 20
+update test set value = 12 where id = 1;
+update test set value = 18 where id = 2;
+commit;
+connection con1;
+select * from test where id = 2;
+id value
+2 20
+commit;
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
+connection con1;
+select * from test where value % 5 = 0;
+id value
+1 10
+2 20
+connection con2;
+update test set value = 12 where value = 10;
+commit;
+connection con1;
+select * from test where value % 3 = 0;
+id value
+commit;
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
+connection con1;
+select * from test where id = 1;
+id value
+1 10
+connection con2;
+select * from test;
+id value
+1 10
+2 20
+update test set value = 12 where id = 1;
+update test set value = 18 where id = 2;
+commit;
+connection con1;
+delete from test where value = 20;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+commit;
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
+connection con1;
+select * from test where id in (1,2);
+id value
+1 10
+2 20
+connection con2;
+select * from test where id in (1,2);
+id value
+1 10
+2 20
+connection con1;
+update test set value = 11 where id = 1;
+connection con2;
+update test set value = 21 where id = 2;
+connection con1;
+commit;
+connection con2;
+commit;
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
+connection con1;
+select * from test where value % 3 = 0;
+id value
+connection con2;
+select * from test where value % 3 = 0;
+id value
+connection con1;
+insert into test (id, value) values(3, 30);
+connection con2;
+insert into test (id, value) values(4, 42);
+connection con1;
+commit;
+connection con2;
+commit;
+select * from test where value % 3 = 0;
+id value
+3 30
+4 42
+connection con1;
+select * from test where value % 3 = 0;
+id value
+3 30
+4 42
+connection default;
+drop table test;
+disconnect con1;
+disconnect con2;
+disconnect con3;
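hermitage.result replays the same three-connection interleavings twice, under READ COMMITTED and then REPEATABLE READ; only the second pass produces the ERROR 40001 snapshot conflicts. To count them, the test snapshots a global counter before the conflicting statement and diffs it afterwards — an idiom that can be reused as-is:

SELECT variable_value INTO @before
FROM information_schema.global_status
WHERE variable_name = 'rocksdb_snapshot_conflict_errors';
-- ... run the interleaved statements expected to conflict ...
SELECT variable_value - @before AS new_conflicts
FROM information_schema.global_status
WHERE variable_name = 'rocksdb_snapshot_conflict_errors';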
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result b/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result
new file mode 100644
index 00000000000..6bca2cbad2d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result
@@ -0,0 +1,22 @@
+DROP TABLE IF EXISTS is_ddl_t1;
+DROP TABLE IF EXISTS is_ddl_t2;
+DROP TABLE IF EXISTS is_ddl_t3;
+CREATE TABLE is_ddl_t1 (i INT, j INT, k INT, l INT,
+PRIMARY KEY (i), KEY (j), KEY (k, l) COMMENT 'kl_cf')
+ENGINE = ROCKSDB;
+CREATE TABLE is_ddl_t2 (x INT, y INT, z INT,
+PRIMARY KEY (z, y) COMMENT 'zy_cf',
+KEY (x)) ENGINE = ROCKSDB;
+CREATE TABLE is_ddl_t3 (a INT, b INT, c INT, PRIMARY KEY (a)) ENGINE = ROCKSDB
+COMMENT "ttl_duration=3600;";
+SELECT TABLE_SCHEMA,TABLE_NAME,PARTITION_NAME,INDEX_NAME,INDEX_TYPE,KV_FORMAT_VERSION,CF,TTL_DURATION,INDEX_FLAGS FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME like 'is_ddl_t%';
+TABLE_SCHEMA TABLE_NAME PARTITION_NAME INDEX_NAME INDEX_TYPE KV_FORMAT_VERSION CF TTL_DURATION INDEX_FLAGS
+test is_ddl_t1 NULL PRIMARY 1 13 default 0 0
+test is_ddl_t1 NULL j 2 13 default 0 0
+test is_ddl_t1 NULL k 2 13 kl_cf 0 0
+test is_ddl_t2 NULL PRIMARY 1 13 zy_cf 0 0
+test is_ddl_t2 NULL x 2 13 default 0 0
+test is_ddl_t3 NULL PRIMARY 1 13 default 3600 1
+DROP TABLE is_ddl_t1;
+DROP TABLE is_ddl_t2;
+DROP TABLE is_ddl_t3;
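As the expected output above shows, INFORMATION_SCHEMA.ROCKSDB_DDL maps every index to its column family (assigned through the index COMMENT, e.g. 'kl_cf') and reports the TTL parsed from the table COMMENT. A trimmed-down form of the test's query, keeping only the columns usually of interest:

SELECT TABLE_NAME, INDEX_NAME, CF, TTL_DURATION
FROM INFORMATION_SCHEMA.ROCKSDB_DDL
WHERE TABLE_SCHEMA = 'test' AND TABLE_NAME LIKE 'is_ddl_t%';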
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/i_s_deadlock.result b/storage/rocksdb/mysql-test/rocksdb/r/i_s_deadlock.result
new file mode 100644
index 00000000000..3ec9294e3a1
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/i_s_deadlock.result
@@ -0,0 +1,216 @@
+set @prior_lock_wait_timeout = @@rocksdb_lock_wait_timeout;
+set @prior_deadlock_detect = @@rocksdb_deadlock_detect;
+set @prior_max_latest_deadlocks = @@rocksdb_max_latest_deadlocks;
+set global rocksdb_deadlock_detect = on;
+set global rocksdb_lock_wait_timeout = 10000;
+# Clears the deadlock buffer of any prior deadlocks.
+set global rocksdb_max_latest_deadlocks = 0;
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connect con3,localhost,root,,;
+connection default;
+show create table information_schema.rocksdb_deadlock;
+Table Create Table
+ROCKSDB_DEADLOCK CREATE TEMPORARY TABLE `ROCKSDB_DEADLOCK` (
+ `DEADLOCK_ID` bigint(8) NOT NULL DEFAULT 0,
+ `TIMESTAMP` bigint(8) NOT NULL DEFAULT 0,
+ `TRANSACTION_ID` bigint(8) NOT NULL DEFAULT 0,
+ `CF_NAME` varchar(193) NOT NULL DEFAULT '',
+ `WAITING_KEY` varchar(513) NOT NULL DEFAULT '',
+ `LOCK_TYPE` varchar(193) NOT NULL DEFAULT '',
+ `INDEX_NAME` varchar(193) NOT NULL DEFAULT '',
+ `TABLE_NAME` varchar(193) NOT NULL DEFAULT '',
+ `ROLLED_BACK` bigint(8) NOT NULL DEFAULT 0
+) ENGINE=MEMORY DEFAULT CHARSET=utf8
+create table t (i int primary key) engine=rocksdb;
+insert into t values (1), (2), (3);
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
+Deadlock #1
+connection con1;
+begin;
+select * from t where i=1 for update;
+i
+1
+connection con2;
+begin;
+select * from t where i=2 for update;
+i
+2
+connection con1;
+select * from t where i=2 for update;
+connection con2;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+connection con1;
+i
+2
+rollback;
+connection default;
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1
+Deadlock #2
+connection con1;
+begin;
+select * from t where i=1 for update;
+i
+1
+connection con2;
+begin;
+select * from t where i=2 for update;
+i
+2
+connection con1;
+select * from t where i=2 for update;
+connection con2;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+connection con1;
+i
+2
+rollback;
+connection default;
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1
+set global rocksdb_max_latest_deadlocks = 10;
+Deadlock #3
+connection con1;
+begin;
+select * from t where i=1 for update;
+i
+1
+connection con2;
+begin;
+select * from t where i=2 for update;
+i
+2
+connection con1;
+select * from t where i=2 for update;
+connection con2;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+connection con1;
+i
+2
+rollback;
+connection default;
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1
+set global rocksdb_max_latest_deadlocks = 1;
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1
+connection con3;
+set rocksdb_deadlock_detect_depth = 2;
+Deadlock #4
+connection con1;
+begin;
+select * from t where i=1 for update;
+i
+1
+connection con2;
+begin;
+select * from t where i=2 for update;
+i
+2
+connection con3;
+begin;
+select * from t where i=3 for update;
+i
+3
+connection con1;
+select * from t where i=2 for update;
+connection con2;
+select * from t where i=3 for update;
+connection con3;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+connection con2;
+i
+3
+rollback;
+connection con1;
+i
+2
+rollback;
+connection default;
+set global rocksdb_max_latest_deadlocks = 5;
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
+Deadlock #5
+connection con1;
+begin;
+select * from t where i=1 for update;
+i
+1
+connection con2;
+begin;
+select * from t where i=2 for update;
+i
+2
+connection con3;
+begin;
+select * from t where i=3 lock in share mode;
+i
+3
+connection con1;
+select * from t where i=100 for update;
+i
+select * from t where i=101 for update;
+i
+select * from t where i=2 for update;
+connection con2;
+select * from t where i=3 lock in share mode;
+i
+3
+select * from t where i=200 for update;
+i
+select * from t where i=201 for update;
+i
+select * from t where i=1 lock in share mode;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+connection con1;
+i
+2
+rollback;
+connection con3;
+rollback;
+connection default;
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID default WAITING_KEY SHARED PRIMARY test.t 1
+disconnect con1;
+disconnect con2;
+disconnect con3;
+set global rocksdb_lock_wait_timeout = @prior_lock_wait_timeout;
+set global rocksdb_deadlock_detect = @prior_deadlock_detect;
+drop table t;
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID default WAITING_KEY EXCLUSIVE INDEX_NAME TABLE_NAME 0
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID default WAITING_KEY SHARED INDEX_NAME TABLE_NAME 1
+set global rocksdb_max_latest_deadlocks = 0;
+# Clears the deadlock buffer of any existing deadlocks.
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TIMESTAMP TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
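The bracketing statements in this file rely on a side effect the test's own comments call out: lowering rocksdb_max_latest_deadlocks discards buffered deadlock records, so dropping it to 0 and restoring it empties the history shown in information_schema.rocksdb_deadlock. As a reusable sketch:

SET @saved_max = @@GLOBAL.rocksdb_max_latest_deadlocks;
SET GLOBAL rocksdb_max_latest_deadlocks = 0;           -- shrinking the cap discards buffered deadlocks
SET GLOBAL rocksdb_max_latest_deadlocks = @saved_max;  -- restore the configured history depth
SELECT * FROM information_schema.rocksdb_deadlock;     -- now returns no rows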
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index.result b/storage/rocksdb/mysql-test/rocksdb/r/index.result
new file mode 100644
index 00000000000..0920d0e01b2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/index.result
@@ -0,0 +1,89 @@
+CREATE TABLE t1 (a INT,
+b CHAR(8),
+pk INT PRIMARY KEY,
+KEY (a)
+) ENGINE=rocksdb;
+SHOW KEYS IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A # NULL NULL LSMTREE
+t1 1 a 1 a A # NULL NULL YES LSMTREE
+DROP TABLE t1;
+CREATE TABLE t1 (a INT,
+b CHAR(8),
+pk INT PRIMARY KEY,
+KEY a_b (a,b) COMMENT 'a_b index'
+) ENGINE=rocksdb;
+SHOW KEYS IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A # NULL NULL LSMTREE
+t1 1 a_b 1 a A # NULL NULL YES LSMTREE a_b index
+t1 1 a_b 2 b A # NULL NULL YES LSMTREE a_b index
+DROP TABLE t1;
+CREATE TABLE t1 (a INT,
+b CHAR(8),
+pk INT PRIMARY KEY,
+KEY (a),
+KEY (b)
+) ENGINE=rocksdb;
+SHOW KEYS IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A # NULL NULL LSMTREE
+t1 1 a 1 a A # NULL NULL YES LSMTREE
+t1 1 b 1 b A # NULL NULL YES LSMTREE
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (100,'z');
+ALTER TABLE t1 ADD KEY (a) COMMENT 'simple index on a';
+SHOW INDEX FROM t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A # NULL NULL LSMTREE
+t1 1 a 1 a A # NULL NULL YES LSMTREE simple index on a
+ALTER TABLE t1 DROP KEY a;
+DROP TABLE t1;
+set global rocksdb_large_prefix=0;
+CREATE TABLE t1 (
+a BLOB(1024),
+KEY (a(767))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(1024),
+KEY (a(768))
+) ENGINE=rocksdb;
+Warnings:
+Note 1071 Specified key was too long; max key length is 767 bytes
+DROP TABLE t1;
+set global rocksdb_large_prefix=1;
+CREATE TABLE t1 (
+a BLOB(4096),
+KEY (a(3072))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(4096),
+KEY (a(3073))
+) ENGINE=rocksdb;
+Warnings:
+Note 1071 Specified key was too long; max key length is 3072 bytes
+DROP TABLE t1;
+set global rocksdb_large_prefix=DEFAULT;
+#
+# Issue #376: MyRocks: ORDER BY optimizer is unable to use the index extension
+#
+create table t0 (a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int);
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+create table t2 (
+pk int not null,
+a int not null,
+b int not null,
+primary key(pk),
+key(a)
+) engine=rocksdb;
+insert into t2 select A.a, FLOOR(A.a/10), A.a from t1 A;
+# This must have type=range, index=a, and must not have 'Using filesort':
+explain select * from t2 force index (a) where a=0 and pk>=3 order by pk;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 8 NULL # Using index condition
+drop table t0,t1,t2;
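index.result pins the two prefix limits visible in the warnings above: with rocksdb_large_prefix=0 an index prefix longer than 767 bytes draws Note 1071 and is truncated, and with rocksdb_large_prefix=1 the same happens past 3072 bytes; the CREATE TABLE itself still succeeds either way. A sketch probing the smaller limit (table name is illustrative):

SET GLOBAL rocksdb_large_prefix = 0;
CREATE TABLE p (a BLOB(1024), KEY (a(768))) ENGINE=RocksDB;  -- expect Note 1071; prefix capped at 767 bytes
SHOW WARNINGS;
DROP TABLE p;
SET GLOBAL rocksdb_large_prefix = DEFAULT;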
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index_file_map.result b/storage/rocksdb/mysql-test/rocksdb/r/index_file_map.result
new file mode 100644
index 00000000000..ad007d71e15
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/index_file_map.result
@@ -0,0 +1,31 @@
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+CREATE TABLE t1 (i INT PRIMARY KEY, j INT, INDEX(j)) ENGINE = ROCKSDB;
+CREATE TABLE t2 (k INT PRIMARY KEY, l INT REFERENCES t1.i) ENGINE = ROCKSDB;
+INSERT INTO t1 VALUES (1,2), (2,4), (3,6), (4,8), (5,10);
+INSERT INTO t2 VALUES (100,1), (200,2), (300,3), (400,4);
+COMMIT;
+SET GLOBAL rocksdb_force_flush_memtable_now = 1;
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_INDEX_FILE_MAP
+WHERE INDEX_NUMBER =
+(SELECT INDEX_NUMBER FROM INFORMATION_SCHEMA.ROCKSDB_DDL
+WHERE TABLE_NAME = 't1' AND INDEX_NAME = "PRIMARY");
+COLUMN_FAMILY INDEX_NUMBER SST_NAME NUM_ROWS DATA_SIZE ENTRY_DELETES ENTRY_SINGLEDELETES ENTRY_MERGES ENTRY_OTHERS DISTINCT_KEYS_PREFIX
+# # SSTNAME 5 # # # # # 5
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_INDEX_FILE_MAP
+WHERE INDEX_NUMBER =
+(SELECT INDEX_NUMBER FROM INFORMATION_SCHEMA.ROCKSDB_DDL
+WHERE TABLE_NAME = 't1' AND INDEX_NAME = "j");
+COLUMN_FAMILY INDEX_NUMBER SST_NAME NUM_ROWS DATA_SIZE ENTRY_DELETES ENTRY_SINGLEDELETES ENTRY_MERGES ENTRY_OTHERS DISTINCT_KEYS_PREFIX
+# # SSTNAME 5 # # # # # 5,5
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_INDEX_FILE_MAP
+WHERE INDEX_NUMBER =
+(SELECT INDEX_NUMBER FROM INFORMATION_SCHEMA.ROCKSDB_DDL
+WHERE TABLE_NAME = 't2' AND INDEX_NAME = "PRIMARY");
+COLUMN_FAMILY INDEX_NUMBER SST_NAME NUM_ROWS DATA_SIZE ENTRY_DELETES ENTRY_SINGLEDELETES ENTRY_MERGES ENTRY_OTHERS DISTINCT_KEYS_PREFIX
+# # SSTNAME 4 # # # # # 4
+select count(*) > 0 from information_schema.rocksdb_sst_props;
+count(*) > 0
+1
+DROP TABLE t1;
+DROP TABLE t2;
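ROCKSDB_INDEX_FILE_MAP reports only what has reached SST files, which is why the test flushes the memtable first; rows still in the memtable would be invisible to it. The test resolves the internal index number with a subquery against ROCKSDB_DDL; the equivalent join form, shown here as a sketch:

SET GLOBAL rocksdb_force_flush_memtable_now = 1;  -- flush the memtable so the rows reach SST files
SELECT f.SST_NAME, f.NUM_ROWS, f.DISTINCT_KEYS_PREFIX
FROM INFORMATION_SCHEMA.ROCKSDB_INDEX_FILE_MAP f
JOIN INFORMATION_SCHEMA.ROCKSDB_DDL d ON f.INDEX_NUMBER = d.INDEX_NUMBER
WHERE d.TABLE_NAME = 't1' AND d.INDEX_NAME = 'PRIMARY';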
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index_key_block_size.result b/storage/rocksdb/mysql-test/rocksdb/r/index_key_block_size.result
new file mode 100644
index 00000000000..b0113d79bb2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/index_key_block_size.result
@@ -0,0 +1,51 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT,
+b CHAR(8),
+pk INT PRIMARY KEY,
+KEY (a) KEY_BLOCK_SIZE=8
+) ENGINE=rocksdb;
+SHOW KEYS IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A # NULL NULL LSMTREE
+t1 1 a 1 a A # NULL NULL YES LSMTREE
+DROP TABLE t1;
+CREATE TABLE t1 (a INT,
+b CHAR(8),
+pk INT PRIMARY KEY,
+KEY ind1(b ASC) KEY_BLOCK_SIZE=0
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A # NULL NULL LSMTREE
+t1 1 ind1 1 b A # NULL NULL YES LSMTREE
+DROP TABLE t1;
+CREATE TABLE t1 (a INT,
+b CHAR(8),
+PRIMARY KEY ind2(b(1) DESC) KEY_BLOCK_SIZE=32768 COMMENT 'big key_block_size value'
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 b A # 1 NULL LSMTREE big key_block_size value
+DROP TABLE t1;
+CREATE TABLE t1 (a INT,
+b CHAR(8),
+pk INT AUTO_INCREMENT PRIMARY KEY,
+KEY a_b(a,b) KEY_BLOCK_SIZE=8192
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A # NULL NULL LSMTREE
+t1 1 a_b 1 a A # NULL NULL YES LSMTREE
+t1 1 a_b 2 b A # NULL NULL YES LSMTREE
+DROP TABLE t1;
+CREATE TABLE t1 (a INT,
+b CHAR(8),
+PRIMARY KEY (b)
+) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (100,'z');
+ALTER TABLE t1 ADD KEY(a) KEY_BLOCK_SIZE 8192;
+SHOW INDEX FROM t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 b A # NULL NULL LSMTREE
+t1 1 a 1 a A # NULL NULL YES LSMTREE
+DROP TABLE t1;
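Judging by these expected results, MyRocks accepts any KEY_BLOCK_SIZE value (0, 8, 8192, even 32768) without error or warning; the attribute appears to be parsed for syntax compatibility only and, unlike InnoDB's compressed row format, has no effect on the LSM-tree storage. A minimal check, with an illustrative table name:

CREATE TABLE kb (a INT, pk INT PRIMARY KEY, KEY (a) KEY_BLOCK_SIZE=8) ENGINE=RocksDB;
SHOW WARNINGS;  -- expect an empty set: the attribute is accepted silently
DROP TABLE kb;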
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result b/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result
new file mode 100644
index 00000000000..7fb9055083b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result
@@ -0,0 +1,48 @@
+CREATE TABLE t1
+(
+/* fields/keys for row retrieval tests */
+key1 INT,
+key2 INT,
+key3 INT,
+key4 INT,
+/* make rows much bigger than keys */
+filler1 CHAR(200),
+KEY(key1),
+KEY(key2)
+) ENGINE=ROCKSDB;
+CREATE TABLE t0 AS SELECT * FROM t1;
+# Printing of many insert into t0 values (....) disabled.
+# Printing of many insert into t1 select .... from t0 disabled.
+# Printing of many insert into t1 (...) values (....) disabled.
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+7201
+SET GLOBAL rocksdb_force_flush_memtable_now = 1;
+EXPLAIN UPDATE t1 SET filler1='to be deleted' WHERE key1=100 AND key2=100;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge key1,key2 key1,key2 5,5 NULL # Using intersect(key1,key2); Using where
+UPDATE t1 SET filler1='to be deleted' WHERE key1=100 and key2=100;
+DROP TABLE t0, t1;
+create table t1 (key1 int, key2 int, key3 int, key (key1), key (key2), key(key3)) engine=rocksdb;
+insert into t1 values (1, 100, 100), (1, 200, 200), (1, 300, 300);
+set global rocksdb_force_flush_memtable_now=1;
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+explain select * from t1 where key1 = 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref key1 key1 5 const #
+explain select key1,key2 from t1 where key1 = 1 or key2 = 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge key1,key2 key1,key2 5,5 NULL # Using union(key1,key2); Using where
+select * from t1 where key1 = 1;
+key1 key2 key3
+1 100 100
+1 200 200
+1 300 300
+select key1,key2 from t1 where key1 = 1 or key2 = 1;
+key1 key2
+1 100
+1 200
+1 300
+drop table t1;
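
The pattern in this file -- flush the memtable, then ANALYZE, then EXPLAIN -- is what gives the optimizer SST-backed statistics to cost an index_merge plan against. A hedged sketch of the same sequence on a throwaway table (im_demo is hypothetical, and it assumes rocksdb_force_flush_memtable_now behaves as recorded above):

# Hedged sketch, not part of the recorded result file.
CREATE TABLE im_demo (k1 INT, k2 INT, KEY(k1), KEY(k2)) ENGINE=rocksdb;
INSERT INTO im_demo VALUES (1,100),(1,200),(1,300);
SET GLOBAL rocksdb_force_flush_memtable_now = 1;  -- make SST-based stats visible
ANALYZE TABLE im_demo;
-- with both keys usable, an OR over k1/k2 should report union(k1,k2)
EXPLAIN SELECT k1, k2 FROM im_demo WHERE k1 = 1 OR k2 = 1;
DROP TABLE im_demo;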
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb2.result b/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb2.result
new file mode 100644
index 00000000000..4603e049724
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb2.result
@@ -0,0 +1,1416 @@
+set global rocksdb_force_flush_memtable_now=1;
+#---------------- Index merge test 1 -------------------------------------------
+SET SESSION DEFAULT_STORAGE_ENGINE = RocksDB;
+drop table if exists t0, t1, t2, t3, t4;
+create table t0
+(
+key1 int not null,
+key2 int not null,
+key3 int not null,
+key4 int not null,
+key5 int not null,
+key6 int not null,
+key7 int not null,
+key8 int not null,
+INDEX i1(key1),
+INDEX i2(key2),
+INDEX i3(key3),
+INDEX i4(key4),
+INDEX i5(key5),
+INDEX i6(key6),
+INDEX i7(key7),
+INDEX i8(key8)
+);
+analyze table t0;
+Table Op Msg_type Msg_text
+test.t0 analyze status OK
+explain select * from t0 where key1 < 3 or key1 > 1020;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 range i1 i1 4 NULL 4 Using index condition; Using where
+explain
+select * from t0 where key1 < 3 or key2 > 1020;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2 i1,i2 4,4 NULL 4 Using sort_union(i1,i2); Using where
+select * from t0 where key1 < 3 or key2 > 1020;
+key1 key2 key3 key4 key5 key6 key7 key8
+1 1 1 1 1 1 1 1023
+2 2 2 2 2 2 2 1022
+1021 1021 1021 1021 1021 1021 1021 3
+1022 1022 1022 1022 1022 1022 1022 2
+1023 1023 1023 1023 1023 1023 1023 1
+1024 1024 1024 1024 1024 1024 1024 0
+explain select * from t0 where key1 < 2 or key2 <3;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where
+explain
+select * from t0 where (key1 > 30 and key1<35) or (key2 >32 and key2 < 40);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where
+select * from t0 where (key1 > 30 and key1<35) or (key2 >32 and key2 < 40);
+key1 key2 key3 key4 key5 key6 key7 key8
+31 31 31 31 31 31 31 993
+32 32 32 32 32 32 32 992
+33 33 33 33 33 33 33 991
+34 34 34 34 34 34 34 990
+35 35 35 35 35 35 35 989
+36 36 36 36 36 36 36 988
+37 37 37 37 37 37 37 987
+38 38 38 38 38 38 38 986
+39 39 39 39 39 39 39 985
+explain select * from t0 ignore index (i2) where key1 < 3 or key2 <4;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 ALL i1 NULL NULL NULL # Using where
+explain select * from t0 where (key1 < 3 or key2 <4) and key3 = 50;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 ref i1,i2,i3 i3 4 const # Using where
+explain select * from t0 use index (i1,i2) where (key1 < 2 or key2 <3) and key3 = 50;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where
+explain select * from t0 where (key1 > 1 or key2 > 2);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where
+explain select * from t0 force index (i1,i2) where (key1 > 1 or key2 > 2);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where
+explain
+select * from t0 where key1<2 or key2<3 or (key1>5 and key1<7) or
+(key1>10 and key1<12) or (key2>100 and key2<102);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where
+explain select * from t0 where key2 = 45 or key1 <=> null;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 range i1,i2 i2 4 NULL # Using index condition
+explain select * from t0 where key2 = 45 or key1 is not null;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 ALL i1,i2 NULL NULL NULL # Using where
+explain select * from t0 where key2 = 45 or key1 is null;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 ref i2 i2 4 const #
+explain select * from t0 where key2=10 or key3=3 or key4 <=> null;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i2,i3,i4 i2,i3 4,4 NULL # Using union(i2,i3); Using where
+explain select * from t0 where key2=10 or key3=3 or key4 is null;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i2,i3 i2,i3 4,4 NULL # Using union(i2,i3); Using where
+explain select key1 from t0 where (key1 <=> null) or (key2 < 2) or
+(key3=10) or (key4 <=> null);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2,i3,i4 i2,i3 4,4 NULL # Using sort_union(i2,i3); Using where
+explain select key1 from t0 where (key1 <=> null) or (key1 < 5) or
+(key3=10) or (key4 <=> null);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i3,i4 i1,i3 4,4 NULL # Using sort_union(i1,i3); Using where
+explain select * from t0 where
+(key1 < 2 or key2 < 2) and (key3 < 3 or key4 < 3) and (key5 < 5 or key6 < 5);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2,i3,i4,i5,i6 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where
+explain
+select * from t0 where (key1 < 2 or key2 < 4) and (key1 < 5 or key3 < 3);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2,i3 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where
+select * from t0 where (key1 < 2 or key2 < 4) and (key1 < 5 or key3 < 3);
+key1 key2 key3 key4 key5 key6 key7 key8
+1 1 1 1 1 1 1 1023
+2 2 2 2 2 2 2 1022
+3 3 3 3 3 3 3 1021
+explain select * from t0 where
+(key1 < 3 or key2 < 2) and (key3 < 3 or key4 < 3) and (key5 < 2 or key6 < 2);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2,i3,i4,i5,i6 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where
+explain select * from t0 where
+(key1 < 3 or key2 < 3) and (key3 < 70);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 range i1,i2,i3 i3 4 NULL # Using index condition; Using where
+explain select * from t0 where
+(key1 < 3 or key2 < 3) and (key3 < 1000);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 range i1,i2,i3 i3 4 NULL # Using index condition; Using where
+explain select * from t0 where
+((key1 < 3 or key2 < 3) and (key2 <4 or key3 < 3))
+or
+key2 > 4;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2,i3 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where
+explain select * from t0 where
+((key1 < 4 or key2 < 4) and (key2 <4 or key3 < 3))
+or
+key1 < 5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2,i3 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where
+select * from t0 where
+((key1 < 4 or key2 < 4) and (key2 <4 or key3 < 3))
+or
+key1 < 5;
+key1 key2 key3 key4 key5 key6 key7 key8
+1 1 1 1 1 1 1 1023
+2 2 2 2 2 2 2 1022
+3 3 3 3 3 3 3 1021
+4 4 4 4 4 4 4 1020
+explain select * from t0 where
+((key1 < 2 or key2 < 2) and (key3 <4 or key5 < 3))
+or
+((key5 < 3 or key6 < 3) and (key7 <3 or key8 < 3));
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2,i3,i5,i6,i7,i8 i1,i2,i5,i6 4,4,4,4 NULL # Using sort_union(i1,i2,i5,i6); Using where
+explain select * from t0 where
+((key3 <3 or key5 < 4) and (key1 < 3 or key2 < 3))
+or
+((key7 <5 or key8 < 3) and (key5 < 4 or key6 < 4));
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2,i3,i5,i6,i7,i8 i3,i5,i7,i8 4,4,4,4 NULL # Using sort_union(i3,i5,i7,i8); Using where
+explain select * from t0 where
+((key3 <3 or key5 < 4) and (key1 < 3 or key2 < 4))
+or
+((key3 <4 or key5 < 2) and (key5 < 5 or key6 < 3));
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2,i3,i5,i6 i3,i5 4,4 NULL # Using sort_union(i3,i5); Using where
+explain select * from t0 where
+((key3 <4 or key5 < 3) and (key1 < 3 or key2 < 3))
+or
+(((key3 <5 and key7 < 5) or key5 < 2) and (key5 < 4 or key6 < 4));
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2,i3,i5,i6,i7 i3,i5 4,4 NULL # Using sort_union(i3,i5); Using where
+explain select * from t0 where
+((key3 <5 or key5 < 4) and (key1 < 4 or key2 < 4))
+or
+((key3 >5 or key5 < 2) and (key5 < 5 or key6 < 6));
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2,i3,i5,i6 i3,i5 4,4 NULL # Using sort_union(i3,i5); Using where
+explain select * from t0 force index(i1, i2, i3, i4, i5, i6 ) where
+((key3 <3 or key5 < 4) and (key1 < 3 or key2 < 3))
+or
+((key3 >4 or key5 < 2) and (key5 < 5 or key6 < 4));
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2,i3,i5,i6 i3,i5 4,4 NULL # Using sort_union(i3,i5); Using where
+explain select * from t0 force index(i1, i2, i3, i4, i5, i6 ) where
+((key3 <5 or key5 < 4) and (key1 < 4 or key2 < 4))
+or
+((key3 >=5 or key5 < 2) and (key5 < 5 or key6 < 6));
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2,i3,i5,i6 i3,i5 0,4 NULL # Using sort_union(i3,i5); Using where
+select * from t0 where key1 < 3 or key8 < 2 order by key1;
+key1 key2 key3 key4 key5 key6 key7 key8
+1 1 1 1 1 1 1 1023
+2 2 2 2 2 2 2 1022
+1023 1023 1023 1023 1023 1023 1023 1
+1024 1024 1024 1024 1024 1024 1024 0
+explain
+select * from t0 where key1 < 3 or key8 < 2 order by key1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i8 i1,i8 4,4 NULL # Using sort_union(i1,i8); Using where; Using filesort
+create table t2 like t0;
+insert into t2 select * from t0;
+alter table t2 add index i1_3(key1, key3);
+alter table t2 add index i2_3(key2, key3);
+alter table t2 drop index i1;
+alter table t2 drop index i2;
+alter table t2 add index i321(key3, key2, key1);
+explain select key3 from t2 where key1 = 100 or key2 = 100;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index_merge i1_3,i2_3 i1_3,i2_3 4,4 NULL # Using sort_union(i1_3,i2_3); Using where
+explain select key3 from t2 where key1 <100 or key2 < 100;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index_merge i1_3,i2_3 i1_3,i2_3 4,4 NULL # Using sort_union(i1_3,i2_3); Using where
+explain select key7 from t2 where key1 <100 or key2 < 100;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index_merge i1_3,i2_3 i1_3,i2_3 4,4 NULL # Using sort_union(i1_3,i2_3); Using where
+create table t4 (
+key1a int not null,
+key1b int not null,
+key2 int not null,
+key2_1 int not null,
+key2_2 int not null,
+key3 int not null,
+index i1a (key1a, key1b),
+index i1b (key1b, key1a),
+index i2_1(key2, key2_1),
+index i2_2(key2, key2_1)
+);
+Warnings:
+Note 1831 Duplicate index `i2_2`. This is deprecated and will be disallowed in a future release
+insert into t4 select key1,key1,key1 div 10, key1 % 10, key1 % 10, key1 from t0;
+select * from t4 where key1a = 3 or key1b = 4;
+key1a key1b key2 key2_1 key2_2 key3
+3 3 0 3 3 3
+4 4 0 4 4 4
+explain select * from t4 where key1a = 3 or key1b = 4;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t4 index_merge i1a,i1b i1a,i1b 4,4 NULL 4 Using sort_union(i1a,i1b); Using where
+explain select * from t4 where key2 = 1 and (key2_1 = 1 or key3 = 5);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t4 ref i2_1,i2_2 i2_1 4 const 2 Using where
+explain select * from t4 where key2 = 1 and (key2_1 = 1 or key2_2 = 5);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t4 ref i2_1,i2_2 i2_1 4 const 2 Using where
+explain select * from t4 where key2_1 = 1 or key2_2 = 5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t4 ALL NULL NULL NULL NULL # Using where
+create table t1 like t0;
+insert into t1 select * from t0;
+explain select * from t0 left join t1 on (t0.key1=t1.key1)
+where t0.key1=3 or t0.key2=4;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2 i1,i2 4,4 NULL # Using union(i1,i2); Using where
+1 SIMPLE t1 ref i1 i1 4 test.t0.key1 #
+select * from t0 left join t1 on (t0.key1=t1.key1)
+where t0.key1=3 or t0.key2=4;
+key1 key2 key3 key4 key5 key6 key7 key8 key1 key2 key3 key4 key5 key6 key7 key8
+3 3 3 3 3 3 3 1021 3 3 3 3 3 3 3 1021
+4 4 4 4 4 4 4 1020 4 4 4 4 4 4 4 1020
+explain
+select * from t0,t1 where (t0.key1=t1.key1) and ( t0.key1=3 or t0.key2=4);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2 i1,i2 4,4 NULL # Using union(i1,i2); Using where
+1 SIMPLE t1 ref i1 i1 4 test.t0.key1 #
+explain
+select * from t0,t1 where (t0.key1=t1.key1) and
+(t0.key1=3 or t0.key2<4) and t1.key1=2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 ref i1,i2 i1 4 const 2 Using where
+1 SIMPLE t1 ref i1 i1 4 const 2
+explain select * from t0,t1 where t0.key1 = 5 and
+(t1.key1 = t0.key1 or t1.key8 = t0.key1);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 ref i1 i1 4 const 2
+1 SIMPLE t1 index_merge i1,i8 i1,i8 4,4 NULL 4 Using union(i1,i8); Using where; Using join buffer (flat, BNL join)
+explain select * from t0,t1 where t0.key1 < 3 and
+(t1.key1 = t0.key1 or t1.key8 = t0.key1);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 range i1 i1 4 NULL # Using index condition
+1 SIMPLE t1 ALL i1,i8 NULL NULL NULL # Range checked for each record (index map: 0x81)
+explain select * from t1 where key1=3 or key2=4
+union select * from t1 where key1<4 or key3=5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 index_merge i1,i2 i1,i2 4,4 NULL 4 Using union(i1,i2); Using where
+2 UNION t1 index_merge i1,i3 i1,i3 4,4 NULL 4 Using sort_union(i1,i3); Using where
+NULL UNION RESULT <union1,2> ALL NULL NULL NULL NULL NULL
+explain select * from (select * from t1 where key1 = 3 or key2 =3) as Z where key8 >5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range i1,i2,i8 i8 4 NULL 2 Using index condition; Using where
+create table t3 like t0;
+insert into t3 select * from t0;
+alter table t3 add key9 int not null, add index i9(key9);
+alter table t3 add keyA int not null, add index iA(keyA);
+alter table t3 add keyB int not null, add index iB(keyB);
+alter table t3 add keyC int not null, add index iC(keyC);
+update t3 set key9=key1,keyA=key1,keyB=key1,keyC=key1;
+explain select * from t3 where
+key1=1 or key2=2 or key3=3 or key4=4 or
+key5=5 or key6=6 or key7=7 or key8=8 or
+key9=9 or keyA=10 or keyB=11 or keyC=12;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t3 index_merge i1,i2,i3,i4,i5,i6,i7,i8,i9,iA,iB,iC i1,i2,i3,i4,i5,i6,i7,i8,i9,iA,iB,iC 4,4,4,4,4,4,4,4,4,4,4,4 NULL 24 Using union(i1,i2,i3,i4,i5,i6,i7,i8,i9,iA,iB,iC); Using where
+select * from t3 where
+key1=1 or key2=2 or key3=3 or key4=4 or
+key5=5 or key6=6 or key7=7 or key8=8 or
+key9=9 or keyA=10 or keyB=11 or keyC=12;
+key1 key2 key3 key4 key5 key6 key7 key8 key9 keyA keyB keyC
+1 1 1 1 1 1 1 1023 1 1 1 1
+2 2 2 2 2 2 2 1022 2 2 2 2
+3 3 3 3 3 3 3 1021 3 3 3 3
+4 4 4 4 4 4 4 1020 4 4 4 4
+5 5 5 5 5 5 5 1019 5 5 5 5
+6 6 6 6 6 6 6 1018 6 6 6 6
+7 7 7 7 7 7 7 1017 7 7 7 7
+9 9 9 9 9 9 9 1015 9 9 9 9
+10 10 10 10 10 10 10 1014 10 10 10 10
+11 11 11 11 11 11 11 1013 11 11 11 11
+12 12 12 12 12 12 12 1012 12 12 12 12
+1016 1016 1016 1016 1016 1016 1016 8 1016 1016 1016 1016
+explain select * from t0 where key1 < 3 or key2 < 4;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2 i1,i2 4,4 NULL 4 Using sort_union(i1,i2); Using where
+select * from t0 where key1 < 3 or key2 < 4;
+key1 key2 key3 key4 key5 key6 key7 key8
+1 1 1 1 1 1 1 1023
+2 2 2 2 2 2 2 1022
+3 3 3 3 3 3 3 1021
+update t0 set key8=123 where key1 < 3 or key2 < 4;
+select * from t0 where key1 < 3 or key2 < 4;
+key1 key2 key3 key4 key5 key6 key7 key8
+1 1 1 1 1 1 1 123
+2 2 2 2 2 2 2 123
+3 3 3 3 3 3 3 123
+delete from t0 where key1 < 3 or key2 < 4;
+select * from t0 where key1 < 3 or key2 < 4;
+key1 key2 key3 key4 key5 key6 key7 key8
+select count(*) from t0;
+count(*)
+1021
+drop table t4;
+create table t4 (a int);
+insert into t4 values (1),(4),(3);
+set @save_join_buffer_size=@@join_buffer_size;
+set join_buffer_size= 4096;
+explain select max(A.key1 + B.key1 + A.key2 + B.key2 + A.key3 + B.key3 + A.key4 + B.key4 + A.key5 + B.key5)
+from t0 as A force index(i1,i2), t0 as B force index (i1,i2)
+where (A.key1 < 500000 or A.key2 < 3)
+and (B.key1 < 500000 or B.key2 < 3);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE A index_merge i1,i2 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where
+1 SIMPLE B index_merge i1,i2 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where; Using join buffer (flat, BNL join)
+select max(A.key1 + B.key1 + A.key2 + B.key2 + A.key3 + B.key3 + A.key4 + B.key4 + A.key5 + B.key5)
+from t0 as A force index(i1,i2), t0 as B force index (i1,i2)
+where (A.key1 < 500000 or A.key2 < 3)
+and (B.key1 < 500000 or B.key2 < 3);
+max(A.key1 + B.key1 + A.key2 + B.key2 + A.key3 + B.key3 + A.key4 + B.key4 + A.key5 + B.key5)
+10240
+update t0 set key1=1;
+explain select max(A.key1 + B.key1 + A.key2 + B.key2 + A.key3 + B.key3 + A.key4 + B.key4 + A.key5 + B.key5)
+from t0 as A force index(i1,i2), t0 as B force index (i1,i2)
+where (A.key1 = 1 or A.key2 = 1)
+and (B.key1 = 1 or B.key2 = 1);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE A index_merge i1,i2 i1,i2 4,4 NULL # Using union(i1,i2); Using where
+1 SIMPLE B index_merge i1,i2 i1,i2 4,4 NULL # Using union(i1,i2); Using where; Using join buffer (flat, BNL join)
+select max(A.key1 + B.key1 + A.key2 + B.key2 + A.key3 + B.key3 + A.key4 + B.key4 + A.key5 + B.key5)
+from t0 as A force index(i1,i2), t0 as B force index (i1,i2)
+where (A.key1 = 1 or A.key2 = 1)
+and (B.key1 = 1 or B.key2 = 1);
+max(A.key1 + B.key1 + A.key2 + B.key2 + A.key3 + B.key3 + A.key4 + B.key4 + A.key5 + B.key5)
+8194
+alter table t0 add filler1 char(200), add filler2 char(200), add filler3 char(200);
+update t0 set key2=1, key3=1, key4=1, key5=1,key6=1,key7=1 where key7 < 500;
+select max(A.key1 + B.key1 + A.key2 + B.key2 + A.key3 + B.key3 + A.key4 + B.key4 + A.key5 + B.key5)
+from t0 as A, t0 as B
+where (A.key1 = 1 and A.key2 = 1 and A.key3 = 1 and A.key4=1 and A.key5=1 and A.key6=1 and A.key7 = 1 or A.key8=1)
+and (B.key1 = 1 and B.key2 = 1 and B.key3 = 1 and B.key4=1 and B.key5=1 and B.key6=1 and B.key7 = 1 or B.key8=1);
+max(A.key1 + B.key1 + A.key2 + B.key2 + A.key3 + B.key3 + A.key4 + B.key4 + A.key5 + B.key5)
+8186
+set join_buffer_size= @save_join_buffer_size;
+drop table t0, t1, t2, t3, t4;
+CREATE TABLE t1 (
+cola char(3) not null, colb char(3) not null, filler char(200),
+key(cola), key(colb)
+);
+INSERT INTO t1 VALUES ('foo','bar', 'ZZ'),('fuz','baz', 'ZZ');
+OPTIMIZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+select count(*) from t1;
+count(*)
+8704
+explain select * from t1 WHERE cola = 'foo' AND colb = 'bar';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref cola,colb cola 3 const # Using index condition; Using where
+explain select * from t1 force index(cola,colb) WHERE cola = 'foo' AND colb = 'bar';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref cola,colb cola 3 const # Using index condition; Using where
+drop table t1;
+CREATE TABLE t1(a INT);
+INSERT INTO t1 VALUES(1);
+CREATE TABLE t2(a INT, b INT, dummy CHAR(16) DEFAULT '', KEY(a), KEY(b));
+INSERT INTO t2(a,b) VALUES
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(1,2);
+LOCK TABLES t1 WRITE, t2 WRITE;
+INSERT INTO t2(a,b) VALUES(1,2);
+SELECT t2.a FROM t1,t2 WHERE t2.b=2 AND t2.a=1;
+a
+1
+1
+UNLOCK TABLES;
+DROP TABLE t1, t2;
+CREATE TABLE `t1` (
+`a` int(11) DEFAULT NULL,
+`filler` char(200) DEFAULT NULL,
+`b` int(11) DEFAULT NULL,
+KEY `a` (`a`),
+KEY `b` (`b`)
+) ENGINE=MEMORY DEFAULT CHARSET=latin1;
+insert into t1 values
+(0, 'filler', 0), (1, 'filler', 1), (2, 'filler', 2), (3, 'filler', 3),
+(4, 'filler', 4), (5, 'filler', 5), (6, 'filler', 6), (7, 'filler', 7),
+(8, 'filler', 8), (9, 'filler', 9), (0, 'filler', 0), (1, 'filler', 1),
+(2, 'filler', 2), (3, 'filler', 3), (4, 'filler', 4), (5, 'filler', 5),
+(6, 'filler', 6), (7, 'filler', 7), (8, 'filler', 8), (9, 'filler', 9),
+(10, 'filler', 10), (11, 'filler', 11), (12, 'filler', 12), (13, 'filler', 13),
+(14, 'filler', 14), (15, 'filler', 15), (16, 'filler', 16), (17, 'filler', 17),
+(18, 'filler', 18), (19, 'filler', 19), (4, '5 ', 0), (5, '4 ', 0),
+(4, '4 ', 0), (4, 'qq ', 5), (5, 'qq ', 4), (4, 'zz ', 4);
+create table t2(
+`a` int(11) DEFAULT NULL,
+`filler` char(200) DEFAULT NULL,
+`b` int(11) DEFAULT NULL,
+KEY USING BTREE (`a`),
+KEY USING BTREE (`b`)
+) ENGINE=MEMORY DEFAULT CHARSET=latin1;
+insert into t2 select * from t1;
+must use sort-union rather than union:
+explain select * from t1 where a=4 or b=4;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge a,b a,b 5,5 NULL # Using sort_union(a,b); Using where
+select * from t1 where a=4 or b=4;
+a filler b
+4 4 0
+4 5 0
+4 filler 4
+4 filler 4
+4 qq 5
+4 zz 4
+5 qq 4
+select * from t1 ignore index(a,b) where a=4 or b=4;
+a filler b
+4 4 0
+4 5 0
+4 filler 4
+4 filler 4
+4 qq 5
+4 zz 4
+5 qq 4
+must use union, not sort-union:
+explain select * from t2 where a=4 or b=4;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index_merge a,b a,b 5,5 NULL # Using union(a,b); Using where
+select * from t2 where a=4 or b=4;
+a filler b
+4 4 0
+4 5 0
+4 filler 4
+4 filler 4
+4 qq 5
+4 zz 4
+5 qq 4
+drop table t1, t2;
+CREATE TABLE t1 (a varchar(8), b set('a','b','c','d','e','f','g','h'),
+KEY b(b), KEY a(a));
+INSERT INTO t1 VALUES ('y',''), ('z','');
+SELECT b,a from t1 WHERE (b!='c' AND b!='f' && b!='h') OR
+(a='pure-S') OR (a='DE80337a') OR (a='DE80799');
+b a
+ y
+ z
+DROP TABLE t1;
+#
+# BUG#40974: Incorrect query results when using clause evaluated using range check
+#
+create table t0 (a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1 (a int);
+insert into t1 values (1),(2);
+create table t2(a int, b int);
+insert into t2 values (1,1), (2, 1000);
+create table t3 (a int, b int, filler char(100), key(a), key(b));
+insert into t3 select 1000, 1000,'filler' from t0 A, t0 B, t0 C;
+insert into t3 values (1,1,'data');
+insert into t3 values (1,1,'data');
+The plan should be ALL/ALL/ALL (Range checked for each record (index map: 0x3))
+explain select * from t1
+where exists (select 1 from t2, t3
+where t2.a=t1.a and (t3.a=t2.b or t3.b=t2.b or t3.b=t2.b+1));
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL #
+1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 func #
+2 MATERIALIZED t2 ALL NULL NULL NULL NULL #
+2 MATERIALIZED t3 ALL a,b NULL NULL NULL # Range checked for each record (index map: 0x3)
+select * from t1
+where exists (select 1 from t2, t3
+where t2.a=t1.a and (t3.a=t2.b or t3.b=t2.b or t3.b=t2.b+1));
+a
+1
+2
+drop table t0, t1, t2, t3;
+#
+# BUG#44810: index merge and order by with low sort_buffer_size
+# crashes server!
+#
+CREATE TABLE t1(a VARCHAR(128),b VARCHAR(128),KEY(A),KEY(B));
+INSERT INTO t1 VALUES (REPEAT('a',128),REPEAT('b',128));
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+EXPLAIN
+SELECT * FROM t1 FORCE INDEX(a,b) WHERE a LIKE 'a%' OR b LIKE 'b%'
+ORDER BY a,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge a,b a,b 131,131 NULL # Using sort_union(a,b); Using where; Using filesort
+SELECT * FROM t1 FORCE INDEX(a,b) WHERE a LIKE 'a%' OR b LIKE 'b%'
+ORDER BY a,b;
+SET SESSION sort_buffer_size=DEFAULT;
+DROP TABLE t1;
+End of 5.0 tests
+set global rocksdb_force_flush_memtable_now=1;
+#---------------- ROR-index_merge tests -----------------------
+SET SESSION DEFAULT_STORAGE_ENGINE = RocksDB;
+drop table if exists t0,t1,t2;
+create table t1
+(
+/* Field names reflect value(rowid) distribution, st=STairs, swt=SaWTooth */
+st_a int not null default 0,
+swt1a int not null default 0,
+swt2a int not null default 0,
+st_b int not null default 0,
+swt1b int not null default 0,
+swt2b int not null default 0,
+/* fields/keys for row retrieval tests */
+key1 int,
+key2 int,
+key3 int,
+key4 int,
+/* make rows much bigger than keys */
+filler1 char (200),
+filler2 char (200),
+filler3 char (200),
+filler4 char (200),
+filler5 char (200),
+filler6 char (200),
+/* order of keys is important */
+key sta_swt12a(st_a,swt1a,swt2a),
+key sta_swt1a(st_a,swt1a),
+key sta_swt2a(st_a,swt2a),
+key sta_swt21a(st_a,swt2a,swt1a),
+key st_a(st_a),
+key stb_swt1a_2b(st_b,swt1b,swt2a),
+key stb_swt1b(st_b,swt1b),
+key st_b(st_b),
+key(key1),
+key(key2),
+key(key3),
+key(key4)
+) ;
+create table t0 as select * from t1;
+# Printing of many insert into t0 values (....) disabled.
+alter table t1 disable keys;
+Warnings:
+Note 1031 Storage engine ROCKSDB of the table `test`.`t1` doesn't have this option
+# Printing of many insert into t1 select .... from t0 disabled.
+# Printing of many insert into t1 (...) values (....) disabled.
+alter table t1 enable keys;
+Warnings:
+Note 1031 Storage engine ROCKSDB of the table `test`.`t1` doesn't have this option
+select count(*) from t1;
+count(*)
+64801
+explain select key1,key2 from t1 where key1=100 and key2=100;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge key1,key2 key1,key2 5,5 NULL # Using intersect(key1,key2); Using where; Using index
+select key1,key2 from t1 where key1=100 and key2=100;
+key1 key2
+100 100
+select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100;
+key1 key2 key3 key4 filler1
+100 100 100 100 key1-key2-key3-key4
+insert into t1 (key1, key2, key3, key4, filler1) values (100, 100, -1, -1, 'key1-key2');
+insert into t1 (key1, key2, key3, key4, filler1) values (-1, -1, 100, 100, 'key4-key3');
+select key1,key2,filler1 from t1 where key1=100 and key2=100;
+key1 key2 filler1
+100 100 key1-key2-key3-key4
+100 100 key1-key2
+select key1,key2 from t1 where key1=100 and key2=100;
+key1 key2
+100 100
+100 100
+select key1,key2,key3,key4 from t1 where key1=100 and key2=100 or key3=100 and key4=100;
+key1 key2 key3 key4
+100 100 100 100
+100 100 -1 -1
+-1 -1 100 100
+select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100;
+key1 key2 key3 key4 filler1
+100 100 100 100 key1-key2-key3-key4
+100 100 -1 -1 key1-key2
+-1 -1 100 100 key4-key3
+select key1,key2,key3 from t1 where key1=100 and key2=100 and key3=100;
+key1 key2 key3
+100 100 100
+insert into t1 (key1,key2,key3,key4,filler1) values (101,101,101,101, 'key1234-101');
+select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=101;
+key1 key2 key3 key4 filler1
+100 100 100 100 key1-key2-key3-key4
+100 100 -1 -1 key1-key2
+101 101 101 101 key1234-101
+select key1,key2, filler1 from t1 where key1=100 and key2=100;
+key1 key2 filler1
+100 100 key1-key2-key3-key4
+100 100 key1-key2
+update t1 set filler1='to be deleted' where key1=100 and key2=100;
+update t1 set key1=200,key2=200 where key1=100 and key2=100;
+delete from t1 where key1=200 and key2=200;
+select key1,key2,filler1 from t1 where key2=100 and key2=200;
+key1 key2 filler1
+select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100;
+key1 key2 key3 key4 filler1
+-1 -1 100 100 key4-key3
+delete from t1 where key3=100 and key4=100;
+select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100;
+key1 key2 key3 key4 filler1
+select key1,key2 from t1 where key1=100 and key2=100;
+key1 key2
+insert into t1 (key1, key2, key3, key4, filler1) values (100, 100, 200, 200,'key1-key2-key3-key4-1');
+insert into t1 (key1, key2, key3, key4, filler1) values (100, 100, 200, 200,'key1-key2-key3-key4-2');
+insert into t1 (key1, key2, key3, key4, filler1) values (100, 100, 200, 200,'key1-key2-key3-key4-3');
+select key1,key2,key3,key4,filler1 from t1 where key3=200 or (key1=100 and key2=100) or key4=200;
+key1 key2 key3 key4 filler1
+100 100 200 200 key1-key2-key3-key4-1
+100 100 200 200 key1-key2-key3-key4-2
+100 100 200 200 key1-key2-key3-key4-3
+insert into t1 (key1, key2, key3, key4, filler1) values (-1, -1, -1, 200,'key4');
+select key1,key2,key3,key4,filler1 from t1 where key3=200 or (key1=100 and key2=100) or key4=200;
+key1 key2 key3 key4 filler1
+100 100 200 200 key1-key2-key3-key4-1
+100 100 200 200 key1-key2-key3-key4-2
+100 100 200 200 key1-key2-key3-key4-3
+-1 -1 -1 200 key4
+insert into t1 (key1, key2, key3, key4, filler1) values (-1, -1, 200, -1,'key3');
+select key1,key2,key3,key4,filler1 from t1 where key3=200 or (key1=100 and key2=100) or key4=200;
+key1 key2 key3 key4 filler1
+100 100 200 200 key1-key2-key3-key4-1
+100 100 200 200 key1-key2-key3-key4-2
+100 100 200 200 key1-key2-key3-key4-3
+-1 -1 -1 200 key4
+-1 -1 200 -1 key3
+drop table t0,t1;
+create table t2 (
+a char(10),
+b char(10),
+filler1 char(255),
+filler2 char(255),
+key(a(5)),
+key(b(5))
+);
+select count(a) from t2 where a='BBBBBBBB';
+count(a)
+4
+select count(a) from t2 where b='BBBBBBBB';
+count(a)
+4
+expla_or_bin select count(a_or_b) from t2 where a_or_b='AAAAAAAA' a_or_bnd a_or_b='AAAAAAAA';
+id select_type ta_or_ba_or_ble type possia_or_ble_keys key key_len ref rows Extra_or_b
+1 SIMPLE t2 ref a_or_b,a_or_b a_or_b 6 const 2 Using where
+select count(a) from t2 where a='AAAAAAAA' and b='AAAAAAAA';
+count(a)
+4
+select count(a) from t2 ignore index(a,b) where a='AAAAAAAA' and b='AAAAAAAA';
+count(a)
+4
+insert into t2 values ('ab', 'ab', 'uh', 'oh');
+explain select a from t2 where a='ab';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ref a a 6 const 2 Using where
+drop table t2;
+CREATE TABLE t1(c1 INT, c2 INT DEFAULT 0, c3 CHAR(255) DEFAULT '',
+KEY(c1), KEY(c2), KEY(c3));
+INSERT INTO t1(c1) VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),
+(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0);
+INSERT INTO t1 VALUES(0,0,0);
+CREATE TABLE t2(c1 int);
+INSERT INTO t2 VALUES(1);
+DELETE t1 FROM t1,t2 WHERE t1.c1=0 AND t1.c2=0;
+SELECT * FROM t1;
+c1 c2 c3
+DROP TABLE t1,t2;
+set global rocksdb_force_flush_memtable_now=1;
+#---------------- Index merge test 2 -------------------------------------------
+SET SESSION DEFAULT_STORAGE_ENGINE = RocksDB;
+drop table if exists t1,t2;
+create table t1
+(
+key1 int not null,
+key2 int not null,
+INDEX i1(key1),
+INDEX i2(key2)
+);
+explain select * from t1 where key1 < 5 or key2 > 197;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge i1,i2 i1,i2 4,4 NULL 4 Using sort_union(i1,i2); Using where
+select * from t1 where key1 < 5 or key2 > 197;
+key1 key2
+0 200
+1 199
+2 198
+3 197
+4 196
+explain select * from t1 where key1 < 3 or key2 > 195;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge i1,i2 i1,i2 4,4 NULL 4 Using sort_union(i1,i2); Using where
+select * from t1 where key1 < 3 or key2 > 195;
+key1 key2
+0 200
+1 199
+2 198
+3 197
+4 196
+alter table t1 add str1 char (255) not null,
+add zeroval int not null default 0,
+add str2 char (255) not null,
+add str3 char (255) not null;
+update t1 set str1='aaa', str2='bbb', str3=concat(key2, '-', key1 div 2, '_' ,if(key1 mod 2 = 0, 'a', 'A'));
+alter table t1 add primary key (str1, zeroval, str2, str3);
+explain select * from t1 where key1 < 5 or key2 > 197;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge i1,i2 i1,i2 4,4 NULL 4 Using sort_union(i1,i2); Using where
+select * from t1 where key1 < 5 or key2 > 197;
+key1 key2 str1 zeroval str2 str3
+4 196 aaa 0 bbb 196-2_a
+3 197 aaa 0 bbb 197-1_A
+2 198 aaa 0 bbb 198-1_a
+1 199 aaa 0 bbb 199-0_A
+0 200 aaa 0 bbb 200-0_a
+explain select * from t1 where key1 < 3 or key2 > 195;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge i1,i2 i1,i2 4,4 NULL 4 Using sort_union(i1,i2); Using where
+select * from t1 where key1 < 3 or key2 > 195;
+key1 key2 str1 zeroval str2 str3
+4 196 aaa 0 bbb 196-2_a
+3 197 aaa 0 bbb 197-1_A
+2 198 aaa 0 bbb 198-1_a
+1 199 aaa 0 bbb 199-0_A
+0 200 aaa 0 bbb 200-0_a
+drop table t1;
+create table t1 (
+pk integer not null auto_increment primary key,
+key1 integer,
+key2 integer not null,
+filler char (200),
+index (key1),
+index (key2)
+);
+show warnings;
+Level Code Message
+explain select pk from t1 where key1 = 1 and key2 = 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge key1,key2 key2,key1 4,5 NULL 1 Using intersect(key2,key1); Using where; Using index
+select pk from t1 where key2 = 1 and key1 = 1;
+pk
+26
+27
+select pk from t1 ignore index(key1,key2) where key2 = 1 and key1 = 1;
+pk
+26
+27
+drop table t1;
+create table t1 (
+pk int primary key auto_increment,
+key1a int,
+key2a int,
+key1b int,
+key2b int,
+dummy1 int,
+dummy2 int,
+dummy3 int,
+dummy4 int,
+key3a int,
+key3b int,
+filler1 char (200),
+index i1(key1a, key1b),
+index i2(key2a, key2b),
+index i3(key3a, key3b)
+);
+create table t2 (a int);
+insert into t2 values (0),(1),(2),(3),(4),(NULL);
+insert into t1 (key1a, key1b, key2a, key2b, key3a, key3b)
+select A.a, B.a, C.a, D.a, C.a, D.a from t2 A,t2 B,t2 C, t2 D;
+insert into t1 (key1a, key1b, key2a, key2b, key3a, key3b)
+select key1a, key1b, key2a, key2b, key3a, key3b from t1;
+insert into t1 (key1a, key1b, key2a, key2b, key3a, key3b)
+select key1a, key1b, key2a, key2b, key3a, key3b from t1;
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+select count(*) from t1;
+count(*)
+5184
+select count(*) from t1 where
+key1a = 2 and key1b is null and key2a = 2 and key2b is null;
+count(*)
+4
+select count(*) from t1 where
+key1a = 2 and key1b is null and key3a = 2 and key3b is null;
+count(*)
+4
+drop table t1,t2;
+create table t1 (
+id1 int,
+id2 date ,
+index idx2 (id1,id2),
+index idx1 (id2)
+);
+insert into t1 values(1,'20040101'), (2,'20040102');
+select * from t1 where id1 = 1 and id2= '20040101';
+id1 id2
+1 2004-01-01
+drop table t1;
+drop view if exists v1;
+CREATE TABLE t1 (
+`oid` int(11) unsigned NOT NULL auto_increment,
+`fk_bbk_niederlassung` int(11) unsigned NOT NULL,
+`fk_wochentag` int(11) unsigned NOT NULL,
+`uhrzeit_von` time NOT NULL COMMENT 'HH:MM',
+`uhrzeit_bis` time NOT NULL COMMENT 'HH:MM',
+`geloescht` tinyint(4) NOT NULL,
+`version` int(5) NOT NULL,
+PRIMARY KEY (`oid`),
+KEY `fk_bbk_niederlassung` (`fk_bbk_niederlassung`),
+KEY `fk_wochentag` (`fk_wochentag`),
+KEY `ix_version` (`version`)
+) DEFAULT CHARSET=latin1;
+insert into t1 values
+(1, 38, 1, '08:00:00', '13:00:00', 0, 1),
+(2, 38, 2, '08:00:00', '13:00:00', 0, 1),
+(3, 38, 3, '08:00:00', '13:00:00', 0, 1),
+(4, 38, 4, '08:00:00', '13:00:00', 0, 1),
+(5, 38, 5, '08:00:00', '13:00:00', 0, 1),
+(6, 38, 5, '08:00:00', '13:00:00', 1, 2),
+(7, 38, 3, '08:00:00', '13:00:00', 1, 2),
+(8, 38, 1, '08:00:00', '13:00:00', 1, 2),
+(9, 38, 2, '08:00:00', '13:00:00', 1, 2),
+(10, 38, 4, '08:00:00', '13:00:00', 1, 2),
+(11, 38, 1, '08:00:00', '13:00:00', 0, 3),
+(12, 38, 2, '08:00:00', '13:00:00', 0, 3),
+(13, 38, 3, '08:00:00', '13:00:00', 0, 3),
+(14, 38, 4, '08:00:00', '13:00:00', 0, 3),
+(15, 38, 5, '08:00:00', '13:00:00', 0, 3),
+(16, 38, 4, '08:00:00', '13:00:00', 0, 4),
+(17, 38, 5, '08:00:00', '13:00:00', 0, 4),
+(18, 38, 1, '08:00:00', '13:00:00', 0, 4),
+(19, 38, 2, '08:00:00', '13:00:00', 0, 4),
+(20, 38, 3, '08:00:00', '13:00:00', 0, 4),
+(21, 7, 1, '08:00:00', '13:00:00', 0, 1),
+(22, 7, 2, '08:00:00', '13:00:00', 0, 1),
+(23, 7, 3, '08:00:00', '13:00:00', 0, 1),
+(24, 7, 4, '08:00:00', '13:00:00', 0, 1),
+(25, 7, 5, '08:00:00', '13:00:00', 0, 1);
+create view v1 as
+select
+zeit1.oid AS oid,
+zeit1.fk_bbk_niederlassung AS fk_bbk_niederlassung,
+zeit1.fk_wochentag AS fk_wochentag,
+zeit1.uhrzeit_von AS uhrzeit_von,
+zeit1.uhrzeit_bis AS uhrzeit_bis,
+zeit1.geloescht AS geloescht,
+zeit1.version AS version
+from
+t1 zeit1
+where
+(zeit1.version =
+(select max(zeit2.version) AS `max(version)`
+ from t1 zeit2
+where
+((zeit1.fk_bbk_niederlassung = zeit2.fk_bbk_niederlassung) and
+(zeit1.fk_wochentag = zeit2.fk_wochentag) and
+(zeit1.uhrzeit_von = zeit2.uhrzeit_von) and
+(zeit1.uhrzeit_bis = zeit2.uhrzeit_bis)
+)
+)
+)
+and (zeit1.geloescht = 0);
+select * from v1 where oid = 21;
+oid fk_bbk_niederlassung fk_wochentag uhrzeit_von uhrzeit_bis geloescht version
+21 7 1 08:00:00 13:00:00 0 1
+drop view v1;
+drop table t1;
+CREATE TABLE t1(
+t_cpac varchar(2) NOT NULL,
+t_vers varchar(4) NOT NULL,
+t_rele varchar(2) NOT NULL,
+t_cust varchar(4) NOT NULL,
+filler1 char(250) default NULL,
+filler2 char(250) default NULL,
+PRIMARY KEY (t_cpac,t_vers,t_rele,t_cust),
+UNIQUE KEY IX_4 (t_cust,t_cpac,t_vers,t_rele),
+KEY IX_5 (t_vers,t_rele,t_cust)
+);
+insert into t1 values
+('tm','2.5 ','a ',' ','',''), ('tm','2.5U','a ','stnd','',''),
+('da','3.3 ','b ',' ','',''), ('da','3.3U','b ','stnd','',''),
+('tl','7.6 ','a ',' ','',''), ('tt','7.6 ','a ',' ','',''),
+('bc','B61 ','a ',' ','',''), ('bp','B61 ','a ',' ','',''),
+('ca','B61 ','a ',' ','',''), ('ci','B61 ','a ',' ','',''),
+('cp','B61 ','a ',' ','',''), ('dm','B61 ','a ',' ','',''),
+('ec','B61 ','a ',' ','',''), ('ed','B61 ','a ',' ','',''),
+('fm','B61 ','a ',' ','',''), ('nt','B61 ','a ',' ','',''),
+('qm','B61 ','a ',' ','',''), ('tc','B61 ','a ',' ','',''),
+('td','B61 ','a ',' ','',''), ('tf','B61 ','a ',' ','',''),
+('tg','B61 ','a ',' ','',''), ('ti','B61 ','a ',' ','',''),
+('tp','B61 ','a ',' ','',''), ('ts','B61 ','a ',' ','',''),
+('wh','B61 ','a ',' ','',''), ('bc','B61U','a ','stnd','',''),
+('bp','B61U','a ','stnd','',''), ('ca','B61U','a ','stnd','',''),
+('ci','B61U','a ','stnd','',''), ('cp','B61U','a ','stnd','',''),
+('dm','B61U','a ','stnd','',''), ('ec','B61U','a ','stnd','',''),
+('fm','B61U','a ','stnd','',''), ('nt','B61U','a ','stnd','',''),
+('qm','B61U','a ','stnd','',''), ('tc','B61U','a ','stnd','',''),
+('td','B61U','a ','stnd','',''), ('tf','B61U','a ','stnd','',''),
+('tg','B61U','a ','stnd','',''), ('ti','B61U','a ','stnd','',''),
+('tp','B61U','a ','stnd','',''), ('ts','B61U','a ','stnd','',''),
+('wh','B61U','a ','stnd','','');
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `t_cpac` varchar(2) NOT NULL,
+ `t_vers` varchar(4) NOT NULL,
+ `t_rele` varchar(2) NOT NULL,
+ `t_cust` varchar(4) NOT NULL,
+ `filler1` char(250) DEFAULT NULL,
+ `filler2` char(250) DEFAULT NULL,
+ PRIMARY KEY (`t_cpac`,`t_vers`,`t_rele`,`t_cust`),
+ UNIQUE KEY `IX_4` (`t_cust`,`t_cpac`,`t_vers`,`t_rele`),
+ KEY `IX_5` (`t_vers`,`t_rele`,`t_cust`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+select t_vers,t_rele,t_cust,filler1 from t1 where t_vers = '7.6';
+t_vers t_rele t_cust filler1
+7.6 a
+7.6 a
+select t_vers,t_rele,t_cust,filler1 from t1 where t_vers = '7.6'
+ and t_rele='a' and t_cust = ' ';
+t_vers t_rele t_cust filler1
+7.6 a
+7.6 a
+drop table t1;
+create table t1 (
+pk int(11) not null auto_increment,
+a int(11) not null default '0',
+b int(11) not null default '0',
+c int(11) not null default '0',
+filler1 datetime, filler2 varchar(15),
+filler3 longtext,
+kp1 varchar(4), kp2 varchar(7),
+kp3 varchar(2), kp4 varchar(4),
+kp5 varchar(7),
+filler4 char(1),
+primary key (pk),
+key idx1(a,b,c),
+key idx2(c),
+key idx3(kp1,kp2,kp3,kp4,kp5)
+) default charset=latin1;
+set @fill=NULL;
+SELECT COUNT(*) FROM t1 WHERE b = 0 AND a = 0 AND c = 13286427 AND
+kp1='279' AND kp2='ELM0678' AND kp3='6' AND kp4='10' AND kp5 = 'R ';
+COUNT(*)
+1
+drop table t1;
+create table t1
+(
+key1 int not null,
+key2 int not null default 0,
+key3 int not null default 0
+);
+insert into t1(key1) values (1),(2),(3),(4),(5),(6),(7),(8);
+set @d=8;
+insert into t1 (key1) select key1+@d from t1;
+set @d=@d*2;
+insert into t1 (key1) select key1+@d from t1;
+set @d=@d*2;
+insert into t1 (key1) select key1+@d from t1;
+set @d=@d*2;
+insert into t1 (key1) select key1+@d from t1;
+set @d=@d*2;
+insert into t1 (key1) select key1+@d from t1;
+set @d=@d*2;
+insert into t1 (key1) select key1+@d from t1;
+set @d=@d*2;
+insert into t1 (key1) select key1+@d from t1;
+set @d=@d*2;
+alter table t1 add index i2(key2);
+alter table t1 add index i3(key3);
+update t1 set key2=key1,key3=key1;
+select * from t1 where (key3 > 30 and key3<35) or (key2 >32 and key2 < 40);
+key1 key2 key3
+31 31 31
+32 32 32
+33 33 33
+34 34 34
+35 35 35
+36 36 36
+37 37 37
+38 38 38
+39 39 39
+drop table t1;
+#
+# Bug#56423: Different count with SELECT and CREATE SELECT queries
+#
+CREATE TABLE t1 (
+a INT,
+b INT,
+c INT,
+d INT,
+PRIMARY KEY (a),
+KEY (c),
+KEY bd (b,d)
+);
+INSERT INTO t1 VALUES
+(1, 0, 1, 0),
+(2, 1, 1, 1),
+(3, 1, 1, 1),
+(4, 0, 1, 1);
+EXPLAIN
+SELECT a
+FROM t1
+WHERE c = 1 AND b = 1 AND d = 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge c,bd c,bd 5,10 NULL 1 Using intersect(c,bd); Using where; Using index
+CREATE TABLE t2 ( a INT )
+SELECT a
+FROM t1
+WHERE c = 1 AND b = 1 AND d = 1;
+SELECT * FROM t2;
+a
+2
+3
+DROP TABLE t1, t2;
+CREATE TABLE t1( a INT, b INT, KEY(a), KEY(b) );
+INSERT INTO t1 VALUES (1, 2), (1, 2), (1, 2), (1, 2);
+SELECT * FROM t1 FORCE INDEX(a, b) WHERE a = 1 AND b = 2;
+a b
+1 2
+1 2
+1 2
+1 2
+DROP TABLE t1;
+# Code coverage of the fix.
+CREATE TABLE t1 ( a INT NOT NULL AUTO_INCREMENT PRIMARY KEY, b INT);
+INSERT INTO t1 (b) VALUES (1);
+UPDATE t1 SET b = 2 WHERE a = 1;
+SELECT * FROM t1;
+a b
+1 2
+CREATE TABLE t2 ( a INT NOT NULL AUTO_INCREMENT PRIMARY KEY, b VARCHAR(1) );
+INSERT INTO t2 (b) VALUES ('a');
+UPDATE t2 SET b = 'b' WHERE a = 1;
+SELECT * FROM t2;
+a b
+1 b
+DROP TABLE t1, t2;
+#
+# BUG#13970015: ASSERT `MIN_ENDP || MAX_ENDP' FAILED IN
+# HANDLER::MULTI_RANGE_READ_INFO_CONST
+#
+CREATE TABLE t1 (
+pk INT NOT NULL,
+col_int_key INT NOT NULL,
+col_varchar_key VARCHAR(1) NOT NULL,
+PRIMARY KEY (pk),
+KEY col_int_key (col_int_key),
+KEY col_varchar_key (col_varchar_key,col_int_key)
+);
+INSERT INTO t1 VALUES (1,1,'a'), (2,2,'b');
+EXPLAIN
+SELECT col_int_key
+FROM t1
+WHERE col_varchar_key >= 'l' OR
+(((pk BETWEEN 141 AND 141) OR col_varchar_key <> 'l')
+AND ((pk BETWEEN 141 AND 141) OR (col_int_key > 141)));
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index PRIMARY,col_int_key,col_varchar_key col_varchar_key 7 NULL 2 Using where; Using index
+SELECT col_int_key
+FROM t1
+WHERE col_varchar_key >= 'l' OR
+(((pk BETWEEN 141 AND 141) OR col_varchar_key <> 'l')
+AND ((pk BETWEEN 141 AND 141) OR (col_int_key > 141)));
+col_int_key
+DROP TABLE t1;
+set global rocksdb_force_flush_memtable_now=1;
+#---------------- 2-sweeps read Index merge test 2 -------------------------------
+SET SESSION DEFAULT_STORAGE_ENGINE = RocksDB;
+drop table if exists t1;
+create table t1 (
+pk int primary key,
+key1 int,
+key2 int,
+filler char(200),
+filler2 char(200),
+index(key1),
+index(key2)
+);
+select * from t1 where (key1 >= 2 and key1 <= 10) or (pk >= 4 and pk <=8 );
+pk key1 key2 filler filler2
+10 10 10 filler-data filler-data-2
+2 2 2 filler-data filler-data-2
+3 3 3 filler-data filler-data-2
+4 4 4 filler-data filler-data-2
+5 5 5 filler-data filler-data-2
+6 6 6 filler-data filler-data-2
+7 7 7 filler-data filler-data-2
+8 8 8 filler-data filler-data-2
+9 9 9 filler-data filler-data-2
+set @maxv=1000;
+select * from t1 where
+(pk < 5) or (pk > 10 and pk < 15) or (pk >= 50 and pk < 55 ) or (pk > @maxv-10)
+or key1=18 or key1=60;
+pk key1 key2 filler filler2
+1 1 1 filler-data filler-data-2
+1000 1000 1000 filler-data filler-data-2
+11 11 11 filler-data filler-data-2
+12 12 12 filler-data filler-data-2
+13 13 13 filler-data filler-data-2
+14 14 14 filler-data filler-data-2
+18 18 18 filler-data filler-data-2
+2 2 2 filler-data filler-data-2
+3 3 3 filler-data filler-data-2
+4 4 4 filler-data filler-data-2
+50 50 50 filler-data filler-data-2
+51 51 51 filler-data filler-data-2
+52 52 52 filler-data filler-data-2
+53 53 53 filler-data filler-data-2
+54 54 54 filler-data filler-data-2
+60 60 60 filler-data filler-data-2
+991 991 991 filler-data filler-data-2
+992 992 992 filler-data filler-data-2
+993 993 993 filler-data filler-data-2
+994 994 994 filler-data filler-data-2
+995 995 995 filler-data filler-data-2
+996 996 996 filler-data filler-data-2
+997 997 997 filler-data filler-data-2
+998 998 998 filler-data filler-data-2
+999 999 999 filler-data filler-data-2
+select * from t1 where
+(pk < 5) or (pk > 10 and pk < 15) or (pk >= 50 and pk < 55 ) or (pk > @maxv-10)
+or key1 < 3 or key1 > @maxv-11;
+pk key1 key2 filler filler2
+1 1 1 filler-data filler-data-2
+1000 1000 1000 filler-data filler-data-2
+11 11 11 filler-data filler-data-2
+12 12 12 filler-data filler-data-2
+13 13 13 filler-data filler-data-2
+14 14 14 filler-data filler-data-2
+2 2 2 filler-data filler-data-2
+3 3 3 filler-data filler-data-2
+4 4 4 filler-data filler-data-2
+50 50 50 filler-data filler-data-2
+51 51 51 filler-data filler-data-2
+52 52 52 filler-data filler-data-2
+53 53 53 filler-data filler-data-2
+54 54 54 filler-data filler-data-2
+990 990 990 filler-data filler-data-2
+991 991 991 filler-data filler-data-2
+992 992 992 filler-data filler-data-2
+993 993 993 filler-data filler-data-2
+994 994 994 filler-data filler-data-2
+995 995 995 filler-data filler-data-2
+996 996 996 filler-data filler-data-2
+997 997 997 filler-data filler-data-2
+998 998 998 filler-data filler-data-2
+999 999 999 filler-data filler-data-2
+select * from t1 where
+(pk < 5) or (pk > 10 and pk < 15) or (pk >= 50 and pk < 55 ) or (pk > @maxv-10)
+or
+(key1 < 5) or (key1 > 10 and key1 < 15) or (key1 >= 50 and key1 < 55 ) or (key1 > @maxv-10);
+pk key1 key2 filler filler2
+1 1 1 filler-data filler-data-2
+1000 1000 1000 filler-data filler-data-2
+11 11 11 filler-data filler-data-2
+12 12 12 filler-data filler-data-2
+13 13 13 filler-data filler-data-2
+14 14 14 filler-data filler-data-2
+2 2 2 filler-data filler-data-2
+3 3 3 filler-data filler-data-2
+4 4 4 filler-data filler-data-2
+50 50 50 filler-data filler-data-2
+51 51 51 filler-data filler-data-2
+52 52 52 filler-data filler-data-2
+53 53 53 filler-data filler-data-2
+54 54 54 filler-data filler-data-2
+991 991 991 filler-data filler-data-2
+992 992 992 filler-data filler-data-2
+993 993 993 filler-data filler-data-2
+994 994 994 filler-data filler-data-2
+995 995 995 filler-data filler-data-2
+996 996 996 filler-data filler-data-2
+997 997 997 filler-data filler-data-2
+998 998 998 filler-data filler-data-2
+999 999 999 filler-data filler-data-2
+select * from t1 where
+(pk > 10 and pk < 15) or (pk >= 50 and pk < 55 )
+or
+(key1 < 5) or (key1 > @maxv-10);
+pk key1 key2 filler filler2
+1 1 1 filler-data filler-data-2
+1000 1000 1000 filler-data filler-data-2
+11 11 11 filler-data filler-data-2
+12 12 12 filler-data filler-data-2
+13 13 13 filler-data filler-data-2
+14 14 14 filler-data filler-data-2
+2 2 2 filler-data filler-data-2
+3 3 3 filler-data filler-data-2
+4 4 4 filler-data filler-data-2
+50 50 50 filler-data filler-data-2
+51 51 51 filler-data filler-data-2
+52 52 52 filler-data filler-data-2
+53 53 53 filler-data filler-data-2
+54 54 54 filler-data filler-data-2
+991 991 991 filler-data filler-data-2
+992 992 992 filler-data filler-data-2
+993 993 993 filler-data filler-data-2
+994 994 994 filler-data filler-data-2
+995 995 995 filler-data filler-data-2
+996 996 996 filler-data filler-data-2
+997 997 997 filler-data filler-data-2
+998 998 998 filler-data filler-data-2
+999 999 999 filler-data filler-data-2
+drop table t1;
+set global rocksdb_force_flush_memtable_now=1;
+#---------------- Clustered PK ROR-index_merge tests -----------------------------
+SET SESSION DEFAULT_STORAGE_ENGINE = RocksDB;
+drop table if exists t1;
+create table t1
+(
+pk1 int not null,
+pk2 int not null,
+key1 int not null,
+key2 int not null,
+pktail1ok int not null,
+pktail2ok int not null,
+pktail3bad int not null,
+pktail4bad int not null,
+pktail5bad int not null,
+pk2copy int not null,
+badkey int not null,
+filler1 char (200),
+filler2 char (200),
+key (key1),
+key (key2),
+/* keys with tails from CPK members */
+key (pktail1ok, pk1),
+key (pktail2ok, pk1, pk2),
+key (pktail3bad, pk2, pk1),
+key (pktail4bad, pk1, pk2copy),
+key (pktail5bad, pk1, pk2, pk2copy),
+primary key (pk1, pk2)
+);
+explain select * from t1 where pk1 = 1 and pk2 < 80 and key1=0;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range PRIMARY,key1 PRIMARY 8 NULL ROWS Using where
+select * from t1 where pk1 = 1 and pk2 < 80 and key1=0;
+pk1 pk2 key1 key2 pktail1ok pktail2ok pktail3bad pktail4bad pktail5bad pk2copy badkey filler1 filler2
+1 10 0 0 0 0 0 0 0 10 0 filler-data-10 filler2
+1 11 0 0 0 0 0 0 0 11 0 filler-data-11 filler2
+1 12 0 0 0 0 0 0 0 12 0 filler-data-12 filler2
+1 13 0 0 0 0 0 0 0 13 0 filler-data-13 filler2
+1 14 0 0 0 0 0 0 0 14 0 filler-data-14 filler2
+1 15 0 0 0 0 0 0 0 15 0 filler-data-15 filler2
+1 16 0 0 0 0 0 0 0 16 0 filler-data-16 filler2
+1 17 0 0 0 0 0 0 0 17 0 filler-data-17 filler2
+1 18 0 0 0 0 0 0 0 18 0 filler-data-18 filler2
+1 19 0 0 0 0 0 0 0 19 0 filler-data-19 filler2
+explain select pk1,pk2 from t1 where key1 = 10 and key2=10 and 2*pk1+1 < 2*96+1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge key1,key2 key1,key2 4,4 NULL 1 Using intersect(key1,key2); Using where; Using index
+select pk1,pk2 from t1 where key1 = 10 and key2=10 and 2*pk1+1 < 2*96+1;
+pk1 pk2
+95 50
+95 51
+95 52
+95 53
+95 54
+95 55
+95 56
+95 57
+95 58
+95 59
+explain select * from t1 where badkey=1 and key1=10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref key1 key1 4 const ROWS Using where
+explain select * from t1 where pk1 < 7500 and key1 = 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range PRIMARY,key1 PRIMARY 4 NULL ROWS Using where
+explain select * from t1 where pktail1ok=1 and key1=10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref key1,pktail1ok key1 4 const 2 Using where
+explain select * from t1 where pktail2ok=1 and key1=10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref key1,pktail2ok key1 4 const 2 Using where
+explain select * from t1 where (pktail2ok=1 and pk1< 50000) or key1=10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge PRIMARY,key1,pktail2ok PRIMARY,key1 4,4 NULL ROWS Using union(PRIMARY,key1); Using where
+explain select * from t1 where pktail3bad=1 and key1=10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref key1,pktail3bad EITHER_KEY 4 const ROWS Using where
+explain select * from t1 where pktail4bad=1 and key1=10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref key1,pktail4bad key1 4 const ROWS Using where
+explain select * from t1 where pktail5bad=1 and key1=10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref key1,pktail5bad key1 4 const ROWS Using where
+explain select pk1,pk2,key1,key2 from t1 where key1 = 10 and key2=10 limit 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge key1,key2 key1,key2 4,4 NULL 1 Using intersect(key1,key2); Using where; Using index
+select pk1,pk2,key1,key2 from t1 where key1 = 10 and key2=10 limit 10;
+pk1 pk2 key1 key2
+95 50 10 10
+95 51 10 10
+95 52 10 10
+95 53 10 10
+95 54 10 10
+95 55 10 10
+95 56 10 10
+95 57 10 10
+95 58 10 10
+95 59 10 10
+drop table t1;
+create table t1
+(
+RUNID varchar(22),
+SUBMITNR varchar(5),
+ORDERNR char(1),
+PROGRAMM varchar(8),
+TESTID varchar(4),
+UCCHECK char(1),
+ETEXT varchar(80),
+ETEXT_TYPE char(1),
+INFO char(1),
+SEVERITY tinyint(3),
+TADIRFLAG char(1),
+PRIMARY KEY (RUNID,SUBMITNR,ORDERNR,PROGRAMM,TESTID,UCCHECK),
+KEY `TVERM~KEY` (PROGRAMM,TESTID,UCCHECK)
+) DEFAULT CHARSET=latin1;
+update t1 set `ETEXT` = '', `ETEXT_TYPE`='', `INFO`='', `SEVERITY`='', `TADIRFLAG`=''
+WHERE
+`RUNID`= '' AND `SUBMITNR`= '' AND `ORDERNR`='' AND `PROGRAMM`='' AND
+`TESTID`='' AND `UCCHECK`='';
+drop table t1;
+#
+# Bug#50402 Optimizer producing wrong results when using Index Merge on InnoDB
+#
+CREATE TABLE t1 (f1 INT, PRIMARY KEY (f1));
+INSERT INTO t1 VALUES (2);
+CREATE TABLE t2 (f1 INT, f2 INT, f3 char(1),
+PRIMARY KEY (f1), KEY (f2), KEY (f3) );
+INSERT INTO t2 VALUES (1, 1, 'h'), (2, 3, 'h'), (3, 2, ''), (4, 2, '');
+SELECT t1.f1 FROM t1
+WHERE (SELECT COUNT(*) FROM t2 WHERE t2.f3 = 'h' AND t2.f2 = t1.f1) = 0 AND t1.f1 = 2;
+f1
+2
+EXPLAIN SELECT t1.f1 FROM t1
+WHERE (SELECT COUNT(*) FROM t2 WHERE t2.f3 = 'h' AND t2.f2 = t1.f1) = 0 AND t1.f1 = 2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 const PRIMARY PRIMARY 4 const 1 Using index
+2 SUBQUERY t2 index_merge f2,f3 f3,f2 2,5 NULL 1 Using intersect(f3,f2); Using where; Using index
+DROP TABLE t1,t2;
+set global rocksdb_force_flush_memtable_now=1;
+#
+# Bug#11747423 32254: INDEX MERGE USED UNNECESSARILY
+#
+CREATE TABLE t1 (
+id INT NOT NULL PRIMARY KEY,
+id2 INT NOT NULL,
+id3 INT NOT NULL,
+KEY (id2),
+KEY (id3),
+KEY covering_index (id2,id3)
+) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (0, 0, 0), (1, 1, 1), (2, 2, 2), (3, 3, 3), (4, 4, 4), (5, 5, 5), (6, 6, 6), (7, 7, 7);
+INSERT INTO t1 SELECT id + 8, id2 + 8, id3 +8 FROM t1;
+INSERT INTO t1 SELECT id + 16, 7, 0 FROM t1;
+EXPLAIN SELECT SQL_NO_CACHE count(*) FROM t1 WHERE id2=7 AND id3=0;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref id2,id3,covering_index covering_index 8 const,const 2 Using index
+DROP TABLE t1;
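
The EXPLAIN output above exercises the optimizer's index_merge strategies (union and intersect) on MyRocks tables. A minimal sketch of the same behaviour, assuming an illustrative table im_demo that is not part of the test suite; actual plans depend on statistics:

# Conjunctive predicates on two indexed columns can yield Using intersect;
# disjunctive ones can yield Using union.
CREATE TABLE im_demo (pk INT PRIMARY KEY, k1 INT, k2 INT, KEY(k1), KEY(k2)) ENGINE=RocksDB;
INSERT INTO im_demo VALUES (1,10,10),(2,10,20),(3,20,10),(4,10,10);
EXPLAIN SELECT pk FROM im_demo WHERE k1 = 10 AND k2 = 10;
EXPLAIN SELECT * FROM im_demo WHERE k1 = 10 OR k2 = 10;
DROP TABLE im_demo;
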
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index_primary.result b/storage/rocksdb/mysql-test/rocksdb/r/index_primary.result
new file mode 100644
index 00000000000..1f4c88e5a33
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/index_primary.result
@@ -0,0 +1,71 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT PRIMARY KEY,
+b CHAR(8)
+) ENGINE=rocksdb;
+SHOW KEYS IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 a A # NULL NULL LSMTREE
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+INSERT INTO t1 (a,b) VALUES (1,'c');
+ERROR 23000: Duplicate entry '1' for key 'PRIMARY'
+DROP TABLE t1;
+CREATE TABLE t1 (a INT PRIMARY KEY,
+b CHAR(8) PRIMARY KEY
+) ENGINE=rocksdb;
+ERROR 42000: Multiple primary key defined
+CREATE TABLE t1 (a INT,
+b CHAR(8),
+PRIMARY KEY (a,b)
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 a A # NULL NULL LSMTREE
+t1 0 PRIMARY 2 b A # NULL NULL LSMTREE
+INSERT INTO t1 (a,b) VALUES (1,'a'),(1,'b'),(2,'a'),(2,'b');
+INSERT INTO t1 (a,b) VALUES (1,'b');
+ERROR 23000: Duplicate entry '1-b' for key 'PRIMARY'
+DROP TABLE t1;
+CREATE TABLE t1 (a INT KEY,
+b CHAR(8),
+KEY (b)
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 a A # NULL NULL LSMTREE
+t1 1 b 1 b A # NULL NULL YES LSMTREE
+DROP TABLE t1;
+CREATE TABLE t1 (a INT,
+b CHAR(8) PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 b A # NULL NULL LSMTREE
+ALTER TABLE t1 ADD CONSTRAINT PRIMARY KEY pk (a);
+ERROR 42000: Multiple primary key defined
+SHOW KEYS IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 b A # NULL NULL LSMTREE
+DROP TABLE t1;
+set global rocksdb_large_prefix=0;
+CREATE TABLE t1 (
+a BLOB(1024),
+PRIMARY KEY (a(767))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(1024),
+PRIMARY KEY (a(768))
+) ENGINE=rocksdb;
+ERROR 42000: Specified key was too long; max key length is 767 bytes
+set global rocksdb_large_prefix=1;
+CREATE TABLE t1 (
+a BLOB(4096),
+PRIMARY KEY (a(3072))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(4096),
+PRIMARY KEY (a(3073))
+) ENGINE=rocksdb;
+ERROR 42000: Specified key was too long; max key length is 3072 bytes
+set global rocksdb_large_prefix=DEFAULT;
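
The paired CREATE TABLE statements above probe MyRocks' key-prefix limits: with rocksdb_large_prefix=0 an index prefix may be at most 767 bytes, and with rocksdb_large_prefix=1 at most 3072 bytes, mirroring InnoDB's large-prefix rule. A hedged sketch with an illustrative table name:

# 767 bytes is the boundary when large prefixes are off; 768 is rejected.
SET GLOBAL rocksdb_large_prefix = 0;
CREATE TABLE pfx_demo (a BLOB(1024), PRIMARY KEY (a(767))) ENGINE=RocksDB;
DROP TABLE pfx_demo;
SET GLOBAL rocksdb_large_prefix = DEFAULT;
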
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index_type_btree.result b/storage/rocksdb/mysql-test/rocksdb/r/index_type_btree.result
new file mode 100644
index 00000000000..9eaab9f53aa
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/index_type_btree.result
@@ -0,0 +1,69 @@
+CREATE TABLE t1 (a INT,
+b CHAR(8),
+pk INT PRIMARY KEY,
+KEY USING BTREE (a)
+) ENGINE=rocksdb;
+SHOW KEYS IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A # NULL NULL LSMTREE
+t1 1 a 1 a A # NULL NULL YES LSMTREE
+DROP TABLE t1;
+CREATE TABLE t1 (a INT,
+b CHAR(8),
+pk INT PRIMARY KEY,
+KEY a_b USING BTREE (a,b) COMMENT 'a_b index'
+) ENGINE=rocksdb;
+SHOW KEYS IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A # NULL NULL LSMTREE
+t1 1 a_b 1 a A # NULL NULL YES LSMTREE a_b index
+t1 1 a_b 2 b A # NULL NULL YES LSMTREE a_b index
+DROP TABLE t1;
+CREATE TABLE t1 (a INT,
+b CHAR(8),
+pk INT PRIMARY KEY,
+KEY USING BTREE (a),
+KEY USING BTREE (b)
+) ENGINE=rocksdb;
+SHOW KEYS IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A # NULL NULL LSMTREE
+t1 1 a 1 a A # NULL NULL YES LSMTREE
+t1 1 b 1 b A # NULL NULL YES LSMTREE
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (100,'z');
+ALTER TABLE t1 ADD KEY (a) USING BTREE COMMENT 'simple index on a';
+SHOW INDEX FROM t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A # NULL NULL LSMTREE
+t1 1 a 1 a A # NULL NULL YES LSMTREE simple index on a
+ALTER TABLE t1 DROP KEY a;
+DROP TABLE t1;
+set global rocksdb_large_prefix=0;
+CREATE TABLE t1 (
+a BLOB(1024),
+KEY (a(767))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(1024),
+KEY (a(768))
+) ENGINE=rocksdb;
+Warnings:
+Note 1071 Specified key was too long; max key length is 767 bytes
+DROP TABLE t1;
+set global rocksdb_large_prefix=1;
+CREATE TABLE t1 (
+a BLOB(4096),
+KEY (a(3072))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(4096),
+KEY (a(3073))
+) ENGINE=rocksdb;
+Warnings:
+Note 1071 Specified key was too long; max key length is 3072 bytes
+DROP TABLE t1;
+set global rocksdb_large_prefix=DEFAULT;
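
Note that the index-type clause is accepted but not honoured: every index above is reported as LSMTREE regardless of USING BTREE, since MyRocks stores all indexes as LSM trees (the index_type_hash results that follow show the same for USING HASH). One way to confirm, with an illustrative table name:

CREATE TABLE idx_demo (pk INT PRIMARY KEY, a INT, KEY USING BTREE (a)) ENGINE=RocksDB;
SELECT INDEX_NAME, INDEX_TYPE FROM INFORMATION_SCHEMA.STATISTICS
WHERE TABLE_NAME = 'idx_demo';
DROP TABLE idx_demo;
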
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index_type_hash.result b/storage/rocksdb/mysql-test/rocksdb/r/index_type_hash.result
new file mode 100644
index 00000000000..bc1a96fa726
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/index_type_hash.result
@@ -0,0 +1,69 @@
+CREATE TABLE t1 (a INT,
+b CHAR(8),
+pk INT PRIMARY KEY,
+KEY USING HASH (a)
+) ENGINE=rocksdb;
+SHOW KEYS IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A # NULL NULL LSMTREE
+t1 1 a 1 a A # NULL NULL YES LSMTREE
+DROP TABLE t1;
+CREATE TABLE t1 (a INT,
+b CHAR(8),
+pk INT PRIMARY KEY,
+KEY a_b USING HASH (a,b) COMMENT 'a_b index'
+) ENGINE=rocksdb;
+SHOW KEYS IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A # NULL NULL LSMTREE
+t1 1 a_b 1 a A # NULL NULL YES LSMTREE a_b index
+t1 1 a_b 2 b A # NULL NULL YES LSMTREE a_b index
+DROP TABLE t1;
+CREATE TABLE t1 (a INT,
+b CHAR(8),
+pk INT PRIMARY KEY,
+KEY USING HASH (a),
+KEY USING HASH (b)
+) ENGINE=rocksdb;
+SHOW KEYS IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A # NULL NULL LSMTREE
+t1 1 a 1 a A # NULL NULL YES LSMTREE
+t1 1 b 1 b A # NULL NULL YES LSMTREE
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (100,'z');
+ALTER TABLE t1 ADD KEY (a) USING HASH COMMENT 'simple index on a';
+SHOW INDEX FROM t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A # NULL NULL LSMTREE
+t1 1 a 1 a A # NULL NULL YES LSMTREE simple index on a
+ALTER TABLE t1 DROP KEY a;
+DROP TABLE t1;
+set global rocksdb_large_prefix=0;
+CREATE TABLE t1 (
+a BLOB(1024),
+KEY (a(767))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(1024),
+KEY (a(768))
+) ENGINE=rocksdb;
+Warnings:
+Note 1071 Specified key was too long; max key length is 767 bytes
+DROP TABLE t1;
+set global rocksdb_large_prefix=1;
+CREATE TABLE t1 (
+a BLOB(4096),
+KEY (a(3072))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(4096),
+KEY (a(3073))
+) ENGINE=rocksdb;
+Warnings:
+Note 1071 Specified key was too long; max key length is 3072 bytes
+DROP TABLE t1;
+set global rocksdb_large_prefix=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/information_schema.result b/storage/rocksdb/mysql-test/rocksdb/r/information_schema.result
new file mode 100644
index 00000000000..e22a85fd5c6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/information_schema.result
@@ -0,0 +1,83 @@
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+DROP TABLE IF EXISTS t3;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK=1;
+create table t1 (a int) engine=rocksdb;
+drop table t1;
+select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type<>'DDL_DROP_INDEX_ONGOING';
+TYPE NAME VALUE
+MAX_INDEX_ID MAX_INDEX_ID max_index_id
+CF_FLAGS 0 default [0]
+CF_FLAGS 1 __system__ [0]
+select count(*) from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type<>'DDL_DROP_INDEX_ONGOING';
+count(*)
+3
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK=0;
+select VALUE into @keysIn from INFORMATION_SCHEMA.ROCKSDB_COMPACTION_STATS where CF_NAME = 'default' and LEVEL = 'Sum' and TYPE = 'KeyIn';
+CREATE TABLE t1 (i1 INT, i2 INT, PRIMARY KEY (i1)) ENGINE = ROCKSDB;
+INSERT INTO t1 VALUES (1, 1), (2, 2), (3, 3);
+set global rocksdb_force_flush_memtable_now = true;
+select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO;
+TYPE NAME VALUE
+MAX_INDEX_ID MAX_INDEX_ID max_index_id
+CF_FLAGS 0 default [0]
+CF_FLAGS 1 __system__ [0]
+select count(*) from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO;
+count(*)
+3
+set global rocksdb_force_flush_memtable_now = true;
+set global rocksdb_compact_cf='default';
+select case when VALUE-@keysIn >= 3 then 'true' else 'false' end from INFORMATION_SCHEMA.ROCKSDB_COMPACTION_STATS where CF_NAME = 'default' and LEVEL = 'Sum' and TYPE = 'KeyIn';
+case when VALUE-@keysIn >= 3 then 'true' else 'false' end
+true
+CREATE INDEX tindex1 on t1 (i1);
+CREATE INDEX tindex2 on t1 (i2);
+select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where TYPE = 'CF_FLAGS';
+TYPE NAME VALUE
+CF_FLAGS 0 default [0]
+CF_FLAGS 1 __system__ [0]
+CREATE TABLE t2 (
+a int,
+b int,
+c int,
+d int,
+PRIMARY KEY (a) COMMENT "cf_a",
+KEY (b) COMMENT "cf_b",
+KEY (c) COMMENT "cf_c",
+KEY (d) COMMENT "rev:cf_d") ENGINE=ROCKSDB;
+select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where TYPE = 'CF_FLAGS';
+TYPE NAME VALUE
+CF_FLAGS 0 default [0]
+CF_FLAGS 1 __system__ [0]
+CF_FLAGS 2 cf_a [0]
+CF_FLAGS 3 cf_b [0]
+CF_FLAGS 4 cf_c [0]
+CF_FLAGS 5 rev:cf_d [1]
+CREATE TABLE t3 (a INT, PRIMARY KEY (a)) ENGINE=ROCKSDB;
+insert into t3 (a) values (1), (2), (3);
+SET @ORIG_ROCKSDB_PAUSE_BACKGROUND_WORK = @@GLOBAL.ROCKSDB_PAUSE_BACKGROUND_WORK;
+SHOW GLOBAL VARIABLES LIKE 'ROCKSDB_PAUSE_BACKGROUND_WORK';
+Variable_name Value
+rocksdb_pause_background_work OFF
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK=1;
+SHOW GLOBAL VARIABLES LIKE 'ROCKSDB_PAUSE_BACKGROUND_WORK';
+Variable_name Value
+rocksdb_pause_background_work ON
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK=1;
+SHOW GLOBAL VARIABLES LIKE 'ROCKSDB_PAUSE_BACKGROUND_WORK';
+Variable_name Value
+rocksdb_pause_background_work ON
+DROP TABLE t3;
+cf_id:0,index_id:264
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK=0;
+SHOW GLOBAL VARIABLES LIKE 'ROCKSDB_PAUSE_BACKGROUND_WORK';
+Variable_name Value
+rocksdb_pause_background_work OFF
+next line shouldn't cause an assertion failure
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK=0;
+SHOW GLOBAL VARIABLES LIKE 'ROCKSDB_PAUSE_BACKGROUND_WORK';
+Variable_name Value
+rocksdb_pause_background_work OFF
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_ROCKSDB_PAUSE_BACKGROUND_WORK;
+DROP TABLE t1;
+DROP TABLE t2;
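
The CF_FLAGS rows above come from per-index column-family comments: a COMMENT on a key names the column family the index lives in, and a rev: prefix requests a reverse-ordered family (reported with flag [1]). A minimal sketch, assuming the illustrative table cf_demo:

CREATE TABLE cf_demo (
a INT,
b INT,
PRIMARY KEY (a) COMMENT 'cf_a',
KEY (b) COMMENT 'rev:cf_b'
) ENGINE=RocksDB;
SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO WHERE TYPE = 'CF_FLAGS';
DROP TABLE cf_demo;
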
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/innodb_i_s_tables_disabled.result b/storage/rocksdb/mysql-test/rocksdb/r/innodb_i_s_tables_disabled.result
new file mode 100644
index 00000000000..55599a1268a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/innodb_i_s_tables_disabled.result
@@ -0,0 +1,344 @@
+SELECT * FROM INFORMATION_SCHEMA.INNODB_TRX;
+trx_id trx_state trx_started trx_requested_lock_id trx_wait_started trx_weight trx_mysql_thread_id trx_query trx_operation_state trx_tables_in_use trx_tables_locked trx_lock_structs trx_lock_memory_bytes trx_rows_locked trx_rows_modified trx_concurrency_tickets trx_isolation_level trx_unique_checks trx_foreign_key_checks trx_last_foreign_key_error trx_adaptive_hash_latched trx_is_read_only trx_autocommit_non_locking
+SELECT * FROM INFORMATION_SCHEMA.INNODB_LOCKS;
+lock_id lock_trx_id lock_mode lock_type lock_table lock_index lock_space lock_page lock_rec lock_data
+SELECT * FROM INFORMATION_SCHEMA.INNODB_LOCK_WAITS;
+requesting_trx_id requested_lock_id blocking_trx_id blocking_lock_id
+SELECT * FROM INFORMATION_SCHEMA.INNODB_CMP;
+page_size compress_ops compress_ops_ok compress_time uncompress_ops uncompress_time
+SELECT * FROM INFORMATION_SCHEMA.INNODB_CMP_RESET;
+page_size compress_ops compress_ops_ok compress_time uncompress_ops uncompress_time
+SELECT * FROM INFORMATION_SCHEMA.INNODB_CMP_PER_INDEX;
+database_name table_name index_name compress_ops compress_ops_ok compress_time uncompress_ops uncompress_time
+SELECT * FROM INFORMATION_SCHEMA.INNODB_CMP_PER_INDEX_RESET;
+database_name table_name index_name compress_ops compress_ops_ok compress_time uncompress_ops uncompress_time
+SELECT * FROM INFORMATION_SCHEMA.INNODB_CMPMEM;
+page_size buffer_pool_instance pages_used pages_free relocation_ops relocation_time
+SELECT * FROM INFORMATION_SCHEMA.INNODB_CMPMEM_RESET;
+page_size buffer_pool_instance pages_used pages_free relocation_ops relocation_time
+SELECT * FROM INFORMATION_SCHEMA.INNODB_METRICS;
+NAME SUBSYSTEM COUNT MAX_COUNT MIN_COUNT AVG_COUNT COUNT_RESET MAX_COUNT_RESET MIN_COUNT_RESET AVG_COUNT_RESET TIME_ENABLED TIME_DISABLED TIME_ELAPSED TIME_RESET STATUS TYPE COMMENT
+metadata_table_handles_opened metadata 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of table handles opened
+metadata_table_handles_closed metadata 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of table handles closed
+metadata_table_reference_count metadata 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Table reference counter
+lock_deadlocks lock 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of deadlocks
+lock_timeouts lock 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of lock timeouts
+lock_rec_lock_waits lock 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of times enqueued into record lock wait queue
+lock_table_lock_waits lock 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of times enqueued into table lock wait queue
+lock_rec_lock_requests lock 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of record locks requested
+lock_rec_lock_created lock 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of record locks created
+lock_rec_lock_removed lock 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of record locks removed from the lock queue
+lock_rec_locks lock 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Current number of record locks on tables
+lock_table_lock_created lock 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of table locks created
+lock_table_lock_removed lock 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of table locks removed from the lock queue
+lock_table_locks lock 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Current number of table locks on tables
+lock_row_lock_current_waits lock 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of row locks currently being waited for (innodb_row_lock_current_waits)
+lock_row_lock_time lock 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Time spent in acquiring row locks, in milliseconds (innodb_row_lock_time)
+lock_row_lock_time_max lock 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value The maximum time to acquire a row lock, in milliseconds (innodb_row_lock_time_max)
+lock_row_lock_waits lock 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of times a row lock had to be waited for (innodb_row_lock_waits)
+lock_row_lock_time_avg lock 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value The average time to acquire a row lock, in milliseconds (innodb_row_lock_time_avg)
+buffer_pool_size server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Server buffer pool size (all buffer pools) in bytes
+buffer_pool_reads buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of reads directly from disk (innodb_buffer_pool_reads)
+buffer_pool_read_requests buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of logical read requests (innodb_buffer_pool_read_requests)
+buffer_pool_write_requests buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of write requests (innodb_buffer_pool_write_requests)
+buffer_pool_wait_free buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of times waited for free buffer (innodb_buffer_pool_wait_free)
+buffer_pool_read_ahead buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of pages read as read ahead (innodb_buffer_pool_read_ahead)
+buffer_pool_read_ahead_evicted buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Read-ahead pages evicted without being accessed (innodb_buffer_pool_read_ahead_evicted)
+buffer_pool_pages_total buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Total buffer pool size in pages (innodb_buffer_pool_pages_total)
+buffer_pool_pages_misc buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Buffer pages for misc use such as row locks or the adaptive hash index (innodb_buffer_pool_pages_misc)
+buffer_pool_pages_data buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Buffer pages containing data (innodb_buffer_pool_pages_data)
+buffer_pool_bytes_data buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Buffer bytes containing data (innodb_buffer_pool_bytes_data)
+buffer_pool_pages_dirty buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Buffer pages currently dirty (innodb_buffer_pool_pages_dirty)
+buffer_pool_bytes_dirty buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Buffer bytes currently dirty (innodb_buffer_pool_bytes_dirty)
+buffer_pool_pages_free buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Buffer pages currently free (innodb_buffer_pool_pages_free)
+buffer_pages_created buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of pages created (innodb_pages_created)
+buffer_pages_written buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of pages written (innodb_pages_written)
+buffer_index_pages_written buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of index pages written (innodb_index_pages_written)
+buffer_non_index_pages_written buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of non index pages written (innodb_non_index_pages_written)
+buffer_pages_read buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of pages read (innodb_pages_read)
+buffer_pages0_read buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of page 0 read (innodb_pages0_read)
+buffer_index_sec_rec_cluster_reads buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of secondary record reads triggered cluster read
+buffer_index_sec_rec_cluster_reads_avoided buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of secondary record reads avoided triggering cluster read
+buffer_data_reads buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Amount of data read in bytes (innodb_data_reads)
+buffer_data_written buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Amount of data written in bytes (innodb_data_written)
+buffer_flush_batch_scanned buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_owner Total pages scanned as part of flush batch
+buffer_flush_batch_num_scan buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Number of times buffer flush list flush is called
+buffer_flush_batch_scanned_per_call buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Pages scanned per flush batch scan
+buffer_flush_batch_total_pages buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_owner Total pages flushed as part of flush batch
+buffer_flush_batches buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Number of flush batches
+buffer_flush_batch_pages buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Pages queued as a flush batch
+buffer_flush_neighbor_total_pages buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_owner Total neighbors flushed as part of neighbor flush
+buffer_flush_neighbor buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Number of times neighbors flushing is invoked
+buffer_flush_neighbor_pages buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Pages queued as a neighbor batch
+buffer_flush_n_to_flush_requested buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of pages requested for flushing.
+buffer_flush_n_to_flush_by_age buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of pages targeted by LSN Age for flushing.
+buffer_flush_adaptive_avg_time_slot buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Avg time (ms) spent for adaptive flushing recently per slot.
+buffer_LRU_batch_flush_avg_time_slot buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Avg time (ms) spent for LRU batch flushing recently per slot.
+buffer_flush_adaptive_avg_time_thread buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Avg time (ms) spent for adaptive flushing recently per thread.
+buffer_LRU_batch_flush_avg_time_thread buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Avg time (ms) spent for LRU batch flushing recently per thread.
+buffer_flush_adaptive_avg_time_est buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Estimated time (ms) spent for adaptive flushing recently.
+buffer_LRU_batch_flush_avg_time_est buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Estimated time (ms) spent for LRU batch flushing recently.
+buffer_flush_avg_time buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Avg time (ms) spent for flushing recently.
+buffer_flush_adaptive_avg_pass buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of adaptive flushes passed during the recent Avg period.
+buffer_LRU_batch_flush_avg_pass buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of LRU batch flushes passed during the recent Avg period.
+buffer_flush_avg_pass buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of flushes passed during the recent Avg period.
+buffer_LRU_get_free_loops buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Total loops in LRU get free.
+buffer_LRU_get_free_waits buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Total sleep waits in LRU get free.
+buffer_flush_avg_page_rate buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Average number of pages at which flushing is happening
+buffer_flush_lsn_avg_rate buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Average redo generation rate
+buffer_flush_pct_for_dirty buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Percent of IO capacity used to avoid max dirty page limit
+buffer_flush_pct_for_lsn buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Percent of IO capacity used to avoid reusable redo space limit
+buffer_flush_sync_waits buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of times a wait happens due to sync flushing
+buffer_flush_adaptive_total_pages buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_owner Total pages flushed as part of adaptive flushing
+buffer_flush_adaptive buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Number of adaptive batches
+buffer_flush_adaptive_pages buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Pages queued as an adaptive batch
+buffer_flush_sync_total_pages buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_owner Total pages flushed as part of sync batches
+buffer_flush_sync buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Number of sync batches
+buffer_flush_sync_pages buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Pages queued as a sync batch
+buffer_flush_background_total_pages buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_owner Total pages flushed as part of background batches
+buffer_flush_background buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Number of background batches
+buffer_flush_background_pages buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Pages queued as a background batch
+buffer_LRU_batch_scanned buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_owner Total pages scanned as part of LRU batch
+buffer_LRU_batch_num_scan buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Number of times LRU batch is called
+buffer_LRU_batch_scanned_per_call buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Pages scanned per LRU batch call
+buffer_LRU_batch_flush_total_pages buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_owner Total pages flushed as part of LRU batches
+buffer_LRU_batches_flush buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Number of LRU batches
+buffer_LRU_batch_flush_pages buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Pages queued as an LRU batch
+buffer_LRU_batch_evict_total_pages buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_owner Total pages evicted as part of LRU batches
+buffer_LRU_batches_evict buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Number of LRU batches
+buffer_LRU_batch_evict_pages buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Pages queued as an LRU batch
+buffer_LRU_single_flush_scanned buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_owner Total pages scanned as part of single page LRU flush
+buffer_LRU_single_flush_num_scan buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Number of times single page LRU flush is called
+buffer_LRU_single_flush_scanned_per_call buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Page scanned per single LRU flush
+buffer_LRU_single_flush_failure_count buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of times attempt to flush a single page from LRU failed
+buffer_LRU_get_free_search buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of searches performed for a clean page
+buffer_LRU_search_scanned buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_owner Total pages scanned as part of LRU search
+buffer_LRU_search_num_scan buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Number of times LRU search is performed
+buffer_LRU_search_scanned_per_call buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Page scanned per single LRU search
+buffer_LRU_unzip_search_scanned buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_owner Total pages scanned as part of LRU unzip search
+buffer_LRU_unzip_search_num_scan buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Number of times LRU unzip search is performed
+buffer_LRU_unzip_search_scanned_per_call buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Page scanned per single LRU unzip search
+buffer_page_read_index_leaf buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Index Leaf Pages read
+buffer_page_read_index_non_leaf buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Index Non-leaf Pages read
+buffer_page_read_index_ibuf_leaf buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Insert Buffer Index Leaf Pages read
+buffer_page_read_index_ibuf_non_leaf buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Insert Buffer Index Non-Leaf Pages read
+buffer_page_read_undo_log buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Undo Log Pages read
+buffer_page_read_index_inode buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Index Inode Pages read
+buffer_page_read_ibuf_free_list buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Insert Buffer Free List Pages read
+buffer_page_read_ibuf_bitmap buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Insert Buffer Bitmap Pages read
+buffer_page_read_system_page buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of System Pages read
+buffer_page_read_trx_system buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Transaction System Pages read
+buffer_page_read_fsp_hdr buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of File Space Header Pages read
+buffer_page_read_xdes buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Extent Descriptor Pages read
+buffer_page_read_blob buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Uncompressed BLOB Pages read
+buffer_page_read_zblob buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of First Compressed BLOB Pages read
+buffer_page_read_zblob2 buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Subsequent Compressed BLOB Pages read
+buffer_page_read_other buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of other/unknown (old version of InnoDB) Pages read
+buffer_page_written_index_leaf buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Index Leaf Pages written
+buffer_page_written_index_non_leaf buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Index Non-leaf Pages written
+buffer_page_written_index_ibuf_leaf buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Insert Buffer Index Leaf Pages written
+buffer_page_written_index_ibuf_non_leaf buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Insert Buffer Index Non-Leaf Pages written
+buffer_page_written_undo_log buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Undo Log Pages written
+buffer_page_written_index_inode buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Index Inode Pages written
+buffer_page_written_ibuf_free_list buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Insert Buffer Free List Pages written
+buffer_page_written_ibuf_bitmap buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Insert Buffer Bitmap Pages written
+buffer_page_written_system_page buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of System Pages written
+buffer_page_written_trx_system buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Transaction System Pages written
+buffer_page_written_fsp_hdr buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of File Space Header Pages written
+buffer_page_written_xdes buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Extent Descriptor Pages written
+buffer_page_written_blob buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Uncompressed BLOB Pages written
+buffer_page_written_zblob buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of First Compressed BLOB Pages written
+buffer_page_written_zblob2 buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Subsequent Compressed BLOB Pages written
+buffer_page_written_other buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of other/unknown (old version InnoDB) Pages written
+os_data_reads os 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of reads initiated (innodb_data_reads)
+os_data_writes os 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of writes initiated (innodb_data_writes)
+os_data_fsyncs os 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of fsync() calls (innodb_data_fsyncs)
+os_pending_reads os 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of reads pending
+os_pending_writes os 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of writes pending
+os_log_bytes_written os 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Bytes of log written (innodb_os_log_written)
+os_log_fsyncs os 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of fsync log writes (innodb_os_log_fsyncs)
+os_log_pending_fsyncs os 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of pending fsync write (innodb_os_log_pending_fsyncs)
+os_log_pending_writes os 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of pending log file writes (innodb_os_log_pending_writes)
+trx_rw_commits transaction 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of read-write transactions committed
+trx_ro_commits transaction 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of read-only transactions committed
+trx_nl_ro_commits transaction 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of non-locking auto-commit read-only transactions committed
+trx_commits_insert_update transaction 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of transactions committed with inserts and updates
+trx_rollbacks transaction 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of transactions rolled back
+trx_rollbacks_savepoint transaction 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of transactions rolled back to savepoint
+trx_rollback_active transaction 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of resurrected active transactions rolled back
+trx_active_transactions transaction 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of active transactions
+trx_rseg_history_len transaction 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Length of the TRX_RSEG_HISTORY list
+trx_undo_slots_used transaction 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of undo slots used
+trx_undo_slots_cached transaction 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of undo slots cached
+trx_rseg_current_size transaction 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Current rollback segment size in pages
+purge_del_mark_records purge 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of delete-marked rows purged
+purge_upd_exist_or_extern_records purge 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of purges on updates of existing records and updates on delete marked record with externally stored field
+purge_invoked purge 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of times purge was invoked
+purge_undo_log_pages purge 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of undo log pages handled by the purge
+purge_dml_delay_usec purge 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Microseconds DML to be delayed due to purge lagging
+purge_stop_count purge 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Number of times purge was stopped
+purge_resume_count purge 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Number of times purge was resumed
+log_checkpoints recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of checkpoints
+log_lsn_last_flush recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value LSN of Last flush
+log_lsn_last_checkpoint recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value LSN at last checkpoint
+log_lsn_current recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Current LSN value
+log_lsn_checkpoint_age recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Current LSN value minus LSN at last checkpoint
+log_lsn_buf_pool_oldest recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value The oldest modified block LSN in the buffer pool
+log_max_modified_age_async recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Maximum LSN difference; when exceeded, start asynchronous preflush
+log_max_modified_age_sync recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Maximum LSN difference; when exceeded, start synchronous preflush
+log_pending_log_flushes recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Pending log flushes
+log_pending_checkpoint_writes recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Pending checkpoints
+log_num_log_io recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Number of log I/Os
+log_waits recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of log waits due to small log buffer (innodb_log_waits)
+log_write_requests recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of log write requests (innodb_log_write_requests)
+log_writes recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of log writes (innodb_log_writes)
+log_padded recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Bytes of log padded for log write ahead
+compress_pages_compressed compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of pages compressed
+compress_pages_decompressed compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of pages decompressed
+compression_pad_increments compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of times padding is incremented to avoid compression failures
+compression_pad_decrements compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of times padding is decremented due to good compressibility
+compress_saved compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of bytes saved by page compression
+compress_pages_page_compressed compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of pages compressed by page compression
+compress_page_compressed_trim_op compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of TRIM operations performed by page compression
+compress_pages_page_decompressed compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of pages decompressed by page compression
+compress_pages_page_compression_error compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of page compression errors
+compress_pages_encrypted compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of pages encrypted
+compress_pages_decrypted compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of pages decrypted
+index_page_splits index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of index page splits
+index_page_merge_attempts index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of index page merge attempts
+index_page_merge_successful index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of successful index page merges
+index_page_reorg_attempts index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of index page reorganization attempts
+index_page_reorg_successful index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of successful index page reorganizations
+index_page_discards index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of index pages discarded
+adaptive_hash_searches adaptive_hash_index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of successful searches using Adaptive Hash Index
+adaptive_hash_searches_btree adaptive_hash_index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of searches using B-tree on an index search
+adaptive_hash_pages_added adaptive_hash_index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of index pages on which the Adaptive Hash Index is built
+adaptive_hash_pages_removed adaptive_hash_index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of index pages whose corresponding Adaptive Hash Index entries were removed
+adaptive_hash_rows_added adaptive_hash_index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Adaptive Hash Index rows added
+adaptive_hash_rows_removed adaptive_hash_index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Adaptive Hash Index rows removed
+adaptive_hash_rows_deleted_no_hash_entry adaptive_hash_index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of rows deleted that did not have corresponding Adaptive Hash Index entries
+adaptive_hash_rows_updated adaptive_hash_index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of Adaptive Hash Index rows updated
+file_num_open_files file_system 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Number of files currently open (innodb_num_open_files)
+ibuf_merges_insert change_buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of inserted records merged by change buffering
+ibuf_merges_delete_mark change_buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of deleted records merged by change buffering
+ibuf_merges_delete change_buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of purge records merged by change buffering
+ibuf_merges_discard_insert change_buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of insert merged operations discarded
+ibuf_merges_discard_delete_mark change_buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of deleted merged operations discarded
+ibuf_merges_discard_delete change_buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of purge merged operations discarded
+ibuf_merges change_buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of change buffer merges
+ibuf_size change_buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Change buffer size in pages
+innodb_master_thread_sleeps server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of times (seconds) master thread sleeps
+innodb_activity_count server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Current server activity count
+innodb_master_active_loops server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of times master thread performs its tasks when server is active
+innodb_master_idle_loops server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of times master thread performs its tasks when server is idle
+innodb_background_drop_table_usec server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Time (in microseconds) spent to process drop table list
+innodb_ibuf_merge_usec server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Time (in microseconds) spent to process change buffer merge
+innodb_log_flush_usec server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Time (in microseconds) spent to flush log records
+innodb_mem_validate_usec server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Time (in microseconds) spent to do memory validation
+innodb_master_purge_usec server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Time (in microseconds) spent by master thread to purge records
+innodb_dict_lru_usec server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Time (in microseconds) spent to process DICT LRU list
+innodb_dict_lru_count_active server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of tables evicted from DICT LRU list in the active loop
+innodb_dict_lru_count_idle server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of tables evicted from DICT LRU list in the idle loop
+innodb_checkpoint_usec server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Time (in microseconds) spent by master thread to do checkpoint
+innodb_dblwr_writes server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of doublewrite operations that have been performed (innodb_dblwr_writes)
+innodb_dblwr_pages_written server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of pages that have been written for doublewrite operations (innodb_dblwr_pages_written)
+innodb_page_size server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value InnoDB page size in bytes (innodb_page_size)
+innodb_rwlock_s_spin_waits server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of rwlock spin waits due to shared latch request
+innodb_rwlock_x_spin_waits server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of rwlock spin waits due to exclusive latch request
+innodb_rwlock_sx_spin_waits server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of rwlock spin waits due to sx latch request
+innodb_rwlock_s_spin_rounds server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of rwlock spin loop rounds due to shared latch request
+innodb_rwlock_x_spin_rounds server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of rwlock spin loop rounds due to exclusive latch request
+innodb_rwlock_sx_spin_rounds server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of rwlock spin loop rounds due to sx latch request
+innodb_rwlock_s_os_waits server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of OS waits due to shared latch request
+innodb_rwlock_x_os_waits server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of OS waits due to exclusive latch request
+innodb_rwlock_sx_os_waits server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of OS waits due to sx latch request
+dml_reads dml 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of rows read
+dml_inserts dml 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of rows inserted
+dml_deletes dml 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of rows deleted
+dml_updates dml 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of rows updated
+dml_system_reads dml 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of system rows read
+dml_system_inserts dml 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of system rows inserted
+dml_system_deletes dml 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of system rows deleted
+dml_system_updates dml 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of system rows updated
+ddl_background_drop_indexes ddl 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of indexes waiting to be dropped after failed index creation
+ddl_background_drop_tables ddl 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of tables in background drop table list
+ddl_online_create_index ddl 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of indexes being created online
+ddl_pending_alter_table ddl 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of ALTER TABLE, CREATE INDEX, DROP INDEX in progress
+ddl_sort_file_alter_table ddl 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of sort files created during alter table
+ddl_log_file_alter_table ddl 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of log files created during alter table
+icp_attempts icp 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of attempts for index push-down condition checks
+icp_no_match icp 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Index push-down condition does not match
+icp_out_of_range icp 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Index push-down condition out of range
+icp_match icp 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Index push-down condition matches
+SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_DEFAULT_STOPWORD;
+value
+a
+about
+an
+are
+as
+at
+be
+by
+com
+de
+en
+for
+from
+how
+i
+in
+is
+it
+la
+of
+on
+or
+that
+the
+this
+to
+was
+what
+when
+where
+who
+will
+with
+und
+the
+www
+SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_DELETED;
+DOC_ID
+SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_BEING_DELETED;
+DOC_ID
+SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHE;
+WORD FIRST_DOC_ID LAST_DOC_ID DOC_COUNT DOC_ID POSITION
+SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE;
+WORD FIRST_DOC_ID LAST_DOC_ID DOC_COUNT DOC_ID POSITION
+SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_CONFIG;
+KEY VALUE
+SELECT * FROM INFORMATION_SCHEMA.INNODB_BUFFER_POOL_STATS;
+POOL_ID POOL_SIZE FREE_BUFFERS DATABASE_PAGES OLD_DATABASE_PAGES MODIFIED_DATABASE_PAGES PENDING_DECOMPRESS PENDING_READS PENDING_FLUSH_LRU PENDING_FLUSH_LIST PAGES_MADE_YOUNG PAGES_NOT_MADE_YOUNG PAGES_MADE_YOUNG_RATE PAGES_MADE_NOT_YOUNG_RATE NUMBER_PAGES_READ NUMBER_PAGES_CREATED NUMBER_PAGES_WRITTEN PAGES_READ_RATE PAGES_CREATE_RATE PAGES_WRITTEN_RATE NUMBER_PAGES_GET HIT_RATE YOUNG_MAKE_PER_THOUSAND_GETS NOT_YOUNG_MAKE_PER_THOUSAND_GETS NUMBER_PAGES_READ_AHEAD NUMBER_READ_AHEAD_EVICTED READ_AHEAD_RATE READ_AHEAD_EVICTED_RATE LRU_IO_TOTAL LRU_IO_CURRENT UNCOMPRESS_TOTAL UNCOMPRESS_CURRENT
+SELECT * FROM INFORMATION_SCHEMA.INNODB_BUFFER_PAGE;
+POOL_ID BLOCK_ID SPACE PAGE_NUMBER PAGE_TYPE FLUSH_TYPE FIX_COUNT IS_HASHED NEWEST_MODIFICATION OLDEST_MODIFICATION ACCESS_TIME TABLE_NAME INDEX_NAME NUMBER_RECORDS DATA_SIZE COMPRESSED_SIZE PAGE_STATE IO_FIX IS_OLD FREE_PAGE_CLOCK
+SELECT * FROM INFORMATION_SCHEMA.INNODB_BUFFER_PAGE_LRU;
+POOL_ID LRU_POSITION SPACE PAGE_NUMBER PAGE_TYPE FLUSH_TYPE FIX_COUNT IS_HASHED NEWEST_MODIFICATION OLDEST_MODIFICATION ACCESS_TIME TABLE_NAME INDEX_NAME NUMBER_RECORDS DATA_SIZE COMPRESSED_SIZE COMPRESSED IO_FIX IS_OLD FREE_PAGE_CLOCK
+SELECT * FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES;
+TABLE_ID NAME FLAG N_COLS SPACE FILE_FORMAT ROW_FORMAT ZIP_PAGE_SIZE SPACE_TYPE
+SELECT * FROM INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS;
+TABLE_ID NAME STATS_INITIALIZED NUM_ROWS CLUST_INDEX_SIZE OTHER_INDEX_SIZE MODIFIED_COUNTER AUTOINC REF_COUNT
+SELECT * FROM INFORMATION_SCHEMA.INNODB_SYS_INDEXES;
+INDEX_ID NAME TABLE_ID TYPE N_FIELDS PAGE_NO SPACE MERGE_THRESHOLD
+SELECT * FROM INFORMATION_SCHEMA.INNODB_SYS_COLUMNS;
+TABLE_ID NAME POS MTYPE PRTYPE LEN
+SELECT * FROM INFORMATION_SCHEMA.INNODB_SYS_FIELDS;
+INDEX_ID NAME POS
+SELECT * FROM INFORMATION_SCHEMA.INNODB_SYS_FOREIGN;
+ID FOR_NAME REF_NAME N_COLS TYPE
+SELECT * FROM INFORMATION_SCHEMA.INNODB_SYS_FOREIGN_COLS;
+ID FOR_COL_NAME REF_COL_NAME POS
+SELECT * FROM INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES;
+SPACE NAME FLAG FILE_FORMAT ROW_FORMAT PAGE_SIZE ZIP_PAGE_SIZE SPACE_TYPE FS_BLOCK_SIZE FILE_SIZE ALLOCATED_SIZE
+SELECT * FROM INFORMATION_SCHEMA.INNODB_SYS_DATAFILES;
+SPACE PATH
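
Every INNODB_METRICS row above shows STATUS=disabled because this test runs with InnoDB's instrumentation switched off. On a server where InnoDB is active, individual counters can normally be toggled at runtime; a hedged sketch (variable availability depends on the server build):

SET GLOBAL innodb_monitor_enable = 'lock_deadlocks';
SELECT NAME, `COUNT`, STATUS FROM INFORMATION_SCHEMA.INNODB_METRICS
WHERE NAME = 'lock_deadlocks';
SET GLOBAL innodb_monitor_disable = 'lock_deadlocks';
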
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/insert.result b/storage/rocksdb/mysql-test/rocksdb/r/insert.result
new file mode 100644
index 00000000000..a1fb3ae90be
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/insert.result
@@ -0,0 +1,202 @@
+DROP TABLE IF EXISTS t1, t2;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (100,'foobar',100),(1,'a',101),(2,'b',103),(3,'c',104),(4,'d',105),(5,'e',106);
+SELECT a,b FROM t1;
+a b
+1 a
+100 foobar
+2 b
+3 c
+4 d
+5 e
+INSERT t1 VALUE (10,'foo',107),(11,'abc',108);
+SELECT a,b FROM t1;
+a b
+1 a
+10 foo
+100 foobar
+11 abc
+2 b
+3 c
+4 d
+5 e
+INSERT INTO t1 (b,a) VALUES ('test',0);
+SELECT a,b FROM t1;
+a b
+0 test
+1 a
+10 foo
+100 foobar
+11 abc
+2 b
+3 c
+4 d
+5 e
+INSERT INTO t1 VALUES (DEFAULT,DEFAULT,NULL);
+SELECT a,b FROM t1;
+a b
+0 test
+1 a
+10 foo
+100 foobar
+11 abc
+2 b
+3 c
+4 d
+5 e
+NULL NULL
+INSERT t1 (a) VALUE (10),(20);
+SELECT a,b FROM t1;
+a b
+0 test
+1 a
+10 NULL
+10 foo
+100 foobar
+11 abc
+2 b
+20 NULL
+3 c
+4 d
+5 e
+NULL NULL
+INSERT INTO t1 SET a = 11, b = 'f';
+SELECT a,b FROM t1;
+a b
+0 test
+1 a
+10 NULL
+10 foo
+100 foobar
+11 abc
+11 f
+2 b
+20 NULL
+3 c
+4 d
+5 e
+NULL NULL
+INSERT t1 SET b = DEFAULT;
+SELECT a,b FROM t1;
+a b
+0 test
+1 a
+10 NULL
+10 foo
+100 foobar
+11 abc
+11 f
+2 b
+20 NULL
+3 c
+4 d
+5 e
+NULL NULL
+NULL NULL
+CREATE TABLE t2 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t2 SELECT a,b,pk FROM t1;
+INSERT INTO t1 (a) SELECT a FROM t2 WHERE b = 'foo';
+SELECT a,b FROM t1;
+a b
+0 test
+1 a
+10 NULL
+10 NULL
+10 foo
+100 foobar
+11 abc
+11 f
+2 b
+20 NULL
+3 c
+4 d
+5 e
+NULL NULL
+NULL NULL
+INSERT t1 (a,b) SELECT a,b FROM t1;
+SELECT a,b FROM t1;
+a b
+0 test
+0 test
+1 a
+1 a
+10 NULL
+10 NULL
+10 NULL
+10 NULL
+10 foo
+10 foo
+100 foobar
+100 foobar
+11 abc
+11 abc
+11 f
+11 f
+2 b
+2 b
+20 NULL
+20 NULL
+3 c
+3 c
+4 d
+4 d
+5 e
+5 e
+NULL NULL
+NULL NULL
+NULL NULL
+NULL NULL
+DROP TABLE t1, t2;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+BEGIN;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(100,'foo');
+INSERT t1 (a,b) VALUE (10,'foo'),(11,'abc');
+COMMIT;
+SELECT a,b FROM t1;
+a b
+1 a
+10 foo
+100 foo
+11 abc
+2 b
+3 c
+4 d
+5 e
+BEGIN;
+INSERT INTO t1 (b,a) VALUES ('test',0);
+SAVEPOINT spt1;
+INSERT INTO t1 (a,b) VALUES (DEFAULT,DEFAULT);
+RELEASE SAVEPOINT spt1;
+INSERT INTO t1 (a,b) VALUES (DEFAULT,DEFAULT);
+ROLLBACK;
+SELECT a,b FROM t1;
+a b
+1 a
+10 foo
+100 foo
+11 abc
+2 b
+3 c
+4 d
+5 e
+BEGIN;
+INSERT t1 (a) VALUE (10),(20);
+SAVEPOINT spt1;
+INSERT INTO t1 SET a = 11, b = 'f';
+INSERT t1 SET b = DEFAULT;
+ROLLBACK TO SAVEPOINT spt1;
+ERROR HY000: MyRocks currently does not support ROLLBACK TO SAVEPOINT if modifying rows.
+INSERT INTO t1 (b,a) VALUES ('test1',10);
+COMMIT;
+ERROR HY000: This transaction was rolled back and cannot be committed. Only supported operation is to roll it back, so all pending changes will be discarded. Please restart another transaction.
+SELECT a,b FROM t1;
+a b
+1 a
+10 foo
+100 foo
+11 abc
+2 b
+3 c
+4 d
+5 e
+DROP TABLE t1;
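
Note: the two errors near the end of this file capture a MyRocks-specific savepoint rule: ROLLBACK TO SAVEPOINT is rejected once the transaction has modified rows, and after that error the transaction is poisoned, so COMMIT is refused and a full ROLLBACK is the only way out. A minimal sketch of the pattern, reusing the table from the test above:

  BEGIN;
  INSERT INTO t1 (a) VALUES (10);     # rows already modified in this transaction
  SAVEPOINT spt1;
  INSERT INTO t1 (a) VALUES (11);
  ROLLBACK TO SAVEPOINT spt1;         # ERROR HY000: not supported if modifying rows
  ROLLBACK;                           # required; COMMIT now fails, pending changes are discarded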
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/insert_optimized_config.result b/storage/rocksdb/mysql-test/rocksdb/r/insert_optimized_config.result
new file mode 100644
index 00000000000..ded48057854
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/insert_optimized_config.result
@@ -0,0 +1,16 @@
+DROP TABLE IF EXISTS t1;
+create table t1(
+id bigint not null primary key,
+i1 bigint, #unique
+i2 bigint, #repeating
+c1 varchar(20), #unique
+c2 varchar(20), #repeating
+index t1_2(i1)
+) engine=rocksdb;
+select count(*), sum(id), sum(i1), sum(i2) from t1;
+count(*) sum(id) sum(i1) sum(i2)
+50000 1250025000 1250025000 124980000
+select count(*), sum(id), sum(i1), sum(i2) from t1;
+count(*) sum(id) sum(i1) sum(i2)
+50000 1250025000 1250025000 124980000
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/insert_with_keys.result b/storage/rocksdb/mysql-test/rocksdb/r/insert_with_keys.result
new file mode 100644
index 00000000000..6d4139caefa
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/insert_with_keys.result
@@ -0,0 +1,262 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY, KEY(b)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e');
+INSERT INTO t1 (a,b) VALUES (100,'a'), (6,'f');
+INSERT INTO t1 (a,b) VALUES (30,'m'),(29,'n');
+INSERT INTO t1 (a,b) VALUES (1,'a'),(12345,'z');
+INSERT INTO t1 (a,b) VALUES (3,'a'),(0,'');
+SELECT a,b FROM t1;
+a b
+0
+1 a
+1 a
+100 a
+12345 z
+2 b
+29 n
+3 a
+3 c
+30 m
+4 d
+5 e
+6 f
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX(a)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e');
+INSERT INTO t1 (a,b) VALUES (100,'a'), (6,'f');
+INSERT INTO t1 (a,b) VALUES (30,'m'),(29,'n');
+INSERT INTO t1 (a,b) VALUES (1,'a'),(12345,'z');
+ERROR 23000: Duplicate entry '1' for key 'a'
+INSERT INTO t1 (a,b) VALUES (3,'a'),(0,'');
+ERROR 23000: Duplicate entry '3' for key 'a'
+INSERT INTO t1 (a,b) VALUES (0,'');
+SELECT a,b FROM t1;
+a b
+0
+1 a
+100 a
+2 b
+29 n
+3 c
+30 m
+4 d
+5 e
+6 f
+INSERT IGNORE INTO t1 (a,b) VALUES (1,'a'),(12345,'z');
+Warnings:
+Warning 1062 Duplicate entry '1' for key 'a'
+INSERT INTO t1 (a,b) VALUES (3,'a'),(4,'d') ON DUPLICATE KEY UPDATE a = a+10;
+SELECT a,b FROM t1;
+a b
+0
+1 a
+100 a
+12345 z
+13 c
+14 d
+2 b
+29 n
+30 m
+5 e
+6 f
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX(a,b)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e');
+INSERT INTO t1 (a,b) VALUES (100,'a'), (6,'f');
+INSERT INTO t1 (a,b) VALUES (30,'m'),(29,'n');
+INSERT INTO t1 (a,b) VALUES (100,'b'), (2,'c');
+INSERT INTO t1 (a,b) VALUES (1,'a'),(12345,'z');
+ERROR 23000: Duplicate entry '1-a' for key 'a'
+SELECT a,b FROM t1;
+a b
+1 a
+100 a
+100 b
+2 b
+2 c
+29 n
+3 c
+30 m
+4 d
+5 e
+6 f
+INSERT IGNORE INTO t1 (a,b) VALUES (1,'a'),(12345,'z');
+Warnings:
+Warning 1062 Duplicate entry '1-a' for key 'a'
+INSERT INTO t1 (a,b) VALUES (1,'a'),(12345,'z') ON DUPLICATE KEY UPDATE a = a+VALUES(a);
+SELECT a,b FROM t1;
+a b
+100 a
+100 b
+2 a
+2 b
+2 c
+24690 z
+29 n
+3 c
+30 m
+4 d
+5 e
+6 f
+INSERT INTO t1 (a,b) VALUES (101,'x'),(101,'x');
+ERROR 23000: Duplicate entry '101-x' for key 'a'
+SELECT a,b FROM t1;
+a b
+100 a
+100 b
+2 a
+2 b
+2 c
+24690 z
+29 n
+3 c
+30 m
+4 d
+5 e
+6 f
+DROP TABLE t1;
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e');
+INSERT INTO t1 (a,b) VALUES (100,'a'), (6,'f');
+INSERT INTO t1 (a,b) VALUES (30,'m'),(29,'n');
+INSERT INTO t1 (a,b) VALUES (1,'a'),(12345,'z');
+ERROR 23000: Duplicate entry '1' for key 'PRIMARY'
+INSERT INTO t1 (a,b) VALUES (3,'a'),(0,'');
+ERROR 23000: Duplicate entry '3' for key 'PRIMARY'
+INSERT INTO t1 (a,b) VALUES (0,'');
+SELECT a,b FROM t1;
+a b
+0
+1 a
+100 a
+2 b
+29 n
+3 c
+30 m
+4 d
+5 e
+6 f
+INSERT IGNORE INTO t1 (a,b) VALUES (1,'a'),(12345,'z');
+Warnings:
+Warning 1062 Duplicate entry '1' for key 'PRIMARY'
+INSERT INTO t1 (a,b) VALUES (1,'a'),(12345,'z') ON DUPLICATE KEY UPDATE b = CONCAT(b,b);
+SELECT a,b FROM t1;
+a b
+0
+1 aa
+100 a
+12345 zz
+2 b
+29 n
+3 c
+30 m
+4 d
+5 e
+6 f
+DROP TABLE t1;
+
+INSERT on DUPLICATE KEY UPDATE with multiple keys
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b VARCHAR(255)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (3,'a'), (4,'a'), (5,'a'), (6,'a'), (7,'a'), (8,'a'), (9,'a'), (10,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (3,'a'), (4,'a'), (5,'a'), (6,'a'), (7,'a'), (8,'a'), (9,'a'), (10,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+SELECT a,b FROM t1;
+a b
+1 aaaaaaaaaaaaaaaaaa
+10 aa
+2 aaaaaaa
+3 aa
+4 aa
+5 aa
+6 aa
+7 aa
+8 aa
+9 aa
+DROP TABLE t1;
+
+INSERT on DUPLICATE KEY UPDATE with secondary key
+
+CREATE TABLE t1 (a INT, b CHAR(8), c INT DEFAULT 0, pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX(a,b)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c'), (4,'d'), (5,'e'), (6,'f'), (7,'g'), (8,'h'), (9,'i'), (10,'j') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c'), (4,'d'), (5,'e'), (6,'f'), (7,'g'), (8,'h'), (9,'i'), (10,'j') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c') ON DUPLICATE KEY UPDATE c = c + 1;
+SELECT a,b,c,pk FROM t1;
+a b c pk
+1 a 22 1
+10 j 1 11
+2 b 6 3
+2 c 4 14
+3 c 1 4
+4 d 1 5
+5 e 1 6
+6 f 1 7
+7 g 1 8
+8 h 1 9
+9 i 1 10
+DROP TABLE t1;
+
+Disable caching and see if it still functions properly
+
+SELECT @@rocksdb_enable_insert_with_update_caching;
+@@rocksdb_enable_insert_with_update_caching
+1
+SET GLOBAL rocksdb_enable_insert_with_update_caching=0;
+SELECT @@rocksdb_enable_insert_with_update_caching;
+@@rocksdb_enable_insert_with_update_caching
+0
+
+INSERT on DUPLICATE KEY UPDATE with multiple keys
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b VARCHAR(255)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (3,'a'), (4,'a'), (5,'a'), (6,'a'), (7,'a'), (8,'a'), (9,'a'), (10,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (3,'a'), (4,'a'), (5,'a'), (6,'a'), (7,'a'), (8,'a'), (9,'a'), (10,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+SELECT a,b FROM t1;
+a b
+1 aaaaaaaaaaaaaaaaaa
+10 aa
+2 aaaaaaa
+3 aa
+4 aa
+5 aa
+6 aa
+7 aa
+8 aa
+9 aa
+DROP TABLE t1;
+
+INSERT on DUPLICATE KEY UPDATE with secondary key
+
+CREATE TABLE t1 (a INT, b CHAR(8), c INT DEFAULT 0, pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX(a,b)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c'), (4,'d'), (5,'e'), (6,'f'), (7,'g'), (8,'h'), (9,'i'), (10,'j') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c'), (4,'d'), (5,'e'), (6,'f'), (7,'g'), (8,'h'), (9,'i'), (10,'j') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c') ON DUPLICATE KEY UPDATE c = c + 1;
+SELECT a,b,c,pk FROM t1;
+a b c pk
+1 a 22 1
+10 j 1 11
+2 b 6 3
+2 c 4 14
+3 c 1 4
+4 d 1 5
+5 e 1 6
+6 f 1 7
+7 g 1 8
+8 h 1 9
+9 i 1 10
+DROP TABLE t1;
+
+Cleanup
+
+SET GLOBAL rocksdb_enable_insert_with_update_caching=1;
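
Note: the second half of this file reruns the same ON DUPLICATE KEY UPDATE workloads with rocksdb_enable_insert_with_update_caching switched off and expects identical results, i.e. the cache is a pure optimization. A minimal toggle sketch, assuming a session with privileges to set global variables:

  SET GLOBAL rocksdb_enable_insert_with_update_caching=0;
  # ... rerun the INSERT ... ON DUPLICATE KEY UPDATE workload and compare ...
  SET GLOBAL rocksdb_enable_insert_with_update_caching=1;   # restore the default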
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue100.result b/storage/rocksdb/mysql-test/rocksdb/r/issue100.result
new file mode 100644
index 00000000000..ee73ac3e134
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/issue100.result
@@ -0,0 +1,23 @@
+create table t1 (
+id int,
+value int,
+primary key (id)
+) engine=rocksdb;
+insert into t1 values(1,1),(2,2);
+set autocommit=0;
+begin;
+insert into t1 values (50,50);
+select * from t1;
+id value
+1 1
+2 2
+50 50
+update t1 set id=id+100;
+select * from t1;
+id value
+101 1
+102 2
+150 50
+rollback;
+set autocommit=1;
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue100_delete.result b/storage/rocksdb/mysql-test/rocksdb/r/issue100_delete.result
new file mode 100644
index 00000000000..9e55ebd006f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/issue100_delete.result
@@ -0,0 +1,17 @@
+create table ten(a int primary key);
+insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table one_k(a int primary key);
+insert into one_k select A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C;
+create table t100(pk int primary key, a int, b int, key(a));
+insert into t100 select a,a,a from test.one_k;
+set global rocksdb_force_flush_memtable_now=1;
+select num_rows, entry_deletes, entry_singledeletes from information_schema.rocksdb_index_file_map where index_number = (select max(index_number) from information_schema.rocksdb_index_file_map) order by entry_deletes, entry_singledeletes;
+num_rows entry_deletes entry_singledeletes
+1000 0 0
+update t100 set a=a+1;
+set global rocksdb_force_flush_memtable_now=1;
+select num_rows, entry_deletes, entry_singledeletes from information_schema.rocksdb_index_file_map where index_number = (select max(index_number) from information_schema.rocksdb_index_file_map) order by entry_deletes, entry_singledeletes;
+num_rows entry_deletes entry_singledeletes
+1000 0 0
+1000 0 1000
+drop table ten, t100, one_k;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue111.result b/storage/rocksdb/mysql-test/rocksdb/r/issue111.result
new file mode 100644
index 00000000000..796a637e787
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/issue111.result
@@ -0,0 +1,37 @@
+connect con2,localhost,root,,;
+connection default;
+create table t1 (
+pk int not null primary key,
+col1 int not null,
+col2 int not null,
+key(col1)
+) engine=rocksdb;
+create table ten(a int primary key);
+insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table one_k(a int primary key);
+insert into one_k select A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C;
+insert into t1 select a,a,a from one_k;
+# Start the transaction, get the snapshot
+begin;
+select * from t1 where col1<10;
+pk col1 col2
+0 0 0
+1 1 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+7 7 7
+8 8 8
+9 9 9
+# Connect with another connection and make a conflicting change
+connection con2;
+begin;
+update t1 set col2=123456 where pk=0;
+commit;
+connection default;
+update t1 set col2=col2+1 where col1 < 10 limit 5;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+disconnect con2;
+drop table t1, ten, one_k;
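
Note: the 40001 error above is MyRocks' snapshot-conflict report: under the default REPEATABLE READ level, a write that touches a row another transaction committed after this transaction's snapshot was taken fails immediately instead of waiting. A minimal retry sketch, assuming the application loops on SQLSTATE 40001:

  BEGIN;
  UPDATE t1 SET col2=col2+1 WHERE col1 < 10 LIMIT 5;  # may fail with 40001
  # on ERROR 40001: ROLLBACK, BEGIN again (fresh snapshot), reissue the UPDATE
  COMMIT;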
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue243_transactionStatus.result b/storage/rocksdb/mysql-test/rocksdb/r/issue243_transactionStatus.result
new file mode 100644
index 00000000000..d4d211b9288
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/issue243_transactionStatus.result
@@ -0,0 +1,161 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+id INT,
+val1 INT,
+val2 INT,
+PRIMARY KEY (id)
+) ENGINE=rocksdb;
+INSERT INTO t1 VALUES(1,1,1),(2,1,2);
+SELECT * FROM t1;
+id val1 val2
+1 1 1
+2 1 2
+UPDATE t1 SET val1=2 WHERE id=2;
+SELECT * FROM t1;
+id val1 val2
+1 1 1
+2 2 2
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+SET AUTOCOMMIT=0;
+START TRANSACTION;
+INSERT INTO t1 VALUES(20,1,1),(30,30,30);
+SELECT * FROM t1;
+id val1 val2
+1 1 1
+2 2 2
+20 1 1
+30 30 30
+UPDATE t1 SET val1=20, val2=20 WHERE id=20;
+SELECT * FROM t1;
+id val1 val2
+1 1 1
+2 2 2
+20 20 20
+30 30 30
+DELETE FROM t1 WHERE id=30;
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+---SNAPSHOT, ACTIVE NUM sec
+MySQL thread id TID, OS thread handle PTR, query id QID localhost root ACTION
+SHOW ENGINE rocksdb TRANSACTION STATUS
+lock count 8, write count 4
+insert count 2, update count 1, delete count 1
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+ROLLBACK;
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+START TRANSACTION;
+INSERT INTO t1 VALUES(40,40,40);
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+---SNAPSHOT, ACTIVE NUM sec
+MySQL thread id TID, OS thread handle PTR, query id QID localhost root ACTION
+SHOW ENGINE rocksdb TRANSACTION STATUS
+lock count 2, write count 1
+insert count 1, update count 0, delete count 0
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+COMMIT;
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+SET AUTOCOMMIT=1;
+DROP TABLE t1;
+DROP TABLE IF EXISTS t2;
+CREATE TABLE t2 (
+id1 INT,
+id2 INT,
+value INT,
+PRIMARY KEY (id1),
+KEY (id2)
+) ENGINE=rocksdb;
+SET AUTOCOMMIT=0;
+START TRANSACTION;
+INSERT INTO t2 VALUES(1,2,0),(10,20,30);
+UPDATE t2 SET value=3 WHERE id2=2;
+DELETE FROM t2 WHERE id1=10;
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+---SNAPSHOT, ACTIVE NUM sec
+MySQL thread id TID, OS thread handle PTR, query id QID localhost root ACTION
+SHOW ENGINE rocksdb TRANSACTION STATUS
+lock count 9, write count 7
+insert count 2, update count 1, delete count 1
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+ROLLBACK;
+SET AUTOCOMMIT=1;
+DROP TABLE t2;
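
Note: in the status output above, "lock count" and "write count" track the pending work of each open transaction (with inserts, updates and deletes broken out separately), and the per-session entry under SNAPSHOTS disappears once the transaction commits or rolls back. A minimal probe sketch:

  BEGIN;
  INSERT INTO t2 VALUES (1,2,0);
  SHOW ENGINE rocksdb TRANSACTION STATUS;  # lists this session with its counters
  ROLLBACK;
  SHOW ENGINE rocksdb TRANSACTION STATUS;  # snapshot list is empty again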
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue255.result b/storage/rocksdb/mysql-test/rocksdb/r/issue255.result
new file mode 100644
index 00000000000..c1ce3be2276
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/issue255.result
@@ -0,0 +1,68 @@
+CREATE TABLE t1 (pk BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
+INSERT INTO t1 VALUES (5);
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB # Fixed # # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL
+INSERT INTO t1 VALUES ('538647864786478647864');
+Warnings:
+Warning 1264 Out of range value for column 'pk' at row 1
+SELECT * FROM t1;
+pk
+5
+9223372036854775807
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 2 22 44 0 0 0 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+INSERT INTO t1 VALUES ();
+ERROR 23000: Duplicate entry '9223372036854775807' for key 'PRIMARY'
+SELECT * FROM t1;
+pk
+5
+9223372036854775807
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+INSERT INTO t1 VALUES ();
+ERROR 23000: Duplicate entry '9223372036854775807' for key 'PRIMARY'
+SELECT * FROM t1;
+pk
+5
+9223372036854775807
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+DROP TABLE t1;
+CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
+INSERT INTO t1 VALUES (5);
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB # Fixed # # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL
+INSERT INTO t1 VALUES (1000);
+Warnings:
+Warning 1264 Out of range value for column 'pk' at row 1
+SELECT * FROM t1;
+pk
+5
+127
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+INSERT INTO t1 VALUES ();
+ERROR 23000: Duplicate entry '127' for key 'PRIMARY'
+SELECT * FROM t1;
+pk
+5
+127
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+INSERT INTO t1 VALUES ();
+ERROR 23000: Duplicate entry '127' for key 'PRIMARY'
+SELECT * FROM t1;
+pk
+5
+127
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue290.result b/storage/rocksdb/mysql-test/rocksdb/r/issue290.result
new file mode 100644
index 00000000000..1a83a93bcbb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/issue290.result
@@ -0,0 +1,28 @@
+CREATE TABLE `linktable` (
+`id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+`visibility` tinyint(3) NOT NULL DEFAULT '0',
+`data` varchar(255) NOT NULL DEFAULT '',
+`time` bigint(20) unsigned NOT NULL DEFAULT '0',
+`version` int(11) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (link_type, `id1`,`id2`) COMMENT 'cf_link_pk',
+KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`,`data`) COMMENT 'rev:cf_link_id1_type'
+) ENGINE=RocksDB DEFAULT COLLATE=latin1_bin;
+set global rocksdb_force_flush_memtable_now=1;
+insert into linktable (id1, link_type, id2) values (2, 1, 1);
+insert into linktable (id1, link_type, id2) values (2, 1, 2);
+insert into linktable (id1, link_type, id2) values (2, 1, 3);
+insert into linktable (id1, link_type, id2) values (2, 1, 4);
+insert into linktable (id1, link_type, id2) values (2, 1, 5);
+insert into linktable (id1, link_type, id2) values (2, 1, 6);
+insert into linktable (id1, link_type, id2) values (2, 1, 7);
+insert into linktable (id1, link_type, id2) values (2, 1, 8);
+insert into linktable (id1, link_type, id2) values (2, 1, 9);
+insert into linktable (id1, link_type, id2) values (2, 1, 10);
+explain select id1, id2, link_type, data from linktable force index(primary) where id1=2 and link_type=1 and (id2=1 or id2=2 or id2=3 or id2=4 or id2=5);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE linktable range PRIMARY PRIMARY 24 NULL # Using where
+drop table linktable;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue314.result b/storage/rocksdb/mysql-test/rocksdb/r/issue314.result
new file mode 100644
index 00000000000..eee90800286
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/issue314.result
@@ -0,0 +1,12 @@
+drop table if exists t1;
+SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+CREATE TABLE t1(a int);
+SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
+INSERT INTO t1 VALUES(1);
+select * from t1;
+ERROR HY000: MyRocks supports only READ COMMITTED and REPEATABLE READ isolation levels. Please change from current isolation level SERIALIZABLE
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+select * from t1;
+a
+1
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue495.result b/storage/rocksdb/mysql-test/rocksdb/r/issue495.result
new file mode 100644
index 00000000000..c7ac34c6294
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/issue495.result
@@ -0,0 +1,30 @@
+drop table if exists t;
+create table t (
+a int,
+b int,
+c varchar(12249) collate latin1_bin,
+d datetime,
+e int,
+f int,
+g blob,
+h int,
+i int,
+key (b,e),
+key (h,b)
+) engine=rocksdb
+partition by linear hash (i) partitions 8 ;
+insert into t values (1,1,'a',now(),1,1,'a',1,1);
+insert into t values (1,1,'a',now(),1,1,'a',1,1);
+insert into t values (1,1,'a',now(),1,1,'a',1,1);
+insert into t values (1,1,'a',now(),1,1,'a',1,1);
+insert into t values (1,1,'a',now(),1,1,'a',1,1);
+insert into t values (1,1,'a',now(),1,1,'a',1,1);
+insert into t values (1,1,'a',now(),1,1,'a',1,1);
+insert into t values (1,1,'a',now(),1,1,'a',1,1);
+select i from t group by h;
+i
+1
+select i from t group by h;
+i
+1
+drop table t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue884.result b/storage/rocksdb/mysql-test/rocksdb/r/issue884.result
new file mode 100644
index 00000000000..acfaca96d68
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/issue884.result
@@ -0,0 +1,79 @@
+create table test (
+a bigint(20) not null,
+b bigint(20) not null,
+c varchar(500) not null,
+d bigint(20) not null,
+e bigint(20) not null,
+f varchar(500) not null,
+g varchar(500) not null,
+h varchar(500) not null,
+i varchar(1000) not null,
+j varchar(16384) not null,
+k varchar(200) not null,
+l varchar(500) not null,
+m varchar(100) not null,
+n bigint(20) not null,
+primary key (a, b, m, c(100), l(100), d, e, f(100), g(100), h(100), n),
+key n (n),
+key d (d, a)
+) engine = rocksdb default charset = latin1;
+Table Op Msg_type Msg_text
+test.test analyze status OK
+explain
+select * from test where d = 10 and a = 10 and b = 2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE test index_merge PRIMARY,d d,PRIMARY 24,16 NULL # Using intersect(d,PRIMARY); Using where
+select * from test where d = 10 and a = 10 and b = 2;
+a b c d e f g h i j k l m n
+10 2 i 10 950 f g h i j k l m 950
+10 2 i 10 951 f g h i j k l m 951
+10 2 i 10 952 f g h i j k l m 952
+10 2 i 10 953 f g h i j k l m 953
+10 2 i 10 954 f g h i j k l m 954
+10 2 i 10 955 f g h i j k l m 955
+10 2 i 10 956 f g h i j k l m 956
+10 2 i 10 957 f g h i j k l m 957
+10 2 i 10 958 f g h i j k l m 958
+10 2 i 10 959 f g h i j k l m 959
+10 2 i 10 960 f g h i j k l m 960
+10 2 i 10 961 f g h i j k l m 961
+10 2 i 10 962 f g h i j k l m 962
+10 2 i 10 963 f g h i j k l m 963
+10 2 i 10 964 f g h i j k l m 964
+10 2 i 10 965 f g h i j k l m 965
+10 2 i 10 966 f g h i j k l m 966
+10 2 i 10 967 f g h i j k l m 967
+10 2 i 10 968 f g h i j k l m 968
+10 2 i 10 969 f g h i j k l m 969
+10 2 i 10 970 f g h i j k l m 970
+10 2 i 10 971 f g h i j k l m 971
+10 2 i 10 972 f g h i j k l m 972
+10 2 i 10 973 f g h i j k l m 973
+10 2 i 10 974 f g h i j k l m 974
+10 2 i 10 975 f g h i j k l m 975
+10 2 i 10 976 f g h i j k l m 976
+10 2 i 10 977 f g h i j k l m 977
+10 2 i 10 978 f g h i j k l m 978
+10 2 i 10 979 f g h i j k l m 979
+10 2 i 10 980 f g h i j k l m 980
+10 2 i 10 981 f g h i j k l m 981
+10 2 i 10 982 f g h i j k l m 982
+10 2 i 10 983 f g h i j k l m 983
+10 2 i 10 984 f g h i j k l m 984
+10 2 i 10 985 f g h i j k l m 985
+10 2 i 10 986 f g h i j k l m 986
+10 2 i 10 987 f g h i j k l m 987
+10 2 i 10 988 f g h i j k l m 988
+10 2 i 10 989 f g h i j k l m 989
+10 2 i 10 990 f g h i j k l m 990
+10 2 i 10 991 f g h i j k l m 991
+10 2 i 10 992 f g h i j k l m 992
+10 2 i 10 993 f g h i j k l m 993
+10 2 i 10 994 f g h i j k l m 994
+10 2 i 10 995 f g h i j k l m 995
+10 2 i 10 996 f g h i j k l m 996
+10 2 i 10 997 f g h i j k l m 997
+10 2 i 10 998 f g h i j k l m 998
+10 2 i 10 999 f g h i j k l m 999
+10 2 i 10 1000 f g h i j k l m 1000
+drop table test;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue896.result b/storage/rocksdb/mysql-test/rocksdb/r/issue896.result
new file mode 100644
index 00000000000..917c95733f7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/issue896.result
@@ -0,0 +1,17 @@
+CREATE TABLE `t1` (
+`a` bigint(20) NOT NULL,
+`b` varchar(10) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,
+`u` bigint(20) unsigned NOT NULL,
+`d` bigint(20) DEFAULT NULL,
+PRIMARY KEY (`a`,`b`),
+KEY `d` (`d`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin COMMENT='ttl_duration=1000;ttl_col=u';
+INSERT INTO t1 VALUES (100, 'aaabbb', UNIX_TIMESTAMP(), 200);
+EXPLAIN SELECT COUNT(*) FROM t1 FORCE INDEX(d);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL d 11 NULL # Using index
+# segfault here without the fix
+SELECT COUNT(*) FROM t1 FORCE INDEX(d);
+COUNT(*)
+1
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue900.result b/storage/rocksdb/mysql-test/rocksdb/r/issue900.result
new file mode 100644
index 00000000000..062d0da0864
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/issue900.result
@@ -0,0 +1,11 @@
+CREATE TABLE t1(c1 VARCHAR(1) CHARACTER SET 'utf8' COLLATE 'utf8_bin', c2 YEAR, c3 REAL(1,0) UNSIGNED, PRIMARY KEY(c1)) ENGINE=RocksDB;
+INSERT INTO t1 VALUES(0,'0','0');
+INSERT INTO t1 VALUES('{0}','0','0');
+Warnings:
+Warning 1265 Data truncated for column 'c1' at row 1
+INSERT INTO t1 VALUES('1','0','1');
+ALTER TABLE t1 ADD INDEX(c3), ADD UNIQUE (c3);
+ERROR 23000: Duplicate entry '0' for key 'c3_2'
+SELECT c3 FROM t1 FORCE INDEX(c3) ORDER BY c3;
+ERROR 42000: Key 'c3' doesn't exist in table 't1'
+DROP TABLE t1;
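
Note: the pair of errors above shows the ALTER behaving atomically: ADD INDEX(c3) and ADD UNIQUE(c3) are applied together, so when the unique index hits the duplicate entry '0' the whole statement rolls back and neither index exists, which is why the follow-up FORCE INDEX(c3) reports an unknown key. A minimal check sketch:

  ALTER TABLE t1 ADD INDEX(c3), ADD UNIQUE (c3);   # fails on the duplicate entry
  SHOW INDEX FROM t1;                              # should list no c3 index (ALTER rolled back)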
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/iterator_bounds.result b/storage/rocksdb/mysql-test/rocksdb/r/iterator_bounds.result
new file mode 100644
index 00000000000..600f19e0d61
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/iterator_bounds.result
@@ -0,0 +1,15 @@
+create table t (i int primary key) engine=rocksdb;
+drop table t;
+create table t (i int primary key, j int, key(j) comment 'rev:bf5_2') engine=rocksdb;
+select RIGHT(HEX(index_number), 2) from information_schema.rocksdb_ddl where table_name = 't';
+RIGHT(HEX(index_number), 2)
+FE
+FF
+insert into t values (1, 1);
+select j from t order by j asc;
+j
+1
+select j from t order by j desc;
+j
+1
+drop table t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/kill.result b/storage/rocksdb/mysql-test/rocksdb/r/kill.result
new file mode 100644
index 00000000000..19614e1fd95
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/kill.result
@@ -0,0 +1,6 @@
+CREATE TABLE t1 (pk INT PRIMARY KEY) ENGINE=RocksDB;
+LOCK TABLE t1 WRITE;
+SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+FLUSH TABLES;
+ERROR HY000: MyRocks supports only READ COMMITTED and REPEATABLE READ isolation levels. Please change from current isolation level SERIALIZABLE
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/level_read_committed.result b/storage/rocksdb/mysql-test/rocksdb/r/level_read_committed.result
new file mode 100644
index 00000000000..b68b37cf6c5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/level_read_committed.result
@@ -0,0 +1,111 @@
+DROP TABLE IF EXISTS t1;
+connect con1,localhost,root,,;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+connect con2,localhost,root,,;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+connection con1;
+CREATE TABLE t1 (a INT, pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+START TRANSACTION;
+SELECT a FROM t1;
+a
+connection con2;
+BEGIN;
+INSERT INTO t1 (a) VALUES(1);
+connection con1;
+SELECT a FROM t1;
+a
+connection con2;
+INSERT INTO t1 (a) VALUES (2);
+connection con1;
+SELECT a FROM t1;
+a
+INSERT INTO t1 (a) SELECT a+100 FROM t1;
+SELECT a FROM t1;
+a
+connection con2;
+SELECT a FROM t1;
+a
+1
+2
+COMMIT;
+SELECT a FROM t1;
+a
+1
+2
+connection con1;
+SELECT a FROM t1;
+a
+1
+2
+INSERT INTO t1 (a) SELECT a+200 FROM t1;
+SELECT a FROM t1;
+a
+1
+2
+201
+202
+COMMIT;
+SELECT a FROM t1;
+a
+1
+2
+201
+202
+connection con2;
+SELECT a FROM t1;
+a
+1
+2
+201
+202
+connection default;
+CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t2 (a) VALUES (1);
+COMMIT;
+connection con1;
+BEGIN;
+SELECT a from t2;
+a
+1
+INSERT INTO t2 (a) VALUES (1), (3);
+ERROR 23000: Duplicate entry '1' for key 'PRIMARY'
+connection con2;
+INSERT INTO t2 (a) VALUES (2);
+COMMIT;
+connection con1;
+SELECT a from t2;
+a
+1
+2
+COMMIT;
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t1;
+DROP TABLE t2;
+CREATE TABLE t3 (
+pk int unsigned PRIMARY KEY,
+count int unsigned DEFAULT '0'
+) ENGINE=ROCKSDB;
+connect con1,localhost,root,,;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+connect con2,localhost,root,,;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+connection con1;
+BEGIN;
+SELECT * FROM t3;
+pk count
+connection con2;
+BEGIN;
+INSERT INTO t3 (pk) VALUES(1) ON DUPLICATE KEY UPDATE count=count+1;
+COMMIT;
+connection con1;
+INSERT INTO t3 (pk) VALUES(1) ON DUPLICATE KEY UPDATE count=count+1;
+COMMIT;
+SELECT count FROM t3;
+count
+1
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/level_read_uncommitted.result b/storage/rocksdb/mysql-test/rocksdb/r/level_read_uncommitted.result
new file mode 100644
index 00000000000..68fbe5632cb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/level_read_uncommitted.result
@@ -0,0 +1,116 @@
+DROP TABLE IF EXISTS t1;
+connect con1,localhost,root,,;
+SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
+connect con2,localhost,root,,;
+SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
+connection con1;
+CREATE TABLE t1 (a <INT_COLUMN>) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
+START TRANSACTION;
+SELECT a FROM t1;
+a
+connection con2;
+BEGIN;
+INSERT INTO t1 (a) VALUES(1);
+# WARNING: Statement ended with errno 0, errname ''.
+# If it differs from the result file, it might indicate a problem.
+connection con1;
+SELECT a FROM t1;
+a
+1
+connection con2;
+INSERT INTO t1 (a) VALUES (2);
+# WARNING: Statement ended with errno 0, errname ''.
+# If it differs from the result file, it might indicate a problem.
+connection con1;
+SELECT a FROM t1;
+a
+1
+2
+INSERT INTO t1 (a) SELECT a+100 FROM t1;
+# WARNING: Statement ended with errno 0, errname ''.
+# If it differs from the result file, it might indicate a problem.
+SELECT a FROM t1;
+a
+1
+101
+102
+2
+connection con2;
+SELECT a FROM t1;
+a
+1
+101
+102
+2
+COMMIT;
+SELECT a FROM t1;
+a
+1
+101
+102
+2
+connection con1;
+SELECT a FROM t1;
+a
+1
+101
+102
+2
+INSERT INTO t1 (a) SELECT a+200 FROM t1;
+# WARNING: Statement ended with errno 0, errname ''.
+# If it differs from the result file, it might indicate a problem.
+SELECT a FROM t1;
+a
+1
+101
+102
+2
+201
+202
+301
+302
+COMMIT;
+SELECT a FROM t1;
+a
+1
+101
+102
+2
+201
+202
+301
+302
+connection con2;
+SELECT a FROM t1;
+a
+1
+101
+102
+2
+201
+202
+301
+302
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+CREATE TABLE t1 (a <INT_COLUMN>) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
+SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+connection con2;
+INSERT INTO t1 (a) VALUES (1);
+connection con1;
+# If consistent read works on this isolation level (READ UNCOMMITTED), the following SELECT should not return the value we inserted (1)
+SELECT a FROM t1;
+a
+1
+COMMIT;
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/level_repeatable_read.result b/storage/rocksdb/mysql-test/rocksdb/r/level_repeatable_read.result
new file mode 100644
index 00000000000..13da8a0ffeb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/level_repeatable_read.result
@@ -0,0 +1,100 @@
+DROP TABLE IF EXISTS t1;
+connect con1,localhost,root,,;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+connect con2,localhost,root,,;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+connection con1;
+CREATE TABLE t1 (a INT, pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+START TRANSACTION;
+SELECT a FROM t1;
+a
+connection con2;
+BEGIN;
+INSERT INTO t1 (a) VALUES(1);
+connection con1;
+SELECT a FROM t1;
+a
+connection con2;
+INSERT INTO t1 (a) VALUES (2);
+connection con1;
+SELECT a FROM t1;
+a
+INSERT INTO t1 (a) SELECT a+100 FROM t1;
+SELECT a FROM t1;
+a
+connection con2;
+SELECT a FROM t1;
+a
+1
+2
+COMMIT;
+SELECT a FROM t1;
+a
+1
+2
+connection con1;
+SELECT a FROM t1;
+a
+INSERT INTO t1 (a) SELECT a+200 FROM t1;
+SELECT a FROM t1;
+a
+COMMIT;
+SELECT a FROM t1;
+a
+1
+2
+connection con2;
+SELECT a FROM t1;
+a
+1
+2
+connection default;
+CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t2 (a) VALUES (1);
+COMMIT;
+connection con1;
+BEGIN;
+SELECT a from t2;
+a
+1
+INSERT INTO t2 (a) VALUES (1), (3);
+ERROR 23000: Duplicate entry '1' for key 'PRIMARY'
+connection con2;
+INSERT INTO t2 (a) VALUES (2);
+COMMIT;
+connection con1;
+SELECT a from t2;
+a
+1
+COMMIT;
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t1;
+DROP TABLE t2;
+CREATE TABLE t3 (
+pk int unsigned PRIMARY KEY,
+count int unsigned DEFAULT '0'
+) ENGINE=ROCKSDB;
+connect con1,localhost,root,,;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+connect con2,localhost,root,,;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+connection con1;
+BEGIN;
+SELECT * FROM t3;
+pk count
+connection con2;
+BEGIN;
+INSERT INTO t3 (pk) VALUES(1) ON DUPLICATE KEY UPDATE count=count+1;
+COMMIT;
+connection con1;
+INSERT INTO t3 (pk) VALUES(1) ON DUPLICATE KEY UPDATE count=count+1;
+COMMIT;
+SELECT count FROM t3;
+count
+0
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/level_serializable.result b/storage/rocksdb/mysql-test/rocksdb/r/level_serializable.result
new file mode 100644
index 00000000000..3f57395fa37
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/level_serializable.result
@@ -0,0 +1,56 @@
+DROP TABLE IF EXISTS t1;
+connect con1,localhost,root,,;
+SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+connect con2,localhost,root,,;
+SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+connection con1;
+CREATE TABLE t1 (a <INT_COLUMN>) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
+START TRANSACTION;
+SELECT a FROM t1;
+a
+connection con2;
+BEGIN;
+INSERT INTO t1 (a) VALUES(1);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+# WARNING: Statement ended with errno 1205, errname 'ER_LOCK_WAIT_TIMEOUT'.
+# If it differs from the result file, it might indicate a problem.
+connection con1;
+SELECT a FROM t1;
+a
+connection con2;
+INSERT INTO t1 (a) VALUES (2);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+# WARNING: Statement ended with errno 1205, errname 'ER_LOCK_WAIT_TIMEOUT'.
+# If it differs from the result file, it might indicate a problem.
+connection con1;
+SELECT a FROM t1;
+a
+INSERT INTO t1 (a) SELECT a+100 FROM t1;
+# WARNING: Statement ended with errno 0, errname ''.
+# If it differs from the result file, it might indicate a problem.
+SELECT a FROM t1;
+a
+connection con2;
+SELECT a FROM t1;
+a
+COMMIT;
+SELECT a FROM t1;
+a
+connection con1;
+SELECT a FROM t1;
+a
+INSERT INTO t1 (a) SELECT a+200 FROM t1;
+# WARNING: Statement ended with errno 0, errname ''.
+# If it differs from the result file, it might indicate a problem.
+SELECT a FROM t1;
+a
+COMMIT;
+SELECT a FROM t1;
+a
+connection con2;
+SELECT a FROM t1;
+a
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/loaddata.result b/storage/rocksdb/mysql-test/rocksdb/r/loaddata.result
new file mode 100644
index 00000000000..a9f9c0b49e8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/loaddata.result
@@ -0,0 +1,239 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+LOAD DATA INFILE '<DATADIR>/se_loaddata.dat' INTO TABLE t1
+FIELDS TERMINATED BY ',' (a,b);
+SELECT a,b FROM t1;
+a b
+1 foo
+2 bar
+3
+4 abc
+LOAD DATA LOCAL INFILE '<DATADIR>/se_loaddata.dat' INTO TABLE t1
+CHARACTER SET utf8 COLUMNS TERMINATED BY ','
+ ESCAPED BY '/' (a,b);
+SELECT a,b FROM t1;
+a b
+1 foo
+1 foo
+2 bar
+2 bar
+3
+3
+4 abc
+4 abc
+LOAD DATA LOCAL INFILE '<DATADIR>/se_loaddata.dat' INTO TABLE t1
+FIELDS TERMINATED BY ';'
+ (a) SET b='loaded';
+Warnings:
+Warning 1262 Row 1 was truncated; it contained more data than there were input columns
+Warning 1262 Row 2 was truncated; it contained more data than there were input columns
+Warning 1262 Row 3 was truncated; it contained more data than there were input columns
+SELECT a,b FROM t1;
+a b
+0 loaded
+1 foo
+1 foo
+102 loaded
+2 bar
+2 bar
+3
+3
+4 abc
+4 abc
+5 loaded
+LOAD DATA INFILE '<DATADIR>/se_loaddata.dat' INTO TABLE t1
+FIELDS TERMINATED BY ';'
+ OPTIONALLY ENCLOSED BY ''''
+ LINES STARTING BY 'prefix:'
+IGNORE 2 LINES (a,b);
+Warnings:
+Warning 1262 Row 2 was truncated; it contained more data than there were input columns
+SELECT a,b FROM t1;
+a b
+0
+0 loaded
+1 foo
+1 foo
+100 foo
+102 loaded
+2 bar
+2 bar
+3
+3
+4 abc
+4 abc
+5 loaded
+7 test
+LOAD DATA INFILE '<DATADIR>/se_loaddata.dat' INTO TABLE t1;
+Warnings:
+Warning 1261 Row 1 doesn't contain data for all columns
+Warning 1261 Row 2 doesn't contain data for all columns
+Warning 1261 Row 3 doesn't contain data for all columns
+Warning 1261 Row 4 doesn't contain data for all columns
+SELECT a,b FROM t1;
+a b
+0
+0 loaded
+1 foo
+1 foo
+1 foo
+100 foo
+102 loaded
+2 bar
+2 bar
+2 bar
+3
+3
+3
+4 abc
+4 abc
+4 abc
+5 loaded
+7 test
+LOAD DATA INFILE '<DATADIR>/se_replacedata.dat' REPLACE INTO TABLE t1;
+Warnings:
+Warning 1261 Row 1 doesn't contain data for all columns
+Warning 1261 Row 2 doesn't contain data for all columns
+Warning 1261 Row 3 doesn't contain data for all columns
+Warning 1261 Row 4 doesn't contain data for all columns
+SELECT a,b FROM t1;
+a b
+0
+0 loaded
+1 aaa
+1 foo
+1 foo
+1 foo
+100 foo
+102 loaded
+2 bar
+2 bar
+2 bar
+2 bbb
+3
+3
+3
+3 ccc
+4 abc
+4 abc
+4 abc
+4 ddd
+5 loaded
+7 test
+DROP TABLE t1;
+set session unique_checks=0;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+LOAD DATA INFILE '<DATADIR>/se_loaddata.dat' INTO TABLE t1
+FIELDS TERMINATED BY ',' (a,b);
+SELECT a,b FROM t1;
+a b
+1 foo
+2 bar
+3
+4 abc
+LOAD DATA LOCAL INFILE '<DATADIR>/se_loaddata.dat' INTO TABLE t1
+CHARACTER SET utf8 COLUMNS TERMINATED BY ','
+ ESCAPED BY '/' (a,b);
+SELECT a,b FROM t1;
+a b
+1 foo
+1 foo
+2 bar
+2 bar
+3
+3
+4 abc
+4 abc
+LOAD DATA LOCAL INFILE '<DATADIR>/se_loaddata.dat' INTO TABLE t1
+FIELDS TERMINATED BY ';'
+ (a) SET b='loaded';
+Warnings:
+Warning 1262 Row 1 was truncated; it contained more data than there were input columns
+Warning 1262 Row 2 was truncated; it contained more data than there were input columns
+Warning 1262 Row 3 was truncated; it contained more data than there were input columns
+SELECT a,b FROM t1;
+a b
+0 loaded
+1 foo
+1 foo
+102 loaded
+2 bar
+2 bar
+3
+3
+4 abc
+4 abc
+5 loaded
+LOAD DATA INFILE '<DATADIR>/se_loaddata.dat' INTO TABLE t1
+FIELDS TERMINATED BY ';'
+ OPTIONALLY ENCLOSED BY ''''
+ LINES STARTING BY 'prefix:'
+IGNORE 2 LINES (a,b);
+Warnings:
+Warning 1262 Row 2 was truncated; it contained more data than there were input columns
+SELECT a,b FROM t1;
+a b
+0
+0 loaded
+1 foo
+1 foo
+100 foo
+102 loaded
+2 bar
+2 bar
+3
+3
+4 abc
+4 abc
+5 loaded
+7 test
+LOAD DATA INFILE '<DATADIR>/se_loaddata.dat' INTO TABLE t1;
+Warnings:
+Warning 1261 Row 1 doesn't contain data for all columns
+Warning 1261 Row 2 doesn't contain data for all columns
+Warning 1261 Row 3 doesn't contain data for all columns
+Warning 1261 Row 4 doesn't contain data for all columns
+SELECT a,b FROM t1;
+a b
+0
+0 loaded
+1 foo
+1 foo
+1 foo
+100 foo
+102 loaded
+2 bar
+2 bar
+2 bar
+3
+3
+3
+4 abc
+4 abc
+4 abc
+5 loaded
+7 test
+LOAD DATA INFILE '<DATADIR>/se_replacedata.dat' REPLACE INTO TABLE t1;
+ERROR HY000: When unique checking is disabled in MyRocks, INSERT,UPDATE,LOAD statements with clauses that update or replace the key (i.e. INSERT ON DUPLICATE KEY UPDATE, REPLACE) are not allowed. Query: LOAD DATA INFILE '<DATADIR>/se_replacedata.dat' REPLACE INTO TABLE t1
+SELECT a,b FROM t1;
+a b
+0
+0 loaded
+1 foo
+1 foo
+1 foo
+100 foo
+102 loaded
+2 bar
+2 bar
+2 bar
+3
+3
+3
+4 abc
+4 abc
+4 abc
+5 loaded
+7 test
+DROP TABLE t1;
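
Note: the error above states the rule directly: with unique_checks=0 MyRocks skips unique-key verification, so statements that could overwrite existing keys (REPLACE, INSERT ... ON DUPLICATE KEY UPDATE, LOAD DATA ... REPLACE) are rejected outright. A minimal sketch of the bulk-load pattern; the file path is illustrative only:

  SET SESSION unique_checks=0;                      # fast path, no uniqueness checks
  LOAD DATA INFILE '/tmp/data.dat' INTO TABLE t1;   # plain loads are still allowed
  SET SESSION unique_checks=1;                      # restore before any REPLACE-style load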
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/lock.result b/storage/rocksdb/mysql-test/rocksdb/r/lock.result
new file mode 100644
index 00000000000..7c4f54778d0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/lock.result
@@ -0,0 +1,121 @@
+connect con1,localhost,root,,;
+SET lock_wait_timeout=1;
+connection default;
+DROP TABLE IF EXISTS t1, t2, t3;
+CREATE TABLE t1 (id INT, id2 INT, pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (id,id2) VALUES (1,1),(1,2),(1,3);
+LOCK TABLE t1 LOW_PRIORITY WRITE;
+SELECT id2,COUNT(DISTINCT id) FROM t1 GROUP BY id2;
+id2 COUNT(DISTINCT id)
+1 1
+2 1
+3 1
+UPDATE t1 SET id=-1 WHERE id=1;
+connection con1;
+SELECT id,id2 FROM t1;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+LOCK TABLE t1 READ;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection default;
+LOCK TABLE t1 READ;
+UPDATE t1 SET id=1 WHERE id=1;
+ERROR HY000: Table 't1' was locked with a READ lock and can't be updated
+connection con1;
+SELECT COUNT(DISTINCT id) FROM t1;
+COUNT(DISTINCT id)
+1
+UPDATE t1 SET id=2 WHERE id=2;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+LOCK TABLE t1 WRITE;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+LOCK TABLE t1 READ;
+UNLOCK TABLES;
+connection default;
+CREATE TABLE t2 (a INT, b CHAR(8), PRIMARY KEY(a)) ENGINE=rocksdb;
+ERROR HY000: Table 't2' was not locked with LOCK TABLES
+UNLOCK TABLES;
+CREATE TABLE t2 (id INT, id2 INT, pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+LOCK TABLE t1 WRITE, t2 WRITE;
+INSERT INTO t2 (id,id2) SELECT id,id2 FROM t1;
+UPDATE t1 SET id=1 WHERE id=-1;
+DROP TABLE t1,t2;
+CREATE TABLE t1 (i1 INT, nr INT, pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+CREATE TABLE t2 (nr INT, nm INT, pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t2 (nr,nm) VALUES (1,3);
+INSERT INTO t2 (nr,nm) VALUES (2,4);
+LOCK TABLES t1 WRITE, t2 READ;
+INSERT INTO t1 (i1,nr) SELECT 1, nr FROM t2 WHERE nm=3;
+INSERT INTO t1 (i1,nr) SELECT 2, nr FROM t2 WHERE nm=4;
+UNLOCK TABLES;
+LOCK TABLES t1 WRITE;
+INSERT INTO t1 (i1,nr) SELECT i1, nr FROM t1;
+ERROR HY000: Table 't1' was not locked with LOCK TABLES
+UNLOCK TABLES;
+LOCK TABLES t1 WRITE, t1 AS t1_alias READ;
+INSERT INTO t1 (i1,nr) SELECT i1, nr FROM t1 AS t1_alias;
+DROP TABLE t1,t2;
+ERROR HY000: Table 't2' was not locked with LOCK TABLES
+UNLOCK TABLES;
+DROP TABLE t1,t2;
+CREATE TABLE t1 (a INT, b CHAR(8), PRIMARY KEY(a)) ENGINE=rocksdb;
+CREATE TABLE t2 (a INT, b CHAR(8), PRIMARY KEY(b)) ENGINE=rocksdb;
+CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb;
+LOCK TABLES t1 WRITE, t2 WRITE, t3 WRITE;
+DROP TABLE t2, t3, t1;
+CREATE TABLE t1 (a INT, b CHAR(8), PRIMARY KEY(a)) ENGINE=rocksdb;
+CREATE TABLE t2 (a INT, b CHAR(8), PRIMARY KEY(b)) ENGINE=rocksdb;
+CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb;
+LOCK TABLES t1 WRITE, t2 WRITE, t3 WRITE, t1 AS t4 READ;
+ALTER TABLE t2 ADD COLUMN c2 INT;
+DROP TABLE t1, t2, t3;
+CREATE TABLE t1 (a INT, b CHAR(8), PRIMARY KEY(a)) ENGINE=rocksdb;
+CREATE TABLE t2 (a INT, b CHAR(8), PRIMARY KEY(b)) ENGINE=rocksdb;
+LOCK TABLE t1 READ, t2 READ;
+FLUSH TABLE t1;
+ERROR HY000: Table 't1' was locked with a READ lock and can't be updated
+FLUSH TABLES;
+ERROR HY000: Table 't2' was locked with a READ lock and can't be updated
+FLUSH TABLES t1, t2 WITH READ LOCK;
+ERROR HY000: Can't execute the given command because you have active locked tables or an active transaction
+UNLOCK TABLES;
+FLUSH TABLES t1, t2 WITH READ LOCK;
+connection con1;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection default;
+UNLOCK TABLES;
+FLUSH TABLES WITH READ LOCK;
+connection con1;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection default;
+UNLOCK TABLES;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+FLUSH TABLES WITH READ LOCK;
+DROP TABLE t1, t2;
+ERROR HY000: Can't execute the query because you have a conflicting read lock
+UNLOCK TABLES;
+DROP TABLE t1, t2;
+disconnect con1;
+CREATE TABLE t1 (a INT, b CHAR(8), PRIMARY KEY(a)) ENGINE=rocksdb;
+CREATE TABLE t2 (a INT, b CHAR(8), PRIMARY KEY(b)) ENGINE=rocksdb;
+LOCK TABLE t1 WRITE, t2 WRITE;
+SELECT a,b FROM t1;
+a b
+UNLOCK TABLES;
+DROP TABLE t1, t2;
+CREATE TABLE t1 (i INT) ENGINE=MyISAM;
+HANDLER t1 OPEN h;
+CREATE TABLE t2 (i INT) ENGINE=RocksDB;
+LOCK TABLES t2 WRITE;
+connect con1,localhost,root,,test;
+connection con1;
+FLUSH TABLES WITH READ LOCK;
+connection default;
+INSERT INTO t2 VALUES (1);
+UNLOCK TABLES;
+HANDLER h CLOSE;
+connection con1;
+disconnect con1;
+connection default;
+DROP TABLE t1, t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/lock_info.result b/storage/rocksdb/mysql-test/rocksdb/r/lock_info.result
new file mode 100644
index 00000000000..d0f1221e472
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/lock_info.result
@@ -0,0 +1,31 @@
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+create table t1 (a int, primary key(a) comment 'lock_into_cf1') engine=rocksdb;
+insert into t1 values (1);
+insert into t1 values (2);
+create table t2 (a int, primary key(a) comment 'lock_info_cf2') engine=rocksdb;
+insert into t2 values (1);
+insert into t2 values (2);
+set autocommit=0;
+select * from t1 for update;
+a
+1
+2
+select * from t2 for update;
+a
+1
+2
+use information_schema;
+select rocksdb_ddl.cf, rocksdb_locks.transaction_id, rocksdb_locks.key
+from rocksdb_locks
+left join rocksdb_ddl
+on rocksdb_locks.column_family_id=rocksdb_ddl.column_family
+order by rocksdb_ddl.cf;
+cf transaction_id key
+lock_info_cf2 _txn_id_ _key_
+lock_info_cf2 _txn_id_ _key_
+lock_into_cf1 _txn_id_ _key_
+lock_into_cf1 _txn_id_ _key_
+use test;
+DROP TABLE t1;
+DROP TABLE t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/lock_rows_not_exist.result b/storage/rocksdb/mysql-test/rocksdb/r/lock_rows_not_exist.result
new file mode 100644
index 00000000000..7898489d98d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/lock_rows_not_exist.result
@@ -0,0 +1,40 @@
+DROP TABLE IF EXISTS t1;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+CREATE TABLE t (id1 int, id2 int, id3 int, value int, PRIMARY KEY (id1, id2, id3)) ENGINE=RocksDB;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+BEGIN;
+SELECT * FROM t WHERE id1=1 AND id2=1 AND id3=1 FOR UPDATE;
+id1 id2 id3 value
+connection con2;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+BEGIN;
+SELECT * FROM t WHERE id1=1 AND id2=1 AND id3=1 FOR UPDATE;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+SELECT * FROM t WHERE id1=1 AND id2=1 AND id3=2 FOR UPDATE;
+id1 id2 id3 value
+connection con1;
+ROLLBACK;
+BEGIN;
+UPDATE t SET value=value+100 WHERE id1=1 AND id2=1 AND id3=1;
+connection con2;
+ROLLBACK;
+BEGIN;
+UPDATE t SET value=value+100 WHERE id1=1 AND id2=1 AND id3=1;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+UPDATE t SET value=value+100 WHERE id1=1 AND id2=0 AND id3=1;
+connection con1;
+ROLLBACK;
+BEGIN;
+DELETE FROM t WHERE id1=1 AND id2=1 AND id3=1;
+connection con2;
+ROLLBACK;
+BEGIN;
+DELETE FROM t WHERE id1=1 AND id2=1 AND id3=1;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+DELETE FROM t WHERE id1=1 AND id2=1 AND id3=0;
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t;
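
Note: the pattern above shows MyRocks locking keys that do not exist yet: when a SELECT ... FOR UPDATE, UPDATE or DELETE pins every primary key column in the WHERE clause, that exact key is locked even though no row matches, so a second session blocks only on the identical key and proceeds on any key differing in one PK component. A minimal sketch, schema as in the test above:

  BEGIN;
  SELECT * FROM t WHERE id1=1 AND id2=1 AND id3=1 FOR UPDATE;  # locks key (1,1,1)
  # in another session: the same predicate times out; (1,1,2) or (1,0,1) succeed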
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result b/storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result
new file mode 100644
index 00000000000..96efca6e2b7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result
@@ -0,0 +1,35 @@
+create table t (a int primary key) engine=rocksdb;
+begin;
+insert into t values (0);
+set @@rocksdb_lock_wait_timeout=1;
+select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
+ROW_LOCK_WAIT_TIMEOUTS
+0
+begin;
+set @@rocksdb_lock_wait_timeout=1;
+begin;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts';
+insert into t values(0);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on index: test.t.PRIMARY
+select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
+ROW_LOCK_WAIT_TIMEOUTS
+1
+select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
+ROW_LOCK_WAIT_TIMEOUTS
+1
+select case when variable_value-@a = 1 then 'true' else 'false' end as waits from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts';
+waits
+true
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts';
+insert into t values(0);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on index: test.t.PRIMARY
+select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
+ROW_LOCK_WAIT_TIMEOUTS
+2
+select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
+ROW_LOCK_WAIT_TIMEOUTS
+2
+select case when variable_value-@a = 1 then 'true' else 'false' end as waits from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts';
+waits
+true
+drop table t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/locking_issues.result b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues.result
new file mode 100644
index 00000000000..84c93481c79
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues.result
@@ -0,0 +1 @@
+tests moved to rocksdb.locking_issues_case*
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case1_1_rc.result b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case1_1_rc.result
new file mode 100644
index 00000000000..a47aa3c7d90
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case1_1_rc.result
@@ -0,0 +1,30 @@
+
+-----------------------------------------------------------------------
+- Locking issues case 1.1:
+- Locking rows that do not exist when using all primary key columns in
+- a WHERE clause
+- using READ COMMITTED transaction isolation level
+-----------------------------------------------------------------------
+DROP TABLE IF EXISTS t0;
+CREATE TABLE t0(id1 INT, id2 INT, value INT, PRIMARY KEY(id1, id2));
+INSERT INTO t0 VALUES (1,1,0), (3,3,0), (4,4,0), (6,6,0);
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+BEGIN;
+SELECT * FROM t0 WHERE id1=1 AND id2=5 FOR UPDATE;
+id1 id2 value
+connection con2;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+BEGIN;
+INSERT INTO t0 VALUES (1,5,0);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+SELECT * FROM t0 WHERE id1=1 AND id2=5 FOR UPDATE;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection con1;
+COMMIT;
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case1_1_rr.result b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case1_1_rr.result
new file mode 100644
index 00000000000..c923c34c98e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case1_1_rr.result
@@ -0,0 +1,30 @@
+
+-----------------------------------------------------------------------
+- Locking issues case 1.1:
+- Locking rows that do not exist when using all primary key columns in
+- a WHERE clause
+- using REPEATABLE READ transaction isolation level
+-----------------------------------------------------------------------
+DROP TABLE IF EXISTS t0;
+CREATE TABLE t0(id1 INT, id2 INT, value INT, PRIMARY KEY(id1, id2));
+INSERT INTO t0 VALUES (1,1,0), (3,3,0), (4,4,0), (6,6,0);
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+BEGIN;
+SELECT * FROM t0 WHERE id1=1 AND id2=5 FOR UPDATE;
+id1 id2 value
+connection con2;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+BEGIN;
+INSERT INTO t0 VALUES (1,5,0);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+SELECT * FROM t0 WHERE id1=1 AND id2=5 FOR UPDATE;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection con1;
+COMMIT;
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t0;
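Cases 1.1 (_rc and _rr) pin down that a SELECT ... FOR UPDATE naming every primary key column locks that exact key even when no matching row exists, so a concurrent insert of the same key blocks and times out. The behaviour is identical under both isolation levels, which is why the two files differ only in their banners. A condensed sketch, assuming two sessions:

    -- session A
    BEGIN;
    SELECT * FROM t0 WHERE id1=1 AND id2=5 FOR UPDATE;  -- returns nothing, but locks key (1,5)

    -- session B
    INSERT INTO t0 VALUES (1,5,0);  -- waits on the locked key, then hits the lock wait timeout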
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case1_2_rc.result b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case1_2_rc.result
new file mode 100644
index 00000000000..01c4e7e3b2f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case1_2_rc.result
@@ -0,0 +1,30 @@
+
+-----------------------------------------------------------------------
+- Locking issues case 1.2:
+- Locking rows that do not exist without using all primary key
+- columns in a WHERE clause
+- using READ COMMITTED transaction isolation level
+-----------------------------------------------------------------------
+DROP TABLE IF EXISTS t0;
+CREATE TABLE t0(id1 INT, id2 INT, value INT, PRIMARY KEY(id1, id2));
+INSERT INTO t0 VALUES (1,1,0), (3,3,0), (4,4,0), (6,6,0);
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+BEGIN;
+SELECT * FROM t0 WHERE id1=1 FOR UPDATE;
+id1 id2 value
+1 1 0
+connection con2;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+BEGIN;
+SELECT * FROM t0 WHERE id1=1 AND id2=4 FOR UPDATE;
+id1 id2 value
+INSERT INTO t0 VALUES (1,5,0);
+connection con1;
+COMMIT;
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case1_2_rr.result b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case1_2_rr.result
new file mode 100644
index 00000000000..798fd15b76e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case1_2_rr.result
@@ -0,0 +1,30 @@
+
+-----------------------------------------------------------------------
+- Locking issues case 1.2:
+- Locking rows that do not exist without using all primary key
+- columns in a WHERE clause
+- using REPEATABLE READ transaction isolation level
+-----------------------------------------------------------------------
+DROP TABLE IF EXISTS t0;
+CREATE TABLE t0(id1 INT, id2 INT, value INT, PRIMARY KEY(id1, id2));
+INSERT INTO t0 VALUES (1,1,0), (3,3,0), (4,4,0), (6,6,0);
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+BEGIN;
+SELECT * FROM t0 WHERE id1=1 FOR UPDATE;
+id1 id2 value
+1 1 0
+connection con2;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+BEGIN;
+SELECT * FROM t0 WHERE id1=1 AND id2=4 FOR UPDATE;
+id1 id2 value
+INSERT INTO t0 VALUES (1,5,0);
+connection con1;
+COMMIT;
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t0;
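Cases 1.2 are the counterpart: when the WHERE clause covers only a prefix of the primary key, MyRocks locks just the rows the scan actually returned, not the key range, so con2's insert of (1,5,0) succeeds under both isolation levels. In sketch form, with two sessions assumed:

    -- session A locks only the existing row (1,1)
    BEGIN;
    SELECT * FROM t0 WHERE id1=1 FOR UPDATE;

    -- session B: key (1,5) was never locked, so this neither blocks nor fails
    INSERT INTO t0 VALUES (1,5,0);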
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case2_rc.result b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case2_rc.result
new file mode 100644
index 00000000000..16480da8e80
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case2_rc.result
@@ -0,0 +1,50 @@
+
+-----------------------------------------------------------------------
+- Locking issues case 2:
+- Rows that are scanned but do not match the WHERE are not locked
+- using READ COMMITTED transaction isolation level unless
+- rocksdb_lock_scanned_rows is on
+-----------------------------------------------------------------------
+DROP TABLE IF EXISTS t0;
+SELECT @@global.rocksdb_lock_scanned_rows;
+@@global.rocksdb_lock_scanned_rows
+0
+CREATE TABLE t0(id INT PRIMARY KEY, value INT);
+INSERT INTO t0 VALUES (1,0), (2,1), (3,0), (4,0), (5,1);
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+BEGIN;
+connection con2;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+BEGIN;
+connection con1;
+SELECT * FROM t0 WHERE value > 0 FOR UPDATE;
+id value
+2 1
+5 1
+connection con2;
+UPDATE t0 SET VALUE=10 WHERE id=1;
+UPDATE t0 SET VALUE=10 WHERE id=5;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection con1;
+UPDATE t0 SET value=100 WHERE id in (4,5) and value>0;
+connection con2;
+SELECT * FROM t0 WHERE id=4 FOR UPDATE;
+id value
+4 0
+COMMIT;
+SELECT * FROM t0;
+id value
+1 10
+2 1
+3 0
+4 0
+5 1
+connection con1;
+COMMIT;
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case2_rc_lsr.result b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case2_rc_lsr.result
new file mode 100644
index 00000000000..330cd09d33e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case2_rc_lsr.result
@@ -0,0 +1,37 @@
+
+-----------------------------------------------------------------------
+- Locking issues case 2:
+- Rows that are scanned but do not match the WHERE are not locked
+- using READ COMMITTED transaction isolation level unless
+- rocksdb_lock_scanned_rows is on
+-----------------------------------------------------------------------
+DROP TABLE IF EXISTS t0;
+SELECT @@global.rocksdb_lock_scanned_rows;
+@@global.rocksdb_lock_scanned_rows
+0
+SET GLOBAL rocksdb_lock_scanned_rows=ON;
+CREATE TABLE t0(id INT PRIMARY KEY, value INT);
+INSERT INTO t0 VALUES (1,0), (2,1), (3,0), (4,0), (5,1);
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+BEGIN;
+connection con2;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+BEGIN;
+connection con1;
+SELECT * FROM t0 WHERE value > 0 FOR UPDATE;
+id value
+2 1
+5 1
+connection con2;
+UPDATE t0 SET VALUE=10 WHERE id=1;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection con1;
+COMMIT;
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t0;
+SET GLOBAL rocksdb_lock_scanned_rows=0;
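The _lsr variants rerun case 2 with rocksdb_lock_scanned_rows=ON. With the option on, every row the FOR UPDATE scan reads stays locked, including rows that failed the WHERE filter, so even the update of id=1 (value 0, not in the result set) now times out. A condensed sketch, two sessions assumed:

    SET GLOBAL rocksdb_lock_scanned_rows=ON;
    -- session A
    BEGIN;
    SELECT * FROM t0 WHERE value > 0 FOR UPDATE;  -- reads all five rows; all stay locked
    -- session B
    UPDATE t0 SET value=10 WHERE id=1;  -- row did not match A's WHERE, but is locked: timeout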
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case2_rr.result b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case2_rr.result
new file mode 100644
index 00000000000..3e6b63afaa8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case2_rr.result
@@ -0,0 +1,50 @@
+
+-----------------------------------------------------------------------
+- Locking issues case 2:
+- Rows that are scanned but do not match the WHERE are not locked
+- using REPEATABLE READ transaction isolation level unless
+- rocksdb_lock_scanned_rows is on
+-----------------------------------------------------------------------
+DROP TABLE IF EXISTS t0;
+SELECT @@global.rocksdb_lock_scanned_rows;
+@@global.rocksdb_lock_scanned_rows
+0
+CREATE TABLE t0(id INT PRIMARY KEY, value INT);
+INSERT INTO t0 VALUES (1,0), (2,1), (3,0), (4,0), (5,1);
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+BEGIN;
+connection con2;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+BEGIN;
+connection con1;
+SELECT * FROM t0 WHERE value > 0 FOR UPDATE;
+id value
+2 1
+5 1
+connection con2;
+UPDATE t0 SET VALUE=10 WHERE id=1;
+UPDATE t0 SET VALUE=10 WHERE id=5;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection con1;
+UPDATE t0 SET value=100 WHERE id in (4,5) and value>0;
+connection con2;
+SELECT * FROM t0 WHERE id=4 FOR UPDATE;
+id value
+4 0
+COMMIT;
+SELECT * FROM t0;
+id value
+1 10
+2 1
+3 0
+4 0
+5 1
+connection con1;
+COMMIT;
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case2_rr_lsr.result b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case2_rr_lsr.result
new file mode 100644
index 00000000000..088e975ebc5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case2_rr_lsr.result
@@ -0,0 +1,37 @@
+
+-----------------------------------------------------------------------
+- Locking issues case 2:
+- Rows that are scanned but do not match the WHERE are not locked
+- using REPEATABLE READ transaction isolation level unless
+- rocksdb_lock_scanned_rows is on
+-----------------------------------------------------------------------
+DROP TABLE IF EXISTS t0;
+SELECT @@global.rocksdb_lock_scanned_rows;
+@@global.rocksdb_lock_scanned_rows
+0
+SET GLOBAL rocksdb_lock_scanned_rows=ON;
+CREATE TABLE t0(id INT PRIMARY KEY, value INT);
+INSERT INTO t0 VALUES (1,0), (2,1), (3,0), (4,0), (5,1);
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+BEGIN;
+connection con2;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+BEGIN;
+connection con1;
+SELECT * FROM t0 WHERE value > 0 FOR UPDATE;
+id value
+2 1
+5 1
+connection con2;
+UPDATE t0 SET VALUE=10 WHERE id=1;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection con1;
+COMMIT;
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t0;
+SET GLOBAL rocksdb_lock_scanned_rows=0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case3_rc.result b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case3_rc.result
new file mode 100644
index 00000000000..9a6f02cd41d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case3_rc.result
@@ -0,0 +1,25 @@
+
+-----------------------------------------------------------------------
+- Locking issues case 3:
+- After creating a snapshot, other clients updating rows
+- using READ COMMITTED transaction isolation level
+-----------------------------------------------------------------------
+DROP TABLE IF EXISTS t0;
+CREATE TABLE t0(id INT AUTO_INCREMENT PRIMARY KEY, value INT);
+Inserting 200,000 rows
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SELECT * FROM t0 WHERE value > 0 FOR UPDATE;
+connection con2;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+UPDATE t0 SET VALUE=VALUE+1 WHERE id=190000;
+connection con1;
+id value
+190000 1
+ERROR: 0
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case3_rr.result b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case3_rr.result
new file mode 100644
index 00000000000..fdd4d8fcaca
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case3_rr.result
@@ -0,0 +1,23 @@
+
+-----------------------------------------------------------------------
+- Locking issues case 3:
+- After creating a snapshot, other clients updating rows
+- using REPEATABLE READ transaction isolation level
+-----------------------------------------------------------------------
+DROP TABLE IF EXISTS t0;
+CREATE TABLE t0(id INT AUTO_INCREMENT PRIMARY KEY, value INT);
+Inserting 200,000 rows
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+SELECT * FROM t0 WHERE value > 0 FOR UPDATE;
+connection con2;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+UPDATE t0 SET VALUE=VALUE+1 WHERE id=190000;
+connection con1;
+ERROR: 1213
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t0;
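Case 3 contrasts the isolation levels when another client commits an update mid-scan: under READ COMMITTED the locking read completes (the ERROR: 0 line above), while under REPEATABLE READ the committed row version is newer than the transaction's snapshot and the statement aborts with error 1213, a snapshot conflict reported as a deadlock. Sketched for the REPEATABLE READ case, two sessions assumed:

    -- session A: long FOR UPDATE scan over 200,000 rows, snapshot taken at start
    SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
    SELECT * FROM t0 WHERE value > 0 FOR UPDATE;
    -- session B, while A is still scanning
    UPDATE t0 SET value=value+1 WHERE id=190000;  -- commits a newer row version
    -- session A's locking read then fails with ERROR 1213 (snapshot conflict)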
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case4_rc.result b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case4_rc.result
new file mode 100644
index 00000000000..1a35d276192
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case4_rc.result
@@ -0,0 +1,23 @@
+
+-----------------------------------------------------------------------
+- Locking issues case 4:
+- Phantom rows
+- using READ COMMITTED transaction isolation level
+-----------------------------------------------------------------------
+DROP TABLE IF EXISTS t0;
+CREATE TABLE t0(id INT AUTO_INCREMENT PRIMARY KEY, value INT);
+Inserting 200,000 rows
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SELECT * FROM t0 WHERE value > 0 FOR UPDATE;
+connection con2;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+INSERT INTO t0 VALUES(200001,1), (-1,1);
+connection con1;
+id value
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case4_rr.result b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case4_rr.result
new file mode 100644
index 00000000000..7ecb1ac5f7a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case4_rr.result
@@ -0,0 +1,23 @@
+
+-----------------------------------------------------------------------
+- Locking issues case 4:
+- Phantom rows
+- using REPEATABLE READ transaction isolation level
+-----------------------------------------------------------------------
+DROP TABLE IF EXISTS t0;
+CREATE TABLE t0(id INT AUTO_INCREMENT PRIMARY KEY, value INT);
+Inserting 200,000 rows
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+SELECT * FROM t0 WHERE value > 0 FOR UPDATE;
+connection con2;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+INSERT INTO t0 VALUES(200001,1), (-1,1);
+connection con1;
+id value
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case5_rc.result b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case5_rc.result
new file mode 100644
index 00000000000..4d707e83eec
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case5_rc.result
@@ -0,0 +1,29 @@
+
+-----------------------------------------------------------------------
+- Locking issues case 5:
+- Deleting primary key
+- using READ COMMITTED transaction isolation level
+-----------------------------------------------------------------------
+DROP TABLE IF EXISTS t0;
+CREATE TABLE t0(id INT AUTO_INCREMENT PRIMARY KEY, value INT);
+Inserting 200,000 rows
+UPDATE t0 SET value=100 WHERE id=190000;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+BEGIN;
+SELECT * FROM t0 WHERE value > 0 FOR UPDATE;
+connection con2;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+BEGIN;
+DELETE FROM t0 WHERE id=190000;
+COMMIT;
+connection con1;
+id value
+ERROR: 0
+COMMIT;
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case5_rr.result b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case5_rr.result
new file mode 100644
index 00000000000..0ebfe8e6079
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case5_rr.result
@@ -0,0 +1,28 @@
+
+-----------------------------------------------------------------------
+- Locking issues case 5:
+- Deleting primary key
+- using REPEATABLE READ transaction isolation level
+-----------------------------------------------------------------------
+DROP TABLE IF EXISTS t0;
+CREATE TABLE t0(id INT AUTO_INCREMENT PRIMARY KEY, value INT);
+Inserting 200,000 rows
+UPDATE t0 SET value=100 WHERE id=190000;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+BEGIN;
+SELECT * FROM t0 WHERE value > 0 FOR UPDATE;
+connection con2;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+BEGIN;
+DELETE FROM t0 WHERE id=190000;
+COMMIT;
+connection con1;
+ERROR: 1213
+COMMIT;
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case6_rc.result b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case6_rc.result
new file mode 100644
index 00000000000..f16ffbeaa63
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case6_rc.result
@@ -0,0 +1,29 @@
+
+-----------------------------------------------------------------------
+- Locking issues case 6:
+- Changing primary key
+- using READ COMMITTED transaction isolation level
+-----------------------------------------------------------------------
+DROP TABLE IF EXISTS t0;
+CREATE TABLE t0(id INT AUTO_INCREMENT PRIMARY KEY, value INT);
+Inserting 200,000 rows
+UPDATE t0 SET value=100 WHERE id=190000;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+BEGIN;
+SELECT * FROM t0 WHERE value > 0 FOR UPDATE;
+connection con2;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+BEGIN;
+UPDATE t0 SET id=200001 WHERE id=190000;
+COMMIT;
+connection con1;
+id value
+ERROR: 0
+COMMIT;
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case6_rr.result b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case6_rr.result
new file mode 100644
index 00000000000..c2323937d15
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case6_rr.result
@@ -0,0 +1,28 @@
+
+-----------------------------------------------------------------------
+- Locking issues case 6:
+- Changing primary key
+- using REPEATABLE READ transaction isolation level
+-----------------------------------------------------------------------
+DROP TABLE IF EXISTS t0;
+CREATE TABLE t0(id INT AUTO_INCREMENT PRIMARY KEY, value INT);
+Inserting 200,000 rows
+UPDATE t0 SET value=100 WHERE id=190000;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+BEGIN;
+SELECT * FROM t0 WHERE value > 0 FOR UPDATE;
+connection con2;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+BEGIN;
+UPDATE t0 SET id=200001 WHERE id=190000;
+COMMIT;
+connection con1;
+ERROR: 1213
+COMMIT;
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t0;
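Cases 5 and 6 replay the same snapshot-conflict pattern with, respectively, a committed DELETE of the row and a committed change of its primary key: READ COMMITTED completes the scan cleanly (ERROR: 0) while REPEATABLE READ aborts with 1213. A sketch of the case 6 conflict, two sessions assumed:

    -- session A
    SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
    BEGIN;
    SELECT * FROM t0 WHERE value > 0 FOR UPDATE;
    -- session B: moving the row deletes key 190000 and inserts key 200001
    UPDATE t0 SET id=200001 WHERE id=190000;
    COMMIT;
    -- under REPEATABLE READ, session A's scan then fails with ERROR 1213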
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case7_rc.result b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case7_rc.result
new file mode 100644
index 00000000000..f30dbab045d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case7_rc.result
@@ -0,0 +1,41 @@
+
+-----------------------------------------------------------------------
+- Locking issues case 7:
+- Rows that are scanned as part of a query but not in the table being
+- updated should not be locked unless rocksdb_lock_scanned_rows is on
+-----------------------------------------------------------------------
+DROP TABLE IF EXISTS t1, t2;
+SELECT @@global.rocksdb_lock_scanned_rows;
+@@global.rocksdb_lock_scanned_rows
+0
+CREATE TABLE t1(id INT PRIMARY KEY, value INT);
+CREATE TABLE t2(id INT PRIMARY KEY, value INT);
+INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
+INSERT INTO t2 VALUES (1,1), (2,2), (3,3), (4,4), (5,5);
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+BEGIN;
+connection con2;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+BEGIN;
+lock_scanned_rows is 0
+connection con1;
+UPDATE t1 JOIN t2 ON t1.id = t2.id SET t1.value=t1.value+100 WHERE t2.id=3;
+connection con2;
+UPDATE t2 SET value=value+100;
+SELECT * FROM t2;
+id value
+1 101
+2 102
+3 103
+4 104
+5 105
+connection con1;
+COMMIT;
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t1;
+DROP TABLE t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case7_rc_lsr.result b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case7_rc_lsr.result
new file mode 100644
index 00000000000..d43e3efbfe0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case7_rc_lsr.result
@@ -0,0 +1,45 @@
+
+-----------------------------------------------------------------------
+- Locking issues case 7:
+- Rows that are scanned as part of a query but not in the table being
+- updated should not be locked unless rocksdb_lock_scanned_rows is on
+-----------------------------------------------------------------------
+DROP TABLE IF EXISTS t1, t2;
+SELECT @@global.rocksdb_lock_scanned_rows;
+@@global.rocksdb_lock_scanned_rows
+0
+SET GLOBAL rocksdb_lock_scanned_rows=ON;
+CREATE TABLE t1(id INT PRIMARY KEY, value INT);
+CREATE TABLE t2(id INT PRIMARY KEY, value INT);
+INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
+INSERT INTO t2 VALUES (1,1), (2,2), (3,3), (4,4), (5,5);
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+BEGIN;
+connection con2;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+BEGIN;
+lock_scanned_rows is 1
+connection con1;
+UPDATE t1 JOIN t2 ON t1.id = t2.id SET t1.value=t1.value+100 WHERE t2.id=3;
+connection con2;
+UPDATE t2 SET value=value+100 WHERE id=3;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+UPDATE t2 SET value=value+100 WHERE id IN (1,2,4,5);
+SELECT * FROM t2;
+id value
+1 101
+2 102
+3 3
+4 104
+5 105
+connection con1;
+COMMIT;
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t1;
+DROP TABLE t2;
+SET GLOBAL rocksdb_lock_scanned_rows=0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case7_rr.result b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case7_rr.result
new file mode 100644
index 00000000000..dbcb0a03aef
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case7_rr.result
@@ -0,0 +1,41 @@
+
+-----------------------------------------------------------------------
+- Locking issues case 7:
+- Rows that are scanned as part of a query but not in the table being
+- updated should not be locked unless rocksdb_lock_scanned_rows is on
+-----------------------------------------------------------------------
+DROP TABLE IF EXISTS t1, t2;
+SELECT @@global.rocksdb_lock_scanned_rows;
+@@global.rocksdb_lock_scanned_rows
+0
+CREATE TABLE t1(id INT PRIMARY KEY, value INT);
+CREATE TABLE t2(id INT PRIMARY KEY, value INT);
+INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
+INSERT INTO t2 VALUES (1,1), (2,2), (3,3), (4,4), (5,5);
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+BEGIN;
+connection con2;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+BEGIN;
+lock_scanned_rows is 0
+connection con1;
+UPDATE t1 JOIN t2 ON t1.id = t2.id SET t1.value=t1.value+100 WHERE t2.id=3;
+connection con2;
+UPDATE t2 SET value=value+100;
+SELECT * FROM t2;
+id value
+1 101
+2 102
+3 103
+4 104
+5 105
+connection con1;
+COMMIT;
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t1;
+DROP TABLE t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case7_rr_lsr.result b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case7_rr_lsr.result
new file mode 100644
index 00000000000..d9f7e333d3c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case7_rr_lsr.result
@@ -0,0 +1,45 @@
+
+-----------------------------------------------------------------------
+- Locking issues case 7:
+- Rows that are scanned as part of a query but not in the table being
+- updated should not be locked unless rocksdb_lock_scanned_rows is on
+-----------------------------------------------------------------------
+DROP TABLE IF EXISTS t1, t2;
+SELECT @@global.rocksdb_lock_scanned_rows;
+@@global.rocksdb_lock_scanned_rows
+0
+SET GLOBAL rocksdb_lock_scanned_rows=ON;
+CREATE TABLE t1(id INT PRIMARY KEY, value INT);
+CREATE TABLE t2(id INT PRIMARY KEY, value INT);
+INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
+INSERT INTO t2 VALUES (1,1), (2,2), (3,3), (4,4), (5,5);
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+BEGIN;
+connection con2;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+BEGIN;
+lock_scanned_rows is 1
+connection con1;
+UPDATE t1 JOIN t2 ON t1.id = t2.id SET t1.value=t1.value+100 WHERE t2.id=3;
+connection con2;
+UPDATE t2 SET value=value+100 WHERE id=3;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+UPDATE t2 SET value=value+100 WHERE id IN (1,2,4,5);
+SELECT * FROM t2;
+id value
+1 101
+2 102
+3 3
+4 104
+5 105
+connection con1;
+COMMIT;
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t1;
+DROP TABLE t2;
+SET GLOBAL rocksdb_lock_scanned_rows=0;
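Case 7 extends the scanned-rows rule to multi-table updates: rows read from t2 only to drive the join are not locked by default, so con2 can update all of t2, but with rocksdb_lock_scanned_rows=ON the joined row (id=3) stays locked and only the never-scanned rows remain updatable. Condensed, with two sessions and the option ON assumed:

    -- session A: t2 is only scanned; t1 is the table being updated
    UPDATE t1 JOIN t2 ON t1.id = t2.id SET t1.value=t1.value+100 WHERE t2.id=3;
    -- session B
    UPDATE t2 SET value=value+100 WHERE id=3;             -- timeout: scanned row kept locked
    UPDATE t2 SET value=value+100 WHERE id IN (1,2,4,5);  -- ok: these rows were never read by A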
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/mariadb_ignore_dirs.result b/storage/rocksdb/mysql-test/rocksdb/r/mariadb_ignore_dirs.result
new file mode 100644
index 00000000000..9b91cdb5551
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/mariadb_ignore_dirs.result
@@ -0,0 +1,9 @@
+#
+# RocksDB plugin adds #rocksdb to ignore_db_dirs
+#
+select @@ignore_db_dirs;
+@@ignore_db_dirs
+#rocksdb
+select @@ignore_db_dirs;
+@@ignore_db_dirs
+aa,bbb,#rocksdb
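This verifies that loading the plugin appends its datadir marker, #rocksdb, to ignore_db_dirs so the directory is not mistaken for a database, and that the marker is merged with a user-supplied list rather than replacing it. A sketch of the second expectation; the exact startup options are an assumption about how the test restarts the server:

    -- with the server restarted along the lines of:
    --   mysqld ... --ignore-db-dirs=aa --ignore-db-dirs=bbb
    select @@ignore_db_dirs;  -- expected: aa,bbb,#rocksdb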
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/mariadb_misc_binlog.result b/storage/rocksdb/mysql-test/rocksdb/r/mariadb_misc_binlog.result
new file mode 100644
index 00000000000..c37ab9461af
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/mariadb_misc_binlog.result
@@ -0,0 +1,33 @@
+create table t1 (a int) engine=rocksdb;
+# Should have binlog ON
+select @@log_bin;
+@@log_bin
+1
+set binlog_format='row';
+# Should succeed
+optimize table t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+#
+# MDEV-13602: rocksdb.index_merge_rocksdb2 failed in buildbot
+#
+lock tables t1 write;
+insert into t1 values(1);
+unlock tables;
+set @tmp_bf= @@binlog_format;
+set binlog_format='STATEMENT';
+lock tables t1 write;
+insert into t1 values(1);
+ERROR HY000: Cannot execute statement: impossible to write to binary log since BINLOG_FORMAT = STATEMENT and at least one table uses a storage engine limited to row-based logging.
+unlock tables;
+set @@binlog_format=@tmp_bf;
+drop table t1;
+#
+# MDEV-17045: MyRocks tables cannot be updated when binlog_format=MIXED.
+#
+set @tmp_bf= @@binlog_format;
+set binlog_format='MIXED';
+create table t1 (pk int primary key) engine=rocksdb;
+insert into t1 values (1);
+drop table t1;
+set @@binlog_format=@tmp_bf;
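The binlog checks above pin down that MyRocks is a row-logging-only engine: writes fail under binlog_format=STATEMENT but work under ROW, and under MIXED the server falls back to row events (the MDEV-17045 fix). A condensed sketch, assuming the binary log is enabled:

    set binlog_format='STATEMENT';
    insert into t1 values (1);  -- rejected: engine is limited to row-based logging
    set binlog_format='MIXED';
    insert into t1 values (1);  -- ok: logged as a row event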
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/mariadb_plugin.result b/storage/rocksdb/mysql-test/rocksdb/r/mariadb_plugin.result
new file mode 100644
index 00000000000..6d6cb1db54e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/mariadb_plugin.result
@@ -0,0 +1,35 @@
+#
+# MDEV-14843: Assertion `s_tx_list.size() == 0' failed in myrocks::Rdb_transaction::term_mutex
+#
+INSTALL SONAME 'ha_rocksdb';
+connect con1,localhost,root,,test;
+CREATE TABLE t1 (i INT) ENGINE=RocksDB;
+insert into t1 values (1);
+DROP TABLE t1;
+connection default;
+UNINSTALL SONAME 'ha_rocksdb';
+Warnings:
+Warning 1620 Plugin is busy and will be uninstalled on shutdown
+SELECT ENGINE, SUPPORT FROM INFORMATION_SCHEMA.ENGINES WHERE ENGINE='ROCKSDB';
+ENGINE SUPPORT
+ROCKSDB NO
+disconnect con1;
+#
+# MDEV-15686: Loading MyRocks plugin back after it has been unloaded causes a crash
+#
+call mtr.add_suppression("Plugin 'ROCKSDB.*' init function returned error.");
+call mtr.add_suppression("Plugin 'ROCKSDB.*' registration as a INFORMATION SCHEMA failed.");
+call mtr.add_suppression("Plugin 'ROCKSDB' registration as a STORAGE ENGINE failed");
+#
+# There are two possible scenarios:
+# ha_rocksdb.{dll,so} is still loaded into mysqld's address space. Its
+# global variables are in the state that doesn't allow it to be
+# initialized back (this is what MDEV-15686 is about). This is handled
+# by intentionally returning an error from rocksdb_init_func.
+#
+# The second case is when ha_rocksdb.{dll,so} has been fully unloaded
+# and so it will be now loaded as if it happens for the first time.
+INSTALL SONAME 'ha_rocksdb';
+# Whatever happened on the previous step, restore things to the way they
+# were at testcase start.
+UNINSTALL SONAME 'ha_rocksdb';
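The MDEV-14843 half documents deferred plugin unload: while server state still references the engine, UNINSTALL SONAME only marks it for removal (warning 1620) and the engine remains listed with SUPPORT='NO' until shutdown. In short:

    UNINSTALL SONAME 'ha_rocksdb';
    -- Warning 1620: Plugin is busy and will be uninstalled on shutdown
    SELECT ENGINE, SUPPORT FROM INFORMATION_SCHEMA.ENGINES WHERE ENGINE='ROCKSDB';
    -- ROCKSDB  NO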
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result b/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result
new file mode 100644
index 00000000000..df27fbeddf1
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result
@@ -0,0 +1,119 @@
+#
+# MDEV-14433: RocksDB may show empty or incorrect output with rocksdb_strict_collation_check=off
+#
+set global rocksdb_strict_collation_check=off;
+set @tmp_rscc=@@rocksdb_strict_collation_check;
+CREATE TABLE t1(
+a varchar(10) NOT NULL,
+b char(1) DEFAULT 'X',
+c char(2) NOT NULL DEFAULT '??',
+d varchar(10) NOT NULL,
+e int(11) DEFAULT 0,
+PRIMARY KEY (a,d),
+KEY (e)
+) ENGINE=ROCKSDB DEFAULT CHARSET=utf8;
+insert into t1 select 1,1,1,1,0;
+insert into t1 select 2,1,1,1,0;
+insert into t1 select 3,1,1,1,0;
+explain
+select a from t1 force index(e) where e<10000;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range e e 5 NULL # Using index condition
+select a from t1;
+a
+1
+2
+3
+select * from t1;
+a b c d e
+1 1 1 1 0
+2 1 1 1 0
+3 1 1 1 0
+DROP TABLE t1;
+#
+# MDEV-14563: Wrong query plan for query with no PK
+#
+CREATE TABLE t1(
+pk int primary key,
+a varchar(10) NOT NULL,
+e int(11) DEFAULT 0,
+KEY (a)
+) ENGINE=ROCKSDB DEFAULT CHARSET=utf8;
+insert into t1 values (1,1,1),(2,2,2);
+explain select a from t1 where a <'zzz';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 32 NULL # Using where
+CREATE TABLE t2(
+pk int,
+a varchar(10) NOT NULL,
+e int(11) DEFAULT 0,
+KEY (a)
+) ENGINE=ROCKSDB DEFAULT CHARSET=utf8;
+insert into t2 values (1,1,1),(2,2,2);
+explain select a from t2 where a <'zzz';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 32 NULL # Using where
+drop table t1,t2;
+set global rocksdb_strict_collation_check=@tmp_rscc;
+#
+# MDEV-14389: MyRocks and NOPAD collations
+#
+create table t1 (pk varchar(10) collate latin1_nopad_bin, primary key(pk)) engine=rocksdb;
+ERROR HY000: MyRocks doesn't currently support collations with "No pad" attribute.
+set global rocksdb_strict_collation_check=off;
+create table t1 (pk varchar(10) collate latin1_nopad_bin, primary key(pk)) engine=rocksdb;
+ERROR HY000: MyRocks doesn't currently support collations with "No pad" attribute.
+set global rocksdb_strict_collation_check=@tmp_rscc;
+#
+# MDEV-14679: RocksDB plugin fails to load with "Loading of unknown plugin ROCKSDB_CFSTATS"
+#
+select plugin_name, plugin_maturity from information_schema.plugins where plugin_name like '%rocksdb%';
+plugin_name plugin_maturity
+ROCKSDB Stable
+ROCKSDB_CFSTATS Stable
+ROCKSDB_DBSTATS Stable
+ROCKSDB_PERF_CONTEXT Stable
+ROCKSDB_PERF_CONTEXT_GLOBAL Stable
+ROCKSDB_CF_OPTIONS Stable
+ROCKSDB_COMPACTION_STATS Stable
+ROCKSDB_GLOBAL_INFO Stable
+ROCKSDB_DDL Stable
+ROCKSDB_SST_PROPS Stable
+ROCKSDB_INDEX_FILE_MAP Stable
+ROCKSDB_LOCKS Stable
+ROCKSDB_TRX Stable
+ROCKSDB_DEADLOCK Stable
+#
+# MDEV-12466 : Assertion `thd->transaction.stmt.is_empty() || thd->in_sub_stmt || ...
+#
+CREATE TABLE t1 (i INT) ENGINE=RocksDB;
+FLUSH TABLE t1 FOR EXPORT;
+ERROR HY000: Storage engine ROCKSDB of the table `test`.`t1` doesn't have this option
+DROP TABLE t1;
+#
+# MDEV-16154: Server crashes in myrocks::ha_rocksdb::load_auto_incr_value_from_index
+#
+CREATE TABLE t1 (a INT) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (1);
+ALTER TABLE t1 AUTO_INCREMENT 10;
+DROP TABLE t1;
+#
+# MDEV-16155: UPDATE on RocksDB table with unique constraint does not work
+#
+CREATE TABLE t1 (a INT, b CHAR(8), UNIQUE INDEX(a)) ENGINE=RocksDB;
+INSERT INTO t1 (a,b) VALUES (1,'foo'),(2,'bar');
+UPDATE t1 SET a=a+100;
+SELECT * FROM t1;
+a b
+101 foo
+102 bar
+DROP TABLE t1;
+#
+# MDEV-15319: [SQL Layer] Server crashes in Field::set_null / myrocks::ha_rocksdb::convert_record_from_storage_format
+# (just a testcase)
+#
+CREATE TABLE t1 (i INT);
+INSERT INTO t1 VALUES (1);
+CREATE TABLE t2 ENGINE=RocksDB AS SELECT VALUES(i) AS a FROM t1;
+DELETE FROM t2;
+DROP TABLE t1,t2;
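One fix from the set above worth restating is MDEV-14389: NO PAD collations are rejected for indexed columns even with the strict collation check disabled. The stated reason here is an assumption, but it is consistent with MyRocks's design: its memcmp-ordered key encoding relies on pad-space comparison semantics. In sketch form:

    set global rocksdb_strict_collation_check=off;
    create table t1 (pk varchar(10) collate latin1_nopad_bin,
    primary key(pk)) engine=rocksdb;  -- still rejected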
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_rpl.result b/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_rpl.result
new file mode 100644
index 00000000000..98408c7864d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_rpl.result
@@ -0,0 +1,14 @@
+#
+# MDEV-15472: Assertion `!is_set() || (m_status == DA_OK_BULK && is_bulk_op())' failure ...
+#
+select @@log_bin;
+@@log_bin
+1
+select @@binlog_format;
+@@binlog_format
+ROW
+CREATE OR REPLACE TABLE t1 (i INT) ENGINE=RocksDB;
+LOCK TABLE t1 WRITE;
+FLUSH TABLES;
+UNLOCK TABLES;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result b/storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result
new file mode 100644
index 00000000000..5d34f4e9640
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result
@@ -0,0 +1,21 @@
+CALL mtr.add_suppression("RocksDB: rocksdb_max_open_files should not be greater than the open_files_limit*");
+FOUND 1 /RocksDB: rocksdb_max_open_files should not be greater than the open_files_limit/ in rocksdb.max_open_files.err
+SELECT FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files;
+FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files
+1
+SELECT @@global.open_files_limit - 1 = @@global.rocksdb_max_open_files;
+@@global.open_files_limit - 1 = @@global.rocksdb_max_open_files
+1
+SELECT @@global.rocksdb_max_open_files;
+@@global.rocksdb_max_open_files
+0
+CREATE TABLE t1(a INT) ENGINE=ROCKSDB;
+INSERT INTO t1 VALUES(0),(1),(2),(3),(4);
+SET GLOBAL rocksdb_force_flush_memtable_and_lzero_now=1;
+DROP TABLE t1;
+SELECT @@global.rocksdb_max_open_files;
+@@global.rocksdb_max_open_files
+-1
+SELECT FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files;
+FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files
+1
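This file exercises the bounds handling of rocksdb_max_open_files across several server restarts: a configured value larger than open_files_limit is clamped to half the limit (hence the suppressed warning), while 0 and -1 are accepted as special settings. Verifying the clamp from SQL:

    SELECT @@global.open_files_limit, @@global.rocksdb_max_open_files;
    SELECT FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files
    AS clamped;  -- expected: 1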
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/misc.result b/storage/rocksdb/mysql-test/rocksdb/r/misc.result
new file mode 100644
index 00000000000..4a39f1cbff4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/misc.result
@@ -0,0 +1,95 @@
+INSERT INTO mysql.event (
+db,
+name,
+body,
+definer,
+interval_value,
+interval_field,
+originator,
+character_set_client,
+collation_connection,
+db_collation,
+body_utf8)
+values (
+database(),
+"ev1",
+"select 1",
+user(),
+100,
+"SECOND_MICROSECOND",
+1,
+'utf8',
+'utf8_general_ci',
+'utf8_general_ci',
+'select 1');
+SHOW EVENTS;
+ERROR 42000: This version of MariaDB doesn't yet support 'MICROSECOND'
+DROP EVENT ev1;
+SELECT TABLE_NAME, COLUMN_NAME, REFERENCED_TABLE_NAME, REFERENCED_COLUMN_NAME
+FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE ORDER BY TABLE_NAME;
+TABLE_NAME COLUMN_NAME REFERENCED_TABLE_NAME REFERENCED_COLUMN_NAME
+Warnings:
+Warning 1286 Unknown storage engine 'InnoDB'
+Warning 1286 Unknown storage engine 'InnoDB'
+column_stats column_name NULL NULL
+column_stats db_name NULL NULL
+column_stats table_name NULL NULL
+columns_priv Column_name NULL NULL
+columns_priv Db NULL NULL
+columns_priv Host NULL NULL
+columns_priv Table_name NULL NULL
+columns_priv User NULL NULL
+db Db NULL NULL
+db Host NULL NULL
+db User NULL NULL
+event db NULL NULL
+event name NULL NULL
+func name NULL NULL
+gtid_slave_pos domain_id NULL NULL
+gtid_slave_pos sub_id NULL NULL
+help_category help_category_id NULL NULL
+help_category name NULL NULL
+help_keyword help_keyword_id NULL NULL
+help_keyword name NULL NULL
+help_relation help_keyword_id NULL NULL
+help_relation help_topic_id NULL NULL
+help_topic help_topic_id NULL NULL
+help_topic name NULL NULL
+host Db NULL NULL
+host Host NULL NULL
+index_stats db_name NULL NULL
+index_stats index_name NULL NULL
+index_stats prefix_arity NULL NULL
+index_stats table_name NULL NULL
+plugin name NULL NULL
+proc db NULL NULL
+proc name NULL NULL
+proc type NULL NULL
+procs_priv Db NULL NULL
+procs_priv Host NULL NULL
+procs_priv Routine_name NULL NULL
+procs_priv Routine_type NULL NULL
+procs_priv User NULL NULL
+proxies_priv Host NULL NULL
+proxies_priv Proxied_host NULL NULL
+proxies_priv Proxied_user NULL NULL
+proxies_priv User NULL NULL
+roles_mapping Host NULL NULL
+roles_mapping Role NULL NULL
+roles_mapping User NULL NULL
+servers Server_name NULL NULL
+table_stats db_name NULL NULL
+table_stats table_name NULL NULL
+tables_priv Db NULL NULL
+tables_priv Host NULL NULL
+tables_priv Table_name NULL NULL
+tables_priv User NULL NULL
+time_zone Time_zone_id NULL NULL
+time_zone_leap_second Transition_time NULL NULL
+time_zone_name Name NULL NULL
+time_zone_transition Time_zone_id NULL NULL
+time_zone_transition Transition_time NULL NULL
+time_zone_transition_type Time_zone_id NULL NULL
+time_zone_transition_type Transition_type_id NULL NULL
+user Host NULL NULL
+user User NULL NULL
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/multi_varchar_sk_lookup.result b/storage/rocksdb/mysql-test/rocksdb/r/multi_varchar_sk_lookup.result
new file mode 100644
index 00000000000..86ba6d923a8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/multi_varchar_sk_lookup.result
@@ -0,0 +1,37 @@
+DROP TABLE IF EXISTS T1;
+CREATE TABLE T1 (
+P1 VARCHAR(64), P2 VARCHAR(64), P3 VARCHAR(64), P4 VARCHAR(64),
+S1 VARCHAR(64), S2 VARCHAR(64), S3 VARCHAR(64), S4 VARCHAR(64),
+S5 VARCHAR(64), S6 VARCHAR(64), S7 VARCHAR(64), S8 VARCHAR(64),
+S9 VARCHAR(64), S10 VARCHAR(64), S11 VARCHAR(64), S12 VARCHAR(64),
+S13 VARCHAR(64), S14 VARCHAR(64), S15 VARCHAR(64), S16 VARCHAR(64),
+PRIMARY KEY (P1(8), P2(8), P3(8), P4(8)),
+KEY SK (S1(8), S2(8), S3(8), S4(8),
+S5(8), S6(8), S7(8), S8(8),
+S9(8), S10(8), S11(8), S12(8),
+S13(8), S14(8), S15(8), S16(8))
+) ENGINE=rocksdb;
+INSERT INTO T1 VALUES ('1', '2', '3', '4',
+'5', '6', '7', '8',
+'9', '10', '11', '12',
+'13', '14', '15', '16',
+'17', '18', '19', '20');
+SELECT * FROM T1;
+P1 P2 P3 P4 S1 S2 S3 S4 S5 S6 S7 S8 S9 S10 S11 S12 S13 S14 S15 S16
+1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
+# Not eligible for optimization, shorter than prefix length.
+SELECT P4, S2 FROM T1 FORCE INDEX(SK) WHERE S1='5';
+P4 S2
+4 6
+include/assert.inc [Not eligible for optimization, shorter than prefix length.: 0 rocksdb_covered_secondary_key_lookups]
+DELETE FROM T1;
+INSERT INTO T1 VALUES ('100000000', '200000000', '300000000', '400000000',
+'500000000', '600000000', '700000000', '800000000',
+'900000000', '100000000', '110000000', '120000000',
+'130000000', '140000000', '150000000', '160000000',
+'170000000', '180000000', '190000000', '200000000');
+# Not eligible for optimization, longer than prefix length.
+SELECT P4, S2 FROM T1 FORCE INDEX(SK) WHERE S1='5';
+P4 S2
+include/assert.inc [Not eligible for optimization, longer than prefix length.: 0 rocksdb_covered_secondary_key_lookups]
+DROP TABLE T1;
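This covers the covered-secondary-key-lookup optimization for prefix-indexed VARCHAR columns: an index-only read is allowed only when the stored value proves the 8-byte prefix was not truncated, and both probes above (values shorter and longer than the prefix) are ineligible, so the counter stays at 0 each time. A sketch of measuring it directly, using the same status variable as the assert.inc checks:

    select variable_value into @c from information_schema.global_status
    where variable_name='rocksdb_covered_secondary_key_lookups';
    SELECT P4, S2 FROM T1 FORCE INDEX(SK) WHERE S1='5';
    select variable_value-@c as covered_lookups from information_schema.global_status
    where variable_name='rocksdb_covered_secondary_key_lookups';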
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/mysqlbinlog_blind_replace.result b/storage/rocksdb/mysql-test/rocksdb/r/mysqlbinlog_blind_replace.result
new file mode 100644
index 00000000000..ff4625698ca
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/mysqlbinlog_blind_replace.result
@@ -0,0 +1,128 @@
+reset master;
+set GLOBAL binlog_format= 'ROW';
+SET GLOBAL enable_blind_replace=ON;
+set binlog_format=row;
+create table t5 (c1 int primary key, c2 int);
+insert into t5 values (1, 1);
+insert into t5 values (2, 2);
+insert into t5 values (3, 3);
+select * from t5;
+c1 c2
+1 1
+2 2
+3 3
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t5 values (1, 11);
+replace into t5 values (2, 22);
+replace into t5 values (3, 33);
+select case when variable_value-@c = 3 then 'true' else 'false' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+select * from t5;
+c1 c2
+1 11
+2 22
+3 33
+include/show_binlog_events.inc
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Query # # use `test`; create table t5 (c1 int primary key, c2 int)
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+flush logs;
+drop table t5;
+reset master;
+Replaying binlog events containing blind replace statements should work
+select * from t5;
+c1 c2
+1 11
+2 22
+3 33
+include/show_binlog_events.inc
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Query # # use `test`; create table t5 (c1 int primary key, c2 int)
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+drop table t5;
+reset master;
+Replaying the same binlog events with blind replace disabled should work
+The server should internally convert such events into updates
+SET GLOBAL enable_blind_replace=OFF;
+select * from t5;
+c1 c2
+1 11
+2 22
+3 33
+include/show_binlog_events.inc
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Query # # use `test`; create table t5 (c1 int primary key, c2 int)
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+set GLOBAL binlog_format=DEFAULT;
+SET GLOBAL enable_blind_replace=DEFAULT;
+drop table t5;
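Blind replace, in summary: with enable_blind_replace=ON a REPLACE into a table whose only key is the primary key can skip the usual read-modify-write and be binlogged as plain Write_rows events; replaying those events with the option OFF converts them into Update_rows, which is what the third binlog listing above verifies. The read-free classification is asserted through the rocksdb_num_get_for_update_calls counter, as condensed here:

    SET GLOBAL enable_blind_replace=ON;
    select variable_value into @c from information_schema.global_status
    where variable_name='rocksdb_num_get_for_update_calls';
    replace into t5 values (1, 11);
    replace into t5 values (2, 22);
    replace into t5 values (3, 33);
    -- the counter's delta is what classifies the path taken as read-free
    select case when variable_value-@c = 3 then 'true' else 'false' end as read_free
    from information_schema.global_status
    where variable_name='rocksdb_num_get_for_update_calls';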
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/mysqlbinlog_gtid_skip_empty_trans_rocksdb.result b/storage/rocksdb/mysql-test/rocksdb/r/mysqlbinlog_gtid_skip_empty_trans_rocksdb.result
new file mode 100644
index 00000000000..835361eea35
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/mysqlbinlog_gtid_skip_empty_trans_rocksdb.result
@@ -0,0 +1,143 @@
+reset master;
+set timestamp=1000000000;
+set SESSION binlog_format = 'ROW';
+create database test2;
+create database test3;
+use test;
+create table t1 (a int primary key, b char(8)) ENGINE=rocksdb;
+insert into t1 values(1, 'a');
+insert into t1 values(2, 'b');
+create table t2 (a int primary key, b char(8)) ENGINE=rocksdb;
+start transaction;
+insert into t2 values(1, 'a');
+insert into t2 values(2, 'b');
+insert into t2 values(3, 'c');
+insert into t2 values(4, 'd');
+commit;
+use test2;
+create table t1 (a int primary key, b char(8)) ENGINE=rocksdb;
+insert into t1 values(1, 'a');
+insert into t1 values(2, 'b');
+create table t2 (a int primary key, b char(8)) ENGINE=rocksdb;
+start transaction;
+insert into t2 values(1, 'a');
+insert into t2 values(2, 'b');
+insert into t2 values(3, 'c');
+insert into t2 values(4, 'd');
+commit;
+use test3;
+create table t1 (a int primary key, b char(8)) ENGINE=rocksdb;
+insert into t1 values(1, 'a');
+insert into t1 values(2, 'b');
+create table t2 (a int primary key, b char(8)) ENGINE=rocksdb;
+start transaction;
+insert into t2 values(1, 'a');
+insert into t2 values(2, 'b');
+insert into t2 values(3, 'c');
+insert into t2 values(4, 'd');
+commit;
+FLUSH LOGS;
+==== Output of mysqlbinlog with --short-form --skip-empty-trans, --database and --skip-gtids options ====
+/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=1*/;
+/*!40019 SET @@session.max_insert_delayed_threads=0*/;
+/*!50003 SET @OLD_COMPLETION_TYPE=@@COMPLETION_TYPE,COMPLETION_TYPE=0*/;
+DELIMITER /*!*/;
+ROLLBACK/*!*/;
+SET TIMESTAMP=1000000000/*!*/;
+SET @@session.pseudo_thread_id=999999999/*!*/;
+SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=0, @@session.unique_checks=1, @@session.autocommit=1/*!*/;
+SET @@session.sql_mode=1073741824/*!*/;
+SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/;
+/*!\C latin1 *//*!*/;
+SET @@session.character_set_client=8,@@session.collation_connection=8,@@session.collation_server=8/*!*/;
+SET @@session.lc_time_names=0/*!*/;
+SET @@session.collation_database=DEFAULT/*!*/;
+create database test2
+/*!*/;
+use `test2`/*!*/;
+SET TIMESTAMP=1000000000/*!*/;
+create table t1 (a int primary key, b char(8)) ENGINE=rocksdb
+/*!*/;
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+COMMIT/*!*/;
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+COMMIT/*!*/;
+SET TIMESTAMP=1000000000/*!*/;
+create table t2 (a int primary key, b char(8)) ENGINE=rocksdb
+/*!*/;
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+COMMIT/*!*/;
+DELIMITER ;
+# End of log file
+ROLLBACK /* added by mysqlbinlog */;
+/*!50003 SET COMPLETION_TYPE=@OLD_COMPLETION_TYPE*/;
+/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=0*/;
+use test2;
+start transaction;
+insert into t2 values(5, 'e');
+insert into t2 values(6, 'f');
+use test;
+insert into t2 values(7, 'g');
+insert into t2 values(8, 'h');
+commit;
+FLUSH LOGS;
+==== Output of mysqlbinlog with --short-form --skip-empty-trans, --database and --skip-gtids options ====
+==== DB changed in the middle of the transaction, which belongs to the selected database
+/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=1*/;
+/*!40019 SET @@session.max_insert_delayed_threads=0*/;
+/*!50003 SET @OLD_COMPLETION_TYPE=@@COMPLETION_TYPE,COMPLETION_TYPE=0*/;
+DELIMITER /*!*/;
+SET TIMESTAMP=1000000000/*!*/;
+SET @@session.pseudo_thread_id=999999999/*!*/;
+SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=0, @@session.unique_checks=1, @@session.autocommit=1/*!*/;
+SET @@session.sql_mode=1073741824/*!*/;
+SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/;
+/*!\C latin1 *//*!*/;
+SET @@session.character_set_client=8,@@session.collation_connection=8,@@session.collation_server=8/*!*/;
+SET @@session.lc_time_names=0/*!*/;
+SET @@session.collation_database=DEFAULT/*!*/;
+BEGIN
+/*!*/;
+DELIMITER ;
+# End of log file
+ROLLBACK /* added by mysqlbinlog */;
+/*!50003 SET COMPLETION_TYPE=@OLD_COMPLETION_TYPE*/;
+/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=0*/;
+use test;
+start transaction;
+insert into t2 values(9, 'i');
+insert into t2 values(10, 'j');
+use test2;
+insert into t2 values(11, 'k');
+insert into t2 values(12, 'l');
+commit;
+FLUSH LOGS;
+==== Output of mysqlbinlog with --short-form --skip-empty-trans, --database and --skip-gtids options ====
+==== DB changed in the middle of the transaction, which belongs to the non-selected database
+/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=1*/;
+/*!40019 SET @@session.max_insert_delayed_threads=0*/;
+/*!50003 SET @OLD_COMPLETION_TYPE=@@COMPLETION_TYPE,COMPLETION_TYPE=0*/;
+DELIMITER /*!*/;
+DELIMITER ;
+# End of log file
+ROLLBACK /* added by mysqlbinlog */;
+/*!50003 SET COMPLETION_TYPE=@OLD_COMPLETION_TYPE*/;
+/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=0*/;
+use test;
+drop table t1;
+drop table if exists t2;
+use test2;
+drop table t1;
+drop table if exists t2;
+use test3;
+drop table t1;
+drop table if exists t2;
+drop database test2;
+drop database test3;
+FLUSH LOGS;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/mysqldump.result b/storage/rocksdb/mysql-test/rocksdb/r/mysqldump.result
new file mode 100644
index 00000000000..ac6615be093
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/mysqldump.result
@@ -0,0 +1,246 @@
+drop table if exists r1;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+create table r1 (id1 int, id2 int, id3 varchar(100), id4 int, value1 int, value2 int, value3 int, value4 int, primary key (id1, id2, id3, id4), KEY (value1, value2, value3)) engine=rocksdb;
+insert into r1 values (1,1,1,1,1,1,1,1);
+insert into r1 values (1,1,1,2,2,2,2,2);
+insert into r1 values (1,1,2,1,3,3,3,3);
+insert into r1 values (1,1,2,2,4,4,4,4);
+insert into r1 values (1,2,1,1,5,5,5,5);
+insert into r1 values (1,2,1,2,6,6,6,6);
+insert into r1 values (1,2,2,1,7,7,7,7);
+insert into r1 values (1,2,2,2,8,8,8,8);
+insert into r1 values (2,1,1,1,9,9,9,9);
+insert into r1 values (2,1,1,2,10,10,10,10);
+insert into r1 values (2,1,2,1,11,11,11,11);
+insert into r1 values (2,1,2,2,12,12,12,12);
+insert into r1 values (2,2,1,1,13,13,13,13);
+insert into r1 values (2,2,1,2,14,14,14,14);
+insert into r1 values (2,2,2,1,15,15,15,15);
+insert into r1 values (2,2,2,2,16,16,16,16);
+connection con2;
+BEGIN;
+insert into r1 values (5,5,5,5,5,5,5,5);
+update r1 set value1=value1+100 where id1=1 and id2=1 and id3='1';
+
+/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
+/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
+/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
+/*!40101 SET NAMES utf8 */;
+/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
+/*!40103 SET TIME_ZONE='+00:00' */;
+/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
+/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
+/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
+/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
+/*!50601 SELECT count(*) INTO @is_mysql8 FROM information_schema.TABLES WHERE table_schema='performance_schema' AND table_name='session_variables' */;
+/*!50601 SET @check_rocksdb = CONCAT( 'SELECT count(*) INTO @is_rocksdb_supported FROM ', IF (@is_mysql8, 'performance', 'information'), '_schema.session_variables WHERE variable_name=\'rocksdb_bulk_load\'') */;
+/*!50601 PREPARE s FROM @check_rocksdb */;
+/*!50601 EXECUTE s */;
+/*!50601 SET @enable_bulk_load = IF (@is_rocksdb_supported, 'SET SESSION rocksdb_bulk_load=1', 'SET @dummy = 0') */;
+/*!50601 PREPARE s FROM @enable_bulk_load */;
+/*!50601 EXECUTE s */;
+-- CHANGE MASTER TO MASTER_LOG_FILE='master-bin.000001', MASTER_LOG_POS=BINLOG_START;
+-- SET GLOBAL gtid_slave_pos='0-1-18';
+DROP TABLE IF EXISTS `r1`;
+/*!40101 SET @saved_cs_client = @@character_set_client */;
+/*!40101 SET character_set_client = utf8 */;
+CREATE TABLE `r1` (
+ `id1` int(11) NOT NULL,
+ `id2` int(11) NOT NULL,
+ `id3` varchar(100) NOT NULL,
+ `id4` int(11) NOT NULL,
+ `value1` int(11) DEFAULT NULL,
+ `value2` int(11) DEFAULT NULL,
+ `value3` int(11) DEFAULT NULL,
+ `value4` int(11) DEFAULT NULL,
+ PRIMARY KEY (`id1`,`id2`,`id3`,`id4`),
+ KEY `value1` (`value1`,`value2`,`value3`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1;
+/*!40101 SET character_set_client = @saved_cs_client */;
+/* ORDERING KEY (DESC) : PRIMARY */;
+
+LOCK TABLES `r1` WRITE;
+/*!40000 ALTER TABLE `r1` DISABLE KEYS */;
+INSERT INTO `r1` VALUES (2,2,'2',2,16,16,16,16),(2,2,'2',1,15,15,15,15),(2,2,'1',2,14,14,14,14),(2,2,'1',1,13,13,13,13),(2,1,'2',2,12,12,12,12),(2,1,'2',1,11,11,11,11),(2,1,'1',2,10,10,10,10),(2,1,'1',1,9,9,9,9),(1,2,'2',2,8,8,8,8),(1,2,'2',1,7,7,7,7),(1,2,'1',2,6,6,6,6),(1,2,'1',1,5,5,5,5),(1,1,'2',2,4,4,4,4),(1,1,'2',1,3,3,3,3),(1,1,'1',2,2,2,2,2),(1,1,'1',1,1,1,1,1);
+/*!40000 ALTER TABLE `r1` ENABLE KEYS */;
+UNLOCK TABLES;
+/*!50601 SET @disable_bulk_load = IF (@is_rocksdb_supported, 'SET SESSION rocksdb_bulk_load=0', 'SET @dummy = 0') */;
+/*!50601 PREPARE s FROM @disable_bulk_load */;
+/*!50601 EXECUTE s */;
+/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
+
+/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
+/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
+/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
+/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
+/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
+/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
+/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
+
+
+/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
+/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
+/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
+/*!40101 SET NAMES utf8 */;
+/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
+/*!40103 SET TIME_ZONE='+00:00' */;
+/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
+/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
+/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
+/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
+/*!50601 SELECT count(*) INTO @is_mysql8 FROM information_schema.TABLES WHERE table_schema='performance_schema' AND table_name='session_variables' */;
+/*!50601 SET @check_rocksdb = CONCAT( 'SELECT count(*) INTO @is_rocksdb_supported FROM ', IF (@is_mysql8, 'performance', 'information'), '_schema.session_variables WHERE variable_name=\'rocksdb_bulk_load\'') */;
+/*!50601 PREPARE s FROM @check_rocksdb */;
+/*!50601 EXECUTE s */;
+/*!50601 SET @bulk_load_allow_sk = IF (@is_rocksdb_supported, 'SET SESSION rocksdb_bulk_load_allow_sk=1', 'SET @dummy = 0') */;
+/*!50601 PREPARE s FROM @bulk_load_allow_sk */;
+/*!50601 EXECUTE s */;
+/*!50601 SET @enable_bulk_load = IF (@is_rocksdb_supported, 'SET SESSION rocksdb_bulk_load=1', 'SET @dummy = 0') */;
+/*!50601 PREPARE s FROM @enable_bulk_load */;
+/*!50601 EXECUTE s */;
+-- CHANGE MASTER TO MASTER_LOG_FILE='master-bin.000001', MASTER_LOG_POS=BINLOG_START;
+DROP TABLE IF EXISTS `r1`;
+/*!40101 SET @saved_cs_client = @@character_set_client */;
+/*!40101 SET character_set_client = utf8 */;
+CREATE TABLE `r1` (
+ `id1` int(11) NOT NULL DEFAULT '0',
+ `id2` int(11) NOT NULL DEFAULT '0',
+ `id3` varchar(100) NOT NULL DEFAULT '',
+ `id4` int(11) NOT NULL DEFAULT '0',
+ `value1` int(11) DEFAULT NULL,
+ `value2` int(11) DEFAULT NULL,
+ `value3` int(11) DEFAULT NULL,
+ `value4` int(11) DEFAULT NULL,
+ PRIMARY KEY (`id1`,`id2`,`id3`,`id4`),
+ KEY `value1` (`value1`,`value2`,`value3`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1;
+/*!40101 SET character_set_client = @saved_cs_client */;
+/* ORDERING KEY (DESC) : PRIMARY */;
+
+LOCK TABLES `r1` WRITE;
+/*!40000 ALTER TABLE `r1` DISABLE KEYS */;
+INSERT INTO `r1` VALUES (2,2,'2',2,16,16,16,16),(2,2,'2',1,15,15,15,15),(2,2,'1',2,14,14,14,14),(2,2,'1',1,13,13,13,13),(2,1,'2',2,12,12,12,12),(2,1,'2',1,11,11,11,11),(2,1,'1',2,10,10,10,10),(2,1,'1',1,9,9,9,9),(1,2,'2',2,8,8,8,8),(1,2,'2',1,7,7,7,7),(1,2,'1',2,6,6,6,6),(1,2,'1',1,5,5,5,5),(1,1,'2',2,4,4,4,4),(1,1,'2',1,3,3,3,3),(1,1,'1',2,2,2,2,2),(1,1,'1',1,1,1,1,1);
+/*!40000 ALTER TABLE `r1` ENABLE KEYS */;
+UNLOCK TABLES;
+/*!50601 SET @disable_bulk_load = IF (@is_rocksdb_supported, 'SET SESSION rocksdb_bulk_load=0', 'SET @dummy = 0') */;
+/*!50601 PREPARE s FROM @disable_bulk_load */;
+/*!50601 EXECUTE s */;
+/*!50601 SET @disable_bulk_load_allow_sk = IF (@is_rocksdb_supported, 'SET SESSION rocksdb_bulk_load_allow_sk=0', 'SET @dummy = 0') */;
+/*!50601 PREPARE s FROM @disable_bulk_load_allow_sk */;
+/*!50601 EXECUTE s */;
+/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
+
+/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
+/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
+/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
+/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
+/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
+/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
+/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
+
+rollback;
+connection con1;
+2
+set @save_default_storage_engine=@@global.default_storage_engine;
+SET GLOBAL default_storage_engine=rocksdb;
+
+/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
+/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
+/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
+/*!40101 SET NAMES utf8 */;
+/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
+/*!40103 SET TIME_ZONE='+00:00' */;
+/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
+/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
+/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
+/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
+-- CHANGE MASTER TO MASTER_LOG_FILE='master-bin.000001', MASTER_LOG_POS=BINLOG_START;
+-- SET GLOBAL gtid_slave_pos='0-1-18';
+DROP TABLE IF EXISTS `r1`;
+/*!40101 SET @saved_cs_client = @@character_set_client */;
+/*!40101 SET character_set_client = utf8 */;
+CREATE TABLE `r1` (
+ `id1` int(11) NOT NULL,
+ `id2` int(11) NOT NULL,
+ `id3` varchar(100) NOT NULL,
+ `id4` int(11) NOT NULL,
+ `value1` int(11) DEFAULT NULL,
+ `value2` int(11) DEFAULT NULL,
+ `value3` int(11) DEFAULT NULL,
+ `value4` int(11) DEFAULT NULL,
+ PRIMARY KEY (`id1`,`id2`,`id3`,`id4`),
+ KEY `value1` (`value1`,`value2`,`value3`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1;
+/*!40101 SET character_set_client = @saved_cs_client */;
+/* ORDERING KEY : (null) */;
+
+LOCK TABLES `r1` WRITE;
+/*!40000 ALTER TABLE `r1` DISABLE KEYS */;
+INSERT INTO `r1` VALUES (1,1,'1',1,1,1,1,1),(1,1,'1',2,2,2,2,2),(1,1,'2',1,3,3,3,3),(1,1,'2',2,4,4,4,4),(1,2,'1',1,5,5,5,5),(1,2,'1',2,6,6,6,6),(1,2,'2',1,7,7,7,7),(1,2,'2',2,8,8,8,8),(2,1,'1',1,9,9,9,9),(2,1,'1',2,10,10,10,10),(2,1,'2',1,11,11,11,11),(2,1,'2',2,12,12,12,12),(2,2,'1',1,13,13,13,13),(2,2,'1',2,14,14,14,14),(2,2,'2',1,15,15,15,15),(2,2,'2',2,16,16,16,16);
+/*!40000 ALTER TABLE `r1` ENABLE KEYS */;
+UNLOCK TABLES;
+/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
+
+/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
+/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
+/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
+/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
+/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
+/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
+/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
+
+3
+==== mysqldump with --innodb-stats-on-metadata ====
+
+/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
+/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
+/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
+/*!40101 SET NAMES utf8 */;
+/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
+/*!40103 SET TIME_ZONE='+00:00' */;
+/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
+/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
+/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
+/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
+-- CHANGE MASTER TO MASTER_LOG_FILE='master-bin.000001', MASTER_LOG_POS=BINLOG_START;
+-- SET GLOBAL gtid_slave_pos='0-1-18';
+DROP TABLE IF EXISTS `r1`;
+/*!40101 SET @saved_cs_client = @@character_set_client */;
+/*!40101 SET character_set_client = utf8 */;
+CREATE TABLE `r1` (
+ `id1` int(11) NOT NULL,
+ `id2` int(11) NOT NULL,
+ `id3` varchar(100) NOT NULL,
+ `id4` int(11) NOT NULL,
+ `value1` int(11) DEFAULT NULL,
+ `value2` int(11) DEFAULT NULL,
+ `value3` int(11) DEFAULT NULL,
+ `value4` int(11) DEFAULT NULL,
+ PRIMARY KEY (`id1`,`id2`,`id3`,`id4`),
+ KEY `value1` (`value1`,`value2`,`value3`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1;
+/*!40101 SET character_set_client = @saved_cs_client */;
+/* ORDERING KEY : (null) */;
+
+LOCK TABLES `r1` WRITE;
+/*!40000 ALTER TABLE `r1` DISABLE KEYS */;
+INSERT INTO `r1` VALUES (1,1,'1',1,1,1,1,1),(1,1,'1',2,2,2,2,2),(1,1,'2',1,3,3,3,3),(1,1,'2',2,4,4,4,4),(1,2,'1',1,5,5,5,5),(1,2,'1',2,6,6,6,6),(1,2,'2',1,7,7,7,7),(1,2,'2',2,8,8,8,8),(2,1,'1',1,9,9,9,9),(2,1,'1',2,10,10,10,10),(2,1,'2',1,11,11,11,11),(2,1,'2',2,12,12,12,12),(2,2,'1',1,13,13,13,13),(2,2,'1',2,14,14,14,14),(2,2,'2',1,15,15,15,15),(2,2,'2',2,16,16,16,16);
+/*!40000 ALTER TABLE `r1` ENABLE KEYS */;
+UNLOCK TABLES;
+/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
+
+/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
+/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
+/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
+/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
+/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
+/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
+/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
+
+SET GLOBAL binlog_format=statement;
+SET GLOBAL binlog_format=row;
+drop table r1;
+reset master;
+set @@global.default_storage_engine=@save_default_storage_engine;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/mysqldump2.result b/storage/rocksdb/mysql-test/rocksdb/r/mysqldump2.result
new file mode 100644
index 00000000000..11c1f370e7a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/mysqldump2.result
@@ -0,0 +1,16 @@
+DROP TABLE IF EXISTS t1;
+create table t1 (id int primary key, value int, value2 varchar(200), index(value)) engine=rocksdb;
+optimize table t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_block_cache_add';
+select case when variable_value - @a > 20 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_block_cache_add';
+case when variable_value - @a > 20 then 'true' else 'false' end
+false
+select count(*) from t1;
+count(*)
+50000
+select case when variable_value - @a > 100 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_block_cache_add';
+case when variable_value - @a > 100 then 'true' else 'false' end
+true
+DROP TABLE t1;
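[Editorial note] The result above relies on a delta-measurement idiom: snapshot a global status counter, run the operation under test, then assert on the counter's growth against a threshold rather than an exact value. A minimal sketch of that idiom, assuming a populated RocksDB table t1 like the one in the test (the cache_grew alias is illustrative):

SELECT variable_value INTO @a
  FROM information_schema.global_status
 WHERE variable_name='rocksdb_block_cache_add';

SELECT count(*) FROM t1;  -- full scan; expected to pull blocks into the cache

-- Threshold comparison absorbs run-to-run variation in block counts.
SELECT CASE WHEN variable_value - @a > 100 THEN 'true' ELSE 'false' END AS cache_grew
  FROM information_schema.global_status
 WHERE variable_name='rocksdb_block_cache_add';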
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/native_procedure.result b/storage/rocksdb/mysql-test/rocksdb/r/native_procedure.result
new file mode 100644
index 00000000000..725b74e1291
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/native_procedure.result
@@ -0,0 +1,397 @@
+create database linkdb;
+use linkdb;
+create table linktable (
+id1 bigint(20) unsigned NOT NULL DEFAULT '0',
+id2 bigint(20) unsigned NOT NULL DEFAULT '0',
+link_type bigint(20) unsigned NOT NULL DEFAULT '0',
+visibility tinyint(3) NOT NULL DEFAULT '0',
+data varchar(255) NOT NULL DEFAULT '',
+time bigint(20) unsigned NOT NULL DEFAULT '0',
+version int(11) unsigned NOT NULL DEFAULT '0',
+primary key (id1,id2,link_type) COMMENT 'cf_link_pk',
+KEY id1_type (id1,link_type,visibility,time,version,data)
+COMMENT 'rev:cf_link_id1_type') ENGINE=rocksdb DEFAULT COLLATE=latin1_bin;
+create table counttable (
+id bigint(20) unsigned NOT NULL DEFAULT '0',
+link_type bigint(20) unsigned NOT NULL DEFAULT '0',
+count int(10) unsigned NOT NULL DEFAULT '0',
+time bigint(20) unsigned NOT NULL DEFAULT '0',
+version bigint(20) unsigned NOT NULL DEFAULT '0',
+primary key (id,link_type) COMMENT 'cf_count_pk')
+ENGINE=rocksdb DEFAULT COLLATE=latin1_bin;
+create table nodetable (
+id bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+type int(10) unsigned NOT NULL,
+version bigint(20) unsigned NOT NULL,
+time int(10) unsigned NOT NULL,
+data mediumtext NOT NULL,
+primary key(id) COMMENT 'cf_node_pk')
+ENGINE=rocksdb DEFAULT COLLATE=latin1_bin;
+#
+# Test nodeGet function
+#
+create native procedure nodeGet soname "NP_EXAMPLE_LIB";
+%nodeGet 1;
+id type version time data
+1 1 1 1000 data
+%nodeGet 50 anything can go here;
+id type version time data
+50 1 1 50000 data
+%nodeGet 39;
+id type version time data
+39 1 1 39000 data
+%nodeGet 98;
+id type version time data
+98 1 1 98000 data
+%nodeGet 1000;
+id type version time data
+%nodeGet -1;
+ERROR HY000: Native procedure failed. (code: 7, msg: 'Invalid arguments: Conversion failed for field id.', query 'nodeGet -1')
+%nodeGet asdf;
+ERROR HY000: Native procedure failed. (code: 7, msg: 'Invalid arguments: Conversion failed for field id.', query 'nodeGet asdf')
+#
+# Test linkGetRange/linkGetId2s function
+#
+create native procedure linkGetRange soname "NP_EXAMPLE_LIB";
+create native procedure linkGetId2s soname "NP_EXAMPLE_LIB";
+%linkGetRange 1 1 1000 2000 0 1000;
+id1 id2 link_type visibility data time version
+1 5 1 1 data 1005 1
+1 4 1 1 data 1004 1
+1 3 1 1 data 1003 1
+1 2 1 1 data 1002 1
+1 1 1 1 data 1001 1
+%linkGetRange 1 2 1000 2000 0 1000;
+id1 id2 link_type visibility data time version
+%linkGetRange 1 1 5000 2000 0 1000;
+id1 id2 link_type visibility data time version
+%linkGetRange 1 2 1000 6000 0 5;
+id1 id2 link_type visibility data time version
+1 5 2 1 data 2005 1
+1 4 2 1 data 2004 1
+1 3 2 1 data 2003 1
+1 2 2 1 data 2002 1
+1 1 2 1 data 2001 1
+%linkGetRange 1 2 1000 6000 0 2;
+id1 id2 link_type visibility data time version
+1 5 2 1 data 2005 1
+1 4 2 1 data 2004 1
+%linkGetRange 1 2 1000 6000 2 2;
+id1 id2 link_type visibility data time version
+1 3 2 1 data 2003 1
+1 2 2 1 data 2002 1
+%linkGetId2s 1 3 3 1 2 3;
+id1 id2 link_type visibility data time version
+1 1 3 1 data 3001 1
+1 2 3 1 data 3002 1
+1 3 3 1 data 3003 1
+%linkGetId2s 1 3 3 3 2 1;
+id1 id2 link_type visibility data time version
+1 3 3 1 data 3003 1
+1 2 3 1 data 3002 1
+1 1 3 1 data 3001 1
+%linkGetId2s 1 3 3 3 2 10;
+id1 id2 link_type visibility data time version
+1 3 3 1 data 3003 1
+1 2 3 1 data 3002 1
+%linkGetId2s 1 3 3 3 2 1 asdf;
+id1 id2 link_type visibility data time version
+1 3 3 1 data 3003 1
+1 2 3 1 data 3002 1
+1 1 3 1 data 3001 1
+%linkGetId2s 1 3 0;
+id1 id2 link_type visibility data time version
+%linkGetId2s 1 3 4 2;
+ERROR HY000: Incorrect arguments to native procedure. (query 'linkGetId2s 1 3 4 2')
+#
+# Test rangeQuery function
+#
+create native procedure rangeQuery soname "NP_EXAMPLE_LIB";
+%rangeQuery 1 0 0 4 id1 1 link_type 1 visibility 1 time 1001 4 id1 1 link_type 1 visibility 1 time 1005;
+id1 id2 link_type visibility data time version
+1 1 1 1 data 1001 1
+1 2 1 1 data 1002 1
+1 3 1 1 data 1003 1
+1 4 1 1 data 1004 1
+1 5 1 1 data 1005 1
+%rangeQuery 1 0 1 4 id1 1 link_type 1 visibility 1 time 1001 4 id1 1 link_type 1 visibility 1 time 1005;
+id1 id2 link_type visibility data time version
+1 1 1 1 data 1001 1
+1 2 1 1 data 1002 1
+1 3 1 1 data 1003 1
+1 4 1 1 data 1004 1
+%rangeQuery 1 1 0 4 id1 1 link_type 1 visibility 1 time 1001 4 id1 1 link_type 1 visibility 1 time 1005;
+id1 id2 link_type visibility data time version
+1 2 1 1 data 1002 1
+1 3 1 1 data 1003 1
+1 4 1 1 data 1004 1
+1 5 1 1 data 1005 1
+%rangeQuery 1 1 1 4 id1 1 link_type 1 visibility 1 time 1001 4 id1 1 link_type 1 visibility 1 time 1005;
+id1 id2 link_type visibility data time version
+1 2 1 1 data 1002 1
+1 3 1 1 data 1003 1
+1 4 1 1 data 1004 1
+%rangeQuery 0 0 0 4 id1 1 link_type 1 visibility 1 time 1001 4 id1 1 link_type 1 visibility 1 time 1005;
+id1 id2 link_type visibility data time version
+1 5 1 1 data 1005 1
+1 4 1 1 data 1004 1
+1 3 1 1 data 1003 1
+1 2 1 1 data 1002 1
+1 1 1 1 data 1001 1
+%rangeQuery 0 0 1 4 id1 1 link_type 1 visibility 1 time 1001 4 id1 1 link_type 1 visibility 1 time 1005;
+id1 id2 link_type visibility data time version
+1 4 1 1 data 1004 1
+1 3 1 1 data 1003 1
+1 2 1 1 data 1002 1
+1 1 1 1 data 1001 1
+%rangeQuery 0 1 0 4 id1 1 link_type 1 visibility 1 time 1001 4 id1 1 link_type 1 visibility 1 time 1005;
+id1 id2 link_type visibility data time version
+1 5 1 1 data 1005 1
+1 4 1 1 data 1004 1
+1 3 1 1 data 1003 1
+1 2 1 1 data 1002 1
+%rangeQuery 0 1 1 4 id1 1 link_type 1 visibility 1 time 1001 4 id1 1 link_type 1 visibility 1 time 1005;
+id1 id2 link_type visibility data time version
+1 4 1 1 data 1004 1
+1 3 1 1 data 1003 1
+1 2 1 1 data 1002 1
+%rangeQuery 1 0 0 2 id1 1 link_type 1 2 id1 1 link_type 2;
+id1 id2 link_type visibility data time version
+1 1 1 1 data 1001 1
+1 2 1 1 data 1002 1
+1 3 1 1 data 1003 1
+1 4 1 1 data 1004 1
+1 5 1 1 data 1005 1
+1 1 2 1 data 2001 1
+1 2 2 1 data 2002 1
+1 3 2 1 data 2003 1
+1 4 2 1 data 2004 1
+1 5 2 1 data 2005 1
+%rangeQuery 1 0 1 2 id1 1 link_type 1 2 id1 1 link_type 2;
+id1 id2 link_type visibility data time version
+1 1 1 1 data 1001 1
+1 2 1 1 data 1002 1
+1 3 1 1 data 1003 1
+1 4 1 1 data 1004 1
+1 5 1 1 data 1005 1
+%rangeQuery 1 1 0 2 id1 1 link_type 1 2 id1 1 link_type 2;
+id1 id2 link_type visibility data time version
+1 1 2 1 data 2001 1
+1 2 2 1 data 2002 1
+1 3 2 1 data 2003 1
+1 4 2 1 data 2004 1
+1 5 2 1 data 2005 1
+%rangeQuery 1 1 1 2 id1 1 link_type 1 2 id1 1 link_type 2;
+id1 id2 link_type visibility data time version
+%rangeQuery 0 0 0 2 id1 1 link_type 1 2 id1 1 link_type 2;
+id1 id2 link_type visibility data time version
+1 5 2 1 data 2005 1
+1 4 2 1 data 2004 1
+1 3 2 1 data 2003 1
+1 2 2 1 data 2002 1
+1 1 2 1 data 2001 1
+1 5 1 1 data 1005 1
+1 4 1 1 data 1004 1
+1 3 1 1 data 1003 1
+1 2 1 1 data 1002 1
+1 1 1 1 data 1001 1
+%rangeQuery 0 0 1 2 id1 1 link_type 1 2 id1 1 link_type 2;
+id1 id2 link_type visibility data time version
+1 5 1 1 data 1005 1
+1 4 1 1 data 1004 1
+1 3 1 1 data 1003 1
+1 2 1 1 data 1002 1
+1 1 1 1 data 1001 1
+%rangeQuery 0 1 0 2 id1 1 link_type 1 2 id1 1 link_type 2;
+id1 id2 link_type visibility data time version
+1 5 2 1 data 2005 1
+1 4 2 1 data 2004 1
+1 3 2 1 data 2003 1
+1 2 2 1 data 2002 1
+1 1 2 1 data 2001 1
+%rangeQuery 0 1 1 2 id1 1 link_type 1 2 id1 1 link_type 2;
+id1 id2 link_type visibility data time version
+%rangeQuery 0 0 0 2 id1 1 link_type 1 4 id1 1 link_type 2 visibility 1 time 2004;
+id1 id2 link_type visibility data time version
+1 4 2 1 data 2004 1
+1 3 2 1 data 2003 1
+1 2 2 1 data 2002 1
+1 1 2 1 data 2001 1
+1 5 1 1 data 1005 1
+1 4 1 1 data 1004 1
+1 3 1 1 data 1003 1
+1 2 1 1 data 1002 1
+1 1 1 1 data 1001 1
+%rangeQuery 0 0 1 2 id1 1 link_type 1 4 id1 1 link_type 2 visibility 1 time 2004;
+id1 id2 link_type visibility data time version
+1 3 2 1 data 2003 1
+1 2 2 1 data 2002 1
+1 1 2 1 data 2001 1
+1 5 1 1 data 1005 1
+1 4 1 1 data 1004 1
+1 3 1 1 data 1003 1
+1 2 1 1 data 1002 1
+1 1 1 1 data 1001 1
+%rangeQuery 0 1 0 2 id1 1 link_type 1 4 id1 1 link_type 2 visibility 1 time 2004;
+id1 id2 link_type visibility data time version
+1 4 2 1 data 2004 1
+1 3 2 1 data 2003 1
+1 2 2 1 data 2002 1
+1 1 2 1 data 2001 1
+%rangeQuery 0 1 1 2 id1 1 link_type 1 4 id1 1 link_type 2 visibility 1 time 2004;
+id1 id2 link_type visibility data time version
+1 3 2 1 data 2003 1
+1 2 2 1 data 2002 1
+1 1 2 1 data 2001 1
+#
+# Test countGet function
+#
+create native procedure countGet soname "NP_EXAMPLE_LIB";
+%countGet 1 1;
+count
+2
+%countGet 10 1;
+count
+20
+%countGet 111 1;
+count
+%countGet 1 111;
+count
+%countGet -1 1 1;
+ERROR HY000: Native procedure failed. (code: 7, msg: 'Invalid arguments: Conversion failed for field id.', query 'countGet -1 1 1')
+%countGet -1 1 2;
+ERROR HY000: Native procedure failed. (code: 7, msg: 'Invalid arguments: Conversion failed for field id.', query 'countGet -1 1 2')
+%countGet;
+ERROR HY000: Incorrect arguments to native procedure. (query 'countGet')
+#
+# Check that DDL locks are respected.
+#
+create native procedure sleepRange soname "NP_EXAMPLE_LIB";
+%sleepRange 1;
+set @start_lock_wait_timeout = @@session.lock_wait_timeout;
+set lock_wait_timeout = 1;
+drop table counttable;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: linkdb.counttable
+set lock_wait_timeout = @start_lock_wait_timeout;
+count
+2
+4
+6
+8
+10
+12
+14
+16
+18
+20
+#
+# Check that queries can be killed.
+#
+%sleepRange 0;
+kill query ID;
+ERROR 70100: Query execution was interrupted
+#
+# Check that native procedures work properly with transactions.
+#
+use linkdb;
+set session transaction isolation level repeatable read;
+%countGet 1 1;
+count
+2
+begin;
+select count from counttable where id = 1 and link_type = 1;
+count
+2
+%countGet 1 1;
+count
+2
+# Before update
+%countGet 1 1;
+count
+2
+update counttable set count = count + 1 where id = 1 and link_type = 1;
+# After update
+%countGet 1 1;
+count
+3
+# Unchanged due to consistent reads
+%countGet 1 1;
+count
+2
+#
+# Check index reads on prefixed data.
+#
+alter table linktable drop index id1_type;
+alter table linktable
+add index id1_type (id1,link_type,visibility,time,version,data(1))
+COMMENT 'rev:cf_link_id1_type';
+%linkGetRange 1 1 1000 2000 0 1000;
+id1 id2 link_type visibility data time version
+1 5 1 1 data 1005 1
+1 4 1 1 data 1004 1
+1 3 1 1 data 1003 1
+1 2 1 1 data 1002 1
+1 1 1 1 data 1001 1
+#
+# Check correct error handling for various scenarios.
+#
+create native procedure invalidKey1 soname "NP_EXAMPLE_LIB";
+%invalidKey1;
+ERROR HY000: Native procedure failed. (code: 6, msg: 'Not found: ', query 'invalidKey1')
+create native procedure invalidOpen1 soname "NP_EXAMPLE_LIB";
+%invalidOpen1;
+ERROR HY000: Native procedure failed. (code: 5, msg: 'Cannot reinitialize: ', query 'invalidOpen1')
+create native procedure invalidOpen2 soname "NP_EXAMPLE_LIB";
+%invalidOpen2;
+ERROR HY000: Native procedure failed. (code: 5, msg: 'Cannot reinitialize: ', query 'invalidOpen2')
+create native procedure invalidOpen3 soname "NP_EXAMPLE_LIB";
+%invalidOpen3;
+ERROR HY000: Native procedure failed. (code: 5, msg: 'Cannot reinitialize: ', query 'invalidOpen3')
+create native procedure invalidOpen4 soname "NP_EXAMPLE_LIB";
+%invalidOpen4;
+ERROR HY000: Native procedure failed. (code: 5, msg: 'Cannot reinitialize: ', query 'invalidOpen4')
+%invalidProcedure;
+ERROR HY000: Unknown native procedure. 'invalidProcedure'
+create native procedure invalidProcedure soname "invalid.so";
+ERROR HY000: Can't open shared library
+create native procedure invalidProcedure soname "NP_EXAMPLE_LIB";
+ERROR HY000: Can't find symbol 'invalidProcedure' in library
+#
+# Check that our functions are reloaded after restart.
+#
+select * from mysql.native_proc order by name;
+name type dl lua
+countGet native np_example.so
+invalidKey1 native np_example.so
+invalidOpen1 native np_example.so
+invalidOpen2 native np_example.so
+invalidOpen3 native np_example.so
+invalidOpen4 native np_example.so
+linkGetId2s native np_example.so
+linkGetRange native np_example.so
+nodeGet native np_example.so
+rangeQuery native np_example.so
+sleepRange native np_example.so
+drop native procedure nodeGet;
+create native procedure nodeGet soname "NP_EXAMPLE_LIB";
+ERROR HY000: Native procedure 'nodeGet' exists.
+drop native procedure linkGetRange;
+drop native procedure linkGetId2s;
+drop native procedure countGet;
+drop native procedure sleepRange;
+drop native procedure rangeQuery;
+drop native procedure invalidKey1;
+drop native procedure invalidOpen1;
+drop native procedure invalidOpen2;
+drop native procedure invalidOpen3;
+drop native procedure invalidOpen4;
+%nodeGet 1;
+ERROR HY000: Unknown native procedure. 'nodeGet'
+#
+# Check that our functions are unloaded after restart.
+#
+select * from mysql.native_proc order by name;
+name type dl lua
+%nodeGet 1;
+ERROR HY000: Unknown native procedure. 'nodeGet'
+drop database linkdb;
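[Editorial note] The lifecycle exercised above, condensed. NP_EXAMPLE_LIB is a test-driver substitution for the example plugin's file name (np_example.so in the catalog listing), the '%name args;' syntax invokes a registered procedure, and registrations persist in mysql.native_proc across restarts until dropped. A minimal sketch:

-- Register a procedure exported by the example library.
create native procedure nodeGet soname "NP_EXAMPLE_LIB";

-- Invoke it; the procedure parses its own argument list.
%nodeGet 1;

-- Registrations are catalogued and reloaded after a server restart.
select name, type, dl from mysql.native_proc order by name;

-- Re-registering an existing name is an error, so drop it first.
drop native procedure nodeGet;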
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/negative_stats.result b/storage/rocksdb/mysql-test/rocksdb/r/negative_stats.result
new file mode 100644
index 00000000000..61c1d7e9bdb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/negative_stats.result
@@ -0,0 +1,9 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (i1 INT, PRIMARY KEY (i1)) ENGINE = ROCKSDB;
+SET GLOBAL ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW=1;
+set session debug_dbug= "+d,myrocks_simulate_negative_stats";
+SELECT CASE WHEN DATA_LENGTH < 1024 * 1024 THEN 'true' ELSE 'false' END FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
+CASE WHEN DATA_LENGTH < 1024 * 1024 THEN 'true' ELSE 'false' END
+true
+set session debug_dbug= "-d,myrocks_simulate_negative_stats";
+DROP TABLE t1;
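[Editorial note] The negative-stats check above works by fault injection, which requires a debug build: a named DBUG keyword is switched on with "+d,...", the assertion runs while the simulated condition is active, and the keyword is switched off with "-d,..." so later statements see normal behavior. The same pattern in isolation (debug build and the t1 table above assumed):

set session debug_dbug= "+d,myrocks_simulate_negative_stats";  -- enable keyword
SELECT DATA_LENGTH FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
set session debug_dbug= "-d,myrocks_simulate_negative_stats";  -- disable keyword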
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/no_merge_sort.result b/storage/rocksdb/mysql-test/rocksdb/r/no_merge_sort.result
new file mode 100644
index 00000000000..3a631d2925b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/no_merge_sort.result
@@ -0,0 +1,63 @@
+Warnings:
+Note 1051 Unknown table 'test.ti_nk'
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+skip_merge_sort
+true
+DROP TABLE ti_nk;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/optimize_myrocks_replace_into_base.result b/storage/rocksdb/mysql-test/rocksdb/r/optimize_myrocks_replace_into_base.result
new file mode 100644
index 00000000000..12223ebf228
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/optimize_myrocks_replace_into_base.result
@@ -0,0 +1,98 @@
+SET @prior_rocksdb_perf_context_level = @@rocksdb_perf_context_level;
+SET GLOBAL rocksdb_perf_context_level=3;
+SET GLOBAL enable_blind_replace=ON;
+create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+c1 c2
+1 1
+2 2
+3 3
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+drop table t1;
+create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+create trigger trg before insert on t1 for each row set @a:=1;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+c1 c2
+1 1
+2 2
+3 3
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+false
+drop table t1;
+create table t1(c1 int,c2 int) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+c1 c2
+1 1
+2 2
+3 3
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+drop table t1;
+create table t1(c1 int,c2 int unique) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+c1 c2
+1 1
+2 2
+3 3
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+false
+drop table t1;
+create table t1(c1 int primary key,c2 int unique) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+c1 c2
+1 1
+2 2
+3 3
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+false
+drop table t1;
+create table t1(c1 int primary key,c2 int, key idx1(c2)) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+c1 c2
+1 1
+2 2
+3 3
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+false
+drop table t1;
+SET GLOBAL enable_blind_replace=OFF;
+create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+c1 c2
+1 1
+2 2
+3 3
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+false
+drop table t1;
+SET GLOBAL enable_blind_replace=DEFAULT;
+SET GLOBAL rocksdb_perf_context_level = @prior_rocksdb_perf_context_level;
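[Editorial note] Reading the cases above together: REPLACE stays read-free (no growth in rocksdb_num_get_for_update_calls) only for a table with a primary key, no unique or secondary keys, and no triggers; every other shape falls back to a read-before-write. A condensed sketch of the qualifying case, with hypothetical table name t_blind (the test also raises rocksdb_perf_context_level so the counter is populated):

SET GLOBAL rocksdb_perf_context_level=3;
SET GLOBAL enable_blind_replace=ON;
create table t_blind (c1 int, c2 int, primary key (c1)) engine=rocksdb;  -- PK only: qualifies
insert into t_blind values (1,1);

select variable_value into @c
  from information_schema.global_status
 where variable_name='rocksdb_num_get_for_update_calls';

replace into t_blind values (1,11);  -- expected to skip the GetForUpdate read

select case when variable_value-@c > 1 then 'false' else 'true' end as read_free
  from information_schema.global_status
 where variable_name='rocksdb_num_get_for_update_calls';

drop table t_blind;
SET GLOBAL enable_blind_replace=DEFAULT;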
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/optimize_myrocks_replace_into_lock.result b/storage/rocksdb/mysql-test/rocksdb/r/optimize_myrocks_replace_into_lock.result
new file mode 100644
index 00000000000..65ee9768339
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/optimize_myrocks_replace_into_lock.result
@@ -0,0 +1,46 @@
+SET GLOBAL enable_blind_replace=ON;
+create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+c1 c2
+1 1
+2 2
+3 3
+SELECT @@global.enable_blind_replace;
+@@global.enable_blind_replace
+1
+begin;
+replace into t1 values(1,11);
+SELECT @@global.enable_blind_replace;
+@@global.enable_blind_replace
+1
+begin;
+update t1 set c2=22 where c1=1;
+commit;
+# Reap update.
+commit;
+select * from t1;
+c1 c2
+1 22
+2 2
+3 3
+SELECT @@global.enable_blind_replace;
+@@global.enable_blind_replace
+1
+begin;
+update t1 set c2=55 where c1=1;
+SELECT @@global.enable_blind_replace;
+@@global.enable_blind_replace
+1
+begin;
+replace into t1 values(1,66);
+commit;
+# Reap replace into.
+commit;
+select * from t1;
+c1 c2
+1 66
+2 2
+3 3
+drop table t1;
+SET GLOBAL enable_blind_replace=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/optimize_table.result b/storage/rocksdb/mysql-test/rocksdb/r/optimize_table.result
new file mode 100644
index 00000000000..8273fdbae9f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/optimize_table.result
@@ -0,0 +1,77 @@
+DROP TABLE IF EXISTS t1,t2,t3,t4,t5,t6;
+create table t1 (id int primary key, value int, value2 varchar(200), index(value)) engine=rocksdb;
+create table t2 (id int primary key, value int, value2 varchar(200), index(value)) engine=rocksdb;
+create table t3 (id int primary key, value int, value2 varchar(200), index(value)) engine=rocksdb;
+create table t4 (id int, value int, value2 varchar(200), primary key (id) comment 'rev:cf_i', index(value) comment 'rev:cf_i') engine=rocksdb;
+create table t5 (id int, value int, value2 varchar(200), primary key (id) comment 'rev:cf_i', index(value) comment 'rev:cf_i') engine=rocksdb;
+create table t6 (id int, value int, value2 varchar(200), primary key (id) comment 'rev:cf_i', index(value) comment 'rev:cf_i') engine=rocksdb;
+select count(*) from t1;
+count(*)
+10000
+select count(*) from t2;
+count(*)
+10000
+select count(*) from t3;
+count(*)
+10000
+select count(*) from t4;
+count(*)
+10000
+select count(*) from t5;
+count(*)
+10000
+select count(*) from t6;
+count(*)
+10000
+delete from t1 where id <= 9900;
+delete from t2 where id <= 9900;
+delete from t3 where id <= 9900;
+delete from t4 where id <= 9900;
+delete from t5 where id <= 9900;
+delete from t6 where id <= 9900;
+optimize table t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+sst file reduction ok
+optimize table t3;
+Table Op Msg_type Msg_text
+test.t3 optimize status OK
+sst file reduction ok
+optimize table t4;
+Table Op Msg_type Msg_text
+test.t4 optimize status OK
+sst file reduction ok
+optimize table t6;
+Table Op Msg_type Msg_text
+test.t6 optimize status OK
+sst file reduction ok
+select count(*) from t1;
+count(*)
+100
+select count(*) from t2;
+count(*)
+100
+select count(*) from t3;
+count(*)
+100
+select count(*) from t4;
+count(*)
+100
+select count(*) from t5;
+count(*)
+100
+select count(*) from t6;
+count(*)
+100
+optimize table t2;
+Table Op Msg_type Msg_text
+test.t2 optimize status OK
+optimize table t5;
+Table Op Msg_type Msg_text
+test.t5 optimize status OK
+DROP TABLE t1;
+DROP TABLE t2;
+DROP TABLE t3;
+DROP TABLE t4;
+DROP TABLE t5;
+DROP TABLE t6;
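[Editorial note] The pattern above: mass deletes leave tombstones behind in SST files, and OPTIMIZE TABLE runs a manual compaction over the table's key ranges so the space is actually reclaimed (the test asserts an "sst file reduction ok" condition after each call). A condensed sketch with a hypothetical table name and a token data load standing in for the test's bulk insert:

create table t_opt (id int primary key, value int, index (value)) engine=rocksdb;
insert into t_opt values (1,1),(2,2),(3,3);  -- stands in for the 10000-row load
delete from t_opt where id <= 2;             -- leaves tombstones in the SST files
optimize table t_opt;                        -- manual compaction drops them
drop table t_opt;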
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/optimizer_loose_index_scans.result b/storage/rocksdb/mysql-test/rocksdb/r/optimizer_loose_index_scans.result
new file mode 100644
index 00000000000..1fe61fe9fc5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/optimizer_loose_index_scans.result
@@ -0,0 +1,281 @@
+set optimizer_switch='index_merge_sort_union=off';
+create table t (a int, b int, c int, d int, e int, primary key(a, b, c, d), key(b, d)) engine=rocksdb;
+analyze table t;
+Table Op Msg_type Msg_text
+test.t analyze status OK
+show indexes from t;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t 0 PRIMARY 1 a A 100 NULL NULL LSMTREE
+t 0 PRIMARY 2 b A 500 NULL NULL LSMTREE
+t 0 PRIMARY 3 c A 2500 NULL NULL LSMTREE
+t 0 PRIMARY 4 d A 2500 NULL NULL LSMTREE
+t 1 b 1 b A 50 NULL NULL LSMTREE
+t 1 b 2 d A 500 NULL NULL LSMTREE
+set optimizer_switch = 'skip_scan=off';
+explain select b, d from t where d < 2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t index NULL b 8 NULL # Using where; Using index
+rows_read
+2500
+set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
+explain select b, d from t where d < 2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range PRIMARY,b b 8 NULL # Using where; Using index for skip scan
+rows_read
+260
+include/diff_tables.inc [temp_orig, temp_skip]
+set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
+set optimizer_switch = 'skip_scan=off';
+explain select b, d from t where d > 4;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t index NULL b 8 NULL # Using where; Using index
+rows_read
+2500
+set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
+explain select b, d from t where d > 4;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range PRIMARY,b b 8 NULL # Using where; Using index for skip scan
+rows_read
+1505
+include/diff_tables.inc [temp_orig, temp_skip]
+set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
+set optimizer_switch = 'skip_scan=off';
+explain select a, b, c, d from t where a = 5 and d <= 3;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where; Using index
+rows_read
+250
+set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
+explain select a, b, c, d from t where a = 5 and d <= 3;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan
+rows_read
+126
+include/diff_tables.inc [temp_orig, temp_skip]
+set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
+set optimizer_switch = 'skip_scan=off';
+explain select e from t where a = 5 and d <= 3;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where
+rows_read
+250
+set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
+explain select e from t where a = 5 and d <= 3;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t ref PRIMARY,b PRIMARY 4 const # Using where
+rows_read
+250
+include/diff_tables.inc [temp_orig, temp_skip]
+set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
+set optimizer_switch = 'skip_scan=off';
+explain select a, b, c, d from t where a = 5 and d >= 98;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where; Using index
+rows_read
+250
+set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
+explain select a, b, c, d from t where a = 5 and d >= 98;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan
+rows_read
+26
+include/diff_tables.inc [temp_orig, temp_skip]
+set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
+set optimizer_switch = 'skip_scan=off';
+explain select e from t where a = 5 and d >= 98;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where
+rows_read
+250
+set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
+explain select e from t where a = 5 and d >= 98;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t ref PRIMARY,b PRIMARY 4 const # Using where
+rows_read
+250
+include/diff_tables.inc [temp_orig, temp_skip]
+set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
+set optimizer_switch = 'skip_scan=off';
+explain select a, b, c, d from t where a in (1, 5) and d >= 98;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range PRIMARY PRIMARY 4 NULL # Using where; Using index
+rows_read
+500
+set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
+explain select a, b, c, d from t where a in (1, 5) and d >= 98;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan
+rows_read
+52
+include/diff_tables.inc [temp_orig, temp_skip]
+set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
+set optimizer_switch = 'skip_scan=off';
+explain select a, b, c, d from t where a in (1, 3, 5) and d >= 98;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range PRIMARY PRIMARY 4 NULL # Using where; Using index
+rows_read
+750
+set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
+explain select a, b, c, d from t where a in (1, 3, 5) and d >= 98;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan
+rows_read
+78
+include/diff_tables.inc [temp_orig, temp_skip]
+set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
+set optimizer_switch = 'skip_scan=off';
+explain select a, b, c, d from t where a in (1, 5) and b in (1, 2) and d >= 98;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range PRIMARY,b PRIMARY 8 NULL # Using where; Using index
+rows_read
+200
+set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
+explain select a, b, c, d from t where a in (1, 5) and b in (1, 2) and d >= 98;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan
+rows_read
+24
+include/diff_tables.inc [temp_orig, temp_skip]
+set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
+set optimizer_switch = 'skip_scan=off';
+explain select a, b, c, d from t where a in (1, 2, 3, 4, 5) and b in (1, 2, 3) and d >= 98;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range PRIMARY,b PRIMARY 8 NULL # Using where; Using index
+rows_read
+750
+set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
+explain select a, b, c, d from t where a in (1, 2, 3, 4, 5) and b in (1, 2, 3) and d >= 98;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan
+rows_read
+90
+include/diff_tables.inc [temp_orig, temp_skip]
+set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
+set optimizer_switch = 'skip_scan=off';
+explain select a, b, c, d from t where a = 5 and b = 2 and d >= 98;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t ref PRIMARY,b PRIMARY 8 const,const # Using where; Using index
+rows_read
+50
+set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
+explain select a, b, c, d from t where a = 5 and b = 2 and d >= 98;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan
+rows_read
+6
+include/diff_tables.inc [temp_orig, temp_skip]
+set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
+set optimizer_switch = 'skip_scan=off';
+explain select a+1, b, c, d from t where a = 5 and d < 3;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where; Using index
+rows_read
+250
+set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
+explain select a+1, b, c, d from t where a = 5 and d < 3;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan
+rows_read
+101
+include/diff_tables.inc [temp_orig, temp_skip]
+set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
+set optimizer_switch = 'skip_scan=off';
+explain select b, c, d from t where a = 5 and d < 3;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where; Using index
+rows_read
+250
+set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
+explain select b, c, d from t where a = 5 and d < 3;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan
+rows_read
+101
+include/diff_tables.inc [temp_orig, temp_skip]
+set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
+set optimizer_switch = 'skip_scan=off';
+explain select a, b, c, d from t where a = b and d >= 98;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t index NULL b 8 NULL # Using where; Using index
+rows_read
+2500
+set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
+explain select a, b, c, d from t where a = b and d >= 98;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range PRIMARY,b b 8 NULL # Using where; Using index for skip scan
+rows_read
+5
+include/diff_tables.inc [temp_orig, temp_skip]
+set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
+set optimizer_switch = 'skip_scan=on';
+set optimizer_trace = 'enabled=on';
+explain select a, b, c, d from t where a = 5 and d < 3 order by b, c, d;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t ref PRIMARY,b PRIMARY 4 const # Using where; Using index
+select count(*) from information_schema.optimizer_trace where trace like '%order_attribute_not_prefix_in_index%';
+count(*)
+1
+explain select a, b, c, d from t where a = 2 and d >= 98 and e = 2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where
+select count(*) from information_schema.optimizer_trace where trace like '%query_references_nonkey_column%';
+count(*)
+1
+explain select a, b, c, d from t where a = 5 or b = 2 and d >= 98;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t index PRIMARY,b b 8 NULL # Using where; Using index
+select count(*) from information_schema.optimizer_trace where trace like '%no_range_tree%';
+count(*)
+1
+explain select a, b, c, d from t where a = 5 or b = 2 or d >= 98;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t index PRIMARY,b b 8 NULL # Using where; Using index
+select count(*) from information_schema.optimizer_trace where trace like '%no_range_tree%';
+count(*)
+1
+explain select a, b, c, d from t where a = 5 or d >= 98;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t index PRIMARY,b b 8 NULL # Using where; Using index
+select count(*) from information_schema.optimizer_trace where trace like '%no_range_tree%';
+count(*)
+1
+explain select a, b, c, d from t where ((a = 5 and b = 2) or a = 2) and d >= 98;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range PRIMARY,b b 8 NULL # Using where; Using index for skip scan
+select count(*) from information_schema.optimizer_trace where trace like '%keypart_in_disjunctive_query%';
+count(*)
+1
+explain select a, b, c, d from t where a > 2 and d >= 98;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range PRIMARY,b PRIMARY 4 NULL # Using where; Using index
+select count(*) from information_schema.optimizer_trace where trace like '%prefix_not_const_equality%';
+count(*)
+1
+explain select a, b, c, d from t where a = 2 and (d >= 98 or d < 2);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t ref PRIMARY,b PRIMARY 4 const # Using where; Using index
+select count(*) from information_schema.optimizer_trace where trace like '%range_predicate_too_complex%';
+count(*)
+1
+explain select a, b, c, d from t where a = 2 and b = 2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t ref PRIMARY,b PRIMARY 8 const,const # Using index
+select count(*) from information_schema.optimizer_trace where trace like '%no_range_predicate%';
+count(*)
+1
+explain select a, b, c, d from t where a = 2 and c > 2 and d < 2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t ref PRIMARY,b PRIMARY 4 const # Using where; Using index
+select count(*) from information_schema.optimizer_trace where trace like '%too_many_range_predicates%';
+count(*)
+1
+explain select a, b, c, d from t where (a < 1 or a = 4 or a = 5) and b in (1, 2, 3) and d >= 98;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range PRIMARY,b PRIMARY 8 NULL # Using where; Using index
+select count(*) from information_schema.optimizer_trace where trace like '%prefix_not_const_equality%';
+count(*)
+1
+set optimizer_trace = 'enabled=off';
+set optimizer_switch= 'skip_scan=off';
+drop table t;
+set optimizer_switch='index_merge_sort_union=on';
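[Editorial note] The toggling pattern above compares three optimizer states: skip scan off, skip scan forced on with its cost model disabled, and the defaults restored afterwards; rows_read shrinks whenever the range plan can skip over the unconstrained leading key parts. A condensed sketch with a hypothetical table name:

create table t_skip (a int, b int, c int, d int,
  primary key (a, b, c, d), key (b, d)) engine=rocksdb;
set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';  -- force the plan
explain select a, b, c, d from t_skip where a = 5 and d >= 98;   -- expect "Using index for skip scan"
set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';  -- restore defaults
drop table t_skip;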
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/partition.result b/storage/rocksdb/mysql-test/rocksdb/r/partition.result
new file mode 100644
index 00000000000..1ba966e9e07
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/partition.result
@@ -0,0 +1,688 @@
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS VAR_POP;
+DROP TABLE IF EXISTS TEMP0;
+DROP TABLE IF EXISTS VAR_SAMP;
+DROP TABLE IF EXISTS ti;
+DROP TABLE IF EXISTS members;
+DROP TABLE IF EXISTS members_2;
+DROP TABLE IF EXISTS employees;
+DROP TABLE IF EXISTS employees_2;
+DROP TABLE IF EXISTS employees_3;
+DROP TABLE IF EXISTS quarterly_report_status;
+DROP TABLE IF EXISTS employees_4;
+DROP TABLE IF EXISTS h2;
+DROP TABLE IF EXISTS rcx;
+DROP TABLE IF EXISTS r1;
+DROP TABLE IF EXISTS rc1;
+DROP TABLE IF EXISTS rx;
+DROP TABLE IF EXISTS rc2;
+DROP TABLE IF EXISTS rc3;
+DROP TABLE IF EXISTS rc4;
+DROP TABLE IF EXISTS employees_by_lname;
+DROP TABLE IF EXISTS customers_1;
+DROP TABLE IF EXISTS customers_2;
+DROP TABLE IF EXISTS customers_3;
+DROP TABLE IF EXISTS employees_hash;
+DROP TABLE IF EXISTS employees_hash_1;
+DROP TABLE IF EXISTS t1_hash;
+DROP TABLE IF EXISTS employees_linear_hash;
+DROP TABLE IF EXISTS t1_linear_hash;
+DROP TABLE IF EXISTS k1;
+DROP TABLE IF EXISTS k2;
+DROP TABLE IF EXISTS tm1;
+DROP TABLE IF EXISTS tk;
+DROP TABLE IF EXISTS ts;
+DROP TABLE IF EXISTS ts_1;
+DROP TABLE IF EXISTS ts_3;
+DROP TABLE IF EXISTS ts_4;
+DROP TABLE IF EXISTS ts_5;
+DROP TABLE IF EXISTS trb3;
+DROP TABLE IF EXISTS tr;
+DROP TABLE IF EXISTS members_3;
+DROP TABLE IF EXISTS clients;
+DROP TABLE IF EXISTS clients_lk;
+DROP TABLE IF EXISTS trb1;
+CREATE TABLE t1 (i INT, j INT, k INT, PRIMARY KEY (i)) ENGINE = ROCKSDB PARTITION BY KEY(i) PARTITIONS 4;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+Table Op Msg_type Msg_text
+test.t1 repair status OK
+Table Op Msg_type Msg_text
+test.t1 check status OK
+select lower(table_name) as tname
+from information_schema.tables
+where table_schema=database()
+order by tname;
+tname
+t1
+temp0
+var_pop
+var_samp
+SELECT * FROM t1 ORDER BY i LIMIT 10;
+i j k
+1 1 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+7 7 7
+8 8 8
+9 9 9
+10 10 10
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+1000
+CREATE TABLE ti(
+id INT,
+amount DECIMAL(7,2),
+tr_date DATE
+) ENGINE=ROCKSDB
+PARTITION BY HASH(MONTH(tr_date))
+PARTITIONS 6;
+CREATE TABLE members (
+firstname VARCHAR(25) NOT NULL,
+lastname VARCHAR(25) NOT NULL,
+username VARCHAR(16) NOT NULL,
+email VARCHAR(35),
+joined DATE NOT NULL
+) ENGINE=ROCKSDB
+PARTITION BY KEY(joined)
+PARTITIONS 6;
+CREATE TABLE members_2 (
+firstname VARCHAR(25) NOT NULL,
+lastname VARCHAR(25) NOT NULL,
+username VARCHAR(16) NOT NULL,
+email VARCHAR(35),
+joined DATE NOT NULL
+) ENGINE=ROCKSDB
+PARTITION BY RANGE(YEAR(joined)) (
+PARTITION p0 VALUES LESS THAN (1960),
+PARTITION p1 VALUES LESS THAN (1970),
+PARTITION p2 VALUES LESS THAN (1980),
+PARTITION p3 VALUES LESS THAN (1990),
+PARTITION p4 VALUES LESS THAN MAXVALUE
+);
+CREATE TABLE t2 (val INT)
+ENGINE=ROCKSDB
+PARTITION BY LIST(val)(
+PARTITION mypart VALUES IN (1,3,5),
+PARTITION MyPart VALUES IN (2,4,6)
+);
+ERROR HY000: Duplicate partition name MyPart
+CREATE TABLE employees (
+id INT NOT NULL,
+fname VARCHAR(30),
+lname VARCHAR(30),
+hired DATE NOT NULL DEFAULT '1970-01-01',
+separated DATE NOT NULL DEFAULT '9999-12-31',
+job_code INT NOT NULL,
+store_id INT NOT NULL
+) ENGINE=ROCKSDB
+PARTITION BY RANGE (store_id) (
+PARTITION p0 VALUES LESS THAN (6),
+PARTITION p1 VALUES LESS THAN (11),
+PARTITION p2 VALUES LESS THAN (16),
+PARTITION p3 VALUES LESS THAN MAXVALUE
+);
+CREATE TABLE employees_2 (
+id INT NOT NULL,
+fname VARCHAR(30),
+lname VARCHAR(30),
+hired DATE NOT NULL DEFAULT '1970-01-01',
+separated DATE NOT NULL DEFAULT '9999-12-31',
+job_code INT NOT NULL,
+store_id INT NOT NULL
+) ENGINE=ROCKSDB
+PARTITION BY RANGE (job_code) (
+PARTITION p0 VALUES LESS THAN (100),
+PARTITION p1 VALUES LESS THAN (1000),
+PARTITION p2 VALUES LESS THAN (10000)
+);
+CREATE TABLE employees_3 (
+id INT NOT NULL,
+fname VARCHAR(30),
+lname VARCHAR(30),
+hired DATE NOT NULL DEFAULT '1970-01-01',
+separated DATE NOT NULL DEFAULT '9999-12-31',
+job_code INT,
+store_id INT
+) ENGINE=ROCKSDB
+PARTITION BY RANGE (YEAR(separated)) (
+PARTITION p0 VALUES LESS THAN (1991),
+PARTITION p1 VALUES LESS THAN (1996),
+PARTITION p2 VALUES LESS THAN (2001),
+PARTITION p3 VALUES LESS THAN MAXVALUE
+);
+CREATE TABLE quarterly_report_status (
+report_id INT NOT NULL,
+report_status VARCHAR(20) NOT NULL,
+report_updated TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
+) ENGINE=ROCKSDB
+PARTITION BY RANGE (UNIX_TIMESTAMP(report_updated)) (
+PARTITION p0 VALUES LESS THAN ( UNIX_TIMESTAMP('2008-01-01 00:00:00') ),
+PARTITION p1 VALUES LESS THAN ( UNIX_TIMESTAMP('2008-04-01 00:00:00') ),
+PARTITION p2 VALUES LESS THAN ( UNIX_TIMESTAMP('2008-07-01 00:00:00') ),
+PARTITION p3 VALUES LESS THAN ( UNIX_TIMESTAMP('2008-10-01 00:00:00') ),
+PARTITION p4 VALUES LESS THAN ( UNIX_TIMESTAMP('2009-01-01 00:00:00') ),
+PARTITION p5 VALUES LESS THAN ( UNIX_TIMESTAMP('2009-04-01 00:00:00') ),
+PARTITION p6 VALUES LESS THAN ( UNIX_TIMESTAMP('2009-07-01 00:00:00') ),
+PARTITION p7 VALUES LESS THAN ( UNIX_TIMESTAMP('2009-10-01 00:00:00') ),
+PARTITION p8 VALUES LESS THAN ( UNIX_TIMESTAMP('2010-01-01 00:00:00') ),
+PARTITION p9 VALUES LESS THAN (MAXVALUE)
+);
+CREATE TABLE employees_4 (
+id INT NOT NULL,
+fname VARCHAR(30),
+lname VARCHAR(30),
+hired DATE NOT NULL DEFAULT '1970-01-01',
+separated DATE NOT NULL DEFAULT '9999-12-31',
+job_code INT,
+store_id INT
+) ENGINE=ROCKSDB
+PARTITION BY LIST(store_id) (
+PARTITION pNorth VALUES IN (3,5,6,9,17),
+PARTITION pEast VALUES IN (1,2,10,11,19,20),
+PARTITION pWest VALUES IN (4,12,13,14,18),
+PARTITION pCentral VALUES IN (7,8,15,16)
+);
+CREATE TABLE h2 (
+c1 INT,
+c2 INT
+) ENGINE=ROCKSDB
+PARTITION BY LIST(c1) (
+PARTITION p0 VALUES IN (1, 4, 7),
+PARTITION p1 VALUES IN (2, 5, 8)
+);
+INSERT INTO h2 VALUES (3, 5);
+ERROR HY000: Table has no partition for value 3
+CREATE TABLE rcx (
+a INT,
+b INT,
+c CHAR(3),
+d INT
+) ENGINE=ROCKSDB
+PARTITION BY RANGE COLUMNS(a,d,c) (
+PARTITION p0 VALUES LESS THAN (5,10,'ggg'),
+PARTITION p1 VALUES LESS THAN (10,20,'mmm'),
+PARTITION p2 VALUES LESS THAN (15,30,'sss'),
+PARTITION p3 VALUES LESS THAN (MAXVALUE,MAXVALUE,MAXVALUE)
+);
+CREATE TABLE r1 (
+a INT,
+b INT
+) ENGINE=ROCKSDB
+PARTITION BY RANGE (a) (
+PARTITION p0 VALUES LESS THAN (5),
+PARTITION p1 VALUES LESS THAN (MAXVALUE)
+);
+INSERT INTO r1 VALUES (5,10), (5,11), (5,12);
+CREATE TABLE rc1 (
+a INT,
+b INT
+) ENGINE=ROCKSDB
+PARTITION BY RANGE COLUMNS(a, b) (
+PARTITION p0 VALUES LESS THAN (5, 12),
+PARTITION p3 VALUES LESS THAN (MAXVALUE, MAXVALUE)
+);
+INSERT INTO rc1 VALUES (5,10), (5,11), (5,12);
+SELECT (5,10) < (5,12), (5,11) < (5,12), (5,12) < (5,12);
+(5,10) < (5,12) (5,11) < (5,12) (5,12) < (5,12)
+1 1 0
+CREATE TABLE rx (
+a INT,
+b INT
+) ENGINE=ROCKSDB
+PARTITION BY RANGE COLUMNS (a) (
+PARTITION p0 VALUES LESS THAN (5),
+PARTITION p1 VALUES LESS THAN (MAXVALUE)
+);
+INSERT INTO rx VALUES (5,10), (5,11), (5,12);
+CREATE TABLE rc2 (
+a INT,
+b INT
+) ENGINE=ROCKSDB
+PARTITION BY RANGE COLUMNS(a,b) (
+PARTITION p0 VALUES LESS THAN (0,10),
+PARTITION p1 VALUES LESS THAN (10,20),
+PARTITION p2 VALUES LESS THAN (10,30),
+PARTITION p3 VALUES LESS THAN (MAXVALUE,MAXVALUE)
+);
+CREATE TABLE rc3 (
+a INT,
+b INT
+) ENGINE=ROCKSDB
+PARTITION BY RANGE COLUMNS(a,b) (
+PARTITION p0 VALUES LESS THAN (0,10),
+PARTITION p1 VALUES LESS THAN (10,20),
+PARTITION p2 VALUES LESS THAN (10,30),
+PARTITION p3 VALUES LESS THAN (10,35),
+PARTITION p4 VALUES LESS THAN (20,40),
+PARTITION p5 VALUES LESS THAN (MAXVALUE,MAXVALUE)
+);
+CREATE TABLE rc4 (
+a INT,
+b INT,
+c INT
+) ENGINE=ROCKSDB
+PARTITION BY RANGE COLUMNS(a,b,c) (
+PARTITION p0 VALUES LESS THAN (0,25,50),
+PARTITION p1 VALUES LESS THAN (10,20,100),
+PARTITION p2 VALUES LESS THAN (10,30,50),
+PARTITION p3 VALUES LESS THAN (MAXVALUE,MAXVALUE,MAXVALUE)
+);
+SELECT (0,25,50) < (10,20,100), (10,20,100) < (10,30,50);
+(0,25,50) < (10,20,100) (10,20,100) < (10,30,50)
+1 1
+CREATE TABLE rcf (
+a INT,
+b INT,
+c INT
+) ENGINE=ROCKSDB
+PARTITION BY RANGE COLUMNS(a,b,c) (
+PARTITION p0 VALUES LESS THAN (0,25,50),
+PARTITION p1 VALUES LESS THAN (20,20,100),
+PARTITION p2 VALUES LESS THAN (10,30,50),
+PARTITION p3 VALUES LESS THAN (MAXVALUE,MAXVALUE,MAXVALUE)
+);
+ERROR HY000: VALUES LESS THAN value must be strictly increasing for each partition
+CREATE TABLE employees_by_lname (
+id INT NOT NULL,
+fname VARCHAR(30),
+lname VARCHAR(30),
+hired DATE NOT NULL DEFAULT '1970-01-01',
+separated DATE NOT NULL DEFAULT '9999-12-31',
+job_code INT NOT NULL,
+store_id INT NOT NULL
+) ENGINE=ROCKSDB
+PARTITION BY RANGE COLUMNS (lname) (
+PARTITION p0 VALUES LESS THAN ('g'),
+PARTITION p1 VALUES LESS THAN ('m'),
+PARTITION p2 VALUES LESS THAN ('t'),
+PARTITION p3 VALUES LESS THAN (MAXVALUE)
+);
+ALTER TABLE employees_by_lname PARTITION BY RANGE COLUMNS (lname) (
+PARTITION p0 VALUES LESS THAN ('g'),
+PARTITION p1 VALUES LESS THAN ('m'),
+PARTITION p2 VALUES LESS THAN ('t'),
+PARTITION p3 VALUES LESS THAN (MAXVALUE)
+);
+ALTER TABLE employees_by_lname PARTITION BY RANGE COLUMNS (hired) (
+PARTITION p0 VALUES LESS THAN ('1970-01-01'),
+PARTITION p1 VALUES LESS THAN ('1980-01-01'),
+PARTITION p2 VALUES LESS THAN ('1990-01-01'),
+PARTITION p3 VALUES LESS THAN ('2000-01-01'),
+PARTITION p4 VALUES LESS THAN ('2010-01-01'),
+PARTITION p5 VALUES LESS THAN (MAXVALUE)
+);
+CREATE TABLE customers_1 (
+first_name VARCHAR(25),
+last_name VARCHAR(25),
+street_1 VARCHAR(30),
+street_2 VARCHAR(30),
+city VARCHAR(15),
+renewal DATE
+) ENGINE=ROCKSDB
+PARTITION BY LIST COLUMNS(city) (
+PARTITION pRegion_1 VALUES IN('Oskarshamn', 'Högsby', 'Mönsterås'),
+PARTITION pRegion_2 VALUES IN('Vimmerby', 'Hultsfred', 'Västervik'),
+PARTITION pRegion_3 VALUES IN('Nässjö', 'Eksjö', 'Vetlanda'),
+PARTITION pRegion_4 VALUES IN('Uppvidinge', 'Alvesta', 'Växjo')
+);
+CREATE TABLE customers_2 (
+first_name VARCHAR(25),
+last_name VARCHAR(25),
+street_1 VARCHAR(30),
+street_2 VARCHAR(30),
+city VARCHAR(15),
+renewal DATE
+) ENGINE=ROCKSDB
+PARTITION BY LIST COLUMNS(renewal) (
+PARTITION pWeek_1 VALUES IN('2010-02-01', '2010-02-02', '2010-02-03',
+'2010-02-04', '2010-02-05', '2010-02-06', '2010-02-07'),
+PARTITION pWeek_2 VALUES IN('2010-02-08', '2010-02-09', '2010-02-10',
+'2010-02-11', '2010-02-12', '2010-02-13', '2010-02-14'),
+PARTITION pWeek_3 VALUES IN('2010-02-15', '2010-02-16', '2010-02-17',
+'2010-02-18', '2010-02-19', '2010-02-20', '2010-02-21'),
+PARTITION pWeek_4 VALUES IN('2010-02-22', '2010-02-23', '2010-02-24',
+'2010-02-25', '2010-02-26', '2010-02-27', '2010-02-28')
+);
+CREATE TABLE customers_3 (
+first_name VARCHAR(25),
+last_name VARCHAR(25),
+street_1 VARCHAR(30),
+street_2 VARCHAR(30),
+city VARCHAR(15),
+renewal DATE
+) ENGINE=ROCKSDB
+PARTITION BY RANGE COLUMNS(renewal) (
+PARTITION pWeek_1 VALUES LESS THAN('2010-02-09'),
+PARTITION pWeek_2 VALUES LESS THAN('2010-02-15'),
+PARTITION pWeek_3 VALUES LESS THAN('2010-02-22'),
+PARTITION pWeek_4 VALUES LESS THAN('2010-03-01')
+);
+CREATE TABLE employees_hash (
+id INT NOT NULL,
+fname VARCHAR(30),
+lname VARCHAR(30),
+hired DATE NOT NULL DEFAULT '1970-01-01',
+separated DATE NOT NULL DEFAULT '9999-12-31',
+job_code INT,
+store_id INT
+) ENGINE=ROCKSDB
+PARTITION BY HASH(store_id)
+PARTITIONS 4;
+CREATE TABLE employees_hash_1 (
+id INT NOT NULL,
+fname VARCHAR(30),
+lname VARCHAR(30),
+hired DATE NOT NULL DEFAULT '1970-01-01',
+separated DATE NOT NULL DEFAULT '9999-12-31',
+job_code INT,
+store_id INT
+) ENGINE=ROCKSDB
+PARTITION BY HASH( YEAR(hired) )
+PARTITIONS 4;
+CREATE TABLE t1_hash (
+col1 INT,
+col2 CHAR(5),
+col3 DATE
+) ENGINE=ROCKSDB
+PARTITION BY HASH( YEAR(col3) )
+PARTITIONS 4;
+CREATE TABLE employees_linear_hash (
+id INT NOT NULL,
+fname VARCHAR(30),
+lname VARCHAR(30),
+hired DATE NOT NULL DEFAULT '1970-01-01',
+separated DATE NOT NULL DEFAULT '9999-12-31',
+job_code INT,
+store_id INT
+) ENGINE=ROCKSDB
+PARTITION BY LINEAR HASH( YEAR(hired) )
+PARTITIONS 4;
+CREATE TABLE t1_linear_hash (
+col1 INT,
+col2 CHAR(5),
+col3 DATE
+) ENGINE=ROCKSDB
+PARTITION BY LINEAR HASH( YEAR(col3) )
+PARTITIONS 6;
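LINEAR HASH replaces the plain modulus with a powers-of-two scheme: with n partitions, let V be the smallest power of two >= n, compute N = expr & (V - 1), and halve V until N < n. For t1_linear_hash (n = 6, so V = 8), a row with YEAR(col3) = 2003 lands in p3, because 2003 & 7 = 3 and 3 < 6 already. The bit step can be checked directly (illustrative only):

SELECT YEAR('2003-04-14') & 7;
-- 3, so the row goes to partition p3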
+CREATE TABLE k1 (
+id INT NOT NULL PRIMARY KEY,
+name VARCHAR(20)
+) ENGINE=ROCKSDB
+PARTITION BY KEY()
+PARTITIONS 2;
+CREATE TABLE k2 (
+id INT NOT NULL,
+name VARCHAR(20),
+UNIQUE KEY (id)
+) ENGINE=ROCKSDB
+PARTITION BY KEY()
+PARTITIONS 2;
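PARTITION BY KEY() with an empty column list falls back to the primary key (k1) or, when there is none, to a NOT NULL unique key (k2, whose id column is both NOT NULL and unique). A table offering neither is rejected; a sketch of the failing case, not part of the recorded result:

CREATE TABLE k3 (
id INT,
name VARCHAR(20)
) ENGINE=ROCKSDB
PARTITION BY KEY()
PARTITIONS 2;
-- fails: no primary key and no usable NOT NULL unique key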
+CREATE TABLE tm1 (
+s1 CHAR(32) PRIMARY KEY
+) ENGINE=ROCKSDB
+PARTITION BY KEY(s1)
+PARTITIONS 10;
+CREATE TABLE tk (
+col1 INT NOT NULL,
+col2 CHAR(5),
+col3 DATE
+) ENGINE=ROCKSDB
+PARTITION BY LINEAR KEY (col1)
+PARTITIONS 3;
+CREATE TABLE ts (
+id INT,
+purchased DATE
+) ENGINE=ROCKSDB
+PARTITION BY RANGE( YEAR(purchased) )
+SUBPARTITION BY HASH( TO_DAYS(purchased) )
+SUBPARTITIONS 2 (
+PARTITION p0 VALUES LESS THAN (1990),
+PARTITION p1 VALUES LESS THAN (2000),
+PARTITION p2 VALUES LESS THAN MAXVALUE
+);
+CREATE TABLE ts_1 (
+id INT,
+purchased DATE
+) ENGINE=ROCKSDB
+PARTITION BY RANGE( YEAR(purchased) )
+SUBPARTITION BY HASH( TO_DAYS(purchased) ) (
+PARTITION p0 VALUES LESS THAN (1990) (
+SUBPARTITION s0,
+SUBPARTITION s1
+),
+PARTITION p1 VALUES LESS THAN (2000) (
+SUBPARTITION s2,
+SUBPARTITION s3
+),
+PARTITION p2 VALUES LESS THAN MAXVALUE (
+SUBPARTITION s4,
+SUBPARTITION s5
+)
+);
+CREATE TABLE ts_2 (
+id INT,
+purchased DATE
+) ENGINE=ROCKSDB
+PARTITION BY RANGE( YEAR(purchased) )
+SUBPARTITION BY HASH( TO_DAYS(purchased) ) (
+PARTITION p0 VALUES LESS THAN (1990) (
+SUBPARTITION s0,
+SUBPARTITION s1
+),
+PARTITION p1 VALUES LESS THAN (2000),
+PARTITION p2 VALUES LESS THAN MAXVALUE (
+SUBPARTITION s2,
+SUBPARTITION s3
+)
+);
+ERROR 42000: Wrong number of subpartitions defined, mismatch with previous setting near '
+PARTITION p2 VALUES LESS THAN MAXVALUE (
+SUBPARTITION s2,
+SUBPARTITION s3
+)
+)' at line 11
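The ts_2 failure is the all-or-nothing rule for explicit subpartition lists: once any partition spells out its subpartitions, every partition must declare the same number of them, and p1 here declares none. Either repeat the list for all three partitions, as ts_3 does next, or drop the explicit lists and let a uniform SUBPARTITIONS 2 clause apply, as table ts does above.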
+CREATE TABLE ts_3 (
+id INT,
+purchased DATE
+) ENGINE=ROCKSDB
+PARTITION BY RANGE( YEAR(purchased) )
+SUBPARTITION BY HASH( TO_DAYS(purchased) ) (
+PARTITION p0 VALUES LESS THAN (1990) (
+SUBPARTITION s0,
+SUBPARTITION s1
+),
+PARTITION p1 VALUES LESS THAN (2000) (
+SUBPARTITION s2,
+SUBPARTITION s3
+),
+PARTITION p2 VALUES LESS THAN MAXVALUE (
+SUBPARTITION s4,
+SUBPARTITION s5
+)
+);
+CREATE TABLE ts_4 (
+id INT,
+purchased DATE
+) ENGINE=ROCKSDB
+PARTITION BY RANGE( YEAR(purchased) )
+SUBPARTITION BY HASH( TO_DAYS(purchased) ) (
+PARTITION p0 VALUES LESS THAN (1990) (
+SUBPARTITION s0,
+SUBPARTITION s1
+),
+PARTITION p1 VALUES LESS THAN (2000) (
+SUBPARTITION s2,
+SUBPARTITION s3
+),
+PARTITION p2 VALUES LESS THAN MAXVALUE (
+SUBPARTITION s4,
+SUBPARTITION s5
+)
+);
+CREATE TABLE ts_5 (
+id INT,
+purchased DATE
+) ENGINE=ROCKSDB
+PARTITION BY RANGE(YEAR(purchased))
+SUBPARTITION BY HASH( TO_DAYS(purchased) ) (
+PARTITION p0 VALUES LESS THAN (1990) (
+SUBPARTITION s0a,
+SUBPARTITION s0b
+),
+PARTITION p1 VALUES LESS THAN (2000) (
+SUBPARTITION s1a,
+SUBPARTITION s1b
+),
+PARTITION p2 VALUES LESS THAN MAXVALUE (
+SUBPARTITION s2a,
+SUBPARTITION s2b
+)
+);
+CREATE TABLE trb3 (
+id INT,
+name VARCHAR(50),
+purchased DATE
+) ENGINE=ROCKSDB
+PARTITION BY RANGE( YEAR(purchased) ) (
+PARTITION p0 VALUES LESS THAN (1990),
+PARTITION p1 VALUES LESS THAN (1995),
+PARTITION p2 VALUES LESS THAN (2000),
+PARTITION p3 VALUES LESS THAN (2005)
+);
+ALTER TABLE trb3 PARTITION BY KEY(id) PARTITIONS 2;
+CREATE TABLE tr (
+id INT,
+name VARCHAR(50),
+purchased DATE
+) ENGINE=ROCKSDB
+PARTITION BY RANGE( YEAR(purchased) ) (
+PARTITION p0 VALUES LESS THAN (1990),
+PARTITION p1 VALUES LESS THAN (1995),
+PARTITION p2 VALUES LESS THAN (2000),
+PARTITION p3 VALUES LESS THAN (2005)
+);
+INSERT INTO tr VALUES
+(1, 'desk organiser', '2003-10-15'),
+(2, 'CD player', '1993-11-05'),
+(3, 'TV set', '1996-03-10'),
+(4, 'bookcase', '1982-01-10'),
+(5, 'exercise bike', '2004-05-09'),
+(6, 'sofa', '1987-06-05'),
+(7, 'popcorn maker', '2001-11-22'),
+(8, 'aquarium', '1992-08-04'),
+(9, 'study desk', '1984-09-16'),
+(10, 'lava lamp', '1998-12-25');
+SELECT * FROM tr WHERE purchased BETWEEN '1995-01-01' AND '1999-12-31';
+id name purchased
+3 TV set 1996-03-10
+10 lava lamp 1998-12-25
+ALTER TABLE tr DROP PARTITION p2;
+SELECT * FROM tr WHERE purchased BETWEEN '1995-01-01' AND '1999-12-31';
+id name purchased
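As the now-empty result shows, ALTER TABLE ... DROP PARTITION discards every row stored in the dropped partition; it is a data-removal operation, not a metadata change. To merge the 1995-1999 range into a neighbour without losing rows, REORGANIZE PARTITION would have been the alternative (a sketch, not part of the recorded result):

ALTER TABLE tr REORGANIZE PARTITION p1, p2 INTO (
PARTITION p1_2 VALUES LESS THAN (2000)
);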
+CREATE TABLE members_3 (
+id INT,
+fname VARCHAR(25),
+lname VARCHAR(25),
+dob DATE
+) ENGINE=ROCKSDB
+PARTITION BY RANGE( YEAR(dob) ) (
+PARTITION p0 VALUES LESS THAN (1970),
+PARTITION p1 VALUES LESS THAN (1980),
+PARTITION p2 VALUES LESS THAN (1990)
+);
+ALTER TABLE members_3 ADD PARTITION (PARTITION p3 VALUES LESS THAN (2000));
+ALTER TABLE members_3 ADD PARTITION (PARTITION n VALUES LESS THAN (1960));
+ERROR HY000: VALUES LESS THAN value must be strictly increasing for each partition
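ADD PARTITION can only append above the current highest range bound, which is why p3 (2000) succeeds and n (1960) fails. The supported way to add a partition at the low end is to split the first partition with REORGANIZE PARTITION (a sketch, not part of the recorded result):

ALTER TABLE members_3 REORGANIZE PARTITION p0 INTO (
PARTITION n VALUES LESS THAN (1960),
PARTITION p0 VALUES LESS THAN (1970)
);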
+CREATE TABLE clients (
+id INT,
+fname VARCHAR(30),
+lname VARCHAR(30),
+signed DATE
+) ENGINE=ROCKSDB
+PARTITION BY HASH( MONTH(signed) )
+PARTITIONS 12;
+ALTER TABLE clients COALESCE PARTITION 4;
+CREATE TABLE clients_lk (
+id INT,
+fname VARCHAR(30),
+lname VARCHAR(30),
+signed DATE
+) ENGINE=ROCKSDB
+PARTITION BY LINEAR KEY(signed)
+PARTITIONS 12;
+ALTER TABLE clients COALESCE PARTITION 18;
+ERROR HY000: Cannot remove all partitions, use DROP TABLE instead
+ALTER TABLE clients ADD PARTITION PARTITIONS 6;
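COALESCE PARTITION n removes n partitions from a HASH or KEY table and rehashes the remaining rows, so clients drops from 12 partitions to 8; COALESCE PARTITION 18 then fails because removing 18 of the remaining 8 would leave nothing, and ADD PARTITION PARTITIONS 6 brings the count to 14. The running count can be confirmed from the dictionary (illustrative only):

SELECT COUNT(*) FROM INFORMATION_SCHEMA.PARTITIONS
WHERE TABLE_SCHEMA = 'test' AND TABLE_NAME = 'clients';
-- 14 after the statements above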
+CREATE TABLE trb1 (
+id INT,
+name VARCHAR(50),
+purchased DATE
+) ENGINE=ROCKSDB
+PARTITION BY RANGE(id) (
+PARTITION p0 VALUES LESS THAN (3),
+PARTITION p1 VALUES LESS THAN (7),
+PARTITION p2 VALUES LESS THAN (9),
+PARTITION p3 VALUES LESS THAN (11)
+);
+INSERT INTO trb1 VALUES
+(1, 'desk organiser', '2003-10-15'),
+(2, 'CD player', '1993-11-05'),
+(3, 'TV set', '1996-03-10'),
+(4, 'bookcase', '1982-01-10'),
+(5, 'exercise bike', '2004-05-09'),
+(6, 'sofa', '1987-06-05'),
+(7, 'popcorn maker', '2001-11-22'),
+(8, 'aquarium', '1992-08-04'),
+(9, 'study desk', '1984-09-16'),
+(10, 'lava lamp', '1998-12-25');
+ALTER TABLE trb1 ADD PRIMARY KEY (id);
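The ADD PRIMARY KEY succeeds because id is exactly the partitioning column: every PRIMARY or UNIQUE key on a partitioned table must include all columns used by the partitioning function. A key omitting the column is rejected (a sketch, not part of the recorded result):

ALTER TABLE trb1 ADD UNIQUE KEY (name);
-- fails: a UNIQUE INDEX must include all columns in the partitioning function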
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS VAR_POP;
+DROP TABLE IF EXISTS TEMP0;
+DROP TABLE IF EXISTS VAR_SAMP;
+DROP TABLE IF EXISTS ti;
+DROP TABLE IF EXISTS members;
+DROP TABLE IF EXISTS members_2;
+DROP TABLE IF EXISTS employees;
+DROP TABLE IF EXISTS employees_2;
+DROP TABLE IF EXISTS employees_3;
+DROP TABLE IF EXISTS quarterly_report_status;
+DROP TABLE IF EXISTS employees_4;
+DROP TABLE IF EXISTS h2;
+DROP TABLE IF EXISTS rcx;
+DROP TABLE IF EXISTS r1;
+DROP TABLE IF EXISTS rc1;
+DROP TABLE IF EXISTS rx;
+DROP TABLE IF EXISTS rc2;
+DROP TABLE IF EXISTS rc3;
+DROP TABLE IF EXISTS rc4;
+DROP TABLE IF EXISTS employees_by_lname;
+DROP TABLE IF EXISTS customers_1;
+DROP TABLE IF EXISTS customers_2;
+DROP TABLE IF EXISTS customers_3;
+DROP TABLE IF EXISTS employees_hash;
+DROP TABLE IF EXISTS employees_hash_1;
+DROP TABLE IF EXISTS t1_hash;
+DROP TABLE IF EXISTS employees_linear_hash;
+DROP TABLE IF EXISTS t1_linear_hash;
+DROP TABLE IF EXISTS k1;
+DROP TABLE IF EXISTS k2;
+DROP TABLE IF EXISTS tm1;
+DROP TABLE IF EXISTS tk;
+DROP TABLE IF EXISTS ts;
+DROP TABLE IF EXISTS ts_1;
+DROP TABLE IF EXISTS ts_3;
+DROP TABLE IF EXISTS ts_4;
+DROP TABLE IF EXISTS ts_5;
+DROP TABLE IF EXISTS trb3;
+DROP TABLE IF EXISTS tr;
+DROP TABLE IF EXISTS members_3;
+DROP TABLE IF EXISTS clients;
+DROP TABLE IF EXISTS clients_lk;
+DROP TABLE IF EXISTS trb1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/percona_nonflushing_analyze_debug.result b/storage/rocksdb/mysql-test/rocksdb/r/percona_nonflushing_analyze_debug.result
new file mode 100644
index 00000000000..84176da34fe
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/percona_nonflushing_analyze_debug.result
@@ -0,0 +1,19 @@
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (1), (2), (3);
+SET DEBUG_SYNC="handler_ha_index_next_end SIGNAL idx_scan_in_progress WAIT_FOR finish_scan";
+SELECT * FROM t1;
+SET DEBUG_SYNC="now WAIT_FOR idx_scan_in_progress";
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+SELECT * FROM t1;
+a
+1
+2
+3
+SET DEBUG_SYNC="now SIGNAL finish_scan";
+a
+1
+2
+3
+DROP TABLE t1;
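This result reads oddly on its own because the first SELECT is issued asynchronously: the handler_ha_index_next_end sync point parks the scan mid-index-read, ANALYZE TABLE and a fresh SELECT run in the meantime, and the trailing 1/2/3 block with no statement above it is the parked scan finally being reaped, demonstrating that ANALYZE did not invalidate an index scan already in progress. A plausible sketch of the driving .test file (assumed; the diff records only the expected output):

connect (con1,localhost,root,,);
SET DEBUG_SYNC="handler_ha_index_next_end SIGNAL idx_scan_in_progress WAIT_FOR finish_scan";
send SELECT * FROM t1;
connection default;
SET DEBUG_SYNC="now WAIT_FOR idx_scan_in_progress";
ANALYZE TABLE t1;
SELECT * FROM t1;
SET DEBUG_SYNC="now SIGNAL finish_scan";
connection con1;
reap;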
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/perf_context.result b/storage/rocksdb/mysql-test/rocksdb/r/perf_context.result
new file mode 100644
index 00000000000..28f965843aa
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/perf_context.result
@@ -0,0 +1,191 @@
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+SET @prior_rocksdb_perf_context_level = @@rocksdb_perf_context_level;
+SET GLOBAL rocksdb_perf_context_level=3;
+CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
+CREATE TABLE t2 (k INT, PRIMARY KEY (k)) ENGINE = ROCKSDB;
+INSERT INTO t1 VALUES (1,1), (2,2), (3,3), (4,4), (5,5);
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT WHERE TABLE_NAME = 't1';
+TABLE_SCHEMA TABLE_NAME PARTITION_NAME STAT_TYPE VALUE
+test t1 NULL USER_KEY_COMPARISON_COUNT #
+test t1 NULL BLOCK_CACHE_HIT_COUNT #
+test t1 NULL BLOCK_READ_COUNT #
+test t1 NULL BLOCK_READ_BYTE #
+test t1 NULL BLOCK_READ_TIME #
+test t1 NULL BLOCK_CHECKSUM_TIME #
+test t1 NULL BLOCK_DECOMPRESS_TIME #
+test t1 NULL GET_READ_BYTES #
+test t1 NULL MULTIGET_READ_BYTES #
+test t1 NULL ITER_READ_BYTES #
+test t1 NULL INTERNAL_KEY_SKIPPED_COUNT #
+test t1 NULL INTERNAL_DELETE_SKIPPED_COUNT #
+test t1 NULL INTERNAL_RECENT_SKIPPED_COUNT #
+test t1 NULL INTERNAL_MERGE_COUNT #
+test t1 NULL GET_SNAPSHOT_TIME #
+test t1 NULL GET_FROM_MEMTABLE_TIME #
+test t1 NULL GET_FROM_MEMTABLE_COUNT #
+test t1 NULL GET_POST_PROCESS_TIME #
+test t1 NULL GET_FROM_OUTPUT_FILES_TIME #
+test t1 NULL SEEK_ON_MEMTABLE_TIME #
+test t1 NULL SEEK_ON_MEMTABLE_COUNT #
+test t1 NULL NEXT_ON_MEMTABLE_COUNT #
+test t1 NULL PREV_ON_MEMTABLE_COUNT #
+test t1 NULL SEEK_CHILD_SEEK_TIME #
+test t1 NULL SEEK_CHILD_SEEK_COUNT #
+test t1 NULL SEEK_MIN_HEAP_TIME #
+test t1 NULL SEEK_MAX_HEAP_TIME #
+test t1 NULL SEEK_INTERNAL_SEEK_TIME #
+test t1 NULL FIND_NEXT_USER_ENTRY_TIME #
+test t1 NULL WRITE_WAL_TIME #
+test t1 NULL WRITE_MEMTABLE_TIME #
+test t1 NULL WRITE_DELAY_TIME #
+test t1 NULL WRITE_PRE_AND_POST_PROCESS_TIME #
+test t1 NULL DB_MUTEX_LOCK_NANOS #
+test t1 NULL DB_CONDITION_WAIT_NANOS #
+test t1 NULL MERGE_OPERATOR_TIME_NANOS #
+test t1 NULL READ_INDEX_BLOCK_NANOS #
+test t1 NULL READ_FILTER_BLOCK_NANOS #
+test t1 NULL NEW_TABLE_BLOCK_ITER_NANOS #
+test t1 NULL NEW_TABLE_ITERATOR_NANOS #
+test t1 NULL BLOCK_SEEK_NANOS #
+test t1 NULL FIND_TABLE_NANOS #
+test t1 NULL BLOOM_MEMTABLE_HIT_COUNT #
+test t1 NULL BLOOM_MEMTABLE_MISS_COUNT #
+test t1 NULL BLOOM_SST_HIT_COUNT #
+test t1 NULL BLOOM_SST_MISS_COUNT #
+test t1 NULL KEY_LOCK_WAIT_TIME #
+test t1 NULL KEY_LOCK_WAIT_COUNT #
+test t1 NULL IO_THREAD_POOL_ID #
+test t1 NULL IO_BYTES_WRITTEN #
+test t1 NULL IO_BYTES_READ #
+test t1 NULL IO_OPEN_NANOS #
+test t1 NULL IO_ALLOCATE_NANOS #
+test t1 NULL IO_WRITE_NANOS #
+test t1 NULL IO_READ_NANOS #
+test t1 NULL IO_RANGE_SYNC_NANOS #
+test t1 NULL IO_LOGGER_NANOS #
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT_GLOBAL;
+STAT_TYPE VALUE
+USER_KEY_COMPARISON_COUNT #
+BLOCK_CACHE_HIT_COUNT #
+BLOCK_READ_COUNT #
+BLOCK_READ_BYTE #
+BLOCK_READ_TIME #
+BLOCK_CHECKSUM_TIME #
+BLOCK_DECOMPRESS_TIME #
+GET_READ_BYTES #
+MULTIGET_READ_BYTES #
+ITER_READ_BYTES #
+INTERNAL_KEY_SKIPPED_COUNT #
+INTERNAL_DELETE_SKIPPED_COUNT #
+INTERNAL_RECENT_SKIPPED_COUNT #
+INTERNAL_MERGE_COUNT #
+GET_SNAPSHOT_TIME #
+GET_FROM_MEMTABLE_TIME #
+GET_FROM_MEMTABLE_COUNT #
+GET_POST_PROCESS_TIME #
+GET_FROM_OUTPUT_FILES_TIME #
+SEEK_ON_MEMTABLE_TIME #
+SEEK_ON_MEMTABLE_COUNT #
+NEXT_ON_MEMTABLE_COUNT #
+PREV_ON_MEMTABLE_COUNT #
+SEEK_CHILD_SEEK_TIME #
+SEEK_CHILD_SEEK_COUNT #
+SEEK_MIN_HEAP_TIME #
+SEEK_MAX_HEAP_TIME #
+SEEK_INTERNAL_SEEK_TIME #
+FIND_NEXT_USER_ENTRY_TIME #
+WRITE_WAL_TIME #
+WRITE_MEMTABLE_TIME #
+WRITE_DELAY_TIME #
+WRITE_PRE_AND_POST_PROCESS_TIME #
+DB_MUTEX_LOCK_NANOS #
+DB_CONDITION_WAIT_NANOS #
+MERGE_OPERATOR_TIME_NANOS #
+READ_INDEX_BLOCK_NANOS #
+READ_FILTER_BLOCK_NANOS #
+NEW_TABLE_BLOCK_ITER_NANOS #
+NEW_TABLE_ITERATOR_NANOS #
+BLOCK_SEEK_NANOS #
+FIND_TABLE_NANOS #
+BLOOM_MEMTABLE_HIT_COUNT #
+BLOOM_MEMTABLE_MISS_COUNT #
+BLOOM_SST_HIT_COUNT #
+BLOOM_SST_MISS_COUNT #
+KEY_LOCK_WAIT_TIME #
+KEY_LOCK_WAIT_COUNT #
+IO_THREAD_POOL_ID #
+IO_BYTES_WRITTEN #
+IO_BYTES_READ #
+IO_OPEN_NANOS #
+IO_ALLOCATE_NANOS #
+IO_WRITE_NANOS #
+IO_READ_NANOS #
+IO_RANGE_SYNC_NANOS #
+IO_LOGGER_NANOS #
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT
+WHERE TABLE_NAME = 't1'
+AND STAT_TYPE in ('INTERNAL_KEY_SKIPPED_COUNT', 'INTERNAL_DELETE_SKIPPED_COUNT');
+TABLE_SCHEMA TABLE_NAME PARTITION_NAME STAT_TYPE VALUE
+test t1 NULL INTERNAL_KEY_SKIPPED_COUNT 0
+test t1 NULL INTERNAL_DELETE_SKIPPED_COUNT 0
+SELECT * FROM t1;
+i j
+1 1
+2 2
+3 3
+4 4
+5 5
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT
+WHERE TABLE_NAME = 't1'
+AND STAT_TYPE in ('INTERNAL_KEY_SKIPPED_COUNT', 'INTERNAL_DELETE_SKIPPED_COUNT');
+TABLE_SCHEMA TABLE_NAME PARTITION_NAME STAT_TYPE VALUE
+test t1 NULL INTERNAL_KEY_SKIPPED_COUNT 5
+test t1 NULL INTERNAL_DELETE_SKIPPED_COUNT 0
+SELECT * FROM t1 WHERE j BETWEEN 1 AND 5;
+i j
+1 1
+2 2
+3 3
+4 4
+5 5
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT
+WHERE TABLE_NAME = 't1'
+AND STAT_TYPE in ('INTERNAL_KEY_SKIPPED_COUNT', 'INTERNAL_DELETE_SKIPPED_COUNT');
+TABLE_SCHEMA TABLE_NAME PARTITION_NAME STAT_TYPE VALUE
+test t1 NULL INTERNAL_KEY_SKIPPED_COUNT 10
+test t1 NULL INTERNAL_DELETE_SKIPPED_COUNT 0
+set @tmp_flush_log= @@rocksdb_flush_log_at_trx_commit;
+set global rocksdb_flush_log_at_trx_commit=1;
+BEGIN;
+INSERT INTO t2 VALUES (1), (2);
+INSERT INTO t2 VALUES (3), (4);
+COMMIT;
+SELECT COUNT(*) from INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT
+WHERE TABLE_NAME = 't2'
+AND STAT_TYPE = 'IO_WRITE_NANOS'
+AND VALUE > 0;
+COUNT(*)
+0
+SELECT COUNT(*) from INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT_GLOBAL
+WHERE STAT_TYPE = 'IO_WRITE_NANOS' AND VALUE > 0;
+COUNT(*)
+1
+SELECT VALUE INTO @a from INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT_GLOBAL
+WHERE STAT_TYPE = 'IO_WRITE_NANOS';
+INSERT INTO t2 VALUES (5), (6), (7), (8);
+SELECT COUNT(*) from INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT
+WHERE TABLE_NAME = 't2'
+AND STAT_TYPE = 'IO_WRITE_NANOS'
+AND VALUE > 0;
+COUNT(*)
+1
+SELECT VALUE INTO @b from INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT_GLOBAL
+WHERE STAT_TYPE = 'IO_WRITE_NANOS';
+SELECT CASE WHEN @b - @a > 0 THEN 'true' ELSE 'false' END;
+CASE WHEN @b - @a > 0 THEN 'true' ELSE 'false' END
+true
+DROP TABLE t1;
+DROP TABLE t2;
+SET GLOBAL rocksdb_perf_context_level = @prior_rocksdb_perf_context_level;
+set global rocksdb_flush_log_at_trx_commit= @tmp_flush_log;
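The IO_WRITE_NANOS checks at the end show how these counters attribute work: the 0-then-1 progression for t2 suggests that rows applied at COMMIT of an explicit transaction are not charged to the table, while ROCKSDB_PERF_CONTEXT_GLOBAL also sees the WAL write forced by rocksdb_flush_log_at_trx_commit=1, and the later autocommit INSERT is applied in table context and bumps the per-table counter. The same snapshot-and-diff pattern works for any statistic in the list (illustrative only, not part of the recorded result):

SELECT VALUE INTO @before
FROM INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT_GLOBAL
WHERE STAT_TYPE = 'BLOCK_READ_COUNT';
SELECT COUNT(*) FROM t1;
SELECT VALUE - @before AS block_reads
FROM INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT_GLOBAL
WHERE STAT_TYPE = 'BLOCK_READ_COUNT';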
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/persistent_cache.result b/storage/rocksdb/mysql-test/rocksdb/r/persistent_cache.result
new file mode 100644
index 00000000000..bc5739c2d96
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/persistent_cache.result
@@ -0,0 +1,11 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a int primary key) ENGINE=ROCKSDB;
+insert into t1 values (1);
+set global rocksdb_force_flush_memtable_now=1;
+select * from t1 where a = 1;
+a
+1
+select * from t1 where a = 1;
+a
+1
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/prefix_extractor_override.result b/storage/rocksdb/mysql-test/rocksdb/r/prefix_extractor_override.result
new file mode 100644
index 00000000000..070169fd674
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/prefix_extractor_override.result
@@ -0,0 +1,82 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (id1 BIGINT, id2 BIGINT, id3 BIGINT, id4 BIGINT, PRIMARY KEY (id1, id2, id3, id4) comment 'cf1') ENGINE=rocksdb collate latin1_bin;
+set global rocksdb_force_flush_memtable_now = 1;
+
+Original Prefix Extractor:
+
+SELECT * FROM information_schema.rocksdb_cf_options WHERE option_type like '%prefix_extractor%';
+CF_NAME OPTION_TYPE VALUE
+__system__ PREFIX_EXTRACTOR rocksdb.CappedPrefix.24
+cf1 PREFIX_EXTRACTOR rocksdb.CappedPrefix.24
+default PREFIX_EXTRACTOR rocksdb.CappedPrefix.24
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2=1 AND id3=1;
+COUNT(*)
+1
+select variable_value-@u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+variable_value-@u
+1
+
+Prefix Extractor (after override_cf_options set, should not be changed):
+
+SELECT * FROM information_schema.rocksdb_cf_options WHERE option_type like '%prefix_extractor%';
+CF_NAME OPTION_TYPE VALUE
+__system__ PREFIX_EXTRACTOR rocksdb.CappedPrefix.24
+cf1 PREFIX_EXTRACTOR rocksdb.CappedPrefix.24
+default PREFIX_EXTRACTOR rocksdb.CappedPrefix.24
+SET @@global.rocksdb_update_cf_options = 'cf1={prefix_extractor=capped:26};';
+
+Changed Prefix Extractor (after update_cf_options set, without restart):
+
+SELECT * FROM information_schema.rocksdb_cf_options WHERE option_type like '%prefix_extractor%';
+CF_NAME OPTION_TYPE VALUE
+__system__ PREFIX_EXTRACTOR rocksdb.CappedPrefix.24
+cf1 PREFIX_EXTRACTOR rocksdb.CappedPrefix.26
+default PREFIX_EXTRACTOR rocksdb.CappedPrefix.24
+SET @@global.rocksdb_update_cf_options = 'cf2={prefix_extractor=capped:28};';
+SELECT * FROM information_schema.rocksdb_cf_options WHERE option_type like '%prefix_extractor%';
+CF_NAME OPTION_TYPE VALUE
+__system__ PREFIX_EXTRACTOR rocksdb.CappedPrefix.24
+cf1 PREFIX_EXTRACTOR rocksdb.CappedPrefix.26
+cf2 PREFIX_EXTRACTOR rocksdb.CappedPrefix.28
+default PREFIX_EXTRACTOR rocksdb.CappedPrefix.24
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2=1 AND id3=1;
+COUNT(*)
+1
+select variable_value-@u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+variable_value-@u
+0
+set global rocksdb_force_flush_memtable_now = 1;
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2=1 AND id3=1;
+COUNT(*)
+1
+select variable_value-@u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+variable_value-@u
+1
+SELECT COUNT(*) FROM information_schema.rocksdb_index_file_map WHERE COLUMN_FAMILY != 1;
+COUNT(*)
+2
+UPDATE t1 SET id1=1,id2 = 30,id3 = 30 WHERE id4 >= 0 AND id4 <=10;
+set global rocksdb_force_flush_memtable_now = 1;
+SELECT COUNT(*) FROM information_schema.rocksdb_index_file_map WHERE COLUMN_FAMILY != 1;
+COUNT(*)
+3
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2=1 AND id3=1;
+COUNT(*)
+0
+select variable_value-@u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+variable_value-@u
+2
+SET @@global.rocksdb_update_cf_options = '';
+set global rocksdb_compact_cf='cf1';
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2=30 AND id3=30;
+COUNT(*)
+11
+select variable_value-@u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+variable_value-@u
+1
+DROP TABLE t1;
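Two details are worth noting in this sequence. rocksdb_update_cf_options changes a column family's options online, and naming a not-yet-existing family (cf2) simply materializes an entry for it; but SST files already on disk keep the bloom filters they were written with, so rocksdb_bloom_filter_prefix_checked stops advancing (delta 0) until a memtable flush writes new files under the capped:26 extractor (delta 1 afterwards). The 'capped:N' spelling denotes RocksDB's capped prefix transform, which uses at most the first N bytes of the key and is reported as rocksdb.CappedPrefix.N in the options listing.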
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/read_only_tx.result b/storage/rocksdb/mysql-test/rocksdb/r/read_only_tx.result
new file mode 100644
index 00000000000..db21c3c01d4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/read_only_tx.result
@@ -0,0 +1,46 @@
+DROP TABLE IF EXISTS t1;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+CREATE TABLE t1 (id INT, value int, PRIMARY KEY (id), INDEX (value)) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (1,1);
+select variable_value into @p from information_schema.global_status where variable_name='rocksdb_number_sst_entry_put';
+select variable_value into @s from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+connection con2;
+connection con1;
+select case when variable_value-@p < 1000 then 'true' else variable_value-@p end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_put';
+case when variable_value-@p < 1000 then 'true' else variable_value-@p end
+true
+select case when variable_value-@s < 100 then 'true' else variable_value-@s end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
+case when variable_value-@s < 100 then 'true' else variable_value-@s end
+true
+SELECT * FROM t1;
+id value
+1 1
+INSERT INTO t1 values (2, 2);
+ERROR HY000: Can't execute updates when you started a transaction with START TRANSACTION WITH CONSISTENT [ROCKSDB] SNAPSHOT.
+ROLLBACK;
+SELECT * FROM t1;
+id value
+1 10001
+INSERT INTO t1 values (2, 2);
+SELECT * FROM t1 ORDER BY id;
+id value
+1 10001
+2 2
+BEGIN;
+connection con2;
+connection con1;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+9998
+COMMIT;
+connection default;
+disconnect con1;
+disconnect con2;
+OPTIMIZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+DROP TABLE t1;
+reset master;
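MyRocks treats START TRANSACTION WITH CONSISTENT SNAPSHOT as read-only: the snapshot is pinned up front, reads stay repeatable even while the other connection keeps writing (con2's statements are suppressed in this result, which is why t1 later shows value 10001 and 9998 rows), and any write inside the transaction fails with the error above. Read-write work needs an ordinary transaction (sketch, not part of the recorded result):

BEGIN;
SELECT COUNT(*) FROM t1;
INSERT INTO t1 VALUES (100, 100);
COMMIT;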
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/records_in_range.result b/storage/rocksdb/mysql-test/rocksdb/r/records_in_range.result
new file mode 100644
index 00000000000..89ebe760384
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/records_in_range.result
@@ -0,0 +1,210 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+i INT,
+a INT,
+b INT,
+PRIMARY KEY (i),
+KEY ka(a),
+KEY kb(b) comment 'rev:cf1'
+) ENGINE = rocksdb;
+explain extended select * from t1 where a> 500 and a< 750;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range ka ka 5 NULL 1000 100.00 Using index condition
+Warnings:
+Note 1003 select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`a` > 500 and `test`.`t1`.`a` < 750
+explain extended select * from t1 where a< 750;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range ka ka 5 NULL 1000 100.00 Using index condition
+Warnings:
+Note 1003 select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`a` < 750
+explain extended select * from t1 where a> 500;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range ka ka 5 NULL 1000 100.00 Using index condition
+Warnings:
+Note 1003 select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`a` > 500
+explain extended select * from t1 where a>=0 and a<=1000;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range ka ka 5 NULL 1000 100.00 Using index condition
+Warnings:
+Note 1003 select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`a` >= 0 and `test`.`t1`.`a` <= 1000
+explain extended select * from t1 where b> 500 and b< 750;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range kb kb 5 NULL 1000 100.00 Using index condition
+Warnings:
+Note 1003 select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`b` > 500 and `test`.`t1`.`b` < 750
+explain extended select * from t1 where b< 750;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range kb kb 5 NULL 1000 100.00 Using index condition
+Warnings:
+Note 1003 select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`b` < 750
+explain extended select * from t1 where b> 500;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range kb kb 5 NULL 1000 100.00 Using index condition
+Warnings:
+Note 1003 select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`b` > 500
+explain extended select * from t1 where b>=0 and b<=1000;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range kb kb 5 NULL 1000 100.00 Using index condition
+Warnings:
+Note 1003 select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`b` >= 0 and `test`.`t1`.`b` <= 1000
+set @save_rocksdb_records_in_range = @@session.rocksdb_records_in_range;
+set rocksdb_records_in_range = 15000;
+explain extended select a from t1 where a < 750;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range ka ka 5 NULL 15000 100.00 Using where; Using index
+Warnings:
+Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` < 750
+explain extended select a, b from t1 where a < 750;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ALL ka NULL NULL NULL 20000 75.00 Using where
+Warnings:
+Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`a` < 750
+explain extended select a from t1 where a = 700;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ref ka ka 5 const 15000 100.00 Using index
+Warnings:
+Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` = 700
+explain extended select a,b from t1 where a = 700;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ref ka ka 5 const 15000 100.00
+Warnings:
+Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`a` = 700
+explain extended select a from t1 where a in (700, 800);
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 index ka ka 5 NULL 20000 100.00 Using where; Using index
+Warnings:
+Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` in (700,800)
+explain extended select a,b from t1 where a in (700, 800);
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ALL ka NULL NULL NULL 20000 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`a` in (700,800)
+set rocksdb_records_in_range=8000;
+explain extended select a from t1 where a in (700, 800);
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range ka ka 5 NULL 16000 100.00 Using where; Using index
+Warnings:
+Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` in (700,800)
+explain extended select a,b from t1 where a in (700, 800);
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ALL ka NULL NULL NULL 20000 80.00 Using where
+Warnings:
+Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`a` in (700,800)
+set rocksdb_records_in_range = @save_rocksdb_records_in_range;
+set global rocksdb_force_flush_memtable_now = true;
+explain extended select * from t1 where a> 500 and a< 750;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range ka ka 5 NULL 1000 100.00 Using index condition
+Warnings:
+Note 1003 select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`a` > 500 and `test`.`t1`.`a` < 750
+explain extended select * from t1 where a< 750;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range ka ka 5 NULL 1000 100.00 Using index condition
+Warnings:
+Note 1003 select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`a` < 750
+explain extended select * from t1 where a> 500;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range ka ka 5 NULL 1000 100.00 Using index condition
+Warnings:
+Note 1003 select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`a` > 500
+explain extended select * from t1 where a>=0 and a<=1000;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range ka ka 5 NULL 1000 100.00 Using index condition
+Warnings:
+Note 1003 select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`a` >= 0 and `test`.`t1`.`a` <= 1000
+explain extended select * from t1 where b> 500 and b< 750;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range kb kb 5 NULL 1000 100.00 Using index condition
+Warnings:
+Note 1003 select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`b` > 500 and `test`.`t1`.`b` < 750
+explain extended select * from t1 where b< 750;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range kb kb 5 NULL 1000 100.00 Using index condition
+Warnings:
+Note 1003 select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`b` < 750
+explain extended select * from t1 where b> 500;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range kb kb 5 NULL 1000 100.00 Using index condition
+Warnings:
+Note 1003 select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`b` > 500
+explain extended select * from t1 where b>=0 and b<=1000;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range kb kb 5 NULL 1000 100.00 Using index condition
+Warnings:
+Note 1003 select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`b` >= 0 and `test`.`t1`.`b` <= 1000
+explain extended select * from t1 where a>= 500 and a<= 500;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range ka ka 5 NULL 1000 100.00 Using index condition
+Warnings:
+Note 1003 select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`a` >= 500 and `test`.`t1`.`a` <= 500
+explain extended select * from t1 where b>= 500 and b<= 500;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range kb kb 5 NULL 1000 100.00 Using index condition
+Warnings:
+Note 1003 select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`b` >= 500 and `test`.`t1`.`b` <= 500
+explain extended select * from t1 where a< 750 and b> 500 and b< 750;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range ka,kb ka 5 NULL 1000 100.00 Using index condition; Using where
+Warnings:
+Note 1003 select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`a` < 750 and `test`.`t1`.`b` > 500 and `test`.`t1`.`b` < 750
+drop index ka on t1;
+drop index kb on t1;
+create index kab on t1(a,b);
+set global rocksdb_force_flush_memtable_now = true;
+explain extended select * from t1 where a< 750 and b> 500 and b< 750;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range kab kab 5 NULL 1000 100.00 Using where; Using index
+Warnings:
+Note 1003 select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`a` < 750 and `test`.`t1`.`b` > 500 and `test`.`t1`.`b` < 750
+set rocksdb_records_in_range=444;
+explain extended select * from t1 where a< 750 and b> 500 and b< 750;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range kab kab 5 NULL 444 100.00 Using where; Using index
+Warnings:
+Note 1003 select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`a` < 750 and `test`.`t1`.`b` > 500 and `test`.`t1`.`b` < 750
+set rocksdb_records_in_range=0;
+CREATE TABLE `linktable` (
+`id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+`visibility` tinyint(3) NOT NULL DEFAULT '0',
+`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '',
+`time` bigint(20) unsigned NOT NULL DEFAULT '0',
+`version` int(11) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link_pk',
+KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`version`,`data`) COMMENT 'cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin;
+insert into linktable values (1,1,1,1,1,1,1,1,1);
+insert into linktable values (1,1,2,1,1,1,1,1,1);
+insert into linktable values (1,1,3,1,1,1,1,1,1);
+insert into linktable values (1,1,4,1,1,1,1,1,1);
+set global rocksdb_force_flush_memtable_now = true;
+explain select id1, id2, link_type, visibility, data, time, version from linktable where id1 = 1 and link_type = 1 and id2 in (1, 2);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE linktable range PRIMARY,id1_type PRIMARY 24 NULL 2 Using where
+drop table linktable;
+CREATE TABLE `linktable` (
+`id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+`visibility` tinyint(3) NOT NULL DEFAULT '0',
+`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '',
+`time` bigint(20) unsigned NOT NULL DEFAULT '0',
+`version` int(11) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link_pk',
+KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`version`,`data`) COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin;
+insert into linktable values (1,1,1,1,1,1,1,1,1);
+insert into linktable values (1,1,2,1,1,1,1,1,1);
+insert into linktable values (1,1,3,1,1,1,1,1,1);
+insert into linktable values (1,1,4,1,1,1,1,1,1);
+set global rocksdb_force_flush_memtable_now = true;
+explain select id1, id2, link_type, visibility, data, time, version from linktable where id1 = 1 and link_type = 1 and id2 in (1, 2);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE linktable range PRIMARY,id1_type PRIMARY 24 NULL 2 Using where
+drop table linktable;
+DROP TABLE t1;
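The rows column in these plans tracks the session variable directly: a nonzero rocksdb_records_in_range overrides the engine's per-range estimate, so one range reports 15000 and the two ranges of IN (700, 800) report 2 x 8000 = 16000 once the variable drops to 8000. At 15000 per range the optimizer prices two ranges at 30000, worse than a full scan of the 20000-row index, which is why the first IN plan falls back to type=index. Setting the variable to 0 restores the engine's own estimates, as the post-flush plans at rows=1000 show.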
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/repair_table.result b/storage/rocksdb/mysql-test/rocksdb/r/repair_table.result
new file mode 100644
index 00000000000..f227d200bcc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/repair_table.result
@@ -0,0 +1,37 @@
+DROP TABLE IF EXISTS t1,t2;
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+CREATE TABLE t2 (a INT, b CHAR(8) PRIMARY KEY) ENGINE=rocksdb;
+REPAIR TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 repair note The storage engine for the table doesn't support repair
+INSERT INTO t1 (a,b) VALUES (3,'c');
+INSERT INTO t2 (a,b) VALUES (4,'d');
+REPAIR NO_WRITE_TO_BINLOG TABLE t1, t2;
+Table Op Msg_type Msg_text
+test.t1 repair note The storage engine for the table doesn't support repair
+test.t2 repair note The storage engine for the table doesn't support repair
+INSERT INTO t2 (a,b) VALUES (5,'e'),(6,'f');
+REPAIR LOCAL TABLE t2;
+Table Op Msg_type Msg_text
+test.t2 repair note The storage engine for the table doesn't support repair
+INSERT INTO t1 (a,b) VALUES (7,'g'),(8,'h');
+INSERT INTO t2 (a,b) VALUES (9,'i');
+REPAIR LOCAL TABLE t2, t1 EXTENDED;
+Table Op Msg_type Msg_text
+test.t2 repair note The storage engine for the table doesn't support repair
+test.t1 repair note The storage engine for the table doesn't support repair
+INSERT INTO t1 (a,b) VALUES (10,'j');
+INSERT INTO t2 (a,b) VALUES (11,'k');
+REPAIR TABLE t1, t2 QUICK USE_FRM;
+Table Op Msg_type Msg_text
+test.t1 repair note The storage engine for the table doesn't support repair
+test.t2 repair note The storage engine for the table doesn't support repair
+INSERT INTO t1 (a,b) VALUES (12,'l');
+INSERT INTO t2 (a,b) VALUES (13,'m');
+REPAIR NO_WRITE_TO_BINLOG TABLE t1, t2 QUICK EXTENDED USE_FRM;
+Table Op Msg_type Msg_text
+test.t1 repair note The storage engine for the table doesn't support repair
+test.t2 repair note The storage engine for the table doesn't support repair
+FLUSH TABLE t1;
+DROP TABLE t1, t2;
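None of the REPAIR variants is implemented by RocksDB, so LOCAL, QUICK, EXTENDED, USE_FRM and NO_WRITE_TO_BINLOG all degrade to the same per-table note while leaving the data untouched; the INSERTs interleaved between them succeeding is the real assertion here. For an integrity pass on a MyRocks table, CHECK TABLE is the closest supported operation (illustrative only):

CHECK TABLE t1;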
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/replace.result b/storage/rocksdb/mysql-test/rocksdb/r/replace.result
new file mode 100644
index 00000000000..f8f61a3f8c3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/replace.result
@@ -0,0 +1,32 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+REPLACE INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e');
+SELECT a,b FROM t1;
+a b
+1 a
+2 b
+3 c
+4 d
+5 e
+REPLACE t1 (a,b) VALUE (10,'foo'),(10,'foo');
+SELECT a,b FROM t1;
+a b
+1 a
+10 foo
+10 foo
+2 b
+3 c
+4 d
+5 e
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b CHAR(8), PRIMARY KEY (b)) ENGINE=rocksdb;
+REPLACE INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c');
+INSERT INTO t1 (a,b) VALUES (4,'b');
+ERROR 23000: Duplicate entry 'b' for key 'PRIMARY'
+REPLACE INTO t1 (a,b) VALUES (4,'b');
+SELECT a,b FROM t1;
+a b
+1 a
+3 c
+4 b
+DROP TABLE t1;
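The first REPLACE batch behaves exactly like INSERT: pk is auto-generated and (a,b) carries no unique constraint, which is how two identical (10,'foo') rows survive. On the second table the PRIMARY KEY on b turns REPLACE into delete-then-insert, so (2,'b') vanishes when (4,'b') arrives. When the conflicting row should be updated in place rather than replaced, INSERT ... ON DUPLICATE KEY UPDATE preserves row identity (sketch, not part of the recorded result):

INSERT INTO t1 (a,b) VALUES (5,'b')
ON DUPLICATE KEY UPDATE a = VALUES(a);
-- updates the existing 'b' row instead of deleting and re-inserting it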
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result
new file mode 100644
index 00000000000..6cc4cc7a1dc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result
@@ -0,0 +1,2635 @@
+select ENGINE,COMMENT,TRANSACTIONS,XA,SAVEPOINTS from information_schema.engines where engine = 'rocksdb';
+ENGINE COMMENT TRANSACTIONS XA SAVEPOINTS
+ROCKSDB RocksDB storage engine YES YES YES
+drop table if exists t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10;
+drop table if exists t11,t12,t13,t14,t15,t16,t17,t18,t19,t20;
+drop table if exists t21,t22,t23,t24,t25,t26,t27,t28,t29;
+drop table if exists t30,t31,t32,t33,t34,t35,t36,t37,t38,t39;
+drop table if exists t40,t41,t42,t43,t44,t45,t46,t47,t48,t49;
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+#
+# Issue #1: Don't update indexes if index values have not changed
+#
+create table t1 (
+pk int primary key,
+a int,
+b int,
+key(a)
+) engine=rocksdb;
+insert into t1 values
+(1,1,1), (2,2,2), (3,3,3), (4,4,4);
+set @var1=(select variable_value
+from information_schema.global_status
+where variable_name='rocksdb_number_keys_written');
+# Do an update that doesn't change the key 'a'.
+update t1 set b=3334341 where a=2;
+set @var2=(select variable_value
+from information_schema.global_status
+where variable_name='rocksdb_number_keys_written');
+# The following should produce 1
+select @var2 - @var1;
+@var2 - @var1
+1
+# Do an update that sets the key to the same value
+update t1 set a=pk where a=3;
+set @var3=(select variable_value
+from information_schema.global_status
+where variable_name='rocksdb_number_keys_written');
+# We 'updated' the column to the same value, so the following must return 0:
+select @var3 - @var2;
+@var3 - @var2
+0
+drop table t1;
+create table t0 (a int primary key) engine=rocksdb;
+show create table t0;
+Table Create Table
+t0 CREATE TABLE `t0` (
+ `a` int(11) NOT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+drop table t0;
+create table t1 (a int primary key, b int) engine=rocksdb;
+insert into t1 values (1,1);
+insert into t1 values (2,2);
+select * from t1;
+a b
+1 1
+2 2
+# Check that we can create another table and insert there
+create table t2 (a varchar(10) primary key, b varchar(10)) engine=rocksdb;
+insert into t2 value ('abc','def');
+insert into t2 value ('hijkl','mnopq');
+select * from t2;
+a b
+abc def
+hijkl mnopq
+# Select again from t1 to see that records from different tables don't mix
+select * from t1;
+a b
+1 1
+2 2
+explain select * from t2 where a='no-such-key';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
+explain select * from t2 where a='abc';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 const PRIMARY PRIMARY 12 const #
+select * from t2 where a='abc';
+a b
+abc def
+# Try a composite PK
+create table t3 (
+pk1 int,
+pk2 varchar(10),
+col1 varchar(10),
+primary key(pk1, pk2)
+) engine=rocksdb;
+insert into t3 values (2,'two', 'row#2');
+insert into t3 values (3,'three', 'row#3');
+insert into t3 values (1,'one', 'row#1');
+select * from t3;
+pk1 pk2 col1
+1 one row#1
+2 two row#2
+3 three row#3
+select * from t3 where pk1=3 and pk2='three';
+pk1 pk2 col1
+3 three row#3
+drop table t1, t2, t3;
+#
+# Test blob values
+#
+create table t4 (a int primary key, b blob) engine=rocksdb;
+insert into t4 values (1, repeat('quux-quux', 60));
+insert into t4 values (10, repeat('foo-bar', 43));
+insert into t4 values (5, repeat('foo-bar', 200));
+insert into t4 values (2, NULL);
+select
+a,
+(case a
+when 1 then b=repeat('quux-quux', 60)
+when 10 then b=repeat('foo-bar', 43)
+when 5 then b=repeat('foo-bar', 200)
+when 2 then b is null
+else 'IMPOSSIBLE!' end) as CMP
+from t4;
+a CMP
+1 1
+2 1
+5 1
+10 1
+drop table t4;
+#
+# Test blobs of various sizes
+#
+# TINYBLOB
+create table t5 (a int primary key, b tinyblob) engine=rocksdb;
+insert into t5 values (1, repeat('quux-quux', 6));
+insert into t5 values (10, repeat('foo-bar', 4));
+insert into t5 values (5, repeat('foo-bar', 2));
+select
+a,
+(case a
+when 1 then b=repeat('quux-quux', 6)
+when 10 then b=repeat('foo-bar', 4)
+when 5 then b=repeat('foo-bar', 2)
+else 'IMPOSSIBLE!' end) as CMP
+from t5;
+a CMP
+1 1
+5 1
+10 1
+drop table t5;
+# MEDIUMBLOB
+create table t6 (a int primary key, b mediumblob) engine=rocksdb;
+insert into t6 values (1, repeat('AB', 65000));
+insert into t6 values (10, repeat('bbb', 40000));
+insert into t6 values (5, repeat('foo-bar', 2));
+select
+a,
+(case a
+when 1 then b=repeat('AB', 65000)
+when 10 then b=repeat('bbb', 40000)
+when 5 then b=repeat('foo-bar', 2)
+else 'IMPOSSIBLE!' end) as CMP
+from t6;
+a CMP
+1 1
+5 1
+10 1
+drop table t6;
+# LONGBLOB
+create table t7 (a int primary key, b longblob) engine=rocksdb;
+insert into t7 values (1, repeat('AB', 65000));
+insert into t7 values (10, repeat('bbb', 40000));
+insert into t7 values (5, repeat('foo-bar', 2));
+select
+a,
+(case a
+when 1 then b=repeat('AB', 65000)
+when 10 then b=repeat('bbb', 40000)
+when 5 then b=repeat('foo-bar', 2)
+else 'IMPOSSIBLE!' end) as CMP
+from t7;
+a CMP
+1 1
+5 1
+10 1
+drop table t7;
+#
+# Check if DELETEs work
+#
+create table t8 (a varchar(10) primary key, col1 varchar(12)) engine=rocksdb;
+insert into t8 values
+('one', 'eins'),
+('two', 'zwei'),
+('three', 'drei'),
+('four', 'vier'),
+('five', 'funf');
+# Delete by PK
+explain delete from t8 where a='three';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t8 range PRIMARY PRIMARY 12 NULL # Using where
+delete from t8 where a='three';
+select * from t8;
+a col1
+five funf
+four vier
+one eins
+two zwei
+# Delete while doing a full table scan
+delete from t8 where col1='eins' or col1='vier';
+select * from t8;
+a col1
+five funf
+two zwei
+# delete w/o WHERE:
+delete from t8;
+select * from t8;
+a col1
+#
+# Test UPDATEs
+#
+insert into t8 values
+('one', 'eins'),
+('two', 'zwei'),
+('three', 'drei'),
+('four', 'vier'),
+('five', 'funf');
+update t8 set col1='dva' where a='two';
+update t8 set a='fourAAA' where col1='vier';
+select * from t8;
+a col1
+five funf
+fourAAA vier
+one eins
+three drei
+two dva
+delete from t8;
+#
+# Basic transactions tests
+#
+begin;
+insert into t8 values ('trx1-val1', 'data');
+insert into t8 values ('trx1-val2', 'data');
+rollback;
+select * from t8;
+a col1
+begin;
+insert into t8 values ('trx1-val1', 'data');
+insert into t8 values ('trx1-val2', 'data');
+commit;
+select * from t8;
+a col1
+trx1-val1 data
+trx1-val2 data
+drop table t8;
+#
+# Check if DROP TABLE works
+#
+create table t8 (a varchar(10) primary key, col1 varchar(12)) engine=rocksdb;
+select * from t8;
+a col1
+insert into t8 values ('foo','foo');
+drop table t8;
+create table t8 (a varchar(10) primary key, col1 varchar(12)) engine=rocksdb;
+select * from t8;
+a col1
+drop table t8;
+#
+# MDEV-3961: Assertion ... on creating a TEMPORARY RocksDB table
+#
+CREATE TEMPORARY TABLE t10 (pk INT PRIMARY KEY) ENGINE=RocksDB;
+ERROR HY000: Table storage engine 'ROCKSDB' does not support the create option 'TEMPORARY'
+#
+# MDEV-3963: JOIN or WHERE conditions involving keys on RocksDB tables don't work
+#
+CREATE TABLE t10 (i INT PRIMARY KEY) ENGINE=RocksDB;
+INSERT INTO t10 VALUES (1),(3);
+CREATE TABLE t11 (j INT PRIMARY KEY) ENGINE=RocksDB;
+INSERT INTO t11 VALUES (1),(4);
+select * from t10;
+i
+1
+3
+select * from t11;
+j
+1
+4
+EXPLAIN
+SELECT * FROM t10, t11 WHERE i=j;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10 index PRIMARY PRIMARY 4 NULL # Using index
+1 SIMPLE t11 eq_ref PRIMARY PRIMARY 4 test.t10.i # Using index
+SELECT * FROM t10, t11 WHERE i=j;
+i j
+1 1
+DROP TABLE t10,t11;
+#
+# MDEV-3962: SELECT with ORDER BY causes "ERROR 1030 (HY000): Got error 122
+#
+CREATE TABLE t12 (pk INT PRIMARY KEY) ENGINE=RocksDB;
+INSERT INTO t12 VALUES (2),(1);
+SELECT * FROM t12 ORDER BY pk;
+pk
+1
+2
+DROP TABLE t12;
+#
+# MDEV-3964: Assertion `!pk_descr' fails in ha_rocksdb::open on adding partitions ...
+#
+create table t14 (pk int primary key) engine=RocksDB partition by hash(pk) partitions 2;
+drop table t14;
+#
+# MDEV-3960: Server crashes on running DISCARD TABLESPACE on a RocksDB table
+#
+create table t9 (i int primary key) engine=rocksdb;
+alter table t9 discard tablespace;
+ERROR HY000: Storage engine ROCKSDB of the table `test`.`t9` doesn't have this option
+drop table t9;
+#
+# MDEV-3959: Assertion `slice->size() == table->s->reclength' fails ...
+# on accessing a table after ALTER
+#
+CREATE TABLE t15 (a INT, rocksdb_pk INT PRIMARY KEY) ENGINE=RocksDB;
+INSERT INTO t15 VALUES (1,1),(5,2);
+ALTER TABLE t15 DROP COLUMN a;
+DROP TABLE t15;
+#
+# MDEV-3968: UPDATE produces a wrong result while modifying a PK on a RocksDB table
+#
+create table t16 (pk int primary key, a char(8)) engine=RocksDB;
+insert into t16 values (1,'a'),(2,'b'),(3,'c'),(4,'d');
+update t16 set pk=100, a = 'updated' where a in ('b','c');
+ERROR 23000: Duplicate entry '100' for key 'PRIMARY'
+select * from t16;
+pk a
+1 a
+2 b
+3 c
+4 d
+drop table t16;
+#
+# MDEV-3970: A set of assorted crashes on inserting a row into a RocksDB table
+#
+drop table if exists t_very_long_table_name;
+CREATE TABLE `t_very_long_table_name` (
+`c` char(1) NOT NULL,
+`c0` char(0) NOT NULL,
+`c1` char(1) NOT NULL,
+`c20` char(20) NOT NULL,
+`c255` char(255) NOT NULL,
+PRIMARY KEY (`c255`)
+) ENGINE=RocksDB DEFAULT CHARSET=latin1;
+INSERT INTO t_very_long_table_name VALUES ('a', '', 'c', REPEAT('a',20), REPEAT('x',255));
+drop table t_very_long_table_name;
+#
+# Test table locking and read-before-write checks.
+#
+create table t17 (pk varchar(12) primary key, col1 varchar(12)) engine=rocksdb;
+insert into t17 values ('row1', 'val1');
+insert into t17 values ('row1', 'val1-try2');
+ERROR 23000: Duplicate entry 'row1' for key 'PRIMARY'
+insert into t17 values ('ROW1', 'val1-try2');
+ERROR 23000: Duplicate entry 'ROW1' for key 'PRIMARY'
+insert into t17 values ('row2', 'val2');
+insert into t17 values ('row3', 'val3');
+# This is ok
+update t17 set pk='row4' where pk='row1';
+# This will try to overwrite another row:
+update t17 set pk='row3' where pk='row2';
+ERROR 23000: Duplicate entry 'row3' for key 'PRIMARY'
+select * from t17;
+pk col1
+row2 val2
+row3 val3
+row4 val1
+#
+# Locking tests
+#
+connect con1,localhost,root,,;
+# First, make sure there's no locking when transactions update different rows
+connection con1;
+set autocommit=0;
+update t17 set col1='UPD1' where pk='row2';
+connection default;
+update t17 set col1='UPD2' where pk='row3';
+connection con1;
+commit;
+connection default;
+select * from t17;
+pk col1
+row2 UPD1
+row3 UPD2
+row4 val1
+# Check the variable
+show variables like 'rocksdb_lock_wait_timeout';
+Variable_name Value
+rocksdb_lock_wait_timeout 1
+set rocksdb_lock_wait_timeout=2;
+show variables like 'rocksdb_lock_wait_timeout';
+Variable_name Value
+rocksdb_lock_wait_timeout 2
+# Try updating the same row from two transactions
+connection con1;
+begin;
+update t17 set col1='UPD2-AA' where pk='row2';
+connection default;
+update t17 set col1='UPD2-BB' where pk='row2';
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+set rocksdb_lock_wait_timeout=1000;
+update t17 set col1='UPD2-CC' where pk='row2';
+connection con1;
+rollback;
+connection default;
+select * from t17 where pk='row2';
+pk col1
+row2 UPD2-CC
+drop table t17;
+disconnect con1;
+#
+# MDEV-4035: RocksDB: SELECT produces different results inside a transaction (read is not repeatable)
+#
+create table t18 (pk int primary key, i int) engine=RocksDB;
+begin;
+select * from t18;
+pk i
+select * from t18 where pk = 1;
+pk i
+connect con1,localhost,root,,;
+insert into t18 values (1,100);
+connection default;
+select * from t18;
+pk i
+select * from t18 where pk = 1;
+pk i
+commit;
+drop table t18;
+#
+# MDEV-4036: RocksDB: INSERT .. ON DUPLICATE KEY UPDATE does not work, produces ER_DUP_KEY
+#
+create table t19 (pk int primary key, i int) engine=RocksDB;
+insert into t19 values (1,1);
+insert into t19 values (1,100) on duplicate key update i = 102;
+select * from t19;
+pk i
+1 102
+drop table t19;
+# MDEV-4037: RocksDB: REPLACE doesn't work, produces ER_DUP_KEY
+create table t20 (pk int primary key, i int) engine=RocksDB;
+insert into t20 values (1,1);
+replace into t20 values (1,100);
+select * from t20;
+pk i
+1 100
+drop table t20;
+#
+# MDEV-4041: Server crashes in Primary_key_comparator::get_hashnr on INSERT
+#
+create table t21 (v varbinary(16) primary key, i int) engine=RocksDB;
+insert into t21 values ('a',1);
+select * from t21;
+v i
+a 1
+drop table t21;
+#
+# MDEV-4047: RocksDB: Assertion `0' fails in Protocol::end_statement() on multi-table INSERT IGNORE
+#
+CREATE TABLE t22 (a int primary key) ENGINE=RocksDB;
+INSERT INTO t22 VALUES (1),(2);
+CREATE TABLE t23 (b int primary key) ENGINE=RocksDB;
+INSERT INTO t23 SELECT * FROM t22;
+DELETE IGNORE t22.*, t23.* FROM t22, t23 WHERE b < a;
+DROP TABLE t22,t23;
+#
+# MDEV-4046: RocksDB: Multi-table DELETE locks itself and ends with ER_LOCK_WAIT_TIMEOUT
+#
+CREATE TABLE t24 (pk int primary key) ENGINE=RocksDB;
+INSERT INTO t24 VALUES (1),(2);
+CREATE TABLE t25 LIKE t24;
+INSERT INTO t25 SELECT * FROM t24;
+DELETE t25.* FROM t24, t25;
+DROP TABLE t24,t25;
+#
+# MDEV-4044: RocksDB: UPDATE or DELETE with ORDER BY locks itself
+#
+create table t26 (pk int primary key, c char(1)) engine=RocksDB;
+insert into t26 values (1,'a'),(2,'b');
+update t26 set c = 'x' order by pk limit 1;
+delete from t26 order by pk limit 1;
+select * from t26;
+pk c
+2 b
+drop table t26;
+#
+# Test whether SELECT ... FOR UPDATE puts locks
+#
+create table t27(pk varchar(10) primary key, col1 varchar(20)) engine=RocksDB;
+insert into t27 values
+('row1', 'row1data'),
+('row2', 'row2data'),
+('row3', 'row3data');
+connection con1;
+begin;
+select * from t27 where pk='row3' for update;
+pk col1
+row3 row3data
+connection default;
+set rocksdb_lock_wait_timeout=1;
+update t27 set col1='row2-modified' where pk='row3';
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection con1;
+rollback;
+connection default;
+disconnect con1;
+drop table t27;
+#
+# MDEV-4060: RocksDB: Assertion `! trx->batch' fails in
+#
+create table t28 (pk int primary key, a int) engine=RocksDB;
+insert into t28 values (1,10),(2,20);
+begin;
+update t28 set a = 100 where pk = 3;
+rollback;
+select * from t28;
+pk a
+1 10
+2 20
+drop table t28;
+#
+# Secondary indexes
+#
+create table t30 (
+pk varchar(16) not null primary key,
+key1 varchar(16) not null,
+col1 varchar(16) not null,
+key(key1)
+) engine=rocksdb;
+insert into t30 values ('row1', 'row1-key', 'row1-data');
+insert into t30 values ('row2', 'row2-key', 'row2-data');
+insert into t30 values ('row3', 'row3-key', 'row3-data');
+explain
+select * from t30 where key1='row2-key';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t30 ref key1 key1 18 const # Using index condition
+select * from t30 where key1='row2-key';
+pk key1 col1
+row2 row2-key row2-data
+explain
+select * from t30 where key1='row1';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t30 ref key1 key1 18 const # Using index condition
+# This will produce nothing:
+select * from t30 where key1='row1';
+pk key1 col1
+explain
+select key1 from t30;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t30 index NULL key1 20 NULL # Using index
+select key1 from t30;
+key1
+row1-key
+row2-key
+row3-key
+# Create a duplicate record
+insert into t30 values ('row2a', 'row2-key', 'row2a-data');
+# Can we see it?
+select * from t30 where key1='row2-key';
+pk key1 col1
+row2 row2-key row2-data
+row2a row2-key row2a-data
+delete from t30 where pk='row2';
+select * from t30 where key1='row2-key';
+pk key1 col1
+row2a row2-key row2a-data
+#
+# Range scans on secondary index
+#
+delete from t30;
+insert into t30 values
+('row1', 'row1-key', 'row1-data'),
+('row2', 'row2-key', 'row2-data'),
+('row3', 'row3-key', 'row3-data'),
+('row4', 'row4-key', 'row4-data'),
+('row5', 'row5-key', 'row5-data');
+explain
+select * from t30 where key1 <='row3-key';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t30 range key1 key1 18 NULL # Using index condition
+select * from t30 where key1 <='row3-key';
+pk key1 col1
+row1 row1-key row1-data
+row2 row2-key row2-data
+row3 row3-key row3-data
+explain
+select * from t30 where key1 between 'row2-key' and 'row4-key';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t30 range key1 key1 18 NULL # Using index condition
+select * from t30 where key1 between 'row2-key' and 'row4-key';
+pk key1 col1
+row2 row2-key row2-data
+row3 row3-key row3-data
+row4 row4-key row4-data
+explain
+select * from t30 where key1 in ('row2-key','row4-key');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t30 range key1 key1 18 NULL # Using index condition
+select * from t30 where key1 in ('row2-key','row4-key');
+pk key1 col1
+row2 row2-key row2-data
+row4 row4-key row4-data
+explain
+select key1 from t30 where key1 in ('row2-key','row4-key');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t30 range key1 key1 18 NULL # Using where; Using index
+select key1 from t30 where key1 in ('row2-key','row4-key');
+key1
+row2-key
+row4-key
+explain
+select * from t30 where key1 > 'row1-key' and key1 < 'row4-key';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t30 range key1 key1 18 NULL # Using index condition
+select * from t30 where key1 > 'row1-key' and key1 < 'row4-key';
+pk key1 col1
+row2 row2-key row2-data
+row3 row3-key row3-data
+explain
+select * from t30 order by key1 limit 3;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t30 index NULL key1 20 NULL #
+select * from t30 order by key1 limit 3;
+pk key1 col1
+row1 row1-key row1-data
+row2 row2-key row2-data
+row3 row3-key row3-data
+explain
+select * from t30 order by key1 desc limit 3;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t30 index NULL key1 20 NULL #
+select * from t30 order by key1 desc limit 3;
+pk key1 col1
+row5 row5-key row5-data
+row4 row4-key row4-data
+row3 row3-key row3-data
+#
+# Range scans on primary key
+#
+explain
+select * from t30 where pk <='row3';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t30 range PRIMARY PRIMARY 18 NULL # Using where
+select * from t30 where pk <='row3';
+pk key1 col1
+row1 row1-key row1-data
+row2 row2-key row2-data
+row3 row3-key row3-data
+explain
+select * from t30 where pk between 'row2' and 'row4';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t30 range PRIMARY PRIMARY 18 NULL # Using where
+select * from t30 where pk between 'row2' and 'row4';
+pk key1 col1
+row2 row2-key row2-data
+row3 row3-key row3-data
+row4 row4-key row4-data
+explain
+select * from t30 where pk in ('row2','row4');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t30 range PRIMARY PRIMARY 18 NULL # Using where
+select * from t30 where pk in ('row2','row4');
+pk key1 col1
+row2 row2-key row2-data
+row4 row4-key row4-data
+explain
+select * from t30 order by pk limit 3;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t30 index NULL PRIMARY 18 NULL #
+select * from t30 order by pk limit 3;
+pk key1 col1
+row1 row1-key row1-data
+row2 row2-key row2-data
+row3 row3-key row3-data
+drop table t30;
+#
+# MDEV-3841: RocksDB: Reading by PK prefix does not work
+#
+create table t31 (i int, j int, k int, primary key(i,j,k)) engine=RocksDB;
+insert into t31 values (1,10,100),(2,20,200);
+select * from t31 where i = 1;
+i j k
+1 10 100
+select * from t31 where j = 10;
+i j k
+1 10 100
+select * from t31 where k = 100;
+i j k
+1 10 100
+select * from t31 where i = 1 and j = 10;
+i j k
+1 10 100
+select * from t31 where i = 1 and k = 100;
+i j k
+1 10 100
+select * from t31 where j = 10 and k = 100;
+i j k
+1 10 100
+select * from t31 where i = 1 and j = 10 and k = 100;
+i j k
+1 10 100
+drop table t31;
+#
+# MDEV-4055: RocksDB: UPDATE/DELETE by a multi-part PK does not work
+#
+create table t32 (i int, j int, k int, primary key(i,j,k), a varchar(8)) engine=RocksDB;
+insert into t32 values
+(1,10,100,''),
+(2,20,200,'');
+select * from t32 where i = 1 and j = 10 and k = 100;
+i j k a
+1 10 100
+update t32 set a = 'updated' where i = 1 and j = 10 and k = 100;
+select * from t32;
+i j k a
+1 10 100 updated
+2 20 200
+drop table t32;
+#
+# MDEV-3841: RocksDB: Assertion `0' fails in ha_rocksdb::index_read_map on range select with ORDER BY .. DESC
+#
+CREATE TABLE t33 (pk INT PRIMARY KEY, a CHAR(1)) ENGINE=RocksDB;
+INSERT INTO t33 VALUES (1,'a'),(2,'b');
+SELECT * FROM t33 WHERE pk <= 10 ORDER BY pk DESC;
+pk a
+2 b
+1 a
+DROP TABLE t33;
+#
+# MDEV-4081: RocksDB throws error 122 on an attempt to create a table with unique index
+#
+# Unique indexes can be created, but uniqueness won't be enforced
+create table t33 (pk int primary key, u int, unique index(u)) engine=RocksDB;
+drop table t33;
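+# A minimal sketch of the non-enforcement (hypothetical statements, not part
+# of this test run): both inserts below would be accepted even though they
+# carry the same value for the UNIQUE column u:
+#   insert into t33 values (1, 100);
+#   insert into t33 values (2, 100);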
+#
+# MDEV-4077: RocksDB: Wrong result (duplicate row) on select with range
+#
+CREATE TABLE t34 (pk INT PRIMARY KEY) ENGINE=RocksDB;
+INSERT INTO t34 VALUES (10),(11);
+SELECT pk FROM t34 WHERE pk > 5 AND pk < 15;
+pk
+10
+11
+SELECT pk FROM t34 WHERE pk BETWEEN 5 AND 15;
+pk
+10
+11
+SELECT pk FROM t34 WHERE pk > 5;
+pk
+10
+11
+SELECT pk FROM t34 WHERE pk < 15;
+pk
+10
+11
+drop table t34;
+#
+# MDEV-4086: RocksDB does not allow a query with multi-part pk and index and ORDER BY .. DESC
+#
+create table t35 (a int, b int, c int, d int, e int, primary key (a,b,c), key (a,c,d,e)) engine=RocksDB;
+insert into t35 values (1,1,1,1,1),(2,2,2,2,2);
+select * from t35 where a = 1 and c = 1 and d = 1 order by e desc;
+a b c d e
+1 1 1 1 1
+drop table t35;
+#
+# MDEV-4084: RocksDB: Wrong result on IN subquery with index
+#
+CREATE TABLE t36 (pk INT PRIMARY KEY, a INT, KEY(a)) ENGINE=RocksDB;
+INSERT INTO t36 VALUES (1,10),(2,20);
+SELECT 3 IN ( SELECT a FROM t36 );
+3 IN ( SELECT a FROM t36 )
+0
+drop table t36;
+#
+# MDEV-4084: RocksDB: Wrong result on IN subquery with index
+#
+CREATE TABLE t37 (pk INT PRIMARY KEY, a INT, b CHAR(1), KEY(a), KEY(a,b))
+ENGINE=RocksDB;
+INSERT INTO t37 VALUES (1,10,'x'), (2,20,'y');
+SELECT MAX(a) FROM t37 WHERE a < 100;
+MAX(a)
+20
+DROP TABLE t37;
+#
+# MDEV-4090: RocksDB: Wrong result (duplicate rows) on range access with secondary key and ORDER BY DESC
+#
+CREATE TABLE t38 (pk INT PRIMARY KEY, i INT, KEY(i)) ENGINE=RocksDB;
+INSERT INTO t38 VALUES (1,10), (2,20);
+SELECT i FROM t38 WHERE i NOT IN (8) ORDER BY i DESC;
+i
+20
+10
+drop table t38;
+#
+# MDEV-4092: RocksDB: Assertion `in_table(pa, a_len)' fails in Rdb_key_def::cmp_full_keys
+# with a multi-part key and ORDER BY .. DESC
+#
+CREATE TABLE t40 (pk1 INT PRIMARY KEY, a INT, b VARCHAR(1), KEY(b,a)) ENGINE=RocksDB;
+INSERT INTO t40 VALUES (1, 7,'x'),(2,8,'y');
+CREATE TABLE t41 (pk2 INT PRIMARY KEY) ENGINE=RocksDB;
+INSERT INTO t41 VALUES (1),(2);
+SELECT * FROM t40, t41 WHERE pk1 = pk2 AND b = 'o' ORDER BY a DESC;
+pk1 a b pk2
+DROP TABLE t40,t41;
+#
+# MDEV-4093: RocksDB: IN subquery by secondary key with NULL among values returns true instead of NULL
+#
+CREATE TABLE t42 (pk INT PRIMARY KEY, a INT, KEY(a)) ENGINE=RocksDB;
+INSERT INTO t42 VALUES (1, NULL),(2, 8);
+SELECT ( 3 ) NOT IN ( SELECT a FROM t42 );
+( 3 ) NOT IN ( SELECT a FROM t42 )
+NULL
+DROP TABLE t42;
+#
+# MDEV-4094: RocksDB: Wrong result on SELECT and ER_KEY_NOT_FOUND on
+# DELETE with search by NULL-able secondary key ...
+#
+CREATE TABLE t43 (pk INT PRIMARY KEY, a INT, b CHAR(1), KEY(a)) ENGINE=RocksDB;
+INSERT INTO t43 VALUES (1,8,'g'),(2,9,'x');
+UPDATE t43 SET pk = 10 WHERE a = 8;
+REPLACE INTO t43 ( a ) VALUES ( 8 );
+Warnings:
+Warning 1364 Field 'pk' doesn't have a default value
+REPLACE INTO t43 ( b ) VALUES ( 'y' );
+Warnings:
+Warning 1364 Field 'pk' doesn't have a default value
+SELECT * FROM t43 WHERE a = 8;
+pk a b
+10 8 g
+DELETE FROM t43 WHERE a = 8;
+DROP TABLE t43;
+#
+# Basic AUTO_INCREMENT tests
+#
+create table t44(pk int primary key auto_increment, col1 varchar(12)) engine=rocksdb;
+insert into t44 (col1) values ('row1');
+insert into t44 (col1) values ('row2');
+insert into t44 (col1) values ('row3');
+select * from t44;
+pk col1
+1 row1
+2 row2
+3 row3
+drop table t44;
+#
+# ALTER TABLE tests
+#
+create table t45 (pk int primary key, col1 varchar(12)) engine=rocksdb;
+insert into t45 values (1, 'row1');
+insert into t45 values (2, 'row2');
+alter table t45 rename t46;
+select * from t46;
+pk col1
+1 row1
+2 row2
+drop table t46;
+drop table t45;
+ERROR 42S02: Unknown table 'test.t45'
+#
+# Check bulk loading
+# Bulk loading used to overwrite existing data.
+# Now it fails if the new data overlaps with what already exists.
+#
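+# (rocksdb_max_open_files and rocksdb_supported_compression_types are
+# filtered out of the listing below, presumably because their values depend
+# on the build and platform.)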
+show variables
+where
+variable_name like 'rocksdb%' and
+variable_name not like 'rocksdb_max_open_files' and
+variable_name not like 'rocksdb_supported_compression_types';
+Variable_name Value
+rocksdb_access_hint_on_compaction_start 1
+rocksdb_advise_random_on_open ON
+rocksdb_allow_concurrent_memtable_write OFF
+rocksdb_allow_mmap_reads OFF
+rocksdb_allow_mmap_writes OFF
+rocksdb_allow_to_start_after_corruption OFF
+rocksdb_blind_delete_primary_key OFF
+rocksdb_block_cache_size 536870912
+rocksdb_block_restart_interval 16
+rocksdb_block_size 4096
+rocksdb_block_size_deviation 10
+rocksdb_bulk_load OFF
+rocksdb_bulk_load_allow_sk OFF
+rocksdb_bulk_load_allow_unsorted OFF
+rocksdb_bulk_load_size 1000
+rocksdb_bytes_per_sync 0
+rocksdb_cache_dump ON
+rocksdb_cache_high_pri_pool_ratio 0.000000
+rocksdb_cache_index_and_filter_blocks ON
+rocksdb_cache_index_and_filter_with_high_priority ON
+rocksdb_checksums_pct 100
+rocksdb_collect_sst_properties ON
+rocksdb_commit_in_the_middle OFF
+rocksdb_commit_time_batch_for_recovery ON
+rocksdb_compact_cf
+rocksdb_compaction_readahead_size 0
+rocksdb_compaction_sequential_deletes 0
+rocksdb_compaction_sequential_deletes_count_sd OFF
+rocksdb_compaction_sequential_deletes_file_size 0
+rocksdb_compaction_sequential_deletes_window 0
+rocksdb_create_checkpoint
+rocksdb_create_if_missing ON
+rocksdb_create_missing_column_families OFF
+rocksdb_datadir ./#rocksdb
+rocksdb_db_write_buffer_size 0
+rocksdb_deadlock_detect OFF
+rocksdb_deadlock_detect_depth 50
+rocksdb_debug_manual_compaction_delay 0
+rocksdb_debug_optimizer_no_zero_cardinality ON
+rocksdb_debug_ttl_ignore_pk OFF
+rocksdb_debug_ttl_read_filter_ts 0
+rocksdb_debug_ttl_rec_ts 0
+rocksdb_debug_ttl_snapshot_ts 0
+rocksdb_default_cf_options
+rocksdb_delayed_write_rate 0
+rocksdb_delete_cf
+rocksdb_delete_obsolete_files_period_micros 21600000000
+rocksdb_enable_2pc ON
+rocksdb_enable_bulk_load_api ON
+rocksdb_enable_insert_with_update_caching ON
+rocksdb_enable_thread_tracking ON
+rocksdb_enable_ttl ON
+rocksdb_enable_ttl_read_filtering ON
+rocksdb_enable_write_thread_adaptive_yield OFF
+rocksdb_error_if_exists OFF
+rocksdb_error_on_suboptimal_collation ON
+rocksdb_flush_log_at_trx_commit 0
+rocksdb_force_compute_memtable_stats ON
+rocksdb_force_compute_memtable_stats_cachetime 0
+rocksdb_force_flush_memtable_and_lzero_now OFF
+rocksdb_force_flush_memtable_now OFF
+rocksdb_force_index_records_in_range 0
+rocksdb_git_hash #
+rocksdb_hash_index_allow_collision ON
+rocksdb_ignore_unknown_options ON
+rocksdb_index_type kBinarySearch
+rocksdb_info_log_level error_level
+rocksdb_io_write_timeout 0
+rocksdb_is_fd_close_on_exec ON
+rocksdb_keep_log_file_num 1000
+rocksdb_large_prefix OFF
+rocksdb_lock_scanned_rows OFF
+rocksdb_lock_wait_timeout 1
+rocksdb_log_file_time_to_roll 0
+rocksdb_manifest_preallocation_size 4194304
+rocksdb_manual_compaction_threads 0
+rocksdb_manual_wal_flush ON
+rocksdb_master_skip_tx_api OFF
+rocksdb_max_background_jobs 2
+rocksdb_max_latest_deadlocks 5
+rocksdb_max_log_file_size 0
+rocksdb_max_manifest_file_size 1073741824
+rocksdb_max_manual_compactions 10
+rocksdb_max_row_locks 1048576
+rocksdb_max_subcompactions 1
+rocksdb_max_total_wal_size 0
+rocksdb_merge_buf_size 67108864
+rocksdb_merge_combine_read_size 1073741824
+rocksdb_merge_tmp_file_removal_delay_ms 0
+rocksdb_new_table_reader_for_compaction_inputs OFF
+rocksdb_no_block_cache OFF
+rocksdb_override_cf_options
+rocksdb_paranoid_checks ON
+rocksdb_pause_background_work ON
+rocksdb_perf_context_level 0
+rocksdb_persistent_cache_path
+rocksdb_persistent_cache_size_mb 0
+rocksdb_pin_l0_filter_and_index_blocks_in_cache ON
+rocksdb_print_snapshot_conflict_queries OFF
+rocksdb_rate_limiter_bytes_per_sec 0
+rocksdb_records_in_range 50
+rocksdb_remove_mariabackup_checkpoint OFF
+rocksdb_reset_stats OFF
+rocksdb_rollback_on_timeout OFF
+rocksdb_seconds_between_stat_computes 3600
+rocksdb_signal_drop_index_thread OFF
+rocksdb_sim_cache_size 0
+rocksdb_skip_bloom_filter_on_read OFF
+rocksdb_skip_fill_cache OFF
+rocksdb_skip_unique_check_tables .*
+rocksdb_sst_mgr_rate_bytes_per_sec 0
+rocksdb_stats_dump_period_sec 600
+rocksdb_stats_level 0
+rocksdb_stats_recalc_rate 0
+rocksdb_store_row_debug_checksums OFF
+rocksdb_strict_collation_check OFF
+rocksdb_strict_collation_exceptions
+rocksdb_table_cache_numshardbits 6
+rocksdb_table_stats_sampling_pct 10
+rocksdb_tmpdir
+rocksdb_trace_sst_api OFF
+rocksdb_two_write_queues ON
+rocksdb_unsafe_for_binlog OFF
+rocksdb_update_cf_options
+rocksdb_use_adaptive_mutex OFF
+rocksdb_use_clock_cache OFF
+rocksdb_use_direct_io_for_flush_and_compaction OFF
+rocksdb_use_direct_reads OFF
+rocksdb_use_fsync OFF
+rocksdb_validate_tables 1
+rocksdb_verify_row_debug_checksums OFF
+rocksdb_wal_bytes_per_sync 0
+rocksdb_wal_dir
+rocksdb_wal_recovery_mode 1
+rocksdb_wal_size_limit_mb 0
+rocksdb_wal_ttl_seconds 0
+rocksdb_whole_key_filtering ON
+rocksdb_write_batch_max_bytes 0
+rocksdb_write_disable_wal OFF
+rocksdb_write_ignore_missing_column_families OFF
+rocksdb_write_policy write_committed
+create table t47 (pk int primary key, col1 varchar(12)) engine=rocksdb;
+insert into t47 values (1, 'row1');
+insert into t47 values (2, 'row2');
+set rocksdb_bulk_load=1;
+insert into t47 values (3, 'row3'),(4, 'row4');
+set rocksdb_bulk_load=0;
+connect con1,localhost,root,,;
+set rocksdb_bulk_load=1;
+insert into t47 values (10, 'row10'),(11, 'row11');
+connection default;
+set rocksdb_bulk_load=1;
+insert into t47 values (100, 'row100'),(101, 'row101');
+disconnect con1;
+connection default;
+set rocksdb_bulk_load=0;
+select * from t47;
+pk col1
+1 row1
+2 row2
+3 row3
+4 row4
+10 row10
+11 row11
+100 row100
+101 row101
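+# A hedged sketch of the overlap failure described above (hypothetical, not
+# executed in this run): re-inserting an existing key while
+# rocksdb_bulk_load=1 is expected to raise an error rather than silently
+# overwrite the row, e.g.:
+#   set rocksdb_bulk_load=1;
+#   insert into t47 values (1, 'dup');  -- overlaps existing pk=1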
+drop table t47;
+#
+# Fix TRUNCATE over an empty table (a transaction was committed when none
+# had been started)
+#
+create table t48(pk int primary key auto_increment, col1 varchar(12)) engine=rocksdb;
+set autocommit=0;
+truncate table t48;
+set autocommit=1;
+drop table t48;
+#
+# MDEV-4059: RocksDB: query waiting for a lock cannot be killed until query timeout exceeded
+#
+create table t49 (pk int primary key, a int) engine=RocksDB;
+insert into t49 values (1,10),(2,20);
+begin;
+update t49 set a = 100 where pk = 1;
+connect con1,localhost,root,,;
+set rocksdb_lock_wait_timeout=60;
+set @var1= to_seconds(now());
+update t49 set a = 1000 where pk = 1;
+connect con2,localhost,root,,;
+kill query $con1_id;
+connection con1;
+ERROR 70100: Query execution was interrupted
+set @var2= to_seconds(now());
+select if ((@var2 - @var1) < 60, "passed", (@var2 - @var1)) as 'result';
+result
+passed
+connection default;
+disconnect con1;
+commit;
+drop table t49;
+#
+# Index-only tests for INT-based columns
+#
+create table t1 (pk int primary key, key1 int, col1 int, key(key1)) engine=rocksdb;
+insert into t1 values (1,1,1);
+insert into t1 values (2,2,2);
+insert into t1 values (-5,-5,-5);
+# INT column uses index-only:
+explain
+select key1 from t1 where key1=2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref key1 key1 5 const # Using index
+select key1 from t1 where key1=2;
+key1
+2
+select key1 from t1 where key1=-5;
+key1
+-5
+drop table t1;
+create table t2 (pk int primary key, key1 int unsigned, col1 int, key(key1)) engine=rocksdb;
+insert into t2 values (1,1,1), (2,2,2);
+# INT UNSIGNED column uses index-only:
+explain
+select key1 from t2 where key1=2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ref key1 key1 5 const # Using index
+select key1 from t2 where key1=2;
+key1
+2
+drop table t2;
+create table t3 (pk bigint primary key, key1 bigint, col1 int, key(key1)) engine=rocksdb;
+insert into t3 values (1,1,1), (2,2,2);
+# BIGINT uses index-only:
+explain
+select key1 from t3 where key1=2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t3 ref key1 key1 9 const # Using index
+select key1 from t3 where key1=2;
+key1
+2
+drop table t3;
+#
+# Index-only reads for string columns
+#
+create table t1 (
+pk int primary key,
+key1 char(10) character set binary,
+col1 int,
+key (key1)
+) engine=rocksdb;
+insert into t1 values(1, 'one',11), (2,'two',22);
+explain
+select key1 from t1 where key1='one';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref key1 key1 11 const # Using where; Using index
+# The following will produce no rows. This looks like a bug,
+# but it is actually correct behavior: binary strings are end-padded
+# with the \0 character (not with spaces), and the comparison does not
+# ignore the trailing \0 bytes.
+select key1 from t1 where key1='one';
+key1
+explain
+select hex(key1) from t1 where key1='one\0\0\0\0\0\0\0';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref key1 key1 11 const # Using where; Using index
+select hex(key1) from t1 where key1='one\0\0\0\0\0\0\0';
+hex(key1)
+6F6E6500000000000000
+drop table t1;
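+# A small illustration of the padding rule above (standard BINARY padding,
+# not part of this test run): casting to a fixed-length binary type pads
+# with 0x00 bytes, which is why only the explicitly padded literal matched:
+#   select hex(cast('one' as binary(10)));  -- 6F6E6500000000000000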
+create table t2 (
+pk int primary key,
+key1 char(10) collate latin1_bin,
+col1 int,
+key (key1)
+) engine=rocksdb;
+insert into t2 values(1, 'one',11), (2,'two',22);
+explain
+select key1 from t2 where key1='one';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ref key1 key1 11 const # Using where; Using index
+select key1 from t2 where key1='one';
+key1
+one
+drop table t2;
+create table t3 (
+pk int primary key,
+key1 char(10) collate utf8_bin,
+col1 int,
+key (key1)
+) engine=rocksdb;
+insert into t3 values(1, 'one',11), (2,'two',22);
+explain
+select key1 from t3 where key1='one';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t3 ref key1 key1 31 const # Using where; Using index
+select key1 from t3 where key1='one';
+key1
+one
+drop table t3;
+# a VARCHAR column
+create table t4 (
+pk int primary key,
+key1 varchar(10) collate latin1_bin,
+key(key1)
+) engine=rocksdb;
+insert into t4 values(1, 'one'), (2,'two'),(3,'threee'),(55,'fifty-five');
+explain
+select key1 from t4 where key1='two';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t4 ref key1 key1 13 const # Using where; Using index
+select key1 from t4 where key1='two';
+key1
+two
+select key1 from t4 where key1='fifty-five';
+key1
+fifty-five
+explain
+select key1 from t4 where key1 between 's' and 'u';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t4 range key1 key1 13 NULL # Using where; Using index
+select key1 from t4 where key1 between 's' and 'u';
+key1
+threee
+two
+drop table t4;
+#
+# MDEV-4305: RocksDB: Assertion `((keypart_map + 1) & keypart_map) == 0' fails in calculate_key_len
+#
+CREATE TABLE t1 (pk1 INT, pk2 CHAR(32), i INT, PRIMARY KEY(pk1,pk2), KEY(i)) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (1,'test1',6),(2,'test2',8);
+SELECT * FROM t1 WHERE i != 3 OR pk1 > 9;
+pk1 pk2 i
+1 test1 6
+2 test2 8
+DROP TABLE t1;
+#
+# MDEV-4298: RocksDB: Assertion `thd->is_error() || kill_errno' fails in ha_rows filesort
+#
+call mtr.add_suppression("Sort aborted");
+CREATE TABLE t1 (pk INT PRIMARY KEY, i INT, KEY(i)) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (1,1),(2,2);
+BEGIN;
+UPDATE t1 SET i = 100;
+connect con1,localhost,root,,test;
+DELETE IGNORE FROM t1 ORDER BY i;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+disconnect con1;
+connection default;
+COMMIT;
+DROP TABLE t1;
+#
+# MDEV-4324: RocksDB: Valgrind "Use of uninitialised value" warnings on inserting value into varchar field
+# (testcase only)
+#
+CREATE TABLE t1 (pk INT PRIMARY KEY, c VARCHAR(4)) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (1,'foo'), (2,'bar');
+DROP TABLE t1;
+#
+# MDEV-4304: RocksDB: Index-only scan by a field with utf8_bin collation returns garbage symbols
+#
+CREATE TABLE t1 (pk INT PRIMARY KEY, c1 CHAR(1), c2 CHAR(1), KEY(c1)) ENGINE=RocksDB CHARSET utf8 COLLATE utf8_bin;
+INSERT INTO t1 VALUES (1,'h','h');
+SELECT * FROM t1;
+pk c1 c2
+1 h h
+SELECT c1 FROM t1;
+c1
+h
+DROP TABLE t1;
+#
+# MDEV-4300: RocksDB: Server crashes in inline_mysql_mutex_lock on SELECT .. FOR UPDATE
+#
+CREATE TABLE t2 (pk INT PRIMARY KEY, i INT, KEY (i)) ENGINE=RocksDB;
+INSERT INTO t2 VALUES (1,4),(2,5);
+SELECT 1 FROM t2 WHERE i < 0 FOR UPDATE;
+1
+DROP TABLE t2;
+#
+# MDEV-4301: RocksDB: Assertion `pack_info != __null' fails in Rdb_key_def::unpack_record
+#
+CREATE TABLE t1 (pk INT PRIMARY KEY, i INT, c CHAR(1), KEY(c,i)) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (1,4,'d'),(2,8,'e');
+SELECT MAX( pk ) FROM t1 WHERE i = 105 AND c = 'h';
+MAX( pk )
+NULL
+DROP TABLE t1;
+#
+# MDEV-4337: RocksDB: Inconsistent results comparing a char field with an int field
+#
+create table t1 (c char(1), i int, primary key(c), key(i)) engine=RocksDB;
+insert into t1 values ('2',2),('6',6);
+select * from t1 where c = i;
+c i
+2 2
+6 6
+select * from t1 ignore index (i) where c = i;
+c i
+2 2
+6 6
+drop table t1;
+#
+# Test statement rollback inside a transaction
+#
+create table t1 (pk varchar(12) primary key) engine=rocksdb;
+insert into t1 values ('old-val1'),('old-val2');
+create table t2 (pk varchar(12) primary key) engine=rocksdb;
+insert into t2 values ('new-val2'),('old-val1');
+begin;
+insert into t1 values ('new-val1');
+insert into t1 select * from t2;
+ERROR 23000: Duplicate entry 'old-val1' for key 'PRIMARY'
+commit;
+select * from t1;
+pk
+new-val1
+old-val1
+old-val2
+drop table t1, t2;
+#
+# MDEV-4383: RocksDB: Wrong result of DELETE .. ORDER BY .. LIMIT:
+# rows that should be deleted remain in the table
+#
+CREATE TABLE t2 (pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=RocksDB;
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=RocksDB;
+INSERT INTO t1 (pk) VALUES (NULL),(NULL);
+BEGIN;
+INSERT INTO t2 (pk) VALUES (NULL),(NULL);
+INSERT INTO t1 (pk) VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL);
+SELECT * FROM t1 ORDER BY pk LIMIT 9;
+pk
+1
+2
+3
+4
+5
+6
+7
+8
+affected rows: 8
+DELETE FROM t1 ORDER BY pk LIMIT 9;
+affected rows: 8
+SELECT * FROM t1 ORDER BY pk LIMIT 9;
+pk
+affected rows: 0
+DROP TABLE t1,t2;
+#
+# MDEV-4374: RocksDB: Valgrind warnings 'Use of uninitialised value' on
+# inserting into a varchar column
+#
+CREATE TABLE t1 (pk INT PRIMARY KEY, a VARCHAR(32)) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (1,'foo'),(2,'bar');
+DROP TABLE t1;
+#
+# MDEV-4061: RocksDB: Changes from an interrupted query are still applied
+#
+create table t1 (pk int primary key, a int) engine=RocksDB;
+insert into t1 values (1,10),(2,20);
+set autocommit = 1;
+update t1 set a = sleep(100) where pk = 1;
+connect con1,localhost,root,,;
+kill query $con_id;
+connection default;
+ERROR 70100: Query execution was interrupted
+select * from t1;
+pk a
+1 10
+2 20
+disconnect con1;
+drop table t1;
+#
+# MDEV-4099: RocksDB: Wrong results with index and range access after INSERT IGNORE or REPLACE
+#
+CREATE TABLE t1 (pk INT PRIMARY KEY, a SMALLINT, b INT, KEY (a)) ENGINE=RocksDB;
+INSERT IGNORE INTO t1 VALUES (1, 157, 0), (2, 1898, -504403), (1, -14659, 0);
+Warnings:
+Warning 1062 Duplicate entry '1' for key 'PRIMARY'
+SELECT * FROM t1;
+pk a b
+1 157 0
+2 1898 -504403
+SELECT pk FROM t1;
+pk
+1
+2
+SELECT * FROM t1 WHERE a != 97;
+pk a b
+1 157 0
+2 1898 -504403
+DROP TABLE t1;
+#
+# Test @@rocksdb_max_row_locks
+#
+CREATE TABLE t1 (pk INT PRIMARY KEY, a int) ENGINE=RocksDB;
+set @a=-1;
+insert into t1 select (@a:=@a+1), 1234 from information_schema.session_variables limit 100;
+set @tmp1= @@rocksdb_max_row_locks;
+set rocksdb_max_row_locks= 20;
+update t1 set a=a+10;
+ERROR HY000: Got error 10 'Operation aborted: Failed to acquire lock due to rocksdb_max_row_locks limit' from ROCKSDB
+DROP TABLE t1;
+#
+# Test the AUTO_INCREMENT behavior problem:
+# "explicit insert into an auto-inc column is not noticed by RocksDB"
+#
+create table t1 (i int primary key auto_increment) engine=RocksDB;
+insert into t1 values (null);
+insert into t1 values (null);
+select * from t1;
+i
+1
+2
+drop table t1;
+create table t2 (i int primary key auto_increment) engine=RocksDB;
+insert into t2 values (1);
+select * from t2;
+i
+1
+# this used to fail: the RocksDB engine did not notice the use of '1' above
+insert into t2 values (null);
+select * from t2;
+i
+1
+2
+# but then this succeeds, so the previous statement must have incremented the next-number counter
+insert into t2 values (null);
+select * from t2;
+i
+1
+2
+3
+drop table t2;
+#
+# Fix Issue#2: AUTO_INCREMENT value doesn't survive server shutdown
+#
+create table t1 (i int primary key auto_increment) engine=RocksDB;
+insert into t1 values (null);
+insert into t1 values (null);
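+# (The pause/restore of ROCKSDB_PAUSE_BACKGROUND_WORK below brackets a
+# server restart, presumably performed by the test's restart helper; after
+# the restart the next insert must continue from 3 rather than reuse 1.)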
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+insert into t1 values (null);
+select * from t1;
+i
+1
+2
+3
+drop table t1;
+#
+# Fix Issue #3: SHOW TABLE STATUS shows Auto_increment=0
+#
+create table t1 (i int primary key auto_increment) engine=RocksDB;
+insert into t1 values (null),(null);
+show table status like 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 1000 0 # 0 0 0 3 NULL NULL NULL latin1_swedish_ci NULL
+drop table t1;
+#
+# Fix Issue #4: Crash when using pseudo-unique keys
+#
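+# ("Pseudo-unique" because, as noted earlier, RocksDB-SE creates UNIQUE
+# indexes without actually enforcing uniqueness at this point.)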
+CREATE TABLE t1 (
+i INT,
+t TINYINT,
+s SMALLINT,
+m MEDIUMINT,
+b BIGINT,
+pk MEDIUMINT AUTO_INCREMENT PRIMARY KEY,
+UNIQUE KEY b_t (b,t)
+) ENGINE=rocksdb;
+INSERT INTO t1 (i,t,s,m,b) VALUES (1,2,3,4,5),(1000,100,10000,1000000,1000000000000000000),(5,100,10000,1000000,100000000000000000),(2,3,4,5,6),(3,4,5,6,7),(101,102,103,104,105),(10001,103,10002,10003,10004),(10,11,12,13,14),(11,12,13,14,15),(12,13,14,15,16);
+SELECT b+t FROM t1 WHERE (b,t) IN ( SELECT b, t FROM t1 WHERE i>1 ) ORDER BY b+t;
+b+t
+9
+11
+25
+27
+29
+207
+10107
+100000000000000100
+1000000000000000100
+DROP TABLE t1;
+#
+# Fix issue #5: Transaction rollback doesn't undo all changes.
+#
+create table t0 (a int) engine=myisam;
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1 (id int auto_increment primary key, value int) engine=rocksdb;
+set autocommit=0;
+begin;
+set @a:=0;
+insert into t1 select @a:=@a+1, @a from t0 A, t0 B, t0 C, t0 D where D.a<4;
+insert into t1 select @a:=@a+1, @a from t0 A, t0 B, t0 C, t0 D where D.a<4;
+insert into t1 select @a:=@a+1, @a from t0 A, t0 B, t0 C, t0 D where D.a<4;
+rollback;
+select count(*) from t1;
+count(*)
+0
+set autocommit=1;
+drop table t0, t1;
+#
+# Check status variables
+# NOTE: We exclude rocksdb_num_get_for_update_calls because it is a debug-only status variable
+#
+show status where variable_name like 'rocksdb%' and variable_name not like '%num_get_for_update%';
+Variable_name Value
+Rocksdb_rows_deleted #
+Rocksdb_rows_inserted #
+Rocksdb_rows_read #
+Rocksdb_rows_updated #
+Rocksdb_rows_deleted_blind #
+Rocksdb_rows_expired #
+Rocksdb_rows_filtered #
+Rocksdb_system_rows_deleted #
+Rocksdb_system_rows_inserted #
+Rocksdb_system_rows_read #
+Rocksdb_system_rows_updated #
+Rocksdb_memtable_total #
+Rocksdb_memtable_unflushed #
+Rocksdb_queries_point #
+Rocksdb_queries_range #
+Rocksdb_covered_secondary_key_lookups #
+Rocksdb_block_cache_add #
+Rocksdb_block_cache_add_failures #
+Rocksdb_block_cache_bytes_read #
+Rocksdb_block_cache_bytes_write #
+Rocksdb_block_cache_data_add #
+Rocksdb_block_cache_data_bytes_insert #
+Rocksdb_block_cache_data_hit #
+Rocksdb_block_cache_data_miss #
+Rocksdb_block_cache_filter_add #
+Rocksdb_block_cache_filter_bytes_evict #
+Rocksdb_block_cache_filter_bytes_insert #
+Rocksdb_block_cache_filter_hit #
+Rocksdb_block_cache_filter_miss #
+Rocksdb_block_cache_hit #
+Rocksdb_block_cache_index_add #
+Rocksdb_block_cache_index_bytes_evict #
+Rocksdb_block_cache_index_bytes_insert #
+Rocksdb_block_cache_index_hit #
+Rocksdb_block_cache_index_miss #
+Rocksdb_block_cache_miss #
+Rocksdb_block_cachecompressed_hit #
+Rocksdb_block_cachecompressed_miss #
+Rocksdb_bloom_filter_full_positive #
+Rocksdb_bloom_filter_full_true_positive #
+Rocksdb_bloom_filter_prefix_checked #
+Rocksdb_bloom_filter_prefix_useful #
+Rocksdb_bloom_filter_useful #
+Rocksdb_bytes_read #
+Rocksdb_bytes_written #
+Rocksdb_compact_read_bytes #
+Rocksdb_compact_write_bytes #
+Rocksdb_compaction_key_drop_new #
+Rocksdb_compaction_key_drop_obsolete #
+Rocksdb_compaction_key_drop_user #
+Rocksdb_flush_write_bytes #
+Rocksdb_get_hit_l0 #
+Rocksdb_get_hit_l1 #
+Rocksdb_get_hit_l2_and_up #
+Rocksdb_getupdatessince_calls #
+Rocksdb_iter_bytes_read #
+Rocksdb_manual_compactions_processed #
+Rocksdb_manual_compactions_running #
+Rocksdb_memtable_hit #
+Rocksdb_memtable_miss #
+Rocksdb_no_file_closes #
+Rocksdb_no_file_errors #
+Rocksdb_no_file_opens #
+Rocksdb_num_iterators #
+Rocksdb_number_block_not_compressed #
+Rocksdb_number_db_next #
+Rocksdb_number_db_next_found #
+Rocksdb_number_db_prev #
+Rocksdb_number_db_prev_found #
+Rocksdb_number_db_seek #
+Rocksdb_number_db_seek_found #
+Rocksdb_number_deletes_filtered #
+Rocksdb_number_keys_read #
+Rocksdb_number_keys_updated #
+Rocksdb_number_keys_written #
+Rocksdb_number_merge_failures #
+Rocksdb_number_multiget_bytes_read #
+Rocksdb_number_multiget_get #
+Rocksdb_number_multiget_keys_read #
+Rocksdb_number_reseeks_iteration #
+Rocksdb_number_sst_entry_delete #
+Rocksdb_number_sst_entry_merge #
+Rocksdb_number_sst_entry_other #
+Rocksdb_number_sst_entry_put #
+Rocksdb_number_sst_entry_singledelete #
+Rocksdb_number_superversion_acquires #
+Rocksdb_number_superversion_cleanups #
+Rocksdb_number_superversion_releases #
+Rocksdb_row_lock_deadlocks #
+Rocksdb_row_lock_wait_timeouts #
+Rocksdb_snapshot_conflict_errors #
+Rocksdb_stall_l0_file_count_limit_slowdowns #
+Rocksdb_stall_locked_l0_file_count_limit_slowdowns #
+Rocksdb_stall_l0_file_count_limit_stops #
+Rocksdb_stall_locked_l0_file_count_limit_stops #
+Rocksdb_stall_pending_compaction_limit_stops #
+Rocksdb_stall_pending_compaction_limit_slowdowns #
+Rocksdb_stall_memtable_limit_stops #
+Rocksdb_stall_memtable_limit_slowdowns #
+Rocksdb_stall_total_stops #
+Rocksdb_stall_total_slowdowns #
+Rocksdb_stall_micros #
+Rocksdb_wal_bytes #
+Rocksdb_wal_group_syncs #
+Rocksdb_wal_synced #
+Rocksdb_write_other #
+Rocksdb_write_self #
+Rocksdb_write_timedout #
+Rocksdb_write_wal #
+select VARIABLE_NAME from INFORMATION_SCHEMA.global_status where VARIABLE_NAME LIKE 'rocksdb%' and VARIABLE_NAME NOT LIKE '%num_get_for_update%';
+VARIABLE_NAME
+ROCKSDB_ROWS_DELETED
+ROCKSDB_ROWS_INSERTED
+ROCKSDB_ROWS_READ
+ROCKSDB_ROWS_UPDATED
+ROCKSDB_ROWS_DELETED_BLIND
+ROCKSDB_ROWS_EXPIRED
+ROCKSDB_ROWS_FILTERED
+ROCKSDB_SYSTEM_ROWS_DELETED
+ROCKSDB_SYSTEM_ROWS_INSERTED
+ROCKSDB_SYSTEM_ROWS_READ
+ROCKSDB_SYSTEM_ROWS_UPDATED
+ROCKSDB_MEMTABLE_TOTAL
+ROCKSDB_MEMTABLE_UNFLUSHED
+ROCKSDB_QUERIES_POINT
+ROCKSDB_QUERIES_RANGE
+ROCKSDB_COVERED_SECONDARY_KEY_LOOKUPS
+ROCKSDB_BLOCK_CACHE_ADD
+ROCKSDB_BLOCK_CACHE_ADD_FAILURES
+ROCKSDB_BLOCK_CACHE_BYTES_READ
+ROCKSDB_BLOCK_CACHE_BYTES_WRITE
+ROCKSDB_BLOCK_CACHE_DATA_ADD
+ROCKSDB_BLOCK_CACHE_DATA_BYTES_INSERT
+ROCKSDB_BLOCK_CACHE_DATA_HIT
+ROCKSDB_BLOCK_CACHE_DATA_MISS
+ROCKSDB_BLOCK_CACHE_FILTER_ADD
+ROCKSDB_BLOCK_CACHE_FILTER_BYTES_EVICT
+ROCKSDB_BLOCK_CACHE_FILTER_BYTES_INSERT
+ROCKSDB_BLOCK_CACHE_FILTER_HIT
+ROCKSDB_BLOCK_CACHE_FILTER_MISS
+ROCKSDB_BLOCK_CACHE_HIT
+ROCKSDB_BLOCK_CACHE_INDEX_ADD
+ROCKSDB_BLOCK_CACHE_INDEX_BYTES_EVICT
+ROCKSDB_BLOCK_CACHE_INDEX_BYTES_INSERT
+ROCKSDB_BLOCK_CACHE_INDEX_HIT
+ROCKSDB_BLOCK_CACHE_INDEX_MISS
+ROCKSDB_BLOCK_CACHE_MISS
+ROCKSDB_BLOCK_CACHECOMPRESSED_HIT
+ROCKSDB_BLOCK_CACHECOMPRESSED_MISS
+ROCKSDB_BLOOM_FILTER_FULL_POSITIVE
+ROCKSDB_BLOOM_FILTER_FULL_TRUE_POSITIVE
+ROCKSDB_BLOOM_FILTER_PREFIX_CHECKED
+ROCKSDB_BLOOM_FILTER_PREFIX_USEFUL
+ROCKSDB_BLOOM_FILTER_USEFUL
+ROCKSDB_BYTES_READ
+ROCKSDB_BYTES_WRITTEN
+ROCKSDB_COMPACT_READ_BYTES
+ROCKSDB_COMPACT_WRITE_BYTES
+ROCKSDB_COMPACTION_KEY_DROP_NEW
+ROCKSDB_COMPACTION_KEY_DROP_OBSOLETE
+ROCKSDB_COMPACTION_KEY_DROP_USER
+ROCKSDB_FLUSH_WRITE_BYTES
+ROCKSDB_GET_HIT_L0
+ROCKSDB_GET_HIT_L1
+ROCKSDB_GET_HIT_L2_AND_UP
+ROCKSDB_GETUPDATESSINCE_CALLS
+ROCKSDB_ITER_BYTES_READ
+ROCKSDB_MANUAL_COMPACTIONS_PROCESSED
+ROCKSDB_MANUAL_COMPACTIONS_RUNNING
+ROCKSDB_MEMTABLE_HIT
+ROCKSDB_MEMTABLE_MISS
+ROCKSDB_NO_FILE_CLOSES
+ROCKSDB_NO_FILE_ERRORS
+ROCKSDB_NO_FILE_OPENS
+ROCKSDB_NUM_ITERATORS
+ROCKSDB_NUMBER_BLOCK_NOT_COMPRESSED
+ROCKSDB_NUMBER_DB_NEXT
+ROCKSDB_NUMBER_DB_NEXT_FOUND
+ROCKSDB_NUMBER_DB_PREV
+ROCKSDB_NUMBER_DB_PREV_FOUND
+ROCKSDB_NUMBER_DB_SEEK
+ROCKSDB_NUMBER_DB_SEEK_FOUND
+ROCKSDB_NUMBER_DELETES_FILTERED
+ROCKSDB_NUMBER_KEYS_READ
+ROCKSDB_NUMBER_KEYS_UPDATED
+ROCKSDB_NUMBER_KEYS_WRITTEN
+ROCKSDB_NUMBER_MERGE_FAILURES
+ROCKSDB_NUMBER_MULTIGET_BYTES_READ
+ROCKSDB_NUMBER_MULTIGET_GET
+ROCKSDB_NUMBER_MULTIGET_KEYS_READ
+ROCKSDB_NUMBER_RESEEKS_ITERATION
+ROCKSDB_NUMBER_SST_ENTRY_DELETE
+ROCKSDB_NUMBER_SST_ENTRY_MERGE
+ROCKSDB_NUMBER_SST_ENTRY_OTHER
+ROCKSDB_NUMBER_SST_ENTRY_PUT
+ROCKSDB_NUMBER_SST_ENTRY_SINGLEDELETE
+ROCKSDB_NUMBER_SUPERVERSION_ACQUIRES
+ROCKSDB_NUMBER_SUPERVERSION_CLEANUPS
+ROCKSDB_NUMBER_SUPERVERSION_RELEASES
+ROCKSDB_ROW_LOCK_DEADLOCKS
+ROCKSDB_ROW_LOCK_WAIT_TIMEOUTS
+ROCKSDB_SNAPSHOT_CONFLICT_ERRORS
+ROCKSDB_STALL_L0_FILE_COUNT_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_L0_FILE_COUNT_LIMIT_STOPS
+ROCKSDB_STALL_LOCKED_L0_FILE_COUNT_LIMIT_STOPS
+ROCKSDB_STALL_PENDING_COMPACTION_LIMIT_STOPS
+ROCKSDB_STALL_PENDING_COMPACTION_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_MEMTABLE_LIMIT_STOPS
+ROCKSDB_STALL_MEMTABLE_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_TOTAL_STOPS
+ROCKSDB_STALL_TOTAL_SLOWDOWNS
+ROCKSDB_STALL_MICROS
+ROCKSDB_WAL_BYTES
+ROCKSDB_WAL_GROUP_SYNCS
+ROCKSDB_WAL_SYNCED
+ROCKSDB_WRITE_OTHER
+ROCKSDB_WRITE_SELF
+ROCKSDB_WRITE_TIMEDOUT
+ROCKSDB_WRITE_WAL
+# RocksDB-SE's status variables are global internally
+# but they are shown as both session and global, like InnoDB's status vars.
+select VARIABLE_NAME from INFORMATION_SCHEMA.session_status where VARIABLE_NAME LIKE 'rocksdb%' and VARIABLE_NAME NOT LIKE '%num_get_for_update%';
+VARIABLE_NAME
+ROCKSDB_ROWS_DELETED
+ROCKSDB_ROWS_INSERTED
+ROCKSDB_ROWS_READ
+ROCKSDB_ROWS_UPDATED
+ROCKSDB_ROWS_DELETED_BLIND
+ROCKSDB_ROWS_EXPIRED
+ROCKSDB_ROWS_FILTERED
+ROCKSDB_SYSTEM_ROWS_DELETED
+ROCKSDB_SYSTEM_ROWS_INSERTED
+ROCKSDB_SYSTEM_ROWS_READ
+ROCKSDB_SYSTEM_ROWS_UPDATED
+ROCKSDB_MEMTABLE_TOTAL
+ROCKSDB_MEMTABLE_UNFLUSHED
+ROCKSDB_QUERIES_POINT
+ROCKSDB_QUERIES_RANGE
+ROCKSDB_COVERED_SECONDARY_KEY_LOOKUPS
+ROCKSDB_BLOCK_CACHE_ADD
+ROCKSDB_BLOCK_CACHE_ADD_FAILURES
+ROCKSDB_BLOCK_CACHE_BYTES_READ
+ROCKSDB_BLOCK_CACHE_BYTES_WRITE
+ROCKSDB_BLOCK_CACHE_DATA_ADD
+ROCKSDB_BLOCK_CACHE_DATA_BYTES_INSERT
+ROCKSDB_BLOCK_CACHE_DATA_HIT
+ROCKSDB_BLOCK_CACHE_DATA_MISS
+ROCKSDB_BLOCK_CACHE_FILTER_ADD
+ROCKSDB_BLOCK_CACHE_FILTER_BYTES_EVICT
+ROCKSDB_BLOCK_CACHE_FILTER_BYTES_INSERT
+ROCKSDB_BLOCK_CACHE_FILTER_HIT
+ROCKSDB_BLOCK_CACHE_FILTER_MISS
+ROCKSDB_BLOCK_CACHE_HIT
+ROCKSDB_BLOCK_CACHE_INDEX_ADD
+ROCKSDB_BLOCK_CACHE_INDEX_BYTES_EVICT
+ROCKSDB_BLOCK_CACHE_INDEX_BYTES_INSERT
+ROCKSDB_BLOCK_CACHE_INDEX_HIT
+ROCKSDB_BLOCK_CACHE_INDEX_MISS
+ROCKSDB_BLOCK_CACHE_MISS
+ROCKSDB_BLOCK_CACHECOMPRESSED_HIT
+ROCKSDB_BLOCK_CACHECOMPRESSED_MISS
+ROCKSDB_BLOOM_FILTER_FULL_POSITIVE
+ROCKSDB_BLOOM_FILTER_FULL_TRUE_POSITIVE
+ROCKSDB_BLOOM_FILTER_PREFIX_CHECKED
+ROCKSDB_BLOOM_FILTER_PREFIX_USEFUL
+ROCKSDB_BLOOM_FILTER_USEFUL
+ROCKSDB_BYTES_READ
+ROCKSDB_BYTES_WRITTEN
+ROCKSDB_COMPACT_READ_BYTES
+ROCKSDB_COMPACT_WRITE_BYTES
+ROCKSDB_COMPACTION_KEY_DROP_NEW
+ROCKSDB_COMPACTION_KEY_DROP_OBSOLETE
+ROCKSDB_COMPACTION_KEY_DROP_USER
+ROCKSDB_FLUSH_WRITE_BYTES
+ROCKSDB_GET_HIT_L0
+ROCKSDB_GET_HIT_L1
+ROCKSDB_GET_HIT_L2_AND_UP
+ROCKSDB_GETUPDATESSINCE_CALLS
+ROCKSDB_ITER_BYTES_READ
+ROCKSDB_MANUAL_COMPACTIONS_PROCESSED
+ROCKSDB_MANUAL_COMPACTIONS_RUNNING
+ROCKSDB_MEMTABLE_HIT
+ROCKSDB_MEMTABLE_MISS
+ROCKSDB_NO_FILE_CLOSES
+ROCKSDB_NO_FILE_ERRORS
+ROCKSDB_NO_FILE_OPENS
+ROCKSDB_NUM_ITERATORS
+ROCKSDB_NUMBER_BLOCK_NOT_COMPRESSED
+ROCKSDB_NUMBER_DB_NEXT
+ROCKSDB_NUMBER_DB_NEXT_FOUND
+ROCKSDB_NUMBER_DB_PREV
+ROCKSDB_NUMBER_DB_PREV_FOUND
+ROCKSDB_NUMBER_DB_SEEK
+ROCKSDB_NUMBER_DB_SEEK_FOUND
+ROCKSDB_NUMBER_DELETES_FILTERED
+ROCKSDB_NUMBER_KEYS_READ
+ROCKSDB_NUMBER_KEYS_UPDATED
+ROCKSDB_NUMBER_KEYS_WRITTEN
+ROCKSDB_NUMBER_MERGE_FAILURES
+ROCKSDB_NUMBER_MULTIGET_BYTES_READ
+ROCKSDB_NUMBER_MULTIGET_GET
+ROCKSDB_NUMBER_MULTIGET_KEYS_READ
+ROCKSDB_NUMBER_RESEEKS_ITERATION
+ROCKSDB_NUMBER_SST_ENTRY_DELETE
+ROCKSDB_NUMBER_SST_ENTRY_MERGE
+ROCKSDB_NUMBER_SST_ENTRY_OTHER
+ROCKSDB_NUMBER_SST_ENTRY_PUT
+ROCKSDB_NUMBER_SST_ENTRY_SINGLEDELETE
+ROCKSDB_NUMBER_SUPERVERSION_ACQUIRES
+ROCKSDB_NUMBER_SUPERVERSION_CLEANUPS
+ROCKSDB_NUMBER_SUPERVERSION_RELEASES
+ROCKSDB_ROW_LOCK_DEADLOCKS
+ROCKSDB_ROW_LOCK_WAIT_TIMEOUTS
+ROCKSDB_SNAPSHOT_CONFLICT_ERRORS
+ROCKSDB_STALL_L0_FILE_COUNT_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_L0_FILE_COUNT_LIMIT_STOPS
+ROCKSDB_STALL_LOCKED_L0_FILE_COUNT_LIMIT_STOPS
+ROCKSDB_STALL_PENDING_COMPACTION_LIMIT_STOPS
+ROCKSDB_STALL_PENDING_COMPACTION_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_MEMTABLE_LIMIT_STOPS
+ROCKSDB_STALL_MEMTABLE_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_TOTAL_STOPS
+ROCKSDB_STALL_TOTAL_SLOWDOWNS
+ROCKSDB_STALL_MICROS
+ROCKSDB_WAL_BYTES
+ROCKSDB_WAL_GROUP_SYNCS
+ROCKSDB_WAL_SYNCED
+ROCKSDB_WRITE_OTHER
+ROCKSDB_WRITE_SELF
+ROCKSDB_WRITE_TIMEDOUT
+ROCKSDB_WRITE_WAL
+#
+# Fix issue #9: HA_ERR_INTERNAL_ERROR when running linkbench
+#
+create table t0 (a int) engine=myisam;
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1 (
+pk int primary key,
+col1 varchar(255),
+key(col1)
+) engine=rocksdb;
+insert into t1 select a, repeat('123456789ABCDEF-', 15) from t0;
+select * from t1 where pk=3;
+pk col1
+3 123456789ABCDEF-123456789ABCDEF-123456789ABCDEF-123456789ABCDEF-123456789ABCDEF-123456789ABCDEF-123456789ABCDEF-123456789ABCDEF-123456789ABCDEF-123456789ABCDEF-123456789ABCDEF-123456789ABCDEF-123456789ABCDEF-123456789ABCDEF-123456789ABCDEF-
+drop table t0, t1;
+#
+# Fix issue #10: Segfault in Rdb_key_def::get_primary_key_tuple
+#
+create table t0 (a int) engine=myisam;
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+CREATE TABLE t1 (
+id1 bigint(20) unsigned NOT NULL DEFAULT '0',
+id2 bigint(20) unsigned NOT NULL DEFAULT '0',
+link_type bigint(20) unsigned NOT NULL DEFAULT '0',
+visibility tinyint(3) NOT NULL DEFAULT '0',
+data varchar(255) NOT NULL DEFAULT '',
+time bigint(20) unsigned NOT NULL DEFAULT '0',
+version int(11) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (link_type,id1,id2)
+) engine=rocksdb;
+insert into t1 select a,a,a,1,a,a,a from t0;
+alter table t1 add index id1_type (id1,link_type,visibility,time,version,data);
+select * from t1 where id1 = 3;
+id1 id2 link_type visibility data time version
+3 3 3 1 3 3 3
+drop table t0,t1;
+#
+# Test column families
+#
+create table t1 (
+pk int primary key,
+col1 int,
+col2 int,
+key(col1) comment 'cf3',
+key(col2) comment 'cf4'
+) engine=rocksdb;
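+# (In RocksDB-SE the index COMMENT names the column family that stores the
+# index, so key(col1) lives in 'cf3' and key(col2) in 'cf4'.)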
+insert into t1 values (1,1,1), (2,2,2), (3,3,3), (4,4,4), (5,5,5);
+explain
+select * from t1 where col1=2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref col1 col1 5 const #
+select * from t1 where col1=2;
+pk col1 col2
+2 2 2
+explain
+select * from t1 where col2=3;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref col2 col2 5 const #
+select * from t1 where col2=3;
+pk col1 col2
+3 3 3
+select * from t1 where pk=4;
+pk col1 col2
+4 4 4
+drop table t1;
+#
+# Try primary key in a non-default CF:
+#
+create table t1 (
+pk int,
+col1 int,
+col2 int,
+key(col1) comment 'cf3',
+key(col2) comment 'cf4',
+primary key (pk) comment 'cf5'
+) engine=rocksdb;
+insert into t1 values (1,1,1), (2,2,2), (3,3,3), (4,4,4), (5,5,5);
+explain
+select * from t1 where col1=2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref col1 col1 5 const #
+select * from t1 where col1=2;
+pk col1 col2
+2 2 2
+select * from t1 where pk=4;
+pk col1 col2
+4 4 4
+drop table t1;
+#
+# Issue #15: SIGSEGV from reading in blob data
+#
+CREATE TABLE t1 (
+id int not null,
+blob_col text,
+PRIMARY KEY (id)
+) ENGINE=ROCKSDB CHARSET=latin1;
+INSERT INTO t1 SET id=123, blob_col=repeat('z',64000) ON DUPLICATE KEY UPDATE blob_col=VALUES(blob_col);
+INSERT INTO t1 SET id=123, blob_col='' ON DUPLICATE KEY UPDATE blob_col=VALUES(blob_col);
+DROP TABLE t1;
+#
+# Issue #17: Automatic per-index column families
+# (Now deprecated)
+#
+create table t1 (
+id int not null,
+key1 int,
+PRIMARY KEY (id),
+index (key1) comment '$per_index_cf'
+) engine=rocksdb;
+ERROR HY000: The per-index column family option has been deprecated
+#
+# Issue #22: SELECT ... FOR UPDATE takes a long time
+#
+create table t0 (a int) engine=myisam;
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1 (
+id1 int,
+id2 int,
+value1 int,
+value2 int,
+primary key(id1, id2) COMMENT 'new_column_family',
+key(id2)
+) engine=rocksdb default charset=latin1 collate=latin1_bin;
+insert into t1 select A.a, B.a, 31, 1234 from t0 A, t0 B;
+explain
+select * from t1 where id1=30 and value1=30 for update;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref PRIMARY PRIMARY 4 const # Using where
+set @var1=(select variable_value
+from information_schema.global_status
+where variable_name='rocksdb_number_keys_read');
+select * from t1 where id1=3 and value1=3 for update;
+id1 id2 value1 value2
+set @var2=(select variable_value
+from information_schema.global_status
+where variable_name='rocksdb_number_keys_read');
+# The following must return true (before the fix, the difference was 70):
+select if((@var2 - @var1) < 30, 1, @var2-@var1);
+if((@var2 - @var1) < 30, 1, @var2-@var1)
+1
+drop table t0,t1;
+#
+# Issue #33: SELECT ... FROM rocksdb_table ORDER BY primary_key uses sorting
+#
+create table t1 (id int primary key, value int) engine=rocksdb;
+insert into t1 values (1,1),(2,2),(3,3);
+# The following must not use 'Using filesort':
+explain select * from t1 ORDER BY id;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL PRIMARY 4 NULL #
+drop table t1;
+#
+# Issue #26: Index-only scans for DATETIME and TIMESTAMP
+#
+create table t0 (a int) engine=myisam;
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+# Try a DATETIME column:
+create table t1 (
+pk int auto_increment primary key,
+kp1 datetime,
+kp2 int,
+col1 int,
+key(kp1, kp2)
+) engine=rocksdb;
+insert into t1 (kp1,kp2)
+select date_add('2015-01-01 12:34:56', interval a day), a from t0;
+select * from t1;
+pk kp1 kp2 col1
+1 2015-01-01 12:34:56 0 NULL
+2 2015-01-02 12:34:56 1 NULL
+3 2015-01-03 12:34:56 2 NULL
+4 2015-01-04 12:34:56 3 NULL
+5 2015-01-05 12:34:56 4 NULL
+6 2015-01-06 12:34:56 5 NULL
+7 2015-01-07 12:34:56 6 NULL
+8 2015-01-08 12:34:56 7 NULL
+9 2015-01-09 12:34:56 8 NULL
+10 2015-01-10 12:34:56 9 NULL
+# This must show 'Using index'
+explain
+select kp1,kp2 from t1 force index (kp1)
+where kp1 between '2015-01-01 00:00:00' and '2015-01-05 23:59:59';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range kp1 kp1 6 NULL # Using where; Using index
+select kp1,kp2 from t1 force index (kp1)
+where kp1 between '2015-01-01 00:00:00' and '2015-01-05 23:59:59';
+kp1 kp2
+2015-01-01 12:34:56 0
+2015-01-02 12:34:56 1
+2015-01-03 12:34:56 2
+2015-01-04 12:34:56 3
+2015-01-05 12:34:56 4
+# Now, the same with NOT NULL column
+create table t2 (
+pk int auto_increment primary key,
+kp1 datetime not null,
+kp2 int,
+col1 int,
+key(kp1, kp2)
+) engine=rocksdb;
+insert into t2 select * from t1;
+# This must show 'Using index'
+explain
+select kp1,kp2 from t2 force index (kp1)
+where kp1 between '2015-01-01 00:00:00' and '2015-01-05 23:59:59';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range kp1 kp1 5 NULL # Using where; Using index
+select kp1,kp2 from t2 force index (kp1)
+where kp1 between '2015-01-01 00:00:00' and '2015-01-05 23:59:59';
+kp1 kp2
+2015-01-01 12:34:56 0
+2015-01-02 12:34:56 1
+2015-01-03 12:34:56 2
+2015-01-04 12:34:56 3
+2015-01-05 12:34:56 4
+drop table t1,t2;
+# Try a DATE column:
+create table t1 (
+pk int auto_increment primary key,
+kp1 date,
+kp2 int,
+col1 int,
+key(kp1, kp2)
+) engine=rocksdb;
+insert into t1 (kp1,kp2)
+select date_add('2015-01-01', interval a day), a from t0;
+select * from t1;
+pk kp1 kp2 col1
+1 2015-01-01 0 NULL
+2 2015-01-02 1 NULL
+3 2015-01-03 2 NULL
+4 2015-01-04 3 NULL
+5 2015-01-05 4 NULL
+6 2015-01-06 5 NULL
+7 2015-01-07 6 NULL
+8 2015-01-08 7 NULL
+9 2015-01-09 8 NULL
+10 2015-01-10 9 NULL
+# This must show 'Using index'
+explain
+select kp1,kp2 from t1 force index (kp1)
+where kp1 between '2015-01-01' and '2015-01-05';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range kp1 kp1 4 NULL # Using where; Using index
+select kp1,kp2 from t1 force index (kp1)
+where kp1 between '2015-01-01' and '2015-01-05';
+kp1 kp2
+2015-01-01 0
+2015-01-02 1
+2015-01-03 2
+2015-01-04 3
+2015-01-05 4
+# Now, the same with NOT NULL column
+create table t2 (
+pk int auto_increment primary key,
+kp1 date not null,
+kp2 int,
+col1 int,
+key(kp1, kp2)
+) engine=rocksdb;
+insert into t2 select * from t1;
+# This must show 'Using index'
+explain
+select kp1,kp2 from t2 force index (kp1)
+where kp1 between '2015-01-01 00:00:00' and '2015-01-05 23:59:59';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range kp1 kp1 3 NULL # Using where; Using index
+select kp1,kp2 from t2 force index (kp1)
+where kp1 between '2015-01-01 00:00:00' and '2015-01-05 23:59:59';
+kp1 kp2
+2015-01-01 0
+2015-01-02 1
+2015-01-03 2
+2015-01-04 3
+2015-01-05 4
+drop table t1,t2;
+#
+# Try a TIMESTAMP column:
+#
+create table t1 (
+pk int auto_increment primary key,
+kp1 timestamp,
+kp2 int,
+col1 int,
+key(kp1, kp2)
+) engine=rocksdb;
+insert into t1 (kp1,kp2)
+select date_add('2015-01-01 12:34:56', interval a day), a from t0;
+select * from t1;
+pk kp1 kp2 col1
+1 2015-01-01 12:34:56 0 NULL
+2 2015-01-02 12:34:56 1 NULL
+3 2015-01-03 12:34:56 2 NULL
+4 2015-01-04 12:34:56 3 NULL
+5 2015-01-05 12:34:56 4 NULL
+6 2015-01-06 12:34:56 5 NULL
+7 2015-01-07 12:34:56 6 NULL
+8 2015-01-08 12:34:56 7 NULL
+9 2015-01-09 12:34:56 8 NULL
+10 2015-01-10 12:34:56 9 NULL
+# This must show 'Using index'
+explain
+select kp1,kp2 from t1 force index (kp1)
+where kp1 between '2015-01-01 00:00:00' and '2015-01-05 23:59:59';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range kp1 kp1 5 NULL # Using where; Using index
+select kp1,kp2 from t1 force index (kp1)
+where kp1 between '2015-01-01 00:00:00' and '2015-01-05 23:59:59';
+kp1 kp2
+2015-01-01 12:34:56 0
+2015-01-02 12:34:56 1
+2015-01-03 12:34:56 2
+2015-01-04 12:34:56 3
+2015-01-05 12:34:56 4
+# Now, the same with NOT NULL column
+create table t2 (
+pk int auto_increment primary key,
+kp1 timestamp not null,
+kp2 int,
+col1 int,
+key(kp1, kp2)
+) engine=rocksdb;
+insert into t2 select * from t1;
+# This must show 'Using index'
+explain
+select kp1,kp2 from t2 force index (kp1)
+where kp1 between '2015-01-01 00:00:00' and '2015-01-05 23:59:59';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range kp1 kp1 4 NULL # Using where; Using index
+select kp1,kp2 from t2 force index (kp1)
+where kp1 between '2015-01-01 00:00:00' and '2015-01-05 23:59:59';
+kp1 kp2
+2015-01-01 12:34:56 0
+2015-01-02 12:34:56 1
+2015-01-03 12:34:56 2
+2015-01-04 12:34:56 3
+2015-01-05 12:34:56 4
+drop table t1,t2;
+#
+# Try a TIME column:
+#
+create table t1 (
+pk int auto_increment primary key,
+kp1 time,
+kp2 int,
+col1 int,
+key(kp1, kp2)
+) engine=rocksdb;
+insert into t1 (kp1,kp2)
+select date_add('2015-01-01 09:00:00', interval a minute), a from t0;
+select * from t1;
+pk kp1 kp2 col1
+1 09:00:00 0 NULL
+2 09:01:00 1 NULL
+3 09:02:00 2 NULL
+4 09:03:00 3 NULL
+5 09:04:00 4 NULL
+6 09:05:00 5 NULL
+7 09:06:00 6 NULL
+8 09:07:00 7 NULL
+9 09:08:00 8 NULL
+10 09:09:00 9 NULL
+# This must show 'Using index'
+explain
+select kp1,kp2 from t1 force index (kp1)
+where kp1 between '09:01:00' and '09:05:00';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index kp1 kp1 9 NULL # Using where; Using index
+select kp1,kp2 from t1 force index (kp1)
+where kp1 between '09:01:00' and '09:05:00';
+kp1 kp2
+09:01:00 1
+09:02:00 2
+09:03:00 3
+09:04:00 4
+09:05:00 5
+# Now, the same with NOT NULL column
+create table t2 (
+pk int auto_increment primary key,
+kp1 time not null,
+kp2 int,
+col1 int,
+key(kp1, kp2)
+) engine=rocksdb;
+insert into t2 select * from t1;
+# This must show 'Using index'
+explain
+select kp1,kp2 from t2 force index (kp1)
+where kp1 between '09:01:00' and '09:05:00';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index kp1 kp1 8 NULL # Using where; Using index
+select kp1,kp2 from t2 force index (kp1)
+where kp1 between '09:01:00' and '09:05:00';
+kp1 kp2
+09:01:00 1
+09:02:00 2
+09:03:00 3
+09:04:00 4
+09:05:00 5
+drop table t1,t2;
+#
+# Try a YEAR column:
+#
+create table t1 (
+pk int auto_increment primary key,
+kp1 year,
+kp2 int,
+col1 int,
+key(kp1, kp2)
+) engine=rocksdb;
+insert into t1 (kp1,kp2) select 2015+a, a from t0;
+select * from t1;
+pk kp1 kp2 col1
+1 2015 0 NULL
+2 2016 1 NULL
+3 2017 2 NULL
+4 2018 3 NULL
+5 2019 4 NULL
+6 2020 5 NULL
+7 2021 6 NULL
+8 2022 7 NULL
+9 2023 8 NULL
+10 2024 9 NULL
+# This must show 'Using index'
+explain
+select kp1,kp2 from t1 force index (kp1)
+where kp1 between '2016' and '2020';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range kp1 kp1 2 NULL # Using where; Using index
+select kp1,kp2 from t1 force index (kp1)
+where kp1 between '2016' and '2020';
+kp1 kp2
+2016 1
+2017 2
+2018 3
+2019 4
+2020 5
+# Now, the same with NOT NULL column
+create table t2 (
+pk int auto_increment primary key,
+kp1 year not null,
+kp2 int,
+col1 int,
+key(kp1, kp2)
+) engine=rocksdb;
+insert into t2 select * from t1;
+# This must show 'Using index'
+explain
+select kp1,kp2 from t2 force index (kp1)
+where kp1 between '2016' and '2020';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range kp1 kp1 1 NULL # Using where; Using index
+select kp1,kp2 from t2 force index (kp1)
+where kp1 between '2016' and '2020';
+kp1 kp2
+2016 1
+2017 2
+2018 3
+2019 4
+2020 5
+drop table t1,t2;
+#
+# Issue #57: Release row locks on statement errors
+#
+create table t1 (id int primary key) engine=rocksdb;
+insert into t1 values (1), (2), (3);
+begin;
+insert into t1 values (4), (5), (6);
+insert into t1 values (7), (8), (2), (9);
+ERROR 23000: Duplicate entry '2' for key 'PRIMARY'
+select * from t1;
+id
+1
+2
+3
+4
+5
+6
+begin;
+select * from t1 where id=4 for update;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+select * from t1 where id=7 for update;
+id
+select * from t1 where id=9 for update;
+id
+drop table t1;
+# Index on blob column
+SET @old_mode = @@sql_mode;
+SET sql_mode = 'strict_all_tables';
+create table t1 (a int, b text, c varchar(400), Primary Key(a), Key(c, b(255))) engine=rocksdb;
+drop table t1;
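+# (rocksdb_large_prefix lifts the default 767-byte limit on index key
+# prefixes, analogous to innodb_large_prefix: the 1255-byte prefix below is
+# accepted while it is ON, whereas the 2255-byte prefix further down, with
+# it OFF, is truncated with a Note.)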
+set global rocksdb_large_prefix=1;
+create table t1 (a int, b text, c varchar(400), Primary Key(a), Key(b(1255))) engine=rocksdb;
+set global rocksdb_large_prefix=0;
+insert into t1 values (1, '1abcde', '1abcde'), (2, '2abcde', '2abcde'), (3, '3abcde', '3abcde');
+select * from t1;
+a b c
+1 1abcde 1abcde
+2 2abcde 2abcde
+3 3abcde 3abcde
+explain select * from t1 where b like '1%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range b b 1258 NULL # Using where
+explain select b, a from t1 where b like '1%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range b b 1258 NULL # Using where
+update t1 set b= '12345' where b = '2abcde';
+select * from t1;
+a b c
+1 1abcde 1abcde
+2 12345 2abcde
+3 3abcde 3abcde
+drop table t1;
+create table t1 (a int, b text, c varchar(400), Primary Key(a), Key(b(2255))) engine=rocksdb;
+Warnings:
+Note 1071 Specified key was too long; max key length is 767 bytes
+drop table t1;
+SET sql_mode = @old_mode;
+drop table t0;
+#
+# Fix assertion failure (attempt to overrun the key buffer) for prefix indexes
+#
+create table t1 (
+pk int primary key,
+col1 varchar(100),
+key (col1(10))
+) engine=rocksdb;
+insert into t1 values (1, repeat('0123456789', 9));
+drop table t1;
+#
+# Issue #76: Assertion `buf == table->record[0]' fails in virtual int ha_rocksdb::delete_row(const uchar*)
+#
+CREATE TABLE t1 (pk INT PRIMARY KEY, f1 INT) ENGINE=RocksDB;
+CREATE TABLE t2 (pk INT PRIMARY KEY, f1 INT) ENGINE=RocksDB;
+CREATE TRIGGER tr AFTER DELETE ON t1 FOR EACH ROW DELETE FROM t2 WHERE pk = old.pk;
+INSERT INTO t1 VALUES (1,1);
+REPLACE INTO t1 VALUES (1,2);
+SELECT * FROM t1;
+pk f1
+1 2
+DROP TABLE t1, t2;
+#
+# Issue #99: UPDATE for table with VARCHAR pk gives "Can't find record" error
+#
+create table t1(a int primary key);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t2 (
+a varchar(32) primary key,
+col1 int
+) engine=rocksdb;
+insert into t2
+select concat('v-', 100 + A.a*100 + B.a), 12345 from t1 A, t1 B;
+update t2 set a=concat('x-', a) where a between 'v-1002' and 'v-1004';
+drop table t1,t2;
+#
+# Issue #131: Assertion `v->cfd_->internal_comparator().Compare(start, end) <= 0' failed
+#
+CREATE TABLE t2(c1 INTEGER UNSIGNED NOT NULL, c2 INTEGER NULL, c3 TINYINT, c4 SMALLINT , c5 MEDIUMINT, c6 INT, c7 BIGINT, PRIMARY KEY(c1,c6)) ENGINE=RocksDB;
+INSERT INTO t2 VALUES (1,1,1,1,1,1,1);
+SELECT * FROM t2 WHERE c1 > 4294967295 ORDER BY c1,c6;
+c1 c2 c3 c4 c5 c6 c7
+EXPLAIN SELECT * FROM t2 WHERE c1 > 4294967295 ORDER BY c1,c6;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range PRIMARY PRIMARY 4 NULL 50 Using where
+drop table t2;
+#
+# Issue #135: register transaction was not being called for statement
+#
+DROP DATABASE IF EXISTS test_db;
+CREATE DATABASE test_db;
+CREATE TABLE test_db.t1(c1 INT PRIMARY KEY);
+LOCK TABLES test_db.t1 READ;
+SET AUTOCOMMIT=0;
+SELECT c1 FROM test_db.t1;
+c1
+START TRANSACTION WITH CONSISTENT SNAPSHOT, READ ONLY;
+DROP DATABASE test_db;
+#
+# Issue #143: Split rocksdb_bulk_load option into two
+#
+CREATE TABLE t1 (id int primary key, value int) engine=RocksDB;
+SET unique_checks=0;
+INSERT INTO t1 VALUES(1, 1);
+INSERT INTO t1 VALUES(1, 2);
+INSERT INTO t1 VALUES(1, 3);
+SELECT * FROM t1;
+id value
+1 3
+REPLACE INTO t1 VALUES(4, 4);
+ERROR HY000: When unique checking is disabled in MyRocks, INSERT,UPDATE,LOAD statements with clauses that update or replace the key (i.e. INSERT ON DUPLICATE KEY UPDATE, REPLACE) are not allowed. Query: REPLACE INTO t1 VALUES(4, 4)
+INSERT INTO t1 VALUES(5, 5) ON DUPLICATE KEY UPDATE value=value+1;
+ERROR HY000: When unique checking is disabled in MyRocks, INSERT,UPDATE,LOAD statements with clauses that update or replace the key (i.e. INSERT ON DUPLICATE KEY UPDATE, REPLACE) are not allowed. Query: INSERT INTO t1 VALUES(5, 5) ON DUPLICATE KEY UPDATE value=value+1
+TRUNCATE TABLE t1;
+SET @save_rocksdb_bulk_load_size= @@rocksdb_bulk_load_size;
+SET unique_checks=1;
+SET rocksdb_commit_in_the_middle=1;
+SET rocksdb_bulk_load_size=10;
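+# (With rocksdb_commit_in_the_middle=1 an implicit commit is issued every
+# rocksdb_bulk_load_size rows, here 10, so the ROLLBACKs below only undo
+# the uncommitted tail of each statement: 10 of the 19 inserts survive,
+# the first 10 UPDATEs stick, and rows 11-15 survive the DELETE.)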
+BEGIN;
+INSERT INTO t1 (id) VALUES(1),(2),(3),(4),(5),(6),(7),(8),(9),(10),
+(11),(12),(13),(14),(15),(16),(17),(18),(19);
+ROLLBACK;
+SELECT * FROM t1;
+id value
+1 NULL
+2 NULL
+3 NULL
+4 NULL
+5 NULL
+6 NULL
+7 NULL
+8 NULL
+9 NULL
+10 NULL
+INSERT INTO t1 (id) VALUES (11),(12),(13),(14),(15);
+BEGIN;
+UPDATE t1 SET value=100;
+ROLLBACK;
+SELECT * FROM t1;
+id value
+1 100
+2 100
+3 100
+4 100
+5 100
+6 100
+7 100
+8 100
+9 100
+10 100
+11 NULL
+12 NULL
+13 NULL
+14 NULL
+15 NULL
+BEGIN;
+DELETE FROM t1;
+ROLLBACK;
+SELECT * FROM t1;
+id value
+11 NULL
+12 NULL
+13 NULL
+14 NULL
+15 NULL
+SET rocksdb_commit_in_the_middle=0;
+SET rocksdb_bulk_load_size= @save_rocksdb_bulk_load_size;
+DROP TABLE t1;
+#
+# Issue #185 Assertion `BaseValid()' failed in void rocksdb::BaseDeltaIterator::Advance()
+#
+CREATE TABLE t2(id INT NOT NULL PRIMARY KEY, data INT) Engine=MEMORY;
+INSERT INTO t2 VALUES (100,NULL),(150,"long varchar"),(200,"varchar"),(250,"long long long varchar");
+Warnings:
+Warning 1366 Incorrect integer value: 'long varchar' for column `test`.`t2`.`data` at row 2
+Warning 1366 Incorrect integer value: 'varchar' for column `test`.`t2`.`data` at row 3
+Warning 1366 Incorrect integer value: 'long long long varchar' for column `test`.`t2`.`data` at row 4
+create TABLE t1 (a int not null, b int not null, primary key(a,b));
+INSERT INTO t1 VALUES (1,1);
+SELECT a FROM t1, t2 WHERE a=b AND (b NOT IN (SELECT a FROM t1 WHERE a > 4));
+a
+1
+1
+1
+1
+DROP TABLE t1, t2;
+#
+# Issue #189: ha_rocksdb::load_auto_incr_value() creates an implicit snapshot and doesn't release it
+#
+create table r1 (id int auto_increment primary key, value int);
+insert into r1 (id) values (null), (null), (null), (null), (null);
+create table r2 like r1;
+show create table r2;
+Table Create Table
+r2 CREATE TABLE `r2` (
+ `id` int(11) NOT NULL AUTO_INCREMENT,
+ `value` int(11) DEFAULT NULL,
+ PRIMARY KEY (`id`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+begin;
+insert into r1 values (10, 1);
+commit;
+begin;
+select * from r1;
+id value
+1 NULL
+2 NULL
+3 NULL
+4 NULL
+5 NULL
+10 1
+commit;
+drop table r1, r2;
+create table r1 (id int auto_increment, value int, index i(id));
+insert into r1 (id) values (null), (null), (null), (null), (null);
+create table r2 like r1;
+show create table r2;
+Table Create Table
+r2 CREATE TABLE `r2` (
+ `id` int(11) NOT NULL AUTO_INCREMENT,
+ `value` int(11) DEFAULT NULL,
+ KEY `i` (`id`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+begin;
+insert into r1 values (10, 1);
+commit;
+begin;
+select * from r1;
+id value
+1 NULL
+2 NULL
+3 NULL
+4 NULL
+5 NULL
+10 1
+commit;
+drop table r1, r2;
+#
+# Issue#211 Crash on LOCK TABLES + START TRANSACTION WITH CONSISTENT SNAPSHOT
+#
+CREATE TABLE t1(c1 INT);
+lock TABLE t1 read local;
+SELECT 1 FROM t1 GROUP BY TRIM(LEADING RAND()FROM'');
+1
+set AUTOCOMMIT=0;
+start transaction with consistent snapshot;
+SELECT * FROM t1;
+c1
+COMMIT;
+UNLOCK TABLES;
+DROP TABLE t1;
+#
+# Issue#213 Crash on LOCK TABLES + partitions
+#
+CREATE TABLE t1(a INT,b INT,KEY (b)) engine=rocksdb PARTITION BY HASH(a) PARTITIONS 2;
+INSERT INTO t1(a)VALUES (20010101101010.999949);
+Warnings:
+Warning 1264 Out of range value for column 'a' at row 1
+lock tables t1 write,t1 as t0 write,t1 as t2 write;
+SELECT a FROM t1 ORDER BY a;
+a
+2147483647
+truncate t1;
+INSERT INTO t1 VALUES(X'042000200020',X'042000200020'),(X'200400200020',X'200400200020');
+Warnings:
+Warning 1366 Incorrect integer value: '\x04 \x00 \x00 ' for column `test`.`t1`.`a` at row 1
+Warning 1366 Incorrect integer value: '\x04 \x00 \x00 ' for column `test`.`t1`.`b` at row 1
+Warning 1366 Incorrect integer value: ' \x04\x00 \x00 ' for column `test`.`t1`.`a` at row 2
+Warning 1366 Incorrect integer value: ' \x04\x00 \x00 ' for column `test`.`t1`.`b` at row 2
+UNLOCK TABLES;
+DROP TABLE t1;
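+# Issues #211 and #213 are crash regression tests: the statements produce no
+# interesting output, and the pass criterion is simply that the server
+# survives LOCK TABLES combined with a consistent snapshot or with
+# partitioned tables.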
+#
+# Issue#250: MyRocks/Innodb different output from query with order by on table with index and decimal type
+# (the test was changed to use VARCHAR, because DECIMAL now supports index-only, and this issue
+# needs a datatype that doesn't support index-only)
+#
+CREATE TABLE t1(
+c1 varchar(10) character set utf8 collate utf8_general_ci NOT NULL,
+c2 varchar(10) character set utf8 collate utf8_general_ci,
+c3 INT,
+INDEX idx(c1,c2)
+);
+INSERT INTO t1 VALUES ('c1-val1','c2-val1',5);
+INSERT INTO t1 VALUES ('c1-val2','c2-val3',6);
+INSERT INTO t1 VALUES ('c1-val3','c2-val3',7);
+SELECT * FROM t1 force index(idx) WHERE c1 <> 'c1-val2' ORDER BY c1 DESC;
+c1 c2 c3
+c1-val3 c2-val3 7
+c1-val1 c2-val1 5
+explain SELECT * FROM t1 force index(idx) WHERE c1 <> '1' ORDER BY c1 DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx idx 32 NULL # Using where
+drop table t1;
+#
+# Issue#267: MyRocks issue with no matching min/max row and count(*)
+#
+CREATE TABLE t1(c1 INT UNSIGNED, c2 INT SIGNED, INDEX idx2(c2));
+INSERT INTO t1 VALUES(1,null);
+INSERT INTO t1 VALUES(2,null);
+SELECT count(*) as total_rows, min(c2) as min_value FROM t1;
+total_rows min_value
+2 NULL
+DROP TABLE t1;
+#
+# Issue#263: MyRocks auto_increment skips values if you insert a negative value
+#
+CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
+INSERT INTO t1 VALUES(0),(-1),(0);
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 3 NULL NULL NULL latin1_swedish_ci NULL
+SELECT * FROM t1;
+a
+-1
+1
+2
+DROP TABLE t1;
+CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
+INSERT INTO t1 VALUES(0),(10),(0);
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 12 NULL NULL NULL latin1_swedish_ci NULL
+SELECT * FROM t1;
+a
+1
+10
+11
+DROP TABLE t1;
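+# In both runs the counter only moves past explicit values at or above it:
+# inserting -1 leaves the next value at 3, while inserting 10 pushes it to
+# 12. The rule exercised here is effectively
+#   new_counter = MAX(old_counter, inserted_value + 1)
+# so negative or low explicit values no longer make auto_increment skip.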
+#
+# Issue #411: Setting rocksdb_commit_in_the_middle commits transaction
+# without releasing iterator
+#
+CREATE TABLE t1 (id1 bigint(20),
+id2 bigint(20),
+id3 bigint(20),
+PRIMARY KEY (id1, id2, id3))
+DEFAULT CHARSET=latin1;
+CREATE TABLE t2 (id1 bigint(20),
+id2 bigint(20),
+PRIMARY KEY (id1, id2))
+DEFAULT CHARSET=latin1;
+set rocksdb_commit_in_the_middle=1;
+SET @save_rocksdb_bulk_load_size= @@rocksdb_bulk_load_size;
+set rocksdb_bulk_load_size = 100;
+DELETE t2, t1 FROM t2 LEFT JOIN t1 ON t2.id2 = t1.id2 AND t2.id1 = t1.id1 WHERE t2.id1 = 0;
+SET rocksdb_bulk_load_size= @save_rocksdb_bulk_load_size;
+SET rocksdb_commit_in_the_middle=0;
+DROP TABLE t1, t2;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_cf_options.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_cf_options.result
new file mode 100644
index 00000000000..6c3d85b760c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_cf_options.result
@@ -0,0 +1,61 @@
+create table t1 (a int,
+primary key (a) comment 'cf1') engine=rocksdb;
+create table t2 (a int,
+primary key (a) comment 'cf2') engine=rocksdb;
+create table t3 (a int,
+primary key (a) comment 'z') engine=rocksdb;
+insert into t1 values (1);
+insert into t2 values (2);
+insert into t3 values (2);
+
+Default options for all column families:
+
+select cf_name, option_type, value
+from information_schema.rocksdb_cf_options
+where option_type in ('WRITE_BUFFER_SIZE',
+'TARGET_FILE_SIZE_BASE',
+'MAX_BYTES_FOR_LEVEL_MULTIPLIER')
+order by cf_name, option_type;
+cf_name option_type value
+cf1 MAX_BYTES_FOR_LEVEL_MULTIPLIER 10.000000
+cf1 TARGET_FILE_SIZE_BASE 1048576
+cf1 WRITE_BUFFER_SIZE 12582912
+cf2 MAX_BYTES_FOR_LEVEL_MULTIPLIER 10.000000
+cf2 TARGET_FILE_SIZE_BASE 1048576
+cf2 WRITE_BUFFER_SIZE 12582912
+default MAX_BYTES_FOR_LEVEL_MULTIPLIER 10.000000
+default TARGET_FILE_SIZE_BASE 1048576
+default WRITE_BUFFER_SIZE 12582912
+z MAX_BYTES_FOR_LEVEL_MULTIPLIER 10.000000
+z TARGET_FILE_SIZE_BASE 1048576
+z WRITE_BUFFER_SIZE 12582912
+__system__ MAX_BYTES_FOR_LEVEL_MULTIPLIER 10.000000
+__system__ TARGET_FILE_SIZE_BASE 1048576
+__system__ WRITE_BUFFER_SIZE 12582912
+
+Individualized options for column families:
+
+select cf_name, option_type, value
+from information_schema.rocksdb_cf_options
+where option_type in ('WRITE_BUFFER_SIZE',
+'TARGET_FILE_SIZE_BASE',
+'MAX_BYTES_FOR_LEVEL_MULTIPLIER')
+order by cf_name, option_type;
+cf_name option_type value
+cf1 MAX_BYTES_FOR_LEVEL_MULTIPLIER 10.000000
+cf1 TARGET_FILE_SIZE_BASE 2097152
+cf1 WRITE_BUFFER_SIZE 8388608
+cf2 MAX_BYTES_FOR_LEVEL_MULTIPLIER 8.000000
+cf2 TARGET_FILE_SIZE_BASE 1048576
+cf2 WRITE_BUFFER_SIZE 16777216
+default MAX_BYTES_FOR_LEVEL_MULTIPLIER 10.000000
+default TARGET_FILE_SIZE_BASE 1048576
+default WRITE_BUFFER_SIZE 12582912
+z MAX_BYTES_FOR_LEVEL_MULTIPLIER 10.000000
+z TARGET_FILE_SIZE_BASE 4194304
+z WRITE_BUFFER_SIZE 12582912
+__system__ MAX_BYTES_FOR_LEVEL_MULTIPLIER 10.000000
+__system__ TARGET_FILE_SIZE_BASE 1048576
+__system__ WRITE_BUFFER_SIZE 12582912
+
+drop table t1,t2,t3;
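+# The "individualized" listing presumably reflects a server restart with
+# per-CF overrides; the override itself lives in the test's option file, not
+# in this result file. In MyRocks it would be passed through
+# rocksdb_override_cf_options, e.g. (values matching the cf1 output above):
+#   --rocksdb_override_cf_options=cf1={write_buffer_size=8m;target_file_size_base=2m};...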
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_cf_per_partition.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_cf_per_partition.result
new file mode 100644
index 00000000000..ed226f0d8ba
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_cf_per_partition.result
@@ -0,0 +1,423 @@
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+event DATE,
+PRIMARY KEY (`c1`, `c2`) COMMENT 'testcomment'
+) ENGINE=ROCKSDB
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='testcomment';
+cf_name
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+event DATE,
+PRIMARY KEY (`c1`, `c2`) COMMENT 'rev:testrevcomment'
+) ENGINE=ROCKSDB
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='rev:testrevcomment';
+cf_name
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+event DATE,
+PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=my_custom_cf;custom_p2_cfname=baz'
+) ENGINE=ROCKSDB
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+set @@global.rocksdb_compact_cf = 'baz';
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='foo';
+cf_name
+foo
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='my_custom_cf';
+cf_name
+my_custom_cf
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='baz';
+cf_name
+baz
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+event DATE,
+PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=t1-p0;custom_p1_cfname=rev:bar;custom_p2_cfname=t1-p2'
+) ENGINE=ROCKSDB
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+set @@global.rocksdb_compact_cf = 't1-p0';
+set @@global.rocksdb_compact_cf = 'rev:bar';
+set @@global.rocksdb_compact_cf = 't1-p2';
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='t1-p0';
+cf_name
+t1-p0
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='rev:bar';
+cf_name
+rev:bar
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='t1-p2';
+cf_name
+t1-p2
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+event DATE,
+PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=cf-zero;custom_p1_cfname=cf-one;custom_p2_cfname=cf-zero'
+) ENGINE=ROCKSDB
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9),
+PARTITION custom_p3 VALUES IN (10, 20, 30)
+);
+set @@global.rocksdb_compact_cf = 'cf-zero';
+set @@global.rocksdb_compact_cf = 'cf-one';
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='cf-zero';
+cf_name
+cf-zero
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='cf-one';
+cf_name
+cf-one
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+event DATE,
+PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz'
+) ENGINE=ROCKSDB
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+INSERT INTO t1 VALUES (1, 1, "one", null);
+INSERT INTO t1 VALUES (2, 2, "two", null);
+INSERT INTO t1 VALUES (3, 3, "three", null);
+INSERT INTO t1 VALUES (5, 5, "five", null);
+INSERT INTO t1 VALUES (9, 9, "nine", null);
+SELECT * FROM t1;
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+5 5 five NULL
+3 3 three NULL
+9 9 nine NULL
+ALTER TABLE t1 DROP PRIMARY KEY;
+SELECT * FROM t1;
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+5 5 five NULL
+3 3 three NULL
+9 9 nine NULL
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'bar';
+set @@global.rocksdb_compact_cf = 'baz';
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+event DATE,
+PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz'
+) ENGINE=ROCKSDB
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+INSERT INTO t1 VALUES (1, 1, "one", null);
+INSERT INTO t1 VALUES (2, 2, "two", null);
+INSERT INTO t1 VALUES (3, 3, "three", null);
+INSERT INTO t1 VALUES (5, 5, "five", null);
+INSERT INTO t1 VALUES (9, 9, "nine", null);
+ALTER TABLE t1 DROP PRIMARY KEY;
+ALTER TABLE t1 ADD PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=p0_cf;custom_p1_cfname=p1_cf';
+set @@global.rocksdb_compact_cf = 'p0_cf';
+set @@global.rocksdb_compact_cf = 'p1_cf';
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='p0_cf';
+cf_name
+p0_cf
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='p1_cf';
+cf_name
+p1_cf
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+event DATE,
+PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz'
+) ENGINE=ROCKSDB
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+INSERT INTO t1 VALUES (1, 1, "one", null);
+INSERT INTO t1 VALUES (2, 2, "two", null);
+INSERT INTO t1 VALUES (3, 3, "three", null);
+INSERT INTO t1 VALUES (5, 5, "five", null);
+INSERT INTO t1 VALUES (9, 9, "nine", null);
+ALTER TABLE t1 PARTITION BY LIST(c1) (
+PARTITION custom_p3 VALUES IN (1, 4, 7),
+PARTITION custom_p4 VALUES IN (2, 5, 8, 3, 6, 9)
+);
+ALTER TABLE t1 DROP PRIMARY KEY;
+ALTER TABLE t1 ADD PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p3_cfname=p3_cf;custom_p4_cfname=p4_cf';
+set @@global.rocksdb_compact_cf = 'p3_cf';
+set @@global.rocksdb_compact_cf = 'p4_cf';
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='p3_cf';
+cf_name
+p3_cf
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='p4_cf';
+cf_name
+p4_cf
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+event DATE,
+PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=;'
+) ENGINE=ROCKSDB
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+DROP TABLE t1;
+CREATE TABLE `t2` (
+`col1` bigint(20) NOT NULL,
+`col2` varbinary(64) NOT NULL,
+`col3` varbinary(256) NOT NULL,
+`col4` bigint(20) NOT NULL,
+`col5` mediumblob NOT NULL,
+PRIMARY KEY (`col1`,`col2`,`col3`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+PARTITION BY LIST COLUMNS (`col2`) (
+PARTITION custom_p0 VALUES IN (0x12345),
+PARTITION custom_p1 VALUES IN (0x23456),
+PARTITION custom_p2 VALUES IN (0x34567),
+PARTITION custom_p3 VALUES IN (0x45678),
+PARTITION custom_p4 VALUES IN (0x56789),
+PARTITION custom_p5 VALUES IN (0x6789A),
+PARTITION custom_p6 VALUES IN (0x789AB),
+PARTITION custom_p7 VALUES IN (0x89ABC)
+);
+DROP TABLE t2;
+CREATE TABLE `t2` (
+`col1` bigint(20) NOT NULL,
+`col2` varbinary(64) NOT NULL,
+`col3` varbinary(256) NOT NULL,
+`col4` bigint(20) NOT NULL,
+`col5` mediumblob NOT NULL,
+PRIMARY KEY (`col1`,`col2`,`col3`) COMMENT 'custom_p0_cfname=my_cf0;custom_p1_cfname=my_cf1'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+PARTITION BY LIST COLUMNS (`col2`) (
+PARTITION custom_p0 VALUES IN (0x12345),
+PARTITION custom_p1 VALUES IN (0x23456),
+PARTITION custom_p2 VALUES IN (0x34567),
+PARTITION custom_p3 VALUES IN (0x45678),
+PARTITION custom_p4 VALUES IN (0x56789),
+PARTITION custom_p5 VALUES IN (0x6789A),
+PARTITION custom_p6 VALUES IN (0x789AB),
+PARTITION custom_p7 VALUES IN (0x89ABC)
+);
+set @@global.rocksdb_compact_cf = 'my_cf0';
+set @@global.rocksdb_compact_cf = 'my_cf1';
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='my_cf0';
+cf_name
+my_cf0
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='my_cf1';
+cf_name
+my_cf1
+INSERT INTO t2 VALUES (100, 0x12345, 0x1, 1, 0x2);
+INSERT INTO t2 VALUES (200, 0x12345, 0x1, 1, 0x2);
+INSERT INTO t2 VALUES (300, 0x12345, 0x1, 1, 0x2);
+INSERT INTO t2 VALUES (100, 0x23456, 0x2, 1, 0x3);
+INSERT INTO t2 VALUES (100, 0x34567, 0x4, 1, 0x5);
+INSERT INTO t2 VALUES (400, 0x89ABC, 0x4, 1, 0x5);
+SELECT col1, HEX(col2), HEX(col3), col4, HEX(col5) FROM t2;
+col1 HEX(col2) HEX(col3) col4 HEX(col5)
+100 012345 01 1 02
+200 012345 01 1 02
+300 012345 01 1 02
+100 023456 02 1 03
+100 034567 04 1 05
+400 089ABC 04 1 05
+EXPLAIN PARTITIONS SELECT HEX(col2) FROM t2 where col2 = 0x12345;
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 custom_p0 index NULL PRIMARY 332 NULL 3 Using where; Using index
+EXPLAIN PARTITIONS SELECT HEX(col2) FROM t2 where col2 = 0x23456;
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 custom_p1 index NULL PRIMARY 332 NULL 2 Using where; Using index
+ALTER TABLE t2 DROP PRIMARY KEY;
+ALTER TABLE t2 ADD PRIMARY KEY (`col1`,`col2`,`col3`) COMMENT 'custom_p0_cfname=new_cf0;custom_p1_cfname=new_cf1';
+set @@global.rocksdb_compact_cf = 'new_cf0';
+set @@global.rocksdb_compact_cf = 'new_cf1';
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='new_cf0';
+cf_name
+new_cf0
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='new_cf1';
+cf_name
+new_cf1
+INSERT INTO t2 VALUES (500, 0x12345, 0x5, 1, 0x2);
+INSERT INTO t2 VALUES (700, 0x23456, 0x7, 1, 0x3);
+EXPLAIN PARTITIONS SELECT HEX(col2) FROM t2 where col2 = 0x12345;
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 custom_p0 index NULL PRIMARY 332 NULL 4 Using where; Using index
+EXPLAIN PARTITIONS SELECT HEX(col2) FROM t2 where col2 = 0x23456;
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 custom_p1 index NULL PRIMARY 332 NULL 2 Using where; Using index
+SELECT col1, HEX(col2), HEX(col3), col4, HEX(col5) FROM t2;
+col1 HEX(col2) HEX(col3) col4 HEX(col5)
+100 012345 01 1 02
+200 012345 01 1 02
+300 012345 01 1 02
+500 012345 05 1 02
+100 023456 02 1 03
+700 023456 07 1 03
+100 034567 04 1 05
+400 089ABC 04 1 05
+DROP TABLE t2;
+CREATE TABLE `t2` (
+`col1` bigint(20) NOT NULL,
+`col2` varbinary(64) NOT NULL,
+`col3` varbinary(256) NOT NULL,
+`col4` bigint(20) NOT NULL,
+`col5` mediumblob NOT NULL,
+PRIMARY KEY (`col1`,`col2`,`col3`) COMMENT 'custom_p0_cfname=test_cf0;custom_p1_cfname=test_cf1',
+KEY (`col2`, `col4`) COMMENT 'custom_p5_cfname=test_cf5'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+PARTITION BY LIST COLUMNS (`col2`) (
+PARTITION custom_p0 VALUES IN (0x12345),
+PARTITION custom_p1 VALUES IN (0x23456),
+PARTITION custom_p2 VALUES IN (0x34567),
+PARTITION custom_p3 VALUES IN (0x45678),
+PARTITION custom_p4 VALUES IN (0x56789),
+PARTITION custom_p5 VALUES IN (0x6789A),
+PARTITION custom_p6 VALUES IN (0x789AB),
+PARTITION custom_p7 VALUES IN (0x89ABC)
+);
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='test_cf0';
+cf_name
+test_cf0
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='test_cf1';
+cf_name
+test_cf1
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='test_cf5';
+cf_name
+test_cf5
+INSERT INTO t2 VALUES (100, 0x12345, 0x1, 1, 0x2);
+INSERT INTO t2 VALUES (200, 0x12345, 0x1, 1, 0x2);
+INSERT INTO t2 VALUES (300, 0x12345, 0x1, 1, 0x2);
+INSERT INTO t2 VALUES (100, 0x23456, 0x2, 1, 0x3);
+INSERT INTO t2 VALUES (100, 0x34567, 0x4, 1, 0x5);
+INSERT INTO t2 VALUES (400, 0x89ABC, 0x4, 1, 0x5);
+INSERT INTO t2 VALUES (500, 0x6789A, 0x5, 1, 0x7);
+EXPLAIN PARTITIONS SELECT * FROM t2 WHERE col2 = 0x6789A AND col4 = 1;
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 custom_p5 ref col2 col2 74 const,const 1 Using where
+ALTER TABLE t2 DROP KEY `col2`;
+ALTER TABLE t2 ADD KEY (`col3`, `col4`) COMMENT 'custom_p5_cfname=another_cf_for_p5';
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='another_cf_for_p5';
+cf_name
+another_cf_for_p5
+ANALYZE TABLE t2;
+Table Op Msg_type Msg_text
+test.t2 analyze status OK
+EXPLAIN PARTITIONS SELECT * FROM t2 WHERE col3 = 0x4 AND col2 = 0x34567;
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 custom_p2 ref col3 col3 258 const # Using where
+DROP TABLE t2;
+CREATE TABLE `t2` (
+`col1` bigint(20) NOT NULL,
+`col2` varbinary(64) NOT NULL,
+`col3` varbinary(256) NOT NULL,
+`col4` bigint(20) NOT NULL,
+`col5` mediumblob NOT NULL,
+PRIMARY KEY (`col1`,`col2`,`col3`) COMMENT 'custom_p0_cfname=test_cf0;custom_p1_cfname=test_cf1',
+UNIQUE KEY (`col2`, `col4`) COMMENT 'custom_p5_cfname=unique_test_cf5'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+PARTITION BY LIST COLUMNS (`col2`) (
+PARTITION custom_p0 VALUES IN (0x12345),
+PARTITION custom_p1 VALUES IN (0x23456),
+PARTITION custom_p2 VALUES IN (0x34567),
+PARTITION custom_p3 VALUES IN (0x45678),
+PARTITION custom_p4 VALUES IN (0x56789),
+PARTITION custom_p5 VALUES IN (0x6789A),
+PARTITION custom_p6 VALUES IN (0x789AB),
+PARTITION custom_p7 VALUES IN (0x89ABC)
+);
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='unique_test_cf5';
+cf_name
+unique_test_cf5
+INSERT INTO t2 VALUES (100, 0x12345, 0x1, 1, 0x2);
+INSERT INTO t2 VALUES (200, 0x12345, 0x1, 1, 0x2);
+ERROR 23000: Duplicate entry '\x01#E-1' for key 'col2'
+INSERT INTO t2 VALUES (300, 0x12345, 0x1, 1, 0x2);
+ERROR 23000: Duplicate entry '\x01#E-1' for key 'col2'
+INSERT INTO t2 VALUES (100, 0x23456, 0x2, 1, 0x3);
+INSERT INTO t2 VALUES (100, 0x34567, 0x4, 1, 0x5);
+INSERT INTO t2 VALUES (400, 0x89ABC, 0x4, 1, 0x5);
+INSERT INTO t2 VALUES (500, 0x6789A, 0x5, 1, 0x7);
+DROP TABLE t2;
+CREATE TABLE t1 (
+`a` int,
+PRIMARY KEY (a) COMMENT "sharedcf"
+) ENGINE=ROCKSDB;
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='sharedcf';
+cf_name
+sharedcf
+CREATE TABLE t2 (
+`a` INT,
+`b` DATE,
+`c` VARCHAR(42),
+PRIMARY KEY (`a`) COMMENT "custom_p0_cfname=sharedcf;custom_p2_cfname=notsharedcf"
+) ENGINE=ROCKSDB
+PARTITION BY LIST(`a`) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='notsharedcf';
+cf_name
+notsharedcf
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+CREATE TABLE t1 (
+a INT NOT NULL,
+PRIMARY KEY (a) COMMENT 'p1_cfname=foo;'
+) ENGINE=ROCKSDB
+PARTITION BY LIST COLUMNS(a)
+(PARTITION p1 VALUES IN (1) ENGINE = ROCKSDB);
+INSERT INTO t1 values (1);
+TRUNCATE TABLE t1;
+SELECT * FROM t1;
+a
+DROP TABLE t1;
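+# All of the above relies on the key-comment syntax for per-partition column
+# families: semicolon-separated 'custom_<partition>_cfname=<cf>' entries
+# assign a partition's index to a named CF, and a 'rev:' prefix on the CF
+# name requests reverse key ordering. Minimal sketch (hypothetical names):
+#   CREATE TABLE t (a INT, PRIMARY KEY (a)
+#   COMMENT 'custom_p0_cfname=cf_a;custom_p1_cfname=rev:cf_b')
+#   ENGINE=ROCKSDB PARTITION BY LIST(a)
+#   (PARTITION p0 VALUES IN (1), PARTITION p1 VALUES IN (2));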
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_cf_reverse.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_cf_reverse.result
new file mode 100644
index 00000000000..1c85343cabb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_cf_reverse.result
@@ -0,0 +1,120 @@
+create table t0 (a int) engine=myisam;
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1 (
+pk int primary key,
+a int not null,
+b int not null,
+key(a) comment 'rev:foo',
+key(b) comment 'bar'
+) engine=rocksdb;
+insert into t1 select a,a,a from t0;
+insert into t1 select a+10,a+10,a+10 from t0;
+# Primary key is not in a reverse-ordered CF, so full table scan
+# returns rows in ascending order:
+select * from t1;
+pk a b
+0 0 0
+1 1 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+7 7 7
+8 8 8
+9 9 9
+10 10 10
+11 11 11
+12 12 12
+13 13 13
+14 14 14
+15 15 15
+16 16 16
+17 17 17
+18 18 18
+19 19 19
+explain
+select a from t1 order by a limit 5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL a 4 NULL # Using index
+select a from t1 order by a limit 5;
+a
+0
+1
+2
+3
+4
+explain
+select b from t1 order by b limit 5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL b 4 NULL # Using index
+select a from t1 order by a limit 5;
+a
+0
+1
+2
+3
+4
+explain
+select a from t1 order by a desc limit 5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL a 4 NULL # Using index
+select a from t1 order by a desc limit 5;
+a
+19
+18
+17
+16
+15
+explain
+select b from t1 order by b desc limit 5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL b 4 NULL # Using index
+select b from t1 order by b desc limit 5;
+b
+19
+18
+17
+16
+15
+drop table t1;
+#
+# Try a primary key in a reverse-ordered CF.
+#
+create table t2 (
+pk int,
+a int not null,
+primary key(pk) comment 'rev:cf1'
+) engine=rocksdb;
+insert into t2 select a,a from t0;
+# Primary key is in a reverse-ordered CF, so full table scan
+# returns rows in descending order:
+select * from t2;
+pk a
+9 9
+8 8
+7 7
+6 6
+5 5
+4 4
+3 3
+2 2
+1 1
+0 0
+set autocommit=0;
+begin;
+delete from t2 where a=3 or a=7;
+select * from t2;
+pk a
+9 9
+8 8
+6 6
+5 5
+4 4
+2 2
+1 1
+0 0
+rollback;
+set autocommit=1;
+drop table t2;
+drop table t0;
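+# Recap of the behaviour recorded above: a 'rev:' prefix in an index comment
+# places that key in a reverse-ordered column family, so a full scan of t2
+# (primary key in 'rev:cf1') returns rows in descending pk order, while
+# explicit ORDER BY ... ASC/DESC reads work the same through either kind of
+# CF. Sketch:
+#   create table s (pk int primary key comment 'rev:cf_r') engine=rocksdb;
+#   select * from s;  -- rows come back in descending pk order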
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_checksums.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_checksums.result
new file mode 100644
index 00000000000..aae6de12f6a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_checksums.result
@@ -0,0 +1,128 @@
+set @save_rocksdb_store_row_debug_checksums=@@global.rocksdb_store_row_debug_checksums;
+set @save_rocksdb_verify_row_debug_checksums=@@global.rocksdb_verify_row_debug_checksums;
+set @save_rocksdb_checksums_pct=@@global.rocksdb_checksums_pct;
+show variables like 'rocksdb_%checksum%';
+Variable_name Value
+rocksdb_checksums_pct 100
+rocksdb_store_row_debug_checksums OFF
+rocksdb_verify_row_debug_checksums OFF
+create table t1 (pk int primary key, a int, b int, key(a), key(b)) engine=rocksdb;
+insert into t1 values (1,1,1),(2,2,2),(3,3,3);
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+CHECKTABLE t1: Checking table t1
+CHECKTABLE t1: Checking index a
+CHECKTABLE t1: ... 3 index entries checked (0 had checksums)
+CHECKTABLE t1: Checking index b
+CHECKTABLE t1: ... 3 index entries checked (0 had checksums)
+CHECKTABLE t1: 0 table records had checksums
+drop table t1;
+set session rocksdb_store_row_debug_checksums=on;
+create table t2 (pk int primary key, a int, b int, key(a), key(b)) engine=rocksdb;
+insert into t2 values (1,1,1),(2,2,2),(3,3,3);
+check table t2;
+Table Op Msg_type Msg_text
+test.t2 check status OK
+CHECKTABLE t2: Checking table t2
+CHECKTABLE t2: Checking index a
+CHECKTABLE t2: ... 3 index entries checked (3 had checksums)
+CHECKTABLE t2: Checking index b
+CHECKTABLE t2: ... 3 index entries checked (3 had checksums)
+CHECKTABLE t2: 3 table records had checksums
+# Now, make a table that has both rows with checksums and without
+create table t3 (pk int primary key, a int, b int, key(a), key(b)) engine=rocksdb;
+insert into t3 values (1,1,1),(2,2,2),(3,3,3);
+set session rocksdb_store_row_debug_checksums=off;
+update t3 set b=3 where a=2;
+set session rocksdb_store_row_debug_checksums=on;
+check table t3;
+Table Op Msg_type Msg_text
+test.t3 check status OK
+CHECKTABLE t3: Checking table t3
+CHECKTABLE t3: Checking index a
+CHECKTABLE t3: ... 3 index entries checked (3 had checksums)
+CHECKTABLE t3: Checking index b
+CHECKTABLE t3: ... 3 index entries checked (2 had checksums)
+CHECKTABLE t3: 2 table records had checksums
+set session rocksdb_store_row_debug_checksums=on;
+set session rocksdb_checksums_pct=5;
+create table t4 (pk int primary key, a int, b int, key(a), key(b)) engine=rocksdb;
+check table t4;
+Table Op Msg_type Msg_text
+test.t4 check status OK
+4000 index entries had around 200 checksums
+4000 index entries had around 200 checksums
+Around 200 table records had checksums
+set session rocksdb_checksums_pct=100;
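+# rocksdb_checksums_pct samples which writes get debug checksums: at 5%,
+# roughly 4000 * 0.05 = 200 of the 4000 index entries (and table records)
+# carry one, matching the "around 200" counts above.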
+#
+# Ok, table t2 has all rows with checksums. Simulate a few checksum mismatches.
+#
+insert into mtr.test_suppressions values
+('Checksum mismatch in key of key-value pair for index'),
+('Checksum mismatch in value of key-value pair for index'),
+('Data with incorrect checksum');
+# 1. Start with mismatch in key checksum of the PK.
+set session debug_dbug= "+d,myrocks_simulate_bad_pk_checksum1";
+set session rocksdb_verify_row_debug_checksums=off;
+select * from t3;
+pk a b
+1 1 1
+2 2 3
+3 3 3
+set session rocksdb_verify_row_debug_checksums=on;
+select * from t3;
+ERROR HY000: Internal error: Record checksum mismatch
+select * from t4;
+ERROR HY000: Internal error: Record checksum mismatch
+set session debug_dbug= "-d,myrocks_simulate_bad_pk_checksum1";
+# 2. Continue with mismatch in pk value checksum.
+set session debug_dbug= "+d,myrocks_simulate_bad_pk_checksum2";
+set session rocksdb_verify_row_debug_checksums=off;
+select * from t3;
+pk a b
+1 1 1
+2 2 3
+3 3 3
+set session rocksdb_verify_row_debug_checksums=on;
+select * from t3;
+ERROR HY000: Internal error: Record checksum mismatch
+select * from t4;
+ERROR HY000: Internal error: Record checksum mismatch
+set session debug_dbug= "-d,myrocks_simulate_bad_pk_checksum2";
+# 3. Check if we catch checksum mismatches for secondary indexes
+explain
+select * from t3 force index(a) where a<4;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t3 range a a 5 NULL # Using index condition
+select * from t3 force index(a) where a<4;
+pk a b
+1 1 1
+2 2 3
+3 3 3
+set session debug_dbug= "+d,myrocks_simulate_bad_key_checksum1";
+select * from t3 force index(a) where a<4;
+ERROR HY000: Internal error: Record checksum mismatch
+select * from t4 force index(a) where a<1000000;
+ERROR HY000: Internal error: Record checksum mismatch
+set session debug_dbug= "-d,myrocks_simulate_bad_key_checksum1";
+# 4. The same for index-only reads?
+explain
+select a from t3 force index(a) where a<4;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t3 range a a 5 NULL # Using where; Using index
+select a from t3 force index(a) where a<4;
+a
+1
+2
+3
+set session debug_dbug= "+d,myrocks_simulate_bad_key_checksum1";
+select a from t3 force index(a) where a<4;
+ERROR HY000: Internal error: Record checksum mismatch
+select a from t4 force index(a) where a<1000000;
+ERROR HY000: Internal error: Record checksum mismatch
+set session debug_dbug= "-d,myrocks_simulate_bad_key_checksum1";
+set @@global.rocksdb_store_row_debug_checksums=@save_rocksdb_store_row_debug_checksums;
+set @@global.rocksdb_verify_row_debug_checksums=@save_rocksdb_verify_row_debug_checksums;
+set @@global.rocksdb_checksums_pct=@save_rocksdb_checksums_pct;
+drop table t2,t3,t4;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_concurrent_delete.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_concurrent_delete.result
new file mode 100644
index 00000000000..9106e79f80c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_concurrent_delete.result
@@ -0,0 +1,671 @@
+connect con, localhost, root,,;
+connection default;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+SET debug_sync='RESET';
+CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "", a INT);
+INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5);
+--PK first row delete
+connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk ASC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 1;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+2 2
+3 3
+4 4
+5 5
+--PK middle row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk ASC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 3;
+SET debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+--PK end row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk ASC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 5;
+SET debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+connect con, localhost, root,,;
+connection default;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+SET debug_sync='RESET';
+CREATE TABLE t1 (pk INT PRIMARY KEY, a INT, index a(a));
+INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5);
+--SK first row delete
+connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go';
+SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 1;
+SET debug_sync='now SIGNAL go';
+connection con;
+a
+2
+3
+4
+5
+--SK middle row delete
+SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go';
+SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 3;
+SET debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+--SK end row delete
+SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go';
+SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 5;
+SET debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+connect con, localhost, root,,;
+connection default;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+SET debug_sync='RESET';
+CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "", a INT);
+INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5);
+--PK first row delete
+connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk DESC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 5;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+4 4
+3 3
+2 2
+1 1
+--PK middle row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk DESC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 3;
+SET debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+--PK end row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk DESC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 1;
+SET debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+connect con, localhost, root,,;
+connection default;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+SET debug_sync='RESET';
+CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "rev:cf2", a INT);
+INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5);
+--PK first row delete
+connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk ASC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 1;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+2 2
+3 3
+4 4
+5 5
+--PK middle row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk ASC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 3;
+SET debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+--PK end row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk ASC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 5;
+SET debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+connect con, localhost, root,,;
+connection default;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+SET debug_sync='RESET';
+CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "rev:cf2", a INT);
+INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5);
+--PK first row delete
+connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk DESC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 5;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+4 4
+3 3
+2 2
+1 1
+--PK middle row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk DESC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 3;
+SET debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+--PK end row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk DESC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 1;
+SET debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+connect con, localhost, root,,;
+connection default;
+set debug_sync='RESET';
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+create table t1 (id1 int, id2 int, value int, primary key (id1, id2), index sk (id1, value)) engine=rocksdb;
+insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(2, 2, 2);
+--First row delete with PRIMARY
+connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+update t1 force index (PRIMARY) set value=100 where id1=1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=1;
+set debug_sync='now SIGNAL go';
+connection con;
+select * from t1 where id1=1;
+id1 id2 value
+1 2 100
+1 3 100
+1 4 100
+1 5 100
+--Middle row delete with PRIMARY
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+update t1 force index (PRIMARY) set value=200 where id1=1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=3;
+set debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+select * from t1 where id1=1;
+id1 id2 value
+1 2 100
+1 4 100
+1 5 100
+--End row delete with PRIMARY
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+update t1 force index (PRIMARY) set value=300 where id1=1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=5;
+set debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+select * from t1 where id1=1;
+id1 id2 value
+1 2 100
+1 4 100
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+connect con, localhost, root,,;
+connection default;
+set debug_sync='RESET';
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+create table t1 (id1 int, id2 int, value int, primary key (id1, id2), index sk (id1, value)) engine=rocksdb;
+insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(2, 2, 2);
+--First row delete with sk
+connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+update t1 force index (sk) set value=100 where id1=1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=1;
+set debug_sync='now SIGNAL go';
+connection con;
+select * from t1 where id1=1;
+id1 id2 value
+1 2 100
+1 3 100
+1 4 100
+1 5 100
+--Middle row delete with sk
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+update t1 force index (sk) set value=200 where id1=1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=3;
+set debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+select * from t1 where id1=1;
+id1 id2 value
+1 2 100
+1 4 100
+1 5 100
+--End row delete with sk
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+update t1 force index (sk) set value=300 where id1=1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=5;
+set debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+select * from t1 where id1=1;
+id1 id2 value
+1 2 100
+1 4 100
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+connect con, localhost, root,,;
+connection default;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SET debug_sync='RESET';
+CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "", a INT);
+INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5);
+--PK first row delete
+connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk ASC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 1;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+2 2
+3 3
+4 4
+5 5
+--PK middle row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk ASC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 3;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+2 2
+4 4
+5 5
+--PK end row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk ASC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 5;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+2 2
+4 4
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+connect con, localhost, root,,;
+connection default;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SET debug_sync='RESET';
+CREATE TABLE t1 (pk INT PRIMARY KEY, a INT, index a(a));
+INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5);
+--SK first row delete
+connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go';
+SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 1;
+SET debug_sync='now SIGNAL go';
+connection con;
+a
+2
+3
+4
+5
+--SK middle row delete
+SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go';
+SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 3;
+SET debug_sync='now SIGNAL go';
+connection con;
+a
+2
+4
+5
+--SK end row delete
+SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go';
+SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 5;
+SET debug_sync='now SIGNAL go';
+connection con;
+a
+2
+4
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+connect con, localhost, root,,;
+connection default;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SET debug_sync='RESET';
+CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "", a INT);
+INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5);
+--PK first row delete
+connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk DESC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 5;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+4 4
+3 3
+2 2
+1 1
+--PK middle row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk DESC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 3;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+4 4
+2 2
+1 1
+--PK end row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk DESC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 1;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+4 4
+2 2
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+connect con, localhost, root,,;
+connection default;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SET debug_sync='RESET';
+CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "rev:cf2", a INT);
+INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5);
+--PK first row delete
+connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk ASC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 1;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+2 2
+3 3
+4 4
+5 5
+--PK middle row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk ASC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 3;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+2 2
+4 4
+5 5
+--PK end row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk ASC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 5;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+2 2
+4 4
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+connect con, localhost, root,,;
+connection default;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SET debug_sync='RESET';
+CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "rev:cf2", a INT);
+INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5);
+--PK first row delete
+connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk DESC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 5;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+4 4
+3 3
+2 2
+1 1
+--PK middle row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk DESC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 3;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+4 4
+2 2
+1 1
+--PK end row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk DESC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 1;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+4 4
+2 2
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+connect con, localhost, root,,;
+connection default;
+set debug_sync='RESET';
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+create table t1 (id1 int, id2 int, value int, primary key (id1, id2), index sk (id1, value)) engine=rocksdb;
+insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(2, 2, 2);
+--First row delete with PRIMARY
+connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+update t1 force index (PRIMARY) set value=100 where id1=1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=1;
+set debug_sync='now SIGNAL go';
+connection con;
+select * from t1 where id1=1;
+id1 id2 value
+1 2 100
+1 3 100
+1 4 100
+1 5 100
+--Middle row delete with PRIMARY
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+update t1 force index (PRIMARY) set value=200 where id1=1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=3;
+set debug_sync='now SIGNAL go';
+connection con;
+select * from t1 where id1=1;
+id1 id2 value
+1 2 200
+1 4 200
+1 5 200
+--End row delete with PRIMARY
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+update t1 force index (PRIMARY) set value=300 where id1=1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=5;
+set debug_sync='now SIGNAL go';
+connection con;
+select * from t1 where id1=1;
+id1 id2 value
+1 2 300
+1 4 300
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+connect con, localhost, root,,;
+connection default;
+set debug_sync='RESET';
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+create table t1 (id1 int, id2 int, value int, primary key (id1, id2), index sk (id1, value)) engine=rocksdb;
+insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(2, 2, 2);
+--First row delete with sk
+connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+update t1 force index (sk) set value=100 where id1=1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=1;
+set debug_sync='now SIGNAL go';
+connection con;
+select * from t1 where id1=1;
+id1 id2 value
+1 2 100
+1 3 100
+1 4 100
+1 5 100
+--Middle row delete with sk
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+update t1 force index (sk) set value=200 where id1=1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=3;
+set debug_sync='now SIGNAL go';
+connection con;
+select * from t1 where id1=1;
+id1 id2 value
+1 2 200
+1 4 200
+1 5 200
+--End row delete with sk
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+update t1 force index (sk) set value=300 where id1=1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=5;
+set debug_sync='now SIGNAL go';
+connection con;
+select * from t1 where id1=1;
+id1 id2 value
+1 2 300
+1 4 300
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
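+# Contrast across this file: under REPEATABLE READ several of the concurrent
+# deletes surface as ERROR 40001 with "(snapshot conflict)" because the
+# locking read detects that a row changed after its snapshot was taken,
+# while under READ COMMITTED every interleaving succeeds and the scan simply
+# skips the deleted row.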
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_datadir.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_datadir.result
new file mode 100644
index 00000000000..40c53f6fd8a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_datadir.result
@@ -0,0 +1,2 @@
+Check for MANIFEST files
+MANIFEST-000006
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_deadlock_detect_rc.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_deadlock_detect_rc.result
new file mode 100644
index 00000000000..ea2506941b2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_deadlock_detect_rc.result
@@ -0,0 +1,89 @@
+set @prior_rocksdb_lock_wait_timeout = @@rocksdb_lock_wait_timeout;
+set @prior_rocksdb_deadlock_detect = @@rocksdb_deadlock_detect;
+set global rocksdb_lock_wait_timeout = 100000;
+set global rocksdb_deadlock_detect = ON;
+create table t (i int primary key);
+create table r1 (id int primary key, value int);
+insert into r1 values (1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10);
+create table r2 like r1;
+insert into r2 select * from r1;
+connect con1,localhost,root,,;
+begin;
+update r2 set value=100 where id=9;
+connect con2,localhost,root,,;
+begin;
+update r1 set value=100 where id=8;
+select * from r2 for update;;
+connection con1;
+select * from r1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+connection con2;
+id value
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+10 10
+rollback;
+connection con1;
+begin;
+insert into t values (1);
+connection con2;
+begin;
+insert into t values (2);
+connect con3,localhost,root,,;
+begin;
+insert into t values (3);
+connection con1;
+select * from t where i = 2 for update;
+connection con2;
+select * from t where i = 3 for update;
+connection con3;
+select * from t;
+i
+3
+insert into t values (4), (1);
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+# Transaction should be rolled back
+select * from t;
+i
+rollback;
+connection con2;
+i
+rollback;
+connection con1;
+i
+rollback;
+connection default;
+create table t1 (id int primary key, value int, value2 int, index(value)) engine=rocksdb;
+insert into t1 values (1,1,1),(2,2,2),(3,3,3),(4,4,4),(5,5,5),(6,6,6),(7,7,7),(8,8,8),(9,9,9),(10,10,10);
+connection con1;
+begin;
+update t1 force index (value) set value2=value2+1 where value=3;
+connection con2;
+begin;
+update t1 force index (value) set value2=value2+1 where value=2;
+update t1 force index (value) set value2=value2+1 where value=4;
+connection con1;
+update t1 force index (value) set value2=value2+1 where value=4;
+connection con2;
+update t1 force index (value) set value2=value2+1 where value=3;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+connection con1;
+rollback;
+connection con2;
+rollback;
+drop table t1;
+connection default;
+disconnect con1;
+disconnect con2;
+disconnect con3;
+set global rocksdb_lock_wait_timeout = @prior_rocksdb_lock_wait_timeout;
+set global rocksdb_deadlock_detect = @prior_rocksdb_deadlock_detect;
+drop table t,r1,r2;
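+# With rocksdb_deadlock_detect=ON, a lock cycle fails immediately with
+# ERROR 40001 instead of waiting out rocksdb_lock_wait_timeout; the timeout
+# is raised to 100000 here so a missed detection would hang the test visibly
+# rather than time out quietly.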
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_deadlock_detect_rr.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_deadlock_detect_rr.result
new file mode 100644
index 00000000000..ea2506941b2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_deadlock_detect_rr.result
@@ -0,0 +1,89 @@
+set @prior_rocksdb_lock_wait_timeout = @@rocksdb_lock_wait_timeout;
+set @prior_rocksdb_deadlock_detect = @@rocksdb_deadlock_detect;
+set global rocksdb_lock_wait_timeout = 100000;
+set global rocksdb_deadlock_detect = ON;
+create table t (i int primary key);
+create table r1 (id int primary key, value int);
+insert into r1 values (1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10);
+create table r2 like r1;
+insert into r2 select * from r1;
+connect con1,localhost,root,,;
+begin;
+update r2 set value=100 where id=9;
+connect con2,localhost,root,,;
+begin;
+update r1 set value=100 where id=8;
+select * from r2 for update;;
+connection con1;
+select * from r1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+connection con2;
+id value
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+10 10
+rollback;
+connection con1;
+begin;
+insert into t values (1);
+connection con2;
+begin;
+insert into t values (2);
+connect con3,localhost,root,,;
+begin;
+insert into t values (3);
+connection con1;
+select * from t where i = 2 for update;
+connection con2;
+select * from t where i = 3 for update;
+connection con3;
+select * from t;
+i
+3
+insert into t values (4), (1);
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+# Transaction should be rolled back
+select * from t;
+i
+rollback;
+connection con2;
+i
+rollback;
+connection con1;
+i
+rollback;
+connection default;
+create table t1 (id int primary key, value int, value2 int, index(value)) engine=rocksdb;
+insert into t1 values (1,1,1),(2,2,2),(3,3,3),(4,4,4),(5,5,5),(6,6,6),(7,7,7),(8,8,8),(9,9,9),(10,10,10);
+connection con1;
+begin;
+update t1 force index (value) set value2=value2+1 where value=3;
+connection con2;
+begin;
+update t1 force index (value) set value2=value2+1 where value=2;
+update t1 force index (value) set value2=value2+1 where value=4;
+connection con1;
+update t1 force index (value) set value2=value2+1 where value=4;
+connection con2;
+update t1 force index (value) set value2=value2+1 where value=3;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+connection con1;
+rollback;
+connection con2;
+rollback;
+drop table t1;
+connection default;
+disconnect con1;
+disconnect con2;
+disconnect con3;
+set global rocksdb_lock_wait_timeout = @prior_rocksdb_lock_wait_timeout;
+set global rocksdb_deadlock_detect = @prior_rocksdb_deadlock_detect;
+drop table t,r1,r2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_deadlock_stress_rc.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_deadlock_stress_rc.result
new file mode 100644
index 00000000000..f97da0099fe
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_deadlock_stress_rc.result
@@ -0,0 +1,8 @@
+create table t1 (a int primary key, b int) engine=rocksdb;
+set @prior_rocksdb_lock_wait_timeout = @@rocksdb_lock_wait_timeout;
+set @prior_rocksdb_deadlock_detect = @@rocksdb_deadlock_detect;
+set global rocksdb_lock_wait_timeout = 100000;
+set global rocksdb_deadlock_detect = ON;
+set global rocksdb_lock_wait_timeout = @prior_rocksdb_lock_wait_timeout;
+set global rocksdb_deadlock_detect = @prior_rocksdb_deadlock_detect;
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_deadlock_stress_rr.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_deadlock_stress_rr.result
new file mode 100644
index 00000000000..f97da0099fe
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_deadlock_stress_rr.result
@@ -0,0 +1,8 @@
+create table t1 (a int primary key, b int) engine=rocksdb;
+set @prior_rocksdb_lock_wait_timeout = @@rocksdb_lock_wait_timeout;
+set @prior_rocksdb_deadlock_detect = @@rocksdb_deadlock_detect;
+set global rocksdb_lock_wait_timeout = 100000;
+set global rocksdb_deadlock_detect = ON;
+set global rocksdb_lock_wait_timeout = @prior_rocksdb_lock_wait_timeout;
+set global rocksdb_deadlock_detect = @prior_rocksdb_deadlock_detect;
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_debug.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_debug.result
new file mode 100644
index 00000000000..a245fa851de
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_debug.result
@@ -0,0 +1,11 @@
+#
+# Issue #728: Assertion `covers_key(b)' failed in int
+# myrocks::Rdb_key_def::cmp_full_keys(const rocksdb::Slice&,
+# const rocksdb::Slice&)
+#
+CREATE TABLE t2(c1 TINYINT SIGNED KEY,c2 TINYINT UNSIGNED,c3 INT);
+INSERT INTO t2(c1)VALUES(0);
+SELECT * FROM t2 WHERE c1<=127 ORDER BY c1 DESC;
+c1 c2 c3
+0 NULL NULL
+DROP TABLE t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_icp.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_icp.result
new file mode 100644
index 00000000000..9ef1ff28f10
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_icp.result
@@ -0,0 +1,257 @@
+select * from information_schema.engines where engine = 'rocksdb';
+ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS
+ROCKSDB DEFAULT RocksDB storage engine YES YES YES
+drop table if exists t0,t1,t2,t3;
+create table t0 (a int) engine=myisam;
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int) engine=myisam;
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+create table t2 (
+pk int primary key,
+kp1 int,
+kp2 int,
+col1 int,
+key (kp1,kp2) comment 'cf1'
+) engine=rocksdb;
+insert into t2 select a,a,a,a from t1;
+# Try a basic case:
+explain
+select * from t2 where kp1 between 1 and 10 and mod(kp2,2)=0;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range kp1 kp1 5 NULL # Using index condition
+select * from t2 where kp1 between 1 and 10 and mod(kp2,2)=0;
+pk kp1 kp2 col1
+2 2 2 2
+4 4 4 4
+6 6 6 6
+8 8 8 8
+10 10 10 10
+# Check that ICP doesn't work for columns where the column value
+# can't be restored from the mem-comparable form:
+create table t3 (
+pk int primary key,
+kp1 int,
+kp2 varchar(10) collate utf8_general_ci,
+col1 int,
+key (kp1,kp2) comment 'cf1'
+) engine=rocksdb;
+insert into t3 select a,a/10,a,a from t1;
+# This must not use ICP:
+explain
+select * from t3 where kp1=3 and kp2 like '%foo%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t3 ref kp1 kp1 5 const # Using where
+explain format=json
+select * from t3 where kp1 between 2 and 4 and mod(kp1,3)=0 and kp2 like '%foo%';
+EXPLAIN
+{
+ "query_block": {
+ "select_id": 1,
+ "table": {
+ "table_name": "t3",
+ "access_type": "range",
+ "possible_keys": ["kp1"],
+ "key": "kp1",
+ "key_length": "5",
+ "used_key_parts": ["kp1"],
+ "rows": 1000,
+ "filtered": 100,
+ "index_condition": "t3.kp1 between 2 and 4 and t3.kp1 % 3 = 0",
+ "attached_condition": "t3.kp2 like '%foo%'"
+ }
+ }
+}
+# Check that we handle the case where an out-of-range key is encountered
+# sooner than a match for the index condition
+explain
+select * from t2 where kp1< 3 and kp2+1>50000;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range kp1 kp1 5 NULL # Using index condition
+select * from t2 where kp1< 3 and kp2+1>50000;
+pk kp1 kp2 col1
+explain
+select * from t2 where kp1< 3 and kp2+1>50000;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range kp1 kp1 5 NULL # Using index condition
+select * from t2 where kp1< 3 and kp2+1>50000;
+pk kp1 kp2 col1
+# Try doing backwards scans
+# MariaDB: ICP is not supported for reverse scans.
+explain
+select * from t2 where kp1 between 1 and 10 and mod(kp2,2)=0 order by kp1 desc;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range kp1 kp1 5 NULL # Using where
+select * from t2 where kp1 between 1 and 10 and mod(kp2,2)=0 order by kp1 desc;
+pk kp1 kp2 col1
+10 10 10 10
+8 8 8 8
+6 6 6 6
+4 4 4 4
+2 2 2 2
+explain
+select * from t2 where kp1 >990 and mod(kp2,2)=0 order by kp1 desc;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range kp1 kp1 5 NULL # Using where
+select * from t2 where kp1 >990 and mod(kp2,2)=0 order by kp1 desc;
+pk kp1 kp2 col1
+998 998 998 998
+996 996 996 996
+994 994 994 994
+992 992 992 992
+explain
+select * from t2 where kp1< 3 and kp2+1>50000 order by kp1 desc;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range kp1 kp1 5 NULL # Using where
+select * from t2 where kp1< 3 and kp2+1>50000 order by kp1 desc;
+pk kp1 kp2 col1
+drop table t0,t1,t2,t3;
+#
+# Check how ICP affects counters
+#
+# First, some preparations
+#
+# in facebook/mysql-5.6, it was:
+# select ROWS_READ, ROWS_REQUESTED, ROWS_INDEX_FIRST, ROWS_INDEX_NEXT
+#
+# In MariaDB, we do:
+create procedure save_read_stats()
+begin
+set @rr=(select ROWS_READ
+from information_schema.table_statistics
+where table_name='t4' and table_schema=database());
+set @rif= (select VARIABLE_VALUE
+from information_schema.session_status
+where VARIABLE_NAME='Handler_read_first');
+set @rin=(select VARIABLE_VALUE
+from information_schema.session_status
+where VARIABLE_NAME='Handler_read_next');
+set @icp_attempts=(select VARIABLE_VALUE
+from information_schema.session_status
+where VARIABLE_NAME='Handler_icp_attempts');
+set @icp_matches=(select VARIABLE_VALUE
+from information_schema.session_status
+where VARIABLE_NAME='Handler_icp_match');
+end|
+create procedure get_read_stats()
+begin
+select
+(select ROWS_READ
+from information_schema.table_statistics
+where table_name='t4' and table_schema=database()
+) - @rr as ROWS_READ_DIFF,
+(select VARIABLE_VALUE - @rif
+from information_schema.session_status
+where VARIABLE_NAME='Handler_read_first') as ROWS_INDEX_FIRST,
+(select VARIABLE_VALUE - @rin
+from information_schema.session_status
+where VARIABLE_NAME='Handler_read_next') as ROWS_INDEX_NEXT,
+(select VARIABLE_VALUE - @icp_attempts
+from information_schema.session_status
+where VARIABLE_NAME='Handler_icp_attempts') as ICP_ATTEMPTS,
+(select VARIABLE_VALUE - @icp_matches
+from information_schema.session_status
+where VARIABLE_NAME='Handler_icp_match') as ICP_MATCHES;
+end|
+create table t4 (
+id int,
+id1 int,
+id2 int,
+value int,
+value2 varchar(100),
+primary key (id),
+key id1_id2 (id1, id2) comment 'cf1'
+) engine=rocksdb charset=latin1 collate latin1_bin;
+insert into t4 values
+(1,1,1,1,1), (2,1,2,2,2), (3,1,3,3,3),(4,1,4,4,4),(5,1,5,5,5),
+(6,1,6,6,6), (7,1,7,7,7), (8,1,8,8,8),(9,1,9,9,9),(10,1,10,10,10);
+#
+# Now, the test itself
+#
+call save_read_stats();
+call get_read_stats();
+ROWS_READ_DIFF ROWS_INDEX_FIRST ROWS_INDEX_NEXT ICP_ATTEMPTS ICP_MATCHES
+0 0 0 0 0
+# ============== index-only query ==============
+explain
+select id1,id2 from t4 force index (id1_id2) where id1=1 and id2 % 10 = 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t4 ref id1_id2 id1_id2 5 const # Using where; Using index
+call save_read_stats();
+select id1,id2 from t4 force index (id1_id2) where id1=1 and id2 % 10 = 1;
+id1 id2
+1 1
+call get_read_stats();
+ROWS_READ_DIFF 10
+ROWS_INDEX_FIRST 0
+ROWS_INDEX_NEXT 10
+ICP_ATTEMPTS 0
+ICP_MATCHES 0
+# ============== Query without ICP ==============
+set optimizer_switch='index_condition_pushdown=off';
+explain
+select * from t4 force index (id1_id2) where id1=1 and id2 % 10 = 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t4 ref id1_id2 id1_id2 5 const # Using where
+call save_read_stats();
+select * from t4 force index (id1_id2) where id1=1 and id2 % 10 = 1;
+id id1 id2 value value2
+1 1 1 1 1
+call get_read_stats();
+ROWS_READ_DIFF 10
+ROWS_INDEX_FIRST 0
+ROWS_INDEX_NEXT 10
+ICP_ATTEMPTS 0
+ICP_MATCHES 0
+# ============== Query with ICP ==============
+set optimizer_switch='index_condition_pushdown=on';
+explain
+select * from t4 force index (id1_id2) where id1=1 and id2 % 10 = 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t4 ref id1_id2 id1_id2 5 const # Using index condition
+call save_read_stats();
+select * from t4 force index (id1_id2) where id1=1 and id2 % 10 = 1;
+id id1 id2 value value2
+1 1 1 1 1
+call get_read_stats();
+ROWS_READ_DIFF 1
+ROWS_INDEX_FIRST 0
+ROWS_INDEX_NEXT 1
+ICP_ATTEMPTS 10
+ICP_MATCHES 1
+drop table t4;
+drop procedure save_read_stats;
+drop procedure get_read_stats;
+#
+# Issue #67: Inefficient index condition pushdown
+#
+create table t0 (a int) engine=myisam;
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1 (
+pk int not null primary key,
+key1 bigint(20) unsigned,
+col1 int,
+key (key1)
+) engine=rocksdb;
+insert into t1
+select
+A.a+10*B.a+100*C.a,
+A.a+10*B.a+100*C.a,
+1234
+from t0 A, t0 B, t0 C;
+set @count=0;
+explain
+select * from t1 force index(key1) where key1=1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref key1 key1 9 const #
+set @count_diff =(select (value - @count) from information_schema.rocksdb_perf_context
+where table_schema=database() and table_name='t1' and stat_type='INTERNAL_KEY_SKIPPED_COUNT');
+select * from t1 force index(key1) where key1=1;
+pk key1 col1
+1 1 1234
+set @count_diff =(select (value - @count) from information_schema.rocksdb_perf_context
+where table_schema=database() and table_name='t1' and stat_type='INTERNAL_KEY_SKIPPED_COUNT');
+# The following must be =1, or in any case not 999:
+select @count_diff as "INTERNAL_KEY_SKIPPED_COUNT increment";
+INTERNAL_KEY_SKIPPED_COUNT increment
+1
+drop table t0,t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_icp_rev.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_icp_rev.result
new file mode 100644
index 00000000000..9c4b2d22ad7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_icp_rev.result
@@ -0,0 +1,223 @@
+select * from information_schema.engines where engine = 'rocksdb';
+ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS
+ROCKSDB DEFAULT RocksDB storage engine YES YES YES
+drop table if exists t0,t1,t2,t3;
+create table t0 (a int) engine=myisam;
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int) engine=myisam;
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+create table t2 (
+pk int primary key,
+kp1 int,
+kp2 int,
+col1 int,
+key (kp1,kp2) comment 'rev:cf1'
+) engine=rocksdb;
+insert into t2 select a,a,a,a from t1;
+# Try a basic case:
+explain
+select * from t2 where kp1 between 1 and 10 and mod(kp2,2)=0;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range kp1 kp1 5 NULL # Using index condition
+select * from t2 where kp1 between 1 and 10 and mod(kp2,2)=0;
+pk kp1 kp2 col1
+2 2 2 2
+4 4 4 4
+6 6 6 6
+8 8 8 8
+10 10 10 10
+# Check that ICP doesn't work for columns where the column value
+# can't be restored from the mem-comparable form:
+create table t3 (
+pk int primary key,
+kp1 int,
+kp2 varchar(10) collate utf8_general_ci,
+col1 int,
+key (kp1,kp2) comment 'rev:cf1'
+) engine=rocksdb;
+insert into t3 select a,a/10,a,a from t1;
+# This must not use ICP:
+explain
+select * from t3 where kp1=3 and kp2 like '%foo%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t3 ref kp1 kp1 5 const # Using where
+explain format=json
+select * from t3 where kp1 between 2 and 4 and mod(kp1,3)=0 and kp2 like '%foo%';
+EXPLAIN
+{
+ "query_block": {
+ "select_id": 1,
+ "table": {
+ "table_name": "t3",
+ "access_type": "range",
+ "possible_keys": ["kp1"],
+ "key": "kp1",
+ "key_length": "5",
+ "used_key_parts": ["kp1"],
+ "rows": 1000,
+ "filtered": 100,
+ "index_condition": "t3.kp1 between 2 and 4 and t3.kp1 % 3 = 0",
+ "attached_condition": "t3.kp2 like '%foo%'"
+ }
+ }
+}
+# Check that we handle the case where an out-of-range key is encountered
+# sooner than a match for the index condition
+explain
+select * from t2 where kp1< 3 and kp2+1>50000;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range kp1 kp1 5 NULL # Using index condition
+select * from t2 where kp1< 3 and kp2+1>50000;
+pk kp1 kp2 col1
+explain
+select * from t2 where kp1< 3 and kp2+1>50000;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range kp1 kp1 5 NULL # Using index condition
+select * from t2 where kp1< 3 and kp2+1>50000;
+pk kp1 kp2 col1
+# Try doing backwards scans
+# MariaDB: ICP is not supported for reverse scans.
+explain
+select * from t2 where kp1 between 1 and 10 and mod(kp2,2)=0 order by kp1 desc;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range kp1 kp1 5 NULL # Using where
+select * from t2 where kp1 between 1 and 10 and mod(kp2,2)=0 order by kp1 desc;
+pk kp1 kp2 col1
+10 10 10 10
+8 8 8 8
+6 6 6 6
+4 4 4 4
+2 2 2 2
+explain
+select * from t2 where kp1 >990 and mod(kp2,2)=0 order by kp1 desc;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range kp1 kp1 5 NULL # Using where
+select * from t2 where kp1 >990 and mod(kp2,2)=0 order by kp1 desc;
+pk kp1 kp2 col1
+998 998 998 998
+996 996 996 996
+994 994 994 994
+992 992 992 992
+explain
+select * from t2 where kp1< 3 and kp2+1>50000 order by kp1 desc;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range kp1 kp1 5 NULL # Using where
+select * from t2 where kp1< 3 and kp2+1>50000 order by kp1 desc;
+pk kp1 kp2 col1
+drop table t0,t1,t2,t3;
+#
+# Check how ICP affects counters
+#
+# First, some preparations
+#
+# in facebook/mysql-5.6, it was:
+# select ROWS_READ, ROWS_REQUESTED, ROWS_INDEX_FIRST, ROWS_INDEX_NEXT
+#
+# In MariaDB, we do:
+create procedure save_read_stats()
+begin
+set @rr=(select ROWS_READ
+from information_schema.table_statistics
+where table_name='t4' and table_schema=database());
+set @rif= (select VARIABLE_VALUE
+from information_schema.session_status
+where VARIABLE_NAME='Handler_read_first');
+set @rin=(select VARIABLE_VALUE
+from information_schema.session_status
+where VARIABLE_NAME='Handler_read_next');
+set @icp_attempts=(select VARIABLE_VALUE
+from information_schema.session_status
+where VARIABLE_NAME='Handler_icp_attempts');
+set @icp_matches=(select VARIABLE_VALUE
+from information_schema.session_status
+where VARIABLE_NAME='Handler_icp_match');
+end|
+create procedure get_read_stats()
+begin
+select
+(select ROWS_READ
+from information_schema.table_statistics
+where table_name='t4' and table_schema=database()
+) - @rr as ROWS_READ_DIFF,
+(select VARIABLE_VALUE - @rif
+from information_schema.session_status
+where VARIABLE_NAME='Handler_read_first') as ROWS_INDEX_FIRST,
+(select VARIABLE_VALUE - @rin
+from information_schema.session_status
+where VARIABLE_NAME='Handler_read_next') as ROWS_INDEX_NEXT,
+(select VARIABLE_VALUE - @icp_attempts
+from information_schema.session_status
+where VARIABLE_NAME='Handler_icp_attempts') as ICP_ATTEMPTS,
+(select VARIABLE_VALUE - @icp_matches
+from information_schema.session_status
+where VARIABLE_NAME='Handler_icp_match') as ICP_MATCHES;
+end|
+create table t4 (
+id int,
+id1 int,
+id2 int,
+value int,
+value2 varchar(100),
+primary key (id),
+key id1_id2 (id1, id2) comment 'rev:cf1'
+) engine=rocksdb charset=latin1 collate latin1_bin;
+insert into t4 values
+(1,1,1,1,1), (2,1,2,2,2), (3,1,3,3,3),(4,1,4,4,4),(5,1,5,5,5),
+(6,1,6,6,6), (7,1,7,7,7), (8,1,8,8,8),(9,1,9,9,9),(10,1,10,10,10);
+#
+# Now, the test itself
+#
+call save_read_stats();
+call get_read_stats();
+ROWS_READ_DIFF ROWS_INDEX_FIRST ROWS_INDEX_NEXT ICP_ATTEMPTS ICP_MATCHES
+0 0 0 0 0
+# ============== index-only query ==============
+explain
+select id1,id2 from t4 force index (id1_id2) where id1=1 and id2 % 10 = 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t4 ref id1_id2 id1_id2 5 const # Using where; Using index
+call save_read_stats();
+select id1,id2 from t4 force index (id1_id2) where id1=1 and id2 % 10 = 1;
+id1 id2
+1 1
+call get_read_stats();
+ROWS_READ_DIFF 10
+ROWS_INDEX_FIRST 0
+ROWS_INDEX_NEXT 10
+ICP_ATTEMPTS 0
+ICP_MATCHES 0
+# ============== Query without ICP ==============
+set optimizer_switch='index_condition_pushdown=off';
+explain
+select * from t4 force index (id1_id2) where id1=1 and id2 % 10 = 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t4 ref id1_id2 id1_id2 5 const # Using where
+call save_read_stats();
+select * from t4 force index (id1_id2) where id1=1 and id2 % 10 = 1;
+id id1 id2 value value2
+1 1 1 1 1
+call get_read_stats();
+ROWS_READ_DIFF 10
+ROWS_INDEX_FIRST 0
+ROWS_INDEX_NEXT 10
+ICP_ATTEMPTS 0
+ICP_MATCHES 0
+# ============== Query with ICP ==============
+set optimizer_switch='index_condition_pushdown=on';
+explain
+select * from t4 force index (id1_id2) where id1=1 and id2 % 10 = 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t4 ref id1_id2 id1_id2 5 const # Using index condition
+call save_read_stats();
+select * from t4 force index (id1_id2) where id1=1 and id2 % 10 = 1;
+id id1 id2 value value2
+1 1 1 1 1
+call get_read_stats();
+ROWS_READ_DIFF 1
+ROWS_INDEX_FIRST 0
+ROWS_INDEX_NEXT 1
+ICP_ATTEMPTS 10
+ICP_MATCHES 1
+drop table t4;
+drop procedure save_read_stats;
+drop procedure get_read_stats;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_locks.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_locks.result
new file mode 100644
index 00000000000..c09959736dd
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_locks.result
@@ -0,0 +1,64 @@
+create table t1 (pk int not null primary key) engine=rocksdb;
+insert into t1 values (1),(2),(3);
+set autocommit=0;
+begin;
+select * from t1 where pk=1 for update;
+pk
+1
+connect con1,localhost,root,,;
+connection con1;
+call mtr.add_suppression("Got snapshot conflict errors");
+### Connection con1
+set @@rocksdb_lock_wait_timeout=500;
+set autocommit=0;
+begin;
+select * from t1 where pk=1 for update;;
+connection default;
+### Connection default
+rollback;
+connection con1;
+pk
+1
+rollback;
+connection default;
+begin;
+select * from t1 where pk=1 for update;
+pk
+1
+connection con1;
+### Connection con1
+set @@rocksdb_lock_wait_timeout=2;
+set autocommit=0;
+begin;
+select * from t1 where pk=1 for update;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection default;
+rollback;
+set autocommit=1;
+connection con1;
+drop table t1;
+connection default;
+#
+# Now, test what happens if another transaction modified the record and committed
+#
+CREATE TABLE t1 (
+id int primary key,
+value int
+) engine=rocksdb collate latin1_bin;
+insert into t1 values (1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10);
+connection con1;
+BEGIN;
+SELECT * FROM t1 WHERE id=3;
+id value
+3 3
+connection default;
+BEGIN;
+UPDATE t1 SET value=30 WHERE id=3;
+COMMIT;
+connection con1;
+SELECT * FROM t1 WHERE id=3 FOR UPDATE;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+ROLLBACK;
+disconnect con1;
+connection default;
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_parts.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_parts.result
new file mode 100644
index 00000000000..2fde11c2a08
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_parts.result
@@ -0,0 +1,151 @@
+drop table if exists t1,t2;
+# Tests for MyRocks + partitioning
+#
+# MyRocks Issue #70: Server crashes in Rdb_key_def::get_primary_key_tuple
+#
+CREATE TABLE t1 (pk INT PRIMARY KEY, f1 INT, f2 INT, KEY(f2)) ENGINE=RocksDB
+PARTITION BY HASH(pk) PARTITIONS 2;
+INSERT INTO t1 VALUES (1, 6, NULL), (2, NULL, 1);
+CREATE TABLE t2 (pk INT PRIMARY KEY, f1 INT) ENGINE=RocksDB;
+INSERT INTO t2 VALUES (1, 1), (2, 1);
+SELECT f1 FROM t1 WHERE f2 = ( SELECT f1 FROM t2 WHERE pk = 2 );
+f1
+NULL
+drop table t1,t2;
+#
+# Issue #105: key_info[secondary_key].actual_key_parts does not include primary key on partitioned tables
+#
+CREATE TABLE t1 (
+id INT PRIMARY KEY,
+a set ('a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z') CHARACTER SET utf8,
+b set ('a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z') CHARACTER SET utf8 default null,
+c set ('a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z') CHARACTER SET utf8 not null,
+INDEX (a),
+INDEX (b),
+INDEX (c)
+) ENGINE=RocksDB PARTITION BY key (id) partitions 2;
+INSERT INTO t1 (id, b) VALUES (28, 3);
+Warnings:
+Warning 1364 Field 'c' doesn't have a default value
+UPDATE t1 SET id=8 WHERE c < 8 LIMIT 1;
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+drop table t1;
+#
+# Issue #105, another testcase
+#
+create table t1 (
+pk int primary key,
+col1 int,
+col2 int,
+key (col1) comment 'rev:cf_issue105'
+) engine=rocksdb partition by hash(pk) partitions 2;
+insert into t1 values (1,10,10);
+insert into t1 values (2,10,10);
+insert into t1 values (11,20,20);
+insert into t1 values (12,20,20);
+set @tmp_rfirr= @@rocksdb_force_index_records_in_range;
+set rocksdb_force_index_records_in_range= 12;
+explain select * from t1 force index(col1) where col1=10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref col1 col1 5 const #
+select * from t1 force index(col1) where col1=10;
+pk col1 col2
+1 10 10
+2 10 10
+select * from t1 use index () where col1=10;
+pk col1 col2
+2 10 10
+1 10 10
+set rocksdb_force_index_records_in_range= @tmp_rfirr;
+drop table t1;
+#
+# Issue #108: Index-only scans do not work for partitioned tables and extended keys
+#
+create table t1 (
+pk int primary key,
+col1 int,
+col2 int,
+key (col1)
+) engine=rocksdb partition by hash(pk) partitions 2;
+insert into t1 values (1,10,10);
+insert into t1 values (2,10,10);
+insert into t1 values (11,20,20);
+insert into t1 values (12,20,20);
+# The following must use "Using index"
+explain select pk from t1 force index(col1) where col1=10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref col1 col1 5 const 2000 Using index
+drop table t1;
+#
+# Issue #214: subqueries cause crash
+#
+create TABLE t1(a int,b int,c int,primary key(a,b))
+partition by list (b*a) (partition x1 values in (1) tablespace ts1,
+partition x2 values in (3,11,5,7) tablespace ts2,
+partition x3 values in (16,8,5+19,70-43) tablespace ts3);
+create table t2(b binary(2));
+set session optimizer_switch='materialization=off';
+insert into t1(a,b) values(1,7);
+select a from t1 where a in (select a from t1 where a in (select b from t2));
+a
+drop table t1, t2;
+#
+# Issue #260: altering name to invalid value leaves table inaccessible
+#
+CREATE TABLE t1 (c1 INT NOT NULL, c2 CHAR(5)) PARTITION BY HASH(c1) PARTITIONS 4;
+INSERT INTO t1 VALUES(1,'a');
+RENAME TABLE t1 TO db3.t3;
+ERROR HY000: Error on rename of './test/t1' to './db3/t3' (errno: -1 "Internal error < 0 (Not system error)")
+SELECT * FROM t1;
+c1 c2
+1 a
+SHOW TABLES;
+Tables_in_test
+t1
+RENAME TABLE t1 TO test.t3;
+SELECT * FROM t3;
+c1 c2
+1 a
+SHOW TABLES;
+Tables_in_test
+t3
+CREATE DATABASE db3;
+USE test;
+RENAME TABLE t3 to db3.t2;
+USE db3;
+SELECT * FROM t2;
+c1 c2
+1 a
+SHOW TABLES;
+Tables_in_db3
+t2
+DROP TABLE t2;
+use test;
+DROP DATABASE db3;
+#
+# MDEV-13153 Assertion `global_status_var.global_memory_used == 0 '
+# failed upon server restart with partitioned RocksDB table
+#
+CREATE TABLE t1 (a INT) ENGINE=RocksDB PARTITION BY HASH(a) PARTITIONS 2;
+INSERT INTO t1 (a) VALUES (1),(2);
+ALTER TABLE t1 ADD PARTITION PARTITIONS 2;
+SELECT 1;
+1
+1
+DROP TABLE t1;
+#
+# MDEV-15245: Assertion `false' failed in myrocks::ha_rocksdb::position
+#
+CREATE TABLE t1 (a INT) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (1),(2);
+CREATE TABLE t2 (b INT) ENGINE=RocksDB
+PARTITION BY RANGE(b)
+(PARTITION p0 VALUES LESS THAN (1),
+PARTITION p1 VALUES LESS THAN MAXVALUE);
+INSERT INTO t2 VALUES (NULL),(0);
+CREATE ALGORITHM = MERGE VIEW v1 AS SELECT t2.* FROM t1, t2;
+UPDATE v1 SET b = 5;
+drop view v1;
+drop table t1,t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_qcache.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_qcache.result
new file mode 100644
index 00000000000..2847921716c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_qcache.result
@@ -0,0 +1,45 @@
+set @save_query_cache_type=@@global.query_cache_type;
+set @save_query_cache_size=@@global.query_cache_size;
+set @@global.query_cache_type=1;
+set @@global.query_cache_size=1024*1024;
+create table t1 (pk int primary key, c char(8)) engine=RocksDB;
+insert into t1 values (1,'new'),(2,'new');
+select * from t1;
+pk c
+1 new
+2 new
+connect con1,localhost,root,,;
+update t1 set c = 'updated';
+connection default;
+flush status;
+show status like 'Qcache_hits';
+Variable_name Value
+Qcache_hits 0
+show global status like 'Qcache_hits';
+Variable_name Value
+Qcache_hits 0
+select * from t1;
+pk c
+1 updated
+2 updated
+select sql_no_cache * from t1;
+pk c
+1 updated
+2 updated
+select * from t1 where pk = 1;
+pk c
+1 updated
+show status like 'Qcache_hits';
+Variable_name Value
+Qcache_hits 0
+# MariaDB: Qcache_not_cached is not incremented for select sql_no_cache queries
+# so the following query produces 2, not 3:
+show status like 'Qcache_not_cached';
+Variable_name Value
+Qcache_not_cached 2
+show global status like 'Qcache_hits';
+Variable_name Value
+Qcache_hits 0
+drop table t1;
+set @@global.query_cache_type=@save_query_cache_type;
+set @@global.query_cache_size=@save_query_cache_size;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range.result
new file mode 100644
index 00000000000..918859ea036
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range.result
@@ -0,0 +1,293 @@
+select * from information_schema.engines where engine = 'rocksdb';
+ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS
+ROCKSDB DEFAULT RocksDB storage engine YES YES YES
+drop table if exists t0,t1,t2,t3,t4,t5;
+create table t0 (a int) engine=myisam;
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int) engine=myisam;
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+create table t2 (
+pk int not null,
+a int not null,
+b int not null,
+primary key(pk),
+key(a) comment 'rev:cf1'
+) engine=rocksdb;
+insert into t2 select A.a, FLOOR(A.a/10), A.a from t1 A;
+#
+# HA_READ_KEY_EXACT tests
+#
+# Original failure was here:
+explain
+select * from t2 force index (a) where a=0;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ref a a 4 const #
+select * from t2 force index (a) where a=0;
+pk a b
+0 0 0
+1 0 1
+2 0 2
+3 0 3
+4 0 4
+5 0 5
+6 0 6
+7 0 7
+8 0 8
+9 0 9
+# The rest are for code coverage:
+explain
+select * from t2 force index (a) where a=2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ref a a 4 const #
+select * from t2 force index (a) where a=2;
+pk a b
+20 2 20
+21 2 21
+22 2 22
+23 2 23
+24 2 24
+25 2 25
+26 2 26
+27 2 27
+28 2 28
+29 2 29
+explain
+select * from t2 force index (a) where a=3 and pk=33;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 const a a 8 const,const #
+select * from t2 force index (a) where a=3 and pk=33;
+pk a b
+33 3 33
+select * from t2 force index (a) where a=99 and pk=99;
+pk a b
+select * from t2 force index (a) where a=0 and pk=0;
+pk a b
+0 0 0
+select * from t2 force index (a) where a=-1;
+pk a b
+select * from t2 force index (a) where a=-1 and pk in (101,102);
+pk a b
+select * from t2 force index (a) where a=100 and pk in (101,102);
+pk a b
+#
+# #36: Range in form tbl.key >= const doesn't work in reverse column family
+#
+explain
+select count(*) from t2 force index (a) where a>=0 and a <=1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 4 NULL # Using where; Using index
+select count(*) from t2 force index (a) where a>=0 and a <=1;
+count(*)
+20
+explain
+select count(*) from t2 force index (a) where a>=-1 and a <=1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 4 NULL # Using where; Using index
+select count(*) from t2 force index (a) where a>=-1 and a <=1;
+count(*)
+20
+explain
+select * from t2 force index (a) where a=0 and pk>=3;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 8 NULL # Using index condition
+select * from t2 force index (a) where a=0 and pk>=3;
+pk a b
+3 0 3
+4 0 4
+5 0 5
+6 0 6
+7 0 7
+8 0 8
+9 0 9
+# Try edge cases where we fall over the end of the table
+create table t3 like t2;
+insert into t3 select * from t2;
+select * from t3 where pk>=1000000;
+pk a b
+select * from t2 where pk>=1000000;
+pk a b
+#
+# #42: Range in form tbl.key > const doesn't work in reverse column family
+#
+explain
+select count(*) from t2 force index (a) where a>0;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 4 NULL # Using where; Using index
+select count(*) from t2 force index (a) where a>0;
+count(*)
+990
+explain
+select count(*) from t2 force index (a) where a>99;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 4 NULL # Using where; Using index
+select count(*) from t2 force index (a) where a>99;
+count(*)
+0
+select * from t2 where pk>1000000;
+pk a b
+select * from t3 where pk>1000000;
+pk a b
+explain
+select count(*) from t2 force index (a) where a=2 and pk>25;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 8 NULL # Using where; Using index
+select count(*) from t2 force index (a) where a=2 and pk>25;
+count(*)
+4
+select * from t2 force index (a) where a>-10 and a < 1;
+pk a b
+0 0 0
+1 0 1
+2 0 2
+3 0 3
+4 0 4
+5 0 5
+6 0 6
+7 0 7
+8 0 8
+9 0 9
+select * from t3 force index (a) where a>-10 and a < 1;
+pk a b
+0 0 0
+1 0 1
+2 0 2
+3 0 3
+4 0 4
+5 0 5
+6 0 6
+7 0 7
+8 0 8
+9 0 9
+#
+# #46: index_read_map(HA_READ_BEFORE_KEY) does not work in reverse column family
+#
+select max(a) from t2 where a < 2;
+max(a)
+1
+select max(a) from t2 where a < -1;
+max(a)
+NULL
+select max(pk) from t2 where a=3 and pk < 6;
+max(pk)
+NULL
+select max(pk) from t2 where pk < 200000;
+max(pk)
+999
+select max(pk) from t2 where pk < 20;
+max(pk)
+19
+select max(a) from t3 where a < 2;
+max(a)
+1
+select max(a) from t3 where a < -1;
+max(a)
+NULL
+select max(pk) from t3 where pk < 200000;
+max(pk)
+999
+select max(pk) from t3 where pk < 20;
+max(pk)
+19
+select max(pk) from t2 where a=3 and pk < 33;
+max(pk)
+32
+select max(pk) from t3 where a=3 and pk < 33;
+max(pk)
+32
+#
+# #48: index_read_map(HA_READ_PREFIX_LAST) does not work in reverse CF
+#
+# Tests for search_flag=HA_READ_PREFIX_LAST_OR_PREV
+# Note: the next explain has "Using index condition" in fb/mysql-5.6
+# but "Using where" in MariaDB because the latter does not
+# support ICP over reverse scans.
+explain
+select * from t2 where a between 99 and 2000 order by a desc;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 4 NULL # Using where
+select * from t2 where a between 99 and 2000 order by a desc;
+pk a b
+999 99 999
+998 99 998
+997 99 997
+996 99 996
+995 99 995
+994 99 994
+993 99 993
+992 99 992
+991 99 991
+990 99 990
+select max(a) from t2 where a <=10;
+max(a)
+10
+select max(a) from t2 where a <=-4;
+max(a)
+NULL
+select max(pk) from t2 where a=5 and pk <=55;
+max(pk)
+55
+select max(pk) from t2 where a=5 and pk <=55555;
+max(pk)
+59
+select max(pk) from t2 where a=5 and pk <=0;
+max(pk)
+NULL
+select max(pk) from t2 where pk <=-1;
+max(pk)
+NULL
+select max(pk) from t2 where pk <=999999;
+max(pk)
+999
+select max(pk) from t3 where pk <=-1;
+max(pk)
+NULL
+select max(pk) from t3 where pk <=999999;
+max(pk)
+999
+#
+# Tests for search_flag=HA_READ_PREFIX_LAST
+#
+create table t4 (
+pk int primary key,
+a int,
+b int,
+c int,
+key(a,b,c)
+) engine=rocksdb;
+insert into t4 select pk,pk,pk,pk from t2 where pk < 100;
+explain
+select * from t4 where a=1 and b in (1) order by c desc;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t4 ref a a 10 const,const # Using where; Using index
+select * from t4 where a=1 and b in (1) order by c desc;
+pk a b c
+1 1 1 1
+explain
+select * from t4 where a=5 and b in (4) order by c desc;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t4 ref a a 10 const,const # Using where; Using index
+select * from t4 where a=5 and b in (4) order by c desc;
+pk a b c
+# HA_READ_PREFIX_LAST for reverse-ordered CF
+create table t5 (
+pk int primary key,
+a int,
+b int,
+c int,
+key(a,b,c) comment 'rev:cf2'
+) engine=rocksdb;
+insert into t5 select pk,pk,pk,pk from t2 where pk < 100;
+explain
+select * from t5 where a=1 and b in (1) order by c desc;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t5 ref a a 10 const,const # Using where; Using index
+select * from t5 where a=1 and b in (1) order by c desc;
+pk a b c
+1 1 1 1
+explain
+select * from t5 where a=5 and b in (4) order by c desc;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t5 ref a a 10 const,const # Using where; Using index
+select * from t5 where a=5 and b in (4) order by c desc;
+pk a b c
+drop table t0,t1,t2,t3,t4,t5;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range2.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range2.result
new file mode 100644
index 00000000000..a925c21e188
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range2.result
@@ -0,0 +1,28 @@
+create table t1 (id1 bigint, id2 bigint, c1 bigint, c2 bigint, c3 bigint, c4 bigint, c5 bigint, c6 bigint, c7 bigint, primary key (id1, id2), index i(c1, c2));
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+select count(*) from t1;
+count(*)
+10000
+explain select c1 from t1 where c1 > 5 limit 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range i i 9 NULL # Using where; Using index
+drop table t1;
+#
+# MDEV-17414: MyROCKS order desc limit 1 fails
+#
+create table t1 (date date);
+insert into t1 values ('2018-10-04'), ('2018-10-05');
+select * from t1 where date < '2018-10-09' order by date desc limit 1;
+date
+2018-10-05
+alter table t1 add index date_index (date);
+select * from t1 where date < '2018-10-05' order by date desc limit 1;
+date
+2018-10-04
+# this should not produce an empty set:
+select * from t1 where date < '2018-10-09' order by date desc limit 1;
+date
+2018-10-05
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_read_free_rpl.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_read_free_rpl.result
new file mode 100644
index 00000000000..fabf077e27a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_read_free_rpl.result
@@ -0,0 +1,335 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+drop table if exists t1;
+create table t1 (id int primary key, value int);
+insert into t1 values (1,1), (2,2), (3,3), (4,4);
+include/sync_slave_sql_with_master.inc
+
+# regular update/delete. With rocksdb_read_free_rpl=PK_SK, rocksdb_rows_read does not increase on slaves
+
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+update t1 set value=value+1 where id=1;
+delete from t1 where id=4;
+select * from t1;
+id value
+1 2
+2 2
+3 3
+include/sync_slave_sql_with_master.inc
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+select * from t1;
+id value
+1 2
+2 2
+3 3
+
+# "rocks_read_free_rpl=PK_SK" makes "row not found error" not happen anymore
+
+include/stop_slave.inc
+delete from t1 where id in (2, 3);
+include/start_slave.inc
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+update t1 set value=value+1 where id=3;
+delete from t1 where id=2;
+select * from t1;
+id value
+1 2
+3 4
+include/sync_slave_sql_with_master.inc
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+select * from t1;
+id value
+1 2
+3 4
+
+## tables without a primary key -- read-free replication should be disabled
+
+
+# no index
+
+drop table t1;
+create table t1 (c1 int, c2 int);
+insert into t1 values (1,1), (2,2),(3,3),(4,4),(5,5);
+include/sync_slave_sql_with_master.inc
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+update t1 set c2=100 where c1=3;
+delete from t1 where c1 <= 2;
+include/sync_slave_sql_with_master.inc
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+false
+select * from t1;
+c1 c2
+3 100
+4 4
+5 5
+
+# secondary index only
+
+drop table t1;
+create table t1 (c1 int, c2 int, index i(c1));
+insert into t1 values (1,1), (2,2),(3,3),(4,4),(5,5);
+include/sync_slave_sql_with_master.inc
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+update t1 set c2=100 where c1=3;
+delete from t1 where c1 <= 2;
+include/sync_slave_sql_with_master.inc
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+false
+select * from t1;
+c1 c2
+3 100
+4 4
+5 5
+
+## large row operations -- primary key modification, secondary key modification
+
+drop table t1;
+create table t1 (id1 bigint, id2 bigint, c1 bigint, c2 bigint, c3 bigint, c4 bigint, c5 bigint, c6 bigint, c7 bigint, primary key (id1, id2), index i(c1, c2));
+include/sync_slave_sql_with_master.inc
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+
+# updating all secondary keys by 1
+
+include/sync_slave_sql_with_master.inc
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+include/diff_tables.inc [master:t1, slave:t1]
+
+# updating all primary keys by 2
+
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+include/sync_slave_sql_with_master.inc
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+include/diff_tables.inc [master:t1, slave:t1]
+
+# updating secondary keys after truncating t1 on slave
+
+truncate table t1;
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+update t1 set c2=c2+10;
+include/sync_slave_sql_with_master.inc
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+include/diff_tables.inc [master:t1, slave:t1]
+
+# updating primary keys after truncating t1 on slave
+
+truncate table t1;
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+update t1 set id2=id2+10;
+include/sync_slave_sql_with_master.inc
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+include/diff_tables.inc [master:t1, slave:t1]
+
+# deleting half of the rows
+
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+delete from t1 where id1 <= 5000;
+include/sync_slave_sql_with_master.inc
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+include/diff_tables.inc [master:t1, slave:t1]
+
+# rocksdb_read_free_rpl = PK_ONLY, i.e. read-free replication only works on tables with only a PK
+
+[on slave]
+stop slave;
+set @@global.rocksdb_read_free_rpl = PK_ONLY;
+start slave;
+[on master]
+create table t2 (id int primary key, i1 int, i2 int, value int);
+create table u2 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2));
+insert into t2 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+insert into u2 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+include/sync_slave_sql_with_master.inc
+[on slave]
+delete from t2 where id <= 2;
+delete from u2 where id <= 2;
+[on master]
+update t2 set i2=100, value=100 where id=1;
+update u2 set i2=100, value=100 where id=1;
+[on slave]
+call mtr.add_suppression("Slave SQL.*Could not execute Update_rows event on table test.u2.*Error_code.*");
+call mtr.add_suppression("Slave: Can't find record in 'u2'.*");
+include/wait_for_slave_sql_error.inc [errno=1032]
+select count(*) from t2 force index(primary);
+count(*)
+2
+select * from t2 where id=1;
+id i1 i2 value
+1 1 100 100
+select i1 from t2 where i1=1;
+i1
+1
+select i2 from t2 where i2=100;
+i2
+100
+select count(*) from u2 force index(primary);
+count(*)
+1
+select count(*) from u2 force index(i1);
+count(*)
+1
+select count(*) from u2 force index(i2);
+count(*)
+1
+select * from u2 where id=1;
+id i1 i2 value
+select i1 from u2 where i1=1;
+i1
+select i2 from u2 where i2=100;
+i2
+include/wait_for_slave_sql_to_start.inc
+[on slave]
+stop slave;
+set @@global.rocksdb_read_free_rpl = PK_SK;
+start slave;
+
+# some tables with read-free replication on and some with it off
+
+[on slave]
+stop slave;
+set @@global.rocksdb_read_free_rpl_tables = "t.*";
+start slave;
+[on master]
+drop table if exists t2;
+drop table if exists u2;
+create table t2 (id int primary key, i1 int, i2 int, value int);
+create table u2 (id int primary key, i1 int, i2 int, value int);
+insert into t2 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+insert into u2 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+include/sync_slave_sql_with_master.inc
+[on slave]
+delete from t2 where id <= 2;
+delete from u2 where id <= 2;
+[on master]
+update t2 set i2=100, value=100 where id=1;
+update u2 set i2=100, value=100 where id=1;
+[on slave]
+call mtr.add_suppression("Slave SQL.*Could not execute Update_rows event on table test.u2.*Error_code.*");
+call mtr.add_suppression("Slave: Can't find record in 'u2'.*");
+include/wait_for_slave_sql_error.inc [errno=1032]
+select count(*) from t2 force index(primary);
+count(*)
+2
+select * from t2 where id=1;
+id i1 i2 value
+1 1 100 100
+select i1 from t2 where i1=1;
+i1
+1
+select i2 from t2 where i2=100;
+i2
+100
+select count(*) from u2 force index(primary);
+count(*)
+1
+select * from u2 where id=1;
+id i1 i2 value
+select i1 from u2 where i1=1;
+i1
+select i2 from u2 where i2=100;
+i2
+include/wait_for_slave_sql_to_start.inc
+[on slave]
+stop slave;
+set @@global.rocksdb_read_free_rpl_tables = ".*";
+start slave;
+
+# secondary keys lose rows
+
+[on master]
+create table t3 (id int primary key, i1 int, i2 int, value int, index(i1),
+index(i2));
+insert into t3 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+include/sync_slave_sql_with_master.inc
+[on slave]
+delete from t3 where id <= 2;
+[on master]
+update t3 set i2=100, value=100 where id=1;
+include/sync_slave_sql_with_master.inc
+select count(*) from t3 force index(primary);
+count(*)
+2
+select count(*) from t3 force index(i1);
+count(*)
+1
+select count(*) from t3 force index(i2);
+count(*)
+2
+select * from t3 where id=1;
+id i1 i2 value
+1 1 100 100
+select i1 from t3 where i1=1;
+i1
+select i2 from t3 where i2=100;
+i2
+100
+
+# secondary keys have extra rows
+
+[on master]
+create table t4 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2));
+insert into t4 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+include/sync_slave_sql_with_master.inc
+[on slave]
+update t4 set i1=100 where id=1;
+[on master]
+delete from t4 where id=1;
+include/sync_slave_sql_with_master.inc
+[on slave]
+select count(*) from t4 force index(primary);
+count(*)
+2
+select count(*) from t4 force index(i1);
+count(*)
+3
+select count(*) from t4 force index(i2);
+count(*)
+2
+select i1 from t4 where i1=100;
+i1
+100
+
+# inserts are also read-free
+
+[on master]
+drop table if exists t2;
+drop table if exists t3;
+create table t2 (id int primary key, i1 int, i2 int);
+create table t3 (id int primary key, i1 int, i2 int, key(i1));
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+insert into t2 values(1, 1, 1);
+insert into t2 values(2, 2, 2);
+insert into t3 values(1, 1, 1);
+insert into t3 values(2, 2, 2);
+include/sync_slave_sql_with_master.inc
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+select * from t2;
+id i1 i2
+1 1 1
+2 2 2
+select * from t3;
+id i1 i2
+1 1 1
+2 2 2
+drop table t1, t2, t3, t4, u2;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_read_free_rpl_stress.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_read_free_rpl_stress.result
new file mode 100644
index 00000000000..9e3c7a0582b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_read_free_rpl_stress.result
@@ -0,0 +1,35 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+include/stop_slave.inc
+set @@global.rocksdb_read_free_rpl = PK_SK;
+include/start_slave.inc
+include/sync_slave_sql_with_master.inc
+include/diff_tables.inc [master:t1, slave:t1]
+include/diff_tables.inc [master:t2, slave:t2]
+include/diff_tables.inc [master:t3, slave:t3]
+include/diff_tables.inc [master:t4, slave:t4]
+include/diff_tables.inc [master:t5, slave:t5]
+include/diff_tables.inc [master:t6, slave:t6]
+include/diff_tables.inc [master:t7, slave:t7]
+include/diff_tables.inc [master:t8, slave:t8]
+include/sync_slave_sql_with_master.inc
+include/stop_slave.inc
+set @@global.rocksdb_read_free_rpl = PK_ONLY;
+include/start_slave.inc
+include/sync_slave_sql_with_master.inc
+include/diff_tables.inc [master:t1, slave:t1]
+include/diff_tables.inc [master:t2, slave:t2]
+include/diff_tables.inc [master:t3, slave:t3]
+include/diff_tables.inc [master:t4, slave:t4]
+include/diff_tables.inc [master:t5, slave:t5]
+include/diff_tables.inc [master:t6, slave:t6]
+include/diff_tables.inc [master:t7, slave:t7]
+include/diff_tables.inc [master:t8, slave:t8]
+include/sync_slave_sql_with_master.inc
+include/stop_slave.inc
+set @@global.rocksdb_read_free_rpl = default;
+include/start_slave.inc
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_row_stats.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_row_stats.result
new file mode 100644
index 00000000000..8c02de98c90
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_row_stats.result
@@ -0,0 +1,66 @@
+create table t1 (a int primary key) engine=rocksdb;
+Verify rocksdb_rows_inserted
+select variable_value into @old_rows_inserted from information_schema.global_status where variable_name = 'rocksdb_rows_inserted';
+insert into t1 values(1);
+select variable_value into @new_rows_inserted from information_schema.global_status where variable_name = 'rocksdb_rows_inserted';
+select @new_rows_inserted - @old_rows_inserted;
+@new_rows_inserted - @old_rows_inserted
+1
+Verify rocksdb_rows_updated
+select variable_value into @old_rows_updated from information_schema.global_status where variable_name = 'rocksdb_rows_updated';
+update t1 set a=2 where a=1;
+select variable_value into @new_rows_updated from information_schema.global_status where variable_name = 'rocksdb_rows_updated';
+select @new_rows_updated - @old_rows_updated;
+@new_rows_updated - @old_rows_updated
+1
+Verify rocksdb_rows_read
+select variable_value into @old_rows_read from information_schema.global_status where variable_name = 'rocksdb_rows_read';
+select * from t1;
+a
+2
+select variable_value into @new_rows_read from information_schema.global_status where variable_name = 'rocksdb_rows_read';
+select @new_rows_read - @old_rows_read;
+@new_rows_read - @old_rows_read
+1
+Verify rocksdb_rows_deleted
+select variable_value into @old_rows_deleted from information_schema.global_status where variable_name = 'rocksdb_rows_deleted';
+delete from t1;
+select variable_value into @new_rows_deleted from information_schema.global_status where variable_name = 'rocksdb_rows_deleted';
+select @new_rows_deleted - @old_rows_deleted;
+@new_rows_deleted - @old_rows_deleted
+1
+use mysql;
+create table t1(a int primary key) engine=rocksdb;
+Verify rocksdb_system_rows_inserted
+select variable_value into @old_system_rows_inserted from information_schema.global_status where variable_name = 'rocksdb_system_rows_inserted';
+insert into t1 values(1);
+select variable_value into @new_system_rows_inserted from information_schema.global_status where variable_name = 'rocksdb_system_rows_inserted';
+select @new_system_rows_inserted - @old_system_rows_inserted;
+@new_system_rows_inserted - @old_system_rows_inserted
+1
+Verify rocksdb_system_rows_updated
+select variable_value into @old_system_rows_updated from information_schema.global_status where variable_name = 'rocksdb_system_rows_updated';
+update t1 set a=2 where a=1;
+select variable_value into @new_system_rows_updated from information_schema.global_status where variable_name = 'rocksdb_system_rows_updated';
+select @new_system_rows_updated - @old_system_rows_updated;
+@new_system_rows_updated - @old_system_rows_updated
+1
+Verify rocksdb_system_rows_read
+select variable_value into @old_system_rows_read from information_schema.global_status where variable_name = 'rocksdb_system_rows_read';
+select * from t1;
+a
+2
+select variable_value into @new_system_rows_read from information_schema.global_status where variable_name = 'rocksdb_system_rows_read';
+select @new_system_rows_read - @old_system_rows_read;
+@new_system_rows_read - @old_system_rows_read
+1
+Verify rocksdb_system_rows_deleted
+select variable_value into @old_system_rows_deleted from information_schema.global_status where variable_name = 'rocksdb_system_rows_deleted';
+delete from t1;
+select variable_value into @new_system_rows_deleted from information_schema.global_status where variable_name = 'rocksdb_system_rows_deleted';
+select @new_system_rows_deleted - @old_system_rows_deleted;
+@new_system_rows_deleted - @old_system_rows_deleted
+1
+drop table t1;
+use test;
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_table_stats_sampling_pct_change.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_table_stats_sampling_pct_change.result
new file mode 100644
index 00000000000..1e8aa5787a6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_table_stats_sampling_pct_change.result
@@ -0,0 +1,23 @@
+drop table if exists t1;
+SET @ORIG_PCT = @@ROCKSDB_TABLE_STATS_SAMPLING_PCT;
+SET @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT = 100;
+create table t1 (pk int primary key) engine=rocksdb;
+set global rocksdb_force_flush_memtable_now = true;
+select table_rows from information_schema.tables
+where table_schema = database() and table_name = 't1';
+table_rows
+10000
+drop table t1;
+drop table if exists t2;
+SET @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT = 10;
+create table t2 (pk int primary key) engine=rocksdb;
+set global rocksdb_force_flush_memtable_now = true;
+select table_rows from information_schema.tables
+where table_schema = database() and table_name = 't2';
+table_rows
+10000
+select table_name from information_schema.tables where table_schema = database() and table_name = 't2';
+table_name
+t2
+drop table t2;
+SET GLOBAL ROCKSDB_TABLE_STATS_SAMPLING_PCT = @ORIG_PCT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_timeout_rollback.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_timeout_rollback.result
new file mode 100644
index 00000000000..adf05d06aac
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_timeout_rollback.result
@@ -0,0 +1,84 @@
+drop table if exists t1;
+SET @@global.rocksdb_rollback_on_timeout = 1;
+show variables like 'rocksdb_rollback_on_timeout';
+Variable_name Value
+rocksdb_rollback_on_timeout ON
+create table t1 (a int unsigned not null primary key) engine = rocksdb;
+insert into t1 values (1);
+commit;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con2;
+begin work;
+insert into t1 values (5);
+insert into t1 values (6);
+update t1 set a = a + 1 where a = 1;
+connection con1;
+begin work;
+insert into t1 values (7);
+insert into t1 values (8);
+update t1 set a = a + 1 where a = 1;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+select * from t1;
+a
+1
+commit;
+connection con2;
+select * from t1;
+a
+2
+5
+6
+commit;
+connection default;
+select * from t1;
+a
+2
+5
+6
+SET @@global.rocksdb_rollback_on_timeout = 0;
+show variables like 'rocksdb_rollback_on_timeout';
+Variable_name Value
+rocksdb_rollback_on_timeout OFF
+connection con2;
+begin work;
+insert into t1 values (9);
+insert into t1 values (10);
+update t1 set a = a + 1 where a = 2;
+connection con1;
+begin work;
+insert into t1 values (11);
+insert into t1 values (12);
+update t1 set a = a + 1 where a = 2;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+select * from t1;
+a
+2
+5
+6
+11
+12
+commit;
+connection con2;
+select * from t1;
+a
+3
+5
+6
+9
+10
+commit;
+connection default;
+select * from t1;
+a
+3
+5
+6
+9
+10
+11
+12
+SET @@global.rocksdb_rollback_on_timeout = DEFAULT;
+drop table t1;
+disconnect con1;
+disconnect con2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rollback_savepoint.result b/storage/rocksdb/mysql-test/rocksdb/r/rollback_savepoint.result
new file mode 100644
index 00000000000..18b76818e6a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rollback_savepoint.result
@@ -0,0 +1,29 @@
+DROP TABLE IF EXISTS t1, t2;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'a');
+CREATE TABLE t2 LIKE t1;
+INSERT INTO t2 SELECT * FROM t1;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+SAVEPOINT a;
+SELECT * FROM t1 ORDER BY pk;
+a b pk
+1 a 1
+2 b 2
+3 a 3
+ROLLBACK TO SAVEPOINT a;
+SAVEPOINT a;
+SELECT * FROM t2 ORDER BY pk;
+a b pk
+1 a 1
+2 b 2
+3 a 3
+ROLLBACK TO SAVEPOINT a;
+connection con2;
+ALTER TABLE t1 RENAME TO t3;
+connection default;
+DROP TABLE t2, t3;
+disconnect con1;
+disconnect con2;
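
rollback_savepoint.result makes the complementary point to rpl_savepoint further below: ROLLBACK TO SAVEPOINT is accepted as long as the transaction has only read, and afterwards no metadata locks linger (the concurrent RENAME in con2 succeeds). Condensed, assuming a scratch table:

    -- Savepoints in a read-only MyRocks transaction are unproblematic.
    CREATE TABLE sp_demo (pk INT PRIMARY KEY) ENGINE=ROCKSDB;
    START TRANSACTION WITH CONSISTENT SNAPSHOT;
    SAVEPOINT a;
    SELECT COUNT(*) FROM sp_demo;  -- reads only, no writes
    ROLLBACK TO SAVEPOINT a;       -- succeeds: nothing to undo
    COMMIT;
    DROP TABLE sp_demo;
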
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rpl_row_not_found.result b/storage/rocksdb/mysql-test/rocksdb/r/rpl_row_not_found.result
new file mode 100644
index 00000000000..8cdfa910739
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rpl_row_not_found.result
@@ -0,0 +1,56 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+drop table if exists t1;
+create table t0 (a int) engine=myisam;
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int) engine=myisam;
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+create table t2 (
+pk int primary key,
+kp1 int,
+kp2 int,
+col1 int,
+key (kp1,kp2)
+) engine=rocksdb;
+insert into t2 select a,a,a,a from t1;
+create table t3 like t2;
+insert into t3 select * from t2;
+include/sync_slave_sql_with_master.inc
+set global debug= 'd,dbug.rocksdb.get_row_by_rowid';
+include/stop_slave.inc
+include/start_slave.inc
+update t2 set col1=100 where kp1 between 1 and 3 and mod(kp2,2)=0;
+set debug_sync= 'now WAIT_FOR Reached';
+set global debug = '';
+set sql_log_bin=0;
+delete from t2 where pk=2;
+delete from t2 where pk=3;
+set debug_sync= 'now SIGNAL signal.rocksdb.get_row_by_rowid_let_running';
+include/sync_slave_sql_with_master.inc
+select * from t2 where pk < 5;
+pk kp1 kp2 col1
+0 0 0 0
+1 1 1 1
+4 4 4 4
+set global debug= 'd,dbug.rocksdb.get_row_by_rowid';
+include/stop_slave.inc
+include/start_slave.inc
+update t3 set col1=100 where kp1 between 1 and 4 and mod(kp2,2)=0;
+call mtr.add_suppression("Deadlock found when trying to get lock");
+set debug_sync= 'now WAIT_FOR Reached';
+set global debug = '';
+set sql_log_bin=0;
+delete from t3 where pk=2;
+delete from t3 where pk=3;
+set debug_sync= 'now SIGNAL signal.rocksdb.get_row_by_rowid_let_running';
+include/sync_slave_sql_with_master.inc
+select * from t3 where pk < 5;
+pk kp1 kp2 col1
+0 0 0 0
+1 1 1 1
+4 4 4 100
+drop table t0, t1, t2, t3;
+include/rpl_end.inc
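
The choreography above is easy to lose in the flat output: a debug-build sync point pauses the applier inside get_row_by_rowid, the rows it is about to touch are deleted slave-side with binary logging off, and the applier is then released; replication survives the missing rows. The core slave-side sequence, condensed from the result with comments (the dbug hook exists only in debug builds):

    SET GLOBAL debug = 'd,dbug.rocksdb.get_row_by_rowid';  -- arm the hook
    -- (the master now runs the UPDATE that arrives as row events)
    SET debug_sync = 'now WAIT_FOR Reached';  -- applier paused mid-lookup
    SET sql_log_bin = 0;                      -- keep local changes unlogged
    DELETE FROM t2 WHERE pk = 2;              -- make target rows vanish
    DELETE FROM t2 WHERE pk = 3;
    SET debug_sync = 'now SIGNAL signal.rocksdb.get_row_by_rowid_let_running';
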
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rpl_row_not_found_rc.result b/storage/rocksdb/mysql-test/rocksdb/r/rpl_row_not_found_rc.result
new file mode 100644
index 00000000000..8cdfa910739
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rpl_row_not_found_rc.result
@@ -0,0 +1,56 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+drop table if exists t1;
+create table t0 (a int) engine=myisam;
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int) engine=myisam;
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+create table t2 (
+pk int primary key,
+kp1 int,
+kp2 int,
+col1 int,
+key (kp1,kp2)
+) engine=rocksdb;
+insert into t2 select a,a,a,a from t1;
+create table t3 like t2;
+insert into t3 select * from t2;
+include/sync_slave_sql_with_master.inc
+set global debug= 'd,dbug.rocksdb.get_row_by_rowid';
+include/stop_slave.inc
+include/start_slave.inc
+update t2 set col1=100 where kp1 between 1 and 3 and mod(kp2,2)=0;
+set debug_sync= 'now WAIT_FOR Reached';
+set global debug = '';
+set sql_log_bin=0;
+delete from t2 where pk=2;
+delete from t2 where pk=3;
+set debug_sync= 'now SIGNAL signal.rocksdb.get_row_by_rowid_let_running';
+include/sync_slave_sql_with_master.inc
+select * from t2 where pk < 5;
+pk kp1 kp2 col1
+0 0 0 0
+1 1 1 1
+4 4 4 4
+set global debug= 'd,dbug.rocksdb.get_row_by_rowid';
+include/stop_slave.inc
+include/start_slave.inc
+update t3 set col1=100 where kp1 between 1 and 4 and mod(kp2,2)=0;
+call mtr.add_suppression("Deadlock found when trying to get lock");
+set debug_sync= 'now WAIT_FOR Reached';
+set global debug = '';
+set sql_log_bin=0;
+delete from t3 where pk=2;
+delete from t3 where pk=3;
+set debug_sync= 'now SIGNAL signal.rocksdb.get_row_by_rowid_let_running';
+include/sync_slave_sql_with_master.inc
+select * from t3 where pk < 5;
+pk kp1 kp2 col1
+0 0 0 0
+1 1 1 1
+4 4 4 100
+drop table t0, t1, t2, t3;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rpl_row_rocksdb.result b/storage/rocksdb/mysql-test/rocksdb/r/rpl_row_rocksdb.result
new file mode 100644
index 00000000000..de47f3b39b0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rpl_row_rocksdb.result
@@ -0,0 +1,45 @@
+include/master-slave.inc
+[connection master]
+connection master;
+drop table if exists t1;
+connection master;
+select @@binlog_format;
+@@binlog_format
+ROW
+create table t1 (pk int primary key) engine=rocksdb;
+insert into t1 values (1),(2),(3);
+include/sync_slave_sql_with_master.inc
+connection slave;
+select * from t1;
+pk
+1
+2
+3
+connection master;
+drop table t1;
+#
+# Issue #18: slave crash on update with row based binary logging
+#
+create table t1 (id int primary key, value int, value2 int, index(value)) engine=rocksdb;
+insert into t1 values (1,1,1);
+insert into t1 values (2,1,1);
+insert into t1 values (3,1,1);
+insert into t1 values (4,1,1);
+insert into t1 values (5,1,1);
+update t1 set value2=100 where id=1;
+update t1 set value2=200 where id=2;
+update t1 set value2=300 where id=3;
+include/sync_slave_sql_with_master.inc
+connection slave;
+select * from t1 where id=1;
+id value value2
+1 1 100
+select * from t1 where id=2;
+id value value2
+2 1 200
+select * from t1 where id=3;
+id value value2
+3 1 300
+connection master;
+drop table t1;
+include/rpl_end.inc
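
The second half of this file is the regression check for Issue #18 (slave crash on update with row-based binary logging): the dangerous shape is a secondary index plus updates to a column outside it, which forces the applier to relocate each row by primary key. Its skeleton as a standalone sketch:

    -- Minimal shape of the Issue #18 regression.
    CREATE TABLE issue18 (id INT PRIMARY KEY, value INT, value2 INT,
                          INDEX (value)) ENGINE=ROCKSDB;
    INSERT INTO issue18 VALUES (1,1,1), (2,1,1), (3,1,1);
    UPDATE issue18 SET value2 = 100 WHERE id = 1;  -- non-indexed column;
    -- the row event carries before/after images the slave must re-find
    DROP TABLE issue18;
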
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rpl_row_stats.result b/storage/rocksdb/mysql-test/rocksdb/r/rpl_row_stats.result
new file mode 100644
index 00000000000..a14d2693ad3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rpl_row_stats.result
@@ -0,0 +1,98 @@
+include/master-slave.inc
+[connection master]
+connection master;
+drop table if exists t1;
+connection master;
+create procedure save_read_stats()
+begin
+/*select rows_requested into @rq from information_schema.table_statistics
+where table_schema=database() and table_name='t1';*/
+select rows_read into @rr_is from information_schema.table_statistics
+where table_schema=database() and table_name='t1';
+select variable_value into @rr from information_schema.global_status
+where variable_name='rocksdb_rows_read';
+select variable_value into @ru from information_schema.global_status
+where variable_name='rocksdb_rows_updated';
+select variable_value into @rd from information_schema.global_status
+where variable_name='rocksdb_rows_deleted';
+end//
+create procedure get_read_stats()
+begin
+/*select rows_requested - @rq as rows_requested from
+information_schema.table_statistics
+where table_schema=database() and table_name='t1';*/
+select rows_read - @rr_is as rows_read_userstat from
+information_schema.table_statistics
+where table_schema=database() and table_name='t1';
+select variable_value - @rr as rows_read from
+information_schema.global_status
+where variable_name='rocksdb_rows_read';
+select variable_value - @ru as rows_updated from
+information_schema.global_status
+where variable_name='rocksdb_rows_updated';
+select variable_value - @rd as rows_deleted from
+information_schema.global_status
+where variable_name='rocksdb_rows_deleted';
+end//
+create table t1 (id int primary key, value int);
+insert into t1 values (1,1), (2,2), (3,3), (4,4), (5,5);
+include/sync_slave_sql_with_master.inc
+connection slave;
+call save_read_stats();
+connection master;
+update t1 set value=value+1 where id=1;
+update t1 set value=value+1 where id=3;
+select * from t1;
+id value
+1 2
+2 2
+3 4
+4 4
+5 5
+include/sync_slave_sql_with_master.inc
+connection slave;
+call get_read_stats();
+rows_read_userstat
+2
+rows_read
+2
+rows_updated
+2
+rows_deleted
+0
+select * from t1;
+id value
+1 2
+2 2
+3 4
+4 4
+5 5
+call save_read_stats();
+connection master;
+delete from t1 where id in (4,5);
+select * from t1;
+id value
+1 2
+2 2
+3 4
+include/sync_slave_sql_with_master.inc
+connection slave;
+call get_read_stats();
+rows_read_userstat
+2
+rows_read
+2
+rows_updated
+0
+rows_deleted
+2
+select * from t1;
+id value
+1 2
+2 2
+3 4
+connection master;
+drop table t1;
+drop procedure save_read_stats;
+drop procedure get_read_stats;
+include/rpl_end.inc
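
The save_read_stats/get_read_stats pair above simply diffs global counters around a replicated change; each single-row UPDATE or DELETE applied by the SQL thread costs one rocksdb_rows_read plus one rocksdb_rows_updated or rocksdb_rows_deleted, which is exactly what the deltas of 2 reflect. The same diff for a single counter, as a sketch (t1 as defined above):

    SELECT variable_value INTO @before
      FROM information_schema.global_status
     WHERE variable_name = 'rocksdb_rows_updated';
    UPDATE t1 SET value = value + 1 WHERE id = 1;
    SELECT variable_value - @before AS rows_updated
      FROM information_schema.global_status
     WHERE variable_name = 'rocksdb_rows_updated';
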
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rpl_row_triggers.result b/storage/rocksdb/mysql-test/rocksdb/r/rpl_row_triggers.result
new file mode 100644
index 00000000000..69acc4a92e8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rpl_row_triggers.result
@@ -0,0 +1,286 @@
+include/master-slave.inc
+[connection master]
+# Test of row replication with triggers on the slave side
+connection master;
+CREATE TABLE t1 (C1 CHAR(1) primary key, C2 CHAR(1));
+SELECT * FROM t1;
+C1 C2
+connection slave;
+connection slave;
+SET @old_slave_exec_mode= @@global.slave_exec_mode;
+SET @old_slave_run_triggers_for_rbr= @@global.slave_run_triggers_for_rbr;
+SET @@global.slave_exec_mode= IDEMPOTENT;
+SET @@global.slave_run_triggers_for_rbr= YES;
+SELECT * FROM t1;
+C1 C2
+create table t2 (id char(2) primary key, cnt int, o char(1), n char(1));
+insert into t2 values
+('u0', 0, ' ', ' '),('u1', 0, ' ', ' '),
+('d0', 0, ' ', ' '),('d1', 0, ' ', ' '),
+('i0', 0, ' ', ' '),('i1', 0, ' ', ' ');
+create trigger t1_cnt_b before update on t1 for each row
+update t2 set cnt=cnt+1, o=old.C1, n=new.C1 where id = 'u0';
+create trigger t1_cnt_db before delete on t1 for each row
+update t2 set cnt=cnt+1, o=old.C1, n=' ' where id = 'd0';
+create trigger t1_cnt_ib before insert on t1 for each row
+update t2 set cnt=cnt+1, n=new.C1, o=' ' where id = 'i0';
+create trigger t1_cnt_a after update on t1 for each row
+update t2 set cnt=cnt+1, o=old.C1, n=new.C1 where id = 'u1';
+create trigger t1_cnt_da after delete on t1 for each row
+update t2 set cnt=cnt+1, o=old.C1, n=' ' where id = 'd1';
+create trigger t1_cnt_ia after insert on t1 for each row
+update t2 set cnt=cnt+1, n=new.C1, o=' ' where id = 'i1';
+SELECT * FROM t2 order by id;
+id cnt o n
+d0 0
+d1 0
+i0 0
+i1 0
+u0 0
+u1 0
+connection master;
+# INSERT triggers test
+insert into t1 values ('a','b');
+connection slave;
+connection slave;
+SELECT * FROM t2 order by id;
+id cnt o n
+d0 0
+d1 0
+i0 1 a
+i1 1 a
+u0 0
+u1 0
+connection master;
+# UPDATE triggers test
+update t1 set C1= 'd';
+connection slave;
+connection slave;
+SELECT * FROM t2 order by id;
+id cnt o n
+d0 0
+d1 0
+i0 1 a
+i1 1 a
+u0 1 a d
+u1 1 a d
+connection master;
+# DELETE triggers test
+delete from t1 where C1='d';
+connection slave;
+connection slave;
+SELECT * FROM t2 order by id;
+id cnt o n
+d0 1 d
+d1 1 d
+i0 1 a
+i1 1 a
+u0 1 a d
+u1 1 a d
+# INSERT triggers which cause also UPDATE test (insert duplicate row)
+insert into t1 values ('0','1');
+SELECT * FROM t2 order by id;
+id cnt o n
+d0 1 d
+d1 1 d
+i0 2 0
+i1 2 0
+u0 1 a d
+u1 1 a d
+connection master;
+insert into t1 values ('0','1');
+connection slave;
+connection slave;
+SELECT * FROM t2 order by id;
+id cnt o n
+d0 1 d
+d1 1 d
+i0 3 0
+i1 3 0
+u0 2 0 0
+u1 2 0 0
+# INSERT triggers which cause also DELETE test
+# (insert duplicate row in table referenced by foreign key)
+insert into t1 values ('1','1');
+connection master;
+drop table if exists t1;
+connection slave;
+connection slave;
+SET @@global.slave_exec_mode= @old_slave_exec_mode;
+SET @@global.slave_run_triggers_for_rbr= @old_slave_run_triggers_for_rbr;
+drop table t2;
+connection master;
+CREATE TABLE t1 (i INT);
+CREATE TABLE t2 (i INT);
+connection slave;
+SET @old_slave_run_triggers_for_rbr= @@global.slave_run_triggers_for_rbr;
+SET GLOBAL slave_run_triggers_for_rbr=YES;
+CREATE TRIGGER tr AFTER INSERT ON t1 FOR EACH ROW
+INSERT INTO t2 VALUES (new.i);
+connection master;
+BEGIN;
+INSERT INTO t1 VALUES (1);
+INSERT INTO t1 VALUES (2);
+COMMIT;
+connection slave;
+select * from t2;
+i
+1
+2
+SET @@global.slave_run_triggers_for_rbr= @old_slave_run_triggers_for_rbr;
+connection master;
+drop tables t2,t1;
+connection slave;
+# Triggers on slave do not work if master has some
+connection master;
+CREATE TABLE t1 (C1 CHAR(1) primary key, C2 CHAR(1));
+SELECT * FROM t1;
+C1 C2
+create trigger t1_dummy before delete on t1 for each row
+set @dummy= 1;
+connection slave;
+connection slave;
+SET @old_slave_exec_mode= @@global.slave_exec_mode;
+SET @old_slave_run_triggers_for_rbr= @@global.slave_run_triggers_for_rbr;
+SET @@global.slave_exec_mode= IDEMPOTENT;
+SET @@global.slave_run_triggers_for_rbr= YES;
+SELECT * FROM t1;
+C1 C2
+create table t2 (id char(2) primary key, cnt int, o char(1), n char(1));
+insert into t2 values
+('u0', 0, ' ', ' '),('u1', 0, ' ', ' '),
+('d0', 0, ' ', ' '),('d1', 0, ' ', ' '),
+('i0', 0, ' ', ' '),('i1', 0, ' ', ' ');
+create trigger t1_cnt_b before update on t1 for each row
+update t2 set cnt=cnt+1, o=old.C1, n=new.C1 where id = 'u0';
+create trigger t1_cnt_ib before insert on t1 for each row
+update t2 set cnt=cnt+1, n=new.C1, o=' ' where id = 'i0';
+create trigger t1_cnt_a after update on t1 for each row
+update t2 set cnt=cnt+1, o=old.C1, n=new.C1 where id = 'u1';
+create trigger t1_cnt_da after delete on t1 for each row
+update t2 set cnt=cnt+1, o=old.C1, n=' ' where id = 'd1';
+create trigger t1_cnt_ia after insert on t1 for each row
+update t2 set cnt=cnt+1, n=new.C1, o=' ' where id = 'i1';
+SELECT * FROM t2 order by id;
+id cnt o n
+d0 0
+d1 0
+i0 0
+i1 0
+u0 0
+u1 0
+connection master;
+# INSERT triggers test
+insert into t1 values ('a','b');
+connection slave;
+connection slave;
+SELECT * FROM t2 order by id;
+id cnt o n
+d0 0
+d1 0
+i0 0
+i1 0
+u0 0
+u1 0
+connection master;
+# UPDATE triggers test
+update t1 set C1= 'd';
+connection slave;
+connection slave;
+SELECT * FROM t2 order by id;
+id cnt o n
+d0 0
+d1 0
+i0 0
+i1 0
+u0 0
+u1 0
+connection master;
+# DELETE triggers test
+delete from t1 where C1='d';
+connection slave;
+connection slave;
+SELECT * FROM t2 order by id;
+id cnt o n
+d0 0
+d1 0
+i0 0
+i1 0
+u0 0
+u1 0
+# INSERT triggers which cause also UPDATE test (insert duplicate row)
+insert into t1 values ('0','1');
+SELECT * FROM t2 order by id;
+id cnt o n
+d0 0
+d1 0
+i0 1 0
+i1 1 0
+u0 0
+u1 0
+connection master;
+insert into t1 values ('0','1');
+connection slave;
+connection slave;
+SELECT * FROM t2 order by id;
+id cnt o n
+d0 0
+d1 0
+i0 1 0
+i1 1 0
+u0 0
+u1 0
+# INSERT triggers which cause also DELETE test
+# (insert duplicate row in table referenced by foreign key)
+insert into t1 values ('1','1');
+connection master;
+drop table if exists t1;
+connection slave;
+connection slave;
+SET @@global.slave_exec_mode= @old_slave_exec_mode;
+SET @@global.slave_run_triggers_for_rbr= @old_slave_run_triggers_for_rbr;
+drop table t2;
+#
+# MDEV-5513: Trigger is applied to the rows after first one
+#
+connection master;
+create table t1 (a int, b int);
+create table tlog (a int auto_increment primary key);
+set sql_log_bin=0;
+create trigger tr1 after insert on t1 for each row insert into tlog values (null);
+set sql_log_bin=1;
+connection slave;
+connection slave;
+set @slave_run_triggers_for_rbr.saved = @@slave_run_triggers_for_rbr;
+set global slave_run_triggers_for_rbr=1;
+create trigger tr2 before insert on t1 for each row set new.b = new.a;
+connection master;
+insert into t1 values (1,10),(2,20),(3,30);
+connection slave;
+select * from t1;
+a b
+1 10
+2 20
+3 30
+#
+# Verify slave skips running triggers if master ran and logged the row events for triggers
+#
+create table t4(a int, b int);
+delete from tlog;
+create trigger tr4 before insert on t4 for each row insert into tlog values (null);
+insert into t4 values (1, 10),(2, 20);
+select * from tlog;
+a
+4
+5
+select * from t4;
+a b
+1 10
+2 20
+select * from tlog;
+a
+4
+5
+set global slave_run_triggers_for_rbr = @slave_run_triggers_for_rbr.saved;
+drop table t1, tlog, t4;
+include/rpl_end.inc
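
Three behaviours are packed into this file: with slave_run_triggers_for_rbr=YES the applier fires slave-side triggers for row events; it declines to fire them when the master had its own trigger on the table (the second counter block stays at zero); and it skips them when the master already logged the trigger effects (tlog keeps only the master-generated rows 4 and 5). Opting in is one variable, sketched here with the two INT tables from the middle of the file standing in:

    -- Slave side: enable, then triggers fire for replicated row events.
    SET @saved = @@global.slave_run_triggers_for_rbr;
    SET GLOBAL slave_run_triggers_for_rbr = YES;
    CREATE TRIGGER tr AFTER INSERT ON t1 FOR EACH ROW
      INSERT INTO t2 VALUES (NEW.i);
    -- ... replicated INSERTs into t1 now also populate t2 ...
    SET GLOBAL slave_run_triggers_for_rbr = @saved;
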
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rpl_savepoint.result b/storage/rocksdb/mysql-test/rocksdb/r/rpl_savepoint.result
new file mode 100644
index 00000000000..5746119efac
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rpl_savepoint.result
@@ -0,0 +1,110 @@
+include/master-slave.inc
+[connection master]
+connection master;
+drop table if exists t1;
+connection master;
+create table t1 (id int primary key, value int);
+insert into t1 values (1,1), (2,2), (3,3);
+begin;
+insert into t1 values (11, 1);
+savepoint a;
+insert into t1 values (12, 1);
+rollback to savepoint a;
+ERROR HY000: MyRocks currently does not support ROLLBACK TO SAVEPOINT if modifying rows.
+commit;
+ERROR HY000: This transaction was rolled back and cannot be committed. Only supported operation is to roll it back, so all pending changes will be discarded. Please restart another transaction.
+commit;
+select * from t1;
+id value
+1 1
+2 2
+3 3
+include/sync_slave_sql_with_master.inc
+connection slave;
+select * from t1;
+id value
+1 1
+2 2
+3 3
+connection master;
+begin;
+insert into t1 values (21, 1);
+savepoint a;
+insert into t1 values (22, 1);
+rollback to savepoint a;
+ERROR HY000: MyRocks currently does not support ROLLBACK TO SAVEPOINT if modifying rows.
+insert into t1 values (23, 1);
+ERROR HY000: This transaction was rolled back and cannot be committed. Only supported operation is to roll it back, so all pending changes will be discarded. Please restart another transaction.
+commit;
+ERROR HY000: This transaction was rolled back and cannot be committed. Only supported operation is to roll it back, so all pending changes will be discarded. Please restart another transaction.
+commit;
+select * from t1;
+id value
+1 1
+2 2
+3 3
+include/sync_slave_sql_with_master.inc
+connection slave;
+select * from t1;
+id value
+1 1
+2 2
+3 3
+connection master;
+begin;
+insert into t1 values (31, 1);
+savepoint a;
+insert into t1 values (32, 1);
+savepoint b;
+insert into t1 values (33, 1);
+rollback to savepoint a;
+ERROR HY000: MyRocks currently does not support ROLLBACK TO SAVEPOINT if modifying rows.
+insert into t1 values (34, 1);
+ERROR HY000: This transaction was rolled back and cannot be committed. Only supported operation is to roll it back, so all pending changes will be discarded. Please restart another transaction.
+rollback;
+select * from t1;
+id value
+1 1
+2 2
+3 3
+include/sync_slave_sql_with_master.inc
+connection slave;
+select * from t1;
+id value
+1 1
+2 2
+3 3
+connection master;
+SET autocommit=off;
+select * from t1;
+id value
+1 1
+2 2
+3 3
+SAVEPOINT A;
+select * from t1;
+id value
+1 1
+2 2
+3 3
+SAVEPOINT A;
+insert into t1 values (35, 35);
+ROLLBACK TO SAVEPOINT A;
+ERROR HY000: MyRocks currently does not support ROLLBACK TO SAVEPOINT if modifying rows.
+START TRANSACTION;
+ERROR HY000: This transaction was rolled back and cannot be committed. Only supported operation is to roll it back, so all pending changes will be discarded. Please restart another transaction.
+select * from t1;
+id value
+1 1
+2 2
+3 3
+include/sync_slave_sql_with_master.inc
+connection slave;
+select * from t1;
+id value
+1 1
+2 2
+3 3
+connection master;
+drop table t1;
+include/rpl_end.inc
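
The pattern repeated above is MyRocks' savepoint limitation: once a transaction has modified rows, ROLLBACK TO SAVEPOINT is refused and the transaction is poisoned; every later statement fails until a full rollback (a COMMIT degenerates to one), and nothing reaches the binlog or the slave. Condensed, with t1 as above:

    BEGIN;
    INSERT INTO t1 VALUES (11, 1);
    SAVEPOINT a;
    INSERT INTO t1 VALUES (12, 1);
    ROLLBACK TO SAVEPOINT a;  -- ERROR: unsupported once rows were modified
    ROLLBACK;                 -- the only way out; both inserts are gone
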
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rpl_statement.result b/storage/rocksdb/mysql-test/rocksdb/r/rpl_statement.result
new file mode 100644
index 00000000000..df1a60519db
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rpl_statement.result
@@ -0,0 +1,57 @@
+include/master-slave.inc
+[connection master]
+connection master;
+drop table if exists t1;
+connection master;
+select @@binlog_format;
+@@binlog_format
+STATEMENT
+create table t1 (pk int primary key) engine=rocksdb;
+insert into t1 values (1),(2),(3);
+ERROR HY000: Cannot execute statement: impossible to write to binary log since BINLOG_FORMAT = STATEMENT and at least one table uses a storage engine limited to row-based logging.
+set session rocksdb_unsafe_for_binlog=on;
+insert into t1 values (1),(2),(3);
+select * from t1;
+pk
+1
+2
+3
+delete from t1;
+set session rocksdb_unsafe_for_binlog=off;
+insert into t1 values (1),(2),(3);
+ERROR HY000: Cannot execute statement: impossible to write to binary log since BINLOG_FORMAT = STATEMENT and at least one table uses a storage engine limited to row-based logging.
+set binlog_format=row;
+insert into t1 values (1),(2),(3);
+include/sync_slave_sql_with_master.inc
+connection slave;
+select * from t1;
+pk
+1
+2
+3
+connection master;
+drop table t1;
+create table t1 (id int primary key, value int, value2 int, index(value)) engine=rocksdb;
+insert into t1 values (1,1,1);
+insert into t1 values (2,1,1);
+insert into t1 values (3,1,1);
+insert into t1 values (4,1,1);
+insert into t1 values (5,1,1);
+update t1 set value2=100 where id=1;
+update t1 set value2=200 where id=2;
+update t1 set value2=300 where id=3;
+include/sync_slave_sql_with_master.inc
+connection slave;
+select * from t1 where id=1;
+id value value2
+1 1 100
+select * from t1 where id=2;
+id value value2
+2 1 200
+select * from t1 where id=3;
+id value value2
+3 1 300
+connection master;
+drop table t1;
+set binlog_format=row;
+include/rpl_end.inc
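
rpl_statement.result pins down the binlog-format rules: under binlog_format=STATEMENT, MyRocks writes are rejected outright unless the session sets rocksdb_unsafe_for_binlog=ON (accepting possibly unsafe statement replay), and switching to ROW always works. In short, with t1 as above:

    SET binlog_format = STATEMENT;
    INSERT INTO t1 VALUES (1);  -- ERROR: engine is limited to row logging
    SET SESSION rocksdb_unsafe_for_binlog = ON;
    INSERT INTO t1 VALUES (1);  -- accepted now (the first attempt wrote nothing)
    SET SESSION rocksdb_unsafe_for_binlog = OFF;
    SET binlog_format = ROW;
    INSERT INTO t1 VALUES (2);  -- row events are always fine
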
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rpl_statement_not_found.result b/storage/rocksdb/mysql-test/rocksdb/r/rpl_statement_not_found.result
new file mode 100644
index 00000000000..9e71ffa72f0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rpl_statement_not_found.result
@@ -0,0 +1,70 @@
+include/master-slave.inc
+[connection master]
+connection master;
+drop table if exists t1;
+connection master;
+create table t0 (a int) engine=myisam;
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int) engine=myisam;
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+create table t2 (
+pk int primary key,
+kp1 int,
+kp2 int,
+col1 int,
+key (kp1,kp2)
+) engine=rocksdb;
+set @tmp_binlog_format=@@binlog_format;
+set @@binlog_format=ROW;
+insert into t2 select a,a,a,a from t1;
+create table t3 like t2;
+insert into t3 select * from t2;
+set binlog_format=@tmp_binlog_format;
+include/sync_slave_sql_with_master.inc
+connection slave;
+set global debug_dbug= 'd,dbug.rocksdb.get_row_by_rowid';
+include/stop_slave.inc
+include/start_slave.inc
+connection master;
+update t2 set col1=100 where kp1 between 1 and 3 and mod(kp2,2)=0;
+connection slave;
+set debug_sync= 'now WAIT_FOR Reached';
+set global debug_dbug = '';
+set sql_log_bin=0;
+delete from t2 where pk=2;
+delete from t2 where pk=3;
+set debug_sync= 'now SIGNAL signal.rocksdb.get_row_by_rowid_let_running';
+connection master;
+include/sync_slave_sql_with_master.inc
+connection slave;
+select * from t2 where pk < 5;
+pk kp1 kp2 col1
+0 0 0 0
+1 1 1 1
+4 4 4 4
+connection slave;
+set global debug_dbug= 'd,dbug.rocksdb.get_row_by_rowid';
+include/stop_slave.inc
+include/start_slave.inc
+connection master;
+update t3 set col1=100 where kp1 between 1 and 4 and mod(kp2,2)=0;
+connection slave;
+call mtr.add_suppression("Deadlock found when trying to get lock");
+set debug_sync= 'now WAIT_FOR Reached';
+set global debug_dbug = '';
+set sql_log_bin=0;
+delete from t3 where pk=2;
+delete from t3 where pk=3;
+set debug_sync= 'now SIGNAL signal.rocksdb.get_row_by_rowid_let_running';
+connection master;
+include/sync_slave_sql_with_master.inc
+connection slave;
+select * from t3 where pk < 5;
+pk kp1 kp2 col1
+0 0 0 0
+1 1 1 1
+4 4 4 100
+set debug_sync='RESET';
+connection master;
+drop table t0, t1, t2, t3;
+include/rpl_end.inc

diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rqg_examples.result b/storage/rocksdb/mysql-test/rocksdb/r/rqg_examples.result
new file mode 100644
index 00000000000..766795932b0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rqg_examples.result
@@ -0,0 +1,3 @@
+CREATE DATABASE IF NOT EXISTS rqg_examples;
+Running test with grammar file example.yy
+DROP DATABASE rqg_examples;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rqg_runtime.result b/storage/rocksdb/mysql-test/rocksdb/r/rqg_runtime.result
new file mode 100644
index 00000000000..1b872f82c58
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rqg_runtime.result
@@ -0,0 +1,30 @@
+call mtr.add_suppression("Did not write failed ");
+call mtr.add_suppression("Can't open and lock privilege tables");
+call mtr.add_suppression("Attempt to delete the trigger file");
+SET @ORIG_EVENT_SCHEDULER = @@EVENT_SCHEDULER;
+CREATE TABLE mysql.user_temp LIKE mysql.user;
+INSERT mysql.user_temp SELECT * FROM mysql.user;
+CREATE TABLE mysql.tables_priv_temp LIKE mysql.tables_priv;
+INSERT mysql.tables_priv_temp SELECT * FROM mysql.tables_priv;
+CREATE DATABASE IF NOT EXISTS rqg_runtime;
+Running test with grammar file alter_online.yy
+DROP DATABASE rqg_runtime;
+CREATE DATABASE IF NOT EXISTS rqg_runtime;
+Running test with grammar file concurrency_1.yy
+DROP DATABASE rqg_runtime;
+CREATE DATABASE IF NOT EXISTS rqg_runtime;
+Running test with grammar file connect_kill_sql.yy
+DROP DATABASE rqg_runtime;
+CREATE DATABASE IF NOT EXISTS rqg_runtime;
+Running test with grammar file metadata_stability.yy
+DROP DATABASE rqg_runtime;
+DELETE FROM mysql.tables_priv;
+DELETE FROM mysql.user;
+INSERT mysql.user SELECT * FROM mysql.user_temp;
+INSERT mysql.tables_priv SELECT * FROM mysql.tables_priv_temp;
+DROP TABLE mysql.user_temp;
+DROP TABLE mysql.tables_priv_temp;
+DROP TABLE IF EXISTS test.executors;
+DROP DATABASE IF EXISTS testdb_N;
+DROP DATABASE IF EXISTS testdb_S;
+SET GLOBAL EVENT_SCHEDULER = @ORIG_EVENT_SCHEDULER;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rqg_transactions.result b/storage/rocksdb/mysql-test/rocksdb/r/rqg_transactions.result
new file mode 100644
index 00000000000..23705d493e7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rqg_transactions.result
@@ -0,0 +1,11 @@
+call mtr.add_suppression("Deadlock found when trying to get lock");
+CREATE DATABASE IF NOT EXISTS rqg_transactions;
+Running test with grammar file transactions.yy
+Running test with grammar file repeatable_read.yy
+Running test with grammar file transaction_durability.yy
+Running test with grammar file transactions-flat.yy
+Running test with grammar file combinations.yy
+Running test with grammar file repeatable_read.yy
+Running test with grammar file transaction_durability.yy
+Running test with grammar file transactions-flat.yy
+DROP DATABASE rqg_transactions;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/secondary_key_update_lock.result b/storage/rocksdb/mysql-test/rocksdb/r/secondary_key_update_lock.result
new file mode 100644
index 00000000000..14217824bb4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/secondary_key_update_lock.result
@@ -0,0 +1,18 @@
+connect con, localhost, root,,;
+connection default;
+create table t1 (a int primary key, b int unique key) engine = rocksdb;
+insert into t1 values(1, 1);
+connection con;
+begin;
+update t1 set b = 2 where b = 1;
+connection default;
+insert into t1 values(2, 1);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection con;
+rollback;
+select * from t1;
+a b
+1 1
+connection default;
+drop table t1;
+disconnect con;
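
secondary_key_update_lock.result captures a subtlety of unique secondary keys: the uncommitted UPDATE from b=1 to b=2 keeps the old unique value locked, so another session inserting b=1 must wait and here times out. Two-session sketch:

    -- Session A
    CREATE TABLE u (a INT PRIMARY KEY, b INT UNIQUE) ENGINE=ROCKSDB;
    INSERT INTO u VALUES (1, 1);
    BEGIN;
    UPDATE u SET b = 2 WHERE b = 1;  -- old value b=1 stays locked

    -- Session B
    INSERT INTO u VALUES (2, 1);     -- needs b=1, blocked by A
    -- ERROR HY000: Lock wait timeout exceeded; try restarting transaction
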
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/select.result b/storage/rocksdb/mysql-test/rocksdb/r/select.result
new file mode 100644
index 00000000000..fc3825d5377
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/select.result
@@ -0,0 +1,374 @@
+DROP TABLE IF EXISTS t1, t2;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (100,'foobar'),(1,'z'),(200,'bar');
+CREATE TABLE t2 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t2 (a,b) SELECT a, b FROM t1;
+INSERT INTO t1 (a,b) SELECT a, b FROM t2;
+SELECT * FROM t1;
+a b pk
+1 z 2
+1 z 5
+100 foobar 1
+100 foobar 4
+200 bar 3
+200 bar 6
+SELECT DISTINCT a FROM t1;
+a
+1
+100
+200
+SELECT ALL b, a FROM t1;
+b a
+bar 200
+bar 200
+foobar 100
+foobar 100
+z 1
+z 1
+SELECT STRAIGHT_JOIN SQL_CACHE t1.* FROM t2, t1 WHERE t1.a <> t2.a;
+a b pk
+1 z 2
+1 z 2
+1 z 5
+1 z 5
+100 foobar 1
+100 foobar 1
+100 foobar 4
+100 foobar 4
+200 bar 3
+200 bar 3
+200 bar 6
+200 bar 6
+SELECT SQL_SMALL_RESULT SQL_NO_CACHE t1.a FROM t1, t2;
+a
+1
+1
+1
+1
+1
+1
+100
+100
+100
+100
+100
+100
+200
+200
+200
+200
+200
+200
+SELECT SQL_BIG_RESULT SQL_CALC_FOUND_ROWS DISTINCT(t2.a)
+FROM t1 t1_1, t2, t1 t1_2;
+a
+1
+100
+200
+SELECT FOUND_ROWS();
+FOUND_ROWS()
+3
+SET @save_query_cache=@@global.query_cache_size;
+SET GLOBAL query_cache_size = 1024*1024;
+SELECT SQL_CACHE * FROM t1, t2;
+a b pk a b pk
+1 z 2 1 z 2
+1 z 2 100 foobar 1
+1 z 2 200 bar 3
+1 z 5 1 z 2
+1 z 5 100 foobar 1
+1 z 5 200 bar 3
+100 foobar 1 1 z 2
+100 foobar 1 100 foobar 1
+100 foobar 1 200 bar 3
+100 foobar 4 1 z 2
+100 foobar 4 100 foobar 1
+100 foobar 4 200 bar 3
+200 bar 3 1 z 2
+200 bar 3 100 foobar 1
+200 bar 3 200 bar 3
+200 bar 6 1 z 2
+200 bar 6 100 foobar 1
+200 bar 6 200 bar 3
+SET @@global.query_cache_size=@save_query_cache;
+SELECT a+10 AS field1, CONCAT(b,':',b) AS field2 FROM t1
+WHERE b > 'b' AND a IS NOT NULL
+GROUP BY 2 DESC, field1 ASC
+HAVING field1 < 1000
+ORDER BY field2, 1 DESC, field1*2
+LIMIT 5 OFFSET 1;
+field1 field2
+11 z:z
+110 foobar:foobar
+SELECT SUM(a), MAX(a), b FROM t1 GROUP BY b WITH ROLLUP;
+SUM(a) MAX(a) b
+2 1 z
+200 100 foobar
+400 200 bar
+602 200 NULL
+SELECT * FROM t2 WHERE a>0 PROCEDURE ANALYSE();
+Field_name Min_value Max_value Min_length Max_length Empties_or_zeros Nulls Avg_value_or_avg_length Std Optimal_fieldtype
+test.t2.a 1 200 1 3 0 0 100.3333 81.2418 ENUM('1','100','200') NOT NULL
+test.t2.b bar z 1 6 0 0 3.3333 NULL ENUM('bar','foobar','z') NOT NULL
+test.t2.pk 1 3 1 1 0 0 2.0000 0.8165 ENUM('1','2','3') NOT NULL
+SELECT t1.a, t2.b FROM t2, t1 WHERE t1.a = t2.a ORDER BY t2.b, t1.a
+INTO OUTFILE '<DATADIR>/select.out'
+CHARACTER SET utf8
+FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '''';
+200,'bar'
+200,'bar'
+100,'foobar'
+100,'foobar'
+1,'z'
+1,'z'
+SELECT t1.a, t2.b FROM t2, t1 WHERE t1.a = t2.a ORDER BY t2.b, t1.a
+INTO DUMPFILE '<DATADIR>/select.dump';
+ERROR 42000: Result consisted of more than one row
+SELECT t1.*, t2.* FROM t1, t2 ORDER BY t2.b, t1.a, t2.a, t1.b, t1.pk, t2.pk LIMIT 1
+INTO DUMPFILE '<DATADIR>/select.dump';
+1z2200bar3
+SELECT MIN(a), MAX(a) FROM t1 INTO @min, @max;
+SELECT @min, @max;
+@min @max
+1 200
+SELECT t1_1.*, t2.* FROM t2, t1 AS t1_1, t1 AS t1_2
+WHERE t1_1.a = t1_2.a AND t2.a = t1_1.a;
+a b pk a b pk
+1 z 2 1 z 2
+1 z 2 1 z 2
+1 z 5 1 z 2
+1 z 5 1 z 2
+100 foobar 1 100 foobar 1
+100 foobar 1 100 foobar 1
+100 foobar 4 100 foobar 1
+100 foobar 4 100 foobar 1
+200 bar 3 200 bar 3
+200 bar 3 200 bar 3
+200 bar 6 200 bar 3
+200 bar 6 200 bar 3
+SELECT alias1.* FROM ( SELECT a,b FROM t1 ) alias1, t2 WHERE t2.a IN (100,200);
+a b
+1 z
+1 z
+1 z
+1 z
+100 foobar
+100 foobar
+100 foobar
+100 foobar
+200 bar
+200 bar
+200 bar
+200 bar
+SELECT t1.a FROM { OJ t1 LEFT OUTER JOIN t2 ON t1.a = t2.a+10 };
+a
+1
+1
+100
+100
+200
+200
+SELECT t1.* FROM t2 INNER JOIN t1;
+a b pk
+1 z 2
+1 z 2
+1 z 2
+1 z 5
+1 z 5
+1 z 5
+100 foobar 1
+100 foobar 1
+100 foobar 1
+100 foobar 4
+100 foobar 4
+100 foobar 4
+200 bar 3
+200 bar 3
+200 bar 3
+200 bar 6
+200 bar 6
+200 bar 6
+SELECT t1_2.* FROM t1 t1_1 CROSS JOIN t1 t1_2 ON t1_1.b = t1_2.b;
+a b pk
+1 z 2
+1 z 2
+1 z 5
+1 z 5
+100 foobar 1
+100 foobar 1
+100 foobar 4
+100 foobar 4
+200 bar 3
+200 bar 3
+200 bar 6
+200 bar 6
+SELECT t1.a, t2.b FROM t2 STRAIGHT_JOIN t1 WHERE t1.b > t2.b;
+a b
+1 bar
+1 bar
+1 foobar
+1 foobar
+100 bar
+100 bar
+SELECT t1.a, t2.b FROM t2 STRAIGHT_JOIN t1 ON t1.b > t2.b ORDER BY t1.a, t2.b;
+a b
+1 bar
+1 bar
+1 foobar
+1 foobar
+100 bar
+100 bar
+SELECT t2.* FROM t1 LEFT JOIN t2 USING (a) ORDER BY t2.a, t2.b LIMIT 1;
+a b pk
+1 z 2
+SELECT t2.* FROM t2 LEFT OUTER JOIN t1 ON t1.a = t2.a WHERE t1.a IS NOT NULL;
+a b pk
+1 z 2
+1 z 2
+100 foobar 1
+100 foobar 1
+200 bar 3
+200 bar 3
+SELECT SUM(t2.a) FROM t1 RIGHT JOIN t2 ON t2.b = t1.b;
+SUM(t2.a)
+602
+SELECT MIN(t2.a) FROM t1 RIGHT OUTER JOIN t2 USING (b,a);
+MIN(t2.a)
+1
+SELECT alias.b FROM t1 NATURAL JOIN ( SELECT a,b FROM t1 ) alias WHERE b > '';
+b
+bar
+bar
+bar
+bar
+foobar
+foobar
+foobar
+foobar
+z
+z
+z
+z
+SELECT t2.b FROM ( SELECT a,b FROM t1 ) alias NATURAL LEFT JOIN t2 WHERE b IS NOT NULL;
+b
+bar
+bar
+foobar
+foobar
+z
+z
+SELECT t1.*, t2.* FROM t1 NATURAL LEFT OUTER JOIN t2;
+a b pk a b pk
+1 z 2 1 z 2
+1 z 5 NULL NULL NULL
+100 foobar 1 100 foobar 1
+100 foobar 4 NULL NULL NULL
+200 bar 3 200 bar 3
+200 bar 6 NULL NULL NULL
+SELECT t2_2.* FROM t2 t2_1 NATURAL RIGHT JOIN t2 t2_2 WHERE t2_1.a IN ( SELECT a FROM t1 );
+a b pk
+1 z 2
+100 foobar 1
+200 bar 3
+SELECT t1_2.b FROM t1 t1_1 NATURAL RIGHT OUTER JOIN t1 t1_2 INNER JOIN t2;
+b
+bar
+bar
+bar
+bar
+bar
+bar
+foobar
+foobar
+foobar
+foobar
+foobar
+foobar
+z
+z
+z
+z
+z
+z
+SELECT ( SELECT MIN(a) FROM ( SELECT a,b FROM t1 ) alias1 ) AS min_a FROM t2;
+min_a
+1
+1
+1
+SELECT a,b FROM t2 WHERE a = ( SELECT MIN(a) FROM t1 );
+a b
+1 z
+SELECT a,b FROM t2 WHERE b LIKE ( SELECT b FROM t1 ORDER BY b LIMIT 1 );
+a b
+200 bar
+SELECT t2.* FROM t1 t1_outer, t2 WHERE ( t1_outer.a, t2.b ) IN ( SELECT a, b FROM t2 WHERE a = t1_outer.a );
+a b pk
+1 z 2
+1 z 2
+100 foobar 1
+100 foobar 1
+200 bar 3
+200 bar 3
+SELECT a,b FROM t2 WHERE b = ANY ( SELECT b FROM t1 WHERE a > 1 );
+a b
+100 foobar
+200 bar
+SELECT a,b FROM t2 WHERE b > ALL ( SELECT b FROM t1 WHERE b < 'foo' );
+a b
+1 z
+100 foobar
+SELECT a,b FROM t1 WHERE ROW(a, b) = ( SELECT a, b FROM t2 ORDER BY a, b LIMIT 1 );
+a b
+1 z
+1 z
+SELECT a,b FROM t1 WHERE EXISTS ( SELECT a,b FROM t2 WHERE t2.b > t1.b );
+a b
+100 foobar
+100 foobar
+200 bar
+200 bar
+SELECT t1.* FROM t1, t2 ORDER BY ( SELECT b FROM t1 WHERE a IS NULL ORDER BY b LIMIT 1 ) DESC;
+a b pk
+1 z 2
+1 z 2
+1 z 2
+1 z 5
+1 z 5
+1 z 5
+100 foobar 1
+100 foobar 1
+100 foobar 1
+100 foobar 4
+100 foobar 4
+100 foobar 4
+200 bar 3
+200 bar 3
+200 bar 3
+200 bar 6
+200 bar 6
+200 bar 6
+SELECT a, b FROM t1 HAVING a IN ( SELECT a FROM t2 WHERE b = t1.b );
+a b
+1 z
+1 z
+100 foobar
+100 foobar
+200 bar
+200 bar
+SELECT a,b FROM t1 UNION SELECT a,b FROM t2 UNION DISTINCT SELECT a,b FROM t1;
+a b
+1 z
+100 foobar
+200 bar
+SELECT a,b FROM t1 UNION SELECT a,b FROM t2 UNION ALL SELECT a,b FROM t1;
+a b
+1 z
+1 z
+1 z
+100 foobar
+100 foobar
+100 foobar
+200 bar
+200 bar
+200 bar
+DROP TABLE t1, t2;
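
One pairing in the long SELECT smoke test above deserves a reminder: FOUND_ROWS() reports the pre-LIMIT row count of the immediately preceding SQL_CALC_FOUND_ROWS query, so the two statements must run back to back on the same connection. With t1 as above (three distinct values of a):

    SELECT SQL_CALC_FOUND_ROWS DISTINCT a FROM t1 LIMIT 1;
    SELECT FOUND_ROWS();  -- 3, the count with the LIMIT ignored
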
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/select_for_update.result b/storage/rocksdb/mysql-test/rocksdb/r/select_for_update.result
new file mode 100644
index 00000000000..2890941a1b9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/select_for_update.result
@@ -0,0 +1,35 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'a');
+connect con1,localhost,root,,;
+BEGIN;
+SELECT a,b FROM t1 WHERE b='a' FOR UPDATE;
+a b
+1 a
+3 a
+connection default;
+SET lock_wait_timeout = 1;
+SELECT a,b FROM t1 WHERE b='a';
+a b
+1 a
+3 a
+SELECT a,b FROM t1 WHERE b='a' LOCK IN SHARE MODE;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+UPDATE t1 SET b='c' WHERE b='a';
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection con1;
+COMMIT;
+SELECT a,b FROM t1;
+a b
+1 a
+2 b
+3 a
+disconnect con1;
+connection default;
+UPDATE t1 SET b='c' WHERE b='a';
+SELECT a,b FROM t1;
+a b
+1 c
+2 b
+3 c
+DROP TABLE t1;
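
select_for_update.result shows the asymmetry of exclusive row locks: the plain SELECT in the second session still succeeds (a non-locking snapshot read), while both LOCK IN SHARE MODE and UPDATE wait on con1's locks and time out. Condensed, with t1 as above:

    -- Session A
    BEGIN;
    SELECT a, b FROM t1 WHERE b = 'a' FOR UPDATE;  -- exclusive locks

    -- Session B
    SET lock_wait_timeout = 1;
    SELECT a, b FROM t1 WHERE b = 'a';                     -- ok: snapshot read
    SELECT a, b FROM t1 WHERE b = 'a' LOCK IN SHARE MODE;  -- times out
    UPDATE t1 SET b = 'c' WHERE b = 'a';                   -- times out
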
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/select_for_update_skip_locked_nowait.result b/storage/rocksdb/mysql-test/rocksdb/r/select_for_update_skip_locked_nowait.result
new file mode 100644
index 00000000000..044aa4d6fc7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/select_for_update_skip_locked_nowait.result
@@ -0,0 +1,28 @@
+drop table if exists t1;
+create table t1 (a int primary key) engine=rocksdb;
+insert into t1 values (1), (2), (3);
+Should succeed since no table is involved
+select 1 for update skip locked;
+1
+1
+select * from nonexistence for update skip locked;
+ERROR 42S02: Table 'test.nonexistence' doesn't exist
+select * from t1 for update skip locked;
+ERROR HY000: Table storage engine for 't1' doesn't have this option
+select * from t1 where a > 1 and a < 3 for update skip locked;
+ERROR HY000: Table storage engine for 't1' doesn't have this option
+insert into t1 select * from t1 for update skip locked;
+ERROR HY000: Table storage engine for 't1' doesn't have this option
+Should succeed since no table is involved
+select 1 for update nowait;
+1
+1
+select * from nonexistence for update nowait;
+ERROR 42S02: Table 'test.nonexistence' doesn't exist
+select * from t1 for update nowait;
+ERROR HY000: Table storage engine for 't1' doesn't have this option
+select * from t1 where a > 1 and a < 3 for update nowait;
+ERROR HY000: Table storage engine for 't1' doesn't have this option
+insert into t1 select * from t1 for update nowait;
+ERROR HY000: Table storage engine for 't1' doesn't have this option
+drop table t1;
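
At this point MyRocks does not implement SKIP LOCKED or NOWAIT: any locking read that actually touches a RocksDB table fails with the "doesn't have this option" error, and only table-free selects get through. So, with t1 as above:

    SELECT 1 FOR UPDATE SKIP LOCKED;     -- ok: no storage engine involved
    SELECT * FROM t1 FOR UPDATE NOWAIT;
    -- ERROR HY000: Table storage engine for 't1' doesn't have this option
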
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/select_lock_in_share_mode.result b/storage/rocksdb/mysql-test/rocksdb/r/select_lock_in_share_mode.result
new file mode 100644
index 00000000000..b073b887115
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/select_lock_in_share_mode.result
@@ -0,0 +1,37 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'a');
+connect con1,localhost,root,,;
+BEGIN;
+SELECT a,b FROM t1 WHERE b='a' LOCK IN SHARE MODE;
+a b
+1 a
+3 a
+connection default;
+SET lock_wait_timeout = 1;
+SELECT a,b FROM t1 WHERE b='a';
+a b
+1 a
+3 a
+SELECT a,b FROM t1 WHERE b='a' LOCK IN SHARE MODE;
+a b
+1 a
+3 a
+UPDATE t1 SET b='c' WHERE b='a';
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection con1;
+COMMIT;
+SELECT a,b FROM t1;
+a b
+1 a
+2 b
+3 a
+disconnect con1;
+connection default;
+UPDATE t1 SET b='c' WHERE b='a';
+SELECT a,b FROM t1;
+a b
+1 c
+2 b
+3 c
+DROP TABLE t1;
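
The share-mode variant differs from the FOR UPDATE file in exactly one line: a second LOCK IN SHARE MODE read coexists with con1's shared locks, and only the writing UPDATE times out. Side by side, with t1 as above:

    -- Session A
    BEGIN;
    SELECT a, b FROM t1 WHERE b = 'a' LOCK IN SHARE MODE;  -- shared locks

    -- Session B
    SET lock_wait_timeout = 1;
    SELECT a, b FROM t1 WHERE b = 'a' LOCK IN SHARE MODE;  -- ok: shared + shared
    UPDATE t1 SET b = 'c' WHERE b = 'a';                   -- times out
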
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result b/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result
new file mode 100644
index 00000000000..eac329a24e7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result
@@ -0,0 +1,491 @@
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+DROP TABLE IF EXISTS t3;
+DROP TABLE IF EXISTS t4;
+CREATE TABLE t1 (i INT, PRIMARY KEY (i) COMMENT 'cf_t1') ENGINE = ROCKSDB;
+CREATE TABLE t2 (j INT, PRIMARY KEY (j) COMMENT 'rev:cf_t2') ENGINE = ROCKSDB;
+CREATE TABLE t3 (k INT, PRIMARY KEY (k) COMMENT 'cf_t1') ENGINE = ROCKSDB;
+CREATE TABLE t4 (l INT, PRIMARY KEY (l) COMMENT 'cf_t4') ENGINE = ROCKSDB
+PARTITION BY KEY(l) PARTITIONS 4;
+SET @save.rocksdb_max_background_jobs= @@global.rocksdb_max_background_jobs;
+SET GLOBAL rocksdb_max_background_jobs= 1;
+INSERT INTO t1 VALUES (1), (2), (3);
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+3
+INSERT INTO t2 VALUES (1), (2), (3), (4);
+SELECT COUNT(*) FROM t2;
+COUNT(*)
+4
+INSERT INTO t4 VALUES (1), (2), (3), (4), (5);
+SELECT COUNT(*) FROM t4;
+COUNT(*)
+5
+SET GLOBAL rocksdb_force_flush_memtable_now=1;
+SET GLOBAL rocksdb_compact_cf="cf_t1";
+SHOW ENGINE rocksdb STATUS;
+Type Name Status
+STATISTICS # #
+DBSTATS # #
+CF_COMPACTION # #
+CF_COMPACTION # #
+CF_COMPACTION # #
+CF_COMPACTION # #
+MEMORY_STATS # #
+BG_THREADS # #
+BG_THREADS # #
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_CFSTATS;
+CF_NAME STAT_TYPE VALUE
+__system__ NUM_IMMUTABLE_MEM_TABLE #
+__system__ MEM_TABLE_FLUSH_PENDING #
+__system__ COMPACTION_PENDING #
+__system__ CUR_SIZE_ACTIVE_MEM_TABLE #
+__system__ CUR_SIZE_ALL_MEM_TABLES #
+__system__ NUM_ENTRIES_ACTIVE_MEM_TABLE #
+__system__ NUM_ENTRIES_IMM_MEM_TABLES #
+__system__ NON_BLOCK_CACHE_SST_MEM_USAGE #
+__system__ NUM_LIVE_VERSIONS #
+cf_t1 NUM_IMMUTABLE_MEM_TABLE #
+cf_t1 MEM_TABLE_FLUSH_PENDING #
+cf_t1 COMPACTION_PENDING #
+cf_t1 CUR_SIZE_ACTIVE_MEM_TABLE #
+cf_t1 CUR_SIZE_ALL_MEM_TABLES #
+cf_t1 NUM_ENTRIES_ACTIVE_MEM_TABLE #
+cf_t1 NUM_ENTRIES_IMM_MEM_TABLES #
+cf_t1 NON_BLOCK_CACHE_SST_MEM_USAGE #
+cf_t1 NUM_LIVE_VERSIONS #
+default NUM_IMMUTABLE_MEM_TABLE #
+default MEM_TABLE_FLUSH_PENDING #
+default COMPACTION_PENDING #
+default CUR_SIZE_ACTIVE_MEM_TABLE #
+default CUR_SIZE_ALL_MEM_TABLES #
+default NUM_ENTRIES_ACTIVE_MEM_TABLE #
+default NUM_ENTRIES_IMM_MEM_TABLES #
+default NON_BLOCK_CACHE_SST_MEM_USAGE #
+default NUM_LIVE_VERSIONS #
+rev:cf_t2 NUM_IMMUTABLE_MEM_TABLE #
+rev:cf_t2 MEM_TABLE_FLUSH_PENDING #
+rev:cf_t2 COMPACTION_PENDING #
+rev:cf_t2 CUR_SIZE_ACTIVE_MEM_TABLE #
+rev:cf_t2 CUR_SIZE_ALL_MEM_TABLES #
+rev:cf_t2 NUM_ENTRIES_ACTIVE_MEM_TABLE #
+rev:cf_t2 NUM_ENTRIES_IMM_MEM_TABLES #
+rev:cf_t2 NON_BLOCK_CACHE_SST_MEM_USAGE #
+rev:cf_t2 NUM_LIVE_VERSIONS #
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_DBSTATS;
+STAT_TYPE VALUE
+DB_BACKGROUND_ERRORS #
+DB_NUM_SNAPSHOTS #
+DB_OLDEST_SNAPSHOT_TIME #
+DB_BLOCK_CACHE_USAGE #
+SELECT TABLE_SCHEMA, TABLE_NAME, PARTITION_NAME, COUNT(STAT_TYPE)
+FROM INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT
+WHERE TABLE_SCHEMA = 'test'
+GROUP BY TABLE_NAME, PARTITION_NAME;
+TABLE_SCHEMA TABLE_NAME PARTITION_NAME COUNT(STAT_TYPE)
+test t1 NULL 57
+test t2 NULL 57
+test t4 p0 57
+test t4 p1 57
+test t4 p2 57
+test t4 p3 57
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_CF_OPTIONS;
+CF_NAME OPTION_TYPE VALUE
+__system__ COMPARATOR #
+__system__ MERGE_OPERATOR #
+__system__ COMPACTION_FILTER #
+__system__ COMPACTION_FILTER_FACTORY #
+__system__ WRITE_BUFFER_SIZE #
+__system__ MAX_WRITE_BUFFER_NUMBER #
+__system__ MIN_WRITE_BUFFER_NUMBER_TO_MERGE #
+__system__ NUM_LEVELS #
+__system__ LEVEL0_FILE_NUM_COMPACTION_TRIGGER #
+__system__ LEVEL0_SLOWDOWN_WRITES_TRIGGER #
+__system__ LEVEL0_STOP_WRITES_TRIGGER #
+__system__ MAX_MEM_COMPACTION_LEVEL #
+__system__ TARGET_FILE_SIZE_BASE #
+__system__ TARGET_FILE_SIZE_MULTIPLIER #
+__system__ MAX_BYTES_FOR_LEVEL_BASE #
+__system__ LEVEL_COMPACTION_DYNAMIC_LEVEL_BYTES #
+__system__ MAX_BYTES_FOR_LEVEL_MULTIPLIER #
+__system__ SOFT_RATE_LIMIT #
+__system__ HARD_RATE_LIMIT #
+__system__ RATE_LIMIT_DELAY_MAX_MILLISECONDS #
+__system__ ARENA_BLOCK_SIZE #
+__system__ DISABLE_AUTO_COMPACTIONS #
+__system__ PURGE_REDUNDANT_KVS_WHILE_FLUSH #
+__system__ MAX_SEQUENTIAL_SKIP_IN_ITERATIONS #
+__system__ MEMTABLE_FACTORY #
+__system__ INPLACE_UPDATE_SUPPORT #
+__system__ INPLACE_UPDATE_NUM_LOCKS #
+__system__ MEMTABLE_PREFIX_BLOOM_BITS_RATIO #
+__system__ MEMTABLE_PREFIX_BLOOM_HUGE_PAGE_TLB_SIZE #
+__system__ BLOOM_LOCALITY #
+__system__ MAX_SUCCESSIVE_MERGES #
+__system__ OPTIMIZE_FILTERS_FOR_HITS #
+__system__ MAX_BYTES_FOR_LEVEL_MULTIPLIER_ADDITIONAL #
+__system__ COMPRESSION_TYPE #
+__system__ COMPRESSION_PER_LEVEL #
+__system__ COMPRESSION_OPTS #
+__system__ BOTTOMMOST_COMPRESSION #
+__system__ PREFIX_EXTRACTOR #
+__system__ COMPACTION_STYLE #
+__system__ COMPACTION_OPTIONS_UNIVERSAL #
+__system__ COMPACTION_OPTION_FIFO::MAX_TABLE_FILES_SIZE #
+__system__ TABLE_FACTORY::FLUSH_BLOCK_POLICY_FACTORY #
+__system__ TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS #
+__system__ TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS_WITH_HIGH_PRIORITY #
+__system__ TABLE_FACTORY::PIN_L0_FILTER_AND_INDEX_BLOCKS_IN_CACHE #
+__system__ TABLE_FACTORY::PIN_TOP_LEVEL_INDEX_AND_FILTER #
+__system__ TABLE_FACTORY::INDEX_TYPE #
+__system__ TABLE_FACTORY::DATA_BLOCK_INDEX_TYPE #
+__system__ TABLE_FACTORY::INDEX_SHORTENING #
+__system__ TABLE_FACTORY::DATA_BLOCK_HASH_TABLE_UTIL_RATIO #
+__system__ TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION #
+__system__ TABLE_FACTORY::CHECKSUM #
+__system__ TABLE_FACTORY::NO_BLOCK_CACHE #
+__system__ TABLE_FACTORY::BLOCK_CACHE #
+__system__ TABLE_FACTORY::BLOCK_CACHE_NAME #
+__system__ TABLE_FACTORY::BLOCK_CACHE_OPTIONS #
+__system__ TABLE_FACTORY::CAPACITY #
+__system__ TABLE_FACTORY::NUM_SHARD_BITS #
+__system__ TABLE_FACTORY::STRICT_CAPACITY_LIMIT #
+__system__ TABLE_FACTORY::MEMORY_ALLOCATOR #
+__system__ TABLE_FACTORY::HIGH_PRI_POOL_RATIO #
+__system__ TABLE_FACTORY::BLOCK_CACHE_COMPRESSED #
+__system__ TABLE_FACTORY::PERSISTENT_CACHE #
+__system__ TABLE_FACTORY::BLOCK_SIZE #
+__system__ TABLE_FACTORY::BLOCK_SIZE_DEVIATION #
+__system__ TABLE_FACTORY::BLOCK_RESTART_INTERVAL #
+__system__ TABLE_FACTORY::INDEX_BLOCK_RESTART_INTERVAL #
+__system__ TABLE_FACTORY::METADATA_BLOCK_SIZE #
+__system__ TABLE_FACTORY::PARTITION_FILTERS #
+__system__ TABLE_FACTORY::USE_DELTA_ENCODING #
+__system__ TABLE_FACTORY::FILTER_POLICY #
+__system__ TABLE_FACTORY::WHOLE_KEY_FILTERING #
+__system__ TABLE_FACTORY::VERIFY_COMPRESSION #
+__system__ TABLE_FACTORY::READ_AMP_BYTES_PER_BIT #
+__system__ TABLE_FACTORY::FORMAT_VERSION #
+__system__ TABLE_FACTORY::ENABLE_INDEX_COMPRESSION #
+__system__ TABLE_FACTORY::BLOCK_ALIGN #
+cf_t1 COMPARATOR #
+cf_t1 MERGE_OPERATOR #
+cf_t1 COMPACTION_FILTER #
+cf_t1 COMPACTION_FILTER_FACTORY #
+cf_t1 WRITE_BUFFER_SIZE #
+cf_t1 MAX_WRITE_BUFFER_NUMBER #
+cf_t1 MIN_WRITE_BUFFER_NUMBER_TO_MERGE #
+cf_t1 NUM_LEVELS #
+cf_t1 LEVEL0_FILE_NUM_COMPACTION_TRIGGER #
+cf_t1 LEVEL0_SLOWDOWN_WRITES_TRIGGER #
+cf_t1 LEVEL0_STOP_WRITES_TRIGGER #
+cf_t1 MAX_MEM_COMPACTION_LEVEL #
+cf_t1 TARGET_FILE_SIZE_BASE #
+cf_t1 TARGET_FILE_SIZE_MULTIPLIER #
+cf_t1 MAX_BYTES_FOR_LEVEL_BASE #
+cf_t1 LEVEL_COMPACTION_DYNAMIC_LEVEL_BYTES #
+cf_t1 MAX_BYTES_FOR_LEVEL_MULTIPLIER #
+cf_t1 SOFT_RATE_LIMIT #
+cf_t1 HARD_RATE_LIMIT #
+cf_t1 RATE_LIMIT_DELAY_MAX_MILLISECONDS #
+cf_t1 ARENA_BLOCK_SIZE #
+cf_t1 DISABLE_AUTO_COMPACTIONS #
+cf_t1 PURGE_REDUNDANT_KVS_WHILE_FLUSH #
+cf_t1 MAX_SEQUENTIAL_SKIP_IN_ITERATIONS #
+cf_t1 MEMTABLE_FACTORY #
+cf_t1 INPLACE_UPDATE_SUPPORT #
+cf_t1 INPLACE_UPDATE_NUM_LOCKS #
+cf_t1 MEMTABLE_PREFIX_BLOOM_BITS_RATIO #
+cf_t1 MEMTABLE_PREFIX_BLOOM_HUGE_PAGE_TLB_SIZE #
+cf_t1 BLOOM_LOCALITY #
+cf_t1 MAX_SUCCESSIVE_MERGES #
+cf_t1 OPTIMIZE_FILTERS_FOR_HITS #
+cf_t1 MAX_BYTES_FOR_LEVEL_MULTIPLIER_ADDITIONAL #
+cf_t1 COMPRESSION_TYPE #
+cf_t1 COMPRESSION_PER_LEVEL #
+cf_t1 COMPRESSION_OPTS #
+cf_t1 BOTTOMMOST_COMPRESSION #
+cf_t1 PREFIX_EXTRACTOR #
+cf_t1 COMPACTION_STYLE #
+cf_t1 COMPACTION_OPTIONS_UNIVERSAL #
+cf_t1 COMPACTION_OPTION_FIFO::MAX_TABLE_FILES_SIZE #
+cf_t1 TABLE_FACTORY::FLUSH_BLOCK_POLICY_FACTORY #
+cf_t1 TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS #
+cf_t1 TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS_WITH_HIGH_PRIORITY #
+cf_t1 TABLE_FACTORY::PIN_L0_FILTER_AND_INDEX_BLOCKS_IN_CACHE #
+cf_t1 TABLE_FACTORY::PIN_TOP_LEVEL_INDEX_AND_FILTER #
+cf_t1 TABLE_FACTORY::INDEX_TYPE #
+cf_t1 TABLE_FACTORY::DATA_BLOCK_INDEX_TYPE #
+cf_t1 TABLE_FACTORY::INDEX_SHORTENING #
+cf_t1 TABLE_FACTORY::DATA_BLOCK_HASH_TABLE_UTIL_RATIO #
+cf_t1 TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION #
+cf_t1 TABLE_FACTORY::CHECKSUM #
+cf_t1 TABLE_FACTORY::NO_BLOCK_CACHE #
+cf_t1 TABLE_FACTORY::BLOCK_CACHE #
+cf_t1 TABLE_FACTORY::BLOCK_CACHE_NAME #
+cf_t1 TABLE_FACTORY::BLOCK_CACHE_OPTIONS #
+cf_t1 TABLE_FACTORY::CAPACITY #
+cf_t1 TABLE_FACTORY::NUM_SHARD_BITS #
+cf_t1 TABLE_FACTORY::STRICT_CAPACITY_LIMIT #
+cf_t1 TABLE_FACTORY::MEMORY_ALLOCATOR #
+cf_t1 TABLE_FACTORY::HIGH_PRI_POOL_RATIO #
+cf_t1 TABLE_FACTORY::BLOCK_CACHE_COMPRESSED #
+cf_t1 TABLE_FACTORY::PERSISTENT_CACHE #
+cf_t1 TABLE_FACTORY::BLOCK_SIZE #
+cf_t1 TABLE_FACTORY::BLOCK_SIZE_DEVIATION #
+cf_t1 TABLE_FACTORY::BLOCK_RESTART_INTERVAL #
+cf_t1 TABLE_FACTORY::INDEX_BLOCK_RESTART_INTERVAL #
+cf_t1 TABLE_FACTORY::METADATA_BLOCK_SIZE #
+cf_t1 TABLE_FACTORY::PARTITION_FILTERS #
+cf_t1 TABLE_FACTORY::USE_DELTA_ENCODING #
+cf_t1 TABLE_FACTORY::FILTER_POLICY #
+cf_t1 TABLE_FACTORY::WHOLE_KEY_FILTERING #
+cf_t1 TABLE_FACTORY::VERIFY_COMPRESSION #
+cf_t1 TABLE_FACTORY::READ_AMP_BYTES_PER_BIT #
+cf_t1 TABLE_FACTORY::FORMAT_VERSION #
+cf_t1 TABLE_FACTORY::ENABLE_INDEX_COMPRESSION #
+cf_t1 TABLE_FACTORY::BLOCK_ALIGN #
+default COMPARATOR #
+default MERGE_OPERATOR #
+default COMPACTION_FILTER #
+default COMPACTION_FILTER_FACTORY #
+default WRITE_BUFFER_SIZE #
+default MAX_WRITE_BUFFER_NUMBER #
+default MIN_WRITE_BUFFER_NUMBER_TO_MERGE #
+default NUM_LEVELS #
+default LEVEL0_FILE_NUM_COMPACTION_TRIGGER #
+default LEVEL0_SLOWDOWN_WRITES_TRIGGER #
+default LEVEL0_STOP_WRITES_TRIGGER #
+default MAX_MEM_COMPACTION_LEVEL #
+default TARGET_FILE_SIZE_BASE #
+default TARGET_FILE_SIZE_MULTIPLIER #
+default MAX_BYTES_FOR_LEVEL_BASE #
+default LEVEL_COMPACTION_DYNAMIC_LEVEL_BYTES #
+default MAX_BYTES_FOR_LEVEL_MULTIPLIER #
+default SOFT_RATE_LIMIT #
+default HARD_RATE_LIMIT #
+default RATE_LIMIT_DELAY_MAX_MILLISECONDS #
+default ARENA_BLOCK_SIZE #
+default DISABLE_AUTO_COMPACTIONS #
+default PURGE_REDUNDANT_KVS_WHILE_FLUSH #
+default MAX_SEQUENTIAL_SKIP_IN_ITERATIONS #
+default MEMTABLE_FACTORY #
+default INPLACE_UPDATE_SUPPORT #
+default INPLACE_UPDATE_NUM_LOCKS #
+default MEMTABLE_PREFIX_BLOOM_BITS_RATIO #
+default MEMTABLE_PREFIX_BLOOM_HUGE_PAGE_TLB_SIZE #
+default BLOOM_LOCALITY #
+default MAX_SUCCESSIVE_MERGES #
+default OPTIMIZE_FILTERS_FOR_HITS #
+default MAX_BYTES_FOR_LEVEL_MULTIPLIER_ADDITIONAL #
+default COMPRESSION_TYPE #
+default COMPRESSION_PER_LEVEL #
+default COMPRESSION_OPTS #
+default BOTTOMMOST_COMPRESSION #
+default PREFIX_EXTRACTOR #
+default COMPACTION_STYLE #
+default COMPACTION_OPTIONS_UNIVERSAL #
+default COMPACTION_OPTION_FIFO::MAX_TABLE_FILES_SIZE #
+default TABLE_FACTORY::FLUSH_BLOCK_POLICY_FACTORY #
+default TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS #
+default TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS_WITH_HIGH_PRIORITY #
+default TABLE_FACTORY::PIN_L0_FILTER_AND_INDEX_BLOCKS_IN_CACHE #
+default TABLE_FACTORY::PIN_TOP_LEVEL_INDEX_AND_FILTER #
+default TABLE_FACTORY::INDEX_TYPE #
+default TABLE_FACTORY::DATA_BLOCK_INDEX_TYPE #
+default TABLE_FACTORY::INDEX_SHORTENING #
+default TABLE_FACTORY::DATA_BLOCK_HASH_TABLE_UTIL_RATIO #
+default TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION #
+default TABLE_FACTORY::CHECKSUM #
+default TABLE_FACTORY::NO_BLOCK_CACHE #
+default TABLE_FACTORY::BLOCK_CACHE #
+default TABLE_FACTORY::BLOCK_CACHE_NAME #
+default TABLE_FACTORY::BLOCK_CACHE_OPTIONS #
+default TABLE_FACTORY::CAPACITY #
+default TABLE_FACTORY::NUM_SHARD_BITS #
+default TABLE_FACTORY::STRICT_CAPACITY_LIMIT #
+default TABLE_FACTORY::MEMORY_ALLOCATOR #
+default TABLE_FACTORY::HIGH_PRI_POOL_RATIO #
+default TABLE_FACTORY::BLOCK_CACHE_COMPRESSED #
+default TABLE_FACTORY::PERSISTENT_CACHE #
+default TABLE_FACTORY::BLOCK_SIZE #
+default TABLE_FACTORY::BLOCK_SIZE_DEVIATION #
+default TABLE_FACTORY::BLOCK_RESTART_INTERVAL #
+default TABLE_FACTORY::INDEX_BLOCK_RESTART_INTERVAL #
+default TABLE_FACTORY::METADATA_BLOCK_SIZE #
+default TABLE_FACTORY::PARTITION_FILTERS #
+default TABLE_FACTORY::USE_DELTA_ENCODING #
+default TABLE_FACTORY::FILTER_POLICY #
+default TABLE_FACTORY::WHOLE_KEY_FILTERING #
+default TABLE_FACTORY::VERIFY_COMPRESSION #
+default TABLE_FACTORY::READ_AMP_BYTES_PER_BIT #
+default TABLE_FACTORY::FORMAT_VERSION #
+default TABLE_FACTORY::ENABLE_INDEX_COMPRESSION #
+default TABLE_FACTORY::BLOCK_ALIGN #
+rev:cf_t2 COMPARATOR #
+rev:cf_t2 MERGE_OPERATOR #
+rev:cf_t2 COMPACTION_FILTER #
+rev:cf_t2 COMPACTION_FILTER_FACTORY #
+rev:cf_t2 WRITE_BUFFER_SIZE #
+rev:cf_t2 MAX_WRITE_BUFFER_NUMBER #
+rev:cf_t2 MIN_WRITE_BUFFER_NUMBER_TO_MERGE #
+rev:cf_t2 NUM_LEVELS #
+rev:cf_t2 LEVEL0_FILE_NUM_COMPACTION_TRIGGER #
+rev:cf_t2 LEVEL0_SLOWDOWN_WRITES_TRIGGER #
+rev:cf_t2 LEVEL0_STOP_WRITES_TRIGGER #
+rev:cf_t2 MAX_MEM_COMPACTION_LEVEL #
+rev:cf_t2 TARGET_FILE_SIZE_BASE #
+rev:cf_t2 TARGET_FILE_SIZE_MULTIPLIER #
+rev:cf_t2 MAX_BYTES_FOR_LEVEL_BASE #
+rev:cf_t2 LEVEL_COMPACTION_DYNAMIC_LEVEL_BYTES #
+rev:cf_t2 MAX_BYTES_FOR_LEVEL_MULTIPLIER #
+rev:cf_t2 SOFT_RATE_LIMIT #
+rev:cf_t2 HARD_RATE_LIMIT #
+rev:cf_t2 RATE_LIMIT_DELAY_MAX_MILLISECONDS #
+rev:cf_t2 ARENA_BLOCK_SIZE #
+rev:cf_t2 DISABLE_AUTO_COMPACTIONS #
+rev:cf_t2 PURGE_REDUNDANT_KVS_WHILE_FLUSH #
+rev:cf_t2 MAX_SEQUENTIAL_SKIP_IN_ITERATIONS #
+rev:cf_t2 MEMTABLE_FACTORY #
+rev:cf_t2 INPLACE_UPDATE_SUPPORT #
+rev:cf_t2 INPLACE_UPDATE_NUM_LOCKS #
+rev:cf_t2 MEMTABLE_PREFIX_BLOOM_BITS_RATIO #
+rev:cf_t2 MEMTABLE_PREFIX_BLOOM_HUGE_PAGE_TLB_SIZE #
+rev:cf_t2 BLOOM_LOCALITY #
+rev:cf_t2 MAX_SUCCESSIVE_MERGES #
+rev:cf_t2 OPTIMIZE_FILTERS_FOR_HITS #
+rev:cf_t2 MAX_BYTES_FOR_LEVEL_MULTIPLIER_ADDITIONAL #
+rev:cf_t2 COMPRESSION_TYPE #
+rev:cf_t2 COMPRESSION_PER_LEVEL #
+rev:cf_t2 COMPRESSION_OPTS #
+rev:cf_t2 BOTTOMMOST_COMPRESSION #
+rev:cf_t2 PREFIX_EXTRACTOR #
+rev:cf_t2 COMPACTION_STYLE #
+rev:cf_t2 COMPACTION_OPTIONS_UNIVERSAL #
+rev:cf_t2 COMPACTION_OPTION_FIFO::MAX_TABLE_FILES_SIZE #
+rev:cf_t2 TABLE_FACTORY::FLUSH_BLOCK_POLICY_FACTORY #
+rev:cf_t2 TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS #
+rev:cf_t2 TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS_WITH_HIGH_PRIORITY #
+rev:cf_t2 TABLE_FACTORY::PIN_L0_FILTER_AND_INDEX_BLOCKS_IN_CACHE #
+rev:cf_t2 TABLE_FACTORY::PIN_TOP_LEVEL_INDEX_AND_FILTER #
+rev:cf_t2 TABLE_FACTORY::INDEX_TYPE #
+rev:cf_t2 TABLE_FACTORY::DATA_BLOCK_INDEX_TYPE #
+rev:cf_t2 TABLE_FACTORY::INDEX_SHORTENING #
+rev:cf_t2 TABLE_FACTORY::DATA_BLOCK_HASH_TABLE_UTIL_RATIO #
+rev:cf_t2 TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION #
+rev:cf_t2 TABLE_FACTORY::CHECKSUM #
+rev:cf_t2 TABLE_FACTORY::NO_BLOCK_CACHE #
+rev:cf_t2 TABLE_FACTORY::BLOCK_CACHE #
+rev:cf_t2 TABLE_FACTORY::BLOCK_CACHE_NAME #
+rev:cf_t2 TABLE_FACTORY::BLOCK_CACHE_OPTIONS #
+rev:cf_t2 TABLE_FACTORY::CAPACITY #
+rev:cf_t2 TABLE_FACTORY::NUM_SHARD_BITS #
+rev:cf_t2 TABLE_FACTORY::STRICT_CAPACITY_LIMIT #
+rev:cf_t2 TABLE_FACTORY::MEMORY_ALLOCATOR #
+rev:cf_t2 TABLE_FACTORY::HIGH_PRI_POOL_RATIO #
+rev:cf_t2 TABLE_FACTORY::BLOCK_CACHE_COMPRESSED #
+rev:cf_t2 TABLE_FACTORY::PERSISTENT_CACHE #
+rev:cf_t2 TABLE_FACTORY::BLOCK_SIZE #
+rev:cf_t2 TABLE_FACTORY::BLOCK_SIZE_DEVIATION #
+rev:cf_t2 TABLE_FACTORY::BLOCK_RESTART_INTERVAL #
+rev:cf_t2 TABLE_FACTORY::INDEX_BLOCK_RESTART_INTERVAL #
+rev:cf_t2 TABLE_FACTORY::METADATA_BLOCK_SIZE #
+rev:cf_t2 TABLE_FACTORY::PARTITION_FILTERS #
+rev:cf_t2 TABLE_FACTORY::USE_DELTA_ENCODING #
+rev:cf_t2 TABLE_FACTORY::FILTER_POLICY #
+rev:cf_t2 TABLE_FACTORY::WHOLE_KEY_FILTERING #
+rev:cf_t2 TABLE_FACTORY::VERIFY_COMPRESSION #
+rev:cf_t2 TABLE_FACTORY::READ_AMP_BYTES_PER_BIT #
+rev:cf_t2 TABLE_FACTORY::FORMAT_VERSION #
+rev:cf_t2 TABLE_FACTORY::ENABLE_INDEX_COMPRESSION #
+rev:cf_t2 TABLE_FACTORY::BLOCK_ALIGN #
+DROP TABLE t1;
+DROP TABLE t2;
+DROP TABLE t3;
+DROP TABLE t4;
+SHOW ENGINE rocksdb MUTEX;
+Type Name Status
+SHOW ENGINE ALL MUTEX;
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+---SNAPSHOT, ACTIVE NUM sec
+MySQL thread id TID, OS thread handle PTR, query id QID localhost root ACTION
+SHOW ENGINE rocksdb TRANSACTION STATUS
+lock count 0, write count 0
+insert count 0, update count 0, delete count 0
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+ROLLBACK;
+START TRANSACTION WITH SHARED ROCKSDB SNAPSHOT;
+File Position Gtid_executed Snapshot_ID
+ 0 1
+SHOW ENGINE rocksdb STATUS;
+Type Name Status
+STATISTICS # #
+DBSTATS # #
+CF_COMPACTION # #
+CF_COMPACTION # #
+CF_COMPACTION # #
+CF_COMPACTION # #
+MEMORY_STATS # #
+BG_THREADS # #
+BG_THREADS # #
+EXPLICIT_SNAPSHOTS # #
+ROLLBACK;
+CREATE EXPLICIT rocksdb SNAPSHOT;
+File Position Gtid_executed Snapshot_ID
+ 0 2
+SHOW ENGINE rocksdb STATUS;
+Type Name Status
+STATISTICS # #
+DBSTATS # #
+CF_COMPACTION # #
+CF_COMPACTION # #
+CF_COMPACTION # #
+CF_COMPACTION # #
+MEMORY_STATS # #
+BG_THREADS # #
+BG_THREADS # #
+EXPLICIT_SNAPSHOTS # #
+RELEASE EXPLICIT rocksdb SNAPSHOT;
+File Position Gtid_executed Snapshot_ID
+ 0 2
+SHOW ENGINE rocksdb STATUS;
+Type Name Status
+STATISTICS # #
+DBSTATS # #
+CF_COMPACTION # #
+CF_COMPACTION # #
+CF_COMPACTION # #
+CF_COMPACTION # #
+MEMORY_STATS # #
+BG_THREADS # #
+BG_THREADS # #
+SET GLOBAL rocksdb_max_background_jobs= @save.rocksdb_max_background_jobs;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/show_table_status.result b/storage/rocksdb/mysql-test/rocksdb/r/show_table_status.result
new file mode 100644
index 00000000000..29140f045e4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/show_table_status.result
@@ -0,0 +1,24 @@
+DROP TABLE IF EXISTS t1, t2, t3;
+CREATE TABLE t1 (a INT, b CHAR(8) PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (100,'a'),(2,'foo');
+CREATE TABLE t2 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t2 (a,b) VALUES (1,'bar');
+set global rocksdb_force_flush_memtable_now = true;
+CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb CHARACTER SET utf8;
+SHOW TABLE STATUS WHERE name IN ( 't1', 't2', 't3' );
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL utf8_general_ci NULL
+SHOW TABLE STATUS WHERE name LIKE 't2';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+DROP TABLE t1, t2, t3;
+CREATE DATABASE `db_new..............................................end`;
+USE `db_new..............................................end`;
+CREATE TABLE `t1_new..............................................end`(a int) engine=rocksdb;
+INSERT INTO `t1_new..............................................end` VALUES (1);
+SELECT TABLE_SCHEMA, TABLE_NAME FROM information_schema.table_statistics WHERE TABLE_NAME = 't1_new..............................................end';
+TABLE_SCHEMA db_new..............................................end
+TABLE_NAME t1_new..............................................end
+DROP DATABASE `db_new..............................................end`;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/shutdown.result b/storage/rocksdb/mysql-test/rocksdb/r/shutdown.result
new file mode 100644
index 00000000000..f40aceffd79
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/shutdown.result
@@ -0,0 +1,9 @@
+DROP TABLE IF EXISTS t1;
+SHOW GLOBAL VARIABLES LIKE "log_bin";
+Variable_name Value
+log_bin ON
+CREATE TABLE t1 (i INT, PRIMARY KEY (i) COMMENT 'cf_t1') ENGINE = ROCKSDB;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+1000
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/singledelete.result b/storage/rocksdb/mysql-test/rocksdb/r/singledelete.result
new file mode 100644
index 00000000000..da29f325410
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/singledelete.result
@@ -0,0 +1,86 @@
+CREATE TABLE t1 (id INT, value int, PRIMARY KEY (id), INDEX (value)) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (1,1);
+select variable_value into @s from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_number_sst_entry_delete';
+optimize table t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+select case when variable_value-@s > 5 and variable_value-@s < 100 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
+case when variable_value-@s > 5 and variable_value-@s < 100 then 'true' else 'false' end
+true
+select case when variable_value-@d < 10 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_delete';
+case when variable_value-@d < 10 then 'true' else 'false' end
+true
+CREATE TABLE t2 (id INT, value int, PRIMARY KEY (id), INDEX (value)) ENGINE=RocksDB;
+INSERT INTO t2 VALUES (1,1);
+select variable_value into @s from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_number_sst_entry_delete';
+optimize table t2;
+Table Op Msg_type Msg_text
+test.t2 optimize status OK
+select case when variable_value-@s > 5 and variable_value-@s < 100 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
+case when variable_value-@s > 5 and variable_value-@s < 100 then 'true' else 'false' end
+true
+select case when variable_value-@d > 9000 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_delete';
+case when variable_value-@d > 9000 then 'true' else 'false' end
+true
+CREATE TABLE t3 (id INT, value int, PRIMARY KEY (id)) ENGINE=RocksDB;
+INSERT INTO t3 VALUES (1,1);
+select variable_value into @s from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_number_sst_entry_delete';
+optimize table t3;
+Table Op Msg_type Msg_text
+test.t3 optimize status OK
+select case when variable_value-@s = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
+case when variable_value-@s = 0 then 'true' else 'false' end
+true
+select case when variable_value-@d > 9000 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_delete';
+case when variable_value-@d > 9000 then 'true' else 'false' end
+true
+CREATE TABLE t4 (id INT, PRIMARY KEY (id)) ENGINE=RocksDB;
+INSERT INTO t4 VALUES (1);
+select variable_value into @s from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_number_sst_entry_delete';
+optimize table t4;
+Table Op Msg_type Msg_text
+test.t4 optimize status OK
+select case when variable_value-@s > 5 and variable_value-@s < 100 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
+case when variable_value-@s > 5 and variable_value-@s < 100 then 'true' else 'false' end
+true
+select case when variable_value-@d < 10 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_delete';
+case when variable_value-@d < 10 then 'true' else 'false' end
+true
+CREATE TABLE t5 (id1 INT, id2 INT, PRIMARY KEY (id1, id2), INDEX(id2)) ENGINE=RocksDB;
+INSERT INTO t5 VALUES (1, 1);
+select variable_value into @s from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_number_sst_entry_delete';
+optimize table t5;
+Table Op Msg_type Msg_text
+test.t5 optimize status OK
+select case when variable_value-@s > 5 and variable_value-@s < 100 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
+case when variable_value-@s > 5 and variable_value-@s < 100 then 'true' else 'false' end
+true
+select case when variable_value-@d < 10 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_delete';
+case when variable_value-@d < 10 then 'true' else 'false' end
+true
+CREATE TABLE t6 (
+pk VARCHAR(64) COLLATE latin1_swedish_ci PRIMARY KEY
+) ENGINE=RocksDB;
+INSERT INTO t6 VALUES ('a');
+SET GLOBAL rocksdb_force_flush_memtable_now=1;
+SELECT * FROM t6;
+pk
+a
+UPDATE t6 SET pk='A' WHERE pk='a';
+SELECT * FROM t6;
+pk
+A
+DELETE FROM t6 where pk='A';
+SELECT should return nothing;
+SELECT * FROM t6;
+pk
+SET GLOBAL rocksdb_force_flush_memtable_now=1;
+SELECT should return nothing;
+SELECT * FROM t6;
+pk
+DROP TABLE t1, t2, t3, t4, t5, t6;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/skip_core_dump_on_error.result b/storage/rocksdb/mysql-test/rocksdb/r/skip_core_dump_on_error.result
new file mode 100644
index 00000000000..60d9f69a398
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/skip_core_dump_on_error.result
@@ -0,0 +1,31 @@
+create table mz(c int);
+affected rows: 0
+insert into mz values(1);
+affected rows: 1
+commit;
+affected rows: 0
+SET debug= '+d,abort_with_io_write_error';
+affected rows: 0
+set global binlog_error_action=1;
+affected rows: 0
+show session variables like 'debug';
+Variable_name Value
+debug d,abort_with_io_write_error
+affected rows: 1
+show global variables like 'binlog_error_action';
+Variable_name Value
+binlog_error_action ABORT_SERVER
+affected rows: 1
+show global variables like 'skip_core_dump_on_error';
+Variable_name Value
+skip_core_dump_on_error ON
+affected rows: 1
+# crash_during_update
+update mz set c=13;
+ERROR HY000: Binary logging not possible. Message: An error occurred during sync stage of the commit. 'binlog_error_action' is set to 'ABORT_SERVER'. Hence aborting the server.
+# server aborted
+Pattern "mysqld got signal 6" found
+# but no core written
+Pattern "Writing a core file" not found
+drop table mz;
+affected rows: 0
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result b/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result
new file mode 100644
index 00000000000..92906f22b1e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result
@@ -0,0 +1,20 @@
+create table t1 (pk int primary key) engine=rocksdb;
+show tables;
+Tables_in_test
+#mysql50#t1#sql-test
+t1
+call mtr.add_suppression('Invalid .old.. table or database name .t1#sql-test.');
+set session debug_dbug="+d,gen_sql_table_name";
+rename table t1 to t2;
+set session debug_dbug= "-d,gen_sql_table_name";
+show tables;
+Tables_in_test
+#mysql50#t1#sql-test
+t2
+show tables;
+Tables_in_test
+create table t2 (pk int primary key) engine=rocksdb;
+show tables;
+Tables_in_test
+t2
+drop table t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/slow_query_log.result b/storage/rocksdb/mysql-test/rocksdb/r/slow_query_log.result
new file mode 100644
index 00000000000..e8a11363dba
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/slow_query_log.result
@@ -0,0 +1,10 @@
+SET @cur_long_query_time = @@long_query_time;
+SET @@long_query_time = 600;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (id INT PRIMARY KEY, value INT) ENGINE=ROCKSDB;
+SET @@long_query_time = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+7500
+SET @@long_query_time = @cur_long_query_time;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/statistics.result b/storage/rocksdb/mysql-test/rocksdb/r/statistics.result
new file mode 100644
index 00000000000..579c4adc11d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/statistics.result
@@ -0,0 +1,69 @@
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+DROP TABLE IF EXISTS t3;
+create table t1(
+id bigint not null primary key auto_increment,
+a varchar(255) not null,
+b bigint,
+index t1_1(b)
+) engine=rocksdb;
+create table t2(
+id bigint not null primary key auto_increment,
+a varchar(255) not null,
+b bigint,
+index t2_1(b) comment 'cf_t3'
+) engine=rocksdb;
+create table t3(
+id bigint not null primary key auto_increment,
+a varchar(255) not null,
+b bigint,
+index t3_1(b) comment 'rev:cf_t4'
+) engine=rocksdb;
+SELECT table_name, table_rows FROM information_schema.tables WHERE table_schema = DATABASE() and table_name <> 't1';
+table_name table_rows
+t2 1000
+t3 1000
+SELECT CASE WHEN table_rows < 100000 then 'true' else 'false' end from information_schema.tables where table_name = 't1';
+CASE WHEN table_rows < 100000 then 'true' else 'false' end
+true
+set global rocksdb_force_flush_memtable_now = true;
+SELECT table_name, table_rows FROM information_schema.tables WHERE table_schema = DATABASE();
+table_name table_rows
+t1 1000
+t2 1000
+t3 1000
+SELECT table_name, data_length>0, index_length>0 FROM information_schema.tables WHERE table_schema = DATABASE();
+table_name data_length>0 index_length>0
+t1 1 1
+t2 1 1
+t3 1 1
+SELECT table_name, table_rows FROM information_schema.tables WHERE table_schema = DATABASE();
+table_name table_rows
+t1 1000
+t2 1000
+t3 1000
+SELECT table_name, data_length>0, index_length>0 FROM information_schema.tables WHERE table_schema = DATABASE();
+table_name data_length>0 index_length>0
+t1 1 1
+t2 1 1
+t3 1 1
+analyze table t1,t2,t3,t4,t5;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+test.t2 analyze status OK
+test.t3 analyze status OK
+test.t4 analyze Error Table 'test.t4' doesn't exist
+test.t4 analyze status Operation failed
+test.t5 analyze Error Table 'test.t5' doesn't exist
+test.t5 analyze status Operation failed
+SELECT table_name, table_rows FROM information_schema.tables WHERE table_schema = DATABASE();
+table_name table_rows
+t1 1000
+t2 1000
+t3 1000
+SELECT table_name, data_length>0, index_length>0 FROM information_schema.tables WHERE table_schema = DATABASE();
+table_name data_length>0 index_length>0
+t1 1 1
+t2 1 1
+t3 1 1
+drop table t1, t2, t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/table_stats.result b/storage/rocksdb/mysql-test/rocksdb/r/table_stats.result
new file mode 100644
index 00000000000..31cb1b6477b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/table_stats.result
@@ -0,0 +1,12 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (i INT, PRIMARY KEY (i) COMMENT 'cf_t1') ENGINE = ROCKSDB;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+1000
+SELECT * FROM INFORMATION_SCHEMA.TABLE_STATISTICS WHERE TABLE_NAME = "t1";
+TABLE_SCHEMA test
+TABLE_NAME t1
+ROWS_READ 1000
+ROWS_CHANGED 1000
+ROWS_CHANGED_X_INDEXES 1000
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_ai.result b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_ai.result
new file mode 100644
index 00000000000..7cc0cc7cd98
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_ai.result
@@ -0,0 +1,38 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb AUTO_INCREMENT=10;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL AUTO_INCREMENT,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=10 DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (NULL);
+SELECT * FROM t1;
+a
+10
+ALTER TABLE t1 AUTO_INCREMENT=100;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL AUTO_INCREMENT,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=100 DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (NULL);
+SELECT * FROM t1 ORDER BY a;
+a
+10
+100
+ALTER TABLE t1 AUTO_INCREMENT=50;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL AUTO_INCREMENT,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=101 DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (NULL);
+SELECT * FROM t1 ORDER BY a;
+a
+10
+100
+101
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_avg_row_length.result b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_avg_row_length.result
new file mode 100644
index 00000000000..f904c04e0fb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_avg_row_length.result
@@ -0,0 +1,18 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb AVG_ROW_LENGTH=300;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 AVG_ROW_LENGTH=300
+ALTER TABLE t1 AVG_ROW_LENGTH=30000000;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 AVG_ROW_LENGTH=30000000
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_checksum.result b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_checksum.result
new file mode 100644
index 00000000000..d9cc69ee2a1
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_checksum.result
@@ -0,0 +1,18 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb CHECKSUM=1;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 CHECKSUM=1
+ALTER TABLE t1 CHECKSUM=0;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_connection.result b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_connection.result
new file mode 100644
index 00000000000..0beddd9f6e3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_connection.result
@@ -0,0 +1,26 @@
+DROP TABLE IF EXISTS t1;
+CREATE DATABASE test_remote;
+CREATE SERVER test_connection FOREIGN DATA WRAPPER mysql
+OPTIONS (USER 'root', HOST 'localhost', DATABASE 'test_remote');
+CREATE SERVER test_connection2 FOREIGN DATA WRAPPER mysql
+OPTIONS (USER 'root', HOST 'localhost', DATABASE 'test_remote');
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb CONNECTION='test_connection';
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 CONNECTION='test_connection'
+ALTER TABLE t1 CONNECTION='test_connection2';
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 CONNECTION='test_connection2'
+DROP TABLE t1;
+DROP SERVER test_connection;
+DROP SERVER test_connection2;
+DROP DATABASE test_remote;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_data_index_dir.result b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_data_index_dir.result
new file mode 100644
index 00000000000..dcb66a2ab23
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_data_index_dir.result
@@ -0,0 +1,41 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb DATA DIRECTORY = '/foo/bar/data';
+ERROR HY000: Can't create table `test`.`t1` (errno: 196 "Unknown error 196")
+show warnings;
+Level Code Message
+Error 1005 Can't create table `test`.`t1` (errno: 196 "Unknown error 196")
+Warning 1296 Got error 196 'Specifying DATA DIRECTORY for an individual table is not supported.' from ROCKSDB
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb INDEX DIRECTORY = '/foo/bar/index';
+ERROR HY000: Can't create table `test`.`t1` (errno: 197 "Unknown error 197")
+show warnings;
+Level Code Message
+Error 1005 Can't create table `test`.`t1` (errno: 197 "Unknown error 197")
+Warning 1296 Got error 197 'Specifying INDEX DIRECTORY for an individual table is not supported.' from ROCKSDB
+CREATE TABLE t1 (id INT NOT NULL PRIMARY KEY) ENGINE=rocksdb PARTITION BY RANGE (id)
+(
+PARTITION P0 VALUES LESS THAN (1000)
+DATA DIRECTORY = '/foo/bar/data/',
+PARTITION P1 VALUES LESS THAN (2000)
+DATA DIRECTORY = '/foo/bar/data/',
+PARTITION P2 VALUES LESS THAN (MAXVALUE)
+);
+ERROR HY000: Can't create table `test`.`t1` (errno: 196 "Unknown error 196")
+show warnings;
+Level Code Message
+Error 1005 Can't create table `test`.`t1` (errno: 196 "Unknown error 196")
+Warning 1296 Got error 196 'Specifying DATA DIRECTORY for an individual table is not supported.' from ROCKSDB
+Error 6 Error on delete of './test/t1.par' (Errcode: 2 "No such file or directory")
+CREATE TABLE t1 (id int not null primary key) ENGINE=rocksdb PARTITION BY RANGE (id)
+(
+PARTITION P0 VALUES LESS THAN (1000)
+INDEX DIRECTORY = '/foo/bar/data/',
+PARTITION P1 VALUES LESS THAN (2000)
+INDEX DIRECTORY = '/foo/bar/data/',
+PARTITION P2 VALUES LESS THAN (MAXVALUE)
+);
+ERROR HY000: Can't create table `test`.`t1` (errno: 197 "Unknown error 197")
+show warnings;
+Level Code Message
+Error 1005 Can't create table `test`.`t1` (errno: 197 "Unknown error 197")
+Warning 1296 Got error 197 'Specifying INDEX DIRECTORY for an individual table is not supported.' from ROCKSDB
+Error 6 Error on delete of './test/t1.par' (Errcode: 2 "No such file or directory")
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_delay_key_write.result b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_delay_key_write.result
new file mode 100644
index 00000000000..c5d1ad8ace9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_delay_key_write.result
@@ -0,0 +1,18 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb DELAY_KEY_WRITE=1;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 DELAY_KEY_WRITE=1
+ALTER TABLE t1 DELAY_KEY_WRITE=0;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_insert_method.result b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_insert_method.result
new file mode 100644
index 00000000000..bd5e65f59c4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_insert_method.result
@@ -0,0 +1,18 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb INSERT_METHOD=FIRST;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ALTER TABLE t1 INSERT_METHOD=NO;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_key_block_size.result b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_key_block_size.result
new file mode 100644
index 00000000000..6c34d08b7eb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_key_block_size.result
@@ -0,0 +1,18 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb KEY_BLOCK_SIZE=8;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=8
+ALTER TABLE t1 KEY_BLOCK_SIZE=1;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`) KEY_BLOCK_SIZE=8
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=1
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_max_rows.result b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_max_rows.result
new file mode 100644
index 00000000000..679e00e0771
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_max_rows.result
@@ -0,0 +1,18 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb MAX_ROWS=10000000;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 MAX_ROWS=10000000
+ALTER TABLE t1 MAX_ROWS=30000000;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 MAX_ROWS=30000000
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_min_rows.result b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_min_rows.result
new file mode 100644
index 00000000000..bc650434b7a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_min_rows.result
@@ -0,0 +1,18 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb MIN_ROWS=1;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 MIN_ROWS=1
+ALTER TABLE t1 MIN_ROWS=10000;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 MIN_ROWS=10000
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_pack_keys.result b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_pack_keys.result
new file mode 100644
index 00000000000..b42d3f4d450
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_pack_keys.result
@@ -0,0 +1,18 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb PACK_KEYS=1;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 PACK_KEYS=1
+ALTER TABLE t1 PACK_KEYS=0;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 PACK_KEYS=0
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_password.result b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_password.result
new file mode 100644
index 00000000000..80ec79497ce
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_password.result
@@ -0,0 +1,18 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb PASSWORD='password';
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ALTER TABLE t1 PASSWORD='new_password';
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_row_format.result b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_row_format.result
new file mode 100644
index 00000000000..a0c480eaede
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_row_format.result
@@ -0,0 +1,18 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT, b CHAR(8) PRIMARY KEY) ENGINE=rocksdb ROW_FORMAT=FIXED;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) NOT NULL,
+ PRIMARY KEY (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 ROW_FORMAT=FIXED
+ALTER TABLE t1 ROW_FORMAT=DYNAMIC;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) NOT NULL,
+ PRIMARY KEY (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_union.result b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_union.result
new file mode 100644
index 00000000000..fb021dbda18
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_union.result
@@ -0,0 +1,16 @@
+DROP TABLE IF EXISTS t1, child1, child2;
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=rocksdb UNION(child1);
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ALTER TABLE t1 UNION = (child1,child2);
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+DROP TABLE t1, child1, child2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/tbl_standard_opts.result b/storage/rocksdb/mysql-test/rocksdb/r/tbl_standard_opts.result
new file mode 100644
index 00000000000..ecfc72932cc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/tbl_standard_opts.result
@@ -0,0 +1,46 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb
+DEFAULT CHARACTER SET = utf8
+COLLATE = utf8_general_ci
+COMMENT = 'standard table options'
+;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=utf8 COMMENT='standard table options'
+ALTER TABLE t1 COMMENT = 'table altered';
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=utf8 COMMENT='table altered'
+ALTER TABLE t1 ENGINE=MEMORY;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=MEMORY DEFAULT CHARSET=utf8 COMMENT='table altered'
+ALTER TABLE t1 ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=utf8 COMMENT='table altered'
+ALTER TABLE t1 CHARACTER SET = latin1 COLLATE = latin1_swedish_ci;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` char(8) CHARACTER SET utf8 DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COMMENT='table altered'
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/tmpdir.result b/storage/rocksdb/mysql-test/rocksdb/r/tmpdir.result
new file mode 100644
index 00000000000..4c1e0221178
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/tmpdir.result
@@ -0,0 +1,32 @@
+# If rocksdb_tmpdir is NULL or "", the temporary file will be created in
+# the location given by the server configuration variable (--tmpdir)
+CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=RocksDB;
+show session variables like 'rocksdb_tmpdir';
+Variable_name Value
+rocksdb_tmpdir
+# Connection con1
+connect con1,localhost,root;
+show session variables like 'rocksdb_tmpdir';
+Variable_name Value
+rocksdb_tmpdir
+ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE;
+connection default;
+disconnect con1;
+DROP TABLE t1;
+# rocksdb_tmpdir with valid location.
+CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=RocksDB;
+set @tmpdir = @@global.tmpdir;
+set global rocksdb_tmpdir = @tmpdir;
+show session variables like 'rocksdb_tmpdir';
+Variable_name Value
+rocksdb_tmpdir
+# Connection con2
+connect con2,localhost,root;
+show session variables like 'rocksdb_tmpdir';
+Variable_name Value
+rocksdb_tmpdir MYSQL_TMP_DIR/mysqld.1
+ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE;
+disconnect con2;
+connection default;
+set global rocksdb_tmpdir=NULL;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/transaction.result b/storage/rocksdb/mysql-test/rocksdb/r/transaction.result
new file mode 100644
index 00000000000..8a5825b9291
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/transaction.result
@@ -0,0 +1,977 @@
+create table t1 (id int primary key, value int, value2 varchar(100), index(value)) engine=rocksdb;
+insert into t1 values (1,1,1),(2,2,2),(3,3,3),(4,4,4),(5,5,5),(6,6,6),(8,8,8),(9,9,9),(10,10,10);
+begin;
+insert into t1 values (11,11,11);
+select * from t1 where id=1;
+id value value2
+1 1 1
+select * from t1 where value=1;
+id value value2
+1 1 1
+select value from t1 where value=1;
+value
+1
+select * from t1 where value2=1;
+id value value2
+1 1 1
+select * from t1 where id=5;
+id value value2
+5 5 5
+select * from t1 where value=5;
+id value value2
+5 5 5
+select value from t1 where value=5;
+value
+5
+select * from t1 where value2=5;
+id value value2
+5 5 5
+select * from t1 where id < 3 order by id;
+id value value2
+1 1 1
+2 2 2
+select * from t1 where value < 3 order by id;
+id value value2
+1 1 1
+2 2 2
+select value from t1 where value < 3 order by id;
+value
+1
+2
+select * from t1 where value2 < 3 order by id;
+id value value2
+1 1 1
+2 2 2
+select * from t1 order by id;
+id value value2
+1 1 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+8 8 8
+9 9 9
+10 10 10
+11 11 11
+select value from t1 order by id;
+value
+1
+2
+3
+4
+5
+6
+8
+9
+10
+11
+rollback;
+begin;
+insert into t1 values (7,7,7);
+select * from t1 where id=1;
+id value value2
+1 1 1
+select * from t1 where value=1;
+id value value2
+1 1 1
+select value from t1 where value=1;
+value
+1
+select * from t1 where value2=1;
+id value value2
+1 1 1
+select * from t1 where id=5;
+id value value2
+5 5 5
+select * from t1 where value=5;
+id value value2
+5 5 5
+select value from t1 where value=5;
+value
+5
+select * from t1 where value2=5;
+id value value2
+5 5 5
+select * from t1 where id < 3 order by id;
+id value value2
+1 1 1
+2 2 2
+select * from t1 where value < 3 order by id;
+id value value2
+1 1 1
+2 2 2
+select value from t1 where value < 3 order by id;
+value
+1
+2
+select * from t1 where value2 < 3 order by id;
+id value value2
+1 1 1
+2 2 2
+select * from t1 order by id;
+id value value2
+1 1 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+7 7 7
+8 8 8
+9 9 9
+10 10 10
+select value from t1 order by id;
+value
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+rollback;
+begin;
+update t1 set value2=100 where id=1;
+select * from t1 where id=1;
+id value value2
+1 1 100
+select * from t1 where value=1;
+id value value2
+1 1 100
+select value from t1 where value=1;
+value
+1
+select * from t1 where value2=1;
+id value value2
+select * from t1 where id=5;
+id value value2
+5 5 5
+select * from t1 where value=5;
+id value value2
+5 5 5
+select value from t1 where value=5;
+value
+5
+select * from t1 where value2=5;
+id value value2
+5 5 5
+select * from t1 where id < 3 order by id;
+id value value2
+1 1 100
+2 2 2
+select * from t1 where value < 3 order by id;
+id value value2
+1 1 100
+2 2 2
+select value from t1 where value < 3 order by id;
+value
+1
+2
+select * from t1 where value2 < 3 order by id;
+id value value2
+2 2 2
+select * from t1 order by id;
+id value value2
+1 1 100
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+8 8 8
+9 9 9
+10 10 10
+select value from t1 order by id;
+value
+1
+2
+3
+4
+5
+6
+8
+9
+10
+rollback;
+begin;
+update t1 set value=100 where id=1;
+select * from t1 where id=1;
+id value value2
+1 100 1
+select * from t1 where value=1;
+id value value2
+select value from t1 where value=1;
+value
+select * from t1 where value2=1;
+id value value2
+1 100 1
+select * from t1 where id=5;
+id value value2
+5 5 5
+select * from t1 where value=5;
+id value value2
+5 5 5
+select value from t1 where value=5;
+value
+5
+select * from t1 where value2=5;
+id value value2
+5 5 5
+select * from t1 where id < 3 order by id;
+id value value2
+1 100 1
+2 2 2
+select * from t1 where value < 3 order by id;
+id value value2
+2 2 2
+select value from t1 where value < 3 order by id;
+value
+2
+select * from t1 where value2 < 3 order by id;
+id value value2
+1 100 1
+2 2 2
+select * from t1 order by id;
+id value value2
+1 100 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+8 8 8
+9 9 9
+10 10 10
+select value from t1 order by id;
+value
+100
+2
+3
+4
+5
+6
+8
+9
+10
+rollback;
+begin;
+update t1 set id=100 where id=1;
+select * from t1 where id=1;
+id value value2
+select * from t1 where value=1;
+id value value2
+100 1 1
+select value from t1 where value=1;
+value
+1
+select * from t1 where value2=1;
+id value value2
+100 1 1
+select * from t1 where id=5;
+id value value2
+5 5 5
+select * from t1 where value=5;
+id value value2
+5 5 5
+select value from t1 where value=5;
+value
+5
+select * from t1 where value2=5;
+id value value2
+5 5 5
+select * from t1 where id < 3 order by id;
+id value value2
+2 2 2
+select * from t1 where value < 3 order by id;
+id value value2
+2 2 2
+100 1 1
+select value from t1 where value < 3 order by id;
+value
+2
+1
+select * from t1 where value2 < 3 order by id;
+id value value2
+2 2 2
+100 1 1
+select * from t1 order by id;
+id value value2
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+8 8 8
+9 9 9
+10 10 10
+100 1 1
+select value from t1 order by id;
+value
+2
+3
+4
+5
+6
+8
+9
+10
+1
+rollback;
+begin;
+update t1 set value2=100 where value=1;
+select * from t1 where id=1;
+id value value2
+1 1 100
+select * from t1 where value=1;
+id value value2
+1 1 100
+select value from t1 where value=1;
+value
+1
+select * from t1 where value2=1;
+id value value2
+select * from t1 where id=5;
+id value value2
+5 5 5
+select * from t1 where value=5;
+id value value2
+5 5 5
+select value from t1 where value=5;
+value
+5
+select * from t1 where value2=5;
+id value value2
+5 5 5
+select * from t1 where id < 3 order by id;
+id value value2
+1 1 100
+2 2 2
+select * from t1 where value < 3 order by id;
+id value value2
+1 1 100
+2 2 2
+select value from t1 where value < 3 order by id;
+value
+1
+2
+select * from t1 where value2 < 3 order by id;
+id value value2
+2 2 2
+select * from t1 order by id;
+id value value2
+1 1 100
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+8 8 8
+9 9 9
+10 10 10
+select value from t1 order by id;
+value
+1
+2
+3
+4
+5
+6
+8
+9
+10
+rollback;
+begin;
+update t1 set value=100 where value=1;
+select * from t1 where id=1;
+id value value2
+1 100 1
+select * from t1 where value=1;
+id value value2
+select value from t1 where value=1;
+value
+select * from t1 where value2=1;
+id value value2
+1 100 1
+select * from t1 where id=5;
+id value value2
+5 5 5
+select * from t1 where value=5;
+id value value2
+5 5 5
+select value from t1 where value=5;
+value
+5
+select * from t1 where value2=5;
+id value value2
+5 5 5
+select * from t1 where id < 3 order by id;
+id value value2
+1 100 1
+2 2 2
+select * from t1 where value < 3 order by id;
+id value value2
+2 2 2
+select value from t1 where value < 3 order by id;
+value
+2
+select * from t1 where value2 < 3 order by id;
+id value value2
+1 100 1
+2 2 2
+select * from t1 order by id;
+id value value2
+1 100 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+8 8 8
+9 9 9
+10 10 10
+select value from t1 order by id;
+value
+100
+2
+3
+4
+5
+6
+8
+9
+10
+rollback;
+begin;
+update t1 set id=100 where value=1;
+select * from t1 where id=1;
+id value value2
+select * from t1 where value=1;
+id value value2
+100 1 1
+select value from t1 where value=1;
+value
+1
+select * from t1 where value2=1;
+id value value2
+100 1 1
+select * from t1 where id=5;
+id value value2
+5 5 5
+select * from t1 where value=5;
+id value value2
+5 5 5
+select value from t1 where value=5;
+value
+5
+select * from t1 where value2=5;
+id value value2
+5 5 5
+select * from t1 where id < 3 order by id;
+id value value2
+2 2 2
+select * from t1 where value < 3 order by id;
+id value value2
+2 2 2
+100 1 1
+select value from t1 where value < 3 order by id;
+value
+2
+1
+select * from t1 where value2 < 3 order by id;
+id value value2
+2 2 2
+100 1 1
+select * from t1 order by id;
+id value value2
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+8 8 8
+9 9 9
+10 10 10
+100 1 1
+select value from t1 order by id;
+value
+2
+3
+4
+5
+6
+8
+9
+10
+1
+rollback;
+begin;
+update t1 set value2=100 where value2=1;
+select * from t1 where id=1;
+id value value2
+1 1 100
+select * from t1 where value=1;
+id value value2
+1 1 100
+select value from t1 where value=1;
+value
+1
+select * from t1 where value2=1;
+id value value2
+select * from t1 where id=5;
+id value value2
+5 5 5
+select * from t1 where value=5;
+id value value2
+5 5 5
+select value from t1 where value=5;
+value
+5
+select * from t1 where value2=5;
+id value value2
+5 5 5
+select * from t1 where id < 3 order by id;
+id value value2
+1 1 100
+2 2 2
+select * from t1 where value < 3 order by id;
+id value value2
+1 1 100
+2 2 2
+select value from t1 where value < 3 order by id;
+value
+1
+2
+select * from t1 where value2 < 3 order by id;
+id value value2
+2 2 2
+select * from t1 order by id;
+id value value2
+1 1 100
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+8 8 8
+9 9 9
+10 10 10
+select value from t1 order by id;
+value
+1
+2
+3
+4
+5
+6
+8
+9
+10
+rollback;
+begin;
+update t1 set value=100 where value2=1;
+select * from t1 where id=1;
+id value value2
+1 100 1
+select * from t1 where value=1;
+id value value2
+select value from t1 where value=1;
+value
+select * from t1 where value2=1;
+id value value2
+1 100 1
+select * from t1 where id=5;
+id value value2
+5 5 5
+select * from t1 where value=5;
+id value value2
+5 5 5
+select value from t1 where value=5;
+value
+5
+select * from t1 where value2=5;
+id value value2
+5 5 5
+select * from t1 where id < 3 order by id;
+id value value2
+1 100 1
+2 2 2
+select * from t1 where value < 3 order by id;
+id value value2
+2 2 2
+select value from t1 where value < 3 order by id;
+value
+2
+select * from t1 where value2 < 3 order by id;
+id value value2
+1 100 1
+2 2 2
+select * from t1 order by id;
+id value value2
+1 100 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+8 8 8
+9 9 9
+10 10 10
+select value from t1 order by id;
+value
+100
+2
+3
+4
+5
+6
+8
+9
+10
+rollback;
+begin;
+update t1 set id=100 where value2=1;
+select * from t1 where id=1;
+id value value2
+select * from t1 where value=1;
+id value value2
+100 1 1
+select value from t1 where value=1;
+value
+1
+select * from t1 where value2=1;
+id value value2
+100 1 1
+select * from t1 where id=5;
+id value value2
+5 5 5
+select * from t1 where value=5;
+id value value2
+5 5 5
+select value from t1 where value=5;
+value
+5
+select * from t1 where value2=5;
+id value value2
+5 5 5
+select * from t1 where id < 3 order by id;
+id value value2
+2 2 2
+select * from t1 where value < 3 order by id;
+id value value2
+2 2 2
+100 1 1
+select value from t1 where value < 3 order by id;
+value
+2
+1
+select * from t1 where value2 < 3 order by id;
+id value value2
+2 2 2
+100 1 1
+select * from t1 order by id;
+id value value2
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+8 8 8
+9 9 9
+10 10 10
+100 1 1
+select value from t1 order by id;
+value
+2
+3
+4
+5
+6
+8
+9
+10
+1
+rollback;
+begin;
+delete from t1 where id=1;
+select * from t1 where id=1;
+id value value2
+select * from t1 where value=1;
+id value value2
+select value from t1 where value=1;
+value
+select * from t1 where value2=1;
+id value value2
+select * from t1 where id=5;
+id value value2
+5 5 5
+select * from t1 where value=5;
+id value value2
+5 5 5
+select value from t1 where value=5;
+value
+5
+select * from t1 where value2=5;
+id value value2
+5 5 5
+select * from t1 where id < 3 order by id;
+id value value2
+2 2 2
+select * from t1 where value < 3 order by id;
+id value value2
+2 2 2
+select value from t1 where value < 3 order by id;
+value
+2
+select * from t1 where value2 < 3 order by id;
+id value value2
+2 2 2
+select * from t1 order by id;
+id value value2
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+8 8 8
+9 9 9
+10 10 10
+select value from t1 order by id;
+value
+2
+3
+4
+5
+6
+8
+9
+10
+rollback;
+begin;
+delete from t1 where value=1;
+select * from t1 where id=1;
+id value value2
+select * from t1 where value=1;
+id value value2
+select value from t1 where value=1;
+value
+select * from t1 where value2=1;
+id value value2
+select * from t1 where id=5;
+id value value2
+5 5 5
+select * from t1 where value=5;
+id value value2
+5 5 5
+select value from t1 where value=5;
+value
+5
+select * from t1 where value2=5;
+id value value2
+5 5 5
+select * from t1 where id < 3 order by id;
+id value value2
+2 2 2
+select * from t1 where value < 3 order by id;
+id value value2
+2 2 2
+select value from t1 where value < 3 order by id;
+value
+2
+select * from t1 where value2 < 3 order by id;
+id value value2
+2 2 2
+select * from t1 order by id;
+id value value2
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+8 8 8
+9 9 9
+10 10 10
+select value from t1 order by id;
+value
+2
+3
+4
+5
+6
+8
+9
+10
+rollback;
+begin;
+delete from t1 where value2=1;
+select * from t1 where id=1;
+id value value2
+select * from t1 where value=1;
+id value value2
+select value from t1 where value=1;
+value
+select * from t1 where value2=1;
+id value value2
+select * from t1 where id=5;
+id value value2
+5 5 5
+select * from t1 where value=5;
+id value value2
+5 5 5
+select value from t1 where value=5;
+value
+5
+select * from t1 where value2=5;
+id value value2
+5 5 5
+select * from t1 where id < 3 order by id;
+id value value2
+2 2 2
+select * from t1 where value < 3 order by id;
+id value value2
+2 2 2
+select value from t1 where value < 3 order by id;
+value
+2
+select * from t1 where value2 < 3 order by id;
+id value value2
+2 2 2
+select * from t1 order by id;
+id value value2
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+8 8 8
+9 9 9
+10 10 10
+select value from t1 order by id;
+value
+2
+3
+4
+5
+6
+8
+9
+10
+rollback;
+begin;
+insert into t1 values (11,11,11);
+insert into t1 values (12,12,12);
+insert into t1 values (13,13,13);
+delete from t1 where id=9;
+delete from t1 where value=8;
+update t1 set id=100 where value2=5;
+update t1 set value=103 where value=4;
+update t1 set id=115 where id=3;
+select * from t1 where id=1;
+id value value2
+1 1 1
+select * from t1 where value=1;
+id value value2
+1 1 1
+select value from t1 where value=1;
+value
+1
+select * from t1 where value2=1;
+id value value2
+1 1 1
+select * from t1 where id=5;
+id value value2
+select * from t1 where value=5;
+id value value2
+100 5 5
+select value from t1 where value=5;
+value
+5
+select * from t1 where value2=5;
+id value value2
+100 5 5
+select * from t1 where id < 3 order by id;
+id value value2
+1 1 1
+2 2 2
+select * from t1 where value < 3 order by id;
+id value value2
+1 1 1
+2 2 2
+select value from t1 where value < 3 order by id;
+value
+1
+2
+select * from t1 where value2 < 3 order by id;
+id value value2
+1 1 1
+2 2 2
+select * from t1 order by id;
+id value value2
+1 1 1
+2 2 2
+4 103 4
+6 6 6
+10 10 10
+11 11 11
+12 12 12
+13 13 13
+100 5 5
+115 3 3
+select value from t1 order by id;
+value
+1
+2
+103
+6
+10
+11
+12
+13
+5
+3
+rollback;
+drop table t1;
+#
+# #802: MyRocks: Statement rollback doesn't work correctly for nested statements
+#
+create table t1 (a varchar(100)) engine=rocksdb;
+create table t2(a int) engine=rocksdb;
+insert into t2 values (1), (2);
+create table t3(a varchar(100)) engine=rocksdb;
+create function func() returns varchar(100) deterministic
+begin
+insert into t3 values ('func-called');
+set @a= (select a from t2);
+return 'func-returned';
+end;//
+begin;
+insert into t1 values (func());
+ERROR 21000: Subquery returns more than 1 row
+select * from t1;
+a
+# The following must not produce 'func-called':
+select * from t3;
+a
+rollback;
+drop function func;
+drop table t1,t2,t3;
+#
+# MDEV-16710: Slave SQL: Could not execute Update_rows_v1 event with RocksDB and triggers
+# Issue#857: MyRocks: Incorrect behavior when multiple statements fail inside a transaction
+#
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (1);
+CREATE TABLE t2 (b INT PRIMARY KEY) ENGINE=RocksDB;
+CREATE TRIGGER tr AFTER INSERT ON t2 FOR EACH ROW INSERT INTO non_existing_table VALUES (NULL);
+BEGIN;
+DELETE FROM t1;
+INSERT INTO t2 VALUES (1);
+INSERT INTO t2 VALUES (2);
+# Must return empty result:
+SELECT * FROM t1;
+a
+COMMIT;
+drop table t1,t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/truncate_partition.result b/storage/rocksdb/mysql-test/rocksdb/r/truncate_partition.result
new file mode 100644
index 00000000000..67f655b66d7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/truncate_partition.result
@@ -0,0 +1,620 @@
+#
+# table(hidden key)
+#
+CREATE TABLE t1 (
+a INT,
+b INT
+) ENGINE=ROCKSDB
+PARTITION BY RANGE (b) (
+PARTITION p0 VALUES LESS THAN (3),
+PARTITION p1 VALUES LESS THAN (6),
+PARTITION p2 VALUES LESS THAN MAXVALUE
+);
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+TRUNCATE TABLE t1;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+INSERT INTO t1 (a,b) VALUES (1, 1), (2, 4), (3, 8);
+SELECT a,b FROM t1 ORDER BY a;
+a b
+1 1
+2 4
+3 8
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+SELECT a,b FROM t1 ORDER BY a;
+a b
+1 1
+2 4
+SELECT a FROM t1 WHERE b > 2;
+a
+2
+SELECT b from t1 where a != 3;
+b
+1
+4
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+SELECT a,b FROM t1 ORDER BY b;
+a b
+1 1
+SELECT a FROM t1 WHERE b > 2;
+a
+SELECT b from t1 where a != 3;
+b
+1
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+SELECT a,b FROM t1 ORDER BY a;
+a b
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+INSERT INTO t1 (a,b) VALUES (4, 1), (5, 4), (6, 8);
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+6 8
+SELECT a FROM t1 WHERE b < 5;
+a
+4
+5
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+INSERT INTO t1(a,b) VALUES(7, 1);
+SELECT b from t1 WHERE a > 2;
+b
+1
+1
+4
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+7 1
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+SELECT a,b FROM t1;
+a b
+4 1
+7 1
+INSERT INTO t1(a,b) VALUES(8, 4);
+SELECT a,b FROM t1;
+a b
+4 1
+7 1
+8 4
+SELECT b from t1 WHERE a < 9;
+b
+1
+1
+4
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+SELECT a,b FROM t1;
+a b
+8 4
+INSERT INTO t1(a,b) VALUES(9, 8);
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+set global rocksdb_force_flush_memtable_now = true;
+set global rocksdb_compact_cf = 'default';
+SELECT b FROM t1 WHERE a < 5;
+b
+1
+2
+3
+4
+TRUNCATE TABLE t1;
+SELECT b FROM t1 WHERE a < 5;
+b
+DROP TABLE t1;
+#
+# table(secondary key)
+#
+CREATE TABLE t1(
+a INT,
+b INT,
+KEY (b)
+) ENGINE=ROCKSDB
+PARTITION BY HASH(a) PARTITIONS 3;
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+TRUNCATE TABLE t1;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+INSERT INTO t1 (a,b) VALUES (1, 1), (2, 4), (3, 8);
+SELECT a,b FROM t1 ORDER BY a;
+a b
+1 1
+2 4
+3 8
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+SELECT a,b FROM t1 ORDER BY a;
+a b
+1 1
+3 8
+SELECT a FROM t1 WHERE b > 2;
+a
+3
+SELECT b from t1 where a != 3;
+b
+1
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+SELECT a,b FROM t1 ORDER BY b;
+a b
+3 8
+SELECT a FROM t1 WHERE b > 2;
+a
+3
+SELECT b from t1 where a != 3;
+b
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+SELECT a,b FROM t1 ORDER BY a;
+a b
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+INSERT INTO t1 (a,b) VALUES (4, 1), (5, 4), (6, 8);
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+6 8
+SELECT a FROM t1 WHERE b < 5;
+a
+4
+5
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+SELECT a,b FROM t1;
+a b
+4 1
+6 8
+INSERT INTO t1(a,b) VALUES(7, 1);
+SELECT b from t1 WHERE a > 2;
+b
+1
+1
+8
+SELECT a,b FROM t1;
+a b
+4 1
+6 8
+7 1
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+SELECT a,b FROM t1;
+a b
+6 8
+INSERT INTO t1(a,b) VALUES(8, 4);
+SELECT a,b FROM t1;
+a b
+6 8
+8 4
+SELECT b from t1 WHERE a < 9;
+b
+4
+8
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+SELECT a,b FROM t1;
+a b
+8 4
+INSERT INTO t1(a,b) VALUES(9, 8);
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+set global rocksdb_force_flush_memtable_now = true;
+set global rocksdb_compact_cf = 'default';
+SELECT b FROM t1 WHERE a < 5;
+b
+1
+2
+3
+4
+TRUNCATE TABLE t1;
+SELECT b FROM t1 WHERE a < 5;
+b
+DROP TABLE t1;
+#
+# table(primary key, auto increment)
+#
+CREATE TABLE t1(
+a INT NOT NULL AUTO_INCREMENT,
+b INT,
+PRIMARY KEY(a)
+) ENGINE=ROCKSDB
+PARTITION BY KEY() PARTITIONS 3;
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+TRUNCATE TABLE t1;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+INSERT INTO t1 (a,b) VALUES (1, 1), (2, 4), (3, 8);
+SELECT a,b FROM t1 ORDER BY a;
+a b
+1 1
+2 4
+3 8
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+SELECT a,b FROM t1 ORDER BY a;
+a b
+1 1
+SELECT a FROM t1 WHERE b > 2;
+a
+SELECT b from t1 where a != 3;
+b
+1
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+SELECT a,b FROM t1 ORDER BY b;
+a b
+SELECT a FROM t1 WHERE b > 2;
+a
+SELECT b from t1 where a != 3;
+b
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+SELECT a,b FROM t1 ORDER BY a;
+a b
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+INSERT INTO t1 (a,b) VALUES (4, 1), (5, 4), (6, 8);
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+6 8
+SELECT a FROM t1 WHERE b < 5;
+a
+4
+5
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+6 8
+INSERT INTO t1(a,b) VALUES(7, 1);
+SELECT b from t1 WHERE a > 2;
+b
+1
+1
+4
+8
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+6 8
+7 1
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+INSERT INTO t1(a,b) VALUES(8, 4);
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+8 4
+SELECT b from t1 WHERE a < 9;
+b
+1
+4
+4
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+SELECT a,b FROM t1;
+a b
+8 4
+INSERT INTO t1(a,b) VALUES(9, 8);
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+set global rocksdb_force_flush_memtable_now = true;
+set global rocksdb_compact_cf = 'default';
+SELECT b FROM t1 WHERE a < 5;
+b
+1
+2
+3
+4
+TRUNCATE TABLE t1;
+SELECT b FROM t1 WHERE a < 5;
+b
+DROP TABLE t1;
+#
+# table(cf)
+#
+CREATE TABLE t1 (
+a INT,
+b INT,
+PRIMARY KEY (`a`, `b`) COMMENT 'testcomment'
+) ENGINE=ROCKSDB
+PARTITION BY LIST(a) (
+PARTITION p0 VALUES IN (1, 4, 7),
+PARTITION p1 VALUES IN (2, 5, 8),
+PARTITION p2 VALUES IN (3, 6, 9)
+);
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+TRUNCATE TABLE t1;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+INSERT INTO t1 (a,b) VALUES (1, 1), (2, 4), (3, 8);
+SELECT a,b FROM t1 ORDER BY a;
+a b
+1 1
+2 4
+3 8
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+SELECT a,b FROM t1 ORDER BY a;
+a b
+1 1
+2 4
+SELECT a FROM t1 WHERE b > 2;
+a
+2
+SELECT b from t1 where a != 3;
+b
+1
+4
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+SELECT a,b FROM t1 ORDER BY b;
+a b
+1 1
+SELECT a FROM t1 WHERE b > 2;
+a
+SELECT b from t1 where a != 3;
+b
+1
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+SELECT a,b FROM t1 ORDER BY a;
+a b
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+INSERT INTO t1 (a,b) VALUES (4, 1), (5, 4), (6, 8);
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+6 8
+SELECT a FROM t1 WHERE b < 5;
+a
+4
+5
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+INSERT INTO t1(a,b) VALUES(7, 1);
+SELECT b from t1 WHERE a > 2;
+b
+1
+1
+4
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+7 1
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+SELECT a,b FROM t1;
+a b
+4 1
+7 1
+INSERT INTO t1(a,b) VALUES(8, 4);
+SELECT a,b FROM t1;
+a b
+4 1
+7 1
+8 4
+SELECT b from t1 WHERE a < 9;
+b
+1
+1
+4
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+SELECT a,b FROM t1;
+a b
+8 4
+INSERT INTO t1(a,b) VALUES(9, 8);
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+set global rocksdb_force_flush_memtable_now = true;
+set global rocksdb_compact_cf = 'default';
+SELECT b FROM t1 WHERE a < 5;
+b
+1
+2
+3
+4
+TRUNCATE TABLE t1;
+SELECT b FROM t1 WHERE a < 5;
+b
+DROP TABLE t1;
+#
+# table(reverse cf)
+#
+CREATE TABLE t1 (
+a INT,
+b INT,
+PRIMARY KEY (`a`, `b`) COMMENT 'p0_cfname=rev:foo;p1_cfname=bar;p2_cfname=baz'
+) ENGINE=ROCKSDB
+PARTITION BY LIST(a) (
+PARTITION p0 VALUES IN (1, 4, 7),
+PARTITION p1 VALUES IN (2, 5, 8),
+PARTITION p2 VALUES IN (3, 6, 9)
+);
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+TRUNCATE TABLE t1;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+INSERT INTO t1 (a,b) VALUES (1, 1), (2, 4), (3, 8);
+SELECT a,b FROM t1 ORDER BY a;
+a b
+1 1
+2 4
+3 8
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+SELECT a,b FROM t1 ORDER BY a;
+a b
+1 1
+2 4
+SELECT a FROM t1 WHERE b > 2;
+a
+2
+SELECT b from t1 where a != 3;
+b
+1
+4
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+SELECT a,b FROM t1 ORDER BY b;
+a b
+1 1
+SELECT a FROM t1 WHERE b > 2;
+a
+SELECT b from t1 where a != 3;
+b
+1
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+SELECT a,b FROM t1 ORDER BY a;
+a b
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+INSERT INTO t1 (a,b) VALUES (4, 1), (5, 4), (6, 8);
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+6 8
+SELECT a FROM t1 WHERE b < 5;
+a
+4
+5
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+INSERT INTO t1(a,b) VALUES(7, 1);
+SELECT b from t1 WHERE a > 2;
+b
+1
+1
+4
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+7 1
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+SELECT a,b FROM t1;
+a b
+4 1
+7 1
+INSERT INTO t1(a,b) VALUES(8, 4);
+SELECT a,b FROM t1;
+a b
+4 1
+7 1
+8 4
+SELECT b from t1 WHERE a < 9;
+b
+1
+1
+4
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+SELECT a,b FROM t1;
+a b
+8 4
+INSERT INTO t1(a,b) VALUES(9, 8);
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+set global rocksdb_force_flush_memtable_now = true;
+set global rocksdb_compact_cf = 'default';
+SELECT b FROM t1 WHERE a < 5;
+b
+1
+2
+3
+4
+TRUNCATE TABLE t1;
+SELECT b FROM t1 WHERE a < 5;
+b
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/truncate_table.result b/storage/rocksdb/mysql-test/rocksdb/r/truncate_table.result
new file mode 100644
index 00000000000..e6ff6e1ca32
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/truncate_table.result
@@ -0,0 +1,33 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+TRUNCATE TABLE t1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c');
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+DROP TABLE t1;
+CREATE TABLE t1 (a INT KEY AUTO_INCREMENT, c CHAR(8)) ENGINE=rocksdb;
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed # # # 0 0 0 1 NULL NULL NULL latin1_swedish_ci NULL
+INSERT INTO t1 (c) VALUES ('a'),('b'),('c');
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed # # # 0 0 0 4 NULL NULL NULL latin1_swedish_ci NULL
+TRUNCATE TABLE t1;
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed # # # 0 0 0 1 NULL NULL NULL latin1_swedish_ci NULL
+INSERT INTO t1 (c) VALUES ('d');
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed # # # 0 0 0 2 NULL NULL NULL latin1_swedish_ci NULL
+SELECT a,c FROM t1;
+a c
+1 d
+DROP TABLE t1;
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c');
+HANDLER t1 OPEN AS h1;
+ERROR HY000: Storage engine ROCKSDB of the table `test`.`t1` doesn't have this option
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result b/storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result
new file mode 100644
index 00000000000..eda560fefdb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result
@@ -0,0 +1,22 @@
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+DROP TABLE IF EXISTS t1;
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+set global rocksdb_compact_cf = 'cf1';
+set global rocksdb_compact_cf = 'rev:cf2';
+set global rocksdb_signal_drop_index_thread = 1;
+CREATE TABLE t1 (
+a int not null,
+b int not null,
+c varchar(500) not null,
+primary key (a,b) comment 'cf1',
+key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+DELETE FROM t1;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_compact_read_bytes';
+truncate table t1;
+select case when variable_value-@a < 500000 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_compact_read_bytes';
+case when variable_value-@a < 500000 then 'true' else 'false' end
+true
+DROP TABLE IF EXISTS t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/trx_info.result b/storage/rocksdb/mysql-test/rocksdb/r/trx_info.result
new file mode 100644
index 00000000000..ada2e127021
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/trx_info.result
@@ -0,0 +1,13 @@
+DROP TABLE IF EXISTS t1;
+create table t1 (a int) engine=rocksdb;
+insert into t1 values (1);
+insert into t1 values (2);
+set autocommit=0;
+select * from t1 for update;
+a
+1
+2
+select * from information_schema.rocksdb_trx;
+TRANSACTION_ID STATE NAME WRITE_COUNT LOCK_COUNT TIMEOUT_SEC WAITING_KEY WAITING_COLUMN_FAMILY_ID IS_REPLICATION SKIP_TRX_API READ_ONLY HAS_DEADLOCK_DETECTION NUM_ONGOING_BULKLOAD THREAD_ID QUERY
+_TRX_ID_ STARTED _NAME_ 0 2 1 _KEY_ 0 0 0 0 0 0 _THREAD_ID_ select * from information_schema.rocksdb_trx
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/trx_info_rpl.result b/storage/rocksdb/mysql-test/rocksdb/r/trx_info_rpl.result
new file mode 100644
index 00000000000..b096e42a11b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/trx_info_rpl.result
@@ -0,0 +1,16 @@
+include/master-slave.inc
+[connection master]
+DROP TABLE IF EXISTS t1;
+connection slave;
+include/stop_slave.inc
+create table t1 (a int, b int, primary key (a), unique key (b)) engine=rocksdb;
+connection master;
+connection slave;
+show variables like 'rpl_skip_tx_api';
+Variable_name Value
+rpl_skip_tx_api ON
+include/start_slave.inc
+found
+connection master;
+DROP TABLE t1;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary.result
new file mode 100644
index 00000000000..2903e9aa7eb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary.result
@@ -0,0 +1,489 @@
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, UNIX_TIMESTAMP(), 5);
+INSERT INTO t1 values (2, 4, UNIX_TIMESTAMP(), 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, 4, 6, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, 4, 6, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int,
+c int,
+ts bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, NULL, NULL, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, NULL, NULL, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64),
+`c` varbinary(256),
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', NULL, 'bc', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'efghijk', NULL, UNIX_TIMESTAMP(), 'l');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5);
+INSERT INTO t1 values (2, 4, 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int,
+ts bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a, ts)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=ts;';
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_debug_ttl_snapshot_ts = -10;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_debug_ttl_snapshot_ts = 10;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a, ts)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, UNIX_TIMESTAMP(), 5);
+INSERT INTO t1 values (2, 4, UNIX_TIMESTAMP(), 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64),
+`c` varbinary(256),
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`a`, `ts`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', NULL, 'bc', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('de', 'fghijk', NULL, UNIX_TIMESTAMP(), 'l');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT NOT NULL,
+b varbinary(64) NOT NULL,
+c varbinary(256) NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+value mediumblob NOT NULL,
+PRIMARY KEY (b,a,c)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=10;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values (2, 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (3, 'i', 'j', UNIX_TIMESTAMP(), 'k');
+INSERT INTO t1 values (4, 'm', 'n', UNIX_TIMESTAMP(), 'o');
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_debug_ttl_snapshot_ts = -3600;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT a FROM t1;
+a
+1
+2
+3
+4
+set global rocksdb_compact_cf='default';
+SELECT a FROM t1;
+a
+3
+4
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT a FROM t1;
+a
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+ts bigint(20),
+PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+ERROR HY000: TTL column (ts) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration.
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+ts int,
+PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+ERROR HY000: TTL column (ts) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration.
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=abc;';
+ERROR HY000: TTL duration (abc) in MyRocks must be an unsigned non-null 64-bit integer.
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=abc;';
+ERROR HY000: TTL column (abc) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration.
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_col=abc;';
+ERROR HY000: TTL column (abc) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration.
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=500;';
+INSERT INTO t1 values (1);
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+1
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+1
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+INSERT INTO t1 values (1);
+SELECT * FROM t1;
+a
+1
+set global rocksdb_debug_ttl_rec_ts = -300;
+ALTER TABLE t1 COMMENT = 'ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = 0;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COMMENT='ttl_duration=1'
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b INT
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+ALTER TABLE t1 DROP PRIMARY KEY;
+ERROR HY000: TTL support is currently disabled when table has a hidden PK.
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b INT
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t1 VALUES (2,2);
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY(b);
+set global rocksdb_debug_ttl_snapshot_ts = -3600;
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int,
+PRIMARY KEY (a,b)
+) ENGINE=rocksdb
+COMMENT='asdadfasdfsadfadf ;ttl_duration=1; asfasdfasdfadfa';
+INSERT INTO t1 values (UNIX_TIMESTAMP(), 1);
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+1
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+ALTER TABLE t1 COMMENT = 'adsf;;ttl_duration=5;asfasdfa;ttl_col=a;asdfasdf;';
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (UNIX_TIMESTAMP(), 2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+1
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (3);
+INSERT INTO t1 values (5);
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (7);
+INSERT INTO t1 values (9);
+set global rocksdb_debug_ttl_rec_ts = 0;
+UPDATE t1 SET a=a+1;
+SELECT * FROM t1;
+a
+10
+2
+4
+6
+8
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT * FROM t1;
+a
+10
+8
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT,
+b bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=UNIX_TIMESTAMP() WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT a FROM t1;
+a
+1
+3
+5
+7
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT a FROM t1;
+a
+1
+3
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (2);
+INSERT INTO t1 values (3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_enable_ttl=0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_enable_ttl=1;
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+3
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (2);
+INSERT INTO t1 values (3);
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+0
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result
new file mode 100644
index 00000000000..558924e6513
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result
@@ -0,0 +1,283 @@
+CREATE TABLE t1 (
+a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+SELECT * FROM t1;
+a
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+2
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int PRIMARY KEY,
+b BIGINT UNSIGNED NOT NULL
+) ENGINE=rocksdb
+COMMENT='ttl_duration=10;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+SELECT a FROM t1;
+a
+2
+3
+set global rocksdb_compact_cf='default';
+SELECT a FROM t1;
+a
+2
+3
+set global rocksdb_debug_ttl_read_filter_ts = -310;
+SELECT a FROM t1;
+a
+set global rocksdb_debug_ttl_read_filter_ts = 0;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (3);
+INSERT INTO t1 values (5);
+INSERT INTO t1 values (7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+SELECT * FROM t1;
+a
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+variable_value-@a
+4
+set global rocksdb_enable_ttl_read_filtering=0;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+SELECT * FROM t1;
+a
+1
+3
+5
+7
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+variable_value-@a
+0
+set global rocksdb_enable_ttl_read_filtering=1;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+SELECT * FROM t1;
+a
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+variable_value-@a
+4
+DROP TABLE t1;
+set global ROCKSDB_COMPACT_CF= 'default';
+CREATE TABLE t1 (
+a int,
+b int,
+c int,
+PRIMARY KEY (a,b,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (0,0,0);
+INSERT INTO t1 values (0,0,1);
+INSERT INTO t1 values (0,1,0);
+INSERT INTO t1 values (0,1,1);
+INSERT INTO t1 values (1,1,2);
+INSERT INTO t1 values (1,2,1);
+INSERT INTO t1 values (1,2,2);
+INSERT INTO t1 values (1,2,3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_force_flush_memtable_now=1;
+SELECT * FROM t1 WHERE a=1 AND b=2 AND c=2;
+a b c
+SELECT * FROM t1 WHERE a = 1;
+a b c
+SELECT max(a) from t1 where a < 3;
+max(a)
+NULL
+SELECT max(a) from t1 where a < 2 AND b = 1 AND c < 3;
+max(a)
+NULL
+SELECT min(a) from t1 where a >= 1;
+min(a)
+NULL
+SELECT min(a) from t1 where a > 1;
+min(a)
+NULL
+select * from t1 where a=1 and b in (1) order by c desc;
+a b c
+select max(a) from t1 where a <=10;
+max(a)
+NULL
+select a from t1 where a > 0 and a <= 2;
+a
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+0
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+8
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+set global rocksdb_debug_ttl_rec_ts = -110;
+INSERT INTO t1 values (1);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1;
+a
+INSERT INTO t1 values (1);
+SELECT * FROM t1;
+a
+1
+DROP TABLE t1;
+set global rocksdb_force_flush_memtable_now=1;
+CREATE TABLE t1 (
+a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1;
+a
+UPDATE t1 set a = 1;
+DROP TABLE t1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+CREATE TABLE t1 (
+a int PRIMARY KEY,
+b int
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+set global rocksdb_debug_ttl_rec_ts = -110;
+INSERT INTO t1 values (1,1);
+INSERT INTO t1 values (3,3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 values (5,5);
+UPDATE t1 set a = 1;
+SELECT * FROM t1;
+a b
+1 5
+set global rocksdb_enable_ttl_read_filtering=0;
+SELECT * FROM t1;
+a b
+1 5
+3 3
+set global rocksdb_enable_ttl_read_filtering=1;
+UPDATE t1 set a = 999 where a = 1;
+SELECT * FROM t1;
+a b
+999 5
+UPDATE t1 set a = a - 1;
+SELECT * FROM t1;
+a b
+998 5
+DROP TABLE t1;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+CREATE TABLE t1 (
+a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+INSERT INTO t1 values (1);
+connection con1;
+# Creating Snapshot (start transaction)
+BEGIN;
+SELECT * FROM t1;
+a
+1
+SELECT * FROM t1;
+a
+1
+# Switching to connection 2
+connection con2;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+SELECT * FROM t1;
+a
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+variable_value-@a
+1
+# Switching to connection 1
+connection con1;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+SELECT * FROM t1;
+a
+1
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+variable_value-@a
+0
+UPDATE t1 set a = a + 1;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+SELECT * FROM t1;
+a
+2
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+variable_value-@a
+0
+COMMIT;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+SELECT * FROM t1;
+a
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+variable_value-@a
+1
+DROP TABLE t1;
+disconnect con1;
+disconnect con2;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+CREATE TABLE t1 (
+a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+# On Connection 1
+connection con1;
+# Creating Snapshot (start transaction)
+BEGIN;
+SELECT * FROM t1;
+a
+# On Connection 2
+connection con2;
+set global rocksdb_debug_ttl_rec_ts = -2;
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (3);
+INSERT INTO t1 values (5);
+INSERT INTO t1 values (7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+# On Connection 1
+connection con1;
+SELECT * FROM t1;
+a
+# On Connection 2
+connection con2;
+SELECT * FROM t1;
+a
+set global rocksdb_enable_ttl_read_filtering=0;
+SELECT * FROM t1;
+a
+1
+3
+5
+7
+set global rocksdb_enable_ttl_read_filtering=1;
+disconnect con2;
+disconnect con1;
+connection default;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_with_partitions.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_with_partitions.result
new file mode 100644
index 00000000000..d6d9e290e9f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_with_partitions.result
@@ -0,0 +1,256 @@
+CREATE TABLE t1 (
+c1 INT,
+PRIMARY KEY (`c1`)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+set global rocksdb_debug_ttl_rec_ts = -3600;
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (2);
+INSERT INTO t1 values (3);
+INSERT INTO t1 values (4);
+INSERT INTO t1 values (5);
+INSERT INTO t1 values (6);
+INSERT INTO t1 values (7);
+INSERT INTO t1 values (8);
+INSERT INTO t1 values (9);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1;
+c1
+1
+2
+3
+4
+5
+6
+7
+8
+9
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT * FROM t1;
+c1
+2
+3
+5
+6
+8
+9
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=my_custom_cf;custom_p2_cfname=baz'
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;custom_p1_ttl_duration=7;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (1,1,'a');
+INSERT INTO t1 values (4,4,'aaaa');
+INSERT INTO t1 values (7,7,'aaaaaaa');
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (2,2,'aa');
+INSERT INTO t1 values (3,3,'aaa');
+INSERT INTO t1 values (5,5,'aaaaa');
+INSERT INTO t1 values (6,6,'aaaaaa');
+INSERT INTO t1 values (8,8,'aaaaaaaa');
+INSERT INTO t1 values (9,9,'aaaaaaaaa');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1;
+c1 c2 name
+1 1 a
+2 2 aa
+3 3 aaa
+4 4 aaaa
+5 5 aaaaa
+6 6 aaaaaa
+7 7 aaaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+SELECT * FROM t1;
+c1 c2 name
+2 2 aa
+3 3 aaa
+5 5 aaaaa
+6 6 aaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set @@global.rocksdb_compact_cf = 'foo';
+SELECT * FROM t1;
+c1 c2 name
+2 2 aa
+3 3 aaa
+5 5 aaaaa
+6 6 aaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT * FROM t1;
+c1 c2 name
+3 3 aaa
+6 6 aaaaaa
+9 9 aaaaaaaaa
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+event DATE,
+PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;'
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=9999;custom_p2_ttl_duration=5;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 2, 3),
+PARTITION custom_p1 VALUES IN (4, 5, 6),
+PARTITION custom_p2 VALUES IN (7, 8, 9)
+);
+INSERT INTO t1 VALUES (1, 1, "one", null);
+INSERT INTO t1 VALUES (2, 2, "two", null);
+INSERT INTO t1 VALUES (3, 3, "three", null);
+INSERT INTO t1 VALUES (4, 4, "four", null);
+INSERT INTO t1 VALUES (5, 5, "five", null);
+INSERT INTO t1 VALUES (6, 6, "six", null);
+INSERT INTO t1 VALUES (7, 7, "seven", null);
+INSERT INTO t1 VALUES (8, 8, "eight", null);
+INSERT INTO t1 VALUES (9, 9, "nine", null);
+SELECT * FROM t1;
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+7 7 seven NULL
+8 8 eight NULL
+9 9 nine NULL
+set global rocksdb_debug_ttl_rec_ts = 600;
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY(`c2`,`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;';
+set global rocksdb_debug_ttl_rec_ts = 0;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `c1` int(11) NOT NULL,
+ `c2` int(11) NOT NULL,
+ `name` varchar(25) NOT NULL,
+ `event` date DEFAULT NULL,
+ PRIMARY KEY (`c2`,`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COMMENT='custom_p0_ttl_duration=9999;custom_p2_ttl_duration=5;'
+ PARTITION BY LIST (`c1`)
+(PARTITION `custom_p0` VALUES IN (1,2,3) ENGINE = ROCKSDB,
+ PARTITION `custom_p1` VALUES IN (4,5,6) ENGINE = ROCKSDB,
+ PARTITION `custom_p2` VALUES IN (7,8,9) ENGINE = ROCKSDB)
+set global rocksdb_debug_ttl_snapshot_ts = 100;
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'baz';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT * FROM t1;
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+7 7 seven NULL
+8 8 eight NULL
+9 9 nine NULL
+set global rocksdb_debug_ttl_snapshot_ts = 1200;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'baz';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT * FROM t1;
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 BIGINT,
+c2 BIGINT UNSIGNED NOT NULL,
+name VARCHAR(25) NOT NULL,
+event DATE,
+PRIMARY KEY (`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;'
+) ENGINE=ROCKSDB
+COMMENT="ttl_duration=1;custom_p1_ttl_duration=100;custom_p1_ttl_col=c2;custom_p2_ttl_duration=5000;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 2, 3),
+PARTITION custom_p1 VALUES IN (4, 5, 6),
+PARTITION custom_p2 VALUES IN (7, 8, 9)
+);
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 VALUES (1, UNIX_TIMESTAMP(), "one", null);
+INSERT INTO t1 VALUES (2, UNIX_TIMESTAMP(), "two", null);
+INSERT INTO t1 VALUES (3, UNIX_TIMESTAMP(), "three", null);
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 VALUES (4, UNIX_TIMESTAMP(), "four", null);
+INSERT INTO t1 VALUES (5, UNIX_TIMESTAMP(), "five", null);
+INSERT INTO t1 VALUES (6, UNIX_TIMESTAMP(), "six", null);
+INSERT INTO t1 VALUES (7, UNIX_TIMESTAMP(), "seven", null);
+INSERT INTO t1 VALUES (8, UNIX_TIMESTAMP(), "eight", null);
+INSERT INTO t1 VALUES (9, UNIX_TIMESTAMP(), "nine", null);
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'baz';
+set @@global.rocksdb_compact_cf = 'bar';
+SELECT c1 FROM t1;
+c1
+4
+5
+6
+7
+8
+9
+set global rocksdb_debug_ttl_snapshot_ts = 600;
+set @@global.rocksdb_compact_cf = 'bar';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT c1 FROM t1;
+c1
+7
+8
+9
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 BIGINT,
+c2 BIGINT UNSIGNED NOT NULL,
+PRIMARY KEY (`c1`, `c2`)
+) ENGINE=ROCKSDB
+COMMENT="ttl_duration=100;ttl_col=c2;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1),
+PARTITION custom_p1 VALUES IN (2),
+PARTITION custom_p2 VALUES IN (3)
+);
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT c1 FROM t1;
+c1
+1
+2
+3
+set global rocksdb_debug_ttl_snapshot_ts = 300;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT c1 FROM t1;
+c1
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_rows_examined.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_rows_examined.result
new file mode 100644
index 00000000000..b0304af8bef
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_rows_examined.result
@@ -0,0 +1,45 @@
+set debug_sync='RESET';
+set global rocksdb_debug_ttl_read_filter_ts = -10;
+connect conn1, localhost, root,,test;
+connect conn2, localhost, root,,test;
+connection conn1;
+CREATE TABLE t_re (
+a INT, b INT, PRIMARY KEY (a)
+) ENGINE=ROCKSDB
+COMMENT 'ttl_duration=1';
+affected rows: 0
+set global rocksdb_debug_ttl_rec_ts = -13;
+affected rows: 0
+insert into t_re values (1,1);
+affected rows: 1
+insert into t_re values (2,2);
+affected rows: 1
+set global rocksdb_debug_ttl_rec_ts = 0;
+affected rows: 0
+commit;
+affected rows: 0
+set debug_sync='rocksdb.ttl_rows_examined SIGNAL parked WAIT_FOR go';
+affected rows: 0
+SELECT * FROM t_re;
+connection conn2;
+set debug_sync='now WAIT_FOR parked';
+affected rows: 0
+SHOW PROCESSLIST;
+Id User Host db Command Time State Info Progress
+### ### ### ### Query ### debug sync point: rocksdb.ttl_rows_examined SELECT * FROM t_re 0.000
+### ### ### ### Query ### init SHOW PROCESSLIST 0.000
+### ### ### ### Sleep ### NULL 0.000
+affected rows: 3
+set debug_sync='now SIGNAL go';
+affected rows: 0
+connection conn1;
+a b
+affected rows: 0
+set debug_sync='RESET';
+affected rows: 0
+set global rocksdb_debug_ttl_read_filter_ts = DEFAULT;
+affected rows: 0
+drop table t_re;
+affected rows: 0
+disconnect conn1;
+disconnect conn2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary.result
new file mode 100644
index 00000000000..1f748a3841a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary.result
@@ -0,0 +1,709 @@
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`),
+KEY kb (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, UNIX_TIMESTAMP(), 5);
+INSERT INTO t1 values (2, 4, UNIX_TIMESTAMP(), 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a,c),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, 4, 6, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int,
+c int,
+ts bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a),
+KEY kbc (b, c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, NULL, NULL, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, NULL, NULL, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kbc);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kbc);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64),
+`c` varbinary(256),
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`a`),
+KEY kbc (`b`, `c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', NULL, 'bc', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'efghijk', NULL, UNIX_TIMESTAMP(), 'l');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kbc);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kbc);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5);
+INSERT INTO t1 values (2, 4, 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int,
+ts bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a, ts),
+KEY kt (ts)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=ts;';
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+SELECT COUNT(*) FROM t1 FORCE INDEX(kt);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_snapshot_ts = -10;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX(kt);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 10;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX(kt);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a, ts),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, UNIX_TIMESTAMP(), 5);
+INSERT INTO t1 values (2, 4, UNIX_TIMESTAMP(), 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64),
+`c` varbinary(256),
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`a`, `ts`),
+KEY kb (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', NULL, 'bc', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('de', 'fghijk', NULL, UNIX_TIMESTAMP(), 'l');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT NOT NULL,
+b varbinary(64) NOT NULL,
+c varbinary(256) NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+value mediumblob NOT NULL,
+PRIMARY KEY (b,a,c),
+KEY kb (b)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=10;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values (2, 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (3, 'i', 'j', UNIX_TIMESTAMP(), 'k');
+INSERT INTO t1 values (4, 'm', 'n', UNIX_TIMESTAMP(), 'o');
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_debug_ttl_snapshot_ts = -3600;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+1
+2
+3
+4
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+3
+4
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+ts bigint(20),
+PRIMARY KEY (a,c),
+KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+ERROR HY000: TTL column (ts) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration.
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+ts int,
+PRIMARY KEY (a,c),
+KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+ERROR HY000: TTL column (ts) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration.
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a,c),
+KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=abc;';
+ERROR HY000: TTL duration (abc) in MyRocks must be an unsigned non-null 64-bit integer.
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a,c),
+KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=abc;';
+ERROR HY000: TTL column (abc) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration.
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a,c),
+KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_col=abc;';
+ERROR HY000: TTL column (abc) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration.
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=500;';
+INSERT INTO t1 values (1, 1);
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+1
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+1
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b INT NOT NULL,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+INSERT INTO t1 values (1, 1);
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+1 1
+set global rocksdb_debug_ttl_rec_ts = -300;
+ALTER TABLE t1 COMMENT = 'ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = 0;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` int(11) NOT NULL,
+ PRIMARY KEY (`a`),
+ KEY `kb` (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COMMENT='ttl_duration=1'
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b INT,
+KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+ALTER TABLE t1 DROP PRIMARY KEY;
+ERROR HY000: TTL support is currently disabled when table has a hidden PK.
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b INT,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t1 VALUES (2,2);
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY(b);
+set global rocksdb_debug_ttl_snapshot_ts = -3600;
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int,
+PRIMARY KEY (a,b),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='asdadfasdfsadfadf ;ttl_duration=1; asfasdfasdfadfa';
+INSERT INTO t1 values (UNIX_TIMESTAMP(), 1);
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+1
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+ALTER TABLE t1 COMMENT = 'adsf;;ttl_duration=5;asfasdfa;ttl_col=a;asdfasdf;';
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (UNIX_TIMESTAMP(), 2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+1
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, 0);
+INSERT INTO t1 values (3, 0);
+INSERT INTO t1 values (5, 0);
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (7, 0);
+INSERT INTO t1 values (9, 0);
+set global rocksdb_debug_ttl_rec_ts = 0;
+UPDATE t1 SET a=a+1;
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+10 0
+2 0
+4 0
+6 0
+8 0
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT * FROM t1;
+a b
+10 0
+8 0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT,
+b bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=(UNIX_TIMESTAMP()+1) WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+1
+3
+5
+7
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+1
+3
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 1);
+INSERT INTO t1 values (3, 1);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_enable_ttl=0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_enable_ttl=1;
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+6
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 2);
+INSERT INTO t1 values (3, 3);
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT,
+b bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a, b),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=(UNIX_TIMESTAMP()+1) WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+1
+3
+5
+7
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+1
+3
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT,
+b bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a, b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=(UNIX_TIMESTAMP()+1) WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT a FROM t1;
+a
+1
+3
+5
+7
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT a FROM t1;
+a
+1
+3
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*);
+COUNT(*)
+1
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*);
+COUNT(*)
+1
+CREATE INDEX kb on t1 (b);
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', 'd');
+INSERT INTO t1 values ('d', 'e', 'f', 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*);
+COUNT(*)
+1
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*);
+COUNT(*)
+1
+CREATE INDEX kb on t1 (b);
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`, `ts`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*);
+COUNT(*)
+1
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*);
+COUNT(*)
+1
+CREATE INDEX kb on t1 (b);
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
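Taken together, the ttl_col cases above follow one rule: a row's TTL clock is the value stored in the designated column, so an UPDATE that writes a fresh UNIX_TIMESTAMP() re-arms expiry for exactly the touched rows (here a=1 and a=3 survive compaction) while untouched rows lapse. A hedged sketch of the user-facing recipe, reusing the comment syntax exercised by these tests (table name illustrative):

-- Sketch, not part of the suite: refreshing the TTL column keeps a row
-- alive across compactions; rows left untouched expire after 5 seconds.
CREATE TABLE ttl_demo (
  a INT PRIMARY KEY,
  b BIGINT UNSIGNED NOT NULL,
  KEY kb (b)
) ENGINE=rocksdb
COMMENT='ttl_duration=5;ttl_col=b;';
INSERT INTO ttl_demo VALUES (1, UNIX_TIMESTAMP());
UPDATE ttl_demo SET b = UNIX_TIMESTAMP() WHERE a = 1;  -- re-arms the TTL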
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering.result
new file mode 100644
index 00000000000..395c84edfe9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering.result
@@ -0,0 +1,511 @@
+CREATE TABLE t1 (
+a int PRIMARY KEY,
+b int NOT NULL,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+2
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int PRIMARY KEY,
+b BIGINT UNSIGNED NOT NULL,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=10;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+# the row with a=1 should be hidden
+SELECT a FROM t1 FORCE INDEX (PRIMARY);
+a
+2
+3
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+2
+3
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+# none should be hidden yet; compaction has run, but the records aren't expired
+SELECT a FROM t1 FORCE INDEX (PRIMARY);
+a
+2
+3
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+2
+3
+# all should be hidden now, even though compaction hasn't run again
+set global rocksdb_debug_ttl_read_filter_ts = -310;
+SELECT a FROM t1 FORCE INDEX (PRIMARY);
+a
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+set global rocksdb_debug_ttl_read_filter_ts = 0;
+DROP TABLE t1;
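The case above separates read filtering from compaction: rocksdb_debug_ttl_read_filter_ts appears to shift the clock that read filtering compares record timestamps against, so records can be made to look expired without another compaction. A sketch of that debug pattern, reusing the schema above (debug-build variable; values illustrative):

-- Pretend the reader's clock is 310 seconds later, so records stamped
-- 300 seconds in the future now exceed their 10-second TTL.
set global rocksdb_debug_ttl_read_filter_ts = -310;
SELECT a FROM t1 FORCE INDEX (kb);                 -- returns nothing
set global rocksdb_debug_ttl_read_filter_ts = 0;   -- restore the real clock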
+CREATE TABLE t1 (
+a int PRIMARY KEY,
+b int NOT NULL,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (7, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+# should return nothing.
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+set global rocksdb_enable_ttl_read_filtering=0;
+# should return everything
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+3 3
+5 5
+7 7
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+1 1
+3 3
+5 5
+7 7
+set global rocksdb_enable_ttl_read_filtering=1;
+# should return nothing.
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+DROP TABLE t1;
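The toggle sequence above makes the storage/visibility split explicit: rocksdb_enable_ttl_read_filtering changes only what reads surface, while the expired rows physically remain until compaction removes them, which is why flipping the global flips the result set with no data change. Reduced to its essence (sketch):

set global rocksdb_enable_ttl_read_filtering = 0;  -- expired rows reappear
set global rocksdb_enable_ttl_read_filtering = 1;  -- expired rows hidden again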
+set global rocksdb_compact_cf= 'default';
+# Read filtering index scan tests (None of these queries should return any results)
+CREATE TABLE t1 (
+a int,
+b int,
+c int,
+PRIMARY KEY (a,b,c),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (0,0,0);
+INSERT INTO t1 values (0,0,1);
+INSERT INTO t1 values (0,1,0);
+INSERT INTO t1 values (0,1,1);
+INSERT INTO t1 values (1,1,2);
+INSERT INTO t1 values (1,2,1);
+INSERT INTO t1 values (1,2,2);
+INSERT INTO t1 values (1,2,3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_force_flush_memtable_now=1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE a=1 AND b=2 AND c=2;
+a b c
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a=1 AND b=2 AND c=2;
+a b c
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE a = 1;
+a b c
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a = 1;
+a b c
+SELECT max(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a < 3;
+max(a)
+NULL
+SELECT max(a) FROM t1 FORCE INDEX (kb) WHERE a < 3;
+max(a)
+NULL
+SELECT max(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a < 2 AND b = 1 AND c < 3;
+max(a)
+NULL
+SELECT max(a) FROM t1 FORCE INDEX (kb) WHERE a < 2 AND b = 1 AND c < 3;
+max(a)
+NULL
+SELECT min(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a >= 1;
+min(a)
+NULL
+SELECT min(a) FROM t1 FORCE INDEX (kb) WHERE a >= 1;
+min(a)
+NULL
+SELECT min(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a > 1;
+min(a)
+NULL
+SELECT min(a) FROM t1 FORCE INDEX (kb) WHERE a > 1;
+min(a)
+NULL
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE a=1 and b in (1) order by c desc;
+a b c
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a=1 and b in (1) order by c desc;
+a b c
+SELECT max(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a <=10;
+max(a)
+NULL
+SELECT max(a) FROM t1 FORCE INDEX (kb) WHERE a <=10;
+max(a)
+NULL
+SELECT a FROM t1 FORCE INDEX (PRIMARY) WHERE a > 0 and a <= 2;
+a
+SELECT a FROM t1 FORCE INDEX (kb) WHERE a > 0 and a <= 2;
+a
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+0
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+8
+DROP TABLE t1;
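Two things are shown at once in this block: read filtering is applied inside every index access path, so point lookups, range scans, and the MIN/MAX shortcuts all come back empty or NULL; and the rows_expired delta of 0 before compaction confirms the records still physically exist at that point, while the delta of 8 afterwards (with PK entries excluded via rocksdb_debug_ttl_ignore_pk) matches the eight kb entries. The MIN/MAX behavior in isolation (sketch):

-- Even the MIN/MAX optimizations go through the filter, so an all-expired
-- table yields NULL rather than a stale boundary value.
SELECT min(a), max(a) FROM t1 FORCE INDEX (kb);   -- NULL, NULL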
+# Attempt to update expired value, should filter out
+set global rocksdb_force_flush_memtable_now=1;
+CREATE TABLE t1 (
+a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a
+SELECT * FROM t1;
+a
+# No error is thrown here; under the hood, index_next_with_direction
+# filters the record out so it is never seen in the first place.
+UPDATE t1 set a = 1;
+DROP TABLE t1;
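The absence of an error above is the point of the test: the scan that drives the UPDATE goes through index_next_with_direction, which skips expired records, so the statement simply matches zero rows. One way to observe that from SQL (sketch, reusing the table before it is dropped):

UPDATE t1 SET a = 1;   -- matches 0 rows: the expired record is never seen
SELECT ROW_COUNT();    -- would report 0, not a duplicate-key error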
+# Ensure no rows can disappear in the middle of long-running transactions
+# Also ensure repeatable-read works as expected
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+CREATE TABLE t1 (
+a int PRIMARY KEY,
+b int NOT NULL,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+INSERT INTO t1 values (1, 1);
+connection con1;
+# Creating Snapshot (start transaction)
+BEGIN;
+# Nothing filtered out here
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+1 1
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+1 1
+# Switching to connection 2
+connection con2;
+# compaction doesn't remove anything since con1's snapshot is still open
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+# reads are filtered out here: unlike con1, this connection holds no old
+# snapshot, so from its point of view the records have already 'expired'
+# and are hidden even though compaction has not yet removed them
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+# Switching to connection 1
+connection con1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+1 1
+UPDATE t1 set a = a + 1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+2 1
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+2 1
+COMMIT;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+DROP TABLE t1;
+disconnect con1;
+disconnect con2;
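The two-connection sequence pins down the MVCC interaction: con1's snapshot keeps the row visible and writable for the whole transaction, compaction cannot drop versions a live snapshot may still need, and only after COMMIT does read filtering against the current time hide the row. The guarantee, compressed (sketch):

-- Within one transaction, TTL expiry is frozen at the snapshot; it takes
-- effect only for statements issued after COMMIT.
BEGIN;
SELECT * FROM t1;          -- row stays visible for the snapshot's lifetime
UPDATE t1 SET a = a + 1;   -- and stays writable
COMMIT;
SELECT * FROM t1;          -- now filtered: the row has expired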
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+CREATE TABLE t1 (
+a int PRIMARY KEY,
+b int NOT NULL,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+# On Connection 1
+connection con1;
+# Creating Snapshot (start transaction)
+BEGIN;
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+# On Connection 2
+connection con2;
+set global rocksdb_debug_ttl_rec_ts = -2;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (7, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+# On Connection 1
+connection con1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+# On Connection 2
+connection con2;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+set global rocksdb_enable_ttl_read_filtering=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+3 3
+5 5
+7 7
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+1 1
+3 3
+5 5
+7 7
+set global rocksdb_enable_ttl_read_filtering=1;
+disconnect con2;
+disconnect con1;
+connection default;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int,
+b int,
+ts bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 VALUES (1, 1, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (2, 2, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (3, 3, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (4, 4, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (5, 5, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (6, 6, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (7, 7, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (8, 8, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (9, 9, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (10, 10, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+# None are expired
+SELECT a, b FROM t1 FORCE INDEX (kb);
+a b
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+10 10
+set global rocksdb_debug_ttl_rec_ts = -100;
+UPDATE t1 SET ts=(UNIX_TIMESTAMP()+1) WHERE a IN (4, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+# rows 4 and 7 should be gone
+SELECT a, b FROM t1 FORCE INDEX (kb);
+a b
+1 1
+2 2
+3 3
+5 5
+6 6
+8 8
+9 9
+10 10
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+PRIMARY KEY (c1, c2),
+KEY kc2 (c2)
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (1,1,'a');
+INSERT INTO t1 values (2,2,'b');
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (3,3,'c');
+INSERT INTO t1 values (4,4,'d');
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (5,5,'e');
+INSERT INTO t1 values (6,6,'f');
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (7,7,'g');
+INSERT INTO t1 values (8,8,'h');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name
+3 3 c
+4 4 d
+7 7 g
+8 8 h
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name
+3 3 c
+4 4 d
+7 7 g
+8 8 h
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE c1 > 5;
+c1 c2 name
+7 7 g
+8 8 h
+SELECT * FROM t1 FORCE INDEX (kc2) WHERE c2 > 5;
+c1 c2 name
+7 7 g
+8 8 h
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE 3 < c1 AND c1 < 6;
+c1 c2 name
+4 4 d
+SELECT * FROM t1 FORCE INDEX (kc2) WHERE 3 < c2 AND c2 < 6;
+c1 c2 name
+4 4 d
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int,
+b int,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1800;';
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 values (1,1);
+INSERT INTO t1 values (2,2);
+INSERT INTO t1 values (7,7);
+INSERT INTO t1 values (10,10);
+INSERT INTO t1 values (11,11);
+INSERT INTO t1 values (12,12);
+set global rocksdb_debug_ttl_rec_ts = 450;
+INSERT INTO t1 values (3,3);
+INSERT INTO t1 values (4,4);
+INSERT INTO t1 values (8,8);
+INSERT INTO t1 values (16,16);
+INSERT INTO t1 values (17,17);
+INSERT INTO t1 values (18,18);
+set global rocksdb_debug_ttl_rec_ts = 900;
+INSERT INTO t1 values (5,5);
+INSERT INTO t1 values (6,6);
+INSERT INTO t1 values (9,9);
+INSERT INTO t1 values (13,13);
+INSERT INTO t1 values (14,14);
+INSERT INTO t1 values (15,15);
+set global rocksdb_debug_ttl_rec_ts = 0;
+# Should see everything
+SELECT * FROM t1;
+a b
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+10 10
+11 11
+12 12
+13 13
+14 14
+15 15
+16 16
+17 17
+18 18
+# Should have no records from the first group
+set global rocksdb_debug_ttl_read_filter_ts = -1800;
+SELECT * FROM t1;
+a b
+3 3
+4 4
+5 5
+6 6
+8 8
+9 9
+13 13
+14 14
+15 15
+16 16
+17 17
+18 18
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a > 5 AND a < 15;
+a b
+6 6
+8 8
+9 9
+13 13
+14 14
+# Should only have records from the last group
+set global rocksdb_debug_ttl_read_filter_ts = -1800 - 450;
+SELECT * FROM t1;
+a b
+5 5
+6 6
+9 9
+13 13
+14 14
+15 15
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a < 10;
+a b
+5 5
+6 6
+9 9
+# Should be empty
+set global rocksdb_debug_ttl_read_filter_ts = -1800 - 900;
+SELECT * FROM t1;
+a b
+set global rocksdb_debug_ttl_read_filter_ts = 0;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering_multiple_index.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering_multiple_index.result
new file mode 100644
index 00000000000..e4c361576f5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering_multiple_index.result
@@ -0,0 +1,82 @@
+CREATE TABLE t1 (
+a int NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b) COMMENT 'kb',
+KEY kc (c) COMMENT 'kc'
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1, 1);
+INSERT INTO t1 values (2, 2, 2);
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 values (3, 3, 3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='kb';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b c
+3 3 3
+SELECT * FROM t1 FORCE INDEX (kb);
+a b c
+3 3 3
+SELECT * FROM t1 FORCE INDEX (kc);
+a b c
+3 3 3
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b) COMMENT 'kb',
+KEY kc (c) COMMENT 'kc'
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1, 1);
+INSERT INTO t1 values (2, 2, 2);
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 values (3, 3, 3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b c
+3 3 3
+SELECT * FROM t1 FORCE INDEX (kb);
+a b c
+3 3 3
+SELECT * FROM t1 FORCE INDEX (kc);
+a b c
+3 3 3
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b) COMMENT 'kb',
+KEY kc (c) COMMENT 'kc'
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1, 1);
+INSERT INTO t1 values (2, 2, 2);
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 values (3, 3, 3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_compact_cf='kb';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b c
+3 3 3
+SELECT * FROM t1 FORCE INDEX (kb);
+a b c
+3 3 3
+SELECT * FROM t1 FORCE INDEX (kc);
+a b c
+3 3 3
+DROP TABLE t1;
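The three variants in this file differ only in which column families are compacted ('kb', 'default', or both), yet every SELECT agrees. Read filtering evaluates TTL per index entry at read time, so an index whose column family has not been compacted yet cannot leak expired rows. The CF-targeted knob used throughout (sketch):

-- Compaction can be aimed at a single column family; visibility stays
-- consistent across PRIMARY, kb and kc either way.
set global rocksdb_compact_cf = 'kb';       -- purge only kb's CF
set global rocksdb_compact_cf = 'default';  -- purge only the default CF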
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_with_partitions.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_with_partitions.result
new file mode 100644
index 00000000000..713c7e92fa8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_with_partitions.result
@@ -0,0 +1,389 @@
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+PRIMARY KEY (`c1`),
+KEY kc2 (`c2`)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+set global rocksdb_debug_ttl_rec_ts = -3600;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 2);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (4, 4);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (6, 6);
+INSERT INTO t1 values (7, 7);
+INSERT INTO t1 values (8, 8);
+INSERT INTO t1 values (9, 9);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2
+2 2
+3 3
+5 5
+6 6
+8 8
+9 9
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2
+2 2
+3 3
+5 5
+6 6
+8 8
+9 9
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=my_custom_cf;custom_p2_cfname=baz',
+KEY kc2 (`c2`)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;custom_p1_ttl_duration=7;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (1,1,'a');
+INSERT INTO t1 values (4,4,'aaaa');
+INSERT INTO t1 values (7,7,'aaaaaaa');
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (2,2,'aa');
+INSERT INTO t1 values (3,3,'aaa');
+INSERT INTO t1 values (5,5,'aaaaa');
+INSERT INTO t1 values (6,6,'aaaaaa');
+INSERT INTO t1 values (8,8,'aaaaaaaa');
+INSERT INTO t1 values (9,9,'aaaaaaaaa');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name
+1 1 a
+2 2 aa
+3 3 aaa
+4 4 aaaa
+5 5 aaaaa
+6 6 aaaaaa
+7 7 aaaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name
+1 1 a
+2 2 aa
+3 3 aaa
+4 4 aaaa
+5 5 aaaaa
+6 6 aaaaaa
+7 7 aaaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+set @@global.rocksdb_compact_cf = 'default';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name
+2 2 aa
+3 3 aaa
+5 5 aaaaa
+6 6 aaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name
+2 2 aa
+3 3 aaa
+5 5 aaaaa
+6 6 aaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set @@global.rocksdb_compact_cf = 'foo';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name
+2 2 aa
+3 3 aaa
+5 5 aaaaa
+6 6 aaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name
+2 2 aa
+3 3 aaa
+5 5 aaaaa
+6 6 aaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name
+3 3 aaa
+6 6 aaaaaa
+9 9 aaaaaaaaa
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name
+3 3 aaa
+6 6 aaaaaa
+9 9 aaaaaaaaa
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+event DATE,
+PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;',
+KEY kc2 (c2)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=9999;custom_p2_ttl_duration=5;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 2, 3),
+PARTITION custom_p1 VALUES IN (4, 5, 6),
+PARTITION custom_p2 VALUES IN (7, 8, 9)
+);
+INSERT INTO t1 VALUES (1, 1, "one", null);
+INSERT INTO t1 VALUES (2, 2, "two", null);
+INSERT INTO t1 VALUES (3, 3, "three", null);
+INSERT INTO t1 VALUES (4, 4, "four", null);
+INSERT INTO t1 VALUES (5, 5, "five", null);
+INSERT INTO t1 VALUES (6, 6, "six", null);
+INSERT INTO t1 VALUES (7, 7, "seven", null);
+INSERT INTO t1 VALUES (8, 8, "eight", null);
+INSERT INTO t1 VALUES (9, 9, "nine", null);
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+7 7 seven NULL
+8 8 eight NULL
+9 9 nine NULL
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+7 7 seven NULL
+8 8 eight NULL
+9 9 nine NULL
+set global rocksdb_debug_ttl_rec_ts = 600;
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY(`c2`,`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;';
+set global rocksdb_debug_ttl_rec_ts = 0;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `c1` int(11) NOT NULL,
+ `c2` int(11) NOT NULL,
+ `name` varchar(25) NOT NULL,
+ `event` date DEFAULT NULL,
+ PRIMARY KEY (`c2`,`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;',
+ KEY `kc2` (`c2`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COMMENT='custom_p0_ttl_duration=9999;custom_p2_ttl_duration=5;'
+ PARTITION BY LIST (`c1`)
+(PARTITION `custom_p0` VALUES IN (1,2,3) ENGINE = ROCKSDB,
+ PARTITION `custom_p1` VALUES IN (4,5,6) ENGINE = ROCKSDB,
+ PARTITION `custom_p2` VALUES IN (7,8,9) ENGINE = ROCKSDB)
+set global rocksdb_debug_ttl_snapshot_ts = 100;
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'baz';
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+7 7 seven NULL
+8 8 eight NULL
+9 9 nine NULL
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+7 7 seven NULL
+8 8 eight NULL
+9 9 nine NULL
+set global rocksdb_debug_ttl_snapshot_ts = 1200;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'baz';
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 BIGINT,
+c2 BIGINT UNSIGNED NOT NULL,
+name VARCHAR(25) NOT NULL,
+event DATE,
+PRIMARY KEY (`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;',
+KEY kc2 (`c2`)
+) ENGINE=ROCKSDB
+COMMENT="ttl_duration=1;custom_p1_ttl_duration=100;custom_p1_ttl_col=c2;custom_p2_ttl_duration=5000;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 2, 3),
+PARTITION custom_p1 VALUES IN (4, 5, 6),
+PARTITION custom_p2 VALUES IN (7, 8, 9)
+);
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 VALUES (1, UNIX_TIMESTAMP(), "one", null);
+INSERT INTO t1 VALUES (2, UNIX_TIMESTAMP(), "two", null);
+INSERT INTO t1 VALUES (3, UNIX_TIMESTAMP(), "three", null);
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 VALUES (4, UNIX_TIMESTAMP(), "four", null);
+INSERT INTO t1 VALUES (5, UNIX_TIMESTAMP(), "five", null);
+INSERT INTO t1 VALUES (6, UNIX_TIMESTAMP(), "six", null);
+INSERT INTO t1 VALUES (7, UNIX_TIMESTAMP(), "seven", null);
+INSERT INTO t1 VALUES (8, UNIX_TIMESTAMP(), "eight", null);
+INSERT INTO t1 VALUES (9, UNIX_TIMESTAMP(), "nine", null);
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'baz';
+set @@global.rocksdb_compact_cf = 'bar';
+set @@global.rocksdb_compact_cf = 'default';
+SELECT c1 FROM t1 FORCE INDEX (PRIMARY);
+c1
+4
+5
+6
+7
+8
+9
+SELECT c1 FROM t1 FORCE INDEX (kc2);
+c1
+4
+5
+6
+7
+8
+9
+set global rocksdb_debug_ttl_snapshot_ts = 600;
+set @@global.rocksdb_compact_cf = 'bar';
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT c1 FROM t1 FORCE INDEX (PRIMARY);
+c1
+7
+8
+9
+SELECT c1 FROM t1 FORCE INDEX (kc2);
+c1
+7
+8
+9
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+PRIMARY KEY (`c1`) COMMENT 'custom_p0_cfname=foo;'
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+set global rocksdb_debug_ttl_rec_ts = -3600;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (4, 4);
+INSERT INTO t1 values (7, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 values (2, 2);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (6, 6);
+INSERT INTO t1 values (8, 8);
+INSERT INTO t1 values (9, 9);
+SELECT * FROM t1;
+c1 c2
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+CREATE INDEX kc2 on t1 (c2);
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='foo';
+set global rocksdb_compact_cf='default';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2
+2 2
+3 3
+5 5
+6 6
+8 8
+9 9
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2
+2 2
+3 3
+5 5
+6 6
+8 8
+9 9
+DROP TABLE t1;
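These partitioned cases exercise the comment-based syntax end to end: a table-level ttl_duration sets the default, custom_<partition>_ttl_duration and custom_<partition>_ttl_col override it per partition, and custom_<partition>_cfname pins a partition to its own column family so it can be compacted in isolation. A condensed sketch of the syntax, assembled from the patterns above (table name illustrative):

CREATE TABLE part_demo (
  c1 INT,
  c2 BIGINT UNSIGNED NOT NULL,
  PRIMARY KEY (c1) COMMENT 'custom_p0_cfname=foo;'
) ENGINE=ROCKSDB
COMMENT='ttl_duration=1;custom_p1_ttl_duration=100;custom_p1_ttl_col=c2;'
PARTITION BY LIST (c1) (
  PARTITION custom_p0 VALUES IN (1, 2, 3),
  PARTITION custom_p1 VALUES IN (4, 5, 6)
);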
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_binary.result b/storage/rocksdb/mysql-test/rocksdb/r/type_binary.result
new file mode 100644
index 00000000000..bbdd6d210fb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_binary.result
@@ -0,0 +1,48 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+b BINARY ,
+b0 BINARY(0) ,
+b1 BINARY(1) ,
+b20 BINARY(20) ,
+b255 BINARY(255) ,
+pk BINARY PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+b binary(1) YES NULL
+b0 binary(0) YES NULL
+b1 binary(1) YES NULL
+b20 binary(20) YES NULL
+b255 binary(255) YES NULL
+pk binary(1) NO PRI NULL
+INSERT INTO t1 VALUES ('','','','','','');
+INSERT INTO t1 VALUES ('a','','b','abcdefghi klmnopqrst', 'Creating an article for the Knowledgebase is similar to asking questions. First, navigate to the category where you feel the article should be. Once there, double check that an article doesn\'t already exist which would work.','a');
+SELECT HEX(b), HEX(b0), HEX(b1), HEX(b20), HEX(b255), HEX(pk) FROM t1 ORDER BY pk;
+HEX(b) HEX(b0) HEX(b1) HEX(b20) HEX(b255) HEX(pk)
+00 00 0000000000000000000000000000000000000000 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 00
+61 62 616263646566676869206B6C6D6E6F7071727374 4372656174696E6720616E2061727469636C6520666F7220746865204B6E6F776C65646765626173652069732073696D696C617220746F2061736B696E67207175657374696F6E732E2046697273742C206E6176696761746520746F207468652063617465676F727920776865726520796F75206665656C207468652061727469636C652073686F756C642062652E204F6E63652074686572652C20646F75626C6520636865636B207468617420616E2061727469636C6520646F65736E277420616C726561647920657869737420776869636820776F756C6420776F726B2E00000000000000000000000000000000000000000000000000000000000000 61
+INSERT INTO t1 VALUES ('abc', 'a', 'abc', REPEAT('a',21), REPEAT('x',256),'b');
+Warnings:
+Warning 1265 Data truncated for column 'b' at row 1
+Warning 1265 Data truncated for column 'b0' at row 1
+Warning 1265 Data truncated for column 'b1' at row 1
+Warning 1265 Data truncated for column 'b20' at row 1
+Warning 1265 Data truncated for column 'b255' at row 1
+INSERT INTO t1 SELECT b255, b255, b255, b255, CONCAT('a',b255,b255), 'c' FROM t1;
+ERROR 23000: Duplicate entry 'c' for key 'PRIMARY'
+SELECT HEX(b), HEX(b0), HEX(b1), HEX(b20), HEX(b255), HEX(pk) FROM t1 ORDER BY pk;
+HEX(b) HEX(b0) HEX(b1) HEX(b20) HEX(b255) HEX(pk)
+00 00 0000000000000000000000000000000000000000 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 00
+61 61 6161616161616161616161616161616161616161 787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878 62
+61 62 616263646566676869206B6C6D6E6F7071727374 4372656174696E6720616E2061727469636C6520666F7220746865204B6E6F776C65646765626173652069732073696D696C617220746F2061736B696E67207175657374696F6E732E2046697273742C206E6176696761746520746F207468652063617465676F727920776865726520796F75206665656C207468652061727469636C652073686F756C642062652E204F6E63652074686572652C20646F75626C6520636865636B207468617420616E2061727469636C6520646F65736E277420616C726561647920657869737420776869636820776F756C6420776F726B2E00000000000000000000000000000000000000000000000000000000000000 61
+ALTER TABLE t1 ADD COLUMN b257 BINARY(257) ;
+ERROR 42000: Column length too big for column 'b257' (max = 255); use BLOB or TEXT instead
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+b binary(1) YES NULL
+b0 binary(0) YES NULL
+b1 binary(1) YES NULL
+b20 binary(20) YES NULL
+b255 binary(255) YES NULL
+pk binary(1) NO PRI NULL
+DROP TABLE t1;
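The HEX output above is dominated by padding: BINARY(n) is fixed-length and right-pads values with 0x00 bytes, which is why 'a' in the one-byte pk column reads back as 61 while shorter values in b20 and b255 trail off into zeros. A tiny worked check (table name illustrative):

-- Fixed-width BINARY pads with 0x00 up to the declared length.
CREATE TABLE pad_demo (b3 BINARY(3)) ENGINE=rocksdb;
INSERT INTO pad_demo VALUES ('a');
SELECT HEX(b3) FROM pad_demo;   -- 610000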
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_binary_indexes.result b/storage/rocksdb/mysql-test/rocksdb/r/type_binary_indexes.result
new file mode 100644
index 00000000000..c5cffdc1a0d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_binary_indexes.result
@@ -0,0 +1,80 @@
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (b BINARY,
+b20 BINARY(20) PRIMARY KEY,
+v16 VARBINARY(16),
+v128 VARBINARY(128)
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 b20 A 1000 NULL NULL LSMTREE
+INSERT INTO t1 (b,b20,v16,v128) VALUES ('a','char1','varchar1a','varchar1b'),('a','char2','varchar2a','varchar2b'),('b','char3','varchar1a','varchar1b'),('c','char4','varchar3a','varchar3b');
+EXPLAIN SELECT HEX(b20) FROM t1 ORDER BY b20;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL PRIMARY 20 NULL # Using index
+SELECT HEX(b20) FROM t1 ORDER BY b20;
+HEX(b20)
+6368617231000000000000000000000000000000
+6368617232000000000000000000000000000000
+6368617233000000000000000000000000000000
+6368617234000000000000000000000000000000
+EXPLAIN SELECT HEX(b20) FROM t1 IGNORE INDEX (PRIMARY) ORDER BY b20 DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL # Using filesort
+SELECT HEX(b20) FROM t1 ORDER BY b20 DESC;
+HEX(b20)
+6368617234000000000000000000000000000000
+6368617233000000000000000000000000000000
+6368617232000000000000000000000000000000
+6368617231000000000000000000000000000000
+DROP TABLE t1;
+CREATE TABLE t1 (b BINARY,
+b20 BINARY(20),
+v16 VARBINARY(16),
+v128 VARBINARY(128),
+pk VARBINARY(10) PRIMARY KEY,
+INDEX (v16(10))
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A 1000 NULL NULL LSMTREE
+t1 1 v16 1 v16 A 500 10 NULL YES LSMTREE
+INSERT INTO t1 (b,b20,v16,v128,pk) VALUES ('a','char1','varchar1a','varchar1b',1),('a','char2','varchar2a','varchar2b',2),('b','char3','varchar1a','varchar1b',3),('c','char4','varchar3a','varchar3b',4),('d','char5','varchar4a','varchar3b',5),('e','char6','varchar2a','varchar3b',6);
+INSERT INTO t1 (b,b20,v16,v128,pk) SELECT b,b20,v16,v128,pk+100 FROM t1;
+EXPLAIN SELECT HEX(SUBSTRING(v16,0,3)) FROM t1 WHERE v16 LIKE 'varchar%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range v16 v16 13 NULL # Using where
+SELECT HEX(SUBSTRING(v16,7,3)) FROM t1 WHERE v16 LIKE 'varchar%';
+HEX(SUBSTRING(v16,7,3))
+723161
+723161
+723161
+723161
+723261
+723261
+723261
+723261
+723361
+723361
+723461
+723461
+EXPLAIN SELECT HEX(SUBSTRING(v16,0,3)) FROM t1 FORCE INDEX (v16) WHERE v16 LIKE 'varchar%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range v16 v16 13 NULL # Using where
+SELECT HEX(SUBSTRING(v16,7,3)) FROM t1 FORCE INDEX (v16) WHERE v16 LIKE 'varchar%';
+HEX(SUBSTRING(v16,7,3))
+723161
+723161
+723161
+723161
+723261
+723261
+723261
+723261
+723361
+723361
+723461
+723461
+DROP TABLE t1;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
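The EXPLAIN key_len of 13 for the v16(10) prefix index decomposes cleanly: 10 bytes of indexed prefix, plus 2 bytes for the VARBINARY length, plus 1 byte for the NULL flag. Spelled out (sketch, reusing the second schema above):

-- key_len for INDEX (v16(10)) on a nullable VARBINARY column:
--   10 (prefix) + 2 (length bytes) + 1 (NULL flag) = 13
EXPLAIN SELECT v16 FROM t1 FORCE INDEX (v16) WHERE v16 LIKE 'varchar%';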
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_bit.result b/storage/rocksdb/mysql-test/rocksdb/r/type_bit.result
new file mode 100644
index 00000000000..fa84cbde8c8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_bit.result
@@ -0,0 +1,53 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+a BIT ,
+b BIT(20) ,
+c BIT(64) ,
+d BIT(1) ,
+PRIMARY KEY (c)
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a bit(1) YES NULL
+b bit(20) YES NULL
+c bit(64) NO PRI NULL
+d bit(1) YES NULL
+ALTER TABLE t1 DROP COLUMN d;
+ALTER TABLE t1 ADD COLUMN d BIT(0) ;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a bit(1) YES NULL
+b bit(20) YES NULL
+c bit(64) NO PRI NULL
+d bit(1) YES NULL
+INSERT INTO t1 (a,b,c,d) VALUES (0,POW(2,20)-1,b'1111111111111111111111111111111111111111111111111111111111111111',1);
+SELECT BIN(a), HEX(b), c+0 FROM t1 WHERE d>0;
+BIN(a) HEX(b) c+0
+0 FFFFF 18446744073709551615
+INSERT INTO t1 (a,b,c,d) VALUES (1,0,-2,0);
+SELECT a+0, b+0, c+0 FROM t1 WHERE d<100;
+a+0 b+0 c+0
+0 1048575 18446744073709551615
+1 0 18446744073709551614
+INSERT INTO t1 (a,b,c,d) VALUES (b'1', 'f', 0xFF, 0x0);
+SELECT a+0, b+0, c+0 FROM t1 WHERE d IN (0, 2);
+a+0 b+0 c+0
+1 0 18446744073709551614
+1 102 255
+DELETE FROM t1;
+INSERT INTO t1 (a,b,c,d) VALUES (0x10,0,0,1);
+Warnings:
+Warning 1264 Out of range value for column 'a' at row 1
+SELECT a+0,b+0,c+0,d+0 FROM t1;
+a+0 b+0 c+0 d+0
+1 0 0 1
+INSERT INTO t1 (a,b,c,d) VALUES (0x01,0,0x10000000000000000,0);
+Warnings:
+Warning 1264 Out of range value for column 'c' at row 1
+SELECT a+0,b+0,c+0,d+0 FROM t1;
+a+0 b+0 c+0 d+0
+1 0 0 1
+1 0 18446744073709551615 0
+DROP TABLE t1;
+CREATE TABLE t1 (pk INT PRIMARY KEY, a BIT(65) ) ENGINE=rocksdb;
+ERROR 42000: Display width out of range for 'a' (max = 64)
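A few of the BIT conversions above reward a worked note: string literals are taken byte-wise, so 'f' is ASCII 0x66 and b+0 reads 102; the 64-bit all-ones literal is 2^64 - 1 = 18446744073709551615; and the signed -2 wraps to 2^64 - 2 = 18446744073709551614 when coerced into BIT(64). For instance (sketch):

SELECT ASCII('f');   -- 102, the value seen for b+0 above
SELECT CAST(b'1111111111111111111111111111111111111111111111111111111111111111' AS UNSIGNED);
-- 18446744073709551615 = 2^64 - 1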
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_bit_indexes.result b/storage/rocksdb/mysql-test/rocksdb/r/type_bit_indexes.result
new file mode 100644
index 00000000000..8da878eb0f2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_bit_indexes.result
@@ -0,0 +1,58 @@
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+a BIT,
+b BIT(20) PRIMARY KEY,
+c BIT(32),
+d BIT(64)
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 b A 1000 NULL NULL LSMTREE
+INSERT INTO t1 (a,b,c,d) VALUES
+(0,0xFFFFF,0,1),(0,256,0xAAA,0x12345),(1,16,0,0xFFFFFFF),(0,11,12,13),
+(1,100,101,102),(0,12,13,14),(1,13,14,15),(0,101,201,202),(1,1000,1001,1002),
+(1,0xFFFF,0xFFFFFFFF,0xFFFFFFFFFFFFFFFF);
+EXPLAIN SELECT b+0 FROM t1 ORDER BY b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL PRIMARY 3 NULL # Using index
+SELECT b+0 FROM t1 ORDER BY b;
+b+0
+11
+12
+13
+16
+100
+101
+256
+1000
+65535
+1048575
+DROP TABLE t1;
+# TODO: Unique indexes are not enforced
+CREATE TABLE t1 (
+a BIT,
+b BIT(20),
+c BIT(32),
+d BIT(64),
+pk BIT(10) PRIMARY KEY,
+INDEX(a)
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A 1000 NULL NULL LSMTREE
+t1 1 a 1 a A 500 NULL NULL YES LSMTREE
+INSERT INTO t1 (a,b,c,d,pk) VALUES
+(0,0xFFFFF,0,1,1),(0,256,0xAAA,0x12345,2),(1,16,0,0xFFFFFFF,3),(0,11,12,13,4),
+(1,100,101,102,5),(0,12,13,14,6),(1,13,14,15,7),(0,101,201,202,8),(1,1000,1001,1002,9),
+(1,0xFFFF,0xFFFFFFFF,0xFFFFFFFFFFFFFFFF,10);
+EXPLAIN SELECT DISTINCT a+0 FROM t1 ORDER BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL # Using temporary; Using filesort
+SELECT DISTINCT a+0 FROM t1 ORDER BY a;
+a+0
+0
+1
+DROP TABLE t1;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_blob.result b/storage/rocksdb/mysql-test/rocksdb/r/type_blob.result
new file mode 100644
index 00000000000..3b2bee74b6a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_blob.result
@@ -0,0 +1,57 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+b BLOB ,
+b0 BLOB(0) ,
+b1 BLOB(1) ,
+b300 BLOB(300) ,
+bm BLOB(65535) ,
+b70k BLOB(70000) ,
+b17m BLOB(17000000) ,
+t TINYBLOB ,
+m MEDIUMBLOB ,
+l LONGBLOB
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+b blob YES NULL
+b0 blob YES NULL
+b1 tinyblob YES NULL
+b300 blob YES NULL
+bm blob YES NULL
+b70k mediumblob YES NULL
+b17m longblob YES NULL
+t tinyblob YES NULL
+m mediumblob YES NULL
+l longblob YES NULL
+INSERT INTO t1 (b,b0,b1,b300,bm,b70k,b17m,t,m,l) VALUES
+('','','','','','','','','',''),
+('a','b','c','d','e','f','g','h','i','j'),
+('test1','test2','test3','test4','test5','test6','test7','test8','test9','test10'),
+( REPEAT('a',65535), REPEAT('b',65535), REPEAT('c',255), REPEAT('d',65535), REPEAT('e',65535), REPEAT('f',1048576), HEX(REPEAT('g',1048576)), REPEAT('h',255), REPEAT('i',1048576), HEX(REPEAT('j',1048576)) );
+SELECT LENGTH(b), LENGTH(b0), LENGTH(b1), LENGTH(b300), LENGTH(bm), LENGTH(b70k), LENGTH(b17m), LENGTH(t), LENGTH(m), LENGTH(l) FROM t1;
+LENGTH(b) LENGTH(b0) LENGTH(b1) LENGTH(b300) LENGTH(bm) LENGTH(b70k) LENGTH(b17m) LENGTH(t) LENGTH(m) LENGTH(l)
+0 0 0 0 0 0 0 0 0 0
+1 1 1 1 1 1 1 1 1 1
+5 5 5 5 5 5 5 5 5 6
+65535 65535 255 65535 65535 1048576 2097152 255 1048576 2097152
+INSERT INTO t1 (b,b0,b1,b300,bm,b70k,b17m,t,m,l) VALUES
+( REPEAT('a',65536), REPEAT('b',65536), REPEAT('c',256), REPEAT('d',65536), REPEAT('e',65536), REPEAT('f',1048576), REPEAT('g',1048576), REPEAT('h',256), REPEAT('i',1048576), REPEAT('j',1048576) );
+Warnings:
+Warning 1265 Data truncated for column 'b' at row 1
+Warning 1265 Data truncated for column 'b0' at row 1
+Warning 1265 Data truncated for column 'b1' at row 1
+Warning 1265 Data truncated for column 'b300' at row 1
+Warning 1265 Data truncated for column 'bm' at row 1
+Warning 1265 Data truncated for column 't' at row 1
+SELECT LENGTH(b), LENGTH(b0), LENGTH(b1), LENGTH(b300), LENGTH(bm), LENGTH(b70k), LENGTH(b17m), LENGTH(t), LENGTH(m), LENGTH(l) FROM t1;
+LENGTH(b) LENGTH(b0) LENGTH(b1) LENGTH(b300) LENGTH(bm) LENGTH(b70k) LENGTH(b17m) LENGTH(t) LENGTH(m) LENGTH(l)
+0 0 0 0 0 0 0 0 0 0
+1 1 1 1 1 1 1 1 1 1
+5 5 5 5 5 5 5 5 5 6
+65535 65535 255 65535 65535 1048576 1048576 255 1048576 1048576
+65535 65535 255 65535 65535 1048576 2097152 255 1048576 2097152
+ALTER TABLE t1 ADD COLUMN bbb BLOB(4294967296);
+ERROR 42000: Display width out of range for 'bbb' (max = 4294967295)
+DROP TABLE t1;
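SHOW COLUMNS explains the truncation pattern: a BLOB(n) declaration is promoted to the smallest blob type that can hold n bytes, so BLOB(1) becomes TINYBLOB, BLOB(300) and BLOB(65535) stay plain BLOB, BLOB(70000) becomes MEDIUMBLOB, and BLOB(17000000) becomes LONGBLOB; the second INSERT then truncates exactly at each tier's capacity. The tiers (sketch, table name illustrative):

-- Blob tiers and their capacities in bytes:
--   TINYBLOB 255 | BLOB 65535 | MEDIUMBLOB 16777215 | LONGBLOB 4294967295
CREATE TABLE tier_demo (x BLOB(300)) ENGINE=rocksdb;
SHOW COLUMNS IN tier_demo;   -- x is reported as plain blob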
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_blob_indexes.result b/storage/rocksdb/mysql-test/rocksdb/r/type_blob_indexes.result
new file mode 100644
index 00000000000..26726e0f6d1
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_blob_indexes.result
@@ -0,0 +1,188 @@
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+b BLOB,
+t TINYBLOB,
+m MEDIUMBLOB,
+l LONGBLOB,
+PRIMARY KEY b (b(32))
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 b A 1000 32 NULL LSMTREE
+INSERT INTO t1 (b,t,m,l) VALUES
+('','','',''),
+('a','b','c','d'),
+('b','d','c','b'),
+('test1','test2','test3','test4'),
+(REPEAT('a',128),REPEAT('b',128),REPEAT('c',128),REPEAT('d',128)),
+(HEX('abcd'),HEX('def'),HEX('a'),HEX('abc')),
+('abc','def','ghi','jkl'),
+('test2','test3','test4','test5'),
+('test3','test4','test5','test6'),
+(REPEAT('b',128),REPEAT('f',128),REPEAT('e',128),REPEAT('d',128)),
+(REPEAT('c',128),REPEAT('b',128),REPEAT('c',128),REPEAT('e',128));
+EXPLAIN SELECT SUBSTRING(b,16) AS f FROM t1 WHERE b IN ('test1','test2') ORDER BY f;
+id select_type table type possible_keys key key_len ref rows Extra
+# # # # # PRIMARY # # # #
+SELECT SUBSTRING(b,16) AS f FROM t1 WHERE b IN ('test1','test2') ORDER BY f;
+f
+
+
+EXPLAIN SELECT SUBSTRING(b,16) AS f FROM t1 USE INDEX () WHERE b IN ('test1','test2') ORDER BY f;
+id select_type table type possible_keys key key_len ref rows Extra
+# # # # # NULL # # # #
+SELECT SUBSTRING(b,16) AS f FROM t1 USE INDEX () WHERE b IN ('test1','test2') ORDER BY f;
+f
+
+
+DROP TABLE t1;
+CREATE TABLE t1 (
+b BLOB,
+t TINYBLOB,
+m MEDIUMBLOB,
+l LONGBLOB,
+pk INT AUTO_INCREMENT PRIMARY KEY,
+UNIQUE INDEX l_t (l(256),t(64))
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk # # NULL NULL # #
+t1 0 l_t 1 l # # 256 NULL # #
+t1 0 l_t 2 t # # 64 NULL # #
+INSERT INTO t1 (b,t,m,l) VALUES
+('','','',''),
+('a','b','c','d'),
+('b','d','c','b'),
+('test1','test2','test3','test4'),
+(REPEAT('a',128),REPEAT('b',128),REPEAT('c',128),REPEAT('d',128)),
+(HEX('abcd'),HEX('def'),HEX('a'),HEX('abc')),
+('abc','def','ghi','jkl'),
+('test2','test3','test4','test5'),
+('test3','test4','test5','test6'),
+(REPEAT('b',128),REPEAT('f',128),REPEAT('e',128),REPEAT('d',128)),
+(REPEAT('c',128),REPEAT('b',128),REPEAT('c',128),REPEAT('e',128));
+EXPLAIN SELECT SUBSTRING(t,64), SUBSTRING(l,256) FROM t1 WHERE t!=l AND l NOT IN ('test1') ORDER BY t, l DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range l_t l_t 259 NULL # Using where; Using filesort
+SELECT SUBSTRING(t,64), SUBSTRING(l,256) FROM t1 WHERE t!=l AND l NOT IN ('test1') ORDER BY t, l DESC;
+SUBSTRING(t,64) SUBSTRING(l,256)
+
+
+bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
+bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
+
+
+fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+
+
+
+EXPLAIN SELECT SUBSTRING(t,64), SUBSTRING(l,256) FROM t1 FORCE INDEX (l_t) WHERE t!=l AND l NOT IN ('test1') ORDER BY t, l DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range l_t l_t 259 NULL # Using where; Using filesort
+SELECT SUBSTRING(t,64), SUBSTRING(l,256) FROM t1 FORCE INDEX (l_t) WHERE t!=l AND l NOT IN ('test1') ORDER BY t, l DESC;
+SUBSTRING(t,64) SUBSTRING(l,256)
+
+
+bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
+bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
+
+
+fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+
+
+
+DROP TABLE t1;
+CREATE TABLE t1 (
+b BLOB,
+t TINYBLOB,
+m MEDIUMBLOB,
+l LONGBLOB,
+pk INT AUTO_INCREMENT PRIMARY KEY,
+INDEX (m(128))
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A 1000 NULL NULL LSMTREE
+t1 1 m 1 m A 500 128 NULL YES LSMTREE
+INSERT INTO t1 (b,t,m,l) VALUES
+('','','',''),
+('a','b','c','d'),
+('b','d','c','b'),
+('test1','test2','test3','test4'),
+(REPEAT('a',128),REPEAT('b',128),REPEAT('c',128),REPEAT('d',128)),
+(HEX('abcd'),HEX('def'),HEX('a'),HEX('abc')),
+('abc','def','ghi','jkl'),
+('test2','test3','test4','test5'),
+('test3','test4','test5','test6'),
+(REPEAT('b',128),REPEAT('f',128),REPEAT('e',128),REPEAT('d',128)),
+(REPEAT('c',128),REPEAT('b',128),REPEAT('c',128),REPEAT('e',128));
+EXPLAIN SELECT SUBSTRING(m,128) AS f FROM t1 WHERE m = 'test1' ORDER BY f DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref m m 131 const # Using where; Using filesort
+SELECT SUBSTRING(m,128) AS f FROM t1 WHERE m = 'test1' ORDER BY f DESC;
+f
+EXPLAIN SELECT SUBSTRING(m,128) AS f FROM t1 IGNORE INDEX FOR ORDER BY (m) WHERE m = 'test1' ORDER BY f DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref m m 131 const # Using where; Using filesort
+SELECT SUBSTRING(m,128) AS f FROM t1 IGNORE INDEX FOR ORDER BY (m) WHERE m = 'test1' ORDER BY f DESC;
+f
+DROP TABLE t1;
+CREATE TABLE t1 (
+b BLOB,
+PRIMARY KEY b (b(32))
+) ENGINE=rocksdb;
+INSERT INTO t1 (b) VALUES
+('00000000000000000000000000000000'),
+('00000000000000000000000000000001'),
+('00000000000000000000000000000002');
+SELECT b FROM t1;
+b
+00000000000000000000000000000000
+00000000000000000000000000000001
+00000000000000000000000000000002
+DROP TABLE t1;
+CREATE TABLE t1 (
+b TINYBLOB,
+PRIMARY KEY b (b(32))
+) ENGINE=rocksdb;
+INSERT INTO t1 (b) VALUES
+('00000000000000000000000000000000'),
+('00000000000000000000000000000001'),
+('00000000000000000000000000000002');
+SELECT b FROM t1;
+b
+00000000000000000000000000000000
+00000000000000000000000000000001
+00000000000000000000000000000002
+DROP TABLE t1;
+CREATE TABLE t1 (
+b MEDIUMBLOB,
+PRIMARY KEY b (b(32))
+) ENGINE=rocksdb;
+INSERT INTO t1 (b) VALUES
+('00000000000000000000000000000000'),
+('00000000000000000000000000000001'),
+('00000000000000000000000000000002');
+SELECT b FROM t1;
+b
+00000000000000000000000000000000
+00000000000000000000000000000001
+00000000000000000000000000000002
+DROP TABLE t1;
+CREATE TABLE t1 (
+b LONGBLOB,
+PRIMARY KEY b (b(32))
+) ENGINE=rocksdb;
+INSERT INTO t1 (b) VALUES
+('00000000000000000000000000000000'),
+('00000000000000000000000000000001'),
+('00000000000000000000000000000002');
+SELECT b FROM t1;
+b
+00000000000000000000000000000000
+00000000000000000000000000000001
+00000000000000000000000000000002
+DROP TABLE t1;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_bool.result b/storage/rocksdb/mysql-test/rocksdb/r/type_bool.result
new file mode 100644
index 00000000000..bfe2c18acb4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_bool.result
@@ -0,0 +1,73 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+b1 BOOL ,
+b2 BOOLEAN
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+b1 tinyint(1) YES NULL
+b2 tinyint(1) YES NULL
+INSERT INTO t1 (b1,b2) VALUES (1,TRUE);
+SELECT b1,b2 FROM t1;
+b1 b2
+1 1
+INSERT INTO t1 (b1,b2) VALUES (FALSE,0);
+SELECT b1,b2 FROM t1;
+b1 b2
+0 0
+1 1
+INSERT INTO t1 (b1,b2) VALUES (2,3);
+SELECT b1,b2 FROM t1;
+b1 b2
+0 0
+1 1
+2 3
+INSERT INTO t1 (b1,b2) VALUES (-1,-2);
+SELECT b1,b2 FROM t1;
+b1 b2
+-1 -2
+0 0
+1 1
+2 3
+SELECT IF(b1,'true','false') AS a, IF(b2,'true','false') AS b FROM t1;
+a b
+false false
+true true
+true true
+true true
+SELECT b1,b2 FROM t1 WHERE b1 = TRUE;
+b1 b2
+1 1
+SELECT b1,b2 FROM t1 WHERE b2 = FALSE;
+b1 b2
+0 0
+INSERT INTO t1 (b1,b2) VALUES ('a','b');
+Warnings:
+Warning 1366 Incorrect integer value: 'a' for column `test`.`t1`.`b1` at row 1
+Warning 1366 Incorrect integer value: 'b' for column `test`.`t1`.`b2` at row 1
+SELECT b1,b2 FROM t1;
+b1 b2
+-1 -2
+0 0
+0 0
+1 1
+2 3
+INSERT INTO t1 (b1,b2) VALUES (128,-129);
+Warnings:
+Warning 1264 Out of range value for column 'b1' at row 1
+Warning 1264 Out of range value for column 'b2' at row 1
+SELECT b1,b2 FROM t1;
+b1 b2
+-1 -2
+0 0
+0 0
+1 1
+127 -128
+2 3
+ALTER TABLE t1 ADD COLUMN b3 BOOLEAN UNSIGNED ;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'UNSIGNED' at line 1
+ALTER TABLE ADD COLUMN b3 BOOL ZEROFILL ;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'ADD COLUMN b3 BOOL ZEROFILL' at line 1
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_char.result b/storage/rocksdb/mysql-test/rocksdb/r/type_char.result
new file mode 100644
index 00000000000..1786dfae1e7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_char.result
@@ -0,0 +1,76 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c CHAR ,
+c0 CHAR(0) ,
+c1 CHAR(1) ,
+c20 CHAR(20) ,
+c255 CHAR(255) ,
+PRIMARY KEY (c255)
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+c char(1) YES NULL
+c0 char(0) YES NULL
+c1 char(1) YES NULL
+c20 char(20) YES NULL
+c255 char(255) NO PRI NULL
+INSERT INTO t1 (c,c0,c1,c20,c255) VALUES ('','','','','');
+INSERT INTO t1 (c,c0,c1,c20,c255) VALUES ('a','','b','abcdefghi klmnopqrst', 'Creating an article for the Knowledgebase is similar to asking questions. First, navigate to the category where you feel the article should be. Once there, double check that an article doesn\'t already exist which would work.');
+SELECT c,c0,c1,c20,c255 FROM t1;
+c c0 c1 c20 c255
+
+a b abcdefghi klmnopqrst Creating an article for the Knowledgebase is similar to asking questions. First, navigate to the category where you feel the article should be. Once there, double check that an article doesn't already exist which would work.
+INSERT INTO t1 (c,c0,c1,c20,c255) VALUES ('abc', 'a', 'abc', REPEAT('a',21), REPEAT('x',256));
+Warnings:
+Warning 1265 Data truncated for column 'c' at row 1
+Warning 1265 Data truncated for column 'c0' at row 1
+Warning 1265 Data truncated for column 'c1' at row 1
+Warning 1265 Data truncated for column 'c20' at row 1
+Warning 1265 Data truncated for column 'c255' at row 1
+INSERT INTO t1 (c,c0,c1,c20,c255) SELECT c255, c255, c255, c255, CONCAT('a',c255,c1) FROM t1;
+Warnings:
+Warning 1265 Data truncated for column 'c' at row 5
+Warning 1265 Data truncated for column 'c0' at row 5
+Warning 1265 Data truncated for column 'c1' at row 5
+Warning 1265 Data truncated for column 'c20' at row 5
+Warning 1265 Data truncated for column 'c' at row 6
+Warning 1265 Data truncated for column 'c0' at row 6
+Warning 1265 Data truncated for column 'c1' at row 6
+Warning 1265 Data truncated for column 'c20' at row 6
+Warning 1265 Data truncated for column 'c255' at row 6
+SELECT c,c0,c1,c20,c255 FROM t1;
+c c0 c1 c20 c255
+
+ a
+C C Creating an article aCreating an article for the Knowledgebase is similar to asking questions. First, navigate to the category where you feel the article should be. Once there, double check that an article doesn't already exist which would work.b
+a a aaaaaaaaaaaaaaaaaaaa xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+a b abcdefghi klmnopqrst Creating an article for the Knowledgebase is similar to asking questions. First, navigate to the category where you feel the article should be. Once there, double check that an article doesn't already exist which would work.
+x x xxxxxxxxxxxxxxxxxxxx axxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+SELECT DISTINCT c20, REPEAT('a',LENGTH(c20)), COUNT(*) FROM t1 GROUP BY c1, c20;
+c20 REPEAT('a',LENGTH(c20)) COUNT(*)
+ 2
+Creating an article aaaaaaaaaaaaaaaaaaa 1
+aaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaa 1
+abcdefghi klmnopqrst aaaaaaaaaaaaaaaaaaaa 1
+xxxxxxxxxxxxxxxxxxxx aaaaaaaaaaaaaaaaaaaa 1
+ALTER TABLE t1 ADD COLUMN c257 CHAR(257) ;
+ERROR 42000: Column length too big for column 'c257' (max = 255); use BLOB or TEXT instead
+DROP TABLE t1;
+CREATE TABLE t1(c1 CHAR(0) NOT NULL);
+INSERT INTO t1 VALUES('a');
+Warnings:
+Warning 1265 Data truncated for column 'c1' at row 1
+SELECT * FROM t1;
+c1
+
+DROP TABLE t1;
+CREATE TABLE t1(a char(10) character set utf8 collate utf8_bin primary key);
+INSERT INTO t1 VALUES ('one'),('two'),('three'),('four'),('five');
+(SELECT * FROM t1 LIMIT 1) UNION (SELECT * FROM t1);
+a
+five
+four
+one
+three
+two
+DROP TABLE t1;
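Context for the result above: it leans on three CHAR behaviours that hold under the permissive sql_mode these tests run with. CHAR(0) stores only '' or NULL, over-long values are truncated with warning 1265, and trailing pad spaces are stripped on retrieval. A minimal sketch of the same semantics; char_demo is illustrative and not part of the test suite:

  CREATE TABLE char_demo (c0 CHAR(0), c3 CHAR(3)) ENGINE=rocksdb;
  INSERT INTO char_demo VALUES ('', 'ab');     -- stored as given
  INSERT INTO char_demo VALUES ('x', 'abcd');  -- both columns truncated, warning 1265 each
  SELECT c0, c3, LENGTH(c3) FROM char_demo;    -- LENGTH(c3) is 2: CHAR padding never comes back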
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_char_indexes.result b/storage/rocksdb/mysql-test/rocksdb/r/type_char_indexes.result
new file mode 100644
index 00000000000..413c3f69f23
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_char_indexes.result
@@ -0,0 +1,73 @@
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+c CHAR,
+c20 CHAR(20) PRIMARY KEY,
+v16 VARCHAR(16),
+v128 VARCHAR(128)
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 c20 A 1000 NULL NULL LSMTREE
+INSERT INTO t1 (c,c20,v16,v128) VALUES ('a','char1','varchar1a','varchar1b'),('a','char2','varchar2a','varchar2b'),('b','char3','varchar1a','varchar1b'),('c','char4','varchar3a','varchar3b');
+EXPLAIN SELECT c20 FROM t1 ORDER BY c20;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL PRIMARY 20 NULL # Using index
+SELECT c20 FROM t1 ORDER BY c20;
+c20
+char1
+char2
+char3
+char4
+EXPLAIN SELECT c20 FROM t1 FORCE INDEX FOR ORDER BY (PRIMARY) ORDER BY c20;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL PRIMARY 20 NULL # Using index
+SELECT c20 FROM t1 FORCE INDEX FOR ORDER BY (PRIMARY) ORDER BY c20;
+c20
+char1
+char2
+char3
+char4
+DROP TABLE t1;
+CREATE TABLE t1 (
+c CHAR,
+c20 CHAR(20),
+v16 VARCHAR(16),
+v128 VARCHAR(128),
+pk VARCHAR(64) PRIMARY KEY,
+INDEX (v16)
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A 1000 NULL NULL LSMTREE
+t1 1 v16 1 v16 A 500 NULL NULL YES LSMTREE
+INSERT INTO t1 (c,c20,v16,v128,pk) VALUES ('a','char1','varchar1a','varchar1b','1'),('a','char2','varchar2a','varchar2b','2'),('b','char3','varchar1a','varchar1b','3'),('c','char4','varchar3a','varchar3b','4');
+EXPLAIN SELECT SUBSTRING(v16,0,3) FROM t1 WHERE v16 LIKE 'varchar%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index v16 v16 21 NULL # Using where; Using index
+SELECT SUBSTRING(v16,7,3) FROM t1 WHERE v16 LIKE 'varchar%';
+SUBSTRING(v16,7,3)
+r1a
+r1a
+r2a
+r3a
+EXPLAIN SELECT SUBSTRING(v16,0,3) FROM t1 IGNORE INDEX (v16) WHERE v16 LIKE 'varchar%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL # Using where
+SELECT SUBSTRING(v16,7,3) FROM t1 IGNORE INDEX (v16) WHERE v16 LIKE 'varchar%';
+SUBSTRING(v16,7,3)
+r1a
+r1a
+r2a
+r3a
+EXPLAIN SELECT c,c20,v16,v128 FROM t1 WHERE v16 = 'varchar1a' OR v16 = 'varchar3a' ORDER BY v16;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL v16 NULL NULL NULL # Using where; Using filesort
+SELECT c,c20,v16,v128 FROM t1 WHERE v16 = 'varchar1a' OR v16 = 'varchar3a' ORDER BY v16;
+c c20 v16 v128
+a char1 varchar1a varchar1b
+b char3 varchar1a varchar1b
+c char4 varchar3a varchar3b
+DROP TABLE t1;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
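Two details in the result above are worth calling out. Index hints can be scoped: FORCE INDEX FOR ORDER BY (PRIMARY) constrains only ORDER BY resolution, while IGNORE INDEX (v16) removes the key from consideration entirely, producing the type ALL plan. And SQL string positions are 1-based, which is why the executed queries use SUBSTRING(v16,7,3) to pull 'r1a' out of 'varchar1a'; a position of 0 yields the empty string. Illustrative recap, reusing the column names from the tables above:

  SELECT c20 FROM t1 FORCE INDEX FOR ORDER BY (PRIMARY) ORDER BY c20;  -- hint scoped to ORDER BY
  SELECT v16 FROM t1 IGNORE INDEX (v16) WHERE v16 LIKE 'varchar%';     -- key excluded outright
  SELECT SUBSTRING('varchar1a', 7, 3);                                 -- 'r1a': positions start at 1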
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_char_indexes_collation.result b/storage/rocksdb/mysql-test/rocksdb/r/type_char_indexes_collation.result
new file mode 100644
index 00000000000..cb56089595b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_char_indexes_collation.result
@@ -0,0 +1,91 @@
+set session debug_dbug= "+d,myrocks_enable_unknown_collation_index_only_scans";
+create table t (id int not null auto_increment primary key,
+c varchar(8) CHARACTER SET utf8 COLLATE utf8_general_ci,
+key sk (c));
+insert into t (c) values ('☀'), ('ß');
+explain select c from t;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t index NULL sk 27 NULL # Using index
+select c from t;
+c
+ß
+☀
+drop table t;
+set session debug_dbug= "-d,myrocks_enable_unknown_collation_index_only_scans";
+create table t (id int not null auto_increment,
+c1 varchar(1) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
+c2 char(1) CHARACTER SET latin1 COLLATE latin1_general_ci,
+primary key (id),
+key sk1 (c1),
+key sk2 (c2));
+explain select hex(c1) from t order by c1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t index NULL sk1 4 NULL # Using index
+explain select hex(c1) from t IGNORE INDEX (sk1) order by c1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t ALL NULL NULL NULL NULL # Using filesort
+explain select hex(c2) from t order by c2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t index NULL sk2 2 NULL # Using index
+explain select hex(c2) from t IGNORE INDEX (sk1) order by c2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t index NULL sk2 2 NULL # Using index
+truncate t;
+insert into t (c1, c2) values ('Asdf ', 'Asdf ');
+Warnings:
+Warning 1265 Data truncated for column 'c1' at row 1
+Warning 1265 Data truncated for column 'c2' at row 1
+select char_length(c1), char_length(c2), c1, c2 from t;
+char_length(c1) char_length(c2) c1 c2
+1 1 A A
+drop table t;
+create table t (id int not null auto_increment,
+c2 char(255) CHARACTER SET latin1 COLLATE latin1_general_ci,
+primary key (id),
+unique key sk2 (c2));
+insert into t (c2) values ('Asdf');
+insert into t (c2) values ('asdf ');
+ERROR 23000: Duplicate entry 'asdf' for key 'sk2'
+drop table t;
+create table t (id int not null auto_increment,
+c1 varchar(256) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
+primary key (id),
+unique key sk1 (c1));
+insert into t (c1) values ('Asdf');
+insert into t (c1) values ('asdf ');
+ERROR 23000: Duplicate entry 'asdf ' for key 'sk1'
+insert into t (c1) values ('asdf');
+ERROR 23000: Duplicate entry 'asdf' for key 'sk1'
+drop table t;
+create table t (id int not null auto_increment,
+c1 varchar(256) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
+primary key (id),
+unique key sk1 (c1(1)));
+insert into t (c1) values ('Asdf');
+insert into t (c1) values ('bbbb ');
+insert into t (c1) values ('a ');
+ERROR 23000: Duplicate entry 'a' for key 'sk1'
+explain select c1 from t;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t ALL NULL NULL NULL NULL #
+select c1 from t;
+c1
+Asdf
+bbbb
+drop table t;
+set session rocksdb_verify_row_debug_checksums = on;
+create table t (id int primary key, email varchar(100), KEY email_i (email(30))) engine=rocksdb default charset=latin1;
+insert into t values (1, ' a');
+explain select 'email_i' as index_name, count(*) AS count from t force index(email_i);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t ALL NULL NULL NULL NULL #
+select 'email_i' as index_name, count(*) AS count from t force index(email_i);
+index_name count
+email_i 1
+drop table t;
+create table t (id int primary key, email varchar(767), KEY email_i (email)) engine=rocksdb default charset=latin1;
+insert into t values (1, REPEAT('a', 700));
+select 'email_i' as index_name, count(*) AS count from t force index(email_i);
+index_name count
+email_i 1
+drop table t;
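The duplicate-key errors above come from collation rules rather than anything MyRocks-specific: latin1_swedish_ci and latin1_general_ci are case-insensitive, and non-binary collations pad-compare, so trailing blanks are insignificant in comparisons and hence in unique keys. A minimal sketch; pad_demo is illustrative:

  CREATE TABLE pad_demo (c VARCHAR(16) CHARACTER SET latin1 COLLATE latin1_swedish_ci UNIQUE);
  INSERT INTO pad_demo VALUES ('Asdf');
  INSERT INTO pad_demo VALUES ('asdf ');  -- ER_DUP_ENTRY: case and trailing space both ignored
  SELECT _latin1'Asdf' = _latin1'asdf ';  -- 1: equal under the default latin1_swedish_ci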
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_date_time.result b/storage/rocksdb/mysql-test/rocksdb/r/type_date_time.result
new file mode 100644
index 00000000000..91e7ac257c3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_date_time.result
@@ -0,0 +1,56 @@
+DROP TABLE IF EXISTS t1;
+set @save_time_zone=@@time_zone;
+set time_zone='UTC';
+CREATE TABLE t1 (
+d DATE ,
+dt DATETIME ,
+ts TIMESTAMP ,
+t TIME ,
+y YEAR ,
+y4 YEAR(4) ,
+y2 YEAR(2) ,
+pk DATETIME PRIMARY KEY
+) ENGINE=rocksdb;
+Warnings:
+Note 1287 'YEAR(2)' is deprecated and will be removed in a future release. Please use YEAR(4) instead
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+d date YES NULL
+dt datetime YES NULL
+ts timestamp YES NULL
+t time YES NULL
+y year(4) YES NULL
+y4 year(4) YES NULL
+y2 year(2) YES NULL
+pk datetime NO PRI NULL
+SET @tm = '2012-04-09 05:27:00';
+INSERT INTO t1 (d,dt,ts,t,y,y4,y2,pk) VALUES
+('1000-01-01', '1000-01-01 00:00:00', FROM_UNIXTIME(1), '-838:59:59', '1901', '1901', '00','2012-12-12 12:12:12'),
+('9999-12-31', '9999-12-31 23:59:59', FROM_UNIXTIME(2147483647), '838:59:59', '2155', '2155', '99','2012-12-12 12:12:13'),
+('0000-00-00', '0000-00-00 00:00:00', '0000-00-00 00:00:00', '00:00:00', '0', '0', '0','2012-12-12 12:12:14'),
+(DATE(@tm),@tm,TIMESTAMP(@tm),TIME(@tm),YEAR(@tm),YEAR(@tm),YEAR(@tm),'2012-12-12 12:12:15');
+SELECT d,dt,ts,t,y,y4,y2 FROM t1;
+d dt ts t y y4 y2
+0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 00:00:00 2000 2000 00
+1000-01-01 1000-01-01 00:00:00 1970-01-01 00:00:01 -838:59:59 1901 1901 00
+2012-04-09 2012-04-09 05:27:00 2012-04-09 05:27:00 05:27:00 2012 2012 12
+9999-12-31 9999-12-31 23:59:59 2038-01-19 03:14:07 838:59:59 2155 2155 99
+INSERT INTO t1 (d,dt,ts,t,y,y4,y2,pk) VALUES
+('999-13-32', '999-11-31 00:00:00', '0', '-839:00:00', '1900', '1900', '-1','2012-12-12 12:12:16');
+Warnings:
+Warning 1265 Data truncated for column 'd' at row 1
+Warning 1265 Data truncated for column 'dt' at row 1
+Warning 1265 Data truncated for column 'ts' at row 1
+Warning 1264 Out of range value for column 't' at row 1
+Warning 1264 Out of range value for column 'y' at row 1
+Warning 1264 Out of range value for column 'y4' at row 1
+Warning 1264 Out of range value for column 'y2' at row 1
+SELECT d,dt,ts,t,y,y4,y2 FROM t1;
+d dt ts t y y4 y2
+1000-01-01 1000-01-01 00:00:00 1970-01-01 00:00:01 -838:59:59 1901 1901 00
+9999-12-31 9999-12-31 23:59:59 2038-01-19 03:14:07 838:59:59 2155 2155 99
+0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 00:00:00 2000 2000 00
+2012-04-09 2012-04-09 05:27:00 2012-04-09 05:27:00 05:27:00 2012 2012 12
+0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 -838:59:59 0000 0000 00
+set time_zone=@save_time_zone;
+DROP TABLE t1;
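For reference, the final INSERT above exercises non-strict temporal coercion: an unparsable DATE/DATETIME/TIMESTAMP literal is stored as the type's zero value with warning 1265, an out-of-range TIME is clamped to +/-838:59:59 with warning 1264, and an out-of-range YEAR becomes 0000, also with warning 1264. A hedged sketch under the same permissive sql_mode; dt_demo is illustrative:

  SET sql_mode = '';  -- assumption: permissive mode, as the test file relies on
  CREATE TABLE dt_demo (d DATE, t TIME, y YEAR);
  INSERT INTO dt_demo VALUES ('999-13-32', '-839:00:00', '1900');
  -- d -> '0000-00-00' (1265), t -> '-838:59:59' (1264), y -> 0000 (1264)
  SELECT * FROM dt_demo;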
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_date_time_indexes.result b/storage/rocksdb/mysql-test/rocksdb/r/type_date_time_indexes.result
new file mode 100644
index 00000000000..120d0d81b55
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_date_time_indexes.result
@@ -0,0 +1,119 @@
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+d DATE,
+dt DATETIME PRIMARY KEY,
+ts TIMESTAMP,
+t TIME,
+y YEAR
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 dt A 1000 NULL NULL LSMTREE
+SET @tm = '2012-04-09 05:27:00';
+INSERT INTO t1 (d,dt,ts,t,y) VALUES
+('2012-01-12', '2010-11-22 12:33:54', '2011-11-14 21:45:55', '00:12:33', '2000'),
+('2012-01-12', '2010-11-22 11:43:14', '2011-11-14 21:45:55', '00:12:32', '2001'),
+('2012-03-31', '2011-08-28 21:33:56', '1999-04-30 19:11:08', '12:00:00', '1999'),
+('2012-03-13', '2011-08-27 21:33:56', '1999-03-30 19:11:08', '12:10:00', '1998'),
+('2011-03-31', '2011-08-28 20:33:56', '1997-01-31 11:54:01', '22:04:10', '1994'),
+(DATE(@tm),@tm,TIMESTAMP(@tm),TIME(@tm),YEAR(@tm));
+EXPLAIN SELECT dt FROM t1 ORDER BY dt LIMIT 3;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL PRIMARY 5 NULL # Using index
+SELECT dt FROM t1 ORDER BY dt LIMIT 3;
+dt
+2010-11-22 11:43:14
+2010-11-22 12:33:54
+2011-08-27 21:33:56
+EXPLAIN SELECT dt FROM t1 FORCE INDEX FOR ORDER BY (PRIMARY) ORDER BY dt LIMIT 3;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL PRIMARY 5 NULL # Using index
+SELECT dt FROM t1 FORCE INDEX FOR ORDER BY (PRIMARY) ORDER BY dt LIMIT 3;
+dt
+2010-11-22 11:43:14
+2010-11-22 12:33:54
+2011-08-27 21:33:56
+INSERT INTO t1 (d,dt,ts,t,y) VALUES
+('2012-01-11', '2010-11-22 12:33:54', '2011-11-14 21:45:55', '00:12:33', '2000');
+ERROR 23000: Duplicate entry '2010-11-22 12:33:54' for key 'PRIMARY'
+DROP TABLE t1;
+CREATE TABLE t1 (
+d DATE,
+dt DATETIME,
+ts TIMESTAMP,
+t TIME,
+y YEAR,
+pk TIME PRIMARY KEY,
+INDEX (ts)
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A 1000 NULL NULL LSMTREE
+t1 1 ts 1 ts A 500 NULL NULL YES LSMTREE
+SET @tm = '2012-04-09 05:27:00';
+INSERT INTO t1 (d,dt,ts,t,y,pk) VALUES
+('2012-01-12', '2010-11-22 12:33:54', '2011-11-14 21:45:55', '00:12:33', '2000','12:00:00'),
+('2012-01-12', '2010-11-22 11:43:14', '2011-11-14 21:45:55', '00:12:32', '2001','12:01:00'),
+('2012-03-31', '2011-08-28 21:33:56', '1999-04-30 19:11:08', '12:00:00', '1999','12:02:00'),
+('2012-03-13', '2011-08-27 21:33:56', '1999-03-30 19:11:08', '12:10:00', '1998','12:03:00'),
+('2011-03-31', '2011-08-28 20:33:56', '1997-01-31 11:54:01', '22:04:10', '1994','12:04:00'),
+(DATE(@tm),@tm,TIMESTAMP(@tm),TIME(@tm),YEAR(@tm),'12:05:00');
+EXPLAIN SELECT ts FROM t1 WHERE ts > NOW();
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index ts ts 5 NULL # Using where; Using index
+SELECT ts FROM t1 WHERE ts > NOW();
+ts
+EXPLAIN SELECT ts FROM t1 USE INDEX () WHERE ts > NOW();
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL # Using where
+SELECT ts FROM t1 USE INDEX () WHERE ts > NOW();
+ts
+DROP TABLE t1;
+CREATE TABLE t1 (
+d DATE,
+dt DATETIME,
+ts TIMESTAMP,
+t TIME,
+y YEAR,
+pk TIME PRIMARY KEY,
+INDEX (y,t)
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A 1000 NULL NULL LSMTREE
+t1 1 y 1 y A 250 NULL NULL YES LSMTREE
+t1 1 y 2 t A 500 NULL NULL YES LSMTREE
+SET @tm = '2012-04-09 05:27:00';
+INSERT INTO t1 (d,dt,ts,t,y,pk) VALUES
+('2012-01-12', '2010-11-22 12:33:54', '2011-11-14 21:45:55', '00:12:33', '2000','18:18:18'),
+('2012-01-12', '2010-11-22 11:43:14', '2011-11-14 21:45:55', '00:12:32', '2001','19:18:18'),
+('2012-03-31', '2011-08-28 21:33:56', '1999-04-30 19:11:08', '12:00:00', '1999','20:18:18'),
+('2012-03-13', '2011-08-27 21:33:56', '1999-03-30 19:11:08', '12:10:00', '1998','21:18:18'),
+('2011-03-31', '2011-08-28 20:33:56', '1997-01-31 11:54:01', '22:04:10', '1994','22:18:18'),
+(DATE(@tm),@tm,TIMESTAMP(@tm),TIME(@tm),YEAR(@tm),'23:18:18');
+EXPLAIN SELECT y, COUNT(*) FROM t1 GROUP BY y;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL y 6 NULL # Using index
+SELECT y, COUNT(*) FROM t1 GROUP BY y;
+y COUNT(*)
+1994 1
+1998 1
+1999 1
+2000 1
+2001 1
+2012 1
+EXPLAIN SELECT y, COUNT(*) FROM t1 USE INDEX FOR GROUP BY () GROUP BY y;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL y 6 NULL # Using index; Using temporary; Using filesort
+SELECT y, COUNT(*) FROM t1 USE INDEX FOR GROUP BY () GROUP BY y;
+y COUNT(*)
+1994 1
+1998 1
+1999 1
+2000 1
+2001 1
+2012 1
+DROP TABLE t1;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
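Two hint variants above are easy to conflate. An empty USE INDEX () list tells the optimizer to use no indexes at all, which is why the ts query degrades to a full scan, whereas USE INDEX FOR GROUP BY () only bars index use for resolving GROUP BY: the y query still scans the (y,t) key but needs a temporary table and filesort to group. Recap of the two forms:

  SELECT ts FROM t1 USE INDEX () WHERE ts > NOW();                  -- type ALL: no index considered
  SELECT y, COUNT(*) FROM t1 USE INDEX FOR GROUP BY () GROUP BY y;  -- index scan, grouping via temporary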
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_decimal.result b/storage/rocksdb/mysql-test/rocksdb/r/type_decimal.result
new file mode 100644
index 00000000000..3a8cf9ed21a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_decimal.result
@@ -0,0 +1,103 @@
+drop table if exists t1, t2;
+#
+# Check that the DECIMAL datatype supports 'index-only' scans and is decoded correctly.
+# (Decoding happens from the mem-comparable image in the index, regardless
+# of whether the value part holds the original value or not)
+#
+create table t1 (
+pk int not null primary key,
+col1 decimal (2,1) signed,
+col2 decimal (2,1) unsigned,
+filler varchar(100),
+key key1(col1, col2)
+)engine=rocksdb;
+insert into t1 values
+(1,-9.1, 0.7, 'filler'),
+(2,-8.2, 1.6, 'filler'),
+(3, 0.3, 2.5, 'filler'),
+(4, 1.4, 3.4, 'filler'),
+(5, 2.5, 4.3, 'filler'),
+(6, 3.3, 5.3, 'filler');
+insert into t1 select pk+100, 9.0, 9.0, 'extra-data' from t1;
+insert into t1 select pk+200, 9.0, 9.0, 'extra-data' from t1;
+insert into t1 select pk+1000, 9.0, 9.0, 'extra-data' from t1;
+insert into t1 select pk+10000, 9.0, 9.0, 'extra-data' from t1;
+insert into t1 select pk+100000, 9.0, 9.0, 'extra-data' from t1;
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+# The following can't use index-only:
+explain select * from t1 where col1 between -8 and 8;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range key1 key1 3 NULL # Using index condition
+# This will use index-only:
+explain
+select col1, col2 from t1 where col1 between -8 and 8;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index key1 key1 6 NULL # Using where; Using index
+select col1, col2 from t1 where col1 between -8 and 8;
+col1 col2
+0.3 2.5
+1.4 3.4
+2.5 4.3
+3.3 5.3
+insert into t1 values (11, NULL, 0.9, 'row1-with-null');
+insert into t1 values (10, -8.4, NULL, 'row2-with-null');
+explain
+select col1, col2 from t1 force index(key1) where col1 is null or col1 < -7;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index key1 key1 6 NULL # Using where; Using index
+select col1, col2 from t1 force index(key1) where col1 is null or col1 < -7;
+col1 col2
+NULL 0.9
+-9.1 0.7
+-8.4 NULL
+-8.2 1.6
+# Try an UPDATE
+select * from t1 where pk in (3,4);
+pk col1 col2 filler
+3 0.3 2.5 filler
+4 1.4 3.4 filler
+update t1 set col2= col2+0.2 where pk in (3,4);
+select * from t1 where pk in (3,4);
+pk col1 col2 filler
+3 0.3 2.7 filler
+4 1.4 3.6 filler
+drop table t1;
+#
+# Try another DECIMAL-based type that takes more space
+#
+create table t1 (
+pk int not null primary key,
+col1 decimal (12,6) signed,
+col2 decimal (12,6) unsigned,
+filler varchar(100),
+key key1(col1, col2)
+)engine=rocksdb;
+insert into t1 values
+(1,-900.001, 000.007, 'filler'),
+(2,-700.002, 100.006, 'filler'),
+(3, 000.003, 200.005, 'filler'),
+(4, 100.004, 300.004, 'filler'),
+(5, 200.005, 400.003, 'filler'),
+(6, 300.003, 500.003, 'filler');
+insert into t1 select pk+100, col1+20000, col2+20000, 'extra-data' from t1;
+insert into t1 select pk+200, col1+20000, col2+20000, 'extra-data' from t1;
+insert into t1 select pk+1000, col1+20000, col2+20000, 'extra-data' from t1;
+insert into t1 select pk+10000, col1+20000, col2+20000, 'extra-data' from t1;
+insert into t1 select pk+100000, col1+20000, col2+20000, 'extra-data' from t1;
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+explain
+select col1, col2 from t1 force index(key1) where col1 between -800 and 800;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index key1 key1 14 NULL # Using where; Using index
+select col1, col2 from t1 force index(key1) where col1 between -800 and 800;
+col1 col2
+-700.002000 100.006000
+0.003000 200.005000
+100.004000 300.004000
+200.005000 400.003000
+300.003000 500.003000
+drop table t1;
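As the header comment in this file says, MyRocks decodes DECIMAL key columns from their mem-comparable image, so a covering secondary key can return exact values without reading the primary record. The verification pattern, in the same shape the test uses (illustrative outside it):

  EXPLAIN SELECT col1, col2 FROM t1 FORCE INDEX(key1)
  WHERE col1 BETWEEN -800 AND 800;   -- Extra shows "Using index": an index-only plan
  SELECT col1, col2 FROM t1 FORCE INDEX(key1)
  WHERE col1 BETWEEN -800 AND 800;   -- values decode exactly, e.g. -700.002000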
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_enum.result b/storage/rocksdb/mysql-test/rocksdb/r/type_enum.result
new file mode 100644
index 00000000000..ed66303cc0f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_enum.result
@@ -0,0 +1,47 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+a ENUM('') ,
+b ENUM('test1','test2','test3','test4','test5') ,
+c ENUM('1','2','3','4','5','6','7','8','9','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z',' ','11','12','13','14','15','16','17','18','19','1a','1b','1c','1d','1e','1f','1g','1h','1i','1j','1k','1l','1m','1n','1o','1p','1q','1r','1s','1t','1u','1v','1w','1x','1y','1z','20','21','22','23','24','25','26','27','28','29','2a','2b','2c','2d','2e','2f','2g','2h','2i','2j','2k','2l','2m','2n','2o','2p','2q','2r','2s','2t','2u','2v','2w','2x','2y','2z','30','31','32','33','34','35','36','37','38','39','3a','3b','3c','3d','3e','3f','3g','3h','3i','3j','3k','3l','3m','3n','3o','3p','3q','3r','3s','3t','3u','3v','3w','3x','3y','3z','40','41','42','43','44','45','46','47','48','49','4a','4b','4c','4d','4e','4f','4g','4h','4i','4j','4k','4l','4m','4n','4o','4p','4q','4r','4s','4t','4u','4v','4w','4x','4y','4z','50','51','52','53','54','55','56','57','58','59','5a','5b','5c','5d','5e','5f','5g','5h','5i','5j','5k','5l','5m','5n','5o','5p','5q','5r','5s','5t','5u','5v','5w','5x','5y','5z','60','61','62','63','64','65','66','67','68','69','6a','6b','6c','6d','6e','6f','6g','6h','6i','6j','6k','6l','6m','6n','6o','6p','6q','6r','6s','6t','6u','6v','6w','6x','6y','6z','70','71','72','73','74','75') ,
+PRIMARY KEY (b)
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a enum('') YES NULL
+b enum('test1','test2','test3','test4','test5') NO PRI NULL
+c enum('1','2','3','4','5','6','7','8','9','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','','11','12','13','14','15','16','17','18','19','1a','1b','1c','1d','1e','1f','1g','1h','1i','1j','1k','1l','1m','1n','1o','1p','1q','1r','1s','1t','1u','1v','1w','1x','1y','1z','20','21','22','23','24','25','26','27','28','29','2a','2b','2c','2d','2e','2f','2g','2h','2i','2j','2k','2l','2m','2n','2o','2p','2q','2r','2s','2t','2u','2v','2w','2x','2y','2z','30','31','32','33','34','35','36','37','38','39','3a','3b','3c','3d','3e','3f','3g','3h','3i','3j','3k','3l','3m','3n','3o','3p','3q','3r','3s','3t','3u','3v','3w','3x','3y','3z','40','41','42','43','44','45','46','47','48','49','4a','4b','4c','4d','4e','4f','4g','4h','4i','4j','4k','4l','4m','4n','4o','4p','4q','4r','4s','4t','4u','4v','4w','4x','4y','4z','50','51','52','53','54','55','56','57','58','59','5a','5b','5c','5d','5e','5f','5g','5h','5i','5j','5k','5l','5m','5n','5o','5p','5q','5r','5s','5t','5u','5v','5w','5x','5y','5z','60','61','62','63','64','65','66','67','68','69','6a','6b','6c','6d','6e','6f','6g','6h','6i','6j','6k','6l','6m','6n','6o','6p','6q','6r','6s','6t','6u','6v','6w','6x','6y','6z','70','71','72','73','74','75') YES NULL
+INSERT INTO t1 (a,b,c) VALUES ('','test2','4'),('',5,2);
+SELECT a,b,c FROM t1;
+a b c
+ test2 4
+ test5 2
+INSERT INTO t1 (a,b,c) VALUES (0,'test6',-1);
+Warnings:
+Warning 1265 Data truncated for column 'a' at row 1
+Warning 1265 Data truncated for column 'b' at row 1
+Warning 1265 Data truncated for column 'c' at row 1
+SELECT a,b,c FROM t1;
+a b c
+
+ test2 4
+ test5 2
+ALTER TABLE t1 ADD COLUMN e ENUM('a','A') ;
+Warnings:
+Note 1291 Column 'e' has duplicated value 'a' in ENUM
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a enum('') YES NULL
+b enum('test1','test2','test3','test4','test5') NO PRI NULL
+c enum('1','2','3','4','5','6','7','8','9','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','','11','12','13','14','15','16','17','18','19','1a','1b','1c','1d','1e','1f','1g','1h','1i','1j','1k','1l','1m','1n','1o','1p','1q','1r','1s','1t','1u','1v','1w','1x','1y','1z','20','21','22','23','24','25','26','27','28','29','2a','2b','2c','2d','2e','2f','2g','2h','2i','2j','2k','2l','2m','2n','2o','2p','2q','2r','2s','2t','2u','2v','2w','2x','2y','2z','30','31','32','33','34','35','36','37','38','39','3a','3b','3c','3d','3e','3f','3g','3h','3i','3j','3k','3l','3m','3n','3o','3p','3q','3r','3s','3t','3u','3v','3w','3x','3y','3z','40','41','42','43','44','45','46','47','48','49','4a','4b','4c','4d','4e','4f','4g','4h','4i','4j','4k','4l','4m','4n','4o','4p','4q','4r','4s','4t','4u','4v','4w','4x','4y','4z','50','51','52','53','54','55','56','57','58','59','5a','5b','5c','5d','5e','5f','5g','5h','5i','5j','5k','5l','5m','5n','5o','5p','5q','5r','5s','5t','5u','5v','5w','5x','5y','5z','60','61','62','63','64','65','66','67','68','69','6a','6b','6c','6d','6e','6f','6g','6h','6i','6j','6k','6l','6m','6n','6o','6p','6q','6r','6s','6t','6u','6v','6w','6x','6y','6z','70','71','72','73','74','75') YES NULL
+e enum('a','A') YES NULL
+INSERT INTO t1 (a,b,c,e) VALUES ('','test3','75','A');
+SELECT a,b,c,e FROM t1;
+a b c e
+ NULL
+ test2 4 NULL
+ test3 75 a
+ test5 2 NULL
+SELECT a,b,c,e FROM t1 WHERE b='test2' OR a != '';
+a b c e
+ test2 4 NULL
+DROP TABLE t1;
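The mixed-type INSERT above relies on ENUM's dual nature: a bare number is taken as a 1-based member index (so b=5 stores 'test5'), while under the permissive sql_mode an unknown value is stored as the error member '' with warning 1265. Minimal sketch; enum_demo is illustrative:

  CREATE TABLE enum_demo (b ENUM('test1','test2','test3','test4','test5'));
  INSERT INTO enum_demo VALUES (5);         -- numeric literal = member index -> 'test5'
  INSERT INTO enum_demo VALUES ('test6');   -- unknown member -> '' plus warning 1265
  SELECT b, b+0 FROM enum_demo;             -- b+0 exposes the index; '' maps to 0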
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_enum_indexes.result b/storage/rocksdb/mysql-test/rocksdb/r/type_enum_indexes.result
new file mode 100644
index 00000000000..70bbc840454
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_enum_indexes.result
@@ -0,0 +1,69 @@
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+a ENUM('N.America','S.America','Africa','Europe','Australia','Asia','Antarctica'),
+b ENUM('test1','test2','test3','test4','test5'),
+c ENUM('1a','1b','1d','1j','4a','4z','5a','5b','6v','6z') PRIMARY KEY
+) ENGINE=rocksdb;
+INSERT INTO t1 (a,b,c) VALUES
+('N.America','test1','5a'),('Europe','test1','5b'),('Europe','test2','6v'),
+('Africa','test3','4z'),('Africa','test4','1j'),('Antarctica','test4','1d');
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 c A 1000 NULL NULL LSMTREE
+EXPLAIN SELECT c FROM t1 WHERE c BETWEEN '1d' AND '6u';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index PRIMARY PRIMARY 1 NULL # Using where; Using index
+SELECT c FROM t1 WHERE c BETWEEN '1d' AND '6u';
+c
+1d
+1j
+4z
+5a
+5b
+EXPLAIN SELECT c FROM t1 USE INDEX () WHERE c BETWEEN '1d' AND '6u';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL # Using where
+SELECT c FROM t1 USE INDEX () WHERE c BETWEEN '1d' AND '6u';
+c
+1d
+1j
+4z
+5a
+5b
+DROP TABLE t1;
+CREATE TABLE t1 (
+a ENUM('N.America','S.America','Africa','Europe','Australia','Asia','Antarctica'),
+b ENUM('test1','test2','test3','test4','test5'),
+c ENUM('1a','1b','1d','1j','4a','4z','5a','5b','6v','6z'),
+pk ENUM('1','2','3','4','5','6','7','8','9') PRIMARY KEY,
+INDEX(b)
+) ENGINE=rocksdb;
+INSERT INTO t1 (a,b,c,pk) VALUES
+('N.America','test1','5a',1),('Europe','test1','5b',2),('Europe','test2','6v',3),
+('Africa','test3','4z',4),('Africa','test4','1j',5),('Antarctica','test4','1d',6);
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A 1000 NULL NULL LSMTREE
+t1 1 b 1 b A 500 NULL NULL YES LSMTREE
+EXPLAIN SELECT DISTINCT b FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL b 2 NULL #
+SELECT DISTINCT b FROM t1;
+b
+test1
+test2
+test3
+test4
+EXPLAIN SELECT DISTINCT b FROM t1 IGNORE INDEX (b);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL # Using temporary
+SELECT DISTINCT b FROM t1 IGNORE INDEX (b);
+b
+test1
+test2
+test3
+test4
+DROP TABLE t1;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
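Worth noting from the key_len values above: ENUM is stored and keyed as its numeric index, one byte for up to 255 members and two beyond that, plus one NULL-flag byte for nullable columns. Hence key_len 1 for the NOT NULL primary key on c and key_len 2 for the nullable b index. Illustrative check:

  EXPLAIN SELECT c FROM t1 WHERE c BETWEEN '1d' AND '6u';  -- key_len 1: one enum byte, NOT NULL
  EXPLAIN SELECT DISTINCT b FROM t1;                       -- key_len 2: enum byte + NULL flag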
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_fixed.result b/storage/rocksdb/mysql-test/rocksdb/r/type_fixed.result
new file mode 100644
index 00000000000..055952ea55f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_fixed.result
@@ -0,0 +1,131 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+d DECIMAL ,
+d0 DECIMAL(0) ,
+d1_1 DECIMAL(1,1) ,
+d10_2 DECIMAL(10,2) ,
+d60_10 DECIMAL(60,10) ,
+n NUMERIC ,
+n0_0 NUMERIC(0,0) ,
+n1 NUMERIC(1) ,
+n20_4 NUMERIC(20,4) ,
+n65_4 NUMERIC(65,4) ,
+pk NUMERIC PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+d decimal(10,0) YES NULL
+d0 decimal(10,0) YES NULL
+d1_1 decimal(1,1) YES NULL
+d10_2 decimal(10,2) YES NULL
+d60_10 decimal(60,10) YES NULL
+n decimal(10,0) YES NULL
+n0_0 decimal(10,0) YES NULL
+n1 decimal(1,0) YES NULL
+n20_4 decimal(20,4) YES NULL
+n65_4 decimal(65,4) YES NULL
+pk decimal(10,0) NO PRI NULL
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (100,123456,0.3,40000.25,123456789123456789.10001,1024,7000.0,8.0,999999.9,9223372036854775807,1);
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (0,0,0,0,0,0,0,0,0,0,2);
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (9999999999.0,9999999999.0,0.9,99999999.99,99999999999999999999999999999999999999999999999999.9999999999,9999999999.0,9999999999.0,9.0,9999999999999999.9999,9999999999999999999999999999999999999999999999999999999999999.9999,3);
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+100 123456 0.3 40000.25 123456789123456789.1000100000 1024 7000 8 999999.9000 9223372036854775807.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (-100,-123456,-0.3,-40000.25,-123456789123456789.10001,-1024,-7000.0,-8.0,-999999.9,-9223372036854775807,4);
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (-9999999999.0,-9999999999.0,-0.9,-99999999.99,-99999999999999999999999999999999999999999999999999.9999999999,-9999999999.0,-9999999999.0,-9.0,-9999999999999999.9999,-9999999999999999999999999999999999999999999999999999999999999.9999,5);
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+-100 -123456 -0.3 -40000.25 -123456789123456789.1000100000 -1024 -7000 -8 -999999.9000 -9223372036854775807.0000
+-9999999999 -9999999999 -0.9 -99999999.99 -99999999999999999999999999999999999999999999999999.9999999999 -9999999999 -9999999999 -9 -9999999999999999.9999 -9999999999999999999999999999999999999999999999999999999999999.9999
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+100 123456 0.3 40000.25 123456789123456789.1000100000 1024 7000 8 999999.9000 9223372036854775807.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1 WHERE n20_4 = 9999999999999999.9999 OR d < 100;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+-100 -123456 -0.3 -40000.25 -123456789123456789.1000100000 -1024 -7000 -8 -999999.9000 -9223372036854775807.0000
+-9999999999 -9999999999 -0.9 -99999999.99 -99999999999999999999999999999999999999999999999999.9999999999 -9999999999 -9999999999 -9 -9999999999999999.9999 -9999999999999999999999999999999999999999999999999999999999999.9999
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+6
+);
+Warnings:
+Warning 1264 Out of range value for column 'd' at row 1
+Warning 1264 Out of range value for column 'd0' at row 1
+Warning 1264 Out of range value for column 'd1_1' at row 1
+Warning 1264 Out of range value for column 'd10_2' at row 1
+Warning 1264 Out of range value for column 'd60_10' at row 1
+Warning 1264 Out of range value for column 'n' at row 1
+Warning 1264 Out of range value for column 'n0_0' at row 1
+Warning 1264 Out of range value for column 'n1' at row 1
+Warning 1264 Out of range value for column 'n20_4' at row 1
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+-100 -123456 -0.3 -40000.25 -123456789123456789.1000100000 -1024 -7000 -8 -999999.9000 -9223372036854775807.0000
+-9999999999 -9999999999 -0.9 -99999999.99 -99999999999999999999999999999999999999999999999999.9999999999 -9999999999 -9999999999 -9 -9999999999999999.9999 -9999999999999999999999999999999999999999999999999999999999999.9999
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+100 123456 0.3 40000.25 123456789123456789.1000100000 1024 7000 8 999999.9000 9223372036854775807.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (10000000000.0,10000000000.0,1.1,100000000.99,100000000000000000000000000000000000000000000000000.0,10000000000.0,10000000000.0,10.0,10000000000000000.9999,10000000000000000000000000000000000000000000000000000000000000.9999,7);
+Warnings:
+Warning 1264 Out of range value for column 'd' at row 1
+Warning 1264 Out of range value for column 'd0' at row 1
+Warning 1264 Out of range value for column 'd1_1' at row 1
+Warning 1264 Out of range value for column 'd10_2' at row 1
+Warning 1264 Out of range value for column 'd60_10' at row 1
+Warning 1264 Out of range value for column 'n' at row 1
+Warning 1264 Out of range value for column 'n0_0' at row 1
+Warning 1264 Out of range value for column 'n1' at row 1
+Warning 1264 Out of range value for column 'n20_4' at row 1
+Warning 1264 Out of range value for column 'n65_4' at row 1
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+-100 -123456 -0.3 -40000.25 -123456789123456789.1000100000 -1024 -7000 -8 -999999.9000 -9223372036854775807.0000
+-9999999999 -9999999999 -0.9 -99999999.99 -99999999999999999999999999999999999999999999999999.9999999999 -9999999999 -9999999999 -9 -9999999999999999.9999 -9999999999999999999999999999999999999999999999999999999999999.9999
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+100 123456 0.3 40000.25 123456789123456789.1000100000 1024 7000 8 999999.9000 9223372036854775807.0000
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (9999999999.1,9999999999.1,1.9,99999999.001,99999999999999999999999999999999999999999999999999.99999999991,9999999999.1,9999999999.1,9.1,9999999999999999.00001,9999999999999999999999999999999999999999999999999999999999999.11111,8);
+Warnings:
+Note 1265 Data truncated for column 'd' at row 1
+Note 1265 Data truncated for column 'd0' at row 1
+Warning 1264 Out of range value for column 'd1_1' at row 1
+Note 1265 Data truncated for column 'd10_2' at row 1
+Note 1265 Data truncated for column 'd60_10' at row 1
+Note 1265 Data truncated for column 'n' at row 1
+Note 1265 Data truncated for column 'n0_0' at row 1
+Note 1265 Data truncated for column 'n1' at row 1
+Note 1265 Data truncated for column 'n20_4' at row 1
+Note 1265 Data truncated for column 'n65_4' at row 1
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+d d0 d1_1 d10_2 d60_10 n n0_0 n1 n20_4 n65_4
+-100 -123456 -0.3 -40000.25 -123456789123456789.1000100000 -1024 -7000 -8 -999999.9000 -9223372036854775807.0000
+-9999999999 -9999999999 -0.9 -99999999.99 -99999999999999999999999999999999999999999999999999.9999999999 -9999999999 -9999999999 -9 -9999999999999999.9999 -9999999999999999999999999999999999999999999999999999999999999.9999
+0 0 0.0 0.00 0.0000000000 0 0 0 0.0000 0.0000
+100 123456 0.3 40000.25 123456789123456789.1000100000 1024 7000 8 999999.9000 9223372036854775807.0000
+9999999999 9999999999 0.9 99999999.00 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.0000 9999999999999999999999999999999999999999999999999999999999999.1111
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+9999999999 9999999999 0.9 99999999.99 99999999999999999999999999999999999999999999999999.9999999999 9999999999 9999999999 9 9999999999999999.9999 9999999999999999999999999999999999999999999999999999999999999.9999
+ALTER TABLE t1 ADD COLUMN n66 NUMERIC(66) ;
+ERROR 42000: Too big precision 66 specified for 'n66'. Maximum is 65
+ALTER TABLE t1 ADD COLUMN n66_6 DECIMAL(66,6) ;
+ERROR 42000: Too big precision 66 specified for 'n66_6'. Maximum is 65
+ALTER TABLE t1 ADD COLUMN n66_66 DECIMAL(66,66) ;
+ERROR 42000: Too big scale 66 specified for 'n66_66'. Maximum is 38
+DROP TABLE t1;
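The trailing errors pin down the DECIMAL envelope these tests assume: a bare DECIMAL or NUMERIC defaults to (10,0), precision tops out at 65, and scale (in this MariaDB tree) at 38. Illustrative boundary checks; dec_demo is not part of the suite:

  CREATE TABLE dec_demo (ok DECIMAL(65,38));            -- largest precision/scale accepted here
  ALTER TABLE dec_demo ADD COLUMN bad1 DECIMAL(66);     -- rejected: precision 66 > 65
  ALTER TABLE dec_demo ADD COLUMN bad2 DECIMAL(40,39);  -- rejected: scale 39 > 38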
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_fixed_indexes.result b/storage/rocksdb/mysql-test/rocksdb/r/type_fixed_indexes.result
new file mode 100644
index 00000000000..3f6a0f5d2b9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_fixed_indexes.result
@@ -0,0 +1,129 @@
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+d1 DECIMAL(10,2) PRIMARY KEY,
+d2 DECIMAL(60,10),
+n1 NUMERIC,
+n2 NUMERIC(65,4)
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 d1 A 1000 NULL NULL LSMTREE
+INSERT INTO t1 (d1,d2,n1,n2) VALUES
+(10.22,60.12345,123456,14.3456),
+(10.0,60.12345,123456,14),
+(11.14,15,123456,13),
+(100,100,1,2),
+(0,0,0,0),
+(4540424564.23,3343303441.0,12,13),
+(15,17,23,100000);
+Warnings:
+Warning 1264 Out of range value for column 'd1' at row 6
+EXPLAIN SELECT d1 FROM t1 ORDER BY d1 DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL PRIMARY 5 NULL # Using index
+SELECT d1 FROM t1 ORDER BY d1 DESC;
+d1
+99999999.99
+100.00
+15.00
+11.14
+10.22
+10.00
+0.00
+EXPLAIN SELECT d1 FROM t1 IGNORE INDEX FOR ORDER BY (PRIMARY) ORDER BY d1 DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL PRIMARY 5 NULL # Using index; Using filesort
+SELECT d1 FROM t1 IGNORE INDEX FOR ORDER BY (PRIMARY) ORDER BY d1 DESC;
+d1
+99999999.99
+100.00
+15.00
+11.14
+10.22
+10.00
+0.00
+DROP TABLE t1;
+CREATE TABLE t1 (
+d1 DECIMAL(10,2),
+d2 DECIMAL(60,10),
+n1 NUMERIC,
+n2 NUMERIC(65,4),
+pk NUMERIC PRIMARY KEY,
+UNIQUE INDEX n1_n2 (n1,n2)
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A 1000 NULL NULL LSMTREE
+t1 0 n1_n2 1 n1 A 500 NULL NULL YES LSMTREE
+t1 0 n1_n2 2 n2 A 1000 NULL NULL YES LSMTREE
+INSERT INTO t1 (d1,d2,n1,n2,pk) VALUES
+(10.22,60.12345,123456,14.3456,1),
+(10.0,60.12345,123456,14,2),
+(11.14,15,123456,13,3),
+(100,100,1,2,4),
+(0,0,0,0,5),
+(4540424564.23,3343303441.0,12,13,6),
+(15,17,23,100000,7);
+Warnings:
+Warning 1264 Out of range value for column 'd1' at row 6
+EXPLAIN SELECT DISTINCT n1+n2 FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL n1_n2 37 NULL # Using index; Using temporary
+SELECT DISTINCT n1+n2 FROM t1;
+n1+n2
+0.0000
+100023.0000
+123469.0000
+123470.0000
+123470.3456
+25.0000
+3.0000
+DROP TABLE t1;
+CREATE TABLE t1 (
+d1 DECIMAL(10,2),
+d2 DECIMAL(60,10),
+n1 NUMERIC,
+n2 NUMERIC(65,4),
+pk DECIMAL(20,10) PRIMARY KEY,
+INDEX (d2)
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A 1000 NULL NULL LSMTREE
+t1 1 d2 1 d2 A 500 NULL NULL YES LSMTREE
+INSERT INTO t1 (d1,d2,n1,n2,pk) VALUES
+(10.22,60.12345,123456,14.3456,1),
+(10.0,60.12345,123456,14,2),
+(11.14,15,123456,13,3),
+(100,100,1,2,4),
+(0,0,0,0,5),
+(4540424564.23,3343303441.0,12,13,6),
+(15,17,23,100000,7);
+Warnings:
+Warning 1264 Out of range value for column 'd1' at row 6
+EXPLAIN SELECT d2, COUNT(*) FROM t1 GROUP BY d2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL d2 29 NULL # Using index
+SELECT d2, COUNT(*) FROM t1 GROUP BY d2;
+d2 COUNT(*)
+0.0000000000 1
+100.0000000000 1
+15.0000000000 1
+17.0000000000 1
+3343303441.0000000000 1
+60.1234500000 2
+EXPLAIN SELECT d2, COUNT(*) FROM t1 IGNORE INDEX FOR GROUP BY (d2) GROUP BY d2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL d2 29 NULL # Using index; Using temporary; Using filesort
+SELECT d2, COUNT(*) FROM t1 IGNORE INDEX FOR GROUP BY (d2) GROUP BY d2;
+d2 COUNT(*)
+0.0000000000 1
+100.0000000000 1
+15.0000000000 1
+17.0000000000 1
+3343303441.0000000000 1
+60.1234500000 2
+DROP TABLE t1;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
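Also visible above: in non-strict mode a DECIMAL overflow clamps to the column maximum instead of failing, so 4540424564.23 lands in d1 DECIMAL(10,2) as 99999999.99 with warning 1264, and that clamped value then heads the descending primary-key scan. Sketch; clamp_demo is illustrative:

  CREATE TABLE clamp_demo (d1 DECIMAL(10,2));
  INSERT INTO clamp_demo VALUES (4540424564.23);  -- warning 1264, stored clamped
  SELECT d1 FROM clamp_demo;                      -- 99999999.99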
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_float.result b/storage/rocksdb/mysql-test/rocksdb/r/type_float.result
new file mode 100644
index 00000000000..778ed95d338
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_float.result
@@ -0,0 +1,314 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+f FLOAT ,
+f0 FLOAT(0) ,
+r1_1 REAL(1,1) ,
+f23_0 FLOAT(23) ,
+f20_3 FLOAT(20,3) ,
+d DOUBLE ,
+d1_0 DOUBLE(1,0) ,
+d10_10 DOUBLE PRECISION (10,10) ,
+d53 DOUBLE(53,0) ,
+d53_10 DOUBLE(53,10) ,
+pk DOUBLE PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+f float YES NULL
+f0 float YES NULL
+r1_1 double(1,1) YES NULL
+f23_0 float YES NULL
+f20_3 float(20,3) YES NULL
+d double YES NULL
+d1_0 double(1,0) YES NULL
+d10_10 double(10,10) YES NULL
+d53 double(53,0) YES NULL
+d53_10 double(53,10) YES NULL
+pk double NO PRI NULL
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (12345.12345,12345.12345,0.9,123456789.123,56789.987,11111111.111,8.0,0.0123456789,1234566789123456789,99999999999999999.99999999,1);
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 12345.1
+d 11111111.111
+d10_10 0.0123456789
+d1_0 8
+d53 1234566789123456800
+d53_10 100000000000000000.0000000000
+f0 12345.1
+f20_3 56789.988
+f23_0 123457000
+r1_1 0.9
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (0,0,0,0,0,0,0,0,0,0,2);
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (
+99999999999999999999999999999999999999,
+99999999999999999999999999999999999999.9999999999999999,
+0.9,
+99999999999999999999999999999999999999.9,
+99999999999999999.999,
+999999999999999999999999999999999999999999999999999999999999999999999999999999999,
+9,
+0.9999999999,
+1999999999999999999999999999999999999999999999999999999,
+19999999999999999999999999999999999999999999.9999999999,
+3
+);
+Warnings:
+Warning 1264 Out of range value for column 'd53' at row 1
+Warning 1264 Out of range value for column 'd53_10' at row 1
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 12345.1
+d 0
+d 11111111.111
+d 1e81
+d10_10 0.0000000000
+d10_10 0.0123456789
+d10_10 0.9999999999
+d1_0 0
+d1_0 8
+d1_0 9
+d53 0
+d53 100000000000000000000000000000000000000000000000000000
+d53 1234566789123456800
+d53_10 0.0000000000
+d53_10 100000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+f 0
+f 1e38
+f0 0
+f0 12345.1
+f0 1e38
+f20_3 0.000
+f20_3 56789.988
+f20_3 99999998430674940.000
+f23_0 0
+f23_0 123457000
+f23_0 1e38
+r1_1 0.0
+r1_1 0.9
+r1_1 0.9
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (-999999999999999999999999,-99999999999.999999999999,-0.9,-999.99999999999999999999,-99999999999999999.999,-999999999999999999999999999999999999999999999999999999999999-0.999,-9,-.9999999999,-999999999999999999999999999999.99999999999999999999999,-9999999999999999999999999999999999999999999.9999999999,4);
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 12345.1
+d -1e60
+d 0
+d 11111111.111
+d 1e81
+d10_10 -0.9999999999
+d10_10 0.0000000000
+d10_10 0.0123456789
+d10_10 0.9999999999
+d1_0 -9
+d1_0 0
+d1_0 8
+d1_0 9
+d53 -1000000000000000000000000000000
+d53 0
+d53 100000000000000000000000000000000000000000000000000000
+d53 1234566789123456800
+d53_10 -10000000000000000000000000000000000000000000.0000000000
+d53_10 0.0000000000
+d53_10 100000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+f -1e24
+f 0
+f 1e38
+f0 -100000000000
+f0 0
+f0 12345.1
+f0 1e38
+f20_3 -99999998430674940.000
+f20_3 0.000
+f20_3 56789.988
+f20_3 99999998430674940.000
+f23_0 -1000
+f23_0 0
+f23_0 123457000
+f23_0 1e38
+r1_1 -0.9
+r1_1 0.0
+r1_1 0.9
+r1_1 0.9
+SELECT
+CONCAT('', MAX(f)),
+CONCAT('', MAX(f0)),
+CONCAT('', MAX(r1_1)),
+CONCAT('', MAX(f23_0)),
+CONCAT('', MAX(f20_3)),
+CONCAT('', MAX(d)),
+CONCAT('', MAX(d1_0)),
+CONCAT('', MAX(d10_10)),
+CONCAT('', MAX(d53)),
+CONCAT('', MAX(d53_10)) FROM t1;
+CONCAT('', MAX(f)) 9.999999680285692e37
+CONCAT('', MAX(d)) 1e81
+CONCAT('', MAX(d10_10)) 0.9999999999
+CONCAT('', MAX(d1_0)) 9
+CONCAT('', MAX(d53)) 100000000000000000000000000000000000000000000000000000
+CONCAT('', MAX(d53_10)) 10000000000000000000000000000000000000000000.0000000000
+CONCAT('', MAX(f0)) 9.999999680285692e37
+CONCAT('', MAX(f20_3)) 99999998430674940.000
+CONCAT('', MAX(f23_0)) 9.999999680285692e37
+CONCAT('', MAX(r1_1)) 0.9
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+9999999999999999999999999999999999999999999999999999999999999.9999,
+5
+);
+Warnings:
+Warning 1264 Out of range value for column 'f' at row 1
+Warning 1264 Out of range value for column 'f0' at row 1
+Warning 1264 Out of range value for column 'r1_1' at row 1
+Warning 1264 Out of range value for column 'f23_0' at row 1
+Warning 1264 Out of range value for column 'f20_3' at row 1
+Warning 1264 Out of range value for column 'd1_0' at row 1
+Warning 1264 Out of range value for column 'd10_10' at row 1
+Warning 1264 Out of range value for column 'd53' at row 1
+Warning 1264 Out of range value for column 'd53_10' at row 1
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 12345.1
+d -1e60
+d 0
+d 11111111.111
+d 1e61
+d 1e81
+d10_10 -0.9999999999
+d10_10 0.0000000000
+d10_10 0.0123456789
+d10_10 0.9999999999
+d10_10 0.9999999999
+d1_0 -9
+d1_0 0
+d1_0 8
+d1_0 9
+d1_0 9
+d53 -1000000000000000000000000000000
+d53 0
+d53 100000000000000000000000000000000000000000000000000000
+d53 100000000000000000000000000000000000000000000000000000
+d53 1234566789123456800
+d53_10 -10000000000000000000000000000000000000000000.0000000000
+d53_10 0.0000000000
+d53_10 100000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+f -1e24
+f 0
+f 1e38
+f 3.40282e38
+f0 -100000000000
+f0 0
+f0 12345.1
+f0 1e38
+f0 3.40282e38
+f20_3 -99999998430674940.000
+f20_3 0.000
+f20_3 56789.988
+f20_3 99999998430674940.000
+f20_3 99999998430674940.000
+f23_0 -1000
+f23_0 0
+f23_0 123457000
+f23_0 1e38
+f23_0 3.40282e38
+r1_1 -0.9
+r1_1 0.0
+r1_1 0.9
+r1_1 0.9
+r1_1 0.9
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (
+999999999999999999999999999999999999999,
+999999999999999999999999999999999999999.9999999999999999,
+1.9,
+999999999999999999999999999999999999999.9,
+999999999999999999.999,
+9999999999999999999999999999999999999999999999999999999999999999999999999999999999,
+99,
+1.9999999999,
+1999999999999999999999999999999999999999999999999999999,
+19999999999999999999999999999999999999999999.9999999999,
+6
+);
+Warnings:
+Warning 1916 Got overflow when converting '' to DECIMAL. Value truncated
+Warning 1264 Out of range value for column 'f' at row 1
+Warning 1264 Out of range value for column 'f0' at row 1
+Warning 1264 Out of range value for column 'r1_1' at row 1
+Warning 1264 Out of range value for column 'f23_0' at row 1
+Warning 1264 Out of range value for column 'f20_3' at row 1
+Warning 1264 Out of range value for column 'd1_0' at row 1
+Warning 1264 Out of range value for column 'd10_10' at row 1
+Warning 1264 Out of range value for column 'd53' at row 1
+Warning 1264 Out of range value for column 'd53_10' at row 1
+SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1;
+f 12345.1
+d -1e60
+d 0
+d 11111111.111
+d 1e61
+d 1e65
+d 1e81
+d10_10 -0.9999999999
+d10_10 0.0000000000
+d10_10 0.0123456789
+d10_10 0.9999999999
+d10_10 0.9999999999
+d10_10 0.9999999999
+d1_0 -9
+d1_0 0
+d1_0 8
+d1_0 9
+d1_0 9
+d1_0 9
+d53 -1000000000000000000000000000000
+d53 0
+d53 100000000000000000000000000000000000000000000000000000
+d53 100000000000000000000000000000000000000000000000000000
+d53 100000000000000000000000000000000000000000000000000000
+d53 1234566789123456800
+d53_10 -10000000000000000000000000000000000000000000.0000000000
+d53_10 0.0000000000
+d53_10 100000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+d53_10 10000000000000000000000000000000000000000000.0000000000
+f -1e24
+f 0
+f 1e38
+f 3.40282e38
+f 3.40282e38
+f0 -100000000000
+f0 0
+f0 12345.1
+f0 1e38
+f0 3.40282e38
+f0 3.40282e38
+f20_3 -99999998430674940.000
+f20_3 0.000
+f20_3 56789.988
+f20_3 99999998430674940.000
+f20_3 99999998430674940.000
+f20_3 99999998430674940.000
+f23_0 -1000
+f23_0 0
+f23_0 123457000
+f23_0 1e38
+f23_0 3.40282e38
+f23_0 3.40282e38
+r1_1 -0.9
+r1_1 0.0
+r1_1 0.9
+r1_1 0.9
+r1_1 0.9
+r1_1 0.9
+ALTER TABLE t1 ADD COLUMN d0_0 DOUBLE(0,0) ;
+ALTER TABLE t1 ADD COLUMN n66_6 DECIMAL(256,1) ;
+ERROR 42000: Too big precision 256 specified for 'n66_6'. Maximum is 65
+ALTER TABLE t1 ADD COLUMN n66_66 DECIMAL(40,35) ;
+DROP TABLE t1;
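The float results above reflect single-precision storage: FLOAT keeps roughly seven significant decimal digits (12345.12345 reads back as 12345.1), values past the type's range clamp to +/-3.40282e38 with warning 1264, and the nonstandard FLOAT(M,D)/DOUBLE(M,D) forms add decimal rounding on top, which is how 56789.987 became 56789.988 in FLOAT(20,3). Minimal sketch; float_demo is illustrative:

  CREATE TABLE float_demo (f FLOAT, f20_3 FLOAT(20,3));
  INSERT INTO float_demo VALUES (12345.12345, 56789.987);
  SELECT f, f20_3 FROM float_demo;  -- 12345.1 and 56789.988: binary rounding shows through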
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_float_indexes.result b/storage/rocksdb/mysql-test/rocksdb/r/type_float_indexes.result
new file mode 100644
index 00000000000..99d6bbe45b9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_float_indexes.result
@@ -0,0 +1,189 @@
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+f FLOAT PRIMARY KEY,
+r REAL(20,3),
+d DOUBLE,
+dp DOUBLE PRECISION (64,10)
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 f A 1000 NULL NULL LSMTREE
+INSERT INTO t1 (f,r,d,dp) VALUES
+(1.2345,1422.22,1.2345,1234567.89),
+(0,0,0,0),
+(-1,-1,-1,-1),
+(17.5843,4953453454.44,29229114.0,1111111.23),
+(4644,1422.22,466664.999,0.5);
+EXPLAIN SELECT f FROM t1 ORDER BY f;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL PRIMARY 4 NULL # Using index
+SELECT f FROM t1 ORDER BY f;
+f
+-1
+0
+1.2345
+17.5843
+4644
+EXPLAIN SELECT f FROM t1 IGNORE INDEX (PRIMARY) ORDER BY f;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL # Using filesort
+SELECT f FROM t1 IGNORE INDEX (PRIMARY) ORDER BY f;
+f
+-1
+0
+1.2345
+17.5843
+4644
+DROP TABLE t1;
+CREATE TABLE t1 (
+f FLOAT,
+r REAL(20,3),
+d DOUBLE,
+dp DOUBLE PRECISION (64,10),
+pk DOUBLE PRIMARY KEY,
+UNIQUE KEY r_dp (r,dp)
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A 1000 NULL NULL LSMTREE
+t1 0 r_dp 1 r A 500 NULL NULL YES LSMTREE
+t1 0 r_dp 2 dp A 1000 NULL NULL YES LSMTREE
+INSERT INTO t1 (f,r,d,dp,pk) VALUES
+(1.2345,1422.22,1.2345,1234567.89,1),
+(0,0,0,0,2),
+(-1,-1,-1,-1,3),
+(17.5843,4953453454.44,29229114.0,1111111.23,4),
+(4644,1422.22,466664.999,0.5,5);
+EXPLAIN SELECT r, dp FROM t1 WHERE r > 0 or dp > 0;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index r_dp r_dp 18 NULL # Using where; Using index
+SELECT r, dp FROM t1 WHERE r > 0 or dp > 0;
+r dp
+1422.220 0.5000000000
+1422.220 1234567.8900000000
+4953453454.440 1111111.2300000000
+DROP TABLE t1;
+CREATE TABLE t1 (
+f FLOAT,
+r REAL(20,3),
+d DOUBLE,
+dp DOUBLE PRECISION (64,10),
+pk FLOAT PRIMARY KEY,
+UNIQUE KEY(d)
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A 1000 NULL NULL LSMTREE
+t1 0 d 1 d A 1000 NULL NULL YES LSMTREE
+INSERT INTO t1 (f,r,d,dp,pk) VALUES
+(1.2345,1422.22,1.2345,1234567.89,1),
+(0,0,0,0,2),
+(-1,-1,-1,-1,3),
+(17.5843,4953453454.44,29229114.0,1111111.23,4),
+(4644,1422.22,466664.999,0.5,5);
+EXPLAIN SELECT DISTINCT d FROM t1 ORDER BY d;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL d 9 NULL # Using index
+SELECT DISTINCT d FROM t1 ORDER BY d;
+d
+-1
+0
+1.2345
+466664.999
+29229114
+DROP TABLE t1;
+CREATE TABLE t1 (
+f FLOAT,
+r REAL(20,3),
+d DOUBLE,
+dp DOUBLE PRECISION (64,10),
+pk FLOAT PRIMARY KEY,
+KEY(d)
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A 1000 NULL NULL LSMTREE
+t1 1 d 1 d A 500 NULL NULL YES LSMTREE
+INSERT INTO t1 (f,r,d,dp,pk) VALUES
+(1.2345,1422.22,1.2345,1234567.89,1),
+(0,0,0,0,2),
+(-1,-1,-1,-1,3),
+(17.5843,4953453454.44,29229114.0,1111111.23,4),
+(4644,1422.22,466664.999,0.5,5);
+EXPLAIN SELECT DISTINCT d FROM t1 ORDER BY d;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL d 9 NULL # Using index
+SELECT DISTINCT d FROM t1 ORDER BY d;
+d
+-1
+0
+1.2345
+466664.999
+29229114
+DROP TABLE t1;
+CREATE TABLE t1 (
+f FLOAT,
+r REAL(20,3),
+d DOUBLE,
+dp DOUBLE PRECISION (64,10),
+pk FLOAT PRIMARY KEY,
+UNIQUE KEY(f)
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A 1000 NULL NULL LSMTREE
+t1 0 f 1 f A 1000 NULL NULL YES LSMTREE
+INSERT INTO t1 (f,r,d,dp,pk) VALUES
+(1.2345,1422.22,1.2345,1234567.89,1),
+(0,0,0,0,2),
+(-1,-1,-1,-1,3),
+(17.5843,4953453454.44,29229114.0,1111111.23,4),
+(4644,1422.22,466664.999,0.5,5);
+INSERT INTO t1 (f,r,d,dp,pk) VALUES
+(1.2345,0,0,0,6);
+ERROR 23000: Duplicate entry '1.2345' for key 'f'
+EXPLAIN SELECT DISTINCT f FROM t1 ORDER BY f;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL f 5 NULL # Using index
+SELECT DISTINCT f FROM t1 ORDER BY f;
+f
+-1
+0
+1.2345
+17.5843
+4644
+DROP TABLE t1;
+CREATE TABLE t1 (
+f FLOAT,
+r REAL(20,3),
+d DOUBLE,
+dp DOUBLE PRECISION (64,10),
+pk FLOAT PRIMARY KEY,
+KEY(f)
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A 1000 NULL NULL LSMTREE
+t1 1 f 1 f A 500 NULL NULL YES LSMTREE
+INSERT INTO t1 (f,r,d,dp,pk) VALUES
+(1.2345,1422.22,1.2345,1234567.89,1),
+(0,0,0,0,2),
+(-1,-1,-1,-1,3),
+(17.5843,4953453454.44,29229114.0,1111111.23,4),
+(4644,1422.22,466664.999,0.5,5);
+INSERT INTO t1 (f,r,d,dp,pk) VALUES
+(1.2345,0,0,0,6);
+EXPLAIN SELECT DISTINCT f FROM t1 ORDER BY f;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL f 5 NULL # Using index
+SELECT DISTINCT f FROM t1 ORDER BY f;
+f
+-1
+0
+1.2345
+17.5843
+4644
+DROP TABLE t1;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
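The contrast above (the duplicate 1.2345 is rejected under UNIQUE KEY(f) but accepted under a plain KEY(f)) can be reproduced outside the test suite; a hedged sketch, with float_uniq_demo as an illustrative name:

  CREATE TABLE float_uniq_demo (
    pk INT PRIMARY KEY,
    f FLOAT,
    UNIQUE KEY (f)
  ) ENGINE=rocksdb;
  INSERT INTO float_uniq_demo VALUES (1, 1.2345);
  -- Fails with ER_DUP_ENTRY: both FLOAT literals convert to the same
  -- stored value, so the unique index sees a duplicate.
  INSERT INTO float_uniq_demo VALUES (2, 1.2345);
  DROP TABLE float_uniq_demo;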
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_int.result b/storage/rocksdb/mysql-test/rocksdb/r/type_int.result
new file mode 100644
index 00000000000..306042912d0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_int.result
@@ -0,0 +1,212 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+i INT ,
+i0 INT(0) ,
+i1 INT(1) ,
+i20 INT(20) ,
+t TINYINT ,
+t0 TINYINT(0) ,
+t1 TINYINT(1) ,
+t20 TINYINT(20) ,
+s SMALLINT ,
+s0 SMALLINT(0) ,
+s1 SMALLINT(1) ,
+s20 SMALLINT(20) ,
+m MEDIUMINT ,
+m0 MEDIUMINT(0) ,
+m1 MEDIUMINT(1) ,
+m20 MEDIUMINT(20) ,
+b BIGINT ,
+b0 BIGINT(0) ,
+b1 BIGINT(1) ,
+b20 BIGINT(20) ,
+pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+i0 int(11) YES NULL
+i1 int(1) YES NULL
+i20 int(20) YES NULL
+t tinyint(4) YES NULL
+t0 tinyint(4) YES NULL
+t1 tinyint(1) YES NULL
+t20 tinyint(20) YES NULL
+s smallint(6) YES NULL
+s0 smallint(6) YES NULL
+s1 smallint(1) YES NULL
+s20 smallint(20) YES NULL
+m mediumint(9) YES NULL
+m0 mediumint(9) YES NULL
+m1 mediumint(1) YES NULL
+m20 mediumint(20) YES NULL
+b bigint(20) YES NULL
+b0 bigint(20) YES NULL
+b1 bigint(1) YES NULL
+b20 bigint(20) YES NULL
+pk int(11) NO PRI NULL auto_increment
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20);
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (2147483647,2147483647,2147483647,2147483647,127,127,127,127,32767,32767,32767,32767,8388607,8388607,8388607,8388607,9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807);
+SELECT i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20 FROM t1;
+i i0 i1 i20 t t0 t1 t20 s s0 s1 s20 m m0 m1 m20 b b0 b1 b20
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (-2147483648,-2147483648,-2147483648,-2147483648,-128,-128,-128,-128,-32768,-32768,-32768,-32768,-8388608,-8388608,-8388608,-8388608,-9223372036854775808,-9223372036854775808,-9223372036854775808,-9223372036854775808);
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (4294967295,4294967295,4294967295,4294967295,255,255,255,255,65535,65535,65535,65535,16777215,16777215,16777215,16777215,18446744073709551615,18446744073709551615,18446744073709551615,18446744073709551615);
+Warnings:
+Warning 1264 Out of range value for column 'i' at row 1
+Warning 1264 Out of range value for column 'i0' at row 1
+Warning 1264 Out of range value for column 'i1' at row 1
+Warning 1264 Out of range value for column 'i20' at row 1
+Warning 1264 Out of range value for column 't' at row 1
+Warning 1264 Out of range value for column 't0' at row 1
+Warning 1264 Out of range value for column 't1' at row 1
+Warning 1264 Out of range value for column 't20' at row 1
+Warning 1264 Out of range value for column 's' at row 1
+Warning 1264 Out of range value for column 's0' at row 1
+Warning 1264 Out of range value for column 's1' at row 1
+Warning 1264 Out of range value for column 's20' at row 1
+Warning 1264 Out of range value for column 'm' at row 1
+Warning 1264 Out of range value for column 'm0' at row 1
+Warning 1264 Out of range value for column 'm1' at row 1
+Warning 1264 Out of range value for column 'm20' at row 1
+Warning 1264 Out of range value for column 'b' at row 1
+Warning 1264 Out of range value for column 'b0' at row 1
+Warning 1264 Out of range value for column 'b1' at row 1
+Warning 1264 Out of range value for column 'b20' at row 1
+SELECT i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20 FROM t1;
+i i0 i1 i20 t t0 t1 t20 s s0 s1 s20 m m0 m1 m20 b b0 b1 b20
+-2147483648 -2147483648 -2147483648 -2147483648 -128 -128 -128 -128 -32768 -32768 -32768 -32768 -8388608 -8388608 -8388608 -8388608 -9223372036854775808 -9223372036854775808 -9223372036854775808 -9223372036854775808
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (-2147483649,-2147483649,-2147483649,-2147483649,-129,-129,-129,-129,-32769,-32769,-32769,-32769,-8388609,-8388609,-8388609,-8388609,-9223372036854775809,-9223372036854775809,-9223372036854775809,-9223372036854775809);
+Warnings:
+Warning 1264 Out of range value for column 'i' at row 1
+Warning 1264 Out of range value for column 'i0' at row 1
+Warning 1264 Out of range value for column 'i1' at row 1
+Warning 1264 Out of range value for column 'i20' at row 1
+Warning 1264 Out of range value for column 't' at row 1
+Warning 1264 Out of range value for column 't0' at row 1
+Warning 1264 Out of range value for column 't1' at row 1
+Warning 1264 Out of range value for column 't20' at row 1
+Warning 1264 Out of range value for column 's' at row 1
+Warning 1264 Out of range value for column 's0' at row 1
+Warning 1264 Out of range value for column 's1' at row 1
+Warning 1264 Out of range value for column 's20' at row 1
+Warning 1264 Out of range value for column 'm' at row 1
+Warning 1264 Out of range value for column 'm0' at row 1
+Warning 1264 Out of range value for column 'm1' at row 1
+Warning 1264 Out of range value for column 'm20' at row 1
+Warning 1264 Out of range value for column 'b' at row 1
+Warning 1264 Out of range value for column 'b0' at row 1
+Warning 1264 Out of range value for column 'b1' at row 1
+Warning 1264 Out of range value for column 'b20' at row 1
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (4294967296,4294967296,4294967296,4294967296,256,256,256,256,65536,65536,65536,65536,16777216,16777216,16777216,16777216,18446744073709551616,18446744073709551616,18446744073709551616,18446744073709551616);
+Warnings:
+Warning 1264 Out of range value for column 'i' at row 1
+Warning 1264 Out of range value for column 'i0' at row 1
+Warning 1264 Out of range value for column 'i1' at row 1
+Warning 1264 Out of range value for column 'i20' at row 1
+Warning 1264 Out of range value for column 't' at row 1
+Warning 1264 Out of range value for column 't0' at row 1
+Warning 1264 Out of range value for column 't1' at row 1
+Warning 1264 Out of range value for column 't20' at row 1
+Warning 1264 Out of range value for column 's' at row 1
+Warning 1264 Out of range value for column 's0' at row 1
+Warning 1264 Out of range value for column 's1' at row 1
+Warning 1264 Out of range value for column 's20' at row 1
+Warning 1264 Out of range value for column 'm' at row 1
+Warning 1264 Out of range value for column 'm0' at row 1
+Warning 1264 Out of range value for column 'm1' at row 1
+Warning 1264 Out of range value for column 'm20' at row 1
+Warning 1264 Out of range value for column 'b' at row 1
+Warning 1264 Out of range value for column 'b0' at row 1
+Warning 1264 Out of range value for column 'b1' at row 1
+Warning 1264 Out of range value for column 'b20' at row 1
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) SELECT b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b FROM t1 WHERE b IN (-9223372036854775808,9223372036854775807,18446744073709551615);
+Warnings:
+Warning 1264 Out of range value for column 'i' at row 8
+Warning 1264 Out of range value for column 'i0' at row 8
+Warning 1264 Out of range value for column 'i1' at row 8
+Warning 1264 Out of range value for column 'i20' at row 8
+Warning 1264 Out of range value for column 't' at row 8
+Warning 1264 Out of range value for column 't0' at row 8
+Warning 1264 Out of range value for column 't1' at row 8
+Warning 1264 Out of range value for column 't20' at row 8
+Warning 1264 Out of range value for column 's' at row 8
+Warning 1264 Out of range value for column 's0' at row 8
+Warning 1264 Out of range value for column 's1' at row 8
+Warning 1264 Out of range value for column 's20' at row 8
+Warning 1264 Out of range value for column 'm' at row 8
+Warning 1264 Out of range value for column 'm0' at row 8
+Warning 1264 Out of range value for column 'm1' at row 8
+Warning 1264 Out of range value for column 'm20' at row 8
+Warning 1264 Out of range value for column 'i' at row 9
+Warning 1264 Out of range value for column 'i0' at row 9
+Warning 1264 Out of range value for column 'i1' at row 9
+Warning 1264 Out of range value for column 'i20' at row 9
+Warning 1264 Out of range value for column 't' at row 9
+Warning 1264 Out of range value for column 't0' at row 9
+Warning 1264 Out of range value for column 't1' at row 9
+Warning 1264 Out of range value for column 't20' at row 9
+Warning 1264 Out of range value for column 's' at row 9
+Warning 1264 Out of range value for column 's0' at row 9
+Warning 1264 Out of range value for column 's1' at row 9
+Warning 1264 Out of range value for column 's20' at row 9
+Warning 1264 Out of range value for column 'm' at row 9
+Warning 1264 Out of range value for column 'm0' at row 9
+Warning 1264 Out of range value for column 'm1' at row 9
+Warning 1264 Out of range value for column 'm20' at row 9
+Warning 1264 Out of range value for column 'i' at row 10
+Warning 1264 Out of range value for column 'i0' at row 10
+Warning 1264 Out of range value for column 'i1' at row 10
+Warning 1264 Out of range value for column 'i20' at row 10
+Warning 1264 Out of range value for column 't' at row 10
+Warning 1264 Out of range value for column 't0' at row 10
+Warning 1264 Out of range value for column 't1' at row 10
+Warning 1264 Out of range value for column 't20' at row 10
+Warning 1264 Out of range value for column 's' at row 10
+Warning 1264 Out of range value for column 's0' at row 10
+Warning 1264 Out of range value for column 's1' at row 10
+Warning 1264 Out of range value for column 's20' at row 10
+Warning 1264 Out of range value for column 'm' at row 10
+Warning 1264 Out of range value for column 'm0' at row 10
+Warning 1264 Out of range value for column 'm1' at row 10
+Warning 1264 Out of range value for column 'm20' at row 10
+Warning 1264 Out of range value for column 'i' at row 11
+Warning 1264 Out of range value for column 'i0' at row 11
+Warning 1264 Out of range value for column 'i1' at row 11
+Warning 1264 Out of range value for column 'i20' at row 11
+Warning 1264 Out of range value for column 't' at row 11
+Warning 1264 Out of range value for column 't0' at row 11
+Warning 1264 Out of range value for column 't1' at row 11
+Warning 1264 Out of range value for column 't20' at row 11
+Warning 1264 Out of range value for column 's' at row 11
+Warning 1264 Out of range value for column 's0' at row 11
+Warning 1264 Out of range value for column 's1' at row 11
+Warning 1264 Out of range value for column 's20' at row 11
+Warning 1264 Out of range value for column 'm' at row 11
+Warning 1264 Out of range value for column 'm0' at row 11
+Warning 1264 Out of range value for column 'm1' at row 11
+Warning 1264 Out of range value for column 'm20' at row 11
+SELECT i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20 FROM t1;
+i i0 i1 i20 t t0 t1 t20 s s0 s1 s20 m m0 m1 m20 b b0 b1 b20
+-2147483648 -2147483648 -2147483648 -2147483648 -128 -128 -128 -128 -32768 -32768 -32768 -32768 -8388608 -8388608 -8388608 -8388608 -9223372036854775808 -9223372036854775808 -9223372036854775808 -9223372036854775808
+-2147483648 -2147483648 -2147483648 -2147483648 -128 -128 -128 -128 -32768 -32768 -32768 -32768 -8388608 -8388608 -8388608 -8388608 -9223372036854775808 -9223372036854775808 -9223372036854775808 -9223372036854775808
+-2147483648 -2147483648 -2147483648 -2147483648 -128 -128 -128 -128 -32768 -32768 -32768 -32768 -8388608 -8388608 -8388608 -8388608 -9223372036854775808 -9223372036854775808 -9223372036854775808 -9223372036854775808
+-2147483648 -2147483648 -2147483648 -2147483648 -128 -128 -128 -128 -32768 -32768 -32768 -32768 -8388608 -8388608 -8388608 -8388608 -9223372036854775808 -9223372036854775808 -9223372036854775808 -9223372036854775808
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+2147483647 2147483647 2147483647 2147483647 127 127 127 127 32767 32767 32767 32767 8388607 8388607 8388607 8388607 9223372036854775807 9223372036854775807 9223372036854775807 9223372036854775807
+ALTER TABLE t1 ADD COLUMN i257 INT(257) ;
+ERROR 42000: Display width out of range for 'i257' (max = 255)
+DROP TABLE t1;
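The warnings above, rather than errors, imply the test runs without strict sql_mode, so out-of-range integers are clamped to the type's limits with warning 1264. A minimal sketch under that assumption (int_clamp_demo is an illustrative name):

  SET SESSION sql_mode = '';            -- non-strict, matching the recorded behaviour
  CREATE TABLE int_clamp_demo (t TINYINT, pk INT PRIMARY KEY) ENGINE=rocksdb;
  INSERT INTO int_clamp_demo VALUES (256, 1);    -- clamped to 127, warning 1264
  INSERT INTO int_clamp_demo VALUES (-129, 2);   -- clamped to -128, warning 1264
  SELECT t FROM int_clamp_demo;                  -- 127, -128
  DROP TABLE int_clamp_demo;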
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_int_indexes.result b/storage/rocksdb/mysql-test/rocksdb/r/type_int_indexes.result
new file mode 100644
index 00000000000..e4c47f04c24
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_int_indexes.result
@@ -0,0 +1,99 @@
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+i INT PRIMARY KEY,
+t TINYINT,
+s SMALLINT,
+m MEDIUMINT,
+b BIGINT
+) ENGINE=rocksdb;
+INSERT INTO t1 (i,t,s,m,b) VALUES (1,2,3,4,5),(1000,100,10000,1000000,1000000000000000000),(5,100,10000,1000000,100000000000000000),(2,3,4,5,6),(3,4,5,6,7),(101,102,103,104,105),(10001,103,10002,10003,10004),(10,11,12,13,14),(11,12,13,14,15),(12,13,14,15,16);
+EXPLAIN SELECT i FROM t1 ORDER BY i;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL PRIMARY 4 NULL # Using index
+SELECT i FROM t1 ORDER BY i;
+i
+1
+2
+3
+5
+10
+11
+12
+101
+1000
+10001
+DROP TABLE t1;
+CREATE TABLE t1 (
+i INT,
+t TINYINT,
+s SMALLINT,
+m MEDIUMINT,
+b BIGINT,
+pk SMALLINT AUTO_INCREMENT PRIMARY KEY,
+INDEX s_m (s,m)
+) ENGINE=rocksdb;
+INSERT INTO t1 (i,t,s,m,b) VALUES (1,2,3,4,5),(1000,100,10000,1000000,1000000000000000000),(5,100,10000,1000000,100000000000000000),(2,3,4,5,6),(3,4,5,6,7),(101,102,103,104,105),(10001,103,10002,10003,10004),(10,11,12,13,14),(11,12,13,14,15),(12,13,14,15,16);
+EXPLAIN SELECT s, m FROM t1 WHERE s != 10 AND m != 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index s_m s_m 7 NULL # Using where; Using index
+SELECT s, m FROM t1 WHERE s != 10 AND m != 1;
+s m
+10000 1000000
+10000 1000000
+10002 10003
+103 104
+12 13
+13 14
+14 15
+3 4
+4 5
+5 6
+DROP TABLE t1;
+# RocksDB: unique indexes allowed
+CREATE TABLE t1 (
+i INT,
+t TINYINT,
+s SMALLINT,
+m MEDIUMINT,
+b BIGINT,
+pk MEDIUMINT AUTO_INCREMENT PRIMARY KEY,
+UNIQUE KEY b_t (b,t)
+) ENGINE=rocksdb;
+INSERT INTO t1 (i,t,s,m,b) VALUES (1,2,3,4,5),(1000,100,10000,1000000,1000000000000000000),(5,100,10000,1000000,100000000000000000),(2,3,4,5,6),(3,4,5,6,7),(101,102,103,104,105),(10001,103,10002,10003,10004),(10,11,12,13,14),(11,12,13,14,15),(12,13,14,15,16);
+SELECT b+t FROM t1 WHERE (b,t) IN ( SELECT b, t FROM t1 WHERE i>1 ) ORDER BY b+t;
+b+t
+9
+11
+25
+27
+29
+207
+10107
+100000000000000100
+1000000000000000100
+SELECT b+t FROM t1 FORCE INDEX (b_t) WHERE (b,t) IN ( SELECT b, t FROM t1 WHERE i>1 ) ORDER BY b+t;
+b+t
+9
+11
+25
+27
+29
+207
+10107
+100000000000000100
+1000000000000000100
+SELECT b+t FROM t1 IGNORE INDEX (b_t) WHERE (b,t) IN ( SELECT b, t FROM t1 WHERE i>1 ) ORDER BY b+t;
+b+t
+9
+11
+25
+27
+29
+207
+10107
+100000000000000100
+1000000000000000100
+DROP TABLE t1;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
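The three identical result sets above are the point of the test: FORCE INDEX and IGNORE INDEX may change the access path but never the rows returned. A hedged sketch of the same invariant (int_idx_demo is an illustrative name):

  CREATE TABLE int_idx_demo (
    pk INT AUTO_INCREMENT PRIMARY KEY,
    b BIGINT,
    t TINYINT,
    UNIQUE KEY b_t (b, t)
  ) ENGINE=rocksdb;
  INSERT INTO int_idx_demo (b, t) VALUES (5, 2), (6, 3), (7, 4);
  -- These must return the same rows; only the EXPLAIN output may differ.
  SELECT b + t FROM int_idx_demo ORDER BY b + t;
  SELECT b + t FROM int_idx_demo FORCE INDEX (b_t) ORDER BY b + t;
  SELECT b + t FROM int_idx_demo IGNORE INDEX (b_t) ORDER BY b + t;
  DROP TABLE int_idx_demo;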
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_set.result b/storage/rocksdb/mysql-test/rocksdb/r/type_set.result
new file mode 100644
index 00000000000..5f875d147ec
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_set.result
@@ -0,0 +1,49 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+a SET('') ,
+b SET('test1','test2','test3','test4','test5') ,
+c SET('01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36','37','38','39','40','41','42','43','44','45','46','47','48','49','50''51','52','53','54','55','56','57','58','59','60','61','62','63','64') ,
+PRIMARY KEY (c)
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a set('') YES NULL
+b set('test1','test2','test3','test4','test5') YES NULL
+c set('01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36','37','38','39','40','41','42','43','44','45','46','47','48','49','50''51','52','53','54','55','56','57','58','59','60','61','62','63','64') NO PRI NULL
+INSERT INTO t1 (a,b,c) VALUES
+('','test2,test3','01,34,44,,23'),
+('',5,2),
+(',','test4,test2','');
+Warnings:
+Warning 1265 Data truncated for column 'c' at row 1
+SELECT a,b,c FROM t1;
+a b c
+ test1,test3 02
+ test2,test3 01,23,34,44
+ test2,test4
+INSERT INTO t1 (a,b,c) VALUES (0,'test6',-1);
+Warnings:
+Warning 1265 Data truncated for column 'b' at row 1
+Warning 1265 Data truncated for column 'c' at row 1
+SELECT a,b,c FROM t1;
+a b c
+ 01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50'51,52,53,54,55,56,57,58,59,60,61,62,63,64
+ test1,test3 02
+ test2,test3 01,23,34,44
+ test2,test4
+ALTER TABLE t1 ADD COLUMN e SET('a','A') ;
+Warnings:
+Note 1291 Column 'e' has duplicated value 'a' in SET
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+a set('') YES NULL
+b set('test1','test2','test3','test4','test5') YES NULL
+c set('01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36','37','38','39','40','41','42','43','44','45','46','47','48','49','50''51','52','53','54','55','56','57','58','59','60','61','62','63','64') NO PRI NULL
+e set('a','A') YES NULL
+ALTER TABLE t1 ADD COLUMN f SET('1','2','3','4','5','6','7','8','9','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z',' ','11','12','13','14','15','16','17','18','19','1a','1b','1c','1d','1e','1f','1g','1h','1i','1j','1k','1l','1m','1n','1o','1p','1q','1r','1s','1t','1u','1v','1w','1x','1y','1z','20','21','22','23','24','25','26','27','28','29','2a','2b','2c','2d','2e','2f','2g','2h','2i','2j','2k','2l','2m','2n','2o','2p','2q','2r','2s','2t','2u','2v','2w','2x','2y','2z','30','31','32','33','34','35','36','37','38','39','3a','3b','3c','3d','3e','3f','3g','3h','3i') ;
+ERROR HY000: Too many strings for column f and SET
+SELECT a,b,c,e FROM t1 WHERE FIND_IN_SET('test2',b)>0 OR a != '';
+a b c e
+ test2,test3 01,23,34,44 NULL
+ test2,test4 NULL
+DROP TABLE t1;
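The result above shows SET normalization: members are de-duplicated and rendered in definition order, and a bare integer is read as a bitmask over the members (the inserted 5 came back as 'test1,test3'). A minimal sketch (set_norm_demo is an illustrative name):

  CREATE TABLE set_norm_demo (
    b SET('test1','test2','test3','test4','test5'),
    pk INT PRIMARY KEY
  ) ENGINE=rocksdb;
  INSERT INTO set_norm_demo VALUES ('test3,test1,test3', 1);  -- stored as 'test1,test3'
  INSERT INTO set_norm_demo VALUES (5, 2);                    -- bitmask 0b101 -> 'test1,test3'
  SELECT b FROM set_norm_demo;
  DROP TABLE set_norm_demo;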
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_set_indexes.result b/storage/rocksdb/mysql-test/rocksdb/r/type_set_indexes.result
new file mode 100644
index 00000000000..391649e0e3b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_set_indexes.result
@@ -0,0 +1,115 @@
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+a SET('N.America','S.America','Africa','Antarctica','Australia','Europe','Asia'),
+b SET('test1','test2','test3','test4','test5'),
+c SET('01','22','23','33','34','39','40','44','50','63','64') PRIMARY KEY
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 c A 1000 NULL NULL LSMTREE
+INSERT INTO t1 (a,b,c) VALUES
+('','test2,test3','01,34,44,23'),
+('',5,2),
+('N.America,Asia','test4,test2',''),
+('Africa,Europe,Asia','test2,test3','01'),
+('Antarctica','test3','34,44'),
+('Asia','test5','50'),
+('Europe,S.America','test1,','39');
+Warnings:
+Warning 1265 Data truncated for column 'b' at row 7
+EXPLAIN SELECT c FROM t1 ORDER BY c;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL PRIMARY 2 NULL # Using index
+SELECT c FROM t1 ORDER BY c;
+c
+
+01
+22
+39
+34,44
+01,23,34,44
+50
+EXPLAIN SELECT c FROM t1 IGNORE INDEX FOR ORDER BY (PRIMARY) ORDER BY c;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL PRIMARY 2 NULL # Using index; Using filesort
+SELECT c FROM t1 IGNORE INDEX FOR ORDER BY (PRIMARY) ORDER BY c;
+c
+
+01
+22
+39
+34,44
+01,23,34,44
+50
+INSERT INTO t1 (a,b,c) VALUES ('Antarctica','test3','02');
+ERROR 23000: Duplicate entry '22' for key 'PRIMARY'
+INSERT INTO t1 (a,b,c) VALUES ('','test1','34,44');
+ERROR 23000: Duplicate entry '34,44' for key 'PRIMARY'
+DROP TABLE t1;
+CREATE TABLE t1 (
+a SET('N.America','S.America','Africa','Antarctica','Australia','Europe','Asia'),
+b SET('test1','test2','test3','test4','test5'),
+c SET('01','22','23','33','34','39','40','44','50','63','64'),
+pk SET('1','2','3','4','5','6','7','8','9') PRIMARY KEY,
+INDEX(a)
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A 1000 NULL NULL LSMTREE
+t1 1 a 1 a A 500 NULL NULL YES LSMTREE
+INSERT INTO t1 (a,b,c,pk) VALUES
+('','test2,test3','01,34,44,23',1),
+('',5,2,2),
+('N.America,Asia','test4,test2','',3),
+('Africa,Europe,Asia','test2,test3','01',4),
+('Antarctica','test3','34,44',5),
+('Asia','test5','50',6),
+('Europe,S.America','test1,','39',7);
+Warnings:
+Warning 1265 Data truncated for column 'b' at row 7
+EXPLAIN SELECT a FROM t1 WHERE FIND_IN_SET('Europe',a) > 0;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL # Using where
+SELECT a FROM t1 WHERE FIND_IN_SET('Europe',a) > 0;
+a
+Africa,Europe,Asia
+S.America,Europe
+DROP TABLE t1;
+CREATE TABLE t1 (
+a SET('N.America','S.America','Africa','Antarctica','Australia','Europe','Asia'),
+b SET('test1','test2','test3','test4','test5'),
+c SET('01','22','23','33','34','39','40','44','50','63','64'),
+pk SET('1','2','3','4','5','6','7','8','9') PRIMARY KEY,
+UNIQUE INDEX b_a (b,a)
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A 1000 NULL NULL LSMTREE
+t1 0 b_a 1 b A 500 NULL NULL YES LSMTREE
+t1 0 b_a 2 a A 1000 NULL NULL YES LSMTREE
+INSERT INTO t1 (a,b,c,pk) VALUES
+('','test2,test3','01,34,44,23',1),
+('',5,2,2),
+('N.America,Asia','test4,test2','',3),
+('Africa,Europe,Asia','test2,test3','01',4),
+('Antarctica','test3','34,44',5),
+('Asia','test5','50',6),
+('Europe,S.America','test1,','39',7);
+Warnings:
+Warning 1265 Data truncated for column 'b' at row 7
+EXPLAIN SELECT DISTINCT a, b FROM t1 ORDER BY b DESC, a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL # Using temporary; Using filesort
+SELECT DISTINCT a, b FROM t1 ORDER BY b DESC, a;
+a b
+ test1,test3
+ test2,test3
+Africa,Europe,Asia test2,test3
+Antarctica test3
+Asia test5
+N.America,Asia test2,test4
+S.America,Europe test1
+DROP TABLE t1;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
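The ordered PRIMARY KEY scan earlier in this file returns rows in the SET's internal bitmask order, not string order: with c SET('01','22','23','33','34','39','40','44',...), '34,44' is 16+128 = 144 and so sorts before '01,23,34,44' (1+4+16+128 = 149) and '50' (256). A hedged sketch (set_order_demo is an illustrative name):

  CREATE TABLE set_order_demo (
    c SET('01','22','23','33','34','39','40','44','50') PRIMARY KEY
  ) ENGINE=rocksdb;
  INSERT INTO set_order_demo VALUES ('50'), ('34,44'), ('01,23,34,44'), ('22');
  -- c+0 exposes the stored bitmask; ORDER BY c follows it.
  SELECT c, c+0 AS bitmask FROM set_order_demo ORDER BY c;
  DROP TABLE set_order_demo;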
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_text.result b/storage/rocksdb/mysql-test/rocksdb/r/type_text.result
new file mode 100644
index 00000000000..4e118bb9bc5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_text.result
@@ -0,0 +1,57 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT PRIMARY KEY,
+t TEXT ,
+t0 TEXT(0) ,
+t1 TEXT(1) ,
+t300 TEXT(300) ,
+tm TEXT(65535) ,
+t70k TEXT(70000) ,
+t17m TEXT(17000000) ,
+tt TINYTEXT ,
+m MEDIUMTEXT ,
+l LONGTEXT
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+pk int(11) NO PRI NULL auto_increment
+t text YES NULL
+t0 text YES NULL
+t1 tinytext YES NULL
+t300 text YES NULL
+tm text YES NULL
+t70k mediumtext YES NULL
+t17m longtext YES NULL
+tt tinytext YES NULL
+m mediumtext YES NULL
+l longtext YES NULL
+INSERT INTO t1 (t,t0,t1,t300,tm,t70k,t17m,tt,m,l) VALUES
+('','','','','','','','','',''),
+('a','b','c','d','e','f','g','h','i','j'),
+('test1','test2','test3','test4','test5','test6','test7','test8','test9','test10'),
+( REPEAT('a',65535), REPEAT('b',65535), REPEAT('c',255), REPEAT('d',65535), REPEAT('e',65535), REPEAT('f',1048576), REPEAT('g',1048576), REPEAT('h',255), REPEAT('i',1048576), REPEAT('j',1048576) );
+SELECT LENGTH(t), LENGTH(t0), LENGTH(t1), LENGTH(t300), LENGTH(tm), LENGTH(t70k), LENGTH(t17m), LENGTH(tt), LENGTH(m), LENGTH(l) FROM t1;
+LENGTH(t) LENGTH(t0) LENGTH(t1) LENGTH(t300) LENGTH(tm) LENGTH(t70k) LENGTH(t17m) LENGTH(tt) LENGTH(m) LENGTH(l)
+0 0 0 0 0 0 0 0 0 0
+1 1 1 1 1 1 1 1 1 1
+5 5 5 5 5 5 5 5 5 6
+65535 65535 255 65535 65535 1048576 1048576 255 1048576 1048576
+INSERT INTO t1 (t,t0,t1,t300,tm,t70k,t17m,tt,m,l) VALUES
+( REPEAT('a',65536), REPEAT('b',65536), REPEAT('c',256), REPEAT('d',65536), REPEAT('e',65536), REPEAT('f',1048576), REPEAT('g',1048576), REPEAT('h',256), REPEAT('i',1048576), REPEAT('j',1048576) );
+Warnings:
+Warning 1265 Data truncated for column 't' at row 1
+Warning 1265 Data truncated for column 't0' at row 1
+Warning 1265 Data truncated for column 't1' at row 1
+Warning 1265 Data truncated for column 't300' at row 1
+Warning 1265 Data truncated for column 'tm' at row 1
+Warning 1265 Data truncated for column 'tt' at row 1
+SELECT LENGTH(t), LENGTH(t0), LENGTH(t1), LENGTH(t300), LENGTH(tm), LENGTH(t70k), LENGTH(t17m), LENGTH(tt), LENGTH(m), LENGTH(l) FROM t1;
+LENGTH(t) LENGTH(t0) LENGTH(t1) LENGTH(t300) LENGTH(tm) LENGTH(t70k) LENGTH(t17m) LENGTH(tt) LENGTH(m) LENGTH(l)
+0 0 0 0 0 0 0 0 0 0
+1 1 1 1 1 1 1 1 1 1
+5 5 5 5 5 5 5 5 5 6
+65535 65535 255 65535 65535 1048576 1048576 255 1048576 1048576
+65535 65535 255 65535 65535 1048576 1048576 255 1048576 1048576
+ALTER TABLE t1 ADD COLUMN ttt TEXT(4294967296) ;
+ERROR 42000: Display width out of range for 'ttt' (max = 4294967295)
+DROP TABLE t1;
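SHOW COLUMNS above records how TEXT(n) is mapped to the smallest TEXT variant able to hold n bytes. A minimal sketch of the same mapping (text_width_demo is an illustrative name):

  CREATE TABLE text_width_demo (
    a TEXT(1),          -- becomes TINYTEXT   (up to 255)
    b TEXT(300),        -- becomes TEXT       (up to 65535)
    c TEXT(70000),      -- becomes MEDIUMTEXT (up to 16777215)
    d TEXT(17000000),   -- becomes LONGTEXT
    pk INT PRIMARY KEY
  ) ENGINE=rocksdb;
  SHOW COLUMNS IN text_width_demo;
  DROP TABLE text_width_demo;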
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_text_indexes.result b/storage/rocksdb/mysql-test/rocksdb/r/type_text_indexes.result
new file mode 100644
index 00000000000..22318316596
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_text_indexes.result
@@ -0,0 +1,165 @@
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+t TEXT,
+tt TINYTEXT,
+m MEDIUMTEXT,
+l LONGTEXT,
+PRIMARY KEY t (t(32))
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 t A 1000 32 NULL LSMTREE
+INSERT INTO t1 (t,tt,m,l) VALUES
+('','','',''),
+('a','b','c','d'),
+('b','d','c','b'),
+('test1','test2','test3','test4'),
+(REPEAT('a',128),REPEAT('b',128),REPEAT('c',128),REPEAT('d',128)),
+('abc','def','ghi','jkl'),
+('test2','test3','test4','test5'),
+('test3','test4','test5','test6'),
+(REPEAT('b',128),REPEAT('f',128),REPEAT('e',128),REPEAT('d',128)),
+(REPEAT('c',128),REPEAT('b',128),REPEAT('c',128),REPEAT('e',128));
+EXPLAIN SELECT SUBSTRING(t,16) AS f FROM t1 WHERE t IN ('test1','test2') ORDER BY f;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range PRIMARY PRIMARY 34 NULL # Using where; Using filesort
+SELECT SUBSTRING(t,16) AS f FROM t1 WHERE t IN ('test1','test2') ORDER BY f;
+f
+
+
+EXPLAIN SELECT SUBSTRING(t,16) AS f FROM t1 IGNORE INDEX (PRIMARY) WHERE t IN ('test1','test2') ORDER BY f;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL # Using where; Using filesort
+SELECT SUBSTRING(t,16) AS f FROM t1 IGNORE INDEX (PRIMARY) WHERE t IN ('test1','test2') ORDER BY f;
+f
+
+
+DROP TABLE t1;
+CREATE TABLE t1 (
+t TEXT,
+tt TINYTEXT,
+m MEDIUMTEXT,
+l LONGTEXT,
+pk TINYTEXT PRIMARY KEY,
+UNIQUE INDEX l_tt (l(256),tt(64))
+) ENGINE=rocksdb;
+ERROR 42000: BLOB/TEXT column 'pk' used in key specification without a key length
+CREATE TABLE t1 (
+t TEXT,
+tt TINYTEXT,
+m MEDIUMTEXT,
+l LONGTEXT,
+pk MEDIUMTEXT,
+PRIMARY KEY mt (pk(1)),
+INDEX (m(128))
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A 1000 1 NULL LSMTREE
+t1 1 m 1 m A 500 128 NULL YES LSMTREE
+INSERT INTO t1 (t,tt,m,l,pk) VALUES
+('','','','','0'),
+('a','b','c','d','1'),
+('b','d','c','b','2'),
+('test1','test2','test3','test4','3'),
+(REPEAT('a',128),REPEAT('b',128),REPEAT('c',128),REPEAT('d',128),'4'),
+('abc','def','ghi','jkl','5'),
+('test2','test3','test4','test5','6'),
+('test3','test4','test5','test6','7'),
+(REPEAT('b',128),REPEAT('f',128),REPEAT('e',128),REPEAT('d',128),'8'),
+(REPEAT('c',128),REPEAT('b',128),REPEAT('c',128),REPEAT('e',128),'9');
+EXPLAIN SELECT SUBSTRING(m,128) AS f FROM t1 WHERE m = 'test1' ORDER BY f DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref m m 131 const # Using where; Using filesort
+SELECT SUBSTRING(m,128) AS f FROM t1 WHERE m = 'test1' ORDER BY f DESC;
+f
+DROP TABLE t1;
+CREATE TABLE t1 (
+b TEXT,
+PRIMARY KEY b (b(32))
+) ENGINE=rocksdb;
+INSERT INTO t1 (b) VALUES
+('00000000000000000000000000000000'),
+('00000000000000000000000000000001'),
+('00000000000000000000000000000002');
+SELECT b FROM t1;
+b
+00000000000000000000000000000000
+00000000000000000000000000000001
+00000000000000000000000000000002
+DROP TABLE t1;
+CREATE TABLE t1 (
+b TINYTEXT,
+PRIMARY KEY b (b(32))
+) ENGINE=rocksdb;
+INSERT INTO t1 (b) VALUES
+('00000000000000000000000000000000'),
+('00000000000000000000000000000001'),
+('00000000000000000000000000000002');
+SELECT b FROM t1;
+b
+00000000000000000000000000000000
+00000000000000000000000000000001
+00000000000000000000000000000002
+DROP TABLE t1;
+CREATE TABLE t1 (
+b MEDIUMTEXT,
+PRIMARY KEY b (b(32))
+) ENGINE=rocksdb;
+INSERT INTO t1 (b) VALUES
+('00000000000000000000000000000000'),
+('00000000000000000000000000000001'),
+('00000000000000000000000000000002');
+SELECT b FROM t1;
+b
+00000000000000000000000000000000
+00000000000000000000000000000001
+00000000000000000000000000000002
+DROP TABLE t1;
+CREATE TABLE t1 (
+b LONGTEXT,
+PRIMARY KEY b (b(32))
+) ENGINE=rocksdb;
+INSERT INTO t1 (b) VALUES
+('00000000000000000000000000000000'),
+('00000000000000000000000000000001'),
+('00000000000000000000000000000002');
+SELECT b FROM t1;
+b
+00000000000000000000000000000000
+00000000000000000000000000000001
+00000000000000000000000000000002
+DROP TABLE t1;
+CREATE TABLE t1 (
+b LONGTEXT CHARACTER SET "binary" COLLATE "binary",
+PRIMARY KEY b (b(32))
+) ENGINE=rocksdb;
+INSERT INTO t1 (b) VALUES
+('00000000000000000000000000000000'),
+('00000000000000000000000000000001'),
+('00000000000000000000000000000002');
+INSERT INTO t1 (b) VALUES (''), (_binary 0x0), (' ');
+SELECT hex(b) FROM t1;
+hex(b)
+
+00
+20
+3030303030303030303030303030303030303030303030303030303030303030
+3030303030303030303030303030303030303030303030303030303030303031
+3030303030303030303030303030303030303030303030303030303030303032
+DROP TABLE t1;
+CREATE TABLE t1 (
+b LONGTEXT CHARACTER SET "latin1" COLLATE "latin1_bin",
+PRIMARY KEY b (b(32))
+) ENGINE=rocksdb;
+INSERT INTO t1 (b) VALUES (''), (_binary 0x0), (' ');
+ERROR 23000: Duplicate entry ' ' for key 'PRIMARY'
+INSERT INTO t1 (b) VALUES (''), (_binary 0x0);
+SELECT hex(b) FROM t1;
+hex(b)
+00
+
+DROP TABLE t1;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
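The ER_BLOB_KEY_WITHOUT_LENGTH error earlier in this file shows that any key over a TEXT column needs an explicit prefix length. A hedged sketch (text_key_demo is an illustrative name):

  -- Rejected: no key length on a TEXT column.
  CREATE TABLE text_key_demo (t TEXT, PRIMARY KEY (t)) ENGINE=rocksdb;
  -- Accepted: the key covers only the first 32 bytes of t.
  CREATE TABLE text_key_demo (t TEXT, PRIMARY KEY (t(32))) ENGINE=rocksdb;
  DROP TABLE text_key_demo;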
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_varbinary.result b/storage/rocksdb/mysql-test/rocksdb/r/type_varbinary.result
new file mode 100644
index 00000000000..6de8c0331cd
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_varbinary.result
@@ -0,0 +1,93 @@
+DROP TABLE IF EXISTS t1, t2;
+CREATE TABLE t1 (
+v0 VARBINARY(0) ,
+v1 VARBINARY(1) ,
+v64 VARBINARY(64) ,
+v65000 VARBINARY(65000) ,
+PRIMARY KEY (v64)
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+v0 varbinary(0) YES NULL
+v1 varbinary(1) YES NULL
+v64 varbinary(64) NO PRI NULL
+v65000 varbinary(65000) YES NULL
+CREATE TABLE t2 (v VARBINARY(65532) , PRIMARY KEY(v(255))) ENGINE=rocksdb;
+SHOW COLUMNS IN t2;
+Field Type Null Key Default Extra
+v varbinary(65532) NO PRI NULL
+INSERT INTO t1 (v0,v1,v64,v65000) VALUES ('','','','');
+INSERT INTO t1 (v0,v1,v64,v65000) VALUES ('','y','Once there, double check that an article doesn\'t already exist','Here is a list of recommended books on MariaDB and MySQL. We\'ve provided links to Amazon.com here for convenience, but they can be found at many other bookstores, both online and off.
+
+ If you want to have your favorite MySQL / MariaDB book listed here, please leave a comment.
+ For developers who want to code on MariaDB or MySQL
+
+ * Understanding MySQL Internals by Sasha Pachev, former MySQL developer at MySQL AB.
+ o This is the only book we know about that describes the internals of MariaDB / MySQL. A must have for anyone who wants to understand and develop on MariaDB!
+ o Not all topics are covered and some parts are slightly outdated, but still the best book on this topic.
+ * MySQL 5.1 Plugin Development by Sergei Golubchik and Andrew Hutchings
+ o A must read for anyone wanting to write a plugin for MariaDB, written by the Sergei who designed the plugin interface for MySQL and MariaDB!
+
+ For MariaDB / MySQL end users
+
+ * MariaDB Crash Course by Ben Forta
+ o First MariaDB book!
+ o For people who want to learn SQL and the basics of MariaDB.
+ o Now shipping. Purchase at Amazon.com or your favorite bookseller.
+
+ * SQL-99 Complete, Really by Peter Gulutzan & Trudy Pelzer.
+ o Everything you wanted to know about the SQL 99 standard. Excellent reference book!
+ o Free to read in the Knowledgebase!
+
+ * MySQL (4th Edition) by Paul DuBois
+ o The \'default\' book to read if you wont to learn to use MySQL / MariaDB.
+
+ * MySQL Cookbook by Paul DuBois
+ o A lot of examples of how to use MySQL. As with all of Paul\'s books, it\'s worth its weight in gold and even enjoyable reading for such a \'dry\' subject.
+
+ * High Performance MySQL, Second Edition, By Baron Schwartz, Peter Zaitsev, Vadim Tkachenko, Jeremy D. Zawodny, Arjen Lentz, Derek J. Balling, et al.
+ o \"High Performance MySQL is the definitive guide to building fast, reliable systems with MySQL. Written by noted experts with years of real-world experience building very large systems, this book covers every aspect of MySQL performance in detail, and focuses on robustness, security, and data integrity. Learn advanced techniques in depth so you can bring out MySQL\'s full power.\" (From the book description at O\'Reilly)
+
+ * MySQL Admin Cookbook
+ o A quick step-by-step guide for MySQL users and database administrators to tackle real-world challenges with MySQL configuration and administration
+
+ * MySQL 5.0 Certification Study Guide, By Paul DuBois, Stefan Hinz, Carsten Pedersen
+ o This is the official guide to cover the passing of the two MySQL Certification examinations. It is valid till version 5.0 of the server, so while it misses all the features available in MySQL 5.1 and greater (including MariaDB 5.1 and greater), it provides a good basic understanding of MySQL for the end-user. ');
+SELECT HEX(v0), HEX(v1), HEX(v64), HEX(v65000) FROM t1;
+HEX(v0) HEX(v1) HEX(v64) HEX(v65000)
+
+ 79 4F6E63652074686572652C20646F75626C6520636865636B207468617420616E2061727469636C6520646F65736E277420616C7265616479206578697374 486572652069732061206C697374206F66207265636F6D6D656E64656420626F6F6B73206F6E204D61726961444220616E64204D7953514C2E2057652776652070726F7669646564206C696E6B7320746F20416D617A6F6E2E636F6D206865726520666F7220636F6E76656E69656E63652C2062757420746865792063616E20626520666F756E64206174206D616E79206F7468657220626F6F6B73746F7265732C20626F7468206F6E6C696E6520616E64206F66662E0A0A2020496620796F752077616E7420746F206861766520796F7572206661766F72697465204D7953514C202F204D61726961444220626F6F6B206C697374656420686572652C20706C65617365206C65617665206120636F6D6D656E742E0A2020466F7220646576656C6F706572732077686F2077616E7420746F20636F6465206F6E204D617269614442206F72204D7953514C0A0A2020202020202A20556E6465727374616E64696E67204D7953514C20496E7465726E616C73206279205361736861205061636865762C20666F726D6572204D7953514C20646576656C6F706572206174204D7953514C2041422E0A2020202020202020202020206F205468697320697320746865206F6E6C7920626F6F6B207765206B6E6F772061626F75742074686174206465736372696265732074686520696E7465726E616C73206F66204D617269614442202F204D7953514C2E2041206D757374206861766520666F7220616E796F6E652077686F2077616E747320746F20756E6465727374616E6420616E6420646576656C6F70206F6E204D617269614442210A2020202020202020202020206F204E6F7420616C6C20746F706963732061726520636F766572656420616E6420736F6D652070617274732061726520736C696768746C79206F757464617465642C20627574207374696C6C20746865206265737420626F6F6B206F6E207468697320746F7069632E200A2020202020202A204D7953514C20352E3120506C7567696E20446576656C6F706D656E742062792053657267656920476F6C75626368696B20616E6420416E64726577204875746368696E67730A2020202020202020202020206F2041206D757374207265616420666F7220616E796F6E652077616E74696E6720746F207772697465206120706C7567696E20666F72204D6172696144422C207772697474656E20627920746865205365726765692077686F2064657369676E65642074686520706C7567696E20696E7465726661636520666F72204D7953514C20616E64204D61726961444221200A0A2020466F72204D617269614442202F204D7953514C20656E642075736572730A0A2020202020202A204D61726961444220437261736820436F757273652062792042656E20466F7274610A2020202020202020202020206F204669727374204D61726961444220626F6F6B210A2020202020202020202020206F20466F722070656F706C652077686F2077616E7420746F206C6561726E2053514C20616E642074686520626173696373206F66204D6172696144422E0A2020202020202020202020206F204E6F77207368697070696E672E20507572636861736520617420416D617A6F6E2E636F6D206F7220796F7572206661766F7269746520626F6F6B73656C6C65722E200A0A2020202020202A2053514C2D393920436F6D706C6574652C205265616C6C792062792050657465722047756C75747A616E20262054727564792050656C7A65722E0A2020202020202020202020206F2045766572797468696E6720796F752077616E74656420746F206B6E6F772061626F7574207468652053514C203939207374616E646172642E20457863656C6C656E74207265666572656E636520626F6F6B210A2020202020202020202020206F204672656520746F207265616420696E20746865204B6E6F776C656467656261736521200A0A2020202020202A204D7953514C20283474682045646974696F6E29206279205061756C204475426F69730A2020202020202020202020206F20546865202764656661756C742720626F6F6B20746F207265616420696620796F7520776F6E7420746F206C6561726E20746F20757365204D7953514C202F204D6172696144422E200A0A2020202020202A204D7953514C20436F6F6B626F6F6B206279205061756C204475426F69730A2020202020202020202020206F2041206C6F74206F66206578616D706C6573206F6620686F7720746F20757365204D7953514C2E204173207769746820616C6C206F66205061756C277320626F6F6B732C206974277320776F727468206974732077656967687420696E20676F6C6420616E64206576656E20656E6A6F7961626C652072656164696E6720666F7220737563682061202764727927207375626A6563742E200A0A2020202020202A204869676820506572666F726D616E6365204D7953514C2C205365636F6E642045646974696F6E2C204279204261726F6E20536368776172747A2C205065746572205A6169747365762C20566164696D20546B616368656E6B6F2C204A6572656D7920442E205A61776F646E792C2041726A656E204C656E747A2C20446572656B204A2E2042616C6C696E672C20657420616C2E0A2020202020202020202020206F20224869676820506572666F726D616E6365204D7953514C2069732074686520646566696E697469766520677569646520746F206275696C64696E6720666173742C2072656C6961626C652073797374656D732077697468204D7953514C2E205772697474656E206279206E6F74656420657870657274732077697468207965617273206F66207265616C2D776F726C6420657870657269656E6365206275696C64696E672076657279206C617267652073797374656D732C207468697320626F6F6B20636F7665727320657665727920617370656374206F66204D7953514C20706572666F726D616E636520696E2064657461696C2C20616E6420666F6375736573206F6E20726F627573746E6573732C2073656375726974792C20616E64206461746120696E746567726974792E204C6561726E20616476616E63656420746563686E697175657320696E20646570746820736F20796F752063616E206272696E67206F7574204D7953514C27732066756C6C20706F7765722E22202846726F6D2074686520626F6F6B206465736372697074696F6E206174204F275265696C6C7929200A0A2020202020202A204D7953514C2041646D696E20436F6F6B626F6F6B0A2020202020202020202020206F204120717569636B20737465702D62792D7374657020677569646520666F72204D7953514C20757365727320616E642064617461626173652061646D696E6973747261746F727320746F207461636B6C65207265616C2D776F726C64206368616C6C656E6765732077697468204D7953514C20636F6E66696775726174696F6E20616E642061646D696E697374726174696F6E200A0A2020202020202A204D7953514C20352E302043657274696669636174696F6E2053747564792047756964652C204279205061756C204475426F69732C2053746566616E2048696E7A2C204361727374656E20506564657273656E0A2020202020202020202020206F205468697320697320746865206F6666696369616C20677569646520746F20636F766572207468652070617373696E67206F66207468652074776F204D7953514C2043657274696669636174696F6E206578616D696E6174696F6E732E2049742069732076616C69642074696C6C2076657273696F6E20352E30206F6620746865207365727665722C20736F207768696C65206974206D697373657320616C6C2074686520666561747572657320617661696C61626C6520696E204D7953514C20352E3120616E6420677265617465722028696E636C7564696E67204D61726961444220352E3120616E642067726561746572292C2069742070726F7669646573206120676F6F6420626173696320756E6465727374616E64696E67206F66204D7953514C20666F722074686520656E642D757365722E20
+INSERT INTO t1 (v0,v1,v64,v65000) VALUES ('y', 'yy', REPEAT('c',65), REPEAT('abcdefghi ',6501));
+Warnings:
+Warning 1265 Data truncated for column 'v0' at row 1
+Warning 1265 Data truncated for column 'v1' at row 1
+Warning 1265 Data truncated for column 'v64' at row 1
+Warning 1265 Data truncated for column 'v65000' at row 1
+INSERT INTO t1 (v0,v1,v64,v65000) SELECT v65000, v65000, CONCAT('a',v65000), CONCAT(v65000,v1) FROM t1;
+Warnings:
+Warning 1265 Data truncated for column 'v0' at row 5
+Warning 1265 Data truncated for column 'v1' at row 5
+Warning 1265 Data truncated for column 'v64' at row 5
+Warning 1265 Data truncated for column 'v0' at row 6
+Warning 1265 Data truncated for column 'v1' at row 6
+Warning 1265 Data truncated for column 'v64' at row 6
+Warning 1265 Data truncated for column 'v65000' at row 6
+SELECT HEX(v0), HEX(v1), HEX(v64), LENGTH(HEX(v65000)) FROM t1;
+HEX(v0) HEX(v1) HEX(v64) LENGTH(HEX(v65000))
+ 0
+ 61 0
+ 48 61486572652069732061206C697374206F66207265636F6D6D656E64656420626F6F6B73206F6E204D61726961444220616E64204D7953514C2E205765277665 5932
+ 61 61616263646566676869206162636465666768692061626364656667686920616263646566676869206162636465666768692061626364656667686920616263 130000
+ 79 4F6E63652074686572652C20646F75626C6520636865636B207468617420616E2061727469636C6520646F65736E277420616C7265616479206578697374 5930
+ 79 63636363636363636363636363636363636363636363636363636363636363636363636363636363636363636363636363636363636363636363636363636363 130000
+ALTER TABLE t1 ADD COLUMN v65536 VARBINARY(65536) ;
+Warnings:
+Note 1246 Converting column 'v65536' from VARBINARY to BLOB
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+v0 varbinary(0) YES NULL
+v1 varbinary(1) YES NULL
+v64 varbinary(64) NO PRI NULL
+v65000 varbinary(65000) YES NULL
+v65536 mediumblob YES NULL
+DROP TABLE t1, t2;
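Note 1246 above is the server silently promoting a VARBINARY wider than the 65535-byte limit to a BLOB type. A minimal sketch (varbin_demo is an illustrative name):

  CREATE TABLE varbin_demo (pk INT PRIMARY KEY) ENGINE=rocksdb;
  ALTER TABLE varbin_demo ADD COLUMN v VARBINARY(65536);  -- Note 1246: converted to MEDIUMBLOB
  SHOW COLUMNS IN varbin_demo;                            -- v is reported as mediumblob
  DROP TABLE varbin_demo;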
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result b/storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result
new file mode 100644
index 00000000000..a7e086fde66
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result
@@ -0,0 +1,770 @@
+#
+# A basic test whether endspace-aware variable length encoding
+# works when in PK
+#
+create table t1 ( pk varchar(10) primary key, col1 int not null) ;
+insert into t1 values ('a', 1);
+select * from t1;
+pk col1
+a 1
+drop table t1;
+DROP TABLE IF EXISTS t1, t2;
+CREATE TABLE t1 (
+v0 VARCHAR(0) ,
+v1 VARCHAR(1) ,
+v64 VARCHAR(64) ,
+v65000 VARCHAR(65000) ,
+PRIMARY KEY (v64)
+) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+v0 varchar(0) YES NULL
+v1 varchar(1) YES NULL
+v64 varchar(64) NO PRI NULL
+v65000 varchar(65000) YES NULL
+CREATE TABLE t2 (v VARCHAR(65532), PRIMARY KEY (v(255))) ENGINE=rocksdb;
+SHOW COLUMNS IN t2;
+Field Type Null Key Default Extra
+v varchar(65532) NO PRI NULL
+INSERT INTO t1 (v0,v1,v64,v65000) VALUES ('','','','');
+INSERT INTO t1 (v0,v1,v64,v65000) VALUES ('','y','Once there, double check that an article doesn\'t already exist','Here is a list of recommended books on MariaDB and MySQL. We\'ve provided links to Amazon.com here for convenience, but they can be found at many other bookstores, both online and off.
+
+ If you want to have your favorite MySQL / MariaDB book listed here, please leave a comment.
+ For developers who want to code on MariaDB or MySQL
+
+ * Understanding MySQL Internals by Sasha Pachev, former MySQL developer at MySQL AB.
+ o This is the only book we know about that describes the internals of MariaDB / MySQL. A must have for anyone who wants to understand and develop on MariaDB!
+ o Not all topics are covered and some parts are slightly outdated, but still the best book on this topic.
+ * MySQL 5.1 Plugin Development by Sergei Golubchik and Andrew Hutchings
+ o A must read for anyone wanting to write a plugin for MariaDB, written by the Sergei who designed the plugin interface for MySQL and MariaDB!
+
+ For MariaDB / MySQL end users
+
+ * MariaDB Crash Course by Ben Forta
+ o First MariaDB book!
+ o For people who want to learn SQL and the basics of MariaDB.
+ o Now shipping. Purchase at Amazon.com or your favorite bookseller.
+
+ * SQL-99 Complete, Really by Peter Gulutzan & Trudy Pelzer.
+ o Everything you wanted to know about the SQL 99 standard. Excellent reference book!
+ o Free to read in the Knowledgebase!
+
+ * MySQL (4th Edition) by Paul DuBois
+ o The \'default\' book to read if you wont to learn to use MySQL / MariaDB.
+
+ * MySQL Cookbook by Paul DuBois
+ o A lot of examples of how to use MySQL. As with all of Paul\'s books, it\'s worth its weight in gold and even enjoyable reading for such a \'dry\' subject.
+
+ * High Performance MySQL, Second Edition, By Baron Schwartz, Peter Zaitsev, Vadim Tkachenko, Jeremy D. Zawodny, Arjen Lentz, Derek J. Balling, et al.
+ o \"High Performance MySQL is the definitive guide to building fast, reliable systems with MySQL. Written by noted experts with years of real-world experience building very large systems, this book covers every aspect of MySQL performance in detail, and focuses on robustness, security, and data integrity. Learn advanced techniques in depth so you can bring out MySQL\'s full power.\" (From the book description at O\'Reilly)
+
+ * MySQL Admin Cookbook
+ o A quick step-by-step guide for MySQL users and database administrators to tackle real-world challenges with MySQL configuration and administration
+
+ * MySQL 5.0 Certification Study Guide, By Paul DuBois, Stefan Hinz, Carsten Pedersen
+ o This is the official guide to cover the passing of the two MySQL Certification examinations. It is valid till version 5.0 of the server, so while it misses all the features available in MySQL 5.1 and greater (including MariaDB 5.1 and greater), it provides a good basic understanding of MySQL for the end-user. ');
+SELECT v0,v1,v64,v65000 FROM t1;
+v0 v1 v64 v65000
+
+
+
+
+
+
+
+
+
+
+
+ y Once there, double check that an article doesn't already exist Here is a list of recommended books on MariaDB and MySQL. We've provided links to Amazon.com here for convenience, but they can be found at many other bookstores, both online and off.
+ o "High Performance MySQL is the definitive guide to building fast, reliable systems with MySQL. Written by noted experts with years of real-world experience building very large systems, this book covers every aspect of MySQL performance in detail, and focuses on robustness, security, and data integrity. Learn advanced techniques in depth so you can bring out MySQL's full power." (From the book description at O'Reilly)
+ o A lot of examples of how to use MySQL. As with all of Paul's books, it's worth its weight in gold and even enjoyable reading for such a 'dry' subject.
+ o A must read for anyone wanting to write a plugin for MariaDB, written by the Sergei who designed the plugin interface for MySQL and MariaDB!
+ o A quick step-by-step guide for MySQL users and database administrators to tackle real-world challenges with MySQL configuration and administration
+ o Everything you wanted to know about the SQL 99 standard. Excellent reference book!
+ o First MariaDB book!
+ o For people who want to learn SQL and the basics of MariaDB.
+ o Free to read in the Knowledgebase!
+ o Not all topics are covered and some parts are slightly outdated, but still the best book on this topic.
+ o Now shipping. Purchase at Amazon.com or your favorite bookseller.
+ o The 'default' book to read if you wont to learn to use MySQL / MariaDB.
+ o This is the official guide to cover the passing of the two MySQL Certification examinations. It is valid till version 5.0 of the server, so while it misses all the features available in MySQL 5.1 and greater (including MariaDB 5.1 and greater), it provides a good basic understanding of MySQL for the end-user.
+ o This is the only book we know about that describes the internals of MariaDB / MySQL. A must have for anyone who wants to understand and develop on MariaDB!
+ * High Performance MySQL, Second Edition, By Baron Schwartz, Peter Zaitsev, Vadim Tkachenko, Jeremy D. Zawodny, Arjen Lentz, Derek J. Balling, et al.
+ * MariaDB Crash Course by Ben Forta
+ * MySQL (4th Edition) by Paul DuBois
+ * MySQL 5.0 Certification Study Guide, By Paul DuBois, Stefan Hinz, Carsten Pedersen
+ * MySQL 5.1 Plugin Development by Sergei Golubchik and Andrew Hutchings
+ * MySQL Admin Cookbook
+ * MySQL Cookbook by Paul DuBois
+ * SQL-99 Complete, Really by Peter Gulutzan & Trudy Pelzer.
+ * Understanding MySQL Internals by Sasha Pachev, former MySQL developer at MySQL AB.
+ For MariaDB / MySQL end users
+ For developers who want to code on MariaDB or MySQL
+ If you want to have your favorite MySQL / MariaDB book listed here, please leave a comment.
+INSERT INTO t1 (v0,v1,v64,v65000) VALUES ('y', 'yy', REPEAT('c',65), REPEAT('abcdefghi ',6501));
+Warnings:
+Warning 1265 Data truncated for column 'v0' at row 1
+Warning 1265 Data truncated for column 'v1' at row 1
+Warning 1265 Data truncated for column 'v64' at row 1
+Warning 1265 Data truncated for column 'v65000' at row 1
+INSERT INTO t1 (v0,v1,v64,v65000) SELECT v65000, v65000, CONCAT('a',v65000), CONCAT(v65000,v1) FROM t1;
+Warnings:
+Warning 1265 Data truncated for column 'v0' at row 5
+Warning 1265 Data truncated for column 'v1' at row 5
+Warning 1265 Data truncated for column 'v64' at row 5
+Warning 1265 Data truncated for column 'v65000' at row 5
+Warning 1265 Data truncated for column 'v0' at row 6
+Warning 1265 Data truncated for column 'v1' at row 6
+Warning 1265 Data truncated for column 'v64' at row 6
+SELECT v0, v1, v64, LENGTH(v65000) FROM t1;
+v0 v1 v64 LENGTH(v65000)
+ 0
+ a 0
+ H aHere is a list of recommended books on MariaDB and MySQL. We've 2966
+ a aabcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abc 65000
+ y Once there, double check that an article doesn't already exist 2965
+ y cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc 65000
+ALTER TABLE t1 ADD COLUMN v65536 VARCHAR(65536) ;
+Warnings:
+Note 1246 Converting column 'v65536' from VARCHAR to TEXT
+SHOW COLUMNS IN t1;
+Field Type Null Key Default Extra
+v0 varchar(0) YES NULL
+v1 varchar(1) YES NULL
+v64 varchar(64) NO PRI NULL
+v65000 varchar(65000) YES NULL
+v65536 mediumtext YES NULL
+DROP TABLE t1, t2;
+#
+# Endspace-comparison tests:
+#
+#
+# Issue 257: Sort order for varchars is different between
+# MyISAM/InnoDB vs MyRocks
+#
+create table t1 (
+pk varchar(64) CHARACTER SET latin1 COLLATE latin1_bin,
+col1 varchar(64),
+primary key (pk)
+);
+insert into t1 values ('a','a');
+insert into t1 values ('a ', 'a-space');
+ERROR 23000: Duplicate entry 'a ' for key 'PRIMARY'
+insert into t1 values('b ', 'b-2x-space');
+insert into t1 values ('b', 'b');
+ERROR 23000: Duplicate entry 'b' for key 'PRIMARY'
+select pk, hex(pk), col1 from t1;
+pk hex(pk) col1
+a 61 a
+b 622020 b-2x-space
+insert into t1 values ('a\t', 'a-tab');
+insert into t1 values ('a \t', 'a-space-tab');
+select pk, hex(pk), col1 from t1 order by pk;
+pk hex(pk) col1
+a 6109 a-tab
+a 612009 a-space-tab
+a 61 a
+b 622020 b-2x-space
+# Try longer values
+insert into t1 values (concat('a', repeat(' ',10)), 'a-10-x-space');
+ERROR 23000: Duplicate entry 'a ' for key 'PRIMARY'
+insert into t1 values (concat('c', repeat(' ',10)), 'c-10-x-space');
+select * from t1;
+pk col1
+a a-tab
+a a-space-tab
+a a
+b b-2x-space
+c c-10-x-space
+drop table t1;
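The duplicate-key errors above come from PAD SPACE comparison: under latin1_bin, 'a' and 'a ' compare equal because trailing spaces are insignificant, while 'a\t' stays distinct since a tab is not trailing padding. A hedged sketch in the same lowercase style (endspace_demo is an illustrative name):

  create table endspace_demo (
    pk varchar(8) character set latin1 collate latin1_bin primary key,
    note varchar(16)
  ) engine=rocksdb;
  insert into endspace_demo values ('a', 'plain');
  insert into endspace_demo values ('a ', 'space');  -- ER_DUP_ENTRY: equal to 'a' under PAD SPACE
  insert into endspace_demo values ('a\t', 'tab');   -- distinct: tab is not padding
  drop table endspace_demo;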
+# Secondary index
+create table t1 (
+pk int not null primary key,
+col1 varchar(64) CHARACTER SET latin1 COLLATE latin1_bin,
+col2 varchar(64),
+key (col1)
+);
+insert into t1 values (0, 'ab', 'a-b');
+insert into t1 values (1, 'a ', 'a-space');
+insert into t1 values (2, 'a', 'a');
+insert into t1 values (3, 'a \t', 'a-tab');
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+# Must show 'using index' for latin1_bin and utf8_bin:
+explain
+select col1, hex(col1) from t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL col1 67 NULL # Using index
+select col1, hex(col1) from t1;
+col1 hex(col1)
+a 61202009
+a 6120
+a 61
+ab 6162
+# Must show 'using index' for latin1_bin and utf8_bin:
+explain
+select col1, hex(col1) from t1 force index(col1) where col1 < 'b';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 # col1 col1 67 NULL # Using where; Using index
+select col1, hex(col1) from t1 force index(col1) where col1 < 'b';
+col1 hex(col1)
+a 61202009
+a 6120
+a 61
+ab 6162
+delete from t1;
+insert into t1 values(10, '', 'empty');
+insert into t1 values(11, repeat(' ', 8), '8x-space');
+insert into t1 values(12, repeat(' ', 16), '16x-space');
+insert into t1 values(13, repeat(' ', 24), '24x-space');
+insert into t1 values(14, concat(repeat(' ', 16),'a'), '16x-space-a');
+insert into t1 values(21, repeat(' ', 9), '9x-space');
+insert into t1 values(22, repeat(' ',17), '17x-space');
+insert into t1 values(23, repeat(' ',18), '18x-space');
+explain
+select pk, col1, hex(col1), length(col1) from t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 # NULL col1 67 NULL # Using index
+select pk, col1, hex(col1), length(col1) from t1;
+pk col1 hex(col1) length(col1)
+10 0
+11 2020202020202020 8
+12 20202020202020202020202020202020 16
+13 202020202020202020202020202020202020202020202020 24
+21 202020202020202020 9
+22 2020202020202020202020202020202020 17
+23 202020202020202020202020202020202020 18
+14 a 2020202020202020202020202020202061 17
+drop table t1;
+create table t1 (pk int primary key, a varchar(512), key(a)) engine=rocksdb;
+insert into t1 values (1, concat('a', repeat(' ', 300)));
+insert into t1 values (2, concat('b', repeat(' ', 300)));
+select pk,length(a) from t1 force index(a) where a < 'zz';
+pk length(a)
+1 301
+2 301
+select pk,length(a),rtrim(a) from t1 force index(a) where a < 'zz';
+pk length(a) rtrim(a)
+1 301 a
+2 301 b
+select pk,length(a),rtrim(a) from t1 ignore index(a) where a < 'zz';
+pk length(a) rtrim(a)
+1 301 a
+2 301 b
+drop table t1;
+#
+# Issue 257: Sort order for varchars is different between
+# MyISAM/InnoDB vs MyRocks
+#
+create table t1 (
+pk varchar(64) CHARACTER SET utf8 COLLATE utf8_bin,
+col1 varchar(64),
+primary key (pk)
+);
+insert into t1 values ('a','a');
+insert into t1 values ('a ', 'a-space');
+ERROR 23000: Duplicate entry 'a ' for key 'PRIMARY'
+insert into t1 values('b ', 'b-2x-space');
+insert into t1 values ('b', 'b');
+ERROR 23000: Duplicate entry 'b' for key 'PRIMARY'
+select pk, hex(pk), col1 from t1;
+pk hex(pk) col1
+a 61 a
+b 622020 b-2x-space
+insert into t1 values ('a\t', 'a-tab');
+insert into t1 values ('a \t', 'a-space-tab');
+select pk, hex(pk), col1 from t1 order by pk;
+pk hex(pk) col1
+a 6109 a-tab
+a 612009 a-space-tab
+a 61 a
+b 622020 b-2x-space
+# Try longer values
+insert into t1 values (concat('a', repeat(' ',10)), 'a-10-x-space');
+ERROR 23000: Duplicate entry 'a ' for key 'PRIMARY'
+insert into t1 values (concat('c', repeat(' ',10)), 'c-10-x-space');
+select * from t1;
+pk col1
+a a-tab
+a a-space-tab
+a a
+b b-2x-space
+c c-10-x-space
+drop table t1;
+# Secondary index
+create table t1 (
+pk int not null primary key,
+col1 varchar(64) CHARACTER SET utf8 COLLATE utf8_bin,
+col2 varchar(64),
+key (col1)
+);
+insert into t1 values (0, 'ab', 'a-b');
+insert into t1 values (1, 'a ', 'a-space');
+insert into t1 values (2, 'a', 'a');
+insert into t1 values (3, 'a \t', 'a-tab');
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+# Must show 'using index' for latin1_bin and utf8_bin:
+explain
+select col1, hex(col1) from t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL col1 195 NULL # Using index
+select col1, hex(col1) from t1;
+col1 hex(col1)
+a 61202009
+a 6120
+a 61
+ab 6162
+# Must show 'using index' for latin1_bin and utf8_bin:
+explain
+select col1, hex(col1) from t1 force index(col1) where col1 < 'b';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 # col1 col1 195 NULL # Using where; Using index
+select col1, hex(col1) from t1 force index(col1) where col1 < 'b';
+col1 hex(col1)
+a 61202009
+a 6120
+a 61
+ab 6162
+delete from t1;
+insert into t1 values(10, '', 'empty');
+insert into t1 values(11, repeat(' ', 8), '8x-space');
+insert into t1 values(12, repeat(' ', 16), '16x-space');
+insert into t1 values(13, repeat(' ', 24), '24x-space');
+insert into t1 values(14, concat(repeat(' ', 16),'a'), '16x-space-a');
+insert into t1 values(21, repeat(' ', 9), '9x-space');
+insert into t1 values(22, repeat(' ',17), '17x-space');
+insert into t1 values(23, repeat(' ',18), '18x-space');
+explain
+select pk, col1, hex(col1), length(col1) from t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 # NULL col1 195 NULL # Using index
+select pk, col1, hex(col1), length(col1) from t1;
+pk col1 hex(col1) length(col1)
+10 0
+11 2020202020202020 8
+12 20202020202020202020202020202020 16
+13 202020202020202020202020202020202020202020202020 24
+21 202020202020202020 9
+22 2020202020202020202020202020202020 17
+23 202020202020202020202020202020202020 18
+14 a 2020202020202020202020202020202061 17
+drop table t1;
+create table t1 (pk int primary key, a varchar(512), key(a)) engine=rocksdb;
+insert into t1 values (1, concat('a', repeat(' ', 300)));
+insert into t1 values (2, concat('b', repeat(' ', 300)));
+select pk,length(a) from t1 force index(a) where a < 'zz';
+pk length(a)
+1 301
+2 301
+select pk,length(a),rtrim(a) from t1 force index(a) where a < 'zz';
+pk length(a) rtrim(a)
+1 301 a
+2 301 b
+select pk,length(a),rtrim(a) from t1 ignore index(a) where a < 'zz';
+pk length(a) rtrim(a)
+1 301 a
+2 301 b
+drop table t1;
+#
+# Issue 257: Sort order for varchars is different between
+# MyISAM/InnoDB vs MyRocks
+#
+create table t1 (
+pk varchar(64) CHARACTER SET ucs2 COLLATE ucs2_bin,
+col1 varchar(64),
+primary key (pk)
+);
+insert into t1 values ('a','a');
+insert into t1 values ('a ', 'a-space');
+ERROR 23000: Duplicate entry 'a ' for key 'PRIMARY'
+insert into t1 values('b ', 'b-2x-space');
+insert into t1 values ('b', 'b');
+ERROR 23000: Duplicate entry 'b' for key 'PRIMARY'
+select pk, hex(pk), col1 from t1;
+pk hex(pk) col1
+a 0061 a
+b 006200200020 b-2x-space
+insert into t1 values ('a\t', 'a-tab');
+insert into t1 values ('a \t', 'a-space-tab');
+select pk, hex(pk), col1 from t1 order by pk;
+pk hex(pk) col1
+a 00610009 a-tab
+a 006100200009 a-space-tab
+a 0061 a
+b 006200200020 b-2x-space
+# Try longer values
+insert into t1 values (concat('a', repeat(' ',10)), 'a-10-x-space');
+ERROR 23000: Duplicate entry 'a ' for key 'PRIMARY'
+insert into t1 values (concat('c', repeat(' ',10)), 'c-10-x-space');
+select * from t1;
+pk col1
+a a-tab
+a a-space-tab
+a a
+b b-2x-space
+c c-10-x-space
+drop table t1;
+# Secondary index
+create table t1 (
+pk int not null primary key,
+col1 varchar(64) CHARACTER SET ucs2 COLLATE ucs2_bin,
+col2 varchar(64),
+key (col1)
+);
+insert into t1 values (0, 'ab', 'a-b');
+insert into t1 values (1, 'a ', 'a-space');
+insert into t1 values (2, 'a', 'a');
+insert into t1 values (3, 'a \t', 'a-tab');
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+# Must show 'using index' for latin1_bin and utf8_bin:
+explain
+select col1, hex(col1) from t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL #
+select col1, hex(col1) from t1;
+col1 hex(col1)
+ab 00610062
+a 00610020
+a 0061
+a 0061002000200009
+# Must show 'using index' for latin1_bin and utf8_bin:
+explain
+select col1, hex(col1) from t1 force index(col1) where col1 < 'b';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 # col1 col1 131 NULL # Using where
+select col1, hex(col1) from t1 force index(col1) where col1 < 'b';
+col1 hex(col1)
+a 0061002000200009
+a 00610020
+a 0061
+ab 00610062
+delete from t1;
+insert into t1 values(10, '', 'empty');
+insert into t1 values(11, repeat(' ', 8), '8x-space');
+insert into t1 values(12, repeat(' ', 16), '16x-space');
+insert into t1 values(13, repeat(' ', 24), '24x-space');
+insert into t1 values(14, concat(repeat(' ', 16),'a'), '16x-space-a');
+insert into t1 values(21, repeat(' ', 9), '9x-space');
+insert into t1 values(22, repeat(' ',17), '17x-space');
+insert into t1 values(23, repeat(' ',18), '18x-space');
+explain
+select pk, col1, hex(col1), length(col1) from t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 # NULL NULL NULL NULL #
+select pk, col1, hex(col1), length(col1) from t1;
+pk col1 hex(col1) length(col1)
+10 0
+11 00200020002000200020002000200020 16
+12 0020002000200020002000200020002000200020002000200020002000200020 32
+13 002000200020002000200020002000200020002000200020002000200020002000200020002000200020002000200020 48
+14 a 00200020002000200020002000200020002000200020002000200020002000200061 34
+21 002000200020002000200020002000200020 18
+22 00200020002000200020002000200020002000200020002000200020002000200020 34
+23 002000200020002000200020002000200020002000200020002000200020002000200020 36
+drop table t1;
+create table t1 (pk int primary key, a varchar(512), key(a)) engine=rocksdb;
+insert into t1 values (1, concat('a', repeat(' ', 300)));
+insert into t1 values (2, concat('b', repeat(' ', 300)));
+select pk,length(a) from t1 force index(a) where a < 'zz';
+pk length(a)
+1 301
+2 301
+select pk,length(a),rtrim(a) from t1 force index(a) where a < 'zz';
+pk length(a) rtrim(a)
+1 301 a
+2 301 b
+select pk,length(a),rtrim(a) from t1 ignore index(a) where a < 'zz';
+pk length(a) rtrim(a)
+1 301 a
+2 301 b
+drop table t1;
+#
+# Issue 257: Sort order for varchars is different between
+# MyISAM/InnoDB vs MyRocks
+#
+create table t1 (
+pk varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin,
+col1 varchar(64),
+primary key (pk)
+);
+insert into t1 values ('a','a');
+insert into t1 values ('a ', 'a-space');
+ERROR 23000: Duplicate entry 'a ' for key 'PRIMARY'
+insert into t1 values('b ', 'b-2x-space');
+insert into t1 values ('b', 'b');
+ERROR 23000: Duplicate entry 'b' for key 'PRIMARY'
+select pk, hex(pk), col1 from t1;
+pk hex(pk) col1
+a 61 a
+b 622020 b-2x-space
+insert into t1 values ('a\t', 'a-tab');
+insert into t1 values ('a \t', 'a-space-tab');
+select pk, hex(pk), col1 from t1 order by pk;
+pk hex(pk) col1
+a 6109 a-tab
+a 612009 a-space-tab
+a 61 a
+b 622020 b-2x-space
+# Try longer values
+insert into t1 values (concat('a', repeat(' ',10)), 'a-10-x-space');
+ERROR 23000: Duplicate entry 'a ' for key 'PRIMARY'
+insert into t1 values (concat('c', repeat(' ',10)), 'c-10-x-space');
+select * from t1;
+pk col1
+a a-tab
+a a-space-tab
+a a
+b b-2x-space
+c c-10-x-space
+drop table t1;
+# Secondary index
+create table t1 (
+pk int not null primary key,
+col1 varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin,
+col2 varchar(64),
+key (col1)
+);
+insert into t1 values (0, 'ab', 'a-b');
+insert into t1 values (1, 'a ', 'a-space');
+insert into t1 values (2, 'a', 'a');
+insert into t1 values (3, 'a \t', 'a-tab');
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+# Must show 'using index' for latin1_bin and utf8_bin:
+explain
+select col1, hex(col1) from t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL #
+select col1, hex(col1) from t1;
+col1 hex(col1)
+ab 6162
+a 6120
+a 61
+a 61202009
+# Must show 'using index' for latin1_bin and utf8_bin:
+explain
+select col1, hex(col1) from t1 force index(col1) where col1 < 'b';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 # col1 col1 259 NULL # Using where
+select col1, hex(col1) from t1 force index(col1) where col1 < 'b';
+col1 hex(col1)
+a 61202009
+a 6120
+a 61
+ab 6162
+delete from t1;
+insert into t1 values(10, '', 'empty');
+insert into t1 values(11, repeat(' ', 8), '8x-space');
+insert into t1 values(12, repeat(' ', 16), '16x-space');
+insert into t1 values(13, repeat(' ', 24), '24x-space');
+insert into t1 values(14, concat(repeat(' ', 16),'a'), '16x-space-a');
+insert into t1 values(21, repeat(' ', 9), '9x-space');
+insert into t1 values(22, repeat(' ',17), '17x-space');
+insert into t1 values(23, repeat(' ',18), '18x-space');
+explain
+select pk, col1, hex(col1), length(col1) from t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 # NULL NULL NULL NULL #
+select pk, col1, hex(col1), length(col1) from t1;
+pk col1 hex(col1) length(col1)
+10 0
+11 2020202020202020 8
+12 20202020202020202020202020202020 16
+13 202020202020202020202020202020202020202020202020 24
+14 a 2020202020202020202020202020202061 17
+21 202020202020202020 9
+22 2020202020202020202020202020202020 17
+23 202020202020202020202020202020202020 18
+drop table t1;
+create table t1 (pk int primary key, a varchar(512), key(a)) engine=rocksdb;
+insert into t1 values (1, concat('a', repeat(' ', 300)));
+insert into t1 values (2, concat('b', repeat(' ', 300)));
+select pk,length(a) from t1 force index(a) where a < 'zz';
+pk length(a)
+1 301
+2 301
+select pk,length(a),rtrim(a) from t1 force index(a) where a < 'zz';
+pk length(a) rtrim(a)
+1 301 a
+2 301 b
+select pk,length(a),rtrim(a) from t1 ignore index(a) where a < 'zz';
+pk length(a) rtrim(a)
+1 301 a
+2 301 b
+drop table t1;
+#
+# Issue 257: Sort order for varchars is different between
+# MyISAM/InnoDB vs MyRocks
+#
+create table t1 (
+pk varchar(64) CHARACTER SET utf16 COLLATE utf16_bin,
+col1 varchar(64),
+primary key (pk)
+);
+insert into t1 values ('a','a');
+insert into t1 values ('a ', 'a-space');
+ERROR 23000: Duplicate entry 'a ' for key 'PRIMARY'
+insert into t1 values('b ', 'b-2x-space');
+insert into t1 values ('b', 'b');
+ERROR 23000: Duplicate entry 'b' for key 'PRIMARY'
+select pk, hex(pk), col1 from t1;
+pk hex(pk) col1
+a 0061 a
+b 006200200020 b-2x-space
+insert into t1 values ('a\t', 'a-tab');
+insert into t1 values ('a \t', 'a-space-tab');
+select pk, hex(pk), col1 from t1 order by pk;
+pk hex(pk) col1
+a 00610009 a-tab
+a 006100200009 a-space-tab
+a 0061 a
+b 006200200020 b-2x-space
+# Try longer values
+insert into t1 values (concat('a', repeat(' ',10)), 'a-10-x-space');
+ERROR 23000: Duplicate entry 'a ' for key 'PRIMARY'
+insert into t1 values (concat('c', repeat(' ',10)), 'c-10-x-space');
+select * from t1;
+pk col1
+a a-tab
+a a-space-tab
+a a
+b b-2x-space
+c c-10-x-space
+drop table t1;
+# Secondary index
+create table t1 (
+pk int not null primary key,
+col1 varchar(64) CHARACTER SET utf16 COLLATE utf16_bin,
+col2 varchar(64),
+key (col1)
+);
+insert into t1 values (0, 'ab', 'a-b');
+insert into t1 values (1, 'a ', 'a-space');
+insert into t1 values (2, 'a', 'a');
+insert into t1 values (3, 'a \t', 'a-tab');
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+# Must show 'using index' for latin1_bin and utf8_bin:
+explain
+select col1, hex(col1) from t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL #
+select col1, hex(col1) from t1;
+col1 hex(col1)
+ab 00610062
+a 00610020
+a 0061
+a 0061002000200009
+# Must show 'using index' for latin1_bin and utf8_bin:
+explain
+select col1, hex(col1) from t1 force index(col1) where col1 < 'b';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 # col1 col1 259 NULL # Using where
+select col1, hex(col1) from t1 force index(col1) where col1 < 'b';
+col1 hex(col1)
+a 0061002000200009
+a 00610020
+a 0061
+ab 00610062
+delete from t1;
+insert into t1 values(10, '', 'empty');
+insert into t1 values(11, repeat(' ', 8), '8x-space');
+insert into t1 values(12, repeat(' ', 16), '16x-space');
+insert into t1 values(13, repeat(' ', 24), '24x-space');
+insert into t1 values(14, concat(repeat(' ', 16),'a'), '16x-space-a');
+insert into t1 values(21, repeat(' ', 9), '9x-space');
+insert into t1 values(22, repeat(' ',17), '17x-space');
+insert into t1 values(23, repeat(' ',18), '18x-space');
+explain
+select pk, col1, hex(col1), length(col1) from t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 # NULL NULL NULL NULL #
+select pk, col1, hex(col1), length(col1) from t1;
+pk col1 hex(col1) length(col1)
+10 0
+11 00200020002000200020002000200020 16
+12 0020002000200020002000200020002000200020002000200020002000200020 32
+13 002000200020002000200020002000200020002000200020002000200020002000200020002000200020002000200020 48
+14 a 00200020002000200020002000200020002000200020002000200020002000200061 34
+21 002000200020002000200020002000200020 18
+22 00200020002000200020002000200020002000200020002000200020002000200020 34
+23 002000200020002000200020002000200020002000200020002000200020002000200020 36
+drop table t1;
+create table t1 (pk int primary key, a varchar(512), key(a)) engine=rocksdb;
+insert into t1 values (1, concat('a', repeat(' ', 300)));
+insert into t1 values (2, concat('b', repeat(' ', 300)));
+select pk,length(a) from t1 force index(a) where a < 'zz';
+pk length(a)
+1 301
+2 301
+select pk,length(a),rtrim(a) from t1 force index(a) where a < 'zz';
+pk length(a) rtrim(a)
+1 301 a
+2 301 b
+select pk,length(a),rtrim(a) from t1 ignore index(a) where a < 'zz';
+pk length(a) rtrim(a)
+1 301 a
+2 301 b
+drop table t1;
+create table t1 (
+pk int primary key,
+col1 varchar(10) collate utf8mb4_bin not null,
+col2 varchar(20),
+key(col1)
+) engine=rocksdb;
+insert into t1 values (1, 'ab','ab');
+insert into t1 values (2, 'ab\0','ab0');
+select pk, hex(col1), col2 from t1 force index(col1) order by col1;
+pk hex(col1) col2
+2 616200 ab0
+1 6162 ab
+select pk, hex(col1), col2 from t1 ignore index(col1) order by col1;
+pk hex(col1) col2
+2 616200 ab0
+1 6162 ab
+drop table t1;
+create table t (id int primary key, email varchar(100), KEY email_i (email(30)));
+insert into t values (1, 'abcabcabcabcabcabcabcabcabcabcabc ');
+explain select 'email_i' as index_name, count(*) AS count from t force index(email_i);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t ALL NULL NULL NULL NULL #
+select 'email_i' as index_name, count(*) AS count from t force index(email_i);
+index_name count
+email_i 1
+drop table t;
+set @save_rocksdb_checksums_pct = @@global.rocksdb_checksums_pct;
+set @save_rocksdb_verify_row_debug_checksums = @@session.rocksdb_verify_row_debug_checksums;
+set global rocksdb_checksums_pct = 100;
+set session rocksdb_verify_row_debug_checksums = on;
+create table t (id int primary key, email varchar(100), KEY email_i (email(30)));
+insert into t values (1, 'a');
+explain select 'email_i' as index_name, count(*) AS count from t force index(email_i);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t ALL NULL NULL NULL NULL #
+select 'email_i' as index_name, count(*) AS count from t force index(email_i);
+index_name count
+email_i 1
+drop table t;
+set global rocksdb_checksums_pct = @save_rocksdb_checksums_pct;
+set session rocksdb_verify_row_debug_checksums = @save_rocksdb_verify_row_debug_checksums;
+drop table if exists t;
+Warnings:
+Note 1051 Unknown table 'test.t'
+create table t (h varchar(31) character set utf8 collate utf8_bin not null, i varchar(19) collate latin1_bin not null, primary key(i), key(h)) engine=rocksdb;
+insert into t(i,h) values('a','b');
+check table t;
+Table Op Msg_type Msg_text
+test.t check status OK
+alter table t modify h varchar(31) character set cp1257 collate cp1257_bin not null;
+check table t;
+Table Op Msg_type Msg_text
+test.t check status OK
+drop table t;
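All of the Issue 257 groups above exercise the same PAD SPACE rule: under the *_bin collations trailing spaces are ignored for comparison, so 'a' and 'a ' collide as keys, while TAB (0x09) compares below SPACE (0x20), which is why the a-tab rows lead each ORDER BY. MyRocks therefore has to encode VARCHAR keys so that a plain memcmp agrees with the collation while the stored bytes (see the hex() columns) still survive the round trip. A minimal sketch of the comparison rule, with a hypothetical table name:

CREATE TABLE pad_demo (k VARCHAR(8) COLLATE latin1_bin PRIMARY KEY) ENGINE=rocksdb;
INSERT INTO pad_demo VALUES ('a');
INSERT INTO pad_demo VALUES ('a ');    -- ER_DUP_ENTRY: equal to 'a' after padding
INSERT INTO pad_demo VALUES ('a\t');   -- distinct: 0x09 sorts before 0x20
SELECT hex(k) FROM pad_demo ORDER BY k;  -- 6109 first, then 61
DROP TABLE pad_demo;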
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/unique_check.result b/storage/rocksdb/mysql-test/rocksdb/r/unique_check.result
new file mode 100644
index 00000000000..db9b119043a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/unique_check.result
@@ -0,0 +1,117 @@
+connect con1, localhost, root,,;
+connect con2, localhost, root,,;
+connect con3, localhost, root,,;
+connection default;
+set debug_sync='RESET';
+drop table if exists t1;
+create table t1 (id int, value int, primary key (id)) engine=rocksdb;
+create table t2 (id int, id2 int, value int, primary key (id), unique key (id2)) engine=rocksdb;
+connection con1;
+begin;
+insert into t1 values (1,1);
+connection con2;
+set session rocksdb_lock_wait_timeout=50;
+begin;
+insert into t1 values (1,2);
+connection con1;
+commit;
+connection con2;
+ERROR 23000: Duplicate entry '1' for key 'PRIMARY'
+commit;
+select * from t1;
+id value
+1 1
+truncate table t1;
+connection con1;
+begin;
+insert into t2 values (1,1,1);
+connection con2;
+begin;
+insert into t2 values (2,1,2);
+connection con1;
+commit;
+connection con2;
+ERROR 23000: Duplicate entry '1' for key 'id2'
+commit;
+select * from t2;
+id id2 value
+1 1 1
+truncate table t2;
+connection con1;
+begin;
+insert into t1 values (1,1);
+connection con2;
+begin;
+insert into t1 values (1,2);
+connection con1;
+rollback;
+connection con2;
+commit;
+select * from t1;
+id value
+1 2
+truncate table t1;
+connection con1;
+begin;
+insert into t2 values (1,1,1);
+connection con2;
+begin;
+insert into t2 values (2,1,2);
+connection con1;
+rollback;
+connection con2;
+commit;
+select * from t2;
+id id2 value
+2 1 2
+truncate table t2;
+connection con1;
+set debug_sync='rocksdb.update_write_row_after_unique_check SIGNAL parked1 WAIT_FOR go';
+insert into t1 values (1,1);
+connection default;
+set debug_sync='now WAIT_FOR parked1';
+connection con2;
+set debug_sync='rocksdb.update_write_row_after_unique_check SIGNAL parked2 WAIT_FOR go';
+insert into t2 values (1,1,1);
+connection default;
+set debug_sync='now WAIT_FOR parked2';
+connection con3;
+set session rocksdb_lock_wait_timeout=1;
+insert into t1 values (1,2);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+insert into t2 values (2,1,2);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection default;
+set debug_sync='now SIGNAL go';
+connection con1;
+connection con2;
+connection default;
+insert into t1 values (1,2);
+ERROR 23000: Duplicate entry '1' for key 'PRIMARY'
+insert into t2 values (2,1,2);
+ERROR 23000: Duplicate entry '1' for key 'id2'
+select * from t1;
+id value
+1 1
+select * from t2;
+id id2 value
+1 1 1
+connection default;
+set debug_sync='RESET';
+disconnect con1;
+disconnect con2;
+disconnect con3;
+drop table t1, t2;
+connection default;
+drop table if exists t1,t2,t3;
+create table t1 (id int, value int, primary key (id)) engine=rocksdb;
+create table t2 (id int, id2 int, value int, primary key (id), unique key (id2)) engine=rocksdb;
+create table t3 (id int, value int) engine=rocksdb;
+SET @old_val = @@session.unique_checks;
+set @@session.unique_checks = FALSE;
+insert into t1 values (1, 1), (1, 2);
+insert into t2 values (1, 1, 1), (1, 2, 1);
+ERROR 23000: Duplicate entry '1' for key 'PRIMARY'
+insert into t3 values (1, 1), (1, 1);
+set @@session.unique_checks = @old_val;
+drop table t1, t2, t3;
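The tail of this file shows the unique_checks trade-off: with the variable off, MyRocks skips the duplicate lookup, so the duplicate PRIMARY KEY rows go into t1 undetected, while the t2 statement above (the table carrying a unique secondary key) still reports the conflict. The setting is intended for bulk-loading data already known to be unique; a sketch of the idiom:

SET @old_checks = @@session.unique_checks;
SET SESSION unique_checks = 0;
-- bulk INSERTs here: duplicate detection is now the loader's responsibility
SET SESSION unique_checks = @old_checks;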
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/unique_sec.result b/storage/rocksdb/mysql-test/rocksdb/r/unique_sec.result
new file mode 100644
index 00000000000..a37e7f1cb31
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/unique_sec.result
@@ -0,0 +1,221 @@
+DROP TABLE IF EXISTS t1;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+CREATE TABLE t1 (id1 INT NOT NULL, id2 INT NOT NULL, id3 VARCHAR(32),
+id4 INT, id5 VARCHAR(32),
+value1 INT, value2 INT, value3 VARCHAR(32),
+PRIMARY KEY (id1, id2) ,
+UNIQUE INDEX (id2, id1) ,
+UNIQUE INDEX (id2, id3, id4) ,
+INDEX (id1) ,
+INDEX (id3, id1) ,
+UNIQUE INDEX(id5) ,
+INDEX (id2, id5)) ENGINE=ROCKSDB;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+10
+# Test inserting a key that returns duplicate error
+INSERT INTO t1 VALUES (1, 1, 11, 11, 11, 11, 11, 11);
+ERROR 23000: Duplicate entry '1-1' for key 'PRIMARY'
+INSERT INTO t1 VALUES (5, 5, 11, 11, 11, 11, 11, 11);
+ERROR 23000: Duplicate entry '5-5' for key 'PRIMARY'
+INSERT INTO t1 VALUES (10, 10, 11, 11, 11, 11, 11, 11);
+ERROR 23000: Duplicate entry '10-10' for key 'PRIMARY'
+INSERT INTO t1 VALUES (11, 1, 1, 1, 11, 11, 11, 11);
+ERROR 23000: Duplicate entry '1-1-1' for key 'id2_2'
+INSERT INTO t1 VALUES (11, 5, 5, 5, 11, 11, 11, 11);
+ERROR 23000: Duplicate entry '5-5-5' for key 'id2_2'
+INSERT INTO t1 VALUES (11, 10, 10, 10, 11, 11, 11, 11);
+ERROR 23000: Duplicate entry '10-10-10' for key 'id2_2'
+INSERT INTO t1 VALUES (11, 11, 11, 11, 1, 11, 11, 11);
+ERROR 23000: Duplicate entry '1' for key 'id5'
+INSERT INTO t1 VALUES (11, 11, 11, 11, 5, 11, 11, 11);
+ERROR 23000: Duplicate entry '5' for key 'id5'
+INSERT INTO t1 VALUES (11, 11, 11, 11, 10, 11, 11, 11);
+ERROR 23000: Duplicate entry '10' for key 'id5'
+# Test updating a key that returns duplicate error
+UPDATE t1 SET id2=1, id3=1, id4=1 WHERE id1=2;
+ERROR 23000: Duplicate entry '1-1-1' for key 'id2_2'
+UPDATE t1 SET id2=1, id3=1, id4=1;
+ERROR 23000: Duplicate entry '1-1-1' for key 'id2_2'
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+10
+# Test updating a key to itself
+UPDATE t1 set id2=id4;
+UPDATE t1 set id5=id3, value1=value2;
+UPDATE t1 set value3=value1;
+# Test modifying values should not cause duplicates
+UPDATE t1 SET value1=value3+1;
+UPDATE t1 SET value3=value3 div 2;
+UPDATE t1 SET value2=value3;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+10
+# Test NULL values are considered unique
+INSERT INTO t1 VALUES (20, 20, 20, NULL, NULL, 20, 20, 20);
+INSERT INTO t1 VALUES (21, 20, 20, NULL, NULL, 20, 20, 20);
+INSERT INTO t1 VALUES (22, 20, 20, NULL, NULL, 20, 20, 20);
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+13
+# Adding multiple rows where one of the rows fails the duplicate
+# check should fail the whole statement
+INSERT INTO t1 VALUES (23, 23, 23, 23, 23, 23, 23, 23),
+(24, 24, 24, 24, 24, 24, 24, 24),
+(25, 10, 10, 10, 25, 25, 25, 25),
+(26, 26, 26, 26, 26, 26, 26, 26);
+ERROR 23000: Duplicate entry '10-10-10' for key 'id2_2'
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+13
+connection con1;
+BEGIN;
+INSERT INTO t1 VALUES (30, 31, 32, 33, 34, 30, 30, 30);
+connection con2;
+BEGIN;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+13
+# Primary key should prevent duplicate on insert
+INSERT INTO t1 VALUES (30, 31, 30, 30, 30, 30, 30, 30);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+# Primary key should prevent duplicate on update
+UPDATE t1 SET id1=30, id2=31 WHERE id2=10;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+# Unique secondary key should prevent duplicate on insert
+INSERT INTO t1 VALUES (31, 31, 32, 33, 30, 30, 30, 30);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+INSERT INTO t1 VALUES (32, 32, 32, 32, 34, 32, 32, 32);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+# Unique secondary key should prevent duplicate on update
+UPDATE t1 SET id2=31, id3=32, id4=33 WHERE id2=8;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+UPDATE t1 SET id5=34 WHERE id2=8;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+# Adding multiple rows where one of the rows fails the duplicate
+# check should fail the whole statement
+INSERT INTO t1 VALUES (35, 35, 35, 35, 35, 35, 35, 35),
+(36, 36, 36, 36, 36, 36, 36, 36),
+(37, 31, 32, 33, 37, 37, 37, 37),
+(38, 38, 38, 38, 38, 38, 38, 38);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+INSERT INTO t1 VALUES (35, 35, 35, 35, 35, 35, 35, 35),
+(36, 36, 36, 36, 36, 36, 36, 36),
+(37, 37, 37, 37, 34, 37, 37, 37),
+(38, 38, 38, 38, 38, 38, 38, 38);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+# NULL values are unique and duplicates in value fields are ignored
+INSERT INTO t1 VALUES (37, 31, 32, NULL, 37, 37, 37, 37),
+(38, 31, 32, NULL, 38, 37, 37, 37),
+(39, 31, 32, NULL, 39, 37, 37, 37);
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+16
+# Fail on duplicate key update for row added in our transaction
+UPDATE t1 SET id5=37 WHERE id1=38;
+ERROR 23000: Duplicate entry '37' for key 'id5'
+# Fail on lock timeout for row modified in another transaction
+UPDATE t1 SET id5=34 WHERE id1=38;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+# NULL values are unique
+UPDATE t1 SET id5=NULL WHERE value1 > 37;
+connection con1;
+COMMIT;
+connection con2;
+COMMIT;
+connection con2;
+BEGIN;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+17
+connection con1;
+BEGIN;
+INSERT INTO t1 VALUES (40, 40, 40, 40, 40, 40, 40, 40);
+connection con2;
+# When transaction is pending, fail on lock acquisition
+INSERT INTO t1 VALUES (40, 40, 40, 40, 40, 40, 40, 40);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+INSERT INTO t1 VALUES (41, 40, 40, 40, 40, 40, 40, 40);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+17
+connection con1;
+COMMIT;
+connection con2;
+# When transaction is committed, fail on duplicate key
+INSERT INTO t1 VALUES (40, 40, 40, 40, 40, 40, 40, 40);
+Got one of the listed errors
+INSERT INTO t1 VALUES (41, 40, 40, 40, 40, 40, 40, 40);
+ERROR 23000: Duplicate entry '40-40-40' for key 'id2_2'
+ROLLBACK;
+SELECT * FROM t1;
+id1 id2 id3 id4 id5 value1 value2 value3
+1 1 1 1 1 2 0 0
+2 2 2 2 2 3 1 1
+3 3 3 3 3 4 1 1
+4 4 4 4 4 5 2 2
+5 5 5 5 5 6 2 2
+6 6 6 6 6 7 3 3
+7 7 7 7 7 8 3 3
+8 8 8 8 8 9 4 4
+9 9 9 9 9 10 4 4
+10 10 10 10 10 11 5 5
+20 20 20 NULL NULL 20 20 20
+21 20 20 NULL NULL 20 20 20
+22 20 20 NULL NULL 20 20 20
+30 31 32 33 34 30 30 30
+37 31 32 NULL 37 37 37 37
+38 31 32 NULL 38 37 37 37
+39 31 32 NULL 39 37 37 37
+40 40 40 40 40 40 40 40
+disconnect con1;
+disconnect con2;
+connection default;
+DROP TABLE t1;
+#
+# Issue #88: Creating unique index over column with duplicate values succeeds
+#
+create table t1 (pk int primary key, a int) engine=rocksdb;
+insert into t1 values
+(1, 1),
+(2, 2),
+(3, 3),
+(4, 1),
+(5, 5);
+alter table t1 add unique(a);
+ERROR 23000: Duplicate entry '1' for key 'a'
+drop table t1;
+#
+# Issue #111
+#
+CREATE TABLE t2 (pk int, a int, PRIMARY KEY (pk, a), UNIQUE KEY (a)) ENGINE=ROCKSDB PARTITION BY KEY (a) PARTITIONS 16;
+INSERT INTO t2 VALUES (1,1);
+INSERT INTO t2 VALUES (1,1);
+ERROR 23000: Duplicate entry '1-1' for key 'PRIMARY'
+INSERT INTO t2 VALUES (2,1);
+ERROR 23000: Duplicate entry '1' for key 'a'
+DROP TABLE t2;
+#
+# Issue #491 (https://github.com/facebook/mysql-5.6/issues/491)
+#
+CREATE TABLE t (a BLOB, PRIMARY KEY(a(2)), UNIQUE KEY (a(1))) engine=rocksdb;
+INSERT INTO t VALUES('a');
+CHECK TABLE t EXTENDED;
+Table Op Msg_type Msg_text
+test.t check status OK
+DROP TABLE t;
+CREATE TABLE t (a VARCHAR(255), PRIMARY KEY(a), UNIQUE KEY (a(1))) engine=rocksdb;
+INSERT INTO t VALUES('a');
+CHECK TABLE t EXTENDED;
+Table Op Msg_type Msg_text
+test.t check status OK
+DROP TABLE t;
+CREATE TABLE t (a VARCHAR(255), PRIMARY KEY(a(2)), UNIQUE KEY (a(1))) engine=rocksdb;
+INSERT INTO t VALUES('a');
+CHECK TABLE t EXTENDED;
+Table Op Msg_type Msg_text
+test.t check status OK
+DROP TABLE t;
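Two rules recur throughout this file: SQL UNIQUE indexes treat every NULL as distinct (rows 20, 21 and 22 coexist above), and a multi-row INSERT that hits a duplicate is undone as a whole. A minimal sketch of the NULL rule; the table name is hypothetical:

CREATE TABLE null_uniq (pk INT PRIMARY KEY, u INT, UNIQUE KEY(u)) ENGINE=rocksdb;
INSERT INTO null_uniq VALUES (1, NULL), (2, NULL), (3, NULL);  -- all accepted
INSERT INTO null_uniq VALUES (4, 7);
INSERT INTO null_uniq VALUES (5, 7);   -- ER_DUP_ENTRY '7' for key 'u'
DROP TABLE null_uniq;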
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/unique_sec_rev_cf.result b/storage/rocksdb/mysql-test/rocksdb/r/unique_sec_rev_cf.result
new file mode 100644
index 00000000000..210c74098af
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/unique_sec_rev_cf.result
@@ -0,0 +1,177 @@
+DROP TABLE IF EXISTS t1;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+CREATE TABLE t1 (id1 INT NOT NULL, id2 INT NOT NULL, id3 VARCHAR(32),
+id4 INT, id5 VARCHAR(32),
+value1 INT, value2 INT, value3 VARCHAR(32),
+PRIMARY KEY (id1, id2) COMMENT 'rev:cf',
+UNIQUE INDEX (id2, id1) COMMENT 'rev:cf',
+UNIQUE INDEX (id2, id3, id4) COMMENT 'rev:cf',
+INDEX (id1) COMMENT 'rev:cf',
+INDEX (id3, id1) COMMENT 'rev:cf',
+UNIQUE INDEX(id5) COMMENT 'rev:cf',
+INDEX (id2, id5)) ENGINE=ROCKSDB;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+10
+# Test inserting a key that returns duplicate error
+INSERT INTO t1 VALUES (1, 1, 11, 11, 11, 11, 11, 11);
+ERROR 23000: Duplicate entry '1-1' for key 'PRIMARY'
+INSERT INTO t1 VALUES (5, 5, 11, 11, 11, 11, 11, 11);
+ERROR 23000: Duplicate entry '5-5' for key 'PRIMARY'
+INSERT INTO t1 VALUES (10, 10, 11, 11, 11, 11, 11, 11);
+ERROR 23000: Duplicate entry '10-10' for key 'PRIMARY'
+INSERT INTO t1 VALUES (11, 1, 1, 1, 11, 11, 11, 11);
+ERROR 23000: Duplicate entry '1-1-1' for key 'id2_2'
+INSERT INTO t1 VALUES (11, 5, 5, 5, 11, 11, 11, 11);
+ERROR 23000: Duplicate entry '5-5-5' for key 'id2_2'
+INSERT INTO t1 VALUES (11, 10, 10, 10, 11, 11, 11, 11);
+ERROR 23000: Duplicate entry '10-10-10' for key 'id2_2'
+INSERT INTO t1 VALUES (11, 11, 11, 11, 1, 11, 11, 11);
+ERROR 23000: Duplicate entry '1' for key 'id5'
+INSERT INTO t1 VALUES (11, 11, 11, 11, 5, 11, 11, 11);
+ERROR 23000: Duplicate entry '5' for key 'id5'
+INSERT INTO t1 VALUES (11, 11, 11, 11, 10, 11, 11, 11);
+ERROR 23000: Duplicate entry '10' for key 'id5'
+# Test updating a key that returns duplicate error
+UPDATE t1 SET id2=1, id3=1, id4=1 WHERE id1=2;
+ERROR 23000: Duplicate entry '1-1-1' for key 'id2_2'
+UPDATE t1 SET id2=1, id3=1, id4=1;
+ERROR 23000: Duplicate entry '1-1-1' for key 'id2_2'
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+10
+# Test updating a key to itself
+UPDATE t1 set id2=id4;
+UPDATE t1 set id5=id3, value1=value2;
+UPDATE t1 set value3=value1;
+# Test modifying values should not cause duplicates
+UPDATE t1 SET value1=value3+1;
+UPDATE t1 SET value3=value3 div 2;
+UPDATE t1 SET value2=value3;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+10
+# Test NULL values are considered unique
+INSERT INTO t1 VALUES (20, 20, 20, NULL, NULL, 20, 20, 20);
+INSERT INTO t1 VALUES (21, 20, 20, NULL, NULL, 20, 20, 20);
+INSERT INTO t1 VALUES (22, 20, 20, NULL, NULL, 20, 20, 20);
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+13
+# Adding multiple rows where one of the rows fails the duplicate
+# check should fail the whole statement
+INSERT INTO t1 VALUES (23, 23, 23, 23, 23, 23, 23, 23),
+(24, 24, 24, 24, 24, 24, 24, 24),
+(25, 10, 10, 10, 25, 25, 25, 25),
+(26, 26, 26, 26, 26, 26, 26, 26);
+ERROR 23000: Duplicate entry '10-10-10' for key 'id2_2'
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+13
+connection con1;
+BEGIN;
+INSERT INTO t1 VALUES (30, 31, 32, 33, 34, 30, 30, 30);
+connection con2;
+BEGIN;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+13
+# Primary key should prevent duplicate on insert
+INSERT INTO t1 VALUES (30, 31, 30, 30, 30, 30, 30, 30);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+# Primary key should prevent duplicate on update
+UPDATE t1 SET id1=30, id2=31 WHERE id2=10;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+# Unique secondary key should prevent duplicate on insert
+INSERT INTO t1 VALUES (31, 31, 32, 33, 30, 30, 30, 30);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+INSERT INTO t1 VALUES (32, 32, 32, 32, 34, 32, 32, 32);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+# Unique secondary key should prevent duplicate on update
+UPDATE t1 SET id2=31, id3=32, id4=33 WHERE id2=8;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+UPDATE t1 SET id5=34 WHERE id2=8;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+# Adding multiple rows where one of the rows fails the duplicate
+# check should fail the whole statement
+INSERT INTO t1 VALUES (35, 35, 35, 35, 35, 35, 35, 35),
+(36, 36, 36, 36, 36, 36, 36, 36),
+(37, 31, 32, 33, 37, 37, 37, 37),
+(38, 38, 38, 38, 38, 38, 38, 38);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+INSERT INTO t1 VALUES (35, 35, 35, 35, 35, 35, 35, 35),
+(36, 36, 36, 36, 36, 36, 36, 36),
+(37, 37, 37, 37, 34, 37, 37, 37),
+(38, 38, 38, 38, 38, 38, 38, 38);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+# NULL values are unique and duplicates in value fields are ignored
+INSERT INTO t1 VALUES (37, 31, 32, NULL, 37, 37, 37, 37),
+(38, 31, 32, NULL, 38, 37, 37, 37),
+(39, 31, 32, NULL, 39, 37, 37, 37);
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+16
+# Fail on duplicate key update for row added in our transaction
+UPDATE t1 SET id5=37 WHERE id1=38;
+ERROR 23000: Duplicate entry '37' for key 'id5'
+# Fail on lock timeout for row modified in another transaction
+UPDATE t1 SET id5=34 WHERE id1=38;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+# NULL values are unique
+UPDATE t1 SET id5=NULL WHERE value1 > 37;
+connection con1;
+COMMIT;
+connection con2;
+COMMIT;
+connection con2;
+BEGIN;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+17
+connection con1;
+BEGIN;
+INSERT INTO t1 VALUES (40, 40, 40, 40, 40, 40, 40, 40);
+connection con2;
+# When transaction is pending, fail on lock acquisition
+INSERT INTO t1 VALUES (40, 40, 40, 40, 40, 40, 40, 40);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+INSERT INTO t1 VALUES (41, 40, 40, 40, 40, 40, 40, 40);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+17
+connection con1;
+COMMIT;
+connection con2;
+# When transaction is committed, fail on duplicate key
+INSERT INTO t1 VALUES (40, 40, 40, 40, 40, 40, 40, 40);
+Got one of the listed errors
+INSERT INTO t1 VALUES (41, 40, 40, 40, 40, 40, 40, 40);
+ERROR 23000: Duplicate entry '40-40-40' for key 'id2_2'
+ROLLBACK;
+SELECT * FROM t1;
+id1 id2 id3 id4 id5 value1 value2 value3
+40 40 40 40 40 40 40 40
+39 31 32 NULL 39 37 37 37
+38 31 32 NULL 38 37 37 37
+37 31 32 NULL 37 37 37 37
+30 31 32 33 34 30 30 30
+22 20 20 NULL NULL 20 20 20
+21 20 20 NULL NULL 20 20 20
+20 20 20 NULL NULL 20 20 20
+10 10 10 10 10 11 5 5
+9 9 9 9 9 10 4 4
+8 8 8 8 8 9 4 4
+7 7 7 7 7 8 3 3
+6 6 6 6 6 7 3 3
+5 5 5 5 5 6 2 2
+4 4 4 4 4 5 2 2
+3 3 3 3 3 4 1 1
+2 2 2 2 2 3 1 1
+1 1 1 1 1 2 0 0
+disconnect con1;
+disconnect con2;
+connection default;
+DROP TABLE t1;
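This file replays unique_sec.result with every index assigned to a reverse-ordered column family through the index COMMENT; the constraint behaviour is identical, but the final SELECT * now comes back in descending primary-key order where the forward run returned ascending. The syntax, reduced to its essentials:

CREATE TABLE rev_demo (
  pk INT PRIMARY KEY COMMENT 'rev:cf',   -- 'rev:' prefix selects a reverse CF
  a INT,
  UNIQUE KEY (a) COMMENT 'rev:cf'
) ENGINE=rocksdb;
DROP TABLE rev_demo;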
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/unsupported_tx_isolations.result b/storage/rocksdb/mysql-test/rocksdb/r/unsupported_tx_isolations.result
new file mode 100644
index 00000000000..32776e19767
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/unsupported_tx_isolations.result
@@ -0,0 +1,18 @@
+DROP TABLE IF EXISTS t1;
+create table t1 (id int primary key, value int) engine=rocksdb;
+insert into t1 values (1,1);
+SET session transaction isolation level read uncommitted;
+begin;
+insert into t1 values (2,1);
+ERROR HY000: MyRocks supports only READ COMMITTED and REPEATABLE READ isolation levels. Please change from current isolation level READ-UNCOMMITTED
+select * from t1 where id=1;
+ERROR HY000: MyRocks supports only READ COMMITTED and REPEATABLE READ isolation levels. Please change from current isolation level READ-UNCOMMITTED
+rollback;
+SET session transaction isolation level serializable;
+begin;
+insert into t1 values (2,1);
+ERROR HY000: MyRocks supports only READ COMMITTED and REPEATABLE READ isolation levels. Please change from current isolation level SERIALIZABLE
+select * from t1 where id=1;
+ERROR HY000: MyRocks supports only READ COMMITTED and REPEATABLE READ isolation levels. Please change from current isolation level SERIALIZABLE
+rollback;
+DROP TABLE t1;
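Note that MyRocks enforces the isolation restriction per statement, not when the level is set: both SET statements above succeed, and the error surfaces on the first read or write attempted under the unsupported level. Recovery is simply switching back, as in this sketch against a table like t1:

SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE;  -- accepted
BEGIN;
SELECT * FROM t1 WHERE id = 1;   -- fails: unsupported isolation level
ROLLBACK;
SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
BEGIN;
SELECT * FROM t1 WHERE id = 1;   -- succeeds again
COMMIT;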
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/update.result b/storage/rocksdb/mysql-test/rocksdb/r/update.result
new file mode 100644
index 00000000000..791fddd27ef
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/update.result
@@ -0,0 +1,121 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(10000,'foobar');
+INSERT INTO t1 (a,b) SELECT a, b FROM t1;
+UPDATE t1 SET a=a+100;
+SELECT a,b FROM t1;
+a b
+101 a
+101 a
+10100 foobar
+10100 foobar
+102 b
+102 b
+103 c
+103 c
+104 d
+104 d
+105 e
+105 e
+UPDATE t1 SET a=a-100, b=DEFAULT WHERE a>100;
+SELECT a,b FROM t1;
+a b
+1 NULL
+1 NULL
+10000 NULL
+10000 NULL
+2 NULL
+2 NULL
+3 NULL
+3 NULL
+4 NULL
+4 NULL
+5 NULL
+5 NULL
+UPDATE t1 SET b = 'update' WHERE a <= 4 ORDER BY b DESC, a ASC LIMIT 1;
+SELECT a,b FROM t1;
+a b
+1 NULL
+1 update
+10000 NULL
+10000 NULL
+2 NULL
+2 NULL
+3 NULL
+3 NULL
+4 NULL
+4 NULL
+5 NULL
+5 NULL
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(10000,'foobar');
+INSERT INTO t1 (a,b) SELECT a, b FROM t1;
+BEGIN;
+UPDATE t1 SET a=a+100;
+UPDATE t1 SET a=a-50, b=DEFAULT WHERE a>100;
+COMMIT;
+SELECT * FROM t1 ORDER BY pk;
+a b pk
+10050 NULL 12
+10050 NULL 6
+51 NULL 1
+51 NULL 7
+52 NULL 2
+52 NULL 8
+53 NULL 3
+53 NULL 9
+54 NULL 10
+54 NULL 4
+55 NULL 11
+55 NULL 5
+BEGIN;
+UPDATE t1 SET b = 'update' WHERE a <= 4 ORDER BY a DESC, b ASC LIMIT 3;
+UPDATE t1 SET b = '';
+ROLLBACK;
+SELECT * FROM t1 ORDER BY pk;
+a b pk
+51 NULL 1
+52 NULL 2
+53 NULL 3
+54 NULL 4
+55 NULL 5
+10050 NULL 6
+51 NULL 7
+52 NULL 8
+53 NULL 9
+54 NULL 10
+55 NULL 11
+10050 NULL 12
+BEGIN;
+UPDATE t1 SET b = 'update2' WHERE a <= 100;
+SAVEPOINT spt1;
+UPDATE t1 SET b = '';
+ROLLBACK TO SAVEPOINT spt1;
+ERROR HY000: MyRocks currently does not support ROLLBACK TO SAVEPOINT if modifying rows.
+UPDATE t1 SET b = 'upd' WHERE a = 10050;
+COMMIT;
+ERROR HY000: This transaction was rolled back and cannot be committed. Only supported operation is to roll it back, so all pending changes will be discarded. Please restart another transaction.
+SELECT * FROM t1 ORDER BY pk;
+a b pk
+51 NULL 1
+52 NULL 2
+53 NULL 3
+54 NULL 4
+55 NULL 5
+10050 NULL 6
+51 NULL 7
+52 NULL 8
+53 NULL 9
+54 NULL 10
+55 NULL 11
+10050 NULL 12
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b CHAR(8), UNIQUE INDEX(a)) ENGINE=RocksDB;
+INSERT INTO t1 (a,b) VALUES (1,'foo'),(2,'bar');
+UPDATE t1 SET a=a+100;
+SELECT * FROM t1;
+a b
+101 foo
+102 bar
+DROP TABLE t1;
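The savepoint sequence above captures two MyRocks rules: ROLLBACK TO SAVEPOINT is refused once the transaction has modified rows, and the refusal marks the transaction rollback-only, so the subsequent COMMIT is rejected and every pending change is discarded (the final SELECT matches the pre-transaction state). Condensed, with a hypothetical table s:

CREATE TABLE s (pk INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
INSERT INTO s VALUES (1, 'old');
BEGIN;
UPDATE s SET b = 'new';
SAVEPOINT sp1;
UPDATE s SET b = 'newer';
ROLLBACK TO SAVEPOINT sp1;   -- refused: rows were modified
COMMIT;                      -- refused: transaction is rollback-only
SELECT b FROM s;             -- still 'old'
DROP TABLE s;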
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/update_ignore.result b/storage/rocksdb/mysql-test/rocksdb/r/update_ignore.result
new file mode 100644
index 00000000000..d36371be45b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/update_ignore.result
@@ -0,0 +1,57 @@
+DROP TABLE IF EXISTS t1,t2;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(10000,'foobar');
+INSERT INTO t1 (a,b) SELECT a, b FROM t1;
+CREATE TABLE t2 (c CHAR(8), d INT, pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t2 (c,d) SELECT b, a FROM t1;
+UPDATE IGNORE t1 SET b = 'upd1' WHERE b IS NOT NULL ORDER BY a LIMIT 1;
+SELECT a,b FROM t1 ORDER BY pk;
+a b
+1 upd1
+2 b
+3 c
+4 d
+5 e
+10000 foobar
+1 a
+2 b
+3 c
+4 d
+5 e
+10000 foobar
+UPDATE t1, t2 SET b = 'upd2a', c = 'upd2b'
+WHERE c < b OR a != ( SELECT 1 UNION SELECT 2 );
+ERROR 21000: Subquery returns more than 1 row
+UPDATE IGNORE t1, t2 SET b = 'upd2a', c = 'upd2b'
+WHERE c < b OR a != ( SELECT 1 UNION SELECT 2 );
+Warnings:
+Warning 1242 Subquery returns more than 1 row
+SELECT a,b FROM t1 ORDER BY pk;
+a b
+1 upd2a
+2 upd2a
+3 upd2a
+4 upd2a
+5 upd2a
+10000 upd2a
+1 a
+2 upd2a
+3 upd2a
+4 upd2a
+5 upd2a
+10000 upd2a
+SELECT c,d FROM t2 ORDER BY pk;
+c d
+upd2b 1
+upd2b 2
+upd2b 3
+upd2b 4
+upd2b 5
+upd2b 10000
+upd2b 1
+upd2b 2
+upd2b 3
+upd2b 4
+upd2b 5
+upd2b 10000
+DROP TABLE t1, t2;
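UPDATE IGNORE here demotes ER_SUBQUERY_NO_1_ROW to Warning 1242 and lets the statement run to completion: rows reachable through the still-evaluable `c < b` branch of the OR are updated, and the failing comparison no longer aborts anything. A usage sketch against the same pair of tables:

UPDATE IGNORE t1, t2 SET b = 'upd', c = 'upd'
WHERE c < b OR a != (SELECT 1 UNION SELECT 2);
SHOW WARNINGS;       -- Warning 1242: Subquery returns more than 1 row
SELECT ROW_COUNT();  -- rows were still changed despite the warning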
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/update_multi.result b/storage/rocksdb/mysql-test/rocksdb/r/update_multi.result
new file mode 100644
index 00000000000..294c07b2a79
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/update_multi.result
@@ -0,0 +1,691 @@
+DROP TABLE IF EXISTS t1,t2;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT
+PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES
+(1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(10000,'foobar');
+INSERT INTO t1 (a,b) SELECT a, b FROM t1;
+CREATE TABLE t2 (c CHAR(8), d INT, pk INT AUTO_INCREMENT
+PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t2 (c,d) SELECT b, a FROM t1;
+UPDATE t1 STRAIGHT_JOIN t2 SET t1.a = t2.d+100, t2.c = 'multi'
+WHERE c < b AND a + d != 1;
+SELECT a,b FROM t1;
+a b
+1 a
+1 a
+101 b
+101 b
+101 c
+101 c
+101 d
+101 d
+101 e
+101 e
+101 foobar
+101 foobar
+SELECT c,d FROM t2;
+c d
+foobar 10000
+foobar 10000
+multi 1
+multi 1
+multi 2
+multi 2
+multi 3
+multi 3
+multi 4
+multi 4
+multi 5
+multi 5
+UPDATE t2 STRAIGHT_JOIN t1 SET t2.d = DEFAULT
+WHERE c = 'foobar' and b = c;
+SELECT a,b FROM t1;
+a b
+1 a
+1 a
+101 b
+101 b
+101 c
+101 c
+101 d
+101 d
+101 e
+101 e
+101 foobar
+101 foobar
+SELECT c,d FROM t2;
+c d
+foobar NULL
+foobar NULL
+multi 1
+multi 1
+multi 2
+multi 2
+multi 3
+multi 3
+multi 4
+multi 4
+multi 5
+multi 5
+DROP TABLE t1, t2;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT
+PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES
+(1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(10000,'foobar');
+INSERT INTO t1 (a,b) SELECT a, b FROM t1;
+CREATE TABLE t2 (c CHAR(8), d INT, pk INT AUTO_INCREMENT
+PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t2 (c,d) SELECT b, a FROM t1;
+UPDATE t1 STRAIGHT_JOIN t2 SET t1.a = t2.d+100, t2.c = 'multi'
+WHERE c < b AND a + d != 1;
+SELECT a,b FROM t1;
+a b
+1 a
+1 a
+101 b
+101 b
+101 c
+101 c
+101 d
+101 d
+101 e
+101 e
+101 foobar
+101 foobar
+SELECT c,d FROM t2;
+c d
+foobar 10000
+foobar 10000
+multi 1
+multi 1
+multi 2
+multi 2
+multi 3
+multi 3
+multi 4
+multi 4
+multi 5
+multi 5
+UPDATE t2 STRAIGHT_JOIN t1 SET t2.d = DEFAULT
+WHERE c = 'foobar' and b = c;
+SELECT a,b FROM t1;
+a b
+1 a
+1 a
+101 b
+101 b
+101 c
+101 c
+101 d
+101 d
+101 e
+101 e
+101 foobar
+101 foobar
+SELECT c,d FROM t2;
+c d
+foobar NULL
+foobar NULL
+multi 1
+multi 1
+multi 2
+multi 2
+multi 3
+multi 3
+multi 4
+multi 4
+multi 5
+multi 5
+DROP TABLE t1, t2;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT
+PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES
+(1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(10000,'foobar');
+INSERT INTO t1 (a,b) SELECT a, b FROM t1;
+CREATE TABLE t2 (c CHAR(8), d INT, pk INT AUTO_INCREMENT
+PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t2 (c,d) SELECT b, a FROM t1;
+UPDATE t1 STRAIGHT_JOIN t2 SET t1.a = t2.d+100, t2.c = 'multi'
+WHERE c < b AND a + d != 1;
+SELECT a,b FROM t1;
+a b
+1 a
+1 a
+101 b
+101 b
+101 c
+101 c
+101 d
+101 d
+101 e
+101 e
+101 foobar
+101 foobar
+SELECT c,d FROM t2;
+c d
+foobar 10000
+foobar 10000
+multi 1
+multi 1
+multi 2
+multi 2
+multi 3
+multi 3
+multi 4
+multi 4
+multi 5
+multi 5
+UPDATE t2 STRAIGHT_JOIN t1 SET t2.d = DEFAULT
+WHERE c = 'foobar' and b = c;
+SELECT a,b FROM t1;
+a b
+1 a
+1 a
+101 b
+101 b
+101 c
+101 c
+101 d
+101 d
+101 e
+101 e
+101 foobar
+101 foobar
+SELECT c,d FROM t2;
+c d
+foobar NULL
+foobar NULL
+multi 1
+multi 1
+multi 2
+multi 2
+multi 3
+multi 3
+multi 4
+multi 4
+multi 5
+multi 5
+DROP TABLE t1, t2;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT
+PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES
+(1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(10000,'foobar');
+INSERT INTO t1 (a,b) SELECT a, b FROM t1;
+CREATE TABLE t2 (c CHAR(8), d INT, pk INT AUTO_INCREMENT
+PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t2 (c,d) SELECT b, a FROM t1;
+UPDATE t1 STRAIGHT_JOIN t2 SET t1.a = t2.d+100, t2.c = 'multi'
+WHERE c < b AND a + d != 1;
+SELECT a,b FROM t1;
+a b
+1 a
+1 a
+101 b
+101 b
+101 c
+101 c
+101 d
+101 d
+101 e
+101 e
+101 foobar
+101 foobar
+SELECT c,d FROM t2;
+c d
+foobar 10000
+foobar 10000
+multi 1
+multi 1
+multi 2
+multi 2
+multi 3
+multi 3
+multi 4
+multi 4
+multi 5
+multi 5
+UPDATE t2 STRAIGHT_JOIN t1 SET t2.d = DEFAULT
+WHERE c = 'foobar' and b = c;
+SELECT a,b FROM t1;
+a b
+1 a
+1 a
+101 b
+101 b
+101 c
+101 c
+101 d
+101 d
+101 e
+101 e
+101 foobar
+101 foobar
+SELECT c,d FROM t2;
+c d
+foobar NULL
+foobar NULL
+multi 1
+multi 1
+multi 2
+multi 2
+multi 3
+multi 3
+multi 4
+multi 4
+multi 5
+multi 5
+DROP TABLE t1, t2;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT
+PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES
+(1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(10000,'foobar');
+INSERT INTO t1 (a,b) SELECT a, b FROM t1;
+CREATE TABLE t2 (c CHAR(8), d INT, pk INT AUTO_INCREMENT
+PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t2 (c,d) SELECT b, a FROM t1;
+UPDATE t1 STRAIGHT_JOIN t2 SET t1.a = t2.d+100, t2.c = 'multi'
+WHERE c < b AND a + d != 1;
+SELECT a,b FROM t1;
+a b
+1 a
+1 a
+101 b
+101 b
+101 c
+101 c
+101 d
+101 d
+101 e
+101 e
+101 foobar
+101 foobar
+SELECT c,d FROM t2;
+c d
+foobar 10000
+foobar 10000
+multi 1
+multi 1
+multi 2
+multi 2
+multi 3
+multi 3
+multi 4
+multi 4
+multi 5
+multi 5
+UPDATE t2 STRAIGHT_JOIN t1 SET t2.d = DEFAULT
+WHERE c = 'foobar' and b = c;
+SELECT a,b FROM t1;
+a b
+1 a
+1 a
+101 b
+101 b
+101 c
+101 c
+101 d
+101 d
+101 e
+101 e
+101 foobar
+101 foobar
+SELECT c,d FROM t2;
+c d
+foobar NULL
+foobar NULL
+multi 1
+multi 1
+multi 2
+multi 2
+multi 3
+multi 3
+multi 4
+multi 4
+multi 5
+multi 5
+DROP TABLE t1, t2;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT
+PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES
+(1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(10000,'foobar');
+INSERT INTO t1 (a,b) SELECT a, b FROM t1;
+CREATE TABLE t2 (c CHAR(8), d INT, pk INT AUTO_INCREMENT
+PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t2 (c,d) SELECT b, a FROM t1;
+UPDATE t1 STRAIGHT_JOIN t2 SET t1.a = t2.d+100, t2.c = 'multi'
+WHERE c < b AND a + d != 1;
+SELECT a,b FROM t1;
+a b
+1 a
+1 a
+101 b
+101 b
+101 c
+101 c
+101 d
+101 d
+101 e
+101 e
+101 foobar
+101 foobar
+SELECT c,d FROM t2;
+c d
+foobar 10000
+foobar 10000
+multi 1
+multi 1
+multi 2
+multi 2
+multi 3
+multi 3
+multi 4
+multi 4
+multi 5
+multi 5
+UPDATE t2 STRAIGHT_JOIN t1 SET t2.d = DEFAULT
+WHERE c = 'foobar' and b = c;
+SELECT a,b FROM t1;
+a b
+1 a
+1 a
+101 b
+101 b
+101 c
+101 c
+101 d
+101 d
+101 e
+101 e
+101 foobar
+101 foobar
+SELECT c,d FROM t2;
+c d
+foobar NULL
+foobar NULL
+multi 1
+multi 1
+multi 2
+multi 2
+multi 3
+multi 3
+multi 4
+multi 4
+multi 5
+multi 5
+DROP TABLE t1, t2;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT
+PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES
+(1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(10000,'foobar');
+INSERT INTO t1 (a,b) SELECT a, b FROM t1;
+CREATE TABLE t2 (c CHAR(8), d INT, pk INT AUTO_INCREMENT
+PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t2 (c,d) SELECT b, a FROM t1;
+UPDATE t1 STRAIGHT_JOIN t2 SET t1.a = t2.d+100, t2.c = 'multi'
+WHERE c < b AND a + d != 1;
+SELECT a,b FROM t1;
+a b
+1 a
+1 a
+101 b
+101 b
+101 c
+101 c
+101 d
+101 d
+101 e
+101 e
+101 foobar
+101 foobar
+SELECT c,d FROM t2;
+c d
+foobar 10000
+foobar 10000
+multi 1
+multi 1
+multi 2
+multi 2
+multi 3
+multi 3
+multi 4
+multi 4
+multi 5
+multi 5
+UPDATE t2 STRAIGHT_JOIN t1 SET t2.d = DEFAULT
+WHERE c = 'foobar' and b = c;
+SELECT a,b FROM t1;
+a b
+1 a
+1 a
+101 b
+101 b
+101 c
+101 c
+101 d
+101 d
+101 e
+101 e
+101 foobar
+101 foobar
+SELECT c,d FROM t2;
+c d
+foobar NULL
+foobar NULL
+multi 1
+multi 1
+multi 2
+multi 2
+multi 3
+multi 3
+multi 4
+multi 4
+multi 5
+multi 5
+DROP TABLE t1, t2;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT
+PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES
+(1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(10000,'foobar');
+INSERT INTO t1 (a,b) SELECT a, b FROM t1;
+CREATE TABLE t2 (c CHAR(8), d INT, pk INT AUTO_INCREMENT
+PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t2 (c,d) SELECT b, a FROM t1;
+UPDATE t1 STRAIGHT_JOIN t2 SET t1.a = t2.d+100, t2.c = 'multi'
+WHERE c < b AND a + d != 1;
+SELECT a,b FROM t1;
+a b
+1 a
+1 a
+101 b
+101 b
+101 c
+101 c
+101 d
+101 d
+101 e
+101 e
+101 foobar
+101 foobar
+SELECT c,d FROM t2;
+c d
+foobar 10000
+foobar 10000
+multi 1
+multi 1
+multi 2
+multi 2
+multi 3
+multi 3
+multi 4
+multi 4
+multi 5
+multi 5
+UPDATE t2 STRAIGHT_JOIN t1 SET t2.d = DEFAULT
+WHERE c = 'foobar' and b = c;
+SELECT a,b FROM t1;
+a b
+1 a
+1 a
+101 b
+101 b
+101 c
+101 c
+101 d
+101 d
+101 e
+101 e
+101 foobar
+101 foobar
+SELECT c,d FROM t2;
+c d
+foobar NULL
+foobar NULL
+multi 1
+multi 1
+multi 2
+multi 2
+multi 3
+multi 3
+multi 4
+multi 4
+multi 5
+multi 5
+DROP TABLE t1, t2;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT
+PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES
+(1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(10000,'foobar');
+INSERT INTO t1 (a,b) SELECT a, b FROM t1;
+CREATE TABLE t2 (c CHAR(8), d INT, pk INT AUTO_INCREMENT
+PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t2 (c,d) SELECT b, a FROM t1;
+UPDATE t1 STRAIGHT_JOIN t2 SET t1.a = t2.d+100, t2.c = 'multi'
+WHERE c < b AND a + d != 1;
+SELECT a,b FROM t1;
+a b
+1 a
+1 a
+101 b
+101 b
+101 c
+101 c
+101 d
+101 d
+101 e
+101 e
+101 foobar
+101 foobar
+SELECT c,d FROM t2;
+c d
+foobar 10000
+foobar 10000
+multi 1
+multi 1
+multi 2
+multi 2
+multi 3
+multi 3
+multi 4
+multi 4
+multi 5
+multi 5
+UPDATE t2 STRAIGHT_JOIN t1 SET t2.d = DEFAULT
+WHERE c = 'foobar' and b = c;
+SELECT a,b FROM t1;
+a b
+1 a
+1 a
+101 b
+101 b
+101 c
+101 c
+101 d
+101 d
+101 e
+101 e
+101 foobar
+101 foobar
+SELECT c,d FROM t2;
+c d
+foobar NULL
+foobar NULL
+multi 1
+multi 1
+multi 2
+multi 2
+multi 3
+multi 3
+multi 4
+multi 4
+multi 5
+multi 5
+DROP TABLE t1, t2;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT
+PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES
+(1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(10000,'foobar');
+INSERT INTO t1 (a,b) SELECT a, b FROM t1;
+CREATE TABLE t2 (c CHAR(8), d INT, pk INT AUTO_INCREMENT
+PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t2 (c,d) SELECT b, a FROM t1;
+UPDATE t1 STRAIGHT_JOIN t2 SET t1.a = t2.d+100, t2.c = 'multi'
+WHERE c < b AND a + d != 1;
+SELECT a,b FROM t1;
+a b
+1 a
+1 a
+101 b
+101 b
+101 c
+101 c
+101 d
+101 d
+101 e
+101 e
+101 foobar
+101 foobar
+SELECT c,d FROM t2;
+c d
+foobar 10000
+foobar 10000
+multi 1
+multi 1
+multi 2
+multi 2
+multi 3
+multi 3
+multi 4
+multi 4
+multi 5
+multi 5
+UPDATE t2 STRAIGHT_JOIN t1 SET t2.d = DEFAULT
+WHERE c = 'foobar' and b = c;
+SELECT a,b FROM t1;
+a b
+1 a
+1 a
+101 b
+101 b
+101 c
+101 c
+101 d
+101 d
+101 e
+101 e
+101 foobar
+101 foobar
+SELECT c,d FROM t2;
+c d
+foobar NULL
+foobar NULL
+multi 1
+multi 1
+multi 2
+multi 2
+multi 3
+multi 3
+multi 4
+multi 4
+multi 5
+multi 5
+DROP TABLE t1, t2;
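Two details are worth noting in this file: the repeated blocks are evidently the same test body replayed under different suite parameter combinations (the identical output is expected), and `SET t2.d = DEFAULT` resolves to the column default, which for a nullable INT with no DEFAULT clause is NULL, hence the `foobar NULL` rows. A sketch of that resolution; the table name is hypothetical:

CREATE TABLE dflt (pk INT PRIMARY KEY, d INT) ENGINE=rocksdb;
INSERT INTO dflt VALUES (1, 42);
UPDATE dflt SET d = DEFAULT;   -- no explicit DEFAULT clause, so d becomes NULL
SELECT d FROM dflt;            -- NULL
DROP TABLE dflt;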
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/update_with_keys.result b/storage/rocksdb/mysql-test/rocksdb/r/update_with_keys.result
new file mode 100644
index 00000000000..576fe6f6754
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/update_with_keys.result
@@ -0,0 +1,38 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY, INDEX(b)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(6,'x'),(7,'y'),(8,'z');
+UPDATE t1 SET a=100, b='f' WHERE b IN ('b','c');
+UPDATE t1 SET b='m' WHERE b = 'f';
+UPDATE t1 SET b='z' WHERE a < 2;
+UPDATE t1 SET b='';
+SELECT a,b FROM t1;
+a b
+1
+100
+100
+4
+5
+6
+7
+8
+DROP TABLE t1;
+# RocksDB: skip the test for secondary UNIQUE keys.
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(0,'f'),(100,'a');
+UPDATE t1 SET a=a+200;
+UPDATE t1 SET a=0 WHERE a > 250;
+UPDATE t1 SET a=205 WHERE a=200;
+ERROR 23000: Duplicate entry '205' for key 'PRIMARY'
+UPDATE t1 SET a=12345 ORDER BY a DESC, b LIMIT 1;
+SELECT a,b FROM t1;
+a b
+0 a
+12345 e
+200 f
+201 a
+202 b
+203 c
+204 d
+UPDATE t1 SET a=80 WHERE a IN (202,203);
+ERROR 23000: Duplicate entry '80' for key 'PRIMARY'
+DROP TABLE t1;
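The failing UPDATEs above also show statement-level atomicity in a transactional engine: when `UPDATE ... WHERE a IN (202,203)` hits ER_DUP_ENTRY part-way through, the whole statement is rolled back, so neither row keeps a partial change. A sketch, assuming a fresh hypothetical table:

CREATE TABLE atomic_demo (a INT PRIMARY KEY) ENGINE=rocksdb;
INSERT INTO atomic_demo VALUES (80), (202), (203);
UPDATE atomic_demo SET a = 80 WHERE a IN (202, 203);  -- ER_DUP_ENTRY '80'
SELECT a FROM atomic_demo;   -- 80, 202, 203: the statement left no changes
DROP TABLE atomic_demo;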
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/use_direct_io_for_flush_and_compaction.result b/storage/rocksdb/mysql-test/rocksdb/r/use_direct_io_for_flush_and_compaction.result
new file mode 100644
index 00000000000..8a4ee14c116
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/use_direct_io_for_flush_and_compaction.result
@@ -0,0 +1,18 @@
+Checking direct reads
+CREATE TABLE t1 (pk INT PRIMARY KEY DEFAULT '0', a INT(11), b CHAR(8)) ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` int(11) NOT NULL DEFAULT 0,
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`pk`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (1, 1,'a');
+INSERT INTO t1 (a,b) VALUES (2,'b');
+set global rocksdb_force_flush_memtable_now=1;
+SELECT a,b FROM t1;
+a b
+1 a
+2 b
+DROP TABLE t1;
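The `rocksdb_force_flush_memtable_now` step is what makes this test meaningful: it pushes the freshly inserted rows out of the memtable into an SST file, so the SELECT that follows is served through the file-read path that the direct-I/O options govern rather than from memory. The idiom:

SET GLOBAL rocksdb_force_flush_memtable_now = 1;  -- flush memtable to SST
SELECT a, b FROM t1;  -- now reads the on-disk file, exercising direct I/O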
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads.result b/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads.result
new file mode 100644
index 00000000000..8a4ee14c116
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads.result
@@ -0,0 +1,18 @@
+Checking direct reads
+CREATE TABLE t1 (pk INT PRIMARY KEY DEFAULT '0', a INT(11), b CHAR(8)) ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` int(11) NOT NULL DEFAULT 0,
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`pk`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (1, 1,'a');
+INSERT INTO t1 (a,b) VALUES (2,'b');
+set global rocksdb_force_flush_memtable_now=1;
+SELECT a,b FROM t1;
+a b
+1 a
+2 b
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result b/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result
new file mode 100644
index 00000000000..e8456457cdd
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result
@@ -0,0 +1,18 @@
+call mtr.add_suppression("rocksdb");
+call mtr.add_suppression("Aborting");
+# This shows that RocksDB plugin is loaded:
+select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB';
+plugin_name plugin_type
+ROCKSDB STORAGE ENGINE
+Checking direct reads
+FOUND 1 /enable both use_direct_reads/ in use_direct_reads_writes.err
+Checking direct writes
+FOUND 1 /enable both use_direct_io_for_flush_and_compaction/ in use_direct_reads_writes.err
+Checking rocksdb_flush_log_at_trx_commit
+FOUND 1 /rocksdb_flush_log_at_trx_commit needs to be/ in use_direct_reads_writes.err
+Validate flush_log settings when direct writes is enabled
+set global rocksdb_flush_log_at_trx_commit=0;
+set global rocksdb_flush_log_at_trx_commit=1;
+ERROR 42000: Variable 'rocksdb_flush_log_at_trx_commit' can't be set to the value of '1'
+set global rocksdb_flush_log_at_trx_commit=2;
+ERROR 42000: Variable 'rocksdb_flush_log_at_trx_commit' can't be set to the value of '2'
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/validate_datadic.result b/storage/rocksdb/mysql-test/rocksdb/r/validate_datadic.result
new file mode 100644
index 00000000000..5d9a6342ae9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/validate_datadic.result
@@ -0,0 +1,9 @@
+call mtr.add_suppression('RocksDB: Schema mismatch');
+CREATE TABLE t1 (pk int primary key) ENGINE=ROCKSDB;
+CREATE TABLE t2 (pk int primary key) ENGINE=ROCKSDB PARTITION BY KEY(pk) PARTITIONS 4;
+"Expect errors that we are missing two .frm files"
+FOUND 1 /RocksDB: Schema mismatch - Table test.t1 is registered in RocksDB but does not have a .frm file/ in validate_datadic.err
+FOUND 1 /RocksDB: Schema mismatch - Table test.t2 is registered in RocksDB but does not have a .frm file/ in validate_datadic.err
+"Expect an error that we have an extra .frm file"
+FOUND 1 /Schema mismatch - A .frm file exists for table test.t1_dummy, but that table is not registered in RocksDB/ in validate_datadic.err
+DROP TABLE t1, t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/varbinary_format.result b/storage/rocksdb/mysql-test/rocksdb/r/varbinary_format.result
new file mode 100644
index 00000000000..8b4634bb6e2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/varbinary_format.result
@@ -0,0 +1,260 @@
+CREATE TABLE t1(
+vb VARBINARY(64) primary key
+) ENGINE=rocksdb;
+INSERT INTO t1 values(0x00);
+INSERT INTO t1 values(0x0000);
+INSERT INTO t1 values(0x0000000000000000);
+INSERT INTO t1 values(0x000000);
+INSERT INTO t1 values(0x000000000000000000000000000000000000000000000000000000000000000000);
+INSERT INTO t1 values(0x00000000000000000000000000000000000000000000000000000000000000);
+INSERT INTO t1 values(0x0000000000000000000000000000000000000000000000000000000000000000);
+INSERT INTO t1 values(0x00000000);
+INSERT INTO t1 values(0x0000000000);
+INSERT INTO t1 values(0x00000000000000000000);
+INSERT INTO t1 values(0x000000000000);
+INSERT INTO t1 values(0x00000000000000);
+INSERT INTO t1 values(0x000000000000000000);
+SELECT hex(vb) FROM t1;
+hex(vb)
+00
+0000
+000000
+00000000
+0000000000
+000000000000
+00000000000000
+0000000000000000
+000000000000000000
+00000000000000000000
+00000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+000000000000000000000000000000000000000000000000000000000000000000
+BEGIN;
+SELECT hex(vb) FROM t1 FOR UPDATE;
+hex(vb)
+00
+0000
+000000
+00000000
+0000000000
+000000000000
+00000000000000
+0000000000000000
+000000000000000000
+00000000000000000000
+00000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+000000000000000000000000000000000000000000000000000000000000000000
+SELECT SUBSTRING(a.key,9) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
+SUBSTRING(a.key,9)
+000000000000000001
+000000000000000002
+000000000000000003
+000000000000000004
+000000000000000005
+000000000000000006
+000000000000000007
+000000000000000008
+000000000000000009000000000000000001
+000000000000000009000000000000000002
+000000000000000009000000000000000009000000000000000009000000000000000007
+000000000000000009000000000000000009000000000000000009000000000000000008
+000000000000000009000000000000000009000000000000000009000000000000000009000000000000000001
+ROLLBACK;
+DROP TABLE t1;
+set session debug_dbug= '+d,MYROCKS_LEGACY_VARBINARY_FORMAT';
+CREATE TABLE t1(
+vb VARBINARY(64) primary key
+) ENGINE=rocksdb;
+set session debug_dbug= '-d,MYROCKS_LEGACY_VARBINARY_FORMAT';
+INSERT INTO t1 values(0x00);
+INSERT INTO t1 values(0x0000);
+INSERT INTO t1 values(0x0000000000000000);
+INSERT INTO t1 values(0x000000);
+INSERT INTO t1 values(0x000000000000000000000000000000000000000000000000000000000000000000);
+INSERT INTO t1 values(0x00000000000000000000000000000000000000000000000000000000000000);
+INSERT INTO t1 values(0x0000000000000000000000000000000000000000000000000000000000000000);
+INSERT INTO t1 values(0x00000000);
+INSERT INTO t1 values(0x0000000000);
+INSERT INTO t1 values(0x00000000000000000000);
+INSERT INTO t1 values(0x000000000000);
+INSERT INTO t1 values(0x00000000000000);
+INSERT INTO t1 values(0x000000000000000000);
+SELECT hex(vb) FROM t1;
+hex(vb)
+00
+0000
+000000
+00000000
+0000000000
+000000000000
+00000000000000
+0000000000000000
+000000000000000000
+00000000000000000000
+00000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+000000000000000000000000000000000000000000000000000000000000000000
+BEGIN;
+SELECT hex(vb) FROM t1 FOR UPDATE;
+hex(vb)
+00
+0000
+000000
+00000000
+0000000000
+000000000000
+00000000000000
+0000000000000000
+000000000000000000
+00000000000000000000
+00000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+000000000000000000000000000000000000000000000000000000000000000000
+SELECT SUBSTRING(a.key,9) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
+SUBSTRING(a.key,9)
+0000000000000000f8
+0000000000000000f9
+0000000000000000fa
+0000000000000000fb
+0000000000000000fc
+0000000000000000fd
+0000000000000000fe
+0000000000000000ff0000000000000000f7
+0000000000000000ff0000000000000000f8
+0000000000000000ff0000000000000000f9
+0000000000000000ff0000000000000000ff0000000000000000ff0000000000000000fe
+0000000000000000ff0000000000000000ff0000000000000000ff0000000000000000ff0000000000000000f7
+0000000000000000ff0000000000000000ff0000000000000000ff0000000000000000ff0000000000000000f8
+ROLLBACK;
+DROP TABLE t1;
+CREATE TABLE t1(
+vc VARCHAR(64) collate 'binary' primary key
+) ENGINE=rocksdb;
+INSERT INTO t1 values('a');
+INSERT INTO t1 values('aa');
+INSERT INTO t1 values('aaaaaaaa');
+INSERT INTO t1 values('aaa');
+INSERT INTO t1 values('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
+INSERT INTO t1 values('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
+INSERT INTO t1 values('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
+INSERT INTO t1 values('aaaa');
+INSERT INTO t1 values('aaaaa');
+INSERT INTO t1 values('aaaaaaaaaa');
+INSERT INTO t1 values('aaaaaa');
+INSERT INTO t1 values('aaaaaaa');
+INSERT INTO t1 values('aaaaaaaaa');
+SELECT * FROM t1;
+vc
+a
+aa
+aaa
+aaaa
+aaaaa
+aaaaaa
+aaaaaaa
+aaaaaaaa
+aaaaaaaaa
+aaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+BEGIN;
+SELECT * FROM t1 FOR UPDATE;
+vc
+a
+aa
+aaa
+aaaa
+aaaaa
+aaaaaa
+aaaaaaa
+aaaaaaaa
+aaaaaaaaa
+aaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+SELECT SUBSTRING(a.key,9) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
+SUBSTRING(a.key,9)
+610000000000000001
+616100000000000002
+616161000000000003
+616161610000000004
+616161616100000005
+616161616161000006
+616161616161610007
+616161616161616108
+616161616161616109610000000000000001
+616161616161616109616100000000000002
+616161616161616109616161616161616109616161616161616109616161616161610007
+616161616161616109616161616161616109616161616161616109616161616161616108
+616161616161616109616161616161616109616161616161616109616161616161616109610000000000000001
+ROLLBACK;
+DROP TABLE t1;
+set session debug_dbug= '+d,MYROCKS_LEGACY_VARBINARY_FORMAT';
+CREATE TABLE t1(
+vc VARCHAR(64) collate 'binary' primary key
+) ENGINE=rocksdb;
+set session debug_dbug= '-d,MYROCKS_LEGACY_VARBINARY_FORMAT';
+INSERT INTO t1 values('a');
+INSERT INTO t1 values('aa');
+INSERT INTO t1 values('aaaaaaaa');
+INSERT INTO t1 values('aaa');
+INSERT INTO t1 values('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
+INSERT INTO t1 values('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
+INSERT INTO t1 values('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
+INSERT INTO t1 values('aaaa');
+INSERT INTO t1 values('aaaaa');
+INSERT INTO t1 values('aaaaaaaaaa');
+INSERT INTO t1 values('aaaaaa');
+INSERT INTO t1 values('aaaaaaa');
+INSERT INTO t1 values('aaaaaaaaa');
+SELECT * FROM t1;
+vc
+a
+aa
+aaa
+aaaa
+aaaaa
+aaaaaa
+aaaaaaa
+aaaaaaaa
+aaaaaaaaa
+aaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+BEGIN;
+SELECT * FROM t1 FOR UPDATE;
+vc
+a
+aa
+aaa
+aaaa
+aaaaa
+aaaaaa
+aaaaaaa
+aaaaaaaa
+aaaaaaaaa
+aaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+SELECT SUBSTRING(a.key,9) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
+SUBSTRING(a.key,9)
+6100000000000000f8
+6161000000000000f9
+6161610000000000fa
+6161616100000000fb
+6161616161000000fc
+6161616161610000fd
+6161616161616100fe
+6161616161616161ff0000000000000000f7
+6161616161616161ff6100000000000000f8
+6161616161616161ff6161000000000000f9
+6161616161616161ff6161616161616161ff6161616161616161ff6161616161616100fe
+6161616161616161ff6161616161616161ff6161616161616161ff6161616161616161ff0000000000000000f7
+6161616161616161ff6161616161616161ff6161616161616161ff6161616161616161ff6100000000000000f8
+ROLLBACK;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result b/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result
new file mode 100644
index 00000000000..d0a9b034927
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result
@@ -0,0 +1,30 @@
+SET GLOBAL rocksdb_write_disable_wal=false;
+SET GLOBAL rocksdb_write_ignore_missing_column_families=true;
+create table aaa (id int primary key, i int) engine rocksdb;
+set @save_rocksdb_flush_log_at_trx_commit=@@global.rocksdb_flush_log_at_trx_commit;
+SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
+insert aaa(id, i) values(0,1);
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+insert aaa(id, i) values(1,1);
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+variable_value-@a
+1
+insert aaa(id, i) values(2,1);
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+variable_value-@a
+2
+insert aaa(id, i) values(3,1);
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+variable_value-@a
+3
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
+insert aaa(id, i) values(4,1);
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+SET GLOBAL rocksdb_flush_log_at_trx_commit=2;
+insert aaa(id, i) values(5,1);
+truncate table aaa;
+drop table aaa;
+set @@global.rocksdb_flush_log_at_trx_commit=@save_rocksdb_flush_log_at_trx_commit;
+SET GLOBAL rocksdb_write_disable_wal=false;
+SET GLOBAL rocksdb_write_ignore_missing_column_families=false;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/xa.result b/storage/rocksdb/mysql-test/rocksdb/r/xa.result
new file mode 100644
index 00000000000..30cfe94e0b7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/xa.result
@@ -0,0 +1,32 @@
+#
+# MDEV-13155: XA recovery not supported for RocksDB (Just a testcase)
+#
+call mtr.add_suppression("Found .* prepared XA transactions");
+connect con1,localhost,root,,test;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT) ENGINE=RocksDB;
+XA START 'xa1';
+INSERT INTO t1 (a) VALUES (1),(2);
+XA END 'xa1';
+XA PREPARE 'xa1';
+connect con2,localhost,root,,test;
+XA START 'xa2';
+INSERT INTO t1 (a) VALUES (3);
+INSERT INTO t1 (a) VALUES (4);
+XA END 'xa2';
+XA PREPARE 'xa2';
+connection default;
+SELECT * FROM t1;
+a
+connect con3,localhost,root,,test;
+XA RECOVER;
+formatID gtrid_length bqual_length data
+1 3 0 xa1
+1 3 0 xa2
+XA ROLLBACK 'xa1';
+XA COMMIT 'xa2';
+SELECT a FROM t1;
+a
+3
+4
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/slow_query_log.awk b/storage/rocksdb/mysql-test/rocksdb/slow_query_log.awk
new file mode 100644
index 00000000000..a921f47243e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/slow_query_log.awk
@@ -0,0 +1,25 @@
+/Query_time:/ {
+ results["Rows_examined:"] = "uninit";
+ results["RocksDB_key_skipped:"] = "uninit";
+ results["RocksDB_del_skipped:"] = "uninit";
+
+ for (i = 2; i <= NF; i = i+2) {
+ results[$i] = $(i+1);
+ }
+
+ # If the output format has changed and we don't find these keys,
+ # error out.
+ if (results["Rows_examined:"] == "uninit" ||
+ results["RocksDB_key_skipped:"] == "uninit" ||
+ results["RocksDB_del_skipped:"] == "uninit") {
+ exit(-2);
+ }
+
+ if (results["Rows_examined:"] == 0) {
+ next
+ }
+ if (results["RocksDB_key_skipped:"] == 0 ||
+ results["RocksDB_del_skipped:"] == 0) {
+ exit(-1);
+ }
+}
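+
+# Illustrative slow-log line this script parses (format assumed here, since
+# the field pairs are read generically as "name: value"):
+#   # Query_time: 0.001 Rows_examined: 10 RocksDB_key_skipped: 5 RocksDB_del_skipped: 0
+# exit(-2) flags a format change; exit(-1) flags examined rows with a
+# skipped-counter still at zero.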
diff --git a/storage/rocksdb/mysql-test/rocksdb/suite.opt b/storage/rocksdb/mysql-test/rocksdb/suite.opt
new file mode 100644
index 00000000000..22c9d7a300e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/suite.opt
@@ -0,0 +1,2 @@
+--plugin-load=$HA_ROCKSDB_SO --default-storage-engine=rocksdb
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/suite.pm b/storage/rocksdb/mysql-test/rocksdb/suite.pm
new file mode 100644
index 00000000000..633e883f46d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/suite.pm
@@ -0,0 +1,28 @@
+package My::Suite::Rocksdb;
+
+use My::SysInfo;
+
+#
+# Note: the ../rocksdb_sys_vars/suite.pm file has a similar
+# function. If you modify this file, consider modifying that one, too.
+#
+@ISA = qw(My::Suite);
+use My::Find;
+use File::Basename;
+use strict;
+
+#sub is_default { not $::opt_embedded_server }
+
+my $sst_dump=
+::mtr_exe_maybe_exists(
+ "$::bindir/storage/rocksdb$::opt_vs_config/sst_dump",
+ "$::path_client_bindir/sst_dump");
+return "RocksDB is not compiled, no sst_dump" unless $sst_dump;
+$ENV{MARIAROCKS_SST_DUMP}="$sst_dump";
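+# tests can then locate the sst_dump binary via $ENV{MARIAROCKS_SST_DUMP}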
+
+## Temporarily disable testing under valgrind, due to MDEV-12439
+#return "RocksDB tests disabled under valgrind" if ($::opt_valgrind);
+
+
+bless { };
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/1st.test b/storage/rocksdb/mysql-test/rocksdb/t/1st.test
new file mode 100644
index 00000000000..cecef8b7537
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/1st.test
@@ -0,0 +1,36 @@
+--source include/have_rocksdb.inc
+
+#
+# This test checks some very basic capabilities
+# which will be used in almost every other test,
+# and will not be checked through support* variables.
+# If this test does not pass, there is no point
+# in executing the other ones.
+#
+# Minimal requirements:
+# - supported column types: INT, CHAR (default CHAR(8), INT(11));
+# - column attributes as declared in define_engine.inc ($default_col_opts)
+# (by default empty, which means no additional attributes apart from the type);
+# - table attributes as declared in define_engine.inc ($default_tbl_opts)
+# (by default empty, which means no additional attributes apart from ENGINE);
+# - CREATE TABLE .. (column1 <column options>, column2 <column options>) ENGINE=<storage_engine>;
+# - INSERT INTO .. VALUES (val1,val2);
+# - DROP TABLE ..
+# - SELECT a,b FROM ..
+# - SHOW CREATE TABLE ..
+# - SHOW COLUMNS IN ...
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (pk INT PRIMARY KEY DEFAULT '0', a INT(11), b CHAR(8)) ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+SHOW COLUMNS IN t1;
+INSERT INTO t1 VALUES (1, 1,'a');
+INSERT INTO t1 (a,b) VALUES (2,'b');
+--sorted_result
+SELECT a,b FROM t1;
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit-master.opt
new file mode 100644
index 00000000000..83ed8522e72
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit-master.opt
@@ -0,0 +1 @@
+--binlog-format=row
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test b/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test
new file mode 100644
index 00000000000..aeadf5381b0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test
@@ -0,0 +1,168 @@
+--source include/have_rocksdb.inc
+--source include/have_log_bin.inc
+--source include/not_windows.inc
+--echo # Disable for valgrind because this takes too long
+--source include/not_valgrind.inc
+
+# MariaDB: tooling to slow down commits (also, when running on a ramdisk we need
+# write_prepared for some reason; this is set in the .opt file)
+--source include/have_debug.inc
+--source include/have_debug_sync.inc
+--source include/have_write_prepared.inc
+
+
+--disable_warnings
+DROP DATABASE IF EXISTS mysqlslap;
+--enable_warnings
+
+CREATE DATABASE mysqlslap;
+USE mysqlslap;
+CREATE TABLE t1(id BIGINT AUTO_INCREMENT, value BIGINT, PRIMARY KEY(id)) ENGINE=rocksdb;
+
+SET @save_rocksdb_enable_2pc= @@rocksdb_enable_2pc;
+SET @save_rocksdb_flush_log_at_trx_commit= @@rocksdb_flush_log_at_trx_commit;
+
+
+#
+# In MariaDB, a regular group commit operation does not increment
+# rocksdb_wal_group_syncs.
+#
+
+--echo # 2PC enabled, MyRocks durability enabled
+SET GLOBAL rocksdb_enable_2pc=1;
+SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
+
+--echo ##
+--echo ## 2PC + durability + single thread
+--echo ##
+select variable_value into @b1 from information_schema.global_status where variable_name='Binlog_commits';
+select variable_value into @b2 from information_schema.global_status where variable_name='Binlog_group_commits';
+select variable_value into @b3 from information_schema.global_status where variable_name='Rocksdb_wal_synced';
+--exec $MYSQL_SLAP --silent --concurrency=1 --number-of-queries=1000 --query="INSERT INTO t1 (id, value) VALUES(NULL, 1)"
+select IF(variable_value - @b1 = 1000, 'OK', variable_value - @b1) as Binlog_commits
+from information_schema.global_status where variable_name='Binlog_commits';
+select IF(variable_value - @b2 = 1000, 'OK', variable_value - @b2) as Binlog_group_commits
+from information_schema.global_status where variable_name='Binlog_group_commits';
+--echo # Prepare operations sync, commits don't. We expect slightly more than 1K syncs:
+select IF(variable_value - @b3 between 1000 and 1500, 'OK', variable_value - @b3) as Rocksdb_wal_synced
+from information_schema.global_status where variable_name='Rocksdb_wal_synced';
+
+# The SQL-layer solution is sufficient for the Binlog counts but not for RocksDB.
+#set @tmp_bcwc= @@binlog_commit_wait_count;
+#set @tmp_bcwu= @@binlog_commit_wait_usec;
+#set global binlog_commit_wait_count=30;
+#set global binlog_commit_wait_usec=500*1000;
+
+# RocksDB-side solution:
+
+set debug_dbug='+d,rocksdb_enable_delay_commits';
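+# (the dummy CREATE/DROP below presumably just forces a commit through RocksDB
+# so that the debug flag takes effect)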
+create table dummy10(a int) engine=rocksdb;
+drop table dummy10;
+set debug_dbug='-d,rocksdb_enable_delay_commits';
+
+--echo ##
+--echo ## 2PC + durability + group commit
+--echo ##
+select variable_value into @b1 from information_schema.global_status where variable_name='Binlog_commits';
+select variable_value into @b2 from information_schema.global_status where variable_name='Binlog_group_commits';
+select variable_value into @b3 from information_schema.global_status where variable_name='Rocksdb_wal_synced';
+
+--exec $MYSQL_SLAP --silent --concurrency=50 --number-of-queries=10000 --query="INSERT INTO t1 (id, value) VALUES(NULL, 1)"
+
+select IF(variable_value - @b1 = 10000, 'OK', variable_value - @b1) as Binlog_commits
+from information_schema.global_status where variable_name='Binlog_commits';
+select IF(variable_value - @b2 between 100 and 5000, 'OK', variable_value - @b2) as Binlog_group_commits
+from information_schema.global_status where variable_name='Binlog_group_commits';
+select IF(variable_value - @b3 between 1 and 9000, 'OK', variable_value - @b3)
+from information_schema.global_status where variable_name='Rocksdb_wal_synced';
+
+#set global binlog_commit_wait_count= @tmp_bcwc;
+#set global binlog_commit_wait_usec= @tmp_bcwu;
+
+set debug_dbug='+d,rocksdb_disable_delay_commits';
+create table dummy10(a int) engine=rocksdb;
+drop table dummy10;
+set debug_dbug='-d,rocksdb_disable_delay_commits';
+
+--echo ##
+--echo # 2PC enabled, MyRocks durability disabled, single thread
+--echo ##
+SET GLOBAL rocksdb_enable_2pc=1;
+SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
+
+select variable_value into @b1 from information_schema.global_status where variable_name='Binlog_commits';
+select variable_value into @b2 from information_schema.global_status where variable_name='Binlog_group_commits';
+select variable_value into @b3 from information_schema.global_status where variable_name='Rocksdb_wal_synced';
+--exec $MYSQL_SLAP --silent --concurrency=1 --number-of-queries=1000 --query="INSERT INTO t1 (id, value) VALUES(NULL, 1)"
+
+select IF(variable_value - @b1 = 1000, 'OK', variable_value - @b1) as Binlog_commits
+from information_schema.global_status where variable_name='Binlog_commits';
+select IF(variable_value - @b2 = 1000, 'OK', variable_value - @b2) as Binlog_group_commits
+from information_schema.global_status where variable_name='Binlog_group_commits';
+select IF(variable_value - @b3 < 10, 'OK', variable_value - @b3)
+from information_schema.global_status where variable_name='Rocksdb_wal_synced';
+
+--echo ##
+--echo # 2PC enabled, MyRocks durability disabled, concurrent workload
+--echo ##
+
+select variable_value into @b1 from information_schema.global_status where variable_name='Binlog_commits';
+select variable_value into @b2 from information_schema.global_status where variable_name='Binlog_group_commits';
+select variable_value into @b3 from information_schema.global_status where variable_name='Rocksdb_wal_synced';
+
+--exec $MYSQL_SLAP --silent --concurrency=50 --number-of-queries=10000 --query="INSERT INTO t1 (id, value) VALUES(NULL, 1)"
+
+select IF(variable_value - @b1 = 10000, 'OK', variable_value - @b1) as Binlog_commits
+from information_schema.global_status where variable_name='Binlog_commits';
+select IF(variable_value - @b2 < 8000, 'OK', variable_value - @b2) as Binlog_group_commits
+from information_schema.global_status where variable_name='Binlog_group_commits';
+select IF(variable_value - @b3 < 10, 'OK', variable_value - @b3)
+from information_schema.global_status where variable_name='Rocksdb_wal_synced';
+
+##
+## The next two are disabled because they don't add any test coverage in
+## MariaDB. @@rocksdb_enable_2pc=0 is a MyRocks-internal setting; binlog
+## [group] commit still happens, and the RocksDB WAL is still synced.
+##
+
+--disable_parsing
+--echo ##
+--echo # 2PC disabled, MyRocks durability enabled, one thread
+--echo ##
+SET GLOBAL rocksdb_enable_2pc=0;
+SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
+
+select variable_value into @b1 from information_schema.global_status where variable_name='Binlog_commits';
+select variable_value into @b2 from information_schema.global_status where variable_name='Binlog_group_commits';
+select variable_value into @b3 from information_schema.global_status where variable_name='Rocksdb_wal_synced';
+
+--exec $MYSQL_SLAP --silent --concurrency=1 --number-of-queries=1000 --query="INSERT INTO t1 (id, value) VALUES(NULL, 1)"
+
+select variable_value - @b1 as Binlog_commits
+from information_schema.global_status where variable_name='Binlog_commits';
+select variable_value - @b2 as Binlog_group_commits
+from information_schema.global_status where variable_name='Binlog_group_commits';
+select variable_value - @b3 as Rocksdb_wal_synced
+from information_schema.global_status where variable_name='Rocksdb_wal_synced';
+
+--echo ##
+--echo # 2PC disabled, MyRocks durability enabled, concurrent workload
+--echo ##
+select variable_value into @b1 from information_schema.global_status where variable_name='Binlog_commits';
+select variable_value into @b2 from information_schema.global_status where variable_name='Binlog_group_commits';
+select variable_value into @b3 from information_schema.global_status where variable_name='Rocksdb_wal_synced';
+--exec $MYSQL_SLAP --silent --concurrency=50 --number-of-queries=10000 --query="INSERT INTO t1 (id, value) VALUES(NULL, 1)"
+
+select variable_value - @b1 as Binlog_commits
+from information_schema.global_status where variable_name='Binlog_commits';
+select variable_value - @b2 as Binlog_group_commits
+from information_schema.global_status where variable_name='Binlog_group_commits';
+select variable_value - @b3 as Rocksdb_wal_synced
+from information_schema.global_status where variable_name='Rocksdb_wal_synced';
+--enable_parsing
+
+SET GLOBAL rocksdb_enable_2pc= @save_rocksdb_enable_2pc;
+SET GLOBAL rocksdb_flush_log_at_trx_commit= @save_rocksdb_flush_log_at_trx_commit;
+
+DROP TABLE t1;
+DROP DATABASE mysqlslap;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test
new file mode 100644
index 00000000000..df7790ee4c2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test
@@ -0,0 +1,417 @@
+--source include/have_rocksdb.inc
+--source include/have_partition.inc
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+##
+## test adding index inplace
+##
+
+# test basic add
+CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1, 5);
+INSERT INTO t1 (a, b) VALUES (2, 6);
+INSERT INTO t1 (a, b) VALUES (3, 7);
+ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+CHECK TABLE t1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX(kb) WHERE b > 5;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX(kab) WHERE a > 2;
+DROP TABLE t1;
+
+# test add + drop (simultaneous)
+CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1, 5);
+INSERT INTO t1 (a, b) VALUES (2, 6);
+INSERT INTO t1 (a, b) VALUES (3, 7);
+ALTER TABLE t1 ADD INDEX kb(b), DROP INDEX ka, ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+CHECK TABLE t1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX(kb) WHERE b > 5;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX(kab) WHERE a > 2;
+DROP TABLE t1;
+
+## test multi-drop + multi-add
+CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1, 5);
+INSERT INTO t1 (a, b) VALUES (2, 6);
+INSERT INTO t1 (a, b) VALUES (3, 7);
+ALTER TABLE t1 DROP INDEX ka, DROP INDEX kab, ALGORITHM=INPLACE;
+ALTER TABLE t1 ADD INDEX kb(b), ADD INDEX kab(a,b), ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+CHECK TABLE t1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX(kb) WHERE b > 5;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX(kab) WHERE a > 2;
+DROP TABLE t1;
+
+# test multi add + drop (simultaneous)
+CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1, 5);
+INSERT INTO t1 (a, b) VALUES (2, 6);
+INSERT INTO t1 (a, b) VALUES (3, 7);
+ALTER TABLE t1 ADD INDEX kb(b), DROP INDEX ka, ADD INDEX kba(b,a), DROP INDEX kab, ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+CHECK TABLE t1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX(kb) WHERE b > 5;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX(kba) WHERE a > 2;
+DROP TABLE t1;
+
+# test dropping and adding a key simultaneously w/ the same name but a different column
+CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB;
+ALTER TABLE t1 DROP INDEX ka, ADD INDEX ka(b), ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+CHECK TABLE t1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX(ka) WHERE b > 5;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX(kab) WHERE a > 2;
+DROP TABLE t1;
+
+##
+## test adding index inplace w/ various column types
+##
+
+# test basic add
+CREATE TABLE t1 (pk CHAR(8) PRIMARY KEY, a VARCHAR(11), b INT UNSIGNED) ENGINE=rocksdb charset utf8 collate utf8_bin;
+SHOW CREATE TABLE t1;
+SHOW COLUMNS IN t1;
+INSERT INTO t1 VALUES ('aaa', '1111', 1);
+INSERT INTO t1 VALUES ('bbb', '2222', 2);
+INSERT INTO t1 VALUES ('ccc', '3333', 3);
+ALTER TABLE t1 ADD INDEX kab(a,b), ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+CHECK TABLE t1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX(kab) WHERE a > '2' AND b < 3;
+DROP TABLE t1;
+
+## test add + drop (simultaneous)
+CREATE TABLE t1 (pk CHAR(8) PRIMARY KEY, a VARCHAR(11), b INT UNSIGNED) ENGINE=rocksdb charset utf8 collate utf8_bin;
+SHOW CREATE TABLE t1;
+SHOW COLUMNS IN t1;
+INSERT INTO t1 VALUES ('aaa', '1111', 1);
+INSERT INTO t1 VALUES ('bbb', '2222', 2);
+INSERT INTO t1 VALUES ('ccc', '3333', 3);
+ALTER TABLE t1 ADD INDEX kab(a,b), ALGORITHM=INPLACE;
+ALTER TABLE t1 ADD INDEX ka(a), DROP INDEX kab, ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+CHECK TABLE t1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX(ka) WHERE a > '2' AND b < 3;
+DROP TABLE t1;
+
+### test multi-drop + multi-add
+CREATE TABLE t1 (pk CHAR(8) PRIMARY KEY, a VARCHAR(11), b INT UNSIGNED) ENGINE=rocksdb charset utf8 collate utf8_bin;
+SHOW CREATE TABLE t1;
+SHOW COLUMNS IN t1;
+INSERT INTO t1 VALUES ('aaa', '1111', 1);
+INSERT INTO t1 VALUES ('bbb', '2222', 2);
+INSERT INTO t1 VALUES ('ccc', '3333', 3);
+ALTER TABLE t1 ADD INDEX kab(a,b), ADD INDEX ka(a), ADD INDEX kb(b), ALGORITHM=INPLACE;
+ALTER TABLE t1 DROP INDEX ka, DROP INDEX kb, ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+CHECK TABLE t1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX(kab) WHERE a > '2' AND b < 3;
+DROP TABLE t1;
+
+##
+## test adding via CREATE/DROP index syntax
+##
+CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1, 5);
+INSERT INTO t1 (a, b) VALUES (2, 6);
+INSERT INTO t1 (a, b) VALUES (3, 7);
+CREATE INDEX kb on t1 (b);
+CREATE INDEX kba on t1 (b,a);
+DROP INDEX ka on t1;
+DROP INDEX kab on t1;
+SHOW CREATE TABLE t1;
+CHECK TABLE t1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX(kb) WHERE b > 5;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX(kba) WHERE a > 2;
+DROP TABLE t1;
+
+#
+# Create tables with partitions and try to update/select from them.
+#
+CREATE TABLE t1 (i INT, j INT, k INT, PRIMARY KEY (i), KEY(j)) ENGINE = ROCKSDB PARTITION BY KEY(i) PARTITIONS 4;
+
+--disable_query_log
+let $max = 100;
+let $i = 1;
+while ($i <= $max) {
+ let $insert = INSERT INTO t1 VALUES ($i, $i, $i);
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+
+ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE;
+DROP INDEX kij ON t1;
+SHOW CREATE TABLE t1;
+
+SELECT * FROM t1 ORDER BY i LIMIT 10;
+SELECT COUNT(*) FROM t1;
+
+DROP TABLE t1;
+
+# test failure in prepare phase (due to collation)
+set @tmp_rocksdb_strict_collation_check= @@rocksdb_strict_collation_check;
+set global rocksdb_strict_collation_check=1;
+CREATE TABLE t1 (a INT, b TEXT);
+
+--echo # MariaDB no longer gives ER_UNSUPPORTED_COLLATION
+ALTER TABLE t1 ADD KEY kb(b(10));
+ALTER TABLE t1 ADD PRIMARY KEY(a);
+DROP TABLE t1;
+
+CREATE TABLE t1 (a INT, b TEXT collate utf8_general_ci);
+--echo # MariaDB no longer gives ER_UNSUPPORTED_COLLATION
+ALTER TABLE t1 ADD KEY kb(b(10));
+ALTER TABLE t1 ADD PRIMARY KEY(a);
+DROP TABLE t1;
+
+set global rocksdb_strict_collation_check= @tmp_rocksdb_strict_collation_check;
+
+# make sure the race condition between a connection close and an alter on
+# another connection is handled
+
+set global rocksdb_bulk_load=1;
+
+--echo # Establish connection con1 (user=root)
+connect (con1,localhost,root,,);
+
+--echo # Switch to connection con1
+connection con1;
+
+show global variables like 'rocksdb_bulk_load%';
+show session variables like 'rocksdb_bulk_load%';
+
+CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
+
+INSERT INTO t1 VALUES (1,1);
+
+# Disconnect connection 1; this starts the code path that calls
+# rocksdb_close_connection, ending the bulk load.
+--echo # Disconnecting on con1
+disconnect con1;
+
+--echo # Establish connection con2 (user=root)
+connect (con2,localhost,root,,);
+--echo # Switch to connection con2
+connection con2;
+
+# When ALTER TABLE runs, it tries to close all other TABLE instances while
+# acquiring the exclusive lock for the alter (this happens in the SQL layer).
+# Make sure bulk_load handles this possible race condition properly.
+ALTER TABLE t1 ADD INDEX kj(j), ALGORITHM=INPLACE;
+
+SELECT COUNT(*) FROM t1 FORCE INDEX(PRIMARY);
+SELECT COUNT(*) FROM t1 FORCE INDEX(kj);
+
+DROP TABLE t1;
+disconnect con2;
+
+# make sure implicitly closing the table via an ALTER from another session works
+
+--echo # Establish connection con1 (user=root)
+connect (con1,localhost,root,,);
+--echo # Establish connection con2 (user=root)
+connect (con2,localhost,root,,);
+
+--echo # Switch to connection con1
+connection con1;
+
+CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
+
+set rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (1,1);
+
+--echo # Switch to connection con2
+connection con2;
+
+# Here the bulk load hasn't completed yet and we are in con2,
+# so SELECT COUNT(*) returns 0.
+SELECT COUNT(*) FROM t1 FORCE INDEX(PRIMARY);
+
+# implicitly close the table from connection 2
+ALTER TABLE t1 ADD INDEX kj(j), ALGORITHM=INPLACE;
+
+SELECT COUNT(*) FROM t1 FORCE INDEX(PRIMARY);
+SELECT COUNT(*) FROM t1 FORCE INDEX(kj);
+
+set global rocksdb_bulk_load=0;
+
+DROP TABLE t1;
+
+connection default;
+
+
+SET @prior_rocksdb_merge_combine_read_size= @@rocksdb_merge_combine_read_size;
+SET @prior_rocksdb_strict_collation_check= @@rocksdb_strict_collation_check;
+SET @prior_rocksdb_merge_buf_size = @@rocksdb_merge_buf_size;
+
+SET global rocksdb_strict_collation_check = off;
+SET session rocksdb_merge_combine_read_size = 566;
+SET session rocksdb_merge_buf_size = 340;
+
+show variables like 'rocksdb_bulk_load%';
+CREATE TABLE t1 (a VARCHAR(80)) ENGINE=RocksDB;
+INSERT INTO t1 (a) VALUES (REPEAT("a", 80));
+INSERT INTO t1 (a) VALUES (REPEAT("a", 80));
+INSERT INTO t1 (a) VALUES (REPEAT("a", 80));
+INSERT INTO t1 (a) VALUES (REPEAT("a", 80));
+ALTER TABLE t1 ADD INDEX ka(a), ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+CHECK TABLE t1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX(ka) WHERE a > "";
+DROP TABLE t1;
+
+SET session rocksdb_merge_buf_size = @prior_rocksdb_merge_buf_size;
+SET session rocksdb_merge_combine_read_size = @prior_rocksdb_merge_combine_read_size;
+SET global rocksdb_strict_collation_check = @prior_rocksdb_strict_collation_check;
+
+# Test to make sure index statistics are updated properly
+CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
+
+--disable_query_log
+let $max = 100;
+let $i = 1;
+while ($i <= $max) {
+ let $insert = INSERT INTO t1 VALUES ($i, $i);
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+
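+# MyRocks derives index statistics from SST files, so flush the memtable
+# first; otherwise INDEX_LENGTH below may still read 0.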
+set global rocksdb_force_flush_memtable_now=1;
+
+--let $data_length_old = query_get_value("select INDEX_LENGTH from information_schema.tables where table_schema=database() and table_name='t1'", INDEX_LENGTH, 1)
+
+## uncomment to see the actual values
+#--replace_column 8 #
+#SHOW TABLE STATUS WHERE name LIKE 't1';
+
+# Now do an alter and see what happens
+ALTER TABLE t1 ADD INDEX kj(j), ALGORITHM=INPLACE;
+
+--let $data_length_new = query_get_value("select INDEX_LENGTH from information_schema.tables where table_schema=database() and table_name='t1'", INDEX_LENGTH, 1)
+--disable_query_log
+--eval select $data_length_old < $data_length_new as "larger"
+
+--source include/restart_mysqld.inc
+--source include/wait_until_connected_again.inc
+--let $data_length_new = query_get_value("select INDEX_LENGTH from information_schema.tables where table_schema=database() and table_name='t1'", INDEX_LENGTH, 1)
+--disable_query_log
+--eval select $data_length_old < $data_length_new as "larger"
+
+analyze table t1;
+--let $data_length_new = query_get_value("select INDEX_LENGTH from information_schema.tables where table_schema=database() and table_name='t1'", INDEX_LENGTH, 1)
+--disable_query_log
+--eval select $data_length_old < $data_length_new as "larger"
+
+--source include/restart_mysqld.inc
+--source include/wait_until_connected_again.inc
+--let $data_length_new = query_get_value("select INDEX_LENGTH from information_schema.tables where table_schema=database() and table_name='t1'", INDEX_LENGTH, 1)
+--disable_query_log
+--eval select $data_length_old < $data_length_new as "larger"
+
+# verify that running ANALYZE TABLE multiple times won't change the stats
+--disable_query_log
+let $max = 10;
+let $i = 1;
+while ($i <= $max) {
+ let $analyze = ANALYZE TABLE t1;
+ inc $i;
+ eval $analyze;
+}
+--enable_query_log
+
+--let $data_length_new2 = query_get_value("select INDEX_LENGTH from information_schema.tables where table_schema=database() and table_name='t1'", INDEX_LENGTH, 1)
+--eval select $data_length_new2 < $data_length_new * 1.5 as "same"
+
+
+--enable_query_log
+
+## uncomment to see the actual values
+#--replace_column 8 #
+#SHOW TABLE STATUS WHERE name LIKE 't1';
+
+DROP TABLE t1;
+
+# https://github.com/facebook/mysql-5.6/issues/602
+# Adding and then dropping the same index should be optimized out.
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b INT,
+c INT,
+KEY kbc(b,c)) ENGINE = ROCKSDB;
+INSERT INTO t1 (a,b,c) VALUES (1,1,1);
+INSERT INTO t1 (a,b,c) VALUES (2,2,2);
+INSERT INTO t1 (a,b,c) VALUES (3,3,3);
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 DROP INDEX kbc, ADD INDEX kbc(b,c), ALGORITHM=INPLACE;
+ALTER TABLE t1 DROP INDEX kbc;
+DROP TABLE t1;
+
+# Make sure changing a key part's prefix length causes an index rebuild as well.
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b varchar(10),
+index kb(b(5))
+) ENGINE = ROCKSDB charset utf8 collate utf8_bin;
+
+INSERT INTO t1 (a,b) VALUES (1,'1111122222');
+INSERT INTO t1 (a,b) VALUES (2,'2222233333');
+INSERT INTO t1 (a,b) VALUES (3,'3333344444');
+
+--let $start_max_index_id = query_get_value(SELECT * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type = 'MAX_INDEX_ID', VALUE, 1)
+
+ALTER TABLE t1 DROP INDEX kb, ADD INDEX kb(b(8)), ALGORITHM=INPLACE;
+SELECT * FROM t1 FORCE INDEX(kb);
+
+--let $end_max_index_id = query_get_value(SELECT * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type = 'MAX_INDEX_ID', VALUE, 1)
+
+if ($end_max_index_id <= $start_max_index_id) {
+ echo Max index ID did not increase;
+}
+
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+# Cardinality checks for index statistics
+SET @prior_rocksdb_table_stats_sampling_pct = @@rocksdb_table_stats_sampling_pct;
+set global rocksdb_table_stats_sampling_pct = 100;
+
+CREATE TABLE t1 (a INT, b INT, PRIMARY KEY ka(a)) ENGINE=RocksDB;
+
+INSERT INTO t1 (a, b) VALUES (1, 10);
+INSERT INTO t1 (a, b) VALUES (2, 10);
+INSERT INTO t1 (a, b) VALUES (3, 20);
+INSERT INTO t1 (a, b) VALUES (4, 20);
+
+set global rocksdb_force_flush_memtable_now=1;
+analyze table t1;
+
+SHOW INDEX in t1;
+
+ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE;
+SHOW INDEX in t1;
+
+DROP TABLE t1;
+SET global rocksdb_table_stats_sampling_pct = @prior_rocksdb_table_stats_sampling_pct;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_cardinality-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_cardinality-master.opt
new file mode 100644
index 00000000000..436edf2b40c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_cardinality-master.opt
@@ -0,0 +1 @@
+--rocksdb_table_stats_sampling_pct=100
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_cardinality.test b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_cardinality.test
new file mode 100644
index 00000000000..148edf7a3d2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_cardinality.test
@@ -0,0 +1,44 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+--source include/have_debug_sync.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+# Test that fast secondary index creation updates cardinality properly
+CREATE TABLE t1 (i INT PRIMARY KEY, j INT) ENGINE = ROCKSDB;
+INSERT INTO t1 VALUES (1,2), (2,4), (3,6), (4,8), (5,10);
+
+SET debug_sync= 'rocksdb.commit_in_place_alter_table WAIT_FOR flushed';
+send ALTER TABLE t1 ADD INDEX kj(j), ALGORITHM=INPLACE;
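+# The ALTER blocks at the sync point above until con1 signals 'flushed'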
+
+connect (con1,localhost,root,,);
+
+# Flush memtable out to SST
+SET GLOBAL rocksdb_force_flush_memtable_now = 1;
+SET debug_sync= 'now SIGNAL flushed';
+
+connection default;
+reap;
+
+# Return the data for the primary key of t1
+--replace_column 1 # 2 # 3 SSTNAME 5 # 6 # 7 # 8 # 9 #
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_INDEX_FILE_MAP
+WHERE INDEX_NUMBER =
+ (SELECT INDEX_NUMBER FROM INFORMATION_SCHEMA.ROCKSDB_DDL
+ WHERE TABLE_NAME = 't1' AND INDEX_NAME = "PRIMARY");
+
+# Return the data for the secondary index of t1
+--replace_column 1 # 2 # 3 SSTNAME 5 # 6 # 7 # 8 # 9 #
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_INDEX_FILE_MAP
+WHERE INDEX_NUMBER =
+ (SELECT INDEX_NUMBER FROM INFORMATION_SCHEMA.ROCKSDB_DDL
+ WHERE TABLE_NAME = 't1' AND INDEX_NAME = "kj");
+
+disconnect con1;
+SET debug_sync='RESET';
+
+# cleanup
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_crash.test b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_crash.test
new file mode 100644
index 00000000000..4f34cbe8f8a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_crash.test
@@ -0,0 +1,118 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+--source include/have_partition.inc
+--source include/not_valgrind.inc
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+#
+# test crash recovery
+#
+
+CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1, 5);
+INSERT INTO t1 (a, b) VALUES (2, 6);
+INSERT INTO t1 (a, b) VALUES (3, 7);
+
+--echo # crash_during_online_index_creation
+flush logs;
+
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+SET SESSION debug_dbug="+d,crash_during_online_index_creation";
+--error 2013
+ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE;
+
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+SET SESSION debug_dbug="-d,crash_during_online_index_creation";
+
+SHOW CREATE TABLE t1;
+CHECK TABLE t1;
+
+DROP TABLE t1;
+
+#
+# Test crash recovery with partitioned tables
+#
+CREATE TABLE t1 (i INT, j INT, k INT, PRIMARY KEY (i), KEY(j)) ENGINE = ROCKSDB PARTITION BY KEY(i) PARTITIONS 4;
+
+--disable_query_log
+let $max = 100;
+let $i = 1;
+while ($i <= $max) {
+ let $insert = INSERT INTO t1 VALUES ($i, $i, $i);
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+
+--echo # crash_during_index_creation_partition
+flush logs;
+
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+SET SESSION debug_dbug="+d,crash_during_index_creation_partition";
+--error 2013
+ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE;
+
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+SET SESSION debug_dbug="-d,crash_during_index_creation_partition";
+
+SHOW CREATE TABLE t1;
+
+# here, the index numbers should be higher because previously 4 index numbers
+# were allocated for the partitioned table
+ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE;
+
+SELECT * FROM t1 ORDER BY i LIMIT 10;
+SELECT COUNT(*) FROM t1;
+
+DROP TABLE t1;
+
+#
+# Test rollback on partitioned tables for inplace alter
+#
+CREATE TABLE t1 (i INT, j INT, k INT, PRIMARY KEY (i), KEY(j)) ENGINE = ROCKSDB PARTITION BY KEY(i) PARTITIONS 4;
+
+--disable_query_log
+let $max = 100;
+let $i = 1;
+while ($i <= $max) {
+ let $insert = INSERT INTO t1 VALUES ($i, $i, $i);
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+
+--echo # crash_during_index_creation_partition
+flush logs;
+
+SET SESSION debug_dbug="+d,myrocks_simulate_index_create_rollback";
+
+--error 1105
+ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE;
+SET SESSION debug_dbug="-d,myrocks_simulate_index_create_rollback";
+SHOW CREATE TABLE t1;
+
+# here, the index numbers should be higher because previously 4 index numbers
+# were allocated for the partitioned table
+ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE;
+
+SHOW CREATE TABLE t1;
+SELECT COUNT(*) FROM t1;
+
+DROP TABLE t1;
+
+# Clean up temporary #sql files. In the future the server will remove these
+# automatically, but for now we need to delete them explicitly.
+
+--disable_query_log
+--disable_result_log
+let $datadir=`select @@datadir`;
+--remove_files_wildcard $datadir/test #sql*
+--enable_result_log
+--enable_query_log
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_sstfilewriter.test b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_sstfilewriter.test
new file mode 100644
index 00000000000..61a10a60e7f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_sstfilewriter.test
@@ -0,0 +1,113 @@
+--source include/have_rocksdb.inc
+
+# This test requires ~1.3G of disk space
+--source include/big_test.inc
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+# Create a table with a primary key and two more columns; secondary
+# indexes are added later in the test
+CREATE TABLE t1(pk CHAR(5) PRIMARY KEY, a char(30), b char(30)) COLLATE 'latin1_bin';
+
+--let $file = `SELECT CONCAT(@@datadir, "test_loadfile.txt")`
+
+# Create a text file with data to import into the table.
+# The primary key is in sorted order and the secondary key columns are randomly generated
+--let ROCKSDB_INFILE = $file
+perl;
+my $fn = $ENV{'ROCKSDB_INFILE'};
+open(my $fh, '>>', $fn) || die "perl open($fn): $!";
+my $max = 300000;
+my @chars = ("A".."Z", "a".."z", "0".."9");
+my @lowerchars = ("a".."z");
+my @powers_of_26 = (26 * 26 * 26 * 26, 26 * 26 * 26, 26 * 26, 26, 1);
+for (my $ii = 0; $ii < $max; $ii++)
+{
+ my $pk;
+ my $tmp = $ii;
+ foreach (@powers_of_26)
+ {
+ $pk .= $lowerchars[$tmp / $_];
+ $tmp = $tmp % $_;
+ }
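+ # e.g. $ii=0 yields pk "aaaaa" and $ii=1 yields "aaaab": a fixed-width
+ # base-26 key, so primary keys are generated in sorted order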
+
+ my $num = int(rand(25)) + 6;
+ my $a;
+ $a .= $chars[rand(@chars)] for 1..$num;
+
+ $num = int(rand(25)) + 6;
+ my $b;
+ $b .= $chars[rand(@chars)] for 1..$num;
+ print $fh "$pk\t$a\t$b\n";
+}
+close($fh);
+EOF
+
+--file_exists $file
+
+set rocksdb_bulk_load=1;
+set rocksdb_bulk_load_size=10000;
+--disable_query_log
+--echo LOAD DATA INFILE <input_file> INTO TABLE t1;
+eval LOAD DATA INFILE '$file' INTO TABLE t1;
+--enable_query_log
+set rocksdb_bulk_load=0;
+
+# Make sure all the data is there.
+select count(pk) from t1;
+select count(a) from t1;
+select count(b) from t1;
+
+# now do fast secondary index creation
+ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE;
+# disable duplicate index warning
+--disable_warnings
+# now add the same index using the copy algorithm,
+# first hitting the max row locks limit (default 1M, lowered to 1000 here)
+set @tmp= @@rocksdb_max_row_locks;
+set session rocksdb_max_row_locks=1000;
+--error ER_GET_ERRMSG
+ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY;
+set session rocksdb_bulk_load=1;
+ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY;
+set session rocksdb_bulk_load=0;
+--enable_warnings
+set session rocksdb_max_row_locks=@tmp;
+
+# checksum testing
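+# The UNION DISTINCT of the two per-index checksums returns a single row
+# (c = 1) if and only if kb and kb_copy hash to the same value.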
+SELECT COUNT(*) as c FROM
+(SELECT COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', `b`, CONCAT(ISNULL(`b`)))) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `t1` FORCE INDEX(`kb`)
+UNION DISTINCT
+SELECT COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#',
+`b`, CONCAT(ISNULL(`b`)))) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `t1` FORCE
+INDEX(`kb_copy`)) as temp;
+
+select count(*) from t1 FORCE INDEX(kb);
+select count(*) from t1 FORCE INDEX(kb_copy);
+select count(*) from t1 FORCE INDEX(PRIMARY);
+
+# drop the index
+ALTER TABLE t1 DROP INDEX kb, ALGORITHM=INPLACE;
+ALTER TABLE t1 DROP INDEX kb_copy, ALGORITHM=INPLACE;
+
+# add two indexes simultaneously
+ALTER TABLE t1 ADD INDEX kb(b), ADD INDEX kab(a,b), ALGORITHM=INPLACE;
+SELECT COUNT(*) FROM t1 FORCE INDEX(kab);
+SELECT COUNT(*) FROM t1 FORCE INDEX(kb);
+SHOW CREATE TABLE t1;
+
+DROP TABLE t1;
+
+# Reverse CF testing: data needs to be added to the SSTFileWriter in reverse order
+CREATE TABLE t1 (a INT PRIMARY KEY, b INT, KEY kab(a,b)) ENGINE=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1, 5);
+INSERT INTO t1 (a, b) VALUES (2, 6);
+INSERT INTO t1 (a, b) VALUES (3, 7);
+ALTER TABLE t1 DROP INDEX kab, ALGORITHM=INPLACE;
+ALTER TABLE t1 ADD INDEX kb(b) comment 'rev:cf1', ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+SELECT COUNT(*) FROM t1 FORCE INDEX(kb);
+DROP TABLE t1;
+--remove_file $file
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/add_unique_index_inplace.test b/storage/rocksdb/mysql-test/rocksdb/t/add_unique_index_inplace.test
new file mode 100644
index 00000000000..cf0546d182c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/add_unique_index_inplace.test
@@ -0,0 +1,101 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+# test adding a duplicate value before creating a unique index
+CREATE TABLE t1 (a INT, b INT, PRIMARY KEY ka(a)) ENGINE=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1, 5);
+INSERT INTO t1 (a, b) VALUES (2, 6);
+INSERT INTO t1 (a, b) VALUES (3, 7);
+
+INSERT INTO t1 (a,b) VALUES (4,5);
+
+# should cause error here, duplicate value on b
+--error 1062
+ALTER TABLE t1 ADD UNIQUE INDEX kb(b), ALGORITHM=INPLACE;
+
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+# test a duplicate value AFTER creating a unique index
+CREATE TABLE t1 (a INT, b INT, PRIMARY KEY ka(a)) ENGINE=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1, 5);
+INSERT INTO t1 (a, b) VALUES (2, 6);
+INSERT INTO t1 (a, b) VALUES (3, 7);
+ALTER TABLE t1 ADD UNIQUE INDEX kb(b), ALGORITHM=INPLACE;
+
+# should error here, duplicate value on b
+--error 1062
+INSERT INTO t1 (a,b) VALUES (4,5);
+
+# should succeed
+INSERT INTO t1 (a,b) VALUES (5,8);
+
+SHOW CREATE TABLE t1;
+SELECT * FROM t1 FORCE INDEX(kb);
+DROP TABLE t1;
+
+# test what happens when duplicate NULLs exist
+CREATE TABLE t1 (a INT, b INT, PRIMARY KEY ka(a)) ENGINE=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1, 5);
+INSERT INTO t1 (a, b) VALUES (2, NULL);
+INSERT INTO t1 (a, b) VALUES (3, NULL);
+
+# should pass, because MySQL allows multiple NULLs in a unique key
+ALTER TABLE t1 ADD UNIQUE INDEX kb(b), ALGORITHM=INPLACE;
+INSERT INTO t1 (a, b) VALUES (4, NULL);
+
+SHOW CREATE TABLE t1;
+SELECT COUNT(*) FROM t1 FORCE INDEX(kb);
+DROP TABLE t1;
+
+## test case with multi-part key with nulls
+CREATE TABLE t1 (a INT, b INT, c INT, PRIMARY KEY ka(a)) ENGINE=RocksDB;
+INSERT INTO t1 (a,b,c) VALUES (1,1,NULL);
+INSERT INTO t1 (a,b,c) VALUES (2,1,NULL);
+INSERT INTO t1 (a,b,c) VALUES (3,1,NULL);
+INSERT INTO t1 (a,b,c) VALUES (4,1,5);
+
+# should pass
+ALTER TABLE t1 ADD UNIQUE INDEX kbc(b,c), ALGORITHM=INPLACE;
+
+SHOW CREATE TABLE t1;
+SELECT COUNT(*) FROM t1 FORCE INDEX(kbc);
+DROP TABLE t1;
+
+## test case with a table with no primary key, where we try to add a unique key
+CREATE TABLE t1 (a INT, b INT) ENGINE=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1, 5);
+INSERT INTO t1 (a, b) VALUES (2, 6);
+INSERT INTO t1 (a, b) VALUES (3, 7);
+
+# should succeed; we can add a unique index on a table with no pk
+ALTER TABLE t1 ADD UNIQUE INDEX kb(b);
+INSERT INTO t1 (a, b) VALUES (4, 8);
+--error ER_DUP_ENTRY
+INSERT INTO t1 (a, b) VALUES (5, 5);
+
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+
+# Test changing a key with the same name and parts from non-unique to unique;
+# this should fail.
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b INT,
+c INT,
+KEY kbc(b,c)) ENGINE = ROCKSDB;
+INSERT INTO t1 (a,b,c) VALUES (1,1,1);
+INSERT INTO t1 (a,b,c) VALUES (2,2,2);
+INSERT INTO t1 (a,b,c) VALUES (3,2,2);
+
+# should cause error here, duplicate value 2-2
+--error 1062
+ALTER TABLE t1 DROP INDEX kbc, ADD UNIQUE INDEX kbc(b,c), ALGORITHM=INPLACE;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/allow_no_pk_concurrent_insert.test b/storage/rocksdb/mysql-test/rocksdb/t/allow_no_pk_concurrent_insert.test
new file mode 100644
index 00000000000..8dda4372eb3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/allow_no_pk_concurrent_insert.test
@@ -0,0 +1,22 @@
+
+#
+# Tests concurrent inserts for tables with no primary key.
+#
+
+--source include/have_rocksdb.inc
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+--echo # Binary must be compiled with debug for this test
+--source include/have_debug.inc
+
+# create the actual table
+CREATE TABLE t1 (a INT) ENGINE=rocksdb;
+
+let $exec = python ../storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_insert.py root 127.0.0.1 $MASTER_MYPORT test t1 100 4;
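+# (argument order assumed here: user host port database table num_inserts num_workers)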
+exec $exec;
+
+SELECT COUNT(*) from t1;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key.test b/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key.test
new file mode 100644
index 00000000000..5f2a37f235a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key.test
@@ -0,0 +1,126 @@
+--source include/have_rocksdb.inc
+
+#
+# This test checks some very basic capabilities
+# for tables without primary keys. A hidden pk is generated under the hood
+# in MyRocks. Everything should work as normal.
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+# test CREATE
+CREATE TABLE t1 (a INT, b CHAR(8)) ENGINE=rocksdb;
+--source no_primary_key_basic_ops.inc
+DROP TABLE t1;
+
+## test ALTER
+CREATE TABLE t1 (a INT, c CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (1,'a'),(5,'z');
+ALTER TABLE t1 ADD COLUMN b INT;
+SHOW CREATE TABLE t1;
+
+--sorted_result
+SELECT * FROM t1;
+ALTER TABLE t1 DROP COLUMN b;
+SHOW CREATE TABLE t1;
+--sorted_result
+SELECT * FROM t1;
+DROP TABLE t1;
+
+## test creating a table with primary and then dropping that key
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+ALTER TABLE t1 DROP COLUMN pk;
+--source no_primary_key_basic_ops.inc
+DROP TABLE t1;
+
+# test CHECK TABLE statements
+#
+# Note: the output is likely to differ for the engine under test,
+# in which case an rdiff will be needed. Or, the output might say that
+# the storage engine does not support CHECK.
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1,t2;
+--enable_warnings
+
+CREATE TABLE t1 (a INT, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+
+CREATE TABLE t2 (a INT, b CHAR(8)) ENGINE=rocksdb;
+
+CHECK TABLE t1;
+INSERT INTO t1 (a,b) VALUES (3,'c');
+INSERT INTO t2 (a,b) VALUES (4,'d');
+CHECK TABLE t1, t2 FOR UPGRADE;
+INSERT INTO t2 (a,b) VALUES (5,'e');
+CHECK TABLE t2 QUICK;
+INSERT INTO t1 (a,b) VALUES (6,'f');
+CHECK TABLE t1 FAST;
+INSERT INTO t1 (a,b) VALUES (7,'g');
+INSERT INTO t2 (a,b) VALUES (8,'h');
+CHECK TABLE t2, t1 MEDIUM;
+INSERT INTO t1 (a,b) VALUES (9,'i');
+INSERT INTO t2 (a,b) VALUES (10,'j');
+CHECK TABLE t1, t2 EXTENDED;
+INSERT INTO t1 (a,b) VALUES (11,'k');
+CHECK TABLE t1 CHANGED;
+
+DROP TABLE t1, t2;
+
+# test unique keys with no primary key
+CREATE TABLE t1 (a INT, b CHAR(8), UNIQUE INDEX(a)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+--error ER_DUP_ENTRY
+INSERT INTO t1 (a,b) VALUES (1,'c');
+SELECT * FROM t1;
+SELECT * FROM t1 WHERE a = 2;
+EXPLAIN SELECT * FROM t1 WHERE a = 2;
+
+DROP TABLE t1;
+
+## test a server restart with a table that has no data
+CREATE TABLE t1 (a INT, b CHAR(8)) ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+SHOW COLUMNS IN t1;
+--source include/restart_mysqld.inc
+
+## single delete statement should remove MULTIPLE rows (aka duplicate rows)
+INSERT INTO t1 (a,b) VALUES (35,'foo');
+INSERT INTO t1 (a,b) VALUES (35,'foo');
+INSERT INTO t1 (a,b) VALUES (36,'foo');
+DELETE FROM t1 WHERE a = 35 AND b = 'foo';
+--sorted_result
+SELECT * FROM t1;
+DROP TABLE t1;
+
+--echo #
+--echo # Issue #834/MDEV-15304 ALTER TABLE table_with_hidden_pk causes Can't
+--echo # write; duplicate key in table error and/or crash
+--echo #
+CREATE TABLE t1 (a INT, KEY(a)) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (1),(1+1);
+create table t2 (a int);
+insert into t2 values (10),(20),(30);
+
+BEGIN;
+select * from t2;
+
+connect (con1,localhost,root,,);
+connection con1;
+alter table t1 force;
+
+connection default;
+select * from t1;
+
+connection con1;
+insert into t1 values (100);
+select * from t1;
+
+disconnect con1;
+connection default;
+rollback;
+drop table t1,t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key_with_sk.test b/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key_with_sk.test
new file mode 100644
index 00000000000..18ccf2e39f6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key_with_sk.test
@@ -0,0 +1,152 @@
+--source include/have_rocksdb.inc
+
+#
+# This test checks some very basic capabilities
+# of tables without primary keys. A hidden PK is generated under the hood
+# by MyRocks, so everything should work as normal.
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+## test CREATE with SK
+CREATE TABLE t1 (a INT, b CHAR(8), KEY(a)) ENGINE=rocksdb;
+--source no_primary_key_basic_ops.inc
+DROP TABLE t1;
+
+## test adding/dropping sk w/no pk
+CREATE TABLE t1 (a INT, b CHAR(8)) ENGINE=rocksdb;
+ALTER TABLE t1 ADD INDEX (b);
+--source no_primary_key_basic_ops.inc
+
+ALTER TABLE t1 DROP INDEX b;
+--source no_primary_key_basic_ops.inc
+DROP TABLE t1;
+
+# test dropping pk w/ sk
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+ALTER TABLE t1 DROP COLUMN pk;
+--source no_primary_key_basic_ops.inc
+DROP TABLE t1;
+
+--echo #
+--echo # MDEV-4313: RocksDB: Server crashes in Rdb_key_def::setup on dropping the primary key column
+--echo #
+CREATE TABLE t1 (pk INT PRIMARY KEY, i INT NOT NULL, KEY(i)) ENGINE=RocksDB;
+ALTER TABLE t1 DROP COLUMN `pk`;
+DROP TABLE t1;
+
+# create table with multiple sk, make sure it still works
+# test CREATE with SK
+CREATE TABLE t1 (a INT, b CHAR(8), KEY(a), KEY(b)) ENGINE=rocksdb;
+--source no_primary_key_basic_ops.inc
+DROP TABLE t1;
+
+# test CREATE table with multi-part sk
+CREATE TABLE t1 (a INT, b CHAR(8), KEY(a, b)) ENGINE=rocksdb;
+--source no_primary_key_basic_ops.inc
+DROP TABLE t1;
+
+# test CREATE table with more than one sk
+CREATE TABLE t1 (a INT, b CHAR(8), KEY(a), KEY(b)) ENGINE=rocksdb;
+--source no_primary_key_basic_ops.inc
+DROP TABLE t1;
+
+# test check table with sk
+CREATE TABLE t1 (a INT, b CHAR(8), KEY(a)) ENGINE=rocksdb;
+INSERT INTO t1 (a) VALUES (1),(2),(5);
+CHECK TABLE t1;
+INSERT INTO t1 (a) VALUES (6),(8),(12);
+CHECK TABLE t1 FOR UPGRADE;
+INSERT INTO t1 (a) VALUES (13),(15),(16);
+CHECK TABLE t1 QUICK;
+INSERT INTO t1 (a) VALUES (17),(120),(132);
+CHECK TABLE t1 FAST;
+INSERT INTO t1 (a) VALUES (801),(900),(7714);
+CHECK TABLE t1 MEDIUM;
+INSERT INTO t1 (a) VALUES (8760),(10023),(12000);
+CHECK TABLE t1 EXTENDED;
+INSERT INTO t1 (a) VALUES (13345),(24456),(78302),(143028);
+CHECK TABLE t1 CHANGED;
+DROP TABLE t1;
+
+## tables with multi-part secondary indexes + columns that don't belong to any
+## secondary indexes
+CREATE TABLE t1 (a INT, b INT, c INT, d INT, KEY kab(a, b), KEY kbc(b, c), KEY kabc(a,b,c)) ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+SHOW COLUMNS IN t1;
+
+INSERT INTO t1 (a,b,c,d) VALUES (1,2,3,4);
+INSERT INTO t1 (a,b,c,d) VALUES (5,6,7,8);
+INSERT INTO t1 (a,b,c,d) VALUES (10,11,12,13);
+INSERT INTO t1 (a,b,c,d) VALUES (14,15,16,17);
+
+--sorted_result
+SELECT * FROM t1;
+--sorted_result
+SELECT * FROM t1 WHERE a = 1 OR a = 10;
+--sorted_result
+SELECT * FROM t1 WHERE c = 3 OR d = 17;
+--sorted_result
+SELECT * FROM t1 WHERE a > 5 OR d > 5;
+
+# force some of these selects to use different indexes and/or select
+# column lists that do not contain column d
+--sorted_result
+SELECT a, b, c FROM t1 FORCE INDEX (kabc) WHERE a=1 OR b=11;
+--sorted_result
+SELECT d FROM t1 FORCE INDEX (kbc) WHERE b > 6 AND c > 12;
+
+UPDATE t1 SET a=a+100;
+UPDATE t1 SET a=a-100, b=99 WHERE a>100;
+--sorted_result
+SELECT * FROM t1;
+
+DELETE FROM t1 WHERE a>5;
+DELETE FROM t1 WHERE b=99 AND d>4;
+--sorted_result
+SELECT * FROM t1;
+
+TRUNCATE TABLE t1;
+DROP TABLE t1;
+
+## secondary indexes live in reverse column families
+CREATE TABLE t1 (a INT, b CHAR(8), KEY ka(a) comment 'rev:cf1', KEY kb(b)
+comment 'rev:cf1', KEY kab(a,b) comment 'rev:cf2') ENGINE=rocksdb;
+--source no_primary_key_basic_ops.inc
+DROP TABLE t1;
+
+## https://github.com/facebook/mysql-5.6/issues/209
+## Accidental single delete caused data inconsistency
+CREATE TABLE t1 (col1 int, col2 int, KEY kcol1(col1)) ENGINE=ROCKSDB;
+INSERT INTO t1 (col1, col2) values (2,2);
+ALTER TABLE t1 ADD COLUMN extra INT;
+UPDATE t1 SET col2 = 1;
+select * from t1;
+DELETE FROM t1 WHERE col1 = 2;
+
+# flush memtable to cause compaction to occur.
+# During compaction, if a SingleDelete occurs then the delete marker and the
+# key it is deleting are both removed. This will cause data inconsistency if
+# SingleDelete is called on PK, since we do multiple Put() operations to update
+# primary keys.
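+# (SingleDelete is only safe for keys that are written at most once between
+# deletes; PK entries are rewritten in place by UPDATEs, so a regular
+# Delete() must be used for them.)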
+set global rocksdb_force_flush_memtable_now = true;
+
+select * from t1;
+DROP TABLE t1;
+
+## https://github.com/facebook/mysql-5.6/issues/736
+create table t1 (i int auto_increment, key(i)) engine=rocksdb;
+insert into t1 values();
+insert into t1 values();
+insert into t1 values();
+
+show create table t1;
+--source include/restart_mysqld.inc
+show create table t1;
+
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption-master.opt
new file mode 100644
index 00000000000..70c120604f6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption-master.opt
@@ -0,0 +1 @@
+--rocksdb_flush_log_at_trx_commit=1
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption.test b/storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption.test
new file mode 100644
index 00000000000..67b2d5f96d7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption.test
@@ -0,0 +1,78 @@
+--source include/have_rocksdb.inc
+--source include/not_valgrind.inc
+
+--echo #
+--echo # Test how MyRocks behaves when RocksDB reports corrupted data.
+--echo #
+
+--source include/have_debug.inc
+
+# use custom error log to assert on error message in search_pattern_in_file.inc
+--let LOG=$MYSQLTEST_VARDIR/tmp/allow_to_start_after_corruption_debug.err
+--let SEARCH_FILE=$LOG
+
+# restart server to change error log and ignore corruption on startup
+--let $_mysqld_option=--log-error=$LOG --rocksdb_allow_to_start_after_corruption=1
+--source include/restart_mysqld_with_option.inc
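+# (Without rocksdb_allow_to_start_after_corruption=1, MyRocks leaves a
+# marker file, #rocksdb/ROCKSDB_CORRUPTED, and refuses to start; the last
+# part of this test removes that file to recover.)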
+
+--echo #
+--echo # Test server crashes on corrupted data and restarts
+--echo #
+create table t1 (
+ pk int not null primary key,
+ col1 varchar(10)
+) engine=rocksdb;
+
+insert into t1 values (1,1),(2,2),(3,3);
+
+select * from t1 where pk=1;
+set session debug_dbug= "+d,rocksdb_return_status_corrupted";
+--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--error 2013
+select * from t1 where pk=1;
+--source include/wait_until_disconnected.inc
+--let SEARCH_PATTERN=data corruption detected
+--source include/search_pattern_in_file.inc
+--remove_file $LOG
+
+--echo #
+--echo # The same for scan queries
+--echo #
+
+--source include/start_mysqld_with_option.inc
+select * from t1;
+set session debug_dbug= "+d,rocksdb_return_status_corrupted";
+--exec echo "wait" > $_expect_file_name
+--error 2013
+select * from t1;
+--source include/wait_until_disconnected.inc
+--let SEARCH_PATTERN=data corruption detected
+--source include/search_pattern_in_file.inc
+--remove_file $LOG
+
+--echo #
+--echo # Test restart failure. The server is shutdown at this point.
+--echo #
+
+# remove flag to ignore corruption
+--let $_mysqld_option=--log-error=$LOG
+--error 0
+--exec $MYSQLD_CMD --plugin_load=$HA_ROCKSDB_SO $_mysqld_option
+--let SEARCH_PATTERN=The server will exit normally and stop restart attempts
+--source include/search_pattern_in_file.inc
+--remove_file $LOG
+
+--echo #
+--echo # Remove corruption file and restart cleanly
+--echo #
+
+--exec rm $MYSQLTEST_VARDIR/mysqld.$_server_id/data/#rocksdb/ROCKSDB_CORRUPTED
+--source include/start_mysqld_with_option.inc
+
+drop table t1;
+
+# Restart mysqld with default options
+--source include/restart_mysqld.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/alter_table.test b/storage/rocksdb/mysql-test/rocksdb/t/alter_table.test
new file mode 100644
index 00000000000..2603311da55
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/alter_table.test
@@ -0,0 +1,94 @@
+--source include/have_rocksdb.inc
+
+#
+# Basic ALTER TABLE statements.
+#
+# USAGE of table options in ALTER statements
+# is covered in tbl_standard_opts and tbl_opt*.tests.
+#
+# Index operations are covered in index* tests.
+#
+# ALTER OFFLINE is not covered as it is not supported, as of 5.5.23
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1, t2;
+--enable_warnings
+
+CREATE TABLE t1 (pk INT PRIMARY KEY, a INT, c CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (1,1,'a'),(2,5,'z');
+
+# Column operations
+
+ALTER TABLE t1 ADD COLUMN b INT;
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 ALTER COLUMN a SET DEFAULT '0';
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 ALTER a DROP DEFAULT;
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 CHANGE COLUMN b b1 CHAR(8) FIRST;
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 CHANGE b1 b INT AFTER c;
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 CHANGE b b CHAR(8);
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 MODIFY COLUMN b INT;
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 MODIFY COLUMN b CHAR(8) FIRST;
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 MODIFY COLUMN b INT AFTER a;
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 DROP COLUMN b;
+SHOW CREATE TABLE t1;
+
+
+# Rename table
+
+ALTER TABLE t1 RENAME TO t2;
+--error ER_NO_SUCH_TABLE
+SHOW CREATE TABLE t1;
+SHOW CREATE TABLE t2;
+DROP TABLE t2;
+
+
+# ORDER BY
+CREATE TABLE t1 (pk INT PRIMARY KEY, a INT, b INT) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (1,1,5),(2,2,2),(3,4,3);
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 ORDER BY b ASC, a DESC, pk DESC;
+SHOW CREATE TABLE t1;
+SELECT * FROM t1;
+DROP TABLE t1;
+
+
+# Character set, collate
+
+CREATE TABLE t1 (pk INT PRIMARY KEY, a INT, b CHAR(8), c CHAR(8)) ENGINE=rocksdb CHARACTER SET latin1 COLLATE latin1_general_cs;
+INSERT INTO t1 VALUES (1,5,'z','t');
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 CONVERT TO CHARACTER SET utf8;
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 DEFAULT CHARACTER SET = latin1 COLLATE latin1_general_ci;
+SHOW CREATE TABLE t1;
+
+
+# A 'null' ALTER operation
+
+ALTER TABLE t1 FORCE;
+SHOW CREATE TABLE t1;
+
+DROP TABLE t1;
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/analyze_table.test b/storage/rocksdb/mysql-test/rocksdb/t/analyze_table.test
new file mode 100644
index 00000000000..b24398b1fe2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/analyze_table.test
@@ -0,0 +1,56 @@
+--source include/have_rocksdb.inc
+--source include/have_partition.inc
+
+#
+# ANALYZE TABLE statements
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1,t2;
+--enable_warnings
+
+CREATE TABLE t1 (pk INT PRIMARY KEY, a INT(11), b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (1,1,'a'),(2,2,'b');
+CREATE TABLE t2 (pk INT PRIMARY KEY, a INT(11), b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (3,3,'c');
+ANALYZE TABLE t1;
+INSERT INTO t2 VALUES (1,4,'d');
+ANALYZE NO_WRITE_TO_BINLOG TABLE t2;
+INSERT INTO t1 VALUES (4,5,'e');
+INSERT INTO t2 VALUES (2,6,'f');
+ANALYZE LOCAL TABLE t1, t2;
+
+DROP TABLE t1, t2;
+
+CREATE TABLE t1 (pk INT PRIMARY KEY, a INT(11), KEY(a)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (1,1),(2,2),(3,4),(4,7);
+ANALYZE TABLE t1;
+INSERT INTO t1 VALUES (5,8),(6,10),(7,11),(8,12);
+ANALYZE TABLE t1;
+DROP TABLE t1;
+
+--echo #
+--echo # MDEV-12465: Server crashes in my_scan_weight_utf8_bin upon
+--echo # collecting stats for RocksDB table
+--echo #
+
+CREATE TABLE t1 (
+ pk INT,
+ f1 CHAR(255),
+ f2 TEXT,
+ f3 VARCHAR(255),
+ f4 TEXT,
+ PRIMARY KEY (pk),
+ KEY (f4(255))
+) ENGINE=RocksDB
+ CHARSET utf8
+ COLLATE utf8_bin
+ PARTITION BY KEY (pk) PARTITIONS 2;
+INSERT INTO t1 VALUES
+(1,'foo','bar','foo','bar'), (2,'bar','foo','bar','foo');
+
+ANALYZE TABLE t1 PERSISTENT FOR ALL;
+
+drop table t1;
+
+--echo # End of 10.2 tests
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/apply_changes_iter.test b/storage/rocksdb/mysql-test/rocksdb/t/apply_changes_iter.test
new file mode 100644
index 00000000000..4f759a8ec60
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/apply_changes_iter.test
@@ -0,0 +1,47 @@
+--source include/have_rocksdb.inc
+
+# Tests the Apply_changes_iter class for walking forwards and backwards
+# with data in both the transaction class and in the rocksdb storage layer.
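+# (Below, rows (6,6) and (12,12) are committed to RocksDB before BEGIN,
+# while (8,8) and (10,10) exist only in the transaction's write batch;
+# the ASC/DESC scans must merge both sources in either direction.)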
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+--enable_warnings
+
+CREATE TABLE t1 (
+ pk INT NOT NULL PRIMARY KEY,
+ key1 INT NOT NULL,
+ KEY (key1)
+) ENGINE=ROCKSDB;
+
+INSERT INTO t1 VALUES (12,12);
+INSERT INTO t1 VALUES (6,6);
+BEGIN;
+INSERT INTO t1 VALUES (8,8), (10,10);
+SELECT * FROM t1 WHERE key1 BETWEEN 4 and 11 ORDER BY KEY1 DESC;
+SELECT * FROM t1 WHERE key1 BETWEEN 4 and 11 ORDER BY KEY1 ASC;
+SELECT * FROM t1 IGNORE INDEX(key1) WHERE key1 BETWEEN 4 and 11 ORDER BY key1 DESC;
+SELECT * FROM t1 IGNORE INDEX(key1) WHERE key1 BETWEEN 4 and 11 ORDER BY key1 ASC;
+ROLLBACK;
+
+CREATE TABLE t2 (
+ pk INT NOT NULL PRIMARY KEY,
+ key1 INT NOT NULL,
+ KEY (key1) COMMENT 'rev:cf'
+) ENGINE=ROCKSDB;
+
+INSERT INTO t2 VALUES (12,12);
+INSERT INTO t2 VALUES (6,6);
+BEGIN;
+INSERT INTO t2 VALUES (8,8), (10,10);
+SELECT * FROM t2 WHERE key1 BETWEEN 4 and 11 ORDER BY KEY1 DESC;
+SELECT * FROM t2 WHERE key1 BETWEEN 4 and 11 ORDER BY KEY1 ASC;
+SELECT * FROM t2 IGNORE INDEX(key1) WHERE key1 BETWEEN 4 and 11 ORDER BY key1 DESC;
+SELECT * FROM t2 IGNORE INDEX(key1) WHERE key1 BETWEEN 4 and 11 ORDER BY key1 ASC;
+ROLLBACK;
+
+DROP TABLE t1;
+DROP TABLE t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.cnf b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.cnf
new file mode 100644
index 00000000000..a43c4617b96
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.cnf
@@ -0,0 +1,8 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+binlog_format=row
+[mysqld.2]
+binlog_format=row
+slave_parallel_workers=1
+#rpl_skip_tx_api=ON
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.test
new file mode 100644
index 00000000000..e61ba720aaf
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.test
@@ -0,0 +1,9 @@
+--source include/have_rocksdb.inc
+--source include/have_binlog_format_row.inc
+--source include/master-slave.inc
+
+create table t (i int primary key auto_increment) engine=rocksdb;
+
+--source include/autoinc_crash_safe.inc
+
+--source include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.cnf b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.cnf
new file mode 100644
index 00000000000..0c0b614039e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.cnf
@@ -0,0 +1,8 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+binlog_format=row
+[mysqld.2]
+binlog_format=row
+#slave_parallel_workers=1
+#rpl_skip_tx_api=ON
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.test
new file mode 100644
index 00000000000..56cf93db9d9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.test
@@ -0,0 +1,10 @@
+--source include/have_rocksdb.inc
+--source include/have_binlog_format_row.inc
+--source include/master-slave.inc
+--source include/have_partition.inc
+
+create table t (i int primary key auto_increment) engine=rocksdb partition by key (i) partitions 3;
+
+--source include/autoinc_crash_safe.inc
+
+--source include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt
new file mode 100644
index 00000000000..06917181cd6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt
@@ -0,0 +1 @@
+--binlog-format=row --rocksdb-flush-log-at-trx-commit=1
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug.test
new file mode 100644
index 00000000000..b17548063d9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug.test
@@ -0,0 +1,124 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+--source include/have_log_bin.inc
+--source include/not_valgrind.inc
+
+--echo #
+--echo # Testing upgrading from server without merges for auto_increment
+--echo # to new server with such support.
+--echo #
+
+set debug_dbug='+d,myrocks_autoinc_upgrade';
+create table t (i int primary key auto_increment);
+insert into t values ();
+insert into t values ();
+insert into t values ();
+select * from t;
+
+delete from t where i > 1;
+select * from t;
+
+select table_name, index_name, auto_increment
+ from information_schema.rocksdb_ddl where table_name = 't';
+
+set debug_dbug='-d,myrocks_autoinc_upgrade';
+
+--source include/restart_mysqld.inc
+
+insert into t values ();
+insert into t values ();
+insert into t values ();
+select * from t;
+
+select table_name, index_name, auto_increment
+ from information_schema.rocksdb_ddl where table_name = 't';
+
+delete from t where i > 1;
+
+--source include/restart_mysqld.inc
+
+insert into t values ();
+insert into t values ();
+insert into t values ();
+select * from t;
+
+drop table t;
+
+--echo #
+--echo # Testing crash safety of transactions.
+--echo #
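+# The commit path is: engine prepare -> binlog write -> engine commit.
+# Each block below injects a crash at one of those points
+# (crash_commit_before / after_prepare / after_log / after) and then
+# verifies that the recovered auto_increment value is still greater
+# than the largest committed pk.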
+create table t (i int primary key auto_increment);
+insert into t values ();
+insert into t values ();
+insert into t values ();
+
+--echo # Before anything
+begin;
+insert into t values ();
+insert into t values ();
+set debug_dbug="+d,crash_commit_before";
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--error 2013
+commit;
+--source include/wait_until_disconnected.inc
+--enable_reconnect
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
+select max(i) into @row_max from t;
+select table_schema, table_name, auto_increment > @row_max from information_schema.tables where table_name = 't';
+
+--echo # After engine prepare
+begin;
+insert into t values ();
+insert into t values ();
+set debug_dbug="+d,crash_commit_after_prepare";
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--error 2013
+commit;
+--source include/wait_until_disconnected.inc
+--enable_reconnect
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
+select max(i) into @row_max from t;
+select table_schema, table_name, auto_increment > @row_max from information_schema.tables where table_name = 't';
+
+--echo # After binlog
+begin;
+insert into t values ();
+insert into t values ();
+set debug_dbug="+d,crash_commit_after_log";
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--error 2013
+commit;
+--source include/wait_until_disconnected.inc
+--enable_reconnect
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
+select max(i) into @row_max from t;
+select table_schema, table_name, auto_increment > @row_max from information_schema.tables where table_name = 't';
+
+--echo # After everything
+begin;
+insert into t values ();
+insert into t values ();
+set debug_dbug="+d,crash_commit_after";
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--error 2013
+commit;
+--source include/wait_until_disconnected.inc
+--enable_reconnect
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
+select max(i) into @row_max from t;
+select table_schema, table_name, auto_increment > @row_max from information_schema.tables where table_name = 't';
+
+drop table t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_secondary.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_secondary.test
new file mode 100644
index 00000000000..68ad21bea1c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_secondary.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (pk INT PRIMARY KEY, a INT AUTO_INCREMENT, KEY(a)) ENGINE=rocksdb;
+INSERT INTO t1 (pk) VALUES (3), (2), (1);
+SELECT * FROM t1;
+
+--source include/restart_mysqld.inc
+
+INSERT INTO t1 (pk) VALUES (4);
+SELECT * FROM t1;
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test
new file mode 100644
index 00000000000..085324481b8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test
@@ -0,0 +1,174 @@
+--source include/have_rocksdb.inc
+
+#
+# auto-increment-offset and auto-increment-increment
+#
+
+############################################
+# TODO:
+# This test currently produces a wrong result
+# from line 36 of the result file onwards
+# due to MySQL bug #47118.
+# When/if the bug is fixed,
+# the result will need to be updated.
+############################################
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+--echo #---------------------------
+--echo # auto_increment_offset
+--echo #---------------------------
+SET auto_increment_offset = 200;
+
+CREATE TABLE t1 (a INT AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+
+# If auto_increment_offset is greater than auto_increment_increment,
+# the offset is ignored
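+# (Worked example: the offset=200 set above is ignored while the default
+# increment is 1, so the first rows get a = 1, 2, 3; once increment=300 is
+# set below, the next rows get a = 200, 500, 800.)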
+
+INSERT INTO t1 (a,b) VALUES (NULL,'a'),(NULL,'b'),(NULL,'c');
+SELECT LAST_INSERT_ID();
+SELECT a,b FROM t1 ORDER BY a;
+
+--echo #---------------------------
+--echo # auto_increment_increment
+--echo #---------------------------
+
+SET auto_increment_increment = 300;
+# offset should not be ignored anymore
+
+INSERT INTO t1 (a,b) VALUES (NULL,'d'),(NULL,'e'),(NULL,'f');
+SELECT LAST_INSERT_ID();
+SELECT a,b FROM t1 ORDER BY a;
+
+SET auto_increment_increment = 50;
+INSERT INTO t1 (a,b) VALUES (NULL,'g'),(NULL,'h'),(NULL,'i');
+SELECT LAST_INSERT_ID();
+SELECT a,b FROM t1 ORDER BY a;
+DROP TABLE t1;
+
+
+--echo #---------------------------
+--echo # offset is greater than the max value
+--echo #---------------------------
+
+SET auto_increment_increment = 500;
+SET auto_increment_offset = 300;
+
+CREATE TABLE t1 (a TINYINT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+--echo # In MariaDB, this is an error:
+--error HA_ERR_AUTOINC_ERANGE
+INSERT INTO t1 (a) VALUES (NULL);
+SELECT LAST_INSERT_ID();
+SELECT a FROM t1 ORDER BY a;
+DROP TABLE t1;
+
+--echo #---------------------------
+--echo # test large autoincrement values
+--echo #---------------------------
+
+SET auto_increment_increment = 1;
+SET auto_increment_offset = 1;
+CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (18446744073709551613, 'a');
+SHOW CREATE TABLE t1;
+INSERT INTO t1 VALUES (NULL, 'b');
+SHOW CREATE TABLE t1;
+--error ER_AUTOINC_READ_FAILED
+INSERT INTO t1 VALUES (NULL, 'c');
+SELECT * FROM t1;
+DROP TABLE t1;
+
+SET auto_increment_increment = 300;
+CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (18446744073709551613, 'a');
+SHOW CREATE TABLE t1;
+--error ER_AUTOINC_READ_FAILED
+INSERT INTO t1 VALUES (NULL, 'b');
+SHOW CREATE TABLE t1;
+--error ER_AUTOINC_READ_FAILED
+INSERT INTO t1 VALUES (NULL, 'c');
+SELECT * FROM t1;
+DROP TABLE t1;
+
+SET auto_increment_offset = 200;
+CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (18446744073709551613, 'a');
+SHOW CREATE TABLE t1;
+--error ER_AUTOINC_READ_FAILED
+INSERT INTO t1 VALUES (NULL, 'b');
+SHOW CREATE TABLE t1;
+--error ER_AUTOINC_READ_FAILED
+INSERT INTO t1 VALUES (NULL, 'c');
+SELECT * FROM t1;
+DROP TABLE t1;
+
+--echo #----------------------------------
+--echo # Issue #792 Crash in autoincrement
+--echo #----------------------------------
+
+CREATE TABLE t1(C1 DOUBLE AUTO_INCREMENT KEY,C2 CHAR) ENGINE=ROCKSDB;
+INSERT INTO t1 VALUES(2177,0);
+DROP TABLE t1;
+
+CREATE TABLE t0(c0 BLOB) ENGINE=ROCKSDB;
+INSERT INTO t0 VALUES(0);
+ALTER TABLE t0 AUTO_INCREMENT=0;
+DROP TABLE t0;
+
+--echo #---------------------------------------------------------------
+--echo # MDEV-16703 Assertion failed in load_auto_incr_value_from_index
+--echo #---------------------------------------------------------------
+
+CREATE TABLE t1 (pk INT AUTO_INCREMENT, a INT, PRIMARY KEY(pk)) ENGINE=RocksDB;
+INSERT INTO t1 (a) VALUES (1);
+UPDATE t1 SET pk = 3;
+ALTER TABLE t1 AUTO_INCREMENT 2;
+DROP TABLE t1;
+
+--echo #----------------------------------
+--echo # Issue #792 Crash in autoincrement
+--echo #----------------------------------
+
+CREATE TABLE t1(C1 DOUBLE AUTO_INCREMENT KEY,C2 CHAR) ENGINE=ROCKSDB;
+INSERT INTO t1 VALUES(2177,0);
+DROP TABLE t1;
+
+CREATE TABLE t0(c0 BLOB) ENGINE=ROCKSDB;
+INSERT INTO t0 VALUES(0);
+ALTER TABLE t0 AUTO_INCREMENT=0;
+DROP TABLE t0;
+
+--echo #----------------------------------
+--echo # Issue #869 Crash in autoincrement
+--echo #----------------------------------
+
+CREATE TABLE t1 (pk INT AUTO_INCREMENT, a INT, PRIMARY KEY(pk)) ENGINE=RocksDB;
+INSERT INTO t1 (a) VALUES (1);
+UPDATE t1 SET pk = 3;
+ALTER TABLE t1 AUTO_INCREMENT 2;
+DROP TABLE t1;
+
+--echo #----------------------------------
+--echo # Issue #902 Debug assert in autoincrement with small field type
+--echo #----------------------------------
+
+SET auto_increment_increment=100, auto_increment_offset=10;
+CREATE TABLE t1(i INT AUTO_INCREMENT PRIMARY KEY) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551615;
+# ha_rocksdb::get_auto_increment would assert here
+--error ER_AUTOINC_READ_FAILED
+INSERT INTO t1 VALUES (NULL);
+SELECT * FROM t1;
+ALTER TABLE t1 AUTO_INCREMENT=1;
+INSERT INTO t1 VALUES (NULL);
+SELECT * FROM t1;
+ALTER TABLE t1 AUTO_INCREMENT=18446744073709551615;
+# ha_rocksdb::get_auto_increment would assert here
+--error ER_AUTOINC_READ_FAILED
+INSERT INTO t1 VALUES (NULL);
+SELECT * FROM t1;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars_thread.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars_thread.test
new file mode 100644
index 00000000000..5a40d28e1c9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars_thread.test
@@ -0,0 +1,65 @@
+--source include/have_rocksdb.inc
+--source include/have_debug_sync.inc
+
+--echo #---------------------------
+--echo # two threads inserting simultaneously with increment > 1
+--echo # Issue #390
+--echo #---------------------------
+
+CREATE TABLE t1 (a INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+
+# Set up connections
+connect (con1, localhost, root,,);
+SET auto_increment_increment = 2;
+SET auto_increment_offset = 1;
+# Insert one row to set up the conditions that caused the original failure
+INSERT INTO t1 VALUES(NULL);
+
+connect (con2, localhost, root,,);
+SET auto_increment_increment = 2;
+SET auto_increment_offset = 1;
+
+connect (con3, localhost, root,,);
+
+# Start each thread on an insert that will block waiting for a signal
+connection con1;
+SET debug_sync='rocksdb.autoinc_vars2 SIGNAL go2';
+SET debug_sync='rocksdb.autoinc_vars SIGNAL parked1 WAIT_FOR go1';
+send INSERT INTO t1 VALUES(NULL);
+
+connection default;
+SET debug_sync='now WAIT_FOR parked1';
+
+connection con2;
+SET debug_sync='rocksdb.autoinc_vars SIGNAL parked2 WAIT_FOR go2';
+send INSERT INTO t1 VALUES(NULL);
+
+# Wait for both threads to be at debug_sync point
+connection default;
+SET debug_sync='now WAIT_FOR parked2';
+
+
+# Signal both threads to continue
+# (In MariaDB, we signal one of them which continues and signals the other)
+send SET debug_sync='now SIGNAL go1';
+connection con3;
+# MariaDB: do nothing here
+
+connection default;
+reap;
+
+connection con1;
+reap;
+
+connection con2;
+reap;
+
+connection default;
+SET debug_sync='RESET';
+
+disconnect con1;
+disconnect con2;
+
+SELECT * FROM t1;
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars_thread_2.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars_thread_2.test
new file mode 100644
index 00000000000..b64af16411b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars_thread_2.test
@@ -0,0 +1,145 @@
+--source include/have_rocksdb.inc
+
+--echo #---------------------------
+--echo # ten threads inserting simultaneously with increment > 1
+--echo # Issue #390
+--echo #---------------------------
+
+# Run 10 simultaneous threads each inserting 100,000 rows
+let $num_threads = 10;
+let $num_rows_per_thread = 100000;
+
+# Create the table with an AUTO_INCREMENT primary key and a separate column
+# to store which thread created the row
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, thr INT) ENGINE=rocksdb;
+
+# For each thread...
+# 1) set up a connection
+# 2) create a file that can be used for LOAD DATA INFILE ...
+let $i = `SELECT $num_threads`;
+while ($i > 0)
+{
+ dec $i;
+
+ # Set up connection
+ connect (con$i, localhost, root,,);
+
+ # Set up the auto_increment_* variables for each thread
+ eval SET auto_increment_increment = 100;
+ eval SET auto_increment_offset = $i + 1;
+ let $file = `SELECT CONCAT(@@datadir, "test_insert_", $i, ".txt")`;
+
+ # Pass variables into perl
+ let ROCKSDB_INFILE = $file;
+ let ROCKSDB_THREAD = `SELECT $i`;
+ let ROCKSDB_ROWS_PER_THREAD = `SELECT $num_rows_per_thread`;
+
+ # Create a file to load
+ perl;
+ my $fn = $ENV{'ROCKSDB_INFILE'};
+ my $thr = $ENV{'ROCKSDB_THREAD'};
+ my $num = $ENV{'ROCKSDB_ROWS_PER_THREAD'};
+ open(my $fh, '>>', $fn) || die "perl open($fn): $!";
+ binmode $fh;
+ for (my $ii = 0; $ii < $num; $ii++)
+ {
+ print $fh "\\N\t$thr\n"
+ }
+ close($fh);
+ EOF
+}
+
+# For each connection start the LOAD DATA INFILE in the background
+connection default;
+let $i = `SELECT $num_threads`;
+while ($i > 0)
+{
+ dec $i;
+
+ connection con$i;
+ let $file = `SELECT CONCAT(@@datadir, "test_insert_", $i, ".txt")`;
+ --disable_query_log
+ --echo LOAD DATA INFILE <input_file> INTO TABLE t1;
+ send_eval LOAD DATA INFILE '$file' INTO TABLE t1;
+ --enable_query_log
+}
+
+# Reap each connection's background result
+connection default;
+let $i = `SELECT $num_threads`;
+while ($i > 0)
+{
+ dec $i;
+
+ connection con$i;
+ reap;
+}
+
+# Make sure we have the required number of rows
+connection default;
+SELECT COUNT(*) FROM t1;
+SELECT thr, COUNT(pk) FROM t1 GROUP BY thr;
+
+# Cleanup the connection and file used for LOAD DATA INFILE
+let $i = `SELECT $num_threads`;
+while ($i > 0)
+{
+ dec $i;
+
+ disconnect con$i;
+ let $file = `SELECT CONCAT(@@datadir, "test_insert_", "$i", ".txt")`;
+ remove_file $file;
+}
+
+# Validate each row. For each row, the created 'thr' column shows which
+# thread created the row. The pk that was automatically generated should
+# therefore match a certain pattern. For thread 0, the pk should be in
+# the sequence [1, 101, 201, 301, ...]; for thread 1, it should be in the
+# sequence [2, 102, 202, 302, ...], etc. The pk for each row should be the
+# smallest value in the sequence for thread 'thr' that is greater than
+# the pk in the previous row.
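+# (Worked example: thread 3 uses offset 4, so its pks are 4, 104, 204, ...;
+# if the previous row had pk=104, the next pk attributed to thread 3 must
+# be 204.)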
+let $file = `SELECT CONCAT(@@datadir, "test_export.txt")`;
+--disable_query_log
+--echo SELECT * FROM t1 ORDER BY pk INTO OUTFILE <output_file>;
+eval SELECT * FROM t1 ORDER BY pk INTO OUTFILE "$file";
+--enable_query_log
+
+let ROCKSDB_OUTFILE = $file;
+
+perl;
+my $fn = $ENV{'ROCKSDB_OUTFILE'};
+my $last_pk = 0;
+open(my $fh, '<', $fn) || die "perl open($fn): $!";
+while (<$fh>)
+{
+ if ($_ =~ m/^(.*)\t(.*)$/)
+ {
+ my $pk = $1;
+ my $thr = $2;
+
+ my $expected_pk = int($last_pk / 100) * 100 + ($thr + 1);
+ $expected_pk += 100 if $expected_pk <= $last_pk;
+
+ if ($expected_pk != $pk)
+ {
+ die "Incorrect next pk ($pk); expected $expected_pk (previous: $last_pk)"
+ }
+
+ $last_pk = $pk;
+ }
+ else
+ {
+ die "output file has incorrect format: $_";
+ }
+}
+print stdout "All pk values matched their expected values\n";
+EOF
+
+remove_file $file;
+
+# Drop the table to finally clean up
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rc.cnf b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rc.cnf
new file mode 100644
index 00000000000..a76f1244bab
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rc.cnf
@@ -0,0 +1,11 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+sync_binlog=0
+binlog_format=row
+slave-exec-mode=strict
+
+[mysqld.2]
+sync_binlog=0
+binlog_format=row
+slave-exec-mode=strict
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rc.test b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rc.test
new file mode 100644
index 00000000000..9b5c4571c19
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rc.test
@@ -0,0 +1,3 @@
+let $trx_isolation = READ COMMITTED;
+--source blind_delete_without_tx_api.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rr.cnf b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rr.cnf
new file mode 100644
index 00000000000..a76f1244bab
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rr.cnf
@@ -0,0 +1,11 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+sync_binlog=0
+binlog_format=row
+slave-exec-mode=strict
+
+[mysqld.2]
+sync_binlog=0
+binlog_format=row
+slave-exec-mode=strict
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rr.test b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rr.test
new file mode 100644
index 00000000000..4369f6baa62
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rr.test
@@ -0,0 +1,3 @@
+let $trx_isolation = REPEATABLE READ;
+--source blind_delete_without_tx_api.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_without_tx_api.inc b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_without_tx_api.inc
new file mode 100644
index 00000000000..4f03695bf02
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_without_tx_api.inc
@@ -0,0 +1,135 @@
+--source include/have_rocksdb.inc
+--source include/have_binlog_format_row.inc
+
+source include/master-slave.inc;
+
+connection master;
+
+eval SET SESSION TRANSACTION ISOLATION LEVEL $trx_isolation;
+set @save_rocksdb_blind_delete_primary_key=@@session.rocksdb_blind_delete_primary_key;
+set @save_rocksdb_master_skip_tx_api=@@session.rocksdb_master_skip_tx_api;
+
+--disable_warnings
+DROP TABLE IF EXISTS t1,t2;
+--enable_warnings
+create table t1 (id int primary key, value int, value2 varchar(200)) engine=rocksdb;
+create table t2 (id int primary key, value int, value2 varchar(200), index(value)) engine=rocksdb;
+
+--disable_query_log
+let $t = 1;
+while ($t <= 2) {
+ let $i = 1;
+ while ($i <= 10000) {
+ let $insert = INSERT INTO t$t VALUES($i, $i, REPEAT('x', 150));
+ inc $i;
+ eval $insert;
+ }
+ inc $t;
+}
+--enable_query_log
+
+SET session rocksdb_blind_delete_primary_key=1;
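+# (A blind delete turns a point DELETE on the primary key of a table with
+# no secondary keys into a Delete() without reading the row first; such
+# deletes are counted in rocksdb_rows_deleted_blind, checked below.)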
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+# Deleting 1000 rows from t1
+--disable_query_log
+let $i = 1;
+while ($i <= 1000) {
+ let $insert = DELETE FROM t1 WHERE id=$i;
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+SELECT count(*) FROM t1;
+
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $trx_isolation;
+SELECT count(*) FROM t1;
+connection master;
+
+# Deleting 1000 rows from t2 (blind delete disabled because of secondary key)
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+--disable_query_log
+let $i = 1;
+while ($i <= 1000) {
+ let $insert = DELETE FROM t2 WHERE id=$i;
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+SELECT count(*) FROM t2;
+
+SET session rocksdb_master_skip_tx_api=1;
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+--disable_query_log
+let $t = 1;
+while ($t <= 2) {
+ let $i = 1001;
+ while ($i <= 2000) {
+ let $insert = DELETE FROM t$t WHERE id=$i;
+ inc $i;
+ eval $insert;
+ }
+ inc $t;
+}
+--enable_query_log
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+SELECT count(*) FROM t1;
+SELECT count(*) FROM t2;
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+SELECT count(*) FROM t1;
+SELECT count(*) FROM t2;
+connection master;
+
+
+# Range Deletes (blind delete disabled)
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+DELETE FROM t1 WHERE id BETWEEN 3001 AND 4000;
+DELETE FROM t2 WHERE id BETWEEN 3001 AND 4000;
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+SELECT count(*) FROM t1;
+SELECT count(*) FROM t2;
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+SELECT count(*) FROM t1;
+SELECT count(*) FROM t2;
+connection master;
+
+
+# Deleting a key that was already deleted (makes the slave SQL thread stop)
+DELETE FROM t1 WHERE id = 10;
+SELECT count(*) FROM t1;
+connection slave;
+call mtr.add_suppression("Slave SQL.*Could not execute Delete_rows event on table test.t1.*Error_code.*");
+call mtr.add_suppression("Slave: Can't find record in 't1'.*");
+# wait until we have the expected error
+--let $slave_sql_errno= convert_error(ER_KEY_NOT_FOUND)
+--source include/wait_for_slave_sql_error.inc
+
+connection slave;
+set @save_rocksdb_read_free_rpl=@@global.rocksdb_read_free_rpl;
+set global rocksdb_read_free_rpl=PK_SK;
+START SLAVE;
+connection master;
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+SELECT count(*) FROM t1;
+connection master;
+
+
+# cleanup
+connection slave;
+set global rocksdb_read_free_rpl=@save_rocksdb_read_free_rpl;
+connection master;
+SET session rocksdb_blind_delete_primary_key=@save_rocksdb_blind_delete_primary_key;
+SET session rocksdb_master_skip_tx_api=@save_rocksdb_master_skip_tx_api;
+
+DROP TABLE t1, t2;
+--source include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter-master.opt
new file mode 100644
index 00000000000..bf9d0624522
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter-master.opt
@@ -0,0 +1,2 @@
+--rocksdb_default_cf_options=write_buffer_size=256k;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;};prefix_extractor=capped:20
+--rocksdb_override_cf_options=cf_short_prefix={prefix_extractor=capped:4};rev:cf_short_prefix={prefix_extractor=capped:4};cf_long_prefix={prefix_extractor=capped:240};rev:cf_long_prefix={prefix_extractor=capped:240}
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter.inc b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter.inc
new file mode 100644
index 00000000000..1fb9ef8a0e7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter.inc
@@ -0,0 +1,81 @@
+--source include/have_rocksdb.inc
+
+DELIMITER //;
+CREATE PROCEDURE bloom_start()
+BEGIN
+ select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+ select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+END//
+CREATE PROCEDURE bloom_end()
+BEGIN
+select case when variable_value-@c > 0 then 'true' else 'false' end as checked from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+END//
+DELIMITER ;//
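+# (bloom_start/bloom_end sample the global counters around a query:
+# bloom_end prints 'true' when rocksdb_bloom_filter_prefix_checked grew,
+# i.e. the prefix bloom filter was actually consulted.)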
+
+# BF is sometimes invoked and useful
+--let $CF=
+--source bloomfilter_table_def.inc
+--source bloomfilter_load_select.inc
+
+# BF is always invoked but not useful at all
+--let $CF=COMMENT 'cf_short_prefix'
+--source bloomfilter_table_def.inc
+--source bloomfilter_load_select.inc
+
+--let $CF=COMMENT 'rev:cf_short_prefix'
+--source bloomfilter_table_def.inc
+--source bloomfilter_load_select.inc
+
+# BF is invoked most of the time and useful
+--let $CF=COMMENT 'cf_long_prefix'
+--source bloomfilter_table_def.inc
+--source bloomfilter_load_select.inc
+
+--let $CF=COMMENT 'rev:cf_long_prefix'
+--source bloomfilter_table_def.inc
+--source bloomfilter_load_select.inc
+
+
+# BUG: Prev() with prefix lookup should not use prefix bloom filter
+create table r1 (id1 bigint, id2 bigint, id3 bigint, v1 int, v2 text, primary key (id1, id2, id3)) engine=rocksdb DEFAULT CHARSET=latin1 collate latin1_bin;
+--disable_query_log
+let $max = 100;
+let $i = 1;
+
+create table t10(a int primary key);
+insert into t10 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t11(a int primary key);
+insert into t11 select A.a + B.a* 10 + C.a * 100 from t10 A, t10 B, t10 C;
+
+set @a=0;
+set rocksdb_bulk_load=1;
+while ($i <= $max) {
+ inc $i;
+ eval insert into r1 select @a:=@a+1, @a, @a, @a, @a from t11;
+}
+set rocksdb_bulk_load=0;
+
+drop table t10, t11;
+
+--enable_query_log
+call bloom_start();
+select * from r1 where id1=1 and id2 in (1) order by id3 asc;
+call bloom_end();
+call bloom_start();
+select * from r1 where id1=1 and id2 in (1) order by id3 desc;
+call bloom_end();
+
+# cleanup
+DROP PROCEDURE bloom_start;
+DROP PROCEDURE bloom_end;
+truncate table t1;
+optimize table t1;
+truncate table t2;
+optimize table t2;
+drop table if exists t1;
+drop table if exists t2;
+drop table if exists r1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter.test b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter.test
new file mode 100644
index 00000000000..efcf9ee1f73
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter.test
@@ -0,0 +1 @@
+--source bloomfilter.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter2-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter2-master.opt
new file mode 100644
index 00000000000..f3824106b25
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter2-master.opt
@@ -0,0 +1 @@
+--rocksdb_default_cf_options=write_buffer_size=64k;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;};prefix_extractor=capped:24
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter2.test b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter2.test
new file mode 100644
index 00000000000..c4f1570ec41
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter2.test
@@ -0,0 +1,107 @@
+--source include/have_rocksdb.inc
+
+## Test 0: Eq cond len includes VARCHAR, and real cond len < prefix bloom len < VARCHAR definition len
+CREATE TABLE t0 (id1 VARCHAR(30), id2 INT, value INT, PRIMARY KEY (id1, id2)) ENGINE=rocksdb collate latin1_bin;
+--disable_query_log
+let $i = 1;
+while ($i <= 10000) {
+ let $insert = INSERT INTO t0 VALUES('X', $i, $i);
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+
+# BF not used
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+SELECT COUNT(*) FROM t0 WHERE id1='X' AND id2>=1;
+select case when variable_value-@u = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+
+DROP TABLE t0;
+
+
+## Test 1: Eq cond len is shorter than prefix bloom len
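+## (Byte counts below assume the key layout: 4-byte index number, then
+## 8 bytes per BIGINT and 4 bytes per INT key part; with the capped:24
+## prefix extractor from the .opt file, the prefix bloom filter only
+## applies when the equality condition covers at least 24 bytes.)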
+CREATE TABLE t1 (id1 BIGINT, id2 INT, id3 BIGINT, value INT, PRIMARY KEY (id1, id2, id3)) ENGINE=rocksdb;
+
+--disable_query_log
+let $i = 1;
+while ($i <= 10000) {
+ let $insert = INSERT INTO t1 VALUES(1, 1, $i, $i);
+ eval $insert;
+ inc $i;
+}
+--enable_query_log
+
+# BF not used (4+8+4=16)
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2=1 AND id3>=2;
+select case when variable_value-@u = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+
+# BF not used (4+8=12)
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2>=1 AND id3>=2;
+select case when variable_value-@u = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+
+DROP TABLE t1;
+
+
+## Test 2: Long IN and short IN (varchar) -- can_use_bloom_filter changes within the same query
+CREATE TABLE t2 (id1 INT, id2 VARCHAR(100), id3 BIGINT, value INT, PRIMARY KEY (id1, id2, id3)) ENGINE=rocksdb collate latin1_bin;
+--disable_query_log
+let $i = 1;
+while ($i <= 10000) {
+ let $insert = INSERT INTO t2 VALUES($i, $i, $i, $i);
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+
+# BF used for large cond, not used for short cond
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+select count(*) from t2 WHERE id1=100 and id2 IN ('00000000000000000000', '100');
+select case when variable_value-@u > 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+select count(*) from t2 WHERE id1=200 and id2 IN ('00000000000000000000', '200');
+select case when variable_value-@u > 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+
+# BF not used because cond length is too small in all cases
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+select count(*) from t2 WHERE id1=200 and id2 IN ('3', '200');
+select case when variable_value-@u = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+
+DROP TABLE t2;
+
+
+## Test 3: Eq cond len is longer than prefix bloom len
+CREATE TABLE t3 (id1 BIGINT, id2 BIGINT, id3 BIGINT, id4 BIGINT, PRIMARY KEY (id1, id2, id3, id4)) ENGINE=rocksdb collate latin1_bin;
+--disable_query_log
+let $i = 1;
+while ($i <= 10000) {
+ if ($i != 5000) {
+ let $insert = INSERT INTO t3 VALUES(1, $i, $i, $i);
+ eval $insert;
+ }
+ inc $i;
+}
+--enable_query_log
+
+# Full BF works with Get(), Block based does not.
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_useful';
+SELECT COUNT(*) FROM t3 WHERE id1=1 AND id2=5000 AND id3=1 AND id4=1;
+select case when variable_value-@u > 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_useful';
+
+# BF used (4+8+8+8)
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+SELECT COUNT(*) FROM t3 WHERE id1=1 AND id2=1 AND id3=1;
+select case when variable_value-@u > 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+SELECT COUNT(*) FROM t3 WHERE id1=1 AND id2=1 AND id3=1 AND id4 <= 500;
+select case when variable_value-@u > 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_useful';
+
+DROP TABLE t3;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3-master.opt
new file mode 100644
index 00000000000..a21608c7c1d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3-master.opt
@@ -0,0 +1,4 @@
+--rocksdb_default_cf_options=write_buffer_size=64k;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;};prefix_extractor=capped:20
+--rocksdb_debug_optimizer_n_rows=1000
+--rocksdb_table_stats_sampling_pct=100
+--rocksdb_info_log_level=debug_level
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3.test b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3.test
new file mode 100644
index 00000000000..dc2a0da506d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3.test
@@ -0,0 +1,136 @@
+--source include/have_rocksdb.inc
+
+--source include/restart_mysqld.inc
+CREATE TABLE `linktable` (
+ `id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+ `id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+ `link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `visibility` tinyint(3) NOT NULL DEFAULT '0',
+ `data` varchar(255) NOT NULL DEFAULT '',
+ `time` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `version` int(11) unsigned NOT NULL DEFAULT '0',
+ PRIMARY KEY (link_type, `id1`,`id2`) COMMENT 'cf_link_pk',
+ KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`version`,`data`) COMMENT 'rev:cf_link_id1_type',
+ KEY `id1_type2` (`id1`,`link_type`,`time`,`version`,`data`,`visibility`) COMMENT 'rev:cf_link_id1_type2',
+ KEY `id1_type3` (`id1`,`visibility`,`time`,`version`,`data`,`link_type`) COMMENT 'rev:cf_link_id1_type3'
+) ENGINE=RocksDB DEFAULT COLLATE=latin1_bin;
+
+--disable_query_log
+call mtr.add_suppression("LibRocksDB");
+let $i = 1;
+while ($i <= 10000) {
+ let $insert = INSERT INTO linktable VALUES($i, $i, $i, $i, 1, 1, $i, $i, $i);
+ eval $insert;
+ inc $i;
+}
+--enable_query_log
+
+## HA_READ_PREFIX_LAST_OR_PREV
+# BF len 21
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+select id1, id2, link_type, visibility, data, time, version from linktable FORCE INDEX(`id1_type`) where id1 = 100 and link_type = 1 and time >= 0 and time <= 9223372036854775807 and visibility = 1 order by time desc;
+select case when variable_value-@c > 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+
+# BF len 20
+
+--echo # MariaDB: we don't have optimizer_force_index_for_range, but we can use EITS
+--echo # to get the query plan we want.
+set @tmp_use_stat_tables= @@use_stat_tables;
+set use_stat_tables='preferably';
+analyze table linktable persistent for all;
+flush tables;
+explain select * from linktable;
+--echo # This must use range(id1_type2), key_len=24
+explain
+select id1, id2, link_type, visibility, data, time, version from linktable
+FORCE INDEX(`id1_type2`) where id1 = 100 and link_type = 1 and time >= 0 and time <= 9223372036854775807 order by time desc;
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+# MariaDB: no support for optimizer_force_index_for_range:
+#set @tmp_force_index_for_range=@@optimizer_force_index_for_range;
+#set optimizer_force_index_for_range=on;
+select id1, id2, link_type, visibility, data, time, version from linktable FORCE INDEX(`id1_type2`) where id1 = 100 and link_type = 1 and time >= 0 and time <= 9223372036854775807 order by time desc;
+select case when variable_value-@c > 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+#set global optimizer_force_index_for_range=@tmp_force_index_for_range;
+
+# BF len 13
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+select id1, id2, link_type, visibility, data, time, version from linktable FORCE INDEX(`id1_type3`) where id1 = 100 and time >= 0 and time <= 9223372036854775807 and visibility = 1 order by time desc;
+select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+
+## HA_READ_PREFIX_LAST_OR_PREV (no end range)
+# BF len 20
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+select id1, id2, link_type, visibility, data, time, version from linktable FORCE INDEX(`id1_type`) where id1 = 100 and link_type = 1 and visibility = 1 and time >= 0 order by time desc;
+select case when variable_value-@c > 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+
+# BF len 19
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+select id1, id2, link_type, visibility, data, time, version from linktable FORCE INDEX(`id1_type2`) where id1 = 100 and link_type = 1 and time >= 0 order by time desc;
+select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+
+--echo ## HA_READ_PREFIX_LAST
+--echo # BF len 20
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+select id1, id2, link_type, visibility, data, time, version from linktable FORCE INDEX(`id1_type`) where id1 = 100 and link_type = 1 and visibility = 1 order by time desc;
+select case when variable_value-@c > 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+
+--echo # BF len 19
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+select id1, id2, link_type, visibility, data, time, version from linktable FORCE INDEX(`id1_type2`) where id1 = 100 and link_type = 1 order by time desc;
+select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+
+--echo # BF len 12
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+select id1, id2, link_type, visibility, data, time, version from linktable FORCE INDEX(`id1_type3`) where id1 = 100 and visibility = 1 order by time desc;
+select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+
+
+DROP TABLE linktable;
+--source include/restart_mysqld.inc
+
+--echo #
+--echo # bloom filter prefix is 20 byte
+--echo # Create a key which is longer than that, so that we see that
+--echo # eq_cond_len= slice.size() - 1;
+--echo # doesnt work.
+--echo #
+--echo # indexnr 4
+--echo # kp0 + 4 = 8
+--echo # kp1 + 8 = 16
+--echo # kp2 + 8 = 24 24>20 byte length prefix
+--echo # kp3 + 8 = 28
+
+create table t1 (
+ pk int primary key,
+ kp0 int not null,
+ kp1 bigint not null,
+ kp2 bigint not null,
+ kp3 bigint not null,
+ key kp12(kp0, kp1, kp2, kp3) comment 'rev:x1'
+) engine=rocksdb;
+
+insert into t1 values (1, 1,1, 1,1);
+insert into t1 values (10,1,1,0x12FFFFFFFFFF,1);
+insert into t1 values (11,1,1,0x12FFFFFFFFFF,1);
+insert into t1 values (20,2,2,0x12FFFFFFFFFF,1);
+insert into t1 values (21,2,2,0x12FFFFFFFFFF,1);
+
+--source include/restart_mysqld.inc
+
+--replace_column 9 #
+explain
+select * from t1 where kp0=1 and kp1=1 and kp2=0x12FFFFFFFFFF order by kp3 desc;
+show status like '%rocksdb_bloom_filter_prefix%';
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+select * from t1 where kp0=1 and kp1=1 and kp2=0x12FFFFFFFFFF order by kp3 desc;
+show status like '%rocksdb_bloom_filter_prefix%';
+--echo # The following MUST show TRUE:
+select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+
+drop table t1;
+# Key length is 4 + 8 + 8 = 20
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter4-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter4-master.opt
new file mode 100644
index 00000000000..0a325757962
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter4-master.opt
@@ -0,0 +1 @@
+--rocksdb_default_cf_options=write_buffer_size=16k;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;};prefix_extractor=capped:12
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter4.test b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter4.test
new file mode 100644
index 00000000000..76ec6ca101f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter4.test
@@ -0,0 +1,52 @@
+--source include/have_rocksdb.inc
+
+# Fixing issue #230 -- Prefix bloom filter + reverse column family misses some rows
+# This test inserts 20,000 rows into t1, then selects them one by one from a
+# stored procedure. If any select returns no row, the bug is present.
+
+CREATE TABLE t1 (
+ `id1` int unsigned NOT NULL DEFAULT '0',
+ `id2` int unsigned NOT NULL DEFAULT '0',
+ `link_type` int unsigned NOT NULL DEFAULT '0',
+ `visibility` tinyint NOT NULL DEFAULT '0',
+ `data` varchar(255) NOT NULL DEFAULT '',
+ `time` int unsigned NOT NULL DEFAULT '0',
+ `version` int unsigned NOT NULL DEFAULT '0',
+ PRIMARY KEY (id1, link_type, visibility, id2) COMMENT 'rev:cf_link_pk'
+) ENGINE=RocksDB DEFAULT COLLATE=latin1_bin;
+
+DELIMITER //;
+CREATE PROCEDURE select_test()
+BEGIN
+ DECLARE id1_cond INT;
+ SET id1_cond = 1;
+ WHILE id1_cond <= 20000 DO
+ SELECT count(*) AS cnt FROM (SELECT id1 FROM t1 FORCE INDEX (PRIMARY) WHERE id1 = id1_cond AND link_type = 1 AND visibility = 1 ORDER BY id2 DESC) AS t INTO @cnt;
+ IF @cnt < 1 THEN
+ SELECT id1_cond, @cnt;
+ END IF;
+ SET id1_cond = id1_cond + 1;
+ END WHILE;
+END//
+DELIMITER ;//
+
+--disable_query_log
+let $i = 1;
+while ($i <= 20000) {
+ let $insert = INSERT INTO t1 VALUES($i, $i, 1, 1, $i, $i, $i);
+ eval $insert;
+ inc $i;
+}
+--enable_query_log
+
+--echo "Skipping bloom filter"
+SET session rocksdb_skip_bloom_filter_on_read=1;
+CALL select_test();
+
+--echo "Using bloom filter"
+SET session rocksdb_skip_bloom_filter_on_read=0;
+CALL select_test();
+
+DROP PROCEDURE select_test;
+drop table t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt
new file mode 100644
index 00000000000..4576d20f45b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt
@@ -0,0 +1,3 @@
+--rocksdb_default_cf_options=write_buffer_size=256k;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;}
+--rocksdb_override_cf_options=rev:bf5_1={prefix_extractor=capped:4};bf5_1={prefix_extractor=capped:4}
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test
new file mode 100644
index 00000000000..11890dcfbaf
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test
@@ -0,0 +1,86 @@
+
+--echo #
+--echo # Issue #809: Wrong query result with bloom filters
+--echo #
+
+create table t1 (
+ id1 bigint not null,
+ id2 bigint not null,
+ id3 varchar(100) not null,
+ id4 int not null,
+ id5 int not null,
+ value bigint,
+ value2 varchar(100),
+ primary key (id1, id2, id3, id4) COMMENT 'rev:bf5_1'
+) engine=ROCKSDB;
+
+
+create table t2(a int);
+insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t3(seq int);
+insert into t3
+select
+ 1+ A.a + B.a* 10 + C.a * 100 + D.a * 1000
+from t2 A, t2 B, t2 C, t2 D;
+
+insert t1
+select
+ (seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc"
+from t3;
+
+set global rocksdb_force_flush_memtable_now=1;
+
+--echo # Full table scan
+explain
+select * from t1 limit 10;
+select * from t1 limit 10;
+
+--echo # An index scan starting from the end of the table:
+explain
+select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
+select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
+
+# A testcase for an assertion that the fix is removing
+# The only requirement for the used column family is that it is reverse-ordered
+create table t4 (
+ pk int unsigned not null primary key,
+ kp1 int unsigned not null,
+ kp2 int unsigned not null,
+ col1 int unsigned,
+ key(kp1, kp2) comment 'rev:bf5_2'
+) engine=rocksdb;
+
+insert into t4 values (1, 0xFFFF, 0xFFF, 12345);
+
+--echo # This must not fail an assert:
+select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc;
+
+
+--echo #
+--echo # Issue #881: Issue #809 still occurs for reverse scans on forward cfs
+--echo #
+
+# The same as t1 above but uses forward-ordered column family:
+
+create table t5 (
+ id1 bigint not null,
+ id2 bigint not null,
+ id3 varchar(100) not null,
+ id4 int not null,
+ id5 int not null,
+ value bigint,
+ value2 varchar(100),
+ primary key (id1, id2, id3, id4) COMMENT 'bf5_1'
+) engine=ROCKSDB;
+
+insert into t5 select * from t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+
+--echo # An index scan starting from the end of the table:
+explain
+select * from t5 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
+select * from t5 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
+
+drop table t1,t2,t3,t4,t5;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_bulk_load-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_bulk_load-master.opt
new file mode 100644
index 00000000000..c28681ef030
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_bulk_load-master.opt
@@ -0,0 +1,2 @@
+--rocksdb_default_cf_options=write_buffer_size=16k;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;};prefix_extractor=capped:12
+--rocksdb_override_cf_options=cf2={optimize_filters_for_hits=true}
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_bulk_load.test b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_bulk_load.test
new file mode 100644
index 00000000000..09d9d734f9e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_bulk_load.test
@@ -0,0 +1,35 @@
+--source include/have_rocksdb.inc
+
+create table r1 (id bigint primary key, value bigint) engine=rocksdb;
+create table r2 (id bigint, value bigint, primary key (id) comment 'cf2') engine=rocksdb;
+set session rocksdb_bulk_load=1;
+--disable_query_log
+let $t = 1;
+let $i = 1;
+while ($t <= 2) {
+ while ($i <= 1000) {
+ let $insert = INSERT INTO r$t VALUES($i, $i);
+ #skipping a row
+ if ($i != 100) {
+ eval $insert;
+ }
+ inc $i;
+ }
+ inc $t;
+}
+--enable_query_log
+set session rocksdb_bulk_load=0;
+
+# bloom filter should be useful on insert (calling GetForUpdate)
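+# (with rocksdb_bulk_load back at 0, the INSERT verifies PK uniqueness through
+# GetForUpdate, which the filter blocks can answer, so the filter-hit counter
+# should increase for r1)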
+select variable_value into @h from information_schema.global_status where variable_name='rocksdb_block_cache_filter_hit';
+insert into r1 values (100, 100);
+select variable_value-@h from information_schema.global_status where variable_name='rocksdb_block_cache_filter_hit';
+
+# cf2 has no bloom filter in the bottommost level
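+# (bulk-loaded SST files are ingested at the bottommost level, and cf2 sets
+# optimize_filters_for_hits=true, which omits filters there, so the counter
+# should not move)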
+select variable_value into @h from information_schema.global_status where variable_name='rocksdb_block_cache_filter_hit';
+insert into r2 values (100, 100);
+select variable_value-@h from information_schema.global_status where variable_name='rocksdb_block_cache_filter_hit';
+
+DROP TABLE r1, r2;
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_load_select.inc b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_load_select.inc
new file mode 100644
index 00000000000..cf8b26847f0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_load_select.inc
@@ -0,0 +1,190 @@
+source include/have_sequence.inc;
+
+# loading some data (larger than the write buffer size) to cause compaction
+insert t1
+ select (seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc"
+ from seq_1_to_10000;
+insert t2 select * from t1;
+set global rocksdb_force_flush_memtable_now=1;
+
+# BF conditions (prefix short(4B)|medium(20B)|long(240B))
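+# Each (?, ?, ?) triple below is read as: can the bloom filter be used under
+# the short(4B) / medium(20B) / long(240B) prefix extractor (o = yes, x = no).
+# bloom_start/bloom_end are suite helpers, assumed to snapshot and diff
+# rocksdb_bloom_filter_prefix_checked the way the .test files above do inline.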
+#0 no eq condition (o, x, x)
+## cond length 4, key length > 4
+call bloom_start();
+select count(*) from t1;
+call bloom_end();
+call bloom_start();
+select count(*) from t2;
+call bloom_end();
+call bloom_start();
+select count(*) from t1 force index(PRIMARY) where id1 >= 1;
+call bloom_end();
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2 >= 1;
+call bloom_end();
+call bloom_start();
+select count(*) from t2 force index(id3_id4) where id3 >= '1';
+call bloom_end();
+
+#1 cond length == prefix length (o, o, x)
+## cond length 4+8+8=20, key length > 20
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=2 and id1=1;
+call bloom_end();
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=24 and id1=12;
+call bloom_end();
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=88 and id1=44;
+call bloom_end();
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=100 and id1=50;
+call bloom_end();
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=428 and id1=214;
+call bloom_end();
+## (cond_length == extended_key_length(4+8+4+4=20) == prefix_length)
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=1 and id4=1 and id5=1;
+call bloom_end();
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=23 and id4=115 and id5=115;
+call bloom_end();
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=500 and id4=2500 and id5=2500;
+call bloom_end();
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=601 and id4=3005 and id5=3005;
+call bloom_end();
+
+#2 cond length < actual key length and cond_length < prefix length (o, x, x)
+## for a long prefix key, most cases fall into this category, unless all key columns are used.
+## cond length 4+8=12, key length > 12
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1;
+call bloom_end();
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=23;
+call bloom_end();
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=345;
+call bloom_end();
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=456;
+call bloom_end();
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=1;
+call bloom_end();
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=23;
+call bloom_end();
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=345;
+call bloom_end();
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=456;
+call bloom_end();
+
+#3 both actual key length and cond length >= prefix length (o, o, o/x)
+## cond length 4+8+9+8+4=33
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=1 and id3='1' and id1=1 order by id4;
+call bloom_end();
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36 and id3='36' and id1=18 order by id4;
+call bloom_end();
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124' and id1=62 order by id4;
+call bloom_end();
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=888 and id3='888' and id1=444 order by id4;
+call bloom_end();
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124';
+call bloom_end();
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1 and id3='1' and id4=1;
+call bloom_end();
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=12 and id3='12' and id4=60;
+call bloom_end();
+## 4+8+9=25
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=1 and id3='1';
+call bloom_end();
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=23 and id3='23';
+call bloom_end();
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=1 and id3='1';
+call bloom_end();
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=23 and id3='23';
+call bloom_end();
+
+#4 actual key length > prefix length and cond length < prefix length (o, x, x)
+## cond length 4+8=12
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=1;
+call bloom_end();
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=12;
+call bloom_end();
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=23;
+call bloom_end();
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=100;
+call bloom_end();
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=234;
+call bloom_end();
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36;
+call bloom_end();
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=234;
+call bloom_end();
+
+#5 cond length == extended key length < prefix length (o, o, o)
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=1 and id4=1;
+call bloom_end();
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=23 and id4=115;
+call bloom_end();
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=500 and id4=2500;
+call bloom_end();
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=601 and id4=3005;
+call bloom_end();
+## 4+9+4=17
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='1' and id4=1;
+call bloom_end();
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='12' and id4=60;
+call bloom_end();
+
+#6 cond length == non-extended key length < prefix length, actual key length > prefix length (o, x, x)
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=1;
+call bloom_end();
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=23;
+call bloom_end();
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=345;
+call bloom_end();
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=456;
+call bloom_end();
+## 4+9+4=17
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='100' and id5=500;
+call bloom_end();
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='240' and id5=1200;
+call bloom_end();
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_skip-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_skip-master.opt
new file mode 100644
index 00000000000..5c62c7cf986
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_skip-master.opt
@@ -0,0 +1,3 @@
+--rocksdb_default_cf_options=write_buffer_size=256k;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;};prefix_extractor=capped:20
+--rocksdb_override_cf_options=cf_short_prefix={prefix_extractor=capped:4};cf_long_prefix={prefix_extractor=capped:240}
+--rocksdb_skip_bloom_filter_on_read=1
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_skip.test b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_skip.test
new file mode 100644
index 00000000000..efcf9ee1f73
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_skip.test
@@ -0,0 +1 @@
+--source bloomfilter.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_table_def.inc b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_table_def.inc
new file mode 100644
index 00000000000..2bc9bb64d5e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_table_def.inc
@@ -0,0 +1,33 @@
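+# The caller is expected to set $CF to an index-comment clause naming the
+# column family under test, e.g. (hypothetical):
+#   let $CF = COMMENT 'cf_short_prefix';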
+eval create or replace table t1 (
+ id1 bigint not null,
+ id2 bigint not null,
+ id3 varchar(100) not null,
+ id4 int not null,
+ id5 int not null,
+ value bigint,
+ value2 varchar(100),
+ primary key (id1, id2, id3, id4) $CF,
+ index id2 (id2) $CF,
+ index id2_id1 (id2, id1) $CF,
+ index id2_id3 (id2, id3) $CF,
+ index id2_id4 (id2, id4) $CF,
+ index id2_id3_id1_id4 (id2, id3, id1, id4) $CF,
+ index id3_id2 (id3, id2) $CF
+) engine=ROCKSDB;
+
+eval create or replace table t2 (
+ id1 bigint not null,
+ id2 bigint not null,
+ id3 varchar(100) not null,
+ id4 int not null,
+ id5 int not null,
+ value bigint,
+ value2 varchar(100),
+ primary key (id4) $CF,
+ index id2 (id2) $CF,
+ index id2_id3 (id2, id3) $CF,
+ index id2_id4 (id2, id4) $CF,
+ index id2_id4_id5 (id2, id4, id5) $CF,
+ index id3_id4 (id3, id4) $CF,
+ index id3_id5 (id3, id5) $CF
+) engine=ROCKSDB;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test
new file mode 100644
index 00000000000..b1afc5b2f9d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test
@@ -0,0 +1,11 @@
+--source include/have_rocksdb.inc
+--source include/have_partition.inc
+
+# This test requires ~2.3G of disk space
+--source include/big_test.inc
+
+--let pk_cf=cf1
+--let pk_cf_name=cf1
+--let data_order_desc=0
+
+--source ../include/bulk_load.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_drop_table.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_drop_table.test
new file mode 100644
index 00000000000..18e40fbf4ab
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_drop_table.test
@@ -0,0 +1,19 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE t1 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB;
+
+SET rocksdb_bulk_load_allow_unsorted=1;
+SET rocksdb_bulk_load=1;
+
+INSERT INTO t1 VALUES (1);
+
+--connect (con1,localhost,root,,)
+DROP TABLE t1;
+
+--connection default
+--disconnect con1
+
+# This would have crashed the server prior to the fix
+SET rocksdb_bulk_load=0;
+--error ER_NO_SUCH_TABLE
+SELECT * FROM t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test
new file mode 100644
index 00000000000..0409784811f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test
@@ -0,0 +1,168 @@
+--source include/have_rocksdb.inc
+--source include/count_sessions.inc
+
+--let LOG1=$MYSQLTEST_VARDIR/tmp/rocksdb.bulk_load_errors.1.err
+--let $_mysqld_option=--log-error=$LOG1
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--source include/restart_mysqld_with_option.inc
+
+### Bulk load ###
+CREATE TABLE t1(pk INT, PRIMARY KEY(pk)) ENGINE=ROCKSDB;
+
+# Make sure we get an error with out of order keys during bulk load
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(10);
+INSERT INTO t1 VALUES(11);
+--error ER_KEYS_OUT_OF_ORDER
+INSERT INTO t1 VALUES(9);
+SET rocksdb_bulk_load=0;
+
+# Make sure only 10 and 11 got into the table
+SELECT * FROM t1;
+
+# Make sure we get an error with overlapping data
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(1);
+INSERT INTO t1 VALUES(2);
+INSERT INTO t1 VALUES(20);
+INSERT INTO t1 VALUES(21);
+
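+# Rows written under rocksdb_bulk_load are buffered into SST files and only
+# validated and ingested when the variable is switched off, so the overlap
+# with the previously loaded keys is reported on the SET statement itself: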
+--error ER_OVERLAPPING_KEYS
+SET rocksdb_bulk_load=0;
+
+SHOW VARIABLES LIKE 'rocksdb_bulk_load';
+call mtr.add_suppression('finalizing last SST file while setting bulk loading variable');
+
+SELECT * FROM t1;
+
+--let SEARCH_FILE=$LOG1
+--let SEARCH_PATTERN=RocksDB: Error [0-9]+ finalizing last SST file while setting bulk loading variable
+--source include/search_pattern_in_file.inc
+
+--let LOG2=$MYSQLTEST_VARDIR/tmp/rocksdb.bulk_load_errors.2.err
+--let $_mysqld_option=--log-error=$LOG2
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--source include/restart_mysqld_with_option.inc
+--remove_file $LOG1
+
+
+# Make sure we get an error in the log when we disconnect, and that the server does not assert
+--connect (con1,localhost,root,,)
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(1);
+INSERT INTO t1 VALUES(2);
+INSERT INTO t1 VALUES(20);
+INSERT INTO t1 VALUES(21);
+let $ID = `SELECT connection_id()`;
+--connection default
+--disconnect con1
+
+SELECT * FROM t1;
+
+--disable_parsing
+# MariaDB: no support for $RPC_PROTOCOL
+if (`SELECT $RPC_PROTOCOL > 0`) {
+ # for --rpc_protocol mode wait for the background detached session to
+ # go away
+ let $wait_condition =
+ SELECT COUNT(*) = 0
+ FROM information_schema.srv_sessions
+ WHERE id = $ID;
+ --source include/wait_condition.inc
+}
+
+if (`SELECT $RPC_PROTOCOL = 0`) {
+ # for non --rpc_protocol mode simply wait until the number of sessions
+ # returns to earlier levels
+ --source include/wait_until_count_sessions.inc
+}
+--enable_parsing
+# MariaDB:
+--source include/wait_until_count_sessions.inc
+
+# Note: in MariaDB, the session count is decremented *before*
+# myrocks::rocksdb_close_connection is called. This causes a race condition:
+# we may grep the error log before the bulk load is finalized.
+# To prevent that, do a soft restart of the server (I wasn't able to find
+# any other reliable way).
+--source include/restart_mysqld_with_option.inc
+
+--let SEARCH_FILE=$LOG2
+--let SEARCH_PATTERN=RocksDB: Error [0-9]+ finalizing last SST file while disconnecting
+--source include/search_pattern_in_file.inc
+
+--let LOG3=$MYSQLTEST_VARDIR/tmp/rocksdb.bulk_load_errors.3.err
+--let $_mysqld_option=--log-error=$LOG3
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--source include/restart_mysqld_with_option.inc
+--remove_file $LOG2
+
+TRUNCATE TABLE t1;
+
+### Bulk load with unsorted PKs ###
+SET rocksdb_bulk_load_allow_unsorted=1;
+
+# We should not get an error with out of order PKs
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(100);
+INSERT INTO t1 VALUES(101);
+INSERT INTO t1 VALUES(99);
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+TRUNCATE TABLE t1;
+
+# We should get an error with duplicate PKs in the same bulk load
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(201);
+INSERT INTO t1 VALUES(200);
+INSERT INTO t1 VALUES(202);
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES(201);
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+
+SET rocksdb_bulk_load_allow_unsorted=DEFAULT;
+DROP TABLE t1;
+
+# This would trigger a debug assertion that is just an error in release builds
+CREATE TABLE t1(c1 INT KEY) ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+--error ER_KEYS_OUT_OF_ORDER
+INSERT INTO t1 VALUES (),(),();
+SET rocksdb_bulk_load=0;
+DROP TABLE t1;
+
+# Crash when the table open cache closes a handler whose bulk load is not finalized
+SET @orig_table_open_cache=@@global.table_open_cache;
+CREATE TABLE t1(a INT AUTO_INCREMENT, b INT, PRIMARY KEY (a)) ENGINE=ROCKSDB DEFAULT CHARSET=latin1;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(13, 0);
+INSERT INTO t1 VALUES(2, 'test 2');
+INSERT INTO t1 VALUES(@id, @arg04);
+SET @@global.table_open_cache=FALSE;
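+# (shrinking table_open_cache to its minimum evicts cached handlers, closing
+# this table's handler while its bulk load is still open -- the crash path
+# under test)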
+INSERT INTO t1 VALUES(51479+0.333333333,1);
+DROP TABLE t1;
+SET @@global.table_open_cache=@orig_table_open_cache;
+
+--let SEARCH_FILE=$LOG3
+--let SEARCH_PATTERN=RocksDB: Error [0-9]+ finalizing bulk load while closing handler
+--source include/search_pattern_in_file.inc
+
+--source include/restart_mysqld.inc
+
+--remove_file $LOG3
+
+# Switch between tables, but also introduce duplicate key errors
+CREATE TABLE t1 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB;
+CREATE TABLE t2 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (1), (2);
+INSERT INTO t2 VALUES (1), (2);
+INSERT INTO t1 VALUES (1);
+--error ER_OVERLAPPING_KEYS
+INSERT INTO t2 VALUES (3);
+SET rocksdb_bulk_load=0;
+DROP TABLE t1;
+DROP TABLE t2;
+
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test
new file mode 100644
index 00000000000..f011964db34
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test
@@ -0,0 +1,10 @@
+--source include/have_rocksdb.inc
+--source include/have_partition.inc
+
+--source include/big_test.inc
+
+--let pk_cf=rev:cf1
+--let pk_cf_name=cf1
+--let data_order_desc=0
+
+--source ../include/bulk_load.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test
new file mode 100644
index 00000000000..37f19a39564
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test
@@ -0,0 +1,10 @@
+--source include/have_rocksdb.inc
+--source include/have_partition.inc
+
+--source include/big_test.inc
+
+--let pk_cf=rev:cf1
+--let pk_cf_name=cf1
+--let data_order_desc=1
+
+--source ../include/bulk_load.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test
new file mode 100644
index 00000000000..4f3ffd23bd9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test
@@ -0,0 +1,10 @@
+--source include/have_rocksdb.inc
+--source include/have_partition.inc
+
+--source include/big_test.inc
+
+--let pk_cf=cf1
+--let pk_cf_name=cf1
+--let data_order_desc=1
+
+--source ../include/bulk_load.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_sk.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_sk.test
new file mode 100644
index 00000000000..0fb47f74669
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_sk.test
@@ -0,0 +1,119 @@
+--source include/have_rocksdb.inc
+
+SET rocksdb_bulk_load_size=15;
+
+CREATE TABLE t4 (a INT, b INT, c INT,
+ PRIMARY KEY (a),
+ KEY (b),
+ KEY (c) COMMENT "rev:cf") ENGINE=ROCKSDB;
+CREATE TABLE t3 (a INT, b INT, c INT,
+ PRIMARY KEY (a),
+ KEY (b),
+ KEY (c) COMMENT "rev:cf") ENGINE=ROCKSDB;
+CREATE TABLE t2 (a INT, b INT, c INT,
+ PRIMARY KEY (a),
+ KEY (b),
+ KEY (c) COMMENT "rev:cf") ENGINE=ROCKSDB;
+CREATE TABLE t1 (a INT, b INT, c INT,
+ PRIMARY KEY (a),
+ KEY (b),
+ KEY (c) COMMENT "rev:cf") ENGINE=ROCKSDB;
+
+### Setup the control table ###
+--disable_query_log
+let $sign = 1;
+let $max = 10;
+let $i = 1;
+while ($i <= $max) {
+ let $a = 1 + $sign * $i;
+ let $b = 1 - $sign * $i;
+ let $sign = -$sign;
+ let $insert = INSERT INTO t3 VALUES ($a, $b, $b);
+ eval $insert;
+ inc $i;
+}
+--enable_query_log
+
+### Bulk load PK only ###
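+# While rocksdb_bulk_load=1, the loaded rows sit in not-yet-ingested SST files,
+# so the counts below are expected to stay 0 until the variable is set back to 0.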
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 SELECT * FROM t3 FORCE INDEX (PRIMARY) ORDER BY a;
+SELECT count(*) FROM t1 FORCE INDEX (PRIMARY);
+SELECT count(*) FROM t1 FORCE INDEX (b);
+SELECT count(*) FROM t1 FORCE INDEX (c);
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+SELECT b FROM t1 FORCE INDEX (b);
+SELECT c FROM t1 FORCE INDEX (c);
+--echo Checksums should match
+CHECKSUM TABLE t3;
+CHECKSUM TABLE t1;
+
+### Bulk load PK and SK but require PK order ###
+SET rocksdb_bulk_load_allow_sk=1;
+SET rocksdb_bulk_load=1;
+INSERT INTO t4 SELECT * FROM t3 FORCE INDEX (PRIMARY) ORDER BY a;
+SELECT count(*) FROM t4 FORCE INDEX (PRIMARY);
+SELECT count(*) FROM t4 FORCE INDEX (b);
+SELECT count(*) FROM t4 FORCE INDEX (c);
+SET rocksdb_bulk_load=0;
+SELECT * FROM t4 FORCE INDEX (PRIMARY);
+SELECT b FROM t4 FORCE INDEX (b);
+SELECT c FROM t4 FORCE INDEX (c);
+--echo Checksums should match
+CHECKSUM TABLE t3;
+CHECKSUM TABLE t4;
+
+### Bulk load both PK and SK in random order for all ###
+SET rocksdb_bulk_load_allow_unsorted=1;
+SET rocksdb_bulk_load_allow_sk=1;
+SET rocksdb_bulk_load=1;
+
+INSERT INTO t2 SELECT * FROM t3 WHERE b >= 0 ORDER BY b;
+INSERT INTO t2 SELECT * FROM t3 WHERE b < 0 ORDER BY b;
+SELECT count(*) FROM t2 FORCE INDEX (PRIMARY);
+SELECT count(*) FROM t2 FORCE INDEX (b);
+SELECT count(*) FROM t2 FORCE INDEX (c);
+
+--disable_query_log
+let $sign = 1;
+let $max = 20;
+let $i = 11;
+while ($i <= $max) {
+ let $a = 1 + $sign * $i;
+ let $b = 1 - $sign * $i;
+ let $sign = -$sign;
+ let $insert = INSERT INTO t2 VALUES ($a, $b, $b);
+ eval $insert;
+ inc $i;
+}
+--enable_query_log
+SELECT count(*) FROM t2 FORCE INDEX (PRIMARY);
+SELECT count(*) FROM t2 FORCE INDEX (b);
+SELECT count(*) FROM t2 FORCE INDEX (c);
+SET rocksdb_bulk_load=0;
+
+--disable_query_log
+let $sign = 1;
+let $max = 20;
+let $i = 11;
+while ($i <= $max) {
+ let $a = 1 + $sign * $i;
+ let $b = 1 - $sign * $i;
+ let $sign = -$sign;
+ let $insert = INSERT INTO t3 VALUES ($a, $b, $b);
+ eval $insert;
+ inc $i;
+}
+--enable_query_log
+
+SELECT * FROM t2 FORCE INDEX (PRIMARY);
+SELECT b FROM t2 FORCE INDEX (b);
+SELECT c FROM t2 FORCE INDEX (c);
+--echo Checksums should match
+CHECKSUM TABLE t3;
+CHECKSUM TABLE t2;
+
+DROP TABLE t1;
+DROP TABLE t2;
+DROP TABLE t3;
+DROP TABLE t4;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test
new file mode 100644
index 00000000000..2abeae343c9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+--source include/have_partition.inc
+
+--let pk_cf=cf1
+
+--source ../include/bulk_load_unsorted.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_errors.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_errors.test
new file mode 100644
index 00000000000..eee4f713a9b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_errors.test
@@ -0,0 +1,8 @@
+--source include/have_rocksdb.inc
+--source include/not_debug.inc
+
+# Cannot change unsorted input preference during bulk load
+SET rocksdb_bulk_load=1;
+--error ER_ERROR_WHEN_EXECUTING_COMMAND
+SET rocksdb_bulk_load_allow_unsorted=1;
+SET rocksdb_bulk_load=0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_rev.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_rev.test
new file mode 100644
index 00000000000..de9a5c26424
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_rev.test
@@ -0,0 +1,5 @@
+--source include/have_rocksdb.inc
+
+--let pk_cf=rev:cf1
+
+--source ../include/bulk_load_unsorted.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic.inc b/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic.inc
new file mode 100644
index 00000000000..1f5c9fbb3f2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic.inc
@@ -0,0 +1,213 @@
+--source include/have_rocksdb.inc
+
+--source ../include/bypass_create_table.inc
+
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+SELECT /*+ no_bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+SELECT id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+
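+# The following hint spellings are malformed or incomplete; none of them should
+# engage the bypass path, and each query must still return the correct rows
+# through normal execution.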
+SELECT /*+bypassabc*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+SELECT /*+bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+SELECT /* +bypassabc*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+SELECT /*aaaaaaaaabbbbbbbbb*/ id1,id2,id1_type,id2_type,data,version
+from link_table WHERE id1=1 and id2=2 and link_type=3;
+SELECT /*+*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+SELECT /*+b*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+SELECT /*+byp*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+SELECT /*+bypw*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+SELECT /*-b*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+SELECT /**/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+
+--echo # Point query
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2 IN (2, 3, 4) and link_type=3;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2 IN (2) and link_type=3;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1 IN (1) and id2 IN (2) and link_type=3;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1 IN (1, 2) and id2 IN (2, 3, 4) and link_type=3;
+
+--echo # Prefix range query
+
+--echo # Prefix range query with SK
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 3 AND time = 10
+ORDER BY TIME DESC LIMIT 10;
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 3 AND time = 10
+ORDER BY TIME ASC LIMIT 10;
+
+--echo # Prefix range query with SK with limits
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 0;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 0,10;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 0,5;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 0,1;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,0;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,10;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,5;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,1;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,0;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5,10;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5,5;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5,1;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5,0;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10,10;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10,5;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10,1;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10,0;
+
+--echo # Prefix range query with PK
+SELECT /*+ bypass */ id1, id2, link_type FROM link_table FORCE INDEX (PRIMARY)
+WHERE link_type=3 and id1=1 ORDER BY id2 DESC;
+SELECT /*+ bypass */ id1, id2, link_type FROM link_table FORCE INDEX (PRIMARY)
+WHERE link_type=3 and id1=1 ORDER BY id2 ASC;
+
+--echo # Prefix range query with PK + value
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (PRIMARY)
+WHERE link_type=3 and id1=1 ORDER BY id2 DESC;
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (PRIMARY)
+WHERE link_type=3 and id1=1 ORDER BY id2 ASC;
+
+--echo # Transaction
+BEGIN;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+UPDATE link_table set data="bcd" WHERE id1=1 and id2=2 and link_type = 3;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+COMMIT;
+
+BEGIN;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+UPDATE link_table set data="cde" WHERE id1=1 and id2=2 and link_type = 3;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+ROLLBACK;
+
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+
+--echo # Data types
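+# Constants of various types (string, boolean, bit, hex, NULL) compared against
+# integer columns; the bypass path is expected to coerce them the same way
+# normal execution does (id1=NULL can never be true, so that query returns an
+# empty result).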
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3";
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1="1";
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=True;
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=b'1';
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=x'01';
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=NULL;
+
+DROP TABLE count_table;
+DROP TABLE link_table;
+DROP TABLE link_table3;
+DROP TABLE link_table2;
+DROP TABLE id_table;
+DROP TABLE node_table;
+DROP TABLE link_table5;
+DROP TABLE link_table6;
+DROP TABLE link_table4;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic.test b/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic.test
new file mode 100644
index 00000000000..51064356de7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic.test
@@ -0,0 +1,3 @@
+--source include/have_rocksdb.inc
+
+--source bypass_select_basic.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic_bloom-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic_bloom-master.opt
new file mode 100644
index 00000000000..81bc90b0531
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic_bloom-master.opt
@@ -0,0 +1,3 @@
+--rocksdb_default_cf_options=write_buffer_size=128m;target_file_size_base=32m;max_bytes_for_level_base=512m;level0_file_num_compaction_trigger=4;level0_slowdown_writes_trigger=20;level0_stop_writes_trigger=30;max_write_buffer_number=4;compression_per_level=kLZ4Compression;bottommost_compression=kZSTD;compression_opts=-14:6:0;block_based_table_factory={cache_index_and_filter_blocks=1;filter_policy=bloomfilter:10:false;whole_key_filtering=0};prefix_extractor=capped:12;level_compaction_dynamic_level_bytes=true;optimize_filters_for_hits=true;memtable_prefix_bloom_size_ratio=0.039;max_compaction_bytes=402653184;report_bg_io_stats=true;compaction_pri=kMinOverlappingRatio;soft_pending_compaction_bytes_limit=20480000000
+--rocksdb_override_cf_options=cf_assoc={prefix_extractor=capped:28};cf_assoc_count={prefix_extractor=capped:20};rev:cf_assoc_id1_type={prefix_extractor=capped:20};cf_fbobj_type_id={prefix_extractor=capped:16};cf_assoc_disagg={prefix_extractor=capped:20};__system__={write_buffer_size=16m};
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic_bloom.test b/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic_bloom.test
new file mode 100644
index 00000000000..51064356de7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic_bloom.test
@@ -0,0 +1,3 @@
+--source include/have_rocksdb.inc
+
+--source bypass_select_basic.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bytes_written.test b/storage/rocksdb/mysql-test/rocksdb/t/bytes_written.test
new file mode 100644
index 00000000000..f0361707355
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bytes_written.test
@@ -0,0 +1,22 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS stats_test_table;
+--enable_warnings
+
+# Create the table
+CREATE TABLE stats_test_table (a INT, b INT, PRIMARY KEY (a)) ENGINE=ROCKSDB;
+
+# Ensure appropriate perf_context_level is set
+SET GLOBAL rocksdb_perf_context_level=3;
+
+# Insert some values
+INSERT INTO stats_test_table VALUES (7,1);
+INSERT INTO stats_test_table VALUES (2,2);
+
+# Verify the bytes written are updated in the table stats
+SELECT io_write_bytes > 0 FROM INFORMATION_SCHEMA.TABLE_STATISTICS WHERE TABLE_NAME = "stats_test_table";
+
+# Cleanup
+DROP TABLE stats_test_table;
+SET GLOBAL rocksdb_perf_context_level=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/cardinality-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/cardinality-master.opt
new file mode 100644
index 00000000000..2cd3c8051f8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/cardinality-master.opt
@@ -0,0 +1,3 @@
+--skip-rocksdb_debug_optimizer_no_zero_cardinality
+--rocksdb_compaction_sequential_deletes=0
+--rocksdb_table_stats_sampling_pct=100
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/cardinality.test b/storage/rocksdb/mysql-test/rocksdb/t/cardinality.test
new file mode 100644
index 00000000000..21e4b49e560
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/cardinality.test
@@ -0,0 +1,117 @@
+--source include/have_rocksdb.inc
+
+--source include/restart_mysqld.inc
+
+# Test memtable cardinality statistics
+CREATE TABLE t0 (id int PRIMARY KEY, a int, INDEX ix_a (a)) engine=rocksdb;
+
+# Populate the table with 10 records where the cardinality of id is N and of a is N/2.
+insert into t0 values (0, 0),(1, 1),(2, 2),(3, 3),(4, 4),
+(5, 4),(6, 4),(7, 4),(8, 4),(9, 4);
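+# With N=10 rows, id has 10 distinct values and a has 5 (0..4), so the
+# FLOOR(@N/cardinality) checks below should print 1 for id and 2 for a.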
+
+# Assert no cardinality data exists before ANALYZE TABLE is done
+SELECT cardinality FROM information_schema.statistics where table_name="t0" and
+column_name="id";
+SELECT cardinality FROM information_schema.statistics where table_name="t0" and
+column_name="a";
+
+--disable_result_log
+ANALYZE TABLE t0;
+--enable_result_log
+
+SELECT table_rows into @N FROM information_schema.tables
+WHERE table_name = "t0";
+SELECT FLOOR(@N/cardinality) FROM
+information_schema.statistics where table_name="t0" and column_name="id";
+SELECT FLOOR(@N/cardinality) FROM
+information_schema.statistics where table_name="t0" and column_name="a";
+
+# Flush the table and re-run the test, as statistics are calculated a bit
+# differently for memtables and SST files
+SET GLOBAL rocksdb_force_flush_memtable_now = 1;
+--disable_result_log
+ANALYZE TABLE t0;
+--enable_result_log
+
+SELECT table_rows into @N FROM information_schema.tables
+WHERE table_name = "t0";
+SELECT FLOOR(@N/cardinality) FROM
+information_schema.statistics where table_name="t0" and column_name="id";
+SELECT FLOOR(@N/cardinality) FROM
+information_schema.statistics where table_name="t0" and column_name="a";
+
+drop table t0;
+
+# Test big table on SST
+
+--disable_warnings
+DROP TABLE IF EXISTS t1,t10,t11;
+--enable_warnings
+
+create table t1(
+ id bigint not null primary key,
+ i1 bigint, #unique
+ i2 bigint, #repeating
+ c1 varchar(20), #unique
+ c2 varchar(20), #repeating
+ index t1_1(id, i1),
+ index t1_2(i1, i2),
+ index t1_3(i2, i1),
+ index t1_4(c1, c2),
+ index t1_5(c2, c1)
+) engine=rocksdb;
+
+--disable_query_log
+create table t10(a int primary key);
+insert into t10 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t11(a int primary key);
+insert into t11 select A.a + B.a* 10 + C.a * 100 from t10 A, t10 B, t10 C;
+
+set @a=0;
+let $i=0;
+set rocksdb_bulk_load=1;
+while ($i<100)
+{
+ inc $i;
+ eval insert into t1 select (@a:=@a+1), @a, @a div 10, @a, @a div 10 from t11;
+}
+set rocksdb_bulk_load=0;
+
+drop table t10;
+drop table t11;
+--enable_query_log
+
+# Flush memtable out to SST and display index cardinalities
+optimize table t1;
+show index in t1;
+SELECT table_name, table_rows FROM information_schema.tables WHERE table_schema = DATABASE();
+
+--echo restarting...
+--source include/restart_mysqld.inc
+
+# display index cardinalities after the restart
+show index in t1;
+SELECT table_name, table_rows FROM information_schema.tables WHERE table_schema = DATABASE();
+
+CREATE TABLE t2 (a INT, b INT, c INT, d INT, e INT, f INT, g INT,
+ PRIMARY KEY (a), KEY (c, b, a, d, e, f, g))
+ ENGINE=ROCKSDB;
+--disable_query_log
+let $i=0;
+while ($i<100)
+{
+ inc $i;
+ eval insert t2 values($i, $i div 10, 1, 1, 1, 1, 1);
+}
+--enable_query_log
+
+# In key 'c', cardinality should be 1 for column c, 10 for b, and 100 for a and the remaining columns.
+SET GLOBAL rocksdb_force_flush_memtable_now = 1;
+ANALYZE TABLE t2;
+--echo cardinality of the columns after 'a' must be equal to the cardinality of column 'a'
+SELECT CARDINALITY INTO @c FROM information_schema.statistics WHERE TABLE_NAME='t2' AND INDEX_NAME='c' AND COLUMN_NAME='a';
+SELECT COLUMN_NAME, CARDINALITY = @c FROM information_schema.statistics WHERE TABLE_NAME='t2' AND INDEX_NAME='c' AND SEQ_IN_INDEX > 3;
+
+drop table t1, t2;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/check_flags.test b/storage/rocksdb/mysql-test/rocksdb/t/check_flags.test
new file mode 100644
index 00000000000..9afe562f114
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/check_flags.test
@@ -0,0 +1,117 @@
+--source include/have_rocksdb.inc
+--source include/have_debug_sync.inc
+
+set debug_sync='RESET';
+set global rocksdb_debug_ttl_read_filter_ts = -10;
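+# (shifting the TTL read-filter timestamp 10 seconds into the past is assumed
+# to keep t3's ttl_duration=1 rows visible to the scans below while still
+# routing reads through the TTL-filtering path where the sync point fires)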
+
+connect (conn1, localhost, root,,);
+--let $conn1_id = `SELECT CONNECTION_ID()`
+connection default;
+
+CREATE TABLE t1 (id INT, value INT, KEY (id), KEY (value)) ENGINE=ROCKSDB;
+CREATE TABLE t2 (id INT, value INT) ENGINE=ROCKSDB;
+CREATE TABLE t3 (id INT, kp1 INT, PRIMARY KEY (id), KEY(kp1)) ENGINE=ROCKSDB COMMENT='ttl_duration=1';
+
+INSERT INTO t1 VALUES (1,1), (2,2), (3,3), (4,4), (5,5);
+INSERT INTO t2 SELECT * FROM t1;
+INSERT INTO t3 SELECT * FROM t1;
+
+connection conn1;
+set debug_sync='rocksdb.check_flags_rmi SIGNAL parked WAIT_FOR go';
+send SELECT value FROM t1 WHERE value = 3;
+
+connection default;
+set debug_sync='now WAIT_FOR parked';
+--echo KILL QUERY \$conn1_id;
+--disable_query_log
+eval KILL QUERY $conn1_id;
+--enable_query_log
+set debug_sync='now SIGNAL go';
+
+connection conn1;
+--error ER_QUERY_INTERRUPTED
+--reap
+
+set debug_sync='RESET';
+
+connection conn1;
+set debug_sync='rocksdb.check_flags_rmi_scan SIGNAL parked WAIT_FOR go';
+send SELECT DISTINCT(id) FROM t1 WHERE value = 5 AND id IN (1, 3, 5);
+
+connection default;
+set debug_sync='now WAIT_FOR parked';
+--echo KILL QUERY \$conn1_id;
+--disable_query_log
+eval KILL QUERY $conn1_id;
+--enable_query_log
+set debug_sync='now SIGNAL go';
+
+connection conn1;
+--error ER_QUERY_INTERRUPTED
+--reap
+
+set debug_sync='RESET';
+
+connection conn1;
+set debug_sync='rocksdb.check_flags_inwd SIGNAL parked WAIT_FOR go';
+send SELECT value FROM t1 WHERE value > 3;
+
+connection default;
+set debug_sync='now WAIT_FOR parked';
+--echo KILL QUERY \$conn1_id;
+--disable_query_log
+eval KILL QUERY $conn1_id;
+--enable_query_log
+set debug_sync='now SIGNAL go';
+
+connection conn1;
+--error ER_QUERY_INTERRUPTED
+--reap
+
+set debug_sync='RESET';
+
+connection conn1;
+set debug_sync='rocksdb.check_flags_rnwd SIGNAL parked WAIT_FOR go';
+send SELECT id FROM t2;
+
+connection default;
+set debug_sync='now WAIT_FOR parked';
+--echo KILL QUERY \$conn1_id;
+--disable_query_log
+eval KILL QUERY $conn1_id;
+--enable_query_log
+set debug_sync='now SIGNAL go';
+
+connection conn1;
+--error ER_QUERY_INTERRUPTED
+--reap
+
+set debug_sync='RESET';
+
+
+connection conn1;
+set debug_sync='rocksdb.check_flags_ser SIGNAL parked WAIT_FOR go';
+send SELECT kp1 FROM t3 ORDER BY kp1;
+
+connection default;
+set debug_sync='now WAIT_FOR parked';
+--echo KILL QUERY \$conn1_id;
+--disable_query_log
+eval KILL QUERY $conn1_id;
+--enable_query_log
+set debug_sync='now SIGNAL go';
+
+connection conn1;
+--error ER_QUERY_INTERRUPTED
+--reap
+
+connection default;
+--disconnect conn1
+
+set debug_sync='RESET';
+
+set global rocksdb_debug_ttl_read_filter_ts = DEFAULT;
+
+DROP TABLE t1;
+DROP TABLE t2;
+DROP TABLE t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test b/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test
new file mode 100644
index 00000000000..c8c12626139
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test
@@ -0,0 +1,56 @@
+--disable_warnings
+let $MYSQLD_DATADIR= `select @@datadir`;
+let $restart_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
+let $error_log= $MYSQLTEST_VARDIR/log/my_restart.err;
+select variable_name, variable_value from information_schema.global_variables where variable_name="rocksdb_ignore_unknown_options";
+
+#
+# MariaDB: The following shell commands are not portable so we are
+# using perl instead:
+#--exec find $MYSQLD_DATADIR/#rocksdb/OPTIONS* | sort -t- -k 2 -n | tail -1 | xargs -0 -I {} -t sh -c "sed -i 's/rocksdb_version=.*/rocksdb_version=99.9.9/' {}"
+#--exec find $MYSQLD_DATADIR/#rocksdb/OPTIONS* | sort -t- -k 2 -n | tail -1 | xargs -0 -I {} -t sh -c "echo hello=world>>{}"
+
+perl;
+ my $path=$ENV{MYSQLTEST_VARDIR} . "/mysqld.1/data/\#rocksdb";
+ opendir(my $dh, $path) || die "Can't opendir $path: $!";
+ my @files = grep { /^OPTIONS/ } readdir($dh);
+ closedir($dh);
+
+ sub compare_second_as_number {
+ local $aa= shift;
+ local $bb= shift;
+ $aa =~ s/OPTIONS-//;
+ $bb =~ s/OPTIONS-//;
+ return $aa <=> $bb;
+ }
+
+ @sorted_files = sort { compare_second_as_number($a, $b); } @files;
+ my $last_file= $sorted_files[-1];
+
+ my $contents="";
+ open(my $fh, "<", "$path/$last_file") || die ("Couldn't open $path/$last_file");
+ while (<$fh>) {
+ $_ =~ s/rocksdb_version=.*/rocksdb_version=99.9.9/;
+ $contents .= $_;
+ }
+ close($fh);
+ $contents .= "hello=world\n";
+ open(my $fh, ">", "$path/$last_file") || die("Can't open $path/$last_file for writing");
+ print $fh $contents;
+ close($fh);
+EOF
+
+--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--shutdown_server 10
+
+--error 1
+--exec $MYSQLD_CMD --plugin_load=$HA_ROCKSDB_SO --rocksdb_ignore_unknown_options=0 --log-error=$error_log
+
+let SEARCH_FILE= $error_log;
+let SEARCH_PATTERN= RocksDB: Compatibility check against existing database options failed;
+--source include/search_pattern_in_file.inc
+--remove_file $error_log
+--enable_reconnect
+--exec echo "restart" > $restart_file
+--source include/wait_until_connected_again.inc
+select variable_name, variable_value from information_schema.global_variables where variable_name="rocksdb_ignore_unknown_options";
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/check_log_for_xa.py b/storage/rocksdb/mysql-test/rocksdb/t/check_log_for_xa.py
new file mode 100644
index 00000000000..a3d50f305a4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/check_log_for_xa.py
@@ -0,0 +1,31 @@
+import sys
+import re
+
+"""
+Example usage:
+ python check_log_for_xa.py path/to/log/mysqld.2.err rollback,commit,prepare
+"""
+
+log_path = sys.argv[1]
+desired_filters = sys.argv[2]
+
+all_filters = [
+    ('rollback', re.compile(r'(\[Note\] rollback xid .+)')),
+    ('commit', re.compile(r'(\[Note\] commit xid .+)')),
+    ('prepare',
+     re.compile(r'(\[Note\] Found \d+ prepared transaction\(s\) in \w+)')),
+]
+
+# Materialize as a list so the filters can be applied to every log line,
+# and split the comma-separated names to avoid substring false positives.
+active_filters = [f for f in all_filters if f[0] in desired_filters.split(',')]
+
+results = set()
+with open(log_path) as log:
+ for line in log:
+ line = line.strip()
+ for f in active_filters:
+ match = f[1].search(line)
+ if match:
+ results.add("**found '%s' log entry**" % f[0])
+
+for res in results:
+    print(res)
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/check_table.inc b/storage/rocksdb/mysql-test/rocksdb/t/check_table.inc
new file mode 100644
index 00000000000..c108a97362d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/check_table.inc
@@ -0,0 +1,54 @@
+#
+# CHECK TABLE statements
+#
+# Note: the output is likely to be different for the engine under test,
+# in which case rdiff will be needed. Or, the output might say that
+# the storage engine does not support CHECK.
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1,t2;
+--enable_warnings
+
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+
+CREATE TABLE t2 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+
+CHECK TABLE t1;
+INSERT INTO t1 (a,b) VALUES (3,'c');
+INSERT INTO t2 (a,b) VALUES (4,'d');
+CHECK TABLE t1, t2 FOR UPGRADE;
+INSERT INTO t2 (a,b) VALUES (5,'e');
+CHECK TABLE t2 QUICK;
+INSERT INTO t1 (a,b) VALUES (6,'f');
+CHECK TABLE t1 FAST;
+INSERT INTO t1 (a,b) VALUES (7,'g');
+INSERT INTO t2 (a,b) VALUES (8,'h');
+CHECK TABLE t2, t1 MEDIUM;
+INSERT INTO t1 (a,b) VALUES (9,'i');
+INSERT INTO t2 (a,b) VALUES (10,'j');
+CHECK TABLE t1, t2 EXTENDED;
+INSERT INTO t1 (a,b) VALUES (11,'k');
+CHECK TABLE t1 CHANGED;
+
+DROP TABLE t1, t2;
+
+
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY, KEY(a)) ENGINE=rocksdb;
+INSERT INTO t1 (a) VALUES (1),(2),(5);
+CHECK TABLE t1;
+INSERT INTO t1 (a) VALUES (6),(8),(12);
+CHECK TABLE t1 FOR UPGRADE;
+INSERT INTO t1 (a) VALUES (13),(15),(16);
+CHECK TABLE t1 QUICK;
+INSERT INTO t1 (a) VALUES (17),(120),(132);
+CHECK TABLE t1 FAST;
+INSERT INTO t1 (a) VALUES (801),(900),(7714);
+CHECK TABLE t1 MEDIUM;
+INSERT INTO t1 (a) VALUES (8760),(10023),(12000);
+CHECK TABLE t1 EXTENDED;
+INSERT INTO t1 (a) VALUES (13345),(24456),(78302),(143028);
+CHECK TABLE t1 CHANGED;
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/check_table.test b/storage/rocksdb/mysql-test/rocksdb/t/check_table.test
new file mode 100644
index 00000000000..4d349f7a167
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/check_table.test
@@ -0,0 +1,12 @@
+--source include/have_rocksdb.inc
+
+#
+# CHECK TABLE statements
+#
+# Note: the output is likely to be different for the engine under test,
+# in which case rdiff will be needed. Or, the output might say that
+# the storage engine does not support CHECK.
+#
+
+--source check_table.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/checkpoint.test b/storage/rocksdb/mysql-test/rocksdb/t/checkpoint.test
new file mode 100644
index 00000000000..e5de6246f60
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/checkpoint.test
@@ -0,0 +1,107 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+DROP TABLE IF EXISTS t3;
+DROP TABLE IF EXISTS t4;
+DROP TABLE IF EXISTS t5;
+--enable_warnings
+
+# Start from clean slate
+#--source include/restart_mysqld.inc
+
+CREATE TABLE t1 (
+ a int not null,
+ b int not null,
+ primary key (a,b) comment 'cf1',
+ key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+
+CREATE TABLE t2 (
+ a int not null,
+ b int not null,
+ primary key (a,b) comment 'cf1',
+ key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+
+CREATE TABLE t3 (
+ a int not null,
+ b int not null,
+ primary key (a,b) comment 'cf1',
+ key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+
+CREATE TABLE t4 (
+ a int not null,
+ b int not null,
+ primary key (a,b) comment 'cf1',
+ key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+
+# Populate tables
+let $max = 1000;
+let $table = t1;
+--source drop_table_repopulate_table.inc
+let $table = t2;
+--source drop_table_repopulate_table.inc
+let $table = t3;
+--source drop_table_repopulate_table.inc
+let $table = t4;
+--source drop_table_repopulate_table.inc
+
+# Make sure new table gets unique indices
+CREATE TABLE t5 (
+ a int not null,
+ b int not null,
+ primary key (a,b) comment 'cf1',
+ key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+
+let $max = 1000;
+let $table = t5;
+--source drop_table_repopulate_table.inc
+
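+# Note: set_checkpoint.inc is not part of this diff; it is assumed to set the
+# rocksdb_create_checkpoint global to $checkpoint and verify the outcome
+# against $succeeds, roughly:
+#   eval SET GLOBAL rocksdb_create_checkpoint = '$checkpoint';
+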
+# Create checkpoint without trailing '/'
+let $checkpoint = $MYSQL_TMP_DIR/checkpoint;
+let $succeeds = 1;
+--source set_checkpoint.inc
+
+# Create checkpoint with a trailing '/'
+let $checkpoint = $MYSQL_TMP_DIR/checkpoint/;
+let $succeeds = 1;
+--source set_checkpoint.inc
+
+# Set checkpoint dir as empty string, which fails
+let $checkpoint = ;
+let $succeeds = 0;
+--source set_checkpoint.inc
+
+# Set checkpoint as a directory that does not exist, which fails
+let $checkpoint = /does/not/exist;
+let $succeeds = 0;
+--source set_checkpoint.inc
+
+# Set checkpoint as a directory that already exists, which fails
+let $checkpoint = $MYSQL_TMP_DIR/already-existing-directory;
+--mkdir $checkpoint
+let $succeeds = 0;
+--source set_checkpoint.inc
+--exec rm -rf $checkpoint
+
+--disable_result_log
+truncate table t1;
+optimize table t1;
+truncate table t2;
+optimize table t2;
+truncate table t3;
+optimize table t3;
+truncate table t4;
+optimize table t4;
+truncate table t5;
+optimize table t5;
+drop table if exists t1;
+drop table if exists t2;
+drop table if exists t3;
+drop table if exists t4;
+drop table if exists t5;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/checksum_table.test b/storage/rocksdb/mysql-test/rocksdb/t/checksum_table.test
new file mode 100644
index 00000000000..51c639a85dd
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/checksum_table.test
@@ -0,0 +1,84 @@
+--source include/have_rocksdb.inc
+
+#
+# CHECKSUM TABLE statements for standard CHECKSUM properties.
+# Live checksums are covered in checksum_table_live.test
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1,t2;
+--enable_warnings
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb CHECKSUM=0;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+
+CREATE TABLE t2 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb CHECKSUM=0;
+
+CHECKSUM TABLE t1;
+CHECKSUM TABLE t2, t1;
+CHECKSUM TABLE t1, t2 QUICK;
+CHECKSUM TABLE t1, t2 EXTENDED;
+
+DROP TABLE t1, t2;
+
+--echo #
+--echo # Issue #110: SQL command checksum returns inconsistent result
+--echo #
+create table t1 (pk int primary key, col1 varchar(10)) engine=rocksdb;
+insert into t1 values (2,'fooo');
+insert into t1 values (1,NULL);
+checksum table t1;
+checksum table t1;
+select * from t1 where pk=2;
+checksum table t1;
+checksum table t1;
+flush tables;
+checksum table t1;
+checksum table t1;
+
+drop table t1;
+
+--echo #
+--echo # The following test is about making sure MyRocks CHECKSUM TABLE
+--echo # values are the same as with InnoDB.
+--echo # If you see checksum values changed, make sure their counterparts
+--echo # in suite/innodb/r/checksum-matches-myrocks.result match.
+--echo #
+
+create table t1 (pk int primary key, col1 varchar(10)) engine=rocksdb;
+insert into t1 values (2,'fooo');
+insert into t1 values (1,NULL);
+checksum table t1;
+drop table t1;
+
+create table t1 (
+ pk bigint unsigned primary key,
+ col1 varchar(10),
+ col2 tinyint,
+ col3 double
+) engine=rocksdb;
+
+--echo # MariaDB has changed the checksumming algorithm
+--echo # Enable the old algorithm:
+set @tmp_old=@@old;
+set old=1;
+
+
+checksum table t1;
+
+insert into t1 values (1, NULL, NULL, NULL);
+insert into t1 values (2, 'foo', NULL, NULL);
+checksum table t1;
+
+insert into t1 values (3, NULL, 123, NULL);
+insert into t1 values (4, NULL, NULL, 2.78);
+checksum table t1;
+
+insert into t1 values (5, 'xxxYYYzzzT', NULL, 2.78);
+insert into t1 values (6, '', NULL, 2.78);
+checksum table t1;
+
+set old=@tmp_old;
+
+drop table t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/checksum_table_live.test b/storage/rocksdb/mysql-test/rocksdb/t/checksum_table_live.test
new file mode 100644
index 00000000000..da278ed7f9b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/checksum_table_live.test
@@ -0,0 +1,24 @@
+--source include/have_rocksdb.inc
+
+#
+# CHECKSUM TABLE statements for live CHECKSUM.
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1,t2;
+--enable_warnings
+
+# For most engines CHECKSUM=1 option will be ignored,
+# and the results will be different
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb CHECKSUM=1;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+CREATE TABLE t2 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb CHECKSUM=1;
+
+CHECKSUM TABLE t1;
+CHECKSUM TABLE t2, t1;
+CHECKSUM TABLE t1, t2 QUICK;
+CHECKSUM TABLE t1, t2 EXTENDED;
+
+DROP TABLE t1, t2;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/col_not_null.inc b/storage/rocksdb/mysql-test/rocksdb/t/col_not_null.inc
new file mode 100644
index 00000000000..2d3c9292441
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/col_not_null.inc
@@ -0,0 +1,55 @@
+#
+# NOT NULL attribute in columns
+#
+# Usage:
+# let $col_type = <column type>;
+# let $col_default = <default value for a column>;
+# --source col_not_null.inc
+#
+# We will add NOT NULL to the column options.
+#
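+# For example, col_opt_not_null.test invokes this file as:
+#   --let $col_type = VARCHAR(64)
+#   --let $col_default = 'test default'
+#   --source col_not_null.inc
+#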
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+--echo #----------------------------------
+--echo # $col_type NOT NULL columns without a default
+--echo #----------------------------------
+
+eval CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c $col_type NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+
+--error ER_BAD_NULL_ERROR
+INSERT INTO t1 (c) VALUES (NULL);
+eval INSERT INTO t1 (c) VALUES ($col_default);
+SELECT HEX(c) FROM t1;
+
+DROP TABLE t1;
+
+--echo #----------------------------------
+--echo # $col_type NOT NULL columns with a default
+--echo #----------------------------------
+
+eval CREATE TABLE t1 (
+ pk INT AUTO_INCREMENT PRIMARY KEY,
+ c $col_type NOT NULL DEFAULT $col_default
+) ENGINE=rocksdb;
+
+SHOW COLUMNS IN t1;
+
+--error ER_INVALID_DEFAULT
+eval ALTER TABLE t1 ADD COLUMN err $col_type NOT NULL DEFAULT NULL;
+
+--error ER_BAD_NULL_ERROR
+INSERT INTO t1 (c) VALUES (NULL);
+
+eval INSERT INTO t1 (c) VALUES ($col_default);
+eval INSERT INTO t1 () VALUES ();
+
+# HEX should be universal for all column types
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+
+DROP TABLE t1;
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/col_not_null_timestamp.inc b/storage/rocksdb/mysql-test/rocksdb/t/col_not_null_timestamp.inc
new file mode 100644
index 00000000000..812ada6f486
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/col_not_null_timestamp.inc
@@ -0,0 +1,70 @@
+#
+# NOT NULL attribute in TIMESTAMP columns
+#
+# This is a copy of col_not_null.inc, except that
+# instead of getting an error on inserting NULL into a non-NULL column,
+# we are getting the current timestamp (see MySQL:68472).
+# If the bug is ever fixed, this include file won't be needed anymore.
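+#
+# Concretely, with a column c TIMESTAMP NOT NULL, the statement
+#   INSERT INTO t1 (c) VALUES (NULL);
+# stores the current timestamp instead of failing with ER_BAD_NULL_ERROR.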
+
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+--echo #----------------------------------
+--echo # $col_type NOT NULL column without a default
+--echo #----------------------------------
+
+eval CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY, c $col_type NOT NULL) ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+
+# Here is where the non-standard behavior strikes:
+# instead of an error we are getting the current timestamp.
+
+# As of mysql-5.6.11, MySQL no longer accepts this and produces an error
+# (MariaDB doesn't have this patch, so it doesn't produce an error):
+# --error ER_BAD_NULL_ERROR
+INSERT INTO t1 (c) VALUES (NULL);
+eval INSERT INTO t1 (c) VALUES ($col_default);
+SELECT HEX(c) FROM t1;
+
+DROP TABLE t1;
+
+--echo #----------------------------------
+--echo # $col_type NOT NULL columns with a default
+--echo #----------------------------------
+
+eval CREATE TABLE t1 (
+ pk INT AUTO_INCREMENT PRIMARY KEY,
+ c $col_type NOT NULL DEFAULT $col_default
+) ENGINE=rocksdb;
+
+SHOW COLUMNS IN t1;
+
+--error ER_INVALID_DEFAULT
+eval ALTER TABLE t1 ADD COLUMN err $col_type NOT NULL DEFAULT NULL;
+
+# Here is where the non-standard behavior strikes:
+# instead of an error we are getting the current timestamp.
+
+# As of mysql-5.6.11, MySQL no longer accepts this and produces an error
+# (MariaDB doesn't have this patch, so it doesn't produce an error):
+# --error ER_BAD_NULL_ERROR
+
+# Since we don't produce an error, the row will get inserted. Make it
+# deterministic:
+set @save_ts=@@timestamp;
+set timestamp=1478923914;
+
+INSERT INTO t1 (c) VALUES (NULL);
+set timestamp=@save_ts;
+
+eval INSERT INTO t1 (c) VALUES ($col_default);
+eval INSERT INTO t1 () VALUES ();
+
+# HEX should be universal for all column types
+SELECT pk, HEX(c) FROM t1 ORDER BY pk;
+
+DROP TABLE t1;
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/col_null.inc b/storage/rocksdb/mysql-test/rocksdb/t/col_null.inc
new file mode 100644
index 00000000000..7ebfee0b114
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/col_null.inc
@@ -0,0 +1,34 @@
+#
+# NULL attribute and DEFAULT NULL in columns
+#
+# Usage:
+# let $col_type = <column type>;
+# let $col_default = <default non-null value for a column>;
+# --source col_null.inc
+#
+# We will add NULL attribute to the column options.
+#
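+# For example, col_opt_null.test invokes this file as:
+#   --let $col_type = CHAR
+#   --let $col_default = '_'
+#   --source col_null.inc
+#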
+
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+eval CREATE TABLE t1 (
+ c $col_type NULL,
+ c1 $col_type NULL DEFAULT NULL,
+ c2 $col_type NULL DEFAULT $col_default,
+ pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+
+SHOW COLUMNS IN t1;
+
+INSERT INTO t1 (c,c1,c2) VALUES (NULL,NULL,NULL);
+eval INSERT INTO t1 (c,c1,c2) VALUES ($col_default,$col_default,$col_default);
+INSERT INTO t1 () VALUES ();
+
+SELECT pk, HEX(c), HEX(c1), HEX(c2) FROM t1 ORDER BY pk;
+SELECT pk, HEX(c2) FROM t1 ORDER BY pk;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/col_opt_default.test b/storage/rocksdb/mysql-test/rocksdb/t/col_opt_default.test
new file mode 100644
index 00000000000..6f91ee7ca9a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/col_opt_default.test
@@ -0,0 +1,27 @@
+--source include/have_rocksdb.inc
+
+#
+# Check whether DEFAULT column attribute
+# is supported in CREATE and ALTER TABLE.
+# If the attribute is supported at all, it will be covered
+# in more details in col_option_null and col_option_not_null tests.
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a INT PRIMARY KEY DEFAULT '0') ENGINE=rocksdb;
+SHOW COLUMNS IN t1;
+
+INSERT INTO t1 (a) VALUES (1);
+SELECT a FROM t1;
+
+ALTER TABLE t1 ADD COLUMN b CHAR(8) DEFAULT '';
+SHOW COLUMNS IN t1;
+
+INSERT INTO t1 (b) VALUES ('a');
+SELECT a,b FROM t1 ORDER BY a,b;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/col_opt_not_null.test b/storage/rocksdb/mysql-test/rocksdb/t/col_opt_not_null.test
new file mode 100644
index 00000000000..1de4ccee0f7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/col_opt_not_null.test
@@ -0,0 +1,229 @@
+--source include/have_rocksdb.inc
+
+#
+# NOT NULL column attribute
+#
+
+#########################################
+# TODO:
+# Currently the test produces an arguably incorrect result
+# due to bug MySQL:68472. If the bug is ever fixed,
+# the test and result files will need to be updated.
+#########################################
+
+
+let $extra_col_opts = NOT NULL;
+
+--echo ########################
+--echo # BINARY columns
+--echo ########################
+
+--source type_binary.inc
+--let $col_type = BINARY
+--let $col_default = 0
+--source col_not_null.inc
+
+--echo ########################
+--echo # VARBINARY columns
+--echo ########################
+
+--source type_varbinary.inc
+--let $col_type = VARBINARY(64)
+--let $col_default = 'test'
+--source col_not_null.inc
+
+--echo ########################
+--echo # BIT columns
+--echo ########################
+
+--source type_bit.inc
+--let $col_type = BIT
+--let $col_default = 1
+--source col_not_null.inc
+
+--echo ########################
+--echo # BLOB columns
+--echo ########################
+
+--source type_blob.inc
+
+--let $col_default = ''
+
+--let $col_type = BLOB
+--source col_not_null.inc
+
+--let $col_type = TINYBLOB
+--source col_not_null.inc
+
+--let $col_type = MEDIUMBLOB
+--source col_not_null.inc
+
+--let $col_type = LONGBLOB
+--source col_not_null.inc
+
+--echo ########################
+--echo # BOOL columns
+--echo ########################
+
+--source type_bool.inc
+--let $col_type = BOOL
+--let $col_default = '0'
+--source col_not_null.inc
+
+--echo ########################
+--echo # CHAR columns
+--echo ########################
+
+--source type_char.inc
+--let $col_type = CHAR
+--let $col_default = '_'
+--source col_not_null.inc
+
+--echo ########################
+--echo # VARCHAR columns
+--echo ########################
+
+--source type_varchar.inc
+--let $col_type = VARCHAR(64)
+--let $col_default = 'test default'
+--source col_not_null.inc
+
+--echo ########################
+--echo # date and time columns
+--echo ########################
+
+set @col_opt_not_nullsave_time_zone=@@time_zone;
+set time_zone='UTC';
+
+--source type_date_time.inc
+
+SET TIMESTAMP=UNIX_TIMESTAMP('2013-12-12 12:12:12');
+
+--let $col_type = DATE
+--let $col_default = '2012-12-21'
+--source col_not_null.inc
+
+--let $col_type = DATETIME
+--let $col_default = '2012-12-21 12:21:12'
+--source col_not_null.inc
+
+# Even with explicit-defaults-for-timestamps, we still can't use
+# the standard include file, due to bug MySQL:68472
+
+--let $col_type = TIMESTAMP
+--let $col_default = '2012-12-21 12:21:12'
+--source col_not_null_timestamp.inc
+
+--let $col_type = TIME
+--let $col_default = '12:21:12'
+--source col_not_null.inc
+
+--let $col_type = YEAR
+--let $col_default = '2012'
+--source col_not_null.inc
+
+--let $col_type = YEAR(2)
+--let $col_default = '12'
+--source col_not_null.inc
+
+set time_zone= @col_opt_not_nullsave_time_zone;
+
+--echo ########################
+--echo # ENUM columns
+--echo ########################
+
+--source type_enum.inc
+
+--let $col_type = ENUM('test1','test2','test3')
+--let $col_default = 'test2'
+--source col_not_null.inc
+
+--echo ########################
+--echo # Fixed point columns (NUMERIC, DECIMAL)
+--echo ########################
+
+--source type_fixed.inc
+
+--let $col_type = DECIMAL
+--let $col_default = 1.1
+--source col_not_null.inc
+
+--let $col_type = NUMERIC
+--let $col_default = 0
+--source col_not_null.inc
+
+--echo ########################
+--echo # Floating point columns (FLOAT, DOUBLE)
+--echo ########################
+
+--source type_float.inc
+
+--let $col_type = FLOAT
+--let $col_default = 1.1
+--source col_not_null.inc
+
+--let $col_type = DOUBLE
+--let $col_default = 0
+--source col_not_null.inc
+
+--echo ########################
+--echo # INT columns
+--echo ########################
+
+--source type_int.inc
+
+--let $col_type = INT
+--let $col_default = 2147483647
+--source col_not_null.inc
+
+--let $col_type = TINYINT
+--let $col_default = 127
+--source col_not_null.inc
+
+--let $col_type = SMALLINT
+--let $col_default = 0
+--source col_not_null.inc
+
+--let $col_type = MEDIUMINT
+--let $col_default = 1
+--source col_not_null.inc
+
+--let $col_type = BIGINT
+--let $col_default = 9223372036854775807
+--source col_not_null.inc
+
+--echo ########################
+--echo # SET columns
+--echo ########################
+
+--source type_set.inc
+--let $col_type = SET('test1','test2','test3')
+--let $col_default = 'test2,test3'
+--source col_not_null.inc
+
+--echo ########################
+--echo # TEXT columns
+--echo ########################
+
+--source type_text.inc
+
+--let $col_default = ''
+
+--let $col_type = TEXT
+--source col_not_null.inc
+
+--let $col_type = TINYTEXT
+--source col_not_null.inc
+
+--let $col_type = MEDIUMTEXT
+--source col_not_null.inc
+
+--let $col_type = LONGTEXT
+--source col_not_null.inc
+
+
+--let $col_type =
+--let $col_default =
+--let $extra_col_opts =
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/col_opt_null.test b/storage/rocksdb/mysql-test/rocksdb/t/col_opt_null.test
new file mode 100644
index 00000000000..c41abb78642
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/col_opt_null.test
@@ -0,0 +1,220 @@
+--source include/have_rocksdb.inc
+
+#
+# NULL column attribute
+#
+
+let $extra_col_opts = NULL;
+
+
+--echo ########################
+--echo # BINARY columns
+--echo ########################
+
+--source type_binary.inc
+--let $col_type = BINARY
+--let $col_default = 0
+--source col_null.inc
+
+--echo ########################
+--echo # VARBINARY columns
+--echo ########################
+
+--source type_varbinary.inc
+--let $col_type = VARBINARY(64)
+--let $col_default = 'test'
+--source col_null.inc
+
+--echo ########################
+--echo # BIT columns
+--echo ########################
+
+--source type_bit.inc
+--let $col_type = BIT
+--let $col_default = 1
+--source col_null.inc
+
+--echo ########################
+--echo # BLOB columns
+--echo ########################
+
+--source type_blob.inc
+
+--let $col_default = ''
+
+--let $col_type = BLOB
+--source col_null.inc
+
+--let $col_type = TINYBLOB
+--source col_null.inc
+
+--let $col_type = MEDIUMBLOB
+--source col_null.inc
+
+--let $col_type = LONGBLOB
+--source col_null.inc
+
+--echo ########################
+--echo # BOOL columns
+--echo ########################
+
+--source type_bool.inc
+--let $col_type = BOOL
+--let $col_default = '0'
+--source col_null.inc
+
+
+--echo ########################
+--echo # CHAR columns
+--echo ########################
+
+--source type_char.inc
+--let $col_type = CHAR
+--let $col_default = '_'
+--source col_null.inc
+
+--echo ########################
+--echo # VARCHAR columns
+--echo ########################
+
+
+--source type_varchar.inc
+--let $col_type = VARCHAR(64)
+--let $col_default = 'test default'
+--source col_null.inc
+
+
+--echo ########################
+--echo # date and time columns
+--echo ########################
+
+set @col_opt_nullsave_time_zone=@@time_zone;
+set time_zone='UTC';
+
+--source type_date_time.inc
+
+--let $col_type = DATE
+--let $col_default = '2012-12-21'
+--source col_null.inc
+
+--let $col_type = DATETIME
+--let $col_default = '2012-12-21 12:21:12'
+--source col_null.inc
+
+--let $col_type = TIMESTAMP
+--let $col_default = '2012-12-21 12:21:12'
+--source col_null.inc
+
+--let $col_type = TIME
+--let $col_default = '12:21:12'
+--source col_null.inc
+
+--let $col_type = YEAR
+--let $col_default = '2012'
+--source col_null.inc
+
+--let $col_type = YEAR(2)
+--let $col_default = '12'
+--source col_null.inc
+
+set time_zone=@col_opt_nullsave_time_zone;
+
+--echo ########################
+--echo # ENUM columns
+--echo ########################
+
+--source type_enum.inc
+--let $col_type = ENUM('test1','test2','test3')
+--let $col_default = 'test2'
+--source col_null.inc
+
+--echo ########################
+--echo # Fixed point columns (NUMERIC, DECIMAL)
+--echo ########################
+
+--source type_fixed.inc
+
+--let $col_type = DECIMAL
+--let $col_default = 1.1
+--source col_null.inc
+
+--let $col_type = NUMERIC
+--let $col_default = 0
+--source col_null.inc
+
+--echo ########################
+--echo # Floating point columns (FLOAT, DOUBLE)
+--echo ########################
+
+--source type_float.inc
+
+--let $col_type = FLOAT
+--let $col_default = 1.1
+--source col_null.inc
+
+--let $col_type = DOUBLE
+--let $col_default = 0
+--source col_null.inc
+
+--echo ########################
+--echo # INT columns
+--echo ########################
+
+--source type_int.inc
+
+--let $col_type = INT
+--let $col_default = 2147483647
+--source col_null.inc
+
+--let $col_type = TINYINT
+--let $col_default = 127
+--source col_null.inc
+
+--let $col_type = SMALLINT
+--let $col_default = 0
+--source col_null.inc
+
+--let $col_type = MEDIUMINT
+--let $col_default = 1
+--source col_null.inc
+
+--let $col_type = BIGINT
+--let $col_default = 9223372036854775807
+--source col_null.inc
+
+--echo ########################
+--echo # SET columns
+--echo ########################
+
+--source type_set.inc
+--let $col_type = SET('test1','test2','test3')
+--let $col_default = 'test2,test3'
+--source col_null.inc
+
+
+--echo ########################
+--echo # TEXT columns
+--echo ########################
+
+--source type_text.inc
+
+--let $col_default = ''
+
+--let $col_type = TEXT
+--source col_null.inc
+
+--let $col_type = TINYTEXT
+--source col_null.inc
+
+--let $col_type = MEDIUMTEXT
+--source col_null.inc
+
+--let $col_type = LONGTEXT
+--source col_null.inc
+
+
+--let $col_type =
+--let $col_default =
+--let $extra_col_opts =
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/col_opt_unsigned.test b/storage/rocksdb/mysql-test/rocksdb/t/col_opt_unsigned.test
new file mode 100644
index 00000000000..25cda84ce2e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/col_opt_unsigned.test
@@ -0,0 +1,74 @@
+--source include/have_rocksdb.inc
+
+#
+# UNSIGNED column attribute
+#
+
+--let $extra_col_opts = UNSIGNED
+
+--echo ########################
+--echo # Fixed point columns (NUMERIC, DECIMAL)
+--echo ########################
+
+--source type_fixed.inc
+
+CREATE TABLE t1 (
+ a DECIMAL UNSIGNED,
+ b NUMERIC UNSIGNED,
+ PRIMARY KEY (a)
+) ENGINE=rocksdb;
+
+SHOW COLUMNS IN t1;
+
+INSERT INTO t1 (a,b) VALUES (1.0,-1.0);
+INSERT INTO t1 (a,b) VALUES (-100,100);
+--sorted_result
+SELECT a,b FROM t1;
+DROP TABLE t1;
+
+--echo ########################
+--echo # Floating point columns (FLOAT, DOUBLE)
+--echo ########################
+
+--source type_float.inc
+
+CREATE TABLE t1 (
+ a DOUBLE UNSIGNED,
+ b FLOAT UNSIGNED,
+ PRIMARY KEY (b)
+) ENGINE=rocksdb;
+
+SHOW COLUMNS IN t1;
+
+INSERT INTO t1 (a,b) VALUES (1.0,-1.0);
+INSERT INTO t1 (a,b) VALUES (-100,100);
+--sorted_result
+SELECT a,b FROM t1;
+DROP TABLE t1;
+
+--echo ########################
+--echo # INT columns
+--echo ########################
+
+--source type_int.inc
+
+CREATE TABLE t1 (
+ t TINYINT UNSIGNED,
+ s SMALLINT UNSIGNED,
+ m MEDIUMINT UNSIGNED,
+ i INT UNSIGNED,
+ b BIGINT UNSIGNED,
+ PRIMARY KEY (b)
+) ENGINE=rocksdb;
+
+SHOW COLUMNS IN t1;
+
+INSERT INTO t1 (t,s,m,i,b) VALUES (255,65535,16777215,4294967295,18446744073709551615);
+INSERT INTO t1 (t,s,m,i,b) VALUES (-1,-1,-1,-1,-1);
+--sorted_result
+SELECT t,s,m,i,b FROM t1;
+
+DROP TABLE t1;
+
+--let $extra_col_opts =
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/col_opt_zerofill.test b/storage/rocksdb/mysql-test/rocksdb/t/col_opt_zerofill.test
new file mode 100644
index 00000000000..37982ae0964
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/col_opt_zerofill.test
@@ -0,0 +1,67 @@
+--source include/have_rocksdb.inc
+
+#
+# ZEROFILL column attribute
+#
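+# ZEROFILL pads the displayed value with leading zeros up to the column's
+# display width (e.g. an INT(5) ZEROFILL column displays 42 as 00042) and
+# also implies UNSIGNED.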
+
+let $extra_col_opts = ZEROFILL;
+
+--echo ########################
+--echo # Fixed point columns (NUMERIC, DECIMAL)
+--echo ########################
+
+--source type_fixed.inc
+
+CREATE TABLE t1 (
+ a DECIMAL ZEROFILL,
+ b NUMERIC ZEROFILL,
+ PRIMARY KEY (a)
+) ENGINE=rocksdb;
+
+SHOW COLUMNS IN t1;
+
+INSERT INTO t1 (a,b) VALUES (1.1,1234);
+SELECT a,b FROM t1;
+DROP TABLE t1;
+
+--echo ########################
+--echo # Floating point columns (FLOAT, DOUBLE)
+--echo ########################
+
+--source type_float.inc
+
+CREATE TABLE t1 (
+ a DOUBLE ZEROFILL,
+ b FLOAT ZEROFILL,
+ PRIMARY KEY (b)
+) ENGINE=rocksdb;
+
+SHOW COLUMNS IN t1;
+
+INSERT INTO t1 (a,b) VALUES (1,1234.5);
+SELECT a,b FROM t1;
+DROP TABLE t1;
+
+--echo ########################
+--echo # INT columns
+--echo ########################
+
+--source type_int.inc
+
+CREATE TABLE t1 (
+ t TINYINT ZEROFILL,
+ s SMALLINT ZEROFILL,
+ m MEDIUMINT ZEROFILL,
+ i INT ZEROFILL,
+ b BIGINT ZEROFILL,
+ PRIMARY KEY (b)
+) ENGINE=rocksdb;
+
+SHOW COLUMNS IN t1;
+
+INSERT INTO t1 (t,s,m,i,b) VALUES (1,10,100,1000,0);
+SELECT t,s,m,i,b FROM t1;
+DROP TABLE t1;
+
+--let $extra_col_opts =
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/collation-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/collation-master.opt
new file mode 100644
index 00000000000..79e591636fd
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/collation-master.opt
@@ -0,0 +1 @@
+--rocksdb_strict_collation_check=ON
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/collation.test b/storage/rocksdb/mysql-test/rocksdb/t/collation.test
new file mode 100644
index 00000000000..3b808bc329a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/collation.test
@@ -0,0 +1,211 @@
+--source include/have_rocksdb.inc
+# MariaDB doesn't have the server variables needed to check the GCC version,
+# so the following check is commented out:
+# --source include/have_fullregex.inc
+
+call mtr.add_suppression("Invalid pattern");
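+
+# Background: MyRocks stores index keys in a memcmp-comparable encoding, so
+# only binary collations (e.g. latin1_bin, utf8_bin) can be indexed reliably;
+# the strict collation check rejects case-insensitive collations on indexed
+# columns unless an exception is configured.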
+
+# ci non-indexed column is allowed
+CREATE TABLE t1 (id INT primary key, value varchar(50), value2 varbinary(50), value3 text) engine=rocksdb charset utf8;
+# ci indexed column is not allowed
+--error ER_UNSUPPORTED_COLLATION
+ALTER TABLE t1 ADD INDEX (value);
+DROP TABLE t1;
+
+# ci indexed column is not allowed
+--error ER_UNSUPPORTED_COLLATION
+CREATE TABLE t1 (id INT primary key, value varchar(50), value2 varbinary(50), value3 text, index(value)) engine=rocksdb charset utf8;
+--error ER_UNSUPPORTED_COLLATION
+CREATE TABLE t1 (id INT primary key, value varchar(50), value2 varbinary(50), value3 text, index(value3(50))) engine=rocksdb charset utf8;
+# ci indexed column with rocksdb_strict_collation_check=OFF is allowed.
+SET GLOBAL rocksdb_strict_collation_check=0;
+CREATE TABLE t1 (id INT primary key, value varchar(50), value2 varbinary(50), value3 text, index(value3(50))) engine=rocksdb charset utf8;
+DROP TABLE t1;
+SET GLOBAL rocksdb_strict_collation_check=1;
+
+# cs indexed column is allowed
+CREATE TABLE t1 (id INT primary key, value varchar(50), value2 varbinary(50), value3 text, index(value2)) engine=rocksdb charset utf8;
+DROP TABLE t1;
+
+# cs latin1_bin is allowed
+CREATE TABLE t1 (id varchar(20), value varchar(50), value2 varchar(50), value3 text, primary key (id), index(value, value2)) engine=rocksdb charset latin1 collate latin1_bin;
+# THIS SHOULD FAIL BUT IT DOES NOT
+ALTER TABLE t1 collate=latin1_general_ci;
+DROP TABLE t1;
+
+# cs utf8_bin is allowed
+CREATE TABLE t1 (id varchar(20), value varchar(50), value2 varchar(50), value3 text, primary key (id), index(value, value2)) engine=rocksdb charset utf8 collate utf8_bin;
+DROP TABLE t1;
+
+# cs mixed latin1_bin and utf8_bin is allowed
+CREATE TABLE t1 (id varchar(20) collate latin1_bin, value varchar(50) collate utf8_bin, value2 varchar(50) collate latin1_bin, value3 text, primary key (id), index(value, value2)) engine=rocksdb;
+DROP TABLE t1;
+
+# ci indexed column is not allowed unless table name is in exception list
+SET GLOBAL rocksdb_strict_collation_exceptions=t1;
+CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE t1;
+--error ER_UNSUPPORTED_COLLATION
+CREATE TABLE t2 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+
+# test regex for exception list
+SET GLOBAL rocksdb_strict_collation_exceptions="t.*";
+CREATE TABLE t123 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE t123;
+--error ER_UNSUPPORTED_COLLATION
+CREATE TABLE s123 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+
+SET GLOBAL rocksdb_strict_collation_exceptions=".t.*";
+CREATE TABLE xt123 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE xt123;
+--error ER_UNSUPPORTED_COLLATION
+CREATE TABLE t123 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+
+# test multiple entries in the list with commas
+SET GLOBAL rocksdb_strict_collation_exceptions="s.*,t.*";
+CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE s1;
+CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE t1;
+--error ER_UNSUPPORTED_COLLATION
+CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+
+# test multiple entries in the list with vertical bar
+SET GLOBAL rocksdb_strict_collation_exceptions="s.*|t.*";
+CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE s1;
+CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE t1;
+--error ER_UNSUPPORTED_COLLATION
+CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+
+# test multiple entries in the list and extra comma at the front
+SET GLOBAL rocksdb_strict_collation_exceptions=",s.*,t.*";
+CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE s1;
+CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE t1;
+--error ER_UNSUPPORTED_COLLATION
+CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+
+# test multiple entries in the list and extra vertical bar at the front
+SET GLOBAL rocksdb_strict_collation_exceptions="|s.*|t.*";
+CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE s1;
+CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE t1;
+--error ER_UNSUPPORTED_COLLATION
+CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+
+# test multiple entries in the list and extra comma in the middle
+SET GLOBAL rocksdb_strict_collation_exceptions="s.*,,t.*";
+CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE s1;
+CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE t1;
+--error ER_UNSUPPORTED_COLLATION
+CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+
+# test multiple entries in the list and extra vertical bar in the middle
+SET GLOBAL rocksdb_strict_collation_exceptions="s.*||t.*";
+CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE s1;
+CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE t1;
+--error ER_UNSUPPORTED_COLLATION
+CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+
+# test multiple entries in the list and extra comma at the end
+SET GLOBAL rocksdb_strict_collation_exceptions="s.*,t.*,";
+CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE s1;
+CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE t1;
+--error ER_UNSUPPORTED_COLLATION
+CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+
+# test multiple entries in the list and extra vertical bar at the end
+SET GLOBAL rocksdb_strict_collation_exceptions="s.*|t.*|";
+CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE s1;
+CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE t1;
+--error ER_UNSUPPORTED_COLLATION
+CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+
+# test multiple entries in the list and tons of commas and vertical bars just for the fun of it
+SET GLOBAL rocksdb_strict_collation_exceptions="||||,,,,s.*,,|,,||,t.*,,|||,,,";
+CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE s1;
+CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE t1;
+--error ER_UNSUPPORTED_COLLATION
+CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
+
+# test allowing alters to create temporary tables
+SET GLOBAL rocksdb_strict_collation_exceptions='t1';
+CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb;
+ALTER TABLE t1 AUTO_INCREMENT=1;
+DROP TABLE t1;
+--error ER_UNSUPPORTED_COLLATION
+CREATE TABLE t2 (id INT primary key, value varchar(50), index(value)) engine=rocksdb;
+CREATE TABLE t2 (id INT primary key, value varchar(50)) engine=rocksdb;
+--error ER_UNSUPPORTED_COLLATION
+ALTER TABLE t2 ADD INDEX(value);
+DROP TABLE t2;
+
+
+# test invalid regex (missing end bracket)
+SET GLOBAL rocksdb_strict_collation_exceptions="[a-b";
+let SEARCH_FILE=$MYSQLTEST_VARDIR/log/mysqld.1.err;
+let SEARCH_PATTERN=Invalid pattern in strict_collation_exceptions: \[a-b;
+source include/search_pattern_in_file.inc;
+--error ER_UNSUPPORTED_COLLATION
+CREATE TABLE a (id INT PRIMARY KEY, value varchar(50), index(value)) engine=rocksdb charset utf8;
+SET GLOBAL rocksdb_strict_collation_exceptions="[a-b]";
+CREATE TABLE a (id INT PRIMARY KEY, value varchar(50), index(value)) engine=rocksdb charset utf8;
+CREATE TABLE b (id INT PRIMARY KEY, value varchar(50), index(value)) engine=rocksdb charset utf8;
+--error ER_UNSUPPORTED_COLLATION
+CREATE TABLE c (id INT PRIMARY KEY, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE a, b;
+
+call mtr.add_suppression("Invalid pattern in strict_collation_exceptions:");
+# test invalid regex (trailing escape)
+SET GLOBAL rocksdb_strict_collation_exceptions="abc\\";
+let SEARCH_PATTERN=Invalid pattern in strict_collation_exceptions: abc;
+source include/search_pattern_in_file.inc;
+--error ER_UNSUPPORTED_COLLATION
+CREATE TABLE abc (id INT PRIMARY KEY, value varchar(50), index(value)) engine=rocksdb charset utf8;
+SET GLOBAL rocksdb_strict_collation_exceptions="abc";
+CREATE TABLE abc (id INT PRIMARY KEY, value varchar(50), index(value)) engine=rocksdb charset utf8;
+--error ER_UNSUPPORTED_COLLATION
+CREATE TABLE abcd (id INT PRIMARY KEY, value varchar(50), index(value)) engine=rocksdb charset utf8;
+DROP TABLE abc;
+
+# test bad regex (null caused a crash) - Issue 493
+SET GLOBAL rocksdb_strict_collation_exceptions=null;
+
+# test for warnings instead of errors
+--let $_mysqld_option=--rocksdb_error_on_suboptimal_collation=0
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--source include/restart_mysqld_with_option.inc
+
+SET GLOBAL rocksdb_strict_collation_check=1;
+
+# ci indexed column is not optimal, should emit a warning
+CREATE TABLE t1 (id INT primary key, value varchar(50), value2 varbinary(50), value3 text, index(value)) engine=rocksdb charset utf8;
+DROP TABLE t1;
+
+# ci non-indexed column is allowed
+CREATE TABLE t1 (id INT primary key, value varchar(50), value2 varbinary(50), value3 text) engine=rocksdb charset utf8;
+# ci indexed column is not allowed, should emit a warning
+ALTER TABLE t1 ADD INDEX (value);
+DROP TABLE t1;
+
+# cs latin1_bin is allowed
+CREATE TABLE t1 (id varchar(20), value varchar(50), value2 varchar(50), value3 text, primary key (id), index(value, value2)) engine=rocksdb charset latin1 collate latin1_bin;
+# THIS SHOULD WARN BUT IT DOES NOT
+ALTER TABLE t1 collate=latin1_general_ci;
+DROP TABLE t1;
+
+# cleanup
+--source include/restart_mysqld.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/collation_exception-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/collation_exception-master.opt
new file mode 100644
index 00000000000..13563edb439
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/collation_exception-master.opt
@@ -0,0 +1,2 @@
+--rocksdb_strict_collation_check=ON
+--rocksdb_strict_collation_exceptions='r1.lol'
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/collation_exception.test b/storage/rocksdb/mysql-test/rocksdb/t/collation_exception.test
new file mode 100644
index 00000000000..334b1bb2750
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/collation_exception.test
@@ -0,0 +1,29 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE `r1.lol` (
+ `c1` int(10) NOT NULL DEFAULT '0',
+ `c2` int(11) NOT NULL DEFAULT '0',
+ `c3` int(1) NOT NULL DEFAULT '0',
+ `c4` int(11) NOT NULL DEFAULT '0',
+ `c5` int(11) NOT NULL DEFAULT '0',
+ `c6` varchar(100) NOT NULL DEFAULT '',
+ `c7` varchar(100) NOT NULL DEFAULT '',
+ `c8` varchar(255) NOT NULL DEFAULT '',
+ `c9` int(10) NOT NULL DEFAULT '125',
+ `c10` int(10) NOT NULL DEFAULT '125',
+ `c11` text NOT NULL,
+ `c12` int(11) NOT NULL DEFAULT '0',
+ `c13` int(10) NOT NULL DEFAULT '0',
+ `c14` text NOT NULL,
+ `c15` blob NOT NULL,
+ `c16` int(11) NOT NULL DEFAULT '0',
+ `c17` int(11) NOT NULL DEFAULT '0',
+ `c18` int(11) NOT NULL DEFAULT '0',
+ PRIMARY KEY (`c1`),
+ KEY i1 (`c4`),
+ KEY i2 (`c7`),
+ KEY i3 (`c2`)) ENGINE=RocksDB DEFAULT CHARSET=latin1;
+
+DROP INDEX i1 ON `r1.lol`;
+DROP TABLE `r1.lol`;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/com_rpc_tx.cnf b/storage/rocksdb/mysql-test/rocksdb/t/com_rpc_tx.cnf
new file mode 100644
index 00000000000..49ebd28c793
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/com_rpc_tx.cnf
@@ -0,0 +1,4 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+binlog_format=row
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/com_rpc_tx.test b/storage/rocksdb/mysql-test/rocksdb/t/com_rpc_tx.test
new file mode 100644
index 00000000000..963f6c247fa
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/com_rpc_tx.test
@@ -0,0 +1,90 @@
+--source "include/have_rocksdb.inc"
+--source "include/have_log_bin.inc"
+# Don't run this with --rpc_protocol because it is doing its own work with
+# the RPC protocol
+--source "include/not_rpc_protocol.inc"
+
+#
+# This test was created because 2pc transactions were failing in MyRocks
+# when using detached sessions. The test generates two separate transactions
+# in two detached sessions and then attempts to commit them as simultaneously
+# as possible. This consistently reproduced the problem before the fix and
+# succeeds now that the fix is in place.
+
+CREATE DATABASE db_rpc;
+USE db_rpc;
+CREATE TABLE t1(pk INT PRIMARY KEY) ENGINE=rocksdb;
+
+SET GLOBAL rocksdb_enable_2pc=1;
+
+connect(con2,localhost,root,,);
+connection default;
+
+query_attrs_add rpc_role root;
+query_attrs_add rpc_db db_rpc;
+SET autocommit = 0;
+let $rpc_id1 = get_rpc_id();
+if ($rpc_id1 == "") {
+ echo "Fail: rpc_id not returned as expected";
+}
+
+SET autocommit = 0;
+let $rpc_id2 = get_rpc_id();
+if ($rpc_id2 == "") {
+ echo "Fail: rpc_id not returned as expected";
+}
+
+query_attrs_delete rpc_role;
+query_attrs_delete rpc_db;
+
+query_attrs_add rpc_id $rpc_id1;
+BEGIN;
+query_attrs_delete rpc_id;
+
+query_attrs_add rpc_id $rpc_id2;
+BEGIN;
+query_attrs_delete rpc_id;
+
+query_attrs_add rpc_id $rpc_id1;
+SELECT * from t1;
+query_attrs_delete rpc_id;
+
+query_attrs_add rpc_id $rpc_id2;
+SELECT * from t1;
+query_attrs_delete rpc_id;
+
+query_attrs_add rpc_id $rpc_id1;
+INSERT INTO t1 VALUES(1);
+query_attrs_delete rpc_id;
+
+query_attrs_add rpc_id $rpc_id2;
+INSERT INTO t1 VALUES(2);
+query_attrs_delete rpc_id;
+
+query_attrs_add rpc_id $rpc_id1;
+send COMMIT;
+
+connection con2;
+query_attrs_add rpc_id $rpc_id2;
+send COMMIT;
+
+connection default;
+reap;
+query_attrs_delete rpc_id;
+
+connection con2;
+reap;
+query_attrs_delete rpc_id;
+
+connection default;
+disconnect con2;
+
+SELECT * from db_rpc.t1;
+
+disable_query_log;
+eval KILL $rpc_id1;
+eval KILL $rpc_id2;
+enable_query_log;
+
+DROP DATABASE db_rpc;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/commit_in_the_middle_ddl.test b/storage/rocksdb/mysql-test/rocksdb/t/commit_in_the_middle_ddl.test
new file mode 100644
index 00000000000..02f45779b9a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/commit_in_the_middle_ddl.test
@@ -0,0 +1,27 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS a;
+--enable_warnings
+
+create table a (id int, value int, primary key (id) comment 'cf_a') engine=rocksdb;
+
+--disable_query_log
+let $i = 1;
+while ($i <= 100000) {
+ let $insert = INSERT INTO a VALUES($i, $i);
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+
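+# rocksdb_bulk_load bypasses the normal write path (e.g. unique-key checks),
+# and rocksdb_commit_in_the_middle commits implicitly every
+# rocksdb_bulk_load_size rows, so the ALTER below avoids building one huge
+# transaction.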
+set rocksdb_bulk_load=1;
+set rocksdb_commit_in_the_middle=1;
+alter table a add index v (value) COMMENT 'cf_a';
+set rocksdb_bulk_load=0;
+set rocksdb_commit_in_the_middle=0;
+select count(*) from a force index(primary);
+select count(*) from a force index(v);
+
+DROP TABLE a;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/compact_deletes-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/compact_deletes-master.opt
new file mode 100644
index 00000000000..3b4871f864a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/compact_deletes-master.opt
@@ -0,0 +1,3 @@
+--rocksdb_debug_optimizer_n_rows=1000
+--rocksdb_records_in_range=50
+--rocksdb_compaction_sequential_deletes_count_sd=1
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/compact_deletes.test b/storage/rocksdb/mysql-test/rocksdb/t/compact_deletes.test
new file mode 100644
index 00000000000..b61da676b48
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/compact_deletes.test
@@ -0,0 +1,88 @@
+--source include/not_windows.inc
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS r1;
+--enable_warnings
+
+create table r1 (
+ id1 int,
+ id2 int,
+ type int,
+ value varchar(100),
+ value2 int,
+ value3 int,
+ primary key (type, id1, id2),
+ index id1_type (id1, type, value2, value, id2)
+) engine=rocksdb collate latin1_bin;
+
+select 'loading data';
+
+--disable_query_log
+let $i=0;
+while ($i<1000)
+{
+ inc $i;
+ eval insert r1(id1, id2, type, value, value2, value3)
+ values($i,$i,$i, 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',$i,$i);
+}
+--enable_query_log
+
+set global rocksdb_force_flush_memtable_now=1;
+optimize table r1;
+
+--echo Test 1: Do a bunch of updates without setting the compaction sysvar
+--echo Expect: no compaction
+let $window = 0;
+let $deletes = 0;
+let $file_size = 0;
+let $secondary_only = 0;
+let $primary = 1;
+let $no_more_deletes = 0;
+--source compact_deletes_test.inc
+
+--echo Test 2: Do a bunch of updates and set the compaction sysvar
+--echo Expect: compaction
+let $window = 1000;
+let $deletes = 990;
+let $file_size = 0;
+let $secondary_only = 0;
+let $primary = 1;
+let $no_more_deletes = 1;
+--source compact_deletes_test.inc
+
+--echo Test 3: Do a bunch of updates and set the compaction sysvar and a file size to something large
+--echo Expect: no compaction
+let $window = 1000;
+let $deletes = 1000;
+let $file_size = 1000000;
+let $secondary_only = 0;
+let $primary = 1;
+let $no_more_deletes = 0;
+--source compact_deletes_test.inc
+
+--echo Test 4: Do a bunch of secondary key updates and set the compaction sysvar
+--echo Expect: compaction
+let $window = 1000;
+let $deletes = 50;
+let $file_size = 0;
+let $secondary_only = 1;
+let $primary = 0;
+let $no_more_deletes = 1;
+--source compact_deletes_test.inc
+
+--echo Test 5: Do a bunch of secondary key updates and set the compaction sysvar,
+--echo and rocksdb_compaction_sequential_deletes_count_sd turned on
+--echo Expect: compaction
+let $window = 1000;
+let $deletes = 50;
+let $file_size = 0;
+let $secondary_only = 1;
+let $primary = 0;
+let $no_more_deletes = 1;
+SET @save_rocksdb_compaction_sequential_deletes_count_sd = @@global.rocksdb_compaction_sequential_deletes_count_sd;
+SET GLOBAL rocksdb_compaction_sequential_deletes_count_sd= ON;
+--source compact_deletes_test.inc
+SET GLOBAL rocksdb_compaction_sequential_deletes_count_sd= @save_rocksdb_compaction_sequential_deletes_count_sd;
+
+drop table r1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/compact_deletes_test.inc b/storage/rocksdb/mysql-test/rocksdb/t/compact_deletes_test.inc
new file mode 100644
index 00000000000..0a0d670505f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/compact_deletes_test.inc
@@ -0,0 +1,72 @@
+# Usage:
+# let $window = <window size>;
+# let $deletes = <number of deletes per window>;
+# let $file_size = <min size of the file>;
+# let $secondary_only = <1 to update only the secondary key>;
+# let $primary = <1 to update the primary key>;
+# let $no_more_deletes = <1 if no delete markers are expected to remain>;
+# --source compact_deletes_test.inc
+#
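+# For example, compact_deletes.test (Test 2) invokes this file as:
+#   let $window = 1000;
+#   let $deletes = 990;
+#   let $file_size = 0;
+#   let $secondary_only = 0;
+#   let $primary = 1;
+#   let $no_more_deletes = 1;
+#   --source compact_deletes_test.inc
+#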
+
+let $save_rocksdb_compaction_sequential_deletes_window = `SELECT @@rocksdb_compaction_sequential_deletes_window`;
+eval set global rocksdb_compaction_sequential_deletes_window=$window;
+let $save_rocksdb_compaction_sequential_deletes = `SELECT @@rocksdb_compaction_sequential_deletes`;
+eval set global rocksdb_compaction_sequential_deletes= $deletes;
+let $save_rocksdb_compaction_sequential_deletes_file_size = `SELECT @@rocksdb_compaction_sequential_deletes_file_size`;
+eval set global rocksdb_compaction_sequential_deletes_file_size=$file_size;
+--disable_query_log
+let $i=0;
+while ($i<1000)
+{
+ inc $i;
+ if ($secondary_only)
+ {
+ eval update r1 set value2=value2+1 where id1=$i;
+ }
+ if ($primary)
+ {
+ eval update r1 set id2=id2+10000 where id1=500;
+ }
+}
+--enable_query_log
+set global rocksdb_force_flush_memtable_now=1;
+--sleep 1
+
+--disable_query_log
+let $wait_timeout= 300; # Override default 30 seconds with 300.
+let $wait_condition = select count(*) = 0
+ as c from information_schema.rocksdb_global_info
+ where TYPE = 'DDL_DROP_INDEX_ONGOING';
+--source include/wait_condition.inc
+--enable_query_log
+
+let NO_MORE_DELETES=$no_more_deletes;
+perl;
+ $num_retries=240;
+ $retry=0;
+ print "wait_for_delete: $ENV{no_more_deletes}\n";
+ while ($retry++ < $num_retries) {
+ $total_d=$total_e=0;
+ for $f (<$ENV{MYSQLTEST_VARDIR}/mysqld.1/data/#rocksdb/*.sst>) {
+ # excluding system cf
+ $filename= "$ENV{MARIAROCKS_SST_DUMP} --command=scan --output_hex --file=$f";
+ open(D, '-|', $filename) || die("Can't open file $filename: $!");
+ while (<D>) {
+ next unless /'(\d{8})/ and $1 >= 8;
+ $total_d++ if /: [07]/;  # count deletion (0) / single-deletion (7) markers
+ $total_e++ if /: 1/;     # count put (1) records
+ }
+ close D;
+ }
+ last if $total_e and not ($total_d and $ENV{NO_MORE_DELETES});
+ sleep 1;
+ }
+
+ unless ($total_e) {
+ print "No records in the database\n";
+ exit;
+ }
+
+ print $total_d ? "There are deletes left\n" : "No more deletes left\n";
+EOF
+
+eval SET GLOBAL rocksdb_compaction_sequential_deletes= $save_rocksdb_compaction_sequential_deletes;
+eval SET GLOBAL rocksdb_compaction_sequential_deletes_file_size= $save_rocksdb_compaction_sequential_deletes_file_size;
+eval SET GLOBAL rocksdb_compaction_sequential_deletes_window= $save_rocksdb_compaction_sequential_deletes_window;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/compression_zstd.test b/storage/rocksdb/mysql-test/rocksdb/t/compression_zstd.test
new file mode 100644
index 00000000000..c2216f768d0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/compression_zstd.test
@@ -0,0 +1,14 @@
+--source include/have_rocksdb.inc
+
+let $no_zstd=`select @@rocksdb_supported_compression_types NOT LIKE '%ZSTD%'`;
+
+if ($no_zstd)
+{
+ -- Skip Requires RocksDB to be built with ZStandard Compression support
+}
+
+--let $restart_parameters=--rocksdb_default_cf_options=compression_per_level=kZSTDNotFinalCompression;compression_opts=-14:4:0;
+--source include/restart_mysqld.inc
+
+create table t (id int primary key) engine=rocksdb;
+drop table t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/concurrent_alter.test b/storage/rocksdb/mysql-test/rocksdb/t/concurrent_alter.test
new file mode 100644
index 00000000000..3ebdd67a1a6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/concurrent_alter.test
@@ -0,0 +1,39 @@
+--source include/have_rocksdb.inc
+
+# Bash
+--source include/not_windows.inc
+
+#
+# Generate concurrent requests to alter a table using mysqlslap
+#
+
+--disable_warnings
+DROP DATABASE IF EXISTS mysqlslap;
+--enable_warnings
+
+CREATE DATABASE mysqlslap;
+
+use mysqlslap;
+
+CREATE TABLE a1 (a int, b int) ENGINE=ROCKSDB;
+INSERT INTO a1 VALUES (1, 1);
+
+--write_file $MYSQL_TMP_DIR/concurrent_alter.sh
+$MYSQL_SLAP --silent --delimiter=";" --query="alter table a1 add index bx(b); alter table a1 drop index bx" --concurrency=1 --iterations=25 &
+$MYSQL_SLAP --silent --delimiter=";" --query="alter table a1 add index ax(a); alter table a1 drop index ax" --concurrency=1 --iterations=25 &
+sleep 2
+$MYSQL_SLAP --silent --delimiter=";" --query="select * from a1 where a=1" --concurrency=16 --iterations=1000 &
+$MYSQL_SLAP --silent --delimiter=";" --query="select * from a1 where b=1" --concurrency=16 --iterations=1000
+sleep 2
+$MYSQL_SLAP --silent --delimiter=";" --query="select * from a1 where a=1" --concurrency=16 --iterations=1000 &
+$MYSQL_SLAP --silent --delimiter=";" --query="select * from a1 where b=1" --concurrency=16 --iterations=1000
+wait
+EOF
+
+--exec bash $MYSQL_TMP_DIR/concurrent_alter.sh
+
+let $server_charset=`select @@character_set_server`;
+--replace_result $server_charset DEFAULT_CHARSET
+SHOW CREATE TABLE a1;
+
+DROP DATABASE mysqlslap;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_read_committed.opt b/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_read_committed.opt
new file mode 100644
index 00000000000..418e4c3f056
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_read_committed.opt
@@ -0,0 +1 @@
+--rocksdb_default_cf_options=disable_auto_compactions=true \ No newline at end of file
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_read_committed.test b/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_read_committed.test
new file mode 100644
index 00000000000..4dfa5abbbbb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_read_committed.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+let $trx_isolation = READ COMMITTED;
+
+--source consistent_snapshot.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_repeatable_read.opt b/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_repeatable_read.opt
new file mode 100644
index 00000000000..418e4c3f056
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_repeatable_read.opt
@@ -0,0 +1 @@
+--rocksdb_default_cf_options=disable_auto_compactions=true \ No newline at end of file
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_repeatable_read.test b/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_repeatable_read.test
new file mode 100644
index 00000000000..c9f28dbcbe4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_repeatable_read.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+let $trx_isolation = REPEATABLE READ;
+
+--source consistent_snapshot.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_serializable.opt b/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_serializable.opt
new file mode 100644
index 00000000000..418e4c3f056
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_serializable.opt
@@ -0,0 +1 @@
+--rocksdb_default_cf_options=disable_auto_compactions=true \ No newline at end of file
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_serializable.test b/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_serializable.test
new file mode 100644
index 00000000000..57b45050fea
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_serializable.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+let $trx_isolation = SERIALIZABLE;
+
+--source consistent_snapshot.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/consistent_snapshot.inc b/storage/rocksdb/mysql-test/rocksdb/t/consistent_snapshot.inc
new file mode 100644
index 00000000000..ca7510b1253
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/consistent_snapshot.inc
@@ -0,0 +1,136 @@
+#
+# TRANSACTION WITH CONSISTENT SNAPSHOT
+#
+
+--enable_connect_log
+
+# Save the initial number of concurrent sessions
+--source include/count_sessions.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+connection con1;
+
+CREATE TABLE t1 (a INT, pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=ROCKSDB;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $trx_isolation;
+
+# While a consistent snapshot transaction is executed,
+# no external inserts should be visible to the transaction.
+# But it should only work this way for REPEATABLE READ and SERIALIZABLE.
+
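+# (The "--error 0,ER_..." pattern below accepts both outcomes: depending on
+# $trx_isolation, START TRANSACTION WITH CONSISTENT SNAPSHOT either succeeds
+# or fails with ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT.)
+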
+--error 0,ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+--echo ERROR: $mysql_errno
+
+connection con2;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+
+connection con1;
+COMMIT;
+
+# verifying snapshot is released after finishing transaction
+connection con2;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+
+connection con1;
+--error 0,ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+--echo ERROR: $mysql_errno
+
+connection con2;
+INSERT INTO t1 (a) VALUES (1);
+
+connection con1;
+--echo # If consistent read works on this isolation level ($trx_isolation), the following SELECT should not return the value we inserted (1)
+SELECT a FROM t1;
+COMMIT;
+
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t1;
+
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+connection con1;
+CREATE TABLE r1 (id int primary key, value int, value2 int) engine=ROCKSDB;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $trx_isolation;
+insert into r1 values (1,1,1),(2,2,2),(3,3,3),(4,4,4);
+
+BEGIN;
+
+connection con2;
+INSERT INTO r1 values (5,5,5);
+
+connection con1;
+SELECT * FROM r1; # 5
+
+connection con2;
+INSERT INTO r1 values (6,6,6);
+
+connection con1;
+SELECT * FROM r1; # 5
+COMMIT;
+SELECT * FROM r1; # 6
+
+--error 0,ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+--echo ERROR: $mysql_errno
+
+connection con2;
+INSERT INTO r1 values (7,7,7);
+
+connection con1;
+SELECT * FROM r1; # 6
+
+connection con2;
+INSERT INTO r1 values (8,8,8);
+
+connection con1;
+SELECT * FROM r1; # 6
+COMMIT;
+SELECT * FROM r1; # 8
+
+--error 0,ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+--echo ERROR: $mysql_errno
+
+connection con2;
+INSERT INTO r1 values (9,9,9);
+
+connection con1;
+--error 0,ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+--echo ERROR: $mysql_errno
+
+connection con2;
+INSERT INTO r1 values (10,10,10);
+
+connection con1;
+SELECT * FROM r1; # 9
+
+--error 0,ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+--echo ERROR: $mysql_errno
+# Succeeds with Read Committed, Fails with Repeatable Read
+--error 0,ER_UPDATES_WITH_CONSISTENT_SNAPSHOT
+INSERT INTO r1 values (11,11,11);
+--echo ERROR: $mysql_errno
+SELECT * FROM r1; # self changes should be visible
+
+
+drop table r1;
+
+connection default;
+disconnect con1;
+disconnect con2;
+
+
+--source include/wait_until_count_sessions.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/corrupted_data_reads_debug.test b/storage/rocksdb/mysql-test/rocksdb/t/corrupted_data_reads_debug.test
new file mode 100644
index 00000000000..eca0830c4b4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/corrupted_data_reads_debug.test
@@ -0,0 +1,80 @@
+--source include/have_rocksdb.inc
+
+--echo #
+--echo # Test how MyRocks handles reading corrupted data from disk.
+--echo # Data corruption is simulated at source-code level.
+--echo #
+
+--source include/have_debug.inc
+
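+# The corruption is injected via DBUG keywords: setting
+# debug_dbug="+d,<keyword>" enables a simulate-bad-read code path that exists
+# only in debug builds (hence have_debug.inc above).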
+
+--echo #
+--echo # A test for case when data in the table *record* is longer
+--echo # than table DDL expects it to be
+--echo #
+create table t1 (
+ pk int not null primary key,
+ col1 varchar(10)
+) engine=rocksdb;
+
+insert into t1 values (1,1),(2,2),(3,3);
+
+select * from t1;
+
+set @tmp1=@@rocksdb_verify_row_debug_checksums;
+set rocksdb_verify_row_debug_checksums=1;
+set session debug_dbug= "+d,myrocks_simulate_bad_row_read1";
+--error ER_GET_ERRMSG
+select * from t1 where pk=1;
+set session debug_dbug= "-d,myrocks_simulate_bad_row_read1";
+set rocksdb_verify_row_debug_checksums=@tmp1;
+
+select * from t1 where pk=1;
+
+set session debug_dbug= "+d,myrocks_simulate_bad_row_read2";
+--error ER_GET_ERRMSG
+select * from t1 where pk=1;
+set session debug_dbug= "-d,myrocks_simulate_bad_row_read2";
+
+set session debug_dbug= "+d,myrocks_simulate_bad_row_read3";
+--error ER_GET_ERRMSG
+select * from t1 where pk=1;
+set session debug_dbug= "-d,myrocks_simulate_bad_row_read3";
+
+insert into t1 values(4,'0123456789');
+select * from t1;
+drop table t1;
+
+--echo #
+--echo # A test for case when index data is longer than table DDL
+--echo # expects it to be
+--echo #
+
+create table t2 (
+ pk varchar(4) not null primary key,
+ col1 int not null
+) engine=rocksdb collate latin1_bin;
+
+insert into t2 values ('ABCD',1);
+select * from t2;
+set session debug_dbug= "+d,myrocks_simulate_bad_pk_read1";
+--error ER_GET_ERRMSG
+select * from t2;
+set session debug_dbug= "-d,myrocks_simulate_bad_pk_read1";
+
+drop table t2;
+
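+# Repeat the corrupted-PK-read check on a table with the default collation.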
+create table t2 (
+ pk varchar(4) not null primary key,
+ col1 int not null
+) engine=rocksdb;
+
+insert into t2 values ('ABCD',1);
+
+select * from t2;
+set session debug_dbug= "+d,myrocks_simulate_bad_pk_read1";
+--error ER_GET_ERRMSG
+select * from t2;
+set session debug_dbug= "-d,myrocks_simulate_bad_pk_read1";
+
+drop table t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/covered_unpack_info_format.test b/storage/rocksdb/mysql-test/rocksdb/t/covered_unpack_info_format.test
new file mode 100644
index 00000000000..ccd91a61ff0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/covered_unpack_info_format.test
@@ -0,0 +1,79 @@
+# Disabled until the covered unpack_info format is enabled in prod
+--source include/have_debug.inc
+--source include/not_debug.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+#
+# Normal case
+#
+
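+# Both secondary keys index a 32-byte prefix of bigfield, so lookups of rows
+# whose value fits within the prefix can be served from the index alone.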
+CREATE TABLE t1 (
+ id INT,
+ fake_id INT,
+ bigfield VARCHAR(4096),
+ PRIMARY KEY (id),
+ KEY bf (bigfield(32)),
+ KEY fid (fake_id, bigfield(32))
+) ENGINE=rocksdb;
+
+INSERT INTO t1 VALUES (1, 1001, REPEAT('a', 1)),
+ (8, 1008, REPEAT('b', 8)),
+ (24, 1024, REPEAT('c', 24)),
+ (31, 1031, REPEAT('d', 31)),
+ (32, 1032, REPEAT('x', 32)),
+ (33, 1033, REPEAT('y', 33)),
+ (128, 1128, REPEAT('z', 128));
+
+SELECT * FROM t1;
+
+--let $prefix_index_check_title= Eligible for optimization, access via fake_id only
+--let $prefix_index_check_read_avoided_delta= 2
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1031
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Not eligible for optimization, access via fake_id of big row.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1033
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+DROP TABLE t1;
+
+#
+# Create same table with older format to test compatibility
+#
+
+set session debug= '+d,MYROCKS_NO_COVERED_BITMAP_FORMAT';
+CREATE TABLE t1 (
+ id INT,
+ fake_id INT,
+ bigfield VARCHAR(4096),
+ PRIMARY KEY (id),
+ KEY bf (bigfield(32)),
+ KEY fid (fake_id, bigfield(32))
+) ENGINE=rocksdb;
+set session debug= '-d,MYROCKS_NO_COVERED_BITMAP_FORMAT';
+
+INSERT INTO t1 VALUES (1, 1001, REPEAT('a', 1)),
+ (8, 1008, REPEAT('b', 8)),
+ (24, 1024, REPEAT('c', 24)),
+ (31, 1031, REPEAT('d', 31)),
+ (32, 1032, REPEAT('x', 32)),
+ (33, 1033, REPEAT('y', 33)),
+ (128, 1128, REPEAT('z', 128));
+
+SELECT * FROM t1;
+
+--let $prefix_index_check_title= No longer eligible for optimization since no covered bitmap was stored.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1031
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Not eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1033
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/create_no_primary_key_table-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/create_no_primary_key_table-master.opt
new file mode 100644
index 00000000000..ce274af3507
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/create_no_primary_key_table-master.opt
@@ -0,0 +1 @@
+--block_create_no_primary_key=TRUE
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/create_no_primary_key_table.test b/storage/rocksdb/mysql-test/rocksdb/t/create_no_primary_key_table.test
new file mode 100644
index 00000000000..3ef35cb2633
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/create_no_primary_key_table.test
@@ -0,0 +1,63 @@
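+# Check --block_create_no_primary_key: creating a table without an explicit
+# primary key is rejected in user schemas, while tables in the mysql schema
+# are exempt.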
+USE mysql;
+CREATE TABLE mysql_table (a INT) ENGINE=ROCKSDB;
+-- error ER_BLOCK_NO_PRIMARY_KEY
+CREATE TABLE test.mysql_table (a INT) ENGINE=ROCKSDB;
+USE test;
+-- error ER_BLOCK_NO_PRIMARY_KEY
+CREATE TABLE mysql_table (a INT) ENGINE=ROCKSDB;
+-- error ER_BLOCK_NO_PRIMARY_KEY
+CREATE TABLE IF NOT EXISTS mysql_table_2 (a INT) ENGINE=ROCKSDB;
+-- error ER_TABLE_MUST_HAVE_COLUMNS
+CREATE TABLE mysql_table_no_cols ENGINE=ROCKSDB;
+CREATE TABLE mysql.mysql_table_2 (a INT) ENGINE=ROCKSDB;
+
+CREATE TABLE mysql_primkey (a INT PRIMARY KEY, b INT, c INT, d INT, INDEX (c)) ENGINE=ROCKSDB;
+ALTER TABLE mysql_primkey DROP b, DROP a, ADD (f INT PRIMARY KEY);
+-- error ER_BLOCK_NO_PRIMARY_KEY
+ALTER TABLE mysql_primkey DROP PRIMARY KEY;
+
+CREATE TABLE mysql_primkey2 (a INT PRIMARY KEY, b INT, c INT) ENGINE=ROCKSDB;
+ALTER TABLE mysql_primkey2 DROP b;
+ALTER TABLE mysql_primkey2 ADD (b INT);
+-- error ER_BLOCK_NO_PRIMARY_KEY
+ALTER TABLE mysql_primkey2 DROP c, DROP A;
+
+CREATE TABLE mysql_primkey3 (a INT PRIMARY KEY, b INT, c INT, INDEX indexonb (b), INDEX indexonc (c)) ENGINE=ROCKSDB;
+ALTER TABLE mysql_primkey3 DROP INDEX indexonb;
+ALTER TABLE mysql_primkey3 DROP c;
+ALTER TABLE mysql_primkey3 DROP PRIMARY KEY, ADD PRIMARY KEY(b);
+
+CREATE TABLE mysql_primkey4(a INT, b INT, PRIMARY KEY(a), INDEX si (a, b)) ENGINE=ROCKSDB;
+DROP INDEX si ON mysql_primkey4;
+-- error ER_BLOCK_NO_PRIMARY_KEY
+DROP INDEX `PRIMARY` ON mysql_primkey4;
+
+ALTER TABLE mysql.mysql_table ADD PRIMARY KEY (a);
+ALTER TABLE mysql.mysql_table DROP PRIMARY KEY;
+
+SET default_storage_engine=ROCKSDB;
+-- error ER_BLOCK_NO_PRIMARY_KEY
+CREATE TABLE mysql_noeng(a INT, b INT);
+
+# Disable no_engine_substitution
+SET sql_mode="";
+-- error ER_BLOCK_NO_PRIMARY_KEY
+CREATE TABLE mysql_noeng_sub(a INT, b INT) ENGINE=BOGUS_ENGINE;
+
+CREATE TABLE mysql_primkey5 LIKE mysql_primkey;
+
+SET @@global.block_create_no_primary_key = false;
+CREATE TABLE mysql_no_primkey (a INT) ENGINE=ROCKSDB;
+SET @@global.block_create_no_primary_key = true;
+-- error ER_BLOCK_NO_PRIMARY_KEY
+CREATE TABLE mysql_block_no_primkey LIKE mysql_no_primkey;
+
+DROP TABLE mysql_primkey;
+DROP TABLE mysql_primkey2;
+DROP TABLE mysql_primkey3;
+DROP TABLE mysql_primkey4;
+DROP TABLE mysql_primkey5;
+DROP TABLE mysql_no_primkey;
+USE mysql;
+DROP TABLE mysql_table;
+DROP TABLE mysql_table_2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/create_table.test b/storage/rocksdb/mysql-test/rocksdb/t/create_table.test
new file mode 100644
index 00000000000..4fffe7497c4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/create_table.test
@@ -0,0 +1,192 @@
+--source include/have_rocksdb.inc
+
+#
+# Basic CREATE TABLE statements
+#
+
+#############################################
+# TODO:
+# A part of the test is currently disabled
+# because temporary tables are not supported
+#############################################
+
+--disable_warnings
+DROP TABLE IF EXISTS t1,t2;
+--enable_warnings
+
+# Simple create table with minimal table options
+# which are defined in have_engine.inc
+# (default empty) plus ENGINE=
+
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+
+# IF NOT EXISTS
+CREATE TABLE IF NOT EXISTS t1 (a INT PRIMARY KEY) ENGINE=rocksdb;
+
+# CREATE .. LIKE
+
+CREATE TABLE t2 LIKE t1;
+SHOW CREATE TABLE t2;
+
+--error ER_ILLEGAL_HA_CREATE_OPTION
+CREATE TEMPORARY TABLE t2 (a INT PRIMARY KEY) ENGINE=rocksdb;
+
+--disable_parsing
+
+DROP TABLE t2;
+
+CREATE TEMPORARY TABLE t2 LIKE t1;
+
+SHOW CREATE TABLE t2;
+DROP TEMPORARY TABLE t2;
+
+--enable_parsing
+
+DROP TABLE t2;
+
+DROP TABLE IF EXISTS t1;
+
+# CREATE .. AS SELECT
+
+# Use the engine as default
+
+SET default_storage_engine = rocksdb;
+
+CREATE TABLE t1 (a INT PRIMARY KEY);
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a INT PRIMARY KEY) AS SELECT 1 AS a UNION SELECT 2 AS a;
+SHOW CREATE TABLE t1;
+--sorted_result
+SELECT * FROM t1;
+
+# Just to add FLUSH LOGS into the mix while we are in the most common test
+FLUSH LOGS;
+
+DROP TABLE IF EXISTS t1;
+
+# CREATE TABLE with MAX_INDEXES (64) keys and no primary key
+# MyRocks adds a hidden primary key, so make sure we don't break anything
+CREATE TABLE t1(c1 INT,c2 INT,c3 INT,c4 INT,c5 INT,c6 INT,c7 INT,c8 INT,c9 INT,
+ c10 INT,c11 INT,c12 INT,c13 INT,c14 INT,c15 INT,c16 INT,c17 INT,
+ c18 INT,c19 INT,c20 INT,c21 INT,c22 INT,c23 INT,c24 INT,c25 INT,
+ c26 INT,c27 INT,c28 INT,c29 INT,c30 INT,c31 INT,c32 INT,c33 INT,
+ c34 INT,c35 INT,c36 INT,c37 INT,c38 INT,c39 INT,c40 INT,c41 INT,
+ c42 INT,c43 INT,c44 INT,c45 INT,c46 INT,c47 INT,c48 INT,c49 INT,
+ c50 INT,c51 INT,c52 INT,c53 INT,c54 INT,c55 INT,c56 INT,c57 INT,
+ c58 INT,c59 INT,c60 INT,c61 INT,c62 INT,c63 INT,c64 INT,c65 INT,
+ c66 INT,c67 INT,c68 INT,c69 INT,c70 INT,c71 INT,c72 INT,c73 INT,
+ c74 INT,c75 INT,c76 INT,c77 INT,c78 INT,c79 INT,c80 INT,c81 INT,
+ c82 INT,c83 INT,c84 INT,c85 INT,c86 INT,c87 INT,c88 INT,c89 INT,
+ c90 INT,c91 INT,c92 INT,c93 INT,c94 INT,c95 INT,c96 INT,c97 INT,
+ c98 INT,c99 INT,c100 INT,c101 INT,c102 INT,c103 INT,c104 INT,
+ c105 INT,c106 INT,c107 INT,c108 INT,c109 INT,c110 INT,c111 INT,
+ c112 INT,c113 INT,c114 INT,c115 INT,c116 INT,c117 INT,c118 INT,
+ c119 INT,c120 INT,c121 INT,c122 INT,c123 INT,c124 INT,c125 INT,
+ c126 INT,c127 INT,c128 INT,c129 INT,c130 INT,c131 INT,c132 INT,
+ c133 INT,c134 INT,c135 INT,c136 INT,c137 INT,c138 INT,c139 INT,
+ c140 INT,c141 INT,c142 INT,c143 INT,c144 INT,c145 INT,c146 INT,
+ c147 INT,c148 INT,c149 INT,c150 INT,c151 INT,c152 INT,c153 INT,
+ c154 INT,c155 INT,c156 INT,c157 INT,c158 INT,c159 INT,c160 INT,
+ c161 INT,c162 INT,c163 INT,c164 INT,c165 INT,c166 INT,c167 INT,
+ c168 INT,c169 INT,c170 INT,c171 INT,c172 INT,c173 INT,c174 INT,
+ c175 INT,c176 INT,c177 INT,c178 INT,c179 INT,c180 INT,c181 INT,
+ c182 INT,c183 INT,c184 INT,c185 INT,c186 INT,c187 INT,c188 INT,
+ c189 INT,c190 INT,c191 INT,c192 INT,c193 INT,c194 INT,c195 INT,
+ c196 INT,c197 INT,c198 INT,c199 INT,c200 INT,c201 INT,c202 INT,
+ c203 INT,c204 INT,c205 INT,c206 INT,c207 INT,c208 INT,c209 INT,
+ c210 INT,c211 INT,c212 INT,c213 INT,c214 INT,c215 INT,c216 INT,
+ c217 INT,c218 INT,c219 INT,c220 INT,c221 INT,c222 INT,c223 INT,
+ c224 INT,c225 INT,c226 INT,c227 INT,c228 INT,c229 INT,c230 INT,
+ c231 INT,c232 INT,c233 INT,c234 INT,c235 INT,c236 INT,c237 INT,
+ c238 INT,c239 INT,c240 INT,c241 INT,c242 INT,c243 INT,c244 INT,
+ c245 INT,c246 INT,c247 INT,c248 INT,c249 INT,c250 INT,c251 INT,
+ c252 INT,c253 INT,c254 INT,c255 INT,c256 INT,c257 INT,c258 INT,
+ c259 INT,c260 INT,c261 INT,c262 INT,c263 INT,c264 INT,c265 INT,
+ c266 INT,c267 INT,c268 INT,c269 INT,c270 INT,c271 INT,c272 INT,
+ c273 INT,c274 INT,c275 INT,c276 INT,c277 INT,c278 INT,c279 INT,
+ c280 INT,c281 INT,c282 INT,c283 INT,c284 INT,c285 INT,c286 INT,
+ c287 INT,c288 INT,c289 INT,c290 INT,c291 INT,c292 INT,c293 INT,
+ c294 INT,c295 INT,c296 INT,c297 INT,c298 INT,c299 INT,c300 INT,
+ c301 INT,c302 INT,c303 INT,c304 INT,c305 INT,c306 INT,c307 INT,
+ c308 INT,c309 INT,c310 INT,c311 INT,c312 INT,c313 INT,c314 INT,
+ c315 INT,c316 INT,c317 INT,c318 INT,c319 INT,c320 INT,c321 INT,
+ c322 INT,c323 INT,c324 INT,c325 INT,c326 INT,c327 INT,c328 INT,
+ c329 INT,c330 INT,c331 INT,c332 INT,c333 INT,c334 INT,c335 INT,
+ c336 INT,c337 INT,c338 INT,c339 INT,c340 INT,c341 INT,c342 INT,
+ c343 INT,c344 INT,c345 INT,c346 INT,c347 INT,c348 INT,c349 INT,
+ c350 INT,c351 INT,c352 INT,c353 INT,c354 INT,c355 INT,c356 INT,
+ c357 INT,c358 INT,c359 INT,c360 INT,c361 INT,c362 INT,c363 INT,
+ c364 INT,c365 INT,c366 INT,c367 INT,c368 INT,c369 INT,c370 INT,
+ c371 INT,c372 INT,c373 INT,c374 INT,c375 INT,c376 INT,c377 INT,
+ c378 INT,c379 INT,c380 INT,c381 INT,c382 INT,c383 INT,c384 INT,
+ c385 INT,c386 INT,c387 INT,c388 INT,c389 INT,c390 INT,c391 INT,
+ c392 INT,c393 INT,c394 INT,c395 INT,c396 INT,c397 INT,c398 INT,
+ c399 INT,c400 INT,c401 INT,c402 INT,c403 INT,c404 INT,c405 INT,
+ c406 INT,c407 INT,c408 INT,c409 INT,c410 INT,c411 INT,c412 INT,
+ c413 INT,c414 INT,c415 INT,c416 INT,c417 INT,c418 INT,c419 INT,
+ c420 INT,c421 INT,c422 INT,c423 INT,c424 INT,c425 INT,c426 INT,
+ c427 INT,c428 INT,c429 INT,c430 INT,c431 INT,c432 INT,c433 INT,
+ c434 INT,c435 INT,c436 INT,c437 INT,c438 INT,c439 INT,c440 INT,
+ c441 INT,c442 INT,c443 INT,c444 INT,c445 INT,c446 INT,c447 INT,
+ c448 INT,
+ KEY (c1,c2,c3,c4,c5,c6,c7),KEY (c8,c9,c10,c11,c12,c13,c14),
+ KEY (c15,c16,c17,c18,c19,c20,c21),KEY (c22,c23,c24,c25,c26,c27,c28),
+ KEY (c29,c30,c31,c32,c33,c34,c35),KEY (c36,c37,c38,c39,c40,c41,c42),
+ KEY (c43,c44,c45,c46,c47,c48,c49),KEY (c50,c51,c52,c53,c54,c55,c56),
+ KEY (c57,c58,c59,c60,c61,c62,c63),KEY (c64,c65,c66,c67,c68,c69,c70),
+ KEY (c71,c72,c73,c74,c75,c76,c77),KEY (c78,c79,c80,c81,c82,c83,c84),
+ KEY (c85,c86,c87,c88,c89,c90,c91),KEY (c92,c93,c94,c95,c96,c97,c98),
+ KEY (c99,c100,c101,c102,c103,c104,c105),
+ KEY (c106,c107,c108,c109,c110,c111,c112),
+ KEY (c113,c114,c115,c116,c117,c118,c119),
+ KEY (c120,c121,c122,c123,c124,c125,c126),
+ KEY (c127,c128,c129,c130,c131,c132,c133),
+ KEY (c134,c135,c136,c137,c138,c139,c140),
+ KEY (c141,c142,c143,c144,c145,c146,c147),
+ KEY (c148,c149,c150,c151,c152,c153,c154),
+ KEY (c155,c156,c157,c158,c159,c160,c161),
+ KEY (c162,c163,c164,c165,c166,c167,c168),
+ KEY (c169,c170,c171,c172,c173,c174,c175),
+ KEY (c176,c177,c178,c179,c180,c181,c182),
+ KEY (c183,c184,c185,c186,c187,c188,c189),
+ KEY (c190,c191,c192,c193,c194,c195,c196),
+ KEY (c197,c198,c199,c200,c201,c202,c203),
+ KEY (c204,c205,c206,c207,c208,c209,c210),
+ KEY (c211,c212,c213,c214,c215,c216,c217),
+ KEY (c218,c219,c220,c221,c222,c223,c224),
+ KEY (c225,c226,c227,c228,c229,c230,c231),
+ KEY (c232,c233,c234,c235,c236,c237,c238),
+ KEY (c239,c240,c241,c242,c243,c244,c245),
+ KEY (c246,c247,c248,c249,c250,c251,c252),
+ KEY (c253,c254,c255,c256,c257,c258,c259),
+ KEY (c260,c261,c262,c263,c264,c265,c266),
+ KEY (c267,c268,c269,c270,c271,c272,c273),
+ KEY (c274,c275,c276,c277,c278,c279,c280),
+ KEY (c281,c282,c283,c284,c285,c286,c287),
+ KEY (c288,c289,c290,c291,c292,c293,c294),
+ KEY (c295,c296,c297,c298,c299,c300,c301),
+ KEY (c302,c303,c304,c305,c306,c307,c308),
+ KEY (c309,c310,c311,c312,c313,c314,c315),
+ KEY (c316,c317,c318,c319,c320,c321,c322),
+ KEY (c323,c324,c325,c326,c327,c328,c329),
+ KEY (c330,c331,c332,c333,c334,c335,c336),
+ KEY (c337,c338,c339,c340,c341,c342,c343),
+ KEY (c344,c345,c346,c347,c348,c349,c350),
+ KEY (c351,c352,c353,c354,c355,c356,c357),
+ KEY (c358,c359,c360,c361,c362,c363,c364),
+ KEY (c365,c366,c367,c368,c369,c370,c371),
+ KEY (c372,c373,c374,c375,c376,c377,c378),
+ KEY (c379,c380,c381,c382,c383,c384,c385),
+ KEY (c386,c387,c388,c389,c390,c391,c392),
+ KEY (c393,c394,c395,c396,c397,c398,c399),
+ KEY (c400,c401,c402,c403,c404,c405,c406),
+ KEY (c407,c408,c409,c410,c411,c412,c413),
+ KEY (c414,c415,c416,c417,c418,c419,c420),
+ KEY (c421,c422,c423,c424,c425,c426,c427),
+ KEY (c428,c429,c430,c431,c432,c433,c434),
+ KEY (c435,c436,c437,c438,c439,c440,c441),
+ KEY (c442,c443,c444,c445,c446,c447,c448));
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ddl_high_priority.test b/storage/rocksdb/mysql-test/rocksdb/t/ddl_high_priority.test
new file mode 100644
index 00000000000..01bb5cfa4ed
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ddl_high_priority.test
@@ -0,0 +1,18 @@
+# Test high priority DDL
+# There are two modes of high priority DDL:
+# 1. Through the system variable high_priority_ddl
+# 2. Through syntax: CREATE/ALTER/DROP/OPTIMIZE HIGH_PRIORITY ...
+
+--source include/have_rocksdb.inc
+
+--echo ##
+--echo ## Using the system variable high_priority_ddl
+--echo ##
+--let $use_sys_var = 1;
+--source include/ddl_high_priority.inc
+
+--echo ##
+--echo ## Using HIGH_PRIORITY syntax
+--echo ##
+--let $use_sys_var = 0;
+--source include/ddl_high_priority.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/deadlock.test b/storage/rocksdb/mysql-test/rocksdb/t/deadlock.test
new file mode 100644
index 00000000000..656be9eb3fc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/deadlock.test
@@ -0,0 +1,44 @@
+--echo #
+--echo # Validate that deadlock errors don't occur with a high level of concurrency
+--echo #
+
+--source include/have_rocksdb.inc
+--source include/big_test.inc
+
+--echo # Disable for valgrind because this takes too long
+--source include/not_valgrind.inc
+
+--disable_warnings
+DROP DATABASE IF EXISTS mysqlslap;
+--enable_warnings
+
+CREATE DATABASE mysqlslap;
+USE mysqlslap;
+CREATE TABLE t1(id1 BIGINT, id2 BIGINT, count INT, PRIMARY KEY(id1, id2), KEY(id2)) ENGINE=rocksdb;
+CREATE TABLE t1rev(id1 BIGINT, id2 BIGINT, count INT, PRIMARY KEY(id1, id2) COMMENT "rev:cf2", KEY(id2) COMMENT "rev:cf2") ENGINE=rocksdb;
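+# The "rev:" prefix in the key comments places t1rev's indexes in a
+# reverse-ordered column family.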
+
+SET @save = @@global.rocksdb_lock_wait_timeout;
+SET GLOBAL rocksdb_lock_wait_timeout = 60;
+
+--exec $MYSQL_SLAP --silent --concurrency=50 --number-of-queries=50000 --query="INSERT INTO t1 VALUES(1, 1, 1) ON DUPLICATE KEY UPDATE count=count+1"
+SELECT count from t1;
+--exec $MYSQL_SLAP --silent --concurrency=50 --number-of-queries=50000 --query="UPDATE t1 SET count=count+1 WHERE id1=1 AND id2=1"
+SELECT count from t1;
+--exec $MYSQL_SLAP --silent --concurrency=50 --number-of-queries=50000 --query="UPDATE t1 SET count=count+1 WHERE id2=1"
+SELECT count from t1;
+--exec $MYSQL_SLAP --silent --concurrency=50 --number-of-queries=50000 --query="UPDATE t1 SET count=count+1"
+SELECT count from t1;
+
+# Same tests on a table with reverse orderings
+--exec $MYSQL_SLAP --silent --concurrency=50 --number-of-queries=50000 --query="INSERT INTO t1rev VALUES(1, 1, 1) ON DUPLICATE KEY UPDATE count=count+1"
+SELECT count from t1rev;
+--exec $MYSQL_SLAP --silent --concurrency=50 --number-of-queries=50000 --query="UPDATE t1rev SET count=count+1 WHERE id1=1 AND id2=1"
+SELECT count from t1rev;
+--exec $MYSQL_SLAP --silent --concurrency=50 --number-of-queries=50000 --query="UPDATE t1rev SET count=count+1 WHERE id2=1"
+SELECT count from t1rev;
+--exec $MYSQL_SLAP --silent --concurrency=50 --number-of-queries=50000 --query="UPDATE t1rev SET count=count+1"
+SELECT count from t1rev;
+
+SET GLOBAL rocksdb_lock_wait_timeout = @save;
+
+DROP DATABASE mysqlslap;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/deadlock_stats.test b/storage/rocksdb/mysql-test/rocksdb/t/deadlock_stats.test
new file mode 100644
index 00000000000..a9b30a4273a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/deadlock_stats.test
@@ -0,0 +1,3 @@
+let $engine=rocksdb;
+
+--source include/deadlock_stats.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test b/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test
new file mode 100644
index 00000000000..8ceebef8f72
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test
@@ -0,0 +1,185 @@
+set @prior_lock_wait_timeout = @@rocksdb_lock_wait_timeout;
+set @prior_deadlock_detect = @@rocksdb_deadlock_detect;
+set @prior_max_latest_deadlocks = @@rocksdb_max_latest_deadlocks;
+set global rocksdb_deadlock_detect = on;
+set global rocksdb_lock_wait_timeout = 10000;
+--echo # Clears deadlock buffer of any prior deadlocks.
+set global rocksdb_max_latest_deadlocks = 0;
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+let $engine = rocksdb;
+
+--source include/count_sessions.inc
+connect (con1,localhost,root,,);
+let $con1= `SELECT CONNECTION_ID()`;
+
+connect (con2,localhost,root,,);
+let $con2= `SELECT CONNECTION_ID()`;
+
+connect (con3,localhost,root,,);
+let $con3= `SELECT CONNECTION_ID()`;
+
+connection default;
+eval create table t (i int primary key) engine=$engine;
+insert into t values (1), (2), (3);
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /TIMESTAMP: [0-9]*/TSTAMP/
+show engine rocksdb transaction status;
+
+echo Deadlock #1;
+--source include/simple_deadlock.inc
+connection default;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /TIMESTAMP: [0-9]*/TSTAMP/
+show engine rocksdb transaction status;
+
+echo Deadlock #2;
+--source include/simple_deadlock.inc
+connection default;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /TIMESTAMP: [0-9]*/TSTAMP/
+show engine rocksdb transaction status;
+set global rocksdb_max_latest_deadlocks = 10;
+
+echo Deadlock #3;
+--source include/simple_deadlock.inc
+connection default;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /TIMESTAMP: [0-9]*/TSTAMP/
+show engine rocksdb transaction status;
+set global rocksdb_max_latest_deadlocks = 1;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /TIMESTAMP: [0-9]*/TSTAMP/
+show engine rocksdb transaction status;
+
+connection con3;
+set rocksdb_deadlock_detect_depth = 2;
+
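+# Deadlock #4 builds a three-way wait cycle (con1 -> con2 -> con3 -> con1)
+# and checks that it is still detected with rocksdb_deadlock_detect_depth = 2.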
+echo Deadlock #4;
+connection con1;
+begin;
+select * from t where i=1 for update;
+
+connection con2;
+begin;
+select * from t where i=2 for update;
+
+connection con3;
+begin;
+select * from t where i=3 for update;
+
+connection con1;
+send select * from t where i=2 for update;
+
+connection con2;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con1 and waiting_key != "";
+--source include/wait_condition.inc
+
+send select * from t where i=3 for update;
+
+connection con3;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con2 and waiting_key != "";
+--source include/wait_condition.inc
+
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_deadlocks';
+--error ER_LOCK_DEADLOCK
+select * from t where i=1 for update;
+select case when variable_value-@a = 1 then 'true' else 'false' end as deadlocks from information_schema.global_status where variable_name='rocksdb_row_lock_deadlocks';
+rollback;
+
+connection con2;
+reap;
+rollback;
+
+connection con1;
+reap;
+rollback;
+
+connection default;
+set global rocksdb_max_latest_deadlocks = 5;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /TIMESTAMP: [0-9]*/TSTAMP/
+show engine rocksdb transaction status;
+
+echo Deadlock #5;
+connection con1;
+begin;
+select * from t where i=1 for update;
+
+connection con2;
+begin;
+select * from t where i=2 for update;
+
+connection con3;
+begin;
+select * from t where i=3 lock in share mode;
+
+connection con1;
+select * from t where i=100 for update;
+select * from t where i=101 for update;
+send select * from t where i=2 for update;
+
+connection con2;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con1 and waiting_key != "";
+--source include/wait_condition.inc
+
+select * from t where i=3 lock in share mode;
+select * from t where i=200 for update;
+select * from t where i=201 for update;
+
+--error ER_LOCK_DEADLOCK
+select * from t where i=1 lock in share mode;
+rollback;
+
+connection con1;
+reap;
+rollback;
+
+connection con3;
+rollback;
+
+connection default;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /TIMESTAMP: [0-9]*/TSTAMP/
+show engine rocksdb transaction status;
+
+echo Deadlock #6;
+connection con1;
+create table t1 (id int primary key, value int) engine=rocksdb;
+insert into t1 values (1,1),(2,2),(3,3),(4,4),(5,5);
+begin;
+update t1 set value=value+100 where id=1;
+update t1 set value=value+100 where id=2;
+
+connection con2;
+begin;
+update t1 set value=value+200 where id=3;
+
+connection con1;
+send update t1 set value=value+100 where id=3;
+
+connection con2;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con1 and waiting_key != "";
+--source include/wait_condition.inc
+--error ER_LOCK_DEADLOCK
+update t1 set value=value+200 where id=1;
+
+# con2 tx is automatically rolled back
+connection con1;
+reap;
+select * from t1;
+drop table t1;
+
+connection default;
+
+disconnect con1;
+disconnect con2;
+disconnect con3;
+
+set global rocksdb_lock_wait_timeout = @prior_lock_wait_timeout;
+set global rocksdb_deadlock_detect = @prior_deadlock_detect;
+drop table t;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/ /TIMESTAMP: [0-9]*/TSTAMP/
+show engine rocksdb transaction status;
+set global rocksdb_max_latest_deadlocks = 0;
+--echo # Clears deadlock buffer of any existent deadlocks.
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/ /TIMESTAMP: [0-9]*/TSTAMP/
+show engine rocksdb transaction status;
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/delete.test b/storage/rocksdb/mysql-test/rocksdb/t/delete.test
new file mode 100644
index 00000000000..c829dd9da3b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/delete.test
@@ -0,0 +1,101 @@
+--source include/have_rocksdb.inc
+
+#
+# Basic DELETE statements.
+# DELETE LOW_PRIORITY is covered in delete_low_prio test
+# DELETE QUICK is covered in delete_quick test (syntax only)
+# DELETE IGNORE is covered in delete_ignore test
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1,t2;
+--enable_warnings
+
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (10000,'foobar'),(1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e');
+INSERT INTO t1 (a,b) SELECT a, b FROM t1;
+
+# Single-table DELETE
+
+DELETE FROM t1 WHERE b IN ('c');
+--sorted_result
+SELECT a,b FROM t1;
+
+DELETE FROM t1 WHERE a < 0 OR b = 'a';
+--sorted_result
+SELECT a,b FROM t1;
+
+# ORDER BY and LIMIT
+DELETE FROM t1 WHERE a <= 4 ORDER BY b DESC LIMIT 1;
+--sorted_result
+SELECT a,b FROM t1;
+
+# Multi-table DELETE
+
+CREATE TABLE t2 (c CHAR(8), d INT, pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t2 (c,d) SELECT b, a FROM t1;
+--sorted_result
+SELECT c,d FROM t2;
+
+DELETE t2.* FROM t1, t2 WHERE c < b AND a + d != 1;
+--sorted_result
+SELECT a,b FROM t1;
+--sorted_result
+SELECT c,d FROM t2;
+
+DELETE FROM t2, t1.* USING t2, t1 WHERE c = 'foobar' and b = c;
+--sorted_result
+SELECT a,b FROM t1;
+--sorted_result
+SELECT c,d FROM t2;
+
+DELETE FROM t1;
+--sorted_result
+SELECT a,b FROM t1;
+
+DROP TABLE t1, t2;
+
+
+#
+# Transactional DELETE
+#
+
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(6,'f'),(7,'g'),(8,'h'),(10000,'foobar');
+INSERT INTO t1 (a,b) SELECT a, b FROM t1;
+
+BEGIN;
+DELETE FROM t1 WHERE b IN ('c');
+--sorted_result
+SELECT a,b FROM t1;
+DELETE FROM t1 WHERE a < 0 OR b = 'a';
+COMMIT;
+--sorted_result
+SELECT a,b FROM t1;
+
+# Savepoints
+
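+# MyRocks accepts SAVEPOINT and RELEASE SAVEPOINT, but rolling back to a
+# savepoint after the transaction has made changes fails and leaves the
+# transaction rollback-only, as the expected errors below show.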
+BEGIN;
+DELETE FROM t1 WHERE a <= 4 ORDER BY b DESC LIMIT 1;
+SAVEPOINT spt1;
+
+DELETE FROM t1;
+RELEASE SAVEPOINT spt1;
+ROLLBACK;
+--sorted_result
+SELECT a,b FROM t1;
+
+BEGIN;
+DELETE FROM t1 WHERE a <= 4 ORDER BY b DESC LIMIT 1;
+SAVEPOINT spt1;
+DELETE FROM t1;
+INSERT INTO t1 (a,b) VALUES (1,'a');
+--error ER_ROLLBACK_TO_SAVEPOINT
+ROLLBACK TO SAVEPOINT spt1;
+--error ER_ROLLBACK_ONLY
+COMMIT;
+--sorted_result
+SELECT a,b FROM t1;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/delete_ignore.test b/storage/rocksdb/mysql-test/rocksdb/t/delete_ignore.test
new file mode 100644
index 00000000000..d087d80f4ac
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/delete_ignore.test
@@ -0,0 +1,37 @@
+--source include/have_rocksdb.inc
+
+#
+# DELETE IGNORE
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1,t2;
+--enable_warnings
+
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+
+INSERT INTO t1 (a,b) VALUES (10000,'foobar'),(1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e');
+INSERT INTO t1 (a,b) SELECT a, b FROM t1;
+
+CREATE TABLE t2 (pk INT AUTO_INCREMENT PRIMARY KEY, c CHAR(8), d INT) ENGINE=rocksdb;
+
+INSERT INTO t2 (c,d) SELECT b, a FROM t1;
+--sorted_result
+SELECT a,b FROM t1;
+--sorted_result
+SELECT c,d FROM t2;
+
+DELETE IGNORE FROM t1 WHERE b IS NOT NULL ORDER BY a LIMIT 1;
+--sorted_result
+SELECT a,b FROM t1;
+
+DELETE IGNORE t1.*, t2.* FROM t1, t2 WHERE c < b OR a != ( SELECT 1 UNION SELECT 2 );
+--sorted_result
+SELECT a,b FROM t1;
+--sorted_result
+SELECT c,d FROM t2;
+
+# Cleanup
+DROP TABLE t1, t2;
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/delete_quick.test b/storage/rocksdb/mysql-test/rocksdb/t/delete_quick.test
new file mode 100644
index 00000000000..127ef47dfaf
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/delete_quick.test
@@ -0,0 +1,32 @@
+--source include/have_rocksdb.inc
+
+#
+# DELETE QUICK syntax.
+# For now we only check that the keyword is accepted,
+# without actually checking whether the feature works.
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1,t2;
+--enable_warnings
+
+CREATE TABLE t1 (a INT, b CHAR(8), PRIMARY KEY (a)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e');
+
+DELETE QUICK FROM t1 WHERE a = 1 OR b > 'foo';
+--sorted_result
+SELECT a,b FROM t1;
+
+CREATE TABLE t2 (c CHAR(8), d INT, PRIMARY KEY (c)) ENGINE=rocksdb;
+INSERT INTO t2 (c,d) SELECT b, a FROM t1;
+--sorted_result
+SELECT c,d FROM t2;
+
+DELETE QUICK FROM t2, t1.* USING t2, t1 WHERE c IS NULL OR a = d;
+--sorted_result
+SELECT a,b FROM t1;
+--sorted_result
+SELECT c,d FROM t2;
+
+DROP TABLE t1, t2;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/delete_with_keys.test b/storage/rocksdb/mysql-test/rocksdb/t/delete_with_keys.test
new file mode 100644
index 00000000000..f4d890eb1b4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/delete_with_keys.test
@@ -0,0 +1,39 @@
+--source include/have_rocksdb.inc
+
+#
+# DELETE statements for tables with keys
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY, KEY(b)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(6,'x'),(7,'y'),(8,'z');
+DELETE FROM t1 WHERE b > 'y';
+DELETE FROM t1 WHERE a=2;
+
+--sorted_result
+SELECT a,b FROM t1;
+DELETE FROM t1;
+
+DROP TABLE t1;
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(6,'x'),(7,'y'),(8,'z');
+DELETE FROM t1 WHERE b > 'y';
+DELETE FROM t1 WHERE a=2;
+
+--sorted_result
+SELECT a,b FROM t1;
+DELETE FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a INT, b INT, c INT, pk INT AUTO_INCREMENT PRIMARY KEY, KEY(a), KEY (b)) ENGINE=rocksdb;
+
+INSERT INTO t1 (a,b,c) VALUES (1,2,3),(4,5,6),(7,8,9);
+DELETE FROM t1 WHERE a = 10 OR b = 20 ORDER BY c LIMIT 1;
+--sorted_result
+SELECT a,b,c FROM t1;
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/describe.test b/storage/rocksdb/mysql-test/rocksdb/t/describe.test
new file mode 100644
index 00000000000..9bc5d299a31
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/describe.test
@@ -0,0 +1,24 @@
+--source include/have_rocksdb.inc
+
+#
+# DESCRIBE statement
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1, t2, t3;
+--enable_warnings
+
+CREATE TABLE t1 (a INT, b CHAR(8), PRIMARY KEY (a)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (100,'foo'),(2, 'b');
+
+CREATE TABLE t2 (a INT, b CHAR(8), PRIMARY KEY (b)) ENGINE=rocksdb CHARACTER SET utf8;
+INSERT INTO t2 (a,b) VALUES (1, 'bar');
+
+CREATE TABLE t3 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb CHARACTER SET utf8;
+
+DESCRIBE t1;
+DESC t2 a;
+DESCRIBE t3 '%';
+
+DROP TABLE t1, t2, t3;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/disabled.def b/storage/rocksdb/mysql-test/rocksdb/t/disabled.def
new file mode 100644
index 00000000000..7ef2dec832b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/disabled.def
@@ -0,0 +1,95 @@
+##
+## See also: storage/rocksdb/mysql-test/rocksdb/suite.pm - tests on
+## platforms and under valgrind are disabled from there.
+##
+
+##
+## Tests that are disabled in the upstream
+##
+cons_snapshot_serializable : Consistent read does not work on serializable
+level_read_uncommitted : Not supported
+level_serializable: Not supported
+
+
+##
+## Tests that require FB/MySQL specific features for which there are
+## no plans of ever porting them into MariaDB
+##
+optimizer_loose_index_scans: MariaDB doesn't support Skip Scan
+mysqldump : MariaDB's mysqldump doesn't support --print-ordering-key
+mysqldump2 : MariaDB's mysqldump doesn't support --print-ordering-key
+native_procedure : Not supported in MariaDB
+slow_query_log: MDEV-11480
+select_for_update_skip_locked_nowait: MDEV-11481
+create_no_primary_key_table: MariaDB doesn't have --block_create_no_primary_key
+explicit_snapshot: MariaDB doesn't support Shared/Explicit snapshots
+percona_nonflushing_analyze_debug : Requires Percona Server's Non-flushing ANALYZE feature
+com_rpc_tx : Requires connection attributes and detached sessions
+mysqlbinlog_blind_replace: requires @@enable_blind_replace support
+optimize_myrocks_replace_into_base: requires @@enable_blind_replace support
+optimize_myrocks_replace_into_lock: requires @@enable_blind_replace support
+rocksdb.skip_core_dump_on_error: requires @@binlog_error_action support
+bypass_select_basic_bloom : Query bypass is not supported
+bypass_select_basic : Query bypass is not supported
+
+rocksdb_read_free_rpl : Read-Free replication is not supported
+rocksdb_read_free_rpl_stress : Read-Free replication is not supported
+
+blind_delete_rr : Read-Free replication is not supported
+blind_delete_rc : Read-Free replication is not supported
+
+force_shutdown: requires support for SHUTDOWN statement which calls exit().
+
+##
+## Tests that do not fit MariaDB's test environment. Upstream seems to test
+## on boxes with much more RAM and CPU, some tests are more of a stress tests
+## than functional tests or bug testcases.
+##
+rqg_examples : Tests that use RQG are disabled
+rqg_runtime : Tests that use RQG are disabled
+rqg_transactions : Tests that use RQG are disabled
+allow_no_pk_concurrent_insert: stress test
+rocksdb_deadlock_stress_rc: stress test
+rocksdb_deadlock_stress_rr: stress test
+use_direct_reads: Direct IO is not supported on all filesystems
+
+##
+## Tests which hit a problem elsewhere (Upstream, SQL layer, etc)
+##
+persistent_cache: Upstream RocksDB bug https://github.com/facebook/mysql-5.6/issues/579
+collation: Fails on gcc 4.8 and before, MDEV-12433
+col_opt_zerofill: MDEV-14729 (also MDEV-14165 which was fixed): problem in the client
+
+
+##
+## Tests that require FB/MySQL specific features which MariaDB
+## currently doesn't have but eventually might get:
+##
+gap_lock_issue254: MDEV-11735: MyRocks: Gap Lock detector support
+gap_lock_raise_error: MDEV-11735: MyRocks: Gap Lock detector support
+show_engine : MariaRocks: MariaDB doesn't support SHOW ENGINE rocksdb TRANSACTION STATUS
+issue243_transactionStatus: MariaDB doesn't support SHOW ENGINE rocksdb TRANSACTION STATUS
+rpl_row_not_found : MariaDB doesn't support slave_exec_mode='SEMI_STRICT'
+rpl_row_not_found_rc : MariaDB doesn't support slave_exec_mode='SEMI_STRICT'
+
+ddl_high_priority: Needs fractional @@lock_wait_timeout
+deadlock_tracking : Needs SHOW ENGINE ROCKSDB TRANSACTION STATUS
+bytes_written: Needs I_S.TABLE_STATISTICS.IO_WRITE_BYTES
+trx_info_rpl : MariaRocks: @@rpl_skip_tx_api doesn't work, yet.
+lock_wait_timeout_stats: MDEV-13404
+rpl_row_triggers : Requires read-free slave.
+
+##
+## Test failures (in buildbot or else where)
+##
+
+compact_deletes: MDEV-12663 : rocksdb.compact_deletes times out and causes other tests to fail
+blind_delete_without_tx_api: MDEV-12286: rocksdb.blind_delete_without_tx_api test fails
+information_schema: MDEV-14372: unstable testcase
+
+##
+## Tests that fail for some other reason
+##
+
+mysqlbinlog_gtid_skip_empty_trans_rocksdb : MariaRocks: requires GTIDs
+add_index_inplace: not stable result
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/drop_database.test b/storage/rocksdb/mysql-test/rocksdb/t/drop_database.test
new file mode 100644
index 00000000000..174a4bbf286
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/drop_database.test
@@ -0,0 +1,11 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP DATABASE IF EXISTS test_drop_database;
+--enable_warnings
+
+CREATE DATABASE test_drop_database;
+CREATE TABLE t1 (a int, b int, c int, primary key (a), unique key (b)) ENGINE=ROCKSDB;
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY (a);
+DROP TABLE t1;
+DROP DATABASE test_drop_database;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/drop_index_inplace.test b/storage/rocksdb/mysql-test/rocksdb/t/drop_index_inplace.test
new file mode 100644
index 00000000000..57d7cdf57c2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/drop_index_inplace.test
@@ -0,0 +1,116 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+##
+## test dropping index inplace
+##
+
+CREATE TABLE t1 (a INT, b INT AUTO_INCREMENT, KEY ka(a), KEY kb(a,b), PRIMARY KEY(b)) ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+INSERT INTO t1 (a) VALUES (1);
+INSERT INTO t1 (a) VALUES (3);
+INSERT INTO t1 (a) VALUES (5);
+
+ALTER TABLE t1 DROP INDEX ka, ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+
+# Key ka does not exist in table t1
+--error 1176
+SELECT * FROM t1 FORCE INDEX(ka) where a > 1;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX(kb) where a > 1;
+--sorted_result
+SELECT * FROM t1 where b > 1;
+
+DROP TABLE t1;
+
+##
+## test dropping multiple indexes at once and multi-part indexes
+##
+
+CREATE TABLE t1 (a INT AUTO_INCREMENT, b INT, c INT, KEY kb(b), KEY kbc(b,c), KEY kc(c), PRIMARY KEY(a)) ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+INSERT INTO t1 (b,c) VALUES (1,2);
+INSERT INTO t1 (b,c) VALUES (3,4);
+INSERT INTO t1 (b,c) VALUES (5,6);
+ALTER TABLE t1 DROP INDEX kb, DROP INDEX kbc, ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+
+
+# test restarting to make sure everything is still ok and persisted properly
+--source include/restart_mysqld.inc
+
+SHOW CREATE TABLE t1;
+
+INSERT INTO t1 (b,c) VALUES (1,2);
+INSERT INTO t1 (b,c) VALUES (3,4);
+INSERT INTO t1 (b,c) VALUES (5,6);
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX(kc) where c > 3;
+--sorted_result
+SELECT * FROM t1 where b > 3;
+
+DROP TABLE t1;
+
+# test dropping the pk to see if that's still ok
+CREATE TABLE t1 (a INT, b INT, c INT, KEY kb(b), KEY kbc(b,c), KEY kc(c), PRIMARY KEY(a)) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+ALTER TABLE t1 DROP INDEX kb, DROP INDEX kbc, ALGORITHM=INPLACE;
+SHOW INDEX IN t1;
+
+ALTER TABLE t1 DROP PRIMARY KEY;
+SHOW INDEX IN t1;
+# test dropping index on tables with no pk
+ALTER TABLE t1 DROP INDEX kc, ALGORITHM=INPLACE;
+SHOW INDEX IN t1;
+
+DROP TABLE t1;
+
+# test dropping unique keys
+CREATE TABLE t1 (a INT AUTO_INCREMENT, b INT, c INT, PRIMARY KEY(a)) ENGINE=rocksdb;
+ALTER TABLE t1 ADD UNIQUE INDEX kb(b);
+ALTER TABLE t1 ADD UNIQUE INDEX kbc(b,c);
+ALTER TABLE t1 ADD UNIQUE INDEX kc(c);
+SHOW INDEX IN t1;
+
+ALTER TABLE t1 DROP INDEX kb, DROP INDEX kbc;
+SHOW INDEX IN t1;
+
+# test restarting to make sure everything is still ok and persisted properly
+--source include/restart_mysqld.inc
+
+--sorted_result
+INSERT INTO t1 (b,c) VALUES (1,2);
+INSERT INTO t1 (b,c) VALUES (3,4);
+INSERT INTO t1 (b,c) VALUES (5,6);
+SELECT * FROM t1 FORCE INDEX(kc) where c > 3;
+
+# test dropping index on tables with no pk
+ALTER TABLE t1 DROP INDEX kc, ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+
+DROP TABLE t1;
+
+# Case where we drop a column that is part of a key; we don't want to use
+# the inplace algorithm in this scenario
+CREATE TABLE IF NOT EXISTS t1 (col1 INT, col2 INT, col3 INT);
+INSERT INTO t1 (col1,col2,col3) VALUES (1,2,3);
+ALTER TABLE t1 ADD KEY idx ( col1, col2 );
+ANALYZE TABLE t1;
+ALTER TABLE t1 DROP COLUMN col2;
+ALTER TABLE t1 DROP COLUMN col3;
+DROP TABLE t1;
+
+# Case of dropping and adding at the same time; the inplace algorithm should not be used yet
+CREATE TABLE IF NOT EXISTS t1 (col1 INT, col2 INT, col3 INT);
+INSERT INTO t1 (col1,col2,col3) VALUES (1,2,3);
+ALTER TABLE t1 ADD KEY idx ( col1, col2 );
+ANALYZE TABLE t1;
+ALTER TABLE t1 DROP COLUMN col2;
+ALTER TABLE t1 DROP COLUMN col3;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/drop_stats_procedure.inc b/storage/rocksdb/mysql-test/rocksdb/t/drop_stats_procedure.inc
new file mode 100644
index 00000000000..b40004402c9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/drop_stats_procedure.inc
@@ -0,0 +1,3 @@
+drop procedure save_read_stats;
+drop procedure get_read_stats;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/drop_table-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/drop_table-master.opt
new file mode 100644
index 00000000000..f40e9db55b2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/drop_table-master.opt
@@ -0,0 +1,3 @@
+--rocksdb_max_subcompactions=1
+--rocksdb_default_cf_options=write_buffer_size=512k;target_file_size_base=512k;level0_file_num_compaction_trigger=2;level0_slowdown_writes_trigger=-1;level0_stop_writes_trigger=1000;max_bytes_for_level_base=1m
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/drop_table.test b/storage/rocksdb/mysql-test/rocksdb/t/drop_table.test
new file mode 100644
index 00000000000..9667c7ba650
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/drop_table.test
@@ -0,0 +1,145 @@
+--source include/have_rocksdb.inc
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+DROP TABLE IF EXISTS t3;
+DROP TABLE IF EXISTS t4;
+DROP TABLE IF EXISTS t5;
+--enable_warnings
+
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+call mtr.add_suppression("LibRocksDB");
+
+# Start from clean slate
+set global rocksdb_compact_cf = 'cf1';
+set global rocksdb_compact_cf = 'rev:cf2';
+set global rocksdb_signal_drop_index_thread = 1;
+--source include/restart_mysqld.inc
+
+CREATE TABLE t1 (
+ a int not null,
+ b int not null,
+ primary key (a,b) comment 'cf1',
+ key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+
+CREATE TABLE t2 (
+ a int not null,
+ b int not null,
+ primary key (a,b) comment 'cf1',
+ key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+
+CREATE TABLE t3 (
+ a int not null,
+ b int not null,
+ primary key (a,b) comment 'cf1',
+ key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+
+CREATE TABLE t4 (
+ a int not null,
+ b int not null,
+ primary key (a,b) comment 'cf1',
+ key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+
+# Populate tables
+let $max = 1000;
+let $table = t1;
+--source drop_table_repopulate_table.inc
+let $table = t2;
+--source drop_table_repopulate_table.inc
+let $table = t3;
+--source drop_table_repopulate_table.inc
+let $table = t4;
+--source drop_table_repopulate_table.inc
+
+drop table t2;
+
+# Restart the server before t2's indices are deleted
+--source include/restart_mysqld.inc
+
+let $table = t1;
+--source drop_table_repopulate_table.inc
+let $table = t4;
+--source drop_table_repopulate_table.inc
+
+drop table t3;
+
+# Insert enough data to trigger compactions that eliminate t2 and t3
+let $max = 50000;
+let $table = t1;
+--source drop_table_repopulate_table.inc
+let $table = t4;
+--source drop_table_repopulate_table.inc
+
+# Run manual compactions, then restart mysqld
+# and confirm it is not blocked.
+SET GLOBAL rocksdb_max_manual_compactions = 2;
+SET GLOBAL rocksdb_debug_manual_compaction_delay = 3600;
+connect (con1, localhost, root,,);
+connect (con2, localhost, root,,);
+connect (con3, localhost, root,,);
+connection con1;
+send SET GLOBAL rocksdb_compact_cf='cf1';
+connection con2;
+send SET GLOBAL rocksdb_compact_cf='rev:cf2';
+connection default;
+let $wait_condition = select count(*) = 2 from information_schema.processlist where info like 'SET GLOBAL rocksdb_compact_cf%';
+--source include/wait_condition.inc
+# sleep long enough for the manual compaction threads to start compacting
+--sleep 2
+select * from information_schema.global_status where variable_name='rocksdb_manual_compactions_running';
+
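+# Both manual compaction slots are now in use, so further manual compaction
+# requests are rejected.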
+connection con3;
+--error ER_INTERNAL_ERROR
+SET GLOBAL rocksdb_compact_cf='cf1';
+--error ER_INTERNAL_ERROR
+SET GLOBAL rocksdb_compact_cf='rev:cf2';
+
+connection default;
+--source include/restart_mysqld.inc
+
+drop table t4;
+
+# Restart the server before t4's indices are deleted
+--source include/restart_mysqld.inc
+
+# Make sure new table gets unique indices
+CREATE TABLE t5 (
+ a int not null,
+ b int not null,
+ primary key (a,b) comment 'cf1',
+ key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+
+let $max = 1000;
+let $table = t5;
+--source drop_table_repopulate_table.inc
+
+drop table t5;
+
+# Manually compact column families, cleaning up all lingering data
+set global rocksdb_compact_cf = 'cf1';
+set global rocksdb_compact_cf = 'rev:cf2';
+
+# Signal thread to check for dropped indices
+set global rocksdb_signal_drop_index_thread = 1;
+
+let $show_rpl_debug_info= 1; # to force post-failure printout
+let $wait_timeout= 300; # Override default 30 seconds with 300.
+let $wait_condition = select count(*) = 0
+ as c from information_schema.rocksdb_global_info
+ where TYPE = 'DDL_DROP_INDEX_ONGOING';
+--source include/wait_condition.inc
+
+## Upstream has removed the following:
+--disable_parsing
+--enable_parsing
+# Cleanup
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/drop_table2.test b/storage/rocksdb/mysql-test/rocksdb/t/drop_table2.test
new file mode 100644
index 00000000000..a9012aea80a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/drop_table2.test
@@ -0,0 +1,131 @@
+--source include/have_rocksdb.inc
+
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+DROP TABLE IF EXISTS t3;
+DROP TABLE IF EXISTS t4;
+DROP TABLE IF EXISTS t5;
+--enable_warnings
+
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+
+# Start from clean slate
+set global rocksdb_compact_cf = 'cf1';
+set global rocksdb_compact_cf = 'rev:cf2';
+set global rocksdb_signal_drop_index_thread = 1;
+--source include/restart_mysqld.inc
+
+CREATE TABLE t1 (
+ a int not null,
+ b int not null,
+ primary key (a,b) comment 'cf1',
+ key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+
+CREATE TABLE t2 (
+ a int not null,
+ b int not null,
+ primary key (a,b) comment 'cf1',
+ key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+
+CREATE TABLE t3 (
+ a int not null,
+ b int not null,
+ primary key (a,b) comment 'cf1',
+ key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+
+CREATE TABLE t4 (
+ a int not null,
+ b int not null,
+ primary key (a,b) comment 'cf1',
+ key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+
+# Populate tables
+let $max = 1000;
+let $table = t1;
+--source drop_table_repopulate_table.inc
+let $table = t2;
+--source drop_table_repopulate_table.inc
+let $table = t3;
+--source drop_table_repopulate_table.inc
+let $table = t4;
+--source drop_table_repopulate_table.inc
+
+
+# Restart the server before t2's indices are deleted
+--source include/restart_mysqld.inc
+
+let $table = t1;
+--source drop_table_repopulate_table.inc
+let $table = t4;
+--source drop_table_repopulate_table.inc
+
+
+# Insert enough data to trigger compactions that eliminate t2 and t3
+let $max = 50000;
+let $table = t1;
+--source drop_table_repopulate_table.inc
+let $table = t4;
+--source drop_table_repopulate_table.inc
+
+
+# Restart the server before t4's indices are deleted
+--source include/restart_mysqld.inc
+
+# Make sure new table gets unique indices
+CREATE TABLE t5 (
+ a int not null,
+ b int not null,
+ primary key (a,b) comment 'cf1',
+ key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+
+let $max = 1000;
+let $table = t5;
+--source drop_table_repopulate_table.inc
+
+set @@global.rocksdb_compact_cf = 'cf1';
+set @@global.rocksdb_compact_cf = 'rev:cf2';
+set @@global.rocksdb_compact_cf = 'default';
+
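+# Record the combined size of the SST files; after the tables are dropped and
+# compacted we check that most of this space was reclaimed.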
+perl;
+$size+=-s $_ for (<$ENV{MYSQLTEST_VARDIR}/mysqld.1/data/\#rocksdb/*.sst>);
+$filename= "$ENV{MYSQLTEST_VARDIR}/tmp/size_output";
+open(F, '>', $filename) || die("Can't open file $filename: $!");
+print F $size;
+EOF
+drop table t1;
+drop table t2;
+drop table t3;
+drop table t4;
+drop table t5;
+
+set @@global.rocksdb_compact_cf = 'cf1';
+set @@global.rocksdb_compact_cf = 'rev:cf2';
+set @@global.rocksdb_compact_cf = 'default';
+
+let $show_rpl_debug_info= 1; # to force post-failure printout
+let $wait_timeout= 300; # Override default 30 seconds with 300.
+let $wait_condition = select count(*) = 0
+ as c from information_schema.rocksdb_global_info
+ where TYPE = 'DDL_DROP_INDEX_ONGOING';
+--source include/wait_condition.inc
+
+# Check that space is reclaimed
+perl;
+$size+=-s $_ for (<$ENV{MYSQLTEST_VARDIR}/mysqld.1/data/\#rocksdb/*.sst>);
+$filename= "$ENV{MYSQLTEST_VARDIR}/tmp/size_output";
+open(F, '<', $filename) || die("Can't open file $filename: $!");
+$old=<F>;
+print "Compacted\n" if $old > $size * 2;
+EOF
+
+# Cleanup
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/drop_table3-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/drop_table3-master.opt
new file mode 100644
index 00000000000..a9ebc4ec20b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/drop_table3-master.opt
@@ -0,0 +1,2 @@
+--rocksdb_max_subcompactions=1
+--rocksdb_default_cf_options=write_buffer_size=16k;target_file_size_base=16k;level0_slowdown_writes_trigger=-1;level0_stop_writes_trigger=1000;compression_per_level=kNoCompression;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc b/storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc
new file mode 100644
index 00000000000..1a044384a45
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc
@@ -0,0 +1,52 @@
+--source include/have_rocksdb.inc
+
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+
+# Start from clean slate
+set global rocksdb_compact_cf = 'cf1';
+set global rocksdb_compact_cf = 'rev:cf2';
+set global rocksdb_signal_drop_index_thread = 1;
+--source include/restart_mysqld.inc
+
+CREATE TABLE t1 (
+ a int not null,
+ b int not null,
+ c varchar(500) not null,
+ primary key (a,b) comment 'cf1',
+ key (b) comment 'rev:cf2'
+) ENGINE=RocksDB;
+
+# Populate tables
+let $max = 50000;
+let $table = t1;
+--source drop_table3_repopulate_table.inc
+
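+# Snapshot the compaction read counter; the check below verifies that
+# dropping or truncating the table avoids re-reading most of the deleted data
+# during compaction.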
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_compact_read_bytes';
+if ($truncate_table)
+{
+ truncate table t1;
+}
+if ($drop_table)
+{
+ drop table t1;
+}
+
+let $show_rpl_debug_info= 1; # to force post-failure printout
+let $wait_timeout= 300; # Override default 30 seconds with 300.
+let $wait_condition = select count(*) = 0
+ as c from information_schema.rocksdb_global_info
+ where TYPE = 'DDL_DROP_INDEX_ONGOING';
+--source include/wait_condition.inc
+
+select case when variable_value-@a < 500000 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_compact_read_bytes';
+
+# Cleanup
+DROP TABLE IF EXISTS t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/drop_table3.test b/storage/rocksdb/mysql-test/rocksdb/t/drop_table3.test
new file mode 100644
index 00000000000..b3a6bf9958e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/drop_table3.test
@@ -0,0 +1,5 @@
+--source include/have_rocksdb.inc
+
+-- let $truncate_table = 0
+-- let $drop_table = 1
+-- source drop_table3.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/drop_table3_repopulate_table.inc b/storage/rocksdb/mysql-test/rocksdb/t/drop_table3_repopulate_table.inc
new file mode 100644
index 00000000000..c34af07204f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/drop_table3_repopulate_table.inc
@@ -0,0 +1,15 @@
+# Usage:
+# let $max = <count>;
+# let $table = <table name>;
+# --source drop_table3_repopulate_table.inc
+#
+eval DELETE FROM $table;
+
+--disable_query_log
+let $i = 1;
+while ($i <= $max) {
+ let $insert = INSERT INTO $table VALUES ($i, $i, rpad('a', 499, 'b'));
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/drop_table_repopulate_table.inc b/storage/rocksdb/mysql-test/rocksdb/t/drop_table_repopulate_table.inc
new file mode 100644
index 00000000000..6faf41ef7b1
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/drop_table_repopulate_table.inc
@@ -0,0 +1,15 @@
+# Usage:
+# let $max = <count>;
+# let $table = <table name>;
+# --source drop_table_repopulate_table.inc
+#
+eval DELETE FROM $table;
+
+--disable_query_log
+let $i = 1;
+while ($i <= $max) {
+ let $insert = INSERT INTO $table VALUES ($i, $i);
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/drop_table_sync.inc b/storage/rocksdb/mysql-test/rocksdb/t/drop_table_sync.inc
new file mode 100644
index 00000000000..c6a3ccde7a6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/drop_table_sync.inc
@@ -0,0 +1,6 @@
+let $show_rpl_debug_info= 1; # to force post-failure printout
+let $wait_timeout= 300; # Override default 30 seconds with 300.
+let $wait_condition = select count(*) = 0
+ as c from information_schema.rocksdb_global_info
+ where TYPE = 'DDL_DROP_INDEX_ONGOING';
+--source include/wait_condition.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/dup_key_update.test b/storage/rocksdb/mysql-test/rocksdb/t/dup_key_update.test
new file mode 100644
index 00000000000..b4a0c9e5e96
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/dup_key_update.test
@@ -0,0 +1,45 @@
+--source include/have_rocksdb.inc
+
+# Test insert ... on duplicate key update statements
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+--enable_warnings
+
+CREATE TABLE t1 (id1 INT, id2 INT, id3 INT,
+ PRIMARY KEY (id1, id2, id3),
+ UNIQUE KEY (id3, id1)) ENGINE=ROCKSDB;
+
+CREATE TABLE t2 (id1 INT, id2 INT, id3 INT,
+ PRIMARY KEY (id1, id2, id3),
+ UNIQUE KEY (id3, id1) COMMENT 'rev:cf') ENGINE=ROCKSDB;
+
+
+--source include/dup_key_update.inc
+
+# Cleanup
+DROP TABLE t1;
+DROP TABLE t2;
+
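+# Enable large index prefixes so that the long multi-column unique keys below
+# are accepted.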
+set global rocksdb_large_prefix=1;
+CREATE TABLE t1 (id1 varchar(128) CHARACTER SET latin1 COLLATE latin1_bin,
+ id2 varchar(256) CHARACTER SET utf8 COLLATE utf8_bin,
+ id3 varchar(200) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
+ PRIMARY KEY (id1, id2, id3),
+ UNIQUE KEY (id3, id1)) ENGINE=ROCKSDB;
+set global rocksdb_large_prefix=DEFAULT;
+
+set global rocksdb_large_prefix=1;
+CREATE TABLE t2 (id1 varchar(128) CHARACTER SET latin1 COLLATE latin1_bin,
+ id2 varchar(256) CHARACTER SET utf8 COLLATE utf8_bin,
+ id3 varchar(200) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
+ PRIMARY KEY (id1, id2, id3),
+ UNIQUE KEY (id3, id1) COMMENT 'rev:cf') ENGINE=ROCKSDB;
+set global rocksdb_large_prefix=DEFAULT;
+
+--source include/dup_key_update.inc
+
+# Cleanup
+DROP TABLE t1;
+DROP TABLE t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/duplicate_table.test b/storage/rocksdb/mysql-test/rocksdb/t/duplicate_table.test
new file mode 100644
index 00000000000..875f8514d8f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/duplicate_table.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+--disable_warnings
+DROP TABLE IF EXISTS t;
+--enable_warnings
+CREATE TABLE t(id int primary key) engine=rocksdb;
+INSERT INTO t values (1), (2), (3);
+--error ER_TABLE_EXISTS_ERROR
+CREATE TABLE t(id int primary key) engine=rocksdb;
+FLUSH TABLES;
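+# Hide the .frm file so the table exists only inside the storage engine;
+# re-creating it must then fail with a metadata inconsistency error.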
+move_file $MYSQLTEST_VARDIR/mysqld.1/data/test/t.frm $MYSQLTEST_VARDIR/mysqld.1/data/test/t.frm.tmp;
+--error ER_METADATA_INCONSISTENCY
+CREATE TABLE t(id int primary key) engine=rocksdb;
+move_file $MYSQLTEST_VARDIR/mysqld.1/data/test/t.frm.tmp $MYSQLTEST_VARDIR/mysqld.1/data/test/t.frm;
+FLUSH TABLES;
+SELECT * FROM t;
+DROP TABLE t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/explicit_snapshot-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/explicit_snapshot-master.opt
new file mode 100644
index 00000000000..d7dd66b4480
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/explicit_snapshot-master.opt
@@ -0,0 +1 @@
+--log-bin --binlog_format=row --gtid_mode=ON --enforce_gtid_consistency --log-slave-updates
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/explicit_snapshot.test b/storage/rocksdb/mysql-test/rocksdb/t/explicit_snapshot.test
new file mode 100644
index 00000000000..efe1980b694
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/explicit_snapshot.test
@@ -0,0 +1,263 @@
+source include/have_log_bin.inc;
+source include/have_rocksdb.inc;
+
+disable_warnings;
+DROP TABLE IF EXISTS t1;
+enable_warnings;
+
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+connection con1;
+CREATE TABLE T1 (a INT PRIMARY KEY AUTO_INCREMENT) ENGINE=ROCKSDB;
+INSERT INTO T1 VALUES();
+
+#
+
+echo "con1: Creating explict snapshot";
+let $snapshot=
+ query_get_value(CREATE EXPLICIT ROCKSDB SNAPSHOT, Snapshot_ID, 1);
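+# (query_get_value runs the statement and captures the Snapshot_ID column of
+# row 1 into $snapshot, so the id can be attached from another connection.)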
+SELECT * FROM T1;
+
+connection con2;
+echo "con2: Inserting a row";
+INSERT INTO T1 VALUES();
+SELECT * FROM T1;
+echo "con2: Attaching snapshot id $snapshot";
+disable_result_log;
+eval ATTACH EXPLICIT ROCKSDB SNAPSHOT $snapshot;
+enable_result_log;
+echo "con2: New row should not be visible";
+SELECT * FROM T1;
+echo "con2: Releasing snapshot";
+disable_result_log;
+RELEASE EXPLICIT ROCKSDB SNAPSHOT;
+enable_result_log;
+echo "con2: New row should be visible";
+SELECT * FROM T1;
+
+connection con1;
+echo "con1: New row should not be visible";
+SELECT * FROM T1;
+echo "con1: Releasing snapshot";
+disable_result_log;
+RELEASE EXPLICIT ROCKSDB SNAPSHOT;
+enable_result_log;
+echo "con1: New row should be visible";
+SELECT * FROM T1;
+
+#
+
+echo "con1: Starting shared snapshot";
+let $snapshot=
+query_get_value(START TRANSACTION WITH SHARED ROCKSDB SNAPSHOT, Snapshot_ID, 1);
+SELECT * FROM T1;
+
+connection con2;
+echo "con2: Inserting a row";
+INSERT INTO T1 VALUES();
+SELECT * FROM T1;
+echo "con2: Starting existing snapshot";
+disable_result_log;
+eval START TRANSACTION WITH EXISTING ROCKSDB SNAPSHOT $snapshot;
+enable_result_log;
+echo "con2: New row should not be visible";
+SELECT * FROM T1;
+COMMIT;
+echo "con2: New row should be visible";
+SELECT * FROM T1;
+
+connection con1;
+COMMIT;
+echo "con1: New row should be visible";
+SELECT * FROM T1;
+
+## Negative test cases
+
+connection con1;
+echo "con1: Creating explict snapshot";
+let $snapshot=
+ query_get_value(CREATE EXPLICIT ROCKSDB SNAPSHOT, Snapshot_ID, 1);
+echo "con2: Trying to insert row";
+error ER_UPDATES_WITH_EXPLICIT_SNAPSHOT;
+INSERT INTO T1 VALUES();
+
+connection con2;
+echo "con2: Attaching existing snapshot";
+disable_result_log;
+eval ATTACH EXPLICIT ROCKSDB SNAPSHOT $snapshot;
+enable_result_log;
+echo "con2: Trying to insert row";
+error ER_UPDATES_WITH_EXPLICIT_SNAPSHOT;
+INSERT INTO T1 VALUES();
+
+connection con1;
+disable_result_log;
+RELEASE EXPLICIT ROCKSDB SNAPSHOT;
+enable_result_log;
+connection con2;
+disable_result_log;
+RELEASE EXPLICIT ROCKSDB SNAPSHOT;
+enable_result_log;
+
+connection con1;
+echo "con1: Starting shared snapshot";
+let $snapshot=
+query_get_value(START TRANSACTION WITH SHARED ROCKSDB SNAPSHOT, Snapshot_ID, 1);
+echo "con1: Trying to insert row";
+error ER_UPDATES_WITH_CONSISTENT_SNAPSHOT;
+INSERT INTO T1 VALUES();
+
+connection con2;
+echo "con2: Starting existing snapshot";
+disable_result_log;
+eval START TRANSACTION WITH EXISTING ROCKSDB SNAPSHOT $snapshot;
+enable_result_log;
+echo "con2: Trying to insert row";
+error ER_UPDATES_WITH_CONSISTENT_SNAPSHOT;
+INSERT INTO T1 VALUES();
+
+connection con1;
+COMMIT;
+connection con2;
+COMMIT;
+
+## Test how overlapping explicit snapshot statements work
+
+connection con1;
+echo "con1: Creating explicit snapshot";
+disable_result_log;
+CREATE EXPLICIT ROCKSDB SNAPSHOT;
+enable_result_log;
+SELECT * FROM T1;
+
+connection con2;
+echo "con2: Inserting a row";
+INSERT INTO T1 VALUES();
+SELECT * FROM T1;
+
+connection con1;
+echo "con1: New row should not be seen";
+SELECT * FROM T1;
+echo "con1: Creating another explicit snapshot";
+disable_result_log;
+CREATE EXPLICIT ROCKSDB SNAPSHOT;
+enable_result_log;
+echo "con1: Now the new row should be seen";
+SELECT * FROM T1;
+
+#
+
+echo "con1: Starting transaction with consistent snapshot";
+disable_result_log;
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+enable_result_log;
+SELECT * FROM T1;
+
+connection con2;
+echo "con2: Inserting a row";
+INSERT INTO T1 VALUES();
+SELECT * FROM T1;
+
+connection con1;
+echo "con1: The new row should not be seen";
+SELECT * FROM T1;
+
+echo "con1: Creating another explicit snapshot";
+disable_result_log;
+CREATE EXPLICIT ROCKSDB SNAPSHOT;
+enable_result_log;
+echo "con1: The new row should still not be seen";
+SELECT * FROM T1;
+
+echo "con1: Committing trx";
+COMMIT;
+echo "con1: The new row should now be seen because of the new explicit snapshot created above";
+SELECT * FROM T1;
+
+connection con1;
+echo "con1: Releasing explicit snapshot";
+disable_result_log;
+RELEASE EXPLICIT ROCKSDB SNAPSHOT;
+enable_result_log;
+
+#
+
+echo "con1: Starting transaction with shared snapshot";
+disable_result_log;
+START TRANSACTION WITH SHARED ROCKSDB SNAPSHOT;
+enable_result_log;
+SELECT * FROM T1;
+
+connection con2;
+echo "con2: Inserting a row";
+INSERT INTO T1 VALUES();
+SELECT * FROM T1;
+
+connection con1;
+echo "con1: The new row should not be seen";
+SELECT * FROM T1;
+
+echo "con1: Starting another transaction with shared snapshot";
+disable_result_log;
+START TRANSACTION WITH SHARED ROCKSDB SNAPSHOT;
+enable_result_log;
+echo "con1: The new row should now be seen";
+SELECT * FROM T1;
+COMMIT;
+
+#
+
+echo "con1: Creating explicit snapshot";
+disable_result_log;
+CREATE EXPLICIT ROCKSDB SNAPSHOT;
+enable_result_log;
+SELECT * FROM T1;
+
+echo "con1: Releasing explicit snapshot";
+disable_result_log;
+RELEASE EXPLICIT ROCKSDB SNAPSHOT;
+enable_result_log;
+
+echo "con1: Releasing explicit snapshot again";
+error ER_UNKNOWN_ERROR;
+RELEASE EXPLICIT ROCKSDB SNAPSHOT;
+
+#
+
+echo "con1: Starting transaction with shared snapshot";
+disable_result_log;
+START TRANSACTION WITH SHARED ROCKSDB SNAPSHOT;
+enable_result_log;
+SELECT * FROM T1;
+
+connection con2;
+echo "con2: Inserting a row";
+INSERT INTO T1 VALUES();
+SELECT * FROM T1;
+
+connection con1;
+echo "con1: Creating explicit snapshot";
+disable_result_log;
+CREATE EXPLICIT ROCKSDB SNAPSHOT;
+enable_result_log;
+SELECT * FROM T1;
+
+echo "con1: Releasing explicit snapshot";
+disable_result_log;
+RELEASE EXPLICIT ROCKSDB SNAPSHOT;
+enable_result_log;
+
+echo "con1: The new row should not be seen";
+SELECT * FROM T1;
+COMMIT;
+
+#
+
+## Cleanup
+DROP TABLE T1;
+
+connection default;
+disconnect con1;
+disconnect con2;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/fail_system_cf.test b/storage/rocksdb/mysql-test/rocksdb/t/fail_system_cf.test
new file mode 100644
index 00000000000..255819704a8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/fail_system_cf.test
@@ -0,0 +1,17 @@
+--source include/have_rocksdb.inc
+
+#
+# Any create table using the system column family should fail
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+--error ER_WRONG_ARGUMENTS
+CREATE TABLE t1 (i INT, PRIMARY KEY (i) COMMENT '__system__') ENGINE = ROCKSDB;
+
+# Cleanup
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/fast_prefix_index_fetch.test b/storage/rocksdb/mysql-test/rocksdb/t/fast_prefix_index_fetch.test
new file mode 100644
index 00000000000..0b6cf42aff0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/fast_prefix_index_fetch.test
@@ -0,0 +1,120 @@
+# Disabled until the covered unpack_info format is enabled in prod. Sourcing
+# both have_debug.inc and not_debug.inc below cannot both pass, which keeps
+# this test skipped for now.
+--source include/have_debug.inc
+--source include/not_debug.inc
+
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (
+ id INT,
+ fake_id INT,
+ bigfield VARCHAR(4096),
+ PRIMARY KEY (id),
+ KEY bf (bigfield(32)),
+ KEY fid (fake_id, bigfield(32))
+) ENGINE=rocksdb;
+
+INSERT INTO t1 VALUES (1, 1001, REPEAT('a', 1)),
+ (8, 1008, REPEAT('b', 8)),
+ (24, 1024, REPEAT('c', 24)),
+ (31, 1031, REPEAT('d', 31)),
+ (32, 1032, REPEAT('x', 32)),
+ (33, 1033, REPEAT('y', 33)),
+ (128, 1128, REPEAT('z', 128));
+
+SELECT * FROM t1;
+
+--disable_query_log
+
+--let $prefix_index_check_title= Baseline sanity check
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT "no-op query"
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
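+# (prefix_index_only_query_check.inc is not shown here; judging by its
+# parameters, it runs $prefix_index_check_query and asserts that the counter
+# of reads avoided by the covered prefix-index optimization moved by exactly
+# $prefix_index_check_read_avoided_delta.)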
+
+--let $prefix_index_check_title= Eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 2
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(bf) WHERE bigfield = REPEAT('d', 31)
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Eligible for optimization, access via fake_id only
+--let $prefix_index_check_read_avoided_delta= 2
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1031
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Not eligible for optimization, access via fake_id of big row.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1033
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+# The secondary_index_read call isn't covered because the next record in the
+# index has a bigfield value of length 33, so only one of the two lookups is
+# covered here.
+--let $prefix_index_check_title= Eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 1
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(bf) WHERE bigfield = REPEAT('x', 32)
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Not eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(bf) WHERE bigfield = REPEAT('y', 33)
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 2
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(bf) WHERE bigfield = REPEAT('b', 8)
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 2
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(bf) WHERE bigfield = REPEAT('c', 24)
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Not eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(bf) WHERE bigfield = REPEAT('z', 128)
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+DROP TABLE t1;
+
+--echo #
+--echo # Test that multi-byte charsets are handled correctly
+--echo #
+
+SET NAMES utf8;
+
+CREATE TABLE t1 (
+ a INT PRIMARY KEY,
+ b VARCHAR(30) CHARACTER SET utf8 collate utf8_bin,
+ KEY k (b(2))
+) ENGINE=rocksdb;
+
+INSERT INTO t1 VALUES
+ (1, 'a'),
+ (2, 'cc'),
+ (3, 'ŽŽ'),
+ (4, 'žžžž');
+
+--let $prefix_index_check_title= Charset record obviously shorter than the prefix
+--let $prefix_index_check_query= SELECT * FROM t1 FORCE INDEX(k) WHERE b = "a"
+--let $prefix_index_check_read_avoided_delta= 2
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Charset record shorter than prefix
+--let $prefix_index_check_query= SELECT * FROM t1 FORCE INDEX(k) WHERE b = "cc"
+--let $prefix_index_check_read_avoided_delta= 2
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Charset record with glyphs shorter than prefix
+--let $prefix_index_check_query= SELECT * FROM t1 FORCE INDEX(k) WHERE b = "ŽŽ"
+--let $prefix_index_check_read_avoided_delta= 1
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Charset record longer than prefix
+--let $prefix_index_check_query= SELECT * FROM t1 FORCE INDEX(k) WHERE b = "žžžž"
+--let $prefix_index_check_read_avoided_delta= 0
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/force_shutdown.test b/storage/rocksdb/mysql-test/rocksdb/t/force_shutdown.test
new file mode 100644
index 00000000000..1817bc06fc3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/force_shutdown.test
@@ -0,0 +1,97 @@
+--source include/have_rocksdb.inc
+
+--source include/have_debug.inc
+--source include/not_valgrind.inc
+
+connect (conn1, localhost, root,,test);
+
+create table t1 (
+ pk int not null primary key,
+ col1 varchar(10)
+) engine=rocksdb;
+
+insert into t1 values (1,1),(2,2),(3,3);
+
+connection conn1;
+set session debug= "+d,myrocks_busy_loop_on_row_read";
+send select * from t1 where pk=1;
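+# (The myrocks_busy_loop_on_row_read dbug point keeps the row read spinning,
+# so the SELECT sent above stays visible in the processlist while SHUTDOWN
+# is exercised against a busy instance.)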
+
+--echo # testing unclean shutdown on stuck instance
+connection default;
+let $wait_condition=
+ select count(*) = 1 from information_schema.processlist
+ where info = 'select * from t1 where pk=1';
+--source include/wait_condition.inc
+--echo # Run shutdown sql command forcing a kill (exit code 1)
+--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--error 2006,2013
+shutdown 1;
+--source include/wait_until_disconnected.inc
+
+--echo # verifying exit code is printed
+let $error_log=$MYSQLTEST_VARDIR/log/testlog.err;
+let SEARCH_FILE=$error_log;
+--echo # restart the server
+--exec echo "restart:--log-error=$error_log" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+--error 2006,2013
+shutdown 230;
+--source include/wait_until_disconnected.inc
+let SEARCH_PATTERN=COM_SHUTDOWN received from host/user = localhost/root, exit code 230;
+--source include/search_pattern_in_file.inc
+
+--echo # restart the server
+--exec echo "restart:" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+--echo # verifying SHUTDOWN is refused if exit code > 255
+--error ER_UNKNOWN_ERROR
+SHUTDOWN 256;
+--error ER_UNKNOWN_ERROR
+SHUTDOWN 10000;
+
+--echo # verifying SHUTDOWN read_only is refused if the instance is not read-only
+--error ER_UNKNOWN_ERROR
+SHUTDOWN 0 read_only;
+--error ER_UNKNOWN_ERROR
+SHUTDOWN 127 read_only;
+--error 2006,2013
+SHUTDOWN 127;
+--source include/wait_until_disconnected.inc
+
+--echo # restart the server
+--exec echo "restart:" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+connect (conn2, localhost, root,,test);
+
+connection conn2;
+set session debug= "+d,myrocks_busy_loop_on_row_read";
+send select * from t1 where pk=1;
+
+connection default;
+let $wait_condition=
+ select count(*) = 1 from information_schema.processlist
+ where info = 'select * from t1 where pk=1';
+--source include/wait_condition.inc
+
+SET GLOBAL read_only=1;
+--echo # verifying SHUTDOWN read_only works with read_only instance
+--echo # Run shutdown sql command forcing a kill (exit code 255)
+--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--error 2006,2013
+shutdown 255 read_only;
+--source include/wait_until_disconnected.inc
+
+--echo # restart the server
+--exec echo "restart:" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+disconnect conn1;
+disconnect conn2;
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/foreign_key.test b/storage/rocksdb/mysql-test/rocksdb/t/foreign_key.test
new file mode 100644
index 00000000000..675a337c24d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/foreign_key.test
@@ -0,0 +1,47 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1, t2;
+--enable_warnings
+
+CREATE TABLE t1 (b INT PRIMARY KEY);
+
+# Try simple foreign key - should fail
+--error ER_NOT_SUPPORTED_YET
+CREATE TABLE t2 (a INT NOT NULL, b INT NOT NULL, FOREIGN KEY (b) REFERENCES t1(b));
+
+# Try simple valid syntax with 'foreign' as part - should succeed
+CREATE TABLE t2 (a INT NOT NULL, bforeign INT NOT NULL);
+DROP TABLE t2;
+
+# Try simple valid syntax with 'foreign' and 'key' as part (with no space) - should succeed
+CREATE TABLE t2 (a INT NOT NULL, foreignkey INT NOT NULL);
+DROP TABLE t2;
+
+# Try with valid id containing 'foreign' and then a foreign key - should fail
+--error ER_NOT_SUPPORTED_YET
+CREATE TABLE t2 (a INT NOT NULL, bforeign INT not null, FOREIGN KEY (bforeign) REFERENCES t1(b));
+
+CREATE TABLE t2 (a INT NOT NULL, b INT NOT NULL);
+# Alter with foreign key - should fail
+--error ER_NOT_SUPPORTED_YET
+ALTER TABLE t2 ADD FOREIGN KEY (b) REFERENCES t1(b);
+DROP TABLE t2;
+
+# Alter with valid syntax that contains 'foreign' - should succeed
+CREATE TABLE t2 (a INT NOT NULL);
+ALTER TABLE t2 ADD bforeign INT NOT NULL;
+DROP TABLE t2;
+
+# Alter with valid syntax that contains 'foreign' and 'key' (no space) - should succeed
+CREATE TABLE t2 (a INT NOT NULL);
+ALTER TABLE t2 ADD foreignkey INT NOT NULL;
+DROP TABLE t2;
+
+# Alter with valid syntax that contains 'foreign' and then foreign key - should fail
+CREATE TABLE t2 (a INT NOT NULL);
+--error ER_NOT_SUPPORTED_YET
+ALTER TABLE t2 ADD bforeign INT NOT NULL, ADD FOREIGN KEY (bforeign) REFERENCES t1(b);
+DROP TABLE t2;
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/gap_lock_issue254-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/gap_lock_issue254-master.opt
new file mode 100644
index 00000000000..f0b7f4b5ce5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/gap_lock_issue254-master.opt
@@ -0,0 +1 @@
+--binlog-format=row --binlog-row-image=full --gap-lock-raise-error=1
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/gap_lock_issue254.test b/storage/rocksdb/mysql-test/rocksdb/t/gap_lock_issue254.test
new file mode 100644
index 00000000000..af7c9b1ab4f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/gap_lock_issue254.test
@@ -0,0 +1,14 @@
+-- source include/have_binlog_format_row.inc
+-- source include/have_rocksdb.inc
+
+# For issue #254
+create table t (id int primary key, value int);
+begin;
+update t set value=100 where id in (1, 2);
+commit;
+begin;
+--error ER_UNKNOWN_ERROR
+select * from t for update;
+commit;
+drop table t;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/gap_lock_raise_error.test b/storage/rocksdb/mysql-test/rocksdb/t/gap_lock_raise_error.test
new file mode 100644
index 00000000000..59fe7e6f80a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/gap_lock_raise_error.test
@@ -0,0 +1,37 @@
+--source include/have_rocksdb.inc
+
+let $engine=rocksdb;
+--source include/gap_lock_raise_error_all.inc
+
+SET @save_gap_lock_exceptions = @@global.gap_lock_exceptions;
+
+SET GLOBAL gap_lock_exceptions="t.*";
+--source include/gap_lock_raise_error_init.inc
+
+set session autocommit=0;
+--error ER_UNKNOWN_ERROR
+select * from gap1 limit 1 for update;
+--error ER_UNKNOWN_ERROR
+select * from gap1 where value != 100 limit 1 for update;
+
+--source include/gap_lock_raise_error_cleanup.inc
+
+SET GLOBAL gap_lock_exceptions="gap.*";
+--source include/gap_lock_raise_error_init.inc
+
+set session autocommit=0;
+select * from gap1 limit 1 for update;
+select * from gap1 where value != 100 limit 1 for update;
+
+--source include/gap_lock_raise_error_cleanup.inc
+
+# This test has been temporarily disabled because it fails when the server
+# is compiled with GCC 4.8: full regular expression handling was only added
+# in GCC 4.9. We need a way to detect whether full regex support is
+# available before re-enabling this test.
+## Make sure we handle invalid regex expressions and generate a warning
+#--exec echo "" >$MYSQLTEST_VARDIR/log/mysqld.1.err
+#SET GLOBAL gap_lock_exceptions="[a-b,abc\\";
+#--exec grep -A 2 "Invalid pattern" $MYSQLTEST_VARDIR/log/mysqld.1.err | cut -d] -f2
+
+SET GLOBAL gap_lock_exceptions=@save_gap_lock_exceptions;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/get_error_message.test b/storage/rocksdb/mysql-test/rocksdb/t/get_error_message.test
new file mode 100644
index 00000000000..b4c9e2dae3a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/get_error_message.test
@@ -0,0 +1,27 @@
+--source include/have_rocksdb.inc
+--source include/have_partition.inc
+--source include/not_windows.inc # Data directory option not available
+
+#
+# Create tables with partitions and try to generate an error while creating
+# partitions.
+#
+--disable_warnings
+
+DROP TABLE IF EXISTS t1;
+
+--enable_warnings
+
+CREATE TABLE t1(a INT,b INT,KEY (a)) PARTITION BY HASH (a) PARTITIONS 3;
+
+SHOW TABLES;
+
+--disable_query_log
+call mtr.add_suppression("Failed to execute action for entry.*");
+--enable_query_log
+
+--error ER_WRONG_TABLE_NAME
+ALTER TABLE t1 ADD PARTITION(PARTITION p3 DATA DIRECTORY='G:/mysqltest/p3Data' INDEX DIRECTORY='H:/mysqltest/p3Index');
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/group_min_max-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/group_min_max-master.opt
new file mode 100644
index 00000000000..ba9364e1523
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/group_min_max-master.opt
@@ -0,0 +1 @@
+--rocksdb_debug_optimizer_n_rows=1000
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/group_min_max.test b/storage/rocksdb/mysql-test/rocksdb/t/group_min_max.test
new file mode 100644
index 00000000000..eb66bd0e972
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/group_min_max.test
@@ -0,0 +1,9 @@
+--source include/have_debug.inc
+set @debug_tmp= @@debug_dbug;
+set global debug_dbug="+d,force_group_by";
+
+let $engine=RocksDB;
+--source include/group_min_max.inc
+
+set global debug_dbug=@debug_tmp;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ha_extra_keyread.test b/storage/rocksdb/mysql-test/rocksdb/t/ha_extra_keyread.test
new file mode 100644
index 00000000000..0d0fad2e5fa
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ha_extra_keyread.test
@@ -0,0 +1,15 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+
+CREATE TABLE t1 (a INT, b CHAR(8), KEY ab(a, b)) ENGINE=rocksdb DEFAULT CHARSET utf8mb4 COLLATE utf8mb4_bin;
+INSERT INTO t1 (a,b) VALUES (76,'bar');
+INSERT INTO t1 (a,b) VALUES (35,'foo');
+INSERT INTO t1 (a,b) VALUES (77,'baz');
+
+SET debug_dbug="+d,dbug.rocksdb.HA_EXTRA_KEYREAD";
+
+SELECT b FROM t1 FORCE INDEX(ab) WHERE a=35;
+
+
+SET debug_dbug="-d,dbug.rocksdb.HA_EXTRA_KEYREAD";
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/handler_basic.test b/storage/rocksdb/mysql-test/rocksdb/t/handler_basic.test
new file mode 100644
index 00000000000..7b1652c759b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/handler_basic.test
@@ -0,0 +1,52 @@
+--source include/have_rocksdb.inc
+
+#
+# Basic HANDLER counts
+#
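+# (FLUSH STATUS zeroes the session Handler_% counters, so each SHOW below
+# reports only the work done by the statements since the preceding flush.)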
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+FLUSH STATUS;
+CREATE TABLE t1 (id INT PRIMARY KEY, a VARCHAR(100), b INT,
+INDEX b(b)) ENGINE=rocksdb;
+INSERT INTO t1 (id,a,b) VALUES (1,'foobar',100),(2,'z',0),(3,'bar',50);
+SHOW SESSION STATUS LIKE 'Handler_write%';
+
+UPDATE t1 SET b=1000 WHERE id=1;
+SHOW SESSION STATUS LIKE 'Handler_update%';
+
+DELETE FROM t1 WHERE id=2;
+SHOW SESSION STATUS LIKE 'Handler_delete%';
+
+INSERT INTO t1 (id,b) VALUES(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10);
+SHOW SESSION STATUS LIKE 'Handler_write%';
+
+FLUSH STATUS;
+SELECT * FROM t1 WHERE id=8;
+SHOW SESSION STATUS LIKE 'Handler_read%';
+
+FLUSH STATUS;
+SELECT * FROM t1 WHERE b=6;
+SHOW SESSION STATUS LIKE 'Handler_read%';
+
+FLUSH STATUS;
+--sorted_result
+SELECT * FROM t1;
+SHOW SESSION STATUS LIKE 'Handler_read%';
+
+FLUSH STATUS;
+SELECT * FROM t1 FORCE INDEX(b) WHERE b <=5 ORDER BY b;
+SHOW SESSION STATUS LIKE 'Handler_read%';
+
+FLUSH STATUS;
+SELECT * FROM t1 WHERE id >=8 ORDER BY id;
+SHOW SESSION STATUS LIKE 'Handler_read%';
+
+FLUSH STATUS;
+SELECT * FROM t1 WHERE id < 8 ORDER BY id;
+SHOW SESSION STATUS LIKE 'Handler_read%';
+
+# Cleanup
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/hermitage.inc b/storage/rocksdb/mysql-test/rocksdb/t/hermitage.inc
new file mode 100644
index 00000000000..17baf5b6c57
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/hermitage.inc
@@ -0,0 +1,257 @@
+--enable_connect_log
+
+# Save the initial number of concurrent sessions
+--source include/count_sessions.inc
+
+### See full test cases here:
+### https://github.com/ept/hermitage/blob/master/mysql.md
+
+--disable_warnings
+DROP TABLE IF EXISTS test;
+--enable_warnings
+
+connect (con1,localhost,root,,);
+eval SET SESSION TRANSACTION ISOLATION LEVEL $trx_isolation;
+connect (con2,localhost,root,,);
+eval SET SESSION TRANSACTION ISOLATION LEVEL $trx_isolation;
+connect (con3,localhost,root,,);
+eval SET SESSION TRANSACTION ISOLATION LEVEL $trx_isolation;
+
+connection con1;
+
+create table test (id int primary key, value int) engine=rocksdb;
+
+### Prevents Aborted Reads (G1a)
+--source hermitage_init.inc
+
+connection con1;
+select * from test;
+update test set value = 101 where id = 1;
+connection con2;
+select * from test; # Still shows 1 => 10
+connection con1;
+rollback;
+connection con2;
+select * from test; # Still shows 1 => 10
+commit;
+
+
+### Prevents Intermediate Reads (G1b)
+--source hermitage_init.inc
+
+connection con1;
+update test set value = 101 where id = 1;
+connection con2;
+select * from test; # Still shows 1 => 10
+connection con1;
+update test set value = 11 where id = 1;
+commit;
+connection con2;
+select * from test; # Now shows 1 => 11
+commit;
+
+
+### Prevents Circular Information Flow (G1c)
+--source hermitage_init.inc
+
+connection con1;
+update test set value = 11 where id = 1;
+connection con2;
+update test set value = 22 where id = 2;
+connection con1;
+select * from test where id = 2; # Still shows 2 => 20
+connection con2;
+select * from test where id = 1; # Still shows 1 => 10
+connection con1;
+commit;
+connection con2;
+commit;
+
+
+### Prevents Observed Transaction Vanishes (OTV)
+--source hermitage_init.inc
+
+connection con1;
+update test set value = 11 where id = 1;
+update test set value = 19 where id = 2;
+connection con2;
+send update test set value = 12 where id = 1;
+connection con1;
+commit;
+connection con2;
+reap;
+connection con3;
+select * from test; # Shows 1 => 11, 2 => 19
+connection con2;
+update test set value = 18 where id = 2;
+connection con3;
+select * from test; # Shows 1 => 11, 2 => 19
+connection con2;
+commit;
+connection con3;
+select * from test; # Shows 1 => 12, 2 => 18
+commit;
+
+
+### Predicate-Many-Preceders (PMP) -- RC does not prevent, RR prevents
+--source hermitage_init.inc
+
+connection con1;
+select * from test where value = 30;
+connection con2;
+insert into test (id, value) values(3, 30);
+commit;
+connection con1;
+# RC: Returns the newly inserted row
+# RR: Still returns nothing
+select * from test where value % 3 = 0;
+commit;
+
+--source hermitage_init.inc
+connection con1;
+update test set value = value + 10;
+connection con2;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_snapshot_conflict_errors';
+select * from test;
+send delete from test where value = 20;
+connection con1;
+commit;
+connection con2;
+if ($trx_isolation == "READ COMMITTED")
+{
+ reap;
+ # RC: Returns 2 => 30
+ select * from test;
+}
+if ($trx_isolation == "REPEATABLE READ")
+{
+ --error ER_LOCK_DEADLOCK
+ reap;
+ select variable_value-@a from information_schema.global_status where variable_name='rocksdb_snapshot_conflict_errors';
+
+}
+commit;
+
+
+### Lost Update (P4) -- RC does not prevent, RR prevents
+--source hermitage_init.inc
+
+connection con1;
+select * from test where id = 1;
+connection con2;
+select * from test where id = 1;
+connection con1;
+update test set value = 11 where id = 1;
+connection con2;
+send update test set value = 12 where id = 1;
+connection con1;
+commit;
+connection con2;
+if ($trx_isolation == "READ COMMITTED")
+{
+ reap;
+ # RC: Returns 1 => 12
+ select * from test;
+}
+if ($trx_isolation == "REPEATABLE READ")
+{
+ --error ER_LOCK_DEADLOCK
+ reap;
+}
+commit;
+
+
+### Read Skew (G-single) -- RC does not prevent, RR prevents
+--source hermitage_init.inc
+
+connection con1;
+select * from test where id = 1;
+connection con2;
+select * from test where id = 1;
+select * from test where id = 2;
+update test set value = 12 where id = 1;
+update test set value = 18 where id = 2;
+commit;
+connection con1;
+select * from test where id = 2; # RC shows 18, RR shows 20
+commit;
+
+# test using predicate dependencies
+--source hermitage_init.inc
+
+connection con1;
+select * from test where value % 5 = 0;
+connection con2;
+update test set value = 12 where value = 10;
+commit;
+connection con1;
+# RC: returns 1 => 12, RR: returns nothing
+select * from test where value % 3 = 0;
+commit;
+
+# on a write predicate
+--source hermitage_init.inc
+
+connection con1;
+select * from test where id = 1;
+connection con2;
+select * from test;
+update test set value = 12 where id = 1;
+update test set value = 18 where id = 2;
+commit;
+connection con1;
+if ($trx_isolation == "READ COMMITTED")
+{
+ delete from test where value = 20; # doesn't delete anything
+ select * from test where id = 2; # shows 2 => 18
+}
+if ($trx_isolation == "REPEATABLE READ")
+{
+ --error ER_LOCK_DEADLOCK
+ delete from test where value = 20;
+}
+commit;
+
+
+### Write Skew (G2-item) -- neither RC nor RR prevents
+--source hermitage_init.inc
+
+connection con1;
+select * from test where id in (1,2);
+connection con2;
+select * from test where id in (1,2);
+connection con1;
+update test set value = 11 where id = 1;
+connection con2;
+update test set value = 21 where id = 2;
+connection con1;
+commit;
+connection con2;
+commit;
+
+### Anti-Dependency Cycles (G2) -- neither RC nor RR prevents
+--source hermitage_init.inc
+
+connection con1;
+select * from test where value % 3 = 0;
+connection con2;
+select * from test where value % 3 = 0;
+connection con1;
+insert into test (id, value) values(3, 30);
+connection con2;
+insert into test (id, value) values(4, 42);
+connection con1;
+commit;
+connection con2;
+commit;
+select * from test where value % 3 = 0; # Either isolation level returns 3 => 30, 4 => 42
+connection con1;
+select * from test where value % 3 = 0;
+
+
+connection default;
+drop table test;
+
+disconnect con1;
+disconnect con2;
+disconnect con3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/hermitage.test b/storage/rocksdb/mysql-test/rocksdb/t/hermitage.test
new file mode 100644
index 00000000000..e4138e8d89f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/hermitage.test
@@ -0,0 +1,10 @@
+--source include/have_rocksdb.inc
+
+# Hermitage is an attempt to test transaction isolation levels.
+# https://github.com/ept/hermitage
+
+let $trx_isolation = READ COMMITTED;
+--source hermitage.inc
+
+let $trx_isolation = REPEATABLE READ;
+--source hermitage.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/hermitage_init.inc b/storage/rocksdb/mysql-test/rocksdb/t/hermitage_init.inc
new file mode 100644
index 00000000000..4f3f03efab0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/hermitage_init.inc
@@ -0,0 +1,8 @@
+connection con1;
+truncate table test;
+insert into test (id, value) values (1, 10), (2, 20);
+begin;
+connection con2;
+begin;
+connection con3;
+begin;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test b/storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test
new file mode 100644
index 00000000000..716f372067b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test
@@ -0,0 +1,29 @@
+--source include/have_rocksdb.inc
+
+#
+# Information Schema DDL
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS is_ddl_t1;
+DROP TABLE IF EXISTS is_ddl_t2;
+DROP TABLE IF EXISTS is_ddl_t3;
+--enable_warnings
+
+CREATE TABLE is_ddl_t1 (i INT, j INT, k INT, l INT,
+ PRIMARY KEY (i), KEY (j), KEY (k, l) COMMENT 'kl_cf')
+ ENGINE = ROCKSDB;
+
+CREATE TABLE is_ddl_t2 (x INT, y INT, z INT,
+ PRIMARY KEY (z, y) COMMENT 'zy_cf',
+ KEY (x)) ENGINE = ROCKSDB;
+CREATE TABLE is_ddl_t3 (a INT, b INT, c INT, PRIMARY KEY (a)) ENGINE = ROCKSDB
+ COMMENT "ttl_duration=3600;";
+
+--sorted_result
+SELECT TABLE_SCHEMA,TABLE_NAME,PARTITION_NAME,INDEX_NAME,INDEX_TYPE,KV_FORMAT_VERSION,CF,TTL_DURATION,INDEX_FLAGS FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME like 'is_ddl_t%';
+
+# cleanup
+DROP TABLE is_ddl_t1;
+DROP TABLE is_ddl_t2;
+DROP TABLE is_ddl_t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/i_s_deadlock.test b/storage/rocksdb/mysql-test/rocksdb/t/i_s_deadlock.test
new file mode 100644
index 00000000000..e0479d6a337
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/i_s_deadlock.test
@@ -0,0 +1,158 @@
+--source include/have_rocksdb.inc
+
+set @prior_lock_wait_timeout = @@rocksdb_lock_wait_timeout;
+set @prior_deadlock_detect = @@rocksdb_deadlock_detect;
+set @prior_max_latest_deadlocks = @@rocksdb_max_latest_deadlocks;
+set global rocksdb_deadlock_detect = on;
+set global rocksdb_lock_wait_timeout = 10000;
+--echo # Clears deadlock buffer of any prior deadlocks.
+set global rocksdb_max_latest_deadlocks = 0;
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+
+# needed by simple_deadlock.inc
+let $engine = rocksdb;
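+# (simple_deadlock.inc is not shown here; by its use below it is expected to
+# make two sessions lock rows on a $engine table in opposite order, adding
+# one entry to the deadlock buffer per inclusion.)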
+
+--source include/count_sessions.inc
+connect (con1,localhost,root,,);
+let $con1= `SELECT CONNECTION_ID()`;
+
+connect (con2,localhost,root,,);
+let $con2= `SELECT CONNECTION_ID()`;
+
+connect (con3,localhost,root,,);
+let $con3= `SELECT CONNECTION_ID()`;
+
+connection default;
+show create table information_schema.rocksdb_deadlock;
+
+create table t (i int primary key) engine=rocksdb;
+insert into t values (1), (2), (3);
+select * from information_schema.rocksdb_deadlock;
+
+echo Deadlock #1;
+--source include/simple_deadlock.inc
+connection default;
+--replace_column 1 DEADLOCK_ID 2 TIMESTAMP 3 TRANSACTION_ID 5 WAITING_KEY
+select * from information_schema.rocksdb_deadlock;
+
+echo Deadlock #2;
+--source include/simple_deadlock.inc
+connection default;
+--replace_column 1 DEADLOCK_ID 2 TIMESTAMP 3 TRANSACTION_ID 5 WAITING_KEY
+select * from information_schema.rocksdb_deadlock;
+set global rocksdb_max_latest_deadlocks = 10;
+
+echo Deadlock #3;
+--source include/simple_deadlock.inc
+connection default;
+--replace_column 1 DEADLOCK_ID 2 TIMESTAMP 3 TRANSACTION_ID 5 WAITING_KEY
+select * from information_schema.rocksdb_deadlock;
+set global rocksdb_max_latest_deadlocks = 1;
+--replace_column 1 DEADLOCK_ID 2 TIMESTAMP 3 TRANSACTION_ID 5 WAITING_KEY
+select * from information_schema.rocksdb_deadlock;
+
+connection con3;
+set rocksdb_deadlock_detect_depth = 2;
+
+echo Deadlock #4;
+connection con1;
+begin;
+select * from t where i=1 for update;
+
+connection con2;
+begin;
+select * from t where i=2 for update;
+
+connection con3;
+begin;
+select * from t where i=3 for update;
+
+connection con1;
+send select * from t where i=2 for update;
+
+connection con2;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con1 and waiting_key != "";
+--source include/wait_condition.inc
+
+send select * from t where i=3 for update;
+
+connection con3;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con2 and waiting_key != "";
+--source include/wait_condition.inc
+
+--error ER_LOCK_DEADLOCK
+select * from t where i=1 for update;
+rollback;
+
+connection con2;
+reap;
+rollback;
+
+connection con1;
+reap;
+rollback;
+
+connection default;
+set global rocksdb_max_latest_deadlocks = 5;
+--replace_column 1 DEADLOCK_ID 2 TIMESTAMP 3 TRANSACTION_ID 5 WAITING_KEY
+select * from information_schema.rocksdb_deadlock;
+
+echo Deadlock #5;
+connection con1;
+begin;
+select * from t where i=1 for update;
+
+connection con2;
+begin;
+select * from t where i=2 for update;
+
+connection con3;
+begin;
+select * from t where i=3 lock in share mode;
+
+connection con1;
+select * from t where i=100 for update;
+select * from t where i=101 for update;
+send select * from t where i=2 for update;
+
+connection con2;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con1 and waiting_key != "";
+--source include/wait_condition.inc
+
+select * from t where i=3 lock in share mode;
+select * from t where i=200 for update;
+select * from t where i=201 for update;
+
+--error ER_LOCK_DEADLOCK
+select * from t where i=1 lock in share mode;
+rollback;
+
+connection con1;
+reap;
+rollback;
+
+connection con3;
+rollback;
+
+connection default;
+--replace_column 1 DEADLOCK_ID 2 TIMESTAMP 3 TRANSACTION_ID 5 WAITING_KEY
+select * from information_schema.rocksdb_deadlock;
+
+disconnect con1;
+disconnect con2;
+disconnect con3;
+
+set global rocksdb_lock_wait_timeout = @prior_lock_wait_timeout;
+set global rocksdb_deadlock_detect = @prior_deadlock_detect;
+drop table t;
+--replace_column 1 DEADLOCK_ID 2 TIMESTAMP 3 TRANSACTION_ID 5 WAITING_KEY 7 INDEX_NAME 8 TABLE_NAME
+select * from information_schema.rocksdb_deadlock;
+set global rocksdb_max_latest_deadlocks = 0;
+--echo # Clears deadlock buffer of any existent deadlocks.
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY
+select * from information_schema.rocksdb_deadlock;
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index.inc b/storage/rocksdb/mysql-test/rocksdb/t/index.inc
new file mode 100644
index 00000000000..8b000a255b3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index.inc
@@ -0,0 +1,155 @@
+#
+# Basic syntax related to indexes:
+# unique and non-unique keys,
+# single- and multi-column keys,
+# index option COMMENT.
+#
+# See other index* tests for operations
+# which are less likely to be supported
+#
+# PRIMARY KEY syntax is covered in index_primary test.
+# Index types BTREE|HASH -- in index_type_btree|hash tests.
+# SPATIAL -- in type_spatial_indexes test.
+# FULLTEXT -- in fulltext_search test.
+# KEY_BLOCK_SIZE -- in index_key_block_size test.
+#
+# Usage (calling this file from another test):
+#
+# A calling test may define $index_type, in which case a
+# USING clause will be added to the index definitions
+# (see the example after the $index_type check below).
+#
+
+################################################
+# TODO:
+# A part of the test is disabled because unique indexes
+# are not currently supported
+################################################
+
+
+let $using_index_type = ;
+if ($index_type)
+{
+ let $using_index_type = USING $index_type;
+}
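+# For example, a hypothetical caller could run:
+#
+# let $index_type = BTREE;
+# --source index.inc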
+
+
+eval CREATE TABLE t1 (a INT,
+ b CHAR(8),
+ pk INT PRIMARY KEY,
+ KEY $using_index_type (a)
+) ENGINE=rocksdb;
+
+--replace_column 7 #
+SHOW KEYS IN t1;
+DROP TABLE t1;
+
+eval CREATE TABLE t1 (a INT,
+ b CHAR(8),
+ pk INT PRIMARY KEY,
+ KEY a_b $using_index_type (a,b) COMMENT 'a_b index'
+) ENGINE=rocksdb;
+
+--replace_column 7 #
+SHOW KEYS IN t1;
+DROP TABLE t1;
+
+eval CREATE TABLE t1 (a INT,
+ b CHAR(8),
+ pk INT PRIMARY KEY,
+ KEY $using_index_type (a),
+ KEY $using_index_type (b)
+) ENGINE=rocksdb;
+
+--replace_column 7 #
+SHOW KEYS IN t1;
+DROP TABLE t1;
+
+--disable_parsing
+
+eval CREATE TABLE t1 (a INT,
+ b CHAR(8),
+ pk INT PRIMARY KEY,
+ UNIQUE INDEX $using_index_type (a)
+) ENGINE=rocksdb;
+
+--replace_column 7 #
+SHOW KEYS IN t1;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+--error ER_DUP_ENTRY,ER_DUP_KEY
+INSERT INTO t1 (a,b) VALUES (1,'c');
+
+DROP TABLE t1;
+
+--source drop_table_sync.inc
+
+--enable_parsing
+
+#
+# ALTER TABLE
+#
+
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (100,'z');
+
+eval ALTER TABLE t1 ADD KEY (a) $using_index_type COMMENT 'simple index on a';
+--replace_column 7 #
+SHOW INDEX FROM t1;
+ALTER TABLE t1 DROP KEY a;
+DROP TABLE t1;
+
+--disable_parsing
+
+eval CREATE TABLE t1 (a INT,
+ b CHAR(8),
+ pk INT AUTO_INCREMENT PRIMARY KEY,
+ UNIQUE INDEX $using_index_type (a)
+) ENGINE=rocksdb;
+
+--replace_column 7 #
+SHOW KEYS IN t1;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+--error ER_DUP_ENTRY,ER_DUP_KEY
+INSERT INTO t1 (a,b) VALUES (1,'c');
+
+ALTER TABLE t1 DROP INDEX a;
+INSERT INTO t1 (a,b) VALUES (1,'c');
+--error ER_DUP_ENTRY
+eval ALTER TABLE t1 ADD UNIQUE INDEX a(a) $using_index_type;
+DROP TABLE t1;
+
+--enable_parsing
+
+#
+# Test index prefix length limits.
+#
+set global rocksdb_large_prefix=0;
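+# (With rocksdb_large_prefix=0 the index prefix limit is 767 bytes, matching
+# InnoDB without large prefixes; with it set to 1 the limit grows to 3072
+# bytes. The (a(768)) and (a(3073)) keys below probe one byte past each
+# limit.)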
+
+CREATE TABLE t1 (
+ a BLOB(1024),
+ KEY (a(767))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+
+# Should display warning
+CREATE TABLE t1 (
+ a BLOB(1024),
+ KEY (a(768))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+
+set global rocksdb_large_prefix=1;
+
+CREATE TABLE t1 (
+ a BLOB(4096),
+ KEY (a(3072))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+
+# Should display warning
+CREATE TABLE t1 (
+ a BLOB(4096),
+ KEY (a(3073))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+
+set global rocksdb_large_prefix=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index.test b/storage/rocksdb/mysql-test/rocksdb/t/index.test
new file mode 100644
index 00000000000..e0c6175b397
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index.test
@@ -0,0 +1,44 @@
+--source include/have_rocksdb.inc
+
+#
+# Basic syntax related to indexes:
+# unique and non-unique keys,
+# single- and multi-column keys,
+# index option COMMENT.
+#
+# See other index* tests for operations
+# which are less likely to be supported
+#
+# PRIMARY KEY syntax is covered in index_primary test.
+# Index types BTREE|HASH -- in index_type_btree|hash tests.
+# SPATIAL -- in type_spatial_indexes test.
+# FULLTEXT -- in fulltext_search test.
+# KEY_BLOCK_SIZE -- in index_key_block_size test.
+#
+
+# (Default index type)
+
+--source index.inc
+
+--echo #
+--echo # Issue #376: MyRocks: ORDER BY optimizer is unable to use the index extension
+--echo #
+create table t0 (a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int);
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+create table t2 (
+ pk int not null,
+ a int not null,
+ b int not null,
+ primary key(pk),
+ key(a)
+) engine=rocksdb;
+insert into t2 select A.a, FLOOR(A.a/10), A.a from t1 A;
+
+--echo # This must have type=range, index=a, and must not have 'Using filesort':
+--replace_column 9 #
+explain select * from t2 force index (a) where a=0 and pk>=3 order by pk;
+
+drop table t0,t1,t2;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_file_map-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/index_file_map-master.opt
new file mode 100644
index 00000000000..436edf2b40c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index_file_map-master.opt
@@ -0,0 +1 @@
+--rocksdb_table_stats_sampling_pct=100
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_file_map.test b/storage/rocksdb/mysql-test/rocksdb/t/index_file_map.test
new file mode 100644
index 00000000000..8b0e2339426
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index_file_map.test
@@ -0,0 +1,54 @@
+--source include/have_rocksdb.inc
+
+#
+# Information Schema index file map
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+--enable_warnings
+
+
+CREATE TABLE t1 (i INT PRIMARY KEY, j INT, INDEX(j)) ENGINE = ROCKSDB;
+CREATE TABLE t2 (k INT PRIMARY KEY, l INT REFERENCES t1.i) ENGINE = ROCKSDB;
+
+INSERT INTO t1 VALUES (1,2), (2,4), (3,6), (4,8), (5,10);
+INSERT INTO t2 VALUES (100,1), (200,2), (300,3), (400,4);
+
+COMMIT;
+
+# Flush memtable out to SST
+SET GLOBAL rocksdb_force_flush_memtable_now = 1;
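+# (ROCKSDB_INDEX_FILE_MAP is built from SST files only, so without this
+# flush the freshly inserted rows would still sit in the memtable and the
+# queries below would likely come back empty.)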
+
+###############################################################################
+# Test that expected index_file_map data exists
+###############################################################################
+
+# Return the data for the primary key of t1
+--replace_column 1 # 2 # 3 SSTNAME 5 # 6 # 7 # 8 # 9 #
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_INDEX_FILE_MAP
+WHERE INDEX_NUMBER =
+ (SELECT INDEX_NUMBER FROM INFORMATION_SCHEMA.ROCKSDB_DDL
+ WHERE TABLE_NAME = 't1' AND INDEX_NAME = "PRIMARY");
+
+# Return the data for the secondary index of t1
+--replace_column 1 # 2 # 3 SSTNAME 5 # 6 # 7 # 8 # 9 #
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_INDEX_FILE_MAP
+WHERE INDEX_NUMBER =
+ (SELECT INDEX_NUMBER FROM INFORMATION_SCHEMA.ROCKSDB_DDL
+ WHERE TABLE_NAME = 't1' AND INDEX_NAME = "j");
+
+# Return the data for the primary index of t2
+--replace_column 1 # 2 # 3 SSTNAME 5 # 6 # 7 # 8 # 9 #
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_INDEX_FILE_MAP
+WHERE INDEX_NUMBER =
+ (SELECT INDEX_NUMBER FROM INFORMATION_SCHEMA.ROCKSDB_DDL
+ WHERE TABLE_NAME = 't2' AND INDEX_NAME = "PRIMARY");
+
+# The number of sst files should be 1 or more
+select count(*) > 0 from information_schema.rocksdb_sst_props;
+
+# cleanup
+DROP TABLE t1;
+DROP TABLE t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_key_block_size.test b/storage/rocksdb/mysql-test/rocksdb/t/index_key_block_size.test
new file mode 100644
index 00000000000..f156aec0021
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index_key_block_size.test
@@ -0,0 +1,70 @@
+--source include/have_rocksdb.inc
+
+#
+# KEY_BLOCK_SIZE index option.
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a INT,
+ b CHAR(8),
+ pk INT PRIMARY KEY,
+ KEY (a) KEY_BLOCK_SIZE=8
+) ENGINE=rocksdb;
+
+--replace_column 7 #
+SHOW KEYS IN t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a INT,
+ b CHAR(8),
+ pk INT PRIMARY KEY,
+ KEY ind1(b ASC) KEY_BLOCK_SIZE=0
+) ENGINE=rocksdb;
+
+--replace_column 7 #
+SHOW INDEX IN t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a INT,
+ b CHAR(8),
+ PRIMARY KEY ind2(b(1) DESC) KEY_BLOCK_SIZE=32768 COMMENT 'big key_block_size value'
+) ENGINE=rocksdb;
+
+--replace_column 7 #
+SHOW INDEX IN t1;
+
+DROP TABLE t1;
+
+CREATE TABLE t1 (a INT,
+ b CHAR(8),
+ pk INT AUTO_INCREMENT PRIMARY KEY,
+ KEY a_b(a,b) KEY_BLOCK_SIZE=8192
+) ENGINE=rocksdb;
+
+--replace_column 7 #
+SHOW INDEX IN t1;
+
+DROP TABLE t1;
+
+--source drop_table_sync.inc
+
+#
+# ALTER TABLE
+#
+
+CREATE TABLE t1 (a INT,
+ b CHAR(8),
+ PRIMARY KEY (b)
+) ENGINE=rocksdb;
+
+INSERT INTO t1 (a,b) VALUES (100,'z');
+
+ALTER TABLE t1 ADD KEY(a) KEY_BLOCK_SIZE 8192;
+--replace_column 7 #
+SHOW INDEX FROM t1;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb-master.opt
new file mode 100644
index 00000000000..c07b063f07c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb-master.opt
@@ -0,0 +1 @@
+--rocksdb_strict_collation_check=off --binlog_format=row --log-bin
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test
new file mode 100644
index 00000000000..887b4dd6a65
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test
@@ -0,0 +1,110 @@
+--source include/have_rocksdb.inc
+
+#
+# index_merge_rocksdb test copied over from index_merge_ror.inc
+#
+# Triggers issue https://github.com/facebook/mysql-5.6/issues/604
+
+CREATE TABLE t1
+(
+ /* fields/keys for row retrieval tests */
+ key1 INT,
+ key2 INT,
+ key3 INT,
+ key4 INT,
+
+ /* make rows much bigger than keys */
+ filler1 CHAR(200),
+
+ KEY(key1),
+ KEY(key2)
+) ENGINE=ROCKSDB;
+
+# fill table
+CREATE TABLE t0 AS SELECT * FROM t1;
+--disable_query_log
+--echo # Printing of many insert into t0 values (....) disabled.
+let $cnt=100;
+while ($cnt)
+{
+ eval INSERT INTO t0 VALUES (0, 0, 0, 0, 'data1');
+ dec $cnt;
+}
+
+--echo # Printing of many insert into t1 select .... from t0 disabled.
+let $1=4;
+while ($1)
+{
+ let $2=4;
+ while ($2)
+ {
+ let $3=4;
+ while ($3)
+ {
+ eval INSERT INTO t1 SELECT key1, key2, key3, key4, filler1 FROM t0;
+ dec $3;
+ }
+ dec $2;
+ }
+ dec $1;
+}
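+# (The nested loops above insert the 100 rows of t0 into t1 4*4*4 = 64
+# times, i.e. 6400 filler rows, before the targeted key1/key2 rows below.)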
+
+--echo # Printing of many insert into t1 (...) values (....) disabled.
+# Row retrieval tests
+# -1 is used for values 'out of any range we are using'
+# insert enough rows for index intersection to be used for (key1,key2)
+INSERT INTO t1 (key1, key2, key3, key4, filler1) VALUES (100, 100, 100, 100,'key1-key2-key3-key4');
+let $cnt=400;
+while ($cnt)
+{
+ eval INSERT INTO t1 (key1, key2, key3, key4, filler1) VALUES (100, -1, 100, -1,'key1-key3');
+ dec $cnt;
+}
+let $cnt=400;
+while ($cnt)
+{
+ eval INSERT INTO t1 (key1, key2, key3, key4, filler1) VALUES (-1, 100, -1, 100,'key2-key4');
+ dec $cnt;
+}
+--enable_query_log
+
+SELECT COUNT(*) FROM t1;
+
+# flush the table first, as statistics are calculated a bit differently for the memtable and SST files
+SET GLOBAL rocksdb_force_flush_memtable_now = 1;
+
+-- disable_query_log
+-- disable_result_log
+ANALYZE TABLE t1;
+-- enable_result_log
+-- enable_query_log
+
+--replace_column 9 #
+EXPLAIN UPDATE t1 SET filler1='to be deleted' WHERE key1=100 AND key2=100;
+UPDATE t1 SET filler1='to be deleted' WHERE key1=100 and key2=100;
+
+DROP TABLE t0, t1;
+
+# Issue #624 - MyRocks executes the index_merge query plan incorrectly
+create table t1 (key1 int, key2 int, key3 int, key (key1), key (key2), key(key3)) engine=rocksdb;
+insert into t1 values (1, 100, 100), (1, 200, 200), (1, 300, 300);
+--disable_query_log
+let $i = 1;
+while ($i <= 1000) {
+ let $insert = INSERT INTO t1 VALUES(1000,1000,1000);
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+set global rocksdb_force_flush_memtable_now=1;
+analyze table t1;
+
+--replace_column 9 #
+explain select * from t1 where key1 = 1;
+--replace_column 9 #
+explain select key1,key2 from t1 where key1 = 1 or key2 = 1;
+select * from t1 where key1 = 1;
+select key1,key2 from t1 where key1 = 1 or key2 = 1;
+
+drop table t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2-master.opt
new file mode 100644
index 00000000000..7681c42eeb3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2-master.opt
@@ -0,0 +1 @@
+--rocksdb_strict_collation_check=off --binlog_format=row --log-bin --rocksdb_records_in_range=2
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test
new file mode 100644
index 00000000000..2306558ff41
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test
@@ -0,0 +1,72 @@
+# Skipping this test under Valgrind as per Bug-14627884
+--source include/not_valgrind.inc
+# Adding big test option for this test.
+--source include/big_test.inc
+
+# t/index_merge_innodb.test
+#
+# Index merge tests (the test is called 'index_merge_rocksdb2' because
+# 'index_merge_rocksdb' has already existed before copying 'index_merge_innodb')
+#
+# Last update:
+# 2006-08-07 ML test refactored (MySQL 5.1)
+# Main code of several index_merge tests
+# -> include/index_merge*.inc
+# wrapper t/index_merge_innodb.test sources now several
+# include/index_merge*.inc files
+#
+
+--source include/have_rocksdb.inc
+let $engine_type= RocksDB;
+# skipping because too unstable in MyRocks
+let $skip_ror_EXPLAIN_for_MyRocks = 1;
+let $random_rows_in_EXPLAIN = 1;
+let $sorted_result = 1;
+# According to Oracle: "InnoDB's estimate for the index cardinality
+# depends on a pseudo random number generator (it picks up random
+# pages to sample). After an optimization that was made in r2625 two
+# EXPLAINs started returning a different number of rows (3 instead of
+# 4)", so:
+let $index_merge_random_rows_in_EXPLAIN = 1;
+# RocksDB does not support Merge tables (affects include/index_merge1.inc)
+let $merge_table_support= 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+--source include/index_merge1.inc
+set global rocksdb_force_flush_memtable_now=1;
+--source include/index_merge_ror.inc
+set global rocksdb_force_flush_memtable_now=1;
+--source include/index_merge2.inc
+
+set global rocksdb_force_flush_memtable_now=1;
+--source include/index_merge_2sweeps.inc
+set global rocksdb_force_flush_memtable_now=1;
+--source include/index_merge_ror_cpk.inc
+
+set global rocksdb_force_flush_memtable_now=1;
+--echo #
+--echo # Bug#11747423 32254: INDEX MERGE USED UNNECESSARILY
+--echo #
+CREATE TABLE t1 (
+ id INT NOT NULL PRIMARY KEY,
+ id2 INT NOT NULL,
+ id3 INT NOT NULL,
+ KEY (id2),
+ KEY (id3),
+ KEY covering_index (id2,id3)
+) ENGINE=RocksDB;
+
+INSERT INTO t1 VALUES (0, 0, 0), (1, 1, 1), (2, 2, 2), (3, 3, 3), (4, 4, 4), (5, 5, 5), (6, 6, 6), (7, 7, 7);
+INSERT INTO t1 SELECT id + 8, id2 + 8, id3 +8 FROM t1;
+INSERT INTO t1 SELECT id + 16, 7, 0 FROM t1;
+
+-- disable_query_log
+-- disable_result_log
+set global rocksdb_force_flush_memtable_now=1;
+analyze table t1;
+-- enable_result_log
+-- enable_query_log
+
+EXPLAIN SELECT SQL_NO_CACHE count(*) FROM t1 WHERE id2=7 AND id3=0;
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_primary.test b/storage/rocksdb/mysql-test/rocksdb/t/index_primary.test
new file mode 100644
index 00000000000..5dcfbaa8d3b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index_primary.test
@@ -0,0 +1,96 @@
+--source include/have_rocksdb.inc
+
+#
+# Basic syntax related to primary keys
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a INT PRIMARY KEY,
+ b CHAR(8)
+) ENGINE=rocksdb;
+
+--replace_column 7 #
+SHOW KEYS IN t1;
+
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+--error ER_DUP_ENTRY
+INSERT INTO t1 (a,b) VALUES (1,'c');
+
+DROP TABLE t1;
+
+--error ER_MULTIPLE_PRI_KEY
+CREATE TABLE t1 (a INT PRIMARY KEY,
+ b CHAR(8) PRIMARY KEY
+) ENGINE=rocksdb;
+
+CREATE TABLE t1 (a INT,
+ b CHAR(8),
+ PRIMARY KEY (a,b)
+) ENGINE=rocksdb;
+
+--replace_column 7 #
+SHOW INDEX IN t1;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(1,'b'),(2,'a'),(2,'b');
+--error ER_DUP_ENTRY
+INSERT INTO t1 (a,b) VALUES (1,'b');
+DROP TABLE t1;
+
+# KEY in a column definition means PK!
+
+CREATE TABLE t1 (a INT KEY,
+ b CHAR(8),
+ KEY (b)
+) ENGINE=rocksdb;
+
+--replace_column 7 #
+SHOW INDEX IN t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a INT,
+ b CHAR(8) PRIMARY KEY
+) ENGINE=rocksdb;
+
+--replace_column 7 #
+SHOW INDEX IN t1;
+
+--error ER_MULTIPLE_PRI_KEY
+ALTER TABLE t1 ADD CONSTRAINT PRIMARY KEY pk (a);
+--replace_column 7 #
+SHOW KEYS IN t1;
+DROP TABLE t1;
+
+#
+# Test index prefix length limits.
+#
+set global rocksdb_large_prefix=0;
+
+CREATE TABLE t1 (
+ a BLOB(1024),
+ PRIMARY KEY (a(767))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+
+--error ER_TOO_LONG_KEY
+CREATE TABLE t1 (
+ a BLOB(1024),
+ PRIMARY KEY (a(768))
+) ENGINE=rocksdb;
+
+set global rocksdb_large_prefix=1;
+
+CREATE TABLE t1 (
+ a BLOB(4096),
+ PRIMARY KEY (a(3072))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+
+--error ER_TOO_LONG_KEY
+CREATE TABLE t1 (
+ a BLOB(4096),
+ PRIMARY KEY (a(3073))
+) ENGINE=rocksdb;
+
+set global rocksdb_large_prefix=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_type_btree.test b/storage/rocksdb/mysql-test/rocksdb/t/index_type_btree.test
new file mode 100644
index 00000000000..4adc5b55329
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index_type_btree.test
@@ -0,0 +1,12 @@
+--source include/have_rocksdb.inc
+
+#
+# Index type BTREE
+#
+
+let $index_type = BTREE;
+
+--source index.inc
+
+let $index_type =;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_type_hash.test b/storage/rocksdb/mysql-test/rocksdb/t/index_type_hash.test
new file mode 100644
index 00000000000..f3dc9cf5f10
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index_type_hash.test
@@ -0,0 +1,12 @@
+--source include/have_rocksdb.inc
+
+#
+# Index type HASH
+#
+
+let $index_type = HASH;
+
+--source index.inc
+
+let $index_type =;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/information_schema-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/information_schema-master.opt
new file mode 100644
index 00000000000..86379847638
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/information_schema-master.opt
@@ -0,0 +1 @@
+--binlog_format=row --log_slave_updates
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/information_schema.test b/storage/rocksdb/mysql-test/rocksdb/t/information_schema.test
new file mode 100644
index 00000000000..09998b9ae5f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/information_schema.test
@@ -0,0 +1,89 @@
+--source include/have_rocksdb.inc
+--source include/have_log_bin.inc
+
+--source include/restart_mysqld.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+DROP TABLE IF EXISTS t3;
+--enable_warnings
+
+# MariaDB: the following handles the case where the test is started
+# on a totally empty datadir, where no MyRocks table has ever been
+# created. In that case, there is no MAX_INDEX_ID.
+# Create and drop a table so that we do have a MAX_INDEX_ID.
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK=1;
+create table t1 (a int) engine=rocksdb;
+drop table t1;
+
+--let $max_index_id = query_get_value(SELECT * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type = 'MAX_INDEX_ID', VALUE, 1)
+--replace_result $max_index_id max_index_id
+select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type<>'DDL_DROP_INDEX_ONGOING';
+select count(*) from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type<>'DDL_DROP_INDEX_ONGOING';
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK=0;
+
+select VALUE into @keysIn from INFORMATION_SCHEMA.ROCKSDB_COMPACTION_STATS where CF_NAME = 'default' and LEVEL = 'Sum' and TYPE = 'KeyIn';
+
+CREATE TABLE t1 (i1 INT, i2 INT, PRIMARY KEY (i1)) ENGINE = ROCKSDB;
+INSERT INTO t1 VALUES (1, 1), (2, 2), (3, 3);
+
+set global rocksdb_force_flush_memtable_now = true;
+# No binlog coordinates in MariaDB: --let $MASTER_UUID = query_get_value(SELECT @@SERVER_UUID, @@SERVER_UUID, 1)
+--let $max_index_id = query_get_value(SELECT * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type = 'MAX_INDEX_ID', VALUE, 1)
+# No binlog coordinates in MariaDB: --replace_result $MASTER_UUID uuid $max_index_id max_index_id
+--replace_result $max_index_id max_index_id
+select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO;
+select count(*) from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO;
+
+set global rocksdb_force_flush_memtable_now = true;
+set global rocksdb_compact_cf='default';
+select case when VALUE-@keysIn >= 3 then 'true' else 'false' end from INFORMATION_SCHEMA.ROCKSDB_COMPACTION_STATS where CF_NAME = 'default' and LEVEL = 'Sum' and TYPE = 'KeyIn';
+
+CREATE INDEX tindex1 on t1 (i1);
+--let $start_max_index_id = query_get_value(SELECT * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type = 'MAX_INDEX_ID', VALUE, 1)
+
+CREATE INDEX tindex2 on t1 (i2);
+--let $end_max_index_id = query_get_value(SELECT * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type = 'MAX_INDEX_ID', VALUE, 1)
+
+if ($end_max_index_id <= $start_max_index_id) {
+ echo Max index ID did not increase;
+}
+
+select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where TYPE = 'CF_FLAGS';
+
+CREATE TABLE t2 (
+ a int,
+ b int,
+ c int,
+ d int,
+ PRIMARY KEY (a) COMMENT "cf_a",
+ KEY (b) COMMENT "cf_b",
+ KEY (c) COMMENT "cf_c",
+ KEY (d) COMMENT "rev:cf_d") ENGINE=ROCKSDB;
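+# Each index COMMENT above names the column family the index is stored
+# in; a "rev:" prefix requests a reverse-ordered CF. The new CFs show up
+# as additional CF_FLAGS rows below.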
+
+select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where TYPE = 'CF_FLAGS';
+
+CREATE TABLE t3 (a INT, PRIMARY KEY (a)) ENGINE=ROCKSDB;
+insert into t3 (a) values (1), (2), (3);
+SET @ORIG_ROCKSDB_PAUSE_BACKGROUND_WORK = @@GLOBAL.ROCKSDB_PAUSE_BACKGROUND_WORK;
+--let $t3_index_id = query_get_value(SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME = 't3', INDEX_NUMBER, 1)
+--let $t3_cf_id = query_get_value(SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME = 't3', COLUMN_FAMILY, 1)
+SHOW GLOBAL VARIABLES LIKE 'ROCKSDB_PAUSE_BACKGROUND_WORK';
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK=1;
+SHOW GLOBAL VARIABLES LIKE 'ROCKSDB_PAUSE_BACKGROUND_WORK';
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK=1;
+SHOW GLOBAL VARIABLES LIKE 'ROCKSDB_PAUSE_BACKGROUND_WORK';
+DROP TABLE t3;
+--let $result = query_get_value("SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO WHERE TYPE = 'DDL_DROP_INDEX_ONGOING' AND NAME LIKE 'cf_id:$t3_cf_id,index_id:$t3_index_id'", NAME, 1)
+--echo $result
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK=0;
+SHOW GLOBAL VARIABLES LIKE 'ROCKSDB_PAUSE_BACKGROUND_WORK';
+--echo next line shouldn't cause assertion to fail
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK=0;
+SHOW GLOBAL VARIABLES LIKE 'ROCKSDB_PAUSE_BACKGROUND_WORK';
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_ROCKSDB_PAUSE_BACKGROUND_WORK;
+
+DROP TABLE t1;
+DROP TABLE t2;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/init_stats_procedure.inc b/storage/rocksdb/mysql-test/rocksdb/t/init_stats_procedure.inc
new file mode 100644
index 00000000000..dda253bc346
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/init_stats_procedure.inc
@@ -0,0 +1,40 @@
+# This inc script creates two procedures -- save_read_stats() and
+# get_read_stats(). get_read_stats() prints differential rocksdb_rows_read,
+# rocksdb_rows_updated, and rocksdb_rows_deleted values since calling
+# save_read_stats().
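+#
+# Typical usage in a test (illustrative sketch):
+#   --source init_stats_procedure.inc
+#   call save_read_stats();
+#   <statements under test>
+#   call get_read_stats();  # prints the rows read/updated/deleted deltas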
+
+delimiter //;
+create procedure save_read_stats()
+begin
+ /*select rows_requested into @rq from information_schema.table_statistics
+ where table_schema=database() and table_name='t1';*/
+ select rows_read into @rr_is from information_schema.table_statistics
+ where table_schema=database() and table_name='t1';
+ select variable_value into @rr from information_schema.global_status
+ where variable_name='rocksdb_rows_read';
+ select variable_value into @ru from information_schema.global_status
+ where variable_name='rocksdb_rows_updated';
+ select variable_value into @rd from information_schema.global_status
+ where variable_name='rocksdb_rows_deleted';
+end//
+
+create procedure get_read_stats()
+begin
+ /*select rows_requested - @rq as rows_requested from
+ information_schema.table_statistics
+ where table_schema=database() and table_name='t1';*/
+ select rows_read - @rr_is as rows_read_userstat from
+ information_schema.table_statistics
+ where table_schema=database() and table_name='t1';
+ select variable_value - @rr as rows_read from
+ information_schema.global_status
+ where variable_name='rocksdb_rows_read';
+ select variable_value - @ru as rows_updated from
+ information_schema.global_status
+ where variable_name='rocksdb_rows_updated';
+ select variable_value - @rd as rows_deleted from
+ information_schema.global_status
+ where variable_name='rocksdb_rows_deleted';
+end//
+delimiter ;//
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/innodb_i_s_tables_disabled-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/innodb_i_s_tables_disabled-master.opt
new file mode 100644
index 00000000000..b3565b5fa82
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/innodb_i_s_tables_disabled-master.opt
@@ -0,0 +1,30 @@
+--loose-enable-innodb_trx
+--loose-enable-innodb_file_status
+--loose-enable-innodb_locks
+--loose-enable-innodb_lock_waits
+--loose-enable-innodb_cmp
+--loose-enable-innodb_cmp_reset
+--loose-enable-innodb_cmp_per_index
+--loose-enable-innodb_cmp_per_index_reset
+--loose-enable-innodb_cmpmem
+--loose-enable-innodb_cmpmem_reset
+--loose-enable-innodb_metrics
+--loose-enable-innodb_ft_default_stopword
+--loose-enable-innodb_ft_deleted
+--loose-enable-innodb_ft_being_deleted
+--loose-enable-innodb_ft_index_cache
+--loose-enable-innodb_ft_index_table
+--loose-enable-innodb_ft_config
+--loose-enable-innodb_buffer_pool_stats
+--loose-enable-innodb_buffer_page
+--loose-enable-innodb_buffer_page_lru
+--loose-enable-innodb_sys_tables
+--loose-enable-innodb_sys_tablestats
+--loose-enable-innodb_sys_indexes
+--loose-enable-innodb_sys_columns
+--loose-enable-innodb_sys_fields
+--loose-enable-innodb_sys_foreign
+--loose-enable-innodb_sys_foreign_cols
+--loose-enable-innodb_sys_tablespaces
+--loose-enable-innodb_sys_datafiles
+--loose-enable-innodb_sys_docstore_fields
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/innodb_i_s_tables_disabled.test b/storage/rocksdb/mysql-test/rocksdb/t/innodb_i_s_tables_disabled.test
new file mode 100644
index 00000000000..4ff48e13089
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/innodb_i_s_tables_disabled.test
@@ -0,0 +1,43 @@
+--source include/have_rocksdb.inc
+
+# Make sure that the InnoDb information schema tables are disabled when InnoDB
+# is turned off and attempting to access them doesn't crash.
+
+# Disable warnings, as the table names in warnings appear in lower- or
+# uppercase depending on the platform
+
+--disable_warnings
+
+SELECT * FROM INFORMATION_SCHEMA.INNODB_TRX;
+#Not in MariaDB: SELECT * FROM INFORMATION_SCHEMA.INNODB_FILE_STATUS;
+
+SELECT * FROM INFORMATION_SCHEMA.INNODB_LOCKS;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_LOCK_WAITS;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_CMP;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_CMP_RESET;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_CMP_PER_INDEX;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_CMP_PER_INDEX_RESET;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_CMPMEM;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_CMPMEM_RESET;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_METRICS;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_DEFAULT_STOPWORD;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_DELETED;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_BEING_DELETED;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHE;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_CONFIG;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_BUFFER_POOL_STATS;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_BUFFER_PAGE;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_BUFFER_PAGE_LRU;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_SYS_INDEXES;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_SYS_COLUMNS;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_SYS_FIELDS;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_SYS_FOREIGN;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_SYS_FOREIGN_COLS;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES;
+SELECT * FROM INFORMATION_SCHEMA.INNODB_SYS_DATAFILES;
+#Not in MariaDB: SELECT * FROM INFORMATION_SCHEMA.INNODB_SYS_DOCSTORE_FIELDS;
+
+--enable_warnings
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/insert.test b/storage/rocksdb/mysql-test/rocksdb/t/insert.test
new file mode 100644
index 00000000000..aebd3f76e87
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/insert.test
@@ -0,0 +1,99 @@
+--source include/have_rocksdb.inc
+
+#
+# Basic INSERT statements
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1, t2;
+--enable_warnings
+
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+
+# INSERT [INTO] .. VALUES|VALUE ..
+
+INSERT INTO t1 VALUES (100,'foobar',100),(1,'a',101),(2,'b',103),(3,'c',104),(4,'d',105),(5,'e',106);
+--sorted_result
+SELECT a,b FROM t1;
+
+INSERT t1 VALUE (10,'foo',107),(11,'abc',108);
+--sorted_result
+SELECT a,b FROM t1;
+
+INSERT INTO t1 (b,a) VALUES ('test',0);
+--sorted_result
+SELECT a,b FROM t1;
+
+INSERT INTO t1 VALUES (DEFAULT,DEFAULT,NULL);
+--sorted_result
+SELECT a,b FROM t1;
+
+INSERT t1 (a) VALUE (10),(20);
+--sorted_result
+SELECT a,b FROM t1;
+
+# INSERT [INTO] .. SET
+
+INSERT INTO t1 SET a = 11, b = 'f';
+--sorted_result
+SELECT a,b FROM t1;
+
+INSERT t1 SET b = DEFAULT;
+--sorted_result
+SELECT a,b FROM t1;
+
+
+# INSERT .. SELECT
+
+CREATE TABLE t2 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+
+INSERT INTO t2 SELECT a,b,pk FROM t1;
+INSERT INTO t1 (a) SELECT a FROM t2 WHERE b = 'foo';
+--sorted_result
+SELECT a,b FROM t1;
+
+INSERT t1 (a,b) SELECT a,b FROM t1;
+--sorted_result
+SELECT a,b FROM t1;
+
+DROP TABLE t1, t2;
+
+#
+# Transactional INSERT
+#
+
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+
+BEGIN;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(100,'foo');
+INSERT t1 (a,b) VALUE (10,'foo'),(11,'abc');
+COMMIT;
+--sorted_result
+SELECT a,b FROM t1;
+
+BEGIN;
+INSERT INTO t1 (b,a) VALUES ('test',0);
+SAVEPOINT spt1;
+INSERT INTO t1 (a,b) VALUES (DEFAULT,DEFAULT);
+RELEASE SAVEPOINT spt1;
+INSERT INTO t1 (a,b) VALUES (DEFAULT,DEFAULT);
+ROLLBACK;
+--sorted_result
+SELECT a,b FROM t1;
+
+BEGIN;
+INSERT t1 (a) VALUE (10),(20);
+SAVEPOINT spt1;
+INSERT INTO t1 SET a = 11, b = 'f';
+INSERT t1 SET b = DEFAULT;
+--error ER_ROLLBACK_TO_SAVEPOINT
+ROLLBACK TO SAVEPOINT spt1;
+INSERT INTO t1 (b,a) VALUES ('test1',10);
+--error ER_ROLLBACK_ONLY
+COMMIT;
+--sorted_result
+SELECT a,b FROM t1;
+
+DROP TABLE t1;
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/insert_optimized_config.test b/storage/rocksdb/mysql-test/rocksdb/t/insert_optimized_config.test
new file mode 100644
index 00000000000..46ea7f0eb0a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/insert_optimized_config.test
@@ -0,0 +1,51 @@
+--source include/have_rocksdb.inc
+--source include/have_write_committed.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+# reload with load optimized config
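+# (mysqltest restart protocol: writing "wait" to the expect file lets the
+# server stay down after shutdown, and writing "restart:<options>" starts
+# it again with those options appended to the command line)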
+let $restart_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
+--exec echo "wait" > $restart_file
+--shutdown_server 10
+--source include/wait_until_disconnected.inc
+-- exec echo "restart:--rocksdb_write_disable_wal=1 --rocksdb_flush_log_at_trx_commit=0 --rocksdb_default_cf_options=write_buffer_size=16k;target_file_size_base=16k;level0_file_num_compaction_trigger=4;level0_slowdown_writes_trigger=256;level0_stop_writes_trigger=256;max_write_buffer_number=16;compression_per_level=kNoCompression;memtable=vector:1024 --rocksdb_override_cf_options=__system__={memtable=skip_list:16} --rocksdb_compaction_sequential_deletes=0 --rocksdb_compaction_sequential_deletes_window=0 --rocksdb_allow_concurrent_memtable_write=0" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+-- enable_reconnect
+-- source include/wait_until_connected_again.inc
+
+create table t1(
+ id bigint not null primary key,
+ i1 bigint, #unique
+ i2 bigint, #repeating
+ c1 varchar(20), #unique
+ c2 varchar(20), #repeating
+ index t1_2(i1)
+) engine=rocksdb;
+
+--disable_query_log
+set rocksdb_bulk_load=1;
+let $i=0;
+while ($i<50000)
+{
+ inc $i;
+ eval insert t1(id, i1, i2, c1, c2) values($i, $i, $i div 10, $i, $i div 10);
+}
+set rocksdb_bulk_load=0;
+--enable_query_log
+
+select count(*), sum(id), sum(i1), sum(i2) from t1;
+
+# reload without load optimized config
+let $restart_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
+--exec echo "wait" > $restart_file
+--shutdown_server 10
+--source include/wait_until_disconnected.inc
+-- exec echo "restart:--rocksdb_write_disable_wal=0 --rocksdb_default_cf_options=write_buffer_size=64k;target_file_size_base=64k;max_bytes_for_level_base=1m;compression_per_level=kNoCompression;" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+-- enable_reconnect
+-- source include/wait_until_connected_again.inc
+
+select count(*), sum(id), sum(i1), sum(i2) from t1;
+
+drop table t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/insert_with_keys.test b/storage/rocksdb/mysql-test/rocksdb/t/insert_with_keys.test
new file mode 100644
index 00000000000..1332fe143d0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/insert_with_keys.test
@@ -0,0 +1,169 @@
+--source include/have_rocksdb.inc
+
+#
+# INSERT statements for tables with keys
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY, KEY(b)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e');
+INSERT INTO t1 (a,b) VALUES (100,'a'), (6,'f');
+INSERT INTO t1 (a,b) VALUES (30,'m'),(29,'n');
+INSERT INTO t1 (a,b) VALUES (1,'a'),(12345,'z');
+INSERT INTO t1 (a,b) VALUES (3,'a'),(0,'');
+--sorted_result
+SELECT a,b FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX(a)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e');
+INSERT INTO t1 (a,b) VALUES (100,'a'), (6,'f');
+INSERT INTO t1 (a,b) VALUES (30,'m'),(29,'n');
+--error ER_DUP_ENTRY
+INSERT INTO t1 (a,b) VALUES (1,'a'),(12345,'z');
+--error ER_DUP_ENTRY
+INSERT INTO t1 (a,b) VALUES (3,'a'),(0,'');
+INSERT INTO t1 (a,b) VALUES (0,'');
+--sorted_result
+SELECT a,b FROM t1;
+
+INSERT IGNORE INTO t1 (a,b) VALUES (1,'a'),(12345,'z');
+INSERT INTO t1 (a,b) VALUES (3,'a'),(4,'d') ON DUPLICATE KEY UPDATE a = a+10;
+
+--sorted_result
+SELECT a,b FROM t1;
+
+DROP TABLE t1;
+
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX(a,b)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e');
+INSERT INTO t1 (a,b) VALUES (100,'a'), (6,'f');
+INSERT INTO t1 (a,b) VALUES (30,'m'),(29,'n');
+INSERT INTO t1 (a,b) VALUES (100,'b'), (2,'c');
+--error ER_DUP_ENTRY
+INSERT INTO t1 (a,b) VALUES (1,'a'),(12345,'z');
+--sorted_result
+SELECT a,b FROM t1;
+
+INSERT IGNORE INTO t1 (a,b) VALUES (1,'a'),(12345,'z');
+INSERT INTO t1 (a,b) VALUES (1,'a'),(12345,'z') ON DUPLICATE KEY UPDATE a = a+VALUES(a);
+--sorted_result
+SELECT a,b FROM t1;
+
+--error ER_DUP_ENTRY
+INSERT INTO t1 (a,b) VALUES (101,'x'),(101,'x');
+
+--sorted_result
+SELECT a,b FROM t1;
+
+DROP TABLE t1;
+
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e');
+INSERT INTO t1 (a,b) VALUES (100,'a'), (6,'f');
+INSERT INTO t1 (a,b) VALUES (30,'m'),(29,'n');
+--error ER_DUP_ENTRY
+INSERT INTO t1 (a,b) VALUES (1,'a'),(12345,'z');
+--error ER_DUP_ENTRY
+INSERT INTO t1 (a,b) VALUES (3,'a'),(0,'');
+INSERT INTO t1 (a,b) VALUES (0,'');
+--sorted_result
+SELECT a,b FROM t1;
+
+INSERT IGNORE INTO t1 (a,b) VALUES (1,'a'),(12345,'z');
+INSERT INTO t1 (a,b) VALUES (1,'a'),(12345,'z') ON DUPLICATE KEY UPDATE b = CONCAT(b,b);
+--sorted_result
+SELECT a,b FROM t1;
+
+DROP TABLE t1;
+
+#
+# INSERT on DUPLICATE KEY UPDATE with multiple keys
+#
+--echo
+--echo INSERT on DUPLICATE KEY UPDATE with multiple keys
+--echo
+CREATE TABLE t1 (a INT PRIMARY KEY, b VARCHAR(255)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (3,'a'), (4,'a'), (5,'a'), (6,'a'), (7,'a'), (8,'a'), (9,'a'), (10,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (3,'a'), (4,'a'), (5,'a'), (6,'a'), (7,'a'), (8,'a'), (9,'a'), (10,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+
+--sorted_result
+SELECT a,b FROM t1;
+
+DROP TABLE t1;
+
+#
+# INSERT on DUPLICATE KEY UPDATE with secondary key
+#
+--echo
+--echo INSERT on DUPLICATE KEY UPDATE with secondary key
+--echo
+CREATE TABLE t1 (a INT, b CHAR(8), c INT DEFAULT 0, pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX(a,b)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c'), (4,'d'), (5,'e'), (6,'f'), (7,'g'), (8,'h'), (9,'i'), (10,'j') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c'), (4,'d'), (5,'e'), (6,'f'), (7,'g'), (8,'h'), (9,'i'), (10,'j') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c') ON DUPLICATE KEY UPDATE c = c + 1;
+
+--sorted_result
+SELECT a,b,c,pk FROM t1;
+
+DROP TABLE t1;
+
+--echo
+--echo Disable caching and see if it still functions properly
+--echo
+SELECT @@rocksdb_enable_insert_with_update_caching;
+SET GLOBAL rocksdb_enable_insert_with_update_caching=0;
+SELECT @@rocksdb_enable_insert_with_update_caching;
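+# (this variable controls whether the row read by INSERT ... ON DUPLICATE
+# KEY UPDATE is cached and reused for later duplicates in the same
+# statement; the results must be identical either way)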
+
+#
+# INSERT on DUPLICATE KEY UPDATE with multiple keys
+#
+--echo
+--echo INSERT on DUPLICATE KEY UPDATE with multiple keys
+--echo
+CREATE TABLE t1 (a INT PRIMARY KEY, b VARCHAR(255)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (3,'a'), (4,'a'), (5,'a'), (6,'a'), (7,'a'), (8,'a'), (9,'a'), (10,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (3,'a'), (4,'a'), (5,'a'), (6,'a'), (7,'a'), (8,'a'), (9,'a'), (10,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+
+--sorted_result
+SELECT a,b FROM t1;
+
+DROP TABLE t1;
+
+#
+# INSERT on DUPLICATE KEY UPDATE with secondary key
+#
+--echo
+--echo INSERT on DUPLICATE KEY UPDATE with secondary key
+--echo
+CREATE TABLE t1 (a INT, b CHAR(8), c INT DEFAULT 0, pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX(a,b)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c'), (4,'d'), (5,'e'), (6,'f'), (7,'g'), (8,'h'), (9,'i'), (10,'j') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c'), (4,'d'), (5,'e'), (6,'f'), (7,'g'), (8,'h'), (9,'i'), (10,'j') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c') ON DUPLICATE KEY UPDATE c = c + 1;
+
+--sorted_result
+SELECT a,b,c,pk FROM t1;
+
+DROP TABLE t1;
+
+--echo
+--echo Cleanup
+--echo
+SET GLOBAL rocksdb_enable_insert_with_update_caching=1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue100.test b/storage/rocksdb/mysql-test/rocksdb/t/issue100.test
new file mode 100644
index 00000000000..b0b3eb7a8fd
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/issue100.test
@@ -0,0 +1,23 @@
+--source include/have_rocksdb.inc
+
+create table t1 (
+ id int,
+ value int,
+ primary key (id)
+) engine=rocksdb;
+
+insert into t1 values(1,1),(2,2);
+set autocommit=0;
+
+begin;
+insert into t1 values (50,50);
+select * from t1;
+
+update t1 set id=id+100;
+
+select * from t1;
+
+rollback;
+set autocommit=1;
+
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue100_delete-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/issue100_delete-master.opt
new file mode 100644
index 00000000000..436edf2b40c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/issue100_delete-master.opt
@@ -0,0 +1 @@
+--rocksdb_table_stats_sampling_pct=100
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue100_delete.test b/storage/rocksdb/mysql-test/rocksdb/t/issue100_delete.test
new file mode 100644
index 00000000000..743bf7dd7a1
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/issue100_delete.test
@@ -0,0 +1,19 @@
+--source include/have_rocksdb.inc
+
+create table ten(a int primary key);
+insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table one_k(a int primary key);
+insert into one_k select A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C;
+
+create table t100(pk int primary key, a int, b int, key(a));
+insert into t100 select a,a,a from test.one_k;
+
+set global rocksdb_force_flush_memtable_now=1;
+select num_rows, entry_deletes, entry_singledeletes from information_schema.rocksdb_index_file_map where index_number = (select max(index_number) from information_schema.rocksdb_index_file_map) order by entry_deletes, entry_singledeletes;
+
+update t100 set a=a+1;
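+# The UPDATE rewrites each row and its secondary-index entry; after the
+# flush, the superseded versions appear as delete/single-delete entries
+# in the SST file.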
+set global rocksdb_force_flush_memtable_now=1;
+select num_rows, entry_deletes, entry_singledeletes from information_schema.rocksdb_index_file_map where index_number = (select max(index_number) from information_schema.rocksdb_index_file_map) order by entry_deletes, entry_singledeletes;
+
+drop table ten, t100, one_k;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue111.test b/storage/rocksdb/mysql-test/rocksdb/t/issue111.test
new file mode 100644
index 00000000000..671ea4708d6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/issue111.test
@@ -0,0 +1,38 @@
+--source include/have_rocksdb.inc
+
+connect (con2,localhost,root,,);
+connection default;
+
+create table t1 (
+ pk int not null primary key,
+ col1 int not null,
+ col2 int not null,
+ key(col1)
+) engine=rocksdb;
+
+create table ten(a int primary key);
+insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table one_k(a int primary key);
+insert into one_k select A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C;
+
+insert into t1 select a,a,a from one_k;
+
+--echo # Start the transaction, get the snapshot
+begin;
+select * from t1 where col1<10;
+
+--echo # Connect with another connection and make a conflicting change
+connection con2;
+
+begin;
+update t1 set col2=123456 where pk=0;
+commit;
+
+connection default;
+
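+# The row with pk=0 was changed after this transaction took its snapshot,
+# so the write below hits a snapshot conflict, which MyRocks reports as
+# ER_LOCK_DEADLOCK.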
+--error ER_LOCK_DEADLOCK
+update t1 set col2=col2+1 where col1 < 10 limit 5;
+
+disconnect con2;
+drop table t1, ten, one_k;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue243_transactionStatus.test b/storage/rocksdb/mysql-test/rocksdb/t/issue243_transactionStatus.test
new file mode 100644
index 00000000000..0997bde3f49
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/issue243_transactionStatus.test
@@ -0,0 +1,80 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (
+ id INT,
+ val1 INT,
+ val2 INT,
+ PRIMARY KEY (id)
+) ENGINE=rocksdb;
+
+#
+# DB operations without a transaction: every count should be 0,
+# so there is no count-related output
+#
+INSERT INTO t1 VALUES(1,1,1),(2,1,2);
+SELECT * FROM t1;
+UPDATE t1 SET val1=2 WHERE id=2;
+SELECT * FROM t1;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /(ACTIVE) [0-9]+ /\1 NUM / /(thread id) [0-9]+/\1 TID/ /0x[0-9a-f]+/PTR/ /(query id) [0-9]+/\1 QID/ /(root) [a-z ]+/\1 ACTION/
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+
+#
+# DB operations within a transaction: insert_count, update_count,
+# delete_count and the total write_count should be printed.
+# Cases: rollback and commit of a transaction
+#
+SET AUTOCOMMIT=0;
+START TRANSACTION;
+INSERT INTO t1 VALUES(20,1,1),(30,30,30);
+SELECT * FROM t1;
+UPDATE t1 SET val1=20, val2=20 WHERE id=20;
+SELECT * FROM t1;
+DELETE FROM t1 WHERE id=30;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /(ACTIVE) [0-9]+ /\1 NUM / /(thread id) [0-9]+/\1 TID/ /0x[0-9a-f]+/PTR/ /(query id) [0-9]+/\1 QID/ /(root) [a-z ]+/\1 ACTION/
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+
+ROLLBACK;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /(ACTIVE) [0-9]+ /\1 NUM / /(thread id) [0-9]+/\1 TID/ /0x[0-9a-f]+/PTR/ /(query id) [0-9]+/\1 QID/ /(root) [a-z ]+/\1 ACTION/
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+
+START TRANSACTION;
+INSERT INTO t1 VALUES(40,40,40);
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /(ACTIVE) [0-9]+ /\1 NUM / /(thread id) [0-9]+/\1 TID/ /0x[0-9a-f]+/PTR/ /(query id) [0-9]+/\1 QID/ /(root) [a-z ]+/\1 ACTION/
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+COMMIT;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /(ACTIVE) [0-9]+ /\1 NUM / /(thread id) [0-9]+/\1 TID/ /0x[0-9a-f]+/PTR/ /(query id) [0-9]+/\1 QID/ /(root) [a-z ]+/\1 ACTION/
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+
+SET AUTOCOMMIT=1;
+DROP TABLE t1;
+
+#
+# Secondary Key Tests
+#
+--disable_warnings
+DROP TABLE IF EXISTS t2;
+--enable_warnings
+
+CREATE TABLE t2 (
+ id1 INT,
+ id2 INT,
+ value INT,
+ PRIMARY KEY (id1),
+ KEY (id2)
+) ENGINE=rocksdb;
+
+SET AUTOCOMMIT=0;
+START TRANSACTION;
+INSERT INTO t2 VALUES(1,2,0),(10,20,30);
+UPDATE t2 SET value=3 WHERE id2=2;
+DELETE FROM t2 WHERE id1=10;
+
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /(ACTIVE) [0-9]+ /\1 NUM / /(thread id) [0-9]+/\1 TID/ /0x[0-9a-f]+/PTR/ /(query id) [0-9]+/\1 QID/ /(root) [a-z ]+/\1 ACTION/
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+ROLLBACK;
+SET AUTOCOMMIT=1;
+DROP TABLE t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue255.test b/storage/rocksdb/mysql-test/rocksdb/t/issue255.test
new file mode 100644
index 00000000000..370dece0c6c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/issue255.test
@@ -0,0 +1,51 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE t1 (pk BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
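+# Inserting a value beyond the column range saturates the AUTO_INCREMENT
+# counter at the type maximum; later generated values repeat it, so the
+# bare INSERTs below fail with ER_DUP_ENTRY.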
+
+INSERT INTO t1 VALUES (5);
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+SHOW TABLE STATUS LIKE 't1';
+
+INSERT INTO t1 VALUES ('538647864786478647864');
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+SELECT * FROM t1;
+SHOW TABLE STATUS LIKE 't1';
+
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES ();
+SELECT * FROM t1;
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+SHOW TABLE STATUS LIKE 't1';
+
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES ();
+SELECT * FROM t1;
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+SHOW TABLE STATUS LIKE 't1';
+
+DROP TABLE t1;
+
+CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
+
+INSERT INTO t1 VALUES (5);
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+SHOW TABLE STATUS LIKE 't1';
+
+INSERT INTO t1 VALUES (1000);
+SELECT * FROM t1;
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+SHOW TABLE STATUS LIKE 't1';
+
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES ();
+SELECT * FROM t1;
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+SHOW TABLE STATUS LIKE 't1';
+
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES ();
+SELECT * FROM t1;
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+SHOW TABLE STATUS LIKE 't1';
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue290.test b/storage/rocksdb/mysql-test/rocksdb/t/issue290.test
new file mode 100644
index 00000000000..5ea8799c627
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/issue290.test
@@ -0,0 +1,40 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE `linktable` (
+ `id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+ `id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+ `link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `visibility` tinyint(3) NOT NULL DEFAULT '0',
+ `data` varchar(255) NOT NULL DEFAULT '',
+ `time` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `version` int(11) unsigned NOT NULL DEFAULT '0',
+ PRIMARY KEY (link_type, `id1`,`id2`) COMMENT 'cf_link_pk',
+ KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`,`data`) COMMENT 'rev:cf_link_id1_type'
+) ENGINE=RocksDB DEFAULT COLLATE=latin1_bin;
+--disable_query_log
+let $i = 1;
+while ($i <= 10000) {
+ let $insert = INSERT INTO linktable (id1, link_type, id2) values (1, 1, $i);
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+set global rocksdb_force_flush_memtable_now=1;
+
+insert into linktable (id1, link_type, id2) values (2, 1, 1);
+insert into linktable (id1, link_type, id2) values (2, 1, 2);
+insert into linktable (id1, link_type, id2) values (2, 1, 3);
+insert into linktable (id1, link_type, id2) values (2, 1, 4);
+insert into linktable (id1, link_type, id2) values (2, 1, 5);
+insert into linktable (id1, link_type, id2) values (2, 1, 6);
+insert into linktable (id1, link_type, id2) values (2, 1, 7);
+insert into linktable (id1, link_type, id2) values (2, 1, 8);
+insert into linktable (id1, link_type, id2) values (2, 1, 9);
+insert into linktable (id1, link_type, id2) values (2, 1, 10);
+
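+# The rows estimate (explain column 9) is masked, as it depends on
+# memtable flush and compaction state.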
+--replace_column 9 #
+explain select id1, id2, link_type, data from linktable force index(primary) where id1=2 and link_type=1 and (id2=1 or id2=2 or id2=3 or id2=4 or id2=5);
+
+drop table linktable;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue314.test b/storage/rocksdb/mysql-test/rocksdb/t/issue314.test
new file mode 100644
index 00000000000..822969a380f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/issue314.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+CREATE TABLE t1(a int);
+SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
+INSERT INTO t1 VALUES(1);
+--error ER_ISOLATION_MODE_NOT_SUPPORTED
+select * from t1;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+select * from t1;
+drop table t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue495.test b/storage/rocksdb/mysql-test/rocksdb/t/issue495.test
new file mode 100644
index 00000000000..5dcc7c19ba9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/issue495.test
@@ -0,0 +1,34 @@
+--source include/have_rocksdb.inc
+--source include/have_partition.inc
+--disable_warnings
+drop table if exists t;
+--enable_warnings
+
+create table t (
+ a int,
+ b int,
+ c varchar(12249) collate latin1_bin,
+ d datetime,
+ e int,
+ f int,
+ g blob,
+ h int,
+ i int,
+ key (b,e),
+ key (h,b)
+) engine=rocksdb
+partition by linear hash (i) partitions 8 ;
+
+insert into t values (1,1,'a',now(),1,1,'a',1,1);
+insert into t values (1,1,'a',now(),1,1,'a',1,1);
+insert into t values (1,1,'a',now(),1,1,'a',1,1);
+insert into t values (1,1,'a',now(),1,1,'a',1,1);
+insert into t values (1,1,'a',now(),1,1,'a',1,1);
+insert into t values (1,1,'a',now(),1,1,'a',1,1);
+insert into t values (1,1,'a',now(),1,1,'a',1,1);
+insert into t values (1,1,'a',now(),1,1,'a',1,1);
+select i from t group by h;
+select i from t group by h;
+
+drop table t;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue884.test b/storage/rocksdb/mysql-test/rocksdb/t/issue884.test
new file mode 100644
index 00000000000..6bf3e5177f2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/issue884.test
@@ -0,0 +1,43 @@
+--source include/have_rocksdb.inc
+
+create table test (
+ a bigint(20) not null,
+ b bigint(20) not null,
+ c varchar(500) not null,
+ d bigint(20) not null,
+ e bigint(20) not null,
+ f varchar(500) not null,
+ g varchar(500) not null,
+ h varchar(500) not null,
+ i varchar(1000) not null,
+ j varchar(16384) not null,
+ k varchar(200) not null,
+ l varchar(500) not null,
+ m varchar(100) not null,
+ n bigint(20) not null,
+ primary key (a, b, m, c(100), l(100), d, e, f(100), g(100), h(100), n),
+ key n (n),
+ key d (d, a)
+) engine = rocksdb default charset = latin1;
+
+--disable_query_log
+let $i = 1000;
+while ($i) {
+ --eval insert into test values (10, 1, "i", $i / 100, $i, "f", "g", "h", "i", "j", "k", "l", "m", $i);
+ --eval insert into test values (10, 2, "i", $i / 100, $i, "f", "g", "h", "i", "j", "k", "l", "m", $i);
+ --eval insert into test values (10, 3, "i", $i / 100, $i, "f", "g", "h", "i", "j", "k", "l", "m", $i);
+ --eval insert into test values (10, 4, "i", $i / 100, $i, "f", "g", "h", "i", "j", "k", "l", "m", $i);
+ --eval insert into test values (10, 5, "i", $i / 100, $i, "f", "g", "h", "i", "j", "k", "l", "m", $i);
+ dec $i;
+}
+set global rocksdb_force_flush_memtable_now = true;
+analyze table test;
+--enable_query_log
+
+--replace_column 9 #
+explain
+select * from test where d = 10 and a = 10 and b = 2;
+select * from test where d = 10 and a = 10 and b = 2;
+
+
+drop table test;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue896.test b/storage/rocksdb/mysql-test/rocksdb/t/issue896.test
new file mode 100644
index 00000000000..ba57fb99832
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/issue896.test
@@ -0,0 +1,17 @@
+# Issue 896: Segmentation fault in myrocks::Rdb_string_reader::read
+--source include/have_rocksdb.inc
+
+CREATE TABLE `t1` (
+`a` bigint(20) NOT NULL,
+`b` varchar(10) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,
+`u` bigint(20) unsigned NOT NULL,
+`d` bigint(20) DEFAULT NULL,
+PRIMARY KEY (`a`,`b`),
+KEY `d` (`d`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin COMMENT='ttl_duration=1000;ttl_col=u';
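+# The table COMMENT enables MyRocks TTL: rows expire ttl_duration seconds
+# after the timestamp stored in the unsigned bigint column named by
+# ttl_col.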
+INSERT INTO t1 VALUES (100, 'aaabbb', UNIX_TIMESTAMP(), 200);
+--replace_column 9 #
+EXPLAIN SELECT COUNT(*) FROM t1 FORCE INDEX(d);
+--echo # segfault here without the fix
+SELECT COUNT(*) FROM t1 FORCE INDEX(d);
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue900.test b/storage/rocksdb/mysql-test/rocksdb/t/issue900.test
new file mode 100644
index 00000000000..c420d418c20
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/issue900.test
@@ -0,0 +1,13 @@
+--source include/have_rocksdb.inc
+
+# Issue 900: Segmentation fault in myrocks::Rdb_string_reader::read
+CREATE TABLE t1(c1 VARCHAR(1) CHARACTER SET 'utf8' COLLATE 'utf8_bin', c2 YEAR, c3 REAL(1,0) UNSIGNED, PRIMARY KEY(c1)) ENGINE=RocksDB;
+INSERT INTO t1 VALUES(0,'0','0');
+INSERT INTO t1 VALUES('{0}','0','0');
+INSERT INTO t1 VALUES('1','0','1');
+# Would segfault here
+--error ER_DUP_ENTRY
+ALTER TABLE t1 ADD INDEX(c3), ADD UNIQUE (c3);
+--error ER_KEY_DOES_NOT_EXITS
+SELECT c3 FROM t1 FORCE INDEX(c3) ORDER BY c3;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/iterator_bounds-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/iterator_bounds-master.opt
new file mode 100644
index 00000000000..d77439930fd
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/iterator_bounds-master.opt
@@ -0,0 +1,2 @@
+--rocksdb_default_cf_options=write_buffer_size=256k;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;}
+--rocksdb_override_cf_options=rev:bf5_1={prefix_extractor=capped:12};
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/iterator_bounds.test b/storage/rocksdb/mysql-test/rocksdb/t/iterator_bounds.test
new file mode 100644
index 00000000000..2cced2a1d7a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/iterator_bounds.test
@@ -0,0 +1,29 @@
+#
+# Issue #878: Descending scans from reverse column families return no results
+# due to iterator bounds
+#
+
+create table t (i int primary key) engine=rocksdb;
+
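+# Each TRUNCATE recreates the table under a fresh index id; loop until the
+# index number ends in 0xFD so that the two indexes created below take
+# 0xFE and 0xFF.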
+let $cond=1;
+while ($cond)
+{
+ --disable_query_log
+ truncate table t;
+ --enable_query_log
+ let $cond=`select RIGHT(HEX(index_number), 2) != "FD" from information_schema.rocksdb_ddl where table_name = 't'`;
+}
+
+# Index id is now at FD. Create a table with primary and secondary key, so
+# that the secondary key index id ends in 0xFF.
+
+drop table t;
+create table t (i int primary key, j int, key(j) comment 'rev:bf5_2') engine=rocksdb;
+select RIGHT(HEX(index_number), 2) from information_schema.rocksdb_ddl where table_name = 't';
+
+insert into t values (1, 1);
+
+select j from t order by j asc;
+select j from t order by j desc;
+
+drop table t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/kill.test b/storage/rocksdb/mysql-test/rocksdb/t/kill.test
new file mode 100644
index 00000000000..a2809d969d5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/kill.test
@@ -0,0 +1,9 @@
+--source include/have_binlog_format_row.inc
+--source include/have_rocksdb.inc
+
+CREATE TABLE t1 (pk INT PRIMARY KEY) ENGINE=RocksDB;
+LOCK TABLE t1 WRITE;
+SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE;
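+# FLUSH TABLES has to reopen the write-locked table, which starts a new
+# transaction; MyRocks rejects the unsupported SERIALIZABLE level with an
+# error instead of asserting.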
+--error ER_ISOLATION_MODE_NOT_SUPPORTED
+FLUSH TABLES;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/level_read_committed.test b/storage/rocksdb/mysql-test/rocksdb/t/level_read_committed.test
new file mode 100644
index 00000000000..bedf87a8efa
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/level_read_committed.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+let $trx_isolation = READ COMMITTED;
+
+--source transaction_isolation.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/level_read_uncommitted.opt b/storage/rocksdb/mysql-test/rocksdb/t/level_read_uncommitted.opt
new file mode 100644
index 00000000000..418e4c3f056
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/level_read_uncommitted.opt
@@ -0,0 +1 @@
+--rocksdb_default_cf_options=disable_auto_compactions=true
\ No newline at end of file
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/level_read_uncommitted.test b/storage/rocksdb/mysql-test/rocksdb/t/level_read_uncommitted.test
new file mode 100644
index 00000000000..8ee3af60bf3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/level_read_uncommitted.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+let $trx_isolation = READ UNCOMMITTED;
+--source transaction_isolation.inc
+--source consistent_snapshot.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/level_repeatable_read.test b/storage/rocksdb/mysql-test/rocksdb/t/level_repeatable_read.test
new file mode 100644
index 00000000000..cf29073f69e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/level_repeatable_read.test
@@ -0,0 +1,5 @@
+--source include/have_rocksdb.inc
+
+let $trx_isolation = REPEATABLE READ;
+--source transaction_isolation.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/level_serializable.test b/storage/rocksdb/mysql-test/rocksdb/t/level_serializable.test
new file mode 100644
index 00000000000..9b5db0e8998
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/level_serializable.test
@@ -0,0 +1,5 @@
+--source include/have_rocksdb.inc
+
+let $trx_isolation = SERIALIZABLE;
+--source transaction_isolation.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/loaddata.inc b/storage/rocksdb/mysql-test/rocksdb/t/loaddata.inc
new file mode 100644
index 00000000000..1d83598a282
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/loaddata.inc
@@ -0,0 +1,117 @@
+--source include/have_rocksdb.inc
+
+#
+# Basic LOAD DATA statements
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+
+let $datadir = `SELECT @@datadir`;
+
+--write_file $datadir/se_loaddata.dat
+1,foo,
+2,bar,
+3,,
+4,abc,
+EOF
+
+--replace_result $datadir <DATADIR>
+eval
+LOAD DATA INFILE '$datadir/se_loaddata.dat' INTO TABLE t1
+ FIELDS TERMINATED BY ',' (a,b);
+--sorted_result
+SELECT a,b FROM t1;
+
+--replace_result $datadir <DATADIR>
+eval
+LOAD DATA LOCAL INFILE '$datadir/se_loaddata.dat' INTO TABLE t1
+ CHARACTER SET utf8 COLUMNS TERMINATED BY ','
+ ESCAPED BY '/' (a,b);
+--sorted_result
+SELECT a,b FROM t1;
+
+--remove_file $datadir/se_loaddata.dat
+--write_file $datadir/se_loaddata.dat
+5;YYY;
+102;'zzz';
+0;'test';
+EOF
+
+--replace_result $datadir <DATADIR>
+eval
+LOAD DATA LOCAL INFILE '$datadir/se_loaddata.dat' INTO TABLE t1
+ FIELDS TERMINATED BY ';'
+ (a) SET b='loaded';
+
+--sorted_result
+SELECT a,b FROM t1;
+
+--remove_file $datadir/se_loaddata.dat
+--write_file $datadir/se_loaddata.dat
+prefix:5;'foo';
+prefix:6;'';
+prefix:100;foo;
+prefix:7;'test';suffix
+101;abc;
+102;'z';
+prefix:0;;
+EOF
+
+--replace_result $datadir <DATADIR>
+eval
+LOAD DATA INFILE '$datadir/se_loaddata.dat' INTO TABLE t1
+ FIELDS TERMINATED BY ';'
+ OPTIONALLY ENCLOSED BY ''''
+ LINES STARTING BY 'prefix:'
+ IGNORE 2 LINES (a,b);
+
+--sorted_result
+SELECT a,b FROM t1;
+
+--remove_file $datadir/se_loaddata.dat
+--write_file $datadir/se_loaddata.dat
+1 foo
+2 bar
+3
+4 abc
+EOF
+
+--replace_result $datadir <DATADIR>
+eval
+LOAD DATA INFILE '$datadir/se_loaddata.dat' INTO TABLE t1;
+--sorted_result
+SELECT a,b FROM t1;
+--remove_file $datadir/se_loaddata.dat
+
+
+--write_file $datadir/se_replacedata.dat
+1 aaa
+2 bbb
+3 ccc
+4 ddd
+EOF
+
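+# With unique_checks=0 MyRocks cannot detect duplicate keys, so REPLACE
+# (which depends on duplicate detection) is rejected with
+# ER_ON_DUPLICATE_DISABLED; with checks enabled it succeeds.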
+--replace_result $datadir <DATADIR>
+if ($skip_unique_check == 1)
+{
+ --error ER_ON_DUPLICATE_DISABLED
+ eval
+ LOAD DATA INFILE '$datadir/se_replacedata.dat' REPLACE INTO TABLE t1;
+}
+if ($skip_unique_check == 0)
+{
+ eval
+ LOAD DATA INFILE '$datadir/se_replacedata.dat' REPLACE INTO TABLE t1;
+}
+--sorted_result
+SELECT a,b FROM t1;
+--remove_file $datadir/se_replacedata.dat
+
+
+# Cleanup
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/loaddata.test b/storage/rocksdb/mysql-test/rocksdb/t/loaddata.test
new file mode 100644
index 00000000000..1f59d5ce204
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/loaddata.test
@@ -0,0 +1,7 @@
+--source include/have_rocksdb.inc
+
+let $skip_unique_check = 0;
+--source loaddata.inc
+let $skip_unique_check = 1;
+set session unique_checks=0;
+--source loaddata.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/lock.test b/storage/rocksdb/mysql-test/rocksdb/t/lock.test
new file mode 100644
index 00000000000..9ce5e566505
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/lock.test
@@ -0,0 +1,224 @@
+--source include/have_rocksdb.inc
+
+# Standard table locking:
+# LOCK TABLE .. READ
+# LOCK TABLE .. [LOW_PRIORITY] WRITE
+# UNLOCK TABLES
+#
+# and global locking:
+# FLUSH TABLES [..] WITH READ LOCK
+#
+############################################
+# TODO:
+# A part of the test is currently disabled
+# because temporary tables are not supported
+############################################
+
+--enable_connect_log
+
+# Save the initial number of concurrent sessions.
+--source include/count_sessions.inc
+
+connect (con1,localhost,root,,);
+SET lock_wait_timeout=1;
+
+connection default;
+
+--disable_warnings
+DROP TABLE IF EXISTS t1, t2, t3;
+--enable_warnings
+
+CREATE TABLE t1 (id INT, id2 INT, pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (id,id2) VALUES (1,1),(1,2),(1,3);
+
+# LOW_PRIORITY has no effect, but is still syntactically correct
+LOCK TABLE t1 LOW_PRIORITY WRITE;
+SELECT id2,COUNT(DISTINCT id) FROM t1 GROUP BY id2;
+
+UPDATE t1 SET id=-1 WHERE id=1;
+
+connection con1;
+# With WRITE lock held by connection 'default',
+# nobody else can access the table
+--error ER_LOCK_WAIT_TIMEOUT
+SELECT id,id2 FROM t1;
+--error ER_LOCK_WAIT_TIMEOUT
+LOCK TABLE t1 READ;
+
+connection default;
+LOCK TABLE t1 READ;
+--error ER_TABLE_NOT_LOCKED_FOR_WRITE
+UPDATE t1 SET id=1 WHERE id=1;
+
+connection con1;
+# With READ lock held by connection 'default',
+# it should be possible to read from the table
+# or acquire another READ lock,
+# but not update it or acquire WRITE lock
+SELECT COUNT(DISTINCT id) FROM t1;
+--error ER_LOCK_WAIT_TIMEOUT
+UPDATE t1 SET id=2 WHERE id=2;
+--error ER_LOCK_WAIT_TIMEOUT
+LOCK TABLE t1 WRITE;
+LOCK TABLE t1 READ;
+UNLOCK TABLES;
+
+
+--connection default
+
+--error ER_TABLE_NOT_LOCKED
+CREATE TABLE t2 (a INT, b CHAR(8), PRIMARY KEY(a)) ENGINE=rocksdb;
+
+--disable_parsing
+
+CREATE TEMPORARY TABLE t2 (a INT, b CHAR(8), PRIMARY KEY(a)) ENGINE=rocksdb;
+DROP TABLE IF EXISTS t2;
+
+--enable_parsing
+
+UNLOCK TABLES;
+
+CREATE TABLE t2 (id INT, id2 INT, pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+LOCK TABLE t1 WRITE, t2 WRITE;
+INSERT INTO t2 (id,id2) SELECT id,id2 FROM t1;
+UPDATE t1 SET id=1 WHERE id=-1;
+DROP TABLE t1,t2;
+
+#
+# INSERT ... SELECT with lock tables
+#
+
+CREATE TABLE t1 (i1 INT, nr INT, pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+CREATE TABLE t2 (nr INT, nm INT, pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t2 (nr,nm) VALUES (1,3);
+INSERT INTO t2 (nr,nm) VALUES (2,4);
+
+LOCK TABLES t1 WRITE, t2 READ;
+INSERT INTO t1 (i1,nr) SELECT 1, nr FROM t2 WHERE nm=3;
+INSERT INTO t1 (i1,nr) SELECT 2, nr FROM t2 WHERE nm=4;
+UNLOCK TABLES;
+
+LOCK TABLES t1 WRITE;
+--error ER_TABLE_NOT_LOCKED
+INSERT INTO t1 (i1,nr) SELECT i1, nr FROM t1;
+UNLOCK TABLES;
+LOCK TABLES t1 WRITE, t1 AS t1_alias READ;
+INSERT INTO t1 (i1,nr) SELECT i1, nr FROM t1 AS t1_alias;
+--error ER_TABLE_NOT_LOCKED
+DROP TABLE t1,t2;
+UNLOCK TABLES;
+DROP TABLE t1,t2;
+
+#
+# Check that a dropped table is removed from a lock
+
+CREATE TABLE t1 (a INT, b CHAR(8), PRIMARY KEY(a)) ENGINE=rocksdb;
+CREATE TABLE t2 (a INT, b CHAR(8), PRIMARY KEY(b)) ENGINE=rocksdb;
+CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb;
+LOCK TABLES t1 WRITE, t2 WRITE, t3 WRITE;
+# This removes one table after the other from the lock.
+DROP TABLE t2, t3, t1;
+#
+# Check that a lock merge works
+
+CREATE TABLE t1 (a INT, b CHAR(8), PRIMARY KEY(a)) ENGINE=rocksdb;
+CREATE TABLE t2 (a INT, b CHAR(8), PRIMARY KEY(b)) ENGINE=rocksdb;
+CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb;
+LOCK TABLES t1 WRITE, t2 WRITE, t3 WRITE, t1 AS t4 READ;
+
+ALTER TABLE t2 ADD COLUMN c2 INT;
+
+DROP TABLE t1, t2, t3;
+
+# FLUSH TABLES is not permitted when there is an active LOCK TABLES .. READ,
+# FLUSH TABLES .. WITH READ LOCK should be used instead
+# (and for other connections the table is locked)
+
+CREATE TABLE t1 (a INT, b CHAR(8), PRIMARY KEY(a)) ENGINE=rocksdb;
+CREATE TABLE t2 (a INT, b CHAR(8), PRIMARY KEY(b)) ENGINE=rocksdb;
+
+LOCK TABLE t1 READ, t2 READ;
+--error ER_TABLE_NOT_LOCKED_FOR_WRITE
+FLUSH TABLE t1;
+--error ER_TABLE_NOT_LOCKED_FOR_WRITE
+FLUSH TABLES;
+--error ER_LOCK_OR_ACTIVE_TRANSACTION
+FLUSH TABLES t1, t2 WITH READ LOCK;
+UNLOCK TABLES;
+
+FLUSH TABLES t1, t2 WITH READ LOCK;
+
+--connection con1
+--error ER_LOCK_WAIT_TIMEOUT
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+
+--connection default
+UNLOCK TABLES;
+
+# Global lock
+
+FLUSH TABLES WITH READ LOCK;
+
+--connection con1
+--error ER_LOCK_WAIT_TIMEOUT
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+
+--connection default
+UNLOCK TABLES;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+FLUSH TABLES WITH READ LOCK;
+--error ER_CANT_UPDATE_WITH_READLOCK
+DROP TABLE t1, t2;
+UNLOCK TABLES;
+DROP TABLE t1, t2;
+
+--disconnect con1
+
+#
+# Ensure that FLUSH TABLES doesn't substitute a base locked table
+# with a temporary one.
+#
+
+CREATE TABLE t1 (a INT, b CHAR(8), PRIMARY KEY(a)) ENGINE=rocksdb;
+CREATE TABLE t2 (a INT, b CHAR(8), PRIMARY KEY(b)) ENGINE=rocksdb;
+
+LOCK TABLE t1 WRITE, t2 WRITE;
+
+--disable_parsing
+
+CREATE TEMPORARY TABLE t1 (a INT, b CHAR(8), PRIMARY KEY(a)) ENGINE=rocksdb;
+FLUSH TABLE t1;
+DROP TEMPORARY TABLE t1;
+
+--enable_parsing
+
+SELECT a,b FROM t1;
+UNLOCK TABLES;
+DROP TABLE t1, t2;
+
+CREATE TABLE t1 (i INT) ENGINE=MyISAM;
+HANDLER t1 OPEN h;
+CREATE TABLE t2 (i INT) ENGINE=RocksDB;
+LOCK TABLES t2 WRITE;
+
+--connect (con1,localhost,root,,test)
+--connection con1
+--send
+FLUSH TABLES WITH READ LOCK;
+
+--connection default
+INSERT INTO t2 VALUES (1);
+UNLOCK TABLES;
+HANDLER h CLOSE;
+
+--connection con1
+--reap
+--disconnect con1
+
+--connection default
+DROP TABLE t1, t2;
+
+# Check that all connections opened by test cases in this file are really
+# gone so execution of other tests won't be affected by their presence.
+--source include/wait_until_count_sessions.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/lock_info.test b/storage/rocksdb/mysql-test/rocksdb/t/lock_info.test
new file mode 100644
index 00000000000..1b624cf38c0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/lock_info.test
@@ -0,0 +1,31 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+--enable_warnings
+
+create table t1 (a int, primary key(a) comment 'lock_into_cf1') engine=rocksdb;
+insert into t1 values (1);
+insert into t1 values (2);
+
+create table t2 (a int, primary key(a) comment 'lock_info_cf2') engine=rocksdb;
+insert into t2 values (1);
+insert into t2 values (2);
+
+set autocommit=0;
+select * from t1 for update;
+select * from t2 for update;
+
+use information_schema;
+--replace_column 2 _txn_id_ 3 _key_
+select rocksdb_ddl.cf, rocksdb_locks.transaction_id, rocksdb_locks.key
+from rocksdb_locks
+left join rocksdb_ddl
+on rocksdb_locks.column_family_id=rocksdb_ddl.column_family
+order by rocksdb_ddl.cf;
+
+use test;
+
+DROP TABLE t1;
+DROP TABLE t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/lock_rows_not_exist.test b/storage/rocksdb/mysql-test/rocksdb/t/lock_rows_not_exist.test
new file mode 100644
index 00000000000..d8a6bde45c8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/lock_rows_not_exist.test
@@ -0,0 +1,110 @@
+--source include/have_rocksdb.inc
+
+--enable_connect_log
+
+# Save the initial number of concurrent sessions
+--source include/count_sessions.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+#1. Using all primary key columns, with equal conditions
+connection con1;
+CREATE TABLE t (id1 int, id2 int, id3 int, value int, PRIMARY KEY (id1, id2, id3)) ENGINE=RocksDB;
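+# MyRocks locks the exact key that was looked up even when no matching row
+# exists, so a second transaction touching the same (non-existent) PK must
+# wait, while a different PK is not blocked.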
+
+#1.1 SELECT FOR UPDATE
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+BEGIN;
+SELECT * FROM t WHERE id1=1 AND id2=1 AND id3=1 FOR UPDATE;
+
+connection con2;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+BEGIN;
+--error ER_LOCK_WAIT_TIMEOUT
+SELECT * FROM t WHERE id1=1 AND id2=1 AND id3=1 FOR UPDATE;
+SELECT * FROM t WHERE id1=1 AND id2=1 AND id3=2 FOR UPDATE;
+
+#1.2 UPDATE
+connection con1;
+ROLLBACK;
+BEGIN;
+UPDATE t SET value=value+100 WHERE id1=1 AND id2=1 AND id3=1;
+
+connection con2;
+ROLLBACK;
+BEGIN;
+--error ER_LOCK_WAIT_TIMEOUT
+UPDATE t SET value=value+100 WHERE id1=1 AND id2=1 AND id3=1;
+UPDATE t SET value=value+100 WHERE id1=1 AND id2=0 AND id3=1;
+
+#1.3 DELETE
+connection con1;
+ROLLBACK;
+BEGIN;
+DELETE FROM t WHERE id1=1 AND id2=1 AND id3=1;
+
+connection con2;
+ROLLBACK;
+BEGIN;
+--error ER_LOCK_WAIT_TIMEOUT
+DELETE FROM t WHERE id1=1 AND id2=1 AND id3=1;
+DELETE FROM t WHERE id1=1 AND id2=1 AND id3=0;
+
+--disable_parsing
+#
+# The following is commented out because RocksDB's Transaction API doesn't
+# "support" READ COMMITED, in particular, it doesn't release row locks
+# after each statement. (MyRocks is able to request a new snapshot for
+# every statement, but this won't free the locks. TODO: Is the behavior
+# that is tested below really needed?)
+#
+connection con1;
+ROLLBACK;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+BEGIN;
+SELECT * FROM t WHERE id1=1 AND id2=1 AND id3=1 FOR UPDATE;
+
+connection con2;
+ROLLBACK;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+BEGIN;
+SELECT * FROM t WHERE id1=1 AND id2=1 AND id3=1 FOR UPDATE;
+SELECT * FROM t WHERE id1=1 AND id2=1 AND id3=2 FOR UPDATE;
+
+connection con1;
+ROLLBACK;
+BEGIN;
+UPDATE t SET value=value+100 WHERE id1=1 AND id2=1 AND id3=1;
+
+connection con2;
+ROLLBACK;
+BEGIN;
+UPDATE t SET value=value+100 WHERE id1=1 AND id2=1 AND id3=1;
+UPDATE t SET value=value+100 WHERE id1=1 AND id2=0 AND id3=1;
+
+connection con1;
+ROLLBACK;
+BEGIN;
+DELETE FROM t WHERE id1=1 AND id2=1 AND id3=1;
+
+connection con2;
+ROLLBACK;
+BEGIN;
+DELETE FROM t WHERE id1=1 AND id2=1 AND id3=1;
+DELETE FROM t WHERE id1=1 AND id2=1 AND id3=0;
+
+connection con1;
+ROLLBACK;
+connection con2;
+ROLLBACK;
+
+--enable_parsing
+connection default;
+disconnect con1;
+disconnect con2;
+
+DROP TABLE t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test b/storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test
new file mode 100644
index 00000000000..5288680c3bd
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test
@@ -0,0 +1,38 @@
+create table t (a int primary key) engine=rocksdb;
+
+begin;
+insert into t values (0);
+
+--source include/count_sessions.inc
+--connect (con1,localhost,root,,)
+--connection con1
+set @@rocksdb_lock_wait_timeout=1;
+select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
+begin;
+
+--connect (con2,localhost,root,,)
+--connection con2
+set @@rocksdb_lock_wait_timeout=1;
+begin;
+
+--connection con1
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts';
+--error ER_LOCK_WAIT_TIMEOUT
+insert into t values(0);
+select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
+select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
+select case when variable_value-@a = 1 then 'true' else 'false' end as waits from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts';
+
+--connection con2
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts';
+--error ER_LOCK_WAIT_TIMEOUT
+insert into t values(0);
+select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
+select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
+select case when variable_value-@a = 1 then 'true' else 'false' end as waits from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts';
+
+--disconnect con1
+--connection default
+--disconnect con2
+drop table t;
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/locking_issues.test b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues.test
new file mode 100644
index 00000000000..41e2f69578a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues.test
@@ -0,0 +1,3 @@
+--source include/have_rocksdb.inc
+
+--echo tests moved to rocksdb.locking_issues_case*
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case1_1_rc.test b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case1_1_rc.test
new file mode 100644
index 00000000000..3fd183bf4dd
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case1_1_rc.test
@@ -0,0 +1,4 @@
+--source include/have_rocksdb.inc
+
+let $isolation_level = READ COMMITTED;
+--source include/locking_issues_case1_1.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case1_1_rr.test b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case1_1_rr.test
new file mode 100644
index 00000000000..cd4a0bd3c9d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case1_1_rr.test
@@ -0,0 +1,4 @@
+--source include/have_rocksdb.inc
+
+let $isolation_level = REPEATABLE READ;
+--source include/locking_issues_case1_1.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case1_2_rc.test b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case1_2_rc.test
new file mode 100644
index 00000000000..3fe052a4099
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case1_2_rc.test
@@ -0,0 +1,4 @@
+--source include/have_rocksdb.inc
+
+let $isolation_level = READ COMMITTED;
+--source include/locking_issues_case1_2.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case1_2_rr.test b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case1_2_rr.test
new file mode 100644
index 00000000000..02263273ba5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case1_2_rr.test
@@ -0,0 +1,4 @@
+--source include/have_rocksdb.inc
+
+let $isolation_level = REPEATABLE READ;
+--source include/locking_issues_case1_2.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case2_rc.test b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case2_rc.test
new file mode 100644
index 00000000000..d780b5247bc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case2_rc.test
@@ -0,0 +1,5 @@
+--source include/have_rocksdb.inc
+
+let $lock_scanned_rows=0;
+let $isolation_level = READ COMMITTED;
+--source include/locking_issues_case2.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case2_rc_lsr.test b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case2_rc_lsr.test
new file mode 100644
index 00000000000..bd46f93a76c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case2_rc_lsr.test
@@ -0,0 +1,5 @@
+--source include/have_rocksdb.inc
+
+let $lock_scanned_rows=1;
+let $isolation_level = READ COMMITTED;
+--source include/locking_issues_case2.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case2_rr.test b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case2_rr.test
new file mode 100644
index 00000000000..b820fddb979
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case2_rr.test
@@ -0,0 +1,5 @@
+--source include/have_rocksdb.inc
+
+let $lock_scanned_rows=0;
+let $isolation_level = REPEATABLE READ;
+--source include/locking_issues_case2.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case2_rr_lsr.test b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case2_rr_lsr.test
new file mode 100644
index 00000000000..33d3b752098
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case2_rr_lsr.test
@@ -0,0 +1,5 @@
+--source include/have_rocksdb.inc
+
+let $lock_scanned_rows=1;
+let $isolation_level = REPEATABLE READ;
+--source include/locking_issues_case2.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case3_rc.test b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case3_rc.test
new file mode 100644
index 00000000000..7dc7f8784ea
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case3_rc.test
@@ -0,0 +1,4 @@
+--source include/have_rocksdb.inc
+
+let $isolation_level = READ COMMITTED;
+--source include/locking_issues_case3.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case3_rr.test b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case3_rr.test
new file mode 100644
index 00000000000..7c81daccebc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case3_rr.test
@@ -0,0 +1,4 @@
+--source include/have_rocksdb.inc
+
+let $isolation_level = REPEATABLE READ;
+--source include/locking_issues_case3.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case4_rc.test b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case4_rc.test
new file mode 100644
index 00000000000..edc1111b0a5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case4_rc.test
@@ -0,0 +1,4 @@
+--source include/have_rocksdb.inc
+
+let $isolation_level = READ COMMITTED;
+--source include/locking_issues_case4.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case4_rr.test b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case4_rr.test
new file mode 100644
index 00000000000..8c26c2d1e19
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case4_rr.test
@@ -0,0 +1,4 @@
+--source include/have_rocksdb.inc
+
+let $isolation_level = REPEATABLE READ;
+--source include/locking_issues_case4.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case5_rc.test b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case5_rc.test
new file mode 100644
index 00000000000..10bedcf2cca
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case5_rc.test
@@ -0,0 +1,4 @@
+--source include/have_rocksdb.inc
+
+let $isolation_level = READ COMMITTED;
+--source include/locking_issues_case5.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case5_rr.test b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case5_rr.test
new file mode 100644
index 00000000000..6de3847cb66
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case5_rr.test
@@ -0,0 +1,4 @@
+--source include/have_rocksdb.inc
+
+let $isolation_level = REPEATABLE READ;
+--source include/locking_issues_case5.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case6_rc.test b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case6_rc.test
new file mode 100644
index 00000000000..9409bde0c58
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case6_rc.test
@@ -0,0 +1,4 @@
+--source include/have_rocksdb.inc
+
+let $isolation_level = READ COMMITTED;
+--source include/locking_issues_case6.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case6_rr.test b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case6_rr.test
new file mode 100644
index 00000000000..0c7a8a116c3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case6_rr.test
@@ -0,0 +1,4 @@
+--source include/have_rocksdb.inc
+
+let $isolation_level = REPEATABLE READ;
+--source include/locking_issues_case6.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case7_rc.test b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case7_rc.test
new file mode 100644
index 00000000000..f8e874a135c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case7_rc.test
@@ -0,0 +1,5 @@
+--source include/have_rocksdb.inc
+
+let $lock_scanned_rows=0;
+let $isolation_level = READ COMMITTED;
+--source include/locking_issues_case7.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case7_rc_lsr.test b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case7_rc_lsr.test
new file mode 100644
index 00000000000..0f97ca2f00f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case7_rc_lsr.test
@@ -0,0 +1,5 @@
+--source include/have_rocksdb.inc
+
+let $lock_scanned_rows=1;
+let $isolation_level = READ COMMITTED;
+--source include/locking_issues_case7.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case7_rr.test b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case7_rr.test
new file mode 100644
index 00000000000..c8cec956893
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case7_rr.test
@@ -0,0 +1,5 @@
+--source include/have_rocksdb.inc
+
+let $lock_scanned_rows=0;
+let $isolation_level = REPEATABLE READ;
+--source include/locking_issues_case7.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case7_rr_lsr.test b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case7_rr_lsr.test
new file mode 100644
index 00000000000..875e81a1e05
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case7_rr_lsr.test
@@ -0,0 +1,5 @@
+--source include/have_rocksdb.inc
+
+let $lock_scanned_rows=1;
+let $isolation_level = REPEATABLE READ;
+--source include/locking_issues_case7.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mariadb_ignore_dirs.test b/storage/rocksdb/mysql-test/rocksdb/t/mariadb_ignore_dirs.test
new file mode 100644
index 00000000000..49591dd612a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/mariadb_ignore_dirs.test
@@ -0,0 +1,17 @@
+--source include/have_rocksdb.inc
+
+
+--echo #
+--echo # RocksDB plugin adds #rocksdb to ignore_db_dirs
+--echo #
+
+select @@ignore_db_dirs;
+
+--let $_mysqld_option=--ignore-db-dirs=aa --ignore-db-dirs=bbb
+--source include/restart_mysqld_with_option.inc
+
+select @@ignore_db_dirs;
+
+--let $_mysqld_option=--ignore-db-dirs=#rocksdb
+--source include/restart_mysqld_with_option.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mariadb_misc_binlog-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/mariadb_misc_binlog-master.opt
new file mode 100644
index 00000000000..beae84b3862
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/mariadb_misc_binlog-master.opt
@@ -0,0 +1 @@
+--log-bin
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mariadb_misc_binlog.test b/storage/rocksdb/mysql-test/rocksdb/t/mariadb_misc_binlog.test
new file mode 100644
index 00000000000..e32679e88a2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/mariadb_misc_binlog.test
@@ -0,0 +1,40 @@
+--source include/have_rocksdb.inc
+
+create table t1 (a int) engine=rocksdb;
+
+--echo # Should have binlog ON
+select @@log_bin;
+
+set binlog_format='row';
+
+--echo # Should succeed
+optimize table t1;
+
+--echo #
+--echo # MDEV-13602: rocksdb.index_merge_rocksdb2 failed in buildbot
+--echo #
+
+lock tables t1 write;
+insert into t1 values(1);
+unlock tables;
+
+set @tmp_bf= @@binlog_format;
+set binlog_format='STATEMENT';
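+# MyRocks only supports row-based binary logging, so a statement-format
+# write under LOCK TABLES is expected to fail with
+# ER_BINLOG_STMT_MODE_AND_ROW_ENGINE.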
+lock tables t1 write;
+--error ER_BINLOG_STMT_MODE_AND_ROW_ENGINE
+insert into t1 values(1);
+unlock tables;
+set @@binlog_format=@tmp_bf;
+
+drop table t1;
+
+--echo #
+--echo # MDEV-17045: MyRocks tables cannot be updated when binlog_format=MIXED.
+--echo #
+set @tmp_bf= @@binlog_format;
+set binlog_format='MIXED';
+create table t1 (pk int primary key) engine=rocksdb;
+insert into t1 values (1);
+drop table t1;
+set @@binlog_format=@tmp_bf;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mariadb_plugin-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/mariadb_plugin-master.opt
new file mode 100644
index 00000000000..0f0a3ef33e5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/mariadb_plugin-master.opt
@@ -0,0 +1 @@
+--default-storage-engine=myisam --plugin-load='' --ignore-db-dirs=#rocksdb
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mariadb_plugin.test b/storage/rocksdb/mysql-test/rocksdb/t/mariadb_plugin.test
new file mode 100644
index 00000000000..0cf56c0cbd5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/mariadb_plugin.test
@@ -0,0 +1,59 @@
+--source include/have_log_bin.inc
+--source include/have_binlog_format_row.inc
+--source include/not_windows.inc
+
+--echo #
+--echo # MDEV-14843: Assertion `s_tx_list.size() == 0' failed in myrocks::Rdb_transaction::term_mutex
+--echo #
+
+#
+# On Linux, wsrep plugin is always loaded so XA is enabled at this point.
+# On Windows, there is no wsrep, so we get this warning:
+# Warning 1105 Cannot enable tc-log at run-time. XA features of ROCKSDB are disabled
+#
+--disable_warnings
+INSTALL SONAME 'ha_rocksdb';
+--enable_warnings
+
+connect (con1,localhost,root,,test);
+CREATE TABLE t1 (i INT) ENGINE=RocksDB;
+insert into t1 values (1);
+DROP TABLE t1;
+
+connection default;
+# Cleanup
+UNINSTALL SONAME 'ha_rocksdb';
+SELECT ENGINE, SUPPORT FROM INFORMATION_SCHEMA.ENGINES WHERE ENGINE='ROCKSDB';
+disconnect con1;
+# Unfortunately this is the only more or less reliable way to wait until
+# the connection has done ha_close_connections(). It doesn't work on Windows
+# due to different thread handling.
+let $wait_condition= SELECT VARIABLE_VALUE=1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME='Threads_cached';
+--source include/wait_condition.inc
+
+--echo #
+--echo # MDEV-15686: Loading MyRocks plugin back after it has been unloaded causes a crash
+--echo #
+call mtr.add_suppression("Plugin 'ROCKSDB.*' init function returned error.");
+call mtr.add_suppression("Plugin 'ROCKSDB.*' registration as a INFORMATION SCHEMA failed.");
+call mtr.add_suppression("Plugin 'ROCKSDB' registration as a STORAGE ENGINE failed");
+
+--echo #
+--echo # There are two possible scenarios:
+
+--echo # ha_rocksdb.{dll,so} is still loaded into mysqld's address space. Its
+--echo # global variables are in the state that doesn't allow it to be
+--echo # initialized back (this is what MDEV-15686 is about). This is handled
+--echo # by intentionally returning an error from rocksdb_init_func.
+--echo #
+--echo # The second case is when ha_rocksdb.{dll,so} has been fully unloaded
+--echo # and so it will be now loaded as if it happens for the first time.
+
+--error 0,ER_INTERNAL_ERROR
+INSTALL SONAME 'ha_rocksdb';
+
+--echo # Whatever happened on the previous step, restore things to the way they
+--echo # were at testcase start.
+--error 0,ER_SP_DOES_NOT_EXIST
+UNINSTALL SONAME 'ha_rocksdb';
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mariadb_port_fixes.test b/storage/rocksdb/mysql-test/rocksdb/t/mariadb_port_fixes.test
new file mode 100644
index 00000000000..569f33c94b2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/mariadb_port_fixes.test
@@ -0,0 +1,114 @@
+--source include/have_rocksdb.inc
+
+--echo #
+--echo # MDEV-14433: RocksDB may show empty or incorrect output with rocksdb_strict_collation_check=off
+--echo #
+set @tmp_rscc=@@rocksdb_strict_collation_check;
+set global rocksdb_strict_collation_check=off;
+
+CREATE TABLE t1(
+ a varchar(10) NOT NULL,
+ b char(1) DEFAULT 'X',
+ c char(2) NOT NULL DEFAULT '??',
+ d varchar(10) NOT NULL,
+ e int(11) DEFAULT 0,
+ PRIMARY KEY (a,d),
+ KEY (e)
+) ENGINE=ROCKSDB DEFAULT CHARSET=utf8;
+
+insert into t1 select 1,1,1,1,0;
+insert into t1 select 2,1,1,1,0;
+insert into t1 select 3,1,1,1,0;
+
+--replace_column 9 #
+explain
+select a from t1 force index(e) where e<10000;
+select a from t1;
+select * from t1;
+DROP TABLE t1;
+
+--echo #
+--echo # MDEV-14563: Wrong query plan for query with no PK
+--echo #
+
+CREATE TABLE t1(
+ pk int primary key,
+ a varchar(10) NOT NULL,
+ e int(11) DEFAULT 0,
+ KEY (a)
+) ENGINE=ROCKSDB DEFAULT CHARSET=utf8;
+insert into t1 values (1,1,1),(2,2,2);
+--replace_column 9 #
+explain select a from t1 where a <'zzz';
+
+CREATE TABLE t2(
+ pk int,
+ a varchar(10) NOT NULL,
+ e int(11) DEFAULT 0,
+ KEY (a)
+) ENGINE=ROCKSDB DEFAULT CHARSET=utf8;
+insert into t2 values (1,1,1),(2,2,2);
+--replace_column 9 #
+explain select a from t2 where a <'zzz';
+
+drop table t1,t2;
+
+set global rocksdb_strict_collation_check=@tmp_rscc;
+
+--echo #
+--echo # MDEV-14389: MyRocks and NOPAD collations
+--echo #
+
+--error ER_MYROCKS_CANT_NOPAD_COLLATION
+create table t1 (pk varchar(10) collate latin1_nopad_bin, primary key(pk)) engine=rocksdb;
+
+set global rocksdb_strict_collation_check=off;
+--error ER_MYROCKS_CANT_NOPAD_COLLATION
+create table t1 (pk varchar(10) collate latin1_nopad_bin, primary key(pk)) engine=rocksdb;
+
+set global rocksdb_strict_collation_check=@tmp_rscc;
+
+--echo #
+--echo # MDEV-14679: RocksDB plugin fails to load with "Loading of unknown plugin ROCKSDB_CFSTATS
+--echo #
+select plugin_name, plugin_maturity from information_schema.plugins where plugin_name like '%rocksdb%';
+
+
+--echo #
+--echo # MDEV-12466 : Assertion `thd->transaction.stmt.is_empty() || thd->in_sub_stmt || ...
+--echo #
+
+CREATE TABLE t1 (i INT) ENGINE=RocksDB;
+--error ER_ILLEGAL_HA
+FLUSH TABLE t1 FOR EXPORT;
+DROP TABLE t1;
+
+--echo #
+--echo # MDEV-16154 Server crashes in in myrocks::ha_rocksdb::load_auto_incr_value_from_inde
+--echo #
+CREATE TABLE t1 (a INT) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (1);
+ALTER TABLE t1 AUTO_INCREMENT 10;
+
+DROP TABLE t1;
+
+--echo #
+--echo # MDEV-16155: UPDATE on RocksDB table with unique constraint does not work
+--echo #
+CREATE TABLE t1 (a INT, b CHAR(8), UNIQUE INDEX(a)) ENGINE=RocksDB;
+INSERT INTO t1 (a,b) VALUES (1,'foo'),(2,'bar');
+UPDATE t1 SET a=a+100;
+SELECT * FROM t1;
+DROP TABLE t1;
+
+
+--echo #
+--echo # MDEV-15319: [SQL Layer] Server crashes in Field::set_null / myrocks::ha_rocksdb::convert_record_from_storage_format
+--echo # (just a testcase)
+--echo #
+
+CREATE TABLE t1 (i INT);
+INSERT INTO t1 VALUES (1);
+CREATE TABLE t2 ENGINE=RocksDB AS SELECT VALUES(i) AS a FROM t1;
+DELETE FROM t2;
+DROP TABLE t1,t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mariadb_port_rpl.test b/storage/rocksdb/mysql-test/rocksdb/t/mariadb_port_rpl.test
new file mode 100644
index 00000000000..ed9eb0291c2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/mariadb_port_rpl.test
@@ -0,0 +1,14 @@
+--source include/have_rocksdb.inc
+--source include/have_binlog_format_row.inc
+
+--echo #
+--echo # MDEV-15472: Assertion `!is_set() || (m_status == DA_OK_BULK && is_bulk_op())' failure ...
+--echo #
+select @@log_bin;
+select @@binlog_format;
+CREATE OR REPLACE TABLE t1 (i INT) ENGINE=RocksDB;
+LOCK TABLE t1 WRITE;
+FLUSH TABLES;
+UNLOCK TABLES;
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/max_open_files.test b/storage/rocksdb/mysql-test/rocksdb/t/max_open_files.test
new file mode 100644
index 00000000000..c7c5e7b2ef3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/max_open_files.test
@@ -0,0 +1,53 @@
+--source include/have_rocksdb.inc
+
+# A basic sysbench run fails on a default MyRocks install due to lack of open files
+
+# test for over limit
+CALL mtr.add_suppression("RocksDB: rocksdb_max_open_files should not be greater than the open_files_limit*");
+
+--let $over_rocksdb_max_open_files=`SELECT @@global.open_files_limit + 100`
+--let $under_rocksdb_max_open_files=`SELECT @@global.open_files_limit -1`
+--let SEARCH_FILE=$MYSQLTEST_VARDIR/tmp/rocksdb.max_open_files.err
+--let SEARCH_PATTERN=RocksDB: rocksdb_max_open_files should not be greater than the open_files_limit
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR $over_rocksdb_max_open_files over_rocksdb_max_open_files
+--let $_mysqld_option=--log-error=$SEARCH_FILE --rocksdb_max_open_files=$over_rocksdb_max_open_files
+--source include/restart_mysqld_with_option.inc
+--source include/search_pattern_in_file.inc
+
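+# An over-limit request is capped to half of open_files_limit, which the
+# check below verifies.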
+SELECT FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files;
+
+# test for within limit
+--let $_mysqld_option=--rocksdb_max_open_files=$under_rocksdb_max_open_files
+--source include/restart_mysqld_with_option.inc
+
+SELECT @@global.open_files_limit - 1 = @@global.rocksdb_max_open_files;
+
+# test for minimal value
+--let $_mysqld_option=--rocksdb_max_open_files=0
+--source include/restart_mysqld_with_option.inc
+
+SELECT @@global.rocksdb_max_open_files;
+
+# verify that we can still do work with no descriptor cache
+CREATE TABLE t1(a INT) ENGINE=ROCKSDB;
+INSERT INTO t1 VALUES(0),(1),(2),(3),(4);
+SET GLOBAL rocksdb_force_flush_memtable_and_lzero_now=1;
+DROP TABLE t1;
+
+# test for unlimited
+--let $_mysqld_option=--rocksdb_max_open_files=-1
+--source include/restart_mysqld_with_option.inc
+
+SELECT @@global.rocksdb_max_open_files;
+
+# test for auto-tune
+--let $_mysqld_option=--rocksdb_max_open_files=-2
+--source include/restart_mysqld_with_option.inc
+
+SELECT FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files;
+
+# cleanup
+--let $_mysqld_option=
+--source include/restart_mysqld.inc
+--remove_file $SEARCH_FILE
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/misc.test b/storage/rocksdb/mysql-test/rocksdb/t/misc.test
new file mode 100644
index 00000000000..b8efd0389d2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/misc.test
@@ -0,0 +1,45 @@
+--source include/have_rocksdb.inc
+
+#
+# Different statements not related to an engine,
+# but added to provide function coverage for handler.cc and handler.h.
+# The test can be disabled or removed later.
+#
+
+# hits get_error_message(int, String*)
+
+INSERT INTO mysql.event (
+ db,
+ name,
+ body,
+ definer,
+ interval_value,
+ interval_field,
+ originator,
+ character_set_client,
+ collation_connection,
+ db_collation,
+ body_utf8)
+values (
+ database(),
+ "ev1",
+ "select 1",
+ user(),
+ 100,
+ "SECOND_MICROSECOND",
+ 1,
+ 'utf8',
+ 'utf8_general_ci',
+ 'utf8_general_ci',
+ 'select 1');
+
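+# SECOND_MICROSECOND is not an allowed event interval, so reading this row
+# back is expected to fail with ER_NOT_SUPPORTED_YET, exercising
+# get_error_message().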
+--error ER_NOT_SUPPORTED_YET
+SHOW EVENTS;
+DROP EVENT ev1;
+
+# hits get_foreign_key_list(THD*, List<st_foreign_key_info>*)
+--sorted_result
+SELECT TABLE_NAME, COLUMN_NAME, REFERENCED_TABLE_NAME, REFERENCED_COLUMN_NAME
+FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE ORDER BY TABLE_NAME;
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/multi_varchar_sk_lookup.test b/storage/rocksdb/mysql-test/rocksdb/t/multi_varchar_sk_lookup.test
new file mode 100644
index 00000000000..cb65cf91f05
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/multi_varchar_sk_lookup.test
@@ -0,0 +1,49 @@
+# Disabled until the covered unpack_info format is enabled in production
+--source include/have_debug.inc
+--source include/not_debug.inc
+
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS T1;
+--enable_warnings
+
+CREATE TABLE T1 (
+ P1 VARCHAR(64), P2 VARCHAR(64), P3 VARCHAR(64), P4 VARCHAR(64),
+ S1 VARCHAR(64), S2 VARCHAR(64), S3 VARCHAR(64), S4 VARCHAR(64),
+ S5 VARCHAR(64), S6 VARCHAR(64), S7 VARCHAR(64), S8 VARCHAR(64),
+ S9 VARCHAR(64), S10 VARCHAR(64), S11 VARCHAR(64), S12 VARCHAR(64),
+ S13 VARCHAR(64), S14 VARCHAR(64), S15 VARCHAR(64), S16 VARCHAR(64),
+ PRIMARY KEY (P1(8), P2(8), P3(8), P4(8)),
+ KEY SK (S1(8), S2(8), S3(8), S4(8),
+ S5(8), S6(8), S7(8), S8(8),
+ S9(8), S10(8), S11(8), S12(8),
+ S13(8), S14(8), S15(8), S16(8))
+) ENGINE=rocksdb;
+
+INSERT INTO T1 VALUES ('1', '2', '3', '4',
+ '5', '6', '7', '8',
+ '9', '10', '11', '12',
+ '13', '14', '15', '16',
+ '17', '18', '19', '20');
+
+SELECT * FROM T1;
+
+--let $prefix_index_check_title= Not eligible for optimization, shorter than prefix length.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT P4, S2 FROM T1 FORCE INDEX(SK) WHERE S1='5'
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+DELETE FROM T1;
+INSERT INTO T1 VALUES ('100000000', '200000000', '300000000', '400000000',
+ '500000000', '600000000', '700000000', '800000000',
+ '900000000', '100000000', '110000000', '120000000',
+ '130000000', '140000000', '150000000', '160000000',
+ '170000000', '180000000', '190000000', '200000000');
+
+--let $prefix_index_check_title= Not eligible for optimization, longer than prefix length.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT P4, S2 FROM T1 FORCE INDEX(SK) WHERE S1='5'
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+DROP TABLE T1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mysqlbinlog_blind_replace.test b/storage/rocksdb/mysql-test/rocksdb/t/mysqlbinlog_blind_replace.test
new file mode 100644
index 00000000000..2b033023b2a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/mysqlbinlog_blind_replace.test
@@ -0,0 +1,62 @@
+#
+# This test is intended to check that, when blind replace is enabled,
+# mysqlbinlog is able to pass this information in the captured binlog
+# events, and that we are able to reapply such events.
+#
+
+--source include/have_log_bin.inc
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+
+reset master;
+set GLOBAL binlog_format= 'ROW';
+SET GLOBAL enable_blind_replace=ON;
+set binlog_format=row;
+
+create table t5 (c1 int primary key, c2 int);
+insert into t5 values (1, 1);
+insert into t5 values (2, 2);
+insert into t5 values (3, 3);
+select * from t5;
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t5 values (1, 11);
+replace into t5 values (2, 22);
+replace into t5 values (3, 33);
+
+# Ensure that this was a blind replace
+select case when variable_value-@c = 3 then 'true' else 'false' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+select * from t5;
+--source include/show_binlog_events.inc
+
+flush logs;
+
+# Capture binlog events using mysqlbinlog
+let $MYSQLD_DATADIR= `select @@datadir`;
+--let $log_file_name = query_get_value("SHOW BINARY LOGS", Log_name, 1)
+--exec $MYSQL_BINLOG $MYSQLD_DATADIR/$log_file_name > $MYSQLTEST_VARDIR/tmp/mysqlbinlog-output
+
+# Drop the table. This will be recreated when we reapply binlog events
+drop table t5;
+reset master;
+
+# Now replay the binlog events
+--echo Replaying binlog events containing blind replace statements should work
+--exec $MYSQL < $MYSQLTEST_VARDIR/tmp/mysqlbinlog-output
+select * from t5;
+--source include/show_binlog_events.inc
+
+drop table t5;
+reset master;
+
+# Replay the same binlog events again, but with blind_replace turned off
+--echo Replaying the same binlog events with blind replace disabled should work
+--echo The server should internally convert such events into updates
+SET GLOBAL enable_blind_replace=OFF;
+--exec $MYSQL < $MYSQLTEST_VARDIR/tmp/mysqlbinlog-output
+select * from t5;
+
+--source include/show_binlog_events.inc
+
+set GLOBAL binlog_format=DEFAULT;
+SET GLOBAL enable_blind_replace=DEFAULT;
+drop table t5;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mysqlbinlog_gtid_skip_empty_trans_rocksdb-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/mysqlbinlog_gtid_skip_empty_trans_rocksdb-master.opt
new file mode 100644
index 00000000000..53303252212
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/mysqlbinlog_gtid_skip_empty_trans_rocksdb-master.opt
@@ -0,0 +1 @@
+--binlog_format=row --gtid_mode=ON --enforce_gtid_consistency --log_bin --log_slave_updates
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mysqlbinlog_gtid_skip_empty_trans_rocksdb.test b/storage/rocksdb/mysql-test/rocksdb/t/mysqlbinlog_gtid_skip_empty_trans_rocksdb.test
new file mode 100644
index 00000000000..0c04b60427e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/mysqlbinlog_gtid_skip_empty_trans_rocksdb.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+################################################################################
+# Test the --skip-empty-trans option for mysqlbinlog with rocksdb
+################################################################################
+--disable_warnings
+--source include/have_gtid.inc
+--enable_warnings
+
+reset master;
+set timestamp=1000000000;
+let $storage_engine=rocksdb;
+
+# Test row based replication
+set SESSION binlog_format = 'ROW';
+--source include/mysqlbinlog_gtid_skip_empty_trans_input.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mysqldump-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump-master.opt
new file mode 100644
index 00000000000..2672d4ff35e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump-master.opt
@@ -0,0 +1 @@
+--binlog_format=row
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test
new file mode 100644
index 00000000000..473bebdda89
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test
@@ -0,0 +1,67 @@
+--source include/have_rocksdb.inc
+--source include/have_log_bin.inc
+--source include/restart_mysqld.inc
+
+# Save the initial number of concurrent sessions
+--source include/count_sessions.inc
+
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+connection con1;
+create table r1 (id1 int, id2 int, id3 varchar(100), id4 int, value1 int, value2 int, value3 int, value4 int, primary key (id1, id2, id3, id4), KEY (value1, value2, value3)) engine=rocksdb;
+insert into r1 values (1,1,1,1,1,1,1,1);
+insert into r1 values (1,1,1,2,2,2,2,2);
+insert into r1 values (1,1,2,1,3,3,3,3);
+insert into r1 values (1,1,2,2,4,4,4,4);
+insert into r1 values (1,2,1,1,5,5,5,5);
+insert into r1 values (1,2,1,2,6,6,6,6);
+insert into r1 values (1,2,2,1,7,7,7,7);
+insert into r1 values (1,2,2,2,8,8,8,8);
+insert into r1 values (2,1,1,1,9,9,9,9);
+insert into r1 values (2,1,1,2,10,10,10,10);
+insert into r1 values (2,1,2,1,11,11,11,11);
+insert into r1 values (2,1,2,2,12,12,12,12);
+insert into r1 values (2,2,1,1,13,13,13,13);
+insert into r1 values (2,2,1,2,14,14,14,14);
+insert into r1 values (2,2,2,1,15,15,15,15);
+insert into r1 values (2,2,2,2,16,16,16,16);
+
+connection con2;
+BEGIN;
+insert into r1 values (5,5,5,5,5,5,5,5);
+update r1 set value1=value1+100 where id1=1 and id2=1 and id3='1';
+
+--replace_regex /MASTER_LOG_POS=[0-9]+/MASTER_LOG_POS=BINLOG_START/
+--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --rocksdb --order-by-primary-desc --rocksdb_bulk_load test
+
+--replace_regex /MASTER_LOG_POS=[0-9]+/MASTER_LOG_POS=BINLOG_START/
+--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --rocksdb --order-by-primary-desc --rocksdb_bulk_load --rocksdb_bulk_load_allow_sk test
+
+rollback;
+
+connection con1;
+
+let SEARCH_FILE=$MYSQLTEST_VARDIR/mysqld.1/mysqld.log;
+let SEARCH_PATTERN=START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+source include/search_pattern_in_file.inc;
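+# The search above verifies (in the server log) that the dumps used
+# START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT.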
+
+set @save_default_storage_engine=@@global.default_storage_engine;
+SET GLOBAL default_storage_engine=rocksdb;
+--replace_regex /MASTER_LOG_POS=[0-9]+/MASTER_LOG_POS=BINLOG_START/
+--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key test
+source include/search_pattern_in_file.inc;
+
+# Sanity-test mysqldump when --innodb-stats-on-metadata is specified (no effect)
+--echo ==== mysqldump with --innodb-stats-on-metadata ====
+--replace_regex /MASTER_LOG_POS=[0-9]+/MASTER_LOG_POS=BINLOG_START/
+--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --innodb-stats-on-metadata test
+
+# Test that mysqldump works with statement-based binary logging
+SET GLOBAL binlog_format=statement;
+--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key test > /dev/null
+SET GLOBAL binlog_format=row;
+
+drop table r1;
+reset master;
+set @@global.default_storage_engine=@save_default_storage_engine;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mysqldump2-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump2-master.opt
new file mode 100644
index 00000000000..2672d4ff35e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump2-master.opt
@@ -0,0 +1 @@
+--binlog_format=row
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mysqldump2.test b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump2.test
new file mode 100644
index 00000000000..ca9eb5d2ecf
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump2.test
@@ -0,0 +1,43 @@
+--source include/have_rocksdb.inc
+
+--source include/have_log_bin.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+create table t1 (id int primary key, value int, value2 varchar(200), index(value)) engine=rocksdb;
+
+--disable_query_log
+let $i = 1;
+while ($i <= 50000) {
+ let $insert = INSERT INTO t1 VALUES($i, $i, REPEAT('x', 150));
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+
+optimize table t1;
+
+# Wipe the block cache by restarting the server
+let $restart_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
+--exec echo "wait" > $restart_file
+--shutdown_server 10
+--source include/wait_until_disconnected.inc
+-- exec echo "restart:--rocksdb_default_cf_options=write_buffer_size=64k;target_file_size_base=64k;max_bytes_for_level_base=1m;compression_per_level=kNoCompression;disable_auto_compactions=true;level0_stop_writes_trigger=1000 " > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+-- enable_reconnect
+-- source include/wait_until_connected_again.inc
+
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_block_cache_add';
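+# Snapshot the block-cache-add counter; the dump below is expected to read
+# the data without populating the block cache.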
+
+--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --rocksdb test > /dev/null
+
+# verifying block cache was not filled
+select case when variable_value - @a > 20 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_block_cache_add';
+
+select count(*) from t1;
+
+# verifying block cache was filled
+select case when variable_value - @a > 100 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_block_cache_add';
+
+#cleanup
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/native_procedure-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/native_procedure-master.opt
new file mode 100644
index 00000000000..6c4cea3d4b7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/native_procedure-master.opt
@@ -0,0 +1 @@
+$NP_EXAMPLE_LIB_OPT
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/native_procedure.test b/storage/rocksdb/mysql-test/rocksdb/t/native_procedure.test
new file mode 100644
index 00000000000..d4f38a607cf
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/native_procedure.test
@@ -0,0 +1,2 @@
+let $engine=rocksdb;
+--source include/native_procedure.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/negative_stats.test b/storage/rocksdb/mysql-test/rocksdb/t/negative_stats.test
new file mode 100644
index 00000000000..9de41d17976
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/negative_stats.test
@@ -0,0 +1,26 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (i1 INT, PRIMARY KEY (i1)) ENGINE = ROCKSDB;
+
+--disable_query_log
+let $max = 10000;
+let $i = 1;
+while ($i <= $max) {
+ let $insert = INSERT INTO t1 VALUES ($i);
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+
+SET GLOBAL ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW=1;
+
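+# The debug point simulates negative stats; DATA_LENGTH should then stay
+# sane (below 1MB) rather than underflow to a huge unsigned value.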
+set session debug_dbug= "+d,myrocks_simulate_negative_stats";
+SELECT CASE WHEN DATA_LENGTH < 1024 * 1024 THEN 'true' ELSE 'false' END FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
+set session debug_dbug= "-d,myrocks_simulate_negative_stats";
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/no_merge_sort.test b/storage/rocksdb/mysql-test/rocksdb/t/no_merge_sort.test
new file mode 100644
index 00000000000..ccef7182c11
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/no_merge_sort.test
@@ -0,0 +1,32 @@
+--source include/have_rocksdb.inc
+
+# Regression test for Issue#117: MyRocks did merge sorts even with small data sets
+
+let $datadir = `SELECT @@datadir`;
+
+--disable_query_log
+let $i= 0;
+while ($i < 30) {
+ DROP TABLE IF EXISTS ti_nk;
+
+ CREATE TABLE `ti_nk` (
+ `a` int(11) DEFAULT NULL,
+ `b` int(11) DEFAULT NULL,
+ `c` varchar(36) COLLATE latin1_bin NOT NULL,
+ PRIMARY KEY (`c`)
+ );
+ let $j = 0;
+ while ($j < 200) {
+ eval insert into ti_nk values ($j, $j, md5($j));
+ inc $j;
+ }
+
+ select variable_value into @s from information_schema.global_status where variable_name='Sort_merge_passes';
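+  # With only 200 small rows the sort should fit in memory, so
+  # Sort_merge_passes is expected to stay unchanged.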
+ eval SELECT a, b, c FROM ti_nk ORDER BY a,b,c INTO OUTFILE '$datadir/select.out';
+ --remove_file $datadir/select.out
+ select case when variable_value-@s = 0 then 'true' else 'false' end as skip_merge_sort from information_schema.global_status where variable_name='Sort_merge_passes';
+ inc $i;
+}
+--enable_query_log
+
+DROP TABLE ti_nk;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/no_primary_key_basic_ops.inc b/storage/rocksdb/mysql-test/rocksdb/t/no_primary_key_basic_ops.inc
new file mode 100644
index 00000000000..0dcd452194a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/no_primary_key_basic_ops.inc
@@ -0,0 +1,65 @@
+#
+# This include file checks some very basic capabilities (restart, insert,
+# update and delete) for tables with no PK.
+# NOTE: requires a table with a structure similar to
+# CREATE TABLE t1 (a INT, b CHAR(8)) ENGINE=rocksdb;
+#
+
+SHOW CREATE TABLE t1;
+SHOW COLUMNS IN t1;
+
+### test INSERT
+INSERT INTO t1 (a,b) VALUES (76,'bar');
+INSERT INTO t1 (a,b) VALUES (35,'foo');
+INSERT INTO t1 (a,b) VALUES (77,'baz');
+
+## test SELECT w/ index scans
+--sorted_result
+SELECT * FROM t1 WHERE a = 35;
+--sorted_result
+SELECT * FROM t1 WHERE a = 35 AND b = 'foo';
+--sorted_result
+SELECT * FROM t1 WHERE a = 77 OR b = 'bar';
+--sorted_result
+SELECT * FROM t1 WHERE a > 35;
+--sorted_result
+SELECT * FROM t1;
+
+# test UPDATE
+UPDATE t1 SET a=a+100;
+--sorted_result
+SELECT * FROM t1;
+
+UPDATE t1 SET a=a-100, b='bbb' WHERE a>100;
+--sorted_result
+SELECT * FROM t1;
+UPDATE t1 SET a=300, b='ccc' WHERE a>70;
+--sorted_result
+SELECT * FROM t1;
+UPDATE t1 SET a=123 WHERE a=35;
+--sorted_result
+SELECT * FROM t1;
+UPDATE t1 SET a=321 WHERE b='ccc';
+--sorted_result
+SELECT * FROM t1;
+
+
+## test RESTART/OPEN
+--source include/restart_mysqld.inc
+## test insert after restart
+INSERT INTO t1 (a,b) VALUES (45,'bob');
+--sorted_result
+SELECT * FROM t1;
+
+# test DELETE
+DELETE FROM t1 WHERE a=123;
+--sorted_result
+SELECT * FROM t1;
+
+DELETE FROM t1 WHERE b > 'bbb' AND a > 100;
+--sorted_result
+SELECT * FROM t1;
+
+# test TRUNCATE
+TRUNCATE TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/optimize_myrocks_replace_into_base.test b/storage/rocksdb/mysql-test/rocksdb/t/optimize_myrocks_replace_into_base.test
new file mode 100644
index 00000000000..b37f532a21e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/optimize_myrocks_replace_into_base.test
@@ -0,0 +1,96 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+
+SET @prior_rocksdb_perf_context_level = @@rocksdb_perf_context_level;
+SET GLOBAL rocksdb_perf_context_level=3;
+SET GLOBAL enable_blind_replace=ON;
+
+#
+# case 1: a table with only a primary key supports blind-write replace
+#
+create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
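+# Snapshot the get_for_update counter; the check after the replace infers
+# from its growth whether the replace was read-free.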
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+drop table t1;
+
+#
+# case 2: a table with only a primary key but with a trigger does not support blind-write replace
+#
+create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+create trigger trg before insert on t1 for each row set @a:=1;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+drop table t1;
+
+
+#
+# case 3: a table without a primary key does not support blind-write replace
+#
+
+create table t1(c1 int,c2 int) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+drop table t1;
+
+
+create table t1(c1 int,c2 int unique) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+drop table t1;
+
+
+
+#
+# case 4: a table with a primary key and a secondary key does not support blind-write replace
+#
+create table t1(c1 int primary key,c2 int unique) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+drop table t1;
+
+
+create table t1(c1 int primary key,c2 int, key idx1(c2)) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+drop table t1;
+
+
+
+#
+# case 5: Disabling blind replace through enable_blind_replace should work
+SET GLOBAL enable_blind_replace=OFF;
+create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+drop table t1;
+
+SET GLOBAL enable_blind_replace=DEFAULT;
+SET GLOBAL rocksdb_perf_context_level = @prior_rocksdb_perf_context_level;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/optimize_myrocks_replace_into_lock.test b/storage/rocksdb/mysql-test/rocksdb/t/optimize_myrocks_replace_into_lock.test
new file mode 100644
index 00000000000..6cce429a5de
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/optimize_myrocks_replace_into_lock.test
@@ -0,0 +1,88 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+
+# Enable blind replace
+SET GLOBAL enable_blind_replace=ON;
+
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+
+#
+# case 1: update is blocked by replace into
+#
+connection con1;
+SELECT @@global.enable_blind_replace;
+begin;
+replace into t1 values(1,11);
+
+
+connection con2;
+SELECT @@global.enable_blind_replace;
+begin;
+send update t1 set c2=22 where c1=1;
+
+
+connection default;
+# Check that the above update is blocked
+let $wait_condition=
+ select count(*) = 1 from information_schema.processlist
+ where state = 'Waiting for row lock' and
+ info = 'update t1 set c2=22 where c1=1';
+--source include/wait_condition.inc
+
+
+connection con1;
+commit;
+
+connection con2;
+--echo # Reap update.
+--reap
+commit;
+select * from t1;
+
+
+#
+# case 2: replace into is blocked by update
+#
+
+connection con1;
+SELECT @@global.enable_blind_replace;
+begin;
+update t1 set c2=55 where c1=1;
+
+connection con2;
+SELECT @@global.enable_blind_replace;
+begin;
+send replace into t1 values(1,66);
+
+
+connection default;
+# Check that the above replace into is blocked
+let $wait_condition=
+ select count(*) = 1 from information_schema.processlist
+ where state = 'Waiting for row lock' and
+ info = 'replace into t1 values(1,66)';
+--source include/wait_condition.inc
+
+
+connection con1;
+commit;
+
+connection con2;
+--echo # Reap replace into.
+--reap
+commit;
+select * from t1;
+
+connection default;
+drop table t1;
+
+disconnect con1;
+disconnect con2;
+
+# Disable blind replace
+SET GLOBAL enable_blind_replace=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/optimize_table-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/optimize_table-master.opt
new file mode 100644
index 00000000000..71f74ee53ab
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/optimize_table-master.opt
@@ -0,0 +1 @@
+--rocksdb_default_cf_options=write_buffer_size=64k;target_file_size_base=64k;max_bytes_for_level_base=1m;compression_per_level=kNoCompression
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/optimize_table.inc b/storage/rocksdb/mysql-test/rocksdb/t/optimize_table.inc
new file mode 100644
index 00000000000..5f66937cef1
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/optimize_table.inc
@@ -0,0 +1,20 @@
+# Run a check script verifying that the total .sst file size shrank enough during each OPTIMIZE TABLE
+perl;
+
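+# Sum the sizes of all .sst files in the #rocksdb data directory.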
+$size += -s $_ for (<$ENV{datadir}/#rocksdb/*.sst>);
+$file= "$ENV{MYSQL_TMP_DIR}/sst_size.dat";
+
+if (-f $file) {
+ open(F, '<', $file) || die("Can't open file $file: $!");
+ $old = <F>;
+ close F;
+ if ($old - $size < 1e6) {
+ print "sst file reduction was not enough $old -> $size (minimum 1000kb)\n";
+ } else {
+ print "sst file reduction ok\n";
+ }
+}
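+# Record the current total size for the next invocation.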
+open(F, '>', $file) || die("Can't open file $file: $!");
+print F $size;
+close F;
+EOF
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/optimize_table.test b/storage/rocksdb/mysql-test/rocksdb/t/optimize_table.test
new file mode 100644
index 00000000000..7a8f4fc7085
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/optimize_table.test
@@ -0,0 +1,81 @@
+--source include/have_rocksdb.inc
+
+#
+# OPTIMIZE TABLE statements
+#
+
+let datadir = `SELECT @@datadir`;
+
+--disable_warnings
+DROP TABLE IF EXISTS t1,t2,t3,t4,t5,t6;
+--enable_warnings
+create table t1 (id int primary key, value int, value2 varchar(200), index(value)) engine=rocksdb;
+create table t2 (id int primary key, value int, value2 varchar(200), index(value)) engine=rocksdb;
+create table t3 (id int primary key, value int, value2 varchar(200), index(value)) engine=rocksdb;
+create table t4 (id int, value int, value2 varchar(200), primary key (id) comment 'rev:cf_i', index(value) comment 'rev:cf_i') engine=rocksdb;
+create table t5 (id int, value int, value2 varchar(200), primary key (id) comment 'rev:cf_i', index(value) comment 'rev:cf_i') engine=rocksdb;
+create table t6 (id int, value int, value2 varchar(200), primary key (id) comment 'rev:cf_i', index(value) comment 'rev:cf_i') engine=rocksdb;
+
+--disable_query_log
+let $t = 1;
+while ($t <= 6) {
+ let $i = 1;
+ while ($i <= 10000) {
+ let $insert = INSERT INTO t$t VALUES($i, $i, REPEAT('x', 150));
+ inc $i;
+ eval $insert;
+ }
+ inc $t;
+}
+--enable_query_log
+
+# Disable auto compaction so that effects of optimize table are stable
+let $restart_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
+--exec echo "wait" > $restart_file
+--shutdown_server 10
+--source include/wait_until_disconnected.inc
+-- exec echo "restart:--rocksdb_default_cf_options=write_buffer_size=64k;target_file_size_base=64k;max_bytes_for_level_base=1m;compression_per_level=kNoCompression;disable_auto_compactions=true;level0_stop_writes_trigger=1000 " > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+-- enable_reconnect
+-- source include/wait_until_connected_again.inc
+
+select count(*) from t1;
+select count(*) from t2;
+select count(*) from t3;
+select count(*) from t4;
+select count(*) from t5;
+select count(*) from t6;
+delete from t1 where id <= 9900;
+delete from t2 where id <= 9900;
+delete from t3 where id <= 9900;
+delete from t4 where id <= 9900;
+delete from t5 where id <= 9900;
+delete from t6 where id <= 9900;
+
+source optimize_table.inc;
+optimize table t1;
+source optimize_table.inc;
+optimize table t3;
+source optimize_table.inc;
+optimize table t4;
+source optimize_table.inc;
+optimize table t6;
+source optimize_table.inc;
+
+select count(*) from t1;
+select count(*) from t2;
+select count(*) from t3;
+select count(*) from t4;
+select count(*) from t5;
+select count(*) from t6;
+
+#cleanup
+optimize table t2;
+optimize table t5;
+DROP TABLE t1;
+DROP TABLE t2;
+DROP TABLE t3;
+DROP TABLE t4;
+DROP TABLE t5;
+DROP TABLE t6;
+--remove_file $MYSQL_TMP_DIR/sst_size.dat
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/optimizer_loose_index_scans.test b/storage/rocksdb/mysql-test/rocksdb/t/optimizer_loose_index_scans.test
new file mode 100644
index 00000000000..db66da3b0dc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/optimizer_loose_index_scans.test
@@ -0,0 +1,4 @@
+--source include/have_rocksdb.inc
+
+let $engine=rocksdb;
+--source include/loose_index_scans.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/partition.test b/storage/rocksdb/mysql-test/rocksdb/t/partition.test
new file mode 100644
index 00000000000..5954c0d95db
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/partition.test
@@ -0,0 +1,762 @@
+
+--source include/have_rocksdb.inc
+--source include/have_partition.inc
+
+#
+# Create tables with partitions and try to update/select from them.
+#
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS VAR_POP;
+DROP TABLE IF EXISTS TEMP0;
+DROP TABLE IF EXISTS VAR_SAMP;
+DROP TABLE IF EXISTS ti;
+DROP TABLE IF EXISTS members;
+DROP TABLE IF EXISTS members_2;
+DROP TABLE IF EXISTS employees;
+DROP TABLE IF EXISTS employees_2;
+DROP TABLE IF EXISTS employees_3;
+DROP TABLE IF EXISTS quarterly_report_status;
+DROP TABLE IF EXISTS employees_4;
+DROP TABLE IF EXISTS h2;
+DROP TABLE IF EXISTS rcx;
+DROP TABLE IF EXISTS r1;
+DROP TABLE IF EXISTS rc1;
+DROP TABLE IF EXISTS rx;
+DROP TABLE IF EXISTS rc2;
+DROP TABLE IF EXISTS rc3;
+DROP TABLE IF EXISTS rc4;
+DROP TABLE IF EXISTS employees_by_lname;
+DROP TABLE IF EXISTS customers_1;
+DROP TABLE IF EXISTS customers_2;
+DROP TABLE IF EXISTS customers_3;
+DROP TABLE IF EXISTS employees_hash;
+DROP TABLE IF EXISTS employees_hash_1;
+DROP TABLE IF EXISTS t1_hash;
+DROP TABLE IF EXISTS employees_linear_hash;
+DROP TABLE IF EXISTS t1_linear_hash;
+DROP TABLE IF EXISTS k1;
+DROP TABLE IF EXISTS k2;
+DROP TABLE IF EXISTS tm1;
+DROP TABLE IF EXISTS tk;
+DROP TABLE IF EXISTS ts;
+DROP TABLE IF EXISTS ts_1;
+DROP TABLE IF EXISTS ts_3;
+DROP TABLE IF EXISTS ts_4;
+DROP TABLE IF EXISTS ts_5;
+DROP TABLE IF EXISTS trb3;
+DROP TABLE IF EXISTS tr;
+DROP TABLE IF EXISTS members_3;
+DROP TABLE IF EXISTS clients;
+DROP TABLE IF EXISTS clients_lk;
+DROP TABLE IF EXISTS trb1;
+
+--enable_warnings
+
+CREATE TABLE t1 (i INT, j INT, k INT, PRIMARY KEY (i)) ENGINE = ROCKSDB PARTITION BY KEY(i) PARTITIONS 4;
+
+--disable_query_log
+let $max = 1000;
+let $i = 1;
+while ($i <= $max) {
+ let $insert = INSERT INTO t1 VALUES ($i, $i, $i);
+ inc $i;
+ eval $insert;
+}
+
+ALTER TABLE t1 REBUILD PARTITION p0, p1;
+ALTER TABLE t1 OPTIMIZE PARTITION p0, p1;
+ALTER TABLE t1 ANALYZE PARTITION p3;
+ALTER TABLE t1 REPAIR PARTITION p0,p1;
+ALTER TABLE t1 CHECK PARTITION p1;
+
+# The partition marker in file names is "#P#". To verify that parsing is done
+# correctly, we check that tables containing "P" somewhere in their names can
+# be created correctly.
+CREATE TABLE VAR_POP (a int) ENGINE = ROCKSDB;
+CREATE TABLE TEMP0 (a int) ENGINE = ROCKSDB PARTITION BY HASH (a) PARTITIONS 3;
+CREATE TABLE VAR_SAMP (a int) ENGINE = ROCKSDB PARTITION BY HASH (a) PARTITIONS 10;
+
+--enable_query_log
+
+select lower(table_name) as tname
+from information_schema.tables
+where table_schema=database()
+order by tname;
+
+SELECT * FROM t1 ORDER BY i LIMIT 10;
+SELECT COUNT(*) FROM t1;
+
+#
+# Test-cases above are copied from
+# https://dev.mysql.com/doc/refman/5.6/en/partitioning.html to validate that the
+# partitioning related examples work with MyRocks.
+#
+
+# Create a table that is partitioned by hash into 6 partitions.
+CREATE TABLE ti(
+ id INT,
+ amount DECIMAL(7,2),
+ tr_date DATE
+) ENGINE=ROCKSDB
+ PARTITION BY HASH(MONTH(tr_date))
+ PARTITIONS 6;
+
+CREATE TABLE members (
+ firstname VARCHAR(25) NOT NULL,
+ lastname VARCHAR(25) NOT NULL,
+ username VARCHAR(16) NOT NULL,
+ email VARCHAR(35),
+ joined DATE NOT NULL
+) ENGINE=ROCKSDB
+ PARTITION BY KEY(joined)
+ PARTITIONS 6;
+
+CREATE TABLE members_2 (
+ firstname VARCHAR(25) NOT NULL,
+ lastname VARCHAR(25) NOT NULL,
+ username VARCHAR(16) NOT NULL,
+ email VARCHAR(35),
+ joined DATE NOT NULL
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE(YEAR(joined)) (
+ PARTITION p0 VALUES LESS THAN (1960),
+ PARTITION p1 VALUES LESS THAN (1970),
+ PARTITION p2 VALUES LESS THAN (1980),
+ PARTITION p3 VALUES LESS THAN (1990),
+ PARTITION p4 VALUES LESS THAN MAXVALUE
+ );
+
+# Partition names are not case-sensitive.
+--error 1517
+CREATE TABLE t2 (val INT)
+ ENGINE=ROCKSDB
+ PARTITION BY LIST(val)(
+ PARTITION mypart VALUES IN (1,3,5),
+ PARTITION MyPart VALUES IN (2,4,6)
+ );
+
+CREATE TABLE employees (
+ id INT NOT NULL,
+ fname VARCHAR(30),
+ lname VARCHAR(30),
+ hired DATE NOT NULL DEFAULT '1970-01-01',
+ separated DATE NOT NULL DEFAULT '9999-12-31',
+ job_code INT NOT NULL,
+ store_id INT NOT NULL
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE (store_id) (
+ PARTITION p0 VALUES LESS THAN (6),
+ PARTITION p1 VALUES LESS THAN (11),
+ PARTITION p2 VALUES LESS THAN (16),
+ PARTITION p3 VALUES LESS THAN MAXVALUE
+ );
+
+CREATE TABLE employees_2 (
+ id INT NOT NULL,
+ fname VARCHAR(30),
+ lname VARCHAR(30),
+ hired DATE NOT NULL DEFAULT '1970-01-01',
+ separated DATE NOT NULL DEFAULT '9999-12-31',
+ job_code INT NOT NULL,
+ store_id INT NOT NULL
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE (job_code) (
+ PARTITION p0 VALUES LESS THAN (100),
+ PARTITION p1 VALUES LESS THAN (1000),
+ PARTITION p2 VALUES LESS THAN (10000)
+ );
+
+CREATE TABLE employees_3 (
+ id INT NOT NULL,
+ fname VARCHAR(30),
+ lname VARCHAR(30),
+ hired DATE NOT NULL DEFAULT '1970-01-01',
+ separated DATE NOT NULL DEFAULT '9999-12-31',
+ job_code INT,
+ store_id INT
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE (YEAR(separated)) (
+ PARTITION p0 VALUES LESS THAN (1991),
+ PARTITION p1 VALUES LESS THAN (1996),
+ PARTITION p2 VALUES LESS THAN (2001),
+ PARTITION p3 VALUES LESS THAN MAXVALUE
+ );
+
+CREATE TABLE quarterly_report_status (
+ report_id INT NOT NULL,
+ report_status VARCHAR(20) NOT NULL,
+ report_updated TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE (UNIX_TIMESTAMP(report_updated)) (
+ PARTITION p0 VALUES LESS THAN ( UNIX_TIMESTAMP('2008-01-01 00:00:00') ),
+ PARTITION p1 VALUES LESS THAN ( UNIX_TIMESTAMP('2008-04-01 00:00:00') ),
+ PARTITION p2 VALUES LESS THAN ( UNIX_TIMESTAMP('2008-07-01 00:00:00') ),
+ PARTITION p3 VALUES LESS THAN ( UNIX_TIMESTAMP('2008-10-01 00:00:00') ),
+ PARTITION p4 VALUES LESS THAN ( UNIX_TIMESTAMP('2009-01-01 00:00:00') ),
+ PARTITION p5 VALUES LESS THAN ( UNIX_TIMESTAMP('2009-04-01 00:00:00') ),
+ PARTITION p6 VALUES LESS THAN ( UNIX_TIMESTAMP('2009-07-01 00:00:00') ),
+ PARTITION p7 VALUES LESS THAN ( UNIX_TIMESTAMP('2009-10-01 00:00:00') ),
+ PARTITION p8 VALUES LESS THAN ( UNIX_TIMESTAMP('2010-01-01 00:00:00') ),
+ PARTITION p9 VALUES LESS THAN (MAXVALUE)
+ );
+
+CREATE TABLE employees_4 (
+ id INT NOT NULL,
+ fname VARCHAR(30),
+ lname VARCHAR(30),
+ hired DATE NOT NULL DEFAULT '1970-01-01',
+ separated DATE NOT NULL DEFAULT '9999-12-31',
+ job_code INT,
+ store_id INT
+) ENGINE=ROCKSDB
+ PARTITION BY LIST(store_id) (
+ PARTITION pNorth VALUES IN (3,5,6,9,17),
+ PARTITION pEast VALUES IN (1,2,10,11,19,20),
+ PARTITION pWest VALUES IN (4,12,13,14,18),
+ PARTITION pCentral VALUES IN (7,8,15,16)
+ );
+
+CREATE TABLE h2 (
+ c1 INT,
+ c2 INT
+) ENGINE=ROCKSDB
+ PARTITION BY LIST(c1) (
+ PARTITION p0 VALUES IN (1, 4, 7),
+ PARTITION p1 VALUES IN (2, 5, 8)
+ );
+
+# ERROR 1526 (HY000): Table has no partition for value 3
+--error 1526
+INSERT INTO h2 VALUES (3, 5);
+
+CREATE TABLE rcx (
+ a INT,
+ b INT,
+ c CHAR(3),
+ d INT
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE COLUMNS(a,d,c) (
+ PARTITION p0 VALUES LESS THAN (5,10,'ggg'),
+ PARTITION p1 VALUES LESS THAN (10,20,'mmm'),
+ PARTITION p2 VALUES LESS THAN (15,30,'sss'),
+ PARTITION p3 VALUES LESS THAN (MAXVALUE,MAXVALUE,MAXVALUE)
+ );
+
+CREATE TABLE r1 (
+ a INT,
+ b INT
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE (a) (
+ PARTITION p0 VALUES LESS THAN (5),
+ PARTITION p1 VALUES LESS THAN (MAXVALUE)
+ );
+
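+# Plain RANGE(a): 5 is not less than 5, so all three rows fall into p1.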
+INSERT INTO r1 VALUES (5,10), (5,11), (5,12);
+
+CREATE TABLE rc1 (
+ a INT,
+ b INT
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE COLUMNS(a, b) (
+ PARTITION p0 VALUES LESS THAN (5, 12),
+ PARTITION p3 VALUES LESS THAN (MAXVALUE, MAXVALUE)
+ );
+
+INSERT INTO rc1 VALUES (5,10), (5,11), (5,12);
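+# RANGE COLUMNS compares the column lists as tuples: (5,10) and (5,11) sort
+# below p0's bound (5,12) and go to p0, while (5,12) itself goes to p3, as
+# the comparisons below confirm: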
+SELECT (5,10) < (5,12), (5,11) < (5,12), (5,12) < (5,12);
+
+CREATE TABLE rx (
+ a INT,
+ b INT
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE COLUMNS (a) (
+ PARTITION p0 VALUES LESS THAN (5),
+ PARTITION p1 VALUES LESS THAN (MAXVALUE)
+ );
+
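+# With a single column, RANGE COLUMNS(a) places rows exactly like RANGE(a):
+# all three rows go to p1.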
+INSERT INTO rx VALUES (5,10), (5,11), (5,12);
+
+CREATE TABLE rc2 (
+ a INT,
+ b INT
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE COLUMNS(a,b) (
+ PARTITION p0 VALUES LESS THAN (0,10),
+ PARTITION p1 VALUES LESS THAN (10,20),
+ PARTITION p2 VALUES LESS THAN (10,30),
+ PARTITION p3 VALUES LESS THAN (MAXVALUE,MAXVALUE)
+ );
+
+CREATE TABLE rc3 (
+ a INT,
+ b INT
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE COLUMNS(a,b) (
+ PARTITION p0 VALUES LESS THAN (0,10),
+ PARTITION p1 VALUES LESS THAN (10,20),
+ PARTITION p2 VALUES LESS THAN (10,30),
+ PARTITION p3 VALUES LESS THAN (10,35),
+ PARTITION p4 VALUES LESS THAN (20,40),
+ PARTITION p5 VALUES LESS THAN (MAXVALUE,MAXVALUE)
+);
+
+CREATE TABLE rc4 (
+ a INT,
+ b INT,
+ c INT
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE COLUMNS(a,b,c) (
+ PARTITION p0 VALUES LESS THAN (0,25,50),
+ PARTITION p1 VALUES LESS THAN (10,20,100),
+ PARTITION p2 VALUES LESS THAN (10,30,50),
+ PARTITION p3 VALUES LESS THAN (MAXVALUE,MAXVALUE,MAXVALUE)
+ );
+
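+# Both row comparisons return 1, so rc4's bounds above are strictly
+# increasing.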
+SELECT (0,25,50) < (10,20,100), (10,20,100) < (10,30,50);
+
+# ERROR 1493 (HY000): VALUES LESS THAN value must be strictly increasing for each partition
+--error 1493
+CREATE TABLE rcf (
+ a INT,
+ b INT,
+ c INT
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE COLUMNS(a,b,c) (
+ PARTITION p0 VALUES LESS THAN (0,25,50),
+ PARTITION p1 VALUES LESS THAN (20,20,100),
+ PARTITION p2 VALUES LESS THAN (10,30,50),
+ PARTITION p3 VALUES LESS THAN (MAXVALUE,MAXVALUE,MAXVALUE)
+ );
+
+CREATE TABLE employees_by_lname (
+ id INT NOT NULL,
+ fname VARCHAR(30),
+ lname VARCHAR(30),
+ hired DATE NOT NULL DEFAULT '1970-01-01',
+ separated DATE NOT NULL DEFAULT '9999-12-31',
+ job_code INT NOT NULL,
+ store_id INT NOT NULL
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE COLUMNS (lname) (
+ PARTITION p0 VALUES LESS THAN ('g'),
+ PARTITION p1 VALUES LESS THAN ('m'),
+ PARTITION p2 VALUES LESS THAN ('t'),
+ PARTITION p3 VALUES LESS THAN (MAXVALUE)
+ );
+
+ALTER TABLE employees_by_lname PARTITION BY RANGE COLUMNS (lname) (
+ PARTITION p0 VALUES LESS THAN ('g'),
+ PARTITION p1 VALUES LESS THAN ('m'),
+ PARTITION p2 VALUES LESS THAN ('t'),
+ PARTITION p3 VALUES LESS THAN (MAXVALUE)
+);
+
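+# ALTER TABLE ... PARTITION BY rebuilds the table with the new layout; the
+# ALTER below reapplies the same lname scheme, and the one after it switches
+# the partitioning column to hired.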
+ALTER TABLE employees_by_lname PARTITION BY RANGE COLUMNS (hired) (
+ PARTITION p0 VALUES LESS THAN ('1970-01-01'),
+ PARTITION p1 VALUES LESS THAN ('1980-01-01'),
+ PARTITION p2 VALUES LESS THAN ('1990-01-01'),
+ PARTITION p3 VALUES LESS THAN ('2000-01-01'),
+ PARTITION p4 VALUES LESS THAN ('2010-01-01'),
+ PARTITION p5 VALUES LESS THAN (MAXVALUE)
+);
+
+CREATE TABLE customers_1 (
+ first_name VARCHAR(25),
+ last_name VARCHAR(25),
+ street_1 VARCHAR(30),
+ street_2 VARCHAR(30),
+ city VARCHAR(15),
+ renewal DATE
+) ENGINE=ROCKSDB
+ PARTITION BY LIST COLUMNS(city) (
+ PARTITION pRegion_1 VALUES IN('Oskarshamn', 'Högsby', 'Mönsterås'),
+ PARTITION pRegion_2 VALUES IN('Vimmerby', 'Hultsfred', 'Västervik'),
+ PARTITION pRegion_3 VALUES IN('Nässjö', 'Eksjö', 'Vetlanda'),
+ PARTITION pRegion_4 VALUES IN('Uppvidinge', 'Alvesta', 'Växjo')
+ );
+
+CREATE TABLE customers_2 (
+ first_name VARCHAR(25),
+ last_name VARCHAR(25),
+ street_1 VARCHAR(30),
+ street_2 VARCHAR(30),
+ city VARCHAR(15),
+ renewal DATE
+) ENGINE=ROCKSDB
+ PARTITION BY LIST COLUMNS(renewal) (
+ PARTITION pWeek_1 VALUES IN('2010-02-01', '2010-02-02', '2010-02-03',
+ '2010-02-04', '2010-02-05', '2010-02-06', '2010-02-07'),
+ PARTITION pWeek_2 VALUES IN('2010-02-08', '2010-02-09', '2010-02-10',
+ '2010-02-11', '2010-02-12', '2010-02-13', '2010-02-14'),
+ PARTITION pWeek_3 VALUES IN('2010-02-15', '2010-02-16', '2010-02-17',
+ '2010-02-18', '2010-02-19', '2010-02-20', '2010-02-21'),
+ PARTITION pWeek_4 VALUES IN('2010-02-22', '2010-02-23', '2010-02-24',
+ '2010-02-25', '2010-02-26', '2010-02-27', '2010-02-28')
+ );
+
+CREATE TABLE customers_3 (
+ first_name VARCHAR(25),
+ last_name VARCHAR(25),
+ street_1 VARCHAR(30),
+ street_2 VARCHAR(30),
+ city VARCHAR(15),
+ renewal DATE
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE COLUMNS(renewal) (
+ PARTITION pWeek_1 VALUES LESS THAN('2010-02-09'),
+ PARTITION pWeek_2 VALUES LESS THAN('2010-02-15'),
+ PARTITION pWeek_3 VALUES LESS THAN('2010-02-22'),
+ PARTITION pWeek_4 VALUES LESS THAN('2010-03-01')
+ );
+
+CREATE TABLE employees_hash (
+ id INT NOT NULL,
+ fname VARCHAR(30),
+ lname VARCHAR(30),
+ hired DATE NOT NULL DEFAULT '1970-01-01',
+ separated DATE NOT NULL DEFAULT '9999-12-31',
+ job_code INT,
+ store_id INT
+) ENGINE=ROCKSDB
+ PARTITION BY HASH(store_id)
+ PARTITIONS 4;
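+
+# HASH assigns each row to partition MOD(store_id, 4); e.g. store_id = 9
+# maps to p1.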
+
+CREATE TABLE employees_hash_1 (
+ id INT NOT NULL,
+ fname VARCHAR(30),
+ lname VARCHAR(30),
+ hired DATE NOT NULL DEFAULT '1970-01-01',
+ separated DATE NOT NULL DEFAULT '9999-12-31',
+ job_code INT,
+ store_id INT
+) ENGINE=ROCKSDB
+ PARTITION BY HASH( YEAR(hired) )
+ PARTITIONS 4;
+
+CREATE TABLE t1_hash (
+ col1 INT,
+ col2 CHAR(5),
+ col3 DATE
+) ENGINE=ROCKSDB
+ PARTITION BY HASH( YEAR(col3) )
+ PARTITIONS 4;
+
+CREATE TABLE employees_linear_hash (
+ id INT NOT NULL,
+ fname VARCHAR(30),
+ lname VARCHAR(30),
+ hired DATE NOT NULL DEFAULT '1970-01-01',
+ separated DATE NOT NULL DEFAULT '9999-12-31',
+ job_code INT,
+ store_id INT
+) ENGINE=ROCKSDB
+ PARTITION BY LINEAR HASH( YEAR(hired) )
+ PARTITIONS 4;
+
+CREATE TABLE t1_linear_hash (
+ col1 INT,
+ col2 CHAR(5),
+ col3 DATE
+) ENGINE=ROCKSDB
+ PARTITION BY LINEAR HASH( YEAR(col3) )
+ PARTITIONS 6;
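+
+# LINEAR HASH uses a powers-of-two algorithm rather than plain modulo: with
+# 6 partitions, V = 8 and N = YEAR(col3) & 7; while N >= 6, N is reduced via
+# N = N & (V/2 - 1). E.g. YEAR(col3) = 2003 gives N = 2003 & 7 = 3, so that
+# row lands in p3.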
+
+CREATE TABLE k1 (
+ id INT NOT NULL PRIMARY KEY,
+ name VARCHAR(20)
+) ENGINE=ROCKSDB
+ PARTITION BY KEY()
+ PARTITIONS 2;
+
+CREATE TABLE k2 (
+ id INT NOT NULL,
+ name VARCHAR(20),
+ UNIQUE KEY (id)
+) ENGINE=ROCKSDB
+ PARTITION BY KEY()
+ PARTITIONS 2;
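+
+# KEY() with an empty column list uses the primary key (k1) or, when there
+# is no primary key, a unique NOT NULL key (id in k2).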
+
+CREATE TABLE tm1 (
+ s1 CHAR(32) PRIMARY KEY
+) ENGINE=ROCKSDB
+ PARTITION BY KEY(s1)
+ PARTITIONS 10;
+
+CREATE TABLE tk (
+ col1 INT NOT NULL,
+ col2 CHAR(5),
+ col3 DATE
+) ENGINE=ROCKSDB
+ PARTITION BY LINEAR KEY (col1)
+ PARTITIONS 3;
+
+CREATE TABLE ts (
+ id INT,
+ purchased DATE
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE( YEAR(purchased) )
+ SUBPARTITION BY HASH( TO_DAYS(purchased) )
+ SUBPARTITIONS 2 (
+ PARTITION p0 VALUES LESS THAN (1990),
+ PARTITION p1 VALUES LESS THAN (2000),
+ PARTITION p2 VALUES LESS THAN MAXVALUE
+ );
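+
+# 3 RANGE partitions x 2 HASH subpartitions = 6 physical partitions in total.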
+
+CREATE TABLE ts_1 (
+ id INT,
+ purchased DATE
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE( YEAR(purchased) )
+ SUBPARTITION BY HASH( TO_DAYS(purchased) ) (
+ PARTITION p0 VALUES LESS THAN (1990) (
+ SUBPARTITION s0,
+ SUBPARTITION s1
+ ),
+ PARTITION p1 VALUES LESS THAN (2000) (
+ SUBPARTITION s2,
+ SUBPARTITION s3
+ ),
+ PARTITION p2 VALUES LESS THAN MAXVALUE (
+ SUBPARTITION s4,
+ SUBPARTITION s5
+ )
+ );
+
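+# If any partition defines its subpartitions explicitly, all partitions must
+# do so; ts_2 omits them for p1, which is rejected as a syntax error.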
+--error 1064
+CREATE TABLE ts_2 (
+ id INT,
+ purchased DATE
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE( YEAR(purchased) )
+ SUBPARTITION BY HASH( TO_DAYS(purchased) ) (
+ PARTITION p0 VALUES LESS THAN (1990) (
+ SUBPARTITION s0,
+ SUBPARTITION s1
+ ),
+ PARTITION p1 VALUES LESS THAN (2000),
+ PARTITION p2 VALUES LESS THAN MAXVALUE (
+ SUBPARTITION s2,
+ SUBPARTITION s3
+ )
+ );
+
+CREATE TABLE ts_3 (
+ id INT,
+ purchased DATE
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE( YEAR(purchased) )
+ SUBPARTITION BY HASH( TO_DAYS(purchased) ) (
+ PARTITION p0 VALUES LESS THAN (1990) (
+ SUBPARTITION s0,
+ SUBPARTITION s1
+ ),
+ PARTITION p1 VALUES LESS THAN (2000) (
+ SUBPARTITION s2,
+ SUBPARTITION s3
+ ),
+ PARTITION p2 VALUES LESS THAN MAXVALUE (
+ SUBPARTITION s4,
+ SUBPARTITION s5
+ )
+ );
+
+CREATE TABLE ts_4 (
+ id INT,
+ purchased DATE
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE( YEAR(purchased) )
+ SUBPARTITION BY HASH( TO_DAYS(purchased) ) (
+ PARTITION p0 VALUES LESS THAN (1990) (
+ SUBPARTITION s0,
+ SUBPARTITION s1
+ ),
+ PARTITION p1 VALUES LESS THAN (2000) (
+ SUBPARTITION s2,
+ SUBPARTITION s3
+ ),
+ PARTITION p2 VALUES LESS THAN MAXVALUE (
+ SUBPARTITION s4,
+ SUBPARTITION s5
+ )
+ );
+
+CREATE TABLE ts_5 (
+ id INT,
+ purchased DATE
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE(YEAR(purchased))
+ SUBPARTITION BY HASH( TO_DAYS(purchased) ) (
+ PARTITION p0 VALUES LESS THAN (1990) (
+ SUBPARTITION s0a,
+ SUBPARTITION s0b
+ ),
+ PARTITION p1 VALUES LESS THAN (2000) (
+ SUBPARTITION s1a,
+ SUBPARTITION s1b
+ ),
+ PARTITION p2 VALUES LESS THAN MAXVALUE (
+ SUBPARTITION s2a,
+ SUBPARTITION s2b
+ )
+ );
+
+CREATE TABLE trb3 (
+ id INT,
+ name VARCHAR(50),
+ purchased DATE
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE( YEAR(purchased) ) (
+ PARTITION p0 VALUES LESS THAN (1990),
+ PARTITION p1 VALUES LESS THAN (1995),
+ PARTITION p2 VALUES LESS THAN (2000),
+ PARTITION p3 VALUES LESS THAN (2005)
+ );
+
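+# Replace the RANGE scheme wholesale: all rows are redistributed across the
+# 2 new KEY partitions.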
+ALTER TABLE trb3 PARTITION BY KEY(id) PARTITIONS 2;
+
+CREATE TABLE tr (
+ id INT,
+ name VARCHAR(50),
+ purchased DATE
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE( YEAR(purchased) ) (
+ PARTITION p0 VALUES LESS THAN (1990),
+ PARTITION p1 VALUES LESS THAN (1995),
+ PARTITION p2 VALUES LESS THAN (2000),
+ PARTITION p3 VALUES LESS THAN (2005)
+ );
+
+INSERT INTO tr VALUES
+ (1, 'desk organiser', '2003-10-15'),
+ (2, 'CD player', '1993-11-05'),
+ (3, 'TV set', '1996-03-10'),
+ (4, 'bookcase', '1982-01-10'),
+ (5, 'exercise bike', '2004-05-09'),
+ (6, 'sofa', '1987-06-05'),
+ (7, 'popcorn maker', '2001-11-22'),
+ (8, 'aquarium', '1992-08-04'),
+ (9, 'study desk', '1984-09-16'),
+ (10, 'lava lamp', '1998-12-25');
+
+SELECT * FROM tr WHERE purchased BETWEEN '1995-01-01' AND '1999-12-31';
+
+ALTER TABLE tr DROP PARTITION p2;
+
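+# DROP PARTITION discarded the rows stored in p2, so the same query now
+# returns an empty result.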
+SELECT * FROM tr WHERE purchased BETWEEN '1995-01-01' AND '1999-12-31';
+
+CREATE TABLE members_3 (
+ id INT,
+ fname VARCHAR(25),
+ lname VARCHAR(25),
+ dob DATE
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE( YEAR(dob) ) (
+ PARTITION p0 VALUES LESS THAN (1970),
+ PARTITION p1 VALUES LESS THAN (1980),
+ PARTITION p2 VALUES LESS THAN (1990)
+ );
+
+ALTER TABLE members_3 ADD PARTITION (PARTITION p3 VALUES LESS THAN (2000));
+
+# ERROR 1493 (HY000): VALUES LESS THAN value must be strictly increasing for each partition
+--error 1493
+ALTER TABLE members_3 ADD PARTITION (PARTITION n VALUES LESS THAN (1960));
+
+CREATE TABLE clients (
+ id INT,
+ fname VARCHAR(30),
+ lname VARCHAR(30),
+ signed DATE
+) ENGINE=ROCKSDB
+ PARTITION BY HASH( MONTH(signed) )
+ PARTITIONS 12;
+
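+# COALESCE PARTITION removes the last 4 hash partitions (12 -> 8) and
+# redistributes their rows among the remaining ones.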
+ALTER TABLE clients COALESCE PARTITION 4;
+
+CREATE TABLE clients_lk (
+ id INT,
+ fname VARCHAR(30),
+ lname VARCHAR(30),
+ signed DATE
+) ENGINE=ROCKSDB
+ PARTITION BY LINEAR KEY(signed)
+ PARTITIONS 12;
+
+# ERROR 1508 (HY000): Cannot remove all partitions, use DROP TABLE instead
+--error 1508
+ALTER TABLE clients COALESCE PARTITION 18;
+
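+# Adding 6 more hash partitions brings clients from 8 back up to 14.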
+ALTER TABLE clients ADD PARTITION PARTITIONS 6;
+
+CREATE TABLE trb1 (
+ id INT,
+ name VARCHAR(50),
+ purchased DATE
+) ENGINE=ROCKSDB
+ PARTITION BY RANGE(id) (
+ PARTITION p0 VALUES LESS THAN (3),
+ PARTITION p1 VALUES LESS THAN (7),
+ PARTITION p2 VALUES LESS THAN (9),
+ PARTITION p3 VALUES LESS THAN (11)
+ );
+
+INSERT INTO trb1 VALUES
+ (1, 'desk organiser', '2003-10-15'),
+ (2, 'CD player', '1993-11-05'),
+ (3, 'TV set', '1996-03-10'),
+ (4, 'bookcase', '1982-01-10'),
+ (5, 'exercise bike', '2004-05-09'),
+ (6, 'sofa', '1987-06-05'),
+ (7, 'popcorn maker', '2001-11-22'),
+ (8, 'aquarium', '1992-08-04'),
+ (9, 'study desk', '1984-09-16'),
+ (10, 'lava lamp', '1998-12-25');
+
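+# This succeeds because the new primary key includes id, the column used by
+# the partitioning function; a unique key that omitted it would be rejected.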
+ALTER TABLE trb1 ADD PRIMARY KEY (id);
+
+# Clean up.
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS VAR_POP;
+DROP TABLE IF EXISTS TEMP0;
+DROP TABLE IF EXISTS VAR_SAMP;
+DROP TABLE IF EXISTS ti;
+DROP TABLE IF EXISTS members;
+DROP TABLE IF EXISTS members_2;
+DROP TABLE IF EXISTS employees;
+DROP TABLE IF EXISTS employees_2;
+DROP TABLE IF EXISTS employees_3;
+DROP TABLE IF EXISTS quarterly_report_status;
+DROP TABLE IF EXISTS employees_4;
+DROP TABLE IF EXISTS h2;
+DROP TABLE IF EXISTS rcx;
+DROP TABLE IF EXISTS r1;
+DROP TABLE IF EXISTS rc1;
+DROP TABLE IF EXISTS rx;
+DROP TABLE IF EXISTS rc2;
+DROP TABLE IF EXISTS rc3;
+DROP TABLE IF EXISTS rc4;
+DROP TABLE IF EXISTS employees_by_lname;
+DROP TABLE IF EXISTS customers_1;
+DROP TABLE IF EXISTS customers_2;
+DROP TABLE IF EXISTS customers_3;
+DROP TABLE IF EXISTS employees_hash;
+DROP TABLE IF EXISTS employees_hash_1;
+DROP TABLE IF EXISTS t1_hash;
+DROP TABLE IF EXISTS employees_linear_hash;
+DROP TABLE IF EXISTS t1_linear_hash;
+DROP TABLE IF EXISTS k1;
+DROP TABLE IF EXISTS k2;
+DROP TABLE IF EXISTS tm1;
+DROP TABLE IF EXISTS tk;
+DROP TABLE IF EXISTS ts;
+DROP TABLE IF EXISTS ts_1;
+DROP TABLE IF EXISTS ts_3;
+DROP TABLE IF EXISTS ts_4;
+DROP TABLE IF EXISTS ts_5;
+DROP TABLE IF EXISTS trb3;
+DROP TABLE IF EXISTS tr;
+DROP TABLE IF EXISTS members_3;
+DROP TABLE IF EXISTS clients;
+DROP TABLE IF EXISTS clients_lk;
+DROP TABLE IF EXISTS trb1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/percona_nonflushing_analyze_debug.test b/storage/rocksdb/mysql-test/rocksdb/t/percona_nonflushing_analyze_debug.test
new file mode 100644
index 00000000000..c0c73e683bf
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/percona_nonflushing_analyze_debug.test
@@ -0,0 +1,11 @@
+--source include/have_debug_sync.inc
+--source include/have_rocksdb.inc
+
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (1), (2), (3);
+
+--let $percona_nonflushing_analyze_table= t1
+--source include/percona_nonflushing_analyze_debug.inc
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/perf_context.test b/storage/rocksdb/mysql-test/rocksdb/t/perf_context.test
new file mode 100644
index 00000000000..ee41324a34d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/perf_context.test
@@ -0,0 +1,96 @@
+--source include/have_rocksdb.inc
+
+#
+# Information Schema perf context
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+--enable_warnings
+
+
+SET @prior_rocksdb_perf_context_level = @@rocksdb_perf_context_level;
+SET GLOBAL rocksdb_perf_context_level=3;
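+# Level 3 should enable timing stats in addition to plain counters (the
+# exact PerfLevel mapping depends on the RocksDB version).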
+
+CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
+CREATE TABLE t2 (k INT, PRIMARY KEY (k)) ENGINE = ROCKSDB;
+
+INSERT INTO t1 VALUES (1,1), (2,2), (3,3), (4,4), (5,5);
+
+###############################################################################
+# Test that expected perf context stats exist
+###############################################################################
+
+# Check per-table perf context
+--replace_column 5 #
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT WHERE TABLE_NAME = 't1';
+
+# Check global perf context
+--replace_column 2 #
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT_GLOBAL;
+
+###############################################################################
+# Test iteration skip counters
+###############################################################################
+
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT
+WHERE TABLE_NAME = 't1'
+AND STAT_TYPE in ('INTERNAL_KEY_SKIPPED_COUNT', 'INTERNAL_DELETE_SKIPPED_COUNT');
+
+SELECT * FROM t1;
+
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT
+WHERE TABLE_NAME = 't1'
+AND STAT_TYPE in ('INTERNAL_KEY_SKIPPED_COUNT', 'INTERNAL_DELETE_SKIPPED_COUNT');
+
+SELECT * FROM t1 WHERE j BETWEEN 1 AND 5;
+
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT
+WHERE TABLE_NAME = 't1'
+AND STAT_TYPE in ('INTERNAL_KEY_SKIPPED_COUNT', 'INTERNAL_DELETE_SKIPPED_COUNT');
+
+###############################################################################
+# Test write I/O stats
+###############################################################################
+
+# Statistics for multi-statement transactions cannot be attributed to
+# individual tables but should show up in global perf context stats
+set @tmp_flush_log= @@rocksdb_flush_log_at_trx_commit;
+set global rocksdb_flush_log_at_trx_commit=1;
+
+BEGIN;
+INSERT INTO t2 VALUES (1), (2);
+INSERT INTO t2 VALUES (3), (4);
+COMMIT;
+
+SELECT COUNT(*) from INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT
+WHERE TABLE_NAME = 't2'
+AND STAT_TYPE = 'IO_WRITE_NANOS'
+AND VALUE > 0;
+
+SELECT COUNT(*) from INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT_GLOBAL
+WHERE STAT_TYPE = 'IO_WRITE_NANOS' AND VALUE > 0;
+
+SELECT VALUE INTO @a from INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT_GLOBAL
+WHERE STAT_TYPE = 'IO_WRITE_NANOS';
+
+# Single statement writes do show up in per-table stats
+INSERT INTO t2 VALUES (5), (6), (7), (8);
+
+SELECT COUNT(*) from INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT
+WHERE TABLE_NAME = 't2'
+AND STAT_TYPE = 'IO_WRITE_NANOS'
+AND VALUE > 0;
+
+SELECT VALUE INTO @b from INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT_GLOBAL
+WHERE STAT_TYPE = 'IO_WRITE_NANOS';
+
+SELECT CASE WHEN @b - @a > 0 THEN 'true' ELSE 'false' END;
+
+# cleanup
+DROP TABLE t1;
+DROP TABLE t2;
+SET GLOBAL rocksdb_perf_context_level = @prior_rocksdb_perf_context_level;
+set global rocksdb_flush_log_at_trx_commit= @tmp_flush_log;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/persistent_cache.test b/storage/rocksdb/mysql-test/rocksdb/t/persistent_cache.test
new file mode 100644
index 00000000000..03d1d0a60bc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/persistent_cache.test
@@ -0,0 +1,41 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+--let $_server_id= `SELECT @@server_id`
+--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$_server_id.expect
+--let $_cache_file_name= $MYSQLTEST_VARDIR/tmp/persistent_cache
+--exec echo "wait" >$_expect_file_name
+
+# restart server with correct parameters
+shutdown_server 10;
+--exec echo "restart:--rocksdb_persistent_cache_path=$_cache_file_name --rocksdb_persistent_cache_size_mb=100" >$_expect_file_name
+--sleep 5
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
+
+
+# insert values and flush out of memtable
+CREATE TABLE t1 (a int primary key) ENGINE=ROCKSDB;
+insert into t1 values (1);
+set global rocksdb_force_flush_memtable_now=1;
+
+# pull data through cache
+select * from t1 where a = 1;
+
+# restart server to re-read cache
+--exec echo "wait" >$_expect_file_name
+shutdown_server 10;
+--exec echo "restart:--rocksdb_persistent_cache_path=$_cache_file_name --rocksdb_persistent_cache_size_mb=100" >$_expect_file_name
+--sleep 5
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
+
+# pull values from cache again
+select * from t1 where a = 1;
+
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override-master.opt
new file mode 100644
index 00000000000..ca7e3636645
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override-master.opt
@@ -0,0 +1 @@
+--rocksdb_default_cf_options=write_buffer_size=64k;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;};prefix_extractor=capped:24;disable_auto_compactions=true
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override.test b/storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override.test
new file mode 100644
index 00000000000..8fa43e15827
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override.test
@@ -0,0 +1,96 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (id1 BIGINT, id2 BIGINT, id3 BIGINT, id4 BIGINT, PRIMARY KEY (id1, id2, id3, id4) comment 'cf1') ENGINE=rocksdb collate latin1_bin;
+--disable_query_log
+let $i = 1;
+while ($i <= 100) {
+ let $insert = INSERT INTO t1 VALUES(1, $i, $i, $i);
+ eval $insert;
+ inc $i;
+}
+--enable_query_log
+set global rocksdb_force_flush_memtable_now = 1;
+
+--echo
+--echo Original Prefix Extractor:
+--echo
+--sorted_result
+SELECT * FROM information_schema.rocksdb_cf_options WHERE option_type like '%prefix_extractor%';
+
+# BF used (4+8+8+8)
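+# (4-byte index number + three 8-byte key parts = 28 bytes of equality,
+# enough to cover the capped:24 prefix extractor)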
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2=1 AND id3=1;
+select variable_value-@u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+
+--let $_mysqld_option=--rocksdb_override_cf_options=cf1={prefix_extractor=capped:26};
+
+--echo
+--echo Prefix Extractor (after override_cf_options set, should not be changed):
+--echo
+--sorted_result
+SELECT * FROM information_schema.rocksdb_cf_options WHERE option_type like '%prefix_extractor%';
+
+SET @@global.rocksdb_update_cf_options = 'cf1={prefix_extractor=capped:26};';
+--echo
+--echo Changed Prefix Extractor (after update_cf_options set, without restart):
+--echo
+--sorted_result
+# Restart no longer needed
+SELECT * FROM information_schema.rocksdb_cf_options WHERE option_type like '%prefix_extractor%';
+
+# set cf_options for non-existent cf2, cf2 should be created automatically
+SET @@global.rocksdb_update_cf_options = 'cf2={prefix_extractor=capped:28};';
+SELECT * FROM information_schema.rocksdb_cf_options WHERE option_type like '%prefix_extractor%';
+
+# Satisfies can_use_bloom_filter (4+8+8+8), but the filter can't be used
+# because the old SST files were built with the old prefix extractor
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2=1 AND id3=1;
+select variable_value-@u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+
+# Insert more data into t1, verify it uses new bloom filter
+--disable_query_log
+let $i = 101;
+while ($i <= 200) {
+ let $insert = INSERT INTO t1 VALUES(1, $i, $i, $i);
+ eval $insert;
+ inc $i;
+}
+--enable_query_log
+
+set global rocksdb_force_flush_memtable_now = 1;
+
+# BF used w/ new prefix extractor (4+8+8+8) (still increments once because it
+# needs to check the new SST file, but doesn't increment for the SST file
+# built with the old extractor)
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2=1 AND id3=1;
+select variable_value-@u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+
+# should have 2 sst files, one with old prefix extractor and one with new
+SELECT COUNT(*) FROM information_schema.rocksdb_index_file_map WHERE COLUMN_FAMILY != 1;
+
+# update some old data, force compaction, verify that new SST files use
+# new bloom filter
+UPDATE t1 SET id1=1,id2 = 30,id3 = 30 WHERE id4 >= 0 AND id4 <=10;
+set global rocksdb_force_flush_memtable_now = 1;
+
+# should have 3 sst files, one with old prefix extractor and two with new
+SELECT COUNT(*) FROM information_schema.rocksdb_index_file_map WHERE COLUMN_FAMILY != 1;
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2=1 AND id3=1;
+select variable_value-@u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+
+SET @@global.rocksdb_update_cf_options = '';
+set global rocksdb_compact_cf='cf1';
+
+# Select the updated rows, and make sure the bloom filter is checked now
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2=30 AND id3=30;
+select variable_value-@u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/read_only_tx-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/read_only_tx-master.opt
new file mode 100644
index 00000000000..221b35c672a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/read_only_tx-master.opt
@@ -0,0 +1 @@
+--rocksdb_default_cf_options=write_buffer_size=16k --log-bin --binlog_format=row --log-slave-updates
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/read_only_tx.test b/storage/rocksdb/mysql-test/rocksdb/t/read_only_tx.test
new file mode 100644
index 00000000000..3a1025a3623
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/read_only_tx.test
@@ -0,0 +1,70 @@
+--source include/have_log_bin.inc
+--source include/have_rocksdb.inc
+--source include/count_sessions.inc
+--disable_warnings
+#--source include/have_gtid.inc
+--enable_warnings
+#-- let $uuid = `select @@server_uuid;`
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+connection con1;
+CREATE TABLE t1 (id INT, value int, PRIMARY KEY (id), INDEX (value)) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (1,1);
+
+# Read-only, long-running transaction. SingleDelete/Put shouldn't increase much.
+select variable_value into @p from information_schema.global_status where variable_name='rocksdb_number_sst_entry_put';
+select variable_value into @s from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
+#-- replace_result $uuid uuid
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+
+connection con2;
+--disable_query_log
+let $i = 1;
+while ($i <= 10000) {
+ let $update = UPDATE t1 SET value=value+1 WHERE id=1;
+ inc $i;
+ eval $update;
+}
+--enable_query_log
+
+connection con1;
+select case when variable_value-@p < 1000 then 'true' else variable_value-@p end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_put';
+select case when variable_value-@s < 100 then 'true' else variable_value-@s end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
+SELECT * FROM t1;
+--error ER_UPDATES_WITH_CONSISTENT_SNAPSHOT
+INSERT INTO t1 values (2, 2);
+ROLLBACK;
+SELECT * FROM t1;
+INSERT INTO t1 values (2, 2);
+SELECT * FROM t1 ORDER BY id;
+
+# Regular long-running transaction.
+# No "Transaction could not check for conflicts for opearation" error should happen.
+BEGIN;
+
+connection con2;
+--disable_query_log
+let $i = 5;
+while ($i <= 10000) {
+ let $insert = INSERT INTO t1 VALUES ($i, $i);
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+
+connection con1;
+SELECT COUNT(*) FROM t1;
+COMMIT;
+
+connection default;
+disconnect con1;
+disconnect con2;
+OPTIMIZE TABLE t1;
+DROP TABLE t1;
+reset master;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/records_in_range-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/records_in_range-master.opt
new file mode 100644
index 00000000000..75a17cc157e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/records_in_range-master.opt
@@ -0,0 +1,3 @@
+--rocksdb_debug_optimizer_n_rows=20000
+--rocksdb_records_in_range=1000
+--rocksdb_table_stats_sampling_pct=100
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/records_in_range.test b/storage/rocksdb/mysql-test/rocksdb/t/records_in_range.test
new file mode 100644
index 00000000000..68fb1a09526
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/records_in_range.test
@@ -0,0 +1,146 @@
+--source include/have_rocksdb.inc
+--source include/restart_mysqld.inc
+--source include/default_optimizer_switch.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+# Create the table and insert some keys
+CREATE TABLE t1 (
+ i INT,
+ a INT,
+ b INT,
+ PRIMARY KEY (i),
+ KEY ka(a),
+ KEY kb(b) comment 'rev:cf1'
+) ENGINE = rocksdb;
+
+--disable_query_log
+let $max = 20000;
+let $i = 1;
+while ($i <= $max) {
+ let $insert = INSERT INTO t1 VALUES ($i, $i, $i);
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+
+# get results for records_in_range prior to memtable flush
+# normal CF
+explain extended select * from t1 where a> 500 and a< 750;
+explain extended select * from t1 where a< 750;
+explain extended select * from t1 where a> 500;
+explain extended select * from t1 where a>=0 and a<=1000;
+
+#reverse CF
+explain extended select * from t1 where b> 500 and b< 750;
+explain extended select * from t1 where b< 750;
+explain extended select * from t1 where b> 500;
+explain extended select * from t1 where b>=0 and b<=1000;
+
+## cost calculation differences between covering vs non-covering (#298)
+set @save_rocksdb_records_in_range = @@session.rocksdb_records_in_range;
+set rocksdb_records_in_range = 15000;
+# covering, range
+explain extended select a from t1 where a < 750;
+# non-covering, full
+explain extended select a, b from t1 where a < 750;
+# covering, ref
+explain extended select a from t1 where a = 700;
+# non-covering, ref
+explain extended select a,b from t1 where a = 700;
+# covering, full index
+explain extended select a from t1 where a in (700, 800);
+# non-covering, full
+explain extended select a,b from t1 where a in (700, 800);
+set rocksdb_records_in_range=8000;
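+# Rough intuition: the IN list yields two ranges, so the estimate drops to
+# about 2 * 8000 = 16000 rows, below the 20000-row table estimate, and the
+# optimizer should switch back to range access.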
+# covering, range
+explain extended select a from t1 where a in (700, 800);
+# non-covering, full
+explain extended select a,b from t1 where a in (700, 800);
+set rocksdb_records_in_range = @save_rocksdb_records_in_range;
+
+# flush memtable and repeat
+set global rocksdb_force_flush_memtable_now = true;
+# normal CF
+explain extended select * from t1 where a> 500 and a< 750;
+explain extended select * from t1 where a< 750;
+explain extended select * from t1 where a> 500;
+explain extended select * from t1 where a>=0 and a<=1000;
+
+#reverse CF
+explain extended select * from t1 where b> 500 and b< 750;
+explain extended select * from t1 where b< 750;
+explain extended select * from t1 where b> 500;
+explain extended select * from t1 where b>=0 and b<=1000;
+
+# a range containing a single value
+explain extended select * from t1 where a>= 500 and a<= 500;
+explain extended select * from t1 where b>= 500 and b<= 500;
+
+# two indexes
+explain extended select * from t1 where a< 750 and b> 500 and b< 750;
+
+# composite index
+drop index ka on t1;
+drop index kb on t1;
+create index kab on t1(a,b);
+set global rocksdb_force_flush_memtable_now = true;
+explain extended select * from t1 where a< 750 and b> 500 and b< 750;
+
+# override records in range
+set rocksdb_records_in_range=444;
+explain extended select * from t1 where a< 750 and b> 500 and b< 750;
+set rocksdb_records_in_range=0;
+
+# issue 82
+## forward cf
+CREATE TABLE `linktable` (
+ `id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+ `id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+ `link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `visibility` tinyint(3) NOT NULL DEFAULT '0',
+ `data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '',
+ `time` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `version` int(11) unsigned NOT NULL DEFAULT '0',
+ PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link_pk',
+ KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`version`,`data`) COMMENT 'cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin;
+
+insert into linktable values (1,1,1,1,1,1,1,1,1);
+insert into linktable values (1,1,2,1,1,1,1,1,1);
+insert into linktable values (1,1,3,1,1,1,1,1,1);
+insert into linktable values (1,1,4,1,1,1,1,1,1);
+set global rocksdb_force_flush_memtable_now = true;
+explain select id1, id2, link_type, visibility, data, time, version from linktable where id1 = 1 and link_type = 1 and id2 in (1, 2);
+drop table linktable;
+
+## rev cf
+CREATE TABLE `linktable` (
+ `id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+ `id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+ `link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `visibility` tinyint(3) NOT NULL DEFAULT '0',
+ `data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '',
+ `time` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `version` int(11) unsigned NOT NULL DEFAULT '0',
+ PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link_pk',
+ KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`version`,`data`) COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin;
+
+insert into linktable values (1,1,1,1,1,1,1,1,1);
+insert into linktable values (1,1,2,1,1,1,1,1,1);
+insert into linktable values (1,1,3,1,1,1,1,1,1);
+insert into linktable values (1,1,4,1,1,1,1,1,1);
+set global rocksdb_force_flush_memtable_now = true;
+explain select id1, id2, link_type, visibility, data, time, version from linktable where id1 = 1 and link_type = 1 and id2 in (1, 2);
+drop table linktable;
+
+#cleanup
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/repair_table.inc b/storage/rocksdb/mysql-test/rocksdb/t/repair_table.inc
new file mode 100644
index 00000000000..47d6a299095
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/repair_table.inc
@@ -0,0 +1,38 @@
+#
+# REPAIR TABLE statements
+#
+# Note: the output is likely to be different for the engine under test,
+# in which case rdiff will be needed. Or, the output might say that
+# the storage engine does not support REPAIR.
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1,t2;
+--enable_warnings
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+CREATE TABLE t2 (a INT, b CHAR(8) PRIMARY KEY) ENGINE=rocksdb;
+
+REPAIR TABLE t1;
+
+INSERT INTO t1 (a,b) VALUES (3,'c');
+INSERT INTO t2 (a,b) VALUES (4,'d');
+REPAIR NO_WRITE_TO_BINLOG TABLE t1, t2;
+INSERT INTO t2 (a,b) VALUES (5,'e'),(6,'f');
+REPAIR LOCAL TABLE t2;
+INSERT INTO t1 (a,b) VALUES (7,'g'),(8,'h');
+INSERT INTO t2 (a,b) VALUES (9,'i');
+REPAIR LOCAL TABLE t2, t1 EXTENDED;
+INSERT INTO t1 (a,b) VALUES (10,'j');
+INSERT INTO t2 (a,b) VALUES (11,'k');
+REPAIR TABLE t1, t2 QUICK USE_FRM;
+INSERT INTO t1 (a,b) VALUES (12,'l');
+INSERT INTO t2 (a,b) VALUES (13,'m');
+REPAIR NO_WRITE_TO_BINLOG TABLE t1, t2 QUICK EXTENDED USE_FRM;
+FLUSH TABLE t1;
+
+DROP TABLE t1, t2;
+
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/repair_table.test b/storage/rocksdb/mysql-test/rocksdb/t/repair_table.test
new file mode 100644
index 00000000000..5c4807ce986
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/repair_table.test
@@ -0,0 +1,8 @@
+--source include/have_rocksdb.inc
+
+#
+# REPAIR TABLE statements
+#
+
+--source repair_table.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/replace.test b/storage/rocksdb/mysql-test/rocksdb/t/replace.test
new file mode 100644
index 00000000000..3ac37330b2d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/replace.test
@@ -0,0 +1,54 @@
+--source include/have_rocksdb.inc
+
+#
+# Basic REPLACE statements
+#
+
+###########################################
+# TODO:
+# A part of the test is currently disabled
+# because unique indexes are not supported
+###########################################
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+
+# When there is no duplicate key, REPLACE should work as INSERT
+
+REPLACE INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e');
+--sorted_result
+SELECT a,b FROM t1;
+
+REPLACE t1 (a,b) VALUE (10,'foo'),(10,'foo');
+--sorted_result
+SELECT a,b FROM t1;
+
+DROP TABLE t1;
+
+--disable_parsing
+
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX (a)) ENGINE=rocksdb;
+REPLACE INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c');
+--error ER_DUP_ENTRY
+INSERT INTO t1 (a,b) VALUES (2,'d');
+REPLACE INTO t1 (a,b) VALUES (2,'d');
+--sorted_result
+SELECT a,b FROM t1;
+
+DROP TABLE t1;
+
+--enable_parsing
+
+CREATE TABLE t1 (a INT, b CHAR(8), PRIMARY KEY (b)) ENGINE=rocksdb;
+REPLACE INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c');
+--error ER_DUP_ENTRY
+INSERT INTO t1 (a,b) VALUES (4,'b');
+REPLACE INTO t1 (a,b) VALUES (4,'b');
+--sorted_result
+SELECT a,b FROM t1;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb-master.opt
new file mode 100644
index 00000000000..6ad42e58aa2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb-master.opt
@@ -0,0 +1 @@
+--rocksdb_debug_optimizer_n_rows=1000 --rocksdb_records_in_range=50
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test
new file mode 100644
index 00000000000..96fe1a90bc9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test
@@ -0,0 +1,1964 @@
+--source include/have_rocksdb.inc
+--source include/have_partition.inc
+--source include/have_write_committed.inc
+
+#
+# RocksDB Storage Engine tests
+#
+select ENGINE,COMMENT,TRANSACTIONS,XA,SAVEPOINTS from information_schema.engines where engine = 'rocksdb';
+
+--disable_warnings
+drop table if exists t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10;
+drop table if exists t11,t12,t13,t14,t15,t16,t17,t18,t19,t20;
+drop table if exists t21,t22,t23,t24,t25,t26,t27,t28,t29;
+drop table if exists t30,t31,t32,t33,t34,t35,t36,t37,t38,t39;
+drop table if exists t40,t41,t42,t43,t44,t45,t46,t47,t48,t49;
+--enable_warnings
+
+# Disable background compaction to prevent stats from affecting explain output
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+
+--echo #
+--echo # Issue #1: Don't update indexes if index values have not changed
+--echo #
+# [Jay Edgar] I moved this test first because it uses the
+# rocksdb_number_keys_written value, but this value is affected out of band
+# by drop tables. There is a background thread that periodically processes
+# through the list of dropped keys and if any are gone from the database it
+# deletes information related to the key - and this delete causes this count
+# to be incorrect. I moved this test first and made the whole test require
+# a fresh server to hopefully avoid this.
+create table t1 (
+ pk int primary key,
+ a int,
+ b int,
+ key(a)
+) engine=rocksdb;
+
+insert into t1 values
+(1,1,1), (2,2,2), (3,3,3), (4,4,4);
+
+set @var1=(select variable_value
+ from information_schema.global_status
+ where variable_name='rocksdb_number_keys_written');
+
+--echo # Do an update that doesn't change the key 'a'.
+update t1 set b=3334341 where a=2;
+
+set @var2=(select variable_value
+ from information_schema.global_status
+ where variable_name='rocksdb_number_keys_written');
+--echo # The following should produce 1
+select @var2 - @var1;
+
+--echo # Do an update that sets the key to the same value
+update t1 set a=pk where a=3;
+set @var3=(select variable_value
+ from information_schema.global_status
+ where variable_name='rocksdb_number_keys_written');
+--echo # We have 'updated' the column to the same value, so the following must return 0:
+select @var3 - @var2;
+drop table t1;
+
+create table t0 (a int primary key) engine=rocksdb;
+show create table t0;
+drop table t0;
+
+create table t1 (a int primary key, b int) engine=rocksdb;
+insert into t1 values (1,1);
+insert into t1 values (2,2);
+
+select * from t1;
+
+--echo # Check that we can create another table and insert there
+create table t2 (a varchar(10) primary key, b varchar(10)) engine=rocksdb;
+insert into t2 value ('abc','def');
+insert into t2 value ('hijkl','mnopq');
+select * from t2;
+
+--echo # Select again from t1 to see that records from different tables don't mix
+select * from t1;
+
+explain select * from t2 where a='no-such-key';
+--replace_column 9 #
+explain select * from t2 where a='abc';
+select * from t2 where a='abc';
+
+--echo # Try a composite PK
+create table t3 (
+ pk1 int,
+ pk2 varchar(10),
+ col1 varchar(10),
+ primary key(pk1, pk2)
+) engine=rocksdb;
+
+insert into t3 values (2,'two', 'row#2');
+insert into t3 values (3,'three', 'row#3');
+insert into t3 values (1,'one', 'row#1');
+
+select * from t3;
+select * from t3 where pk1=3 and pk2='three';
+
+drop table t1, t2, t3;
+
+--echo #
+--echo # Test blob values
+--echo #
+
+create table t4 (a int primary key, b blob) engine=rocksdb;
+insert into t4 values (1, repeat('quux-quux', 60));
+insert into t4 values (10, repeat('foo-bar', 43));
+insert into t4 values (5, repeat('foo-bar', 200));
+
+insert into t4 values (2, NULL);
+
+
+select
+ a,
+ (case a
+ when 1 then b=repeat('quux-quux', 60)
+ when 10 then b=repeat('foo-bar', 43)
+ when 5 then b=repeat('foo-bar', 200)
+ when 2 then b is null
+ else 'IMPOSSIBLE!' end) as CMP
+from t4;
+
+drop table t4;
+
+--echo #
+--echo # Test blobs of various sizes
+--echo #
+
+--echo # TINYBLOB
+create table t5 (a int primary key, b tinyblob) engine=rocksdb;
+insert into t5 values (1, repeat('quux-quux', 6));
+insert into t5 values (10, repeat('foo-bar', 4));
+insert into t5 values (5, repeat('foo-bar', 2));
+select
+ a,
+ (case a
+ when 1 then b=repeat('quux-quux', 6)
+ when 10 then b=repeat('foo-bar', 4)
+ when 5 then b=repeat('foo-bar', 2)
+ else 'IMPOSSIBLE!' end) as CMP
+from t5;
+drop table t5;
+
+--echo # MEDIUMBLOB
+create table t6 (a int primary key, b mediumblob) engine=rocksdb;
+insert into t6 values (1, repeat('AB', 65000));
+insert into t6 values (10, repeat('bbb', 40000));
+insert into t6 values (5, repeat('foo-bar', 2));
+select
+ a,
+ (case a
+ when 1 then b=repeat('AB', 65000)
+ when 10 then b=repeat('bbb', 40000)
+ when 5 then b=repeat('foo-bar', 2)
+ else 'IMPOSSIBLE!' end) as CMP
+from t6;
+drop table t6;
+
+--echo # LONGBLOB
+create table t7 (a int primary key, b longblob) engine=rocksdb;
+insert into t7 values (1, repeat('AB', 65000));
+insert into t7 values (10, repeat('bbb', 40000));
+insert into t7 values (5, repeat('foo-bar', 2));
+select
+ a,
+ (case a
+ when 1 then b=repeat('AB', 65000)
+ when 10 then b=repeat('bbb', 40000)
+ when 5 then b=repeat('foo-bar', 2)
+ else 'IMPOSSIBLE!' end) as CMP
+from t7;
+drop table t7;
+
+
+--echo #
+--echo # Check if DELETEs work
+--echo #
+create table t8 (a varchar(10) primary key, col1 varchar(12)) engine=rocksdb;
+
+insert into t8 values
+ ('one', 'eins'),
+ ('two', 'zwei'),
+ ('three', 'drei'),
+ ('four', 'vier'),
+ ('five', 'funf');
+
+--echo # Delete by PK
+--replace_column 9 #
+explain delete from t8 where a='three';
+delete from t8 where a='three';
+
+select * from t8;
+
+--echo # Delete while doing a full table scan
+delete from t8 where col1='eins' or col1='vier';
+select * from t8;
+
+--echo # delete w/o WHERE:
+delete from t8;
+select * from t8;
+
+--echo #
+--echo # Test UPDATEs
+--echo #
+insert into t8 values
+ ('one', 'eins'),
+ ('two', 'zwei'),
+ ('three', 'drei'),
+ ('four', 'vier'),
+ ('five', 'funf');
+
+update t8 set col1='dva' where a='two';
+
+update t8 set a='fourAAA' where col1='vier';
+
+select * from t8;
+delete from t8;
+
+--echo #
+--echo # Basic transactions tests
+--echo #
+begin;
+insert into t8 values ('trx1-val1', 'data');
+insert into t8 values ('trx1-val2', 'data');
+rollback;
+select * from t8;
+
+begin;
+insert into t8 values ('trx1-val1', 'data');
+insert into t8 values ('trx1-val2', 'data');
+commit;
+select * from t8;
+
+drop table t8;
+
+--echo #
+--echo # Check if DROP TABLE works
+--echo #
+create table t8 (a varchar(10) primary key, col1 varchar(12)) engine=rocksdb;
+select * from t8;
+insert into t8 values ('foo','foo');
+drop table t8;
+create table t8 (a varchar(10) primary key, col1 varchar(12)) engine=rocksdb;
+select * from t8;
+drop table t8;
+
+--echo #
+--echo # MDEV-3961: Assertion ... on creating a TEMPORARY RocksDB table
+--echo #
+--error ER_ILLEGAL_HA_CREATE_OPTION
+CREATE TEMPORARY TABLE t10 (pk INT PRIMARY KEY) ENGINE=RocksDB;
+
+--echo #
+--echo # MDEV-3963: JOIN or WHERE conditions involving keys on RocksDB tables don't work
+--echo #
+CREATE TABLE t10 (i INT PRIMARY KEY) ENGINE=RocksDB;
+INSERT INTO t10 VALUES (1),(3);
+CREATE TABLE t11 (j INT PRIMARY KEY) ENGINE=RocksDB;
+INSERT INTO t11 VALUES (1),(4);
+
+select * from t10;
+select * from t11;
+--replace_column 9 #
+EXPLAIN
+SELECT * FROM t10, t11 WHERE i=j;
+SELECT * FROM t10, t11 WHERE i=j;
+
+DROP TABLE t10,t11;
+
+--echo #
+--echo # MDEV-3962: SELECT with ORDER BY causes "ERROR 1030 (HY000): Got error 122
+--echo #
+CREATE TABLE t12 (pk INT PRIMARY KEY) ENGINE=RocksDB;
+INSERT INTO t12 VALUES (2),(1);
+SELECT * FROM t12 ORDER BY pk;
+DROP TABLE t12;
+
+--echo #
+--echo # MDEV-3964: Assertion `!pk_descr' fails in ha_rocksdb::open on adding partitions ...
+--echo #
+create table t14 (pk int primary key) engine=RocksDB partition by hash(pk) partitions 2;
+#--error ER_GET_ERRNO
+#alter table t14 add partition partitions 2;
+# ^^ works, but causes weird warnings in error log.
+drop table t14;
+
+--echo #
+--echo # MDEV-3960: Server crashes on running DISCARD TABLESPACE on a RocksDB table
+--echo #
+create table t9 (i int primary key) engine=rocksdb;
+--error ER_ILLEGAL_HA
+alter table t9 discard tablespace;
+drop table t9;
+
+--echo #
+--echo # MDEV-3959: Assertion `slice->size() == table->s->reclength' fails ...
+--echo # on accessing a table after ALTER
+--echo #
+CREATE TABLE t15 (a INT, rocksdb_pk INT PRIMARY KEY) ENGINE=RocksDB;
+INSERT INTO t15 VALUES (1,1),(5,2);
+#--error ER_ILLEGAL_HA
+ALTER TABLE t15 DROP COLUMN a;
+DROP TABLE t15;
+
+--echo #
+--echo # MDEV-3968: UPDATE produces a wrong result while modifying a PK on a RocksDB table
+--echo #
+create table t16 (pk int primary key, a char(8)) engine=RocksDB;
+insert into t16 values (1,'a'),(2,'b'),(3,'c'),(4,'d');
+
+#
+# Not anymore: the following query used to eat a record because of the
+# CANT-SEE-OWN-CHANGES property.
+#
+--error ER_DUP_ENTRY
+update t16 set pk=100, a = 'updated' where a in ('b','c');
+select * from t16;
+drop table t16;
+
+--echo #
+--echo # MDEV-3970: A set of assorted crashes on inserting a row into a RocksDB table
+--echo #
+--disable_warnings
+drop table if exists t_very_long_table_name;
+--enable_warnings
+
+CREATE TABLE `t_very_long_table_name` (
+ `c` char(1) NOT NULL,
+ `c0` char(0) NOT NULL,
+ `c1` char(1) NOT NULL,
+ `c20` char(20) NOT NULL,
+ `c255` char(255) NOT NULL,
+ PRIMARY KEY (`c255`)
+ ) ENGINE=RocksDB DEFAULT CHARSET=latin1;
+INSERT INTO t_very_long_table_name VALUES ('a', '', 'c', REPEAT('a',20), REPEAT('x',255));
+drop table t_very_long_table_name;
+
+
+--echo #
+--echo # Test table locking and read-before-write checks.
+--echo #
+create table t17 (pk varchar(12) primary key, col1 varchar(12)) engine=rocksdb;
+insert into t17 values ('row1', 'val1');
+
+--error ER_DUP_ENTRY
+insert into t17 values ('row1', 'val1-try2');
+--error ER_DUP_ENTRY
+insert into t17 values ('ROW1', 'val1-try2');
+
+insert into t17 values ('row2', 'val2');
+insert into t17 values ('row3', 'val3');
+
+--echo # This is ok
+update t17 set pk='row4' where pk='row1';
+
+--echo # This will try to overwrite another row:
+--error ER_DUP_ENTRY
+update t17 set pk='row3' where pk='row2';
+
+select * from t17;
+
+--echo #
+--echo # Locking tests
+--echo #
+
+connect (con1,localhost,root,,);
+
+--echo # First, make sure there's no locking when transactions update different rows
+connection con1;
+set autocommit=0;
+update t17 set col1='UPD1' where pk='row2';
+
+connection default;
+update t17 set col1='UPD2' where pk='row3';
+
+connection con1;
+commit;
+
+connection default;
+select * from t17;
+
+--echo # Check the variable
+show variables like 'rocksdb_lock_wait_timeout';
+set rocksdb_lock_wait_timeout=2; # seconds
+show variables like 'rocksdb_lock_wait_timeout';
+
+--echo # Try updating the same row from two transactions
+connection con1;
+begin;
+update t17 set col1='UPD2-AA' where pk='row2';
+
+connection default;
+--error ER_LOCK_WAIT_TIMEOUT
+update t17 set col1='UPD2-BB' where pk='row2';
+
+set rocksdb_lock_wait_timeout=1000; # seconds
+--send
+ update t17 set col1='UPD2-CC' where pk='row2';
+
+connection con1;
+rollback;
+
+connection default;
+reap;
+select * from t17 where pk='row2';
+
+drop table t17;
+
+disconnect con1;
+--echo #
+--echo # MDEV-4035: RocksDB: SELECT produces different results inside a transaction (read is not repeatable)
+--echo #
+--enable_connect_log
+
+create table t18 (pk int primary key, i int) engine=RocksDB;
+begin;
+select * from t18;
+select * from t18 where pk = 1;
+
+--connect (con1,localhost,root,,)
+insert into t18 values (1,100);
+
+--connection default
+select * from t18;
+select * from t18 where pk = 1;
+commit;
+
+drop table t18;
+
+--echo #
+--echo # MDEV-4036: RocksDB: INSERT .. ON DUPLICATE KEY UPDATE does not work, produces ER_DUP_KEY
+--echo #
+create table t19 (pk int primary key, i int) engine=RocksDB;
+insert into t19 values (1,1);
+insert into t19 values (1,100) on duplicate key update i = 102;
+select * from t19;
+drop table t19;
+
+--echo # MDEV-4037: RocksDB: REPLACE doesn't work, produces ER_DUP_KEY
+create table t20 (pk int primary key, i int) engine=RocksDB;
+insert into t20 values (1,1);
+replace into t20 values (1,100);
+select * from t20;
+drop table t20;
+
+--echo #
+--echo # MDEV-4041: Server crashes in Primary_key_comparator::get_hashnr on INSERT
+--echo #
+create table t21 (v varbinary(16) primary key, i int) engine=RocksDB;
+insert into t21 values ('a',1);
+select * from t21;
+drop table t21;
+
+--echo #
+--echo # MDEV-4047: RocksDB: Assertion `0' fails in Protocol::end_statement() on multi-table INSERT IGNORE
+--echo #
+
+CREATE TABLE t22 (a int primary key) ENGINE=RocksDB;
+INSERT INTO t22 VALUES (1),(2);
+CREATE TABLE t23 (b int primary key) ENGINE=RocksDB;
+INSERT INTO t23 SELECT * FROM t22;
+DELETE IGNORE t22.*, t23.* FROM t22, t23 WHERE b < a;
+DROP TABLE t22,t23;
+
+--echo #
+--echo # MDEV-4046: RocksDB: Multi-table DELETE locks itself and ends with ER_LOCK_WAIT_TIMEOUT
+--echo #
+CREATE TABLE t24 (pk int primary key) ENGINE=RocksDB;
+INSERT INTO t24 VALUES (1),(2);
+
+CREATE TABLE t25 LIKE t24;
+INSERT INTO t25 SELECT * FROM t24;
+
+DELETE t25.* FROM t24, t25;
+DROP TABLE t24,t25;
+
+--echo #
+--echo # MDEV-4044: RocksDB: UPDATE or DELETE with ORDER BY locks itself
+--echo #
+create table t26 (pk int primary key, c char(1)) engine=RocksDB;
+insert into t26 values (1,'a'),(2,'b');
+update t26 set c = 'x' order by pk limit 1;
+delete from t26 order by pk limit 1;
+select * from t26;
+drop table t26;
+
+
+--echo #
+--echo # Test whether SELECT ... FOR UPDATE puts locks
+--echo #
+create table t27(pk varchar(10) primary key, col1 varchar(20)) engine=RocksDB;
+insert into t27 values
+ ('row1', 'row1data'),
+ ('row2', 'row2data'),
+ ('row3', 'row3data');
+
+connection con1;
+begin;
+select * from t27 where pk='row3' for update;
+
+connection default;
+set rocksdb_lock_wait_timeout=1;
+--error ER_LOCK_WAIT_TIMEOUT
+update t27 set col1='row2-modified' where pk='row3';
+
+connection con1;
+rollback;
+connection default;
+disconnect con1;
+
+drop table t27;
+
+--echo #
+--echo # MDEV-4060: RocksDB: Assertion `! trx->batch' fails in
+--echo #
+create table t28 (pk int primary key, a int) engine=RocksDB;
+insert into t28 values (1,10),(2,20);
+begin;
+update t28 set a = 100 where pk = 3;
+rollback;
+select * from t28;
+drop table t28;
+
+
+--echo #
+--echo # Secondary indexes
+--echo #
+create table t30 (
+ pk varchar(16) not null primary key,
+ key1 varchar(16) not null,
+ col1 varchar(16) not null,
+ key(key1)
+) engine=rocksdb;
+
+insert into t30 values ('row1', 'row1-key', 'row1-data');
+insert into t30 values ('row2', 'row2-key', 'row2-data');
+insert into t30 values ('row3', 'row3-key', 'row3-data');
+
+--replace_column 9 #
+explain
+select * from t30 where key1='row2-key';
+select * from t30 where key1='row2-key';
+
+--replace_column 9 #
+explain
+select * from t30 where key1='row1';
+--echo # This will produce nothing:
+select * from t30 where key1='row1';
+
+--replace_column 9 #
+explain
+select key1 from t30;
+select key1 from t30;
+
+--echo # Create a duplicate record
+insert into t30 values ('row2a', 'row2-key', 'row2a-data');
+
+--echo # Can we see it?
+select * from t30 where key1='row2-key';
+
+delete from t30 where pk='row2';
+select * from t30 where key1='row2-key';
+
+--echo #
+--echo # Range scans on secondary index
+--echo #
+delete from t30;
+insert into t30 values
+ ('row1', 'row1-key', 'row1-data'),
+ ('row2', 'row2-key', 'row2-data'),
+ ('row3', 'row3-key', 'row3-data'),
+ ('row4', 'row4-key', 'row4-data'),
+ ('row5', 'row5-key', 'row5-data');
+
+--replace_column 9 #
+explain
+select * from t30 where key1 <='row3-key';
+select * from t30 where key1 <='row3-key';
+
+--replace_column 9 #
+explain
+select * from t30 where key1 between 'row2-key' and 'row4-key';
+select * from t30 where key1 between 'row2-key' and 'row4-key';
+
+--replace_column 9 #
+explain
+select * from t30 where key1 in ('row2-key','row4-key');
+select * from t30 where key1 in ('row2-key','row4-key');
+
+--replace_column 9 #
+explain
+select key1 from t30 where key1 in ('row2-key','row4-key');
+select key1 from t30 where key1 in ('row2-key','row4-key');
+
+--replace_column 9 #
+explain
+select * from t30 where key1 > 'row1-key' and key1 < 'row4-key';
+select * from t30 where key1 > 'row1-key' and key1 < 'row4-key';
+
+--replace_column 9 #
+explain
+select * from t30 order by key1 limit 3;
+select * from t30 order by key1 limit 3;
+
+--replace_column 9 #
+explain
+select * from t30 order by key1 desc limit 3;
+select * from t30 order by key1 desc limit 3;
+
+--echo #
+--echo # Range scans on primary key
+--echo #
+--replace_column 9 #
+explain
+select * from t30 where pk <='row3';
+select * from t30 where pk <='row3';
+
+--replace_column 9 #
+explain
+select * from t30 where pk between 'row2' and 'row4';
+select * from t30 where pk between 'row2' and 'row4';
+
+--replace_column 9 #
+explain
+select * from t30 where pk in ('row2','row4');
+select * from t30 where pk in ('row2','row4');
+
+--replace_column 9 #
+explain
+select * from t30 order by pk limit 3;
+select * from t30 order by pk limit 3;
+
+drop table t30;
+
+
+--echo #
+--echo # MDEV-3841: RocksDB: Reading by PK prefix does not work
+--echo #
+create table t31 (i int, j int, k int, primary key(i,j,k)) engine=RocksDB;
+insert into t31 values (1,10,100),(2,20,200);
+select * from t31 where i = 1;
+select * from t31 where j = 10;
+select * from t31 where k = 100;
+select * from t31 where i = 1 and j = 10;
+select * from t31 where i = 1 and k = 100;
+select * from t31 where j = 10 and k = 100;
+select * from t31 where i = 1 and j = 10 and k = 100;
+drop table t31;
+
+--echo #
+--echo # MDEV-4055: RocksDB: UPDATE/DELETE by a multi-part PK does not work
+--echo #
+create table t32 (i int, j int, k int, primary key(i,j,k), a varchar(8)) engine=RocksDB;
+insert into t32 values
+ (1,10,100,''),
+ (2,20,200,'');
+select * from t32 where i = 1 and j = 10 and k = 100;
+update t32 set a = 'updated' where i = 1 and j = 10 and k = 100;
+select * from t32;
+drop table t32;
+
+--echo #
+--echo # MDEV-3841: RocksDB: Assertion `0' fails in ha_rocksdb::index_read_map on range select with ORDER BY .. DESC
+--echo #
+CREATE TABLE t33 (pk INT PRIMARY KEY, a CHAR(1)) ENGINE=RocksDB;
+INSERT INTO t33 VALUES (1,'a'),(2,'b');
+SELECT * FROM t33 WHERE pk <= 10 ORDER BY pk DESC;
+DROP TABLE t33;
+
+--echo #
+--echo # MDEV-4081: RocksDB throws error 122 on an attempt to create a table with unique index
+--echo #
+#--error ER_GET_ERRMSG
+--echo # Unique indexes can be created, but uniqueness won't be enforced
+create table t33 (pk int primary key, u int, unique index(u)) engine=RocksDB;
+drop table t33;
+
+--echo #
+--echo # MDEV-4077: RocksDB: Wrong result (duplicate row) on select with range
+--echo #
+CREATE TABLE t34 (pk INT PRIMARY KEY) ENGINE=RocksDB;
+INSERT INTO t34 VALUES (10),(11);
+SELECT pk FROM t34 WHERE pk > 5 AND pk < 15;
+SELECT pk FROM t34 WHERE pk BETWEEN 5 AND 15;
+SELECT pk FROM t34 WHERE pk > 5;
+SELECT pk FROM t34 WHERE pk < 15;
+drop table t34;
+
+--echo #
+--echo # MDEV-4086: RocksDB does not allow a query with multi-part pk and index and ORDER BY .. DEC
+--echo #
+create table t35 (a int, b int, c int, d int, e int, primary key (a,b,c), key (a,c,d,e)) engine=RocksDB;
+insert into t35 values (1,1,1,1,1),(2,2,2,2,2);
+select * from t35 where a = 1 and c = 1 and d = 1 order by e desc;
+drop table t35;
+
+--echo #
+--echo # MDEV-4084: RocksDB: Wrong result on IN subquery with index
+--echo #
+CREATE TABLE t36 (pk INT PRIMARY KEY, a INT, KEY(a)) ENGINE=RocksDB;
+INSERT INTO t36 VALUES (1,10),(2,20);
+SELECT 3 IN ( SELECT a FROM t36 );
+drop table t36;
+
+--echo #
+--echo # MDEV-4084: RocksDB: Wrong result on IN subquery with index
+--echo #
+CREATE TABLE t37 (pk INT PRIMARY KEY, a INT, b CHAR(1), KEY(a), KEY(a,b))
+ ENGINE=RocksDB;
+INSERT INTO t37 VALUES (1,10,'x'), (2,20,'y');
+SELECT MAX(a) FROM t37 WHERE a < 100;
+DROP TABLE t37;
+
+--echo #
+--echo # MDEV-4090: RocksDB: Wrong result (duplicate rows) on range access with secondary key and ORDER BY DESC
+--echo #
+CREATE TABLE t38 (pk INT PRIMARY KEY, i INT, KEY(i)) ENGINE=RocksDB;
+INSERT INTO t38 VALUES (1,10), (2,20);
+SELECT i FROM t38 WHERE i NOT IN (8) ORDER BY i DESC;
+drop table t38;
+
+--echo #
+--echo # MDEV-4092: RocksDB: Assertion `in_table(pa, a_len)' fails in Rdb_key_def::cmp_full_keys
+--echo # with a multi-part key and ORDER BY .. DESC
+--echo #
+CREATE TABLE t40 (pk1 INT PRIMARY KEY, a INT, b VARCHAR(1), KEY(b,a)) ENGINE=RocksDB;
+INSERT INTO t40 VALUES (1, 7,'x'),(2,8,'y');
+
+CREATE TABLE t41 (pk2 INT PRIMARY KEY) ENGINE=RocksDB;
+INSERT INTO t41 VALUES (1),(2);
+
+SELECT * FROM t40, t41 WHERE pk1 = pk2 AND b = 'o' ORDER BY a DESC;
+DROP TABLE t40,t41;
+
+--echo #
+--echo # MDEV-4093: RocksDB: IN subquery by secondary key with NULL among values returns true instead of NULL
+--echo #
+CREATE TABLE t42 (pk INT PRIMARY KEY, a INT, KEY(a)) ENGINE=RocksDB;
+INSERT INTO t42 VALUES (1, NULL),(2, 8);
+SELECT ( 3 ) NOT IN ( SELECT a FROM t42 );
+DROP TABLE t42;
+
+--echo #
+--echo # MDEV-4094: RocksDB: Wrong result on SELECT and ER_KEY_NOT_FOUND on
+--echo # DELETE with search by NULL-able secondary key ...
+--echo #
+CREATE TABLE t43 (pk INT PRIMARY KEY, a INT, b CHAR(1), KEY(a)) ENGINE=RocksDB;
+INSERT INTO t43 VALUES (1,8,'g'),(2,9,'x');
+UPDATE t43 SET pk = 10 WHERE a = 8;
+REPLACE INTO t43 ( a ) VALUES ( 8 );
+REPLACE INTO t43 ( b ) VALUES ( 'y' );
+SELECT * FROM t43 WHERE a = 8;
+DELETE FROM t43 WHERE a = 8;
+DROP TABLE t43;
+
+--echo #
+--echo # Basic AUTO_INCREMENT tests
+--echo #
+create table t44(pk int primary key auto_increment, col1 varchar(12)) engine=rocksdb;
+insert into t44 (col1) values ('row1');
+insert into t44 (col1) values ('row2');
+insert into t44 (col1) values ('row3');
+select * from t44;
+drop table t44;
+
+--echo #
+--echo # ALTER TABLE tests
+--echo #
+create table t45 (pk int primary key, col1 varchar(12)) engine=rocksdb;
+insert into t45 values (1, 'row1');
+insert into t45 values (2, 'row2');
+alter table t45 rename t46;
+select * from t46;
+drop table t46;
+--error ER_BAD_TABLE_ERROR
+drop table t45;
+
+
+--echo #
+--echo # Check Bulk loading
+--echo # Bulk loading used to overwrite existing data
+--echo # Now it fails if there is data overlap with what
+--echo # already exists
+--echo #
+# We exclude rocksdb_max_open_files here because its value depends on
+# the server's open_files_limit and is expected to be different
+# across distros and installs
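+# In outline, the bulk-load pattern exercised below is (sketch only, commented
+# out so the recorded result stays unchanged):
+#   set rocksdb_bulk_load=1;          -- enter bulk-load mode for this session
+#   insert into t values (...),(...); -- rows are loaded via a faster path (assumed)
+#   set rocksdb_bulk_load=0;          -- leave bulk-load mode, flushing pending rows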
+
+--replace_regex /[a-f0-9]{40}/#/
+show variables
+where
+ variable_name like 'rocksdb%' and
+ variable_name not like 'rocksdb_max_open_files' and
+ variable_name not like 'rocksdb_supported_compression_types';
+
+create table t47 (pk int primary key, col1 varchar(12)) engine=rocksdb;
+insert into t47 values (1, 'row1');
+insert into t47 values (2, 'row2');
+set rocksdb_bulk_load=1;
+insert into t47 values (3, 'row3'),(4, 'row4');
+set rocksdb_bulk_load=0;
+# Check concurrent bulk loading
+--connect (con1,localhost,root,,)
+set rocksdb_bulk_load=1;
+insert into t47 values (10, 'row10'),(11, 'row11');
+--connection default
+set rocksdb_bulk_load=1;
+insert into t47 values (100, 'row100'),(101, 'row101');
+--disconnect con1
+--connection default
+set rocksdb_bulk_load=0;
+--disable_query_log
+let $wait_condition = select count(*) = 8 as c from t47;
+--source include/wait_condition.inc
+--enable_query_log
+select * from t47;
+drop table t47;
+
+--echo #
+--echo # Fix TRUNCATE over empty table (transaction is committed when it wasn't
+--echo # started)
+--echo #
+create table t48(pk int primary key auto_increment, col1 varchar(12)) engine=rocksdb;
+set autocommit=0;
+#--error ER_ILLEGAL_HA
+truncate table t48;
+set autocommit=1;
+drop table t48;
+
+--echo #
+--echo # MDEV-4059: RocksDB: query waiting for a lock cannot be killed until query timeout exceeded
+--echo #
+--enable_connect_log
+
+create table t49 (pk int primary key, a int) engine=RocksDB;
+insert into t49 values (1,10),(2,20);
+begin;
+update t49 set a = 100 where pk = 1;
+
+--connect (con1,localhost,root,,)
+--let $con1_id = `SELECT CONNECTION_ID()`
+set rocksdb_lock_wait_timeout=60;
+set @var1= to_seconds(now());
+send update t49 set a = 1000 where pk = 1;
+
+--connect (con2,localhost,root,,)
+--echo kill query \$con1_id;
+--disable_query_log
+# If we immediately kill the query, the internal condition broadcast can
+# occur before the victim is waiting on the condition, and the broadcast
+# is lost. Sleep 1 second to avoid this race.
+--sleep 1
+eval kill query $con1_id;
+--enable_query_log
+--connection con1
+--error ER_QUERY_INTERRUPTED
+--reap
+set @var2= to_seconds(now());
+
+# We expect the time needed to kill the query in con1 to be below
+# rocksdb_lock_wait_timeout (60).
+select if ((@var2 - @var1) < 60, "passed", (@var2 - @var1)) as 'result';
+
+--connection default
+--disconnect con1
+
+commit;
+drop table t49;
+
+--echo #
+--echo # Index-only tests for INT-based columns
+--echo #
+create table t1 (pk int primary key, key1 int, col1 int, key(key1)) engine=rocksdb;
+insert into t1 values (1,1,1);
+insert into t1 values (2,2,2);
+insert into t1 values (-5,-5,-5);
+--echo # INT column uses index-only:
+--replace_column 9 #
+explain
+select key1 from t1 where key1=2;
+select key1 from t1 where key1=2;
+select key1 from t1 where key1=-5;
+drop table t1;
+
+
+create table t2 (pk int primary key, key1 int unsigned, col1 int, key(key1)) engine=rocksdb;
+insert into t2 values (1,1,1), (2,2,2);
+--echo # INT UNSIGNED column uses index-only:
+--replace_column 9 #
+explain
+select key1 from t2 where key1=2;
+select key1 from t2 where key1=2;
+drop table t2;
+
+
+create table t3 (pk bigint primary key, key1 bigint, col1 int, key(key1)) engine=rocksdb;
+insert into t3 values (1,1,1), (2,2,2);
+--echo # BIGINT uses index-only:
+--replace_column 9 #
+explain
+select key1 from t3 where key1=2;
+select key1 from t3 where key1=2;
+drop table t3;
+
+--echo #
+--echo # Index-only reads for string columns
+--echo #
+create table t1 (
+ pk int primary key,
+ key1 char(10) character set binary,
+ col1 int,
+ key (key1)
+) engine=rocksdb;
+insert into t1 values(1, 'one',11), (2,'two',22);
+--replace_column 9 #
+explain
+select key1 from t1 where key1='one';
+--echo # The following will produce no rows. This looks like a bug,
+--echo # but it is actually correct behavior. Binary strings are end-padded
+--echo # with \0 character (and not space). Comparison does not ignore
+--echo # the tail of \0.
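+# Sketch (not executed): 'one' occupies 3 of the 10 bytes, so the stored value
+# is 0x6F6E65 followed by seven 0x00 bytes, i.e.
+#   hex(key1) = '6F6E6500000000000000'
+# which is why only the explicitly padded literal below matches.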
+select key1 from t1 where key1='one';
+--replace_column 9 #
+explain
+select hex(key1) from t1 where key1='one\0\0\0\0\0\0\0';
+select hex(key1) from t1 where key1='one\0\0\0\0\0\0\0';
+drop table t1;
+
+
+create table t2 (
+ pk int primary key,
+ key1 char(10) collate latin1_bin,
+ col1 int,
+ key (key1)
+) engine=rocksdb;
+insert into t2 values(1, 'one',11), (2,'two',22);
+--replace_column 9 #
+explain
+select key1 from t2 where key1='one';
+select key1 from t2 where key1='one';
+drop table t2;
+
+
+create table t3 (
+ pk int primary key,
+ key1 char(10) collate utf8_bin,
+ col1 int,
+ key (key1)
+) engine=rocksdb;
+insert into t3 values(1, 'one',11), (2,'two',22);
+--replace_column 9 #
+explain
+select key1 from t3 where key1='one';
+select key1 from t3 where key1='one';
+drop table t3;
+
+
+--echo # a VARCHAR column
+create table t4 (
+ pk int primary key,
+ key1 varchar(10) collate latin1_bin,
+ key(key1)
+) engine=rocksdb;
+insert into t4 values(1, 'one'), (2,'two'),(3,'threee'),(55,'fifty-five');
+
+--replace_column 9 #
+explain
+select key1 from t4 where key1='two';
+select key1 from t4 where key1='two';
+
+select key1 from t4 where key1='fifty-five';
+
+--replace_column 9 #
+explain
+select key1 from t4 where key1 between 's' and 'u';
+select key1 from t4 where key1 between 's' and 'u';
+
+drop table t4;
+
+--echo #
+--echo # MDEV-4305: RocksDB: Assertion `((keypart_map + 1) & keypart_map) == 0' fails in calculate_key_len
+--echo #
+CREATE TABLE t1 (pk1 INT, pk2 CHAR(32), i INT, PRIMARY KEY(pk1,pk2), KEY(i)) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (1,'test1',6),(2,'test2',8);
+SELECT * FROM t1 WHERE i != 3 OR pk1 > 9;
+DROP TABLE t1;
+
+--echo #
+--echo # MDEV-4298: RocksDB: Assertion `thd->is_error() || kill_errno' fails in ha_rows filesort
+--echo #
+call mtr.add_suppression("Sort aborted");
+CREATE TABLE t1 (pk INT PRIMARY KEY, i INT, KEY(i)) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (1,1),(2,2);
+BEGIN;
+UPDATE t1 SET i = 100;
+
+--connect (con1,localhost,root,,test)
+--error ER_LOCK_WAIT_TIMEOUT
+DELETE IGNORE FROM t1 ORDER BY i;
+--disconnect con1
+
+--connection default
+COMMIT;
+DROP TABLE t1;
+
+--echo #
+--echo # MDEV-4324: RocksDB: Valgrind "Use of uninitialised value" warnings on inserting value into varchar field
+--echo # (testcase only)
+--echo #
+CREATE TABLE t1 (pk INT PRIMARY KEY, c VARCHAR(4)) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (1,'foo'), (2,'bar');
+DROP TABLE t1;
+
+--echo #
+--echo # MDEV-4304: RocksDB: Index-only scan by a field with utf8_bin collation returns garbage symbols
+--echo #
+CREATE TABLE t1 (pk INT PRIMARY KEY, c1 CHAR(1), c2 CHAR(1), KEY(c1)) ENGINE=RocksDB CHARSET utf8 COLLATE utf8_bin;
+INSERT INTO t1 VALUES (1,'h','h');
+SELECT * FROM t1;
+SELECT c1 FROM t1;
+DROP TABLE t1;
+
+--echo #
+--echo # MDEV-4300: RocksDB: Server crashes in inline_mysql_mutex_lock on SELECT .. FOR UPDATE
+--echo #
+CREATE TABLE t2 (pk INT PRIMARY KEY, i INT, KEY (i)) ENGINE=RocksDB;
+INSERT INTO t2 VALUES (1,4),(2,5);
+SELECT 1 FROM t2 WHERE i < 0 FOR UPDATE;
+DROP TABLE t2;
+
+--echo #
+--echo # MDEV-4301: RocksDB: Assertion `pack_info != __null' fails in Rdb_key_def::unpack_record
+--echo #
+CREATE TABLE t1 (pk INT PRIMARY KEY, i INT, c CHAR(1), KEY(c,i)) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (1,4,'d'),(2,8,'e');
+SELECT MAX( pk ) FROM t1 WHERE i = 105 AND c = 'h';
+DROP TABLE t1;
+
+--echo #
+--echo # MDEV-4337: RocksDB: Inconsistent results comparing a char field with an int field
+--echo #
+create table t1 (c char(1), i int, primary key(c), key(i)) engine=RocksDB;
+insert into t1 values ('2',2),('6',6);
+select * from t1 where c = i;
+select * from t1 ignore index (i) where c = i;
+drop table t1;
+
+
+--echo #
+--echo # Test statement rollback inside a transaction
+--echo #
+create table t1 (pk varchar(12) primary key) engine=rocksdb;
+insert into t1 values ('old-val1'),('old-val2');
+
+create table t2 (pk varchar(12) primary key) engine=rocksdb;
+insert into t2 values ('new-val2'),('old-val1');
+
+begin;
+insert into t1 values ('new-val1');
+--error ER_DUP_ENTRY
+insert into t1 select * from t2;
+commit;
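+# Expected outcome (sketch): the failed INSERT ... SELECT is rolled back as a
+# whole statement, so 'new-val2' (inserted before the duplicate 'old-val1' was
+# hit) must not appear; t1 should contain old-val1, old-val2 and new-val1 only.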
+
+select * from t1;
+drop table t1, t2;
+
+--echo #
+--echo # MDEV-4383: RocksDB: Wrong result of DELETE .. ORDER BY .. LIMIT:
+--echo # rows that should be deleted remain in the table
+--echo #
+CREATE TABLE t2 (pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=RocksDB;
+CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=RocksDB;
+
+INSERT INTO t1 (pk) VALUES (NULL),(NULL);
+BEGIN;
+INSERT INTO t2 (pk) VALUES (NULL),(NULL);
+INSERT INTO t1 (pk) VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL);
+
+--enable_info
+SELECT * FROM t1 ORDER BY pk LIMIT 9;
+DELETE FROM t1 ORDER BY pk LIMIT 9;
+SELECT * FROM t1 ORDER BY pk LIMIT 9;
+--disable_info
+
+DROP TABLE t1,t2;
+
+--echo #
+--echo # MDEV-4374: RocksDB: Valgrind warnings 'Use of uninitialised value' on
+--echo # inserting into a varchar column
+--echo #
+CREATE TABLE t1 (pk INT PRIMARY KEY, a VARCHAR(32)) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (1,'foo'),(2,'bar');
+DROP TABLE t1;
+
+
+--echo #
+--echo # MDEV-4061: RocksDB: Changes from an interrupted query are still applied
+--echo #
+
+--enable_connect_log
+
+create table t1 (pk int primary key, a int) engine=RocksDB;
+insert into t1 values (1,10),(2,20);
+
+--let $con_id = `select connection_id()`
+
+set autocommit = 1;
+--send
+update t1 set a = sleep(100) where pk = 1;
+
+--connect (con1,localhost,root,,)
+
+let $wait_condition= select State='User sleep' from information_schema.processlist where id=$con_id/* or srv_id=$con_id*/;
+--source include/wait_condition.inc
+
+--echo kill query \$con_id;
+--disable_query_log
+eval kill query $con_id;
+--enable_query_log
+
+--connection default
+--error ER_QUERY_INTERRUPTED
+--reap
+
+select * from t1;
+--disconnect con1
+--disable_connect_log
+drop table t1;
+
+
+--echo #
+--echo # MDEV-4099: RocksDB: Wrong results with index and range access after INSERT IGNORE or REPLACE
+--echo #
+CREATE TABLE t1 (pk INT PRIMARY KEY, a SMALLINT, b INT, KEY (a)) ENGINE=RocksDB;
+INSERT IGNORE INTO t1 VALUES (1, 157, 0), (2, 1898, -504403), (1, -14659, 0);
+SELECT * FROM t1;
+SELECT pk FROM t1;
+SELECT * FROM t1 WHERE a != 97;
+DROP TABLE t1;
+
+
+--echo #
+--echo # Test @@rocksdb_max_row_locks
+--echo #
+CREATE TABLE t1 (pk INT PRIMARY KEY, a int) ENGINE=RocksDB;
+set @a=-1;
+insert into t1 select (@a:=@a+1), 1234 from information_schema.session_variables limit 100;
+set @tmp1= @@rocksdb_max_row_locks;
+set rocksdb_max_row_locks= 20;
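+# Sketch of the expectation: the UPDATE below touches all 100 rows, which needs
+# more than the 20 row locks now allowed, so the statement fails with an error
+# instead of silently succeeding.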
+--error ER_GET_ERRMSG
+update t1 set a=a+10;
+DROP TABLE t1;
+
+
+--echo #
+--echo # Test AUTO_INCREMENT behavior problem,
+--echo # "explicit insert into an auto-inc column is not noticed by RocksDB"
+--echo #
+create table t1 (i int primary key auto_increment) engine=RocksDB;
+
+insert into t1 values (null);
+insert into t1 values (null);
+select * from t1;
+drop table t1;
+
+create table t2 (i int primary key auto_increment) engine=RocksDB;
+
+insert into t2 values (1);
+select * from t2;
+
+--echo # this fails (i.e., used to fail): the RocksDB engine did not notice the use of '1' above
+insert into t2 values (null);
+select * from t2;
+
+--echo # but then this succeeds, so previous statement must have incremented next number counter
+insert into t2 values (null);
+select * from t2;
+drop table t2;
+
+--echo #
+--echo # Fix Issue#2: AUTO_INCREMENT value doesn't survive server shutdown
+--echo #
+create table t1 (i int primary key auto_increment) engine=RocksDB;
+
+insert into t1 values (null);
+insert into t1 values (null);
+
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
+
+--source include/restart_mysqld.inc
+
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+
+insert into t1 values (null);
+select * from t1;
+
+drop table t1;
+
+--echo #
+--echo # Fix Issue #3: SHOW TABLE STATUS shows Auto_increment=0
+--echo #
+create table t1 (i int primary key auto_increment) engine=RocksDB;
+
+insert into t1 values (null),(null);
+--replace_column 7 #
+show table status like 't1';
+drop table t1;
+
+--echo #
+--echo # Fix Issue #4: Crash when using pseudo-unique keys
+--echo #
+CREATE TABLE t1 (
+ i INT,
+ t TINYINT,
+ s SMALLINT,
+ m MEDIUMINT,
+ b BIGINT,
+ pk MEDIUMINT AUTO_INCREMENT PRIMARY KEY,
+ UNIQUE KEY b_t (b,t)
+) ENGINE=rocksdb;
+
+INSERT INTO t1 (i,t,s,m,b) VALUES (1,2,3,4,5),(1000,100,10000,1000000,1000000000000000000),(5,100,10000,1000000,100000000000000000),(2,3,4,5,6),(3,4,5,6,7),(101,102,103,104,105),(10001,103,10002,10003,10004),(10,11,12,13,14),(11,12,13,14,15),(12,13,14,15,16);
+
+SELECT b+t FROM t1 WHERE (b,t) IN ( SELECT b, t FROM t1 WHERE i>1 ) ORDER BY b+t;
+DROP TABLE t1;
+
+--echo #
+--echo # Fix issue #5: Transaction rollback doesn't undo all changes.
+--echo #
+create table t0 (a int) engine=myisam;
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t1 (id int auto_increment primary key, value int) engine=rocksdb;
+
+set autocommit=0;
+begin;
+set @a:=0;
+insert into t1 select @a:=@a+1, @a from t0 A, t0 B, t0 C, t0 D where D.a<4;
+insert into t1 select @a:=@a+1, @a from t0 A, t0 B, t0 C, t0 D where D.a<4;
+insert into t1 select @a:=@a+1, @a from t0 A, t0 B, t0 C, t0 D where D.a<4;
+rollback;
+select count(*) from t1;
+
+set autocommit=1;
+drop table t0, t1;
+
+--echo #
+--echo # Check status variables
+--echo # NOTE: We exclude rocksdb_num_get_for_update_calls because it's a debug only status var
+--echo #
+--replace_column 2 #
+show status where variable_name like 'rocksdb%' and variable_name not like '%num_get_for_update%';
+
+select VARIABLE_NAME from INFORMATION_SCHEMA.global_status where VARIABLE_NAME LIKE 'rocksdb%' and VARIABLE_NAME NOT LIKE '%num_get_for_update%';
+--echo # RocksDB-SE's status variables are global internally
+--echo # but they are shown as both session and global, like InnoDB's status vars.
+select VARIABLE_NAME from INFORMATION_SCHEMA.session_status where VARIABLE_NAME LIKE 'rocksdb%' and VARIABLE_NAME NOT LIKE '%num_get_for_update%';
+
+
+--echo #
+--echo # Fix issue #9: HA_ERR_INTERNAL_ERROR when running linkbench
+--echo #
+create table t0 (a int) engine=myisam;
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t1 (
+ pk int primary key,
+ col1 varchar(255),
+ key(col1)
+) engine=rocksdb;
+insert into t1 select a, repeat('123456789ABCDEF-', 15) from t0;
+select * from t1 where pk=3;
+drop table t0, t1;
+
+--echo #
+--echo # Fix issue #10: Segfault in Rdb_key_def::get_primary_key_tuple
+--echo #
+create table t0 (a int) engine=myisam;
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+CREATE TABLE t1 (
+ id1 bigint(20) unsigned NOT NULL DEFAULT '0',
+ id2 bigint(20) unsigned NOT NULL DEFAULT '0',
+ link_type bigint(20) unsigned NOT NULL DEFAULT '0',
+ visibility tinyint(3) NOT NULL DEFAULT '0',
+ data varchar(255) NOT NULL DEFAULT '',
+ time bigint(20) unsigned NOT NULL DEFAULT '0',
+ version int(11) unsigned NOT NULL DEFAULT '0',
+ PRIMARY KEY (link_type,id1,id2)
+) engine=rocksdb;
+
+insert into t1 select a,a,a,1,a,a,a from t0;
+
+alter table t1 add index id1_type (id1,link_type,visibility,time,version,data);
+select * from t1 where id1 = 3;
+
+drop table t0,t1;
+
+--echo #
+--echo # Test column families
+--echo #
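+# In MyRocks, an index comment names the column family that stores the index
+# (sketch): key(col1) comment 'cf3' puts the col1 index into CF 'cf3', which
+# is created on demand if it does not already exist.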
+
+create table t1 (
+ pk int primary key,
+ col1 int,
+ col2 int,
+ key(col1) comment 'cf3',
+ key(col2) comment 'cf4'
+) engine=rocksdb;
+
+insert into t1 values (1,1,1), (2,2,2), (3,3,3), (4,4,4), (5,5,5);
+
+--replace_column 9 #
+explain
+select * from t1 where col1=2;
+select * from t1 where col1=2;
+
+--replace_column 9 #
+explain
+select * from t1 where col2=3;
+select * from t1 where col2=3;
+
+select * from t1 where pk=4;
+
+drop table t1;
+
+--echo #
+--echo # Try primary key in a non-default CF:
+--echo #
+create table t1 (
+ pk int,
+ col1 int,
+ col2 int,
+ key(col1) comment 'cf3',
+ key(col2) comment 'cf4',
+ primary key (pk) comment 'cf5'
+) engine=rocksdb;
+insert into t1 values (1,1,1), (2,2,2), (3,3,3), (4,4,4), (5,5,5);
+
+--replace_column 9 #
+explain
+select * from t1 where col1=2;
+select * from t1 where col1=2;
+
+select * from t1 where pk=4;
+
+drop table t1;
+
+--echo #
+--echo # Issue #15: SIGSEGV from reading in blob data
+--echo #
+CREATE TABLE t1 (
+ id int not null,
+ blob_col text,
+ PRIMARY KEY (id)
+) ENGINE=ROCKSDB CHARSET=latin1;
+
+INSERT INTO t1 SET id=123, blob_col=repeat('z',64000) ON DUPLICATE KEY UPDATE blob_col=VALUES(blob_col);
+INSERT INTO t1 SET id=123, blob_col='' ON DUPLICATE KEY UPDATE blob_col=VALUES(blob_col);
+DROP TABLE t1;
+
+
+--echo #
+--echo # Issue #17: Automatic per-index column families
+--echo # (Now deprecated)
+--echo #
+--error ER_PER_INDEX_CF_DEPRECATED
+create table t1 (
+ id int not null,
+ key1 int,
+ PRIMARY KEY (id),
+ index (key1) comment '$per_index_cf'
+) engine=rocksdb;
+
+
+--echo #
+--echo # Issue #22: SELECT ... FOR UPDATE takes a long time
+--echo #
+create table t0 (a int) engine=myisam;
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t1 (
+ id1 int,
+ id2 int,
+ value1 int,
+ value2 int,
+ primary key(id1, id2) COMMENT 'new_column_family',
+ key(id2)
+) engine=rocksdb default charset=latin1 collate=latin1_bin;
+
+insert into t1 select A.a, B.a, 31, 1234 from t0 A, t0 B;
+
+--replace_column 9 #
+explain
+select * from t1 where id1=30 and value1=30 for update;
+
+set @var1=(select variable_value
+ from information_schema.global_status
+ where variable_name='rocksdb_number_keys_read');
+
+select * from t1 where id1=3 and value1=3 for update;
+
+set @var2=(select variable_value
+ from information_schema.global_status
+ where variable_name='rocksdb_number_keys_read');
+--echo # The following must return true (before the fix, the difference was 70):
+select if((@var2 - @var1) < 30, 1, @var2-@var1);
+
+drop table t0,t1;
+
+--echo #
+--echo # Issue #33: SELECT ... FROM rocksdb_table ORDER BY primary_key uses sorting
+--echo #
+create table t1 (id int primary key, value int) engine=rocksdb;
+insert into t1 values (1,1),(2,2),(3,3);
+--echo # The following must not use 'Using filesort':
+--replace_column 9 #
+explain select * from t1 ORDER BY id;
+drop table t1;
+
+--echo #
+--echo # Issue #26: Index-only scans for DATETIME and TIMESTAMP
+--echo #
+create table t0 (a int) engine=myisam;
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+--echo # Try a DATETIME column:
+create table t1 (
+ pk int auto_increment primary key,
+ kp1 datetime,
+ kp2 int,
+ col1 int,
+ key(kp1, kp2)
+) engine=rocksdb;
+insert into t1 (kp1,kp2)
+select date_add('2015-01-01 12:34:56', interval a day), a from t0;
+
+select * from t1;
+
+--echo # This must show 'Using index'
+--replace_column 9 #
+explain
+select kp1,kp2 from t1 force index (kp1)
+where kp1 between '2015-01-01 00:00:00' and '2015-01-05 23:59:59';
+
+select kp1,kp2 from t1 force index (kp1)
+where kp1 between '2015-01-01 00:00:00' and '2015-01-05 23:59:59';
+
+--echo # Now, the same with NOT NULL column
+create table t2 (
+ pk int auto_increment primary key,
+ kp1 datetime not null,
+ kp2 int,
+ col1 int,
+ key(kp1, kp2)
+) engine=rocksdb;
+insert into t2 select * from t1;
+--echo # This must show 'Using index'
+--replace_column 9 #
+explain
+select kp1,kp2 from t2 force index (kp1)
+where kp1 between '2015-01-01 00:00:00' and '2015-01-05 23:59:59';
+
+select kp1,kp2 from t2 force index (kp1)
+where kp1 between '2015-01-01 00:00:00' and '2015-01-05 23:59:59';
+drop table t1,t2;
+
+--echo # Try a DATE column:
+create table t1 (
+ pk int auto_increment primary key,
+ kp1 date,
+ kp2 int,
+ col1 int,
+ key(kp1, kp2)
+) engine=rocksdb;
+insert into t1 (kp1,kp2)
+select date_add('2015-01-01', interval a day), a from t0;
+
+select * from t1;
+
+--echo # This must show 'Using index'
+--replace_column 9 #
+explain
+select kp1,kp2 from t1 force index (kp1)
+where kp1 between '2015-01-01' and '2015-01-05';
+
+select kp1,kp2 from t1 force index (kp1)
+where kp1 between '2015-01-01' and '2015-01-05';
+
+--echo # Now, the same with NOT NULL column
+create table t2 (
+ pk int auto_increment primary key,
+ kp1 date not null,
+ kp2 int,
+ col1 int,
+ key(kp1, kp2)
+) engine=rocksdb;
+insert into t2 select * from t1;
+--echo # This must show 'Using index'
+--replace_column 9 #
+explain
+select kp1,kp2 from t2 force index (kp1)
+where kp1 between '2015-01-01 00:00:00' and '2015-01-05 23:59:59';
+
+select kp1,kp2 from t2 force index (kp1)
+where kp1 between '2015-01-01 00:00:00' and '2015-01-05 23:59:59';
+drop table t1,t2;
+
+--echo #
+--echo # Try a TIMESTAMP column:
+--echo #
+create table t1 (
+ pk int auto_increment primary key,
+ kp1 timestamp,
+ kp2 int,
+ col1 int,
+ key(kp1, kp2)
+) engine=rocksdb;
+insert into t1 (kp1,kp2)
+select date_add('2015-01-01 12:34:56', interval a day), a from t0;
+
+select * from t1;
+
+--echo # This must show 'Using index'
+--replace_column 9 #
+explain
+select kp1,kp2 from t1 force index (kp1)
+where kp1 between '2015-01-01 00:00:00' and '2015-01-05 23:59:59';
+
+select kp1,kp2 from t1 force index (kp1)
+where kp1 between '2015-01-01 00:00:00' and '2015-01-05 23:59:59';
+
+--echo # Now, the same with NOT NULL column
+create table t2 (
+ pk int auto_increment primary key,
+ kp1 timestamp not null,
+ kp2 int,
+ col1 int,
+ key(kp1, kp2)
+) engine=rocksdb;
+insert into t2 select * from t1;
+--echo # This must show 'Using index'
+--replace_column 9 #
+explain
+select kp1,kp2 from t2 force index (kp1)
+where kp1 between '2015-01-01 00:00:00' and '2015-01-05 23:59:59';
+
+select kp1,kp2 from t2 force index (kp1)
+where kp1 between '2015-01-01 00:00:00' and '2015-01-05 23:59:59';
+drop table t1,t2;
+
+--echo #
+--echo # Try a TIME column:
+--echo #
+create table t1 (
+ pk int auto_increment primary key,
+ kp1 time,
+ kp2 int,
+ col1 int,
+ key(kp1, kp2)
+) engine=rocksdb;
+--disable_warnings
+insert into t1 (kp1,kp2)
+select date_add('2015-01-01 09:00:00', interval a minute), a from t0;
+--enable_warnings
+
+select * from t1;
+
+--echo # This must show 'Using index'
+--replace_column 9 #
+explain
+select kp1,kp2 from t1 force index (kp1)
+where kp1 between '09:01:00' and '09:05:00';
+
+select kp1,kp2 from t1 force index (kp1)
+where kp1 between '09:01:00' and '09:05:00';
+
+--echo # Now, the same with NOT NULL column
+create table t2 (
+ pk int auto_increment primary key,
+ kp1 time not null,
+ kp2 int,
+ col1 int,
+ key(kp1, kp2)
+) engine=rocksdb;
+insert into t2 select * from t1;
+--echo # This must show 'Using index'
+--replace_column 9 #
+explain
+select kp1,kp2 from t2 force index (kp1)
+where kp1 between '09:01:00' and '09:05:00';
+
+select kp1,kp2 from t2 force index (kp1)
+where kp1 between '09:01:00' and '09:05:00';
+drop table t1,t2;
+
+--echo #
+--echo # Try a YEAR column:
+--echo #
+create table t1 (
+ pk int auto_increment primary key,
+ kp1 year,
+ kp2 int,
+ col1 int,
+ key(kp1, kp2)
+) engine=rocksdb;
+--disable_warnings
+insert into t1 (kp1,kp2) select 2015+a, a from t0;
+--enable_warnings
+
+select * from t1;
+
+--echo # This must show 'Using index'
+--replace_column 9 #
+explain
+select kp1,kp2 from t1 force index (kp1)
+where kp1 between '2016' and '2020';
+
+select kp1,kp2 from t1 force index (kp1)
+where kp1 between '2016' and '2020';
+
+--echo # Now, the same with NOT NULL column
+create table t2 (
+ pk int auto_increment primary key,
+ kp1 year not null,
+ kp2 int,
+ col1 int,
+ key(kp1, kp2)
+) engine=rocksdb;
+insert into t2 select * from t1;
+--echo # This must show 'Using index'
+--replace_column 9 #
+explain
+select kp1,kp2 from t2 force index (kp1)
+where kp1 between '2016' and '2020';
+
+select kp1,kp2 from t2 force index (kp1)
+where kp1 between '2016' and '2020';
+
+drop table t1,t2;
+
+--echo #
+--echo # Issue #57: Release row locks on statement errors
+--echo #
+create table t1 (id int primary key) engine=rocksdb;
+insert into t1 values (1), (2), (3);
+begin;
+insert into t1 values (4), (5), (6);
+--error ER_DUP_ENTRY
+insert into t1 values (7), (8), (2), (9);
+select * from t1;
+
+--connect (con1,localhost,root,,)
+--connection con1
+begin;
+--error ER_LOCK_WAIT_TIMEOUT
+select * from t1 where id=4 for update;
+
+select * from t1 where id=7 for update;
+
+select * from t1 where id=9 for update;
+
+--connection default
+--disconnect con1
+drop table t1;
+
+--echo # Index on blob column
+SET @old_mode = @@sql_mode;
+SET sql_mode = 'strict_all_tables';
+create table t1 (a int, b text, c varchar(400), Primary Key(a), Key(c, b(255))) engine=rocksdb;
+drop table t1;
+set global rocksdb_large_prefix=1;
+create table t1 (a int, b text, c varchar(400), Primary Key(a), Key(b(1255))) engine=rocksdb;
+set global rocksdb_large_prefix=0;
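+# (Assumed semantics, mirroring InnoDB's option of the same name: with
+# rocksdb_large_prefix=ON the index prefix length limit is 3072 bytes instead
+# of 767, which is why Key(b(1255)) is only accepted while the option is ON.)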
+insert into t1 values (1, '1abcde', '1abcde'), (2, '2abcde', '2abcde'), (3, '3abcde', '3abcde');
+select * from t1;
+--replace_column 9 #
+explain select * from t1 where b like '1%';
+--replace_column 9 #
+explain select b, a from t1 where b like '1%';
+update t1 set b= '12345' where b = '2abcde';
+select * from t1;
+drop table t1;
+# In MariaDB, the error becomes a warning:
+# --error ER_TOO_LONG_KEY
+create table t1 (a int, b text, c varchar(400), Primary Key(a), Key(b(2255))) engine=rocksdb;
+drop table t1;
+SET sql_mode = @old_mode;
+
+drop table t0;
+
+--echo #
+--echo # Fix assertion failure (attempt to overrun the key buffer) for prefix indexes
+--echo #
+
+create table t1 (
+ pk int primary key,
+ col1 varchar(100),
+ key (col1(10))
+) engine=rocksdb;
+
+insert into t1 values (1, repeat('0123456789', 9));
+
+drop table t1;
+
+--echo #
+--echo # Issue #76: Assertion `buf == table->record[0]' fails in virtual int ha_rocksdb::delete_row(const uchar*)
+--echo #
+
+CREATE TABLE t1 (pk INT PRIMARY KEY, f1 INT) ENGINE=RocksDB;
+CREATE TABLE t2 (pk INT PRIMARY KEY, f1 INT) ENGINE=RocksDB;
+
+CREATE TRIGGER tr AFTER DELETE ON t1 FOR EACH ROW DELETE FROM t2 WHERE pk = old.pk;
+
+INSERT INTO t1 VALUES (1,1);
+REPLACE INTO t1 VALUES (1,2);
+
+SELECT * FROM t1;
+DROP TABLE t1, t2;
+
+--echo #
+--echo # Issue #99: UPDATE for table with VARCHAR pk gives "Can't find record" error
+--echo #
+create table t1(a int primary key);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t2 (
+ a varchar(32) primary key,
+ col1 int
+) engine=rocksdb;
+
+insert into t2
+select concat('v-', 100 + A.a*100 + B.a), 12345 from t1 A, t1 B;
+update t2 set a=concat('x-', a) where a between 'v-1002' and 'v-1004';
+
+drop table t1,t2;
+
+--echo #
+--echo # Issue #131: Assertion `v->cfd_->internal_comparator().Compare(start, end) <= 0' failed
+--echo #
+CREATE TABLE t2(c1 INTEGER UNSIGNED NOT NULL, c2 INTEGER NULL, c3 TINYINT, c4 SMALLINT , c5 MEDIUMINT, c6 INT, c7 BIGINT, PRIMARY KEY(c1,c6)) ENGINE=RocksDB;
+INSERT INTO t2 VALUES (1,1,1,1,1,1,1);
+SELECT * FROM t2 WHERE c1 > 4294967295 ORDER BY c1,c6;
+EXPLAIN SELECT * FROM t2 WHERE c1 > 4294967295 ORDER BY c1,c6;
+drop table t2;
+
+--echo #
+--echo # Issue #135: register transaction was not being called for statement
+--echo #
+--disable_warnings
+DROP DATABASE IF EXISTS test_db;
+--enable_warnings
+CREATE DATABASE test_db;
+CREATE TABLE test_db.t1(c1 INT PRIMARY KEY);
+LOCK TABLES test_db.t1 READ;
+SET AUTOCOMMIT=0;
+SELECT c1 FROM test_db.t1;
+START TRANSACTION WITH CONSISTENT SNAPSHOT, READ ONLY;
+DROP DATABASE test_db;
+
+--echo #
+--echo # Issue #143: Split rocksdb_bulk_load option into two
+--echo #
+CREATE TABLE t1 (id int primary key, value int) engine=RocksDB;
+SET unique_checks=0;
+INSERT INTO t1 VALUES(1, 1);
+INSERT INTO t1 VALUES(1, 2);
+INSERT INTO t1 VALUES(1, 3);
+SELECT * FROM t1;
+--error ER_ON_DUPLICATE_DISABLED
+REPLACE INTO t1 VALUES(4, 4);
+--error ER_ON_DUPLICATE_DISABLED
+INSERT INTO t1 VALUES(5, 5) ON DUPLICATE KEY UPDATE value=value+1;
+TRUNCATE TABLE t1;
+SET @save_rocksdb_bulk_load_size= @@rocksdb_bulk_load_size;
+SET unique_checks=1;
+SET rocksdb_commit_in_the_middle=1;
+SET rocksdb_bulk_load_size=10;
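+# Sketch of the expected behavior: with rocksdb_commit_in_the_middle=ON the
+# engine implicitly commits after every rocksdb_bulk_load_size rows (10 here),
+# so the ROLLBACK below can only undo the uncommitted tail of the 19-row
+# insert, not the batches that were already committed mid-statement.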
+BEGIN;
+INSERT INTO t1 (id) VALUES(1),(2),(3),(4),(5),(6),(7),(8),(9),(10),
+ (11),(12),(13),(14),(15),(16),(17),(18),(19);
+ROLLBACK;
+SELECT * FROM t1;
+INSERT INTO t1 (id) VALUES (11),(12),(13),(14),(15);
+BEGIN;
+UPDATE t1 SET value=100;
+ROLLBACK;
+SELECT * FROM t1;
+BEGIN;
+DELETE FROM t1;
+ROLLBACK;
+SELECT * FROM t1;
+SET rocksdb_commit_in_the_middle=0;
+SET rocksdb_bulk_load_size= @save_rocksdb_bulk_load_size;
+DROP TABLE t1;
+
+--echo #
+--echo # Issue #185 Assertion `BaseValid()' failed in void rocksdb::BaseDeltaIterator::Advance()
+--echo #
+CREATE TABLE t2(id INT NOT NULL PRIMARY KEY, data INT) Engine=MEMORY;
+INSERT INTO t2 VALUES (100,NULL),(150,"long varchar"),(200,"varchar"),(250,"long long long varchar");
+create TABLE t1 (a int not null, b int not null, primary key(a,b));
+INSERT INTO t1 VALUES (1,1);
+SELECT a FROM t1, t2 WHERE a=b AND (b NOT IN (SELECT a FROM t1 WHERE a > 4));
+DROP TABLE t1, t2;
+
+--echo #
+--echo # Issue #189 ha_rocksdb::load_auto_incr_value() creates implicit snapshot and doesn't release
+--echo #
+--connect (con1,localhost,root,,)
+create table r1 (id int auto_increment primary key, value int);
+insert into r1 (id) values (null), (null), (null), (null), (null);
+connection con1;
+create table r2 like r1;
+show create table r2;
+connection default;
+begin;
+insert into r1 values (10, 1);
+commit;
+connection con1;
+begin;
+select * from r1;
+commit;
+connection default;
+drop table r1, r2;
+
+# Same scenario, but with a hidden primary key (no explicit PK, only a secondary index)
+create table r1 (id int auto_increment, value int, index i(id));
+insert into r1 (id) values (null), (null), (null), (null), (null);
+connection con1;
+create table r2 like r1;
+show create table r2;
+connection default;
+begin;
+insert into r1 values (10, 1);
+commit;
+connection con1;
+begin;
+select * from r1;
+commit;
+connection default;
+drop table r1, r2;
+
+disconnect con1;
+
+--echo #
+--echo # Issue#211 Crash on LOCK TABLES + START TRANSACTION WITH CONSISTENT SNAPSHOT
+--echo #
+CREATE TABLE t1(c1 INT);
+lock TABLE t1 read local;
+SELECT 1 FROM t1 GROUP BY TRIM(LEADING RAND()FROM'');
+set AUTOCOMMIT=0;
+start transaction with consistent snapshot;
+SELECT * FROM t1;
+COMMIT;
+UNLOCK TABLES;
+DROP TABLE t1;
+
+--echo #
+--echo # Issue#213 Crash on LOCK TABLES + partitions
+--echo #
+CREATE TABLE t1(a INT,b INT,KEY (b)) engine=rocksdb PARTITION BY HASH(a) PARTITIONS 2;
+INSERT INTO t1(a)VALUES (20010101101010.999949);
+lock tables t1 write,t1 as t0 write,t1 as t2 write;
+SELECT a FROM t1 ORDER BY a;
+truncate t1;
+INSERT INTO t1 VALUES(X'042000200020',X'042000200020'),(X'200400200020',X'200400200020');
+UNLOCK TABLES;
+DROP TABLE t1;
+
+--echo #
+--echo # Issue#250: MyRocks/Innodb different output from query with order by on table with index and decimal type
+--echo # (the test was changed to use VARCHAR, because DECIMAL now supports index-only, and this issue
+--echo # needs a datatype that doesn't support index-only)
+--echo #
+
+CREATE TABLE t1(
+ c1 varchar(10) character set utf8 collate utf8_general_ci NOT NULL,
+ c2 varchar(10) character set utf8 collate utf8_general_ci,
+ c3 INT,
+ INDEX idx(c1,c2)
+);
+INSERT INTO t1 VALUES ('c1-val1','c2-val1',5);
+INSERT INTO t1 VALUES ('c1-val2','c2-val3',6);
+INSERT INTO t1 VALUES ('c1-val3','c2-val3',7);
+SELECT * FROM t1 force index(idx) WHERE c1 <> 'c1-val2' ORDER BY c1 DESC;
+--replace_column 9 #
+explain SELECT * FROM t1 force index(idx) WHERE c1 <> '1' ORDER BY c1 DESC;
+drop table t1;
+
+--echo #
+--echo # Issue#267: MyRocks issue with no matching min/max row and count(*)
+--echo #
+CREATE TABLE t1(c1 INT UNSIGNED, c2 INT SIGNED, INDEX idx2(c2));
+INSERT INTO t1 VALUES(1,null);
+INSERT INTO t1 VALUES(2,null);
+SELECT count(*) as total_rows, min(c2) as min_value FROM t1;
+DROP TABLE t1;
+
+--echo #
+--echo # Issue#263: MyRocks auto_increment skips values if you insert a negative value
+--echo #
+# We have slightly different behavior regarding auto-increment values than
+# InnoDB, so the results of the SHOW TABLE STATUS command will be slightly
+# different. InnoDB will reserve 3 values but only use 2 of them (because
+# the user hard-coded a -1 as the second value). MyRocks will only reserve
+# the values as needed, so only 2 values will be used. This means that the
+# SHOW TABLE STATUS in InnoDB will indicate that the next auto-increment
+# value is 4 while MyRocks will show it as 3.
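+# Sketch of the difference described above, for the first INSERT below:
+#   INSERT INTO t1 VALUES(0),(-1),(0);
+#   SHOW TABLE STATUS LIKE 't1';  -- InnoDB: Auto_increment=4, MyRocks: Auto_increment=3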
+CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
+INSERT INTO t1 VALUES(0),(-1),(0);
+SHOW TABLE STATUS LIKE 't1';
+SELECT * FROM t1;
+DROP TABLE t1;
+CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
+INSERT INTO t1 VALUES(0),(10),(0);
+SHOW TABLE STATUS LIKE 't1';
+SELECT * FROM t1;
+DROP TABLE t1;
+
+--echo #
+--echo # Issue #411: Setting rocksdb_commit_in_the_middle commits transaction
+--echo # without releasing iterator
+--echo #
+
+CREATE TABLE t1 (id1 bigint(20),
+ id2 bigint(20),
+ id3 bigint(20),
+ PRIMARY KEY (id1, id2, id3))
+ DEFAULT CHARSET=latin1;
+
+CREATE TABLE t2 (id1 bigint(20),
+ id2 bigint(20),
+ PRIMARY KEY (id1, id2))
+ DEFAULT CHARSET=latin1;
+
+
+set rocksdb_commit_in_the_middle=1;
+SET @save_rocksdb_bulk_load_size= @@rocksdb_bulk_load_size;
+set rocksdb_bulk_load_size = 100;
+
+--disable_query_log
+let $j = 10000;
+while ($j)
+{
+ --eval insert into t1 (id1, id2, id3) values (0, $j, 0);
+ --eval insert into t2 (id1, id2) values (0, $j);
+ dec $j;
+}
+--enable_query_log
+
+DELETE t2, t1 FROM t2 LEFT JOIN t1 ON t2.id2 = t1.id2 AND t2.id1 = t1.id1 WHERE t2.id1 = 0;
+
+SET rocksdb_bulk_load_size= @save_rocksdb_bulk_load_size;
+SET rocksdb_commit_in_the_middle=0;
+DROP TABLE t1, t2;
+
+
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_options-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_options-master.opt
new file mode 100644
index 00000000000..95d819ee425
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_options-master.opt
@@ -0,0 +1 @@
+--rocksdb_default_cf_options="write_buffer_size=12m;target_file_size_base=1m;max_bytes_for_level_multiplier=10"
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_options-master.sh b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_options-master.sh
new file mode 100755
index 00000000000..9381de1fafc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_options-master.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+cat > $MYSQL_TMP_DIR/cf_configs.cnf <<EOL
+
+EOL
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_options.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_options.test
new file mode 100644
index 00000000000..9482c2178be
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_options.test
@@ -0,0 +1,75 @@
+--source include/have_rocksdb.inc
+
+let MYSQLD_DATADIR= `SELECT @@datadir`;
+let file= $MYSQL_TMP_DIR/cf_configs.cnf;
+
+# create new column families with no configs in cf_configs.cnf file
+
+create table t1 (a int,
+ primary key (a) comment 'cf1') engine=rocksdb;
+create table t2 (a int,
+ primary key (a) comment 'cf2') engine=rocksdb;
+create table t3 (a int,
+ primary key (a) comment 'z') engine=rocksdb;
+
+insert into t1 values (1);
+insert into t2 values (2);
+insert into t3 values (2);
+
+# restart with new column families in DB
+
+--source include/restart_mysqld.inc
+
+# check column family options via information_schema -- should all be default settings
+
+--echo
+--echo Default options for all column families:
+--echo
+select cf_name, option_type, value
+ from information_schema.rocksdb_cf_options
+ where option_type in ('WRITE_BUFFER_SIZE',
+ 'TARGET_FILE_SIZE_BASE',
+ 'MAX_BYTES_FOR_LEVEL_MULTIPLIER')
+ order by cf_name, option_type;
+# restart with cf option overrides for cf1, cf2 and z
+
+--let $restart_parameters=--rocksdb_override_cf_options=cf1={write_buffer_size=8m;target_file_size_base=2m};cf2={write_buffer_size=16m;max_bytes_for_level_multiplier=8};z={target_file_size_base=4m};
+--source include/restart_mysqld.inc
+
+# check column family options via information_schema -- should reflect individual settings
+
+--echo
+--echo Individualized options for column families:
+--echo
+select cf_name, option_type, value
+ from information_schema.rocksdb_cf_options
+ where option_type in ('WRITE_BUFFER_SIZE',
+ 'TARGET_FILE_SIZE_BASE',
+ 'MAX_BYTES_FOR_LEVEL_MULTIPLIER')
+ order by cf_name, option_type;
+
+# syntax error in options (no equal sign)
+
+--exec echo "restart:--rocksdb_override_cf_options=cf1" > $_expect_file_name
+--error 1
+--source include/wait_until_connected_again.inc
+
+# invalid cf option config (no curly braces)
+
+--exec echo "restart:--rocksdb_override_cf_options=cf1=write_buffer_size=8m" > $_expect_file_name
+--error 1
+--source include/wait_until_connected_again.inc
+
+# invalid cf option config (cf listed twice)
+
+--exec echo "restart:--rocksdb_override_cf_options=cf1={write_buffer_size=8m};cf1={target_file_size_base=2m}" > $_expect_file_name
+--error 1
+--source include/wait_until_connected_again.inc
+
+# clean up
+
+--source include/restart_mysqld.inc
+
+--echo
+drop table t1,t2,t3;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_per_partition.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_per_partition.test
new file mode 100644
index 00000000000..fcbd8527acc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_per_partition.test
@@ -0,0 +1,513 @@
+--source include/have_rocksdb.inc
+
+--source include/have_partition.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+--enable_warnings
+
+#
+# Create a table with multiple partitions, but don't specify in the comment
+# that per-partition column families (CF) should be created. Expect that the
+# default CF will be used and no new one will be created.
+#
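+# The qualifier syntax exercised throughout this file is (sketch):
+#   PRIMARY KEY (...) COMMENT 'custom_<partition_name>_cfname=<cf_name>;...'
+# e.g. 'custom_p0_cfname=foo' puts partition custom_p0's key data into CF
+# 'foo'; a 'rev:' prefix on the CF name requests a reverse-ordered CF.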
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ event DATE,
+ PRIMARY KEY (`c1`, `c2`) COMMENT 'testcomment'
+) ENGINE=ROCKSDB
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+# Expecting no results here.
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='testcomment';
+
+DROP TABLE t1;
+
+#
+# Same test case as above, only with a reverse CF. Should result in the same
+# behavior: no new CF-s are created, only the default one is used.
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ event DATE,
+ PRIMARY KEY (`c1`, `c2`) COMMENT 'rev:testrevcomment'
+) ENGINE=ROCKSDB
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+# Expecting no results here.
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='rev:testrevcomment';
+
+DROP TABLE t1;
+
+#
+# Create a table with multiple partitions and request that a separate CF be
+# created for every partition. As a result we expect three different CF-s to
+# be created.
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ event DATE,
+ PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=my_custom_cf;custom_p2_cfname=baz'
+) ENGINE=ROCKSDB
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+set @@global.rocksdb_compact_cf = 'baz';
+
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='foo';
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='my_custom_cf';
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='baz';
+
+DROP TABLE t1;
+
+#
+# Same test case as above, only one of the partitions has "rev:" prefix. The
+# intent here is to make sure that qualifier can specify reverse CF as well.
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ event DATE,
+ PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=t1-p0;custom_p1_cfname=rev:bar;custom_p2_cfname=t1-p2'
+) ENGINE=ROCKSDB
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+set @@global.rocksdb_compact_cf = 't1-p0';
+set @@global.rocksdb_compact_cf = 'rev:bar';
+set @@global.rocksdb_compact_cf = 't1-p2';
+
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='t1-p0';
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='rev:bar';
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='t1-p2';
+
+DROP TABLE t1;
+
+
+#
+# Create a table with multiple partitions and assign two partitions to the same
+# CF, the third one gets a separate CF, and the fourth one will use the
+# default CF. As a result we expect two new CF-s to be created.
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ event DATE,
+ PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=cf-zero;custom_p1_cfname=cf-one;custom_p2_cfname=cf-zero'
+) ENGINE=ROCKSDB
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9),
+ PARTITION custom_p3 VALUES IN (10, 20, 30)
+);
+
+set @@global.rocksdb_compact_cf = 'cf-zero';
+set @@global.rocksdb_compact_cf = 'cf-one';
+
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='cf-zero';
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='cf-one';
+
+DROP TABLE t1;
+
+#
+# Create a table with CF-s per partition and verify that ALTER TABLE + DROP
+# INDEX work for that scenario and data is persisted.
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ event DATE,
+ PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz'
+) ENGINE=ROCKSDB
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+INSERT INTO t1 VALUES (1, 1, "one", null);
+INSERT INTO t1 VALUES (2, 2, "two", null);
+INSERT INTO t1 VALUES (3, 3, "three", null);
+INSERT INTO t1 VALUES (5, 5, "five", null);
+INSERT INTO t1 VALUES (9, 9, "nine", null);
+
+SELECT * FROM t1;
+ALTER TABLE t1 DROP PRIMARY KEY;
+SELECT * FROM t1;
+
+#
+# Verify that we can compact custom CF-s.
+#
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'bar';
+set @@global.rocksdb_compact_cf = 'baz';
+
+DROP TABLE t1;
+
+#
+# Create a table with CF-s per partition and verify that ALTER TABLE + DROP
+# INDEX + ADD INDEX work for that scenario, that data is persisted, and that
+# new column families are created.
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ event DATE,
+ PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz'
+) ENGINE=ROCKSDB
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+INSERT INTO t1 VALUES (1, 1, "one", null);
+INSERT INTO t1 VALUES (2, 2, "two", null);
+INSERT INTO t1 VALUES (3, 3, "three", null);
+INSERT INTO t1 VALUES (5, 5, "five", null);
+INSERT INTO t1 VALUES (9, 9, "nine", null);
+
+ALTER TABLE t1 DROP PRIMARY KEY;
+ALTER TABLE t1 ADD PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=p0_cf;custom_p1_cfname=p1_cf';
+
+set @@global.rocksdb_compact_cf = 'p0_cf';
+set @@global.rocksdb_compact_cf = 'p1_cf';
+
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='p0_cf';
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='p1_cf';
+
+DROP TABLE t1;
+
+#
+# Create a table with CF-s per partition, use ALTER TABLE to change the way
+# it's partitioned, and verify that new CF-s will be created.
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ event DATE,
+ PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz'
+) ENGINE=ROCKSDB
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+INSERT INTO t1 VALUES (1, 1, "one", null);
+INSERT INTO t1 VALUES (2, 2, "two", null);
+INSERT INTO t1 VALUES (3, 3, "three", null);
+INSERT INTO t1 VALUES (5, 5, "five", null);
+INSERT INTO t1 VALUES (9, 9, "nine", null);
+
+ALTER TABLE t1 PARTITION BY LIST(c1) (
+ PARTITION custom_p3 VALUES IN (1, 4, 7),
+ PARTITION custom_p4 VALUES IN (2, 5, 8, 3, 6, 9)
+);
+
+ALTER TABLE t1 DROP PRIMARY KEY;
+ALTER TABLE t1 ADD PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p3_cfname=p3_cf;custom_p4_cfname=p4_cf';
+
+set @@global.rocksdb_compact_cf = 'p3_cf';
+set @@global.rocksdb_compact_cf = 'p4_cf';
+
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='p3_cf';
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='p4_cf';
+
+DROP TABLE t1;
+
+#
+# Create a table with CF-s per partition, using an empty qualifier name.
+# Verify that no new CF-s are created. This will also make sure that nothing
+# gets added for `custom_p2`.
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ event DATE,
+ PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=;'
+) ENGINE=ROCKSDB
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+DROP TABLE t1;
+
+#
+# Verify some basic partition-related operations when using PARTITION BY LIST
+# COLUMNS on a VARBINARY column, on a table with a more complicated schema.
+#
+
+#
+# Verify that creating the table without COMMENT actually works.
+#
+CREATE TABLE `t2` (
+ `col1` bigint(20) NOT NULL,
+ `col2` varbinary(64) NOT NULL,
+ `col3` varbinary(256) NOT NULL,
+ `col4` bigint(20) NOT NULL,
+ `col5` mediumblob NOT NULL,
+ PRIMARY KEY (`col1`,`col2`,`col3`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ PARTITION BY LIST COLUMNS (`col2`) (
+ PARTITION custom_p0 VALUES IN (0x12345),
+ PARTITION custom_p1 VALUES IN (0x23456),
+ PARTITION custom_p2 VALUES IN (0x34567),
+ PARTITION custom_p3 VALUES IN (0x45678),
+ PARTITION custom_p4 VALUES IN (0x56789),
+ PARTITION custom_p5 VALUES IN (0x6789A),
+ PARTITION custom_p6 VALUES IN (0x789AB),
+ PARTITION custom_p7 VALUES IN (0x89ABC)
+);
+
+DROP TABLE t2;
+
+#
+# Create the same table with two custom CF-s per partition as specified in the
+# COMMENT.
+#
+CREATE TABLE `t2` (
+ `col1` bigint(20) NOT NULL,
+ `col2` varbinary(64) NOT NULL,
+ `col3` varbinary(256) NOT NULL,
+ `col4` bigint(20) NOT NULL,
+ `col5` mediumblob NOT NULL,
+ PRIMARY KEY (`col1`,`col2`,`col3`) COMMENT 'custom_p0_cfname=my_cf0;custom_p1_cfname=my_cf1'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ PARTITION BY LIST COLUMNS (`col2`) (
+ PARTITION custom_p0 VALUES IN (0x12345),
+ PARTITION custom_p1 VALUES IN (0x23456),
+ PARTITION custom_p2 VALUES IN (0x34567),
+ PARTITION custom_p3 VALUES IN (0x45678),
+ PARTITION custom_p4 VALUES IN (0x56789),
+ PARTITION custom_p5 VALUES IN (0x6789A),
+ PARTITION custom_p6 VALUES IN (0x789AB),
+ PARTITION custom_p7 VALUES IN (0x89ABC)
+);
+
+# Verify that CF-s were created earlier.
+set @@global.rocksdb_compact_cf = 'my_cf0';
+set @@global.rocksdb_compact_cf = 'my_cf1';
+
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='my_cf0';
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='my_cf1';
+
+# Insert some random data.
+INSERT INTO t2 VALUES (100, 0x12345, 0x1, 1, 0x2);
+INSERT INTO t2 VALUES (200, 0x12345, 0x1, 1, 0x2);
+INSERT INTO t2 VALUES (300, 0x12345, 0x1, 1, 0x2);
+INSERT INTO t2 VALUES (100, 0x23456, 0x2, 1, 0x3);
+INSERT INTO t2 VALUES (100, 0x34567, 0x4, 1, 0x5);
+INSERT INTO t2 VALUES (400, 0x89ABC, 0x4, 1, 0x5);
+
+# Verify it's there.
+SELECT col1, HEX(col2), HEX(col3), col4, HEX(col5) FROM t2;
+
+# Verify it's being fetched from the right partition. This tests partitioning
+# functionality, but we want to make sure that by adding CF-s per partition we
+# don't regress anything.
+EXPLAIN PARTITIONS SELECT HEX(col2) FROM t2 where col2 = 0x12345;
+EXPLAIN PARTITIONS SELECT HEX(col2) FROM t2 where col2 = 0x23456;
+
+# Delete the current PK and create a new one referencing different CF-s. We
+# need to verify that new CF-s will be created and no data will be lost in
+# the process.
+ALTER TABLE t2 DROP PRIMARY KEY;
+ALTER TABLE t2 ADD PRIMARY KEY (`col1`,`col2`,`col3`) COMMENT 'custom_p0_cfname=new_cf0;custom_p1_cfname=new_cf1';
+
+# Verify that new CF-s are created as well.
+set @@global.rocksdb_compact_cf = 'new_cf0';
+set @@global.rocksdb_compact_cf = 'new_cf1';
+
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='new_cf0';
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='new_cf1';
+
+# Insert some more random data.
+INSERT INTO t2 VALUES (500, 0x12345, 0x5, 1, 0x2);
+INSERT INTO t2 VALUES (700, 0x23456, 0x7, 1, 0x3);
+
+# Verify that partition mappings are still intact.
+EXPLAIN PARTITIONS SELECT HEX(col2) FROM t2 where col2 = 0x12345;
+EXPLAIN PARTITIONS SELECT HEX(col2) FROM t2 where col2 = 0x23456;
+
+# Verify that no data is lost.
+SELECT col1, HEX(col2), HEX(col3), col4, HEX(col5) FROM t2;
+
+DROP TABLE t2;
+
+#
+# Create the same table with two custom CF-s per partition as specified in the
+# COMMENT. Use both the PK and SK when creating the table.
+#
+CREATE TABLE `t2` (
+ `col1` bigint(20) NOT NULL,
+ `col2` varbinary(64) NOT NULL,
+ `col3` varbinary(256) NOT NULL,
+ `col4` bigint(20) NOT NULL,
+ `col5` mediumblob NOT NULL,
+ PRIMARY KEY (`col1`,`col2`,`col3`) COMMENT 'custom_p0_cfname=test_cf0;custom_p1_cfname=test_cf1',
+ KEY (`col2`, `col4`) COMMENT 'custom_p5_cfname=test_cf5'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ PARTITION BY LIST COLUMNS (`col2`) (
+ PARTITION custom_p0 VALUES IN (0x12345),
+ PARTITION custom_p1 VALUES IN (0x23456),
+ PARTITION custom_p2 VALUES IN (0x34567),
+ PARTITION custom_p3 VALUES IN (0x45678),
+ PARTITION custom_p4 VALUES IN (0x56789),
+ PARTITION custom_p5 VALUES IN (0x6789A),
+ PARTITION custom_p6 VALUES IN (0x789AB),
+ PARTITION custom_p7 VALUES IN (0x89ABC)
+);
+
+# Verify that CF-s were created for PK.
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='test_cf0';
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='test_cf1';
+
+# Verify that CF-s were created for SK.
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='test_cf5';
+
+# Insert some random data.
+INSERT INTO t2 VALUES (100, 0x12345, 0x1, 1, 0x2);
+INSERT INTO t2 VALUES (200, 0x12345, 0x1, 1, 0x2);
+INSERT INTO t2 VALUES (300, 0x12345, 0x1, 1, 0x2);
+INSERT INTO t2 VALUES (100, 0x23456, 0x2, 1, 0x3);
+INSERT INTO t2 VALUES (100, 0x34567, 0x4, 1, 0x5);
+INSERT INTO t2 VALUES (400, 0x89ABC, 0x4, 1, 0x5);
+INSERT INTO t2 VALUES (500, 0x6789A, 0x5, 1, 0x7);
+
+# Basic verification that correct partition and key are used when searching.
+EXPLAIN PARTITIONS SELECT * FROM t2 WHERE col2 = 0x6789A AND col4 = 1;
+
+# Remove the key.
+ALTER TABLE t2 DROP KEY `col2`;
+
+# Add a new key and expect new CF to be created as well.
+ALTER TABLE t2 ADD KEY (`col3`, `col4`) COMMENT 'custom_p5_cfname=another_cf_for_p5';
+
+# Verify that CF-s were created for SK.
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='another_cf_for_p5';
+
+# Verify that correct partition and key are used when searching.
+ANALYZE TABLE t2;
+--replace_column 10 #
+EXPLAIN PARTITIONS SELECT * FROM t2 WHERE col3 = 0x4 AND col2 = 0x34567;
+
+DROP TABLE t2;
+
+#
+# Verify the same scenario as before, but with a UNIQUE KEY in addition to PK.
+#
+CREATE TABLE `t2` (
+ `col1` bigint(20) NOT NULL,
+ `col2` varbinary(64) NOT NULL,
+ `col3` varbinary(256) NOT NULL,
+ `col4` bigint(20) NOT NULL,
+ `col5` mediumblob NOT NULL,
+ PRIMARY KEY (`col1`,`col2`,`col3`) COMMENT 'custom_p0_cfname=test_cf0;custom_p1_cfname=test_cf1',
+ UNIQUE KEY (`col2`, `col4`) COMMENT 'custom_p5_cfname=unique_test_cf5'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ PARTITION BY LIST COLUMNS (`col2`) (
+ PARTITION custom_p0 VALUES IN (0x12345),
+ PARTITION custom_p1 VALUES IN (0x23456),
+ PARTITION custom_p2 VALUES IN (0x34567),
+ PARTITION custom_p3 VALUES IN (0x45678),
+ PARTITION custom_p4 VALUES IN (0x56789),
+ PARTITION custom_p5 VALUES IN (0x6789A),
+ PARTITION custom_p6 VALUES IN (0x789AB),
+ PARTITION custom_p7 VALUES IN (0x89ABC)
+);
+
+# Verify that CF-s were created for SK.
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='unique_test_cf5';
+
+INSERT INTO t2 VALUES (100, 0x12345, 0x1, 1, 0x2);
+
+--error ER_DUP_ENTRY
+INSERT INTO t2 VALUES (200, 0x12345, 0x1, 1, 0x2);
+
+--error ER_DUP_ENTRY
+INSERT INTO t2 VALUES (300, 0x12345, 0x1, 1, 0x2);
+
+INSERT INTO t2 VALUES (100, 0x23456, 0x2, 1, 0x3);
+INSERT INTO t2 VALUES (100, 0x34567, 0x4, 1, 0x5);
+INSERT INTO t2 VALUES (400, 0x89ABC, 0x4, 1, 0x5);
+INSERT INTO t2 VALUES (500, 0x6789A, 0x5, 1, 0x7);
+
+DROP TABLE t2;
+
+#
+# Verify that both partitioned and non-partitioned table can share a CF.
+#
+CREATE TABLE t1 (
+ `a` int,
+ PRIMARY KEY (a) COMMENT "sharedcf"
+) ENGINE=ROCKSDB;
+
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='sharedcf';
+
+#
+# We expect this to succeed.
+#
+CREATE TABLE t2 (
+ `a` INT,
+ `b` DATE,
+ `c` VARCHAR(42),
+ PRIMARY KEY (`a`) COMMENT "custom_p0_cfname=sharedcf;custom_p2_cfname=notsharedcf"
+) ENGINE=ROCKSDB
+ PARTITION BY LIST(`a`) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='notsharedcf';
+
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+
+
+# Test that truncating table actually removes rows.
+CREATE TABLE t1 (
+ a INT NOT NULL,
+ PRIMARY KEY (a) COMMENT 'p1_cfname=foo;'
+) ENGINE=ROCKSDB
+PARTITION BY LIST COLUMNS(a)
+(PARTITION p1 VALUES IN (1) ENGINE = ROCKSDB);
+
+INSERT INTO t1 values (1);
+TRUNCATE TABLE t1;
+SELECT * FROM t1;
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_reverse-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_reverse-master.opt
new file mode 100644
index 00000000000..ba9364e1523
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_reverse-master.opt
@@ -0,0 +1 @@
+--rocksdb_debug_optimizer_n_rows=1000
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_reverse.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_reverse.test
new file mode 100644
index 00000000000..8e30332bafe
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_reverse.test
@@ -0,0 +1,71 @@
+--source include/have_rocksdb.inc
+
+#
+# RocksDB-SE tests for reverse-ordered Column Families
+#
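+# (An index comment starting with "rev:" places the index in a
+# reverse-ordered column family, so scans over it return keys in
+# descending order by default.)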
+
+create table t0 (a int) engine=myisam;
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t1 (
+ pk int primary key,
+ a int not null,
+ b int not null,
+ key(a) comment 'rev:foo',
+ key(b) comment 'bar'
+) engine=rocksdb;
+
+insert into t1 select a,a,a from t0;
+insert into t1 select a+10,a+10,a+10 from t0;
+
+--echo # Primary key is not in a reverse-ordered CF, so full table scan
+--echo # returns rows in ascending order:
+select * from t1;
+
+--replace_column 9 #
+explain
+select a from t1 order by a limit 5;
+select a from t1 order by a limit 5;
+
+--replace_column 9 #
+explain
+select b from t1 order by b limit 5;
+select b from t1 order by b limit 5;
+
+--replace_column 9 #
+explain
+select a from t1 order by a desc limit 5;
+select a from t1 order by a desc limit 5;
+
+--replace_column 9 #
+explain
+select b from t1 order by b desc limit 5;
+select b from t1 order by b desc limit 5;
+
+drop table t1;
+
+--echo #
+--echo # Try a primary key in a reverse-ordered CF.
+--echo #
+
+create table t2 (
+ pk int,
+ a int not null,
+ primary key(pk) comment 'rev:cf1'
+) engine=rocksdb;
+
+insert into t2 select a,a from t0;
+--echo # Primary key is in a reverse-ordered CF, so full table scan
+--echo # returns rows in descending order:
+select * from t2;
+
+set autocommit=0;
+begin;
+delete from t2 where a=3 or a=7;
+select * from t2;
+rollback;
+set autocommit=1;
+
+drop table t2;
+drop table t0;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_checksums-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_checksums-master.opt
new file mode 100644
index 00000000000..320c48e5563
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_checksums-master.opt
@@ -0,0 +1,3 @@
+--rocksdb_debug_optimizer_n_rows=1000
+--rocksdb_records_in_range=50
+--log_warnings=3
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_checksums.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_checksums.test
new file mode 100644
index 00000000000..42a4c83ff04
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_checksums.test
@@ -0,0 +1,152 @@
+--source include/have_rocksdb.inc
+
+#
+# Tests for row checksums feature
+#
+--source include/have_debug.inc
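+# (The variables exercised here, roughly: rocksdb_store_row_debug_checksums
+# writes a checksum with each row and index entry,
+# rocksdb_verify_row_debug_checksums verifies those checksums on read, and
+# rocksdb_checksums_pct stores checksums for only that percentage of writes.)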
+
+--let LOG=$MYSQLTEST_VARDIR/tmp/rocksdb_checksum.err
+--let $_mysqld_option=--log-error=$LOG
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--source include/restart_mysqld_with_option.inc
+
+set @save_rocksdb_store_row_debug_checksums=@@global.rocksdb_store_row_debug_checksums;
+set @save_rocksdb_verify_row_debug_checksums=@@global.rocksdb_verify_row_debug_checksums;
+set @save_rocksdb_checksums_pct=@@global.rocksdb_checksums_pct;
+
+show variables like 'rocksdb_%checksum%';
+
+create table t1 (pk int primary key, a int, b int, key(a), key(b)) engine=rocksdb;
+insert into t1 values (1,1,1),(2,2,2),(3,3,3);
+check table t1;
+--let SEARCH_FILE=$LOG
+--let SEARCH_PATTERN=CHECKTABLE t1[^\n]*
+--let SEARCH_OUTPUT=matches
+--source include/search_pattern_in_file.inc
+
+drop table t1;
+
+set session rocksdb_store_row_debug_checksums=on;
+create table t2 (pk int primary key, a int, b int, key(a), key(b)) engine=rocksdb;
+insert into t2 values (1,1,1),(2,2,2),(3,3,3);
+check table t2;
+--let SEARCH_PATTERN=CHECKTABLE t2[^\n]*
+--source include/search_pattern_in_file.inc
+
+--echo # Now, make a table that has rows both with and without checksums
+create table t3 (pk int primary key, a int, b int, key(a), key(b)) engine=rocksdb;
+insert into t3 values (1,1,1),(2,2,2),(3,3,3);
+set session rocksdb_store_row_debug_checksums=off;
+update t3 set b=3 where a=2;
+set session rocksdb_store_row_debug_checksums=on;
+check table t3;
+--let SEARCH_PATTERN=CHECKTABLE t3[^\n]*
+--source include/search_pattern_in_file.inc
+
+set session rocksdb_store_row_debug_checksums=on;
+set session rocksdb_checksums_pct=5;
+create table t4 (pk int primary key, a int, b int, key(a), key(b)) engine=rocksdb;
+--disable_query_log
+let $i=0;
+let $x= 100000;
+while ($i<4000)
+{
+ inc $i;
+ eval insert t4(pk,a,b) values($i, $i, $i div 10);
+ eval update t4 set a= a+$x where a=$i;
+ eval update t4 set pk=pk+$x where pk=$i;
+}
+--enable_query_log
+check table t4;
+perl;
+$total=4000;
+$pct=5;
+@out=();
+
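+# Scan the server error log and verify that roughly $pct percent of the
+# $total rows and index entries carry checksums (+/-2% tolerance). A
+# CURRENT_TEST: marker means a new test run, so earlier matches are
+# discarded.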
+$filename= "$ENV{LOG}";
+open(F, '<', $filename) || die("Can't open file $filename: $!");
+while(<F>) {
+  @out=() if /^CURRENT_TEST:/;
+  if (/(\d+) index entries checked \((\d+) had checksums/) {
+    if ($1 == $total and $2 >= $total*($pct-2)/100 and $2 <= $total*($pct+2)/100) {
+      push @out, sprintf "%d index entries had around %d checksums\n", $total, $total*$pct/100;
+    }
+  } elsif (/(\d+) table records had checksums/) {
+    if ($1 >= $total*($pct-2)/100 and $1 <= $total*($pct+2)/100) {
+      push @out, sprintf "Around %d table records had checksums\n", $total*$pct/100;
+    }
+  }
+}
+print @out;
+EOF
+set session rocksdb_checksums_pct=100;
+
+--echo #
+--echo # Ok, table t2 has all rows with checksums. Simulate a few checksum mismatches.
+--echo #
+insert into mtr.test_suppressions values
+ ('Checksum mismatch in key of key-value pair for index'),
+ ('Checksum mismatch in value of key-value pair for index'),
+ ('Data with incorrect checksum');
+
+--echo # 1. Start with mismatch in key checksum of the PK.
+set session debug_dbug= "+d,myrocks_simulate_bad_pk_checksum1";
+set session rocksdb_verify_row_debug_checksums=off;
+select * from t3;
+set session rocksdb_verify_row_debug_checksums=on;
+--error ER_INTERNAL_ERROR
+select * from t3;
+--error ER_INTERNAL_ERROR
+select * from t4;
+set session debug_dbug= "-d,myrocks_simulate_bad_pk_checksum1";
+
+--echo # 2. Continue with mismatch in pk value checksum.
+set session debug_dbug= "+d,myrocks_simulate_bad_pk_checksum2";
+set session rocksdb_verify_row_debug_checksums=off;
+select * from t3;
+set session rocksdb_verify_row_debug_checksums=on;
+--error ER_INTERNAL_ERROR
+select * from t3;
+--error ER_INTERNAL_ERROR
+select * from t4;
+set session debug_dbug= "-d,myrocks_simulate_bad_pk_checksum2";
+
+--echo # 3. Check if we catch checksum mismatches for secondary indexes
+--replace_column 9 #
+explain
+select * from t3 force index(a) where a<4;
+select * from t3 force index(a) where a<4;
+
+set session debug_dbug= "+d,myrocks_simulate_bad_key_checksum1";
+--error ER_INTERNAL_ERROR
+select * from t3 force index(a) where a<4;
+--error ER_INTERNAL_ERROR
+select * from t4 force index(a) where a<1000000;
+set session debug_dbug= "-d,myrocks_simulate_bad_key_checksum1";
+
+--echo # 4. The same for index-only reads?
+--disable_query_log
+set global rocksdb_force_flush_memtable_now=1;
+--enable_query_log
+--replace_column 9 #
+explain
+select a from t3 force index(a) where a<4;
+select a from t3 force index(a) where a<4;
+
+set session debug_dbug= "+d,myrocks_simulate_bad_key_checksum1";
+--error ER_INTERNAL_ERROR
+select a from t3 force index(a) where a<4;
+--error ER_INTERNAL_ERROR
+select a from t4 force index(a) where a<1000000;
+set session debug_dbug= "-d,myrocks_simulate_bad_key_checksum1";
+
+set @@global.rocksdb_store_row_debug_checksums=@save_rocksdb_store_row_debug_checksums;
+set @@global.rocksdb_verify_row_debug_checksums=@save_rocksdb_verify_row_debug_checksums;
+set @@global.rocksdb_checksums_pct=@save_rocksdb_checksums_pct;
+
+--source include/restart_mysqld.inc
+#--remove_file $LOG
+
+drop table t2,t3,t4;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.inc b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.inc
new file mode 100644
index 00000000000..5336c77ee83
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.inc
@@ -0,0 +1,109 @@
+# MariaDB: sourcing the file below too many times makes the argument list
+# passed to win_main() so long that it exceeds a limit on Windows.
+# Comment it out:
+#--source include/have_rocksdb.inc
+--source include/have_debug_sync.inc
+
+--source include/count_sessions.inc
+
+# Usage:
+#
+# let $order = ASC; # or DESC
+# let $comment = "rev:cf2"; # or ""
+# --source suite/rocksdb/t/rocksdb_concurrent_delete.inc
+
+let $first_row = -1; # Sentinel; $order must set this, so -1 should never be used
+if ($order == 'ASC')
+{
+ let $first_row = 1;
+ let $middle_row = 3;
+ let $end_row = 5;
+}
+if ($order == 'DESC')
+{
+ let $first_row = 5;
+ let $middle_row = 3;
+ let $end_row = 1;
+}
+
+connect (con, localhost, root,,);
+connection default;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+
+SET debug_sync='RESET';
+
+eval CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT $comment, a INT);
+INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5);
+
+# This will cause the SELECT to block after finding the first row, but
+# before locking and reading it.
+--echo --PK first row delete
+connection con;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+send_eval SELECT * FROM t1 order by t1.pk $order FOR UPDATE;
+
+# While that connection is waiting, delete the first row (the one con
+# is about to lock and read).
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+eval DELETE FROM t1 WHERE pk = $first_row;
+
+# Signal the waiting select to continue
+SET debug_sync='now SIGNAL go';
+
+# Now get the results from the select. The first entry (1,1) (or (5,5) when
+# using reverse ordering) should be missing. Prior to the fix the SELECT
+# would have returned: "1815: Internal error: NotFound:"
+connection con;
+reap;
+
+# Deleting a middle row
+--echo --PK middle row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+send_eval SELECT * FROM t1 order by t1.pk $order FOR UPDATE;
+
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+eval DELETE FROM t1 WHERE pk = $middle_row;
+SET debug_sync='now SIGNAL go';
+
+connection con;
+if ($isolation_level == "REPEATABLE READ")
+{
+ --error ER_LOCK_DEADLOCK
+ reap;
+}
+if ($isolation_level == "READ COMMITTED")
+{
+ reap;
+}
+
+# Deleting the end row
+--echo --PK end row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+send_eval SELECT * FROM t1 order by t1.pk $order FOR UPDATE;
+
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+eval DELETE FROM t1 WHERE pk = $end_row;
+SET debug_sync='now SIGNAL go';
+
+connection con;
+if ($isolation_level == "REPEATABLE READ")
+{
+ --error ER_LOCK_DEADLOCK
+ reap;
+}
+if ($isolation_level == "READ COMMITTED")
+{
+ reap;
+}
+
+
+# Cleanup
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.test
new file mode 100644
index 00000000000..47818bfdbe1
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.test
@@ -0,0 +1,38 @@
+# The rocksdb_concurrent_delete test case covers snapshot conflicts and
+# verifies that locking reads keep scanning when they hit a row that has
+# just been deleted (row not found). The following code paths are covered:
+#
+# 1. PK full scan (key=NULL)
+# first row, and other rows
+# ha_rnd_next -> rnd_next -> rnd_next_with_direction
+#
+# 2. PK range scan (key=PRIMARY)
+# first row
+# read_range_first -> index_read_map_impl -> read_row_from_primary_key
+# next row
+# index_next -> index_next_with_direction -> rnd_next_with_direction
+#
+# 3. SK full scan
+# first row
+# index_first -> index_first_intern -> index_next_with_direction -> secondary_index_read
+# next row
+# index_next -> index_next_with_direction -> secondary_index_read
+#
+# 4. SK range scan
+# first row
+# read_range_first -> index_read_map_impl -> read_row_from_secondary_key
+# next row
+# index_next -> index_next_with_direction -> secondary_index_read
+#
+# In all cases, REPEATABLE READ gets a snapshot conflict error if a row
+# other than the first one is deleted by another transaction after the
+# scan has started.
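+#
+# The queries driving these paths live in the sourced .inc files, e.g.:
+#   PK scans:    SELECT * FROM t1 ORDER BY t1.pk $order FOR UPDATE
+#                (rocksdb_concurrent_delete.inc)
+#   SK scans:    SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE
+#                (rocksdb_concurrent_delete_sk.inc)
+#   Range scans: UPDATE t1 FORCE INDEX ($index) SET value=... WHERE id1=1
+#                (rocksdb_concurrent_delete_range.inc)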
+
+--source include/have_rocksdb.inc
+--source include/have_debug_sync.inc
+
+let $isolation_level = REPEATABLE READ;
+--source rocksdb_concurrent_delete_main.inc
+
+let $isolation_level = READ COMMITTED;
+--source rocksdb_concurrent_delete_main.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_main.inc b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_main.inc
new file mode 100644
index 00000000000..bcd86af96aa
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_main.inc
@@ -0,0 +1,30 @@
+--source include/have_rocksdb.inc
+--source include/have_debug_sync.inc
+
+# This validates the fix for Issue #144. The problem was that with more
+# than one client accessing/deleting the same row there was a possibility
+# of client A finding a row (through Next() or Prev()) but the row being
+# deleted before the GetForUpdate() call could occur. When this happened
+# a nearly useless error was being returned.
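+#
+# Sketch of the race being tested (con = scanning client, default = deleter):
+#   con:     Next()/Prev() finds row R
+#   default: DELETE row R; COMMIT
+#   con:     GetForUpdate(R) -> NotFound
+# The scan should skip R (or raise a proper snapshot conflict under
+# REPEATABLE READ) instead of returning the NotFound error to the client.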
+
+let $order=ASC;
+let $comment="";
+--source rocksdb_concurrent_delete.inc
+--source rocksdb_concurrent_delete_sk.inc
+
+let $order=DESC;
+let $comment="";
+--source rocksdb_concurrent_delete.inc
+
+let $order=ASC;
+let $comment="rev:cf2";
+--source rocksdb_concurrent_delete.inc
+
+let $order=DESC;
+let $comment="rev:cf2";
+--source rocksdb_concurrent_delete.inc
+
+let $index=PRIMARY;
+--source rocksdb_concurrent_delete_range.inc
+let $index=sk;
+--source rocksdb_concurrent_delete_range.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_range.inc b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_range.inc
new file mode 100644
index 00000000000..a85527141f6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_range.inc
@@ -0,0 +1,85 @@
+--source include/have_rocksdb.inc
+--source include/have_debug_sync.inc
+
+--source include/count_sessions.inc
+
+# This is a test case to reproduce https://github.com/facebook/mysql-5.6/issues/162
+# Expected output of the last select for update is (1,2,100) and (1,3,100),
+# but without the fix it returns (1,2,1) and (1,3,1).
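+# The rocksdb.get_row_by_rowid sync point used below parks the UPDATE during
+# the row fetch, which is the window where the concurrent DELETE is made to
+# land.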
+
+connect (con, localhost, root,,);
+connection default;
+
+set debug_sync='RESET';
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+create table t1 (id1 int, id2 int, value int, primary key (id1, id2), index sk (id1, value)) engine=rocksdb;
+insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(2, 2, 2);
+
+# deleting a first row
+--echo --First row delete with $index
+connection con;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+send_eval update t1 force index ($index) set value=100 where id1=1;
+
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=1;
+set debug_sync='now SIGNAL go';
+
+connection con;
+reap;
+select * from t1 where id1=1;
+
+# deleting a middle row
+--echo --Middle row delete with $index
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+send_eval update t1 force index ($index) set value=200 where id1=1;
+
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=3;
+set debug_sync='now SIGNAL go';
+
+connection con;
+if ($isolation_level == "REPEATABLE READ")
+{
+ --error ER_LOCK_DEADLOCK
+ reap;
+}
+if ($isolation_level == "READ COMMITTED")
+{
+ reap;
+}
+select * from t1 where id1=1;
+
+# deleting the end row
+--echo --End row delete with $index
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+send_eval update t1 force index ($index) set value=300 where id1=1;
+
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=5;
+set debug_sync='now SIGNAL go';
+
+connection con;
+if ($isolation_level == "REPEATABLE READ")
+{
+ --error ER_LOCK_DEADLOCK
+ reap;
+}
+if ($isolation_level == "READ COMMITTED")
+{
+ reap;
+}
+select * from t1 where id1=1;
+
+# Cleanup
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_sk.inc b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_sk.inc
new file mode 100644
index 00000000000..ac0b5d76854
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_sk.inc
@@ -0,0 +1,82 @@
+--source include/have_rocksdb.inc
+--source include/have_debug_sync.inc
+
+--source include/count_sessions.inc
+
+connect (con, localhost, root,,);
+connection default;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+
+SET debug_sync='RESET';
+
+eval CREATE TABLE t1 (pk INT PRIMARY KEY, a INT, index a(a));
+INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5);
+
+# This will cause the SELECT to block after finding the first row, but
+# before locking and reading it.
+--echo --SK first row delete
+connection con;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go';
+send_eval SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE;
+
+# While that connection is waiting, delete the first row (the one con
+# is about to lock and read).
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+eval DELETE FROM t1 WHERE pk = 1;
+
+# Signal the waiting select to continue
+SET debug_sync='now SIGNAL go';
+
+connection con;
+reap;
+
+# Deleting a middle row
+--echo --SK middle row delete
+SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go';
+send_eval SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE;
+
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+eval DELETE FROM t1 WHERE pk = 3;
+SET debug_sync='now SIGNAL go';
+
+connection con;
+if ($isolation_level == "REPEATABLE READ")
+{
+ --error ER_LOCK_DEADLOCK
+ reap;
+}
+if ($isolation_level == "READ COMMITTED")
+{
+ reap;
+}
+
+# Deleting the end row
+--echo --SK end row delete
+SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go';
+send_eval SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE;
+
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+eval DELETE FROM t1 WHERE pk = 5;
+SET debug_sync='now SIGNAL go';
+
+connection con;
+if ($isolation_level == "REPEATABLE READ")
+{
+ --error ER_LOCK_DEADLOCK
+ reap;
+}
+if ($isolation_level == "READ COMMITTED")
+{
+ reap;
+}
+
+# Cleanup
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_insert.py b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_insert.py
new file mode 100644
index 00000000000..37b118d525a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_insert.py
@@ -0,0 +1,95 @@
+"""
+This script tests concurrent inserts on a given table.
+Example Usage (in the MySQL Test Framework):
+
+ CREATE TABLE t1 (a INT) ENGINE=rocksdb;
+
+ let $exec = python suite/rocksdb/t/rocksdb_concurrent_insert.py \
+ root 127.0.0.1 $MASTER_MYPORT test t1 100 4;
+ exec $exec;
+
+"""
+import cStringIO
+import hashlib
+import MySQLdb
+import os
+import random
+import signal
+import sys
+import threading
+import time
+import string
+import traceback
+
+def get_insert(table_name, idx):
+    return """INSERT INTO %s (a) VALUES (%d)""" % (table_name, idx)
+
+class Inserter(threading.Thread):
+    Instance = None
+    def __init__(self, con, table_name, num_inserts):
+        threading.Thread.__init__(self)
+        self.finished = False
+        self.num_inserts = num_inserts
+        con.autocommit(False)
+        self.con = con
+        self.rand = random.Random()
+        self.exception = None
+        self.table_name = table_name
+        Inserter.Instance = self
+        self.start()
+    def run(self):
+        try:
+            self.runme()
+        except Exception, e:
+            self.exception = traceback.format_exc()
+            print "caught (%s)" % e
+        finally:
+            self.finish()
+    def runme(self):
+        cur = self.con.cursor()
+        for i in xrange(self.num_inserts):
+            try:
+                cur.execute(get_insert(self.table_name, i))
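+                # Commit roughly 30% of the time so transactions span
+                # several inserts.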
+                r = self.rand.randint(1, 10)
+                if r < 4:
+                    self.con.commit()
+            except:
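+                # The insert failed (e.g. lock wait timeout or deadlock);
+                # get a fresh cursor and try to commit what succeeded so far.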
+                cur = self.con.cursor()
+                try:
+                    self.con.commit()
+                except Exception, e:
+                    self.exception = traceback.format_exc()
+                    print "caught (%s)" % e
+                pass
+    def finish(self):
+        self.finished = True
+
+if __name__ == '__main__':
+    if len(sys.argv) != 8:
+        print "Usage: rocksdb_concurrent_insert.py user host port db_name " \
+              "table_name num_inserts num_threads"
+        sys.exit(1)
+
+    user = sys.argv[1]
+    host = sys.argv[2]
+    port = int(sys.argv[3])
+    db = sys.argv[4]
+    table_name = sys.argv[5]
+    num_inserts = int(sys.argv[6])
+    num_workers = int(sys.argv[7])
+
+    worker_failed = False
+    workers = []
+    for i in xrange(num_workers):
+        inserter = Inserter(
+            MySQLdb.connect(user=user, host=host, port=port, db=db), table_name,
+            num_inserts)
+        workers.append(inserter)
+
+    for w in workers:
+        w.join()
+        if w.exception:
+            print "Worker hit an exception:\n%s\n" % w.exception
+            worker_failed = True
+
+    if worker_failed:
+        sys.exit(1)
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_datadir.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_datadir.test
new file mode 100644
index 00000000000..4399dd1a401
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_datadir.test
@@ -0,0 +1,33 @@
+--source include/have_rocksdb.inc
+let $ddir = $MYSQL_TMP_DIR/.rocksdb_datadir.test.install.db;
+let $rdb_ddir = $MYSQL_TMP_DIR/.rocksdb_datadir.test;
+let $sql_file = $MYSQL_TMP_DIR/rocksdb_datadir.sql;
+
+--write_file $sql_file
+CREATE DATABASE mysqltest;
+USE mysqltest;
+CREATE TABLE t1 (a INT PRIMARY KEY);
+INSERT INTO t1 VALUES(42);
+SET GLOBAL rocksdb_force_flush_memtable_now = 1;
+SELECT sleep(1);
+DROP TABLE t1;
+DROP DATABASE mysqltest;
+EOF
+
+# Must ensure this directory exists before launching mysqld
+mkdir $ddir;
+
+let $plugin_dir=`select @@plugin_dir`;
+# Launch mysqld with non-standard rocksdb_datadir
+exec $MYSQLD_BOOTSTRAP_CMD --plugin-dir=$plugin_dir --plugin-load=$HA_ROCKSDB_SO --datadir=$ddir --rocksdb_datadir=$rdb_ddir --default-storage-engine=rocksdb --skip-innodb --default-tmp-storage-engine=MyISAM --rocksdb < $sql_file;
+
+--echo Check for MANIFEST files
+--list_files $rdb_ddir MANIFEST-0000*
+
+# Clean up
+remove_files_wildcard $ddir *;
+remove_files_wildcard $rdb_ddir *;
+rmdir $ddir;
+rmdir $rdb_ddir;
+remove_file $sql_file;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_detect.inc b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_detect.inc
new file mode 100644
index 00000000000..082c61308f3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_detect.inc
@@ -0,0 +1,123 @@
+#
+# Some basic sanity tests for deadlock detection.
+#
+--source include/have_rocksdb.inc
+
+set @prior_rocksdb_lock_wait_timeout = @@rocksdb_lock_wait_timeout;
+set @prior_rocksdb_deadlock_detect = @@rocksdb_deadlock_detect;
+set global rocksdb_lock_wait_timeout = 100000;
+set global rocksdb_deadlock_detect = ON;
+
+create table t (i int primary key);
+create table r1 (id int primary key, value int);
+insert into r1 values (1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10);
+create table r2 like r1;
+insert into r2 select * from r1;
+
+# deadlock on scanned locking reads
+connect (con1,localhost,root,,);
+let $con1= `SELECT CONNECTION_ID()`;
+begin;
+update r2 set value=100 where id=9;
+
+connect (con2,localhost,root,,);
+let $con2= `SELECT CONNECTION_ID()`;
+begin;
+update r1 set value=100 where id=8;
+--send select * from r2 for update;
+
+connection con1;
+let $wait_condition =
+`SELECT CONCAT('select count(*) = 1 from information_schema.rocksdb_trx where THREAD_ID = ', '$con2', ' and WAITING_KEY != ""')`;
+--source include/wait_condition.inc
+--error ER_LOCK_DEADLOCK
+select * from r1 for update;
+rollback;
+
+connection con2;
+--reap
+rollback;
+
+connection con1;
+begin;
+insert into t values (1);
+
+connection con2;
+begin;
+insert into t values (2);
+
+connect (con3,localhost,root,,);
+begin;
+insert into t values (3);
+
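+# The three transactions now hold rows 1, 2 and 3 respectively. The selects
+# below build a wait cycle (con1 -> con2 -> con3), and con3's final insert
+# closes it, so the deadlock detector must fail it with ER_LOCK_DEADLOCK.
+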
+connection con1;
+--send select * from t where i = 2 for update
+
+connection con2;
+let $wait_condition =
+`SELECT CONCAT('select count(*) = 1 from information_schema.rocksdb_trx where THREAD_ID = ', '$con1', ' and WAITING_KEY != ""')`;
+--source include/wait_condition.inc
+
+--send select * from t where i = 3 for update
+
+connection con3;
+let $wait_condition =
+`SELECT CONCAT('select count(*) = 1 from information_schema.rocksdb_trx where THREAD_ID = ', '$con2', ' and WAITING_KEY != ""')`;
+--source include/wait_condition.inc
+
+select * from t;
+--error ER_LOCK_DEADLOCK
+insert into t values (4), (1);
+--echo # Transaction should be rolled back
+select * from t;
+rollback;
+
+connection con2;
+--reap
+rollback;
+
+connection con1;
+--reap
+rollback;
+
+
+connection default;
+create table t1 (id int primary key, value int, value2 int, index(value)) engine=rocksdb;
+insert into t1 values (1,1,1),(2,2,2),(3,3,3),(4,4,4),(5,5,5),(6,6,6),(7,7,7),(8,8,8),(9,9,9),(10,10,10);
+
+connection con1;
+begin;
+update t1 force index (value) set value2=value2+1 where value=3;
+
+connection con2;
+begin;
+update t1 force index (value) set value2=value2+1 where value=2;
+update t1 force index (value) set value2=value2+1 where value=4;
+
+connection con1;
+send update t1 force index (value) set value2=value2+1 where value=4;
+
+connection con2;
+let $wait_condition =
+`SELECT CONCAT('select count(*) = 1 from information_schema.rocksdb_trx where THREAD_ID = ', '$con1', ' and WAITING_KEY != ""')`;
+--source include/wait_condition.inc
+--error ER_LOCK_DEADLOCK
+update t1 force index (value) set value2=value2+1 where value=3;
+
+connection con1;
+--reap
+rollback;
+
+connection con2;
+rollback;
+drop table t1;
+
+
+connection default;
+disconnect con1;
+disconnect con2;
+disconnect con3;
+
+set global rocksdb_lock_wait_timeout = @prior_rocksdb_lock_wait_timeout;
+set global rocksdb_deadlock_detect = @prior_rocksdb_deadlock_detect;
+drop table t,r1,r2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_detect_rc-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_detect_rc-master.opt
new file mode 100644
index 00000000000..25b80282211
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_detect_rc-master.opt
@@ -0,0 +1 @@
+--transaction-isolation=read-committed
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_detect_rc.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_detect_rc.test
new file mode 100644
index 00000000000..9757285fe8b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_detect_rc.test
@@ -0,0 +1 @@
+--source t/rocksdb_deadlock_detect.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_detect_rr.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_detect_rr.test
new file mode 100644
index 00000000000..9757285fe8b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_detect_rr.test
@@ -0,0 +1 @@
+--source t/rocksdb_deadlock_detect.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress.inc b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress.inc
new file mode 100644
index 00000000000..c88c7ebd20a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress.inc
@@ -0,0 +1,18 @@
+#
+# Stress tests deadlock detection
+#
+
+--source include/have_rocksdb.inc
+
+create table t1 (a int primary key, b int) engine=rocksdb;
+
+set @prior_rocksdb_lock_wait_timeout = @@rocksdb_lock_wait_timeout;
+set @prior_rocksdb_deadlock_detect = @@rocksdb_deadlock_detect;
+set global rocksdb_lock_wait_timeout = 100000;
+set global rocksdb_deadlock_detect = ON;
+
+exec python ../storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress.py root 127.0.0.1 $MASTER_MYPORT test t1 10000 10;
+
+set global rocksdb_lock_wait_timeout = @prior_rocksdb_lock_wait_timeout;
+set global rocksdb_deadlock_detect = @prior_rocksdb_deadlock_detect;
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress.py b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress.py
new file mode 100644
index 00000000000..3bc8a3be010
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress.py
@@ -0,0 +1,94 @@
+"""
+This script stress tests deadlock detection.
+
+Usage: rocksdb_deadlock_stress.py user host port db_name table_name
+ num_iters num_threads
+"""
+import cStringIO
+import hashlib
+import MySQLdb
+from MySQLdb.constants import ER
+import os
+import random
+import signal
+import sys
+import threading
+import time
+import string
+import traceback
+
+def is_deadlock_error(exc):
+    error_code = exc.args[0]
+    return (error_code == MySQLdb.constants.ER.LOCK_DEADLOCK)
+
+def get_query(table_name, idx):
+    # Assume that even-numbered keys are always acquirable (they are only
+    # ever locked in share mode), to make deadlock detection more
+    # interesting.
+    if idx % 2 == 0:
+        return """SELECT * from %s WHERE a = %d LOCK IN SHARE MODE""" % (table_name, idx)
+    else:
+        r = random.randint(1, 3)
+        if r == 1:
+            return """SELECT * from %s WHERE a = %d FOR UPDATE""" % (table_name, idx)
+        elif r == 2:
+            return """INSERT INTO %s VALUES (%d, 1)
+                   ON DUPLICATE KEY UPDATE b=b+1""" % (table_name, idx)
+        else:
+            return """DELETE from %s WHERE a = %d""" % (table_name, idx)
+
+class Worker(threading.Thread):
+    def __init__(self, con, table_name, num_iters):
+        threading.Thread.__init__(self)
+        self.con = con
+        self.table_name = table_name
+        self.num_iters = num_iters
+        self.exception = None
+        self.start()
+    def run(self):
+        try:
+            self.runme()
+        except Exception, e:
+            self.exception = traceback.format_exc()
+    def runme(self):
+        cur = self.con.cursor()
+        for x in xrange(self.num_iters):
+            try:
+                for i in random.sample(xrange(100), 10):
+                    cur.execute(get_query(self.table_name, i))
+                self.con.commit()
+            except MySQLdb.OperationalError, e:
+                self.con.rollback()
+                cur = self.con.cursor()
+                if not is_deadlock_error(e):
+                    raise e
+
+if __name__ == '__main__':
+    if len(sys.argv) != 8:
+        print "Usage: rocksdb_deadlock_stress.py user host port db_name " \
+              "table_name num_iters num_threads"
+        sys.exit(1)
+
+    user = sys.argv[1]
+    host = sys.argv[2]
+    port = int(sys.argv[3])
+    db = sys.argv[4]
+    table_name = sys.argv[5]
+    num_iters = int(sys.argv[6])
+    num_workers = int(sys.argv[7])
+
+    worker_failed = False
+    workers = []
+    for i in xrange(num_workers):
+        w = Worker(
+            MySQLdb.connect(user=user, host=host, port=port, db=db), table_name,
+            num_iters)
+        workers.append(w)
+
+    for w in workers:
+        w.join()
+        if w.exception:
+            print "Worker hit an exception:\n%s\n" % w.exception
+            worker_failed = True
+
+    if worker_failed:
+        sys.exit(1)
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress_rc-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress_rc-master.opt
new file mode 100644
index 00000000000..25b80282211
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress_rc-master.opt
@@ -0,0 +1 @@
+--transaction-isolation=read-committed
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress_rc.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress_rc.test
new file mode 100644
index 00000000000..67e306b8744
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress_rc.test
@@ -0,0 +1 @@
+--source t/rocksdb_deadlock_stress.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress_rr.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress_rr.test
new file mode 100644
index 00000000000..67e306b8744
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress_rr.test
@@ -0,0 +1 @@
+--source t/rocksdb_deadlock_stress.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_debug.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_debug.test
new file mode 100644
index 00000000000..7cd4e09e946
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_debug.test
@@ -0,0 +1,14 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+
+--echo #
+--echo # Issue #728: Assertion `covers_key(b)' failed in int
+--echo # myrocks::Rdb_key_def::cmp_full_keys(const rocksdb::Slice&,
+--echo # const rocksdb::Slice&)
+--echo #
+
+CREATE TABLE t2(c1 TINYINT SIGNED KEY,c2 TINYINT UNSIGNED,c3 INT);
+INSERT INTO t2(c1)VALUES(0);
+SELECT * FROM t2 WHERE c1<=127 ORDER BY c1 DESC;
+DROP TABLE t2;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_icp-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_icp-master.opt
new file mode 100644
index 00000000000..acc0bdaa378
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_icp-master.opt
@@ -0,0 +1 @@
+--rocksdb_debug_optimizer_n_rows=20000 --rocksdb_records_in_range=1000 --rocksdb_perf_context_level=2 --userstat=ON
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_icp.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_icp.test
new file mode 100644
index 00000000000..8d0ec89e85a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_icp.test
@@ -0,0 +1,44 @@
+--source include/have_rocksdb.inc
+
+
+let $cf_name=cf1;
+
+--source include/rocksdb_icp.inc
+
+--echo #
+--echo # Issue #67: Inefficient index condition pushdown
+--echo #
+create table t0 (a int) engine=myisam;
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t1 (
+ pk int not null primary key,
+ key1 bigint(20) unsigned,
+ col1 int,
+ key (key1)
+) engine=rocksdb;
+
+insert into t1
+select
+ A.a+10*B.a+100*C.a,
+ A.a+10*B.a+100*C.a,
+ 1234
+from t0 A, t0 B, t0 C;
+
+set @count=0;
+let $save_query=
+set @count_diff =(select (value - @count) from information_schema.rocksdb_perf_context
+ where table_schema=database() and table_name='t1' and stat_type='INTERNAL_KEY_SKIPPED_COUNT');
+
+--replace_column 9 #
+explain
+select * from t1 force index(key1) where key1=1;
+
+eval $save_query;
+select * from t1 force index(key1) where key1=1;
+eval $save_query;
+--echo # The following must be =1, or in any case not 999:
+select @count_diff as "INTERNAL_KEY_SKIPPED_COUNT increment";
+
+drop table t0,t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_icp_rev-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_icp_rev-master.opt
new file mode 100644
index 00000000000..fe129d79d63
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_icp_rev-master.opt
@@ -0,0 +1 @@
+--rocksdb_debug_optimizer_n_rows=20000 --rocksdb_records_in_range=1000 --userstat=ON
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_icp_rev.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_icp_rev.test
new file mode 100644
index 00000000000..33914a4eac6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_icp_rev.test
@@ -0,0 +1,7 @@
+--source include/have_rocksdb.inc
+
+
+let $cf_name=rev:cf1;
+
+--source include/rocksdb_icp.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_locks-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_locks-master.opt
new file mode 100644
index 00000000000..c9d9edb8565
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_locks-master.opt
@@ -0,0 +1 @@
+--rocksdb_print_snapshot_conflict_queries=1
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_locks.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_locks.test
new file mode 100644
index 00000000000..ff092773737
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_locks.test
@@ -0,0 +1,94 @@
+--source include/have_rocksdb.inc
+
+#
+# MyRocks-specific tests for locking
+#
+--source include/have_debug.inc
+
+--enable_connect_log
+create table t1 (pk int not null primary key) engine=rocksdb;
+
+insert into t1 values (1),(2),(3);
+
+set autocommit=0;
+begin;
+select * from t1 where pk=1 for update;
+
+--connect (con1,localhost,root,,)
+--connection con1
+call mtr.add_suppression("Got snapshot conflict errors");
+--echo ### Connection con1
+let $ID= `select connection_id()`;
+set @@rocksdb_lock_wait_timeout=500;
+set autocommit=0;
+begin;
+--send select * from t1 where pk=1 for update;
+
+--connection default
+--echo ### Connection default
+
+let $wait_condition=
+ select 1 from INFORMATION_SCHEMA.PROCESSLIST
+ where (ID = $ID /* or SRV_ID = $ID */) and STATE = "Waiting for row lock";
+--source include/wait_condition.inc
+
+rollback;
+
+connection con1;
+reap;
+rollback;
+connection default;
+
+##
+## Now, repeat the same test but let the wait time out.
+##
+begin;
+select * from t1 where pk=1 for update;
+
+--connection con1
+--echo ### Connection con1
+set @@rocksdb_lock_wait_timeout=2;
+set autocommit=0;
+begin;
+--error ER_LOCK_WAIT_TIMEOUT
+select * from t1 where pk=1 for update;
+
+--connection default
+
+rollback;
+set autocommit=1;
+
+--connection con1
+drop table t1;
+--connection default
+
+--echo #
+--echo # Now, test what happens if another transaction modified the record and committed
+--echo #
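+# (con1's snapshot was taken before the other transaction's UPDATE committed,
+# so its locking read hits a snapshot conflict, which is reported as
+# ER_LOCK_DEADLOCK.)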
+
+CREATE TABLE t1 (
+ id int primary key,
+ value int
+) engine=rocksdb collate latin1_bin;
+insert into t1 values (1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10);
+
+--connection con1
+BEGIN;
+SELECT * FROM t1 WHERE id=3;
+
+--connection default
+BEGIN;
+UPDATE t1 SET value=30 WHERE id=3;
+COMMIT;
+
+--connection con1
+--error ER_LOCK_DEADLOCK
+SELECT * FROM t1 WHERE id=3 FOR UPDATE;
+
+ROLLBACK;
+--disconnect con1
+--connection default
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_parts-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_parts-master.opt
new file mode 100644
index 00000000000..ba9364e1523
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_parts-master.opt
@@ -0,0 +1 @@
+--rocksdb_debug_optimizer_n_rows=1000
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_parts.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_parts.test
new file mode 100644
index 00000000000..b8d41ae942a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_parts.test
@@ -0,0 +1,160 @@
+--source include/have_rocksdb.inc
+
+--source include/have_partition.inc
+
+--disable_warnings
+drop table if exists t1,t2;
+--enable_warnings
+
+--echo # Tests for MyRocks + partitioning
+
+--echo #
+--echo # MyRocks Issue #70: Server crashes in Rdb_key_def::get_primary_key_tuple
+--echo #
+CREATE TABLE t1 (pk INT PRIMARY KEY, f1 INT, f2 INT, KEY(f2)) ENGINE=RocksDB
+PARTITION BY HASH(pk) PARTITIONS 2;
+INSERT INTO t1 VALUES (1, 6, NULL), (2, NULL, 1);
+
+CREATE TABLE t2 (pk INT PRIMARY KEY, f1 INT) ENGINE=RocksDB;
+INSERT INTO t2 VALUES (1, 1), (2, 1);
+
+SELECT f1 FROM t1 WHERE f2 = ( SELECT f1 FROM t2 WHERE pk = 2 );
+
+drop table t1,t2;
+
+--echo #
+--echo # Issue#105: key_info[secondary_key].actual_key_parts does not include primary key on partitioned tables
+--echo #
+CREATE TABLE t1 (
+ id INT PRIMARY KEY,
+ a set ('a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z') CHARACTER SET utf8,
+ b set ('a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z') CHARACTER SET utf8 default null,
+ c set ('a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z') CHARACTER SET utf8 not null,
+ INDEX (a),
+ INDEX (b),
+ INDEX (c)
+) ENGINE=RocksDB PARTITION BY key (id) partitions 2;
+
+INSERT INTO t1 (id, b) VALUES (28, 3);
+UPDATE t1 SET id=8 WHERE c < 8 LIMIT 1;
+check table t1;
+drop table t1;
+
+--echo #
+--echo # Issue #105, another testcase
+--echo #
+create table t1 (
+ pk int primary key,
+ col1 int,
+ col2 int,
+ key (col1) comment 'rev:cf_issue105'
+) engine=rocksdb partition by hash(pk) partitions 2;
+
+insert into t1 values (1,10,10);
+insert into t1 values (2,10,10);
+
+insert into t1 values (11,20,20);
+insert into t1 values (12,20,20);
+
+set @tmp_rfirr= @@rocksdb_force_index_records_in_range;
+set rocksdb_force_index_records_in_range= 12;
+
+--replace_column 9 #
+explain select * from t1 force index(col1) where col1=10;
+select * from t1 force index(col1) where col1=10;
+select * from t1 use index () where col1=10;
+
+set rocksdb_force_index_records_in_range= @tmp_rfirr;
+
+drop table t1;
+
+--echo #
+--echo # Issue #108: Index-only scans do not work for partitioned tables and extended keys
+--echo #
+create table t1 (
+ pk int primary key,
+ col1 int,
+ col2 int,
+ key (col1)
+) engine=rocksdb partition by hash(pk) partitions 2;
+
+insert into t1 values (1,10,10);
+insert into t1 values (2,10,10);
+
+insert into t1 values (11,20,20);
+insert into t1 values (12,20,20);
+--echo # The following must use "Using index"
+explain select pk from t1 force index(col1) where col1=10;
+
+drop table t1;
+
+--echo #
+--echo # Issue #214: subqueries cause crash
+--echo #
+create TABLE t1(a int,b int,c int,primary key(a,b))
+ partition by list (b*a) (partition x1 values in (1) tablespace ts1,
+ partition x2 values in (3,11,5,7) tablespace ts2,
+ partition x3 values in (16,8,5+19,70-43) tablespace ts3);
+create table t2(b binary(2));
+set session optimizer_switch='materialization=off';
+insert into t1(a,b) values(1,7);
+select a from t1 where a in (select a from t1 where a in (select b from t2));
+
+drop table t1, t2;
+
+--echo #
+--echo # Issue #260: altering name to invalid value leaves table unaccessible
+--echo #
+CREATE TABLE t1 (c1 INT NOT NULL, c2 CHAR(5)) PARTITION BY HASH(c1) PARTITIONS 4;
+INSERT INTO t1 VALUES(1,'a');
+--replace_result \\ /
+--error ER_ERROR_ON_RENAME
+RENAME TABLE t1 TO db3.t3;
+SELECT * FROM t1;
+SHOW TABLES;
+# try it again to the same database
+RENAME TABLE t1 TO test.t3;
+SELECT * FROM t3;
+SHOW TABLES;
+# now try it again but with another existing database
+CREATE DATABASE db3;
+USE test;
+RENAME TABLE t3 to db3.t2;
+USE db3;
+SELECT * FROM t2;
+SHOW TABLES;
+# cleanup
+DROP TABLE t2;
+use test;
+DROP DATABASE db3;
+
+--echo #
+--echo # MDEV-13153 Assertion `global_status_var.global_memory_used == 0 '
+--echo # failed upon server restart with partitioned RocksDB table
+--echo #
+CREATE TABLE t1 (a INT) ENGINE=RocksDB PARTITION BY HASH(a) PARTITIONS 2;
+INSERT INTO t1 (a) VALUES (1),(2);
+ALTER TABLE t1 ADD PARTITION PARTITIONS 2;
+--source include/restart_mysqld.inc
+SELECT 1;
+DROP TABLE t1;
+
+--echo #
+--echo # MDEV-15245: Assertion `false' failed in myrocks::ha_rocksdb::position
+--echo #
+CREATE TABLE t1 (a INT) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (1),(2);
+# partitioning
+CREATE TABLE t2 (b INT) ENGINE=RocksDB
+PARTITION BY RANGE(b)
+(PARTITION p0 VALUES LESS THAN (1),
+ PARTITION p1 VALUES LESS THAN MAXVALUE);
+
+
+INSERT INTO t2 VALUES (NULL),(0);
+CREATE ALGORITHM = MERGE VIEW v1 AS SELECT t2.* FROM t1, t2;
+UPDATE v1 SET b = 5;
+
+drop view v1;
+drop table t1,t2;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_qcache-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_qcache-master.opt
new file mode 100644
index 00000000000..a00258bc48c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_qcache-master.opt
@@ -0,0 +1 @@
+--query_cache_type=1
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_qcache.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_qcache.test
new file mode 100644
index 00000000000..0369e758f5d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_qcache.test
@@ -0,0 +1,43 @@
+--source include/have_rocksdb.inc
+
+# Important:
+# The test needs to be run with --mysqld=--query-cache-type=1
+
+--source include/have_query_cache.inc
+set @save_query_cache_type=@@global.query_cache_type;
+set @save_query_cache_size=@@global.query_cache_size;
+set @@global.query_cache_type=1;
+set @@global.query_cache_size=1024*1024;
+
+--enable_connect_log
+
+create table t1 (pk int primary key, c char(8)) engine=RocksDB;
+insert into t1 values (1,'new'),(2,'new');
+
+select * from t1;
+
+--connect (con1,localhost,root,,)
+
+update t1 set c = 'updated';
+
+--connection default
+flush status;
+show status like 'Qcache_hits';
+show global status like 'Qcache_hits';
+select * from t1;
+select sql_no_cache * from t1;
+select * from t1 where pk = 1;
+show status like 'Qcache_hits';
+--echo # MariaDB: Qcache_not_cached is not incremented for select sql_no_cache queries
+--echo # so the following query produces 2, not 3:
+show status like 'Qcache_not_cached';
+show global status like 'Qcache_hits';
+
+drop table t1;
+
+#
+# Cleanup
+#
+set @@global.query_cache_type=@save_query_cache_type;
+set @@global.query_cache_size=@save_query_cache_size;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range-master.opt
new file mode 100644
index 00000000000..6ad42e58aa2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range-master.opt
@@ -0,0 +1 @@
+--rocksdb_debug_optimizer_n_rows=1000 --rocksdb_records_in_range=50
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range.test
new file mode 100644
index 00000000000..f4b6096c696
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range.test
@@ -0,0 +1,196 @@
+--source include/have_rocksdb.inc
+
+#
+# Range access test for RocksDB storage engine
+#
+select * from information_schema.engines where engine = 'rocksdb';
+
+--disable_warnings
+drop table if exists t0,t1,t2,t3,t4,t5;
+--enable_warnings
+create table t0 (a int) engine=myisam;
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t1(a int) engine=myisam;
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+
+create table t2 (
+ pk int not null,
+ a int not null,
+ b int not null,
+ primary key(pk),
+ key(a) comment 'rev:cf1'
+) engine=rocksdb;
+
+# 10 pk values for each value of a...
+insert into t2 select A.a, FLOOR(A.a/10), A.a from t1 A;
+
+--echo #
+--echo # HA_READ_KEY_EXACT tests
+--echo #
+
+--echo # Original failure was here:
+--replace_column 9 #
+explain
+select * from t2 force index (a) where a=0;
+select * from t2 force index (a) where a=0;
+
+--echo # The rest are for code coverage:
+--replace_column 9 #
+explain
+select * from t2 force index (a) where a=2;
+select * from t2 force index (a) where a=2;
+
+--replace_column 9 #
+explain
+select * from t2 force index (a) where a=3 and pk=33;
+select * from t2 force index (a) where a=3 and pk=33;
+
+select * from t2 force index (a) where a=99 and pk=99;
+select * from t2 force index (a) where a=0 and pk=0;
+select * from t2 force index (a) where a=-1;
+select * from t2 force index (a) where a=-1 and pk in (101,102);
+select * from t2 force index (a) where a=100 and pk in (101,102);
+
+
+--echo #
+--echo # #36: Range in form tbl.key >= const doesn't work in reverse column family
+--echo #
+--replace_column 9 #
+explain
+select count(*) from t2 force index (a) where a>=0 and a <=1;
+select count(*) from t2 force index (a) where a>=0 and a <=1;
+
+--replace_column 9 #
+explain
+select count(*) from t2 force index (a) where a>=-1 and a <=1;
+select count(*) from t2 force index (a) where a>=-1 and a <=1;
+
+--replace_column 9 #
+explain
+select * from t2 force index (a) where a=0 and pk>=3;
+select * from t2 force index (a) where a=0 and pk>=3;
+
+--echo # Try edge cases where we fall over the end of the table
+create table t3 like t2;
+insert into t3 select * from t2;
+
+select * from t3 where pk>=1000000;
+select * from t2 where pk>=1000000;
+
+--echo #
+--echo # #42: Range in form tbl.key > const doesn't work in reverse column family
+--echo #
+--replace_column 9 #
+explain
+select count(*) from t2 force index (a) where a>0;
+select count(*) from t2 force index (a) where a>0;
+
+--replace_column 9 #
+explain
+select count(*) from t2 force index (a) where a>99;
+select count(*) from t2 force index (a) where a>99;
+
+select * from t2 where pk>1000000;
+select * from t3 where pk>1000000;
+
+--replace_column 9 #
+explain
+select count(*) from t2 force index (a) where a=2 and pk>25;
+select count(*) from t2 force index (a) where a=2 and pk>25;
+
+
+select * from t2 force index (a) where a>-10 and a < 1;
+select * from t3 force index (a) where a>-10 and a < 1;
+
+
+--echo #
+--echo # #46: index_read_map(HA_READ_BEFORE_KEY) does not work in reverse column family
+--echo #
+select max(a) from t2 where a < 2;
+select max(a) from t2 where a < -1;
+
+select max(pk) from t2 where a=3 and pk < 6;
+
+select max(pk) from t2 where pk < 200000;
+select max(pk) from t2 where pk < 20;
+
+select max(a) from t3 where a < 2;
+select max(a) from t3 where a < -1;
+select max(pk) from t3 where pk < 200000;
+select max(pk) from t3 where pk < 20;
+
+select max(pk) from t2 where a=3 and pk < 33;
+select max(pk) from t3 where a=3 and pk < 33;
+
+--echo #
+--echo # #48: index_read_map(HA_READ_PREFIX_LAST) does not work in reverse CF
+--echo #
+
+--echo # Tests for search_flag=HA_READ_PREFIX_LAST_OR_PREV
+--echo # Note: the next explain has "Using index condition" in fb/mysql-5.6
+--echo # but "Using where" in MariaDB because the latter does not
+--echo # support ICP over reverse scans.
+--replace_column 9 #
+explain
+select * from t2 where a between 99 and 2000 order by a desc;
+select * from t2 where a between 99 and 2000 order by a desc;
+
+select max(a) from t2 where a <=10;
+select max(a) from t2 where a <=-4;
+
+select max(pk) from t2 where a=5 and pk <=55;
+select max(pk) from t2 where a=5 and pk <=55555;
+select max(pk) from t2 where a=5 and pk <=0;
+
+select max(pk) from t2 where pk <=-1;
+select max(pk) from t2 where pk <=999999;
+select max(pk) from t3 where pk <=-1;
+select max(pk) from t3 where pk <=999999;
+
+--echo #
+--echo # Tests for search_flag=HA_READ_PREFIX_LAST
+--echo #
+
+create table t4 (
+ pk int primary key,
+ a int,
+ b int,
+ c int,
+ key(a,b,c)
+) engine=rocksdb;
+
+insert into t4 select pk,pk,pk,pk from t2 where pk < 100;
+
+--replace_column 9 #
+explain
+select * from t4 where a=1 and b in (1) order by c desc;
+select * from t4 where a=1 and b in (1) order by c desc;
+
+--replace_column 9 #
+explain
+select * from t4 where a=5 and b in (4) order by c desc;
+select * from t4 where a=5 and b in (4) order by c desc;
+
+--echo # HA_READ_PREFIX_LAST for reverse-ordered CF
+create table t5 (
+ pk int primary key,
+ a int,
+ b int,
+ c int,
+ key(a,b,c) comment 'rev:cf2'
+) engine=rocksdb;
+
+insert into t5 select pk,pk,pk,pk from t2 where pk < 100;
+
+--replace_column 9 #
+explain
+select * from t5 where a=1 and b in (1) order by c desc;
+select * from t5 where a=1 and b in (1) order by c desc;
+
+--replace_column 9 #
+explain
+select * from t5 where a=5 and b in (4) order by c desc;
+select * from t5 where a=5 and b in (4) order by c desc;
+
+drop table t0,t1,t2,t3,t4,t5;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range2.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range2.test
new file mode 100644
index 00000000000..28010d13753
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range2.test
@@ -0,0 +1,33 @@
+--source include/have_rocksdb.inc
+
+# Issue #212: MyRocks chooses a full index scan even if a range scan is more efficient.
+# rocksdb_debug_optimizer_n_rows must not be set for this test.
+
+create table t1 (id1 bigint, id2 bigint, c1 bigint, c2 bigint, c3 bigint, c4 bigint, c5 bigint, c6 bigint, c7 bigint, primary key (id1, id2), index i(c1, c2));
+--disable_query_log
+let $i=0;
+while ($i<10000)
+{
+ inc $i;
+ eval insert t1(id1, id2, c1, c2, c3, c4, c5, c6, c7)
+ values($i, 0, $i, 0, 0, 0, 0, 0, 0);
+}
+--enable_query_log
+analyze table t1;
+select count(*) from t1;
+--replace_column 9 #
+explain select c1 from t1 where c1 > 5 limit 10;
+drop table t1;
+
+--echo #
+--echo # MDEV-17414: MyROCKS order desc limit 1 fails
+--echo #
+create table t1 (date date);
+insert into t1 values ('2018-10-04'), ('2018-10-05');
+select * from t1 where date < '2018-10-09' order by date desc limit 1; # Works as expected
+alter table t1 add index date_index (date);
+select * from t1 where date < '2018-10-05' order by date desc limit 1; # Works as expected
+--echo # this should not produce an empty set:
+select * from t1 where date < '2018-10-09' order by date desc limit 1;
+drop table t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl.cnf b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl.cnf
new file mode 100644
index 00000000000..9ceb0cc0a97
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl.cnf
@@ -0,0 +1,16 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+sync_binlog=0
+binlog_format=row
+rocksdb_read_free_rpl=PK_SK
+slave-exec-mode=strict
+rocksdb_perf_context_level=3
+
+[mysqld.2]
+sync_binlog=0
+binlog_format=row
+rocksdb_read_free_rpl=PK_SK
+slave-exec-mode=strict
+rocksdb_default_cf_options=write_buffer_size=16k;target_file_size_base=16k
+rocksdb_perf_context_level=3
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl.test
new file mode 100644
index 00000000000..e1fb9db0b19
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl.test
@@ -0,0 +1,414 @@
+source include/have_rocksdb.inc;
+source include/master-slave.inc;
+source include/have_debug.inc;
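+
+# With rocksdb_read_free_rpl=PK_SK (set in the accompanying .cnf), the slave
+# applies row-based update/delete events without reading the old row first,
+# so rocksdb_num_get_for_update_calls should stay flat on the slave.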
+
+
+connection master;
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+# initialization/insert
+connection master;
+create table t1 (id int primary key, value int);
+insert into t1 values (1,1), (2,2), (3,3), (4,4);
+--source include/sync_slave_sql_with_master.inc
+
+--let $diff_tables= master:t1, slave:t1
+
+--echo
+--echo # regular update/delete. With rocksdb_read_free_rpl=PK_SK, rocksdb_num_get_for_update_calls does not increase on slaves
+--echo
+connection slave;
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+update t1 set value=value+1 where id=1;
+delete from t1 where id=4;
+select * from t1;
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+select * from t1;
+
+
+--echo
+--echo # "rocks_read_free_rpl=PK_SK" makes "row not found error" not happen anymore
+--echo
+connection slave;
+--source include/stop_slave.inc
+delete from t1 where id in (2, 3);
+--source include/start_slave.inc
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+
+connection master;
+update t1 set value=value+1 where id=3;
+delete from t1 where id=2;
+select * from t1;
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+select * from t1;
+
+
+--echo
+--echo ## tables without a primary key -- read-free replication should be disabled
+--echo
+--echo
+--echo #no index
+--echo
+connection master;
+drop table t1;
+create table t1 (c1 int, c2 int);
+insert into t1 values (1,1), (2,2),(3,3),(4,4),(5,5);
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+update t1 set c2=100 where c1=3;
+delete from t1 where c1 <= 2;
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+select * from t1;
+
+--echo
+--echo #secondary index only
+--echo
+connection master;
+drop table t1;
+create table t1 (c1 int, c2 int, index i(c1));
+insert into t1 values (1,1), (2,2),(3,3),(4,4),(5,5);
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+update t1 set c2=100 where c1=3;
+delete from t1 where c1 <= 2;
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+select * from t1;
+
+
+
+--echo
+--echo ## large row operations -- primary key modification, secondary key modification
+--echo
+connection master;
+drop table t1;
+create table t1 (id1 bigint, id2 bigint, c1 bigint, c2 bigint, c3 bigint, c4 bigint, c5 bigint, c6 bigint, c7 bigint, primary key (id1, id2), index i(c1, c2));
+
+--disable_query_log
+let $i=1;
+while ($i<=10000)
+{
+ eval insert t1(id1,id2,c1,c2,c3,c4,c5,c6,c7)
+ values($i,0,$i,0,0,0,0,0,0);
+ inc $i;
+}
+--enable_query_log
+
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+
+--echo
+--echo #updating all secondary keys by 1
+--echo
+--disable_query_log
+let $i=1;
+while ($i<=10000)
+{
+ eval update t1 set c2=c2+1 where id1=$i and id2=0;
+ inc $i;
+}
+--enable_query_log
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+--source include/diff_tables.inc
+
+--echo
+--echo #updating all primary keys by 2
+--echo
+connection slave;
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+--disable_query_log
+let $i=1;
+while ($i<=10000)
+{
+ eval update t1 set id2=id2+2 where id1=$i and id2=0;
+ inc $i;
+}
+--enable_query_log
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+--source include/diff_tables.inc
+
+--echo
+--echo #updating secondary keys after truncating t1 on slave
+--echo
+connection slave;
+truncate table t1;
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+update t1 set c2=c2+10;
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+--source include/diff_tables.inc
+
+--echo
+--echo #updating primary keys after truncating t1 on slave
+--echo
+connection slave;
+truncate table t1;
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+update t1 set id2=id2+10;
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+--source include/diff_tables.inc
+
+--echo
+--echo #deleting half rows
+--echo
+connection slave;
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+delete from t1 where id1 <= 5000;
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+--source include/diff_tables.inc
+
+--echo
+--echo # rocksdb_read_free_rpl = PK_ONLY, i.e. read-free applies only to tables whose only key is the PK
+--echo
+connection slave;
+--echo [on slave]
+stop slave;
+set @@global.rocksdb_read_free_rpl = PK_ONLY;
+start slave;
+connection master;
+--echo [on master]
+create table t2 (id int primary key, i1 int, i2 int, value int);
+create table u2 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2));
+insert into t2 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+insert into u2 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+--source include/sync_slave_sql_with_master.inc
+
+# make a mismatch between the slave and the master
+connection slave;
+--echo [on slave]
+delete from t2 where id <= 2;
+delete from u2 where id <= 2;
+
+# make changes on the master
+connection master;
+--echo [on master]
+update t2 set i2=100, value=100 where id=1;
+update u2 set i2=100, value=100 where id=1;
+
+connection slave;
+--echo [on slave]
+call mtr.add_suppression("Slave SQL.*Could not execute Update_rows event on table test.u2.*Error_code.*");
+call mtr.add_suppression("Slave: Can't find record in 'u2'.*");
+# wait until we have the expected error
+--let $slave_sql_errno= convert_error(ER_KEY_NOT_FOUND)
+--source include/wait_for_slave_sql_error.inc
+
+# query the t2 table on the slave
+connection slave;
+select count(*) from t2 force index(primary);
+select * from t2 where id=1;
+select i1 from t2 where i1=1;
+select i2 from t2 where i2=100;
+
+# query the u2 table on the slave
+select count(*) from u2 force index(primary);
+select count(*) from u2 force index(i1);
+select count(*) from u2 force index(i2);
+select * from u2 where id=1;
+select i1 from u2 where i1=1;
+select i2 from u2 where i2=100;
+
+# the slave replication thread stopped because of the errors;
+# clean up the mismatch and restart it
+--disable_query_log
+insert into u2 values(1,1,1,1), (2,2,2,2);
+start slave sql_thread;
+--source include/wait_for_slave_sql_to_start.inc
+--enable_query_log
+
+connection slave;
+--echo [on slave]
+stop slave;
+set @@global.rocksdb_read_free_rpl = PK_SK;
+start slave;
+
+--echo
+--echo # some tables with read-free replication on and some with it off
+--echo
+# We'll set the table filter to all tables starting with 't'
+connection slave;
+--echo [on slave]
+stop slave;
+set @@global.rocksdb_read_free_rpl_tables = "t.*";
+start slave;
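+# (Assumption: rocksdb_read_free_rpl_tables is matched as a regular
+# expression against table names, so "t.*" covers t2 below while u2 keeps
+# doing read checks -- consistent with the t2-vs-u2 results that follow.)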
+connection master;
+--echo [on master]
+drop table if exists t2;
+drop table if exists u2;
+create table t2 (id int primary key, i1 int, i2 int, value int);
+create table u2 (id int primary key, i1 int, i2 int, value int);
+insert into t2 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+insert into u2 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+--source include/sync_slave_sql_with_master.inc
+
+# make a mismatch between the slave and the master
+connection slave;
+--echo [on slave]
+delete from t2 where id <= 2;
+delete from u2 where id <= 2;
+
+# make changes on the master
+connection master;
+--echo [on master]
+update t2 set i2=100, value=100 where id=1;
+update u2 set i2=100, value=100 where id=1;
+
+connection slave;
+--echo [on slave]
+call mtr.add_suppression("Slave SQL.*Could not execute Update_rows event on table test.u2.*Error_code.*");
+call mtr.add_suppression("Slave: Can't find record in 'u2'.*");
+# wait until we have the expected error
+--let $slave_sql_errno= convert_error(ER_KEY_NOT_FOUND)
+--source include/wait_for_slave_sql_error.inc
+
+# query the t2 table on the slave
+connection slave;
+select count(*) from t2 force index(primary);
+select * from t2 where id=1;
+select i1 from t2 where i1=1;
+select i2 from t2 where i2=100;
+
+# query the u2 table on the slave
+select count(*) from u2 force index(primary);
+select * from u2 where id=1;
+select i1 from u2 where i1=1;
+select i2 from u2 where i2=100;
+
+# the slave replication thread stopped because of the errors;
+# clean up the mismatch and restart it
+--disable_query_log
+insert into u2 values(1,1,1,1), (2,2,2,2);
+start slave sql_thread;
+--source include/wait_for_slave_sql_to_start.inc
+--enable_query_log
+
+connection slave;
+--echo [on slave]
+stop slave;
+set @@global.rocksdb_read_free_rpl_tables = ".*";
+start slave;
+
+--echo
+--echo # secondary keys lose rows
+--echo
+connection master;
+--echo [on master]
+create table t3 (id int primary key, i1 int, i2 int, value int, index(i1),
+index(i2));
+insert into t3 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+--source include/sync_slave_sql_with_master.inc
+
+# make a mismatch between the slave and the master
+connection slave;
+--echo [on slave]
+delete from t3 where id <= 2;
+
+# make changes on the master
+connection master;
+--echo [on master]
+update t3 set i2=100, value=100 where id=1;
+
+# make sure the slave is caught up
+--source include/sync_slave_sql_with_master.inc
+
+# query the t3 table on the slave
+connection slave;
+select count(*) from t3 force index(primary);
+select count(*) from t3 force index(i1);
+select count(*) from t3 force index(i2);
+select * from t3 where id=1;
+select i1 from t3 where i1=1;
+select i2 from t3 where i2=100;
+
+--echo
+--echo # secondary keys have extra rows
+--echo
+connection master;
+--echo [on master]
+create table t4 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2));
+insert into t4 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+--source include/sync_slave_sql_with_master.inc
+
+# make a mismatch between the slave and the master
+connection slave;
+--echo [on slave]
+update t4 set i1=100 where id=1;
+
+# make changes on the master
+connection master;
+--echo [on master]
+delete from t4 where id=1;
+
+# make sure the slave is caught up
+--source include/sync_slave_sql_with_master.inc
+
+# query the t4 table on the slave
+connection slave;
+--echo [on slave]
+select count(*) from t4 force index(primary);
+select count(*) from t4 force index(i1);
+select count(*) from t4 force index(i2);
+select i1 from t4 where i1=100;
+
+--echo
+--echo # inserts are also read-free
+--echo
+connection master;
+--echo [on master]
+drop table if exists t2;
+drop table if exists t3;
+create table t2 (id int primary key, i1 int, i2 int);
+create table t3 (id int primary key, i1 int, i2 int, key(i1));
+connection slave;
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+insert into t2 values(1, 1, 1);
+insert into t2 values(2, 2, 2);
+insert into t3 values(1, 1, 1);
+insert into t3 values(2, 2, 2);
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+select * from t2;
+select * from t3;
+
+# cleanup
+connection master;
+drop table t1, t2, t3, t4, u2;
+
+--source include/rpl_end.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.cnf b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.cnf
new file mode 100644
index 00000000000..f225d5dd71b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.cnf
@@ -0,0 +1,17 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+sync_binlog=0
+binlog_format=row
+rocksdb_perf_context_level=3
+
+[mysqld.2]
+sync_binlog=0
+binlog_format=row
+slave-exec-mode=strict
+rocksdb_perf_context_level=3
+slave_use_idempotent_for_recovery=YES
+slave_parallel_workers=8
+mts_dependency_replication=STMT
+mts_dependency_order_commits=0
+slave_tx_isolation=READ-COMMITTED
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.inc b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.inc
new file mode 100644
index 00000000000..e69bcce72d8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.inc
@@ -0,0 +1,69 @@
+
+disable_query_log;
+
+# Create a schema with different kinds of tables (with different kinds of keys)
+connection master;
+create table t1(a int, b int, c int, d int); #no keys
+create table t2(a int primary key, b int, c int, d int); #only pk
+create table t3(a int, b int, c int, d int, key(b)); #only sk
+create table t4(a int, b int unique, c int, d int); #only unique sk
+create table t5(a int primary key, b int, c int, d int, key(b)); #pk + sk
+create table t6(a int primary key, b int unique, c int, d int); #pk + unique sk
+create table t7(a int, b int unique, c int, d int, key(c)); #sk + unique sk
+create table t8(a int primary key, b int unique, c int, d int, key(c)); #pk + sk + unique sk
+
+# Insert a bunch of rows
+let $iter = 0;
+while ($iter < 1000) {
+ let $t = 1;
+ while ($t <= 8) {
+ eval insert into t$t values($iter, $iter, $iter, $iter);
+ inc $t;
+ }
+ inc $iter;
+}
+
+let $iter = 0;
+while ($iter < 10) {
+ let $t = 1;
+ while ($t <= 8) {
+ eval update t$t set a = a + 10000 where a > 900; # update pk (if any)
+ eval update t$t set b = b + 10000 where b > 900; # update sk or unique (if any)
+    eval update t$t set c = c + 10000 where c > 900; # update sk or unique (if any)
+ eval update t$t set d = d + 10000 where d > 900; # update non key col
+
+ eval delete from t$t where a < 25;
+ eval delete from t$t where b < 50;
+ eval delete from t$t where c < 75;
+ eval delete from t$t where d < 100;
+
+ # Re-insert the deleted rows
+ let $i = 0;
+ while ($i < 100) {
+ eval insert into t$t values($i, $i, $i, $i);
+ inc $i;
+ }
+ inc $t;
+ }
+ inc $iter;
+}
+source include/sync_slave_sql_with_master.inc;
+
+connection master;
+let $t = 1;
+while ($t <= 8) {
+ let $diff_tables = master:t$t, slave:t$t;
+ source include/diff_tables.inc;
+ inc $t;
+}
+
+# Cleanup
+connection master;
+let $t = 1;
+while ($t <= 8) {
+ eval drop table t$t;
+ inc $t;
+}
+source include/sync_slave_sql_with_master.inc;
+
+enable_query_log;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.test
new file mode 100644
index 00000000000..31e65db8d5d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.test
@@ -0,0 +1,22 @@
+source include/have_rocksdb.inc;
+source include/master-slave.inc;
+source include/not_valgrind.inc;
+
+connection slave;
+source include/stop_slave.inc;
+set @@global.rocksdb_read_free_rpl = PK_SK;
+source include/start_slave.inc;
+source rocksdb_read_free_rpl_stress.inc;
+
+connection slave;
+source include/stop_slave.inc;
+set @@global.rocksdb_read_free_rpl = PK_ONLY;
+source include/start_slave.inc;
+source rocksdb_read_free_rpl_stress.inc;
+
+connection slave;
+source include/stop_slave.inc;
+set @@global.rocksdb_read_free_rpl = default;
+source include/start_slave.inc;
+
+source include/rpl_end.inc;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_row_stats.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_row_stats.test
new file mode 100644
index 00000000000..ebcc741fc17
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_row_stats.test
@@ -0,0 +1,57 @@
+source include/have_rocksdb.inc;
+create table t1 (a int primary key) engine=rocksdb;
+
+-- echo Verify rocksdb_rows_inserted
+select variable_value into @old_rows_inserted from information_schema.global_status where variable_name = 'rocksdb_rows_inserted';
+insert into t1 values(1);
+select variable_value into @new_rows_inserted from information_schema.global_status where variable_name = 'rocksdb_rows_inserted';
+select @new_rows_inserted - @old_rows_inserted;
+
+-- echo Verify rocksdb_rows_updated
+select variable_value into @old_rows_updated from information_schema.global_status where variable_name = 'rocksdb_rows_updated';
+update t1 set a=2 where a=1;
+select variable_value into @new_rows_updated from information_schema.global_status where variable_name = 'rocksdb_rows_updated';
+select @new_rows_updated - @old_rows_updated;
+
+-- echo Verify rocksdb_rows_read
+select variable_value into @old_rows_read from information_schema.global_status where variable_name = 'rocksdb_rows_read';
+select * from t1;
+select variable_value into @new_rows_read from information_schema.global_status where variable_name = 'rocksdb_rows_read';
+select @new_rows_read - @old_rows_read;
+
+-- echo Verify rocksdb_rows_deleted
+select variable_value into @old_rows_deleted from information_schema.global_status where variable_name = 'rocksdb_rows_deleted';
+delete from t1;
+select variable_value into @new_rows_deleted from information_schema.global_status where variable_name = 'rocksdb_rows_deleted';
+select @new_rows_deleted - @old_rows_deleted;
+
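+# Rows in system tables (the mysql schema) are counted separately, via the
+# rocksdb_system_rows_* counters exercised below.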
+use mysql;
+create table t1(a int primary key) engine=rocksdb;
+
+-- echo Verify rocksdb_system_rows_inserted
+select variable_value into @old_system_rows_inserted from information_schema.global_status where variable_name = 'rocksdb_system_rows_inserted';
+insert into t1 values(1);
+select variable_value into @new_system_rows_inserted from information_schema.global_status where variable_name = 'rocksdb_system_rows_inserted';
+select @new_system_rows_inserted - @old_system_rows_inserted;
+
+-- echo Verify rocksdb_system_rows_updated
+select variable_value into @old_system_rows_updated from information_schema.global_status where variable_name = 'rocksdb_system_rows_updated';
+update t1 set a=2 where a=1;
+select variable_value into @new_system_rows_updated from information_schema.global_status where variable_name = 'rocksdb_system_rows_updated';
+select @new_system_rows_updated - @old_system_rows_updated;
+
+-- echo Verify rocksdb_system_rows_read
+select variable_value into @old_system_rows_read from information_schema.global_status where variable_name = 'rocksdb_system_rows_read';
+select * from t1;
+select variable_value into @new_system_rows_read from information_schema.global_status where variable_name = 'rocksdb_system_rows_read';
+select @new_system_rows_read - @old_system_rows_read;
+
+-- echo Verify rocksdb_system_rows_deleted
+select variable_value into @old_system_rows_deleted from information_schema.global_status where variable_name = 'rocksdb_system_rows_deleted';
+delete from t1;
+select variable_value into @new_system_rows_deleted from information_schema.global_status where variable_name = 'rocksdb_system_rows_deleted';
+select @new_system_rows_deleted - @old_system_rows_deleted;
+
+drop table t1;
+use test;
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_table_stats_sampling_pct_change.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_table_stats_sampling_pct_change.test
new file mode 100644
index 00000000000..5eaeff5cdbd
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_table_stats_sampling_pct_change.test
@@ -0,0 +1,80 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+#
+# First set sampling rate to 100% and make sure that the baseline is
+# correct and we get the correct number of rows as a result.
+#
+SET @ORIG_PCT = @@ROCKSDB_TABLE_STATS_SAMPLING_PCT;
+SET @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT = 100;
+
+create table t1 (pk int primary key) engine=rocksdb;
+
+--disable_query_log
+let $i = 0;
+let $n = 10000;
+
+while ($i < $n)
+{
+ inc $i;
+ eval insert t1(pk) values($i);
+}
+--enable_query_log
+
+set global rocksdb_force_flush_memtable_now = true;
+
+# This should return 10K rows.
+select table_rows from information_schema.tables
+where table_schema = database() and table_name = 't1';
+
+let $t1_len = `select data_length from information_schema.tables where table_schema = database() and table_name = 't1'`;
+
+drop table t1;
+
+--disable_warnings
+drop table if exists t2;
+--enable_warnings
+
+#
+# Now set the sampling rate to 10% and expect to see the same number of
+# rows.
+#
+SET @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT = 10;
+
+create table t2 (pk int primary key) engine=rocksdb;
+
+--disable_query_log
+let $i = 0;
+let $n = 10000;
+
+while ($i < $n)
+{
+ inc $i;
+ eval insert t2(pk) values($i);
+}
+--enable_query_log
+
+set global rocksdb_force_flush_memtable_now = true;
+
+# This should return 10K rows as well.
+select table_rows from information_schema.tables
+where table_schema = database() and table_name = 't2';
+
+let $t2_len = `select data_length from information_schema.tables where table_schema = database() and table_name = 't2'`;
+let $diff = `select abs($t1_len - $t2_len)`;
+
+#
+# Table sizes are approximations; for this particular case we allow about
+# 10% deviation. Printing the table name below serves as the pass marker.
+#
+if ($diff < 6000) {
+ select table_name from information_schema.tables where table_schema = database() and table_name = 't2';
+}
+
+drop table t2;
+
+SET GLOBAL ROCKSDB_TABLE_STATS_SAMPLING_PCT = @ORIG_PCT;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_timeout_rollback-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_timeout_rollback-master.opt
new file mode 100644
index 00000000000..8d8ae3d65f3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_timeout_rollback-master.opt
@@ -0,0 +1 @@
+--rocksdb_lock_wait_timeout=2
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_timeout_rollback.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_timeout_rollback.test
new file mode 100644
index 00000000000..d47af90d842
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_timeout_rollback.test
@@ -0,0 +1,78 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+SET @@global.rocksdb_rollback_on_timeout = 1;
+show variables like 'rocksdb_rollback_on_timeout';
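+
+# With rocksdb_rollback_on_timeout=1 a lock-wait timeout rolls back the
+# whole transaction; with the default of 0 only the timed-out statement is
+# rolled back. Both modes are exercised below.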
+
+create table t1 (a int unsigned not null primary key) engine = rocksdb;
+insert into t1 values (1);
+commit;
+
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+connection con2;
+begin work;
+insert into t1 values (5);
+insert into t1 values (6);
+
+update t1 set a = a + 1 where a = 1;
+
+connection con1;
+begin work;
+insert into t1 values (7);
+insert into t1 values (8);
+
+# This statement will time out. The whole transaction will be
+# rolled back. So values 7 and 8 are not inserted.
+--error ER_LOCK_WAIT_TIMEOUT
+update t1 set a = a + 1 where a = 1;
+
+select * from t1;
+commit;
+
+connection con2;
+select * from t1;
+commit;
+
+connection default;
+select * from t1;
+
+SET @@global.rocksdb_rollback_on_timeout = 0;
+show variables like 'rocksdb_rollback_on_timeout';
+
+connection con2;
+begin work;
+insert into t1 values (9);
+insert into t1 values (10);
+
+update t1 set a = a + 1 where a = 2;
+
+connection con1;
+begin work;
+insert into t1 values (11);
+insert into t1 values (12);
+
+# This statement will time out. Only this statement will be
+# rolled back. So values 11 and 12 are inserted.
+--error ER_LOCK_WAIT_TIMEOUT
+update t1 set a = a + 1 where a = 2;
+
+select * from t1;
+commit;
+
+connection con2;
+select * from t1;
+commit;
+
+connection default;
+select * from t1;
+
+SET @@global.rocksdb_rollback_on_timeout = DEFAULT;
+
+drop table t1;
+disconnect con1;
+disconnect con2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rollback_savepoint.test b/storage/rocksdb/mysql-test/rocksdb/t/rollback_savepoint.test
new file mode 100644
index 00000000000..c46d8b07637
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rollback_savepoint.test
@@ -0,0 +1,33 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1, t2;
+--enable_warnings
+
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'a');
+CREATE TABLE t2 LIKE t1;
+INSERT INTO t2 SELECT * FROM t1;
+
+--connect (con1,localhost,root,,)
+--connect (con2,localhost,root,,)
+
+--connection con1
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+SAVEPOINT a;
+SELECT * FROM t1 ORDER BY pk;
+ROLLBACK TO SAVEPOINT a;
+SAVEPOINT a;
+SELECT * FROM t2 ORDER BY pk;
+ROLLBACK TO SAVEPOINT a;
+
+# should not be blocked
+--connection con2
+ALTER TABLE t1 RENAME TO t3;
+
+--connection default
+DROP TABLE t2, t3;
+
+--disconnect con1
+--disconnect con2
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found.cnf b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found.cnf
new file mode 100644
index 00000000000..44100e59cc2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found.cnf
@@ -0,0 +1,9 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+binlog_format=row
+[mysqld.2]
+binlog_format=row
+slave_parallel_workers=4
+slave_exec_mode=SEMI_STRICT
+rocksdb_lock_wait_timeout=5
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found.inc b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found.inc
new file mode 100644
index 00000000000..9575abb7019
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found.inc
@@ -0,0 +1,98 @@
+--source include/have_rocksdb.inc
+--source include/master-slave.inc
+--source include/have_debug.inc
+--source include/have_debug_sync.inc
+
+connection master;
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+connection master;
+
+create table t0 (a int) engine=myisam;
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int) engine=myisam;
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+create table t2 (
+ pk int primary key,
+ kp1 int,
+ kp2 int,
+ col1 int,
+ key (kp1,kp2)
+) engine=rocksdb;
+# Use RBR for next few statements to avoid the
+# 'Unsafe statement written to the binary log' warnings.
+set @tmp_binlog_format=@@binlog_format;
+set @@binlog_format=ROW;
+insert into t2 select a,a,a,a from t1;
+create table t3 like t2;
+insert into t3 select * from t2;
+set binlog_format=@tmp_binlog_format;
+
+
+# For GitHub issue#166
+# The slave is suspended in ha_rocksdb::read_range_first() -> index_read_map_impl()
+# -> ha_rocksdb::get_row_by_rowid() -- after creating an iterator and calling
+# Seek() and Next() (reaching pk=1), but before GetForUpdate() and before
+# creating a snapshot.
+# Deletes then remove pk=2 and pk=3, and the update resumes on the slave:
+# GetForUpdate(pk=1), then index_next() -> secondary_index_read() ->
+# get_row_by_rowid(pk=2), which finds no row.
+# The slave should not stop with an error (Can't find a record).
+
+--source include/sync_slave_sql_with_master.inc
+
+connection slave;
+let $old_debug = `select @@global.debug`;
+set global debug_dbug= 'd,dbug.rocksdb.get_row_by_rowid';
+--source include/stop_slave.inc
+--source include/start_slave.inc
+
+connection master;
+update t2 set col1=100 where kp1 between 1 and 3 and mod(kp2,2)=0;
+
+connection slave;
+set debug_sync= 'now WAIT_FOR Reached';
+eval set global debug_dbug = '$old_debug';
+set sql_log_bin=0;
+delete from t2 where pk=2;
+delete from t2 where pk=3;
+set debug_sync= 'now SIGNAL signal.rocksdb.get_row_by_rowid_let_running';
+
+connection master;
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select * from t2 where pk < 5;
+
+# For GitHub issue#162 (result file must be updated after fixing #162)
+connection slave;
+set global debug_dbug= 'd,dbug.rocksdb.get_row_by_rowid';
+--source include/stop_slave.inc
+--source include/start_slave.inc
+
+connection master;
+update t3 set col1=100 where kp1 between 1 and 4 and mod(kp2,2)=0;
+
+connection slave;
+call mtr.add_suppression("Deadlock found when trying to get lock");
+set debug_sync= 'now WAIT_FOR Reached';
+eval set global debug_dbug = '$old_debug';
+set sql_log_bin=0;
+delete from t3 where pk=2;
+delete from t3 where pk=3;
+set debug_sync= 'now SIGNAL signal.rocksdb.get_row_by_rowid_let_running';
+
+connection master;
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+# col1 for pk=4 should be 100
+select * from t3 where pk < 5;
+
+set debug_sync='RESET';
+# Cleanup
+connection master;
+drop table t0, t1, t2, t3;
+--source include/rpl_end.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found.test b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found.test
new file mode 100644
index 00000000000..36188427585
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found.test
@@ -0,0 +1,4 @@
+--source include/have_binlog_format_row.inc
+
+--source rpl_row_not_found.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found_rc.cnf b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found_rc.cnf
new file mode 100644
index 00000000000..110d18abac7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found_rc.cnf
@@ -0,0 +1,11 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+binlog_format=row
+transaction_isolation=read-committed
+[mysqld.2]
+binlog_format=row
+slave_parallel_workers=4
+slave_exec_mode=SEMI_STRICT
+rocksdb_lock_wait_timeout=5
+transaction_isolation=read-committed
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found_rc.test b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found_rc.test
new file mode 100644
index 00000000000..36188427585
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found_rc.test
@@ -0,0 +1,4 @@
+--source include/have_binlog_format_row.inc
+
+--source rpl_row_not_found.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_rocksdb.cnf b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_rocksdb.cnf
new file mode 100644
index 00000000000..09a1c853ffc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_rocksdb.cnf
@@ -0,0 +1 @@
+!include suite/rpl/my.cnf
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_rocksdb.test b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_rocksdb.test
new file mode 100644
index 00000000000..b103dfc3ef8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_rocksdb.test
@@ -0,0 +1,48 @@
+--source include/have_rocksdb.inc
+
+source include/master-slave.inc;
+source include/have_binlog_format_row.inc;
+
+connection master;
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+connection master;
+
+select @@binlog_format;
+create table t1 (pk int primary key) engine=rocksdb;
+insert into t1 values (1),(2),(3);
+
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+
+select * from t1;
+
+connection master;
+drop table t1;
+
+--echo #
+--echo # Issue #18: slave crash on update with row based binary logging
+--echo #
+create table t1 (id int primary key, value int, value2 int, index(value)) engine=rocksdb;
+insert into t1 values (1,1,1);
+insert into t1 values (2,1,1);
+insert into t1 values (3,1,1);
+insert into t1 values (4,1,1);
+insert into t1 values (5,1,1);
+update t1 set value2=100 where id=1;
+update t1 set value2=200 where id=2;
+update t1 set value2=300 where id=3;
+
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select * from t1 where id=1;
+select * from t1 where id=2;
+select * from t1 where id=3;
+
+connection master;
+drop table t1;
+
+--source include/rpl_end.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_stats-slave.opt b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_stats-slave.opt
new file mode 100644
index 00000000000..039295e140d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_stats-slave.opt
@@ -0,0 +1 @@
+--userstat=ON
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_stats.cnf b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_stats.cnf
new file mode 100644
index 00000000000..09a1c853ffc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_stats.cnf
@@ -0,0 +1 @@
+!include suite/rpl/my.cnf
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_stats.test b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_stats.test
new file mode 100644
index 00000000000..db4d1ca6f9e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_stats.test
@@ -0,0 +1,47 @@
+--source include/have_rocksdb.inc
+
+source include/have_binlog_format_row.inc;
+source include/master-slave.inc;
+
+connection master;
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+# initialization/insert
+connection master;
+# creating save_read_stats() and get_read_stats() procedures
+--source init_stats_procedure.inc
+
+create table t1 (id int primary key, value int);
+insert into t1 values (1,1), (2,2), (3,3), (4,4), (5,5);
+--source include/sync_slave_sql_with_master.inc
+
+connection slave;
+call save_read_stats();
+connection master;
+update t1 set value=value+1 where id=1;
+update t1 set value=value+1 where id=3;
+select * from t1;
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+call get_read_stats();
+select * from t1;
+call save_read_stats();
+
+connection master;
+delete from t1 where id in (4,5);
+select * from t1;
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+call get_read_stats();
+select * from t1;
+
+
+# cleanup
+connection master;
+drop table t1;
+--source drop_stats_procedure.inc
+
+--source include/rpl_end.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_triggers.cnf b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_triggers.cnf
new file mode 100644
index 00000000000..b0a37fd30ad
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_triggers.cnf
@@ -0,0 +1,19 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+binlog_format=row
+gtid_mode=ON
+enforce_gtid_consistency
+log_slave_updates
+binlog_row_image=FULL
+rocksdb_read_free_rpl=PK_SK
+rocksdb_strict_collation_check=0
+[mysqld.2]
+binlog_format=row
+gtid_mode=ON
+enforce_gtid_consistency
+log_slave_updates
+binlog_row_image=FULL
+rocksdb_read_free_rpl=PK_SK
+rocksdb_strict_collation_check=0
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_triggers.test b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_triggers.test
new file mode 100644
index 00000000000..4490353b749
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_triggers.test
@@ -0,0 +1,262 @@
+-- source include/have_binlog_format_row.inc
+-- source include/have_rbr_triggers.inc
+-- source include/have_rocksdb.inc
+-- source include/master-slave.inc
+
+-- echo # Test of row replication with triggers on the slave side
+connection master;
+CREATE TABLE t1 (C1 CHAR(1) primary key, C2 CHAR(1));
+SELECT * FROM t1;
+
+sync_slave_with_master;
+
+connection slave;
+SET @old_slave_exec_mode= @@global.slave_exec_mode;
+SET @old_slave_run_triggers_for_rbr= @@global.slave_run_triggers_for_rbr;
+SET @@global.slave_exec_mode= IDEMPOTENT;
+SET @@global.slave_run_triggers_for_rbr= YES;
+SELECT * FROM t1;
+create table t2 (id char(2) primary key, cnt int, o char(1), n char(1));
+insert into t2 values
+ ('u0', 0, ' ', ' '),('u1', 0, ' ', ' '),
+ ('d0', 0, ' ', ' '),('d1', 0, ' ', ' '),
+ ('i0', 0, ' ', ' '),('i1', 0, ' ', ' ');
+create trigger t1_cnt_b before update on t1 for each row
+ update t2 set cnt=cnt+1, o=old.C1, n=new.C1 where id = 'u0';
+create trigger t1_cnt_db before delete on t1 for each row
+ update t2 set cnt=cnt+1, o=old.C1, n=' ' where id = 'd0';
+create trigger t1_cnt_ib before insert on t1 for each row
+ update t2 set cnt=cnt+1, n=new.C1, o=' ' where id = 'i0';
+create trigger t1_cnt_a after update on t1 for each row
+ update t2 set cnt=cnt+1, o=old.C1, n=new.C1 where id = 'u1';
+create trigger t1_cnt_da after delete on t1 for each row
+ update t2 set cnt=cnt+1, o=old.C1, n=' ' where id = 'd1';
+create trigger t1_cnt_ia after insert on t1 for each row
+ update t2 set cnt=cnt+1, n=new.C1, o=' ' where id = 'i1';
+SELECT * FROM t2 order by id;
+
+connection master;
+--echo # INSERT triggers test
+insert into t1 values ('a','b');
+
+sync_slave_with_master;
+
+connection slave;
+SELECT * FROM t2 order by id;
+
+connection master;
+--echo # UPDATE triggers test
+update t1 set C1= 'd';
+sync_slave_with_master;
+
+connection slave;
+SELECT * FROM t2 order by id;
+
+connection master;
+--echo # DELETE triggers test
+delete from t1 where C1='d';
+
+sync_slave_with_master;
+
+connection slave;
+SELECT * FROM t2 order by id;
+--echo # INSERT triggers that also cause an UPDATE (insert a duplicate row)
+insert into t1 values ('0','1');
+SELECT * FROM t2 order by id;
+
+connection master;
+insert into t1 values ('0','1');
+
+sync_slave_with_master;
+
+connection slave;
+SELECT * FROM t2 order by id;
+--echo # INSERT triggers that also cause a DELETE
+--echo # (insert a duplicate row into a table referenced by a foreign key)
+insert into t1 values ('1','1');
+
+connection master;
+# Foreign key is not supported in MyRocks
+#CREATE TABLE t3 (C1 CHAR(1) primary key, FOREIGN KEY (C1) REFERENCES t1(C1) );
+#insert into t1 values ('1','1');
+
+#sync_slave_with_master;
+
+#connection slave;
+#SELECT * FROM t2 order by id;
+
+#connection master;
+#drop table t3,t1;
+drop table if exists t1;
+
+sync_slave_with_master;
+
+connection slave;
+SET @@global.slave_exec_mode= @old_slave_exec_mode;
+SET @@global.slave_run_triggers_for_rbr= @old_slave_run_triggers_for_rbr;
+drop table t2;
+
+--connection master
+CREATE TABLE t1 (i INT);
+CREATE TABLE t2 (i INT);
+
+--sync_slave_with_master
+SET @old_slave_run_triggers_for_rbr= @@global.slave_run_triggers_for_rbr;
+SET GLOBAL slave_run_triggers_for_rbr=YES;
+CREATE TRIGGER tr AFTER INSERT ON t1 FOR EACH ROW
+ INSERT INTO t2 VALUES (new.i);
+
+--connection master
+BEGIN;
+INSERT INTO t1 VALUES (1);
+INSERT INTO t1 VALUES (2);
+COMMIT;
+--sync_slave_with_master
+select * from t2;
+SET @@global.slave_run_triggers_for_rbr= @old_slave_run_triggers_for_rbr;
+--connection master
+drop tables t2,t1;
+
+--sync_slave_with_master
+
+-- echo # Triggers on the slave do not fire if the master has triggers of its own
+
+connection master;
+CREATE TABLE t1 (C1 CHAR(1) primary key, C2 CHAR(1));
+SELECT * FROM t1;
+create trigger t1_dummy before delete on t1 for each row
+ set @dummy= 1;
+
+sync_slave_with_master;
+
+connection slave;
+SET @old_slave_exec_mode= @@global.slave_exec_mode;
+SET @old_slave_run_triggers_for_rbr= @@global.slave_run_triggers_for_rbr;
+SET @@global.slave_exec_mode= IDEMPOTENT;
+SET @@global.slave_run_triggers_for_rbr= YES;
+SELECT * FROM t1;
+create table t2 (id char(2) primary key, cnt int, o char(1), n char(1));
+insert into t2 values
+ ('u0', 0, ' ', ' '),('u1', 0, ' ', ' '),
+ ('d0', 0, ' ', ' '),('d1', 0, ' ', ' '),
+ ('i0', 0, ' ', ' '),('i1', 0, ' ', ' ');
+create trigger t1_cnt_b before update on t1 for each row
+ update t2 set cnt=cnt+1, o=old.C1, n=new.C1 where id = 'u0';
+create trigger t1_cnt_ib before insert on t1 for each row
+ update t2 set cnt=cnt+1, n=new.C1, o=' ' where id = 'i0';
+create trigger t1_cnt_a after update on t1 for each row
+ update t2 set cnt=cnt+1, o=old.C1, n=new.C1 where id = 'u1';
+create trigger t1_cnt_da after delete on t1 for each row
+ update t2 set cnt=cnt+1, o=old.C1, n=' ' where id = 'd1';
+create trigger t1_cnt_ia after insert on t1 for each row
+ update t2 set cnt=cnt+1, n=new.C1, o=' ' where id = 'i1';
+SELECT * FROM t2 order by id;
+
+connection master;
+--echo # INSERT triggers test
+insert into t1 values ('a','b');
+
+sync_slave_with_master;
+
+connection slave;
+SELECT * FROM t2 order by id;
+connection master;
+--echo # UPDATE triggers test
+update t1 set C1= 'd';
+
+sync_slave_with_master;
+
+connection slave;
+SELECT * FROM t2 order by id;
+
+connection master;
+--echo # DELETE triggers test
+delete from t1 where C1='d';
+
+sync_slave_with_master;
+
+connection slave;
+SELECT * FROM t2 order by id;
+--echo # INSERT triggers that also cause an UPDATE (insert a duplicate row)
+insert into t1 values ('0','1');
+SELECT * FROM t2 order by id;
+
+
+connection master;
+insert into t1 values ('0','1');
+
+sync_slave_with_master;
+
+connection slave;
+SELECT * FROM t2 order by id;
+--echo # INSERT triggers that also cause a DELETE
+--echo # (insert a duplicate row into a table referenced by a foreign key)
+insert into t1 values ('1','1');
+
+connection master;
+
+# Foreign Key is not supported in MyRocks
+#CREATE TABLE t3 (C1 CHAR(1) primary key, FOREIGN KEY (C1) REFERENCES t1(C1) );
+#insert into t1 values ('1','1');
+
+#sync_slave_with_master;
+
+#connection slave;
+#SELECT * FROM t2 order by id;
+
+#connection master;
+#drop table t3,t1;
+drop table if exists t1;
+
+sync_slave_with_master;
+
+connection slave;
+SET @@global.slave_exec_mode= @old_slave_exec_mode;
+SET @@global.slave_run_triggers_for_rbr= @old_slave_run_triggers_for_rbr;
+drop table t2;
+
+--echo #
+--echo # MDEV-5513: Trigger is applied to the rows after first one
+--echo #
+
+--connection master
+create table t1 (a int, b int);
+create table tlog (a int auto_increment primary key);
+set sql_log_bin=0;
+create trigger tr1 after insert on t1 for each row insert into tlog values (null);
+set sql_log_bin=1;
+
+sync_slave_with_master;
+--connection slave
+
+set @slave_run_triggers_for_rbr.saved = @@slave_run_triggers_for_rbr;
+set global slave_run_triggers_for_rbr=1;
+create trigger tr2 before insert on t1 for each row set new.b = new.a;
+
+--connection master
+insert into t1 values (1,10),(2,20),(3,30);
+
+--sync_slave_with_master
+select * from t1;
+
+--echo #
+--echo # Verify the slave skips running its triggers when the master already ran triggers and logged their row events
+--echo #
+--connection master
+create table t4(a int, b int);
+delete from tlog;
+create trigger tr4 before insert on t4 for each row insert into tlog values (null);
+insert into t4 values (1, 10),(2, 20);
+select * from tlog;
+
+--sync_slave_with_master
+select * from t4;
+select * from tlog;
+
+# Cleanup
+set global slave_run_triggers_for_rbr = @slave_run_triggers_for_rbr.saved;
+--connection master
+drop table t1, tlog, t4;
+sync_slave_with_master;
+
+--source include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_savepoint.cnf b/storage/rocksdb/mysql-test/rocksdb/t/rpl_savepoint.cnf
new file mode 100644
index 00000000000..09a1c853ffc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_savepoint.cnf
@@ -0,0 +1 @@
+!include suite/rpl/my.cnf
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_savepoint.test b/storage/rocksdb/mysql-test/rocksdb/t/rpl_savepoint.test
new file mode 100644
index 00000000000..56d14f92892
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_savepoint.test
@@ -0,0 +1,91 @@
+--source include/have_rocksdb.inc
+
+source include/have_binlog_format_row.inc;
+source include/master-slave.inc;
+
+connection master;
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+connection master;
+
+create table t1 (id int primary key, value int);
+insert into t1 values (1,1), (2,2), (3,3);
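+
+# MyRocks is expected to reject ROLLBACK TO SAVEPOINT once the transaction
+# has written rows, leaving the transaction rollback-only; the blocks below
+# verify that, and that the slave still converges.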
+
+begin;
+insert into t1 values (11, 1);
+savepoint a;
+insert into t1 values (12, 1);
+--error ER_ROLLBACK_TO_SAVEPOINT
+rollback to savepoint a;
+--error ER_ROLLBACK_ONLY
+commit;
+commit;
+select * from t1;
+
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+
+select * from t1;
+
+connection master;
+begin;
+insert into t1 values (21, 1);
+savepoint a;
+insert into t1 values (22, 1);
+--error ER_ROLLBACK_TO_SAVEPOINT
+rollback to savepoint a;
+--error ER_ROLLBACK_ONLY
+insert into t1 values (23, 1);
+--error ER_ROLLBACK_ONLY
+commit;
+commit;
+select * from t1;
+
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select * from t1;
+
+
+connection master;
+begin;
+insert into t1 values (31, 1);
+savepoint a;
+insert into t1 values (32, 1);
+savepoint b;
+insert into t1 values (33, 1);
+--error ER_ROLLBACK_TO_SAVEPOINT
+rollback to savepoint a;
+--error ER_ROLLBACK_ONLY
+insert into t1 values (34, 1);
+rollback;
+select * from t1;
+
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select * from t1;
+
+### GitHub Issue#195
+connection master;
+SET autocommit=off;
+select * from t1;
+SAVEPOINT A;
+select * from t1;
+SAVEPOINT A;
+insert into t1 values (35, 35);
+--error ER_ROLLBACK_TO_SAVEPOINT
+ROLLBACK TO SAVEPOINT A;
+--error ER_ROLLBACK_ONLY
+START TRANSACTION;
+select * from t1;
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select * from t1;
+
+
+connection master;
+drop table t1;
+
+--source include/rpl_end.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_statement.cnf b/storage/rocksdb/mysql-test/rocksdb/t/rpl_statement.cnf
new file mode 100644
index 00000000000..6e5130c1f01
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_statement.cnf
@@ -0,0 +1,7 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+binlog_format=statement
+[mysqld.2]
+binlog_format=mixed
+rocksdb_lock_wait_timeout=5
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_statement.test b/storage/rocksdb/mysql-test/rocksdb/t/rpl_statement.test
new file mode 100644
index 00000000000..cb5f5e04b00
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_statement.test
@@ -0,0 +1,59 @@
+--source include/have_rocksdb.inc
+source include/master-slave.inc;
+
+source include/have_binlog_format_statement.inc;
+
+connection master;
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+connection master;
+
+select @@binlog_format;
+create table t1 (pk int primary key) engine=rocksdb;
+--error ER_BINLOG_STMT_MODE_AND_ROW_ENGINE
+insert into t1 values (1),(2),(3);
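+# MyRocks rejects statement-based binlogging by default
+# (ER_BINLOG_STMT_MODE_AND_ROW_ENGINE); setting rocksdb_unsafe_for_binlog=ON
+# below overrides that check for the session.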
+
+set session rocksdb_unsafe_for_binlog=on;
+insert into t1 values (1),(2),(3);
+select * from t1;
+delete from t1;
+set session rocksdb_unsafe_for_binlog=off;
+
+--error ER_BINLOG_STMT_MODE_AND_ROW_ENGINE
+insert into t1 values (1),(2),(3);
+
+set binlog_format=row;
+insert into t1 values (1),(2),(3);
+
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+
+select * from t1;
+
+connection master;
+drop table t1;
+
+create table t1 (id int primary key, value int, value2 int, index(value)) engine=rocksdb;
+insert into t1 values (1,1,1);
+insert into t1 values (2,1,1);
+insert into t1 values (3,1,1);
+insert into t1 values (4,1,1);
+insert into t1 values (5,1,1);
+update t1 set value2=100 where id=1;
+update t1 set value2=200 where id=2;
+update t1 set value2=300 where id=3;
+
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select * from t1 where id=1;
+select * from t1 where id=2;
+select * from t1 where id=3;
+
+connection master;
+drop table t1;
+set binlog_format=row;
+
+--source include/rpl_end.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_statement_not_found.cnf b/storage/rocksdb/mysql-test/rocksdb/t/rpl_statement_not_found.cnf
new file mode 100644
index 00000000000..470b073d185
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_statement_not_found.cnf
@@ -0,0 +1,9 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+binlog_format=statement
+rocksdb_unsafe_for_binlog=1
+[mysqld.2]
+binlog_format=row
+slave_parallel_workers=4
+rocksdb_lock_wait_timeout=5
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_statement_not_found.test b/storage/rocksdb/mysql-test/rocksdb/t/rpl_statement_not_found.test
new file mode 100644
index 00000000000..019e83acf14
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_statement_not_found.test
@@ -0,0 +1,3 @@
+--source include/have_binlog_format_statement.inc
+--source rpl_row_not_found.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rqg.inc b/storage/rocksdb/mysql-test/rocksdb/t/rqg.inc
new file mode 100644
index 00000000000..40154d9eaa7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rqg.inc
@@ -0,0 +1,44 @@
+#
+# Random Query Generator tests
+#
+# Arguments needed to be set by the test when including this one:
+# $TESTDIR : name of sub-directory in conf containing the data/grammar files
+# $GRAMMAR_FILES: space separated list of grammar files
+# $DATA_FILE: name of the data file
+#
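+# For example (values as used in rqg_examples.test):
+#   let $TESTDIR = examples;
+#   let $GRAMMAR_FILES = example.yy;
+#   let $DATA_FILE = example.zz;
+#   --source rqg.inc
+#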
+
+let $MYSQL_BASEDIR = `SELECT @@BASEDIR`;
+let RQG_BASE = $MYSQL_BASEDIR/rqg/rqg/common/mariadb-patches;
+let MYSQL_SOCKET = `SELECT @@SOCKET`;
+let GRAMMAR_FILES = $GRAMMAR_FILES;
+let DATA_FILE = $DATA_FILE;
+let TESTDIR = $TESTDIR;
+let $TESTDB = rqg_$TESTDIR;
+let TESTDB = $TESTDB;
+
+--eval CREATE DATABASE IF NOT EXISTS $TESTDB
+
+--perl
+
+$ENV{'RQG_HOME'}=$ENV{'RQG_BASE'};
+foreach $grammar_file (split(/ /, $ENV{'GRAMMAR_FILES'})) {
+
+ # Errors from the gentest.pl file will be captured in the results file
+ my $cmd = "perl $ENV{'RQG_BASE'}/gentest.pl " .
+ "--dsn=dbi:mysql:host=:port=:user=root:database=$ENV{'TESTDB'}" .
+ ":mysql_socket=$ENV{'MYSQL_SOCKET'} " .
+ "--gendata=$ENV{'RQG_BASE'}/conf/$ENV{'TESTDIR'}/$ENV{'DATA_FILE'} " .
+ "--grammar=$ENV{'RQG_BASE'}/conf/$ENV{'TESTDIR'}/$grammar_file " .
+            "--threads=5 --queries=10000 --duration=60 --sqltrace >> " .
+            "$ENV{'MYSQLTEST_VARDIR'}/tmp/$ENV{'TESTDB'}.log 2>&1";
+
+ print "Running test with grammar file $grammar_file\n";
+ system($cmd);
+ if ($? != 0) {
+ print ("Failure running test! Command executed: $cmd\n");
+ }
+}
+
+EOF
+
+--eval DROP DATABASE $TESTDB
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rqg_examples-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/rqg_examples-master.opt
new file mode 100644
index 00000000000..5b714857e13
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rqg_examples-master.opt
@@ -0,0 +1 @@
+--rocksdb_strict_collation_check=0
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rqg_examples.test b/storage/rocksdb/mysql-test/rocksdb/t/rqg_examples.test
new file mode 100644
index 00000000000..80bae00424b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rqg_examples.test
@@ -0,0 +1,12 @@
+--source include/have_rocksdb.inc
+# Don't run the RQG tests with --rpc_protocol because the connections and
+# queries will be coming from Perl where we don't have any ability to
+# specify the query attributes needed for the RPC protocol.
+--source include/not_rpc_protocol.inc
+
+# RQG's examples test
+let $TESTDIR = examples;
+let $GRAMMAR_FILES = example.yy;
+let $DATA_FILE = example.zz;
+
+--source rqg.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rqg_runtime-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/rqg_runtime-master.opt
new file mode 100644
index 00000000000..f494273892c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rqg_runtime-master.opt
@@ -0,0 +1 @@
+--rocksdb_strict_collation_check=0 --secure-file-priv=/tmp
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rqg_runtime.test b/storage/rocksdb/mysql-test/rocksdb/t/rqg_runtime.test
new file mode 100644
index 00000000000..2e560c86c62
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rqg_runtime.test
@@ -0,0 +1,58 @@
+--source include/have_rocksdb.inc
+# Don't run the RQG tests with --rpc_protocol because the connections and
+# queries will be coming from Perl where we don't have any ability to
+# specify the query attributes needed for the RPC protocol.
+--source include/not_rpc_protocol.inc
+
+call mtr.add_suppression("Did not write failed ");
+call mtr.add_suppression("Can't open and lock privilege tables");
+call mtr.add_suppression("Attempt to delete the trigger file");
+
+SET @ORIG_EVENT_SCHEDULER = @@EVENT_SCHEDULER;
+
+# mysql.user and mysql.tables_priv are modified by the
+# tests, so they need to be restored to the original
+# state.
+--disable_warnings
+CREATE TABLE mysql.user_temp LIKE mysql.user;
+INSERT mysql.user_temp SELECT * FROM mysql.user;
+CREATE TABLE mysql.tables_priv_temp LIKE mysql.tables_priv;
+INSERT mysql.tables_priv_temp SELECT * FROM mysql.tables_priv;
+--enable_warnings
+
+# RQG's runtime test
+let $TESTDIR = runtime;
+
+let $GRAMMAR_FILES = alter_online.yy;
+let $DATA_FILE = alter_online.zz;
+
+--source rqg.inc
+
+let $GRAMMAR_FILES = concurrency_1.yy;
+let $DATA_FILE = concurrency_1.zz;
+
+--source rqg.inc
+
+let $GRAMMAR_FILES = connect_kill_sql.yy;
+let $DATA_FILE = connect_kill_data.zz;
+
+--source rqg.inc
+
+let $GRAMMAR_FILES = metadata_stability.yy;
+let $DATA_FILE = metadata_stability.zz;
+
+--source rqg.inc
+
+--disable_warnings
+DELETE FROM mysql.tables_priv;
+DELETE FROM mysql.user;
+INSERT mysql.user SELECT * FROM mysql.user_temp;
+INSERT mysql.tables_priv SELECT * FROM mysql.tables_priv_temp;
+DROP TABLE mysql.user_temp;
+DROP TABLE mysql.tables_priv_temp;
+DROP TABLE IF EXISTS test.executors;
+DROP DATABASE IF EXISTS testdb_N;
+DROP DATABASE IF EXISTS testdb_S;
+--enable_warnings
+
+SET GLOBAL EVENT_SCHEDULER = @ORIG_EVENT_SCHEDULER;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rqg_transactions-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/rqg_transactions-master.opt
new file mode 100644
index 00000000000..5b714857e13
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rqg_transactions-master.opt
@@ -0,0 +1 @@
+--rocksdb_strict_collation_check=0
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rqg_transactions.test b/storage/rocksdb/mysql-test/rocksdb/t/rqg_transactions.test
new file mode 100644
index 00000000000..383b9aed39f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rqg_transactions.test
@@ -0,0 +1,14 @@
+--source include/have_rocksdb.inc
+# Don't run the RQG tests with --rpc_protocol because the connections and
+# queries will be coming from Perl where we don't have any ability to
+# specify the query attributes needed for the RPC protocol.
+--source include/not_rpc_protocol.inc
+
+call mtr.add_suppression("Deadlock found when trying to get lock");
+
+# RQG's transactions test
+let $TESTDIR = transactions;
+let $GRAMMAR_FILES = transactions.yy repeatable_read.yy transaction_durability.yy transactions-flat.yy combinations.yy;
+let $DATA_FILE = transactions.zz;
+
+--source rqg.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/secondary_key_update_lock.test b/storage/rocksdb/mysql-test/rocksdb/t/secondary_key_update_lock.test
new file mode 100644
index 00000000000..b43a32b6a0d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/secondary_key_update_lock.test
@@ -0,0 +1,26 @@
+--source include/have_rocksdb.inc
+
+# Tests if locks are held for the secondary keys of old rows in updates
+
+connect (con, localhost, root,,);
+
+connection default;
+create table t1 (a int primary key, b int unique key) engine = rocksdb;
+insert into t1 values(1, 1);
+
+connection con;
+begin;
+update t1 set b = 2 where b = 1; # this should lock the row where b = 1
+
+connection default;
+error ER_LOCK_WAIT_TIMEOUT;
+insert into t1 values(2, 1); # should error out with lock_wait_timeout
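+# (The pending update in con still holds a lock on the unique-key entry
+# b=1, so this insert had to wait on it and timed out.)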
+
+connection con;
+rollback;
+select * from t1;
+
+# Cleanup
+connection default;
+drop table t1;
+disconnect con;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/select.test b/storage/rocksdb/mysql-test/rocksdb/t/select.test
new file mode 100644
index 00000000000..3d9bdc7b4b8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/select.test
@@ -0,0 +1,202 @@
+--source include/have_rocksdb.inc
+
+#
+# Basic SELECT statements
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1, t2;
+--enable_warnings
+
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (100,'foobar'),(1,'z'),(200,'bar');
+
+CREATE TABLE t2 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t2 (a,b) SELECT a, b FROM t1;
+INSERT INTO t1 (a,b) SELECT a, b FROM t2;
+
+--sorted_result
+SELECT * FROM t1;
+
+# Modifiers
+
+--sorted_result
+SELECT DISTINCT a FROM t1;
+
+--sorted_result
+SELECT ALL b, a FROM t1;
+
+# Optimizer and cache directives should not have any visible effect here,
+# but we add them for completeness
+
+--sorted_result
+SELECT STRAIGHT_JOIN SQL_CACHE t1.* FROM t2, t1 WHERE t1.a <> t2.a;
+
+--sorted_result
+SELECT SQL_SMALL_RESULT SQL_NO_CACHE t1.a FROM t1, t2;
+
+--sorted_result
+SELECT SQL_BIG_RESULT SQL_CALC_FOUND_ROWS DISTINCT(t2.a)
+ FROM t1 t1_1, t2, t1 t1_2;
+SELECT FOUND_ROWS();
+
+SET @save_query_cache=@@global.query_cache_size;
+SET GLOBAL query_cache_size = 1024*1024;
+--sorted_result
+SELECT SQL_CACHE * FROM t1, t2;
+SET @@global.query_cache_size=@save_query_cache;
+
+# Combination of main clauses
+
+--sorted_result
+SELECT a+10 AS field1, CONCAT(b,':',b) AS field2 FROM t1
+WHERE b > 'b' AND a IS NOT NULL
+GROUP BY 2 DESC, field1 ASC
+HAVING field1 < 1000
+ORDER BY field2, 1 DESC, field1*2
+LIMIT 5 OFFSET 1;
+
+# ROLLUP
+--sorted_result
+SELECT SUM(a), MAX(a), b FROM t1 GROUP BY b WITH ROLLUP;
+
+# Procedure
+
+--sorted_result
+SELECT * FROM t2 WHERE a>0 PROCEDURE ANALYSE();
+
+# SELECT INTO
+let $datadir = `SELECT @@datadir`;
+
+--replace_result $datadir <DATADIR>
+eval
+SELECT t1.a, t2.b FROM t2, t1 WHERE t1.a = t2.a ORDER BY t2.b, t1.a
+ INTO OUTFILE '$datadir/select.out'
+ CHARACTER SET utf8
+ FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '''';
+--cat_file $datadir/select.out
+--remove_file $datadir/select.out
+
+--replace_result $datadir <DATADIR>
+--error ER_TOO_MANY_ROWS
+eval
+SELECT t1.a, t2.b FROM t2, t1 WHERE t1.a = t2.a ORDER BY t2.b, t1.a
+ INTO DUMPFILE '$datadir/select.dump';
+--remove_file $datadir/select.dump
+--replace_result $datadir <DATADIR>
+eval
+SELECT t1.*, t2.* FROM t1, t2 ORDER BY t2.b, t1.a, t2.a, t1.b, t1.pk, t2.pk LIMIT 1
+ INTO DUMPFILE '$datadir/select.dump';
+
+--cat_file $datadir/select.dump
+--echo
+--remove_file $datadir/select.dump
+
+SELECT MIN(a), MAX(a) FROM t1 INTO @min, @max;
+SELECT @min, @max;
+
+# Joins
+
+--sorted_result
+SELECT t1_1.*, t2.* FROM t2, t1 AS t1_1, t1 AS t1_2
+ WHERE t1_1.a = t1_2.a AND t2.a = t1_1.a;
+
+--sorted_result
+SELECT alias1.* FROM ( SELECT a,b FROM t1 ) alias1, t2 WHERE t2.a IN (100,200);
+
+--sorted_result
+SELECT t1.a FROM { OJ t1 LEFT OUTER JOIN t2 ON t1.a = t2.a+10 };
+
+--sorted_result
+SELECT t1.* FROM t2 INNER JOIN t1;
+
+--sorted_result
+SELECT t1_2.* FROM t1 t1_1 CROSS JOIN t1 t1_2 ON t1_1.b = t1_2.b;
+
+--sorted_result
+SELECT t1.a, t2.b FROM t2 STRAIGHT_JOIN t1 WHERE t1.b > t2.b;
+
+SELECT t1.a, t2.b FROM t2 STRAIGHT_JOIN t1 ON t1.b > t2.b ORDER BY t1.a, t2.b;
+
+SELECT t2.* FROM t1 LEFT JOIN t2 USING (a) ORDER BY t2.a, t2.b LIMIT 1;
+
+--sorted_result
+SELECT t2.* FROM t2 LEFT OUTER JOIN t1 ON t1.a = t2.a WHERE t1.a IS NOT NULL;
+
+SELECT SUM(t2.a) FROM t1 RIGHT JOIN t2 ON t2.b = t1.b;
+
+SELECT MIN(t2.a) FROM t1 RIGHT OUTER JOIN t2 USING (b,a);
+
+--sorted_result
+SELECT alias.b FROM t1 NATURAL JOIN ( SELECT a,b FROM t1 ) alias WHERE b > '';
+
+--sorted_result
+SELECT t2.b FROM ( SELECT a,b FROM t1 ) alias NATURAL LEFT JOIN t2 WHERE b IS NOT NULL;
+
+--sorted_result
+SELECT t1.*, t2.* FROM t1 NATURAL LEFT OUTER JOIN t2;
+
+--sorted_result
+SELECT t2_2.* FROM t2 t2_1 NATURAL RIGHT JOIN t2 t2_2 WHERE t2_1.a IN ( SELECT a FROM t1 );
+
+--sorted_result
+SELECT t1_2.b FROM t1 t1_1 NATURAL RIGHT OUTER JOIN t1 t1_2 INNER JOIN t2;
+
+# Subquery as scalar operand, subquery in the FROM clause
+
+--sorted_result
+SELECT ( SELECT MIN(a) FROM ( SELECT a,b FROM t1 ) alias1 ) AS min_a FROM t2;
+
+# Comparison using subqueries
+
+--sorted_result
+SELECT a,b FROM t2 WHERE a = ( SELECT MIN(a) FROM t1 );
+
+--sorted_result
+SELECT a,b FROM t2 WHERE b LIKE ( SELECT b FROM t1 ORDER BY b LIMIT 1 );
+
+# Subquery with IN, correlated subquery
+
+--sorted_result
+SELECT t2.* FROM t1 t1_outer, t2 WHERE ( t1_outer.a, t2.b ) IN ( SELECT a, b FROM t2 WHERE a = t1_outer.a );
+
+# Subquery with ANY, ALL
+
+--sorted_result
+SELECT a,b FROM t2 WHERE b = ANY ( SELECT b FROM t1 WHERE a > 1 );
+
+--sorted_result
+SELECT a,b FROM t2 WHERE b > ALL ( SELECT b FROM t1 WHERE b < 'foo' );
+
+# Row subqueries
+
+--sorted_result
+SELECT a,b FROM t1 WHERE ROW(a, b) = ( SELECT a, b FROM t2 ORDER BY a, b LIMIT 1 );
+
+# Subquery with EXISTS
+
+--sorted_result
+SELECT a,b FROM t1 WHERE EXISTS ( SELECT a,b FROM t2 WHERE t2.b > t1.b );
+
+# Subquery in ORDER BY
+
+--sorted_result
+SELECT t1.* FROM t1, t2 ORDER BY ( SELECT b FROM t1 WHERE a IS NULL ORDER BY b LIMIT 1 ) DESC;
+
+# Subquery in HAVING
+
+--sorted_result
+SELECT a, b FROM t1 HAVING a IN ( SELECT a FROM t2 WHERE b = t1.b );
+
+# Union
+
+--sorted_result
+SELECT a,b FROM t1 UNION SELECT a,b FROM t2 UNION DISTINCT SELECT a,b FROM t1;
+
+--sorted_result
+SELECT a,b FROM t1 UNION SELECT a,b FROM t2 UNION ALL SELECT a,b FROM t1;
+
+
+# Cleanup
+DROP TABLE t1, t2;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/select_for_update.test b/storage/rocksdb/mysql-test/rocksdb/t/select_for_update.test
new file mode 100644
index 00000000000..14fdfb7896c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/select_for_update.test
@@ -0,0 +1,55 @@
+--source include/have_rocksdb.inc
+
+#
+# SELECT .. FOR UPDATE
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+--enable_connect_log
+
+--source include/count_sessions.inc
+
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'a');
+
+--connect (con1,localhost,root,,)
+BEGIN;
+--sorted_result
+SELECT a,b FROM t1 WHERE b='a' FOR UPDATE;
+
+--connection default
+SET lock_wait_timeout = 1;
+
+# Should still be able to select
+
+--sorted_result
+SELECT a,b FROM t1 WHERE b='a';
+
+# ... but not with LOCK IN SHARE MODE
+
+--sorted_result
+--error ER_LOCK_WAIT_TIMEOUT
+SELECT a,b FROM t1 WHERE b='a' LOCK IN SHARE MODE;
+
+--error ER_LOCK_WAIT_TIMEOUT
+UPDATE t1 SET b='c' WHERE b='a';
+
+--connection con1
+COMMIT;
+--sorted_result
+SELECT a,b FROM t1;
+
+--disconnect con1
+--connection default
+# Now it can be updated all right
+UPDATE t1 SET b='c' WHERE b='a';
+--sorted_result
+SELECT a,b FROM t1;
+
+DROP TABLE t1;
+
+--source include/wait_until_count_sessions.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/select_for_update_skip_locked_nowait.test b/storage/rocksdb/mysql-test/rocksdb/t/select_for_update_skip_locked_nowait.test
new file mode 100644
index 00000000000..c6ebbfa3f01
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/select_for_update_skip_locked_nowait.test
@@ -0,0 +1,48 @@
+--source include/have_rocksdb.inc
+
+##############################################################################
+## SKIP LOCKED | NOWAIT are *not* supported for SELECT...FOR UPDATE in RocksDB
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+create table t1 (a int primary key) engine=rocksdb;
+
+insert into t1 values (1), (2), (3);
+
+### SKIP LOCKED
+
+--echo Should succeed since no table gets involved
+select 1 for update skip locked;
+
+--error ER_NO_SUCH_TABLE
+select * from nonexistence for update skip locked;
+
+--error ER_ILLEGAL_HA
+select * from t1 for update skip locked;
+
+--error ER_ILLEGAL_HA
+select * from t1 where a > 1 and a < 3 for update skip locked;
+
+--error ER_ILLEGAL_HA
+insert into t1 select * from t1 for update skip locked;
+
+### NOWAIT
+
+--echo Should succeed since no table gets involved
+select 1 for update nowait;
+
+--error ER_NO_SUCH_TABLE
+select * from nonexistence for update nowait;
+
+--error ER_ILLEGAL_HA
+select * from t1 for update nowait;
+
+--error ER_ILLEGAL_HA
+select * from t1 where a > 1 and a < 3 for update nowait;
+
+--error ER_ILLEGAL_HA
+insert into t1 select * from t1 for update nowait;
+
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/select_lock_in_share_mode.test b/storage/rocksdb/mysql-test/rocksdb/t/select_lock_in_share_mode.test
new file mode 100644
index 00000000000..23ce6d45234
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/select_lock_in_share_mode.test
@@ -0,0 +1,54 @@
+--source include/have_rocksdb.inc
+
+#
+# SELECT .. LOCK IN SHARE MODE
+#
+# If the engine has its own lock timeouts,
+# it makes sense to set them to the minimum to decrease
+# the duration of the test.
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+--enable_connect_log
+
+--source include/count_sessions.inc
+
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'a');
+
+--connect (con1,localhost,root,,)
+BEGIN;
+--sorted_result
+SELECT a,b FROM t1 WHERE b='a' LOCK IN SHARE MODE;
+
+--connection default
+SET lock_wait_timeout = 1;
+
+# Should still be able to select
+
+--sorted_result
+SELECT a,b FROM t1 WHERE b='a';
+--sorted_result
+SELECT a,b FROM t1 WHERE b='a' LOCK IN SHARE MODE;
+
+--error ER_LOCK_WAIT_TIMEOUT
+UPDATE t1 SET b='c' WHERE b='a';
+
+--connection con1
+COMMIT;
+--sorted_result
+SELECT a,b FROM t1;
+
+--disconnect con1
+--connection default
+# Now it can be updated all right
+UPDATE t1 SET b='c' WHERE b='a';
+--sorted_result
+SELECT a,b FROM t1;
+
+DROP TABLE t1;
+
+--source include/wait_until_count_sessions.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/set_checkpoint.inc b/storage/rocksdb/mysql-test/rocksdb/t/set_checkpoint.inc
new file mode 100644
index 00000000000..aae9db6c927
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/set_checkpoint.inc
@@ -0,0 +1,30 @@
+# Usage:
+# let $checkpoint = <value to set the checkpoint>;
+# let $succeeds = <1 if checkpoint creation should succeed, 0 otherwise>;
+# --source set_checkpoint.inc
+
+
+if ($succeeds)
+{
+ # Create checkpoint
+ --replace_result '$checkpoint' [CHECKPOINT]
+ eval SET GLOBAL ROCKSDB_CREATE_CHECKPOINT = '$checkpoint';
+
+ # Check checkpoint
+ --list_files $checkpoint CURRENT
+
+ # Cleanup
+ --remove_files_wildcard $checkpoint *
+ --rmdir $checkpoint
+ --disable_abort_on_error
+ --enable_abort_on_error
+}
+if (!$succeeds)
+{
+ --disable_result_log
+ --disable_query_log
+ --error ER_GET_ERRMSG
+ eval SET GLOBAL ROCKSDB_CREATE_CHECKPOINT = '$checkpoint';
+ --enable_query_log
+ --enable_result_log
+}
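The usage header at the top of set_checkpoint.inc is easiest to read next to a concrete caller. Below is a minimal sketch of such a caller; it is not part of this patch, it assumes a test file in the same directory, and both paths are hypothetical:

```
# Hypothetical caller for set_checkpoint.inc (not part of this patch).

# Expect success: a creatable directory under the test's tmp dir.
let $checkpoint = $MYSQL_TMP_DIR/checkpoint_demo;
let $succeeds = 1;
--source set_checkpoint.inc

# Expect failure: a path the server should not be able to create.
let $checkpoint = /nonexistent/dir/checkpoint_demo;
let $succeeds = 0;
--source set_checkpoint.inc
```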
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/show_engine.test b/storage/rocksdb/mysql-test/rocksdb/t/show_engine.test
new file mode 100644
index 00000000000..ccca197d317
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/show_engine.test
@@ -0,0 +1,103 @@
+--source include/have_rocksdb.inc
+--source include/have_partition.inc
+
+--source include/restart_mysqld.inc
+
+#
+# SHOW ENGINE STATUS command
+# Checking that the command doesn't produce an error.
+# If it starts producing an actual result, the result file
+# will need to be updated, and possibly masked.
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+DROP TABLE IF EXISTS t3;
+DROP TABLE IF EXISTS t4;
+--enable_warnings
+
+CREATE TABLE t1 (i INT, PRIMARY KEY (i) COMMENT 'cf_t1') ENGINE = ROCKSDB;
+CREATE TABLE t2 (j INT, PRIMARY KEY (j) COMMENT 'rev:cf_t2') ENGINE = ROCKSDB;
+CREATE TABLE t3 (k INT, PRIMARY KEY (k) COMMENT 'cf_t1') ENGINE = ROCKSDB;
+
+# With partition-based column family creation we now expect all the partitions
+# to belong to the default column family, because no per-partition mapping was
+# specified in this case.
+CREATE TABLE t4 (l INT, PRIMARY KEY (l) COMMENT 'cf_t4') ENGINE = ROCKSDB
+ PARTITION BY KEY(l) PARTITIONS 4;
+
+SET @save.rocksdb_max_background_jobs= @@global.rocksdb_max_background_jobs;
+SET GLOBAL rocksdb_max_background_jobs= 1;
+
+INSERT INTO t1 VALUES (1), (2), (3);
+SELECT COUNT(*) FROM t1;
+
+INSERT INTO t2 VALUES (1), (2), (3), (4);
+SELECT COUNT(*) FROM t2;
+
+INSERT INTO t4 VALUES (1), (2), (3), (4), (5);
+SELECT COUNT(*) FROM t4;
+
+SET GLOBAL rocksdb_force_flush_memtable_now=1;
+SET GLOBAL rocksdb_compact_cf="cf_t1";
+--replace_column 2 # 3 #
+SHOW ENGINE rocksdb STATUS;
+
+# Fetch data from information schema as well
+--replace_column 3 #
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_CFSTATS;
+
+--replace_column 2 #
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_DBSTATS;
+
+SELECT TABLE_SCHEMA, TABLE_NAME, PARTITION_NAME, COUNT(STAT_TYPE)
+FROM INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT
+WHERE TABLE_SCHEMA = 'test'
+GROUP BY TABLE_NAME, PARTITION_NAME;
+
+--replace_column 3 #
+SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_CF_OPTIONS;
+
+DROP TABLE t1;
+DROP TABLE t2;
+DROP TABLE t3;
+DROP TABLE t4;
+
+SHOW ENGINE rocksdb MUTEX;
+# For SHOW ALL MUTEX, even the number of lines is volatile, so result logging is disabled
+--disable_result_log
+SHOW ENGINE ALL MUTEX;
+--enable_result_log
+
+# The output from SHOW ENGINE ROCKSDB TRANSACTION STATUS has some
+# non-deterministic results. Replace the timestamp with 'TIMESTAMP', the
+# number of seconds active with 'NUM', the thread id with 'TID' and the thread
+# pointer with 'PTR'. This test may fail in the future if it is being run in
+# parallel with other tests as the number of snapshots would then be greater
+# than expected. We may need to turn off the result log if that is the case.
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /(ACTIVE) [0-9]+ /\1 NUM / /(thread id) [0-9]+/\1 TID/ /0x[0-9a-f]+/PTR/
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+
+#select sleep(10);
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /(ACTIVE) [0-9]+ /\1 NUM / /(thread id) [0-9]+/\1 TID/ /0x[0-9a-f]+/PTR/ /(query id) [0-9]+/\1 QID/ /(root) [a-z ]+/\1 ACTION/
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+
+ROLLBACK;
+
+# Check if explicit snapshots are correctly populated
+START TRANSACTION WITH SHARED ROCKSDB SNAPSHOT;
+--replace_column 2 # 3 #
+SHOW ENGINE rocksdb STATUS;
+ROLLBACK;
+CREATE EXPLICIT rocksdb SNAPSHOT;
+--replace_column 2 # 3 #
+SHOW ENGINE rocksdb STATUS;
+RELEASE EXPLICIT rocksdb SNAPSHOT;
+--replace_column 2 # 3 #
+SHOW ENGINE rocksdb STATUS;
+
+
+# Restore old values
+SET GLOBAL rocksdb_max_background_jobs= @save.rocksdb_max_background_jobs;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/show_table_status-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/show_table_status-master.opt
new file mode 100644
index 00000000000..843f7012cfa
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/show_table_status-master.opt
@@ -0,0 +1,3 @@
+--rocksdb_debug_optimizer_n_rows=1000
+--rocksdb_table_stats_sampling_pct=100
+--userstat=ON
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/show_table_status.test b/storage/rocksdb/mysql-test/rocksdb/t/show_table_status.test
new file mode 100644
index 00000000000..a293b9ee6b8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/show_table_status.test
@@ -0,0 +1,65 @@
+--source include/have_rocksdb.inc
+--source include/not_windows.inc # decorated database names are too long, exceeding OS limits
+
+#
+# SHOW TABLE STATUS statement
+#
+
+###################################
+# TODO:
+# The result file is likely to change
+# if MDEV-4197 is fixed
+###################################
+
+--disable_warnings
+DROP TABLE IF EXISTS t1, t2, t3;
+--enable_warnings
+
+CREATE TABLE t1 (a INT, b CHAR(8) PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (100,'a'),(2,'foo');
+
+CREATE TABLE t2 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t2 (a,b) VALUES (1,'bar');
+
+set global rocksdb_force_flush_memtable_now = true;
+
+CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb CHARACTER SET utf8;
+
+--replace_column 6 # 7 #
+SHOW TABLE STATUS WHERE name IN ( 't1', 't2', 't3' );
+
+# Some statistics don't get updated as quickly. The Data_length and
+# Avg_row_length are trailing statistics, meaning they don't get updated
+# for the current SST until the next SST is written. Insert a bunch of data,
+# then flush, then insert a bit more and do another flush to get them to show
+# up.
+
+--disable_query_log
+let $count = 2;
+let $max = 10000;
+while ($count < $max) {
+ eval INSERT INTO t2 (a) VALUES ($count);
+ inc $count;
+}
+
+set global rocksdb_force_flush_memtable_now = true;
+eval INSERT INTO t2 (a) VALUES ($max);
+set global rocksdb_force_flush_memtable_now = true;
+--enable_query_log
+
+# We expect the number of rows to be 10000. Data_len and Avg_row_len
+# may vary, depending on the built-in compression library.
+--replace_column 6 # 7 #
+SHOW TABLE STATUS WHERE name LIKE 't2';
+DROP TABLE t1, t2, t3;
+
+#
+# Confirm that long db and table names work.
+#
+
+CREATE DATABASE `db_new..............................................end`;
+USE `db_new..............................................end`;
+CREATE TABLE `t1_new..............................................end`(a int) engine=rocksdb;
+INSERT INTO `t1_new..............................................end` VALUES (1);
+--query_vertical SELECT TABLE_SCHEMA, TABLE_NAME FROM information_schema.table_statistics WHERE TABLE_NAME = 't1_new..............................................end'
+DROP DATABASE `db_new..............................................end`;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/shutdown-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/shutdown-master.opt
new file mode 100644
index 00000000000..d6c7939eae6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/shutdown-master.opt
@@ -0,0 +1 @@
+--log-bin --binlog_format=row --rocksdb_default_cf_options=write_buffer_size=64k
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/shutdown.test b/storage/rocksdb/mysql-test/rocksdb/t/shutdown.test
new file mode 100644
index 00000000000..ba625deb514
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/shutdown.test
@@ -0,0 +1,36 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+# Ensure bin log is enabled.
+SHOW GLOBAL VARIABLES LIKE "log_bin";
+
+# Create the table and insert some keys
+CREATE TABLE t1 (i INT, PRIMARY KEY (i) COMMENT 'cf_t1') ENGINE = ROCKSDB;
+
+--disable_query_log
+let $max = 1000;
+let $i = 1;
+while ($i <= $max) {
+ let $insert = INSERT INTO t1 VALUES ($i);
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+
+# Restart the server
+let $restart_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
+--exec echo "wait" > $restart_file
+--shutdown_server 10
+--source include/wait_until_disconnected.inc
+-- exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+-- enable_reconnect
+-- source include/wait_until_connected_again.inc
+
+# Verify table has correct rows
+SELECT COUNT(*) FROM t1;
+
+#cleanup
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/singledelete-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/singledelete-master.opt
new file mode 100644
index 00000000000..a3d2d07ec79
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/singledelete-master.opt
@@ -0,0 +1 @@
+--rocksdb_default_cf_options=write_buffer_size=16k --rocksdb_strict_collation_check=0
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/singledelete.test b/storage/rocksdb/mysql-test/rocksdb/t/singledelete.test
new file mode 100644
index 00000000000..5a9d17e0255
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/singledelete.test
@@ -0,0 +1,105 @@
+--source include/have_rocksdb.inc
+
+# only SingleDelete increases
+CREATE TABLE t1 (id INT, value int, PRIMARY KEY (id), INDEX (value)) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (1,1);
+select variable_value into @s from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_number_sst_entry_delete';
+--disable_query_log
+let $i = 1;
+while ($i <= 10000) {
+ let $update = UPDATE t1 SET value=value+1 WHERE value=$i;
+ inc $i;
+ eval $update;
+}
+--enable_query_log
+optimize table t1;
+select case when variable_value-@s > 5 and variable_value-@s < 100 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
+select case when variable_value-@d < 10 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_delete';
+
+
+# both SingleDelete and Delete increases
+CREATE TABLE t2 (id INT, value int, PRIMARY KEY (id), INDEX (value)) ENGINE=RocksDB;
+INSERT INTO t2 VALUES (1,1);
+select variable_value into @s from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_number_sst_entry_delete';
+--disable_query_log
+let $i = 1;
+while ($i <= 10000) {
+ let $update = UPDATE t2 SET id=id+1 WHERE id=$i;
+ inc $i;
+ eval $update;
+}
+--enable_query_log
+optimize table t2;
+select case when variable_value-@s > 5 and variable_value-@s < 100 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
+select case when variable_value-@d > 9000 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_delete';
+
+# only Delete increases
+CREATE TABLE t3 (id INT, value int, PRIMARY KEY (id)) ENGINE=RocksDB;
+INSERT INTO t3 VALUES (1,1);
+select variable_value into @s from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_number_sst_entry_delete';
+--disable_query_log
+let $i = 1;
+while ($i <= 10000) {
+ let $update = UPDATE t3 SET id=id+1 WHERE id=$i;
+ inc $i;
+ eval $update;
+}
+--enable_query_log
+optimize table t3;
+select case when variable_value-@s = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
+select case when variable_value-@d > 9000 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_delete';
+
+# only SingleDelete increases
+CREATE TABLE t4 (id INT, PRIMARY KEY (id)) ENGINE=RocksDB;
+INSERT INTO t4 VALUES (1);
+select variable_value into @s from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_number_sst_entry_delete';
+--disable_query_log
+let $i = 1;
+while ($i <= 10000) {
+ let $update = UPDATE t4 SET id=id+1 WHERE id=$i;
+ inc $i;
+ eval $update;
+}
+--enable_query_log
+optimize table t4;
+select case when variable_value-@s > 5 and variable_value-@s < 100 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
+select case when variable_value-@d < 10 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_delete';
+
+# only SingleDelete increases
+CREATE TABLE t5 (id1 INT, id2 INT, PRIMARY KEY (id1, id2), INDEX(id2)) ENGINE=RocksDB;
+INSERT INTO t5 VALUES (1, 1);
+select variable_value into @s from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_number_sst_entry_delete';
+--disable_query_log
+let $i = 1;
+while ($i <= 10000) {
+ let $update = UPDATE t5 SET id1=id1+1 WHERE id1=$i;
+ inc $i;
+ eval $update;
+}
+--enable_query_log
+optimize table t5;
+select case when variable_value-@s > 5 and variable_value-@s < 100 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
+select case when variable_value-@d < 10 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_delete';
+
+# SingleDelete used for PK. Verify old PK is always deleted.
+CREATE TABLE t6 (
+ pk VARCHAR(64) COLLATE latin1_swedish_ci PRIMARY KEY
+) ENGINE=RocksDB;
+INSERT INTO t6 VALUES ('a');
+SET GLOBAL rocksdb_force_flush_memtable_now=1;
+SELECT * FROM t6;
+UPDATE t6 SET pk='A' WHERE pk='a';
+SELECT * FROM t6;
+DELETE FROM t6 where pk='A';
+--echo SELECT should return nothing;
+SELECT * FROM t6;
+SET GLOBAL rocksdb_force_flush_memtable_now=1;
+--echo SELECT should return nothing;
+SELECT * FROM t6;
+
+DROP TABLE t1, t2, t3, t4, t5, t6;
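All of the scenarios above rely on the same measurement pattern: snapshot a global status counter, run a workload, force a compaction, then assert on the counter's delta within a tolerance band rather than on an exact value. A condensed sketch of that pattern follows, assuming a table t1 shaped like the ones above; it is illustrative only, not part of the test:

```
# Sketch of the counter-delta pattern used throughout singledelete.test.
# 1. Snapshot the status counter.
select variable_value into @s from information_schema.global_status
  where variable_name = 'rocksdb_number_sst_entry_singledelete';
# 2. Run the workload and force a compaction so SST entries get written.
update t1 set value = value + 1 where id = 1;
optimize table t1;
# 3. Assert on a range, not an exact count; compaction timing makes exact
#    values unstable across runs.
select case when variable_value - @s between 1 and 100 then 'true' else 'false' end
  from information_schema.global_status
  where variable_name = 'rocksdb_number_sst_entry_singledelete';
```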
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/skip_core_dump_on_error-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/skip_core_dump_on_error-master.opt
new file mode 100644
index 00000000000..c07b063f07c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/skip_core_dump_on_error-master.opt
@@ -0,0 +1 @@
+--rocksdb_strict_collation_check=off --binlog_format=row --log-bin
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/skip_core_dump_on_error.test b/storage/rocksdb/mysql-test/rocksdb/t/skip_core_dump_on_error.test
new file mode 100644
index 00000000000..451eed057ac
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/skip_core_dump_on_error.test
@@ -0,0 +1,53 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+--source include/not_valgrind.inc
+
+--enable_connect_log
+--enable_info
+
+# setup search pattern and file (new error log file)
+--let SEARCH_FILE=$MYSQLTEST_VARDIR/tmp/skip_core_dump_on_error.err
+
+# restart the server with the custom error log file
+--let $_mysqld_option=--log-error=$SEARCH_FILE --default-storage-engine=rocksdb
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--source include/restart_mysqld_with_option.inc
+
+# setup
+create table mz(c int);
+insert into mz values(1);
+commit;
+
+# simulate a write error
+SET debug= '+d,abort_with_io_write_error';
+
+# we want to abort server if we fail to write (ABORT_SERVER)
+set global binlog_error_action=1;
+
+# display the values of the key parameters
+show session variables like 'debug';
+show global variables like 'binlog_error_action';
+show global variables like 'skip_core_dump_on_error';
+
+--echo # crash_during_update
+# tell client that crash is expected
+--error 1598
+# run an update to trigger a write error
+update mz set c=13;
+
+# should find server abort (prints: Pattern "..." found)
+--echo # server aborted
+--let SEARCH_PATTERN=mysqld got signal 6
+--source include/search_pattern.inc
+
+# should not find a core dump (prints: Pattern "..." not found)
+--echo # but no core written
+--let SEARCH_PATTERN=Writing a core file
+--source include/search_pattern.inc
+
+--let $_mysqld_option=
+--source include/start_mysqld.inc
+--remove_file $SEARCH_FILE
+
+# tidy up
+drop table mz;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test b/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test
new file mode 100644
index 00000000000..c4321462dfd
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test
@@ -0,0 +1,39 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+
+--let $_server_id= `SELECT @@server_id`
+
+create table t1 (pk int primary key) engine=rocksdb;
+
+# Create a .frm file without a matching table
+--exec cp $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test/t1.frm $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test/t1#sql-test.frm
+
+--source include/restart_mysqld.inc
+
+show tables;
+
+# MariaDB produces a warning:
+call mtr.add_suppression('Invalid .old.. table or database name .t1#sql-test.');
+
+# This will append '#sql-test' to the end of the new name
+set session debug_dbug="+d,gen_sql_table_name";
+rename table t1 to t2;
+set session debug_dbug= "-d,gen_sql_table_name";
+
+show tables;
+
+# Remove the corresponding .frm files
+--remove_files_wildcard $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test *t1*.frm
+--remove_files_wildcard $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test *t2*.frm
+
+# Restart the server with a table that is registered in RocksDB but has no .frm file
+--source include/restart_mysqld.inc
+
+show tables;
+
+# try to recreate a table with the same name
+create table t2 (pk int primary key) engine=rocksdb;
+
+show tables;
+
+drop table t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/slow_query_log-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/slow_query_log-master.opt
new file mode 100644
index 00000000000..fc5c3ed4c7a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/slow_query_log-master.opt
@@ -0,0 +1 @@
+--log-slow-extra --rocksdb-perf-context-level=2
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/slow_query_log.test b/storage/rocksdb/mysql-test/rocksdb/t/slow_query_log.test
new file mode 100644
index 00000000000..9f1694ab8bd
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/slow_query_log.test
@@ -0,0 +1,37 @@
+--source include/have_rocksdb.inc
+#Unixism (exec awk)
+-- source include/not_windows.inc
+
+SET @cur_long_query_time = @@long_query_time;
+# Set the long query time to something big so that nothing unexpected gets into it
+SET @@long_query_time = 600;
+# Test the slow query log feature
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (id INT PRIMARY KEY, value INT) ENGINE=ROCKSDB;
+
+--disable_query_log
+let $max = 10000;
+let $i = 1;
+while ($i < $max) {
+ let $insert = INSERT INTO t1 VALUES ($i, $i);
+ inc $i;
+ eval $insert;
+}
+
+DELETE FROM t1 WHERE id < 2500;
+--enable_query_log
+
+SET @@long_query_time = 0;
+# we expect this query to be reflected in the slow query log
+SELECT COUNT(*) FROM t1;
+
+SET @@long_query_time = @cur_long_query_time;
+
+# Verify the output of the slow query log contains counts for the skipped keys
+--exec awk -f suite/rocksdb/slow_query_log.awk $MYSQLTEST_VARDIR/mysqld.1/mysqld-slow.log
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/statistics-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/statistics-master.opt
new file mode 100644
index 00000000000..8a56deb0299
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/statistics-master.opt
@@ -0,0 +1,3 @@
+--rocksdb_default_cf_options=max_write_buffer_number_to_maintain=10
+--rocksdb_debug_optimizer_n_rows=1000
+--rocksdb_table_stats_sampling_pct=100
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/statistics.test b/storage/rocksdb/mysql-test/rocksdb/t/statistics.test
new file mode 100644
index 00000000000..70fc2f72b7e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/statistics.test
@@ -0,0 +1,74 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+DROP TABLE IF EXISTS t3;
+--enable_warnings
+
+# a table with an index in the default CF
+create table t1(
+ id bigint not null primary key auto_increment,
+ a varchar(255) not null,
+ b bigint,
+ index t1_1(b)
+) engine=rocksdb;
+
+# a table with index in a different CF
+create table t2(
+ id bigint not null primary key auto_increment,
+ a varchar(255) not null,
+ b bigint,
+ index t2_1(b) comment 'cf_t3'
+) engine=rocksdb;
+
+# a table with an index in a reverse CF
+create table t3(
+ id bigint not null primary key auto_increment,
+ a varchar(255) not null,
+ b bigint,
+ index t3_1(b) comment 'rev:cf_t4'
+) engine=rocksdb;
+
+--disable_query_log
+let $i=0;
+while ($i<100000)
+{
+ inc $i;
+ eval insert t1(a,b) values(concat('a',$i,'b',$i,'c',$i), $i);
+ if ($i<5000)
+ {
+ eval insert t2(a,b) values(concat('a',$i,'b',$i,'c',$i), $i);
+ eval insert t3(a,b) values(concat('a',$i,'b',$i,'c',$i), $i);
+ }
+}
+--enable_query_log
+
+# should have some statistics before the memtable flush
+SELECT table_name, table_rows FROM information_schema.tables WHERE table_schema = DATABASE() and table_name <> 't1';
+
+# Because the timing of memtable flushes varies, just verify that t1 has
+# fewer rows than the expected number.
+SELECT CASE WHEN table_rows < 100000 then 'true' else 'false' end from information_schema.tables where table_name = 't1';
+
+# flush and get even better statistics
+set global rocksdb_force_flush_memtable_now = true;
+SELECT table_name, table_rows FROM information_schema.tables WHERE table_schema = DATABASE();
+SELECT table_name, data_length>0, index_length>0 FROM information_schema.tables WHERE table_schema = DATABASE();
+
+# restart the server, check the stats
+--source include/restart_mysqld.inc
+
+# give the server a chance to load in statistics
+--sleep 5
+
+SELECT table_name, table_rows FROM information_schema.tables WHERE table_schema = DATABASE();
+SELECT table_name, data_length>0, index_length>0 FROM information_schema.tables WHERE table_schema = DATABASE();
+
+analyze table t1,t2,t3,t4,t5;
+
+# make sure that stats do not change after calling analyze table
+SELECT table_name, table_rows FROM information_schema.tables WHERE table_schema = DATABASE();
+SELECT table_name, data_length>0, index_length>0 FROM information_schema.tables WHERE table_schema = DATABASE();
+
+drop table t1, t2, t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/table_stats-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/table_stats-master.opt
new file mode 100644
index 00000000000..be8a06eacae
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/table_stats-master.opt
@@ -0,0 +1 @@
+--userstat
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/table_stats.test b/storage/rocksdb/mysql-test/rocksdb/t/table_stats.test
new file mode 100644
index 00000000000..3eb58098372
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/table_stats.test
@@ -0,0 +1,29 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+# Create the table and insert some keys
+CREATE TABLE t1 (i INT, PRIMARY KEY (i) COMMENT 'cf_t1') ENGINE = ROCKSDB;
+
+--disable_query_log
+let $max = 1000;
+let $i = 1;
+while ($i <= $max) {
+ let $insert = INSERT INTO t1 VALUES ($i);
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+
+# Verify table has correct rows
+SELECT COUNT(*) FROM t1;
+
+# Verify the table stats are returned
+--vertical_results
+SELECT * FROM INFORMATION_SCHEMA.TABLE_STATISTICS WHERE TABLE_NAME = "t1";
+--horizontal_results
+
+#cleanup
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_ai.test b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_ai.test
new file mode 100644
index 00000000000..8fb4539b401
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_ai.test
@@ -0,0 +1,29 @@
+--source include/have_rocksdb.inc
+
+#
+# Check whether AUTO_INCREMENT option
+# is supported in CREATE and ALTER TABLE
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb AUTO_INCREMENT=10;
+SHOW CREATE TABLE t1;
+INSERT INTO t1 VALUES (NULL);
+SELECT * FROM t1;
+
+ALTER TABLE t1 AUTO_INCREMENT=100;
+SHOW CREATE TABLE t1;
+INSERT INTO t1 VALUES (NULL);
+SELECT * FROM t1 ORDER BY a;
+
+ALTER TABLE t1 AUTO_INCREMENT=50;
+SHOW CREATE TABLE t1;
+INSERT INTO t1 VALUES (NULL);
+SELECT * FROM t1 ORDER BY a;
+
+DROP TABLE t1;
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_avg_row_length.test b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_avg_row_length.test
new file mode 100644
index 00000000000..3e6797a8686
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_avg_row_length.test
@@ -0,0 +1,23 @@
+--source include/have_rocksdb.inc
+
+#
+# Check whether AVG_ROW_LENGTH option
+# is supported in CREATE and ALTER TABLE
+#
+# Note: the test does not check whether the option
+# has any real effect on the table, only
+# that it's accepted
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb AVG_ROW_LENGTH=300;
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 AVG_ROW_LENGTH=30000000;
+SHOW CREATE TABLE t1;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_checksum.test b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_checksum.test
new file mode 100644
index 00000000000..3b49b967937
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_checksum.test
@@ -0,0 +1,19 @@
+--source include/have_rocksdb.inc
+
+#
+# Check whether CHECKSUM option is supported
+# in CREATE and ALTER TABLE.
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb CHECKSUM=1;
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 CHECKSUM=0;
+SHOW CREATE TABLE t1;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_connection.test b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_connection.test
new file mode 100644
index 00000000000..b97b3dd9d4c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_connection.test
@@ -0,0 +1,32 @@
+--source include/have_rocksdb.inc
+
+#
+# Check whether CONNECTION option
+# is supported in CREATE and ALTER TABLE
+#
+# Note: the test does not check whether the option
+# has any real effect on the table, only
+# that it's accepted
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE DATABASE test_remote;
+CREATE SERVER test_connection FOREIGN DATA WRAPPER mysql
+OPTIONS (USER 'root', HOST 'localhost', DATABASE 'test_remote');
+CREATE SERVER test_connection2 FOREIGN DATA WRAPPER mysql
+OPTIONS (USER 'root', HOST 'localhost', DATABASE 'test_remote');
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb CONNECTION='test_connection';
+SHOW CREATE TABLE t1;
+ALTER TABLE t1 CONNECTION='test_connection2';
+SHOW CREATE TABLE t1;
+
+DROP TABLE t1;
+
+DROP SERVER test_connection;
+DROP SERVER test_connection2;
+DROP DATABASE test_remote;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_data_index_dir.test b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_data_index_dir.test
new file mode 100644
index 00000000000..6fcfd491af1
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_data_index_dir.test
@@ -0,0 +1,60 @@
+--source include/have_rocksdb.inc
+
+--source include/have_partition.inc
+--source include/not_windows.inc
+
+#
+# Check that when either DATA DIRECTORY or INDEX DIRECTORY is specified,
+# MyRocks returns an appropriate error. We don't support this
+# functionality and therefore shouldn't just silently accept the values.
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+# On a Mac, strerror() prints "Unknown error: nn", as
+# opposed to "Unknown error nn" on Linux/etc.
+# Replacing 'error:' with 'error' below to make the output uniform.
+
+--replace_result error: error
+--error ER_CANT_CREATE_TABLE
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb DATA DIRECTORY = '/foo/bar/data';
+--replace_result error: error
+show warnings;
+
+--replace_result error: error
+--error ER_CANT_CREATE_TABLE
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb INDEX DIRECTORY = '/foo/bar/index';
+--replace_result error: error
+show warnings;
+
+#
+# Verify that we'll get the same error codes when using the partitions.
+#
+
+--replace_result error: error
+--error ER_CANT_CREATE_TABLE
+CREATE TABLE t1 (id INT NOT NULL PRIMARY KEY) ENGINE=rocksdb PARTITION BY RANGE (id)
+(
+ PARTITION P0 VALUES LESS THAN (1000)
+ DATA DIRECTORY = '/foo/bar/data/',
+ PARTITION P1 VALUES LESS THAN (2000)
+ DATA DIRECTORY = '/foo/bar/data/',
+ PARTITION P2 VALUES LESS THAN (MAXVALUE)
+);
+--replace_result error: error
+show warnings;
+
+--replace_result error: error
+--error ER_CANT_CREATE_TABLE
+CREATE TABLE t1 (id int not null primary key) ENGINE=rocksdb PARTITION BY RANGE (id)
+(
+ PARTITION P0 VALUES LESS THAN (1000)
+ INDEX DIRECTORY = '/foo/bar/data/',
+ PARTITION P1 VALUES LESS THAN (2000)
+ INDEX DIRECTORY = '/foo/bar/data/',
+ PARTITION P2 VALUES LESS THAN (MAXVALUE)
+);
+--replace_result error: error
+show warnings;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_delay_key_write.test b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_delay_key_write.test
new file mode 100644
index 00000000000..85cd45e969d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_delay_key_write.test
@@ -0,0 +1,23 @@
+--source include/have_rocksdb.inc
+
+#
+# Check whether DELAY_KEY_WRITE option
+# is supported in CREATE and ALTER TABLE
+#
+# Note: the test does not check whether the option
+# has any real effect on the table, only
+# that it's accepted
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb DELAY_KEY_WRITE=1;
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 DELAY_KEY_WRITE=0;
+SHOW CREATE TABLE t1;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_insert_method.test b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_insert_method.test
new file mode 100644
index 00000000000..e289827ac72
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_insert_method.test
@@ -0,0 +1,23 @@
+--source include/have_rocksdb.inc
+
+#
+# Check whether INSERT_METHOD option
+# is supported in CREATE and ALTER TABLE
+#
+# Note: the test does not check whether the option
+# has any real effect on the table, only
+# that it's accepted (and apparently ignored)
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb INSERT_METHOD=FIRST;
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 INSERT_METHOD=NO;
+SHOW CREATE TABLE t1;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_key_block_size.test b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_key_block_size.test
new file mode 100644
index 00000000000..d927c785ae9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_key_block_size.test
@@ -0,0 +1,23 @@
+--source include/have_rocksdb.inc
+
+#
+# Check whether KEY_BLOCK_SIZE option
+# is supported in CREATE and ALTER TABLE
+#
+# Note: the test does not check whether the option
+# has any real effect on the table, only
+# that it's accepted
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb KEY_BLOCK_SIZE=8;
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 KEY_BLOCK_SIZE=1;
+SHOW CREATE TABLE t1;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_max_rows.test b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_max_rows.test
new file mode 100644
index 00000000000..35aa0f4dafa
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_max_rows.test
@@ -0,0 +1,23 @@
+--source include/have_rocksdb.inc
+
+#
+# Check whether MAX_ROWS option
+# is supported in CREATE and ALTER TABLE
+#
+# Note: the test does not check whether the option
+# has any real effect on the table, only
+# that it's accepted
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb MAX_ROWS=10000000;
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 MAX_ROWS=30000000;
+SHOW CREATE TABLE t1;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_min_rows.test b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_min_rows.test
new file mode 100644
index 00000000000..d62a8771ea3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_min_rows.test
@@ -0,0 +1,23 @@
+--source include/have_rocksdb.inc
+
+#
+# Check whether MIN_ROWS option
+# is supported in CREATE and ALTER TABLE
+#
+# Note: the test does not check whether the option
+# has any real effect on the table, only
+# that it's accepted
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb MIN_ROWS=1;
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 MIN_ROWS=10000;
+SHOW CREATE TABLE t1;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_pack_keys.test b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_pack_keys.test
new file mode 100644
index 00000000000..acdb612b4d4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_pack_keys.test
@@ -0,0 +1,23 @@
+--source include/have_rocksdb.inc
+
+#
+# Check whether PACK_KEYS option
+# is supported in CREATE and ALTER TABLE
+#
+# Note: the test does not check whether the option
+# has any real effect on the table, only
+# that it's accepted
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb PACK_KEYS=1;
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 PACK_KEYS=0;
+SHOW CREATE TABLE t1;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_password.test b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_password.test
new file mode 100644
index 00000000000..e897992e933
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_password.test
@@ -0,0 +1,27 @@
+--source include/have_rocksdb.inc
+
+#
+# Check whether PASSWORD option
+# is supported in CREATE and ALTER TABLE
+#
+# Note: the test does not check whether the option
+# has any real effect on the table, only
+# that it's accepted
+#
+# This option is not supported by any known engine,
+# which is why the result file does not contain it;
+# it is nevertheless syntactically acceptable.
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb PASSWORD='password';
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 PASSWORD='new_password';
+SHOW CREATE TABLE t1;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_row_format.test b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_row_format.test
new file mode 100644
index 00000000000..de834d238ef
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_row_format.test
@@ -0,0 +1,23 @@
+--source include/have_rocksdb.inc
+
+#
+# Check whether ROW_FORMAT option
+# is supported in CREATE and ALTER TABLE
+#
+# Note: the test does not check whether the option
+# has any real effect on the table, only
+# that it's accepted
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a INT, b CHAR(8) PRIMARY KEY) ENGINE=rocksdb ROW_FORMAT=FIXED;
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 ROW_FORMAT=DYNAMIC;
+SHOW CREATE TABLE t1;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_union.test b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_union.test
new file mode 100644
index 00000000000..d3c371b18c7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_union.test
@@ -0,0 +1,28 @@
+--source include/have_rocksdb.inc
+
+#
+# Check whether UNION option
+# is supported in CREATE and ALTER TABLE
+#
+# Note: the test does not check whether the option
+# has any real effect on the table, only
+# that it's accepted
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1, child1, child2;
+--enable_warnings
+
+--disable_query_log
+CREATE TABLE child1 (a INT PRIMARY KEY) ENGINE=MyISAM;
+CREATE TABLE child2 (a INT PRIMARY KEY) ENGINE=MyISAM;
+--enable_query_log
+
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=rocksdb UNION(child1);
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 UNION = (child1,child2);
+SHOW CREATE TABLE t1;
+
+DROP TABLE t1, child1, child2;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/tbl_standard_opts.test b/storage/rocksdb/mysql-test/rocksdb/t/tbl_standard_opts.test
new file mode 100644
index 00000000000..5d60c02a7e6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/tbl_standard_opts.test
@@ -0,0 +1,42 @@
+--source include/have_rocksdb.inc
+
+#
+# Standard options in CREATE and ALTER TABLE
+#
+# Note: the test does not check whether the options
+# have any real effect on the table, only
+# that they are accepted
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+# Create table with standard options
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb
+ DEFAULT CHARACTER SET = utf8
+ COLLATE = utf8_general_ci
+ COMMENT = 'standard table options'
+;
+SHOW CREATE TABLE t1;
+
+# Alter comment
+
+ALTER TABLE t1 COMMENT = 'table altered';
+SHOW CREATE TABLE t1;
+
+# Alter ENGINE value
+
+ALTER TABLE t1 ENGINE=MEMORY;
+SHOW CREATE TABLE t1;
+ALTER TABLE t1 ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+
+# Alter character set and collation
+
+ALTER TABLE t1 CHARACTER SET = latin1 COLLATE = latin1_swedish_ci;
+SHOW CREATE TABLE t1;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/tmpdir.test b/storage/rocksdb/mysql-test/rocksdb/t/tmpdir.test
new file mode 100644
index 00000000000..28615264d35
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/tmpdir.test
@@ -0,0 +1,35 @@
+--source include/have_rocksdb.inc
+
+--echo # If rocksdb_tmpdir is NULL or "", temporary files will be created in
+--echo # the location given by the server configuration variable (--tmpdir)
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=RocksDB;
+show session variables like 'rocksdb_tmpdir';
+--echo # Connection con1
+connect (con1,localhost,root);
+show session variables like 'rocksdb_tmpdir';
+
+ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE;
+connection default;
+disconnect con1;
+DROP TABLE t1;
+
+--echo # rocksdb_tmpdir with valid location.
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=RocksDB;
+set @tmpdir = @@global.tmpdir;
+set global rocksdb_tmpdir = @tmpdir;
+show session variables like 'rocksdb_tmpdir';
+
+--echo # Connection con2
+connect (con2,localhost,root);
+--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR
+show session variables like 'rocksdb_tmpdir';
+
+ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE;
+disconnect con2;
+connection default;
+set global rocksdb_tmpdir=NULL;
+DROP TABLE t1;
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/transaction.test b/storage/rocksdb/mysql-test/rocksdb/t/transaction.test
new file mode 100644
index 00000000000..129484bac91
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/transaction.test
@@ -0,0 +1,158 @@
+--source include/have_rocksdb.inc
+
+create table t1 (id int primary key, value int, value2 varchar(100), index(value)) engine=rocksdb;
+
+insert into t1 values (1,1,1),(2,2,2),(3,3,3),(4,4,4),(5,5,5),(6,6,6),(8,8,8),(9,9,9),(10,10,10);
+
+# insert
+begin;
+insert into t1 values (11,11,11);
+--source transaction_select.inc
+rollback;
+
+# insert in the middle
+begin;
+insert into t1 values (7,7,7);
+--source transaction_select.inc
+rollback;
+
+# update non-index column by primary key
+begin;
+update t1 set value2=100 where id=1;
+--source transaction_select.inc
+rollback;
+
+# update secondary key by primary key
+begin;
+update t1 set value=100 where id=1;
+--source transaction_select.inc
+rollback;
+
+# update primary key by primary key
+begin;
+update t1 set id=100 where id=1;
+--source transaction_select.inc
+rollback;
+
+# update non-index column key by secondary key
+begin;
+update t1 set value2=100 where value=1;
+--source transaction_select.inc
+rollback;
+
+# update secondary key by secondary key
+begin;
+update t1 set value=100 where value=1;
+--source transaction_select.inc
+rollback;
+
+# update primary key by secondary key
+begin;
+update t1 set id=100 where value=1;
+--source transaction_select.inc
+rollback;
+
+# update non-index column by non-index column
+begin;
+update t1 set value2=100 where value2=1;
+--source transaction_select.inc
+rollback;
+
+# update secondary key by non-index column
+begin;
+update t1 set value=100 where value2=1;
+--source transaction_select.inc
+rollback;
+
+# update primary key column by non-index column
+begin;
+update t1 set id=100 where value2=1;
+--source transaction_select.inc
+rollback;
+
+
+# delete by primary key
+begin;
+delete from t1 where id=1;
+--source transaction_select.inc
+rollback;
+
+# delete by secondary key
+begin;
+delete from t1 where value=1;
+--source transaction_select.inc
+rollback;
+
+# delete by non-index column
+begin;
+delete from t1 where value2=1;
+--source transaction_select.inc
+rollback;
+
+# mixed
+begin;
+insert into t1 values (11,11,11);
+insert into t1 values (12,12,12);
+insert into t1 values (13,13,13);
+delete from t1 where id=9;
+delete from t1 where value=8;
+update t1 set id=100 where value2=5;
+update t1 set value=103 where value=4;
+update t1 set id=115 where id=3;
+--source transaction_select.inc
+rollback;
+
+drop table t1;
+
+--echo #
+--echo # #802: MyRocks: Statement rollback doesnt work correctly for nested statements
+--echo #
+create table t1 (a varchar(100)) engine=rocksdb;
+create table t2(a int) engine=rocksdb;
+insert into t2 values (1), (2);
+
+create table t3(a varchar(100)) engine=rocksdb;
+
+delimiter //;
+create function func() returns varchar(100) deterministic
+begin
+ insert into t3 values ('func-called');
+ set @a= (select a from t2);
+ return 'func-returned';
+end;//
+delimiter ;//
+
+begin;
+--error ER_SUBQUERY_NO_1_ROW
+insert into t1 values (func());
+select * from t1;
+--echo # The following must not produce 'func-called':
+select * from t3;
+
+rollback;
+drop function func;
+drop table t1,t2,t3;
+
+--echo #
+--echo # MDEV-16710: Slave SQL: Could not execute Update_rows_v1 event with RocksDB and triggers
+--echo # Issue#857: MyRocks: Incorrect behavior when multiple statements fail inside a transaction
+--echo #
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (1);
+
+CREATE TABLE t2 (b INT PRIMARY KEY) ENGINE=RocksDB;
+
+CREATE TRIGGER tr AFTER INSERT ON t2 FOR EACH ROW INSERT INTO non_existing_table VALUES (NULL);
+
+BEGIN;
+DELETE FROM t1;
+--error 0,ER_NO_SUCH_TABLE
+INSERT INTO t2 VALUES (1);
+--error 0,ER_NO_SUCH_TABLE
+INSERT INTO t2 VALUES (2);
+--echo # Must return empty result:
+SELECT * FROM t1;
+COMMIT;
+
+drop table t1,t2;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/transaction_isolation.inc b/storage/rocksdb/mysql-test/rocksdb/t/transaction_isolation.inc
new file mode 100644
index 00000000000..dbd1d90622f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/transaction_isolation.inc
@@ -0,0 +1,150 @@
+#
+# Basic check for transaction isolation.
+# The results should be different depending on the isolation level.
+# For some isolation levels, some statements will end with a timeout.
+# If the engine has its own timeout parameters, reduce them to the minimum,
+# otherwise the test will take a very long time.
+# If the timeout value is greater than the testcase-timeout the test is run with,
+# it might fail due to the testcase timeout.
+#
+
+--enable_connect_log
+
+# Save the initial number of concurrent sessions
+--source include/count_sessions.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+connect (con1,localhost,root,,);
+eval SET SESSION TRANSACTION ISOLATION LEVEL $trx_isolation;
+connect (con2,localhost,root,,);
+eval SET SESSION TRANSACTION ISOLATION LEVEL $trx_isolation;
+
+connection con1;
+
+CREATE TABLE t1 (a INT, pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+
+START TRANSACTION;
+--sorted_result
+SELECT a FROM t1; # First snapshot
+
+connection con2;
+
+BEGIN;
+--error 0,ER_LOCK_WAIT_TIMEOUT
+INSERT INTO t1 (a) VALUES(1);
+
+connection con1;
+--sorted_result
+SELECT a FROM t1; # Second snapshot
+
+connection con2;
+--error 0,ER_LOCK_WAIT_TIMEOUT
+INSERT INTO t1 (a) VALUES (2);
+
+connection con1;
+--sorted_result
+SELECT a FROM t1; # Third snapshot
+
+--error 0,ER_LOCK_WAIT_TIMEOUT
+INSERT INTO t1 (a) SELECT a+100 FROM t1;
+
+--sorted_result
+SELECT a FROM t1;
+
+connection con2;
+--sorted_result
+SELECT a FROM t1; # Inside the transaction
+COMMIT;
+--sorted_result
+SELECT a FROM t1; # Outside the transaction
+
+connection con1;
+--sorted_result
+SELECT a FROM t1; # Inside the transaction
+
+# Note: INSERT .. SELECT might be tricky, for example for InnoDB
+# even with REPEATABLE-READ it works as if it is executed with READ COMMITTED.
+# The test will have a 'logical' result for repeatable read, even though
+# we currently don't have an engine which works this way.
+
+--error 0,ER_LOCK_WAIT_TIMEOUT
+INSERT INTO t1 (a) SELECT a+200 FROM t1;
+
+--sorted_result
+SELECT a FROM t1;
+COMMIT;
+--sorted_result
+SELECT a FROM t1; # Outside the transaction
+
+connection con2;
+--sorted_result
+SELECT a FROM t1; # After both transactions have committed
+
+# Now test with an error in one statement to make sure the snapshots are
+# held/released when expected
+connection default;
+CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t2 (a) VALUES (1);
+COMMIT;
+
+connection con1;
+BEGIN;
+--sorted_result
+SELECT a from t2;
+--error ER_DUP_ENTRY
+INSERT INTO t2 (a) VALUES (1), (3); # failure
+
+connection con2;
+--error 0,ER_LOCK_WAIT_TIMEOUT
+INSERT INTO t2 (a) VALUES (2);
+COMMIT;
+
+connection con1;
+--sorted_result
+SELECT a from t2;
+COMMIT;
+
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t1;
+DROP TABLE t2;
+
+CREATE TABLE t3 (
+ pk int unsigned PRIMARY KEY,
+ count int unsigned DEFAULT '0'
+) ENGINE=ROCKSDB;
+
+connect (con1,localhost,root,,);
+eval SET SESSION TRANSACTION ISOLATION LEVEL $trx_isolation;
+connect (con2,localhost,root,,);
+eval SET SESSION TRANSACTION ISOLATION LEVEL $trx_isolation;
+
+connection con1;
+BEGIN;
+SELECT * FROM t3;
+
+connection con2;
+BEGIN;
+INSERT INTO t3 (pk) VALUES(1) ON DUPLICATE KEY UPDATE count=count+1;
+COMMIT;
+
+connection con1;
+--error 0,ER_LOCK_DEADLOCK
+INSERT INTO t3 (pk) VALUES(1) ON DUPLICATE KEY UPDATE count=count+1;
+COMMIT;
+
+# count will be 0 for repeatable read (because the last insert failed)
+# and 1 for read committed
+SELECT count FROM t3;
+
+connection default;
+disconnect con1;
+disconnect con2;
+DROP TABLE t3;
+
+--source include/wait_until_count_sessions.inc
+
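transaction_isolation.inc expects the caller to set $trx_isolation before sourcing it. A minimal wrapper sketch is shown below; the file name is hypothetical, and only the $trx_isolation variable comes from the include above:

```
# Hypothetical wrapper, e.g. transaction_isolation_read_committed.test:
--source include/have_rocksdb.inc

let $trx_isolation = READ COMMITTED;
--source transaction_isolation.inc
```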
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/transaction_select.inc b/storage/rocksdb/mysql-test/rocksdb/t/transaction_select.inc
new file mode 100644
index 00000000000..932a450e7c2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/transaction_select.inc
@@ -0,0 +1,14 @@
+select * from t1 where id=1;
+select * from t1 where value=1;
+select value from t1 where value=1;
+select * from t1 where value2=1;
+select * from t1 where id=5;
+select * from t1 where value=5;
+select value from t1 where value=5;
+select * from t1 where value2=5;
+select * from t1 where id < 3 order by id;
+select * from t1 where value < 3 order by id;
+select value from t1 where value < 3 order by id;
+select * from t1 where value2 < 3 order by id;
+select * from t1 order by id;
+select value from t1 order by id;
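This include is the read-side half of the pattern used in transaction.test above: every write is wrapped in begin/rollback, and the same fourteen selects re-check each access path (primary key, secondary key, non-indexed column, range scans, full scans). One such begin/rollback pairing, copied from the structure above, looks like this:

```
# One begin/rollback block matching the pattern in transaction.test above.
begin;
# any single data change under test:
update t1 set value2=100 where id=1;
# re-check every access path (PK, secondary key, non-index column, ranges):
--source transaction_select.inc
rollback;
```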
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/truncate_partition.inc b/storage/rocksdb/mysql-test/rocksdb/t/truncate_partition.inc
new file mode 100644
index 00000000000..2193aa9f1de
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/truncate_partition.inc
@@ -0,0 +1,102 @@
+
+# Truncate table multiple times
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+
+# Truncate partition multiple times
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+
+# TRUNCATE multiple times
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+
+# TRUNCATE multiple times
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+
+# TRUNCATE different partition
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+TRUNCATE TABLE t1;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+
+# Insert value once and truncate multiple times
+INSERT INTO t1 (a,b) VALUES (1, 1), (2, 4), (3, 8);
+SELECT a,b FROM t1 ORDER BY a;
+
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+SELECT a,b FROM t1 ORDER BY a;
+SELECT a FROM t1 WHERE b > 2;
+SELECT b from t1 where a != 3;
+
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+SELECT a,b FROM t1 ORDER BY b;
+SELECT a FROM t1 WHERE b > 2;
+SELECT b from t1 where a != 3;
+
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+SELECT a,b FROM t1 ORDER BY a;
+
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+
+# Insert value multiple times and truncate multiple times
+INSERT INTO t1 (a,b) VALUES (4, 1), (5, 4), (6, 8);
+--sorted_result
+SELECT a,b FROM t1;
+--sorted_result
+SELECT a FROM t1 WHERE b < 5;
+
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+--sorted_result
+SELECT a,b FROM t1;
+INSERT INTO t1(a,b) VALUES(7, 1);
+--sorted_result
+SELECT b from t1 WHERE a > 2;
+--sorted_result
+SELECT a,b FROM t1;
+
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+--sorted_result
+SELECT a,b FROM t1;
+INSERT INTO t1(a,b) VALUES(8, 4);
+--sorted_result
+SELECT a,b FROM t1;
+--sorted_result
+SELECT b from t1 WHERE a < 9;
+
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+--sorted_result
+SELECT a,b FROM t1;
+INSERT INTO t1(a,b) VALUES(9, 8);
+
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+
+# manual compaction
+set global rocksdb_force_flush_memtable_now = true;
+set global rocksdb_compact_cf = 'default';
+
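+# Repopulate with rows 1..9 so we can check TRUNCATE behaviour after the
+# flush + manual compaction above.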
+--disable_query_log
+let $i = 0;
+while($i < 9)
+{
+ inc $i;
+ eval insert t1 values($i, $i);
+}
+--enable_query_log
+--sorted_result
+SELECT b FROM t1 WHERE a < 5;
+
+TRUNCATE TABLE t1;
+SELECT b FROM t1 WHERE a < 5;
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/truncate_partition.test b/storage/rocksdb/mysql-test/rocksdb/t/truncate_partition.test
new file mode 100644
index 00000000000..f9a89517e2a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/truncate_partition.test
@@ -0,0 +1,83 @@
+--source include/have_rocksdb.inc
+--source include/have_partition.inc
+
+#
+# TRUNCATE PARTITION
+#
+
+
+# Hidden Key table
+--echo #
+--echo # table(hidden key)
+--echo #
+CREATE TABLE t1 (
+ a INT,
+ b INT
+) ENGINE=ROCKSDB
+PARTITION BY RANGE (b) (
+ PARTITION p0 VALUES LESS THAN (3),
+ PARTITION p1 VALUES LESS THAN (6),
+ PARTITION p2 VALUES LESS THAN MAXVALUE
+);
+
+--source truncate_partition.inc
+
+
+--echo #
+--echo # table(secondary key)
+--echo #
+CREATE TABLE t1(
+ a INT,
+ b INT,
+ KEY (b)
+) ENGINE=ROCKSDB
+PARTITION BY HASH(a) PARTITIONS 3;
+
+--source truncate_partition.inc
+
+
+--echo #
+--echo # table(primary key, auto increment)
+--echo #
+CREATE TABLE t1(
+ a INT NOT NULL AUTO_INCREMENT,
+ b INT,
+ PRIMARY KEY(a)
+) ENGINE=ROCKSDB
+PARTITION BY KEY() PARTITIONS 3;
+
+--source truncate_partition.inc
+
+
+--echo #
+--echo # table(cf)
+--echo #
+CREATE TABLE t1 (
+ a INT,
+ b INT,
+ PRIMARY KEY (`a`, `b`) COMMENT 'testcomment'
+) ENGINE=ROCKSDB
+ PARTITION BY LIST(a) (
+ PARTITION p0 VALUES IN (1, 4, 7),
+ PARTITION p1 VALUES IN (2, 5, 8),
+ PARTITION p2 VALUES IN (3, 6, 9)
+);
+
+--source truncate_partition.inc
+
+
+--echo #
+--echo # table(reverse cf)
+--echo #
+CREATE TABLE t1 (
+ a INT,
+ b INT,
+ PRIMARY KEY (`a`, `b`) COMMENT 'p0_cfname=rev:foo;p1_cfname=bar;p2_cfname=baz'
+) ENGINE=ROCKSDB
+PARTITION BY LIST(a) (
+ PARTITION p0 VALUES IN (1, 4, 7),
+ PARTITION p1 VALUES IN (2, 5, 8),
+ PARTITION p2 VALUES IN (3, 6, 9)
+);
+
+--source truncate_partition.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/truncate_table.test b/storage/rocksdb/mysql-test/rocksdb/t/truncate_table.test
new file mode 100644
index 00000000000..a61488654a3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/truncate_table.test
@@ -0,0 +1,74 @@
+--source include/have_rocksdb.inc
+
+#
+# TRUNCATE TABLE
+#
+
+########################################
+# TODO:
+# A part of the test is disabled because
+# HANDLER is not supported. If it ever
+# changes, the test will complain about
+# NOT producing ER_ILLEGAL_HA
+########################################
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+TRUNCATE TABLE t1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c');
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+DROP TABLE t1;
+
+
+# Truncate resets auto-increment value on the table
+
+CREATE TABLE t1 (a INT KEY AUTO_INCREMENT, c CHAR(8)) ENGINE=rocksdb;
+
+#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
+--replace_column 5 # 6 # 7 #
+SHOW TABLE STATUS LIKE 't1';
+
+INSERT INTO t1 (c) VALUES ('a'),('b'),('c');
+#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
+--replace_column 5 # 6 # 7 #
+SHOW TABLE STATUS LIKE 't1';
+
+TRUNCATE TABLE t1;
+#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
+--replace_column 5 # 6 # 7 #
+SHOW TABLE STATUS LIKE 't1';
+
+INSERT INTO t1 (c) VALUES ('d');
+#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
+--replace_column 5 # 6 # 7 #
+SHOW TABLE STATUS LIKE 't1';
+
+--sorted_result
+SELECT a,c FROM t1;
+DROP TABLE t1;
+
+# Truncate closes handlers
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c');
+
+--error ER_ILLEGAL_HA
+HANDLER t1 OPEN AS h1;
+
+--disable_parsing
+
+HANDLER h1 READ FIRST;
+TRUNCATE TABLE t1;
+--error ER_UNKNOWN_TABLE
+HANDLER h1 READ NEXT;
+HANDLER t1 OPEN AS h2;
+HANDLER h2 READ FIRST;
+
+--enable_parsing
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/truncate_table3-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/truncate_table3-master.opt
new file mode 100644
index 00000000000..a9ebc4ec20b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/truncate_table3-master.opt
@@ -0,0 +1,2 @@
+--rocksdb_max_subcompactions=1
+--rocksdb_default_cf_options=write_buffer_size=16k;target_file_size_base=16k;level0_slowdown_writes_trigger=-1;level0_stop_writes_trigger=1000;compression_per_level=kNoCompression;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/truncate_table3.test b/storage/rocksdb/mysql-test/rocksdb/t/truncate_table3.test
new file mode 100644
index 00000000000..b3f95f812b3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/truncate_table3.test
@@ -0,0 +1,5 @@
+--source include/have_rocksdb.inc
+
+-- let $truncate_table = 1
+-- let $drop_table = 0
+-- source drop_table3.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/trx_info.test b/storage/rocksdb/mysql-test/rocksdb/t/trx_info.test
new file mode 100644
index 00000000000..975bed6132c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/trx_info.test
@@ -0,0 +1,17 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+create table t1 (a int) engine=rocksdb;
+insert into t1 values (1);
+insert into t1 values (2);
+
+set autocommit=0;
+select * from t1 for update;
+
+--replace_column 1 _TRX_ID_ 3 _NAME_ 7 _KEY_ 14 _THREAD_ID_
+select * from information_schema.rocksdb_trx;
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/trx_info_rpl.cnf b/storage/rocksdb/mysql-test/rocksdb/t/trx_info_rpl.cnf
new file mode 100644
index 00000000000..46771b5a67f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/trx_info_rpl.cnf
@@ -0,0 +1,11 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+binlog_format=row
+loose-enable-rocksdb_trx
+
+[mysqld.2]
+binlog_format=row
+slave_parallel_workers=1
+rpl_skip_tx_api=ON
+loose-enable-rocksdb_trx
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/trx_info_rpl.test b/storage/rocksdb/mysql-test/rocksdb/t/trx_info_rpl.test
new file mode 100644
index 00000000000..91ab266cdc4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/trx_info_rpl.test
@@ -0,0 +1,44 @@
+--source include/have_rocksdb.inc
+--source include/master-slave.inc
+--source include/have_binlog_format_row.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+connection slave;
+--source include/stop_slave.inc
+--enable_warnings
+
+connection master;
+create table t1 (a int, b int, primary key (a), unique key (b)) engine=rocksdb;
+--disable_query_log
+--let $aa= 0
+while ($aa < 1000) {
+ eval insert into t1 values ($aa, $aa);
+ --inc $aa
+}
+--enable_query_log
+
+connection slave;
+show variables like 'rpl_skip_tx_api';
+--source include/start_slave.inc
+
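+# Poll information_schema.rocksdb_trx until the applier's transaction shows
+# up; give up after ~1000 iterations so a broken slave cannot hang the test.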
+--let $it=0
+--let $stop=0
+while ($stop != 1) {
+ let $count= query_get_value(select count(*) as Value from information_schema.rocksdb_trx, Value, 1);
+ if ($count) {
+ --echo found
+ --let $stop=1
+ }
+
+ if ($it > 1000) {
+ --echo not found
+ --let $stop=1
+ }
+
+ --inc $it
+}
+
+connection master;
+DROP TABLE t1;
+--source include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary-master.opt
new file mode 100644
index 00000000000..b991f718a33
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary-master.opt
@@ -0,0 +1,2 @@
+--rocksdb_enable_ttl_read_filtering=0
+--rocksdb_default_cf_options=disable_auto_compactions=true
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary.test
new file mode 100644
index 00000000000..38bfb2eef8f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary.test
@@ -0,0 +1,545 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+
+# Basic TTL test
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
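+# (The TTL policy lives in the table comment: ttl_duration is given in
+# seconds, and ttl_col, when present, must name a NOT NULL bigint UNSIGNED
+# column holding a unix timestamp. Malformed variants are expected to fail
+# with ER_RDB_TTL_COL_FORMAT / ER_RDB_TTL_DURATION_FORMAT; see the error
+# cases near the end of this file.)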
+
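+# rocksdb_debug_ttl_rec_ts is a debug-only knob that offsets the TTL
+# timestamp stored with newly written records, so rows can be written "in
+# the past" (or future) without real sleeps.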
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# column before TTL in value
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ ts bigint(20) UNSIGNED NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, UNIX_TIMESTAMP(), 5);
+INSERT INTO t1 values (2, 4, UNIX_TIMESTAMP(), 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# multi-part PK w/ TTL
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ ts bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, 4, 6, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# nullable column(s) before TTL
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int,
+ c int,
+ ts bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, NULL, NULL, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, NULL, NULL, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# variable len columns + null column(s) before TTL
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64),
+`c` varbinary(256),
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', NULL, 'bc', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'efghijk', NULL, UNIX_TIMESTAMP(), 'l');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# TTL implicitly generated (no ttl column)
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5);
+INSERT INTO t1 values (2, 4, 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# TTL field as the PK
+CREATE TABLE t1 (
+ a int,
+ ts bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a, ts)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=ts;';
+
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+SELECT COUNT(*) FROM t1;
+
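+# rocksdb_debug_ttl_snapshot_ts similarly shifts the snapshot time the
+# compaction filter compares records against (negative = compaction
+# pretends to run in the past, positive = in the future).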
+set global rocksdb_debug_ttl_snapshot_ts = -10;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+# should all still be there..
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_snapshot_ts = 10;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+
+# TTL field inside multi-part pk
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ ts bigint(20) UNSIGNED NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a, ts)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, UNIX_TIMESTAMP(), 5);
+INSERT INTO t1 values (2, 4, UNIX_TIMESTAMP(), 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# TTL field inside key with variable length things..
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64),
+`c` varbinary(256),
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`a`, `ts`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', NULL, 'bc', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('de', 'fghijk', NULL, UNIX_TIMESTAMP(), 'l');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# TTL test where we compact (values still exist), advance the snapshot
+# timestamp, then compact again; values should now be gone.
+CREATE TABLE t1 (
+a INT NOT NULL,
+b varbinary(64) NOT NULL,
+c varbinary(256) NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+value mediumblob NOT NULL,
+PRIMARY KEY (b,a,c)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=10;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values (2, 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (3, 'i', 'j', UNIX_TIMESTAMP(), 'k');
+INSERT INTO t1 values (4, 'm', 'n', UNIX_TIMESTAMP(), 'o');
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+# Nothing should get removed here.
+set global rocksdb_debug_ttl_snapshot_ts = -3600;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT a FROM t1;
+
+# 1 and 2 should get removed here.
+set global rocksdb_compact_cf='default';
+--sorted_result
+SELECT a FROM t1;
+
+# 3 and 4 should get removed here.
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT a FROM t1;
+
+DROP TABLE t1;
+
+# TTL field with nullable ttl column (should fail)
+--error ER_RDB_TTL_COL_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ ts bigint(20),
+ PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+# TTL field with non 8-byte integer column (should fail)
+--error ER_RDB_TTL_COL_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ ts int,
+ PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+# TTL duration as some random garbage value
+--error ER_RDB_TTL_DURATION_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=abc;';
+
+# TTL col is some column outside of the table
+--error ER_RDB_TTL_COL_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=abc;';
+
+# TTL col must have accompanying duration
+--error ER_RDB_TTL_COL_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_col=abc;';
+
+# Make sure it doesn't filter out things early
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=500;';
+
+INSERT INTO t1 values (1);
+SELECT COUNT(*) FROM t1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# Testing altering table comment with updated TTL duration
+# This should trigger a rebuild of the table
+CREATE TABLE t1 (
+ a INT PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+
+INSERT INTO t1 values (1);
+SELECT * FROM t1;
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+ALTER TABLE t1 COMMENT = 'ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = 0;
+SHOW CREATE TABLE t1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# Tables with hidden PK and SK disabled
+CREATE TABLE t1 (
+ a INT PRIMARY KEY,
+ b INT
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+
+--error ER_RDB_TTL_UNSUPPORTED
+ALTER TABLE t1 DROP PRIMARY KEY;
+
+DROP TABLE t1;
+
+# Test replacing PK, ttl should still work after
+CREATE TABLE t1 (
+ a INT PRIMARY KEY,
+ b INT
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t1 VALUES (2,2);
+
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY(b);
+set global rocksdb_debug_ttl_snapshot_ts = -3600;
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+
+--sorted_result
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+
+--sorted_result
+SELECT COUNT(*) FROM t1;
+
+DROP TABLE t1;
+
+# Make sure a table comment with other text before/after the TTL clause
+# still works (the TTL clause just needs a semicolon before and after it)
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int,
+ PRIMARY KEY (a,b)
+) ENGINE=rocksdb
+COMMENT='asdadfasdfsadfadf ;ttl_duration=1; asfasdfasdfadfa';
+INSERT INTO t1 values (UNIX_TIMESTAMP(), 1);
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+
+SELECT COUNT(*) FROM t1;
+
+ALTER TABLE t1 COMMENT = 'adsf;;ttl_duration=5;asfasdfa;ttl_col=a;asdfasdf;';
+
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (UNIX_TIMESTAMP(), 2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+
+# nothing removed here
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+
+# all removed here
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+DROP TABLE t1;
+
+# Test to make sure that TTL retains original timestamp during update
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (3);
+INSERT INTO t1 values (5);
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (7);
+INSERT INTO t1 values (9);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+UPDATE t1 SET a=a+1;
+--sorted_result
+SELECT * FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# 1,3,5 should be dropped
+--sorted_result
+SELECT * FROM t1;
+DROP TABLE t1;
+
+# Test update behaviour with an explicit TTL column; the TTL timestamp can be updated here.
+CREATE TABLE t1 (
+ a INT,
+ b bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=UNIX_TIMESTAMP() WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT a FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# 5 and 7 should be gone here
+--sorted_result
+SELECT a FROM t1;
+DROP TABLE t1;
+
+# Test rows expired stat variable and disable ttl variable
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (2);
+INSERT INTO t1 values (3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_enable_ttl=0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_enable_ttl=1;
+set global rocksdb_compact_cf='default';
+
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+
+# A table with TTL won't increment rocksdb_rows_expired when no records
+# have been compacted
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (2);
+INSERT INTO t1 values (3);
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering-master.opt
new file mode 100644
index 00000000000..aefc2f5da34
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering-master.opt
@@ -0,0 +1 @@
+--rocksdb_default_cf_options=disable_auto_compactions=true
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test
new file mode 100644
index 00000000000..7a7609f456e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test
@@ -0,0 +1,388 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+
+# The purpose of read filtering for tables with TTL is to ensure that a key
+# which has already expired, but has not yet been removed by compaction, is
+# not returned to the user during a transaction.
+#
+# Without this the user might be hit with problems such as rows disappearing
+# within a transaction, etc., because the compaction filter ignores
+# snapshots when filtering keys.
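+#
+# A minimal sketch of the anomaly this prevents (illustrative only):
+#   BEGIN;
+#   SELECT * FROM t;   <-- row with an almost-expired TTL is visible
+#   (ttl_duration seconds pass; compaction drops the key)
+#   SELECT * FROM t;   <-- without read filtering the row would vanish
+#                          inside the very same transaction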
+
+# Basic read filtering test
+CREATE TABLE t1 (
+ a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+
+--sorted_result
+SELECT * FROM t1;
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+
+DROP TABLE t1;
+
+# Test that some rows are hidden but others aren't...
+CREATE TABLE t1 (
+ a int PRIMARY KEY,
+ b BIGINT UNSIGNED NOT NULL
+) ENGINE=rocksdb
+COMMENT='ttl_duration=10;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+
+# 1 should be hidden even though compaction hasn't run.
+--sorted_result
+SELECT a FROM t1;
+
+set global rocksdb_compact_cf='default';
+
+# none should be hidden yet, compaction runs but records aren't expired
+--sorted_result
+SELECT a FROM t1;
+
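+# (rocksdb_debug_ttl_read_filter_ts shifts the timestamp the read-filtering
+# path compares against, simulating elapsed time on reads just like the
+# rec_ts/snapshot_ts knobs do for writes and compactions.)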
+# all should be hidden now, even though compaction hasn't run again
+set global rocksdb_debug_ttl_read_filter_ts = -310;
+--sorted_result
+SELECT a FROM t1;
+set global rocksdb_debug_ttl_read_filter_ts = 0;
+
+DROP TABLE t1;
+
+# Test the filtering code explicitly.
+CREATE TABLE t1 (
+ a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (3);
+INSERT INTO t1 values (5);
+INSERT INTO t1 values (7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+# should return nothing.
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+--sorted_result
+SELECT * FROM t1;
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+
+# disable filtering
+set global rocksdb_enable_ttl_read_filtering=0;
+
+# should return everything
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+--sorted_result
+SELECT * FROM t1;
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+
+# enable filtering
+set global rocksdb_enable_ttl_read_filtering=1;
+
+# should return nothing.
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+--sorted_result
+SELECT * FROM t1;
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+
+DROP TABLE t1;
+# Compact away the dropped data
+set global ROCKSDB_COMPACT_CF= 'default';
+
+# Read filtering index scan tests (None of these queries should return any results)
+CREATE TABLE t1 (
+ a int,
+ b int,
+ c int,
+ PRIMARY KEY (a,b,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (0,0,0);
+INSERT INTO t1 values (0,0,1);
+INSERT INTO t1 values (0,1,0);
+INSERT INTO t1 values (0,1,1);
+INSERT INTO t1 values (1,1,2);
+INSERT INTO t1 values (1,2,1);
+INSERT INTO t1 values (1,2,2);
+INSERT INTO t1 values (1,2,3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+
+set global rocksdb_force_flush_memtable_now=1;
+
+# HA_READ_KEY_EXACT, using full key
+SELECT * FROM t1 WHERE a=1 AND b=2 AND c=2;
+
+# HA_READ_KEY_EXACT, not using full key
+SELECT * FROM t1 WHERE a = 1;
+
+# HA_READ_BEFORE_KEY, not using full key
+SELECT max(a) from t1 where a < 3;
+
+#HA_READ_BEFORE_KEY, using full key
+SELECT max(a) from t1 where a < 2 AND b = 1 AND c < 3;
+
+# HA_READ_KEY_OR_NEXT
+SELECT min(a) from t1 where a >= 1;
+
+# HA_READ_AFTER_KEY, /* Find next rec. after key-record */
+SELECT min(a) from t1 where a > 1;
+
+# HA_READ_PREFIX_LAST, /* Last key with the same prefix */
+select * from t1 where a=1 and b in (1) order by c desc;
+
+# HA_READ_PREFIX_LAST_OR_PREV, /* Last or prev key with the same prefix */
+select max(a) from t1 where a <=10;
+
+# need to test read_range_first(); it calls into read_range_next() and uses
+# compare_keys() to see if it's out of range
+select a from t1 where a > 0 and a <= 2;
+
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+DROP TABLE t1;
+
+# Attempt to insert a duplicate PK value when the old one has expired;
+# in this case, we pretend the expired key was not found and insert into the PK
+CREATE TABLE t1 (
+ a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+set global rocksdb_debug_ttl_rec_ts = -110;
+INSERT INTO t1 values (1);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+SELECT * FROM t1;
+
+# this should work, even if old value is not filtered out yet.
+INSERT INTO t1 values (1);
+
+# should show (1) result
+SELECT * FROM t1;
+
+DROP TABLE t1;
+
+# Attempt to update expired value, should filter out
+set global rocksdb_force_flush_memtable_now=1;
+CREATE TABLE t1 (
+ a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT * FROM t1;
+
+# No error is thrown here; under the hood rnd_next_with_direction filters
+# the record out so it is never seen in the first place.
+UPDATE t1 set a = 1;
+DROP TABLE t1;
+
+##
+## More tests on update behaviour with expired keys.
+##
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+CREATE TABLE t1 (
+ a int PRIMARY KEY,
+ b int
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+
+set global rocksdb_debug_ttl_rec_ts = -110;
+INSERT INTO t1 values (1,1);
+INSERT INTO t1 values (3,3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 values (5,5);
+
+# The expired key (1) is still around under the hood, but
+# this time rnd_next_with_direction finds the non-expired key (5). So the
+# execution flow in the SQL layer moves on to update_write_row, where it
+# then finds the duplicate key (1). But the duplicate key is expired, so the
+# overwrite is allowed.
+UPDATE t1 set a = 1;
+
+--sorted_result
+SELECT * FROM t1;
+
+set global rocksdb_enable_ttl_read_filtering=0;
+# (1,1) should be gone even with read filtering disabled, as it has been
+# overwritten
+--sorted_result
+SELECT * FROM t1;
+set global rocksdb_enable_ttl_read_filtering=1;
+
+# get_row_by_rowid tested here via index_read_map_impl
+UPDATE t1 set a = 999 where a = 1;
+--sorted_result
+SELECT * FROM t1;
+
+UPDATE t1 set a = a - 1;
+--sorted_result
+SELECT * FROM t1;
+
+DROP TABLE t1;
+
+# Ensure no rows can disappear in the middle of long-running transactions
+# Also ensure repeatable-read works as expected
+--source include/count_sessions.inc
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+CREATE TABLE t1 (
+ a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+
+INSERT INTO t1 values (1);
+
+connection con1;
+--echo # Creating Snapshot (start transaction)
+BEGIN;
+
+# We need the below snippet in case establishing con1 took an arbitrary
+# amount of time. See https://github.com/facebook/mysql-5.6/pull/617#discussion_r120525391.
+--disable_query_log
+--let $snapshot_size= `SELECT COUNT(*) FROM t1`
+--let $i= 0
+while ($snapshot_size != 1)
+{
+ if ($i == 1000)
+ {
+ --die Your testing host is too slow for reasonable TTL testing
+ }
+
+ inc $i;
+ ROLLBACK;
+ INSERT INTO t1 values (1);
+ BEGIN;
+ --let $snapshot_size= `SELECT COUNT(*) FROM t1`
+}
+--enable_query_log
+
+# Nothing filtered out here
+--sorted_result
+SELECT * FROM t1;
+
+--sleep 5
+
+--sorted_result
+SELECT * FROM t1; # <= shouldn't be filtered out here
+
+--echo # Switching to connection 2
+connection con2;
+# compaction doesn't do anything since con1 snapshot is still open
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+# Reads are filtered out on this connection: unlike con1, which still holds
+# the old snapshot, here the records have already 'expired', so they are
+# filtered out even though they have not yet been removed by compaction.
+
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+--sorted_result
+SELECT * FROM t1;
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+
+--echo # Switching to connection 1
+connection con1;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+--sorted_result
+SELECT * FROM t1; # <= shouldn't be filtered out here
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+
+UPDATE t1 set a = a + 1;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+--sorted_result
+SELECT * FROM t1; # <= shouldn't be filtered out here
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+
+COMMIT;
+
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+--sorted_result
+SELECT * FROM t1; # <= filtered out here because time has passed.
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+
+DROP TABLE t1;
+disconnect con1;
+disconnect con2;
+
+# transaction 1: create a snapshot and select * => returns nothing.
+# transaction 2: insert into table, flush.
+# transaction 1: select * => returns nothing, but the snapshot should
+#                prevent the compaction code from removing the rows, no
+#                matter what the ttl duration is.
+# transaction 2: select * -> sees nothing; disable filter, select * ->
+#                sees everything; enable filter, select * -> sees nothing.
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+CREATE TABLE t1 (
+ a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+--echo # On Connection 1
+connection con1;
+--echo # Creating Snapshot (start transaction)
+BEGIN;
+--sorted_result
+SELECT * FROM t1;
+# Sleep 5 secs after creating the snapshot; this ensures any records created
+# after this point can't be removed by compaction until the snapshot is
+# released.
+--sleep 5
+
+--echo # On Connection 2
+connection con2;
+set global rocksdb_debug_ttl_rec_ts = -2;
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (3);
+INSERT INTO t1 values (5);
+INSERT INTO t1 values (7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+--echo # On Connection 1
+connection con1;
+--sorted_result
+SELECT * FROM t1;
+
+--echo # On Connection 2
+connection con2;
+--sorted_result
+SELECT * FROM t1;
+set global rocksdb_enable_ttl_read_filtering=0;
+--sorted_result
+SELECT * FROM t1;
+set global rocksdb_enable_ttl_read_filtering=1;
+
+disconnect con2;
+disconnect con1;
+connection default;
+
+DROP TABLE t1;
+# Wait till we reached the initial number of concurrent sessions
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_with_partitions-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_with_partitions-master.opt
new file mode 100644
index 00000000000..b991f718a33
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_with_partitions-master.opt
@@ -0,0 +1,2 @@
+--rocksdb_enable_ttl_read_filtering=0
+--rocksdb_default_cf_options=disable_auto_compactions=true
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_with_partitions.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_with_partitions.test
new file mode 100644
index 00000000000..aba2b594db5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_with_partitions.test
@@ -0,0 +1,254 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+--source include/have_partition.inc
+
+#
+# Create a table with multiple partitions, but in the comment don't specify
+# that per-partition based column families (CF) should be created. Expect that
+# default CF will be used and new one won't be created.
+#
+# In addition, specify TTL on one of the partitions. Insert a few things
+# inside all the partitions, verify after compaction that the rows inside the
+# partition with TTL have disappeared.
+#
+CREATE TABLE t1 (
+ c1 INT,
+ PRIMARY KEY (`c1`)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+set global rocksdb_debug_ttl_rec_ts = -3600;
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (2);
+INSERT INTO t1 values (3);
+INSERT INTO t1 values (4);
+INSERT INTO t1 values (5);
+INSERT INTO t1 values (6);
+INSERT INTO t1 values (7);
+INSERT INTO t1 values (8);
+INSERT INTO t1 values (9);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT * FROM t1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# 1,4, and 7 should be gone
+--sorted_result
+SELECT * FROM t1;
+DROP TABLE t1;
+
+#
+# Create a table with multiple partitions and request for separate CF to be
+# created per every partition. As a result we expect three different CF-s to be
+# created.
+#
+# In addition, specify TTL on some of the partitions. Insert a few things
+# inside all the partitions, verify after compaction that the rows inside the
+# partition with TTL have disappeared.
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=my_custom_cf;custom_p2_cfname=baz'
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;custom_p1_ttl_duration=7;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (1,1,'a');
+INSERT INTO t1 values (4,4,'aaaa');
+INSERT INTO t1 values (7,7,'aaaaaaa');
+
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (2,2,'aa');
+INSERT INTO t1 values (3,3,'aaa');
+INSERT INTO t1 values (5,5,'aaaaa');
+INSERT INTO t1 values (6,6,'aaaaaa');
+INSERT INTO t1 values (8,8,'aaaaaaaa');
+INSERT INTO t1 values (9,9,'aaaaaaaaa');
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT * FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+--sorted_result
+SELECT * FROM t1;
+
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set @@global.rocksdb_compact_cf = 'foo';
+--sorted_result
+SELECT * FROM t1;
+
+# Now 2,5,8 should be removed (this verifies that TTL is only operating on
+# the particular CF).
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT * FROM t1;
+
+DROP TABLE t1;
+
+#
+# Create a table with CF-s/TTL per partition and verify that ALTER TABLE + DROP
+# PRIMARY, ADD PRIMARY work for that scenario and data is persisted/filtered as
+# expected.
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ event DATE,
+ PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;'
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=9999;custom_p2_ttl_duration=5;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 2, 3),
+ PARTITION custom_p1 VALUES IN (4, 5, 6),
+ PARTITION custom_p2 VALUES IN (7, 8, 9)
+);
+
+INSERT INTO t1 VALUES (1, 1, "one", null);
+INSERT INTO t1 VALUES (2, 2, "two", null);
+INSERT INTO t1 VALUES (3, 3, "three", null);
+
+INSERT INTO t1 VALUES (4, 4, "four", null);
+INSERT INTO t1 VALUES (5, 5, "five", null);
+INSERT INTO t1 VALUES (6, 6, "six", null);
+
+INSERT INTO t1 VALUES (7, 7, "seven", null);
+INSERT INTO t1 VALUES (8, 8, "eight", null);
+INSERT INTO t1 VALUES (9, 9, "nine", null);
+
+--sorted_result
+SELECT * FROM t1;
+
+# TTL should be reset after alter table
+set global rocksdb_debug_ttl_rec_ts = 600;
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY(`c2`,`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;';
+set global rocksdb_debug_ttl_rec_ts = 0;
+SHOW CREATE TABLE t1;
+
+# ...so nothing should be gone here
+set global rocksdb_debug_ttl_snapshot_ts = 100;
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'baz';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT * FROM t1;
+
+set global rocksdb_debug_ttl_snapshot_ts = 1200;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'baz';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT * FROM t1;
+
+DROP TABLE t1;
+
+#
+# Create a table with non-partitioned TTL duration, with partitioned TTL
+# columns
+#
+# In this case the same TTL duration will be applied across different TTL
+# columns in different partitions, except for in p2 where we override the ttl
+# duration.
+#
+CREATE TABLE t1 (
+ c1 BIGINT,
+ c2 BIGINT UNSIGNED NOT NULL,
+ name VARCHAR(25) NOT NULL,
+ event DATE,
+ PRIMARY KEY (`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;'
+) ENGINE=ROCKSDB
+COMMENT="ttl_duration=1;custom_p1_ttl_duration=100;custom_p1_ttl_col=c2;custom_p2_ttl_duration=5000;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 2, 3),
+ PARTITION custom_p1 VALUES IN (4, 5, 6),
+ PARTITION custom_p2 VALUES IN (7, 8, 9)
+);
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 VALUES (1, UNIX_TIMESTAMP(), "one", null);
+INSERT INTO t1 VALUES (2, UNIX_TIMESTAMP(), "two", null);
+INSERT INTO t1 VALUES (3, UNIX_TIMESTAMP(), "three", null);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+INSERT INTO t1 VALUES (4, UNIX_TIMESTAMP(), "four", null);
+INSERT INTO t1 VALUES (5, UNIX_TIMESTAMP(), "five", null);
+INSERT INTO t1 VALUES (6, UNIX_TIMESTAMP(), "six", null);
+
+INSERT INTO t1 VALUES (7, UNIX_TIMESTAMP(), "seven", null);
+INSERT INTO t1 VALUES (8, UNIX_TIMESTAMP(), "eight", null);
+INSERT INTO t1 VALUES (9, UNIX_TIMESTAMP(), "nine", null);
+
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'baz';
+set @@global.rocksdb_compact_cf = 'bar';
+
+# here we expect only 1,2,3 to be gone, ttl implicit.
+--sorted_result
+SELECT c1 FROM t1;
+
+# here we expect only 4,5,6 to be gone, ttl based on column c2.
+set global rocksdb_debug_ttl_snapshot_ts = 600;
+set @@global.rocksdb_compact_cf = 'bar';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT c1 FROM t1;
+
+# at this point only 7,8,9 should be left..
+DROP TABLE t1;
+
+#
+# Make sure non-partitioned TTL duration/col still works on table with
+# partitions.
+#
+# Simultaneously tests when TTL col is part of the key in partitioned table
+#
+CREATE TABLE t1 (
+ c1 BIGINT,
+ c2 BIGINT UNSIGNED NOT NULL,
+ PRIMARY KEY (`c1`, `c2`)
+) ENGINE=ROCKSDB
+COMMENT="ttl_duration=100;ttl_col=c2;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1),
+ PARTITION custom_p1 VALUES IN (2),
+ PARTITION custom_p2 VALUES IN (3)
+);
+
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# everything should still be here
+--sorted_result
+SELECT c1 FROM t1;
+
+set global rocksdb_debug_ttl_snapshot_ts = 300;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+# everything should now be gone
+--sorted_result
+SELECT c1 FROM t1;
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_rows_examined.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_rows_examined.test
new file mode 100644
index 00000000000..714c2c6056c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_rows_examined.test
@@ -0,0 +1,57 @@
+--source include/have_rocksdb.inc
+--source include/have_debug_sync.inc
+
+####
+# Bump rows_examined count whenever MyRocks filters out a row due to expired TTL
+####
+
+# clean start
+set debug_sync='RESET';
+set global rocksdb_debug_ttl_read_filter_ts = -10;
+
+--enable_connect_log
+--enable_info
+
+connect (conn1, localhost, root,,test);
+connect (conn2, localhost, root,,test);
+
+connection conn1;
+
+# create table with TTL policy (1s)
+CREATE TABLE t_re (
+ a INT, b INT, PRIMARY KEY (a)
+) ENGINE=ROCKSDB
+COMMENT 'ttl_duration=1';
+
+# start with 2 rows, expired at the insertion time
+set global rocksdb_debug_ttl_rec_ts = -13;
+insert into t_re values (1,1);
+insert into t_re values (2,2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+commit;
+
+# set up a signal to pause in the code where we skip expired records
+set debug_sync='rocksdb.ttl_rows_examined SIGNAL parked WAIT_FOR go';
+send SELECT * FROM t_re;
+
+connection conn2;
+set debug_sync='now WAIT_FOR parked';
+
+# display "Rows Examined" before returning from call
+--replace_result Execute Query
+--replace_column 1 ### 2 ### 3 ### 4 ### 6 ### 10 ### 11 ### 12 ###
+--sorted_result
+SHOW PROCESSLIST;
+
+set debug_sync='now SIGNAL go';
+
+connection conn1;
+reap;
+
+# tidy up
+set debug_sync='RESET';
+set global rocksdb_debug_ttl_read_filter_ts = DEFAULT;
+drop table t_re;
+
+disconnect conn1;
+disconnect conn2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary-master.opt
new file mode 100644
index 00000000000..b991f718a33
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary-master.opt
@@ -0,0 +1,2 @@
+--rocksdb_enable_ttl_read_filtering=0
+--rocksdb_default_cf_options=disable_auto_compactions=true
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary.test
new file mode 100644
index 00000000000..fb439e109e7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary.test
@@ -0,0 +1,780 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+
+# Basic TTL test, pk ignored, no sk
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
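+# (rocksdb_debug_ttl_ignore_pk makes the compaction filter skip TTL
+# expiration for primary key entries, so only secondary key entries are
+# dropped; this lets the test exercise SK filtering in isolation.)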
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# no rows should be filtered
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# Basic TTL test
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`),
+KEY kb (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# column before TTL in value
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ ts bigint(20) UNSIGNED NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, UNIX_TIMESTAMP(), 5);
+INSERT INTO t1 values (2, 4, UNIX_TIMESTAMP(), 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# multi-part PK w/ TTL
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ ts bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a,c),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, 4, 6, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# nullable column(s) before TTL
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int,
+ c int,
+ ts bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a),
+ KEY kbc (b, c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, NULL, NULL, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, NULL, NULL, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kbc);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kbc);
+DROP TABLE t1;
+
+# variable len columns + null column(s) before TTL
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64),
+`c` varbinary(256),
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`a`),
+KEY kbc (`b`, `c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', NULL, 'bc', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'efghijk', NULL, UNIX_TIMESTAMP(), 'l');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kbc);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kbc);
+DROP TABLE t1;
+
+# TTL implicitly generated (no ttl column)
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5);
+INSERT INTO t1 values (2, 4, 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# TTL field as the PK
+CREATE TABLE t1 (
+ a int,
+ ts bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a, ts),
+ KEY kt (ts)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=ts;';
+
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+SELECT COUNT(*) FROM t1 FORCE INDEX(kt);
+
+set global rocksdb_debug_ttl_snapshot_ts = -10;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+# should all still be there..
+SELECT COUNT(*) FROM t1 FORCE INDEX(kt);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 10;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX(kt);
+DROP TABLE t1;
+
+# TTL field inside multi-part pk
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ ts bigint(20) UNSIGNED NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a, ts),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, UNIX_TIMESTAMP(), 5);
+INSERT INTO t1 values (2, 4, UNIX_TIMESTAMP(), 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# the rows should have been filtered out by the compaction filter since their TTL has passed
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# TTL field inside the key, with variable-length columns
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64),
+`c` varbinary(256),
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`a`, `ts`),
+KEY kb (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', NULL, 'bc', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('de', 'fghijk', NULL, UNIX_TIMESTAMP(), 'l');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# the rows should have been filtered out by the compaction filter since their TTL has passed
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# TTL test where we compact once (values still exist), simulate the passage
+# of time via the snapshot timestamp, then compact again; the values should
+# now be gone.
+CREATE TABLE t1 (
+a INT NOT NULL,
+b varbinary(64) NOT NULL,
+c varbinary(256) NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+value mediumblob NOT NULL,
+PRIMARY KEY (b,a,c),
+KEY kb (b)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=10;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values (2, 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (3, 'i', 'j', UNIX_TIMESTAMP(), 'k');
+INSERT INTO t1 values (4, 'm', 'n', UNIX_TIMESTAMP(), 'o');
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+# Nothing should get removed here.
+set global rocksdb_debug_ttl_snapshot_ts = -3600;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+# 1 and 2 should get removed here.
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+# 3 and 4 should get removed here.
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+DROP TABLE t1;
+
+# TTL column that is nullable (should fail)
+--error ER_RDB_TTL_COL_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ ts bigint(20),
+ PRIMARY KEY (a,c),
+ KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+# TTL column that is not an 8-byte integer (should fail)
+--error ER_RDB_TTL_COL_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ ts int,
+ PRIMARY KEY (a,c),
+ KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+# TTL duration set to a non-numeric garbage value (should fail)
+--error ER_RDB_TTL_DURATION_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a,c),
+ KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=abc;';
+
+# TTL col names a column that does not exist in the table (should fail)
+--error ER_RDB_TTL_COL_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a,c),
+ KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=abc;';
+
+# TTL col without an accompanying ttl_duration (should fail)
+--error ER_RDB_TTL_COL_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a,c),
+ KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_col=abc;';
+
+# Make sure rows are not filtered out before their TTL duration has elapsed
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=500;';
+
+INSERT INTO t1 values (1, 1);
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# Test altering the table comment with an updated TTL duration.
+# This should trigger a rebuild of the table.
+CREATE TABLE t1 (
+ a INT PRIMARY KEY,
+ b INT NOT NULL,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+
+INSERT INTO t1 values (1, 1);
+SELECT * FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+ALTER TABLE t1 COMMENT = 'ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = 0;
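+# the -300 offset above makes the rows rewritten by the ALTER look 300s old,
+# so the new ttl_duration=1 expires them on the next compaction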
+SHOW CREATE TABLE t1;
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# Hidden PKs are not supported with TTL: dropping the PK should fail
+CREATE TABLE t1 (
+ a INT PRIMARY KEY,
+ b INT,
+ KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+
+--error ER_RDB_TTL_UNSUPPORTED
+ALTER TABLE t1 DROP PRIMARY KEY;
+
+DROP TABLE t1;
+
+# Test replacing the PK; TTL should still work afterwards
+CREATE TABLE t1 (
+ a INT PRIMARY KEY,
+ b INT,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t1 VALUES (2,2);
+
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY(b);
+set global rocksdb_debug_ttl_snapshot_ts = -3600;
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+
+--sorted_result
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+--sorted_result
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+DROP TABLE t1;
+
+# Make sure the TTL settings are still parsed when the table comment contains
+# other text before and after (they just need semicolons around them)
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int,
+ PRIMARY KEY (a,b),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='asdadfasdfsadfadf ;ttl_duration=1; asfasdfasdfadfa';
+INSERT INTO t1 values (UNIX_TIMESTAMP(), 1);
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+ALTER TABLE t1 COMMENT = 'adsf;;ttl_duration=5;asfasdfa;ttl_col=a;asdfasdf;';
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (UNIX_TIMESTAMP(), 2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+
+# nothing removed here
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+# all removed here
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+DROP TABLE t1;
+
+# Test to make sure that TTL retains the original timestamp during an update
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, 0);
+INSERT INTO t1 values (3, 0);
+INSERT INTO t1 values (5, 0);
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (7, 0);
+INSERT INTO t1 values (9, 0);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+UPDATE t1 SET a=a+1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# the rows inserted as 1,3,5 (updated to 2,4,6) should be dropped
+--sorted_result
+SELECT * FROM t1;
+DROP TABLE t1;
+
+# Test behaviour on update with a TTL column; the TTL time can be updated here.
+CREATE TABLE t1 (
+ a INT,
+ b bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=(UNIX_TIMESTAMP()+1) WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# 5 and 7 should be gone here
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# Test the rows-expired stat variable and the rocksdb_enable_ttl variable
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 1);
+INSERT INTO t1 values (3, 1);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_enable_ttl=0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
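+# snapshot the rows-expired counter so the deltas selected below reflect
+# only the compactions run by this test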
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_enable_ttl=1;
+set global rocksdb_compact_cf='default';
+
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# A table with TTL must not increment rows-expired when compaction removes
+# no records
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 2);
+INSERT INTO t1 values (3, 3);
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+
+DROP TABLE t1;
+
+# Test update on TTL column in pk
+CREATE TABLE t1 (
+ a INT,
+ b bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a, b),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=(UNIX_TIMESTAMP()+1) WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# 5 and 7 should be gone here
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# Same as above, but without a secondary key; the TTL time can still be updated.
+CREATE TABLE t1 (
+ a INT,
+ b bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a, b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=(UNIX_TIMESTAMP()+1) WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT a FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# 5 and 7 should be gone here
+--sorted_result
+SELECT a FROM t1;
+DROP TABLE t1;
+
+# Add index inplace
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# nothing filtered out
+SELECT COUNT(*) FROM t1;
+
+CREATE INDEX kb on t1 (b);
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# the rows should have been filtered out by the compaction filter since their TTL has passed
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# Add index inplace, implicit TTL
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', 'd');
+INSERT INTO t1 values ('d', 'e', 'f', 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# nothing filtered out
+SELECT COUNT(*) FROM t1;
+
+CREATE INDEX kb on t1 (b);
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# the rows should have been filtered out by the compaction filter since their TTL has passed
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# Add index inplace, TTL column in PK
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`, `ts`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# nothing filtered out
+SELECT COUNT(*) FROM t1;
+
+CREATE INDEX kb on t1 (b);
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# the rows should have been filtered out by the compaction filter since their TTL has passed
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering-master.opt
new file mode 100644
index 00000000000..aefc2f5da34
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering-master.opt
@@ -0,0 +1 @@
+--rocksdb_default_cf_options=disable_auto_compactions=true
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering.test
new file mode 100644
index 00000000000..f6042cc517e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering.test
@@ -0,0 +1,503 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+
+# The purpose of read filtering for tables with TTL is to ensure that a key
+# which has already expired, but has not yet been removed by compaction, is
+# not returned to the user during a transaction.
+#
+# Without this the user might see anomalies such as rows disappearing within
+# a transaction, because the compaction filter ignores snapshots when
+# filtering keys.
+
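+# Throughout this file the rocksdb_debug_ttl_* variables are used to
+# simulate the passage of time (assumed semantics, inferred from usage):
+#   rocksdb_debug_ttl_rec_ts N         - shift the timestamp stored with
+#                                        newly written records by N seconds
+#   rocksdb_debug_ttl_snapshot_ts N    - shift the expiration time used by
+#                                        compaction by N seconds
+#   rocksdb_debug_ttl_read_filter_ts N - shift the time used by read
+#                                        filtering by N seconds
+#   rocksdb_debug_ttl_ignore_pk 1      - make the compaction filter skip
+#                                        primary-key entries
+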
+# Basic read filtering test
+CREATE TABLE t1 (
+ a int PRIMARY KEY,
+ b int NOT NULL,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+
+DROP TABLE t1;
+
+# Test that some rows are hidden but others aren't...
+CREATE TABLE t1 (
+ a int PRIMARY KEY,
+ b BIGINT UNSIGNED NOT NULL,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=10;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+
+--echo # 1 should be hidden
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+--echo # none should be hidden yet; compaction runs but the records aren't expired
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+--echo # all should be hidden now, even though compaction hasn't run again
+set global rocksdb_debug_ttl_read_filter_ts = -310;
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+set global rocksdb_debug_ttl_read_filter_ts = 0;
+
+DROP TABLE t1;
+
+# Test the filtering code explicitly.
+CREATE TABLE t1 (
+ a int PRIMARY KEY,
+ b int NOT NULL,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (7, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--echo # should return nothing.
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+# disable filtering
+set global rocksdb_enable_ttl_read_filtering=0;
+
+--echo # should return everything
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+# enable filtering
+set global rocksdb_enable_ttl_read_filtering=1;
+
+--echo # should return nothing.
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+DROP TABLE t1;
+
+# Compact away the dropped data
+set global rocksdb_compact_cf= 'default';
+
+--echo # Read filtering index scan tests (None of these queries should return any results)
+CREATE TABLE t1 (
+ a int,
+ b int,
+ c int,
+ PRIMARY KEY (a,b,c),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (0,0,0);
+INSERT INTO t1 values (0,0,1);
+INSERT INTO t1 values (0,1,0);
+INSERT INTO t1 values (0,1,1);
+INSERT INTO t1 values (1,1,2);
+INSERT INTO t1 values (1,2,1);
+INSERT INTO t1 values (1,2,2);
+INSERT INTO t1 values (1,2,3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+
+set global rocksdb_force_flush_memtable_now=1;
+
+# HA_READ_KEY_EXACT, using full key
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE a=1 AND b=2 AND c=2;
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a=1 AND b=2 AND c=2;
+
+# HA_READ_KEY_EXACT, not using full key
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE a = 1;
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a = 1;
+
+# HA_READ_BEFORE_KEY, not using full key
+SELECT max(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a < 3;
+SELECT max(a) FROM t1 FORCE INDEX (kb) WHERE a < 3;
+
+# HA_READ_BEFORE_KEY, using full key
+SELECT max(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a < 2 AND b = 1 AND c < 3;
+SELECT max(a) FROM t1 FORCE INDEX (kb) WHERE a < 2 AND b = 1 AND c < 3;
+
+# HA_READ_KEY_OR_NEXT
+SELECT min(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a >= 1;
+SELECT min(a) FROM t1 FORCE INDEX (kb) WHERE a >= 1;
+
+# HA_READ_AFTER_KEY, /* Find next rec. after key-record */
+SELECT min(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a > 1;
+SELECT min(a) FROM t1 FORCE INDEX (kb) WHERE a > 1;
+
+# HA_READ_PREFIX_LAST, /* Last key with the same prefix */
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE a=1 and b in (1) order by c desc;
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a=1 and b in (1) order by c desc;
+
+# HA_READ_PREFIX_LAST_OR_PREV, /* Last or prev key with the same prefix */
+SELECT max(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a <=10;
+SELECT max(a) FROM t1 FORCE INDEX (kb) WHERE a <=10;
+
+# Also test read_range_first(), which calls into read_range_next() and uses
+# compare_keys() to see if the row is out of range
+SELECT a FROM t1 FORCE INDEX (PRIMARY) WHERE a > 0 and a <= 2;
+SELECT a FROM t1 FORCE INDEX (kb) WHERE a > 0 and a <= 2;
+
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+DROP TABLE t1;
+
+--echo # Attempt to update expired value, should filter out
+set global rocksdb_force_flush_memtable_now=1;
+CREATE TABLE t1 (
+ a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1;
+
+--echo # No error is thrown here; under the hood index_next_with_direction is
+--echo # filtering out the record from being seen in the first place.
+UPDATE t1 set a = 1;
+DROP TABLE t1;
+
+--echo # Ensure no rows can disappear in the middle of long-running transactions
+--echo # Also ensure repeatable-read works as expected
+--source include/count_sessions.inc
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+CREATE TABLE t1 (
+ a int PRIMARY KEY,
+ b int NOT NULL,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+
+INSERT INTO t1 values (1, 1);
+
+connection con1;
+--echo # Creating Snapshot (start transaction)
+BEGIN;
+
+# We need the below snippet in case establishing con1 took an arbitrary
+# amount of time. See https://github.com/facebook/mysql-5.6/pull/617#discussion_r120525391.
+--disable_query_log
+--let $snapshot_size= `SELECT COUNT(*) FROM t1`
+--let $i= 0
+while ($snapshot_size != 1)
+{
+ if ($i == 1000)
+ {
+ --die Your testing host is too slow for reasonable TTL testing
+ }
+
+ inc $i;
+ ROLLBACK;
+ INSERT INTO t1 values (1,1);
+ BEGIN;
+ --let $snapshot_size= `SELECT COUNT(*) FROM t1`
+}
+--enable_query_log
+
+--echo # Nothing filtered out here
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+--sleep 5
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb); # <= shouldn't be filtered out here
+
+--echo # Switching to connection 2
+connection con2;
+--echo # compaction doesn't do anything since con1 snapshot is still open
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+--echo # on this connection the records have already 'expired', so reads filter
+--echo # them out even though compaction has not yet removed them
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+--echo # Switching to connection 1
+connection con1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb); # <= shouldn't be filtered out here
+
+UPDATE t1 set a = a + 1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb); # <= shouldn't be filtered out here
+
+COMMIT;
+
+# <= filtered out here because time has passed.
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+DROP TABLE t1;
+disconnect con1;
+disconnect con2;
+
+# Transaction 1: create a snapshot and SELECT * => returns nothing.
+# Transaction 2: insert into the table, flush.
+# Transaction 1: SELECT * => still returns nothing, but the snapshot should
+#   prevent the compaction code from removing the rows, no matter what the
+#   ttl duration is.
+# Transaction 2: SELECT * => sees nothing; disable the filter, SELECT * =>
+#   sees everything; enable the filter, SELECT * => sees nothing.
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+CREATE TABLE t1 (
+ a int PRIMARY KEY,
+ b int NOT NULL,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+--echo # On Connection 1
+connection con1;
+--echo # Creating Snapshot (start transaction)
+BEGIN;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+# Sleep 5 secs after creating the snapshot; this ensures that any records
+# created after this point can't be removed by compaction until the snapshot
+# is released.
+--sleep 5
+
+--echo # On Connection 2
+connection con2;
+set global rocksdb_debug_ttl_rec_ts = -2;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (7, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+--echo # On Connection 1
+connection con1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+--echo # On Connection 2
+connection con2;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+set global rocksdb_enable_ttl_read_filtering=0;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+set global rocksdb_enable_ttl_read_filtering=1;
+
+disconnect con2;
+disconnect con1;
+connection default;
+
+DROP TABLE t1;
+# Wait till we reached the initial number of concurrent sessions
+--source include/wait_until_count_sessions.inc
+
+# Test that index_next_with_direction skips records properly
+CREATE TABLE t1 (
+ a int,
+ b int,
+ ts bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 VALUES (1, 1, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (2, 2, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (3, 3, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (4, 4, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (5, 5, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (6, 6, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (7, 7, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (8, 8, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (9, 9, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (10, 10, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+--echo # None are expired
+SELECT a, b FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+UPDATE t1 SET ts=(UNIX_TIMESTAMP()+1) WHERE a IN (4, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+--echo # 4 and 7 should be gone
+SELECT a, b FROM t1 FORCE INDEX (kb);
+
+DROP TABLE t1;
+
+# Test range scans with various conditionals
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ PRIMARY KEY (c1, c2),
+ KEY kc2 (c2)
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (1,1,'a');
+INSERT INTO t1 values (2,2,'b');
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (3,3,'c');
+INSERT INTO t1 values (4,4,'d');
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (5,5,'e');
+INSERT INTO t1 values (6,6,'f');
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (7,7,'g');
+INSERT INTO t1 values (8,8,'h');
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE c1 > 5;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2) WHERE c2 > 5;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE 3 < c1 AND c1 < 6;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2) WHERE 3 < c2 AND c2 < 6;
+
+DROP TABLE t1;
+
+# Test range scans with varying expirations
+CREATE TABLE t1 (
+ a int,
+ b int,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1800;';
+
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 values (1,1);
+INSERT INTO t1 values (2,2);
+INSERT INTO t1 values (7,7);
+INSERT INTO t1 values (10,10);
+INSERT INTO t1 values (11,11);
+INSERT INTO t1 values (12,12);
+set global rocksdb_debug_ttl_rec_ts = 450;
+INSERT INTO t1 values (3,3);
+INSERT INTO t1 values (4,4);
+INSERT INTO t1 values (8,8);
+INSERT INTO t1 values (16,16);
+INSERT INTO t1 values (17,17);
+INSERT INTO t1 values (18,18);
+set global rocksdb_debug_ttl_rec_ts = 900;
+INSERT INTO t1 values (5,5);
+INSERT INTO t1 values (6,6);
+INSERT INTO t1 values (9,9);
+INSERT INTO t1 values (13,13);
+INSERT INTO t1 values (14,14);
+INSERT INTO t1 values (15,15);
+set global rocksdb_debug_ttl_rec_ts = 0;
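+
+# the three insert groups above carry record timestamps of roughly now,
+# now+450 and now+900; with ttl_duration=1800, shifting the read-filter
+# timestamp below expires them one group at a time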
+
+--echo # Should see everything
+SELECT * FROM t1;
+
+--echo # Should have no records from the first group
+set global rocksdb_debug_ttl_read_filter_ts = -1800;
+SELECT * FROM t1;
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a > 5 AND a < 15;
+
+--echo # Should only have records from the last group
+set global rocksdb_debug_ttl_read_filter_ts = -1800 - 450;
+SELECT * FROM t1;
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a < 10;
+
+--echo # Should be empty
+set global rocksdb_debug_ttl_read_filter_ts = -1800 - 900;
+SELECT * FROM t1;
+
+set global rocksdb_debug_ttl_read_filter_ts = 0;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering_multiple_index.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering_multiple_index.test
new file mode 100644
index 00000000000..4f9788ce33c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering_multiple_index.test
@@ -0,0 +1,87 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+
+# Multiple indexes, trigger compaction on sk and check consistency
+CREATE TABLE t1 (
+ a int NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b) COMMENT 'kb',
+ KEY kc (c) COMMENT 'kc'
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1, 1);
+INSERT INTO t1 values (2, 2, 2);
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 values (3, 3, 3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='kb';
+
+# Results should be consistent
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+SELECT * FROM t1 FORCE INDEX (kb);
+SELECT * FROM t1 FORCE INDEX (kc);
+
+DROP TABLE t1;
+
+# Trigger compaction on pk and check consistency
+CREATE TABLE t1 (
+ a int NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b) COMMENT 'kb',
+ KEY kc (c) COMMENT 'kc'
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1, 1);
+INSERT INTO t1 values (2, 2, 2);
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 values (3, 3, 3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# Results should be consistent
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+SELECT * FROM t1 FORCE INDEX (kb);
+SELECT * FROM t1 FORCE INDEX (kc);
+
+DROP TABLE t1;
+
+# Trigger compaction on pk and sk and check consistency
+CREATE TABLE t1 (
+ a int NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b) COMMENT 'kb',
+ KEY kc (c) COMMENT 'kc'
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1, 1);
+INSERT INTO t1 values (2, 2, 2);
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 values (3, 3, 3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_compact_cf='kb';
+
+# Results should be consistent
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+SELECT * FROM t1 FORCE INDEX (kb);
+SELECT * FROM t1 FORCE INDEX (kc);
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions-master.opt
new file mode 100644
index 00000000000..10a88c30361
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions-master.opt
@@ -0,0 +1 @@
+--rocksdb_enable_ttl_read_filtering=0
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions.test
new file mode 100644
index 00000000000..c10c83f9f9d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions.test
@@ -0,0 +1,300 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+--source include/have_partition.inc
+
+#
+# Create a table with multiple partitions, but don't request per-partition
+# column families (CF) in the comment. Expect the default CF to be used and
+# no new ones to be created.
+#
+# In addition, specify TTL on one of the partitions. Insert a few rows into
+# all the partitions and verify after compaction that the rows inside the
+# partition with TTL have disappeared.
+#
+# Add secondary keys to all tables to test compatibility.
+#
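+# (per-partition settings use the custom_<partition name>_ prefix in the
+# table comment, e.g. custom_p0_ttl_duration=1; per-partition column
+# families are assigned via custom_pN_cfname=... in the key comment, as the
+# later tests in this file show)
+#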
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ PRIMARY KEY (`c1`),
+ KEY kc2 (`c2`)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+set global rocksdb_debug_ttl_rec_ts = -3600;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 2);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (4, 4);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (6, 6);
+INSERT INTO t1 values (7, 7);
+INSERT INTO t1 values (8, 8);
+INSERT INTO t1 values (9, 9);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# 1,4, and 7 should be gone
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+DROP TABLE t1;
+
+#
+# Create a table with multiple partitions and request a separate CF for
+# every partition. As a result we expect three different CFs to be created.
+#
+# In addition, specify TTL on some of the partitions. Insert a few rows into
+# all the partitions and verify after compaction that the rows inside the
+# partitions with TTL have disappeared.
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=my_custom_cf;custom_p2_cfname=baz',
+ KEY kc2 (`c2`)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;custom_p1_ttl_duration=7;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (1,1,'a');
+INSERT INTO t1 values (4,4,'aaaa');
+INSERT INTO t1 values (7,7,'aaaaaaa');
+
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (2,2,'aa');
+INSERT INTO t1 values (3,3,'aaa');
+INSERT INTO t1 values (5,5,'aaaaa');
+INSERT INTO t1 values (6,6,'aaaaaa');
+INSERT INTO t1 values (8,8,'aaaaaaaa');
+INSERT INTO t1 values (9,9,'aaaaaaaaa');
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+set @@global.rocksdb_compact_cf = 'default';
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set @@global.rocksdb_compact_cf = 'foo';
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+# Now 2,5,8 should be removed (this verifies that TTL only operates on the
+# particular CF).
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+DROP TABLE t1;
+
+#
+# Create a table with CFs/TTL per partition and verify that ALTER TABLE +
+# DROP PRIMARY KEY, ADD PRIMARY KEY works for that scenario and that data is
+# persisted/filtered as expected.
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ event DATE,
+ PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;',
+ KEY kc2 (c2)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=9999;custom_p2_ttl_duration=5;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 2, 3),
+ PARTITION custom_p1 VALUES IN (4, 5, 6),
+ PARTITION custom_p2 VALUES IN (7, 8, 9)
+);
+
+INSERT INTO t1 VALUES (1, 1, "one", null);
+INSERT INTO t1 VALUES (2, 2, "two", null);
+INSERT INTO t1 VALUES (3, 3, "three", null);
+
+INSERT INTO t1 VALUES (4, 4, "four", null);
+INSERT INTO t1 VALUES (5, 5, "five", null);
+INSERT INTO t1 VALUES (6, 6, "six", null);
+
+INSERT INTO t1 VALUES (7, 7, "seven", null);
+INSERT INTO t1 VALUES (8, 8, "eight", null);
+INSERT INTO t1 VALUES (9, 9, "nine", null);
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+# TTL should be reset after alter table
+set global rocksdb_debug_ttl_rec_ts = 600;
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY(`c2`,`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;';
+set global rocksdb_debug_ttl_rec_ts = 0;
+SHOW CREATE TABLE t1;
+
+# ...so nothing should be gone here
+set global rocksdb_debug_ttl_snapshot_ts = 100;
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'baz';
+# Filter out expired secondary keys too
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+set global rocksdb_debug_ttl_snapshot_ts = 1200;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'baz';
+# Filter out expired secondary keys too
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+DROP TABLE t1;
+
+#
+# Create a table with a non-partitioned TTL duration and a partitioned TTL
+# column.
+#
+# The same TTL duration would apply across the different TTL columns in the
+# different partitions, except that p1 and p2 override the duration (and p1
+# supplies its own TTL column).
+#
+CREATE TABLE t1 (
+ c1 BIGINT,
+ c2 BIGINT UNSIGNED NOT NULL,
+ name VARCHAR(25) NOT NULL,
+ event DATE,
+ PRIMARY KEY (`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;',
+ KEY kc2 (`c2`)
+) ENGINE=ROCKSDB
+COMMENT="ttl_duration=1;custom_p1_ttl_duration=100;custom_p1_ttl_col=c2;custom_p2_ttl_duration=5000;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 2, 3),
+ PARTITION custom_p1 VALUES IN (4, 5, 6),
+ PARTITION custom_p2 VALUES IN (7, 8, 9)
+);
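+# effective per-partition settings: p0 inherits ttl_duration=1 with an
+# implicit timestamp, p1 uses ttl_duration=100 keyed on column c2, and p2
+# uses ttl_duration=5000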
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 VALUES (1, UNIX_TIMESTAMP(), "one", null);
+INSERT INTO t1 VALUES (2, UNIX_TIMESTAMP(), "two", null);
+INSERT INTO t1 VALUES (3, UNIX_TIMESTAMP(), "three", null);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+INSERT INTO t1 VALUES (4, UNIX_TIMESTAMP(), "four", null);
+INSERT INTO t1 VALUES (5, UNIX_TIMESTAMP(), "five", null);
+INSERT INTO t1 VALUES (6, UNIX_TIMESTAMP(), "six", null);
+
+INSERT INTO t1 VALUES (7, UNIX_TIMESTAMP(), "seven", null);
+INSERT INTO t1 VALUES (8, UNIX_TIMESTAMP(), "eight", null);
+INSERT INTO t1 VALUES (9, UNIX_TIMESTAMP(), "nine", null);
+
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'baz';
+set @@global.rocksdb_compact_cf = 'bar';
+# Filter out expired secondary keys too
+set @@global.rocksdb_compact_cf = 'default';
+
+# here we expect only 1,2,3 to be gone (implicit ttl)
+--sorted_result
+SELECT c1 FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT c1 FROM t1 FORCE INDEX (kc2);
+
+# here we expect only 4,5,6 to be gone (ttl based on column c2)
+set global rocksdb_debug_ttl_snapshot_ts = 600;
+set @@global.rocksdb_compact_cf = 'bar';
+# Filter out expired secondary keys too
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT c1 FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT c1 FROM t1 FORCE INDEX (kc2);
+
+# at this point only 7,8,9 should be left.
+DROP TABLE t1;
+
+#
+# Add index inplace
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ PRIMARY KEY (`c1`) COMMENT 'custom_p0_cfname=foo;'
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+set global rocksdb_debug_ttl_rec_ts = -3600;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (4, 4);
+INSERT INTO t1 values (7, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 values (2, 2);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (6, 6);
+INSERT INTO t1 values (8, 8);
+INSERT INTO t1 values (9, 9);
+
+# Nothing filtered out
+--sorted_result
+SELECT * FROM t1;
+
+CREATE INDEX kc2 on t1 (c2);
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='foo';
+set global rocksdb_compact_cf='default';
+
+# 1,4, and 7 should be gone
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_binary.inc b/storage/rocksdb/mysql-test/rocksdb/t/type_binary.inc
new file mode 100644
index 00000000000..8e2b0e8549b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_binary.inc
@@ -0,0 +1,45 @@
+#
+# BINARY column types
+#
+
+# $extra_col_opts is set by a calling test
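+# (a hypothetical caller might do:
+#   --let $extra_col_opts = NOT NULL
+#   --source type_binary.inc
+# )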
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+eval CREATE TABLE t1 (
+ b BINARY $extra_col_opts,
+ b0 BINARY(0) $extra_col_opts,
+ b1 BINARY(1) $extra_col_opts,
+ b20 BINARY(20) $extra_col_opts,
+ b255 BINARY(255) $extra_col_opts,
+ pk BINARY PRIMARY KEY
+) ENGINE=rocksdb;
+
+SHOW COLUMNS IN t1;
+
+# Valid values
+
+INSERT INTO t1 VALUES ('','','','','','');
+INSERT INTO t1 VALUES ('a','','b','abcdefghi klmnopqrst', 'Creating an article for the Knowledgebase is similar to asking questions. First, navigate to the category where you feel the article should be. Once there, double check that an article doesn\'t already exist which would work.','a');
+
+--sorted_result
+SELECT HEX(b), HEX(b0), HEX(b1), HEX(b20), HEX(b255), HEX(pk) FROM t1 ORDER BY pk;
+
+# Invalid values
+
+INSERT INTO t1 VALUES ('abc', 'a', 'abc', REPEAT('a',21), REPEAT('x',256),'b');
+--error ER_DUP_ENTRY
+INSERT INTO t1 SELECT b255, b255, b255, b255, CONCAT('a',b255,b255), 'c' FROM t1;
+
+--sorted_result
+SELECT HEX(b), HEX(b0), HEX(b1), HEX(b20), HEX(b255), HEX(pk) FROM t1 ORDER BY pk;
+
+--error ER_TOO_BIG_FIELDLENGTH
+eval ALTER TABLE t1 ADD COLUMN b257 BINARY(257) $extra_col_opts;
+
+SHOW COLUMNS IN t1;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_binary.test b/storage/rocksdb/mysql-test/rocksdb/t/type_binary.test
new file mode 100644
index 00000000000..91749e36a2e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_binary.test
@@ -0,0 +1,8 @@
+--source include/have_rocksdb.inc
+
+#
+# BINARY column types
+#
+
+--source type_binary.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_binary_indexes-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/type_binary_indexes-master.opt
new file mode 100644
index 00000000000..6ad42e58aa2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_binary_indexes-master.opt
@@ -0,0 +1 @@
+--rocksdb_debug_optimizer_n_rows=1000 --rocksdb_records_in_range=50
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_binary_indexes.test b/storage/rocksdb/mysql-test/rocksdb/t/type_binary_indexes.test
new file mode 100644
index 00000000000..f4360ed629b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_binary_indexes.test
@@ -0,0 +1,99 @@
+--source include/have_rocksdb.inc
+
+#
+# BINARY and VARBINARY columns with indexes
+#
+
+#######################################
+# TODO:
+# A part of the test is disabled
+# because unique keys are not supported
+#######################################
+
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (b BINARY,
+ b20 BINARY(20) PRIMARY KEY,
+ v16 VARBINARY(16),
+ v128 VARBINARY(128)
+) ENGINE=rocksdb;
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (b,b20,v16,v128) VALUES ('a','char1','varchar1a','varchar1b'),('a','char2','varchar2a','varchar2b'),('b','char3','varchar1a','varchar1b'),('c','char4','varchar3a','varchar3b');
+
+--replace_column 9 #
+EXPLAIN SELECT HEX(b20) FROM t1 ORDER BY b20;
+SELECT HEX(b20) FROM t1 ORDER BY b20;
+
+--replace_column 9 #
+EXPLAIN SELECT HEX(b20) FROM t1 IGNORE INDEX (PRIMARY) ORDER BY b20 DESC;
+SELECT HEX(b20) FROM t1 ORDER BY b20 DESC;
+
+DROP TABLE t1;
+
+--disable_parsing
+--error ER_GET_ERRMSG
+CREATE TABLE t1 (b BINARY,
+ b20 BINARY(20),
+ v16 VARBINARY(16),
+ v128 VARBINARY(128),
+ UNIQUE INDEX b_v (b,v128),
+ pk VARBINARY(10) PRIMARY KEY
+) ENGINE=rocksdb;
+
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (b,b20,v16,v128) VALUES ('a','char1','varchar1a','varchar1b'),('a','char2','varchar2a','varchar2b'),('b','char3','varchar1a','varchar1b'),('c','char4','varchar3a','varchar3b');
+
+--replace_column 9 #
+EXPLAIN SELECT HEX(b), HEX(v128) FROM t1 WHERE b != 'a' AND v128 > 'varchar';
+--sorted_result
+SELECT HEX(b), HEX(v128) FROM t1 WHERE b != 'a' AND v128 > 'varchar';
+
+--replace_column 9 #
+EXPLAIN SELECT HEX(b), HEX(v128) FROM t1 USE INDEX (b_v) WHERE b != 'a' AND v128 > 'varchar';
+--sorted_result
+SELECT HEX(b), HEX(v128) FROM t1 USE INDEX (b_v) WHERE b != 'a' AND v128 > 'varchar';
+
+--replace_column 9 #
+EXPLAIN SELECT HEX(v128), COUNT(*) FROM t1 GROUP BY HEX(v128);
+--sorted_result
+SELECT HEX(v128), COUNT(*) FROM t1 GROUP BY HEX(v128);
+
+DROP TABLE t1;
+
+--enable_parsing
+
+CREATE TABLE t1 (b BINARY,
+ b20 BINARY(20),
+ v16 VARBINARY(16),
+ v128 VARBINARY(128),
+ pk VARBINARY(10) PRIMARY KEY,
+ INDEX (v16(10))
+) ENGINE=rocksdb;
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (b,b20,v16,v128,pk) VALUES ('a','char1','varchar1a','varchar1b',1),('a','char2','varchar2a','varchar2b',2),('b','char3','varchar1a','varchar1b',3),('c','char4','varchar3a','varchar3b',4),('d','char5','varchar4a','varchar3b',5),('e','char6','varchar2a','varchar3b',6);
+INSERT INTO t1 (b,b20,v16,v128,pk) SELECT b,b20,v16,v128,pk+100 FROM t1;
+
+--replace_column 9 #
+EXPLAIN SELECT HEX(SUBSTRING(v16,0,3)) FROM t1 WHERE v16 LIKE 'varchar%';
+--sorted_result
+SELECT HEX(SUBSTRING(v16,7,3)) FROM t1 WHERE v16 LIKE 'varchar%';
+
+--replace_column 9 #
+EXPLAIN SELECT HEX(SUBSTRING(v16,0,3)) FROM t1 FORCE INDEX (v16) WHERE v16 LIKE 'varchar%';
+--sorted_result
+SELECT HEX(SUBSTRING(v16,7,3)) FROM t1 FORCE INDEX (v16) WHERE v16 LIKE 'varchar%';
+
+DROP TABLE t1;
+
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_bit.inc b/storage/rocksdb/mysql-test/rocksdb/t/type_bit.inc
new file mode 100644
index 00000000000..ba0c6537404
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_bit.inc
@@ -0,0 +1,53 @@
+#
+# BIT column type
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+# Valid values
+
+eval CREATE TABLE t1 (
+ a BIT $extra_col_opts,
+ b BIT(20) $extra_col_opts,
+ c BIT(64) $extra_col_opts,
+ d BIT(1) $extra_col_opts,
+ PRIMARY KEY (c)
+) ENGINE=rocksdb;
+
+SHOW COLUMNS IN t1;
+
+ALTER TABLE t1 DROP COLUMN d;
+eval ALTER TABLE t1 ADD COLUMN d BIT(0) $extra_col_opts;
+SHOW COLUMNS IN t1;
+
+INSERT INTO t1 (a,b,c,d) VALUES (0,POW(2,20)-1,b'1111111111111111111111111111111111111111111111111111111111111111',1);
+SELECT BIN(a), HEX(b), c+0 FROM t1 WHERE d>0;
+
+INSERT INTO t1 (a,b,c,d) VALUES (1,0,-2,0);
+--sorted_result
+SELECT a+0, b+0, c+0 FROM t1 WHERE d<100;
+
+INSERT INTO t1 (a,b,c,d) VALUES (b'1', 'f', 0xFF, 0x0);
+--sorted_result
+SELECT a+0, b+0, c+0 FROM t1 WHERE d IN (0, 2);
+
+DELETE FROM t1;
+
+# Out of range values
+# (should produce warnings)
+
+INSERT INTO t1 (a,b,c,d) VALUES (0x10,0,0,1);
+SELECT a+0,b+0,c+0,d+0 FROM t1;
+
+INSERT INTO t1 (a,b,c,d) VALUES (0x01,0,0x10000000000000000,0);
+--sorted_result
+SELECT a+0,b+0,c+0,d+0 FROM t1;
+
+DROP TABLE t1;
+
+--error ER_TOO_BIG_DISPLAYWIDTH
+eval CREATE TABLE t1 (pk INT PRIMARY KEY, a BIT(65) $extra_col_opts) ENGINE=rocksdb;
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_bit.test b/storage/rocksdb/mysql-test/rocksdb/t/type_bit.test
new file mode 100644
index 00000000000..8d57cabffc8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_bit.test
@@ -0,0 +1,8 @@
+--source include/have_rocksdb.inc
+
+#
+# BIT column type
+#
+
+--source type_bit.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_bit_indexes-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/type_bit_indexes-master.opt
new file mode 100644
index 00000000000..ba9364e1523
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_bit_indexes-master.opt
@@ -0,0 +1 @@
+--rocksdb_debug_optimizer_n_rows=1000
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_bit_indexes.test b/storage/rocksdb/mysql-test/rocksdb/t/type_bit_indexes.test
new file mode 100644
index 00000000000..e4f4bb81819
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_bit_indexes.test
@@ -0,0 +1,113 @@
+--source include/have_rocksdb.inc
+
+#
+# BIT columns with indexes
+#
+
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (
+ a BIT,
+ b BIT(20) PRIMARY KEY,
+ c BIT(32),
+ d BIT(64)
+) ENGINE=rocksdb;
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (a,b,c,d) VALUES
+(0,0xFFFFF,0,1),(0,256,0xAAA,0x12345),(1,16,0,0xFFFFFFF),(0,11,12,13),
+(1,100,101,102),(0,12,13,14),(1,13,14,15),(0,101,201,202),(1,1000,1001,1002),
+(1,0xFFFF,0xFFFFFFFF,0xFFFFFFFFFFFFFFFF);
+
+--replace_column 9 #
+EXPLAIN SELECT b+0 FROM t1 ORDER BY b;
+SELECT b+0 FROM t1 ORDER BY b;
+
+DROP TABLE t1;
+
+--echo # TODO: Unique indexes are not enforced
+--disable_parsing
+--error ER_GET_ERRMSG
+CREATE TABLE t1 (
+ a BIT,
+ b BIT(20),
+ c BIT(32),
+ d BIT(64),
+ pk BIT(10) PRIMARY KEY,
+UNIQUE INDEX b_c (b,c)
+) ENGINE=rocksdb;
+
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (a,b,c,d,pk) VALUES
+(0,0xFFFFF,0,1,1),(0,256,0xAAA,0x12345,2),(1,16,0,0xFFFFFFF,3),(0,11,12,13,4),
+(1,100,101,102,5),(0,12,13,14,6),(1,13,14,15,7),(0,101,201,202,8),(1,1000,1001,1002,9),
+(1,0xFFFF,0xFFFFFFFF,0xFFFFFFFFFFFFFFFF,10);
+
+--replace_column 9 #
+EXPLAIN SELECT HEX(b+c) FROM t1 WHERE c > 1 OR HEX(b) < 0xFFFFFF;
+--sorted_result
+SELECT HEX(b+c) FROM t1 WHERE c > 1 OR HEX(b) < 0xFFFFFF;
+
+DROP TABLE t1;
+
+--enable_parsing
+
+CREATE TABLE t1 (
+ a BIT,
+ b BIT(20),
+ c BIT(32),
+ d BIT(64),
+ pk BIT(10) PRIMARY KEY,
+ INDEX(a)
+) ENGINE=rocksdb;
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (a,b,c,d,pk) VALUES
+(0,0xFFFFF,0,1,1),(0,256,0xAAA,0x12345,2),(1,16,0,0xFFFFFFF,3),(0,11,12,13,4),
+(1,100,101,102,5),(0,12,13,14,6),(1,13,14,15,7),(0,101,201,202,8),(1,1000,1001,1002,9),
+(1,0xFFFF,0xFFFFFFFF,0xFFFFFFFFFFFFFFFF,10);
+
+--replace_column 9 #
+EXPLAIN SELECT DISTINCT a+0 FROM t1 ORDER BY a;
+SELECT DISTINCT a+0 FROM t1 ORDER BY a;
+
+DROP TABLE t1;
+
+--disable_parsing
+--error ER_GET_ERRMSG
+CREATE TABLE t1 (
+ a BIT,
+ b BIT(20),
+ c BIT(32),
+ d BIT(64),
+ pk BIT(10) PRIMARY KEY,
+ UNIQUE INDEX (d)
+) ENGINE=rocksdb;
+
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (a,b,c,d,pk) VALUES
+(0,0xFFFFF,0,1,1),(0,256,0xAAA,0x12345,2),(1,16,0,0xFFFFFFF,3),(0,11,12,13,4),
+(1,100,101,102,5),(0,12,13,14,6),(1,13,14,15,7),(0,101,201,202,8),(1,1000,1001,1002,9),
+(1,0xFFFF,0xFFFFFFFF,0xFFFFFFFFFFFFFFFF,10);
+
+--replace_column 9 #
+EXPLAIN SELECT d FROM t1 WHERE d BETWEEN 1 AND 10000;
+--sorted_result
+SELECT d+0 FROM t1 WHERE d BETWEEN 1 AND 10000;
+
+DROP TABLE t1;
+
+--enable_parsing
+
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_blob.inc b/storage/rocksdb/mysql-test/rocksdb/t/type_blob.inc
new file mode 100644
index 00000000000..723b3ee528c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_blob.inc
@@ -0,0 +1,49 @@
+#
+# BLOB column types
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+eval CREATE TABLE t1 (
+ pk INT AUTO_INCREMENT PRIMARY KEY,
+ b BLOB $extra_col_opts,
+ b0 BLOB(0) $extra_col_opts,
+ b1 BLOB(1) $extra_col_opts,
+ b300 BLOB(300) $extra_col_opts,
+ bm BLOB(65535) $extra_col_opts,
+ b70k BLOB(70000) $extra_col_opts,
+ b17m BLOB(17000000) $extra_col_opts,
+ t TINYBLOB $extra_col_opts,
+ m MEDIUMBLOB $extra_col_opts,
+ l LONGBLOB $extra_col_opts
+) ENGINE=rocksdb;
+
+SHOW COLUMNS IN t1;
+
+# Valid values
+# (cannot get MAX for all columns due to max_allowed_packet limitations)
+
+INSERT INTO t1 (b,b0,b1,b300,bm,b70k,b17m,t,m,l) VALUES
+('','','','','','','','','',''),
+('a','b','c','d','e','f','g','h','i','j'),
+('test1','test2','test3','test4','test5','test6','test7','test8','test9','test10'),
+( REPEAT('a',65535), REPEAT('b',65535), REPEAT('c',255), REPEAT('d',65535), REPEAT('e',65535), REPEAT('f',1048576), HEX(REPEAT('g',1048576)), REPEAT('h',255), REPEAT('i',1048576), HEX(REPEAT('j',1048576)) );
+
+--sorted_result
+SELECT LENGTH(b), LENGTH(b0), LENGTH(b1), LENGTH(b300), LENGTH(bm), LENGTH(b70k), LENGTH(b17m), LENGTH(t), LENGTH(m), LENGTH(l) FROM t1;
+
+# Invalid values (produce warnings, except for mediumblob and longblob columns for which the values are within limits)
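+# (Aside: whether oversized values warn-and-truncate or fail outright depends
+# on sql_mode; this suite assumes the non-strict default. Commented probe:)
+# SHOW WARNINGS LIMIT 5;   -- expect "Data truncated for column ..." notes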
+
+INSERT INTO t1 (b,b0,b1,b300,bm,b70k,b17m,t,m,l) VALUES
+( REPEAT('a',65536), REPEAT('b',65536), REPEAT('c',256), REPEAT('d',65536), REPEAT('e',65536), REPEAT('f',1048576), REPEAT('g',1048576), REPEAT('h',256), REPEAT('i',1048576), REPEAT('j',1048576) );
+
+--sorted_result
+SELECT LENGTH(b), LENGTH(b0), LENGTH(b1), LENGTH(b300), LENGTH(bm), LENGTH(b70k), LENGTH(b17m), LENGTH(t), LENGTH(m), LENGTH(l) FROM t1;
+
+--error ER_TOO_BIG_DISPLAYWIDTH
+ALTER TABLE t1 ADD COLUMN bbb BLOB(4294967296);
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_blob.test b/storage/rocksdb/mysql-test/rocksdb/t/type_blob.test
new file mode 100644
index 00000000000..54859707091
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_blob.test
@@ -0,0 +1,8 @@
+--source include/have_rocksdb.inc
+
+#
+# BLOB column types
+#
+
+--source type_blob.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_blob_indexes-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/type_blob_indexes-master.opt
new file mode 100644
index 00000000000..6ad42e58aa2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_blob_indexes-master.opt
@@ -0,0 +1 @@
+--rocksdb_debug_optimizer_n_rows=1000 --rocksdb_records_in_range=50
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_blob_indexes.test b/storage/rocksdb/mysql-test/rocksdb/t/type_blob_indexes.test
new file mode 100644
index 00000000000..24c70e8e733
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_blob_indexes.test
@@ -0,0 +1,176 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+
+#
+# BLOB columns with indexes
+#
+
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (
+ b BLOB,
+ t TINYBLOB,
+ m MEDIUMBLOB,
+ l LONGBLOB,
+ PRIMARY KEY b (b(32))
+) ENGINE=rocksdb;
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (b,t,m,l) VALUES
+('','','',''),
+('a','b','c','d'),
+('b','d','c','b'),
+('test1','test2','test3','test4'),
+(REPEAT('a',128),REPEAT('b',128),REPEAT('c',128),REPEAT('d',128)),
+(HEX('abcd'),HEX('def'),HEX('a'),HEX('abc')),
+('abc','def','ghi','jkl'),
+('test2','test3','test4','test5'),
+('test3','test4','test5','test6'),
+(REPEAT('b',128),REPEAT('f',128),REPEAT('e',128),REPEAT('d',128)),
+(REPEAT('c',128),REPEAT('b',128),REPEAT('c',128),REPEAT('e',128));
+
+--replace_column 1 # 2 # 3 # 4 # 5 # 7 # 8 # 9 # 10 #
+EXPLAIN SELECT SUBSTRING(b,16) AS f FROM t1 WHERE b IN ('test1','test2') ORDER BY f;
+SELECT SUBSTRING(b,16) AS f FROM t1 WHERE b IN ('test1','test2') ORDER BY f;
+
+--replace_column 1 # 2 # 3 # 4 # 5 # 7 # 8 # 9 # 10 #
+EXPLAIN SELECT SUBSTRING(b,16) AS f FROM t1 USE INDEX () WHERE b IN ('test1','test2') ORDER BY f;
+SELECT SUBSTRING(b,16) AS f FROM t1 USE INDEX () WHERE b IN ('test1','test2') ORDER BY f;
+
+DROP TABLE t1;
+
+
+CREATE TABLE t1 (
+ b BLOB,
+ t TINYBLOB,
+ m MEDIUMBLOB,
+ l LONGBLOB,
+ pk INT AUTO_INCREMENT PRIMARY KEY,
+ UNIQUE INDEX l_t (l(256),t(64))
+) ENGINE=rocksdb;
+
+--replace_column 6 # 7 # 10 # 11 #
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (b,t,m,l) VALUES
+('','','',''),
+('a','b','c','d'),
+('b','d','c','b'),
+('test1','test2','test3','test4'),
+(REPEAT('a',128),REPEAT('b',128),REPEAT('c',128),REPEAT('d',128)),
+(HEX('abcd'),HEX('def'),HEX('a'),HEX('abc')),
+('abc','def','ghi','jkl'),
+('test2','test3','test4','test5'),
+('test3','test4','test5','test6'),
+(REPEAT('b',128),REPEAT('f',128),REPEAT('e',128),REPEAT('d',128)),
+(REPEAT('c',128),REPEAT('b',128),REPEAT('c',128),REPEAT('e',128));
+
+# Here the optimizer reports l_t as a possible key, but does not choose it as the final key
+--replace_column 9 #
+EXPLAIN SELECT SUBSTRING(t,64), SUBSTRING(l,256) FROM t1 WHERE t!=l AND l NOT IN ('test1') ORDER BY t, l DESC;
+SELECT SUBSTRING(t,64), SUBSTRING(l,256) FROM t1 WHERE t!=l AND l NOT IN ('test1') ORDER BY t, l DESC;
+
+--replace_column 9 #
+EXPLAIN SELECT SUBSTRING(t,64), SUBSTRING(l,256) FROM t1 FORCE INDEX (l_t) WHERE t!=l AND l NOT IN ('test1') ORDER BY t, l DESC;
+SELECT SUBSTRING(t,64), SUBSTRING(l,256) FROM t1 FORCE INDEX (l_t) WHERE t!=l AND l NOT IN ('test1') ORDER BY t, l DESC;
+
+DROP TABLE t1;
+
+
+CREATE TABLE t1 (
+ b BLOB,
+ t TINYBLOB,
+ m MEDIUMBLOB,
+ l LONGBLOB,
+ pk INT AUTO_INCREMENT PRIMARY KEY,
+ INDEX (m(128))
+) ENGINE=rocksdb;
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (b,t,m,l) VALUES
+('','','',''),
+('a','b','c','d'),
+('b','d','c','b'),
+('test1','test2','test3','test4'),
+(REPEAT('a',128),REPEAT('b',128),REPEAT('c',128),REPEAT('d',128)),
+(HEX('abcd'),HEX('def'),HEX('a'),HEX('abc')),
+('abc','def','ghi','jkl'),
+('test2','test3','test4','test5'),
+('test3','test4','test5','test6'),
+(REPEAT('b',128),REPEAT('f',128),REPEAT('e',128),REPEAT('d',128)),
+(REPEAT('c',128),REPEAT('b',128),REPEAT('c',128),REPEAT('e',128));
+
+--replace_column 9 #
+EXPLAIN SELECT SUBSTRING(m,128) AS f FROM t1 WHERE m = 'test1' ORDER BY f DESC;
+SELECT SUBSTRING(m,128) AS f FROM t1 WHERE m = 'test1' ORDER BY f DESC;
+
+--replace_column 9 #
+EXPLAIN SELECT SUBSTRING(m,128) AS f FROM t1 IGNORE INDEX FOR ORDER BY (m) WHERE m = 'test1' ORDER BY f DESC;
+SELECT SUBSTRING(m,128) AS f FROM t1 IGNORE INDEX FOR ORDER BY (m) WHERE m = 'test1' ORDER BY f DESC;
+
+DROP TABLE t1;
+
+CREATE TABLE t1 (
+ b BLOB,
+ PRIMARY KEY b (b(32))
+) ENGINE=rocksdb;
+
+INSERT INTO t1 (b) VALUES
+('00000000000000000000000000000000'),
+('00000000000000000000000000000001'),
+('00000000000000000000000000000002');
+
+SELECT b FROM t1;
+
+DROP TABLE t1;
+
+CREATE TABLE t1 (
+ b TINYBLOB,
+ PRIMARY KEY b (b(32))
+) ENGINE=rocksdb;
+
+INSERT INTO t1 (b) VALUES
+('00000000000000000000000000000000'),
+('00000000000000000000000000000001'),
+('00000000000000000000000000000002');
+
+SELECT b FROM t1;
+
+DROP TABLE t1;
+
+CREATE TABLE t1 (
+ b MEDIUMBLOB,
+ PRIMARY KEY b (b(32))
+) ENGINE=rocksdb;
+
+INSERT INTO t1 (b) VALUES
+('00000000000000000000000000000000'),
+('00000000000000000000000000000001'),
+('00000000000000000000000000000002');
+
+SELECT b FROM t1;
+
+DROP TABLE t1;
+
+CREATE TABLE t1 (
+ b LONGBLOB,
+ PRIMARY KEY b (b(32))
+) ENGINE=rocksdb;
+
+INSERT INTO t1 (b) VALUES
+('00000000000000000000000000000000'),
+('00000000000000000000000000000001'),
+('00000000000000000000000000000002');
+
+SELECT b FROM t1;
+
+DROP TABLE t1;
+
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_bool.inc b/storage/rocksdb/mysql-test/rocksdb/t/type_bool.inc
new file mode 100644
index 00000000000..cddc0822c44
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_bool.inc
@@ -0,0 +1,64 @@
+#
+# BOOLEAN column type
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+eval CREATE TABLE t1 (
+ pk INT AUTO_INCREMENT PRIMARY KEY,
+ b1 BOOL $extra_col_opts,
+ b2 BOOLEAN $extra_col_opts
+) ENGINE=rocksdb;
+
+SHOW COLUMNS IN t1;
+
+# Valid values
+
+INSERT INTO t1 (b1,b2) VALUES (1,TRUE);
+SELECT b1,b2 FROM t1;
+
+INSERT INTO t1 (b1,b2) VALUES (FALSE,0);
+--sorted_result
+SELECT b1,b2 FROM t1;
+
+INSERT INTO t1 (b1,b2) VALUES (2,3);
+--sorted_result
+SELECT b1,b2 FROM t1;
+
+INSERT INTO t1 (b1,b2) VALUES (-1,-2);
+--sorted_result
+SELECT b1,b2 FROM t1;
+
+--sorted_result
+SELECT IF(b1,'true','false') AS a, IF(b2,'true','false') AS b FROM t1;
+
+--sorted_result
+SELECT b1,b2 FROM t1 WHERE b1 = TRUE;
+
+--sorted_result
+SELECT b1,b2 FROM t1 WHERE b2 = FALSE;
+
+# Invalid values
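+# (Aside: BOOL/BOOLEAN are synonyms for TINYINT(1), so under the non-strict
+# sql_mode this suite assumes, 128 and -129 below clamp to 127/-128 and
+# 'a'/'b' convert to 0, each with a warning rather than an error.)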
+
+INSERT INTO t1 (b1,b2) VALUES ('a','b');
+--sorted_result
+SELECT b1,b2 FROM t1;
+
+INSERT INTO t1 (b1,b2) VALUES (128,-129);
+--sorted_result
+SELECT b1,b2 FROM t1;
+
+# This is why we don't have zerofill and unsigned tests
+# for boolean columns:
+--error ER_PARSE_ERROR
+eval ALTER TABLE t1 ADD COLUMN b3 BOOLEAN UNSIGNED $extra_col_opts;
+
+--error ER_PARSE_ERROR
+eval ALTER TABLE t1 ADD COLUMN b3 BOOL ZEROFILL $extra_col_opts;
+
+DROP TABLE t1;
+
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_bool.test b/storage/rocksdb/mysql-test/rocksdb/t/type_bool.test
new file mode 100644
index 00000000000..d5a3b9be83d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_bool.test
@@ -0,0 +1,8 @@
+--source include/have_rocksdb.inc
+
+#
+# BOOLEAN column type
+#
+
+--source type_bool.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_char.inc b/storage/rocksdb/mysql-test/rocksdb/t/type_char.inc
new file mode 100644
index 00000000000..d770dc608fd
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_char.inc
@@ -0,0 +1,45 @@
+#
+# CHAR column types
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+eval CREATE TABLE t1 (
+ c CHAR $extra_col_opts,
+ c0 CHAR(0) $extra_col_opts,
+ c1 CHAR(1) $extra_col_opts,
+ c20 CHAR(20) $extra_col_opts,
+ c255 CHAR(255) $extra_col_opts,
+ PRIMARY KEY (c255)
+) ENGINE=rocksdb;
+
+SHOW COLUMNS IN t1;
+
+# Valid values
+
+INSERT INTO t1 (c,c0,c1,c20,c255) VALUES ('','','','','');
+INSERT INTO t1 (c,c0,c1,c20,c255) VALUES ('a','','b','abcdefghi klmnopqrst', 'Creating an article for the Knowledgebase is similar to asking questions. First, navigate to the category where you feel the article should be. Once there, double check that an article doesn\'t already exist which would work.');
+
+--sorted_result
+SELECT c,c0,c1,c20,c255 FROM t1;
+
+# Invalid values
+
+INSERT INTO t1 (c,c0,c1,c20,c255) VALUES ('abc', 'a', 'abc', REPEAT('a',21), REPEAT('x',256));
+INSERT INTO t1 (c,c0,c1,c20,c255) SELECT c255, c255, c255, c255, CONCAT('a',c255,c1) FROM t1;
+
+--sorted_result
+SELECT c,c0,c1,c20,c255 FROM t1;
+
+--sorted_result
+SELECT DISTINCT c20, REPEAT('a',LENGTH(c20)), COUNT(*) FROM t1 GROUP BY c1, c20;
+
+--error ER_TOO_BIG_FIELDLENGTH
+eval ALTER TABLE t1 ADD COLUMN c257 CHAR(257) $extra_col_opts;
+
+DROP TABLE t1;
+
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_char.test b/storage/rocksdb/mysql-test/rocksdb/t/type_char.test
new file mode 100644
index 00000000000..347f83cad33
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_char.test
@@ -0,0 +1,19 @@
+--source include/have_rocksdb.inc
+
+#
+# CHAR column types
+#
+
+--source type_char.inc
+
+# Issue #226
+CREATE TABLE t1(c1 CHAR(0) NOT NULL);
+INSERT INTO t1 VALUES('a');
+SELECT * FROM t1;
+DROP TABLE t1;
+
+# Issue #259
+CREATE TABLE t1(a char(10) character set utf8 collate utf8_bin primary key);
+INSERT INTO t1 VALUES ('one'),('two'),('three'),('four'),('five');
+(SELECT * FROM t1 LIMIT 1) UNION (SELECT * FROM t1);
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_char_indexes-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/type_char_indexes-master.opt
new file mode 100644
index 00000000000..ba9364e1523
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_char_indexes-master.opt
@@ -0,0 +1 @@
+--rocksdb_debug_optimizer_n_rows=1000
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_char_indexes.test b/storage/rocksdb/mysql-test/rocksdb/t/type_char_indexes.test
new file mode 100644
index 00000000000..6ee2f03e74d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_char_indexes.test
@@ -0,0 +1,107 @@
+--source include/have_rocksdb.inc
+
+#
+# CHAR and VARCHAR columns with indexes
+#
+
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (
+ c CHAR,
+ c20 CHAR(20) PRIMARY KEY,
+ v16 VARCHAR(16),
+ v128 VARCHAR(128)
+) ENGINE=rocksdb;
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (c,c20,v16,v128) VALUES ('a','char1','varchar1a','varchar1b'),('a','char2','varchar2a','varchar2b'),('b','char3','varchar1a','varchar1b'),('c','char4','varchar3a','varchar3b');
+
+--replace_column 9 #
+EXPLAIN SELECT c20 FROM t1 ORDER BY c20;
+SELECT c20 FROM t1 ORDER BY c20;
+
+--replace_column 9 #
+EXPLAIN SELECT c20 FROM t1 FORCE INDEX FOR ORDER BY (PRIMARY) ORDER BY c20;
+SELECT c20 FROM t1 FORCE INDEX FOR ORDER BY (PRIMARY) ORDER BY c20;
+
+DROP TABLE t1;
+
+--disable_parsing
+--error ER_GET_ERRMSG
+CREATE TABLE t1 (
+ c CHAR,
+ c20 CHAR(20),
+ v16 VARCHAR(16),
+ v128 VARCHAR(128),
+ pk CHAR(64) PRIMARY KEY,
+ UNIQUE INDEX c_v (c,v128)
+) ENGINE=rocksdb;
+
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (c,c20,v16,v128) VALUES ('a','char1','varchar1a','varchar1b'),('a','char2','varchar2a','varchar2b'),('b','char3','varchar1a','varchar1b'),('c','char4','varchar3a','varchar3b');
+
+--replace_column 9 #
+EXPLAIN SELECT c, v128 FROM t1 WHERE c != 'a' AND v128 > 'varchar';
+--sorted_result
+SELECT c, v128 FROM t1 WHERE c != 'a' AND v128 > 'varchar';
+
+--replace_column 9 #
+EXPLAIN SELECT v128, COUNT(*) FROM t1 GROUP BY v128;
+--sorted_result
+SELECT v128, COUNT(*) FROM t1 GROUP BY v128;
+
+--replace_column 9 #
+EXPLAIN SELECT v128, COUNT(*) FROM t1 USE INDEX FOR GROUP BY (c_v) GROUP BY v128;
+--sorted_result
+SELECT v128, COUNT(*) FROM t1 USE INDEX FOR GROUP BY (c_v) GROUP BY v128;
+
+SET SESSION optimizer_switch = 'engine_condition_pushdown=on';
+--replace_column 9 #
+EXPLAIN SELECT c,c20,v16,v128 FROM t1 WHERE c > 'a';
+--sorted_result
+SELECT c,c20,v16,v128 FROM t1 WHERE c > 'a';
+SET SESSION optimizer_switch = @@global.optimizer_switch;
+
+DROP TABLE t1;
+
+--enable_parsing
+
+CREATE TABLE t1 (
+ c CHAR,
+ c20 CHAR(20),
+ v16 VARCHAR(16),
+ v128 VARCHAR(128),
+ pk VARCHAR(64) PRIMARY KEY,
+ INDEX (v16)
+) ENGINE=rocksdb;
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (c,c20,v16,v128,pk) VALUES ('a','char1','varchar1a','varchar1b','1'),('a','char2','varchar2a','varchar2b','2'),('b','char3','varchar1a','varchar1b','3'),('c','char4','varchar3a','varchar3b','4');
+
+--replace_column 9 #
+EXPLAIN SELECT SUBSTRING(v16,7,3) FROM t1 WHERE v16 LIKE 'varchar%';
+--sorted_result
+SELECT SUBSTRING(v16,7,3) FROM t1 WHERE v16 LIKE 'varchar%';
+
+--replace_column 9 #
+EXPLAIN SELECT SUBSTRING(v16,7,3) FROM t1 IGNORE INDEX (v16) WHERE v16 LIKE 'varchar%';
+--sorted_result
+SELECT SUBSTRING(v16,7,3) FROM t1 IGNORE INDEX (v16) WHERE v16 LIKE 'varchar%';
+
+--replace_column 9 #
+EXPLAIN SELECT c,c20,v16,v128 FROM t1 WHERE v16 = 'varchar1a' OR v16 = 'varchar3a' ORDER BY v16;
+--sorted_result
+SELECT c,c20,v16,v128 FROM t1 WHERE v16 = 'varchar1a' OR v16 = 'varchar3a' ORDER BY v16;
+
+DROP TABLE t1;
+
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_char_indexes_collation-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/type_char_indexes_collation-master.opt
new file mode 100644
index 00000000000..ba9364e1523
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_char_indexes_collation-master.opt
@@ -0,0 +1 @@
+--rocksdb_debug_optimizer_n_rows=1000
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_char_indexes_collation.test b/storage/rocksdb/mysql-test/rocksdb/t/type_char_indexes_collation.test
new file mode 100644
index 00000000000..d231236bd92
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_char_indexes_collation.test
@@ -0,0 +1,126 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+
+# Test that index-only scans still work when the key collation is unknown to MyRocks.
+set session debug_dbug= "+d,myrocks_enable_unknown_collation_index_only_scans";
+create table t (id int not null auto_increment primary key,
+ c varchar(8) CHARACTER SET utf8 COLLATE utf8_general_ci,
+ key sk (c));
+insert into t (c) values ('☀'), ('ß');
+--replace_column 9 #
+explain select c from t;
+select c from t;
+drop table t;
+set session debug_dbug= "-d,myrocks_enable_unknown_collation_index_only_scans";
+
+# Test that all characters in the latin1 charset are restored correctly from
+# the secondary keys, by comparing against results from a PK scan.
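+# (Aside: WEIGHT_STRING exposes the collation sort key, which is roughly what
+# a mem-comparable index image is derived from. Tiny commented illustration:)
+# SELECT HEX(WEIGHT_STRING('a' COLLATE latin1_swedish_ci)),
+#        HEX(WEIGHT_STRING('A' COLLATE latin1_swedish_ci));  -- equal weights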
+create table t (id int not null auto_increment,
+ c1 varchar(1) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
+ c2 char(1) CHARACTER SET latin1 COLLATE latin1_general_ci,
+ primary key (id),
+ key sk1 (c1),
+ key sk2 (c2));
+
+let $i = 0;
+
+--disable_query_log
+while ($i < 256)
+{
+ --eval insert into t (c1, c2) values (CHAR('$i'), CHAR('$i'))
+ inc $i;
+}
+--enable_query_log
+
+--replace_column 9 #
+explain select hex(c1) from t order by c1;
+--replace_column 9 #
+explain select hex(c1) from t IGNORE INDEX (sk1) order by c1;
+
+--replace_column 9 #
+explain select hex(c2) from t order by c2;
+--replace_column 9 #
+explain select hex(c2) from t IGNORE INDEX (sk1) order by c2;
+
+--let $file1=$MYSQLTEST_VARDIR/tmp/filesort_order
+--let $file2=$MYSQLTEST_VARDIR/tmp/sk_order
+--disable_query_log
+--eval select hex(weight_string(c1)) INTO OUTFILE '$file1' from t order by c1
+--eval select hex(weight_string(c1)) INTO OUTFILE '$file2' from t IGNORE INDEX (sk1) order by c1
+--enable_query_log
+
+--diff_files $file1 $file2
+--remove_file $file1
+--remove_file $file2
+
+--disable_query_log
+--eval select hex(weight_string(c2)) INTO OUTFILE '$file1' from t order by c2
+--eval select hex(weight_string(c2)) INTO OUTFILE '$file2' from t IGNORE INDEX (sk1) order by c2
+--enable_query_log
+
+--diff_files $file1 $file2
+--remove_file $file1
+--remove_file $file2
+
+truncate t;
+
+# Test handling of spaces at the end of fields.
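+# (Aside: VARCHAR preserves trailing spaces on read while CHAR strips them,
+# and latin1's PAD SPACE collations ignore them in comparisons. Commented check:)
+# SELECT 'Asdf' = 'Asdf   ';   -- 1 under PAD SPACE behaviour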
+insert into t (c1, c2) values ('Asdf ', 'Asdf ');
+select char_length(c1), char_length(c2), c1, c2 from t;
+
+drop table t;
+
+create table t (id int not null auto_increment,
+ c2 char(255) CHARACTER SET latin1 COLLATE latin1_general_ci,
+ primary key (id),
+ unique key sk2 (c2));
+
+insert into t (c2) values ('Asdf');
+--error ER_DUP_ENTRY
+insert into t (c2) values ('asdf ');
+
+drop table t;
+
+create table t (id int not null auto_increment,
+ c1 varchar(256) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
+ primary key (id),
+ unique key sk1 (c1));
+
+insert into t (c1) values ('Asdf');
+--error ER_DUP_ENTRY
+insert into t (c1) values ('asdf ');
+--error ER_DUP_ENTRY
+insert into t (c1) values ('asdf');
+
+drop table t;
+
+create table t (id int not null auto_increment,
+ c1 varchar(256) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
+ primary key (id),
+ unique key sk1 (c1(1)));
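+# (Aside: with a 1-character unique prefix under latin1_swedish_ci only the
+# first character's weight is compared, so 'a ...' below collides with 'Asdf'
+# even though the full strings differ.)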
+
+insert into t (c1) values ('Asdf');
+insert into t (c1) values ('bbbb ');
+--error ER_DUP_ENTRY
+insert into t (c1) values ('a ');
+
+--replace_column 9 #
+explain select c1 from t;
+select c1 from t;
+
+drop table t;
+
+# Test varchar keyparts with key prefix
+set session rocksdb_verify_row_debug_checksums = on;
+create table t (id int primary key, email varchar(100), KEY email_i (email(30))) engine=rocksdb default charset=latin1;
+insert into t values (1, ' a');
+--replace_column 9 #
+explain select 'email_i' as index_name, count(*) AS count from t force index(email_i);
+select 'email_i' as index_name, count(*) AS count from t force index(email_i);
+drop table t;
+
+# Test varchar with length greater than 255
+create table t (id int primary key, email varchar(767), KEY email_i (email)) engine=rocksdb default charset=latin1;
+insert into t values (1, REPEAT('a', 700));
+select 'email_i' as index_name, count(*) AS count from t force index(email_i);
+drop table t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_date_time.inc b/storage/rocksdb/mysql-test/rocksdb/t/type_date_time.inc
new file mode 100644
index 00000000000..18ed7436b62
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_date_time.inc
@@ -0,0 +1,47 @@
+#
+# Date and time column types
+# (DATE, DATETIME, TIMESTAMP, TIME, YEAR)
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+set @save_time_zone=@@time_zone;
+set time_zone='UTC';
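+# (Aside: TIMESTAMP is stored as a UTC epoch value and converted on read, so
+# pinning time_zone keeps the FROM_UNIXTIME() rows below stable across hosts.)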
+eval CREATE TABLE t1 (
+ d DATE $extra_col_opts,
+ dt DATETIME $extra_col_opts,
+ ts TIMESTAMP $extra_col_opts,
+ t TIME $extra_col_opts,
+ y YEAR $extra_col_opts,
+ y4 YEAR(4) $extra_col_opts,
+ y2 YEAR(2) $extra_col_opts,
+ pk DATETIME PRIMARY KEY
+) ENGINE=rocksdb;
+
+SHOW COLUMNS IN t1;
+
+SET @tm = '2012-04-09 05:27:00';
+
+# Valid values
+# (FROM_UNIXTIME(1) is '1970-01-01 00:00:01' in the UTC time zone set above)
+INSERT INTO t1 (d,dt,ts,t,y,y4,y2,pk) VALUES
+('1000-01-01', '1000-01-01 00:00:00', FROM_UNIXTIME(1), '-838:59:59', '1901', '1901', '00','2012-12-12 12:12:12'),
+('9999-12-31', '9999-12-31 23:59:59', FROM_UNIXTIME(2147483647), '838:59:59', '2155', '2155', '99','2012-12-12 12:12:13'),
+('0000-00-00', '0000-00-00 00:00:00', '0000-00-00 00:00:00', '00:00:00', '0', '0', '0','2012-12-12 12:12:14'),
+(DATE(@tm),@tm,TIMESTAMP(@tm),TIME(@tm),YEAR(@tm),YEAR(@tm),YEAR(@tm),'2012-12-12 12:12:15');
+
+--sorted_result
+SELECT d,dt,ts,t,y,y4,y2 FROM t1;
+
+# Invalid values
+
+INSERT INTO t1 (d,dt,ts,t,y,y4,y2,pk) VALUES
+('999-13-32', '999-11-31 00:00:00', '0', '-839:00:00', '1900', '1900', '-1','2012-12-12 12:12:16');
+
+SELECT d,dt,ts,t,y,y4,y2 FROM t1;
+set time_zone=@save_time_zone;
+
+DROP TABLE t1;
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_date_time.test b/storage/rocksdb/mysql-test/rocksdb/t/type_date_time.test
new file mode 100644
index 00000000000..af4e006c900
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_date_time.test
@@ -0,0 +1,9 @@
+--source include/have_rocksdb.inc
+
+#
+# Date and time column types
+# (DATE, DATETIME, TIMESTAMP, TIME, YEAR)
+#
+
+--source type_date_time.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_date_time_indexes-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/type_date_time_indexes-master.opt
new file mode 100644
index 00000000000..ba9364e1523
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_date_time_indexes-master.opt
@@ -0,0 +1 @@
+--rocksdb_debug_optimizer_n_rows=1000
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_date_time_indexes.test b/storage/rocksdb/mysql-test/rocksdb/t/type_date_time_indexes.test
new file mode 100644
index 00000000000..06cf86b7661
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_date_time_indexes.test
@@ -0,0 +1,157 @@
+--source include/have_rocksdb.inc
+
+#
+# Date and time columns with indexes
+# (DATE, DATETIME, TIMESTAMP, TIME, YEAR)
+#
+
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+
+CREATE TABLE t1 (
+ d DATE,
+ dt DATETIME PRIMARY KEY,
+ ts TIMESTAMP,
+ t TIME,
+ y YEAR
+) ENGINE=rocksdb;
+
+SHOW INDEX IN t1;
+SET @tm = '2012-04-09 05:27:00';
+
+INSERT INTO t1 (d,dt,ts,t,y) VALUES
+('2012-01-12', '2010-11-22 12:33:54', '2011-11-14 21:45:55', '00:12:33', '2000'),
+('2012-01-12', '2010-11-22 11:43:14', '2011-11-14 21:45:55', '00:12:32', '2001'),
+('2012-03-31', '2011-08-28 21:33:56', '1999-04-30 19:11:08', '12:00:00', '1999'),
+('2012-03-13', '2011-08-27 21:33:56', '1999-03-30 19:11:08', '12:10:00', '1998'),
+('2011-03-31', '2011-08-28 20:33:56', '1997-01-31 11:54:01', '22:04:10', '1994'),
+(DATE(@tm),@tm,TIMESTAMP(@tm),TIME(@tm),YEAR(@tm));
+
+--replace_column 9 #
+EXPLAIN SELECT dt FROM t1 ORDER BY dt LIMIT 3;
+SELECT dt FROM t1 ORDER BY dt LIMIT 3;
+
+--replace_column 9 #
+EXPLAIN SELECT dt FROM t1 FORCE INDEX FOR ORDER BY (PRIMARY) ORDER BY dt LIMIT 3;
+SELECT dt FROM t1 FORCE INDEX FOR ORDER BY (PRIMARY) ORDER BY dt LIMIT 3;
+
+--error ER_DUP_ENTRY
+INSERT INTO t1 (d,dt,ts,t,y) VALUES
+('2012-01-11', '2010-11-22 12:33:54', '2011-11-14 21:45:55', '00:12:33', '2000');
+
+DROP TABLE t1;
+
+CREATE TABLE t1 (
+ d DATE,
+ dt DATETIME,
+ ts TIMESTAMP,
+ t TIME,
+ y YEAR,
+ pk TIME PRIMARY KEY,
+ INDEX (ts)
+) ENGINE=rocksdb;
+
+SHOW INDEX IN t1;
+SET @tm = '2012-04-09 05:27:00';
+
+INSERT INTO t1 (d,dt,ts,t,y,pk) VALUES
+('2012-01-12', '2010-11-22 12:33:54', '2011-11-14 21:45:55', '00:12:33', '2000','12:00:00'),
+('2012-01-12', '2010-11-22 11:43:14', '2011-11-14 21:45:55', '00:12:32', '2001','12:01:00'),
+('2012-03-31', '2011-08-28 21:33:56', '1999-04-30 19:11:08', '12:00:00', '1999','12:02:00'),
+('2012-03-13', '2011-08-27 21:33:56', '1999-03-30 19:11:08', '12:10:00', '1998','12:03:00'),
+('2011-03-31', '2011-08-28 20:33:56', '1997-01-31 11:54:01', '22:04:10', '1994','12:04:00'),
+(DATE(@tm),@tm,TIMESTAMP(@tm),TIME(@tm),YEAR(@tm),'12:05:00');
+
+--replace_column 9 #
+EXPLAIN SELECT ts FROM t1 WHERE ts > NOW();
+--sorted_result
+SELECT ts FROM t1 WHERE ts > NOW();
+
+--replace_column 9 #
+EXPLAIN SELECT ts FROM t1 USE INDEX () WHERE ts > NOW();
+--sorted_result
+SELECT ts FROM t1 USE INDEX () WHERE ts > NOW();
+
+DROP TABLE t1;
+
+--disable_parsing
+--error ER_GET_ERRMSG
+CREATE TABLE t1 (
+ d DATE,
+ dt DATETIME,
+ ts TIMESTAMP,
+ t TIME,
+ y YEAR,
+ pk YEAR PRIMARY KEY,
+ UNIQUE INDEX d_t (d,t)
+) ENGINE=rocksdb;
+
+
+SHOW INDEX IN t1;
+SET @tm = '2012-04-09 05:27:00';
+
+INSERT INTO t1 (d,dt,ts,t,y,pk) VALUES
+('2012-01-12', '2010-11-22 12:33:54', '2011-11-14 21:45:55', '00:12:33', '2000','1990'),
+('2012-01-12', '2010-11-22 11:43:14', '2011-11-14 21:45:55', '00:12:32', '2001','1991'),
+('2012-03-31', '2011-08-28 21:33:56', '1999-04-30 19:11:08', '12:00:00', '1999','1992'),
+('2012-03-13', '2011-08-27 21:33:56', '1999-03-30 19:11:08', '12:10:00', '1998','1993'),
+('2011-03-31', '2011-08-28 20:33:56', '1997-01-31 11:54:01', '22:04:10', '1994','1994'),
+(DATE(@tm),@tm,TIMESTAMP(@tm),TIME(@tm),YEAR(@tm),'1995');
+
+--replace_column 9 #
+EXPLAIN SELECT d, t FROM t1 WHERE CONCAT(d,' ',t) != CURRENT_DATE();
+--sorted_result
+SELECT d, t FROM t1 WHERE CONCAT(d,' ',t) != CURRENT_DATE();
+
+--replace_column 9 #
+EXPLAIN SELECT d, t FROM t1 IGNORE INDEX (d_t) WHERE CONCAT(d,' ',t) != CURRENT_DATE();
+--sorted_result
+SELECT d, t FROM t1 IGNORE INDEX (d_t) WHERE CONCAT(d,' ',t) != CURRENT_DATE();
+
+--error ER_DUP_ENTRY
+INSERT INTO t1 (d,dt,ts,t,y) VALUES
+('2012-01-12', '2010-11-22 12:33:53', '2011-11-14 21:45:55', '00:12:33', '2000');
+
+DROP TABLE t1;
+
+--enable_parsing
+
+CREATE TABLE t1 (
+ d DATE,
+ dt DATETIME,
+ ts TIMESTAMP,
+ t TIME,
+ y YEAR,
+ pk TIME PRIMARY KEY,
+ INDEX (y,t)
+) ENGINE=rocksdb;
+
+SHOW INDEX IN t1;
+SET @tm = '2012-04-09 05:27:00';
+
+INSERT INTO t1 (d,dt,ts,t,y,pk) VALUES
+('2012-01-12', '2010-11-22 12:33:54', '2011-11-14 21:45:55', '00:12:33', '2000','18:18:18'),
+('2012-01-12', '2010-11-22 11:43:14', '2011-11-14 21:45:55', '00:12:32', '2001','19:18:18'),
+('2012-03-31', '2011-08-28 21:33:56', '1999-04-30 19:11:08', '12:00:00', '1999','20:18:18'),
+('2012-03-13', '2011-08-27 21:33:56', '1999-03-30 19:11:08', '12:10:00', '1998','21:18:18'),
+('2011-03-31', '2011-08-28 20:33:56', '1997-01-31 11:54:01', '22:04:10', '1994','22:18:18'),
+(DATE(@tm),@tm,TIMESTAMP(@tm),TIME(@tm),YEAR(@tm),'23:18:18');
+
+--replace_column 9 #
+EXPLAIN SELECT y, COUNT(*) FROM t1 GROUP BY y;
+--sorted_result
+SELECT y, COUNT(*) FROM t1 GROUP BY y;
+
+--replace_column 9 #
+EXPLAIN SELECT y, COUNT(*) FROM t1 USE INDEX FOR GROUP BY () GROUP BY y;
+--sorted_result
+SELECT y, COUNT(*) FROM t1 USE INDEX FOR GROUP BY () GROUP BY y;
+
+DROP TABLE t1;
+
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_decimal-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/type_decimal-master.opt
new file mode 100644
index 00000000000..33e72265db2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_decimal-master.opt
@@ -0,0 +1 @@
+--rocksdb_debug_optimizer_n_rows=10
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_decimal.test b/storage/rocksdb/mysql-test/rocksdb/t/type_decimal.test
new file mode 100644
index 00000000000..ee325b34eff
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_decimal.test
@@ -0,0 +1,88 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+
+--disable_warnings
+drop table if exists t1, t2;
+--enable_warnings
+
+--echo #
+--echo # Check that DECIMAL datatype supports 'index-only' scans and is decoded correctly.
+--echo # (Decoding happens from the mem-comparable image in the index, regardless
+--echo # of whether the value part has original value or not)
+--echo #
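+# (Aside: an index-only read surfaces as "Using index" in EXPLAIN's Extra
+# column; that is what the plans below are being checked for.)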
+
+create table t1 (
+ pk int not null primary key,
+ col1 decimal (2,1) signed,
+ col2 decimal (2,1) unsigned,
+ filler varchar(100),
+ key key1(col1, col2)
+)engine=rocksdb;
+insert into t1 values
+(1,-9.1, 0.7, 'filler'),
+(2,-8.2, 1.6, 'filler'),
+(3, 0.3, 2.5, 'filler'),
+(4, 1.4, 3.4, 'filler'),
+(5, 2.5, 4.3, 'filler'),
+(6, 3.3, 5.3, 'filler');
+insert into t1 select pk+100, 9.0, 9.0, 'extra-data' from t1;
+insert into t1 select pk+200, 9.0, 9.0, 'extra-data' from t1;
+insert into t1 select pk+1000, 9.0, 9.0, 'extra-data' from t1;
+insert into t1 select pk+10000, 9.0, 9.0, 'extra-data' from t1;
+insert into t1 select pk+100000, 9.0, 9.0, 'extra-data' from t1;
+analyze table t1;
+
+--echo # The following can't use index-only:
+--replace_column 9 #
+explain select * from t1 where col1 between -8 and 8;
+
+--echo # This will use index-only:
+--replace_column 9 #
+explain
+select col1, col2 from t1 where col1 between -8 and 8;
+select col1, col2 from t1 where col1 between -8 and 8;
+
+insert into t1 values (11, NULL, 0.9, 'row1-with-null');
+insert into t1 values (10, -8.4, NULL, 'row2-with-null');
+--replace_column 9 #
+explain
+select col1, col2 from t1 force index(key1) where col1 is null or col1 < -7;
+select col1, col2 from t1 force index(key1) where col1 is null or col1 < -7;
+
+--echo # Try an UPDATE
+select * from t1 where pk in (3,4);
+update t1 set col2= col2+0.2 where pk in (3,4);
+select * from t1 where pk in (3,4);
+
+drop table t1;
+
+--echo #
+--echo # Try another DECIMAL-based type that takes more space
+--echo #
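+# (Aside, from MySQL's packed-decimal rules: 9 decimal digits pack into 4
+# bytes and leftover digits into 1-4 bytes, so DECIMAL(12,6) takes 3+3 = 6
+# bytes versus 2 bytes for the DECIMAL(2,1) used above; approximate figures.)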
+create table t1 (
+ pk int not null primary key,
+ col1 decimal (12,6) signed,
+ col2 decimal (12,6) unsigned,
+ filler varchar(100),
+ key key1(col1, col2)
+)engine=rocksdb;
+insert into t1 values
+(1,-900.001, 000.007, 'filler'),
+(2,-700.002, 100.006, 'filler'),
+(3, 000.003, 200.005, 'filler'),
+(4, 100.004, 300.004, 'filler'),
+(5, 200.005, 400.003, 'filler'),
+(6, 300.003, 500.003, 'filler');
+insert into t1 select pk+100, col1+20000, col2+20000, 'extra-data' from t1;
+insert into t1 select pk+200, col1+20000, col2+20000, 'extra-data' from t1;
+insert into t1 select pk+1000, col1+20000, col2+20000, 'extra-data' from t1;
+insert into t1 select pk+10000, col1+20000, col2+20000, 'extra-data' from t1;
+insert into t1 select pk+100000, col1+20000, col2+20000, 'extra-data' from t1;
+analyze table t1;
+
+--replace_column 9 #
+explain
+select col1, col2 from t1 force index(key1) where col1 between -800 and 800;
+select col1, col2 from t1 force index(key1) where col1 between -800 and 800;
+drop table t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_enum.inc b/storage/rocksdb/mysql-test/rocksdb/t/type_enum.inc
new file mode 100644
index 00000000000..8184f6261cc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_enum.inc
@@ -0,0 +1,50 @@
+#
+# ENUM column type
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+# Valid values.
+# We cannot test the maximum of 65,535 enum elements here,
+# because mysqltest has the standard limit of MAX_QUERY=256K;
+# but we can at least try 257
+
+eval CREATE TABLE t1 (
+ a ENUM('') $extra_col_opts,
+ b ENUM('test1','test2','test3','test4','test5') $extra_col_opts,
+ c ENUM('1','2','3','4','5','6','7','8','9','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z',' ','11','12','13','14','15','16','17','18','19','1a','1b','1c','1d','1e','1f','1g','1h','1i','1j','1k','1l','1m','1n','1o','1p','1q','1r','1s','1t','1u','1v','1w','1x','1y','1z','20','21','22','23','24','25','26','27','28','29','2a','2b','2c','2d','2e','2f','2g','2h','2i','2j','2k','2l','2m','2n','2o','2p','2q','2r','2s','2t','2u','2v','2w','2x','2y','2z','30','31','32','33','34','35','36','37','38','39','3a','3b','3c','3d','3e','3f','3g','3h','3i','3j','3k','3l','3m','3n','3o','3p','3q','3r','3s','3t','3u','3v','3w','3x','3y','3z','40','41','42','43','44','45','46','47','48','49','4a','4b','4c','4d','4e','4f','4g','4h','4i','4j','4k','4l','4m','4n','4o','4p','4q','4r','4s','4t','4u','4v','4w','4x','4y','4z','50','51','52','53','54','55','56','57','58','59','5a','5b','5c','5d','5e','5f','5g','5h','5i','5j','5k','5l','5m','5n','5o','5p','5q','5r','5s','5t','5u','5v','5w','5x','5y','5z','60','61','62','63','64','65','66','67','68','69','6a','6b','6c','6d','6e','6f','6g','6h','6i','6j','6k','6l','6m','6n','6o','6p','6q','6r','6s','6t','6u','6v','6w','6x','6y','6z','70','71','72','73','74','75') $extra_col_opts,
+ PRIMARY KEY (b)
+) ENGINE=rocksdb;
+
+SHOW COLUMNS IN t1;
+
+INSERT INTO t1 (a,b,c) VALUES ('','test2','4'),('',5,2);
+SELECT a,b,c FROM t1;
+
+# Out of range values
+# (should produce warnings)
+
+INSERT INTO t1 (a,b,c) VALUES (0,'test6',-1);
+--sorted_result
+SELECT a,b,c FROM t1;
+
+# Non-unique values in enum
+# (should produce a warning)
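+# (Aside: 'a' and 'A' only collide because the default collation here is
+# case-insensitive; with a _bin collation the same ENUM definition would be
+# accepted without the duplicate-value warning.)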
+eval ALTER TABLE t1 ADD COLUMN e ENUM('a','A') $extra_col_opts;
+SHOW COLUMNS IN t1;
+
+INSERT INTO t1 (a,b,c,e) VALUES ('','test3','75','A');
+--sorted_result
+SELECT a,b,c,e FROM t1;
+
+# Simple comparison
+
+--sorted_result
+SELECT a,b,c,e FROM t1 WHERE b='test2' OR a != '';
+
+DROP TABLE t1;
+
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_enum.test b/storage/rocksdb/mysql-test/rocksdb/t/type_enum.test
new file mode 100644
index 00000000000..d79469b2fad
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_enum.test
@@ -0,0 +1,8 @@
+--source include/have_rocksdb.inc
+
+#
+# ENUM column type
+#
+
+--source type_enum.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_enum_indexes-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/type_enum_indexes-master.opt
new file mode 100644
index 00000000000..ba9364e1523
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_enum_indexes-master.opt
@@ -0,0 +1 @@
+--rocksdb_debug_optimizer_n_rows=1000
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_enum_indexes.test b/storage/rocksdb/mysql-test/rocksdb/t/type_enum_indexes.test
new file mode 100644
index 00000000000..d7086a45fe1
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_enum_indexes.test
@@ -0,0 +1,93 @@
+--source include/have_rocksdb.inc
+
+#
+# ENUM columns with indexes
+#
+
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+--disable_parsing
+--error ER_GET_ERRMSG
+CREATE TABLE t1 (
+ a ENUM('N.America','S.America','Africa','Europe','Australia','Asia','Antarctica'),
+ b ENUM('test1','test2','test3','test4','test5'),
+ c ENUM('1a','1b','1d','1j','4a','4z','5a','5b','6v','6z'),
+ pk ENUM('1','2','3','4','5','6','7','8','9') PRIMARY KEY,
+ UNIQUE KEY a_b (a,b)
+) ENGINE=rocksdb;
+
+
+INSERT INTO t1 (a,b,c,pk) VALUES
+('N.America','test1','5a',1),('Europe','test1','5b',2),('Europe','test2','6v',3),
+('Africa','test3','4z',4),('Africa','test4','1j',5),('Antarctica','test4','1d',6);
+
+SHOW INDEX IN t1;
+
+--replace_column 9 #
+EXPLAIN SELECT a FROM t1 WHERE b > 'test2' ORDER BY a;
+SELECT a FROM t1 WHERE b > 'test2' ORDER BY a;
+
+--replace_column 9 #
+EXPLAIN SELECT a FROM t1 FORCE INDEX (a_b) WHERE b > 'test2' ORDER BY a;
+SELECT a FROM t1 FORCE INDEX (a_b) WHERE b > 'test2' ORDER BY a;
+
+DROP TABLE t1;
+
+--enable_parsing
+
+CREATE TABLE t1 (
+ a ENUM('N.America','S.America','Africa','Europe','Australia','Asia','Antarctica'),
+ b ENUM('test1','test2','test3','test4','test5'),
+ c ENUM('1a','1b','1d','1j','4a','4z','5a','5b','6v','6z') PRIMARY KEY
+) ENGINE=rocksdb;
+
+INSERT INTO t1 (a,b,c) VALUES
+('N.America','test1','5a'),('Europe','test1','5b'),('Europe','test2','6v'),
+('Africa','test3','4z'),('Africa','test4','1j'),('Antarctica','test4','1d');
+
+SHOW INDEX IN t1;
+
+--replace_column 9 #
+EXPLAIN SELECT c FROM t1 WHERE c BETWEEN '1d' AND '6u';
+--sorted_result
+SELECT c FROM t1 WHERE c BETWEEN '1d' AND '6u';
+
+--replace_column 9 #
+EXPLAIN SELECT c FROM t1 USE INDEX () WHERE c BETWEEN '1d' AND '6u';
+--sorted_result
+SELECT c FROM t1 USE INDEX () WHERE c BETWEEN '1d' AND '6u';
+
+DROP TABLE t1;
+
+CREATE TABLE t1 (
+ a ENUM('N.America','S.America','Africa','Europe','Australia','Asia','Antarctica'),
+ b ENUM('test1','test2','test3','test4','test5'),
+ c ENUM('1a','1b','1d','1j','4a','4z','5a','5b','6v','6z'),
+ pk ENUM('1','2','3','4','5','6','7','8','9') PRIMARY KEY,
+ INDEX(b)
+) ENGINE=rocksdb;
+
+INSERT INTO t1 (a,b,c,pk) VALUES
+('N.America','test1','5a',1),('Europe','test1','5b',2),('Europe','test2','6v',3),
+('Africa','test3','4z',4),('Africa','test4','1j',5),('Antarctica','test4','1d',6);
+
+SHOW INDEX IN t1;
+
+--replace_column 9 #
+EXPLAIN SELECT DISTINCT b FROM t1;
+--sorted_result
+SELECT DISTINCT b FROM t1;
+
+--replace_column 9 #
+EXPLAIN SELECT DISTINCT b FROM t1 IGNORE INDEX (b);
+--sorted_result
+SELECT DISTINCT b FROM t1 IGNORE INDEX (b);
+
+DROP TABLE t1;
+
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_fixed.inc b/storage/rocksdb/mysql-test/rocksdb/t/type_fixed.inc
new file mode 100644
index 00000000000..424f7c4f4ac
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_fixed.inc
@@ -0,0 +1,85 @@
+#
+# Fixed point types
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+eval CREATE TABLE t1 (
+ d DECIMAL $extra_col_opts,
+ d0 DECIMAL(0) $extra_col_opts,
+ d1_1 DECIMAL(1,1) $extra_col_opts,
+ d10_2 DECIMAL(10,2) $extra_col_opts,
+ d60_10 DECIMAL(60,10) $extra_col_opts,
+ n NUMERIC $extra_col_opts,
+ n0_0 NUMERIC(0,0) $extra_col_opts,
+ n1 NUMERIC(1) $extra_col_opts,
+ n20_4 NUMERIC(20,4) $extra_col_opts,
+ n65_4 NUMERIC(65,4) $extra_col_opts,
+ pk NUMERIC $extra_col_opts PRIMARY KEY
+) ENGINE=rocksdb;
+
+SHOW COLUMNS IN t1;
+
+# Always valid values
+
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (100,123456,0.3,40000.25,123456789123456789.10001,1024,7000.0,8.0,999999.9,9223372036854775807,1);
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (0,0,0,0,0,0,0,0,0,0,2);
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (9999999999.0,9999999999.0,0.9,99999999.99,99999999999999999999999999999999999999999999999999.9999999999,9999999999.0,9999999999.0,9.0,9999999999999999.9999,9999999999999999999999999999999999999999999999999999999999999.9999,3);
+
+--sorted_result
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+
+# Values which can be valid or not,
+# depending on whether columns are SIGNED or UNSIGNED
+# (if not valid should produce warnings)
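+# (Aside: with UNSIGNED in $extra_col_opts the negative rows below clamp to 0
+# and raise out-of-range warnings; with the default SIGNED they load as-is.)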
+
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (-100,-123456,-0.3,-40000.25,-123456789123456789.10001,-1024,-7000.0,-8.0,-999999.9,-9223372036854775807,4);
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (-9999999999.0,-9999999999.0,-0.9,-99999999.99,-99999999999999999999999999999999999999999999999999.9999999999,-9999999999.0,-9999999999.0,-9.0,-9999999999999999.9999,-9999999999999999999999999999999999999999999999999999999999999.9999,5);
+
+--sorted_result
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+
+--sorted_result
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1 WHERE n20_4 = 9999999999999999.9999 OR d < 100;
+
+# Invalid values
+
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (
+ 9999999999999999999999999999999999999999999999999999999999999.9999,
+ 9999999999999999999999999999999999999999999999999999999999999.9999,
+ 9999999999999999999999999999999999999999999999999999999999999.9999,
+ 9999999999999999999999999999999999999999999999999999999999999.9999,
+ 9999999999999999999999999999999999999999999999999999999999999.9999,
+ 9999999999999999999999999999999999999999999999999999999999999.9999,
+ 9999999999999999999999999999999999999999999999999999999999999.9999,
+ 9999999999999999999999999999999999999999999999999999999999999.9999,
+ 9999999999999999999999999999999999999999999999999999999999999.9999,
+ 9999999999999999999999999999999999999999999999999999999999999.9999,
+ 6
+);
+
+--sorted_result
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (10000000000.0,10000000000.0,1.1,100000000.99,100000000000000000000000000000000000000000000000000.0,10000000000.0,10000000000.0,10.0,10000000000000000.9999,10000000000000000000000000000000000000000000000000000000000000.9999,7);
+--sorted_result
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+
+INSERT INTO t1 (d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4,pk) VALUES (9999999999.1,9999999999.1,1.9,99999999.001,99999999999999999999999999999999999999999999999999.99999999991,9999999999.1,9999999999.1,9.1,9999999999999999.00001,9999999999999999999999999999999999999999999999999999999999999.11111,8);
+--sorted_result
+SELECT d,d0,d1_1,d10_2,d60_10,n,n0_0,n1,n20_4,n65_4 FROM t1;
+
+--error ER_TOO_BIG_PRECISION
+eval ALTER TABLE t1 ADD COLUMN n66 NUMERIC(66) $extra_col_opts;
+
+--error ER_TOO_BIG_PRECISION
+eval ALTER TABLE t1 ADD COLUMN n66_6 DECIMAL(66,6) $extra_col_opts;
+
+--error ER_TOO_BIG_SCALE
+eval ALTER TABLE t1 ADD COLUMN n66_66 DECIMAL(66,66) $extra_col_opts;
+
+DROP TABLE t1;
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_fixed.test b/storage/rocksdb/mysql-test/rocksdb/t/type_fixed.test
new file mode 100644
index 00000000000..4f2c09d17c1
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_fixed.test
@@ -0,0 +1,8 @@
+--source include/have_rocksdb.inc
+
+#
+# Fixed point types
+#
+
+--source type_fixed.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_fixed_indexes-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/type_fixed_indexes-master.opt
new file mode 100644
index 00000000000..ba9364e1523
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_fixed_indexes-master.opt
@@ -0,0 +1 @@
+--rocksdb_debug_optimizer_n_rows=1000
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_fixed_indexes.test b/storage/rocksdb/mysql-test/rocksdb/t/type_fixed_indexes.test
new file mode 100644
index 00000000000..e9e6df58d21
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_fixed_indexes.test
@@ -0,0 +1,107 @@
+--source include/have_rocksdb.inc
+
+#
+# Fixed point columns with indexes
+#
+
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+
+CREATE TABLE t1 (
+ d1 DECIMAL(10,2) PRIMARY KEY,
+ d2 DECIMAL(60,10),
+ n1 NUMERIC,
+ n2 NUMERIC(65,4)
+) ENGINE=rocksdb;
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (d1,d2,n1,n2) VALUES
+(10.22,60.12345,123456,14.3456),
+(10.0,60.12345,123456,14),
+(11.14,15,123456,13),
+(100,100,1,2),
+(0,0,0,0),
+(4540424564.23,3343303441.0,12,13),
+(15,17,23,100000);
+
+--replace_column 9 #
+EXPLAIN SELECT d1 FROM t1 ORDER BY d1 DESC;
+SELECT d1 FROM t1 ORDER BY d1 DESC;
+
+--replace_column 9 #
+EXPLAIN SELECT d1 FROM t1 IGNORE INDEX FOR ORDER BY (PRIMARY) ORDER BY d1 DESC;
+SELECT d1 FROM t1 IGNORE INDEX FOR ORDER BY (PRIMARY) ORDER BY d1 DESC;
+
+DROP TABLE t1;
+
+# --error ER_GET_ERRMSG
+CREATE TABLE t1 (
+ d1 DECIMAL(10,2),
+ d2 DECIMAL(60,10),
+ n1 NUMERIC,
+ n2 NUMERIC(65,4),
+ pk NUMERIC PRIMARY KEY,
+ UNIQUE INDEX n1_n2 (n1,n2)
+) ENGINE=rocksdb;
+
+# --disable_parsing
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (d1,d2,n1,n2,pk) VALUES
+(10.22,60.12345,123456,14.3456,1),
+(10.0,60.12345,123456,14,2),
+(11.14,15,123456,13,3),
+(100,100,1,2,4),
+(0,0,0,0,5),
+(4540424564.23,3343303441.0,12,13,6),
+(15,17,23,100000,7);
+
+--replace_column 9 #
+EXPLAIN SELECT DISTINCT n1+n2 FROM t1;
+--sorted_result
+SELECT DISTINCT n1+n2 FROM t1;
+
+DROP TABLE t1;
+
+# --enable_parsing
+
+CREATE TABLE t1 (
+ d1 DECIMAL(10,2),
+ d2 DECIMAL(60,10),
+ n1 NUMERIC,
+ n2 NUMERIC(65,4),
+ pk DECIMAL(20,10) PRIMARY KEY,
+ INDEX (d2)
+) ENGINE=rocksdb;
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (d1,d2,n1,n2,pk) VALUES
+(10.22,60.12345,123456,14.3456,1),
+(10.0,60.12345,123456,14,2),
+(11.14,15,123456,13,3),
+(100,100,1,2,4),
+(0,0,0,0,5),
+(4540424564.23,3343303441.0,12,13,6),
+(15,17,23,100000,7);
+
+--replace_column 9 #
+EXPLAIN SELECT d2, COUNT(*) FROM t1 GROUP BY d2;
+--sorted_result
+SELECT d2, COUNT(*) FROM t1 GROUP BY d2;
+
+--replace_column 9 #
+EXPLAIN SELECT d2, COUNT(*) FROM t1 IGNORE INDEX FOR GROUP BY (d2) GROUP BY d2;
+--sorted_result
+SELECT d2, COUNT(*) FROM t1 IGNORE INDEX FOR GROUP BY (d2) GROUP BY d2;
+
+DROP TABLE t1;
+
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_float.inc b/storage/rocksdb/mysql-test/rocksdb/t/type_float.inc
new file mode 100644
index 00000000000..ff58b73a91b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_float.inc
@@ -0,0 +1,121 @@
+#
+# Float types
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+eval CREATE TABLE t1 (
+ f FLOAT $extra_col_opts,
+ f0 FLOAT(0) $extra_col_opts,
+ r1_1 REAL(1,1) $extra_col_opts,
+ f23_0 FLOAT(23) $extra_col_opts,
+ f20_3 FLOAT(20,3) $extra_col_opts,
+ d DOUBLE $extra_col_opts,
+ d1_0 DOUBLE(1,0) $extra_col_opts,
+ d10_10 DOUBLE PRECISION (10,10) $extra_col_opts,
+ d53 DOUBLE(53,0) $extra_col_opts,
+ d53_10 DOUBLE(53,10) $extra_col_opts,
+ pk DOUBLE $extra_col_opts PRIMARY KEY
+) ENGINE=rocksdb;
+
+SHOW COLUMNS IN t1;
+
+# Always valid values
+
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (12345.12345,12345.12345,0.9,123456789.123,56789.987,11111111.111,8.0,0.0123456789,1234566789123456789,99999999999999999.99999999,1);
+
+--sorted_result
+--query_vertical SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1
+
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (0,0,0,0,0,0,0,0,0,0,2);
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (
+ 99999999999999999999999999999999999999,
+ 99999999999999999999999999999999999999.9999999999999999,
+ 0.9,
+ 99999999999999999999999999999999999999.9,
+ 99999999999999999.999,
+ 999999999999999999999999999999999999999999999999999999999999999999999999999999999,
+ 9,
+ 0.9999999999,
+ 1999999999999999999999999999999999999999999999999999999,
+ 19999999999999999999999999999999999999999999.9999999999,
+ 3
+);
+
+--sorted_result
+--query_vertical SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1
+
+# Values which can be valid or not,
+# depending on whether columns are SIGNED or UNSIGNED
+# (if not valid should produce warnings)
+
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (-999999999999999999999999,-99999999999.999999999999,-0.9,-999.99999999999999999999,-99999999999999999.999,-999999999999999999999999999999999999999999999999999999999999-0.999,-9,-.9999999999,-999999999999999999999999999999.99999999999999999999999,-9999999999999999999999999999999999999999999.9999999999,4);
+
+--sorted_result
+--query_vertical SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1
+
+--sorted_result
+query_vertical
+SELECT
+ CONCAT('', MAX(f)),
+ CONCAT('', MAX(f0)),
+ CONCAT('', MAX(r1_1)),
+ CONCAT('', MAX(f23_0)),
+ CONCAT('', MAX(f20_3)),
+ CONCAT('', MAX(d)),
+ CONCAT('', MAX(d1_0)),
+ CONCAT('', MAX(d10_10)),
+ CONCAT('', MAX(d53)),
+ CONCAT('', MAX(d53_10)) FROM t1;
+
+# Invalid values
+
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (
+ 9999999999999999999999999999999999999999999999999999999999999.9999,
+ 9999999999999999999999999999999999999999999999999999999999999.9999,
+ 9999999999999999999999999999999999999999999999999999999999999.9999,
+ 9999999999999999999999999999999999999999999999999999999999999.9999,
+ 9999999999999999999999999999999999999999999999999999999999999.9999,
+ 9999999999999999999999999999999999999999999999999999999999999.9999,
+ 9999999999999999999999999999999999999999999999999999999999999.9999,
+ 9999999999999999999999999999999999999999999999999999999999999.9999,
+ 9999999999999999999999999999999999999999999999999999999999999.9999,
+ 9999999999999999999999999999999999999999999999999999999999999.9999,
+ 5
+);
+
+--sorted_result
+--query_vertical SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1
+
+INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (
+ 999999999999999999999999999999999999999,
+ 999999999999999999999999999999999999999.9999999999999999,
+ 1.9,
+ 999999999999999999999999999999999999999.9,
+ 999999999999999999.999,
+ 9999999999999999999999999999999999999999999999999999999999999999999999999999999999,
+ 99,
+ 1.9999999999,
+ 1999999999999999999999999999999999999999999999999999999,
+ 19999999999999999999999999999999999999999999.9999999999,
+ 6
+);
+
+--sorted_result
+--query_vertical SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1
+
+# Error occurs on MySQL but not on MariaDB:
+# --error ER_TOO_BIG_DISPLAYWIDTH
+eval ALTER TABLE t1 ADD COLUMN d0_0 DOUBLE(0,0) $extra_col_opts;
+
+--error ER_TOO_BIG_PRECISION
+eval ALTER TABLE t1 ADD COLUMN n66_6 DECIMAL(256,1) $extra_col_opts;
+
+# Error occurs on MySQL but not on MariaDB:
+# --error ER_TOO_BIG_SCALE
+eval ALTER TABLE t1 ADD COLUMN n66_66 DECIMAL(40,35) $extra_col_opts;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_float.test b/storage/rocksdb/mysql-test/rocksdb/t/type_float.test
new file mode 100644
index 00000000000..1f1849992ca
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_float.test
@@ -0,0 +1,8 @@
+--source include/have_rocksdb.inc
+
+#
+# Float types
+#
+
+--source type_float.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_float_indexes-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/type_float_indexes-master.opt
new file mode 100644
index 00000000000..ba9364e1523
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_float_indexes-master.opt
@@ -0,0 +1 @@
+--rocksdb_debug_optimizer_n_rows=1000
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_float_indexes.test b/storage/rocksdb/mysql-test/rocksdb/t/type_float_indexes.test
new file mode 100644
index 00000000000..907bc614d49
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_float_indexes.test
@@ -0,0 +1,175 @@
+--source include/have_rocksdb.inc
+
+#
+# Float type columns with indexes
+#
+
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (
+ f FLOAT PRIMARY KEY,
+ r REAL(20,3),
+ d DOUBLE,
+ dp DOUBLE PRECISION (64,10)
+) ENGINE=rocksdb;
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (f,r,d,dp) VALUES
+(1.2345,1422.22,1.2345,1234567.89),
+(0,0,0,0),
+(-1,-1,-1,-1),
+(17.5843,4953453454.44,29229114.0,1111111.23),
+(4644,1422.22,466664.999,0.5);
+
+--replace_column 9 #
+EXPLAIN SELECT f FROM t1 ORDER BY f;
+SELECT f FROM t1 ORDER BY f;
+
+--replace_column 9 #
+EXPLAIN SELECT f FROM t1 IGNORE INDEX (PRIMARY) ORDER BY f;
+SELECT f FROM t1 IGNORE INDEX (PRIMARY) ORDER BY f;
+
+DROP TABLE t1;
+
+#--error ER_GET_ERRMSG
+CREATE TABLE t1 (
+ f FLOAT,
+ r REAL(20,3),
+ d DOUBLE,
+ dp DOUBLE PRECISION (64,10),
+ pk DOUBLE PRIMARY KEY,
+ UNIQUE KEY r_dp (r,dp)
+) ENGINE=rocksdb;
+
+#--disable_parsing
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (f,r,d,dp,pk) VALUES
+(1.2345,1422.22,1.2345,1234567.89,1),
+(0,0,0,0,2),
+(-1,-1,-1,-1,3),
+(17.5843,4953453454.44,29229114.0,1111111.23,4),
+(4644,1422.22,466664.999,0.5,5);
+
+--replace_column 9 #
+EXPLAIN SELECT r, dp FROM t1 WHERE r > 0 or dp > 0;
+--sorted_result
+SELECT r, dp FROM t1 WHERE r > 0 or dp > 0;
+
+DROP TABLE t1;
+
+CREATE TABLE t1 (
+ f FLOAT,
+ r REAL(20,3),
+ d DOUBLE,
+ dp DOUBLE PRECISION (64,10),
+ pk FLOAT PRIMARY KEY,
+ UNIQUE KEY(d)
+) ENGINE=rocksdb;
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (f,r,d,dp,pk) VALUES
+(1.2345,1422.22,1.2345,1234567.89,1),
+(0,0,0,0,2),
+(-1,-1,-1,-1,3),
+(17.5843,4953453454.44,29229114.0,1111111.23,4),
+(4644,1422.22,466664.999,0.5,5);
+
+--replace_column 9 #
+EXPLAIN SELECT DISTINCT d FROM t1 ORDER BY d;
+SELECT DISTINCT d FROM t1 ORDER BY d;
+
+DROP TABLE t1;
+
+#--enable_parsing
+
+CREATE TABLE t1 (
+ f FLOAT,
+ r REAL(20,3),
+ d DOUBLE,
+ dp DOUBLE PRECISION (64,10),
+ pk FLOAT PRIMARY KEY,
+ KEY(d)
+) ENGINE=rocksdb;
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (f,r,d,dp,pk) VALUES
+(1.2345,1422.22,1.2345,1234567.89,1),
+(0,0,0,0,2),
+(-1,-1,-1,-1,3),
+(17.5843,4953453454.44,29229114.0,1111111.23,4),
+(4644,1422.22,466664.999,0.5,5);
+
+--replace_column 9 #
+EXPLAIN SELECT DISTINCT d FROM t1 ORDER BY d;
+SELECT DISTINCT d FROM t1 ORDER BY d;
+
+DROP TABLE t1;
+
+CREATE TABLE t1 (
+ f FLOAT,
+ r REAL(20,3),
+ d DOUBLE,
+ dp DOUBLE PRECISION (64,10),
+ pk FLOAT PRIMARY KEY,
+ UNIQUE KEY(f)
+) ENGINE=rocksdb;
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (f,r,d,dp,pk) VALUES
+(1.2345,1422.22,1.2345,1234567.89,1),
+(0,0,0,0,2),
+(-1,-1,-1,-1,3),
+(17.5843,4953453454.44,29229114.0,1111111.23,4),
+(4644,1422.22,466664.999,0.5,5);
+
+# Should fail because of 'unique' constraint
+--error ER_DUP_ENTRY
+INSERT INTO t1 (f,r,d,dp,pk) VALUES
+(1.2345,0,0,0,6);
+
+--replace_column 9 #
+EXPLAIN SELECT DISTINCT f FROM t1 ORDER BY f;
+SELECT DISTINCT f FROM t1 ORDER BY f;
+
+DROP TABLE t1;
+
+CREATE TABLE t1 (
+ f FLOAT,
+ r REAL(20,3),
+ d DOUBLE,
+ dp DOUBLE PRECISION (64,10),
+ pk FLOAT PRIMARY KEY,
+ KEY(f)
+) ENGINE=rocksdb;
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (f,r,d,dp,pk) VALUES
+(1.2345,1422.22,1.2345,1234567.89,1),
+(0,0,0,0,2),
+(-1,-1,-1,-1,3),
+(17.5843,4953453454.44,29229114.0,1111111.23,4),
+(4644,1422.22,466664.999,0.5,5);
+
+# Should succeed because there is no 'unique' constraint
+INSERT INTO t1 (f,r,d,dp,pk) VALUES
+(1.2345,0,0,0,6);
+
+--replace_column 9 #
+EXPLAIN SELECT DISTINCT f FROM t1 ORDER BY f;
+SELECT DISTINCT f FROM t1 ORDER BY f;
+
+DROP TABLE t1;
+
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_int.inc b/storage/rocksdb/mysql-test/rocksdb/t/type_int.inc
new file mode 100644
index 00000000000..dbcdfe4fbdd
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_int.inc
@@ -0,0 +1,68 @@
+#
+# INT column types
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+eval CREATE TABLE t1 (
+ i INT $extra_col_opts,
+ i0 INT(0) $extra_col_opts,
+ i1 INT(1) $extra_col_opts,
+ i20 INT(20) $extra_col_opts,
+ t TINYINT $extra_col_opts,
+ t0 TINYINT(0) $extra_col_opts,
+ t1 TINYINT(1) $extra_col_opts,
+ t20 TINYINT(20) $extra_col_opts,
+ s SMALLINT $extra_col_opts,
+ s0 SMALLINT(0) $extra_col_opts,
+ s1 SMALLINT(1) $extra_col_opts,
+ s20 SMALLINT(20) $extra_col_opts,
+ m MEDIUMINT $extra_col_opts,
+ m0 MEDIUMINT(0) $extra_col_opts,
+ m1 MEDIUMINT(1) $extra_col_opts,
+ m20 MEDIUMINT(20) $extra_col_opts,
+ b BIGINT $extra_col_opts,
+ b0 BIGINT(0) $extra_col_opts,
+ b1 BIGINT(1) $extra_col_opts,
+ b20 BIGINT(20) $extra_col_opts,
+ pk INT AUTO_INCREMENT PRIMARY KEY
+) ENGINE=rocksdb;
+
+SHOW COLUMNS IN t1;
+
+# Always valid values
+
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20);
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (2147483647,2147483647,2147483647,2147483647,127,127,127,127,32767,32767,32767,32767,8388607,8388607,8388607,8388607,9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807);
+--sorted_result
+SELECT i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20 FROM t1;
+
+# Values which can be valid or not,
+# depending on whether columns are SIGNED or UNSIGNED
+# (if not valid should produce warnings)
+
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (-2147483648,-2147483648,-2147483648,-2147483648,-128,-128,-128,-128,-32768,-32768,-32768,-32768,-8388608,-8388608,-8388608,-8388608,-9223372036854775808,-9223372036854775808,-9223372036854775808,-9223372036854775808);
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (4294967295,4294967295,4294967295,4294967295,255,255,255,255,65535,65535,65535,65535,16777215,16777215,16777215,16777215,18446744073709551615,18446744073709551615,18446744073709551615,18446744073709551615);
+
+--sorted_result
+SELECT i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20 FROM t1;
+
+# Invalid values
+
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (-2147483649,-2147483649,-2147483649,-2147483649,-129,-129,-129,-129,-32769,-32769,-32769,-32769,-8388609,-8388609,-8388609,-8388609,-9223372036854775809,-9223372036854775809,-9223372036854775809,-9223372036854775809);
+
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) VALUES (4294967296,4294967296,4294967296,4294967296,256,256,256,256,65536,65536,65536,65536,16777216,16777216,16777216,16777216,18446744073709551616,18446744073709551616,18446744073709551616,18446744073709551616);
+
+INSERT INTO t1 (i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20) SELECT b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b FROM t1 WHERE b IN (-9223372036854775808,9223372036854775807,18446744073709551615);
+
+--sorted_result
+SELECT i,i0,i1,i20,t,t0,t1,t20,s,s0,s1,s20,m,m0,m1,m20,b,b0,b1,b20 FROM t1;
+
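+# The server caps the display width at 255, so the 257 below must fail: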
+--error ER_TOO_BIG_DISPLAYWIDTH
+eval ALTER TABLE t1 ADD COLUMN i257 INT(257) $extra_col_opts;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_int.test b/storage/rocksdb/mysql-test/rocksdb/t/type_int.test
new file mode 100644
index 00000000000..e92f6692172
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_int.test
@@ -0,0 +1,8 @@
+--source include/have_rocksdb.inc
+
+#
+# INT column types
+#
+
+--source type_int.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_int_indexes-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/type_int_indexes-master.opt
new file mode 100644
index 00000000000..ba9364e1523
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_int_indexes-master.opt
@@ -0,0 +1 @@
+--rocksdb_debug_optimizer_n_rows=1000
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_int_indexes.test b/storage/rocksdb/mysql-test/rocksdb/t/type_int_indexes.test
new file mode 100644
index 00000000000..c95c3d88b8a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_int_indexes.test
@@ -0,0 +1,75 @@
+--source include/have_rocksdb.inc
+
+#
+# INT column types with indexes
+#
+
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (
+ i INT PRIMARY KEY,
+ t TINYINT,
+ s SMALLINT,
+ m MEDIUMINT,
+ b BIGINT
+) ENGINE=rocksdb;
+
+INSERT INTO t1 (i,t,s,m,b) VALUES (1,2,3,4,5),(1000,100,10000,1000000,1000000000000000000),(5,100,10000,1000000,100000000000000000),(2,3,4,5,6),(3,4,5,6,7),(101,102,103,104,105),(10001,103,10002,10003,10004),(10,11,12,13,14),(11,12,13,14,15),(12,13,14,15,16);
+
+--replace_column 9 #
+EXPLAIN SELECT i FROM t1 ORDER BY i;
+SELECT i FROM t1 ORDER BY i;
+
+DROP TABLE t1;
+
+CREATE TABLE t1 (
+ i INT,
+ t TINYINT,
+ s SMALLINT,
+ m MEDIUMINT,
+ b BIGINT,
+ pk SMALLINT AUTO_INCREMENT PRIMARY KEY,
+ INDEX s_m (s,m)
+) ENGINE=rocksdb;
+
+INSERT INTO t1 (i,t,s,m,b) VALUES (1,2,3,4,5),(1000,100,10000,1000000,1000000000000000000),(5,100,10000,1000000,100000000000000000),(2,3,4,5,6),(3,4,5,6,7),(101,102,103,104,105),(10001,103,10002,10003,10004),(10,11,12,13,14),(11,12,13,14,15),(12,13,14,15,16);
+
+--replace_column 9 #
+EXPLAIN SELECT s, m FROM t1 WHERE s != 10 AND m != 1;
+--sorted_result
+SELECT s, m FROM t1 WHERE s != 10 AND m != 1;
+
+DROP TABLE t1;
+
+--echo # RocksDB: unique indexes allowed
+#--error ER_GET_ERRMSG
+CREATE TABLE t1 (
+ i INT,
+ t TINYINT,
+ s SMALLINT,
+ m MEDIUMINT,
+ b BIGINT,
+ pk MEDIUMINT AUTO_INCREMENT PRIMARY KEY,
+ UNIQUE KEY b_t (b,t)
+) ENGINE=rocksdb;
+
+##--disable_parsing
+
+INSERT INTO t1 (i,t,s,m,b) VALUES (1,2,3,4,5),(1000,100,10000,1000000,1000000000000000000),(5,100,10000,1000000,100000000000000000),(2,3,4,5,6),(3,4,5,6,7),(101,102,103,104,105),(10001,103,10002,10003,10004),(10,11,12,13,14),(11,12,13,14,15),(12,13,14,15,16);
+
+# This query should use the index b_t; we just don't want to run EXPLAIN
+# (to avoid mismatches due to different subquery-related plans)
+SELECT b+t FROM t1 WHERE (b,t) IN ( SELECT b, t FROM t1 WHERE i>1 ) ORDER BY b+t;
+SELECT b+t FROM t1 FORCE INDEX (b_t) WHERE (b,t) IN ( SELECT b, t FROM t1 WHERE i>1 ) ORDER BY b+t;
+SELECT b+t FROM t1 IGNORE INDEX (b_t) WHERE (b,t) IN ( SELECT b, t FROM t1 WHERE i>1 ) ORDER BY b+t;
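+# Instead, the three result sets above must be identical; a mismatch between
+# the FORCE INDEX and IGNORE INDEX variants would indicate a corrupt or
+# mis-sorted b_t index.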
+
+DROP TABLE t1;
+
+##--enable_parsing
+
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_set.inc b/storage/rocksdb/mysql-test/rocksdb/t/type_set.inc
new file mode 100644
index 00000000000..25ae58294f7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_set.inc
@@ -0,0 +1,49 @@
+#
+# SET column type
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+# Valid values.
+
+eval CREATE TABLE t1 (
+ a SET('') $extra_col_opts,
+ b SET('test1','test2','test3','test4','test5') $extra_col_opts,
+ c SET('01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36','37','38','39','40','41','42','43','44','45','46','47','48','49','50','51','52','53','54','55','56','57','58','59','60','61','62','63','64') $extra_col_opts,
+ PRIMARY KEY (c)
+) ENGINE=rocksdb;
+
+SHOW COLUMNS IN t1;
+
+INSERT INTO t1 (a,b,c) VALUES
+('','test2,test3','01,34,44,,23'),
+('',5,2),
+(',','test4,test2','');
+--sorted_result
+SELECT a,b,c FROM t1;
+
+# Out of range values
+# (should produce warnings)
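+# (For reference: a number assigned to a SET is interpreted as a bitmask
+# over its members, so the earlier INSERT of 5 into b stored 'test1,test3';
+# -1 below sets bits outside the member list and should only warn.)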
+
+INSERT INTO t1 (a,b,c) VALUES (0,'test6',-1);
+--sorted_result
+SELECT a,b,c FROM t1;
+
+# Non-unique values in set
+# (should produce a warning)
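+# (Under the default case-insensitive collation 'a' and 'A' name the same
+# member, which is what triggers the duplicate-value warning.)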
+eval ALTER TABLE t1 ADD COLUMN e SET('a','A') $extra_col_opts;
+
+SHOW COLUMNS IN t1;
+
+--error ER_TOO_BIG_SET
+eval ALTER TABLE t1 ADD COLUMN f SET('1','2','3','4','5','6','7','8','9','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z',' ','11','12','13','14','15','16','17','18','19','1a','1b','1c','1d','1e','1f','1g','1h','1i','1j','1k','1l','1m','1n','1o','1p','1q','1r','1s','1t','1u','1v','1w','1x','1y','1z','20','21','22','23','24','25','26','27','28','29','2a','2b','2c','2d','2e','2f','2g','2h','2i','2j','2k','2l','2m','2n','2o','2p','2q','2r','2s','2t','2u','2v','2w','2x','2y','2z','30','31','32','33','34','35','36','37','38','39','3a','3b','3c','3d','3e','3f','3g','3h','3i') $extra_col_opts;
+
+# Simple comparison
+
+--sorted_result
+SELECT a,b,c,e FROM t1 WHERE FIND_IN_SET('test2',b)>0 OR a != '';
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_set.test b/storage/rocksdb/mysql-test/rocksdb/t/type_set.test
new file mode 100644
index 00000000000..bd306668e6d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_set.test
@@ -0,0 +1,8 @@
+--source include/have_rocksdb.inc
+
+#
+# SET column type
+#
+
+--source type_set.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_set_indexes-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/type_set_indexes-master.opt
new file mode 100644
index 00000000000..ba9364e1523
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_set_indexes-master.opt
@@ -0,0 +1 @@
+--rocksdb_debug_optimizer_n_rows=1000
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_set_indexes.test b/storage/rocksdb/mysql-test/rocksdb/t/type_set_indexes.test
new file mode 100644
index 00000000000..983c70256b0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_set_indexes.test
@@ -0,0 +1,100 @@
+--source include/have_rocksdb.inc
+
+#
+# SET columns with indexes
+#
+
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (
+ a SET('N.America','S.America','Africa','Antarctica','Australia','Europe','Asia'),
+ b SET('test1','test2','test3','test4','test5'),
+ c SET('01','22','23','33','34','39','40','44','50','63','64') PRIMARY KEY
+) ENGINE=rocksdb;
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (a,b,c) VALUES
+('','test2,test3','01,34,44,23'),
+('',5,2),
+('N.America,Asia','test4,test2',''),
+('Africa,Europe,Asia','test2,test3','01'),
+('Antarctica','test3','34,44'),
+('Asia','test5','50'),
+('Europe,S.America','test1,','39');
+
+--replace_column 9 #
+EXPLAIN SELECT c FROM t1 ORDER BY c;
+SELECT c FROM t1 ORDER BY c;
+
+--replace_column 9 #
+EXPLAIN SELECT c FROM t1 IGNORE INDEX FOR ORDER BY (PRIMARY) ORDER BY c;
+SELECT c FROM t1 IGNORE INDEX FOR ORDER BY (PRIMARY) ORDER BY c;
+
+--error ER_DUP_ENTRY
+INSERT INTO t1 (a,b,c) VALUES ('Antarctica','test3','02');
+
+--error ER_DUP_ENTRY
+INSERT INTO t1 (a,b,c) VALUES ('','test1','34,44');
+
+DROP TABLE t1;
+
+CREATE TABLE t1 (
+ a SET('N.America','S.America','Africa','Antarctica','Australia','Europe','Asia'),
+ b SET('test1','test2','test3','test4','test5'),
+ c SET('01','22','23','33','34','39','40','44','50','63','64'),
+ pk SET('1','2','3','4','5','6','7','8','9') PRIMARY KEY,
+ INDEX(a)
+) ENGINE=rocksdb;
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (a,b,c,pk) VALUES
+('','test2,test3','01,34,44,23',1),
+('',5,2,2),
+('N.America,Asia','test4,test2','',3),
+('Africa,Europe,Asia','test2,test3','01',4),
+('Antarctica','test3','34,44',5),
+('Asia','test5','50',6),
+('Europe,S.America','test1,','39',7);
+
+--replace_column 9 #
+EXPLAIN SELECT a FROM t1 WHERE FIND_IN_SET('Europe',a) > 0;
+--sorted_result
+SELECT a FROM t1 WHERE FIND_IN_SET('Europe',a) > 0;
+
+DROP TABLE t1;
+
+CREATE TABLE t1 (
+ a SET('N.America','S.America','Africa','Antarctica','Australia','Europe','Asia'),
+ b SET('test1','test2','test3','test4','test5'),
+ c SET('01','22','23','33','34','39','40','44','50','63','64'),
+ pk SET('1','2','3','4','5','6','7','8','9') PRIMARY KEY,
+ UNIQUE INDEX b_a (b,a)
+) ENGINE=rocksdb;
+
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (a,b,c,pk) VALUES
+('','test2,test3','01,34,44,23',1),
+('',5,2,2),
+('N.America,Asia','test4,test2','',3),
+('Africa,Europe,Asia','test2,test3','01',4),
+('Antarctica','test3','34,44',5),
+('Asia','test5','50',6),
+('Europe,S.America','test1,','39',7);
+
+--replace_column 9 #
+EXPLAIN SELECT DISTINCT a, b FROM t1 ORDER BY b DESC, a;
+--sorted_result
+SELECT DISTINCT a, b FROM t1 ORDER BY b DESC, a;
+
+DROP TABLE t1;
+
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_text.inc b/storage/rocksdb/mysql-test/rocksdb/t/type_text.inc
new file mode 100644
index 00000000000..f5c9c3b8978
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_text.inc
@@ -0,0 +1,49 @@
+#
+# TEXT column types
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+eval CREATE TABLE t1 (
+ pk INT AUTO_INCREMENT PRIMARY KEY,
+ t TEXT $extra_col_opts,
+ t0 TEXT(0) $extra_col_opts,
+ t1 TEXT(1) $extra_col_opts,
+ t300 TEXT(300) $extra_col_opts,
+ tm TEXT(65535) $extra_col_opts,
+ t70k TEXT(70000) $extra_col_opts,
+ t17m TEXT(17000000) $extra_col_opts,
+ tt TINYTEXT $extra_col_opts,
+ m MEDIUMTEXT $extra_col_opts,
+ l LONGTEXT $extra_col_opts
+) ENGINE=rocksdb;
+
+SHOW COLUMNS IN t1;
+
+# Valid values
+# (cannot get MAX for all columns due to max_allowed_packet limitations)
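+# (The ceiling can be inspected with: SELECT @@max_allowed_packet; it bounds
+# the longest literal a single statement may carry, hence the 1M REPEATs.)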
+
+INSERT INTO t1 (t,t0,t1,t300,tm,t70k,t17m,tt,m,l) VALUES
+('','','','','','','','','',''),
+('a','b','c','d','e','f','g','h','i','j'),
+('test1','test2','test3','test4','test5','test6','test7','test8','test9','test10'),
+( REPEAT('a',65535), REPEAT('b',65535), REPEAT('c',255), REPEAT('d',65535), REPEAT('e',65535), REPEAT('f',1048576), REPEAT('g',1048576), REPEAT('h',255), REPEAT('i',1048576), REPEAT('j',1048576) );
+
+--sorted_result
+SELECT LENGTH(t), LENGTH(t0), LENGTH(t1), LENGTH(t300), LENGTH(tm), LENGTH(t70k), LENGTH(t17m), LENGTH(tt), LENGTH(m), LENGTH(l) FROM t1;
+
+# Invalid values (produce warnings, except for mediumtext and longtext columns for which the values are within limits)
+
+INSERT INTO t1 (t,t0,t1,t300,tm,t70k,t17m,tt,m,l) VALUES
+( REPEAT('a',65536), REPEAT('b',65536), REPEAT('c',256), REPEAT('d',65536), REPEAT('e',65536), REPEAT('f',1048576), REPEAT('g',1048576), REPEAT('h',256), REPEAT('i',1048576), REPEAT('j',1048576) );
+
+--sorted_result
+SELECT LENGTH(t), LENGTH(t0), LENGTH(t1), LENGTH(t300), LENGTH(tm), LENGTH(t70k), LENGTH(t17m), LENGTH(tt), LENGTH(m), LENGTH(l) FROM t1;
+
+--error ER_TOO_BIG_DISPLAYWIDTH
+eval ALTER TABLE t1 ADD COLUMN ttt TEXT(4294967296) $extra_col_opts;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_text.test b/storage/rocksdb/mysql-test/rocksdb/t/type_text.test
new file mode 100644
index 00000000000..c6dd24ff42e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_text.test
@@ -0,0 +1,8 @@
+--source include/have_rocksdb.inc
+
+#
+# TEXT column types
+#
+
+--source type_text.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_text_indexes-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/type_text_indexes-master.opt
new file mode 100644
index 00000000000..6ad42e58aa2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_text_indexes-master.opt
@@ -0,0 +1 @@
+--rocksdb_debug_optimizer_n_rows=1000 --rocksdb_records_in_range=50
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_text_indexes.test b/storage/rocksdb/mysql-test/rocksdb/t/type_text_indexes.test
new file mode 100644
index 00000000000..5fdc4bff39d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_text_indexes.test
@@ -0,0 +1,171 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+
+#
+# TEXT columns with indexes
+#
+
+SET @ORIG_PAUSE_BACKGROUND_WORK = @@ROCKSDB_PAUSE_BACKGROUND_WORK;
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+
+CREATE TABLE t1 (
+ t TEXT,
+ tt TINYTEXT,
+ m MEDIUMTEXT,
+ l LONGTEXT,
+ PRIMARY KEY t (t(32))
+) ENGINE=rocksdb;
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (t,tt,m,l) VALUES
+('','','',''),
+('a','b','c','d'),
+('b','d','c','b'),
+('test1','test2','test3','test4'),
+(REPEAT('a',128),REPEAT('b',128),REPEAT('c',128),REPEAT('d',128)),
+('abc','def','ghi','jkl'),
+('test2','test3','test4','test5'),
+('test3','test4','test5','test6'),
+(REPEAT('b',128),REPEAT('f',128),REPEAT('e',128),REPEAT('d',128)),
+(REPEAT('c',128),REPEAT('b',128),REPEAT('c',128),REPEAT('e',128));
+
+--replace_column 9 #
+EXPLAIN SELECT SUBSTRING(t,16) AS f FROM t1 WHERE t IN ('test1','test2') ORDER BY f;
+SELECT SUBSTRING(t,16) AS f FROM t1 WHERE t IN ('test1','test2') ORDER BY f;
+
+--replace_column 9 #
+EXPLAIN SELECT SUBSTRING(t,16) AS f FROM t1 IGNORE INDEX (PRIMARY) WHERE t IN ('test1','test2') ORDER BY f;
+SELECT SUBSTRING(t,16) AS f FROM t1 IGNORE INDEX (PRIMARY) WHERE t IN ('test1','test2') ORDER BY f;
+DROP TABLE t1;
+
+--error ER_BLOB_KEY_WITHOUT_LENGTH
+CREATE TABLE t1 (
+ t TEXT,
+ tt TINYTEXT,
+ m MEDIUMTEXT,
+ l LONGTEXT,
+ pk TINYTEXT PRIMARY KEY,
+ UNIQUE INDEX l_tt (l(256),tt(64))
+) ENGINE=rocksdb;
+
+CREATE TABLE t1 (
+ t TEXT,
+ tt TINYTEXT,
+ m MEDIUMTEXT,
+ l LONGTEXT,
+ pk MEDIUMTEXT,
+ PRIMARY KEY mt (pk(1)),
+ INDEX (m(128))
+) ENGINE=rocksdb;
+
+SHOW INDEX IN t1;
+
+INSERT INTO t1 (t,tt,m,l,pk) VALUES
+('','','','','0'),
+('a','b','c','d','1'),
+('b','d','c','b','2'),
+('test1','test2','test3','test4','3'),
+(REPEAT('a',128),REPEAT('b',128),REPEAT('c',128),REPEAT('d',128),'4'),
+('abc','def','ghi','jkl','5'),
+('test2','test3','test4','test5','6'),
+('test3','test4','test5','test6','7'),
+(REPEAT('b',128),REPEAT('f',128),REPEAT('e',128),REPEAT('d',128),'8'),
+(REPEAT('c',128),REPEAT('b',128),REPEAT('c',128),REPEAT('e',128),'9');
+
+--replace_column 9 #
+EXPLAIN SELECT SUBSTRING(m,128) AS f FROM t1 WHERE m = 'test1' ORDER BY f DESC;
+SELECT SUBSTRING(m,128) AS f FROM t1 WHERE m = 'test1' ORDER BY f DESC;
+
+DROP TABLE t1;
+
+CREATE TABLE t1 (
+ b TEXT,
+ PRIMARY KEY b (b(32))
+) ENGINE=rocksdb;
+
+INSERT INTO t1 (b) VALUES
+('00000000000000000000000000000000'),
+('00000000000000000000000000000001'),
+('00000000000000000000000000000002');
+
+SELECT b FROM t1;
+
+DROP TABLE t1;
+
+CREATE TABLE t1 (
+ b TINYTEXT,
+ PRIMARY KEY b (b(32))
+) ENGINE=rocksdb;
+
+INSERT INTO t1 (b) VALUES
+('00000000000000000000000000000000'),
+('00000000000000000000000000000001'),
+('00000000000000000000000000000002');
+
+SELECT b FROM t1;
+
+DROP TABLE t1;
+
+CREATE TABLE t1 (
+ b MEDIUMTEXT,
+ PRIMARY KEY b (b(32))
+) ENGINE=rocksdb;
+
+INSERT INTO t1 (b) VALUES
+('00000000000000000000000000000000'),
+('00000000000000000000000000000001'),
+('00000000000000000000000000000002');
+
+SELECT b FROM t1;
+
+DROP TABLE t1;
+
+CREATE TABLE t1 (
+ b LONGTEXT,
+ PRIMARY KEY b (b(32))
+) ENGINE=rocksdb;
+
+INSERT INTO t1 (b) VALUES
+('00000000000000000000000000000000'),
+('00000000000000000000000000000001'),
+('00000000000000000000000000000002');
+
+SELECT b FROM t1;
+
+DROP TABLE t1;
+
+CREATE TABLE t1 (
+ b LONGTEXT CHARACTER SET "binary" COLLATE "binary",
+ PRIMARY KEY b (b(32))
+) ENGINE=rocksdb;
+
+INSERT INTO t1 (b) VALUES
+('00000000000000000000000000000000'),
+('00000000000000000000000000000001'),
+('00000000000000000000000000000002');
+INSERT INTO t1 (b) VALUES (''), (_binary 0x0), (' ');
+
+SELECT hex(b) FROM t1;
+
+DROP TABLE t1;
+
+CREATE TABLE t1 (
+ b LONGTEXT CHARACTER SET "latin1" COLLATE "latin1_bin",
+ PRIMARY KEY b (b(32))
+) ENGINE=rocksdb;
+
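+# latin1_bin is a PAD SPACE collation, so '' and ' ' compare equal and
+# collide in the primary key; 0x00 is not trailing space and stays distinct,
+# which is why the shorter insert afterwards succeeds.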
+--error ER_DUP_ENTRY
+INSERT INTO t1 (b) VALUES (''), (_binary 0x0), (' ');
+
+INSERT INTO t1 (b) VALUES (''), (_binary 0x0);
+SELECT hex(b) FROM t1;
+
+DROP TABLE t1;
+
+SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_varbinary.inc b/storage/rocksdb/mysql-test/rocksdb/t/type_varbinary.inc
new file mode 100644
index 00000000000..a9f9883198c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_varbinary.inc
@@ -0,0 +1,75 @@
+#
+# VARBINARY column types
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1, t2;
+--enable_warnings
+
+eval CREATE TABLE t1 (
+ v0 VARBINARY(0) $extra_col_opts,
+ v1 VARBINARY(1) $extra_col_opts,
+ v64 VARBINARY(64) $extra_col_opts,
+ v65000 VARBINARY(65000) $extra_col_opts,
+ PRIMARY KEY (v64)
+) ENGINE=rocksdb;
+
+SHOW COLUMNS IN t1;
+
+eval CREATE TABLE t2 (v VARBINARY(65532) $extra_col_opts, PRIMARY KEY(v(255))) ENGINE=rocksdb;
+SHOW COLUMNS IN t2;
+
+INSERT INTO t1 (v0,v1,v64,v65000) VALUES ('','','','');
+INSERT INTO t1 (v0,v1,v64,v65000) VALUES ('','y','Once there, double check that an article doesn\'t already exist','Here is a list of recommended books on MariaDB and MySQL. We\'ve provided links to Amazon.com here for convenience, but they can be found at many other bookstores, both online and off.
+
+ If you want to have your favorite MySQL / MariaDB book listed here, please leave a comment.
+ For developers who want to code on MariaDB or MySQL
+
+ * Understanding MySQL Internals by Sasha Pachev, former MySQL developer at MySQL AB.
+ o This is the only book we know about that describes the internals of MariaDB / MySQL. A must have for anyone who wants to understand and develop on MariaDB!
+ o Not all topics are covered and some parts are slightly outdated, but still the best book on this topic.
+ * MySQL 5.1 Plugin Development by Sergei Golubchik and Andrew Hutchings
+ o A must read for anyone wanting to write a plugin for MariaDB, written by the Sergei who designed the plugin interface for MySQL and MariaDB!
+
+ For MariaDB / MySQL end users
+
+ * MariaDB Crash Course by Ben Forta
+ o First MariaDB book!
+ o For people who want to learn SQL and the basics of MariaDB.
+ o Now shipping. Purchase at Amazon.com or your favorite bookseller.
+
+ * SQL-99 Complete, Really by Peter Gulutzan & Trudy Pelzer.
+ o Everything you wanted to know about the SQL 99 standard. Excellent reference book!
+ o Free to read in the Knowledgebase!
+
+ * MySQL (4th Edition) by Paul DuBois
+ o The \'default\' book to read if you wont to learn to use MySQL / MariaDB.
+
+ * MySQL Cookbook by Paul DuBois
+ o A lot of examples of how to use MySQL. As with all of Paul\'s books, it\'s worth its weight in gold and even enjoyable reading for such a \'dry\' subject.
+
+ * High Performance MySQL, Second Edition, By Baron Schwartz, Peter Zaitsev, Vadim Tkachenko, Jeremy D. Zawodny, Arjen Lentz, Derek J. Balling, et al.
+ o \"High Performance MySQL is the definitive guide to building fast, reliable systems with MySQL. Written by noted experts with years of real-world experience building very large systems, this book covers every aspect of MySQL performance in detail, and focuses on robustness, security, and data integrity. Learn advanced techniques in depth so you can bring out MySQL\'s full power.\" (From the book description at O\'Reilly)
+
+ * MySQL Admin Cookbook
+ o A quick step-by-step guide for MySQL users and database administrators to tackle real-world challenges with MySQL configuration and administration
+
+ * MySQL 5.0 Certification Study Guide, By Paul DuBois, Stefan Hinz, Carsten Pedersen
+ o This is the official guide to cover the passing of the two MySQL Certification examinations. It is valid till version 5.0 of the server, so while it misses all the features available in MySQL 5.1 and greater (including MariaDB 5.1 and greater), it provides a good basic understanding of MySQL for the end-user. ');
+
+--sorted_result
+SELECT HEX(v0), HEX(v1), HEX(v64), HEX(v65000) FROM t1;
+
+# Invalid values
+
+INSERT INTO t1 (v0,v1,v64,v65000) VALUES ('y', 'yy', REPEAT('c',65), REPEAT('abcdefghi ',6501));
+INSERT INTO t1 (v0,v1,v64,v65000) SELECT v65000, v65000, CONCAT('a',v65000), CONCAT(v65000,v1) FROM t1;
+
+--sorted_result
+SELECT HEX(v0), HEX(v1), HEX(v64), LENGTH(HEX(v65000)) FROM t1;
+
+eval ALTER TABLE t1 ADD COLUMN v65536 VARBINARY(65536) $extra_col_opts;
+SHOW COLUMNS IN t1;
+
+DROP TABLE t1, t2;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_varbinary.test b/storage/rocksdb/mysql-test/rocksdb/t/type_varbinary.test
new file mode 100644
index 00000000000..2ef3da8a499
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_varbinary.test
@@ -0,0 +1,8 @@
+--source include/have_rocksdb.inc
+
+#
+# VARBINARY column types
+#
+
+--source type_varbinary.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_varchar-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/type_varchar-master.opt
new file mode 100644
index 00000000000..33e72265db2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_varchar-master.opt
@@ -0,0 +1 @@
+--rocksdb_debug_optimizer_n_rows=10
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_varchar.inc b/storage/rocksdb/mysql-test/rocksdb/t/type_varchar.inc
new file mode 100644
index 00000000000..ce5dbb444d3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_varchar.inc
@@ -0,0 +1,77 @@
+#
+# VARCHAR column types
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1, t2;
+--enable_warnings
+
+eval CREATE TABLE t1 (
+ v0 VARCHAR(0) $extra_col_opts,
+ v1 VARCHAR(1) $extra_col_opts,
+ v64 VARCHAR(64) $extra_col_opts,
+ v65000 VARCHAR(65000) $extra_col_opts,
+ PRIMARY KEY (v64)
+) ENGINE=rocksdb;
+
+SHOW COLUMNS IN t1;
+
+CREATE TABLE t2 (v VARCHAR(65532), PRIMARY KEY (v(255))) ENGINE=rocksdb;
+SHOW COLUMNS IN t2;
+
+# Valid values
+
+INSERT INTO t1 (v0,v1,v64,v65000) VALUES ('','','','');
+INSERT INTO t1 (v0,v1,v64,v65000) VALUES ('','y','Once there, double check that an article doesn\'t already exist','Here is a list of recommended books on MariaDB and MySQL. We\'ve provided links to Amazon.com here for convenience, but they can be found at many other bookstores, both online and off.
+
+ If you want to have your favorite MySQL / MariaDB book listed here, please leave a comment.
+ For developers who want to code on MariaDB or MySQL
+
+ * Understanding MySQL Internals by Sasha Pachev, former MySQL developer at MySQL AB.
+ o This is the only book we know about that describes the internals of MariaDB / MySQL. A must have for anyone who wants to understand and develop on MariaDB!
+ o Not all topics are covered and some parts are slightly outdated, but still the best book on this topic.
+ * MySQL 5.1 Plugin Development by Sergei Golubchik and Andrew Hutchings
+ o A must read for anyone wanting to write a plugin for MariaDB, written by the Sergei who designed the plugin interface for MySQL and MariaDB!
+
+ For MariaDB / MySQL end users
+
+ * MariaDB Crash Course by Ben Forta
+ o First MariaDB book!
+ o For people who want to learn SQL and the basics of MariaDB.
+ o Now shipping. Purchase at Amazon.com or your favorite bookseller.
+
+ * SQL-99 Complete, Really by Peter Gulutzan & Trudy Pelzer.
+ o Everything you wanted to know about the SQL 99 standard. Excellent reference book!
+ o Free to read in the Knowledgebase!
+
+ * MySQL (4th Edition) by Paul DuBois
+ o The \'default\' book to read if you wont to learn to use MySQL / MariaDB.
+
+ * MySQL Cookbook by Paul DuBois
+ o A lot of examples of how to use MySQL. As with all of Paul\'s books, it\'s worth its weight in gold and even enjoyable reading for such a \'dry\' subject.
+
+ * High Performance MySQL, Second Edition, By Baron Schwartz, Peter Zaitsev, Vadim Tkachenko, Jeremy D. Zawodny, Arjen Lentz, Derek J. Balling, et al.
+ o \"High Performance MySQL is the definitive guide to building fast, reliable systems with MySQL. Written by noted experts with years of real-world experience building very large systems, this book covers every aspect of MySQL performance in detail, and focuses on robustness, security, and data integrity. Learn advanced techniques in depth so you can bring out MySQL\'s full power.\" (From the book description at O\'Reilly)
+
+ * MySQL Admin Cookbook
+ o A quick step-by-step guide for MySQL users and database administrators to tackle real-world challenges with MySQL configuration and administration
+
+ * MySQL 5.0 Certification Study Guide, By Paul DuBois, Stefan Hinz, Carsten Pedersen
+ o This is the official guide to cover the passing of the two MySQL Certification examinations. It is valid till version 5.0 of the server, so while it misses all the features available in MySQL 5.1 and greater (including MariaDB 5.1 and greater), it provides a good basic understanding of MySQL for the end-user. ');
+
+--sorted_result
+SELECT v0,v1,v64,v65000 FROM t1;
+
+# Invalid values
+
+INSERT INTO t1 (v0,v1,v64,v65000) VALUES ('y', 'yy', REPEAT('c',65), REPEAT('abcdefghi ',6501));
+INSERT INTO t1 (v0,v1,v64,v65000) SELECT v65000, v65000, CONCAT('a',v65000), CONCAT(v65000,v1) FROM t1;
+
+--sorted_result
+SELECT v0, v1, v64, LENGTH(v65000) FROM t1;
+
+eval ALTER TABLE t1 ADD COLUMN v65536 VARCHAR(65536) $extra_col_opts;
+SHOW COLUMNS IN t1;
+
+DROP TABLE t1, t2;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_varchar.test b/storage/rocksdb/mysql-test/rocksdb/t/type_varchar.test
new file mode 100644
index 00000000000..b631615c266
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_varchar.test
@@ -0,0 +1,82 @@
+--source include/have_rocksdb.inc
+
+#
+# VARCHAR column types
+#
+
+--echo #
+--echo # A basic test whether endspace-aware variable length encoding
+--echo # works when in PK
+--echo #
+create table t1 ( pk varchar(10) primary key, col1 int not null) ;
+insert into t1 values ('a', 1);
+select * from t1;
+drop table t1;
+
+--source type_varchar.inc
+
+
+--echo #
+--echo # Endspace-comparison tests:
+--echo #
+
+--let $character_set_collate=CHARACTER SET latin1 COLLATE latin1_bin
+--source type_varchar_endspace.inc
+
+--let $character_set_collate=CHARACTER SET utf8 COLLATE utf8_bin
+--source type_varchar_endspace.inc
+
+--let $character_set_collate=CHARACTER SET ucs2 COLLATE ucs2_bin
+--source type_varchar_endspace.inc
+
+--let $character_set_collate=CHARACTER SET utf8mb4 COLLATE utf8mb4_bin
+--source type_varchar_endspace.inc
+
+--let $character_set_collate=CHARACTER SET utf16 COLLATE utf16_bin
+--source type_varchar_endspace.inc
+
+create table t1 (
+ pk int primary key,
+ col1 varchar(10) collate utf8mb4_bin not null,
+ col2 varchar(20),
+ key(col1)
+) engine=rocksdb;
+
+insert into t1 values (1, 'ab','ab');
+insert into t1 values (2, 'ab\0','ab0');
+
+select pk, hex(col1), col2 from t1 force index(col1) order by col1;
+select pk, hex(col1), col2 from t1 ignore index(col1) order by col1;
+drop table t1;
+
+# Issue #306 - Do not store trailing spaces for prefixed keys.
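+# Sketch of the regression being guarded against (inferred from the issue
+# title, not spelled out in the test): if trailing spaces were kept in the
+# 30-byte prefix key, the index entry would disagree with the base row and
+# the index-only COUNT(*) below could go wrong.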
+create table t (id int primary key, email varchar(100), KEY email_i (email(30)));
+insert into t values (1, 'abcabcabcabcabcabcabcabcabcabcabc ');
+--replace_column 9 #
+explain select 'email_i' as index_name, count(*) AS count from t force index(email_i);
+select 'email_i' as index_name, count(*) AS count from t force index(email_i);
+drop table t;
+
+set @save_rocksdb_checksums_pct = @@global.rocksdb_checksums_pct;
+set @save_rocksdb_verify_row_debug_checksums = @@session.rocksdb_verify_row_debug_checksums;
+set global rocksdb_checksums_pct = 100;
+set session rocksdb_verify_row_debug_checksums = on;
+create table t (id int primary key, email varchar(100), KEY email_i (email(30)));
+insert into t values (1, 'a');
+--replace_column 9 #
+explain select 'email_i' as index_name, count(*) AS count from t force index(email_i);
+select 'email_i' as index_name, count(*) AS count from t force index(email_i);
+drop table t;
+set global rocksdb_checksums_pct = @save_rocksdb_checksums_pct;
+set session rocksdb_verify_row_debug_checksums = @save_rocksdb_verify_row_debug_checksums;
+
+# Issue #784 - Skip trailing space bytes for non-unpackable fields
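+# Hedged sketch: cp1257_bin keys are assumed not to carry unpack info, so
+# trailing-space bytes must be skipped when rows are re-checked; CHECK TABLE
+# after the ALTER verifies the index still matches the data.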
+
+drop table if exists t;
+create table t (h varchar(31) character set utf8 collate utf8_bin not null, i varchar(19) collate latin1_bin not null, primary key(i), key(h)) engine=rocksdb;
+insert into t(i,h) values('a','b');
+check table t;
+alter table t modify h varchar(31) character set cp1257 collate cp1257_bin not null;
+check table t;
+drop table t;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_varchar_endspace.inc b/storage/rocksdb/mysql-test/rocksdb/t/type_varchar_endspace.inc
new file mode 100644
index 00000000000..494f0ea1395
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_varchar_endspace.inc
@@ -0,0 +1,85 @@
+--echo #
+--echo # Issue 257: Sort order for varchars is different between
+--echo # MyISAM/InnoDB vs MyRocks
+--echo #
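+# Background: MySQL's *_bin collations are PAD SPACE, so 'a' and 'a ' compare
+# equal (hence the ER_DUP_ENTRY below), while 'a\t' is distinct because tab
+# (0x09) sorts before space; MyRocks' memcmp-ordered key encoding has to
+# reproduce exactly this ordering, which these inserts probe.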
+
+eval create table t1 (
+ pk varchar(64) $character_set_collate,
+ col1 varchar(64),
+ primary key (pk)
+);
+insert into t1 values ('a','a');
+--error ER_DUP_ENTRY
+insert into t1 values ('a ', 'a-space');
+
+insert into t1 values('b ', 'b-2x-space');
+--error ER_DUP_ENTRY
+insert into t1 values ('b', 'b');
+
+select pk, hex(pk), col1 from t1;
+
+insert into t1 values ('a\t', 'a-tab');
+insert into t1 values ('a \t', 'a-space-tab');
+select pk, hex(pk), col1 from t1 order by pk;
+
+--echo # Try longer values
+--error ER_DUP_ENTRY
+insert into t1 values (concat('a', repeat(' ',10)), 'a-10-x-space');
+
+insert into t1 values (concat('c', repeat(' ',10)), 'c-10-x-space');
+select * from t1;
+
+drop table t1;
+
+--echo # Secondary index
+eval create table t1 (
+ pk int not null primary key,
+ col1 varchar(64) $character_set_collate,
+ col2 varchar(64),
+ key (col1)
+);
+insert into t1 values (0, 'ab', 'a-b');
+insert into t1 values (1, 'a ', 'a-space');
+insert into t1 values (2, 'a', 'a');
+insert into t1 values (3, 'a \t', 'a-tab');
+
+analyze table t1;
+--echo # Must show 'using index' for latin1_bin and utf8_bin:
+--replace_column 9 #
+explain
+select col1, hex(col1) from t1;
+select col1, hex(col1) from t1;
+
+--echo # Must show 'using index' for latin1_bin and utf8_bin:
+--replace_column 4 # 9 #
+explain
+select col1, hex(col1) from t1 force index(col1) where col1 < 'b';
+select col1, hex(col1) from t1 force index(col1) where col1 < 'b';
+
+delete from t1;
+insert into t1 values(10, '', 'empty');
+insert into t1 values(11, repeat(' ', 8), '8x-space');
+insert into t1 values(12, repeat(' ', 16), '16x-space');
+insert into t1 values(13, repeat(' ', 24), '24x-space');
+insert into t1 values(14, concat(repeat(' ', 16),'a'), '16x-space-a');
+
+insert into t1 values(21, repeat(' ', 9), '9x-space');
+insert into t1 values(22, repeat(' ',17), '17x-space');
+insert into t1 values(23, repeat(' ',18), '18x-space');
+
+--replace_column 4 # 9 #
+explain
+select pk, col1, hex(col1), length(col1) from t1;
+select pk, col1, hex(col1), length(col1) from t1;
+drop table t1;
+
+create table t1 (pk int primary key, a varchar(512), key(a)) engine=rocksdb;
+insert into t1 values (1, concat('a', repeat(' ', 300)));
+insert into t1 values (2, concat('b', repeat(' ', 300)));
+
+select pk,length(a) from t1 force index(a) where a < 'zz';
+select pk,length(a),rtrim(a) from t1 force index(a) where a < 'zz';
+select pk,length(a),rtrim(a) from t1 ignore index(a) where a < 'zz';
+
+drop table t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/unique_check.test b/storage/rocksdb/mysql-test/rocksdb/t/unique_check.test
new file mode 100644
index 00000000000..e2520388ea7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/unique_check.test
@@ -0,0 +1,173 @@
+--source include/have_rocksdb.inc
+--source include/have_debug_sync.inc
+
+# For GitHub issue #167 -- Unique key check doesn't work
+
+connect (con1, localhost, root,,);
+connect (con2, localhost, root,,);
+connect (con3, localhost, root,,);
+connection default;
+
+--disable_warnings
+set debug_sync='RESET';
+drop table if exists t1;
+--enable_warnings
+
+create table t1 (id int, value int, primary key (id)) engine=rocksdb;
+create table t2 (id int, id2 int, value int, primary key (id), unique key (id2)) engine=rocksdb;
+
+# 1) The second insert should be blocked at GetForUpdate(), then return a
+# duplicate key error after con1's commit
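+# Timeline sketch (restating the statements below, not extra test logic):
+#   con1: BEGIN; INSERT (1,1)   -- takes the lock for pk=1
+#   con2: INSERT (1,2)          -- blocks waiting for that row lock
+#   con1: COMMIT                -- con2 resumes, finds pk=1, ER_DUP_ENTRY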
+connection con1;
+begin;
+insert into t1 values (1,1);
+
+connection con2;
+let $ID= `select connection_id()`;
+set session rocksdb_lock_wait_timeout=50;
+begin;
+send insert into t1 values (1,2);
+
+connection con1;
+let $wait_condition= select 1 from INFORMATION_SCHEMA.PROCESSLIST
+ where (ID = $ID /* or SRV_ID = $ID*/)
+ and STATE = "Waiting for row lock";
+--source include/wait_condition.inc
+commit;
+
+connection con2;
+--error ER_DUP_ENTRY
+reap;
+commit;
+select * from t1;
+truncate table t1;
+
+# 2) Same as 1), but using a secondary unique key constraint
+connection con1;
+begin;
+insert into t2 values (1,1,1);
+
+connection con2;
+begin;
+send insert into t2 values (2,1,2);
+
+connection con1;
+--source include/wait_condition.inc
+commit;
+
+connection con2;
+--error ER_DUP_ENTRY
+reap;
+commit;
+select * from t2;
+truncate table t2;
+
+# 3) Similar to 1) and 2), but rolled back
+connection con1;
+begin;
+insert into t1 values (1,1);
+
+connection con2;
+begin;
+send insert into t1 values (1,2);
+
+connection con1;
+--source include/wait_condition.inc
+rollback;
+
+connection con2;
+reap;
+commit;
+select * from t1;
+truncate table t1;
+
+connection con1;
+begin;
+insert into t2 values (1,1,1);
+
+connection con2;
+begin;
+send insert into t2 values (2,1,2);
+
+connection con1;
+--source include/wait_condition.inc
+rollback;
+
+connection con2;
+reap;
+commit;
+select * from t2;
+truncate table t2;
+
+
+# 4) simulating T1 GetForUpdate() -> T2 GetForUpdate(). T2 should fail with lock wait timeout.
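+# Sync sketch: con1 and con2 park inside the
+# rocksdb.update_write_row_after_unique_check sync point while still holding
+# their row locks, so con3 (with a 1s wait timeout) must time out on both
+# tables, showing the lock is taken before the unique check finishes.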
+connection con1;
+set debug_sync='rocksdb.update_write_row_after_unique_check SIGNAL parked1 WAIT_FOR go';
+send insert into t1 values (1,1);
+
+connection default;
+set debug_sync='now WAIT_FOR parked1';
+
+connection con2;
+set debug_sync='rocksdb.update_write_row_after_unique_check SIGNAL parked2 WAIT_FOR go';
+send insert into t2 values (1,1,1);
+
+connection default;
+set debug_sync='now WAIT_FOR parked2';
+
+connection con3;
+set session rocksdb_lock_wait_timeout=1;
+--error ER_LOCK_WAIT_TIMEOUT
+insert into t1 values (1,2);
+--error ER_LOCK_WAIT_TIMEOUT
+insert into t2 values (2,1,2);
+
+connection default;
+set debug_sync='now SIGNAL go';
+
+connection con1;
+reap;
+
+connection con2;
+reap;
+
+connection default;
+--error ER_DUP_ENTRY
+insert into t1 values (1,2);
+--error ER_DUP_ENTRY
+insert into t2 values (2,1,2);
+select * from t1;
+select * from t2;
+# Cleanup
+connection default;
+set debug_sync='RESET';
+disconnect con1;
+disconnect con2;
+disconnect con3;
+drop table t1, t2;
+
+# Disabling unique_checks should skip the checks only for tables that don't
+# have secondary indexes
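+# (Presumably the engine can skip the read-before-write on the primary key,
+# so duplicate PKs slip into t1 and t3 below, while the unique secondary key
+# on t2 still forces the check and raises ER_DUP_ENTRY.)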
+connection default;
+--disable_warnings
+drop table if exists t1,t2,t3;
+--enable_warnings
+
+# table with PK only
+create table t1 (id int, value int, primary key (id)) engine=rocksdb;
+# table with PK and SK
+create table t2 (id int, id2 int, value int, primary key (id), unique key (id2)) engine=rocksdb;
+# table with hidden PK
+create table t3 (id int, value int) engine=rocksdb;
+
+SET @old_val = @@session.unique_checks;
+set @@session.unique_checks = FALSE;
+
+insert into t1 values (1, 1), (1, 2);
+--error ER_DUP_ENTRY
+insert into t2 values (1, 1, 1), (1, 2, 1);
+insert into t3 values (1, 1), (1, 1);
+
+set @@session.unique_checks = @old_val;
+# cleanup
+drop table t1, t2, t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/unique_sec.inc b/storage/rocksdb/mysql-test/rocksdb/t/unique_sec.inc
new file mode 100644
index 00000000000..ecfc0d8f734
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/unique_sec.inc
@@ -0,0 +1,198 @@
+# Save the initial number of concurrent sessions
+--source include/count_sessions.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+connection con1;
+
+eval CREATE TABLE t1 (id1 INT NOT NULL, id2 INT NOT NULL, id3 VARCHAR(32),
+ id4 INT, id5 VARCHAR(32),
+ value1 INT, value2 INT, value3 VARCHAR(32),
+ PRIMARY KEY (id1, id2) $CF,
+ UNIQUE INDEX (id2, id1) $CF,
+ UNIQUE INDEX (id2, id3, id4) $CF,
+ INDEX (id1) $CF,
+ INDEX (id3, id1) $CF,
+ UNIQUE INDEX(id5) $CF,
+ INDEX (id2, id5)) ENGINE=ROCKSDB;
+
+--disable_query_log
+let $max = 10;
+let $i = 1;
+while ($i <= $max) {
+ let $insert = INSERT INTO t1 VALUES ($i, $i, $i, $i, $i, $i, $i, $i);
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+
+SELECT COUNT(*) FROM t1;
+
+--echo # Test inserting a key that returns duplicate error
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES (1, 1, 11, 11, 11, 11, 11, 11);
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES (5, 5, 11, 11, 11, 11, 11, 11);
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES (10, 10, 11, 11, 11, 11, 11, 11);
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES (11, 1, 1, 1, 11, 11, 11, 11);
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES (11, 5, 5, 5, 11, 11, 11, 11);
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES (11, 10, 10, 10, 11, 11, 11, 11);
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES (11, 11, 11, 11, 1, 11, 11, 11);
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES (11, 11, 11, 11, 5, 11, 11, 11);
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES (11, 11, 11, 11, 10, 11, 11, 11);
+
+--echo # Test updating a key that returns duplicate error
+--error ER_DUP_ENTRY
+UPDATE t1 SET id2=1, id3=1, id4=1 WHERE id1=2;
+--error ER_DUP_ENTRY
+UPDATE t1 SET id2=1, id3=1, id4=1;
+
+SELECT COUNT(*) FROM t1;
+
+--echo # Test updating a key to itself
+UPDATE t1 set id2=id4;
+UPDATE t1 set id5=id3, value1=value2;
+UPDATE t1 set value3=value1;
+
+--echo # Test modifying values should not cause duplicates
+UPDATE t1 SET value1=value3+1;
+UPDATE t1 SET value3=value3 div 2;
+UPDATE t1 SET value2=value3;
+
+SELECT COUNT(*) FROM t1;
+
+--echo # Test NULL values are considered unique
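+# (Per SQL semantics NULL compares unequal to NULL, so repeated NULLs in the
+# UNIQUE(id2,id3,id4) and UNIQUE(id5) indexes never conflict.)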
+INSERT INTO t1 VALUES (20, 20, 20, NULL, NULL, 20, 20, 20);
+INSERT INTO t1 VALUES (21, 20, 20, NULL, NULL, 20, 20, 20);
+INSERT INTO t1 VALUES (22, 20, 20, NULL, NULL, 20, 20, 20);
+
+SELECT COUNT(*) FROM t1;
+
+--echo # Adding multiple rows where one of the rows fail the duplicate
+--echo # check should fail the whole statement
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES (23, 23, 23, 23, 23, 23, 23, 23),
+ (24, 24, 24, 24, 24, 24, 24, 24),
+ (25, 10, 10, 10, 25, 25, 25, 25),
+ (26, 26, 26, 26, 26, 26, 26, 26);
+SELECT COUNT(*) FROM t1;
+
+# Test open transactions can prevent duplicate keys
+connection con1;
+BEGIN;
+INSERT INTO t1 VALUES (30, 31, 32, 33, 34, 30, 30, 30);
+
+connection con2;
+BEGIN;
+
+SELECT COUNT(*) FROM t1;
+
+--echo # Primary key should prevent duplicate on insert
+--error ER_LOCK_WAIT_TIMEOUT
+INSERT INTO t1 VALUES (30, 31, 30, 30, 30, 30, 30, 30);
+
+--echo # Primary key should prevent duplicate on update
+--error ER_LOCK_WAIT_TIMEOUT
+UPDATE t1 SET id1=30, id2=31 WHERE id2=10;
+
+--echo # Unique secondary key should prevent duplicate on insert
+--error ER_LOCK_WAIT_TIMEOUT
+INSERT INTO t1 VALUES (31, 31, 32, 33, 30, 30, 30, 30);
+--error ER_LOCK_WAIT_TIMEOUT
+INSERT INTO t1 VALUES (32, 32, 32, 32, 34, 32, 32, 32);
+
+--echo # Unique secondary key should prevent duplicate on update
+--error ER_LOCK_WAIT_TIMEOUT
+UPDATE t1 SET id2=31, id3=32, id4=33 WHERE id2=8;
+--error ER_LOCK_WAIT_TIMEOUT
+UPDATE t1 SET id5=34 WHERE id2=8;
+
+--echo # Adding multiple rows where one of the rows fail the duplicate
+--echo # check should fail the whole statement
+--error ER_LOCK_WAIT_TIMEOUT
+INSERT INTO t1 VALUES (35, 35, 35, 35, 35, 35, 35, 35),
+ (36, 36, 36, 36, 36, 36, 36, 36),
+ (37, 31, 32, 33, 37, 37, 37, 37),
+ (38, 38, 38, 38, 38, 38, 38, 38);
+--error ER_LOCK_WAIT_TIMEOUT
+INSERT INTO t1 VALUES (35, 35, 35, 35, 35, 35, 35, 35),
+ (36, 36, 36, 36, 36, 36, 36, 36),
+ (37, 37, 37, 37, 34, 37, 37, 37),
+ (38, 38, 38, 38, 38, 38, 38, 38);
+
+--echo # NULL values are unique and duplicates in value fields are ignored
+INSERT INTO t1 VALUES (37, 31, 32, NULL, 37, 37, 37, 37),
+ (38, 31, 32, NULL, 38, 37, 37, 37),
+ (39, 31, 32, NULL, 39, 37, 37, 37);
+
+SELECT COUNT(*) FROM t1;
+
+--echo # Fail on duplicate key update for row added in our transaction
+--error ER_DUP_ENTRY
+UPDATE t1 SET id5=37 WHERE id1=38;
+
+--echo # Fail on lock timeout for row modified in another transaction
+--error ER_LOCK_WAIT_TIMEOUT
+UPDATE t1 SET id5=34 WHERE id1=38;
+
+--echo # NULL values are unique
+UPDATE t1 SET id5=NULL WHERE value1 > 37;
+
+connection con1;
+COMMIT;
+
+connection con2;
+COMMIT;
+
+# Test transaction is reading from latest data
+connection con2;
+BEGIN;
+SELECT COUNT(*) FROM t1;
+
+connection con1;
+BEGIN;
+INSERT INTO t1 VALUES (40, 40, 40, 40, 40, 40, 40, 40);
+
+connection con2;
+--echo # When transaction is pending, fail on lock acquisition
+--error ER_LOCK_WAIT_TIMEOUT
+INSERT INTO t1 VALUES (40, 40, 40, 40, 40, 40, 40, 40);
+--error ER_LOCK_WAIT_TIMEOUT
+INSERT INTO t1 VALUES (41, 40, 40, 40, 40, 40, 40, 40);
+
+SELECT COUNT(*) FROM t1;
+
+connection con1;
+COMMIT;
+
+connection con2;
+--echo # When transaction is committed, fail on duplicate key
+--error ER_DUP_ENTRY,ER_LOCK_DEADLOCK
+INSERT INTO t1 VALUES (40, 40, 40, 40, 40, 40, 40, 40);
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES (41, 40, 40, 40, 40, 40, 40, 40);
+
+ROLLBACK;
+
+SELECT * FROM t1;
+
+disconnect con1;
+disconnect con2;
+
+connection default;
+DROP TABLE t1;
+
+# Wait till we reached the initial number of concurrent sessions
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/unique_sec.test b/storage/rocksdb/mysql-test/rocksdb/t/unique_sec.test
new file mode 100644
index 00000000000..1dedd75f561
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/unique_sec.test
@@ -0,0 +1,51 @@
+--source include/have_rocksdb.inc
+--source include/have_partition.inc
+
+--let $CF=
+--source unique_sec.inc
+
+--echo #
+--echo # Issue #88: Creating unique index over column with duplicate values succeeds
+--echo #
+create table t1 (pk int primary key, a int) engine=rocksdb;
+
+insert into t1 values
+(1, 1),
+(2, 2),
+(3, 3),
+(4, 1),
+(5, 5);
+
+--error ER_DUP_ENTRY
+alter table t1 add unique(a);
+drop table t1;
+
+--echo #
+--echo # Issue #111
+--echo #
+CREATE TABLE t2 (pk int, a int, PRIMARY KEY (pk, a), UNIQUE KEY (a)) ENGINE=ROCKSDB PARTITION BY KEY (a) PARTITIONS 16;
+
+INSERT INTO t2 VALUES (1,1);
+--error ER_DUP_ENTRY
+INSERT INTO t2 VALUES (1,1);
+--error ER_DUP_ENTRY
+INSERT INTO t2 VALUES (2,1);
+DROP TABLE t2;
+
+--echo #
+--echo # Issue #491 (https://github.com/facebook/mysql-5.6/issues/491)
+--echo #
+CREATE TABLE t (a BLOB, PRIMARY KEY(a(2)), UNIQUE KEY (a(1))) engine=rocksdb;
+INSERT INTO t VALUES('a');
+CHECK TABLE t EXTENDED;
+DROP TABLE t;
+
+CREATE TABLE t (a VARCHAR(255), PRIMARY KEY(a), UNIQUE KEY (a(1))) engine=rocksdb;
+INSERT INTO t VALUES('a');
+CHECK TABLE t EXTENDED;
+DROP TABLE t;
+
+CREATE TABLE t (a VARCHAR(255), PRIMARY KEY(a(2)), UNIQUE KEY (a(1))) engine=rocksdb;
+INSERT INTO t VALUES('a');
+CHECK TABLE t EXTENDED;
+DROP TABLE t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/unique_sec_rev_cf.test b/storage/rocksdb/mysql-test/rocksdb/t/unique_sec_rev_cf.test
new file mode 100644
index 00000000000..dc0bbd8ec8d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/unique_sec_rev_cf.test
@@ -0,0 +1,4 @@
+--source include/have_rocksdb.inc
+
+let $CF=COMMENT 'rev:cf';
+--source unique_sec.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/unsupported_tx_isolations.test b/storage/rocksdb/mysql-test/rocksdb/t/unsupported_tx_isolations.test
new file mode 100644
index 00000000000..a15e5d2a7be
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/unsupported_tx_isolations.test
@@ -0,0 +1,25 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+create table t1 (id int primary key, value int) engine=rocksdb;
+insert into t1 values (1,1);
+
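+# MyRocks supports only READ COMMITTED and REPEATABLE READ; the two levels
+# below are rejected per statement with ER_ISOLATION_MODE_NOT_SUPPORTED.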
+SET session transaction isolation level read uncommitted;
+begin;
+--error ER_ISOLATION_MODE_NOT_SUPPORTED
+insert into t1 values (2,1);
+--error ER_ISOLATION_MODE_NOT_SUPPORTED
+select * from t1 where id=1;
+rollback;
+
+SET session transaction isolation level serializable;
+begin;
+--error ER_ISOLATION_MODE_NOT_SUPPORTED
+insert into t1 values (2,1);
+--error ER_ISOLATION_MODE_NOT_SUPPORTED
+select * from t1 where id=1;
+rollback;
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/update.test b/storage/rocksdb/mysql-test/rocksdb/t/update.test
new file mode 100644
index 00000000000..6220c4f55a3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/update.test
@@ -0,0 +1,82 @@
+--source include/have_rocksdb.inc
+
+#
+# Basic UPDATE statements.
+# UPDATE LOW_PRIORITY is covered in update_low_prio test
+# UPDATE IGNORE is covered in update_ignore test
+# Multi-table update is covered in update_multi test
+#
+
+########################################
+# TODO:
+# The results of the transactional part
+# are unusual due to "can't-see-own-changes"
+########################################
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(10000,'foobar');
+INSERT INTO t1 (a,b) SELECT a, b FROM t1;
+
+UPDATE t1 SET a=a+100;
+--sorted_result
+SELECT a,b FROM t1;
+
+UPDATE t1 SET a=a-100, b=DEFAULT WHERE a>100;
+--sorted_result
+SELECT a,b FROM t1;
+
+# ORDER BY and LIMIT
+UPDATE t1 SET b = 'update' WHERE a <= 4 ORDER BY b DESC, a ASC LIMIT 1;
+--sorted_result
+SELECT a,b FROM t1;
+
+DROP TABLE t1;
+
+#
+# Transactional UPDATE
+#
+
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(10000,'foobar');
+INSERT INTO t1 (a,b) SELECT a, b FROM t1;
+
+BEGIN;
+UPDATE t1 SET a=a+100;
+UPDATE t1 SET a=a-50, b=DEFAULT WHERE a>100;
+COMMIT;
+--sorted_result
+SELECT * FROM t1 ORDER BY pk;
+
+BEGIN;
+UPDATE t1 SET b = 'update' WHERE a <= 4 ORDER BY a DESC, b ASC LIMIT 3;
+UPDATE t1 SET b = '';
+ROLLBACK;
+SELECT * FROM t1 ORDER BY pk;
+
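+# (MyRocks is assumed to reject rolling back to a savepoint once later
+# writes exist; the failed ROLLBACK TO SAVEPOINT poisons the transaction,
+# so the final COMMIT fails as well -- hence the two --error directives.)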
+BEGIN;
+UPDATE t1 SET b = 'update2' WHERE a <= 100;
+SAVEPOINT spt1;
+UPDATE t1 SET b = '';
+--error ER_ROLLBACK_TO_SAVEPOINT
+ROLLBACK TO SAVEPOINT spt1;
+UPDATE t1 SET b = 'upd' WHERE a = 10050;
+--error ER_ROLLBACK_ONLY
+COMMIT;
+SELECT * FROM t1 ORDER BY pk;
+
+DROP TABLE t1;
+
+
+#
+# Issue #830 UPDATE with unique constraint does not work
+#
+
+CREATE TABLE t1 (a INT, b CHAR(8), UNIQUE INDEX(a)) ENGINE=RocksDB;
+INSERT INTO t1 (a,b) VALUES (1,'foo'),(2,'bar');
+UPDATE t1 SET a=a+100;
+SELECT * FROM t1;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/update_ignore-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/update_ignore-master.opt
new file mode 100644
index 00000000000..ba9364e1523
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/update_ignore-master.opt
@@ -0,0 +1 @@
+--rocksdb_debug_optimizer_n_rows=1000
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/update_ignore.test b/storage/rocksdb/mysql-test/rocksdb/t/update_ignore.test
new file mode 100644
index 00000000000..0208e1b3563
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/update_ignore.test
@@ -0,0 +1,35 @@
+--source include/have_rocksdb.inc
+
+#
+# UPDATE IGNORE
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1,t2;
+--enable_warnings
+
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(10000,'foobar');
+INSERT INTO t1 (a,b) SELECT a, b FROM t1;
+
+CREATE TABLE t2 (c CHAR(8), d INT, pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=rocksdb;
+
+INSERT INTO t2 (c,d) SELECT b, a FROM t1;
+
+UPDATE IGNORE t1 SET b = 'upd1' WHERE b IS NOT NULL ORDER BY a LIMIT 1;
+SELECT a,b FROM t1 ORDER BY pk;
+
+--error ER_SUBQUERY_NO_1_ROW
+UPDATE t1, t2 SET b = 'upd2a', c = 'upd2b'
+ WHERE c < b OR a != ( SELECT 1 UNION SELECT 2 );
+
+UPDATE IGNORE t1, t2 SET b = 'upd2a', c = 'upd2b'
+ WHERE c < b OR a != ( SELECT 1 UNION SELECT 2 );
+
+SELECT a,b FROM t1 ORDER BY pk;
+SELECT c,d FROM t2 ORDER BY pk;
+
+# Cleanup
+DROP TABLE t1, t2;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/update_multi.test b/storage/rocksdb/mysql-test/rocksdb/t/update_multi.test
new file mode 100644
index 00000000000..1cb2f12804d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/update_multi.test
@@ -0,0 +1,15 @@
+--source include/have_rocksdb.inc
+
+#
+# Multi-table UPDATE statements.
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1,t2;
+--enable_warnings
+
+let $i= 0;
+while ($i < 10) {
+ --source update_multi_exec.inc
+ inc $i;
+}
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/update_multi_exec.inc b/storage/rocksdb/mysql-test/rocksdb/t/update_multi_exec.inc
new file mode 100644
index 00000000000..40d736693c6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/update_multi_exec.inc
@@ -0,0 +1,27 @@
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT
+PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES
+(1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(10000,'foobar');
+INSERT INTO t1 (a,b) SELECT a, b FROM t1;
+
+CREATE TABLE t2 (c CHAR(8), d INT, pk INT AUTO_INCREMENT
+PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t2 (c,d) SELECT b, a FROM t1;
+
+UPDATE t1 STRAIGHT_JOIN t2 SET t1.a = t2.d+100, t2.c = 'multi'
+WHERE c < b AND a + d != 1;
+--sorted_result
+SELECT a,b FROM t1;
+--sorted_result
+SELECT c,d FROM t2;
+
+UPDATE t2 STRAIGHT_JOIN t1 SET t2.d = DEFAULT
+WHERE c = 'foobar' and b = c;
+--sorted_result
+SELECT a,b FROM t1;
+--sorted_result
+SELECT c,d FROM t2;
+
+# Cleanup
+DROP TABLE t1, t2;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/update_with_keys.test b/storage/rocksdb/mysql-test/rocksdb/t/update_with_keys.test
new file mode 100644
index 00000000000..c53eb9be85c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/update_with_keys.test
@@ -0,0 +1,78 @@
+--source include/have_rocksdb.inc
+
+#
+# UPDATE statements for tables with keys
+#
+
+#############################################
+# TODO:
+# The test doesn't work quite as expected,
+# apparently due to "can't see own changes"
+#############################################
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY, INDEX(b)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(6,'x'),(7,'y'),(8,'z');
+UPDATE t1 SET a=100, b='f' WHERE b IN ('b','c');
+UPDATE t1 SET b='m' WHERE b = 'f';
+UPDATE t1 SET b='z' WHERE a < 2;
+UPDATE t1 SET b='';
+--sorted_result
+SELECT a,b FROM t1;
+DROP TABLE t1;
+
+--echo # RocksDB: skip the test for secondary UNIQUE keys.
+--disable_parsing
+--error ER_GET_ERRMSG
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX(a)) ENGINE=innodb;
+
+
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(0,'f'),(100,'a');
+UPDATE t1 SET a=a+200;
+UPDATE t1 SET a=0 WHERE a > 250;
+--error ER_DUP_ENTRY
+UPDATE t1 SET a=205 WHERE a=200;
+UPDATE t1 SET a=12345 ORDER BY a, b LIMIT 1;
+--sorted_result
+SELECT a,b FROM t1;
+
+--error ER_DUP_ENTRY
+UPDATE t1 SET a=80 WHERE a IN (202,203);
+--sorted_result
+SELECT a,b FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX(a,b)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(100,'a'),(6,'f');
+UPDATE t1 SET a=6 WHERE a=3;
+--error ER_DUP_ENTRY
+UPDATE t1 SET a=100 WHERE a=1;
+--error ER_DUP_ENTRY
+UPDATE t1 SET a=4, b='d' WHERE b='f';
+UPDATE t1 SET a=a+1;
+--sorted_result
+SELECT a,b FROM t1;
+--error ER_DUP_ENTRY
+UPDATE t1 SET b='z';
+DROP TABLE t1;
+
+--enable_parsing
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e'),(0,'f'),(100,'a');
+UPDATE t1 SET a=a+200;
+UPDATE t1 SET a=0 WHERE a > 250;
+--error ER_DUP_ENTRY
+UPDATE t1 SET a=205 WHERE a=200;
+UPDATE t1 SET a=12345 ORDER BY a DESC, b LIMIT 1;
+
+--sorted_result
+SELECT a,b FROM t1;
+
+--error ER_DUP_ENTRY
+UPDATE t1 SET a=80 WHERE a IN (202,203);
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_io_for_flush_and_compaction.test b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_io_for_flush_and_compaction.test
new file mode 100644
index 00000000000..782e2a369a4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_io_for_flush_and_compaction.test
@@ -0,0 +1,5 @@
+--source include/have_rocksdb.inc
+
+--let $io_option=--rocksdb_use_direct_io_for_flush_and_compaction=1
+
+--source ../include/use_direct_io_option.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads.test b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads.test
new file mode 100644
index 00000000000..a1b717e85fc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads.test
@@ -0,0 +1,5 @@
+--source include/have_rocksdb.inc
+
+--let $io_option=--rocksdb_use_direct_reads=1
+
+--source ../include/use_direct_io_option.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test
new file mode 100644
index 00000000000..53ba5161d16
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test
@@ -0,0 +1,62 @@
+--source include/have_rocksdb.inc
+--source include/have_direct_io.inc
+
+call mtr.add_suppression("rocksdb");
+call mtr.add_suppression("Aborting");
+
+--echo # This shows that RocksDB plugin is loaded:
+select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB';
+
+# Issue #221
+# Turning on both --rocksdb-allow-mmap-reads and --rocksdb-use-direct-reads
+# caused an assertion in RocksDB. This combination is no longer allowed, and
+# the ROCKSDB plugin will not load in such a configuration.
+#
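+# (Illustrative: on a running server the guarded pair can be inspected with
+#   SELECT @@rocksdb_use_direct_reads, @@rocksdb_allow_mmap_reads;
+# the restarts below exercise the startup-time check instead.)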
+--let LOG=$MYSQLTEST_VARDIR/tmp/use_direct_reads_writes.err
+--let SEARCH_FILE=$LOG
+
+--echo Checking direct reads
+--let $_mysqld_option=--log-error=$LOG --rocksdb_use_direct_reads=1 --rocksdb_allow_mmap_reads=1
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--source include/restart_mysqld_with_invalid_option.inc
+
+--let SEARCH_PATTERN=enable both use_direct_reads
+--source include/search_pattern_in_file.inc
+--remove_file $LOG
+
+
+# Repeat with direct-writes
+--echo Checking direct writes
+--let $_mysqld_option=--log-error=$LOG --rocksdb_use_direct_io_for_flush_and_compaction=1 --rocksdb_allow_mmap_writes=1
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--source include/restart_mysqld_with_invalid_option.inc
+
+--let SEARCH_PATTERN=enable both use_direct_io_for_flush_and_compaction
+--source include/search_pattern_in_file.inc
+--remove_file $LOG
+
+
+# Verify that an invalid combination of direct writes and --rocksdb_flush_log_at_trx_commit fails at startup
+--echo Checking rocksdb_flush_log_at_trx_commit
+--let $_mysqld_option=--log-error=$LOG --rocksdb_flush_log_at_trx_commit=1 --rocksdb_allow_mmap_writes=1
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--source include/restart_mysqld_with_invalid_option.inc
+
+--let SEARCH_PATTERN=rocksdb_flush_log_at_trx_commit needs to be
+--source include/search_pattern_in_file.inc
+--remove_file $LOG
+
+
+# Verify rocksdb_flush_log_at_trx_commit cannot be changed away from 0 while mmap writes are enabled
+--echo Validate flush_log settings when direct writes is enabled
+--let $_mysqld_option=--rocksdb_flush_log_at_trx_commit=0 --rocksdb_allow_mmap_writes=1
+--source include/restart_mysqld_with_option.inc
+
+set global rocksdb_flush_log_at_trx_commit=0;
+--error ER_WRONG_VALUE_FOR_VAR
+set global rocksdb_flush_log_at_trx_commit=1;
+--error ER_WRONG_VALUE_FOR_VAR
+set global rocksdb_flush_log_at_trx_commit=2;
+
+# Cleanup
+--source include/restart_mysqld.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/validate_datadic.test b/storage/rocksdb/mysql-test/rocksdb/t/validate_datadic.test
new file mode 100644
index 00000000000..ec48dc03ec8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/validate_datadic.test
@@ -0,0 +1,108 @@
+--source include/have_rocksdb.inc
+--source include/have_partition.inc
+
+#
+# Validate that the server starts when everything is okay, but detects errors
+# if a table exists in the data dictionary but not as an .frm file, or vice
+# versa. The default mode causes these failures to keep the server from
+# starting, but this is problematic for the test as a server start failure is
+# not easily trappable. Instead use the mode where it will detect the problem
+# and report it in the log but still start: --rocksdb_validate_tables=2
+#
+
+call mtr.add_suppression('RocksDB: Schema mismatch');
+
+CREATE TABLE t1 (pk int primary key) ENGINE=ROCKSDB;
+CREATE TABLE t2 (pk int primary key) ENGINE=ROCKSDB PARTITION BY KEY(pk) PARTITIONS 4;
+
+# Write file to make mysql-test-run.pl expect the "crash", but don't restart the
+# server until it is told to
+--let $_server_id= `SELECT @@server_id`
+--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$_server_id.expect
+--let LOG=$MYSQLTEST_VARDIR/tmp/validate_datadic.err
+
+--exec echo "wait" >$_expect_file_name
+
+# Send shutdown to the connected server and give it 10 seconds to die before
+# zapping it
+shutdown_server 10;
+
+# Write file to make mysql-test-run.pl start up the server again
+--exec echo "restart" >$_expect_file_name
+--sleep 5
+
+# Turn on reconnect
+--enable_reconnect
+
+# Call script that will poll the server waiting for it to be back online again
+--source include/wait_until_connected_again.inc
+
+# Turn off reconnect again
+--disable_reconnect
+
+# Now shut down again and rename the .frm files
+--exec echo "wait" >$_expect_file_name
+shutdown_server 10;
+
+# Rename the files
+--move_file $MYSQLTEST_VARDIR/mysqld.1/data/test/t1.frm $MYSQLTEST_VARDIR/mysqld.1/data/test/t1.frm.tmp
+--move_file $MYSQLTEST_VARDIR/mysqld.1/data/test/t2.frm $MYSQLTEST_VARDIR/mysqld.1/data/test/t2.frm.tmp
+
+# Attempt to restart the server
+--exec echo "restart:--rocksdb_validate_tables=2 --log-error=$LOG" >$_expect_file_name
+
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
+
+# We should now have an error message
+--echo "Expect errors that we are missing two .frm files"
+--let SEARCH_FILE=$LOG
+--let SEARCH_PATTERN=RocksDB: Schema mismatch - Table test.t1 is registered in RocksDB but does not have a .frm file
+--source include/search_pattern_in_file.inc
+--let SEARCH_PATTERN=RocksDB: Schema mismatch - Table test.t2 is registered in RocksDB but does not have a .frm file
+--source include/search_pattern_in_file.inc
+
+# Now shut down again, rename the .frm files back and make a copy of one of them
+--exec echo "wait" >$_expect_file_name
+shutdown_server 10;
+--remove_file $LOG
+# Rename the files back
+--move_file $MYSQLTEST_VARDIR/mysqld.1/data/test/t1.frm.tmp $MYSQLTEST_VARDIR/mysqld.1/data/test/t1.frm
+--move_file $MYSQLTEST_VARDIR/mysqld.1/data/test/t2.frm.tmp $MYSQLTEST_VARDIR/mysqld.1/data/test/t2.frm
+--copy_file $MYSQLTEST_VARDIR/mysqld.1/data/test/t1.frm $MYSQLTEST_VARDIR/mysqld.1/data/test/t1_dummy.frm
+
+# Attempt to restart the server
+--exec echo "restart:--rocksdb_validate_tables=2 --log-error=$LOG" >$_expect_file_name
+
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
+
+# We should now have an error message for the second case
+--echo "Expect an error that we have an extra .frm file"
+--let SEARCH_FILE=$LOG
+--let SEARCH_PATTERN=Schema mismatch - A .frm file exists for table test.t1_dummy, but that table is not registered in RocksDB
+--source include/search_pattern_in_file.inc
+
+# Shut down and clean up
+--exec echo "wait" >$_expect_file_name
+shutdown_server 10;
+--remove_file $MYSQLTEST_VARDIR/mysqld.1/data/test/t1_dummy.frm
+--exec echo "restart" >$_expect_file_name
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
+
+# Remove log file
+--remove_file $LOG
+
+--disable_warnings
+DROP TABLE t1, t2;
+--enable_warnings
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/varbinary_format.test b/storage/rocksdb/mysql-test/rocksdb/t/varbinary_format.test
new file mode 100644
index 00000000000..d10082bb95c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/varbinary_format.test
@@ -0,0 +1,131 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+
+# Create a table with a varbinary key with the current format and validate
+# that it sorts correctly
+CREATE TABLE t1(
+ vb VARBINARY(64) primary key
+) ENGINE=rocksdb;
+
+INSERT INTO t1 values(0x00);
+INSERT INTO t1 values(0x0000);
+INSERT INTO t1 values(0x0000000000000000);
+INSERT INTO t1 values(0x000000);
+INSERT INTO t1 values(0x000000000000000000000000000000000000000000000000000000000000000000);
+INSERT INTO t1 values(0x00000000000000000000000000000000000000000000000000000000000000);
+INSERT INTO t1 values(0x0000000000000000000000000000000000000000000000000000000000000000);
+INSERT INTO t1 values(0x00000000);
+INSERT INTO t1 values(0x0000000000);
+INSERT INTO t1 values(0x00000000000000000000);
+INSERT INTO t1 values(0x000000000000);
+INSERT INTO t1 values(0x00000000000000);
+INSERT INTO t1 values(0x000000000000000000);
+
+SELECT hex(vb) FROM t1;
+
+# Use the fact that information_schema.rocksdb_locks shows keys in their
+# encoded form to validate that the keys were encoded as expected
+BEGIN;
+SELECT hex(vb) FROM t1 FOR UPDATE;
+SELECT SUBSTRING(a.key,9) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
+ROLLBACK;
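+# An assumption about the key layout, not something this test asserts: each
+# dumped key begins with the 4-byte index number (hence SUBSTRING(a.key,9)
+# skipping the first 8 hex digits), followed by the mem-comparable encoding
+# of the VARBINARY value.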
+
+DROP TABLE t1;
+
+# Now create the same table in the old format to show that it can be read
+# and handled correctly
+set session debug_dbug= '+d,MYROCKS_LEGACY_VARBINARY_FORMAT';
+CREATE TABLE t1(
+ vb VARBINARY(64) primary key
+) ENGINE=rocksdb;
+set session debug_dbug= '-d,MYROCKS_LEGACY_VARBINARY_FORMAT';
+
+INSERT INTO t1 values(0x00);
+INSERT INTO t1 values(0x0000);
+INSERT INTO t1 values(0x0000000000000000);
+INSERT INTO t1 values(0x000000);
+INSERT INTO t1 values(0x000000000000000000000000000000000000000000000000000000000000000000);
+INSERT INTO t1 values(0x00000000000000000000000000000000000000000000000000000000000000);
+INSERT INTO t1 values(0x0000000000000000000000000000000000000000000000000000000000000000);
+INSERT INTO t1 values(0x00000000);
+INSERT INTO t1 values(0x0000000000);
+INSERT INTO t1 values(0x00000000000000000000);
+INSERT INTO t1 values(0x000000000000);
+INSERT INTO t1 values(0x00000000000000);
+INSERT INTO t1 values(0x000000000000000000);
+
+SELECT hex(vb) FROM t1;
+
+# Use the fact that information_schema.rocksdb_locks shows keys in their
+# encoded form to validate that the keys were encoded as expected
+BEGIN;
+SELECT hex(vb) FROM t1 FOR UPDATE;
+SELECT SUBSTRING(a.key,9) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
+ROLLBACK;
+
+DROP TABLE t1;
+
+# Now create a table with a varchar key using a binary collation with the
+# current format and validate that it sorts correctly
+CREATE TABLE t1(
+ vc VARCHAR(64) collate 'binary' primary key
+) ENGINE=rocksdb;
+
+INSERT INTO t1 values('a');
+INSERT INTO t1 values('aa');
+INSERT INTO t1 values('aaaaaaaa');
+INSERT INTO t1 values('aaa');
+INSERT INTO t1 values('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
+INSERT INTO t1 values('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
+INSERT INTO t1 values('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
+INSERT INTO t1 values('aaaa');
+INSERT INTO t1 values('aaaaa');
+INSERT INTO t1 values('aaaaaaaaaa');
+INSERT INTO t1 values('aaaaaa');
+INSERT INTO t1 values('aaaaaaa');
+INSERT INTO t1 values('aaaaaaaaa');
+
+SELECT * FROM t1;
+
+# Use the fact that information_schema.rocksdb_locks shows keys in their
+# encoded form to validate that the keys were encoded as expected
+BEGIN;
+SELECT * FROM t1 FOR UPDATE;
+SELECT SUBSTRING(a.key,9) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
+ROLLBACK;
+
+DROP TABLE t1;
+
+# Now create the same table in the old format to show that it can be read
+# and handled correctly
+set session debug_dbug= '+d,MYROCKS_LEGACY_VARBINARY_FORMAT';
+CREATE TABLE t1(
+ vc VARCHAR(64) collate 'binary' primary key
+) ENGINE=rocksdb;
+set session debug_dbug= '-d,MYROCKS_LEGACY_VARBINARY_FORMAT';
+
+INSERT INTO t1 values('a');
+INSERT INTO t1 values('aa');
+INSERT INTO t1 values('aaaaaaaa');
+INSERT INTO t1 values('aaa');
+INSERT INTO t1 values('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
+INSERT INTO t1 values('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
+INSERT INTO t1 values('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
+INSERT INTO t1 values('aaaa');
+INSERT INTO t1 values('aaaaa');
+INSERT INTO t1 values('aaaaaaaaaa');
+INSERT INTO t1 values('aaaaaa');
+INSERT INTO t1 values('aaaaaaa');
+INSERT INTO t1 values('aaaaaaaaa');
+
+SELECT * FROM t1;
+
+# Use the fact that information_schema.rocksdb_locks shows keys in their
+# encoded form to validate that the keys were encoded as expected
+BEGIN;
+SELECT * FROM t1 FOR UPDATE;
+SELECT SUBSTRING(a.key,9) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
+ROLLBACK;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test b/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test
new file mode 100644
index 00000000000..7c30d4fcbdb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test
@@ -0,0 +1,41 @@
+--source include/have_rocksdb.inc
+
+SET GLOBAL rocksdb_write_disable_wal=false;
+SET GLOBAL rocksdb_write_ignore_missing_column_families=true;
+
+create table aaa (id int primary key, i int) engine rocksdb;
+set @save_rocksdb_flush_log_at_trx_commit=@@global.rocksdb_flush_log_at_trx_commit;
+SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
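+# Assumed semantics, mirroring the InnoDB variable of the same name: 1 syncs
+# the WAL on every commit, while 0 and 2 defer syncing to a background
+# thread. That is why the later cases poll rocksdb_wal_synced via
+# wait_for_status_var.inc instead of checking it immediately.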
+insert aaa(id, i) values(0,1);
+
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+insert aaa(id, i) values(1,1);
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+insert aaa(id, i) values(2,1);
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+insert aaa(id, i) values(3,1);
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
+insert aaa(id, i) values(4,1);
+
+let $status_var=rocksdb_wal_synced;
+let $status_var_value=`select @a+1`;
+source include/wait_for_status_var.inc;
+
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+SET GLOBAL rocksdb_flush_log_at_trx_commit=2;
+insert aaa(id, i) values(5,1);
+
+let $status_var=rocksdb_wal_synced;
+let $status_var_value=`select @a+1`;
+source include/wait_for_status_var.inc;
+
+truncate table aaa;
+
+# Cleanup
+drop table aaa;
+set @@global.rocksdb_flush_log_at_trx_commit=@save_rocksdb_flush_log_at_trx_commit;
+SET GLOBAL rocksdb_write_disable_wal=false;
+SET GLOBAL rocksdb_write_ignore_missing_column_families=false;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/xa-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/xa-master.opt
new file mode 100644
index 00000000000..70c120604f6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/xa-master.opt
@@ -0,0 +1 @@
+--rocksdb_flush_log_at_trx_commit=1
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/xa.test b/storage/rocksdb/mysql-test/rocksdb/t/xa.test
new file mode 100644
index 00000000000..f8f381f0580
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/xa.test
@@ -0,0 +1,38 @@
+--echo #
+--echo # MDEV-13155: XA recovery not supported for RocksDB (Just a testcase)
+--echo #
+
+call mtr.add_suppression("Found .* prepared XA transactions");
+
+--connect (con1,localhost,root,,test)
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a INT) ENGINE=RocksDB;
+
+XA START 'xa1';
+INSERT INTO t1 (a) VALUES (1),(2);
+XA END 'xa1';
+XA PREPARE 'xa1';
+
+--connect (con2,localhost,root,,test)
+XA START 'xa2';
+INSERT INTO t1 (a) VALUES (3);
+INSERT INTO t1 (a) VALUES (4);
+XA END 'xa2';
+XA PREPARE 'xa2';
+
+--connection default
+SELECT * FROM t1;
+
+--let $shutdown_timeout= 0
+--source include/restart_mysqld.inc
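+# After the crash restart, both transactions should come back in PREPARED
+# state: XA RECOVER lists them, and we resolve one with ROLLBACK and the
+# other with COMMIT.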
+
+--connect (con3,localhost,root,,test)
+--disable_abort_on_error
+XA RECOVER;
+XA ROLLBACK 'xa1';
+XA COMMIT 'xa2';
+SELECT a FROM t1;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/base.cnf b/storage/rocksdb/mysql-test/rocksdb_hotbackup/base.cnf
new file mode 100644
index 00000000000..101dbce2385
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/base.cnf
@@ -0,0 +1,25 @@
+# Use default setting for mysqld processes
+!include include/default_mysqld.cnf
+!include include/default_client.cnf
+
+[mysqld.1]
+rocksdb
+default-storage-engine=rocksdb
+skip-innodb
+default-tmp-storage-engine=MyISAM
+binlog_format=row
+
+[mysqld.2]
+rocksdb
+default-storage-engine=rocksdb
+skip-innodb
+default-tmp-storage-engine=MyISAM
+binlog_format=row
+
+[ENV]
+MASTER_MYPORT= @mysqld.1.port
+MASTER_MYSOCK= @mysqld.1.socket
+
+SLAVE_MYPORT= @mysqld.2.port
+SLAVE_MYSOCK= @mysqld.2.socket
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/clean_tmpfiles.sh b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/clean_tmpfiles.sh
new file mode 100755
index 00000000000..98a1fecceba
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/clean_tmpfiles.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+COPY_LOG="${MYSQL_TMP_DIR}/myrocks_hotbackup_copy_log"
+SIGNAL_FILE=${MYSQL_TMP_DIR}/myrocks_hotbackup_signal
+MOVEBACK_LOG="${MYSQL_TMP_DIR}/myrocks_hotbackup_moveback_log"
+rm -f $COPY_LOG
+rm -f $SIGNAL_FILE
+rm -f $MOVEBACK_LOG
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/cleanup.inc b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/cleanup.inc
new file mode 100644
index 00000000000..947bf0270e2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/cleanup.inc
@@ -0,0 +1,3 @@
+
+--source include/rpl_end.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/create_slocket_socket.sh b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/create_slocket_socket.sh
new file mode 100755
index 00000000000..db470f527ca
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/create_slocket_socket.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
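+# Plant a stray unix-domain socket file ("slocket") inside the source
+# datadir; the backup run that follows is expected to tolerate special
+# files that cannot be copied.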
+src_data_dir="${MYSQLTEST_VARDIR}/mysqld.1/data/"
+python -c "import socket as s; sock = s.socket(s.AF_UNIX); sock.bind('${src_data_dir}/slocket')"
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/create_table.sh b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/create_table.sh
new file mode 100755
index 00000000000..2004caca160
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/create_table.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+set -e
+
+COPY_LOG=$1
+SIGNAL_FILE=$2
+# Create a table once myrocks_hotbackup reaches its waiting loop
+
+while : ; do
+ wait=`tail -1 $COPY_LOG | grep 'Waiting until' | wc -l`
+ if [ "$wait" -eq "1" ]; then
+ break
+ fi
+ sleep 1
+done
+$MYSQL --defaults-group-suffix=.1 db1 -e "create table r10 (id int primary key ) engine=rocksdb"
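+# Touching the signal file lets a hotbackup started with --debug_signal_file
+# proceed past its waiting loop.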
+touch $SIGNAL_FILE
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/load_data.sh b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/load_data.sh
new file mode 100755
index 00000000000..80f1a5e2567
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/load_data.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+set -e
+
+# Insert 100 batches of 100 records each into a table with the following
+# schema:
+# create table db1.t1 (
+# `id` int(10) not null auto_increment,
+# `k` int(10),
+# `data` varchar(2048),
+# primary key (`id`),
+# key (`k`)
+# ) engine=rocksdb;
+
+MAX_INSERTS=100
+MAX_ROWS_PER_INSERT=100
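+# Total volume: 25 parallel workers (below) x 100 inserts x 100 rows gives
+# the 250000 rows that the .result files expect.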
+
+insertData() {
+ for ((i=1; i<=$MAX_INSERTS; i++));
+ do
+ stmt='INSERT INTO db1.t1 values'
+ for ((j=1; j<=$MAX_ROWS_PER_INSERT; j++));
+ do
+ k=$RANDOM
+ data=$(head -c 2048 /dev/urandom|tr -cd 'a-zA-Z0-9')
+ stmt=$stmt' (NULL, '$k', "'$data'")'
+ if [ $j -lt $MAX_ROWS_PER_INSERT ]; then
+ stmt=$stmt','
+ fi
+ done
+ stmt=$stmt';'
+ $MYSQL --defaults-group-suffix=.1 -e "$stmt"
+ done
+}
+
+NUM_PARALLEL_INSERTS=25
+pids=()
+for ((k=1; k<=$NUM_PARALLEL_INSERTS; k++));
+do
+ insertData &
+ pids+=($!)
+done
+# pids is zero-indexed, so walk 0..NUM_PARALLEL_INSERTS-1
+for ((k=0; k<$NUM_PARALLEL_INSERTS; k++));
+do
+  wait ${pids[k]}
+done
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/load_data_and_run.sh b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/load_data_and_run.sh
new file mode 100755
index 00000000000..a8e6fc445bb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/load_data_and_run.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+set -e
+
+# Load an initial chunk of data, then start loading another chunk in the
+# background while simultaneously running a backup.
+
+suite/rocksdb_hotbackup/include/load_data.sh 2>&1
+suite/rocksdb_hotbackup/include/load_data.sh 2>&1 &
+suite/rocksdb_hotbackup/include/stream_run.sh 2>&1
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/load_data_slocket.sh b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/load_data_slocket.sh
new file mode 100755
index 00000000000..036d68662d4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/load_data_slocket.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+set -e
+
+# Insert 10 batches of 10 records each into a table with the following
+# schema:
+# create table slocket.t1 (
+# `id` int(10) not null auto_increment,
+# `k` int(10),
+# `data` varchar(2048),
+# primary key (`id`),
+# key (`k`)
+# ) engine=rocksdb;
+
+MAX_INSERTS=10
+MAX_ROWS_PER_INSERT=10
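+# Total volume: 25 parallel workers (below) x 10 inserts x 10 rows gives
+# the 2500 rows in slocket.t1 that the .result file expects.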
+
+insertData() {
+ for ((i=1; i<=$MAX_INSERTS; i++));
+ do
+ stmt='INSERT INTO slocket.t1 values'
+ for ((j=1; j<=$MAX_ROWS_PER_INSERT; j++));
+ do
+ k=$RANDOM
+ data=$(head -c 2048 /dev/urandom|tr -cd 'a-zA-Z0-9')
+ stmt=$stmt' (NULL, '$k', "'$data'")'
+ if [ $j -lt $MAX_ROWS_PER_INSERT ]; then
+ stmt=$stmt','
+ fi
+ done
+ stmt=$stmt';'
+ $MYSQL --defaults-group-suffix=.1 -e "$stmt"
+ done
+}
+
+NUM_PARALLEL_INSERTS=25
+pids=()
+for ((k=1; k<=$NUM_PARALLEL_INSERTS; k++));
+do
+ insertData &
+ pids+=($!)
+done
+# pids is zero-indexed, so walk 0..NUM_PARALLEL_INSERTS-1
+for ((k=0; k<$NUM_PARALLEL_INSERTS; k++));
+do
+  wait ${pids[k]}
+done
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/remove_slocket_socket.sh b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/remove_slocket_socket.sh
new file mode 100755
index 00000000000..9114629ba31
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/remove_slocket_socket.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+src_data_dir="${MYSQLTEST_VARDIR}/mysqld.1/data/"
+rm "${src_data_dir}/slocket"
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/setup.inc b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/setup.inc
new file mode 100644
index 00000000000..26c3f2ce7f1
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/setup.inc
@@ -0,0 +1,16 @@
+--let $rpl_server_count= 2
+--let $rpl_topology= none
+--source include/rpl_init.inc
+--source include/rpl_default_connections.inc
+
+connection server_1;
+create database db1;
+
+create table db1.t1 (
+ `id` int(10) not null auto_increment,
+ `k` int(10),
+ `data` varchar(2048),
+ primary key (`id`),
+ key (`k`)
+) engine=rocksdb;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/setup_replication_gtid.sh b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/setup_replication_gtid.sh
new file mode 100755
index 00000000000..3c95068a488
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/setup_replication_gtid.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+set -e
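+# Recover the master binlog coordinates that server 2 printed during crash
+# recovery, map them to a GTID set by asking server 1, and use that set to
+# seed gtid_purged on the slave before starting replication.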
+
+binlog_line=($(grep -o "Last binlog file position [0-9]*, file name .*\.[0-9]*" ${MYSQLTEST_VARDIR}/log/mysqld.2.err | tail -1))
+binlog_pos=${binlog_line[4]%?}
+binlog_file=${binlog_line[7]}
+
+sql="show gtid_executed in '$binlog_file' from $binlog_pos"
+result=($($MYSQL --defaults-group-suffix=.1 -e "$sql"))
+gtid_executed=${result[1]}
+
+sql="reset master;"
+sql="$sql reset slave;"
+sql="$sql change master to master_host='127.0.0.1', master_port=${MASTER_MYPORT}, master_user='root', master_auto_position=1, master_connect_retry=1;"
+sql="$sql set global gtid_purged='$gtid_executed';"
+sql="$sql start slave;"
+sql="$sql stop slave;"
+sql="$sql change master to master_auto_position=0;"
+sql="$sql start slave;"
+$MYSQL --defaults-group-suffix=.2 -e "$sql"
+echo "$sql" > ${MYSQL_TMP_DIR}/gtid_stmt
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/setup_replication_gtid_and_sync.inc b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/setup_replication_gtid_and_sync.inc
new file mode 100644
index 00000000000..75dc31964da
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/setup_replication_gtid_and_sync.inc
@@ -0,0 +1,4 @@
+--exec suite/rocksdb_hotbackup/include/setup_replication_gtid.sh
+
+let $slave_sync_timeout = 1800;
+source include/wait_for_slave_to_sync_with_master.inc;
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/setup_slocket.inc b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/setup_slocket.inc
new file mode 100644
index 00000000000..ce889164219
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/setup_slocket.inc
@@ -0,0 +1,10 @@
+connection server_1;
+create database slocket;
+
+create table slocket.t1 (
+ `id` int(10) not null auto_increment,
+ `k` int(10),
+ `data` varchar(2048),
+ primary key (`id`),
+ key (`k`)
+) engine=rocksdb;
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/stream_run.sh b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/stream_run.sh
new file mode 100755
index 00000000000..6108cfbb1aa
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/stream_run.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+
+. suite/rocksdb_hotbackup/include/clean_tmpfiles.sh
+
+if [ "$STREAM_TYPE" == 'wdt' ]; then
+ which wdt >/dev/null 2>&1
+ if [ $? -ne 0 ]; then
+    # fall back to tar if wdt is not installed
+ STREAM_TYPE='tar'
+ fi
+fi
+
+set -e
+set -o pipefail
+
+# Takes a full backup from server_1 to server_2
+# using myrocks_hotbackup streaming
+
+checkpoint_dir="${MYSQLTEST_VARDIR}/checkpoint"
+backup_dir="${MYSQLTEST_VARDIR}/backup"
+dest_data_dir="${MYSQLTEST_VARDIR}/mysqld.2/data/"
+
+mysql_dir=$(echo $MYSQL | awk '{print $1}' | xargs dirname)
+PATH=$mysql_dir:$PATH
+
+mkdir -p $checkpoint_dir
+rm -rf $checkpoint_dir/*
+
+mkdir -p $backup_dir
+rm -rf $backup_dir/*
+# delete and recreate the dest dir to make sure all hidden files
+# and directories (such as .rocksdb) are blown away
+rm -rf $dest_data_dir/
+mkdir $dest_data_dir
+
+
+SIGNAL_CONDITION=""
+
+if [ "$FRM" == '1' ]; then
+ suite/rocksdb_hotbackup/include/create_table.sh $COPY_LOG $SIGNAL_FILE 2>&1 &
+fi
+
+if [ "$DEBUG_SIGNAL" == '1' ]; then
+ SIGNAL_CONDITION="--debug_signal_file=$SIGNAL_FILE"
+fi
+
+if [ "$STREAM_TYPE" == 'tar' ]; then
+ BACKUP_CMD="$MYSQL_MYROCKS_HOTBACKUP --user='root' --port=${MASTER_MYPORT} \
+ --stream=tar --checkpoint_dir=$checkpoint_dir $SIGNAL_CONDITION 2> \
+ $COPY_LOG | tar -xi -C $backup_dir"
+elif [ "$STREAM_TYPE" == 'wdt' ]; then
+ BACKUP_CMD="$MYSQL_MYROCKS_HOTBACKUP --user='root' --stream=wdt \
+ --port=${MASTER_MYPORT} --destination=localhost --backup_dir=$backup_dir \
+ --avg_mbytes_per_sec=10 --interval=5 $SIGNAL_CONDITION \
+ --extra_wdt_sender_options='--block_size_mbytes=1' \
+ --checkpoint_dir=$checkpoint_dir 2> \
+ $COPY_LOG"
+elif [ "$STREAM_TYPE" == "xbstream_socket" ]; then
+ BACKUP_CMD="$MYSQL_MYROCKS_HOTBACKUP --user='root' --socket=${MASTER_MYSOCK} \
+ --stream=xbstream --checkpoint_dir=$checkpoint_dir $SIGNAL_CONDITION 2> \
+ $COPY_LOG | xbstream -x \
+ --directory=$backup_dir"
+else
+ BACKUP_CMD="$MYSQL_MYROCKS_HOTBACKUP --user='root' --port=${MASTER_MYPORT} \
+ --stream=xbstream --checkpoint_dir=$checkpoint_dir $SIGNAL_CONDITION 2> \
+ $COPY_LOG | xbstream -x \
+ --directory=$backup_dir"
+fi
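+# The default (no STREAM_TYPE) is xbstream over TCP; the branches above only
+# change the transport and unpack command, not the backup itself.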
+
+echo "myrocks_hotbackup copy phase"
+eval "$BACKUP_CMD"
+
+mkdir ${backup_dir}/test # TODO: Fix skipping empty directories
+
+
+echo "myrocks_hotbackup move-back phase"
+$MYSQL_MYROCKS_HOTBACKUP --move_back --datadir=$dest_data_dir \
+ --rocksdb_datadir=$dest_data_dir/\#rocksdb \
+ --rocksdb_waldir=$dest_data_dir/\#rocksdb \
+ --backup_dir=$backup_dir > $MOVEBACK_LOG 2>&1
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/my.cnf b/storage/rocksdb/mysql-test/rocksdb_hotbackup/my.cnf
new file mode 100644
index 00000000000..bd9af04c813
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/my.cnf
@@ -0,0 +1,2 @@
+# Use settings from base.cnf
+!include base.cnf
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/gtid.result b/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/gtid.result
new file mode 100644
index 00000000000..6cec6ca5d69
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/gtid.result
@@ -0,0 +1,23 @@
+include/rpl_init.inc [topology=none]
+include/rpl_default_connections.inc
+create database db1;
+create table db1.t1 (
+`id` int(10) not null auto_increment,
+`k` int(10),
+`data` varchar(2048),
+primary key (`id`),
+key (`k`)
+) engine=rocksdb;
+include/rpl_stop_server.inc [server_number=2]
+myrocks_hotbackup copy phase
+myrocks_hotbackup move-back phase
+include/rpl_start_server.inc [server_number=2]
+stop slave;
+start slave;
+select count(*) from db1.t1;
+count(*)
+500000
+drop database db1;
+stop slave;
+reset slave;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/slocket.result b/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/slocket.result
new file mode 100644
index 00000000000..9accd18b294
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/slocket.result
@@ -0,0 +1,41 @@
+include/rpl_init.inc [topology=none]
+include/rpl_default_connections.inc
+create database db1;
+create table db1.t1 (
+`id` int(10) not null auto_increment,
+`k` int(10),
+`data` varchar(2048),
+primary key (`id`),
+key (`k`)
+) engine=rocksdb;
+create database slocket;
+create table slocket.t1 (
+`id` int(10) not null auto_increment,
+`k` int(10),
+`data` varchar(2048),
+primary key (`id`),
+key (`k`)
+) engine=rocksdb;
+include/rpl_stop_server.inc [server_number=2]
+myrocks_hotbackup copy phase
+myrocks_hotbackup move-back phase
+include/rpl_start_server.inc [server_number=2]
+select count(*) from db1.t1;
+count(*)
+250000
+select count(*) from slocket.t1;
+count(*)
+2500
+drop database slocket;
+drop database db1;
+drop database slocket;
+include/rpl_stop_server.inc [server_number=2]
+myrocks_hotbackup copy phase
+myrocks_hotbackup move-back phase
+include/rpl_start_server.inc [server_number=2]
+select count(*) from db1.t1;
+count(*)
+250000
+drop database db1;
+drop database db1;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/stream.result b/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/stream.result
new file mode 100644
index 00000000000..d3f2ebc4e6f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/stream.result
@@ -0,0 +1,20 @@
+include/rpl_init.inc [topology=none]
+include/rpl_default_connections.inc
+create database db1;
+create table db1.t1 (
+`id` int(10) not null auto_increment,
+`k` int(10),
+`data` varchar(2048),
+primary key (`id`),
+key (`k`)
+) engine=rocksdb;
+include/rpl_stop_server.inc [server_number=2]
+myrocks_hotbackup copy phase
+myrocks_hotbackup move-back phase
+include/rpl_start_server.inc [server_number=2]
+select count(*) from db1.t1;
+count(*)
+250000
+drop database db1;
+drop database db1;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/wdt.result b/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/wdt.result
new file mode 100644
index 00000000000..d3f2ebc4e6f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/wdt.result
@@ -0,0 +1,20 @@
+include/rpl_init.inc [topology=none]
+include/rpl_default_connections.inc
+create database db1;
+create table db1.t1 (
+`id` int(10) not null auto_increment,
+`k` int(10),
+`data` varchar(2048),
+primary key (`id`),
+key (`k`)
+) engine=rocksdb;
+include/rpl_stop_server.inc [server_number=2]
+myrocks_hotbackup copy phase
+myrocks_hotbackup move-back phase
+include/rpl_start_server.inc [server_number=2]
+select count(*) from db1.t1;
+count(*)
+250000
+drop database db1;
+drop database db1;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream.result b/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream.result
new file mode 100644
index 00000000000..31ed2677444
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream.result
@@ -0,0 +1,21 @@
+include/rpl_init.inc [topology=none]
+include/rpl_default_connections.inc
+create database db1;
+create table db1.t1 (
+`id` int(10) not null auto_increment,
+`k` int(10),
+`data` varchar(2048),
+primary key (`id`),
+key (`k`)
+) engine=rocksdb;
+include/rpl_stop_server.inc [server_number=2]
+myrocks_hotbackup copy phase
+myrocks_hotbackup copy phase
+myrocks_hotbackup move-back phase
+include/rpl_start_server.inc [server_number=2]
+select count(*) from db1.t1;
+count(*)
+250000
+drop database db1;
+drop database db1;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream_direct.result b/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream_direct.result
new file mode 100644
index 00000000000..31ed2677444
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream_direct.result
@@ -0,0 +1,21 @@
+include/rpl_init.inc [topology=none]
+include/rpl_default_connections.inc
+create database db1;
+create table db1.t1 (
+`id` int(10) not null auto_increment,
+`k` int(10),
+`data` varchar(2048),
+primary key (`id`),
+key (`k`)
+) engine=rocksdb;
+include/rpl_stop_server.inc [server_number=2]
+myrocks_hotbackup copy phase
+myrocks_hotbackup copy phase
+myrocks_hotbackup move-back phase
+include/rpl_start_server.inc [server_number=2]
+select count(*) from db1.t1;
+count(*)
+250000
+drop database db1;
+drop database db1;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream_socket.result b/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream_socket.result
new file mode 100644
index 00000000000..d3f2ebc4e6f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream_socket.result
@@ -0,0 +1,20 @@
+include/rpl_init.inc [topology=none]
+include/rpl_default_connections.inc
+create database db1;
+create table db1.t1 (
+`id` int(10) not null auto_increment,
+`k` int(10),
+`data` varchar(2048),
+primary key (`id`),
+key (`k`)
+) engine=rocksdb;
+include/rpl_stop_server.inc [server_number=2]
+myrocks_hotbackup copy phase
+myrocks_hotbackup move-back phase
+include/rpl_start_server.inc [server_number=2]
+select count(*) from db1.t1;
+count(*)
+250000
+drop database db1;
+drop database db1;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/gtid-master.opt b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/gtid-master.opt
new file mode 100644
index 00000000000..9d7af67eec9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/gtid-master.opt
@@ -0,0 +1 @@
+--gtid_mode=on --log_slave_updates=on --enforce_gtid_consistency=on
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/gtid-slave.opt b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/gtid-slave.opt
new file mode 100644
index 00000000000..9d7af67eec9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/gtid-slave.opt
@@ -0,0 +1 @@
+--gtid_mode=on --log_slave_updates=on --enforce_gtid_consistency=on
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/gtid.test b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/gtid.test
new file mode 100644
index 00000000000..f9d58da093e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/gtid.test
@@ -0,0 +1,47 @@
+
+source suite/rocksdb_hotbackup/include/setup.inc;
+
+--let $rpl_server_number= 2
+--source include/rpl_stop_server.inc
+
+--exec suite/rocksdb_hotbackup/include/load_data_and_run.sh 2>&1
+
+--let $rpl_server_number= 2
+--source include/rpl_start_server.inc
+
+connection server_2;
+let $num_rows = `select count(*) from db1.t1`;
+let $max_id = `select id from db1.t1 order by id desc limit 1`;
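+# load_data_and_run.sh loads one full chunk and races a second chunk against
+# the backup, so the restored copy should hold between 250000 and 500000
+# rows with contiguous auto_increment ids (hence count == max id).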
+
+if($num_rows != $max_id) {
+  echo Number of rows($num_rows) and last_id($max_id) do not match;
+}
+if($num_rows < 250000) {
+ echo Number of rows($num_rows) is less than 250000;
+}
+if($num_rows > 500000) {
+ echo Number of rows($num_rows) is more than 500000;
+}
+
+--source suite/rocksdb_hotbackup/include/setup_replication_gtid_and_sync.inc
+
+connection server_2;
+select count(*) from db1.t1;
+
+connection server_1;
+let $checksum1 = `checksum tables db1.t1`;
+connection server_2;
+let $checksum2 = `checksum tables db1.t1`;
+
+if($checksum1 != $checksum2) {
+ echo Checksums ($checksum1 and $checksum2) do not match;
+}
+
+connection server_1;
+drop database db1;
+sync_slave_with_master;
+connection server_2;
+stop slave;
+reset slave;
+
+source suite/rocksdb_hotbackup/include/cleanup.inc;
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/slocket.test b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/slocket.test
new file mode 100644
index 00000000000..14ad8d23376
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/slocket.test
@@ -0,0 +1,46 @@
+source suite/rocksdb_hotbackup/include/setup.inc;
+source suite/rocksdb_hotbackup/include/setup_slocket.inc;
+
+--exec suite/rocksdb_hotbackup/include/load_data.sh 2>&1
+--exec suite/rocksdb_hotbackup/include/load_data_slocket.sh 2>&1
+
+--let $rpl_server_number= 2
+--source include/rpl_stop_server.inc
+
+--exec suite/rocksdb_hotbackup/include/stream_run.sh 2>&1
+
+--let $rpl_server_number= 2
+--source include/rpl_start_server.inc
+
+connection server_2;
+select count(*) from db1.t1;
+select count(*) from slocket.t1;
+
+connection server_1;
+drop database slocket;
+connection server_2;
+drop database db1;
+drop database slocket;
+
+--exec sleep 2
+--exec suite/rocksdb_hotbackup/include/create_slocket_socket.sh 2>&1
+
+--let $rpl_server_number= 2
+--source include/rpl_stop_server.inc
+
+--exec suite/rocksdb_hotbackup/include/stream_run.sh 2>&1
+
+--let $rpl_server_number= 2
+--source include/rpl_start_server.inc
+
+connection server_2;
+select count(*) from db1.t1;
+
+connection server_1;
+drop database db1;
+connection server_2;
+drop database db1;
+
+--exec suite/rocksdb_hotbackup/include/remove_slocket_socket.sh 2>&1
+
+source suite/rocksdb_hotbackup/include/cleanup.inc;
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/stream.test b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/stream.test
new file mode 100644
index 00000000000..2b999f3fce7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/stream.test
@@ -0,0 +1,22 @@
+
+source suite/rocksdb_hotbackup/include/setup.inc;
+
+--exec suite/rocksdb_hotbackup/include/load_data.sh 2>&1
+--let $rpl_server_number= 2
+--source include/rpl_stop_server.inc
+
+--exec STREAM_TYPE=tar suite/rocksdb_hotbackup/include/stream_run.sh 2>&1
+
+--let $rpl_server_number= 2
+--source include/rpl_start_server.inc
+
+connection server_2;
+select count(*) from db1.t1;
+
+connection server_1;
+drop database db1;
+connection server_2;
+drop database db1;
+
+source suite/rocksdb_hotbackup/include/cleanup.inc;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/wdt.test b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/wdt.test
new file mode 100644
index 00000000000..2d2ed89112b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/wdt.test
@@ -0,0 +1,22 @@
+
+source suite/rocksdb_hotbackup/include/setup.inc;
+
+--let $rpl_server_number= 2
+--source include/rpl_stop_server.inc
+
+--exec suite/rocksdb_hotbackup/include/load_data.sh 2>&1
+--exec STREAM_TYPE=wdt suite/rocksdb_hotbackup/include/stream_run.sh 2>&1
+
+--let $rpl_server_number= 2
+--source include/rpl_start_server.inc
+
+connection server_2;
+select count(*) from db1.t1;
+
+connection server_1;
+drop database db1;
+connection server_2;
+drop database db1;
+
+source suite/rocksdb_hotbackup/include/cleanup.inc;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.inc b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.inc
new file mode 100644
index 00000000000..52456a68140
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.inc
@@ -0,0 +1,25 @@
+
+source suite/rocksdb_hotbackup/include/setup.inc;
+
+--exec suite/rocksdb_hotbackup/include/load_data.sh 2>&1
+--let $rpl_server_number= 2
+--source include/rpl_stop_server.inc
+
+--error 1
+--exec STREAM_TYPE=xbstream FRM=1 DEBUG_SIGNAL=1 suite/rocksdb_hotbackup/include/stream_run.sh 2>&1
+
+--exec STREAM_TYPE=xbstream suite/rocksdb_hotbackup/include/stream_run.sh 2>&1
+
+--let $rpl_server_number= 2
+--source include/rpl_start_server.inc
+
+connection server_2;
+select count(*) from db1.t1;
+
+connection server_1;
+drop database db1;
+connection server_2;
+drop database db1;
+
+source suite/rocksdb_hotbackup/include/cleanup.inc;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.test b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.test
new file mode 100644
index 00000000000..18816c34446
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.test
@@ -0,0 +1,7 @@
+--source include/have_rocksdb.inc
+--source xbstream.inc
+let SEARCH_FILE= $MYSQL_TMP_DIR/myrocks_hotbackup_copy_log;
+let SEARCH_PATTERN= Direct I/O: 0;
+--source include/search_pattern_in_file.inc
+
+--exec suite/rocksdb_hotbackup/include/clean_tmpfiles.sh
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_direct-master.opt b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_direct-master.opt
new file mode 100644
index 00000000000..4ab98aeabe1
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_direct-master.opt
@@ -0,0 +1 @@
+--rocksdb_use_direct_reads=ON --rocksdb_use_direct_io_for_flush_and_compaction=ON
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_direct.test b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_direct.test
new file mode 100644
index 00000000000..41357d68415
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_direct.test
@@ -0,0 +1,7 @@
+--source include/have_rocksdb.inc
+--source xbstream.inc
+let SEARCH_FILE= $MYSQL_TMP_DIR/myrocks_hotbackup_copy_log;
+let SEARCH_PATTERN= Direct I/O: 1;
+--source include/search_pattern_in_file.inc
+
+--exec suite/rocksdb_hotbackup/include/clean_tmpfiles.sh
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_socket.test b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_socket.test
new file mode 100644
index 00000000000..28edff072e7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_socket.test
@@ -0,0 +1,22 @@
+
+source suite/rocksdb_hotbackup/include/setup.inc;
+
+--exec suite/rocksdb_hotbackup/include/load_data.sh 2>&1
+--let $rpl_server_number= 2
+--source include/rpl_stop_server.inc
+
+--exec STREAM_TYPE=xbstream_socket suite/rocksdb_hotbackup/include/stream_run.sh 2>&1
+
+--let $rpl_server_number= 2
+--source include/rpl_start_server.inc
+
+connection server_2;
+select count(*) from db1.t1;
+
+connection server_1;
+drop database db1;
+connection server_2;
+drop database db1;
+
+source suite/rocksdb_hotbackup/include/cleanup.inc;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/combinations b/storage/rocksdb/mysql-test/rocksdb_rpl/combinations
new file mode 100644
index 00000000000..eae7431662b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/combinations
@@ -0,0 +1,7 @@
+[row-write-committed]
+binlog-format=row
+rocksdb_write_policy=write_committed
+
+[row-write-prepared]
+binlog-format=row
+rocksdb_write_policy=write_prepared
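+
+# Assumed RocksDB semantics: write_committed stages transaction data until
+# commit, while write_prepared writes it to the memtable at XA prepare time.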
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/include/have_rocksdb.inc b/storage/rocksdb/mysql-test/rocksdb_rpl/include/have_rocksdb.inc
new file mode 100644
index 00000000000..1f762d38c64
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/include/have_rocksdb.inc
@@ -0,0 +1,10 @@
+if (`SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'rocksdb' AND support IN ('YES', 'DEFAULT', 'ENABLED')`)
+{
+ --skip Test requires engine RocksDB.
+}
+
+--disable_query_log
+# Table statistics can vary depending on when the memtables are flushed, so
+# flush them at the beginning of the test to ensure the test runs consistently.
+set global rocksdb_force_flush_memtable_now = true;
+--enable_query_log
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/include/have_rocksdb.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/include/have_rocksdb.opt
new file mode 100644
index 00000000000..36d7dda1609
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/include/have_rocksdb.opt
@@ -0,0 +1,12 @@
+--loose-enable-rocksdb
+--loose-enable-rocksdb_global_info
+--loose-enable-rocksdb_ddl
+--loose-enable-rocksdb_cf_options
+--loose-enable-rocksdb_perf_context
+--loose-enable-rocksdb_perf_context_global
+--loose-enable-rocksdb_index_file_map
+--loose-enable-rocksdb_dbstats
+--loose-enable-rocksdb_cfstats
+--loose-enable-rocksdb_lock_info
+--loose-enable-rocksdb_trx
+--loose-enable-rocksdb_locks
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_gtid_crash_safe.inc b/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_gtid_crash_safe.inc
new file mode 100644
index 00000000000..f0c0134e4d1
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_gtid_crash_safe.inc
@@ -0,0 +1,37 @@
+
+-- let $engine = ROCKSDB
+
+call mtr.add_suppression("Recovery from master pos");
+
+-- let $debug_option = crash_before_update_pos
+-- source extra/rpl_tests/rpl_gtid_crash_safe.inc
+
+-- source include/rpl_reset.inc
+-- let $debug_option = crash_after_update_pos_before_apply
+-- source extra/rpl_tests/rpl_gtid_crash_safe.inc
+
+-- source include/rpl_reset.inc
+-- let $debug_option = crash_before_writing_xid
+-- source extra/rpl_tests/rpl_gtid_crash_safe.inc
+
+-- source include/rpl_reset.inc
+-- let $debug_option = half_binlogged_transaction
+-- source extra/rpl_tests/rpl_gtid_crash_safe.inc
+
+-- source include/rpl_reset.inc
+-- let $debug_option = crash_commit_before
+-- source extra/rpl_tests/rpl_gtid_crash_safe.inc
+
+-- source include/rpl_reset.inc
+-- let $debug_option = crash_commit_after_log
+-- source extra/rpl_tests/rpl_gtid_crash_safe.inc
+
+-- source include/rpl_reset.inc
+-- let $debug_option = crash_commit_after_prepare
+-- source extra/rpl_tests/rpl_gtid_crash_safe.inc
+
+-- source include/rpl_reset.inc
+-- let $debug_option = crash_commit_after
+-- source extra/rpl_tests/rpl_gtid_crash_safe.inc
+
+-- source include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_no_unique_check_on_lag.inc b/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_no_unique_check_on_lag.inc
new file mode 100644
index 00000000000..d983bdf8b58
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_no_unique_check_on_lag.inc
@@ -0,0 +1,72 @@
+--source include/master-slave.inc
+--source include/have_binlog_format_row.inc
+--source include/not_embedded.inc
+--source include/not_valgrind.inc
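+
+# Scenario, as exercised below: rows inserted directly on the slave collide
+# with later master writes; the test toggles reset_seconds_behind_master and
+# creates artificial lag to observe when the duplicate-key error surfaces
+# and is skipped.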
+
+call mtr.add_suppression("Slave SQL: Could not execute Write_rows event on table test.t1");
+call mtr.add_suppression(".*Worker.*failed executing transaction");
+call mtr.add_suppression(".*The slave coordinator and worker threads are stopped");
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+connection master;
+CREATE TABLE t1 (id int primary key, value int) engine=RocksDB;
+sync_slave_with_master;
+--let $rsbm = query_get_value(select @@global.reset_seconds_behind_master, @@global.reset_seconds_behind_master, 1)
+set global reset_seconds_behind_master=1;
+
+connection slave;
+INSERT INTO t1 VALUES(1, 0);
+INSERT INTO t1 VALUES(2, 0);
+INSERT INTO t1 VALUES(3, 0);
+
+connection master;
+sync_slave_with_master;
+connection master;
+INSERT INTO t1 VALUES(1, 1);
+
+connection slave;
+--let $slave_sql_errno= 1062
+--let $not_switch_connection= 0
+--let $slave_timeout= 120
+--source include/wait_for_slave_sql_error_and_skip.inc
+set global reset_seconds_behind_master=0;
+--source include/stop_slave_io.inc
+
+connection master;
+INSERT INTO t1 values (4,0);
+--sleep 11
+INSERT INTO t1 VALUES(2, 1);
+
+connection slave;
+--source include/start_slave_io.inc
+
+connection master;
+sync_slave_with_master;
+
+connection slave;
+set global reset_seconds_behind_master=1;
+
+connection master;
+insert into t1 values (5,0);
+--sleep 1
+sync_slave_with_master;
+
+connection master;
+INSERT INTO t1 VALUES(3, 1);
+
+connection slave;
+--let $slave_sql_errno= 1062
+--let $not_switch_connection= 0
+--source include/wait_for_slave_sql_error_and_skip.inc
+
+--echo #
+--echo # Cleanup
+--echo #
+
+connection master;
+DROP TABLE t1;
+eval set global reset_seconds_behind_master=$rsbm;
+--source include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/my.cnf b/storage/rocksdb/mysql-test/rocksdb_rpl/my.cnf
new file mode 100644
index 00000000000..518b16535df
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/my.cnf
@@ -0,0 +1,17 @@
+!include rpl_1slave_base.cnf
+!include include/default_my.cnf
+
+[server]
+skip-innodb
+default-storage-engine=rocksdb
+
+sql-mode=NO_ENGINE_SUBSTITUTION
+explicit-defaults-for-timestamp=1
+loose-rocksdb_lock_wait_timeout=1
+loose-rocksdb_strict_collation_check=0
+
+loose-rocksdb-flush-log-at-trx-commit=0
+
+# The following is to get rid of the harmless
+# "Deadlock found when trying to get lock" errors, see MDEV-12285.
+log-warnings=1
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/consistent_snapshot_mixed_engines.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/consistent_snapshot_mixed_engines.result
new file mode 100644
index 00000000000..31777c45c68
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/consistent_snapshot_mixed_engines.result
@@ -0,0 +1,68 @@
+DROP TABLE IF EXISTS t1;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+create table i1 (id int primary key , value int) engine=innodb;
+create table r1 (id int primary key , value int) engine=rocksdb;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+connection con2;
+insert into i1 values (1,1);
+insert into r1 values (1,1);
+connection con1;
+select * from i1;
+id value
+select * from r1;
+id value
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+File Position Gtid_executed
+master-bin.000001 1115 uuid:1-5
+connection con2;
+insert into i1 values (2,2);
+insert into r1 values (2,2);
+connection con1;
+select * from i1;
+id value
+1 1
+2 2
+select * from r1;
+id value
+1 1
+connection con2;
+insert into i1 values (3,2);
+insert into r1 values (3,2);
+connection con1;
+select * from i1;
+id value
+1 1
+2 2
+select * from r1;
+id value
+1 1
+START TRANSACTION WITH CONSISTENT INNODB SNAPSHOT;
+File Position Gtid_executed
+master-bin.000001 2015 uuid:1-9
+connection con2;
+insert into r1 values (4,4);
+connection con1;
+select * from r1;
+id value
+1 1
+2 2
+3 2
+4 4
+connection con2;
+insert into r1 values (5,5);
+connection con1;
+select * from r1;
+id value
+1 1
+2 2
+3 2
+4 4
+drop table i1;
+drop table r1;
+connection default;
+disconnect con1;
+disconnect con2;
+reset master;
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/multiclient_2pc.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/multiclient_2pc.result
new file mode 100644
index 00000000000..493107ec071
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/multiclient_2pc.result
@@ -0,0 +1,26 @@
+DROP TABLE IF EXISTS t1;
+SET GLOBAL MAX_BINLOG_SIZE = 4096;
+SET GLOBAL ROCKSDB_ENABLE_2PC = ON;
+create table t1 (a int primary key, b int, c varchar(255)) engine=rocksdb;
+'con1'
+SET SESSION debug="d,crash_commit_after_log";
+SET DEBUG_SYNC='rocksdb.prepared SIGNAL parked WAIT_FOR go';
+insert into t1 values (1, 1, "iamtheogthealphaandomega");;
+'con2'
+insert into t1 values (2, 1, "i_am_just_here_to_trigger_a_flush");
+SET GLOBAL ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 0;
+SET GLOBAL SYNC_BINLOG = 0;
+SET DEBUG_SYNC='now WAIT_FOR parked';
+SET GLOBAL ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 2;
+SET GLOBAL SYNC_BINLOG = 1;
+insert into t1 values (1000000, 1, "i_am_just_here_to_trigger_a_flush");
+SET DEBUG_SYNC='now SIGNAL go';
+**found 'prepare' log entry**
+**found 'commit' log entry**
+select * from t1 where a=1;
+a b c
+1 1 iamtheogthealphaandomega
+select count(*) from t1;
+count(*)
+4096
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/optimize_myrocks_replace_into.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/optimize_myrocks_replace_into.result
new file mode 100644
index 00000000000..1b41405fd5e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/optimize_myrocks_replace_into.result
@@ -0,0 +1,282 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+SET @prior_rocksdb_perf_context_level = @@rocksdb_perf_context_level;
+SET GLOBAL rocksdb_perf_context_level=3;
+SET GLOBAL enable_blind_replace=ON;
+create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+c1 c2
+1 1
+2 2
+3 3
+create table t2(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+insert into t2 values(1,1),(2,2),(3,3);
+select * from t2;
+c1 c2
+1 1
+2 2
+3 3
+create table t3(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+insert into t3 values(1,1),(2,2),(3,3);
+select * from t3;
+c1 c2
+1 1
+2 2
+3 3
+SET GLOBAL enable_blind_replace=ON;
+create trigger trg before insert on t2 for each row set @a:=1;
+alter table t3 add constraint slave_unique_key unique (c2);
+connect slave
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+Case 1
+connect master
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+replace into t1 values(2,22);
+replace into t1 values(3,33);
+select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+select * from t1;
+c1 c2
+1 11
+2 22
+3 33
+connect slave
+select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+select * from t1;
+c1 c2
+1 11
+2 22
+3 33
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+Case 2
+connect master
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(2,44),(3,55);
+select case when variable_value-@d > 2 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+select * from t1;
+c1 c2
+1 11
+2 44
+3 55
+connect slave
+select case when variable_value-@d > 2 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+select * from t1;
+c1 c2
+1 11
+2 44
+3 55
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+Case 3
+connect master
+update t1 set c2=66 where c1=3;
+select * from t1;
+c1 c2
+1 11
+2 44
+3 66
+connect slave
+select * from t1;
+c1 c2
+1 11
+2 44
+3 66
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+Case 4
+connect master
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t2 values(1,111);
+replace into t2 values(2,222);
+replace into t2 values(3,333);
+select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+select * from t2;
+c1 c2
+1 111
+2 222
+3 333
+connect slave
+select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+false
+select * from t2;
+c1 c2
+1 111
+2 222
+3 333
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+Case 5
+connect master
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t3 values(1,1111);
+replace into t3 values(2,2222);
+replace into t3 values(3,3333);
+select * from t3;
+c1 c2
+1 1111
+2 2222
+3 3333
+select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+connect slave
+select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+false
+select * from t3;
+c1 c2
+1 1111
+2 2222
+3 3333
+select * from t3 use index (slave_unique_key);
+c1 c2
+1 1111
+2 2222
+3 3333
+Case 6
+include/show_binlog_events.inc
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Query # # use `test`; create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t1)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # use `test`; create table t2(c1 int,c2 int, primary key (c1)) engine=rocksdb
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t2)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # use `test`; create table t3(c1 int,c2 int, primary key (c1)) engine=rocksdb
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t3)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t1)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t1)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t1)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t1)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t1)
+master-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t2)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t2)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t2)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t3)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t3)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t3)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+include/show_binlog_events.inc
+Log_name Pos Event_type Server_id End_log_pos Info
+slave-bin.000001 # Query # # use `test`; create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t1)
+slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # use `test`; create table t2(c1 int,c2 int, primary key (c1)) engine=rocksdb
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t2)
+slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # use `test`; create table t3(c1 int,c2 int, primary key (c1)) engine=rocksdb
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t3)
+slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # use `test`; CREATE DEFINER=`root`@`localhost` trigger trg before insert on t2 for each row set @a:=1
+slave-bin.000001 # Query # # use `test`; alter table t3 add constraint slave_unique_key unique (c2)
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t1)
+slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t1)
+slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t1)
+slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t1)
+slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t1)
+slave-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t2)
+slave-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t2)
+slave-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t2)
+slave-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t3)
+slave-bin.000001 # Delete_rows # # table_id: #
+slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t3)
+slave-bin.000001 # Delete_rows # # table_id: #
+slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t3)
+slave-bin.000001 # Delete_rows # # table_id: #
+slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+drop table t1;
+drop table t2;
+drop table t3;
+SET GLOBAL rocksdb_perf_context_level = @prior_rocksdb_perf_context_level;
+SET GLOBAL enable_blind_replace=DEFAULT;
+SET GLOBAL enable_blind_replace=DEFAULT;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rocksdb_slave_check_before_image_consistency.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rocksdb_slave_check_before_image_consistency.result
new file mode 100644
index 00000000000..a770822285b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rocksdb_slave_check_before_image_consistency.result
@@ -0,0 +1,165 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+call mtr.add_suppression("Error_code: 1032");
+create table t1 (a int primary key, b int, c int) engine = rocksdb;
+create table t2 (a int unique, b int, c int) engine = rocksdb;
+create table t3 (a int, b int, c int, key(a)) engine = rocksdb;
+create table t4 (a int, b int, c int) engine = rocksdb;
+insert into t1 values(1, 1, 1);
+insert into t2 values(1, 1, 1);
+insert into t3 values(1, 1, 1);
+insert into t4 values(1, 1, 1);
+include/sync_slave_sql_with_master.inc
+set @@sql_log_bin = 0;
+update t1 set c = 2;
+update t2 set c = 2;
+update t3 set c = 2;
+update t4 set c = 2;
+set @@sql_log_bin = 1;
+update t1 set b = 2;
+include/wait_for_slave_sql_error.inc [errno=1032]
+set @@sql_log_bin = 0;
+update t1 set c = 1;
+set @@sql_log_bin = 1;
+include/stop_slave.inc
+include/start_slave.inc
+include/sync_slave_sql_with_master.inc
+update t2 set b = 2;
+include/wait_for_slave_sql_error.inc [errno=1032]
+set @@sql_log_bin = 0;
+update t2 set c = 1;
+set @@sql_log_bin = 1;
+include/stop_slave.inc
+include/start_slave.inc
+include/sync_slave_sql_with_master.inc
+update t3 set b = 2;
+include/wait_for_slave_sql_error.inc [errno=1032]
+set @@sql_log_bin = 0;
+update t3 set c = 1;
+set @@sql_log_bin = 1;
+include/stop_slave.inc
+include/start_slave.inc
+include/sync_slave_sql_with_master.inc
+update t4 set b = 2;
+include/wait_for_slave_sql_error.inc [errno=1032]
+set @@sql_log_bin = 0;
+update t4 set c = 1;
+set @@sql_log_bin = 1;
+include/stop_slave.inc
+include/start_slave.inc
+include/sync_slave_sql_with_master.inc
+select * from t1;
+a b c
+1 2 1
+select * from t2;
+a b c
+1 2 1
+select * from t3;
+a b c
+1 2 1
+select * from t4;
+a b c
+1 2 1
+select * from t1;
+a b c
+1 2 1
+select * from t2;
+a b c
+1 2 1
+select * from t3;
+a b c
+1 2 1
+select * from t4;
+a b c
+1 2 1
+drop table t1;
+drop table t2;
+drop table t3;
+drop table t4;
+include/sync_slave_sql_with_master.inc
+include/stop_slave.inc
+set @@global.slave_rows_search_algorithms = 'INDEX_SCAN,TABLE_SCAN,HASH_SCAN';
+include/start_slave.inc
+create table t1 (a int primary key, b int, c int) engine = rocksdb;
+create table t2 (a int unique, b int, c int) engine = rocksdb;
+create table t3 (a int, b int, c int, key(a)) engine = rocksdb;
+create table t4 (a int, b int, c int) engine = rocksdb;
+insert into t1 values(1, 1, 1);
+insert into t2 values(1, 1, 1);
+insert into t3 values(1, 1, 1);
+insert into t4 values(1, 1, 1);
+include/sync_slave_sql_with_master.inc
+set @@sql_log_bin = 0;
+update t1 set c = 2;
+update t2 set c = 2;
+update t3 set c = 2;
+update t4 set c = 2;
+set @@sql_log_bin = 1;
+update t1 set b = 2;
+include/wait_for_slave_sql_error.inc [errno=1032]
+set @@sql_log_bin = 0;
+update t1 set c = 1;
+set @@sql_log_bin = 1;
+include/stop_slave.inc
+include/start_slave.inc
+include/sync_slave_sql_with_master.inc
+update t2 set b = 2;
+include/wait_for_slave_sql_error.inc [errno=1032]
+set @@sql_log_bin = 0;
+update t2 set c = 1;
+set @@sql_log_bin = 1;
+include/stop_slave.inc
+include/start_slave.inc
+include/sync_slave_sql_with_master.inc
+update t3 set b = 2;
+include/wait_for_slave_sql_error.inc [errno=1032]
+set @@sql_log_bin = 0;
+update t3 set c = 1;
+set @@sql_log_bin = 1;
+include/stop_slave.inc
+include/start_slave.inc
+include/sync_slave_sql_with_master.inc
+update t4 set b = 2;
+include/wait_for_slave_sql_error.inc [errno=1032]
+set @@sql_log_bin = 0;
+update t4 set c = 1;
+set @@sql_log_bin = 1;
+include/stop_slave.inc
+include/start_slave.inc
+include/sync_slave_sql_with_master.inc
+select * from t1;
+a b c
+1 2 1
+select * from t2;
+a b c
+1 2 1
+select * from t3;
+a b c
+1 2 1
+select * from t4;
+a b c
+1 2 1
+select * from t1;
+a b c
+1 2 1
+select * from t2;
+a b c
+1 2 1
+select * from t3;
+a b c
+1 2 1
+select * from t4;
+a b c
+1 2 1
+drop table t1;
+drop table t2;
+drop table t3;
+drop table t4;
+include/sync_slave_sql_with_master.inc
+include/stop_slave.inc
+set @@global.slave_rows_search_algorithms = DEFAULT;
+include/start_slave.inc
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_binlog_xid_count.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_binlog_xid_count.result
new file mode 100644
index 00000000000..9b46a5b5227
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_binlog_xid_count.result
@@ -0,0 +1,204 @@
+CREATE TABLE `t` (
+`a` text DEFAULT NULL
+) ENGINE=ROCKSDB;
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+INSERT INTO t SET a=repeat('a', 4096);
+INSERT INTO t SET a=repeat('a', 4096/2);
+DROP TABLE t;
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_crash_safe_wal_corrupt.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_crash_safe_wal_corrupt.result
new file mode 100644
index 00000000000..6d061e99846
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_crash_safe_wal_corrupt.result
@@ -0,0 +1,135 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+drop table if exists x;
+select @@binlog_format;
+@@binlog_format
+ROW
+create table x (id int primary key, value int, value2 int, index(value)) engine=rocksdb;
+insert into x values (1,1,1);
+insert into x values (2,1,1);
+insert into x values (3,1,1);
+insert into x values (4,1,1);
+insert into x values (5,1,1);
+select @@global.gtid_executed;
+@@global.gtid_executed
+
+
+--- slave state before crash ---
+select * from x;
+id value value2
+1 1 1
+2 1 1
+3 1 1
+4 1 1
+5 1 1
+select @@global.gtid_executed;
+@@global.gtid_executed
+
+select * from mysql.slave_gtid_info;
+Id Database_name Last_gtid
+include/rpl_start_server.inc [server_number=2]
+
+--- slave state after crash recovery, slave stop, one transaction recovered---
+select * from x;
+id value value2
+1 1 1
+2 1 1
+3 1 1
+4 1 1
+select @@global.gtid_executed;
+@@global.gtid_executed
+
+select * from mysql.slave_gtid_info;
+Id Database_name Last_gtid
+
+--- slave state after restart, slave start ---
+include/start_slave.inc
+select * from x;
+id value value2
+1 1 1
+2 1 1
+3 1 1
+4 1 1
+5 1 1
+select @@global.gtid_executed;
+@@global.gtid_executed
+
+select * from mysql.slave_gtid_info;
+Id Database_name Last_gtid
+insert into x values (6,1,1);
+select * from x;
+id value value2
+1 1 1
+2 1 1
+3 1 1
+4 1 1
+5 1 1
+6 1 1
+select @@global.gtid_executed;
+@@global.gtid_executed
+
+select * from mysql.slave_gtid_info;
+Id Database_name Last_gtid
+insert into x values (7,1,1);
+insert into x values (8,1,1);
+insert into x values (9,1,1);
+insert into x values (10,1,1);
+insert into x values (11,1,1);
+insert into x values (12,1,1);
+select * from x;
+id value value2
+1 1 1
+2 1 1
+3 1 1
+4 1 1
+5 1 1
+6 1 1
+7 1 1
+8 1 1
+9 1 1
+10 1 1
+11 1 1
+12 1 1
+select @@global.gtid_executed;
+@@global.gtid_executed
+
+include/rpl_start_server.inc [server_number=2]
+
+--- slave state after crash recovery, slave stop, WAL was corrupted, point in time recovery with wal_recovery_mode=2 ---
+select * from x;
+id value value2
+1 1 1
+2 1 1
+3 1 1
+4 1 1
+5 1 1
+6 1 1
+7 1 1
+8 1 1
+9 1 1
+include/start_slave.inc
+select * from x;
+id value value2
+1 1 1
+2 1 1
+3 1 1
+4 1 1
+5 1 1
+6 1 1
+7 1 1
+8 1 1
+9 1 1
+10 1 1
+11 1 1
+12 1 1
+select @@global.gtid_executed;
+@@global.gtid_executed
+
+select * from mysql.slave_gtid_info;
+Id Database_name Last_gtid
+drop table x;
+include/rpl_end.inc
+Binlog Info Found
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_ddl_high_priority.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_ddl_high_priority.result
new file mode 100644
index 00000000000..8a1fd1b94e0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_ddl_high_priority.result
@@ -0,0 +1,39 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+include/rpl_connect.inc [creating slave_block]
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection slave
+select * from t1;
+i
+1
+2
+3
+connection slave_block
+lock tables t1 read;
+connection master;
+create high_priority index idx1 on t1 (i);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL,
+ KEY `idx1` (`i`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+connection slave;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL,
+ KEY `idx1` (`i`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+drop table t1;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_crash_safe.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_crash_safe.result
new file mode 100644
index 00000000000..352ceff236c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_crash_safe.result
@@ -0,0 +1,361 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+call mtr.add_suppression("Recovery from master pos");
+create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB;
+insert into t1 values(1);
+insert into t1 values(2);
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 mtr uuid:1
+2 test uuid:4
+SET GLOBAL debug = '+d,crash_before_update_pos';
+insert into t1 values(3);
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 mtr uuid:1
+2 test uuid:4
+use test;
+select * from t1;
+a
+1
+2
+change master to master_auto_position = 1;
+include/start_slave.inc
+rename table t1 to test1;
+use test;
+select * from test1;
+a
+1
+2
+3
+use test;
+select * from test1;
+a
+1
+2
+3
+drop table test1;
+include/stop_slave.inc
+change master to master_auto_position = 0;
+include/start_slave.inc
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 mtr uuid:1
+2 test uuid:7
+include/rpl_reset.inc
+create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB;
+insert into t1 values(1);
+insert into t1 values(2);
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+SET GLOBAL debug = '+d,crash_after_update_pos_before_apply';
+insert into t1 values(3);
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+use test;
+select * from t1;
+a
+1
+2
+change master to master_auto_position = 1;
+include/start_slave.inc
+rename table t1 to test1;
+use test;
+select * from test1;
+a
+1
+2
+3
+use test;
+select * from test1;
+a
+1
+2
+3
+drop table test1;
+include/stop_slave.inc
+change master to master_auto_position = 0;
+include/start_slave.inc
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:6
+include/rpl_reset.inc
+create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB;
+insert into t1 values(1);
+insert into t1 values(2);
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+SET GLOBAL debug = '+d,crash_before_writing_xid';
+insert into t1 values(3);
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+use test;
+select * from t1;
+a
+1
+2
+change master to master_auto_position = 1;
+include/start_slave.inc
+rename table t1 to test1;
+use test;
+select * from test1;
+a
+1
+2
+3
+use test;
+select * from test1;
+a
+1
+2
+3
+drop table test1;
+include/stop_slave.inc
+change master to master_auto_position = 0;
+include/start_slave.inc
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:6
+include/rpl_reset.inc
+create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB;
+insert into t1 values(1);
+insert into t1 values(2);
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+SET GLOBAL debug = '+d,half_binlogged_transaction';
+insert into t1 values(3);
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+use test;
+select * from t1;
+a
+1
+2
+change master to master_auto_position = 1;
+include/start_slave.inc
+rename table t1 to test1;
+use test;
+select * from test1;
+a
+1
+2
+3
+use test;
+select * from test1;
+a
+1
+2
+3
+drop table test1;
+include/stop_slave.inc
+change master to master_auto_position = 0;
+include/start_slave.inc
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:6
+include/rpl_reset.inc
+create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB;
+insert into t1 values(1);
+insert into t1 values(2);
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+SET GLOBAL debug = '+d,crash_commit_before';
+insert into t1 values(3);
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+use test;
+select * from t1;
+a
+1
+2
+change master to master_auto_position = 1;
+include/start_slave.inc
+rename table t1 to test1;
+use test;
+select * from test1;
+a
+1
+2
+3
+use test;
+select * from test1;
+a
+1
+2
+3
+drop table test1;
+include/stop_slave.inc
+change master to master_auto_position = 0;
+include/start_slave.inc
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:6
+include/rpl_reset.inc
+create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB;
+insert into t1 values(1);
+insert into t1 values(2);
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+SET GLOBAL debug = '+d,crash_commit_after_log';
+insert into t1 values(3);
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+use test;
+select * from t1;
+a
+1
+2
+change master to master_auto_position = 1;
+include/start_slave.inc
+rename table t1 to test1;
+use test;
+select * from test1;
+a
+1
+2
+3
+use test;
+select * from test1;
+a
+1
+2
+3
+drop table test1;
+include/stop_slave.inc
+change master to master_auto_position = 0;
+include/start_slave.inc
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:6
+include/rpl_reset.inc
+create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB;
+insert into t1 values(1);
+insert into t1 values(2);
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+SET GLOBAL debug = '+d,crash_commit_after_prepare';
+insert into t1 values(3);
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+use test;
+select * from t1;
+a
+1
+2
+change master to master_auto_position = 1;
+include/start_slave.inc
+rename table t1 to test1;
+use test;
+select * from test1;
+a
+1
+2
+3
+use test;
+select * from test1;
+a
+1
+2
+3
+drop table test1;
+include/stop_slave.inc
+change master to master_auto_position = 0;
+include/start_slave.inc
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:6
+include/rpl_reset.inc
+create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB;
+insert into t1 values(1);
+insert into t1 values(2);
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+SET GLOBAL debug = '+d,crash_commit_after';
+insert into t1 values(3);
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+use test;
+select * from t1;
+a
+1
+2
+change master to master_auto_position = 1;
+include/start_slave.inc
+rename table t1 to test1;
+use test;
+select * from test1;
+a
+1
+2
+3
+use test;
+select * from test1;
+a
+1
+2
+3
+drop table test1;
+include/stop_slave.inc
+change master to master_auto_position = 0;
+include/start_slave.inc
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:6
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_crash_safe_optimized.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_crash_safe_optimized.result
new file mode 100644
index 00000000000..a518de2b6e3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_crash_safe_optimized.result
@@ -0,0 +1,361 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+call mtr.add_suppression("Recovery from master pos");
+create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB;
+insert into t1 values(1);
+insert into t1 values(2);
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 mtr
+2 test uuid:4
+SET GLOBAL debug = '+d,crash_before_update_pos';
+insert into t1 values(3);
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 mtr
+2 test uuid:4
+use test;
+select * from t1;
+a
+1
+2
+change master to master_auto_position = 1;
+include/start_slave.inc
+rename table t1 to test1;
+use test;
+select * from test1;
+a
+1
+2
+3
+use test;
+select * from test1;
+a
+1
+2
+3
+drop table test1;
+include/stop_slave.inc
+change master to master_auto_position = 0;
+include/start_slave.inc
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 mtr
+2 test uuid:5
+include/rpl_reset.inc
+create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB;
+insert into t1 values(1);
+insert into t1 values(2);
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+SET GLOBAL debug = '+d,crash_after_update_pos_before_apply';
+insert into t1 values(3);
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+use test;
+select * from t1;
+a
+1
+2
+change master to master_auto_position = 1;
+include/start_slave.inc
+rename table t1 to test1;
+use test;
+select * from test1;
+a
+1
+2
+3
+use test;
+select * from test1;
+a
+1
+2
+3
+drop table test1;
+include/stop_slave.inc
+change master to master_auto_position = 0;
+include/start_slave.inc
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:4
+include/rpl_reset.inc
+create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB;
+insert into t1 values(1);
+insert into t1 values(2);
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+SET GLOBAL debug = '+d,crash_before_writing_xid';
+insert into t1 values(3);
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+use test;
+select * from t1;
+a
+1
+2
+change master to master_auto_position = 1;
+include/start_slave.inc
+rename table t1 to test1;
+use test;
+select * from test1;
+a
+1
+2
+3
+use test;
+select * from test1;
+a
+1
+2
+3
+drop table test1;
+include/stop_slave.inc
+change master to master_auto_position = 0;
+include/start_slave.inc
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:4
+include/rpl_reset.inc
+create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB;
+insert into t1 values(1);
+insert into t1 values(2);
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+SET GLOBAL debug = '+d,half_binlogged_transaction';
+insert into t1 values(3);
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+use test;
+select * from t1;
+a
+1
+2
+change master to master_auto_position = 1;
+include/start_slave.inc
+rename table t1 to test1;
+use test;
+select * from test1;
+a
+1
+2
+3
+use test;
+select * from test1;
+a
+1
+2
+3
+drop table test1;
+include/stop_slave.inc
+change master to master_auto_position = 0;
+include/start_slave.inc
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:4
+include/rpl_reset.inc
+create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB;
+insert into t1 values(1);
+insert into t1 values(2);
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+SET GLOBAL debug = '+d,crash_commit_before';
+insert into t1 values(3);
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+use test;
+select * from t1;
+a
+1
+2
+change master to master_auto_position = 1;
+include/start_slave.inc
+rename table t1 to test1;
+use test;
+select * from test1;
+a
+1
+2
+3
+use test;
+select * from test1;
+a
+1
+2
+3
+drop table test1;
+include/stop_slave.inc
+change master to master_auto_position = 0;
+include/start_slave.inc
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:4
+include/rpl_reset.inc
+create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB;
+insert into t1 values(1);
+insert into t1 values(2);
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+SET GLOBAL debug = '+d,crash_commit_after_log';
+insert into t1 values(3);
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+use test;
+select * from t1;
+a
+1
+2
+change master to master_auto_position = 1;
+include/start_slave.inc
+rename table t1 to test1;
+use test;
+select * from test1;
+a
+1
+2
+3
+use test;
+select * from test1;
+a
+1
+2
+3
+drop table test1;
+include/stop_slave.inc
+change master to master_auto_position = 0;
+include/start_slave.inc
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:4
+include/rpl_reset.inc
+create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB;
+insert into t1 values(1);
+insert into t1 values(2);
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+SET GLOBAL debug = '+d,crash_commit_after_prepare';
+insert into t1 values(3);
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+use test;
+select * from t1;
+a
+1
+2
+change master to master_auto_position = 1;
+include/start_slave.inc
+rename table t1 to test1;
+use test;
+select * from test1;
+a
+1
+2
+3
+use test;
+select * from test1;
+a
+1
+2
+3
+drop table test1;
+include/stop_slave.inc
+change master to master_auto_position = 0;
+include/start_slave.inc
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:4
+include/rpl_reset.inc
+create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB;
+insert into t1 values(1);
+insert into t1 values(2);
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+SET GLOBAL debug = '+d,crash_commit_after';
+insert into t1 values(3);
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+use test;
+select * from t1;
+a
+1
+2
+change master to master_auto_position = 1;
+include/start_slave.inc
+rename table t1 to test1;
+use test;
+select * from test1;
+a
+1
+2
+3
+use test;
+select * from test1;
+a
+1
+2
+3
+drop table test1;
+include/stop_slave.inc
+change master to master_auto_position = 0;
+include/start_slave.inc
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:4
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_crash_safe_wal_corrupt.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_crash_safe_wal_corrupt.result
new file mode 100644
index 00000000000..e765e338cb5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_crash_safe_wal_corrupt.result
@@ -0,0 +1,140 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+drop table if exists x;
+select @@binlog_format;
+@@binlog_format
+ROW
+create table x (id int primary key, value int, value2 int, index(value)) engine=rocksdb;
+insert into x values (1,1,1);
+insert into x values (2,1,1);
+insert into x values (3,1,1);
+insert into x values (4,1,1);
+insert into x values (5,1,1);
+select @@global.gtid_executed;
+@@global.gtid_executed
+uuid:1-7
+
+--- slave state before crash ---
+select * from x;
+id value value2
+1 1 1
+2 1 1
+3 1 1
+4 1 1
+5 1 1
+select @@global.gtid_executed;
+@@global.gtid_executed
+uuid:1-7
+select * from mysql.slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:7
+include/rpl_start_server.inc [server_number=2]
+
+--- slave state after crash recovery, slave stop, one transaction recovered---
+select * from x;
+id value value2
+1 1 1
+2 1 1
+3 1 1
+4 1 1
+select @@global.gtid_executed;
+@@global.gtid_executed
+uuid:1-6
+select * from mysql.slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:6
+
+--- slave state after restart, slave start ---
+include/start_slave.inc
+select * from x;
+id value value2
+1 1 1
+2 1 1
+3 1 1
+4 1 1
+5 1 1
+select @@global.gtid_executed;
+@@global.gtid_executed
+uuid:1-7
+select * from mysql.slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:7
+insert into x values (6,1,1);
+select * from x;
+id value value2
+1 1 1
+2 1 1
+3 1 1
+4 1 1
+5 1 1
+6 1 1
+select @@global.gtid_executed;
+@@global.gtid_executed
+uuid:1-8
+select * from mysql.slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:8
+insert into x values (7,1,1);
+insert into x values (8,1,1);
+insert into x values (9,1,1);
+insert into x values (10,1,1);
+insert into x values (11,1,1);
+insert into x values (12,1,1);
+select * from x;
+id value value2
+1 1 1
+2 1 1
+3 1 1
+4 1 1
+5 1 1
+6 1 1
+7 1 1
+8 1 1
+9 1 1
+10 1 1
+11 1 1
+12 1 1
+select @@global.gtid_executed;
+@@global.gtid_executed
+uuid:1-14
+include/rpl_start_server.inc [server_number=2]
+
+--- slave state after crash recovery, slave stop, WAL was corrupted, point in time recovery with wal_recovery_mode=2 ---
+select * from x;
+id value value2
+1 1 1
+2 1 1
+3 1 1
+4 1 1
+5 1 1
+6 1 1
+7 1 1
+8 1 1
+9 1 1
+include/start_slave.inc
+select * from x;
+id value value2
+1 1 1
+2 1 1
+3 1 1
+4 1 1
+5 1 1
+6 1 1
+7 1 1
+8 1 1
+9 1 1
+10 1 1
+11 1 1
+12 1 1
+select @@global.gtid_executed;
+@@global.gtid_executed
+uuid:1-14
+select * from mysql.slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:14
+drop table x;
+include/rpl_end.inc
+Binlog Info Found
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_rocksdb_sys_header.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_rocksdb_sys_header.result
new file mode 100644
index 00000000000..b2703ee0cbb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_rocksdb_sys_header.result
@@ -0,0 +1,16 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+create table t1 (a int primary key) engine=rocksdb;
+insert into t1 values(1);
+SET GLOBAL debug = '+d,crash_before_writing_xid';
+insert into t1 values(2);
+ERROR HY000: Lost connection to MySQL server during query
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+include/start_slave.inc
+RocksDB: Last MySQL Gtid master_uuid:2
+drop table t1;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_missing_columns_sk_update.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_missing_columns_sk_update.result
new file mode 100644
index 00000000000..19b0a191a1b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_missing_columns_sk_update.result
@@ -0,0 +1,62 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+set @@sql_log_bin = 0;
+CREATE TABLE `t1` (
+`a` int(10) unsigned NOT NULL DEFAULT '0',
+`b` bigint(20) unsigned NOT NULL DEFAULT '0',
+`c` bigint(20) unsigned NOT NULL DEFAULT '0',
+`d` bigint(20) unsigned NOT NULL DEFAULT '0',
+`e` varbinary(64) DEFAULT NULL,
+`f` int(10) NOT NULL DEFAULT '0',
+`g` int(10) NOT NULL DEFAULT '0',
+`h` int(10) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`a`,`b`),
+KEY `key1` (`a`, `e`(1)),
+KEY `key2` (`a`,`h`)
+) ENGINE=RocksDB;
+set @@sql_log_bin = 1;
+set @@sql_log_bin = 0;
+CREATE TABLE `t1` (
+`a` int(10) unsigned NOT NULL DEFAULT '0',
+`b` bigint(20) unsigned NOT NULL DEFAULT '0',
+`c` bigint(20) unsigned NOT NULL DEFAULT '0',
+`d` bigint(20) unsigned NOT NULL DEFAULT '0',
+`e` varbinary(64) DEFAULT NULL,
+`f` int(10) NOT NULL DEFAULT '0',
+`g` int(10) NOT NULL DEFAULT '0',
+`x` TINYINT(3) UNSIGNED DEFAULT NULL,
+`y` INT(10) DEFAULT NULL,
+`h` int(10) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`a`,`b`),
+KEY `key1` (`a`, `e`(1)),
+KEY `key2` (`a`,`h`)
+) ENGINE=RocksDB;
+set @@sql_log_bin = 1;
+INSERT INTO t1 VALUES (1, 1, 1, 1, 'a', 1, 1, 1);
+SELECT * FROM t1;
+a b c d e f g h
+1 1 1 1 a 1 1 1
+SELECT * FROM t1;
+a b c d e f g x y h
+1 1 1 1 a 1 1 NULL NULL 1
+UPDATE t1 SET h = 10 WHERE h = 1;
+SELECT * FROM t1;
+a b c d e f g h
+1 1 1 1 a 1 1 10
+SELECT * FROM t1;
+a b c d e f g x y h
+1 1 1 1 a 1 1 NULL NULL 10
+SELECT COUNT(*) FROM t1 FORCE INDEX (key1) WHERE h = 10 AND a = 1;
+COUNT(*)
+1
+SELECT COUNT(*) FROM t1 FORCE INDEX (key2) WHERE h = 10 AND a = 1;
+COUNT(*)
+1
+SELECT COUNT(*) FROM t1 FORCE INDEX (PRIMARY) WHERE h = 10 AND a = 1;
+COUNT(*)
+1
+DROP TABLE t1;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_mts_dependency_unique_key_conflicts.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_mts_dependency_unique_key_conflicts.result
new file mode 100644
index 00000000000..0eadc7deafe
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_mts_dependency_unique_key_conflicts.result
@@ -0,0 +1,44 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+include/stop_slave.inc
+set @save.slave_parallel_workers= @@global.slave_parallel_workers;
+set @save.slave_use_idempotent_for_recovery= @@global.slave_use_idempotent_for_recovery;
+set @save.mts_dependency_replication= @@global.mts_dependency_replication;
+set @save.mts_dependency_order_commits= @@global.mts_dependency_order_commits;
+set @save.debug= @@global.debug;
+set @@global.slave_parallel_workers= 2;
+set @@global.slave_use_idempotent_for_recovery= YES;
+set @@global.mts_dependency_replication= STMT;
+set @@global.mts_dependency_order_commits= false;
+set @@global.debug= '+d,dbug.dep_wait_before_update_execution';
+include/start_slave.inc
+create table t1 (a int primary key, b int unique key) engine = rocksdb;
+insert into t1 values(1, 1);
+include/sync_slave_sql_with_master.inc
+include/stop_slave.inc
+update t1 set b = 2 where a = 1;
+insert into t1 values(2, 1);
+include/start_slave.inc
+set debug_sync="now wait_for signal.reached";
+select * from t1;
+a b
+1 1
+set debug_sync="now signal signal.done";
+include/sync_slave_sql_with_master.inc
+select * from t1;
+a b
+2 1
+1 2
+drop table t1;
+include/sync_slave_sql_with_master.inc
+include/stop_slave.inc
+set @@global.slave_parallel_workers= @save.slave_parallel_workers;
+set @@global.mts_dependency_replication= @save.mts_dependency_replication;
+set @@global.slave_use_idempotent_for_recovery= @save.slave_use_idempotent_for_recovery;
+set @@global.mts_dependency_order_commits= @save.mts_dependency_order_commits;
+set @@global.debug= @save.debug;
+include/start_slave.inc
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_no_unique_check_on_lag.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_no_unique_check_on_lag.result
new file mode 100644
index 00000000000..905b56dacb5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_no_unique_check_on_lag.result
@@ -0,0 +1,34 @@
+#
+# Ensure skip_unique_check is set when lag exceeds lag_threshold
+#
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+call mtr.add_suppression("Slave SQL: Could not execute Write_rows event on table test.t1");
+call mtr.add_suppression(".*Worker.*failed executing transaction");
+call mtr.add_suppression(".*The slave coordinator and worker threads are stopped");
+drop table if exists t1;
+CREATE TABLE t1 (id int primary key, value int) engine=RocksDB;
+set global reset_seconds_behind_master=1;
+INSERT INTO t1 VALUES(1, 0);
+INSERT INTO t1 VALUES(2, 0);
+INSERT INTO t1 VALUES(3, 0);
+INSERT INTO t1 VALUES(1, 1);
+include/wait_for_slave_sql_error_and_skip.inc [errno=1062]
+set global reset_seconds_behind_master=0;
+include/stop_slave_io.inc
+INSERT INTO t1 values (4,0);
+INSERT INTO t1 VALUES(2, 1);
+include/start_slave_io.inc
+set global reset_seconds_behind_master=1;
+insert into t1 values (5,0);
+INSERT INTO t1 VALUES(3, 1);
+include/wait_for_slave_sql_error_and_skip.inc [errno=1062]
+#
+# Cleanup
+#
+DROP TABLE t1;
+set global reset_seconds_behind_master=1;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_no_unique_check_on_lag_mts.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_no_unique_check_on_lag_mts.result
new file mode 100644
index 00000000000..6c58cb16fed
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_no_unique_check_on_lag_mts.result
@@ -0,0 +1,31 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+call mtr.add_suppression("Slave SQL: Could not execute Write_rows event on table test.t1");
+call mtr.add_suppression(".*Worker.*failed executing transaction");
+call mtr.add_suppression(".*The slave coordinator and worker threads are stopped");
+drop table if exists t1;
+CREATE TABLE t1 (id int primary key, value int) engine=RocksDB;
+set global reset_seconds_behind_master=1;
+INSERT INTO t1 VALUES(1, 0);
+INSERT INTO t1 VALUES(2, 0);
+INSERT INTO t1 VALUES(3, 0);
+INSERT INTO t1 VALUES(1, 1);
+include/wait_for_slave_sql_error_and_skip.inc [errno=1062]
+set global reset_seconds_behind_master=0;
+include/stop_slave_io.inc
+INSERT INTO t1 values (4,0);
+INSERT INTO t1 VALUES(2, 1);
+include/start_slave_io.inc
+set global reset_seconds_behind_master=1;
+insert into t1 values (5,0);
+INSERT INTO t1 VALUES(3, 1);
+include/wait_for_slave_sql_error_and_skip.inc [errno=1062]
+#
+# Cleanup
+#
+DROP TABLE t1;
+set global reset_seconds_behind_master=1;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_2pc_crash_recover.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_2pc_crash_recover.result
new file mode 100644
index 00000000000..cf3fe03a305
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_2pc_crash_recover.result
@@ -0,0 +1,44 @@
+DROP TABLE IF EXISTS t1;
+create table t1 (a int primary key, msg varchar(255)) engine=rocksdb;
+SET GLOBAL ROCKSDB_ENABLE_2PC = ON;
+SET SESSION debug_dbug="d,crash_commit_after_prepare";
+insert into t1 values (1, 'dogz');
+select * from t1;
+a msg
+SET GLOBAL ROCKSDB_ENABLE_2PC = ON;
+SET SESSION debug_dbug="d,crash_commit_after_log";
+insert into t1 values (2, 'catz'), (3, 'men');
+select * from t1;
+a msg
+2 catz
+3 men
+SET GLOBAL ROCKSDB_ENABLE_2PC = ON;
+SET SESSION debug_dbug="d,crash_commit_after";
+insert into t1 values (4, 'cars'), (5, 'foo');
+select * from t1;
+a msg
+2 catz
+3 men
+4 cars
+5 foo
+SET GLOBAL ROCKSDB_ENABLE_2PC = OFF;
+SET SESSION debug_dbug="d,crash_commit_after_log";
+insert into t1 values (6, 'shipz'), (7, 'tankz');
+select * from t1;
+a msg
+2 catz
+3 men
+4 cars
+5 foo
+SET GLOBAL ROCKSDB_ENABLE_2PC = OFF;
+SET SESSION debug_dbug="d,crash_commit_after";
+insert into t1 values (8, 'space'), (9, 'time');
+select * from t1;
+a msg
+2 catz
+3 men
+4 cars
+5 foo
+8 space
+9 time
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_slave_gtid_info_optimized.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_slave_gtid_info_optimized.result
new file mode 100644
index 00000000000..1f6acf32872
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_slave_gtid_info_optimized.result
@@ -0,0 +1,43 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+Make changes in master
+create table test1 (a int primary key, b int) engine=rocksdb;
+insert into test1 values (1, 1);
+Make sure slave is up-to-date and mysql.slave_gtid_info is good
+select * from test1;
+a b
+1 1
+select id, database_name, last_gtid from mysql.slave_gtid_info;
+id database_name last_gtid
+1 test UUID:2
+Make changes in master
+insert into test1 values (2, 2);
+Make sure slave is up-to-date and mysql.slave_gtid_info is good
+select @@slave_gtid_info;
+@@slave_gtid_info
+OPTIMIZED
+select * from test1;
+a b
+1 1
+2 2
+select * from mysql.slave_gtid_info;
+Id Database_name Last_gtid
+1 test UUID:3
+Make changes in master
+insert into test1 values (3, 3);
+insert into test1 values (4, 4);
+Make sure slave is up-to-date and mysql.slave_gtid_info is good
+select * from test1;
+a b
+1 1
+2 2
+3 3
+4 4
+select id, database_name, last_gtid from mysql.slave_gtid_info;
+id database_name last_gtid
+1 test UUID:5
+DROP TABLE IF EXISTS test1;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_snapshot.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_snapshot.result
new file mode 100644
index 00000000000..f5e861feddc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_snapshot.result
@@ -0,0 +1,222 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+DROP TABLE IF EXISTS t1;
+# Establish connection con1 (user=root)
+# Establish connection con2 (user=root)
+# Establish connection con3 (user=root)
+# Establish connection con4 (user=root)
+# reset replication to guarantee that master-bin.000001 is used
+include/stop_slave.inc
+RESET SLAVE;
+RESET MASTER;
+RESET MASTER;
+CHANGE MASTER TO master_host="127.0.0.1",master_port=MASTER_PORT,master_user="root";
+Warnings:
+Note 1759 Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note 1760 Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+include/start_slave.inc
+# Switch to connection con1
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 VALUES(1);
+SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+ERROR HY000: Only REPEATABLE READ isolation level is supported for START TRANSACTION WITH CONSISTENT|SHARED|EXISTING SNAPSHOT in RocksDB Storage Engine.
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+ERROR HY000: Only REPEATABLE READ isolation level is supported for START TRANSACTION WITH CONSISTENT|SHARED|EXISTING SNAPSHOT in RocksDB Storage Engine.
+ROLLBACK;
+SET TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+File Position Gtid_executed
+master-bin.000001 531 UUID:1-2
+# Switch to connection con2
+INSERT INTO t1 VALUES(2);
+INSERT INTO t1 VALUES(3);
+# Switch to connection con1
+SELECT * FROM t1;
+a
+1
+COMMIT;
+SELECT * FROM t1;
+a
+1
+2
+3
+DROP TABLE t1;
+# Switch to connection con1
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 VALUES(1);
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+File Position Gtid_executed
+master-bin.000001 1510 UUID:1-7
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+File Position Gtid_executed
+master-bin.000001 1510 UUID:1-7
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+File Position Gtid_executed
+master-bin.000001 1510 UUID:1-7
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+File Position Gtid_executed
+master-bin.000001 1510 UUID:1-7
+# Switch to connection con2
+INSERT INTO t1 VALUES(2);
+INSERT INTO t1 VALUES(3);
+# Switch to connection con1
+SELECT * FROM t1;
+a
+1
+SELECT * INTO OUTFILE '<MYSQLTEST_VARDIR>/tmp/rpl_rocksdb_snapshot.out.file' FROM t1;
+COMMIT;
+# Switch to slave
+CREATE TABLE t1_backup LIKE t1;
+INSERT INTO t1_backup SELECT * FROM t1;
+include/stop_slave.inc
+RESET SLAVE;
+RESET MASTER;
+DELETE FROM t1;
+LOAD DATA INFILE '<MYSQLTEST_VARDIR>/tmp/rpl_rocksdb_snapshot.out.file' INTO TABLE t1;
+SELECT * FROM t1;
+a
+1
+CHANGE MASTER TO master_host="127.0.0.1",master_port=MASTER_PORT,master_user="root",master_log_file="master-bin.000001",master_log_pos=binlog_pos;
+Warnings:
+Note 1759 Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note 1760 Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+include/start_slave.inc
+SELECT * FROM t1;
+a
+1
+2
+3
+SELECT * FROM t1_backup;
+a
+1
+2
+3
+DROP TABLE t1_backup;
+DROP TABLE t1;
+# Switch to connection con1
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 VALUES(1);
+# async queries from con2
+INSERT INTO t1 VALUES(2);
+# async queries from con3
+INSERT INTO t1 VALUES(21);
+# Switch to connection con1
+# Switch to connection con4
+INSERT INTO t1 VALUES(9);
+# Switch to connection con1
+SELECT * INTO OUTFILE '<MYSQLTEST_VARDIR>/tmp/rpl_rocksdb_snapshot.out.file' FROM t1;
+COMMIT;
+# reap async statements
+# Switch to slave
+CREATE TABLE t1_backup LIKE t1;
+INSERT INTO t1_backup SELECT * FROM t1;
+include/stop_slave.inc
+RESET SLAVE;
+RESET MASTER;
+DELETE FROM t1;
+LOAD DATA INFILE '<MYSQLTEST_VARDIR>/tmp/rpl_rocksdb_snapshot.out.file' INTO TABLE t1;
+CHANGE MASTER TO master_host="127.0.0.1",master_port=MASTER_PORT,master_user="root",master_log_file="master-bin.000001",master_log_pos=binlog_pos;
+Warnings:
+Note 1759 Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note 1760 Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+include/start_slave.inc
+# sync and then query slave
+ShouldBeZero
+0
+DROP TABLE t1_backup;
+DROP TABLE t1;
+# Switch to connection con1
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 VALUES(1);
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+File Position Gtid_executed
+master-bin.000001 3688 UUID:1-18
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+File Position Gtid_executed
+master-bin.000001 3688 UUID:1-18
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+File Position Gtid_executed
+master-bin.000001 3688 UUID:1-18
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+File Position Gtid_executed
+master-bin.000001 3688 UUID:1-18
+# Switch to connection con2
+INSERT INTO t1 VALUES(2);
+INSERT INTO t1 VALUES(3);
+# Switch to connection con1
+SELECT * FROM t1;
+a
+1
+SELECT * INTO OUTFILE '<MYSQLTEST_VARDIR>/tmp/rpl_rocksdb_snapshot.out.file' FROM t1;
+COMMIT;
+# Switch to slave
+CREATE TABLE t1_backup LIKE t1;
+INSERT INTO t1_backup SELECT * FROM t1;
+include/stop_slave.inc
+RESET SLAVE;
+RESET MASTER;
+SET @@global.gtid_purged='gtid_executed_from_snapshot';
+DELETE FROM t1;
+LOAD DATA INFILE '<MYSQLTEST_VARDIR>/tmp/rpl_rocksdb_snapshot.out.file' INTO TABLE t1;
+SELECT * FROM t1;
+a
+1
+CHANGE MASTER TO master_host="127.0.0.1",master_port=MASTER_PORT,master_user="root", master_auto_position=1;
+Warnings:
+Note 1759 Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note 1760 Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+include/start_slave.inc
+SELECT * FROM t1;
+a
+1
+2
+3
+SELECT * FROM t1_backup;
+a
+1
+2
+3
+DROP TABLE t1_backup;
+DROP TABLE t1;
+# Switch to connection con1
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 VALUES(1);
+# async queries from con2
+INSERT INTO t1 VALUES(2);
+# async queries from con3
+INSERT INTO t1 VALUES(21);
+# Switch to connection con1
+# Switch to connection con4
+INSERT INTO t1 VALUES(9);
+# Switch to connection con1
+SELECT * INTO OUTFILE '<MYSQLTEST_VARDIR>/tmp/rpl_rocksdb_snapshot.out.file' FROM t1;
+COMMIT;
+# reap async statements
+# Switch to slave
+CREATE TABLE t1_backup LIKE t1;
+INSERT INTO t1_backup SELECT * FROM t1;
+include/stop_slave.inc
+RESET SLAVE;
+RESET MASTER;
+SET @@global.gtid_purged='gtid_executed_from_snapshot';
+DELETE FROM t1;
+LOAD DATA INFILE '<MYSQLTEST_VARDIR>/tmp/rpl_rocksdb_snapshot.out.file' INTO TABLE t1;
+CHANGE MASTER TO master_host="127.0.0.1",master_port=MASTER_PORT,master_user="root", master_auto_position=1;
+Warnings:
+Note 1759 Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note 1760 Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+include/start_slave.inc
+# sync and then query slave
+ShouldBeZero
+0
+DROP TABLE t1_backup;
+DROP TABLE t1;
+# Switch to connection default + close connections con1 and con2
+include/stop_slave.inc
+CHANGE MASTER to master_auto_position=0;
+include/start_slave.inc
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_snapshot_without_gtid.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_snapshot_without_gtid.result
new file mode 100644
index 00000000000..57c1d0822c9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_snapshot_without_gtid.result
@@ -0,0 +1,15 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+create table t1(a int primary key);
+FLUSH LOGS;
+insert into t1 values(1);
+insert into t1 values(2);
+FLUSH LOGS;
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+File Position Gtid_executed
+master-bin.000003 120
+drop table t1;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_stress_crash.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_stress_crash.result
new file mode 100644
index 00000000000..d4920b14705
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_stress_crash.result
@@ -0,0 +1,28 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+call mtr.add_suppression(".*");
+include/stop_slave.inc
+change master to master_auto_position=1;
+include/start_slave.inc
+call mtr.add_suppression('Slave: Error dropping database');
+stop slave sql_thread;
+insert into test0.benchmark set state='slave is processing load';
+start slave sql_thread;
+use test0;
+insert into benchmark set state='slave ends load';
+use test;
+select * from test1.benchmark into outfile 'benchmark.out';
+select ts from test0.benchmark where state like 'master started load' into @m_0;
+select ts from test0.benchmark where state like 'master ends load' into @m_1;
+select ts from test0.benchmark where state like 'slave takes on load' into @s_m0;
+select ts from test0.benchmark where state like 'slave is supposed to finish with load' into @s_m1;
+select ts from test0.benchmark where state like 'slave ends load' into @s_1;
+select ts from test0.benchmark where state like 'slave is processing load' into @s_0;
+select time_to_sec(@m_1) - time_to_sec(@m_0) as 'delta.out';
+include/stop_slave.inc
+change master to master_auto_position=0;
+include/start_slave.inc
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_skip_trx_api_binlog_format.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_skip_trx_api_binlog_format.result
new file mode 100644
index 00000000000..5559bf6168c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_skip_trx_api_binlog_format.result
@@ -0,0 +1,27 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+call mtr.add_suppression("Master's binlog format is not ROW but rpl_skip_tx_api is enabled on the slave");
+set global rpl_skip_tx_api=ON;
+set global rocksdb_unsafe_for_binlog=1;
+create table t1(a int);
+set session binlog_format=STATEMENT;
+insert into t1 values(1);
+include/wait_for_slave_sql_error.inc [errno=1756]
+Last_SQL_Error = 'Master's binlog format is not ROW but rpl_skip_tx_api is enabled on the slave, this should only be used when master's binlog format is ROW.'
+"Table after error"
+select * from t1;
+a
+set global rpl_skip_tx_api=OFF;
+include/start_slave.inc
+include/sync_slave_sql_with_master.inc
+"Table after error fixed"
+select * from t1;
+a
+1
+drop table t1;
+set global rocksdb_unsafe_for_binlog=0;
+set global rpl_skip_tx_api=0;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result
new file mode 100644
index 00000000000..89e93f6b8f0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result
@@ -0,0 +1,25 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+call mtr.add_suppression("Recovery from master pos");
+drop table if exists r1;
+create table r1 (id1 int, id2 int, primary key (id1, id2), index i (id2)) engine=rocksdb;
+insert into r1 values (1, 1000);
+set global rocksdb_force_flush_memtable_now=1;
+include/rpl_start_server.inc [server_number=2]
+include/start_slave.inc
+insert into r1 values (2,2000);
+delete r1 from r1 force index (i) where id2=1000;
+select id1,id2 from r1 force index (primary) where id1=1 and id2=1000;
+id1 id2
+select id2 from r1 force index (i) where id1=1 and id2=1000;
+id2
+set global rocksdb_compact_cf='default';
+select id1,id2 from r1 force index (primary) where id1=1 and id2=1000;
+id1 id2
+select id2 from r1 force index (i) where id1=1 and id2=1000;
+id2
+drop table r1;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_table.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_table.result
new file mode 100644
index 00000000000..979e2cbf6c3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_table.result
@@ -0,0 +1,29 @@
+include/master-slave.inc
+[connection master]
+connection master;
+drop table if exists r1;
+create table r1 (id1 int, id2 int, primary key (id1, id2), index i (id2)) engine=rocksdb;
+insert into r1 values (1, 1000);
+set sql_log_bin=0;
+delete from r1 where id1=1 and id2=1000;
+set sql_log_bin=1;
+connection slave;
+connection slave;
+set global rocksdb_force_flush_memtable_now=1;
+connection master;
+insert into r1 values (1, 1000);
+connection slave;
+connection slave;
+delete r1 from r1 force index (i) where id2=1000;
+select id1,id2 from r1 force index (primary);
+id1 id2
+select id2 from r1 force index (i);
+id2
+set global rocksdb_compact_cf='default';
+select id1,id2 from r1 force index (primary);
+id1 id2
+select id2 from r1 force index (i);
+id2
+connection master;
+drop table r1;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/rpl_1slave_base.cnf b/storage/rocksdb/mysql-test/rocksdb_rpl/rpl_1slave_base.cnf
new file mode 100644
index 00000000000..ed8c77bcc0b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/rpl_1slave_base.cnf
@@ -0,0 +1,51 @@
+# Use default settings for mysqld processes
+!include include/default_mysqld.cnf
+!include include/default_client.cnf
+
+[mysqld.1]
+
+# Run the master.sh script before starting this process
+#!run-master-sh
+
+log-bin= master-bin
+
+loose-innodb
+
+[mysqld.2]
+# Run the slave.sh script before starting this process
+#!run-slave-sh
+
+# Append the <testname>-slave.opt file to the list of arguments used when
+# starting mysqld
+#!use-slave-opt
+innodb_use_native_aio = 0
+
+log-bin= slave-bin
+relay-log= slave-relay-bin
+
+log-slave-updates
+master-retry-count= 10
+
+# Values reported by the slave when it connects to the master;
+# they show up in SHOW SLAVE STATUS
+report-host= 127.0.0.1
+report-port= @mysqld.2.port
+report-user= root
+
+skip-slave-start
+
+# Directory where slaves find the dumps generated by "load data"
+# on the server. The path needs to have a constant length, otherwise
+# test results will vary; thus a relative path is used.
+slave-load-tmpdir= ../../tmp
+
+loose-innodb
+
+
+[ENV]
+MASTER_MYPORT= @mysqld.1.port
+MASTER_MYSOCK= @mysqld.1.socket
+
+SLAVE_MYPORT= @mysqld.2.port
+SLAVE_MYSOCK= @mysqld.2.socket
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/suite.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/suite.opt
new file mode 100644
index 00000000000..f5dc0ce891c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/suite.opt
@@ -0,0 +1,2 @@
+--ignore-db-dirs=.rocksdb --plugin-load=$HA_ROCKSDB_SO --default-storage-engine=rocksdb
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/suite.pm b/storage/rocksdb/mysql-test/rocksdb_rpl/suite.pm
new file mode 100644
index 00000000000..1cc123b2a2a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/suite.pm
@@ -0,0 +1,25 @@
+package My::Suite::Rocksdb;
+
+#
+# Note: the ../rocksdb_sys_vars/suite.pm file has a similar
+# function. If you modify this file, consider modifying that one, too.
+#
+@ISA = qw(My::Suite);
+use My::Find;
+use File::Basename;
+use strict;
+
+#sub is_default { not $::opt_embedded_server }
+
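+# In MTR, a suite.pm that evaluates to a plain string (instead of a blessed
+# suite object) causes the whole suite to be skipped, with that string shown
+# as the reason; the two bare "return ..." statements below rely on this.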
+my $sst_dump=
+::mtr_exe_maybe_exists(
+ "$::bindir/storage/rocksdb$::opt_vs_config/sst_dump",
+ "$::path_client_bindir/sst_dump");
+return "RocksDB is not compiled, no sst_dump" unless $sst_dump;
+$ENV{MARIAROCKS_SST_DUMP}="$sst_dump";
+
+# Temporarily disable testing under valgrind, due to MDEV-12439
+return "RocksDB tests disabled under valgrind" if ($::opt_valgrind);
+
+bless { };
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/consistent_snapshot_mixed_engines-master.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/consistent_snapshot_mixed_engines-master.opt
new file mode 100644
index 00000000000..c747adc94d5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/consistent_snapshot_mixed_engines-master.opt
@@ -0,0 +1 @@
+--gtid_mode=ON --enforce_gtid_consistency --log_bin --log_slave_updates
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/consistent_snapshot_mixed_engines.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/consistent_snapshot_mixed_engines.test
new file mode 100644
index 00000000000..acea1903c05
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/consistent_snapshot_mixed_engines.test
@@ -0,0 +1,81 @@
+--source include/have_log_bin.inc
+--source include/have_rocksdb.inc
+--source include/have_innodb.inc
+--enable_connect_log
+-- let $uuid = `select @@server_uuid;`
+
+# Save the initial number of concurrent sessions
+--source include/count_sessions.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+connection con1;
+create table i1 (id int primary key , value int) engine=innodb;
+create table r1 (id int primary key , value int) engine=rocksdb;
+
+
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+
+# Without setting engine, this takes both InnoDB and RocksDB snapshots
+-- replace_result $uuid uuid
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+
+connection con2;
+insert into i1 values (1,1);
+insert into r1 values (1,1);
+
+connection con1;
+select * from i1;
+select * from r1;
+
+# This takes a RocksDB snapshot only, but InnoDB still participates in the transaction.
+-- replace_result $uuid uuid
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+
+connection con2;
+insert into i1 values (2,2);
+insert into r1 values (2,2);
+
+connection con1;
+# takes the InnoDB snapshot here, so changes made after this are not visible
+select * from i1;
+select * from r1;
+
+connection con2;
+insert into i1 values (3,2);
+insert into r1 values (3,2);
+
+connection con1;
+select * from i1;
+select * from r1;
+
+# RocksDB also participates in the transaction
+-- replace_result $uuid uuid
+START TRANSACTION WITH CONSISTENT INNODB SNAPSHOT;
+
+connection con2;
+insert into r1 values (4,4);
+
+connection con1;
+# takes the RocksDB snapshot here, so changes made after this are not visible
+select * from r1;
+
+connection con2;
+insert into r1 values (5,5);
+
+connection con1;
+select * from r1;
+
+drop table i1;
+drop table r1;
+
+connection default;
+disconnect con1;
+disconnect con2;
+reset master;
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/disabled.def b/storage/rocksdb/mysql-test/rocksdb_rpl/t/disabled.def
new file mode 100644
index 00000000000..2147e3e086d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/disabled.def
@@ -0,0 +1,34 @@
+##
+## Tests that require FB/MySQL specific features for which there are
+## no plans to port them to MariaDB
+##
+rpl_no_unique_check_on_lag : unique_check_lag_threshold is not available in MariaDB
+rpl_no_unique_check_on_lag_mts : unique_check_lag_threshold is not available in MariaDB
+consistent_snapshot_mixed_engines : Tests START TRANSACTION WITH CONSISTENT $ENGINE_NAME SNAPSHOT
+rpl_skip_trx_api_binlog_format : requires @@rpl_skip_tx_api
+rpl_ddl_high_priority : DDL commands with HIGH_PRIORITY syntax are not in MariaDB
+rpl_gtid_rocksdb_sys_header : MariaDB doesn't support printing "RocksDB: Last MySQL Gtid UUID" into server stderr on startup
+singledelete_idempotent_recovery: MariaDB doesn't support --slave-use-idempotent-for-recovery
+rpl_mts_dependency_unique_key_conflicts: MariaDB doesn't support --slave-use-idempotent-for-recovery
+rpl_missing_columns_sk_update : Uses log_column_names=ON feature which is only present in FB/MySQL
+optimize_myrocks_replace_into: requires @@enable_blind_replace support.
+rpl_gtid_crash_safe_optimized: requires slave_gtid_info=optimized
+
+##
+## Tests that do not fit MariaDB's test environment (Functional tests only,
+## can't have stress tests)
+##
+rpl_rocksdb_stress_crash : Stress test
+
+##
+## Tests that are disabled for other reasons
+##
+
+multiclient_2pc : Didn't try with MariaDB, yet
+rpl_crash_safe_wal_corrupt : Didn't try with MariaDB, yet
+rpl_gtid_crash_safe : Didn't try with MariaDB, yet
+rpl_gtid_crash_safe_wal_corrupt : Didn't try with MariaDB, yet
+rpl_rocksdb_snapshot : Didn't try with MariaDB, yet
+rpl_rocksdb_snapshot_without_gtid : Didn't try with MariaDB, yet
+rpl_rocksdb_slave_gtid_info_optimized: requires slave-gtid-info=optimized which is an FB/MySQL-only feature
+rocksdb_slave_check_before_image_consistency: requires slave_check_before_image_consistency feature
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc-master.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc-master.opt
new file mode 100644
index 00000000000..c747adc94d5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc-master.opt
@@ -0,0 +1 @@
+--gtid_mode=ON --enforce_gtid_consistency --log_bin --log_slave_updates
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test
new file mode 100644
index 00000000000..ff484171213
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test
@@ -0,0 +1,77 @@
+--source include/have_rocksdb.inc
+--source include/have_binlog_format_row.inc
+--source include/have_debug.inc
+--source include/have_debug_sync.inc
+--source include/big_test.inc
+# The test involves a crash which does not seem to be handled well by
+# mysql-test/lib/My/SafeProcess/my_safe_process under valgrind, as it hangs
+# forever. The test is not meant to verify memory leaks, so not much
+# coverage is lost by not running it under valgrind.
+--source include/not_valgrind.inc
+
+--exec echo > $MYSQLTEST_VARDIR/log/mysqld.1.err
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+# Set it to the minimum so that we can make the binlog rotate with a few inserts
+SET GLOBAL MAX_BINLOG_SIZE = 4096;
+SET GLOBAL ROCKSDB_ENABLE_2PC = ON;
+create table t1 (a int primary key, b int, c varchar(255)) engine=rocksdb;
+
+connect (con1, localhost, root,,);
+connect (con2, localhost, root,,);
+
+# On connection one we insert a row and pause after the prepare marker is
+# written to the WAL. Connection two then inserts many rows to rotate the
+# binlog. After connection two completes, connection one continues, only to
+# crash after the binlog write but before the commit. On crash recovery we
+# see that connection one's value has been recovered and committed.
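+# (On restart the server scans the binlog for XIDs of prepared transactions
+# and asks the engine to commit the ones it finds, rolling back the rest,
+# which is why connection one's row survives.)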
+connection con1;
+--echo 'con1'
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+SET SESSION debug="d,crash_commit_after_log";
+SET DEBUG_SYNC='rocksdb.prepared SIGNAL parked WAIT_FOR go';
+--error 0,2013
+--send insert into t1 values (1, 1, "iamtheogthealphaandomega");
+
+connection con2;
+--echo 'con2'
+insert into t1 values (2, 1, "i_am_just_here_to_trigger_a_flush");
+
+# Disable 2PC and syncing for faster inserting of dummy rows
+# These rows' only purpose is to rotate the binlog
+SET GLOBAL ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 0;
+SET GLOBAL SYNC_BINLOG = 0;
+
+SET DEBUG_SYNC='now WAIT_FOR parked';
+--disable_query_log
+--let $pk= 3
+# binlog size is 4096 bytes, so this many insertions will definitely rotate it
+while ($pk < 4096) {
+ eval insert into t1 values ($pk, 1, "foobardatagoesheresothatmorelogsrollwhichiswhatwewant");
+ --inc $pk
+}
+--enable_query_log
+
+# re-enable 2PC and syncing, then write to trigger a flush
+# before we trigger the crash to simulate full-durability
+SET GLOBAL ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 2;
+SET GLOBAL SYNC_BINLOG = 1;
+
+insert into t1 values (1000000, 1, "i_am_just_here_to_trigger_a_flush");
+
+--error 0,2013
+SET DEBUG_SYNC='now SIGNAL go';
+--source include/wait_until_disconnected.inc
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
+
+--exec python suite/rocksdb/t/check_log_for_xa.py $MYSQLTEST_VARDIR/log/mysqld.1.err commit,prepare,rollback
+
+select * from t1 where a=1;
+select count(*) from t1;
+
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/optimize_myrocks_replace_into.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/optimize_myrocks_replace_into.test
new file mode 100644
index 00000000000..82b231d489a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/optimize_myrocks_replace_into.test
@@ -0,0 +1,149 @@
+--source include/have_rocksdb.inc
+--source include/master-slave.inc
+--source include/have_debug.inc
+
+connection master;
+SET @prior_rocksdb_perf_context_level = @@rocksdb_perf_context_level;
+SET GLOBAL rocksdb_perf_context_level=3;
+SET GLOBAL enable_blind_replace=ON;
+
+# Create and insert some rows in a table
+create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+
+# Create table which has a trigger only in slave
+create table t2(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+insert into t2 values(1,1),(2,2),(3,3);
+select * from t2;
+
+# Create table which has a secondary key only in slave
+create table t3(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+insert into t3 values(1,1),(2,2),(3,3);
+select * from t3;
+
+sync_slave_with_master;
+
+# Enable blind replace in both slave and master
+connection slave;
+SET GLOBAL enable_blind_replace=ON;
+create trigger trg before insert on t2 for each row set @a:=1;
+alter table t3 add constraint slave_unique_key unique (c2);
+
+connection master;
+
+sync_slave_with_master;
+--echo connect slave
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
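+# @d baselines the GetForUpdate counter; the "read_free" checks below compare
+# the delta against the number of replaces to tell whether they were applied
+# blindly (no read) or fell back to read-modify-write.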
+
+# Case 1 - 'replace into' on a table with no triggers or secondary keys. Blind replace optimization should kick in on both master and slave
+--echo Case 1
+connection master;
+--echo connect master
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+
+replace into t1 values(1,11);
+replace into t1 values(2,22);
+replace into t1 values(3,33);
+select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+
+select * from t1;
+
+sync_slave_with_master;
+--echo connect slave
+select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+select * from t1;
+
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+
+# Case 2 - Multiple replaces in a single statement. Blind replace optimization should kick in
+connection master;
+--echo Case 2
+--echo connect master
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(2,44),(3,55);
+select case when variable_value-@d > 2 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+select * from t1;
+
+sync_slave_with_master;
+--echo connect slave
+select case when variable_value-@d > 2 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+select * from t1;
+
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+
+# Case 3 - A regular update. This is not a blind replace
+--echo Case 3
+connection master;
+--echo connect master
+update t1 set c2=66 where c1=3;
+select * from t1;
+
+sync_slave_with_master;
+--echo connect slave
+select * from t1;
+
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+
+# Case 4 - Slave has trigger on its table. No triggers on the table in master.
+# Blind replace optimization should kick in on master.
+# Slave should convert this statement into a regular update
+--echo Case 4
+connection master;
+--echo connect master
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t2 values(1,111);
+replace into t2 values(2,222);
+replace into t2 values(3,333);
+select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+select * from t2;
+
+sync_slave_with_master;
+--echo connect slave
+select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+select * from t2;
+
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+
+# Case 5 - Slave has secondary keys on the table. No secondary keys on the table in master
+# Blind replace optimization should kick in on master.
+# Slave should convert this statement into a regular delete_insert
+--echo Case 5
+connection master;
+--echo connect master
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t3 values(1,1111);
+replace into t3 values(2,2222);
+replace into t3 values(3,3333);
+select * from t3;
+
+select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+
+sync_slave_with_master;
+--echo connect slave
+select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+select * from t3;
+select * from t3 use index (slave_unique_key);
+
+# Case 6 - Just to verify all binlog events.
+# A blind replace will generate a write_rows event; otherwise it will be an
+# update_rows event or a delete_rows_write_rows event.
+--echo Case 6
+connection master;
+--source include/show_binlog_events.inc
+
+connection slave;
+--source include/show_binlog_events.inc
+
+# Cleanup
+connection master;
+drop table t1;
+drop table t2;
+drop table t3;
+SET GLOBAL rocksdb_perf_context_level = @prior_rocksdb_perf_context_level;
+SET GLOBAL enable_blind_replace=DEFAULT;
+
+connection slave;
+SET GLOBAL enable_blind_replace=DEFAULT;
+
+--source include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rocksdb_slave_check_before_image_consistency-slave.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rocksdb_slave_check_before_image_consistency-slave.opt
new file mode 100644
index 00000000000..78b517e93ab
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rocksdb_slave_check_before_image_consistency-slave.opt
@@ -0,0 +1 @@
+--slave_check_before_image_consistency=ON
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rocksdb_slave_check_before_image_consistency.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rocksdb_slave_check_before_image_consistency.test
new file mode 100644
index 00000000000..d7db127a207
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rocksdb_slave_check_before_image_consistency.test
@@ -0,0 +1,22 @@
+source include/master-slave.inc;
+source include/have_binlog_format_row.inc;
+
+call mtr.add_suppression("Error_code: 1032");
+
+let $engine= rocksdb;
+
+source extra/rpl_tests/rpl_slave_check_before_image_consistency.inc;
+
+# check detection with HASH_SCAN enabled
+connection slave;
+source include/stop_slave.inc;
+set @@global.slave_rows_search_algorithms = 'INDEX_SCAN,TABLE_SCAN,HASH_SCAN';
+source include/start_slave.inc;
+source extra/rpl_tests/rpl_slave_check_before_image_consistency.inc;
+
+# cleanup
+source include/stop_slave.inc;
+set @@global.slave_rows_search_algorithms = DEFAULT;
+source include/start_slave.inc;
+
+source include/rpl_end.inc;
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_binlog_xid_count-master.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_binlog_xid_count-master.opt
new file mode 100644
index 00000000000..ed50a8a3deb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_binlog_xid_count-master.opt
@@ -0,0 +1,3 @@
+--innodb --max-binlog-size=4096
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_binlog_xid_count.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_binlog_xid_count.test
new file mode 100644
index 00000000000..7667f153cde
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_binlog_xid_count.test
@@ -0,0 +1,20 @@
+--source include/have_rocksdb.inc
+--source include/have_binlog_format_row.inc
+
+CREATE TABLE `t` (
+ `a` text DEFAULT NULL
+) ENGINE=ROCKSDB;
+
+
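+# Each inserted row is as large as max_binlog_size (or half of it), so the
+# binlog rotates on nearly every statement and the XID events are spread
+# across many binlog files.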
+--let $size=`SELECT @@GLOBAL.max_binlog_size`
+--let $loop_cnt= 100
+while ($loop_cnt)
+{
+ --eval INSERT INTO t SET a=repeat('a', $size)
+ --eval INSERT INTO t SET a=repeat('a', $size/2)
+
+ --dec $loop_cnt
+}
+
+# Cleanup
+DROP TABLE t;
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_check_for_binlog_info.pl b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_check_for_binlog_info.pl
new file mode 100644
index 00000000000..a5e4d9d8035
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_check_for_binlog_info.pl
@@ -0,0 +1,19 @@
+my $pid_file = $ARGV[0];
+my $log_file = $ARGV[1];
+
+open(my $fh, '<', $pid_file) || die "Cannot open pid file $pid_file";
+my $slave_pid = <$fh>;
+close($fh);
+
+$slave_pid =~ s/\s//g;
+open(my $log_fh, '<', $log_file) || die "Cannot open log file $log_file";
+
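+# Skip log lines until one carries the slave's pid (i.e. output from the
+# current server instance), then print a marker for every
+# "RocksDB: Last binlog file position" line that follows.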
+my $pid_found = 0;
+while (my $line = <$log_fh>) {
+ next unless ($pid_found || $line =~ /^[\d-]* [\d:]* $slave_pid /);
+ $pid_found = 1 unless ($pid_found);
+ if ($line =~ /^RocksDB: Last binlog file position.*slave-bin\..*\n/) {
+ print "Binlog Info Found\n";
+ }
+}
+close($log_fh);
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_crash_safe_wal_corrupt.cnf b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_crash_safe_wal_corrupt.cnf
new file mode 100644
index 00000000000..bbffb0ec116
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_crash_safe_wal_corrupt.cnf
@@ -0,0 +1,13 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+log_slave_updates
+rocksdb_enable_2pc=OFF
+rocksdb_wal_recovery_mode=2
+
+[mysqld.2]
+relay_log_recovery=1
+relay_log_info_repository=TABLE
+log_slave_updates
+rocksdb_enable_2pc=OFF
+rocksdb_wal_recovery_mode=2
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_crash_safe_wal_corrupt.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_crash_safe_wal_corrupt.test
new file mode 100644
index 00000000000..0e40e5423a2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_crash_safe_wal_corrupt.test
@@ -0,0 +1,12 @@
+--source suite/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.inc
+
+connection slave;
+--let slave_pid_file= query_get_value(SELECT @@pid_file, @@pid_file, 1)
+
+# Verify the log file contains the Last binlog line, but only if the slave server's pid is found
+--exec perl suite/rocksdb_rpl/t/rpl_check_for_binlog_info.pl $slave_pid_file $MYSQLTEST_VARDIR/log/mysqld.2.err
+
+--disable_query_log
+connection slave;
+call mtr.add_suppression("Recovery from master pos");
+--enable_query_log
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_ddl_high_priority.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_ddl_high_priority.test
new file mode 100644
index 00000000000..7cf4a4d32b5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_ddl_high_priority.test
@@ -0,0 +1,2 @@
+--source include/have_rocksdb.inc
+--source include/rpl_ddl_high_priority.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe-master.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe-master.opt
new file mode 100644
index 00000000000..397310d37b4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe-master.opt
@@ -0,0 +1 @@
+--gtid_mode=ON --enforce_gtid_consistency --log_slave_updates --rocksdb_enable_2pc=OFF
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe-slave.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe-slave.opt
new file mode 100644
index 00000000000..3f959684a75
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe-slave.opt
@@ -0,0 +1,2 @@
+--gtid_mode=ON --enforce_gtid_consistency --log_slave_updates --rocksdb_enable_2pc=OFF
+--sync_binlog=1000 --relay_log_recovery=1
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe.test
new file mode 100644
index 00000000000..5a3e665a025
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe.test
@@ -0,0 +1,11 @@
+-- source include/have_rocksdb.inc
+-- source include/have_gtid.inc
+-- source include/master-slave.inc
+-- source include/have_debug.inc
+-- source include/not_valgrind.inc
+
+if (`select count(*) = 1 from information_schema.global_variables where variable_name = 'slave_gtid_info' and variable_value = 'optimized';`) {
+ --skip Test does not support row_write_committed_slave_gtid_optimized policy due to subtle behavioral differences. rpl_gtid_crash_safe_optimized covers slave_gtid_info=optimized.
+}
+
+-- source ../include/rpl_gtid_crash_safe.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized-master.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized-master.opt
new file mode 100644
index 00000000000..397310d37b4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized-master.opt
@@ -0,0 +1 @@
+--gtid_mode=ON --enforce_gtid_consistency --log_slave_updates --rocksdb_enable_2pc=OFF
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized-slave.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized-slave.opt
new file mode 100644
index 00000000000..e41dcc5eecd
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized-slave.opt
@@ -0,0 +1,2 @@
+--gtid_mode=ON --enforce_gtid_consistency --log_slave_updates --rocksdb_enable_2pc=OFF
+--sync_binlog=1000 --relay_log_recovery=1 --slave_gtid_info=optimized
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized.test
new file mode 100644
index 00000000000..c262403286c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized.test
@@ -0,0 +1,11 @@
+-- source include/have_rocksdb.inc
+-- source include/have_gtid.inc
+-- source include/master-slave.inc
+-- source include/have_debug.inc
+-- source include/not_valgrind.inc
+
+if (`select count(*) = 0 from information_schema.global_variables where variable_name = 'slave_gtid_info' and variable_value = 'optimized';`) {
+ --skip Test requires row_write_committed_slave_gtid_optimized policy where slave_gtid_info=optimized
+}
+
+-- source ../include/rpl_gtid_crash_safe.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.cnf b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.cnf
new file mode 100644
index 00000000000..457665f9e76
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.cnf
@@ -0,0 +1,18 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+log_slave_updates
+gtid_mode=ON
+enforce_gtid_consistency=ON
+rocksdb_enable_2pc=OFF
+rocksdb_wal_recovery_mode=2
+
+[mysqld.2]
+sync_relay_log_info=100
+relay_log_recovery=1
+relay_log_info_repository=FILE
+log_slave_updates
+gtid_mode=ON
+enforce_gtid_consistency=ON
+rocksdb_enable_2pc=OFF
+rocksdb_wal_recovery_mode=2
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.inc b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.inc
new file mode 100644
index 00000000000..16ad535ff9e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.inc
@@ -0,0 +1,154 @@
+source include/have_rocksdb.inc;
+source include/master-slave.inc;
+-- let $uuid = `select @@server_uuid;`
+
+--exec echo > $MYSQLTEST_VARDIR/log/mysqld.1.err
+
+connection master;
+--disable_warnings
+drop table if exists x;
+--enable_warnings
+
+connection master;
+
+select @@binlog_format;
+
+create table x (id int primary key, value int, value2 int, index(value)) engine=rocksdb;
+insert into x values (1,1,1);
+insert into x values (2,1,1);
+insert into x values (3,1,1);
+insert into x values (4,1,1);
+insert into x values (5,1,1);
+-- replace_result $uuid uuid
+select @@global.gtid_executed;
+
+sync_slave_with_master;
+connection slave;
+--let slave_data_dir= query_get_value(SELECT @@DATADIR, @@DATADIR, 1)
+--let slave_pid_file= query_get_value(SELECT @@pid_file, @@pid_file, 1)
+--disable_query_log
+select "--- slave state before crash ---" as "";
+--enable_query_log
+select * from x;
+-- replace_result $uuid uuid
+select @@global.gtid_executed;
+-- replace_result $uuid uuid
+select * from mysql.slave_gtid_info;
+
+--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
+
+--write_file $MYSQL_TMP_DIR/truncate_tail_wal.sh
+#!/bin/bash
+
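+# Chop the last 30 bytes off the newest RocksDB WAL file to simulate a torn
+# write at its tail, then kill the slave without any shutdown handshake.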
+F=`ls -t $slave_data_dir/\#rocksdb/*.log | head -n 1`
+SIZE=`stat -c %s $F`
+NEW_SIZE=`expr $SIZE - 30`
+truncate -s $NEW_SIZE $F
+rc=$?
+if [[ $rc != 0 ]]; then
+ exit 1
+fi
+
+kill -9 `head -1 $slave_pid_file`
+
+exit 0
+EOF
+--chmod 0755 $MYSQL_TMP_DIR/truncate_tail_wal.sh
+--exec $MYSQL_TMP_DIR/truncate_tail_wal.sh
+
+--let $rpl_skip_start_slave= 1
+--source include/rpl_start_server.inc
+--disable_query_log
+select "--- slave state after crash recovery, slave stop, one transaction recovered---" as "";
+--enable_query_log
+connection slave;
+--exec python suite/rocksdb/t/check_log_for_xa.py $MYSQLTEST_VARDIR/log/mysqld.2.err commit,prepare,rollback
+select * from x;
+-- replace_result $uuid uuid
+select @@global.gtid_executed;
+-- replace_result $uuid uuid
+select * from mysql.slave_gtid_info;
+
+--disable_query_log
+select "--- slave state after restart, slave start ---" as "";
+--enable_query_log
+--source include/start_slave.inc
+connection master;
+sync_slave_with_master;
+connection slave;
+select * from x;
+-- replace_result $uuid uuid
+select @@global.gtid_executed;
+-- replace_result $uuid uuid
+select * from mysql.slave_gtid_info;
+
+connection master;
+insert into x values (6,1,1);
+
+sync_slave_with_master;
+connection slave;
+select * from x;
+-- replace_result $uuid uuid
+select @@global.gtid_executed;
+-- replace_result $uuid uuid
+select * from mysql.slave_gtid_info;
+
+connection master;
+insert into x values (7,1,1);
+insert into x values (8,1,1);
+insert into x values (9,1,1);
+insert into x values (10,1,1);
+insert into x values (11,1,1);
+insert into x values (12,1,1);
+select * from x;
+-- replace_result $uuid uuid
+select @@global.gtid_executed;
+sync_slave_with_master;
+
+connection slave;
+
+# Corrupting the WAL. MyRocks does point-in-time recovery with wal_recovery_mode=2.
+# It loses some data but can resync after restarting the slave.
+
+--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
+
+--write_file $MYSQL_TMP_DIR/corrupt_wal.sh
+#!/bin/bash
+
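+# Zero out 100 bytes starting 500 bytes before the end of the newest WAL
+# file, corrupting a record in the middle rather than at the tail, then kill
+# the slave.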
+# expected to be around 950 bytes
+F=`ls -t $slave_data_dir/\#rocksdb/*.log | head -n 1`
+SIZE=`stat -c %s $F`
+OFFSET=$(( $SIZE-500 ))
+dd bs=1 if=/dev/zero of=$F count=100 seek=$OFFSET conv=notrunc
+
+kill -9 `head -1 $slave_pid_file`
+
+exit 0
+EOF
+--chmod 0755 $MYSQL_TMP_DIR/corrupt_wal.sh
+--exec $MYSQL_TMP_DIR/corrupt_wal.sh
+
+--let $rpl_skip_start_slave= 1
+--source include/rpl_start_server.inc
+--disable_query_log
+select "--- slave state after crash recovery, slave stop, WAL was corrupted, point in time recovery with wal_recovery_mode=2 ---" as "";
+--enable_query_log
+select * from x;
+--source include/start_slave.inc
+connection master;
+sync_slave_with_master;
+connection slave;
+select * from x;
+-- replace_result $uuid uuid
+select @@global.gtid_executed;
+-- replace_result $uuid uuid
+select * from mysql.slave_gtid_info;
+
+connection master;
+drop table x;
+
+
+--remove_file $MYSQL_TMP_DIR/truncate_tail_wal.sh
+--remove_file $MYSQL_TMP_DIR/corrupt_wal.sh
+--source include/rpl_end.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.test
new file mode 100644
index 00000000000..3b660b2640f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.test
@@ -0,0 +1,12 @@
+-- source suite/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.inc
+
+connection slave;
+-- let slave_pid_file= query_get_value(SELECT @@pid_file, @@pid_file, 1)
+
+# Verify the log file contains the Last binlog line, but only if the slave server's pid is found
+--exec perl suite/rocksdb_rpl/t/rpl_check_for_binlog_info.pl $slave_pid_file $MYSQLTEST_VARDIR/log/mysqld.2.err
+
+--disable_query_log
+connection slave;
+call mtr.add_suppression("Recovery from master pos");
+--enable_query_log
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_rocksdb_sys_header-master.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_rocksdb_sys_header-master.opt
new file mode 100644
index 00000000000..d828b6c01f4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_rocksdb_sys_header-master.opt
@@ -0,0 +1 @@
+--gtid_mode=ON --enforce_gtid_consistency --log_slave_updates
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_rocksdb_sys_header-slave.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_rocksdb_sys_header-slave.opt
new file mode 100644
index 00000000000..d828b6c01f4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_rocksdb_sys_header-slave.opt
@@ -0,0 +1 @@
+--gtid_mode=ON --enforce_gtid_consistency --log_slave_updates
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_rocksdb_sys_header.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_rocksdb_sys_header.test
new file mode 100644
index 00000000000..d1793c4af1e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_rocksdb_sys_header.test
@@ -0,0 +1,40 @@
+# based on rpl/rpl_gtid_innodb_sys_header.test
+source include/have_rocksdb.inc;
+source include/master-slave.inc;
+source include/have_gtid.inc;
+source include/have_debug.inc;
+source include/not_valgrind.inc;
+
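+# Crash the master just before the XID is written, restart and resync, then
+# verify the error log contains the "RocksDB: Last MySQL Gtid <uuid>" line
+# MyRocks prints from its system header on startup.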
+--let $old_debug = `select @@global.debug;`
+
+connection master;
+create table t1 (a int primary key) engine=rocksdb;
+insert into t1 values(1);
+--eval SET GLOBAL debug = '+d,crash_before_writing_xid'
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--error 2013
+insert into t1 values(2);
+
+--source include/wait_until_disconnected.inc
+--let $rpl_server_number = 1
+--source include/rpl_reconnect.inc
+
+--eval SET GLOBAL debug = `$old_debug`
+
+connection slave;
+disable_warnings;
+source include/start_slave.inc;
+enable_warnings;
+connection master;
+sync_slave_with_master;
+
+connection master;
+--let $master_uuid= query_get_value(select @@server_uuid, @@server_uuid, 1)
+--replace_result $master_uuid master_uuid
+--exec grep 'RocksDB: Last MySQL Gtid $master_uuid' $MYSQLTEST_VARDIR/log/mysqld.1.err
+
+drop table t1;
+source include/rpl_end.inc;
+-- move_file $MYSQLTEST_VARDIR/log/mysqld.1.err $MYSQLTEST_VARDIR/log/mysqld.1.err.orig
+-- write_file $MYSQLTEST_VARDIR/log/mysqld.1.err
+EOF
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_missing_columns_sk_update.cnf b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_missing_columns_sk_update.cnf
new file mode 100644
index 00000000000..92ed71986fe
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_missing_columns_sk_update.cnf
@@ -0,0 +1,13 @@
+!include include/default_mysqld.cnf
+
+[mysqld.1]
+binlog_row_image = COMPLETE
+log_column_names= ON
+
+[mysqld.2]
+binlog_row_image = COMPLETE
+log_column_names= ON
+
+[ENV]
+SERVER_MYPORT_1= @mysqld.1.port
+SERVER_MYPORT_2= @mysqld.2.port
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_missing_columns_sk_update.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_missing_columns_sk_update.test
new file mode 100644
index 00000000000..624f54ac326
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_missing_columns_sk_update.test
@@ -0,0 +1,69 @@
+source include/master-slave.inc;
+source include/have_binlog_format_row.inc;
+
+# Create a table with SKs on master
+connection master;
+set @@sql_log_bin = 0;
+CREATE TABLE `t1` (
+ `a` int(10) unsigned NOT NULL DEFAULT '0',
+ `b` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `c` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `d` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `e` varbinary(64) DEFAULT NULL,
+ `f` int(10) NOT NULL DEFAULT '0',
+ `g` int(10) NOT NULL DEFAULT '0',
+ `h` int(10) unsigned NOT NULL DEFAULT '0',
+ PRIMARY KEY (`a`,`b`),
+ KEY `key1` (`a`, `e`(1)),
+ KEY `key2` (`a`,`h`)
+) ENGINE=RocksDB;
+set @@sql_log_bin = 1;
+
+# Create the same table on the slave, but with two extra columns in the middle (x, y)
+connection slave;
+set @@sql_log_bin = 0;
+CREATE TABLE `t1` (
+ `a` int(10) unsigned NOT NULL DEFAULT '0',
+ `b` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `c` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `d` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `e` varbinary(64) DEFAULT NULL,
+ `f` int(10) NOT NULL DEFAULT '0',
+ `g` int(10) NOT NULL DEFAULT '0',
+ `x` TINYINT(3) UNSIGNED DEFAULT NULL,
+ `y` INT(10) DEFAULT NULL,
+ `h` int(10) unsigned NOT NULL DEFAULT '0',
+ PRIMARY KEY (`a`,`b`),
+ KEY `key1` (`a`, `e`(1)),
+ KEY `key2` (`a`,`h`)
+) ENGINE=RocksDB;
+set @@sql_log_bin = 1;
+
+# Insert something in the table
+connection master;
+INSERT INTO t1 VALUES (1, 1, 1, 1, 'a', 1, 1, 1);
+SELECT * FROM t1;
+sync_slave_with_master;
+
+connection slave;
+SELECT * FROM t1;
+
+# Update a column that belongs to an SK
+connection master;
+UPDATE t1 SET h = 10 WHERE h = 1;
+SELECT * FROM t1;
+sync_slave_with_master;
+
+# Check if all keys are updated on the slave
+connection slave;
+SELECT * FROM t1;
+SELECT COUNT(*) FROM t1 FORCE INDEX (key1) WHERE h = 10 AND a = 1;
+SELECT COUNT(*) FROM t1 FORCE INDEX (key2) WHERE h = 10 AND a = 1;
+SELECT COUNT(*) FROM t1 FORCE INDEX (PRIMARY) WHERE h = 10 AND a = 1;
+
+# Cleanup
+connection master;
+DROP TABLE t1;
+sync_slave_with_master;
+
+source include/rpl_end.inc;
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_mts_dependency_unique_key_conflicts.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_mts_dependency_unique_key_conflicts.test
new file mode 100644
index 00000000000..5869b9a39b9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_mts_dependency_unique_key_conflicts.test
@@ -0,0 +1,64 @@
+# RocksDB clone of rpl_mts.rpl_mts_dependency_unique_key_conflicts
+source include/have_rocksdb.inc;
+source include/have_debug_sync.inc;
+source include/master-slave.inc;
+
+connection slave;
+source include/stop_slave.inc;
+set @save.slave_parallel_workers= @@global.slave_parallel_workers;
+set @save.slave_use_idempotent_for_recovery= @@global.slave_use_idempotent_for_recovery;
+set @save.mts_dependency_replication= @@global.mts_dependency_replication;
+set @save.mts_dependency_order_commits= @@global.mts_dependency_order_commits;
+set @save.debug= @@global.debug;
+set @@global.slave_parallel_workers= 2;
+set @@global.slave_use_idempotent_for_recovery= YES;
+set @@global.mts_dependency_replication= STMT;
+set @@global.mts_dependency_order_commits= false;
+set @@global.debug= '+d,dbug.dep_wait_before_update_execution';
+source include/start_slave.inc;
+
+connection master;
+create table t1 (a int primary key, b int unique key) engine = rocksdb;
+insert into t1 values(1, 1);
+source include/sync_slave_sql_with_master.inc;
+source include/stop_slave.inc;
+
+connection master;
+update t1 set b = 2 where a = 1; # this will stall on slave due to dbug_sync
+insert into t1 values(2, 1); # this should wait for the update to finish
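+# The insert reuses the unique value b=1 that the update releases, so the
+# dependency tracker must serialize the two events even with parallel workers.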
+
+connection slave;
+source include/start_slave.inc;
+# wait till one of the workers reaches the point just before executing the update
+set debug_sync="now wait_for signal.reached";
+
+# wait till the other worker is waiting after executing the table map for the
+# insert
+let $wait_condition=
+ select count(*)= 1 from information_schema.processlist
+ where state = 'Waiting for dependencies to be satisfied';
+source include/wait_condition.inc;
+
+select * from t1;
+set debug_sync="now signal signal.done";
+
+connection master;
+source include/sync_slave_sql_with_master.inc;
+
+connection slave;
+select * from t1;
+
+# Cleanup
+connection master;
+drop table t1;
+source include/sync_slave_sql_with_master.inc;
+connection slave;
+source include/stop_slave.inc;
+set @@global.slave_parallel_workers= @save.slave_parallel_workers;
+set @@global.mts_dependency_replication= @save.mts_dependency_replication;
+set @@global.slave_use_idempotent_for_recovery= @save.slave_use_idempotent_for_recovery;
+set @@global.mts_dependency_order_commits= @save.mts_dependency_order_commits;
+set @@global.debug= @save.debug;
+source include/start_slave.inc;
+
+source include/rpl_end.inc;
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag-slave.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag-slave.opt
new file mode 100644
index 00000000000..1c8dc1e62e9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag-slave.opt
@@ -0,0 +1 @@
+--unique-check-lag-threshold=5
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag.test
new file mode 100644
index 00000000000..cecacda44e8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag.test
@@ -0,0 +1,7 @@
+--echo #
+--echo # Ensure skip_unique_check is set when lag exceeds lag_threshold
+--echo #
+
+--source include/have_rocksdb.inc
+--source ../include/rpl_no_unique_check_on_lag.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag_mts-slave.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag_mts-slave.opt
new file mode 100644
index 00000000000..1c8dc1e62e9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag_mts-slave.opt
@@ -0,0 +1 @@
+--unique-check-lag-threshold=5
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag_mts.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag_mts.test
new file mode 100644
index 00000000000..7e77ec87c3b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag_mts.test
@@ -0,0 +1,3 @@
+--source include/have_rocksdb.inc
+--source ../include/rpl_no_unique_check_on_lag.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_2pc_crash_recover-master.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_2pc_crash_recover-master.opt
new file mode 100644
index 00000000000..f8f297c567c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_2pc_crash_recover-master.opt
@@ -0,0 +1 @@
+--log_bin --log_slave_updates --rocksdb_flush_log_at_trx_commit=1
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_2pc_crash_recover-slave.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_2pc_crash_recover-slave.opt
new file mode 100644
index 00000000000..7a3f630fda2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_2pc_crash_recover-slave.opt
@@ -0,0 +1 @@
+--log_bin --log_slave_updates
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_2pc_crash_recover.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_2pc_crash_recover.test
new file mode 100644
index 00000000000..58b3720904c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_2pc_crash_recover.test
@@ -0,0 +1,57 @@
+--source include/have_binlog_format_row.inc
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+--source include/not_valgrind.inc
+
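+# Each of the five blocks below toggles rocksdb_enable_2pc, arms one debug
+# crash point in the commit path (crash_commit_after_prepare,
+# crash_commit_after_log or crash_commit_after), crashes the server on an
+# INSERT, restarts it, and checks which rows survived recovery.
+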
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+create table t1 (a int primary key, msg varchar(255)) engine=rocksdb;
+
+SET GLOBAL ROCKSDB_ENABLE_2PC = ON;
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+SET SESSION debug_dbug="d,crash_commit_after_prepare";
+--error 0,2013
+insert into t1 values (1, 'dogz');
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+select * from t1;
+
+SET GLOBAL ROCKSDB_ENABLE_2PC = ON;
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+SET SESSION debug_dbug="d,crash_commit_after_log";
+--error 0,2013
+insert into t1 values (2, 'catz'), (3, 'men');
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+select * from t1;
+
+SET GLOBAL ROCKSDB_ENABLE_2PC = ON;
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+SET SESSION debug_dbug="d,crash_commit_after";
+--error 0,2013
+insert into t1 values (4, 'cars'), (5, 'foo');
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+select * from t1;
+
+SET GLOBAL ROCKSDB_ENABLE_2PC = OFF;
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+SET SESSION debug_dbug="d,crash_commit_after_log";
+--error 0,2013
+insert into t1 values (6, 'shipz'), (7, 'tankz');
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+select * from t1;
+
+SET GLOBAL ROCKSDB_ENABLE_2PC = OFF;
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+SET SESSION debug_dbug="d,crash_commit_after";
+--error 0,2013
+insert into t1 values (8, 'space'), (9, 'time');
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+select * from t1;
+
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized-master.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized-master.opt
new file mode 100644
index 00000000000..c747adc94d5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized-master.opt
@@ -0,0 +1 @@
+--gtid_mode=ON --enforce_gtid_consistency --log_bin --log_slave_updates
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized-slave.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized-slave.opt
new file mode 100644
index 00000000000..6cde3c553d4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized-slave.opt
@@ -0,0 +1 @@
+--gtid_mode=ON --enforce_gtid_consistency --log_bin --log_slave_updates --slave-gtid-info=optimized
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized.test
new file mode 100644
index 00000000000..c8a0c8daf10
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized.test
@@ -0,0 +1,51 @@
+--source include/have_rocksdb.inc
+--source include/master-slave.inc
+--source include/have_binlog_format_row.inc
+
+if (`select count(*) = 0 from information_schema.global_variables where variable_name = 'slave_gtid_info' and variable_value = 'optimized';`) {
+ --skip Test requires row_write_committed_slave_gtid_optimized policy where slave_gtid_info=optimized
+}
+
+--echo Make changes in master
+create table test1 (a int primary key, b int) engine=rocksdb;
+insert into test1 values (1, 1);
+
+--echo Make sure slave is up-to-date and mysql.slave_gtid_info is good
+sync_slave_with_master slave;
+connection slave;
+select * from test1;
+-- replace_regex /[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/UUID/
+select id, database_name, last_gtid from mysql.slave_gtid_info;
+
+--echo Make changes in master
+connection master;
+insert into test1 values (2, 2);
+
+--echo Make sure slave is up-to-date and mysql.slave_gtid_info is good
+sync_slave_with_master slave;
+connection slave;
+select @@slave_gtid_info;
+select * from test1;
+
+-- replace_regex /[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/UUID/
+select * from mysql.slave_gtid_info;
+
+--echo Make changes in master
+connection master;
+insert into test1 values (3, 3);
+insert into test1 values (4, 4);
+
+--echo Make sure slave is up-to-date and mysql.slave_gtid_info is good
+sync_slave_with_master slave;
+connection slave;
+select * from test1;
+
+-- replace_regex /[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/UUID/
+select id, database_name, last_gtid from mysql.slave_gtid_info;
+
+connection master;
+DROP TABLE IF EXISTS test1;
+
+sync_slave_with_master slave;
+
+--source include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot-master.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot-master.opt
new file mode 100644
index 00000000000..c747adc94d5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot-master.opt
@@ -0,0 +1 @@
+--gtid_mode=ON --enforce_gtid_consistency --log_bin --log_slave_updates
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot-slave.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot-slave.opt
new file mode 100644
index 00000000000..c747adc94d5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot-slave.opt
@@ -0,0 +1 @@
+--gtid_mode=ON --enforce_gtid_consistency --log_bin --log_slave_updates
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot.test
new file mode 100644
index 00000000000..200f1cb314e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot.test
@@ -0,0 +1,374 @@
+--source include/have_rocksdb.inc
+--source include/master-slave.inc
+--source include/have_binlog_format_row.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+--echo # Establish connection con1 (user=root)
+connect (con1,localhost,root,,);
+--echo # Establish connection con2 (user=root)
+connect (con2,localhost,root,,);
+--echo # Establish connection con3 (user=root)
+connect (con3,localhost,root,,);
+--echo # Establish connection con4 (user=root)
+connect (con4,localhost,root,,);
+
+--echo # reset replication to guarantee that master-bin.000001 is used
+connection slave;
+--source include/stop_slave.inc
+RESET SLAVE;
+RESET MASTER;
+
+connection master;
+RESET MASTER;
+
+connection slave;
+--replace_result $MASTER_MYPORT MASTER_PORT
+eval CHANGE MASTER TO master_host="127.0.0.1",master_port=$MASTER_MYPORT,master_user="root";
+--source include/start_slave.inc
+
+### Test 1:
+### - While a consistent snapshot transaction is executed,
+### no external inserts should be visible to the transaction.
+
+--echo # Switch to connection con1
+connection con1;
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 VALUES(1);
+
+SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
+--error ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+--error ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+ROLLBACK;
+SET TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+
+
+--disable_query_log
+--disable_result_log
+let $x=1000;
+while ($x) {
+ START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+ dec $x;
+}
+--enable_query_log
+--enable_result_log
+
+-- replace_regex /[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/UUID/
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+
+--echo # Switch to connection con2
+connection con2;
+INSERT INTO t1 VALUES(2);
+INSERT INTO t1 VALUES(3);
+
+--echo # Switch to connection con1
+connection con1;
+SELECT * FROM t1; # should fetch one row
+COMMIT;
+
+SELECT * FROM t1; # should fetch three rows
+
+DROP TABLE t1;
+
+### Test 2:
+### - confirm result from snapshot select and replication replay matches original
+
+--echo # Switch to connection con1
+connection con1;
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 VALUES(1);
+
+-- replace_regex /[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/UUID/
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+-- replace_regex /[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/UUID/
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+-- replace_regex /[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/UUID/
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+-- replace_regex /[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/UUID/
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+
+let $binlog_pos = query_get_value(START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT, Position, 1);
+
+--echo # Switch to connection con2
+connection con2;
+INSERT INTO t1 VALUES(2);
+INSERT INTO t1 VALUES(3);
+
+--echo # Switch to connection con1
+connection con1;
+SELECT * FROM t1;
+
+--let $outfile = $MYSQLTEST_VARDIR/tmp/rpl_rocksdb_snapshot.out.file
+
+--replace_result $MYSQLTEST_VARDIR <MYSQLTEST_VARDIR>
+eval SELECT * INTO OUTFILE '$outfile' FROM t1;
+COMMIT;
+
+--echo # Switch to slave
+sync_slave_with_master slave;
+
+CREATE TABLE t1_backup LIKE t1;
+INSERT INTO t1_backup SELECT * FROM t1;
+--source include/stop_slave.inc
+RESET SLAVE;
+RESET MASTER;
+DELETE FROM t1;
+--replace_result $MYSQLTEST_VARDIR <MYSQLTEST_VARDIR>
+eval LOAD DATA INFILE '$outfile' INTO TABLE t1;
+SELECT * FROM t1;
+
+--replace_result $MASTER_MYPORT MASTER_PORT $binlog_pos binlog_pos
+eval CHANGE MASTER TO master_host="127.0.0.1",master_port=$MASTER_MYPORT,master_user="root",master_log_file="master-bin.000001",master_log_pos=$binlog_pos;
+--source include/start_slave.inc
+
+connection master;
+sync_slave_with_master slave;
+
+SELECT * FROM t1;
+SELECT * FROM t1_backup;
+DROP TABLE t1_backup;
+
+connection master;
+DROP TABLE t1;
+--remove_file $outfile
+
+### Test 3:
+### - confirm result from snapshot select and replication replay matches original
+### - use non-deterministic concurrency
+
+--echo # Switch to connection con1
+connection con1;
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 VALUES(1);
+
+--echo # async queries from con2
+connection con2;
+send INSERT INTO t1 VALUES(2);
+
+--echo # async queries from con3
+connection con3;
+send INSERT INTO t1 VALUES(21);
+
+--echo # Switch to connection con1
+connection con1;
+
+let $binlog_pos = query_get_value(START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT, Position, 1);
+
+--echo # Switch to connection con4
+connection con4;
+INSERT INTO t1 VALUES(9);
+
+--echo # Switch to connection con1
+connection con1;
+
+--let $outfile = $MYSQLTEST_VARDIR/tmp/rpl_rocksdb_snapshot.out.file
+
+--replace_result $MYSQLTEST_VARDIR <MYSQLTEST_VARDIR>
+eval SELECT * INTO OUTFILE '$outfile' FROM t1;
+COMMIT;
+
+--echo # reap async statements
+connection con2;
+reap;
+
+connection con3;
+reap;
+
+--echo # Switch to slave
+sync_slave_with_master slave;
+
+CREATE TABLE t1_backup LIKE t1;
+INSERT INTO t1_backup SELECT * FROM t1;
+--source include/stop_slave.inc
+RESET SLAVE;
+RESET MASTER;
+DELETE FROM t1;
+--replace_result $MYSQLTEST_VARDIR <MYSQLTEST_VARDIR>
+eval LOAD DATA INFILE '$outfile' INTO TABLE t1;
+
+--replace_result $MASTER_MYPORT MASTER_PORT $binlog_pos binlog_pos
+eval CHANGE MASTER TO master_host="127.0.0.1",master_port=$MASTER_MYPORT,master_user="root",master_log_file="master-bin.000001",master_log_pos=$binlog_pos;
+--source include/start_slave.inc
+
+--echo # sync and then query slave
+connection master;
+sync_slave_with_master slave;
+
+let $sum1 = `SELECT SUM(a) from t1`;
+let $sum2 = `SELECT SUM(a) from t1_backup`;
+--disable_query_log
+eval select $sum2 - $sum1 ShouldBeZero;
+--enable_query_log
+
+DROP TABLE t1_backup;
+
+connection master;
+DROP TABLE t1;
+--remove_file $outfile
+
+### Test 4:
+### - confirm result from snapshot select and replication replay using gtid protocol matches original
+
+--echo # Switch to connection con1
+connection con1;
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 VALUES(1);
+
+-- replace_regex /[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/UUID/
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+-- replace_regex /[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/UUID/
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+-- replace_regex /[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/UUID/
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+-- replace_regex /[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/UUID/
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+
+let $gtid_executed = query_get_value(START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT, Gtid_executed, 1);
+
+--echo # Switch to connection con2
+connection con2;
+INSERT INTO t1 VALUES(2);
+INSERT INTO t1 VALUES(3);
+
+--echo # Switch to connection con1
+connection con1;
+SELECT * FROM t1;
+
+--let $outfile = $MYSQLTEST_VARDIR/tmp/rpl_rocksdb_snapshot.out.file
+
+--replace_result $MYSQLTEST_VARDIR <MYSQLTEST_VARDIR>
+eval SELECT * INTO OUTFILE '$outfile' FROM t1;
+COMMIT;
+
+--echo # Switch to slave
+sync_slave_with_master slave;
+
+CREATE TABLE t1_backup LIKE t1;
+INSERT INTO t1_backup SELECT * FROM t1;
+--source include/stop_slave.inc
+RESET SLAVE;
+RESET MASTER;
+--replace_result $gtid_executed gtid_executed_from_snapshot
+eval SET @@global.gtid_purged='$gtid_executed';
+DELETE FROM t1;
+--replace_result $MYSQLTEST_VARDIR <MYSQLTEST_VARDIR>
+eval LOAD DATA INFILE '$outfile' INTO TABLE t1;
+SELECT * FROM t1;
+
+--replace_result $MASTER_MYPORT MASTER_PORT
+eval CHANGE MASTER TO master_host="127.0.0.1",master_port=$MASTER_MYPORT,master_user="root", master_auto_position=1;
+--source include/start_slave.inc
+
+connection master;
+sync_slave_with_master slave;
+
+SELECT * FROM t1;
+SELECT * FROM t1_backup;
+DROP TABLE t1_backup;
+
+connection master;
+DROP TABLE t1;
+--remove_file $outfile
+
+### Test 5:
+### - confirm result from snapshot select and replication replay using gtid_protocol matches original
+### - use non-deterministic concurrency
+
+--echo # Switch to connection con1
+connection con1;
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=rocksdb;
+INSERT INTO t1 VALUES(1);
+
+--echo # async queries from con2
+connection con2;
+send INSERT INTO t1 VALUES(2);
+
+--echo # async queries from con3
+connection con3;
+send INSERT INTO t1 VALUES(21);
+
+--echo # Switch to connection con1
+connection con1;
+
+let $gtid_executed = query_get_value(START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT, Gtid_executed, 1);
+
+--echo # Switch to connection con4
+connection con4;
+INSERT INTO t1 VALUES(9);
+
+--echo # Switch to connection con1
+connection con1;
+
+--let $outfile = $MYSQLTEST_VARDIR/tmp/rpl_rocksdb_snapshot.out.file
+
+--replace_result $MYSQLTEST_VARDIR <MYSQLTEST_VARDIR>
+eval SELECT * INTO OUTFILE '$outfile' FROM t1;
+COMMIT;
+
+--echo # reap async statements
+connection con2;
+reap;
+
+connection con3;
+reap;
+
+--echo # Switch to slave
+sync_slave_with_master slave;
+
+CREATE TABLE t1_backup LIKE t1;
+INSERT INTO t1_backup SELECT * FROM t1;
+--source include/stop_slave.inc
+RESET SLAVE;
+RESET MASTER;
+--replace_result $gtid_executed gtid_executed_from_snapshot
+eval SET @@global.gtid_purged='$gtid_executed';
+DELETE FROM t1;
+
+--replace_result $MYSQLTEST_VARDIR <MYSQLTEST_VARDIR>
+eval LOAD DATA INFILE '$outfile' INTO TABLE t1;
+
+--replace_result $MASTER_MYPORT MASTER_PORT
+eval CHANGE MASTER TO master_host="127.0.0.1",master_port=$MASTER_MYPORT,master_user="root", master_auto_position=1;
+--source include/start_slave.inc
+
+--echo # sync and then query slave
+connection master;
+sync_slave_with_master slave;
+
+let $sum1 = `SELECT SUM(a) from t1`;
+let $sum2 = `SELECT SUM(a) from t1_backup`;
+--disable_query_log
+eval select $sum2 - $sum1 ShouldBeZero;
+--enable_query_log
+
+DROP TABLE t1_backup;
+
+connection master;
+DROP TABLE t1;
+--remove_file $outfile
+
+--echo # Switch to connection default + close connections con1 through con4
+connection con1;
+disconnect con1;
+--source include/wait_until_disconnected.inc
+connection con2;
+disconnect con2;
+--source include/wait_until_disconnected.inc
+connection con3;
+disconnect con3;
+--source include/wait_until_disconnected.inc
+connection con4;
+disconnect con4;
+--source include/wait_until_disconnected.inc
+
+connection default;
+sync_slave_with_master slave;
+--source include/stop_slave.inc
+CHANGE MASTER to master_auto_position=0;
+--source include/start_slave.inc
+
+--source include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot_without_gtid.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot_without_gtid.test
new file mode 100644
index 00000000000..79d71f20e8a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot_without_gtid.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+--source include/master-slave.inc
+--source include/have_binlog_format_row.inc
+
+--connection master
+create table t1(a int primary key);
+
+FLUSH LOGS;
+
+insert into t1 values(1);
+insert into t1 values(2);
+
+FLUSH LOGS;
+
+START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
+
+drop table t1;
+-- source include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_stress_crash-master.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_stress_crash-master.opt
new file mode 100644
index 00000000000..5c5a73bf2a4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_stress_crash-master.opt
@@ -0,0 +1,2 @@
+--gtid_mode=ON --enforce_gtid_consistency --log_slave_updates
+--binlog_rows_query_log_events=TRUE --rocksdb_unsafe_for_binlog=TRUE
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_stress_crash-slave.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_stress_crash-slave.opt
new file mode 100644
index 00000000000..b3d52445ad8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_stress_crash-slave.opt
@@ -0,0 +1,3 @@
+--gtid_mode=ON --enforce_gtid_consistency --log_slave_updates --max_binlog_size=50000
+--slave_parallel_workers=30 --relay_log_recovery=1 --rocksdb_unsafe_for_binlog=TRUE
+--rocksdb_wal_recovery_mode=2
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_stress_crash.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_stress_crash.test
new file mode 100644
index 00000000000..17b866060b7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_stress_crash.test
@@ -0,0 +1,26 @@
+-- source include/big_test.inc
+-- source include/master-slave.inc
+-- source include/not_valgrind.inc
+-- source include/have_gtid.inc
+-- source include/have_rocksdb.inc
+
+connection master;
+call mtr.add_suppression(".*");
+sync_slave_with_master;
+-- source include/stop_slave.inc
+change master to master_auto_position=1;
+-- source include/start_slave.inc
+
+-- let $iter=100
+-- let $databases=30
+-- let $num_crashes=100
+-- let $include_silent=1
+-- let $storage_engine='rocksdb'
+-- source extra/rpl_tests/rpl_parallel_load_innodb.test
+-- let $include_silent=0
+
+-- source include/stop_slave.inc
+change master to master_auto_position=0;
+-- source include/start_slave.inc
+
+-- source include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format-master.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format-master.opt
new file mode 100644
index 00000000000..39bb3238861
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format-master.opt
@@ -0,0 +1,2 @@
+--gtid_mode=ON --enforce_gtid_consistency --log_slave_updates
+--binlog_format=STATEMENT --default-storage-engine=rocksdb
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format-slave.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format-slave.opt
new file mode 100644
index 00000000000..826f1ee9cb6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format-slave.opt
@@ -0,0 +1,2 @@
+--gtid_mode=ON --enforce_gtid_consistency --log_slave_updates
+--sync_binlog=1000 --relay_log_recovery=1 --default-storage-engine=rocksdb
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format.test
new file mode 100644
index 00000000000..1ea9add8019
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format.test
@@ -0,0 +1,52 @@
+# Checks if the slave stops executing transactions when master's binlog format
+# is STATEMENT but rpl_skip_tx_api is enabled
+-- source include/have_rocksdb.inc
+-- source include/master-slave.inc
+
+call mtr.add_suppression("Master's binlog format is not ROW but rpl_skip_tx_api is enabled on the slave");
+
+connection slave;
+let $old_rpl_skip_tx_api= `SELECT @@global.rpl_skip_tx_api`;
+set global rpl_skip_tx_api=ON;
+
+connection master;
+let $old_rocksdb_unsafe_for_binlog= `SELECT @@global.rocksdb_unsafe_for_binlog`;
+set global rocksdb_unsafe_for_binlog=1;
+create table t1(a int);
+set session binlog_format=STATEMENT;
+insert into t1 values(1);
+
+# Wait till we hit the binlog format mismatch error
+connection slave;
+let $slave_sql_errno= convert_error(ER_MTS_INCONSISTENT_DATA); # 1756
+let $show_slave_sql_error= 1;
+source include/wait_for_slave_sql_error.inc;
+
+# Print table
+connection slave;
+echo "Table after error";
+select * from t1;
+
+connection slave;
+# Turn off rpl_skip_tx_api and start the slave again
+set global rpl_skip_tx_api=OFF;
+source include/start_slave.inc;
+
+connection slave;
+source include/sync_slave_sql_with_master.inc;
+
+connection slave;
+# Print table again
+echo "Table after error fixed";
+select * from t1;
+
+# Cleanup
+connection master;
+drop table t1;
+eval set global rocksdb_unsafe_for_binlog=$old_rocksdb_unsafe_for_binlog;
+sync_slave_with_master;
+
+connection slave;
+eval set global rpl_skip_tx_api=$old_rpl_skip_tx_api;
+
+-- source include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.cnf b/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.cnf
new file mode 100644
index 00000000000..71e124adc81
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.cnf
@@ -0,0 +1,15 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+log_slave_updates
+gtid_mode=ON
+enforce_gtid_consistency=ON
+
+[mysqld.2]
+relay_log_recovery=1
+relay_log_info_repository=FILE
+log_slave_updates
+gtid_mode=ON
+enforce_gtid_consistency=ON
+slave_use_idempotent_for_recovery=Yes
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test
new file mode 100644
index 00000000000..6d953ead4e9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test
@@ -0,0 +1,78 @@
+
+--source include/have_binlog_format_row.inc
+--source include/have_rocksdb.inc
+--source include/master-slave.inc
+--source include/have_gtid.inc
+--source include/not_valgrind.inc
+
+# This is a test case for issue#655 -- SingleDelete on the Primary Key may
+# leave behind extra rows compared to the Secondary Keys
+
+call mtr.add_suppression("Recovery from master pos");
+
+connection master;
+--disable_warnings
+drop table if exists r1;
+--enable_warnings
+create table r1 (id1 int, id2 int, primary key (id1, id2), index i (id2)) engine=rocksdb;
+insert into r1 values (1, 1000);
+
+sync_slave_with_master;
+connection slave;
+set global rocksdb_force_flush_memtable_now=1;
+--let slave_data_dir= query_get_value(SELECT @@DATADIR, @@DATADIR, 1)
+--let slave_binlog_file= query_get_value(SHOW MASTER STATUS, File, 1)
+--let slave_pid_file= query_get_value(SELECT @@pid_file, @@pid_file, 1)
+--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
+
+--write_file $MYSQL_TMP_DIR/truncate_tail_binlog.sh
+#!/bin/bash
+
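+# Truncate the last 100 bytes of the slave's current binlog to simulate a
+# partially written tail, then kill the server so that crash recovery has to
+# cope with the lost binlog tail.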
+F=$slave_data_dir/$slave_binlog_file
+SIZE=`stat -c %s $F`
+NEW_SIZE=`expr $SIZE - 100`
+truncate -s $NEW_SIZE $F
+rc=$?
+if [[ $rc != 0 ]]; then
+ exit 1
+fi
+
+kill -9 `head -1 $slave_pid_file`
+
+exit 0
+EOF
+--chmod 0755 $MYSQL_TMP_DIR/truncate_tail_binlog.sh
+--exec $MYSQL_TMP_DIR/truncate_tail_binlog.sh
+
+--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
+
+# Crash recovery (losing some binlogs) with slave_use_idempotent_for_recovery
+# may replay the same transactions with slave_exec_mode=idempotent implicitly
+# enabled. On the slave, the last insert is converted to an update with the
+# same key. It should be handled as a SingleDelete and a Put (same as
+# singledelete_idempotent_table.test).
+
+--source include/rpl_start_server.inc
+--source include/start_slave.inc
+
+# Because the binlog was truncated, the slave may still think it has processed
+# up to the truncated position, so master_pos_wait() can return prematurely.
+# Add a new transaction on the master to force master_pos_wait() to wait.
+connection master;
+insert into r1 values (2,2000);
+sync_slave_with_master;
+
+connection slave;
+delete r1 from r1 force index (i) where id2=1000;
+select id1,id2 from r1 force index (primary) where id1=1 and id2=1000;
+select id2 from r1 force index (i) where id1=1 and id2=1000;
+set global rocksdb_compact_cf='default';
+select id1,id2 from r1 force index (primary) where id1=1 and id2=1000;
+select id2 from r1 force index (i) where id1=1 and id2=1000;
+
+connection master;
+drop table r1;
+
+--remove_file $MYSQL_TMP_DIR/truncate_tail_binlog.sh
+--source include/rpl_end.inc
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_table.cnf b/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_table.cnf
new file mode 100644
index 00000000000..5f1f87d762f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_table.cnf
@@ -0,0 +1,11 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+log_slave_updates
+
+[mysqld.2]
+relay_log_recovery=1
+#relay_log_info_repository=FILE
+log_slave_updates
+#rbr_idempotent_tables='r1'
+slave_exec_mode=IDEMPOTENT
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_table.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_table.test
new file mode 100644
index 00000000000..00dce7c2ca9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_table.test
@@ -0,0 +1,44 @@
+
+--source include/have_binlog_format_row.inc
+--source include/have_rocksdb.inc
+--source include/master-slave.inc
+#--source include/have_gtid.inc
+--source include/not_valgrind.inc
+
+# This is a test case for issue#655 -- SingleDelete on the Primary Key may
+# leave behind extra rows compared to the Secondary Keys
+
+connection master;
+--disable_warnings
+drop table if exists r1;
+--enable_warnings
+create table r1 (id1 int, id2 int, primary key (id1, id2), index i (id2)) engine=rocksdb;
+insert into r1 values (1, 1000);
+set sql_log_bin=0;
+delete from r1 where id1=1 and id2=1000;
+set sql_log_bin=1;
+
+sync_slave_with_master;
+connection slave;
+set global rocksdb_force_flush_memtable_now=1;
+
+connection master;
+# Insert the same key again. Since the slave applies events in idempotent
+# mode, the insert is converted to an update with the same key. MyRocks
+# should issue a SingleDelete and a Put for the key.
+insert into r1 values (1, 1000);
+sync_slave_with_master;
+
+connection slave;
+delete r1 from r1 force index (i) where id2=1000;
+select id1,id2 from r1 force index (primary);
+select id2 from r1 force index (i);
+set global rocksdb_compact_cf='default';
+select id1,id2 from r1 force index (primary);
+select id2 from r1 force index (i);
+
+connection master;
+drop table r1;
+
+--source include/rpl_end.inc
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/combinations b/storage/rocksdb/mysql-test/rocksdb_stress/combinations
new file mode 100644
index 00000000000..b7316c71485
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_stress/combinations
@@ -0,0 +1,5 @@
+[write_committed]
+rocksdb_write_policy=write_committed
+
+[write_prepared]
+rocksdb_write_policy=write_prepared
diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/include/have_rocksdb.inc b/storage/rocksdb/mysql-test/rocksdb_stress/include/have_rocksdb.inc
new file mode 100644
index 00000000000..1f762d38c64
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_stress/include/have_rocksdb.inc
@@ -0,0 +1,10 @@
+if (`SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'rocksdb' AND support IN ('YES', 'DEFAULT', 'ENABLED')`)
+{
+ --skip Test requires engine RocksDB.
+}
+
+--disable_query_log
+# Table statistics can vary depending on when the memtables are flushed, so
+# flush them at the beginning of the test to ensure the test runs consistently.
+set global rocksdb_force_flush_memtable_now = true;
+--enable_query_log
diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/include/have_rocksdb.opt b/storage/rocksdb/mysql-test/rocksdb_stress/include/have_rocksdb.opt
new file mode 100644
index 00000000000..36d7dda1609
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_stress/include/have_rocksdb.opt
@@ -0,0 +1,12 @@
+--loose-enable-rocksdb
+--loose-enable-rocksdb_global_info
+--loose-enable-rocksdb_ddl
+--loose-enable-rocksdb_cf_options
+--loose-enable_rocksdb_perf_context
+--loose-enable_rocksdb_perf_context_global
+--loose-enable-rocksdb_index_file_map
+--loose-enable-rocksdb_dbstats
+--loose-enable-rocksdb_cfstats
+--loose-enable-rocksdb_lock_info
+--loose-enable-rocksdb_trx
+--loose-enable-rocksdb_locks
diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/include/rocksdb_stress.inc b/storage/rocksdb/mysql-test/rocksdb_stress/include/rocksdb_stress.inc
new file mode 100644
index 00000000000..3838987bf88
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_stress/include/rocksdb_stress.inc
@@ -0,0 +1,57 @@
+# Run the load generator to populate the table and generate concurrent
+# updates. After the load generator is complete, verify the tables on the
+# master and the slave are consistent.
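+#
+# Expects the caller to set: $table, $num_records, $max_records, $num_loaders,
+# $num_checkers, $num_requests and $reap_delay.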
+
+--sync_slave_with_master
+
+--connection master
+--let $master_host = 127.0.0.1
+let $MYSQL_BASEDIR = `SELECT @@BASEDIR`;
+
+let $exec =
+ python
+ $MYSQL_BASEDIR/storage/rocksdb/mysql-test/rocksdb_stress/t/load_generator.py \
+ -L $MYSQL_TMP_DIR/load_generator.log -H $master_host -t $table \
+ -P $MASTER_MYPORT -n $num_records -m $max_records \
+ -l $num_loaders -c $num_checkers -r $num_requests \
+ -E $MYSQLTEST_VARDIR/tmp/mysqld.1.expect \
+ -D $reap_delay;
+
+exec $exec;
+
+enable_reconnect;
+source include/wait_until_connected_again.inc;
+
+--let $master_checksum = query_get_value(CHECKSUM TABLE $table, Checksum, 1)
+
+# if sync_slave_with_master had a configurable timeout this would not be needed
+let $slave_sync_timeout = 7200;
+--source include/wait_for_slave_to_sync_with_master.inc
+
+--connection slave
+--let $slave_checksum = query_get_value(CHECKSUM TABLE $table, Checksum, 1)
+
+let $not_same = `SELECT $master_checksum-$slave_checksum`;
+if ($not_same)
+{
+ --die "The checksums of table $table for the master and slave do not match!"
+}
+
+# Cleanup
+--connection master
+--let $cleanup = DROP TABLE $table
+eval $cleanup;
+
+# if sync_slave_with_master had a configurable timeout this would not be needed
+let $slave_sync_timeout = 7200;
+--source include/wait_for_slave_to_sync_with_master.inc
+
+--connection slave
+--source include/stop_slave.inc
+# For stress tests, the replication thread sometimes cannot connect to the
+# master temporarily, either because the master crashed and is recovering or
+# because the master is too busy and could not service the slave's requests.
+# mtr's internal check requires that there be no errors in slave status;
+# restarting replication clears the errors.
+--source include/start_slave.inc
+--source include/stop_slave.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/my.cnf b/storage/rocksdb/mysql-test/rocksdb_stress/my.cnf
new file mode 100644
index 00000000000..6597d18b060
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_stress/my.cnf
@@ -0,0 +1,9 @@
+!include include/default_my.cnf
+!include ../storage/rocksdb/mysql-test/rocksdb/my.cnf
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+binlog_format=row
+
+[mysqld.2]
+binlog_format=row
diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress.result b/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress.result
new file mode 100644
index 00000000000..9f161b18c05
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress.result
@@ -0,0 +1,23 @@
+include/master-slave.inc
+[connection master]
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1(id INT PRIMARY KEY,
+thread_id INT NOT NULL,
+request_id BIGINT UNSIGNED NOT NULL,
+update_count INT UNSIGNED NOT NULL DEFAULT 0,
+zero_sum INT DEFAULT 0,
+msg VARCHAR(1024),
+msg_length int,
+msg_checksum varchar(128),
+auto_inc BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
+KEY(auto_inc),
+KEY msg_i(msg(255), zero_sum))
+ENGINE=RocksDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin;
+stop slave;
+start slave;
+DROP TABLE t1;
+stop slave;
+start slave;
+include/stop_slave.inc
+include/start_slave.inc
+include/stop_slave.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress_crash.result b/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress_crash.result
new file mode 100644
index 00000000000..9f161b18c05
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress_crash.result
@@ -0,0 +1,23 @@
+include/master-slave.inc
+[connection master]
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1(id INT PRIMARY KEY,
+thread_id INT NOT NULL,
+request_id BIGINT UNSIGNED NOT NULL,
+update_count INT UNSIGNED NOT NULL DEFAULT 0,
+zero_sum INT DEFAULT 0,
+msg VARCHAR(1024),
+msg_length int,
+msg_checksum varchar(128),
+auto_inc BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
+KEY(auto_inc),
+KEY msg_i(msg(255), zero_sum))
+ENGINE=RocksDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin;
+stop slave;
+start slave;
+DROP TABLE t1;
+stop slave;
+start slave;
+include/stop_slave.inc
+include/start_slave.inc
+include/stop_slave.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/suite.opt b/storage/rocksdb/mysql-test/rocksdb_stress/suite.opt
new file mode 100644
index 00000000000..22c9d7a300e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_stress/suite.opt
@@ -0,0 +1,2 @@
+--plugin-load=$HA_ROCKSDB_SO --default-storage-engine=rocksdb
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/suite.pm b/storage/rocksdb/mysql-test/rocksdb_stress/suite.pm
new file mode 100644
index 00000000000..106a1d6a0e7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_stress/suite.pm
@@ -0,0 +1,28 @@
+package My::Suite::Rocksdb;
+
+use My::SysInfo;
+
+#
+# Note: ../rocksdb_sys_vars/suite.pm file has a similar
+# function. If you modify this file, consider modifying that one, too.
+#
+@ISA = qw(My::Suite);
+use My::Find;
+use File::Basename;
+use strict;
+
+#sub is_default { not $::opt_embedded_server }
+
+my $sst_dump=
+::mtr_exe_maybe_exists(
+ "$::bindir/storage/rocksdb$::opt_vs_config/sst_dump",
+ "$::path_client_bindir/sst_dump");
+return "RocksDB is not compiled, no sst_dump" unless $sst_dump;
+$ENV{MARIAROCKS_SST_DUMP}="$sst_dump";
+
+# Temporarily disable testing under valgrind, due to MDEV-12439
+return "RocksDB tests disabled under valgrind" if ($::opt_valgrind);
+
+
+bless { };
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/t/disabled.def b/storage/rocksdb/mysql-test/rocksdb_stress/t/disabled.def
new file mode 100644
index 00000000000..42b2f661a8d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_stress/t/disabled.def
@@ -0,0 +1,2 @@
+rocksdb_stress_crash: Client script seems to hang on terminal input in MariaDB
+rocksdb_stress: Client script seems to hang on terminal input in MariaDB
diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/t/load_generator.py b/storage/rocksdb/mysql-test/rocksdb_stress/t/load_generator.py
new file mode 100644
index 00000000000..c1d3e7fb81c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_stress/t/load_generator.py
@@ -0,0 +1,1042 @@
+import cStringIO
+import array
+import hashlib
+import MySQLdb
+from MySQLdb.constants import CR
+from MySQLdb.constants import ER
+from collections import deque
+import os
+import random
+import signal
+import sys
+import threading
+import time
+import string
+import traceback
+import logging
+import argparse
+
+# This is a generic load_generator for mysqld which persists across server
+# restarts and attempts to verify both committed and uncommitted transactions
+# are persisted correctly.
+#
+# The table schema used should look something like:
+#
+# CREATE TABLE t1(id INT PRIMARY KEY,
+# thread_id INT NOT NULL,
+# request_id BIGINT UNSIGNED NOT NULL,
+# update_count INT UNSIGNED NOT NULL DEFAULT 0,
+# zero_sum INT DEFAULT 0,
+# msg VARCHAR(1024),
+# msg_length int,
+# msg_checksum varchar(128),
+# KEY msg_i(msg(255), zero_sum))
+# ENGINE=RocksDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin;
+#
+# zero_sum should always sum to 0 across the table, no matter when it is
+# read. Each transaction maintains this invariant.
+#
+# request_id should be unique across transactions. It is used during
+# transaction verification and is monotonically increasing.
+#
+# Several threads are spawned at the start of the test to populate the table.
+# Once the table is populated, both loader and checker threads are created.
+#
+# The row id space is split into two sections: exclusive and shared. Each
+# loader thread owns some part of the exclusive section which it maintains
+# complete information on insert/updates/deletes. Since this section is only
+# modified by one thread, the thread can maintain an accurate picture of all
+# changes. The shared section contains rows which multiple threads can
+# update/delete/insert. For checking purposes, the request_id is used to
+# determine if a row is consistent with a committed transaction.
+#
+# Each loader thread's transaction consists of selecting some number of rows
+# randomly. The thread can choose to delete the row, update the row or insert
+# the row if it doesn't exist. The state of the rows owned by the loader
+# thread is tracked within the thread's id_map. This map contains the row id
+# and the request_id of the latest update. For indicating deleted rows, the
+# -request_id marker is used. Thus, at any point in time, the thread's id_map
+# should reflect the exact state of the rows that are owned.
+#
+# The loader thread also maintains the state of older transactions that were
+# successfully processed in addition to the current transaction, which may or
+# may not be committed. Each transaction state consists of the row id, and the
+# request_id. Again, -request_id is used to indicate a delete. For committed
+# transactions, the thread can verify the request_id of the row is larger than
+# what the thread has recorded. For uncommitted transactions, the thread would
+# verify the request_id of the row does not match that of the transaction. To
+# determine whether or not a transaction succeeded in case of a crash right at
+# commit, each thread always includes a particular row in the transaction which
+# it could use to check the request id against.
+#
+# Checker threads run continuously to verify the checksums on the rows and to
+# verify the zero_sum column sums up to zero at any point in time. The checker
+# threads run both point lookups and range scans for selecting the rows.
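+#
+# As a rough sketch of the id layout (matching the LoadGenWorker fields
+# below): loader thread t exclusively owns
+#   [t * ids_per_loader, (t + 1) * ids_per_loader)
+# and the shared section is
+#   [num_loaders * ids_per_loader, max_id).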
+
+class ValidateError(Exception):
+ """Raised when validation fails"""
+ pass
+
+class TestError(Exception):
+ """Raised when the test cannot make forward progress"""
+ pass
+
+CHARS = string.letters + string.digits
+OPTIONS = {}
+
+# max number of rows per transaction
+MAX_ROWS_PER_REQ = 10
+
+# global variable checked by threads to determine if the test is stopping
+TEST_STOP = False
+LOADERS_READY = 0
+
+# global monotonically increasing request id counter
+REQUEST_ID = 1
+REQUEST_ID_LOCK = threading.Lock()
+
+INSERT_ID_SET = set()
+
+def get_next_request_id():
+ global REQUEST_ID
+ with REQUEST_ID_LOCK:
+ REQUEST_ID += 1
+ return REQUEST_ID
+
+# given a percentage value, rolls a 100-sided die and returns whether the
+# given value is greater than or equal to the die roll
+#
+# passing 0 should always return false and 100 should always return true
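+# (e.g. roll_d100(90) returns True with probability 0.9)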
+def roll_d100(p):
+ assert p >= 0 and p <= 100
+ return p >= random.randint(1, 100)
+
+def sha1(x):
+ return hashlib.sha1(str(x)).hexdigest()
+
+def is_connection_error(exc):
+ error_code = exc.args[0]
+ return (error_code == MySQLdb.constants.CR.CONNECTION_ERROR or
+ error_code == MySQLdb.constants.CR.CONN_HOST_ERROR or
+ error_code == MySQLdb.constants.CR.SERVER_LOST or
+ error_code == MySQLdb.constants.CR.SERVER_GONE_ERROR or
+ error_code == MySQLdb.constants.ER.QUERY_INTERRUPTED or
+ error_code == MySQLdb.constants.ER.SERVER_SHUTDOWN)
+
+def is_deadlock_error(exc):
+ error_code = exc.args[0]
+ return (error_code == MySQLdb.constants.ER.LOCK_DEADLOCK or
+ error_code == MySQLdb.constants.ER.LOCK_WAIT_TIMEOUT)
+
+# should be deterministic given an idx
+def gen_msg(idx, thread_id, request_id):
+ random.seed(idx)
+ # field length is 1024 bytes, but 32 are reserved for the tid and req tag
+ blob_length = random.randint(1, 1024 - 32)
+
+ if roll_d100(50):
+ # blob that cannot be compressed (well, compresses to 85% of original size)
+ msg = ''.join([random.choice(CHARS) for x in xrange(blob_length)])
+ else:
+ # blob that can be compressed
+ msg = random.choice(CHARS) * blob_length
+
+ # append the thread_id and request_id to the end of the msg
+ return ''.join([msg, ' tid: %d req: %d' % (thread_id, request_id)])
+
+def execute(cur, stmt):
+ ROW_COUNT_ERROR = 18446744073709551615L
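+ # (2**64 - 1, i.e. a my_ulonglong of -1, used here as an error marker)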
+ logging.debug("Executing %s" % stmt)
+ cur.execute(stmt)
+ if cur.rowcount < 0 or cur.rowcount == ROW_COUNT_ERROR:
+ raise MySQLdb.OperationalError(MySQLdb.constants.CR.CONNECTION_ERROR,
+ "Possible connection error, rowcount is %d"
+ % cur.rowcount)
+
+def wait_for_workers(workers, min_active = 0):
+ logging.info("Waiting for %d workers", len(workers))
+ # min_active needs to include the current waiting thread
+ min_active += 1
+
+ # polling here allows this thread to be responsive to keyboard interrupt
+ # exceptions, otherwise a user hitting ctrl-c would see the load_generator as
+ # hanging and unresponsive
+ try:
+ while threading.active_count() > min_active:
+ time.sleep(1)
+ except KeyboardInterrupt, e:
+ os._exit(1)
+
+ num_failures = 0
+ for w in workers:
+ w.join()
+ if w.exception:
+ logging.error(w.exception)
+ num_failures += 1
+
+ return num_failures
+
+# base class for worker threads and contains logic for handling reconnecting to
+# the mysqld server during connection failure
+class WorkerThread(threading.Thread):
+ def __init__(self, name):
+ threading.Thread.__init__(self)
+ self.name = name
+ self.exception = None
+ self.con = None
+ self.cur = None
+ self.isolation_level = None
+ self.start_time = time.time()
+ self.total_time = 0
+
+ def run(self):
+ global TEST_STOP
+
+ try:
+ logging.info("Started")
+ self.runme()
+ logging.info("Completed successfully")
+ except Exception, e:
+ self.exception = traceback.format_exc()
+ logging.error(self.exception)
+ TEST_STOP = True
+ finally:
+ self.total_time = time.time() - self.start_time
+ logging.info("Total run time: %.2f s" % self.total_time)
+ self.finish()
+
+ def reconnect(self, timeout=900):
+ global TEST_STOP
+
+ self.con = None
+ SECONDS_BETWEEN_RETRY = 10
+ attempts = 1
+ logging.info("Attempting to connect to MySQL Server")
+ while not self.con and timeout > 0 and not TEST_STOP:
+ try:
+ self.con = MySQLdb.connect(user=OPTIONS.user, host=OPTIONS.host,
+ port=OPTIONS.port, db=OPTIONS.db)
+ if self.con:
+ self.con.autocommit(False)
+ self.cur = self.con.cursor()
+ self.set_isolation_level(self.isolation_level)
+ logging.info("Connection successful after attempt %d" % attempts)
+ break
+ except MySQLdb.Error, e:
+ logging.debug(traceback.format_exc())
+ time.sleep(SECONDS_BETWEEN_RETRY)
+ timeout -= SECONDS_BETWEEN_RETRY
+ attempts += 1
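+ # True means the reconnect attempt failed (no connection was established)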
+ return self.con is None
+
+ def get_isolation_level(self):
+ execute(self.cur, "SELECT @@SESSION.tx_isolation")
+ if self.cur.rowcount != 1:
+ raise TestError("Unable to retrieve tx_isolation")
+ return self.cur.fetchone()[0]
+
+ def set_isolation_level(self, isolation_level, persist = False):
+ if isolation_level is not None:
+ execute(self.cur, "SET @@SESSION.tx_isolation = '%s'" % isolation_level)
+ if self.cur.rowcount != 0:
+ raise TestError("Unable to set the isolation level to %s")
+
+ if isolation_level is None or persist:
+ self.isolation_level = isolation_level
+
+# periodically kills the server
+class ReaperWorker(WorkerThread):
+ def __init__(self):
+ WorkerThread.__init__(self, 'reaper')
+ self.start()
+ self.kills = 0
+
+ def finish(self):
+ logging.info('complete with %d kills' % self.kills)
+ if self.con:
+ self.con.close()
+
+ def get_server_pid(self):
+ execute(self.cur, "SELECT @@pid_file")
+ if self.cur.rowcount != 1:
+ raise TestError("Unable to retrieve pid_file")
+ return int(open(self.cur.fetchone()[0]).read())
+
+ def runme(self):
+ global TEST_STOP
+ time_remain = random.randint(10, 30)
+ while not TEST_STOP:
+ if time_remain > 0:
+ time_remain -= 1
+ time.sleep(1)
+ continue
+ if self.reconnect():
+ raise Exception("Unable to connect to MySQL server")
+ logging.info('killing server...')
+ with open(OPTIONS.expect_file, 'w+') as expect_file:
+ expect_file.write('restart')
+ os.kill(self.get_server_pid(), signal.SIGTERM)
+ self.kills += 1
+ time_remain = random.randint(0, 30) + OPTIONS.reap_delay
+
+# runs initially to populate the table with the given number of rows
+class PopulateWorker(WorkerThread):
+ def __init__(self, thread_id, start_id, num_to_add):
+ WorkerThread.__init__(self, 'populate-%d' % thread_id)
+ self.thread_id = thread_id
+ self.start_id = start_id
+ self.num_to_add = num_to_add
+ self.table = OPTIONS.table
+ self.start()
+
+ def finish(self):
+ if self.con:
+ self.con.commit()
+ self.con.close()
+
+ def runme(self):
+ if self.reconnect():
+ raise Exception("Unable to connect to MySQL server")
+
+ stmt = None
+ for i in xrange(self.start_id, self.start_id + self.num_to_add):
+ stmt = gen_insert(self.table, i, 0, 0, 0)
+ execute(self.cur, stmt)
+ if i % 101 == 0:
+ self.con.commit()
+ check_id(self.con.insert_id())
+ self.con.commit()
+ check_id(self.con.insert_id())
+ logging.info("Inserted %d rows starting at id %d" %
+ (self.num_to_add, self.start_id))
+
+def check_id(id):
+ if id == 0:
+ return
+ if id in INSERT_ID_SET:
+ raise Exception("Duplicate auto_inc id %d" % id)
+ INSERT_ID_SET.add(id)
+
+def populate_table(num_records):
+
+ logging.info("Populate_table started for %d records" % num_records)
+ if num_records == 0:
+ return False
+
+ # at least one worker, at most ten, roughly one per 100 rows
+ num_workers = min(10, max(1, num_records / 100))
+ workers = []
+
+ N = num_records / num_workers
+ start_id = 0
+ for i in xrange(num_workers):
+ workers.append(PopulateWorker(i, start_id, N))
+ start_id += N
+ if num_records > start_id:
+ workers.append(PopulateWorker(num_workers, start_id,
+ num_records - start_id))
+
+ # Wait for the populate threads to complete
+ return wait_for_workers(workers) > 0
+
+def gen_insert(table, idx, thread_id, request_id, zero_sum):
+ msg = gen_msg(idx, thread_id, request_id)
+ return ("INSERT INTO %s (id, thread_id, request_id, zero_sum, "
+ "msg, msg_length, msg_checksum) VALUES (%d,%d,%d,%d,'%s',%d,'%s')"
+ % (table, idx, thread_id, request_id,
+ zero_sum, msg, len(msg), sha1(msg)))
+
+def gen_update(table, idx, thread_id, request_id, zero_sum):
+ msg = gen_msg(idx, thread_id, request_id)
+ return ("UPDATE %s SET thread_id = %d, request_id = %d, "
+ "update_count = update_count + 1, zero_sum = zero_sum + (%d), "
+ "msg = '%s', msg_length = %d, msg_checksum = '%s' WHERE id = %d "
+ % (table, thread_id, request_id, zero_sum, msg, len(msg),
+ sha1(msg), idx))
+
+def gen_delete(table, idx):
+ return "DELETE FROM %s WHERE id = %d" % (table, idx)
+
+def gen_insert_on_dup(table, idx, thread_id, request_id, zero_sum):
+ msg = gen_msg(idx, thread_id, request_id)
+ msg_checksum = sha1(msg)
+ return ("INSERT INTO %s (id, thread_id, request_id, zero_sum, "
+ "msg, msg_length, msg_checksum) VALUES (%d,%d,%d,%d,'%s',%d,'%s') "
+ "ON DUPLICATE KEY UPDATE "
+ "thread_id=%d, request_id=%d, "
+ "update_count=update_count+1, "
+ "zero_sum=zero_sum + (%d), msg='%s', msg_length=%d, "
+ "msg_checksum='%s'" %
+ (table, idx, thread_id, request_id,
+ zero_sum, msg, len(msg), msg_checksum, thread_id, request_id,
+ zero_sum, msg, len(msg), msg_checksum))
+
+# Each loader thread owns a part of the id space which it maintains inventory
+# for. The loader thread generates inserts, updates and deletes for the table.
+# The latest successful transaction and the latest open transaction are kept to
+# verify after a disconnect that the rows were recovered properly.
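+#
+# For example, cur_txn = {start_id: 52, 17: 52, 23: -52} records that rows
+# start_id and 17 were written at request_id 52 and that row 23 was deleted
+# by the same request.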
+class LoadGenWorker(WorkerThread):
+ TXN_UNCOMMITTED = 0
+ TXN_COMMIT_STARTED = 1
+ TXN_COMMITTED = 2
+
+ def __init__(self, thread_id):
+ WorkerThread.__init__(self, 'loader-%02d' % thread_id)
+ self.thread_id = thread_id
+ self.rand = random.Random()
+ self.rand.seed(thread_id)
+ self.loop_num = 0
+
+ # id_map contains the array of ids owned by this worker thread. It needs
+ # to be offset by start_id for the actual id
+ self.id_map = array.array('l')
+ self.start_id = thread_id * OPTIONS.ids_per_loader
+ self.num_id = OPTIONS.ids_per_loader
+ self.start_share_id = OPTIONS.num_loaders * OPTIONS.ids_per_loader
+ self.max_id = OPTIONS.max_id
+ self.table = OPTIONS.table
+ self.num_requests = OPTIONS.num_requests
+
+ # stores information about the latest series of successful transactions
+ #
+ # each transaction is simply a map of id -> request_id
+ # deleted rows are indicated by -request_id
+ self.prev_txn = deque()
+ self.cur_txn = None
+ self.cur_txn_state = None
+
+ self.start()
+
+ def finish(self):
+ if self.total_time:
+ req_per_sec = self.loop_num / self.total_time
+ else:
+ req_per_sec = -1
+ logging.info("total txns: %d, txn/s: %.2f rps" %
+ (self.loop_num, req_per_sec))
+
+ # constructs the internal hash map of the ids owned by this thread and
+ # the request id of each id
+ def populate_id_map(self):
+ logging.info("Populating id map")
+
+ REQ_ID_COL = 0
+ stmt = "SELECT request_id FROM %s WHERE id = %d"
+
+ # the start_id is used for tracking active transactions, so the row needs
+ # to exist
+ idx = self.start_id
+ execute(self.cur, stmt % (self.table, idx))
+ if self.cur.rowcount > 0:
+ request_id = self.cur.fetchone()[REQ_ID_COL]
+ else:
+ request_id = get_next_request_id()
+ execute(self.cur, gen_insert(self.table, idx, self.thread_id,
+ request_id, 0))
+ self.con.commit()
+ check_id(self.con.insert_id())
+
+ self.id_map.append(request_id)
+
+ self.cur_txn = {idx:request_id}
+ self.cur_txn_state = self.TXN_COMMITTED
+ for i in xrange(OPTIONS.committed_txns):
+ self.prev_txn.append(self.cur_txn)
+
+ # fetch the rest of the rows for the id space owned by this thread
+ for idx in xrange(self.start_id + 1, self.start_id + self.num_id):
+ execute(self.cur, stmt % (self.table, idx))
+ if self.cur.rowcount == 0:
+ # Negative number is used to indicate a missing row
+ self.id_map.append(-1)
+ else:
+ res = self.cur.fetchone()
+ self.id_map.append(res[REQ_ID_COL])
+
+ self.con.commit()
+
+ def apply_cur_txn_changes(self):
+ # apply the changes to the id_map
+ for idx in self.cur_txn:
+ if idx < self.start_id + self.num_id:
+ assert idx >= self.start_id
+ self.id_map[idx - self.start_id] = self.cur_txn[idx]
+ self.cur_txn_state = self.TXN_COMMITTED
+
+ self.prev_txn.append(self.cur_txn)
+ self.prev_txn.popleft()
+
+ def verify_txn(self, txn, committed):
+ request_id = txn[self.start_id]
+ if not committed:
+ # if the transaction was not committed, then there should be no rows
+ # in the table that have this request_id
+ cond = '='
+ # it is possible the start_id used to track this transaction is in
+ # the process of being deleted
+ if request_id < 0:
+ request_id = -request_id
+ else:
+ # if the transaction was committed, then no rows modified by this
+ # transaction should have a request_id less than this transaction's id
+ cond = '<'
+ stmt = ("SELECT COUNT(*) FROM %s WHERE id IN (%s) AND request_id %s %d" %
+ (self.table, ','.join(str(x) for x in txn), cond, request_id))
+ execute(self.cur, stmt)
+ if (self.cur.rowcount != 1):
+ raise TestError("Unable to retrieve results for query '%s'" % stmt)
+ count = self.cur.fetchone()[0]
+ if (count > 0):
+ raise TestError("Expected '%s' to return 0 rows, but %d returned "
+ "instead" % (stmt, count))
+ self.con.commit()
+
+ def verify_data(self):
+ # if the state of the current transaction is unknown (i.e. a commit was
+ # issued, but the connection failed before the result was seen), check the
+ # start_id row to determine if it was committed
+ request_id = self.cur_txn[self.start_id]
+ if self.cur_txn_state == self.TXN_COMMIT_STARTED:
+ assert request_id >= 0
+ idx = self.start_id
+ stmt = "SELECT id, request_id FROM %s where id = %d" % (self.table, idx)
+ execute(self.cur, stmt)
+ if (self.cur.rowcount == 0):
+ raise TestError("Fetching start_id %d via '%s' returned no data! "
+ "This row should never be deleted!" % (idx, stmt))
+ REQUEST_ID_COL = 1
+ res = self.cur.fetchone()
+ if res[REQUEST_ID_COL] == self.cur_txn[idx]:
+ self.apply_cur_txn_changes()
+ else:
+ self.cur_txn_state = self.TXN_UNCOMMITTED
+ self.con.commit()
+
+ # if the transaction was not committed, verify there are no rows at this
+ # request id
+ #
+ # however, if the transaction was committed, then verify none of the rows
+ # have a request_id below the request_id recorded by the start_id row.
+ if self.cur_txn_state == self.TXN_UNCOMMITTED:
+ self.verify_txn(self.cur_txn, False)
+
+ # verify all committed transactions
+ for txn in self.prev_txn:
+ self.verify_txn(txn, True)
+
+ # verify the rows owned by this worker match the request_id at which
+ # they were set.
+ idx = self.start_id
+ max_map_id = self.start_id + self.num_id
+ row_count = 0
+ ID_COL = 0
+ REQ_ID_COL = ID_COL + 1
+
+ while idx < max_map_id:
+ if (row_count == 0):
+ num_rows_to_check = random.randint(50, 100)
+ execute(self.cur,
+ "SELECT id, request_id FROM %s where id >= %d and id < %d "
+ "ORDER BY id LIMIT %d"
+ % (self.table, idx, max_map_id, num_rows_to_check))
+
+ # prevent future queries from being issued since we've hit the end of
+ # the rows that exist in the table
+ row_count = self.cur.rowcount if self.cur.rowcount != 0 else -1
+
+ # determine the id of the next available row in the table
+ if (row_count > 0):
+ res = self.cur.fetchone()
+ assert idx <= res[ID_COL]
+ next_id = res[ID_COL]
+ row_count -= 1
+ else:
+ next_id = max_map_id
+
+ # rows up to the next id don't exist within the table, verify our
+ # map has them as removed
+ while idx < next_id:
+ # see if the latest transaction may have modified this id. If so, use
+ # that value.
+ if self.id_map[idx - self.start_id] >= 0:
+ raise ValidateError("Row id %d was not found in table, but "
+ "id_map has it at request_id %d" %
+ (idx, self.id_map[idx - self.start_id]))
+ idx += 1
+
+ if idx == max_map_id:
+ break
+
+ if (self.id_map[idx - self.start_id] != res[REQ_ID_COL]):
+ raise ValidateError("Row id %d has req id %d, but %d is the "
+ "expected value!" %
+ (idx, res[REQ_ID_COL],
+ self.id_map[idx - self.start_id]))
+ idx += 1
+
+ self.con.commit()
+ logging.debug("Verified data successfully")
+
+ def execute_one(self):
+ # select a number of rows and perform an insert, update or delete
+ # operation on each of them
+ num_rows = random.randint(1, MAX_ROWS_PER_REQ)
+ ids = array.array('L')
+
+ # allocate at least one row in the id space owned by this worker
+ idx = random.randint(self.start_id, self.start_id + self.num_id - 1)
+ ids.append(idx)
+
+ for i in xrange(1, num_rows):
+ # The valid ranges for ids is from start_id to start_id + num_id and from
+ # start_share_id to max_id. The randint() uses the range from
+ # start_share_id to max_id + num_id - 1. start_share_id to max_id covers
+ # the shared range. The exclusive range is covered by max_id to max_id +
+ # num_id - 1. If any number lands in this >= max_id section, it is
+ # remapped to start_id and used for selecting a row in the exclusive
+ # section.
+ idx = random.randint(self.start_share_id, self.max_id + self.num_id - 1)
+ if idx >= self.max_id:
+ idx -= self.max_id - self.start_id
+ if ids.count(idx) == 0:
+ ids.append(idx)
+
+ # perform a read of these rows
+ ID_COL = 0
+ ZERO_SUM_COL = ID_COL + 1
+
+    # For repeatable-read isolation levels on MyRocks, it is possible that,
+    # during the lock acquisition part of this transaction, the selected rows
+    # conflict with another thread's transaction. This results in a deadlock
+    # error that requires the whole transaction to be rolled back, because
+    # the transaction's current snapshot will always read an older version of
+    # the row. MyRocks prevents any updates to this row until the snapshot is
+    # released and re-acquired.
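+    # retry loop: break out once every SELECT ... FOR UPDATE succeeds; on a
+    # deadlock, roll back, sleep briefly and try the whole batch again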
+ NUM_RETRIES = 100
+ for i in xrange(NUM_RETRIES):
+ ids_found = {}
+ try:
+ for idx in ids:
+ stmt = ("SELECT id, zero_sum FROM %s WHERE id = %d "
+ "FOR UPDATE" % (self.table, idx))
+ execute(self.cur, stmt)
+ if self.cur.rowcount > 0:
+ res = self.cur.fetchone()
+ ids_found[res[ID_COL]] = res[ZERO_SUM_COL]
+ break
+ except MySQLdb.OperationalError, e:
+ if not is_deadlock_error(e):
+ raise e
+
+ # if a deadlock occurred, rollback the transaction and wait a short time
+ # before retrying.
+ logging.debug("%s generated deadlock, retry %d of %d" %
+ (stmt, i, NUM_RETRIES))
+ self.con.rollback()
+ time.sleep(0.2)
+
+ if i == NUM_RETRIES - 1:
+ raise TestError("Unable to acquire locks after a number of retries "
+ "for query '%s'" % stmt)
+
+    # ensure that the zero_sum column still sums to zero at the
+    # end of this operation
+ current_sum = 0
+
+ # all row locks acquired at this point, so allocate a request_id
+ request_id = get_next_request_id()
+ self.cur_txn = {self.start_id:request_id}
+ self.cur_txn_state = self.TXN_UNCOMMITTED
+
+ for idx in ids:
+ stmt = None
+ zero_sum = self.rand.randint(-1000, 1000)
+ action = self.rand.randint(0, 3)
+ is_delete = False
+
+ if idx in ids_found:
+ # for each row found, determine if it should be updated or deleted
+ if action == 0:
+ stmt = gen_delete(self.table, idx)
+ is_delete = True
+ current_sum -= ids_found[idx]
+ else:
+ stmt = gen_update(self.table, idx, self.thread_id, request_id,
+ zero_sum)
+ current_sum += zero_sum
+ else:
+ # if it does not exist, then determine if an insert should happen
+ if action <= 1:
+ stmt = gen_insert(self.table, idx, self.thread_id, request_id,
+ zero_sum)
+ current_sum += zero_sum
+
+ if stmt is not None:
+ # mark in self.cur_txn what these new changes will be
+ if is_delete:
+ self.cur_txn[idx] = -request_id
+ else:
+ self.cur_txn[idx] = request_id
+ execute(self.cur, stmt)
+ if self.cur.rowcount == 0:
+ raise TestError("Executing %s returned row count of 0!" % stmt)
+
+    # the start_id row is used to determine whether this transaction was
+    # committed if the connection fails, and to adjust the zero_sum
+    # correctly
+ idx = self.start_id
+ ids.append(idx)
+ self.cur_txn[idx] = request_id
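+    # writing -current_sum into the start_id row keeps the table-wide
+    # invariant: if, say, this transaction's changes moved zero_sum by a net
+    # +37, the start_id row absorbs -37 so the total stays 0 (the value +37
+    # is illustrative only)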
+ stmt = gen_insert_on_dup(self.table, idx, self.thread_id, request_id,
+ -current_sum)
+ execute(self.cur, stmt)
+ if self.cur.rowcount == 0:
+ raise TestError("Executing '%s' returned row count of 0!" % stmt)
+
+ # 90% commit, 10% rollback
+    if roll_d100(10):
+ self.con.rollback()
+ logging.debug("request %s was rolled back" % request_id)
+ else:
+ self.cur_txn_state = self.TXN_COMMIT_STARTED
+ self.con.commit()
+ check_id(self.con.insert_id())
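+      # a missing server response after COMMIT is treated as a possible
+      # connection failure so that the reconnect path re-checks the start_id
+      # row (see verify_data) to decide whether this commit actually landed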
+ if not self.con.get_server_info():
+ raise MySQLdb.OperationalError(MySQLdb.constants.CR.CONNECTION_ERROR,
+ "Possible connection error on commit")
+ self.apply_cur_txn_changes()
+
+ self.loop_num += 1
+ if self.loop_num % 1000 == 0:
+ logging.info("Processed %d transactions so far" % self.loop_num)
+
+ def runme(self):
+ global TEST_STOP, LOADERS_READY
+
+ self.start_time = time.time()
+ if self.reconnect():
+ raise Exception("Unable to connect to MySQL server")
+
+ self.populate_id_map()
+ self.verify_data()
+
+ logging.info("Starting load generator")
+ reconnected = False
+ LOADERS_READY += 1
+
+ while self.loop_num < self.num_requests and not TEST_STOP:
+ try:
+        # verify our data on each reconnect and also occasionally
+ if reconnected or random.randint(1, 500) == 1:
+ self.verify_data()
+ reconnected = False
+
+        self.execute_one()
+ except MySQLdb.OperationalError, e:
+ if not is_connection_error(e):
+ raise e
+ if self.reconnect():
+ raise Exception("Unable to connect to MySQL server")
+ reconnected = True
+ return
+
+# the checker thread runs read-only transactions to verify that the row
+# checksums match the messages.
+class CheckerWorker(WorkerThread):
+ def __init__(self, thread_id):
+ WorkerThread.__init__(self, 'checker-%02d' % thread_id)
+ self.thread_id = thread_id
+ self.rand = random.Random()
+ self.rand.seed(thread_id)
+ self.max_id = OPTIONS.max_id
+ self.table = OPTIONS.table
+ self.loop_num = 0
+ self.start()
+
+ def finish(self):
+ logging.info("total loops: %d" % self.loop_num)
+
+ def check_zerosum(self):
+ # two methods for checking zero sum
+ # 1. request the server to do it (90% of the time for now)
+ # 2. read all rows and calculate directly
+ if roll_d100(90):
+ stmt = "SELECT SUM(zero_sum) FROM %s" % self.table
+ if roll_d100(50):
+ stmt += " FORCE INDEX(msg_i)"
+ execute(self.cur, stmt)
+
+ if self.cur.rowcount != 1:
+ raise ValidateError("Error with query '%s'" % stmt)
+ res = self.cur.fetchone()[0]
+ if res != 0:
+ raise ValidateError("Expected zero_sum to be 0, but %d returned "
+ "instead" % res)
+ else:
+ cur_isolation_level = self.get_isolation_level()
+ self.set_isolation_level('REPEATABLE-READ')
+ num_rows_to_check = random.randint(500, 1000)
+ idx = 0
+ sum = 0
+
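+      # scan the table in ascending id order, one window of rows at a time;
+      # the last id seen seeds the next window's WHERE clause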
+ stmt = "SELECT id, zero_sum FROM %s where id >= %d ORDER BY id LIMIT %d"
+ ID_COL = 0
+ ZERO_SUM_COL = 1
+
+ while idx < self.max_id:
+ execute(self.cur, stmt % (self.table, idx, num_rows_to_check))
+ if self.cur.rowcount == 0:
+ break
+
+ for i in xrange(self.cur.rowcount - 1):
+ sum += self.cur.fetchone()[ZERO_SUM_COL]
+
+ last_row = self.cur.fetchone()
+ idx = last_row[ID_COL] + 1
+ sum += last_row[ZERO_SUM_COL]
+
+ if sum != 0:
+ raise TestError("Zero sum column expected to total 0, but sum is %d "
+ "instead!" % sum)
+ self.set_isolation_level(cur_isolation_level)
+
+ def check_rows(self):
+ class id_range():
+ def __init__(self, min_id, min_inclusive, max_id, max_inclusive):
+ self.min_id = min_id if min_inclusive else min_id + 1
+ self.max_id = max_id if max_inclusive else max_id - 1
+ def count(self, idx):
+ return idx >= self.min_id and idx <= self.max_id
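+    # e.g. id_range(5, True, 10, False) covers ids 5..9, so count(9) is
+    # True and count(10) is False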
+
+ stmt = ("SELECT id, msg, msg_length, msg_checksum FROM %s WHERE " %
+ self.table)
+
+ # two methods for checking rows
+ # 1. pick a number of rows at random
+ # 2. range scan
+ if roll_d100(90):
+ ids = []
+ for i in xrange(random.randint(1, MAX_ROWS_PER_REQ)):
+ ids.append(random.randint(0, self.max_id - 1))
+ stmt += "id in (%s)" % ','.join(str(x) for x in ids)
+ else:
+ id1 = random.randint(0, self.max_id - 1)
+ id2 = random.randint(0, self.max_id - 1)
+ min_inclusive = random.randint(0, 1)
+ cond1 = '>=' if min_inclusive else '>'
+ max_inclusive = random.randint(0, 1)
+ cond2 = '<=' if max_inclusive else '<'
+ stmt += ("id %s %d AND id %s %d" %
+ (cond1, min(id1, id2), cond2, max(id1, id2)))
+ ids = id_range(min(id1, id2), min_inclusive, max(id1, id2), max_inclusive)
+
+ execute(self.cur, stmt)
+
+ ID_COL = 0
+ MSG_COL = ID_COL + 1
+ MSG_LENGTH_COL = MSG_COL + 1
+ MSG_CHECKSUM_COL = MSG_LENGTH_COL + 1
+
+ for row in self.cur.fetchall():
+ idx = row[ID_COL]
+ msg = row[MSG_COL]
+ msg_length = row[MSG_LENGTH_COL]
+ msg_checksum = row[MSG_CHECKSUM_COL]
+ if ids.count(idx) < 1:
+ raise ValidateError(
+ "id %d returned from database, but query was '%s'" % (idx, stmt))
+ if (len(msg) != msg_length):
+ raise ValidateError(
+ "id %d contains msg_length %d, but msg '%s' is only %d "
+ "characters long" % (idx, msg_length, msg, len(msg)))
+ if (sha1(msg) != msg_checksum):
+ raise ValidateError("id %d has checksum '%s', but expected checksum "
+ "is '%s'" % (idx, msg_checksum, sha1(msg)))
+
+ def runme(self):
+ global TEST_STOP
+
+ self.start_time = time.time()
+ if self.reconnect():
+ raise Exception("Unable to connect to MySQL server")
+ logging.info("Starting checker")
+
+ while not TEST_STOP:
+ try:
+        # choose one of two options:
+        # 1. verify the zero_sum across all rows totals 0
+        # 2. read a number of rows and verify checksums
+ if roll_d100(25):
+ self.check_zerosum()
+ else:
+ self.check_rows()
+
+ self.con.commit()
+ self.loop_num += 1
+ if self.loop_num % 10000 == 0:
+ logging.info("Processed %d transactions so far" % self.loop_num)
+ except MySQLdb.OperationalError, e:
+ if not is_connection_error(e):
+ raise e
+ if self.reconnect():
+ raise Exception("Unable to reconnect to MySQL server")
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='Concurrent load generator.')
+
+  parser.add_argument('-C', '--committed-txns', dest='committed_txns',
+                      default=3, type=int,
+                      help="number of committed txns to verify")
+
+  parser.add_argument('-c', '--num-checkers', dest='num_checkers', type=int,
+                      default=4,
+                      help="number of reader/checker threads to test with")
+
+  parser.add_argument('-d', '--db', dest='db', default='test',
+                      help="mysqld server database to test with")
+
+  parser.add_argument('-H', '--host', dest='host', default='127.0.0.1',
+                      help="mysqld server host ip address")
+
+  parser.add_argument('-i', '--ids-per-loader', dest='ids_per_loader',
+                      type=int, default=100,
+                      help="number of records which each loader owns "
+                      "exclusively, up to max-id / 2 / num-loaders")
+
+  parser.add_argument('-L', '--log-file', dest='log_file', default=None,
+                      help="log file for output")
+
+  parser.add_argument('-l', '--num-loaders', dest='num_loaders', type=int,
+                      default=16,
+                      help="number of loader threads to test with")
+
+  parser.add_argument('-m', '--max-id', dest='max_id', type=int, default=1000,
+                      help="maximum number of records which the table "
+                      "extends to, must be larger than ids_per_loader * "
+                      "num_loaders")
+
+  parser.add_argument('-n', '--num-records', dest='num_records', type=int,
+                      default=0,
+                      help="number of records to populate the table with")
+
+  parser.add_argument('-P', '--port', dest='port', default=3307, type=int,
+                      help='mysqld server host port')
+
+  parser.add_argument('-r', '--num-requests', dest='num_requests', type=int,
+                      default=100000000,
+                      help="number of requests issued per worker thread")
+
+  parser.add_argument('-T', '--truncate', dest='truncate',
+                      action='store_true',
+                      help="truncates or creates the table before the test")
+
+  parser.add_argument('-t', '--table', dest='table', default='t1',
+                      help="mysqld server table to test with")
+
+  parser.add_argument('-u', '--user', dest='user', default='root',
+                      help="user to log into the mysql server")
+
+  parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
+                      help="enable debug logging")
+
+  parser.add_argument('-E', '--expect-file', dest='expect_file', default=None,
+                      help="expect file for server restart")
+
+  parser.add_argument('-D', '--reap-delay', dest='reap_delay', type=int,
+                      default=0,
+                      help="seconds to sleep after each server reap")
+
+ OPTIONS = parser.parse_args()
+
+ if OPTIONS.verbose:
+ log_level = logging.DEBUG
+ else:
+ log_level = logging.INFO
+
+ logging.basicConfig(level=log_level,
+ format='%(asctime)s %(threadName)s [%(levelname)s] '
+ '%(message)s',
+ datefmt='%Y-%m-%d %H:%M:%S',
+ filename=OPTIONS.log_file)
+
+ logging.info("Command line given: %s" % ' '.join(sys.argv))
+
+ if (OPTIONS.max_id < 0 or OPTIONS.ids_per_loader <= 0 or
+ OPTIONS.max_id < OPTIONS.ids_per_loader * OPTIONS.num_loaders):
+ logging.error("ids-per-loader must be larger tha 0 and max-id must be "
+ "larger than ids_per_loader * num_loaders")
+ exit(1)
+
+ logging.info("Using table %s.%s for test" % (OPTIONS.db, OPTIONS.table))
+
+ if OPTIONS.truncate:
+ logging.info("Truncating table")
+ con = MySQLdb.connect(user=OPTIONS.user, host=OPTIONS.host,
+ port=OPTIONS.port, db=OPTIONS.db)
+ if not con:
+ raise TestError("Unable to connect to mysqld server to create/truncate "
+ "table")
+ cur = con.cursor()
+ cur.execute("SELECT COUNT(*) FROM INFORMATION_SCHEMA.tables WHERE "
+ "table_schema = '%s' AND table_name = '%s'" %
+ (OPTIONS.db, OPTIONS.table))
+ if cur.rowcount != 1:
+ logging.error("Unable to retrieve information about table %s "
+ "from information_schema!" % OPTIONS.table)
+ exit(1)
+
+ if cur.fetchone()[0] == 0:
+ logging.info("Table %s not found, creating a new one" % OPTIONS.table)
+ cur.execute("CREATE TABLE %s (id INT PRIMARY KEY, "
+ "thread_id INT NOT NULL, "
+ "request_id BIGINT UNSIGNED NOT NULL, "
+ "update_count INT UNSIGNED NOT NULL DEFAULT 0, "
+ "zero_sum INT DEFAULT 0, "
+ "msg VARCHAR(1024), "
+ "msg_length int, "
+ "msg_checksum varchar(128), "
+ "KEY msg_i(msg(255), zero_sum)) "
+ "ENGINE=RocksDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin" %
+ OPTIONS.table)
+ else:
+ logging.info("Table %s found, truncating" % OPTIONS.table)
+ cur.execute("TRUNCATE TABLE %s" % OPTIONS.table)
+ con.commit()
+
+ if populate_table(OPTIONS.num_records):
+ logging.error("Populate table returned an error")
+ exit(1)
+
+ logging.info("Starting %d loaders" % OPTIONS.num_loaders)
+ loaders = []
+ for i in xrange(OPTIONS.num_loaders):
+ loaders.append(LoadGenWorker(i))
+
+ logging.info("Starting %d checkers" % OPTIONS.num_checkers)
+ checkers = []
+ for i in xrange(OPTIONS.num_checkers):
+ checkers.append(CheckerWorker(i))
+
+ while LOADERS_READY < OPTIONS.num_loaders:
+ time.sleep(0.5)
+
+ if OPTIONS.expect_file and OPTIONS.reap_delay > 0:
+ logging.info('Starting reaper')
+ checkers.append(ReaperWorker())
+
+ workers_failed = 0
+ workers_failed += wait_for_workers(loaders, len(checkers))
+
+ if TEST_STOP:
+ logging.error("Detected test failure, aborting")
+ os._exit(1)
+
+ TEST_STOP = True
+
+ workers_failed += wait_for_workers(checkers)
+
+ if workers_failed > 0:
+ logging.error("Test detected %d failures, aborting" % workers_failed)
+ sys.exit(1)
+
+ logging.info("Test completed successfully")
+ sys.exit(0)
diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress.test b/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress.test
new file mode 100644
index 00000000000..e48b0c47ae3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress.test
@@ -0,0 +1,33 @@
+# basic stress test for MyRocks; runs the load generator without any crashes
+
+# Don't test this under valgrind, memory leaks will occur
+--disable_warnings
+--source include/not_valgrind.inc
+--source include/have_rocksdb.inc
+--source include/master-slave.inc
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+# create the actual table
+CREATE TABLE t1(id INT PRIMARY KEY,
+ thread_id INT NOT NULL,
+ request_id BIGINT UNSIGNED NOT NULL,
+ update_count INT UNSIGNED NOT NULL DEFAULT 0,
+ zero_sum INT DEFAULT 0,
+ msg VARCHAR(1024),
+ msg_length int,
+ msg_checksum varchar(128),
+ auto_inc BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
+ KEY(auto_inc),
+ KEY msg_i(msg(255), zero_sum))
+ENGINE=RocksDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin;
+
+--let $table = t1
+--let $num_loaders = 8
+--let $num_checkers = 2
+--let $num_records = 200
+--let $max_records = 100000
+--let $num_requests = 10000
+--let $reap_delay = 0
+
+--source include/rocksdb_stress.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress_crash.test b/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress_crash.test
new file mode 100644
index 00000000000..3da3dc7b3c3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress_crash.test
@@ -0,0 +1,34 @@
+# basic stress test for MyRocks; runs the load generator with periodic crashes
+
+# Don't test this under valgrind, memory leaks will occur
+--disable_warnings
+--source include/not_valgrind.inc
+--source include/have_rocksdb.inc
+--source include/master-slave.inc
+--source include/have_binlog_format_row.inc
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+# create the actual table
+CREATE TABLE t1(id INT PRIMARY KEY,
+ thread_id INT NOT NULL,
+ request_id BIGINT UNSIGNED NOT NULL,
+ update_count INT UNSIGNED NOT NULL DEFAULT 0,
+ zero_sum INT DEFAULT 0,
+ msg VARCHAR(1024),
+ msg_length int,
+ msg_checksum varchar(128),
+ auto_inc BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
+ KEY(auto_inc),
+ KEY msg_i(msg(255), zero_sum))
+ENGINE=RocksDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin;
+
+--let $table = t1
+--let $num_loaders = 8
+--let $num_checkers = 2
+--let $num_records = 200
+--let $max_records = 100000
+--let $num_requests = 10000
+--let $reap_delay = 180
+
+--source include/rocksdb_stress.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/include/correctboolvalue.inc b/storage/rocksdb/mysql-test/rocksdb_sys_vars/include/correctboolvalue.inc
new file mode 100644
index 00000000000..f675aec19f9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/include/correctboolvalue.inc
@@ -0,0 +1,25 @@
+##
+# $value - the boolean value to convert (input)
+# $int_value - the resulting value of int type (output)
+##
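+
+# Example caller pattern (a sketch; rocksdb_sys_var.inc sets $value from its
+# valid_values table before sourcing this file):
+#   --let $value = on
+#   --source include/correctboolvalue.inc
+#   # $int_value now holds 1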
+--let $int_value=$value
+if ($value==on)
+{
+ --let $int_value=1
+}
+
+if ($value==off)
+{
+ --let $int_value=0
+}
+
+# MySQL allows 'true' and 'false' for bool values
+if ($value==true)
+{
+ --let $int_value=1
+}
+
+if ($value==false)
+{
+ --let $int_value=0
+}
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/include/have_rocksdb.inc b/storage/rocksdb/mysql-test/rocksdb_sys_vars/include/have_rocksdb.inc
new file mode 100644
index 00000000000..1f762d38c64
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/include/have_rocksdb.inc
@@ -0,0 +1,10 @@
+if (`SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'rocksdb' AND support IN ('YES', 'DEFAULT', 'ENABLED')`)
+{
+ --skip Test requires engine RocksDB.
+}
+
+--disable_query_log
+# Table statistics can vary depending on when the memtables are flushed, so
+# flush them at the beginning of the test to ensure the test runs consistently.
+set global rocksdb_force_flush_memtable_now = true;
+--enable_query_log
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/include/have_rocksdb.opt b/storage/rocksdb/mysql-test/rocksdb_sys_vars/include/have_rocksdb.opt
new file mode 100644
index 00000000000..36d7dda1609
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/include/have_rocksdb.opt
@@ -0,0 +1,12 @@
+--loose-enable-rocksdb
+--loose-enable-rocksdb_global_info
+--loose-enable-rocksdb_ddl
+--loose-enable-rocksdb_cf_options
+--loose-enable-rocksdb_perf_context
+--loose-enable-rocksdb_perf_context_global
+--loose-enable-rocksdb_index_file_map
+--loose-enable-rocksdb_dbstats
+--loose-enable-rocksdb_cfstats
+--loose-enable-rocksdb_lock_info
+--loose-enable-rocksdb_trx
+--loose-enable-rocksdb_locks
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/include/rocksdb_sys_var.inc b/storage/rocksdb/mysql-test/rocksdb_sys_vars/include/rocksdb_sys_var.inc
new file mode 100644
index 00000000000..db0abc57358
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/include/rocksdb_sys_var.inc
@@ -0,0 +1,124 @@
+##
+# $sys_var name of the variable
+# $read_only - true if read-only
+# $session - true if this is a session variable, false if global-only
+# $suppress_default_value - if true, don't check the default value
+# $sticky - if true, don't verify that the value set actually took effect
+# the valid_values table should contain the values expected to be accepted
+# the invalid_values table should contain the values expected to be rejected
+##
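+#
+# Example caller (a sketch of a typical *_basic.test; the variable name,
+# values and include path are illustrative):
+#
+#   CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+#   INSERT INTO valid_values VALUES(1);
+#   INSERT INTO valid_values VALUES(0);
+#   CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+#   INSERT INTO invalid_values VALUES('\'aaa\'');
+#   --let $sys_var=ROCKSDB_BULK_LOAD
+#   --let $read_only=0
+#   --let $session=1
+#   --source ../include/rocksdb_sys_var.inc
+#   DROP TABLE valid_values;
+#   DROP TABLE invalid_values;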
+
+--eval SET @start_global_value = @@global.$sys_var
+if (!$suppress_default_value)
+{
+ --replace_regex /[a-f0-9]{40}/#/
+ SELECT @start_global_value;
+ if ($session)
+ {
+ --eval SET @start_session_value = @@session.$sys_var
+ SELECT @start_session_value;
+ }
+}
+
+if (!$read_only)
+{
+ --echo '# Setting to valid values in global scope#'
+
+ --let $i=1
+ --let $value=query_get_value(select value from valid_values, value, $i)
+ while ($value != 'No such row')
+ {
+ --echo "Trying to set variable @@global.$sys_var to $value"
+ --eval SET @@global.$sys_var = $value
+ --eval SELECT @@global.$sys_var
+ --let $v=`SELECT @@global.$sys_var`
+ --source include/correctboolvalue.inc
+ if (!$sticky)
+ {
+ if ($v != $int_value)
+ {
+ --echo Set @@global.$sys_var to $value but it remained set to $v
+ --die Wrong variable value
+ }
+ }
+
+ --echo "Setting the global scope variable back to default"
+ --eval SET @@global.$sys_var = DEFAULT
+ --eval SELECT @@global.$sys_var
+
+ --inc $i
+ --let $value=query_get_value(select value from valid_values, value, $i)
+ }
+
+ if ($session)
+ {
+ --echo '# Setting to valid values in session scope#'
+
+ --let $i=1
+ --let $value=query_get_value(select value from valid_values, value, $i)
+ while ($value != 'No such row')
+ {
+ --echo "Trying to set variable @@session.$sys_var to $value"
+ --eval SET @@session.$sys_var = $value
+ --eval SELECT @@session.$sys_var
+ --let $v=`SELECT @@session.$sys_var`
+ --source include/correctboolvalue.inc
+ if (!$sticky)
+ {
+ if ($v != $int_value)
+ {
+ --echo Set @@session.$sys_var to $value but it remained set to $v
+ --die Wrong variable value
+ }
+ }
+ --echo "Setting the session scope variable back to default"
+ --eval SET @@session.$sys_var = DEFAULT
+ --eval SELECT @@session.$sys_var
+
+ --inc $i
+ --let $value=query_get_value(select value from valid_values, value, $i)
+ }
+ }
+ if (!$session)
+ {
+ --echo "Trying to set variable @@session.$sys_var to 444. It should fail because it is not session."
+ --Error ER_GLOBAL_VARIABLE
+ --eval SET @@session.$sys_var = 444
+ }
+
+ --echo '# Testing with invalid values in global scope #'
+ ####################################################################
+  # Change the value of $sys_var to an invalid value                #
+ ####################################################################
+ --let $i=1
+ --let $value=query_get_value(select value from invalid_values, value, $i)
+ while ($value != 'No such row')
+ {
+ --echo "Trying to set variable @@global.$sys_var to $value"
+ --Error ER_WRONG_VALUE_FOR_VAR, ER_WRONG_TYPE_FOR_VAR
+ --eval SET @@global.$sys_var = $value
+ --eval SELECT @@global.$sys_var
+ --inc $i
+ --let $value=query_get_value(select value from invalid_values, value, $i)
+ }
+}
+
+if ($read_only)
+{
+ --echo "Trying to set variable @@global.$sys_var to 444. It should fail because it is readonly."
+ --Error ER_INCORRECT_GLOBAL_LOCAL_VAR
+ --eval SET @@global.$sys_var = 444
+}
+
+####################################
+# Restore initial value #
+####################################
+if (!$read_only)
+{
+ --eval SET @@global.$sys_var = @start_global_value
+ --eval SELECT @@global.$sys_var
+ if ($session)
+ {
+ --eval SET @@session.$sys_var = @start_session_value
+ --eval SELECT @@session.$sys_var
+ }
+}
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/my.cnf b/storage/rocksdb/mysql-test/rocksdb_sys_vars/my.cnf
new file mode 100644
index 00000000000..f37bb45bad6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/my.cnf
@@ -0,0 +1,10 @@
+!include include/default_my.cnf
+
+[server]
+skip-innodb
+default-storage-engine=rocksdb
+
+sql-mode=NO_ENGINE_SUBSTITUTION
+explicit-defaults-for-timestamp=1
+loose-rocksdb_lock_wait_timeout=1
+loose-rocksdb_force_compute_memtable_stats_cachetime=0
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/all_vars.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/all_vars.result
new file mode 100644
index 00000000000..159d6a983c8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/all_vars.result
@@ -0,0 +1,13 @@
+create table t1 (test_name text) engine=MyISAM;
+create table t2 (variable_name text) engine=MyISAM;
+load data infile "MYSQLTEST_VARDIR/tmp/rocksdb_sys_vars.all_vars.txt" into table t1;
+insert into t2 select variable_name from information_schema.global_variables where variable_name like "rocksdb_%";
+insert into t2 select variable_name from information_schema.session_variables where variable_name like "rocksdb_%";
+select variable_name as `There should be *no* long test name listed below:` from t2
+where length(variable_name) > 50;
+There should be *no* long test name listed below:
+select variable_name as `There should be *no* variables listed below:` from t2
+left join t1 on variable_name=test_name where test_name is null ORDER BY variable_name;
+There should be *no* variables listed below:
+drop table t1;
+drop table t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_access_hint_on_compaction_start_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_access_hint_on_compaction_start_basic.result
new file mode 100644
index 00000000000..4398563d064
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_access_hint_on_compaction_start_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_ACCESS_HINT_ON_COMPACTION_START;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_ACCESS_HINT_ON_COMPACTION_START to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_ACCESS_HINT_ON_COMPACTION_START = 444;
+ERROR HY000: Variable 'rocksdb_access_hint_on_compaction_start' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_advise_random_on_open_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_advise_random_on_open_basic.result
new file mode 100644
index 00000000000..f7175fd91a3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_advise_random_on_open_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_ADVISE_RANDOM_ON_OPEN;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_ADVISE_RANDOM_ON_OPEN to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_ADVISE_RANDOM_ON_OPEN = 444;
+ERROR HY000: Variable 'rocksdb_advise_random_on_open' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_concurrent_memtable_write_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_concurrent_memtable_write_basic.result
new file mode 100644
index 00000000000..3b174fbbc63
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_concurrent_memtable_write_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE = 444;
+ERROR HY000: Variable 'rocksdb_allow_concurrent_memtable_write' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_mmap_reads_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_mmap_reads_basic.result
new file mode 100644
index 00000000000..f0f1b077ae0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_mmap_reads_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_ALLOW_MMAP_READS;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_ALLOW_MMAP_READS to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_ALLOW_MMAP_READS = 444;
+ERROR HY000: Variable 'rocksdb_allow_mmap_reads' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_mmap_writes_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_mmap_writes_basic.result
new file mode 100644
index 00000000000..3fa1f14e1df
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_mmap_writes_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_ALLOW_MMAP_WRITES;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_ALLOW_MMAP_WRITES to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_ALLOW_MMAP_WRITES = 444;
+ERROR HY000: Variable 'rocksdb_allow_mmap_writes' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_to_start_after_corruption_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_to_start_after_corruption_basic.result
new file mode 100644
index 00000000000..086010dc79e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_to_start_after_corruption_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_ALLOW_TO_START_AFTER_CORRUPTION;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_ALLOW_TO_START_AFTER_CORRUPTION to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_ALLOW_TO_START_AFTER_CORRUPTION = 444;
+ERROR HY000: Variable 'rocksdb_allow_to_start_after_corruption' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_blind_delete_primary_key_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_blind_delete_primary_key_basic.result
new file mode 100644
index 00000000000..805ed2335f7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_blind_delete_primary_key_basic.result
@@ -0,0 +1,100 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY to 1"
+SET @@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY = 1;
+SELECT @@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY;
+@@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY = DEFAULT;
+SELECT @@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY;
+@@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY
+0
+"Trying to set variable @@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY to 0"
+SET @@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY = 0;
+SELECT @@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY;
+@@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY = DEFAULT;
+SELECT @@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY;
+@@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY
+0
+"Trying to set variable @@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY to on"
+SET @@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY = on;
+SELECT @@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY;
+@@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY = DEFAULT;
+SELECT @@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY;
+@@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY to 1"
+SET @@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY = 1;
+SELECT @@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY;
+@@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY = DEFAULT;
+SELECT @@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY;
+@@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY
+0
+"Trying to set variable @@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY to 0"
+SET @@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY = 0;
+SELECT @@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY;
+@@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY = DEFAULT;
+SELECT @@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY;
+@@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY
+0
+"Trying to set variable @@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY to on"
+SET @@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY = on;
+SELECT @@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY;
+@@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY = DEFAULT;
+SELECT @@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY;
+@@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY to 'aaa'"
+SET @@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY;
+@@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY
+0
+"Trying to set variable @@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY to 'bbb'"
+SET @@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY;
+@@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY
+0
+SET @@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY = @start_global_value;
+SELECT @@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY;
+@@global.ROCKSDB_BLIND_DELETE_PRIMARY_KEY
+0
+SET @@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY = @start_session_value;
+SELECT @@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY;
+@@session.ROCKSDB_BLIND_DELETE_PRIMARY_KEY
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_block_cache_size_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_block_cache_size_basic.result
new file mode 100644
index 00000000000..b319c39a1cd
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_block_cache_size_basic.result
@@ -0,0 +1,85 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(65536);
+INSERT INTO valid_values VALUES(1024);
+INSERT INTO valid_values VALUES(1*1024*1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+SET @start_global_value = @@global.ROCKSDB_BLOCK_CACHE_SIZE;
+SELECT @start_global_value;
+@start_global_value
+536870912
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_BLOCK_CACHE_SIZE to 65536"
+SET @@global.ROCKSDB_BLOCK_CACHE_SIZE = 65536;
+SELECT @@global.ROCKSDB_BLOCK_CACHE_SIZE;
+@@global.ROCKSDB_BLOCK_CACHE_SIZE
+65536
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BLOCK_CACHE_SIZE = DEFAULT;
+SELECT @@global.ROCKSDB_BLOCK_CACHE_SIZE;
+@@global.ROCKSDB_BLOCK_CACHE_SIZE
+536870912
+"Trying to set variable @@global.ROCKSDB_BLOCK_CACHE_SIZE to 1024"
+SET @@global.ROCKSDB_BLOCK_CACHE_SIZE = 1024;
+SELECT @@global.ROCKSDB_BLOCK_CACHE_SIZE;
+@@global.ROCKSDB_BLOCK_CACHE_SIZE
+1024
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BLOCK_CACHE_SIZE = DEFAULT;
+SELECT @@global.ROCKSDB_BLOCK_CACHE_SIZE;
+@@global.ROCKSDB_BLOCK_CACHE_SIZE
+536870912
+"Trying to set variable @@global.ROCKSDB_BLOCK_CACHE_SIZE to 1048576"
+SET @@global.ROCKSDB_BLOCK_CACHE_SIZE = 1048576;
+SELECT @@global.ROCKSDB_BLOCK_CACHE_SIZE;
+@@global.ROCKSDB_BLOCK_CACHE_SIZE
+1048576
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BLOCK_CACHE_SIZE = DEFAULT;
+SELECT @@global.ROCKSDB_BLOCK_CACHE_SIZE;
+@@global.ROCKSDB_BLOCK_CACHE_SIZE
+536870912
+"Trying to set variable @@session.ROCKSDB_BLOCK_CACHE_SIZE to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_BLOCK_CACHE_SIZE = 444;
+ERROR HY000: Variable 'rocksdb_block_cache_size' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_BLOCK_CACHE_SIZE to 'aaa'"
+SET @@global.ROCKSDB_BLOCK_CACHE_SIZE = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BLOCK_CACHE_SIZE;
+@@global.ROCKSDB_BLOCK_CACHE_SIZE
+536870912
+"Trying to set variable @@global.ROCKSDB_BLOCK_CACHE_SIZE to 'bbb'"
+SET @@global.ROCKSDB_BLOCK_CACHE_SIZE = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BLOCK_CACHE_SIZE;
+@@global.ROCKSDB_BLOCK_CACHE_SIZE
+536870912
+"Trying to set variable @@global.ROCKSDB_BLOCK_CACHE_SIZE to '-1'"
+SET @@global.ROCKSDB_BLOCK_CACHE_SIZE = '-1';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BLOCK_CACHE_SIZE;
+@@global.ROCKSDB_BLOCK_CACHE_SIZE
+536870912
+"Trying to set variable @@global.ROCKSDB_BLOCK_CACHE_SIZE to '101'"
+SET @@global.ROCKSDB_BLOCK_CACHE_SIZE = '101';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BLOCK_CACHE_SIZE;
+@@global.ROCKSDB_BLOCK_CACHE_SIZE
+536870912
+"Trying to set variable @@global.ROCKSDB_BLOCK_CACHE_SIZE to '484436'"
+SET @@global.ROCKSDB_BLOCK_CACHE_SIZE = '484436';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BLOCK_CACHE_SIZE;
+@@global.ROCKSDB_BLOCK_CACHE_SIZE
+536870912
+SET @@global.ROCKSDB_BLOCK_CACHE_SIZE = @start_global_value;
+SELECT @@global.ROCKSDB_BLOCK_CACHE_SIZE;
+@@global.ROCKSDB_BLOCK_CACHE_SIZE
+536870912
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_block_restart_interval_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_block_restart_interval_basic.result
new file mode 100644
index 00000000000..4d02e197a67
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_block_restart_interval_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_BLOCK_RESTART_INTERVAL;
+SELECT @start_global_value;
+@start_global_value
+16
+"Trying to set variable @@global.ROCKSDB_BLOCK_RESTART_INTERVAL to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_BLOCK_RESTART_INTERVAL = 444;
+ERROR HY000: Variable 'rocksdb_block_restart_interval' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_block_size_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_block_size_basic.result
new file mode 100644
index 00000000000..0382184f2a0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_block_size_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_BLOCK_SIZE;
+SELECT @start_global_value;
+@start_global_value
+4096
+"Trying to set variable @@global.ROCKSDB_BLOCK_SIZE to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_BLOCK_SIZE = 444;
+ERROR HY000: Variable 'rocksdb_block_size' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_block_size_deviation_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_block_size_deviation_basic.result
new file mode 100644
index 00000000000..83513f814ed
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_block_size_deviation_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_BLOCK_SIZE_DEVIATION;
+SELECT @start_global_value;
+@start_global_value
+10
+"Trying to set variable @@global.ROCKSDB_BLOCK_SIZE_DEVIATION to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_BLOCK_SIZE_DEVIATION = 444;
+ERROR HY000: Variable 'rocksdb_block_size_deviation' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_allow_sk_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_allow_sk_basic.result
new file mode 100644
index 00000000000..c0d09d89c11
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_allow_sk_basic.result
@@ -0,0 +1,100 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_BULK_LOAD_ALLOW_SK;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_BULK_LOAD_ALLOW_SK;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_SK to 1"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_SK = 1;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_SK;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_SK
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_SK = DEFAULT;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_SK;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_SK
+0
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_SK to 0"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_SK = 0;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_SK;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_SK
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_SK = DEFAULT;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_SK;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_SK
+0
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_SK to on"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_SK = on;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_SK;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_SK
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_SK = DEFAULT;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_SK;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_SK
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_BULK_LOAD_ALLOW_SK to 1"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_SK = 1;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_SK;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_SK
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_SK = DEFAULT;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_SK;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_SK
+0
+"Trying to set variable @@session.ROCKSDB_BULK_LOAD_ALLOW_SK to 0"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_SK = 0;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_SK;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_SK
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_SK = DEFAULT;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_SK;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_SK
+0
+"Trying to set variable @@session.ROCKSDB_BULK_LOAD_ALLOW_SK to on"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_SK = on;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_SK;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_SK
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_SK = DEFAULT;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_SK;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_SK
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_SK to 'aaa'"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_SK = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_SK;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_SK
+0
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_SK to 'bbb'"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_SK = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_SK;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_SK
+0
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_SK = @start_global_value;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_SK;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_SK
+0
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_SK = @start_session_value;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_SK;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_SK
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_allow_unsorted_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_allow_unsorted_basic.result
new file mode 100644
index 00000000000..a59ba561181
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_allow_unsorted_basic.result
@@ -0,0 +1,100 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to 1"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = 1;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = DEFAULT;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to 0"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = 0;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = DEFAULT;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to on"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = on;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = DEFAULT;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to 1"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = 1;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = DEFAULT;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Trying to set variable @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to 0"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = 0;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = DEFAULT;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Trying to set variable @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to on"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = on;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = DEFAULT;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to 'aaa'"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to 'bbb'"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = @start_global_value;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = @start_session_value;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_basic.result
new file mode 100644
index 00000000000..96b78cf669e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_basic.result
@@ -0,0 +1,100 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_BULK_LOAD;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_BULK_LOAD;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD to 1"
+SET @@global.ROCKSDB_BULK_LOAD = 1;
+SELECT @@global.ROCKSDB_BULK_LOAD;
+@@global.ROCKSDB_BULK_LOAD
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BULK_LOAD = DEFAULT;
+SELECT @@global.ROCKSDB_BULK_LOAD;
+@@global.ROCKSDB_BULK_LOAD
+0
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD to 0"
+SET @@global.ROCKSDB_BULK_LOAD = 0;
+SELECT @@global.ROCKSDB_BULK_LOAD;
+@@global.ROCKSDB_BULK_LOAD
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BULK_LOAD = DEFAULT;
+SELECT @@global.ROCKSDB_BULK_LOAD;
+@@global.ROCKSDB_BULK_LOAD
+0
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD to on"
+SET @@global.ROCKSDB_BULK_LOAD = on;
+SELECT @@global.ROCKSDB_BULK_LOAD;
+@@global.ROCKSDB_BULK_LOAD
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BULK_LOAD = DEFAULT;
+SELECT @@global.ROCKSDB_BULK_LOAD;
+@@global.ROCKSDB_BULK_LOAD
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_BULK_LOAD to 1"
+SET @@session.ROCKSDB_BULK_LOAD = 1;
+SELECT @@session.ROCKSDB_BULK_LOAD;
+@@session.ROCKSDB_BULK_LOAD
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_BULK_LOAD = DEFAULT;
+SELECT @@session.ROCKSDB_BULK_LOAD;
+@@session.ROCKSDB_BULK_LOAD
+0
+"Trying to set variable @@session.ROCKSDB_BULK_LOAD to 0"
+SET @@session.ROCKSDB_BULK_LOAD = 0;
+SELECT @@session.ROCKSDB_BULK_LOAD;
+@@session.ROCKSDB_BULK_LOAD
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_BULK_LOAD = DEFAULT;
+SELECT @@session.ROCKSDB_BULK_LOAD;
+@@session.ROCKSDB_BULK_LOAD
+0
+"Trying to set variable @@session.ROCKSDB_BULK_LOAD to on"
+SET @@session.ROCKSDB_BULK_LOAD = on;
+SELECT @@session.ROCKSDB_BULK_LOAD;
+@@session.ROCKSDB_BULK_LOAD
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_BULK_LOAD = DEFAULT;
+SELECT @@session.ROCKSDB_BULK_LOAD;
+@@session.ROCKSDB_BULK_LOAD
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD to 'aaa'"
+SET @@global.ROCKSDB_BULK_LOAD = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BULK_LOAD;
+@@global.ROCKSDB_BULK_LOAD
+0
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD to 'bbb'"
+SET @@global.ROCKSDB_BULK_LOAD = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BULK_LOAD;
+@@global.ROCKSDB_BULK_LOAD
+0
+SET @@global.ROCKSDB_BULK_LOAD = @start_global_value;
+SELECT @@global.ROCKSDB_BULK_LOAD;
+@@global.ROCKSDB_BULK_LOAD
+0
+SET @@session.ROCKSDB_BULK_LOAD = @start_session_value;
+SELECT @@session.ROCKSDB_BULK_LOAD;
+@@session.ROCKSDB_BULK_LOAD
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_size_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_size_basic.result
new file mode 100644
index 00000000000..40404d2fab5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_size_basic.result
@@ -0,0 +1,72 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_BULK_LOAD_SIZE;
+SELECT @start_global_value;
+@start_global_value
+1000
+SET @start_session_value = @@session.ROCKSDB_BULK_LOAD_SIZE;
+SELECT @start_session_value;
+@start_session_value
+1000
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_SIZE to 1"
+SET @@global.ROCKSDB_BULK_LOAD_SIZE = 1;
+SELECT @@global.ROCKSDB_BULK_LOAD_SIZE;
+@@global.ROCKSDB_BULK_LOAD_SIZE
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BULK_LOAD_SIZE = DEFAULT;
+SELECT @@global.ROCKSDB_BULK_LOAD_SIZE;
+@@global.ROCKSDB_BULK_LOAD_SIZE
+1000
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_SIZE to 1024"
+SET @@global.ROCKSDB_BULK_LOAD_SIZE = 1024;
+SELECT @@global.ROCKSDB_BULK_LOAD_SIZE;
+@@global.ROCKSDB_BULK_LOAD_SIZE
+1024
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BULK_LOAD_SIZE = DEFAULT;
+SELECT @@global.ROCKSDB_BULK_LOAD_SIZE;
+@@global.ROCKSDB_BULK_LOAD_SIZE
+1000
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_BULK_LOAD_SIZE to 1"
+SET @@session.ROCKSDB_BULK_LOAD_SIZE = 1;
+SELECT @@session.ROCKSDB_BULK_LOAD_SIZE;
+@@session.ROCKSDB_BULK_LOAD_SIZE
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_BULK_LOAD_SIZE = DEFAULT;
+SELECT @@session.ROCKSDB_BULK_LOAD_SIZE;
+@@session.ROCKSDB_BULK_LOAD_SIZE
+1000
+"Trying to set variable @@session.ROCKSDB_BULK_LOAD_SIZE to 1024"
+SET @@session.ROCKSDB_BULK_LOAD_SIZE = 1024;
+SELECT @@session.ROCKSDB_BULK_LOAD_SIZE;
+@@session.ROCKSDB_BULK_LOAD_SIZE
+1024
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_BULK_LOAD_SIZE = DEFAULT;
+SELECT @@session.ROCKSDB_BULK_LOAD_SIZE;
+@@session.ROCKSDB_BULK_LOAD_SIZE
+1000
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_SIZE to 'aaa'"
+SET @@global.ROCKSDB_BULK_LOAD_SIZE = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BULK_LOAD_SIZE;
+@@global.ROCKSDB_BULK_LOAD_SIZE
+1000
+SET @@global.ROCKSDB_BULK_LOAD_SIZE = @start_global_value;
+SELECT @@global.ROCKSDB_BULK_LOAD_SIZE;
+@@global.ROCKSDB_BULK_LOAD_SIZE
+1000
+SET @@session.ROCKSDB_BULK_LOAD_SIZE = @start_session_value;
+SELECT @@session.ROCKSDB_BULK_LOAD_SIZE;
+@@session.ROCKSDB_BULK_LOAD_SIZE
+1000
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bytes_per_sync_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bytes_per_sync_basic.result
new file mode 100644
index 00000000000..9af4f730a21
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bytes_per_sync_basic.result
@@ -0,0 +1,85 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+SET @start_global_value = @@global.ROCKSDB_BYTES_PER_SYNC;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 100"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = 100;
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+100
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = DEFAULT;
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 1"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = 1;
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = DEFAULT;
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 0"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = 0;
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = DEFAULT;
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+"Trying to set variable @@session.ROCKSDB_BYTES_PER_SYNC to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_BYTES_PER_SYNC = 444;
+ERROR HY000: Variable 'rocksdb_bytes_per_sync' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 'aaa'"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 'bbb'"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to '-1'"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = '-1';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to '101'"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = '101';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to '484436'"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = '484436';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+SET @@global.ROCKSDB_BYTES_PER_SYNC = @start_global_value;
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_dump_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_dump_basic.result
new file mode 100644
index 00000000000..90fc99ef21a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_dump_basic.result
@@ -0,0 +1,19 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_CACHE_DUMP;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_CACHE_DUMP to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_CACHE_DUMP = 444;
+ERROR HY000: Variable 'rocksdb_cache_dump' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_high_pri_pool_ratio_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_high_pri_pool_ratio_basic.result
new file mode 100644
index 00000000000..9d098385789
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_high_pri_pool_ratio_basic.result
@@ -0,0 +1,22 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1.0);
+INSERT INTO valid_values VALUES(0.0);
+INSERT INTO valid_values VALUES(0.5);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES(2.0);
+INSERT INTO invalid_values VALUES(-0.5);
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+INSERT INTO invalid_values VALUES('\'0.5\'');
+SET @start_global_value = @@global.ROCKSDB_CACHE_HIGH_PRI_POOL_RATIO;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_CACHE_HIGH_PRI_POOL_RATIO to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_CACHE_HIGH_PRI_POOL_RATIO = 444;
+ERROR HY000: Variable 'rocksdb_cache_high_pri_pool_ratio' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_index_and_filter_blocks_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_index_and_filter_blocks_basic.result
new file mode 100644
index 00000000000..12c25ad63dc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_index_and_filter_blocks_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_CACHE_INDEX_AND_FILTER_BLOCKS;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_CACHE_INDEX_AND_FILTER_BLOCKS to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_CACHE_INDEX_AND_FILTER_BLOCKS = 444;
+ERROR HY000: Variable 'rocksdb_cache_index_and_filter_blocks' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_index_and_filter_with_high_priority_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_index_and_filter_with_high_priority_basic.result
new file mode 100644
index 00000000000..819425c8bce
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_index_and_filter_with_high_priority_basic.result
@@ -0,0 +1,19 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_CACHE_INDEX_AND_FILTER_WITH_HIGH_PRIORITY;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_CACHE_INDEX_AND_FILTER_WITH_HIGH_PRIORITY to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_CACHE_INDEX_AND_FILTER_WITH_HIGH_PRIORITY = 444;
+ERROR HY000: Variable 'rocksdb_cache_index_and_filter_with_high_priority' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_checksums_pct_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_checksums_pct_basic.result
new file mode 100644
index 00000000000..694c9a4f1dc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_checksums_pct_basic.result
@@ -0,0 +1,93 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(99);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_CHECKSUMS_PCT;
+SELECT @start_global_value;
+@start_global_value
+100
+SET @start_session_value = @@session.ROCKSDB_CHECKSUMS_PCT;
+SELECT @start_session_value;
+@start_session_value
+100
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_CHECKSUMS_PCT to 0"
+SET @@global.ROCKSDB_CHECKSUMS_PCT = 0;
+SELECT @@global.ROCKSDB_CHECKSUMS_PCT;
+@@global.ROCKSDB_CHECKSUMS_PCT
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_CHECKSUMS_PCT = DEFAULT;
+SELECT @@global.ROCKSDB_CHECKSUMS_PCT;
+@@global.ROCKSDB_CHECKSUMS_PCT
+100
+"Trying to set variable @@global.ROCKSDB_CHECKSUMS_PCT to 1"
+SET @@global.ROCKSDB_CHECKSUMS_PCT = 1;
+SELECT @@global.ROCKSDB_CHECKSUMS_PCT;
+@@global.ROCKSDB_CHECKSUMS_PCT
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_CHECKSUMS_PCT = DEFAULT;
+SELECT @@global.ROCKSDB_CHECKSUMS_PCT;
+@@global.ROCKSDB_CHECKSUMS_PCT
+100
+"Trying to set variable @@global.ROCKSDB_CHECKSUMS_PCT to 99"
+SET @@global.ROCKSDB_CHECKSUMS_PCT = 99;
+SELECT @@global.ROCKSDB_CHECKSUMS_PCT;
+@@global.ROCKSDB_CHECKSUMS_PCT
+99
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_CHECKSUMS_PCT = DEFAULT;
+SELECT @@global.ROCKSDB_CHECKSUMS_PCT;
+@@global.ROCKSDB_CHECKSUMS_PCT
+100
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_CHECKSUMS_PCT to 0"
+SET @@session.ROCKSDB_CHECKSUMS_PCT = 0;
+SELECT @@session.ROCKSDB_CHECKSUMS_PCT;
+@@session.ROCKSDB_CHECKSUMS_PCT
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_CHECKSUMS_PCT = DEFAULT;
+SELECT @@session.ROCKSDB_CHECKSUMS_PCT;
+@@session.ROCKSDB_CHECKSUMS_PCT
+100
+"Trying to set variable @@session.ROCKSDB_CHECKSUMS_PCT to 1"
+SET @@session.ROCKSDB_CHECKSUMS_PCT = 1;
+SELECT @@session.ROCKSDB_CHECKSUMS_PCT;
+@@session.ROCKSDB_CHECKSUMS_PCT
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_CHECKSUMS_PCT = DEFAULT;
+SELECT @@session.ROCKSDB_CHECKSUMS_PCT;
+@@session.ROCKSDB_CHECKSUMS_PCT
+100
+"Trying to set variable @@session.ROCKSDB_CHECKSUMS_PCT to 99"
+SET @@session.ROCKSDB_CHECKSUMS_PCT = 99;
+SELECT @@session.ROCKSDB_CHECKSUMS_PCT;
+@@session.ROCKSDB_CHECKSUMS_PCT
+99
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_CHECKSUMS_PCT = DEFAULT;
+SELECT @@session.ROCKSDB_CHECKSUMS_PCT;
+@@session.ROCKSDB_CHECKSUMS_PCT
+100
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_CHECKSUMS_PCT to 'aaa'"
+SET @@global.ROCKSDB_CHECKSUMS_PCT = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_CHECKSUMS_PCT;
+@@global.ROCKSDB_CHECKSUMS_PCT
+100
+SET @@global.ROCKSDB_CHECKSUMS_PCT = @start_global_value;
+SELECT @@global.ROCKSDB_CHECKSUMS_PCT;
+@@global.ROCKSDB_CHECKSUMS_PCT
+100
+SET @@session.ROCKSDB_CHECKSUMS_PCT = @start_session_value;
+SELECT @@session.ROCKSDB_CHECKSUMS_PCT;
+@@session.ROCKSDB_CHECKSUMS_PCT
+100
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_collect_sst_properties_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_collect_sst_properties_basic.result
new file mode 100644
index 00000000000..2f101987332
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_collect_sst_properties_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_COLLECT_SST_PROPERTIES;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_COLLECT_SST_PROPERTIES to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_COLLECT_SST_PROPERTIES = 444;
+ERROR HY000: Variable 'rocksdb_collect_sst_properties' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_commit_in_the_middle_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_commit_in_the_middle_basic.result
new file mode 100644
index 00000000000..4664ccb2b1e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_commit_in_the_middle_basic.result
@@ -0,0 +1,100 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_COMMIT_IN_THE_MIDDLE;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_COMMIT_IN_THE_MIDDLE;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_COMMIT_IN_THE_MIDDLE to 1"
+SET @@global.ROCKSDB_COMMIT_IN_THE_MIDDLE = 1;
+SELECT @@global.ROCKSDB_COMMIT_IN_THE_MIDDLE;
+@@global.ROCKSDB_COMMIT_IN_THE_MIDDLE
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_COMMIT_IN_THE_MIDDLE = DEFAULT;
+SELECT @@global.ROCKSDB_COMMIT_IN_THE_MIDDLE;
+@@global.ROCKSDB_COMMIT_IN_THE_MIDDLE
+0
+"Trying to set variable @@global.ROCKSDB_COMMIT_IN_THE_MIDDLE to 0"
+SET @@global.ROCKSDB_COMMIT_IN_THE_MIDDLE = 0;
+SELECT @@global.ROCKSDB_COMMIT_IN_THE_MIDDLE;
+@@global.ROCKSDB_COMMIT_IN_THE_MIDDLE
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_COMMIT_IN_THE_MIDDLE = DEFAULT;
+SELECT @@global.ROCKSDB_COMMIT_IN_THE_MIDDLE;
+@@global.ROCKSDB_COMMIT_IN_THE_MIDDLE
+0
+"Trying to set variable @@global.ROCKSDB_COMMIT_IN_THE_MIDDLE to on"
+SET @@global.ROCKSDB_COMMIT_IN_THE_MIDDLE = on;
+SELECT @@global.ROCKSDB_COMMIT_IN_THE_MIDDLE;
+@@global.ROCKSDB_COMMIT_IN_THE_MIDDLE
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_COMMIT_IN_THE_MIDDLE = DEFAULT;
+SELECT @@global.ROCKSDB_COMMIT_IN_THE_MIDDLE;
+@@global.ROCKSDB_COMMIT_IN_THE_MIDDLE
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_COMMIT_IN_THE_MIDDLE to 1"
+SET @@session.ROCKSDB_COMMIT_IN_THE_MIDDLE = 1;
+SELECT @@session.ROCKSDB_COMMIT_IN_THE_MIDDLE;
+@@session.ROCKSDB_COMMIT_IN_THE_MIDDLE
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_COMMIT_IN_THE_MIDDLE = DEFAULT;
+SELECT @@session.ROCKSDB_COMMIT_IN_THE_MIDDLE;
+@@session.ROCKSDB_COMMIT_IN_THE_MIDDLE
+0
+"Trying to set variable @@session.ROCKSDB_COMMIT_IN_THE_MIDDLE to 0"
+SET @@session.ROCKSDB_COMMIT_IN_THE_MIDDLE = 0;
+SELECT @@session.ROCKSDB_COMMIT_IN_THE_MIDDLE;
+@@session.ROCKSDB_COMMIT_IN_THE_MIDDLE
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_COMMIT_IN_THE_MIDDLE = DEFAULT;
+SELECT @@session.ROCKSDB_COMMIT_IN_THE_MIDDLE;
+@@session.ROCKSDB_COMMIT_IN_THE_MIDDLE
+0
+"Trying to set variable @@session.ROCKSDB_COMMIT_IN_THE_MIDDLE to on"
+SET @@session.ROCKSDB_COMMIT_IN_THE_MIDDLE = on;
+SELECT @@session.ROCKSDB_COMMIT_IN_THE_MIDDLE;
+@@session.ROCKSDB_COMMIT_IN_THE_MIDDLE
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_COMMIT_IN_THE_MIDDLE = DEFAULT;
+SELECT @@session.ROCKSDB_COMMIT_IN_THE_MIDDLE;
+@@session.ROCKSDB_COMMIT_IN_THE_MIDDLE
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_COMMIT_IN_THE_MIDDLE to 'aaa'"
+SET @@global.ROCKSDB_COMMIT_IN_THE_MIDDLE = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_COMMIT_IN_THE_MIDDLE;
+@@global.ROCKSDB_COMMIT_IN_THE_MIDDLE
+0
+"Trying to set variable @@global.ROCKSDB_COMMIT_IN_THE_MIDDLE to 'bbb'"
+SET @@global.ROCKSDB_COMMIT_IN_THE_MIDDLE = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_COMMIT_IN_THE_MIDDLE;
+@@global.ROCKSDB_COMMIT_IN_THE_MIDDLE
+0
+SET @@global.ROCKSDB_COMMIT_IN_THE_MIDDLE = @start_global_value;
+SELECT @@global.ROCKSDB_COMMIT_IN_THE_MIDDLE;
+@@global.ROCKSDB_COMMIT_IN_THE_MIDDLE
+0
+SET @@session.ROCKSDB_COMMIT_IN_THE_MIDDLE = @start_session_value;
+SELECT @@session.ROCKSDB_COMMIT_IN_THE_MIDDLE;
+@@session.ROCKSDB_COMMIT_IN_THE_MIDDLE
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_commit_time_batch_for_recovery_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_commit_time_batch_for_recovery_basic.result
new file mode 100644
index 00000000000..8d3f4a6e5af
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_commit_time_batch_for_recovery_basic.result
@@ -0,0 +1,121 @@
+CREATE TABLE valid_values (value varchar(255));
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+CREATE TABLE invalid_values (value varchar(255));
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
+SELECT @start_global_value;
+@start_global_value
+1
+SET @start_session_value = @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
+SELECT @start_session_value;
+@start_session_value
+1
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 1"
+SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 1;
+SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
+@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT;
+SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
+@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
+1
+"Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 0"
+SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 0;
+SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
+@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT;
+SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
+@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
+1
+"Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to on"
+SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = on;
+SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
+@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT;
+SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
+@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
+1
+"Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to off"
+SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = off;
+SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
+@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT;
+SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
+@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
+1
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 1"
+SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 1;
+SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
+@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT;
+SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
+@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
+1
+"Trying to set variable @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 0"
+SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 0;
+SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
+@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT;
+SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
+@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
+1
+"Trying to set variable @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to on"
+SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = on;
+SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
+@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT;
+SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
+@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
+1
+"Trying to set variable @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to off"
+SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = off;
+SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
+@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT;
+SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
+@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
+1
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 'aaa'"
+SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
+@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
+1
+"Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 'bbb'"
+SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
+@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
+1
+SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = @start_global_value;
+SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
+@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
+1
+SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = @start_session_value;
+SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
+@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
+1
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compact_cf_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compact_cf_basic.result
new file mode 100644
index 00000000000..b65ef65c8f0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compact_cf_basic.result
@@ -0,0 +1,40 @@
+call mtr.add_suppression(" Column family '[a-z]*' not found.");
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES('abc');
+INSERT INTO valid_values VALUES('def');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+SET @start_global_value = @@global.ROCKSDB_COMPACT_CF;
+SELECT @start_global_value;
+@start_global_value
+
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_COMPACT_CF to abc"
+SET @@global.ROCKSDB_COMPACT_CF = abc;
+SELECT @@global.ROCKSDB_COMPACT_CF;
+@@global.ROCKSDB_COMPACT_CF
+
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_COMPACT_CF = DEFAULT;
+SELECT @@global.ROCKSDB_COMPACT_CF;
+@@global.ROCKSDB_COMPACT_CF
+
+"Trying to set variable @@global.ROCKSDB_COMPACT_CF to def"
+SET @@global.ROCKSDB_COMPACT_CF = def;
+SELECT @@global.ROCKSDB_COMPACT_CF;
+@@global.ROCKSDB_COMPACT_CF
+
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_COMPACT_CF = DEFAULT;
+SELECT @@global.ROCKSDB_COMPACT_CF;
+@@global.ROCKSDB_COMPACT_CF
+
+"Trying to set variable @@session.ROCKSDB_COMPACT_CF to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_COMPACT_CF = 444;
+ERROR HY000: Variable 'rocksdb_compact_cf' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+SET @@global.ROCKSDB_COMPACT_CF = @start_global_value;
+SELECT @@global.ROCKSDB_COMPACT_CF;
+@@global.ROCKSDB_COMPACT_CF
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
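
rocksdb_compact_cf is a trigger-style variable rather than a stored setting: assigning a column family name requests a manual compaction of that column family, and reading the variable back always yields the empty string, which is why every SELECT in the result above prints an empty value (and why a suppression for "Column family ... not found" is registered for the nonexistent names abc and def). A hedged usage sketch, assuming the conventional MyRocks column family name 'default':

    SET @@global.rocksdb_compact_cf = 'default';  # request a manual compaction of the default CF
    SELECT @@global.rocksdb_compact_cf;           # always '': the assigned name is consumed, not stored
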
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_readahead_size_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_readahead_size_basic.result
new file mode 100644
index 00000000000..206cfa8188e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_readahead_size_basic.result
@@ -0,0 +1,70 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(222333);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @@global.rocksdb_compaction_readahead_size = -1;
+Warnings:
+Warning 1292 Truncated incorrect rocksdb_compaction_readahead_... value: '-1'
+SELECT @@global.rocksdb_compaction_readahead_size;
+@@global.rocksdb_compaction_readahead_size
+0
+SET @start_global_value = @@global.ROCKSDB_COMPACTION_READAHEAD_SIZE;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_COMPACTION_READAHEAD_SIZE to 1"
+SET @@global.ROCKSDB_COMPACTION_READAHEAD_SIZE = 1;
+SELECT @@global.ROCKSDB_COMPACTION_READAHEAD_SIZE;
+@@global.ROCKSDB_COMPACTION_READAHEAD_SIZE
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_COMPACTION_READAHEAD_SIZE = DEFAULT;
+SELECT @@global.ROCKSDB_COMPACTION_READAHEAD_SIZE;
+@@global.ROCKSDB_COMPACTION_READAHEAD_SIZE
+0
+"Trying to set variable @@global.ROCKSDB_COMPACTION_READAHEAD_SIZE to 0"
+SET @@global.ROCKSDB_COMPACTION_READAHEAD_SIZE = 0;
+SELECT @@global.ROCKSDB_COMPACTION_READAHEAD_SIZE;
+@@global.ROCKSDB_COMPACTION_READAHEAD_SIZE
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_COMPACTION_READAHEAD_SIZE = DEFAULT;
+SELECT @@global.ROCKSDB_COMPACTION_READAHEAD_SIZE;
+@@global.ROCKSDB_COMPACTION_READAHEAD_SIZE
+0
+"Trying to set variable @@global.ROCKSDB_COMPACTION_READAHEAD_SIZE to 222333"
+SET @@global.ROCKSDB_COMPACTION_READAHEAD_SIZE = 222333;
+SELECT @@global.ROCKSDB_COMPACTION_READAHEAD_SIZE;
+@@global.ROCKSDB_COMPACTION_READAHEAD_SIZE
+222333
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_COMPACTION_READAHEAD_SIZE = DEFAULT;
+SELECT @@global.ROCKSDB_COMPACTION_READAHEAD_SIZE;
+@@global.ROCKSDB_COMPACTION_READAHEAD_SIZE
+0
+"Trying to set variable @@session.ROCKSDB_COMPACTION_READAHEAD_SIZE to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_COMPACTION_READAHEAD_SIZE = 444;
+ERROR HY000: Variable 'rocksdb_compaction_readahead_size' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_COMPACTION_READAHEAD_SIZE to 'aaa'"
+SET @@global.ROCKSDB_COMPACTION_READAHEAD_SIZE = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_COMPACTION_READAHEAD_SIZE;
+@@global.ROCKSDB_COMPACTION_READAHEAD_SIZE
+0
+"Trying to set variable @@global.ROCKSDB_COMPACTION_READAHEAD_SIZE to 'bbb'"
+SET @@global.ROCKSDB_COMPACTION_READAHEAD_SIZE = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_COMPACTION_READAHEAD_SIZE;
+@@global.ROCKSDB_COMPACTION_READAHEAD_SIZE
+0
+SET @@global.ROCKSDB_COMPACTION_READAHEAD_SIZE = @start_global_value;
+SELECT @@global.ROCKSDB_COMPACTION_READAHEAD_SIZE;
+@@global.ROCKSDB_COMPACTION_READAHEAD_SIZE
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_sequential_deletes_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_sequential_deletes_basic.result
new file mode 100644
index 00000000000..311184a17d4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_sequential_deletes_basic.result
@@ -0,0 +1,64 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+INSERT INTO valid_values VALUES(2000000);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'2000001\'');
+SET @start_global_value = @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES to 1"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES = 1;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES = DEFAULT;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES
+0
+"Trying to set variable @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES to 1024"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES = 1024;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES
+1024
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES = DEFAULT;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES
+0
+"Trying to set variable @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES to 2000000"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES = 2000000;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES
+2000000
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES = DEFAULT;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES
+0
+"Trying to set variable @@session.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES = 444;
+ERROR HY000: Variable 'rocksdb_compaction_sequential_deletes' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES to 'aaa'"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES
+0
+"Trying to set variable @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES to '2000001'"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES = '2000001';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES
+0
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES = @start_global_value;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_sequential_deletes_count_sd_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_sequential_deletes_count_sd_basic.result
new file mode 100644
index 00000000000..d4e7e28bebc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_sequential_deletes_count_sd_basic.result
@@ -0,0 +1,64 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD to 1"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD = 1;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD = DEFAULT;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD
+0
+"Trying to set variable @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD to 0"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD = 0;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD = DEFAULT;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD
+0
+"Trying to set variable @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD to on"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD = on;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD = DEFAULT;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD
+0
+"Trying to set variable @@session.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD = 444;
+ERROR HY000: Variable 'rocksdb_compaction_sequential_deletes_count_sd' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD to 'aaa'"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD
+0
+"Trying to set variable @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD to 'bbb'"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD
+0
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD = @start_global_value;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_sequential_deletes_file_size_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_sequential_deletes_file_size_basic.result
new file mode 100644
index 00000000000..703e235ed18
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_sequential_deletes_file_size_basic.result
@@ -0,0 +1,46 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE to 1"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE = 1;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE = DEFAULT;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE
+0
+"Trying to set variable @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE to 1024"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE = 1024;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE
+1024
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE = DEFAULT;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE
+0
+"Trying to set variable @@session.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE = 444;
+ERROR HY000: Variable 'rocksdb_compaction_sequential_deletes_file_size' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE to 'aaa'"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE
+0
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE = @start_global_value;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_sequential_deletes_window_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_sequential_deletes_window_basic.result
new file mode 100644
index 00000000000..84436b65795
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_sequential_deletes_window_basic.result
@@ -0,0 +1,64 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+INSERT INTO valid_values VALUES(2000000);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'2000001\'');
+SET @start_global_value = @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW to 1"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW = 1;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW = DEFAULT;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW
+0
+"Trying to set variable @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW to 1024"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW = 1024;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW
+1024
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW = DEFAULT;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW
+0
+"Trying to set variable @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW to 2000000"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW = 2000000;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW
+2000000
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW = DEFAULT;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW
+0
+"Trying to set variable @@session.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW = 444;
+ERROR HY000: Variable 'rocksdb_compaction_sequential_deletes_window' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW to 'aaa'"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW
+0
+"Trying to set variable @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW to '2000001'"
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW = '2000001';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW
+0
+SET @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW = @start_global_value;
+SELECT @@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW;
+@@global.ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
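
The three rocksdb_compaction_sequential_deletes* variables tested above act as a unit: within a sliding window of index entries, a file whose delete markers exceed the threshold (and, if set, whose size exceeds the file-size bound) is scheduled for compaction. A hedged configuration sketch, with the semantics summarized from MyRocks documentation rather than from this patch:

    SET @@global.rocksdb_compaction_sequential_deletes_window = 10000;  # inspect the last 10000 index entries
    SET @@global.rocksdb_compaction_sequential_deletes = 9000;          # compact when more than 9000 of them are deletes
    SET @@global.rocksdb_compaction_sequential_deletes_file_size = 0;   # 0 = no additional file-size gate
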
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_create_checkpoint_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_create_checkpoint_basic.result
new file mode 100644
index 00000000000..630fa78e933
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_create_checkpoint_basic.result
@@ -0,0 +1,15 @@
+SET @start_value = @@global.ROCKSDB_CREATE_CHECKPOINT;
+SET @@global.ROCKSDB_CREATE_CHECKPOINT = 'TMP/abc';
+SELECT @@global.ROCKSDB_CREATE_CHECKPOINT;
+@@global.ROCKSDB_CREATE_CHECKPOINT
+
+SET @@global.ROCKSDB_CREATE_CHECKPOINT = DEFAULT;
+SET @@global.ROCKSDB_CREATE_CHECKPOINT = 'TMP/def';
+SELECT @@global.ROCKSDB_CREATE_CHECKPOINT;
+@@global.ROCKSDB_CREATE_CHECKPOINT
+
+SET @@global.ROCKSDB_CREATE_CHECKPOINT = DEFAULT;
+SET @@session.ROCKSDB_CREATE_CHECKPOINT = 444;
+ERROR HY000: Variable 'rocksdb_create_checkpoint' is a GLOBAL variable and should be set with SET GLOBAL
+SET @@global.ROCKSDB_CREATE_CHECKPOINT = @start_value;
+ERROR HY000: Got error 4 'Invalid argument: invalid checkpoint directory name' from ROCKSDB
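
rocksdb_create_checkpoint is likewise consumed on assignment: setting it to a directory path asks RocksDB to materialize a checkpoint (a hard-linked snapshot of the live data files) at that location, and nothing is retained in the variable, hence the empty SELECT output above and the 'invalid checkpoint directory name' error when the empty starting value is written back at the end. A hedged sketch with an illustrative target path:

    SET @@global.rocksdb_create_checkpoint = '/tmp/myrocks_checkpoint_1';  # target directory must not already exist
    SELECT @@global.rocksdb_create_checkpoint;                             # empty: the path was consumed, not stored
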
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_create_if_missing_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_create_if_missing_basic.result
new file mode 100644
index 00000000000..26dd14fbb68
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_create_if_missing_basic.result
@@ -0,0 +1,14 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_CREATE_IF_MISSING;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_CREATE_IF_MISSING to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_CREATE_IF_MISSING = 444;
+ERROR HY000: Variable 'rocksdb_create_if_missing' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_create_missing_column_families_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_create_missing_column_families_basic.result
new file mode 100644
index 00000000000..7debadc2bb1
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_create_missing_column_families_basic.result
@@ -0,0 +1,14 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_CREATE_MISSING_COLUMN_FAMILIES;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_CREATE_MISSING_COLUMN_FAMILIES to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_CREATE_MISSING_COLUMN_FAMILIES = 444;
+ERROR HY000: Variable 'rocksdb_create_missing_column_families' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_datadir_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_datadir_basic.result
new file mode 100644
index 00000000000..3215624bad8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_datadir_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_DATADIR;
+SELECT @start_global_value;
+@start_global_value
+./#rocksdb
+"Trying to set variable @@global.ROCKSDB_DATADIR to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_DATADIR = 444;
+ERROR HY000: Variable 'rocksdb_datadir' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_db_write_buffer_size_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_db_write_buffer_size_basic.result
new file mode 100644
index 00000000000..6c588b7e060
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_db_write_buffer_size_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_DB_WRITE_BUFFER_SIZE;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_DB_WRITE_BUFFER_SIZE to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_DB_WRITE_BUFFER_SIZE = 444;
+ERROR HY000: Variable 'rocksdb_db_write_buffer_size' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_deadlock_detect_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_deadlock_detect_basic.result
new file mode 100644
index 00000000000..f200105b542
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_deadlock_detect_basic.result
@@ -0,0 +1,121 @@
+CREATE TABLE valid_values (value varchar(255));
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+CREATE TABLE invalid_values (value varchar(255));
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_DEADLOCK_DETECT;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_DEADLOCK_DETECT;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_DEADLOCK_DETECT to 1"
+SET @@global.ROCKSDB_DEADLOCK_DETECT = 1;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT;
+@@global.ROCKSDB_DEADLOCK_DETECT
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEADLOCK_DETECT = DEFAULT;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT;
+@@global.ROCKSDB_DEADLOCK_DETECT
+0
+"Trying to set variable @@global.ROCKSDB_DEADLOCK_DETECT to 0"
+SET @@global.ROCKSDB_DEADLOCK_DETECT = 0;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT;
+@@global.ROCKSDB_DEADLOCK_DETECT
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEADLOCK_DETECT = DEFAULT;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT;
+@@global.ROCKSDB_DEADLOCK_DETECT
+0
+"Trying to set variable @@global.ROCKSDB_DEADLOCK_DETECT to on"
+SET @@global.ROCKSDB_DEADLOCK_DETECT = on;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT;
+@@global.ROCKSDB_DEADLOCK_DETECT
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEADLOCK_DETECT = DEFAULT;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT;
+@@global.ROCKSDB_DEADLOCK_DETECT
+0
+"Trying to set variable @@global.ROCKSDB_DEADLOCK_DETECT to off"
+SET @@global.ROCKSDB_DEADLOCK_DETECT = off;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT;
+@@global.ROCKSDB_DEADLOCK_DETECT
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEADLOCK_DETECT = DEFAULT;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT;
+@@global.ROCKSDB_DEADLOCK_DETECT
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_DEADLOCK_DETECT to 1"
+SET @@session.ROCKSDB_DEADLOCK_DETECT = 1;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT;
+@@session.ROCKSDB_DEADLOCK_DETECT
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_DEADLOCK_DETECT = DEFAULT;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT;
+@@session.ROCKSDB_DEADLOCK_DETECT
+0
+"Trying to set variable @@session.ROCKSDB_DEADLOCK_DETECT to 0"
+SET @@session.ROCKSDB_DEADLOCK_DETECT = 0;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT;
+@@session.ROCKSDB_DEADLOCK_DETECT
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_DEADLOCK_DETECT = DEFAULT;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT;
+@@session.ROCKSDB_DEADLOCK_DETECT
+0
+"Trying to set variable @@session.ROCKSDB_DEADLOCK_DETECT to on"
+SET @@session.ROCKSDB_DEADLOCK_DETECT = on;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT;
+@@session.ROCKSDB_DEADLOCK_DETECT
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_DEADLOCK_DETECT = DEFAULT;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT;
+@@session.ROCKSDB_DEADLOCK_DETECT
+0
+"Trying to set variable @@session.ROCKSDB_DEADLOCK_DETECT to off"
+SET @@session.ROCKSDB_DEADLOCK_DETECT = off;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT;
+@@session.ROCKSDB_DEADLOCK_DETECT
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_DEADLOCK_DETECT = DEFAULT;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT;
+@@session.ROCKSDB_DEADLOCK_DETECT
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_DEADLOCK_DETECT to 'aaa'"
+SET @@global.ROCKSDB_DEADLOCK_DETECT = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT;
+@@global.ROCKSDB_DEADLOCK_DETECT
+0
+"Trying to set variable @@global.ROCKSDB_DEADLOCK_DETECT to 'bbb'"
+SET @@global.ROCKSDB_DEADLOCK_DETECT = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT;
+@@global.ROCKSDB_DEADLOCK_DETECT
+0
+SET @@global.ROCKSDB_DEADLOCK_DETECT = @start_global_value;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT;
+@@global.ROCKSDB_DEADLOCK_DETECT
+0
+SET @@session.ROCKSDB_DEADLOCK_DETECT = @start_session_value;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT;
+@@session.ROCKSDB_DEADLOCK_DETECT
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_deadlock_detect_depth_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_deadlock_detect_depth_basic.result
new file mode 100644
index 00000000000..6f05268745d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_deadlock_detect_depth_basic.result
@@ -0,0 +1,79 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(2);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'123\'');
+SET @start_global_value = @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+SELECT @start_global_value;
+@start_global_value
+50
+SET @start_session_value = @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+SELECT @start_session_value;
+@start_session_value
+50
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH to 100"
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = 100;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+100
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = DEFAULT;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+"Trying to set variable @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH to 2"
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = 2;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+2
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = DEFAULT;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH to 100"
+SET @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH = 100;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@session.ROCKSDB_DEADLOCK_DETECT_DEPTH
+100
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH = DEFAULT;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@session.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+"Trying to set variable @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH to 2"
+SET @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH = 2;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@session.ROCKSDB_DEADLOCK_DETECT_DEPTH
+2
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH = DEFAULT;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@session.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH to 'aaa'"
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+"Trying to set variable @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH to '123'"
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = '123';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = @start_global_value;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+SET @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH = @start_session_value;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@session.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
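
The two deadlock variables covered above are typically tuned together: rocksdb_deadlock_detect enables deadlock detection in the RocksDB transaction lock manager, and rocksdb_deadlock_detect_depth bounds how deep the wait-for graph is searched once detection is on. A hedged usage sketch, using the defaults visible in the results (detection off, depth 50) as the baseline:

    SET @@global.rocksdb_deadlock_detect = ON;         # enable deadlock detection (off by default)
    SET @@global.rocksdb_deadlock_detect_depth = 100;  # allow a deeper wait-for-graph search than the default 50
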
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_manual_compaction_delay_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_manual_compaction_delay_basic.result
new file mode 100644
index 00000000000..6c5db01533b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_manual_compaction_delay_basic.result
@@ -0,0 +1,46 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(2400);
+INSERT INTO valid_values VALUES(100000);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY to 2400"
+SET @@global.ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY = 2400;
+SELECT @@global.ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY;
+@@global.ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY
+2400
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY;
+@@global.ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY
+0
+"Trying to set variable @@global.ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY to 100000"
+SET @@global.ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY = 100000;
+SELECT @@global.ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY;
+@@global.ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY
+100000
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY;
+@@global.ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY
+0
+"Trying to set variable @@session.ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY = 444;
+ERROR HY000: Variable 'rocksdb_debug_manual_compaction_delay' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY to 'aaa'"
+SET @@global.ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY;
+@@global.ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY
+0
+SET @@global.ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY = @start_global_value;
+SELECT @@global.ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY;
+@@global.ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_optimizer_no_zero_cardinality_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_optimizer_no_zero_cardinality_basic.result
new file mode 100644
index 00000000000..5e64ccc69c3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_optimizer_no_zero_cardinality_basic.result
@@ -0,0 +1,64 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY;
+SELECT @start_global_value;
+@start_global_value
+1
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY to 1"
+SET @@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY = 1;
+SELECT @@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY;
+@@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY;
+@@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY
+1
+"Trying to set variable @@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY to 0"
+SET @@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY = 0;
+SELECT @@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY;
+@@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY;
+@@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY
+1
+"Trying to set variable @@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY to on"
+SET @@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY = on;
+SELECT @@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY;
+@@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY;
+@@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY
+1
+"Trying to set variable @@session.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY = 444;
+ERROR HY000: Variable 'rocksdb_debug_optimizer_no_zero_cardinality' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY to 'aaa'"
+SET @@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY;
+@@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY
+1
+"Trying to set variable @@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY to 'bbb'"
+SET @@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY;
+@@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY
+1
+SET @@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY = @start_global_value;
+SELECT @@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY;
+@@global.ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY
+1
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_ignore_pk_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_ignore_pk_basic.result
new file mode 100644
index 00000000000..1d8eb721c1b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_ignore_pk_basic.result
@@ -0,0 +1,64 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK to 1"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = 1;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK to 0"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = 0;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK to on"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = on;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+"Trying to set variable @@session.ROCKSDB_DEBUG_TTL_IGNORE_PK to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_DEBUG_TTL_IGNORE_PK = 444;
+ERROR HY000: Variable 'rocksdb_debug_ttl_ignore_pk' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK to 'aaa'"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK to 'bbb'"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = @start_global_value;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_read_filter_ts_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_read_filter_ts_basic.result
new file mode 100644
index 00000000000..bbc46001817
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_read_filter_ts_basic.result
@@ -0,0 +1,46 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(2400);
+INSERT INTO valid_values VALUES(-2400);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS to 2400"
+SET @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS = 2400;
+SELECT @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS;
+@@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS
+2400
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS;
+@@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS
+0
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS to -2400"
+SET @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS = -2400;
+SELECT @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS;
+@@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS
+-2400
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS;
+@@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS
+0
+"Trying to set variable @@session.ROCKSDB_DEBUG_TTL_READ_FILTER_TS to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_DEBUG_TTL_READ_FILTER_TS = 444;
+ERROR HY000: Variable 'rocksdb_debug_ttl_read_filter_ts' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS to 'aaa'"
+SET @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS;
+@@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS
+0
+SET @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS = @start_global_value;
+SELECT @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS;
+@@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_rec_ts_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_rec_ts_basic.result
new file mode 100644
index 00000000000..347ba9a0b3d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_rec_ts_basic.result
@@ -0,0 +1,46 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(2400);
+INSERT INTO valid_values VALUES(-2400);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_DEBUG_TTL_REC_TS;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_REC_TS to 2400"
+SET @@global.ROCKSDB_DEBUG_TTL_REC_TS = 2400;
+SELECT @@global.ROCKSDB_DEBUG_TTL_REC_TS;
+@@global.ROCKSDB_DEBUG_TTL_REC_TS
+2400
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_TTL_REC_TS = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_TTL_REC_TS;
+@@global.ROCKSDB_DEBUG_TTL_REC_TS
+0
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_REC_TS to -2400"
+SET @@global.ROCKSDB_DEBUG_TTL_REC_TS = -2400;
+SELECT @@global.ROCKSDB_DEBUG_TTL_REC_TS;
+@@global.ROCKSDB_DEBUG_TTL_REC_TS
+-2400
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_TTL_REC_TS = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_TTL_REC_TS;
+@@global.ROCKSDB_DEBUG_TTL_REC_TS
+0
+"Trying to set variable @@session.ROCKSDB_DEBUG_TTL_REC_TS to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_DEBUG_TTL_REC_TS = 444;
+ERROR HY000: Variable 'rocksdb_debug_ttl_rec_ts' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_REC_TS to 'aaa'"
+SET @@global.ROCKSDB_DEBUG_TTL_REC_TS = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEBUG_TTL_REC_TS;
+@@global.ROCKSDB_DEBUG_TTL_REC_TS
+0
+SET @@global.ROCKSDB_DEBUG_TTL_REC_TS = @start_global_value;
+SELECT @@global.ROCKSDB_DEBUG_TTL_REC_TS;
+@@global.ROCKSDB_DEBUG_TTL_REC_TS
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_snapshot_ts_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_snapshot_ts_basic.result
new file mode 100644
index 00000000000..03a937ef218
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_snapshot_ts_basic.result
@@ -0,0 +1,46 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(2400);
+INSERT INTO valid_values VALUES(-2400);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS to 2400"
+SET @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS = 2400;
+SELECT @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS;
+@@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS
+2400
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS;
+@@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS
+0
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS to -2400"
+SET @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS = -2400;
+SELECT @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS;
+@@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS
+-2400
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS;
+@@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS
+0
+"Trying to set variable @@session.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS = 444;
+ERROR HY000: Variable 'rocksdb_debug_ttl_snapshot_ts' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS to 'aaa'"
+SET @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS;
+@@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS
+0
+SET @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS = @start_global_value;
+SELECT @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS;
+@@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_default_cf_options_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_default_cf_options_basic.result
new file mode 100644
index 00000000000..b2b1c0e4c97
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_default_cf_options_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_DEFAULT_CF_OPTIONS;
+SELECT @start_global_value;
+@start_global_value
+
+"Trying to set variable @@global.ROCKSDB_DEFAULT_CF_OPTIONS to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_DEFAULT_CF_OPTIONS = 444;
+ERROR HY000: Variable 'rocksdb_default_cf_options' is a read only variable
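rocksdb_default_cf_options is the first of several read-only variables in this batch (delete_obsolete_files_period_micros, enable_bulk_load_api, enable_thread_tracking and others below use the same seven-line template): the value is visible at runtime but any SET fails, so it can presumably only be chosen at server startup via my.cnf or the command line. The runtime half of that contract, in SQL:

SELECT @@global.ROCKSDB_DEFAULT_CF_OPTIONS;            # readable at any time (empty string here)
SET @@global.ROCKSDB_DEFAULT_CF_OPTIONS = 'foo=bar';   # always fails: read only variable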
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delayed_write_rate_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delayed_write_rate_basic.result
new file mode 100644
index 00000000000..13749e1c220
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delayed_write_rate_basic.result
@@ -0,0 +1,85 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+SET @start_global_value = @@global.ROCKSDB_DELAYED_WRITE_RATE;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_DELAYED_WRITE_RATE to 100"
+SET @@global.ROCKSDB_DELAYED_WRITE_RATE = 100;
+SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
+@@global.ROCKSDB_DELAYED_WRITE_RATE
+100
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DELAYED_WRITE_RATE = DEFAULT;
+SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
+@@global.ROCKSDB_DELAYED_WRITE_RATE
+0
+"Trying to set variable @@global.ROCKSDB_DELAYED_WRITE_RATE to 1"
+SET @@global.ROCKSDB_DELAYED_WRITE_RATE = 1;
+SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
+@@global.ROCKSDB_DELAYED_WRITE_RATE
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DELAYED_WRITE_RATE = DEFAULT;
+SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
+@@global.ROCKSDB_DELAYED_WRITE_RATE
+0
+"Trying to set variable @@global.ROCKSDB_DELAYED_WRITE_RATE to 0"
+SET @@global.ROCKSDB_DELAYED_WRITE_RATE = 0;
+SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
+@@global.ROCKSDB_DELAYED_WRITE_RATE
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DELAYED_WRITE_RATE = DEFAULT;
+SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
+@@global.ROCKSDB_DELAYED_WRITE_RATE
+0
+"Trying to set variable @@session.ROCKSDB_DELAYED_WRITE_RATE to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_DELAYED_WRITE_RATE = 444;
+ERROR HY000: Variable 'rocksdb_delayed_write_rate' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_DELAYED_WRITE_RATE to 'aaa'"
+SET @@global.ROCKSDB_DELAYED_WRITE_RATE = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
+@@global.ROCKSDB_DELAYED_WRITE_RATE
+0
+"Trying to set variable @@global.ROCKSDB_DELAYED_WRITE_RATE to 'bbb'"
+SET @@global.ROCKSDB_DELAYED_WRITE_RATE = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
+@@global.ROCKSDB_DELAYED_WRITE_RATE
+0
+"Trying to set variable @@global.ROCKSDB_DELAYED_WRITE_RATE to '-1'"
+SET @@global.ROCKSDB_DELAYED_WRITE_RATE = '-1';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
+@@global.ROCKSDB_DELAYED_WRITE_RATE
+0
+"Trying to set variable @@global.ROCKSDB_DELAYED_WRITE_RATE to '101'"
+SET @@global.ROCKSDB_DELAYED_WRITE_RATE = '101';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
+@@global.ROCKSDB_DELAYED_WRITE_RATE
+0
+"Trying to set variable @@global.ROCKSDB_DELAYED_WRITE_RATE to '484436'"
+SET @@global.ROCKSDB_DELAYED_WRITE_RATE = '484436';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
+@@global.ROCKSDB_DELAYED_WRITE_RATE
+0
+SET @@global.ROCKSDB_DELAYED_WRITE_RATE = @start_global_value;
+SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
+@@global.ROCKSDB_DELAYED_WRITE_RATE
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delete_cf_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delete_cf_basic.result
new file mode 100644
index 00000000000..b840baf29f8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delete_cf_basic.result
@@ -0,0 +1,6 @@
+SET @@global.ROCKSDB_DELETE_CF = 'nonexistent_cf';
+ERROR 42000: Variable 'rocksdb_delete_cf' can't be set to the value of 'nonexistent_cf'
+SET @@global.ROCKSDB_DELETE_CF = '__system__';
+ERROR 42000: Variable 'rocksdb_delete_cf' can't be set to the value of '__system__'
+SET @@global.ROCKSDB_DELETE_CF = 'default';
+ERROR 42000: Variable 'rocksdb_delete_cf' can't be set to the value of 'default'
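rocksdb_delete_cf is an action variable rather than a setting: assigning a name asks the engine to drop that column family, and per this result the request is refused for a nonexistent name, for the internal '__system__' CF, and for 'default'. A hedged sketch of the presumable success path, assuming MyRocks's index-comment syntax for placing an index in a user column family (cf1 here is hypothetical):

CREATE TABLE t1 (a INT, PRIMARY KEY (a) COMMENT 'cf1') ENGINE=rocksdb;  # creates column family cf1
DROP TABLE t1;                            # cf1 remains but no index uses it any more
SET @@global.ROCKSDB_DELETE_CF = 'cf1';   # presumably succeeds for an unused user CF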
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delete_obsolete_files_period_micros_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delete_obsolete_files_period_micros_basic.result
new file mode 100644
index 00000000000..2dc220fbe20
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delete_obsolete_files_period_micros_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_DELETE_OBSOLETE_FILES_PERIOD_MICROS;
+SELECT @start_global_value;
+@start_global_value
+21600000000
+"Trying to set variable @@global.ROCKSDB_DELETE_OBSOLETE_FILES_PERIOD_MICROS to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_DELETE_OBSOLETE_FILES_PERIOD_MICROS = 444;
+ERROR HY000: Variable 'rocksdb_delete_obsolete_files_period_micros' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_2pc_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_2pc_basic.result
new file mode 100644
index 00000000000..686f8bcd39a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_2pc_basic.result
@@ -0,0 +1,75 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_ENABLE_2PC;
+SELECT @start_global_value;
+@start_global_value
+1
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_ENABLE_2PC to 1"
+SET @@global.ROCKSDB_ENABLE_2PC = 1;
+SELECT @@global.ROCKSDB_ENABLE_2PC;
+@@global.ROCKSDB_ENABLE_2PC
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ENABLE_2PC = DEFAULT;
+SELECT @@global.ROCKSDB_ENABLE_2PC;
+@@global.ROCKSDB_ENABLE_2PC
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_2PC to 0"
+SET @@global.ROCKSDB_ENABLE_2PC = 0;
+SELECT @@global.ROCKSDB_ENABLE_2PC;
+@@global.ROCKSDB_ENABLE_2PC
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ENABLE_2PC = DEFAULT;
+SELECT @@global.ROCKSDB_ENABLE_2PC;
+@@global.ROCKSDB_ENABLE_2PC
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_2PC to on"
+SET @@global.ROCKSDB_ENABLE_2PC = on;
+SELECT @@global.ROCKSDB_ENABLE_2PC;
+@@global.ROCKSDB_ENABLE_2PC
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ENABLE_2PC = DEFAULT;
+SELECT @@global.ROCKSDB_ENABLE_2PC;
+@@global.ROCKSDB_ENABLE_2PC
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_2PC to off"
+SET @@global.ROCKSDB_ENABLE_2PC = off;
+SELECT @@global.ROCKSDB_ENABLE_2PC;
+@@global.ROCKSDB_ENABLE_2PC
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ENABLE_2PC = DEFAULT;
+SELECT @@global.ROCKSDB_ENABLE_2PC;
+@@global.ROCKSDB_ENABLE_2PC
+1
+"Trying to set variable @@session.ROCKSDB_ENABLE_2PC to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_ENABLE_2PC = 444;
+ERROR HY000: Variable 'rocksdb_enable_2pc' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_ENABLE_2PC to 'aaa'"
+SET @@global.ROCKSDB_ENABLE_2PC = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_ENABLE_2PC;
+@@global.ROCKSDB_ENABLE_2PC
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_2PC to 'bbb'"
+SET @@global.ROCKSDB_ENABLE_2PC = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_ENABLE_2PC;
+@@global.ROCKSDB_ENABLE_2PC
+1
+SET @@global.ROCKSDB_ENABLE_2PC = @start_global_value;
+SELECT @@global.ROCKSDB_ENABLE_2PC;
+@@global.ROCKSDB_ENABLE_2PC
+1
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_bulk_load_api_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_bulk_load_api_basic.result
new file mode 100644
index 00000000000..2c0ff289d8a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_bulk_load_api_basic.result
@@ -0,0 +1,14 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_ENABLE_BULK_LOAD_API;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_BULK_LOAD_API to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_ENABLE_BULK_LOAD_API = 444;
+ERROR HY000: Variable 'rocksdb_enable_bulk_load_api' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_insert_with_update_caching_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_insert_with_update_caching_basic.result
new file mode 100644
index 00000000000..ede1690f776
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_insert_with_update_caching_basic.result
@@ -0,0 +1,75 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING;
+SELECT @start_global_value;
+@start_global_value
+1
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING to 1"
+SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = 1;
+SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING;
+@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = DEFAULT;
+SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING;
+@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING to 0"
+SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = 0;
+SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING;
+@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = DEFAULT;
+SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING;
+@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING to on"
+SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = on;
+SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING;
+@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = DEFAULT;
+SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING;
+@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING to off"
+SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = off;
+SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING;
+@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = DEFAULT;
+SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING;
+@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING
+1
+"Trying to set variable @@session.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = 444;
+ERROR HY000: Variable 'rocksdb_enable_insert_with_update_caching' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING to 'aaa'"
+SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING;
+@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING to 'bbb'"
+SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING;
+@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING
+1
+SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = @start_global_value;
+SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING;
+@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING
+1
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_thread_tracking_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_thread_tracking_basic.result
new file mode 100644
index 00000000000..a63383a4d59
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_thread_tracking_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_ENABLE_THREAD_TRACKING;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_THREAD_TRACKING to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_ENABLE_THREAD_TRACKING = 444;
+ERROR HY000: Variable 'rocksdb_enable_thread_tracking' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_ttl_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_ttl_basic.result
new file mode 100644
index 00000000000..1f569235b63
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_ttl_basic.result
@@ -0,0 +1,64 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_ENABLE_TTL;
+SELECT @start_global_value;
+@start_global_value
+1
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_ENABLE_TTL to 1"
+SET @@global.ROCKSDB_ENABLE_TTL = 1;
+SELECT @@global.ROCKSDB_ENABLE_TTL;
+@@global.ROCKSDB_ENABLE_TTL
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ENABLE_TTL = DEFAULT;
+SELECT @@global.ROCKSDB_ENABLE_TTL;
+@@global.ROCKSDB_ENABLE_TTL
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_TTL to 0"
+SET @@global.ROCKSDB_ENABLE_TTL = 0;
+SELECT @@global.ROCKSDB_ENABLE_TTL;
+@@global.ROCKSDB_ENABLE_TTL
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ENABLE_TTL = DEFAULT;
+SELECT @@global.ROCKSDB_ENABLE_TTL;
+@@global.ROCKSDB_ENABLE_TTL
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_TTL to on"
+SET @@global.ROCKSDB_ENABLE_TTL = on;
+SELECT @@global.ROCKSDB_ENABLE_TTL;
+@@global.ROCKSDB_ENABLE_TTL
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ENABLE_TTL = DEFAULT;
+SELECT @@global.ROCKSDB_ENABLE_TTL;
+@@global.ROCKSDB_ENABLE_TTL
+1
+"Trying to set variable @@session.ROCKSDB_ENABLE_TTL to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_ENABLE_TTL = 444;
+ERROR HY000: Variable 'rocksdb_enable_ttl' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_ENABLE_TTL to 'aaa'"
+SET @@global.ROCKSDB_ENABLE_TTL = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_ENABLE_TTL;
+@@global.ROCKSDB_ENABLE_TTL
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_TTL to 'bbb'"
+SET @@global.ROCKSDB_ENABLE_TTL = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_ENABLE_TTL;
+@@global.ROCKSDB_ENABLE_TTL
+1
+SET @@global.ROCKSDB_ENABLE_TTL = @start_global_value;
+SELECT @@global.ROCKSDB_ENABLE_TTL;
+@@global.ROCKSDB_ENABLE_TTL
+1
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_ttl_read_filtering_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_ttl_read_filtering_basic.result
new file mode 100644
index 00000000000..005c15e168b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_ttl_read_filtering_basic.result
@@ -0,0 +1,64 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING;
+SELECT @start_global_value;
+@start_global_value
+1
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING to 1"
+SET @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING = 1;
+SELECT @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING;
+@@global.ROCKSDB_ENABLE_TTL_READ_FILTERING
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING = DEFAULT;
+SELECT @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING;
+@@global.ROCKSDB_ENABLE_TTL_READ_FILTERING
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING to 0"
+SET @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING = 0;
+SELECT @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING;
+@@global.ROCKSDB_ENABLE_TTL_READ_FILTERING
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING = DEFAULT;
+SELECT @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING;
+@@global.ROCKSDB_ENABLE_TTL_READ_FILTERING
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING to on"
+SET @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING = on;
+SELECT @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING;
+@@global.ROCKSDB_ENABLE_TTL_READ_FILTERING
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING = DEFAULT;
+SELECT @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING;
+@@global.ROCKSDB_ENABLE_TTL_READ_FILTERING
+1
+"Trying to set variable @@session.ROCKSDB_ENABLE_TTL_READ_FILTERING to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_ENABLE_TTL_READ_FILTERING = 444;
+ERROR HY000: Variable 'rocksdb_enable_ttl_read_filtering' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING to 'aaa'"
+SET @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING;
+@@global.ROCKSDB_ENABLE_TTL_READ_FILTERING
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING to 'bbb'"
+SET @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING;
+@@global.ROCKSDB_ENABLE_TTL_READ_FILTERING
+1
+SET @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING = @start_global_value;
+SELECT @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING;
+@@global.ROCKSDB_ENABLE_TTL_READ_FILTERING
+1
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_write_thread_adaptive_yield_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_write_thread_adaptive_yield_basic.result
new file mode 100644
index 00000000000..37107be469f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_write_thread_adaptive_yield_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD = 444;
+ERROR HY000: Variable 'rocksdb_enable_write_thread_adaptive_yield' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_error_if_exists_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_error_if_exists_basic.result
new file mode 100644
index 00000000000..650e2956e23
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_error_if_exists_basic.result
@@ -0,0 +1,14 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_ERROR_IF_EXISTS;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_ERROR_IF_EXISTS to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_ERROR_IF_EXISTS = 444;
+ERROR HY000: Variable 'rocksdb_error_if_exists' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_error_on_suboptimal_collation_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_error_on_suboptimal_collation_basic.result
new file mode 100644
index 00000000000..e10583bff09
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_error_on_suboptimal_collation_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_ERROR_ON_SUBOPTIMAL_COLLATION;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_ERROR_ON_SUBOPTIMAL_COLLATION to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_ERROR_ON_SUBOPTIMAL_COLLATION = 444;
+ERROR HY000: Variable 'rocksdb_error_on_suboptimal_collation' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_log_at_trx_commit_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_log_at_trx_commit_basic.result
new file mode 100644
index 00000000000..b8fe837d2e6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_log_at_trx_commit_basic.result
@@ -0,0 +1,57 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(2);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT;
+SELECT @start_global_value;
+@start_global_value
+1
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT to 2"
+SET @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 2;
+SELECT @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT;
+@@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT
+2
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = DEFAULT;
+SELECT @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT;
+@@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT
+1
+"Trying to set variable @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT to 1"
+SET @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 1;
+SELECT @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT;
+@@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = DEFAULT;
+SELECT @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT;
+@@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT
+1
+"Trying to set variable @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT to 0"
+SET @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 0;
+SELECT @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT;
+@@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = DEFAULT;
+SELECT @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT;
+@@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT
+1
+"Trying to set variable @@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 444;
+ERROR HY000: Variable 'rocksdb_flush_log_at_trx_commit' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT to 'aaa'"
+SET @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT;
+@@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT
+1
+SET @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = @start_global_value;
+SELECT @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT;
+@@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT
+1
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
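rocksdb_flush_log_at_trx_commit accepts 0, 1 and 2 with 1 as the default; by analogy with the identically shaped InnoDB option, 1 presumably syncs the RocksDB WAL on every commit while 0 and 2 relax that to background syncing, trading durability for commit throughput (the result above only establishes the accepted values, not the semantics). For example:

SET @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 2;        # relaxed syncing (assumed semantics)
SET @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = DEFAULT;  # back to the default, value 1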
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_basic.result
new file mode 100644
index 00000000000..90fd829e7c3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_basic.result
@@ -0,0 +1,15 @@
+DROP TABLE IF EXISTS t;
+CREATE TABLE t (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t (a,b) VALUES (1,'bar'),(2,'foo');
+SET @ORIG_FORCE_COMPUTE_MEMTABLE_STATS = @@rocksdb_force_compute_memtable_stats;
+set global rocksdb_force_flush_memtable_now = true;
+INSERT INTO t (a,b) VALUES (3,'dead'),(4,'beef'),(5,'a'),(6,'bbb'),(7,'c'),(8,'d');
+set global rocksdb_force_compute_memtable_stats=0;
+SELECT TABLE_ROWS INTO @ROWS_EXCLUDE_MEMTABLE FROM information_schema.TABLES WHERE table_name = 't';
+set global rocksdb_force_compute_memtable_stats=1;
+SELECT TABLE_ROWS INTO @ROWS_INCLUDE_MEMTABLE FROM information_schema.TABLES WHERE table_name = 't';
+select case when @ROWS_INCLUDE_MEMTABLE-@ROWS_EXCLUDE_MEMTABLE > 0 then 'true' else 'false' end;
+case when @ROWS_INCLUDE_MEMTABLE-@ROWS_EXCLUDE_MEMTABLE > 0 then 'true' else 'false' end
+true
+DROP TABLE t;
+set global rocksdb_force_compute_memtable_stats = @ORIG_FORCE_COMPUTE_MEMTABLE_STATS;
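This is the one behavioral test in the group rather than a set/read-back template: after the forced flush, the second batch of six rows lives only in the memtable, so information_schema reports a smaller TABLE_ROWS with rocksdb_force_compute_memtable_stats=0 than with it set to 1, and the final CASE prints true because the difference is positive. The key probe, runnable on its own:

SET GLOBAL rocksdb_force_compute_memtable_stats = 1;
SELECT TABLE_ROWS FROM information_schema.TABLES
WHERE table_name = 't';   # now includes rows still sitting in the memtable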
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_cachetime_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_cachetime_basic.result
new file mode 100644
index 00000000000..50e06b5bacb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_cachetime_basic.result
@@ -0,0 +1,68 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+INSERT INTO valid_values VALUES(1024 * 1024 * 1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME to 0"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = 0;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+60000000
+"Trying to set variable @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME to 1"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = 1;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+60000000
+"Trying to set variable @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME to 1024"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = 1024;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+1024
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+60000000
+"Trying to set variable @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME to 1073741824"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = 1073741824;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+1073741824
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+60000000
+"Trying to set variable @@session.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = 444;
+ERROR HY000: Variable 'rocksdb_force_compute_memtable_stats_cachetime' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME to 'aaa'"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+60000000
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = @start_global_value;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
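Note the asymmetry in this result: @start_global_value is 0 (the suite presumably starts the server with the cachetime zeroed via an .opt file), while SET ... = DEFAULT restores 60000000, the built-in default of 60 seconds expressed in microseconds. A quick unit check:

SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME / 1000000
       AS cachetime_seconds;   # 60 at the built-in default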
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_flush_memtable_and_lzero_now_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_flush_memtable_and_lzero_now_basic.result
new file mode 100644
index 00000000000..68cfeb07fc7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_flush_memtable_and_lzero_now_basic.result
@@ -0,0 +1,50 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+SET @start_global_value = @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW to 1"
+SET @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW = 1;
+SELECT @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW;
+@@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW;
+@@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW
+0
+"Trying to set variable @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW to 0"
+SET @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW = 0;
+SELECT @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW;
+@@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW;
+@@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW
+0
+"Trying to set variable @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW to on"
+SET @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW = on;
+SELECT @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW;
+@@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW;
+@@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW
+0
+"Trying to set variable @@session.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW = 444;
+ERROR HY000: Variable 'rocksdb_force_flush_memtable_and_lzero_now' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+SET @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW = @start_global_value;
+SELECT @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW;
+@@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_flush_memtable_now_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_flush_memtable_now_basic.result
new file mode 100644
index 00000000000..30444e26d98
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_flush_memtable_now_basic.result
@@ -0,0 +1,50 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+SET @start_global_value = @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW to 1"
+SET @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW = 1;
+SELECT @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW;
+@@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW;
+@@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW
+0
+"Trying to set variable @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW to 0"
+SET @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW = 0;
+SELECT @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW;
+@@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW;
+@@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW
+0
+"Trying to set variable @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW to on"
+SET @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW = on;
+SELECT @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW;
+@@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW;
+@@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW
+0
+"Trying to set variable @@session.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW = 444;
+ERROR HY000: Variable 'rocksdb_force_flush_memtable_now' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+SET @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW = @start_global_value;
+SELECT @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW;
+@@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
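Both force-flush variables above act as one-shot triggers rather than state: SET ... = 1 (or ON) kicks off the flush and the variable immediately reads back as 0, which is why every SELECT in these two results shows 0 even directly after a successful assignment, and why their invalid_values tables are created but left empty. The read-back check therefore degenerates to:

SET @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW = 1;  # initiates a memtable flush
SELECT @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW;   # already 0 again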
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_index_records_in_range_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_index_records_in_range_basic.result
new file mode 100644
index 00000000000..d4768cfde1e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_index_records_in_range_basic.result
@@ -0,0 +1,106 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(222333);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @@session.rocksdb_force_index_records_in_range = -1;
+Warnings:
+Warning 1292 Truncated incorrect rocksdb_force_index_records_i... value: '-1'
+SELECT @@session.rocksdb_force_index_records_in_range;
+@@session.rocksdb_force_index_records_in_range
+0
+SET @start_global_value = @@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE to 1"
+SET @@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE = 1;
+SELECT @@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE;
+@@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE;
+@@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE
+0
+"Trying to set variable @@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE to 0"
+SET @@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE = 0;
+SELECT @@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE;
+@@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE;
+@@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE
+0
+"Trying to set variable @@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE to 222333"
+SET @@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE = 222333;
+SELECT @@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE;
+@@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE
+222333
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE;
+@@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE to 1"
+SET @@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE = 1;
+SELECT @@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE;
+@@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE = DEFAULT;
+SELECT @@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE;
+@@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE
+0
+"Trying to set variable @@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE to 0"
+SET @@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE = 0;
+SELECT @@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE;
+@@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE = DEFAULT;
+SELECT @@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE;
+@@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE
+0
+"Trying to set variable @@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE to 222333"
+SET @@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE = 222333;
+SELECT @@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE;
+@@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE
+222333
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE = DEFAULT;
+SELECT @@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE;
+@@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE to 'aaa'"
+SET @@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE;
+@@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE
+0
+"Trying to set variable @@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE to 'bbb'"
+SET @@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE;
+@@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE
+0
+SET @@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE = @start_global_value;
+SELECT @@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE;
+@@global.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE
+0
+SET @@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE = @start_session_value;
+SELECT @@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE;
+@@session.ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
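The preamble of this result also exercises unsigned truncation: assigning -1 in session scope is not rejected but clamped to 0 with warning 1292 (the variable name apparently truncated to 'rocksdb_force_index_records_i...' by the server's fixed-width message formatting), whereas the quoted strings tried later error out entirely. Reproduced directly:

SET @@session.rocksdb_force_index_records_in_range = -1;  # clamped, not rejected
SHOW WARNINGS;                                            # 1292 Truncated incorrect ... value: '-1'
SELECT @@session.rocksdb_force_index_records_in_range;    # 0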
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_git_hash_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_git_hash_basic.result
new file mode 100644
index 00000000000..bbcfa1417eb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_git_hash_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_GIT_HASH;
+SELECT @start_global_value;
+@start_global_value
+#
+"Trying to set variable @@global.ROCKSDB_GIT_HASH to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_GIT_HASH = 444;
+ERROR HY000: Variable 'rocksdb_git_hash' is a read only variable
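rocksdb_git_hash is one of the read-only, report-only variables in this batch; its value is masked as # in the recorded result because it changes with every build. Read-only server variables can be SELECTed but never SET at runtime; a sketch (the error symbol is an assumption inferred from the "read only variable" message):

SELECT @@global.rocksdb_git_hash;      -- always readable
--error ER_INCORRECT_GLOBAL_LOCAL_VAR
SET @@global.rocksdb_git_hash = 444;   -- any runtime assignment fails
# Startup-settable read-only options (not this informational one)
# can only be changed via server configuration, e.g. my.cnf.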
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_hash_index_allow_collision_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_hash_index_allow_collision_basic.result
new file mode 100644
index 00000000000..34deca6ce85
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_hash_index_allow_collision_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_HASH_INDEX_ALLOW_COLLISION;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_HASH_INDEX_ALLOW_COLLISION to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_HASH_INDEX_ALLOW_COLLISION = 444;
+ERROR HY000: Variable 'rocksdb_hash_index_allow_collision' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_ignore_unknown_options_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_ignore_unknown_options_basic.result
new file mode 100644
index 00000000000..621213cd79b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_ignore_unknown_options_basic.result
@@ -0,0 +1,14 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_IGNORE_UNKNOWN_OPTIONS;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_IGNORE_UNKNOWN_OPTIONS to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_IGNORE_UNKNOWN_OPTIONS = 444;
+ERROR HY000: Variable 'rocksdb_ignore_unknown_options' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_index_type_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_index_type_basic.result
new file mode 100644
index 00000000000..97c6ed84de7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_index_type_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_INDEX_TYPE;
+SELECT @start_global_value;
+@start_global_value
+kBinarySearch
+"Trying to set variable @@global.ROCKSDB_INDEX_TYPE to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_INDEX_TYPE = 444;
+ERROR HY000: Variable 'rocksdb_index_type' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_info_log_level_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_info_log_level_basic.result
new file mode 100644
index 00000000000..1509f9ae95d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_info_log_level_basic.result
@@ -0,0 +1,93 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES('fatal_level');
+INSERT INTO valid_values VALUES('error_level');
+INSERT INTO valid_values VALUES('warn_level');
+INSERT INTO valid_values VALUES('info_level');
+INSERT INTO valid_values VALUES('debug_level');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES(5);
+INSERT INTO invalid_values VALUES(6);
+INSERT INTO invalid_values VALUES('foo');
+SET @start_global_value = @@global.ROCKSDB_INFO_LOG_LEVEL;
+SELECT @start_global_value;
+@start_global_value
+error_level
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_INFO_LOG_LEVEL to fatal_level"
+SET @@global.ROCKSDB_INFO_LOG_LEVEL = fatal_level;
+SELECT @@global.ROCKSDB_INFO_LOG_LEVEL;
+@@global.ROCKSDB_INFO_LOG_LEVEL
+fatal_level
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_INFO_LOG_LEVEL = DEFAULT;
+SELECT @@global.ROCKSDB_INFO_LOG_LEVEL;
+@@global.ROCKSDB_INFO_LOG_LEVEL
+error_level
+"Trying to set variable @@global.ROCKSDB_INFO_LOG_LEVEL to error_level"
+SET @@global.ROCKSDB_INFO_LOG_LEVEL = error_level;
+SELECT @@global.ROCKSDB_INFO_LOG_LEVEL;
+@@global.ROCKSDB_INFO_LOG_LEVEL
+error_level
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_INFO_LOG_LEVEL = DEFAULT;
+SELECT @@global.ROCKSDB_INFO_LOG_LEVEL;
+@@global.ROCKSDB_INFO_LOG_LEVEL
+error_level
+"Trying to set variable @@global.ROCKSDB_INFO_LOG_LEVEL to warn_level"
+SET @@global.ROCKSDB_INFO_LOG_LEVEL = warn_level;
+SELECT @@global.ROCKSDB_INFO_LOG_LEVEL;
+@@global.ROCKSDB_INFO_LOG_LEVEL
+warn_level
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_INFO_LOG_LEVEL = DEFAULT;
+SELECT @@global.ROCKSDB_INFO_LOG_LEVEL;
+@@global.ROCKSDB_INFO_LOG_LEVEL
+error_level
+"Trying to set variable @@global.ROCKSDB_INFO_LOG_LEVEL to info_level"
+SET @@global.ROCKSDB_INFO_LOG_LEVEL = info_level;
+SELECT @@global.ROCKSDB_INFO_LOG_LEVEL;
+@@global.ROCKSDB_INFO_LOG_LEVEL
+info_level
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_INFO_LOG_LEVEL = DEFAULT;
+SELECT @@global.ROCKSDB_INFO_LOG_LEVEL;
+@@global.ROCKSDB_INFO_LOG_LEVEL
+error_level
+"Trying to set variable @@global.ROCKSDB_INFO_LOG_LEVEL to debug_level"
+SET @@global.ROCKSDB_INFO_LOG_LEVEL = debug_level;
+SELECT @@global.ROCKSDB_INFO_LOG_LEVEL;
+@@global.ROCKSDB_INFO_LOG_LEVEL
+debug_level
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_INFO_LOG_LEVEL = DEFAULT;
+SELECT @@global.ROCKSDB_INFO_LOG_LEVEL;
+@@global.ROCKSDB_INFO_LOG_LEVEL
+error_level
+"Trying to set variable @@session.ROCKSDB_INFO_LOG_LEVEL to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_INFO_LOG_LEVEL = 444;
+ERROR HY000: Variable 'rocksdb_info_log_level' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_INFO_LOG_LEVEL to 5"
+SET @@global.ROCKSDB_INFO_LOG_LEVEL = 5;
+Got one of the listed errors
+SELECT @@global.ROCKSDB_INFO_LOG_LEVEL;
+@@global.ROCKSDB_INFO_LOG_LEVEL
+error_level
+"Trying to set variable @@global.ROCKSDB_INFO_LOG_LEVEL to 6"
+SET @@global.ROCKSDB_INFO_LOG_LEVEL = 6;
+Got one of the listed errors
+SELECT @@global.ROCKSDB_INFO_LOG_LEVEL;
+@@global.ROCKSDB_INFO_LOG_LEVEL
+error_level
+"Trying to set variable @@global.ROCKSDB_INFO_LOG_LEVEL to foo"
+SET @@global.ROCKSDB_INFO_LOG_LEVEL = foo;
+Got one of the listed errors
+SELECT @@global.ROCKSDB_INFO_LOG_LEVEL;
+@@global.ROCKSDB_INFO_LOG_LEVEL
+error_level
+SET @@global.ROCKSDB_INFO_LOG_LEVEL = @start_global_value;
+SELECT @@global.ROCKSDB_INFO_LOG_LEVEL;
+@@global.ROCKSDB_INFO_LOG_LEVEL
+error_level
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
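rocksdb_info_log_level is a GLOBAL-only ENUM variable: the five level names are accepted, the numeric values 5 and 6 are rejected because the typelib holds only five entries (indexes 0 through 4), and a session-scope assignment fails with the SET GLOBAL hint. A short sketch (that quoted names and in-range indexes also work follows the usual ENUM sys-var behaviour, not this result file):

SET GLOBAL rocksdb_info_log_level = debug_level;    -- bare name
SET GLOBAL rocksdb_info_log_level = 'warn_level';   -- quoted name
--error ER_WRONG_VALUE_FOR_VAR
SET GLOBAL rocksdb_info_log_level = 6;              -- index out of range
SET GLOBAL rocksdb_info_log_level = DEFAULT;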
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_io_write_timeout_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_io_write_timeout_basic.result
new file mode 100644
index 00000000000..0917a3970f4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_io_write_timeout_basic.result
@@ -0,0 +1,86 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(10);
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(42);
+INSERT INTO valid_values VALUES(142);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_IO_WRITE_TIMEOUT to 10"
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = 10;
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+10
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = DEFAULT;
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+0
+"Trying to set variable @@global.ROCKSDB_IO_WRITE_TIMEOUT to 100"
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = 100;
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+100
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = DEFAULT;
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+0
+"Trying to set variable @@global.ROCKSDB_IO_WRITE_TIMEOUT to 0"
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = 0;
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = DEFAULT;
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+0
+"Trying to set variable @@global.ROCKSDB_IO_WRITE_TIMEOUT to 42"
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = 42;
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+42
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = DEFAULT;
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+0
+"Trying to set variable @@global.ROCKSDB_IO_WRITE_TIMEOUT to 142"
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = 142;
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+142
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = DEFAULT;
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+0
+"Trying to set variable @@session.ROCKSDB_IO_WRITE_TIMEOUT to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_IO_WRITE_TIMEOUT = 444;
+ERROR HY000: Variable 'rocksdb_io_write_timeout' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_IO_WRITE_TIMEOUT to 'aaa'"
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+0
+"Trying to set variable @@global.ROCKSDB_IO_WRITE_TIMEOUT to 'bbb'"
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+0
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = @start_global_value;
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_is_fd_close_on_exec_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_is_fd_close_on_exec_basic.result
new file mode 100644
index 00000000000..87dd0e90511
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_is_fd_close_on_exec_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_IS_FD_CLOSE_ON_EXEC;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_IS_FD_CLOSE_ON_EXEC to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_IS_FD_CLOSE_ON_EXEC = 444;
+ERROR HY000: Variable 'rocksdb_is_fd_close_on_exec' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_keep_log_file_num_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_keep_log_file_num_basic.result
new file mode 100644
index 00000000000..3a0c5060d00
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_keep_log_file_num_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_KEEP_LOG_FILE_NUM;
+SELECT @start_global_value;
+@start_global_value
+1000
+"Trying to set variable @@global.ROCKSDB_KEEP_LOG_FILE_NUM to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_KEEP_LOG_FILE_NUM = 444;
+ERROR HY000: Variable 'rocksdb_keep_log_file_num' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_large_prefix_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_large_prefix_basic.result
new file mode 100644
index 00000000000..89697683d1c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_large_prefix_basic.result
@@ -0,0 +1,64 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_LARGE_PREFIX;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_LARGE_PREFIX to 1"
+SET @@global.ROCKSDB_LARGE_PREFIX = 1;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_LARGE_PREFIX = DEFAULT;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+"Trying to set variable @@global.ROCKSDB_LARGE_PREFIX to 0"
+SET @@global.ROCKSDB_LARGE_PREFIX = 0;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_LARGE_PREFIX = DEFAULT;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+"Trying to set variable @@global.ROCKSDB_LARGE_PREFIX to on"
+SET @@global.ROCKSDB_LARGE_PREFIX = on;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_LARGE_PREFIX = DEFAULT;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+"Trying to set variable @@session.ROCKSDB_LARGE_PREFIX to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_LARGE_PREFIX = 444;
+ERROR HY000: Variable 'rocksdb_large_prefix' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_LARGE_PREFIX to 'aaa'"
+SET @@global.ROCKSDB_LARGE_PREFIX = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+"Trying to set variable @@global.ROCKSDB_LARGE_PREFIX to 'bbb'"
+SET @@global.ROCKSDB_LARGE_PREFIX = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+SET @@global.ROCKSDB_LARGE_PREFIX = @start_global_value;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_lock_scanned_rows_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_lock_scanned_rows_basic.result
new file mode 100644
index 00000000000..eff9e619967
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_lock_scanned_rows_basic.result
@@ -0,0 +1,170 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES(2);
+INSERT INTO invalid_values VALUES(1000);
+SET @start_global_value = @@global.ROCKSDB_LOCK_SCANNED_ROWS;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_LOCK_SCANNED_ROWS;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_LOCK_SCANNED_ROWS to 1"
+SET @@global.ROCKSDB_LOCK_SCANNED_ROWS = 1;
+SELECT @@global.ROCKSDB_LOCK_SCANNED_ROWS;
+@@global.ROCKSDB_LOCK_SCANNED_ROWS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_LOCK_SCANNED_ROWS = DEFAULT;
+SELECT @@global.ROCKSDB_LOCK_SCANNED_ROWS;
+@@global.ROCKSDB_LOCK_SCANNED_ROWS
+0
+"Trying to set variable @@global.ROCKSDB_LOCK_SCANNED_ROWS to 0"
+SET @@global.ROCKSDB_LOCK_SCANNED_ROWS = 0;
+SELECT @@global.ROCKSDB_LOCK_SCANNED_ROWS;
+@@global.ROCKSDB_LOCK_SCANNED_ROWS
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_LOCK_SCANNED_ROWS = DEFAULT;
+SELECT @@global.ROCKSDB_LOCK_SCANNED_ROWS;
+@@global.ROCKSDB_LOCK_SCANNED_ROWS
+0
+"Trying to set variable @@global.ROCKSDB_LOCK_SCANNED_ROWS to on"
+SET @@global.ROCKSDB_LOCK_SCANNED_ROWS = on;
+SELECT @@global.ROCKSDB_LOCK_SCANNED_ROWS;
+@@global.ROCKSDB_LOCK_SCANNED_ROWS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_LOCK_SCANNED_ROWS = DEFAULT;
+SELECT @@global.ROCKSDB_LOCK_SCANNED_ROWS;
+@@global.ROCKSDB_LOCK_SCANNED_ROWS
+0
+"Trying to set variable @@global.ROCKSDB_LOCK_SCANNED_ROWS to off"
+SET @@global.ROCKSDB_LOCK_SCANNED_ROWS = off;
+SELECT @@global.ROCKSDB_LOCK_SCANNED_ROWS;
+@@global.ROCKSDB_LOCK_SCANNED_ROWS
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_LOCK_SCANNED_ROWS = DEFAULT;
+SELECT @@global.ROCKSDB_LOCK_SCANNED_ROWS;
+@@global.ROCKSDB_LOCK_SCANNED_ROWS
+0
+"Trying to set variable @@global.ROCKSDB_LOCK_SCANNED_ROWS to true"
+SET @@global.ROCKSDB_LOCK_SCANNED_ROWS = true;
+SELECT @@global.ROCKSDB_LOCK_SCANNED_ROWS;
+@@global.ROCKSDB_LOCK_SCANNED_ROWS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_LOCK_SCANNED_ROWS = DEFAULT;
+SELECT @@global.ROCKSDB_LOCK_SCANNED_ROWS;
+@@global.ROCKSDB_LOCK_SCANNED_ROWS
+0
+"Trying to set variable @@global.ROCKSDB_LOCK_SCANNED_ROWS to false"
+SET @@global.ROCKSDB_LOCK_SCANNED_ROWS = false;
+SELECT @@global.ROCKSDB_LOCK_SCANNED_ROWS;
+@@global.ROCKSDB_LOCK_SCANNED_ROWS
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_LOCK_SCANNED_ROWS = DEFAULT;
+SELECT @@global.ROCKSDB_LOCK_SCANNED_ROWS;
+@@global.ROCKSDB_LOCK_SCANNED_ROWS
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_LOCK_SCANNED_ROWS to 1"
+SET @@session.ROCKSDB_LOCK_SCANNED_ROWS = 1;
+SELECT @@session.ROCKSDB_LOCK_SCANNED_ROWS;
+@@session.ROCKSDB_LOCK_SCANNED_ROWS
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_LOCK_SCANNED_ROWS = DEFAULT;
+SELECT @@session.ROCKSDB_LOCK_SCANNED_ROWS;
+@@session.ROCKSDB_LOCK_SCANNED_ROWS
+0
+"Trying to set variable @@session.ROCKSDB_LOCK_SCANNED_ROWS to 0"
+SET @@session.ROCKSDB_LOCK_SCANNED_ROWS = 0;
+SELECT @@session.ROCKSDB_LOCK_SCANNED_ROWS;
+@@session.ROCKSDB_LOCK_SCANNED_ROWS
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_LOCK_SCANNED_ROWS = DEFAULT;
+SELECT @@session.ROCKSDB_LOCK_SCANNED_ROWS;
+@@session.ROCKSDB_LOCK_SCANNED_ROWS
+0
+"Trying to set variable @@session.ROCKSDB_LOCK_SCANNED_ROWS to on"
+SET @@session.ROCKSDB_LOCK_SCANNED_ROWS = on;
+SELECT @@session.ROCKSDB_LOCK_SCANNED_ROWS;
+@@session.ROCKSDB_LOCK_SCANNED_ROWS
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_LOCK_SCANNED_ROWS = DEFAULT;
+SELECT @@session.ROCKSDB_LOCK_SCANNED_ROWS;
+@@session.ROCKSDB_LOCK_SCANNED_ROWS
+0
+"Trying to set variable @@session.ROCKSDB_LOCK_SCANNED_ROWS to off"
+SET @@session.ROCKSDB_LOCK_SCANNED_ROWS = off;
+SELECT @@session.ROCKSDB_LOCK_SCANNED_ROWS;
+@@session.ROCKSDB_LOCK_SCANNED_ROWS
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_LOCK_SCANNED_ROWS = DEFAULT;
+SELECT @@session.ROCKSDB_LOCK_SCANNED_ROWS;
+@@session.ROCKSDB_LOCK_SCANNED_ROWS
+0
+"Trying to set variable @@session.ROCKSDB_LOCK_SCANNED_ROWS to true"
+SET @@session.ROCKSDB_LOCK_SCANNED_ROWS = true;
+SELECT @@session.ROCKSDB_LOCK_SCANNED_ROWS;
+@@session.ROCKSDB_LOCK_SCANNED_ROWS
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_LOCK_SCANNED_ROWS = DEFAULT;
+SELECT @@session.ROCKSDB_LOCK_SCANNED_ROWS;
+@@session.ROCKSDB_LOCK_SCANNED_ROWS
+0
+"Trying to set variable @@session.ROCKSDB_LOCK_SCANNED_ROWS to false"
+SET @@session.ROCKSDB_LOCK_SCANNED_ROWS = false;
+SELECT @@session.ROCKSDB_LOCK_SCANNED_ROWS;
+@@session.ROCKSDB_LOCK_SCANNED_ROWS
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_LOCK_SCANNED_ROWS = DEFAULT;
+SELECT @@session.ROCKSDB_LOCK_SCANNED_ROWS;
+@@session.ROCKSDB_LOCK_SCANNED_ROWS
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_LOCK_SCANNED_ROWS to 'aaa'"
+SET @@global.ROCKSDB_LOCK_SCANNED_ROWS = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_LOCK_SCANNED_ROWS;
+@@global.ROCKSDB_LOCK_SCANNED_ROWS
+0
+"Trying to set variable @@global.ROCKSDB_LOCK_SCANNED_ROWS to 2"
+SET @@global.ROCKSDB_LOCK_SCANNED_ROWS = 2;
+Got one of the listed errors
+SELECT @@global.ROCKSDB_LOCK_SCANNED_ROWS;
+@@global.ROCKSDB_LOCK_SCANNED_ROWS
+0
+"Trying to set variable @@global.ROCKSDB_LOCK_SCANNED_ROWS to 1000"
+SET @@global.ROCKSDB_LOCK_SCANNED_ROWS = 1000;
+Got one of the listed errors
+SELECT @@global.ROCKSDB_LOCK_SCANNED_ROWS;
+@@global.ROCKSDB_LOCK_SCANNED_ROWS
+0
+SET @@global.ROCKSDB_LOCK_SCANNED_ROWS = @start_global_value;
+SELECT @@global.ROCKSDB_LOCK_SCANNED_ROWS;
+@@global.ROCKSDB_LOCK_SCANNED_ROWS
+0
+SET @@session.ROCKSDB_LOCK_SCANNED_ROWS = @start_session_value;
+SELECT @@session.ROCKSDB_LOCK_SCANNED_ROWS;
+@@session.ROCKSDB_LOCK_SCANNED_ROWS
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
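rocksdb_lock_scanned_rows is a boolean session variable, so both scopes accept the full boolean spelling set (0/1, on/off, true/false) while integers outside {0,1} such as 2 and 1000 are rejected — unlike the unsigned integer variables above, where any in-range number is valid. Sketch (error symbol assumed):

SET GLOBAL rocksdb_lock_scanned_rows = ON;
SET SESSION rocksdb_lock_scanned_rows = FALSE;
--error ER_WRONG_VALUE_FOR_VAR
SET GLOBAL rocksdb_lock_scanned_rows = 2;   -- outside the boolean domain
SET GLOBAL rocksdb_lock_scanned_rows = DEFAULT;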
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_lock_wait_timeout_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_lock_wait_timeout_basic.result
new file mode 100644
index 00000000000..38df5820298
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_lock_wait_timeout_basic.result
@@ -0,0 +1,72 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_LOCK_WAIT_TIMEOUT;
+SELECT @start_global_value;
+@start_global_value
+1
+SET @start_session_value = @@session.ROCKSDB_LOCK_WAIT_TIMEOUT;
+SELECT @start_session_value;
+@start_session_value
+1
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_LOCK_WAIT_TIMEOUT to 1"
+SET @@global.ROCKSDB_LOCK_WAIT_TIMEOUT = 1;
+SELECT @@global.ROCKSDB_LOCK_WAIT_TIMEOUT;
+@@global.ROCKSDB_LOCK_WAIT_TIMEOUT
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_LOCK_WAIT_TIMEOUT = DEFAULT;
+SELECT @@global.ROCKSDB_LOCK_WAIT_TIMEOUT;
+@@global.ROCKSDB_LOCK_WAIT_TIMEOUT
+1
+"Trying to set variable @@global.ROCKSDB_LOCK_WAIT_TIMEOUT to 1024"
+SET @@global.ROCKSDB_LOCK_WAIT_TIMEOUT = 1024;
+SELECT @@global.ROCKSDB_LOCK_WAIT_TIMEOUT;
+@@global.ROCKSDB_LOCK_WAIT_TIMEOUT
+1024
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_LOCK_WAIT_TIMEOUT = DEFAULT;
+SELECT @@global.ROCKSDB_LOCK_WAIT_TIMEOUT;
+@@global.ROCKSDB_LOCK_WAIT_TIMEOUT
+1
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_LOCK_WAIT_TIMEOUT to 1"
+SET @@session.ROCKSDB_LOCK_WAIT_TIMEOUT = 1;
+SELECT @@session.ROCKSDB_LOCK_WAIT_TIMEOUT;
+@@session.ROCKSDB_LOCK_WAIT_TIMEOUT
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_LOCK_WAIT_TIMEOUT = DEFAULT;
+SELECT @@session.ROCKSDB_LOCK_WAIT_TIMEOUT;
+@@session.ROCKSDB_LOCK_WAIT_TIMEOUT
+1
+"Trying to set variable @@session.ROCKSDB_LOCK_WAIT_TIMEOUT to 1024"
+SET @@session.ROCKSDB_LOCK_WAIT_TIMEOUT = 1024;
+SELECT @@session.ROCKSDB_LOCK_WAIT_TIMEOUT;
+@@session.ROCKSDB_LOCK_WAIT_TIMEOUT
+1024
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_LOCK_WAIT_TIMEOUT = DEFAULT;
+SELECT @@session.ROCKSDB_LOCK_WAIT_TIMEOUT;
+@@session.ROCKSDB_LOCK_WAIT_TIMEOUT
+1
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_LOCK_WAIT_TIMEOUT to 'aaa'"
+SET @@global.ROCKSDB_LOCK_WAIT_TIMEOUT = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_LOCK_WAIT_TIMEOUT;
+@@global.ROCKSDB_LOCK_WAIT_TIMEOUT
+1
+SET @@global.ROCKSDB_LOCK_WAIT_TIMEOUT = @start_global_value;
+SELECT @@global.ROCKSDB_LOCK_WAIT_TIMEOUT;
+@@global.ROCKSDB_LOCK_WAIT_TIMEOUT
+1
+SET @@session.ROCKSDB_LOCK_WAIT_TIMEOUT = @start_session_value;
+SELECT @@session.ROCKSDB_LOCK_WAIT_TIMEOUT;
+@@session.ROCKSDB_LOCK_WAIT_TIMEOUT
+1
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
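Because rocksdb_lock_wait_timeout has session scope, one client can lengthen its own row-lock wait without touching the global default. A hypothetical usage sketch (table t1 and column id are placeholders, not from this diff):

SET SESSION rocksdb_lock_wait_timeout = 60;
BEGIN;
SELECT * FROM t1 WHERE id = 1 FOR UPDATE;   -- waits up to 60s for the row lock
COMMIT;
SET SESSION rocksdb_lock_wait_timeout = DEFAULT;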
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_log_file_time_to_roll_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_log_file_time_to_roll_basic.result
new file mode 100644
index 00000000000..24cff58426a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_log_file_time_to_roll_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_LOG_FILE_TIME_TO_ROLL;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_LOG_FILE_TIME_TO_ROLL to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_LOG_FILE_TIME_TO_ROLL = 444;
+ERROR HY000: Variable 'rocksdb_log_file_time_to_roll' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manifest_preallocation_size_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manifest_preallocation_size_basic.result
new file mode 100644
index 00000000000..dbb331d235d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manifest_preallocation_size_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_MANIFEST_PREALLOCATION_SIZE;
+SELECT @start_global_value;
+@start_global_value
+4194304
+"Trying to set variable @@global.ROCKSDB_MANIFEST_PREALLOCATION_SIZE to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_MANIFEST_PREALLOCATION_SIZE = 444;
+ERROR HY000: Variable 'rocksdb_manifest_preallocation_size' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manual_compaction_threads_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manual_compaction_threads_basic.result
new file mode 100644
index 00000000000..3d599e1768e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manual_compaction_threads_basic.result
@@ -0,0 +1,93 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(99);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_MANUAL_COMPACTION_THREADS;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_MANUAL_COMPACTION_THREADS;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_MANUAL_COMPACTION_THREADS to 0"
+SET @@global.ROCKSDB_MANUAL_COMPACTION_THREADS = 0;
+SELECT @@global.ROCKSDB_MANUAL_COMPACTION_THREADS;
+@@global.ROCKSDB_MANUAL_COMPACTION_THREADS
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MANUAL_COMPACTION_THREADS = DEFAULT;
+SELECT @@global.ROCKSDB_MANUAL_COMPACTION_THREADS;
+@@global.ROCKSDB_MANUAL_COMPACTION_THREADS
+0
+"Trying to set variable @@global.ROCKSDB_MANUAL_COMPACTION_THREADS to 1"
+SET @@global.ROCKSDB_MANUAL_COMPACTION_THREADS = 1;
+SELECT @@global.ROCKSDB_MANUAL_COMPACTION_THREADS;
+@@global.ROCKSDB_MANUAL_COMPACTION_THREADS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MANUAL_COMPACTION_THREADS = DEFAULT;
+SELECT @@global.ROCKSDB_MANUAL_COMPACTION_THREADS;
+@@global.ROCKSDB_MANUAL_COMPACTION_THREADS
+0
+"Trying to set variable @@global.ROCKSDB_MANUAL_COMPACTION_THREADS to 99"
+SET @@global.ROCKSDB_MANUAL_COMPACTION_THREADS = 99;
+SELECT @@global.ROCKSDB_MANUAL_COMPACTION_THREADS;
+@@global.ROCKSDB_MANUAL_COMPACTION_THREADS
+99
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MANUAL_COMPACTION_THREADS = DEFAULT;
+SELECT @@global.ROCKSDB_MANUAL_COMPACTION_THREADS;
+@@global.ROCKSDB_MANUAL_COMPACTION_THREADS
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_MANUAL_COMPACTION_THREADS to 0"
+SET @@session.ROCKSDB_MANUAL_COMPACTION_THREADS = 0;
+SELECT @@session.ROCKSDB_MANUAL_COMPACTION_THREADS;
+@@session.ROCKSDB_MANUAL_COMPACTION_THREADS
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_MANUAL_COMPACTION_THREADS = DEFAULT;
+SELECT @@session.ROCKSDB_MANUAL_COMPACTION_THREADS;
+@@session.ROCKSDB_MANUAL_COMPACTION_THREADS
+0
+"Trying to set variable @@session.ROCKSDB_MANUAL_COMPACTION_THREADS to 1"
+SET @@session.ROCKSDB_MANUAL_COMPACTION_THREADS = 1;
+SELECT @@session.ROCKSDB_MANUAL_COMPACTION_THREADS;
+@@session.ROCKSDB_MANUAL_COMPACTION_THREADS
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_MANUAL_COMPACTION_THREADS = DEFAULT;
+SELECT @@session.ROCKSDB_MANUAL_COMPACTION_THREADS;
+@@session.ROCKSDB_MANUAL_COMPACTION_THREADS
+0
+"Trying to set variable @@session.ROCKSDB_MANUAL_COMPACTION_THREADS to 99"
+SET @@session.ROCKSDB_MANUAL_COMPACTION_THREADS = 99;
+SELECT @@session.ROCKSDB_MANUAL_COMPACTION_THREADS;
+@@session.ROCKSDB_MANUAL_COMPACTION_THREADS
+99
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_MANUAL_COMPACTION_THREADS = DEFAULT;
+SELECT @@session.ROCKSDB_MANUAL_COMPACTION_THREADS;
+@@session.ROCKSDB_MANUAL_COMPACTION_THREADS
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_MANUAL_COMPACTION_THREADS to 'aaa'"
+SET @@global.ROCKSDB_MANUAL_COMPACTION_THREADS = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MANUAL_COMPACTION_THREADS;
+@@global.ROCKSDB_MANUAL_COMPACTION_THREADS
+0
+SET @@global.ROCKSDB_MANUAL_COMPACTION_THREADS = @start_global_value;
+SELECT @@global.ROCKSDB_MANUAL_COMPACTION_THREADS;
+@@global.ROCKSDB_MANUAL_COMPACTION_THREADS
+0
+SET @@session.ROCKSDB_MANUAL_COMPACTION_THREADS = @start_session_value;
+SELECT @@session.ROCKSDB_MANUAL_COMPACTION_THREADS;
+@@session.ROCKSDB_MANUAL_COMPACTION_THREADS
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manual_wal_flush_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manual_wal_flush_basic.result
new file mode 100644
index 00000000000..9b176263a23
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manual_wal_flush_basic.result
@@ -0,0 +1,14 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_MANUAL_WAL_FLUSH;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_MANUAL_WAL_FLUSH to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_MANUAL_WAL_FLUSH = 444;
+ERROR HY000: Variable 'rocksdb_manual_wal_flush' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_master_skip_tx_api_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_master_skip_tx_api_basic.result
new file mode 100644
index 00000000000..3f50772ded5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_master_skip_tx_api_basic.result
@@ -0,0 +1,100 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_MASTER_SKIP_TX_API;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_MASTER_SKIP_TX_API;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_MASTER_SKIP_TX_API to 1"
+SET @@global.ROCKSDB_MASTER_SKIP_TX_API = 1;
+SELECT @@global.ROCKSDB_MASTER_SKIP_TX_API;
+@@global.ROCKSDB_MASTER_SKIP_TX_API
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MASTER_SKIP_TX_API = DEFAULT;
+SELECT @@global.ROCKSDB_MASTER_SKIP_TX_API;
+@@global.ROCKSDB_MASTER_SKIP_TX_API
+0
+"Trying to set variable @@global.ROCKSDB_MASTER_SKIP_TX_API to 0"
+SET @@global.ROCKSDB_MASTER_SKIP_TX_API = 0;
+SELECT @@global.ROCKSDB_MASTER_SKIP_TX_API;
+@@global.ROCKSDB_MASTER_SKIP_TX_API
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MASTER_SKIP_TX_API = DEFAULT;
+SELECT @@global.ROCKSDB_MASTER_SKIP_TX_API;
+@@global.ROCKSDB_MASTER_SKIP_TX_API
+0
+"Trying to set variable @@global.ROCKSDB_MASTER_SKIP_TX_API to on"
+SET @@global.ROCKSDB_MASTER_SKIP_TX_API = on;
+SELECT @@global.ROCKSDB_MASTER_SKIP_TX_API;
+@@global.ROCKSDB_MASTER_SKIP_TX_API
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MASTER_SKIP_TX_API = DEFAULT;
+SELECT @@global.ROCKSDB_MASTER_SKIP_TX_API;
+@@global.ROCKSDB_MASTER_SKIP_TX_API
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_MASTER_SKIP_TX_API to 1"
+SET @@session.ROCKSDB_MASTER_SKIP_TX_API = 1;
+SELECT @@session.ROCKSDB_MASTER_SKIP_TX_API;
+@@session.ROCKSDB_MASTER_SKIP_TX_API
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_MASTER_SKIP_TX_API = DEFAULT;
+SELECT @@session.ROCKSDB_MASTER_SKIP_TX_API;
+@@session.ROCKSDB_MASTER_SKIP_TX_API
+0
+"Trying to set variable @@session.ROCKSDB_MASTER_SKIP_TX_API to 0"
+SET @@session.ROCKSDB_MASTER_SKIP_TX_API = 0;
+SELECT @@session.ROCKSDB_MASTER_SKIP_TX_API;
+@@session.ROCKSDB_MASTER_SKIP_TX_API
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_MASTER_SKIP_TX_API = DEFAULT;
+SELECT @@session.ROCKSDB_MASTER_SKIP_TX_API;
+@@session.ROCKSDB_MASTER_SKIP_TX_API
+0
+"Trying to set variable @@session.ROCKSDB_MASTER_SKIP_TX_API to on"
+SET @@session.ROCKSDB_MASTER_SKIP_TX_API = on;
+SELECT @@session.ROCKSDB_MASTER_SKIP_TX_API;
+@@session.ROCKSDB_MASTER_SKIP_TX_API
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_MASTER_SKIP_TX_API = DEFAULT;
+SELECT @@session.ROCKSDB_MASTER_SKIP_TX_API;
+@@session.ROCKSDB_MASTER_SKIP_TX_API
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_MASTER_SKIP_TX_API to 'aaa'"
+SET @@global.ROCKSDB_MASTER_SKIP_TX_API = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MASTER_SKIP_TX_API;
+@@global.ROCKSDB_MASTER_SKIP_TX_API
+0
+"Trying to set variable @@global.ROCKSDB_MASTER_SKIP_TX_API to 'bbb'"
+SET @@global.ROCKSDB_MASTER_SKIP_TX_API = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MASTER_SKIP_TX_API;
+@@global.ROCKSDB_MASTER_SKIP_TX_API
+0
+SET @@global.ROCKSDB_MASTER_SKIP_TX_API = @start_global_value;
+SELECT @@global.ROCKSDB_MASTER_SKIP_TX_API;
+@@global.ROCKSDB_MASTER_SKIP_TX_API
+0
+SET @@session.ROCKSDB_MASTER_SKIP_TX_API = @start_session_value;
+SELECT @@session.ROCKSDB_MASTER_SKIP_TX_API;
+@@session.ROCKSDB_MASTER_SKIP_TX_API
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_background_jobs_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_background_jobs_basic.result
new file mode 100644
index 00000000000..88e6d21c3ec
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_background_jobs_basic.result
@@ -0,0 +1,46 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(64);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'abc\'');
+SET @start_global_value = @@global.ROCKSDB_MAX_BACKGROUND_JOBS;
+SELECT @start_global_value;
+@start_global_value
+2
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_MAX_BACKGROUND_JOBS to 1"
+SET @@global.ROCKSDB_MAX_BACKGROUND_JOBS = 1;
+SELECT @@global.ROCKSDB_MAX_BACKGROUND_JOBS;
+@@global.ROCKSDB_MAX_BACKGROUND_JOBS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MAX_BACKGROUND_JOBS = DEFAULT;
+SELECT @@global.ROCKSDB_MAX_BACKGROUND_JOBS;
+@@global.ROCKSDB_MAX_BACKGROUND_JOBS
+2
+"Trying to set variable @@global.ROCKSDB_MAX_BACKGROUND_JOBS to 64"
+SET @@global.ROCKSDB_MAX_BACKGROUND_JOBS = 64;
+SELECT @@global.ROCKSDB_MAX_BACKGROUND_JOBS;
+@@global.ROCKSDB_MAX_BACKGROUND_JOBS
+64
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MAX_BACKGROUND_JOBS = DEFAULT;
+SELECT @@global.ROCKSDB_MAX_BACKGROUND_JOBS;
+@@global.ROCKSDB_MAX_BACKGROUND_JOBS
+2
+"Trying to set variable @@session.ROCKSDB_MAX_BACKGROUND_JOBS to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_MAX_BACKGROUND_JOBS = 444;
+ERROR HY000: Variable 'rocksdb_max_background_jobs' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_MAX_BACKGROUND_JOBS to 'abc'"
+SET @@global.ROCKSDB_MAX_BACKGROUND_JOBS = 'abc';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MAX_BACKGROUND_JOBS;
+@@global.ROCKSDB_MAX_BACKGROUND_JOBS
+2
+SET @@global.ROCKSDB_MAX_BACKGROUND_JOBS = @start_global_value;
+SELECT @@global.ROCKSDB_MAX_BACKGROUND_JOBS;
+@@global.ROCKSDB_MAX_BACKGROUND_JOBS
+2
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_latest_deadlocks_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_latest_deadlocks_basic.result
new file mode 100644
index 00000000000..74dbdb4288c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_latest_deadlocks_basic.result
@@ -0,0 +1,53 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'123\'');
+SET @start_global_value = @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+SELECT @start_global_value;
+@start_global_value
+5
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_MAX_LATEST_DEADLOCKS to 100"
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = 100;
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+100
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = DEFAULT;
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+5
+"Trying to set variable @@global.ROCKSDB_MAX_LATEST_DEADLOCKS to 1"
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = 1;
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = DEFAULT;
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+5
+"Trying to set variable @@session.ROCKSDB_MAX_LATEST_DEADLOCKS to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_MAX_LATEST_DEADLOCKS = 444;
+ERROR HY000: Variable 'rocksdb_max_latest_deadlocks' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_MAX_LATEST_DEADLOCKS to 'aaa'"
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+5
+"Trying to set variable @@global.ROCKSDB_MAX_LATEST_DEADLOCKS to '123'"
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = '123';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+5
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = @start_global_value;
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+5
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_log_file_size_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_log_file_size_basic.result
new file mode 100644
index 00000000000..4359ee725d4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_log_file_size_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_MAX_LOG_FILE_SIZE;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_MAX_LOG_FILE_SIZE to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_MAX_LOG_FILE_SIZE = 444;
+ERROR HY000: Variable 'rocksdb_max_log_file_size' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_manifest_file_size_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_manifest_file_size_basic.result
new file mode 100644
index 00000000000..45330b82702
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_manifest_file_size_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_MAX_MANIFEST_FILE_SIZE;
+SELECT @start_global_value;
+@start_global_value
+1073741824
+"Trying to set variable @@global.ROCKSDB_MAX_MANIFEST_FILE_SIZE to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_MAX_MANIFEST_FILE_SIZE = 444;
+ERROR HY000: Variable 'rocksdb_max_manifest_file_size' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_manual_compactions_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_manual_compactions_basic.result
new file mode 100644
index 00000000000..815506ccac8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_manual_compactions_basic.result
@@ -0,0 +1,57 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+INSERT INTO valid_values VALUES(512*1024*1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_MAX_MANUAL_COMPACTIONS;
+SELECT @start_global_value;
+@start_global_value
+10
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_MAX_MANUAL_COMPACTIONS to 1"
+SET @@global.ROCKSDB_MAX_MANUAL_COMPACTIONS = 1;
+SELECT @@global.ROCKSDB_MAX_MANUAL_COMPACTIONS;
+@@global.ROCKSDB_MAX_MANUAL_COMPACTIONS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MAX_MANUAL_COMPACTIONS = DEFAULT;
+SELECT @@global.ROCKSDB_MAX_MANUAL_COMPACTIONS;
+@@global.ROCKSDB_MAX_MANUAL_COMPACTIONS
+10
+"Trying to set variable @@global.ROCKSDB_MAX_MANUAL_COMPACTIONS to 1024"
+SET @@global.ROCKSDB_MAX_MANUAL_COMPACTIONS = 1024;
+SELECT @@global.ROCKSDB_MAX_MANUAL_COMPACTIONS;
+@@global.ROCKSDB_MAX_MANUAL_COMPACTIONS
+1024
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MAX_MANUAL_COMPACTIONS = DEFAULT;
+SELECT @@global.ROCKSDB_MAX_MANUAL_COMPACTIONS;
+@@global.ROCKSDB_MAX_MANUAL_COMPACTIONS
+10
+"Trying to set variable @@global.ROCKSDB_MAX_MANUAL_COMPACTIONS to 536870912"
+SET @@global.ROCKSDB_MAX_MANUAL_COMPACTIONS = 536870912;
+SELECT @@global.ROCKSDB_MAX_MANUAL_COMPACTIONS;
+@@global.ROCKSDB_MAX_MANUAL_COMPACTIONS
+536870912
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MAX_MANUAL_COMPACTIONS = DEFAULT;
+SELECT @@global.ROCKSDB_MAX_MANUAL_COMPACTIONS;
+@@global.ROCKSDB_MAX_MANUAL_COMPACTIONS
+10
+"Trying to set variable @@session.ROCKSDB_MAX_MANUAL_COMPACTIONS to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_MAX_MANUAL_COMPACTIONS = 444;
+ERROR HY000: Variable 'rocksdb_max_manual_compactions' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_MAX_MANUAL_COMPACTIONS to 'aaa'"
+SET @@global.ROCKSDB_MAX_MANUAL_COMPACTIONS = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MAX_MANUAL_COMPACTIONS;
+@@global.ROCKSDB_MAX_MANUAL_COMPACTIONS
+10
+SET @@global.ROCKSDB_MAX_MANUAL_COMPACTIONS = @start_global_value;
+SELECT @@global.ROCKSDB_MAX_MANUAL_COMPACTIONS;
+@@global.ROCKSDB_MAX_MANUAL_COMPACTIONS
+10
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_open_files_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_open_files_basic.result
new file mode 100644
index 00000000000..60f505310c6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_open_files_basic.result
@@ -0,0 +1,3 @@
+show variables like 'rocksdb_max_open_files';
+Variable_name Value
+rocksdb_max_open_files #
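This result keeps only a masked SHOW line because the default for rocksdb_max_open_files is derived from the process open-file limit and so varies by host. The masking is presumably done in the driving .test file along these lines:

--replace_column 2 #
SHOW VARIABLES LIKE 'rocksdb_max_open_files';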
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_row_locks_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_row_locks_basic.result
new file mode 100644
index 00000000000..b195df092dc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_row_locks_basic.result
@@ -0,0 +1,93 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+INSERT INTO valid_values VALUES(512*1024*1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_MAX_ROW_LOCKS;
+SELECT @start_global_value;
+@start_global_value
+1048576
+SET @start_session_value = @@session.ROCKSDB_MAX_ROW_LOCKS;
+SELECT @start_session_value;
+@start_session_value
+1048576
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_MAX_ROW_LOCKS to 1"
+SET @@global.ROCKSDB_MAX_ROW_LOCKS = 1;
+SELECT @@global.ROCKSDB_MAX_ROW_LOCKS;
+@@global.ROCKSDB_MAX_ROW_LOCKS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MAX_ROW_LOCKS = DEFAULT;
+SELECT @@global.ROCKSDB_MAX_ROW_LOCKS;
+@@global.ROCKSDB_MAX_ROW_LOCKS
+1048576
+"Trying to set variable @@global.ROCKSDB_MAX_ROW_LOCKS to 1024"
+SET @@global.ROCKSDB_MAX_ROW_LOCKS = 1024;
+SELECT @@global.ROCKSDB_MAX_ROW_LOCKS;
+@@global.ROCKSDB_MAX_ROW_LOCKS
+1024
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MAX_ROW_LOCKS = DEFAULT;
+SELECT @@global.ROCKSDB_MAX_ROW_LOCKS;
+@@global.ROCKSDB_MAX_ROW_LOCKS
+1048576
+"Trying to set variable @@global.ROCKSDB_MAX_ROW_LOCKS to 536870912"
+SET @@global.ROCKSDB_MAX_ROW_LOCKS = 536870912;
+SELECT @@global.ROCKSDB_MAX_ROW_LOCKS;
+@@global.ROCKSDB_MAX_ROW_LOCKS
+536870912
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MAX_ROW_LOCKS = DEFAULT;
+SELECT @@global.ROCKSDB_MAX_ROW_LOCKS;
+@@global.ROCKSDB_MAX_ROW_LOCKS
+1048576
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_MAX_ROW_LOCKS to 1"
+SET @@session.ROCKSDB_MAX_ROW_LOCKS = 1;
+SELECT @@session.ROCKSDB_MAX_ROW_LOCKS;
+@@session.ROCKSDB_MAX_ROW_LOCKS
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_MAX_ROW_LOCKS = DEFAULT;
+SELECT @@session.ROCKSDB_MAX_ROW_LOCKS;
+@@session.ROCKSDB_MAX_ROW_LOCKS
+1048576
+"Trying to set variable @@session.ROCKSDB_MAX_ROW_LOCKS to 1024"
+SET @@session.ROCKSDB_MAX_ROW_LOCKS = 1024;
+SELECT @@session.ROCKSDB_MAX_ROW_LOCKS;
+@@session.ROCKSDB_MAX_ROW_LOCKS
+1024
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_MAX_ROW_LOCKS = DEFAULT;
+SELECT @@session.ROCKSDB_MAX_ROW_LOCKS;
+@@session.ROCKSDB_MAX_ROW_LOCKS
+1048576
+"Trying to set variable @@session.ROCKSDB_MAX_ROW_LOCKS to 536870912"
+SET @@session.ROCKSDB_MAX_ROW_LOCKS = 536870912;
+SELECT @@session.ROCKSDB_MAX_ROW_LOCKS;
+@@session.ROCKSDB_MAX_ROW_LOCKS
+536870912
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_MAX_ROW_LOCKS = DEFAULT;
+SELECT @@session.ROCKSDB_MAX_ROW_LOCKS;
+@@session.ROCKSDB_MAX_ROW_LOCKS
+1048576
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_MAX_ROW_LOCKS to 'aaa'"
+SET @@global.ROCKSDB_MAX_ROW_LOCKS = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MAX_ROW_LOCKS;
+@@global.ROCKSDB_MAX_ROW_LOCKS
+1048576
+SET @@global.ROCKSDB_MAX_ROW_LOCKS = @start_global_value;
+SELECT @@global.ROCKSDB_MAX_ROW_LOCKS;
+@@global.ROCKSDB_MAX_ROW_LOCKS
+1048576
+SET @@session.ROCKSDB_MAX_ROW_LOCKS = @start_session_value;
+SELECT @@session.ROCKSDB_MAX_ROW_LOCKS;
+@@session.ROCKSDB_MAX_ROW_LOCKS
+1048576
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_subcompactions_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_subcompactions_basic.result
new file mode 100644
index 00000000000..58452f580f2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_subcompactions_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_MAX_SUBCOMPACTIONS;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_MAX_SUBCOMPACTIONS to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_MAX_SUBCOMPACTIONS = 444;
+ERROR HY000: Variable 'rocksdb_max_subcompactions' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_total_wal_size_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_total_wal_size_basic.result
new file mode 100644
index 00000000000..22c17c24e19
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_total_wal_size_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_MAX_TOTAL_WAL_SIZE;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_MAX_TOTAL_WAL_SIZE to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_MAX_TOTAL_WAL_SIZE = 444;
+ERROR HY000: Variable 'rocksdb_max_total_wal_size' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_buf_size_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_buf_size_basic.result
new file mode 100644
index 00000000000..5715b198d5a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_buf_size_basic.result
@@ -0,0 +1,43 @@
+drop table if exists t1;
+set session rocksdb_merge_buf_size=250;
+set session rocksdb_merge_combine_read_size=1000;
+CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
+ALTER TABLE t1 ADD INDEX kj(j), ALGORITHM=INPLACE;
+ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) NOT NULL,
+ `j` int(11) DEFAULT NULL,
+ PRIMARY KEY (`i`),
+ KEY `kj` (`j`),
+ KEY `kij` (`i`,`j`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+DROP INDEX kj on t1;
+DROP INDEX kij ON t1;
+ALTER TABLE t1 ADD INDEX kj(j), ADD INDEX kij(i,j), ADD INDEX kji(j,i), ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) NOT NULL,
+ `j` int(11) DEFAULT NULL,
+ PRIMARY KEY (`i`),
+ KEY `kj` (`j`),
+ KEY `kij` (`i`,`j`),
+ KEY `kji` (`j`,`i`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+DROP TABLE t1;
+CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=RocksDB;
+ALTER TABLE t1 ADD INDEX kb(b) comment 'rev:cf1', ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` int(11) DEFAULT NULL,
+ PRIMARY KEY (`a`),
+ KEY `kb` (`b`) COMMENT 'rev:cf1'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+SELECT COUNT(*) FROM t1 FORCE INDEX(kb);
+COUNT(*)
+100
+DROP TABLE t1;
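Unlike the get/set transcripts above, the two merge_* results are functional: shrinking rocksdb_merge_buf_size to 250 bytes forces the inplace index build through many small external-merge chunks, and the recorded SHOW CREATE TABLE confirms the secondary keys still come out intact. The COUNT(*) of 100 with no visible INSERTs suggests the .test populates t1 with query logging disabled before the recorded portion, e.g.:

--disable_query_log
# hypothetical loader, not part of the recorded result
let $i = 1;
while ($i <= 100) {
  eval INSERT INTO t1 VALUES ($i, $i * 2);
  inc $i;
}
--enable_query_log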
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_combine_read_size_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_combine_read_size_basic.result
new file mode 100644
index 00000000000..5b73305cd9e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_combine_read_size_basic.result
@@ -0,0 +1,29 @@
+drop table if exists t1;
+set session rocksdb_merge_buf_size=250;
+set session rocksdb_merge_combine_read_size=1000;
+CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
+ALTER TABLE t1 ADD INDEX kj(j), ALGORITHM=INPLACE;
+ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) NOT NULL,
+ `j` int(11) DEFAULT NULL,
+ PRIMARY KEY (`i`),
+ KEY `kj` (`j`),
+ KEY `kij` (`i`,`j`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+DROP INDEX kj on t1;
+DROP INDEX kij ON t1;
+ALTER TABLE t1 ADD INDEX kj(j), ADD INDEX kij(i,j), ADD INDEX kji(j,i), ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) NOT NULL,
+ `j` int(11) DEFAULT NULL,
+ PRIMARY KEY (`i`),
+ KEY `kj` (`j`),
+ KEY `kij` (`i`,`j`),
+ KEY `kji` (`j`,`i`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_tmp_file_removal_delay_ms_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_tmp_file_removal_delay_ms_basic.result
new file mode 100644
index 00000000000..277de716d70
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_tmp_file_removal_delay_ms_basic.result
@@ -0,0 +1,93 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('on');
+SET @start_global_value = @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to 1"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = 1;
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = DEFAULT;
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+"Trying to set variable @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to 0"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = 0;
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = DEFAULT;
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to 1"
+SET @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = 1;
+SELECT @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = DEFAULT;
+SELECT @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+"Trying to set variable @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to 0"
+SET @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = 0;
+SELECT @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = DEFAULT;
+SELECT @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to 'aaa'"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+"Trying to set variable @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to 'bbb'"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+"Trying to set variable @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to on"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = on;
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = @start_global_value;
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+SET @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = @start_session_value;
+SELECT @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+set session rocksdb_merge_buf_size=250;
+set session rocksdb_merge_combine_read_size=1000;
+set session rocksdb_merge_tmp_file_removal_delay_ms=1000;
+CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
+ALTER TABLE t1 ADD INDEX kj(j), ALGORITHM=INPLACE;
+include/assert.inc [Alter should have taken at least 10 seconds]
+DROP TABLE t1;
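[editor's note] The functional tail of this result shows the delay variable doing real work: with a tiny 250-byte merge buffer, bulk index creation produces many merge temp files, and delaying each removal by 1000 ms stretches the ALTER enough for the test's "at least 10 seconds" assertion. A minimal sketch of the same throttling pattern (values copied from the result; the timing bound is the test's assumption, not a guarantee):

  SET SESSION rocksdb_merge_buf_size = 250;
  SET SESSION rocksdb_merge_tmp_file_removal_delay_ms = 1000;
  ALTER TABLE t1 ADD INDEX kj(j), ALGORITHM=INPLACE;  # slowed by the removal delay
  SET SESSION rocksdb_merge_tmp_file_removal_delay_ms = DEFAULT;
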
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_new_table_reader_for_compaction_inputs_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_new_table_reader_for_compaction_inputs_basic.result
new file mode 100644
index 00000000000..c2daec327a2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_new_table_reader_for_compaction_inputs_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_NEW_TABLE_READER_FOR_COMPACTION_INPUTS;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_NEW_TABLE_READER_FOR_COMPACTION_INPUTS to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_NEW_TABLE_READER_FOR_COMPACTION_INPUTS = 444;
+ERROR HY000: Variable 'rocksdb_new_table_reader_for_compaction_inputs' is a read only variable
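[editor's note] This is the template all the read-only variable results below follow: any runtime SET fails with HY000. Such variables can only be given a value at server startup; a sketch, assuming the usual option-name mapping (underscores to dashes) rather than anything this patch states:

  # at startup (assumed spelling): mysqld --rocksdb-new-table-reader-for-compaction-inputs=1
  # at runtime the value can only be inspected, never changed:
  SELECT @@global.rocksdb_new_table_reader_for_compaction_inputs;
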
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_no_block_cache_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_no_block_cache_basic.result
new file mode 100644
index 00000000000..7bd32950303
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_no_block_cache_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_NO_BLOCK_CACHE;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_NO_BLOCK_CACHE to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_NO_BLOCK_CACHE = 444;
+ERROR HY000: Variable 'rocksdb_no_block_cache' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_override_cf_options_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_override_cf_options_basic.result
new file mode 100644
index 00000000000..59042124dc8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_override_cf_options_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_OVERRIDE_CF_OPTIONS;
+SELECT @start_global_value;
+@start_global_value
+
+"Trying to set variable @@global.ROCKSDB_OVERRIDE_CF_OPTIONS to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_OVERRIDE_CF_OPTIONS = 444;
+ERROR HY000: Variable 'rocksdb_override_cf_options' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_paranoid_checks_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_paranoid_checks_basic.result
new file mode 100644
index 00000000000..102d4926e65
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_paranoid_checks_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_PARANOID_CHECKS;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_PARANOID_CHECKS to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_PARANOID_CHECKS = 444;
+ERROR HY000: Variable 'rocksdb_paranoid_checks' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_pause_background_work_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_pause_background_work_basic.result
new file mode 100644
index 00000000000..5849fe09a20
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_pause_background_work_basic.result
@@ -0,0 +1,75 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_PAUSE_BACKGROUND_WORK;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_PAUSE_BACKGROUND_WORK to 1"
+SET @@global.ROCKSDB_PAUSE_BACKGROUND_WORK = 1;
+SELECT @@global.ROCKSDB_PAUSE_BACKGROUND_WORK;
+@@global.ROCKSDB_PAUSE_BACKGROUND_WORK
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_PAUSE_BACKGROUND_WORK = DEFAULT;
+SELECT @@global.ROCKSDB_PAUSE_BACKGROUND_WORK;
+@@global.ROCKSDB_PAUSE_BACKGROUND_WORK
+0
+"Trying to set variable @@global.ROCKSDB_PAUSE_BACKGROUND_WORK to 0"
+SET @@global.ROCKSDB_PAUSE_BACKGROUND_WORK = 0;
+SELECT @@global.ROCKSDB_PAUSE_BACKGROUND_WORK;
+@@global.ROCKSDB_PAUSE_BACKGROUND_WORK
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_PAUSE_BACKGROUND_WORK = DEFAULT;
+SELECT @@global.ROCKSDB_PAUSE_BACKGROUND_WORK;
+@@global.ROCKSDB_PAUSE_BACKGROUND_WORK
+0
+"Trying to set variable @@global.ROCKSDB_PAUSE_BACKGROUND_WORK to on"
+SET @@global.ROCKSDB_PAUSE_BACKGROUND_WORK = on;
+SELECT @@global.ROCKSDB_PAUSE_BACKGROUND_WORK;
+@@global.ROCKSDB_PAUSE_BACKGROUND_WORK
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_PAUSE_BACKGROUND_WORK = DEFAULT;
+SELECT @@global.ROCKSDB_PAUSE_BACKGROUND_WORK;
+@@global.ROCKSDB_PAUSE_BACKGROUND_WORK
+0
+"Trying to set variable @@global.ROCKSDB_PAUSE_BACKGROUND_WORK to off"
+SET @@global.ROCKSDB_PAUSE_BACKGROUND_WORK = off;
+SELECT @@global.ROCKSDB_PAUSE_BACKGROUND_WORK;
+@@global.ROCKSDB_PAUSE_BACKGROUND_WORK
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_PAUSE_BACKGROUND_WORK = DEFAULT;
+SELECT @@global.ROCKSDB_PAUSE_BACKGROUND_WORK;
+@@global.ROCKSDB_PAUSE_BACKGROUND_WORK
+0
+"Trying to set variable @@session.ROCKSDB_PAUSE_BACKGROUND_WORK to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_PAUSE_BACKGROUND_WORK = 444;
+ERROR HY000: Variable 'rocksdb_pause_background_work' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_PAUSE_BACKGROUND_WORK to 'aaa'"
+SET @@global.ROCKSDB_PAUSE_BACKGROUND_WORK = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_PAUSE_BACKGROUND_WORK;
+@@global.ROCKSDB_PAUSE_BACKGROUND_WORK
+0
+"Trying to set variable @@global.ROCKSDB_PAUSE_BACKGROUND_WORK to 'bbb'"
+SET @@global.ROCKSDB_PAUSE_BACKGROUND_WORK = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_PAUSE_BACKGROUND_WORK;
+@@global.ROCKSDB_PAUSE_BACKGROUND_WORK
+0
+SET @@global.ROCKSDB_PAUSE_BACKGROUND_WORK = @start_global_value;
+SELECT @@global.ROCKSDB_PAUSE_BACKGROUND_WORK;
+@@global.ROCKSDB_PAUSE_BACKGROUND_WORK
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
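[editor's note] The result above exercises the GLOBAL-only boolean pattern: `@@session.` assignment fails, `@@global.` accepts 1/0/on/off. A minimal usage sketch (the backup framing is illustrative, not from the patch):

  SET GLOBAL rocksdb_pause_background_work = ON;   # quiesce compactions/flushes
  # ... take a filesystem snapshot or backup ...
  SET GLOBAL rocksdb_pause_background_work = OFF;  # resume background work
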
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_perf_context_level_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_perf_context_level_basic.result
new file mode 100644
index 00000000000..292ba58a3a3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_perf_context_level_basic.result
@@ -0,0 +1,114 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(2);
+INSERT INTO valid_values VALUES(3);
+INSERT INTO valid_values VALUES(4);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_PERF_CONTEXT_LEVEL;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_PERF_CONTEXT_LEVEL;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_PERF_CONTEXT_LEVEL to 1"
+SET @@global.ROCKSDB_PERF_CONTEXT_LEVEL = 1;
+SELECT @@global.ROCKSDB_PERF_CONTEXT_LEVEL;
+@@global.ROCKSDB_PERF_CONTEXT_LEVEL
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_PERF_CONTEXT_LEVEL = DEFAULT;
+SELECT @@global.ROCKSDB_PERF_CONTEXT_LEVEL;
+@@global.ROCKSDB_PERF_CONTEXT_LEVEL
+0
+"Trying to set variable @@global.ROCKSDB_PERF_CONTEXT_LEVEL to 2"
+SET @@global.ROCKSDB_PERF_CONTEXT_LEVEL = 2;
+SELECT @@global.ROCKSDB_PERF_CONTEXT_LEVEL;
+@@global.ROCKSDB_PERF_CONTEXT_LEVEL
+2
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_PERF_CONTEXT_LEVEL = DEFAULT;
+SELECT @@global.ROCKSDB_PERF_CONTEXT_LEVEL;
+@@global.ROCKSDB_PERF_CONTEXT_LEVEL
+0
+"Trying to set variable @@global.ROCKSDB_PERF_CONTEXT_LEVEL to 3"
+SET @@global.ROCKSDB_PERF_CONTEXT_LEVEL = 3;
+SELECT @@global.ROCKSDB_PERF_CONTEXT_LEVEL;
+@@global.ROCKSDB_PERF_CONTEXT_LEVEL
+3
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_PERF_CONTEXT_LEVEL = DEFAULT;
+SELECT @@global.ROCKSDB_PERF_CONTEXT_LEVEL;
+@@global.ROCKSDB_PERF_CONTEXT_LEVEL
+0
+"Trying to set variable @@global.ROCKSDB_PERF_CONTEXT_LEVEL to 4"
+SET @@global.ROCKSDB_PERF_CONTEXT_LEVEL = 4;
+SELECT @@global.ROCKSDB_PERF_CONTEXT_LEVEL;
+@@global.ROCKSDB_PERF_CONTEXT_LEVEL
+4
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_PERF_CONTEXT_LEVEL = DEFAULT;
+SELECT @@global.ROCKSDB_PERF_CONTEXT_LEVEL;
+@@global.ROCKSDB_PERF_CONTEXT_LEVEL
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_PERF_CONTEXT_LEVEL to 1"
+SET @@session.ROCKSDB_PERF_CONTEXT_LEVEL = 1;
+SELECT @@session.ROCKSDB_PERF_CONTEXT_LEVEL;
+@@session.ROCKSDB_PERF_CONTEXT_LEVEL
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_PERF_CONTEXT_LEVEL = DEFAULT;
+SELECT @@session.ROCKSDB_PERF_CONTEXT_LEVEL;
+@@session.ROCKSDB_PERF_CONTEXT_LEVEL
+0
+"Trying to set variable @@session.ROCKSDB_PERF_CONTEXT_LEVEL to 2"
+SET @@session.ROCKSDB_PERF_CONTEXT_LEVEL = 2;
+SELECT @@session.ROCKSDB_PERF_CONTEXT_LEVEL;
+@@session.ROCKSDB_PERF_CONTEXT_LEVEL
+2
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_PERF_CONTEXT_LEVEL = DEFAULT;
+SELECT @@session.ROCKSDB_PERF_CONTEXT_LEVEL;
+@@session.ROCKSDB_PERF_CONTEXT_LEVEL
+0
+"Trying to set variable @@session.ROCKSDB_PERF_CONTEXT_LEVEL to 3"
+SET @@session.ROCKSDB_PERF_CONTEXT_LEVEL = 3;
+SELECT @@session.ROCKSDB_PERF_CONTEXT_LEVEL;
+@@session.ROCKSDB_PERF_CONTEXT_LEVEL
+3
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_PERF_CONTEXT_LEVEL = DEFAULT;
+SELECT @@session.ROCKSDB_PERF_CONTEXT_LEVEL;
+@@session.ROCKSDB_PERF_CONTEXT_LEVEL
+0
+"Trying to set variable @@session.ROCKSDB_PERF_CONTEXT_LEVEL to 4"
+SET @@session.ROCKSDB_PERF_CONTEXT_LEVEL = 4;
+SELECT @@session.ROCKSDB_PERF_CONTEXT_LEVEL;
+@@session.ROCKSDB_PERF_CONTEXT_LEVEL
+4
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_PERF_CONTEXT_LEVEL = DEFAULT;
+SELECT @@session.ROCKSDB_PERF_CONTEXT_LEVEL;
+@@session.ROCKSDB_PERF_CONTEXT_LEVEL
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_PERF_CONTEXT_LEVEL to 'aaa'"
+SET @@global.ROCKSDB_PERF_CONTEXT_LEVEL = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_PERF_CONTEXT_LEVEL;
+@@global.ROCKSDB_PERF_CONTEXT_LEVEL
+0
+SET @@global.ROCKSDB_PERF_CONTEXT_LEVEL = @start_global_value;
+SELECT @@global.ROCKSDB_PERF_CONTEXT_LEVEL;
+@@global.ROCKSDB_PERF_CONTEXT_LEVEL
+0
+SET @@session.ROCKSDB_PERF_CONTEXT_LEVEL = @start_session_value;
+SELECT @@session.ROCKSDB_PERF_CONTEXT_LEVEL;
+@@session.ROCKSDB_PERF_CONTEXT_LEVEL
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
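[editor's note] Unlike the pause switch above, the perf-context level is session-settable, so one connection can raise its instrumentation without affecting others. A sketch of the intended workflow; the INFORMATION_SCHEMA table name is an assumption based on MyRocks conventions, not something this result file establishes:

  SET SESSION rocksdb_perf_context_level = 3;  # more detailed counters for this session
  SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT LIMIT 5;  # assumed table name
  SET SESSION rocksdb_perf_context_level = DEFAULT;
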
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_persistent_cache_path_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_persistent_cache_path_basic.result
new file mode 100644
index 00000000000..10b187d44e9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_persistent_cache_path_basic.result
@@ -0,0 +1,13 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES('abc');
+INSERT INTO valid_values VALUES('def');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+SET @start_global_value = @@global.ROCKSDB_PERSISTENT_CACHE_PATH;
+SELECT @start_global_value;
+@start_global_value
+
+"Trying to set variable @@global.ROCKSDB_PERSISTENT_CACHE_PATH to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_PERSISTENT_CACHE_PATH = 444;
+ERROR HY000: Variable 'rocksdb_persistent_cache_path' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_persistent_cache_size_mb_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_persistent_cache_size_mb_basic.result
new file mode 100644
index 00000000000..d097192545b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_persistent_cache_size_mb_basic.result
@@ -0,0 +1,14 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_PERSISTENT_CACHE_SIZE_MB;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_PERSISTENT_CACHE_SIZE_MB to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_PERSISTENT_CACHE_SIZE_MB = 444;
+ERROR HY000: Variable 'rocksdb_persistent_cache_size_mb' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_pin_l0_filter_and_index_blocks_in_cache_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_pin_l0_filter_and_index_blocks_in_cache_basic.result
new file mode 100644
index 00000000000..c152ecf1e5a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_pin_l0_filter_and_index_blocks_in_cache_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_PIN_L0_FILTER_AND_INDEX_BLOCKS_IN_CACHE;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_PIN_L0_FILTER_AND_INDEX_BLOCKS_IN_CACHE to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_PIN_L0_FILTER_AND_INDEX_BLOCKS_IN_CACHE = 444;
+ERROR HY000: Variable 'rocksdb_pin_l0_filter_and_index_blocks_in_cache' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_print_snapshot_conflict_queries_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_print_snapshot_conflict_queries_basic.result
new file mode 100644
index 00000000000..02a4b4040d7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_print_snapshot_conflict_queries_basic.result
@@ -0,0 +1,64 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES to 1"
+SET @@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES = 1;
+SELECT @@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES;
+@@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES = DEFAULT;
+SELECT @@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES;
+@@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES
+0
+"Trying to set variable @@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES to 0"
+SET @@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES = 0;
+SELECT @@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES;
+@@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES = DEFAULT;
+SELECT @@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES;
+@@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES
+0
+"Trying to set variable @@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES to on"
+SET @@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES = on;
+SELECT @@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES;
+@@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES = DEFAULT;
+SELECT @@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES;
+@@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES
+0
+"Trying to set variable @@session.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES = 444;
+ERROR HY000: Variable 'rocksdb_print_snapshot_conflict_queries' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES to 'aaa'"
+SET @@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES;
+@@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES
+0
+"Trying to set variable @@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES to 'bbb'"
+SET @@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES;
+@@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES
+0
+SET @@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES = @start_global_value;
+SELECT @@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES;
+@@global.ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_rate_limiter_bytes_per_sec_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_rate_limiter_bytes_per_sec_basic.result
new file mode 100644
index 00000000000..9d194ad718c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_rate_limiter_bytes_per_sec_basic.result
@@ -0,0 +1,101 @@
+SET @@global.rocksdb_rate_limiter_bytes_per_sec = 10000;
+Warnings:
+Warning 1210 RocksDB: rocksdb_rate_limiter_bytes_per_sec cannot be dynamically changed to or from 0. Do a clean shutdown if you want to change it from or to 0.
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1), (1000), (1000000), (1000000000), (1000000000000);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\''), (3.14);
+SET @start_global_value = @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC;
+SELECT @start_global_value;
+@start_global_value
+10000
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC to 1"
+SET @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC = 1;
+SELECT @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC;
+@@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC = DEFAULT;
+Warnings:
+Warning 1210 RocksDB: rocksdb_rate_limiter_bytes_per_sec cannot be dynamically changed to or from 0. Do a clean shutdown if you want to change it from or to 0.
+SELECT @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC;
+@@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC
+1
+"Trying to set variable @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC to 1000"
+SET @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC = 1000;
+SELECT @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC;
+@@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC
+1000
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC = DEFAULT;
+Warnings:
+Warning 1210 RocksDB: rocksdb_rate_limiter_bytes_per_sec cannot be dynamically changed to or from 0. Do a clean shutdown if you want to change it from or to 0.
+SELECT @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC;
+@@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC
+1000
+"Trying to set variable @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC to 1000000"
+SET @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC = 1000000;
+SELECT @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC;
+@@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC
+1000000
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC = DEFAULT;
+Warnings:
+Warning 1210 RocksDB: rocksdb_rate_limiter_bytes_per_sec cannot be dynamically changed to or from 0. Do a clean shutdown if you want to change it from or to 0.
+SELECT @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC;
+@@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC
+1000000
+"Trying to set variable @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC to 1000000000"
+SET @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC = 1000000000;
+SELECT @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC;
+@@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC
+1000000000
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC = DEFAULT;
+Warnings:
+Warning 1210 RocksDB: rocksdb_rate_limiter_bytes_per_sec cannot be dynamically changed to or from 0. Do a clean shutdown if you want to change it from or to 0.
+SELECT @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC;
+@@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC
+1000000000
+"Trying to set variable @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC to 1000000000000"
+SET @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC = 1000000000000;
+SELECT @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC;
+@@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC
+1000000000000
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC = DEFAULT;
+Warnings:
+Warning 1210 RocksDB: rocksdb_rate_limiter_bytes_per_sec cannot be dynamically changed to or from 0. Do a clean shutdown if you want to change it from or to 0.
+SELECT @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC;
+@@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC
+1000000000000
+"Trying to set variable @@session.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC = 444;
+ERROR HY000: Variable 'rocksdb_rate_limiter_bytes_per_sec' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC to 'aaa'"
+SET @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC;
+@@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC
+1000000000000
+"Trying to set variable @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC to 3.14"
+SET @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC = 3.14;
+Got one of the listed errors
+SELECT @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC;
+@@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC
+1000000000000
+SET @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC = @start_global_value;
+SELECT @@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC;
+@@global.ROCKSDB_RATE_LIMITER_BYTES_PER_SEC
+10000
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+SET @@global.rocksdb_rate_limiter_bytes_per_sec = 0;
+Warnings:
+Warning 1210 RocksDB: rocksdb_rate_limiter_bytes_per_sec cannot be dynamically changed to or from 0. Do a clean shutdown if you want to change it from or to 0.
+SET @@global.rocksdb_rate_limiter_bytes_per_sec = -1;
+Warnings:
+Warning 1292 Truncated incorrect rocksdb_rate_limiter_bytes_pe... value: '-1'
+Warning 1210 RocksDB: rocksdb_rate_limiter_bytes_per_sec cannot be dynamically changed to or from 0. Do a clean shutdown if you want to change it from or to 0.
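[editor's note] The repeated 1210 warnings above encode the rate limiter's one quirk: it can be resized live between non-zero values, but transitions to or from 0 (disabled) only take effect after a clean restart. In sketch form:

  SET GLOBAL rocksdb_rate_limiter_bytes_per_sec = 1000000;  # applied immediately
  SET GLOBAL rocksdb_rate_limiter_bytes_per_sec = 0;        # warns; needs clean shutdown to apply
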
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_basic.result
new file mode 100644
index 00000000000..788379927cf
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_basic.result
@@ -0,0 +1,58 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES('PK_SK');
+INSERT INTO valid_values VALUES('OFF');
+INSERT INTO valid_values VALUES('PK_ONLY');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('a');
+SET @start_global_value = @@global.ROCKSDB_READ_FREE_RPL;
+SELECT @start_global_value;
+@start_global_value
+OFF
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_READ_FREE_RPL to PK_SK"
+SET @@global.ROCKSDB_READ_FREE_RPL = PK_SK;
+SELECT @@global.ROCKSDB_READ_FREE_RPL;
+@@global.ROCKSDB_READ_FREE_RPL
+PK_SK
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_READ_FREE_RPL = DEFAULT;
+SELECT @@global.ROCKSDB_READ_FREE_RPL;
+@@global.ROCKSDB_READ_FREE_RPL
+OFF
+"Trying to set variable @@global.ROCKSDB_READ_FREE_RPL to OFF"
+SET @@global.ROCKSDB_READ_FREE_RPL = OFF;
+SELECT @@global.ROCKSDB_READ_FREE_RPL;
+@@global.ROCKSDB_READ_FREE_RPL
+OFF
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_READ_FREE_RPL = DEFAULT;
+SELECT @@global.ROCKSDB_READ_FREE_RPL;
+@@global.ROCKSDB_READ_FREE_RPL
+OFF
+"Trying to set variable @@global.ROCKSDB_READ_FREE_RPL to PK_ONLY"
+SET @@global.ROCKSDB_READ_FREE_RPL = PK_ONLY;
+SELECT @@global.ROCKSDB_READ_FREE_RPL;
+@@global.ROCKSDB_READ_FREE_RPL
+PK_ONLY
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_READ_FREE_RPL = DEFAULT;
+SELECT @@global.ROCKSDB_READ_FREE_RPL;
+@@global.ROCKSDB_READ_FREE_RPL
+OFF
+"Trying to set variable @@session.ROCKSDB_READ_FREE_RPL to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_READ_FREE_RPL = 444;
+ERROR HY000: Variable 'rocksdb_read_free_rpl' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_READ_FREE_RPL to a"
+SET @@global.ROCKSDB_READ_FREE_RPL = a;
+Got one of the listed errors
+SELECT @@global.ROCKSDB_READ_FREE_RPL;
+@@global.ROCKSDB_READ_FREE_RPL
+OFF
+SET @@global.ROCKSDB_READ_FREE_RPL = @start_global_value;
+SELECT @@global.ROCKSDB_READ_FREE_RPL;
+@@global.ROCKSDB_READ_FREE_RPL
+OFF
+SET GLOBAL ROCKSDB_READ_FREE_RPL=DEFAULT;
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
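[editor's note] This variable is an ENUM rather than a boolean: the result accepts OFF, PK_ONLY, and PK_SK (bare identifiers here; quoted strings should also work for enum sysvars) and rejects anything else. Sketch:

  SET GLOBAL rocksdb_read_free_rpl = 'PK_ONLY';  # read-free replication for PK-only tables
  SET GLOBAL rocksdb_read_free_rpl = DEFAULT;    # back to OFF
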
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_tables_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_tables_basic.result
new file mode 100644
index 00000000000..2643eb08617
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_tables_basic.result
@@ -0,0 +1,49 @@
+call mtr.add_suppression(".*Invalid pattern in rocksdb_read_free_rpl_tables.*");
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES('a');
+INSERT INTO valid_values VALUES('b');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'*\'');
+SET @start_global_value = @@global.ROCKSDB_READ_FREE_RPL_TABLES;
+SELECT @start_global_value;
+@start_global_value
+.*
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_READ_FREE_RPL_TABLES to a"
+SET @@global.ROCKSDB_READ_FREE_RPL_TABLES = a;
+SELECT @@global.ROCKSDB_READ_FREE_RPL_TABLES;
+@@global.ROCKSDB_READ_FREE_RPL_TABLES
+a
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_READ_FREE_RPL_TABLES = DEFAULT;
+SELECT @@global.ROCKSDB_READ_FREE_RPL_TABLES;
+@@global.ROCKSDB_READ_FREE_RPL_TABLES
+.*
+"Trying to set variable @@global.ROCKSDB_READ_FREE_RPL_TABLES to b"
+SET @@global.ROCKSDB_READ_FREE_RPL_TABLES = b;
+SELECT @@global.ROCKSDB_READ_FREE_RPL_TABLES;
+@@global.ROCKSDB_READ_FREE_RPL_TABLES
+b
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_READ_FREE_RPL_TABLES = DEFAULT;
+SELECT @@global.ROCKSDB_READ_FREE_RPL_TABLES;
+@@global.ROCKSDB_READ_FREE_RPL_TABLES
+.*
+"Trying to set variable @@session.ROCKSDB_READ_FREE_RPL_TABLES to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_READ_FREE_RPL_TABLES = 444;
+ERROR HY000: Variable 'rocksdb_read_free_rpl_tables' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_READ_FREE_RPL_TABLES to '*'"
+SET @@global.ROCKSDB_READ_FREE_RPL_TABLES = '*';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_READ_FREE_RPL_TABLES;
+@@global.ROCKSDB_READ_FREE_RPL_TABLES
+.*
+SET @@global.ROCKSDB_READ_FREE_RPL_TABLES = @start_global_value;
+SELECT @@global.ROCKSDB_READ_FREE_RPL_TABLES;
+@@global.ROCKSDB_READ_FREE_RPL_TABLES
+.*
+SET GLOBAL ROCKSDB_READ_FREE_RPL_TABLES=NULL;
+SET GLOBAL ROCKSDB_READ_FREE_RPL_TABLES=DEFAULT;
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
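[editor's note] Here the value is a regular expression matched against table names: the default '.*' matches everything, and a bare '*' is rejected as an invalid pattern (hence the suppressed "Invalid pattern" log line). A sketch with an illustrative pattern of my own, not one from the patch:

  # restrict read-free replication to tables matching a pattern;
  # note the doubled backslash inside the SQL string literal
  SET GLOBAL rocksdb_read_free_rpl_tables = 't\\d+';
  SET GLOBAL rocksdb_read_free_rpl_tables = DEFAULT;  # back to '.*'
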
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_records_in_range_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_records_in_range_basic.result
new file mode 100644
index 00000000000..e866787efe0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_records_in_range_basic.result
@@ -0,0 +1,100 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(222333);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_RECORDS_IN_RANGE;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_RECORDS_IN_RANGE;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_RECORDS_IN_RANGE to 1"
+SET @@global.ROCKSDB_RECORDS_IN_RANGE = 1;
+SELECT @@global.ROCKSDB_RECORDS_IN_RANGE;
+@@global.ROCKSDB_RECORDS_IN_RANGE
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_RECORDS_IN_RANGE = DEFAULT;
+SELECT @@global.ROCKSDB_RECORDS_IN_RANGE;
+@@global.ROCKSDB_RECORDS_IN_RANGE
+0
+"Trying to set variable @@global.ROCKSDB_RECORDS_IN_RANGE to 0"
+SET @@global.ROCKSDB_RECORDS_IN_RANGE = 0;
+SELECT @@global.ROCKSDB_RECORDS_IN_RANGE;
+@@global.ROCKSDB_RECORDS_IN_RANGE
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_RECORDS_IN_RANGE = DEFAULT;
+SELECT @@global.ROCKSDB_RECORDS_IN_RANGE;
+@@global.ROCKSDB_RECORDS_IN_RANGE
+0
+"Trying to set variable @@global.ROCKSDB_RECORDS_IN_RANGE to 222333"
+SET @@global.ROCKSDB_RECORDS_IN_RANGE = 222333;
+SELECT @@global.ROCKSDB_RECORDS_IN_RANGE;
+@@global.ROCKSDB_RECORDS_IN_RANGE
+222333
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_RECORDS_IN_RANGE = DEFAULT;
+SELECT @@global.ROCKSDB_RECORDS_IN_RANGE;
+@@global.ROCKSDB_RECORDS_IN_RANGE
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_RECORDS_IN_RANGE to 1"
+SET @@session.ROCKSDB_RECORDS_IN_RANGE = 1;
+SELECT @@session.ROCKSDB_RECORDS_IN_RANGE;
+@@session.ROCKSDB_RECORDS_IN_RANGE
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_RECORDS_IN_RANGE = DEFAULT;
+SELECT @@session.ROCKSDB_RECORDS_IN_RANGE;
+@@session.ROCKSDB_RECORDS_IN_RANGE
+0
+"Trying to set variable @@session.ROCKSDB_RECORDS_IN_RANGE to 0"
+SET @@session.ROCKSDB_RECORDS_IN_RANGE = 0;
+SELECT @@session.ROCKSDB_RECORDS_IN_RANGE;
+@@session.ROCKSDB_RECORDS_IN_RANGE
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_RECORDS_IN_RANGE = DEFAULT;
+SELECT @@session.ROCKSDB_RECORDS_IN_RANGE;
+@@session.ROCKSDB_RECORDS_IN_RANGE
+0
+"Trying to set variable @@session.ROCKSDB_RECORDS_IN_RANGE to 222333"
+SET @@session.ROCKSDB_RECORDS_IN_RANGE = 222333;
+SELECT @@session.ROCKSDB_RECORDS_IN_RANGE;
+@@session.ROCKSDB_RECORDS_IN_RANGE
+222333
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_RECORDS_IN_RANGE = DEFAULT;
+SELECT @@session.ROCKSDB_RECORDS_IN_RANGE;
+@@session.ROCKSDB_RECORDS_IN_RANGE
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_RECORDS_IN_RANGE to 'aaa'"
+SET @@global.ROCKSDB_RECORDS_IN_RANGE = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_RECORDS_IN_RANGE;
+@@global.ROCKSDB_RECORDS_IN_RANGE
+0
+"Trying to set variable @@global.ROCKSDB_RECORDS_IN_RANGE to 'bbb'"
+SET @@global.ROCKSDB_RECORDS_IN_RANGE = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_RECORDS_IN_RANGE;
+@@global.ROCKSDB_RECORDS_IN_RANGE
+0
+SET @@global.ROCKSDB_RECORDS_IN_RANGE = @start_global_value;
+SELECT @@global.ROCKSDB_RECORDS_IN_RANGE;
+@@global.ROCKSDB_RECORDS_IN_RANGE
+0
+SET @@session.ROCKSDB_RECORDS_IN_RANGE = @start_session_value;
+SELECT @@session.ROCKSDB_RECORDS_IN_RANGE;
+@@session.ROCKSDB_RECORDS_IN_RANGE
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_remove_mariabackup_checkpoint_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_remove_mariabackup_checkpoint_basic.result
new file mode 100644
index 00000000000..01145cd2ab8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_remove_mariabackup_checkpoint_basic.result
@@ -0,0 +1,4 @@
+SET GLOBAL rocksdb_create_checkpoint=CONCAT(@@rocksdb_datadir,'/mariabackup-checkpoint');
+SET GLOBAL rocksdb_remove_mariabackup_checkpoint=ON;
+SET GLOBAL rocksdb_create_checkpoint=CONCAT(@@rocksdb_datadir,'/mariabackup-checkpoint');
+SET GLOBAL rocksdb_remove_mariabackup_checkpoint=ON;
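[editor's note] The create/remove pair is deliberately run twice: the second round verifies that checkpoint removal also copes when a checkpoint directory already exists from a prior run. The pattern, exactly as recorded above:

  SET GLOBAL rocksdb_create_checkpoint = CONCAT(@@rocksdb_datadir, '/mariabackup-checkpoint');
  SET GLOBAL rocksdb_remove_mariabackup_checkpoint = ON;  # cleans up the checkpoint dir
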
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_reset_stats_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_reset_stats_basic.result
new file mode 100644
index 00000000000..d585e73489c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_reset_stats_basic.result
@@ -0,0 +1,97 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_RESET_STATS;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_RESET_STATS to 1"
+SET @@global.ROCKSDB_RESET_STATS = 1;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_RESET_STATS = DEFAULT;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+0
+"Trying to set variable @@global.ROCKSDB_RESET_STATS to 0"
+SET @@global.ROCKSDB_RESET_STATS = 0;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_RESET_STATS = DEFAULT;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+0
+"Trying to set variable @@global.ROCKSDB_RESET_STATS to on"
+SET @@global.ROCKSDB_RESET_STATS = on;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_RESET_STATS = DEFAULT;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+0
+"Trying to set variable @@global.ROCKSDB_RESET_STATS to off"
+SET @@global.ROCKSDB_RESET_STATS = off;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_RESET_STATS = DEFAULT;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+0
+"Trying to set variable @@global.ROCKSDB_RESET_STATS to true"
+SET @@global.ROCKSDB_RESET_STATS = true;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_RESET_STATS = DEFAULT;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+0
+"Trying to set variable @@global.ROCKSDB_RESET_STATS to false"
+SET @@global.ROCKSDB_RESET_STATS = false;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_RESET_STATS = DEFAULT;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+0
+"Trying to set variable @@session.ROCKSDB_RESET_STATS to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_RESET_STATS = 444;
+ERROR HY000: Variable 'rocksdb_reset_stats' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_RESET_STATS to 'aaa'"
+SET @@global.ROCKSDB_RESET_STATS = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+0
+"Trying to set variable @@global.ROCKSDB_RESET_STATS to 'bbb'"
+SET @@global.ROCKSDB_RESET_STATS = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+0
+SET @@global.ROCKSDB_RESET_STATS = @start_global_value;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_rollback_on_timeout_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_rollback_on_timeout_basic.result
new file mode 100644
index 00000000000..e51df4f6834
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_rollback_on_timeout_basic.result
@@ -0,0 +1,97 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT to 1"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = 1;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = DEFAULT;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+0
+"Trying to set variable @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT to 0"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = 0;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = DEFAULT;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+0
+"Trying to set variable @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT to on"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = on;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = DEFAULT;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+0
+"Trying to set variable @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT to off"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = off;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = DEFAULT;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+0
+"Trying to set variable @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT to true"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = true;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = DEFAULT;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+0
+"Trying to set variable @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT to false"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = false;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = DEFAULT;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+0
+"Trying to set variable @@session.ROCKSDB_ROLLBACK_ON_TIMEOUT to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_ROLLBACK_ON_TIMEOUT = 444;
+ERROR HY000: Variable 'rocksdb_rollback_on_timeout' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT to 'aaa'"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+0
+"Trying to set variable @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT to 'bbb'"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+0
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = @start_global_value;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_seconds_between_stat_computes_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_seconds_between_stat_computes_basic.result
new file mode 100644
index 00000000000..ea80d88f653
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_seconds_between_stat_computes_basic.result
@@ -0,0 +1,64 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES;
+SELECT @start_global_value;
+@start_global_value
+3600
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES to 1"
+SET @@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES = 1;
+SELECT @@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES;
+@@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES = DEFAULT;
+SELECT @@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES;
+@@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES
+3600
+"Trying to set variable @@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES to 0"
+SET @@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES = 0;
+SELECT @@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES;
+@@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES = DEFAULT;
+SELECT @@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES;
+@@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES
+3600
+"Trying to set variable @@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES to 1024"
+SET @@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES = 1024;
+SELECT @@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES;
+@@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES
+1024
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES = DEFAULT;
+SELECT @@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES;
+@@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES
+3600
+"Trying to set variable @@session.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES = 444;
+ERROR HY000: Variable 'rocksdb_seconds_between_stat_computes' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES to 'aaa'"
+SET @@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES;
+@@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES
+3600
+"Trying to set variable @@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES to 'bbb'"
+SET @@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES;
+@@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES
+3600
+SET @@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES = @start_global_value;
+SELECT @@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES;
+@@global.ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES
+3600
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_signal_drop_index_thread_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_signal_drop_index_thread_basic.result
new file mode 100644
index 00000000000..94a15275900
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_signal_drop_index_thread_basic.result
@@ -0,0 +1,64 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD to 1"
+SET @@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD = 1;
+SELECT @@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD;
+@@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD = DEFAULT;
+SELECT @@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD;
+@@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD
+0
+"Trying to set variable @@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD to 0"
+SET @@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD = 0;
+SELECT @@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD;
+@@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD = DEFAULT;
+SELECT @@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD;
+@@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD
+0
+"Trying to set variable @@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD to on"
+SET @@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD = on;
+SELECT @@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD;
+@@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD = DEFAULT;
+SELECT @@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD;
+@@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD
+0
+"Trying to set variable @@session.ROCKSDB_SIGNAL_DROP_INDEX_THREAD to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_SIGNAL_DROP_INDEX_THREAD = 444;
+ERROR HY000: Variable 'rocksdb_signal_drop_index_thread' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD to 'aaa'"
+SET @@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD;
+@@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD
+0
+"Trying to set variable @@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD to 'bbb'"
+SET @@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD;
+@@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD
+0
+SET @@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD = @start_global_value;
+SELECT @@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD;
+@@global.ROCKSDB_SIGNAL_DROP_INDEX_THREAD
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
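[editor's note] Note the asymmetry in this result compared to the other booleans: SET to 1 succeeds, yet the immediate SELECT still reads 0. That is consistent with trigger semantics (my reading, suggested by the name and the recorded output): writing a true value wakes the background drop-index thread and the variable then reports its resting state. Sketch:

  SET GLOBAL rocksdb_signal_drop_index_thread = 1;        # wake the thread
  SELECT @@global.rocksdb_signal_drop_index_thread;       # reads 0 again
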
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sim_cache_size_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sim_cache_size_basic.result
new file mode 100644
index 00000000000..ef4c619457b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sim_cache_size_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_SIM_CACHE_SIZE;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_SIM_CACHE_SIZE to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_SIM_CACHE_SIZE = 444;
+ERROR HY000: Variable 'rocksdb_sim_cache_size' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_bloom_filter_on_read_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_bloom_filter_on_read_basic.result
new file mode 100644
index 00000000000..201bc5009ce
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_bloom_filter_on_read_basic.result
@@ -0,0 +1,100 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ to 1"
+SET @@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ = 1;
+SELECT @@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ;
+@@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ = DEFAULT;
+SELECT @@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ;
+@@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ
+0
+"Trying to set variable @@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ to 0"
+SET @@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ = 0;
+SELECT @@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ;
+@@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ = DEFAULT;
+SELECT @@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ;
+@@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ
+0
+"Trying to set variable @@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ to on"
+SET @@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ = on;
+SELECT @@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ;
+@@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ = DEFAULT;
+SELECT @@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ;
+@@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ to 1"
+SET @@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ = 1;
+SELECT @@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ;
+@@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ = DEFAULT;
+SELECT @@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ;
+@@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ
+0
+"Trying to set variable @@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ to 0"
+SET @@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ = 0;
+SELECT @@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ;
+@@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ = DEFAULT;
+SELECT @@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ;
+@@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ
+0
+"Trying to set variable @@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ to on"
+SET @@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ = on;
+SELECT @@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ;
+@@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ = DEFAULT;
+SELECT @@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ;
+@@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ to 'aaa'"
+SET @@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ;
+@@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ
+0
+"Trying to set variable @@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ to 'bbb'"
+SET @@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ;
+@@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ
+0
+SET @@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ = @start_global_value;
+SELECT @@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ;
+@@global.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ
+0
+SET @@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ = @start_session_value;
+SELECT @@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ;
+@@session.ROCKSDB_SKIP_BLOOM_FILTER_ON_READ
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_fill_cache_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_fill_cache_basic.result
new file mode 100644
index 00000000000..a843851cf26
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_fill_cache_basic.result
@@ -0,0 +1,100 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_SKIP_FILL_CACHE;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_SKIP_FILL_CACHE;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_SKIP_FILL_CACHE to 1"
+SET @@global.ROCKSDB_SKIP_FILL_CACHE = 1;
+SELECT @@global.ROCKSDB_SKIP_FILL_CACHE;
+@@global.ROCKSDB_SKIP_FILL_CACHE
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_SKIP_FILL_CACHE = DEFAULT;
+SELECT @@global.ROCKSDB_SKIP_FILL_CACHE;
+@@global.ROCKSDB_SKIP_FILL_CACHE
+0
+"Trying to set variable @@global.ROCKSDB_SKIP_FILL_CACHE to 0"
+SET @@global.ROCKSDB_SKIP_FILL_CACHE = 0;
+SELECT @@global.ROCKSDB_SKIP_FILL_CACHE;
+@@global.ROCKSDB_SKIP_FILL_CACHE
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_SKIP_FILL_CACHE = DEFAULT;
+SELECT @@global.ROCKSDB_SKIP_FILL_CACHE;
+@@global.ROCKSDB_SKIP_FILL_CACHE
+0
+"Trying to set variable @@global.ROCKSDB_SKIP_FILL_CACHE to on"
+SET @@global.ROCKSDB_SKIP_FILL_CACHE = on;
+SELECT @@global.ROCKSDB_SKIP_FILL_CACHE;
+@@global.ROCKSDB_SKIP_FILL_CACHE
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_SKIP_FILL_CACHE = DEFAULT;
+SELECT @@global.ROCKSDB_SKIP_FILL_CACHE;
+@@global.ROCKSDB_SKIP_FILL_CACHE
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_SKIP_FILL_CACHE to 1"
+SET @@session.ROCKSDB_SKIP_FILL_CACHE = 1;
+SELECT @@session.ROCKSDB_SKIP_FILL_CACHE;
+@@session.ROCKSDB_SKIP_FILL_CACHE
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_SKIP_FILL_CACHE = DEFAULT;
+SELECT @@session.ROCKSDB_SKIP_FILL_CACHE;
+@@session.ROCKSDB_SKIP_FILL_CACHE
+0
+"Trying to set variable @@session.ROCKSDB_SKIP_FILL_CACHE to 0"
+SET @@session.ROCKSDB_SKIP_FILL_CACHE = 0;
+SELECT @@session.ROCKSDB_SKIP_FILL_CACHE;
+@@session.ROCKSDB_SKIP_FILL_CACHE
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_SKIP_FILL_CACHE = DEFAULT;
+SELECT @@session.ROCKSDB_SKIP_FILL_CACHE;
+@@session.ROCKSDB_SKIP_FILL_CACHE
+0
+"Trying to set variable @@session.ROCKSDB_SKIP_FILL_CACHE to on"
+SET @@session.ROCKSDB_SKIP_FILL_CACHE = on;
+SELECT @@session.ROCKSDB_SKIP_FILL_CACHE;
+@@session.ROCKSDB_SKIP_FILL_CACHE
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_SKIP_FILL_CACHE = DEFAULT;
+SELECT @@session.ROCKSDB_SKIP_FILL_CACHE;
+@@session.ROCKSDB_SKIP_FILL_CACHE
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_SKIP_FILL_CACHE to 'aaa'"
+SET @@global.ROCKSDB_SKIP_FILL_CACHE = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_SKIP_FILL_CACHE;
+@@global.ROCKSDB_SKIP_FILL_CACHE
+0
+"Trying to set variable @@global.ROCKSDB_SKIP_FILL_CACHE to 'bbb'"
+SET @@global.ROCKSDB_SKIP_FILL_CACHE = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_SKIP_FILL_CACHE;
+@@global.ROCKSDB_SKIP_FILL_CACHE
+0
+SET @@global.ROCKSDB_SKIP_FILL_CACHE = @start_global_value;
+SELECT @@global.ROCKSDB_SKIP_FILL_CACHE;
+@@global.ROCKSDB_SKIP_FILL_CACHE
+0
+SET @@session.ROCKSDB_SKIP_FILL_CACHE = @start_session_value;
+SELECT @@session.ROCKSDB_SKIP_FILL_CACHE;
+@@session.ROCKSDB_SKIP_FILL_CACHE
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_unique_check_tables_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_unique_check_tables_basic.result
new file mode 100644
index 00000000000..a3749b75e47
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_unique_check_tables_basic.result
@@ -0,0 +1,67 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES("aaa");
+INSERT INTO valid_values VALUES("bbb");
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+SET @start_global_value = @@global.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES;
+SELECT @start_global_value;
+@start_global_value
+.*
+SET @start_session_value = @@session.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES;
+SELECT @start_session_value;
+@start_session_value
+.*
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES to aaa"
+SET @@global.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES = aaa;
+SELECT @@global.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES;
+@@global.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES
+aaa
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES = DEFAULT;
+SELECT @@global.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES;
+@@global.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES
+.*
+"Trying to set variable @@global.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES to bbb"
+SET @@global.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES = bbb;
+SELECT @@global.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES;
+@@global.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES
+bbb
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES = DEFAULT;
+SELECT @@global.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES;
+@@global.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES
+.*
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES to aaa"
+SET @@session.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES = aaa;
+SELECT @@session.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES;
+@@session.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES
+aaa
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES = DEFAULT;
+SELECT @@session.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES;
+@@session.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES
+.*
+"Trying to set variable @@session.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES to bbb"
+SET @@session.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES = bbb;
+SELECT @@session.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES;
+@@session.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES
+bbb
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES = DEFAULT;
+SELECT @@session.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES;
+@@session.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES
+.*
+'# Testing with invalid values in global scope #'
+SET @@global.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES = @start_global_value;
+SELECT @@global.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES;
+@@global.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES
+.*
+SET @@session.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES = @start_session_value;
+SELECT @@session.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES;
+@@session.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES
+.*
+SET GLOBAL ROCKSDB_SKIP_UNIQUE_CHECK_TABLES=NULL;
+SET GLOBAL ROCKSDB_SKIP_UNIQUE_CHECK_TABLES=DEFAULT;
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sst_mgr_rate_bytes_per_sec_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sst_mgr_rate_bytes_per_sec_basic.result
new file mode 100644
index 00000000000..a714f1c2fdc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sst_mgr_rate_bytes_per_sec_basic.result
@@ -0,0 +1,85 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+SET @start_global_value = @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC to 100"
+SET @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = 100;
+SELECT @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+@@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+100
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = DEFAULT;
+SELECT @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+@@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+0
+"Trying to set variable @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC to 1"
+SET @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = 1;
+SELECT @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+@@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = DEFAULT;
+SELECT @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+@@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+0
+"Trying to set variable @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC to 0"
+SET @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = 0;
+SELECT @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+@@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = DEFAULT;
+SELECT @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+@@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+0
+"Trying to set variable @@session.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = 444;
+ERROR HY000: Variable 'rocksdb_sst_mgr_rate_bytes_per_sec' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC to 'aaa'"
+SET @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+@@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+0
+"Trying to set variable @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC to 'bbb'"
+SET @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+@@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+0
+"Trying to set variable @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC to '-1'"
+SET @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = '-1';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+@@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+0
+"Trying to set variable @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC to '101'"
+SET @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = '101';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+@@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+0
+"Trying to set variable @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC to '484436'"
+SET @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = '484436';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+@@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+0
+SET @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = @start_global_value;
+SELECT @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+@@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_dump_period_sec_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_dump_period_sec_basic.result
new file mode 100644
index 00000000000..2dbf5a55b87
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_dump_period_sec_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_STATS_DUMP_PERIOD_SEC;
+SELECT @start_global_value;
+@start_global_value
+600
+"Trying to set variable @@global.ROCKSDB_STATS_DUMP_PERIOD_SEC to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_STATS_DUMP_PERIOD_SEC = 444;
+ERROR HY000: Variable 'rocksdb_stats_dump_period_sec' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_level_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_level_basic.result
new file mode 100644
index 00000000000..d8d218fe3e8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_level_basic.result
@@ -0,0 +1,85 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(4);
+INSERT INTO valid_values VALUES(2);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+SET @start_global_value = @@global.ROCKSDB_STATS_LEVEL;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_STATS_LEVEL to 0"
+SET @@global.ROCKSDB_STATS_LEVEL = 0;
+SELECT @@global.ROCKSDB_STATS_LEVEL;
+@@global.ROCKSDB_STATS_LEVEL
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_STATS_LEVEL = DEFAULT;
+SELECT @@global.ROCKSDB_STATS_LEVEL;
+@@global.ROCKSDB_STATS_LEVEL
+0
+"Trying to set variable @@global.ROCKSDB_STATS_LEVEL to 4"
+SET @@global.ROCKSDB_STATS_LEVEL = 4;
+SELECT @@global.ROCKSDB_STATS_LEVEL;
+@@global.ROCKSDB_STATS_LEVEL
+4
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_STATS_LEVEL = DEFAULT;
+SELECT @@global.ROCKSDB_STATS_LEVEL;
+@@global.ROCKSDB_STATS_LEVEL
+0
+"Trying to set variable @@global.ROCKSDB_STATS_LEVEL to 2"
+SET @@global.ROCKSDB_STATS_LEVEL = 2;
+SELECT @@global.ROCKSDB_STATS_LEVEL;
+@@global.ROCKSDB_STATS_LEVEL
+2
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_STATS_LEVEL = DEFAULT;
+SELECT @@global.ROCKSDB_STATS_LEVEL;
+@@global.ROCKSDB_STATS_LEVEL
+0
+"Trying to set variable @@session.ROCKSDB_STATS_LEVEL to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_STATS_LEVEL = 444;
+ERROR HY000: Variable 'rocksdb_stats_level' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_STATS_LEVEL to 'aaa'"
+SET @@global.ROCKSDB_STATS_LEVEL = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_STATS_LEVEL;
+@@global.ROCKSDB_STATS_LEVEL
+0
+"Trying to set variable @@global.ROCKSDB_STATS_LEVEL to 'bbb'"
+SET @@global.ROCKSDB_STATS_LEVEL = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_STATS_LEVEL;
+@@global.ROCKSDB_STATS_LEVEL
+0
+"Trying to set variable @@global.ROCKSDB_STATS_LEVEL to '-1'"
+SET @@global.ROCKSDB_STATS_LEVEL = '-1';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_STATS_LEVEL;
+@@global.ROCKSDB_STATS_LEVEL
+0
+"Trying to set variable @@global.ROCKSDB_STATS_LEVEL to '101'"
+SET @@global.ROCKSDB_STATS_LEVEL = '101';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_STATS_LEVEL;
+@@global.ROCKSDB_STATS_LEVEL
+0
+"Trying to set variable @@global.ROCKSDB_STATS_LEVEL to '484436'"
+SET @@global.ROCKSDB_STATS_LEVEL = '484436';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_STATS_LEVEL;
+@@global.ROCKSDB_STATS_LEVEL
+0
+SET @@global.ROCKSDB_STATS_LEVEL = @start_global_value;
+SELECT @@global.ROCKSDB_STATS_LEVEL;
+@@global.ROCKSDB_STATS_LEVEL
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_recalc_rate_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_recalc_rate_basic.result
new file mode 100644
index 00000000000..4f337b84e3c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_recalc_rate_basic.result
@@ -0,0 +1,53 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'123\'');
+SET @start_global_value = @@global.ROCKSDB_STATS_RECALC_RATE;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_STATS_RECALC_RATE to 100"
+SET @@global.ROCKSDB_STATS_RECALC_RATE = 100;
+SELECT @@global.ROCKSDB_STATS_RECALC_RATE;
+@@global.ROCKSDB_STATS_RECALC_RATE
+100
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_STATS_RECALC_RATE = DEFAULT;
+SELECT @@global.ROCKSDB_STATS_RECALC_RATE;
+@@global.ROCKSDB_STATS_RECALC_RATE
+0
+"Trying to set variable @@global.ROCKSDB_STATS_RECALC_RATE to 1"
+SET @@global.ROCKSDB_STATS_RECALC_RATE = 1;
+SELECT @@global.ROCKSDB_STATS_RECALC_RATE;
+@@global.ROCKSDB_STATS_RECALC_RATE
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_STATS_RECALC_RATE = DEFAULT;
+SELECT @@global.ROCKSDB_STATS_RECALC_RATE;
+@@global.ROCKSDB_STATS_RECALC_RATE
+0
+"Trying to set variable @@session.ROCKSDB_STATS_RECALC_RATE to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_STATS_RECALC_RATE = 444;
+ERROR HY000: Variable 'rocksdb_stats_recalc_rate' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_STATS_RECALC_RATE to 'aaa'"
+SET @@global.ROCKSDB_STATS_RECALC_RATE = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_STATS_RECALC_RATE;
+@@global.ROCKSDB_STATS_RECALC_RATE
+0
+"Trying to set variable @@global.ROCKSDB_STATS_RECALC_RATE to '123'"
+SET @@global.ROCKSDB_STATS_RECALC_RATE = '123';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_STATS_RECALC_RATE;
+@@global.ROCKSDB_STATS_RECALC_RATE
+0
+SET @@global.ROCKSDB_STATS_RECALC_RATE = @start_global_value;
+SELECT @@global.ROCKSDB_STATS_RECALC_RATE;
+@@global.ROCKSDB_STATS_RECALC_RATE
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_store_row_debug_checksums_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_store_row_debug_checksums_basic.result
new file mode 100644
index 00000000000..a838d660a91
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_store_row_debug_checksums_basic.result
@@ -0,0 +1,100 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS to 1"
+SET @@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS = 1;
+SELECT @@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS;
+@@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS = DEFAULT;
+SELECT @@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS;
+@@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS
+0
+"Trying to set variable @@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS to 0"
+SET @@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS = 0;
+SELECT @@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS;
+@@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS = DEFAULT;
+SELECT @@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS;
+@@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS
+0
+"Trying to set variable @@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS to on"
+SET @@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS = on;
+SELECT @@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS;
+@@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS = DEFAULT;
+SELECT @@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS;
+@@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS to 1"
+SET @@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS = 1;
+SELECT @@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS;
+@@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS = DEFAULT;
+SELECT @@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS;
+@@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS
+0
+"Trying to set variable @@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS to 0"
+SET @@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS = 0;
+SELECT @@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS;
+@@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS = DEFAULT;
+SELECT @@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS;
+@@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS
+0
+"Trying to set variable @@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS to on"
+SET @@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS = on;
+SELECT @@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS;
+@@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS = DEFAULT;
+SELECT @@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS;
+@@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS to 'aaa'"
+SET @@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS;
+@@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS
+0
+"Trying to set variable @@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS to 'bbb'"
+SET @@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS;
+@@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS
+0
+SET @@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS = @start_global_value;
+SELECT @@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS;
+@@global.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS
+0
+SET @@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS = @start_session_value;
+SELECT @@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS;
+@@session.ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_strict_collation_check_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_strict_collation_check_basic.result
new file mode 100644
index 00000000000..46d238d1fa3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_strict_collation_check_basic.result
@@ -0,0 +1,75 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_STRICT_COLLATION_CHECK;
+SELECT @start_global_value;
+@start_global_value
+1
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_STRICT_COLLATION_CHECK to 1"
+SET @@global.ROCKSDB_STRICT_COLLATION_CHECK = 1;
+SELECT @@global.ROCKSDB_STRICT_COLLATION_CHECK;
+@@global.ROCKSDB_STRICT_COLLATION_CHECK
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_STRICT_COLLATION_CHECK = DEFAULT;
+SELECT @@global.ROCKSDB_STRICT_COLLATION_CHECK;
+@@global.ROCKSDB_STRICT_COLLATION_CHECK
+1
+"Trying to set variable @@global.ROCKSDB_STRICT_COLLATION_CHECK to 0"
+SET @@global.ROCKSDB_STRICT_COLLATION_CHECK = 0;
+SELECT @@global.ROCKSDB_STRICT_COLLATION_CHECK;
+@@global.ROCKSDB_STRICT_COLLATION_CHECK
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_STRICT_COLLATION_CHECK = DEFAULT;
+SELECT @@global.ROCKSDB_STRICT_COLLATION_CHECK;
+@@global.ROCKSDB_STRICT_COLLATION_CHECK
+1
+"Trying to set variable @@global.ROCKSDB_STRICT_COLLATION_CHECK to on"
+SET @@global.ROCKSDB_STRICT_COLLATION_CHECK = on;
+SELECT @@global.ROCKSDB_STRICT_COLLATION_CHECK;
+@@global.ROCKSDB_STRICT_COLLATION_CHECK
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_STRICT_COLLATION_CHECK = DEFAULT;
+SELECT @@global.ROCKSDB_STRICT_COLLATION_CHECK;
+@@global.ROCKSDB_STRICT_COLLATION_CHECK
+1
+"Trying to set variable @@global.ROCKSDB_STRICT_COLLATION_CHECK to off"
+SET @@global.ROCKSDB_STRICT_COLLATION_CHECK = off;
+SELECT @@global.ROCKSDB_STRICT_COLLATION_CHECK;
+@@global.ROCKSDB_STRICT_COLLATION_CHECK
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_STRICT_COLLATION_CHECK = DEFAULT;
+SELECT @@global.ROCKSDB_STRICT_COLLATION_CHECK;
+@@global.ROCKSDB_STRICT_COLLATION_CHECK
+1
+"Trying to set variable @@session.ROCKSDB_STRICT_COLLATION_CHECK to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_STRICT_COLLATION_CHECK = 444;
+ERROR HY000: Variable 'rocksdb_strict_collation_check' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_STRICT_COLLATION_CHECK to 'aaa'"
+SET @@global.ROCKSDB_STRICT_COLLATION_CHECK = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_STRICT_COLLATION_CHECK;
+@@global.ROCKSDB_STRICT_COLLATION_CHECK
+1
+"Trying to set variable @@global.ROCKSDB_STRICT_COLLATION_CHECK to 'bbb'"
+SET @@global.ROCKSDB_STRICT_COLLATION_CHECK = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_STRICT_COLLATION_CHECK;
+@@global.ROCKSDB_STRICT_COLLATION_CHECK
+1
+SET @@global.ROCKSDB_STRICT_COLLATION_CHECK = @start_global_value;
+SELECT @@global.ROCKSDB_STRICT_COLLATION_CHECK;
+@@global.ROCKSDB_STRICT_COLLATION_CHECK
+1
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_strict_collation_exceptions_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_strict_collation_exceptions_basic.result
new file mode 100644
index 00000000000..5f748621d25
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_strict_collation_exceptions_basic.result
@@ -0,0 +1,36 @@
+SET @start_global_value = @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS;
+SELECT @start_global_value;
+@start_global_value
+
+"Trying to set @session.ROCKSDB_STRICT_COLLATION_EXCEPTIONS to simple table name."
+SET @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS = mytable;
+SELECT @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS;
+@@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS
+mytable
+"Trying to set @session.ROCKSDB_STRICT_COLLATION_EXCEPTIONS to regex table name(s)."
+SET @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS = "t.*";
+SELECT @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS;
+@@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS
+t.*
+"Trying to set @session.ROCKSDB_STRICT_COLLATION_EXCEPTIONS to multiple regex table names."
+SET @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS = "s.*,t.*";
+SELECT @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS;
+@@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS
+s.*,t.*
+"Trying to set @session.ROCKSDB_STRICT_COLLATION_EXCEPTIONS to empty."
+SET @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS = "";
+SELECT @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS;
+@@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS
+
+"Trying to set @session.ROCKSDB_STRICT_COLLATION_EXCEPTIONS to default."
+SET @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS = DEFAULT;
+SELECT @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS;
+@@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS
+
+"Trying to set @session.ROCKSDB_STRICT_COLLATION_EXCEPTIONS to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_STRICT_COLLATION_EXCEPTIONS = 444;
+ERROR HY000: Variable 'rocksdb_strict_collation_exceptions' is a GLOBAL variable and should be set with SET GLOBAL
+SET @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS = @start_global_value;
+SELECT @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS;
+@@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_supported_compression_types_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_supported_compression_types_basic.result
new file mode 100644
index 00000000000..aa77d739120
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_supported_compression_types_basic.result
@@ -0,0 +1,4 @@
+SET @start_global_value = @@global.ROCKSDB_SUPPORTED_COMPRESSION_TYPES;
+"Trying to set variable @@global.ROCKSDB_SUPPORTED_COMPRESSION_TYPES to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_SUPPORTED_COMPRESSION_TYPES = 444;
+ERROR HY000: Variable 'rocksdb_supported_compression_types' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_table_cache_numshardbits_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_table_cache_numshardbits_basic.result
new file mode 100644
index 00000000000..0161a339082
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_table_cache_numshardbits_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_TABLE_CACHE_NUMSHARDBITS;
+SELECT @start_global_value;
+@start_global_value
+6
+"Trying to set variable @@global.ROCKSDB_TABLE_CACHE_NUMSHARDBITS to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_TABLE_CACHE_NUMSHARDBITS = 444;
+ERROR HY000: Variable 'rocksdb_table_cache_numshardbits' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_table_stats_sampling_pct_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_table_stats_sampling_pct_basic.result
new file mode 100644
index 00000000000..6ff47ab9569
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_table_stats_sampling_pct_basic.result
@@ -0,0 +1,85 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+SET @start_global_value = @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT;
+SELECT @start_global_value;
+@start_global_value
+10
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT to 100"
+SET @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT = 100;
+SELECT @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT;
+@@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT
+100
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT = DEFAULT;
+SELECT @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT;
+@@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT
+10
+"Trying to set variable @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT to 1"
+SET @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT = 1;
+SELECT @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT;
+@@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT = DEFAULT;
+SELECT @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT;
+@@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT
+10
+"Trying to set variable @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT to 0"
+SET @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT = 0;
+SELECT @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT;
+@@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT = DEFAULT;
+SELECT @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT;
+@@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT
+10
+"Trying to set variable @@session.ROCKSDB_TABLE_STATS_SAMPLING_PCT to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_TABLE_STATS_SAMPLING_PCT = 444;
+ERROR HY000: Variable 'rocksdb_table_stats_sampling_pct' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT to 'aaa'"
+SET @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT;
+@@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT
+10
+"Trying to set variable @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT to 'bbb'"
+SET @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT;
+@@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT
+10
+"Trying to set variable @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT to '-1'"
+SET @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT = '-1';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT;
+@@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT
+10
+"Trying to set variable @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT to '101'"
+SET @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT = '101';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT;
+@@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT
+10
+"Trying to set variable @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT to '484436'"
+SET @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT = '484436';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT;
+@@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT
+10
+SET @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT = @start_global_value;
+SELECT @@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT;
+@@global.ROCKSDB_TABLE_STATS_SAMPLING_PCT
+10
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_tmpdir_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_tmpdir_basic.result
new file mode 100644
index 00000000000..25b19ee56a4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_tmpdir_basic.result
@@ -0,0 +1,29 @@
+SET @start_global_value = @@global.rocksdb_tmpdir;
+SELECT @start_global_value;
+@start_global_value
+
+select @@session.rocksdb_tmpdir;
+@@session.rocksdb_tmpdir
+
+show global variables like 'rocksdb_tmpdir';
+Variable_name Value
+rocksdb_tmpdir
+show session variables like 'rocksdb_tmpdir';
+Variable_name Value
+rocksdb_tmpdir
+select * from information_schema.global_variables where variable_name='rocksdb_tmpdir';
+VARIABLE_NAME VARIABLE_VALUE
+ROCKSDB_TMPDIR
+select * from information_schema.session_variables where variable_name='rocksdb_tmpdir';
+VARIABLE_NAME VARIABLE_VALUE
+ROCKSDB_TMPDIR
+set global rocksdb_tmpdir='value';
+set session rocksdb_tmpdir='value';
+set global rocksdb_tmpdir=1.1;
+ERROR 42000: Incorrect argument type to variable 'rocksdb_tmpdir'
+set global rocksdb_tmpdir=1e1;
+ERROR 42000: Incorrect argument type to variable 'rocksdb_tmpdir'
+SET @@global.rocksdb_tmpdir = @start_global_value;
+SELECT @@global.rocksdb_tmpdir;
+@@global.rocksdb_tmpdir
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_trace_sst_api_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_trace_sst_api_basic.result
new file mode 100644
index 00000000000..d4ffde80001
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_trace_sst_api_basic.result
@@ -0,0 +1,100 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_TRACE_SST_API;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_TRACE_SST_API;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_TRACE_SST_API to 1"
+SET @@global.ROCKSDB_TRACE_SST_API = 1;
+SELECT @@global.ROCKSDB_TRACE_SST_API;
+@@global.ROCKSDB_TRACE_SST_API
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_TRACE_SST_API = DEFAULT;
+SELECT @@global.ROCKSDB_TRACE_SST_API;
+@@global.ROCKSDB_TRACE_SST_API
+0
+"Trying to set variable @@global.ROCKSDB_TRACE_SST_API to 0"
+SET @@global.ROCKSDB_TRACE_SST_API = 0;
+SELECT @@global.ROCKSDB_TRACE_SST_API;
+@@global.ROCKSDB_TRACE_SST_API
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_TRACE_SST_API = DEFAULT;
+SELECT @@global.ROCKSDB_TRACE_SST_API;
+@@global.ROCKSDB_TRACE_SST_API
+0
+"Trying to set variable @@global.ROCKSDB_TRACE_SST_API to on"
+SET @@global.ROCKSDB_TRACE_SST_API = on;
+SELECT @@global.ROCKSDB_TRACE_SST_API;
+@@global.ROCKSDB_TRACE_SST_API
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_TRACE_SST_API = DEFAULT;
+SELECT @@global.ROCKSDB_TRACE_SST_API;
+@@global.ROCKSDB_TRACE_SST_API
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_TRACE_SST_API to 1"
+SET @@session.ROCKSDB_TRACE_SST_API = 1;
+SELECT @@session.ROCKSDB_TRACE_SST_API;
+@@session.ROCKSDB_TRACE_SST_API
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_TRACE_SST_API = DEFAULT;
+SELECT @@session.ROCKSDB_TRACE_SST_API;
+@@session.ROCKSDB_TRACE_SST_API
+0
+"Trying to set variable @@session.ROCKSDB_TRACE_SST_API to 0"
+SET @@session.ROCKSDB_TRACE_SST_API = 0;
+SELECT @@session.ROCKSDB_TRACE_SST_API;
+@@session.ROCKSDB_TRACE_SST_API
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_TRACE_SST_API = DEFAULT;
+SELECT @@session.ROCKSDB_TRACE_SST_API;
+@@session.ROCKSDB_TRACE_SST_API
+0
+"Trying to set variable @@session.ROCKSDB_TRACE_SST_API to on"
+SET @@session.ROCKSDB_TRACE_SST_API = on;
+SELECT @@session.ROCKSDB_TRACE_SST_API;
+@@session.ROCKSDB_TRACE_SST_API
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_TRACE_SST_API = DEFAULT;
+SELECT @@session.ROCKSDB_TRACE_SST_API;
+@@session.ROCKSDB_TRACE_SST_API
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_TRACE_SST_API to 'aaa'"
+SET @@global.ROCKSDB_TRACE_SST_API = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_TRACE_SST_API;
+@@global.ROCKSDB_TRACE_SST_API
+0
+"Trying to set variable @@global.ROCKSDB_TRACE_SST_API to 'bbb'"
+SET @@global.ROCKSDB_TRACE_SST_API = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_TRACE_SST_API;
+@@global.ROCKSDB_TRACE_SST_API
+0
+SET @@global.ROCKSDB_TRACE_SST_API = @start_global_value;
+SELECT @@global.ROCKSDB_TRACE_SST_API;
+@@global.ROCKSDB_TRACE_SST_API
+0
+SET @@session.ROCKSDB_TRACE_SST_API = @start_session_value;
+SELECT @@session.ROCKSDB_TRACE_SST_API;
+@@session.ROCKSDB_TRACE_SST_API
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_two_write_queues_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_two_write_queues_basic.result
new file mode 100644
index 00000000000..5a19016bf91
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_two_write_queues_basic.result
@@ -0,0 +1,14 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_TWO_WRITE_QUEUES;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_TWO_WRITE_QUEUES to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_TWO_WRITE_QUEUES = 444;
+ERROR HY000: Variable 'rocksdb_two_write_queues' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_unsafe_for_binlog_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_unsafe_for_binlog_basic.result
new file mode 100644
index 00000000000..c9748cc6306
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_unsafe_for_binlog_basic.result
@@ -0,0 +1,100 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_UNSAFE_FOR_BINLOG;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_UNSAFE_FOR_BINLOG;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_UNSAFE_FOR_BINLOG to 1"
+SET @@global.ROCKSDB_UNSAFE_FOR_BINLOG = 1;
+SELECT @@global.ROCKSDB_UNSAFE_FOR_BINLOG;
+@@global.ROCKSDB_UNSAFE_FOR_BINLOG
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_UNSAFE_FOR_BINLOG = DEFAULT;
+SELECT @@global.ROCKSDB_UNSAFE_FOR_BINLOG;
+@@global.ROCKSDB_UNSAFE_FOR_BINLOG
+0
+"Trying to set variable @@global.ROCKSDB_UNSAFE_FOR_BINLOG to 0"
+SET @@global.ROCKSDB_UNSAFE_FOR_BINLOG = 0;
+SELECT @@global.ROCKSDB_UNSAFE_FOR_BINLOG;
+@@global.ROCKSDB_UNSAFE_FOR_BINLOG
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_UNSAFE_FOR_BINLOG = DEFAULT;
+SELECT @@global.ROCKSDB_UNSAFE_FOR_BINLOG;
+@@global.ROCKSDB_UNSAFE_FOR_BINLOG
+0
+"Trying to set variable @@global.ROCKSDB_UNSAFE_FOR_BINLOG to on"
+SET @@global.ROCKSDB_UNSAFE_FOR_BINLOG = on;
+SELECT @@global.ROCKSDB_UNSAFE_FOR_BINLOG;
+@@global.ROCKSDB_UNSAFE_FOR_BINLOG
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_UNSAFE_FOR_BINLOG = DEFAULT;
+SELECT @@global.ROCKSDB_UNSAFE_FOR_BINLOG;
+@@global.ROCKSDB_UNSAFE_FOR_BINLOG
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_UNSAFE_FOR_BINLOG to 1"
+SET @@session.ROCKSDB_UNSAFE_FOR_BINLOG = 1;
+SELECT @@session.ROCKSDB_UNSAFE_FOR_BINLOG;
+@@session.ROCKSDB_UNSAFE_FOR_BINLOG
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_UNSAFE_FOR_BINLOG = DEFAULT;
+SELECT @@session.ROCKSDB_UNSAFE_FOR_BINLOG;
+@@session.ROCKSDB_UNSAFE_FOR_BINLOG
+0
+"Trying to set variable @@session.ROCKSDB_UNSAFE_FOR_BINLOG to 0"
+SET @@session.ROCKSDB_UNSAFE_FOR_BINLOG = 0;
+SELECT @@session.ROCKSDB_UNSAFE_FOR_BINLOG;
+@@session.ROCKSDB_UNSAFE_FOR_BINLOG
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_UNSAFE_FOR_BINLOG = DEFAULT;
+SELECT @@session.ROCKSDB_UNSAFE_FOR_BINLOG;
+@@session.ROCKSDB_UNSAFE_FOR_BINLOG
+0
+"Trying to set variable @@session.ROCKSDB_UNSAFE_FOR_BINLOG to on"
+SET @@session.ROCKSDB_UNSAFE_FOR_BINLOG = on;
+SELECT @@session.ROCKSDB_UNSAFE_FOR_BINLOG;
+@@session.ROCKSDB_UNSAFE_FOR_BINLOG
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_UNSAFE_FOR_BINLOG = DEFAULT;
+SELECT @@session.ROCKSDB_UNSAFE_FOR_BINLOG;
+@@session.ROCKSDB_UNSAFE_FOR_BINLOG
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_UNSAFE_FOR_BINLOG to 'aaa'"
+SET @@global.ROCKSDB_UNSAFE_FOR_BINLOG = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_UNSAFE_FOR_BINLOG;
+@@global.ROCKSDB_UNSAFE_FOR_BINLOG
+0
+"Trying to set variable @@global.ROCKSDB_UNSAFE_FOR_BINLOG to 'bbb'"
+SET @@global.ROCKSDB_UNSAFE_FOR_BINLOG = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_UNSAFE_FOR_BINLOG;
+@@global.ROCKSDB_UNSAFE_FOR_BINLOG
+0
+SET @@global.ROCKSDB_UNSAFE_FOR_BINLOG = @start_global_value;
+SELECT @@global.ROCKSDB_UNSAFE_FOR_BINLOG;
+@@global.ROCKSDB_UNSAFE_FOR_BINLOG
+0
+SET @@session.ROCKSDB_UNSAFE_FOR_BINLOG = @start_session_value;
+SELECT @@session.ROCKSDB_UNSAFE_FOR_BINLOG;
+@@session.ROCKSDB_UNSAFE_FOR_BINLOG
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options.result
new file mode 100644
index 00000000000..126b4cffe8b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options.result
@@ -0,0 +1,38 @@
+CREATE TABLE t1 (a INT, PRIMARY KEY (a) COMMENT 'update_cf1') ENGINE=ROCKSDB;
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};';
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+update_cf1={write_buffer_size=8m;target_file_size_base=2m};
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL;
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+Variable_name Value
+rocksdb_update_cf_options
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL;
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+Variable_name Value
+rocksdb_update_cf_options
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS="";
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+Variable_name Value
+rocksdb_update_cf_options
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL;
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+Variable_name Value
+rocksdb_update_cf_options
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};';
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+Variable_name Value
+rocksdb_update_cf_options update_cf1={write_buffer_size=8m;target_file_size_base=2m};
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf2={write_buffer_size=8m;target_file_size_base=2m};';
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+Variable_name Value
+rocksdb_update_cf_options update_cf2={write_buffer_size=8m;target_file_size_base=2m};
+DROP TABLE t1;
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};';
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+Variable_name Value
+rocksdb_update_cf_options update_cf1={write_buffer_size=8m;target_file_size_base=2m};
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=DEFAULT;
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+Variable_name Value
+rocksdb_update_cf_options
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result
new file mode 100644
index 00000000000..dcc7e1f68db
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result
@@ -0,0 +1,126 @@
+call mtr.add_suppression("MyRocks: NULL is not a valid option for updates to column family settings.");
+call mtr.add_suppression("Invalid cf options, '=' expected *");
+call mtr.add_suppression("MyRocks: failed to parse the updated column family options = *");
+call mtr.add_suppression("Invalid cf config for default in override options *");
+DROP TABLE IF EXISTS t1;
+Warnings:
+Note 1051 Unknown table 'test.t1'
+CREATE TABLE `t1` (
+`col1` bigint(20) NOT NULL,
+`col2` varbinary(64) NOT NULL,
+`col3` varbinary(256) NOT NULL,
+`col4` bigint(20) NOT NULL,
+`col5` mediumblob NOT NULL,
+PRIMARY KEY (`col1`,`col2`,`col3`) COMMENT 'custom_p0_cfname=cf1;custom_p1_cfname=cf2',
+UNIQUE KEY (`col2`, `col4`) COMMENT 'custom_p5_cfname=cf3'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+PARTITION BY LIST COLUMNS (`col2`) (
+PARTITION custom_p0 VALUES IN (0x12345),
+PARTITION custom_p1 VALUES IN (0x23456),
+PARTITION custom_p2 VALUES IN (0x34567),
+PARTITION custom_p3 VALUES IN (0x45678),
+PARTITION custom_p4 VALUES IN (0x56789),
+PARTITION custom_p5 VALUES IN (0x6789A),
+PARTITION custom_p6 VALUES IN (0x789AB),
+PARTITION custom_p7 VALUES IN (0x89ABC)
+);
+USE information_schema;
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+NULL
+SET @@global.rocksdb_update_cf_options = NULL;
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+NULL
+SET @@global.rocksdb_update_cf_options = NULL;
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+NULL
+SET @@global.rocksdb_update_cf_options = '';
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+
+SET @@global.rocksdb_update_cf_options = 'aaaaa';;
+ERROR 42000: Variable 'rocksdb_update_cf_options' can't be set to the value of 'aaaaa'
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
+CF_NAME OPTION_TYPE VALUE
+default WRITE_BUFFER_SIZE 67108864
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+CF_NAME OPTION_TYPE VALUE
+default TARGET_FILE_SIZE_BASE 67108864
+SET @@global.rocksdb_update_cf_options = 'default={write_buffer_size=8m;target_file_size_base=2m};';
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+default={write_buffer_size=8m;target_file_size_base=2m};
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
+CF_NAME OPTION_TYPE VALUE
+default WRITE_BUFFER_SIZE 8388608
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+CF_NAME OPTION_TYPE VALUE
+default TARGET_FILE_SIZE_BASE 2097152
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
+CF_NAME OPTION_TYPE VALUE
+cf1 WRITE_BUFFER_SIZE 67108864
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+CF_NAME OPTION_TYPE VALUE
+cf1 TARGET_FILE_SIZE_BASE 67108864
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf2' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
+CF_NAME OPTION_TYPE VALUE
+cf2 WRITE_BUFFER_SIZE 67108864
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf2' AND OPTION_TYPE='MAX_BYTES_FOR_LEVEL_MULTIPLIER';
+CF_NAME OPTION_TYPE VALUE
+cf2 MAX_BYTES_FOR_LEVEL_MULTIPLIER 10.000000
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf3' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+CF_NAME OPTION_TYPE VALUE
+cf3 TARGET_FILE_SIZE_BASE 67108864
+SET @@global.rocksdb_update_cf_options = 'cf1={write_buffer_size=8m;target_file_size_base=2m};cf2={write_buffer_size=16m;max_bytes_for_level_multiplier=8};cf3={target_file_size_base=4m};';
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+cf1={write_buffer_size=8m;target_file_size_base=2m};cf2={write_buffer_size=16m;max_bytes_for_level_multiplier=8};cf3={target_file_size_base=4m};
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
+CF_NAME OPTION_TYPE VALUE
+cf1 WRITE_BUFFER_SIZE 8388608
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+CF_NAME OPTION_TYPE VALUE
+cf1 TARGET_FILE_SIZE_BASE 2097152
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf2' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
+CF_NAME OPTION_TYPE VALUE
+cf2 WRITE_BUFFER_SIZE 16777216
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf2' AND OPTION_TYPE='MAX_BYTES_FOR_LEVEL_MULTIPLIER';
+CF_NAME OPTION_TYPE VALUE
+cf2 MAX_BYTES_FOR_LEVEL_MULTIPLIER 8.000000
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf3' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+CF_NAME OPTION_TYPE VALUE
+cf3 TARGET_FILE_SIZE_BASE 4194304
+SET @@global.rocksdb_update_cf_options = 'cf3={target_file_size_base=24m};';
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+cf3={target_file_size_base=24m};
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf3' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+CF_NAME OPTION_TYPE VALUE
+cf3 TARGET_FILE_SIZE_BASE 25165824
+SET @@global.rocksdb_update_cf_options = 'cf1={target_file_size_base=24m};foo={max_bytes_for_level_multiplier=8};';
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+cf1={target_file_size_base=24m};foo={max_bytes_for_level_multiplier=8};
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+CF_NAME OPTION_TYPE VALUE
+cf1 TARGET_FILE_SIZE_BASE 25165824
+SET @@global.rocksdb_update_cf_options = 'default={foo=bar};';;
+ERROR 42000: Variable 'rocksdb_update_cf_options' can't be set to the value of 'default={foo=bar};'
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+cf1={target_file_size_base=24m};foo={max_bytes_for_level_multiplier=8};
+SET @@global.rocksdb_update_cf_options = 'default={write_buffer_size=67108864;target_file_size_base=67108864};';
+SET @@global.rocksdb_update_cf_options = 'cf1={write_buffer_size=67108864;target_file_size_base=67108864};';
+SET @@global.rocksdb_update_cf_options = 'cf2={write_buffer_size=67108864;target_file_size_base=67108864;max_bytes_for_level_multiplier=10.000000};';
+SET @@global.rocksdb_update_cf_options = 'cf3={write_buffer_size=67108864;target_file_size_base=67108864};';
+SET @@global.rocksdb_update_cf_options = NULL;
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+NULL
+USE test;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_adaptive_mutex_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_adaptive_mutex_basic.result
new file mode 100644
index 00000000000..ef4007c7549
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_adaptive_mutex_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_USE_ADAPTIVE_MUTEX;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_USE_ADAPTIVE_MUTEX to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_USE_ADAPTIVE_MUTEX = 444;
+ERROR HY000: Variable 'rocksdb_use_adaptive_mutex' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_clock_cache_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_clock_cache_basic.result
new file mode 100644
index 00000000000..f23d1889027
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_clock_cache_basic.result
@@ -0,0 +1,19 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_USE_CLOCK_CACHE;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_USE_CLOCK_CACHE to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_USE_CLOCK_CACHE = 444;
+ERROR HY000: Variable 'rocksdb_use_clock_cache' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_direct_io_for_flush_and_compaction_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_direct_io_for_flush_and_compaction_basic.result
new file mode 100644
index 00000000000..219cdb7319c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_direct_io_for_flush_and_compaction_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_USE_DIRECT_IO_FOR_FLUSH_AND_COMPACTION;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_USE_DIRECT_IO_FOR_FLUSH_AND_COMPACTION to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_USE_DIRECT_IO_FOR_FLUSH_AND_COMPACTION = 444;
+ERROR HY000: Variable 'rocksdb_use_direct_io_for_flush_and_compaction' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_direct_reads_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_direct_reads_basic.result
new file mode 100644
index 00000000000..ec36c309dca
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_direct_reads_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_USE_DIRECT_READS;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_USE_DIRECT_READS to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_USE_DIRECT_READS = 444;
+ERROR HY000: Variable 'rocksdb_use_direct_reads' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_fsync_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_fsync_basic.result
new file mode 100644
index 00000000000..254cc2ceb5d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_fsync_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_USE_FSYNC;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_USE_FSYNC to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_USE_FSYNC = 444;
+ERROR HY000: Variable 'rocksdb_use_fsync' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_validate_tables_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_validate_tables_basic.result
new file mode 100644
index 00000000000..c7b874877f8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_validate_tables_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_VALIDATE_TABLES;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_VALIDATE_TABLES to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_VALIDATE_TABLES = 444;
+ERROR HY000: Variable 'rocksdb_validate_tables' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_verify_row_debug_checksums_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_verify_row_debug_checksums_basic.result
new file mode 100644
index 00000000000..ad71c8909a6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_verify_row_debug_checksums_basic.result
@@ -0,0 +1,100 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS to 1"
+SET @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS = 1;
+SELECT @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS;
+@@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS = DEFAULT;
+SELECT @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS;
+@@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS
+0
+"Trying to set variable @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS to 0"
+SET @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS = 0;
+SELECT @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS;
+@@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS = DEFAULT;
+SELECT @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS;
+@@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS
+0
+"Trying to set variable @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS to on"
+SET @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS = on;
+SELECT @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS;
+@@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS = DEFAULT;
+SELECT @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS;
+@@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS to 1"
+SET @@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS = 1;
+SELECT @@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS;
+@@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS = DEFAULT;
+SELECT @@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS;
+@@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS
+0
+"Trying to set variable @@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS to 0"
+SET @@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS = 0;
+SELECT @@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS;
+@@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS = DEFAULT;
+SELECT @@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS;
+@@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS
+0
+"Trying to set variable @@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS to on"
+SET @@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS = on;
+SELECT @@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS;
+@@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS = DEFAULT;
+SELECT @@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS;
+@@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS to 'aaa'"
+SET @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS;
+@@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS
+0
+"Trying to set variable @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS to 'bbb'"
+SET @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS;
+@@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS
+0
+SET @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS = @start_global_value;
+SELECT @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS;
+@@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS
+0
+SET @@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS = @start_session_value;
+SELECT @@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS;
+@@session.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
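Note: for a writable boolean that exists at both scopes, the recorded pattern is save, set, verify, restore at each scope. A condensed sketch of the statements being exercised (names taken from the result above; the session block is identical with @@session in place of @@global):

    SET @start_global_value = @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS;
    SET @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS = 1;
    SELECT @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS;                    -- expect 1
    SET @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS = DEFAULT;             -- back to 0
    SET @@global.ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS = @start_global_value; -- restore

Invalid strings such as 'aaa' are expected to raise an error and leave the value unchanged.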
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_bytes_per_sync_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_bytes_per_sync_basic.result
new file mode 100644
index 00000000000..f432f1f7750
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_bytes_per_sync_basic.result
@@ -0,0 +1,85 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+SET @start_global_value = @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 100"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 100;
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+100
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = DEFAULT;
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 1"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 1;
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = DEFAULT;
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 0"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 0;
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = DEFAULT;
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+"Trying to set variable @@session.ROCKSDB_WAL_BYTES_PER_SYNC to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_WAL_BYTES_PER_SYNC = 444;
+ERROR HY000: Variable 'rocksdb_wal_bytes_per_sync' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 'aaa'"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 'bbb'"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to '-1'"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = '-1';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to '101'"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = '101';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to '484436'"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = '484436';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = @start_global_value;
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
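Note: rocksdb_wal_bytes_per_sync is global-only, so this template additionally verifies that a session-scope assignment is rejected. In mysqltest terms, a sketch of that check:

    --error ER_GLOBAL_VARIABLE
    SET @@session.ROCKSDB_WAL_BYTES_PER_SYNC = 444;
    SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 100;   -- the global scope works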
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_dir_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_dir_basic.result
new file mode 100644
index 00000000000..fd76a5ec00f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_dir_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_WAL_DIR;
+SELECT @start_global_value;
+@start_global_value
+
+"Trying to set variable @@global.ROCKSDB_WAL_DIR to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_WAL_DIR = 444;
+ERROR HY000: Variable 'rocksdb_wal_dir' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_recovery_mode_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_recovery_mode_basic.result
new file mode 100644
index 00000000000..9fec4a24bd8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_recovery_mode_basic.result
@@ -0,0 +1,46 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_WAL_RECOVERY_MODE;
+SELECT @start_global_value;
+@start_global_value
+1
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_WAL_RECOVERY_MODE to 1"
+SET @@global.ROCKSDB_WAL_RECOVERY_MODE = 1;
+SELECT @@global.ROCKSDB_WAL_RECOVERY_MODE;
+@@global.ROCKSDB_WAL_RECOVERY_MODE
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_WAL_RECOVERY_MODE = DEFAULT;
+SELECT @@global.ROCKSDB_WAL_RECOVERY_MODE;
+@@global.ROCKSDB_WAL_RECOVERY_MODE
+1
+"Trying to set variable @@global.ROCKSDB_WAL_RECOVERY_MODE to 0"
+SET @@global.ROCKSDB_WAL_RECOVERY_MODE = 0;
+SELECT @@global.ROCKSDB_WAL_RECOVERY_MODE;
+@@global.ROCKSDB_WAL_RECOVERY_MODE
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_WAL_RECOVERY_MODE = DEFAULT;
+SELECT @@global.ROCKSDB_WAL_RECOVERY_MODE;
+@@global.ROCKSDB_WAL_RECOVERY_MODE
+1
+"Trying to set variable @@session.ROCKSDB_WAL_RECOVERY_MODE to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_WAL_RECOVERY_MODE = 444;
+ERROR HY000: Variable 'rocksdb_wal_recovery_mode' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_WAL_RECOVERY_MODE to 'aaa'"
+SET @@global.ROCKSDB_WAL_RECOVERY_MODE = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_WAL_RECOVERY_MODE;
+@@global.ROCKSDB_WAL_RECOVERY_MODE
+1
+SET @@global.ROCKSDB_WAL_RECOVERY_MODE = @start_global_value;
+SELECT @@global.ROCKSDB_WAL_RECOVERY_MODE;
+@@global.ROCKSDB_WAL_RECOVERY_MODE
+1
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_size_limit_mb_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_size_limit_mb_basic.result
new file mode 100644
index 00000000000..5f03597df3a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_size_limit_mb_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_WAL_SIZE_LIMIT_MB;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_WAL_SIZE_LIMIT_MB to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_WAL_SIZE_LIMIT_MB = 444;
+ERROR HY000: Variable 'rocksdb_wal_size_limit_mb' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_ttl_seconds_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_ttl_seconds_basic.result
new file mode 100644
index 00000000000..23f7fc81e7f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_ttl_seconds_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_WAL_TTL_SECONDS;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_WAL_TTL_SECONDS to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_WAL_TTL_SECONDS = 444;
+ERROR HY000: Variable 'rocksdb_wal_ttl_seconds' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_whole_key_filtering_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_whole_key_filtering_basic.result
new file mode 100644
index 00000000000..0d6f7216e9a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_whole_key_filtering_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_WHOLE_KEY_FILTERING;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_WHOLE_KEY_FILTERING to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_WHOLE_KEY_FILTERING = 444;
+ERROR HY000: Variable 'rocksdb_whole_key_filtering' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_write_batch_max_bytes_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_write_batch_max_bytes_basic.result
new file mode 100644
index 00000000000..8f6d91b2780
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_write_batch_max_bytes_basic.result
@@ -0,0 +1,15 @@
+create table t (i int);
+insert into t values (1), (2), (3), (4), (5);
+set session rocksdb_write_batch_max_bytes = 1000;
+insert into t values (1), (2), (3), (4), (5);
+set session rocksdb_write_batch_max_bytes = 10;
+insert into t values (1), (2), (3), (4), (5);
+ERROR HY000: Got error 10 'Operation aborted: Memory limit reached' from ROCKSDB
+set session rocksdb_write_batch_max_bytes = 0;
+insert into t values (1), (2), (3), (4), (5);
+set session rocksdb_write_batch_max_bytes = 10;
+begin;
+insert into t values (1), (2), (3), (4), (5);
+ERROR HY000: Got error 10 'Operation aborted: Memory limit reached' from ROCKSDB
+rollback;
+drop table t;
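Note: this result records behavior, not just scope: once a statement's write batch exceeds rocksdb_write_batch_max_bytes the insert aborts with error 10, while 0 removes the limit (the insert succeeds in the recorded output). A minimal sketch of reproducing the failure in a test, assuming the suite's default engine for table t:

    create table t (i int);
    set session rocksdb_write_batch_max_bytes = 10;
    --error ER_GET_ERRMSG
    insert into t values (1), (2), (3), (4), (5);
    set session rocksdb_write_batch_max_bytes = 0;   -- 0 = unlimited, per the result above
    insert into t values (1), (2), (3), (4), (5);
    drop table t;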
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_write_disable_wal_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_write_disable_wal_basic.result
new file mode 100644
index 00000000000..b71ee7f91cc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_write_disable_wal_basic.result
@@ -0,0 +1,114 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_WRITE_DISABLE_WAL;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_WRITE_DISABLE_WAL;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_WRITE_DISABLE_WAL to 1"
+SET @@global.ROCKSDB_WRITE_DISABLE_WAL = 1;
+SELECT @@global.ROCKSDB_WRITE_DISABLE_WAL;
+@@global.ROCKSDB_WRITE_DISABLE_WAL
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_WRITE_DISABLE_WAL = DEFAULT;
+SELECT @@global.ROCKSDB_WRITE_DISABLE_WAL;
+@@global.ROCKSDB_WRITE_DISABLE_WAL
+0
+"Trying to set variable @@global.ROCKSDB_WRITE_DISABLE_WAL to 0"
+SET @@global.ROCKSDB_WRITE_DISABLE_WAL = 0;
+SELECT @@global.ROCKSDB_WRITE_DISABLE_WAL;
+@@global.ROCKSDB_WRITE_DISABLE_WAL
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_WRITE_DISABLE_WAL = DEFAULT;
+SELECT @@global.ROCKSDB_WRITE_DISABLE_WAL;
+@@global.ROCKSDB_WRITE_DISABLE_WAL
+0
+"Trying to set variable @@global.ROCKSDB_WRITE_DISABLE_WAL to on"
+SET @@global.ROCKSDB_WRITE_DISABLE_WAL = on;
+SELECT @@global.ROCKSDB_WRITE_DISABLE_WAL;
+@@global.ROCKSDB_WRITE_DISABLE_WAL
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_WRITE_DISABLE_WAL = DEFAULT;
+SELECT @@global.ROCKSDB_WRITE_DISABLE_WAL;
+@@global.ROCKSDB_WRITE_DISABLE_WAL
+0
+"Trying to set variable @@global.ROCKSDB_WRITE_DISABLE_WAL to off"
+SET @@global.ROCKSDB_WRITE_DISABLE_WAL = off;
+SELECT @@global.ROCKSDB_WRITE_DISABLE_WAL;
+@@global.ROCKSDB_WRITE_DISABLE_WAL
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_WRITE_DISABLE_WAL = DEFAULT;
+SELECT @@global.ROCKSDB_WRITE_DISABLE_WAL;
+@@global.ROCKSDB_WRITE_DISABLE_WAL
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_WRITE_DISABLE_WAL to 1"
+SET @@session.ROCKSDB_WRITE_DISABLE_WAL = 1;
+SELECT @@session.ROCKSDB_WRITE_DISABLE_WAL;
+@@session.ROCKSDB_WRITE_DISABLE_WAL
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_WRITE_DISABLE_WAL = DEFAULT;
+SELECT @@session.ROCKSDB_WRITE_DISABLE_WAL;
+@@session.ROCKSDB_WRITE_DISABLE_WAL
+0
+"Trying to set variable @@session.ROCKSDB_WRITE_DISABLE_WAL to 0"
+SET @@session.ROCKSDB_WRITE_DISABLE_WAL = 0;
+SELECT @@session.ROCKSDB_WRITE_DISABLE_WAL;
+@@session.ROCKSDB_WRITE_DISABLE_WAL
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_WRITE_DISABLE_WAL = DEFAULT;
+SELECT @@session.ROCKSDB_WRITE_DISABLE_WAL;
+@@session.ROCKSDB_WRITE_DISABLE_WAL
+0
+"Trying to set variable @@session.ROCKSDB_WRITE_DISABLE_WAL to on"
+SET @@session.ROCKSDB_WRITE_DISABLE_WAL = on;
+SELECT @@session.ROCKSDB_WRITE_DISABLE_WAL;
+@@session.ROCKSDB_WRITE_DISABLE_WAL
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_WRITE_DISABLE_WAL = DEFAULT;
+SELECT @@session.ROCKSDB_WRITE_DISABLE_WAL;
+@@session.ROCKSDB_WRITE_DISABLE_WAL
+0
+"Trying to set variable @@session.ROCKSDB_WRITE_DISABLE_WAL to off"
+SET @@session.ROCKSDB_WRITE_DISABLE_WAL = off;
+SELECT @@session.ROCKSDB_WRITE_DISABLE_WAL;
+@@session.ROCKSDB_WRITE_DISABLE_WAL
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_WRITE_DISABLE_WAL = DEFAULT;
+SELECT @@session.ROCKSDB_WRITE_DISABLE_WAL;
+@@session.ROCKSDB_WRITE_DISABLE_WAL
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_WRITE_DISABLE_WAL to 'aaa'"
+SET @@global.ROCKSDB_WRITE_DISABLE_WAL = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_WRITE_DISABLE_WAL;
+@@global.ROCKSDB_WRITE_DISABLE_WAL
+0
+SET @@global.ROCKSDB_WRITE_DISABLE_WAL = @start_global_value;
+SELECT @@global.ROCKSDB_WRITE_DISABLE_WAL;
+@@global.ROCKSDB_WRITE_DISABLE_WAL
+0
+SET @@session.ROCKSDB_WRITE_DISABLE_WAL = @start_session_value;
+SELECT @@session.ROCKSDB_WRITE_DISABLE_WAL;
+@@session.ROCKSDB_WRITE_DISABLE_WAL
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_write_ignore_missing_column_families_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_write_ignore_missing_column_families_basic.result
new file mode 100644
index 00000000000..dbe46858c94
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_write_ignore_missing_column_families_basic.result
@@ -0,0 +1,100 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES to 1"
+SET @@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES = 1;
+SELECT @@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES;
+@@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES = DEFAULT;
+SELECT @@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES;
+@@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES
+0
+"Trying to set variable @@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES to 0"
+SET @@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES = 0;
+SELECT @@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES;
+@@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES = DEFAULT;
+SELECT @@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES;
+@@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES
+0
+"Trying to set variable @@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES to on"
+SET @@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES = on;
+SELECT @@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES;
+@@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES = DEFAULT;
+SELECT @@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES;
+@@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES to 1"
+SET @@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES = 1;
+SELECT @@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES;
+@@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES = DEFAULT;
+SELECT @@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES;
+@@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES
+0
+"Trying to set variable @@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES to 0"
+SET @@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES = 0;
+SELECT @@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES;
+@@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES = DEFAULT;
+SELECT @@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES;
+@@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES
+0
+"Trying to set variable @@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES to on"
+SET @@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES = on;
+SELECT @@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES;
+@@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES = DEFAULT;
+SELECT @@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES;
+@@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES to 'aaa'"
+SET @@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES;
+@@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES
+0
+"Trying to set variable @@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES to 'bbb'"
+SET @@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES;
+@@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES
+0
+SET @@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES = @start_global_value;
+SELECT @@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES;
+@@global.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES
+0
+SET @@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES = @start_session_value;
+SELECT @@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES;
+@@session.ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_write_policy_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_write_policy_basic.result
new file mode 100644
index 00000000000..58e040e05f1
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_write_policy_basic.result
@@ -0,0 +1,15 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES("write_committed");
+INSERT INTO valid_values VALUES("write_prepared");
+INSERT INTO valid_values VALUES("write_unprepared");
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_WRITE_POLICY;
+SELECT @start_global_value;
+@start_global_value
+write_committed
+"Trying to set variable @@global.ROCKSDB_WRITE_POLICY to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_WRITE_POLICY = 444;
+ERROR HY000: Variable 'rocksdb_write_policy' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/suite.opt b/storage/rocksdb/mysql-test/rocksdb_sys_vars/suite.opt
new file mode 100644
index 00000000000..b503913d2c8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/suite.opt
@@ -0,0 +1,2 @@
+--ignore-db-dirs=#rocksdb --plugin-load=$HA_ROCKSDB_SO
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/suite.pm b/storage/rocksdb/mysql-test/rocksdb_sys_vars/suite.pm
new file mode 100644
index 00000000000..5264d657ef7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/suite.pm
@@ -0,0 +1,21 @@
+package My::Suite::Rocksdb_sys_vars;
+
+#
+# Note: The below is copied from ../rocksdb/suite.pm
+#
+@ISA = qw(My::Suite);
+use My::Find;
+use File::Basename;
+use strict;
+
+#sub is_default { not $::opt_embedded_server }
+
+my $sst_dump=
+::mtr_exe_maybe_exists(
+ "$::bindir/storage/rocksdb$::opt_vs_config/sst_dump",
+ "$::path_client_bindir/sst_dump");
+return "RocksDB is not compiled, no sst_dump" unless $sst_dump;
+$ENV{MARIAROCKS_SST_DUMP}="$sst_dump";
+
+bless { };
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/all_vars.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/all_vars.test
new file mode 100644
index 00000000000..fc700357155
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/all_vars.test
@@ -0,0 +1,40 @@
+--source include/have_rocksdb.inc
+--source include/not_embedded.inc
+--source include/not_threadpool.inc
+
+# This test verifies that *all* MyRocks system variables are tested by the
+# rocksdb_sys_vars suite. For every MyRocks system variable there must be a
+# <variable_name>_basic.test file.
+#
+
+#
+# we can compute the difference either in perl or in sql; since this is a mysql test suite, do it in sql
+#
+
+perl;
+ use File::Basename;
+ my $dirname=dirname($ENV{MYSQLTEST_FILE});
+ my @all_tests=<$dirname/*_basic{,_32,_64}.test>;
+ open(F, '>', "$ENV{MYSQLTEST_VARDIR}/tmp/rocksdb_sys_vars.all_vars.txt") or die;
+ binmode F;
+ print F join "\n", sort map { s/_basic(_32|_64)?\.test$//; basename $_ } @all_tests;
+EOF
+
+create table t1 (test_name text) engine=MyISAM;
+create table t2 (variable_name text) engine=MyISAM;
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+eval load data infile "$MYSQLTEST_VARDIR/tmp/rocksdb_sys_vars.all_vars.txt" into table t1;
+
+insert into t2 select variable_name from information_schema.global_variables where variable_name like "rocksdb_%";
+insert into t2 select variable_name from information_schema.session_variables where variable_name like "rocksdb_%";
+
+--sorted_result
+select variable_name as `There should be *no* long test name listed below:` from t2
+ where length(variable_name) > 50;
+
+--sorted_result
+select variable_name as `There should be *no* variables listed below:` from t2
+ left join t1 on variable_name=test_name where test_name is null ORDER BY variable_name;
+
+drop table t1;
+drop table t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/disabled.def b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/disabled.def
new file mode 100644
index 00000000000..efa82ff6184
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/disabled.def
@@ -0,0 +1,5 @@
+# Disabled tests
+
+rocksdb_read_free_rpl_basic : MariaDB: Read-Free replication is not supported
+rocksdb_read_free_rpl_tables_basic : MariaDB: Read-Free replication is not supported
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_access_hint_on_compaction_start_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_access_hint_on_compaction_start_basic.test
new file mode 100644
index 00000000000..51765574a91
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_access_hint_on_compaction_start_basic.test
@@ -0,0 +1,7 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_ACCESS_HINT_ON_COMPACTION_START
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_advise_random_on_open_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_advise_random_on_open_basic.test
new file mode 100644
index 00000000000..06593363df2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_advise_random_on_open_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_ADVISE_RANDOM_ON_OPEN
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_concurrent_memtable_write_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_concurrent_memtable_write_basic.test
new file mode 100644
index 00000000000..5d4e3c05f9d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_concurrent_memtable_write_basic.test
@@ -0,0 +1,5 @@
+--source include/have_rocksdb.inc
+--let $sys_var=ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_mmap_reads_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_mmap_reads_basic.test
new file mode 100644
index 00000000000..72fed6b2275
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_mmap_reads_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_ALLOW_MMAP_READS
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_mmap_writes_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_mmap_writes_basic.test
new file mode 100644
index 00000000000..8b8da14f56c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_mmap_writes_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_ALLOW_MMAP_WRITES
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_to_start_after_corruption_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_to_start_after_corruption_basic.test
new file mode 100644
index 00000000000..64fb2458424
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_to_start_after_corruption_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_ALLOW_TO_START_AFTER_CORRUPTION
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_blind_delete_primary_key_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_blind_delete_primary_key_basic.test
new file mode 100644
index 00000000000..da972cccf9a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_blind_delete_primary_key_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_BLIND_DELETE_PRIMARY_KEY
+--let $read_only=0
+--let $session=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_block_cache_size_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_block_cache_size_basic.test
new file mode 100644
index 00000000000..8d3bab4e5ec
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_block_cache_size_basic.test
@@ -0,0 +1,21 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(65536);
+INSERT INTO valid_values VALUES(1024);
+INSERT INTO valid_values VALUES(1*1024*1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+
+--let $sys_var=ROCKSDB_BLOCK_CACHE_SIZE
+--let $read_only=0
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_block_restart_interval_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_block_restart_interval_basic.test
new file mode 100644
index 00000000000..0688ef73281
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_block_restart_interval_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_BLOCK_RESTART_INTERVAL
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_block_size_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_block_size_basic.test
new file mode 100644
index 00000000000..150c1e533a7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_block_size_basic.test
@@ -0,0 +1,7 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_BLOCK_SIZE
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_block_size_deviation_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_block_size_deviation_basic.test
new file mode 100644
index 00000000000..98d179c028c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_block_size_deviation_basic.test
@@ -0,0 +1,7 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_BLOCK_SIZE_DEVIATION
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_allow_sk_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_allow_sk_basic.test
new file mode 100644
index 00000000000..14e5f38b335
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_allow_sk_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_BULK_LOAD_ALLOW_SK
+--let $read_only=0
+--let $session=1
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_allow_unsorted_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_allow_unsorted_basic.test
new file mode 100644
index 00000000000..e57396e0fdc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_allow_unsorted_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+--let $read_only=0
+--let $session=1
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_basic.test
new file mode 100644
index 00000000000..dd55c849adb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_BULK_LOAD
+--let $read_only=0
+--let $session=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_size_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_size_basic.test
new file mode 100644
index 00000000000..70d1c44806a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_size_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_BULK_LOAD_SIZE
+--let $read_only=0
+--let $session=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bytes_per_sync_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bytes_per_sync_basic.test
new file mode 100644
index 00000000000..bf78f578b6c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bytes_per_sync_basic.test
@@ -0,0 +1,22 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+
+--let $sys_var=ROCKSDB_BYTES_PER_SYNC
+--let $read_only=0
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_dump_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_dump_basic.test
new file mode 100644
index 00000000000..70557621828
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_dump_basic.test
@@ -0,0 +1,21 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_CACHE_DUMP
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_high_pri_pool_ratio_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_high_pri_pool_ratio_basic.test
new file mode 100644
index 00000000000..d97c4e20f82
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_high_pri_pool_ratio_basic.test
@@ -0,0 +1,24 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1.0);
+INSERT INTO valid_values VALUES(0.0);
+INSERT INTO valid_values VALUES(0.5);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES(2.0);
+INSERT INTO invalid_values VALUES(-0.5);
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+INSERT INTO invalid_values VALUES('\'0.5\'');
+
+--let $sys_var=ROCKSDB_CACHE_HIGH_PRI_POOL_RATIO
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_index_and_filter_blocks_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_index_and_filter_blocks_basic.test
new file mode 100644
index 00000000000..27d0aa99d01
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_index_and_filter_blocks_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_CACHE_INDEX_AND_FILTER_BLOCKS
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_index_and_filter_with_high_priority_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_index_and_filter_with_high_priority_basic.test
new file mode 100644
index 00000000000..5ed7927f233
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_index_and_filter_with_high_priority_basic.test
@@ -0,0 +1,21 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_CACHE_INDEX_AND_FILTER_WITH_HIGH_PRIORITY
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_checksums_pct_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_checksums_pct_basic.test
new file mode 100644
index 00000000000..b595cb62a56
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_checksums_pct_basic.test
@@ -0,0 +1,17 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(99);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_CHECKSUMS_PCT
+--let $read_only=0
+--let $session=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_collect_sst_properties_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_collect_sst_properties_basic.test
new file mode 100644
index 00000000000..9c0e111d7b9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_collect_sst_properties_basic.test
@@ -0,0 +1,8 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_COLLECT_SST_PROPERTIES
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_commit_in_the_middle_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_commit_in_the_middle_basic.test
new file mode 100644
index 00000000000..ec860cfcfc2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_commit_in_the_middle_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_COMMIT_IN_THE_MIDDLE
+--let $read_only=0
+--let $session=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_commit_time_batch_for_recovery_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_commit_time_batch_for_recovery_basic.test
new file mode 100644
index 00000000000..0d36e0a173d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_commit_time_batch_for_recovery_basic.test
@@ -0,0 +1,20 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255));
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+
+CREATE TABLE invalid_values (value varchar(255));
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
+--let $read_only=0
+--let $session=1
+--let $sticky=1
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compact_cf_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compact_cf_basic.test
new file mode 100644
index 00000000000..736f6754b6f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compact_cf_basic.test
@@ -0,0 +1,19 @@
+
+call mtr.add_suppression(" Column family '[a-z]*' not found.");
+
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES('abc');
+INSERT INTO valid_values VALUES('def');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+
+--let $sys_var=ROCKSDB_COMPACT_CF
+--let $read_only=0
+--let $session=0
+--let $sticky=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_readahead_size_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_readahead_size_basic.test
new file mode 100644
index 00000000000..c0651a3a14d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_readahead_size_basic.test
@@ -0,0 +1,23 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(222333);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+# Attempt to set the value to -1: it should first be truncated to 0 and then
+# generate a warning, since the variable cannot be set to (or away from) 0
+SET @@global.rocksdb_compaction_readahead_size = -1;
+SELECT @@global.rocksdb_compaction_readahead_size;
+
+--let $sys_var=ROCKSDB_COMPACTION_READAHEAD_SIZE
+--let $read_only=0
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
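Note: since -1 is below the unsigned minimum, the probe above is expected to clamp to 0 with a truncation warning rather than fail outright. A sketch of inspecting that, where the exact warning text is an assumption:

    SET @@global.rocksdb_compaction_readahead_size = -1;
    SHOW WARNINGS;   -- assumed: a "Truncated incorrect ... value: '-1'" style warning
    SELECT @@global.rocksdb_compaction_readahead_size;   -- expect 0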
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_sequential_deletes_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_sequential_deletes_basic.test
new file mode 100644
index 00000000000..24399c85d88
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_sequential_deletes_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+INSERT INTO valid_values VALUES(2000000);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'2000001\'');
+
+--let $sys_var=ROCKSDB_COMPACTION_SEQUENTIAL_DELETES
+--let $read_only=0
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_sequential_deletes_count_sd_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_sequential_deletes_count_sd_basic.test
new file mode 100644
index 00000000000..b3a437d6cd4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_sequential_deletes_count_sd_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_COUNT_SD
+--let $read_only=0
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_sequential_deletes_file_size_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_sequential_deletes_file_size_basic.test
new file mode 100644
index 00000000000..aaf71179221
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_sequential_deletes_file_size_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_FILE_SIZE
+--let $read_only=0
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_sequential_deletes_window_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_sequential_deletes_window_basic.test
new file mode 100644
index 00000000000..d5be34695c0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_sequential_deletes_window_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+INSERT INTO valid_values VALUES(2000000);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'2000001\'');
+
+--let $sys_var=ROCKSDB_COMPACTION_SEQUENTIAL_DELETES_WINDOW
+--let $read_only=0
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_create_checkpoint_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_create_checkpoint_basic.test
new file mode 100644
index 00000000000..32d4a6a23bc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_create_checkpoint_basic.test
@@ -0,0 +1,29 @@
+--source include/have_rocksdb.inc
+
+--eval SET @start_value = @@global.ROCKSDB_CREATE_CHECKPOINT
+
+# Test using tmp/abc
+--replace_result $MYSQL_TMP_DIR TMP
+--eval SET @@global.ROCKSDB_CREATE_CHECKPOINT = '$MYSQL_TMP_DIR/abc'
+--eval SELECT @@global.ROCKSDB_CREATE_CHECKPOINT
+--eval SET @@global.ROCKSDB_CREATE_CHECKPOINT = DEFAULT
+
+# Test using tmp/def
+--replace_result $MYSQL_TMP_DIR TMP
+--eval SET @@global.ROCKSDB_CREATE_CHECKPOINT = '$MYSQL_TMP_DIR/def'
+--eval SELECT @@global.ROCKSDB_CREATE_CHECKPOINT
+--eval SET @@global.ROCKSDB_CREATE_CHECKPOINT = DEFAULT
+
+# Should fail because it is not a session variable
+--error ER_GLOBAL_VARIABLE
+--eval SET @@session.ROCKSDB_CREATE_CHECKPOINT = 444
+
+# Set back to the original value and validate that this makes
+# checkpoint creation fail, since the saved original value is
+# DEFAULT == ''
+--error ER_GET_ERRMSG
+--eval SET @@global.ROCKSDB_CREATE_CHECKPOINT = @start_value
+
+# clean up
+--exec rm -r $MYSQL_TMP_DIR/abc
+--exec rm -r $MYSQL_TMP_DIR/def
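Note: rocksdb_create_checkpoint acts as a trigger: assigning a directory path makes the server materialize a RocksDB checkpoint there, which is why the test removes $MYSQL_TMP_DIR/abc and $MYSQL_TMP_DIR/def at the end. A usage sketch with a purely illustrative path:

    SET @@global.ROCKSDB_CREATE_CHECKPOINT = '/tmp/rocksdb_ckpt_demo';  -- hypothetical path; a checkpoint is written there
    --error ER_GET_ERRMSG
    SET @@global.ROCKSDB_CREATE_CHECKPOINT = '';   -- per the test above, an empty path cannot create a checkpoint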
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_create_if_missing_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_create_if_missing_basic.test
new file mode 100644
index 00000000000..ab92a0a0867
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_create_if_missing_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_CREATE_IF_MISSING
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_create_missing_column_families_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_create_missing_column_families_basic.test
new file mode 100644
index 00000000000..21c0f0ead2c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_create_missing_column_families_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_CREATE_MISSING_COLUMN_FAMILIES
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_datadir_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_datadir_basic.test
new file mode 100644
index 00000000000..fd3569c8f0a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_datadir_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_DATADIR
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_db_write_buffer_size_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_db_write_buffer_size_basic.test
new file mode 100644
index 00000000000..df6a24902af
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_db_write_buffer_size_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_DB_WRITE_BUFFER_SIZE
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_deadlock_detect_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_deadlock_detect_basic.test
new file mode 100644
index 00000000000..980be0f3924
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_deadlock_detect_basic.test
@@ -0,0 +1,20 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255));
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+
+CREATE TABLE invalid_values (value varchar(255));
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_DEADLOCK_DETECT
+--let $read_only=0
+--let $session=1
+--let $sticky=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_deadlock_detect_depth_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_deadlock_detect_depth_basic.test
new file mode 100644
index 00000000000..cab72a11e18
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_deadlock_detect_depth_basic.test
@@ -0,0 +1,17 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(2);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'123\'');
+
+--let $sys_var=ROCKSDB_DEADLOCK_DETECT_DEPTH
+--let $read_only=0
+--let $session=1
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_manual_compaction_delay_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_manual_compaction_delay_basic.test
new file mode 100644
index 00000000000..518c284c0cf
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_manual_compaction_delay_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(2400);
+INSERT INTO valid_values VALUES(100000);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_DEBUG_MANUAL_COMPACTION_DELAY
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_optimizer_no_zero_cardinality_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_optimizer_no_zero_cardinality_basic.test
new file mode 100644
index 00000000000..41c4ae6322d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_optimizer_no_zero_cardinality_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_DEBUG_OPTIMIZER_NO_ZERO_CARDINALITY
+--let $read_only=0
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_ignore_pk_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_ignore_pk_basic.test
new file mode 100644
index 00000000000..8ad071e131b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_ignore_pk_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_DEBUG_TTL_IGNORE_PK
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_read_filter_ts_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_read_filter_ts_basic.test
new file mode 100644
index 00000000000..c3837ff1454
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_read_filter_ts_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(2400);
+INSERT INTO valid_values VALUES(-2400);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_DEBUG_TTL_READ_FILTER_TS
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_rec_ts_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_rec_ts_basic.test
new file mode 100644
index 00000000000..14c3e3d30aa
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_rec_ts_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(2400);
+INSERT INTO valid_values VALUES(-2400);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_DEBUG_TTL_REC_TS
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_snapshot_ts_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_snapshot_ts_basic.test
new file mode 100644
index 00000000000..af507fbe7db
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_snapshot_ts_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(2400);
+INSERT INTO valid_values VALUES(-2400);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_DEBUG_TTL_SNAPSHOT_TS
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_default_cf_options_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_default_cf_options_basic.test
new file mode 100644
index 00000000000..1febc6db093
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_default_cf_options_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_DEFAULT_CF_OPTIONS
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delayed_write_rate_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delayed_write_rate_basic.test
new file mode 100644
index 00000000000..8068b6b87e8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delayed_write_rate_basic.test
@@ -0,0 +1,22 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+
+--let $sys_var=ROCKSDB_DELAYED_WRITE_RATE
+--let $read_only=0
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_cf_basic-master.opt b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_cf_basic-master.opt
new file mode 100644
index 00000000000..ae43ab332ee
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_cf_basic-master.opt
@@ -0,0 +1 @@
+--loose-rocksdb-dummy-option-instead-of-force-restart
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_cf_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_cf_basic.test
new file mode 100644
index 00000000000..0875e492b2c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_cf_basic.test
@@ -0,0 +1,75 @@
+--disable_query_log
+call mtr.add_suppression("Failed to drop column family");
+call mtr.add_suppression("Column family '[a-z_]+' not found");
+--enable_query_log
+
+--source include/have_rocksdb.inc
+
+# should fail for a nonexistent CF
+--error ER_WRONG_VALUE_FOR_VAR
+--eval SET @@global.ROCKSDB_DELETE_CF = 'nonexistent_cf'
+
+# should fail for default system cf
+--error ER_WRONG_VALUE_FOR_VAR
+--eval SET @@global.ROCKSDB_DELETE_CF = '__system__'
+
+# should fail for default cf
+--error ER_WRONG_VALUE_FOR_VAR
+--eval SET @@global.ROCKSDB_DELETE_CF = 'default'
+
+--disable_parsing
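+# Everything down to --enable_parsing is skipped by mtr; the block below is
+# kept as a sketch of the intended lifecycle checks (a CF may only be dropped
+# once no table uses it).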
+# should succeed for a nonexistent CF
+--eval SET @@global.ROCKSDB_DELETE_CF = 'nonexistent_cf'
+
+# should fail for default system cf
+--error ER_WRONG_VALUE_FOR_VAR
+--eval SET @@global.ROCKSDB_DELETE_CF = '__system__'
+
+alter table mysql.slave_worker_info engine = MyISAM;
+alter table mysql.slave_relay_log_info engine = MyISAM;
+alter table mysql.slave_gtid_info engine = MyISAM;
+alter table mysql.slave_master_info engine = MyISAM;
+
+select count(*) from information_schema.rocksdb_ddl where cf = 'default';
+
+# should fail for default cf
+--error ER_GET_ERRMSG
+--eval SET @@global.ROCKSDB_DELETE_CF = 'default'
+
+CREATE TABLE cf_deletion_test_table1 (
+ id1 int(10) unsigned NOT NULL DEFAULT '0',
+ id2 int(10) unsigned NOT NULL DEFAULT '0',
+ PRIMARY KEY (id1) COMMENT 'cf_primary_key',
+ KEY `sec_key` (id2) COMMENT 'cf_secondary_key'
+) ENGINE=ROCKSDB;
+
+# should fail, CFs are still in use
+--error ER_WRONG_VALUE_FOR_VAR
+--eval SET @@global.ROCKSDB_DELETE_CF = 'cf_primary_key'
+--error ER_WRONG_VALUE_FOR_VAR
+--eval SET @@global.ROCKSDB_DELETE_CF = 'cf_secondary_key'
+
+CREATE TABLE cf_deletion_test_table2 (
+ id1 int(10) unsigned NOT NULL DEFAULT '0',
+ PRIMARY KEY (id1) COMMENT 'cf_primary_key'
+) ENGINE=ROCKSDB;
+
+DROP TABLE cf_deletion_test_table1;
+
+# should fail, the CF is still used by the second table
+--error ER_WRONG_VALUE_FOR_VAR
+--eval SET @@global.ROCKSDB_DELETE_CF = 'cf_primary_key'
+
+# should succeed, no one is using it anymore
+--eval SET @@global.ROCKSDB_DELETE_CF = 'cf_secondary_key'
+
+DROP TABLE cf_deletion_test_table2;
+
+# should succeed now
+--eval SET @@global.ROCKSDB_DELETE_CF = 'cf_primary_key'
+
+alter table mysql.slave_worker_info engine = ROCKSDB;
+alter table mysql.slave_relay_log_info engine = ROCKSDB;
+alter table mysql.slave_gtid_info engine = ROCKSDB;
+alter table mysql.slave_master_info engine = ROCKSDB;
+--enable_parsing
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_obsolete_files_period_micros_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_obsolete_files_period_micros_basic.test
new file mode 100644
index 00000000000..3c2cd2db87f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_obsolete_files_period_micros_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_DELETE_OBSOLETE_FILES_PERIOD_MICROS
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_2pc_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_2pc_basic.test
new file mode 100644
index 00000000000..0a38895c35a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_2pc_basic.test
@@ -0,0 +1,20 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_ENABLE_2PC
+--let $read_only=0
+--let $session=0
+--let $sticky=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_bulk_load_api_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_bulk_load_api_basic.test
new file mode 100644
index 00000000000..52313ffbe70
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_bulk_load_api_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_ENABLE_BULK_LOAD_API
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_insert_with_update_caching_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_insert_with_update_caching_basic.test
new file mode 100644
index 00000000000..cd643cfef23
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_insert_with_update_caching_basic.test
@@ -0,0 +1,21 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING
+--let $read_only=0
+--let $session=0
+--let $sticky=1
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_thread_tracking_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_thread_tracking_basic.test
new file mode 100644
index 00000000000..566d56563fb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_thread_tracking_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_ENABLE_THREAD_TRACKING
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_ttl_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_ttl_basic.test
new file mode 100644
index 00000000000..209530bd899
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_ttl_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_ENABLE_TTL
+--let $read_only=0
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_ttl_read_filtering_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_ttl_read_filtering_basic.test
new file mode 100644
index 00000000000..cc034ed47d6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_ttl_read_filtering_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_ENABLE_TTL_READ_FILTERING
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_write_thread_adaptive_yield_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_write_thread_adaptive_yield_basic.test
new file mode 100644
index 00000000000..b5c844a9c93
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_write_thread_adaptive_yield_basic.test
@@ -0,0 +1,5 @@
+--source include/have_rocksdb.inc
+--let $sys_var=ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_error_if_exists_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_error_if_exists_basic.test
new file mode 100644
index 00000000000..933642a73a6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_error_if_exists_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_ERROR_IF_EXISTS
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_error_on_suboptimal_collation_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_error_on_suboptimal_collation_basic.test
new file mode 100644
index 00000000000..9278fa31933
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_error_on_suboptimal_collation_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_ERROR_ON_SUBOPTIMAL_COLLATION
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_log_at_trx_commit_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_log_at_trx_commit_basic.test
new file mode 100644
index 00000000000..02c533dc7c6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_log_at_trx_commit_basic.test
@@ -0,0 +1,17 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(2);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT
+--let $read_only=0
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_basic.test
new file mode 100644
index 00000000000..318ae1ee598
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_basic.test
@@ -0,0 +1,23 @@
+--source include/have_rocksdb.inc
+--disable_warnings
+DROP TABLE IF EXISTS t;
+--enable_warnings
+
+CREATE TABLE t (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t (a,b) VALUES (1,'bar'),(2,'foo');
+
+SET @ORIG_FORCE_COMPUTE_MEMTABLE_STATS = @@rocksdb_force_compute_memtable_stats;
+set global rocksdb_force_flush_memtable_now = true;
+
+INSERT INTO t (a,b) VALUES (3,'dead'),(4,'beef'),(5,'a'),(6,'bbb'),(7,'c'),(8,'d');
+
+set global rocksdb_force_compute_memtable_stats=0;
+SELECT TABLE_ROWS INTO @ROWS_EXCLUDE_MEMTABLE FROM information_schema.TABLES WHERE table_name = 't';
+
+set global rocksdb_force_compute_memtable_stats=1;
+SELECT TABLE_ROWS INTO @ROWS_INCLUDE_MEMTABLE FROM information_schema.TABLES WHERE table_name = 't';
+
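+# Expect 'true': including memtable entries in the stats should yield a higher
+# row estimate than the flushed-data-only count.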
+select case when @ROWS_INCLUDE_MEMTABLE-@ROWS_EXCLUDE_MEMTABLE > 0 then 'true' else 'false' end;
+
+DROP TABLE t;
+set global rocksdb_force_compute_memtable_stats = @ORIG_FORCE_COMPUTE_MEMTABLE_STATS;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_cachetime_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_cachetime_basic.test
new file mode 100644
index 00000000000..20180ec16a9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_cachetime_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+INSERT INTO valid_values VALUES(1024 * 1024 * 1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_flush_memtable_and_lzero_now_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_flush_memtable_and_lzero_now_basic.test
new file mode 100644
index 00000000000..5eeac501040
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_flush_memtable_and_lzero_now_basic.test
@@ -0,0 +1,17 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+
+--let $sys_var=ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW
+--let $read_only=0
+--let $session=0
+--let $sticky=1
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_flush_memtable_now_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_flush_memtable_now_basic.test
new file mode 100644
index 00000000000..4386af1ee19
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_flush_memtable_now_basic.test
@@ -0,0 +1,17 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+
+--let $sys_var=ROCKSDB_FORCE_FLUSH_MEMTABLE_NOW
+--let $read_only=0
+--let $session=0
+--let $sticky=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_index_records_in_range_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_index_records_in_range_basic.test
new file mode 100644
index 00000000000..30263ea4aa1
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_index_records_in_range_basic.test
@@ -0,0 +1,23 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(222333);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+# Attempt to set the value to -1 - it should be truncated to the minimum (0)
+# and generate a truncation warning
+SET @@session.rocksdb_force_index_records_in_range = -1;
+SELECT @@session.rocksdb_force_index_records_in_range;
+
+--let $sys_var=ROCKSDB_FORCE_INDEX_RECORDS_IN_RANGE
+--let $read_only=0
+--let $session=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_git_hash_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_git_hash_basic.test
new file mode 100644
index 00000000000..7b314e47d4b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_git_hash_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_GIT_HASH
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_hash_index_allow_collision_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_hash_index_allow_collision_basic.test
new file mode 100644
index 00000000000..e787dd33a34
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_hash_index_allow_collision_basic.test
@@ -0,0 +1,7 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_HASH_INDEX_ALLOW_COLLISION
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_ignore_unknown_options_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_ignore_unknown_options_basic.test
new file mode 100644
index 00000000000..f10ff2c6123
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_ignore_unknown_options_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_IGNORE_UNKNOWN_OPTIONS
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_index_type_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_index_type_basic.test
new file mode 100644
index 00000000000..49369ffd765
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_index_type_basic.test
@@ -0,0 +1,7 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_INDEX_TYPE
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_info_log_level_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_info_log_level_basic.test
new file mode 100644
index 00000000000..fb2ce5e713b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_info_log_level_basic.test
@@ -0,0 +1,21 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES('fatal_level');
+INSERT INTO valid_values VALUES('error_level');
+INSERT INTO valid_values VALUES('warn_level');
+INSERT INTO valid_values VALUES('info_level');
+INSERT INTO valid_values VALUES('debug_level');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES(5);
+INSERT INTO invalid_values VALUES(6);
+INSERT INTO invalid_values VALUES('foo');
+
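+# This variable takes enum level names, so the bare numbers and unknown
+# strings above are expected to be rejected.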
+--let $sys_var=ROCKSDB_INFO_LOG_LEVEL
+--let $read_only=0
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_io_write_timeout_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_io_write_timeout_basic.test
new file mode 100644
index 00000000000..4433eb2632d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_io_write_timeout_basic.test
@@ -0,0 +1,20 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(10);
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(42);
+INSERT INTO valid_values VALUES(142);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_IO_WRITE_TIMEOUT
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_is_fd_close_on_exec_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_is_fd_close_on_exec_basic.test
new file mode 100644
index 00000000000..4d39c2a3656
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_is_fd_close_on_exec_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_IS_FD_CLOSE_ON_EXEC
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_keep_log_file_num_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_keep_log_file_num_basic.test
new file mode 100644
index 00000000000..0eff718c14c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_keep_log_file_num_basic.test
@@ -0,0 +1,7 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_KEEP_LOG_FILE_NUM
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_large_prefix_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_large_prefix_basic.test
new file mode 100644
index 00000000000..5ed3c74131e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_large_prefix_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_LARGE_PREFIX
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_lock_scanned_rows_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_lock_scanned_rows_basic.test
new file mode 100644
index 00000000000..35b4128c3e5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_lock_scanned_rows_basic.test
@@ -0,0 +1,22 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES(2);
+INSERT INTO invalid_values VALUES(1000);
+
+--let $sys_var=ROCKSDB_LOCK_SCANNED_ROWS
+--let $read_only=0
+--let $session=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_lock_wait_timeout_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_lock_wait_timeout_basic.test
new file mode 100644
index 00000000000..24096677e1b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_lock_wait_timeout_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_LOCK_WAIT_TIMEOUT
+--let $read_only=0
+--let $session=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_log_file_time_to_roll_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_log_file_time_to_roll_basic.test
new file mode 100644
index 00000000000..63a7c5fedfb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_log_file_time_to_roll_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_LOG_FILE_TIME_TO_ROLL
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manifest_preallocation_size_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manifest_preallocation_size_basic.test
new file mode 100644
index 00000000000..6f248ece9e9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manifest_preallocation_size_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_MANIFEST_PREALLOCATION_SIZE
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manual_compaction_threads_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manual_compaction_threads_basic.test
new file mode 100644
index 00000000000..4f166a9ca8e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manual_compaction_threads_basic.test
@@ -0,0 +1,17 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(99);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_MANUAL_COMPACTION_THREADS
+--let $read_only=0
+--let $session=1
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manual_wal_flush_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manual_wal_flush_basic.test
new file mode 100644
index 00000000000..3e01722d5ea
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manual_wal_flush_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_MANUAL_WAL_FLUSH
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_master_skip_tx_api_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_master_skip_tx_api_basic.test
new file mode 100644
index 00000000000..e0d5925cad6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_master_skip_tx_api_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_MASTER_SKIP_TX_API
+--let $read_only=0
+--let $session=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_background_jobs_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_background_jobs_basic.test
new file mode 100644
index 00000000000..375a4fddb93
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_background_jobs_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(64);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'abc\'');
+
+--let $sys_var=ROCKSDB_MAX_BACKGROUND_JOBS
+--let $read_only=0
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_latest_deadlocks_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_latest_deadlocks_basic.test
new file mode 100644
index 00000000000..9917ec31d9b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_latest_deadlocks_basic.test
@@ -0,0 +1,17 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'123\'');
+
+--let $sys_var=ROCKSDB_MAX_LATEST_DEADLOCKS
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_log_file_size_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_log_file_size_basic.test
new file mode 100644
index 00000000000..cbe5d925fda
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_log_file_size_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_MAX_LOG_FILE_SIZE
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_manifest_file_size_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_manifest_file_size_basic.test
new file mode 100644
index 00000000000..f399b296732
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_manifest_file_size_basic.test
@@ -0,0 +1,7 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_MAX_MANIFEST_FILE_SIZE
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_manual_compactions_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_manual_compactions_basic.test
new file mode 100644
index 00000000000..1c66316858b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_manual_compactions_basic.test
@@ -0,0 +1,17 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+INSERT INTO valid_values VALUES(512*1024*1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_MAX_MANUAL_COMPACTIONS
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_open_files_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_open_files_basic.test
new file mode 100644
index 00000000000..36996761507
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_open_files_basic.test
@@ -0,0 +1,8 @@
+--source include/have_rocksdb.inc
+
+# We cannot use rocksdb_sys_var.inc here, as this is a global, read-only option
+# whose value depends on the server's open_files_limit. It is tested more fully
+# in the rocksdb.max_open_files test.
+
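+# Mask the value (column 2), since it varies with the environment's limits.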
+--replace_column 2 #
+show variables like 'rocksdb_max_open_files';
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_row_locks_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_row_locks_basic.test
new file mode 100644
index 00000000000..8a26ae91411
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_row_locks_basic.test
@@ -0,0 +1,17 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+INSERT INTO valid_values VALUES(512*1024*1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_MAX_ROW_LOCKS
+--let $read_only=0
+--let $session=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_subcompactions_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_subcompactions_basic.test
new file mode 100644
index 00000000000..a4494dd8262
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_subcompactions_basic.test
@@ -0,0 +1,7 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_MAX_SUBCOMPACTIONS
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_total_wal_size_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_total_wal_size_basic.test
new file mode 100644
index 00000000000..35ba859c649
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_total_wal_size_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_MAX_TOTAL_WAL_SIZE
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_buf_size_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_buf_size_basic.test
new file mode 100644
index 00000000000..8e2dda64d4a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_buf_size_basic.test
@@ -0,0 +1,50 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+set session rocksdb_merge_buf_size=250;
+set session rocksdb_merge_combine_read_size=1000;
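+# Deliberately tiny buffer sizes, so the 100-row inserts below presumably push
+# the inplace index build's merge sort through multiple buffers.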
+
+CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
+
+--disable_query_log
+let $max = 100;
+let $i = 1;
+while ($i <= $max) {
+ let $insert = INSERT INTO t1 VALUES ($i, FLOOR(RAND() * 100));
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+
+ALTER TABLE t1 ADD INDEX kj(j), ALGORITHM=INPLACE;
+ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+
+DROP INDEX kj on t1;
+DROP INDEX kij ON t1;
+
+ALTER TABLE t1 ADD INDEX kj(j), ADD INDEX kij(i,j), ADD INDEX kji(j,i), ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+
+DROP TABLE t1;
+
+# Reverse CF testing, needs to be added to SSTFileWriter in reverse order
+CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=RocksDB;
+--disable_query_log
+let $max = 100;
+let $i = 1;
+while ($i <= $max) {
+ let $insert = INSERT INTO t1 VALUES ($i, FLOOR(RAND() * 100));
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+
+ALTER TABLE t1 ADD INDEX kb(b) comment 'rev:cf1', ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+SELECT COUNT(*) FROM t1 FORCE INDEX(kb);
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_combine_read_size_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_combine_read_size_basic.test
new file mode 100644
index 00000000000..48e89137344
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_combine_read_size_basic.test
@@ -0,0 +1,32 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+set session rocksdb_merge_buf_size=250;
+set session rocksdb_merge_combine_read_size=1000;
+
+CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
+
+--disable_query_log
+let $max = 100;
+let $i = 1;
+while ($i <= $max) {
+ let $insert = INSERT INTO t1 VALUES ($i, $i);
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+
+ALTER TABLE t1 ADD INDEX kj(j), ALGORITHM=INPLACE;
+ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+
+DROP INDEX kj on t1;
+DROP INDEX kij ON t1;
+
+ALTER TABLE t1 ADD INDEX kj(j), ADD INDEX kij(i,j), ADD INDEX kji(j,i), ALGORITHM=INPLACE;
+SHOW CREATE TABLE t1;
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_tmp_file_removal_delay_ms_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_tmp_file_removal_delay_ms_basic.test
new file mode 100644
index 00000000000..03cc0b11d8c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_tmp_file_removal_delay_ms_basic.test
@@ -0,0 +1,49 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('on');
+
+--let $sys_var=ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+--let $read_only=0
+--let $session=1
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+
+set session rocksdb_merge_buf_size=250;
+set session rocksdb_merge_combine_read_size=1000;
+set session rocksdb_merge_tmp_file_removal_delay_ms=1000;
+
+CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
+
+--disable_query_log
+let $max = 100;
+let $i = 1;
+while ($i <= $max) {
+ let $insert = INSERT INTO t1 VALUES ($i, FLOOR(RAND() * 100));
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+
+let $start= `SELECT UNIX_TIMESTAMP()`;
+# This should take much longer than normal because each merge-file deletion
+# sleeps for 1 second. There should be about 13 buffers, so the ALTER should
+# take at least ~13 seconds (the assert below allows slack and checks >= 10).
+ALTER TABLE t1 ADD INDEX kj(j), ALGORITHM=INPLACE;
+let $stop_exec= `SELECT UNIX_TIMESTAMP()`;
+
+let $time_diff= `SELECT ($stop_exec - $start)`;
+let $assert_text= Alter should have taken at least 10 seconds;
+let $assert_cond= $time_diff >= 10;
+source include/assert.inc;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_new_table_reader_for_compaction_inputs_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_new_table_reader_for_compaction_inputs_basic.test
new file mode 100644
index 00000000000..1d2ea6e6663
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_new_table_reader_for_compaction_inputs_basic.test
@@ -0,0 +1,7 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_NEW_TABLE_READER_FOR_COMPACTION_INPUTS
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_no_block_cache_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_no_block_cache_basic.test
new file mode 100644
index 00000000000..be1e3e88392
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_no_block_cache_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_NO_BLOCK_CACHE
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_override_cf_options_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_override_cf_options_basic.test
new file mode 100644
index 00000000000..1f4325b89d6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_override_cf_options_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_OVERRIDE_CF_OPTIONS
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_paranoid_checks_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_paranoid_checks_basic.test
new file mode 100644
index 00000000000..5bdd9d3d50b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_paranoid_checks_basic.test
@@ -0,0 +1,7 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_PARANOID_CHECKS
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_pause_background_work_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_pause_background_work_basic.test
new file mode 100644
index 00000000000..3f2f6bc703e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_pause_background_work_basic.test
@@ -0,0 +1,20 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_PAUSE_BACKGROUND_WORK
+--let $read_only=0
+--let $session=0
+--let $sticky=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_perf_context_level_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_perf_context_level_basic.test
new file mode 100644
index 00000000000..46f74578471
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_perf_context_level_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(2);
+INSERT INTO valid_values VALUES(3);
+INSERT INTO valid_values VALUES(4);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_PERF_CONTEXT_LEVEL
+--let $read_only=0
+--let $session=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_persistent_cache_path_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_persistent_cache_path_basic.test
new file mode 100644
index 00000000000..1a1146a17cc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_persistent_cache_path_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES('abc');
+INSERT INTO valid_values VALUES('def');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+
+--let $sys_var=ROCKSDB_PERSISTENT_CACHE_PATH
+--let $read_only=1
+--let $session=0
+--let $sticky=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_persistent_cache_size_mb_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_persistent_cache_size_mb_basic.test
new file mode 100644
index 00000000000..7f21d96f62c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_persistent_cache_size_mb_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_PERSISTENT_CACHE_SIZE_MB
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_pin_l0_filter_and_index_blocks_in_cache_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_pin_l0_filter_and_index_blocks_in_cache_basic.test
new file mode 100644
index 00000000000..d25131062d4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_pin_l0_filter_and_index_blocks_in_cache_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_PIN_L0_FILTER_AND_INDEX_BLOCKS_IN_CACHE
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_print_snapshot_conflict_queries_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_print_snapshot_conflict_queries_basic.test
new file mode 100644
index 00000000000..24d2f182fe8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_print_snapshot_conflict_queries_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_PRINT_SNAPSHOT_CONFLICT_QUERIES
+--let $read_only=0
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_rate_limiter_bytes_per_sec_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_rate_limiter_bytes_per_sec_basic.test
new file mode 100644
index 00000000000..8277011831a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_rate_limiter_bytes_per_sec_basic.test
@@ -0,0 +1,63 @@
+--source include/have_rocksdb.inc
+
+# Attempt to set the value - this should generate a warning because the rate
+# limiter is currently disabled (0) and it can't be changed to or from 0
+SET @@global.rocksdb_rate_limiter_bytes_per_sec = 10000;
+
+# Now shut down and come back up with the rate limiter enabled and retest setting the variable
+
+# Write file to make mysql-test-run.pl expect the "crash", but don't restart the
+# server until it is told to
+--let $_server_id= `SELECT @@server_id`
+--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$_server_id.expect
+--exec echo "wait" >$_expect_file_name
+
+# Send shutdown to the connected server and give it 10 seconds to die before
+# zapping it
+shutdown_server 10;
+
+# Attempt to restart the server with the rate limiter on
+--exec echo "restart:--rocksdb_rate_limiter_bytes_per_sec=10000" >$_expect_file_name
+--sleep 5
+
+# Wait for reconnect
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
+
+# The valid_values table lists the values that the system should allow us to
+# set for rocksdb_rate_limiter_bytes_per_sec
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1), (1000), (1000000), (1000000000), (1000000000000);
+
+# The invalid_values table lists the values that we don't want to allow for the variable
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\''), (3.14);
+
+# Test all the valid and invalid values
+--let $sys_var=ROCKSDB_RATE_LIMITER_BYTES_PER_SEC
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+
+# Zero is an invalid value when the rate limiter is turned on; the SET command
+# won't reject it, but it will generate a warning.
+
+# Attempt to set the value to 0 - this should generate a warning as we can't set it to or from 0
+SET @@global.rocksdb_rate_limiter_bytes_per_sec = 0;
+
+# Attempt to set the value to -1 - this should first truncate to 0 and then generate a warning as
+# we can't set it to or from 0
+SET @@global.rocksdb_rate_limiter_bytes_per_sec = -1;
+
+# Restart the server without the rate limiter
+--exec echo "wait" >$_expect_file_name
+shutdown_server 10;
+--exec echo "restart" >$_expect_file_name
+--sleep 5
+
+# Wait for reconnect
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_basic.test
new file mode 100644
index 00000000000..f37f75b4ac5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_basic.test
@@ -0,0 +1,19 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES('PK_SK');
+INSERT INTO valid_values VALUES('OFF');
+INSERT INTO valid_values VALUES('PK_ONLY');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('a');
+
+--let $sys_var=ROCKSDB_READ_FREE_RPL
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+SET GLOBAL ROCKSDB_READ_FREE_RPL=DEFAULT;
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_tables_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_tables_basic.test
new file mode 100644
index 00000000000..a2c900c91a9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_tables_basic.test
@@ -0,0 +1,20 @@
+--source include/have_rocksdb.inc
+
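+# The invalid '*' pattern below makes the server log an error; suppress it.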
+call mtr.add_suppression(".*Invalid pattern in rocksdb_read_free_rpl_tables.*");
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES('a');
+INSERT INTO valid_values VALUES('b');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'*\'');
+
+--let $sys_var=ROCKSDB_READ_FREE_RPL_TABLES
+--let $read_only=0
+--source include/rocksdb_sys_var.inc
+
+SET GLOBAL ROCKSDB_READ_FREE_RPL_TABLES=NULL;
+SET GLOBAL ROCKSDB_READ_FREE_RPL_TABLES=DEFAULT;
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_records_in_range_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_records_in_range_basic.test
new file mode 100644
index 00000000000..21503475e3e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_records_in_range_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(222333);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_RECORDS_IN_RANGE
+--let $read_only=0
+--let $session=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_remove_mariabackup_checkpoint_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_remove_mariabackup_checkpoint_basic.test
new file mode 100644
index 00000000000..30f38283ba4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_remove_mariabackup_checkpoint_basic.test
@@ -0,0 +1,5 @@
+# Simulate creating and removing a mariabackup checkpoint twice
+SET GLOBAL rocksdb_create_checkpoint=CONCAT(@@rocksdb_datadir,'/mariabackup-checkpoint');
+SET GLOBAL rocksdb_remove_mariabackup_checkpoint=ON;
+SET GLOBAL rocksdb_create_checkpoint=CONCAT(@@rocksdb_datadir,'/mariabackup-checkpoint');
+SET GLOBAL rocksdb_remove_mariabackup_checkpoint=ON;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_reset_stats_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_reset_stats_basic.test
new file mode 100644
index 00000000000..62f75a3bcc5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_reset_stats_basic.test
@@ -0,0 +1,21 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_RESET_STATS
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_rollback_on_timeout_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_rollback_on_timeout_basic.test
new file mode 100644
index 00000000000..793b7752198
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_rollback_on_timeout_basic.test
@@ -0,0 +1,21 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_ROLLBACK_ON_TIMEOUT
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_seconds_between_stat_computes_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_seconds_between_stat_computes_basic.test
new file mode 100644
index 00000000000..53c2e6e62bf
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_seconds_between_stat_computes_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_SECONDS_BETWEEN_STAT_COMPUTES
+--let $read_only=0
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_signal_drop_index_thread_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_signal_drop_index_thread_basic.test
new file mode 100644
index 00000000000..ea90c7b7c58
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_signal_drop_index_thread_basic.test
@@ -0,0 +1,19 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_SIGNAL_DROP_INDEX_THREAD
+--let $read_only=0
+--let $session=0
+--let $sticky=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sim_cache_size_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sim_cache_size_basic.test
new file mode 100644
index 00000000000..a82d50e1d03
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sim_cache_size_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_SIM_CACHE_SIZE
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_bloom_filter_on_read_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_bloom_filter_on_read_basic.test
new file mode 100644
index 00000000000..82b56e0bbcb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_bloom_filter_on_read_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_SKIP_BLOOM_FILTER_ON_READ
+--let $read_only=0
+--let $session=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_fill_cache_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_fill_cache_basic.test
new file mode 100644
index 00000000000..cc1b608b7b3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_fill_cache_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_SKIP_FILL_CACHE
+--let $read_only=0
+--let $session=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_unique_check_tables_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_unique_check_tables_basic.test
new file mode 100644
index 00000000000..50c2354d883
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_unique_check_tables_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES("aaa");
+INSERT INTO valid_values VALUES("bbb");
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+
+--let $sys_var=ROCKSDB_SKIP_UNIQUE_CHECK_TABLES
+--let $read_only=0
+--let $session=1
+--source include/rocksdb_sys_var.inc
+
+SET GLOBAL ROCKSDB_SKIP_UNIQUE_CHECK_TABLES=NULL;
+SET GLOBAL ROCKSDB_SKIP_UNIQUE_CHECK_TABLES=DEFAULT;
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sst_mgr_rate_bytes_per_sec_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sst_mgr_rate_bytes_per_sec_basic.test
new file mode 100644
index 00000000000..3492596d74b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sst_mgr_rate_bytes_per_sec_basic.test
@@ -0,0 +1,22 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+
+--let $sys_var=ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_stats_dump_period_sec_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_stats_dump_period_sec_basic.test
new file mode 100644
index 00000000000..2fbb0c6ea6d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_stats_dump_period_sec_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_STATS_DUMP_PERIOD_SEC
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_stats_level_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_stats_level_basic.test
new file mode 100644
index 00000000000..89b0878fd0c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_stats_level_basic.test
@@ -0,0 +1,21 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(4);
+INSERT INTO valid_values VALUES(2);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+
+--let $sys_var=ROCKSDB_STATS_LEVEL
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_stats_recalc_rate_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_stats_recalc_rate_basic.test
new file mode 100644
index 00000000000..a3b9059b2b1
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_stats_recalc_rate_basic.test
@@ -0,0 +1,17 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'123\'');
+
+--let $sys_var=ROCKSDB_STATS_RECALC_RATE
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_store_row_debug_checksums_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_store_row_debug_checksums_basic.test
new file mode 100644
index 00000000000..e3faca86717
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_store_row_debug_checksums_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_STORE_ROW_DEBUG_CHECKSUMS
+--let $read_only=0
+--let $session=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_strict_collation_check_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_strict_collation_check_basic.test
new file mode 100644
index 00000000000..17aa63b8bb3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_strict_collation_check_basic.test
@@ -0,0 +1,19 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_STRICT_COLLATION_CHECK
+--let $read_only=0
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_strict_collation_exceptions_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_strict_collation_exceptions_basic.test
new file mode 100644
index 00000000000..4eb96488840
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_strict_collation_exceptions_basic.test
@@ -0,0 +1,35 @@
+--source include/have_rocksdb.inc
+
+# We cannot use the rocksdb_sys_var.inc script here because some of the strings
+# we set need to be quoted, and that doesn't work with this script. Run through
+# the valid options by hand.
+
+SET @start_global_value = @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS;
+SELECT @start_global_value;
+
+--echo "Trying to set @session.ROCKSDB_STRICT_COLLATION_EXCEPTIONS to simple table name."
+SET @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS = mytable;
+SELECT @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS;
+
+--echo "Trying to set @session.ROCKSDB_STRICT_COLLATION_EXCEPTIONS to regex table name(s)."
+SET @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS = "t.*";
+SELECT @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS;
+
+--echo "Trying to set @session.ROCKSDB_STRICT_COLLATION_EXCEPTIONS to multiple regex table names."
+SET @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS = "s.*,t.*";
+SELECT @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS;
+
+--echo "Trying to set @session.ROCKSDB_STRICT_COLLATION_EXCEPTIONS to empty."
+SET @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS = "";
+SELECT @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS;
+
+--echo "Trying to set @session.ROCKSDB_STRICT_COLLATION_EXCEPTIONS to default."
+SET @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS = DEFAULT;
+SELECT @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS;
+
+--echo "Trying to set @session.ROCKSDB_STRICT_COLLATION_EXCEPTIONS to 444. It should fail because it is not session."
+--Error ER_GLOBAL_VARIABLE
+SET @@session.ROCKSDB_STRICT_COLLATION_EXCEPTIONS = 444;
+
+SET @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS = @start_global_value;
+SELECT @@global.ROCKSDB_STRICT_COLLATION_EXCEPTIONS;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_supported_compression_types_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_supported_compression_types_basic.test
new file mode 100644
index 00000000000..52bf63c21cc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_supported_compression_types_basic.test
@@ -0,0 +1,7 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_SUPPORTED_COMPRESSION_TYPES
+--let $read_only=1
+--let $session=0
+--let $suppress_default_value=1
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_table_cache_numshardbits_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_table_cache_numshardbits_basic.test
new file mode 100644
index 00000000000..11bdd6abce8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_table_cache_numshardbits_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_TABLE_CACHE_NUMSHARDBITS
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_table_stats_sampling_pct_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_table_stats_sampling_pct_basic.test
new file mode 100644
index 00000000000..3bed5e6ec73
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_table_stats_sampling_pct_basic.test
@@ -0,0 +1,22 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+
+--let $sys_var=ROCKSDB_TABLE_STATS_SAMPLING_PCT
+--let $read_only=0
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_tmpdir_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_tmpdir_basic.test
new file mode 100644
index 00000000000..8865914dd18
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_tmpdir_basic.test
@@ -0,0 +1,38 @@
+--source include/have_rocksdb.inc
+
+SET @start_global_value = @@global.rocksdb_tmpdir;
+SELECT @start_global_value;
+
+#
+# exists as global and session
+#
+select @@session.rocksdb_tmpdir;
+
+show global variables like 'rocksdb_tmpdir';
+show session variables like 'rocksdb_tmpdir';
+
+select * from information_schema.global_variables where variable_name='rocksdb_tmpdir';
+select * from information_schema.session_variables where variable_name='rocksdb_tmpdir';
+
+#
+# Show that it is writable
+#
+
+set global rocksdb_tmpdir='value';
+set session rocksdb_tmpdir='value';
+
+#
+# incorrect types
+#
+--error ER_WRONG_TYPE_FOR_VAR
+set global rocksdb_tmpdir=1.1;
+--error ER_WRONG_TYPE_FOR_VAR
+set global rocksdb_tmpdir=1e1;
+
+#
+# Cleanup
+#
+
+SET @@global.rocksdb_tmpdir = @start_global_value;
+SELECT @@global.rocksdb_tmpdir;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_trace_sst_api_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_trace_sst_api_basic.test
new file mode 100644
index 00000000000..259021d31d3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_trace_sst_api_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_TRACE_SST_API
+--let $read_only=0
+--let $session=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_two_write_queues_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_two_write_queues_basic.test
new file mode 100644
index 00000000000..43579faba82
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_two_write_queues_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_TWO_WRITE_QUEUES
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_unsafe_for_binlog_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_unsafe_for_binlog_basic.test
new file mode 100644
index 00000000000..f5f4536d769
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_unsafe_for_binlog_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_UNSAFE_FOR_BINLOG
+--let $read_only=0
+--let $session=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options.test
new file mode 100644
index 00000000000..03626260cab
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options.test
@@ -0,0 +1,22 @@
+--source include/have_rocksdb.inc
+
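+# Exercise repeated assignments to rocksdb_update_cf_options, including
+# setting it to NULL and to an empty string more than once, while a table
+# using a custom column family exists and again after it has been dropped.
+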
+CREATE TABLE t1 (a INT, PRIMARY KEY (a) COMMENT 'update_cf1') ENGINE=ROCKSDB;
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};';
+SELECT @@global.rocksdb_update_cf_options;
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL;
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL;
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS="";
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL;
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};';
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf2={write_buffer_size=8m;target_file_size_base=2m};';
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+DROP TABLE t1;
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};';
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=DEFAULT;
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test
new file mode 100644
index 00000000000..9462e40aaf0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test
@@ -0,0 +1,119 @@
+--source include/have_rocksdb.inc
+--source include/have_partition.inc
+
+call mtr.add_suppression("MyRocks: NULL is not a valid option for updates to column family settings.");
+call mtr.add_suppression("Invalid cf options, '=' expected *");
+call mtr.add_suppression("MyRocks: failed to parse the updated column family options = *");
+call mtr.add_suppression("Invalid cf config for default in override options *");
+
+DROP TABLE IF EXISTS t1;
+
+# We need a table with multiple partitions and column families associated with
+# them to make sure that the option updates below are tested in a valid scenario.
+CREATE TABLE `t1` (
+ `col1` bigint(20) NOT NULL,
+ `col2` varbinary(64) NOT NULL,
+ `col3` varbinary(256) NOT NULL,
+ `col4` bigint(20) NOT NULL,
+ `col5` mediumblob NOT NULL,
+ PRIMARY KEY (`col1`,`col2`,`col3`) COMMENT 'custom_p0_cfname=cf1;custom_p1_cfname=cf2',
+ UNIQUE KEY (`col2`, `col4`) COMMENT 'custom_p5_cfname=cf3'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ PARTITION BY LIST COLUMNS (`col2`) (
+ PARTITION custom_p0 VALUES IN (0x12345),
+ PARTITION custom_p1 VALUES IN (0x23456),
+ PARTITION custom_p2 VALUES IN (0x34567),
+ PARTITION custom_p3 VALUES IN (0x45678),
+ PARTITION custom_p4 VALUES IN (0x56789),
+ PARTITION custom_p5 VALUES IN (0x6789A),
+ PARTITION custom_p6 VALUES IN (0x789AB),
+ PARTITION custom_p7 VALUES IN (0x89ABC)
+);
+
+USE information_schema;
+
+# We should start with NULL.
+SELECT @@global.rocksdb_update_cf_options;
+
+# ... and we should be able to handle NULL and issue a reasonable warning.
+SET @@global.rocksdb_update_cf_options = NULL;
+SELECT @@global.rocksdb_update_cf_options;
+
+# Make sure that we do not double free the NULL string
+SET @@global.rocksdb_update_cf_options = NULL;
+SELECT @@global.rocksdb_update_cf_options;
+
+# Attempt setting an empty string
+SET @@global.rocksdb_update_cf_options = '';
+SELECT @@global.rocksdb_update_cf_options;
+
+# Will fail to parse. Value not updated.
+--Error ER_WRONG_VALUE_FOR_VAR
+--eval SET @@global.rocksdb_update_cf_options = 'aaaaa';
+SELECT @@global.rocksdb_update_cf_options;
+
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+
+# Save these off to reset later
+--let $ORIG_WRITE_BUFFER_SIZE=`SELECT VALUE FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='WRITE_BUFFER_SIZE'`
+--let $ORIG_TARGET_FILE_SIZE_BASE=`SELECT VALUE FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE'`
+--let $ORIG_MAX_BYTES_FOR_LEVEL_MULTIPLIER=`SELECT VALUE FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='MAX_BYTES_FOR_LEVEL_MULTIPLIER'`
+
+# All good. Use default CF.
+SET @@global.rocksdb_update_cf_options = 'default={write_buffer_size=8m;target_file_size_base=2m};';
+SELECT @@global.rocksdb_update_cf_options;
+
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf2' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf2' AND OPTION_TYPE='MAX_BYTES_FOR_LEVEL_MULTIPLIER';
+
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf3' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+
+# All good. Use multiple valid CF-s.
+SET @@global.rocksdb_update_cf_options = 'cf1={write_buffer_size=8m;target_file_size_base=2m};cf2={write_buffer_size=16m;max_bytes_for_level_multiplier=8};cf3={target_file_size_base=4m};';
+SELECT @@global.rocksdb_update_cf_options;
+
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf2' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf2' AND OPTION_TYPE='MAX_BYTES_FOR_LEVEL_MULTIPLIER';
+
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf3' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+
+# All good. Use a single valid CF.
+SET @@global.rocksdb_update_cf_options = 'cf3={target_file_size_base=24m};';
+SELECT @@global.rocksdb_update_cf_options;
+
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf3' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+
+# Some parts are good. Value still updated.
+SET @@global.rocksdb_update_cf_options = 'cf1={target_file_size_base=24m};foo={max_bytes_for_level_multiplier=8};';
+SELECT @@global.rocksdb_update_cf_options;
+
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+
+# Will fail to parse. No valid assignments included. The value is not updated
+# with the new setting; instead it is reset to NULL.
+--Error ER_WRONG_VALUE_FOR_VAR
+--eval SET @@global.rocksdb_update_cf_options = 'default={foo=bar};';
+SELECT @@global.rocksdb_update_cf_options;
+
+# Reset the cf options so the test passes with --repeat=2
+--eval SET @@global.rocksdb_update_cf_options = 'default={write_buffer_size=$ORIG_WRITE_BUFFER_SIZE;target_file_size_base=$ORIG_TARGET_FILE_SIZE_BASE};'
+--eval SET @@global.rocksdb_update_cf_options = 'cf1={write_buffer_size=$ORIG_WRITE_BUFFER_SIZE;target_file_size_base=$ORIG_TARGET_FILE_SIZE_BASE};'
+--eval SET @@global.rocksdb_update_cf_options = 'cf2={write_buffer_size=$ORIG_WRITE_BUFFER_SIZE;target_file_size_base=$ORIG_TARGET_FILE_SIZE_BASE;max_bytes_for_level_multiplier=$ORIG_MAX_BYTES_FOR_LEVEL_MULTIPLIER};'
+--eval SET @@global.rocksdb_update_cf_options = 'cf3={write_buffer_size=$ORIG_WRITE_BUFFER_SIZE;target_file_size_base=$ORIG_TARGET_FILE_SIZE_BASE};'
+
+SET @@global.rocksdb_update_cf_options = NULL;
+SELECT @@global.rocksdb_update_cf_options;
+
+USE test;
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_adaptive_mutex_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_adaptive_mutex_basic.test
new file mode 100644
index 00000000000..7ce7bec1f6e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_adaptive_mutex_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_USE_ADAPTIVE_MUTEX
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_clock_cache_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_clock_cache_basic.test
new file mode 100644
index 00000000000..d3e7b652039
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_clock_cache_basic.test
@@ -0,0 +1,21 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_USE_CLOCK_CACHE
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_direct_io_for_flush_and_compaction_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_direct_io_for_flush_and_compaction_basic.test
new file mode 100644
index 00000000000..f5dde2aa0a3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_direct_io_for_flush_and_compaction_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_USE_DIRECT_IO_FOR_FLUSH_AND_COMPACTION
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_direct_reads_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_direct_reads_basic.test
new file mode 100644
index 00000000000..323b517f178
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_direct_reads_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_USE_DIRECT_READS
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_fsync_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_fsync_basic.test
new file mode 100644
index 00000000000..90b41c4aa57
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_fsync_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_USE_FSYNC
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_validate_tables_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_validate_tables_basic.test
new file mode 100644
index 00000000000..ed12b319cfc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_validate_tables_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_VALIDATE_TABLES
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_verify_row_debug_checksums_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_verify_row_debug_checksums_basic.test
new file mode 100644
index 00000000000..352bc9d9cf0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_verify_row_debug_checksums_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_VERIFY_ROW_DEBUG_CHECKSUMS
+--let $read_only=0
+--let $session=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_bytes_per_sync_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_bytes_per_sync_basic.test
new file mode 100644
index 00000000000..9c2a1f4f391
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_bytes_per_sync_basic.test
@@ -0,0 +1,22 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+
+--let $sys_var=ROCKSDB_WAL_BYTES_PER_SYNC
+--let $read_only=0
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_dir_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_dir_basic.test
new file mode 100644
index 00000000000..a40c77669f2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_dir_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_WAL_DIR
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_recovery_mode_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_recovery_mode_basic.test
new file mode 100644
index 00000000000..ce202f2e2b5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_recovery_mode_basic.test
@@ -0,0 +1,17 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_WAL_RECOVERY_MODE
+--let $read_only=0
+--let $session=0
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_size_limit_mb_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_size_limit_mb_basic.test
new file mode 100644
index 00000000000..95880ea3e63
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_size_limit_mb_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_WAL_SIZE_LIMIT_MB
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_ttl_seconds_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_ttl_seconds_basic.test
new file mode 100644
index 00000000000..e65d3851392
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_ttl_seconds_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_WAL_TTL_SECONDS
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_whole_key_filtering_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_whole_key_filtering_basic.test
new file mode 100644
index 00000000000..83e8e2382a2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_whole_key_filtering_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_WHOLE_KEY_FILTERING
+--let $read_only=1
+--let $session=0
+--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_write_batch_max_bytes_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_write_batch_max_bytes_basic.test
new file mode 100644
index 00000000000..40d22373fbe
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_write_batch_max_bytes_basic.test
@@ -0,0 +1,26 @@
+--source include/have_rocksdb.inc
+
+create table t (i int);
+
+insert into t values (1), (2), (3), (4), (5);
+
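+# A 1000-byte batch limit is large enough for this five-row insert to succeed.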
+set session rocksdb_write_batch_max_bytes = 1000;
+
+insert into t values (1), (2), (3), (4), (5);
+
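+# A 10-byte batch limit is too small, so the insert should fail with an engine error.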
+set session rocksdb_write_batch_max_bytes = 10;
+
+--error ER_GET_ERRMSG
+insert into t values (1), (2), (3), (4), (5);
+
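+# Zero disables the write batch size limit.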
+set session rocksdb_write_batch_max_bytes = 0;
+
+insert into t values (1), (2), (3), (4), (5);
+
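+# The limit is also enforced inside an explicit transaction.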
+set session rocksdb_write_batch_max_bytes = 10;
+begin;
+--error ER_GET_ERRMSG
+insert into t values (1), (2), (3), (4), (5);
+rollback;
+
+drop table t;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_write_disable_wal_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_write_disable_wal_basic.test
new file mode 100644
index 00000000000..d732bebac7f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_write_disable_wal_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_WRITE_DISABLE_WAL
+--let $read_only=0
+--let $session=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_write_ignore_missing_column_families_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_write_ignore_missing_column_families_basic.test
new file mode 100644
index 00000000000..f38b7c9601d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_write_ignore_missing_column_families_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_WRITE_IGNORE_MISSING_COLUMN_FAMILIES
+--let $read_only=0
+--let $session=1
+--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_write_policy_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_write_policy_basic.test
new file mode 100644
index 00000000000..720b9d67378
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_write_policy_basic.test
@@ -0,0 +1,17 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES("write_committed");
+INSERT INTO valid_values VALUES("write_prepared");
+INSERT INTO valid_values VALUES("write_unprepared");
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_WRITE_POLICY
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/storage_engine/cache_index.rdiff b/storage/rocksdb/mysql-test/storage_engine/cache_index.rdiff
new file mode 100644
index 00000000000..76e7705e377
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/cache_index.rdiff
@@ -0,0 +1,71 @@
+--- /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/cache_index.result 2017-06-22 00:33:46.419995639 +0300
++++ /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/cache_index.reject 2017-06-22 01:11:22.479981459 +0300
+@@ -12,31 +12,31 @@
+ SET GLOBAL <CACHE_NAME>.key_buffer_size=128*1024;
+ CACHE INDEX t1 INDEX (a), t2 IN <CACHE_NAME>;
+ Table Op Msg_type Msg_text
+-test.t1 assign_to_keycache status OK
+-test.t2 assign_to_keycache status OK
++test.t1 assign_to_keycache note The storage engine for the table doesn't support assign_to_keycache
++test.t2 assign_to_keycache note The storage engine for the table doesn't support assign_to_keycache
+ LOAD INDEX INTO CACHE t1, t2;
+ Table Op Msg_type Msg_text
+-test.t1 preload_keys status OK
+-test.t2 preload_keys status OK
++test.t1 preload_keys note The storage engine for the table doesn't support preload_keys
++test.t2 preload_keys note The storage engine for the table doesn't support preload_keys
+ INSERT INTO t1 (a,b) VALUES (3,'c'),(4,'d');
+ SET GLOBAL <CACHE_NAME>.key_buffer_size=8*1024;
+ LOAD INDEX INTO CACHE t1, t2 IGNORE LEAVES;
+ Table Op Msg_type Msg_text
+-test.t1 preload_keys status OK
+-test.t2 preload_keys status OK
++test.t1 preload_keys note The storage engine for the table doesn't support preload_keys
++test.t2 preload_keys note The storage engine for the table doesn't support preload_keys
+ SET GLOBAL <CACHE_NAME>.key_cache_age_threshold = 100, <CACHE_NAME>.key_cache_block_size = 512, <CACHE_NAME>.key_cache_division_limit = 1, <CACHE_NAME>.key_cache_segments=2;
+ INSERT INTO t1 (a,b) VALUES (5,'e'),(6,'f');
+ LOAD INDEX INTO CACHE t1;
+ Table Op Msg_type Msg_text
+-test.t1 preload_keys status OK
++test.t1 preload_keys note The storage engine for the table doesn't support preload_keys
+ SET GLOBAL new_<CACHE_NAME>.key_buffer_size=128*1024;
+ CACHE INDEX t1 IN new_<CACHE_NAME>;
+ Table Op Msg_type Msg_text
+-test.t1 assign_to_keycache status OK
++test.t1 assign_to_keycache note The storage engine for the table doesn't support assign_to_keycache
+ INSERT INTO t1 (a,b) VALUES (7,'g'),(8,'h');
+ LOAD INDEX INTO CACHE t1 IGNORE LEAVES;
+ Table Op Msg_type Msg_text
+-test.t1 preload_keys status OK
++test.t1 preload_keys note The storage engine for the table doesn't support preload_keys
+ INSERT INTO t1 (a,b) VALUES (9,'i');
+ DROP TABLE t2;
+ DROP TABLE t1;
+@@ -47,11 +47,11 @@
+ ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
+ CACHE INDEX t1 IN <CACHE_NAME>;
+ Table Op Msg_type Msg_text
+-test.t1 assign_to_keycache status OK
++test.t1 assign_to_keycache note The storage engine for the table doesn't support assign_to_keycache
+ INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+ LOAD INDEX INTO CACHE t1;
+ Table Op Msg_type Msg_text
+-test.t1 preload_keys status OK
++test.t1 preload_keys note The storage engine for the table doesn't support preload_keys
+ DROP TABLE t1;
+ CREATE TABLE t1 (a <INT_COLUMN>,
+ b <CHAR_COLUMN>,
+@@ -59,11 +59,11 @@
+ ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
+ CACHE INDEX t1 IN <CACHE_NAME>;
+ Table Op Msg_type Msg_text
+-test.t1 assign_to_keycache status OK
++test.t1 assign_to_keycache note The storage engine for the table doesn't support assign_to_keycache
+ INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+ LOAD INDEX INTO CACHE t1;
+ Table Op Msg_type Msg_text
+-test.t1 preload_keys status OK
++test.t1 preload_keys note The storage engine for the table doesn't support preload_keys
+ DROP TABLE t1;
+ SET GLOBAL <CACHE_NAME>.key_buffer_size=0;
+ SET GLOBAL new_<CACHE_NAME>.key_buffer_size=0;
diff --git a/storage/rocksdb/mysql-test/storage_engine/checksum_table_live.rdiff b/storage/rocksdb/mysql-test/storage_engine/checksum_table_live.rdiff
new file mode 100644
index 00000000000..094136ee926
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/checksum_table_live.rdiff
@@ -0,0 +1,13 @@
+--- /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/checksum_table_live.result 2017-06-22 00:33:46.419995639 +0300
++++ /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/checksum_table_live.reject 2017-06-22 01:12:38.695980980 +0300
+@@ -11,8 +11,8 @@
+ test.t1 4272806499
+ CHECKSUM TABLE t1, t2 QUICK;
+ Table Checksum
+-test.t1 4272806499
+-test.t2 0
++test.t1 NULL
++test.t2 NULL
+ CHECKSUM TABLE t1, t2 EXTENDED;
+ Table Checksum
+ test.t1 4272806499
diff --git a/storage/rocksdb/mysql-test/storage_engine/cleanup_engine.inc b/storage/rocksdb/mysql-test/storage_engine/cleanup_engine.inc
new file mode 100644
index 00000000000..e6fe915ed38
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/cleanup_engine.inc
@@ -0,0 +1,25 @@
+###########################################
+#
+# This is a stub of the include file cleanup_engine.inc which should be
+# placed in the storage/<engine>/mysql-test/storage_engine folder.
+#
+################################
+#
+# Here you can add whatever is needed to clean up
+# in case your define_engine.inc created any artefacts,
+# e.g. an additional schema and/or tables.
+
+--let $datadir= `SELECT @@datadir`
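+# RocksDB keeps its data in <datadir>/#rocksdb. If it contains any files,
+# shut the server down, remove the directory, and restart so the next test
+# starts clean.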
+
+--error 0,1
+--file_exists $datadir/\#rocksdb/*
+if (!$mysql_errno)
+{
+ --enable_reconnect
+ --exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+ --shutdown_server
+ --source include/wait_until_disconnected.inc
+ --rmdir $datadir/\#rocksdb
+ --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+ --source include/wait_until_connected_again.inc
+}
diff --git a/storage/rocksdb/mysql-test/storage_engine/define_engine.inc b/storage/rocksdb/mysql-test/storage_engine/define_engine.inc
new file mode 100644
index 00000000000..1c77a6b6bb6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/define_engine.inc
@@ -0,0 +1,45 @@
+###########################################
+#
+# This is a template of the include file define_engine.inc which should be
+# placed in the storage/<engine>/mysql-test/storage_engine folder.
+#
+################################
+#
+# The name of the engine under test must be defined in $ENGINE variable.
+# You can set it either here (uncomment and edit) or in your environment.
+#
+let $ENGINE = RocksDB;
+#
+################################
+#
+# The following three variables define specific options for columns and tables.
+# Normally none should be needed, but for some engines it can be different.
+# If the engine requires a specific column option for all or for indexed
+# columns, set it inside the comment, e.g. /*!NOT NULL*/.
+# Do the same for table options if needed, e.g. /*!INSERT_METHOD=LAST*/
+
+let $default_col_opts = /*!*/;
+let $default_col_indexed_opts = /*!*/;
+let $default_tbl_opts = /*!*/;
+
+# INDEX, UNIQUE INDEX, PRIMARY KEY, special index type - choose the first that the engine allows,
+# or set it to /*!*/ if none is supported
+
+let $default_index = /*!INDEX*/;
+
+# If the engine does not support the following types, replace them with the closest possible
+
+let $default_int_type = INT(11);
+let $default_char_type = CHAR(8);
+
+################################
+
+--disable_query_log
+--disable_result_log
+
+# Here you can place your custom MTR code which needs to be executed before each test,
+# e.g. creation of an additional schema or table, etc.
+# The cleanup part should be defined in cleanup_engine.inc
+
+--enable_query_log
+--enable_result_log
diff --git a/storage/rocksdb/mysql-test/storage_engine/disabled.def b/storage/rocksdb/mysql-test/storage_engine/disabled.def
new file mode 100644
index 00000000000..7ed4fac6645
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/disabled.def
@@ -0,0 +1,27 @@
+alter_tablespace : Not supported
+autoinc_secondary : Not supported
+create_table : MDEV-12914 - Engine for temporary tables which are implicitly created as RocksDB is substituted silently
+delete_low_prio : Not supported
+foreign_keys : Not supported
+fulltext_search : Not supported
+handler : Not supported
+index_enable_disable : Not supported
+insert_delayed : Not supported
+insert_high_prio : Not supported
+insert_low_prio : Not supported
+lock : MDEV-13148 - LOCK TABLE on RocksDB table fails with a bogus error message
+lock_concurrent : MDEV-13148 - LOCK TABLE on RocksDB table fails with a bogus error message
+optimize_table : MDEV-13148 - LOCK TABLE on RocksDB table fails with a bogus error message
+repair_table : MDEV-13148 - LOCK TABLE on RocksDB table fails with a bogus error message
+select_high_prio : Not supported
+show_engine : SHOW ENGINE produces a different number of lines depending on previous tests
+show_table_status : MDEV-13152 - Indeterministic row number in SHOW TABLE STATUS on RocksDB table
+tbl_opt_data_dir : Not supported
+tbl_opt_index_dir : Not supported
+type_binary_indexes : MDEV-16387 - Wrong execution plan
+type_spatial : Not supported
+type_spatial_indexes : Not supported
+update_low_prio : Not supported
+update_ignore : MDEV-13151 - Indeterministic results of multi-table update on RocksDB tables
+update_multi : MDEV-13151 - Indeterministic results of multi-table update on RocksDB tables
+vcol : Not supported
diff --git a/storage/rocksdb/mysql-test/storage_engine/index.rdiff b/storage/rocksdb/mysql-test/storage_engine/index.rdiff
new file mode 100644
index 00000000000..76d6c6dd8e4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/index.rdiff
@@ -0,0 +1,60 @@
+--- suite/storage_engine/index.result 2017-03-12 04:57:07.169911845 +0200
++++ suite/storage_engine/index.reject 2017-08-14 22:41:06.548555663 +0300
+@@ -4,7 +4,7 @@
+ ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
+ SHOW KEYS IN t1;
+ Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+-t1 1 a 1 a # # NULL NULL # BTREE
++t1 1 a 1 a # # NULL NULL # LSMTREE
+ DROP TABLE t1;
+ CREATE TABLE t1 (a <INT_COLUMN>,
+ b <CHAR_COLUMN>,
+@@ -12,8 +12,8 @@
+ ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
+ SHOW KEYS IN t1;
+ Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+-t1 1 a_b 1 a # # NULL NULL # BTREE a_b index
+-t1 1 a_b 2 b # # NULL NULL # BTREE a_b index
++t1 1 a_b 1 a # # NULL NULL # LSMTREE a_b index
++t1 1 a_b 2 b # # NULL NULL # LSMTREE a_b index
+ DROP TABLE t1;
+ CREATE TABLE t1 (a <INT_COLUMN>,
+ b <CHAR_COLUMN>,
+@@ -22,8 +22,8 @@
+ ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
+ SHOW KEYS IN t1;
+ Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+-t1 1 a 1 a # # NULL NULL # BTREE
+-t1 1 b 1 b # # NULL NULL # BTREE
++t1 1 a 1 a # # NULL NULL # LSMTREE
++t1 1 b 1 b # # NULL NULL # LSMTREE
+ DROP TABLE t1;
+ CREATE TABLE t1 (a <INT_COLUMN>,
+ b <CHAR_COLUMN>,
+@@ -31,7 +31,7 @@
+ ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
+ SHOW KEYS IN t1;
+ Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+-t1 0 a 1 a # # NULL NULL # BTREE
++t1 0 a 1 a # # NULL NULL # LSMTREE
+ INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+ INSERT INTO t1 (a,b) VALUES (1,'c');
+ ERROR 23000: Duplicate entry '1' for key 'a'
+@@ -43,7 +43,7 @@
+ ALTER TABLE t1 ADD <CUSTOM_INDEX> (a) COMMENT 'simple index on a';
+ SHOW INDEX FROM t1;
+ Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+-t1 1 a 1 a # # NULL NULL # BTREE simple index on a
++t1 1 a 1 a # # NULL NULL # LSMTREE simple index on a
+ ALTER TABLE t1 DROP KEY a;
+ DROP TABLE t1;
+ CREATE TABLE t1 (a <INT_COLUMN>,
+@@ -52,7 +52,7 @@
+ ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
+ SHOW KEYS IN t1;
+ Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+-t1 0 a 1 a # # NULL NULL # BTREE
++t1 0 a 1 a # # NULL NULL # LSMTREE
+ INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+ INSERT INTO t1 (a,b) VALUES (1,'c');
+ ERROR 23000: Duplicate entry '1' for key 'a'
diff --git a/storage/rocksdb/mysql-test/storage_engine/index_type_btree.rdiff b/storage/rocksdb/mysql-test/storage_engine/index_type_btree.rdiff
new file mode 100644
index 00000000000..5fcffbea13b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/index_type_btree.rdiff
@@ -0,0 +1,60 @@
+--- suite/storage_engine/index_type_btree.result 2017-03-12 04:57:07.169911845 +0200
++++ suite/storage_engine/index_type_btree.reject 2017-08-14 22:50:47.264555216 +0300
+@@ -4,7 +4,7 @@
+ ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
+ SHOW KEYS IN t1;
+ Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+-t1 1 a 1 a # # NULL NULL # BTREE
++t1 1 a 1 a # # NULL NULL # LSMTREE
+ DROP TABLE t1;
+ CREATE TABLE t1 (a <INT_COLUMN>,
+ b <CHAR_COLUMN>,
+@@ -12,8 +12,8 @@
+ ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
+ SHOW KEYS IN t1;
+ Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+-t1 1 a_b 1 a # # NULL NULL # BTREE a_b index
+-t1 1 a_b 2 b # # NULL NULL # BTREE a_b index
++t1 1 a_b 1 a # # NULL NULL # LSMTREE a_b index
++t1 1 a_b 2 b # # NULL NULL # LSMTREE a_b index
+ DROP TABLE t1;
+ CREATE TABLE t1 (a <INT_COLUMN>,
+ b <CHAR_COLUMN>,
+@@ -22,8 +22,8 @@
+ ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
+ SHOW KEYS IN t1;
+ Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+-t1 1 a 1 a # # NULL NULL # BTREE
+-t1 1 b 1 b # # NULL NULL # BTREE
++t1 1 a 1 a # # NULL NULL # LSMTREE
++t1 1 b 1 b # # NULL NULL # LSMTREE
+ DROP TABLE t1;
+ CREATE TABLE t1 (a <INT_COLUMN>,
+ b <CHAR_COLUMN>,
+@@ -31,7 +31,7 @@
+ ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
+ SHOW KEYS IN t1;
+ Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+-t1 0 a 1 a # # NULL NULL # BTREE
++t1 0 a 1 a # # NULL NULL # LSMTREE
+ INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+ INSERT INTO t1 (a,b) VALUES (1,'c');
+ ERROR 23000: Duplicate entry '1' for key 'a'
+@@ -43,7 +43,7 @@
+ ALTER TABLE t1 ADD <CUSTOM_INDEX> (a) USING BTREE COMMENT 'simple index on a';
+ SHOW INDEX FROM t1;
+ Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+-t1 1 a 1 a # # NULL NULL # BTREE simple index on a
++t1 1 a 1 a # # NULL NULL # LSMTREE simple index on a
+ ALTER TABLE t1 DROP KEY a;
+ DROP TABLE t1;
+ CREATE TABLE t1 (a <INT_COLUMN>,
+@@ -52,7 +52,7 @@
+ ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
+ SHOW KEYS IN t1;
+ Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+-t1 0 a 1 a # # NULL NULL # BTREE
++t1 0 a 1 a # # NULL NULL # LSMTREE
+ INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+ INSERT INTO t1 (a,b) VALUES (1,'c');
+ ERROR 23000: Duplicate entry '1' for key 'a'
diff --git a/storage/rocksdb/mysql-test/storage_engine/index_type_hash.rdiff b/storage/rocksdb/mysql-test/storage_engine/index_type_hash.rdiff
new file mode 100644
index 00000000000..815b2983b87
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/index_type_hash.rdiff
@@ -0,0 +1,60 @@
+--- suite/storage_engine/index_type_hash.result 2017-03-12 04:57:07.169911845 +0200
++++ suite/storage_engine/index_type_hash.reject 2017-08-14 22:51:55.644555163 +0300
+@@ -4,7 +4,7 @@
+ ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
+ SHOW KEYS IN t1;
+ Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+-t1 1 a 1 a # # NULL NULL # HASH
++t1 1 a 1 a # # NULL NULL # LSMTREE
+ DROP TABLE t1;
+ CREATE TABLE t1 (a <INT_COLUMN>,
+ b <CHAR_COLUMN>,
+@@ -12,8 +12,8 @@
+ ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
+ SHOW KEYS IN t1;
+ Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+-t1 1 a_b 1 a # # NULL NULL # HASH a_b index
+-t1 1 a_b 2 b # # NULL NULL # HASH a_b index
++t1 1 a_b 1 a # # NULL NULL # LSMTREE a_b index
++t1 1 a_b 2 b # # NULL NULL # LSMTREE a_b index
+ DROP TABLE t1;
+ CREATE TABLE t1 (a <INT_COLUMN>,
+ b <CHAR_COLUMN>,
+@@ -22,8 +22,8 @@
+ ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
+ SHOW KEYS IN t1;
+ Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+-t1 1 a 1 a # # NULL NULL # HASH
+-t1 1 b 1 b # # NULL NULL # HASH
++t1 1 a 1 a # # NULL NULL # LSMTREE
++t1 1 b 1 b # # NULL NULL # LSMTREE
+ DROP TABLE t1;
+ CREATE TABLE t1 (a <INT_COLUMN>,
+ b <CHAR_COLUMN>,
+@@ -31,7 +31,7 @@
+ ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
+ SHOW KEYS IN t1;
+ Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+-t1 0 a 1 a # # NULL NULL # HASH
++t1 0 a 1 a # # NULL NULL # LSMTREE
+ INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+ INSERT INTO t1 (a,b) VALUES (1,'c');
+ ERROR 23000: Duplicate entry '1' for key 'a'
+@@ -43,7 +43,7 @@
+ ALTER TABLE t1 ADD <CUSTOM_INDEX> (a) USING HASH COMMENT 'simple index on a';
+ SHOW INDEX FROM t1;
+ Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+-t1 1 a 1 a # # NULL NULL # HASH simple index on a
++t1 1 a 1 a # # NULL NULL # LSMTREE simple index on a
+ ALTER TABLE t1 DROP KEY a;
+ DROP TABLE t1;
+ CREATE TABLE t1 (a <INT_COLUMN>,
+@@ -52,7 +52,7 @@
+ ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
+ SHOW KEYS IN t1;
+ Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+-t1 0 a 1 a # # NULL NULL # HASH
++t1 0 a 1 a # # NULL NULL # LSMTREE
+ INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+ INSERT INTO t1 (a,b) VALUES (1,'c');
+ ERROR 23000: Duplicate entry '1' for key 'a'
diff --git a/storage/rocksdb/mysql-test/storage_engine/mask_engine.inc b/storage/rocksdb/mysql-test/storage_engine/mask_engine.inc
new file mode 100644
index 00000000000..fc6cd02e3ef
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/mask_engine.inc
@@ -0,0 +1,15 @@
+#
+# This include file just replaces the name of the storage engine under test
+# with the generic string <STORAGE_ENGINE> in the next statement.
+# More masks can be added by defining $add_regex, e.g.
+# let $add_regex = /$data_dir/<DATA_DIR>/ /$index_dir/<INDEX_DIR>/
+#
+
+--let $regex = /$storage_engine/<STORAGE_ENGINE>/i / COLLATE[= ]latin1_bin//
+if ($add_regex)
+{
+ --let $regex = $regex $add_regex
+}
+
+--let $add_regex =
+--replace_regex $regex
+
diff --git a/storage/rocksdb/mysql-test/storage_engine/misc.rdiff b/storage/rocksdb/mysql-test/storage_engine/misc.rdiff
new file mode 100644
index 00000000000..694f3f54815
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/misc.rdiff
@@ -0,0 +1,25 @@
+--- /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/misc.result 2017-06-22 00:33:46.419995639 +0300
++++ /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/misc.reject 2017-06-22 02:34:23.647950149 +0300
+@@ -28,6 +28,9 @@
+ SELECT TABLE_NAME, COLUMN_NAME, REFERENCED_TABLE_NAME, REFERENCED_COLUMN_NAME
+ FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE ORDER BY TABLE_NAME;
+ TABLE_NAME COLUMN_NAME REFERENCED_TABLE_NAME REFERENCED_COLUMN_NAME
++Warning 1286 Unknown storage engine 'InnoDB'
++Warning 1286 Unknown storage engine 'InnoDB'
++Warnings:
+ column_stats column_name NULL NULL
+ column_stats db_name NULL NULL
+ column_stats table_name NULL NULL
+@@ -58,12 +61,6 @@
+ index_stats index_name NULL NULL
+ index_stats prefix_arity NULL NULL
+ index_stats table_name NULL NULL
+-innodb_index_stats database_name NULL NULL
+-innodb_index_stats index_name NULL NULL
+-innodb_index_stats stat_name NULL NULL
+-innodb_index_stats table_name NULL NULL
+-innodb_table_stats database_name NULL NULL
+-innodb_table_stats table_name NULL NULL
+ plugin name NULL NULL
+ proc db NULL NULL
+ proc name NULL NULL
diff --git a/storage/rocksdb/mysql-test/storage_engine/parts/checksum_table.rdiff b/storage/rocksdb/mysql-test/storage_engine/parts/checksum_table.rdiff
new file mode 100644
index 00000000000..bf3347a4341
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/parts/checksum_table.rdiff
@@ -0,0 +1,13 @@
+--- /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/parts/checksum_table.result 2017-06-22 00:33:46.419995639 +0300
++++ /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/parts/checksum_table.reject 2017-06-22 19:25:02.935568998 +0300
+@@ -31,8 +31,8 @@
+ test.t1 4272806499
+ CHECKSUM TABLE t1, t2 QUICK;
+ Table Checksum
+-test.t1 4272806499
+-test.t2 0
++test.t1 NULL
++test.t2 NULL
+ CHECKSUM TABLE t1, t2 EXTENDED;
+ Table Checksum
+ test.t1 4272806499
diff --git a/storage/rocksdb/mysql-test/storage_engine/parts/create_table.rdiff b/storage/rocksdb/mysql-test/storage_engine/parts/create_table.rdiff
new file mode 100644
index 00000000000..b2cb47a0927
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/parts/create_table.rdiff
@@ -0,0 +1,20 @@
+--- /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/parts/create_table.result 2017-06-22 00:33:46.419995639 +0300
++++ /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/parts/create_table.reject 2017-06-22 19:25:05.335568983 +0300
+@@ -65,7 +65,7 @@
+ 1 SIMPLE t1 abc,def # # # # # # #
+ EXPLAIN PARTITIONS SELECT a FROM t1 WHERE a = 100;
+ id select_type table partitions type possible_keys key key_len ref rows Extra
+-1 SIMPLE NULL NULL # # # # # # #
++1 SIMPLE t1 def # # # # # # #
+ INSERT INTO t1 (a) VALUES (50);
+ ERROR HY000: Table has no partition for value 50
+ DROP TABLE t1;
+@@ -81,7 +81,7 @@
+ 1 SIMPLE t1 abc_abcsp0,def_defsp0 # # # # # # #
+ EXPLAIN PARTITIONS SELECT a FROM t1 WHERE a = 100;
+ id select_type table partitions type possible_keys key key_len ref rows Extra
+-1 SIMPLE NULL NULL # # # # # # #
++1 SIMPLE t1 def_defsp0 # # # # # # #
+ SELECT TABLE_SCHEMA, TABLE_NAME, PARTITION_NAME, SUBPARTITION_NAME, PARTITION_METHOD, SUBPARTITION_METHOD
+ FROM INFORMATION_SCHEMA.PARTITIONS WHERE TABLE_NAME = 't1';
+ TABLE_SCHEMA TABLE_NAME PARTITION_NAME SUBPARTITION_NAME PARTITION_METHOD SUBPARTITION_METHOD
diff --git a/storage/rocksdb/mysql-test/storage_engine/parts/disabled.def b/storage/rocksdb/mysql-test/storage_engine/parts/disabled.def
new file mode 100644
index 00000000000..ef8ad5b3c82
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/parts/disabled.def
@@ -0,0 +1,3 @@
+alter_table : MDEV-13153 - Assertion `global_status_var.global_memory_used == 0'
+optimize_table : MDEV-13148 - LOCK TABLE on RocksDB table fails with a bogus error message
+repair_table : MDEV-13148 - LOCK TABLE on RocksDB table fails with a bogus error message
diff --git a/storage/rocksdb/mysql-test/storage_engine/parts/suite.opt b/storage/rocksdb/mysql-test/storage_engine/parts/suite.opt
new file mode 100644
index 00000000000..d77a822766f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/parts/suite.opt
@@ -0,0 +1 @@
+--ignore-db-dirs=#rocksdb --plugin-load=$HA_ROCKSDB_SO --binlog_format=ROW --loose-rocksdb_flush_log_at_trx_commit=0
diff --git a/storage/rocksdb/mysql-test/storage_engine/show_engine.rdiff b/storage/rocksdb/mysql-test/storage_engine/show_engine.rdiff
new file mode 100644
index 00000000000..15a9bb6f171
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/show_engine.rdiff
@@ -0,0 +1,15 @@
+--- suite/storage_engine/show_engine.result 2017-03-12 04:57:07.169911845 +0200
++++ suite/storage_engine/show_engine.reject 2017-08-14 22:58:15.508554871 +0300
+@@ -4,7 +4,11 @@
+ # volatile data (timestamps, memory info, etc.)
+ SHOW ENGINE <STORAGE_ENGINE> STATUS;
+ Type Name Status
+-<STORAGE_ENGINE> ### Engine status, can be long and changeable ###
++STATISTICS <STORAGE_ENGINE> ### Engine status, can be long and changeable ###
++DBSTATS <STORAGE_ENGINE> ### Engine status, can be long and changeable ###
++CF_COMPACTION __system__ ### Engine status, can be long and changeable ###
++CF_COMPACTION default ### Engine status, can be long and changeable ###
++MEMORY_STATS <STORAGE_ENGINE> ### Engine status, can be long and changeable ###
+ # For SHOW MUTEX even the number of lines is volatile, so the result logging is disabled,
+ # the test only checks that the command does not produce any errors
+ SHOW ENGINE <STORAGE_ENGINE> MUTEX;
diff --git a/storage/rocksdb/mysql-test/storage_engine/show_table_status.rdiff b/storage/rocksdb/mysql-test/storage_engine/show_table_status.rdiff
new file mode 100644
index 00000000000..d7252eb54ed
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/show_table_status.rdiff
@@ -0,0 +1,20 @@
+--- /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/show_table_status.result 2017-06-22 00:33:46.423995639 +0300
++++ /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/show_table_status.reject 2017-06-22 14:04:10.723690009 +0300
+@@ -19,7 +19,7 @@
+ Create_time ###
+ Update_time ###
+ Check_time NULL
+-Collation latin1_swedish_ci
++Collation latin1_bin
+ Checksum NULL
+ Create_options
+ Comment
+@@ -37,7 +37,7 @@
+ Create_time ###
+ Update_time ###
+ Check_time NULL
+-Collation latin1_swedish_ci
++Collation latin1_bin
+ Checksum NULL
+ Create_options
+ Comment
diff --git a/storage/rocksdb/mysql-test/storage_engine/suite.opt b/storage/rocksdb/mysql-test/storage_engine/suite.opt
new file mode 100644
index 00000000000..e6122c7ed3e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/suite.opt
@@ -0,0 +1 @@
+--ignore-db-dirs=#rocksdb --plugin-load=$HA_ROCKSDB_SO --binlog_format=ROW --collation-server=latin1_bin --loose-rocksdb_flush_log_at_trx_commit=0
diff --git a/storage/rocksdb/mysql-test/storage_engine/tbl_opt_insert_method.rdiff b/storage/rocksdb/mysql-test/storage_engine/tbl_opt_insert_method.rdiff
new file mode 100644
index 00000000000..20f594fbb40
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/tbl_opt_insert_method.rdiff
@@ -0,0 +1,11 @@
+--- /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/tbl_opt_insert_method.result 2017-06-22 00:33:46.423995639 +0300
++++ /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/tbl_opt_insert_method.reject 2017-06-22 02:39:45.243948128 +0300
+@@ -5,7 +5,7 @@
+ t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) DEFAULT NULL
+-) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 INSERT_METHOD=FIRST
++) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1
+ ALTER TABLE t1 INSERT_METHOD=NO;
+ SHOW CREATE TABLE t1;
+ Table Create Table
diff --git a/storage/rocksdb/mysql-test/storage_engine/tbl_opt_union.rdiff b/storage/rocksdb/mysql-test/storage_engine/tbl_opt_union.rdiff
new file mode 100644
index 00000000000..0d65ad0744a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/tbl_opt_union.rdiff
@@ -0,0 +1,16 @@
+--- /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/tbl_opt_union.result 2017-06-22 00:33:46.423995639 +0300
++++ /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/tbl_opt_union.reject 2017-06-22 02:41:02.719947641 +0300
+@@ -4,11 +4,11 @@
+ Table Create Table
+ t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL
+-) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 UNION=(`child1`)
++) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1
+ ALTER TABLE t1 UNION = (child1,child2);
+ SHOW CREATE TABLE t1;
+ Table Create Table
+ t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL
+-) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 UNION=(`child1`,`child2`)
++) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1
+ DROP TABLE t1, child1, child2;
diff --git a/storage/rocksdb/mysql-test/storage_engine/tbl_temporary.rdiff b/storage/rocksdb/mysql-test/storage_engine/tbl_temporary.rdiff
new file mode 100644
index 00000000000..d24806e7c9f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/tbl_temporary.rdiff
@@ -0,0 +1,24 @@
+--- /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/tbl_temporary.result 2017-06-22 00:33:46.423995639 +0300
++++ /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/tbl_temporary.reject 2017-06-22 15:27:50.643658456 +0300
+@@ -1,11 +1,14 @@
+ DROP TABLE IF EXISTS t1;
+ CREATE TABLE t1 (c CHAR(1)) ENGINE=MyISAM;
+ CREATE TEMPORARY TABLE t1 (a <INT_COLUMN>, b <CHAR_COLUMN>) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
+-SHOW CREATE TABLE t1;
+-Table Create Table
+-t1 CREATE TEMPORARY TABLE `t1` (
+- `a` int(11) DEFAULT NULL,
+- `b` char(8) DEFAULT NULL
+-) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1
+-DROP TEMPORARY TABLE t1;
++ERROR HY000: Table storage engine 'ROCKSDB' does not support the create option 'TEMPORARY'
++# ERROR: Statement ended with errno 1478, errname ER_ILLEGAL_HA_CREATE_OPTION (expected to succeed)
++# ------------ UNEXPECTED RESULT ------------
++# [ CREATE TEMPORARY TABLE t1 (a INT(11) /*!*/ /*Custom column options*/, b CHAR(8) /*!*/ /*Custom column options*/) ENGINE=RocksDB /*!*/ /*Custom table options*/ ]
++# The statement|command finished with ER_ILLEGAL_HA_CREATE_OPTION.
++# Temporary tables or the mix could be unsupported|malfunctioning, or the problem was caused by previous errors.
++# You can change the engine code, or create an rdiff, or disable the test by adding it to disabled.def.
++# Further in this test, the message might sometimes be suppressed; a part of the test might be skipped.
++# Also, this problem may cause a chain effect (more errors of different kinds in the test).
++# -------------------------------------------
+ DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/storage_engine/truncate_table.rdiff b/storage/rocksdb/mysql-test/storage_engine/truncate_table.rdiff
new file mode 100644
index 00000000000..9ca7861d51e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/truncate_table.rdiff
@@ -0,0 +1,24 @@
+--- /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/truncate_table.result 2017-06-22 00:33:46.423995639 +0300
++++ /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/truncate_table.reject 2017-06-22 02:43:27.183946733 +0300
+@@ -29,13 +29,12 @@
+ CREATE TABLE t1 (a <INT_COLUMN>, b <CHAR_COLUMN>) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
+ INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c');
+ HANDLER t1 OPEN AS h1;
+-HANDLER h1 READ FIRST;
+-a b
+-1 a
+-TRUNCATE TABLE t1;
+-HANDLER h1 READ NEXT;
+-ERROR 42S02: Unknown table 'h1' in HANDLER
+-HANDLER t1 OPEN AS h2;
+-HANDLER h2 READ FIRST;
+-a b
++ERROR HY000: Storage engine ROCKSDB of the table `test`.`t1` doesn't have this option
++# ------------ UNEXPECTED RESULT ------------
++# The statement|command finished with ER_ILLEGAL_HA.
++# HANDLER or the syntax or the mix could be unsupported.
++# You can change the engine code, or create an rdiff, or disable the test by adding it to disabled.def.
++# Further in this test, the message might sometimes be suppressed; a part of the test might be skipped.
++# Also, this problem may cause a chain effect (more errors of different kinds in the test).
++# -------------------------------------------
+ DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/storage_engine/trx/delete.rdiff b/storage/rocksdb/mysql-test/storage_engine/trx/delete.rdiff
new file mode 100644
index 00000000000..dac23b83579
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/trx/delete.rdiff
@@ -0,0 +1,10 @@
+--- /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/trx/delete.result 2017-06-22 00:33:46.423995639 +0300
++++ /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/trx/delete.reject 2017-06-22 19:29:36.827567276 +0300
+@@ -68,5 +68,7 @@
+ DELETE FROM t1;
+ INSERT INTO t1 (a,b) VALUES (1,'a');
+ ROLLBACK TO SAVEPOINT spt1;
++ERROR HY000: MyRocks currently does not support ROLLBACK TO SAVEPOINT if modifying rows.
+ COMMIT;
++ERROR HY000: This transaction was rolled back and cannot be committed. Only supported operation is to roll it back, so all pending changes will be discarded. Please restart another transaction.
+ DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/storage_engine/trx/disabled.def b/storage/rocksdb/mysql-test/storage_engine/trx/disabled.def
new file mode 100644
index 00000000000..4e227c10307
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/trx/disabled.def
@@ -0,0 +1,4 @@
+cons_snapshot_serializable : Not supported
+level_read_uncommitted : Not supported
+level_serializable : Not supported
+xa_recovery : MDEV-13155 - XA recovery not supported for RocksDB
diff --git a/storage/rocksdb/mysql-test/storage_engine/trx/insert.rdiff b/storage/rocksdb/mysql-test/storage_engine/trx/insert.rdiff
new file mode 100644
index 00000000000..36a71076a2b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/trx/insert.rdiff
@@ -0,0 +1,24 @@
+--- /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/trx/insert.result 2017-06-22 00:33:46.423995639 +0300
++++ /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/trx/insert.reject 2017-06-22 19:29:39.131567262 +0300
+@@ -37,18 +37,18 @@
+ INSERT INTO t1 SET a = 11, b = 'f';
+ INSERT t1 SET b = DEFAULT;
+ ROLLBACK TO SAVEPOINT spt1;
++ERROR HY000: MyRocks currently does not support ROLLBACK TO SAVEPOINT if modifying rows.
+ INSERT INTO t1 (b,a) VALUES ('test1',10);
++ERROR HY000: This transaction was rolled back and cannot be committed. Only supported operation is to roll it back, so all pending changes will be discarded. Please restart another transaction.
+ COMMIT;
++ERROR HY000: This transaction was rolled back and cannot be committed. Only supported operation is to roll it back, so all pending changes will be discarded. Please restart another transaction.
+ SELECT a,b FROM t1;
+ a b
+ 1 a
+-10 NULL
+ 10 foo
+-10 test1
+ 100 foo
+ 11 abc
+ 2 b
+-20 NULL
+ 3 c
+ 4 d
+ 5 e
diff --git a/storage/rocksdb/mysql-test/storage_engine/trx/level_read_committed.rdiff b/storage/rocksdb/mysql-test/storage_engine/trx/level_read_committed.rdiff
new file mode 100644
index 00000000000..6b9e4a3f4e9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/trx/level_read_committed.rdiff
@@ -0,0 +1,10 @@
+--- /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/trx/level_read_committed.result 2017-06-22 00:33:46.423995639 +0300
++++ /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/trx/level_read_committed.reject 2017-06-22 19:29:41.459567247 +0300
+@@ -77,6 +77,7 @@
+ CREATE TABLE t1 (a <INT_COLUMN>) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
+ SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+ START TRANSACTION WITH CONSISTENT SNAPSHOT;
++ERROR HY000: Only REPEATABLE READ isolation level is supported for START TRANSACTION WITH CONSISTENT SNAPSHOT in RocksDB Storage Engine.
+ connection con2;
+ INSERT INTO t1 (a) VALUES (1);
+ connection con1;
diff --git a/storage/rocksdb/mysql-test/storage_engine/trx/level_repeatable_read.rdiff b/storage/rocksdb/mysql-test/storage_engine/trx/level_repeatable_read.rdiff
new file mode 100644
index 00000000000..cf770755243
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/trx/level_repeatable_read.rdiff
@@ -0,0 +1,35 @@
+--- /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/trx/level_repeatable_read.result 2017-06-22 00:33:46.423995639 +0300
++++ /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/trx/level_repeatable_read.reject 2017-06-22 20:33:13.935543284 +0300
+@@ -24,8 +24,7 @@
+ SELECT a FROM t1;
+ a
+ INSERT INTO t1 (a) SELECT a+100 FROM t1;
+-ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+-# WARNING: Statement ended with errno 1205, errname 'ER_LOCK_WAIT_TIMEOUT'.
++# WARNING: Statement ended with errno 0, errname ''.
+ # If it differs from the result file, it might indicate a problem.
+ SELECT a FROM t1;
+ a
+@@ -47,22 +46,16 @@
+ # If it differs from the result file, it might indicate a problem.
+ SELECT a FROM t1;
+ a
+-201
+-202
+ COMMIT;
+ SELECT a FROM t1;
+ a
+ 1
+ 2
+-201
+-202
+ connection con2;
+ SELECT a FROM t1;
+ a
+ 1
+ 2
+-201
+-202
+ connection default;
+ disconnect con1;
+ disconnect con2;
diff --git a/storage/rocksdb/mysql-test/storage_engine/trx/suite.opt b/storage/rocksdb/mysql-test/storage_engine/trx/suite.opt
new file mode 100644
index 00000000000..d77a822766f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/trx/suite.opt
@@ -0,0 +1 @@
+--ignore-db-dirs=#rocksdb --plugin-load=$HA_ROCKSDB_SO --binlog_format=ROW --loose-rocksdb_flush_log_at_trx_commit=0
diff --git a/storage/rocksdb/mysql-test/storage_engine/trx/update.rdiff b/storage/rocksdb/mysql-test/storage_engine/trx/update.rdiff
new file mode 100644
index 00000000000..ab181947733
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/trx/update.rdiff
@@ -0,0 +1,38 @@
+--- /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/trx/update.result 2017-06-22 00:33:46.423995639 +0300
++++ /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/trx/update.reject 2017-06-22 19:29:57.267567148 +0300
+@@ -29,20 +29,23 @@
+ SAVEPOINT spt1;
+ UPDATE t1 SET b = '';
+ ROLLBACK TO SAVEPOINT spt1;
++ERROR HY000: MyRocks currently does not support ROLLBACK TO SAVEPOINT if modifying rows.
+ UPDATE t1 SET b = 'upd' WHERE a = 10050;
++ERROR HY000: This transaction was rolled back and cannot be committed. Only supported operation is to roll it back, so all pending changes will be discarded. Please restart another transaction.
+ COMMIT;
++ERROR HY000: This transaction was rolled back and cannot be committed. Only supported operation is to roll it back, so all pending changes will be discarded. Please restart another transaction.
+ SELECT a,b FROM t1;
+ a b
+-10050 upd
+-10050 upd
+-51 update2
+-51 update2
+-52 update2
+-52 update2
+-53 update2
+-53 update2
+-54 update2
+-54 update2
+-55 update2
+-55 update2
++10050 NULL
++10050 NULL
++51 NULL
++51 NULL
++52 NULL
++52 NULL
++53 NULL
++53 NULL
++54 NULL
++54 NULL
++55 NULL
++55 NULL
+ DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/storage_engine/type_binary_indexes.rdiff b/storage/rocksdb/mysql-test/storage_engine/type_binary_indexes.rdiff
new file mode 100644
index 00000000000..5eec9d24250
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/type_binary_indexes.rdiff
@@ -0,0 +1,11 @@
+--- suite/storage_engine/type_binary_indexes.result 2017-03-12 04:57:07.173911845 +0200
++++ suite/storage_engine/type_binary_indexes.reject 2017-08-14 22:54:02.144555066 +0300
+@@ -91,7 +91,7 @@
+ INSERT INTO t1 (b,b20,v16,v128) SELECT b,b20,v16,v128 FROM t1;
+ EXPLAIN SELECT HEX(SUBSTRING(v16,0,3)) FROM t1 WHERE v16 LIKE 'varchar%';
+ id select_type table type possible_keys key key_len ref rows Extra
+-# # # # # NULL # # # #
++# # # # # v16 # # # #
+ SELECT HEX(SUBSTRING(v16,7,3)) FROM t1 WHERE v16 LIKE 'varchar%';
+ HEX(SUBSTRING(v16,7,3))
+ 723161
diff --git a/storage/rocksdb/mysql-test/storage_engine/type_bit_indexes.rdiff b/storage/rocksdb/mysql-test/storage_engine/type_bit_indexes.rdiff
new file mode 100644
index 00000000000..e53a33b4fba
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/type_bit_indexes.rdiff
@@ -0,0 +1,20 @@
+--- suite/storage_engine/type_bit_indexes.result 2017-12-12 20:34:34.000000000 +0200
++++ suite/storage_engine/type_bit_indexes.reject 2017-12-12 20:35:24.539330056 +0200
+@@ -69,7 +69,7 @@
+ (1,0xFFFF,0xFFFFFFFF,0xFFFFFFFFFFFFFFFF);
+ EXPLAIN SELECT HEX(b+c) FROM t1 WHERE c > 1 OR HEX(b) < 0xFFFFFF;
+ id select_type table type possible_keys key key_len ref rows Extra
+-# # # # # b_c # # # #
++# # # # # NULL # # # #
+ SELECT HEX(b+c) FROM t1 WHERE c > 1 OR HEX(b) < 0xFFFFFF;
+ HEX(b+c)
+ 10
+@@ -98,7 +98,7 @@
+ (1,0xFFFF,0xFFFFFFFF,0xFFFFFFFFFFFFFFFF);
+ EXPLAIN SELECT DISTINCT a+0 FROM t1 ORDER BY a;
+ id select_type table type possible_keys key key_len ref rows Extra
+-# # # # # a # # # #
++# # # # # NULL # # # #
+ SELECT DISTINCT a+0 FROM t1 ORDER BY a;
+ a+0
+ 0
diff --git a/storage/rocksdb/mysql-test/storage_engine/type_enum.rdiff b/storage/rocksdb/mysql-test/storage_engine/type_enum.rdiff
new file mode 100644
index 00000000000..a402e0fb418
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/type_enum.rdiff
@@ -0,0 +1,20 @@
+--- /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/type_enum.result 2017-06-22 00:33:46.423995639 +0300
++++ /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/type_enum.reject 2017-06-22 02:55:49.599942066 +0300
+@@ -24,8 +24,6 @@
+ test2 4
+ test5 2
+ ALTER TABLE t1 ADD COLUMN e ENUM('a','A') <CUSTOM_COL_OPTIONS>;
+-Warnings:
+-Note 1291 Column 'e' has duplicated value 'a' in ENUM
+ SHOW COLUMNS IN t1;
+ Field Type Null Key Default Extra
+ a enum('') # # #
+@@ -37,7 +35,7 @@
+ a b c e
+ NULL
+ test2 4 NULL
+- test3 75 a
++ test3 75 A
+ test5 2 NULL
+ SELECT a,b,c,e FROM t1 WHERE b='test2' OR a != '';
+ a b c e
diff --git a/storage/rocksdb/mysql-test/storage_engine/type_enum_indexes.rdiff b/storage/rocksdb/mysql-test/storage_engine/type_enum_indexes.rdiff
new file mode 100644
index 00000000000..be83fb6e212
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/type_enum_indexes.rdiff
@@ -0,0 +1,11 @@
+--- suite/storage_engine/type_enum_indexes.result 2017-03-12 04:38:50.000000000 +0200
++++ suite/storage_engine/type_enum_indexes.reject 2017-12-12 20:36:47.455331726 +0200
+@@ -30,7 +30,7 @@
+ t1 0 a_b 2 b # # NULL NULL # #
+ EXPLAIN SELECT a FROM t1 WHERE b > 'test2' ORDER BY a;
+ id select_type table type possible_keys key key_len ref rows Extra
+-# # # # # a_b # # # #
++# # # # # NULL # # # #
+ SELECT a FROM t1 WHERE b > 'test2' ORDER BY a;
+ a
+ Africa
diff --git a/storage/rocksdb/mysql-test/storage_engine/type_set.rdiff b/storage/rocksdb/mysql-test/storage_engine/type_set.rdiff
new file mode 100644
index 00000000000..c5cbeaedecf
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/type_set.rdiff
@@ -0,0 +1,11 @@
+--- /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/type_set.result 2017-06-22 00:33:46.423995639 +0300
++++ /data/src/bb-10.2-mdev12528/mysql-test/suite/storage_engine/type_set.reject 2017-06-22 03:02:58.695939369 +0300
+@@ -30,8 +30,6 @@
+ test2,test3 01,23,34,44
+ test2,test4
+ ALTER TABLE t1 ADD COLUMN e SET('a','A') <CUSTOM_COL_OPTIONS>;
+-Warnings:
+-Note 1291 Column 'e' has duplicated value 'a' in SET
+ SHOW COLUMNS IN t1;
+ Field Type Null Key Default Extra
+ a set('') # # #
diff --git a/storage/rocksdb/mysql-test/storage_engine/type_set_indexes.rdiff b/storage/rocksdb/mysql-test/storage_engine/type_set_indexes.rdiff
new file mode 100644
index 00000000000..2703e81b745
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/type_set_indexes.rdiff
@@ -0,0 +1,20 @@
+--- suite/storage_engine/type_set_indexes.result 2017-03-12 04:38:50.000000000 +0200
++++ suite/storage_engine/type_set_indexes.reject 2017-12-12 20:37:16.187332305 +0200
+@@ -97,7 +97,7 @@
+ Warning 1265 Data truncated for column 'b' at row 7
+ EXPLAIN SELECT a FROM t1 WHERE FIND_IN_SET('Europe',a) > 0;
+ id select_type table type possible_keys key key_len ref rows Extra
+-# # # # # a # # # #
++# # # # # NULL # # # #
+ SELECT a FROM t1 WHERE FIND_IN_SET('Europe',a) > 0;
+ a
+ Africa,Europe,Asia
+@@ -124,7 +124,7 @@
+ Warning 1265 Data truncated for column 'b' at row 7
+ EXPLAIN SELECT DISTINCT a, b FROM t1 ORDER BY b DESC, a;
+ id select_type table type possible_keys key key_len ref rows Extra
+-# # # # # b_a # # # #
++# # # # # NULL # # # #
+ SELECT DISTINCT a, b FROM t1 ORDER BY b DESC, a;
+ a b
+ test1,test3
diff --git a/storage/rocksdb/nosql_access.cc b/storage/rocksdb/nosql_access.cc
new file mode 100644
index 00000000000..e1b5d0f69a2
--- /dev/null
+++ b/storage/rocksdb/nosql_access.cc
@@ -0,0 +1,52 @@
+/*
+ Copyright (c) 2019, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#define MYSQL_SERVER 1
+
+/* This C++ file's header */
+#include "./nosql_access.h"
+
+/* C++ standard header files */
+#include <algorithm>
+#include <array>
+#include <limits>
+#include <string>
+#include <utility>
+#include <vector>
+
+/* C standard header files */
+#include <ctype.h>
+
+/* MySQL header files */
+#include "../../sql/item.h"
+#include "../../sql/sql_base.h"
+#include "../../sql/sql_class.h"
+#include "../../sql/strfunc.h"
+
+/* MyRocks header files */
+#include "./ha_rocksdb.h"
+#include "./ha_rocksdb_proto.h"
+#include "./rdb_buff.h"
+#include "./rdb_datadic.h"
+
+namespace myrocks {
+
+bool rocksdb_handle_single_table_select(THD * /* unused */,
+ st_select_lex * /* unused */) {
+ return false;
+}
+
+} // namespace myrocks
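The stub above keeps the MyRocks "NoSQL" single-table SELECT bypass compiled in
but permanently declined: returning false tells the server to fall back to the
regular query path. A minimal sketch of how such a hook is typically consulted
(the caller and its fallback below are hypothetical, not part of this patch):

  // Hypothetical call site: try the engine fast path first, and fall back
  // to the ordinary executor whenever the hook declines the query.
  bool execute_single_table_select(THD *thd, st_select_lex *select_lex) {
    if (myrocks::rocksdb_handle_single_table_select(thd, select_lex))
      return true;  // query was served directly by the engine
    return run_regular_select(thd, select_lex);  // hypothetical fallback
  }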
diff --git a/storage/rocksdb/nosql_access.h b/storage/rocksdb/nosql_access.h
new file mode 100644
index 00000000000..70aaa400668
--- /dev/null
+++ b/storage/rocksdb/nosql_access.h
@@ -0,0 +1,36 @@
+/*
+ Copyright (c) 2019, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* C++ standard header files */
+#include <array>
+#include <string>
+#include <vector>
+
+/* C standard header files */
+#include <ctype.h>
+
+/* MySQL header files */
+#include "../../sql/protocol.h"
+#include "./sql_string.h"
+
+#pragma once
+
+namespace myrocks {
+
+// Not needed in MyRocks:
+//bool rocksdb_handle_single_table_select(THD *thd, st_select_lex *select_lex);
+
+} // namespace myrocks
diff --git a/storage/rocksdb/properties_collector.cc b/storage/rocksdb/properties_collector.cc
new file mode 100644
index 00000000000..d96e25e914c
--- /dev/null
+++ b/storage/rocksdb/properties_collector.cc
@@ -0,0 +1,544 @@
+/*
+ Copyright (c) 2015, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+
+#include <my_config.h>
+#ifdef _WIN32
+#define _CRT_RAND_S
+#include <stdlib.h>
+#define rand_r rand_s
+#endif
+/* This C++ file's header file */
+#include "./properties_collector.h"
+
+/* Standard C++ header files */
+#include <algorithm>
+#include <map>
+#include <string>
+#include <vector>
+
+/* MySQL header files */
+#include "./log.h"
+#include "./my_stacktrace.h"
+#include "./sql_array.h"
+
+/* MyRocks header files */
+#include "./rdb_datadic.h"
+#include "./rdb_utils.h"
+
+namespace myrocks {
+
+std::atomic<uint64_t> rocksdb_num_sst_entry_put(0);
+std::atomic<uint64_t> rocksdb_num_sst_entry_delete(0);
+std::atomic<uint64_t> rocksdb_num_sst_entry_singledelete(0);
+std::atomic<uint64_t> rocksdb_num_sst_entry_merge(0);
+std::atomic<uint64_t> rocksdb_num_sst_entry_other(0);
+my_bool rocksdb_compaction_sequential_deletes_count_sd = false;
+
+Rdb_tbl_prop_coll::Rdb_tbl_prop_coll(Rdb_ddl_manager *const ddl_manager,
+ const Rdb_compact_params &params,
+ const uint32_t cf_id,
+ const uint8_t table_stats_sampling_pct)
+ : m_cf_id(cf_id),
+ m_ddl_manager(ddl_manager),
+ m_last_stats(nullptr),
+ m_rows(0l),
+ m_window_pos(0l),
+ m_deleted_rows(0l),
+ m_max_deleted_rows(0l),
+ m_file_size(0),
+ m_params(params),
+ m_cardinality_collector(table_stats_sampling_pct),
+ m_recorded(false) {
+ DBUG_ASSERT(ddl_manager != nullptr);
+
+ m_deleted_rows_window.resize(m_params.m_window, false);
+}
+
+/*
+ This function is called by RocksDB for every key in the SST file
+*/
+rocksdb::Status Rdb_tbl_prop_coll::AddUserKey(const rocksdb::Slice &key,
+ const rocksdb::Slice &value,
+ rocksdb::EntryType type,
+ rocksdb::SequenceNumber seq,
+ uint64_t file_size) {
+ if (key.size() >= 4) {
+ AdjustDeletedRows(type);
+
+ m_rows++;
+
+ CollectStatsForRow(key, value, type, file_size);
+ }
+
+ return rocksdb::Status::OK();
+}
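+// Note: the key.size() >= 4 guard above skips keys that are too short to
+// carry the 4-byte index-id prefix which AccessStats() decodes with
+// rdb_netbuf_to_uint32(); such keys cannot be attributed to an index and
+// are therefore excluded from the statistics.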
+
+void Rdb_tbl_prop_coll::AdjustDeletedRows(rocksdb::EntryType type) {
+ if (m_params.m_window > 0) {
+    // Record the "is deleted" flag in the sliding window, which is
+    // implemented as a circular buffer in the m_deleted_rows_window vector.
+    // The current position in the circular buffer is m_window_pos (equal to
+    // m_rows % m_deleted_rows_window.size()), and m_deleted_rows is the
+    // current number of 1's in the vector. Update the counter for the
+    // element that is about to be overwritten.
+ const bool is_delete = (type == rocksdb::kEntryDelete ||
+ (type == rocksdb::kEntrySingleDelete &&
+ rocksdb_compaction_sequential_deletes_count_sd));
+
+ // Only make changes if the value at the current position needs to change
+ if (is_delete != m_deleted_rows_window[m_window_pos]) {
+ // Set or clear the flag at the current position as appropriate
+ m_deleted_rows_window[m_window_pos] = is_delete;
+ if (!is_delete) {
+ m_deleted_rows--;
+ } else if (++m_deleted_rows > m_max_deleted_rows) {
+ m_max_deleted_rows = m_deleted_rows;
+ }
+ }
+
+ if (++m_window_pos == m_params.m_window) {
+ m_window_pos = 0;
+ }
+ }
+}
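+// Worked example: with m_params.m_window == 4 and the entry types
+// P,D,D,P,D arriving in order (P = put, D = delete), the window evolves as
+// 0000 -> 0100 -> 0110 -> 0110 -> 1110 (wrapping around), so m_deleted_rows
+// ends at 3 and m_max_deleted_rows records the densest window seen, also 3.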
+
+Rdb_index_stats *Rdb_tbl_prop_coll::AccessStats(const rocksdb::Slice &key) {
+ GL_INDEX_ID gl_index_id;
+ gl_index_id.cf_id = m_cf_id;
+ gl_index_id.index_id = rdb_netbuf_to_uint32(reinterpret_cast<const uchar*>(key.data()));
+
+ if (m_last_stats == nullptr || m_last_stats->m_gl_index_id != gl_index_id) {
+ m_keydef = nullptr;
+
+ // starting a new table
+ // add the new element into m_stats
+ m_stats.emplace_back(gl_index_id);
+ m_last_stats = &m_stats.back();
+
+ if (m_ddl_manager) {
+ // safe_find() returns a std::shared_ptr<Rdb_key_def> with the count
+ // incremented (so it can't be deleted out from under us) and with
+ // the mutex locked (if setup has not occurred yet). We must make
+ // sure to free the mutex (via unblock_setup()) when we are done
+ // with this object. Currently this happens earlier in this function
+ // when we are switching to a new Rdb_key_def and when this object
+ // is destructed.
+ m_keydef = m_ddl_manager->safe_find(gl_index_id);
+ if (m_keydef != nullptr) {
+ // resize the array to the number of columns.
+ // It will be initialized with zeroes
+ m_last_stats->m_distinct_keys_per_prefix.resize(
+ m_keydef->get_key_parts());
+ m_last_stats->m_name = m_keydef->get_name();
+ }
+ }
+ m_cardinality_collector.Reset();
+ }
+
+ return m_last_stats;
+}
+
+void Rdb_tbl_prop_coll::CollectStatsForRow(const rocksdb::Slice &key,
+ const rocksdb::Slice &value,
+ const rocksdb::EntryType &type,
+ const uint64_t file_size) {
+ auto stats = AccessStats(key);
+
+ stats->m_data_size += key.size() + value.size();
+
+ // Incrementing per-index entry-type statistics
+ switch (type) {
+ case rocksdb::kEntryPut:
+ stats->m_rows++;
+ break;
+ case rocksdb::kEntryDelete:
+ stats->m_entry_deletes++;
+ break;
+ case rocksdb::kEntrySingleDelete:
+ stats->m_entry_single_deletes++;
+ break;
+ case rocksdb::kEntryMerge:
+ stats->m_entry_merges++;
+ break;
+ case rocksdb::kEntryOther:
+ stats->m_entry_others++;
+ break;
+ default:
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Unexpected entry type found: %u. "
+ "This should not happen so aborting the system.",
+ type);
+ abort();
+ break;
+ }
+
+ stats->m_actual_disk_size += file_size - m_file_size;
+ m_file_size = file_size;
+
+ if (m_keydef != nullptr) {
+ m_cardinality_collector.ProcessKey(key, m_keydef.get(), stats);
+ }
+}
+
+const char *Rdb_tbl_prop_coll::INDEXSTATS_KEY = "__indexstats__";
+
+/*
+  This function is called by RocksDB to compute the properties to store in the SST file.
+*/
+rocksdb::Status Rdb_tbl_prop_coll::Finish(
+ rocksdb::UserCollectedProperties *const properties) {
+ uint64_t num_sst_entry_put = 0;
+ uint64_t num_sst_entry_delete = 0;
+ uint64_t num_sst_entry_singledelete = 0;
+ uint64_t num_sst_entry_merge = 0;
+ uint64_t num_sst_entry_other = 0;
+
+ DBUG_ASSERT(properties != nullptr);
+
+ for (auto it = m_stats.begin(); it != m_stats.end(); it++) {
+ num_sst_entry_put += it->m_rows;
+ num_sst_entry_delete += it->m_entry_deletes;
+ num_sst_entry_singledelete += it->m_entry_single_deletes;
+ num_sst_entry_merge += it->m_entry_merges;
+ num_sst_entry_other += it->m_entry_others;
+ }
+
+ if (!m_recorded) {
+ if (num_sst_entry_put > 0) {
+ rocksdb_num_sst_entry_put += num_sst_entry_put;
+ }
+
+ if (num_sst_entry_delete > 0) {
+ rocksdb_num_sst_entry_delete += num_sst_entry_delete;
+ }
+
+ if (num_sst_entry_singledelete > 0) {
+ rocksdb_num_sst_entry_singledelete += num_sst_entry_singledelete;
+ }
+
+ if (num_sst_entry_merge > 0) {
+ rocksdb_num_sst_entry_merge += num_sst_entry_merge;
+ }
+
+ if (num_sst_entry_other > 0) {
+ rocksdb_num_sst_entry_other += num_sst_entry_other;
+ }
+
+ for (Rdb_index_stats &stat : m_stats) {
+ m_cardinality_collector.AdjustStats(&stat);
+ }
+ m_recorded = true;
+ }
+ properties->insert({INDEXSTATS_KEY, Rdb_index_stats::materialize(m_stats)});
+ return rocksdb::Status::OK();
+}
+
+bool Rdb_tbl_prop_coll::NeedCompact() const {
+ return m_params.m_deletes && (m_params.m_window > 0) &&
+ (m_file_size > m_params.m_file_size) &&
+ (m_max_deleted_rows > m_params.m_deletes);
+}
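+// Example: with m_params = { m_deletes = 100, m_window = 1000,
+// m_file_size = 16 MB }, an SST larger than 16 MB whose densest
+// 1000-entry window contained more than 100 deletes reports
+// NeedCompact() == true, marking the file for deletion-triggered compaction.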
+
+/*
+  Returns the same as above, but in a human-readable form for logging.
+*/
+rocksdb::UserCollectedProperties Rdb_tbl_prop_coll::GetReadableProperties()
+ const {
+ std::string s;
+#ifdef DBUG_OFF
+ s.append("[...");
+ s.append(std::to_string(m_stats.size()));
+ s.append(" records...]");
+#else
+ bool first = true;
+ for (auto it : m_stats) {
+ if (first) {
+ first = false;
+ } else {
+ s.append(",");
+ }
+ s.append(GetReadableStats(it));
+ }
+#endif
+ return rocksdb::UserCollectedProperties{{INDEXSTATS_KEY, s}};
+}
+
+std::string Rdb_tbl_prop_coll::GetReadableStats(const Rdb_index_stats &it) {
+ std::string s;
+ s.append("(");
+ s.append(std::to_string(it.m_gl_index_id.cf_id));
+ s.append(", ");
+ s.append(std::to_string(it.m_gl_index_id.index_id));
+ s.append("):{name:");
+ s.append(it.m_name);
+ s.append(", size:");
+ s.append(std::to_string(it.m_data_size));
+ s.append(", m_rows:");
+ s.append(std::to_string(it.m_rows));
+ s.append(", m_actual_disk_size:");
+ s.append(std::to_string(it.m_actual_disk_size));
+ s.append(", deletes:");
+ s.append(std::to_string(it.m_entry_deletes));
+ s.append(", single_deletes:");
+ s.append(std::to_string(it.m_entry_single_deletes));
+ s.append(", merges:");
+ s.append(std::to_string(it.m_entry_merges));
+ s.append(", others:");
+ s.append(std::to_string(it.m_entry_others));
+ s.append(", distincts per prefix: [");
+ for (auto num : it.m_distinct_keys_per_prefix) {
+ s.append(std::to_string(num));
+ s.append(" ");
+ }
+ s.append("]}");
+ return s;
+}
+
+/*
+  Given the properties of an SST file, reads the index stats stored in them
+  and returns them via out_stats_vector.
+*/
+
+void Rdb_tbl_prop_coll::read_stats_from_tbl_props(
+ const std::shared_ptr<const rocksdb::TableProperties> &table_props,
+ std::vector<Rdb_index_stats> *const out_stats_vector) {
+ DBUG_ASSERT(out_stats_vector != nullptr);
+ const auto &user_properties = table_props->user_collected_properties;
+ const auto it2 = user_properties.find(std::string(INDEXSTATS_KEY));
+ if (it2 != user_properties.end()) {
+ auto result MY_ATTRIBUTE((__unused__)) =
+ Rdb_index_stats::unmaterialize(it2->second, out_stats_vector);
+ DBUG_ASSERT(result == 0);
+ }
+}
+
+/*
+ Serializes an array of Rdb_index_stats into a network string.
+*/
+std::string Rdb_index_stats::materialize(
+ const std::vector<Rdb_index_stats> &stats) {
+ String ret;
+ rdb_netstr_append_uint16(&ret, INDEX_STATS_VERSION_ENTRY_TYPES);
+ for (const auto &i : stats) {
+ rdb_netstr_append_uint32(&ret, i.m_gl_index_id.cf_id);
+ rdb_netstr_append_uint32(&ret, i.m_gl_index_id.index_id);
+ DBUG_ASSERT(sizeof i.m_data_size <= 8);
+ rdb_netstr_append_uint64(&ret, i.m_data_size);
+ rdb_netstr_append_uint64(&ret, i.m_rows);
+ rdb_netstr_append_uint64(&ret, i.m_actual_disk_size);
+ rdb_netstr_append_uint64(&ret, i.m_distinct_keys_per_prefix.size());
+ rdb_netstr_append_uint64(&ret, i.m_entry_deletes);
+ rdb_netstr_append_uint64(&ret, i.m_entry_single_deletes);
+ rdb_netstr_append_uint64(&ret, i.m_entry_merges);
+ rdb_netstr_append_uint64(&ret, i.m_entry_others);
+ for (const auto &num_keys : i.m_distinct_keys_per_prefix) {
+ rdb_netstr_append_uint64(&ret, num_keys);
+ }
+ }
+
+ return std::string((char *)ret.ptr(), ret.length());
+}
+
+/**
+ @brief
+ Reads an array of Rdb_index_stats from a string.
+ @return HA_EXIT_FAILURE if it detects any inconsistency in the input
+ @return HA_EXIT_SUCCESS if completes successfully
+*/
+int Rdb_index_stats::unmaterialize(const std::string &s,
+ std::vector<Rdb_index_stats> *const ret) {
+ const uchar *p = rdb_std_str_to_uchar_ptr(s);
+ const uchar *const p2 = p + s.size();
+
+ DBUG_ASSERT(ret != nullptr);
+
+ if (p + 2 > p2) {
+ return HA_EXIT_FAILURE;
+ }
+
+ const int version = rdb_netbuf_read_uint16(&p);
+ Rdb_index_stats stats;
+ // Make sure version is within supported range.
+ if (version < INDEX_STATS_VERSION_INITIAL ||
+ version > INDEX_STATS_VERSION_ENTRY_TYPES) {
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "Index stats version %d was outside of supported range. "
+ "This should not happen so aborting the system.",
+ version);
+ abort();
+ }
+
+ size_t needed = sizeof(stats.m_gl_index_id.cf_id) +
+ sizeof(stats.m_gl_index_id.index_id) +
+ sizeof(stats.m_data_size) + sizeof(stats.m_rows) +
+ sizeof(stats.m_actual_disk_size) + sizeof(uint64);
+ if (version >= INDEX_STATS_VERSION_ENTRY_TYPES) {
+ needed += sizeof(stats.m_entry_deletes) +
+ sizeof(stats.m_entry_single_deletes) +
+ sizeof(stats.m_entry_merges) + sizeof(stats.m_entry_others);
+ }
+
+ while (p < p2) {
+ if (p + needed > p2) {
+ return HA_EXIT_FAILURE;
+ }
+ rdb_netbuf_read_gl_index(&p, &stats.m_gl_index_id);
+ stats.m_data_size = rdb_netbuf_read_uint64(&p);
+ stats.m_rows = rdb_netbuf_read_uint64(&p);
+ stats.m_actual_disk_size = rdb_netbuf_read_uint64(&p);
+ stats.m_distinct_keys_per_prefix.resize(rdb_netbuf_read_uint64(&p));
+ if (version >= INDEX_STATS_VERSION_ENTRY_TYPES) {
+ stats.m_entry_deletes = rdb_netbuf_read_uint64(&p);
+ stats.m_entry_single_deletes = rdb_netbuf_read_uint64(&p);
+ stats.m_entry_merges = rdb_netbuf_read_uint64(&p);
+ stats.m_entry_others = rdb_netbuf_read_uint64(&p);
+ }
+ if (p + stats.m_distinct_keys_per_prefix.size() *
+ sizeof(stats.m_distinct_keys_per_prefix[0]) >
+ p2) {
+ return HA_EXIT_FAILURE;
+ }
+ for (std::size_t i = 0; i < stats.m_distinct_keys_per_prefix.size(); i++) {
+ stats.m_distinct_keys_per_prefix[i] = rdb_netbuf_read_uint64(&p);
+ }
+ ret->push_back(stats);
+ }
+ return HA_EXIT_SUCCESS;
+}
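+// A minimal round-trip sketch (illustrative values; assumes GL_INDEX_ID is
+// an aggregate of {cf_id, index_id}):
+//
+//   std::vector<Rdb_index_stats> in(1, Rdb_index_stats({1, 42}));
+//   in[0].m_rows = 100;
+//   const std::string blob = Rdb_index_stats::materialize(in);
+//   std::vector<Rdb_index_stats> out;
+//   if (Rdb_index_stats::unmaterialize(blob, &out) == HA_EXIT_SUCCESS)
+//     DBUG_ASSERT(out.size() == 1 && out[0].m_rows == 100);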
+
+/*
+  Merges one Rdb_index_stats into another. Can be used to compute the stats
+  for an index from the stats of each SST file.
+*/
+void Rdb_index_stats::merge(const Rdb_index_stats &s, const bool increment,
+ const int64_t estimated_data_len) {
+ std::size_t i;
+
+ DBUG_ASSERT(estimated_data_len >= 0);
+
+ m_gl_index_id = s.m_gl_index_id;
+ if (m_distinct_keys_per_prefix.size() < s.m_distinct_keys_per_prefix.size()) {
+ m_distinct_keys_per_prefix.resize(s.m_distinct_keys_per_prefix.size());
+ }
+ if (increment) {
+ m_rows += s.m_rows;
+ m_data_size += s.m_data_size;
+
+ /*
+ The Data_length and Avg_row_length are trailing statistics, meaning
+ they don't get updated for the current SST until the next SST is
+ written. So, if rocksdb reports the data_length as 0,
+ we make a reasoned estimate for the data_file_length for the
+ index in the current SST.
+ */
+ m_actual_disk_size += s.m_actual_disk_size ? s.m_actual_disk_size
+ : estimated_data_len * s.m_rows;
+ m_entry_deletes += s.m_entry_deletes;
+ m_entry_single_deletes += s.m_entry_single_deletes;
+ m_entry_merges += s.m_entry_merges;
+ m_entry_others += s.m_entry_others;
+ for (i = 0; i < s.m_distinct_keys_per_prefix.size(); i++) {
+ m_distinct_keys_per_prefix[i] += s.m_distinct_keys_per_prefix[i];
+ }
+ } else {
+ m_rows -= s.m_rows;
+ m_data_size -= s.m_data_size;
+ m_actual_disk_size -= s.m_actual_disk_size ? s.m_actual_disk_size
+ : estimated_data_len * s.m_rows;
+ m_entry_deletes -= s.m_entry_deletes;
+ m_entry_single_deletes -= s.m_entry_single_deletes;
+ m_entry_merges -= s.m_entry_merges;
+ m_entry_others -= s.m_entry_others;
+ for (i = 0; i < s.m_distinct_keys_per_prefix.size(); i++) {
+ m_distinct_keys_per_prefix[i] -= s.m_distinct_keys_per_prefix[i];
+ }
+ }
+}
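+// Index-level stats are maintained incrementally from per-SST stats:
+// merge(s, true) is applied when an SST file containing the index appears,
+// and merge(s, false) subtracts the same contribution back out when the
+// file is dropped (e.g. after a compaction).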
+
+Rdb_tbl_card_coll::Rdb_tbl_card_coll(const uint8_t table_stats_sampling_pct)
+ : m_table_stats_sampling_pct(table_stats_sampling_pct),
+ m_seed(time(nullptr)) {}
+
+bool Rdb_tbl_card_coll::IsSamplingDisabled() {
+ // Zero means that we'll use all the keys to update statistics.
+ return m_table_stats_sampling_pct == 0 ||
+ RDB_TBL_STATS_SAMPLE_PCT_MAX == m_table_stats_sampling_pct;
+}
+
+bool Rdb_tbl_card_coll::ShouldCollectStats() {
+  if (IsSamplingDisabled()) {
+ return true; // collect every key
+ }
+
+ const int val = rand_r(&m_seed) % (RDB_TBL_STATS_SAMPLE_PCT_MAX -
+ RDB_TBL_STATS_SAMPLE_PCT_MIN + 1) +
+ RDB_TBL_STATS_SAMPLE_PCT_MIN;
+
+ DBUG_ASSERT(val >= RDB_TBL_STATS_SAMPLE_PCT_MIN);
+ DBUG_ASSERT(val <= RDB_TBL_STATS_SAMPLE_PCT_MAX);
+
+ return val <= m_table_stats_sampling_pct;
+}
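+// Example: assuming the usual bounds RDB_TBL_STATS_SAMPLE_PCT_MIN == 1 and
+// RDB_TBL_STATS_SAMPLE_PCT_MAX == 100, val is uniform over [1, 100], so with
+// m_table_stats_sampling_pct == 10 roughly 10% of the keys are sampled.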
+
+void Rdb_tbl_card_coll::ProcessKey(const rocksdb::Slice &key,
+ const Rdb_key_def *keydef,
+ Rdb_index_stats *stats) {
+ if (ShouldCollectStats()) {
+ std::size_t column = 0;
+ bool new_key = true;
+
+ if (!m_last_key.empty()) {
+ rocksdb::Slice last(m_last_key.data(), m_last_key.size());
+ new_key = (keydef->compare_keys(&last, &key, &column) == 0);
+ }
+
+ if (new_key) {
+ DBUG_ASSERT(column <= stats->m_distinct_keys_per_prefix.size());
+
+ for (auto i = column; i < stats->m_distinct_keys_per_prefix.size(); i++) {
+ stats->m_distinct_keys_per_prefix[i]++;
+ }
+
+      // Assign a new last_key for the next call. We only need to change
+      // the last key if one of the first n-1 columns differs; if the n-1
+      // prefix is the same, there is no point in storing the new key.
+ if (column < stats->m_distinct_keys_per_prefix.size()) {
+ m_last_key.assign(key.data(), key.size());
+ }
+ }
+ }
+}
+
+void Rdb_tbl_card_coll::Reset() { m_last_key.clear(); }
+
+// We need to adjust the index cardinality numbers based on the sampling
+// rate so that the output of the "SHOW INDEX" command will reflect reality
+// more closely. It will still be an approximation, just a better one.
+void Rdb_tbl_card_coll::AdjustStats(Rdb_index_stats *stats) {
+  if (IsSamplingDisabled()) {
+    // no sampling was done, return the stats as-is
+ return;
+ }
+ for (int64_t &num_keys : stats->m_distinct_keys_per_prefix) {
+ num_keys = num_keys * 100 / m_table_stats_sampling_pct;
+ }
+}
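+// Worked example: at a 10% sampling rate, 12 distinct keys observed for a
+// prefix scale up to 12 * 100 / 10 = 120 estimated distinct keys; as the
+// class comment warns, callers should cap such estimates at the row count.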
+
+} // namespace myrocks
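RocksDB only ever sees collectors like Rdb_tbl_prop_coll through a
TablePropertiesCollectorFactory registered in the column family options. The
factory below is an editorial sketch of that wiring (MyRocks ships its own
factory, which is not part of this excerpt), assuming the stock RocksDB
factory interface:

  // Sketch only: hands a fresh Rdb_tbl_prop_coll to RocksDB for each new
  // SST file, keyed to the column family being written.
  class Rdb_tbl_prop_coll_factory_sketch
      : public rocksdb::TablePropertiesCollectorFactory {
   public:
    rocksdb::TablePropertiesCollector *CreateTablePropertiesCollector(
        rocksdb::TablePropertiesCollectorFactory::Context context) override {
      return new myrocks::Rdb_tbl_prop_coll(
          m_ddl_manager, m_params, context.column_family_id, m_sampling_pct);
    }
    const char *Name() const override { return "Rdb_tbl_prop_coll_factory"; }
   private:
    myrocks::Rdb_ddl_manager *m_ddl_manager;  // assumed initialized elsewhere
    myrocks::Rdb_compact_params m_params;
    uint8_t m_sampling_pct;
  };

  // Registration against rocksdb::ColumnFamilyOptions:
  //   cf_options.table_properties_collector_factories.emplace_back(
  //       std::make_shared<Rdb_tbl_prop_coll_factory_sketch>(...));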
diff --git a/storage/rocksdb/properties_collector.h b/storage/rocksdb/properties_collector.h
new file mode 100644
index 00000000000..ce2773cd618
--- /dev/null
+++ b/storage/rocksdb/properties_collector.h
@@ -0,0 +1,215 @@
+/*
+ Copyright (c) 2015, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+#pragma once
+
+/* C++ system header files */
+#include <map>
+#include <memory>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+/* RocksDB header files */
+#include "rocksdb/db.h"
+
+/* MyRocks header files */
+#include "./ha_rocksdb.h"
+
+namespace myrocks {
+
+class Rdb_ddl_manager;
+class Rdb_key_def;
+
+extern std::atomic<uint64_t> rocksdb_num_sst_entry_put;
+extern std::atomic<uint64_t> rocksdb_num_sst_entry_delete;
+extern std::atomic<uint64_t> rocksdb_num_sst_entry_singledelete;
+extern std::atomic<uint64_t> rocksdb_num_sst_entry_merge;
+extern std::atomic<uint64_t> rocksdb_num_sst_entry_other;
+extern my_bool rocksdb_compaction_sequential_deletes_count_sd;
+
+struct Rdb_compact_params {
+ uint64_t m_deletes, m_window, m_file_size;
+};
+
+struct Rdb_index_stats {
+ enum {
+ INDEX_STATS_VERSION_INITIAL = 1,
+ INDEX_STATS_VERSION_ENTRY_TYPES = 2,
+ };
+ GL_INDEX_ID m_gl_index_id;
+ int64_t m_data_size, m_rows, m_actual_disk_size;
+ int64_t m_entry_deletes, m_entry_single_deletes;
+ int64_t m_entry_merges, m_entry_others;
+ std::vector<int64_t> m_distinct_keys_per_prefix;
+ std::string m_name; // name is not persisted
+
+ static std::string materialize(const std::vector<Rdb_index_stats> &stats);
+ static int unmaterialize(const std::string &s,
+ std::vector<Rdb_index_stats> *const ret);
+
+ Rdb_index_stats() : Rdb_index_stats({0, 0}) {}
+ explicit Rdb_index_stats(GL_INDEX_ID gl_index_id)
+ : m_gl_index_id(gl_index_id),
+ m_data_size(0),
+ m_rows(0),
+ m_actual_disk_size(0),
+ m_entry_deletes(0),
+ m_entry_single_deletes(0),
+ m_entry_merges(0),
+ m_entry_others(0) {}
+
+ void merge(const Rdb_index_stats &s, const bool increment = true,
+ const int64_t estimated_data_len = 0);
+};
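+
+/*
+  Illustrative round trip through the (de)serialization helpers above,
+  assuming the usual zero-on-success return convention:
+
+    std::vector<Rdb_index_stats> in = ...;  // stats gathered per index
+    const std::string blob = Rdb_index_stats::materialize(in);
+
+    std::vector<Rdb_index_stats> out;
+    if (Rdb_index_stats::unmaterialize(blob, &out) != 0) {
+      // handle a corrupt or unknown-version blob
+    }
+*/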
+
+// The helper class to calculate index cardinality
+class Rdb_tbl_card_coll {
+ public:
+ explicit Rdb_tbl_card_coll(const uint8_t table_stats_sampling_pct);
+
+ public:
+ void ProcessKey(const rocksdb::Slice &key, const Rdb_key_def *keydef,
+ Rdb_index_stats *stats);
+ /*
+   * Resets the state of the collector to start calculating statistics for
+   * the next index.
+ */
+ void Reset();
+
+ /*
+ * Cardinality statistics might be calculated using some sampling strategy.
+ * This method adjusts gathered statistics according to the sampling
+   * strategy used. Note that the adjusted cardinality value is just an
+   * estimate and can exceed the number of rows in a table, so the returned
+   * value should be capped by the row count before it is used by the
+   * optimizer or displayed to a client.
+ */
+ void AdjustStats(Rdb_index_stats *stats);
+
+ private:
+ bool ShouldCollectStats();
+ bool IsSampingDisabled();
+
+ private:
+ std::string m_last_key;
+ uint8_t m_table_stats_sampling_pct;
+ unsigned int m_seed;
+};
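+
+/*
+  Illustrative usage sketch (mirrors how Rdb_tbl_prop_coll below drives the
+  collector, one index at a time):
+
+    Rdb_tbl_card_coll card_coll(10);  // sample roughly 10% of the keys
+    for (each key of the current index)
+      card_coll.ProcessKey(key, keydef, &stats);
+    card_coll.AdjustStats(&stats);  // scale counts for the sampling rate
+    card_coll.Reset();              // before moving on to the next index
+*/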
+
+class Rdb_tbl_prop_coll : public rocksdb::TablePropertiesCollector {
+ public:
+ Rdb_tbl_prop_coll(Rdb_ddl_manager *const ddl_manager,
+ const Rdb_compact_params &params, const uint32_t cf_id,
+ const uint8_t table_stats_sampling_pct);
+
+ /*
+ Override parent class's virtual methods of interest.
+ */
+
+ virtual rocksdb::Status AddUserKey(const rocksdb::Slice &key,
+ const rocksdb::Slice &value,
+ rocksdb::EntryType type,
+ rocksdb::SequenceNumber seq,
+ uint64_t file_size) override;
+
+ virtual rocksdb::Status Finish(
+ rocksdb::UserCollectedProperties *properties) override;
+
+ virtual const char *Name() const override { return "Rdb_tbl_prop_coll"; }
+
+ rocksdb::UserCollectedProperties GetReadableProperties() const override;
+
+ bool NeedCompact() const override;
+
+ public:
+ uint64_t GetMaxDeletedRows() const { return m_max_deleted_rows; }
+
+ static void read_stats_from_tbl_props(
+ const std::shared_ptr<const rocksdb::TableProperties> &table_props,
+ std::vector<Rdb_index_stats> *out_stats_vector);
+
+ private:
+ static std::string GetReadableStats(const Rdb_index_stats &it);
+
+ bool ShouldCollectStats();
+ void CollectStatsForRow(const rocksdb::Slice &key,
+ const rocksdb::Slice &value,
+ const rocksdb::EntryType &type,
+ const uint64_t file_size);
+ Rdb_index_stats *AccessStats(const rocksdb::Slice &key);
+ void AdjustDeletedRows(rocksdb::EntryType type);
+
+ private:
+ uint32_t m_cf_id;
+ std::shared_ptr<const Rdb_key_def> m_keydef;
+ Rdb_ddl_manager *m_ddl_manager;
+ std::vector<Rdb_index_stats> m_stats;
+ Rdb_index_stats *m_last_stats;
+ static const char *INDEXSTATS_KEY;
+
+ // last added key
+ std::string m_last_key;
+
+ // floating window to count deleted rows
+ std::vector<bool> m_deleted_rows_window;
+ uint64_t m_rows, m_window_pos, m_deleted_rows, m_max_deleted_rows;
+ uint64_t m_file_size;
+ Rdb_compact_params m_params;
+ Rdb_tbl_card_coll m_cardinality_collector;
+ bool m_recorded;
+};
+
+class Rdb_tbl_prop_coll_factory
+ : public rocksdb::TablePropertiesCollectorFactory {
+ public:
+ Rdb_tbl_prop_coll_factory(const Rdb_tbl_prop_coll_factory &) = delete;
+ Rdb_tbl_prop_coll_factory &operator=(const Rdb_tbl_prop_coll_factory &) =
+ delete;
+
+ explicit Rdb_tbl_prop_coll_factory(Rdb_ddl_manager *ddl_manager)
+ : m_ddl_manager(ddl_manager) {}
+
+ /*
+ Override parent class's virtual methods of interest.
+ */
+
+ virtual rocksdb::TablePropertiesCollector *CreateTablePropertiesCollector(
+ rocksdb::TablePropertiesCollectorFactory::Context context) override {
+ return new Rdb_tbl_prop_coll(m_ddl_manager, m_params,
+ context.column_family_id,
+ m_table_stats_sampling_pct);
+ }
+
+ virtual const char *Name() const override {
+ return "Rdb_tbl_prop_coll_factory";
+ }
+
+ public:
+ void SetCompactionParams(const Rdb_compact_params &params) {
+ m_params = params;
+ }
+
+ void SetTableStatsSamplingPct(const uint8_t table_stats_sampling_pct) {
+ m_table_stats_sampling_pct = table_stats_sampling_pct;
+ }
+
+ private:
+ Rdb_ddl_manager *const m_ddl_manager;
+ Rdb_compact_params m_params;
+ uint8_t m_table_stats_sampling_pct;
+};
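+
+/*
+  Registration sketch (illustrative): the factory is handed to RocksDB
+  through the column family options, as Rdb_cf_options::init() does:
+
+    cf_opts.table_properties_collector_factories.push_back(
+        std::make_shared<Rdb_tbl_prop_coll_factory>(ddl_manager));
+*/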
+
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_buff.h b/storage/rocksdb/rdb_buff.h
new file mode 100644
index 00000000000..639688db951
--- /dev/null
+++ b/storage/rocksdb/rdb_buff.h
@@ -0,0 +1,549 @@
+/*
+ Portions Copyright (c) 2016-Present, Facebook, Inc.
+ Portions Copyright (c) 2012,2013 Monty Program Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+#pragma once
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#ifdef _WIN32
+#include <stdlib.h>
+#define htobe64 _byteswap_uint64
+#define be64toh _byteswap_uint64
+#define htobe32 _byteswap_ulong
+#define be32toh _byteswap_ulong
+#define htobe16 _byteswap_ushort
+#define be16toh _byteswap_ushort
+#endif
+
+#if __APPLE__
+#include <libkern/OSByteOrder.h>
+#define htobe64(x) OSSwapHostToBigInt64(x)
+#define be64toh(x) OSSwapBigToHostInt64(x)
+#define htobe32(x) OSSwapHostToBigInt32(x)
+#define be32toh(x) OSSwapBigToHostInt32(x)
+#define htobe16(x) OSSwapHostToBigInt16(x)
+#define be16toh(x) OSSwapBigToHostInt16(x)
+#endif
+
+/* MySQL header files */
+#include "./my_global.h"
+
+/* MyRocks header files */
+#include "./rdb_global.h"
+#include "./rdb_utils.h"
+
+/* RocksDB header files */
+#include "rocksdb/slice.h"
+#include "rocksdb/status.h"
+
+namespace myrocks {
+
+/*
+ Basic composition functions for a network buffer presented as a MySQL String
+ ("netstr") which stores data in Network Byte Order (Big Endian).
+*/
+
+inline void rdb_netstr_append_uint64(my_core::String *const out_netstr,
+ const uint64 val) {
+ DBUG_ASSERT(out_netstr != nullptr);
+
+ // Convert from host machine byte order (usually Little Endian) to network
+ // byte order (Big Endian).
+ uint64 net_val = htobe64(val);
+ out_netstr->append(reinterpret_cast<char *>(&net_val), sizeof(net_val));
+}
+
+inline void rdb_netstr_append_uint32(my_core::String *const out_netstr,
+ const uint32 val) {
+ DBUG_ASSERT(out_netstr != nullptr);
+
+ // Convert from host machine byte order (usually Little Endian) to network
+ // byte order (Big Endian).
+ uint32 net_val = htobe32(val);
+ out_netstr->append(reinterpret_cast<char *>(&net_val), sizeof(net_val));
+}
+
+inline void rdb_netstr_append_uint16(my_core::String *const out_netstr,
+ const uint16 val) {
+ DBUG_ASSERT(out_netstr != nullptr);
+
+ // Convert from host machine byte order (usually Little Endian) to network
+ // byte order (Big Endian).
+ uint16 net_val = htobe16(val);
+ out_netstr->append(reinterpret_cast<char *>(&net_val), sizeof(net_val));
+}
+
+/*
+ Basic network buffer ("netbuf") write helper functions.
+*/
+
+inline void rdb_netbuf_store_uint64(uchar *const dst_netbuf, const uint64 n) {
+ DBUG_ASSERT(dst_netbuf != nullptr);
+
+ // Convert from host byte order (usually Little Endian) to network byte order
+ // (Big Endian).
+ uint64 net_val = htobe64(n);
+ memcpy(dst_netbuf, &net_val, sizeof(net_val));
+}
+
+inline void rdb_netbuf_store_uint32(uchar *const dst_netbuf, const uint32 n) {
+ DBUG_ASSERT(dst_netbuf != nullptr);
+
+ // Convert from host byte order (usually Little Endian) to network byte order
+ // (Big Endian).
+ uint32 net_val = htobe32(n);
+ memcpy(dst_netbuf, &net_val, sizeof(net_val));
+}
+
+inline void rdb_netbuf_store_uint16(uchar *const dst_netbuf, const uint16 n) {
+ DBUG_ASSERT(dst_netbuf != nullptr);
+
+ // Convert from host byte order (usually Little Endian) to network byte order
+ // (Big Endian).
+ uint16 net_val = htobe16(n);
+ memcpy(dst_netbuf, &net_val, sizeof(net_val));
+}
+
+inline void rdb_netbuf_store_byte(uchar *const dst_netbuf, const uchar c) {
+ DBUG_ASSERT(dst_netbuf != nullptr);
+
+ *dst_netbuf = c;
+}
+
+inline void rdb_netbuf_store_index(uchar *const dst_netbuf,
+ const uint32 number) {
+ DBUG_ASSERT(dst_netbuf != nullptr);
+
+ rdb_netbuf_store_uint32(dst_netbuf, number);
+}
+
+/*
+ Basic conversion helper functions from network byte order (Big Endian) to host
+ machine byte order (usually Little Endian).
+*/
+
+inline uint64 rdb_netbuf_to_uint64(const uchar *const netbuf) {
+ DBUG_ASSERT(netbuf != nullptr);
+
+ uint64 net_val;
+ memcpy(&net_val, netbuf, sizeof(net_val));
+
+ // Convert from network byte order (Big Endian) to host machine byte order
+ // (usually Little Endian).
+ return be64toh(net_val);
+}
+
+inline uint32 rdb_netbuf_to_uint32(const uchar *const netbuf) {
+ DBUG_ASSERT(netbuf != nullptr);
+
+ uint32 net_val;
+ memcpy(&net_val, netbuf, sizeof(net_val));
+
+ // Convert from network byte order (Big Endian) to host machine byte order
+ // (usually Little Endian).
+ return be32toh(net_val);
+}
+
+inline uint16 rdb_netbuf_to_uint16(const uchar *const netbuf) {
+ DBUG_ASSERT(netbuf != nullptr);
+
+ uint16 net_val;
+ memcpy(&net_val, netbuf, sizeof(net_val));
+
+ // Convert from network byte order (Big Endian) to host machine byte order
+ // (usually Little Endian).
+ return be16toh(net_val);
+}
+
+inline uchar rdb_netbuf_to_byte(const uchar *const netbuf) {
+ DBUG_ASSERT(netbuf != nullptr);
+
+ return (uchar)netbuf[0];
+}
+
+/*
+ Basic network buffer ("netbuf") read helper functions.
+ Network buffer stores data in Network Byte Order (Big Endian).
+ NB: The netbuf is passed as an input/output param, hence after reading,
+ the netbuf pointer gets advanced to the following byte.
+*/
+
+inline uint64 rdb_netbuf_read_uint64(const uchar **netbuf_ptr) {
+ DBUG_ASSERT(netbuf_ptr != nullptr);
+
+ // Convert from network byte order (Big Endian) to host machine byte order
+ // (usually Little Endian).
+ const uint64 host_val = rdb_netbuf_to_uint64(*netbuf_ptr);
+
+ // Advance pointer.
+ *netbuf_ptr += sizeof(host_val);
+
+ return host_val;
+}
+
+inline uint32 rdb_netbuf_read_uint32(const uchar **netbuf_ptr) {
+ DBUG_ASSERT(netbuf_ptr != nullptr);
+
+ // Convert from network byte order (Big Endian) to host machine byte order
+ // (usually Little Endian).
+ const uint32 host_val = rdb_netbuf_to_uint32(*netbuf_ptr);
+
+ // Advance pointer.
+ *netbuf_ptr += sizeof(host_val);
+
+ return host_val;
+}
+
+inline uint16 rdb_netbuf_read_uint16(const uchar **netbuf_ptr) {
+ DBUG_ASSERT(netbuf_ptr != nullptr);
+
+ // Convert from network byte order (Big Endian) to host machine byte order
+ // (usually Little Endian).
+ const uint16 host_val = rdb_netbuf_to_uint16(*netbuf_ptr);
+
+ // Advance pointer.
+ *netbuf_ptr += sizeof(host_val);
+
+ return host_val;
+}
+
+inline void rdb_netbuf_read_gl_index(const uchar **netbuf_ptr,
+ GL_INDEX_ID *const gl_index_id) {
+ DBUG_ASSERT(gl_index_id != nullptr);
+ DBUG_ASSERT(netbuf_ptr != nullptr);
+
+ gl_index_id->cf_id = rdb_netbuf_read_uint32(netbuf_ptr);
+ gl_index_id->index_id = rdb_netbuf_read_uint32(netbuf_ptr);
+}
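+
+/*
+  Illustrative round trip: storing 0x01020304 always writes the bytes
+  {0x01, 0x02, 0x03, 0x04}, regardless of host endianness:
+
+    uchar buf[sizeof(uint32)];
+    rdb_netbuf_store_uint32(buf, 0x01020304);
+    const uchar *p = buf;
+    const uint32 v = rdb_netbuf_read_uint32(&p);  // v == 0x01020304,
+                                                  // p advanced by 4 bytes
+*/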
+
+/*
+ A simple string reader:
+ - it keeps position within the string that we read from
+ - it prevents one from reading beyond the end of the string.
+*/
+
+class Rdb_string_reader {
+ const char *m_ptr;
+ uint m_len;
+
+ private:
+ Rdb_string_reader &operator=(const Rdb_string_reader &) = default;
+
+ public:
+ Rdb_string_reader(const Rdb_string_reader &) = default;
+ /* named constructor */
+ static Rdb_string_reader read_or_empty(const rocksdb::Slice *const slice) {
+ if (!slice) {
+ return Rdb_string_reader("");
+ } else {
+ return Rdb_string_reader(slice);
+ }
+ }
+
+ explicit Rdb_string_reader(const std::string &str) {
+ m_len = str.length();
+ if (m_len) {
+ m_ptr = &str.at(0);
+ } else {
+ /*
+      One can create a Rdb_string_reader for reading from an empty string
+      (although attempts to read anything will fail).
+      We must not access str.at(0); since len==0, we can set ptr to any
+      value.
+ */
+ m_ptr = nullptr;
+ }
+ }
+
+ explicit Rdb_string_reader(const rocksdb::Slice *const slice) {
+ m_ptr = slice->data();
+ m_len = slice->size();
+ }
+
+ /*
+ Read the next @param size bytes. Returns pointer to the bytes read, or
+ nullptr if the remaining string doesn't have that many bytes.
+ */
+ const char *read(const uint size) {
+ const char *res;
+ if (m_len < size) {
+ res = nullptr;
+ } else {
+ res = m_ptr;
+ m_ptr += size;
+ m_len -= size;
+ }
+ return res;
+ }
+
+ bool read_uint8(uint *const res) {
+ const uchar *p;
+ if (!(p = reinterpret_cast<const uchar *>(read(1)))) {
+ return true; // error
+ } else {
+ *res = *p;
+ return false; // Ok
+ }
+ }
+
+ bool read_uint16(uint *const res) {
+ const uchar *p;
+ if (!(p = reinterpret_cast<const uchar *>(read(2)))) {
+ return true; // error
+ } else {
+ *res = rdb_netbuf_to_uint16(p);
+ return false; // Ok
+ }
+ }
+
+ bool read_uint64(uint64 *const res) {
+ const uchar *p;
+ if (!(p = reinterpret_cast<const uchar *>(read(sizeof(uint64))))) {
+ return true; // error
+ } else {
+ *res = rdb_netbuf_to_uint64(p);
+ return false; // Ok
+ }
+ }
+
+ uint remaining_bytes() const { return m_len; }
+
+ /*
+    Return a pointer to the data that the next read() call will return (if
+    there is nothing left to read, this points just past the end of the
+    previous read() call).
+ */
+ const char *get_current_ptr() const { return m_ptr; }
+};
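+
+/*
+  Illustrative usage: reading a 2-byte length followed by a payload of that
+  many bytes (both reads report failure without throwing):
+
+    Rdb_string_reader reader(&slice);
+    uint len;
+    if (reader.read_uint16(&len))        // true means we ran out of data
+      return HA_ERR_ROCKSDB_CORRUPT_DATA;
+    const char *payload = reader.read(len);
+    if (payload == nullptr)
+      return HA_ERR_ROCKSDB_CORRUPT_DATA;
+*/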
+
+/*
+ @brief
+ A buffer one can write the data to.
+
+ @detail
+ Suggested usage pattern:
+
+ writer->clear();
+ writer->write_XXX(...);
+ ...
+ // Ok, writer->ptr() points to the data written so far,
+ // and writer->get_current_pos() is the length of the data
+
+*/
+
+class Rdb_string_writer {
+ std::vector<uchar> m_data;
+
+ public:
+ Rdb_string_writer(const Rdb_string_writer &) = delete;
+ Rdb_string_writer &operator=(const Rdb_string_writer &) = delete;
+ Rdb_string_writer() = default;
+
+ void clear() { m_data.clear(); }
+ void write_uint8(const uint val) {
+ m_data.push_back(static_cast<uchar>(val));
+ }
+
+ void write_uint16(const uint val) {
+ const auto size = m_data.size();
+ m_data.resize(size + 2);
+ rdb_netbuf_store_uint16(m_data.data() + size, val);
+ }
+
+ void write_uint32(const uint val) {
+ const auto size = m_data.size();
+ m_data.resize(size + 4);
+ rdb_netbuf_store_uint32(m_data.data() + size, val);
+ }
+
+ void write(const uchar *const new_data, const size_t len) {
+ DBUG_ASSERT(new_data != nullptr);
+ m_data.insert(m_data.end(), new_data, new_data + len);
+ }
+
+ uchar *ptr() { return m_data.data(); }
+ size_t get_current_pos() const { return m_data.size(); }
+
+ void write_uint8_at(const size_t pos, const uint new_val) {
+ // This function will only overwrite what was written
+ DBUG_ASSERT(pos < get_current_pos());
+ m_data.data()[pos] = new_val;
+ }
+
+ void write_uint16_at(const size_t pos, const uint new_val) {
+ // This function will only overwrite what was written
+ DBUG_ASSERT(pos < get_current_pos() && (pos + 1) < get_current_pos());
+ rdb_netbuf_store_uint16(m_data.data() + pos, new_val);
+ }
+
+ void truncate(const size_t pos) {
+ DBUG_ASSERT(pos < m_data.size());
+ m_data.resize(pos);
+ }
+
+ void allocate(const size_t len, const uchar val = 0) {
+ DBUG_ASSERT(len > 0);
+ m_data.resize(m_data.size() + len, val);
+ }
+
+ /*
+    An awful hack to deallocate the buffer without relying on the destructor.
+ This is needed to suppress valgrind errors in rocksdb.partition
+ */
+ void free() { std::vector<uchar>().swap(m_data); }
+};
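+
+/*
+  Illustrative pairing with Rdb_string_reader above:
+
+    Rdb_string_writer writer;
+    writer.write_uint16(0xCAFE);
+
+    rocksdb::Slice s(reinterpret_cast<const char *>(writer.ptr()),
+                     writer.get_current_pos());
+    Rdb_string_reader reader(&s);
+    uint val;
+    reader.read_uint16(&val);  // val == 0xCAFE, returns false (success)
+*/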
+
+/*
+ A helper class for writing bits into Rdb_string_writer.
+
+  The class assumes (but doesn't check) that nothing else writes to the
+  underlying Rdb_string_writer while this class is writing to it.
+*/
+class Rdb_bit_writer {
+ Rdb_string_writer *m_writer;
+ uchar m_offset;
+
+ public:
+ Rdb_bit_writer(const Rdb_bit_writer &) = delete;
+ Rdb_bit_writer &operator=(const Rdb_bit_writer &) = delete;
+
+ explicit Rdb_bit_writer(Rdb_string_writer *writer_arg)
+ : m_writer(writer_arg), m_offset(0) {}
+
+ void write(uint size, const uint value) {
+ DBUG_ASSERT((value & ((1 << size) - 1)) == value);
+
+ while (size > 0) {
+ if (m_offset == 0) {
+ m_writer->write_uint8(0);
+ }
+ // number of bits to put in this byte
+ const uint bits = std::min(size, (uint)(8 - m_offset));
+ uchar *const last_byte =
+ m_writer->ptr() + m_writer->get_current_pos() - 1;
+ *last_byte |= (uchar)((value >> (size - bits)) & ((1 << bits) - 1))
+ << m_offset;
+ size -= bits;
+ m_offset = (m_offset + bits) & 0x7;
+ }
+ }
+};
+
+class Rdb_bit_reader {
+ const uchar *m_cur;
+ uchar m_offset;
+ uint m_ret;
+ Rdb_string_reader *const m_reader;
+
+ public:
+ Rdb_bit_reader(const Rdb_bit_reader &) = delete;
+ Rdb_bit_reader &operator=(const Rdb_bit_reader &) = delete;
+
+ explicit Rdb_bit_reader(Rdb_string_reader *const reader)
+ : m_cur(nullptr), m_offset(0), m_reader(reader) {}
+
+ // Returns a pointer to an uint containing the bits read. On subsequent
+ // reads, the value being pointed to will be overwritten. Returns nullptr
+ // on failure.
+ uint *read(uint size) {
+ m_ret = 0;
+ DBUG_ASSERT(size <= 32);
+
+ while (size > 0) {
+ if (m_offset == 0) {
+ m_cur = (const uchar *)m_reader->read(1);
+ if (m_cur == nullptr) {
+ return nullptr;
+ }
+ }
+ // how many bits from the current byte?
+ const uint bits = std::min((uint)(8 - m_offset), size);
+ m_ret <<= bits;
+ m_ret |= (*m_cur >> m_offset) & ((1 << bits) - 1);
+ size -= bits;
+ m_offset = (m_offset + bits) & 0x7;
+ }
+
+ return &m_ret;
+ }
+};
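+
+/*
+  Illustrative bit-level round trip (both values fit into a single byte,
+  which the writer lays out as 0b11010101 = 0xD5):
+
+    Rdb_string_writer sw;
+    Rdb_bit_writer bw(&sw);
+    bw.write(3, 0x5);   // low 3 bits of the byte
+    bw.write(5, 0x1A);  // next 5 bits
+
+    rocksdb::Slice s(reinterpret_cast<const char *>(sw.ptr()),
+                     sw.get_current_pos());
+    Rdb_string_reader sr(&s);
+    Rdb_bit_reader br(&sr);
+    uint *a = br.read(3);  // *a == 0x5
+    uint *b = br.read(5);  // *b == 0x1A
+*/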
+
+template <size_t buf_length>
+class Rdb_buf_writer {
+ public:
+ Rdb_buf_writer(const Rdb_buf_writer &) = delete;
+ Rdb_buf_writer &operator=(const Rdb_buf_writer &) = delete;
+ Rdb_buf_writer() { reset(); }
+
+ void write_uint32(const uint32 n) {
+ DBUG_ASSERT(m_ptr + sizeof(n) <= m_buf.data() + buf_length);
+ rdb_netbuf_store_uint32(m_ptr, n);
+ m_ptr += sizeof(n);
+ }
+
+ void write_uint64(const uint64 n) {
+ DBUG_ASSERT(m_ptr + sizeof(n) <= m_buf.data() + buf_length);
+ rdb_netbuf_store_uint64(m_ptr, n);
+ m_ptr += sizeof(n);
+ }
+
+ void write_uint16(const uint16 n) {
+ DBUG_ASSERT(m_ptr + sizeof(n) <= m_buf.data() + buf_length);
+ rdb_netbuf_store_uint16(m_ptr, n);
+ m_ptr += sizeof(n);
+ }
+
+ void write_byte(const uchar c) {
+ DBUG_ASSERT(m_ptr + sizeof(c) <= m_buf.data() + buf_length);
+ rdb_netbuf_store_byte(m_ptr, c);
+ m_ptr += sizeof(c);
+ }
+
+ void write_index(const uint32 n) { write_uint32(n); }
+
+ void write(const char *buf, const size_t size) {
+ DBUG_ASSERT(m_ptr + size <= m_buf.data() + buf_length);
+ memcpy(m_ptr, buf, size);
+ m_ptr += size;
+ }
+
+ void write(const uchar *buf, const size_t size) {
+ DBUG_ASSERT(m_ptr + size <= m_buf.data() + buf_length);
+ memcpy(m_ptr, buf, size);
+ m_ptr += size;
+ }
+
+ void reset() { m_ptr = m_buf.data(); }
+
+ const char *data() const {
+ return reinterpret_cast<const char *>(m_buf.data());
+ }
+
+ size_t capacity() { return buf_length; }
+
+  /** Returns the number of bytes of data written to the buffer so far */
+ size_t size() { return m_ptr - m_buf.data(); }
+
+ rocksdb::Slice to_slice() { return rocksdb::Slice(data(), size()); }
+
+ private:
+ std::array<uchar, buf_length> m_buf;
+ uchar *m_ptr;
+};
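+
+/*
+  Illustrative usage: a stack-allocated, fixed-capacity writer for an
+  8-byte value, handed to RocksDB as a slice:
+
+    Rdb_buf_writer<sizeof(uint64)> w;
+    w.write_uint64(some_id);                  // some_id is a placeholder
+    const rocksdb::Slice key = w.to_slice();  // key.size() == 8
+*/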
+
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_cf_manager.cc b/storage/rocksdb/rdb_cf_manager.cc
new file mode 100644
index 00000000000..c4461346f32
--- /dev/null
+++ b/storage/rocksdb/rdb_cf_manager.cc
@@ -0,0 +1,273 @@
+/*
+ Copyright (c) 2014, SkySQL Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation // gcc: Class implementation
+#endif
+
+/* For use of 'PRIu64': */
+#define __STDC_FORMAT_MACROS
+
+#include <my_config.h>
+
+#include <inttypes.h>
+
+/* This C++ files header file */
+#include "./rdb_cf_manager.h"
+
+/* MyRocks header files */
+#include "./ha_rocksdb.h"
+#include "./ha_rocksdb_proto.h"
+#include "./rdb_datadic.h"
+#include "./rdb_psi.h"
+
+#include <string>
+
+namespace myrocks {
+
+/* Check if ColumnFamily name says it's a reverse-ordered CF */
+bool Rdb_cf_manager::is_cf_name_reverse(const char *const name) {
+  /* nullptr means the default CF is used. (TODO: can the default CF be
+   * reverse?) */
+ return (name && !strncmp(name, "rev:", 4));
+}
+
+void Rdb_cf_manager::init(
+ std::unique_ptr<Rdb_cf_options> &&cf_options,
+ std::vector<rocksdb::ColumnFamilyHandle *> *const handles) {
+ mysql_mutex_init(rdb_cfm_mutex_key, &m_mutex, MY_MUTEX_INIT_FAST);
+
+ DBUG_ASSERT(cf_options != nullptr);
+ DBUG_ASSERT(handles != nullptr);
+ DBUG_ASSERT(handles->size() > 0);
+
+ m_cf_options = std::move(cf_options);
+
+ for (auto cfh : *handles) {
+ DBUG_ASSERT(cfh != nullptr);
+ m_cf_name_map[cfh->GetName()] = cfh;
+ m_cf_id_map[cfh->GetID()] = cfh;
+ }
+}
+
+void Rdb_cf_manager::cleanup() {
+ for (auto it : m_cf_name_map) {
+ delete it.second;
+ }
+ mysql_mutex_destroy(&m_mutex);
+ m_cf_options = nullptr;
+}
+
+/*
+ @brief
+ Find column family by name. If it doesn't exist, create it
+
+ @detail
+ See Rdb_cf_manager::get_cf
+*/
+rocksdb::ColumnFamilyHandle *Rdb_cf_manager::get_or_create_cf(
+ rocksdb::DB *const rdb, const std::string &cf_name_arg) {
+ DBUG_ASSERT(rdb != nullptr);
+
+ rocksdb::ColumnFamilyHandle *cf_handle = nullptr;
+
+ if (cf_name_arg == PER_INDEX_CF_NAME) {
+    // per-index column families are no longer supported.
+ my_error(ER_PER_INDEX_CF_DEPRECATED, MYF(0));
+ return nullptr;
+ }
+
+ const std::string &cf_name =
+ cf_name_arg.empty() ? DEFAULT_CF_NAME : cf_name_arg;
+
+ RDB_MUTEX_LOCK_CHECK(m_mutex);
+
+ const auto it = m_cf_name_map.find(cf_name);
+
+ if (it != m_cf_name_map.end()) {
+ cf_handle = it->second;
+ } else {
+ /* Create a Column Family. */
+ rocksdb::ColumnFamilyOptions opts;
+ m_cf_options->get_cf_options(cf_name, &opts);
+
+ // NO_LINT_DEBUG
+ sql_print_information("RocksDB: creating a column family %s",
+ cf_name.c_str());
+ // NO_LINT_DEBUG
+ sql_print_information(" write_buffer_size=%ld", opts.write_buffer_size);
+
+ // NO_LINT_DEBUG
+ sql_print_information(" target_file_size_base=%" PRIu64,
+ opts.target_file_size_base);
+
+ const rocksdb::Status s =
+ rdb->CreateColumnFamily(opts, cf_name, &cf_handle);
+
+ if (s.ok()) {
+ m_cf_name_map[cf_handle->GetName()] = cf_handle;
+ m_cf_id_map[cf_handle->GetID()] = cf_handle;
+ } else {
+ cf_handle = nullptr;
+ }
+ }
+
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+
+ return cf_handle;
+}
+
+/*
+ Find column family by its cf_name.
+*/
+
+rocksdb::ColumnFamilyHandle *Rdb_cf_manager::get_cf(
+ const std::string &cf_name_arg, const bool lock_held_by_caller) const {
+ rocksdb::ColumnFamilyHandle *cf_handle;
+
+ if (!lock_held_by_caller) {
+ RDB_MUTEX_LOCK_CHECK(m_mutex);
+ }
+ std::string cf_name = cf_name_arg.empty() ? DEFAULT_CF_NAME : cf_name_arg;
+
+ const auto it = m_cf_name_map.find(cf_name);
+ cf_handle = (it != m_cf_name_map.end()) ? it->second : nullptr;
+
+ if (!cf_handle) {
+ // NO_LINT_DEBUG
+ sql_print_warning("Column family '%s' not found.", cf_name.c_str());
+ }
+
+ if (!lock_held_by_caller) {
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+ }
+
+ return cf_handle;
+}
+
+rocksdb::ColumnFamilyHandle *Rdb_cf_manager::get_cf(const uint32_t id) const {
+ rocksdb::ColumnFamilyHandle *cf_handle = nullptr;
+
+ RDB_MUTEX_LOCK_CHECK(m_mutex);
+ const auto it = m_cf_id_map.find(id);
+ if (it != m_cf_id_map.end()) cf_handle = it->second;
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+
+ return cf_handle;
+}
+
+std::vector<std::string> Rdb_cf_manager::get_cf_names(void) const {
+ std::vector<std::string> names;
+
+ RDB_MUTEX_LOCK_CHECK(m_mutex);
+ for (auto it : m_cf_name_map) {
+ names.push_back(it.first);
+ }
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+
+ return names;
+}
+
+std::vector<rocksdb::ColumnFamilyHandle *> Rdb_cf_manager::get_all_cf(
+ void) const {
+ std::vector<rocksdb::ColumnFamilyHandle *> list;
+
+ RDB_MUTEX_LOCK_CHECK(m_mutex);
+
+ for (auto it : m_cf_id_map) {
+ DBUG_ASSERT(it.second != nullptr);
+ list.push_back(it.second);
+ }
+
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+
+ return list;
+}
+
+struct Rdb_cf_scanner : public Rdb_tables_scanner {
+ uint32_t m_cf_id;
+  bool m_is_cf_used;
+
+ explicit Rdb_cf_scanner(uint32_t cf_id)
+ : m_cf_id(cf_id), m_is_cf_used(false) {}
+
+ int add_table(Rdb_tbl_def *tdef) override {
+ DBUG_ASSERT(tdef != nullptr);
+
+ for (uint i = 0; i < tdef->m_key_count; i++) {
+ const Rdb_key_def &kd = *tdef->m_key_descr_arr[i];
+
+ if (kd.get_cf()->GetID() == m_cf_id) {
+ m_is_cf_used = true;
+ return HA_EXIT_SUCCESS;
+ }
+ }
+ return HA_EXIT_SUCCESS;
+ }
+};
+
+int Rdb_cf_manager::drop_cf(const std::string &cf_name) {
+ auto ddl_manager = rdb_get_ddl_manager();
+ uint32_t cf_id = 0;
+
+ if (cf_name == DEFAULT_SYSTEM_CF_NAME) {
+ return HA_EXIT_FAILURE;
+ }
+
+ RDB_MUTEX_LOCK_CHECK(m_mutex);
+ auto cf_handle = get_cf(cf_name, true /* lock_held_by_caller */);
+ if (cf_handle == nullptr) {
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+ return HA_EXIT_SUCCESS;
+ }
+
+ cf_id = cf_handle->GetID();
+ Rdb_cf_scanner scanner(cf_id);
+
+ auto ret = ddl_manager->scan_for_tables(&scanner);
+ if (ret) {
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+ return ret;
+ }
+
+ if (scanner.m_is_cf_used) {
+ // column family is used by existing key
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+ return HA_EXIT_FAILURE;
+ }
+
+ auto rdb = rdb_get_rocksdb_db();
+ auto status = rdb->DropColumnFamily(cf_handle);
+ if (!status.ok()) {
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+ return ha_rocksdb::rdb_error_to_mysql(status);
+ }
+
+ delete cf_handle;
+
+ auto id_iter = m_cf_id_map.find(cf_id);
+ DBUG_ASSERT(id_iter != m_cf_id_map.end());
+ m_cf_id_map.erase(id_iter);
+
+ auto name_iter = m_cf_name_map.find(cf_name);
+ DBUG_ASSERT(name_iter != m_cf_name_map.end());
+ m_cf_name_map.erase(name_iter);
+
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+
+ return HA_EXIT_SUCCESS;
+}
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_cf_manager.h b/storage/rocksdb/rdb_cf_manager.h
new file mode 100644
index 00000000000..cf7b3d6cfb8
--- /dev/null
+++ b/storage/rocksdb/rdb_cf_manager.h
@@ -0,0 +1,108 @@
+/*
+ Copyright (c) 2014, SkySQL Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+
+#pragma once
+
+/* C++ system header files */
+#include <map>
+#include <string>
+#include <vector>
+
+/* MySQL header files */
+#include "./sql_class.h"
+
+/* RocksDB header files */
+#include "rocksdb/db.h"
+
+/* MyRocks header files */
+#include "./rdb_cf_options.h"
+
+namespace myrocks {
+
+/*
+ We need a Column Family (CF) manager. Its functions:
+ - create column families (synchronized, don't create the same twice)
+ - keep count in each column family.
+ = the count is kept on-disk.
+ = there are no empty CFs. initially count=1.
+ = then, when doing DDL, we increase or decrease it.
+ (atomicity is maintained by being in the same WriteBatch with DDLs)
+ = if DROP discovers that now count=0, it removes the CF.
+
+ Current state is:
+ - CFs are created in a synchronized way. We can't remove them, yet.
+*/
+
+class Rdb_cf_manager {
+ std::map<std::string, rocksdb::ColumnFamilyHandle *> m_cf_name_map;
+ std::map<uint32_t, rocksdb::ColumnFamilyHandle *> m_cf_id_map;
+
+ mutable mysql_mutex_t m_mutex;
+
+ std::unique_ptr<Rdb_cf_options> m_cf_options = nullptr;
+
+ public:
+ Rdb_cf_manager(const Rdb_cf_manager &) = delete;
+ Rdb_cf_manager &operator=(const Rdb_cf_manager &) = delete;
+ Rdb_cf_manager() = default;
+
+ static bool is_cf_name_reverse(const char *const name);
+
+ /*
+    This is called right after the DB::Open() call. The parameters describe
+    the column families that are present in the database. The first CF is
+    the default CF.
+ */
+ void init(std::unique_ptr<Rdb_cf_options> &&cf_options,
+ std::vector<rocksdb::ColumnFamilyHandle *> *const handles);
+ void cleanup();
+
+ /*
+ Used by CREATE TABLE.
+ - cf_name=nullptr means use default column family
+ */
+ rocksdb::ColumnFamilyHandle *get_or_create_cf(rocksdb::DB *const rdb,
+ const std::string &cf_name);
+
+ /* Used by table open */
+ rocksdb::ColumnFamilyHandle *get_cf(
+ const std::string &cf_name, const bool lock_held_by_caller = false) const;
+
+ /* Look up cf by id; used by datadic */
+ rocksdb::ColumnFamilyHandle *get_cf(const uint32_t id) const;
+
+ /* Used to iterate over column families for show status */
+ std::vector<std::string> get_cf_names(void) const;
+
+ /* Used to iterate over column families */
+ std::vector<rocksdb::ColumnFamilyHandle *> get_all_cf(void) const;
+
+ /* Used to delete cf by name */
+ int drop_cf(const std::string &cf_name);
+
+ void get_cf_options(const std::string &cf_name,
+ rocksdb::ColumnFamilyOptions *const opts)
+ MY_ATTRIBUTE((__nonnull__)) {
+ m_cf_options->get_cf_options(cf_name, opts);
+ }
+
+ void update_options_map(const std::string &cf_name,
+ const std::string &updated_options) {
+ m_cf_options->update(cf_name, updated_options);
+ }
+};
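+
+/*
+  Typical call sequence (illustrative sketch):
+
+    cf_manager.init(std::move(cf_options), &handles); // right after DB::Open()
+    rocksdb::ColumnFamilyHandle *cfh =
+        cf_manager.get_or_create_cf(rdb, "rev:cf2");  // nullptr on failure
+    ...
+    cf_manager.cleanup();  // on shutdown; deletes the cached handles
+*/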
+
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_cf_options.cc b/storage/rocksdb/rdb_cf_options.cc
new file mode 100644
index 00000000000..4f12a998e65
--- /dev/null
+++ b/storage/rocksdb/rdb_cf_options.cc
@@ -0,0 +1,341 @@
+/*
+ Copyright (c) 2014, SkySQL Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation // gcc: Class implementation
+#endif
+
+#include <my_config.h>
+
+/* This C++ files header file */
+#include "./rdb_cf_options.h"
+
+/* C++ system header files */
+#include <string>
+
+/* MySQL header files */
+#include "./log.h"
+
+/* RocksDB header files */
+#include "rocksdb/utilities/convenience.h"
+
+/* MyRocks header files */
+#include "./ha_rocksdb.h"
+#include "./rdb_cf_manager.h"
+#include "./rdb_compact_filter.h"
+
+namespace myrocks {
+
+Rdb_pk_comparator Rdb_cf_options::s_pk_comparator;
+Rdb_rev_comparator Rdb_cf_options::s_rev_pk_comparator;
+
+bool Rdb_cf_options::init(
+ const rocksdb::BlockBasedTableOptions &table_options,
+ std::shared_ptr<rocksdb::TablePropertiesCollectorFactory> prop_coll_factory,
+ const char *const default_cf_options,
+ const char *const override_cf_options) {
+ DBUG_ASSERT(default_cf_options != nullptr);
+ DBUG_ASSERT(override_cf_options != nullptr);
+
+ m_default_cf_opts.comparator = &s_pk_comparator;
+ m_default_cf_opts.compaction_filter_factory.reset(
+ new Rdb_compact_filter_factory);
+
+ m_default_cf_opts.table_factory.reset(
+ rocksdb::NewBlockBasedTableFactory(table_options));
+
+ if (prop_coll_factory) {
+ m_default_cf_opts.table_properties_collector_factories.push_back(
+ prop_coll_factory);
+ }
+
+ if (!set_default(std::string(default_cf_options)) ||
+ !set_override(std::string(override_cf_options))) {
+ return false;
+ }
+
+ return true;
+}
+
+void Rdb_cf_options::get(const std::string &cf_name,
+ rocksdb::ColumnFamilyOptions *const opts) {
+ DBUG_ASSERT(opts != nullptr);
+
+ // Get defaults.
+ rocksdb::GetColumnFamilyOptionsFromString(*opts, m_default_config, opts);
+
+  // Get a custom configuration if we have one.
+ Name_to_config_t::iterator it = m_name_map.find(cf_name);
+
+ if (it != m_name_map.end()) {
+ rocksdb::GetColumnFamilyOptionsFromString(*opts, it->second, opts);
+ }
+}
+
+void Rdb_cf_options::update(const std::string &cf_name,
+ const std::string &cf_options) {
+ DBUG_ASSERT(!cf_name.empty());
+ DBUG_ASSERT(!cf_options.empty());
+
+ // Always update. If we didn't have an entry before then add it.
+ m_name_map[cf_name] = cf_options;
+
+ DBUG_ASSERT(!m_name_map.empty());
+}
+
+bool Rdb_cf_options::set_default(const std::string &default_config) {
+ rocksdb::ColumnFamilyOptions options;
+
+ if (!default_config.empty() && !rocksdb::GetColumnFamilyOptionsFromString(
+ options, default_config, &options)
+ .ok()) {
+ // NO_LINT_DEBUG
+ fprintf(stderr, "Invalid default column family config: %s\n",
+ default_config.c_str());
+ return false;
+ }
+
+ m_default_config = default_config;
+ return true;
+}
+
+// Skip over any spaces in the input string.
+void Rdb_cf_options::skip_spaces(const std::string &input, size_t *const pos) {
+ DBUG_ASSERT(pos != nullptr);
+
+ while (*pos < input.size() && isspace(input[*pos])) ++(*pos);
+}
+
+// Find a valid column family name. Note that all characters except a
+// semicolon are valid (should this change?) and all spaces are trimmed from
+// the beginning and end but are not removed between other characters.
+bool Rdb_cf_options::find_column_family(const std::string &input,
+ size_t *const pos,
+ std::string *const key) {
+ DBUG_ASSERT(pos != nullptr);
+ DBUG_ASSERT(key != nullptr);
+
+ const size_t beg_pos = *pos;
+ size_t end_pos = *pos - 1;
+
+ // Loop through the characters in the string until we see a '='.
+ for (; *pos < input.size() && input[*pos] != '='; ++(*pos)) {
+ // If this is not a space, move the end position to the current position.
+ if (input[*pos] != ' ') end_pos = *pos;
+ }
+
+ if (end_pos == beg_pos - 1) {
+ // NO_LINT_DEBUG
+ sql_print_warning("No column family found (options: %s)", input.c_str());
+ return false;
+ }
+
+ *key = input.substr(beg_pos, end_pos - beg_pos + 1);
+ return true;
+}
+
+// Find a valid options portion. Everything is deemed valid within the options
+// portion until we hit as many close curly braces as we have seen open curly
+// braces.
+bool Rdb_cf_options::find_options(const std::string &input, size_t *const pos,
+ std::string *const options) {
+ DBUG_ASSERT(pos != nullptr);
+ DBUG_ASSERT(options != nullptr);
+
+ // Make sure we have an open curly brace at the current position.
+ if (*pos < input.size() && input[*pos] != '{') {
+ // NO_LINT_DEBUG
+ sql_print_warning("Invalid cf options, '{' expected (options: %s)",
+ input.c_str());
+ return false;
+ }
+
+ // Skip the open curly brace and any spaces.
+ ++(*pos);
+ skip_spaces(input, pos);
+
+ // Set up our brace_count, the begin position and current end position.
+ size_t brace_count = 1;
+ const size_t beg_pos = *pos;
+
+ // Loop through the characters in the string until we find the appropriate
+ // number of closing curly braces.
+ while (*pos < input.size()) {
+ switch (input[*pos]) {
+ case '}':
+ // If this is a closing curly brace and we bring the count down to zero
+ // we can exit the loop with a valid options string.
+ if (--brace_count == 0) {
+ *options = input.substr(beg_pos, *pos - beg_pos);
+ ++(*pos); // Move past the last closing curly brace
+ return true;
+ }
+
+ break;
+
+ case '{':
+ // If this is an open curly brace increment the count.
+ ++brace_count;
+ break;
+
+ default:
+ break;
+ }
+
+ // Move to the next character.
+ ++(*pos);
+ }
+
+ // We never found the correct number of closing curly braces.
+ // Generate an error.
+ // NO_LINT_DEBUG
+ sql_print_warning("Mismatched cf options, '}' expected (options: %s)",
+ input.c_str());
+ return false;
+}
+
+bool Rdb_cf_options::find_cf_options_pair(const std::string &input,
+ size_t *const pos,
+ std::string *const cf,
+ std::string *const opt_str) {
+ DBUG_ASSERT(pos != nullptr);
+ DBUG_ASSERT(cf != nullptr);
+ DBUG_ASSERT(opt_str != nullptr);
+
+ // Skip any spaces.
+ skip_spaces(input, pos);
+
+ // We should now have a column family name.
+ if (!find_column_family(input, pos, cf)) return false;
+
+ // If we are at the end of the input then we generate an error.
+ if (*pos == input.size()) {
+ // NO_LINT_DEBUG
+ sql_print_warning("Invalid cf options, '=' expected (options: %s)",
+ input.c_str());
+ return false;
+ }
+
+ // Skip equal sign and any spaces after it
+ ++(*pos);
+ skip_spaces(input, pos);
+
+ // Find the options for this column family. This should be in the format
+ // {<options>} where <options> may contain embedded pairs of curly braces.
+ if (!find_options(input, pos, opt_str)) return false;
+
+ // Skip any trailing spaces after the option string.
+ skip_spaces(input, pos);
+
+ // We should either be at the end of the input string or at a semicolon.
+ if (*pos < input.size()) {
+ if (input[*pos] != ';') {
+ // NO_LINT_DEBUG
+ sql_print_warning("Invalid cf options, ';' expected (options: %s)",
+ input.c_str());
+ return false;
+ }
+
+ ++(*pos);
+ }
+
+ return true;
+}
+
+bool Rdb_cf_options::parse_cf_options(const std::string &cf_options,
+ Name_to_config_t *option_map) {
+ std::string cf;
+ std::string opt_str;
+ rocksdb::ColumnFamilyOptions options;
+
+ DBUG_ASSERT(option_map != nullptr);
+ DBUG_ASSERT(option_map->empty());
+
+ // Loop through the characters of the string until we reach the end.
+ size_t pos = 0;
+
+ while (pos < cf_options.size()) {
+ // Attempt to find <cf>={<opt_str>}.
+ if (!find_cf_options_pair(cf_options, &pos, &cf, &opt_str)) {
+ return false;
+ }
+
+ // Generate an error if we have already seen this column family.
+ if (option_map->find(cf) != option_map->end()) {
+ // NO_LINT_DEBUG
+ sql_print_warning(
+ "Duplicate entry for %s in override options (options: %s)",
+ cf.c_str(), cf_options.c_str());
+ return false;
+ }
+
+ // Generate an error if the <opt_str> is not valid according to RocksDB.
+ if (!rocksdb::GetColumnFamilyOptionsFromString(options, opt_str, &options)
+ .ok()) {
+ // NO_LINT_DEBUG
+ sql_print_warning(
+ "Invalid cf config for %s in override options (options: %s)",
+ cf.c_str(), cf_options.c_str());
+ return false;
+ }
+
+ // If everything is good, add this cf/opt_str pair to the map.
+ (*option_map)[cf] = opt_str;
+ }
+
+ return true;
+}
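+
+// Example (illustrative option names) of an override string this parser
+// accepts: pairs are separated by ';', each option list is wrapped in curly
+// braces, and braces may nest inside the option list:
+//
+//   cf1={write_buffer_size=8m;target_file_size_base=2m};
+//   rev:cf2={compression=kLZ4Compression}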
+
+bool Rdb_cf_options::set_override(const std::string &override_config) {
+ Name_to_config_t configs;
+
+ if (!parse_cf_options(override_config, &configs)) {
+ return false;
+ }
+
+ // Everything checked out - make the map live
+ m_name_map = configs;
+
+ return true;
+}
+
+const rocksdb::Comparator *Rdb_cf_options::get_cf_comparator(
+ const std::string &cf_name) {
+ if (Rdb_cf_manager::is_cf_name_reverse(cf_name.c_str())) {
+ return &s_rev_pk_comparator;
+ } else {
+ return &s_pk_comparator;
+ }
+}
+
+std::shared_ptr<rocksdb::MergeOperator> Rdb_cf_options::get_cf_merge_operator(
+ const std::string &cf_name) {
+ return (cf_name == DEFAULT_SYSTEM_CF_NAME)
+ ? std::make_shared<Rdb_system_merge_op>()
+ : nullptr;
+}
+
+void Rdb_cf_options::get_cf_options(const std::string &cf_name,
+ rocksdb::ColumnFamilyOptions *const opts) {
+ *opts = m_default_cf_opts;
+ get(cf_name, opts);
+
+ // Set the comparator according to 'rev:'
+ opts->comparator = get_cf_comparator(cf_name);
+ opts->merge_operator = get_cf_merge_operator(cf_name);
+}
+
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_cf_options.h b/storage/rocksdb/rdb_cf_options.h
new file mode 100644
index 00000000000..360356f7af1
--- /dev/null
+++ b/storage/rocksdb/rdb_cf_options.h
@@ -0,0 +1,104 @@
+/*
+ Copyright (c) 2014, SkySQL Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+
+#pragma once
+
+/* C++ system header files */
+#include <string>
+#include <unordered_map>
+
+/* RocksDB header files */
+#include "rocksdb/table.h"
+#include "rocksdb/utilities/options_util.h"
+
+/* MyRocks header files */
+#include "./rdb_comparator.h"
+
+namespace myrocks {
+
+/*
+ Per-column family options configs.
+
+ Per-column family option can be set
+ - Globally (the same value applies to all column families)
+ - Per column family: there is a {cf_name -> value} map,
+ and also there is a default value which applies to column
+ families not found in the map.
+*/
+class Rdb_cf_options {
+ public:
+ using Name_to_config_t = std::unordered_map<std::string, std::string>;
+
+ Rdb_cf_options(const Rdb_cf_options &) = delete;
+ Rdb_cf_options &operator=(const Rdb_cf_options &) = delete;
+ Rdb_cf_options() = default;
+
+ void get(const std::string &cf_name,
+ rocksdb::ColumnFamilyOptions *const opts);
+
+ void update(const std::string &cf_name, const std::string &cf_options);
+
+ bool init(const rocksdb::BlockBasedTableOptions &table_options,
+ std::shared_ptr<rocksdb::TablePropertiesCollectorFactory>
+ prop_coll_factory,
+ const char *const default_cf_options,
+ const char *const override_cf_options);
+
+ const rocksdb::ColumnFamilyOptions &get_defaults() const {
+ return m_default_cf_opts;
+ }
+
+ static const rocksdb::Comparator *get_cf_comparator(
+ const std::string &cf_name);
+
+ std::shared_ptr<rocksdb::MergeOperator> get_cf_merge_operator(
+ const std::string &cf_name);
+
+ void get_cf_options(const std::string &cf_name,
+ rocksdb::ColumnFamilyOptions *const opts)
+ MY_ATTRIBUTE((__nonnull__));
+
+ static bool parse_cf_options(const std::string &cf_options,
+ Name_to_config_t *option_map);
+
+ private:
+ bool set_default(const std::string &default_config);
+  bool set_override(const std::string &override_config);
+
+ /* Helper string manipulation functions */
+ static void skip_spaces(const std::string &input, size_t *const pos);
+ static bool find_column_family(const std::string &input, size_t *const pos,
+ std::string *const key);
+ static bool find_options(const std::string &input, size_t *const pos,
+ std::string *const options);
+ static bool find_cf_options_pair(const std::string &input, size_t *const pos,
+ std::string *const cf,
+ std::string *const opt_str);
+
+ private:
+ static Rdb_pk_comparator s_pk_comparator;
+ static Rdb_rev_comparator s_rev_pk_comparator;
+
+ /* CF name -> value map */
+ Name_to_config_t m_name_map;
+
+ /* The default value (if there is only one value, it is stored here) */
+ std::string m_default_config;
+
+ rocksdb::ColumnFamilyOptions m_default_cf_opts;
+};
+
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_compact_filter.h b/storage/rocksdb/rdb_compact_filter.h
new file mode 100644
index 00000000000..1cd27273b56
--- /dev/null
+++ b/storage/rocksdb/rdb_compact_filter.h
@@ -0,0 +1,220 @@
+/*
+ Portions Copyright (c) 2016-Present, Facebook, Inc.
+ Portions Copyright (c) 2012, Monty Program Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+#pragma once
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation // gcc: Class implementation
+#endif
+
+/* C++ system header files */
+#include <time.h>
+#include <string>
+#include <ctime>
+
+/* RocksDB includes */
+#include "rocksdb/compaction_filter.h"
+
+/* MyRocks includes */
+#include "./ha_rocksdb_proto.h"
+#include "./rdb_datadic.h"
+
+namespace myrocks {
+
+class Rdb_compact_filter : public rocksdb::CompactionFilter {
+ public:
+ Rdb_compact_filter(const Rdb_compact_filter &) = delete;
+ Rdb_compact_filter &operator=(const Rdb_compact_filter &) = delete;
+
+ explicit Rdb_compact_filter(uint32_t _cf_id) : m_cf_id(_cf_id) {}
+ ~Rdb_compact_filter() {
+ // Increment stats by num expired at the end of compaction
+ rdb_update_global_stats(ROWS_EXPIRED, m_num_expired);
+ }
+
+  // Keys are passed in sorted order within the same SST file.
+  // The V1 Filter is thread-safe in our usage (one filter per compaction,
+  // created from the factory). Make sure to protect the instance variables
+  // if this is ever switched to a thread-unsafe usage in the future.
+ virtual bool Filter(int level, const rocksdb::Slice &key,
+ const rocksdb::Slice &existing_value,
+ std::string *new_value,
+ bool *value_changed) const override {
+ DBUG_ASSERT(key.size() >= sizeof(uint32));
+
+ GL_INDEX_ID gl_index_id;
+ gl_index_id.cf_id = m_cf_id;
+ gl_index_id.index_id = rdb_netbuf_to_uint32((const uchar *)key.data());
+ DBUG_ASSERT(gl_index_id.index_id >= 1);
+
+ if (gl_index_id != m_prev_index) {
+ m_should_delete =
+ rdb_get_dict_manager()->is_drop_index_ongoing(gl_index_id);
+
+ if (!m_should_delete) {
+ get_ttl_duration_and_offset(gl_index_id, &m_ttl_duration,
+ &m_ttl_offset);
+
+ if (m_ttl_duration != 0 && m_snapshot_timestamp == 0) {
+ /*
+ For efficiency reasons, we lazily call GetIntProperty to get the
+ oldest snapshot time (occurs once per compaction).
+ */
+ rocksdb::DB *const rdb = rdb_get_rocksdb_db();
+ if (!rdb->GetIntProperty(rocksdb::DB::Properties::kOldestSnapshotTime,
+ &m_snapshot_timestamp) ||
+ m_snapshot_timestamp == 0) {
+ m_snapshot_timestamp = static_cast<uint64_t>(std::time(nullptr));
+ }
+
+#ifndef DBUG_OFF
+ int snapshot_ts = rdb_dbug_set_ttl_snapshot_ts();
+ if (snapshot_ts) {
+ m_snapshot_timestamp =
+ static_cast<uint64_t>(std::time(nullptr)) + snapshot_ts;
+ }
+#endif
+ }
+ }
+
+ m_prev_index = gl_index_id;
+ }
+
+ if (m_should_delete) {
+ m_num_deleted++;
+ return true;
+ } else if (m_ttl_duration > 0 &&
+ should_filter_ttl_rec(key, existing_value)) {
+ m_num_expired++;
+ return true;
+ }
+
+ return false;
+ }
+
+ virtual bool IgnoreSnapshots() const override { return true; }
+
+ virtual const char *Name() const override { return "Rdb_compact_filter"; }
+
+ void get_ttl_duration_and_offset(const GL_INDEX_ID &gl_index_id,
+ uint64 *ttl_duration,
+ uint32 *ttl_offset) const {
+ DBUG_ASSERT(ttl_duration != nullptr);
+ /*
+ If TTL is disabled set ttl_duration to 0. This prevents the compaction
+ filter from dropping expired records.
+ */
+ if (!rdb_is_ttl_enabled()) {
+ *ttl_duration = 0;
+ return;
+ }
+
+ /*
+ If key is part of system column family, it's definitely not a TTL key.
+ */
+ rocksdb::ColumnFamilyHandle *s_cf = rdb_get_dict_manager()->get_system_cf();
+ if (s_cf == nullptr || gl_index_id.cf_id == s_cf->GetID()) {
+ *ttl_duration = 0;
+ return;
+ }
+
+ struct Rdb_index_info index_info;
+ if (!rdb_get_dict_manager()->get_index_info(gl_index_id, &index_info)) {
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Could not get index information "
+ "for Index Number (%u,%u)",
+ gl_index_id.cf_id, gl_index_id.index_id);
+ }
+
+#ifndef DBUG_OFF
+ if (rdb_dbug_set_ttl_ignore_pk() &&
+ index_info.m_index_type == Rdb_key_def::INDEX_TYPE_PRIMARY) {
+ *ttl_duration = 0;
+ return;
+ }
+#endif
+
+ *ttl_duration = index_info.m_ttl_duration;
+ if (Rdb_key_def::has_index_flag(index_info.m_index_flags,
+ Rdb_key_def::TTL_FLAG)) {
+ *ttl_offset = Rdb_key_def::calculate_index_flag_offset(
+ index_info.m_index_flags, Rdb_key_def::TTL_FLAG);
+ }
+ }
+
+ bool should_filter_ttl_rec(const rocksdb::Slice &key,
+ const rocksdb::Slice &existing_value) const {
+ uint64 ttl_timestamp;
+ Rdb_string_reader reader(&existing_value);
+ if (!reader.read(m_ttl_offset) || reader.read_uint64(&ttl_timestamp)) {
+ std::string buf;
+ buf = rdb_hexdump(existing_value.data(), existing_value.size(),
+ RDB_MAX_HEXDUMP_LEN);
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "Decoding ttl from PK value failed in compaction filter, "
+ "for index (%u,%u), val: %s",
+ m_prev_index.cf_id, m_prev_index.index_id, buf.c_str());
+ abort();
+ }
+
+ /*
+ Filter out the record only if it is older than the oldest snapshot
+ timestamp. This prevents any rows from expiring in the middle of
+ long-running transactions.
+ */
+ return ttl_timestamp + m_ttl_duration <= m_snapshot_timestamp;
+ }
+
+ private:
+ // Column family for this compaction filter
+ const uint32_t m_cf_id;
+ // Index id of the previous record
+ mutable GL_INDEX_ID m_prev_index = {0, 0};
+ // Number of rows deleted for the same index id
+ mutable uint64 m_num_deleted = 0;
+ // Number of rows expired for the TTL index
+ mutable uint64 m_num_expired = 0;
+ // Current index id should be deleted or not (should be deleted if true)
+ mutable bool m_should_delete = false;
+ // TTL duration for the current index if TTL is enabled
+ mutable uint64 m_ttl_duration = 0;
+ // TTL offset for all records in the current index
+ mutable uint32 m_ttl_offset = 0;
+ // Oldest snapshot timestamp at the time a TTL index is discovered
+ mutable uint64_t m_snapshot_timestamp = 0;
+};
+
+class Rdb_compact_filter_factory : public rocksdb::CompactionFilterFactory {
+ public:
+ Rdb_compact_filter_factory(const Rdb_compact_filter_factory &) = delete;
+ Rdb_compact_filter_factory &operator=(const Rdb_compact_filter_factory &) =
+ delete;
+ Rdb_compact_filter_factory() {}
+
+ ~Rdb_compact_filter_factory() {}
+
+ const char *Name() const override { return "Rdb_compact_filter_factory"; }
+
+ std::unique_ptr<rocksdb::CompactionFilter> CreateCompactionFilter(
+ const rocksdb::CompactionFilter::Context &context) override {
+ return std::unique_ptr<rocksdb::CompactionFilter>(
+ new Rdb_compact_filter(context.column_family_id));
+ }
+};
+
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_comparator.h b/storage/rocksdb/rdb_comparator.h
new file mode 100644
index 00000000000..9cb25925159
--- /dev/null
+++ b/storage/rocksdb/rdb_comparator.h
@@ -0,0 +1,85 @@
+/*
+ Copyright (c) 2012,2015 Monty Program Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+#pragma once
+
+/* C++ system header files */
+#include <string>
+
+/* MySQL includes */
+#include "./m_ctype.h"
+
+/* RocksDB header files */
+#include "rocksdb/comparator.h"
+
+/* MyRocks header files */
+#include "./rdb_utils.h"
+
+namespace myrocks {
+
+/*
+ The keys are in form: {index_number} {mem-comparable-key}
+
+ (todo: knowledge about this format is shared between this class and
+ Rdb_key_def)
+*/
+class Rdb_pk_comparator : public rocksdb::Comparator {
+ public:
+ Rdb_pk_comparator(const Rdb_pk_comparator &) = delete;
+ Rdb_pk_comparator &operator=(const Rdb_pk_comparator &) = delete;
+ Rdb_pk_comparator() = default;
+
+ // extracting from rocksdb::BytewiseComparator()->Compare() for optimization
+ int Compare(const rocksdb::Slice &a, const rocksdb::Slice &b) const override {
+ return a.compare(b);
+ }
+
+ const char *Name() const override { return "RocksDB_SE_v3.10"; }
+
+ // TODO: advanced funcs:
+ // - FindShortestSeparator
+ // - FindShortSuccessor
+
+ // for now, do-nothing implementations:
+ void FindShortestSeparator(std::string *start,
+ const rocksdb::Slice &limit) const override {
+ rocksdb::BytewiseComparator()->FindShortestSeparator(start, limit);
+ }
+ void FindShortSuccessor(std::string *key) const override {
+ rocksdb::BytewiseComparator()->FindShortSuccessor(key);
+ }
+};
+
+class Rdb_rev_comparator : public rocksdb::Comparator {
+ public:
+ Rdb_rev_comparator(const Rdb_rev_comparator &) = delete;
+ Rdb_rev_comparator &operator=(const Rdb_rev_comparator &) = delete;
+ Rdb_rev_comparator() = default;
+
+ // extracting from rocksdb::BytewiseComparator()->Compare() for optimization
+ int Compare(const rocksdb::Slice &a, const rocksdb::Slice &b) const override {
+ return -a.compare(b);
+ }
+ const char *Name() const override { return "rev:RocksDB_SE_v3.10"; }
+ void FindShortestSeparator(std::string *start,
+ const rocksdb::Slice &limit) const override {
+ rocksdb::ReverseBytewiseComparator()->FindShortestSeparator(start, limit);
+ }
+ void FindShortSuccessor(std::string *key) const override {
+ rocksdb::ReverseBytewiseComparator()->FindShortSuccessor(key);
+ }
+};
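+
+// Illustrative: under Rdb_rev_comparator, Compare("a", "b") > 0, so "b"
+// orders before "a" (the mirror image of the bytewise default above).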
+
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_converter.cc b/storage/rocksdb/rdb_converter.cc
new file mode 100644
index 00000000000..c558f428652
--- /dev/null
+++ b/storage/rocksdb/rdb_converter.cc
@@ -0,0 +1,838 @@
+/*
+ Copyright (c) 2015, Facebook, Inc.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include <my_config.h>
+
+/* This C++ file's header file */
+#include "./rdb_converter.h"
+
+/* Standard C++ header files */
+#include <algorithm>
+#include <map>
+#include <string>
+#include <vector>
+
+/* MySQL header files */
+#include "./field.h"
+#include "./key.h"
+#include "./m_ctype.h"
+#include "./my_bit.h"
+#include "./my_bitmap.h"
+#include "./sql_table.h"
+
+
+/* MyRocks header files */
+#include "./ha_rocksdb.h"
+#include "./ha_rocksdb_proto.h"
+#include "./my_stacktrace.h"
+#include "./rdb_cf_manager.h"
+#include "./rdb_psi.h"
+#include "./rdb_utils.h"
+
+
+namespace myrocks {
+
+void dbug_modify_key_varchar8(String *on_disk_rec) {
+ std::string res;
+ // The key starts with index number
+ res.append(on_disk_rec->ptr(), Rdb_key_def::INDEX_NUMBER_SIZE);
+
+ // Then, a mem-comparable form of a varchar(8) value.
+ res.append("ABCDE\0\0\0\xFC", 9);
+ on_disk_rec->length(0);
+ on_disk_rec->append(res.data(), res.size());
+}
+
+/*
+ Convert field from rocksdb storage format into Mysql Record format
+ @param buf OUT start memory to fill converted data
+ @param offset IN/OUT decoded data is stored in buf + offset
+ @param table IN current table
+ @param field IN current field
+ @param reader IN rocksdb value slice reader
+ @param decode IN whether to decode current field
+ @return
+ 0 OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int Rdb_convert_to_record_value_decoder::decode(uchar *const buf, uint *offset,
+ TABLE *table,
+ my_core::Field *field,
+ Rdb_field_encoder *field_dec,
+ Rdb_string_reader *reader,
+ bool decode, bool is_null) {
+ int err = HA_EXIT_SUCCESS;
+
+ uint field_offset = field->ptr - table->record[0];
+ *offset = field_offset;
+ uint null_offset = field->null_offset();
+ bool maybe_null = field->real_maybe_null();
+ field->move_field(buf + field_offset,
+ maybe_null ? buf + null_offset : nullptr, field->null_bit);
+
+ if (is_null) {
+ if (decode) {
+ // This sets the NULL-bit of this record
+ field->set_null();
+ /*
+ Besides that, set the field value to default value. CHECKSUM TABLE
+ depends on this.
+ */
+ memcpy(field->ptr, table->s->default_values + field_offset,
+ field->pack_length());
+ }
+ } else {
+ if (decode) {
+ // sets non-null bits for this record
+ field->set_notnull();
+ }
+
+ if (field_dec->m_field_type == MYSQL_TYPE_BLOB) {
+ err = decode_blob(table, field, reader, decode);
+ } else if (field_dec->m_field_type == MYSQL_TYPE_VARCHAR) {
+ err = decode_varchar(field, reader, decode);
+ } else {
+ err = decode_fixed_length_field(field, field_dec, reader, decode);
+ }
+ }
+
+ // Restore field->ptr and field->null_ptr
+ field->move_field(table->record[0] + field_offset,
+ maybe_null ? table->record[0] + null_offset : nullptr,
+ field->null_bit);
+
+ return err;
+}
+
+/*
+ Convert blob from rocksdb storage format into Mysql Record format
+ @param table IN current table
+ @param field IN current field
+ @param reader IN rocksdb value slice reader
+ @param decode IN whether to decode current field
+ @return
+ 0 OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int Rdb_convert_to_record_value_decoder::decode_blob(TABLE *table, Field *field,
+ Rdb_string_reader *reader,
+ bool decode) {
+ my_core::Field_blob *blob = (my_core::Field_blob *)field;
+
+ // Get the number of bytes needed to store length
+ const uint length_bytes = blob->pack_length() - portable_sizeof_char_ptr;
+
+ const char *data_len_str;
+ if (!(data_len_str = reader->read(length_bytes))) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+
+ memcpy(blob->ptr, data_len_str, length_bytes);
+ uint32 data_len =
+ blob->get_length(reinterpret_cast<const uchar *>(data_len_str),
+ length_bytes);
+ const char *blob_ptr;
+ if (!(blob_ptr = reader->read(data_len))) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+
+ if (decode) {
+ // set 8-byte pointer to 0, like innodb does (relevant for 32-bit
+ // platforms)
+ memset(blob->ptr + length_bytes, 0, 8);
+ memcpy(blob->ptr + length_bytes, &blob_ptr, sizeof(uchar **));
+ }
+
+ return HA_EXIT_SUCCESS;
+}
+
+/*
+ Convert fixed length field from rocksdb storage format into Mysql Record
+ format
+ @param field IN current field
+ @param field_dec IN data structure containing field encoding data
+ @param reader IN rocksdb value slice reader
+ @param decode IN whether to decode current field
+ @return
+ 0 OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int Rdb_convert_to_record_value_decoder::decode_fixed_length_field(
+ my_core::Field *const field, Rdb_field_encoder *field_dec,
+ Rdb_string_reader *const reader, bool decode) {
+ uint len = field_dec->m_pack_length_in_rec;
+ if (len > 0) {
+ const char *data_bytes;
+ if ((data_bytes = reader->read(len)) == nullptr) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+
+ if (decode) {
+ memcpy(field->ptr, data_bytes, len);
+ }
+ }
+
+ return HA_EXIT_SUCCESS;
+}
+
+/*
+ Convert varchar field from rocksdb storage format into Mysql Record format
+ @param field IN current field
+ @param field_dec IN data structure containing field encoding data
+ @param reader IN rocksdb value slice reader
+ @param decode IN whether to decode current field
+ @return
+ 0 OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int Rdb_convert_to_record_value_decoder::decode_varchar(
+ Field *field, Rdb_string_reader *const reader, bool decode) {
+ my_core::Field_varstring *const field_var = (my_core::Field_varstring *)field;
+
+ const char *data_len_str;
+ if (!(data_len_str = reader->read(field_var->length_bytes))) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+
+ uint data_len;
+ // field_var->length_bytes is 1 or 2
+ if (field_var->length_bytes == 1) {
+ data_len = (uchar)data_len_str[0];
+ } else {
+ DBUG_ASSERT(field_var->length_bytes == 2);
+ data_len = uint2korr(data_len_str);
+ }
+
+ if (data_len > field_var->field_length) {
+ // The data on disk is longer than table DDL allows?
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+
+ if (!reader->read(data_len)) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+
+ if (decode) {
+ memcpy(field_var->ptr, data_len_str, field_var->length_bytes + data_len);
+ }
+
+ return HA_EXIT_SUCCESS;
+}
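+
+// Illustrative sketch (not part of the build): the storage layout that
+// decode_varchar() reads. For a hypothetical VARCHAR(16) column
+// (length_bytes == 1) holding the value "abc", the value slice contains:
+//
+//   0x03 'a' 'b' 'c'
+//   ^len ^-- data_len bytes of payload --^
+//
+// For columns whose max byte length exceeds 255, length_bytes == 2 and the
+// length is read with uint2korr() (little-endian uint16).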
+
+template <typename value_field_decoder>
+Rdb_value_field_iterator<value_field_decoder>::Rdb_value_field_iterator(
+ TABLE *table, Rdb_string_reader *value_slice_reader,
+ const Rdb_converter *rdb_converter, uchar *const buf)
+ : m_buf(buf) {
+ DBUG_ASSERT(table != nullptr);
+ DBUG_ASSERT(buf != nullptr);
+
+ m_table = table;
+ m_value_slice_reader = value_slice_reader;
+ auto fields = rdb_converter->get_decode_fields();
+ m_field_iter = fields->begin();
+ m_field_end = fields->end();
+ m_null_bytes = rdb_converter->get_null_bytes();
+ m_offset = 0;
+}
+
+// Iterate each requested field and decode one by one
+template <typename value_field_decoder>
+int Rdb_value_field_iterator<value_field_decoder>::next() {
+ int err = HA_EXIT_SUCCESS;
+ while (m_field_iter != m_field_end) {
+ m_field_dec = m_field_iter->m_field_enc;
+ bool decode = m_field_iter->m_decode;
+ bool maybe_null = m_field_dec->maybe_null();
+ // This is_null value is bound to how the storage format stores its value
+ m_is_null = maybe_null && ((m_null_bytes[m_field_dec->m_null_offset] &
+ m_field_dec->m_null_mask) != 0);
+
+ // Skip the bytes we need to skip
+ int skip = m_field_iter->m_skip;
+ if (skip && !m_value_slice_reader->read(skip)) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+
+ m_field = m_table->field[m_field_dec->m_field_index];
+ // Decode each field
+ err = value_field_decoder::decode(m_buf, &m_offset, m_table, m_field,
+ m_field_dec, m_value_slice_reader, decode,
+ m_is_null);
+ if (err != HA_EXIT_SUCCESS) {
+ return err;
+ }
+ m_field_iter++;
+ // Only break for fields that are actually decoded rather than skipped
+ if (decode) {
+ break;
+ }
+ }
+ return err;
+}
+
+template <typename value_field_decoder>
+bool Rdb_value_field_iterator<value_field_decoder>::end_of_fields() const {
+ return m_field_iter == m_field_end;
+}
+
+template <typename value_field_decoder>
+Field *Rdb_value_field_iterator<value_field_decoder>::get_field() const {
+ DBUG_ASSERT(m_field != nullptr);
+ return m_field;
+}
+
+template <typename value_field_decoder>
+void *Rdb_value_field_iterator<value_field_decoder>::get_dst() const {
+ DBUG_ASSERT(m_buf != nullptr);
+ return m_buf + m_offset;
+}
+
+template <typename value_field_decoder>
+int Rdb_value_field_iterator<value_field_decoder>::get_field_index() const {
+ DBUG_ASSERT(m_field_dec != nullptr);
+ return m_field_dec->m_field_index;
+}
+
+template <typename value_field_decoder>
+enum_field_types Rdb_value_field_iterator<value_field_decoder>::get_field_type()
+ const {
+ DBUG_ASSERT(m_field_dec != nullptr);
+ return m_field_dec->m_field_type;
+}
+
+template <typename value_field_decoder>
+bool Rdb_value_field_iterator<value_field_decoder>::is_null() const {
+ DBUG_ASSERT(m_field != nullptr);
+ return m_is_null;
+}
+
+/*
+ Initialize Rdb_converter with table data
+ @param thd IN Thread context
+ @param tbl_def IN MyRocks table definition
+ @param table IN Current open table
+*/
+Rdb_converter::Rdb_converter(const THD *thd, const Rdb_tbl_def *tbl_def,
+ TABLE *table)
+ : m_thd(thd), m_tbl_def(tbl_def), m_table(table) {
+ DBUG_ASSERT(thd != nullptr);
+ DBUG_ASSERT(tbl_def != nullptr);
+ DBUG_ASSERT(table != nullptr);
+
+ m_key_requested = false;
+ m_verify_row_debug_checksums = false;
+ m_maybe_unpack_info = false;
+ m_row_checksums_checked = 0;
+ m_null_bytes = nullptr;
+ setup_field_encoders();
+}
+
+Rdb_converter::~Rdb_converter() {
+ my_free(m_encoder_arr);
+ m_encoder_arr = nullptr;
+ // This is needed to suppress valgrind errors in rocksdb.partition
+ m_storage_record.free();
+}
+
+/*
+ Decide storage type for each encoder
+*/
+void Rdb_converter::get_storage_type(Rdb_field_encoder *const encoder,
+ const uint kp) {
+ auto pk_descr =
+ m_tbl_def->m_key_descr_arr[ha_rocksdb::pk_index(m_table, m_tbl_def)];
+ // STORE_SOME uses unpack_info.
+ if (pk_descr->has_unpack_info(kp)) {
+ DBUG_ASSERT(pk_descr->can_unpack(kp));
+ encoder->m_storage_type = Rdb_field_encoder::STORE_SOME;
+ m_maybe_unpack_info = true;
+ } else if (pk_descr->can_unpack(kp)) {
+ encoder->m_storage_type = Rdb_field_encoder::STORE_NONE;
+ }
+}
+
+/*
+ @brief
+ Setup which fields will be unpacked when reading rows
+
+ @detail
+ Three special cases when we still unpack all fields:
+ - When the client requires decode_all_fields, such as when the table is
+ being updated (m_lock_rows==RDB_LOCK_WRITE).
+ - When @@rocksdb_verify_row_debug_checksums is ON (in this mode, we need to
+ read all fields to find whether there is a row checksum at the end. We
+ could skip the fields instead of decoding them, but currently we decode.)
+ - During index merge, as the bitmap is cleared during that operation.
+
+ @seealso
+ Rdb_converter::setup_field_encoders()
+ Rdb_converter::convert_record_from_storage_format()
+*/
+void Rdb_converter::setup_field_decoders(const MY_BITMAP *field_map,
+ bool decode_all_fields) {
+ m_key_requested = false;
+ m_decoders_vect.clear();
+ int last_useful = 0;
+ int skip_size = 0;
+
+ for (uint i = 0; i < m_table->s->fields; i++) {
+ // The bitmap is cleared on index merge, but we still need to decode columns
+ bool field_requested =
+ decode_all_fields || m_verify_row_debug_checksums ||
+ bitmap_is_clear_all(field_map) ||
+ bitmap_is_set(field_map, m_table->field[i]->field_index);
+
+ // We only need the decoder if the whole record is stored.
+ if (m_encoder_arr[i].m_storage_type != Rdb_field_encoder::STORE_ALL) {
+ // the field potentially needs unpacking
+ if (field_requested) {
+ // the field is in the read set
+ m_key_requested = true;
+ }
+ continue;
+ }
+
+ if (field_requested) {
+ // We will need to decode this field
+ m_decoders_vect.push_back({&m_encoder_arr[i], true, skip_size});
+ last_useful = m_decoders_vect.size();
+ skip_size = 0;
+ } else {
+ if (m_encoder_arr[i].uses_variable_len_encoding() ||
+ m_encoder_arr[i].maybe_null()) {
+ // For variable-length field, we need to read the data and skip it
+ m_decoders_vect.push_back({&m_encoder_arr[i], false, skip_size});
+ skip_size = 0;
+ } else {
+ // Fixed-width field can be skipped without looking at it.
+ // Add appropriate skip_size to the next field.
+ skip_size += m_encoder_arr[i].m_pack_length_in_rec;
+ }
+ }
+ }
+
+ // It could be that the last few elements are varchars that just do
+ // skipping. Remove them.
+ m_decoders_vect.erase(m_decoders_vect.begin() + last_useful,
+ m_decoders_vect.end());
+}
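+
+// Illustrative sketch (not part of the build): how skip_size folds fixed
+// fields together. For a hypothetical table (a INT NOT NULL, b INT NOT NULL,
+// c VARCHAR(10)) where only `c` is in the read set, `a` and `b` are
+// fixed-width and not NULLable, so no decoder entries are pushed for them;
+// instead the single entry for `c` carries m_skip = 8 (two 4-byte INTs),
+// telling the reader to skip 8 bytes before decoding `c`.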
+
+void Rdb_converter::setup_field_encoders() {
+ uint null_bytes_length = 0;
+ uchar cur_null_mask = 0x1;
+
+ m_encoder_arr = static_cast<Rdb_field_encoder *>(
+ my_malloc(m_table->s->fields * sizeof(Rdb_field_encoder), MYF(0)));
+ if (m_encoder_arr == nullptr) {
+ return;
+ }
+
+ for (uint i = 0; i < m_table->s->fields; i++) {
+ Field *const field = m_table->field[i];
+ m_encoder_arr[i].m_storage_type = Rdb_field_encoder::STORE_ALL;
+
+ /*
+ Check if this field is
+ - a part of primary key, and
+ - it can be decoded back from its key image.
+ If both hold, we don't need to store this field in the value part of
+ RocksDB's key-value pair.
+
+ If hidden pk exists, we skip this check since the field will never be
+ part of the hidden pk.
+ */
+ if (!Rdb_key_def::table_has_hidden_pk(m_table)) {
+ KEY *const pk_info = &m_table->key_info[m_table->s->primary_key];
+ for (uint kp = 0; kp < pk_info->user_defined_key_parts; kp++) {
+ // key_part->fieldnr is counted from 1
+ if (field->field_index + 1 == pk_info->key_part[kp].fieldnr) {
+ get_storage_type(&m_encoder_arr[i], kp);
+ break;
+ }
+ }
+ }
+
+ m_encoder_arr[i].m_field_type = field->real_type();
+ m_encoder_arr[i].m_field_index = i;
+ m_encoder_arr[i].m_pack_length_in_rec = field->pack_length_in_rec();
+
+ if (field->real_maybe_null()) {
+ m_encoder_arr[i].m_null_mask = cur_null_mask;
+ m_encoder_arr[i].m_null_offset = null_bytes_length;
+ if (cur_null_mask == 0x80) {
+ cur_null_mask = 0x1;
+ null_bytes_length++;
+ } else {
+ cur_null_mask = cur_null_mask << 1;
+ }
+ } else {
+ m_encoder_arr[i].m_null_mask = 0;
+ }
+ }
+
+ // Count the last, unfinished NULL-bits byte
+ if (cur_null_mask != 0x1) {
+ null_bytes_length++;
+ }
+
+ m_null_bytes_length_in_record = null_bytes_length;
+}
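+
+// Illustrative sketch (not part of the build): the NULL-bit packing
+// performed above. Nullable fields get consecutive bits within a byte; with
+// nine nullable columns, for example:
+//
+//   col0..col7 -> m_null_offset = 0, masks 0x01, 0x02, ... 0x80
+//   col8       -> m_null_offset = 1, mask  0x01
+//
+// so m_null_bytes_length_in_record == 2 (the last partial byte is counted).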
+
+/*
+ Entry point for decoding:
+ Decode the key slice (if requested) and the value slice using the built-in
+ field decoders
+ @param key_def IN key definition to decode
+ @param dst OUT Mysql buffer to fill decoded content
+ @param key_slice IN RocksDB key slice to decode
+ @param value_slice IN RocksDB value slice to decode
+ @return
+ 0 OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int Rdb_converter::decode(const std::shared_ptr<Rdb_key_def> &key_def,
+ uchar *dst, // address to fill data
+ const rocksdb::Slice *key_slice,
+ const rocksdb::Slice *value_slice) {
+ // Currently we only support decoding the primary key; secondary key
+ // decoding will be added later
+ DBUG_ASSERT(key_def->m_index_type == Rdb_key_def::INDEX_TYPE_PRIMARY ||
+ key_def->m_index_type == Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY);
+
+ const rocksdb::Slice *updated_key_slice = key_slice;
+#ifndef DBUG_OFF
+ String last_rowkey;
+ last_rowkey.copy(key_slice->data(), key_slice->size(), &my_charset_bin);
+ DBUG_EXECUTE_IF("myrocks_simulate_bad_pk_read1",
+ { dbug_modify_key_varchar8(&last_rowkey); });
+ rocksdb::Slice rowkey_slice(last_rowkey.ptr(), last_rowkey.length());
+ updated_key_slice = &rowkey_slice;
+#endif
+ return convert_record_from_storage_format(key_def, updated_key_slice,
+ value_slice, dst);
+}
+
+/*
+ Decode value slice header
+ @param reader IN value slice reader
+ @param pk_def IN key definition to decode
+ @param unpack_slice OUT unpack info slice
+ @return
+ 0 OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int Rdb_converter::decode_value_header(
+ Rdb_string_reader *reader, const std::shared_ptr<Rdb_key_def> &pk_def,
+ rocksdb::Slice *unpack_slice) {
+ /* If it's a TTL record, skip the 8 byte TTL value */
+ if (pk_def->has_ttl()) {
+ const char *ttl_bytes;
+ if ((ttl_bytes = reader->read(ROCKSDB_SIZEOF_TTL_RECORD))) {
+ memcpy(m_ttl_bytes, ttl_bytes, ROCKSDB_SIZEOF_TTL_RECORD);
+ } else {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+ }
+
+ /* Other fields are decoded from the value */
+ if (m_null_bytes_length_in_record &&
+ !(m_null_bytes = reader->read(m_null_bytes_length_in_record))) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+
+ if (m_maybe_unpack_info) {
+ const char *unpack_info = reader->get_current_ptr();
+ if (!unpack_info || !Rdb_key_def::is_unpack_data_tag(unpack_info[0]) ||
+ !reader->read(Rdb_key_def::get_unpack_header_size(unpack_info[0]))) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+
+ uint16 unpack_info_len =
+ rdb_netbuf_to_uint16(reinterpret_cast<const uchar *>(unpack_info + 1));
+ *unpack_slice = rocksdb::Slice(unpack_info, unpack_info_len);
+
+ reader->read(unpack_info_len -
+ Rdb_key_def::get_unpack_header_size(unpack_info[0]));
+ }
+
+ return HA_EXIT_SUCCESS;
+}
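+
+// Illustrative sketch (not part of the build): the value slice header that
+// decode_value_header() consumes, in order:
+//
+//   [ TTL timestamp, 8 bytes -- only if pk_def->has_ttl()              ]
+//   [ NULL-bit bytes         -- m_null_bytes_length_in_record bytes   ]
+//   [ unpack_info: tag byte + uint16 length + payload -- only if
+//     m_maybe_unpack_info                                              ]
+//
+// followed by the per-field data that the value field iterator decodes.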
+
+/*
+ Convert a RocksDB key slice and value slice to MySQL format
+ @param key_def IN key definition to decode
+ @param key_slice IN RocksDB key slice
+ @param value_slice IN RocksDB value slice
+ @param dst OUT MySql format address
+ @return
+ 0 OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int Rdb_converter::convert_record_from_storage_format(
+ const std::shared_ptr<Rdb_key_def> &pk_def,
+ const rocksdb::Slice *const key_slice,
+ const rocksdb::Slice *const value_slice, uchar *const dst) {
+ int err = HA_EXIT_SUCCESS;
+
+ Rdb_string_reader value_slice_reader(value_slice);
+ rocksdb::Slice unpack_slice;
+ err = decode_value_header(&value_slice_reader, pk_def, &unpack_slice);
+ if (err != HA_EXIT_SUCCESS) {
+ return err;
+ }
+
+ /*
+ Decode PK fields from the key
+ */
+ if (m_key_requested) {
+ err = pk_def->unpack_record(m_table, dst, key_slice,
+ !unpack_slice.empty() ? &unpack_slice : nullptr,
+ false /* verify_checksum */);
+ }
+ if (err != HA_EXIT_SUCCESS) {
+ return err;
+ }
+
+ Rdb_value_field_iterator<Rdb_convert_to_record_value_decoder>
+ value_field_iterator(m_table, &value_slice_reader, this, dst);
+
+ // Decode value slices
+ while (!value_field_iterator.end_of_fields()) {
+ err = value_field_iterator.next();
+
+ if (err != HA_EXIT_SUCCESS) {
+ return err;
+ }
+ }
+
+ if (m_verify_row_debug_checksums) {
+ return verify_row_debug_checksum(pk_def, &value_slice_reader, key_slice,
+ value_slice);
+ }
+ return HA_EXIT_SUCCESS;
+}
+
+/*
+ Verify checksum for row
+ @param pk_def IN key def
+ @param reader IN RocksDB value slice reader
+ @param key IN RocksDB key slice
+ @param value IN RocksDB value slice
+ @return
+ 0 OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int Rdb_converter::verify_row_debug_checksum(
+ const std::shared_ptr<Rdb_key_def> &pk_def, Rdb_string_reader *reader,
+ const rocksdb::Slice *key, const rocksdb::Slice *value) {
+ if (reader->remaining_bytes() == RDB_CHECKSUM_CHUNK_SIZE &&
+ reader->read(1)[0] == RDB_CHECKSUM_DATA_TAG) {
+ uint32_t stored_key_chksum =
+ rdb_netbuf_to_uint32((const uchar *)reader->read(RDB_CHECKSUM_SIZE));
+ uint32_t stored_val_chksum =
+ rdb_netbuf_to_uint32((const uchar *)reader->read(RDB_CHECKSUM_SIZE));
+
+ const uint32_t computed_key_chksum =
+ my_core::crc32(0, rdb_slice_to_uchar_ptr(key), key->size());
+ const uint32_t computed_val_chksum =
+ my_core::crc32(0, rdb_slice_to_uchar_ptr(value),
+ value->size() - RDB_CHECKSUM_CHUNK_SIZE);
+
+ DBUG_EXECUTE_IF("myrocks_simulate_bad_pk_checksum1", stored_key_chksum++;);
+
+ if (stored_key_chksum != computed_key_chksum) {
+ pk_def->report_checksum_mismatch(true, key->data(), key->size());
+ return HA_ERR_ROCKSDB_CHECKSUM_MISMATCH;
+ }
+
+ DBUG_EXECUTE_IF("myrocks_simulate_bad_pk_checksum2", stored_val_chksum++;);
+ if (stored_val_chksum != computed_val_chksum) {
+ pk_def->report_checksum_mismatch(false, value->data(), value->size());
+ return HA_ERR_ROCKSDB_CHECKSUM_MISMATCH;
+ }
+
+ m_row_checksums_checked++;
+ }
+ if (reader->remaining_bytes()) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+ return HA_EXIT_SUCCESS;
+}
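+
+// Illustrative sketch (not part of the build): the optional checksum chunk
+// at the end of the value slice, RDB_CHECKSUM_CHUNK_SIZE bytes in total:
+//
+//   [ RDB_CHECKSUM_DATA_TAG, 1 byte                                  ]
+//   [ key crc32,   RDB_CHECKSUM_SIZE (4) bytes, network byte order   ]
+//   [ value crc32, RDB_CHECKSUM_SIZE (4) bytes, network byte order   ]
+//
+// The value crc32 covers everything before the chunk itself, which is why
+// the computation above subtracts RDB_CHECKSUM_CHUNK_SIZE from value->size().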
+
+/**
+ Convert record from table->record[0] form into a form that can be written
+ into rocksdb.
+
+ @param pk_def IN Current key def
+ @param pk_packed_slice IN Packed PK key slice (used for the key checksum)
+ @param pk_unpack_info IN Unpack info generated during key packing
+ @param is_update_row IN Whether this is an update of an existing row
+ @param store_row_debug_checksums IN Whether to store checksums
+ @param ttl_bytes IN/OUT Old TTL value from the previous record and
+ the TTL value during the current encode
+ @param is_ttl_bytes_updated OUT Whether the TTL bytes were updated
+ @param value_slice OUT Data slice with record data.
+*/
+int Rdb_converter::encode_value_slice(
+ const std::shared_ptr<Rdb_key_def> &pk_def,
+ const rocksdb::Slice &pk_packed_slice, Rdb_string_writer *pk_unpack_info,
+ bool is_update_row, bool store_row_debug_checksums, char *ttl_bytes,
+ bool *is_ttl_bytes_updated, rocksdb::Slice *const value_slice) {
+ DBUG_ASSERT(pk_def != nullptr);
+ // Currently only primary key will store value slice
+ DBUG_ASSERT(pk_def->m_index_type == Rdb_key_def::INDEX_TYPE_PRIMARY ||
+ pk_def->m_index_type == Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY);
+ DBUG_ASSERT_IMP(m_maybe_unpack_info, pk_unpack_info);
+
+ bool has_ttl = pk_def->has_ttl();
+ bool has_ttl_column = !pk_def->m_ttl_column.empty();
+
+ m_storage_record.length(0);
+
+ if (has_ttl) {
+ /* If it's a TTL record, reserve space for 8 byte TTL value in front. */
+ m_storage_record.fill(
+ ROCKSDB_SIZEOF_TTL_RECORD + m_null_bytes_length_in_record, 0);
+ // NOTE: is_ttl_bytes_updated is only used in the update case.
+ // During an update, we skip updating the SK key/value slices iff none of
+ // the SK fields have changed and the TTL bytes haven't changed. See
+ // ha_rocksdb::update_write_sk() for more info.
+ *is_ttl_bytes_updated = false;
+ char *const data = const_cast<char *>(m_storage_record.ptr());
+ if (has_ttl_column) {
+ DBUG_ASSERT(pk_def->get_ttl_field_index() != UINT_MAX);
+ Field *const field = m_table->field[pk_def->get_ttl_field_index()];
+ DBUG_ASSERT(field->pack_length_in_rec() == ROCKSDB_SIZEOF_TTL_RECORD);
+ DBUG_ASSERT(field->real_type() == MYSQL_TYPE_LONGLONG);
+
+ uint64 ts = uint8korr(field->ptr);
+#ifndef DBUG_OFF
+ ts += rdb_dbug_set_ttl_rec_ts();
+#endif
+ rdb_netbuf_store_uint64(reinterpret_cast<uchar *>(data), ts);
+ if (is_update_row) {
+ *is_ttl_bytes_updated =
+ memcmp(ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD);
+ }
+ // Also store in m_ttl_bytes to propagate to update_write_sk
+ memcpy(ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD);
+ } else {
+ /*
+ For implicitly generated TTL records we need to copy over the old
+ TTL value from the old record in the event of an update. It was stored
+ in m_ttl_bytes.
+
+ Otherwise, generate a timestamp using the current time.
+ */
+ if (is_update_row) {
+ memcpy(data, ttl_bytes, sizeof(uint64));
+ } else {
+ uint64 ts = static_cast<uint64>(std::time(nullptr));
+#ifndef DBUG_OFF
+ ts += rdb_dbug_set_ttl_rec_ts();
+#endif
+ rdb_netbuf_store_uint64(reinterpret_cast<uchar *>(data), ts);
+ // Also store in m_ttl_bytes to propagate to update_write_sk
+ memcpy(ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD);
+ }
+ }
+ } else {
+ /* All NULL bits are initially 0 */
+ m_storage_record.fill(m_null_bytes_length_in_record, 0);
+ }
+
+ // If a primary key may have non-empty unpack_info for certain values
+ // (m_maybe_unpack_info=TRUE), we write the unpack_info block. The block
+ // itself was prepared in Rdb_key_def::pack_record.
+ if (m_maybe_unpack_info) {
+ m_storage_record.append(reinterpret_cast<char *>(pk_unpack_info->ptr()),
+ pk_unpack_info->get_current_pos());
+ }
+ for (uint i = 0; i < m_table->s->fields; i++) {
+ Rdb_field_encoder &encoder = m_encoder_arr[i];
+ /* Don't pack decodable PK key parts */
+ if (encoder.m_storage_type != Rdb_field_encoder::STORE_ALL) {
+ continue;
+ }
+
+ Field *const field = m_table->field[i];
+ if (encoder.maybe_null()) {
+ char *data = const_cast<char *>(m_storage_record.ptr());
+ if (has_ttl) {
+ data += ROCKSDB_SIZEOF_TTL_RECORD;
+ }
+
+ if (field->is_null()) {
+ data[encoder.m_null_offset] |= encoder.m_null_mask;
+ /* Don't write anything for NULL values */
+ continue;
+ }
+ }
+
+ if (encoder.m_field_type == MYSQL_TYPE_BLOB) {
+ my_core::Field_blob *blob =
+ reinterpret_cast<my_core::Field_blob *>(field);
+ /* Get the number of bytes needed to store length */
+ const uint length_bytes = blob->pack_length() - portable_sizeof_char_ptr;
+
+ /* Store the length of the value */
+ m_storage_record.append(reinterpret_cast<char *>(blob->ptr),
+ length_bytes);
+
+ /* Store the blob value itself */
+ char *data_ptr;
+ memcpy(&data_ptr, blob->ptr + length_bytes, sizeof(uchar **));
+ m_storage_record.append(data_ptr, blob->get_length());
+ } else if (encoder.m_field_type == MYSQL_TYPE_VARCHAR) {
+ Field_varstring *const field_var =
+ reinterpret_cast<Field_varstring *>(field);
+ uint data_len;
+ /* field_var->length_bytes is 1 or 2 */
+ if (field_var->length_bytes == 1) {
+ data_len = field_var->ptr[0];
+ } else {
+ DBUG_ASSERT(field_var->length_bytes == 2);
+ data_len = uint2korr(field_var->ptr);
+ }
+ m_storage_record.append(reinterpret_cast<char *>(field_var->ptr),
+ field_var->length_bytes + data_len);
+ } else {
+ /* Copy the field data */
+ const uint len = field->pack_length_in_rec();
+ m_storage_record.append(reinterpret_cast<char *>(field->ptr), len);
+ }
+ }
+
+ if (store_row_debug_checksums) {
+ const uint32_t key_crc32 = my_core::crc32(
+ 0, rdb_slice_to_uchar_ptr(&pk_packed_slice), pk_packed_slice.size());
+ const uint32_t val_crc32 =
+ my_core::crc32(0, rdb_mysql_str_to_uchar_str(&m_storage_record),
+ m_storage_record.length());
+ uchar key_crc_buf[RDB_CHECKSUM_SIZE];
+ uchar val_crc_buf[RDB_CHECKSUM_SIZE];
+ rdb_netbuf_store_uint32(key_crc_buf, key_crc32);
+ rdb_netbuf_store_uint32(val_crc_buf, val_crc32);
+ m_storage_record.append((const char *)&RDB_CHECKSUM_DATA_TAG, 1);
+ m_storage_record.append((const char *)key_crc_buf, RDB_CHECKSUM_SIZE);
+ m_storage_record.append((const char *)val_crc_buf, RDB_CHECKSUM_SIZE);
+ }
+
+ *value_slice =
+ rocksdb::Slice(m_storage_record.ptr(), m_storage_record.length());
+
+ return HA_EXIT_SUCCESS;
+}
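+
+// Illustrative sketch (not part of the build): putting the pieces together,
+// a value slice produced by encode_value_slice() looks like:
+//
+//   [TTL, 8 bytes]? [NULL bytes] [unpack_info]? [field1][field2]...
+//   [checksum chunk]?
+//
+// where fixed-length fields are raw copies, VARCHARs are length-prefixed,
+// BLOBs are a length prefix followed by the blob payload, and NULL fields
+// contribute nothing beyond their NULL bit.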
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_converter.h b/storage/rocksdb/rdb_converter.h
new file mode 100644
index 00000000000..a4eae341f16
--- /dev/null
+++ b/storage/rocksdb/rdb_converter.h
@@ -0,0 +1,247 @@
+/*
+ Copyright (c) 2018, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#pragma once
+
+// C++ standard header files
+#include <string>
+#include <vector>
+
+// MySQL header files
+#include "./handler.h" // handler
+#include "./my_global.h" // ulonglong
+#include "./sql_string.h"
+#include "./ut0counter.h"
+
+// MyRocks header files
+#include "./ha_rocksdb.h"
+#include "./rdb_datadic.h"
+
+namespace myrocks {
+class Rdb_field_encoder;
+
+/**
+ Instructions for decoding one field from the value slice
+*/
+struct READ_FIELD {
+ // Points to Rdb_field_encoder describing the field
+ Rdb_field_encoder *m_field_enc;
+ // if true, decode the field, otherwise skip it
+ bool m_decode;
+ // Skip this many bytes before reading (or skipping) this field
+ int m_skip;
+};
+
+/**
+ Class to convert rocksdb value slice from storage format to mysql record
+ format.
+*/
+class Rdb_convert_to_record_value_decoder {
+ public:
+ Rdb_convert_to_record_value_decoder() = delete;
+ Rdb_convert_to_record_value_decoder(
+ const Rdb_convert_to_record_value_decoder &decoder) = delete;
+ Rdb_convert_to_record_value_decoder &operator=(
+ const Rdb_convert_to_record_value_decoder &decoder) = delete;
+
+ static int decode(uchar *const buf, uint *offset, TABLE *table,
+ my_core::Field *field, Rdb_field_encoder *field_dec,
+ Rdb_string_reader *reader, bool decode, bool is_null);
+
+ private:
+ static int decode_blob(TABLE *table, Field *field, Rdb_string_reader *reader,
+ bool decode);
+ static int decode_fixed_length_field(Field *const field,
+ Rdb_field_encoder *field_dec,
+ Rdb_string_reader *const reader,
+ bool decode);
+
+ static int decode_varchar(Field *const field, Rdb_string_reader *const reader,
+ bool decode);
+};
+
+/**
+ Class to iterate over fields in a RocksDB value slice.
+ Each template class instantiation represents a way to decode the data.
+ The reason to use a template class instead of a normal class is to
+ eliminate virtual method calls.
+*/
+template <typename value_field_decoder>
+class Rdb_value_field_iterator {
+ private:
+ bool m_is_null;
+ std::vector<READ_FIELD>::const_iterator m_field_iter;
+ std::vector<READ_FIELD>::const_iterator m_field_end;
+ Rdb_string_reader *m_value_slice_reader;
+ // null value map
+ const char *m_null_bytes;
+ // The current open table
+ TABLE *m_table;
+ // The current field
+ Field *m_field;
+ Rdb_field_encoder *m_field_dec;
+ uchar *const m_buf;
+ uint m_offset;
+
+ public:
+ Rdb_value_field_iterator(TABLE *table, Rdb_string_reader *value_slice_reader,
+ const Rdb_converter *rdb_converter,
+ uchar *const buf);
+ Rdb_value_field_iterator(const Rdb_value_field_iterator &field_iterator) =
+ delete;
+ Rdb_value_field_iterator &operator=(
+ const Rdb_value_field_iterator &field_iterator) = delete;
+
+ /*
+ Move to and decode the next field.
+ Call next() before accessing the data.
+ */
+ int next();
+ // Whether the iterator has reached the end of the fields
+ bool end_of_fields() const;
+ void *get_dst() const;
+ // Whether the value of current field is null
+ bool is_null() const;
+ // get current field index
+ int get_field_index() const;
+ // get current field type
+ enum_field_types get_field_type() const;
+ // get current field
+ Field *get_field() const;
+};
+
+/**
+ Class to convert Mysql formats to rocksdb storage format, and vice versa.
+*/
+class Rdb_converter {
+ public:
+ /*
+ Initialize converter with table data
+ */
+ Rdb_converter(const THD *thd, const Rdb_tbl_def *tbl_def, TABLE *table);
+ Rdb_converter(const Rdb_converter &decoder) = delete;
+ Rdb_converter &operator=(const Rdb_converter &decoder) = delete;
+ ~Rdb_converter();
+
+ void setup_field_decoders(const MY_BITMAP *field_map,
+ bool decode_all_fields = false);
+
+ int decode(const std::shared_ptr<Rdb_key_def> &key_def, uchar *dst,
+ const rocksdb::Slice *key_slice,
+ const rocksdb::Slice *value_slice);
+
+ int encode_value_slice(const std::shared_ptr<Rdb_key_def> &pk_def,
+ const rocksdb::Slice &pk_packed_slice,
+ Rdb_string_writer *pk_unpack_info, bool is_update_row,
+ bool store_row_debug_checksums, char *ttl_bytes,
+ bool *is_ttl_bytes_updated,
+ rocksdb::Slice *const value_slice);
+
+ my_core::ha_rows get_row_checksums_checked() const {
+ return m_row_checksums_checked;
+ }
+ bool get_verify_row_debug_checksums() const {
+ return m_verify_row_debug_checksums;
+ }
+ void set_verify_row_debug_checksums(bool verify_row_debug_checksums) {
+ m_verify_row_debug_checksums = verify_row_debug_checksums;
+ }
+
+ const Rdb_field_encoder *get_encoder_arr() const { return m_encoder_arr; }
+ int get_null_bytes_in_record() { return m_null_bytes_length_in_record; }
+ const char *get_null_bytes() const { return m_null_bytes; }
+ void set_is_key_requested(bool key_requested) {
+ m_key_requested = key_requested;
+ }
+ bool get_maybe_unpack_info() const { return m_maybe_unpack_info; }
+
+ char *get_ttl_bytes_buffer() { return m_ttl_bytes; }
+
+ const std::vector<READ_FIELD> *get_decode_fields() const {
+ return &m_decoders_vect;
+ }
+
+ private:
+ int decode_value_header(Rdb_string_reader *reader,
+ const std::shared_ptr<Rdb_key_def> &pk_def,
+ rocksdb::Slice *unpack_slice);
+
+ void setup_field_encoders();
+
+ void get_storage_type(Rdb_field_encoder *const encoder, const uint kp);
+
+ int convert_record_from_storage_format(
+ const std::shared_ptr<Rdb_key_def> &pk_def,
+ const rocksdb::Slice *const key, const rocksdb::Slice *const value,
+ uchar *const buf);
+
+ int verify_row_debug_checksum(const std::shared_ptr<Rdb_key_def> &pk_def,
+ Rdb_string_reader *reader,
+ const rocksdb::Slice *key,
+ const rocksdb::Slice *value);
+
+ private:
+ /*
+ This tells if any field which is part of the key needs to be unpacked and
+ decoded.
+ */
+ bool m_key_requested;
+ /*
+ Controls whether checksums are verified during reading. This is updated
+ from the session variable at the start of each query.
+ */
+ bool m_verify_row_debug_checksums;
+ // Thread handle
+ const THD *m_thd;
+ /* MyRocks table definition */
+ const Rdb_tbl_def *m_tbl_def;
+ /* The current open table */
+ TABLE *m_table;
+ /*
+ Number of bytes in on-disk (storage) record format that are used for
+ storing SQL NULL flags.
+ */
+ int m_null_bytes_length_in_record;
+ /*
+ Pointer to null bytes value
+ */
+ const char *m_null_bytes;
+ /*
+ TRUE <=> Some fields in the PK may require unpack_info.
+ */
+ bool m_maybe_unpack_info;
+ /*
+ Pointer to the original TTL timestamp value (8 bytes) during UPDATE.
+ */
+ char m_ttl_bytes[ROCKSDB_SIZEOF_TTL_RECORD];
+ /*
+ Array of table->s->fields elements telling how to store fields in the
+ record.
+ */
+ Rdb_field_encoder *m_encoder_arr;
+ /*
+ Array of requested fields telling how to decode data in RocksDB format
+ */
+ std::vector<READ_FIELD> m_decoders_vect;
+ /*
+ A counter of how many row checksums were checked for this table. Note that
+ this does not include checksums for secondary index entries.
+ */
+ my_core::ha_rows m_row_checksums_checked;
+ // buffer to hold data during encode_value_slice
+ String m_storage_record;
+};
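+
+/*
+  Illustrative usage sketch (not part of the build), assuming an open TABLE,
+  its Rdb_tbl_def, and the PK's Rdb_key_def are available:
+
+    Rdb_converter converter(thd, tbl_def, table);
+    converter.setup_field_decoders(table->read_set);
+    // ... fetch key_slice / value_slice from RocksDB ...
+    int err = converter.decode(pk_def, table->record[0],
+                               &key_slice, &value_slice);
+*/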
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_datadic.cc b/storage/rocksdb/rdb_datadic.cc
new file mode 100644
index 00000000000..fee5d24eb66
--- /dev/null
+++ b/storage/rocksdb/rdb_datadic.cc
@@ -0,0 +1,5386 @@
+/*
+ Copyright (c) 2012,2013 Monty Program Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation // gcc: Class implementation
+#endif
+
+/* For use of 'PRIu64': */
+#define __STDC_FORMAT_MACROS
+
+#include <my_config.h>
+/* This C++ file's header file */
+#include "./rdb_datadic.h"
+
+#include <inttypes.h>
+/* C++ standard header files */
+#include <algorithm>
+#include <array>
+#include <limits>
+#include <map>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+/* MySQL header files */
+#include "./field.h"
+#include "./key.h"
+#include "./m_ctype.h"
+#include "./my_bit.h"
+#include "./my_bitmap.h"
+#include "./sql_table.h"
+
+/* MyRocks header files */
+#include "./ha_rocksdb.h"
+#include "./ha_rocksdb_proto.h"
+#include "./my_stacktrace.h"
+#include "./rdb_cf_manager.h"
+#include "./rdb_psi.h"
+#include "./rdb_utils.h"
+
+namespace myrocks {
+
+void get_mem_comparable_space(const CHARSET_INFO *cs,
+ const std::vector<uchar> **xfrm, size_t *xfrm_len,
+ size_t *mb_len);
+
+/*
+ MariaDB's replacement for FB/MySQL Field::check_field_name_match:
+*/
+inline bool field_check_field_name_match(Field *field, const char *name)
+{
+ return (0 == my_strcasecmp(system_charset_info,
+ field->field_name,
+ name));
+}
+
+
+/*
+ Decode current key field
+ @param fpi IN data structure containing field metadata
+ @param field IN current field
+ @param reader IN key slice reader
+ @param unp_reader IN unpack information reader
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code
+*/
+int Rdb_convert_to_record_key_decoder::decode_field(
+ Rdb_field_packing *fpi, Field *field, Rdb_string_reader *reader,
+ const uchar *const default_value, Rdb_string_reader *unpack_reader) {
+ if (fpi->m_maybe_null) {
+ const char *nullp;
+ if (!(nullp = reader->read(1))) {
+ return HA_EXIT_FAILURE;
+ }
+
+ if (*nullp == 0) {
+ /* Set the NULL-bit of this field */
+ field->set_null();
+ /* Also set the field to its default value */
+ memcpy(field->ptr, default_value, field->pack_length());
+ return HA_EXIT_SUCCESS;
+ } else if (*nullp == 1) {
+ field->set_notnull();
+ } else {
+ return HA_EXIT_FAILURE;
+ }
+ }
+
+ return (fpi->m_unpack_func)(fpi, field, field->ptr, reader, unpack_reader);
+}
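+
+// Illustrative sketch (not part of the build): the NULL marker consumed
+// above. For a nullable key part, the mem-comparable key contains one extra
+// leading byte:
+//
+//   0x00 -> the value is SQL NULL (no field image follows)
+//   0x01 -> the value is not NULL; the packed field image follows
+//
+// Any other marker value indicates corruption.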
+
+/*
+ Decode current key field
+
+ @param buf OUT the buffer's starting address
+ @param offset OUT the byte offset at which data was written
+ @param fpi IN data structure containing field metadata
+ @param table IN current table
+ @param field IN current field
+ @param has_unpack_info IN whether unpack info is present
+ @param reader IN key slice reader
+ @param unp_reader IN unpack information reader
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code
+*/
+int Rdb_convert_to_record_key_decoder::decode(
+ uchar *const buf, uint *offset, Rdb_field_packing *fpi, TABLE *table,
+ Field *field, bool has_unpack_info, Rdb_string_reader *reader,
+ Rdb_string_reader *unpack_reader) {
+ DBUG_ASSERT(buf != nullptr);
+ DBUG_ASSERT(offset != nullptr);
+
+ uint field_offset = field->ptr - table->record[0];
+ *offset = field_offset;
+ uint null_offset = field->null_offset();
+ bool maybe_null = field->real_maybe_null();
+
+ field->move_field(buf + field_offset,
+ maybe_null ? buf + null_offset : nullptr, field->null_bit);
+
+ // If we need unpack info, but there is none, tell the unpack function
+ // this by passing unp_reader as nullptr. If we never read unpack_info
+ // during unpacking anyway, then there won't be an error.
+ bool maybe_missing_unpack = !has_unpack_info && fpi->uses_unpack_info();
+
+ int res =
+ decode_field(fpi, field, reader, table->s->default_values + field_offset,
+ maybe_missing_unpack ? nullptr : unpack_reader);
+
+ // Restore field->ptr and field->null_ptr
+ field->move_field(table->record[0] + field_offset,
+ maybe_null ? table->record[0] + null_offset : nullptr,
+ field->null_bit);
+ if (res != UNPACK_SUCCESS) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+ return HA_EXIT_SUCCESS;
+}
+
+/*
+ Skip current key field
+
+ @param fpi IN data structure containing field metadata
+ @param field IN current field
+ @param reader IN key slice reader
+ @param unp_reader IN unpack information reader
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code
+*/
+int Rdb_convert_to_record_key_decoder::skip(const Rdb_field_packing *fpi,
+ const Field *field,
+ Rdb_string_reader *reader,
+ Rdb_string_reader *unp_reader) {
+ /* It is impossible to unpack the column. Skip it. */
+ if (fpi->m_maybe_null) {
+ const char *nullp;
+ if (!(nullp = reader->read(1))) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+ if (*nullp == 0) {
+ /* This is a NULL value */
+ return HA_EXIT_SUCCESS;
+ }
+ /* If NULL marker is not '0', it can be only '1' */
+ if (*nullp != 1) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+ }
+ if ((fpi->m_skip_func)(fpi, field, reader)) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+ // If this is a space padded varchar, we need to skip the indicator
+ // bytes for trailing bytes. They're useless since we can't restore the
+ // field anyway.
+ //
+ // There is a special case for prefixed varchars where we do not
+ // generate unpack info, because we know prefixed varchars cannot be
+ // unpacked. In this case, it is not necessary to skip.
+ if (fpi->m_skip_func == &Rdb_key_def::skip_variable_space_pad &&
+ !fpi->m_unpack_info_stores_value) {
+ unp_reader->read(fpi->m_unpack_info_uses_two_bytes ? 2 : 1);
+ }
+ return HA_EXIT_SUCCESS;
+}
+
+Rdb_key_field_iterator::Rdb_key_field_iterator(
+ const Rdb_key_def *key_def, Rdb_field_packing *pack_info,
+ Rdb_string_reader *reader, Rdb_string_reader *unp_reader, TABLE *table,
+ bool has_unpack_info, const MY_BITMAP *covered_bitmap, uchar *const buf) {
+ m_key_def = key_def;
+ m_pack_info = pack_info;
+ m_iter_index = 0;
+ m_iter_end = key_def->get_key_parts();
+ m_reader = reader;
+ m_unp_reader = unp_reader;
+ m_table = table;
+ m_has_unpack_info = has_unpack_info;
+ m_covered_bitmap = covered_bitmap;
+ m_buf = buf;
+ m_secondary_key =
+ (key_def->m_index_type == Rdb_key_def::INDEX_TYPE_SECONDARY);
+ m_hidden_pk_exists = Rdb_key_def::table_has_hidden_pk(table);
+ m_is_hidden_pk =
+ (key_def->m_index_type == Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY);
+ m_curr_bitmap_pos = 0;
+ m_offset = 0;
+}
+
+void *Rdb_key_field_iterator::get_dst() const { return m_buf + m_offset; }
+
+int Rdb_key_field_iterator::get_field_index() const {
+ DBUG_ASSERT(m_field != nullptr);
+ return m_field->field_index;
+}
+
+bool Rdb_key_field_iterator::get_is_null() const { return m_is_null; }
+Field *Rdb_key_field_iterator::get_field() const {
+ DBUG_ASSERT(m_field != nullptr);
+ return m_field;
+}
+
+bool Rdb_key_field_iterator::has_next() { return m_iter_index < m_iter_end; }
+
+/**
+ Iterate each field in the key and decode/skip one by one
+*/
+int Rdb_key_field_iterator::next() {
+ int status = HA_EXIT_SUCCESS;
+ while (m_iter_index < m_iter_end) {
+ int curr_index = m_iter_index++;
+
+ m_fpi = &m_pack_info[curr_index];
+ /*
+ Hidden pk field is packed at the end of the secondary keys, but the SQL
+ layer does not know about it. Skip retrieving field if hidden pk.
+ */
+ if ((m_secondary_key && m_hidden_pk_exists &&
+ curr_index + 1 == m_iter_end) ||
+ m_is_hidden_pk) {
+ DBUG_ASSERT(m_fpi->m_unpack_func);
+ if ((m_fpi->m_skip_func)(m_fpi, nullptr, m_reader)) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+ return HA_EXIT_SUCCESS;
+ }
+
+ m_field = m_fpi->get_field_in_table(m_table);
+
+ bool covered_column = true;
+ if (m_covered_bitmap != nullptr &&
+ m_field->real_type() == MYSQL_TYPE_VARCHAR && !m_fpi->m_covered) {
+ covered_column = m_curr_bitmap_pos < MAX_REF_PARTS &&
+ bitmap_is_set(m_covered_bitmap, m_curr_bitmap_pos++);
+ }
+
+ if (m_fpi->m_unpack_func && covered_column) {
+ /* It is possible to unpack this column. Do it. */
+ status = Rdb_convert_to_record_key_decoder::decode(
+ m_buf, &m_offset, m_fpi, m_table, m_field, m_has_unpack_info,
+ m_reader, m_unp_reader);
+ if (status) {
+ return status;
+ }
+ break;
+ } else {
+ status = Rdb_convert_to_record_key_decoder::skip(m_fpi, m_field, m_reader,
+ m_unp_reader);
+ if (status) {
+ return status;
+ }
+ }
+ }
+ return HA_EXIT_SUCCESS;
+}
+
+/*
+ Rdb_key_def class implementation
+*/
+Rdb_key_def::Rdb_key_def(uint indexnr_arg, uint keyno_arg,
+ rocksdb::ColumnFamilyHandle *cf_handle_arg,
+ uint16_t index_dict_version_arg, uchar index_type_arg,
+ uint16_t kv_format_version_arg, bool is_reverse_cf_arg,
+ bool is_per_partition_cf_arg, const char *_name,
+ Rdb_index_stats _stats, uint32 index_flags_bitmap,
+ uint32 ttl_rec_offset, uint64 ttl_duration)
+ : m_index_number(indexnr_arg),
+ m_cf_handle(cf_handle_arg),
+ m_index_dict_version(index_dict_version_arg),
+ m_index_type(index_type_arg),
+ m_kv_format_version(kv_format_version_arg),
+ m_is_reverse_cf(is_reverse_cf_arg),
+ m_is_per_partition_cf(is_per_partition_cf_arg),
+ m_name(_name),
+ m_stats(_stats),
+ m_index_flags_bitmap(index_flags_bitmap),
+ m_ttl_rec_offset(ttl_rec_offset),
+ m_ttl_duration(ttl_duration),
+ m_ttl_column(""),
+ m_pk_part_no(nullptr),
+ m_pack_info(nullptr),
+ m_keyno(keyno_arg),
+ m_key_parts(0),
+ m_ttl_pk_key_part_offset(UINT_MAX),
+ m_ttl_field_index(UINT_MAX),
+ m_prefix_extractor(nullptr),
+ m_maxlength(0) // means 'not initialized'
+{
+ mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST);
+ rdb_netbuf_store_index(m_index_number_storage_form, m_index_number);
+ m_total_index_flags_length =
+ calculate_index_flag_offset(m_index_flags_bitmap, MAX_FLAG);
+ DBUG_ASSERT_IMP(m_index_type == INDEX_TYPE_SECONDARY &&
+ m_kv_format_version <= SECONDARY_FORMAT_VERSION_UPDATE2,
+ m_total_index_flags_length == 0);
+ DBUG_ASSERT_IMP(m_index_type == INDEX_TYPE_PRIMARY &&
+ m_kv_format_version <= PRIMARY_FORMAT_VERSION_UPDATE2,
+ m_total_index_flags_length == 0);
+ DBUG_ASSERT(m_cf_handle != nullptr);
+}
+
+Rdb_key_def::Rdb_key_def(const Rdb_key_def &k)
+ : m_index_number(k.m_index_number),
+ m_cf_handle(k.m_cf_handle),
+ m_is_reverse_cf(k.m_is_reverse_cf),
+ m_is_per_partition_cf(k.m_is_per_partition_cf),
+ m_name(k.m_name),
+ m_stats(k.m_stats),
+ m_index_flags_bitmap(k.m_index_flags_bitmap),
+ m_ttl_rec_offset(k.m_ttl_rec_offset),
+ m_ttl_duration(k.m_ttl_duration),
+ m_ttl_column(k.m_ttl_column),
+ m_pk_part_no(k.m_pk_part_no),
+ m_pack_info(k.m_pack_info),
+ m_keyno(k.m_keyno),
+ m_key_parts(k.m_key_parts),
+ m_ttl_pk_key_part_offset(k.m_ttl_pk_key_part_offset),
+ m_ttl_field_index(UINT_MAX),
+ m_prefix_extractor(k.m_prefix_extractor),
+ m_maxlength(k.m_maxlength) {
+ mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST);
+ rdb_netbuf_store_index(m_index_number_storage_form, m_index_number);
+ m_total_index_flags_length =
+ calculate_index_flag_offset(m_index_flags_bitmap, MAX_FLAG);
+ DBUG_ASSERT_IMP(m_index_type == INDEX_TYPE_SECONDARY &&
+ m_kv_format_version <= SECONDARY_FORMAT_VERSION_UPDATE2,
+ m_total_index_flags_length == 0);
+ DBUG_ASSERT_IMP(m_index_type == INDEX_TYPE_PRIMARY &&
+ m_kv_format_version <= PRIMARY_FORMAT_VERSION_UPDATE2,
+ m_total_index_flags_length == 0);
+ if (k.m_pack_info) {
+ const size_t size = sizeof(Rdb_field_packing) * k.m_key_parts;
+ void *pack_info= my_malloc(size, MYF(0));
+ memcpy(pack_info, k.m_pack_info, size);
+ m_pack_info = reinterpret_cast<Rdb_field_packing *>(pack_info);
+ }
+
+ if (k.m_pk_part_no) {
+ const size_t size = sizeof(uint) * m_key_parts;
+ m_pk_part_no = reinterpret_cast<uint *>(my_malloc(size, MYF(0)));
+ memcpy(m_pk_part_no, k.m_pk_part_no, size);
+ }
+}
+
+Rdb_key_def::~Rdb_key_def() {
+ mysql_mutex_destroy(&m_mutex);
+
+ my_free(m_pk_part_no);
+ m_pk_part_no = nullptr;
+
+ my_free(m_pack_info);
+ m_pack_info = nullptr;
+}
+
+void Rdb_key_def::setup(const TABLE *const tbl,
+ const Rdb_tbl_def *const tbl_def) {
+ DBUG_ASSERT(tbl != nullptr);
+ DBUG_ASSERT(tbl_def != nullptr);
+
+ /*
+ Set max_length based on the table. This can be called concurrently from
+ multiple threads, so there is a mutex to protect this code.
+ */
+ const bool is_hidden_pk = (m_index_type == INDEX_TYPE_HIDDEN_PRIMARY);
+ const bool hidden_pk_exists = table_has_hidden_pk(tbl);
+ const bool secondary_key = (m_index_type == INDEX_TYPE_SECONDARY);
+ if (!m_maxlength) {
+ RDB_MUTEX_LOCK_CHECK(m_mutex);
+ if (m_maxlength != 0) {
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+ return;
+ }
+
+ KEY *key_info = nullptr;
+ KEY *pk_info = nullptr;
+ if (!is_hidden_pk) {
+ key_info = &tbl->key_info[m_keyno];
+ if (!hidden_pk_exists) pk_info = &tbl->key_info[tbl->s->primary_key];
+ m_name = std::string(key_info->name);
+ } else {
+ m_name = HIDDEN_PK_NAME;
+ }
+
+ if (secondary_key) {
+ m_pk_key_parts= hidden_pk_exists ? 1 : pk_info->ext_key_parts;
+ } else {
+ pk_info = nullptr;
+ m_pk_key_parts = 0;
+ }
+
+ // "unique" secondary keys support:
+ m_key_parts= is_hidden_pk ? 1 : key_info->ext_key_parts;
+
+ if (secondary_key) {
+ /*
+ In most cases, the SQL layer puts PK columns as an invisible suffix at
+ the end of a secondary key. There are cases where this doesn't happen:
+ - unique secondary indexes.
+ - partitioned tables.
+
+ Internally, we always need PK columns as suffix (and InnoDB does,
+ too, if you were wondering).
+
+ The loop below will attempt to put all PK columns at the end of key
+ definition. Columns that are already included in the index (either
+ by the user or by "extended keys" feature) are not included for the
+ second time.
+ */
+ m_key_parts += m_pk_key_parts;
+ }
+
+ if (secondary_key) {
+ m_pk_part_no = reinterpret_cast<uint *>(
+ my_malloc(sizeof(uint) * m_key_parts, MYF(0)));
+ } else {
+ m_pk_part_no = nullptr;
+ }
+
+ const size_t size = sizeof(Rdb_field_packing) * m_key_parts;
+ m_pack_info =
+ reinterpret_cast<Rdb_field_packing *>(my_malloc(size, MYF(0)));
+
+ /*
+ Guaranteed not to error here as checks have been made already during
+ table creation.
+ */
+ Rdb_key_def::extract_ttl_col(tbl, tbl_def, &m_ttl_column,
+ &m_ttl_field_index, true);
+
+ size_t max_len = INDEX_NUMBER_SIZE;
+ int unpack_len = 0;
+ int max_part_len = 0;
+ bool simulating_extkey = false;
+ uint dst_i = 0;
+
+ uint keyno_to_set = m_keyno;
+ uint keypart_to_set = 0;
+
+ if (is_hidden_pk) {
+ Field *field = nullptr;
+ m_pack_info[dst_i].setup(this, field, keyno_to_set, 0, 0);
+ m_pack_info[dst_i].m_unpack_data_offset = unpack_len;
+ max_len += m_pack_info[dst_i].m_max_image_len;
+ max_part_len = std::max(max_part_len, m_pack_info[dst_i].m_max_image_len);
+ dst_i++;
+ } else {
+ KEY_PART_INFO *key_part = key_info->key_part;
+
+ /* this loop also loops over the 'extended key' tail */
+ for (uint src_i = 0; src_i < m_key_parts; src_i++, keypart_to_set++) {
+ Field *const field = key_part ? key_part->field : nullptr;
+
+ if (simulating_extkey && !hidden_pk_exists) {
+ DBUG_ASSERT(secondary_key);
+ /* Check if this field is already present in the key definition */
+ bool found = false;
+ for (uint j= 0; j < key_info->ext_key_parts; j++) {
+ if (field->field_index ==
+ key_info->key_part[j].field->field_index &&
+ key_part->length == key_info->key_part[j].length) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found) {
+ key_part++;
+ continue;
+ }
+ }
+
+ if (field && field->real_maybe_null()) max_len += 1; // NULL-byte
+
+ m_pack_info[dst_i].setup(this, field, keyno_to_set, keypart_to_set,
+ key_part ? key_part->length : 0);
+ m_pack_info[dst_i].m_unpack_data_offset = unpack_len;
+
+ if (pk_info) {
+ m_pk_part_no[dst_i] = -1;
+ for (uint j = 0; j < m_pk_key_parts; j++) {
+ if (field->field_index == pk_info->key_part[j].field->field_index) {
+ m_pk_part_no[dst_i] = j;
+ break;
+ }
+ }
+ } else if (secondary_key && hidden_pk_exists) {
+ /*
+ The hidden pk can never be part of the sk. So it is always
+ appended to the end of the sk.
+ */
+ m_pk_part_no[dst_i] = -1;
+ if (simulating_extkey) m_pk_part_no[dst_i] = 0;
+ }
+
+ max_len += m_pack_info[dst_i].m_max_image_len;
+
+ max_part_len =
+ std::max(max_part_len, m_pack_info[dst_i].m_max_image_len);
+
+ /*
+ Check key part name here, if it matches the TTL column then we store
+ the offset of the TTL key part here.
+ */
+ if (!m_ttl_column.empty() &&
+ field_check_field_name_match(field, m_ttl_column.c_str())) {
+ DBUG_ASSERT(field->real_type() == MYSQL_TYPE_LONGLONG);
+ DBUG_ASSERT(field->key_type() == HA_KEYTYPE_ULONGLONG);
+ DBUG_ASSERT(!field->real_maybe_null());
+ m_ttl_pk_key_part_offset = dst_i;
+ }
+
+ key_part++;
+ /*
+ For "unique" secondary indexes, pretend they have
+ "index extensions".
+
+ MariaDB also has this property: if an index has a partially-covered
+ column like KEY(varchar_col(N)), then the SQL layer will think it is
+ not "extended" with PK columns. The code below handles this case,
+ also.
+ */
+ if (secondary_key && src_i+1 == key_info->ext_key_parts) {
+ simulating_extkey = true;
+ if (!hidden_pk_exists) {
+ keyno_to_set = tbl->s->primary_key;
+ key_part = pk_info->key_part;
+ keypart_to_set = (uint)-1;
+ } else {
+ keyno_to_set = tbl_def->m_key_count - 1;
+ key_part = nullptr;
+ keypart_to_set = 0;
+ }
+ }
+
+ dst_i++;
+ }
+ }
+
+ m_key_parts = dst_i;
+
+ /* Initialize the memory needed by the stats structure */
+ m_stats.m_distinct_keys_per_prefix.resize(get_key_parts());
+
+ /* Cache prefix extractor for bloom filter usage later */
+ rocksdb::Options opt = rdb_get_rocksdb_db()->GetOptions(get_cf());
+ m_prefix_extractor = opt.prefix_extractor;
+
+ /*
+ This should be the last member variable set before releasing the mutex
+ so that other threads can't see the object partially set up.
+ */
+ m_maxlength = max_len;
+
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+ }
+}
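+
+// Illustrative sketch (not part of the build): the "extended key" layout
+// computed above. For a hypothetical table with PRIMARY KEY(id) and
+// KEY sk(a), the secondary key's parts become (a, id): the PK column is
+// appended as an invisible suffix unless it is already part of the index.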
+
+/*
+ Determine if the table has TTL enabled by parsing the table comment.
+
+ @param[IN] table_arg
+ @param[IN] tbl_def_arg
+ @param[OUT] ttl_duration Default TTL value parsed from table comment
+*/
+uint Rdb_key_def::extract_ttl_duration(const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg,
+ uint64 *ttl_duration) {
+ DBUG_ASSERT(table_arg != nullptr);
+ DBUG_ASSERT(tbl_def_arg != nullptr);
+ DBUG_ASSERT(ttl_duration != nullptr);
+ std::string table_comment(table_arg->s->comment.str,
+ table_arg->s->comment.length);
+
+ bool ttl_duration_per_part_match_found = false;
+ std::string ttl_duration_str = Rdb_key_def::parse_comment_for_qualifier(
+ table_comment, table_arg, tbl_def_arg, &ttl_duration_per_part_match_found,
+ RDB_TTL_DURATION_QUALIFIER);
+
+ /* If we don't have a ttl duration, nothing to do here. */
+ if (ttl_duration_str.empty()) {
+ return HA_EXIT_SUCCESS;
+ }
+
+ /*
+ Catch errors where a non-integral value was used as the TTL duration;
+ strtoull will return 0 in that case.
+ */
+ *ttl_duration = std::strtoull(ttl_duration_str.c_str(), nullptr, 0);
+ if (!*ttl_duration) {
+ my_error(ER_RDB_TTL_DURATION_FORMAT, MYF(0), ttl_duration_str.c_str());
+ return HA_EXIT_FAILURE;
+ }
+
+ return HA_EXIT_SUCCESS;
+}
+
+/*
+ Determine if the table has TTL enabled by parsing the table comment.
+
+ @param[IN] table_arg
+ @param[IN] tbl_def_arg
+ @param[OUT] ttl_column TTL column in the table
+ @param[IN] skip_checks Skip validation checks (when called in
+ setup())
+*/
+uint Rdb_key_def::extract_ttl_col(const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg,
+ std::string *ttl_column,
+ uint *ttl_field_index, bool skip_checks) {
+ std::string table_comment(table_arg->s->comment.str,
+ table_arg->s->comment.length);
+ /*
+ Check if there is a TTL column specified. Note that this is not required
+ and if omitted, an 8-byte ttl field will be prepended to each record
+ implicitly.
+ */
+ bool ttl_col_per_part_match_found = false;
+ std::string ttl_col_str = Rdb_key_def::parse_comment_for_qualifier(
+ table_comment, table_arg, tbl_def_arg, &ttl_col_per_part_match_found,
+ RDB_TTL_COL_QUALIFIER);
+
+ if (skip_checks) {
+ for (uint i = 0; i < table_arg->s->fields; i++) {
+ Field *const field = table_arg->field[i];
+ if (field_check_field_name_match(field, ttl_col_str.c_str())) {
+ *ttl_column = ttl_col_str;
+ *ttl_field_index = i;
+ }
+ }
+ return HA_EXIT_SUCCESS;
+ }
+
+ /* Check if TTL column exists in table */
+ if (!ttl_col_str.empty()) {
+ bool found = false;
+ for (uint i = 0; i < table_arg->s->fields; i++) {
+ Field *const field = table_arg->field[i];
+ if (field_check_field_name_match(field, ttl_col_str.c_str()) &&
+ field->real_type() == MYSQL_TYPE_LONGLONG &&
+ field->key_type() == HA_KEYTYPE_ULONGLONG &&
+ !field->real_maybe_null()) {
+ *ttl_column = ttl_col_str;
+ *ttl_field_index = i;
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ my_error(ER_RDB_TTL_COL_FORMAT, MYF(0), ttl_col_str.c_str());
+ return HA_EXIT_FAILURE;
+ }
+ }
+
+ return HA_EXIT_SUCCESS;
+}
+
+const std::string Rdb_key_def::gen_qualifier_for_table(
+ const char *const qualifier, const std::string &partition_name) {
+ bool has_partition = !partition_name.empty();
+ std::string qualifier_str = "";
+
+ if (!strcmp(qualifier, RDB_CF_NAME_QUALIFIER)) {
+ return has_partition ? gen_cf_name_qualifier_for_partition(partition_name)
+ : qualifier_str + RDB_CF_NAME_QUALIFIER +
+ RDB_QUALIFIER_VALUE_SEP;
+ } else if (!strcmp(qualifier, RDB_TTL_DURATION_QUALIFIER)) {
+ return has_partition
+ ? gen_ttl_duration_qualifier_for_partition(partition_name)
+ : qualifier_str + RDB_TTL_DURATION_QUALIFIER +
+ RDB_QUALIFIER_VALUE_SEP;
+ } else if (!strcmp(qualifier, RDB_TTL_COL_QUALIFIER)) {
+ return has_partition ? gen_ttl_col_qualifier_for_partition(partition_name)
+ : qualifier_str + RDB_TTL_COL_QUALIFIER +
+ RDB_QUALIFIER_VALUE_SEP;
+ } else {
+ DBUG_ASSERT(0);
+ }
+
+ return qualifier_str;
+}
+
+/*
+ Formats the string and returns the column family name assignment part for a
+ specific partition.
+*/
+const std::string Rdb_key_def::gen_cf_name_qualifier_for_partition(
+ const std::string &prefix) {
+ DBUG_ASSERT(!prefix.empty());
+
+ return prefix + RDB_PER_PARTITION_QUALIFIER_NAME_SEP + RDB_CF_NAME_QUALIFIER +
+ RDB_QUALIFIER_VALUE_SEP;
+}
+
+const std::string Rdb_key_def::gen_ttl_duration_qualifier_for_partition(
+ const std::string &prefix) {
+ DBUG_ASSERT(!prefix.empty());
+
+ return prefix + RDB_PER_PARTITION_QUALIFIER_NAME_SEP +
+ RDB_TTL_DURATION_QUALIFIER + RDB_QUALIFIER_VALUE_SEP;
+}
+
+const std::string Rdb_key_def::gen_ttl_col_qualifier_for_partition(
+ const std::string &prefix) {
+ DBUG_ASSERT(!prefix.empty());
+
+ return prefix + RDB_PER_PARTITION_QUALIFIER_NAME_SEP + RDB_TTL_COL_QUALIFIER +
+ RDB_QUALIFIER_VALUE_SEP;
+}
+
+const std::string Rdb_key_def::parse_comment_for_qualifier(
+ const std::string &comment, const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg, bool *per_part_match_found,
+ const char *const qualifier) {
+ DBUG_ASSERT(table_arg != nullptr);
+ DBUG_ASSERT(tbl_def_arg != nullptr);
+ DBUG_ASSERT(per_part_match_found != nullptr);
+ DBUG_ASSERT(qualifier != nullptr);
+
+ std::string empty_result;
+
+ // Flag which marks if partition specific options were found.
+ *per_part_match_found = false;
+
+ if (comment.empty()) {
+ return empty_result;
+ }
+
+ // Let's fetch the comment for an index and check whether a custom key
+ // name is specified for the partition we are handling.
+ std::vector<std::string> v =
+ myrocks::parse_into_tokens(comment, RDB_QUALIFIER_SEP);
+
+ std::string search_str = gen_qualifier_for_table(qualifier);
+
+ // If table has partitions then we need to check if user has requested
+ // qualifiers on a per partition basis.
+ //
+ // NOTE: this means that if you specify a qualifier for a specific partition
+ // it will take precedence over the 'table level' qualifier, if one exists.
+ std::string search_str_part;
+  if (IF_PARTITIONING(table_arg->part_info, nullptr) != nullptr) {
+ std::string partition_name = tbl_def_arg->base_partition();
+ DBUG_ASSERT(!partition_name.empty());
+ search_str_part = gen_qualifier_for_table(qualifier, partition_name);
+ }
+
+ DBUG_ASSERT(!search_str.empty());
+
+ // Basic O(N) search for a matching assignment. At most we expect maybe
+ // ten or so elements here.
+ if (!search_str_part.empty()) {
+ for (const auto &it : v) {
+ if (it.substr(0, search_str_part.length()) == search_str_part) {
+ // We found a prefix match. Try to parse it as an assignment.
+ std::vector<std::string> tokens =
+ myrocks::parse_into_tokens(it, RDB_QUALIFIER_VALUE_SEP);
+
+        // We found a custom qualifier in the form we expected. Return its
+        // value instead of the default. In the example below, the `foo`
+        // part is returned to the caller.
+ //
+ // p3_cfname=foo
+ //
+ // If no value was specified then we'll return an empty string which
+ // later gets translated into using a default CF.
+ if (tokens.size() == 2) {
+ *per_part_match_found = true;
+ return tokens[1];
+ } else {
+ return empty_result;
+ }
+ }
+ }
+ }
+
+ // Do this loop again, this time searching for 'table level' qualifiers if we
+ // didn't find any partition level qualifiers above.
+ for (const auto &it : v) {
+ if (it.substr(0, search_str.length()) == search_str) {
+ std::vector<std::string> tokens =
+ myrocks::parse_into_tokens(it, RDB_QUALIFIER_VALUE_SEP);
+ if (tokens.size() == 2) {
+ return tokens[1];
+ } else {
+ return empty_result;
+ }
+ }
+ }
+
+ // If we didn't find any partitioned/non-partitioned qualifiers, return an
+ // empty string.
+ return empty_result;
+}
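+
+/*
+  Illustrative sketch (assumptions, not from this source: RDB_QUALIFIER_SEP
+  is ';', RDB_QUALIFIER_VALUE_SEP is '=', and the per-partition name
+  separator is '_', as the p3_cfname example above suggests). For a table
+  whose base partition is "p3" and whose comment is
+
+    'p3_cfname=foo;cfname=bar'
+
+  parsing with the cf name qualifier returns "foo" and sets
+  *per_part_match_found; a table without the partition-level assignment
+  falls through to the second loop and returns "bar".
+*/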
+
+/**
+ Read a memcmp key part from a slice using the passed in reader.
+
+ Returns -1 if field was null, 1 if error, 0 otherwise.
+*/
+int Rdb_key_def::read_memcmp_key_part(const TABLE *table_arg,
+ Rdb_string_reader *reader,
+ const uint part_num) const {
+  /* The column is not unpacked here; we only skip over its bytes. */
+ if (m_pack_info[part_num].m_maybe_null) {
+ const char *nullp;
+ if (!(nullp = reader->read(1))) return 1;
+ if (*nullp == 0) {
+ /* This is a NULL value */
+ return -1;
+ } else {
+ /* If NULL marker is not '0', it can be only '1' */
+ if (*nullp != 1) return 1;
+ }
+ }
+
+ Rdb_field_packing *fpi = &m_pack_info[part_num];
+ DBUG_ASSERT(table_arg->s != nullptr);
+
+ bool is_hidden_pk_part = (part_num + 1 == m_key_parts) &&
+ (table_arg->s->primary_key == MAX_INDEXES);
+ Field *field = nullptr;
+ if (!is_hidden_pk_part) {
+ field = fpi->get_field_in_table(table_arg);
+ }
+ if ((fpi->m_skip_func)(fpi, field, reader)) {
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ Get a mem-comparable form of Primary Key from mem-comparable form of this key
+
+ @param
+ pk_descr Primary Key descriptor
+ key Index tuple from this key in mem-comparable form
+ pk_buffer OUT Put here mem-comparable form of the Primary Key.
+
+ @note
+ It may or may not be possible to restore primary key columns to their
+ mem-comparable form. To handle all cases, this function copies mem-
+ comparable forms directly.
+
+    RocksDB SE supports "extended keys": PK columns are appended at the end of
+    every secondary key. PK columns that are already part of the secondary key
+    are not duplicated at its end.
+
+ Because of the above, we copy each primary key column.
+
+ @todo
+ If we checked crc32 checksums in this function, we would catch some CRC
+ violations that we currently don't. On the other hand, there is a broader
+ set of queries for which we would check the checksum twice.
+*/
+
+uint Rdb_key_def::get_primary_key_tuple(const TABLE *const table,
+ const Rdb_key_def &pk_descr,
+ const rocksdb::Slice *const key,
+ uchar *const pk_buffer) const {
+ DBUG_ASSERT(table != nullptr);
+ DBUG_ASSERT(key != nullptr);
+ DBUG_ASSERT(m_index_type == Rdb_key_def::INDEX_TYPE_SECONDARY);
+ DBUG_ASSERT(pk_buffer);
+
+ uint size = 0;
+ uchar *buf = pk_buffer;
+ DBUG_ASSERT(m_pk_key_parts);
+
+ /* Put the PK number */
+ rdb_netbuf_store_index(buf, pk_descr.m_index_number);
+ buf += INDEX_NUMBER_SIZE;
+ size += INDEX_NUMBER_SIZE;
+
+ const char *start_offs[MAX_REF_PARTS];
+ const char *end_offs[MAX_REF_PARTS];
+ int pk_key_part;
+ uint i;
+ Rdb_string_reader reader(key);
+
+ // Skip the index number
+ if ((!reader.read(INDEX_NUMBER_SIZE))) return RDB_INVALID_KEY_LEN;
+
+ for (i = 0; i < m_key_parts; i++) {
+ if ((pk_key_part = m_pk_part_no[i]) != -1) {
+ start_offs[pk_key_part] = reader.get_current_ptr();
+ }
+
+ if (read_memcmp_key_part(table, &reader, i) > 0) {
+ return RDB_INVALID_KEY_LEN;
+ }
+
+ if (pk_key_part != -1) {
+ end_offs[pk_key_part] = reader.get_current_ptr();
+ }
+ }
+
+ for (i = 0; i < m_pk_key_parts; i++) {
+ const uint part_size = end_offs[i] - start_offs[i];
+    memcpy(buf, start_offs[i], part_size);
+ buf += part_size;
+ size += part_size;
+ }
+
+ return size;
+}
+
+/**
+ Get a mem-comparable form of Secondary Key from mem-comparable form of this
+ key, without the extended primary key tail.
+
+ @param
+ key Index tuple from this key in mem-comparable form
+ sk_buffer OUT Put here mem-comparable form of the Secondary Key.
+ n_null_fields OUT Put number of null fields contained within sk entry
+*/
+uint Rdb_key_def::get_memcmp_sk_parts(const TABLE *table,
+ const rocksdb::Slice &key,
+ uchar *sk_buffer,
+ uint *n_null_fields) const {
+ DBUG_ASSERT(table != nullptr);
+ DBUG_ASSERT(sk_buffer != nullptr);
+ DBUG_ASSERT(n_null_fields != nullptr);
+ DBUG_ASSERT(m_keyno != table->s->primary_key && !table_has_hidden_pk(table));
+
+ uchar *buf = sk_buffer;
+
+ int res;
+ Rdb_string_reader reader(&key);
+ const char *start = reader.get_current_ptr();
+
+ // Skip the index number
+ if ((!reader.read(INDEX_NUMBER_SIZE))) return RDB_INVALID_KEY_LEN;
+
+ for (uint i = 0; i < table->key_info[m_keyno].user_defined_key_parts; i++) {
+ if ((res = read_memcmp_key_part(table, &reader, i)) > 0) {
+ return RDB_INVALID_KEY_LEN;
+ } else if (res == -1) {
+ (*n_null_fields)++;
+ }
+ }
+
+ uint sk_memcmp_len = reader.get_current_ptr() - start;
+ memcpy(buf, start, sk_memcmp_len);
+ return sk_memcmp_len;
+}
+
+/**
+ Convert index tuple into storage (i.e. mem-comparable) format
+
+ @detail
+ Currently this is done by unpacking into table->record[0] and then
+ packing index columns into storage format.
+
+ @param pack_buffer Temporary area for packing varchar columns. Its
+ size is at least max_storage_fmt_length() bytes.
+*/
+
+uint Rdb_key_def::pack_index_tuple(TABLE *const tbl, uchar *const pack_buffer,
+ uchar *const packed_tuple,
+ const uchar *const key_tuple,
+ const key_part_map &keypart_map) const {
+ DBUG_ASSERT(tbl != nullptr);
+ DBUG_ASSERT(pack_buffer != nullptr);
+ DBUG_ASSERT(packed_tuple != nullptr);
+ DBUG_ASSERT(key_tuple != nullptr);
+
+ /* We were given a record in KeyTupleFormat. First, save it to record */
+ const uint key_len = calculate_key_len(tbl, m_keyno, key_tuple, keypart_map);
+ key_restore(tbl->record[0], key_tuple, &tbl->key_info[m_keyno], key_len);
+
+ uint n_used_parts = my_count_bits(keypart_map);
+ if (keypart_map == HA_WHOLE_KEY) n_used_parts = 0; // Full key is used
+
+ /* Then, convert the record into a mem-comparable form */
+ return pack_record(tbl, pack_buffer, tbl->record[0], packed_tuple, nullptr,
+ false, 0, n_used_parts);
+}
+
+/**
+ @brief
+ Check if "unpack info" data includes checksum.
+
+ @detail
+ This is used only by CHECK TABLE to count the number of rows that have
+ checksums.
+*/
+
+bool Rdb_key_def::unpack_info_has_checksum(const rocksdb::Slice &unpack_info) {
+ size_t size = unpack_info.size();
+ if (size == 0) {
+ return false;
+ }
+ const uchar *ptr = (const uchar *)unpack_info.data();
+
+ // Skip unpack info if present.
+ if (is_unpack_data_tag(ptr[0]) && size >= get_unpack_header_size(ptr[0])) {
+ const uint16 skip_len = rdb_netbuf_to_uint16(ptr + 1);
+ SHIP_ASSERT(size >= skip_len);
+
+ size -= skip_len;
+ ptr += skip_len;
+ }
+
+ return (size == RDB_CHECKSUM_CHUNK_SIZE && ptr[0] == RDB_CHECKSUM_DATA_TAG);
+}
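+
+/*
+  Sketch of the layout this function walks over (sizes inferred from the
+  reads above and the writes in pack_record; treat as an assumption):
+
+    [unpack tag: 1 byte][unpack length: 2 bytes][rest of unpack data]
+    [RDB_CHECKSUM_DATA_TAG: 1 byte][key crc32: 4 bytes][val crc32: 4 bytes]
+
+  so the checksum chunk would be the trailing 1 + 4 + 4 = 9 bytes.
+*/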
+
+/*
+ @return Number of bytes that were changed
+*/
+int Rdb_key_def::successor(uchar *const packed_tuple, const uint len) {
+ DBUG_ASSERT(packed_tuple != nullptr);
+
+ int changed = 0;
+ uchar *p = packed_tuple + len - 1;
+ for (; p > packed_tuple; p--) {
+ changed++;
+ if (*p != uchar(0xFF)) {
+ *p = *p + 1;
+ break;
+ }
+ *p = '\0';
+ }
+ return changed;
+}
+
+/*
+ @return Number of bytes that were changed
+*/
+int Rdb_key_def::predecessor(uchar *const packed_tuple, const uint len) {
+ DBUG_ASSERT(packed_tuple != nullptr);
+
+ int changed = 0;
+ uchar *p = packed_tuple + len - 1;
+ for (; p > packed_tuple; p--) {
+ changed++;
+ if (*p != uchar(0x00)) {
+ *p = *p - 1;
+ break;
+ }
+ *p = 0xFF;
+ }
+ return changed;
+}
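+
+/*
+  Worked example (illustrative): for the 3-byte tuple {0x01, 0x02, 0xFF},
+  successor() produces {0x01, 0x03, 0x00} and returns 2 (two bytes changed);
+  predecessor() on {0x01, 0x02, 0x00} produces {0x01, 0x01, 0xFF}, also
+  returning 2. Note that neither function ever modifies byte 0.
+*/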
+
+static const std::map<char, size_t> UNPACK_HEADER_SIZES = {
+ {RDB_UNPACK_DATA_TAG, RDB_UNPACK_HEADER_SIZE},
+ {RDB_UNPACK_COVERED_DATA_TAG, RDB_UNPACK_COVERED_HEADER_SIZE}};
+
+/*
+ @return The length in bytes of the header specified by the given tag
+*/
+size_t Rdb_key_def::get_unpack_header_size(char tag) {
+ DBUG_ASSERT(is_unpack_data_tag(tag));
+ return UNPACK_HEADER_SIZES.at(tag);
+}
+
+/*
+ Get a bitmap indicating which varchar columns must be covered for this
+ lookup to be covered. If the bitmap is a subset of the covered bitmap, then
+ the lookup is covered. If it can already be determined that the lookup is
+ not covered, map->bitmap will be set to null.
+ */
+void Rdb_key_def::get_lookup_bitmap(const TABLE *table, MY_BITMAP *map) const {
+ DBUG_ASSERT(map->bitmap == nullptr);
+ bitmap_init(map, nullptr, MAX_REF_PARTS, false);
+ uint curr_bitmap_pos = 0;
+
+ // Indicates which columns in the read set might be covered.
+ MY_BITMAP maybe_covered_bitmap;
+ bitmap_init(&maybe_covered_bitmap, nullptr, table->read_set->n_bits, false);
+
+ for (uint i = 0; i < m_key_parts; i++) {
+ if (table_has_hidden_pk(table) && i + 1 == m_key_parts) {
+ continue;
+ }
+
+ Field *const field = m_pack_info[i].get_field_in_table(table);
+
+ // Columns which are always covered are not stored in the covered bitmap so
+ // we can ignore them here too.
+ if (m_pack_info[i].m_covered &&
+ bitmap_is_set(table->read_set, field->field_index)) {
+ bitmap_set_bit(&maybe_covered_bitmap, field->field_index);
+ continue;
+ }
+
+ switch (field->real_type()) {
+ // This type may be covered depending on the record. If it was requested,
+ // we require the covered bitmap to have this bit set.
+ case MYSQL_TYPE_VARCHAR:
+ if (curr_bitmap_pos < MAX_REF_PARTS) {
+ if (bitmap_is_set(table->read_set, field->field_index)) {
+ bitmap_set_bit(map, curr_bitmap_pos);
+ bitmap_set_bit(&maybe_covered_bitmap, field->field_index);
+ }
+ curr_bitmap_pos++;
+ } else {
+ bitmap_free(&maybe_covered_bitmap);
+ bitmap_free(map);
+ return;
+ }
+ break;
+ // This column is a type which is never covered. If it was requested, we
+ // know this lookup will never be covered.
+ default:
+ if (bitmap_is_set(table->read_set, field->field_index)) {
+ bitmap_free(&maybe_covered_bitmap);
+ bitmap_free(map);
+ return;
+ }
+ break;
+ }
+ }
+
+ // If there are columns which are not covered in the read set, the lookup
+ // can't be covered.
+ if (!bitmap_cmp(table->read_set, &maybe_covered_bitmap)) {
+ bitmap_free(map);
+ }
+ bitmap_free(&maybe_covered_bitmap);
+}
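+
+/*
+  Illustrative example (assumed schema): for a key on (v VARCHAR(16), i INT)
+  where the INT part is always covered and the VARCHAR prefix is not, reading
+  only `i` leaves `map` empty, while reading `v` sets bit 0 in `map`; the
+  lookup is then covered only for records whose stored covered bitmap also
+  has bit 0 set (see covers_lookup below). A requested key part whose type
+  can never be covered takes the default branch, which frees map->bitmap to
+  signal that the lookup cannot be covered at all.
+*/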
+
+/*
+ Return true if for this secondary index
+ - All of the requested columns are in the index
+ - All values for columns that are prefix-only indexes are shorter or equal
+ in length to the prefix
+ */
+bool Rdb_key_def::covers_lookup(const rocksdb::Slice *const unpack_info,
+ const MY_BITMAP *const lookup_bitmap) const {
+ DBUG_ASSERT(lookup_bitmap != nullptr);
+ if (!use_covered_bitmap_format() || lookup_bitmap->bitmap == nullptr) {
+ return false;
+ }
+
+ Rdb_string_reader unp_reader = Rdb_string_reader::read_or_empty(unpack_info);
+
+ // Check if this unpack_info has a covered_bitmap
+ const char *unpack_header = unp_reader.get_current_ptr();
+ const bool has_covered_unpack_info =
+ unp_reader.remaining_bytes() &&
+ unpack_header[0] == RDB_UNPACK_COVERED_DATA_TAG;
+ if (!has_covered_unpack_info ||
+ !unp_reader.read(RDB_UNPACK_COVERED_HEADER_SIZE)) {
+ return false;
+ }
+
+ MY_BITMAP covered_bitmap;
+ my_bitmap_map covered_bits;
+ bitmap_init(&covered_bitmap, &covered_bits, MAX_REF_PARTS, false);
+ covered_bits = rdb_netbuf_to_uint16((const uchar *)unpack_header +
+ sizeof(RDB_UNPACK_COVERED_DATA_TAG) +
+ RDB_UNPACK_COVERED_DATA_LEN_SIZE);
+
+ return bitmap_is_subset(lookup_bitmap, &covered_bitmap);
+}
+
+/* Indicates that all key parts can be unpacked to cover a secondary lookup */
+bool Rdb_key_def::can_cover_lookup() const {
+ for (uint i = 0; i < m_key_parts; i++) {
+ if (!m_pack_info[i].m_covered) return false;
+ }
+ return true;
+}
+
+uchar *Rdb_key_def::pack_field(Field *const field, Rdb_field_packing *pack_info,
+ uchar *tuple, uchar *const packed_tuple,
+ uchar *const pack_buffer,
+ Rdb_string_writer *const unpack_info,
+ uint *const n_null_fields) const {
+ if (field->real_maybe_null()) {
+ DBUG_ASSERT(is_storage_available(tuple - packed_tuple, 1));
+ if (field->is_real_null()) {
+ /* NULL value. store '\0' so that it sorts before non-NULL values */
+ *tuple++ = 0;
+ /* That's it, don't store anything else */
+ if (n_null_fields) (*n_null_fields)++;
+ return tuple;
+ } else {
+ /* Not a NULL value. Store '1' */
+ *tuple++ = 1;
+ }
+ }
+
+ const bool create_unpack_info =
+ (unpack_info && // we were requested to generate unpack_info
+ pack_info->uses_unpack_info()); // and this keypart uses it
+ Rdb_pack_field_context pack_ctx(unpack_info);
+
+ // Set the offset for methods which do not take an offset as an argument
+ DBUG_ASSERT(
+ is_storage_available(tuple - packed_tuple, pack_info->m_max_image_len));
+
+ (pack_info->m_pack_func)(pack_info, field, pack_buffer, &tuple, &pack_ctx);
+
+ /* Make "unpack info" to be stored in the value */
+ if (create_unpack_info) {
+ (pack_info->m_make_unpack_info_func)(pack_info->m_charset_codec, field,
+ &pack_ctx);
+ }
+
+ return tuple;
+}
+
+/**
+ Get index columns from the record and pack them into mem-comparable form.
+
+ @param
+ tbl Table we're working on
+ record IN Record buffer with fields in table->record format
+ pack_buffer IN Temporary area for packing varchars. The size is
+ at least max_storage_fmt_length() bytes.
+ packed_tuple OUT Key in the mem-comparable form
+ unpack_info OUT Unpack data
+ unpack_info_len OUT Unpack data length
+ n_key_parts Number of keyparts to process. 0 means all of them.
+ n_null_fields OUT Number of key fields with NULL value.
+ ttl_bytes IN Previous ttl bytes from old record for update case or
+ current ttl bytes from just packed primary key/value
+ @detail
+    Some callers do not need the unpack information; they can pass
+    unpack_info=nullptr, unpack_info_len=nullptr.
+
+ @return
+ Length of the packed tuple
+*/
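+
+/*
+  Sketch of the output produced below (a summary, not a format spec):
+
+    packed_tuple: [index number: 4 bytes][keypart 1]...[keypart N]
+      where each nullable keypart is preceded by a 0x00 (NULL) or 0x01 byte
+      and the hidden pk id, if passed, fills the last keypart.
+
+    unpack_info (secondary keys): [index flag fields, e.g. TTL bytes]
+      [tag: 1 byte][length: 2 bytes][covered bitmap: 2 bytes, if used]
+      [per-field unpack data][optional checksum chunk].
+*/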
+
+uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer,
+ const uchar *const record,
+ uchar *const packed_tuple,
+ Rdb_string_writer *const unpack_info,
+ const bool should_store_row_debug_checksums,
+ const longlong hidden_pk_id, uint n_key_parts,
+ uint *const n_null_fields,
+ const char *const ttl_bytes) const {
+ DBUG_ASSERT(tbl != nullptr);
+ DBUG_ASSERT(pack_buffer != nullptr);
+ DBUG_ASSERT(record != nullptr);
+ DBUG_ASSERT(packed_tuple != nullptr);
+ // Checksums for PKs are made when record is packed.
+ // We should never attempt to make checksum just from PK values
+ DBUG_ASSERT_IMP(should_store_row_debug_checksums,
+ (m_index_type == INDEX_TYPE_SECONDARY));
+
+ uchar *tuple = packed_tuple;
+ size_t unpack_start_pos = size_t(-1);
+ size_t unpack_len_pos = size_t(-1);
+ size_t covered_bitmap_pos = size_t(-1);
+ const bool hidden_pk_exists = table_has_hidden_pk(tbl);
+
+ rdb_netbuf_store_index(tuple, m_index_number);
+ tuple += INDEX_NUMBER_SIZE;
+
+ // If n_key_parts is 0, it means all columns.
+ // The following includes the 'extended key' tail.
+ // The 'extended key' includes primary key. This is done to 'uniqify'
+ // non-unique indexes
+ const bool use_all_columns = n_key_parts == 0 || n_key_parts == MAX_REF_PARTS;
+
+  // If a hidden pk exists but the hidden pk id wasn't passed in, we can't
+  // pack the hidden key part. So we skip it (it's always 1 part).
+ if (hidden_pk_exists && !hidden_pk_id && use_all_columns) {
+ n_key_parts = m_key_parts - 1;
+ } else if (use_all_columns) {
+ n_key_parts = m_key_parts;
+ }
+
+ if (n_null_fields) *n_null_fields = 0;
+
+ // Check if we need a covered bitmap. If it is certain that all key parts are
+ // covering, we don't need one.
+ bool store_covered_bitmap = false;
+ if (unpack_info && use_covered_bitmap_format()) {
+ for (uint i = 0; i < n_key_parts; i++) {
+ if (!m_pack_info[i].m_covered) {
+ store_covered_bitmap = true;
+ break;
+ }
+ }
+ }
+
+ const char tag =
+ store_covered_bitmap ? RDB_UNPACK_COVERED_DATA_TAG : RDB_UNPACK_DATA_TAG;
+
+ if (unpack_info) {
+ unpack_info->clear();
+
+ if (m_index_type == INDEX_TYPE_SECONDARY &&
+ m_total_index_flags_length > 0) {
+ // Reserve space for index flag fields
+ unpack_info->allocate(m_total_index_flags_length);
+
+ // Insert TTL timestamp
+ if (has_ttl() && ttl_bytes) {
+ write_index_flag_field(unpack_info,
+ reinterpret_cast<const uchar *>(ttl_bytes),
+ Rdb_key_def::TTL_FLAG);
+ }
+ }
+
+ unpack_start_pos = unpack_info->get_current_pos();
+ unpack_info->write_uint8(tag);
+ unpack_len_pos = unpack_info->get_current_pos();
+ // we don't know the total length yet, so write a zero
+ unpack_info->write_uint16(0);
+
+ if (store_covered_bitmap) {
+ // Reserve two bytes for the covered bitmap. This will store, for key
+ // parts which are not always covering, whether or not it is covering
+ // for this record.
+ covered_bitmap_pos = unpack_info->get_current_pos();
+ unpack_info->write_uint16(0);
+ }
+ }
+
+ MY_BITMAP covered_bitmap;
+ my_bitmap_map covered_bits;
+ uint curr_bitmap_pos = 0;
+ bitmap_init(&covered_bitmap, &covered_bits, MAX_REF_PARTS, false);
+
+ for (uint i = 0; i < n_key_parts; i++) {
+ // Fill hidden pk id into the last key part for secondary keys for tables
+ // with no pk
+ if (hidden_pk_exists && hidden_pk_id && i + 1 == n_key_parts) {
+ m_pack_info[i].fill_hidden_pk_val(&tuple, hidden_pk_id);
+ break;
+ }
+
+ Field *const field = m_pack_info[i].get_field_in_table(tbl);
+ DBUG_ASSERT(field != nullptr);
+
+ uint field_offset = field->ptr - tbl->record[0];
+ uint null_offset = field->null_offset(tbl->record[0]);
+ bool maybe_null = field->real_maybe_null();
+
+ field->move_field(
+ const_cast<uchar *>(record) + field_offset,
+ maybe_null ? const_cast<uchar *>(record) + null_offset : nullptr,
+ field->null_bit);
+ // WARNING! Don't return without restoring field->ptr and field->null_ptr
+
+ tuple = pack_field(field, &m_pack_info[i], tuple, packed_tuple, pack_buffer,
+ unpack_info, n_null_fields);
+
+ // If this key part is a prefix of a VARCHAR field, check if it's covered.
+ if (store_covered_bitmap && field->real_type() == MYSQL_TYPE_VARCHAR &&
+ !m_pack_info[i].m_covered && curr_bitmap_pos < MAX_REF_PARTS) {
+ size_t data_length = field->data_length();
+ uint16 key_length;
+ if (m_pk_part_no[i] == (uint)-1) {
+ key_length = tbl->key_info[get_keyno()].key_part[i].length;
+ } else {
+ key_length =
+ tbl->key_info[tbl->s->primary_key].key_part[m_pk_part_no[i]].length;
+ }
+
+ if (m_pack_info[i].m_unpack_func != nullptr &&
+ data_length <= key_length) {
+ bitmap_set_bit(&covered_bitmap, curr_bitmap_pos);
+ }
+ curr_bitmap_pos++;
+ }
+
+ // Restore field->ptr and field->null_ptr
+ field->move_field(tbl->record[0] + field_offset,
+ maybe_null ? tbl->record[0] + null_offset : nullptr,
+ field->null_bit);
+ }
+
+ if (unpack_info) {
+ const size_t len = unpack_info->get_current_pos() - unpack_start_pos;
+ DBUG_ASSERT(len <= std::numeric_limits<uint16_t>::max());
+
+ // Don't store the unpack_info if it has only the header (that is, there's
+ // no meaningful content).
+ // Primary Keys are special: for them, store the unpack_info even if it's
+ // empty (provided m_maybe_unpack_info==true, see
+ // ha_rocksdb::convert_record_to_storage_format)
+ if (m_index_type == Rdb_key_def::INDEX_TYPE_SECONDARY) {
+ if (len == get_unpack_header_size(tag) && !covered_bits) {
+ unpack_info->truncate(unpack_start_pos);
+ } else if (store_covered_bitmap) {
+ unpack_info->write_uint16_at(covered_bitmap_pos, covered_bits);
+ }
+ } else {
+ unpack_info->write_uint16_at(unpack_len_pos, len);
+ }
+
+ //
+ // Secondary keys have key and value checksums in the value part
+ // Primary key is a special case (the value part has non-indexed columns),
+ // so the checksums are computed and stored by
+ // ha_rocksdb::convert_record_to_storage_format
+ //
+ if (should_store_row_debug_checksums) {
+ const uint32_t key_crc32 = crc32(0, packed_tuple, tuple - packed_tuple);
+ const uint32_t val_crc32 =
+ crc32(0, unpack_info->ptr(), unpack_info->get_current_pos());
+
+ unpack_info->write_uint8(RDB_CHECKSUM_DATA_TAG);
+ unpack_info->write_uint32(key_crc32);
+ unpack_info->write_uint32(val_crc32);
+ }
+ }
+
+ DBUG_ASSERT(is_storage_available(tuple - packed_tuple, 0));
+
+ return tuple - packed_tuple;
+}
+
+/**
+ Pack the hidden primary key into mem-comparable form.
+
+ @param
+ tbl Table we're working on
+ hidden_pk_id IN New value to be packed into key
+ packed_tuple OUT Key in the mem-comparable form
+
+ @return
+ Length of the packed tuple
+*/
+
+uint Rdb_key_def::pack_hidden_pk(const longlong hidden_pk_id,
+ uchar *const packed_tuple) const {
+ DBUG_ASSERT(packed_tuple != nullptr);
+
+ uchar *tuple = packed_tuple;
+ rdb_netbuf_store_index(tuple, m_index_number);
+ tuple += INDEX_NUMBER_SIZE;
+ DBUG_ASSERT(m_key_parts == 1);
+ DBUG_ASSERT(is_storage_available(tuple - packed_tuple,
+ m_pack_info[0].m_max_image_len));
+
+ m_pack_info[0].fill_hidden_pk_val(&tuple, hidden_pk_id);
+
+ DBUG_ASSERT(is_storage_available(tuple - packed_tuple, 0));
+ return tuple - packed_tuple;
+}
+
+/*
+ Function of type rdb_index_field_pack_t
+*/
+
+void Rdb_key_def::pack_with_make_sort_key(
+ Rdb_field_packing *const fpi, Field *const field,
+ uchar *const buf MY_ATTRIBUTE((__unused__)), uchar **dst,
+ Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__))) {
+ DBUG_ASSERT(fpi != nullptr);
+ DBUG_ASSERT(field != nullptr);
+ DBUG_ASSERT(dst != nullptr);
+ DBUG_ASSERT(*dst != nullptr);
+
+ const int max_len = fpi->m_max_image_len;
+ my_bitmap_map *old_map;
+
+ old_map= dbug_tmp_use_all_columns(field->table,
+ field->table->read_set);
+ field->sort_string(*dst, max_len);
+ dbug_tmp_restore_column_map(field->table->read_set, old_map);
+ *dst += max_len;
+}
+
+/*
+ Compares two keys without unpacking
+
+  @return
+    0 - Ok. column_index is the index of the first column which differs, or
+        m_key_parts if the two keys are equal.
+    1 - Data format error.
+*/
+int Rdb_key_def::compare_keys(const rocksdb::Slice *key1,
+ const rocksdb::Slice *key2,
+ std::size_t *const column_index) const {
+ DBUG_ASSERT(key1 != nullptr);
+ DBUG_ASSERT(key2 != nullptr);
+ DBUG_ASSERT(column_index != nullptr);
+
+ // the caller should check the return value and
+ // not rely on column_index being valid
+ *column_index = 0xbadf00d;
+
+ Rdb_string_reader reader1(key1);
+ Rdb_string_reader reader2(key2);
+
+ // Skip the index number
+ if ((!reader1.read(INDEX_NUMBER_SIZE))) return HA_EXIT_FAILURE;
+
+ if ((!reader2.read(INDEX_NUMBER_SIZE))) return HA_EXIT_FAILURE;
+
+ for (uint i = 0; i < m_key_parts; i++) {
+ const Rdb_field_packing *const fpi = &m_pack_info[i];
+ if (fpi->m_maybe_null) {
+ const auto nullp1 = reader1.read(1);
+ const auto nullp2 = reader2.read(1);
+
+ if (nullp1 == nullptr || nullp2 == nullptr) {
+ return HA_EXIT_FAILURE;
+ }
+
+ if (*nullp1 != *nullp2) {
+ *column_index = i;
+ return HA_EXIT_SUCCESS;
+ }
+
+ if (*nullp1 == 0) {
+ /* This is a NULL value */
+ continue;
+ }
+ }
+
+ const auto before_skip1 = reader1.get_current_ptr();
+ const auto before_skip2 = reader2.get_current_ptr();
+ DBUG_ASSERT(fpi->m_skip_func);
+ if ((fpi->m_skip_func)(fpi, nullptr, &reader1)) {
+ return HA_EXIT_FAILURE;
+ }
+ if ((fpi->m_skip_func)(fpi, nullptr, &reader2)) {
+ return HA_EXIT_FAILURE;
+ }
+ const auto size1 = reader1.get_current_ptr() - before_skip1;
+ const auto size2 = reader2.get_current_ptr() - before_skip2;
+ if (size1 != size2) {
+ *column_index = i;
+ return HA_EXIT_SUCCESS;
+ }
+
+ if (memcmp(before_skip1, before_skip2, size1) != 0) {
+ *column_index = i;
+ return HA_EXIT_SUCCESS;
+ }
+ }
+
+ *column_index = m_key_parts;
+ return HA_EXIT_SUCCESS;
+}
+
+/*
+ @brief
+ Given a zero-padded key, determine its real key length
+
+  @detail
+    The length is found by skipping over each key part in turn; fixed-size
+    skip functions simply read past their bytes.
+*/
+
+size_t Rdb_key_def::key_length(const TABLE *const table,
+ const rocksdb::Slice &key) const {
+ DBUG_ASSERT(table != nullptr);
+
+ Rdb_string_reader reader(&key);
+
+ if ((!reader.read(INDEX_NUMBER_SIZE))) {
+ return size_t(-1);
+ }
+ for (uint i = 0; i < m_key_parts; i++) {
+ const Rdb_field_packing *fpi = &m_pack_info[i];
+ const Field *field = nullptr;
+ if (m_index_type != INDEX_TYPE_HIDDEN_PRIMARY) {
+ field = fpi->get_field_in_table(table);
+ }
+ if ((fpi->m_skip_func)(fpi, field, &reader)) {
+ return size_t(-1);
+ }
+ }
+ return key.size() - reader.remaining_bytes();
+}
+
+/*
+ Take mem-comparable form and unpack_info and unpack it to Table->record
+
+ @detail
+ not all indexes support this
+
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code
+*/
+
+int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf,
+ const rocksdb::Slice *const packed_key,
+ const rocksdb::Slice *const unpack_info,
+ const bool verify_row_debug_checksums) const {
+ Rdb_string_reader reader(packed_key);
+ Rdb_string_reader unp_reader = Rdb_string_reader::read_or_empty(unpack_info);
+
+  // There is no checksumming data after unpack_info for primary keys, because
+ // the layout there is different. The checksum is verified in
+ // ha_rocksdb::convert_record_from_storage_format instead.
+ DBUG_ASSERT_IMP(!(m_index_type == INDEX_TYPE_SECONDARY),
+ !verify_row_debug_checksums);
+
+ // Skip the index number
+ if ((!reader.read(INDEX_NUMBER_SIZE))) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+
+ // For secondary keys, we expect the value field to contain index flags,
+ // unpack data, and checksum data in that order. One or all can be missing,
+ // but they cannot be reordered.
+ if (unp_reader.remaining_bytes()) {
+ if (m_index_type == INDEX_TYPE_SECONDARY &&
+ m_total_index_flags_length > 0 &&
+ !unp_reader.read(m_total_index_flags_length)) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+ }
+
+ const char *unpack_header = unp_reader.get_current_ptr();
+ bool has_unpack_info =
+ unp_reader.remaining_bytes() && is_unpack_data_tag(unpack_header[0]);
+ if (has_unpack_info) {
+ if (!unp_reader.read(get_unpack_header_size(unpack_header[0]))) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+ }
+
+ // Read the covered bitmap
+ MY_BITMAP covered_bitmap;
+ my_bitmap_map covered_bits;
+ bool has_covered_bitmap =
+ has_unpack_info && (unpack_header[0] == RDB_UNPACK_COVERED_DATA_TAG);
+ if (has_covered_bitmap) {
+ bitmap_init(&covered_bitmap, &covered_bits, MAX_REF_PARTS, false);
+ covered_bits = rdb_netbuf_to_uint16((const uchar *)unpack_header +
+ sizeof(RDB_UNPACK_COVERED_DATA_TAG) +
+ RDB_UNPACK_COVERED_DATA_LEN_SIZE);
+ }
+
+ int err = HA_EXIT_SUCCESS;
+
+ Rdb_key_field_iterator iter(
+ this, m_pack_info, &reader, &unp_reader, table, has_unpack_info,
+ has_covered_bitmap ? &covered_bitmap : nullptr, buf);
+ while (iter.has_next()) {
+ err = iter.next();
+ if (err) {
+ return err;
+ }
+ }
+
+ /*
+ Check checksum values if present
+ */
+ const char *ptr;
+ if ((ptr = unp_reader.read(1)) && *ptr == RDB_CHECKSUM_DATA_TAG) {
+ if (verify_row_debug_checksums) {
+ uint32_t stored_key_chksum = rdb_netbuf_to_uint32(
+ (const uchar *)unp_reader.read(RDB_CHECKSUM_SIZE));
+ const uint32_t stored_val_chksum = rdb_netbuf_to_uint32(
+ (const uchar *)unp_reader.read(RDB_CHECKSUM_SIZE));
+
+ const uint32_t computed_key_chksum =
+ crc32(0, (const uchar *)packed_key->data(), packed_key->size());
+ const uint32_t computed_val_chksum =
+ crc32(0, (const uchar *)unpack_info->data(),
+ unpack_info->size() - RDB_CHECKSUM_CHUNK_SIZE);
+
+ DBUG_EXECUTE_IF("myrocks_simulate_bad_key_checksum1",
+ stored_key_chksum++;);
+
+ if (stored_key_chksum != computed_key_chksum) {
+ report_checksum_mismatch(true, packed_key->data(), packed_key->size());
+ return HA_ERR_ROCKSDB_CHECKSUM_MISMATCH;
+ }
+
+ if (stored_val_chksum != computed_val_chksum) {
+ report_checksum_mismatch(false, unpack_info->data(),
+ unpack_info->size() - RDB_CHECKSUM_CHUNK_SIZE);
+ return HA_ERR_ROCKSDB_CHECKSUM_MISMATCH;
+ }
+ } else {
+ /* The checksums are present but we are not checking checksums */
+ }
+ }
+
+ if (reader.remaining_bytes()) return HA_ERR_ROCKSDB_CORRUPT_DATA;
+
+ return HA_EXIT_SUCCESS;
+}
+
+bool Rdb_key_def::table_has_hidden_pk(const TABLE *const table) {
+ return table->s->primary_key == MAX_INDEXES;
+}
+
+void Rdb_key_def::report_checksum_mismatch(const bool is_key,
+ const char *const data,
+ const size_t data_size) const {
+ // NO_LINT_DEBUG
+ sql_print_error("Checksum mismatch in %s of key-value pair for index 0x%x",
+ is_key ? "key" : "value", get_index_number());
+
+ const std::string buf = rdb_hexdump(data, data_size, RDB_MAX_HEXDUMP_LEN);
+ // NO_LINT_DEBUG
+ sql_print_error("Data with incorrect checksum (%" PRIu64 " bytes): %s",
+ (uint64_t)data_size, buf.c_str());
+
+ my_error(ER_INTERNAL_ERROR, MYF(0), "Record checksum mismatch");
+}
+
+bool Rdb_key_def::index_format_min_check(const int pk_min,
+ const int sk_min) const {
+ switch (m_index_type) {
+ case INDEX_TYPE_PRIMARY:
+ case INDEX_TYPE_HIDDEN_PRIMARY:
+ return (m_kv_format_version >= pk_min);
+ case INDEX_TYPE_SECONDARY:
+ return (m_kv_format_version >= sk_min);
+ default:
+ DBUG_ASSERT(0);
+ return false;
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////
+// Rdb_field_packing
+///////////////////////////////////////////////////////////////////////////////////////////
+
+/*
+ Function of type rdb_index_field_skip_t
+*/
+
+int Rdb_key_def::skip_max_length(const Rdb_field_packing *const fpi,
+ const Field *const field
+ MY_ATTRIBUTE((__unused__)),
+ Rdb_string_reader *const reader) {
+ if (!reader->read(fpi->m_max_image_len)) return HA_EXIT_FAILURE;
+ return HA_EXIT_SUCCESS;
+}
+
+/*
+  (RDB_ESCAPE_LENGTH-1) must be an even number so that chunks are never split
+  in the middle of a 2-byte wide-character entity. See the implementation of
+ unpack_binary_or_utf8_varchar.
+*/
+#define RDB_ESCAPE_LENGTH 9
+#define RDB_LEGACY_ESCAPE_LENGTH RDB_ESCAPE_LENGTH
+static_assert((RDB_ESCAPE_LENGTH - 1) % 2 == 0,
+ "RDB_ESCAPE_LENGTH-1 must be even.");
+
+#define RDB_ENCODED_SIZE(len) \
+ ((len + (RDB_ESCAPE_LENGTH - 2)) / (RDB_ESCAPE_LENGTH - 1)) * \
+ RDB_ESCAPE_LENGTH
+
+#define RDB_LEGACY_ENCODED_SIZE(len) \
+ ((len + (RDB_LEGACY_ESCAPE_LENGTH - 1)) / (RDB_LEGACY_ESCAPE_LENGTH - 1)) * \
+ RDB_LEGACY_ESCAPE_LENGTH
+
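+/*
+  Worked arithmetic (with RDB_ESCAPE_LENGTH == 9):
+    RDB_ENCODED_SIZE(8)        == ((8 + 7) / 8) * 9 ==  9
+    RDB_ENCODED_SIZE(9)        == ((9 + 7) / 8) * 9 == 18
+    RDB_LEGACY_ENCODED_SIZE(8) == ((8 + 8) / 8) * 9 == 18
+  which shows the legacy format's extra chunk for inputs that are an exact
+  multiple of RDB_ESCAPE_LENGTH - 1 (see pack_legacy_variable_format below).
+*/
+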
+/*
+ Function of type rdb_index_field_skip_t
+*/
+
+int Rdb_key_def::skip_variable_length(const Rdb_field_packing *const fpi,
+ const Field *const field,
+ Rdb_string_reader *const reader) {
+ const uchar *ptr;
+ bool finished = false;
+
+ size_t dst_len; /* How much data can be there */
+ if (field) {
+ const Field_varstring *const field_var =
+ static_cast<const Field_varstring *>(field);
+ dst_len = field_var->pack_length() - field_var->length_bytes;
+ } else {
+ dst_len = UINT_MAX;
+ }
+
+ bool use_legacy_format = fpi->m_use_legacy_varbinary_format;
+
+  /* Decode the variable-length encoding here */
+ while ((ptr = (const uchar *)reader->read(RDB_ESCAPE_LENGTH))) {
+ uint used_bytes;
+
+ /* See pack_with_varchar_encoding. */
+ if (use_legacy_format) {
+ used_bytes = calc_unpack_legacy_variable_format(
+ ptr[RDB_ESCAPE_LENGTH - 1], &finished);
+ } else {
+ used_bytes =
+ calc_unpack_variable_format(ptr[RDB_ESCAPE_LENGTH - 1], &finished);
+ }
+
+ if (used_bytes == (uint)-1 || dst_len < used_bytes) {
+ return HA_EXIT_FAILURE; // Corruption in the data
+ }
+
+ if (finished) {
+ break;
+ }
+
+ dst_len -= used_bytes;
+ }
+
+ if (!finished) {
+ return HA_EXIT_FAILURE;
+ }
+
+ return HA_EXIT_SUCCESS;
+}
+
+const int VARCHAR_CMP_LESS_THAN_SPACES = 1;
+const int VARCHAR_CMP_EQUAL_TO_SPACES = 2;
+const int VARCHAR_CMP_GREATER_THAN_SPACES = 3;
+
+/*
+ Skip a keypart that uses Variable-Length Space-Padded encoding
+*/
+
+int Rdb_key_def::skip_variable_space_pad(const Rdb_field_packing *const fpi,
+ const Field *const field,
+ Rdb_string_reader *const reader) {
+ const uchar *ptr;
+ bool finished = false;
+
+ size_t dst_len = UINT_MAX; /* How much data can be there */
+
+ if (field) {
+ const Field_varstring *const field_var =
+ static_cast<const Field_varstring *>(field);
+ dst_len = field_var->pack_length() - field_var->length_bytes;
+ }
+
+  /* Decode the variable-length encoding here */
+ while ((ptr = (const uchar *)reader->read(fpi->m_segment_size))) {
+ // See pack_with_varchar_space_pad
+ const uchar c = ptr[fpi->m_segment_size - 1];
+ if (c == VARCHAR_CMP_EQUAL_TO_SPACES) {
+ // This is the last segment
+ finished = true;
+ break;
+ } else if (c == VARCHAR_CMP_LESS_THAN_SPACES ||
+ c == VARCHAR_CMP_GREATER_THAN_SPACES) {
+ // This is not the last segment
+ if ((fpi->m_segment_size - 1) > dst_len) {
+ // The segment is full of data but the table field can't hold that
+ // much! This must be data corruption.
+ return HA_EXIT_FAILURE;
+ }
+ dst_len -= (fpi->m_segment_size - 1);
+ } else {
+ // Encountered a value that's none of the VARCHAR_CMP* constants
+ // It's data corruption.
+ return HA_EXIT_FAILURE;
+ }
+ }
+ return finished ? HA_EXIT_SUCCESS : HA_EXIT_FAILURE;
+}
+
+/*
+ Function of type rdb_index_field_unpack_t
+*/
+
+int Rdb_key_def::unpack_integer(
+ Rdb_field_packing *const fpi, Field *const field, uchar *const to,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) {
+ const int length = fpi->m_max_image_len;
+
+ const uchar *from;
+ if (!(from = (const uchar *)reader->read(length))) {
+ return UNPACK_FAILURE; /* Mem-comparable image doesn't have enough bytes */
+ }
+
+#ifdef WORDS_BIGENDIAN
+ {
+ if (static_cast<Field_num *>(field)->unsigned_flag) {
+ to[0] = from[0];
+ } else {
+ to[0] = static_cast<char>(from[0] ^ 128); // Reverse the sign bit.
+ }
+ memcpy(to + 1, from + 1, length - 1);
+ }
+#else
+ {
+ const int sign_byte = from[0];
+ if (static_cast<Field_num *>(field)->unsigned_flag) {
+ to[length - 1] = sign_byte;
+ } else {
+ to[length - 1] =
+ static_cast<char>(sign_byte ^ 128); // Reverse the sign bit.
+ }
+ for (int i = 0, j = length - 1; i < length - 1; ++i, --j) to[i] = from[j];
+ }
+#endif
+ return UNPACK_SUCCESS;
+}
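+
+/*
+  Worked example (illustrative, little-endian, 4-byte signed INT): the
+  mem-comparable image of -1 is {0x7F, 0xFF, 0xFF, 0xFF} (big-endian with the
+  sign bit flipped). Unpacking restores the sign bit (0x7F ^ 128 == 0xFF) and
+  reverses the bytes, producing {0xFF, 0xFF, 0xFF, 0xFF} in the record.
+*/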
+
+#if !defined(WORDS_BIGENDIAN)
+static void rdb_swap_double_bytes(uchar *const dst, const uchar *const src) {
+#if defined(__FLOAT_WORD_ORDER) && (__FLOAT_WORD_ORDER == __BIG_ENDIAN)
+ // A few systems store the most-significant _word_ first on little-endian
+ dst[0] = src[3];
+ dst[1] = src[2];
+ dst[2] = src[1];
+ dst[3] = src[0];
+ dst[4] = src[7];
+ dst[5] = src[6];
+ dst[6] = src[5];
+ dst[7] = src[4];
+#else
+ dst[0] = src[7];
+ dst[1] = src[6];
+ dst[2] = src[5];
+ dst[3] = src[4];
+ dst[4] = src[3];
+ dst[5] = src[2];
+ dst[6] = src[1];
+ dst[7] = src[0];
+#endif
+}
+
+static void rdb_swap_float_bytes(uchar *const dst, const uchar *const src) {
+ dst[0] = src[3];
+ dst[1] = src[2];
+ dst[2] = src[1];
+ dst[3] = src[0];
+}
+#else
+#define rdb_swap_double_bytes nullptr
+#define rdb_swap_float_bytes nullptr
+#endif
+
+int Rdb_key_def::unpack_floating_point(
+ uchar *const dst, Rdb_string_reader *const reader, const size_t size,
+ const int exp_digit, const uchar *const zero_pattern,
+ const uchar *const zero_val, void (*swap_func)(uchar *, const uchar *)) {
+ const uchar *const from = (const uchar *)reader->read(size);
+ if (from == nullptr) {
+ /* Mem-comparable image doesn't have enough bytes */
+ return UNPACK_FAILURE;
+ }
+
+ /* Check to see if the value is zero */
+ if (memcmp(from, zero_pattern, size) == 0) {
+ memcpy(dst, zero_val, size);
+ return UNPACK_SUCCESS;
+ }
+
+#if defined(WORDS_BIGENDIAN)
+ // On big-endian, output can go directly into result
+ uchar *const tmp = dst;
+#else
+ // Otherwise use a temporary buffer to make byte-swapping easier later
+ uchar tmp[8];
+#endif
+
+ memcpy(tmp, from, size);
+
+ if (tmp[0] & 0x80) {
+ // If the high bit is set the original value was positive so
+ // remove the high bit and subtract one from the exponent.
+ ushort exp_part = ((ushort)tmp[0] << 8) | (ushort)tmp[1];
+ exp_part &= 0x7FFF; // clear high bit;
+ exp_part -= (ushort)1 << (16 - 1 - exp_digit); // subtract from exponent
+ tmp[0] = (uchar)(exp_part >> 8);
+ tmp[1] = (uchar)exp_part;
+ } else {
+ // Otherwise the original value was negative and all bytes have been
+ // negated.
+ for (size_t ii = 0; ii < size; ii++) tmp[ii] ^= 0xFF;
+ }
+
+#if !defined(WORDS_BIGENDIAN)
+ // On little-endian, swap the bytes around
+ swap_func(dst, tmp);
+#else
+ DBUG_ASSERT(swap_func == nullptr);
+#endif
+
+ return UNPACK_SUCCESS;
+}
+
+#if !defined(DBL_EXP_DIG)
+#define DBL_EXP_DIG (sizeof(double) * 8 - DBL_MANT_DIG)
+#endif
+
+/*
+ Function of type rdb_index_field_unpack_t
+
+ Unpack a double by doing the reverse action of change_double_for_sort
+ (sql/filesort.cc). Note that this only works on IEEE values.
+ Note also that this code assumes that NaN and +/-Infinity are never
+ allowed in the database.
+*/
+int Rdb_key_def::unpack_double(
+ Rdb_field_packing *const fpi MY_ATTRIBUTE((__unused__)),
+ Field *const field MY_ATTRIBUTE((__unused__)), uchar *const field_ptr,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) {
+ static double zero_val = 0.0;
+ static const uchar zero_pattern[8] = {128, 0, 0, 0, 0, 0, 0, 0};
+
+ return unpack_floating_point(field_ptr, reader, sizeof(double), DBL_EXP_DIG,
+ zero_pattern, (const uchar *)&zero_val,
+ rdb_swap_double_bytes);
+}
+
+#if !defined(FLT_EXP_DIG)
+#define FLT_EXP_DIG (sizeof(float) * 8 - FLT_MANT_DIG)
+#endif
+
+/*
+ Function of type rdb_index_field_unpack_t
+
+ Unpack a float by doing the reverse action of Field_float::make_sort_key
+ (sql/field.cc). Note that this only works on IEEE values.
+ Note also that this code assumes that NaN and +/-Infinity are never
+ allowed in the database.
+*/
+int Rdb_key_def::unpack_float(
+ Rdb_field_packing *const fpi, Field *const field MY_ATTRIBUTE((__unused__)),
+ uchar *const field_ptr, Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) {
+ static float zero_val = 0.0;
+ static const uchar zero_pattern[4] = {128, 0, 0, 0};
+
+ return unpack_floating_point(field_ptr, reader, sizeof(float), FLT_EXP_DIG,
+ zero_pattern, (const uchar *)&zero_val,
+ rdb_swap_float_bytes);
+}
+
+/*
+ Function of type rdb_index_field_unpack_t used to
+ Unpack by doing the reverse action to Field_newdate::make_sort_key.
+*/
+
+int Rdb_key_def::unpack_newdate(
+ Rdb_field_packing *const fpi, Field *const field MY_ATTRIBUTE((__unused__)),
+ uchar *const field_ptr, Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) {
+ const char *from;
+ DBUG_ASSERT(fpi->m_max_image_len == 3);
+
+ if (!(from = reader->read(3))) {
+ /* Mem-comparable image doesn't have enough bytes */
+ return UNPACK_FAILURE;
+ }
+
+ field_ptr[0] = from[2];
+ field_ptr[1] = from[1];
+ field_ptr[2] = from[0];
+ return UNPACK_SUCCESS;
+}
+
+/*
+ Function of type rdb_index_field_unpack_t, used to
+ Unpack the string by copying it over.
+ This is for BINARY(n) where the value occupies the whole length.
+*/
+
+int Rdb_key_def::unpack_binary_str(
+ Rdb_field_packing *const fpi, Field *const field, uchar *const to,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) {
+ const char *from;
+ if (!(from = reader->read(fpi->m_max_image_len))) {
+ /* Mem-comparable image doesn't have enough bytes */
+ return UNPACK_FAILURE;
+ }
+
+ memcpy(to, from, fpi->m_max_image_len);
+ return UNPACK_SUCCESS;
+}
+
+/*
+ Function of type rdb_index_field_unpack_t.
+ For UTF-8, we need to convert 2-byte wide-character entities back into
+ UTF8 sequences.
+*/
+
+int Rdb_key_def::unpack_utf8_str(
+ Rdb_field_packing *const fpi, Field *const field, uchar *dst,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) {
+ my_core::CHARSET_INFO *const cset = (my_core::CHARSET_INFO *)field->charset();
+ const uchar *src;
+ if (!(src = (const uchar *)reader->read(fpi->m_max_image_len))) {
+ /* Mem-comparable image doesn't have enough bytes */
+ return UNPACK_FAILURE;
+ }
+
+ const uchar *const src_end = src + fpi->m_max_image_len;
+ uchar *const dst_end = dst + field->pack_length();
+
+ while (src < src_end) {
+ my_wc_t wc = (src[0] << 8) | src[1];
+ src += 2;
+ int res = cset->cset->wc_mb(cset, wc, dst, dst_end);
+ DBUG_ASSERT(res > 0 && res <= 3);
+ if (res < 0) return UNPACK_FAILURE;
+ dst += res;
+ }
+
+ cset->cset->fill(cset, reinterpret_cast<char *>(dst), dst_end - dst,
+ cset->pad_char);
+ return UNPACK_SUCCESS;
+}
+
+/*
+ This is the original algorithm to encode a variable binary field. It
+ sets a flag byte every Nth byte. The flag value is (255 - #pad) where
+ #pad is the number of padding bytes that were needed (0 if all N-1
+ bytes were used).
+
+ If N=8 and the field is:
+ * 3 bytes (1, 2, 3) this is encoded as: 1, 2, 3, 0, 0, 0, 0, 251
+ * 4 bytes (1, 2, 3, 0) this is encoded as: 1, 2, 3, 0, 0, 0, 0, 252
+  And the 4-byte string compares as greater than the 3-byte string.
+
+ Unfortunately the algorithm has a flaw. If the input is exactly a
+ multiple of N-1, an extra N bytes are written. Since we usually use
+ N=9, an 8 byte input will generate 18 bytes of output instead of the
+ 9 bytes of output that is optimal.
+
+ See pack_variable_format for the newer algorithm.
+*/
+void Rdb_key_def::pack_legacy_variable_format(
+ const uchar *src, // The data to encode
+ size_t src_len, // The length of the data to encode
+ uchar **dst) // The location to encode the data
+{
+ size_t copy_len;
+ size_t padding_bytes;
+ uchar *ptr = *dst;
+
+ do {
+ copy_len = std::min((size_t)RDB_LEGACY_ESCAPE_LENGTH - 1, src_len);
+ padding_bytes = RDB_LEGACY_ESCAPE_LENGTH - 1 - copy_len;
+ memcpy(ptr, src, copy_len);
+ ptr += copy_len;
+ src += copy_len;
+ // pad with zeros if necessary
+ if (padding_bytes > 0) {
+ memset(ptr, 0, padding_bytes);
+ ptr += padding_bytes;
+ }
+
+ *(ptr++) = 255 - padding_bytes;
+
+ src_len -= copy_len;
+ } while (padding_bytes == 0);
+
+ *dst = ptr;
+}
+
+/*
+ This is the new algorithm. Similarly to the legacy format the input
+ is split up into N-1 bytes and a flag byte is used as the Nth byte
+ in the output.
+
+ - If the previous segment needed any padding the flag is set to the
+ number of bytes used (0..N-2). 0 is possible in the first segment
+ if the input is 0 bytes long.
+ - If no padding was used and there is no more data left in the input
+ the flag is set to N-1
+ - If no padding was used and there is still data left in the input the
+ flag is set to N.
+
+ For N=9, the following input values encode to the specified
+  output (where 'X' indicates a byte of the original input):
+ - 0 bytes is encoded as 0 0 0 0 0 0 0 0 0
+ - 1 byte is encoded as X 0 0 0 0 0 0 0 1
+ - 2 bytes is encoded as X X 0 0 0 0 0 0 2
+ - 7 bytes is encoded as X X X X X X X 0 7
+ - 8 bytes is encoded as X X X X X X X X 8
+ - 9 bytes is encoded as X X X X X X X X 9 X 0 0 0 0 0 0 0 1
+ - 10 bytes is encoded as X X X X X X X X 9 X X 0 0 0 0 0 0 2
+*/
+void Rdb_key_def::pack_variable_format(
+ const uchar *src, // The data to encode
+ size_t src_len, // The length of the data to encode
+ uchar **dst) // The location to encode the data
+{
+ uchar *ptr = *dst;
+
+ for (;;) {
+ // Figure out how many bytes to copy, copy them and adjust pointers
+ const size_t copy_len = std::min((size_t)RDB_ESCAPE_LENGTH - 1, src_len);
+ memcpy(ptr, src, copy_len);
+ ptr += copy_len;
+ src += copy_len;
+ src_len -= copy_len;
+
+ // Are we at the end of the input?
+ if (src_len == 0) {
+      // Pad with zeros if necessary.
+ const size_t padding_bytes = RDB_ESCAPE_LENGTH - 1 - copy_len;
+ if (padding_bytes > 0) {
+ memset(ptr, 0, padding_bytes);
+ ptr += padding_bytes;
+ }
+
+ // Put the flag byte (0 - N-1) in the output
+ *(ptr++) = (uchar)copy_len;
+ break;
+ }
+
+ // We have more data - put the flag byte (N) in and continue
+ *(ptr++) = RDB_ESCAPE_LENGTH;
+ }
+
+ *dst = ptr;
+}
+
+/*
+ Function of type rdb_index_field_pack_t
+*/
+
+void Rdb_key_def::pack_with_varchar_encoding(
+ Rdb_field_packing *const fpi, Field *const field, uchar *buf, uchar **dst,
+ Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__))) {
+ const CHARSET_INFO *const charset = field->charset();
+ Field_varstring *const field_var = (Field_varstring *)field;
+
+ const size_t value_length = (field_var->length_bytes == 1)
+ ? (uint)*field->ptr
+ : uint2korr(field->ptr);
+ size_t xfrm_len = charset->coll->strnxfrm(
+ charset, buf, fpi->m_max_image_len, field_var->char_length(),
+ field_var->ptr + field_var->length_bytes, value_length, 0);
+
+ /* Got a mem-comparable image in 'buf'. Now, produce varlength encoding */
+ if (fpi->m_use_legacy_varbinary_format) {
+ pack_legacy_variable_format(buf, xfrm_len, dst);
+ } else {
+ pack_variable_format(buf, xfrm_len, dst);
+ }
+}
+
+/*
+ Compare the string in [buf..buf_end) with a string that is an infinite
+ sequence of strings in space_xfrm
+*/
+
+static int rdb_compare_string_with_spaces(
+ const uchar *buf, const uchar *const buf_end,
+ const std::vector<uchar> *const space_xfrm) {
+ int cmp = 0;
+ while (buf < buf_end) {
+ size_t bytes = std::min((size_t)(buf_end - buf), space_xfrm->size());
+ if ((cmp = memcmp(buf, space_xfrm->data(), bytes)) != 0) break;
+ buf += bytes;
+ }
+ return cmp;
+}
+
+static const int RDB_TRIMMED_CHARS_OFFSET = 8;
+/*
+ Pack the data with Variable-Length Space-Padded Encoding.
+
+ The encoding is there to meet two goals:
+
+ Goal#1. Comparison. The SQL standard says
+
+ " If the collation for the comparison has the PAD SPACE characteristic,
+ for the purposes of the comparison, the shorter value is effectively
+ extended to the length of the longer by concatenation of <space>s on the
+  right."
+
+ At the moment, all MySQL collations except one have the PAD SPACE
+ characteristic. The exception is the "binary" collation that is used by
+ [VAR]BINARY columns. (Note that binary collations for specific charsets,
+  like utf8_bin or latin1_bin, are not the same as the "binary" collation;
+  they do have the PAD SPACE characteristic).
+
+ Goal#2 is to preserve the number of trailing spaces in the original value.
+
+ This is achieved by using the following encoding:
+ The key part:
+ - Stores mem-comparable image of the column
+ - It is stored in chunks of fpi->m_segment_size bytes (*)
+ = If the remainder of the chunk is not occupied, it is padded with mem-
+ comparable image of the space character (cs->pad_char to be precise).
+ - The last byte of the chunk shows how the rest of column's mem-comparable
+ image would compare to mem-comparable image of the column extended with
+ spaces. There are three possible values.
+ - VARCHAR_CMP_LESS_THAN_SPACES,
+ - VARCHAR_CMP_EQUAL_TO_SPACES
+ - VARCHAR_CMP_GREATER_THAN_SPACES
+
+ VARCHAR_CMP_EQUAL_TO_SPACES means that this chunk is the last one (the rest
+ is spaces, or something that sorts as spaces, so there is no reason to store
+ it).
+
+ Example: if fpi->m_segment_size=5, and the collation is latin1_bin:
+
+ 'abcd\0' => [ 'abcd' <VARCHAR_CMP_LESS> ]['\0 ' <VARCHAR_CMP_EQUAL> ]
+ 'abcd' => [ 'abcd' <VARCHAR_CMP_EQUAL>]
+ 'abcd ' => [ 'abcd' <VARCHAR_CMP_EQUAL>]
+ 'abcdZZZZ' => [ 'abcd' <VARCHAR_CMP_GREATER>][ 'ZZZZ' <VARCHAR_CMP_EQUAL>]
+
+ As mentioned above, the last chunk is padded with mem-comparable images of
+ cs->pad_char. It can be 1-byte long (latin1), 2 (utf8_bin), 3 (utf8mb4), etc.
+
+ fpi->m_segment_size depends on the used collation. It is chosen to be such
+ that no mem-comparable image of space will ever stretch across the segments
+ (see get_segment_size_from_collation).
+
+ == The value part (aka unpack_info) ==
+ The value part stores the number of space characters that one needs to add
+ when unpacking the string.
+ - If the number is positive, it means add this many spaces at the end
+ - If the number is negative, it means padding has added extra spaces which
+ must be removed.
+
+ Storage considerations
+ - depending on column's max size, the number may occupy 1 or 2 bytes
+ - the number of spaces that need to be removed is not more than
+ RDB_TRIMMED_CHARS_OFFSET=8, so we offset the number by that value and
+ then store it as unsigned.
+
+ @seealso
+ unpack_binary_or_utf8_varchar_space_pad
+ unpack_simple_varchar_space_pad
+ dummy_make_unpack_info
+ skip_variable_space_pad
+*/
+
+void Rdb_key_def::pack_with_varchar_space_pad(
+ Rdb_field_packing *const fpi, Field *const field, uchar *buf, uchar **dst,
+ Rdb_pack_field_context *const pack_ctx) {
+ Rdb_string_writer *const unpack_info = pack_ctx->writer;
+ const CHARSET_INFO *const charset = field->charset();
+ const auto field_var = static_cast<Field_varstring *>(field);
+
+ const size_t value_length = (field_var->length_bytes == 1)
+ ? (uint)*field->ptr
+ : uint2korr(field->ptr);
+
+ const size_t trimmed_len = charset->cset->lengthsp(
+ charset, (const char *)field_var->ptr + field_var->length_bytes,
+ value_length);
+ const size_t xfrm_len = charset->coll->strnxfrm(
+ charset, buf, fpi->m_max_image_len, field_var->char_length(),
+ field_var->ptr + field_var->length_bytes, trimmed_len, 0);
+
+ /* Got a mem-comparable image in 'buf'. Now, produce varlength encoding */
+ uchar *const buf_end = buf + xfrm_len;
+
+ size_t encoded_size = 0;
+ uchar *ptr = *dst;
+ size_t padding_bytes;
+ while (true) {
+ const size_t copy_len =
+ std::min<size_t>(fpi->m_segment_size - 1, buf_end - buf);
+ padding_bytes = fpi->m_segment_size - 1 - copy_len;
+ memcpy(ptr, buf, copy_len);
+ ptr += copy_len;
+ buf += copy_len;
+
+ if (padding_bytes) {
+ memcpy(ptr, fpi->space_xfrm->data(), padding_bytes);
+ ptr += padding_bytes;
+ *ptr = VARCHAR_CMP_EQUAL_TO_SPACES; // last segment
+ } else {
+ // Compare the string suffix with a hypothetical infinite string of
+ // spaces. It could be that the first difference is beyond the end of
+ // current chunk.
+ const int cmp =
+ rdb_compare_string_with_spaces(buf, buf_end, fpi->space_xfrm);
+
+ if (cmp < 0) {
+ *ptr = VARCHAR_CMP_LESS_THAN_SPACES;
+ } else if (cmp > 0) {
+ *ptr = VARCHAR_CMP_GREATER_THAN_SPACES;
+ } else {
+ // It turns out all the rest are spaces.
+ *ptr = VARCHAR_CMP_EQUAL_TO_SPACES;
+ }
+ }
+ encoded_size += fpi->m_segment_size;
+
+ if (*(ptr++) == VARCHAR_CMP_EQUAL_TO_SPACES) break;
+ }
+
+ // m_unpack_info_stores_value means unpack_info stores the whole original
+ // value. There is no need to store the number of trimmed/padded endspaces
+ // in that case.
+ if (unpack_info && !fpi->m_unpack_info_stores_value) {
+ // (value_length - trimmed_len) is the number of trimmed space *characters*
+ // then, padding_bytes is the number of *bytes* added as padding
+ // then, we add 8, because we don't store negative values.
+ DBUG_ASSERT(padding_bytes % fpi->space_xfrm_len == 0);
+ DBUG_ASSERT((value_length - trimmed_len) % fpi->space_mb_len == 0);
+ const size_t removed_chars =
+ RDB_TRIMMED_CHARS_OFFSET +
+ (value_length - trimmed_len) / fpi->space_mb_len -
+ padding_bytes / fpi->space_xfrm_len;
+
+ if (fpi->m_unpack_info_uses_two_bytes) {
+ unpack_info->write_uint16(removed_chars);
+ } else {
+ DBUG_ASSERT(removed_chars < 0x100);
+ unpack_info->write_uint8(removed_chars);
+ }
+ }
+
+ *dst += encoded_size;
+}
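+
+/*
+  Worked example (illustrative; assumes latin1_bin with space_xfrm_len ==
+  space_mb_len == 1 and m_segment_size == 9): packing 'ab' copies 2 bytes and
+  pads with 6 space bytes, so
+    removed_chars = 8 + (2 - 2) / 1 - 6 / 1 = 2
+  and the stored value 2 (<= RDB_TRIMMED_CHARS_OFFSET) tells the unpacker to
+  strip 6 padding bytes and add no trailing spaces back.
+*/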
+
+/*
+ Calculate the number of used bytes in the chunk and whether this is the
+ last chunk in the input. This is based on the old legacy format - see
+ pack_legacy_variable_format.
+ */
+uint Rdb_key_def::calc_unpack_legacy_variable_format(uchar flag, bool *done) {
+ uint pad = 255 - flag;
+ uint used_bytes = RDB_LEGACY_ESCAPE_LENGTH - 1 - pad;
+ if (used_bytes > RDB_LEGACY_ESCAPE_LENGTH - 1) {
+ return (uint)-1;
+ }
+
+ *done = used_bytes < RDB_LEGACY_ESCAPE_LENGTH - 1;
+ return used_bytes;
+}
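+
+// Example (legacy format, N == 9): flag 251 means 4 padding bytes, so 4 data
+// bytes were used and this is the last chunk; flag 255 means all 8 data bytes
+// were used and more chunks follow.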
+
+/*
+ Calculate the number of used bytes in the chunk and whether this is the
+ last chunk in the input. This is based on the new format - see
+ pack_variable_format.
+ */
+uint Rdb_key_def::calc_unpack_variable_format(uchar flag, bool *done) {
+ // Check for invalid flag values
+ if (flag > RDB_ESCAPE_LENGTH) {
+ return (uint)-1;
+ }
+
+ // Values from 1 to N-1 indicate this is the last chunk and that is how
+ // many bytes were used
+ if (flag < RDB_ESCAPE_LENGTH) {
+ *done = true;
+ return flag;
+ }
+
+ // A value of N means we used N-1 bytes and had more to go
+ *done = false;
+ return RDB_ESCAPE_LENGTH - 1;
+}
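+
+// Example (new format, N == 9): flag 3 means the last chunk with 3 data
+// bytes; flag 8 means the last chunk used all 8 data bytes; flag 9 means all
+// 8 bytes were used and more chunks follow; anything above 9 is corruption.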
+
+/*
+ Unpack data that has charset information. Each two bytes of the input is
+ treated as a wide-character and converted to its multibyte equivalent in
+ the output.
+ */
+static int unpack_charset(
+ const CHARSET_INFO *cset, // character set information
+ const uchar *src, // source data to unpack
+ uint src_len, // length of source data
+ uchar *dst, // destination of unpacked data
+ uint dst_len, // length of destination data
+ uint *used_bytes) // output number of bytes used
+{
+ if (src_len & 1) {
+ /*
+ UTF-8 characters are encoded into two-byte entities. There is no way
+ we can have an odd number of bytes after encoding.
+ */
+ return UNPACK_FAILURE;
+ }
+
+ uchar *dst_end = dst + dst_len;
+ uint used = 0;
+
+ for (uint ii = 0; ii < src_len; ii += 2) {
+ my_wc_t wc = (src[ii] << 8) | src[ii + 1];
+ int res = cset->cset->wc_mb(cset, wc, dst + used, dst_end);
+ DBUG_ASSERT(res > 0 && res <= 3);
+ if (res < 0) {
+ return UNPACK_FAILURE;
+ }
+
+ used += res;
+ }
+
+ *used_bytes = used;
+ return UNPACK_SUCCESS;
+}
+
+/*
+ Function of type rdb_index_field_unpack_t
+*/
+
+int Rdb_key_def::unpack_binary_or_utf8_varchar(
+ Rdb_field_packing *const fpi, Field *const field, uchar *dst,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) {
+ const uchar *ptr;
+ size_t len = 0;
+ bool finished = false;
+ uchar *d0 = dst;
+ Field_varstring *const field_var = (Field_varstring *)field;
+ dst += field_var->length_bytes;
+ // How much we can unpack
+ size_t dst_len = field_var->pack_length() - field_var->length_bytes;
+
+ bool use_legacy_format = fpi->m_use_legacy_varbinary_format;
+
+  /* Decode the variable-length encoding here */
+ while ((ptr = (const uchar *)reader->read(RDB_ESCAPE_LENGTH))) {
+ uint used_bytes;
+
+ /* See pack_with_varchar_encoding. */
+ if (use_legacy_format) {
+ used_bytes = calc_unpack_legacy_variable_format(
+ ptr[RDB_ESCAPE_LENGTH - 1], &finished);
+ } else {
+ used_bytes =
+ calc_unpack_variable_format(ptr[RDB_ESCAPE_LENGTH - 1], &finished);
+ }
+
+ if (used_bytes == (uint)-1 || dst_len < used_bytes) {
+ return UNPACK_FAILURE; // Corruption in the data
+ }
+
+ /*
+ Now, we need to decode used_bytes of data and append them to the value.
+ */
+ if (fpi->m_varchar_charset->number == COLLATION_UTF8_BIN) {
+ int err = unpack_charset(fpi->m_varchar_charset, ptr, used_bytes, dst,
+ dst_len, &used_bytes);
+ if (err != UNPACK_SUCCESS) {
+ return err;
+ }
+ } else {
+ memcpy(dst, ptr, used_bytes);
+ }
+
+ dst += used_bytes;
+ dst_len -= used_bytes;
+ len += used_bytes;
+
+ if (finished) {
+ break;
+ }
+ }
+
+ if (!finished) {
+ return UNPACK_FAILURE;
+ }
+
+ /* Save the length */
+ if (field_var->length_bytes == 1) {
+ d0[0] = (uchar)len;
+ } else {
+ DBUG_ASSERT(field_var->length_bytes == 2);
+ int2store(d0, len);
+ }
+ return UNPACK_SUCCESS;
+}
+
+/*
+ @seealso
+ pack_with_varchar_space_pad - packing function
+ unpack_simple_varchar_space_pad - unpacking function for 'simple'
+ charsets.
+ skip_variable_space_pad - skip function
+*/
+int Rdb_key_def::unpack_binary_or_utf8_varchar_space_pad(
+ Rdb_field_packing *const fpi, Field *const field, uchar *dst,
+ Rdb_string_reader *const reader, Rdb_string_reader *const unp_reader) {
+ const uchar *ptr;
+ size_t len = 0;
+ bool finished = false;
+ Field_varstring *const field_var = static_cast<Field_varstring *>(field);
+ uchar *d0 = dst;
+ uchar *dst_end = dst + field_var->pack_length();
+ dst += field_var->length_bytes;
+
+ uint space_padding_bytes = 0;
+ uint extra_spaces;
+ if ((fpi->m_unpack_info_uses_two_bytes
+ ? unp_reader->read_uint16(&extra_spaces)
+ : unp_reader->read_uint8(&extra_spaces))) {
+ return UNPACK_FAILURE;
+ }
+
+ if (extra_spaces <= RDB_TRIMMED_CHARS_OFFSET) {
+ space_padding_bytes =
+ -(static_cast<int>(extra_spaces) - RDB_TRIMMED_CHARS_OFFSET);
+ extra_spaces = 0;
+ } else {
+ extra_spaces -= RDB_TRIMMED_CHARS_OFFSET;
+ }
+
+ space_padding_bytes *= fpi->space_xfrm_len;
+
+  /* Decode the space-padded segment encoding here */
+ while ((ptr = (const uchar *)reader->read(fpi->m_segment_size))) {
+ const char last_byte = ptr[fpi->m_segment_size - 1];
+ size_t used_bytes;
+ if (last_byte == VARCHAR_CMP_EQUAL_TO_SPACES) // this is the last segment
+ {
+ if (space_padding_bytes > (fpi->m_segment_size - 1)) {
+ return UNPACK_FAILURE; // Cannot happen, corrupted data
+ }
+ used_bytes = (fpi->m_segment_size - 1) - space_padding_bytes;
+ finished = true;
+ } else {
+ if (last_byte != VARCHAR_CMP_LESS_THAN_SPACES &&
+ last_byte != VARCHAR_CMP_GREATER_THAN_SPACES) {
+ return UNPACK_FAILURE; // Invalid value
+ }
+ used_bytes = fpi->m_segment_size - 1;
+ }
+
+ // Now, need to decode used_bytes of data and append them to the value.
+ if (fpi->m_varchar_charset->number == COLLATION_UTF8_BIN) {
+      if (used_bytes & 1) {
+        /*
+          In the mem-comparable form, each UTF-8 character is a two-byte
+          weight, so there is no way we can have an odd number of bytes
+          after encoding.
+        */
+ return UNPACK_FAILURE;
+ }
+
+ const uchar *src = ptr;
+ const uchar *const src_end = ptr + used_bytes;
+ while (src < src_end) {
+ my_wc_t wc = (src[0] << 8) | src[1];
+ src += 2;
+ const CHARSET_INFO *cset = fpi->m_varchar_charset;
+ int res = cset->cset->wc_mb(cset, wc, dst, dst_end);
+ DBUG_ASSERT(res <= 3);
+ if (res <= 0) return UNPACK_FAILURE;
+ dst += res;
+ len += res;
+ }
+ } else {
+ if (dst + used_bytes > dst_end) return UNPACK_FAILURE;
+ memcpy(dst, ptr, used_bytes);
+ dst += used_bytes;
+ len += used_bytes;
+ }
+
+ if (finished) {
+ if (extra_spaces) {
+        // Both the binary and UTF-8 charsets store space as ' ',
+        // so the following is ok:
+ if (dst + extra_spaces > dst_end) return UNPACK_FAILURE;
+ memset(dst, fpi->m_varchar_charset->pad_char, extra_spaces);
+ len += extra_spaces;
+ }
+ break;
+ }
+ }
+
+ if (!finished) return UNPACK_FAILURE;
+
+ /* Save the length */
+ if (field_var->length_bytes == 1) {
+ d0[0] = (uchar)len;
+ } else {
+ DBUG_ASSERT(field_var->length_bytes == 2);
+ int2store(d0, len);
+ }
+ return UNPACK_SUCCESS;
+}
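+
+/*
+  Illustration of the space-padded format handled above (a sketch; the
+  segment size comes from get_segment_size_from_collation()): each segment
+  carries m_segment_size - 1 bytes of mem-comparable payload plus one
+  marker byte:
+
+    VARCHAR_CMP_LESS_THAN_SPACES     - more segments follow; the value
+                                       compares less than spaces here
+    VARCHAR_CMP_EQUAL_TO_SPACES      - last segment; the remainder of the
+                                       column is all spaces
+    VARCHAR_CMP_GREATER_THAN_SPACES  - more segments follow; the value
+                                       compares greater than spaces here
+
+  unpack_info stores extra_spaces biased by RDB_TRIMMED_CHARS_OFFSET, so the
+  reader can distinguish trailing spaces to re-append from padding bytes in
+  the last segment that must be discarded.
+*/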
+
+/////////////////////////////////////////////////////////////////////////
+
+/*
+ Function of type rdb_make_unpack_info_t
+*/
+
+void Rdb_key_def::make_unpack_unknown(
+ const Rdb_collation_codec *codec MY_ATTRIBUTE((__unused__)),
+ const Field *const field, Rdb_pack_field_context *const pack_ctx) {
+ pack_ctx->writer->write(field->ptr, field->pack_length());
+}
+
+/*
+  The point of this function is only to indicate that unpack_info is
+ available.
+
+ The actual unpack_info data is produced by the function that packs the key,
+ that is, pack_with_varchar_space_pad.
+*/
+
+void Rdb_key_def::dummy_make_unpack_info(
+ const Rdb_collation_codec *codec MY_ATTRIBUTE((__unused__)),
+ const Field *field MY_ATTRIBUTE((__unused__)),
+ Rdb_pack_field_context *pack_ctx MY_ATTRIBUTE((__unused__))) {
+ // Do nothing
+}
+
+/*
+ Function of type rdb_index_field_unpack_t
+*/
+
+int Rdb_key_def::unpack_unknown(Rdb_field_packing *const fpi,
+ Field *const field, uchar *const dst,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader) {
+ const uchar *ptr;
+ const uint len = fpi->m_unpack_data_len;
+ // We don't use anything from the key, so skip over it.
+ if (skip_max_length(fpi, field, reader)) {
+ return UNPACK_FAILURE;
+ }
+
+ DBUG_ASSERT_IMP(len > 0, unp_reader != nullptr);
+
+ if ((ptr = (const uchar *)unp_reader->read(len))) {
+ memcpy(dst, ptr, len);
+ return UNPACK_SUCCESS;
+ }
+ return UNPACK_FAILURE;
+}
+
+/*
+ Function of type rdb_make_unpack_info_t
+*/
+
+void Rdb_key_def::make_unpack_unknown_varchar(
+ const Rdb_collation_codec *const codec MY_ATTRIBUTE((__unused__)),
+ const Field *const field, Rdb_pack_field_context *const pack_ctx) {
+ const auto f = static_cast<const Field_varstring *>(field);
+ uint len = f->length_bytes == 1 ? (uint)*f->ptr : uint2korr(f->ptr);
+ len += f->length_bytes;
+ pack_ctx->writer->write(field->ptr, len);
+}
+
+/*
+ Function of type rdb_index_field_unpack_t
+
+ @detail
+ Unpack a key part in an "unknown" collation from its
+ (mem_comparable_form, unpack_info) form.
+
+ "Unknown" means we have no clue about how mem_comparable_form is made from
+ the original string, so we keep the whole original string in the unpack_info.
+
+ @seealso
+ make_unpack_unknown, unpack_unknown
+*/
+
+int Rdb_key_def::unpack_unknown_varchar(Rdb_field_packing *const fpi,
+ Field *const field, uchar *dst,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader) {
+ const uchar *ptr;
+ uchar *const d0 = dst;
+ const auto f = static_cast<Field_varstring *>(field);
+ dst += f->length_bytes;
+ const uint len_bytes = f->length_bytes;
+ // We don't use anything from the key, so skip over it.
+ if ((fpi->m_skip_func)(fpi, field, reader)) {
+ return UNPACK_FAILURE;
+ }
+
+ DBUG_ASSERT(len_bytes > 0);
+ DBUG_ASSERT(unp_reader != nullptr);
+
+ if ((ptr = (const uchar *)unp_reader->read(len_bytes))) {
+ memcpy(d0, ptr, len_bytes);
+ const uint len = len_bytes == 1 ? (uint)*ptr : uint2korr(ptr);
+ if ((ptr = (const uchar *)unp_reader->read(len))) {
+ memcpy(dst, ptr, len);
+ return UNPACK_SUCCESS;
+ }
+ }
+ return UNPACK_FAILURE;
+}
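+
+/*
+  unpack_info layout consumed by unpack_unknown_varchar() above, as written
+  by make_unpack_unknown_varchar():
+
+    [ length prefix : f->length_bytes (1 or 2) ][ original value : len bytes ]
+
+  i.e. the varchar field image is kept verbatim, length prefix included,
+  since nothing can be recovered from the mem-comparable form itself.
+*/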
+
+/*
+ Write unpack_data for a "simple" collation
+*/
+static void rdb_write_unpack_simple(Rdb_bit_writer *const writer,
+ const Rdb_collation_codec *const codec,
+ const uchar *const src,
+ const size_t src_len) {
+ for (uint i = 0; i < src_len; i++) {
+ writer->write(codec->m_enc_size[src[i]], codec->m_enc_idx[src[i]]);
+ }
+}
+
+static uint rdb_read_unpack_simple(Rdb_bit_reader *const reader,
+ const Rdb_collation_codec *const codec,
+ const uchar *const src, const size_t src_len,
+ uchar *const dst) {
+ for (uint i = 0; i < src_len; i++) {
+ if (codec->m_dec_size[src[i]] > 0) {
+ uint *ret;
+ DBUG_ASSERT(reader != nullptr);
+
+ if ((ret = reader->read(codec->m_dec_size[src[i]])) == nullptr) {
+ return UNPACK_FAILURE;
+ }
+ dst[i] = codec->m_dec_idx[*ret][src[i]];
+ } else {
+ dst[i] = codec->m_dec_idx[0][src[i]];
+ }
+ }
+
+ return UNPACK_SUCCESS;
+}
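+
+/*
+  What the writer/reader pair above encodes (illustrative): in a
+  case-insensitive single-byte collation, 'a' and 'A' map to the same
+  weight, so the weight alone cannot be decoded back. The codec assigns
+  each source byte an index among the bytes sharing its weight;
+  rdb_write_unpack_simple() emits that index using m_enc_size[] bits, and
+  rdb_read_unpack_simple() reads m_dec_size[] bits to pick the right
+  pre-image from m_dec_idx. Weights with a unique pre-image cost 0 bits.
+*/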
+
+/*
+ Function of type rdb_make_unpack_info_t
+
+ @detail
+ Make unpack_data for VARCHAR(n) in a "simple" charset.
+*/
+
+void Rdb_key_def::make_unpack_simple_varchar(
+ const Rdb_collation_codec *const codec, const Field *const field,
+ Rdb_pack_field_context *const pack_ctx) {
+ const auto f = static_cast<const Field_varstring *>(field);
+ uchar *const src = f->ptr + f->length_bytes;
+ const size_t src_len =
+ f->length_bytes == 1 ? (uint)*f->ptr : uint2korr(f->ptr);
+ Rdb_bit_writer bit_writer(pack_ctx->writer);
+ // The std::min compares characters with bytes, but for simple collations,
+ // mbmaxlen = 1.
+ rdb_write_unpack_simple(&bit_writer, codec, src,
+ std::min((size_t)f->char_length(), src_len));
+}
+
+/*
+ Function of type rdb_index_field_unpack_t
+
+ @seealso
+ pack_with_varchar_space_pad - packing function
+ unpack_binary_or_utf8_varchar_space_pad - a similar unpacking function
+*/
+
+int Rdb_key_def::unpack_simple_varchar_space_pad(
+ Rdb_field_packing *const fpi, Field *const field, uchar *dst,
+ Rdb_string_reader *const reader, Rdb_string_reader *const unp_reader) {
+ const uchar *ptr;
+ size_t len = 0;
+ bool finished = false;
+ uchar *d0 = dst;
+ const Field_varstring *const field_var =
+ static_cast<Field_varstring *>(field);
+ // For simple collations, char_length is also number of bytes.
+ DBUG_ASSERT((size_t)fpi->m_max_image_len >= field_var->char_length());
+ uchar *dst_end = dst + field_var->pack_length();
+ dst += field_var->length_bytes;
+ Rdb_bit_reader bit_reader(unp_reader);
+
+ uint space_padding_bytes = 0;
+ uint extra_spaces;
+ DBUG_ASSERT(unp_reader != nullptr);
+
+ if ((fpi->m_unpack_info_uses_two_bytes
+ ? unp_reader->read_uint16(&extra_spaces)
+ : unp_reader->read_uint8(&extra_spaces))) {
+ return UNPACK_FAILURE;
+ }
+
+ if (extra_spaces <= 8) {
+ space_padding_bytes = -(static_cast<int>(extra_spaces) - 8);
+ extra_spaces = 0;
+ } else {
+ extra_spaces -= 8;
+ }
+
+ space_padding_bytes *= fpi->space_xfrm_len;
+
+  /* Decode the space-padded segment encoding here */
+ while ((ptr = (const uchar *)reader->read(fpi->m_segment_size))) {
+    const char last_byte =
+        ptr[fpi->m_segment_size - 1];  // the VARCHAR_CMP_* marker byte
+ size_t used_bytes;
+ if (last_byte == VARCHAR_CMP_EQUAL_TO_SPACES) {
+ // this is the last one
+ if (space_padding_bytes > (fpi->m_segment_size - 1)) {
+ return UNPACK_FAILURE; // Cannot happen, corrupted data
+ }
+ used_bytes = (fpi->m_segment_size - 1) - space_padding_bytes;
+ finished = true;
+ } else {
+ if (last_byte != VARCHAR_CMP_LESS_THAN_SPACES &&
+ last_byte != VARCHAR_CMP_GREATER_THAN_SPACES) {
+ return UNPACK_FAILURE;
+ }
+ used_bytes = fpi->m_segment_size - 1;
+ }
+
+ if (dst + used_bytes > dst_end) {
+ // The value on disk is longer than the field definition allows?
+ return UNPACK_FAILURE;
+ }
+
+ uint ret;
+ if ((ret = rdb_read_unpack_simple(&bit_reader, fpi->m_charset_codec, ptr,
+ used_bytes, dst)) != UNPACK_SUCCESS) {
+ return ret;
+ }
+
+ dst += used_bytes;
+ len += used_bytes;
+
+ if (finished) {
+ if (extra_spaces) {
+ if (dst + extra_spaces > dst_end) return UNPACK_FAILURE;
+ // pad_char has a 1-byte form in all charsets that
+ // are handled by rdb_init_collation_mapping.
+ memset(dst, field_var->charset()->pad_char, extra_spaces);
+ len += extra_spaces;
+ }
+ break;
+ }
+ }
+
+ if (!finished) return UNPACK_FAILURE;
+
+ /* Save the length */
+ if (field_var->length_bytes == 1) {
+ d0[0] = (uchar)len;
+ } else {
+ DBUG_ASSERT(field_var->length_bytes == 2);
+ int2store(d0, len);
+ }
+ return UNPACK_SUCCESS;
+}
+
+/*
+ Function of type rdb_make_unpack_info_t
+
+ @detail
+ Make unpack_data for CHAR(n) value in a "simple" charset.
+ It is CHAR(N), so SQL layer has padded the value with spaces up to N chars.
+
+ @seealso
+ The VARCHAR variant is in make_unpack_simple_varchar
+*/
+
+void Rdb_key_def::make_unpack_simple(const Rdb_collation_codec *const codec,
+ const Field *const field,
+ Rdb_pack_field_context *const pack_ctx) {
+ const uchar *const src = field->ptr;
+ Rdb_bit_writer bit_writer(pack_ctx->writer);
+ rdb_write_unpack_simple(&bit_writer, codec, src, field->pack_length());
+}
+
+/*
+ Function of type rdb_index_field_unpack_t
+*/
+
+int Rdb_key_def::unpack_simple(Rdb_field_packing *const fpi,
+ Field *const field MY_ATTRIBUTE((__unused__)),
+ uchar *const dst,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader) {
+ const uchar *ptr;
+ const uint len = fpi->m_max_image_len;
+ Rdb_bit_reader bit_reader(unp_reader);
+
+ if (!(ptr = (const uchar *)reader->read(len))) {
+ return UNPACK_FAILURE;
+ }
+
+ return rdb_read_unpack_simple(unp_reader ? &bit_reader : nullptr,
+ fpi->m_charset_codec, ptr, len, dst);
+}
+
+// See Rdb_charset_space_info::spaces_xfrm
+const int RDB_SPACE_XFRM_SIZE = 32;
+
+// A class holding information about how the space character is represented
+// in a charset.
+class Rdb_charset_space_info {
+ public:
+ Rdb_charset_space_info(const Rdb_charset_space_info &) = delete;
+ Rdb_charset_space_info &operator=(const Rdb_charset_space_info &) = delete;
+ Rdb_charset_space_info() = default;
+
+ // A few strxfrm'ed space characters, at least RDB_SPACE_XFRM_SIZE bytes
+ std::vector<uchar> spaces_xfrm;
+
+ // length(strxfrm(' '))
+ size_t space_xfrm_len;
+
+ // length of the space character itself
+ // Typically space is just 0x20 (length=1) but in ucs2 it is 0x00 0x20
+ // (length=2)
+ size_t space_mb_len;
+};
+
+static std::array<std::unique_ptr<Rdb_charset_space_info>, MY_ALL_CHARSETS_SIZE>
+ rdb_mem_comparable_space;
+
+/*
+ @brief
+ For a given charset, get
+ - strxfrm(' '), a sample that is at least RDB_SPACE_XFRM_SIZE bytes long.
+ - length of strxfrm(charset, ' ')
+ - length of the space character in the charset
+
+  @param cs       IN  Charset to get the space for
+  @param xfrm     OUT A few mem-comparable space characters
+  @param xfrm_len OUT Length of one mem-comparable space (in bytes)
+  @param mb_len   OUT Length of the space character in the charset (in bytes)
+
+ @detail
+ It is tempting to pre-generate mem-comparable form of space character for
+ every charset on server startup.
+ One can't do that: some charsets are not initialized until somebody
+ attempts to use them (e.g. create or open a table that has a field that
+ uses the charset).
+*/
+
+static void rdb_get_mem_comparable_space(const CHARSET_INFO *const cs,
+ const std::vector<uchar> **xfrm,
+ size_t *const xfrm_len,
+ size_t *const mb_len) {
+ DBUG_ASSERT(cs->number < MY_ALL_CHARSETS_SIZE);
+ if (!rdb_mem_comparable_space[cs->number].get()) {
+ RDB_MUTEX_LOCK_CHECK(rdb_mem_cmp_space_mutex);
+ if (!rdb_mem_comparable_space[cs->number].get()) {
+ // Upper bound of how many bytes can be occupied by multi-byte form of a
+ // character in any charset.
+ const int MAX_MULTI_BYTE_CHAR_SIZE = 4;
+ DBUG_ASSERT(cs->mbmaxlen <= MAX_MULTI_BYTE_CHAR_SIZE);
+
+ // multi-byte form of the ' ' (space) character
+ uchar space_mb[MAX_MULTI_BYTE_CHAR_SIZE];
+
+ const size_t space_mb_len = cs->cset->wc_mb(
+ cs, (my_wc_t)cs->pad_char, space_mb, space_mb + sizeof(space_mb));
+
+ // mem-comparable image of the space character
+ std::array<uchar, 20> space;
+
+ const size_t space_len = cs->coll->strnxfrm(
+ cs, space.data(), sizeof(space), 1, space_mb, space_mb_len, 0);
+ Rdb_charset_space_info *const info = new Rdb_charset_space_info;
+ info->space_xfrm_len = space_len;
+ info->space_mb_len = space_mb_len;
+ while (info->spaces_xfrm.size() < RDB_SPACE_XFRM_SIZE) {
+ info->spaces_xfrm.insert(info->spaces_xfrm.end(), space.data(),
+ space.data() + space_len);
+ }
+ rdb_mem_comparable_space[cs->number].reset(info);
+ }
+ RDB_MUTEX_UNLOCK_CHECK(rdb_mem_cmp_space_mutex);
+ }
+
+ *xfrm = &rdb_mem_comparable_space[cs->number]->spaces_xfrm;
+ *xfrm_len = rdb_mem_comparable_space[cs->number]->space_xfrm_len;
+ *mb_len = rdb_mem_comparable_space[cs->number]->space_mb_len;
+}
+
+mysql_mutex_t rdb_mem_cmp_space_mutex;
+
+std::array<const Rdb_collation_codec *, MY_ALL_CHARSETS_SIZE>
+ rdb_collation_data;
+mysql_mutex_t rdb_collation_data_mutex;
+
+bool rdb_is_collation_supported(const my_core::CHARSET_INFO *const cs) {
+  return cs->strxfrm_multiply == 1 && cs->mbmaxlen == 1 &&
+ !(cs->state & (MY_CS_BINSORT | MY_CS_NOPAD));
+}
+
+static const Rdb_collation_codec *rdb_init_collation_mapping(
+ const my_core::CHARSET_INFO *const cs) {
+ DBUG_ASSERT(cs && cs->state & MY_CS_AVAILABLE);
+ const Rdb_collation_codec *codec = rdb_collation_data[cs->number];
+
+ if (codec == nullptr && rdb_is_collation_supported(cs)) {
+ RDB_MUTEX_LOCK_CHECK(rdb_collation_data_mutex);
+
+ codec = rdb_collation_data[cs->number];
+ if (codec == nullptr) {
+ Rdb_collation_codec *cur = nullptr;
+
+ // Compute reverse mapping for simple collations.
+ if (rdb_is_collation_supported(cs)) {
+ cur = new Rdb_collation_codec;
+ std::map<uchar, std::vector<uchar>> rev_map;
+ size_t max_conflict_size = 0;
+ for (int src = 0; src < 256; src++) {
+ uchar dst = cs->sort_order[src];
+ rev_map[dst].push_back(src);
+ max_conflict_size = std::max(max_conflict_size, rev_map[dst].size());
+ }
+ cur->m_dec_idx.resize(max_conflict_size);
+
+ for (auto const &p : rev_map) {
+ uchar dst = p.first;
+ for (uint idx = 0; idx < p.second.size(); idx++) {
+ uchar src = p.second[idx];
+ uchar bits =
+ my_bit_log2(my_round_up_to_next_power(p.second.size()));
+ cur->m_enc_idx[src] = idx;
+ cur->m_enc_size[src] = bits;
+ cur->m_dec_size[dst] = bits;
+ cur->m_dec_idx[idx][dst] = src;
+ }
+ }
+
+ cur->m_make_unpack_info_func = {Rdb_key_def::make_unpack_simple_varchar,
+ Rdb_key_def::make_unpack_simple};
+ cur->m_unpack_func = {Rdb_key_def::unpack_simple_varchar_space_pad,
+ Rdb_key_def::unpack_simple};
+ } else {
+ // Out of luck for now.
+ }
+
+ if (cur != nullptr) {
+ codec = cur;
+ cur->m_cs = cs;
+ rdb_collation_data[cs->number] = cur;
+ }
+ }
+
+ RDB_MUTEX_UNLOCK_CHECK(rdb_collation_data_mutex);
+ }
+
+ return codec;
+}
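+
+/*
+  Concrete sketch of the tables built above (assuming a latin1-style
+  sort_order with sort_order['a'] == sort_order['A'] == 'A'):
+
+    rev_map['A'] = { 'A', 'a' }   ->  2 candidates  ->  1 bit
+    m_enc_idx['A'] = 0, m_enc_idx['a'] = 1
+    m_enc_size['A'] = m_enc_size['a'] = m_dec_size['A'] = 1
+    m_dec_idx[0]['A'] = 'A', m_dec_idx[1]['A'] = 'a'
+
+  A weight byte with a single pre-image gets m_dec_size == 0 and decodes
+  without consuming any unpack_info bits.
+*/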
+
+static int get_segment_size_from_collation(const CHARSET_INFO *const cs) {
+ int ret;
+ if (cs->number == COLLATION_UTF8MB4_BIN || cs->number == COLLATION_UTF16_BIN ||
+ cs->number == COLLATION_UTF16LE_BIN || cs->number == COLLATION_UTF32_BIN) {
+ /*
+ In these collations, a character produces one weight, which is 3 bytes.
+      A segment has 3 characters; add one byte for the VARCHAR_CMP_* marker,
+      and we get 3*3+1=10.
+ */
+ ret = 10;
+ } else {
+ /*
+ All other collations. There are two classes:
+      - Unicode-based, except for the collations mentioned in the
+        if-condition. For these, all weights are 2 bytes long and a character
+        may produce 0..8 weights. In any case, 8 bytes of payload in the
+        segment guarantee that the last space character won't span across
+        segments.
+
+      - Collations not based on Unicode. These have length(strxfrm(' '))=1,
+        so there is nothing to worry about.
+
+      In both cases, take 8 bytes of payload + 1 byte for the VARCHAR_CMP_*
+      marker.
+ */
+ ret = 9;
+ }
+ DBUG_ASSERT(ret < RDB_SPACE_XFRM_SIZE);
+ return ret;
+}
+
+/*
+ @brief
+ Setup packing of index field into its mem-comparable form
+
+ @detail
+  - It is possible to produce a mem-comparable form for any datatype.
+  - Some datatypes also allow unpacking the original value from its
+    mem-comparable form.
+    = Some of these require extra information to be stored in "unpack_info".
+      unpack_info is not part of the mem-comparable form; it is only used to
+      restore the original value.
+
+ @param
+ field IN field to be packed/un-packed
+
+ @return
+ TRUE - Field can be read with index-only reads
+ FALSE - Otherwise
+*/
+
+bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
+ const Field *const field, const uint keynr_arg,
+ const uint key_part_arg,
+ const uint16 key_length) {
+ int res = false;
+ enum_field_types type = field ? field->real_type() : MYSQL_TYPE_LONGLONG;
+
+ m_keynr = keynr_arg;
+ m_key_part = key_part_arg;
+
+ m_maybe_null = field ? field->real_maybe_null() : false;
+ m_unpack_func = nullptr;
+ m_make_unpack_info_func = nullptr;
+ m_unpack_data_len = 0;
+ space_xfrm = nullptr; // safety
+ // whether to use legacy format for varchar
+ m_use_legacy_varbinary_format = false;
+  // ha_rocksdb::index_flags() passes key_descr == null to learn, from the
+  // return value, whether the field (column) supports index-only reads.
+  // The legacy vs. new varchar format doesn't affect that return value,
+  // so just set m_use_legacy_varbinary_format to true if key_descr isn't
+  // given.
+ if (!key_descr || key_descr->use_legacy_varbinary_format()) {
+ m_use_legacy_varbinary_format = true;
+ }
+  /* Calculate image length. By default, it is pack_length() */
+ m_max_image_len =
+ field ? field->pack_length() : ROCKSDB_SIZEOF_HIDDEN_PK_COLUMN;
+ m_skip_func = Rdb_key_def::skip_max_length;
+ m_pack_func = Rdb_key_def::pack_with_make_sort_key;
+
+ m_covered = false;
+
+ switch (type) {
+ case MYSQL_TYPE_LONGLONG:
+ case MYSQL_TYPE_LONG:
+ case MYSQL_TYPE_INT24:
+ case MYSQL_TYPE_SHORT:
+ case MYSQL_TYPE_TINY:
+ m_unpack_func = Rdb_key_def::unpack_integer;
+ m_covered = true;
+ return true;
+
+ case MYSQL_TYPE_DOUBLE:
+ m_unpack_func = Rdb_key_def::unpack_double;
+ m_covered = true;
+ return true;
+
+ case MYSQL_TYPE_FLOAT:
+ m_unpack_func = Rdb_key_def::unpack_float;
+ m_covered = true;
+ return true;
+
+ case MYSQL_TYPE_NEWDECIMAL:
+ /*
+ Decimal is packed with Field_new_decimal::make_sort_key, which just
+ does memcpy.
+ Unpacking decimal values was supported only after fix for issue#253,
+ because of that ha_rocksdb::get_storage_type() handles decimal values
+ in a special way.
+ */
+ case MYSQL_TYPE_DATETIME2:
+ case MYSQL_TYPE_TIMESTAMP2:
+ /* These are packed with Field_temporal_with_date_and_timef::make_sort_key
+ */
+ case MYSQL_TYPE_TIME2: /* TIME is packed with Field_timef::make_sort_key */
+ case MYSQL_TYPE_YEAR: /* YEAR is packed with Field_tiny::make_sort_key */
+ /* Everything that comes here is packed with just a memcpy(). */
+ m_unpack_func = Rdb_key_def::unpack_binary_str;
+ m_covered = true;
+ return true;
+
+ case MYSQL_TYPE_NEWDATE:
+ /*
+ This is packed by Field_newdate::make_sort_key. It assumes the data is
+ 3 bytes, and packing is done by swapping the byte order (for both big-
+ and little-endian)
+ */
+ m_unpack_func = Rdb_key_def::unpack_newdate;
+ m_covered = true;
+ return true;
+ case MYSQL_TYPE_TINY_BLOB:
+ case MYSQL_TYPE_MEDIUM_BLOB:
+ case MYSQL_TYPE_LONG_BLOB:
+ case MYSQL_TYPE_BLOB: {
+ if (key_descr) {
+ // The my_charset_bin collation is special in that it will consider
+ // shorter strings sorting as less than longer strings.
+ //
+ // See Field_blob::make_sort_key for details.
+ m_max_image_len =
+ key_length + (field->charset()->number == COLLATION_BINARY
+ ? reinterpret_cast<const Field_blob *>(field)
+ ->pack_length_no_ptr()
+ : 0);
+ // Return false because indexes on text/blob will always require
+ // a prefix. With a prefix, the optimizer will not be able to do an
+        // index-only scan since there may be content occurring after the
+        // prefix length.
+ return false;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ m_unpack_info_stores_value = false;
+ /* Handle [VAR](CHAR|BINARY) */
+
+ if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_STRING) {
+ /*
+      For CHAR-based columns, check how much space the strxfrm image will
+      take.
+ field->field_length = field->char_length() * cs->mbmaxlen.
+ */
+ const CHARSET_INFO *cs = field->charset();
+ m_max_image_len = cs->coll->strnxfrmlen(cs, field->field_length);
+ }
+ const bool is_varchar = (type == MYSQL_TYPE_VARCHAR);
+ const CHARSET_INFO *cs = field->charset();
+ // max_image_len before chunking is taken into account
+ const int max_image_len_before_chunks = m_max_image_len;
+
+ if (is_varchar) {
+ // The default for varchar is variable-length, without space-padding for
+ // comparisons
+ m_varchar_charset = cs;
+ m_skip_func = Rdb_key_def::skip_variable_length;
+ m_pack_func = Rdb_key_def::pack_with_varchar_encoding;
+ if (!key_descr || key_descr->use_legacy_varbinary_format()) {
+ m_max_image_len = RDB_LEGACY_ENCODED_SIZE(m_max_image_len);
+ } else {
+ // Calculate the maximum size of the short section plus the
+ // maximum size of the long section
+ m_max_image_len = RDB_ENCODED_SIZE(m_max_image_len);
+ }
+
+ const auto field_var = static_cast<const Field_varstring *>(field);
+ m_unpack_info_uses_two_bytes = (field_var->field_length + 8 >= 0x100);
+ }
+
+ if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_STRING) {
+ // See http://dev.mysql.com/doc/refman/5.7/en/string-types.html for
+    // information about how character-based datatypes are compared.
+ bool use_unknown_collation = false;
+ DBUG_EXECUTE_IF("myrocks_enable_unknown_collation_index_only_scans",
+ use_unknown_collation = true;);
+
+ if (cs->number == COLLATION_BINARY) {
+ // - SQL layer pads BINARY(N) so that it always is N bytes long.
+ // - For VARBINARY(N), values may have different lengths, so we're using
+ // variable-length encoding. This is also the only charset where the
+ // values are not space-padded for comparison.
+ m_unpack_func = is_varchar ? Rdb_key_def::unpack_binary_or_utf8_varchar
+ : Rdb_key_def::unpack_binary_str;
+ res = true;
+ } else if (cs->number == COLLATION_LATIN1_BIN || cs->number == COLLATION_UTF8_BIN) {
+ // For _bin collations, mem-comparable form of the string is the string
+ // itself.
+
+ if (is_varchar) {
+ // VARCHARs - are compared as if they were space-padded - but are
+ // not actually space-padded (reading the value back produces the
+ // original value, without the padding)
+ m_unpack_func = Rdb_key_def::unpack_binary_or_utf8_varchar_space_pad;
+ m_skip_func = Rdb_key_def::skip_variable_space_pad;
+ m_pack_func = Rdb_key_def::pack_with_varchar_space_pad;
+ m_make_unpack_info_func = Rdb_key_def::dummy_make_unpack_info;
+ m_segment_size = get_segment_size_from_collation(cs);
+ m_max_image_len =
+ (max_image_len_before_chunks / (m_segment_size - 1) + 1) *
+ m_segment_size;
+ rdb_get_mem_comparable_space(cs, &space_xfrm, &space_xfrm_len,
+ &space_mb_len);
+ } else {
+ // SQL layer pads CHAR(N) values to their maximum length.
+ // We just store that and restore it back.
+ m_unpack_func = (cs->number == COLLATION_LATIN1_BIN)
+ ? Rdb_key_def::unpack_binary_str
+ : Rdb_key_def::unpack_utf8_str;
+ }
+ res = true;
+ } else {
+ // This is [VAR]CHAR(n) and the collation is not $(charset_name)_bin
+
+ res = true; // index-only scans are possible
+ m_unpack_data_len = is_varchar ? 0 : field->field_length;
+ const uint idx = is_varchar ? 0 : 1;
+ const Rdb_collation_codec *codec = nullptr;
+
+ if (is_varchar) {
+ // VARCHAR requires space-padding for doing comparisons
+ //
+ // The check for cs->levels_for_order is to catch
+ // latin2_czech_cs and cp1250_czech_cs - multi-level collations
+ // that Variable-Length Space Padded Encoding can't handle.
+ // It is not expected to work for any other multi-level collations,
+ // either.
+ // Currently we handle these collations as NO_PAD, even if they have
+ // PAD_SPACE attribute.
+ if (cs->levels_for_order == 1) {
+ m_pack_func = Rdb_key_def::pack_with_varchar_space_pad;
+ m_skip_func = Rdb_key_def::skip_variable_space_pad;
+ m_segment_size = get_segment_size_from_collation(cs);
+ m_max_image_len =
+ (max_image_len_before_chunks / (m_segment_size - 1) + 1) *
+ m_segment_size;
+ rdb_get_mem_comparable_space(cs, &space_xfrm, &space_xfrm_len,
+ &space_mb_len);
+ } else {
+ // NO_LINT_DEBUG
+ sql_print_warning(
+ "RocksDB: you're trying to create an index "
+ "with a multi-level collation %s",
+ cs->name);
+ // NO_LINT_DEBUG
+          sql_print_warning(
+              "MyRocks will handle this collation internally "
+              "as if it had a NO_PAD attribute.");
+ m_pack_func = Rdb_key_def::pack_with_varchar_encoding;
+ m_skip_func = Rdb_key_def::skip_variable_length;
+ }
+ }
+
+ if ((codec = rdb_init_collation_mapping(cs)) != nullptr) {
+        // The collation allows storing extra information in the unpack_info
+ // which can be used to restore the original value from the
+ // mem-comparable form.
+ m_make_unpack_info_func = codec->m_make_unpack_info_func[idx];
+ m_unpack_func = codec->m_unpack_func[idx];
+ m_charset_codec = codec;
+ } else if (use_unknown_collation) {
+ // We have no clue about how this collation produces mem-comparable
+ // form. Our way of restoring the original value is to keep a copy of
+ // the original value in unpack_info.
+ m_unpack_info_stores_value = true;
+ m_make_unpack_info_func = is_varchar
+ ? Rdb_key_def::make_unpack_unknown_varchar
+ : Rdb_key_def::make_unpack_unknown;
+ m_unpack_func = is_varchar ? Rdb_key_def::unpack_unknown_varchar
+ : Rdb_key_def::unpack_unknown;
+ } else {
+ // Same as above: we don't know how to restore the value from its
+ // mem-comparable form.
+ // Here, we just indicate to the SQL layer we can't do it.
+ DBUG_ASSERT(m_unpack_func == nullptr);
+ m_unpack_info_stores_value = false;
+ res = false; // Indicate that index-only reads are not possible
+ }
+ }
+
+ // Make an adjustment: if this column is partially covered, tell the SQL
+ // layer we can't do index-only scans. Later when we perform an index read,
+ // we'll check on a record-by-record basis if we can do an index-only scan
+ // or not.
+ uint field_length;
+ if (field->table) {
+ field_length = field->table->field[field->field_index]->field_length;
+ } else {
+ field_length = field->field_length;
+ }
+
+ if (field_length != key_length) {
+ res = false;
+ // If this index doesn't support covered bitmaps, then we won't know
+ // during a read if the column is actually covered or not. If so, we need
+ // to assume the column isn't covered and skip it during unpacking.
+ //
+ // If key_descr == NULL, then this is a dummy field and we probably don't
+ // need to perform this step. However, to preserve the behavior before
+ // this change, we'll only skip this step if we have an index which
+ // supports covered bitmaps.
+ if (!key_descr || !key_descr->use_covered_bitmap_format()) {
+ m_unpack_func = nullptr;
+ m_make_unpack_info_func = nullptr;
+ m_unpack_info_stores_value = true;
+ }
+ }
+ }
+
+ m_covered = res;
+ return res;
+}
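+
+/*
+  Summary of the dispatch in setup() above (a sketch, not exhaustive):
+
+    integer/double/float/temporal   -> fixed-size unpack_*, covered
+    BINARY / VARBINARY              -> unpack_binary_str /
+                                       unpack_binary_or_utf8_varchar
+    latin1_bin / utf8_bin           -> *_space_pad variants for VARCHAR,
+                                       unpack_binary_str / unpack_utf8_str
+                                       for CHAR
+    "simple" 8-bit PAD collations   -> codec-based unpack_simple /
+                                       unpack_simple_varchar_space_pad
+    anything else                   -> unpack_unknown* if the debug knob
+                                       allows it, otherwise no index-only
+                                       reads (res = false)
+*/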
+
+Field *Rdb_field_packing::get_field_in_table(const TABLE *const tbl) const {
+ return tbl->key_info[m_keynr].key_part[m_key_part].field;
+}
+
+void Rdb_field_packing::fill_hidden_pk_val(uchar **dst,
+ const longlong hidden_pk_id) const {
+ DBUG_ASSERT(m_max_image_len == 8);
+
+ String to;
+ rdb_netstr_append_uint64(&to, hidden_pk_id);
+ memcpy(*dst, to.ptr(), m_max_image_len);
+
+ *dst += m_max_image_len;
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////
+// Rdb_ddl_manager
+///////////////////////////////////////////////////////////////////////////////////////////
+
+Rdb_tbl_def::~Rdb_tbl_def() {
+ auto ddl_manager = rdb_get_ddl_manager();
+ /* Don't free key definitions */
+ if (m_key_descr_arr) {
+ for (uint i = 0; i < m_key_count; i++) {
+ if (ddl_manager && m_key_descr_arr[i]) {
+ ddl_manager->erase_index_num(m_key_descr_arr[i]->get_gl_index_id());
+ }
+
+ m_key_descr_arr[i] = nullptr;
+ }
+
+ delete[] m_key_descr_arr;
+ m_key_descr_arr = nullptr;
+ }
+}
+
+/*
+ Put table definition DDL entry. Actual write is done at
+ Rdb_dict_manager::commit.
+
+ We write
+ dbname.tablename -> version + {key_entry, key_entry, key_entry, ... }
+
+ Where key entries are a tuple of
+ ( cf_id, index_nr )
+*/
+
+bool Rdb_tbl_def::put_dict(Rdb_dict_manager *const dict,
+ rocksdb::WriteBatch *const batch,
+ const rocksdb::Slice &key) {
+ StringBuffer<8 * Rdb_key_def::PACKED_SIZE> indexes;
+ indexes.alloc(Rdb_key_def::VERSION_SIZE +
+ m_key_count * Rdb_key_def::PACKED_SIZE * 2);
+ rdb_netstr_append_uint16(&indexes, Rdb_key_def::DDL_ENTRY_INDEX_VERSION);
+
+ for (uint i = 0; i < m_key_count; i++) {
+ const Rdb_key_def &kd = *m_key_descr_arr[i];
+
+ uchar flags =
+ (kd.m_is_reverse_cf ? Rdb_key_def::REVERSE_CF_FLAG : 0) |
+ (kd.m_is_per_partition_cf ? Rdb_key_def::PER_PARTITION_CF_FLAG : 0);
+
+ const uint cf_id = kd.get_cf()->GetID();
+ /*
+ If cf_id already exists, cf_flags must be the same.
+      To prevent race conditions, reading/modifying/committing CF flags
+      needs to be protected by a mutex (dict_manager->lock()).
+      When RocksDB supports transactions with pessimistic concurrency
+      control, we can switch to using that and remove the mutex.
+ */
+ uint existing_cf_flags;
+ const std::string cf_name = kd.get_cf()->GetName();
+
+ if (dict->get_cf_flags(cf_id, &existing_cf_flags)) {
+ // For the purposes of comparison we'll clear the partitioning bit. The
+ // intent here is to make sure that both partitioned and non-partitioned
+ // tables can refer to the same CF.
+ existing_cf_flags &= ~Rdb_key_def::CF_FLAGS_TO_IGNORE;
+ flags &= ~Rdb_key_def::CF_FLAGS_TO_IGNORE;
+
+ if (existing_cf_flags != flags) {
+ my_error(ER_CF_DIFFERENT, MYF(0), cf_name.c_str(), flags,
+ existing_cf_flags);
+ return true;
+ }
+ } else {
+ dict->add_cf_flags(batch, cf_id, flags);
+ }
+
+ rdb_netstr_append_uint32(&indexes, cf_id);
+
+ uint32 index_number = kd.get_index_number();
+ rdb_netstr_append_uint32(&indexes, index_number);
+
+ struct Rdb_index_info index_info;
+ index_info.m_gl_index_id = {cf_id, index_number};
+ index_info.m_index_dict_version = Rdb_key_def::INDEX_INFO_VERSION_LATEST;
+ index_info.m_index_type = kd.m_index_type;
+ index_info.m_kv_version = kd.m_kv_format_version;
+ index_info.m_index_flags = kd.m_index_flags_bitmap;
+ index_info.m_ttl_duration = kd.m_ttl_duration;
+
+ dict->add_or_update_index_cf_mapping(batch, &index_info);
+ }
+
+ const rocksdb::Slice svalue(indexes.c_ptr(), indexes.length());
+
+ dict->put_key(batch, key, svalue);
+ return false;
+}
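+
+/*
+  Value layout written by put_dict() above (sketch for a table with two
+  indexes; all integers in network byte order via rdb_netstr_*):
+
+    [ DDL_ENTRY_INDEX_VERSION : 2 ]
+    [ cf_id #0 : 4 ][ index_nr #0 : 4 ]
+    [ cf_id #1 : 4 ][ index_nr #1 : 4 ]
+
+  i.e. VERSION_SIZE + m_key_count * PACKED_SIZE * 2 bytes, matching the
+  alloc() at the top of the function.
+*/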
+
+// Length that each index flag takes inside the record.
+// Each index in the array maps to the enum INDEX_FLAG
+static const std::array<uint, 1> index_flag_lengths = {
+ {ROCKSDB_SIZEOF_TTL_RECORD}};
+
+bool Rdb_key_def::has_index_flag(uint32 index_flags, enum INDEX_FLAG flag) {
+ return flag & index_flags;
+}
+
+uint32 Rdb_key_def::calculate_index_flag_offset(uint32 index_flags,
+ enum INDEX_FLAG flag,
+ uint *const length) {
+ DBUG_ASSERT_IMP(flag != MAX_FLAG,
+ Rdb_key_def::has_index_flag(index_flags, flag));
+
+ uint offset = 0;
+ for (size_t bit = 0; bit < sizeof(index_flags) * CHAR_BIT; ++bit) {
+ int mask = 1 << bit;
+
+ /* Exit once we've reached the proper flag */
+ if (flag & mask) {
+ if (length != nullptr) {
+ *length = index_flag_lengths[bit];
+ }
+ break;
+ }
+
+ if (index_flags & mask) {
+ offset += index_flag_lengths[bit];
+ }
+ }
+
+ return offset;
+}
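+
+/*
+  Worked example for calculate_index_flag_offset() (with the current
+  single-entry index_flag_lengths, TTL_FLAG is the lowest bit and its
+  record section is ROCKSDB_SIZEOF_TTL_RECORD bytes):
+
+    calculate_index_flag_offset(TTL_FLAG, TTL_FLAG, &len)
+      -> returns offset 0, sets len = ROCKSDB_SIZEOF_TTL_RECORD
+
+  If more flags were added, a flag's offset would be the sum of the lengths
+  of all lower-order flags that are set in index_flags.
+*/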
+
+void Rdb_key_def::write_index_flag_field(Rdb_string_writer *const buf,
+ const uchar *const val,
+ enum INDEX_FLAG flag) const {
+ uint len;
+ uint offset = calculate_index_flag_offset(m_index_flags_bitmap, flag, &len);
+ DBUG_ASSERT(offset + len <= buf->get_current_pos());
+ memcpy(buf->ptr() + offset, val, len);
+}
+
+void Rdb_tbl_def::check_if_is_mysql_system_table() {
+ static const char *const system_dbs[] = {
+ "mysql",
+ "performance_schema",
+ "information_schema",
+ };
+
+ m_is_mysql_system_table = false;
+ for (uint ii = 0; ii < array_elements(system_dbs); ii++) {
+ if (strcmp(m_dbname.c_str(), system_dbs[ii]) == 0) {
+ m_is_mysql_system_table = true;
+ break;
+ }
+ }
+}
+
+void Rdb_tbl_def::check_and_set_read_free_rpl_table() {
+ m_is_read_free_rpl_table =
+#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported
+ rdb_read_free_regex_handler.matches(base_tablename());
+#else
+ false;
+#endif
+}
+
+void Rdb_tbl_def::set_name(const std::string &name) {
+ int err MY_ATTRIBUTE((__unused__));
+
+ m_dbname_tablename = name;
+ err = rdb_split_normalized_tablename(name, &m_dbname, &m_tablename,
+ &m_partition);
+ DBUG_ASSERT(err == 0);
+
+ check_if_is_mysql_system_table();
+}
+
+GL_INDEX_ID Rdb_tbl_def::get_autoincr_gl_index_id() {
+ for (uint i = 0; i < m_key_count; i++) {
+ auto &k = m_key_descr_arr[i];
+ if (k->m_index_type == Rdb_key_def::INDEX_TYPE_PRIMARY ||
+ k->m_index_type == Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY) {
+ return k->get_gl_index_id();
+ }
+ }
+
+ // Every table must have a primary key, even if it's hidden.
+ abort();
+ return GL_INDEX_ID();
+}
+
+void Rdb_ddl_manager::erase_index_num(const GL_INDEX_ID &gl_index_id) {
+ m_index_num_to_keydef.erase(gl_index_id);
+}
+
+void Rdb_ddl_manager::add_uncommitted_keydefs(
+ const std::unordered_set<std::shared_ptr<Rdb_key_def>> &indexes) {
+ mysql_rwlock_wrlock(&m_rwlock);
+ for (const auto &index : indexes) {
+ m_index_num_to_uncommitted_keydef[index->get_gl_index_id()] = index;
+ }
+ mysql_rwlock_unlock(&m_rwlock);
+}
+
+void Rdb_ddl_manager::remove_uncommitted_keydefs(
+ const std::unordered_set<std::shared_ptr<Rdb_key_def>> &indexes) {
+ mysql_rwlock_wrlock(&m_rwlock);
+ for (const auto &index : indexes) {
+ m_index_num_to_uncommitted_keydef.erase(index->get_gl_index_id());
+ }
+ mysql_rwlock_unlock(&m_rwlock);
+}
+
+namespace // anonymous namespace = not visible outside this source file
+{
+struct Rdb_validate_tbls : public Rdb_tables_scanner {
+ using tbl_info_t = std::pair<std::string, bool>;
+ using tbl_list_t = std::map<std::string, std::set<tbl_info_t>>;
+
+ tbl_list_t m_list;
+
+ int add_table(Rdb_tbl_def *tdef) override;
+
+ bool compare_to_actual_tables(const std::string &datadir, bool *has_errors);
+
+ bool scan_for_frms(const std::string &datadir, const std::string &dbname,
+ bool *has_errors);
+
+ bool check_frm_file(const std::string &fullpath, const std::string &dbname,
+ const std::string &tablename, bool *has_errors);
+};
+} // anonymous namespace
+
+/*
+ Get a list of tables that we expect to have .frm files for. This will use the
+ information just read from the RocksDB data dictionary.
+*/
+int Rdb_validate_tbls::add_table(Rdb_tbl_def *tdef) {
+ DBUG_ASSERT(tdef != nullptr);
+
+ /* Add the database/table into the list that are not temp table */
+ if (tdef->base_tablename().find(tmp_file_prefix) == std::string::npos) {
+ bool is_partition = tdef->base_partition().size() != 0;
+ m_list[tdef->base_dbname()].insert(
+ tbl_info_t(tdef->base_tablename(), is_partition));
+ }
+
+ return HA_EXIT_SUCCESS;
+}
+
+/*
+ Access the .frm file for this dbname/tablename and see if it is a RocksDB
+ table (or partition table).
+*/
+bool Rdb_validate_tbls::check_frm_file(const std::string &fullpath,
+ const std::string &dbname,
+ const std::string &tablename,
+ bool *has_errors) {
+ /* Check this .frm file to see what engine it uses */
+ String fullfilename(fullpath.c_str(), &my_charset_bin);
+ fullfilename.append(FN_DIRSEP);
+ fullfilename.append(tablename.c_str());
+ fullfilename.append(".frm");
+
+ /*
+ This function will return the legacy_db_type of the table. Currently
+ it does not reference the first parameter (THD* thd), but if it ever
+ did in the future we would need to make a version that does it without
+ the connection handle as we don't have one here.
+ */
+ char eng_type_buf[NAME_CHAR_LEN+1];
+ LEX_STRING eng_type_str = {eng_type_buf, 0};
+ //enum legacy_db_type eng_type;
+ frm_type_enum type = dd_frm_type(nullptr, fullfilename.c_ptr(), &eng_type_str);
+ if (type == FRMTYPE_ERROR) {
+ // NO_LINT_DEBUG
+    sql_print_warning("RocksDB: Failed to open/read .frm file: %s",
+ fullfilename.ptr());
+ return false;
+ }
+
+ if (type == FRMTYPE_TABLE) {
+ /* For a RocksDB table do we have a reference in the data dictionary? */
+ if (!strncmp(eng_type_str.str, "ROCKSDB", eng_type_str.length)) {
+ /*
+ Attempt to remove the table entry from the list of tables. If this
+ fails then we know we had a .frm file that wasn't registered in RocksDB.
+ */
+ tbl_info_t element(tablename, false);
+ if (m_list.count(dbname) == 0 || m_list[dbname].erase(element) == 0) {
+ // NO_LINT_DEBUG
+ sql_print_warning(
+ "RocksDB: Schema mismatch - "
+ "A .frm file exists for table %s.%s, "
+ "but that table is not registered in RocksDB",
+ dbname.c_str(), tablename.c_str());
+ *has_errors = true;
+ }
+ } else if (!strncmp(eng_type_str.str, "partition", eng_type_str.length)) {
+ /*
+ For partition tables, see if it is in the m_list as a partition,
+ but don't generate an error if it isn't there - we don't know that the
+ .frm is for RocksDB.
+ */
+ if (m_list.count(dbname) > 0) {
+ m_list[dbname].erase(tbl_info_t(tablename, true));
+ }
+ }
+ }
+
+ return true;
+}
+
+/* Scan the database subdirectory for .frm files */
+bool Rdb_validate_tbls::scan_for_frms(const std::string &datadir,
+ const std::string &dbname,
+ bool *has_errors) {
+ bool result = true;
+ std::string fullpath = datadir + dbname;
+ struct st_my_dir *dir_info = my_dir(fullpath.c_str(), MYF(MY_DONT_SORT));
+
+ /* Access the directory */
+ if (dir_info == nullptr) {
+ // NO_LINT_DEBUG
+ sql_print_warning("RocksDB: Could not open database directory: %s",
+ fullpath.c_str());
+ return false;
+ }
+
+ /* Scan through the files in the directory */
+ struct fileinfo *file_info = dir_info->dir_entry;
+ for (uint ii = 0; ii < dir_info->number_of_files; ii++, file_info++) {
+ /* Find .frm files that are not temp files (those that contain '#sql') */
+ const char *ext = strrchr(file_info->name, '.');
+ if (ext != nullptr && strstr(file_info->name, tmp_file_prefix) == nullptr &&
+ strcmp(ext, ".frm") == 0) {
+ std::string tablename =
+ std::string(file_info->name, ext - file_info->name);
+
+ /* Check to see if the .frm file is from RocksDB */
+ if (!check_frm_file(fullpath, dbname, tablename, has_errors)) {
+ result = false;
+ break;
+ }
+ }
+ }
+
+  /* Remove any databases that have no more tables listed */
+ if (m_list.count(dbname) == 1 && m_list[dbname].size() == 0) {
+ m_list.erase(dbname);
+ }
+
+ /* Release the directory entry */
+ my_dirend(dir_info);
+
+ return result;
+}
+
+/*
+ Scan the datadir for all databases (subdirectories) and get a list of .frm
+ files they contain
+*/
+bool Rdb_validate_tbls::compare_to_actual_tables(const std::string &datadir,
+ bool *has_errors) {
+ bool result = true;
+ struct st_my_dir *dir_info;
+ struct fileinfo *file_info;
+
+ dir_info = my_dir(datadir.c_str(), MYF(MY_DONT_SORT | MY_WANT_STAT));
+ if (dir_info == nullptr) {
+ // NO_LINT_DEBUG
+ sql_print_warning("RocksDB: could not open datadir: %s", datadir.c_str());
+ return false;
+ }
+
+ file_info = dir_info->dir_entry;
+ for (uint ii = 0; ii < dir_info->number_of_files; ii++, file_info++) {
+ /* Ignore files/dirs starting with '.' */
+ if (file_info->name[0] == '.') continue;
+
+ /* Ignore all non-directory files */
+ if (!MY_S_ISDIR(file_info->mystat->st_mode)) continue;
+
+ /* Scan all the .frm files in the directory */
+ if (!scan_for_frms(datadir, file_info->name, has_errors)) {
+ result = false;
+ break;
+ }
+ }
+
+ /* Release the directory info */
+ my_dirend(dir_info);
+
+ return result;
+}
+
+/*
+ Validate that all auto increment values in the data dictionary are on a
+ supported version.
+*/
+bool Rdb_ddl_manager::validate_auto_incr() {
+ std::unique_ptr<rocksdb::Iterator> it(m_dict->new_iterator());
+
+ uchar auto_incr_entry[Rdb_key_def::INDEX_NUMBER_SIZE];
+ rdb_netbuf_store_index(auto_incr_entry, Rdb_key_def::AUTO_INC);
+ const rocksdb::Slice auto_incr_entry_slice(
+ reinterpret_cast<char *>(auto_incr_entry),
+ Rdb_key_def::INDEX_NUMBER_SIZE);
+ for (it->Seek(auto_incr_entry_slice); it->Valid(); it->Next()) {
+ const rocksdb::Slice key = it->key();
+ const rocksdb::Slice val = it->value();
+ GL_INDEX_ID gl_index_id;
+
+ if (key.size() >= Rdb_key_def::INDEX_NUMBER_SIZE &&
+ memcmp(key.data(), auto_incr_entry, Rdb_key_def::INDEX_NUMBER_SIZE)) {
+ break;
+ }
+
+ if (key.size() != Rdb_key_def::INDEX_NUMBER_SIZE * 3) {
+ return false;
+ }
+
+ if (val.size() <= Rdb_key_def::VERSION_SIZE) {
+ return false;
+ }
+
+    // Check if we have orphaned entries for whatever reason by
+    // cross-referencing the DDL entries.
+ auto ptr = reinterpret_cast<const uchar *>(key.data());
+ ptr += Rdb_key_def::INDEX_NUMBER_SIZE;
+ rdb_netbuf_read_gl_index(&ptr, &gl_index_id);
+ if (!m_dict->get_index_info(gl_index_id, nullptr)) {
+ // NO_LINT_DEBUG
+ sql_print_warning(
+ "RocksDB: AUTOINC mismatch - "
+ "Index number (%u, %u) found in AUTOINC "
+ "but does not exist as a DDL entry",
+ gl_index_id.cf_id, gl_index_id.index_id);
+ return false;
+ }
+
+ ptr = reinterpret_cast<const uchar *>(val.data());
+ const int version = rdb_netbuf_read_uint16(&ptr);
+ if (version > Rdb_key_def::AUTO_INCREMENT_VERSION) {
+ // NO_LINT_DEBUG
+ sql_print_warning(
+ "RocksDB: AUTOINC mismatch - "
+ "Index number (%u, %u) found in AUTOINC "
+ "is on unsupported version %d",
+ gl_index_id.cf_id, gl_index_id.index_id, version);
+ return false;
+ }
+ }
+
+ if (!it->status().ok()) {
+ return false;
+ }
+
+ return true;
+}
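+
+/*
+  Record shape validated above (sketch derived from the size checks):
+
+    key:   [ AUTO_INC : 4 ][ cf_id : 4 ][ index_id : 4 ]
+    value: [ AUTO_INCREMENT_VERSION : 2 ][ counter payload ]
+
+  Entries with the wrong shape, an unsupported version, or a
+  (cf_id, index_id) that has no DDL entry fail validation.
+*/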
+
+/*
+ Validate that all the tables in the RocksDB database dictionary match the .frm
+ files in the datadir
+*/
+bool Rdb_ddl_manager::validate_schemas(void) {
+ bool has_errors = false;
+ const std::string datadir = std::string(mysql_real_data_home);
+ Rdb_validate_tbls table_list;
+
+ /* Get the list of tables from the database dictionary */
+ if (scan_for_tables(&table_list) != 0) {
+ return false;
+ }
+
+ /* Compare that to the list of actual .frm files */
+ if (!table_list.compare_to_actual_tables(datadir, &has_errors)) {
+ return false;
+ }
+
+ /*
+ Any tables left in the tables list are ones that are registered in RocksDB
+ but don't have .frm files.
+ */
+ for (const auto &db : table_list.m_list) {
+ for (const auto &table : db.second) {
+ // NO_LINT_DEBUG
+ sql_print_warning(
+ "RocksDB: Schema mismatch - "
+ "Table %s.%s is registered in RocksDB "
+ "but does not have a .frm file",
+ db.first.c_str(), table.first.c_str());
+ has_errors = true;
+ }
+ }
+
+ return !has_errors;
+}
+
+bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg,
+ Rdb_cf_manager *const cf_manager,
+ const uint32_t validate_tables) {
+ m_dict = dict_arg;
+ mysql_rwlock_init(0, &m_rwlock);
+
+ /* Read the data dictionary and populate the hash */
+ uchar ddl_entry[Rdb_key_def::INDEX_NUMBER_SIZE];
+ rdb_netbuf_store_index(ddl_entry, Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER);
+ const rocksdb::Slice ddl_entry_slice((char *)ddl_entry,
+ Rdb_key_def::INDEX_NUMBER_SIZE);
+
+ /* Reading data dictionary should always skip bloom filter */
+  // unique_ptr so the early error returns below do not leak the iterator
+  std::unique_ptr<rocksdb::Iterator> it(m_dict->new_iterator());
+ int i = 0;
+
+ uint max_index_id_in_dict = 0;
+ m_dict->get_max_index_id(&max_index_id_in_dict);
+
+ for (it->Seek(ddl_entry_slice); it->Valid(); it->Next()) {
+ const uchar *ptr;
+ const uchar *ptr_end;
+ const rocksdb::Slice key = it->key();
+ const rocksdb::Slice val = it->value();
+
+ if (key.size() >= Rdb_key_def::INDEX_NUMBER_SIZE &&
+ memcmp(key.data(), ddl_entry, Rdb_key_def::INDEX_NUMBER_SIZE)) {
+ break;
+ }
+
+ if (key.size() <= Rdb_key_def::INDEX_NUMBER_SIZE) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Table_store: key has length %d (corruption?)",
+ (int)key.size());
+ return true;
+ }
+
+ Rdb_tbl_def *const tdef =
+ new Rdb_tbl_def(key, Rdb_key_def::INDEX_NUMBER_SIZE);
+
+ // Now, read the DDLs.
+ const int real_val_size = val.size() - Rdb_key_def::VERSION_SIZE;
+    if (real_val_size % (Rdb_key_def::PACKED_SIZE * 2) > 0) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Table_store: invalid keylist for table %s",
+ tdef->full_tablename().c_str());
+ return true;
+ }
+ tdef->m_key_count = real_val_size / (Rdb_key_def::PACKED_SIZE * 2);
+ tdef->m_key_descr_arr = new std::shared_ptr<Rdb_key_def>[tdef->m_key_count];
+
+ ptr = reinterpret_cast<const uchar *>(val.data());
+ const int version = rdb_netbuf_read_uint16(&ptr);
+ if (version != Rdb_key_def::DDL_ENTRY_INDEX_VERSION) {
+ // NO_LINT_DEBUG
+      sql_print_error(
+          "RocksDB: DDL ENTRY Version was not expected. "
+          "Expected: %d, Actual: %d",
+ Rdb_key_def::DDL_ENTRY_INDEX_VERSION, version);
+ return true;
+ }
+ ptr_end = ptr + real_val_size;
+ for (uint keyno = 0; ptr < ptr_end; keyno++) {
+ GL_INDEX_ID gl_index_id;
+ rdb_netbuf_read_gl_index(&ptr, &gl_index_id);
+ uint flags = 0;
+ struct Rdb_index_info index_info;
+ if (!m_dict->get_index_info(gl_index_id, &index_info)) {
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Could not get index information "
+ "for Index Number (%u,%u), table %s",
+ gl_index_id.cf_id, gl_index_id.index_id,
+ tdef->full_tablename().c_str());
+ return true;
+ }
+ if (max_index_id_in_dict < gl_index_id.index_id) {
+ // NO_LINT_DEBUG
+        sql_print_error(
+            "RocksDB: Found max index id %u from data dictionary "
+            "but also found a larger index id %u in the dictionary. "
+            "This should never happen and is possibly a bug.",
+ max_index_id_in_dict, gl_index_id.index_id);
+ return true;
+ }
+ if (!m_dict->get_cf_flags(gl_index_id.cf_id, &flags)) {
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Could not get Column Family Flags "
+ "for CF Number %d, table %s",
+ gl_index_id.cf_id, tdef->full_tablename().c_str());
+ return true;
+ }
+
+ if ((flags & Rdb_key_def::AUTO_CF_FLAG) != 0) {
+        // The per-index cf option is deprecated. Make sure we don't have the
+        // flag set in any existing database.
+        // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: The defunct AUTO_CF_FLAG is enabled for CF "
+ "number %d, table %s",
+ gl_index_id.cf_id, tdef->full_tablename().c_str());
+ }
+
+ rocksdb::ColumnFamilyHandle *const cfh =
+ cf_manager->get_cf(gl_index_id.cf_id);
+ DBUG_ASSERT(cfh != nullptr);
+
+ uint32 ttl_rec_offset =
+ Rdb_key_def::has_index_flag(index_info.m_index_flags,
+ Rdb_key_def::TTL_FLAG)
+ ? Rdb_key_def::calculate_index_flag_offset(
+ index_info.m_index_flags, Rdb_key_def::TTL_FLAG)
+ : UINT_MAX;
+
+ /*
+ We can't fully initialize Rdb_key_def object here, because full
+ initialization requires that there is an open TABLE* where we could
+ look at Field* objects and set max_length and other attributes
+ */
+ tdef->m_key_descr_arr[keyno] = std::make_shared<Rdb_key_def>(
+ gl_index_id.index_id, keyno, cfh, index_info.m_index_dict_version,
+ index_info.m_index_type, index_info.m_kv_version,
+ flags & Rdb_key_def::REVERSE_CF_FLAG,
+ flags & Rdb_key_def::PER_PARTITION_CF_FLAG, "",
+ m_dict->get_stats(gl_index_id), index_info.m_index_flags,
+ ttl_rec_offset, index_info.m_ttl_duration);
+ }
+ put(tdef);
+ i++;
+ }
+
+ /*
+    If validate_tables is greater than 0, run the validation. Only fail the
+    initialization if the setting is 1. If the setting is 2, we continue.
+ */
+ if (validate_tables > 0) {
+ std::string msg;
+ if (!validate_schemas()) {
+ msg =
+ "RocksDB: Problems validating data dictionary "
+ "against .frm files, exiting";
+ } else if (!validate_auto_incr()) {
+ msg =
+ "RocksDB: Problems validating auto increment values in "
+ "data dictionary, exiting";
+ }
+ if (validate_tables == 1 && !msg.empty()) {
+ // NO_LINT_DEBUG
+ sql_print_error("%s", msg.c_str());
+ return true;
+ }
+ }
+
+ // index ids used by applications should not conflict with
+ // data dictionary index ids
+ if (max_index_id_in_dict < Rdb_key_def::END_DICT_INDEX_ID) {
+ max_index_id_in_dict = Rdb_key_def::END_DICT_INDEX_ID;
+ }
+
+ m_sequence.init(max_index_id_in_dict + 1);
+
+  if (!it->status().ok()) {
+    rdb_log_status_error(it->status(), "Table_store load error");
+    return true;
+  }
+ // NO_LINT_DEBUG
+ sql_print_information("RocksDB: Table_store: loaded DDL data for %d tables",
+ i);
+ return false;
+}
+
+Rdb_tbl_def *Rdb_ddl_manager::find(const std::string &table_name,
+ const bool lock) {
+ if (lock) {
+ mysql_rwlock_rdlock(&m_rwlock);
+ }
+
+ Rdb_tbl_def *rec = nullptr;
+ const auto it = m_ddl_map.find(table_name);
+ if (it != m_ddl_map.end()) {
+ rec = it->second;
+ }
+
+ if (lock) {
+ mysql_rwlock_unlock(&m_rwlock);
+ }
+
+ return rec;
+}
+
+// this is a safe version of the find() function below. It acquires a read
+// lock on m_rwlock to make sure the Rdb_key_def is not discarded while we
+// are finding it. Copying it into 'ret' increments the reference count,
+// making sure that the object will not be discarded until we are finished
+// with it.
+std::shared_ptr<const Rdb_key_def> Rdb_ddl_manager::safe_find(
+ GL_INDEX_ID gl_index_id) {
+ std::shared_ptr<const Rdb_key_def> ret(nullptr);
+
+ mysql_rwlock_rdlock(&m_rwlock);
+
+ auto it = m_index_num_to_keydef.find(gl_index_id);
+ if (it != m_index_num_to_keydef.end()) {
+ const auto table_def = find(it->second.first, false);
+ if (table_def && it->second.second < table_def->m_key_count) {
+ const auto &kd = table_def->m_key_descr_arr[it->second.second];
+ if (kd->max_storage_fmt_length() != 0) {
+ ret = kd;
+ }
+ }
+ } else {
+ auto it = m_index_num_to_uncommitted_keydef.find(gl_index_id);
+ if (it != m_index_num_to_uncommitted_keydef.end()) {
+ const auto &kd = it->second;
+ if (kd->max_storage_fmt_length() != 0) {
+ ret = kd;
+ }
+ }
+ }
+
+ mysql_rwlock_unlock(&m_rwlock);
+
+ return ret;
+}
+
+// this method assumes at least read-only lock on m_rwlock
+const std::shared_ptr<Rdb_key_def> &Rdb_ddl_manager::find(
+ GL_INDEX_ID gl_index_id) {
+ auto it = m_index_num_to_keydef.find(gl_index_id);
+ if (it != m_index_num_to_keydef.end()) {
+ auto table_def = find(it->second.first, false);
+ if (table_def) {
+ if (it->second.second < table_def->m_key_count) {
+ return table_def->m_key_descr_arr[it->second.second];
+ }
+ }
+ } else {
+ auto it = m_index_num_to_uncommitted_keydef.find(gl_index_id);
+ if (it != m_index_num_to_uncommitted_keydef.end()) {
+ return it->second;
+ }
+ }
+
+ static std::shared_ptr<Rdb_key_def> empty = nullptr;
+
+ return empty;
+}
+
+// this method returns the name of the table based on an index id. It acquires
+// a read lock on m_rwlock.
+const std::string Rdb_ddl_manager::safe_get_table_name(
+ const GL_INDEX_ID &gl_index_id) {
+ std::string ret;
+ mysql_rwlock_rdlock(&m_rwlock);
+ auto it = m_index_num_to_keydef.find(gl_index_id);
+ if (it != m_index_num_to_keydef.end()) {
+ ret = it->second.first;
+ }
+ mysql_rwlock_unlock(&m_rwlock);
+ return ret;
+}
+
+void Rdb_ddl_manager::set_stats(
+ const std::unordered_map<GL_INDEX_ID, Rdb_index_stats> &stats) {
+ mysql_rwlock_wrlock(&m_rwlock);
+  for (const auto &src : stats) {
+ const auto &keydef = find(src.second.m_gl_index_id);
+ if (keydef) {
+ keydef->m_stats = src.second;
+ m_stats2store[keydef->m_stats.m_gl_index_id] = keydef->m_stats;
+ }
+ }
+ mysql_rwlock_unlock(&m_rwlock);
+}
+
+void Rdb_ddl_manager::adjust_stats(
+ const std::vector<Rdb_index_stats> &new_data,
+ const std::vector<Rdb_index_stats> &deleted_data) {
+ mysql_rwlock_wrlock(&m_rwlock);
+ int i = 0;
+ for (const auto &data : {new_data, deleted_data}) {
+ for (const auto &src : data) {
+ const auto &keydef = find(src.m_gl_index_id);
+ if (keydef) {
+ keydef->m_stats.m_distinct_keys_per_prefix.resize(
+ keydef->get_key_parts());
+ keydef->m_stats.merge(src, i == 0, keydef->max_storage_fmt_length());
+ m_stats2store[keydef->m_stats.m_gl_index_id] = keydef->m_stats;
+ }
+ }
+ i++;
+ }
+ const bool should_save_stats = !m_stats2store.empty();
+ mysql_rwlock_unlock(&m_rwlock);
+ if (should_save_stats) {
+ // Queue an async persist_stats(false) call to the background thread.
+ rdb_queue_save_stats_request();
+ }
+}
+
+void Rdb_ddl_manager::persist_stats(const bool sync) {
+ mysql_rwlock_wrlock(&m_rwlock);
+ const auto local_stats2store = std::move(m_stats2store);
+ m_stats2store.clear();
+ mysql_rwlock_unlock(&m_rwlock);
+
+ // Persist stats
+ const std::unique_ptr<rocksdb::WriteBatch> wb = m_dict->begin();
+ std::vector<Rdb_index_stats> stats;
+ std::transform(local_stats2store.begin(), local_stats2store.end(),
+ std::back_inserter(stats),
+ [](const std::pair<GL_INDEX_ID, Rdb_index_stats> &s) {
+ return s.second;
+ });
+ m_dict->add_stats(wb.get(), stats);
+ m_dict->commit(wb.get(), sync);
+}
+
+/*
+ Put table definition of `tbl` into the mapping, and also write it to the
+ on-disk data dictionary.
+*/
+
+int Rdb_ddl_manager::put_and_write(Rdb_tbl_def *const tbl,
+ rocksdb::WriteBatch *const batch) {
+ Rdb_buf_writer<FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE> buf_writer;
+
+ buf_writer.write_index(Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER);
+
+ const std::string &dbname_tablename = tbl->full_tablename();
+ buf_writer.write(dbname_tablename.c_str(), dbname_tablename.size());
+
+ int res;
+ if ((res = tbl->put_dict(m_dict, batch, buf_writer.to_slice()))) {
+ return res;
+ }
+ if ((res = put(tbl))) {
+ return res;
+ }
+ return HA_EXIT_SUCCESS;
+}
+
+/* Return 0 - ok, other value - error */
+/* TODO:
+ This function modifies m_ddl_map and m_index_num_to_keydef.
+ However, these changes need to be reversed if dict_manager.commit fails
+ See the discussion here: https://reviews.facebook.net/D35925#inline-259167
+ Tracked by https://github.com/facebook/mysql-5.6/issues/33
+*/
+int Rdb_ddl_manager::put(Rdb_tbl_def *const tbl, const bool lock) {
+ Rdb_tbl_def *rec;
+ const std::string &dbname_tablename = tbl->full_tablename();
+
+ if (lock) mysql_rwlock_wrlock(&m_rwlock);
+
+ // We have to do this find because 'tbl' is not yet in the list. We need
+ // to find the one we are replacing ('rec')
+ rec = find(dbname_tablename, false);
+ if (rec) {
+ // Free the old record.
+ delete rec;
+ m_ddl_map.erase(dbname_tablename);
+ }
+ m_ddl_map.emplace(dbname_tablename, tbl);
+
+ for (uint keyno = 0; keyno < tbl->m_key_count; keyno++) {
+ m_index_num_to_keydef[tbl->m_key_descr_arr[keyno]->get_gl_index_id()] =
+ std::make_pair(dbname_tablename, keyno);
+ }
+ tbl->check_and_set_read_free_rpl_table();
+
+ if (lock) mysql_rwlock_unlock(&m_rwlock);
+ return 0;
+}
+
+void Rdb_ddl_manager::remove(Rdb_tbl_def *const tbl,
+ rocksdb::WriteBatch *const batch,
+ const bool lock) {
+ if (lock) mysql_rwlock_wrlock(&m_rwlock);
+
+ Rdb_buf_writer<FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE> key_writer;
+ key_writer.write_index(Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER);
+ const std::string &dbname_tablename = tbl->full_tablename();
+ key_writer.write(dbname_tablename.c_str(), dbname_tablename.size());
+
+ m_dict->delete_key(batch, key_writer.to_slice());
+
+ const auto it = m_ddl_map.find(dbname_tablename);
+ if (it != m_ddl_map.end()) {
+ // Free Rdb_tbl_def
+ delete it->second;
+
+ m_ddl_map.erase(it);
+ }
+
+ if (lock) mysql_rwlock_unlock(&m_rwlock);
+}
+
+bool Rdb_ddl_manager::rename(const std::string &from, const std::string &to,
+ rocksdb::WriteBatch *const batch) {
+ Rdb_tbl_def *rec;
+ Rdb_tbl_def *new_rec;
+ bool res = true;
+ Rdb_buf_writer<FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE> new_buf_writer;
+
+ mysql_rwlock_wrlock(&m_rwlock);
+ if (!(rec = find(from, false))) {
+ mysql_rwlock_unlock(&m_rwlock);
+ return true;
+ }
+
+ new_rec = new Rdb_tbl_def(to);
+
+ new_rec->m_key_count = rec->m_key_count;
+ new_rec->m_auto_incr_val =
+ rec->m_auto_incr_val.load(std::memory_order_relaxed);
+ new_rec->m_key_descr_arr = rec->m_key_descr_arr;
+
+ new_rec->m_hidden_pk_val =
+ rec->m_hidden_pk_val.load(std::memory_order_relaxed);
+
+  // so that it's not freed when deleting the old rec
+ rec->m_key_descr_arr = nullptr;
+
+ // Create a new key
+ new_buf_writer.write_index(Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER);
+
+ const std::string &dbname_tablename = new_rec->full_tablename();
+ new_buf_writer.write(dbname_tablename.c_str(), dbname_tablename.size());
+
+ // Create a key to add
+ if (!new_rec->put_dict(m_dict, batch, new_buf_writer.to_slice())) {
+ remove(rec, batch, false);
+ put(new_rec, false);
+ res = false; // ok
+ }
+
+ mysql_rwlock_unlock(&m_rwlock);
+ return res;
+}
+
+void Rdb_ddl_manager::cleanup() {
+ for (const auto &kv : m_ddl_map) {
+ delete kv.second;
+ }
+ m_ddl_map.clear();
+
+ mysql_rwlock_destroy(&m_rwlock);
+ m_sequence.cleanup();
+}
+
+int Rdb_ddl_manager::scan_for_tables(Rdb_tables_scanner *const tables_scanner) {
+ int ret;
+ Rdb_tbl_def *rec;
+
+ DBUG_ASSERT(tables_scanner != nullptr);
+
+ mysql_rwlock_rdlock(&m_rwlock);
+
+ ret = 0;
+
+ for (const auto &kv : m_ddl_map) {
+ rec = kv.second;
+ ret = tables_scanner->add_table(rec);
+ if (ret) break;
+ }
+
+ mysql_rwlock_unlock(&m_rwlock);
+ return ret;
+}
+
+/*
+ Rdb_binlog_manager class implementation
+*/
+
+bool Rdb_binlog_manager::init(Rdb_dict_manager *const dict_arg) {
+ DBUG_ASSERT(dict_arg != nullptr);
+ m_dict = dict_arg;
+
+ m_key_writer.reset();
+ m_key_writer.write_index(Rdb_key_def::BINLOG_INFO_INDEX_NUMBER);
+ m_key_slice = m_key_writer.to_slice();
+ return false;
+}
+
+void Rdb_binlog_manager::cleanup() {}
+
+/**
+ Set the binlog name, position, and optionally the GTID into the WriteBatch.
+ This function should be called as part of transaction commit, since binlog
+ info is set only at commit time. The actual write into RocksDB is not done
+ here, so it is not possible to check here whether the write succeeded.
+ @param binlog_name Binlog name
+ @param binlog_pos Binlog pos
+ @param batch WriteBatch
+*/
+void Rdb_binlog_manager::update(const char *const binlog_name,
+ const my_off_t binlog_pos,
+ rocksdb::WriteBatchBase *const batch) {
+ if (binlog_name && binlog_pos) {
+ // max binlog length (512) + binlog pos (4) + binlog gtid (57) < 1024
+ const size_t RDB_MAX_BINLOG_INFO_LEN = 1024;
+ Rdb_buf_writer<RDB_MAX_BINLOG_INFO_LEN> value_writer;
+
+ // store version
+ value_writer.write_uint16(Rdb_key_def::BINLOG_INFO_INDEX_NUMBER_VERSION);
+
+ // store binlog file name length
+ DBUG_ASSERT(strlen(binlog_name) <= FN_REFLEN);
+ const uint16_t binlog_name_len = strlen(binlog_name);
+ value_writer.write_uint16(binlog_name_len);
+
+ // store binlog file name
+ value_writer.write(binlog_name, binlog_name_len);
+
+ // store binlog pos
+ value_writer.write_uint32(binlog_pos);
+
+#ifdef MARIADB_MERGE_2019
+ // store binlog gtid length.
+ // If gtid was not set, store 0 instead
+ const uint16_t binlog_max_gtid_len =
+ binlog_max_gtid ? strlen(binlog_max_gtid) : 0;
+ value_writer.write_uint16(binlog_max_gtid_len);
+
+ if (binlog_max_gtid_len > 0) {
+ // store binlog gtid
+ value_writer.write(binlog_max_gtid, binlog_max_gtid_len);
+ }
+#endif
+
+ m_dict->put_key(batch, m_key_slice, value_writer.to_slice());
+ }
+}
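+
+/*
+  For illustration, the value written by update() above is laid out as
+
+    [version:2][name_len:2][binlog_name:name_len][binlog_pos:4]
+
+  with an optional [gtid_len:2][gtid:gtid_len] suffix under
+  MARIADB_MERGE_2019. Integers are written in network byte order by the
+  write_uint*() helpers, matching the rdb_netbuf_to_uint*() readers used
+  by unpack_value().
+*/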
+
+/**
+ Read the committed binlog entry stored in RocksDB, then unpack it.
+ @param[OUT] binlog_name Binlog name
+ @param[OUT] binlog_pos Binlog pos
+ @param[OUT] binlog_gtid Binlog GTID
+ @return
+ true if binlog info was found (valid behavior)
+ false otherwise
+*/
+bool Rdb_binlog_manager::read(char *const binlog_name,
+ my_off_t *const binlog_pos,
+ char *const binlog_gtid) const {
+ bool ret = false;
+ if (binlog_name) {
+ std::string value;
+ rocksdb::Status status = m_dict->get_value(m_key_slice, &value);
+ if (status.ok()) {
+ if (!unpack_value((const uchar *)value.c_str(), value.size(), binlog_name,
+ binlog_pos, binlog_gtid)) {
+ ret = true;
+ }
+ }
+ }
+ return ret;
+}
+
+/**
+ Unpack the value, then split it into binlog_name, binlog_pos (and
+ binlog_gtid).
+ @param[IN] value Binlog state info fetched from RocksDB
+ @param[IN] value_size_arg Size of the fetched value
+ @param[OUT] binlog_name Binlog name
+ @param[OUT] binlog_pos Binlog pos
+ @param[OUT] binlog_gtid Binlog GTID
+ @return true on error
+*/
+bool Rdb_binlog_manager::unpack_value(const uchar *const value,
+ size_t value_size_arg,
+ char *const binlog_name,
+ my_off_t *const binlog_pos,
+ char *const binlog_gtid) const {
+ uint pack_len = 0;
+ intmax_t value_size= value_size_arg;
+
+ DBUG_ASSERT(binlog_pos != nullptr);
+
+ if ((value_size -= Rdb_key_def::VERSION_SIZE) < 0)
+ return true;
+ // read version
+ const uint16_t version = rdb_netbuf_to_uint16(value);
+
+ pack_len += Rdb_key_def::VERSION_SIZE;
+ if (version != Rdb_key_def::BINLOG_INFO_INDEX_NUMBER_VERSION) return true;
+
+ if ((value_size -= sizeof(uint16)) < 0)
+ return true;
+
+ // read binlog file name length
+ const uint16_t binlog_name_len = rdb_netbuf_to_uint16(value + pack_len);
+ pack_len += sizeof(uint16);
+
+ if (binlog_name_len >= (FN_REFLEN+1))
+ return true;
+
+ if ((value_size -= binlog_name_len) < 0)
+ return true;
+
+ if (binlog_name_len) {
+ // read and set binlog name
+ memcpy(binlog_name, value + pack_len, binlog_name_len);
+ binlog_name[binlog_name_len] = '\0';
+ pack_len += binlog_name_len;
+
+ if ((value_size -= sizeof(uint32)) < 0)
+ return true;
+ // read and set binlog pos
+ *binlog_pos = rdb_netbuf_to_uint32(value + pack_len);
+ pack_len += sizeof(uint32);
+
+ if ((value_size -= sizeof(uint16)) < 0)
+ return true;
+ // read gtid length
+ const uint16_t binlog_gtid_len = rdb_netbuf_to_uint16(value + pack_len);
+ pack_len += sizeof(uint16);
+
+ if (binlog_gtid_len >= GTID_BUF_LEN)
+ return true;
+ if ((value_size -= binlog_gtid_len) < 0)
+ return true;
+
+ if (binlog_gtid && binlog_gtid_len > 0) {
+ // read and set gtid
+ memcpy(binlog_gtid, value + pack_len, binlog_gtid_len);
+ binlog_gtid[binlog_gtid_len] = '\0';
+ pack_len += binlog_gtid_len;
+ }
+ }
+ return false;
+}
+
+/**
+ Inserts a row into the mysql.slave_gtid_info table. Doing this inside the
+ storage engine is more efficient than inserting/updating through MySQL.
+
+ @param[IN] id Primary key of the table.
+ @param[IN] db Database name. This is column 2 of the table.
+ @param[IN] gtid Gtid in human readable form. This is column 3 of the table.
+ @param[IN] write_batch Handle to storage engine writer.
+*/
+void Rdb_binlog_manager::update_slave_gtid_info(
+ const uint id, const char *const db, const char *const gtid,
+ rocksdb::WriteBatchBase *const write_batch) {
+ if (id && db && gtid) {
+ // Make sure that if the slave_gtid_info table exists we have a
+ // pointer to it via m_slave_gtid_info_tbl.
+ if (!m_slave_gtid_info_tbl.load()) {
+ m_slave_gtid_info_tbl.store(
+ rdb_get_ddl_manager()->find("mysql.slave_gtid_info"));
+ }
+ if (!m_slave_gtid_info_tbl.load()) {
+ // slave_gtid_info table is not present. Simply return.
+ return;
+ }
+ DBUG_ASSERT(m_slave_gtid_info_tbl.load()->m_key_count == 1);
+
+ const std::shared_ptr<const Rdb_key_def> &kd =
+ m_slave_gtid_info_tbl.load()->m_key_descr_arr[0];
+ String value;
+
+ // Build key
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE + 4> key_writer;
+ key_writer.write_index(kd->get_index_number());
+ key_writer.write_uint32(id);
+
+ // Build value
+ Rdb_buf_writer<128> value_writer;
+ DBUG_ASSERT(gtid);
+ const uint db_len = strlen(db);
+ const uint gtid_len = strlen(gtid);
+ // 1 byte used for flags. Empty here.
+ value_writer.write_byte(0);
+
+ // Write the db column (column 2 of the table).
+ DBUG_ASSERT(strlen(db) <= 64);
+ value_writer.write_byte(db_len);
+ value_writer.write(db, db_len);
+
+ // Write the gtid column (column 3 of the table).
+ DBUG_ASSERT(gtid_len <= 56);
+ value_writer.write_byte(gtid_len);
+ value_writer.write(gtid, gtid_len);
+
+ write_batch->Put(kd->get_cf(), key_writer.to_slice(),
+ value_writer.to_slice());
+ }
+}
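+
+/*
+  For illustration, the slave_gtid_info row written above is encoded as
+
+    key   = [index_number:4][id:4]
+    value = [flags:1][db_len:1][db:db_len][gtid_len:1][gtid:gtid_len]
+
+  mirroring the key_writer/value_writer calls in the function body.
+*/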
+
+bool Rdb_dict_manager::init(rocksdb::TransactionDB *const rdb_dict,
+ Rdb_cf_manager *const cf_manager) {
+ DBUG_ASSERT(rdb_dict != nullptr);
+ DBUG_ASSERT(cf_manager != nullptr);
+
+ mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST);
+
+ m_db = rdb_dict;
+
+ m_system_cfh = cf_manager->get_or_create_cf(m_db, DEFAULT_SYSTEM_CF_NAME);
+ rocksdb::ColumnFamilyHandle *default_cfh =
+ cf_manager->get_cf(DEFAULT_CF_NAME);
+
+ // System CF and default CF should be initialized
+ if (m_system_cfh == nullptr || default_cfh == nullptr) {
+ return HA_EXIT_FAILURE;
+ }
+
+ rdb_netbuf_store_index(m_key_buf_max_index_id, Rdb_key_def::MAX_INDEX_ID);
+
+ m_key_slice_max_index_id =
+ rocksdb::Slice(reinterpret_cast<char *>(m_key_buf_max_index_id),
+ Rdb_key_def::INDEX_NUMBER_SIZE);
+
+ resume_drop_indexes();
+ rollback_ongoing_index_creation();
+
+ // Initialize system CF and default CF flags
+ const std::unique_ptr<rocksdb::WriteBatch> wb = begin();
+ rocksdb::WriteBatch *const batch = wb.get();
+
+ add_cf_flags(batch, m_system_cfh->GetID(), 0);
+ add_cf_flags(batch, default_cfh->GetID(), 0);
+ commit(batch);
+
+ return HA_EXIT_SUCCESS;
+}
+
+std::unique_ptr<rocksdb::WriteBatch> Rdb_dict_manager::begin() const {
+ return std::unique_ptr<rocksdb::WriteBatch>(new rocksdb::WriteBatch);
+}
+
+void Rdb_dict_manager::put_key(rocksdb::WriteBatchBase *const batch,
+ const rocksdb::Slice &key,
+ const rocksdb::Slice &value) const {
+ batch->Put(m_system_cfh, key, value);
+}
+
+rocksdb::Status Rdb_dict_manager::get_value(const rocksdb::Slice &key,
+ std::string *const value) const {
+ rocksdb::ReadOptions options;
+ options.total_order_seek = true;
+ return m_db->Get(options, m_system_cfh, key, value);
+}
+
+void Rdb_dict_manager::delete_key(rocksdb::WriteBatchBase *batch,
+ const rocksdb::Slice &key) const {
+ batch->Delete(m_system_cfh, key);
+}
+
+rocksdb::Iterator *Rdb_dict_manager::new_iterator() const {
+ /* Reading data dictionary should always skip bloom filter */
+ rocksdb::ReadOptions read_options;
+ read_options.total_order_seek = true;
+ return m_db->NewIterator(read_options, m_system_cfh);
+}
+
+int Rdb_dict_manager::commit(rocksdb::WriteBatch *const batch,
+ const bool sync) const {
+ if (!batch) return HA_ERR_ROCKSDB_COMMIT_FAILED;
+ int res = HA_EXIT_SUCCESS;
+ rocksdb::WriteOptions options;
+ options.sync = sync;
+ rocksdb::TransactionDBWriteOptimizations optimize;
+ optimize.skip_concurrency_control = true;
+ rocksdb::Status s = m_db->Write(options, optimize, batch);
+ res = !s.ok(); // we return true when something failed
+ if (res) {
+ rdb_handle_io_error(s, RDB_IO_ERROR_DICT_COMMIT);
+ }
+ batch->Clear();
+ return res;
+}
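+
+/*
+  A minimal usage sketch of the batch API above (dict, key and value are
+  hypothetical names):
+
+    const std::unique_ptr<rocksdb::WriteBatch> wb = dict->begin();
+    dict->put_key(wb.get(), key, value);
+    if (dict->commit(wb.get())) {
+      // commit() returns a non-zero value when the write failed
+    }
+*/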
+
+void Rdb_dict_manager::dump_index_id(uchar *const netbuf,
+ Rdb_key_def::DATA_DICT_TYPE dict_type,
+ const GL_INDEX_ID &gl_index_id) {
+ rdb_netbuf_store_uint32(netbuf, dict_type);
+ rdb_netbuf_store_uint32(netbuf + Rdb_key_def::INDEX_NUMBER_SIZE,
+ gl_index_id.cf_id);
+ rdb_netbuf_store_uint32(netbuf + 2 * Rdb_key_def::INDEX_NUMBER_SIZE,
+ gl_index_id.index_id);
+}
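+
+/*
+  For illustration, dump_index_id() produces the fixed 12-byte key prefix
+  shared by all per-index dictionary records:
+
+    [dict_type:4][cf_id:4][index_id:4]
+
+  with each field stored in network byte order by rdb_netbuf_store_uint32().
+*/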
+
+void Rdb_dict_manager::delete_with_prefix(
+ rocksdb::WriteBatch *const batch, Rdb_key_def::DATA_DICT_TYPE dict_type,
+ const GL_INDEX_ID &gl_index_id) const {
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer;
+ dump_index_id(&key_writer, dict_type, gl_index_id);
+
+ delete_key(batch, key_writer.to_slice());
+}
+
+void Rdb_dict_manager::add_or_update_index_cf_mapping(
+ rocksdb::WriteBatch *batch, struct Rdb_index_info *const index_info) const {
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer;
+ dump_index_id(&key_writer, Rdb_key_def::INDEX_INFO,
+ index_info->m_gl_index_id);
+
+ Rdb_buf_writer<256> value_writer;
+
+ value_writer.write_uint16(Rdb_key_def::INDEX_INFO_VERSION_LATEST);
+ value_writer.write_byte(index_info->m_index_type);
+ value_writer.write_uint16(index_info->m_kv_version);
+ value_writer.write_uint32(index_info->m_index_flags);
+ value_writer.write_uint64(index_info->m_ttl_duration);
+
+ batch->Put(m_system_cfh, key_writer.to_slice(), value_writer.to_slice());
+}
+
+void Rdb_dict_manager::add_cf_flags(rocksdb::WriteBatch *const batch,
+ const uint32_t cf_id,
+ const uint32_t cf_flags) const {
+ DBUG_ASSERT(batch != nullptr);
+
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 2> key_writer;
+ key_writer.write_uint32(Rdb_key_def::CF_DEFINITION);
+ key_writer.write_uint32(cf_id);
+
+ Rdb_buf_writer<Rdb_key_def::VERSION_SIZE + Rdb_key_def::INDEX_NUMBER_SIZE>
+ value_writer;
+ value_writer.write_uint16(Rdb_key_def::CF_DEFINITION_VERSION);
+ value_writer.write_uint32(cf_flags);
+
+ batch->Put(m_system_cfh, key_writer.to_slice(), value_writer.to_slice());
+}
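+
+/*
+  For illustration, the CF definition record written above is
+
+    key   = [CF_DEFINITION:4][cf_id:4]
+    value = [CF_DEFINITION_VERSION:2][cf_flags:4]
+
+  which is the same layout get_cf_flags() expects when decoding.
+*/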
+
+void Rdb_dict_manager::delete_index_info(rocksdb::WriteBatch *batch,
+ const GL_INDEX_ID &gl_index_id) const {
+ delete_with_prefix(batch, Rdb_key_def::INDEX_INFO, gl_index_id);
+ delete_with_prefix(batch, Rdb_key_def::INDEX_STATISTICS, gl_index_id);
+ delete_with_prefix(batch, Rdb_key_def::AUTO_INC, gl_index_id);
+}
+
+bool Rdb_dict_manager::get_index_info(
+ const GL_INDEX_ID &gl_index_id,
+ struct Rdb_index_info *const index_info) const {
+ if (index_info) {
+ index_info->m_gl_index_id = gl_index_id;
+ }
+
+ bool found = false;
+ bool error = false;
+ std::string value;
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer;
+ dump_index_id(&key_writer, Rdb_key_def::INDEX_INFO, gl_index_id);
+
+ const rocksdb::Status &status = get_value(key_writer.to_slice(), &value);
+ if (status.ok()) {
+ if (!index_info) {
+ return true;
+ }
+
+ const uchar *const val = (const uchar *)value.c_str();
+ const uchar *ptr = val;
+ index_info->m_index_dict_version = rdb_netbuf_to_uint16(val);
+ ptr += RDB_SIZEOF_INDEX_INFO_VERSION;
+
+ switch (index_info->m_index_dict_version) {
+ case Rdb_key_def::INDEX_INFO_VERSION_FIELD_FLAGS:
+ /* Sanity check to prevent reading a bogus TTL record. */
+ if (value.size() != RDB_SIZEOF_INDEX_INFO_VERSION +
+ RDB_SIZEOF_INDEX_TYPE + RDB_SIZEOF_KV_VERSION +
+ RDB_SIZEOF_INDEX_FLAGS +
+ ROCKSDB_SIZEOF_TTL_RECORD) {
+ error = true;
+ break;
+ }
+ index_info->m_index_type = rdb_netbuf_to_byte(ptr);
+ ptr += RDB_SIZEOF_INDEX_TYPE;
+ index_info->m_kv_version = rdb_netbuf_to_uint16(ptr);
+ ptr += RDB_SIZEOF_KV_VERSION;
+ index_info->m_index_flags = rdb_netbuf_to_uint32(ptr);
+ ptr += RDB_SIZEOF_INDEX_FLAGS;
+ index_info->m_ttl_duration = rdb_netbuf_to_uint64(ptr);
+ found = true;
+ break;
+
+ case Rdb_key_def::INDEX_INFO_VERSION_TTL:
+ /* Sanity check to prevent reading a bogus TTL record. */
+ if (value.size() != RDB_SIZEOF_INDEX_INFO_VERSION +
+ RDB_SIZEOF_INDEX_TYPE + RDB_SIZEOF_KV_VERSION +
+ ROCKSDB_SIZEOF_TTL_RECORD) {
+ error = true;
+ break;
+ }
+ index_info->m_index_type = rdb_netbuf_to_byte(ptr);
+ ptr += RDB_SIZEOF_INDEX_TYPE;
+ index_info->m_kv_version = rdb_netbuf_to_uint16(ptr);
+ ptr += RDB_SIZEOF_KV_VERSION;
+ index_info->m_ttl_duration = rdb_netbuf_to_uint64(ptr);
+ if ((index_info->m_kv_version ==
+ Rdb_key_def::PRIMARY_FORMAT_VERSION_TTL) &&
+ index_info->m_ttl_duration > 0) {
+ index_info->m_index_flags = Rdb_key_def::TTL_FLAG;
+ }
+ found = true;
+ break;
+
+ case Rdb_key_def::INDEX_INFO_VERSION_VERIFY_KV_FORMAT:
+ case Rdb_key_def::INDEX_INFO_VERSION_GLOBAL_ID:
+ index_info->m_index_type = rdb_netbuf_to_byte(ptr);
+ ptr += RDB_SIZEOF_INDEX_TYPE;
+ index_info->m_kv_version = rdb_netbuf_to_uint16(ptr);
+ found = true;
+ break;
+
+ default:
+ error = true;
+ break;
+ }
+
+ switch (index_info->m_index_type) {
+ case Rdb_key_def::INDEX_TYPE_PRIMARY:
+ case Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY: {
+ error = index_info->m_kv_version >
+ Rdb_key_def::PRIMARY_FORMAT_VERSION_LATEST;
+ break;
+ }
+ case Rdb_key_def::INDEX_TYPE_SECONDARY:
+ error = index_info->m_kv_version >
+ Rdb_key_def::SECONDARY_FORMAT_VERSION_LATEST;
+ break;
+ default:
+ error = true;
+ break;
+ }
+ }
+
+ if (error) {
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Found invalid key version number (%u, %u, %u, %llu) "
+ "from data dictionary. This should never happen "
+ "and it may be a bug.",
+ index_info->m_index_dict_version, index_info->m_index_type,
+ index_info->m_kv_version, index_info->m_ttl_duration);
+ abort();
+ }
+
+ return found;
+}
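+
+/*
+  For reference, the INDEX_INFO value layouts decoded above are:
+
+    GLOBAL_ID / VERIFY_KV_FORMAT:
+      [dict_version:2][index_type:1][kv_version:2]
+    TTL:
+      [dict_version:2][index_type:1][kv_version:2][ttl_duration:8]
+    FIELD_FLAGS (latest):
+      [dict_version:2][index_type:1][kv_version:2][index_flags:4]
+      [ttl_duration:8]
+*/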
+
+bool Rdb_dict_manager::get_cf_flags(const uint32_t cf_id,
+ uint32_t *const cf_flags) const {
+ DBUG_ASSERT(cf_flags != nullptr);
+
+ bool found = false;
+ std::string value;
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 2> key_writer;
+
+ key_writer.write_uint32(Rdb_key_def::CF_DEFINITION);
+ key_writer.write_uint32(cf_id);
+
+ const rocksdb::Status status = get_value(key_writer.to_slice(), &value);
+
+ if (status.ok()) {
+ const uchar *val = (const uchar *)value.c_str();
+ DBUG_ASSERT(val);
+
+ const uint16_t version = rdb_netbuf_to_uint16(val);
+
+ if (version == Rdb_key_def::CF_DEFINITION_VERSION) {
+ *cf_flags = rdb_netbuf_to_uint32(val + Rdb_key_def::VERSION_SIZE);
+ found = true;
+ }
+ }
+
+ return found;
+}
+
+/*
+  Return index ids that were marked as deleted (via DROP TABLE) but have not
+  yet been removed by drop_index_thread, as well as index ids whose creation
+  is still ongoing.
+ */
+void Rdb_dict_manager::get_ongoing_index_operation(
+ std::unordered_set<GL_INDEX_ID> *gl_index_ids,
+ Rdb_key_def::DATA_DICT_TYPE dd_type) const {
+ DBUG_ASSERT(dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING ||
+ dd_type == Rdb_key_def::DDL_CREATE_INDEX_ONGOING);
+
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE> index_writer;
+ index_writer.write_uint32(dd_type);
+ const rocksdb::Slice index_slice = index_writer.to_slice();
+
+ rocksdb::Iterator *it = new_iterator();
+ for (it->Seek(index_slice); it->Valid(); it->Next()) {
+ rocksdb::Slice key = it->key();
+ const uchar *const ptr = (const uchar *)key.data();
+
+ /*
+ Ongoing drop/create index operations require key to be of the form:
+ dd_type + cf_id + index_id (== INDEX_NUMBER_SIZE * 3)
+
+ This may need to be changed in the future if we want to process a new
+ ddl_type with different format.
+ */
+ if (key.size() != Rdb_key_def::INDEX_NUMBER_SIZE * 3 ||
+ rdb_netbuf_to_uint32(ptr) != dd_type) {
+ break;
+ }
+
+ // We don't check version right now since currently we always store only
+ // Rdb_key_def::DDL_DROP_INDEX_ONGOING_VERSION = 1 as a value.
+ // If the version number is ever increased, add version check logic here.
+ GL_INDEX_ID gl_index_id;
+ gl_index_id.cf_id =
+ rdb_netbuf_to_uint32(ptr + Rdb_key_def::INDEX_NUMBER_SIZE);
+ gl_index_id.index_id =
+ rdb_netbuf_to_uint32(ptr + 2 * Rdb_key_def::INDEX_NUMBER_SIZE);
+ gl_index_ids->insert(gl_index_id);
+ }
+ delete it;
+}
+
+/*
+  Return true if a create/drop operation is ongoing for index_id: it is
+  undergoing creation, or it was marked as deleted via DROP TABLE but
+  drop_index_thread has not wiped it yet.
+ */
+bool Rdb_dict_manager::is_index_operation_ongoing(
+ const GL_INDEX_ID &gl_index_id, Rdb_key_def::DATA_DICT_TYPE dd_type) const {
+ DBUG_ASSERT(dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING ||
+ dd_type == Rdb_key_def::DDL_CREATE_INDEX_ONGOING);
+
+ bool found = false;
+ std::string value;
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer;
+ dump_index_id(&key_writer, dd_type, gl_index_id);
+
+ const rocksdb::Status status = get_value(key_writer.to_slice(), &value);
+ if (status.ok()) {
+ found = true;
+ }
+ return found;
+}
+
+/*
+  Add index_id to the data dictionary so that it will be removed by
+  drop_index_thread, or to track online index creation.
+ */
+void Rdb_dict_manager::start_ongoing_index_operation(
+ rocksdb::WriteBatch *const batch, const GL_INDEX_ID &gl_index_id,
+ Rdb_key_def::DATA_DICT_TYPE dd_type) const {
+ DBUG_ASSERT(dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING ||
+ dd_type == Rdb_key_def::DDL_CREATE_INDEX_ONGOING);
+
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer;
+ Rdb_buf_writer<Rdb_key_def::VERSION_SIZE> value_writer;
+
+ dump_index_id(&key_writer, dd_type, gl_index_id);
+
+ // version as needed
+ if (dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING) {
+ value_writer.write_uint16(Rdb_key_def::DDL_DROP_INDEX_ONGOING_VERSION);
+ } else {
+ value_writer.write_uint16(Rdb_key_def::DDL_CREATE_INDEX_ONGOING_VERSION);
+ }
+
+ batch->Put(m_system_cfh, key_writer.to_slice(), value_writer.to_slice());
+}
+
+/*
+  Remove index_id from the data dictionary to confirm that drop_index_thread
+  has finished dropping all key/values of the index_id.
+ */
+void Rdb_dict_manager::end_ongoing_index_operation(
+ rocksdb::WriteBatch *const batch, const GL_INDEX_ID &gl_index_id,
+ Rdb_key_def::DATA_DICT_TYPE dd_type) const {
+ DBUG_ASSERT(dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING ||
+ dd_type == Rdb_key_def::DDL_CREATE_INDEX_ONGOING);
+
+ delete_with_prefix(batch, dd_type, gl_index_id);
+}
+
+/*
+  Return true if there are no target index ids to be removed by
+  drop_index_thread.
+ */
+bool Rdb_dict_manager::is_drop_index_empty() const {
+ std::unordered_set<GL_INDEX_ID> gl_index_ids;
+ get_ongoing_drop_indexes(&gl_index_ids);
+ return gl_index_ids.empty();
+}
+
+/*
+  This function is supposed to be called by DROP TABLE. It logs that dropping
+  indexes has started, and updates the data dictionary so that all associated
+  indexes will be removed.
+ */
+void Rdb_dict_manager::add_drop_table(
+ std::shared_ptr<Rdb_key_def> *const key_descr, const uint32 n_keys,
+ rocksdb::WriteBatch *const batch) const {
+ std::unordered_set<GL_INDEX_ID> dropped_index_ids;
+ for (uint32 i = 0; i < n_keys; i++) {
+ dropped_index_ids.insert(key_descr[i]->get_gl_index_id());
+ }
+
+ add_drop_index(dropped_index_ids, batch);
+}
+
+/*
+  Called during inplace index drop operations. Logs that dropping indexes has
+  started, and updates the data dictionary so that all associated indexes
+  will be removed.
+ */
+void Rdb_dict_manager::add_drop_index(
+ const std::unordered_set<GL_INDEX_ID> &gl_index_ids,
+ rocksdb::WriteBatch *const batch) const {
+ for (const auto &gl_index_id : gl_index_ids) {
+ log_start_drop_index(gl_index_id, "Begin");
+ start_drop_index(batch, gl_index_id);
+ }
+}
+
+/*
+  Called during inplace index creation operations. Logs that adding indexes
+  has started, and updates the data dictionary with all associated indexes
+  to be added.
+ */
+void Rdb_dict_manager::add_create_index(
+ const std::unordered_set<GL_INDEX_ID> &gl_index_ids,
+ rocksdb::WriteBatch *const batch) const {
+ for (const auto &gl_index_id : gl_index_ids) {
+ // NO_LINT_DEBUG
+ sql_print_verbose_info("RocksDB: Begin index creation (%u,%u)",
+ gl_index_id.cf_id, gl_index_id.index_id);
+ start_create_index(batch, gl_index_id);
+ }
+}
+
+/*
+  This function is supposed to be called by drop_index_thread when it has
+  finished dropping any index, or at the completion of online index creation.
+ */
+void Rdb_dict_manager::finish_indexes_operation(
+ const std::unordered_set<GL_INDEX_ID> &gl_index_ids,
+ Rdb_key_def::DATA_DICT_TYPE dd_type) const {
+ DBUG_ASSERT(dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING ||
+ dd_type == Rdb_key_def::DDL_CREATE_INDEX_ONGOING);
+
+ const std::unique_ptr<rocksdb::WriteBatch> wb = begin();
+ rocksdb::WriteBatch *const batch = wb.get();
+
+ std::unordered_set<GL_INDEX_ID> incomplete_create_indexes;
+ get_ongoing_create_indexes(&incomplete_create_indexes);
+
+ for (const auto &gl_index_id : gl_index_ids) {
+ if (is_index_operation_ongoing(gl_index_id, dd_type)) {
+ end_ongoing_index_operation(batch, gl_index_id, dd_type);
+
+ /*
+ Remove the corresponding incomplete create indexes from data
+ dictionary as well
+ */
+ if (dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING) {
+ if (incomplete_create_indexes.count(gl_index_id)) {
+ end_ongoing_index_operation(batch, gl_index_id,
+ Rdb_key_def::DDL_CREATE_INDEX_ONGOING);
+ }
+ }
+ }
+
+ if (dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING) {
+ delete_index_info(batch, gl_index_id);
+ }
+ }
+ commit(batch);
+}
+
+/*
+  This function is supposed to be called when initializing Rdb_dict_manager
+  (at startup). If there are any index ids whose drop is still ongoing, it
+  prints messages for diagnostic purposes.
+ */
+void Rdb_dict_manager::resume_drop_indexes() const {
+ std::unordered_set<GL_INDEX_ID> gl_index_ids;
+ get_ongoing_drop_indexes(&gl_index_ids);
+
+ uint max_index_id_in_dict = 0;
+ get_max_index_id(&max_index_id_in_dict);
+
+ for (const auto &gl_index_id : gl_index_ids) {
+ log_start_drop_index(gl_index_id, "Resume");
+ if (max_index_id_in_dict < gl_index_id.index_id) {
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Found max index id %u from data dictionary "
+ "but also found dropped index id (%u,%u) from drop_index "
+ "dictionary. This should never happen and is possibly a "
+ "bug.",
+ max_index_id_in_dict, gl_index_id.cf_id, gl_index_id.index_id);
+ abort();
+ }
+ }
+}
+
+void Rdb_dict_manager::rollback_ongoing_index_creation() const {
+ const std::unique_ptr<rocksdb::WriteBatch> wb = begin();
+ rocksdb::WriteBatch *const batch = wb.get();
+
+ std::unordered_set<GL_INDEX_ID> gl_index_ids;
+ get_ongoing_create_indexes(&gl_index_ids);
+
+ for (const auto &gl_index_id : gl_index_ids) {
+ // NO_LINT_DEBUG
+ sql_print_verbose_info("RocksDB: Removing incomplete create index (%u,%u)",
+ gl_index_id.cf_id, gl_index_id.index_id);
+
+ start_drop_index(batch, gl_index_id);
+ }
+
+ commit(batch);
+}
+
+void Rdb_dict_manager::log_start_drop_table(
+ const std::shared_ptr<Rdb_key_def> *const key_descr, const uint32 n_keys,
+ const char *const log_action) const {
+ for (uint32 i = 0; i < n_keys; i++) {
+ log_start_drop_index(key_descr[i]->get_gl_index_id(), log_action);
+ }
+}
+
+void Rdb_dict_manager::log_start_drop_index(GL_INDEX_ID gl_index_id,
+ const char *log_action) const {
+ struct Rdb_index_info index_info;
+ if (!get_index_info(gl_index_id, &index_info)) {
+ /*
+ If we don't find the index info, it may be because this is a partially
+ created index that is not yet in the data dictionary and needs to be
+ rolled back.
+ */
+ std::unordered_set<GL_INDEX_ID> incomplete_create_indexes;
+ get_ongoing_create_indexes(&incomplete_create_indexes);
+
+ if (!incomplete_create_indexes.count(gl_index_id)) {
+ /* If it's not a partially created index, something is very wrong. */
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Failed to get column family info "
+ "from index id (%u,%u). MyRocks data dictionary may "
+ "get corrupted.",
+ gl_index_id.cf_id, gl_index_id.index_id);
+ abort();
+ }
+ }
+}
+
+bool Rdb_dict_manager::get_max_index_id(uint32_t *const index_id) const {
+ bool found = false;
+ std::string value;
+
+ const rocksdb::Status status = get_value(m_key_slice_max_index_id, &value);
+ if (status.ok()) {
+ const uchar *const val = (const uchar *)value.c_str();
+ const uint16_t version = rdb_netbuf_to_uint16(val);
+ if (version == Rdb_key_def::MAX_INDEX_ID_VERSION) {
+ *index_id = rdb_netbuf_to_uint32(val + Rdb_key_def::VERSION_SIZE);
+ found = true;
+ }
+ }
+ return found;
+}
+
+bool Rdb_dict_manager::update_max_index_id(rocksdb::WriteBatch *const batch,
+ const uint32_t index_id) const {
+ DBUG_ASSERT(batch != nullptr);
+
+ uint32_t old_index_id = -1;
+ if (get_max_index_id(&old_index_id)) {
+ if (old_index_id > index_id) {
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Found max index id %u from data dictionary "
+ "but trying to update to older value %u. This should "
+ "never happen and possibly a bug.",
+ old_index_id, index_id);
+ return true;
+ }
+ }
+
+ Rdb_buf_writer<Rdb_key_def::VERSION_SIZE + Rdb_key_def::INDEX_NUMBER_SIZE>
+ value_writer;
+ value_writer.write_uint16(Rdb_key_def::MAX_INDEX_ID_VERSION);
+ value_writer.write_uint32(index_id);
+
+ batch->Put(m_system_cfh, m_key_slice_max_index_id, value_writer.to_slice());
+ return false;
+}
+
+void Rdb_dict_manager::add_stats(
+ rocksdb::WriteBatch *const batch,
+ const std::vector<Rdb_index_stats> &stats) const {
+ DBUG_ASSERT(batch != nullptr);
+
+ for (const auto &it : stats) {
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer;
+ dump_index_id(&key_writer, Rdb_key_def::INDEX_STATISTICS, it.m_gl_index_id);
+
+ // IndexStats::materialize takes complete care of serialization including
+ // storing the version
+ const auto value =
+ Rdb_index_stats::materialize(std::vector<Rdb_index_stats>{it});
+
+ batch->Put(m_system_cfh, key_writer.to_slice(), value);
+ }
+}
+
+Rdb_index_stats Rdb_dict_manager::get_stats(GL_INDEX_ID gl_index_id) const {
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer;
+ dump_index_id(&key_writer, Rdb_key_def::INDEX_STATISTICS, gl_index_id);
+
+ std::string value;
+ const rocksdb::Status status = get_value(key_writer.to_slice(), &value);
+ if (status.ok()) {
+ std::vector<Rdb_index_stats> v;
+ // unmaterialize checks if the version matches
+ if (Rdb_index_stats::unmaterialize(value, &v) == 0 && v.size() == 1) {
+ return v[0];
+ }
+ }
+
+ return Rdb_index_stats();
+}
+
+rocksdb::Status Rdb_dict_manager::put_auto_incr_val(
+ rocksdb::WriteBatchBase *batch, const GL_INDEX_ID &gl_index_id,
+ ulonglong val, bool overwrite) const {
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer;
+ dump_index_id(&key_writer, Rdb_key_def::AUTO_INC, gl_index_id);
+
+ // Value is constructed by storing the version and the value.
+ Rdb_buf_writer<RDB_SIZEOF_AUTO_INCREMENT_VERSION +
+ ROCKSDB_SIZEOF_AUTOINC_VALUE>
+ value_writer;
+ value_writer.write_uint16(Rdb_key_def::AUTO_INCREMENT_VERSION);
+ value_writer.write_uint64(val);
+
+ if (overwrite) {
+ return batch->Put(m_system_cfh, key_writer.to_slice(),
+ value_writer.to_slice());
+ }
+ return batch->Merge(m_system_cfh, key_writer.to_slice(),
+ value_writer.to_slice());
+}
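+
+/*
+  For illustration, the auto-increment record written above is
+
+    key   = [AUTO_INC:4][cf_id:4][index_id:4]
+    value = [AUTO_INCREMENT_VERSION:2][val:8]
+
+  and get_auto_incr_val() below decodes the same layout.
+*/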
+
+bool Rdb_dict_manager::get_auto_incr_val(const GL_INDEX_ID &gl_index_id,
+ ulonglong *new_val) const {
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer;
+ dump_index_id(&key_writer, Rdb_key_def::AUTO_INC, gl_index_id);
+
+ std::string value;
+ const rocksdb::Status status = get_value(key_writer.to_slice(), &value);
+
+ if (status.ok()) {
+ const uchar *const val = reinterpret_cast<const uchar *>(value.data());
+
+ if (rdb_netbuf_to_uint16(val) <= Rdb_key_def::AUTO_INCREMENT_VERSION) {
+ *new_val = rdb_netbuf_to_uint64(val + RDB_SIZEOF_AUTO_INCREMENT_VERSION);
+ return true;
+ }
+ }
+ return false;
+}
+
+uint Rdb_seq_generator::get_and_update_next_number(
+ Rdb_dict_manager *const dict) {
+ DBUG_ASSERT(dict != nullptr);
+
+ uint res;
+ RDB_MUTEX_LOCK_CHECK(m_mutex);
+
+ res = m_next_number++;
+
+ const std::unique_ptr<rocksdb::WriteBatch> wb = dict->begin();
+ rocksdb::WriteBatch *const batch = wb.get();
+
+ DBUG_ASSERT(batch != nullptr);
+ dict->update_max_index_id(batch, res);
+ dict->commit(batch);
+
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+
+ return res;
+}
+
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h
new file mode 100644
index 00000000000..c349c527836
--- /dev/null
+++ b/storage/rocksdb/rdb_datadic.h
@@ -0,0 +1,1620 @@
+/*
+ Copyright (c) 2012,2013 Monty Program Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+#pragma once
+
+/* C++ standard header files */
+#include <cstdlib>
+#include <algorithm>
+#include <atomic>
+#include <map>
+#include <mutex>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+#include <array>
+
+/* C standard header files */
+#ifndef _WIN32
+#include <arpa/inet.h>
+#endif
+
+/* MyRocks header files */
+#include "./ha_rocksdb.h"
+#include "./properties_collector.h"
+#include "./rdb_buff.h"
+#include "./rdb_utils.h"
+
+namespace myrocks {
+
+class Rdb_dict_manager;
+class Rdb_key_def;
+class Rdb_field_packing;
+class Rdb_cf_manager;
+class Rdb_ddl_manager;
+
+const uint32_t GTID_BUF_LEN = 60;
+
+class Rdb_convert_to_record_key_decoder {
+ public:
+ Rdb_convert_to_record_key_decoder() = default;
+ Rdb_convert_to_record_key_decoder(
+ const Rdb_convert_to_record_key_decoder &decoder) = delete;
+ Rdb_convert_to_record_key_decoder &operator=(
+ const Rdb_convert_to_record_key_decoder &decoder) = delete;
+ static int decode(uchar *const buf, uint *offset, Rdb_field_packing *fpi,
+ TABLE *table, Field *field, bool has_unpack_info,
+ Rdb_string_reader *reader,
+ Rdb_string_reader *unpack_reader);
+ static int skip(const Rdb_field_packing *fpi, const Field *field,
+ Rdb_string_reader *reader, Rdb_string_reader *unpack_reader);
+
+ private:
+ static int decode_field(Rdb_field_packing *fpi, Field *field,
+ Rdb_string_reader *reader,
+ const uchar *const default_value,
+ Rdb_string_reader *unpack_reader);
+};
+
+/*
+ @brief
+ Field packing context.
+ The idea is to ensure that a call to rdb_index_field_pack_t function
+ is followed by a call to rdb_make_unpack_info_t.
+
+ @detail
+ For some datatypes, unpack_info is produced as a side effect of
+ rdb_index_field_pack_t function call.
+ For other datatypes, packing is just calling make_sort_key(), while
+ rdb_make_unpack_info_t is a custom function.
+ In order to accommodate both cases, we require both calls to be made and
+ unpack_info is passed as context data between the two.
+*/
+class Rdb_pack_field_context {
+ public:
+ Rdb_pack_field_context(const Rdb_pack_field_context &) = delete;
+ Rdb_pack_field_context &operator=(const Rdb_pack_field_context &) = delete;
+
+ explicit Rdb_pack_field_context(Rdb_string_writer *const writer_arg)
+ : writer(writer_arg) {}
+
+ // NULL means we're not producing unpack_info.
+ Rdb_string_writer *writer;
+};
+
+class Rdb_key_field_iterator {
+ private:
+ Rdb_field_packing *m_pack_info;
+ int m_iter_index;
+ int m_iter_end;
+ TABLE *m_table;
+ Rdb_string_reader *m_reader;
+ Rdb_string_reader *m_unp_reader;
+ uint m_curr_bitmap_pos;
+ const MY_BITMAP *m_covered_bitmap;
+ uchar *m_buf;
+ bool m_has_unpack_info;
+ const Rdb_key_def *m_key_def;
+ bool m_secondary_key;
+ bool m_hidden_pk_exists;
+ bool m_is_hidden_pk;
+ bool m_is_null;
+ Field *m_field;
+ uint m_offset;
+ Rdb_field_packing *m_fpi;
+
+ public:
+ Rdb_key_field_iterator(const Rdb_key_field_iterator &) = delete;
+ Rdb_key_field_iterator &operator=(const Rdb_key_field_iterator &) = delete;
+ Rdb_key_field_iterator(const Rdb_key_def *key_def,
+ Rdb_field_packing *pack_info,
+ Rdb_string_reader *reader,
+ Rdb_string_reader *unp_reader, TABLE *table,
+ bool has_unpack_info, const MY_BITMAP *covered_bitmap,
+ uchar *buf);
+
+ int next();
+ bool has_next();
+ bool get_is_null() const;
+ Field *get_field() const;
+ int get_field_index() const;
+ void *get_dst() const;
+};
+
+struct Rdb_collation_codec;
+struct Rdb_index_info;
+
+/*
+ C-style "virtual table" allowing different handling of packing logic based
+ on the field type. See Rdb_field_packing::setup() implementation.
+ */
+using rdb_make_unpack_info_t = void (*)(const Rdb_collation_codec *codec,
+ const Field *field,
+ Rdb_pack_field_context *pack_ctx);
+using rdb_index_field_unpack_t = int (*)(Rdb_field_packing *fpi, Field *field,
+ uchar *field_ptr,
+ Rdb_string_reader *reader,
+ Rdb_string_reader *unpack_reader);
+using rdb_index_field_skip_t = int (*)(const Rdb_field_packing *fpi,
+ const Field *field,
+ Rdb_string_reader *reader);
+using rdb_index_field_pack_t = void (*)(Rdb_field_packing *fpi, Field *field,
+ uchar *buf, uchar **dst,
+ Rdb_pack_field_context *pack_ctx);
+
+const uint RDB_INVALID_KEY_LEN = uint(-1);
+
+/* How much one checksum occupies when stored in the record */
+const size_t RDB_CHECKSUM_SIZE = sizeof(uint32_t);
+
+/*
+ How much space the checksum data occupies in the record, in total:
+ two checksums plus one tag byte.
+*/
+const size_t RDB_CHECKSUM_CHUNK_SIZE = 2 * RDB_CHECKSUM_SIZE + 1;
+
+/*
+ Checksum data starts with RDB_CHECKSUM_DATA_TAG, which is followed by two
+ CRC32 checksums.
+*/
+const char RDB_CHECKSUM_DATA_TAG = 0x01;
+
+/*
+ Unpack data is variable length. The header is one tag byte plus a two-byte
+ length field. The length field includes the header as well.
+*/
+const char RDB_UNPACK_DATA_TAG = 0x02;
+const size_t RDB_UNPACK_DATA_LEN_SIZE = sizeof(uint16_t);
+const size_t RDB_UNPACK_HEADER_SIZE =
+ sizeof(RDB_UNPACK_DATA_TAG) + RDB_UNPACK_DATA_LEN_SIZE;
+
+/*
+ This header format is one tag byte plus a two-byte length field plus a
+ two-byte covered bitmap. The length field includes the header size.
+*/
+const char RDB_UNPACK_COVERED_DATA_TAG = 0x03;
+const size_t RDB_UNPACK_COVERED_DATA_LEN_SIZE = sizeof(uint16_t);
+const size_t RDB_COVERED_BITMAP_SIZE = sizeof(uint16_t);
+const size_t RDB_UNPACK_COVERED_HEADER_SIZE =
+ sizeof(RDB_UNPACK_COVERED_DATA_TAG) + RDB_UNPACK_COVERED_DATA_LEN_SIZE +
+ RDB_COVERED_BITMAP_SIZE;
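+
+/*
+  For illustration, the two unpack_info header layouts defined above are
+
+    RDB_UNPACK_DATA_TAG:         [0x02][len:2]
+    RDB_UNPACK_COVERED_DATA_TAG: [0x03][len:2][covered_bitmap:2]
+
+  where len counts the header bytes as well as the payload.
+*/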
+
+/*
+ Data dictionary index info field sizes.
+*/
+const size_t RDB_SIZEOF_INDEX_INFO_VERSION = sizeof(uint16);
+const size_t RDB_SIZEOF_INDEX_TYPE = sizeof(uchar);
+const size_t RDB_SIZEOF_KV_VERSION = sizeof(uint16);
+const size_t RDB_SIZEOF_INDEX_FLAGS = sizeof(uint32);
+const size_t RDB_SIZEOF_AUTO_INCREMENT_VERSION = sizeof(uint16);
+
+// Possible return values for rdb_index_field_unpack_t functions.
+enum {
+ UNPACK_SUCCESS = 0,
+ UNPACK_FAILURE = 1,
+};
+
+/*
+ An object of this class represents information about an index in an SQL
+ table. It provides services to encode and decode index tuples.
+
+ Note: a table (as in, on-disk table) has a single Rdb_key_def object which
+ is shared across multiple TABLE* objects and may be used simultaneously from
+ different threads.
+
+ There are several data encodings:
+
+ === SQL LAYER ===
+ SQL layer uses two encodings:
+
+ - "Table->record format". This is the format that is used for the data in
+ the record buffers, table->record[i]
+
+ - KeyTupleFormat (see opt_range.cc) - this is used in parameters to index
+ lookup functions, like handler::index_read_map().
+
+ === Inside RocksDB ===
+ Primary Key is stored as a mapping:
+
+ index_tuple -> StoredRecord
+
+ StoredRecord is in Table->record format, except for blobs, which are stored
+ in-place. See ha_rocksdb::convert_record_to_storage_format for details.
+
+ Secondary indexes are stored as one of two variants:
+
+ index_tuple -> unpack_info
+ index_tuple -> empty_string
+
+ index_tuple here is the form of key that can be compared with memcmp(), aka
+ "mem-comparable form".
+
+ unpack_info is extra data that allows restoring the original value from its
+ mem-comparable form. It is present only if the index supports index-only
+ reads.
+*/
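+
+/*
+  A minimal sketch of the resulting key/value shapes (the 4-byte index number
+  prefix is big-endian; column encodings depend on the field types):
+
+    PK entry: [pk_index_number:4][mem-comparable PK columns] -> StoredRecord
+    SK entry: [sk_index_number:4][mem-comparable SK columns]
+              [mem-comparable PK columns] -> unpack_info (or empty)
+*/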
+
+class Rdb_key_def {
+ public:
+ /* Convert a key from KeyTupleFormat to mem-comparable form */
+ uint pack_index_tuple(TABLE *const tbl, uchar *const pack_buffer,
+ uchar *const packed_tuple, const uchar *const key_tuple,
+ const key_part_map &keypart_map) const;
+
+ uchar *pack_field(Field *const field, Rdb_field_packing *pack_info,
+ uchar *tuple, uchar *const packed_tuple,
+ uchar *const pack_buffer,
+ Rdb_string_writer *const unpack_info,
+ uint *const n_null_fields) const;
+ /* Convert a key from Table->record format to mem-comparable form */
+ uint pack_record(const TABLE *const tbl, uchar *const pack_buffer,
+ const uchar *const record, uchar *const packed_tuple,
+ Rdb_string_writer *const unpack_info,
+ const bool should_store_row_debug_checksums,
+ const longlong hidden_pk_id = 0, uint n_key_parts = 0,
+ uint *const n_null_fields = nullptr,
+ const char *const ttl_bytes = nullptr) const;
+ /* Pack the hidden primary key into mem-comparable form. */
+ uint pack_hidden_pk(const longlong hidden_pk_id,
+ uchar *const packed_tuple) const;
+ int unpack_record(TABLE *const table, uchar *const buf,
+ const rocksdb::Slice *const packed_key,
+ const rocksdb::Slice *const unpack_info,
+ const bool verify_row_debug_checksums) const;
+
+ static bool unpack_info_has_checksum(const rocksdb::Slice &unpack_info);
+ int compare_keys(const rocksdb::Slice *key1, const rocksdb::Slice *key2,
+ std::size_t *const column_index) const;
+
+ size_t key_length(const TABLE *const table, const rocksdb::Slice &key) const;
+
+ /* Get the key that is the "infimum" for this index */
+ inline void get_infimum_key(uchar *const key, uint *const size) const {
+ rdb_netbuf_store_index(key, m_index_number);
+ *size = INDEX_NUMBER_SIZE;
+ }
+
+ /* Get the key that is a "supremum" for this index */
+ inline void get_supremum_key(uchar *const key, uint *const size) const {
+ rdb_netbuf_store_index(key, m_index_number + 1);
+ *size = INDEX_NUMBER_SIZE;
+ }
+
+ /*
+ Get the first key that you need to position at to start iterating.
+ Stores into *key a "supremum" or "infimum" key value for the index.
+ @param[OUT] key  Big Endian, value is m_index_number or m_index_number + 1
+ @param[OUT] size Key size, value is INDEX_NUMBER_SIZE
+ @return Number of bytes in the key that are usable for bloom filter use.
+ */
+ inline int get_first_key(uchar *const key, uint *const size) const {
+ if (m_is_reverse_cf) {
+ get_supremum_key(key, size);
+ /* Find out how many bytes of infimum are the same as m_index_number */
+ uchar unmodified_key[INDEX_NUMBER_SIZE];
+ rdb_netbuf_store_index(unmodified_key, m_index_number);
+ int i;
+ for (i = 0; i < INDEX_NUMBER_SIZE; i++) {
+ if (key[i] != unmodified_key[i]) {
+ break;
+ }
+ }
+ return i;
+ } else {
+ get_infimum_key(key, size);
+ // For infimum key, its value will be m_index_number
+ // Thus return its own size instead.
+ return INDEX_NUMBER_SIZE;
+ }
+ }
+
+ /*
+ The same as get_first_key, but get the key for the last entry in the index
+ @param[OUT] key  Big Endian, value is m_index_number or m_index_number + 1
+ @param[OUT] size Key size, value is INDEX_NUMBER_SIZE
+
+ @return Number of bytes in the key that are usable for bloom filter use.
+ */
+ inline int get_last_key(uchar *const key, uint *const size) const {
+ if (m_is_reverse_cf) {
+ get_infimum_key(key, size);
+ // For infimum key, its value will be m_index_number
+ // Thus return its own size instead.
+ return INDEX_NUMBER_SIZE;
+ } else {
+ get_supremum_key(key, size);
+ /* Find out how many bytes are the same as m_index_number */
+ uchar unmodified_key[INDEX_NUMBER_SIZE];
+ rdb_netbuf_store_index(unmodified_key, m_index_number);
+ int i;
+ for (i = 0; i < INDEX_NUMBER_SIZE; i++) {
+ if (key[i] != unmodified_key[i]) {
+ break;
+ }
+ }
+ return i;
+ }
+ }
+
+ /* Make a key that is right after the given key. */
+ static int successor(uchar *const packed_tuple, const uint len);
+
+ /* Make a key that is right before the given key. */
+ static int predecessor(uchar *const packed_tuple, const uint len);
+
+ /*
+ This can be used to compare prefixes.
+ if X is a prefix of Y, then we consider that X = Y.
+ */
+ // b describes the lookup key, which can be a prefix of a.
+ // b might be outside of the index_number range, if successor() is called.
+ int cmp_full_keys(const rocksdb::Slice &a, const rocksdb::Slice &b) const {
+ DBUG_ASSERT(covers_key(a));
+
+ return memcmp(a.data(), b.data(), std::min(a.size(), b.size()));
+ }
+
+ /* Check if given mem-comparable key belongs to this index */
+ bool covers_key(const rocksdb::Slice &slice) const {
+ if (slice.size() < INDEX_NUMBER_SIZE) return false;
+
+ if (memcmp(slice.data(), m_index_number_storage_form, INDEX_NUMBER_SIZE)) {
+ return false;
+ }
+
+ return true;
+ }
+
+ void get_lookup_bitmap(const TABLE *table, MY_BITMAP *map) const;
+
+ bool covers_lookup(const rocksdb::Slice *const unpack_info,
+ const MY_BITMAP *const map) const;
+
+ inline bool use_covered_bitmap_format() const {
+ return m_index_type == INDEX_TYPE_SECONDARY &&
+ m_kv_format_version >= SECONDARY_FORMAT_VERSION_UPDATE3;
+ }
+
+ /* Indicates that all key parts can be unpacked to cover a secondary lookup */
+ bool can_cover_lookup() const;
+
+ /*
+ Return true if the passed mem-comparable key
+ - is from this index, and
+ - it matches the passed key prefix (the prefix is also in mem-comparable
+ form)
+ */
+ bool value_matches_prefix(const rocksdb::Slice &value,
+ const rocksdb::Slice &prefix) const {
+ return covers_key(value) && !cmp_full_keys(value, prefix);
+ }
+
+ uint32 get_keyno() const { return m_keyno; }
+
+ uint32 get_index_number() const { return m_index_number; }
+
+ GL_INDEX_ID get_gl_index_id() const {
+ const GL_INDEX_ID gl_index_id = {m_cf_handle->GetID(), m_index_number};
+ return gl_index_id;
+ }
+
+ int read_memcmp_key_part(const TABLE *table_arg, Rdb_string_reader *reader,
+ const uint part_num) const;
+
+ /* Must only be called for secondary keys: */
+ uint get_primary_key_tuple(const TABLE *const tbl,
+ const Rdb_key_def &pk_descr,
+ const rocksdb::Slice *const key,
+ uchar *const pk_buffer) const;
+
+ uint get_memcmp_sk_parts(const TABLE *table, const rocksdb::Slice &key,
+ uchar *sk_buffer, uint *n_null_fields) const;
+
+ /* Return max length of mem-comparable form */
+ uint max_storage_fmt_length() const { return m_maxlength; }
+
+ uint get_key_parts() const { return m_key_parts; }
+
+ uint get_ttl_field_index() const { return m_ttl_field_index; }
+
+ /*
+ Get a field object for key part #part_no
+
+ @detail
+ SQL layer thinks unique secondary indexes and indexes in partitioned
+ tables are not "Extended" with Primary Key columns.
+
+ Internally, we always extend all indexes with PK columns. This function
+ uses our definition of how the index is Extended.
+ */
+ inline Field *get_table_field_for_part_no(TABLE *table, uint part_no) const;
+
+ const std::string &get_name() const { return m_name; }
+
+ const rocksdb::SliceTransform *get_extractor() const {
+ return m_prefix_extractor.get();
+ }
+
+ static size_t get_unpack_header_size(char tag);
+
+ Rdb_key_def &operator=(const Rdb_key_def &) = delete;
+ Rdb_key_def(const Rdb_key_def &k);
+ Rdb_key_def(uint indexnr_arg, uint keyno_arg,
+ rocksdb::ColumnFamilyHandle *cf_handle_arg,
+ uint16_t index_dict_version_arg, uchar index_type_arg,
+ uint16_t kv_format_version_arg, bool is_reverse_cf_arg,
+ bool is_per_partition_cf, const char *name,
+ Rdb_index_stats stats = Rdb_index_stats(), uint32 index_flags = 0,
+ uint32 ttl_rec_offset = UINT_MAX, uint64 ttl_duration = 0);
+ ~Rdb_key_def();
+
+ enum {
+ INDEX_NUMBER_SIZE = 4,
+ VERSION_SIZE = 2,
+ CF_NUMBER_SIZE = 4,
+ CF_FLAG_SIZE = 4,
+ PACKED_SIZE = 4, // one int
+ };
+
+ // bit flags for combining bools when writing to disk
+ enum {
+ REVERSE_CF_FLAG = 1,
+ AUTO_CF_FLAG = 2, // Deprecated
+ PER_PARTITION_CF_FLAG = 4,
+ };
+
+ // bit flags which denote myrocks specific fields stored in the record
+ // currently only used for TTL.
+ enum INDEX_FLAG {
+ TTL_FLAG = 1 << 0,
+
+ // MAX_FLAG marks where the actual record starts
+ // This flag always needs to be set to the last index flag enum.
+ MAX_FLAG = TTL_FLAG << 1,
+ };
+
+ // Set of flags to ignore when comparing two CF-s and determining if
+ // they're same.
+ static const uint CF_FLAGS_TO_IGNORE = PER_PARTITION_CF_FLAG;
+
+ // Data dictionary types
+ enum DATA_DICT_TYPE {
+ DDL_ENTRY_INDEX_START_NUMBER = 1,
+ INDEX_INFO = 2,
+ CF_DEFINITION = 3,
+ BINLOG_INFO_INDEX_NUMBER = 4,
+ DDL_DROP_INDEX_ONGOING = 5,
+ INDEX_STATISTICS = 6,
+ MAX_INDEX_ID = 7,
+ DDL_CREATE_INDEX_ONGOING = 8,
+ AUTO_INC = 9,
+ END_DICT_INDEX_ID = 255
+ };
+
+ // Data dictionary schema version. Introduce newer versions
+ // if changing schema layout
+ enum {
+ DDL_ENTRY_INDEX_VERSION = 1,
+ CF_DEFINITION_VERSION = 1,
+ BINLOG_INFO_INDEX_NUMBER_VERSION = 1,
+ DDL_DROP_INDEX_ONGOING_VERSION = 1,
+ MAX_INDEX_ID_VERSION = 1,
+ DDL_CREATE_INDEX_ONGOING_VERSION = 1,
+ AUTO_INCREMENT_VERSION = 1,
+ // Version for index stats is stored in IndexStats struct
+ };
+
+ // Index info version. Introduce newer versions when changing the
+ // INDEX_INFO layout. Update INDEX_INFO_VERSION_LATEST to point to the
+ // latest version number.
+ enum {
+ INDEX_INFO_VERSION_INITIAL = 1, // Obsolete
+ INDEX_INFO_VERSION_KV_FORMAT,
+ INDEX_INFO_VERSION_GLOBAL_ID,
+ // There is no change to data format in this version, but this version
+ // verifies KV format version, whereas previous versions do not. A version
+ // bump is needed to prevent older binaries from skipping the KV version
+ // check inadvertently.
+ INDEX_INFO_VERSION_VERIFY_KV_FORMAT,
+ // This changes the data format to include an 8-byte TTL duration for tables
+ INDEX_INFO_VERSION_TTL,
+ // This changes the data format to include a bitmap before the TTL duration
+ // which will indicate in the future whether TTL or other special fields
+ // are turned on or off.
+ INDEX_INFO_VERSION_FIELD_FLAGS,
+ // This normally points to the latest version (currently it does).
+ INDEX_INFO_VERSION_LATEST = INDEX_INFO_VERSION_FIELD_FLAGS,
+ };
+
+ // MyRocks index types
+ enum {
+ INDEX_TYPE_PRIMARY = 1,
+ INDEX_TYPE_SECONDARY = 2,
+ INDEX_TYPE_HIDDEN_PRIMARY = 3,
+ };
+
+ // Key/Value format version for each index type
+ enum {
+ PRIMARY_FORMAT_VERSION_INITIAL = 10,
+ // This change includes:
+ // - For columns that can be unpacked with unpack_info, PK
+ // stores the unpack_info.
+ // - DECIMAL datatype is no longer stored in the row (because
+ // it can be decoded from its mem-comparable form)
+ // - VARCHAR-columns use endspace-padding.
+ PRIMARY_FORMAT_VERSION_UPDATE1 = 11,
+ // This change includes:
+ // - Binary encoded variable length fields have a new format that avoids
+ // an inefficiency where data that was a multiple of 8 bytes in length
+ // had an extra 9 bytes of encoded data.
+ PRIMARY_FORMAT_VERSION_UPDATE2 = 12,
+ // This change includes support for TTL
+ // - This means that when TTL is specified for the table an 8-byte TTL
+ // field is prepended in front of each value.
+ PRIMARY_FORMAT_VERSION_TTL = 13,
+ PRIMARY_FORMAT_VERSION_LATEST = PRIMARY_FORMAT_VERSION_TTL,
+
+ SECONDARY_FORMAT_VERSION_INITIAL = 10,
+ // This changes the SK format to include unpack_info.
+ SECONDARY_FORMAT_VERSION_UPDATE1 = 11,
+ // This change includes:
+ // - Binary encoded variable length fields have a new format that avoids
+ // an inefficiency where data that was a multiple of 8 bytes in length
+ // had an extra 9 bytes of encoded data.
+ SECONDARY_FORMAT_VERSION_UPDATE2 = 12,
+ // This change includes support for TTL
+ // - This means that when TTL is specified for the table an 8-byte TTL
+ // field is prepended in front of each value.
+ SECONDARY_FORMAT_VERSION_TTL = 13,
+ SECONDARY_FORMAT_VERSION_LATEST = SECONDARY_FORMAT_VERSION_TTL,
+ // This change includes support for covering SK lookups for varchars. A
+ // 2-byte bitmap is added after the tag-byte to unpack_info only for
+ // records which have covered varchar columns. Currently waiting before
+ // enabling in prod.
+ SECONDARY_FORMAT_VERSION_UPDATE3 = 65535,
+ };
+
+ void setup(const TABLE *const table, const Rdb_tbl_def *const tbl_def);
+
+ static uint extract_ttl_duration(const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg,
+ uint64 *ttl_duration);
+ static uint extract_ttl_col(const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg,
+ std::string *ttl_column, uint *ttl_field_index,
+ bool skip_checks = false);
+ inline bool has_ttl() const { return m_ttl_duration > 0; }
+
+ static bool has_index_flag(uint32 index_flags, enum INDEX_FLAG flag);
+ static uint32 calculate_index_flag_offset(uint32 index_flags,
+ enum INDEX_FLAG flag,
+ uint *const field_length = nullptr);
+ void write_index_flag_field(Rdb_string_writer *const buf,
+ const uchar *const val,
+ enum INDEX_FLAG flag) const;
+
+ static const std::string gen_qualifier_for_table(
+ const char *const qualifier, const std::string &partition_name = "");
+ static const std::string gen_cf_name_qualifier_for_partition(
+ const std::string &s);
+ static const std::string gen_ttl_duration_qualifier_for_partition(
+ const std::string &s);
+ static const std::string gen_ttl_col_qualifier_for_partition(
+ const std::string &s);
+
+ static const std::string parse_comment_for_qualifier(
+ const std::string &comment, const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg, bool *per_part_match_found,
+ const char *const qualifier);
+
+ rocksdb::ColumnFamilyHandle *get_cf() const { return m_cf_handle; }
+
+ /* Check if keypart #kp can be unpacked from index tuple */
+ inline bool can_unpack(const uint kp) const;
+ /* Check if keypart #kp needs unpack info */
+ inline bool has_unpack_info(const uint kp) const;
+
+ /* Check if given table has a primary key */
+ static bool table_has_hidden_pk(const TABLE *const table);
+
+ void report_checksum_mismatch(const bool is_key, const char *const data,
+ const size_t data_size) const;
+
+ /* Check if the index format is at least pk_min if it is a PK,
+ or at least sk_min if it is an SK. */
+ bool index_format_min_check(const int pk_min, const int sk_min) const;
+
+ static void pack_with_make_sort_key(
+ Rdb_field_packing *const fpi, Field *const field,
+ uchar *buf MY_ATTRIBUTE((__unused__)), uchar **dst,
+ Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__)));
+
+ static void pack_with_varchar_encoding(
+ Rdb_field_packing *const fpi, Field *const field, uchar *buf, uchar **dst,
+ Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__)));
+
+ static void pack_with_varchar_space_pad(
+ Rdb_field_packing *const fpi, Field *const field, uchar *buf, uchar **dst,
+ Rdb_pack_field_context *const pack_ctx);
+
+ static int unpack_integer(Rdb_field_packing *const fpi, Field *const field,
+ uchar *const to, Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader
+ MY_ATTRIBUTE((__unused__)));
+
+ static int unpack_double(
+ Rdb_field_packing *const fpi MY_ATTRIBUTE((__unused__)),
+ Field *const field MY_ATTRIBUTE((__unused__)), uchar *const field_ptr,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__)));
+
+ static int unpack_float(
+ Rdb_field_packing *const fpi,
+ Field *const field MY_ATTRIBUTE((__unused__)), uchar *const field_ptr,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__)));
+
+ static int unpack_binary_str(Rdb_field_packing *const fpi, Field *const field,
+ uchar *const to, Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader
+ MY_ATTRIBUTE((__unused__)));
+
+ static int unpack_binary_or_utf8_varchar(
+ Rdb_field_packing *const fpi, Field *const field, uchar *dst,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__)));
+
+ static int unpack_binary_or_utf8_varchar_space_pad(
+ Rdb_field_packing *const fpi, Field *const field, uchar *dst,
+ Rdb_string_reader *const reader, Rdb_string_reader *const unp_reader);
+
+ static int unpack_newdate(
+ Rdb_field_packing *const fpi,
+ Field *const field MY_ATTRIBUTE((__unused__)), uchar *const field_ptr,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__)));
+
+ static int unpack_utf8_str(Rdb_field_packing *const fpi, Field *const field,
+ uchar *dst, Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader
+ MY_ATTRIBUTE((__unused__)));
+
+ static int unpack_unknown_varchar(Rdb_field_packing *const fpi,
+ Field *const field, uchar *dst,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader);
+
+ static int unpack_simple_varchar_space_pad(
+ Rdb_field_packing *const fpi, Field *const field, uchar *dst,
+ Rdb_string_reader *const reader, Rdb_string_reader *const unp_reader);
+
+ static int unpack_simple(Rdb_field_packing *const fpi,
+ Field *const field MY_ATTRIBUTE((__unused__)),
+ uchar *const dst, Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader);
+
+ static int unpack_unknown(Rdb_field_packing *const fpi, Field *const field,
+ uchar *const dst, Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader);
+
+ static int unpack_floating_point(uchar *const dst,
+ Rdb_string_reader *const reader,
+ const size_t size, const int exp_digit,
+ const uchar *const zero_pattern,
+ const uchar *const zero_val,
+ void (*swap_func)(uchar *, const uchar *));
+
+ static void make_unpack_simple_varchar(
+ const Rdb_collation_codec *const codec, const Field *const field,
+ Rdb_pack_field_context *const pack_ctx);
+
+ static void make_unpack_simple(const Rdb_collation_codec *const codec,
+ const Field *const field,
+ Rdb_pack_field_context *const pack_ctx);
+
+ static void make_unpack_unknown(
+ const Rdb_collation_codec *codec MY_ATTRIBUTE((__unused__)),
+ const Field *const field, Rdb_pack_field_context *const pack_ctx);
+
+ static void make_unpack_unknown_varchar(
+ const Rdb_collation_codec *const codec MY_ATTRIBUTE((__unused__)),
+ const Field *const field, Rdb_pack_field_context *const pack_ctx);
+
+ static void dummy_make_unpack_info(
+ const Rdb_collation_codec *codec MY_ATTRIBUTE((__unused__)),
+ const Field *field MY_ATTRIBUTE((__unused__)),
+ Rdb_pack_field_context *pack_ctx MY_ATTRIBUTE((__unused__)));
+
+ static int skip_max_length(const Rdb_field_packing *const fpi,
+ const Field *const field
+ MY_ATTRIBUTE((__unused__)),
+ Rdb_string_reader *const reader);
+
+ static int skip_variable_length(const Rdb_field_packing *const fpi,
+ const Field *const field,
+ Rdb_string_reader *const reader);
+
+ static int skip_variable_space_pad(const Rdb_field_packing *const fpi,
+ const Field *const field,
+ Rdb_string_reader *const reader);
+
+ inline bool use_legacy_varbinary_format() const {
+ return !index_format_min_check(PRIMARY_FORMAT_VERSION_UPDATE2,
+ SECONDARY_FORMAT_VERSION_UPDATE2);
+ }
+
+ static inline bool is_unpack_data_tag(char c) {
+ return c == RDB_UNPACK_DATA_TAG || c == RDB_UNPACK_COVERED_DATA_TAG;
+ }
+
+ private:
+#ifndef DBUG_OFF
+ inline bool is_storage_available(const int offset, const int needed) const {
+ const int storage_length = static_cast<int>(max_storage_fmt_length());
+ return (storage_length - offset) >= needed;
+ }
+#endif // DBUG_OFF
+
+ /* Global number of this index (used as prefix in StorageFormat) */
+ const uint32 m_index_number;
+
+ uchar m_index_number_storage_form[INDEX_NUMBER_SIZE];
+
+ rocksdb::ColumnFamilyHandle *m_cf_handle;
+
+ static void pack_legacy_variable_format(const uchar *src, size_t src_len,
+ uchar **dst);
+
+ static void pack_variable_format(const uchar *src, size_t src_len,
+ uchar **dst);
+
+ static uint calc_unpack_legacy_variable_format(uchar flag, bool *done);
+
+ static uint calc_unpack_variable_format(uchar flag, bool *done);
+
+ public:
+ uint16_t m_index_dict_version;
+ uchar m_index_type;
+ /* KV format version for the index id */
+ uint16_t m_kv_format_version;
+ /* If true, the column family stores data in the reverse order */
+ bool m_is_reverse_cf;
+
+ /* If true, then column family is created per partition. */
+ bool m_is_per_partition_cf;
+
+ std::string m_name;
+ mutable Rdb_index_stats m_stats;
+
+ /*
+ Bitmap containing information about whether TTL or other special fields
+ are enabled for the given index.
+ */
+ uint32 m_index_flags_bitmap;
+
+ /*
+ How much space in bytes the index flag fields occupy.
+ */
+ uint32 m_total_index_flags_length;
+
+ /*
+ Offset in the records where the 8-byte TTL is stored (UINT_MAX if no TTL)
+ */
+ uint32 m_ttl_rec_offset;
+
+ /* Default TTL duration */
+ uint64 m_ttl_duration;
+
+ /* TTL column (if defined by user, otherwise implicit TTL is used) */
+ std::string m_ttl_column;
+
+ private:
+  /* Number of key parts in the primary key */
+ uint m_pk_key_parts;
+
+ /*
+ pk_part_no[X]=Y means that keypart #X of this key is key part #Y of the
+ primary key. Y==-1 means this column is not present in the primary key.
+ */
+ uint *m_pk_part_no;
+
+ /* Array of index-part descriptors. */
+ Rdb_field_packing *m_pack_info;
+
+ uint m_keyno; /* number of this index in the table */
+
+ /*
+ Number of key parts in the index (including "index extension"). This is how
+ many elements are in the m_pack_info array.
+ */
+ uint m_key_parts;
+
+ /*
+    If the TTL column is part of the PK, the offset of the column within the
+    PK. Default is UINT_MAX, denoting that the TTL column is not part of the
+    PK.
+ */
+ uint m_ttl_pk_key_part_offset;
+
+ /*
+ Index of the TTL column in table->s->fields, if it exists.
+ Default is UINT_MAX to denote that it does not exist.
+ */
+ uint m_ttl_field_index;
+
+  /* Prefix extractor for the column family of the key definition */
+ std::shared_ptr<const rocksdb::SliceTransform> m_prefix_extractor;
+
+ /* Maximum length of the mem-comparable form. */
+ uint m_maxlength;
+
+ /* mutex to protect setup */
+ mysql_mutex_t m_mutex;
+};
+
+// "Simple" collations (those specified in strings/ctype-simple.c) are simple
+// because their strnxfrm function maps one byte to one byte. However, the
+// mapping is not injective, so the inverse function will take in an extra
+// index parameter containing information to disambiguate what the original
+// character was.
+//
+// The m_enc* members are for encoding. Generally, we want encoding to be:
+// src -> (dst, idx)
+//
+// Since strnxfrm already gives us dst, we just need m_enc_idx[src] to give us
+// idx.
+//
+// For the inverse, we have:
+// (dst, idx) -> src
+//
+// We have m_dec_idx[idx][dst] = src to get our original character back.
+//
+struct Rdb_collation_codec {
+ const my_core::CHARSET_INFO *m_cs;
+ // The first element unpacks VARCHAR(n), the second one - CHAR(n).
+ std::array<rdb_make_unpack_info_t, 2> m_make_unpack_info_func;
+ std::array<rdb_index_field_unpack_t, 2> m_unpack_func;
+
+ std::array<uchar, 256> m_enc_idx;
+ std::array<uchar, 256> m_enc_size;
+
+ std::array<uchar, 256> m_dec_size;
+ std::vector<std::array<uchar, 256>> m_dec_idx;
+};
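+
+// A minimal round-trip sketch of the scheme described above (illustrative
+// only, assuming a populated codec for a one-byte "simple" collation and that
+// strnxfrm() has already produced the mem-comparable byte `dst` from `src`):
+//
+//   const Rdb_collation_codec *codec = ...;  // e.g. looked up from
+//                                             // rdb_collation_data
+//   uchar src = ...;                          // original character
+//   uchar idx = codec->m_enc_idx[src];        // disambiguation index
+//   uchar orig = codec->m_dec_idx[idx][dst];  // restores orig == src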
+
+extern mysql_mutex_t rdb_collation_data_mutex;
+extern mysql_mutex_t rdb_mem_cmp_space_mutex;
+extern std::array<const Rdb_collation_codec *, MY_ALL_CHARSETS_SIZE>
+ rdb_collation_data;
+
+class Rdb_field_packing {
+ public:
+ Rdb_field_packing(const Rdb_field_packing &) = delete;
+ Rdb_field_packing &operator=(const Rdb_field_packing &) = delete;
+ Rdb_field_packing() = default;
+
+ /* Length of mem-comparable image of the field, in bytes */
+ int m_max_image_len;
+
+ /* Length of image in the unpack data */
+ int m_unpack_data_len;
+ int m_unpack_data_offset;
+
+ bool m_maybe_null; /* TRUE <=> NULL-byte is stored */
+
+ /*
+ Valid only for VARCHAR fields.
+ */
+ const CHARSET_INFO *m_varchar_charset;
+ bool m_use_legacy_varbinary_format;
+
+ // (Valid when Variable Length Space Padded Encoding is used):
+ uint m_segment_size; // size of segment used
+
+  // Number of bytes used to store the number of trimmed (or added)
+  // spaces in the unpack_info
+ bool m_unpack_info_uses_two_bytes;
+
+ /*
+ True implies that an index-only read is always possible for this field.
+ False means an index-only read may be possible depending on the record and
+ field type.
+ */
+ bool m_covered;
+
+ const std::vector<uchar> *space_xfrm;
+ size_t space_xfrm_len;
+ size_t space_mb_len;
+
+ const Rdb_collation_codec *m_charset_codec;
+
+ /*
+ @return TRUE: this field makes use of unpack_info.
+ */
+ bool uses_unpack_info() const { return (m_make_unpack_info_func != nullptr); }
+
+ /* TRUE means unpack_info stores the original field value */
+ bool m_unpack_info_stores_value;
+
+ rdb_index_field_pack_t m_pack_func;
+ rdb_make_unpack_info_t m_make_unpack_info_func;
+
+ /*
+ This function takes
+ - mem-comparable form
+ - unpack_info data
+ and restores the original value.
+ */
+ rdb_index_field_unpack_t m_unpack_func;
+
+ /*
+ This function skips over mem-comparable form.
+ */
+ rdb_index_field_skip_t m_skip_func;
+
+ private:
+ /*
+ Location of the field in the table (key number and key part number).
+
+    Note that this describes not the field, but rather the position of the
+    field in the index. Consider an example:
+
+ col1 VARCHAR (100),
+      INDEX idx1 (col1),
+ INDEX idx2 (col1(10)),
+
+ Here, idx2 has a special Field object that is set to describe a 10-char
+ prefix of col1.
+
+ We must also store the keynr. It is needed for implicit "extended keys".
+ Every key in MyRocks needs to include PK columns. Generally, SQL layer
+ includes PK columns as part of its "Extended Keys" feature, but sometimes
+ it does not (known examples are unique secondary indexes and partitioned
+ tables).
+    In that case, MyRocks's index descriptor has an invisible suffix of PK
+ columns (and the point is that these columns are parts of PK, not parts
+ of the current index).
+ */
+ uint m_keynr;
+ uint m_key_part;
+
+ public:
+ bool setup(const Rdb_key_def *const key_descr, const Field *const field,
+ const uint keynr_arg, const uint key_part_arg,
+ const uint16 key_length);
+ Field *get_field_in_table(const TABLE *const tbl) const;
+ void fill_hidden_pk_val(uchar **dst, const longlong hidden_pk_id) const;
+};
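+
+// Illustrative pseudo-flow (not actual MyRocks code) of how the function
+// pointers above cooperate when reading a key; variable names are assumed:
+//
+//   if (fpi->m_unpack_func) {
+//     // Restore the original value from the mem-comparable form
+//     // (plus unpack_info, when uses_unpack_info() returns true).
+//     fpi->m_unpack_func(fpi, field, field_ptr, reader, unp_reader);
+//   } else {
+//     // The value cannot be restored; just skip its mem-comparable form.
+//     fpi->m_skip_func(fpi, field, reader);
+//   }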
+
+/*
+ Descriptor telling how to decode/encode a field to on-disk record storage
+ format. Not all information is in the structure yet, but eventually we
+ want to have as much as possible there to avoid virtual calls.
+
+ For encoding/decoding of index tuples, see Rdb_key_def.
+ */
+class Rdb_field_encoder {
+ public:
+ Rdb_field_encoder(const Rdb_field_encoder &) = delete;
+ Rdb_field_encoder &operator=(const Rdb_field_encoder &) = delete;
+ /*
+    STORE_NONE is set when a column can be decoded solely from its
+    mem-comparable form.
+    STORE_SOME is set when a column can be decoded from its mem-comparable
+    form plus unpack_info.
+    STORE_ALL is set when a column cannot be decoded, so its original value
+    must be stored in the PK records.
+ */
+ enum STORAGE_TYPE {
+ STORE_NONE,
+ STORE_SOME,
+ STORE_ALL,
+ };
+ STORAGE_TYPE m_storage_type;
+
+ uint m_null_offset;
+ uint16 m_field_index;
+
+ uchar m_null_mask; // 0 means the field cannot be null
+
+ my_core::enum_field_types m_field_type;
+
+ uint m_pack_length_in_rec;
+
+ bool maybe_null() const { return m_null_mask != 0; }
+
+ bool uses_variable_len_encoding() const {
+ return (m_field_type == MYSQL_TYPE_BLOB ||
+ m_field_type == MYSQL_TYPE_VARCHAR);
+ }
+};
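+
+// Typical (illustrative, not exhaustive) classification under the
+// STORAGE_TYPE scheme documented above:
+//
+//   INT, DATE, BINARY(n)              -> STORE_NONE: the mem-comparable form
+//                                        alone can be decoded
+//   CHAR/VARCHAR, "simple" collation  -> STORE_SOME: also needs unpack_info
+//   CHAR/VARCHAR, complex collation   -> STORE_ALL: the original value is
+//                                        kept in the PK record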
+
+inline Field *Rdb_key_def::get_table_field_for_part_no(TABLE *table,
+ uint part_no) const {
+ DBUG_ASSERT(part_no < get_key_parts());
+ return m_pack_info[part_no].get_field_in_table(table);
+}
+
+inline bool Rdb_key_def::can_unpack(const uint kp) const {
+ DBUG_ASSERT(kp < m_key_parts);
+ return (m_pack_info[kp].m_unpack_func != nullptr);
+}
+
+inline bool Rdb_key_def::has_unpack_info(const uint kp) const {
+ DBUG_ASSERT(kp < m_key_parts);
+ return m_pack_info[kp].uses_unpack_info();
+}
+
+/*
+ A table definition. This is an entry in the mapping
+
+ dbname.tablename -> {index_nr, index_nr, ... }
+
+ There is only one Rdb_tbl_def object for a given table.
+  That's why we keep the auto_increment value here, too.
+*/
+
+class Rdb_tbl_def {
+ private:
+ void check_if_is_mysql_system_table();
+
+ /* Stores 'dbname.tablename' */
+ std::string m_dbname_tablename;
+
+ /* Store the db name, table name, and partition name */
+ std::string m_dbname;
+ std::string m_tablename;
+ std::string m_partition;
+
+ void set_name(const std::string &name);
+
+ public:
+ Rdb_tbl_def(const Rdb_tbl_def &) = delete;
+ Rdb_tbl_def &operator=(const Rdb_tbl_def &) = delete;
+
+ explicit Rdb_tbl_def(const std::string &name)
+ : m_key_descr_arr(nullptr), m_hidden_pk_val(0), m_auto_incr_val(0) {
+ set_name(name);
+ }
+
+ Rdb_tbl_def(const char *const name, const size_t len)
+ : m_key_descr_arr(nullptr), m_hidden_pk_val(0), m_auto_incr_val(0) {
+ set_name(std::string(name, len));
+ }
+
+ explicit Rdb_tbl_def(const rocksdb::Slice &slice, const size_t pos = 0)
+ : m_key_descr_arr(nullptr), m_hidden_pk_val(0), m_auto_incr_val(0) {
+ set_name(std::string(slice.data() + pos, slice.size() - pos));
+ }
+
+ ~Rdb_tbl_def();
+
+ void check_and_set_read_free_rpl_table();
+
+ /* Number of indexes */
+ uint m_key_count;
+
+ /* Array of index descriptors */
+ std::shared_ptr<Rdb_key_def> *m_key_descr_arr;
+
+ std::atomic<longlong> m_hidden_pk_val;
+ std::atomic<ulonglong> m_auto_incr_val;
+
+ /* Is this a system table */
+ bool m_is_mysql_system_table;
+
+  /* Is read-free replication enabled for this table */
+ std::atomic_bool m_is_read_free_rpl_table{false};
+
+ bool put_dict(Rdb_dict_manager *const dict, rocksdb::WriteBatch *const batch,
+ const rocksdb::Slice &key);
+
+ const std::string &full_tablename() const { return m_dbname_tablename; }
+ const std::string &base_dbname() const { return m_dbname; }
+ const std::string &base_tablename() const { return m_tablename; }
+ const std::string &base_partition() const { return m_partition; }
+ GL_INDEX_ID get_autoincr_gl_index_id();
+};
+
+/*
+  A thread-safe sequential number generator. Its performance is not a
+  concern, so it is OK to protect it with a mutex.
+*/
+
+class Rdb_seq_generator {
+ uint m_next_number = 0;
+
+ mysql_mutex_t m_mutex;
+
+ public:
+ Rdb_seq_generator(const Rdb_seq_generator &) = delete;
+ Rdb_seq_generator &operator=(const Rdb_seq_generator &) = delete;
+ Rdb_seq_generator() = default;
+
+ void init(const uint initial_number) {
+ mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST);
+ m_next_number = initial_number;
+ }
+
+ uint get_and_update_next_number(Rdb_dict_manager *const dict);
+
+ void cleanup() { mysql_mutex_destroy(&m_mutex); }
+};
+
+interface Rdb_tables_scanner {
+  virtual int add_table(Rdb_tbl_def *tdef) = 0;
+ virtual ~Rdb_tables_scanner() {} /* Keep the compiler happy */
+};
+
+/*
+ This contains a mapping of
+
+ dbname.table_name -> array{Rdb_key_def}.
+
+  Objects are shared among all threads.
+*/
+
+class Rdb_ddl_manager {
+ Rdb_dict_manager *m_dict = nullptr;
+
+ // Contains Rdb_tbl_def elements
+ std::unordered_map<std::string, Rdb_tbl_def *> m_ddl_map;
+
+ // Maps index id to <table_name, index number>
+ std::map<GL_INDEX_ID, std::pair<std::string, uint>> m_index_num_to_keydef;
+
+  // Maps index id to key definitions not yet committed to the data dictionary.
+ // This is mainly used to store key definitions during ALTER TABLE.
+ std::map<GL_INDEX_ID, std::shared_ptr<Rdb_key_def>>
+ m_index_num_to_uncommitted_keydef;
+ mysql_rwlock_t m_rwlock;
+
+ Rdb_seq_generator m_sequence;
+  // A queue of table stats to write into the data dictionary.
+  // It is produced by event listeners (i.e. compaction and flush threads)
+  // and consumed by the RocksDB background thread.
+ std::map<GL_INDEX_ID, Rdb_index_stats> m_stats2store;
+
+ const std::shared_ptr<Rdb_key_def> &find(GL_INDEX_ID gl_index_id);
+
+ public:
+ Rdb_ddl_manager(const Rdb_ddl_manager &) = delete;
+ Rdb_ddl_manager &operator=(const Rdb_ddl_manager &) = delete;
+ Rdb_ddl_manager() {}
+
+ /* Load the data dictionary from on-disk storage */
+ bool init(Rdb_dict_manager *const dict_arg, Rdb_cf_manager *const cf_manager,
+ const uint32_t validate_tables);
+
+ void cleanup();
+
+ Rdb_tbl_def *find(const std::string &table_name, const bool lock = true);
+ std::shared_ptr<const Rdb_key_def> safe_find(GL_INDEX_ID gl_index_id);
+ void set_stats(const std::unordered_map<GL_INDEX_ID, Rdb_index_stats> &stats);
+ void adjust_stats(const std::vector<Rdb_index_stats> &new_data,
+ const std::vector<Rdb_index_stats> &deleted_data =
+ std::vector<Rdb_index_stats>());
+ void persist_stats(const bool sync = false);
+
+ /* Modify the mapping and write it to on-disk storage */
+ int put_and_write(Rdb_tbl_def *const key_descr,
+ rocksdb::WriteBatch *const batch);
+ void remove(Rdb_tbl_def *const rec, rocksdb::WriteBatch *const batch,
+ const bool lock = true);
+ bool rename(const std::string &from, const std::string &to,
+ rocksdb::WriteBatch *const batch);
+
+ uint get_and_update_next_number(Rdb_dict_manager *const dict) {
+ return m_sequence.get_and_update_next_number(dict);
+ }
+
+ const std::string safe_get_table_name(const GL_INDEX_ID &gl_index_id);
+
+ /* Walk the data dictionary */
+ int scan_for_tables(Rdb_tables_scanner *tables_scanner);
+
+ void erase_index_num(const GL_INDEX_ID &gl_index_id);
+ void add_uncommitted_keydefs(
+ const std::unordered_set<std::shared_ptr<Rdb_key_def>> &indexes);
+ void remove_uncommitted_keydefs(
+ const std::unordered_set<std::shared_ptr<Rdb_key_def>> &indexes);
+
+ private:
+ /* Put the data into in-memory table (only) */
+ int put(Rdb_tbl_def *const key_descr, const bool lock = true);
+
+ /* Helper functions to be passed to my_core::HASH object */
+ static const uchar *get_hash_key(Rdb_tbl_def *const rec, size_t *const length,
+ my_bool not_used MY_ATTRIBUTE((unused)));
+ static void free_hash_elem(void *const data);
+
+ bool validate_schemas();
+
+ bool validate_auto_incr();
+};
+
+/*
+ Writing binlog information into RocksDB at commit(),
+ and retrieving binlog information at crash recovery.
+  commit() and recovery are always executed by at most a single client
+  at a time, so concurrency control is not needed.
+
+  Binlog info is stored in RocksDB as follows:
+ key: BINLOG_INFO_INDEX_NUMBER
+ value: packed single row:
+ binlog_name_length (2 byte form)
+ binlog_name
+ binlog_position (4 byte form)
+ binlog_gtid_length (2 byte form)
+ binlog_gtid
+*/
+class Rdb_binlog_manager {
+ public:
+ Rdb_binlog_manager(const Rdb_binlog_manager &) = delete;
+ Rdb_binlog_manager &operator=(const Rdb_binlog_manager &) = delete;
+ Rdb_binlog_manager() = default;
+
+ bool init(Rdb_dict_manager *const dict);
+ void cleanup();
+ void update(const char *const binlog_name, const my_off_t binlog_pos,
+ rocksdb::WriteBatchBase *const batch);
+ bool read(char *const binlog_name, my_off_t *const binlog_pos,
+ char *const binlog_gtid) const;
+ void update_slave_gtid_info(const uint id, const char *const db,
+ const char *const gtid,
+ rocksdb::WriteBatchBase *const write_batch);
+
+ private:
+ Rdb_dict_manager *m_dict = nullptr;
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE> m_key_writer;
+ rocksdb::Slice m_key_slice;
+
+ bool unpack_value(const uchar *const value, size_t value_size,
+ char *const binlog_name,
+ my_off_t *const binlog_pos, char *const binlog_gtid) const;
+
+ std::atomic<Rdb_tbl_def *> m_slave_gtid_info_tbl;
+};
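+
+// Worked example of the packed row documented above (assuming the usual
+// big-endian "netbuf" encoding): binlog name "binlog.000001" (13 bytes),
+// position 4, empty GTID:
+//
+//   [0x00 0x0D]["binlog.000001"][0x00 0x00 0x00 0x04][0x00 0x00]
+//    name len    name             position             gtid len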
+
+/*
+ Rdb_dict_manager manages how MySQL on RocksDB (MyRocks) stores its
+ internal data dictionary.
+  MyRocks stores its data dictionary in a dedicated system column family
+  named __system__. The system column family is used by MyRocks
+  internally only, and is not used by applications.
+
+  Currently MyRocks has the following data dictionary models.
+
+ 1. Table Name => internal index id mappings
+ key: Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER(0x1) + dbname.tablename
+ value: version, {cf_id, index_id}*n_indexes_of_the_table
+ version is 2 bytes. cf_id and index_id are 4 bytes.
+
+ 2. internal cf_id, index id => index information
+ key: Rdb_key_def::INDEX_INFO(0x2) + cf_id + index_id
+ value: version, index_type, kv_format_version, index_flags, ttl_duration
+ index_type is 1 byte, version and kv_format_version are 2 bytes.
+ index_flags is 4 bytes.
+ ttl_duration is 8 bytes.
+
+ 3. CF id => CF flags
+ key: Rdb_key_def::CF_DEFINITION(0x3) + cf_id
+ value: version, {is_reverse_cf, is_auto_cf (deprecated), is_per_partition_cf}
+ cf_flags is 4 bytes in total.
+
+ 4. Binlog entry (updated at commit)
+ key: Rdb_key_def::BINLOG_INFO_INDEX_NUMBER (0x4)
+ value: version, {binlog_name,binlog_pos,binlog_gtid}
+
+ 5. Ongoing drop index entry
+ key: Rdb_key_def::DDL_DROP_INDEX_ONGOING(0x5) + cf_id + index_id
+ value: version
+
+ 6. index stats
+ key: Rdb_key_def::INDEX_STATISTICS(0x6) + cf_id + index_id
+ value: version, {materialized PropertiesCollector::IndexStats}
+
+ 7. maximum index id
+ key: Rdb_key_def::MAX_INDEX_ID(0x7)
+ value: index_id
+ index_id is 4 bytes
+
+ 8. Ongoing create index entry
+ key: Rdb_key_def::DDL_CREATE_INDEX_ONGOING(0x8) + cf_id + index_id
+ value: version
+
+ 9. auto_increment values
+ key: Rdb_key_def::AUTO_INC(0x9) + cf_id + index_id
+ value: version, {max auto_increment so far}
+ max auto_increment is 8 bytes
+
+  Data dictionary operations are atomic inside RocksDB. For example,
+  when creating a table with two indexes, it is necessary to call Put
+  three times, and the three Puts have to be atomic. Rdb_dict_manager has the
+  wrapper functions begin() and commit() to make atomic operations easier.
+
+*/
+class Rdb_dict_manager {
+ private:
+ mysql_mutex_t m_mutex;
+ rocksdb::TransactionDB *m_db = nullptr;
+ rocksdb::ColumnFamilyHandle *m_system_cfh = nullptr;
+ /* Utility to put INDEX_INFO and CF_DEFINITION */
+
+ uchar m_key_buf_max_index_id[Rdb_key_def::INDEX_NUMBER_SIZE] = {0};
+ rocksdb::Slice m_key_slice_max_index_id;
+
+ static void dump_index_id(uchar *const netbuf,
+ Rdb_key_def::DATA_DICT_TYPE dict_type,
+ const GL_INDEX_ID &gl_index_id);
+ template <size_t T>
+ static void dump_index_id(Rdb_buf_writer<T> *buf_writer,
+ Rdb_key_def::DATA_DICT_TYPE dict_type,
+ const GL_INDEX_ID &gl_index_id) {
+ buf_writer->write_uint32(dict_type);
+ buf_writer->write_uint32(gl_index_id.cf_id);
+ buf_writer->write_uint32(gl_index_id.index_id);
+ }
+
+ void delete_with_prefix(rocksdb::WriteBatch *const batch,
+ Rdb_key_def::DATA_DICT_TYPE dict_type,
+ const GL_INDEX_ID &gl_index_id) const;
+ /* Functions for fast DROP TABLE/INDEX */
+ void resume_drop_indexes() const;
+ void log_start_drop_table(const std::shared_ptr<Rdb_key_def> *const key_descr,
+ const uint32 n_keys,
+ const char *const log_action) const;
+ void log_start_drop_index(GL_INDEX_ID gl_index_id,
+ const char *log_action) const;
+
+ public:
+ Rdb_dict_manager(const Rdb_dict_manager &) = delete;
+ Rdb_dict_manager &operator=(const Rdb_dict_manager &) = delete;
+ Rdb_dict_manager() = default;
+
+ bool init(rocksdb::TransactionDB *const rdb_dict,
+ Rdb_cf_manager *const cf_manager);
+
+ inline void cleanup() { mysql_mutex_destroy(&m_mutex); }
+
+ inline void lock() { RDB_MUTEX_LOCK_CHECK(m_mutex); }
+
+ inline void unlock() { RDB_MUTEX_UNLOCK_CHECK(m_mutex); }
+
+ inline rocksdb::ColumnFamilyHandle *get_system_cf() const {
+ return m_system_cfh;
+ }
+
+ /* Raw RocksDB operations */
+ std::unique_ptr<rocksdb::WriteBatch> begin() const;
+ int commit(rocksdb::WriteBatch *const batch, const bool sync = true) const;
+ rocksdb::Status get_value(const rocksdb::Slice &key,
+ std::string *const value) const;
+ void put_key(rocksdb::WriteBatchBase *const batch, const rocksdb::Slice &key,
+ const rocksdb::Slice &value) const;
+ void delete_key(rocksdb::WriteBatchBase *batch,
+ const rocksdb::Slice &key) const;
+ rocksdb::Iterator *new_iterator() const;
+
+ /* Internal Index id => CF */
+ void add_or_update_index_cf_mapping(
+ rocksdb::WriteBatch *batch,
+ struct Rdb_index_info *const index_info) const;
+ void delete_index_info(rocksdb::WriteBatch *batch,
+ const GL_INDEX_ID &index_id) const;
+ bool get_index_info(const GL_INDEX_ID &gl_index_id,
+ struct Rdb_index_info *const index_info) const;
+
+ /* CF id => CF flags */
+ void add_cf_flags(rocksdb::WriteBatch *const batch, const uint cf_id,
+ const uint cf_flags) const;
+ bool get_cf_flags(const uint cf_id, uint *const cf_flags) const;
+
+ /* Functions for fast CREATE/DROP TABLE/INDEX */
+ void get_ongoing_index_operation(
+ std::unordered_set<GL_INDEX_ID> *gl_index_ids,
+ Rdb_key_def::DATA_DICT_TYPE dd_type) const;
+ bool is_index_operation_ongoing(const GL_INDEX_ID &gl_index_id,
+ Rdb_key_def::DATA_DICT_TYPE dd_type) const;
+ void start_ongoing_index_operation(rocksdb::WriteBatch *batch,
+ const GL_INDEX_ID &gl_index_id,
+ Rdb_key_def::DATA_DICT_TYPE dd_type) const;
+ void end_ongoing_index_operation(rocksdb::WriteBatch *const batch,
+ const GL_INDEX_ID &gl_index_id,
+ Rdb_key_def::DATA_DICT_TYPE dd_type) const;
+ bool is_drop_index_empty() const;
+ void add_drop_table(std::shared_ptr<Rdb_key_def> *const key_descr,
+ const uint32 n_keys,
+ rocksdb::WriteBatch *const batch) const;
+ void add_drop_index(const std::unordered_set<GL_INDEX_ID> &gl_index_ids,
+ rocksdb::WriteBatch *const batch) const;
+ void add_create_index(const std::unordered_set<GL_INDEX_ID> &gl_index_ids,
+ rocksdb::WriteBatch *const batch) const;
+ void finish_indexes_operation(
+ const std::unordered_set<GL_INDEX_ID> &gl_index_ids,
+ Rdb_key_def::DATA_DICT_TYPE dd_type) const;
+ void rollback_ongoing_index_creation() const;
+
+ inline void get_ongoing_drop_indexes(
+ std::unordered_set<GL_INDEX_ID> *gl_index_ids) const {
+ get_ongoing_index_operation(gl_index_ids,
+ Rdb_key_def::DDL_DROP_INDEX_ONGOING);
+ }
+ inline void get_ongoing_create_indexes(
+ std::unordered_set<GL_INDEX_ID> *gl_index_ids) const {
+ get_ongoing_index_operation(gl_index_ids,
+ Rdb_key_def::DDL_CREATE_INDEX_ONGOING);
+ }
+ inline void start_drop_index(rocksdb::WriteBatch *wb,
+ const GL_INDEX_ID &gl_index_id) const {
+ start_ongoing_index_operation(wb, gl_index_id,
+ Rdb_key_def::DDL_DROP_INDEX_ONGOING);
+ }
+ inline void start_create_index(rocksdb::WriteBatch *wb,
+ const GL_INDEX_ID &gl_index_id) const {
+ start_ongoing_index_operation(wb, gl_index_id,
+ Rdb_key_def::DDL_CREATE_INDEX_ONGOING);
+ }
+ inline void finish_drop_indexes(
+ const std::unordered_set<GL_INDEX_ID> &gl_index_ids) const {
+ finish_indexes_operation(gl_index_ids, Rdb_key_def::DDL_DROP_INDEX_ONGOING);
+ }
+ inline void finish_create_indexes(
+ const std::unordered_set<GL_INDEX_ID> &gl_index_ids) const {
+ finish_indexes_operation(gl_index_ids,
+ Rdb_key_def::DDL_CREATE_INDEX_ONGOING);
+ }
+ inline bool is_drop_index_ongoing(const GL_INDEX_ID &gl_index_id) const {
+ return is_index_operation_ongoing(gl_index_id,
+ Rdb_key_def::DDL_DROP_INDEX_ONGOING);
+ }
+ inline bool is_create_index_ongoing(const GL_INDEX_ID &gl_index_id) const {
+ return is_index_operation_ongoing(gl_index_id,
+ Rdb_key_def::DDL_CREATE_INDEX_ONGOING);
+ }
+
+ bool get_max_index_id(uint32_t *const index_id) const;
+ bool update_max_index_id(rocksdb::WriteBatch *const batch,
+ const uint32_t index_id) const;
+ void add_stats(rocksdb::WriteBatch *const batch,
+ const std::vector<Rdb_index_stats> &stats) const;
+ Rdb_index_stats get_stats(GL_INDEX_ID gl_index_id) const;
+
+ rocksdb::Status put_auto_incr_val(rocksdb::WriteBatchBase *batch,
+ const GL_INDEX_ID &gl_index_id,
+ ulonglong val,
+ bool overwrite = false) const;
+ bool get_auto_incr_val(const GL_INDEX_ID &gl_index_id,
+ ulonglong *new_val) const;
+};
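+
+// A minimal usage sketch of the begin()/commit() wrappers described above,
+// batching several dictionary writes atomically (names such as `index_info`
+// and `new_index_id` are placeholders):
+//
+//   std::unique_ptr<rocksdb::WriteBatch> batch = dict->begin();
+//   dict->add_or_update_index_cf_mapping(batch.get(), &index_info);
+//   dict->update_max_index_id(batch.get(), new_index_id);
+//   dict->commit(batch.get());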
+
+struct Rdb_index_info {
+ GL_INDEX_ID m_gl_index_id;
+ uint16_t m_index_dict_version = 0;
+ uchar m_index_type = 0;
+ uint16_t m_kv_version = 0;
+ uint32 m_index_flags = 0;
+ uint64 m_ttl_duration = 0;
+};
+
+/*
+ @brief
+ Merge Operator for the auto_increment value in the system_cf
+
+ @detail
+ This class implements the rocksdb Merge Operator for auto_increment values
+  that are stored to the data dictionary on every transaction.
+
+ The actual Merge function is triggered on compaction, memtable flushes, or
+ when get() is called on the same key.
+
+ */
+class Rdb_system_merge_op : public rocksdb::AssociativeMergeOperator {
+ public:
+ /*
+ Updates the new value associated with a key to be the maximum of the
+ passed in value and the existing value.
+
+    @param[IN]  key
+    @param[IN]  existing_value  existing value for the key; nullptr if the
+                                key does not exist yet
+    @param[IN]  value
+    @param[OUT] new_value       new value after Merge
+    @param[IN]  logger
+ */
+ bool Merge(const rocksdb::Slice &key, const rocksdb::Slice *existing_value,
+ const rocksdb::Slice &value, std::string *new_value,
+ rocksdb::Logger *logger) const override {
+ DBUG_ASSERT(new_value != nullptr);
+
+ if (key.size() != Rdb_key_def::INDEX_NUMBER_SIZE * 3 ||
+ GetKeyType(key) != Rdb_key_def::AUTO_INC ||
+ value.size() !=
+ RDB_SIZEOF_AUTO_INCREMENT_VERSION + ROCKSDB_SIZEOF_AUTOINC_VALUE ||
+ GetVersion(value) > Rdb_key_def::AUTO_INCREMENT_VERSION) {
+ abort();
+ }
+
+ uint64_t merged_value = Deserialize(value);
+
+ if (existing_value != nullptr) {
+ if (existing_value->size() != RDB_SIZEOF_AUTO_INCREMENT_VERSION +
+ ROCKSDB_SIZEOF_AUTOINC_VALUE ||
+ GetVersion(*existing_value) > Rdb_key_def::AUTO_INCREMENT_VERSION) {
+ abort();
+ }
+
+ merged_value = std::max(merged_value, Deserialize(*existing_value));
+ }
+ Serialize(merged_value, new_value);
+ return true;
+ }
+
+ virtual const char *Name() const override { return "Rdb_system_merge_op"; }
+
+ private:
+ /*
+    Serializes the integer data into new_value, the target buffer that the
+    merge operator will write.
+ */
+ void Serialize(const uint64_t data, std::string *new_value) const {
+ uchar value_buf[RDB_SIZEOF_AUTO_INCREMENT_VERSION +
+ ROCKSDB_SIZEOF_AUTOINC_VALUE] = {0};
+ uchar *ptr = value_buf;
+ /* fill in the auto increment version */
+ rdb_netbuf_store_uint16(ptr, Rdb_key_def::AUTO_INCREMENT_VERSION);
+ ptr += RDB_SIZEOF_AUTO_INCREMENT_VERSION;
+ /* fill in the auto increment value */
+ rdb_netbuf_store_uint64(ptr, data);
+ ptr += ROCKSDB_SIZEOF_AUTOINC_VALUE;
+ new_value->assign(reinterpret_cast<char *>(value_buf), ptr - value_buf);
+ }
+
+ /*
+    Gets the auto_increment value in the data dictionary from the
+    value slice.
+
+    @Note Only to be used on data dictionary values for the auto_increment type
+ */
+ uint64_t Deserialize(const rocksdb::Slice &s) const {
+ return rdb_netbuf_to_uint64(reinterpret_cast<const uchar *>(s.data()) +
+ RDB_SIZEOF_AUTO_INCREMENT_VERSION);
+ }
+
+ /*
+    Gets the type of the key in the data dictionary.
+
+ @Note Only to be used on data dictionary keys for the auto_increment type
+ */
+ uint16_t GetKeyType(const rocksdb::Slice &s) const {
+ return rdb_netbuf_to_uint32(reinterpret_cast<const uchar *>(s.data()));
+ }
+
+ /*
+ Gets the version of the auto_increment value in the data dictionary.
+
+ @Note Only to be used on data dictionary value for the auto_increment type
+ */
+ uint16_t GetVersion(const rocksdb::Slice &s) const {
+ return rdb_netbuf_to_uint16(reinterpret_cast<const uchar *>(s.data()));
+ }
+};
+
+bool rdb_is_collation_supported(const my_core::CHARSET_INFO *const cs);
+
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_global.h b/storage/rocksdb/rdb_global.h
new file mode 100644
index 00000000000..7213571bf61
--- /dev/null
+++ b/storage/rocksdb/rdb_global.h
@@ -0,0 +1,392 @@
+/*
+ Copyright (c) 2018, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* MyRocks global type definitions go here */
+
+#pragma once
+
+/* C++ standard header files */
+#include <limits>
+#include <string>
+#include <vector>
+
+/* MySQL header files */
+#include "./handler.h" /* handler */
+#include "./my_global.h" /* ulonglong */
+#include "./sql_string.h"
+#include "./ut0counter.h"
+
+namespace myrocks {
+/*
+ * class for exporting transaction information for
+ * information_schema.rocksdb_trx
+ */
+struct Rdb_trx_info {
+ std::string name;
+ ulonglong trx_id;
+ ulonglong write_count;
+ ulonglong lock_count;
+ int timeout_sec;
+ std::string state;
+ std::string waiting_key;
+ ulonglong waiting_cf_id;
+ int is_replication;
+ int skip_trx_api;
+ int read_only;
+ int deadlock_detect;
+ int num_ongoing_bulk_load;
+ ulong thread_id;
+ std::string query_str;
+};
+
+std::vector<Rdb_trx_info> rdb_get_all_trx_info();
+
+/*
+ * class for exporting deadlock transaction information for
+ * information_schema.rocksdb_deadlock
+ */
+struct Rdb_deadlock_info {
+ struct Rdb_dl_trx_info {
+ ulonglong trx_id;
+ std::string cf_name;
+ std::string waiting_key;
+ bool exclusive_lock;
+ std::string index_name;
+ std::string table_name;
+ };
+ std::vector<Rdb_dl_trx_info> path;
+ int64_t deadlock_time;
+ ulonglong victim_trx_id;
+};
+
+std::vector<Rdb_deadlock_info> rdb_get_deadlock_info();
+
+/*
+ This is
+  - the name of the default Column Family (the CF storing indexes that did
+    not explicitly specify which CF they are in)
+ - the name used to set the default column family parameter for per-cf
+ arguments.
+*/
+extern const std::string DEFAULT_CF_NAME;
+
+/*
+ This is the name of the Column Family used for storing the data dictionary.
+*/
+extern const std::string DEFAULT_SYSTEM_CF_NAME;
+
+/*
+  This is the name of the hidden primary key column for tables with no PK.
+*/
+const char *const HIDDEN_PK_NAME = "HIDDEN_PK_ID";
+
+/*
+ Column family name which means "put this index into its own column family".
+ DEPRECATED!!!
+*/
+extern const std::string PER_INDEX_CF_NAME;
+
+/*
+ Name for the background thread.
+*/
+const char *const BG_THREAD_NAME = "myrocks-bg";
+
+/*
+ Name for the drop index thread.
+*/
+const char *const INDEX_THREAD_NAME = "myrocks-index";
+
+/*
+ Name for the manual compaction thread.
+*/
+const char *const MANUAL_COMPACTION_THREAD_NAME = "myrocks-mc";
+
+/*
+ Separator between partition name and the qualifier. Sample usage:
+
+ - p0_cfname=foo
+  - p3_ttl_col=bar
+*/
+const char RDB_PER_PARTITION_QUALIFIER_NAME_SEP = '_';
+
+/*
+ Separator between qualifier name and value. Sample usage:
+
+ - p0_cfname=foo
+  - p3_ttl_col=bar
+*/
+const char RDB_QUALIFIER_VALUE_SEP = '=';
+
+/*
+ Separator between multiple qualifier assignments. Sample usage:
+
+ - p0_cfname=foo;p1_cfname=bar;p2_cfname=baz
+*/
+const char RDB_QUALIFIER_SEP = ';';
+
+/*
+ Qualifier name for a custom per partition column family.
+*/
+const char *const RDB_CF_NAME_QUALIFIER = "cfname";
+
+/*
+ Qualifier name for a custom per partition ttl duration.
+*/
+const char *const RDB_TTL_DURATION_QUALIFIER = "ttl_duration";
+
+/*
+  Qualifier name for a custom per partition ttl column.
+*/
+const char *const RDB_TTL_COL_QUALIFIER = "ttl_col";
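+
+/*
+  Putting the separators and qualifiers above together, a complete
+  per-partition assignment string could look like this (illustrative values):
+
+  - p0_cfname=cf_a;p0_ttl_duration=86400;p0_ttl_col=ts;p1_cfname=cf_b
+*/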
+
+/*
+  Default, minimum valid, and maximum valid sampling rate values used when
+  collecting statistics about a table.
+*/
+#define RDB_DEFAULT_TBL_STATS_SAMPLE_PCT 10
+#define RDB_TBL_STATS_SAMPLE_PCT_MIN 1
+#define RDB_TBL_STATS_SAMPLE_PCT_MAX 100
+
+/*
+ Default and maximum values for rocksdb-compaction-sequential-deletes and
+ rocksdb-compaction-sequential-deletes-window to add basic boundary checking.
+*/
+#define DEFAULT_COMPACTION_SEQUENTIAL_DELETES 0
+#define MAX_COMPACTION_SEQUENTIAL_DELETES 2000000
+
+#define DEFAULT_COMPACTION_SEQUENTIAL_DELETES_WINDOW 0
+#define MAX_COMPACTION_SEQUENTIAL_DELETES_WINDOW 2000000
+
+/*
+ Default and maximum values for various compaction and flushing related
+ options. Numbers are based on the hardware we currently use and our internal
+  benchmarks, which indicate that parallelization helps with the speed of
+  compactions.
+
+  Ideally, of course, we would use a heuristic to determine the number of
+  CPUs and derive the values from there. That, however, has its own set of
+  problems, so we choose simplicity for now.
+*/
+#define MAX_BACKGROUND_JOBS 64
+
+#define DEFAULT_SUBCOMPACTIONS 1
+#define MAX_SUBCOMPACTIONS 64
+
+/*
+ Default value for rocksdb_sst_mgr_rate_bytes_per_sec = 0 (disabled).
+*/
+#define DEFAULT_SST_MGR_RATE_BYTES_PER_SEC 0
+
+/*
+ Defines the field sizes for serializing XID object to a string representation.
+ string byte format: [field_size: field_value, ...]
+ [
+ 8: XID.formatID,
+ 1: XID.gtrid_length,
+ 1: XID.bqual_length,
+ XID.gtrid_length + XID.bqual_length: XID.data
+ ]
+*/
+#define RDB_FORMATID_SZ 8
+#define RDB_GTRID_SZ 1
+#define RDB_BQUAL_SZ 1
+#define RDB_XIDHDR_LEN (RDB_FORMATID_SZ + RDB_GTRID_SZ + RDB_BQUAL_SZ)
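+
+/*
+  Worked example: an XID with formatID=1, gtrid="abc" (gtrid_length=3) and an
+  empty bqual (bqual_length=0) serializes to
+  RDB_XIDHDR_LEN + 3 + 0 = 8 + 1 + 1 + 3 = 13 bytes.
+*/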
+
+/*
+  To avoid an unhandled exception we specify the upper bound as LONGLONGMAX
+  instead of ULONGLONGMAX, because the latter is -1 and causes an exception
+  when cast to JNI's (signed) jlong.
+
+ The reason behind the cast issue is the lack of unsigned int support in Java.
+*/
+#define MAX_RATE_LIMITER_BYTES_PER_SEC static_cast<uint64_t>(LLONG_MAX)
+
+/*
+ Hidden PK column (for tables with no primary key) is a longlong (aka 8 bytes).
+ static_assert() in code will validate this assumption.
+*/
+#define ROCKSDB_SIZEOF_HIDDEN_PK_COLUMN sizeof(longlong)
+
+/*
+ Bytes used to store TTL, in the beginning of all records for tables with TTL
+ enabled.
+*/
+#define ROCKSDB_SIZEOF_TTL_RECORD sizeof(longlong)
+
+#define ROCKSDB_SIZEOF_AUTOINC_VALUE sizeof(longlong)
+
+/*
+ Maximum index prefix length in bytes.
+*/
+#define MAX_INDEX_COL_LEN_LARGE 3072
+#define MAX_INDEX_COL_LEN_SMALL 767
+
+/*
+  MyRocks specific error codes. NB! Please make sure to update
+ HA_ERR_ROCKSDB_LAST when adding new ones. Also update the strings in
+ rdb_error_messages to include any new error messages.
+*/
+#define HA_ERR_ROCKSDB_FIRST (HA_ERR_LAST + 1)
+#define HA_ERR_ROCKSDB_PK_REQUIRED (HA_ERR_ROCKSDB_FIRST + 0)
+#define HA_ERR_ROCKSDB_TABLE_DATA_DIRECTORY_NOT_SUPPORTED \
+ (HA_ERR_ROCKSDB_FIRST + 1)
+#define HA_ERR_ROCKSDB_TABLE_INDEX_DIRECTORY_NOT_SUPPORTED \
+ (HA_ERR_ROCKSDB_FIRST + 2)
+#define HA_ERR_ROCKSDB_COMMIT_FAILED (HA_ERR_ROCKSDB_FIRST + 3)
+#define HA_ERR_ROCKSDB_BULK_LOAD (HA_ERR_ROCKSDB_FIRST + 4)
+#define HA_ERR_ROCKSDB_CORRUPT_DATA (HA_ERR_ROCKSDB_FIRST + 5)
+#define HA_ERR_ROCKSDB_CHECKSUM_MISMATCH (HA_ERR_ROCKSDB_FIRST + 6)
+#define HA_ERR_ROCKSDB_INVALID_TABLE (HA_ERR_ROCKSDB_FIRST + 7)
+#define HA_ERR_ROCKSDB_PROPERTIES (HA_ERR_ROCKSDB_FIRST + 8)
+#define HA_ERR_ROCKSDB_MERGE_FILE_ERR (HA_ERR_ROCKSDB_FIRST + 9)
+/*
+ Each error code below maps to a RocksDB status code found in:
+ rocksdb/include/rocksdb/status.h
+*/
+#define HA_ERR_ROCKSDB_STATUS_NOT_FOUND (HA_ERR_ROCKSDB_FIRST + 10)
+#define HA_ERR_ROCKSDB_STATUS_CORRUPTION (HA_ERR_ROCKSDB_FIRST + 11)
+#define HA_ERR_ROCKSDB_STATUS_NOT_SUPPORTED (HA_ERR_ROCKSDB_FIRST + 12)
+#define HA_ERR_ROCKSDB_STATUS_INVALID_ARGUMENT (HA_ERR_ROCKSDB_FIRST + 13)
+#define HA_ERR_ROCKSDB_STATUS_IO_ERROR (HA_ERR_ROCKSDB_FIRST + 14)
+#define HA_ERR_ROCKSDB_STATUS_NO_SPACE (HA_ERR_ROCKSDB_FIRST + 15)
+#define HA_ERR_ROCKSDB_STATUS_MERGE_IN_PROGRESS (HA_ERR_ROCKSDB_FIRST + 16)
+#define HA_ERR_ROCKSDB_STATUS_INCOMPLETE (HA_ERR_ROCKSDB_FIRST + 17)
+#define HA_ERR_ROCKSDB_STATUS_SHUTDOWN_IN_PROGRESS (HA_ERR_ROCKSDB_FIRST + 18)
+#define HA_ERR_ROCKSDB_STATUS_TIMED_OUT (HA_ERR_ROCKSDB_FIRST + 19)
+#define HA_ERR_ROCKSDB_STATUS_ABORTED (HA_ERR_ROCKSDB_FIRST + 20)
+#define HA_ERR_ROCKSDB_STATUS_LOCK_LIMIT (HA_ERR_ROCKSDB_FIRST + 21)
+#define HA_ERR_ROCKSDB_STATUS_BUSY (HA_ERR_ROCKSDB_FIRST + 22)
+#define HA_ERR_ROCKSDB_STATUS_DEADLOCK (HA_ERR_ROCKSDB_FIRST + 23)
+#define HA_ERR_ROCKSDB_STATUS_EXPIRED (HA_ERR_ROCKSDB_FIRST + 24)
+#define HA_ERR_ROCKSDB_STATUS_TRY_AGAIN (HA_ERR_ROCKSDB_FIRST + 25)
+#define HA_ERR_ROCKSDB_LAST HA_ERR_ROCKSDB_STATUS_TRY_AGAIN
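+
+/*
+  Note on the numbering: HA_ERR_ROCKSDB_MERGE_FILE_ERR is
+  HA_ERR_ROCKSDB_FIRST + 9, so the status codes above continue the MyRocks
+  error range contiguously, starting at HA_ERR_ROCKSDB_FIRST + 10.
+*/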
+
+const char *const rocksdb_hton_name = "ROCKSDB";
+
+typedef struct _gl_index_id_s {
+ uint32_t cf_id;
+ uint32_t index_id;
+ bool operator==(const struct _gl_index_id_s &other) const {
+ return cf_id == other.cf_id && index_id == other.index_id;
+ }
+ bool operator!=(const struct _gl_index_id_s &other) const {
+ return cf_id != other.cf_id || index_id != other.index_id;
+ }
+ bool operator<(const struct _gl_index_id_s &other) const {
+ return cf_id < other.cf_id ||
+ (cf_id == other.cf_id && index_id < other.index_id);
+ }
+ bool operator<=(const struct _gl_index_id_s &other) const {
+ return cf_id < other.cf_id ||
+ (cf_id == other.cf_id && index_id <= other.index_id);
+ }
+ bool operator>(const struct _gl_index_id_s &other) const {
+ return cf_id > other.cf_id ||
+ (cf_id == other.cf_id && index_id > other.index_id);
+ }
+ bool operator>=(const struct _gl_index_id_s &other) const {
+ return cf_id > other.cf_id ||
+ (cf_id == other.cf_id && index_id >= other.index_id);
+ }
+} GL_INDEX_ID;
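+
+// The comparison operators above order GL_INDEX_ID values lexicographically
+// by (cf_id, index_id); for example {1, 7} < {2, 0} and {2, 0} < {2, 1}.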
+
+enum operation_type : int {
+ ROWS_DELETED = 0,
+ ROWS_INSERTED,
+ ROWS_READ,
+ ROWS_UPDATED,
+ ROWS_DELETED_BLIND,
+ ROWS_EXPIRED,
+ ROWS_FILTERED,
+ ROWS_HIDDEN_NO_SNAPSHOT,
+ ROWS_MAX
+};
+
+enum query_type : int { QUERIES_POINT = 0, QUERIES_RANGE, QUERIES_MAX };
+
+#if defined(HAVE_SCHED_GETCPU)
+#define RDB_INDEXER get_sched_indexer_t
+#else
+#define RDB_INDEXER thread_id_indexer_t
+#endif
+
+/* Global statistics struct used inside MyRocks */
+struct st_global_stats {
+ ib_counter_t<ulonglong, 64, RDB_INDEXER> rows[ROWS_MAX];
+
+ // system_rows_ stats are only for system
+ // tables. They are not counted in rows_* stats.
+ ib_counter_t<ulonglong, 64, RDB_INDEXER> system_rows[ROWS_MAX];
+
+ ib_counter_t<ulonglong, 64, RDB_INDEXER> queries[QUERIES_MAX];
+
+ ib_counter_t<ulonglong, 64, RDB_INDEXER> covered_secondary_key_lookups;
+};
+
+/* Struct used for exporting status to MySQL */
+struct st_export_stats {
+ ulonglong rows_deleted;
+ ulonglong rows_inserted;
+ ulonglong rows_read;
+ ulonglong rows_updated;
+ ulonglong rows_deleted_blind;
+ ulonglong rows_expired;
+ ulonglong rows_filtered;
+ ulonglong rows_hidden_no_snapshot;
+
+ ulonglong system_rows_deleted;
+ ulonglong system_rows_inserted;
+ ulonglong system_rows_read;
+ ulonglong system_rows_updated;
+
+ ulonglong queries_point;
+ ulonglong queries_range;
+
+ ulonglong covered_secondary_key_lookups;
+};
+
+/* Struct used for exporting RocksDB memory status */
+struct st_memory_stats {
+ ulonglong memtable_total;
+ ulonglong memtable_unflushed;
+};
+
+/* Struct used for exporting RocksDB IO stalls stats */
+struct st_io_stall_stats {
+ ulonglong level0_slowdown;
+ ulonglong level0_slowdown_with_compaction;
+ ulonglong level0_numfiles;
+ ulonglong level0_numfiles_with_compaction;
+ ulonglong stop_for_pending_compaction_bytes;
+ ulonglong slowdown_for_pending_compaction_bytes;
+ ulonglong memtable_compaction;
+ ulonglong memtable_slowdown;
+ ulonglong total_stop;
+ ulonglong total_slowdown;
+
+ st_io_stall_stats()
+ : level0_slowdown(0),
+ level0_slowdown_with_compaction(0),
+ level0_numfiles(0),
+ level0_numfiles_with_compaction(0),
+ stop_for_pending_compaction_bytes(0),
+ slowdown_for_pending_compaction_bytes(0),
+ memtable_compaction(0),
+ memtable_slowdown(0),
+ total_stop(0),
+ total_slowdown(0) {}
+};
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_i_s.cc b/storage/rocksdb/rdb_i_s.cc
new file mode 100644
index 00000000000..c4b2f4d3bb6
--- /dev/null
+++ b/storage/rocksdb/rdb_i_s.cc
@@ -0,0 +1,2020 @@
+/*
+ Copyright (c) 2012, Monty Program Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+
+#include <my_config.h>
+
+/* C++ standard header files */
+#include <algorithm>
+#include <cctype>
+#include <map>
+#include <string>
+#include <vector>
+
+/* MySQL header files */
+#include <sql_show.h>
+
+/* RocksDB header files */
+#include "rocksdb/compaction_filter.h"
+#include "rocksdb/convenience.h"
+#include "rocksdb/filter_policy.h"
+#include "rocksdb/memtablerep.h"
+#include "rocksdb/merge_operator.h"
+#include "rocksdb/slice_transform.h"
+#include "rocksdb/utilities/transaction_db.h"
+
+/* MyRocks header files */
+#include "./ha_rocksdb.h"
+#include "./ha_rocksdb_proto.h"
+#include "./rdb_cf_manager.h"
+#include "./rdb_datadic.h"
+#include "./rdb_utils.h"
+#include "./rdb_mariadb_server_port.h"
+
+#include "./rdb_mariadb_port.h"
+
+namespace myrocks {
+
+/**
+  Define the INFORMATION_SCHEMA (I_S) structures needed by the MyRocks
+  storage engine.
+*/
+
+#define ROCKSDB_FIELD_INFO(_name_, _len_, _type_, _flag_) \
+ { _name_, _len_, _type_, 0, _flag_, nullptr, 0 }
+
+#define ROCKSDB_FIELD_INFO_END \
+ ROCKSDB_FIELD_INFO(nullptr, 0, MYSQL_TYPE_NULL, 0)
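+
+// For example, ROCKSDB_FIELD_INFO("CF_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING,
+// 0) expands to the ST_FIELD_INFO initializer
+//   { "CF_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0, 0, nullptr, 0 }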
+
+/*
+ Support for INFORMATION_SCHEMA.ROCKSDB_CFSTATS dynamic table
+ */
+namespace RDB_CFSTATS_FIELD {
+enum { CF_NAME = 0, STAT_TYPE, VALUE };
+} // namespace RDB_CFSTATS_FIELD
+
+static ST_FIELD_INFO rdb_i_s_cfstats_fields_info[] = {
+ ROCKSDB_FIELD_INFO("CF_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("STAT_TYPE", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("VALUE", sizeof(uint64_t), MYSQL_TYPE_LONGLONG, 0),
+ ROCKSDB_FIELD_INFO_END};
+
+static int rdb_i_s_cfstats_fill_table(
+ my_core::THD *const thd, my_core::TABLE_LIST *const tables,
+ my_core::Item *const cond MY_ATTRIBUTE((__unused__))) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(tables != nullptr);
+ DBUG_ASSERT(tables->table != nullptr);
+ DBUG_ASSERT(tables->table->field != nullptr);
+
+ int ret = 0;
+ uint64_t val;
+
+ const std::vector<std::pair<const std::string, std::string>> cf_properties = {
+ {rocksdb::DB::Properties::kNumImmutableMemTable,
+ "NUM_IMMUTABLE_MEM_TABLE"},
+ {rocksdb::DB::Properties::kMemTableFlushPending,
+ "MEM_TABLE_FLUSH_PENDING"},
+ {rocksdb::DB::Properties::kCompactionPending, "COMPACTION_PENDING"},
+ {rocksdb::DB::Properties::kCurSizeActiveMemTable,
+ "CUR_SIZE_ACTIVE_MEM_TABLE"},
+ {rocksdb::DB::Properties::kCurSizeAllMemTables,
+ "CUR_SIZE_ALL_MEM_TABLES"},
+ {rocksdb::DB::Properties::kNumEntriesActiveMemTable,
+ "NUM_ENTRIES_ACTIVE_MEM_TABLE"},
+ {rocksdb::DB::Properties::kNumEntriesImmMemTables,
+ "NUM_ENTRIES_IMM_MEM_TABLES"},
+ {rocksdb::DB::Properties::kEstimateTableReadersMem,
+ "NON_BLOCK_CACHE_SST_MEM_USAGE"},
+ {rocksdb::DB::Properties::kNumLiveVersions, "NUM_LIVE_VERSIONS"}};
+
+ rocksdb::DB *const rdb = rdb_get_rocksdb_db();
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
+
+ const Rdb_cf_manager &cf_manager = rdb_get_cf_manager();
+
+ for (const auto &cf_name : cf_manager.get_cf_names()) {
+ DBUG_ASSERT(!cf_name.empty());
+ rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(cf_name);
+ if (cfh == nullptr) {
+ continue;
+ }
+
+ for (const auto &property : cf_properties) {
+ if (!rdb->GetIntProperty(cfh, property.first, &val)) {
+ continue;
+ }
+
+ tables->table->field[RDB_CFSTATS_FIELD::CF_NAME]->store(
+ cf_name.c_str(), cf_name.size(), system_charset_info);
+ tables->table->field[RDB_CFSTATS_FIELD::STAT_TYPE]->store(
+ property.second.c_str(), property.second.size(), system_charset_info);
+ tables->table->field[RDB_CFSTATS_FIELD::VALUE]->store(val, true);
+
+ ret = static_cast<int>(
+ my_core::schema_table_store_record(thd, tables->table));
+
+ if (ret) {
+ DBUG_RETURN(ret);
+ }
+ }
+ }
+
+ DBUG_RETURN(0);
+}
+
+static int rdb_i_s_cfstats_init(void *p) {
+ DBUG_ENTER_FUNC();
+
+ if (prevent_myrocks_loading)
+ DBUG_RETURN(1);
+
+ DBUG_ASSERT(p != nullptr);
+
+ my_core::ST_SCHEMA_TABLE *schema;
+
+ schema = (my_core::ST_SCHEMA_TABLE *)p;
+
+ schema->fields_info = rdb_i_s_cfstats_fields_info;
+ schema->fill_table = rdb_i_s_cfstats_fill_table;
+
+ DBUG_RETURN(0);
+}
+
+/*
+ Support for INFORMATION_SCHEMA.ROCKSDB_DBSTATS dynamic table
+ */
+namespace RDB_DBSTATS_FIELD {
+enum { STAT_TYPE = 0, VALUE };
+} // namespace RDB_DBSTATS_FIELD
+
+static ST_FIELD_INFO rdb_i_s_dbstats_fields_info[] = {
+ ROCKSDB_FIELD_INFO("STAT_TYPE", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("VALUE", sizeof(uint64_t), MYSQL_TYPE_LONGLONG, 0),
+ ROCKSDB_FIELD_INFO_END};
+
+static int rdb_i_s_dbstats_fill_table(
+ my_core::THD *const thd, my_core::TABLE_LIST *const tables,
+ my_core::Item *const cond MY_ATTRIBUTE((__unused__))) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(tables != nullptr);
+ DBUG_ASSERT(tables->table != nullptr);
+ DBUG_ASSERT(tables->table->field != nullptr);
+
+ int ret = 0;
+ uint64_t val;
+
+ const std::vector<std::pair<std::string, std::string>> db_properties = {
+ {rocksdb::DB::Properties::kBackgroundErrors, "DB_BACKGROUND_ERRORS"},
+ {rocksdb::DB::Properties::kNumSnapshots, "DB_NUM_SNAPSHOTS"},
+ {rocksdb::DB::Properties::kOldestSnapshotTime,
+ "DB_OLDEST_SNAPSHOT_TIME"}};
+
+ rocksdb::DB *const rdb = rdb_get_rocksdb_db();
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
+
+ const rocksdb::BlockBasedTableOptions &table_options =
+ rdb_get_table_options();
+
+ for (const auto &property : db_properties) {
+ if (!rdb->GetIntProperty(property.first, &val)) {
+ continue;
+ }
+
+ tables->table->field[RDB_DBSTATS_FIELD::STAT_TYPE]->store(
+ property.second.c_str(), property.second.size(), system_charset_info);
+ tables->table->field[RDB_DBSTATS_FIELD::VALUE]->store(val, true);
+
+ ret = static_cast<int>(
+ my_core::schema_table_store_record(thd, tables->table));
+
+ if (ret) {
+ DBUG_RETURN(ret);
+ }
+ }
+
+ /*
+ Currently, this can only show the usage of a block cache allocated
+ directly by the handlerton. If the column family config specifies a block
+ cache (i.e. the column family option has a parameter such as
+ block_based_table_factory={block_cache=1G}), then the block cache is
+    allocated within rocksdb::GetColumnFamilyOptionsFromString().
+
+    There is no interface to retrieve this block cache, nor to fetch its usage
+    information from the column family.
+ */
+ val = (table_options.block_cache ? table_options.block_cache->GetUsage() : 0);
+
+ tables->table->field[RDB_DBSTATS_FIELD::STAT_TYPE]->store(
+ STRING_WITH_LEN("DB_BLOCK_CACHE_USAGE"), system_charset_info);
+ tables->table->field[RDB_DBSTATS_FIELD::VALUE]->store(val, true);
+
+ ret =
+ static_cast<int>(my_core::schema_table_store_record(thd, tables->table));
+
+ DBUG_RETURN(ret);
+}
+
+static int rdb_i_s_dbstats_init(void *const p) {
+ DBUG_ENTER_FUNC();
+
+ if (prevent_myrocks_loading)
+ DBUG_RETURN(1);
+
+ DBUG_ASSERT(p != nullptr);
+
+ my_core::ST_SCHEMA_TABLE *schema;
+
+ schema = (my_core::ST_SCHEMA_TABLE *)p;
+
+ schema->fields_info = rdb_i_s_dbstats_fields_info;
+ schema->fill_table = rdb_i_s_dbstats_fill_table;
+
+ DBUG_RETURN(0);
+}
+
+/*
+ Support for INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT dynamic table
+ */
+namespace RDB_PERF_CONTEXT_FIELD {
+enum { TABLE_SCHEMA = 0, TABLE_NAME, PARTITION_NAME, STAT_TYPE, VALUE };
+} // namespace RDB_PERF_CONTEXT_FIELD
+
+static ST_FIELD_INFO rdb_i_s_perf_context_fields_info[] = {
+ ROCKSDB_FIELD_INFO("TABLE_SCHEMA", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("TABLE_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("PARTITION_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING,
+ MY_I_S_MAYBE_NULL),
+ ROCKSDB_FIELD_INFO("STAT_TYPE", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("VALUE", sizeof(uint64_t), MYSQL_TYPE_LONGLONG, 0),
+ ROCKSDB_FIELD_INFO_END};
+
+static int rdb_i_s_perf_context_fill_table(
+ my_core::THD *const thd, my_core::TABLE_LIST *const tables,
+ my_core::Item *const cond MY_ATTRIBUTE((__unused__))) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(thd != nullptr);
+ DBUG_ASSERT(tables != nullptr);
+ DBUG_ASSERT(tables->table != nullptr);
+
+ int ret = 0;
+ Field **field = tables->table->field;
+ DBUG_ASSERT(field != nullptr);
+
+ rocksdb::DB *const rdb = rdb_get_rocksdb_db();
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
+
+ const std::vector<std::string> tablenames = rdb_get_open_table_names();
+
+ for (const auto &it : tablenames) {
+ std::string str, dbname, tablename, partname;
+ Rdb_perf_counters counters;
+
+ int rc = rdb_normalize_tablename(it, &str);
+
+ if (rc != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(rc);
+ }
+
+ if (rdb_split_normalized_tablename(str, &dbname, &tablename, &partname)) {
+ continue;
+ }
+
+ if (rdb_get_table_perf_counters(it.c_str(), &counters)) {
+ continue;
+ }
+
+ field[RDB_PERF_CONTEXT_FIELD::TABLE_SCHEMA]->store(
+ dbname.c_str(), dbname.size(), system_charset_info);
+ field[RDB_PERF_CONTEXT_FIELD::TABLE_NAME]->store(
+ tablename.c_str(), tablename.size(), system_charset_info);
+
+ if (partname.size() == 0) {
+ field[RDB_PERF_CONTEXT_FIELD::PARTITION_NAME]->set_null();
+ } else {
+ field[RDB_PERF_CONTEXT_FIELD::PARTITION_NAME]->set_notnull();
+ field[RDB_PERF_CONTEXT_FIELD::PARTITION_NAME]->store(
+ partname.c_str(), partname.size(), system_charset_info);
+ }
+
+ for (int i = 0; i < PC_MAX_IDX; i++) {
+ field[RDB_PERF_CONTEXT_FIELD::STAT_TYPE]->store(
+ rdb_pc_stat_types[i].c_str(), rdb_pc_stat_types[i].size(),
+ system_charset_info);
+ field[RDB_PERF_CONTEXT_FIELD::VALUE]->store(counters.m_value[i], true);
+
+ ret = static_cast<int>(
+ my_core::schema_table_store_record(thd, tables->table));
+
+ if (ret) {
+ DBUG_RETURN(ret);
+ }
+ }
+ }
+
+ DBUG_RETURN(0);
+}
+
+static int rdb_i_s_perf_context_init(void *const p) {
+ DBUG_ENTER_FUNC();
+
+ if (prevent_myrocks_loading)
+ DBUG_RETURN(1);
+ DBUG_ASSERT(p != nullptr);
+
+ my_core::ST_SCHEMA_TABLE *schema;
+
+ schema = (my_core::ST_SCHEMA_TABLE *)p;
+
+ schema->fields_info = rdb_i_s_perf_context_fields_info;
+ schema->fill_table = rdb_i_s_perf_context_fill_table;
+
+ DBUG_RETURN(0);
+}
+
+/*
+ Support for INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT_GLOBAL dynamic table
+ */
+namespace RDB_PERF_CONTEXT_GLOBAL_FIELD {
+enum { STAT_TYPE = 0, VALUE };
+} // namespace RDB_PERF_CONTEXT_GLOBAL_FIELD
+
+static ST_FIELD_INFO rdb_i_s_perf_context_global_fields_info[] = {
+ ROCKSDB_FIELD_INFO("STAT_TYPE", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("VALUE", sizeof(uint64_t), MYSQL_TYPE_LONGLONG, 0),
+ ROCKSDB_FIELD_INFO_END};
+
+static int rdb_i_s_perf_context_global_fill_table(
+ my_core::THD *const thd, my_core::TABLE_LIST *const tables,
+ my_core::Item *const cond MY_ATTRIBUTE((__unused__))) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(thd != nullptr);
+ DBUG_ASSERT(tables != nullptr);
+ DBUG_ASSERT(tables->table != nullptr);
+ DBUG_ASSERT(tables->table->field != nullptr);
+
+ int ret = 0;
+
+ rocksdb::DB *const rdb = rdb_get_rocksdb_db();
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
+
+ // Get a copy of the global perf counters.
+ Rdb_perf_counters global_counters;
+ rdb_get_global_perf_counters(&global_counters);
+
+ for (int i = 0; i < PC_MAX_IDX; i++) {
+ tables->table->field[RDB_PERF_CONTEXT_GLOBAL_FIELD::STAT_TYPE]->store(
+ rdb_pc_stat_types[i].c_str(), rdb_pc_stat_types[i].size(),
+ system_charset_info);
+ tables->table->field[RDB_PERF_CONTEXT_GLOBAL_FIELD::VALUE]->store(
+ global_counters.m_value[i], true);
+
+ ret = static_cast<int>(
+ my_core::schema_table_store_record(thd, tables->table));
+
+ if (ret) {
+ DBUG_RETURN(ret);
+ }
+ }
+
+ DBUG_RETURN(0);
+}
+
+static int rdb_i_s_perf_context_global_init(void *const p) {
+ DBUG_ENTER_FUNC();
+
+ if (prevent_myrocks_loading)
+ DBUG_RETURN(1);
+
+ DBUG_ASSERT(p != nullptr);
+
+ my_core::ST_SCHEMA_TABLE *schema;
+
+ schema = (my_core::ST_SCHEMA_TABLE *)p;
+
+ schema->fields_info = rdb_i_s_perf_context_global_fields_info;
+ schema->fill_table = rdb_i_s_perf_context_global_fill_table;
+
+ DBUG_RETURN(0);
+}
+
+/*
+ Support for INFORMATION_SCHEMA.ROCKSDB_CFOPTIONS dynamic table
+ */
+namespace RDB_CFOPTIONS_FIELD {
+enum { CF_NAME = 0, OPTION_TYPE, VALUE };
+} // namespace RDB_CFOPTIONS_FIELD
+
+static ST_FIELD_INFO rdb_i_s_cfoptions_fields_info[] = {
+ ROCKSDB_FIELD_INFO("CF_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("OPTION_TYPE", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("VALUE", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO_END};
+
+static int rdb_i_s_cfoptions_fill_table(
+ my_core::THD *const thd, my_core::TABLE_LIST *const tables,
+ my_core::Item *const cond MY_ATTRIBUTE((__unused__))) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(thd != nullptr);
+ DBUG_ASSERT(tables != nullptr);
+
+ int ret = 0;
+
+ rocksdb::DB *const rdb = rdb_get_rocksdb_db();
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
+
+ Rdb_cf_manager &cf_manager = rdb_get_cf_manager();
+
+ for (const auto &cf_name : cf_manager.get_cf_names()) {
+ std::string val;
+ rocksdb::ColumnFamilyOptions opts;
+
+ DBUG_ASSERT(!cf_name.empty());
+ cf_manager.get_cf_options(cf_name, &opts);
+
+ std::vector<std::pair<std::string, std::string>> cf_option_types = {
+ {"COMPARATOR", opts.comparator == nullptr
+ ? "NULL"
+ : std::string(opts.comparator->Name())},
+ {"MERGE_OPERATOR", opts.merge_operator == nullptr
+ ? "NULL"
+ : std::string(opts.merge_operator->Name())},
+ {"COMPACTION_FILTER",
+ opts.compaction_filter == nullptr
+ ? "NULL"
+ : std::string(opts.compaction_filter->Name())},
+ {"COMPACTION_FILTER_FACTORY",
+ opts.compaction_filter_factory == nullptr
+ ? "NULL"
+ : std::string(opts.compaction_filter_factory->Name())},
+ {"WRITE_BUFFER_SIZE", std::to_string(opts.write_buffer_size)},
+ {"MAX_WRITE_BUFFER_NUMBER",
+ std::to_string(opts.max_write_buffer_number)},
+ {"MIN_WRITE_BUFFER_NUMBER_TO_MERGE",
+ std::to_string(opts.min_write_buffer_number_to_merge)},
+ {"NUM_LEVELS", std::to_string(opts.num_levels)},
+ {"LEVEL0_FILE_NUM_COMPACTION_TRIGGER",
+ std::to_string(opts.level0_file_num_compaction_trigger)},
+ {"LEVEL0_SLOWDOWN_WRITES_TRIGGER",
+ std::to_string(opts.level0_slowdown_writes_trigger)},
+ {"LEVEL0_STOP_WRITES_TRIGGER",
+ std::to_string(opts.level0_stop_writes_trigger)},
+ {"MAX_MEM_COMPACTION_LEVEL",
+ std::to_string(opts.max_mem_compaction_level)},
+ {"TARGET_FILE_SIZE_BASE", std::to_string(opts.target_file_size_base)},
+ {"TARGET_FILE_SIZE_MULTIPLIER",
+ std::to_string(opts.target_file_size_multiplier)},
+ {"MAX_BYTES_FOR_LEVEL_BASE",
+ std::to_string(opts.max_bytes_for_level_base)},
+ {"LEVEL_COMPACTION_DYNAMIC_LEVEL_BYTES",
+ opts.level_compaction_dynamic_level_bytes ? "ON" : "OFF"},
+ {"MAX_BYTES_FOR_LEVEL_MULTIPLIER",
+ std::to_string(opts.max_bytes_for_level_multiplier)},
+ {"SOFT_RATE_LIMIT", std::to_string(opts.soft_rate_limit)},
+ {"HARD_RATE_LIMIT", std::to_string(opts.hard_rate_limit)},
+ {"RATE_LIMIT_DELAY_MAX_MILLISECONDS",
+ std::to_string(opts.rate_limit_delay_max_milliseconds)},
+ {"ARENA_BLOCK_SIZE", std::to_string(opts.arena_block_size)},
+ {"DISABLE_AUTO_COMPACTIONS",
+ opts.disable_auto_compactions ? "ON" : "OFF"},
+ {"PURGE_REDUNDANT_KVS_WHILE_FLUSH",
+ opts.purge_redundant_kvs_while_flush ? "ON" : "OFF"},
+ {"MAX_SEQUENTIAL_SKIP_IN_ITERATIONS",
+ std::to_string(opts.max_sequential_skip_in_iterations)},
+ {"MEMTABLE_FACTORY", opts.memtable_factory == nullptr
+ ? "NULL"
+ : opts.memtable_factory->Name()},
+ {"INPLACE_UPDATE_SUPPORT", opts.inplace_update_support ? "ON" : "OFF"},
+ {"INPLACE_UPDATE_NUM_LOCKS",
+ opts.inplace_update_num_locks ? "ON" : "OFF"},
+ {"MEMTABLE_PREFIX_BLOOM_BITS_RATIO",
+ std::to_string(opts.memtable_prefix_bloom_size_ratio)},
+ {"MEMTABLE_PREFIX_BLOOM_HUGE_PAGE_TLB_SIZE",
+ std::to_string(opts.memtable_huge_page_size)},
+ {"BLOOM_LOCALITY", std::to_string(opts.bloom_locality)},
+ {"MAX_SUCCESSIVE_MERGES", std::to_string(opts.max_successive_merges)},
+ {"OPTIMIZE_FILTERS_FOR_HITS",
+ (opts.optimize_filters_for_hits ? "ON" : "OFF")},
+ };
+
+ // get MAX_BYTES_FOR_LEVEL_MULTIPLIER_ADDITIONAL option value
+ val = opts.max_bytes_for_level_multiplier_additional.empty() ? "NULL" : "";
+
+ for (const auto &level : opts.max_bytes_for_level_multiplier_additional) {
+ val.append(std::to_string(level) + ":");
+ }
+
+    if (!val.empty() && val.back() == ':') {
+      val.pop_back();  // drop the trailing ':' separator, but never eat "NULL"
+    }
+ cf_option_types.push_back(
+ {"MAX_BYTES_FOR_LEVEL_MULTIPLIER_ADDITIONAL", val});
+
+ // get COMPRESSION_TYPE option value
+ GetStringFromCompressionType(&val, opts.compression);
+
+ if (val.empty()) {
+ val = "NULL";
+ }
+
+ cf_option_types.push_back({"COMPRESSION_TYPE", val});
+
+ // get COMPRESSION_PER_LEVEL option value
+ val = opts.compression_per_level.empty() ? "NULL" : "";
+
+ for (const auto &compression_type : opts.compression_per_level) {
+ std::string res;
+
+ GetStringFromCompressionType(&res, compression_type);
+
+ if (!res.empty()) {
+ val.append(res + ":");
+ }
+ }
+
+    if (!val.empty() && val.back() == ':') {
+      val.pop_back();  // drop the trailing ':' separator, but never eat "NULL"
+    }
+ cf_option_types.push_back({"COMPRESSION_PER_LEVEL", val});
+
+ // get compression_opts value
+ val = std::to_string(opts.compression_opts.window_bits) + ":";
+ val.append(std::to_string(opts.compression_opts.level) + ":");
+ val.append(std::to_string(opts.compression_opts.strategy));
+
+ cf_option_types.push_back({"COMPRESSION_OPTS", val});
+
+ // bottommost_compression
+ if (opts.bottommost_compression) {
+ std::string res;
+
+ GetStringFromCompressionType(&res, opts.bottommost_compression);
+
+ if (!res.empty()) {
+ cf_option_types.push_back({"BOTTOMMOST_COMPRESSION", res});
+ }
+ }
+
+ // get PREFIX_EXTRACTOR option
+ cf_option_types.push_back(
+ {"PREFIX_EXTRACTOR", opts.prefix_extractor == nullptr
+ ? "NULL"
+ : std::string(opts.prefix_extractor->Name())});
+
+ // get COMPACTION_STYLE option
+ switch (opts.compaction_style) {
+ case rocksdb::kCompactionStyleLevel:
+ val = "kCompactionStyleLevel";
+ break;
+ case rocksdb::kCompactionStyleUniversal:
+ val = "kCompactionStyleUniversal";
+ break;
+ case rocksdb::kCompactionStyleFIFO:
+ val = "kCompactionStyleFIFO";
+ break;
+ case rocksdb::kCompactionStyleNone:
+ val = "kCompactionStyleNone";
+ break;
+ default:
+ val = "NULL";
+ }
+
+ cf_option_types.push_back({"COMPACTION_STYLE", val});
+
+ // get COMPACTION_OPTIONS_UNIVERSAL related options
+ const rocksdb::CompactionOptionsUniversal compac_opts =
+ opts.compaction_options_universal;
+
+ val = "{SIZE_RATIO=";
+
+ val.append(std::to_string(compac_opts.size_ratio));
+ val.append("; MIN_MERGE_WIDTH=");
+ val.append(std::to_string(compac_opts.min_merge_width));
+ val.append("; MAX_MERGE_WIDTH=");
+ val.append(std::to_string(compac_opts.max_merge_width));
+ val.append("; MAX_SIZE_AMPLIFICATION_PERCENT=");
+ val.append(std::to_string(compac_opts.max_size_amplification_percent));
+ val.append("; COMPRESSION_SIZE_PERCENT=");
+ val.append(std::to_string(compac_opts.compression_size_percent));
+ val.append("; STOP_STYLE=");
+
+ switch (compac_opts.stop_style) {
+ case rocksdb::kCompactionStopStyleSimilarSize:
+ val.append("kCompactionStopStyleSimilarSize}");
+ break;
+ case rocksdb::kCompactionStopStyleTotalSize:
+ val.append("kCompactionStopStyleTotalSize}");
+ break;
+ default:
+ val.append("}");
+ }
+
+ cf_option_types.push_back({"COMPACTION_OPTIONS_UNIVERSAL", val});
+
+ // get COMPACTION_OPTION_FIFO option
+ cf_option_types.push_back(
+ {"COMPACTION_OPTION_FIFO::MAX_TABLE_FILES_SIZE",
+ std::to_string(opts.compaction_options_fifo.max_table_files_size)});
+
+ // get table related options
+ std::vector<std::string> table_options =
+ split_into_vector(opts.table_factory->GetPrintableTableOptions(), '\n');
+
+ for (auto option : table_options) {
+ option.erase(std::remove(option.begin(), option.end(), ' '),
+ option.end());
+
+      const std::string::size_type pos = option.find(':');
+      std::string option_name = option.substr(0, pos);
+      std::string option_value = option.substr(pos + 1);
+ std::transform(option_name.begin(), option_name.end(),
+ option_name.begin(),
+ [](unsigned char c) { return std::toupper(c); });
+
+ cf_option_types.push_back(
+ {"TABLE_FACTORY::" + option_name, option_value});
+ }
+
+ for (const auto &cf_option_type : cf_option_types) {
+ DBUG_ASSERT(tables->table != nullptr);
+ DBUG_ASSERT(tables->table->field != nullptr);
+
+ tables->table->field[RDB_CFOPTIONS_FIELD::CF_NAME]->store(
+ cf_name.c_str(), cf_name.size(), system_charset_info);
+ tables->table->field[RDB_CFOPTIONS_FIELD::OPTION_TYPE]->store(
+ cf_option_type.first.c_str(), cf_option_type.first.size(),
+ system_charset_info);
+ tables->table->field[RDB_CFOPTIONS_FIELD::VALUE]->store(
+ cf_option_type.second.c_str(), cf_option_type.second.size(),
+ system_charset_info);
+
+ ret = static_cast<int>(
+ my_core::schema_table_store_record(thd, tables->table));
+
+ if (ret) {
+ DBUG_RETURN(ret);
+ }
+ }
+ }
+
+ DBUG_RETURN(0);
+}
+
+/*
+ Support for INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO dynamic table
+ */
+namespace RDB_GLOBAL_INFO_FIELD {
+enum { TYPE = 0, NAME, VALUE };
+}
+
+static ST_FIELD_INFO rdb_i_s_global_info_fields_info[] = {
+ ROCKSDB_FIELD_INFO("TYPE", FN_REFLEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("NAME", FN_REFLEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("VALUE", FN_REFLEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO_END};
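+
+// Illustrative query (TYPE is one of the row groups filled below: BINLOG,
+// MAX_INDEX_ID, CF_FLAGS, DDL_DROP_INDEX_ONGOING):
+//   SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO
+//   WHERE TYPE = 'BINLOG';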
+
+/*
+ * Helper function for rdb_i_s_global_info_fill_table
+ * to insert (TYPE, NAME, VALUE) rows into
+ * information_schema.rocksdb_global_info
+ */
+static int rdb_global_info_fill_row(my_core::THD *const thd,
+ my_core::TABLE_LIST *const tables,
+ const char *const type,
+ const char *const name,
+ const char *const value) {
+ DBUG_ASSERT(thd != nullptr);
+ DBUG_ASSERT(tables != nullptr);
+ DBUG_ASSERT(tables->table != nullptr);
+ DBUG_ASSERT(type != nullptr);
+ DBUG_ASSERT(name != nullptr);
+ DBUG_ASSERT(value != nullptr);
+
+ Field **field = tables->table->field;
+ DBUG_ASSERT(field != nullptr);
+
+ field[RDB_GLOBAL_INFO_FIELD::TYPE]->store(type, strlen(type),
+ system_charset_info);
+ field[RDB_GLOBAL_INFO_FIELD::NAME]->store(name, strlen(name),
+ system_charset_info);
+ field[RDB_GLOBAL_INFO_FIELD::VALUE]->store(value, strlen(value),
+ system_charset_info);
+
+ return my_core::schema_table_store_record(thd, tables->table);
+}
+
+static int rdb_i_s_global_info_fill_table(
+ my_core::THD *const thd, my_core::TABLE_LIST *const tables,
+ my_core::Item *const cond MY_ATTRIBUTE((__unused__))) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(thd != nullptr);
+ DBUG_ASSERT(tables != nullptr);
+
+ static const uint32_t INT_BUF_LEN = 21;
+ static const uint32_t CF_ID_INDEX_BUF_LEN = 60;
+
+ int ret = 0;
+
+ rocksdb::DB *const rdb = rdb_get_rocksdb_db();
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
+
+ /* binlog info */
+ Rdb_binlog_manager *const blm = rdb_get_binlog_manager();
+ DBUG_ASSERT(blm != nullptr);
+
+ char file_buf[FN_REFLEN + 1] = {0};
+ my_off_t pos = 0;
+ char pos_buf[INT_BUF_LEN] = {0};
+ char gtid_buf[GTID_BUF_LEN] = {0};
+
+ if (blm->read(file_buf, &pos, gtid_buf)) {
+ snprintf(pos_buf, INT_BUF_LEN, "%llu", (ulonglong)pos);
+
+ ret |= rdb_global_info_fill_row(thd, tables, "BINLOG", "FILE", file_buf);
+ ret |= rdb_global_info_fill_row(thd, tables, "BINLOG", "POS", pos_buf);
+ ret |= rdb_global_info_fill_row(thd, tables, "BINLOG", "GTID", gtid_buf);
+ }
+
+ /* max index info */
+ const Rdb_dict_manager *const dict_manager = rdb_get_dict_manager();
+ DBUG_ASSERT(dict_manager != nullptr);
+
+ uint32_t max_index_id;
+ char max_index_id_buf[INT_BUF_LEN] = {0};
+
+ if (dict_manager->get_max_index_id(&max_index_id)) {
+ snprintf(max_index_id_buf, INT_BUF_LEN, "%u", max_index_id);
+
+ ret |= rdb_global_info_fill_row(thd, tables, "MAX_INDEX_ID", "MAX_INDEX_ID",
+ max_index_id_buf);
+ }
+
+ /* cf_id -> cf_flags */
+ char cf_id_buf[INT_BUF_LEN] = {0};
+ char cf_value_buf[FN_REFLEN + 1] = {0};
+ const Rdb_cf_manager &cf_manager = rdb_get_cf_manager();
+
+ for (const auto &cf_handle : cf_manager.get_all_cf()) {
+ DBUG_ASSERT(cf_handle != nullptr);
+
+ uint flags;
+
+ if (!dict_manager->get_cf_flags(cf_handle->GetID(), &flags)) {
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Failed to get column family flags "
+ "from CF with id = %u. MyRocks data dictionary may "
+ "be corrupted.",
+ cf_handle->GetID());
+ abort();
+ }
+
+ snprintf(cf_id_buf, INT_BUF_LEN, "%u", cf_handle->GetID());
+ snprintf(cf_value_buf, FN_REFLEN, "%s [%u]", cf_handle->GetName().c_str(),
+ flags);
+
+ ret |= rdb_global_info_fill_row(thd, tables, "CF_FLAGS", cf_id_buf,
+ cf_value_buf);
+
+ if (ret) {
+ break;
+ }
+ }
+
+ /* DDL_DROP_INDEX_ONGOING */
+ std::unordered_set<GL_INDEX_ID> gl_index_ids;
+ dict_manager->get_ongoing_index_operation(
+ &gl_index_ids, Rdb_key_def::DDL_DROP_INDEX_ONGOING);
+ char cf_id_index_buf[CF_ID_INDEX_BUF_LEN] = {0};
+
+ for (auto gl_index_id : gl_index_ids) {
+ snprintf(cf_id_index_buf, CF_ID_INDEX_BUF_LEN, "cf_id:%u,index_id:%u",
+ gl_index_id.cf_id, gl_index_id.index_id);
+
+ ret |= rdb_global_info_fill_row(thd, tables, "DDL_DROP_INDEX_ONGOING",
+ cf_id_index_buf, "");
+
+ if (ret) {
+ break;
+ }
+ }
+
+ DBUG_RETURN(ret);
+}
+
+/*
+ Support for INFORMATION_SCHEMA.ROCKSDB_COMPACTION_STATS dynamic table
+ */
+static int rdb_i_s_compact_stats_fill_table(
+ my_core::THD *thd, my_core::TABLE_LIST *tables,
+ my_core::Item *cond MY_ATTRIBUTE((__unused__))) {
+ DBUG_ASSERT(thd != nullptr);
+ DBUG_ASSERT(tables != nullptr);
+
+ DBUG_ENTER_FUNC();
+
+ int ret = 0;
+ rocksdb::DB *rdb = rdb_get_rocksdb_db();
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
+
+ Rdb_cf_manager &cf_manager = rdb_get_cf_manager();
+
+  for (const auto &cf_name : cf_manager.get_cf_names()) {
+ rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(cf_name);
+
+ if (cfh == nullptr) {
+ continue;
+ }
+
+ std::map<std::string, std::string> props;
+ bool bool_ret MY_ATTRIBUTE((__unused__));
+ bool_ret = rdb->GetMapProperty(cfh, "rocksdb.cfstats", &props);
+ DBUG_ASSERT(bool_ret);
+
+ const std::string prop_name_prefix = "compaction.";
+ for (auto const &prop_ent : props) {
+ std::string prop_name = prop_ent.first;
+ if (prop_name.find(prop_name_prefix) != 0) {
+ continue;
+ }
+ std::string value = prop_ent.second;
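+      // Keys in the "rocksdb.cfstats" map are expected to look like
+      // "compaction.<LEVEL>.<TYPE>" (e.g. "compaction.L0.SizeBytes");
+      // split on the second '.' to recover the level and type parts.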
+ std::size_t del_pos = prop_name.find('.', prop_name_prefix.size());
+ DBUG_ASSERT(del_pos != std::string::npos);
+ std::string level_str = prop_name.substr(
+ prop_name_prefix.size(), del_pos - prop_name_prefix.size());
+ std::string type_str = prop_name.substr(del_pos + 1);
+
+ Field **field = tables->table->field;
+ DBUG_ASSERT(field != nullptr);
+
+ field[0]->store(cf_name.c_str(), cf_name.size(), system_charset_info);
+ field[1]->store(level_str.c_str(), level_str.size(), system_charset_info);
+ field[2]->store(type_str.c_str(), type_str.size(), system_charset_info);
+ field[3]->store(std::stod(value));
+
+ ret |= static_cast<int>(
+ my_core::schema_table_store_record(thd, tables->table));
+
+ if (ret != 0) {
+ DBUG_RETURN(ret);
+ }
+ }
+ }
+
+ DBUG_RETURN(ret);
+}
+
+static ST_FIELD_INFO rdb_i_s_compact_stats_fields_info[] = {
+ ROCKSDB_FIELD_INFO("CF_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("LEVEL", FN_REFLEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("TYPE", FN_REFLEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("VALUE", sizeof(double), MYSQL_TYPE_DOUBLE, 0),
+ ROCKSDB_FIELD_INFO_END};
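+
+// Illustrative query:
+//   SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_COMPACTION_STATS
+//   WHERE CF_NAME = 'default';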
+
+namespace // anonymous namespace = not visible outside this source file
+{
+struct Rdb_ddl_scanner : public Rdb_tables_scanner {
+ my_core::THD *m_thd;
+ my_core::TABLE *m_table;
+
+ int add_table(Rdb_tbl_def *tdef) override;
+};
+} // anonymous namespace
+
+/*
+ Support for INFORMATION_SCHEMA.ROCKSDB_DDL dynamic table
+ */
+namespace RDB_DDL_FIELD {
+enum {
+ TABLE_SCHEMA = 0,
+ TABLE_NAME,
+ PARTITION_NAME,
+ INDEX_NAME,
+ COLUMN_FAMILY,
+ INDEX_NUMBER,
+ INDEX_TYPE,
+ KV_FORMAT_VERSION,
+ TTL_DURATION,
+ INDEX_FLAGS,
+ CF,
+ AUTO_INCREMENT
+};
+} // namespace RDB_DDL_FIELD
+
+static ST_FIELD_INFO rdb_i_s_ddl_fields_info[] = {
+ ROCKSDB_FIELD_INFO("TABLE_SCHEMA", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("TABLE_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("PARTITION_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING,
+ MY_I_S_MAYBE_NULL),
+ ROCKSDB_FIELD_INFO("INDEX_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("COLUMN_FAMILY", sizeof(uint32_t), MYSQL_TYPE_LONG, 0),
+ ROCKSDB_FIELD_INFO("INDEX_NUMBER", sizeof(uint32_t), MYSQL_TYPE_LONG, 0),
+ ROCKSDB_FIELD_INFO("INDEX_TYPE", sizeof(uint16_t), MYSQL_TYPE_SHORT, 0),
+ ROCKSDB_FIELD_INFO("KV_FORMAT_VERSION", sizeof(uint16_t), MYSQL_TYPE_SHORT,
+ 0),
+ ROCKSDB_FIELD_INFO("TTL_DURATION", sizeof(uint64), MYSQL_TYPE_LONGLONG, 0),
+ ROCKSDB_FIELD_INFO("INDEX_FLAGS", sizeof(uint64), MYSQL_TYPE_LONGLONG, 0),
+ ROCKSDB_FIELD_INFO("CF", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("AUTO_INCREMENT", sizeof(uint64_t), MYSQL_TYPE_LONGLONG,
+ MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED),
+ ROCKSDB_FIELD_INFO_END};
+
+int Rdb_ddl_scanner::add_table(Rdb_tbl_def *tdef) {
+ DBUG_ASSERT(tdef != nullptr);
+
+ int ret = 0;
+
+ DBUG_ASSERT(m_table != nullptr);
+ Field **field = m_table->field;
+ DBUG_ASSERT(field != nullptr);
+ const Rdb_dict_manager *dict_manager = rdb_get_dict_manager();
+
+ const std::string &dbname = tdef->base_dbname();
+ field[RDB_DDL_FIELD::TABLE_SCHEMA]->store(dbname.c_str(), dbname.size(),
+ system_charset_info);
+
+ const std::string &tablename = tdef->base_tablename();
+ field[RDB_DDL_FIELD::TABLE_NAME]->store(tablename.c_str(), tablename.size(),
+ system_charset_info);
+
+ const std::string &partname = tdef->base_partition();
+ if (partname.length() == 0) {
+ field[RDB_DDL_FIELD::PARTITION_NAME]->set_null();
+ } else {
+ field[RDB_DDL_FIELD::PARTITION_NAME]->set_notnull();
+ field[RDB_DDL_FIELD::PARTITION_NAME]->store(
+ partname.c_str(), partname.size(), system_charset_info);
+ }
+
+ for (uint i = 0; i < tdef->m_key_count; i++) {
+ const Rdb_key_def &kd = *tdef->m_key_descr_arr[i];
+
+ field[RDB_DDL_FIELD::INDEX_NAME]->store(kd.m_name.c_str(), kd.m_name.size(),
+ system_charset_info);
+
+ GL_INDEX_ID gl_index_id = kd.get_gl_index_id();
+ field[RDB_DDL_FIELD::COLUMN_FAMILY]->store(gl_index_id.cf_id, true);
+ field[RDB_DDL_FIELD::INDEX_NUMBER]->store(gl_index_id.index_id, true);
+ field[RDB_DDL_FIELD::INDEX_TYPE]->store(kd.m_index_type, true);
+ field[RDB_DDL_FIELD::KV_FORMAT_VERSION]->store(kd.m_kv_format_version,
+ true);
+ field[RDB_DDL_FIELD::TTL_DURATION]->store(kd.m_ttl_duration, true);
+ field[RDB_DDL_FIELD::INDEX_FLAGS]->store(kd.m_index_flags_bitmap, true);
+
+ std::string cf_name = kd.get_cf()->GetName();
+ field[RDB_DDL_FIELD::CF]->store(cf_name.c_str(), cf_name.size(),
+ system_charset_info);
+ ulonglong auto_incr;
+ if (dict_manager->get_auto_incr_val(tdef->get_autoincr_gl_index_id(),
+ &auto_incr)) {
+ field[RDB_DDL_FIELD::AUTO_INCREMENT]->set_notnull();
+ field[RDB_DDL_FIELD::AUTO_INCREMENT]->store(auto_incr, true);
+ } else {
+ field[RDB_DDL_FIELD::AUTO_INCREMENT]->set_null();
+ }
+
+ ret = my_core::schema_table_store_record(m_thd, m_table);
+ if (ret) return ret;
+ }
+ return HA_EXIT_SUCCESS;
+}
+
+static int rdb_i_s_ddl_fill_table(my_core::THD *const thd,
+ my_core::TABLE_LIST *const tables,
+ my_core::Item *const cond) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(thd != nullptr);
+ DBUG_ASSERT(tables != nullptr);
+ DBUG_ASSERT(tables->table != nullptr);
+
+ int ret = 0;
+ rocksdb::DB *const rdb = rdb_get_rocksdb_db();
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
+
+ Rdb_ddl_scanner ddl_arg;
+
+ ddl_arg.m_thd = thd;
+ ddl_arg.m_table = tables->table;
+
+ Rdb_ddl_manager *ddl_manager = rdb_get_ddl_manager();
+ DBUG_ASSERT(ddl_manager != nullptr);
+
+ ret = ddl_manager->scan_for_tables(&ddl_arg);
+
+ DBUG_RETURN(ret);
+}
+
+static int rdb_i_s_ddl_init(void *const p) {
+ DBUG_ENTER_FUNC();
+
+ if (prevent_myrocks_loading)
+ DBUG_RETURN(1);
+
+ my_core::ST_SCHEMA_TABLE *schema;
+
+ DBUG_ASSERT(p != nullptr);
+
+ schema = (my_core::ST_SCHEMA_TABLE *)p;
+
+ schema->fields_info = rdb_i_s_ddl_fields_info;
+ schema->fill_table = rdb_i_s_ddl_fill_table;
+
+ DBUG_RETURN(0);
+}
+
+static int rdb_i_s_cfoptions_init(void *const p) {
+ DBUG_ENTER_FUNC();
+
+ if (prevent_myrocks_loading)
+ DBUG_RETURN(1);
+
+ DBUG_ASSERT(p != nullptr);
+
+ my_core::ST_SCHEMA_TABLE *schema;
+
+ schema = (my_core::ST_SCHEMA_TABLE *)p;
+
+ schema->fields_info = rdb_i_s_cfoptions_fields_info;
+ schema->fill_table = rdb_i_s_cfoptions_fill_table;
+
+ DBUG_RETURN(0);
+}
+
+static int rdb_i_s_global_info_init(void *const p) {
+ DBUG_ENTER_FUNC();
+
+ if (prevent_myrocks_loading)
+ DBUG_RETURN(1);
+
+ DBUG_ASSERT(p != nullptr);
+
+ my_core::ST_SCHEMA_TABLE *schema;
+
+ schema = reinterpret_cast<my_core::ST_SCHEMA_TABLE *>(p);
+
+ schema->fields_info = rdb_i_s_global_info_fields_info;
+ schema->fill_table = rdb_i_s_global_info_fill_table;
+
+ DBUG_RETURN(0);
+}
+
+static int rdb_i_s_compact_stats_init(void *p) {
+ my_core::ST_SCHEMA_TABLE *schema;
+
+ DBUG_ENTER_FUNC();
+
+ if (prevent_myrocks_loading)
+ DBUG_RETURN(1);
+
+ DBUG_ASSERT(p != nullptr);
+
+ schema = reinterpret_cast<my_core::ST_SCHEMA_TABLE *>(p);
+
+ schema->fields_info = rdb_i_s_compact_stats_fields_info;
+ schema->fill_table = rdb_i_s_compact_stats_fill_table;
+
+ DBUG_RETURN(0);
+}
+
+/* Given a path to a file, return just the filename portion. */
+static std::string rdb_filename_without_path(const std::string &path) {
+ /* Find last slash in path */
+ const size_t pos = path.rfind('/');
+
+ /* None found? Just return the original string */
+ if (pos == std::string::npos) {
+ return std::string(path);
+ }
+
+  /* Return everything after the last slash */
+ return path.substr(pos + 1);
+}
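+
+// Example: rdb_filename_without_path("/data/.rocksdb/000123.sst") returns
+// "000123.sst" (illustrative path).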
+
+/*
+ Support for INFORMATION_SCHEMA.ROCKSDB_SST_PROPS dynamic table
+ */
+namespace RDB_SST_PROPS_FIELD {
+enum {
+ SST_NAME = 0,
+ COLUMN_FAMILY,
+ DATA_BLOCKS,
+ ENTRIES,
+ RAW_KEY_SIZE,
+ RAW_VALUE_SIZE,
+ DATA_BLOCK_SIZE,
+ INDEX_BLOCK_SIZE,
+ INDEX_PARTITIONS,
+ TOP_LEVEL_INDEX_SIZE,
+ FILTER_BLOCK_SIZE,
+ COMPRESSION_ALGO,
+ CREATION_TIME,
+ FILE_CREATION_TIME,
+ OLDEST_KEY_TIME,
+ FILTER_POLICY,
+ COMPRESSION_OPTIONS,
+};
+} // namespace RDB_SST_PROPS_FIELD
+
+static ST_FIELD_INFO rdb_i_s_sst_props_fields_info[] = {
+ ROCKSDB_FIELD_INFO("SST_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("COLUMN_FAMILY", sizeof(uint32_t), MYSQL_TYPE_LONG, 0),
+ ROCKSDB_FIELD_INFO("DATA_BLOCKS", sizeof(int64_t), MYSQL_TYPE_LONGLONG, 0),
+ ROCKSDB_FIELD_INFO("ENTRIES", sizeof(int64_t), MYSQL_TYPE_LONGLONG, 0),
+ ROCKSDB_FIELD_INFO("RAW_KEY_SIZE", sizeof(int64_t), MYSQL_TYPE_LONGLONG, 0),
+ ROCKSDB_FIELD_INFO("RAW_VALUE_SIZE", sizeof(int64_t), MYSQL_TYPE_LONGLONG,
+ 0),
+ ROCKSDB_FIELD_INFO("DATA_BLOCK_SIZE", sizeof(int64_t), MYSQL_TYPE_LONGLONG,
+ 0),
+ ROCKSDB_FIELD_INFO("INDEX_BLOCK_SIZE", sizeof(int64_t), MYSQL_TYPE_LONGLONG,
+ 0),
+ ROCKSDB_FIELD_INFO("INDEX_PARTITIONS", sizeof(uint32_t), MYSQL_TYPE_LONG,
+ 0),
+ ROCKSDB_FIELD_INFO("TOP_LEVEL_INDEX_SIZE", sizeof(int64_t),
+ MYSQL_TYPE_LONGLONG, 0),
+ ROCKSDB_FIELD_INFO("FILTER_BLOCK_SIZE", sizeof(int64_t),
+ MYSQL_TYPE_LONGLONG, 0),
+ ROCKSDB_FIELD_INFO("COMPRESSION_ALGO", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("CREATION_TIME", sizeof(int64_t), MYSQL_TYPE_LONGLONG,
+ 0),
+ ROCKSDB_FIELD_INFO("FILE_CREATION_TIME", sizeof(int64_t),
+ MYSQL_TYPE_LONGLONG, 0),
+ ROCKSDB_FIELD_INFO("OLDEST_KEY_TIME", sizeof(int64_t), MYSQL_TYPE_LONGLONG,
+ 0),
+ ROCKSDB_FIELD_INFO("FILTER_POLICY", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("COMPRESSION_OPTIONS", NAME_LEN + 1, MYSQL_TYPE_STRING,
+ 0),
+ ROCKSDB_FIELD_INFO_END};
+
+static int rdb_i_s_sst_props_fill_table(
+ my_core::THD *const thd, my_core::TABLE_LIST *const tables,
+ my_core::Item *const cond MY_ATTRIBUTE((__unused__))) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(thd != nullptr);
+ DBUG_ASSERT(tables != nullptr);
+ DBUG_ASSERT(tables->table != nullptr);
+
+ int ret = 0;
+ Field **field = tables->table->field;
+ DBUG_ASSERT(field != nullptr);
+
+ /* Iterate over all the column families */
+ rocksdb::DB *const rdb = rdb_get_rocksdb_db();
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
+
+ const Rdb_cf_manager &cf_manager = rdb_get_cf_manager();
+
+ for (const auto &cf_handle : cf_manager.get_all_cf()) {
+    /* Grab the properties of all the tables in the column family */
+ rocksdb::TablePropertiesCollection table_props_collection;
+ const rocksdb::Status s =
+ rdb->GetPropertiesOfAllTables(cf_handle, &table_props_collection);
+
+ if (!s.ok()) {
+ continue;
+ }
+
+ /* Iterate over all the items in the collection, each of which contains a
+ * name and the actual properties */
+ for (const auto &props : table_props_collection) {
+ /* Add the SST name into the output */
+ const std::string sst_name = rdb_filename_without_path(props.first);
+
+ field[RDB_SST_PROPS_FIELD::SST_NAME]->store(
+ sst_name.data(), sst_name.size(), system_charset_info);
+
+ field[RDB_SST_PROPS_FIELD::COLUMN_FAMILY]->store(
+ props.second->column_family_id, true);
+ field[RDB_SST_PROPS_FIELD::DATA_BLOCKS]->store(
+ props.second->num_data_blocks, true);
+ field[RDB_SST_PROPS_FIELD::ENTRIES]->store(props.second->num_entries,
+ true);
+ field[RDB_SST_PROPS_FIELD::RAW_KEY_SIZE]->store(
+ props.second->raw_key_size, true);
+ field[RDB_SST_PROPS_FIELD::RAW_VALUE_SIZE]->store(
+ props.second->raw_value_size, true);
+ field[RDB_SST_PROPS_FIELD::DATA_BLOCK_SIZE]->store(
+ props.second->data_size, true);
+ field[RDB_SST_PROPS_FIELD::INDEX_BLOCK_SIZE]->store(
+ props.second->index_size, true);
+ field[RDB_SST_PROPS_FIELD::INDEX_PARTITIONS]->store(
+ props.second->index_partitions, true);
+ field[RDB_SST_PROPS_FIELD::TOP_LEVEL_INDEX_SIZE]->store(
+ props.second->top_level_index_size, true);
+ field[RDB_SST_PROPS_FIELD::FILTER_BLOCK_SIZE]->store(
+ props.second->filter_size, true);
+ if (props.second->compression_name.empty()) {
+ field[RDB_SST_PROPS_FIELD::COMPRESSION_ALGO]->set_null();
+ } else {
+ field[RDB_SST_PROPS_FIELD::COMPRESSION_ALGO]->store(
+ props.second->compression_name.c_str(),
+ props.second->compression_name.size(), system_charset_info);
+ }
+ field[RDB_SST_PROPS_FIELD::CREATION_TIME]->store(
+ props.second->creation_time, true);
+ field[RDB_SST_PROPS_FIELD::FILE_CREATION_TIME]->store(
+ props.second->file_creation_time, true);
+ field[RDB_SST_PROPS_FIELD::OLDEST_KEY_TIME]->store(
+ props.second->oldest_key_time, true);
+ if (props.second->filter_policy_name.empty()) {
+ field[RDB_SST_PROPS_FIELD::FILTER_POLICY]->set_null();
+ } else {
+ field[RDB_SST_PROPS_FIELD::FILTER_POLICY]->store(
+ props.second->filter_policy_name.c_str(),
+ props.second->filter_policy_name.size(), system_charset_info);
+ }
+ if (props.second->compression_options.empty()) {
+ field[RDB_SST_PROPS_FIELD::COMPRESSION_OPTIONS]->set_null();
+ } else {
+ field[RDB_SST_PROPS_FIELD::COMPRESSION_OPTIONS]->store(
+ props.second->compression_options.c_str(),
+ props.second->compression_options.size(), system_charset_info);
+ }
+
+ /* Tell MySQL about this row in the virtual table */
+ ret = static_cast<int>(
+ my_core::schema_table_store_record(thd, tables->table));
+
+ if (ret != 0) {
+ DBUG_RETURN(ret);
+ }
+ }
+ }
+
+ DBUG_RETURN(ret);
+}
+
+/* Initialize the information_schema.rocksdb_sst_props virtual table */
+static int rdb_i_s_sst_props_init(void *const p) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(p != nullptr);
+
+ my_core::ST_SCHEMA_TABLE *schema;
+
+ schema = (my_core::ST_SCHEMA_TABLE *)p;
+
+ schema->fields_info = rdb_i_s_sst_props_fields_info;
+ schema->fill_table = rdb_i_s_sst_props_fill_table;
+
+ DBUG_RETURN(0);
+}
+
+/*
+ Support for INFORMATION_SCHEMA.ROCKSDB_INDEX_FILE_MAP dynamic table
+ */
+namespace RDB_INDEX_FILE_MAP_FIELD {
+enum {
+ COLUMN_FAMILY = 0,
+ INDEX_NUMBER,
+ SST_NAME,
+ NUM_ROWS,
+ DATA_SIZE,
+ ENTRY_DELETES,
+ ENTRY_SINGLEDELETES,
+ ENTRY_MERGES,
+ ENTRY_OTHERS,
+ DISTINCT_KEYS_PREFIX
+};
+} // namespace RDB_INDEX_FILE_MAP_FIELD
+
+static ST_FIELD_INFO rdb_i_s_index_file_map_fields_info[] = {
+    /* The information_schema.rocksdb_index_file_map virtual table exposes
+     * the following fields:
+     * COLUMN_FAMILY => the index's column family contained in the SST file
+     * INDEX_NUMBER => the index id contained in the SST file
+     * SST_NAME => the name of the SST file containing some indexes
+     * NUM_ROWS => the number of entries of this index id in this SST file
+     * DATA_SIZE => the data size stored in this SST file for this index id
+     * ENTRY_DELETES/SINGLEDELETES/MERGES/OTHERS => per-entry-type counts
+     * DISTINCT_KEYS_PREFIX => distinct keys seen per key-prefix length */
+ ROCKSDB_FIELD_INFO("COLUMN_FAMILY", sizeof(uint32_t), MYSQL_TYPE_LONG, 0),
+ ROCKSDB_FIELD_INFO("INDEX_NUMBER", sizeof(uint32_t), MYSQL_TYPE_LONG, 0),
+ ROCKSDB_FIELD_INFO("SST_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("NUM_ROWS", sizeof(int64_t), MYSQL_TYPE_LONGLONG, 0),
+ ROCKSDB_FIELD_INFO("DATA_SIZE", sizeof(int64_t), MYSQL_TYPE_LONGLONG, 0),
+ ROCKSDB_FIELD_INFO("ENTRY_DELETES", sizeof(int64_t), MYSQL_TYPE_LONGLONG,
+ 0),
+ ROCKSDB_FIELD_INFO("ENTRY_SINGLEDELETES", sizeof(int64_t),
+ MYSQL_TYPE_LONGLONG, 0),
+ ROCKSDB_FIELD_INFO("ENTRY_MERGES", sizeof(int64_t), MYSQL_TYPE_LONGLONG, 0),
+ ROCKSDB_FIELD_INFO("ENTRY_OTHERS", sizeof(int64_t), MYSQL_TYPE_LONGLONG, 0),
+ ROCKSDB_FIELD_INFO("DISTINCT_KEYS_PREFIX", MAX_REF_PARTS * 25,
+ MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO_END};
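+
+// Illustrative query, mapping an index to the SST files that contain it
+// (the index number shown is hypothetical):
+//   SELECT SST_NAME, NUM_ROWS, DATA_SIZE
+//   FROM INFORMATION_SCHEMA.ROCKSDB_INDEX_FILE_MAP WHERE INDEX_NUMBER = 260;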
+
+/* Fill the information_schema.rocksdb_index_file_map virtual table */
+static int rdb_i_s_index_file_map_fill_table(
+ my_core::THD *const thd, my_core::TABLE_LIST *const tables,
+ my_core::Item *const cond MY_ATTRIBUTE((__unused__))) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(thd != nullptr);
+ DBUG_ASSERT(tables != nullptr);
+ DBUG_ASSERT(tables->table != nullptr);
+
+ int ret = 0;
+ Field **field = tables->table->field;
+ DBUG_ASSERT(field != nullptr);
+
+ /* Iterate over all the column families */
+ rocksdb::DB *const rdb = rdb_get_rocksdb_db();
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
+
+ const Rdb_cf_manager &cf_manager = rdb_get_cf_manager();
+
+ for (const auto &cf_handle : cf_manager.get_all_cf()) {
+    /* Grab the properties of all the tables in the column family */
+ rocksdb::TablePropertiesCollection table_props_collection;
+ const rocksdb::Status s =
+ rdb->GetPropertiesOfAllTables(cf_handle, &table_props_collection);
+
+ if (!s.ok()) {
+ continue;
+ }
+
+ /* Iterate over all the items in the collection, each of which contains a
+ * name and the actual properties */
+ for (const auto &props : table_props_collection) {
+ /* Add the SST name into the output */
+ const std::string sst_name = rdb_filename_without_path(props.first);
+
+ field[RDB_INDEX_FILE_MAP_FIELD::SST_NAME]->store(
+ sst_name.data(), sst_name.size(), system_charset_info);
+
+ /* Get the __indexstats__ data out of the table property */
+ std::vector<Rdb_index_stats> stats;
+ Rdb_tbl_prop_coll::read_stats_from_tbl_props(props.second, &stats);
+
+ if (stats.empty()) {
+ field[RDB_INDEX_FILE_MAP_FIELD::COLUMN_FAMILY]->store(-1, true);
+ field[RDB_INDEX_FILE_MAP_FIELD::INDEX_NUMBER]->store(-1, true);
+ field[RDB_INDEX_FILE_MAP_FIELD::NUM_ROWS]->store(-1, true);
+ field[RDB_INDEX_FILE_MAP_FIELD::DATA_SIZE]->store(-1, true);
+ field[RDB_INDEX_FILE_MAP_FIELD::ENTRY_DELETES]->store(-1, true);
+ field[RDB_INDEX_FILE_MAP_FIELD::ENTRY_SINGLEDELETES]->store(-1, true);
+ field[RDB_INDEX_FILE_MAP_FIELD::ENTRY_MERGES]->store(-1, true);
+ field[RDB_INDEX_FILE_MAP_FIELD::ENTRY_OTHERS]->store(-1, true);
+ } else {
+ for (const auto &it : stats) {
+ /* Add the index number, the number of rows, and data size to the
+ * output */
+ field[RDB_INDEX_FILE_MAP_FIELD::COLUMN_FAMILY]->store(
+ it.m_gl_index_id.cf_id, true);
+ field[RDB_INDEX_FILE_MAP_FIELD::INDEX_NUMBER]->store(
+ it.m_gl_index_id.index_id, true);
+ field[RDB_INDEX_FILE_MAP_FIELD::NUM_ROWS]->store(it.m_rows, true);
+ field[RDB_INDEX_FILE_MAP_FIELD::DATA_SIZE]->store(it.m_data_size,
+ true);
+ field[RDB_INDEX_FILE_MAP_FIELD::ENTRY_DELETES]->store(
+ it.m_entry_deletes, true);
+ field[RDB_INDEX_FILE_MAP_FIELD::ENTRY_SINGLEDELETES]->store(
+ it.m_entry_single_deletes, true);
+ field[RDB_INDEX_FILE_MAP_FIELD::ENTRY_MERGES]->store(
+ it.m_entry_merges, true);
+ field[RDB_INDEX_FILE_MAP_FIELD::ENTRY_OTHERS]->store(
+ it.m_entry_others, true);
+
+ std::string distinct_keys_prefix;
+
+ for (size_t i = 0; i < it.m_distinct_keys_per_prefix.size(); i++) {
+ if (i > 0) {
+ distinct_keys_prefix += ",";
+ }
+
+ distinct_keys_prefix +=
+ std::to_string(it.m_distinct_keys_per_prefix[i]);
+ }
+
+ field[RDB_INDEX_FILE_MAP_FIELD::DISTINCT_KEYS_PREFIX]->store(
+ distinct_keys_prefix.data(), distinct_keys_prefix.size(),
+ system_charset_info);
+
+ /* Tell MySQL about this row in the virtual table */
+ ret = static_cast<int>(
+ my_core::schema_table_store_record(thd, tables->table));
+
+ if (ret != 0) {
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ DBUG_RETURN(ret);
+}
+
+/* Initialize the information_schema.rocksdb_index_file_map virtual table */
+static int rdb_i_s_index_file_map_init(void *const p) {
+ DBUG_ENTER_FUNC();
+
+ if (prevent_myrocks_loading)
+ DBUG_RETURN(1);
+
+ DBUG_ASSERT(p != nullptr);
+
+ my_core::ST_SCHEMA_TABLE *schema;
+
+ schema = (my_core::ST_SCHEMA_TABLE *)p;
+
+ schema->fields_info = rdb_i_s_index_file_map_fields_info;
+ schema->fill_table = rdb_i_s_index_file_map_fill_table;
+
+ DBUG_RETURN(0);
+}
+
+/*
+ Support for INFORMATION_SCHEMA.ROCKSDB_LOCKS dynamic table
+ */
+namespace RDB_LOCKS_FIELD {
+enum { COLUMN_FAMILY_ID = 0, TRANSACTION_ID, KEY, MODE };
+} // namespace RDB_LOCKS_FIELD
+
+static ST_FIELD_INFO rdb_i_s_lock_info_fields_info[] = {
+ ROCKSDB_FIELD_INFO("COLUMN_FAMILY_ID", sizeof(uint32_t), MYSQL_TYPE_LONG,
+ 0),
+ ROCKSDB_FIELD_INFO("TRANSACTION_ID", sizeof(uint32_t), MYSQL_TYPE_LONG, 0),
+ ROCKSDB_FIELD_INFO("KEY", FN_REFLEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("MODE", 32, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO_END};
+
+/* Fill the information_schema.rocksdb_locks virtual table */
+static int rdb_i_s_lock_info_fill_table(
+ my_core::THD *const thd, my_core::TABLE_LIST *const tables,
+ my_core::Item *const cond MY_ATTRIBUTE((__unused__))) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(thd != nullptr);
+ DBUG_ASSERT(tables != nullptr);
+ DBUG_ASSERT(tables->table != nullptr);
+ DBUG_ASSERT(tables->table->field != nullptr);
+
+ int ret = 0;
+
+ rocksdb::TransactionDB *const rdb = rdb_get_rocksdb_db();
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
+
+ /* cf id -> rocksdb::KeyLockInfo */
+ std::unordered_multimap<uint32_t, rocksdb::KeyLockInfo> lock_info =
+ rdb->GetLockStatusData();
+
+ for (const auto &lock : lock_info) {
+ const uint32_t cf_id = lock.first;
+ const auto &key_lock_info = lock.second;
+ const auto key_hexstr = rdb_hexdump(key_lock_info.key.c_str(),
+ key_lock_info.key.length(), FN_REFLEN);
+
+ for (const auto &id : key_lock_info.ids) {
+ tables->table->field[RDB_LOCKS_FIELD::COLUMN_FAMILY_ID]->store(cf_id,
+ true);
+ tables->table->field[RDB_LOCKS_FIELD::TRANSACTION_ID]->store(id, true);
+
+ tables->table->field[RDB_LOCKS_FIELD::KEY]->store(
+ key_hexstr.c_str(), key_hexstr.size(), system_charset_info);
+ tables->table->field[RDB_LOCKS_FIELD::MODE]->store(
+ key_lock_info.exclusive ? "X" : "S", 1, system_charset_info);
+
+ /* Tell MySQL about this row in the virtual table */
+ ret = static_cast<int>(
+ my_core::schema_table_store_record(thd, tables->table));
+
+ if (ret != 0) {
+ break;
+ }
+ }
+ }
+
+ DBUG_RETURN(ret);
+}
+
+/* Initialize the information_schema.rocksdb_locks virtual table */
+static int rdb_i_s_lock_info_init(void *const p) {
+ DBUG_ENTER_FUNC();
+
+ if (prevent_myrocks_loading)
+ DBUG_RETURN(1);
+
+ DBUG_ASSERT(p != nullptr);
+
+ my_core::ST_SCHEMA_TABLE *schema;
+
+ schema = (my_core::ST_SCHEMA_TABLE *)p;
+
+ schema->fields_info = rdb_i_s_lock_info_fields_info;
+ schema->fill_table = rdb_i_s_lock_info_fill_table;
+
+ DBUG_RETURN(0);
+}
+
+/*
+ Support for INFORMATION_SCHEMA.ROCKSDB_TRX dynamic table
+ */
+namespace RDB_TRX_FIELD {
+enum {
+ TRANSACTION_ID = 0,
+ STATE,
+ NAME,
+ WRITE_COUNT,
+ LOCK_COUNT,
+ TIMEOUT_SEC,
+ WAITING_KEY,
+ WAITING_COLUMN_FAMILY_ID,
+ IS_REPLICATION,
+ SKIP_TRX_API,
+ READ_ONLY,
+ HAS_DEADLOCK_DETECTION,
+ NUM_ONGOING_BULKLOAD,
+ THREAD_ID,
+ QUERY
+};
+} // namespace RDB_TRX_FIELD
+
+static ST_FIELD_INFO rdb_i_s_trx_info_fields_info[] = {
+ ROCKSDB_FIELD_INFO("TRANSACTION_ID", sizeof(ulonglong), MYSQL_TYPE_LONGLONG,
+ 0),
+ ROCKSDB_FIELD_INFO("STATE", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("WRITE_COUNT", sizeof(ulonglong), MYSQL_TYPE_LONGLONG,
+ 0),
+ ROCKSDB_FIELD_INFO("LOCK_COUNT", sizeof(ulonglong), MYSQL_TYPE_LONGLONG, 0),
+ ROCKSDB_FIELD_INFO("TIMEOUT_SEC", sizeof(uint32_t), MYSQL_TYPE_LONG, 0),
+ ROCKSDB_FIELD_INFO("WAITING_KEY", FN_REFLEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("WAITING_COLUMN_FAMILY_ID", sizeof(uint32_t),
+ MYSQL_TYPE_LONG, 0),
+ ROCKSDB_FIELD_INFO("IS_REPLICATION", sizeof(uint32_t), MYSQL_TYPE_LONG, 0),
+ ROCKSDB_FIELD_INFO("SKIP_TRX_API", sizeof(uint32_t), MYSQL_TYPE_LONG, 0),
+ ROCKSDB_FIELD_INFO("READ_ONLY", sizeof(uint32_t), MYSQL_TYPE_LONG, 0),
+ ROCKSDB_FIELD_INFO("HAS_DEADLOCK_DETECTION", sizeof(uint32_t),
+ MYSQL_TYPE_LONG, 0),
+ ROCKSDB_FIELD_INFO("NUM_ONGOING_BULKLOAD", sizeof(uint32_t),
+ MYSQL_TYPE_LONG, 0),
+ ROCKSDB_FIELD_INFO("THREAD_ID", sizeof(ulong), MYSQL_TYPE_LONG, 0),
+ ROCKSDB_FIELD_INFO("QUERY", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO_END};
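+
+// Illustrative query:
+//   SELECT TRANSACTION_ID, STATE, THREAD_ID, QUERY
+//   FROM INFORMATION_SCHEMA.ROCKSDB_TRX;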
+
+/* Fill the information_schema.rocksdb_trx virtual table */
+static int rdb_i_s_trx_info_fill_table(
+ my_core::THD *const thd, my_core::TABLE_LIST *const tables,
+ my_core::Item *const cond MY_ATTRIBUTE((__unused__))) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(thd != nullptr);
+ DBUG_ASSERT(tables != nullptr);
+ DBUG_ASSERT(tables->table != nullptr);
+ DBUG_ASSERT(tables->table->field != nullptr);
+
+ int ret = 0;
+ rocksdb::DB *const rdb = rdb_get_rocksdb_db();
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
+
+ const std::vector<Rdb_trx_info> &all_trx_info = rdb_get_all_trx_info();
+
+ for (const auto &info : all_trx_info) {
+ auto name_hexstr =
+ rdb_hexdump(info.name.c_str(), info.name.length(), NAME_LEN);
+ auto key_hexstr = rdb_hexdump(info.waiting_key.c_str(),
+ info.waiting_key.length(), FN_REFLEN);
+
+ tables->table->field[RDB_TRX_FIELD::TRANSACTION_ID]->store(info.trx_id,
+ true);
+ tables->table->field[RDB_TRX_FIELD::STATE]->store(
+ info.state.c_str(), info.state.length(), system_charset_info);
+ tables->table->field[RDB_TRX_FIELD::NAME]->store(
+ name_hexstr.c_str(), name_hexstr.length(), system_charset_info);
+ tables->table->field[RDB_TRX_FIELD::WRITE_COUNT]->store(info.write_count,
+ true);
+ tables->table->field[RDB_TRX_FIELD::LOCK_COUNT]->store(info.lock_count,
+ true);
+ tables->table->field[RDB_TRX_FIELD::TIMEOUT_SEC]->store(info.timeout_sec,
+ false);
+ tables->table->field[RDB_TRX_FIELD::WAITING_KEY]->store(
+ key_hexstr.c_str(), key_hexstr.length(), system_charset_info);
+ tables->table->field[RDB_TRX_FIELD::WAITING_COLUMN_FAMILY_ID]->store(
+ info.waiting_cf_id, true);
+ tables->table->field[RDB_TRX_FIELD::IS_REPLICATION]->store(
+ info.is_replication, false);
+ tables->table->field[RDB_TRX_FIELD::SKIP_TRX_API]->store(info.skip_trx_api,
+ false);
+ tables->table->field[RDB_TRX_FIELD::READ_ONLY]->store(info.read_only,
+ false);
+ tables->table->field[RDB_TRX_FIELD::HAS_DEADLOCK_DETECTION]->store(
+ info.deadlock_detect, false);
+ tables->table->field[RDB_TRX_FIELD::NUM_ONGOING_BULKLOAD]->store(
+ info.num_ongoing_bulk_load, false);
+ tables->table->field[RDB_TRX_FIELD::THREAD_ID]->store(info.thread_id, true);
+ tables->table->field[RDB_TRX_FIELD::QUERY]->store(
+ info.query_str.c_str(), info.query_str.length(), system_charset_info);
+
+ /* Tell MySQL about this row in the virtual table */
+ ret = static_cast<int>(
+ my_core::schema_table_store_record(thd, tables->table));
+
+ if (ret != 0) {
+ break;
+ }
+ }
+
+ DBUG_RETURN(ret);
+}
+
+/* Initialize the information_schema.rocksdb_trx virtual table */
+static int rdb_i_s_trx_info_init(void *const p) {
+ DBUG_ENTER_FUNC();
+
+ if (prevent_myrocks_loading)
+ DBUG_RETURN(1);
+
+ DBUG_ASSERT(p != nullptr);
+
+ my_core::ST_SCHEMA_TABLE *schema;
+
+ schema = (my_core::ST_SCHEMA_TABLE *)p;
+
+ schema->fields_info = rdb_i_s_trx_info_fields_info;
+ schema->fill_table = rdb_i_s_trx_info_fill_table;
+
+ DBUG_RETURN(0);
+}
+
+/*
+ Support for INFORMATION_SCHEMA.ROCKSDB_DEADLOCK dynamic table
+ */
+namespace RDB_DEADLOCK_FIELD {
+enum {
+ DEADLOCK_ID = 0,
+ TIMESTAMP,
+ TRANSACTION_ID,
+ CF_NAME,
+ WAITING_KEY,
+ LOCK_TYPE,
+ INDEX_NAME,
+ TABLE_NAME,
+ ROLLED_BACK,
+};
+} // namespace RDB_DEADLOCK_FIELD
+
+static ST_FIELD_INFO rdb_i_s_deadlock_info_fields_info[] = {
+ ROCKSDB_FIELD_INFO("DEADLOCK_ID", sizeof(ulonglong), MYSQL_TYPE_LONGLONG,
+ 0),
+ ROCKSDB_FIELD_INFO("TIMESTAMP", sizeof(ulonglong), MYSQL_TYPE_LONGLONG, 0),
+ ROCKSDB_FIELD_INFO("TRANSACTION_ID", sizeof(ulonglong), MYSQL_TYPE_LONGLONG,
+ 0),
+ ROCKSDB_FIELD_INFO("CF_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("WAITING_KEY", FN_REFLEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("LOCK_TYPE", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("INDEX_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("TABLE_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("ROLLED_BACK", sizeof(ulonglong), MYSQL_TYPE_LONGLONG,
+ 0),
+ ROCKSDB_FIELD_INFO_END};
+
+/* Fill the information_schema.rocksdb_deadlock virtual table */
+static int rdb_i_s_deadlock_info_fill_table(
+ my_core::THD *const thd, my_core::TABLE_LIST *const tables,
+ my_core::Item *const cond MY_ATTRIBUTE((__unused__))) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(thd != nullptr);
+ DBUG_ASSERT(tables != nullptr);
+ DBUG_ASSERT(tables->table != nullptr);
+ DBUG_ASSERT(tables->table->field != nullptr);
+
+ static const std::string str_exclusive("EXCLUSIVE");
+ static const std::string str_shared("SHARED");
+
+ int ret = 0;
+ rocksdb::DB *const rdb = rdb_get_rocksdb_db();
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
+
+ const std::vector<Rdb_deadlock_info> &all_dl_info = rdb_get_deadlock_info();
+
+ ulonglong id = 0;
+ for (const auto &info : all_dl_info) {
+ auto deadlock_time = info.deadlock_time;
+ for (const auto &trx_info : info.path) {
+ tables->table->field[RDB_DEADLOCK_FIELD::DEADLOCK_ID]->store(id, true);
+ tables->table->field[RDB_DEADLOCK_FIELD::TIMESTAMP]->store(deadlock_time,
+ true);
+ tables->table->field[RDB_DEADLOCK_FIELD::TRANSACTION_ID]->store(
+ trx_info.trx_id, true);
+ tables->table->field[RDB_DEADLOCK_FIELD::CF_NAME]->store(
+ trx_info.cf_name.c_str(), trx_info.cf_name.length(),
+ system_charset_info);
+ tables->table->field[RDB_DEADLOCK_FIELD::WAITING_KEY]->store(
+ trx_info.waiting_key.c_str(), trx_info.waiting_key.length(),
+ system_charset_info);
+ if (trx_info.exclusive_lock) {
+ tables->table->field[RDB_DEADLOCK_FIELD::LOCK_TYPE]->store(
+ str_exclusive.c_str(), str_exclusive.length(), system_charset_info);
+ } else {
+ tables->table->field[RDB_DEADLOCK_FIELD::LOCK_TYPE]->store(
+ str_shared.c_str(), str_shared.length(), system_charset_info);
+ }
+ tables->table->field[RDB_DEADLOCK_FIELD::INDEX_NAME]->store(
+ trx_info.index_name.c_str(), trx_info.index_name.length(),
+ system_charset_info);
+ tables->table->field[RDB_DEADLOCK_FIELD::TABLE_NAME]->store(
+ trx_info.table_name.c_str(), trx_info.table_name.length(),
+ system_charset_info);
+ tables->table->field[RDB_DEADLOCK_FIELD::ROLLED_BACK]->store(
+ trx_info.trx_id == info.victim_trx_id, true);
+
+ /* Tell MySQL about this row in the virtual table */
+ ret = static_cast<int>(
+ my_core::schema_table_store_record(thd, tables->table));
+
+ if (ret != 0) {
+ break;
+ }
+ }
+ id++;
+ }
+
+ DBUG_RETURN(ret);
+}
+
+/* Initialize the information_schema.rocksdb_deadlock virtual table */
+static int rdb_i_s_deadlock_info_init(void *const p) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(p != nullptr);
+
+ my_core::ST_SCHEMA_TABLE *schema;
+
+ schema = (my_core::ST_SCHEMA_TABLE *)p;
+
+ schema->fields_info = rdb_i_s_deadlock_info_fields_info;
+ schema->fill_table = rdb_i_s_deadlock_info_fill_table;
+
+ DBUG_RETURN(0);
+}
+
+static int rdb_i_s_deinit(void *p MY_ATTRIBUTE((__unused__))) {
+ DBUG_ENTER_FUNC();
+ DBUG_RETURN(0);
+}
+
+static struct st_mysql_information_schema rdb_i_s_info = {
+ MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION};
+
+struct st_maria_plugin rdb_i_s_cfstats = {
+ MYSQL_INFORMATION_SCHEMA_PLUGIN,
+ &rdb_i_s_info,
+ "ROCKSDB_CFSTATS",
+ "Facebook",
+ "RocksDB column family stats",
+ PLUGIN_LICENSE_GPL,
+ rdb_i_s_cfstats_init,
+ rdb_i_s_deinit,
+ 0x0001, /* version number (0.1) */
+ nullptr, /* status variables */
+ nullptr, /* system variables */
+ nullptr, /* config options */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
+};
+
+struct st_maria_plugin rdb_i_s_dbstats = {
+ MYSQL_INFORMATION_SCHEMA_PLUGIN,
+ &rdb_i_s_info,
+ "ROCKSDB_DBSTATS",
+ "Facebook",
+ "RocksDB database stats",
+ PLUGIN_LICENSE_GPL,
+ rdb_i_s_dbstats_init,
+ rdb_i_s_deinit,
+ 0x0001, /* version number (0.1) */
+ nullptr, /* status variables */
+ nullptr, /* system variables */
+ nullptr, /* config options */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
+};
+
+struct st_maria_plugin rdb_i_s_perf_context = {
+ MYSQL_INFORMATION_SCHEMA_PLUGIN,
+ &rdb_i_s_info,
+ "ROCKSDB_PERF_CONTEXT",
+ "Facebook",
+ "RocksDB perf context stats",
+ PLUGIN_LICENSE_GPL,
+ rdb_i_s_perf_context_init,
+ rdb_i_s_deinit,
+ 0x0001, /* version number (0.1) */
+ nullptr, /* status variables */
+ nullptr, /* system variables */
+ nullptr, /* config options */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
+};
+
+struct st_maria_plugin rdb_i_s_perf_context_global = {
+ MYSQL_INFORMATION_SCHEMA_PLUGIN,
+ &rdb_i_s_info,
+ "ROCKSDB_PERF_CONTEXT_GLOBAL",
+ "Facebook",
+ "RocksDB perf context stats (all)",
+ PLUGIN_LICENSE_GPL,
+ rdb_i_s_perf_context_global_init,
+ rdb_i_s_deinit,
+ 0x0001, /* version number (0.1) */
+ nullptr, /* status variables */
+ nullptr, /* system variables */
+ nullptr, /* config options */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
+};
+
+struct st_maria_plugin rdb_i_s_cfoptions = {
+ MYSQL_INFORMATION_SCHEMA_PLUGIN,
+ &rdb_i_s_info,
+ "ROCKSDB_CF_OPTIONS",
+ "Facebook",
+ "RocksDB column family options",
+ PLUGIN_LICENSE_GPL,
+ rdb_i_s_cfoptions_init,
+ rdb_i_s_deinit,
+ 0x0001, /* version number (0.1) */
+ nullptr, /* status variables */
+ nullptr, /* system variables */
+ nullptr, /* config options */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
+};
+
+struct st_maria_plugin rdb_i_s_global_info = {
+ MYSQL_INFORMATION_SCHEMA_PLUGIN,
+ &rdb_i_s_info,
+ "ROCKSDB_GLOBAL_INFO",
+ "Facebook",
+ "RocksDB global info",
+ PLUGIN_LICENSE_GPL,
+ rdb_i_s_global_info_init,
+ rdb_i_s_deinit,
+ 0x0001, /* version number (0.1) */
+ nullptr, /* status variables */
+ nullptr, /* system variables */
+ nullptr, /* config options */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
+};
+
+struct st_maria_plugin rdb_i_s_compact_stats = {
+ MYSQL_INFORMATION_SCHEMA_PLUGIN,
+ &rdb_i_s_info,
+ "ROCKSDB_COMPACTION_STATS",
+ "Facebook",
+ "RocksDB compaction stats",
+ PLUGIN_LICENSE_GPL,
+ rdb_i_s_compact_stats_init,
+ rdb_i_s_deinit,
+ 0x0001, /* version number (0.1) */
+ nullptr, /* status variables */
+ nullptr, /* system variables */
+ nullptr, /* config options */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
+};
+
+struct st_maria_plugin rdb_i_s_ddl = {
+ MYSQL_INFORMATION_SCHEMA_PLUGIN,
+ &rdb_i_s_info,
+ "ROCKSDB_DDL",
+ "Facebook",
+ "RocksDB Data Dictionary",
+ PLUGIN_LICENSE_GPL,
+ rdb_i_s_ddl_init,
+ rdb_i_s_deinit,
+ 0x0001, /* version number (0.1) */
+ nullptr, /* status variables */
+ nullptr, /* system variables */
+ nullptr, /* config options */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
+};
+
+struct st_maria_plugin rdb_i_s_sst_props = {
+ MYSQL_INFORMATION_SCHEMA_PLUGIN,
+ &rdb_i_s_info,
+ "ROCKSDB_SST_PROPS",
+ "Facebook",
+ "RocksDB SST Properties",
+ PLUGIN_LICENSE_GPL,
+ rdb_i_s_sst_props_init,
+ rdb_i_s_deinit,
+ 0x0001, /* version number (0.1) */
+ nullptr, /* status variables */
+ nullptr, /* system variables */
+ nullptr, /* config options */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
+};
+
+struct st_maria_plugin rdb_i_s_index_file_map = {
+ MYSQL_INFORMATION_SCHEMA_PLUGIN,
+ &rdb_i_s_info,
+ "ROCKSDB_INDEX_FILE_MAP",
+ "Facebook",
+ "RocksDB index file map",
+ PLUGIN_LICENSE_GPL,
+ rdb_i_s_index_file_map_init,
+ rdb_i_s_deinit,
+ 0x0001, /* version number (0.1) */
+ nullptr, /* status variables */
+ nullptr, /* system variables */
+ nullptr, /* config options */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
+};
+
+struct st_maria_plugin rdb_i_s_lock_info = {
+ MYSQL_INFORMATION_SCHEMA_PLUGIN,
+ &rdb_i_s_info,
+ "ROCKSDB_LOCKS",
+ "Facebook",
+ "RocksDB lock information",
+ PLUGIN_LICENSE_GPL,
+ rdb_i_s_lock_info_init,
+ nullptr,
+ 0x0001, /* version number (0.1) */
+ nullptr, /* status variables */
+ nullptr, /* system variables */
+ nullptr, /* config options */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
+};
+
+struct st_maria_plugin rdb_i_s_trx_info = {
+ MYSQL_INFORMATION_SCHEMA_PLUGIN,
+ &rdb_i_s_info,
+ "ROCKSDB_TRX",
+ "Facebook",
+ "RocksDB transaction information",
+ PLUGIN_LICENSE_GPL,
+ rdb_i_s_trx_info_init,
+ nullptr,
+ 0x0001, /* version number (0.1) */
+ nullptr, /* status variables */
+ nullptr, /* system variables */
+ nullptr, /* config options */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
+};
+
+struct st_maria_plugin rdb_i_s_deadlock_info = {
+ MYSQL_INFORMATION_SCHEMA_PLUGIN,
+ &rdb_i_s_info,
+ "ROCKSDB_DEADLOCK",
+ "Facebook",
+ "RocksDB transaction information",
+ PLUGIN_LICENSE_GPL,
+ rdb_i_s_deadlock_info_init,
+ nullptr,
+ 0x0001, /* version number (0.1) */
+ nullptr, /* status variables */
+ nullptr, /* system variables */
+ nullptr, /* config options */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
+};
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_i_s.h b/storage/rocksdb/rdb_i_s.h
new file mode 100644
index 00000000000..6001742d984
--- /dev/null
+++ b/storage/rocksdb/rdb_i_s.h
@@ -0,0 +1,37 @@
+/*
+ Copyright (c) 2012,2013 Monty Program Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+#pragma once
+
+namespace myrocks {
+
+/*
+ Declare INFORMATION_SCHEMA (I_S) plugins needed by MyRocks storage engine.
+*/
+
+extern struct st_maria_plugin rdb_i_s_cfstats;
+extern struct st_maria_plugin rdb_i_s_dbstats;
+extern struct st_maria_plugin rdb_i_s_perf_context;
+extern struct st_maria_plugin rdb_i_s_perf_context_global;
+extern struct st_maria_plugin rdb_i_s_cfoptions;
+extern struct st_maria_plugin rdb_i_s_compact_stats;
+extern struct st_maria_plugin rdb_i_s_global_info;
+extern struct st_maria_plugin rdb_i_s_ddl;
+extern struct st_maria_plugin rdb_i_s_sst_props;
+extern struct st_maria_plugin rdb_i_s_index_file_map;
+extern struct st_maria_plugin rdb_i_s_lock_info;
+extern struct st_maria_plugin rdb_i_s_trx_info;
+extern struct st_maria_plugin rdb_i_s_deadlock_info;
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_index_merge.cc b/storage/rocksdb/rdb_index_merge.cc
new file mode 100644
index 00000000000..424a998548a
--- /dev/null
+++ b/storage/rocksdb/rdb_index_merge.cc
@@ -0,0 +1,630 @@
+/*
+ Copyright (c) 2016, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+
+#include <my_config.h>
+
+/* This C++ file's header file */
+#include "./rdb_index_merge.h"
+
+/* MySQL header files */
+#include "../sql/sql_class.h"
+
+/* MyRocks header files */
+#include "./ha_rocksdb.h"
+#include "./rdb_datadic.h"
+
+namespace myrocks {
+
+Rdb_index_merge::Rdb_index_merge(const char *const tmpfile_path,
+ const ulonglong merge_buf_size,
+ const ulonglong merge_combine_read_size,
+ const ulonglong merge_tmp_file_removal_delay,
+ rocksdb::ColumnFamilyHandle *cf)
+ : m_tmpfile_path(tmpfile_path),
+ m_merge_buf_size(merge_buf_size),
+ m_merge_combine_read_size(merge_combine_read_size),
+ m_merge_tmp_file_removal_delay(merge_tmp_file_removal_delay),
+ m_cf_handle(cf),
+ m_rec_buf_unsorted(nullptr),
+ m_output_buf(nullptr) {}
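+
+/*
+  High-level flow (summary of the code below): add() appends records to an
+  unsorted in-memory buffer while tracking their sort order in m_offset_tree;
+  merge_buf_write() flushes one sorted chunk to the temp file; next() then
+  performs an n-way merge over the on-disk chunks using a min-heap.
+*/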
+
+Rdb_index_merge::~Rdb_index_merge() {
+ /*
+ If merge_tmp_file_removal_delay is set, sleep between calls to chsize.
+
+ This helps mitigate potential trim stalls on flash when large files are
+ being deleted too quickly.
+ */
+ if (m_merge_tmp_file_removal_delay > 0) {
+ uint64 curr_size = m_merge_buf_size * m_merge_file.m_num_sort_buffers;
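+    // Truncate the file one sort buffer at a time, from the end of the file
+    // backwards, sleeping between truncations so deletes hit flash gradually.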
+ for (uint i = 0; i < m_merge_file.m_num_sort_buffers; i++) {
+ if (my_chsize(m_merge_file.m_fd, curr_size, 0, MYF(MY_WME))) {
+ // NO_LINT_DEBUG
+ sql_print_error("Error truncating file during fast index creation.");
+ }
+
+ my_sleep(m_merge_tmp_file_removal_delay * 1000);
+ // Not aborting on fsync error since the tmp file is not used anymore
+ if (mysql_file_sync(m_merge_file.m_fd, MYF(MY_WME))) {
+ // NO_LINT_DEBUG
+ sql_print_error("Error flushing truncated MyRocks merge buffer.");
+ }
+ curr_size -= m_merge_buf_size;
+ }
+ }
+
+  /*
+    Close the file descriptor; we don't need to worry about deletion,
+    MySQL handles it.
+  */
+ my_close(m_merge_file.m_fd, MYF(MY_WME));
+}
+
+int Rdb_index_merge::init() {
+ /*
+ Create a temporary merge file on disk to store sorted chunks during
+ inplace index creation.
+ */
+ if (merge_file_create()) {
+ return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
+ }
+
+ /*
+ Then, allocate buffer to store unsorted records before they are written
+ to disk. They will be written to disk sorted. A sorted tree is used to
+ keep track of the offset of each record within the unsorted buffer.
+ */
+ m_rec_buf_unsorted =
+ std::shared_ptr<merge_buf_info>(new merge_buf_info(m_merge_buf_size));
+
+ /*
+ Allocate output buffer that will contain sorted block that is written to
+ disk.
+ */
+ m_output_buf =
+ std::shared_ptr<merge_buf_info>(new merge_buf_info(m_merge_buf_size));
+
+ return HA_EXIT_SUCCESS;
+}
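+
+/*
+  Typical usage during index creation (a sketch; the caller-side names here
+  are assumed, not from this file):
+
+    Rdb_index_merge merge(tmp_path, buf_size, read_size, removal_delay, cf);
+    if (merge.init()) return error;
+    // feed rows:              merge.add(key, val);
+    // drain in sorted order:  while (merge.next(&k, &v) == 0) { ... }
+*/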
+
+/**
+ Create a merge file in the given location.
+*/
+int Rdb_index_merge::merge_file_create() {
+ DBUG_ASSERT(m_merge_file.m_fd == -1);
+
+ int fd;
+#ifdef MARIAROCKS_NOT_YET // mysql_tmpfile_path use
+ /* If no path set for tmpfile, use mysql_tmpdir by default */
+ if (m_tmpfile_path == nullptr) {
+ fd = mysql_tmpfile("myrocks");
+ } else {
+ fd = mysql_tmpfile_path(m_tmpfile_path, "myrocks");
+ }
+#else
+ fd = mysql_tmpfile("myrocks");
+#endif
+ if (fd < 0) {
+ // NO_LINT_DEBUG
+ sql_print_error("Failed to create temp file during fast index creation.");
+ return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
+ }
+
+ m_merge_file.m_fd = fd;
+ m_merge_file.m_num_sort_buffers = 0;
+
+ return HA_EXIT_SUCCESS;
+}
+
+/**
+ Add record to offset tree (and unsorted merge buffer) in preparation for
+ writing out to disk in sorted chunks.
+
+ If buffer in memory is full, write the buffer out to disk sorted using the
+ offset tree, and clear the tree. (Happens in merge_buf_write)
+*/
+int Rdb_index_merge::add(const rocksdb::Slice &key, const rocksdb::Slice &val) {
+  /* Adding a record after the heap has been created is an error */
+ DBUG_ASSERT(m_merge_min_heap.empty());
+
+ /*
+ Check if sort buffer is going to be out of space, if so write it
+ out to disk in sorted order using offset tree.
+ */
+ const uint total_offset = RDB_MERGE_CHUNK_LEN +
+ m_rec_buf_unsorted->m_curr_offset +
+ RDB_MERGE_KEY_DELIMITER + RDB_MERGE_VAL_DELIMITER +
+ key.size() + val.size();
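+  // The size accounting above reflects the chunk layout: an 8-byte chunk
+  // length header (RDB_MERGE_CHUNK_LEN) plus, per record, key and value
+  // length delimiters followed by the key and value bytes themselves.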
+ if (total_offset >= m_rec_buf_unsorted->m_total_size) {
+ /*
+ If the offset tree is empty here, that means that the proposed key to
+ add is too large for the buffer.
+ */
+ if (m_offset_tree.empty()) {
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "Sort buffer size is too small to process merge. "
+ "Please set merge buffer size to a higher value.");
+ return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
+ }
+
+ if (merge_buf_write()) {
+ // NO_LINT_DEBUG
+ sql_print_error("Error writing sort buffer to disk.");
+ return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
+ }
+ }
+
+ const ulonglong rec_offset = m_rec_buf_unsorted->m_curr_offset;
+
+ /*
+ Store key and value in temporary unsorted in memory buffer pointed to by
+ offset tree.
+ */
+ m_rec_buf_unsorted->store_key_value(key, val);
+
+ /* Find sort order of the new record */
+ auto res =
+ m_offset_tree.emplace(m_rec_buf_unsorted->m_block.get() + rec_offset,
+ m_cf_handle->GetComparator());
+ if (!res.second) {
+ my_printf_error(ER_DUP_ENTRY,
+ "Failed to insert the record: the key already exists",
+ MYF(0));
+ return ER_DUP_ENTRY;
+ }
+
+ return HA_EXIT_SUCCESS;
+}
+
+/**
+ Sort + write merge buffer chunk out to disk.
+*/
+int Rdb_index_merge::merge_buf_write() {
+ DBUG_ASSERT(m_merge_file.m_fd != -1);
+ DBUG_ASSERT(m_rec_buf_unsorted != nullptr);
+ DBUG_ASSERT(m_output_buf != nullptr);
+ DBUG_ASSERT(!m_offset_tree.empty());
+
+ /* Write actual chunk size to first 8 bytes of the merge buffer */
+ merge_store_uint64(m_output_buf->m_block.get(),
+ m_rec_buf_unsorted->m_curr_offset + RDB_MERGE_CHUNK_LEN);
+ m_output_buf->m_curr_offset += RDB_MERGE_CHUNK_LEN;
+
+ /*
+ Iterate through the offset tree. Should be ordered by the secondary key
+ at this point.
+ */
+ for (const auto &rec : m_offset_tree) {
+ DBUG_ASSERT(m_output_buf->m_curr_offset <= m_merge_buf_size);
+
+ /* Read record from offset (should never fail) */
+ rocksdb::Slice key;
+ rocksdb::Slice val;
+ merge_read_rec(rec.m_block, &key, &val);
+
+ /* Store key and value into sorted output buffer */
+ m_output_buf->store_key_value(key, val);
+ }
+
+ DBUG_ASSERT(m_output_buf->m_curr_offset <= m_output_buf->m_total_size);
+
+ /*
+ Write output buffer to disk.
+
+    Position the file cursor at the offset of this chunk on the filesystem,
+    then write the sorted buffer there.
+ */
+ if (my_seek(m_merge_file.m_fd,
+ m_merge_file.m_num_sort_buffers * m_merge_buf_size, SEEK_SET,
+ MYF(0)) == MY_FILEPOS_ERROR) {
+ // NO_LINT_DEBUG
+ sql_print_error("Error seeking to location in merge file on disk.");
+ return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
+ }
+
+ /*
+ Add a file sync call here to flush the data out. Otherwise, the filesystem
+ cache can flush out all of the files at the same time, causing a write
+ burst.
+ */
+ if (my_write(m_merge_file.m_fd, m_output_buf->m_block.get(),
+ m_output_buf->m_total_size, MYF(MY_WME | MY_NABP)) ||
+ mysql_file_sync(m_merge_file.m_fd, MYF(MY_WME))) {
+ // NO_LINT_DEBUG
+ sql_print_error("Error writing sorted merge buffer to disk.");
+ return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
+ }
+
+ /* Increment merge file offset to track number of merge buffers written */
+ m_merge_file.m_num_sort_buffers += 1;
+
+ /* Reset everything for next run */
+ merge_reset();
+
+ return HA_EXIT_SUCCESS;
+}
+
+/**
+ Prepare n-way merge of n sorted buffers on disk, using a heap sorted by
+ secondary key records.
+*/
+int Rdb_index_merge::merge_heap_prepare() {
+ DBUG_ASSERT(m_merge_min_heap.empty());
+
+ /*
+ If the offset tree is not empty, there are still some records that need to
+ be written to disk. Write them out now.
+ */
+ if (!m_offset_tree.empty() && merge_buf_write()) {
+ return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
+ }
+
+ DBUG_ASSERT(m_merge_file.m_num_sort_buffers > 0);
+
+ /*
+ For an n-way merge, we need to read chunks of each merge file
+ simultaneously.
+ */
+ ulonglong chunk_size =
+ m_merge_combine_read_size / m_merge_file.m_num_sort_buffers;
+ if (chunk_size >= m_merge_buf_size) {
+ chunk_size = m_merge_buf_size;
+ }
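+  // Each chunk on disk is at most m_merge_buf_size bytes, so reading more
+  // than that per buffer cannot help.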
+
+ /* Allocate buffers for each chunk */
+ for (ulonglong i = 0; i < m_merge_file.m_num_sort_buffers; i++) {
+ const auto entry =
+ std::make_shared<merge_heap_entry>(m_cf_handle->GetComparator());
+
+ /*
+ Read chunk_size bytes from each chunk on disk, and place inside
+ respective chunk buffer.
+ */
+ const size_t total_size =
+ entry->prepare(m_merge_file.m_fd, i * m_merge_buf_size, chunk_size);
+
+ if (total_size == (size_t)-1) {
+ return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
+ }
+
+    /* Can reach this condition if an index was added on a table with no rows */
+ if (total_size - RDB_MERGE_CHUNK_LEN == 0) {
+ break;
+ }
+
+ /* Read the first record from each buffer to initially populate the heap */
+ if (entry->read_rec(&entry->m_key, &entry->m_val)) {
+ // NO_LINT_DEBUG
+ sql_print_error("Chunk size is too small to process merge.");
+ return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
+ }
+
+ m_merge_min_heap.push(std::move(entry));
+ }
+
+ return HA_EXIT_SUCCESS;
+}
+
+/**
+ Create and/or iterate through keys in the merge heap.
+*/
+int Rdb_index_merge::next(rocksdb::Slice *const key,
+ rocksdb::Slice *const val) {
+ /*
+ If table fits in one sort buffer, we can optimize by writing
+ the sort buffer directly through to the sstfilewriter instead of
+ needing to create tmp files/heap to merge the sort buffers.
+
+ If there are no sort buffer records (alters on empty tables),
+ also exit here.
+ */
+ if (m_merge_file.m_num_sort_buffers == 0) {
+ if (m_offset_tree.empty()) {
+ return -1;
+ }
+
+ const auto rec = m_offset_tree.begin();
+
+ /* Read record from offset */
+ merge_read_rec(rec->m_block, key, val);
+
+ m_offset_tree.erase(rec);
+ return HA_EXIT_SUCCESS;
+ }
+
+ int res;
+
+ /*
+ If heap and heap chunk info are empty, we must be beginning the merge phase
+ of the external sort. Populate the heap with initial values from each
+ disk chunk.
+ */
+ if (m_merge_min_heap.empty()) {
+ if ((res = merge_heap_prepare())) {
+ // NO_LINT_DEBUG
+ sql_print_error("Error during preparation of heap.");
+ return res;
+ }
+
+ /*
+ Return the first top record without popping, as we haven't put this
+ inside the SST file yet.
+ */
+ merge_heap_top(key, val);
+ return HA_EXIT_SUCCESS;
+ }
+
+ DBUG_ASSERT(!m_merge_min_heap.empty());
+ return merge_heap_pop_and_get_next(key, val);
+}
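+
+/*
+  Usage sketch (illustrative only; "writer" stands for a caller-owned
+  rocksdb::SstFileWriter, and error handling is elided):
+
+    Rdb_index_merge merge(tmpfile_path, buf_size, combine_read_size,
+                          removal_delay, cf_handle);
+    merge.init();
+    for (const auto &pair : new_index_entries)    // build phase
+      merge.add(pair.first, pair.second);         // may spill chunks to disk
+
+    rocksdb::Slice key, val;                      // merge phase
+    while (merge.next(&key, &val) == HA_EXIT_SUCCESS)
+      writer->Put(key, val);                      // keys arrive sorted
+*/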
+
+/**
+ Get current top record from the heap.
+*/
+void Rdb_index_merge::merge_heap_top(rocksdb::Slice *const key,
+ rocksdb::Slice *const val) {
+ DBUG_ASSERT(!m_merge_min_heap.empty());
+
+ const std::shared_ptr<merge_heap_entry> &entry = m_merge_min_heap.top();
+ *key = entry->m_key;
+ *val = entry->m_val;
+}
+
+/**
+  Pops the top record, then reads the next record from the corresponding
+  sort buffer and pushes it back onto the heap.
+
+ Returns -1 when there are no more records in the heap.
+*/
+int Rdb_index_merge::merge_heap_pop_and_get_next(rocksdb::Slice *const key,
+ rocksdb::Slice *const val) {
+ /*
+ Make a new reference to shared ptr so it doesn't get destroyed
+ during pop(). We are going to push this entry back onto the heap.
+ */
+ const std::shared_ptr<merge_heap_entry> entry = m_merge_min_heap.top();
+ m_merge_min_heap.pop();
+
+ /*
+ We are finished w/ current chunk if:
+ current_offset + disk_offset == m_total_size
+
+ Return without adding entry back onto heap.
+ If heap is also empty, we must be finished with merge.
+ */
+ if (entry->m_chunk_info->is_chunk_finished()) {
+ if (m_merge_min_heap.empty()) {
+ return -1;
+ }
+
+ merge_heap_top(key, val);
+ return HA_EXIT_SUCCESS;
+ }
+
+ /*
+ Make sure we haven't reached the end of the chunk.
+ */
+ DBUG_ASSERT(!entry->m_chunk_info->is_chunk_finished());
+
+ /*
+    If read_rec() fails, it means either the chunk was cut off mid-record
+    or we've reached the end of the respective chunk.
+ */
+ if (entry->read_rec(&entry->m_key, &entry->m_val)) {
+ if (entry->read_next_chunk_from_disk(m_merge_file.m_fd)) {
+ return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
+ }
+
+ /* Try reading record again, should never fail. */
+ if (entry->read_rec(&entry->m_key, &entry->m_val)) {
+ return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
+ }
+ }
+
+ /* Push entry back on to the heap w/ updated buffer + offset ptr */
+ m_merge_min_heap.push(std::move(entry));
+
+ /* Return the current top record on heap */
+ merge_heap_top(key, val);
+ return HA_EXIT_SUCCESS;
+}
+
+int Rdb_index_merge::merge_heap_entry::read_next_chunk_from_disk(File fd) {
+ if (m_chunk_info->read_next_chunk_from_disk(fd)) {
+ return HA_EXIT_FAILURE;
+ }
+
+ m_block = m_chunk_info->m_block.get();
+ return HA_EXIT_SUCCESS;
+}
+
+int Rdb_index_merge::merge_buf_info::read_next_chunk_from_disk(File fd) {
+ m_disk_curr_offset += m_curr_offset;
+
+ if (my_seek(fd, m_disk_curr_offset, SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR) {
+ // NO_LINT_DEBUG
+ sql_print_error("Error seeking to location in merge file on disk.");
+ return HA_EXIT_FAILURE;
+ }
+
+ /* Overwrite the old block */
+ const size_t bytes_read =
+ my_read(fd, m_block.get(), m_block_len, MYF(MY_WME));
+ if (bytes_read == (size_t)-1) {
+ // NO_LINT_DEBUG
+ sql_print_error("Error reading merge file from disk.");
+ return HA_EXIT_FAILURE;
+ }
+
+ m_curr_offset = 0;
+ return HA_EXIT_SUCCESS;
+}
+
+/**
+  Compare the records at the given offsets within the sort buffer,
+  ordering from least to greatest.
+*/
+int Rdb_index_merge::merge_record_compare(
+ const uchar *const a_block, const uchar *const b_block,
+ const rocksdb::Comparator *const comparator) {
+ return comparator->Compare(as_slice(a_block), as_slice(b_block));
+}
+
+/**
+  Given an offset in a merge sort buffer, read out the key and value
+  stored there.
+*/
+void Rdb_index_merge::merge_read_rec(const uchar *const block,
+ rocksdb::Slice *const key,
+ rocksdb::Slice *const val) {
+  /* Read key at block offset into key slice and the value into value slice */
+ read_slice(key, block);
+ read_slice(val, block + RDB_MERGE_REC_DELIMITER + key->size());
+}
+
+void Rdb_index_merge::read_slice(rocksdb::Slice *slice,
+ const uchar *block_ptr) {
+ uint64 slice_len;
+ merge_read_uint64(&block_ptr, &slice_len);
+
+ *slice = rocksdb::Slice(reinterpret_cast<const char *>(block_ptr), slice_len);
+}
+
+int Rdb_index_merge::merge_heap_entry::read_rec(rocksdb::Slice *const key,
+ rocksdb::Slice *const val) {
+ const uchar *block_ptr = m_block;
+ const auto orig_offset = m_chunk_info->m_curr_offset;
+ const auto orig_block = m_block;
+
+  /* Read key at block offset into key slice and the value into value slice */
+ if (read_slice(key, &block_ptr) != 0) {
+ return HA_EXIT_FAILURE;
+ }
+
+ m_chunk_info->m_curr_offset += (uintptr_t)block_ptr - (uintptr_t)m_block;
+ m_block += (uintptr_t)block_ptr - (uintptr_t)m_block;
+
+ if (read_slice(val, &block_ptr) != 0) {
+ m_chunk_info->m_curr_offset = orig_offset;
+ m_block = orig_block;
+ return HA_EXIT_FAILURE;
+ }
+
+ m_chunk_info->m_curr_offset += (uintptr_t)block_ptr - (uintptr_t)m_block;
+ m_block += (uintptr_t)block_ptr - (uintptr_t)m_block;
+
+ return HA_EXIT_SUCCESS;
+}
+
+int Rdb_index_merge::merge_heap_entry::read_slice(rocksdb::Slice *const slice,
+ const uchar **block_ptr) {
+ if (!m_chunk_info->has_space(RDB_MERGE_REC_DELIMITER)) {
+ return HA_EXIT_FAILURE;
+ }
+
+ uint64 slice_len;
+ merge_read_uint64(block_ptr, &slice_len);
+ if (!m_chunk_info->has_space(RDB_MERGE_REC_DELIMITER + slice_len)) {
+ return HA_EXIT_FAILURE;
+ }
+
+ *slice =
+ rocksdb::Slice(reinterpret_cast<const char *>(*block_ptr), slice_len);
+ *block_ptr += slice_len;
+ return HA_EXIT_SUCCESS;
+}
+
+size_t Rdb_index_merge::merge_heap_entry::prepare(File fd, ulonglong f_offset,
+ ulonglong chunk_size) {
+ m_chunk_info = std::make_shared<merge_buf_info>(chunk_size);
+ const size_t res = m_chunk_info->prepare(fd, f_offset);
+ if (res != (size_t)-1) {
+ m_block = m_chunk_info->m_block.get() + RDB_MERGE_CHUNK_LEN;
+ }
+
+ return res;
+}
+
+size_t Rdb_index_merge::merge_buf_info::prepare(File fd, ulonglong f_offset) {
+ m_disk_start_offset = f_offset;
+ m_disk_curr_offset = f_offset;
+
+ /*
+    Position the file cursor at the chunk's offset in the merge file,
+    then read 'chunk_size' bytes into the respective chunk buffer.
+ */
+ if (my_seek(fd, f_offset, SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR) {
+ // NO_LINT_DEBUG
+ sql_print_error("Error seeking to location in merge file on disk.");
+ return (size_t)-1;
+ }
+
+ const size_t bytes_read =
+ my_read(fd, m_block.get(), m_total_size, MYF(MY_WME));
+ if (bytes_read == (size_t)-1) {
+ // NO_LINT_DEBUG
+ sql_print_error("Error reading merge file from disk.");
+ return (size_t)-1;
+ }
+
+ /*
+    Read the first 8 bytes of the chunk; this gives us the actual
+    size of the chunk.
+ */
+ const uchar *block_ptr = m_block.get();
+ merge_read_uint64(&block_ptr, &m_total_size);
+ m_curr_offset += RDB_MERGE_CHUNK_LEN;
+ return m_total_size;
+}
+
+/* Store key and value w/ their respective delimiters at the given offset */
+void Rdb_index_merge::merge_buf_info::store_key_value(
+ const rocksdb::Slice &key, const rocksdb::Slice &val) {
+ store_slice(key);
+ store_slice(val);
+}
+
+void Rdb_index_merge::merge_buf_info::store_slice(const rocksdb::Slice &slice) {
+ /* Store length delimiter */
+ merge_store_uint64(&m_block[m_curr_offset], slice.size());
+
+ /* Store slice data */
+ memcpy(&m_block[m_curr_offset + RDB_MERGE_REC_DELIMITER], slice.data(),
+ slice.size());
+
+ m_curr_offset += slice.size() + RDB_MERGE_REC_DELIMITER;
+}
+
+void Rdb_index_merge::merge_reset() {
+ /*
+ Either error, or all values in the sort buffer have been written to disk,
+ so we need to clear the offset tree.
+ */
+ m_offset_tree.clear();
+
+ /* Reset sort buffer block */
+ if (m_rec_buf_unsorted && m_rec_buf_unsorted->m_block) {
+ m_rec_buf_unsorted->m_curr_offset = 0;
+ }
+
+ /* Reset output buf */
+ if (m_output_buf && m_output_buf->m_block) {
+ m_output_buf->m_curr_offset = 0;
+ }
+}
+
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_index_merge.h b/storage/rocksdb/rdb_index_merge.h
new file mode 100644
index 00000000000..756b99ca4f1
--- /dev/null
+++ b/storage/rocksdb/rdb_index_merge.h
@@ -0,0 +1,227 @@
+/*
+ Copyright (c) 2016, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+
+#pragma once
+
+/* MySQL header files */
+#include "../sql/log.h"
+#include "./handler.h" /* handler */
+#include "./my_global.h" /* ulonglong */
+
+/* C++ standard header files */
+#include <queue>
+#include <set>
+#include <vector>
+
+/* RocksDB header files */
+#include "rocksdb/db.h"
+
+/* MyRocks header files */
+#include "./rdb_comparator.h"
+
+namespace myrocks {
+
+/*
+ Length of delimiters used during inplace index creation.
+*/
+#define RDB_MERGE_CHUNK_LEN sizeof(size_t)
+#define RDB_MERGE_REC_DELIMITER sizeof(size_t)
+#define RDB_MERGE_KEY_DELIMITER RDB_MERGE_REC_DELIMITER
+#define RDB_MERGE_VAL_DELIMITER RDB_MERGE_REC_DELIMITER
+
+class Rdb_key_def;
+class Rdb_tbl_def;
+
+class Rdb_index_merge {
+ Rdb_index_merge(const Rdb_index_merge &p) = delete;
+ Rdb_index_merge &operator=(const Rdb_index_merge &p) = delete;
+
+ public:
+ /* Information about temporary files used in external merge sort */
+ struct merge_file_info {
+ File m_fd = -1; /* file descriptor */
+ ulong m_num_sort_buffers = 0; /* number of sort buffers in temp file */
+ };
+
+ /* Buffer for sorting in main memory. */
+ struct merge_buf_info {
+ /* heap memory allocated for main memory sort/merge */
+ std::unique_ptr<uchar[]> m_block;
+ const ulonglong
+ m_block_len; /* amount of data bytes allocated for block above */
+ ulonglong m_curr_offset; /* offset of the record pointer for the block */
+ ulonglong m_disk_start_offset; /* where the chunk starts on disk */
+ ulonglong m_disk_curr_offset; /* current offset on disk */
+ ulonglong m_total_size; /* total # of data bytes in chunk */
+
+ void store_key_value(const rocksdb::Slice &key, const rocksdb::Slice &val)
+ MY_ATTRIBUTE((__nonnull__));
+
+ void store_slice(const rocksdb::Slice &slice) MY_ATTRIBUTE((__nonnull__));
+
+ size_t prepare(File fd, ulonglong f_offset) MY_ATTRIBUTE((__nonnull__));
+
+ int read_next_chunk_from_disk(File fd)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ inline bool is_chunk_finished() const {
+ return m_curr_offset + m_disk_curr_offset - m_disk_start_offset ==
+ m_total_size;
+ }
+
+ inline bool has_space(uint64 needed) const {
+ return m_curr_offset + needed <= m_block_len;
+ }
+
+ explicit merge_buf_info(const ulonglong merge_block_size)
+ : m_block(nullptr),
+ m_block_len(merge_block_size),
+ m_curr_offset(0),
+ m_disk_start_offset(0),
+ m_disk_curr_offset(0),
+ m_total_size(merge_block_size) {
+ /* Will throw an exception if it runs out of memory here */
+ m_block = std::unique_ptr<uchar[]>(new uchar[merge_block_size]);
+
+ /* Initialize entire buffer to 0 to avoid valgrind errors */
+ memset(m_block.get(), 0, merge_block_size);
+ }
+ };
+
+ /* Represents an entry in the heap during merge phase of external sort */
+ struct merge_heap_entry {
+ std::shared_ptr<merge_buf_info> m_chunk_info; /* pointer to buffer info */
+ uchar *m_block; /* pointer to heap memory where record is stored */
+ const rocksdb::Comparator *const m_comparator;
+ rocksdb::Slice m_key; /* current key pointed to by block ptr */
+ rocksdb::Slice m_val;
+
+ size_t prepare(File fd, ulonglong f_offset, ulonglong chunk_size)
+ MY_ATTRIBUTE((__nonnull__));
+
+ int read_next_chunk_from_disk(File fd)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ int read_rec(rocksdb::Slice *const key, rocksdb::Slice *const val)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ int read_slice(rocksdb::Slice *const slice, const uchar **block_ptr)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ explicit merge_heap_entry(const rocksdb::Comparator *const comparator)
+ : m_chunk_info(nullptr), m_block(nullptr), m_comparator(comparator) {}
+ };
+
+ struct merge_heap_comparator {
+ bool operator()(const std::shared_ptr<merge_heap_entry> &lhs,
+ const std::shared_ptr<merge_heap_entry> &rhs) {
+ return lhs->m_comparator->Compare(rhs->m_key, lhs->m_key) < 0;
+ }
+ };
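+
+  /*
+    Note: std::priority_queue is a max-heap; the comparator above reverses
+    lhs and rhs so the entry with the smallest key stays on top, giving
+    the min-heap needed for the n-way merge.
+  */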
+
+ /* Represents a record in unsorted buffer */
+ struct merge_record {
+ uchar *m_block; /* points to offset of key in sort buffer */
+ const rocksdb::Comparator *const m_comparator;
+
+ bool operator<(const merge_record &record) const {
+ return merge_record_compare(this->m_block, record.m_block, m_comparator) <
+ 0;
+ }
+
+ merge_record(uchar *const block,
+ const rocksdb::Comparator *const comparator)
+ : m_block(block), m_comparator(comparator) {}
+ };
+
+ private:
+ const char *m_tmpfile_path;
+ const ulonglong m_merge_buf_size;
+ const ulonglong m_merge_combine_read_size;
+ const ulonglong m_merge_tmp_file_removal_delay;
+ rocksdb::ColumnFamilyHandle *m_cf_handle;
+ struct merge_file_info m_merge_file;
+ std::shared_ptr<merge_buf_info> m_rec_buf_unsorted;
+ std::shared_ptr<merge_buf_info> m_output_buf;
+ std::set<merge_record> m_offset_tree;
+ std::priority_queue<std::shared_ptr<merge_heap_entry>,
+ std::vector<std::shared_ptr<merge_heap_entry>>,
+ merge_heap_comparator>
+ m_merge_min_heap;
+
+ static inline void merge_store_uint64(uchar *const dst, uint64 n) {
+ memcpy(dst, &n, sizeof(n));
+ }
+
+ static inline void merge_read_uint64(const uchar **buf_ptr,
+ uint64 *const dst) {
+ DBUG_ASSERT(buf_ptr != nullptr);
+ memcpy(dst, *buf_ptr, sizeof(uint64));
+ *buf_ptr += sizeof(uint64);
+ }
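+
+  /*
+    Note: merge_store_uint64()/merge_read_uint64() copy values in native
+    byte order. That is safe here because the temporary merge file is
+    always written and read back by the same server process.
+  */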
+
+ static inline rocksdb::Slice as_slice(const uchar *block) {
+ uint64 len;
+ merge_read_uint64(&block, &len);
+
+ return rocksdb::Slice(reinterpret_cast<const char *>(block), len);
+ }
+
+ static int merge_record_compare(const uchar *a_block, const uchar *b_block,
+ const rocksdb::Comparator *const comparator)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ void merge_read_rec(const uchar *const block, rocksdb::Slice *const key,
+ rocksdb::Slice *const val) MY_ATTRIBUTE((__nonnull__));
+
+ void read_slice(rocksdb::Slice *slice, const uchar *block_ptr)
+ MY_ATTRIBUTE((__nonnull__));
+
+ public:
+ Rdb_index_merge(const char *const tmpfile_path,
+ const ulonglong merge_buf_size,
+ const ulonglong merge_combine_read_size,
+ const ulonglong merge_tmp_file_removal_delay,
+ rocksdb::ColumnFamilyHandle *cf);
+ ~Rdb_index_merge();
+
+ int init() MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ int merge_file_create() MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ int add(const rocksdb::Slice &key, const rocksdb::Slice &val)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ int merge_buf_write() MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ int next(rocksdb::Slice *const key, rocksdb::Slice *const val)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ int merge_heap_prepare() MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ void merge_heap_top(rocksdb::Slice *key, rocksdb::Slice *val)
+ MY_ATTRIBUTE((__nonnull__));
+
+ int merge_heap_pop_and_get_next(rocksdb::Slice *const key,
+ rocksdb::Slice *const val)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ void merge_reset();
+
+ rocksdb::ColumnFamilyHandle *get_cf() const { return m_cf_handle; }
+};
+
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_io_watchdog.cc b/storage/rocksdb/rdb_io_watchdog.cc
new file mode 100644
index 00000000000..07834118db0
--- /dev/null
+++ b/storage/rocksdb/rdb_io_watchdog.cc
@@ -0,0 +1,240 @@
+/*
+ Copyright (c) 2017, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+
+/* This C++ file's header */
+#include "./rdb_io_watchdog.h"
+
+/* C++ standard header files */
+#include <string>
+#include <vector>
+
+/* Rdb_io_watchdog doesn't work on Windows [yet] */
+#ifdef HAVE_TIMER_DELETE
+
+namespace myrocks {
+
+void Rdb_io_watchdog::expire_io_callback(union sigval timer_data) {
+ DBUG_ASSERT(timer_data.sival_ptr != nullptr);
+
+ // The treatment of any pending signal generated by the deleted timer is
+ // unspecified. Therefore we still need to handle the rare case where we
+ // finished the I/O operation right before the timer was deleted and callback
+ // was in flight.
+ if (!m_io_in_progress.load()) {
+ return;
+ }
+
+ // At this point we know that I/O has been stuck in `write()` for more than
+ // `m_write_timeout` seconds. We'll log a message and shut down the service.
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "MyRocks has detected a combination of I/O requests which "
+ "have cumulatively been blocking for more than %u seconds. "
+ "Shutting the service down.",
+ m_write_timeout);
+
+ abort();
+}
+
+void Rdb_io_watchdog::io_check_callback(union sigval timer_data) {
+ RDB_MUTEX_LOCK_CHECK(m_reset_mutex);
+
+ DBUG_ASSERT(timer_data.sival_ptr != nullptr);
+
+ struct sigevent e;
+
+ e.sigev_notify = SIGEV_THREAD;
+ e.sigev_notify_function = &Rdb_io_watchdog::expire_io_callback_wrapper;
+ e.sigev_value.sival_ptr = this;
+ e.sigev_notify_attributes = nullptr;
+
+ int ret = timer_create(CLOCK_MONOTONIC, &e, &m_io_check_watchdog_timer);
+
+ if (unlikely(ret)) {
+ // NO_LINT_DEBUG
+ sql_print_warning("Creating a watchdog I/O timer failed with %d.", errno);
+ RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
+ return;
+ }
+
+ struct itimerspec timer_spec;
+ memset(&timer_spec, 0, sizeof(timer_spec));
+
+ // One time execution only for the watchdog. No interval.
+ timer_spec.it_value.tv_sec = m_write_timeout;
+
+ ret = timer_settime(m_io_check_watchdog_timer, 0, &timer_spec, nullptr);
+
+ if (unlikely(ret)) {
+ // NO_LINT_DEBUG
+ sql_print_warning("Setting time for a watchdog I/O timer failed with %d.",
+ errno);
+ RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
+ return;
+ }
+
+ m_io_in_progress.store(true);
+
+ // Verify the write access to all directories we care about.
+ for (const std::string &directory : m_dirs_to_check) {
+ ret = check_write_access(directory);
+
+  // We'll log a warning and attempt to continue to see if the problem happens
+  // in other cases as well.
+ if (unlikely(ret != HA_EXIT_SUCCESS)) {
+ // NO_LINT_DEBUG
+ sql_print_warning("Unable to verify write access to %s (error code %d).",
+ directory.c_str(), ret);
+ }
+ }
+
+ m_io_in_progress.store(false);
+
+ // Clean up the watchdog timer.
+ ret = timer_delete(m_io_check_watchdog_timer);
+
+ if (unlikely(ret)) {
+ // NO_LINT_DEBUG
+ sql_print_warning("Deleting the watchdog I/O timer failed with %d.", errno);
+ }
+
+ m_io_check_watchdog_timer = nullptr;
+
+ RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
+}
+
+int Rdb_io_watchdog::check_write_access(const std::string &dirname) const {
+ DBUG_ASSERT(!dirname.empty());
+ DBUG_ASSERT(m_buf != nullptr);
+
+ const std::string fname = dirname + FN_DIRSEP + RDB_IO_DUMMY_FILE_NAME;
+
+ // O_DIRECT is a key flag here to make sure that we'll bypass the kernel's
+ // buffer cache.
+ int fd = open(fname.c_str(), O_WRONLY | O_DIRECT | O_CREAT | O_SYNC,
+ S_IRWXU | S_IWUSR);
+
+ if (unlikely(fd == -1)) {
+ return fd;
+ }
+
+ int ret = write(fd, m_buf, RDB_IO_WRITE_BUFFER_SIZE);
+
+  if (unlikely(ret != RDB_IO_WRITE_BUFFER_SIZE)) {
+    close(fd);  // don't leak the descriptor on a failed or short write
+    return ret;
+  }
+
+ ret = close(fd);
+
+ if (unlikely(ret)) {
+ return ret;
+ }
+
+ ret = unlink(fname.c_str());
+
+ if (unlikely(ret)) {
+ return ret;
+ }
+
+ return HA_EXIT_SUCCESS;
+}
+
+int Rdb_io_watchdog::reset_timeout(const uint32_t write_timeout) {
+ // This function will be called either from a thread initializing MyRocks
+ // engine or handling system variable changes. We need to account for the
+ // possibility of I/O callback executing at the same time. If that happens
+ // then we'll wait for it to finish.
+ RDB_MUTEX_LOCK_CHECK(m_reset_mutex);
+
+ struct sigevent e;
+
+  // In all cases, all active timers need to be stopped first.
+ int ret = stop_timers();
+
+ if (unlikely(ret)) {
+ // NO_LINT_DEBUG
+ sql_print_warning("Stopping I/O timers failed with %d.", errno);
+ RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
+ return ret;
+ }
+
+ m_write_timeout = write_timeout;
+ m_io_in_progress.store(false);
+
+ // Zero means that the I/O timer will be disabled. Therefore there's nothing
+ // for us to do here.
+ if (!write_timeout) {
+ RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
+ return HA_EXIT_SUCCESS;
+ }
+
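+  // check_write_access() writes with O_DIRECT, which requires an aligned
+  // buffer; hence posix_memalign() below instead of a plain malloc().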
+ free(m_buf);
+
+ ret = posix_memalign(reinterpret_cast<void **>(&m_buf),
+ RDB_IO_WRITE_BUFFER_SIZE, RDB_IO_WRITE_BUFFER_SIZE);
+
+ if (unlikely(ret)) {
+ m_buf = nullptr;
+ RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
+ // NB! The value of errno is not set.
+ return ret;
+ }
+
+ DBUG_ASSERT(m_buf != nullptr);
+ memset(m_buf, 0, RDB_IO_WRITE_BUFFER_SIZE);
+
+ // Common case gets handled here - we'll create a timer with a specific
+ // interval to check a set of directories for write access.
+ DBUG_ASSERT(m_dirs_to_check.size() > 0);
+
+ e.sigev_notify = SIGEV_THREAD;
+ e.sigev_notify_function = &Rdb_io_watchdog::io_check_callback_wrapper;
+ e.sigev_value.sival_ptr = this;
+ e.sigev_notify_attributes = nullptr;
+
+ ret = timer_create(CLOCK_MONOTONIC, &e, &m_io_check_timer);
+
+ if (unlikely(ret)) {
+ // NO_LINT_DEBUG
+ sql_print_warning("Creating a I/O timer failed with %d.", errno);
+ RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
+ return ret;
+ }
+
+ struct itimerspec timer_spec;
+ memset(&timer_spec, 0, sizeof(timer_spec));
+
+  // The I/O timer needs to fire at a regular interval.
+ timer_spec.it_value.tv_sec = m_write_timeout;
+ timer_spec.it_interval.tv_sec = m_write_timeout;
+
+ ret = timer_settime(m_io_check_timer, 0, &timer_spec, nullptr);
+
+ if (unlikely(ret)) {
+ // NO_LINT_DEBUG
+ sql_print_warning("Setting time for a watchdog I/O timer failed with %d.",
+ errno);
+ }
+
+ RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
+
+ return HA_EXIT_SUCCESS;
+}
+
+} // namespace myrocks
+
+#endif
+
diff --git a/storage/rocksdb/rdb_io_watchdog.h b/storage/rocksdb/rdb_io_watchdog.h
new file mode 100644
index 00000000000..8ee5b1f6c93
--- /dev/null
+++ b/storage/rocksdb/rdb_io_watchdog.h
@@ -0,0 +1,119 @@
+/*
+ Copyright (c) 2017, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+
+#pragma once
+
+/* C++ standard header files */
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <atomic>
+#include <string>
+#include <vector>
+
+/* MySQL header files */
+#include "./my_global.h"
+#include "./my_stacktrace.h"
+
+/* MyRocks header files */
+#include "./rdb_utils.h"
+
+namespace myrocks {
+
+// Rdb_io_watchdog does not support Windows ATM.
+#ifdef HAVE_TIMER_DELETE
+
+class Rdb_io_watchdog {
+ const int RDB_IO_WRITE_BUFFER_SIZE = 4096;
+ const char *const RDB_IO_DUMMY_FILE_NAME = "myrocks_io_watchdog_write_file";
+
+ private:
+ timer_t m_io_check_timer, m_io_check_watchdog_timer;
+ std::atomic<bool> m_io_in_progress;
+ std::vector<std::string> m_dirs_to_check;
+ uint32_t m_write_timeout;
+ mysql_mutex_t m_reset_mutex;
+ char *m_buf;
+
+ int check_write_access(const std::string &dirname) const;
+ void io_check_callback(union sigval timer_data);
+ void expire_io_callback(union sigval timer_data);
+
+ int stop_timers() {
+ int ret = 0;
+
+ if (m_io_check_watchdog_timer) {
+ ret = timer_delete(m_io_check_watchdog_timer);
+
+ if (!ret) {
+ m_io_check_watchdog_timer = nullptr;
+ }
+ }
+
+ if (m_io_check_timer && !ret) {
+ ret = timer_delete(m_io_check_timer);
+
+ if (!ret) {
+ m_io_check_timer = nullptr;
+ }
+ }
+
+ return ret;
+ }
+
+ static void io_check_callback_wrapper(union sigval timer_data) {
+ Rdb_io_watchdog *io_watchdog =
+ static_cast<Rdb_io_watchdog *>(timer_data.sival_ptr);
+ DBUG_ASSERT(io_watchdog != nullptr);
+
+ io_watchdog->io_check_callback(timer_data);
+ }
+
+ static void expire_io_callback_wrapper(union sigval timer_data) {
+ Rdb_io_watchdog *io_watchdog =
+ static_cast<Rdb_io_watchdog *>(timer_data.sival_ptr);
+ DBUG_ASSERT(io_watchdog != nullptr);
+
+ io_watchdog->expire_io_callback(timer_data);
+ }
+
+ public:
+ explicit Rdb_io_watchdog(std::vector<std::string> &&directories)
+ : m_io_check_timer(nullptr),
+ m_io_check_watchdog_timer(nullptr),
+ m_io_in_progress(false),
+ m_dirs_to_check(std::move(directories)),
+ m_buf(nullptr) {
+ DBUG_ASSERT(m_dirs_to_check.size() > 0);
+ mysql_mutex_init(0, &m_reset_mutex, MY_MUTEX_INIT_FAST);
+ }
+
+ ~Rdb_io_watchdog() {
+ // We're shutting down. Ignore errors possibly coming from timer deletion.
+ static_cast<void>(stop_timers());
+ mysql_mutex_destroy(&m_reset_mutex);
+ free(m_buf);
+ }
+
+ int reset_timeout(const uint32_t write_timeout);
+
+ Rdb_io_watchdog(const Rdb_io_watchdog &) = delete;
+ Rdb_io_watchdog &operator=(const Rdb_io_watchdog &) = delete;
+};
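+
+/*
+  Usage sketch (illustrative; variable names are hypothetical):
+
+    Rdb_io_watchdog watchdog({rocksdb_datadir, rocksdb_wal_dir});
+    watchdog.reset_timeout(write_timeout_secs);  // 0 disables the checks
+
+  Once armed, the watchdog periodically writes a small file to each
+  directory; if those writes stall for longer than the timeout, the
+  expiry callback aborts the server.
+*/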
+
+#endif
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_mariadb_port.h b/storage/rocksdb/rdb_mariadb_port.h
new file mode 100644
index 00000000000..627674905cc
--- /dev/null
+++ b/storage/rocksdb/rdb_mariadb_port.h
@@ -0,0 +1,55 @@
+/*
+ A temporary header to resolve WebScaleSQL vs MariaDB differences
+ when porting MyRocks to MariaDB.
+*/
+#ifndef RDB_MARIADB_PORT_H
+#define RDB_MARIADB_PORT_H
+
+#include "my_global.h" /* ulonglong */
+#include "atomic_stat.h"
+
+// These are for split_into_vector:
+#include <vector>
+#include <string>
+
+/* The following is copied from storage/innobase/univ.i: */
+#ifndef MY_ATTRIBUTE
+#if defined(__GNUC__)
+# define MY_ATTRIBUTE(A) __attribute__(A)
+#else
+# define MY_ATTRIBUTE(A)
+#endif
+#endif
+
+/* Struct used for IO performance counters, shared among multiple threads */
+struct my_io_perf_atomic_struct {
+ atomic_stat<ulonglong> bytes;
+ atomic_stat<ulonglong> requests;
+ atomic_stat<ulonglong> svc_time; /*!< time to do read or write operation */
+ atomic_stat<ulonglong> svc_time_max;
+ atomic_stat<ulonglong> wait_time; /*!< total time in the request array */
+ atomic_stat<ulonglong> wait_time_max;
+ atomic_stat<ulonglong> slow_ios; /*!< requests that take too long */
+};
+typedef struct my_io_perf_atomic_struct my_io_perf_atomic_t;
+
+////////////////////////////////////////////////////////////////////////////
+
+/*
+ Temporary stand-in for
+ fae59683dc116be2cc78b0b30d61c84659c33bd3
+ Print stack traces before committing suicide
+
+*/
+#define abort_with_stack_traces() { abort(); }
+
+////////////////////////////////////////////////////////////////////////////
+typedef struct my_io_perf_struct my_io_perf_t;
+
+std::vector<std::string> split_into_vector(const std::string& input,
+ char delimiter);
+
+void
+mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file);
+
+#endif
diff --git a/storage/rocksdb/rdb_mariadb_server_port.cc b/storage/rocksdb/rdb_mariadb_server_port.cc
new file mode 100644
index 00000000000..f63e4bb36ad
--- /dev/null
+++ b/storage/rocksdb/rdb_mariadb_server_port.cc
@@ -0,0 +1,123 @@
+#include <my_config.h>
+
+
+/* MySQL includes */
+#include "./debug_sync.h"
+#include "./my_bit.h"
+#include "./my_stacktrace.h"
+#include "./sql_table.h"
+#include "./my_global.h"
+#include "./log.h"
+#include <mysys_err.h>
+#include <mysql/psi/mysql_table.h>
+//#include <mysql/thread_pool_priv.h>
+
+#include <string>
+
+/* MyRocks includes */
+#include "./rdb_threads.h"
+
+#include "rdb_mariadb_server_port.h"
+
+void warn_about_bad_patterns(const Regex_list_handler* regex_list_handler,
+ const char *name)
+{
+ // There was some invalid regular expression data in the patterns supplied
+
+ // NO_LINT_DEBUG
+ sql_print_warning("Invalid pattern in %s: %s", name,
+ regex_list_handler->bad_pattern().c_str());
+}
+
+
+/*
+  Set the patterns string. If any regex pattern is invalid, the pattern
+  string is saved in m_bad_pattern_str and the result is false; otherwise
+  the result is true.
+*/
+bool Regex_list_handler::set_patterns(const std::string& pattern_str)
+{
+ bool pattern_valid= true;
+
+ // Create a normalized version of the pattern string with all delimiters
+ // replaced by the '|' character
+ std::string norm_pattern= pattern_str;
+ std::replace(norm_pattern.begin(), norm_pattern.end(), m_delimiter, '|');
+
+ // Make sure no one else is accessing the list while we are changing it.
+ mysql_rwlock_wrlock(&m_rwlock);
+
+ // Clear out any old error information
+ m_bad_pattern_str.clear();
+
+ try
+ {
+ // Replace all delimiters with the '|' operator and create the regex
+ // Note that this means the delimiter can not be part of a regular
+ // expression. This is currently not a problem as we are using the comma
+ // character as a delimiter and commas are not valid in table names.
+ const std::regex* pattern= new std::regex(norm_pattern);
+
+ // Free any existing regex information and setup the new one
+ delete m_pattern;
+ m_pattern= pattern;
+ }
+ catch (const std::regex_error&)
+ {
+ // This pattern is invalid.
+ pattern_valid= false;
+
+ // Put the bad pattern into a member variable so it can be retrieved later.
+ m_bad_pattern_str= pattern_str;
+ }
+
+ // Release the lock
+ mysql_rwlock_unlock(&m_rwlock);
+
+ return pattern_valid;
+}
+
+bool Regex_list_handler::matches(const std::string& str) const
+{
+ DBUG_ASSERT(m_pattern != nullptr);
+
+ // Make sure no one else changes the list while we are accessing it.
+ mysql_rwlock_rdlock(&m_rwlock);
+
+ // See if the table name matches the regex we have created
+ bool found= std::regex_match(str, *m_pattern);
+
+ // Release the lock
+ mysql_rwlock_unlock(&m_rwlock);
+
+ return found;
+}
+
+// Split a string based on a delimiter. Two delimiters in a row will not add
+// an empty string to the result.
+std::vector<std::string> split_into_vector(const std::string& input,
+ char delimiter)
+{
+ size_t pos;
+ size_t start = 0;
+ std::vector<std::string> elems;
+
+ // Find next delimiter
+ while ((pos = input.find(delimiter, start)) != std::string::npos)
+ {
+ // If there is any data since the last delimiter add it to the list
+ if (pos > start)
+ elems.push_back(input.substr(start, pos - start));
+
+ // Set our start position to the character after the delimiter
+ start = pos + 1;
+ }
+
+  // Add any remaining text after the last delimiter
+ if (input.length() > start)
+ elems.push_back(input.substr(start));
+
+ // Return the resulting list back to the caller
+ return elems;
+}
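+
+/*
+  For example, split_into_vector("a,,b,c", ',') returns {"a", "b", "c"};
+  the empty element between the adjacent delimiters is dropped.
+*/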
+
diff --git a/storage/rocksdb/rdb_mariadb_server_port.h b/storage/rocksdb/rdb_mariadb_server_port.h
new file mode 100644
index 00000000000..fe963446ebb
--- /dev/null
+++ b/storage/rocksdb/rdb_mariadb_server_port.h
@@ -0,0 +1,76 @@
+/*
+ A temporary header to resolve WebScaleSQL vs MariaDB differences
+ when porting MyRocks to MariaDB.
+*/
+#ifndef RDB_MARIADB_SERVER_PORT_H
+#define RDB_MARIADB_SERVER_PORT_H
+
+#include "my_global.h" /* ulonglong */
+#include "atomic_stat.h"
+#include "my_pthread.h"
+#include <mysql/psi/mysql_table.h>
+#include <mysql/psi/mysql_thread.h>
+
+/*
+ Code that is on SQL layer in facebook/mysql-5.6,
+ but is part of the storage engine in MariaRocks
+*/
+#include <regex>
+
+class Regex_list_handler
+{
+ private:
+#if defined(HAVE_PSI_INTERFACE)
+ const PSI_rwlock_key& m_key;
+#endif
+
+ char m_delimiter;
+ std::string m_bad_pattern_str;
+ const std::regex* m_pattern;
+
+ mutable mysql_rwlock_t m_rwlock;
+
+ Regex_list_handler(const Regex_list_handler& other)= delete;
+ Regex_list_handler& operator=(const Regex_list_handler& other)= delete;
+
+ public:
+#if defined(HAVE_PSI_INTERFACE)
+ Regex_list_handler(const PSI_rwlock_key& key,
+ char delimiter= ',') :
+ m_key(key),
+#else
+ Regex_list_handler(char delimiter= ',') :
+#endif
+ m_delimiter(delimiter),
+ m_bad_pattern_str(""),
+ m_pattern(nullptr)
+ {
+ mysql_rwlock_init(key, &m_rwlock);
+ }
+
+ ~Regex_list_handler()
+ {
+ mysql_rwlock_destroy(&m_rwlock);
+ delete m_pattern;
+ }
+
+ // Set the list of patterns
+ bool set_patterns(const std::string& patterns);
+
+ // See if a string matches at least one pattern
+ bool matches(const std::string& str) const;
+
+ // See the list of bad patterns
+ const std::string& bad_pattern() const
+ {
+ return m_bad_pattern_str;
+ }
+};
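+
+/*
+  Usage sketch (illustrative; the PSI key name below is hypothetical):
+
+    Regex_list_handler handler(key_rwlock_skip_list);
+    if (!handler.set_patterns("t1,test_.*"))
+      warn_about_bad_patterns(&handler, "skip_list");
+    bool hit = handler.matches("test_foo");   // true: matches "test_.*"
+*/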
+
+void warn_about_bad_patterns(const Regex_list_handler* regex_list_handler,
+ const char *name);
+
+void print_keydup_error(TABLE *table, KEY *key, myf errflag,
+ const THD *thd, const char *org_table_name=NULL);
+
+#endif
diff --git a/storage/rocksdb/rdb_mutex_wrapper.cc b/storage/rocksdb/rdb_mutex_wrapper.cc
new file mode 100644
index 00000000000..2cc0bac41f9
--- /dev/null
+++ b/storage/rocksdb/rdb_mutex_wrapper.cc
@@ -0,0 +1,214 @@
+/*
+ Copyright (c) 2015, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+
+#include <my_config.h>
+
+/* This C++ file's header file */
+#include "./rdb_mutex_wrapper.h"
+
+/* The following are for THD_ENTER_COND: */
+#define MYSQL_SERVER 1
+#include "sql_priv.h"
+#include "my_decimal.h"
+#include "sql_class.h"
+//psergey-merge-todo: does MariaDB have/need: #include "../sql/replication.h"
+
+
+/* MyRocks header files */
+#include "./ha_rocksdb.h"
+#include "./rdb_utils.h"
+
+
+using namespace rocksdb;
+
+namespace myrocks {
+
+static PSI_stage_info stage_waiting_on_row_lock2 = {0, "Waiting for row lock",
+ 0};
+
+static const int64_t ONE_SECOND_IN_MICROSECS = 1000 * 1000;
+// A timeout as long as one full non-leap year's worth of microseconds is as
+// good as an infinite timeout.
+static const int64_t ONE_YEAR_IN_MICROSECS =
+ ONE_SECOND_IN_MICROSECS * 60 * 60 * 24 * 365;
+
+Rdb_cond_var::Rdb_cond_var() { mysql_cond_init(0, &m_cond, nullptr); }
+
+Rdb_cond_var::~Rdb_cond_var() { mysql_cond_destroy(&m_cond); }
+
+Status Rdb_cond_var::Wait(const std::shared_ptr<TransactionDBMutex> mutex_arg) {
+ return WaitFor(mutex_arg, ONE_YEAR_IN_MICROSECS);
+}
+
+/*
+ @brief
+ Wait on condition variable. The caller must make sure that we own
+ *mutex_ptr. The mutex is released and re-acquired by the wait function.
+
+ @param
+ timeout_micros Timeout in microseconds. Negative value means no timeout.
+
+ @return
+  Status::OK() - Wait successful
+ Status::TimedOut() - Timed out or wait killed (the caller can check
+ thd_killed() to determine which occurred)
+*/
+
+Status Rdb_cond_var::WaitFor(
+ const std::shared_ptr<TransactionDBMutex> mutex_arg,
+ int64_t timeout_micros) {
+ auto *mutex_obj = reinterpret_cast<Rdb_mutex *>(mutex_arg.get());
+ DBUG_ASSERT(mutex_obj != nullptr);
+
+ mysql_mutex_t *const mutex_ptr = &mutex_obj->m_mutex;
+
+ int res = 0;
+ struct timespec wait_timeout;
+
+ if (timeout_micros < 0) timeout_micros = ONE_YEAR_IN_MICROSECS;
+ set_timespec_nsec(wait_timeout, timeout_micros * 1000);
+
+#ifndef STANDALONE_UNITTEST
+ PSI_stage_info old_stage;
+ mysql_mutex_assert_owner(mutex_ptr);
+
+ if (current_thd && mutex_obj->m_old_stage_info.count(current_thd) == 0) {
+ THD_ENTER_COND(current_thd, &m_cond, mutex_ptr, &stage_waiting_on_row_lock2,
+ &old_stage);
+ /*
+      After the mysql_cond_timedwait we need to make this call
+
+        THD_EXIT_COND(thd, &old_stage);
+
+      to inform the SQL layer that the KILLable wait has ended. However,
+      that call would also release the mutex. Defer it until the mutex is
+      unlocked by RocksDB's Pessimistic Transactions system.
+ */
+ mutex_obj->set_unlock_action(&old_stage);
+ }
+
+#endif
+ bool killed = false;
+
+ do {
+ res = mysql_cond_timedwait(&m_cond, mutex_ptr, &wait_timeout);
+
+#ifndef STANDALONE_UNITTEST
+ if (current_thd) killed = thd_killed(current_thd);
+#endif
+ } while (!killed && res == EINTR);
+
+ if (res || killed) {
+ return Status::TimedOut();
+ } else {
+ return Status::OK();
+ }
+}
+
+/*
+
+ @note
+ This function may be called while not holding the mutex that is used to wait
+ on the condition variable.
+
+ The manual page says ( http://linux.die.net/man/3/pthread_cond_signal):
+
+ The pthread_cond_broadcast() or pthread_cond_signal() functions may be called
+ by a thread whether or not it currently owns the mutex that threads calling
+ pthread_cond_wait() or pthread_cond_timedwait() have associated with the
+ condition variable during their waits; however, IF PREDICTABLE SCHEDULING
+ BEHAVIOR IS REQUIRED, THEN THAT MUTEX SHALL BE LOCKED by the thread calling
+ pthread_cond_broadcast() or pthread_cond_signal().
+
+ What's "predicate scheduling" and do we need it? The explanation is here:
+
+ https://groups.google.com/forum/?hl=ky#!msg/comp.programming.threads/wEUgPq541v8/ZByyyS8acqMJ
+ "The problem (from the realtime side) with condition variables is that
+ if you can signal/broadcast without holding the mutex, and any thread
+ currently running can acquire an unlocked mutex and check a predicate
+ without reference to the condition variable, then you can have an
+ indirect priority inversion."
+
+ Another possible consequence is that one can create spurious wake-ups when
+ there are multiple threads signaling the condition.
+
+ None of this looks like a problem for our use case.
+*/
+
+void Rdb_cond_var::Notify() { mysql_cond_signal(&m_cond); }
+
+/*
+ @note
+ This is called without holding the mutex that's used for waiting on the
+ condition. See ::Notify().
+*/
+void Rdb_cond_var::NotifyAll() { mysql_cond_broadcast(&m_cond); }
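+
+/*
+  Illustrative wait-loop (roughly how RocksDB's pessimistic lock manager
+  drives these wrappers; "mutex", "cv" and "lock_granted" are hypothetical):
+
+    mutex->Lock();
+    while (!lock_granted) {
+      if (!cv->WaitFor(mutex, timeout_us).ok())
+        break;                      // timed out, or the wait was killed
+    }
+    mutex->UnLock();
+*/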
+
+Rdb_mutex::Rdb_mutex() {
+ mysql_mutex_init(0 /* Don't register in P_S. */, &m_mutex,
+ MY_MUTEX_INIT_FAST);
+}
+
+Rdb_mutex::~Rdb_mutex() { mysql_mutex_destroy(&m_mutex); }
+
+Status Rdb_mutex::Lock() {
+ RDB_MUTEX_LOCK_CHECK(m_mutex);
+ DBUG_ASSERT(m_old_stage_info.count(current_thd) == 0);
+ return Status::OK();
+}
+
+// Attempt to acquire lock. If timeout is non-negative, the operation may
+// fail after this many microseconds.
+// If implementing a custom version of this class, the implementation may
+// choose to ignore the timeout.
+// Return OK on success, or other Status on failure.
+Status Rdb_mutex::TryLockFor(int64_t timeout_time MY_ATTRIBUTE((__unused__))) {
+ /*
+ Note: PThreads API has pthread_mutex_timedlock(), but mysql's
+ mysql_mutex_* wrappers do not wrap that function.
+ */
+ RDB_MUTEX_LOCK_CHECK(m_mutex);
+ return Status::OK();
+}
+
+#ifndef STANDALONE_UNITTEST
+void Rdb_mutex::set_unlock_action(const PSI_stage_info *const old_stage_arg) {
+ DBUG_ASSERT(old_stage_arg != nullptr);
+
+ mysql_mutex_assert_owner(&m_mutex);
+ DBUG_ASSERT(m_old_stage_info.count(current_thd) == 0);
+
+ m_old_stage_info[current_thd] =
+ std::make_shared<PSI_stage_info>(*old_stage_arg);
+}
+#endif
+
+// Unlock Mutex that was successfully locked by Lock() or TryLockFor()
+void Rdb_mutex::UnLock() {
+#ifndef STANDALONE_UNITTEST
+ if (m_old_stage_info.count(current_thd) > 0) {
+ const std::shared_ptr<PSI_stage_info> old_stage =
+ m_old_stage_info[current_thd];
+ m_old_stage_info.erase(current_thd);
+ /* The following will call mysql_mutex_unlock */
+ THD_EXIT_COND(current_thd, old_stage.get());
+ return;
+ }
+#endif
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+}
+
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_mutex_wrapper.h b/storage/rocksdb/rdb_mutex_wrapper.h
new file mode 100644
index 00000000000..33eefe9d50c
--- /dev/null
+++ b/storage/rocksdb/rdb_mutex_wrapper.h
@@ -0,0 +1,143 @@
+/*
+ Copyright (c) 2015, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+
+#pragma once
+
+/* C++ standard header file */
+#include <chrono>
+#include <condition_variable>
+#include <functional>
+#include <mutex>
+#include <unordered_map>
+
+/* MySQL header files */
+#include "./my_sys.h"
+#include "mysql/plugin.h"
+
+/* RocksDB header files */
+#include "rocksdb/utilities/transaction_db_mutex.h"
+#include "rdb_mariadb_port.h"
+
+namespace myrocks {
+
+class Rdb_mutex : public rocksdb::TransactionDBMutex {
+ Rdb_mutex(const Rdb_mutex &p) = delete;
+ Rdb_mutex &operator=(const Rdb_mutex &p) = delete;
+
+ public:
+ Rdb_mutex();
+ virtual ~Rdb_mutex() override;
+
+ /*
+    Override parent class's virtual methods of interest.
+ */
+
+ // Attempt to acquire lock. Return OK on success, or other Status on failure.
+ // If returned status is OK, TransactionDB will eventually call UnLock().
+ virtual rocksdb::Status Lock() override;
+
+  // Attempt to acquire lock. If timeout is non-negative, the operation should
+  // fail after this many microseconds.
+ // Returns OK on success,
+ // TimedOut if timed out,
+ // or other Status on failure.
+ // If returned status is OK, TransactionDB will eventually call UnLock().
+ virtual rocksdb::Status TryLockFor(
+ int64_t timeout_time MY_ATTRIBUTE((__unused__))) override;
+
+  // Unlock Mutex that was successfully locked by Lock() or TryLockFor()
+ virtual void UnLock() override;
+
+ private:
+ mysql_mutex_t m_mutex;
+ friend class Rdb_cond_var;
+
+#ifndef STANDALONE_UNITTEST
+ void set_unlock_action(const PSI_stage_info *const old_stage_arg);
+ std::unordered_map<THD *, std::shared_ptr<PSI_stage_info>> m_old_stage_info;
+#endif
+};
+
+class Rdb_cond_var : public rocksdb::TransactionDBCondVar {
+ Rdb_cond_var(const Rdb_cond_var &) = delete;
+ Rdb_cond_var &operator=(const Rdb_cond_var &) = delete;
+
+ public:
+ Rdb_cond_var();
+ virtual ~Rdb_cond_var() override;
+
+ /*
+    Override parent class's virtual methods of interest.
+ */
+
+ // Block current thread until condition variable is notified by a call to
+ // Notify() or NotifyAll(). Wait() will be called with mutex locked.
+ // Returns OK if notified.
+ // Returns non-OK if TransactionDB should stop waiting and fail the operation.
+ // May return OK spuriously even if not notified.
+ virtual rocksdb::Status Wait(
+ const std::shared_ptr<rocksdb::TransactionDBMutex> mutex) override;
+
+  // Block current thread until condition variable is notified by a call to
+ // Notify() or NotifyAll(), or if the timeout is reached.
+  // If timeout is non-negative, the operation should fail after this many
+ // microseconds.
+ // If implementing a custom version of this class, the implementation may
+ // choose to ignore the timeout.
+ //
+ // Returns OK if notified.
+ // Returns TimedOut if timeout is reached.
+  // Returns other status if TransactionDB should otherwise stop waiting and
+ // fail the operation.
+ // May return OK spuriously even if not notified.
+ virtual rocksdb::Status WaitFor(
+ const std::shared_ptr<rocksdb::TransactionDBMutex> mutex,
+ int64_t timeout_time) override;
+
+ // If any threads are waiting on *this, unblock at least one of the
+ // waiting threads.
+ virtual void Notify() override;
+
+ // Unblocks all threads waiting on *this.
+ virtual void NotifyAll() override;
+
+ private:
+ mysql_cond_t m_cond;
+};
+
+class Rdb_mutex_factory : public rocksdb::TransactionDBMutexFactory {
+ public:
+ Rdb_mutex_factory(const Rdb_mutex_factory &) = delete;
+ Rdb_mutex_factory &operator=(const Rdb_mutex_factory &) = delete;
+ Rdb_mutex_factory() {}
+ /*
+    Override parent class's virtual methods of interest.
+ */
+
+ virtual std::shared_ptr<rocksdb::TransactionDBMutex> AllocateMutex()
+ override {
+ return std::make_shared<Rdb_mutex>();
+ }
+
+ virtual std::shared_ptr<rocksdb::TransactionDBCondVar> AllocateCondVar()
+ override {
+ return std::make_shared<Rdb_cond_var>();
+ }
+
+ virtual ~Rdb_mutex_factory() override {}
+};
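+
+/*
+  Usage sketch (illustrative): the factory is handed to RocksDB when the
+  TransactionDB is opened, so row-lock waits go through the server's mutex
+  and condition-variable primitives above:
+
+    rocksdb::TransactionDBOptions tx_db_options;
+    tx_db_options.custom_mutex_factory =
+        std::make_shared<myrocks::Rdb_mutex_factory>();
+    // then: rocksdb::TransactionDB::Open(opts, tx_db_options, path, &tdb);
+*/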
+
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_perf_context.cc b/storage/rocksdb/rdb_perf_context.cc
new file mode 100644
index 00000000000..d7a126d86e4
--- /dev/null
+++ b/storage/rocksdb/rdb_perf_context.cc
@@ -0,0 +1,285 @@
+/*
+ Portions Copyright (c) 2015-Present, Facebook, Inc.
+ Portions Copyright (c) 2012, Monty Program Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+
+#include <my_config.h>
+
+#include "rdb_mariadb_port.h"
+/* This C++ file's header file */
+#include "./rdb_perf_context.h"
+
+/* C++ system header files */
+#include <string>
+
+/* RocksDB header files */
+#include "rocksdb/iostats_context.h"
+#include "rocksdb/perf_context.h"
+
+/* MyRocks header files */
+#include "./ha_rocksdb_proto.h"
+
+namespace myrocks {
+
+// To add a new metric:
+// 1. Update the PC enum in rdb_perf_context.h
+// 2. Update sections (A), (B), and (C) below
+// 3. Update perf_context.test and show_engine.test
+
+std::string rdb_pc_stat_types[] = {
+ // (A) These should be in the same order as the PC enum
+ "USER_KEY_COMPARISON_COUNT",
+ "BLOCK_CACHE_HIT_COUNT",
+ "BLOCK_READ_COUNT",
+ "BLOCK_READ_BYTE",
+ "BLOCK_READ_TIME",
+ "BLOCK_CHECKSUM_TIME",
+ "BLOCK_DECOMPRESS_TIME",
+ "GET_READ_BYTES",
+ "MULTIGET_READ_BYTES",
+ "ITER_READ_BYTES",
+ "INTERNAL_KEY_SKIPPED_COUNT",
+ "INTERNAL_DELETE_SKIPPED_COUNT",
+ "INTERNAL_RECENT_SKIPPED_COUNT",
+ "INTERNAL_MERGE_COUNT",
+ "GET_SNAPSHOT_TIME",
+ "GET_FROM_MEMTABLE_TIME",
+ "GET_FROM_MEMTABLE_COUNT",
+ "GET_POST_PROCESS_TIME",
+ "GET_FROM_OUTPUT_FILES_TIME",
+ "SEEK_ON_MEMTABLE_TIME",
+ "SEEK_ON_MEMTABLE_COUNT",
+ "NEXT_ON_MEMTABLE_COUNT",
+ "PREV_ON_MEMTABLE_COUNT",
+ "SEEK_CHILD_SEEK_TIME",
+ "SEEK_CHILD_SEEK_COUNT",
+ "SEEK_MIN_HEAP_TIME",
+ "SEEK_MAX_HEAP_TIME",
+ "SEEK_INTERNAL_SEEK_TIME",
+ "FIND_NEXT_USER_ENTRY_TIME",
+ "WRITE_WAL_TIME",
+ "WRITE_MEMTABLE_TIME",
+ "WRITE_DELAY_TIME",
+ "WRITE_PRE_AND_POST_PROCESS_TIME",
+ "DB_MUTEX_LOCK_NANOS",
+ "DB_CONDITION_WAIT_NANOS",
+ "MERGE_OPERATOR_TIME_NANOS",
+ "READ_INDEX_BLOCK_NANOS",
+ "READ_FILTER_BLOCK_NANOS",
+ "NEW_TABLE_BLOCK_ITER_NANOS",
+ "NEW_TABLE_ITERATOR_NANOS",
+ "BLOCK_SEEK_NANOS",
+ "FIND_TABLE_NANOS",
+ "BLOOM_MEMTABLE_HIT_COUNT",
+ "BLOOM_MEMTABLE_MISS_COUNT",
+ "BLOOM_SST_HIT_COUNT",
+ "BLOOM_SST_MISS_COUNT",
+ "KEY_LOCK_WAIT_TIME",
+ "KEY_LOCK_WAIT_COUNT",
+ "IO_THREAD_POOL_ID",
+ "IO_BYTES_WRITTEN",
+ "IO_BYTES_READ",
+ "IO_OPEN_NANOS",
+ "IO_ALLOCATE_NANOS",
+ "IO_WRITE_NANOS",
+ "IO_READ_NANOS",
+ "IO_RANGE_SYNC_NANOS",
+ "IO_LOGGER_NANOS"};
+
+#define IO_PERF_RECORD(_field_) \
+ do { \
+ if (rocksdb::get_perf_context()->_field_ > 0) { \
+ counters->m_value[idx] += rocksdb::get_perf_context()->_field_; \
+ } \
+ idx++; \
+ } while (0)
+#define IO_STAT_RECORD(_field_) \
+ do { \
+ if (rocksdb::get_iostats_context()->_field_ > 0) { \
+ counters->m_value[idx] += rocksdb::get_iostats_context()->_field_; \
+ } \
+ idx++; \
+ } while (0)
+
+static void harvest_diffs(Rdb_atomic_perf_counters *const counters) {
+ // (C) These should be in the same order as the PC enum
+ size_t idx = 0;
+ IO_PERF_RECORD(user_key_comparison_count);
+ IO_PERF_RECORD(block_cache_hit_count);
+ IO_PERF_RECORD(block_read_count);
+ IO_PERF_RECORD(block_read_byte);
+ IO_PERF_RECORD(block_read_time);
+ IO_PERF_RECORD(block_checksum_time);
+ IO_PERF_RECORD(block_decompress_time);
+ IO_PERF_RECORD(get_read_bytes);
+ IO_PERF_RECORD(multiget_read_bytes);
+ IO_PERF_RECORD(iter_read_bytes);
+ IO_PERF_RECORD(internal_key_skipped_count);
+ IO_PERF_RECORD(internal_delete_skipped_count);
+ IO_PERF_RECORD(internal_recent_skipped_count);
+ IO_PERF_RECORD(internal_merge_count);
+ IO_PERF_RECORD(get_snapshot_time);
+ IO_PERF_RECORD(get_from_memtable_time);
+ IO_PERF_RECORD(get_from_memtable_count);
+ IO_PERF_RECORD(get_post_process_time);
+ IO_PERF_RECORD(get_from_output_files_time);
+ IO_PERF_RECORD(seek_on_memtable_time);
+ IO_PERF_RECORD(seek_on_memtable_count);
+ IO_PERF_RECORD(next_on_memtable_count);
+ IO_PERF_RECORD(prev_on_memtable_count);
+ IO_PERF_RECORD(seek_child_seek_time);
+ IO_PERF_RECORD(seek_child_seek_count);
+ IO_PERF_RECORD(seek_min_heap_time);
+ IO_PERF_RECORD(seek_max_heap_time);
+ IO_PERF_RECORD(seek_internal_seek_time);
+ IO_PERF_RECORD(find_next_user_entry_time);
+ IO_PERF_RECORD(write_wal_time);
+ IO_PERF_RECORD(write_memtable_time);
+ IO_PERF_RECORD(write_delay_time);
+ IO_PERF_RECORD(write_pre_and_post_process_time);
+ IO_PERF_RECORD(db_mutex_lock_nanos);
+ IO_PERF_RECORD(db_condition_wait_nanos);
+ IO_PERF_RECORD(merge_operator_time_nanos);
+ IO_PERF_RECORD(read_index_block_nanos);
+ IO_PERF_RECORD(read_filter_block_nanos);
+ IO_PERF_RECORD(new_table_block_iter_nanos);
+ IO_PERF_RECORD(new_table_iterator_nanos);
+ IO_PERF_RECORD(block_seek_nanos);
+ IO_PERF_RECORD(find_table_nanos);
+ IO_PERF_RECORD(bloom_memtable_hit_count);
+ IO_PERF_RECORD(bloom_memtable_miss_count);
+ IO_PERF_RECORD(bloom_sst_hit_count);
+ IO_PERF_RECORD(bloom_sst_miss_count);
+ IO_PERF_RECORD(key_lock_wait_time);
+ IO_PERF_RECORD(key_lock_wait_count);
+
+ IO_STAT_RECORD(thread_pool_id);
+ IO_STAT_RECORD(bytes_written);
+ IO_STAT_RECORD(bytes_read);
+ IO_STAT_RECORD(open_nanos);
+ IO_STAT_RECORD(allocate_nanos);
+ IO_STAT_RECORD(write_nanos);
+ IO_STAT_RECORD(read_nanos);
+ IO_STAT_RECORD(range_sync_nanos);
+ IO_STAT_RECORD(logger_nanos);
+}
+
+#undef IO_PERF_RECORD
+#undef IO_STAT_RECORD
+
+static Rdb_atomic_perf_counters rdb_global_perf_counters;
+
+void rdb_get_global_perf_counters(Rdb_perf_counters *const counters) {
+ counters->load(rdb_global_perf_counters);
+}
+
+void Rdb_perf_counters::load(const Rdb_atomic_perf_counters &atomic_counters) {
+ for (int i = 0; i < PC_MAX_IDX; i++) {
+ m_value[i] = atomic_counters.m_value[i].load(std::memory_order_relaxed);
+ }
+}
+
+bool Rdb_io_perf::start(const uint32_t perf_context_level) {
+ const rocksdb::PerfLevel perf_level =
+ static_cast<rocksdb::PerfLevel>(perf_context_level);
+
+ if (rocksdb::GetPerfLevel() != perf_level) {
+ rocksdb::SetPerfLevel(perf_level);
+ }
+
+ if (perf_level == rocksdb::kDisable) {
+ return false;
+ }
+
+ rocksdb::get_perf_context()->Reset();
+ rocksdb::get_iostats_context()->Reset();
+ return true;
+}
+
+void Rdb_io_perf::update_bytes_written(const uint32_t perf_context_level,
+ ulonglong bytes_written) {
+ const rocksdb::PerfLevel perf_level =
+ static_cast<rocksdb::PerfLevel>(perf_context_level);
+ if (perf_level != rocksdb::kDisable && m_shared_io_perf_write) {
+ io_write_bytes += bytes_written;
+ io_write_requests += 1;
+ }
+}
+
+void Rdb_io_perf::end_and_record(const uint32_t perf_context_level) {
+ const rocksdb::PerfLevel perf_level =
+ static_cast<rocksdb::PerfLevel>(perf_context_level);
+
+ if (perf_level == rocksdb::kDisable) {
+ return;
+ }
+
+ if (m_atomic_counters) {
+ harvest_diffs(m_atomic_counters);
+ }
+ harvest_diffs(&rdb_global_perf_counters);
+
+ if (m_shared_io_perf_read &&
+ (rocksdb::get_perf_context()->block_read_byte != 0 ||
+ rocksdb::get_perf_context()->block_read_count != 0 ||
+       rocksdb::get_perf_context()->block_read_time != 0)) {
+#ifdef MARIAROCKS_NOT_YET
+ my_io_perf_t io_perf_read;
+
+ io_perf_read.init();
+ io_perf_read.bytes = rocksdb::get_perf_context()->block_read_byte;
+ io_perf_read.requests = rocksdb::get_perf_context()->block_read_count;
+
+ /*
+ Rocksdb does not distinguish between I/O service and wait time, so just
+ use svc time.
+ */
+ io_perf_read.svc_time_max = io_perf_read.svc_time =
+ rocksdb::get_perf_context()->block_read_time;
+
+ m_shared_io_perf_read->sum(io_perf_read);
+ m_stats->table_io_perf_read.sum(io_perf_read);
+#endif
+ }
+
+#ifdef MARIAROCKS_NOT_YET
+ if (m_shared_io_perf_write &&
+ (io_write_bytes != 0 || io_write_requests != 0)) {
+ my_io_perf_t io_perf_write;
+ io_perf_write.init();
+ io_perf_write.bytes = io_write_bytes;
+ io_perf_write.requests = io_write_requests;
+ m_shared_io_perf_write->sum(io_perf_write);
+ m_stats->table_io_perf_write.sum(io_perf_write);
+ io_write_bytes = 0;
+ io_write_requests = 0;
+ }
+
+ if (m_stats) {
+ if (rocksdb::get_perf_context()->internal_key_skipped_count != 0) {
+ m_stats->key_skipped +=
+ rocksdb::get_perf_context()->internal_key_skipped_count;
+ }
+
+ if (rocksdb::get_perf_context()->internal_delete_skipped_count != 0) {
+ m_stats->delete_skipped +=
+ rocksdb::get_perf_context()->internal_delete_skipped_count;
+ }
+ }
+#endif
+}
+
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_perf_context.h b/storage/rocksdb/rdb_perf_context.h
new file mode 100644
index 00000000000..d8381b8ea94
--- /dev/null
+++ b/storage/rocksdb/rdb_perf_context.h
@@ -0,0 +1,168 @@
+/*
+ Portions Copyright (c) 2015-Present, Facebook, Inc.
+ Portions Copyright (c) 2012,2013 Monty Program Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+#pragma once
+
+/* C++ standard header files */
+#include <atomic>
+#include <cstdint>
+#include <string>
+
+/* MySQL header files */
+#include <my_global.h>
+#include "./handler.h"
+
+#include "rdb_mariadb_port.h"
+
+namespace myrocks {
+
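+/*
+  Indexes into the counter arrays below. Each entry mirrors a field of
+  rocksdb::PerfContext or rocksdb::IOStatsContext that is harvested into
+  the corresponding slot (see rdb_perf_context.cc).
+*/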
+enum {
+ PC_USER_KEY_COMPARISON_COUNT = 0,
+ PC_BLOCK_CACHE_HIT_COUNT,
+ PC_BLOCK_READ_COUNT,
+ PC_BLOCK_READ_BYTE,
+ PC_BLOCK_READ_TIME,
+ PC_BLOCK_CHECKSUM_TIME,
+ PC_BLOCK_DECOMPRESS_TIME,
+ PC_GET_READ_BYTES,
+ PC_MULTIGET_READ_BYTES,
+ PC_ITER_READ_BYTES,
+ PC_KEY_SKIPPED,
+ PC_DELETE_SKIPPED,
+ PC_RECENT_SKIPPED,
+ PC_MERGE,
+ PC_GET_SNAPSHOT_TIME,
+ PC_GET_FROM_MEMTABLE_TIME,
+ PC_GET_FROM_MEMTABLE_COUNT,
+ PC_GET_POST_PROCESS_TIME,
+ PC_GET_FROM_OUTPUT_FILES_TIME,
+ PC_SEEK_ON_MEMTABLE_TIME,
+ PC_SEEK_ON_MEMTABLE_COUNT,
+ PC_NEXT_ON_MEMTABLE_COUNT,
+ PC_PREV_ON_MEMTABLE_COUNT,
+ PC_SEEK_CHILD_SEEK_TIME,
+ PC_SEEK_CHILD_SEEK_COUNT,
+ PC_SEEK_MIN_HEAP_TIME,
+ PC_SEEK_MAX_HEAP_TIME,
+ PC_SEEK_INTERNAL_SEEK_TIME,
+ PC_FIND_NEXT_USER_ENTRY_TIME,
+ PC_WRITE_WAL_TIME,
+ PC_WRITE_MEMTABLE_TIME,
+ PC_WRITE_DELAY_TIME,
+ PC_WRITE_PRE_AND_POST_PROCESSS_TIME,
+ PC_DB_MUTEX_LOCK_NANOS,
+ PC_DB_CONDITION_WAIT_NANOS,
+ PC_MERGE_OPERATOR_TIME_NANOS,
+ PC_READ_INDEX_BLOCK_NANOS,
+ PC_READ_FILTER_BLOCK_NANOS,
+ PC_NEW_TABLE_BLOCK_ITER_NANOS,
+ PC_NEW_TABLE_ITERATOR_NANOS,
+ PC_BLOCK_SEEK_NANOS,
+ PC_FIND_TABLE_NANOS,
+ PC_BLOOM_MEMTABLE_HIT_COUNT,
+ PC_BLOOM_MEMTABLE_MISS_COUNT,
+ PC_BLOOM_SST_HIT_COUNT,
+ PC_BLOOM_SST_MISS_COUNT,
+ PC_KEY_LOCK_WAIT_TIME,
+ PC_KEY_LOCK_WAIT_COUNT,
+ PC_IO_THREAD_POOL_ID,
+ PC_IO_BYTES_WRITTEN,
+ PC_IO_BYTES_READ,
+ PC_IO_OPEN_NANOS,
+ PC_IO_ALLOCATE_NANOS,
+ PC_IO_WRITE_NANOS,
+ PC_IO_READ_NANOS,
+ PC_IO_RANGE_SYNC_NANOS,
+ PC_IO_LOGGER_NANOS,
+ PC_MAX_IDX
+};
+
+class Rdb_perf_counters;
+
+/*
+ A collection of performance counters that can be safely incremented by
+ multiple threads since it stores atomic datapoints.
+*/
+struct Rdb_atomic_perf_counters {
+ std::atomic_ullong m_value[PC_MAX_IDX];
+};
+
+/*
+ A collection of performance counters that is meant to be incremented by
+ a single thread.
+*/
+class Rdb_perf_counters {
+ Rdb_perf_counters(const Rdb_perf_counters &) = delete;
+ Rdb_perf_counters &operator=(const Rdb_perf_counters &) = delete;
+
+ public:
+ Rdb_perf_counters() = default;
+ uint64_t m_value[PC_MAX_IDX];
+
+ void load(const Rdb_atomic_perf_counters &atomic_counters);
+};
+
+extern std::string rdb_pc_stat_types[PC_MAX_IDX];
+
+/*
+ Perf timers for data reads
+ */
+class Rdb_io_perf {
+ // Context management
+ Rdb_atomic_perf_counters *m_atomic_counters = nullptr;
+ my_io_perf_atomic_t *m_shared_io_perf_read = nullptr;
+ my_io_perf_atomic_t *m_shared_io_perf_write = nullptr;
+ ha_statistics *m_stats = nullptr;
+
+ uint64_t io_write_bytes;
+ uint64_t io_write_requests;
+
+ public:
+ Rdb_io_perf(const Rdb_io_perf &) = delete;
+ Rdb_io_perf &operator=(const Rdb_io_perf &) = delete;
+
+ void init(Rdb_atomic_perf_counters *const atomic_counters,
+ my_io_perf_atomic_t *const shared_io_perf_read,
+ my_io_perf_atomic_t *const shared_io_perf_write,
+ ha_statistics *const stats) {
+ DBUG_ASSERT(atomic_counters != nullptr);
+ DBUG_ASSERT(shared_io_perf_read != nullptr);
+ DBUG_ASSERT(shared_io_perf_write != nullptr);
+ DBUG_ASSERT(stats != nullptr);
+
+ m_atomic_counters = atomic_counters;
+ m_shared_io_perf_read = shared_io_perf_read;
+ m_shared_io_perf_write = shared_io_perf_write;
+ m_stats = stats;
+
+ io_write_bytes = 0;
+ io_write_requests = 0;
+ }
+
+ bool start(const uint32_t perf_context_level);
+ void update_bytes_written(const uint32_t perf_context_level,
+ ulonglong bytes_written);
+ void end_and_record(const uint32_t perf_context_level);
+
+  explicit Rdb_io_perf()
+      : m_atomic_counters(nullptr),
+        m_shared_io_perf_read(nullptr),
+        m_shared_io_perf_write(nullptr),
+        m_stats(nullptr),
+        io_write_bytes(0),
+        io_write_requests(0) {}
+};
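+
+/*
+  Illustrative usage sketch (the surrounding names are hypothetical):
+  bracket a batch of RocksDB operations with start()/end_and_record() so
+  the perf-context deltas are harvested into the counters passed to init().
+
+    Rdb_io_perf io_perf;
+    io_perf.init(&tbl_counters, &shared_read, &shared_write, &stats);
+    if (io_perf.start(level)) {
+      // ... RocksDB reads/writes ...
+      io_perf.end_and_record(level);
+    }
+*/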
+
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_psi.cc b/storage/rocksdb/rdb_psi.cc
new file mode 100644
index 00000000000..77003b1bb48
--- /dev/null
+++ b/storage/rocksdb/rdb_psi.cc
@@ -0,0 +1,115 @@
+/* Copyright (c) 2017, Percona and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation // gcc: Class implementation
+#endif
+
+#define MYSQL_SERVER 1
+
+/* The C++ file's header */
+#include "./rdb_psi.h"
+
+namespace myrocks {
+
+/*
+ The following is needed as an argument for mysql_stage_register,
+  irrespective of whether we're compiling with P_S or not.
+*/
+my_core::PSI_stage_info stage_waiting_on_row_lock = {0, "Waiting for row lock",
+ 0};
+
+#ifdef HAVE_PSI_INTERFACE
+my_core::PSI_stage_info *all_rocksdb_stages[] = {&stage_waiting_on_row_lock};
+
+my_core::PSI_thread_key rdb_background_psi_thread_key,
+ rdb_drop_idx_psi_thread_key, rdb_mc_psi_thread_key;
+
+my_core::PSI_thread_info all_rocksdb_threads[] = {
+ {&rdb_background_psi_thread_key, "background", PSI_FLAG_GLOBAL},
+ {&rdb_drop_idx_psi_thread_key, "drop index", PSI_FLAG_GLOBAL},
+ {&rdb_mc_psi_thread_key, "manual compaction", PSI_FLAG_GLOBAL},
+};
+
+my_core::PSI_mutex_key rdb_psi_open_tbls_mutex_key, rdb_signal_bg_psi_mutex_key,
+ rdb_signal_drop_idx_psi_mutex_key, rdb_signal_mc_psi_mutex_key,
+ rdb_collation_data_mutex_key, rdb_mem_cmp_space_mutex_key,
+ key_mutex_tx_list, rdb_sysvars_psi_mutex_key, rdb_cfm_mutex_key,
+ rdb_sst_commit_key, rdb_block_cache_resize_mutex_key;
+
+my_core::PSI_mutex_info all_rocksdb_mutexes[] = {
+ {&rdb_psi_open_tbls_mutex_key, "open tables", PSI_FLAG_GLOBAL},
+ {&rdb_signal_bg_psi_mutex_key, "stop background", PSI_FLAG_GLOBAL},
+ {&rdb_signal_drop_idx_psi_mutex_key, "signal drop index", PSI_FLAG_GLOBAL},
+ {&rdb_signal_mc_psi_mutex_key, "signal manual compaction", PSI_FLAG_GLOBAL},
+ {&rdb_collation_data_mutex_key, "collation data init", PSI_FLAG_GLOBAL},
+ {&rdb_mem_cmp_space_mutex_key, "collation space char data init",
+ PSI_FLAG_GLOBAL},
+ {&key_mutex_tx_list, "tx_list", PSI_FLAG_GLOBAL},
+ {&rdb_sysvars_psi_mutex_key, "setting sysvar", PSI_FLAG_GLOBAL},
+ {&rdb_cfm_mutex_key, "column family manager", PSI_FLAG_GLOBAL},
+ {&rdb_sst_commit_key, "sst commit", PSI_FLAG_GLOBAL},
+ {&rdb_block_cache_resize_mutex_key, "resizing block cache",
+ PSI_FLAG_GLOBAL},
+};
+
+my_core::PSI_rwlock_key key_rwlock_collation_exception_list,
+ key_rwlock_read_free_rpl_tables, key_rwlock_skip_unique_check_tables;
+
+my_core::PSI_rwlock_info all_rocksdb_rwlocks[] = {
+ {&key_rwlock_collation_exception_list, "collation_exception_list",
+ PSI_FLAG_GLOBAL},
+ {&key_rwlock_read_free_rpl_tables, "read_free_rpl_tables", PSI_FLAG_GLOBAL},
+ {&key_rwlock_skip_unique_check_tables, "skip_unique_check_tables",
+ PSI_FLAG_GLOBAL},
+};
+
+my_core::PSI_cond_key rdb_signal_bg_psi_cond_key,
+ rdb_signal_drop_idx_psi_cond_key, rdb_signal_mc_psi_cond_key;
+
+my_core::PSI_cond_info all_rocksdb_conds[] = {
+ {&rdb_signal_bg_psi_cond_key, "cond signal background", PSI_FLAG_GLOBAL},
+ {&rdb_signal_drop_idx_psi_cond_key, "cond signal drop index",
+ PSI_FLAG_GLOBAL},
+ {&rdb_signal_mc_psi_cond_key, "cond signal manual compaction",
+ PSI_FLAG_GLOBAL},
+};
+
+void init_rocksdb_psi_keys() {
+ const char *const category = "rocksdb";
+ int count;
+
+ count = array_elements(all_rocksdb_mutexes);
+ mysql_mutex_register(category, all_rocksdb_mutexes, count);
+
+ count = array_elements(all_rocksdb_rwlocks);
+ mysql_rwlock_register(category, all_rocksdb_rwlocks, count);
+
+ count = array_elements(all_rocksdb_conds);
+ // TODO(jay) Disabling PFS for conditions due to the bug
+ // https://github.com/MySQLOnRocksDB/mysql-5.6/issues/92
+ // PSI_server->register_cond(category, all_rocksdb_conds, count);
+
+ count = array_elements(all_rocksdb_stages);
+ mysql_stage_register(category, all_rocksdb_stages, count);
+
+ count = array_elements(all_rocksdb_threads);
+ mysql_thread_register(category, all_rocksdb_threads, count);
+}
+#else // HAVE_PSI_INTERFACE
+void init_rocksdb_psi_keys() {}
+#endif // HAVE_PSI_INTERFACE
+
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_psi.h b/storage/rocksdb/rdb_psi.h
new file mode 100644
index 00000000000..2703837a156
--- /dev/null
+++ b/storage/rocksdb/rdb_psi.h
@@ -0,0 +1,58 @@
+/* Copyright (c) 2017, Percona and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+#pragma once
+
+#ifndef _rdb_psi_h_
+#define _rdb_psi_h_
+
+/* MySQL header files */
+#include <my_global.h>
+#include <my_pthread.h>
+
+#include <mysql/psi/mysql_stage.h>
+
+/* MyRocks header files */
+#include "./rdb_utils.h"
+
+namespace myrocks {
+
+/*
+ The following is needed as an argument for mysql_stage_register,
+  irrespective of whether we're compiling with P_S or not.
+*/
+extern my_core::PSI_stage_info stage_waiting_on_row_lock;
+
+#ifdef HAVE_PSI_INTERFACE
+extern my_core::PSI_thread_key rdb_background_psi_thread_key,
+ rdb_drop_idx_psi_thread_key, rdb_mc_psi_thread_key;
+
+extern my_core::PSI_mutex_key rdb_psi_open_tbls_mutex_key,
+ rdb_signal_bg_psi_mutex_key, rdb_signal_drop_idx_psi_mutex_key,
+ rdb_signal_mc_psi_mutex_key, rdb_collation_data_mutex_key,
+ rdb_mem_cmp_space_mutex_key, key_mutex_tx_list, rdb_sysvars_psi_mutex_key,
+ rdb_cfm_mutex_key, rdb_sst_commit_key, rdb_block_cache_resize_mutex_key;
+
+extern my_core::PSI_rwlock_key key_rwlock_collation_exception_list,
+ key_rwlock_read_free_rpl_tables, key_rwlock_skip_unique_check_tables;
+
+extern my_core::PSI_cond_key rdb_signal_bg_psi_cond_key,
+ rdb_signal_drop_idx_psi_cond_key, rdb_signal_mc_psi_cond_key;
+#endif // HAVE_PSI_INTERFACE
+
+void init_rocksdb_psi_keys();
+
+} // namespace myrocks
+
+#endif // _rdb_psi_h_
diff --git a/storage/rocksdb/rdb_source_revision.h.in b/storage/rocksdb/rdb_source_revision.h.in
new file mode 100644
index 00000000000..617b39c9186
--- /dev/null
+++ b/storage/rocksdb/rdb_source_revision.h.in
@@ -0,0 +1 @@
+#define ROCKSDB_GIT_HASH "@ROCKSDB_GIT_HASH@"
diff --git a/storage/rocksdb/rdb_sst_info.cc b/storage/rocksdb/rdb_sst_info.cc
new file mode 100644
index 00000000000..f7e944c2e39
--- /dev/null
+++ b/storage/rocksdb/rdb_sst_info.cc
@@ -0,0 +1,559 @@
+/*
+ Copyright (c) 2016, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+
+/* For PRIu64 use below: */
+#define __STDC_FORMAT_MACROS
+
+#include <my_config.h>
+
+/* This C++ file's header file */
+#include "./rdb_sst_info.h"
+
+#include <inttypes.h>
+
+/* C++ standard header files */
+#include <cstdio>
+#include <string>
+#include <utility>
+#include <vector>
+
+/* MySQL header files */
+#include <mysqld_error.h>
+#include "../sql/log.h"
+#include "./my_dir.h"
+
+/* RocksDB header files */
+#include "rocksdb/db.h"
+#include "rocksdb/options.h"
+
+/* MyRocks header files */
+#include "./ha_rocksdb.h"
+#include "./ha_rocksdb_proto.h"
+#include "./rdb_cf_options.h"
+#include "./rdb_psi.h"
+
+namespace myrocks {
+
+Rdb_sst_file_ordered::Rdb_sst_file::Rdb_sst_file(
+ rocksdb::DB *const db, rocksdb::ColumnFamilyHandle *const cf,
+ const rocksdb::DBOptions &db_options, const std::string &name,
+ const bool tracing)
+ : m_db(db),
+ m_cf(cf),
+ m_db_options(db_options),
+ m_sst_file_writer(nullptr),
+ m_name(name),
+ m_tracing(tracing),
+ m_comparator(cf->GetComparator()) {
+ DBUG_ASSERT(db != nullptr);
+ DBUG_ASSERT(cf != nullptr);
+}
+
+Rdb_sst_file_ordered::Rdb_sst_file::~Rdb_sst_file() {
+ // Make sure we clean up
+ delete m_sst_file_writer;
+ m_sst_file_writer = nullptr;
+}
+
+rocksdb::Status Rdb_sst_file_ordered::Rdb_sst_file::open() {
+ DBUG_ASSERT(m_sst_file_writer == nullptr);
+
+ rocksdb::ColumnFamilyDescriptor cf_descr;
+
+ rocksdb::Status s = m_cf->GetDescriptor(&cf_descr);
+ if (!s.ok()) {
+ return s;
+ }
+
+ // Create an sst file writer with the current options and comparator
+ const rocksdb::EnvOptions env_options(m_db_options);
+ const rocksdb::Options options(m_db_options, cf_descr.options);
+
+ m_sst_file_writer =
+ new rocksdb::SstFileWriter(env_options, options, m_comparator, m_cf, true,
+ rocksdb::Env::IOPriority::IO_TOTAL,
+ cf_descr.options.optimize_filters_for_hits);
+
+ s = m_sst_file_writer->Open(m_name);
+ if (m_tracing) {
+ // NO_LINT_DEBUG
+ sql_print_information("SST Tracing: Open(%s) returned %s", m_name.c_str(),
+ s.ok() ? "ok" : "not ok");
+ }
+
+ if (!s.ok()) {
+ delete m_sst_file_writer;
+ m_sst_file_writer = nullptr;
+ }
+
+ return s;
+}
+
+rocksdb::Status Rdb_sst_file_ordered::Rdb_sst_file::put(
+ const rocksdb::Slice &key, const rocksdb::Slice &value) {
+ DBUG_ASSERT(m_sst_file_writer != nullptr);
+
+#ifdef __GNUC__
+ // Add the specified key/value to the sst file writer
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#endif
+  return m_sst_file_writer->Add(key, value);
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+}
+
+std::string Rdb_sst_file_ordered::Rdb_sst_file::generateKey(
+ const std::string &key) {
+ static char const hexdigit[] = {'0', '1', '2', '3', '4', '5', '6', '7',
+ '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
+
+ std::string res;
+
+ res.reserve(key.size() * 2);
+
+ for (auto ch : key) {
+ res += hexdigit[((uint8_t)ch) >> 4];
+ res += hexdigit[((uint8_t)ch) & 0x0F];
+ }
+
+ return res;
+}
+
+// This function is run by the background thread
+rocksdb::Status Rdb_sst_file_ordered::Rdb_sst_file::commit() {
+ DBUG_ASSERT(m_sst_file_writer != nullptr);
+
+ rocksdb::Status s;
+  rocksdb::ExternalSstFileInfo fileinfo;  // Filled in by Finish() below
+
+ // Close out the sst file
+ s = m_sst_file_writer->Finish(&fileinfo);
+ if (m_tracing) {
+ // NO_LINT_DEBUG
+ sql_print_information("SST Tracing: Finish returned %s",
+ s.ok() ? "ok" : "not ok");
+ }
+
+ if (s.ok()) {
+ if (m_tracing) {
+ // NO_LINT_DEBUG
+ sql_print_information(
+ "SST Tracing: Adding file %s, smallest key: %s, "
+ "largest key: %s, file size: %" PRIu64
+ ", "
+ "num_entries: %" PRIu64,
+ fileinfo.file_path.c_str(),
+ generateKey(fileinfo.smallest_key).c_str(),
+ generateKey(fileinfo.largest_key).c_str(), fileinfo.file_size,
+ fileinfo.num_entries);
+ }
+ }
+
+ delete m_sst_file_writer;
+ m_sst_file_writer = nullptr;
+
+ return s;
+}
+
+void Rdb_sst_file_ordered::Rdb_sst_stack::push(const rocksdb::Slice &key,
+ const rocksdb::Slice &value) {
+ if (m_buffer == nullptr) {
+ m_buffer = new char[m_buffer_size];
+ }
+
+  // Put the actual key and value data onto our stack
+ size_t key_offset = m_offset;
+ memcpy(m_buffer + m_offset, key.data(), key.size());
+ m_offset += key.size();
+ memcpy(m_buffer + m_offset, value.data(), value.size());
+ m_offset += value.size();
+
+ // Push just the offset, the key length and the value length onto the stack
+ m_stack.push(std::make_tuple(key_offset, key.size(), value.size()));
+}
+
+std::pair<rocksdb::Slice, rocksdb::Slice>
+Rdb_sst_file_ordered::Rdb_sst_stack::top() {
+ size_t offset, key_len, value_len;
+ // Pop the next item off the internal stack
+ std::tie(offset, key_len, value_len) = m_stack.top();
+
+ // Make slices from the offset (first), key length (second), and value
+ // length (third)
+ DBUG_ASSERT(m_buffer != nullptr);
+ rocksdb::Slice key(m_buffer + offset, key_len);
+ rocksdb::Slice value(m_buffer + offset + key_len, value_len);
+
+ return std::make_pair(key, value);
+}
+
+Rdb_sst_file_ordered::Rdb_sst_file_ordered(
+ rocksdb::DB *const db, rocksdb::ColumnFamilyHandle *const cf,
+ const rocksdb::DBOptions &db_options, const std::string &name,
+ const bool tracing, size_t max_size)
+ : m_use_stack(false),
+ m_first(true),
+ m_stack(max_size),
+ m_file(db, cf, db_options, name, tracing) {
+ m_stack.reset();
+}
+
+rocksdb::Status Rdb_sst_file_ordered::apply_first() {
+ rocksdb::Slice first_key_slice(m_first_key);
+ rocksdb::Slice first_value_slice(m_first_value);
+ rocksdb::Status s;
+
+ if (m_use_stack) {
+ // Put the first key onto the stack
+ m_stack.push(first_key_slice, first_value_slice);
+ } else {
+ // Put the first key into the SST
+ s = m_file.put(first_key_slice, first_value_slice);
+ if (!s.ok()) {
+ return s;
+ }
+ }
+
+ // Clear out the 'first' strings for next key/value
+ m_first_key.clear();
+ m_first_value.clear();
+
+ return s;
+}
+
+rocksdb::Status Rdb_sst_file_ordered::put(const rocksdb::Slice &key,
+ const rocksdb::Slice &value) {
+ rocksdb::Status s;
+
+ // If this is the first key, just store a copy of the key and value
+ if (m_first) {
+ m_first_key = key.ToString();
+ m_first_value = value.ToString();
+ m_first = false;
+ return rocksdb::Status::OK();
+ }
+
+  // If the first key is not empty, this must be the second key. Compare
+  // the new key with the first key to determine whether the data will go
+  // straight to the SST or be put on the stack to be replayed later.
+ if (!m_first_key.empty()) {
+ rocksdb::Slice first_key_slice(m_first_key);
+ int cmp = m_file.compare(first_key_slice, key);
+ m_use_stack = (cmp > 0);
+
+ // Apply the first key to the stack or SST
+ s = apply_first();
+ if (!s.ok()) {
+ return s;
+ }
+ }
+
+ // Put this key on the stack or into the SST
+ if (m_use_stack) {
+ m_stack.push(key, value);
+ } else {
+ s = m_file.put(key, value);
+ }
+
+ return s;
+}
+
+rocksdb::Status Rdb_sst_file_ordered::commit() {
+ rocksdb::Status s;
+
+ // Make sure we get the first key if it was the only key given to us.
+ if (!m_first_key.empty()) {
+ s = apply_first();
+ if (!s.ok()) {
+ return s;
+ }
+ }
+
+ if (m_use_stack) {
+ rocksdb::Slice key;
+ rocksdb::Slice value;
+
+ // We are ready to commit, pull each entry off the stack (which reverses
+ // the original data) and send it to the SST file.
+ while (!m_stack.empty()) {
+ std::tie(key, value) = m_stack.top();
+ s = m_file.put(key, value);
+ if (!s.ok()) {
+ return s;
+ }
+
+ m_stack.pop();
+ }
+
+ // We have pulled everything off the stack, reset for the next time
+ m_stack.reset();
+ m_use_stack = false;
+ }
+
+ // reset m_first
+ m_first = true;
+
+ return m_file.commit();
+}
+
+Rdb_sst_info::Rdb_sst_info(rocksdb::DB *const db, const std::string &tablename,
+ const std::string &indexname,
+ rocksdb::ColumnFamilyHandle *const cf,
+ const rocksdb::DBOptions &db_options,
+ const bool tracing)
+ : m_db(db),
+ m_cf(cf),
+ m_db_options(db_options),
+ m_curr_size(0),
+ m_sst_count(0),
+ m_background_error(HA_EXIT_SUCCESS),
+ m_done(false),
+ m_sst_file(nullptr),
+ m_tracing(tracing),
+ m_print_client_error(true) {
+ m_prefix = db->GetName() + "/";
+
+ std::string normalized_table;
+ if (rdb_normalize_tablename(tablename.c_str(), &normalized_table)) {
+ // We failed to get a normalized table name. This should never happen,
+ // but handle it anyway.
+ m_prefix += "fallback_" +
+ std::to_string(reinterpret_cast<intptr_t>(
+ reinterpret_cast<void *>(this))) +
+ "_" + indexname + "_";
+ } else {
+ m_prefix += normalized_table + "_" + indexname + "_";
+ }
+
+ // Unique filename generated to prevent collisions when the same table
+ // is loaded in parallel
+ m_prefix += std::to_string(m_prefix_counter.fetch_add(1)) + "_";
+
+ rocksdb::ColumnFamilyDescriptor cf_descr;
+ const rocksdb::Status s = m_cf->GetDescriptor(&cf_descr);
+ if (!s.ok()) {
+ // Default size if we can't get the cf's target size
+ m_max_size = 64 * 1024 * 1024;
+ } else {
+ // Set the maximum size to 3 times the cf's target size
+ m_max_size = cf_descr.options.target_file_size_base * 3;
+ }
+ mysql_mutex_init(rdb_sst_commit_key, &m_commit_mutex, MY_MUTEX_INIT_FAST);
+}
+
+Rdb_sst_info::~Rdb_sst_info() {
+ DBUG_ASSERT(m_sst_file == nullptr);
+
+ for (auto sst_file : m_committed_files) {
+ // In case something went wrong attempt to delete the temporary file.
+ // If everything went fine that file will have been renamed and this
+ // function call will fail.
+ std::remove(sst_file.c_str());
+ }
+ m_committed_files.clear();
+
+ mysql_mutex_destroy(&m_commit_mutex);
+}
+
+int Rdb_sst_info::open_new_sst_file() {
+ DBUG_ASSERT(m_sst_file == nullptr);
+
+ // Create the new sst file's name
+ const std::string name = m_prefix + std::to_string(m_sst_count++) + m_suffix;
+
+ // Create the new sst file object
+ m_sst_file = new Rdb_sst_file_ordered(m_db, m_cf, m_db_options, name,
+ m_tracing, m_max_size);
+
+ // Open the sst file
+ const rocksdb::Status s = m_sst_file->open();
+ if (!s.ok()) {
+ set_error_msg(m_sst_file->get_name(), s);
+ delete m_sst_file;
+ m_sst_file = nullptr;
+ return HA_ERR_ROCKSDB_BULK_LOAD;
+ }
+
+ m_curr_size = 0;
+
+ return HA_EXIT_SUCCESS;
+}
+
+void Rdb_sst_info::commit_sst_file(Rdb_sst_file_ordered *sst_file) {
+ const rocksdb::Status s = sst_file->commit();
+ if (!s.ok()) {
+ set_error_msg(sst_file->get_name(), s);
+ set_background_error(HA_ERR_ROCKSDB_BULK_LOAD);
+ }
+
+ m_committed_files.push_back(sst_file->get_name());
+
+ delete sst_file;
+}
+
+void Rdb_sst_info::close_curr_sst_file() {
+ DBUG_ASSERT(m_sst_file != nullptr);
+ DBUG_ASSERT(m_curr_size > 0);
+
+ commit_sst_file(m_sst_file);
+
+ // Reset for next sst file
+ m_sst_file = nullptr;
+ m_curr_size = 0;
+}
+
+int Rdb_sst_info::put(const rocksdb::Slice &key, const rocksdb::Slice &value) {
+ int rc;
+
+ DBUG_ASSERT(!m_done);
+
+ if (m_curr_size + key.size() + value.size() >= m_max_size) {
+ // The current sst file has reached its maximum, close it out
+ close_curr_sst_file();
+
+ // While we are here, check to see if we have had any errors from the
+ // background thread - we don't want to wait for the end to report them
+ if (have_background_error()) {
+ return get_and_reset_background_error();
+ }
+ }
+
+ if (m_curr_size == 0) {
+ // We don't have an sst file open - open one
+ rc = open_new_sst_file();
+ if (rc != 0) {
+ return rc;
+ }
+ }
+
+ DBUG_ASSERT(m_sst_file != nullptr);
+
+ // Add the key/value to the current sst file
+ const rocksdb::Status s = m_sst_file->put(key, value);
+ if (!s.ok()) {
+ set_error_msg(m_sst_file->get_name(), s);
+ return HA_ERR_ROCKSDB_BULK_LOAD;
+ }
+
+ m_curr_size += key.size() + value.size();
+
+ return HA_EXIT_SUCCESS;
+}
+
+/*
+ Finish the current work and return the list of SST files ready to be
+  ingested. This function needs to be idempotent and atomic.
+ */
+int Rdb_sst_info::finish(Rdb_sst_commit_info *commit_info,
+ bool print_client_error) {
+ int ret = HA_EXIT_SUCCESS;
+
+ // Both the transaction clean up and the ha_rocksdb handler have
+ // references to this Rdb_sst_info and both can call commit, so
+ // synchronize on the object here.
+  // This also means that in such a case the bulk loading operation stops
+  // being truly atomic; we should consider fixing this in the future.
+ RDB_MUTEX_LOCK_CHECK(m_commit_mutex);
+
+ if (is_done()) {
+ RDB_MUTEX_UNLOCK_CHECK(m_commit_mutex);
+ return ret;
+ }
+
+ m_print_client_error = print_client_error;
+
+ if (m_curr_size > 0) {
+ // Close out any existing files
+ close_curr_sst_file();
+ }
+
+ // This checks out the list of files so that the caller can collect/group
+ // them and ingest them all in one go, and any racing calls to commit
+ // won't see them at all
+ commit_info->init(m_cf, std::move(m_committed_files));
+ DBUG_ASSERT(m_committed_files.size() == 0);
+
+ m_done = true;
+ RDB_MUTEX_UNLOCK_CHECK(m_commit_mutex);
+
+ // Did we get any errors?
+ if (have_background_error()) {
+ ret = get_and_reset_background_error();
+ }
+
+ m_print_client_error = true;
+ return ret;
+}
+
+void Rdb_sst_info::set_error_msg(const std::string &sst_file_name,
+ const rocksdb::Status &s) {
+ if (!m_print_client_error) return;
+
+ report_error_msg(s, sst_file_name.c_str());
+}
+
+void Rdb_sst_info::report_error_msg(const rocksdb::Status &s,
+ const char *sst_file_name) {
+ if (s.IsInvalidArgument() &&
+ strcmp(s.getState(), "Keys must be added in strict ascending order.") == 0) {
+ my_printf_error(ER_KEYS_OUT_OF_ORDER,
+ "Rows must be inserted in primary key order "
+ "during bulk load operation",
+ MYF(0));
+ } else if (s.IsInvalidArgument() &&
+ strcmp(s.getState(), "Global seqno is required, but disabled") ==
+ 0) {
+ my_printf_error(ER_OVERLAPPING_KEYS,
+ "Rows inserted during bulk load "
+ "must not overlap existing rows",
+ MYF(0));
+ } else {
+ my_printf_error(ER_UNKNOWN_ERROR, "[%s] bulk load error: %s", MYF(0),
+ sst_file_name, s.ToString().c_str());
+ }
+}
+
+void Rdb_sst_info::init(const rocksdb::DB *const db) {
+ const std::string path = db->GetName() + FN_DIRSEP;
+ struct st_my_dir *const dir_info = my_dir(path.c_str(), MYF(MY_DONT_SORT));
+
+ // Access the directory
+ if (dir_info == nullptr) {
+ // NO_LINT_DEBUG
+ sql_print_warning("RocksDB: Could not access database directory: %s",
+ path.c_str());
+ return;
+ }
+
+ // Scan through the files in the directory
+ const struct fileinfo *file_info = dir_info->dir_entry;
+ for (uint ii= 0; ii < dir_info->number_of_files; ii++, file_info++) {
+ // find any files ending with m_suffix ...
+ const std::string name = file_info->name;
+ const size_t pos = name.find(m_suffix);
+ if (pos != std::string::npos && name.size() - pos == m_suffix.size()) {
+ // ... and remove them
+ const std::string fullname = path + name;
+ my_delete(fullname.c_str(), MYF(0));
+ }
+ }
+
+ // Release the directory entry
+ my_dirend(dir_info);
+}
+
+std::atomic<uint64_t> Rdb_sst_info::m_prefix_counter(0);
+std::string Rdb_sst_info::m_suffix = ".bulk_load.tmp";
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_sst_info.h b/storage/rocksdb/rdb_sst_info.h
new file mode 100644
index 00000000000..66da3b7c1e7
--- /dev/null
+++ b/storage/rocksdb/rdb_sst_info.h
@@ -0,0 +1,265 @@
+/*
+ Copyright (c) 2016, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+
+#pragma once
+
+/* C++ standard header files */
+#include <atomic>
+#include <condition_variable>
+#include <mutex>
+#include <queue>
+#include <stack>
+#include <string>
+#include <thread>
+#include <utility>
+#include <vector>
+
+/* RocksDB header files */
+#include "rocksdb/db.h"
+#include "rocksdb/sst_file_writer.h"
+
+/* MyRocks header files */
+#include "./rdb_utils.h"
+
+namespace myrocks {
+
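+/*
+  Writes an SST file whose keys may arrive in either strictly ascending or
+  strictly descending order. The direction is detected from the first two
+  keys; descending input is buffered in Rdb_sst_stack and replayed in
+  reverse on commit(), since SstFileWriter requires ascending key order.
+*/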
+class Rdb_sst_file_ordered {
+ private:
+ class Rdb_sst_file {
+ private:
+ Rdb_sst_file(const Rdb_sst_file &p) = delete;
+ Rdb_sst_file &operator=(const Rdb_sst_file &p) = delete;
+
+ rocksdb::DB *const m_db;
+ rocksdb::ColumnFamilyHandle *const m_cf;
+ const rocksdb::DBOptions &m_db_options;
+ rocksdb::SstFileWriter *m_sst_file_writer;
+ const std::string m_name;
+ const bool m_tracing;
+ const rocksdb::Comparator *m_comparator;
+
+ std::string generateKey(const std::string &key);
+
+ public:
+ Rdb_sst_file(rocksdb::DB *const db, rocksdb::ColumnFamilyHandle *const cf,
+ const rocksdb::DBOptions &db_options, const std::string &name,
+ const bool tracing);
+ ~Rdb_sst_file();
+
+ rocksdb::Status open();
+ rocksdb::Status put(const rocksdb::Slice &key, const rocksdb::Slice &value);
+ rocksdb::Status commit();
+
+ inline const std::string get_name() const { return m_name; }
+ inline int compare(rocksdb::Slice key1, rocksdb::Slice key2) {
+ return m_comparator->Compare(key1, key2);
+ }
+ };
+
+ class Rdb_sst_stack {
+ private:
+ char *m_buffer;
+ size_t m_buffer_size;
+ size_t m_offset;
+ std::stack<std::tuple<size_t, size_t, size_t>> m_stack;
+
+ public:
+    explicit Rdb_sst_stack(size_t max_size)
+        : m_buffer(nullptr), m_buffer_size(max_size), m_offset(0) {}
+ ~Rdb_sst_stack() { delete[] m_buffer; }
+
+ void reset() { m_offset = 0; }
+ bool empty() { return m_stack.empty(); }
+ void push(const rocksdb::Slice &key, const rocksdb::Slice &value);
+ std::pair<rocksdb::Slice, rocksdb::Slice> top();
+ void pop() { m_stack.pop(); }
+ size_t size() { return m_stack.size(); }
+ };
+
+ bool m_use_stack;
+ bool m_first;
+ std::string m_first_key;
+ std::string m_first_value;
+ Rdb_sst_stack m_stack;
+ Rdb_sst_file m_file;
+
+ rocksdb::Status apply_first();
+
+ public:
+ Rdb_sst_file_ordered(rocksdb::DB *const db,
+ rocksdb::ColumnFamilyHandle *const cf,
+ const rocksdb::DBOptions &db_options,
+ const std::string &name, const bool tracing,
+ size_t max_size);
+
+ inline rocksdb::Status open() { return m_file.open(); }
+ rocksdb::Status put(const rocksdb::Slice &key, const rocksdb::Slice &value);
+ rocksdb::Status commit();
+ inline const std::string get_name() const { return m_file.get_name(); }
+};
+
+class Rdb_sst_info {
+ private:
+ Rdb_sst_info(const Rdb_sst_info &p) = delete;
+ Rdb_sst_info &operator=(const Rdb_sst_info &p) = delete;
+
+ rocksdb::DB *const m_db;
+ rocksdb::ColumnFamilyHandle *const m_cf;
+ const rocksdb::DBOptions &m_db_options;
+ uint64_t m_curr_size;
+ uint64_t m_max_size;
+ uint32_t m_sst_count;
+ std::atomic<int> m_background_error;
+ bool m_done;
+ std::string m_prefix;
+ static std::atomic<uint64_t> m_prefix_counter;
+ static std::string m_suffix;
+ mysql_mutex_t m_commit_mutex;
+ Rdb_sst_file_ordered *m_sst_file;
+
+ // List of committed SST files - we'll ingest them later in one single batch
+ std::vector<std::string> m_committed_files;
+
+ const bool m_tracing;
+ bool m_print_client_error;
+
+ int open_new_sst_file();
+ void close_curr_sst_file();
+ void commit_sst_file(Rdb_sst_file_ordered *sst_file);
+
+ void set_error_msg(const std::string &sst_file_name,
+ const rocksdb::Status &s);
+
+ public:
+ Rdb_sst_info(rocksdb::DB *const db, const std::string &tablename,
+ const std::string &indexname,
+ rocksdb::ColumnFamilyHandle *const cf,
+ const rocksdb::DBOptions &db_options, const bool tracing);
+ ~Rdb_sst_info();
+
+ /*
+ This is the unit of work returned from Rdb_sst_info::finish and represents
+    a group of SST files to be ingested atomically with other
+    Rdb_sst_commit_info objects. It is always local to the completion of a
+    bulk load operation, so no locking is required.
+ */
+ class Rdb_sst_commit_info {
+ public:
+ Rdb_sst_commit_info() : m_committed(true), m_cf(nullptr) {}
+
+ Rdb_sst_commit_info(Rdb_sst_commit_info &&rhs) noexcept
+ : m_committed(rhs.m_committed),
+ m_cf(rhs.m_cf),
+ m_committed_files(std::move(rhs.m_committed_files)) {
+ rhs.m_committed = true;
+ rhs.m_cf = nullptr;
+ }
+
+ Rdb_sst_commit_info &operator=(Rdb_sst_commit_info &&rhs) noexcept {
+ reset();
+
+ m_cf = rhs.m_cf;
+ m_committed_files = std::move(rhs.m_committed_files);
+ m_committed = rhs.m_committed;
+
+ rhs.m_committed = true;
+ rhs.m_cf = nullptr;
+
+ return *this;
+ }
+
+ Rdb_sst_commit_info(const Rdb_sst_commit_info &) = delete;
+ Rdb_sst_commit_info &operator=(const Rdb_sst_commit_info &) = delete;
+
+ ~Rdb_sst_commit_info() { reset(); }
+
+ void reset() {
+ if (!m_committed) {
+ for (auto sst_file : m_committed_files) {
+ // In case something went wrong attempt to delete the temporary file.
+ // If everything went fine that file will have been renamed and this
+ // function call will fail.
+ std::remove(sst_file.c_str());
+ }
+ }
+ m_committed_files.clear();
+ m_cf = nullptr;
+ m_committed = true;
+ }
+
+ bool has_work() const {
+ return m_cf != nullptr && m_committed_files.size() > 0;
+ }
+
+ void init(rocksdb::ColumnFamilyHandle *cf,
+ std::vector<std::string> &&files) {
+ DBUG_ASSERT(m_cf == nullptr && m_committed_files.size() == 0 &&
+ m_committed);
+ m_cf = cf;
+ m_committed_files = std::move(files);
+ m_committed = false;
+ }
+
+ rocksdb::ColumnFamilyHandle *get_cf() const { return m_cf; }
+
+ const std::vector<std::string> &get_committed_files() const {
+ return m_committed_files;
+ }
+
+ void commit() { m_committed = true; }
+
+ private:
+ bool m_committed;
+ rocksdb::ColumnFamilyHandle *m_cf;
+ std::vector<std::string> m_committed_files;
+ };
+
+ int put(const rocksdb::Slice &key, const rocksdb::Slice &value);
+ int finish(Rdb_sst_commit_info *commit_info, bool print_client_error = true);
+
+ bool is_done() const { return m_done; }
+
+ bool have_background_error() { return m_background_error != 0; }
+
+ int get_and_reset_background_error() {
+ int ret = m_background_error;
+ while (!m_background_error.compare_exchange_weak(ret, HA_EXIT_SUCCESS)) {
+ // Do nothing
+ }
+
+ return ret;
+ }
+
+ void set_background_error(int code) {
+ int expected = HA_EXIT_SUCCESS;
+    // Only assign 'code' to the error if it is still 0; otherwise ignore it
+ m_background_error.compare_exchange_strong(expected, code);
+ }
+
+  /* Return the list of committed files, to be ingested later */
+ const std::vector<std::string> &get_committed_files() {
+ return m_committed_files;
+ }
+
+ rocksdb::ColumnFamilyHandle *get_cf() const { return m_cf; }
+
+ static void init(const rocksdb::DB *const db);
+
+ static void report_error_msg(const rocksdb::Status &s,
+ const char *sst_file_name);
+};
+
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_threads.cc b/storage/rocksdb/rdb_threads.cc
new file mode 100644
index 00000000000..6f2377faff3
--- /dev/null
+++ b/storage/rocksdb/rdb_threads.cc
@@ -0,0 +1,83 @@
+/*
+ Portions Copyright (c) 2015-Present, Facebook, Inc.
+ Portions Copyright (c) 2012, Monty Program Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation // gcc: Class implementation
+#endif
+
+#include <my_config.h>
+
+/* The C++ file's header */
+#include "./rdb_threads.h"
+
+namespace myrocks {
+
+void *Rdb_thread::thread_func(void *const thread_ptr) {
+ DBUG_ASSERT(thread_ptr != nullptr);
+ Rdb_thread *const thread = static_cast<Rdb_thread *>(thread_ptr);
+ if (!thread->m_run_once.exchange(true)) {
+ thread->setname();
+ thread->run();
+ thread->uninit();
+ }
+ return nullptr;
+}
+
+void Rdb_thread::init(
+#ifdef HAVE_PSI_INTERFACE
+ my_core::PSI_mutex_key stop_bg_psi_mutex_key,
+ my_core::PSI_cond_key stop_bg_psi_cond_key
+#endif
+) {
+ DBUG_ASSERT(!m_run_once);
+ mysql_mutex_init(stop_bg_psi_mutex_key, &m_signal_mutex, MY_MUTEX_INIT_FAST);
+ mysql_cond_init(stop_bg_psi_cond_key, &m_signal_cond, nullptr);
+}
+
+void Rdb_thread::uninit() {
+ mysql_mutex_destroy(&m_signal_mutex);
+ mysql_cond_destroy(&m_signal_cond);
+}
+
+int Rdb_thread::create_thread(const std::string &thread_name
+#ifdef HAVE_PSI_INTERFACE
+ ,
+ PSI_thread_key background_psi_thread_key
+#endif
+) {
+ // Make a copy of the name so we can return without worrying that the
+ // caller will free the memory
+ m_name = thread_name;
+
+ return mysql_thread_create(background_psi_thread_key, &m_handle, nullptr,
+ thread_func, this);
+}
+
+void Rdb_thread::signal(const bool stop_thread) {
+ RDB_MUTEX_LOCK_CHECK(m_signal_mutex);
+
+ if (stop_thread) {
+ m_stop = true;
+ }
+
+ mysql_cond_signal(&m_signal_cond);
+
+ RDB_MUTEX_UNLOCK_CHECK(m_signal_mutex);
+}
+
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_threads.h b/storage/rocksdb/rdb_threads.h
new file mode 100644
index 00000000000..7d89fe0616b
--- /dev/null
+++ b/storage/rocksdb/rdb_threads.h
@@ -0,0 +1,195 @@
+/*
+ Portions Copyright (c) 2015-Present, Facebook, Inc.
+ Portions Copyright (c) 2012, Monty Program Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+#pragma once
+
+/* C++ standard header files */
+#include <map>
+#include <string>
+
+/* MySQL includes */
+#include "./my_global.h"
+#ifdef _WIN32
+#include <my_pthread.h>
+/*
+ Rocksdb implements their own pthread_key functions
+ undefine some my_pthread.h macros
+*/
+#undef pthread_key_create
+#undef pthread_key_delete
+#undef pthread_setspecific
+#undef pthread_getspecific
+#endif
+#include <mysql/psi/mysql_table.h>
+// #include <mysql/thread_pool_priv.h>
+
+/* MyRocks header files */
+#include "./rdb_utils.h"
+#include "rocksdb/db.h"
+
+namespace myrocks {
+
+class Rdb_thread {
+ private:
+ // Disable Copying
+ Rdb_thread(const Rdb_thread &);
+ Rdb_thread &operator=(const Rdb_thread &);
+
+ // Make sure we run only once
+ std::atomic_bool m_run_once;
+
+ pthread_t m_handle;
+
+ std::string m_name;
+
+ protected:
+ mysql_mutex_t m_signal_mutex;
+ mysql_cond_t m_signal_cond;
+ bool m_stop = false;
+
+ public:
+ Rdb_thread() : m_run_once(false) {}
+
+#ifdef HAVE_PSI_INTERFACE
+ void init(my_core::PSI_mutex_key stop_bg_psi_mutex_key,
+ my_core::PSI_cond_key stop_bg_psi_cond_key);
+ int create_thread(const std::string &thread_name,
+ my_core::PSI_thread_key background_psi_thread_key);
+#else
+ void init();
+ int create_thread(const std::string &thread_name);
+#endif
+
+ virtual void run(void) = 0;
+
+ void signal(const bool stop_thread = false);
+
+ int join()
+ {
+#ifndef _WIN32
+ return pthread_join(m_handle, nullptr);
+#else
+ /*
+ mysys on Windows creates "detached" threads in pthread_create().
+
+      m_handle here is the thread id (it is not reused by the OS,
+      thus it is safe to state there can't be another thread with
+      the same id at this point).
+
+      If the thread has already finished before pthread_join(),
+      we get EINVAL, and it is safe to ignore it and handle this as success.
+ */
+ pthread_join(m_handle, nullptr);
+ return 0;
+#endif
+ }
+
+ void setname() {
+ /*
+ mysql_thread_create() ends up doing some work underneath and setting the
+ thread name as "my-func". This isn't what we want. Our intent is to name
+      the threads according to their purpose so that, when displayed in a
+      debugger, they'll be more easily identifiable. Therefore we'll reset
+      the name if the thread was successfully created.
+ */
+
+ /*
+ We originally had the creator also set the thread name, but that seems to
+ not work correctly in all situations. Having the created thread do the
+ pthread_setname_np resolves the issue.
+ */
+ DBUG_ASSERT(!m_name.empty());
+#ifdef __linux__
+ int err = pthread_setname_np(m_handle, m_name.c_str());
+ if (err) {
+ // NO_LINT_DEBUG
+ sql_print_warning(
+ "MyRocks: Failed to set name (%s) for current thread, errno=%d,%d",
+ m_name.c_str(), errno, err);
+ }
+#endif
+ }
+
+ void uninit();
+
+ virtual ~Rdb_thread() {}
+
+ private:
+ static void *thread_func(void *const thread_ptr);
+};
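+
+/*
+  Illustrative lifecycle of a concrete Rdb_thread subclass (PSI-enabled
+  build; the names below are hypothetical):
+
+    my_thread.init(psi_mutex_key, psi_cond_key);
+    my_thread.create_thread("myrocks-bg", psi_thread_key);
+    ...
+    my_thread.signal(true);  // ask run() to stop
+    my_thread.join();
+*/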
+
+/**
+ MyRocks background thread control
+ N.B. This is on top of RocksDB's own background threads
+ (@see rocksdb::CancelAllBackgroundWork())
+*/
+
+class Rdb_background_thread : public Rdb_thread {
+ private:
+ bool m_save_stats = false;
+
+ void reset() {
+ mysql_mutex_assert_owner(&m_signal_mutex);
+ m_stop = false;
+ m_save_stats = false;
+ }
+
+ public:
+ virtual void run() override;
+
+ void request_save_stats() {
+ RDB_MUTEX_LOCK_CHECK(m_signal_mutex);
+
+ m_save_stats = true;
+
+ RDB_MUTEX_UNLOCK_CHECK(m_signal_mutex);
+ }
+};
+
+class Rdb_manual_compaction_thread : public Rdb_thread {
+ private:
+ struct Manual_compaction_request {
+ int mc_id;
+ enum mc_state { INITED = 0, RUNNING } state;
+ rocksdb::ColumnFamilyHandle *cf;
+ rocksdb::Slice *start;
+ rocksdb::Slice *limit;
+ int concurrency = 0;
+ };
+
+ int m_latest_mc_id;
+ mysql_mutex_t m_mc_mutex;
+ std::map<int, Manual_compaction_request> m_requests;
+
+ public:
+ virtual void run() override;
+ int request_manual_compaction(rocksdb::ColumnFamilyHandle *cf,
+ rocksdb::Slice *start, rocksdb::Slice *limit,
+ int concurrency = 0);
+ bool is_manual_compaction_finished(int mc_id);
+ void clear_manual_compaction_request(int mc_id, bool init_only = false);
+ void clear_all_manual_compaction_requests();
+};
+
+/*
+ Drop index thread control
+*/
+
+struct Rdb_drop_index_thread : public Rdb_thread {
+ virtual void run() override;
+};
+
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_utils.cc b/storage/rocksdb/rdb_utils.cc
new file mode 100644
index 00000000000..85eed64775c
--- /dev/null
+++ b/storage/rocksdb/rdb_utils.cc
@@ -0,0 +1,369 @@
+/*
+ Copyright (c) 2016, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+
+#include <my_config.h>
+
+/* This C++ file's header */
+#include "./rdb_utils.h"
+
+/* C++ standard header files */
+#include <array>
+#include <sstream>
+#include <string>
+#include <vector>
+
+/* C standard header files */
+#include <ctype.h>
+
+/* MyRocks header files */
+#include "./ha_rocksdb.h"
+
+/*
+ Both innobase/include/ut0counter.h and rocksdb/port/port_posix.h define
+ CACHE_LINE_SIZE.
+*/
+#ifdef CACHE_LINE_SIZE
+# undef CACHE_LINE_SIZE
+#endif
+
+/* RocksDB header files */
+#include "util/compression.h"
+
+namespace myrocks {
+
+/*
+ Skip past any spaces in the input
+*/
+const char *rdb_skip_spaces(const struct charset_info_st *const cs,
+ const char *str) {
+ while (my_isspace(cs, *str)) {
+ str++;
+ }
+
+ return str;
+}
+
+/*
+ Compare (ignoring case) to see if str2 is the next data in str1.
+ Note that str1 can be longer but we only compare up to the number
+ of characters in str2.
+*/
+bool rdb_compare_strings_ic(const char *const str1, const char *const str2) {
+ // Scan through the strings
+ size_t ii;
+ for (ii = 0; str2[ii]; ii++) {
+ if (toupper(static_cast<int>(str1[ii])) !=
+ toupper(static_cast<int>(str2[ii]))) {
+ return false;
+ }
+ }
+
+ return true;
+}
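+
+/* Example (illustrative): rdb_compare_strings_ic("Comment='x'", "comment")
+   returns true; only the first strlen(str2) characters are compared. */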
+
+/*
+ Scan through an input string looking for pattern, ignoring case
+ and skipping all data enclosed in quotes.
+*/
+const char *rdb_find_in_string(const char *str, const char *pattern,
+ bool *const succeeded) {
+ char quote = '\0';
+ bool escape = false;
+
+ *succeeded = false;
+
+ for (; *str; str++) {
+    /* If we found our starting quote character */
+ if (*str == quote) {
+ /* If it was escaped ignore it */
+ if (escape) {
+ escape = false;
+ }
+ /* Otherwise we are now outside of the quoted string */
+ else {
+ quote = '\0';
+ }
+ }
+    /* Else check whether we are currently inside a quoted string */
+ else if (quote != '\0') {
+ /* If so, check for the escape character */
+ escape = !escape && *str == '\\';
+ }
+ /* Else if we found a quote we are starting a quoted string */
+ else if (*str == '"' || *str == '\'' || *str == '`') {
+ quote = *str;
+ }
+ /* Else we are outside of a quoted string - look for our pattern */
+ else {
+ if (rdb_compare_strings_ic(str, pattern)) {
+ *succeeded = true;
+ return str;
+ }
+ }
+ }
+
+  // Not found: return a pointer to the null terminator. (A successful
+  // match returns the position of the pattern from inside the loop above.)
+ return str;
+}
+
+/*
+ See if the next valid token matches the specified string
+*/
+const char *rdb_check_next_token(const struct charset_info_st *const cs,
+ const char *str, const char *const pattern,
+ bool *const succeeded) {
+ // Move past any spaces
+ str = rdb_skip_spaces(cs, str);
+
+ // See if the next characters match the pattern
+ if (rdb_compare_strings_ic(str, pattern)) {
+ *succeeded = true;
+ return str + strlen(pattern);
+ }
+
+ *succeeded = false;
+ return str;
+}
+
+/*
+ Parse id
+*/
+const char *rdb_parse_id(const struct charset_info_st *const cs,
+ const char *str, std::string *const id) {
+ // Move past any spaces
+ str = rdb_skip_spaces(cs, str);
+
+ if (*str == '\0') {
+ return str;
+ }
+
+ char quote = '\0';
+ if (*str == '`' || *str == '"') {
+ quote = *str++;
+ }
+
+ size_t len = 0;
+ const char *start = str;
+
+ if (quote != '\0') {
+ for (;;) {
+ if (*str == '\0') {
+ return str;
+ }
+
+ if (*str == quote) {
+ str++;
+ if (*str != quote) {
+ break;
+ }
+ }
+
+ str++;
+ len++;
+ }
+ } else {
+ while (!my_isspace(cs, *str) && *str != '(' && *str != ')' && *str != '.' &&
+ *str != ',' && *str != '\0') {
+ str++;
+ len++;
+ }
+ }
+
+ // If the user requested the id create it and return it
+ if (id != nullptr) {
+ *id = std::string("");
+ id->reserve(len);
+ while (len--) {
+ *id += *start;
+ if (*start++ == quote) {
+ start++;
+ }
+ }
+ }
+
+ return str;
+}
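+
+/*
+  Example (illustrative): given "`a``b` rest", *id is set to "a`b" (the
+  doubled quote is unescaped) and the returned pointer is left at " rest".
+*/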
+
+/*
+ Skip id
+*/
+const char *rdb_skip_id(const struct charset_info_st *const cs,
+ const char *str) {
+ return rdb_parse_id(cs, str, nullptr);
+}
+
+/*
+ Parses a given string into tokens (if any) separated by a specific delimiter.
+*/
+const std::vector<std::string> parse_into_tokens(const std::string &s,
+ const char delim) {
+ std::vector<std::string> tokens;
+ std::string t;
+ std::stringstream ss(s);
+
+ while (getline(ss, t, delim)) {
+ tokens.push_back(t);
+ }
+
+ return tokens;
+}
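+
+/* Example (illustrative): parse_into_tokens("a,b,,c", ',') returns
+   {"a", "b", "", "c"}. */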
+
+static const std::size_t rdb_hex_bytes_per_char = 2;
+static const std::array<char, 16> rdb_hexdigit = {{'0', '1', '2', '3', '4', '5',
+ '6', '7', '8', '9', 'a', 'b',
+ 'c', 'd', 'e', 'f'}};
+
+/*
+ Convert data into a hex string with optional maximum length.
+  If the data is larger than the maximum length, truncate it and append "..".
+*/
+std::string rdb_hexdump(const char *data, const std::size_t data_len,
+ const std::size_t maxsize) {
+ // Count the elements in the string
+ std::size_t elems = data_len;
+ // Calculate the amount of output needed
+ std::size_t len = elems * rdb_hex_bytes_per_char;
+ std::string str;
+
+ if (maxsize != 0 && len > maxsize) {
+ // If the amount of output is too large adjust the settings
+ // and leave room for the ".." at the end
+ elems = (maxsize - 2) / rdb_hex_bytes_per_char;
+ len = elems * rdb_hex_bytes_per_char + 2;
+ }
+
+ // Reserve sufficient space to avoid reallocations
+ str.reserve(len);
+
+ // Loop through the input data and build the output string
+ for (std::size_t ii = 0; ii < elems; ii++, data++) {
+ uint8_t ch = (uint8_t)*data;
+ str += rdb_hexdigit[ch >> 4];
+ str += rdb_hexdigit[ch & 0x0F];
+ }
+
+ // If we can't fit it all add the ".."
+ if (elems != data_len) {
+ str += "..";
+ }
+
+ return str;
+}
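+
+/*
+  Example (illustrative): rdb_hexdump("AB", 2, 0) returns "4142", while
+  rdb_hexdump("ABC", 3, 4) truncates the output to "41..".
+*/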
+
+/*
+ Attempt to access the database subdirectory to see if it exists
+*/
+bool rdb_database_exists(const std::string &db_name) {
+ const std::string dir =
+ std::string(mysql_real_data_home) + FN_DIRSEP + db_name;
+ struct st_my_dir *const dir_info =
+ my_dir(dir.c_str(), MYF(MY_DONT_SORT | MY_WANT_STAT));
+ if (dir_info == nullptr) {
+ return false;
+ }
+
+ my_dirend(dir_info);
+ return true;
+}
+
+void rdb_log_status_error(const rocksdb::Status &s, const char *msg) {
+ if (msg == nullptr) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: status error, code: %d, error message: %s",
+ s.code(), s.ToString().c_str());
+ return;
+ }
+
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: %s, Status Code: %d, Status: %s", msg, s.code(),
+ s.ToString().c_str());
+}
+
+/*
+ @brief
+ Return a comma-separated string with compiled-in compression types.
+ Not thread-safe.
+*/
+const char *get_rocksdb_supported_compression_types()
+{
+ static std::string compression_methods_buf;
+ static bool inited=false;
+ if (!inited)
+ {
+ inited= true;
+ std::vector<rocksdb::CompressionType> known_types=
+ {
+ rocksdb::kSnappyCompression,
+ rocksdb::kZlibCompression,
+ rocksdb::kBZip2Compression,
+ rocksdb::kLZ4Compression,
+ rocksdb::kLZ4HCCompression,
+ rocksdb::kXpressCompression,
+ rocksdb::kZSTDNotFinalCompression
+ };
+
+ for (auto typ : known_types)
+ {
+ if (CompressionTypeSupported(typ))
+ {
+ if (compression_methods_buf.size())
+ compression_methods_buf.append(",");
+ compression_methods_buf.append(CompressionTypeToString(typ));
+ }
+ }
+ }
+ return compression_methods_buf.c_str();
+}
+
+bool rdb_check_rocksdb_corruption() {
+ return !my_access(myrocks::rdb_corruption_marker_file_name().c_str(), F_OK);
+}
+
+void rdb_persist_corruption_marker() {
+ const std::string &fileName(myrocks::rdb_corruption_marker_file_name());
+ /* O_SYNC is not supported on windows */
+ int fd = my_open(fileName.c_str(), O_CREAT | IF_WIN(0, O_SYNC), MYF(MY_WME));
+ if (fd < 0) {
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Can't create file %s to mark rocksdb as "
+ "corrupted.",
+ fileName.c_str());
+ } else {
+ // NO_LINT_DEBUG
+ sql_print_information(
+ "RocksDB: Creating the file %s to abort mysqld "
+ "restarts. Remove this file from the data directory "
+ "after fixing the corruption to recover. ",
+ fileName.c_str());
+ }
+
+#ifdef _WIN32
+ /* A replacement for O_SYNC flag above */
+ if (fd >= 0)
+ my_sync(fd, MYF(0));
+#endif
+
+ int ret = my_close(fd, MYF(MY_WME));
+ if (ret) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Error (%d) closing the file %s", ret,
+ fileName.c_str());
+ }
+}
+
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_utils.h b/storage/rocksdb/rdb_utils.h
new file mode 100644
index 00000000000..0ef74b9fd06
--- /dev/null
+++ b/storage/rocksdb/rdb_utils.h
@@ -0,0 +1,335 @@
+/*
+ Copyright (c) 2016, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+#pragma once
+
+#include "rdb_mariadb_port.h"
+
+/* C++ standard header files */
+#include <chrono>
+#include <string>
+#include <vector>
+#include <functional>
+
+/* MySQL header files */
+#include "../sql/log.h"
+#include "./my_stacktrace.h"
+#include "./sql_string.h"
+
+/* RocksDB header files */
+#include "rocksdb/slice.h"
+#include "rocksdb/status.h"
+
+#ifdef HAVE_JEMALLOC
+#include <jemalloc/jemalloc.h>
+#endif
+
+namespace myrocks {
+
+/*
+ Guess what?
+ An interface is a class where all members are public by default.
+*/
+
+#ifndef interface
+#define interface struct
+#endif // interface
+
+/*
+  Introduce C-style pseudo-namespaces, a handy way to make code more readable
+ when calling into a legacy API, which does not have any namespace defined.
+ Since we cannot or don't want to change the API in any way, we can use this
+ mechanism to define readability tokens that look like C++ namespaces, but are
+ not enforced in any way by the compiler, since the pre-compiler strips them
+ out. However, on the calling side, code looks like my_core::thd_get_ha_data()
+ rather than plain a thd_get_ha_data() call. This technique adds an immediate
+ visible cue on what type of API we are calling into.
+*/
+
+#ifndef my_core
+// C-style pseudo-namespace for MySQL Core API, to be used in decorating calls
+// to non-obvious MySQL functions, like the ones that do not start with well
+// known prefixes: "my_", "sql_", and "mysql_".
+#define my_core
+#endif // my_core
+
+/*
+ The intent behind a SHIP_ASSERT() macro is to have a mechanism for validating
+ invariants in retail builds. Traditionally assertions (such as macros defined
+ in <cassert>) are evaluated for performance reasons only in debug builds and
+ become NOOP in retail builds when DBUG_OFF is defined.
+
+ This macro is intended to validate the invariants which are critical for
+ making sure that data corruption and data loss won't take place. Proper
+ intended usage can be described as "If a particular condition is not true then
+  stop everything that's going on and terminate the process because continued
+ execution will cause really bad things to happen".
+
+ Use the power of SHIP_ASSERT() wisely.
+*/
+
+#ifndef SHIP_ASSERT
+#define SHIP_ASSERT(expr) \
+ do { \
+ if (!(expr)) { \
+ my_safe_printf_stderr("\nShip assert failure: \'%s\'\n", #expr); \
+ abort(); \
+ } \
+ } while (0)
+#endif // SHIP_ASSERT
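+
+/*
+  Illustrative use: SHIP_ASSERT(new_size <= buf_size) aborts the process
+  even in retail builds, unlike DBUG_ASSERT which compiles away when
+  DBUG_OFF is defined.
+*/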
+
+/*
+ Assert a implies b.
+ If a is true, then b must be true.
+  If a is false, then the value of b does not matter.
+*/
+#ifndef DBUG_ASSERT_IMP
+#define DBUG_ASSERT_IMP(a, b) DBUG_ASSERT(!(a) || (b))
+#endif
+
+/*
+ Assert a if and only if b.
+ a and b must be both true or both false.
+*/
+#ifndef DBUG_ASSERT_IFF
+#define DBUG_ASSERT_IFF(a, b) \
+ DBUG_ASSERT(static_cast<bool>(a) == static_cast<bool>(b))
+#endif
+
+
+/*
+ Portability: use __PRETTY_FUNCTION__ when available, otherwise use __func__
+ which is in the standard.
+*/
+
+#ifdef __GNUC__
+# define __MYROCKS_PORTABLE_PRETTY_FUNCTION__ __PRETTY_FUNCTION__
+#else
+# define __MYROCKS_PORTABLE_PRETTY_FUNCTION__ __func__
+#endif
+
+/*
+ Intent behind this macro is to avoid manually typing the function name every
+ time we want to add the debugging statement and use the compiler for this
+ work. This avoids typical refactoring problems when one renames a function,
+ but the tracing message doesn't get updated.
+
+ We could use __func__ or __FUNCTION__ macros, but __PRETTY_FUNCTION__
+ contains the signature of the function as well as its bare name and provides
+ therefore more context when interpreting the logs.
+*/
+#define DBUG_ENTER_FUNC() DBUG_ENTER(__MYROCKS_PORTABLE_PRETTY_FUNCTION__)
+
+/*
+ Error handling pattern used across MySQL abides by the following rules: "All
+ functions that can report an error (usually an allocation error), should
+ return 0/FALSE/false on success, 1/TRUE/true on failure."
+
+ https://dev.mysql.com/doc/internals/en/additional-suggestions.html has more
+ details.
+
+  To improve the comprehension and readability of the MyRocks codebase we'll
+  use constants similar to the ones in the C standard (EXIT_SUCCESS and
+  EXIT_FAILURE) to make sure that both failure and success paths are clearly
+  identifiable. The definitions of FALSE and TRUE come from <my_global.h>.
+*/
+#define HA_EXIT_SUCCESS FALSE
+#define HA_EXIT_FAILURE TRUE
+
+/*
+ Macros to better convey the intent behind checking the results from locking
+ and unlocking mutexes.
+*/
+#define RDB_MUTEX_LOCK_CHECK(m) \
+ rdb_check_mutex_call_result(__MYROCKS_PORTABLE_PRETTY_FUNCTION__, true, \
+ mysql_mutex_lock(&m))
+#define RDB_MUTEX_UNLOCK_CHECK(m) \
+ rdb_check_mutex_call_result(__MYROCKS_PORTABLE_PRETTY_FUNCTION__, false, \
+ mysql_mutex_unlock(&m))
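+
+// A minimal usage sketch, assuming a mysql_mutex_t member named m_mutex:
+//
+//   RDB_MUTEX_LOCK_CHECK(m_mutex);
+//   /* ...critical section... */
+//   RDB_MUTEX_UNLOCK_CHECK(m_mutex);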
+
+/*
+ Generic constant.
+*/
+const size_t RDB_MAX_HEXDUMP_LEN = 1000;
+
+/*
+  Helper function to get a NULL-terminated uchar* out of a given MySQL String.
+*/
+
+inline uchar *rdb_mysql_str_to_uchar_str(my_core::String *str) {
+ DBUG_ASSERT(str != nullptr);
+ return reinterpret_cast<uchar *>(str->c_ptr());
+}
+
+/*
+  Helper function to get a plain (not necessarily NULL-terminated) uchar* out
+  of a given STL string.
+*/
+
+inline const uchar *rdb_std_str_to_uchar_ptr(const std::string &str) {
+ return reinterpret_cast<const uchar *>(str.data());
+}
+
+/*
+ Helper function to convert seconds to milliseconds.
+*/
+
+constexpr int rdb_convert_sec_to_ms(int sec) {
+ return std::chrono::milliseconds(std::chrono::seconds(sec)).count();
+}
+
+/*
+  Helper function to get a plain (not necessarily NULL-terminated) uchar* out
+  of a given RocksDB item.
+*/
+
+inline const uchar *rdb_slice_to_uchar_ptr(const rocksdb::Slice *item) {
+ DBUG_ASSERT(item != nullptr);
+ return reinterpret_cast<const uchar *>(item->data());
+}
+
+/*
+  Call this function in cases when you can't rely on the allocator to reclaim
+  memory on its own and need to explicitly purge all unused dirty pages. This
+  should be a relatively rare scenario, reserved for cases where it has been
+  verified that the intervention has noticeable benefits.
+*/
+inline int purge_all_jemalloc_arenas() {
+#ifdef HAVE_JEMALLOC
+ unsigned narenas = 0;
+ size_t sz = sizeof(unsigned);
+ char name[25] = {0};
+
+ // Get the number of arenas first. Please see `jemalloc` documentation for
+ // all the various options.
+ int result = mallctl("arenas.narenas", &narenas, &sz, nullptr, 0);
+
+  // `mallctl` returns 0 on success, and we really want the caller to know
+  // whether all this trickery actually worked.
+ if (result) {
+ return result;
+ }
+
+ // Form the command to be passed to `mallctl` and purge all the unused dirty
+ // pages.
+  snprintf(name, sizeof(name) / sizeof(char), "arena.%u.purge", narenas);
+ result = mallctl(name, nullptr, nullptr, nullptr, 0);
+
+ return result;
+#else
+ return EXIT_SUCCESS;
+#endif
+}
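+
+// A minimal usage sketch (hypothetical call site): the return value is the
+// `mallctl` result, so 0 means the purge request was accepted.
+//
+//   if (purge_all_jemalloc_arenas() != 0) {
+//     // Purge failed; the arenas will still be purged lazily by jemalloc.
+//   }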
+
+/*
+ Helper function to check the result of locking or unlocking a mutex. We'll
+ intentionally abort in case of a failure because it's better to terminate
+ the process instead of continuing in an undefined state and corrupting data
+ as a result.
+*/
+inline void rdb_check_mutex_call_result(const char *function_name,
+ const bool attempt_lock,
+ const int result) {
+ if (unlikely(result)) {
+ /* NO_LINT_DEBUG */
+ sql_print_error(
+ "%s a mutex inside %s failed with an "
+ "error code %d.",
+ attempt_lock ? "Locking" : "Unlocking", function_name, result);
+
+ // This will hopefully result in a meaningful stack trace which we can use
+ // to efficiently debug the root cause.
+ abort();
+ }
+}
+
+void rdb_log_status_error(const rocksdb::Status &s, const char *msg = nullptr);
+
+// Returns true if the marker file exists, which indicates that corruption
+// has been detected.
+bool rdb_check_rocksdb_corruption();
+
+// Stores a marker file in the data directory so that after a restart the
+// server is still aware that the RocksDB data is corrupted.
+void rdb_persist_corruption_marker();
+
+/*
+ Helper functions to parse strings.
+*/
+
+const char *rdb_skip_spaces(const struct charset_info_st *const cs,
+ const char *str)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+bool rdb_compare_strings_ic(const char *const str1, const char *const str2)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+const char *rdb_find_in_string(const char *str, const char *pattern,
+ bool *const succeeded)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+const char *rdb_check_next_token(const struct charset_info_st *const cs,
+ const char *str, const char *const pattern,
+ bool *const succeeded)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+const char *rdb_parse_id(const struct charset_info_st *const cs,
+ const char *str, std::string *const id)
+ MY_ATTRIBUTE((__nonnull__(1, 2), __warn_unused_result__));
+
+const char *rdb_skip_id(const struct charset_info_st *const cs, const char *str)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+const std::vector<std::string> parse_into_tokens(const std::string &s,
+ const char delim);
+
+/*
+ Helper functions to populate strings.
+*/
+
+std::string rdb_hexdump(const char *data, const std::size_t data_len,
+ const std::size_t maxsize = 0)
+ MY_ATTRIBUTE((__nonnull__));
+
+/*
+ Helper function to see if a database exists
+ */
+bool rdb_database_exists(const std::string &db_name);
+
+const char *get_rocksdb_supported_compression_types();
+
+/*
+ Helper class to make sure cleanup always happens. Helpful for complicated
+ logic where there can be multiple exits/returns requiring cleanup
+ */
+class Ensure_cleanup {
+ public:
+ explicit Ensure_cleanup(std::function<void()> cleanup)
+ : m_cleanup(cleanup), m_skip_cleanup(false) {}
+
+ ~Ensure_cleanup() {
+ if (!m_skip_cleanup) {
+ m_cleanup();
+ }
+ }
+
+ // If you want to skip cleanup (such as when the operation is successful)
+ void skip() { m_skip_cleanup = true; }
+
+ private:
+ std::function<void()> m_cleanup;
+ bool m_skip_cleanup;
+};
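+
+// A minimal usage sketch (hypothetical file-copying code): the lambda runs on
+// every exit path unless skip() is called first.
+//
+//   int copy_table_file(const char *src_path) {
+//     FILE *const f = fopen(src_path, "rb");
+//     if (f == nullptr) return HA_EXIT_FAILURE;
+//     Ensure_cleanup file_guard([f]() { fclose(f); });
+//     // ...multiple early returns, each of which closes f automatically...
+//     return HA_EXIT_SUCCESS;
+//   }
+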
+} // namespace myrocks
diff --git a/storage/rocksdb/rocksdb b/storage/rocksdb/rocksdb
new file mode 160000
+Subproject bba5e7bc21093d7cfa765e1280a7c4fdcd28428
diff --git a/storage/rocksdb/rocksdb-range-access.txt b/storage/rocksdb/rocksdb-range-access.txt
new file mode 100644
index 00000000000..6b5a0db938a
--- /dev/null
+++ b/storage/rocksdb/rocksdb-range-access.txt
@@ -0,0 +1,292 @@
+
+This file describes how MySQL index navigation commands are translated into
+RocksDB index navigation commands.
+
+Index tuples are shown as
+
+ ( kv )-aaa-pkN
+
+where
+ * '(kv)' is the 4-byte index number.
+ * '-' is just for readability.
+ * everything that follows the '-' is the mem-comparable form of the key.
+   In ASCII encoding, aaa < bbb < ccc < xxx.
+
+Tuples that start with '#' do not exist in the database. They are only shown
+to demonstrate where Seek() calls end up.
+
+== HA_READ_KEY_EXACT, forward CF ==
+
+ (kv-1)-xxx-pk
+# ( kv )-aaa <-- "kv-aaa" doesn't exist in the database, but it would be
+ here.
+ ( kv )-aaa-pk <--- Seek("kv-aaa") will put us here on the next record.
+ ( kv )-aaa-pk2
+ ( kv )-bbb-...
+
+RocksDB calls:
+
+ it->Seek(kv);
+ if (it->Valid() && kd->covers_key(..) && kd->cmp_full_keys(...))
+ return record.
+
+== HA_READ_KEY_EXACT, backward CF ==
+
+When we need to seek to a tuple that is a prefix of a full key:
+
+ (kv+1)-xxx-pk
+ ( kv )-ccc-pk
+ ( kv )-bbb-pk3
+ ( kv )-bbb-pk2
+ ( kv )-bbb-pk1 <--- SeekForPrev("kv-bbb") will put us here on the previous
+ record.
+# ( kv )-bbb <--- "kv-bbb" doesn't exist in the database, but it would be
+ ( kv )-aaa-pk here.
+
+Even when (kv)-bbb-pk1 is the last record in the CF, SeekForPrev() will find the
+last record before "kv-bbb", so it already takes care of this case for us.
+
+RocksDB calls:
+
+ it->SeekForPrev(kv);
+ if (it->Valid() && kd->covers_key(..) && kd->cmp_full_keys(...))
+ return record.
+
+== HA_READ_KEY_OR_NEXT, forward CF ==
+
+This is finding min(key) such that key >= lookup_tuple.
+
+If lookup tuple is kv-bbb:
+
+ ( kv )-aaa-pk
+# ( kv )-bbb <-- "kv-bbb" doesn't exist in the database, but it would be
+ here.
+ ( kv )-bbb-pk1 <--- Seek("kv-bbb") will put us here on the next record.
+ ( kv )-bbb-pk2
+ ( kv )-bbb-...
+
+RocksDB calls:
+
+ Seek(kv);
+ if (it->Valid() && kd->covers_key(..))
+ return record.
+
+== HA_READ_KEY_OR_NEXT, backward CF ==
+
+When specified key tuple is a key prefix:
+
+ (kv+1)-xxx-pk
+ ( kv )-ccc-pk
+ ( kv )-bbb-pk3
+ ( kv )-bbb-pk2
+ ( kv )-bbb-pk1 <--- Seek("kv-bbb") will put us here on the previous record.
+# ( kv )-bbb <--- "kv-bbb" doesn't exist in the database, but it would be
+ here.
+ ( kv )-aaa-pk
+
+Even when (kv)-bbb-pk1 is the last record in the CF, SeekForPrev() will find the
+last record before "kv-bbb", so it already takes care of this case for us.
+
+Another kind of special case is when we need to seek to the full value.
+Suppose, the lookup tuple is kv-bbb-pk1:
+
+ (kv+1)-xxx-pk
+ ( kv )-ccc-pk
+ ( kv )-bbb-pk3
+ ( kv )-bbb-pk2
+ ( kv )-bbb-pk1 < -- SeekForPrev(kv-bbb-pk1)
+ ( kv )-bbb-pk0
+
+Then, SeekForPrev(kv-bbb-pk1) may position us exactly at the tuple we need.
+Even if kv-bbb-pk1 is not present in the database, we will be positioned on
+kv-bbb-pk2, no matter whether kv-bbb-pk2 is the last key or not.
+
+RocksDB calls:
+
+ SeekForPrev(...);
+ if (it->Valid() && kd->covers_key(..))
+ return record.
+
+== HA_READ_AFTER_KEY, forward CF ==
+
+This is finding min(key) such that key > lookup_key.
+
+Suppose lookup_key = kv-bbb
+
+ ( kv )-aaa-pk
+# ( kv )-bbb
+ ( kv )-bbb-pk1 <--- Seek("kv-bbb") will put us here. We need to
+ ( kv )-bbb-pk2 get to the value that is next after 'bbb'.
+ ( kv )-bbb-pk3
+ ( kv )-bbb-pk4
+ ( kv )-bbb-pk5
+ ( kv )-ccc-pkN <--- That is, we need to be here.
+
+However, we don't know that the next value is kv-ccc. Instead, we seek to the
+first value that is strictly greater than 'kv-bbb', which is Successor(kv-bbb)
+(sketched right after this section).
+
+It doesn't matter if we're using a full extended key or not.
+
+RocksDB calls:
+
+ Seek(Successor(kv-bbb));
+ if (it->Valid() && kd->covers_key(...))
+ return record;
+
+Note that the code is the same as with HA_READ_KEY_OR_NEXT, except that
+we seek to Successor($lookup_key) instead of $lookup_key itself.
+
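+Successor(x) denotes the smallest mem-comparable string that is strictly
+greater than every key starting with x. A minimal sketch of the idea (not the
+actual MyRocks implementation; it assumes keys compare as unsigned bytes):
+
+  std::string successor(std::string key) {
+    // Drop trailing 0xFF bytes: they have no in-place successor and carry
+    // over into the preceding byte.
+    while (!key.empty() && static_cast<unsigned char>(key.back()) == 0xFF)
+      key.pop_back();
+    if (!key.empty())
+      key.back() = static_cast<char>(static_cast<unsigned char>(key.back()) + 1);
+    return key;  // an empty result means no strict upper bound exists
+  }
+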
+== HA_READ_AFTER_KEY, backward CF ==
+
+Suppose, the lookup key is 'kv-bbb':
+
+ (kv+1)-xxx-pk
+ ( kv )-ccc-pk7
+ ( kv )-ccc-pk6 <-- We get here when we call Seek(Successor(kv-bbb))
+# Successor(kv-bbb)
+ ( kv )-bbb-pk5
+ ( kv )-bbb-pk4
+ ( kv )-bbb-pk3
+ ( kv )-bbb-pk2
+ ( kv )-bbb-pk1
+# ( kv )-bbb <-- We would get here if we called SeekForPrev(kv-bbb).
+ ( kv )-aaa-pk
+
+RocksDB calls:
+
+ SeekForPrev(Successor(kv-bbb));
+ if (it->Valid() && kd->covers_key(...))
+ return record.
+
+Note that the code is the same as with HA_READ_KEY_OR_NEXT, except that
+we seek to Successor($lookup_key) instead of $lookup_key itself.
+
+== HA_READ_BEFORE_KEY, forward CF ==
+
+This is finding max(key) such that key < lookup_tuple.
+
+Suppose, lookup_tuple=kv-bbb.
+
+ ( kv )-aaa-pk1
+ ( kv )-aaa-pk2
+ ( kv )-aaa-pk3 <-- SeekForPrev("kv-bbb") will put us here.
+# ( kv )-bbb
+ ( kv )-bbb-pk4
+ ( kv )-bbb-pk5
+ ( kv )-bbb-pk6
+
+If the lookup tuple is a full key (e.g. kv-bbb-pk3), and the key is present in
+the database, the iterator will be positioned on the key. We will need to call
+Prev() to get the next key.
+
+RocksDB calls:
+
+ it->SeekForPrev(kv-bbb);
+ if (it->Valid() && using_full_key &&
+ kd->value_matches_prefix(...))
+ {
+ /* We are using full key and we've hit an exact match */
+ it->Prev();
+ }
+
+ if (it->Valid() && kd->covers_key(...))
+ return record;
+
+== HA_READ_BEFORE_KEY, backward CF ==
+
+This is finding max(key) such that key < lookup_tuple.
+Suppose, lookup_tuple=kv-bbb, a prefix of the full key.
+
+ ( kv )-bbb-pk6
+ ( kv )-bbb-pk5
+ ( kv )-bbb-pk4
+# ( kv )-bbb
+ ( kv )-aaa-pk3 <-- Need to be here, and Seek("kv-bbb") will put us here
+ ( kv )-aaa-pk2
+ ( kv )-aaa-pk1
+
+If the lookup tuple is a full key (e.g. kv-bbb-pk4), and the key is present in
+the database, the iterator will be positioned on the key. We will need to call
+Next() to get the next key.
+
+RocksDB calls:
+
+ it->Seek(kv-bbb);
+ if (it->Valid() && using_full_key &&
+ kd->value_matches_prefix(...))
+ {
+ /* We are using full key and we've hit an exact match */
+ it->Next();
+ }
+
+ if (it->Valid() && kd->covers_key(...))
+ return record;
+
+== HA_READ_PREFIX_LAST, forward CF ==
+
+Find the last record with the specified index prefix lookup_tuple.
+
+Suppose, lookup_tuple='kv-bbb'
+
+ ( kv )-aaa-pk2
+ ( kv )-aaa-pk3
+# ( kv )-bbb
+ ( kv )-bbb-pk4
+ ( kv )-bbb-pk5
+ ( kv )-bbb-pk6
+ ( kv )-bbb-pk7 <--- SeekForPrev(Successor(kv-bbb)) will get us here
+# ( kv )-ccc
+ ( kv )-ccc-pk8
+ ( kv )-ccc-pk9
+
+RocksDB calls:
+
+ SeekForPrev(Successor(kv-bbb));
+  if (using_full_key && it->Valid() && !cmp_full_keys(Successor(lookup_key)))
+ it->Prev();
+ if (it->Valid() && kd->covers_key(...))
+ {
+ if (!cmp_full_keys(lookup_tuple)) // not needed in _OR_PREV
+ {
+ // the record's prefix matches lookup_tuple.
+ return record;
+ }
+ }
+
+== HA_READ_PREFIX_LAST, backward CF ==
+
+Suppose, lookup_tuple='kv-bbb'
+
+ ( kv )-ccc-pk9
+ ( kv )-ccc-pk8
+# ( kv )-ccc <-- 2. Seek(Successor(kv-bbb)) will point here
+ and it will fall down to the next row.
+ ( kv )-bbb-pk7 <--- 1. Need to be here.
+ ( kv )-bbb-pk6
+ ( kv )-bbb-pk5
+ ( kv )-bbb-pk4
+# ( kv )-bbb
+ ( kv )-aaa-pk3
+ ( kv )-aaa-pk2
+
+
+RocksDB calls:
+
+ it->Seek(Successor(kv-bbb));
+
+  if (using_full_key && it->Valid() && !cmp_full_keys(Successor(lookup_key)))
+ it->Next();
+
+ if (it->Valid() && kd->covers_key(..))
+ {
+ if (!cmp_full_keys(...)) // not needed in _OR_PREV
+ {
+ // the record's prefix matches lookup_tuple.
+ return record;
+ }
+ }
+
+== HA_READ_PREFIX_LAST_OR_PREV, forward or backward CF ==
+
+This is just like HA_READ_PREFIX_LAST but we don't need to check that the key
+we've got is in the search prefix. (search for "not needed in _OR_PREV" above)
diff --git a/storage/rocksdb/tools/mysql_ldb.cc b/storage/rocksdb/tools/mysql_ldb.cc
new file mode 100644
index 00000000000..ac61eb4f257
--- /dev/null
+++ b/storage/rocksdb/tools/mysql_ldb.cc
@@ -0,0 +1,18 @@
+// Copyright (c) 2013, Facebook, Inc. All rights reserved.
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree. An additional grant
+// of patent rights can be found in the PATENTS file in the same directory.
+//
+#include <my_config.h>
+#include "../rdb_comparator.h"
+#include "rocksdb/ldb_tool.h"
+
+int main(int argc, char **argv) {
+ rocksdb::Options db_options;
+ myrocks::Rdb_pk_comparator pk_comparator;
+ db_options.comparator = &pk_comparator;
+
+ rocksdb::LDBTool tool;
+ tool.Run(argc, argv, db_options);
+ return 0;
+}
diff --git a/storage/rocksdb/unittest/CMakeLists.txt b/storage/rocksdb/unittest/CMakeLists.txt
new file mode 100644
index 00000000000..de8d0d82aea
--- /dev/null
+++ b/storage/rocksdb/unittest/CMakeLists.txt
@@ -0,0 +1,22 @@
+IF (TARGET rocksdb)
+ INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/zlib
+ ${CMAKE_SOURCE_DIR}/unittest/mytap
+ ${CMAKE_SOURCE_DIR}/rocksdb/third-party/gtest-1.7.0/fused-src
+ )
+ LINK_LIBRARIES(mytap mysys dbug strings)
+
+ ADD_DEFINITIONS(-DSTANDALONE_UNITTEST)
+
+ MYSQL_ADD_EXECUTABLE(test_properties_collector
+ test_properties_collector.cc
+ )
+ TARGET_LINK_LIBRARIES(test_properties_collector mysqlserver)
+
+ # Necessary to make sure that we can use the jemalloc API calls.
+ GET_TARGET_PROPERTY(mysql_embedded LINK_FLAGS PREV_LINK_FLAGS)
+ IF(NOT PREV_LINK_FLAGS)
+ SET(PREV_LINK_FLAGS)
+ ENDIF()
+ SET_TARGET_PROPERTIES(test_properties_collector PROPERTIES LINK_FLAGS
+ "${PREV_LINK_FLAGS} ${WITH_MYSQLD_LDFLAGS}")
+ENDIF()
diff --git a/storage/rocksdb/unittest/test_properties_collector.cc b/storage/rocksdb/unittest/test_properties_collector.cc
new file mode 100644
index 00000000000..6870cd20803
--- /dev/null
+++ b/storage/rocksdb/unittest/test_properties_collector.cc
@@ -0,0 +1,54 @@
+/*
+ Copyright (c) 2015, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
+
+/* MyRocks header files */
+#include "../ha_rocksdb.h"
+#include "../rdb_datadic.h"
+
+void putKeys(myrocks::Rdb_tbl_prop_coll *coll, int num, bool is_delete,
+ uint64_t expected_deleted) {
+ std::string str("aaaaaaaaaaaaaa");
+ rocksdb::Slice sl(str.data(), str.size());
+
+ for (int i = 0; i < num; i++) {
+ coll->AddUserKey(
+ sl, sl, is_delete ? rocksdb::kEntryDelete : rocksdb::kEntryPut, 0, 100);
+ }
+ DBUG_ASSERT(coll->GetMaxDeletedRows() == expected_deleted);
+}
+
+int main(int argc, char **argv) {
+ // test the circular buffer for delete flags
+ myrocks::Rdb_compact_params params;
+ params.m_file_size = 333;
+ params.m_deletes = 333; // irrelevant
+ params.m_window = 10;
+
+ myrocks::Rdb_tbl_prop_coll coll(nullptr, params, 0,
+ RDB_DEFAULT_TBL_STATS_SAMPLE_PCT);
+
+ putKeys(&coll, 2, true, 2); // [xx]
+ putKeys(&coll, 3, false, 2); // [xxo]
+ putKeys(&coll, 1, true, 3); // [xxox]
+ putKeys(&coll, 6, false, 3); // [xxoxoooooo]
+ putKeys(&coll, 3, true, 4); // xxo[xooooooxxx]
+ putKeys(&coll, 1, false, 4); // xxox[ooooooxxxo]
+ putKeys(&coll, 100, false, 4); // ....[oooooooooo]
+ putKeys(&coll, 100, true, 10); // ....[xxxxxxxxxx]
+ putKeys(&coll, 100, true, 10); // ....[oooooooooo]
+
+ return 0;
+}
diff --git a/storage/rocksdb/ut0counter.h b/storage/rocksdb/ut0counter.h
new file mode 100644
index 00000000000..3a7ee85d01c
--- /dev/null
+++ b/storage/rocksdb/ut0counter.h
@@ -0,0 +1,203 @@
+/*
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ut0counter.h
+Counter utility class
+Created 2012/04/12 by Sunny Bains
+*******************************************************/
+
+#ifndef UT0COUNTER_H
+#define UT0COUNTER_H
+
+#include <string.h>
+
+/** CPU cache line size */
+#define UT_CACHE_LINE_SIZE 64
+
+/** Default number of slots to use in ib_counter_t */
+#define IB_N_SLOTS 64
+
+#ifdef __WIN__
+#define get_curr_thread_id() GetCurrentThreadId()
+#else
+#define get_curr_thread_id() pthread_self()
+#endif
+
+#define UT_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+
+/** Get the offset into the counter array. */
+template <typename Type, int N>
+struct generic_indexer_t {
+ /** Default constructor/destructor should be OK. */
+
+ /** @return offset within m_counter */
+ size_t offset(size_t index) const {
+ return(((index % N) + 1) * (UT_CACHE_LINE_SIZE / sizeof(Type)));
+ }
+};
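+
+// A worked example of the offset arithmetic (hypothetical values): with
+// Type = int64_t (8 bytes) and UT_CACHE_LINE_SIZE = 64, each counter slot
+// spans 64 / 8 = 8 array elements. For index 3 (and any N >= 4), offset()
+// returns ((3 % N) + 1) * 8 = 32, and elements 0..7 remain unused padding.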
+
+#ifdef HAVE_SCHED_GETCPU
+//#include <utmpx.h> // Including this causes problems with EMPTY symbol
+#include <sched.h> // Include this instead
+/** Use the CPU id to index into the counter array. If that fails, then
+use the thread id. */
+template <typename Type, int N>
+struct get_sched_indexer_t : public generic_indexer_t<Type, N> {
+ /** Default constructor/destructor should be OK. */
+
+  /* @return the result from sched_getcpu(), or the thread id if it fails. */
+ size_t get_rnd_index() const {
+
+ size_t cpu = sched_getcpu();
+ if (cpu == (size_t) -1) {
+ cpu = get_curr_thread_id();
+ }
+
+ return(cpu);
+ }
+};
+#endif /* HAVE_SCHED_GETCPU */
+
+/** Use the thread id to index into the counter array. */
+template <typename Type, int N>
+struct thread_id_indexer_t : public generic_indexer_t<Type, N> {
+  /** Default constructor/destructor should be OK. */
+
+  /* @return a random number; currently we use the thread id. Where the
+  thread id is represented as a pointer, it may not work as
+  effectively. */
+ size_t get_rnd_index() const {
+ return (size_t)get_curr_thread_id();
+ }
+};
+
+/** For counters where N=1. */
+template <typename Type, int N=1>
+struct single_indexer_t {
+  /** Default constructor/destructor should be OK. */
+
+ /** @return offset within m_counter */
+ size_t offset(size_t index) const {
+ DBUG_ASSERT(N == 1);
+ return((UT_CACHE_LINE_SIZE / sizeof(Type)));
+ }
+
+ /* @return 1 */
+ size_t get_rnd_index() const {
+ DBUG_ASSERT(N == 1);
+ return(1);
+ }
+};
+
+/** Class for using fuzzy counters. The counter is not protected by any
+mutex and the results are not guaranteed to be 100% accurate, but they are
+close enough. Creates an array of counters and separates adjacent elements
+by UT_CACHE_LINE_SIZE bytes to avoid false sharing between CPU cores. */
+template <
+ typename Type,
+ int N = IB_N_SLOTS,
+ template<typename, int> class Indexer = thread_id_indexer_t>
+class ib_counter_t {
+public:
+ ib_counter_t() { memset(m_counter, 0x0, sizeof(m_counter)); }
+
+ ~ib_counter_t()
+ {
+ DBUG_ASSERT(validate());
+ }
+
+ bool validate() {
+#ifdef UNIV_DEBUG
+ size_t n = (UT_CACHE_LINE_SIZE / sizeof(Type));
+
+ /* Check that we aren't writing outside our defined bounds. */
+ for (size_t i = 0; i < UT_ARRAY_SIZE(m_counter); i += n) {
+ for (size_t j = 1; j < n - 1; ++j) {
+ DBUG_ASSERT(m_counter[i + j] == 0);
+ }
+ }
+#endif /* UNIV_DEBUG */
+ return(true);
+ }
+
+  /** If you can't use a good index id, increment by 1. */
+ void inc() { add(1); }
+
+ /** If you can't use a good index id.
+  * @param n - the amount to increment */
+ void add(Type n) {
+ size_t i = m_policy.offset(m_policy.get_rnd_index());
+
+ DBUG_ASSERT(i < UT_ARRAY_SIZE(m_counter));
+
+ m_counter[i] += n;
+ }
+
+  /** Use this if you can use a unique identifier; saves a
+  call to get_rnd_index().
+  @param index - index into a slot
+  @param n - amount to increment */
+ void add(size_t index, Type n) {
+ size_t i = m_policy.offset(index);
+
+ DBUG_ASSERT(i < UT_ARRAY_SIZE(m_counter));
+
+ m_counter[i] += n;
+ }
+
+  /** If you can't use a good index id, decrement by 1. */
+ void dec() { sub(1); }
+
+ /** If you can't use a good index id.
+  * @param n - the amount to decrement */
+ void sub(Type n) {
+ size_t i = m_policy.offset(m_policy.get_rnd_index());
+
+ DBUG_ASSERT(i < UT_ARRAY_SIZE(m_counter));
+
+ m_counter[i] -= n;
+ }
+
+  /** Use this if you can use a unique identifier; saves a
+  call to get_rnd_index().
+  @param index - index into a slot
+  @param n - amount to decrement */
+ void sub(size_t index, Type n) {
+ size_t i = m_policy.offset(index);
+
+ DBUG_ASSERT(i < UT_ARRAY_SIZE(m_counter));
+
+ m_counter[i] -= n;
+ }
+
+ /* @return total value - not 100% accurate, since it is not atomic. */
+ operator Type() const {
+ Type total = 0;
+
+ for (size_t i = 0; i < N; ++i) {
+ total += m_counter[m_policy.offset(i)];
+ }
+
+ return(total);
+ }
+
+private:
+ /** Indexer into the array */
+  Indexer<Type, N> m_policy;
+
+ /** Slot 0 is unused. */
+ Type m_counter[(N + 1) * (UT_CACHE_LINE_SIZE / sizeof(Type))];
+};
+
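+// A minimal usage sketch (hypothetical counter): increments go to a slot
+// chosen by the indexing policy, and reading the counter sums all slots via
+// operator Type().
+//
+//   static ib_counter_t<int64_t, IB_N_SLOTS> n_rows_read;
+//   n_rows_read.inc();            // hot path: no mutex, rare slot sharing
+//   int64_t total = n_rows_read;  // approximate total across all slots
+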
+#endif /* UT0COUNTER_H */
diff --git a/storage/sequence/sequence.cc b/storage/sequence/sequence.cc
index dbb3f7087b5..948f030b479 100644
--- a/storage/sequence/sequence.cc
+++ b/storage/sequence/sequence.cc
@@ -95,9 +95,9 @@ public:
ha_rows records_in_range(uint inx, key_range *min_key,
key_range *max_key);
- double scan_time() { return nvalues(); }
- double read_time(uint index, uint ranges, ha_rows rows) { return rows; }
- double keyread_time(uint index, uint ranges, ha_rows rows) { return rows; }
+ double scan_time() { return (double)nvalues(); }
+ double read_time(uint index, uint ranges, ha_rows rows) { return (double)rows; }
+ double keyread_time(uint index, uint ranges, ha_rows rows) { return (double)rows; }
private:
void set(uchar *buf);
diff --git a/storage/sphinx/ha_sphinx.cc b/storage/sphinx/ha_sphinx.cc
index 96d6bd1b10f..67bf0744c78 100644
--- a/storage/sphinx/ha_sphinx.cc
+++ b/storage/sphinx/ha_sphinx.cc
@@ -1099,7 +1099,7 @@ static bool ParseUrl ( CSphSEShare * share, TABLE * table, bool bCreate )
iPort = atoi(sPort);
if ( !iPort )
- iPort = SPHINXAPI_DEFAULT_PORT;
+ iPort = SPHINXAPI_DEFAULT_PORT;
}
} else
{
@@ -2162,7 +2162,7 @@ int ha_sphinx::Connect ( const char * sHost, ushort uPort )
#if MYSQL_VERSION_ID>=50515
struct addrinfo *hp = NULL;
tmp_errno = getaddrinfo ( sHost, NULL, NULL, &hp );
- if ( !tmp_errno || !hp || !hp->ai_addr )
+ if ( tmp_errno || !hp || !hp->ai_addr )
{
bError = true;
if ( hp )
@@ -2189,8 +2189,9 @@ int ha_sphinx::Connect ( const char * sHost, ushort uPort )
}
#if MYSQL_VERSION_ID>=50515
- memcpy ( &sin.sin_addr, hp->ai_addr, Min ( sizeof(sin.sin_addr), (size_t)hp->ai_addrlen ) );
- freeaddrinfo ( hp );
+ struct sockaddr_in *in = (sockaddr_in *)hp->ai_addr;
+ memcpy ( &sin.sin_addr, &in->sin_addr, Min ( sizeof(sin.sin_addr), sizeof(in->sin_addr) ) );
+ freeaddrinfo ( hp );
#else
memcpy ( &sin.sin_addr, hp->h_addr, Min ( sizeof(sin.sin_addr), (size_t)hp->h_length ) );
my_gethostbyname_r_free();
diff --git a/storage/sphinx/mysql-test/sphinx/disabled.def b/storage/sphinx/mysql-test/sphinx/disabled.def
deleted file mode 100644
index a85b8b71e52..00000000000
--- a/storage/sphinx/mysql-test/sphinx/disabled.def
+++ /dev/null
@@ -1,2 +0,0 @@
-sphinx : MDEV-10986, MDEV-10985
-union-5539 : MDEV-10986, MDEV-10985
diff --git a/storage/sphinx/mysql-test/sphinx/sphinx.result b/storage/sphinx/mysql-test/sphinx/sphinx.result
index 3536ba42af8..c462d0cc883 100644
--- a/storage/sphinx/mysql-test/sphinx/sphinx.result
+++ b/storage/sphinx/mysql-test/sphinx/sphinx.result
@@ -75,3 +75,23 @@ id w q
1 2 test;range=meta.foo_count,100,500
5 1 test;range=meta.foo_count,100,500
drop table ts;
+#
+# MDEV-19205: Sphinx unable to connect using a host name
+#
+create table ts ( id bigint unsigned not null, w int not null, q varchar(255) not null, index(q) ) engine=sphinx connection="sphinx://localhost:SPHINXSEARCH_PORT/*";
+select * from ts where q=';filter=meta.foo_count,100';
+id w q
+1 1 ;filter=meta.foo_count,100
+select * from ts where q='test;filter=meta.sub.int,7';
+id w q
+5 1 test;filter=meta.sub.int,7
+select * from ts where q=';filter=meta.sub.list[0],4';
+id w q
+select * from ts where q=';filter=meta.sub.list[1],4';
+id w q
+5 1 ;filter=meta.sub.list[1],4
+select * from ts where q='test;range=meta.foo_count,100,500';
+id w q
+1 2 test;range=meta.foo_count,100,500
+5 1 test;range=meta.foo_count,100,500
+drop table ts;
diff --git a/storage/sphinx/mysql-test/sphinx/sphinx.test b/storage/sphinx/mysql-test/sphinx/sphinx.test
index fe388f7ddd2..b733a3fc5ff 100644
--- a/storage/sphinx/mysql-test/sphinx/sphinx.test
+++ b/storage/sphinx/mysql-test/sphinx/sphinx.test
@@ -41,3 +41,16 @@ select * from ts where q=';filter=meta.sub.list[0],4';
select * from ts where q=';filter=meta.sub.list[1],4';
select * from ts where q='test;range=meta.foo_count,100,500';
drop table ts;
+
+--echo #
+--echo # MDEV-19205: Sphinx unable to connect using a host name
+--echo #
+
+--replace_result $SPHINXSEARCH_PORT SPHINXSEARCH_PORT
+eval create table ts ( id bigint unsigned not null, w int not null, q varchar(255) not null, index(q) ) engine=sphinx connection="sphinx://localhost:$SPHINXSEARCH_PORT/*";
+select * from ts where q=';filter=meta.foo_count,100';
+select * from ts where q='test;filter=meta.sub.int,7';
+select * from ts where q=';filter=meta.sub.list[0],4';
+select * from ts where q=';filter=meta.sub.list[1],4';
+select * from ts where q='test;range=meta.foo_count,100,500';
+drop table ts;
diff --git a/storage/sphinx/mysql-test/sphinx/suite.pm b/storage/sphinx/mysql-test/sphinx/suite.pm
index fc127ffd6c0..e44a8e626df 100644
--- a/storage/sphinx/mysql-test/sphinx/suite.pm
+++ b/storage/sphinx/mysql-test/sphinx/suite.pm
@@ -18,24 +18,16 @@ sub locate_sphinx_binary {
# Look for Sphinx binaries
my $exe_sphinx_indexer = &locate_sphinx_binary('indexer');
+return "'indexer' binary not found" unless $exe_sphinx_indexer;
-unless ($exe_sphinx_indexer) {
- mtr_report("Sphinx 'indexer' binary not found, sphinx suite will be skipped");
- return "No Sphinx";
-}
my $exe_sphinx_searchd = &locate_sphinx_binary('searchd');
+return "'searchd' binary not found" unless $exe_sphinx_searchd;
-unless ($exe_sphinx_searchd) {
- mtr_report("Sphinx 'searchd' binary not found, sphinx suite will be skipped");
- return "No Sphinx";
-}
+my $sphinx_config= "$::opt_vardir/my_sphinx.conf";
# Check for Sphinx engine
-unless ($ENV{HA_SPHINX_SO} or $::mysqld_variables{'sphinx'} eq "ON") {
- mtr_report("Sphinx engine not found, sphinx suite will be skipped");
- return "No SphinxSE";
-}
+return "SphinxSE not found" unless $ENV{HA_SPHINX_SO} or $::mysqld_variables{'sphinx'} eq "ON";
{
local $_ = `"$exe_sphinx_searchd" --help`;
@@ -105,11 +97,38 @@ sub searchd_start {
&::mtr_verbose("Started $sphinx->{proc}");
}
+sub wait_exp_backoff {
+ my $timeout= shift; # Seconds
+ my $start_wait= shift; # Seconds
+ my $scale_factor= shift;
+
+ $searchd_status= "$exe_sphinx_searchd --status" .
+ " --config $sphinx_config > /dev/null 2>&1";
+
+ my $scale= $start_wait;
+ my $total_sleep= 0;
+ while (1) {
+ my $status = system($searchd_status);
+ if (not $status) {
+ return 0;
+ }
+ if ($total_sleep >= $timeout) {
+ last;
+ }
+
+ &::mtr_milli_sleep($scale * 1000);
+ $total_sleep+= $scale;
+ $scale*= $scale_factor;
+ }
+
+ &::mtr_warning("Getting a response from searchd timed out");
+  return 1;
+}
+
sub searchd_wait {
my ($sphinx) = @_; # My::Config::Group
- return not &::sleep_until_file_created($sphinx->value('pid_file'), 20,
- $sphinx->{'proc'})
+ return wait_exp_backoff(30, 0.1, 2)
}
############# declaration methods ######################
@@ -127,7 +146,7 @@ sub servers {
)
}
-sub is_default { 1 }
+sub is_default { 0 }
############# return an object ######################
bless { };
diff --git a/storage/sphinx/mysql-test/sphinx/union-5539.result b/storage/sphinx/mysql-test/sphinx/union-5539.result
index ab694b7db6a..945e0141b7b 100644
--- a/storage/sphinx/mysql-test/sphinx/union-5539.result
+++ b/storage/sphinx/mysql-test/sphinx/union-5539.result
@@ -5,10 +5,12 @@ id w query
2 1 ;mode=extended2;limit=1000000;maxmatches=500
3 1 ;mode=extended2;limit=1000000;maxmatches=500
4 1 ;mode=extended2;limit=1000000;maxmatches=500
+5 1 ;mode=extended2;limit=1000000;maxmatches=500
SELECT a.* FROM (SELECT * FROM ts si WHERE si.query='@* 123nothingtofind123;mode=extended2;limit=1000000;maxmatches=500') AS a UNION SELECT b.* FROM (SELECT * FROM ts si WHERE si.query=';mode=extended2;limit=1000000;maxmatches=500') AS b;
id w query
1 1 ;mode=extended2;limit=1000000;maxmatches=500
2 1 ;mode=extended2;limit=1000000;maxmatches=500
3 1 ;mode=extended2;limit=1000000;maxmatches=500
4 1 ;mode=extended2;limit=1000000;maxmatches=500
+5 1 ;mode=extended2;limit=1000000;maxmatches=500
drop table ts;
diff --git a/storage/sphinx/snippets_udf.cc b/storage/sphinx/snippets_udf.cc
index ea8246c46f3..edde806f682 100644
--- a/storage/sphinx/snippets_udf.cc
+++ b/storage/sphinx/snippets_udf.cc
@@ -450,7 +450,7 @@ int CSphUrl::Connect()
const char * pError = NULL;
do
{
- iSocket = socket ( iDomain, SOCK_STREAM, 0 );
+ iSocket = (int)socket ( iDomain, SOCK_STREAM, 0 );
if ( iSocket==-1 )
{
pError = "Failed to create client socket";
@@ -642,7 +642,7 @@ struct CSphSnippets
}
#define STRING CHECK_TYPE(STRING_RESULT)
-#define INT CHECK_TYPE(INT_RESULT); int iValue = *(long long *)pArgs->args[i]
+#define INT CHECK_TYPE(INT_RESULT); int iValue =(int)*(long long *)pArgs->args[i]
my_bool sphinx_snippets_init ( UDF_INIT * pUDF, UDF_ARGS * pArgs, char * sMessage )
{
diff --git a/storage/spider/CMakeLists.txt b/storage/spider/CMakeLists.txt
index 402c74b2cde..dec1cb4c6ba 100644
--- a/storage/spider/CMakeLists.txt
+++ b/storage/spider/CMakeLists.txt
@@ -34,6 +34,8 @@ IF(EXISTS ${PROJECT_SOURCE_DIR}/storage/mysql_storage_engine.cmake)
${CMAKE_SOURCE_DIR}/regex)
MYSQL_STORAGE_ENGINE(SPIDER)
+ELSEIF(PLUGIN_PARTITION MATCHES "^NO$")
+ MESSAGE(STATUS "Spider is skipped because partitioning is disabled")
ELSE()
INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/spider/hs_client)
@@ -41,7 +43,6 @@ ELSE()
${CMAKE_SOURCE_DIR}/storage/spider/scripts/install_spider.sql
DESTINATION ${INSTALL_MYSQLSHAREDIR} COMPONENT Server
)
- SET(SPIDER_DEB_FILES "usr/lib/mysql/plugin/ha_spider.so usr/share/mysql/install_spider.sql" PARENT_SCOPE)
MYSQL_ADD_PLUGIN(spider ${SPIDER_SOURCES} STORAGE_ENGINE MODULE_ONLY MODULE_OUTPUT_NAME "ha_spider")
ENDIF()
diff --git a/storage/spider/ha_spider.cc b/storage/spider/ha_spider.cc
index 6b105200c55..fb453ddc637 100644
--- a/storage/spider/ha_spider.cc
+++ b/storage/spider/ha_spider.cc
@@ -2109,6 +2109,7 @@ int ha_spider::index_read_map_internal(
result_list.desc_flg = FALSE;
result_list.sorted = TRUE;
result_list.key_info = &table->key_info[active_index];
+ check_distinct_key_query();
result_list.limit_num =
result_list.internal_limit >= result_list.split_read ?
result_list.split_read : result_list.internal_limit;
@@ -2624,6 +2625,7 @@ int ha_spider::index_read_last_map_internal(
result_list.desc_flg = TRUE;
result_list.sorted = TRUE;
result_list.key_info = &table->key_info[active_index];
+ check_distinct_key_query();
result_list.limit_num =
result_list.internal_limit >= result_list.split_read ?
result_list.split_read : result_list.internal_limit;
@@ -3089,6 +3091,7 @@ int ha_spider::index_first_internal(
result_list.sorted = TRUE;
result_list.key_info = &table->key_info[active_index];
result_list.key_order = 0;
+ check_distinct_key_query();
result_list.limit_num =
result_list.internal_limit >= result_list.split_read ?
result_list.split_read : result_list.internal_limit;
@@ -3472,6 +3475,7 @@ int ha_spider::index_last_internal(
result_list.sorted = TRUE;
result_list.key_info = &table->key_info[active_index];
result_list.key_order = 0;
+ check_distinct_key_query();
result_list.limit_num =
result_list.internal_limit >= result_list.split_read ?
result_list.split_read : result_list.internal_limit;
@@ -3914,6 +3918,7 @@ int ha_spider::read_range_first_internal(
result_list.desc_flg = FALSE;
result_list.sorted = sorted;
result_list.key_info = &table->key_info[active_index];
+ check_distinct_key_query();
result_list.limit_num =
result_list.internal_limit >= result_list.split_read ?
result_list.split_read : result_list.internal_limit;
@@ -7788,7 +7793,7 @@ int ha_spider::cmp_ref(
*field;
field++
) {
- if ((ret = (*field)->cmp_binary_offset(ptr_diff)))
+ if ((ret = (*field)->cmp_binary_offset((uint)ptr_diff)))
{
DBUG_PRINT("info",("spider different at %s", (*field)->field_name));
break;
@@ -9559,7 +9564,6 @@ int ha_spider::write_row(
DBUG_RETURN(error_num);
}
#endif
- ha_statistic_increment(&SSV::ha_write_count);
#if defined(MARIADB_BASE_VERSION) && MYSQL_VERSION_ID >= 100000
#else
if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
@@ -9795,7 +9799,6 @@ int ha_spider::update_row(
DBUG_RETURN(error_num);
}
#endif
- ha_statistic_increment(&SSV::ha_update_count);
#ifdef HANDLER_HAS_DIRECT_UPDATE_ROWS
do_direct_update = FALSE;
#endif
@@ -10138,7 +10141,6 @@ int ha_spider::delete_row(
DBUG_RETURN(error_num);
}
#endif
- ha_statistic_increment(&SSV::ha_delete_count);
#ifdef HANDLER_HAS_DIRECT_UPDATE_ROWS
do_direct_update = FALSE;
#endif
@@ -12080,6 +12082,81 @@ void ha_spider::check_direct_order_limit()
DBUG_VOID_RETURN;
}
+/********************************************************************
+ * Check whether the current query is a SELECT DISTINCT that uses an
+ * index in a non-partitioned Spider configuration, with a
+ * projection list that consists solely of the first key prefix
+ * column. If so, set the internal row retrieval limit to avoid
+ * visiting each row multiple times.
+ ********************************************************************/
+void ha_spider::check_distinct_key_query()
+{
+ DBUG_ENTER( "ha_spider::check_distinct_key_query" );
+
+ if ( result_list.direct_distinct && !partition_handler_share->handlers &&
+ result_list.keyread && result_list.check_direct_order_limit )
+ {
+ // SELECT DISTINCT query using an index in a non-partitioned configuration
+ KEY_PART_INFO* key_part = result_list.key_info->key_part;
+ Field* key_field = key_part->field;
+
+ if ( is_sole_projection_field( key_field->field_index ) )
+ {
+ // Projection list consists solely of the first key prefix column
+
+      // Set the internal row retrieval limit to avoid visiting each row
+      // multiple times, which fixes a Spider performance bug.
+      result_list.internal_limit = 1;
+ }
+ }
+
+ DBUG_VOID_RETURN;
+}
+
+/********************************************************************
+ * Determine whether the current query's projection list
+ * consists solely of the specified column.
+ *
+ * Params IN - field_index:
+ * Field index of the column of interest within
+ * its table.
+ *
+ * Returns TRUE - if the query's projection list consists
+ * solely of the specified column.
+ * FALSE - otherwise.
+ ********************************************************************/
+bool ha_spider::is_sole_projection_field( uint16 field_index )
+{
+ // NOTE: It is assumed that spider_db_append_select_columns() has already been called
+ // to build the bitmap of projection fields
+ bool is_ha_sole_projection_field;
+ uint loop_index, dbton_id;
+ spider_db_handler* dbton_hdl;
+ DBUG_ENTER( "ha_spider::is_sole_projection_field" );
+
+ for ( loop_index = 0; loop_index < share->use_sql_dbton_count; loop_index++ )
+ {
+ dbton_id = share->use_sql_dbton_ids[ loop_index ];
+ dbton_hdl = dbton_handler[ dbton_id ];
+
+ if ( dbton_hdl->first_link_idx >= 0 )
+ {
+ is_ha_sole_projection_field = dbton_hdl->is_sole_projection_field( field_index );
+ if ( !is_ha_sole_projection_field )
+ {
+ DBUG_RETURN( FALSE );
+ }
+ }
+ }
+
+ DBUG_RETURN( TRUE );
+}
+
int ha_spider::check_ha_range_eof()
{
DBUG_ENTER("ha_spider::check_ha_range_eof");
diff --git a/storage/spider/ha_spider.h b/storage/spider/ha_spider.h
index e926bedc03b..3b16be09d38 100644
--- a/storage/spider/ha_spider.h
+++ b/storage/spider/ha_spider.h
@@ -751,6 +751,8 @@ public:
);
uint check_partitioned();
void check_direct_order_limit();
+ void check_distinct_key_query();
+ bool is_sole_projection_field( uint16 field_index );
int check_ha_range_eof();
int drop_tmp_tables();
bool handler_opened(
diff --git a/storage/spider/hs_client/allocator.hpp b/storage/spider/hs_client/allocator.hpp
index b54c7430d30..a29015e6886 100644
--- a/storage/spider/hs_client/allocator.hpp
+++ b/storage/spider/hs_client/allocator.hpp
@@ -31,7 +31,7 @@ extern "C" {
#if 1
#define DENA_ALLOCA_ALLOCATE(typ, len) \
- static_cast<typ *>(alloca((len) * sizeof(typ)))
+ (typ *) alloca((len) * sizeof(typ))
#define DENA_ALLOCA_FREE(x)
#else
#define DENA_ALLOCA_ALLOCATE(typ, len) \
diff --git a/storage/spider/hs_client/config.cpp b/storage/spider/hs_client/config.cpp
index b546230ca03..3bf0f3e5bdf 100644
--- a/storage/spider/hs_client/config.cpp
+++ b/storage/spider/hs_client/config.cpp
@@ -263,8 +263,8 @@ parse_args(int argc, char **argv, config& conf)
}
if (!(param = new conf_param()))
continue;
- uint32 key_len = eq - arg;
- uint32 val_len = strlen(eq + 1);
+ uint32 key_len = (uint32)(eq - arg);
+ uint32 val_len = (uint32)(strlen(eq + 1));
if (
param->key.reserve(key_len + 1) ||
param->val.reserve(val_len + 1)
diff --git a/storage/spider/hs_client/hstcpcli.cpp b/storage/spider/hs_client/hstcpcli.cpp
index fed87803f9c..60da87b9f20 100644
--- a/storage/spider/hs_client/hstcpcli.cpp
+++ b/storage/spider/hs_client/hstcpcli.cpp
@@ -497,7 +497,7 @@ hstcpcli::response_recv(size_t& num_flds_r)
char *const err_begin = start;
read_token(start, finish);
char *const err_end = start;
- String e = String(err_begin, err_end - err_begin, &my_charset_bin);
+ String e = String(err_begin, (uint32)(err_end - err_begin), &my_charset_bin);
if (!e.length()) {
e = String("unknown_error", &my_charset_bin);
}
diff --git a/storage/spider/hs_client/socket.cpp b/storage/spider/hs_client/socket.cpp
index c61b39d140f..0717acf0da1 100644
--- a/storage/spider/hs_client/socket.cpp
+++ b/storage/spider/hs_client/socket.cpp
@@ -223,7 +223,7 @@ socket_set_options(auto_file& fd, const socket_args& args, String& err_r)
int
socket_open(auto_file& fd, const socket_args& args, String& err_r)
{
- fd.reset(socket(args.family, args.socktype, args.protocol));
+ fd.reset((int)socket(args.family, args.socktype, args.protocol));
if (fd.get() < 0) {
return errno_string("socket", errno, err_r);
}
@@ -253,7 +253,7 @@ socket_connect(auto_file& fd, const socket_args& args, String& err_r)
int
socket_bind(auto_file& fd, const socket_args& args, String& err_r)
{
- fd.reset(socket(args.family, args.socktype, args.protocol));
+ fd.reset((int)socket(args.family, args.socktype, args.protocol));
if (fd.get() < 0) {
return errno_string("socket", errno, err_r);
}
@@ -300,7 +300,7 @@ int
socket_accept(int listen_fd, auto_file& fd, const socket_args& args,
sockaddr_storage& addr_r, socklen_t& addrlen_r, String& err_r)
{
- fd.reset(accept(listen_fd, reinterpret_cast<sockaddr *>(&addr_r),
+ fd.reset((int)accept(listen_fd, reinterpret_cast<sockaddr *>(&addr_r),
&addrlen_r));
if (fd.get() < 0) {
return errno_string("accept", errno, err_r);
diff --git a/storage/spider/mysql-test/spider/bg/r/basic_sql.result b/storage/spider/mysql-test/spider/bg/r/basic_sql.result
index 1e9fe78acea..94a09fc317b 100644
--- a/storage/spider/mysql-test/spider/bg/r/basic_sql.result
+++ b/storage/spider/mysql-test/spider/bg/r/basic_sql.result
@@ -9,22 +9,27 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
create table select test
+connection master_1;
DROP TABLE IF EXISTS tb_l;
CREATE TABLE tb_l (
a INT,
@@ -43,6 +48,7 @@ CREATE TABLE ta_l (
PRIMARY KEY(a)
) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_2_1
SELECT a, b, c FROM tb_l
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-01 10:21:39
@@ -52,6 +58,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 c 2001-12-31 23:59:59
create table ignore select test
+connection master_1;
DROP TABLE IF EXISTS ta_l;
DROP TABLE IF EXISTS tb_l;
CREATE TABLE tb_l (
@@ -76,6 +83,7 @@ Warning 1062 Duplicate entry '2' for key 'PRIMARY'
Warning 1062 Duplicate entry '3' for key 'PRIMARY'
Warning 1062 Duplicate entry '4' for key 'PRIMARY'
Warning 1062 Duplicate entry '5' for key 'PRIMARY'
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-01 10:21:39
@@ -85,11 +93,13 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 c 2001-12-31 23:59:59
create table ignore select test
+connection master_1;
DROP TABLE IF EXISTS ta_l;
CREATE TABLE ta_l (
PRIMARY KEY(a)
) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_2_1
REPLACE SELECT a, b, c FROM tb_l
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -99,10 +109,12 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
create no index table
+connection master_1;
DROP TABLE IF EXISTS ta_l_no_idx;
CREATE TABLE ta_l_no_idx
MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT2_2_1
SELECT a, b, c FROM tb_l
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l_no_idx ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -112,6 +124,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select table
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -121,6 +134,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select table shared mode
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a
LOCK IN SHARE MODE;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -131,6 +145,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select table for update
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a
FOR UPDATE;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -141,6 +156,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select table join
+connection master_1;
SELECT a.a, a.b, date_format(b.c, '%Y-%m-%d %H:%i:%s') FROM ta_l a, tb_l b
WHERE a.a = b.a ORDER BY a.a;
a b date_format(b.c, '%Y-%m-%d %H:%i:%s')
@@ -151,6 +167,7 @@ a b date_format(b.c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select table straight_join
+connection master_1;
SELECT STRAIGHT_JOIN a.a, a.b, date_format(b.c, '%Y-%m-%d %H:%i:%s')
FROM ta_l a, tb_l b WHERE a.a = b.a ORDER BY a.a;
a b date_format(b.c, '%Y-%m-%d %H:%i:%s')
@@ -161,6 +178,7 @@ a b date_format(b.c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select sql_small_result
+connection master_1;
SELECT SQL_SMALL_RESULT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -171,6 +189,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select sql_big_result
+connection master_1;
SELECT SQL_BIG_RESULT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -181,6 +200,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select sql_buffer_result
+connection master_1;
SELECT SQL_BUFFER_RESULT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -191,6 +211,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select sql_cache
+connection master_1;
SELECT SQL_CACHE a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -201,6 +222,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select sql_no_cache
+connection master_1;
SELECT SQL_NO_CACHE a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -211,6 +233,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select sql_calc_found_rows
+connection master_1;
SELECT SQL_CALC_FOUND_ROWS a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
ORDER BY a LIMIT 4;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -218,11 +241,13 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 g 2000-02-01 00:00:00
3 j 2007-05-04 20:03:11
4 i 2003-10-30 05:01:03
+connection master_1;
SELECT found_rows();
found_rows()
5
select high_priority
+connection master_1;
SELECT HIGH_PRIORITY a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -233,6 +258,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select distinct
+connection master_1;
SELECT DISTINCT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -243,11 +269,13 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select count
+connection master_1;
SELECT count(*) FROM ta_l ORDER BY a;
count(*)
5
select table join not use index
+connection master_1;
SELECT a.a, a.b, date_format(a.c, '%Y-%m-%d %H:%i:%s') FROM tb_l a WHERE
EXISTS (SELECT * FROM ta_l b WHERE b.b = a.b) ORDER BY a.a;
a b date_format(a.c, '%Y-%m-%d %H:%i:%s')
@@ -258,27 +286,35 @@ a b date_format(a.c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select using pushdown
+connection master_1;
SELECT a.a, a.b, date_format(a.c, '%Y-%m-%d %H:%i:%s') FROM ta_l a WHERE
a.b = 'g' ORDER BY a.a;
a b date_format(a.c, '%Y-%m-%d %H:%i:%s')
2 g 2000-02-01 00:00:00
select using index and pushdown
+connection master_1;
SELECT a.a, a.b, date_format(a.c, '%Y-%m-%d %H:%i:%s') FROM ta_l a WHERE
a.a > 0 AND a.b = 'g' ORDER BY a.a;
a b date_format(a.c, '%Y-%m-%d %H:%i:%s')
2 g 2000-02-01 00:00:00
insert
+connection master_1;
TRUNCATE TABLE ta_l;
+connection master_1;
INSERT INTO ta_l (a, b, c) VALUES (2, 'e', '2008-01-01 23:59:59');
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 e 2008-01-01 23:59:59
insert select
+connection master_1;
TRUNCATE TABLE ta_l;
+connection master_1;
INSERT INTO ta_l (a, b, c) SELECT a, b, c FROM tb_l;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -288,61 +324,82 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
insert select a
+connection master_1;
TRUNCATE TABLE ta_l;
+connection master_1;
INSERT INTO ta_l (a, b, c) VALUES ((SELECT a FROM tb_l ORDER BY a LIMIT 1),
'e', '2008-01-01 23:59:59');
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 e 2008-01-01 23:59:59
insert low_priority
+connection master_1;
TRUNCATE TABLE ta_l;
+connection master_1;
INSERT LOW_PRIORITY INTO ta_l (a, b, c) values (2, 'e', '2008-01-01 23:59:59');
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 e 2008-01-01 23:59:59
insert high_priority
+connection master_1;
TRUNCATE TABLE ta_l;
+connection master_1;
INSERT HIGH_PRIORITY INTO ta_l (a, b, c) VALUES (2, 'e',
'2008-01-01 23:59:59');
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 e 2008-01-01 23:59:59
insert ignore
+connection master_1;
INSERT IGNORE INTO ta_l (a, b, c) VALUES (2, 'd', '2009-02-02 01:01:01');
Warnings:
Warning 1062 Duplicate entry '2' for key 'PRIMARY'
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 e 2008-01-01 23:59:59
insert update (insert)
+connection master_1;
TRUNCATE TABLE ta_l;
+connection master_1;
INSERT INTO ta_l (a, b, c) VALUES (2, 'e', '2008-01-01 23:59:59') ON DUPLICATE
KEY UPDATE b = 'f', c = '2005-08-08 11:11:11';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 e 2008-01-01 23:59:59
insert update (update)
+connection master_1;
INSERT INTO ta_l (a, b, c) VALUES (2, 'e', '2008-01-01 23:59:59') ON DUPLICATE
KEY UPDATE b = 'f', c = '2005-08-08 11:11:11';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 f 2005-08-08 11:11:11
replace
+connection master_1;
TRUNCATE TABLE ta_l;
INSERT INTO ta_l (a, b, c) VALUES (2, 'e', '2008-01-01 23:59:59');
+connection master_1;
REPLACE INTO ta_l (a, b, c) VALUES (2, 'f', '2008-02-02 02:02:02');
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 f 2008-02-02 02:02:02
replace select
+connection master_1;
REPLACE INTO ta_l (a, b, c) SELECT a, b, c FROM tb_l;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -352,8 +409,10 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
replace select a
+connection master_1;
REPLACE INTO ta_l (a, b, c) VALUES ((SELECT a FROM tb_l ORDER BY a LIMIT 1),
'e', '2008-01-01 23:59:59');
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 e 2008-01-01 23:59:59
@@ -363,8 +422,10 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
replace low_priority
+connection master_1;
REPLACE LOW_PRIORITY INTO ta_l (a, b, c) VALUES (3, 'g',
'2009-03-03 03:03:03');
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 e 2008-01-01 23:59:59
@@ -377,66 +438,84 @@ update
TRUNCATE TABLE ta_l;
INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(2, 'e', '2008-01-01 23:59:59');
+connection master_1;
UPDATE ta_l SET b = 'f', c = '2008-02-02 02:02:02' WHERE a = 2;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 e 2008-01-01 23:59:59
2 f 2008-02-02 02:02:02
update select
+connection master_1;
UPDATE ta_l SET b = 'g', c = '2009-03-03 03:03:03' WHERE a IN (SELECT a FROM
tb_l);
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 g 2009-03-03 03:03:03
2 g 2009-03-03 03:03:03
update select a
+connection master_1;
UPDATE ta_l SET b = 'h', c = '2010-04-04 04:04:04' WHERE a = (SELECT a FROM
tb_l ORDER BY a LIMIT 1);
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 h 2010-04-04 04:04:04
2 g 2009-03-03 03:03:03
update join
+connection master_1;
UPDATE ta_l a, tb_l b SET a.b = b.b, a.c = b.c WHERE a.a = b.a;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
2 g 2000-02-01 00:00:00
update join a
+connection master_1;
UPDATE ta_l a, tb_l b SET a.b = 'g', a.c = '2009-03-03 03:03:03' WHERE
a.a = b.a;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 g 2009-03-03 03:03:03
2 g 2009-03-03 03:03:03
update low_priority
+connection master_1;
UPDATE LOW_PRIORITY ta_l SET b = 'f', c = '2008-02-02 02:02:02' WHERE a = 2;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 g 2009-03-03 03:03:03
2 f 2008-02-02 02:02:02
update ignore
+connection master_1;
UPDATE IGNORE ta_l SET a = 1, b = 'g', c = '2009-03-03 03:03:03' WHERE a = 2;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 g 2009-03-03 03:03:03
2 f 2008-02-02 02:02:02
update pushdown
+connection master_1;
update ta_l set b = 'j', c = '2009-03-03 03:03:03' where b = 'f';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 g 2009-03-03 03:03:03
2 j 2009-03-03 03:03:03
update index pushdown
+connection master_1;
UPDATE ta_l SET b = 'g', c = '2009-03-03 03:03:03' WHERE a > 0 AND b = 'j';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 g 2009-03-03 03:03:03
@@ -450,7 +529,9 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE FROM ta_l WHERE a = 2;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 e 2008-01-01 23:59:59
@@ -464,7 +545,9 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
10 j 2008-01-01 23:59:59
delete all
+connection master_1;
DELETE FROM ta_l;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -476,7 +559,9 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE FROM ta_l WHERE a IN (SELECT a FROM tb_l);
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
6 e 2008-01-01 23:59:59
@@ -493,7 +578,9 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE FROM ta_l WHERE a = (SELECT a FROM tb_l ORDER BY a LIMIT 1);
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 e 2008-01-01 23:59:59
@@ -514,7 +601,9 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE a FROM ta_l a, (SELECT a FROM tb_l ORDER BY a) b WHERE a.a = b.a;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
6 e 2008-01-01 23:59:59
@@ -531,7 +620,9 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE LOW_PRIORITY FROM ta_l WHERE a = 2;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 e 2008-01-01 23:59:59
@@ -552,7 +643,9 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE IGNORE FROM ta_l WHERE a = 2;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 e 2008-01-01 23:59:59
@@ -573,7 +666,9 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE QUICK FROM ta_l WHERE a = 2;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 e 2008-01-01 23:59:59
@@ -594,7 +689,9 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE FROM ta_l WHERE b = 'e';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
10 j 2008-01-01 23:59:59
@@ -607,19 +704,26 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE FROM ta_l WHERE a > 0 AND b = 'e';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
10 j 2008-01-01 23:59:59
truncate
+connection master_1;
TRUNCATE TABLE ta_l;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for master_1
for child2
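
Every result hunk above follows the same pattern: the recorded output gains an
explicit "connection <name>;" line in front of the statements that run on that
connection. mysqltest can echo connection switches into the result log (the
enable_connect_log / disable_connect_log commands), and once that echo is
active by default, every recorded .result file has to be regenerated to match.
A minimal sketch of the effect, assuming a hypothetical two-connection test
script:

    --connection master_1
    SELECT 1;
    --connection child2_1
    DROP DATABASE IF EXISTS auto_test_remote;

With connection logging enabled, the recorded result for the same script
reads:

    connection master_1;
    SELECT 1;
    1
    1
    connection child2_1;
    DROP DATABASE IF EXISTS auto_test_remote;

which is exactly the shape of the "+connection ...;" additions in these hunks.
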
diff --git a/storage/spider/mysql-test/spider/bg/r/basic_sql_part.result b/storage/spider/mysql-test/spider/bg/r/basic_sql_part.result
index 9e1201c17c9..0f4029404a7 100644
--- a/storage/spider/mysql-test/spider/bg/r/basic_sql_part.result
+++ b/storage/spider/mysql-test/spider/bg/r/basic_sql_part.result
@@ -9,20 +9,25 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
+connection master_1;
DROP TABLE IF EXISTS tb_l;
CREATE TABLE tb_l (
a INT,
@@ -38,10 +43,12 @@ INSERT INTO tb_l (a, b, c) VALUES
(5, 'h', '2001-10-31 23:59:59');
create table with partition and select test
+connection master_1;
CREATE TABLE ta_l2 (
PRIMARY KEY(a)
) MASTER_1_ENGINE MASTER_1_COMMENT_P_2_1
SELECT a, b, c FROM tb_l
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -51,19 +58,23 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select partition using pushdown
+connection master_1;
SELECT a.a, a.b, date_format(a.c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 a WHERE
a.b = 'g' ORDER BY a.a;
a b date_format(a.c, '%Y-%m-%d %H:%i:%s')
2 g 2000-02-01 00:00:00
select partition using index pushdown
+connection master_1;
SELECT a.a, a.b, date_format(a.c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 a WHERE
a.a > 0 AND a.b = 'g' ORDER BY a.a;
a b date_format(a.c, '%Y-%m-%d %H:%i:%s')
2 g 2000-02-01 00:00:00
update partition pushdown
+connection master_1;
UPDATE ta_l2 SET b = 'e', c = '2009-03-03 03:03:03' WHERE b = 'j';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -73,7 +84,9 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
update partition index pushdown
+connection master_1;
UPDATE ta_l2 SET b = 'j', c = '2009-03-03 03:03:03' WHERE a > 0 AND b = 'e';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -85,7 +98,9 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
delete partition pushdown
TRUNCATE TABLE ta_l2;
INSERT INTO ta_l2 SELECT a, b, c FROM tb_l;
+connection master_1;
DELETE FROM ta_l2 WHERE b = 'g';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -96,7 +111,9 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
delete partition index pushdown
TRUNCATE TABLE ta_l2;
INSERT INTO ta_l2 SELECT a, b, c FROM tb_l;
+connection master_1;
DELETE FROM ta_l2 WHERE a > 0 AND b = 'g';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -105,8 +122,11 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for master_1
for child2
diff --git a/storage/spider/mysql-test/spider/bg/r/direct_aggregate.result b/storage/spider/mysql-test/spider/bg/r/direct_aggregate.result
index 3a9c7be3076..9a8660ba79e 100644
--- a/storage/spider/mysql-test/spider/bg/r/direct_aggregate.result
+++ b/storage/spider/mysql-test/spider/bg/r/direct_aggregate.result
@@ -9,22 +9,27 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
create table select test
+connection master_1;
DROP TABLE IF EXISTS ta_l;
CREATE TABLE ta_l (
a INT,
@@ -40,6 +45,7 @@ INSERT INTO ta_l (a, b, c) VALUES
(5, 'c', '2001-12-31 23:59:59');
direct_aggregating test
+connection master_1;
SHOW STATUS LIKE 'Spider_direct_aggregate';
Variable_name Value
Spider_direct_aggregate 0
@@ -75,8 +81,11 @@ Variable_name Value
Spider_direct_aggregate 0
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for master_1
for child2
diff --git a/storage/spider/mysql-test/spider/bg/r/direct_aggregate_part.result b/storage/spider/mysql-test/spider/bg/r/direct_aggregate_part.result
index bbdc943601b..760b39e16d5 100644
--- a/storage/spider/mysql-test/spider/bg/r/direct_aggregate_part.result
+++ b/storage/spider/mysql-test/spider/bg/r/direct_aggregate_part.result
@@ -9,22 +9,27 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
with partition test
+connection master_1;
CREATE TABLE ta_l2 (
a INT,
b CHAR(1),
@@ -66,8 +71,11 @@ Variable_name Value
Spider_direct_aggregate 0
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for master_1
for child2
diff --git a/storage/spider/mysql-test/spider/bg/r/direct_update.result b/storage/spider/mysql-test/spider/bg/r/direct_update.result
index 517491253e9..74dae7aec2e 100644
--- a/storage/spider/mysql-test/spider/bg/r/direct_update.result
+++ b/storage/spider/mysql-test/spider/bg/r/direct_update.result
@@ -9,22 +9,27 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
create table select test
+connection master_1;
DROP TABLE IF EXISTS ta_l;
CREATE TABLE ta_l (
a INT,
@@ -40,6 +45,7 @@ INSERT INTO ta_l (a, b, c) VALUES
(5, 'c', '2001-12-31 23:59:59');
direct_updating test
+connection master_1;
SHOW STATUS LIKE 'Spider_direct_update';
Variable_name Value
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
@@ -122,8 +128,11 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
4 d 2003-12-01 05:01:03
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for master_1
for child2
diff --git a/storage/spider/mysql-test/spider/bg/r/direct_update_part.result b/storage/spider/mysql-test/spider/bg/r/direct_update_part.result
index bd8f1b89f69..6db7c01f563 100644
--- a/storage/spider/mysql-test/spider/bg/r/direct_update_part.result
+++ b/storage/spider/mysql-test/spider/bg/r/direct_update_part.result
@@ -9,22 +9,27 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
with partition test
+connection master_1;
CREATE TABLE ta_l2 (
a INT,
b CHAR(1),
@@ -113,8 +118,11 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
4 d 2003-12-01 05:01:03
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for master_1
for child2
diff --git a/storage/spider/mysql-test/spider/bg/r/function.result b/storage/spider/mysql-test/spider/bg/r/function.result
index 764c774514b..c088a8a9541 100644
--- a/storage/spider/mysql-test/spider/bg/r/function.result
+++ b/storage/spider/mysql-test/spider/bg/r/function.result
@@ -9,22 +9,27 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
in()
+connection master_1;
CREATE TABLE t1 (
a VARCHAR(255),
PRIMARY KEY(a)
@@ -41,12 +46,14 @@ insert into t1 select a + 128 from t1;
insert into t1 select a + 256 from t1;
insert into t1 select a + 512 from t1;
flush tables;
+connection master_1;
select a from t1 where a in ('15', '120');
a
120
15
date_sub()
+connection master_1;
DROP TABLE IF EXISTS ta_l;
CREATE TABLE ta_l (
a INT,
@@ -124,6 +131,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
4 d 2003-02-03 06:00:03
5 c 2001-03-07 00:58:59
UPDATE ta_l SET c = DATE_ADD(c, INTERVAL 1 SECOND);
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2007-10-07 11:20:40
@@ -133,8 +141,11 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 c 2001-03-07 00:59:00
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for master_1
for child2
diff --git a/storage/spider/mysql-test/spider/bg/r/ha.result b/storage/spider/mysql-test/spider/bg/r/ha.result
index 9837faebd87..f8833c229ef 100644
--- a/storage/spider/mysql-test/spider/bg/r/ha.result
+++ b/storage/spider/mysql-test/spider/bg/r/ha.result
@@ -18,34 +18,43 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
+connection child2_3;
DROP DATABASE IF EXISTS auto_test_remote3;
CREATE DATABASE auto_test_remote3;
USE auto_test_remote3;
+connection child3_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child3_2;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child3_3;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
test select 1
+connection master_1;
SELECT 1;
1
1
create table test
+connection master_1;
DROP TABLE IF EXISTS ta_l;
CREATE TABLE ta_l (
a INT,
@@ -61,6 +70,7 @@ INSERT INTO ta_l (a, b, c) VALUES
(5, 'c', '2001-12-31 23:59:59');
select test
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-01 10:21:39
@@ -70,6 +80,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 c 2001-12-31 23:59:59
fail-over test
+connection master_1;
SHOW STATUS LIKE 'Spider_mon_table_cache_version%';
Variable_name Value
Spider_mon_table_cache_version 0
@@ -101,6 +112,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
6 e 2011-05-05 20:04:05
recovery test
+connection master_1;
ALTER TABLE ta_l
CONNECTION='host "localhost", user "root", password "",
msi "5", mkd "2",
@@ -113,6 +125,7 @@ auto_test_local ta_l 1 2
SELECT spider_copy_tables('ta_l', '0', '1');
spider_copy_tables('ta_l', '0', '1')
1
+connection master_1;
ALTER TABLE ta_l
CONNECTION='host "localhost", user "root", password "",
msi "5", mkd "2",
@@ -134,12 +147,14 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
6 e 2011-05-05 20:04:05
8 g 2011-05-05 21:33:30
DROP TABLE ta_l;
+connection master_1;
SELECT spider_flush_table_mon_cache();
spider_flush_table_mon_cache()
1
active standby test
create table test
+connection master_1;
DROP TABLE IF EXISTS ta_l;
CREATE TABLE ta_l (
a INT,
@@ -155,6 +170,7 @@ INSERT INTO ta_l (a, b, c) VALUES
(5, 'c', '2001-12-31 23:59:59');
select test
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-01 10:21:39
@@ -164,6 +180,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 c 2001-12-31 23:59:59
fail-over test
+connection master_1;
SHOW STATUS LIKE 'Spider_mon_table_cache_version%';
Variable_name Value
Spider_mon_table_cache_version 1
@@ -191,6 +208,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
6 e 2011-05-05 20:04:05
recovery test
+connection master_1;
ALTER TABLE ta_l
CONNECTION='host "localhost", user "root", password "",
msi "5", mkd "2", alc "1",
@@ -206,17 +224,25 @@ SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
8 g 2011-05-05 21:33:30
DROP TABLE ta_l;
+connection master_1;
SELECT spider_flush_table_mon_cache();
spider_flush_table_mon_cache()
1
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
+connection child2_3;
DROP DATABASE IF EXISTS auto_test_remote3;
+connection child3_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child3_2;
DROP DATABASE IF EXISTS auto_test_local;
+connection child3_3;
DROP DATABASE IF EXISTS auto_test_local;
for master_1
for child2
diff --git a/storage/spider/mysql-test/spider/bg/r/ha_part.result b/storage/spider/mysql-test/spider/bg/r/ha_part.result
index 8c0300ba5a5..315f37298bc 100644
--- a/storage/spider/mysql-test/spider/bg/r/ha_part.result
+++ b/storage/spider/mysql-test/spider/bg/r/ha_part.result
@@ -18,34 +18,43 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
+connection child2_3;
DROP DATABASE IF EXISTS auto_test_remote3;
CREATE DATABASE auto_test_remote3;
USE auto_test_remote3;
+connection child3_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child3_2;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child3_3;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
test select 1
+connection master_1;
SELECT 1;
1
1
create table with partition test
+connection master_1;
DROP TABLE IF EXISTS ta_l2;
CREATE TABLE ta_l2 (
a INT,
@@ -61,6 +70,7 @@ INSERT INTO ta_l2 (a, b, c) VALUES
(5, 'c', '2001-12-31 23:59:59');
select test
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-01 10:21:39
@@ -70,6 +80,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 c 2001-12-31 23:59:59
fail-over test
+connection master_1;
SHOW STATUS LIKE 'Spider_mon_table_cache_version%';
Variable_name Value
Spider_mon_table_cache_version 0
@@ -103,6 +114,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
6 e 2011-05-05 20:04:05
recovery test
+connection master_1;
ALTER TABLE ta_l2
PARTITION BY KEY(a) (
PARTITION pt1 COMMENT='srv "s_2_1 s_2_2", tbl "ta_r ta_r3",
@@ -120,6 +132,7 @@ auto_test_local ta_l2#P#pt2 1 2
SELECT spider_copy_tables('ta_l2#P#pt2', '0', '1');
spider_copy_tables('ta_l2#P#pt2', '0', '1')
1
+connection master_1;
ALTER TABLE ta_l2
PARTITION BY KEY(a) (
PARTITION pt1 COMMENT='srv "s_2_1 s_2_2", tbl "ta_r ta_r3",
@@ -150,6 +163,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
DROP TABLE ta_l2;
create table with partition test
+connection master_1;
DROP TABLE IF EXISTS ta_l2;
CREATE TABLE ta_l2 (
a INT,
@@ -165,6 +179,7 @@ INSERT INTO ta_l2 (a, b, c) VALUES
(5, 'c', '2001-12-31 23:59:59');
select test
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-01 10:21:39
@@ -174,6 +189,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 c 2001-12-31 23:59:59
fail-over test
+connection master_1;
SHOW STATUS LIKE 'Spider_mon_table_cache_version%';
Variable_name Value
Spider_mon_table_cache_version 1
@@ -206,6 +222,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
6 e 2011-05-05 20:04:05
recovery test
+connection master_1;
ALTER TABLE ta_l2
PARTITION BY KEY(a) (
PARTITION pt1 COMMENT='srv "s_2_1 s_2_2", tbl "ta_r ta_r3",
@@ -233,12 +250,19 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
DROP TABLE ta_l2;
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
+connection child2_3;
DROP DATABASE IF EXISTS auto_test_remote3;
+connection child3_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child3_2;
DROP DATABASE IF EXISTS auto_test_local;
+connection child3_3;
DROP DATABASE IF EXISTS auto_test_local;
for master_1
for child2
diff --git a/storage/spider/mysql-test/spider/bg/r/spider3_fixes.result b/storage/spider/mysql-test/spider/bg/r/spider3_fixes.result
index 98073fa5e54..aa734573a1a 100644
--- a/storage/spider/mysql-test/spider/bg/r/spider3_fixes.result
+++ b/storage/spider/mysql-test/spider/bg/r/spider3_fixes.result
@@ -10,26 +10,34 @@ child3_3
for slave1_1
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection slave1_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
3.1
auto_increment
+connection master_1;
+connection slave1_1;
+connection master_1;
DROP TABLE IF EXISTS t1, t2;
CREATE TABLE t1 (
id int(11) NOT NULL AUTO_INCREMENT,
@@ -182,6 +190,7 @@ LAST_INSERT_ID()
SELECT MAX(id) FROM t2;
MAX(id)
10000
+connection slave1_1;
SELECT id FROM t1 ORDER BY id;
id
777
@@ -190,11 +199,16 @@ id
3108
5000
10000
+connection master_1;
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection slave1_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for slave1_1
for master_1
diff --git a/storage/spider/mysql-test/spider/bg/r/spider3_fixes_part.result b/storage/spider/mysql-test/spider/bg/r/spider3_fixes_part.result
index 12f43ef09b2..b793346df4b 100644
--- a/storage/spider/mysql-test/spider/bg/r/spider3_fixes_part.result
+++ b/storage/spider/mysql-test/spider/bg/r/spider3_fixes_part.result
@@ -10,24 +10,32 @@ child3_3
for slave1_1
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection slave1_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
auto_increment with partition
+connection master_1;
+connection slave1_1;
+connection master_1;
DROP TABLE IF EXISTS t1, t2;
CREATE TABLE t1 (
id int(11) NOT NULL AUTO_INCREMENT,
@@ -180,6 +188,7 @@ LAST_INSERT_ID()
SELECT MAX(id) FROM t2;
MAX(id)
10000
+connection slave1_1;
SELECT id FROM t1 ORDER BY id;
id
777
@@ -188,11 +197,16 @@ id
3108
5000
10000
+connection master_1;
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection slave1_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for slave1_1
for master_1
diff --git a/storage/spider/mysql-test/spider/bg/r/spider_fixes.result b/storage/spider/mysql-test/spider/bg/r/spider_fixes.result
index 3033586821e..f50c9822534 100644
--- a/storage/spider/mysql-test/spider/bg/r/spider_fixes.result
+++ b/storage/spider/mysql-test/spider/bg/r/spider_fixes.result
@@ -10,25 +10,31 @@ child3_3
for slave1_1
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection slave1_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
create table and insert
+connection master_1;
DROP TABLE IF EXISTS tb_l;
CREATE TABLE tb_l (
a INT,
@@ -50,6 +56,7 @@ INSERT INTO ta_l SELECT a, b, c FROM tb_l;
2.13
select table with "order by desc" and "<"
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
WHERE a < 5 ORDER BY a DESC LIMIT 3;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -58,6 +65,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 b 2000-01-01 00:00:00
select table with "order by desc" and "<="
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
WHERE a <= 5 ORDER BY a DESC LIMIT 3;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -67,7 +75,9 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
2.14
update table with range scan and split_read
+connection master_1;
UPDATE ta_l SET c = '2000-02-02 00:00:00' WHERE a > 1;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-01 10:21:39
@@ -80,6 +90,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
select table with range scan
TRUNCATE TABLE ta_l;
DROP TABLE IF EXISTS ta_l;
+connection master_1;
CREATE TABLE ta_l (
a int(11) NOT NULL DEFAULT '0',
b char(1) DEFAULT NULL,
@@ -87,50 +98,62 @@ c datetime DEFAULT NULL,
PRIMARY KEY (a, b, c)
) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT5_2_1
INSERT INTO ta_l SELECT a, b, c FROM tb_l;
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a = 4 AND b >= 'b'
AND c = '2003-11-30 05:01:03';
a b c
4 d 2003-11-30 05:01:03
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a = 4 AND b > 'b'
AND c = '2003-11-30 05:01:03';
a b c
4 d 2003-11-30 05:01:03
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a >= 4 AND b = 'd'
AND c = '2003-11-30 05:01:03';
a b c
4 d 2003-11-30 05:01:03
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a > 4 AND b = 'c'
AND c = '2001-12-31 23:59:59';
a b c
5 c 2001-12-31 23:59:59
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a = 4 AND b <= 'd'
AND c = '2003-11-30 05:01:03';
a b c
4 d 2003-11-30 05:01:03
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a = 4 AND b < 'e'
AND c = '2003-11-30 05:01:03';
a b c
4 d 2003-11-30 05:01:03
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a <= 4 AND b = 'b'
AND c = '2000-01-01 00:00:00';
a b c
2 b 2000-01-01 00:00:00
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a < 4 AND b = 'b'
AND c = '2000-01-01 00:00:00';
a b c
2 b 2000-01-01 00:00:00
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a = 4 AND b >= 'b'
AND b <= 'd' AND c = '2003-11-30 05:01:03';
a b c
4 d 2003-11-30 05:01:03
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a = 4 AND b > 'b'
AND b < 'e' AND c = '2003-11-30 05:01:03';
a b c
4 d 2003-11-30 05:01:03
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a <= 4 AND a >= 1
AND b >= 'b' AND c = '2003-11-30 05:01:03';
a b c
4 d 2003-11-30 05:01:03
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a < 4 AND a > 1
AND b >= 'b' AND c = '2000-01-01 00:00:00';
a b c
@@ -138,6 +161,7 @@ a b c
2.16
auto_increment insert with trigger
+connection master_1;
CREATE TABLE ta_l_auto_inc (
a INT AUTO_INCREMENT,
b CHAR(1) DEFAULT 'c',
@@ -151,14 +175,17 @@ c DATETIME,
PRIMARY KEY(a)
) MASTER_1_ENGINE2 MASTER_1_CHARSET2
CREATE TRIGGER ins_ta_l_auto_inc AFTER INSERT ON ta_l_auto_inc FOR EACH ROW BEGIN INSERT INTO tc_l (a, b, c) VALUES (NEW.a, NEW.b, NEW.c); END;;
+connection master_1;
INSERT INTO ta_l_auto_inc (a, b, c) VALUES
(NULL, 's', '2008-12-31 20:59:59');
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM tc_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 s 2008-12-31 20:59:59
2.17
engine-condition-pushdown with "or" and joining
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l WHERE a = 1 OR a IN (SELECT a FROM tb_l);
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-01 10:21:39
@@ -169,6 +196,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
2.23
index merge
+connection master_1;
CREATE TABLE ta_l_int (
a INT AUTO_INCREMENT,
b INT DEFAULT 10,
@@ -182,6 +210,7 @@ INSERT INTO ta_l_int (a, b, c) SELECT a + 1, b + 1, c + 1 FROM ta_l_int;
INSERT INTO ta_l_int (a, b, c) SELECT a + 2, b + 2, c + 2 FROM ta_l_int;
INSERT INTO ta_l_int (a, b, c) SELECT a + 4, b + 4, c + 4 FROM ta_l_int;
INSERT INTO ta_l_int (a, b, c) SELECT a + 8, b + 8, c + 8 FROM ta_l_int;
+connection master_1;
SELECT a, b, c FROM ta_l_int force index(primary, idx1, idx2)
WHERE a = 5 OR b = 5 OR c = 5 ORDER BY a;
a b c
@@ -191,6 +220,7 @@ a b c
2.24
index scan update without PK
+connection master_1;
DROP TABLE IF EXISTS ta_l_int;
CREATE TABLE ta_l_int (
a INT NOT NULL,
@@ -219,7 +249,9 @@ a b c
16 17 18
INSERT INTO ta_l_int (a, b, c) VALUES (0, 2, 3);
INSERT INTO ta_l_int (a, b, c) VALUES (18, 2, 3);
+connection master_1;
UPDATE ta_l_int SET c = 4 WHERE b = 2;
+connection master_1;
SELECT a, b, c FROM ta_l_int ORDER BY a;
a b c
1 2 4
@@ -243,6 +275,7 @@ a b c
2.25
direct order limit
+connection master_1;
SHOW STATUS LIKE 'Spider_direct_order_limit';
Variable_name Value
Spider_direct_order_limit 2
@@ -257,6 +290,7 @@ Spider_direct_order_limit 3
2.26
lock tables
+connection master_1;
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
CREATE TABLE t1 (
@@ -271,6 +305,9 @@ LOCK TABLES t1 READ, t2 READ;
UNLOCK TABLES;
auto_increment
+connection master_1;
+connection slave1_1;
+connection master_1;
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (
id int(11) NOT NULL AUTO_INCREMENT,
@@ -358,6 +395,7 @@ LAST_INSERT_ID()
SELECT MAX(id) FROM t1;
MAX(id)
10000
+connection slave1_1;
SELECT id FROM t1 ORDER BY id;
id
777
@@ -371,8 +409,10 @@ id
5439
6216
10000
+connection master_1;
read only
+connection master_1;
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (
id int(11) NOT NULL,
@@ -404,6 +444,7 @@ ERROR HY000: Table 'auto_test_local.t1' is read only
2.27
error mode
+connection master_1;
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (
id int(11) NOT NULL,
@@ -427,6 +468,7 @@ Error 1146 Table 'auto_test_remote.ter1_1' doesn't exist
3.0
is null
+connection master_1;
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (
a VARCHAR(255),
@@ -449,6 +491,7 @@ insert into t1 select a + 128, b + 128, c + 128 from t1;
insert into t1 select a + 256, b + 256, c + 256 from t1;
insert into t1 select a + 512, b + 512, c + 512 from t1;
flush tables;
+connection master_1;
select a from t1 where a is null order by a limit 30;
a
NULL
@@ -515,6 +558,7 @@ NULL
NULL
direct_order_limit
+connection master_1;
TRUNCATE TABLE t1;
insert into t1 values ('1', '1', '1');
insert into t1 select a + 1, b + 1, c + 1 from t1;
@@ -526,6 +570,7 @@ insert into t1 select a, b + 32, c + 32 from t1;
insert into t1 select a, b + 64, c + 64 from t1;
insert into t1 select a, b + 128, c + 128 from t1;
flush tables;
+connection master_1;
select a, b, c from t1 where a = '10' and b <> '100' order by c desc limit 5;
a b c
10 74 74
@@ -542,9 +587,13 @@ a c
10 170
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection slave1_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for slave1_1
for master_1
diff --git a/storage/spider/mysql-test/spider/bg/r/spider_fixes_part.result b/storage/spider/mysql-test/spider/bg/r/spider_fixes_part.result
index 104ba971df9..faf0b6efbea 100644
--- a/storage/spider/mysql-test/spider/bg/r/spider_fixes_part.result
+++ b/storage/spider/mysql-test/spider/bg/r/spider_fixes_part.result
@@ -10,23 +10,29 @@ child3_3
for slave1_1
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection slave1_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
+connection master_1;
DROP TABLE IF EXISTS tb_l;
CREATE TABLE tb_l (
a INT,
@@ -43,6 +49,7 @@ INSERT INTO tb_l (a, b, c) VALUES
2.17
partition with sort
+connection master_1;
CREATE TABLE ta_l2 (
a INT,
b CHAR(1),
@@ -60,7 +67,9 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
2.23
partition update with moving partition
+connection master_1;
DROP TABLE IF EXISTS ta_l2;
+connection master_1;
CREATE TABLE ta_l2 (
a INT,
b CHAR(1),
@@ -73,7 +82,9 @@ SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
4 B 2010-09-26 00:00:00
index merge with partition
+connection master_1;
DROP TABLE IF EXISTS ta_l_int;
+connection master_1;
CREATE TABLE ta_l_int (
a INT AUTO_INCREMENT,
b INT DEFAULT 10,
@@ -87,6 +98,7 @@ INSERT INTO ta_l_int (a, b, c) SELECT a + 1, b + 1, c + 1 FROM ta_l_int;
INSERT INTO ta_l_int (a, b, c) SELECT a + 2, b + 2, c + 2 FROM ta_l_int;
INSERT INTO ta_l_int (a, b, c) SELECT a + 4, b + 4, c + 4 FROM ta_l_int;
INSERT INTO ta_l_int (a, b, c) SELECT a + 8, b + 8, c + 8 FROM ta_l_int;
+connection master_1;
SELECT a, b, c FROM ta_l_int force index(primary, idx1, idx2)
WHERE a = 5 OR b = 5 OR c = 5 ORDER BY a;
a b c
@@ -96,6 +108,9 @@ a b c
2.26
auto_increment with partition
+connection master_1;
+connection slave1_1;
+connection master_1;
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (
id int(11) NOT NULL AUTO_INCREMENT,
@@ -183,6 +198,7 @@ LAST_INSERT_ID()
SELECT MAX(id) FROM t1;
MAX(id)
10000
+connection slave1_1;
SELECT id FROM t1 ORDER BY id;
id
777
@@ -196,11 +212,16 @@ id
5439
6216
10000
+connection master_1;
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection slave1_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for slave1_1
for master_1
diff --git a/storage/spider/mysql-test/spider/bg/r/vp_fixes.result b/storage/spider/mysql-test/spider/bg/r/vp_fixes.result
index 15dd29aa4d3..cc0e4105d61 100644
--- a/storage/spider/mysql-test/spider/bg/r/vp_fixes.result
+++ b/storage/spider/mysql-test/spider/bg/r/vp_fixes.result
@@ -9,22 +9,27 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
create table and insert
+connection master_1;
DROP TABLE IF EXISTS tb_l;
CREATE TABLE tb_l (
a INT,
@@ -46,26 +51,34 @@ INSERT INTO ta_l SELECT a, b, c FROM tb_l;
0.9
create different primary key table
+connection master_1;
CREATE TABLE ta_l_int (
a INT DEFAULT 10,
b INT AUTO_INCREMENT,
c INT DEFAULT 11,
PRIMARY KEY(b)
) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT4_2_1
+connection master_1;
INSERT INTO ta_l_int (a, b, c) VALUES (2, NULL, 3);
create un-correspond primary key table
+connection master_1;
DROP TABLE IF EXISTS ta_l_int;
+connection master_1;
CREATE TABLE ta_l_int (
a INT DEFAULT 10,
b INT DEFAULT 12,
c INT DEFAULT 11,
PRIMARY KEY(c)
) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT4_2_1
+connection master_1;
INSERT INTO ta_l_int (a, b, c) VALUES (2, NULL, 3);
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for master_1
for child2
diff --git a/storage/spider/mysql-test/spider/bg/suite.pm b/storage/spider/mysql-test/spider/bg/suite.pm
index 1bb6d7592c8..f106147deb6 100644
--- a/storage/spider/mysql-test/spider/bg/suite.pm
+++ b/storage/spider/mysql-test/spider/bg/suite.pm
@@ -6,5 +6,7 @@ return "No Spider engine" unless $ENV{HA_SPIDER_SO};
return "Not run for embedded server" if $::opt_embedded_server;
return "Test needs --big-test" unless $::opt_big_test;
+sub is_default { 1 }
+
bless { };
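
The one behavioural change in this hunk is the new "sub is_default { 1 }" in
suite.pm: MariaDB's mysql-test-run asks a suite's is_default method whether
the suite should run when no explicit suite list is given, so this line adds
the Spider bg suite to the default test set. The existing guards above it
still apply, so the suite is skipped without the Spider plugin, under the
embedded server, or without --big-test.
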
diff --git a/storage/spider/mysql-test/spider/handler/r/basic_sql.result b/storage/spider/mysql-test/spider/handler/r/basic_sql.result
index da24c08e9fd..94a09fc317b 100644
--- a/storage/spider/mysql-test/spider/handler/r/basic_sql.result
+++ b/storage/spider/mysql-test/spider/handler/r/basic_sql.result
@@ -9,22 +9,27 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
create table select test
+connection master_1;
DROP TABLE IF EXISTS tb_l;
CREATE TABLE tb_l (
a INT,
@@ -43,6 +48,7 @@ CREATE TABLE ta_l (
PRIMARY KEY(a)
) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_2_1
SELECT a, b, c FROM tb_l
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-01 10:21:39
@@ -52,6 +58,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 c 2001-12-31 23:59:59
create table ignore select test
+connection master_1;
DROP TABLE IF EXISTS ta_l;
DROP TABLE IF EXISTS tb_l;
CREATE TABLE tb_l (
@@ -70,6 +77,13 @@ CREATE TABLE ta_l (
PRIMARY KEY(a)
) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_2_1
IGNORE SELECT a, b, c FROM tb_l
+Warnings:
+Warning 1062 Duplicate entry '1' for key 'PRIMARY'
+Warning 1062 Duplicate entry '2' for key 'PRIMARY'
+Warning 1062 Duplicate entry '3' for key 'PRIMARY'
+Warning 1062 Duplicate entry '4' for key 'PRIMARY'
+Warning 1062 Duplicate entry '5' for key 'PRIMARY'
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-01 10:21:39
@@ -79,11 +93,13 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 c 2001-12-31 23:59:59
create table ignore select test
+connection master_1;
DROP TABLE IF EXISTS ta_l;
CREATE TABLE ta_l (
PRIMARY KEY(a)
) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_2_1
REPLACE SELECT a, b, c FROM tb_l
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -92,24 +108,13 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
4 i 2003-10-30 05:01:03
5 h 2001-10-31 23:59:59
-create table with partition and select test
-CREATE TABLE ta_l2 (
-PRIMARY KEY(a)
-) MASTER_1_ENGINE MASTER_1_COMMENT_P_2_1
-SELECT a, b, c FROM tb_l
-SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
-a b date_format(c, '%Y-%m-%d %H:%i:%s')
-1 f 2008-07-01 10:21:39
-2 g 2000-02-01 00:00:00
-3 j 2007-05-04 20:03:11
-4 i 2003-10-30 05:01:03
-5 h 2001-10-31 23:59:59
-
create no index table
+connection master_1;
DROP TABLE IF EXISTS ta_l_no_idx;
CREATE TABLE ta_l_no_idx
MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT2_2_1
SELECT a, b, c FROM tb_l
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l_no_idx ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -119,6 +124,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select table
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -128,6 +134,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select table shared mode
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a
LOCK IN SHARE MODE;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -138,6 +145,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select table for update
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a
FOR UPDATE;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -148,6 +156,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select table join
+connection master_1;
SELECT a.a, a.b, date_format(b.c, '%Y-%m-%d %H:%i:%s') FROM ta_l a, tb_l b
WHERE a.a = b.a ORDER BY a.a;
a b date_format(b.c, '%Y-%m-%d %H:%i:%s')
@@ -158,6 +167,7 @@ a b date_format(b.c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select table straight_join
+connection master_1;
SELECT STRAIGHT_JOIN a.a, a.b, date_format(b.c, '%Y-%m-%d %H:%i:%s')
FROM ta_l a, tb_l b WHERE a.a = b.a ORDER BY a.a;
a b date_format(b.c, '%Y-%m-%d %H:%i:%s')
@@ -168,6 +178,7 @@ a b date_format(b.c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select sql_small_result
+connection master_1;
SELECT SQL_SMALL_RESULT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -178,6 +189,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select sql_big_result
+connection master_1;
SELECT SQL_BIG_RESULT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -188,6 +200,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select sql_buffer_result
+connection master_1;
SELECT SQL_BUFFER_RESULT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -198,6 +211,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select sql_cache
+connection master_1;
SELECT SQL_CACHE a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -208,6 +222,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select sql_no_cache
+connection master_1;
SELECT SQL_NO_CACHE a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -218,6 +233,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select sql_calc_found_rows
+connection master_1;
SELECT SQL_CALC_FOUND_ROWS a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
ORDER BY a LIMIT 4;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -225,11 +241,13 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 g 2000-02-01 00:00:00
3 j 2007-05-04 20:03:11
4 i 2003-10-30 05:01:03
+connection master_1;
SELECT found_rows();
found_rows()
5
select high_priority
+connection master_1;
SELECT HIGH_PRIORITY a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -240,6 +258,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select distinct
+connection master_1;
SELECT DISTINCT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -250,11 +269,13 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select count
+connection master_1;
SELECT count(*) FROM ta_l ORDER BY a;
count(*)
5
select table join not use index
+connection master_1;
SELECT a.a, a.b, date_format(a.c, '%Y-%m-%d %H:%i:%s') FROM tb_l a WHERE
EXISTS (SELECT * FROM ta_l b WHERE b.b = a.b) ORDER BY a.a;
a b date_format(a.c, '%Y-%m-%d %H:%i:%s')
@@ -265,39 +286,35 @@ a b date_format(a.c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select using pushdown
+connection master_1;
SELECT a.a, a.b, date_format(a.c, '%Y-%m-%d %H:%i:%s') FROM ta_l a WHERE
a.b = 'g' ORDER BY a.a;
a b date_format(a.c, '%Y-%m-%d %H:%i:%s')
2 g 2000-02-01 00:00:00
select using index and pushdown
+connection master_1;
SELECT a.a, a.b, date_format(a.c, '%Y-%m-%d %H:%i:%s') FROM ta_l a WHERE
a.a > 0 AND a.b = 'g' ORDER BY a.a;
a b date_format(a.c, '%Y-%m-%d %H:%i:%s')
2 g 2000-02-01 00:00:00
-select partition using pushdown
-SELECT a.a, a.b, date_format(a.c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 a WHERE
-a.b = 'g' ORDER BY a.a;
-a b date_format(a.c, '%Y-%m-%d %H:%i:%s')
-2 g 2000-02-01 00:00:00
-
-select partition using index pushdown
-SELECT a.a, a.b, date_format(a.c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 a WHERE
-a.a > 0 AND a.b = 'g' ORDER BY a.a;
-a b date_format(a.c, '%Y-%m-%d %H:%i:%s')
-2 g 2000-02-01 00:00:00
-
insert
+connection master_1;
TRUNCATE TABLE ta_l;
+connection master_1;
INSERT INTO ta_l (a, b, c) VALUES (2, 'e', '2008-01-01 23:59:59');
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 e 2008-01-01 23:59:59
insert select
+connection master_1;
TRUNCATE TABLE ta_l;
+connection master_1;
INSERT INTO ta_l (a, b, c) SELECT a, b, c FROM tb_l;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -307,59 +324,82 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
insert select a
+connection master_1;
TRUNCATE TABLE ta_l;
+connection master_1;
INSERT INTO ta_l (a, b, c) VALUES ((SELECT a FROM tb_l ORDER BY a LIMIT 1),
'e', '2008-01-01 23:59:59');
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 e 2008-01-01 23:59:59
insert low_priority
+connection master_1;
TRUNCATE TABLE ta_l;
+connection master_1;
INSERT LOW_PRIORITY INTO ta_l (a, b, c) values (2, 'e', '2008-01-01 23:59:59');
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 e 2008-01-01 23:59:59
insert high_priority
+connection master_1;
TRUNCATE TABLE ta_l;
+connection master_1;
INSERT HIGH_PRIORITY INTO ta_l (a, b, c) VALUES (2, 'e',
'2008-01-01 23:59:59');
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 e 2008-01-01 23:59:59
insert ignore
+connection master_1;
INSERT IGNORE INTO ta_l (a, b, c) VALUES (2, 'd', '2009-02-02 01:01:01');
+Warnings:
+Warning 1062 Duplicate entry '2' for key 'PRIMARY'
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 e 2008-01-01 23:59:59
insert update (insert)
+connection master_1;
TRUNCATE TABLE ta_l;
+connection master_1;
INSERT INTO ta_l (a, b, c) VALUES (2, 'e', '2008-01-01 23:59:59') ON DUPLICATE
KEY UPDATE b = 'f', c = '2005-08-08 11:11:11';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 e 2008-01-01 23:59:59
insert update (update)
+connection master_1;
INSERT INTO ta_l (a, b, c) VALUES (2, 'e', '2008-01-01 23:59:59') ON DUPLICATE
KEY UPDATE b = 'f', c = '2005-08-08 11:11:11';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 f 2005-08-08 11:11:11
replace
+connection master_1;
TRUNCATE TABLE ta_l;
INSERT INTO ta_l (a, b, c) VALUES (2, 'e', '2008-01-01 23:59:59');
+connection master_1;
REPLACE INTO ta_l (a, b, c) VALUES (2, 'f', '2008-02-02 02:02:02');
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 f 2008-02-02 02:02:02
replace select
+connection master_1;
REPLACE INTO ta_l (a, b, c) SELECT a, b, c FROM tb_l;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -369,8 +409,10 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
replace select a
+connection master_1;
REPLACE INTO ta_l (a, b, c) VALUES ((SELECT a FROM tb_l ORDER BY a LIMIT 1),
'e', '2008-01-01 23:59:59');
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 e 2008-01-01 23:59:59
@@ -380,8 +422,10 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
replace low_priority
+connection master_1;
REPLACE LOW_PRIORITY INTO ta_l (a, b, c) VALUES (3, 'g',
'2009-03-03 03:03:03');
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 e 2008-01-01 23:59:59
@@ -394,91 +438,89 @@ update
TRUNCATE TABLE ta_l;
INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(2, 'e', '2008-01-01 23:59:59');
+connection master_1;
UPDATE ta_l SET b = 'f', c = '2008-02-02 02:02:02' WHERE a = 2;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 e 2008-01-01 23:59:59
2 f 2008-02-02 02:02:02
update select
+connection master_1;
UPDATE ta_l SET b = 'g', c = '2009-03-03 03:03:03' WHERE a IN (SELECT a FROM
tb_l);
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 g 2009-03-03 03:03:03
2 g 2009-03-03 03:03:03
update select a
+connection master_1;
UPDATE ta_l SET b = 'h', c = '2010-04-04 04:04:04' WHERE a = (SELECT a FROM
tb_l ORDER BY a LIMIT 1);
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 h 2010-04-04 04:04:04
2 g 2009-03-03 03:03:03
update join
+connection master_1;
UPDATE ta_l a, tb_l b SET a.b = b.b, a.c = b.c WHERE a.a = b.a;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
2 g 2000-02-01 00:00:00
update join a
+connection master_1;
UPDATE ta_l a, tb_l b SET a.b = 'g', a.c = '2009-03-03 03:03:03' WHERE
a.a = b.a;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 g 2009-03-03 03:03:03
2 g 2009-03-03 03:03:03
update low_priority
+connection master_1;
UPDATE LOW_PRIORITY ta_l SET b = 'f', c = '2008-02-02 02:02:02' WHERE a = 2;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 g 2009-03-03 03:03:03
2 f 2008-02-02 02:02:02
update ignore
+connection master_1;
UPDATE IGNORE ta_l SET a = 1, b = 'g', c = '2009-03-03 03:03:03' WHERE a = 2;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 g 2009-03-03 03:03:03
2 f 2008-02-02 02:02:02
update pushdown
+connection master_1;
update ta_l set b = 'j', c = '2009-03-03 03:03:03' where b = 'f';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 g 2009-03-03 03:03:03
2 j 2009-03-03 03:03:03
update index pushdown
+connection master_1;
UPDATE ta_l SET b = 'g', c = '2009-03-03 03:03:03' WHERE a > 0 AND b = 'j';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 g 2009-03-03 03:03:03
2 g 2009-03-03 03:03:03
-update partition pushdown
-UPDATE ta_l2 SET b = 'e', c = '2009-03-03 03:03:03' WHERE b = 'j';
-SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
-a b date_format(c, '%Y-%m-%d %H:%i:%s')
-1 f 2008-07-01 10:21:39
-2 g 2000-02-01 00:00:00
-3 e 2009-03-03 03:03:03
-4 i 2003-10-30 05:01:03
-5 h 2001-10-31 23:59:59
-
-update partition index pushdown
-UPDATE ta_l2 SET b = 'j', c = '2009-03-03 03:03:03' WHERE a > 0 AND b = 'e';
-SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
-a b date_format(c, '%Y-%m-%d %H:%i:%s')
-1 f 2008-07-01 10:21:39
-2 g 2000-02-01 00:00:00
-3 j 2009-03-03 03:03:03
-4 i 2003-10-30 05:01:03
-5 h 2001-10-31 23:59:59
-
delete
TRUNCATE TABLE ta_l;
INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
@@ -487,7 +529,9 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE FROM ta_l WHERE a = 2;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 e 2008-01-01 23:59:59
@@ -501,7 +545,9 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
10 j 2008-01-01 23:59:59
delete all
+connection master_1;
DELETE FROM ta_l;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -513,7 +559,9 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE FROM ta_l WHERE a IN (SELECT a FROM tb_l);
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
6 e 2008-01-01 23:59:59
@@ -530,7 +578,9 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE FROM ta_l WHERE a = (SELECT a FROM tb_l ORDER BY a LIMIT 1);
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 e 2008-01-01 23:59:59
@@ -551,7 +601,9 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE a FROM ta_l a, (SELECT a FROM tb_l ORDER BY a) b WHERE a.a = b.a;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
6 e 2008-01-01 23:59:59
@@ -568,7 +620,9 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE LOW_PRIORITY FROM ta_l WHERE a = 2;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 e 2008-01-01 23:59:59
@@ -589,7 +643,9 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE IGNORE FROM ta_l WHERE a = 2;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 e 2008-01-01 23:59:59
@@ -610,7 +666,9 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE QUICK FROM ta_l WHERE a = 2;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 e 2008-01-01 23:59:59
@@ -631,7 +689,9 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE FROM ta_l WHERE b = 'e';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
10 j 2008-01-01 23:59:59
@@ -644,41 +704,26 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE FROM ta_l WHERE a > 0 AND b = 'e';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
10 j 2008-01-01 23:59:59
-delete partition pushdown
-TRUNCATE TABLE ta_l2;
-INSERT INTO ta_l2 SELECT a, b, c FROM tb_l;
-DELETE FROM ta_l2 WHERE b = 'g';
-SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
-a b date_format(c, '%Y-%m-%d %H:%i:%s')
-1 f 2008-07-01 10:21:39
-3 j 2007-05-04 20:03:11
-4 i 2003-10-30 05:01:03
-5 h 2001-10-31 23:59:59
-
-delete partition index pushdown
-TRUNCATE TABLE ta_l2;
-INSERT INTO ta_l2 SELECT a, b, c FROM tb_l;
-DELETE FROM ta_l2 WHERE a > 0 AND b = 'g';
-SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
-a b date_format(c, '%Y-%m-%d %H:%i:%s')
-1 f 2008-07-01 10:21:39
-3 j 2007-05-04 20:03:11
-4 i 2003-10-30 05:01:03
-5 h 2001-10-31 23:59:59
-
truncate
+connection master_1;
TRUNCATE TABLE ta_l;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for master_1
for child2
diff --git a/storage/spider/mysql-test/spider/handler/r/basic_sql_part.result b/storage/spider/mysql-test/spider/handler/r/basic_sql_part.result
new file mode 100644
index 00000000000..0f4029404a7
--- /dev/null
+++ b/storage/spider/mysql-test/spider/handler/r/basic_sql_part.result
@@ -0,0 +1,141 @@
+for master_1
+for child2
+child2_1
+child2_2
+child2_3
+for child3
+child3_1
+child3_2
+child3_3
+
+drop and create databases
+connection master_1;
+DROP DATABASE IF EXISTS auto_test_local;
+CREATE DATABASE auto_test_local;
+USE auto_test_local;
+connection child2_1;
+DROP DATABASE IF EXISTS auto_test_remote;
+CREATE DATABASE auto_test_remote;
+USE auto_test_remote;
+connection child2_2;
+DROP DATABASE IF EXISTS auto_test_remote2;
+CREATE DATABASE auto_test_remote2;
+USE auto_test_remote2;
+
+test select 1
+connection master_1;
+SELECT 1;
+1
+1
+connection master_1;
+DROP TABLE IF EXISTS tb_l;
+CREATE TABLE tb_l (
+a INT,
+b CHAR(1),
+c DATETIME,
+PRIMARY KEY(a)
+) MASTER_1_ENGINE2 MASTER_1_CHARSET2
+INSERT INTO tb_l (a, b, c) VALUES
+(1, 'f', '2008-07-01 10:21:39'),
+(2, 'g', '2000-02-01 00:00:00'),
+(3, 'j', '2007-05-04 20:03:11'),
+(4, 'i', '2003-10-30 05:01:03'),
+(5, 'h', '2001-10-31 23:59:59');
+
+create table with partition and select test
+connection master_1;
+CREATE TABLE ta_l2 (
+PRIMARY KEY(a)
+) MASTER_1_ENGINE MASTER_1_COMMENT_P_2_1
+SELECT a, b, c FROM tb_l
+connection master_1;
+SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
+a b date_format(c, '%Y-%m-%d %H:%i:%s')
+1 f 2008-07-01 10:21:39
+2 g 2000-02-01 00:00:00
+3 j 2007-05-04 20:03:11
+4 i 2003-10-30 05:01:03
+5 h 2001-10-31 23:59:59
+
+select partition using pushdown
+connection master_1;
+SELECT a.a, a.b, date_format(a.c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 a WHERE
+a.b = 'g' ORDER BY a.a;
+a b date_format(a.c, '%Y-%m-%d %H:%i:%s')
+2 g 2000-02-01 00:00:00
+
+select partition using index pushdown
+connection master_1;
+SELECT a.a, a.b, date_format(a.c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 a WHERE
+a.a > 0 AND a.b = 'g' ORDER BY a.a;
+a b date_format(a.c, '%Y-%m-%d %H:%i:%s')
+2 g 2000-02-01 00:00:00
+
+update partition pushdown
+connection master_1;
+UPDATE ta_l2 SET b = 'e', c = '2009-03-03 03:03:03' WHERE b = 'j';
+connection master_1;
+SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
+a b date_format(c, '%Y-%m-%d %H:%i:%s')
+1 f 2008-07-01 10:21:39
+2 g 2000-02-01 00:00:00
+3 e 2009-03-03 03:03:03
+4 i 2003-10-30 05:01:03
+5 h 2001-10-31 23:59:59
+
+update partition index pushdown
+connection master_1;
+UPDATE ta_l2 SET b = 'j', c = '2009-03-03 03:03:03' WHERE a > 0 AND b = 'e';
+connection master_1;
+SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
+a b date_format(c, '%Y-%m-%d %H:%i:%s')
+1 f 2008-07-01 10:21:39
+2 g 2000-02-01 00:00:00
+3 j 2009-03-03 03:03:03
+4 i 2003-10-30 05:01:03
+5 h 2001-10-31 23:59:59
+
+delete partition pushdown
+TRUNCATE TABLE ta_l2;
+INSERT INTO ta_l2 SELECT a, b, c FROM tb_l;
+connection master_1;
+DELETE FROM ta_l2 WHERE b = 'g';
+connection master_1;
+SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
+a b date_format(c, '%Y-%m-%d %H:%i:%s')
+1 f 2008-07-01 10:21:39
+3 j 2007-05-04 20:03:11
+4 i 2003-10-30 05:01:03
+5 h 2001-10-31 23:59:59
+
+delete partition index pushdown
+TRUNCATE TABLE ta_l2;
+INSERT INTO ta_l2 SELECT a, b, c FROM tb_l;
+connection master_1;
+DELETE FROM ta_l2 WHERE a > 0 AND b = 'g';
+connection master_1;
+SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
+a b date_format(c, '%Y-%m-%d %H:%i:%s')
+1 f 2008-07-01 10:21:39
+3 j 2007-05-04 20:03:11
+4 i 2003-10-30 05:01:03
+5 h 2001-10-31 23:59:59
+
+deinit
+connection master_1;
+DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
+DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
+DROP DATABASE IF EXISTS auto_test_remote2;
+for master_1
+for child2
+child2_1
+child2_2
+child2_3
+for child3
+child3_1
+child3_2
+child3_3
+
+end of test
diff --git a/storage/spider/mysql-test/spider/handler/r/direct_aggregate.result b/storage/spider/mysql-test/spider/handler/r/direct_aggregate.result
index 0e845fabda1..9a8660ba79e 100644
--- a/storage/spider/mysql-test/spider/handler/r/direct_aggregate.result
+++ b/storage/spider/mysql-test/spider/handler/r/direct_aggregate.result
@@ -9,22 +9,27 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
create table select test
+connection master_1;
DROP TABLE IF EXISTS ta_l;
CREATE TABLE ta_l (
a INT,
@@ -40,6 +45,7 @@ INSERT INTO ta_l (a, b, c) VALUES
(5, 'c', '2001-12-31 23:59:59');
direct_aggregating test
+connection master_1;
SHOW STATUS LIKE 'Spider_direct_aggregate';
Variable_name Value
Spider_direct_aggregate 0
@@ -48,76 +54,38 @@ COUNT(*)
5
SHOW STATUS LIKE 'Spider_direct_aggregate';
Variable_name Value
-Spider_direct_aggregate 1
+Spider_direct_aggregate 0
SELECT MAX(a) FROM ta_l;
MAX(a)
5
SHOW STATUS LIKE 'Spider_direct_aggregate';
Variable_name Value
-Spider_direct_aggregate 2
+Spider_direct_aggregate 0
SELECT MIN(a) FROM ta_l;
MIN(a)
1
SHOW STATUS LIKE 'Spider_direct_aggregate';
Variable_name Value
-Spider_direct_aggregate 3
+Spider_direct_aggregate 0
SELECT MAX(a) FROM ta_l WHERE a < 5;
MAX(a)
4
SHOW STATUS LIKE 'Spider_direct_aggregate';
Variable_name Value
-Spider_direct_aggregate 4
+Spider_direct_aggregate 0
SELECT MIN(a) FROM ta_l WHERE a > 1;
MIN(a)
2
SHOW STATUS LIKE 'Spider_direct_aggregate';
Variable_name Value
-Spider_direct_aggregate 5
-
-handler with partition test
-CREATE TABLE ta_l2 (
-a INT,
-b CHAR(1),
-c DATETIME,
-PRIMARY KEY(a)
-) MASTER_1_ENGINE MASTER_1_COMMENT2_P_2_1
-SHOW STATUS LIKE 'Spider_direct_aggregate';
-Variable_name Value
-Spider_direct_aggregate 5
-SELECT COUNT(*) FROM ta_l2;
-COUNT(*)
-5
-SHOW STATUS LIKE 'Spider_direct_aggregate';
-Variable_name Value
-Spider_direct_aggregate 7
-SELECT MAX(a) FROM ta_l2;
-MAX(a)
-5
-SHOW STATUS LIKE 'Spider_direct_aggregate';
-Variable_name Value
-Spider_direct_aggregate 9
-SELECT MIN(a) FROM ta_l2;
-MIN(a)
-1
-SHOW STATUS LIKE 'Spider_direct_aggregate';
-Variable_name Value
-Spider_direct_aggregate 11
-SELECT MAX(a) FROM ta_l2 WHERE a < 5;
-MAX(a)
-4
-SHOW STATUS LIKE 'Spider_direct_aggregate';
-Variable_name Value
-Spider_direct_aggregate 13
-SELECT MIN(a) FROM ta_l2 WHERE a > 1;
-MIN(a)
-2
-SHOW STATUS LIKE 'Spider_direct_aggregate';
-Variable_name Value
-Spider_direct_aggregate 15
+Spider_direct_aggregate 0
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for master_1
for child2
diff --git a/storage/spider/mysql-test/spider/handler/r/direct_aggregate_part.result b/storage/spider/mysql-test/spider/handler/r/direct_aggregate_part.result
new file mode 100644
index 00000000000..760b39e16d5
--- /dev/null
+++ b/storage/spider/mysql-test/spider/handler/r/direct_aggregate_part.result
@@ -0,0 +1,90 @@
+for master_1
+for child2
+child2_1
+child2_2
+child2_3
+for child3
+child3_1
+child3_2
+child3_3
+
+drop and create databases
+connection master_1;
+DROP DATABASE IF EXISTS auto_test_local;
+CREATE DATABASE auto_test_local;
+USE auto_test_local;
+connection child2_1;
+DROP DATABASE IF EXISTS auto_test_remote;
+CREATE DATABASE auto_test_remote;
+USE auto_test_remote;
+connection child2_2;
+DROP DATABASE IF EXISTS auto_test_remote2;
+CREATE DATABASE auto_test_remote2;
+USE auto_test_remote2;
+
+test select 1
+connection master_1;
+SELECT 1;
+1
+1
+
+with partition test
+connection master_1;
+CREATE TABLE ta_l2 (
+a INT,
+b CHAR(1),
+c DATETIME,
+PRIMARY KEY(a)
+) MASTER_1_ENGINE MASTER_1_COMMENT2_P_2_1
+SHOW STATUS LIKE 'Spider_direct_aggregate';
+Variable_name Value
+Spider_direct_aggregate 0
+SELECT COUNT(*) FROM ta_l2;
+COUNT(*)
+5
+SHOW STATUS LIKE 'Spider_direct_aggregate';
+Variable_name Value
+Spider_direct_aggregate 0
+SELECT MAX(a) FROM ta_l2;
+MAX(a)
+5
+SHOW STATUS LIKE 'Spider_direct_aggregate';
+Variable_name Value
+Spider_direct_aggregate 0
+SELECT MIN(a) FROM ta_l2;
+MIN(a)
+1
+SHOW STATUS LIKE 'Spider_direct_aggregate';
+Variable_name Value
+Spider_direct_aggregate 0
+SELECT MAX(a) FROM ta_l2 WHERE a < 5;
+MAX(a)
+4
+SHOW STATUS LIKE 'Spider_direct_aggregate';
+Variable_name Value
+Spider_direct_aggregate 0
+SELECT MIN(a) FROM ta_l2 WHERE a > 1;
+MIN(a)
+2
+SHOW STATUS LIKE 'Spider_direct_aggregate';
+Variable_name Value
+Spider_direct_aggregate 0
+
+deinit
+connection master_1;
+DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
+DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
+DROP DATABASE IF EXISTS auto_test_remote2;
+for master_1
+for child2
+child2_1
+child2_2
+child2_3
+for child3
+child3_1
+child3_2
+child3_3
+
+end of test
diff --git a/storage/spider/mysql-test/spider/handler/r/direct_update.result b/storage/spider/mysql-test/spider/handler/r/direct_update.result
index ea3a23eb8b8..74dae7aec2e 100644
--- a/storage/spider/mysql-test/spider/handler/r/direct_update.result
+++ b/storage/spider/mysql-test/spider/handler/r/direct_update.result
@@ -9,22 +9,27 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
create table select test
+connection master_1;
DROP TABLE IF EXISTS ta_l;
CREATE TABLE ta_l (
a INT,
@@ -40,9 +45,9 @@ INSERT INTO ta_l (a, b, c) VALUES
(5, 'c', '2001-12-31 23:59:59');
direct_updating test
+connection master_1;
SHOW STATUS LIKE 'Spider_direct_update';
Variable_name Value
-Spider_direct_update 0
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-01 10:21:39
@@ -54,7 +59,6 @@ update all rows with function
UPDATE ta_l SET c = ADDDATE(c, 1);
SHOW STATUS LIKE 'Spider_direct_update';
Variable_name Value
-Spider_direct_update 1
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-02 10:21:39
@@ -66,7 +70,6 @@ update by primary key
UPDATE ta_l SET b = 'x' WHERE a = 3;
SHOW STATUS LIKE 'Spider_direct_update';
Variable_name Value
-Spider_direct_update 2
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-02 10:21:39
@@ -78,7 +81,6 @@ update by a column without index
UPDATE ta_l SET c = '2011-10-17' WHERE b = 'x';
SHOW STATUS LIKE 'Spider_direct_update';
Variable_name Value
-Spider_direct_update 3
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-02 10:21:39
@@ -90,7 +92,6 @@ update by primary key with order and limit
UPDATE ta_l SET c = ADDDATE(c, 1) WHERE a < 4 ORDER BY b DESC LIMIT 1;
SHOW STATUS LIKE 'Spider_direct_update';
Variable_name Value
-Spider_direct_update 4
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-02 10:21:39
@@ -102,7 +103,6 @@ delete by primary key with order and limit
DELETE FROM ta_l WHERE a < 4 ORDER BY c LIMIT 1;
SHOW STATUS LIKE 'Spider_direct_delete';
Variable_name Value
-Spider_direct_delete 1
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-02 10:21:39
@@ -113,7 +113,6 @@ delete by a column without index
DELETE FROM ta_l WHERE b = 'c';
SHOW STATUS LIKE 'Spider_direct_delete';
Variable_name Value
-Spider_direct_delete 2
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-02 10:21:39
@@ -123,111 +122,17 @@ delete by primary key
DELETE FROM ta_l WHERE a = 3;
SHOW STATUS LIKE 'Spider_direct_delete';
Variable_name Value
-Spider_direct_delete 3
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-02 10:21:39
4 d 2003-12-01 05:01:03
-handler with partition test
-CREATE TABLE ta_l2 (
-a INT,
-b CHAR(1),
-c DATETIME,
-PRIMARY KEY(a)
-) MASTER_1_ENGINE MASTER_1_COMMENT2_P_2_1
-SHOW STATUS LIKE 'Spider_direct_update';
-Variable_name Value
-Spider_direct_update 4
-SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
-a b date_format(c, '%Y-%m-%d %H:%i:%s')
-1 a 2008-08-01 10:21:39
-2 b 2000-01-01 00:00:00
-3 e 2007-06-04 20:03:11
-4 d 2003-11-30 05:01:03
-5 c 2001-12-31 23:59:59
-update all rows with function
-UPDATE ta_l2 SET c = ADDDATE(c, 1);
-SHOW STATUS LIKE 'Spider_direct_update';
-Variable_name Value
-Spider_direct_update 6
-SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
-a b date_format(c, '%Y-%m-%d %H:%i:%s')
-1 a 2008-08-02 10:21:39
-2 b 2000-01-02 00:00:00
-3 e 2007-06-05 20:03:11
-4 d 2003-12-01 05:01:03
-5 c 2002-01-01 23:59:59
-update by primary key
-UPDATE ta_l2 SET b = 'x' WHERE a = 3;
-SHOW STATUS LIKE 'Spider_direct_update';
-Variable_name Value
-Spider_direct_update 7
-SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
-a b date_format(c, '%Y-%m-%d %H:%i:%s')
-1 a 2008-08-02 10:21:39
-2 b 2000-01-02 00:00:00
-3 x 2007-06-05 20:03:11
-4 d 2003-12-01 05:01:03
-5 c 2002-01-01 23:59:59
-update by a column without index
-UPDATE ta_l2 SET c = '2011-10-17' WHERE b = 'x';
-SHOW STATUS LIKE 'Spider_direct_update';
-Variable_name Value
-Spider_direct_update 9
-SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
-a b date_format(c, '%Y-%m-%d %H:%i:%s')
-1 a 2008-08-02 10:21:39
-2 b 2000-01-02 00:00:00
-3 x 2011-10-17 00:00:00
-4 d 2003-12-01 05:01:03
-5 c 2002-01-01 23:59:59
-update by primary key with order and limit
-UPDATE ta_l2 SET c = ADDDATE(c, 1) WHERE a < 4 ORDER BY b DESC LIMIT 1;
-SHOW STATUS LIKE 'Spider_direct_update';
-Variable_name Value
-Spider_direct_update 10
-SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
-a b date_format(c, '%Y-%m-%d %H:%i:%s')
-1 a 2008-08-02 10:21:39
-2 b 2000-01-02 00:00:00
-3 x 2011-10-18 00:00:00
-4 d 2003-12-01 05:01:03
-5 c 2002-01-01 23:59:59
-delete by primary key with order and limit
-DELETE FROM ta_l2 WHERE a < 4 ORDER BY c LIMIT 1;
-SHOW STATUS LIKE 'Spider_direct_delete';
-Variable_name Value
-Spider_direct_delete 4
-SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
-a b date_format(c, '%Y-%m-%d %H:%i:%s')
-1 a 2008-08-02 10:21:39
-3 x 2011-10-18 00:00:00
-4 d 2003-12-01 05:01:03
-5 c 2002-01-01 23:59:59
-delete by a column without index
-DELETE FROM ta_l2 WHERE b = 'c';
-SHOW STATUS LIKE 'Spider_direct_delete';
-Variable_name Value
-Spider_direct_delete 6
-SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
-a b date_format(c, '%Y-%m-%d %H:%i:%s')
-1 a 2008-08-02 10:21:39
-3 x 2011-10-18 00:00:00
-4 d 2003-12-01 05:01:03
-delete by primary key
-DELETE FROM ta_l2 WHERE a = 3;
-SHOW STATUS LIKE 'Spider_direct_delete';
-Variable_name Value
-Spider_direct_delete 7
-SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
-a b date_format(c, '%Y-%m-%d %H:%i:%s')
-1 a 2008-08-02 10:21:39
-4 d 2003-12-01 05:01:03
-
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for master_1
for child2
diff --git a/storage/spider/mysql-test/spider/handler/r/direct_update_part.result b/storage/spider/mysql-test/spider/handler/r/direct_update_part.result
new file mode 100644
index 00000000000..6db7c01f563
--- /dev/null
+++ b/storage/spider/mysql-test/spider/handler/r/direct_update_part.result
@@ -0,0 +1,137 @@
+for master_1
+for child2
+child2_1
+child2_2
+child2_3
+for child3
+child3_1
+child3_2
+child3_3
+
+drop and create databases
+connection master_1;
+DROP DATABASE IF EXISTS auto_test_local;
+CREATE DATABASE auto_test_local;
+USE auto_test_local;
+connection child2_1;
+DROP DATABASE IF EXISTS auto_test_remote;
+CREATE DATABASE auto_test_remote;
+USE auto_test_remote;
+connection child2_2;
+DROP DATABASE IF EXISTS auto_test_remote2;
+CREATE DATABASE auto_test_remote2;
+USE auto_test_remote2;
+
+test select 1
+connection master_1;
+SELECT 1;
+1
+1
+
+with partition test
+connection master_1;
+CREATE TABLE ta_l2 (
+a INT,
+b CHAR(1),
+c DATETIME,
+PRIMARY KEY(a)
+) MASTER_1_ENGINE MASTER_1_COMMENT2_P_2_1
+SHOW STATUS LIKE 'Spider_direct_update';
+Variable_name Value
+SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
+a b date_format(c, '%Y-%m-%d %H:%i:%s')
+1 a 2008-08-01 10:21:39
+2 b 2000-01-01 00:00:00
+3 e 2007-06-04 20:03:11
+4 d 2003-11-30 05:01:03
+5 c 2001-12-31 23:59:59
+update all rows with function
+UPDATE ta_l2 SET c = ADDDATE(c, 1);
+SHOW STATUS LIKE 'Spider_direct_update';
+Variable_name Value
+SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
+a b date_format(c, '%Y-%m-%d %H:%i:%s')
+1 a 2008-08-02 10:21:39
+2 b 2000-01-02 00:00:00
+3 e 2007-06-05 20:03:11
+4 d 2003-12-01 05:01:03
+5 c 2002-01-01 23:59:59
+update by primary key
+UPDATE ta_l2 SET b = 'x' WHERE a = 3;
+SHOW STATUS LIKE 'Spider_direct_update';
+Variable_name Value
+SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
+a b date_format(c, '%Y-%m-%d %H:%i:%s')
+1 a 2008-08-02 10:21:39
+2 b 2000-01-02 00:00:00
+3 x 2007-06-05 20:03:11
+4 d 2003-12-01 05:01:03
+5 c 2002-01-01 23:59:59
+update by a column without index
+UPDATE ta_l2 SET c = '2011-10-17' WHERE b = 'x';
+SHOW STATUS LIKE 'Spider_direct_update';
+Variable_name Value
+SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
+a b date_format(c, '%Y-%m-%d %H:%i:%s')
+1 a 2008-08-02 10:21:39
+2 b 2000-01-02 00:00:00
+3 x 2011-10-17 00:00:00
+4 d 2003-12-01 05:01:03
+5 c 2002-01-01 23:59:59
+update by primary key with order and limit
+UPDATE ta_l2 SET c = ADDDATE(c, 1) WHERE a < 4 ORDER BY b DESC LIMIT 1;
+SHOW STATUS LIKE 'Spider_direct_update';
+Variable_name Value
+SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
+a b date_format(c, '%Y-%m-%d %H:%i:%s')
+1 a 2008-08-02 10:21:39
+2 b 2000-01-02 00:00:00
+3 x 2011-10-18 00:00:00
+4 d 2003-12-01 05:01:03
+5 c 2002-01-01 23:59:59
+delete by primary key with order and limit
+DELETE FROM ta_l2 WHERE a < 4 ORDER BY c LIMIT 1;
+SHOW STATUS LIKE 'Spider_direct_delete';
+Variable_name Value
+SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
+a b date_format(c, '%Y-%m-%d %H:%i:%s')
+1 a 2008-08-02 10:21:39
+3 x 2011-10-18 00:00:00
+4 d 2003-12-01 05:01:03
+5 c 2002-01-01 23:59:59
+delete by a column without index
+DELETE FROM ta_l2 WHERE b = 'c';
+SHOW STATUS LIKE 'Spider_direct_delete';
+Variable_name Value
+SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
+a b date_format(c, '%Y-%m-%d %H:%i:%s')
+1 a 2008-08-02 10:21:39
+3 x 2011-10-18 00:00:00
+4 d 2003-12-01 05:01:03
+delete by primary key
+DELETE FROM ta_l2 WHERE a = 3;
+SHOW STATUS LIKE 'Spider_direct_delete';
+Variable_name Value
+SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
+a b date_format(c, '%Y-%m-%d %H:%i:%s')
+1 a 2008-08-02 10:21:39
+4 d 2003-12-01 05:01:03
+
+deinit
+connection master_1;
+DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
+DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
+DROP DATABASE IF EXISTS auto_test_remote2;
+for master_1
+for child2
+child2_1
+child2_2
+child2_3
+for child3
+child3_1
+child3_2
+child3_3
+
+end of test
diff --git a/storage/spider/mysql-test/spider/handler/r/function.result b/storage/spider/mysql-test/spider/handler/r/function.result
index 764c774514b..c088a8a9541 100644
--- a/storage/spider/mysql-test/spider/handler/r/function.result
+++ b/storage/spider/mysql-test/spider/handler/r/function.result
@@ -9,22 +9,27 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
in()
+connection master_1;
CREATE TABLE t1 (
a VARCHAR(255),
PRIMARY KEY(a)
@@ -41,12 +46,14 @@ insert into t1 select a + 128 from t1;
insert into t1 select a + 256 from t1;
insert into t1 select a + 512 from t1;
flush tables;
+connection master_1;
select a from t1 where a in ('15', '120');
a
120
15
date_sub()
+connection master_1;
DROP TABLE IF EXISTS ta_l;
CREATE TABLE ta_l (
a INT,
@@ -124,6 +131,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
4 d 2003-02-03 06:00:03
5 c 2001-03-07 00:58:59
UPDATE ta_l SET c = DATE_ADD(c, INTERVAL 1 SECOND);
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2007-10-07 11:20:40
@@ -133,8 +141,11 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 c 2001-03-07 00:59:00
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for master_1
for child2
diff --git a/storage/spider/mysql-test/spider/handler/r/ha.result b/storage/spider/mysql-test/spider/handler/r/ha.result
index e05ecb32e1b..f8833c229ef 100644
--- a/storage/spider/mysql-test/spider/handler/r/ha.result
+++ b/storage/spider/mysql-test/spider/handler/r/ha.result
@@ -18,34 +18,43 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
+connection child2_3;
DROP DATABASE IF EXISTS auto_test_remote3;
CREATE DATABASE auto_test_remote3;
USE auto_test_remote3;
+connection child3_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child3_2;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child3_3;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
test select 1
+connection master_1;
SELECT 1;
1
1
create table test
+connection master_1;
DROP TABLE IF EXISTS ta_l;
CREATE TABLE ta_l (
a INT,
@@ -61,6 +70,7 @@ INSERT INTO ta_l (a, b, c) VALUES
(5, 'c', '2001-12-31 23:59:59');
select test
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-01 10:21:39
@@ -70,6 +80,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 c 2001-12-31 23:59:59
fail-over test
+connection master_1;
SHOW STATUS LIKE 'Spider_mon_table_cache_version%';
Variable_name Value
Spider_mon_table_cache_version 0
@@ -101,6 +112,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
6 e 2011-05-05 20:04:05
recovery test
+connection master_1;
ALTER TABLE ta_l
CONNECTION='host "localhost", user "root", password "",
msi "5", mkd "2",
@@ -113,6 +125,7 @@ auto_test_local ta_l 1 2
SELECT spider_copy_tables('ta_l', '0', '1');
spider_copy_tables('ta_l', '0', '1')
1
+connection master_1;
ALTER TABLE ta_l
CONNECTION='host "localhost", user "root", password "",
msi "5", mkd "2",
@@ -134,117 +147,14 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
6 e 2011-05-05 20:04:05
8 g 2011-05-05 21:33:30
DROP TABLE ta_l;
+connection master_1;
SELECT spider_flush_table_mon_cache();
spider_flush_table_mon_cache()
1
-create table with partition test
-DROP TABLE IF EXISTS ta_l2;
-CREATE TABLE ta_l2 (
-a INT,
-b CHAR(1),
-c DATETIME,
-PRIMARY KEY(a)
-) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_HA_P_2_1
-INSERT INTO ta_l2 (a, b, c) VALUES
-(1, 'a', '2008-08-01 10:21:39'),
-(2, 'b', '2000-01-01 00:00:00'),
-(3, 'e', '2007-06-04 20:03:11'),
-(4, 'd', '2003-11-30 05:01:03'),
-(5, 'c', '2001-12-31 23:59:59');
-
-select test
-SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
-a b date_format(c, '%Y-%m-%d %H:%i:%s')
-1 a 2008-08-01 10:21:39
-2 b 2000-01-01 00:00:00
-3 e 2007-06-04 20:03:11
-4 d 2003-11-30 05:01:03
-5 c 2001-12-31 23:59:59
-
-fail-over test
-SHOW STATUS LIKE 'Spider_mon_table_cache_version%';
-Variable_name Value
-Spider_mon_table_cache_version 1
-Spider_mon_table_cache_version_req 2
-INSERT INTO ta_l2 (a, b, c) VALUES
-(6, 'e', '2011-05-05 20:04:05');
-ERROR HY000: Table 'auto_test_remote3.ta_r4' get a problem
-SELECT db_name, table_name, link_id, link_status FROM mysql.spider_tables
-ORDER BY db_name, table_name, link_id;
-db_name table_name link_id link_status
-auto_test_local ta_l2#P#pt1 0 1
-auto_test_local ta_l2#P#pt1 1 1
-auto_test_local ta_l2#P#pt2 0 1
-auto_test_local ta_l2#P#pt2 1 3
-SELECT db_name, table_name, link_id FROM mysql.spider_link_failed_log;
-db_name table_name link_id
-auto_test_local ta_l 1
-auto_test_local ta_l2#P#pt2 1
-SHOW STATUS LIKE 'Spider_mon_table_cache_version%';
-Variable_name Value
-Spider_mon_table_cache_version 2
-Spider_mon_table_cache_version_req 2
-INSERT INTO ta_l2 (a, b, c) VALUES
-(6, 'e', '2011-05-05 20:04:05');
-SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
-a b date_format(c, '%Y-%m-%d %H:%i:%s')
-1 a 2008-08-01 10:21:39
-2 b 2000-01-01 00:00:00
-3 e 2007-06-04 20:03:11
-4 d 2003-11-30 05:01:03
-5 c 2001-12-31 23:59:59
-6 e 2011-05-05 20:04:05
-
-recovery test
-ALTER TABLE ta_l2
-PARTITION BY KEY(a) (
-PARTITION pt1 COMMENT='srv "s_2_1 s_2_2", tbl "ta_r ta_r3",
- priority "1000"',
-PARTITION pt2 COMMENT='srv "s_2_1 s_2_3", tbl "ta_r2 ta_r4",
- priority "1000001", lst "0 2"'
- );
-SELECT db_name, table_name, link_id, link_status FROM mysql.spider_tables
-ORDER BY db_name, table_name, link_id;
-db_name table_name link_id link_status
-auto_test_local ta_l2#P#pt1 0 1
-auto_test_local ta_l2#P#pt1 1 1
-auto_test_local ta_l2#P#pt2 0 1
-auto_test_local ta_l2#P#pt2 1 2
-SELECT spider_copy_tables('ta_l2#P#pt2', '0', '1');
-spider_copy_tables('ta_l2#P#pt2', '0', '1')
-1
-ALTER TABLE ta_l2
-PARTITION BY KEY(a) (
-PARTITION pt1 COMMENT='srv "s_2_1 s_2_2", tbl "ta_r ta_r3",
- priority "1000"',
-PARTITION pt2 COMMENT='srv "s_2_1 s_2_3", tbl "ta_r2 ta_r4",
- priority "1000001", lst "0 1"'
- );
-SELECT db_name, table_name, link_id, link_status FROM mysql.spider_tables
-ORDER BY db_name, table_name, link_id;
-db_name table_name link_id link_status
-auto_test_local ta_l2#P#pt1 0 1
-auto_test_local ta_l2#P#pt1 1 1
-auto_test_local ta_l2#P#pt2 0 1
-auto_test_local ta_l2#P#pt2 1 1
-INSERT INTO ta_l2 (a, b, c) VALUES
-(8, 'g', '2011-05-05 21:33:30'),
-(9, 'h', '2011-05-05 22:32:10');
-SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
-a b date_format(c, '%Y-%m-%d %H:%i:%s')
-1 a 2008-08-01 10:21:39
-2 b 2000-01-01 00:00:00
-3 e 2007-06-04 20:03:11
-4 d 2003-11-30 05:01:03
-5 c 2001-12-31 23:59:59
-6 e 2011-05-05 20:04:05
-8 g 2011-05-05 21:33:30
-9 h 2011-05-05 22:32:10
-DROP TABLE ta_l2;
-
active standby test
create table test
+connection master_1;
DROP TABLE IF EXISTS ta_l;
CREATE TABLE ta_l (
a INT,
@@ -260,6 +170,7 @@ INSERT INTO ta_l (a, b, c) VALUES
(5, 'c', '2001-12-31 23:59:59');
select test
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-01 10:21:39
@@ -269,9 +180,10 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 c 2001-12-31 23:59:59
fail-over test
+connection master_1;
SHOW STATUS LIKE 'Spider_mon_table_cache_version%';
Variable_name Value
-Spider_mon_table_cache_version 2
+Spider_mon_table_cache_version 1
Spider_mon_table_cache_version_req 2
INSERT INTO ta_l (a, b, c) VALUES
(6, 'e', '2011-05-05 20:04:05');
@@ -284,7 +196,6 @@ auto_test_local ta_l 1 1
SELECT db_name, table_name, link_id FROM mysql.spider_link_failed_log;
db_name table_name link_id
auto_test_local ta_l 1
-auto_test_local ta_l2#P#pt2 1
auto_test_local ta_l 0
SHOW STATUS LIKE 'Spider_mon_table_cache_version%';
Variable_name Value
@@ -297,6 +208,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
6 e 2011-05-05 20:04:05
recovery test
+connection master_1;
ALTER TABLE ta_l
CONNECTION='host "localhost", user "root", password "",
msi "5", mkd "2", alc "1",
@@ -312,102 +224,25 @@ SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
8 g 2011-05-05 21:33:30
DROP TABLE ta_l;
+connection master_1;
SELECT spider_flush_table_mon_cache();
spider_flush_table_mon_cache()
1
-create table with partition test
-DROP TABLE IF EXISTS ta_l2;
-CREATE TABLE ta_l2 (
-a INT,
-b CHAR(1),
-c DATETIME,
-PRIMARY KEY(a)
-) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_HA_AS_P_2_1
-INSERT INTO ta_l2 (a, b, c) VALUES
-(1, 'a', '2008-08-01 10:21:39'),
-(2, 'b', '2000-01-01 00:00:00'),
-(3, 'e', '2007-06-04 20:03:11'),
-(4, 'd', '2003-11-30 05:01:03'),
-(5, 'c', '2001-12-31 23:59:59');
-
-select test
-SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
-a b date_format(c, '%Y-%m-%d %H:%i:%s')
-1 a 2008-08-01 10:21:39
-2 b 2000-01-01 00:00:00
-3 e 2007-06-04 20:03:11
-4 d 2003-11-30 05:01:03
-5 c 2001-12-31 23:59:59
-
-fail-over test
-SHOW STATUS LIKE 'Spider_mon_table_cache_version%';
-Variable_name Value
-Spider_mon_table_cache_version 2
-Spider_mon_table_cache_version_req 3
-INSERT INTO ta_l2 (a, b, c) VALUES
-(6, 'e', '2011-05-05 20:04:05');
-ERROR HY000: Table 'auto_test_remote.ta_r2' get a problem
-SELECT db_name, table_name, link_id, link_status FROM mysql.spider_tables
-ORDER BY db_name, table_name, link_id;
-db_name table_name link_id link_status
-auto_test_local ta_l2#P#pt1 0 1
-auto_test_local ta_l2#P#pt1 1 1
-auto_test_local ta_l2#P#pt2 0 3
-auto_test_local ta_l2#P#pt2 1 1
-SELECT db_name, table_name, link_id FROM mysql.spider_link_failed_log;
-db_name table_name link_id
-auto_test_local ta_l 1
-auto_test_local ta_l2#P#pt2 1
-auto_test_local ta_l 0
-auto_test_local ta_l2#P#pt2 0
-SHOW STATUS LIKE 'Spider_mon_table_cache_version%';
-Variable_name Value
-Spider_mon_table_cache_version 3
-Spider_mon_table_cache_version_req 3
-INSERT INTO ta_l2 (a, b, c) VALUES
-(6, 'e', '2011-05-05 20:04:05');
-SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
-a b date_format(c, '%Y-%m-%d %H:%i:%s')
-1 a 2008-08-01 10:21:39
-3 e 2007-06-04 20:03:11
-5 c 2001-12-31 23:59:59
-6 e 2011-05-05 20:04:05
-
-recovery test
-ALTER TABLE ta_l2
-PARTITION BY KEY(a) (
-PARTITION pt1 COMMENT='srv "s_2_1 s_2_2", tbl "ta_r ta_r3",
- priority "1000"',
-PARTITION pt2 COMMENT='srv "s_2_1 s_2_3", tbl "ta_r2 ta_r4",
- priority "1000001", lst "1 0"'
- );
-SELECT db_name, table_name, link_id, link_status FROM mysql.spider_tables
-ORDER BY db_name, table_name, link_id;
-db_name table_name link_id link_status
-auto_test_local ta_l2#P#pt1 0 1
-auto_test_local ta_l2#P#pt1 1 1
-auto_test_local ta_l2#P#pt2 0 1
-auto_test_local ta_l2#P#pt2 1 1
-INSERT INTO ta_l2 (a, b, c) VALUES
-(8, 'g', '2011-05-05 21:33:30'),
-(9, 'h', '2011-05-05 22:32:10');
-SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
-a b date_format(c, '%Y-%m-%d %H:%i:%s')
-1 a 2008-08-01 10:21:39
-3 e 2007-06-04 20:03:11
-5 c 2001-12-31 23:59:59
-8 g 2011-05-05 21:33:30
-9 h 2011-05-05 22:32:10
-DROP TABLE ta_l2;
-
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
+connection child2_3;
DROP DATABASE IF EXISTS auto_test_remote3;
+connection child3_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child3_2;
DROP DATABASE IF EXISTS auto_test_local;
+connection child3_3;
DROP DATABASE IF EXISTS auto_test_local;
for master_1
for child2
diff --git a/storage/spider/mysql-test/spider/handler/r/ha_part.result b/storage/spider/mysql-test/spider/handler/r/ha_part.result
new file mode 100644
index 00000000000..315f37298bc
--- /dev/null
+++ b/storage/spider/mysql-test/spider/handler/r/ha_part.result
@@ -0,0 +1,286 @@
+for master_1
+for child2
+child2_1
+child2_2
+child2_3
+for child3
+child3_1
+child3_2
+child3_3
+for master_1
+for child2
+child2_1
+child2_2
+child2_3
+for child3
+child3_1
+child3_2
+child3_3
+
+drop and create databases
+connection master_1;
+DROP DATABASE IF EXISTS auto_test_local;
+CREATE DATABASE auto_test_local;
+USE auto_test_local;
+connection child2_1;
+DROP DATABASE IF EXISTS auto_test_remote;
+CREATE DATABASE auto_test_remote;
+USE auto_test_remote;
+connection child2_2;
+DROP DATABASE IF EXISTS auto_test_remote2;
+CREATE DATABASE auto_test_remote2;
+USE auto_test_remote2;
+connection child2_3;
+DROP DATABASE IF EXISTS auto_test_remote3;
+CREATE DATABASE auto_test_remote3;
+USE auto_test_remote3;
+connection child3_1;
+DROP DATABASE IF EXISTS auto_test_local;
+CREATE DATABASE auto_test_local;
+USE auto_test_local;
+connection child3_2;
+DROP DATABASE IF EXISTS auto_test_local;
+CREATE DATABASE auto_test_local;
+USE auto_test_local;
+connection child3_3;
+DROP DATABASE IF EXISTS auto_test_local;
+CREATE DATABASE auto_test_local;
+USE auto_test_local;
+
+test select 1
+connection master_1;
+SELECT 1;
+1
+1
+
+create table with partition test
+connection master_1;
+DROP TABLE IF EXISTS ta_l2;
+CREATE TABLE ta_l2 (
+a INT,
+b CHAR(1),
+c DATETIME,
+PRIMARY KEY(a)
+) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_HA_P_2_1
+INSERT INTO ta_l2 (a, b, c) VALUES
+(1, 'a', '2008-08-01 10:21:39'),
+(2, 'b', '2000-01-01 00:00:00'),
+(3, 'e', '2007-06-04 20:03:11'),
+(4, 'd', '2003-11-30 05:01:03'),
+(5, 'c', '2001-12-31 23:59:59');
+
+select test
+connection master_1;
+SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
+a b date_format(c, '%Y-%m-%d %H:%i:%s')
+1 a 2008-08-01 10:21:39
+2 b 2000-01-01 00:00:00
+3 e 2007-06-04 20:03:11
+4 d 2003-11-30 05:01:03
+5 c 2001-12-31 23:59:59
+
+fail-over test
+connection master_1;
+SHOW STATUS LIKE 'Spider_mon_table_cache_version%';
+Variable_name Value
+Spider_mon_table_cache_version 0
+Spider_mon_table_cache_version_req 1
+INSERT INTO ta_l2 (a, b, c) VALUES
+(6, 'e', '2011-05-05 20:04:05');
+ERROR HY000: Table 'auto_test_remote3.ta_r4' get a problem
+SELECT db_name, table_name, link_id, link_status FROM mysql.spider_tables
+ORDER BY db_name, table_name, link_id;
+db_name table_name link_id link_status
+auto_test_local ta_l2#P#pt1 0 1
+auto_test_local ta_l2#P#pt1 1 1
+auto_test_local ta_l2#P#pt2 0 1
+auto_test_local ta_l2#P#pt2 1 3
+SELECT db_name, table_name, link_id FROM mysql.spider_link_failed_log;
+db_name table_name link_id
+auto_test_local ta_l2#P#pt2 1
+SHOW STATUS LIKE 'Spider_mon_table_cache_version%';
+Variable_name Value
+Spider_mon_table_cache_version 1
+Spider_mon_table_cache_version_req 1
+INSERT INTO ta_l2 (a, b, c) VALUES
+(6, 'e', '2011-05-05 20:04:05');
+SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
+a b date_format(c, '%Y-%m-%d %H:%i:%s')
+1 a 2008-08-01 10:21:39
+2 b 2000-01-01 00:00:00
+3 e 2007-06-04 20:03:11
+4 d 2003-11-30 05:01:03
+5 c 2001-12-31 23:59:59
+6 e 2011-05-05 20:04:05
+
+recovery test
+connection master_1;
+ALTER TABLE ta_l2
+PARTITION BY KEY(a) (
+PARTITION pt1 COMMENT='srv "s_2_1 s_2_2", tbl "ta_r ta_r3",
+ priority "1000"',
+PARTITION pt2 COMMENT='srv "s_2_1 s_2_3", tbl "ta_r2 ta_r4",
+ priority "1000001", lst "0 2"'
+ );
+SELECT db_name, table_name, link_id, link_status FROM mysql.spider_tables
+ORDER BY db_name, table_name, link_id;
+db_name table_name link_id link_status
+auto_test_local ta_l2#P#pt1 0 1
+auto_test_local ta_l2#P#pt1 1 1
+auto_test_local ta_l2#P#pt2 0 1
+auto_test_local ta_l2#P#pt2 1 2
+SELECT spider_copy_tables('ta_l2#P#pt2', '0', '1');
+spider_copy_tables('ta_l2#P#pt2', '0', '1')
+1
+connection master_1;
+ALTER TABLE ta_l2
+PARTITION BY KEY(a) (
+PARTITION pt1 COMMENT='srv "s_2_1 s_2_2", tbl "ta_r ta_r3",
+ priority "1000"',
+PARTITION pt2 COMMENT='srv "s_2_1 s_2_3", tbl "ta_r2 ta_r4",
+ priority "1000001", lst "0 1"'
+ );
+SELECT db_name, table_name, link_id, link_status FROM mysql.spider_tables
+ORDER BY db_name, table_name, link_id;
+db_name table_name link_id link_status
+auto_test_local ta_l2#P#pt1 0 1
+auto_test_local ta_l2#P#pt1 1 1
+auto_test_local ta_l2#P#pt2 0 1
+auto_test_local ta_l2#P#pt2 1 1
+INSERT INTO ta_l2 (a, b, c) VALUES
+(8, 'g', '2011-05-05 21:33:30'),
+(9, 'h', '2011-05-05 22:32:10');
+SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
+a b date_format(c, '%Y-%m-%d %H:%i:%s')
+1 a 2008-08-01 10:21:39
+2 b 2000-01-01 00:00:00
+3 e 2007-06-04 20:03:11
+4 d 2003-11-30 05:01:03
+5 c 2001-12-31 23:59:59
+6 e 2011-05-05 20:04:05
+8 g 2011-05-05 21:33:30
+9 h 2011-05-05 22:32:10
+DROP TABLE ta_l2;
+
+create table with partition test
+connection master_1;
+DROP TABLE IF EXISTS ta_l2;
+CREATE TABLE ta_l2 (
+a INT,
+b CHAR(1),
+c DATETIME,
+PRIMARY KEY(a)
+) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_HA_AS_P_2_1
+INSERT INTO ta_l2 (a, b, c) VALUES
+(1, 'a', '2008-08-01 10:21:39'),
+(2, 'b', '2000-01-01 00:00:00'),
+(3, 'e', '2007-06-04 20:03:11'),
+(4, 'd', '2003-11-30 05:01:03'),
+(5, 'c', '2001-12-31 23:59:59');
+
+select test
+connection master_1;
+SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
+a b date_format(c, '%Y-%m-%d %H:%i:%s')
+1 a 2008-08-01 10:21:39
+2 b 2000-01-01 00:00:00
+3 e 2007-06-04 20:03:11
+4 d 2003-11-30 05:01:03
+5 c 2001-12-31 23:59:59
+
+fail-over test
+connection master_1;
+SHOW STATUS LIKE 'Spider_mon_table_cache_version%';
+Variable_name Value
+Spider_mon_table_cache_version 1
+Spider_mon_table_cache_version_req 1
+INSERT INTO ta_l2 (a, b, c) VALUES
+(6, 'e', '2011-05-05 20:04:05');
+ERROR HY000: Table 'auto_test_remote.ta_r2' get a problem
+SELECT db_name, table_name, link_id, link_status FROM mysql.spider_tables
+ORDER BY db_name, table_name, link_id;
+db_name table_name link_id link_status
+auto_test_local ta_l2#P#pt1 0 1
+auto_test_local ta_l2#P#pt1 1 1
+auto_test_local ta_l2#P#pt2 0 3
+auto_test_local ta_l2#P#pt2 1 1
+SELECT db_name, table_name, link_id FROM mysql.spider_link_failed_log;
+db_name table_name link_id
+auto_test_local ta_l2#P#pt2 1
+auto_test_local ta_l2#P#pt2 0
+SHOW STATUS LIKE 'Spider_mon_table_cache_version%';
+Variable_name Value
+Spider_mon_table_cache_version 1
+Spider_mon_table_cache_version_req 1
+INSERT INTO ta_l2 (a, b, c) VALUES
+(6, 'e', '2011-05-05 20:04:05');
+SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
+a b date_format(c, '%Y-%m-%d %H:%i:%s')
+1 a 2008-08-01 10:21:39
+3 e 2007-06-04 20:03:11
+5 c 2001-12-31 23:59:59
+6 e 2011-05-05 20:04:05
+
+recovery test
+connection master_1;
+ALTER TABLE ta_l2
+PARTITION BY KEY(a) (
+PARTITION pt1 COMMENT='srv "s_2_1 s_2_2", tbl "ta_r ta_r3",
+ priority "1000"',
+PARTITION pt2 COMMENT='srv "s_2_1 s_2_3", tbl "ta_r2 ta_r4",
+ priority "1000001", lst "1 0"'
+ );
+SELECT db_name, table_name, link_id, link_status FROM mysql.spider_tables
+ORDER BY db_name, table_name, link_id;
+db_name table_name link_id link_status
+auto_test_local ta_l2#P#pt1 0 1
+auto_test_local ta_l2#P#pt1 1 1
+auto_test_local ta_l2#P#pt2 0 1
+auto_test_local ta_l2#P#pt2 1 1
+INSERT INTO ta_l2 (a, b, c) VALUES
+(8, 'g', '2011-05-05 21:33:30'),
+(9, 'h', '2011-05-05 22:32:10');
+SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
+a b date_format(c, '%Y-%m-%d %H:%i:%s')
+1 a 2008-08-01 10:21:39
+3 e 2007-06-04 20:03:11
+5 c 2001-12-31 23:59:59
+8 g 2011-05-05 21:33:30
+9 h 2011-05-05 22:32:10
+DROP TABLE ta_l2;
+
+deinit
+connection master_1;
+DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
+DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
+DROP DATABASE IF EXISTS auto_test_remote2;
+connection child2_3;
+DROP DATABASE IF EXISTS auto_test_remote3;
+connection child3_1;
+DROP DATABASE IF EXISTS auto_test_local;
+connection child3_2;
+DROP DATABASE IF EXISTS auto_test_local;
+connection child3_3;
+DROP DATABASE IF EXISTS auto_test_local;
+for master_1
+for child2
+child2_1
+child2_2
+child2_3
+for child3
+child3_1
+child3_2
+child3_3
+for master_1
+for child2
+child2_1
+child2_2
+child2_3
+for child3
+child3_1
+child3_2
+child3_3
+
+end of test
diff --git a/storage/spider/mysql-test/spider/handler/r/spider3_fixes.result b/storage/spider/mysql-test/spider/handler/r/spider3_fixes.result
index 6de174e1a2d..9a8a59153f0 100644
--- a/storage/spider/mysql-test/spider/handler/r/spider3_fixes.result
+++ b/storage/spider/mysql-test/spider/handler/r/spider3_fixes.result
@@ -10,26 +10,34 @@ child3_3
for slave1_1
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection slave1_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
3.1
auto_increment
+connection master_1;
+connection slave1_1;
+connection master_1;
DROP TABLE IF EXISTS t1, t2;
CREATE TABLE t1 (
id int(11) NOT NULL AUTO_INCREMENT,
@@ -198,6 +206,7 @@ LAST_INSERT_ID()
SELECT MAX(id) FROM t2;
MAX(id)
46
+connection slave1_1;
SELECT id FROM t1 ORDER BY id;
id
2
@@ -206,188 +215,16 @@ id
1554
2331
10000
-auto_increment with partition
-DROP TABLE IF EXISTS t1, t2;
-CREATE TABLE t1 (
-id int(11) NOT NULL AUTO_INCREMENT,
-PRIMARY KEY (id)
-) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_INCREMENT1_P_1
-CREATE TABLE t2 (
-id int(11) NOT NULL AUTO_INCREMENT,
-PRIMARY KEY (id)
-) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_INCREMENT1_P_1
-MASTER_1_AUTO_INCREMENT_INCREMENT2
-MASTER_1_AUTO_INCREMENT_OFFSET2
-spider_direct_sql('SET SESSION AUTO_INCREMENT_INCREMENT = 4', '',
-'srv "s_2_1"')
-1
-spider_direct_sql('SET SESSION AUTO_INCREMENT_INCREMENT = 4', '',
-'srv "s_2_2"')
-1
-spider_bg_direct_sql('SET SESSION AUTO_INCREMENT_OFFSET = 2', '',
-'srv "s_2_1"')
-1
-spider_bg_direct_sql('SET SESSION AUTO_INCREMENT_OFFSET = 3', '',
-'srv "s_2_2"')
-1
-CREATE TABLE t1 (
-id int(11) NOT NULL AUTO_INCREMENT,
-PRIMARY KEY (id)
-) SLAVE1_1_ENGINE SLAVE1_1_CHARSET SLAVE1_1_COMMENT_INCREMENT1_P_1
-CREATE TABLE t2 (
-id int(11) NOT NULL AUTO_INCREMENT,
-PRIMARY KEY (id)
-) SLAVE1_1_ENGINE SLAVE1_1_CHARSET SLAVE1_1_COMMENT_INCREMENT1_P_1
-INSERT INTO t1 () VALUES ();
-SELECT LAST_INSERT_ID();
-LAST_INSERT_ID()
-2
-SELECT MAX(id) FROM t1;
-MAX(id)
-2
-INSERT INTO t2 () VALUES ();
-SELECT LAST_INSERT_ID();
-LAST_INSERT_ID()
-3
-SELECT MAX(id) FROM t2;
-MAX(id)
-3
-MASTER_1_AUTO_INCREMENT_OFFSET3
-INSERT INTO t1 (id) VALUES (null);
-SELECT LAST_INSERT_ID();
-LAST_INSERT_ID()
-7
-SELECT MAX(id) FROM t1;
-MAX(id)
-7
-MASTER_1_AUTO_INCREMENT_OFFSET4
-INSERT INTO t2 (id) VALUES (null);
-SELECT LAST_INSERT_ID();
-LAST_INSERT_ID()
-6
-SELECT MAX(id) FROM t2;
-MAX(id)
-7
-MASTER_1_AUTO_INCREMENT_OFFSET3
-INSERT INTO t1 () VALUES (),(),(),();
-Warnings:
-Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
-Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
-Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
-SELECT LAST_INSERT_ID();
-LAST_INSERT_ID()
-10
-SELECT id FROM t1 ORDER BY id;
-id
-2
-3
-6
-7
-10
-11
-14
-15
-MASTER_1_AUTO_INCREMENT_OFFSET4
-INSERT INTO t2 () VALUES (),(),(),();
-Warnings:
-Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
-Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
-Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
-SELECT LAST_INSERT_ID();
-LAST_INSERT_ID()
-18
-SELECT id FROM t2 ORDER BY id;
-id
-2
-3
-6
-7
-10
-11
-14
-15
-18
-19
-22
-23
-TRUNCATE TABLE t1;
-TRUNCATE TABLE t2;
-INSERT INTO t1 () VALUES (),(),(),();
-Warnings:
-Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
-Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
-Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
-SELECT LAST_INSERT_ID();
-LAST_INSERT_ID()
-2
-SELECT id FROM t1 ORDER BY id;
-id
-2
-3
-6
-7
-INSERT INTO t2 () VALUES (),(),(),();
-Warnings:
-Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
-Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
-Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
-SELECT LAST_INSERT_ID();
-LAST_INSERT_ID()
-10
-SELECT id FROM t2 ORDER BY id;
-id
-2
-3
-6
-7
-10
-11
-14
-15
-SET INSERT_ID=5000;
-MASTER_1_AUTO_INCREMENT_OFFSET3
-INSERT INTO t1 () VALUES ();
-SELECT LAST_INSERT_ID();
-LAST_INSERT_ID()
-18
-SELECT MAX(id) FROM t1;
-MAX(id)
-18
-MASTER_1_AUTO_INCREMENT_OFFSET4
-INSERT INTO t2 () VALUES ();
-SELECT LAST_INSERT_ID();
-LAST_INSERT_ID()
-19
-SELECT MAX(id) FROM t2;
-MAX(id)
-19
-INSERT INTO t1 (id) VALUES (10000);
-SELECT LAST_INSERT_ID();
-LAST_INSERT_ID()
-22
-SELECT MAX(id) FROM t1;
-MAX(id)
-22
-INSERT INTO t2 (id) VALUES (1000);
-SELECT LAST_INSERT_ID();
-LAST_INSERT_ID()
-26
-SELECT MAX(id) FROM t2;
-MAX(id)
-26
-SELECT id FROM t1 ORDER BY id;
-id
-2
-18
-777
-1554
-2331
-10000
+connection master_1;
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection slave1_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for slave1_1
for master_1
diff --git a/storage/spider/mysql-test/spider/handler/r/spider3_fixes_part.result b/storage/spider/mysql-test/spider/handler/r/spider3_fixes_part.result
new file mode 100644
index 00000000000..f8747cff5ea
--- /dev/null
+++ b/storage/spider/mysql-test/spider/handler/r/spider3_fixes_part.result
@@ -0,0 +1,238 @@
+for master_1
+for child2
+child2_1
+child2_2
+child2_3
+for child3
+child3_1
+child3_2
+child3_3
+for slave1_1
+
+drop and create databases
+connection master_1;
+DROP DATABASE IF EXISTS auto_test_local;
+CREATE DATABASE auto_test_local;
+USE auto_test_local;
+connection slave1_1;
+DROP DATABASE IF EXISTS auto_test_local;
+CREATE DATABASE auto_test_local;
+USE auto_test_local;
+connection child2_1;
+DROP DATABASE IF EXISTS auto_test_remote;
+CREATE DATABASE auto_test_remote;
+USE auto_test_remote;
+connection child2_2;
+DROP DATABASE IF EXISTS auto_test_remote2;
+CREATE DATABASE auto_test_remote2;
+USE auto_test_remote2;
+
+test select 1
+connection master_1;
+SELECT 1;
+1
+1
+auto_increment with partition
+connection master_1;
+connection slave1_1;
+connection master_1;
+DROP TABLE IF EXISTS t1, t2;
+CREATE TABLE t1 (
+id int(11) NOT NULL AUTO_INCREMENT,
+PRIMARY KEY (id)
+) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_INCREMENT1_P_1
+CREATE TABLE t2 (
+id int(11) NOT NULL AUTO_INCREMENT,
+PRIMARY KEY (id)
+) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_INCREMENT1_P_1
+MASTER_1_AUTO_INCREMENT_INCREMENT2
+MASTER_1_AUTO_INCREMENT_OFFSET2
+spider_direct_sql('SET SESSION AUTO_INCREMENT_INCREMENT = 4', '',
+'srv "s_2_1"')
+1
+spider_direct_sql('SET SESSION AUTO_INCREMENT_INCREMENT = 4', '',
+'srv "s_2_2"')
+1
+spider_bg_direct_sql('SET SESSION AUTO_INCREMENT_OFFSET = 2', '',
+'srv "s_2_1"')
+1
+spider_bg_direct_sql('SET SESSION AUTO_INCREMENT_OFFSET = 3', '',
+'srv "s_2_2"')
+1
+CREATE TABLE t1 (
+id int(11) NOT NULL AUTO_INCREMENT,
+PRIMARY KEY (id)
+) SLAVE1_1_ENGINE SLAVE1_1_CHARSET SLAVE1_1_COMMENT_INCREMENT1_P_1
+CREATE TABLE t2 (
+id int(11) NOT NULL AUTO_INCREMENT,
+PRIMARY KEY (id)
+) SLAVE1_1_ENGINE SLAVE1_1_CHARSET SLAVE1_1_COMMENT_INCREMENT1_P_1
+INSERT INTO t1 () VALUES ();
+SELECT LAST_INSERT_ID();
+LAST_INSERT_ID()
+3
+SELECT MAX(id) FROM t1;
+MAX(id)
+3
+INSERT INTO t2 () VALUES ();
+SELECT LAST_INSERT_ID();
+LAST_INSERT_ID()
+7
+SELECT MAX(id) FROM t2;
+MAX(id)
+7
+MASTER_1_AUTO_INCREMENT_OFFSET3
+INSERT INTO t1 (id) VALUES (null);
+SELECT LAST_INSERT_ID();
+LAST_INSERT_ID()
+2
+SELECT MAX(id) FROM t1;
+MAX(id)
+7
+MASTER_1_AUTO_INCREMENT_OFFSET4
+INSERT INTO t2 (id) VALUES (null);
+SELECT LAST_INSERT_ID();
+LAST_INSERT_ID()
+6
+SELECT MAX(id) FROM t2;
+MAX(id)
+7
+MASTER_1_AUTO_INCREMENT_OFFSET3
+INSERT INTO t1 () VALUES (),(),(),();
+Warnings:
+Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
+Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
+Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
+SELECT LAST_INSERT_ID();
+LAST_INSERT_ID()
+10
+SELECT id FROM t1 ORDER BY id;
+id
+2
+3
+6
+7
+10
+11
+14
+15
+MASTER_1_AUTO_INCREMENT_OFFSET4
+INSERT INTO t2 () VALUES (),(),(),();
+Warnings:
+Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
+Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
+Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
+SELECT LAST_INSERT_ID();
+LAST_INSERT_ID()
+18
+SELECT id FROM t2 ORDER BY id;
+id
+2
+3
+6
+7
+10
+11
+14
+15
+18
+19
+22
+23
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t2;
+INSERT INTO t1 () VALUES (),(),(),();
+Warnings:
+Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
+Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
+Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
+SELECT LAST_INSERT_ID();
+LAST_INSERT_ID()
+2
+SELECT id FROM t1 ORDER BY id;
+id
+2
+3
+6
+7
+INSERT INTO t2 () VALUES (),(),(),();
+Warnings:
+Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
+Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
+Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
+SELECT LAST_INSERT_ID();
+LAST_INSERT_ID()
+10
+SELECT id FROM t2 ORDER BY id;
+id
+2
+3
+6
+7
+10
+11
+14
+15
+SET INSERT_ID=5000;
+MASTER_1_AUTO_INCREMENT_OFFSET3
+INSERT INTO t1 () VALUES ();
+SELECT LAST_INSERT_ID();
+LAST_INSERT_ID()
+18
+SELECT MAX(id) FROM t1;
+MAX(id)
+18
+MASTER_1_AUTO_INCREMENT_OFFSET4
+INSERT INTO t2 () VALUES ();
+SELECT LAST_INSERT_ID();
+LAST_INSERT_ID()
+19
+SELECT MAX(id) FROM t2;
+MAX(id)
+19
+INSERT INTO t1 (id) VALUES (10000);
+SELECT LAST_INSERT_ID();
+LAST_INSERT_ID()
+22
+SELECT MAX(id) FROM t1;
+MAX(id)
+22
+INSERT INTO t2 (id) VALUES (1000);
+SELECT LAST_INSERT_ID();
+LAST_INSERT_ID()
+26
+SELECT MAX(id) FROM t2;
+MAX(id)
+26
+connection slave1_1;
+SELECT id FROM t1 ORDER BY id;
+id
+2
+18
+777
+1554
+2331
+10000
+connection master_1;
+
+deinit
+connection master_1;
+DROP DATABASE IF EXISTS auto_test_local;
+connection slave1_1;
+DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
+DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
+DROP DATABASE IF EXISTS auto_test_remote2;
+for slave1_1
+for master_1
+for child2
+child2_1
+child2_2
+child2_3
+for child3
+child3_1
+child3_2
+child3_3
+
+end of test
diff --git a/storage/spider/mysql-test/spider/handler/r/spider_fixes.result b/storage/spider/mysql-test/spider/handler/r/spider_fixes.result
index 9fd24bcc43f..9b14817eee4 100644
--- a/storage/spider/mysql-test/spider/handler/r/spider_fixes.result
+++ b/storage/spider/mysql-test/spider/handler/r/spider_fixes.result
@@ -10,25 +10,31 @@ child3_3
for slave1_1
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection slave1_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
create table and insert
+connection master_1;
DROP TABLE IF EXISTS tb_l;
CREATE TABLE tb_l (
a INT,
@@ -50,6 +56,7 @@ INSERT INTO ta_l SELECT a, b, c FROM tb_l;
2.13
select table with "order by desc" and "<"
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
WHERE a < 5 ORDER BY a DESC LIMIT 3;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -58,6 +65,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 b 2000-01-01 00:00:00
select table with "order by desc" and "<="
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
WHERE a <= 5 ORDER BY a DESC LIMIT 3;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -67,7 +75,9 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
2.14
update table with range scan and split_read
+connection master_1;
UPDATE ta_l SET c = '2000-02-02 00:00:00' WHERE a > 1;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-01 10:21:39
@@ -80,6 +90,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
select table with range scan
TRUNCATE TABLE ta_l;
DROP TABLE IF EXISTS ta_l;
+connection master_1;
CREATE TABLE ta_l (
a int(11) NOT NULL DEFAULT '0',
b char(1) DEFAULT NULL,
@@ -87,50 +98,62 @@ c datetime DEFAULT NULL,
PRIMARY KEY (a, b, c)
) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT5_2_1
INSERT INTO ta_l SELECT a, b, c FROM tb_l;
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a = 4 AND b >= 'b'
AND c = '2003-11-30 05:01:03';
a b c
4 d 2003-11-30 05:01:03
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a = 4 AND b > 'b'
AND c = '2003-11-30 05:01:03';
a b c
4 d 2003-11-30 05:01:03
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a >= 4 AND b = 'd'
AND c = '2003-11-30 05:01:03';
a b c
4 d 2003-11-30 05:01:03
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a > 4 AND b = 'c'
AND c = '2001-12-31 23:59:59';
a b c
5 c 2001-12-31 23:59:59
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a = 4 AND b <= 'd'
AND c = '2003-11-30 05:01:03';
a b c
4 d 2003-11-30 05:01:03
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a = 4 AND b < 'e'
AND c = '2003-11-30 05:01:03';
a b c
4 d 2003-11-30 05:01:03
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a <= 4 AND b = 'b'
AND c = '2000-01-01 00:00:00';
a b c
2 b 2000-01-01 00:00:00
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a < 4 AND b = 'b'
AND c = '2000-01-01 00:00:00';
a b c
2 b 2000-01-01 00:00:00
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a = 4 AND b >= 'b'
AND b <= 'd' AND c = '2003-11-30 05:01:03';
a b c
4 d 2003-11-30 05:01:03
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a = 4 AND b > 'b'
AND b < 'e' AND c = '2003-11-30 05:01:03';
a b c
4 d 2003-11-30 05:01:03
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a <= 4 AND a >= 1
AND b >= 'b' AND c = '2003-11-30 05:01:03';
a b c
4 d 2003-11-30 05:01:03
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a < 4 AND a > 1
AND b >= 'b' AND c = '2000-01-01 00:00:00';
a b c
@@ -138,6 +161,7 @@ a b c
2.16
auto_increment insert with trigger
+connection master_1;
CREATE TABLE ta_l_auto_inc (
a INT AUTO_INCREMENT,
b CHAR(1) DEFAULT 'c',
@@ -151,14 +175,17 @@ c DATETIME,
PRIMARY KEY(a)
) MASTER_1_ENGINE2 MASTER_1_CHARSET2
CREATE TRIGGER ins_ta_l_auto_inc AFTER INSERT ON ta_l_auto_inc FOR EACH ROW BEGIN INSERT INTO tc_l (a, b, c) VALUES (NEW.a, NEW.b, NEW.c); END;;
+connection master_1;
INSERT INTO ta_l_auto_inc (a, b, c) VALUES
(NULL, 's', '2008-12-31 20:59:59');
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM tc_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 s 2008-12-31 20:59:59
2.17
engine-condition-pushdown with "or" and joining
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l WHERE a = 1 OR a IN (SELECT a FROM tb_l);
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-01 10:21:39
@@ -166,37 +193,10 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
3 e 2007-06-04 20:03:11
4 d 2003-11-30 05:01:03
5 c 2001-12-31 23:59:59
-partition with sort
-CREATE TABLE ta_l2 (
-a INT,
-b CHAR(1),
-c DATETIME,
-PRIMARY KEY(a)
-) MASTER_1_ENGINE MASTER_1_COMMENT2_P_2_1
-INSERT INTO ta_l2 SELECT a, b, c FROM tb_l;
-SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 WHERE a > 1
-ORDER BY a;
-a b date_format(c, '%Y-%m-%d %H:%i:%s')
-2 b 2000-01-01 00:00:00
-3 e 2007-06-04 20:03:11
-4 d 2003-11-30 05:01:03
-5 c 2001-12-31 23:59:59
2.23
-partition update with moving partition
-DROP TABLE IF EXISTS ta_l2;
-CREATE TABLE ta_l2 (
-a INT,
-b CHAR(1),
-c DATETIME,
-PRIMARY KEY(a)
-) MASTER_1_ENGINE MASTER_1_COMMENT2_P_2_1
-INSERT INTO ta_l2 (a, b, c) VALUES (3, 'B', '2010-09-26 00:00:00');
-UPDATE ta_l2 SET a = 4 WHERE a = 3;
-SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2;
-a b date_format(c, '%Y-%m-%d %H:%i:%s')
-4 B 2010-09-26 00:00:00
index merge
+connection master_1;
CREATE TABLE ta_l_int (
a INT AUTO_INCREMENT,
b INT DEFAULT 10,
@@ -210,27 +210,7 @@ INSERT INTO ta_l_int (a, b, c) SELECT a + 1, b + 1, c + 1 FROM ta_l_int;
INSERT INTO ta_l_int (a, b, c) SELECT a + 2, b + 2, c + 2 FROM ta_l_int;
INSERT INTO ta_l_int (a, b, c) SELECT a + 4, b + 4, c + 4 FROM ta_l_int;
INSERT INTO ta_l_int (a, b, c) SELECT a + 8, b + 8, c + 8 FROM ta_l_int;
-SELECT a, b, c FROM ta_l_int force index(primary, idx1, idx2)
-WHERE a = 5 OR b = 5 OR c = 5 ORDER BY a;
-a b c
-3 4 5
-4 5 6
-5 6 7
-index merge with partition
-DROP TABLE IF EXISTS ta_l_int;
-CREATE TABLE ta_l_int (
-a INT AUTO_INCREMENT,
-b INT DEFAULT 10,
-c INT DEFAULT 11,
-PRIMARY KEY(a),
-KEY idx1(b),
-KEY idx2(c)
-) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT3_P_2_1
-INSERT INTO ta_l_int (a, b, c) VALUES (1, 2, 3);
-INSERT INTO ta_l_int (a, b, c) SELECT a + 1, b + 1, c + 1 FROM ta_l_int;
-INSERT INTO ta_l_int (a, b, c) SELECT a + 2, b + 2, c + 2 FROM ta_l_int;
-INSERT INTO ta_l_int (a, b, c) SELECT a + 4, b + 4, c + 4 FROM ta_l_int;
-INSERT INTO ta_l_int (a, b, c) SELECT a + 8, b + 8, c + 8 FROM ta_l_int;
+connection master_1;
SELECT a, b, c FROM ta_l_int force index(primary, idx1, idx2)
WHERE a = 5 OR b = 5 OR c = 5 ORDER BY a;
a b c
@@ -240,6 +220,7 @@ a b c
2.24
index scan update without PK
+connection master_1;
DROP TABLE IF EXISTS ta_l_int;
CREATE TABLE ta_l_int (
a INT NOT NULL,
@@ -250,32 +231,51 @@ KEY idx2(c)
) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT4_2_1
SELECT a, b, c FROM ta_l_int ORDER BY a;
a b c
+1 2 3
2 3 4
+3 4 5
4 5 6
+5 6 7
6 7 8
+7 8 9
8 9 10
+9 10 11
10 11 12
+11 12 13
12 13 14
+13 14 15
14 15 16
+15 16 17
16 17 18
INSERT INTO ta_l_int (a, b, c) VALUES (0, 2, 3);
-INSERT INTO ta_l_int (a, b, c) VALUES (1, 2, 3);
+INSERT INTO ta_l_int (a, b, c) VALUES (18, 2, 3);
+connection master_1;
UPDATE ta_l_int SET c = 4 WHERE b = 2;
+connection master_1;
SELECT a, b, c FROM ta_l_int ORDER BY a;
a b c
1 2 4
2 3 4
+3 4 5
4 5 6
+5 6 7
6 7 8
+7 8 9
8 9 10
+9 10 11
10 11 12
+11 12 13
12 13 14
+13 14 15
14 15 16
+15 16 17
16 17 18
17 2 4
+18 2 4
2.25
direct order limit
+connection master_1;
SHOW STATUS LIKE 'Spider_direct_order_limit';
Variable_name Value
Spider_direct_order_limit 2
@@ -283,13 +283,14 @@ SELECT a, b, c FROM ta_l_int ORDER BY a LIMIT 3;
a b c
1 2 4
2 3 4
-4 5 6
+3 4 5
SHOW STATUS LIKE 'Spider_direct_order_limit';
Variable_name Value
Spider_direct_order_limit 3
2.26
lock tables
+connection master_1;
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
CREATE TABLE t1 (
@@ -304,6 +305,9 @@ LOCK TABLES t1 READ, t2 READ;
UNLOCK TABLES;
auto_increment
+connection master_1;
+connection slave1_1;
+connection master_1;
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (
id int(11) NOT NULL AUTO_INCREMENT,
@@ -395,6 +399,7 @@ LAST_INSERT_ID()
SELECT MAX(id) FROM t1;
MAX(id)
42
+connection slave1_1;
SELECT id FROM t1 ORDER BY id;
id
2
@@ -408,114 +413,10 @@ id
1554
2331
10000
-
-auto_increment with partition
-DROP TABLE IF EXISTS t1;
-CREATE TABLE t1 (
-id int(11) NOT NULL AUTO_INCREMENT,
-PRIMARY KEY (id)
-) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_INCREMENT1_P_1
-MASTER_1_AUTO_INCREMENT_INCREMENT2
-MASTER_1_AUTO_INCREMENT_OFFSET2
-spider_direct_sql('SET SESSION AUTO_INCREMENT_INCREMENT = 4', '',
-'srv "s_2_1"')
-1
-spider_direct_sql('SET SESSION AUTO_INCREMENT_INCREMENT = 4', '',
-'srv "s_2_2"')
-1
-spider_bg_direct_sql('SET SESSION AUTO_INCREMENT_OFFSET = 2', '',
-'srv "s_2_1"')
-1
-spider_bg_direct_sql('SET SESSION AUTO_INCREMENT_OFFSET = 3', '',
-'srv "s_2_2"')
-1
-CREATE TABLE t1 (
-id int(11) NOT NULL AUTO_INCREMENT,
-PRIMARY KEY (id)
-) SLAVE1_1_ENGINE SLAVE1_1_CHARSET SLAVE1_1_COMMENT_INCREMENT1_P_1
-INSERT INTO t1 () VALUES ();
-SELECT LAST_INSERT_ID();
-LAST_INSERT_ID()
-2
-SELECT MAX(id) FROM t1;
-MAX(id)
-2
-INSERT INTO t1 () VALUES ();
-SELECT LAST_INSERT_ID();
-LAST_INSERT_ID()
-3
-SELECT MAX(id) FROM t1;
-MAX(id)
-3
-INSERT INTO t1 (id) VALUES (null);
-SELECT LAST_INSERT_ID();
-LAST_INSERT_ID()
-6
-SELECT MAX(id) FROM t1;
-MAX(id)
-6
-INSERT INTO t1 (id) VALUES (null);
-SELECT LAST_INSERT_ID();
-LAST_INSERT_ID()
-7
-SELECT MAX(id) FROM t1;
-MAX(id)
-7
-INSERT INTO t1 () VALUES (),(),(),();
-Warnings:
-Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
-Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
-Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
-SELECT LAST_INSERT_ID();
-LAST_INSERT_ID()
-10
-SELECT id FROM t1 ORDER BY id;
-id
-2
-3
-6
-7
-10
-11
-14
-15
-SET INSERT_ID=5000;
-INSERT INTO t1 () VALUES ();
-SELECT LAST_INSERT_ID();
-LAST_INSERT_ID()
-18
-SELECT MAX(id) FROM t1;
-MAX(id)
-18
-INSERT INTO t1 (id) VALUES (10000);
-SELECT LAST_INSERT_ID();
-LAST_INSERT_ID()
-22
-SELECT MAX(id) FROM t1;
-MAX(id)
-22
-INSERT INTO t1 (id) VALUES (1000);
-SELECT LAST_INSERT_ID();
-LAST_INSERT_ID()
-26
-SELECT MAX(id) FROM t1;
-MAX(id)
-26
-SELECT id FROM t1 ORDER BY id;
-id
-2
-3
-6
-7
-10
-18
-777
-1000
-1554
-2331
-10000
+connection master_1;
read only
+connection master_1;
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (
id int(11) NOT NULL,
@@ -530,6 +431,10 @@ id
18
22
26
+30
+34
+38
+42
INSERT INTO t1 (id) VALUES (1);
ERROR HY000: Table 'auto_test_local.t1' is read only
UPDATE t1 SET id = 4 WHERE id = 2;
@@ -543,6 +448,7 @@ ERROR HY000: Table 'auto_test_local.t1' is read only
2.27
error mode
+connection master_1;
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (
id int(11) NOT NULL,
@@ -566,6 +472,7 @@ Error 1146 Table 'auto_test_remote.ter1_1' doesn't exist
3.0
is null
+connection master_1;
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (
a VARCHAR(255),
@@ -588,6 +495,7 @@ insert into t1 select a + 128, b + 128, c + 128 from t1;
insert into t1 select a + 256, b + 256, c + 256 from t1;
insert into t1 select a + 512, b + 512, c + 512 from t1;
flush tables;
+connection master_1;
select a from t1 where a is null order by a limit 30;
a
NULL
@@ -654,6 +562,7 @@ NULL
NULL
direct_order_limit
+connection master_1;
TRUNCATE TABLE t1;
insert into t1 values ('1', '1', '1');
insert into t1 select a + 1, b + 1, c + 1 from t1;
@@ -665,6 +574,7 @@ insert into t1 select a, b + 32, c + 32 from t1;
insert into t1 select a, b + 64, c + 64 from t1;
insert into t1 select a, b + 128, c + 128 from t1;
flush tables;
+connection master_1;
select a, b, c from t1 where a = '10' and b <> '100' order by c desc limit 5;
a b c
10 74 74
@@ -681,9 +591,13 @@ a c
10 170
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection slave1_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for slave1_1
for master_1
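
Most hunks in this file are not behavioural changes: the version of the mysql-test runner these results are re-recorded for echoes every connection switch into the output, so each added `connection master_1;` line simply mirrors a `connection` command that was already in the corresponding .test file. The test-side pattern, for reference (a sketch; the connection names are the ones this suite defines):

    # in the .test file -- mtr now copies each switch into the .result file
    connection master_1;
    SELECT 1;
    connection child2_1;
    SELECT 1;
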
diff --git a/storage/spider/mysql-test/spider/handler/r/spider_fixes_part.result b/storage/spider/mysql-test/spider/handler/r/spider_fixes_part.result
new file mode 100644
index 00000000000..c99c02071b6
--- /dev/null
+++ b/storage/spider/mysql-test/spider/handler/r/spider_fixes_part.result
@@ -0,0 +1,241 @@
+for master_1
+for child2
+child2_1
+child2_2
+child2_3
+for child3
+child3_1
+child3_2
+child3_3
+for slave1_1
+
+drop and create databases
+connection master_1;
+DROP DATABASE IF EXISTS auto_test_local;
+CREATE DATABASE auto_test_local;
+USE auto_test_local;
+connection slave1_1;
+DROP DATABASE IF EXISTS auto_test_local;
+CREATE DATABASE auto_test_local;
+USE auto_test_local;
+connection child2_1;
+DROP DATABASE IF EXISTS auto_test_remote;
+CREATE DATABASE auto_test_remote;
+USE auto_test_remote;
+connection child2_2;
+DROP DATABASE IF EXISTS auto_test_remote2;
+CREATE DATABASE auto_test_remote2;
+USE auto_test_remote2;
+
+test select 1
+connection master_1;
+SELECT 1;
+1
+1
+connection master_1;
+DROP TABLE IF EXISTS tb_l;
+CREATE TABLE tb_l (
+a INT,
+b CHAR(1),
+c DATETIME,
+PRIMARY KEY(a)
+) MASTER_1_ENGINE2 MASTER_1_CHARSET2
+INSERT INTO tb_l (a, b, c) VALUES
+(1, 'a', '2008-08-01 10:21:39'),
+(2, 'b', '2000-01-01 00:00:00'),
+(3, 'e', '2007-06-04 20:03:11'),
+(4, 'd', '2003-11-30 05:01:03'),
+(5, 'c', '2001-12-31 23:59:59');
+
+2.17
+partition with sort
+connection master_1;
+CREATE TABLE ta_l2 (
+a INT,
+b CHAR(1),
+c DATETIME,
+PRIMARY KEY(a)
+) MASTER_1_ENGINE MASTER_1_COMMENT2_P_2_1
+INSERT INTO ta_l2 SELECT a, b, c FROM tb_l;
+SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 WHERE a > 1
+ORDER BY a;
+a b date_format(c, '%Y-%m-%d %H:%i:%s')
+2 b 2000-01-01 00:00:00
+3 e 2007-06-04 20:03:11
+4 d 2003-11-30 05:01:03
+5 c 2001-12-31 23:59:59
+
+2.23
+partition update with moving partition
+connection master_1;
+DROP TABLE IF EXISTS ta_l2;
+connection master_1;
+CREATE TABLE ta_l2 (
+a INT,
+b CHAR(1),
+c DATETIME,
+PRIMARY KEY(a)
+) MASTER_1_ENGINE MASTER_1_COMMENT2_P_2_1
+INSERT INTO ta_l2 (a, b, c) VALUES (3, 'B', '2010-09-26 00:00:00');
+UPDATE ta_l2 SET a = 4 WHERE a = 3;
+SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2;
+a b date_format(c, '%Y-%m-%d %H:%i:%s')
+4 B 2010-09-26 00:00:00
+index merge with partition
+connection master_1;
+DROP TABLE IF EXISTS ta_l_int;
+connection master_1;
+CREATE TABLE ta_l_int (
+a INT AUTO_INCREMENT,
+b INT DEFAULT 10,
+c INT DEFAULT 11,
+PRIMARY KEY(a),
+KEY idx1(b),
+KEY idx2(c)
+) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT3_P_2_1
+INSERT INTO ta_l_int (a, b, c) VALUES (1, 2, 3);
+INSERT INTO ta_l_int (a, b, c) SELECT a + 1, b + 1, c + 1 FROM ta_l_int;
+INSERT INTO ta_l_int (a, b, c) SELECT a + 2, b + 2, c + 2 FROM ta_l_int;
+INSERT INTO ta_l_int (a, b, c) SELECT a + 4, b + 4, c + 4 FROM ta_l_int;
+INSERT INTO ta_l_int (a, b, c) SELECT a + 8, b + 8, c + 8 FROM ta_l_int;
+connection master_1;
+SELECT a, b, c FROM ta_l_int force index(primary, idx1, idx2)
+WHERE a = 5 OR b = 5 OR c = 5 ORDER BY a;
+a b c
+3 4 5
+4 5 6
+5 6 7
+
+2.26
+auto_increment with partition
+connection master_1;
+connection slave1_1;
+connection master_1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+id int(11) NOT NULL AUTO_INCREMENT,
+PRIMARY KEY (id)
+) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_INCREMENT1_P_1
+MASTER_1_AUTO_INCREMENT_INCREMENT2
+MASTER_1_AUTO_INCREMENT_OFFSET2
+spider_direct_sql('SET SESSION AUTO_INCREMENT_INCREMENT = 4', '',
+'srv "s_2_1"')
+1
+spider_direct_sql('SET SESSION AUTO_INCREMENT_INCREMENT = 4', '',
+'srv "s_2_2"')
+1
+spider_bg_direct_sql('SET SESSION AUTO_INCREMENT_OFFSET = 2', '',
+'srv "s_2_1"')
+1
+spider_bg_direct_sql('SET SESSION AUTO_INCREMENT_OFFSET = 3', '',
+'srv "s_2_2"')
+1
+CREATE TABLE t1 (
+id int(11) NOT NULL AUTO_INCREMENT,
+PRIMARY KEY (id)
+) SLAVE1_1_ENGINE SLAVE1_1_CHARSET SLAVE1_1_COMMENT_INCREMENT1_P_1
+INSERT INTO t1 () VALUES ();
+SELECT LAST_INSERT_ID();
+LAST_INSERT_ID()
+3
+SELECT MAX(id) FROM t1;
+MAX(id)
+3
+INSERT INTO t1 () VALUES ();
+SELECT LAST_INSERT_ID();
+LAST_INSERT_ID()
+2
+SELECT MAX(id) FROM t1;
+MAX(id)
+3
+INSERT INTO t1 (id) VALUES (null);
+SELECT LAST_INSERT_ID();
+LAST_INSERT_ID()
+7
+SELECT MAX(id) FROM t1;
+MAX(id)
+7
+INSERT INTO t1 (id) VALUES (null);
+SELECT LAST_INSERT_ID();
+LAST_INSERT_ID()
+6
+SELECT MAX(id) FROM t1;
+MAX(id)
+7
+INSERT INTO t1 () VALUES (),(),(),();
+Warnings:
+Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
+Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
+Note 12520 Binlog's auto-inc value is probably different from linked table's auto-inc value
+SELECT LAST_INSERT_ID();
+LAST_INSERT_ID()
+10
+SELECT id FROM t1 ORDER BY id;
+id
+2
+3
+6
+7
+10
+11
+14
+15
+SET INSERT_ID=5000;
+INSERT INTO t1 () VALUES ();
+SELECT LAST_INSERT_ID();
+LAST_INSERT_ID()
+18
+SELECT MAX(id) FROM t1;
+MAX(id)
+18
+INSERT INTO t1 (id) VALUES (10000);
+SELECT LAST_INSERT_ID();
+LAST_INSERT_ID()
+22
+SELECT MAX(id) FROM t1;
+MAX(id)
+22
+INSERT INTO t1 (id) VALUES (1000);
+SELECT LAST_INSERT_ID();
+LAST_INSERT_ID()
+26
+SELECT MAX(id) FROM t1;
+MAX(id)
+26
+connection slave1_1;
+SELECT id FROM t1 ORDER BY id;
+id
+2
+3
+6
+7
+10
+18
+777
+1000
+1554
+2331
+10000
+connection master_1;
+
+deinit
+connection master_1;
+DROP DATABASE IF EXISTS auto_test_local;
+connection slave1_1;
+DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
+DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
+DROP DATABASE IF EXISTS auto_test_remote2;
+for slave1_1
+for master_1
+for child2
+child2_1
+child2_2
+child2_3
+for child3
+child3_1
+child3_2
+child3_3
+
+end of test
diff --git a/storage/spider/mysql-test/spider/handler/r/vp_fixes.result b/storage/spider/mysql-test/spider/handler/r/vp_fixes.result
index 15dd29aa4d3..cc0e4105d61 100644
--- a/storage/spider/mysql-test/spider/handler/r/vp_fixes.result
+++ b/storage/spider/mysql-test/spider/handler/r/vp_fixes.result
@@ -9,22 +9,27 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
create table and insert
+connection master_1;
DROP TABLE IF EXISTS tb_l;
CREATE TABLE tb_l (
a INT,
@@ -46,26 +51,34 @@ INSERT INTO ta_l SELECT a, b, c FROM tb_l;
0.9
create different primary key table
+connection master_1;
CREATE TABLE ta_l_int (
a INT DEFAULT 10,
b INT AUTO_INCREMENT,
c INT DEFAULT 11,
PRIMARY KEY(b)
) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT4_2_1
+connection master_1;
INSERT INTO ta_l_int (a, b, c) VALUES (2, NULL, 3);
create un-correspond primary key table
+connection master_1;
DROP TABLE IF EXISTS ta_l_int;
+connection master_1;
CREATE TABLE ta_l_int (
a INT DEFAULT 10,
b INT DEFAULT 12,
c INT DEFAULT 11,
PRIMARY KEY(c)
) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT4_2_1
+connection master_1;
INSERT INTO ta_l_int (a, b, c) VALUES (2, NULL, 3);
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for master_1
for child2
diff --git a/storage/spider/mysql-test/spider/handler/suite.opt b/storage/spider/mysql-test/spider/handler/suite.opt
new file mode 100644
index 00000000000..48457b17309
--- /dev/null
+++ b/storage/spider/mysql-test/spider/handler/suite.opt
@@ -0,0 +1 @@
+--loose-innodb
diff --git a/storage/spider/mysql-test/spider/handler/suite.pm b/storage/spider/mysql-test/spider/handler/suite.pm
new file mode 100644
index 00000000000..f106147deb6
--- /dev/null
+++ b/storage/spider/mysql-test/spider/handler/suite.pm
@@ -0,0 +1,12 @@
+package My::Suite::Spider;
+
+@ISA = qw(My::Suite);
+
+return "No Spider engine" unless $ENV{HA_SPIDER_SO};
+return "Not run for embedded server" if $::opt_embedded_server;
+return "Test needs --big-test" unless $::opt_big_test;
+
+sub is_default { 1 }
+
+bless { };
+
diff --git a/storage/spider/mysql-test/spider/r/basic_sql.result b/storage/spider/mysql-test/spider/r/basic_sql.result
index 1e9fe78acea..94a09fc317b 100644
--- a/storage/spider/mysql-test/spider/r/basic_sql.result
+++ b/storage/spider/mysql-test/spider/r/basic_sql.result
@@ -9,22 +9,27 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
create table select test
+connection master_1;
DROP TABLE IF EXISTS tb_l;
CREATE TABLE tb_l (
a INT,
@@ -43,6 +48,7 @@ CREATE TABLE ta_l (
PRIMARY KEY(a)
) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_2_1
SELECT a, b, c FROM tb_l
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-01 10:21:39
@@ -52,6 +58,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 c 2001-12-31 23:59:59
create table ignore select test
+connection master_1;
DROP TABLE IF EXISTS ta_l;
DROP TABLE IF EXISTS tb_l;
CREATE TABLE tb_l (
@@ -76,6 +83,7 @@ Warning 1062 Duplicate entry '2' for key 'PRIMARY'
Warning 1062 Duplicate entry '3' for key 'PRIMARY'
Warning 1062 Duplicate entry '4' for key 'PRIMARY'
Warning 1062 Duplicate entry '5' for key 'PRIMARY'
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-01 10:21:39
@@ -85,11 +93,13 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 c 2001-12-31 23:59:59
create table ignore select test
+connection master_1;
DROP TABLE IF EXISTS ta_l;
CREATE TABLE ta_l (
PRIMARY KEY(a)
) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_2_1
REPLACE SELECT a, b, c FROM tb_l
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -99,10 +109,12 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
create no index table
+connection master_1;
DROP TABLE IF EXISTS ta_l_no_idx;
CREATE TABLE ta_l_no_idx
MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT2_2_1
SELECT a, b, c FROM tb_l
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l_no_idx ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -112,6 +124,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select table
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -121,6 +134,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select table shared mode
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a
LOCK IN SHARE MODE;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -131,6 +145,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select table for update
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a
FOR UPDATE;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -141,6 +156,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select table join
+connection master_1;
SELECT a.a, a.b, date_format(b.c, '%Y-%m-%d %H:%i:%s') FROM ta_l a, tb_l b
WHERE a.a = b.a ORDER BY a.a;
a b date_format(b.c, '%Y-%m-%d %H:%i:%s')
@@ -151,6 +167,7 @@ a b date_format(b.c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select table straight_join
+connection master_1;
SELECT STRAIGHT_JOIN a.a, a.b, date_format(b.c, '%Y-%m-%d %H:%i:%s')
FROM ta_l a, tb_l b WHERE a.a = b.a ORDER BY a.a;
a b date_format(b.c, '%Y-%m-%d %H:%i:%s')
@@ -161,6 +178,7 @@ a b date_format(b.c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select sql_small_result
+connection master_1;
SELECT SQL_SMALL_RESULT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -171,6 +189,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select sql_big_result
+connection master_1;
SELECT SQL_BIG_RESULT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -181,6 +200,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select sql_buffer_result
+connection master_1;
SELECT SQL_BUFFER_RESULT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -191,6 +211,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select sql_cache
+connection master_1;
SELECT SQL_CACHE a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -201,6 +222,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select sql_no_cache
+connection master_1;
SELECT SQL_NO_CACHE a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -211,6 +233,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select sql_calc_found_rows
+connection master_1;
SELECT SQL_CALC_FOUND_ROWS a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
ORDER BY a LIMIT 4;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -218,11 +241,13 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 g 2000-02-01 00:00:00
3 j 2007-05-04 20:03:11
4 i 2003-10-30 05:01:03
+connection master_1;
SELECT found_rows();
found_rows()
5
select high_priority
+connection master_1;
SELECT HIGH_PRIORITY a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -233,6 +258,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select distinct
+connection master_1;
SELECT DISTINCT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -243,11 +269,13 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select count
+connection master_1;
SELECT count(*) FROM ta_l ORDER BY a;
count(*)
5
select table join not use index
+connection master_1;
SELECT a.a, a.b, date_format(a.c, '%Y-%m-%d %H:%i:%s') FROM tb_l a WHERE
EXISTS (SELECT * FROM ta_l b WHERE b.b = a.b) ORDER BY a.a;
a b date_format(a.c, '%Y-%m-%d %H:%i:%s')
@@ -258,27 +286,35 @@ a b date_format(a.c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select using pushdown
+connection master_1;
SELECT a.a, a.b, date_format(a.c, '%Y-%m-%d %H:%i:%s') FROM ta_l a WHERE
a.b = 'g' ORDER BY a.a;
a b date_format(a.c, '%Y-%m-%d %H:%i:%s')
2 g 2000-02-01 00:00:00
select using index and pushdown
+connection master_1;
SELECT a.a, a.b, date_format(a.c, '%Y-%m-%d %H:%i:%s') FROM ta_l a WHERE
a.a > 0 AND a.b = 'g' ORDER BY a.a;
a b date_format(a.c, '%Y-%m-%d %H:%i:%s')
2 g 2000-02-01 00:00:00
insert
+connection master_1;
TRUNCATE TABLE ta_l;
+connection master_1;
INSERT INTO ta_l (a, b, c) VALUES (2, 'e', '2008-01-01 23:59:59');
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 e 2008-01-01 23:59:59
insert select
+connection master_1;
TRUNCATE TABLE ta_l;
+connection master_1;
INSERT INTO ta_l (a, b, c) SELECT a, b, c FROM tb_l;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -288,61 +324,82 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
insert select a
+connection master_1;
TRUNCATE TABLE ta_l;
+connection master_1;
INSERT INTO ta_l (a, b, c) VALUES ((SELECT a FROM tb_l ORDER BY a LIMIT 1),
'e', '2008-01-01 23:59:59');
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 e 2008-01-01 23:59:59
insert low_priority
+connection master_1;
TRUNCATE TABLE ta_l;
+connection master_1;
INSERT LOW_PRIORITY INTO ta_l (a, b, c) values (2, 'e', '2008-01-01 23:59:59');
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 e 2008-01-01 23:59:59
insert high_priority
+connection master_1;
TRUNCATE TABLE ta_l;
+connection master_1;
INSERT HIGH_PRIORITY INTO ta_l (a, b, c) VALUES (2, 'e',
'2008-01-01 23:59:59');
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 e 2008-01-01 23:59:59
insert ignore
+connection master_1;
INSERT IGNORE INTO ta_l (a, b, c) VALUES (2, 'd', '2009-02-02 01:01:01');
Warnings:
Warning 1062 Duplicate entry '2' for key 'PRIMARY'
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 e 2008-01-01 23:59:59
insert update (insert)
+connection master_1;
TRUNCATE TABLE ta_l;
+connection master_1;
INSERT INTO ta_l (a, b, c) VALUES (2, 'e', '2008-01-01 23:59:59') ON DUPLICATE
KEY UPDATE b = 'f', c = '2005-08-08 11:11:11';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 e 2008-01-01 23:59:59
insert update (update)
+connection master_1;
INSERT INTO ta_l (a, b, c) VALUES (2, 'e', '2008-01-01 23:59:59') ON DUPLICATE
KEY UPDATE b = 'f', c = '2005-08-08 11:11:11';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 f 2005-08-08 11:11:11
replace
+connection master_1;
TRUNCATE TABLE ta_l;
INSERT INTO ta_l (a, b, c) VALUES (2, 'e', '2008-01-01 23:59:59');
+connection master_1;
REPLACE INTO ta_l (a, b, c) VALUES (2, 'f', '2008-02-02 02:02:02');
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 f 2008-02-02 02:02:02
replace select
+connection master_1;
REPLACE INTO ta_l (a, b, c) SELECT a, b, c FROM tb_l;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -352,8 +409,10 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
replace select a
+connection master_1;
REPLACE INTO ta_l (a, b, c) VALUES ((SELECT a FROM tb_l ORDER BY a LIMIT 1),
'e', '2008-01-01 23:59:59');
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 e 2008-01-01 23:59:59
@@ -363,8 +422,10 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
replace low_priority
+connection master_1;
REPLACE LOW_PRIORITY INTO ta_l (a, b, c) VALUES (3, 'g',
'2009-03-03 03:03:03');
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 e 2008-01-01 23:59:59
@@ -377,66 +438,84 @@ update
TRUNCATE TABLE ta_l;
INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(2, 'e', '2008-01-01 23:59:59');
+connection master_1;
UPDATE ta_l SET b = 'f', c = '2008-02-02 02:02:02' WHERE a = 2;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 e 2008-01-01 23:59:59
2 f 2008-02-02 02:02:02
update select
+connection master_1;
UPDATE ta_l SET b = 'g', c = '2009-03-03 03:03:03' WHERE a IN (SELECT a FROM
tb_l);
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 g 2009-03-03 03:03:03
2 g 2009-03-03 03:03:03
update select a
+connection master_1;
UPDATE ta_l SET b = 'h', c = '2010-04-04 04:04:04' WHERE a = (SELECT a FROM
tb_l ORDER BY a LIMIT 1);
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 h 2010-04-04 04:04:04
2 g 2009-03-03 03:03:03
update join
+connection master_1;
UPDATE ta_l a, tb_l b SET a.b = b.b, a.c = b.c WHERE a.a = b.a;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
2 g 2000-02-01 00:00:00
update join a
+connection master_1;
UPDATE ta_l a, tb_l b SET a.b = 'g', a.c = '2009-03-03 03:03:03' WHERE
a.a = b.a;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 g 2009-03-03 03:03:03
2 g 2009-03-03 03:03:03
update low_priority
+connection master_1;
UPDATE LOW_PRIORITY ta_l SET b = 'f', c = '2008-02-02 02:02:02' WHERE a = 2;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 g 2009-03-03 03:03:03
2 f 2008-02-02 02:02:02
update ignore
+connection master_1;
UPDATE IGNORE ta_l SET a = 1, b = 'g', c = '2009-03-03 03:03:03' WHERE a = 2;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 g 2009-03-03 03:03:03
2 f 2008-02-02 02:02:02
update pushdown
+connection master_1;
update ta_l set b = 'j', c = '2009-03-03 03:03:03' where b = 'f';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 g 2009-03-03 03:03:03
2 j 2009-03-03 03:03:03
update index pushdown
+connection master_1;
UPDATE ta_l SET b = 'g', c = '2009-03-03 03:03:03' WHERE a > 0 AND b = 'j';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 g 2009-03-03 03:03:03
@@ -450,7 +529,9 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE FROM ta_l WHERE a = 2;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 e 2008-01-01 23:59:59
@@ -464,7 +545,9 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
10 j 2008-01-01 23:59:59
delete all
+connection master_1;
DELETE FROM ta_l;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -476,7 +559,9 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE FROM ta_l WHERE a IN (SELECT a FROM tb_l);
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
6 e 2008-01-01 23:59:59
@@ -493,7 +578,9 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE FROM ta_l WHERE a = (SELECT a FROM tb_l ORDER BY a LIMIT 1);
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 e 2008-01-01 23:59:59
@@ -514,7 +601,9 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE a FROM ta_l a, (SELECT a FROM tb_l ORDER BY a) b WHERE a.a = b.a;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
6 e 2008-01-01 23:59:59
@@ -531,7 +620,9 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE LOW_PRIORITY FROM ta_l WHERE a = 2;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 e 2008-01-01 23:59:59
@@ -552,7 +643,9 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE IGNORE FROM ta_l WHERE a = 2;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 e 2008-01-01 23:59:59
@@ -573,7 +666,9 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE QUICK FROM ta_l WHERE a = 2;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 e 2008-01-01 23:59:59
@@ -594,7 +689,9 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE FROM ta_l WHERE b = 'e';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
10 j 2008-01-01 23:59:59
@@ -607,19 +704,26 @@ INSERT INTO ta_l (a, b, c) VALUES (1, 'e', '2008-01-01 23:59:59'),
(6, 'e', '2008-01-01 23:59:59'), (7, 'e', '2008-01-01 23:59:59'),
(8, 'e', '2008-01-01 23:59:59'), (9, 'e', '2008-01-01 23:59:59'),
(10, 'j', '2008-01-01 23:59:59');
+connection master_1;
DELETE FROM ta_l WHERE a > 0 AND b = 'e';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
10 j 2008-01-01 23:59:59
truncate
+connection master_1;
TRUNCATE TABLE ta_l;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for master_1
for child2
diff --git a/storage/spider/mysql-test/spider/r/basic_sql_part.result b/storage/spider/mysql-test/spider/r/basic_sql_part.result
index 9e1201c17c9..0f4029404a7 100644
--- a/storage/spider/mysql-test/spider/r/basic_sql_part.result
+++ b/storage/spider/mysql-test/spider/r/basic_sql_part.result
@@ -9,20 +9,25 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
+connection master_1;
DROP TABLE IF EXISTS tb_l;
CREATE TABLE tb_l (
a INT,
@@ -38,10 +43,12 @@ INSERT INTO tb_l (a, b, c) VALUES
(5, 'h', '2001-10-31 23:59:59');
create table with partition and select test
+connection master_1;
CREATE TABLE ta_l2 (
PRIMARY KEY(a)
) MASTER_1_ENGINE MASTER_1_COMMENT_P_2_1
SELECT a, b, c FROM tb_l
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -51,19 +58,23 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
select partition using pushdown
+connection master_1;
SELECT a.a, a.b, date_format(a.c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 a WHERE
a.b = 'g' ORDER BY a.a;
a b date_format(a.c, '%Y-%m-%d %H:%i:%s')
2 g 2000-02-01 00:00:00
select partition using index pushdown
+connection master_1;
SELECT a.a, a.b, date_format(a.c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 a WHERE
a.a > 0 AND a.b = 'g' ORDER BY a.a;
a b date_format(a.c, '%Y-%m-%d %H:%i:%s')
2 g 2000-02-01 00:00:00
update partition pushdown
+connection master_1;
UPDATE ta_l2 SET b = 'e', c = '2009-03-03 03:03:03' WHERE b = 'j';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -73,7 +84,9 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
update partition index pushdown
+connection master_1;
UPDATE ta_l2 SET b = 'j', c = '2009-03-03 03:03:03' WHERE a > 0 AND b = 'e';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -85,7 +98,9 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
delete partition pushdown
TRUNCATE TABLE ta_l2;
INSERT INTO ta_l2 SELECT a, b, c FROM tb_l;
+connection master_1;
DELETE FROM ta_l2 WHERE b = 'g';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -96,7 +111,9 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
delete partition index pushdown
TRUNCATE TABLE ta_l2;
INSERT INTO ta_l2 SELECT a, b, c FROM tb_l;
+connection master_1;
DELETE FROM ta_l2 WHERE a > 0 AND b = 'g';
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 f 2008-07-01 10:21:39
@@ -105,8 +122,11 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 h 2001-10-31 23:59:59
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for master_1
for child2
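
The partitioned variants above hide their table definitions behind macros such as MASTER_1_COMMENT_P_2_1; each expands to per-partition connection info so that different partitions live on different data nodes. A representative expansion (a sketch only — the server names and remote table names here are invented for illustration; the real values come from the suite configuration):

    CREATE TABLE ta_l2 (
      a INT,
      b CHAR(1),
      c DATETIME,
      PRIMARY KEY(a)
    ) ENGINE=SPIDER COMMENT='user "root", password ""'
    PARTITION BY KEY(a) (
      PARTITION pt1 COMMENT='srv "s_2_1", tbl "ta_r"',
      PARTITION pt2 COMMENT='srv "s_2_2", tbl "ta_r2"'
    );
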
diff --git a/storage/spider/mysql-test/spider/r/direct_aggregate.result b/storage/spider/mysql-test/spider/r/direct_aggregate.result
index 3a9c7be3076..9a8660ba79e 100644
--- a/storage/spider/mysql-test/spider/r/direct_aggregate.result
+++ b/storage/spider/mysql-test/spider/r/direct_aggregate.result
@@ -9,22 +9,27 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
create table select test
+connection master_1;
DROP TABLE IF EXISTS ta_l;
CREATE TABLE ta_l (
a INT,
@@ -40,6 +45,7 @@ INSERT INTO ta_l (a, b, c) VALUES
(5, 'c', '2001-12-31 23:59:59');
direct_aggregating test
+connection master_1;
SHOW STATUS LIKE 'Spider_direct_aggregate';
Variable_name Value
Spider_direct_aggregate 0
@@ -75,8 +81,11 @@ Variable_name Value
Spider_direct_aggregate 0
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for master_1
for child2
diff --git a/storage/spider/mysql-test/spider/r/direct_aggregate_part.result b/storage/spider/mysql-test/spider/r/direct_aggregate_part.result
index bbdc943601b..760b39e16d5 100644
--- a/storage/spider/mysql-test/spider/r/direct_aggregate_part.result
+++ b/storage/spider/mysql-test/spider/r/direct_aggregate_part.result
@@ -9,22 +9,27 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
with partition test
+connection master_1;
CREATE TABLE ta_l2 (
a INT,
b CHAR(1),
@@ -66,8 +71,11 @@ Variable_name Value
Spider_direct_aggregate 0
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for master_1
for child2
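
These aggregate tests bracket each query with SHOW STATUS so the recorded output proves whether the aggregate was executed remotely. The same check works by hand against any Spider table (a sketch; t stands for an existing Spider table):

    SHOW STATUS LIKE 'Spider_direct_aggregate';  -- note the current count
    SELECT MAX(a) FROM t;                        -- candidate for direct aggregation
    SHOW STATUS LIKE 'Spider_direct_aggregate';  -- grows only if MAX() ran on the data node
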
diff --git a/storage/spider/mysql-test/spider/r/direct_update.result b/storage/spider/mysql-test/spider/r/direct_update.result
index 517491253e9..74dae7aec2e 100644
--- a/storage/spider/mysql-test/spider/r/direct_update.result
+++ b/storage/spider/mysql-test/spider/r/direct_update.result
@@ -9,22 +9,27 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
create table select test
+connection master_1;
DROP TABLE IF EXISTS ta_l;
CREATE TABLE ta_l (
a INT,
@@ -40,6 +45,7 @@ INSERT INTO ta_l (a, b, c) VALUES
(5, 'c', '2001-12-31 23:59:59');
direct_updating test
+connection master_1;
SHOW STATUS LIKE 'Spider_direct_update';
Variable_name Value
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
@@ -122,8 +128,11 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
4 d 2003-12-01 05:01:03
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for master_1
for child2
diff --git a/storage/spider/mysql-test/spider/r/direct_update_part.result b/storage/spider/mysql-test/spider/r/direct_update_part.result
index bd8f1b89f69..6db7c01f563 100644
--- a/storage/spider/mysql-test/spider/r/direct_update_part.result
+++ b/storage/spider/mysql-test/spider/r/direct_update_part.result
@@ -9,22 +9,27 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
with partition test
+connection master_1;
CREATE TABLE ta_l2 (
a INT,
b CHAR(1),
@@ -113,8 +118,11 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
4 d 2003-12-01 05:01:03
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for master_1
for child2
diff --git a/storage/spider/mysql-test/spider/r/function.result b/storage/spider/mysql-test/spider/r/function.result
index 764c774514b..c088a8a9541 100644
--- a/storage/spider/mysql-test/spider/r/function.result
+++ b/storage/spider/mysql-test/spider/r/function.result
@@ -9,22 +9,27 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
in()
+connection master_1;
CREATE TABLE t1 (
a VARCHAR(255),
PRIMARY KEY(a)
@@ -41,12 +46,14 @@ insert into t1 select a + 128 from t1;
insert into t1 select a + 256 from t1;
insert into t1 select a + 512 from t1;
flush tables;
+connection master_1;
select a from t1 where a in ('15', '120');
a
120
15
date_sub()
+connection master_1;
DROP TABLE IF EXISTS ta_l;
CREATE TABLE ta_l (
a INT,
@@ -124,6 +131,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
4 d 2003-02-03 06:00:03
5 c 2001-03-07 00:58:59
UPDATE ta_l SET c = DATE_ADD(c, INTERVAL 1 SECOND);
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2007-10-07 11:20:40
@@ -133,8 +141,11 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 c 2001-03-07 00:59:00
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for master_1
for child2
diff --git a/storage/spider/mysql-test/spider/r/ha.result b/storage/spider/mysql-test/spider/r/ha.result
index 9837faebd87..f8833c229ef 100644
--- a/storage/spider/mysql-test/spider/r/ha.result
+++ b/storage/spider/mysql-test/spider/r/ha.result
@@ -18,34 +18,43 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
+connection child2_3;
DROP DATABASE IF EXISTS auto_test_remote3;
CREATE DATABASE auto_test_remote3;
USE auto_test_remote3;
+connection child3_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child3_2;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child3_3;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
test select 1
+connection master_1;
SELECT 1;
1
1
create table test
+connection master_1;
DROP TABLE IF EXISTS ta_l;
CREATE TABLE ta_l (
a INT,
@@ -61,6 +70,7 @@ INSERT INTO ta_l (a, b, c) VALUES
(5, 'c', '2001-12-31 23:59:59');
select test
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-01 10:21:39
@@ -70,6 +80,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 c 2001-12-31 23:59:59
fail-over test
+connection master_1;
SHOW STATUS LIKE 'Spider_mon_table_cache_version%';
Variable_name Value
Spider_mon_table_cache_version 0
@@ -101,6 +112,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
6 e 2011-05-05 20:04:05
recovery test
+connection master_1;
ALTER TABLE ta_l
CONNECTION='host "localhost", user "root", password "",
msi "5", mkd "2",
@@ -113,6 +125,7 @@ auto_test_local ta_l 1 2
SELECT spider_copy_tables('ta_l', '0', '1');
spider_copy_tables('ta_l', '0', '1')
1
+connection master_1;
ALTER TABLE ta_l
CONNECTION='host "localhost", user "root", password "",
msi "5", mkd "2",
@@ -134,12 +147,14 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
6 e 2011-05-05 20:04:05
8 g 2011-05-05 21:33:30
DROP TABLE ta_l;
+connection master_1;
SELECT spider_flush_table_mon_cache();
spider_flush_table_mon_cache()
1
active standby test
create table test
+connection master_1;
DROP TABLE IF EXISTS ta_l;
CREATE TABLE ta_l (
a INT,
@@ -155,6 +170,7 @@ INSERT INTO ta_l (a, b, c) VALUES
(5, 'c', '2001-12-31 23:59:59');
select test
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-01 10:21:39
@@ -164,6 +180,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 c 2001-12-31 23:59:59
fail-over test
+connection master_1;
SHOW STATUS LIKE 'Spider_mon_table_cache_version%';
Variable_name Value
Spider_mon_table_cache_version 1
@@ -191,6 +208,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
6 e 2011-05-05 20:04:05
recovery test
+connection master_1;
ALTER TABLE ta_l
CONNECTION='host "localhost", user "root", password "",
msi "5", mkd "2", alc "1",
@@ -206,17 +224,25 @@ SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
8 g 2011-05-05 21:33:30
DROP TABLE ta_l;
+connection master_1;
SELECT spider_flush_table_mon_cache();
spider_flush_table_mon_cache()
1
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
+connection child2_3;
DROP DATABASE IF EXISTS auto_test_remote3;
+connection child3_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child3_2;
DROP DATABASE IF EXISTS auto_test_local;
+connection child3_3;
DROP DATABASE IF EXISTS auto_test_local;
for master_1
for child2
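
The ha tests walk through Spider's failover cycle: a table whose CONNECTION string lists two backends keeps serving from the surviving one, and once the failed backend returns it must be re-synced before rejoining. The recovery step as the test performs it (a sketch; ta_l and the link numbers come from this suite, link 0 being the live copy and link 1 the recovered one):

    -- copy current rows from the live link (0) to the recovered link (1)
    SELECT spider_copy_tables('ta_l', '0', '1');
    -- then clear cached monitor state so the restored link is used again
    SELECT spider_flush_table_mon_cache();
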
diff --git a/storage/spider/mysql-test/spider/r/ha_part.result b/storage/spider/mysql-test/spider/r/ha_part.result
index 8c0300ba5a5..315f37298bc 100644
--- a/storage/spider/mysql-test/spider/r/ha_part.result
+++ b/storage/spider/mysql-test/spider/r/ha_part.result
@@ -18,34 +18,43 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
+connection child2_3;
DROP DATABASE IF EXISTS auto_test_remote3;
CREATE DATABASE auto_test_remote3;
USE auto_test_remote3;
+connection child3_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child3_2;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child3_3;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
test select 1
+connection master_1;
SELECT 1;
1
1
create table with partition test
+connection master_1;
DROP TABLE IF EXISTS ta_l2;
CREATE TABLE ta_l2 (
a INT,
@@ -61,6 +70,7 @@ INSERT INTO ta_l2 (a, b, c) VALUES
(5, 'c', '2001-12-31 23:59:59');
select test
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-01 10:21:39
@@ -70,6 +80,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 c 2001-12-31 23:59:59
fail-over test
+connection master_1;
SHOW STATUS LIKE 'Spider_mon_table_cache_version%';
Variable_name Value
Spider_mon_table_cache_version 0
@@ -103,6 +114,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
6 e 2011-05-05 20:04:05
recovery test
+connection master_1;
ALTER TABLE ta_l2
PARTITION BY KEY(a) (
PARTITION pt1 COMMENT='srv "s_2_1 s_2_2", tbl "ta_r ta_r3",
@@ -120,6 +132,7 @@ auto_test_local ta_l2#P#pt2 1 2
SELECT spider_copy_tables('ta_l2#P#pt2', '0', '1');
spider_copy_tables('ta_l2#P#pt2', '0', '1')
1
+connection master_1;
ALTER TABLE ta_l2
PARTITION BY KEY(a) (
PARTITION pt1 COMMENT='srv "s_2_1 s_2_2", tbl "ta_r ta_r3",
@@ -150,6 +163,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
DROP TABLE ta_l2;
create table with partition test
+connection master_1;
DROP TABLE IF EXISTS ta_l2;
CREATE TABLE ta_l2 (
a INT,
@@ -165,6 +179,7 @@ INSERT INTO ta_l2 (a, b, c) VALUES
(5, 'c', '2001-12-31 23:59:59');
select test
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2 ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-01 10:21:39
@@ -174,6 +189,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
5 c 2001-12-31 23:59:59
fail-over test
+connection master_1;
SHOW STATUS LIKE 'Spider_mon_table_cache_version%';
Variable_name Value
Spider_mon_table_cache_version 1
@@ -206,6 +222,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
6 e 2011-05-05 20:04:05
recovery test
+connection master_1;
ALTER TABLE ta_l2
PARTITION BY KEY(a) (
PARTITION pt1 COMMENT='srv "s_2_1 s_2_2", tbl "ta_r ta_r3",
@@ -233,12 +250,19 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
DROP TABLE ta_l2;
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
+connection child2_3;
DROP DATABASE IF EXISTS auto_test_remote3;
+connection child3_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child3_2;
DROP DATABASE IF EXISTS auto_test_local;
+connection child3_3;
DROP DATABASE IF EXISTS auto_test_local;
for master_1
for child2
diff --git a/storage/spider/mysql-test/spider/r/pushdown_not_like.result b/storage/spider/mysql-test/spider/r/pushdown_not_like.result
new file mode 100644
index 00000000000..cd926962180
--- /dev/null
+++ b/storage/spider/mysql-test/spider/r/pushdown_not_like.result
@@ -0,0 +1,63 @@
+for master_1
+for child2
+child2_1
+child2_2
+child2_3
+for child3
+child3_1
+child3_2
+child3_3
+
+drop and create databases
+connection master_1;
+DROP DATABASE IF EXISTS auto_test_local;
+CREATE DATABASE auto_test_local;
+USE auto_test_local;
+connection child2_1;
+DROP DATABASE IF EXISTS auto_test_remote;
+CREATE DATABASE auto_test_remote;
+USE auto_test_remote;
+
+create table select test
+connection master_1;
+DROP TABLE IF EXISTS ta_l;
+CREATE TABLE ta_l (
+a INT,
+b CHAR(1),
+c DATETIME,
+PRIMARY KEY(a)
+) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_2_1
+INSERT INTO ta_l (a, b, c) VALUES
+(1, 'a', '2018-11-01 10:21:39'),
+(2, 'b', '2015-06-30 23:59:59'),
+(3, 'c', '2013-11-01 01:01:01');
+
+spider not like bug fix test
+connection master_1;
+select * from ta_l where b not like 'a%';
+a b c
+2 b 2015-06-30 23:59:59
+3 c 2013-11-01 01:01:01
+connection child2_1;
+SELECT argument FROM mysql.general_log WHERE argument LIKE '%select%';
+argument
+select `a`,`b`,`c` from `auto_test_remote`.`ta_r` where (`b` not like 'a%')
+SELECT argument FROM mysql.general_log WHERE argument LIKE '%select%'
+
+deinit
+connection master_1;
+DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
+DROP DATABASE IF EXISTS auto_test_remote;
+SET GLOBAL log_output = @old_log_output;
+for master_1
+for child2
+child2_1
+child2_2
+child2_3
+for child3
+child3_1
+child3_2
+child3_3
+
+end of test
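
The result file above pins down the exact SQL Spider ships to the data node: after this fix the remote query carries "not like 'a%'" rather than a plain "like", which the earlier code emitted even for a negated item because it took func_name() unconditionally. A minimal standalone C++ sketch of the dispatch idea (illustrative types only; the real change is the Item_func::LIKE_FUNC case added to spider_db_mysql_util::open_item_func() in spd_db_mysql.cc further down in this patch):

    // Sketch: choose the remote operator from the LIKE item's negation flag,
    // mirroring Item_func_like::negated and SPIDER_SQL_NOT_LIKE_STR.
    #include <cstdio>

    struct LikeItemSketch {
      bool negated;                 // stands in for Item_func_like::negated
    };

    static const char *func_name_for(const LikeItemSketch &item)
    {
      return item.negated ? "not like" : "like";
    }

    int main()
    {
      LikeItemSketch positive{false}, negative{true};
      std::printf("b %s 'a%%'\n", func_name_for(positive));
      std::printf("b %s 'a%%'\n", func_name_for(negative));
      return 0;
    }
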
diff --git a/storage/spider/mysql-test/spider/r/spider3_fixes.result b/storage/spider/mysql-test/spider/r/spider3_fixes.result
index cb62468d3ae..d6aec25bfc1 100644
--- a/storage/spider/mysql-test/spider/r/spider3_fixes.result
+++ b/storage/spider/mysql-test/spider/r/spider3_fixes.result
@@ -10,26 +10,34 @@ child3_3
for slave1_1
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection slave1_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
3.1
auto_increment
+connection master_1;
+connection slave1_1;
+connection master_1;
DROP TABLE IF EXISTS t1, t2;
CREATE TABLE t1 (
id int(11) NOT NULL AUTO_INCREMENT,
@@ -182,6 +190,7 @@ LAST_INSERT_ID()
SELECT MAX(id) FROM t2;
MAX(id)
10000
+connection slave1_1;
SELECT id FROM t1 ORDER BY id;
id
777
@@ -190,11 +199,16 @@ id
3108
5000
10000
+connection master_1;
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection slave1_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for slave1_1
for master_1
diff --git a/storage/spider/mysql-test/spider/r/spider3_fixes_part.result b/storage/spider/mysql-test/spider/r/spider3_fixes_part.result
index 12f43ef09b2..b793346df4b 100644
--- a/storage/spider/mysql-test/spider/r/spider3_fixes_part.result
+++ b/storage/spider/mysql-test/spider/r/spider3_fixes_part.result
@@ -10,24 +10,32 @@ child3_3
for slave1_1
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection slave1_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
auto_increment with partition
+connection master_1;
+connection slave1_1;
+connection master_1;
DROP TABLE IF EXISTS t1, t2;
CREATE TABLE t1 (
id int(11) NOT NULL AUTO_INCREMENT,
@@ -180,6 +188,7 @@ LAST_INSERT_ID()
SELECT MAX(id) FROM t2;
MAX(id)
10000
+connection slave1_1;
SELECT id FROM t1 ORDER BY id;
id
777
@@ -188,11 +197,16 @@ id
3108
5000
10000
+connection master_1;
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection slave1_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for slave1_1
for master_1
diff --git a/storage/spider/mysql-test/spider/r/spider_fixes.result b/storage/spider/mysql-test/spider/r/spider_fixes.result
index 3033586821e..f50c9822534 100644
--- a/storage/spider/mysql-test/spider/r/spider_fixes.result
+++ b/storage/spider/mysql-test/spider/r/spider_fixes.result
@@ -10,25 +10,31 @@ child3_3
for slave1_1
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection slave1_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
create table and insert
+connection master_1;
DROP TABLE IF EXISTS tb_l;
CREATE TABLE tb_l (
a INT,
@@ -50,6 +56,7 @@ INSERT INTO ta_l SELECT a, b, c FROM tb_l;
2.13
select table with "order by desc" and "<"
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
WHERE a < 5 ORDER BY a DESC LIMIT 3;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -58,6 +65,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
2 b 2000-01-01 00:00:00
select table with "order by desc" and "<="
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l
WHERE a <= 5 ORDER BY a DESC LIMIT 3;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
@@ -67,7 +75,9 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
2.14
update table with range scan and split_read
+connection master_1;
UPDATE ta_l SET c = '2000-02-02 00:00:00' WHERE a > 1;
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-01 10:21:39
@@ -80,6 +90,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
select table with range scan
TRUNCATE TABLE ta_l;
DROP TABLE IF EXISTS ta_l;
+connection master_1;
CREATE TABLE ta_l (
a int(11) NOT NULL DEFAULT '0',
b char(1) DEFAULT NULL,
@@ -87,50 +98,62 @@ c datetime DEFAULT NULL,
PRIMARY KEY (a, b, c)
) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT5_2_1
INSERT INTO ta_l SELECT a, b, c FROM tb_l;
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a = 4 AND b >= 'b'
AND c = '2003-11-30 05:01:03';
a b c
4 d 2003-11-30 05:01:03
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a = 4 AND b > 'b'
AND c = '2003-11-30 05:01:03';
a b c
4 d 2003-11-30 05:01:03
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a >= 4 AND b = 'd'
AND c = '2003-11-30 05:01:03';
a b c
4 d 2003-11-30 05:01:03
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a > 4 AND b = 'c'
AND c = '2001-12-31 23:59:59';
a b c
5 c 2001-12-31 23:59:59
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a = 4 AND b <= 'd'
AND c = '2003-11-30 05:01:03';
a b c
4 d 2003-11-30 05:01:03
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a = 4 AND b < 'e'
AND c = '2003-11-30 05:01:03';
a b c
4 d 2003-11-30 05:01:03
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a <= 4 AND b = 'b'
AND c = '2000-01-01 00:00:00';
a b c
2 b 2000-01-01 00:00:00
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a < 4 AND b = 'b'
AND c = '2000-01-01 00:00:00';
a b c
2 b 2000-01-01 00:00:00
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a = 4 AND b >= 'b'
AND b <= 'd' AND c = '2003-11-30 05:01:03';
a b c
4 d 2003-11-30 05:01:03
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a = 4 AND b > 'b'
AND b < 'e' AND c = '2003-11-30 05:01:03';
a b c
4 d 2003-11-30 05:01:03
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a <= 4 AND a >= 1
AND b >= 'b' AND c = '2003-11-30 05:01:03';
a b c
4 d 2003-11-30 05:01:03
+connection master_1;
SELECT a, b, c FROM ta_l FORCE INDEX(PRIMARY) WHERE a < 4 AND a > 1
AND b >= 'b' AND c = '2000-01-01 00:00:00';
a b c
@@ -138,6 +161,7 @@ a b c
2.16
auto_increment insert with trigger
+connection master_1;
CREATE TABLE ta_l_auto_inc (
a INT AUTO_INCREMENT,
b CHAR(1) DEFAULT 'c',
@@ -151,14 +175,17 @@ c DATETIME,
PRIMARY KEY(a)
) MASTER_1_ENGINE2 MASTER_1_CHARSET2
CREATE TRIGGER ins_ta_l_auto_inc AFTER INSERT ON ta_l_auto_inc FOR EACH ROW BEGIN INSERT INTO tc_l (a, b, c) VALUES (NEW.a, NEW.b, NEW.c); END;;
+connection master_1;
INSERT INTO ta_l_auto_inc (a, b, c) VALUES
(NULL, 's', '2008-12-31 20:59:59');
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM tc_l ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 s 2008-12-31 20:59:59
2.17
engine-condition-pushdown with "or" and joining
+connection master_1;
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l WHERE a = 1 OR a IN (SELECT a FROM tb_l);
a b date_format(c, '%Y-%m-%d %H:%i:%s')
1 a 2008-08-01 10:21:39
@@ -169,6 +196,7 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
2.23
index merge
+connection master_1;
CREATE TABLE ta_l_int (
a INT AUTO_INCREMENT,
b INT DEFAULT 10,
@@ -182,6 +210,7 @@ INSERT INTO ta_l_int (a, b, c) SELECT a + 1, b + 1, c + 1 FROM ta_l_int;
INSERT INTO ta_l_int (a, b, c) SELECT a + 2, b + 2, c + 2 FROM ta_l_int;
INSERT INTO ta_l_int (a, b, c) SELECT a + 4, b + 4, c + 4 FROM ta_l_int;
INSERT INTO ta_l_int (a, b, c) SELECT a + 8, b + 8, c + 8 FROM ta_l_int;
+connection master_1;
SELECT a, b, c FROM ta_l_int force index(primary, idx1, idx2)
WHERE a = 5 OR b = 5 OR c = 5 ORDER BY a;
a b c
@@ -191,6 +220,7 @@ a b c
2.24
index scan update without PK
+connection master_1;
DROP TABLE IF EXISTS ta_l_int;
CREATE TABLE ta_l_int (
a INT NOT NULL,
@@ -219,7 +249,9 @@ a b c
16 17 18
INSERT INTO ta_l_int (a, b, c) VALUES (0, 2, 3);
INSERT INTO ta_l_int (a, b, c) VALUES (18, 2, 3);
+connection master_1;
UPDATE ta_l_int SET c = 4 WHERE b = 2;
+connection master_1;
SELECT a, b, c FROM ta_l_int ORDER BY a;
a b c
1 2 4
@@ -243,6 +275,7 @@ a b c
2.25
direct order limit
+connection master_1;
SHOW STATUS LIKE 'Spider_direct_order_limit';
Variable_name Value
Spider_direct_order_limit 2
@@ -257,6 +290,7 @@ Spider_direct_order_limit 3
2.26
lock tables
+connection master_1;
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
CREATE TABLE t1 (
@@ -271,6 +305,9 @@ LOCK TABLES t1 READ, t2 READ;
UNLOCK TABLES;
auto_increment
+connection master_1;
+connection slave1_1;
+connection master_1;
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (
id int(11) NOT NULL AUTO_INCREMENT,
@@ -358,6 +395,7 @@ LAST_INSERT_ID()
SELECT MAX(id) FROM t1;
MAX(id)
10000
+connection slave1_1;
SELECT id FROM t1 ORDER BY id;
id
777
@@ -371,8 +409,10 @@ id
5439
6216
10000
+connection master_1;
read only
+connection master_1;
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (
id int(11) NOT NULL,
@@ -404,6 +444,7 @@ ERROR HY000: Table 'auto_test_local.t1' is read only
2.27
error mode
+connection master_1;
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (
id int(11) NOT NULL,
@@ -427,6 +468,7 @@ Error 1146 Table 'auto_test_remote.ter1_1' doesn't exist
3.0
is null
+connection master_1;
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (
a VARCHAR(255),
@@ -449,6 +491,7 @@ insert into t1 select a + 128, b + 128, c + 128 from t1;
insert into t1 select a + 256, b + 256, c + 256 from t1;
insert into t1 select a + 512, b + 512, c + 512 from t1;
flush tables;
+connection master_1;
select a from t1 where a is null order by a limit 30;
a
NULL
@@ -515,6 +558,7 @@ NULL
NULL
direct_order_limit
+connection master_1;
TRUNCATE TABLE t1;
insert into t1 values ('1', '1', '1');
insert into t1 select a + 1, b + 1, c + 1 from t1;
@@ -526,6 +570,7 @@ insert into t1 select a, b + 32, c + 32 from t1;
insert into t1 select a, b + 64, c + 64 from t1;
insert into t1 select a, b + 128, c + 128 from t1;
flush tables;
+connection master_1;
select a, b, c from t1 where a = '10' and b <> '100' order by c desc limit 5;
a b c
10 74 74
@@ -542,9 +587,13 @@ a c
10 170
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection slave1_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for slave1_1
for master_1
diff --git a/storage/spider/mysql-test/spider/r/spider_fixes_part.result b/storage/spider/mysql-test/spider/r/spider_fixes_part.result
index 2b313e0e9dc..d2367af9bbd 100644
--- a/storage/spider/mysql-test/spider/r/spider_fixes_part.result
+++ b/storage/spider/mysql-test/spider/r/spider_fixes_part.result
@@ -10,23 +10,29 @@ child3_3
for slave1_1
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection slave1_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
+connection master_1;
DROP TABLE IF EXISTS tb_l;
CREATE TABLE tb_l (
a INT,
@@ -43,6 +49,7 @@ INSERT INTO tb_l (a, b, c) VALUES
2.17
partition with sort
+connection master_1;
CREATE TABLE ta_l2 (
a INT,
b CHAR(1),
@@ -60,7 +67,9 @@ a b date_format(c, '%Y-%m-%d %H:%i:%s')
2.23
partition update with moving partition
+connection master_1;
DROP TABLE IF EXISTS ta_l2;
+connection master_1;
CREATE TABLE ta_l2 (
a INT,
b CHAR(1),
@@ -73,7 +82,9 @@ SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_l2;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
4 B 2010-09-26 00:00:00
index merge with partition
+connection master_1;
DROP TABLE IF EXISTS ta_l_int;
+connection master_1;
CREATE TABLE ta_l_int (
a INT AUTO_INCREMENT,
b INT DEFAULT 10,
@@ -87,6 +98,7 @@ INSERT INTO ta_l_int (a, b, c) SELECT a + 1, b + 1, c + 1 FROM ta_l_int;
INSERT INTO ta_l_int (a, b, c) SELECT a + 2, b + 2, c + 2 FROM ta_l_int;
INSERT INTO ta_l_int (a, b, c) SELECT a + 4, b + 4, c + 4 FROM ta_l_int;
INSERT INTO ta_l_int (a, b, c) SELECT a + 8, b + 8, c + 8 FROM ta_l_int;
+connection master_1;
SELECT a, b, c FROM ta_l_int force index(primary, idx1, idx2)
WHERE a = 5 OR b = 5 OR c = 5 ORDER BY a;
a b c
@@ -96,6 +108,9 @@ a b c
2.26
auto_increment with partition
+connection master_1;
+connection slave1_1;
+connection master_1;
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (
id int(11) NOT NULL AUTO_INCREMENT,
@@ -183,6 +198,7 @@ LAST_INSERT_ID()
SELECT MAX(id) FROM t1;
MAX(id)
10000
+connection slave1_1;
SELECT id FROM t1 ORDER BY id;
id
777
@@ -196,8 +212,10 @@ id
5439
6216
10000
+connection master_1;
Test ORDER BY with LIMIT and OFFSET
+connection master_1;
CREATE TABLE ta_ob (
a VARCHAR(50) NOT NULL,
b VARCHAR(50) NULL DEFAULT NULL,
@@ -246,9 +264,13 @@ a b c d e f
093B37A93A534DF883787AF5F6799674 996C7F14989D480589A553717D735E3E 51041110620302 2018-08-02 13:48:30 510411 0
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection slave1_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for slave1_1
for master_1
diff --git a/storage/spider/mysql-test/spider/r/vp_fixes.result b/storage/spider/mysql-test/spider/r/vp_fixes.result
index 15dd29aa4d3..cc0e4105d61 100644
--- a/storage/spider/mysql-test/spider/r/vp_fixes.result
+++ b/storage/spider/mysql-test/spider/r/vp_fixes.result
@@ -9,22 +9,27 @@ child3_2
child3_3
drop and create databases
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
CREATE DATABASE auto_test_local;
USE auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
CREATE DATABASE auto_test_remote;
USE auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
CREATE DATABASE auto_test_remote2;
USE auto_test_remote2;
test select 1
+connection master_1;
SELECT 1;
1
1
create table and insert
+connection master_1;
DROP TABLE IF EXISTS tb_l;
CREATE TABLE tb_l (
a INT,
@@ -46,26 +51,34 @@ INSERT INTO ta_l SELECT a, b, c FROM tb_l;
0.9
create different primary key table
+connection master_1;
CREATE TABLE ta_l_int (
a INT DEFAULT 10,
b INT AUTO_INCREMENT,
c INT DEFAULT 11,
PRIMARY KEY(b)
) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT4_2_1
+connection master_1;
INSERT INTO ta_l_int (a, b, c) VALUES (2, NULL, 3);
create un-correspond primary key table
+connection master_1;
DROP TABLE IF EXISTS ta_l_int;
+connection master_1;
CREATE TABLE ta_l_int (
a INT DEFAULT 10,
b INT DEFAULT 12,
c INT DEFAULT 11,
PRIMARY KEY(c)
) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT4_2_1
+connection master_1;
INSERT INTO ta_l_int (a, b, c) VALUES (2, NULL, 3);
deinit
+connection master_1;
DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
DROP DATABASE IF EXISTS auto_test_remote;
+connection child2_2;
DROP DATABASE IF EXISTS auto_test_remote2;
for master_1
for child2
diff --git a/storage/spider/mysql-test/spider/suite.pm b/storage/spider/mysql-test/spider/suite.pm
index 1bb6d7592c8..f106147deb6 100644
--- a/storage/spider/mysql-test/spider/suite.pm
+++ b/storage/spider/mysql-test/spider/suite.pm
@@ -6,5 +6,7 @@ return "No Spider engine" unless $ENV{HA_SPIDER_SO};
return "Not run for embedded server" if $::opt_embedded_server;
return "Test needs --big-test" unless $::opt_big_test;
+sub is_default { 1 }
+
bless { };
diff --git a/storage/spider/mysql-test/spider/t/pushdown_not_like.test b/storage/spider/mysql-test/spider/t/pushdown_not_like.test
new file mode 100644
index 00000000000..95e4fa6eea8
--- /dev/null
+++ b/storage/spider/mysql-test/spider/t/pushdown_not_like.test
@@ -0,0 +1,138 @@
+--disable_warnings
+--disable_query_log
+--disable_result_log
+--source test_init.inc
+--enable_result_log
+--enable_query_log
+
+
+--echo
+--echo drop and create databases
+--connection master_1
+DROP DATABASE IF EXISTS auto_test_local;
+CREATE DATABASE auto_test_local;
+USE auto_test_local;
+if ($USE_CHILD_GROUP2)
+{
+ --connection child2_1
+ DROP DATABASE IF EXISTS auto_test_remote;
+ CREATE DATABASE auto_test_remote;
+ USE auto_test_remote;
+}
+--enable_warnings
+
+
+--echo
+--echo create table select test
+if ($USE_CHILD_GROUP2)
+{
+ if (!$OUTPUT_CHILD_GROUP2)
+ {
+ --disable_query_log
+ --disable_result_log
+ }
+ --connection child2_1
+ if ($OUTPUT_CHILD_GROUP2)
+ {
+ --disable_query_log
+ echo CHILD2_1_DROP_TABLES;
+ echo CHILD2_1_CREATE_TABLES;
+ }
+ --disable_warnings
+ eval $CHILD2_1_DROP_TABLES;
+ --enable_warnings
+ eval $CHILD2_1_CREATE_TABLES;
+ if ($OUTPUT_CHILD_GROUP2)
+ {
+ --enable_query_log
+ }
+ if ($USE_GENERAL_LOG)
+ {
+ SET @old_log_output = @@global.log_output;
+ TRUNCATE TABLE mysql.general_log;
+ set global log_output = 'TABLE';
+ }
+ if (!$OUTPUT_CHILD_GROUP2)
+ {
+ --enable_query_log
+ --enable_result_log
+ }
+}
+
+--connection master_1
+--disable_warnings
+DROP TABLE IF EXISTS ta_l;
+--enable_warnings
+--disable_query_log
+echo CREATE TABLE ta_l (
+ a INT,
+ b CHAR(1),
+ c DATETIME,
+ PRIMARY KEY(a)
+) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_2_1;
+eval CREATE TABLE ta_l (
+ a INT,
+ b CHAR(1),
+ c DATETIME,
+ PRIMARY KEY(a)
+) $MASTER_1_ENGINE $MASTER_1_CHARSET $MASTER_1_COMMENT_2_1;
+--enable_query_log
+INSERT INTO ta_l (a, b, c) VALUES
+ (1, 'a', '2018-11-01 10:21:39'),
+ (2, 'b', '2015-06-30 23:59:59'),
+ (3, 'c', '2013-11-01 01:01:01');
+
+--echo
+--echo spider not like bug fix test
+if ($USE_CHILD_GROUP2)
+{
+ if (!$OUTPUT_CHILD_GROUP2)
+ {
+ --disable_query_log
+ --disable_result_log
+ }
+ --connection child2_1
+ if ($USE_GENERAL_LOG)
+ {
+ TRUNCATE TABLE mysql.general_log;
+ }
+ if (!$OUTPUT_CHILD_GROUP2)
+ {
+ --enable_query_log
+ --enable_result_log
+ }
+}
+
+--connection master_1
+select * from ta_l where b not like 'a%';
+if ($USE_CHILD_GROUP2)
+{
+ --connection child2_1
+ if ($USE_GENERAL_LOG)
+ {
+ SELECT argument FROM mysql.general_log WHERE argument LIKE '%select%';
+ }
+}
+
+
+--echo
+--echo deinit
+--disable_warnings
+--connection master_1
+DROP DATABASE IF EXISTS auto_test_local;
+if ($USE_CHILD_GROUP2)
+{
+ --connection child2_1
+ DROP DATABASE IF EXISTS auto_test_remote;
+ SET GLOBAL log_output = @old_log_output;
+}
+
+
+--disable_query_log
+--disable_result_log
+--source test_deinit.inc
+--enable_result_log
+--enable_query_log
+--enable_warnings
+--echo
+--echo end of test
diff --git a/storage/spider/scripts/install_spider.sql b/storage/spider/scripts/install_spider.sql
index 173ca2e9bde..21a9836cbd2 100644
--- a/storage/spider/scripts/install_spider.sql
+++ b/storage/spider/scripts/install_spider.sql
@@ -132,7 +132,9 @@ drop procedure if exists mysql.spider_fix_one_table;
drop procedure if exists mysql.spider_fix_system_tables;
delimiter //
create procedure mysql.spider_fix_one_table
- (tab_name char(255), test_col_name char(255), _sql text)
+ (tab_name char(255) charset utf8 collate utf8_bin,
+ test_col_name char(255) charset utf8 collate utf8_bin,
+ _sql text charset utf8 collate utf8_bin)
begin
set @col_exists := 0;
select 1 into @col_exists from INFORMATION_SCHEMA.COLUMNS
@@ -295,9 +297,18 @@ delimiter //
create procedure mysql.spider_plugin_installer()
begin
set @win_plugin := IF(@@version_compile_os like 'Win%', 1, 0);
+ set @have_spider_i_s_plugin := 0;
+ select @have_spider_i_s_plugin := 1 from INFORMATION_SCHEMA.plugins where PLUGIN_NAME = 'SPIDER';
set @have_spider_plugin := 0;
- select @have_spider_plugin := 1 from INFORMATION_SCHEMA.plugins where PLUGIN_NAME = 'SPIDER';
- if @have_spider_plugin = 0 then
+ select @have_spider_plugin := 1 from mysql.plugin where name = 'spider';
+ if @have_spider_i_s_plugin = 0 then
+ if @have_spider_plugin = 1 then
+ -- spider plugin is present in mysql.plugin but not in
+ -- information_schema.plugins. Remove spider plugin entry
+ -- in mysql.plugin first.
+ delete from mysql.plugin where name = 'spider';
+ end if;
+ -- Install spider plugin
if @win_plugin = 0 then
install plugin spider soname 'ha_spider.so';
else
@@ -306,7 +317,16 @@ begin
end if;
set @have_spider_i_s_alloc_mem_plugin := 0;
select @have_spider_i_s_alloc_mem_plugin := 1 from INFORMATION_SCHEMA.plugins where PLUGIN_NAME = 'SPIDER_ALLOC_MEM';
- if @have_spider_i_s_alloc_mem_plugin = 0 then
+ set @have_spider_alloc_mem_plugin := 0;
+ select @have_spider_alloc_mem_plugin := 1 from mysql.plugin where name = 'spider_alloc_mem';
+ if @have_spider_i_s_alloc_mem_plugin = 0 then
+ if @have_spider_alloc_mem_plugin = 1 then
+ -- spider_alloc_mem plugin is present in mysql.plugin but not in
+ -- information_schema.plugins. Remove spider_alloc_mem plugin entry
+ -- in mysql.plugin first.
+ delete from mysql.plugin where name = 'spider_alloc_mem';
+ end if;
+ -- Install spider_alloc_mem plugin
if @win_plugin = 0 then
install plugin spider_alloc_mem soname 'ha_spider.so';
else
diff --git a/storage/spider/spd_conn.cc b/storage/spider/spd_conn.cc
index 99599bca1be..c1c2ce07476 100644
--- a/storage/spider/spd_conn.cc
+++ b/storage/spider/spd_conn.cc
@@ -37,8 +37,6 @@
#include "spd_ping_table.h"
#include "spd_malloc.h"
-extern ulong *spd_db_att_thread_id;
-
extern handlerton *spider_hton_ptr;
extern SPIDER_DBTON spider_dbton[SPIDER_DBTON_SIZE];
pthread_mutex_t spider_conn_id_mutex;
@@ -2258,7 +2256,7 @@ void *spider_bg_conn_action(
my_thread_init();
DBUG_ENTER("spider_bg_conn_action");
/* init start */
- if (!(thd = new THD()))
+ if (!(thd = new THD(next_thread_id())))
{
pthread_mutex_lock(&conn->bg_conn_sync_mutex);
pthread_cond_signal(&conn->bg_conn_sync_cond);
@@ -2266,9 +2264,6 @@ void *spider_bg_conn_action(
my_thread_end();
DBUG_RETURN(NULL);
}
- pthread_mutex_lock(&LOCK_thread_count);
- thd->thread_id = (*spd_db_att_thread_id)++;
- pthread_mutex_unlock(&LOCK_thread_count);
#ifdef HAVE_PSI_INTERFACE
mysql_thread_set_psi_id(thd->thread_id);
#endif
@@ -2767,7 +2762,7 @@ void *spider_bg_sts_action(
}
#endif
pthread_mutex_lock(&share->sts_mutex);
- if (!(thd = new THD()))
+ if (!(thd = new THD(next_thread_id())))
{
share->bg_sts_thd_wait = FALSE;
share->bg_sts_kill = FALSE;
@@ -2779,9 +2774,6 @@ void *spider_bg_sts_action(
#endif
DBUG_RETURN(NULL);
}
- pthread_mutex_lock(&LOCK_thread_count);
- thd->thread_id = (*spd_db_att_thread_id)++;
- pthread_mutex_unlock(&LOCK_thread_count);
#ifdef HAVE_PSI_INTERFACE
mysql_thread_set_psi_id(thd->thread_id);
#endif
@@ -3143,7 +3135,7 @@ void *spider_bg_crd_action(
}
#endif
pthread_mutex_lock(&share->crd_mutex);
- if (!(thd = new THD()))
+ if (!(thd = new THD(next_thread_id())))
{
share->bg_crd_thd_wait = FALSE;
share->bg_crd_kill = FALSE;
@@ -3155,9 +3147,6 @@ void *spider_bg_crd_action(
#endif
DBUG_RETURN(NULL);
}
- pthread_mutex_lock(&LOCK_thread_count);
- thd->thread_id = (*spd_db_att_thread_id)++;
- pthread_mutex_unlock(&LOCK_thread_count);
#ifdef HAVE_PSI_INTERFACE
mysql_thread_set_psi_id(thd->thread_id);
#endif
@@ -3629,7 +3618,7 @@ void *spider_bg_mon_action(
DBUG_ENTER("spider_bg_mon_action");
/* init start */
pthread_mutex_lock(&share->bg_mon_mutexes[link_idx]);
- if (!(thd = new THD()))
+ if (!(thd = new THD(next_thread_id())))
{
share->bg_mon_kill = FALSE;
share->bg_mon_init = FALSE;
@@ -3638,9 +3627,6 @@ void *spider_bg_mon_action(
my_thread_end();
DBUG_RETURN(NULL);
}
- pthread_mutex_lock(&LOCK_thread_count);
- thd->thread_id = (*spd_db_att_thread_id)++;
- pthread_mutex_unlock(&LOCK_thread_count);
#ifdef HAVE_PSI_INTERFACE
mysql_thread_set_psi_id(thd->thread_id);
#endif
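
The four hunks above retire Spider's private thread-id scheme for its background connection, STS, CRD, and monitor threads: instead of dereferencing an exported counter (spd_db_att_thread_id) under LOCK_thread_count, each new THD now gets its id from the server's next_thread_id(). A minimal sketch of the pattern, assuming only standard C++ (the real allocator lives in the server core, not in Spider):

    // Standalone sketch, not Spider source: a single atomic allocator hands
    // out unique thread ids, replacing the deleted lock/increment/unlock.
    #include <atomic>
    #include <cstdio>
    #include <thread>
    #include <vector>

    static std::atomic<unsigned long> thread_id_counter{1};

    // One fetch_add replaces the removed sequence:
    //   lock(LOCK_thread_count); id = (*spd_db_att_thread_id)++; unlock(...)
    static unsigned long next_thread_id_sketch()
    {
      return thread_id_counter.fetch_add(1, std::memory_order_relaxed);
    }

    int main()
    {
      std::vector<std::thread> workers;
      for (int i = 0; i < 4; i++)
        workers.emplace_back([] {
          std::printf("background thread got id %lu\n",
                      next_thread_id_sketch());
        });
      for (auto &w : workers)
        w.join();
      return 0;
    }
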
diff --git a/storage/spider/spd_copy_tables.cc b/storage/spider/spd_copy_tables.cc
index 0d89f78ee92..dbd8ace3f47 100644
--- a/storage/spider/spd_copy_tables.cc
+++ b/storage/spider/spd_copy_tables.cc
@@ -85,13 +85,12 @@ int spider_udf_set_copy_tables_param_default(
if (!copy_tables->param_name) \
{ \
if ((copy_tables->param_name = spider_get_string_between_quote( \
- start_ptr, TRUE))) \
+ start_ptr, TRUE, &param_string_parse))) \
copy_tables->SPIDER_PARAM_STR_LEN(param_name) = \
strlen(copy_tables->param_name); \
- else { \
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR, \
- MYF(0), tmp_ptr); \
+ else \
+ { \
+ error_num = param_string_parse.print_param_error(); \
goto error; \
} \
DBUG_PRINT("info",("spider " title_name "=%s", copy_tables->param_name)); \
@@ -111,9 +110,7 @@ int spider_udf_set_copy_tables_param_default(
{ \
if (hint_num < 0 || hint_num >= max_size) \
{ \
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR, \
- MYF(0), tmp_ptr); \
+ error_num = param_string_parse.print_param_error(); \
goto error; \
} else if (copy_tables->param_name[hint_num] != -1) \
break; \
@@ -126,17 +123,13 @@ int spider_udf_set_copy_tables_param_default(
else if (copy_tables->param_name[hint_num] > max_val) \
copy_tables->param_name[hint_num] = max_val; \
} else { \
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR, \
- MYF(0), tmp_ptr); \
+ error_num = param_string_parse.print_param_error(); \
goto error; \
} \
DBUG_PRINT("info",("spider " title_name "[%d]=%d", hint_num, \
copy_tables->param_name[hint_num])); \
} else { \
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR, \
- MYF(0), tmp_ptr); \
+ error_num = param_string_parse.print_param_error(); \
goto error; \
} \
break; \
@@ -155,10 +148,11 @@ int spider_udf_set_copy_tables_param_default(
copy_tables->param_name = min_val; \
else if (copy_tables->param_name > max_val) \
copy_tables->param_name = max_val; \
+ param_string_parse.set_param_value(tmp_ptr2, \
+ tmp_ptr2 + \
+ strlen(tmp_ptr2) + 1); \
} else { \
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR, \
- MYF(0), tmp_ptr); \
+ error_num = param_string_parse.print_param_error(); \
goto error; \
} \
DBUG_PRINT("info",("spider " title_name "=%d", copy_tables->param_name)); \
@@ -177,10 +171,11 @@ int spider_udf_set_copy_tables_param_default(
copy_tables->param_name = atoi(tmp_ptr2); \
if (copy_tables->param_name < min_val) \
copy_tables->param_name = min_val; \
+ param_string_parse.set_param_value(tmp_ptr2, \
+ tmp_ptr2 + \
+ strlen(tmp_ptr2) + 1); \
} else { \
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR, \
- MYF(0), tmp_ptr); \
+ error_num = param_string_parse.print_param_error(); \
goto error; \
} \
DBUG_PRINT("info",("spider " title_name "=%d", copy_tables->param_name)); \
@@ -200,10 +195,11 @@ int spider_udf_set_copy_tables_param_default(
my_strtoll10(tmp_ptr2, (char**) NULL, &error_num); \
if (copy_tables->param_name < min_val) \
copy_tables->param_name = min_val; \
+ param_string_parse.set_param_value(tmp_ptr2, \
+ tmp_ptr2 + \
+ strlen(tmp_ptr2) + 1); \
} else { \
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR, \
- MYF(0), tmp_ptr); \
+ error_num = param_string_parse.print_param_error(); \
goto error; \
} \
DBUG_PRINT("info",("spider " title_name "=%lld", \
@@ -222,6 +218,7 @@ int spider_udf_parse_copy_tables_param(
char *sprit_ptr[2];
char *tmp_ptr, *tmp_ptr2, *start_ptr;
int title_length;
+ SPIDER_PARAM_STRING_PARSE param_string_parse;
DBUG_ENTER("spider_udf_parse_copy_tables_param");
copy_tables->bulk_insert_interval = -1;
copy_tables->bulk_insert_rows = -1;
@@ -246,6 +243,7 @@ int spider_udf_parse_copy_tables_param(
DBUG_PRINT("info",("spider param_string=%s", param_string));
sprit_ptr[0] = param_string;
+ param_string_parse.init(param_string, ER_SPIDER_INVALID_UDF_PARAM_NUM);
while (sprit_ptr[0])
{
if ((sprit_ptr[1] = strchr(sprit_ptr[0], ',')))
@@ -272,10 +270,14 @@ int spider_udf_parse_copy_tables_param(
title_length++;
start_ptr++;
}
+ param_string_parse.set_param_title(tmp_ptr, tmp_ptr + title_length);
switch (title_length)
{
case 0:
+ error_num = param_string_parse.print_param_error();
+ if (error_num)
+ goto error;
continue;
case 3:
#ifndef WITHOUT_SPIDER_BG_SEARCH
@@ -286,55 +288,43 @@ int spider_udf_parse_copy_tables_param(
SPIDER_PARAM_STR("dtb", database);
SPIDER_PARAM_INT_WITH_MAX("utc", use_table_charset, 0, 1);
SPIDER_PARAM_INT_WITH_MAX("utr", use_transaction, 0, 1);
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
- MYF(0), tmp_ptr);
+ error_num = param_string_parse.print_param_error();
goto error;
#ifndef WITHOUT_SPIDER_BG_SEARCH
case 7:
SPIDER_PARAM_INT_WITH_MAX("bg_mode", bg_mode, 0, 1);
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
- MYF(0), tmp_ptr);
+ error_num = param_string_parse.print_param_error();
goto error;
#endif
case 8:
SPIDER_PARAM_STR("database", database);
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
- MYF(0), tmp_ptr);
+ error_num = param_string_parse.print_param_error();
goto error;
case 15:
SPIDER_PARAM_INT_WITH_MAX("use_transaction", use_transaction, 0, 1);
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
- MYF(0), tmp_ptr);
+ error_num = param_string_parse.print_param_error();
goto error;
case 16:
SPIDER_PARAM_LONGLONG("bulk_insert_rows", bulk_insert_rows, 1);
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
- MYF(0), tmp_ptr);
+ error_num = param_string_parse.print_param_error();
goto error;
case 17:
SPIDER_PARAM_INT_WITH_MAX(
"use_table_charset", use_table_charset, 0, 1);
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
- MYF(0), tmp_ptr);
+ error_num = param_string_parse.print_param_error();
goto error;
case 20:
SPIDER_PARAM_INT("bulk_insert_interval", bulk_insert_interval, 0);
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
- MYF(0), tmp_ptr);
+ error_num = param_string_parse.print_param_error();
goto error;
default:
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
- MYF(0), tmp_ptr);
+ error_num = param_string_parse.print_param_error();
goto error;
}
+
+ /* Verify that the remainder of the parameter value is whitespace */
+ if ((error_num = param_string_parse.has_extra_parameter_values()))
+ goto error;
}
set_default:
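
All of the SPIDER_PARAM_* macros above now funnel their failure paths through one object: param_string_parse records the span of the current title and value as parsing proceeds, so a single print_param_error() can name the offending parameter, and has_extra_parameter_values() can reject trailing junk after a value. A compact sketch of that error-context idea, assuming nothing from the Spider headers (the real class is SPIDER_PARAM_STRING_PARSE):

    // Illustrative only: remember where the current key/value sit in the
    // input so one method can format a precise diagnostic.
    #include <cstdio>
    #include <string>

    struct ParamStringParseSketch {
      const char *input = nullptr;
      std::string title, value;

      void init(const char *s) { input = s; }
      void set_param_title(const char *b, const char *e) { title.assign(b, e); }
      void set_param_value(const char *b, const char *e) { value.assign(b, e); }

      // One place to format the diagnostic; returns a notional error code.
      int print_param_error() const
      {
        std::fprintf(stderr, "invalid parameter '%s' (value '%s') in \"%s\"\n",
                     title.c_str(), value.c_str(), input);
        return 1;  // stands in for ER_SPIDER_INVALID_UDF_PARAM_NUM
      }
    };

    int main()
    {
      const char *param = "bulk_insert_rows \"oops\"";
      ParamStringParseSketch p;
      p.init(param);
      p.set_param_title(param, param + 16);      // "bulk_insert_rows"
      p.set_param_value(param + 18, param + 22); // "oops"
      return p.print_param_error() ? 1 : 0;
    }
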
diff --git a/storage/spider/spd_db_conn.cc b/storage/spider/spd_db_conn.cc
index efb5e6d8c8c..13ff6c568de 100644
--- a/storage/spider/spd_db_conn.cc
+++ b/storage/spider/spd_db_conn.cc
@@ -119,7 +119,10 @@ int spider_db_connect(
conn->net_write_timeout = spider_param_net_write_timeout(thd,
share->net_write_timeouts[link_idx]);
connect_retry_interval = spider_param_connect_retry_interval(thd);
- connect_retry_count = spider_param_connect_retry_count(thd);
+ if (conn->disable_connect_retry)
+ connect_retry_count = 0;
+ else
+ connect_retry_count = spider_param_connect_retry_count(thd);
} else {
conn->connect_timeout = spider_param_connect_timeout(NULL,
share->connect_timeouts[link_idx]);
@@ -737,7 +740,8 @@ int spider_db_errorno(
"to %ld: %d %s\n",
l_time->tm_year + 1900, l_time->tm_mon + 1, l_time->tm_mday,
l_time->tm_hour, l_time->tm_min, l_time->tm_sec,
- current_thd->thread_id, error_num, conn->db_conn->get_error());
+ (ulong) current_thd->thread_id, error_num,
+ conn->db_conn->get_error());
}
if (!conn->mta_conn_mutex_unlock_later)
{
@@ -757,7 +761,8 @@ int spider_db_errorno(
"to %ld: %d %s\n",
l_time->tm_year + 1900, l_time->tm_mon + 1, l_time->tm_mday,
l_time->tm_hour, l_time->tm_min, l_time->tm_sec,
- current_thd->thread_id, error_num, conn->db_conn->get_error());
+ (ulong) current_thd->thread_id, error_num,
+ conn->db_conn->get_error());
}
if (!conn->mta_conn_mutex_unlock_later)
{
@@ -1368,7 +1373,7 @@ int spider_db_append_name_with_quote_str(
for (name_end = name + length; name < name_end; name += length)
{
head_code = *name;
- if (!(length = my_mbcharlen(system_charset_info, (uchar) head_code)))
+ if ((length= my_charlen(system_charset_info, name, name_end)) < 1)
{
my_message(ER_SPIDER_WRONG_CHARACTER_IN_NAME_NUM,
ER_SPIDER_WRONG_CHARACTER_IN_NAME_STR, MYF(0));
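
Besides the connect-retry switch and the thread-id format fixes, the last hunk above swaps my_mbcharlen() for my_charlen() when walking a name to quote. The old call inferred the character length from the lead byte alone; the new one validates the whole sequence against the end pointer and reports failure as a value below 1, so truncated or malformed multibyte characters are caught. A simplified UTF-8 model of that check (not the MariaDB charset API):

    // Returns the length of the UTF-8 sequence at p, or -1 if it is
    // malformed or runs past end (mirrors my_charlen's < 1 on error).
    #include <cstdio>

    static int charlen_sketch(const char *p, const char *end)
    {
      unsigned char c = (unsigned char) *p;
      int len = c < 0x80 ? 1 : (c >> 5) == 0x6 ? 2 : (c >> 4) == 0xE ? 3
              : (c >> 3) == 0x1E ? 4 : -1;
      if (len < 0 || p + len > end)
        return -1;
      for (int i = 1; i < len; i++)            // continuation bytes: 10xxxxxx
        if (((unsigned char) p[i] & 0xC0) != 0x80)
          return -1;
      return len;
    }

    int main()
    {
      const char name[] = "caf\xC3\xA9";       // "café", 5 bytes
      const char *p = name, *end = name + sizeof(name) - 1;
      while (p < end)
      {
        int len = charlen_sketch(p, end);
        if (len < 1)
          return std::puts("bad character in name"), 1;
        std::printf("char of %d byte(s)\n", len);
        p += len;
      }
      return 0;
    }
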
diff --git a/storage/spider/spd_db_handlersocket.cc b/storage/spider/spd_db_handlersocket.cc
index bc62914c6aa..b977444d4f2 100644
--- a/storage/spider/spd_db_handlersocket.cc
+++ b/storage/spider/spd_db_handlersocket.cc
@@ -4994,6 +4994,15 @@ int spider_handlersocket_handler::append_explain_select_part(
DBUG_RETURN(0);
}
+bool spider_handlersocket_handler::is_sole_projection_field(
+ uint16 field_index
+) {
+ DBUG_ENTER("spider_handlersocket_handler::is_sole_projection_field");
+ DBUG_PRINT("info", ("spider this=%p", this));
+ DBUG_ASSERT(0);
+ DBUG_RETURN(FALSE);
+}
+
bool spider_handlersocket_handler::is_bulk_insert_exec_period(
bool bulk_end
) {
diff --git a/storage/spider/spd_db_handlersocket.h b/storage/spider/spd_db_handlersocket.h
index 5c9133d14e9..6796acc26c3 100644
--- a/storage/spider/spd_db_handlersocket.h
+++ b/storage/spider/spd_db_handlersocket.h
@@ -776,6 +776,9 @@ public:
ulong sql_type,
int link_idx
);
+ bool is_sole_projection_field(
+ uint16 field_index
+ );
bool is_bulk_insert_exec_period(
bool bulk_end
);
diff --git a/storage/spider/spd_db_include.h b/storage/spider/spd_db_include.h
index 05e840ab6b7..39e2c8650c4 100644
--- a/storage/spider/spd_db_include.h
+++ b/storage/spider/spd_db_include.h
@@ -139,6 +139,8 @@ typedef st_spider_result SPIDER_RESULT;
#define SPIDER_SQL_IN_LEN (sizeof(SPIDER_SQL_IN_STR) - 1)
#define SPIDER_SQL_NOT_IN_STR "not in("
#define SPIDER_SQL_NOT_IN_LEN (sizeof(SPIDER_SQL_NOT_IN_STR) - 1)
+#define SPIDER_SQL_NOT_LIKE_STR "not like"
+#define SPIDER_SQL_NOT_LIKE_LEN (sizeof(SPIDER_SQL_NOT_LIKE_STR) - 1)
#define SPIDER_SQL_AS_CHAR_STR " as char"
#define SPIDER_SQL_AS_CHAR_LEN (sizeof(SPIDER_SQL_AS_CHAR_STR) - 1)
#define SPIDER_SQL_CAST_STR "cast("
@@ -182,32 +184,32 @@ typedef st_spider_result SPIDER_RESULT;
#define SPIDER_SQL_LCL_NAME_QUOTE_STR "`"
#define SPIDER_SQL_LCL_NAME_QUOTE_LEN (sizeof(SPIDER_SQL_LCL_NAME_QUOTE_STR) - 1)
-#define SPIDER_CONN_KIND_MYSQL (1 << 0)
+#define SPIDER_CONN_KIND_MYSQL (1U << 0)
#if defined(HS_HAS_SQLCOM) && defined(HAVE_HANDLERSOCKET)
-#define SPIDER_CONN_KIND_HS_READ (1 << 2)
-#define SPIDER_CONN_KIND_HS_WRITE (1 << 3)
+#define SPIDER_CONN_KIND_HS_READ (1U << 2)
+#define SPIDER_CONN_KIND_HS_WRITE (1U << 3)
#endif
-#define SPIDER_SQL_KIND_SQL (1 << 0)
-#define SPIDER_SQL_KIND_HANDLER (1 << 1)
+#define SPIDER_SQL_KIND_SQL (1U << 0)
+#define SPIDER_SQL_KIND_HANDLER (1U << 1)
#if defined(HS_HAS_SQLCOM) && defined(HAVE_HANDLERSOCKET)
-#define SPIDER_SQL_KIND_HS (1 << 2)
+#define SPIDER_SQL_KIND_HS (1U << 2)
#endif
-#define SPIDER_SQL_TYPE_SELECT_SQL (1 << 0)
-#define SPIDER_SQL_TYPE_INSERT_SQL (1 << 1)
-#define SPIDER_SQL_TYPE_UPDATE_SQL (1 << 2)
-#define SPIDER_SQL_TYPE_DELETE_SQL (1 << 3)
-#define SPIDER_SQL_TYPE_BULK_UPDATE_SQL (1 << 4)
-#define SPIDER_SQL_TYPE_TMP_SQL (1 << 5)
-#define SPIDER_SQL_TYPE_DROP_TMP_TABLE_SQL (1 << 6)
-#define SPIDER_SQL_TYPE_OTHER_SQL (1 << 7)
-#define SPIDER_SQL_TYPE_HANDLER (1 << 8)
-#define SPIDER_SQL_TYPE_SELECT_HS (1 << 9)
-#define SPIDER_SQL_TYPE_INSERT_HS (1 << 10)
-#define SPIDER_SQL_TYPE_UPDATE_HS (1 << 11)
-#define SPIDER_SQL_TYPE_DELETE_HS (1 << 12)
-#define SPIDER_SQL_TYPE_OTHER_HS (1 << 13)
+#define SPIDER_SQL_TYPE_SELECT_SQL (1U << 0)
+#define SPIDER_SQL_TYPE_INSERT_SQL (1U << 1)
+#define SPIDER_SQL_TYPE_UPDATE_SQL (1U << 2)
+#define SPIDER_SQL_TYPE_DELETE_SQL (1U << 3)
+#define SPIDER_SQL_TYPE_BULK_UPDATE_SQL (1U << 4)
+#define SPIDER_SQL_TYPE_TMP_SQL (1U << 5)
+#define SPIDER_SQL_TYPE_DROP_TMP_TABLE_SQL (1U << 6)
+#define SPIDER_SQL_TYPE_OTHER_SQL (1U << 7)
+#define SPIDER_SQL_TYPE_HANDLER (1U << 8)
+#define SPIDER_SQL_TYPE_SELECT_HS (1U << 9)
+#define SPIDER_SQL_TYPE_INSERT_HS (1U << 10)
+#define SPIDER_SQL_TYPE_UPDATE_HS (1U << 11)
+#define SPIDER_SQL_TYPE_DELETE_HS (1U << 12)
+#define SPIDER_SQL_TYPE_OTHER_HS (1U << 13)
enum spider_bulk_upd_start {
SPD_BU_NOT_START,
@@ -1279,6 +1281,9 @@ public:
ulong sql_type,
int link_idx
) = 0;
+ virtual bool is_sole_projection_field(
+ uint16 field_index
+ ) = 0;
virtual bool is_bulk_insert_exec_period(
bool bulk_end
) = 0;
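
The block of flag macros above changes (1 << n) to (1U << n). Today the Spider flags stop at bit 13, so this is mostly defensive: once a flag reaches bit 31, a signed shift overflows, and even a valid signed flag sign-extends when widened into a larger unsigned mask. A small demonstration, using 16-bit flags so the top-bit behaviour is portable to show:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
      // Top-bit flag in a 16-bit type; on two's-complement targets the
      // cast yields -32768 (since C++20 the conversion is fully defined).
      std::int16_t  signed_flag   = (std::int16_t)  (1U << 15);
      std::uint16_t unsigned_flag = (std::uint16_t) (1U << 15);

      // Widening into a 32-bit mask: the signed flag sign-extends, the
      // unsigned flag zero-extends. The same trap awaits an int flag at
      // bit 31 when it is combined into a 64-bit value.
      std::uint32_t from_signed   = (std::uint32_t)(std::int32_t) signed_flag;
      std::uint32_t from_unsigned = unsigned_flag;
      std::printf("%08x vs %08x\n", (unsigned) from_signed,
                  (unsigned) from_unsigned);
      return 0;
    }
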
diff --git a/storage/spider/spd_db_mysql.cc b/storage/spider/spd_db_mysql.cc
index cf1a07715fc..2be4c810c06 100644
--- a/storage/spider/spd_db_mysql.cc
+++ b/storage/spider/spd_db_mysql.cc
@@ -1717,7 +1717,7 @@ int spider_db_mysql::exec_query(
l_time->tm_hour, l_time->tm_min, l_time->tm_sec,
security_ctx->user ? security_ctx->user : "system user",
security_ctx->host_or_ip,
- thd->thread_id,
+ (ulong) thd->thread_id,
tmp_query_str.c_ptr_safe());
}
if (log_result_error_with_sql & 1)
@@ -1731,7 +1731,7 @@ int spider_db_mysql::exec_query(
"sql: %s\n",
l_time->tm_year + 1900, l_time->tm_mon + 1, l_time->tm_mday,
l_time->tm_hour, l_time->tm_min, l_time->tm_sec,
- thd->thread_id, conn->tgt_host, db_conn->thread_id,
+ (ulong) thd->thread_id, conn->tgt_host, (ulong) db_conn->thread_id,
tmp_query_str.c_ptr_safe());
}
}
@@ -1745,7 +1745,7 @@ int spider_db_mysql::exec_query(
"affected_rows: %llu id: %llu status: %u warning_count: %u\n",
l_time->tm_year + 1900, l_time->tm_mon + 1, l_time->tm_mday,
l_time->tm_hour, l_time->tm_min, l_time->tm_sec,
- conn->tgt_host, db_conn->thread_id, thd->thread_id,
+ conn->tgt_host, (ulong) db_conn->thread_id, (ulong) thd->thread_id,
db_conn->affected_rows, db_conn->insert_id,
db_conn->server_status, db_conn->warning_count);
if (spider_param_log_result_errors() >= 3)
@@ -1760,7 +1760,7 @@ int spider_db_mysql::exec_query(
"affected_rows: %llu id: %llu status: %u warning_count: %u\n",
l_time->tm_year + 1900, l_time->tm_mon + 1, l_time->tm_mday,
l_time->tm_hour, l_time->tm_min, l_time->tm_sec,
- conn->tgt_host, db_conn->thread_id, thd->thread_id,
+ conn->tgt_host, (ulong) db_conn->thread_id, (ulong) thd->thread_id,
db_conn->affected_rows, db_conn->insert_id,
db_conn->server_status, db_conn->warning_count);
}
@@ -1889,8 +1889,8 @@ void spider_db_mysql::print_warnings(
"from [%s] %ld to %ld: %s %s %s\n",
l_time->tm_year + 1900, l_time->tm_mon + 1, l_time->tm_mday,
l_time->tm_hour, l_time->tm_min, l_time->tm_sec,
- conn->tgt_host, db_conn->thread_id,
- current_thd->thread_id, row[0], row[1], row[2]);
+ conn->tgt_host, (ulong) db_conn->thread_id,
+ (ulong) current_thd->thread_id, row[0], row[1], row[2]);
row = mysql_fetch_row(res);
}
if (res)
@@ -3948,13 +3948,27 @@ int spider_db_mysql_util::open_item_func(
case Item_func::LE_FUNC:
case Item_func::GE_FUNC:
case Item_func::GT_FUNC:
- case Item_func::LIKE_FUNC:
if (str)
{
func_name = (char*) item_func->func_name();
func_name_length = strlen(func_name);
}
break;
+ case Item_func::LIKE_FUNC:
+ if (str)
+ {
+ if (((Item_func_like *)item_func)->negated)
+ {
+ func_name = SPIDER_SQL_NOT_LIKE_STR;
+ func_name_length = SPIDER_SQL_NOT_LIKE_LEN;
+ }
+ else
+ {
+ func_name = (char*)item_func->func_name();
+ func_name_length = strlen(func_name);
+ }
+ }
+ break;
default:
THD *thd = spider->trx->thd;
SPIDER_SHARE *share = spider->share;
@@ -8134,7 +8148,7 @@ int spider_mysql_handler::append_key_order_for_direct_order_limit_with_alias(
DBUG_PRINT("info",("spider error=%d", error_num));
DBUG_RETURN(error_num);
}
- if (order->asc)
+ if (order->direction == ORDER::ORDER_ASC)
{
if (str->reserve(SPIDER_SQL_COMMA_LEN))
DBUG_RETURN(HA_ERR_OUT_OF_MEM);
@@ -9516,6 +9530,65 @@ int spider_mysql_handler::append_explain_select(
DBUG_RETURN(0);
}
+/********************************************************************
+ * Determine whether the current query's projection list
+ * consists solely of the specified column.
+ *
+ * Params IN - field_index:
+ * Field index of the column of interest within
+ * its table.
+ *
+ * Returns TRUE - if the query's projection list consists
+ * solely of the specified column.
+ * FALSE - otherwise.
+ ********************************************************************/
+bool spider_mysql_handler::is_sole_projection_field( uint16 field_index )
+{
+ // Determine whether the projection list consists solely of the field of interest
+ bool is_field_in_projection_list = FALSE;
+ TABLE* table = spider->get_table();
+ uint16 projection_field_count = 0;
+ uint16 projection_field_index;
+ Field** field;
+ DBUG_ENTER( "spider_mysql_handler::is_sole_projection_field" );
+
+ for ( field = table->field; *field ; field++ )
+ {
+ projection_field_index = ( *field )->field_index;
+
+ if ( !( minimum_select_bit_is_set( projection_field_index ) ) )
+ {
+ // Current field is not in the projection list
+ continue;
+ }
+
+ projection_field_count++;
+
+ if ( !is_field_in_projection_list )
+ {
+ if ( field_index == projection_field_index )
+ {
+ // Field of interest is in the projection list
+ is_field_in_projection_list = TRUE;
+ }
+ }
+
+ if ( is_field_in_projection_list && ( projection_field_count != 1 ) )
+ {
+ // Field of interest is not the sole column in the projection list
+ DBUG_RETURN( FALSE );
+ }
+ }
+
+ if ( is_field_in_projection_list && ( projection_field_count == 1 ) )
+ {
+ // Field of interest is the only column in the projection list
+ DBUG_RETURN( TRUE );
+ }
+
+ DBUG_RETURN( FALSE );
+}
+
bool spider_mysql_handler::is_bulk_insert_exec_period(
bool bulk_end
) {
@@ -10271,6 +10344,7 @@ int spider_mysql_handler::show_table_status(
ulonglong auto_increment_value = 0;
DBUG_ENTER("spider_mysql_handler::show_table_status");
DBUG_PRINT("info",("spider sts_mode=%d", sts_mode));
+
if (sts_mode == 1)
{
pthread_mutex_lock(&conn->mta_conn_mutex);
@@ -10278,6 +10352,7 @@ int spider_mysql_handler::show_table_status(
conn->need_mon = &spider->need_mons[link_idx];
conn->mta_conn_mutex_lock_already = TRUE;
conn->mta_conn_mutex_unlock_later = TRUE;
+ conn->disable_connect_retry = TRUE;
spider_conn_set_timeout_from_share(conn, link_idx, spider->trx->thd,
share);
if (
@@ -10299,6 +10374,7 @@ int spider_mysql_handler::show_table_status(
/* retry */
if ((error_num = spider_db_ping(spider, conn, link_idx)))
{
+ conn->disable_connect_retry = FALSE;
conn->mta_conn_mutex_lock_already = FALSE;
conn->mta_conn_mutex_unlock_later = FALSE;
SPIDER_CLEAR_FILE_POS(&conn->mta_conn_mutex_file_pos);
@@ -10307,6 +10383,7 @@ int spider_mysql_handler::show_table_status(
}
if ((error_num = spider_db_set_names(spider, conn, link_idx)))
{
+ conn->disable_connect_retry = FALSE;
conn->mta_conn_mutex_lock_already = FALSE;
conn->mta_conn_mutex_unlock_later = FALSE;
SPIDER_CLEAR_FILE_POS(&conn->mta_conn_mutex_file_pos);
@@ -10322,11 +10399,13 @@ int spider_mysql_handler::show_table_status(
-1,
&spider->need_mons[link_idx])
) {
+ conn->disable_connect_retry = FALSE;
conn->mta_conn_mutex_lock_already = FALSE;
conn->mta_conn_mutex_unlock_later = FALSE;
DBUG_RETURN(spider_db_errorno(conn));
}
} else {
+ conn->disable_connect_retry = FALSE;
conn->mta_conn_mutex_lock_already = FALSE;
conn->mta_conn_mutex_unlock_later = FALSE;
SPIDER_CLEAR_FILE_POS(&conn->mta_conn_mutex_file_pos);
@@ -10342,6 +10421,7 @@ int spider_mysql_handler::show_table_status(
request_key.next = NULL;
if (spider_param_dry_access())
{
+ conn->disable_connect_retry = FALSE;
conn->mta_conn_mutex_lock_already = FALSE;
conn->mta_conn_mutex_unlock_later = FALSE;
SPIDER_CLEAR_FILE_POS(&conn->mta_conn_mutex_file_pos);
@@ -10350,11 +10430,13 @@ int spider_mysql_handler::show_table_status(
}
if (!(res = conn->db_conn->store_result(NULL, &request_key, &error_num)))
{
+ conn->disable_connect_retry = FALSE;
conn->mta_conn_mutex_lock_already = FALSE;
conn->mta_conn_mutex_unlock_later = FALSE;
if (error_num || (error_num = spider_db_errorno(conn)))
DBUG_RETURN(error_num);
- else {
+ else
+ {
my_printf_error(ER_SPIDER_REMOTE_TABLE_NOT_FOUND_NUM,
ER_SPIDER_REMOTE_TABLE_NOT_FOUND_STR, MYF(0),
mysql_share->db_names_str[spider->conn_link_idx[link_idx]].ptr(),
@@ -10363,6 +10445,7 @@ int spider_mysql_handler::show_table_status(
DBUG_RETURN(ER_SPIDER_REMOTE_TABLE_NOT_FOUND_NUM);
}
}
+ conn->disable_connect_retry = FALSE;
conn->mta_conn_mutex_lock_already = FALSE;
conn->mta_conn_mutex_unlock_later = FALSE;
SPIDER_CLEAR_FILE_POS(&conn->mta_conn_mutex_file_pos);
@@ -10410,6 +10493,7 @@ int spider_mysql_handler::show_table_status(
conn->need_mon = &spider->need_mons[link_idx];
conn->mta_conn_mutex_lock_already = TRUE;
conn->mta_conn_mutex_unlock_later = TRUE;
+ conn->disable_connect_retry = TRUE;
spider_conn_set_timeout_from_share(conn, link_idx, spider->trx->thd,
share);
if (
@@ -10431,6 +10515,7 @@ int spider_mysql_handler::show_table_status(
/* retry */
if ((error_num = spider_db_ping(spider, conn, link_idx)))
{
+ conn->disable_connect_retry = FALSE;
conn->mta_conn_mutex_lock_already = FALSE;
conn->mta_conn_mutex_unlock_later = FALSE;
SPIDER_CLEAR_FILE_POS(&conn->mta_conn_mutex_file_pos);
@@ -10439,6 +10524,7 @@ int spider_mysql_handler::show_table_status(
}
if ((error_num = spider_db_set_names(spider, conn, link_idx)))
{
+ conn->disable_connect_retry = FALSE;
conn->mta_conn_mutex_lock_already = FALSE;
conn->mta_conn_mutex_unlock_later = FALSE;
SPIDER_CLEAR_FILE_POS(&conn->mta_conn_mutex_file_pos);
@@ -10454,11 +10540,13 @@ int spider_mysql_handler::show_table_status(
-1,
&spider->need_mons[link_idx])
) {
+ conn->disable_connect_retry = FALSE;
conn->mta_conn_mutex_lock_already = FALSE;
conn->mta_conn_mutex_unlock_later = FALSE;
DBUG_RETURN(spider_db_errorno(conn));
}
} else {
+ conn->disable_connect_retry = FALSE;
conn->mta_conn_mutex_lock_already = FALSE;
conn->mta_conn_mutex_unlock_later = FALSE;
SPIDER_CLEAR_FILE_POS(&conn->mta_conn_mutex_file_pos);
@@ -10474,6 +10562,7 @@ int spider_mysql_handler::show_table_status(
request_key.next = NULL;
if (spider_param_dry_access())
{
+ conn->disable_connect_retry = FALSE;
conn->mta_conn_mutex_lock_already = FALSE;
conn->mta_conn_mutex_unlock_later = FALSE;
SPIDER_CLEAR_FILE_POS(&conn->mta_conn_mutex_file_pos);
@@ -10482,6 +10571,7 @@ int spider_mysql_handler::show_table_status(
}
if (!(res = conn->db_conn->store_result(NULL, &request_key, &error_num)))
{
+ conn->disable_connect_retry = FALSE;
conn->mta_conn_mutex_lock_already = FALSE;
conn->mta_conn_mutex_unlock_later = FALSE;
if (error_num || (error_num = spider_db_errorno(conn)))
@@ -10489,6 +10579,7 @@ int spider_mysql_handler::show_table_status(
else
DBUG_RETURN(ER_QUERY_ON_FOREIGN_DATA_SOURCE);
}
+ conn->disable_connect_retry = FALSE;
conn->mta_conn_mutex_lock_already = FALSE;
conn->mta_conn_mutex_unlock_later = FALSE;
SPIDER_CLEAR_FILE_POS(&conn->mta_conn_mutex_file_pos);
@@ -10545,6 +10636,7 @@ int spider_mysql_handler::show_table_status(
DBUG_PRINT("info",("spider auto_increment_value=%llu",
share->lgtm_tblhnd_share->auto_increment_value));
}
+
DBUG_RETURN(0);
}
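
is_sole_projection_field(), added above for the MySQL handler (and below, identically, for the Oracle handler), answers one question for the query builder: is the given column the only one in the query's minimal read set? The loop counts projected fields and bails out as soon as a second one shows up alongside the field of interest. A standalone sketch of the same walk, with a std::bitset standing in for the handler's select bitmap (minimum_select_bit_is_set() in the real code):

    #include <bitset>
    #include <cstdint>
    #include <cstdio>

    static const std::size_t kMaxFields = 64;

    static bool is_sole_projection_field_sketch(
        std::uint16_t field_index, std::size_t field_count,
        const std::bitset<kMaxFields> &read_set)
    {
      std::uint16_t projected = 0;
      bool found = false;
      for (std::size_t i = 0; i < field_count; i++)
      {
        if (!read_set.test(i))
          continue;                  // field i is not in the projection list
        projected++;
        if (i == field_index)
          found = true;              // field of interest is projected
        if (found && projected != 1)
          return false;              // some other column is projected too
      }
      return found && projected == 1;
    }

    int main()
    {
      std::bitset<kMaxFields> read_set;
      read_set.set(2);                                    // SELECT col2 FROM t
      std::printf("%d\n", is_sole_projection_field_sketch(2, 5, read_set));
      read_set.set(4);                                    // SELECT col2, col4
      std::printf("%d\n", is_sole_projection_field_sketch(2, 5, read_set));
      return 0;
    }
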
diff --git a/storage/spider/spd_db_mysql.h b/storage/spider/spd_db_mysql.h
index 0bbfd8e8221..ecd2e8d6b3d 100644
--- a/storage/spider/spd_db_mysql.h
+++ b/storage/spider/spd_db_mysql.h
@@ -1128,6 +1128,9 @@ public:
ulong sql_type,
int link_idx
);
+ bool is_sole_projection_field(
+ uint16 field_index
+ );
bool is_bulk_insert_exec_period(
bool bulk_end
);
diff --git a/storage/spider/spd_db_oracle.cc b/storage/spider/spd_db_oracle.cc
index 4622f3254a1..281e3188067 100644
--- a/storage/spider/spd_db_oracle.cc
+++ b/storage/spider/spd_db_oracle.cc
@@ -9575,6 +9575,65 @@ int spider_oracle_handler::append_explain_select(
DBUG_RETURN(0);
}
+/********************************************************************
+ * Determine whether the current query's projection list
+ * consists solely of the specified column.
+ *
+ * Params IN - field_index:
+ * Field index of the column of interest within
+ * its table.
+ *
+ * Returns TRUE - if the query's projection list consists
+ * solely of the specified column.
+ * FALSE - otherwise.
+ ********************************************************************/
+bool spider_oracle_handler::is_sole_projection_field( uint16 field_index )
+{
+ // Determine whether the projection list consists solely of the field of interest
+ bool is_field_in_projection_list = FALSE;
+ TABLE* table = spider->get_table();
+ uint16 projection_field_count = 0;
+ uint16 projection_field_index;
+ Field** field;
+ DBUG_ENTER( "spider_oracle_handler::is_sole_projection_field" );
+
+ for ( field = table->field; *field; field++ )
+ {
+ projection_field_index = ( *field )->field_index;
+
+ if ( !( minimum_select_bit_is_set( projection_field_index ) ) )
+ {
+ // Current field is not in the projection list
+ continue;
+ }
+
+ projection_field_count++;
+
+ if ( !is_field_in_projection_list )
+ {
+ if (field_index == projection_field_index)
+ {
+ // Field of interest is in the projection list
+ is_field_in_projection_list = TRUE;
+ }
+ }
+
+ if ( is_field_in_projection_list && ( projection_field_count != 1 ) )
+ {
+ // Field of interest is not the sole column in the projection list
+ DBUG_RETURN( FALSE );
+ }
+ }
+
+ if ( is_field_in_projection_list && ( projection_field_count == 1 ) )
+ {
+ // Field of interest is the only column in the projection list
+ DBUG_RETURN( TRUE );
+ }
+
+ DBUG_RETURN( FALSE );
+}
+
bool spider_oracle_handler::is_bulk_insert_exec_period(
bool bulk_end
) {
diff --git a/storage/spider/spd_db_oracle.h b/storage/spider/spd_db_oracle.h
index ffd12db4e1e..f10a8cf8839 100644
--- a/storage/spider/spd_db_oracle.h
+++ b/storage/spider/spd_db_oracle.h
@@ -1208,6 +1208,9 @@ public:
ulong sql_type,
int link_idx
);
+ bool is_sole_projection_field(
+ uint16 field_index
+ );
bool is_bulk_insert_exec_period(
bool bulk_end
);
diff --git a/storage/spider/spd_direct_sql.cc b/storage/spider/spd_direct_sql.cc
index 267ac36ed13..3b6d167d14b 100644
--- a/storage/spider/spd_direct_sql.cc
+++ b/storage/spider/spd_direct_sql.cc
@@ -916,13 +916,12 @@ error:
if (!direct_sql->param_name) \
{ \
if ((direct_sql->param_name = spider_get_string_between_quote( \
- start_ptr, TRUE))) \
+ start_ptr, TRUE, &param_string_parse))) \
direct_sql->SPIDER_PARAM_STR_LEN(param_name) = \
strlen(direct_sql->param_name); \
- else { \
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR, \
- MYF(0), tmp_ptr); \
+ else \
+ { \
+ error_num = param_string_parse.print_param_error(); \
goto error; \
} \
DBUG_PRINT("info",("spider " title_name "=%s", direct_sql->param_name)); \
@@ -942,9 +941,7 @@ error:
{ \
if (hint_num < 0 || hint_num >= max_size) \
{ \
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR, \
- MYF(0), tmp_ptr); \
+ error_num = param_string_parse.print_param_error(); \
goto error; \
} else if (direct_sql->param_name[hint_num] != -1) \
break; \
@@ -957,17 +954,13 @@ error:
else if (direct_sql->param_name[hint_num] > max_val) \
direct_sql->param_name[hint_num] = max_val; \
} else { \
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR, \
- MYF(0), tmp_ptr); \
+ error_num = param_string_parse.print_param_error(); \
goto error; \
} \
DBUG_PRINT("info",("spider " title_name "[%d]=%d", hint_num, \
direct_sql->param_name[hint_num])); \
} else { \
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR, \
- MYF(0), tmp_ptr); \
+ error_num = param_string_parse.print_param_error(); \
goto error; \
} \
break; \
@@ -986,10 +979,11 @@ error:
direct_sql->param_name = min_val; \
else if (direct_sql->param_name > max_val) \
direct_sql->param_name = max_val; \
+ param_string_parse.set_param_value(tmp_ptr2, \
+ tmp_ptr2 + \
+ strlen(tmp_ptr2) + 1); \
} else { \
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR, \
- MYF(0), tmp_ptr); \
+ error_num = param_string_parse.print_param_error(); \
goto error; \
} \
DBUG_PRINT("info",("spider " title_name "=%d", \
@@ -1009,10 +1003,11 @@ error:
direct_sql->param_name = atoi(tmp_ptr2); \
if (direct_sql->param_name < min_val) \
direct_sql->param_name = min_val; \
+ param_string_parse.set_param_value(tmp_ptr2, \
+ tmp_ptr2 + \
+ strlen(tmp_ptr2) + 1); \
} else { \
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR, \
- MYF(0), tmp_ptr); \
+ error_num = param_string_parse.print_param_error(); \
goto error; \
} \
DBUG_PRINT("info",("spider " title_name "=%d", direct_sql->param_name)); \
@@ -1032,10 +1027,11 @@ error:
my_strtoll10(tmp_ptr2, (char**) NULL, &error_num); \
if (direct_sql->param_name < min_val) \
direct_sql->param_name = min_val; \
+ param_string_parse.set_param_value(tmp_ptr2, \
+ tmp_ptr2 + \
+ strlen(tmp_ptr2) + 1); \
} else { \
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR, \
- MYF(0), tmp_ptr); \
+ error_num = param_string_parse.print_param_error(); \
goto error; \
} \
DBUG_PRINT("info",("spider " title_name "=%lld", \
@@ -1055,6 +1051,7 @@ int spider_udf_parse_direct_sql_param(
char *sprit_ptr[2];
char *tmp_ptr, *tmp_ptr2, *start_ptr;
int title_length;
+ SPIDER_PARAM_STRING_PARSE param_string_parse;
DBUG_ENTER("spider_udf_parse_direct_sql_param");
direct_sql->tgt_port = -1;
direct_sql->tgt_ssl_vsc = -1;
@@ -1091,6 +1088,7 @@ int spider_udf_parse_direct_sql_param(
DBUG_PRINT("info",("spider param_string=%s", param_string));
sprit_ptr[0] = param_string;
+ param_string_parse.init(param_string, ER_SPIDER_INVALID_UDF_PARAM_NUM);
while (sprit_ptr[0])
{
if ((sprit_ptr[1] = strchr(sprit_ptr[0], ',')))
@@ -1117,10 +1115,14 @@ int spider_udf_parse_direct_sql_param(
title_length++;
start_ptr++;
}
+ param_string_parse.set_param_title(tmp_ptr, tmp_ptr + title_length);
switch (title_length)
{
case 0:
+ error_num = param_string_parse.print_param_error();
+ if (error_num)
+ goto error;
continue;
case 3:
#if defined(HS_HAS_SQLCOM) && defined(HAVE_HANDLERSOCKET)
@@ -1146,120 +1148,92 @@ int spider_udf_parse_direct_sql_param(
SPIDER_PARAM_INT_WITH_MAX("urt", use_real_table, 0, 1);
#endif
SPIDER_PARAM_INT("wto", net_write_timeout, 0);
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
- MYF(0), tmp_ptr);
+ error_num = param_string_parse.print_param_error();
goto error;
case 4:
SPIDER_PARAM_INT_WITH_MAX("erwm", error_rw_mode, 0, 1);
SPIDER_PARAM_STR("host", tgt_host);
SPIDER_PARAM_INT_WITH_MAX("port", tgt_port, 0, 65535);
SPIDER_PARAM_STR("user", tgt_username);
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
- MYF(0), tmp_ptr);
+ error_num = param_string_parse.print_param_error();
goto error;
case 6:
SPIDER_PARAM_STR("server", server_name);
SPIDER_PARAM_STR("socket", tgt_socket);
SPIDER_PARAM_HINT_WITH_MAX("iop", iop, 3, direct_sql->table_count, 0, 2);
SPIDER_PARAM_STR("ssl_ca", tgt_ssl_ca);
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
- MYF(0), tmp_ptr);
+ error_num = param_string_parse.print_param_error();
goto error;
case 7:
SPIDER_PARAM_STR("wrapper", tgt_wrapper);
SPIDER_PARAM_STR("ssl_key", tgt_ssl_key);
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
- MYF(0), tmp_ptr);
+ error_num = param_string_parse.print_param_error();
goto error;
case 8:
SPIDER_PARAM_STR("database", tgt_default_db_name);
SPIDER_PARAM_STR("password", tgt_password);
SPIDER_PARAM_LONGLONG("priority", priority, 0);
SPIDER_PARAM_STR("ssl_cert", tgt_ssl_cert);
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
- MYF(0), tmp_ptr);
+ error_num = param_string_parse.print_param_error();
goto error;
case 10:
SPIDER_PARAM_STR("ssl_cipher", tgt_ssl_cipher);
SPIDER_PARAM_STR("ssl_capath", tgt_ssl_capath);
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
- MYF(0), tmp_ptr);
+ error_num = param_string_parse.print_param_error();
goto error;
case 11:
#if defined(HS_HAS_SQLCOM) && defined(HAVE_HANDLERSOCKET)
SPIDER_PARAM_INT_WITH_MAX("access_mode", access_mode, 0, 2);
#endif
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
- MYF(0), tmp_ptr);
+ error_num = param_string_parse.print_param_error();
goto error;
case 12:
SPIDER_PARAM_STR("default_file", tgt_default_file);
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
- MYF(0), tmp_ptr);
+ error_num = param_string_parse.print_param_error();
goto error;
case 13:
SPIDER_PARAM_STR("default_group", tgt_default_group);
SPIDER_PARAM_INT_WITH_MAX("error_rw_mode", error_rw_mode, 0, 1);
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
- MYF(0), tmp_ptr);
+ error_num = param_string_parse.print_param_error();
goto error;
case 14:
#if MYSQL_VERSION_ID < 50500
#else
SPIDER_PARAM_INT_WITH_MAX("use_real_table", use_real_table, 0, 1);
#endif
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
- MYF(0), tmp_ptr);
+ error_num = param_string_parse.print_param_error();
goto error;
case 15:
SPIDER_PARAM_INT_WITH_MAX("table_loop_mode", table_loop_mode, 0, 2);
SPIDER_PARAM_INT("connect_timeout", connect_timeout, 0);
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
- MYF(0), tmp_ptr);
+ error_num = param_string_parse.print_param_error();
goto error;
case 16:
SPIDER_PARAM_LONGLONG("bulk_insert_rows", bulk_insert_rows, 1);
SPIDER_PARAM_INT("net_read_timeout", net_read_timeout, 0);
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
- MYF(0), tmp_ptr);
+ error_num = param_string_parse.print_param_error();
goto error;
case 17:
SPIDER_PARAM_INT("net_write_timeout", net_write_timeout, 0);
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
- MYF(0), tmp_ptr);
+ error_num = param_string_parse.print_param_error();
goto error;
case 18:
SPIDER_PARAM_INT_WITH_MAX(
"connection_channel", connection_channel, 0, 63);
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
- MYF(0), tmp_ptr);
+ error_num = param_string_parse.print_param_error();
goto error;
case 22:
SPIDER_PARAM_INT_WITH_MAX("ssl_verify_server_cert", tgt_ssl_vsc, 0, 1);
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
- MYF(0), tmp_ptr);
+ error_num = param_string_parse.print_param_error();
goto error;
default:
- error_num = ER_SPIDER_INVALID_UDF_PARAM_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
- MYF(0), tmp_ptr);
+ error_num = param_string_parse.print_param_error();
goto error;
}
+
+ /* Verify that the remainder of the parameter value is whitespace */
+ if ((error_num = param_string_parse.has_extra_parameter_values()))
+ goto error;
}
set_default:
@@ -1656,7 +1630,7 @@ long long spider_direct_sql_body(
table_list.table_name = direct_sql->table_names[roop_count];
#endif
if (!(direct_sql->tables[roop_count] =
- find_temporary_table(thd, &table_list)))
+ thd->find_temporary_table(&table_list)))
{
#if MYSQL_VERSION_ID < 50500
#else
diff --git a/storage/spider/spd_include.h b/storage/spider/spd_include.h
index 472bcea298e..a6ce2efe774 100644
--- a/storage/spider/spd_include.h
+++ b/storage/spider/spd_include.h
@@ -449,6 +449,9 @@ typedef struct st_spider_conn
st_spider_conn *bulk_access_next;
#endif
+ bool disable_connect_retry; /* TRUE if there is no need to
+ retry connecting after a
+ connection error */
bool connect_error_with_message;
char connect_error_msg[MYSQL_ERRMSG_SIZE];
int connect_error;
diff --git a/storage/spider/spd_param.cc b/storage/spider/spd_param.cc
index decdd9bd153..c4aaef3a404 100644
--- a/storage/spider/spd_param.cc
+++ b/storage/spider/spd_param.cc
@@ -929,19 +929,23 @@ bool spider_param_use_default_database(
}
/*
- FALSE: sql_log_off = 0
- TRUE: sql_log_off = 1
- */
-static MYSQL_THDVAR_BOOL(
- internal_sql_log_off, /* name */
- PLUGIN_VAR_OPCMDARG, /* opt */
- "Sync sql_log_off", /* comment */
- NULL, /* check */
- NULL, /* update */
- TRUE /* def */
-);
-
-bool spider_param_internal_sql_log_off(
+ -1 : unknown or does not matter; do not send a 'SET SQL_LOG_OFF' statement
+  0 : send a 'SET SQL_LOG_OFF 0' statement to the data nodes
+  1 : send a 'SET SQL_LOG_OFF 1' statement to the data nodes
+*/
+static MYSQL_THDVAR_INT(
+ internal_sql_log_off, /* name */
+ PLUGIN_VAR_RQCMDARG, /* opt */
+ "Manage SQL_LOG_OFF mode statement to the data nodes", /* comment */
+ NULL, /* check */
+ NULL, /* update */
+ -1, /* default */
+ -1, /* min */
+ 1, /* max */
+ 0 /* blk */
+);
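+
+/*
+  A sketch (not part of the patch itself) of how the tri-state is consumed;
+  see spider_check_and_set_sql_log_off() in the spd_trx.cc hunk below:
+
+    int v = spider_param_internal_sql_log_off(thd);
+    if (v != -1)                      // -1: leave the data nodes alone
+      spider_conn_queue_sql_log_off(conn, v == 1);
+*/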
+
+int spider_param_internal_sql_log_off(
THD *thd
) {
DBUG_ENTER("spider_param_internal_sql_log_off");
@@ -2182,15 +2186,15 @@ char *spider_param_remote_time_zone()
static int spider_remote_sql_log_off;
/*
- -1 :don't set
- 0 :sql_log_off = 0
- 1 :sql_log_off = 1
+ -1 : the value of sql_log_off on the data nodes is unknown, or does not matter
+  0 : sql_log_off = 0 on all data nodes
+  1 : sql_log_off = 1 on all data nodes
*/
static MYSQL_SYSVAR_INT(
remote_sql_log_off,
spider_remote_sql_log_off,
PLUGIN_VAR_RQCMDARG,
- "Set sql_log_off mode at connecting for improvement performance of connection if you know",
+ "Set SQL_LOG_OFF mode on connecting for improved performance of connection, if you know",
NULL,
NULL,
-1,
diff --git a/storage/spider/spd_param.h b/storage/spider/spd_param.h
index cc823858400..53ea03f9f7f 100644
--- a/storage/spider/spd_param.h
+++ b/storage/spider/spd_param.h
@@ -110,7 +110,7 @@ bool spider_param_sync_time_zone(
bool spider_param_use_default_database(
THD *thd
);
-bool spider_param_internal_sql_log_off(
+int spider_param_internal_sql_log_off(
THD *thd
);
int spider_param_bulk_size(
diff --git a/storage/spider/spd_sys_table.cc b/storage/spider/spd_sys_table.cc
index a118a7e416b..8924629b48d 100644
--- a/storage/spider/spd_sys_table.cc
+++ b/storage/spider/spd_sys_table.cc
@@ -35,6 +35,86 @@
extern handlerton *spider_hton_ptr;
+/**
+ Insert a Spider system table row.
+
+ @param table The spider system table.
+ @param do_handle_error TRUE if an error message should be printed
+ before returning.
+
+ @return Error code returned by the write.
+*/
+
+inline int spider_write_sys_table_row(TABLE *table, bool do_handle_error = TRUE)
+{
+ int error_num;
+ THD *thd = table->in_use;
+
+ tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
+ error_num = table->file->ha_write_row(table->record[0]);
+ reenable_binlog(thd);
+
+ if (error_num && do_handle_error)
+ table->file->print_error(error_num, MYF(0));
+
+ return error_num;
+}
+
+/**
+ Update a Spider system table row.
+
+ @param table The spider system table.
+
+ @return Error code returned by the update.
+*/
+
+inline int spider_update_sys_table_row(TABLE *table)
+{
+ int error_num;
+ THD *thd = table->in_use;
+
+ tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
+ error_num = table->file->ha_update_row(table->record[1], table->record[0]);
+ reenable_binlog(thd);
+
+ if (error_num)
+ {
+ if (error_num == HA_ERR_RECORD_IS_THE_SAME)
+ error_num = 0;
+ else
+ table->file->print_error(error_num, MYF(0));
+ }
+
+ return error_num;
+}
+
+/**
+ Delete a Spider system table row.
+
+ @param table The spider system table.
+ @param record_number Location of the record: 0 or 1.
+ @param do_handle_error TRUE if an error message should be printed
+ before returning.
+
+ @return Error code returned by the delete.
+*/
+
+inline int spider_delete_sys_table_row(TABLE *table, int record_number = 0,
+ bool do_handle_error = TRUE)
+{
+ int error_num;
+ THD *thd = table->in_use;
+
+ tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
+ error_num = table->file->ha_delete_row(table->record[record_number]);
+ reenable_binlog(thd);
+
+ if (error_num && do_handle_error)
+ table->file->print_error(error_num, MYF(0));
+
+ return error_num;
+}
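+
+/*
+  With these helpers each call site collapses to a single check, e.g.
+  (a sketch mirroring the spider_insert_xa() change below):
+
+    if ((error_num = spider_write_sys_table_row(table)))
+      DBUG_RETURN(error_num);
+*/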
+
#if MYSQL_VERSION_ID < 50500
TABLE *spider_open_sys_table(
THD *thd,
@@ -227,7 +307,8 @@ void spider_close_sys_table(
close_performance_schema_table(thd, open_tables_backup);
} else {
table->file->ha_reset();
- closefrm(table, TRUE);
+ closefrm(table);
+ tdc_release_share(table->s);
spider_free(spider_current_trx, table, MYF(0));
thd->restore_backup_open_tables_state(open_tables_backup);
}
@@ -676,13 +757,13 @@ void spider_store_tables_name(
}
table->field[0]->store(
ptr_db,
- ptr_diff_table - 1,
+ (uint)(ptr_diff_table - 1),
system_charset_info);
DBUG_PRINT("info",("spider field[0]->null_bit = %d",
table->field[0]->null_bit));
table->field[1]->store(
ptr_table,
- name_length - ptr_diff_db - ptr_diff_table,
+ (uint)(name_length - ptr_diff_db - ptr_diff_table),
system_charset_info);
DBUG_PRINT("info",("spider field[1]->null_bit = %d",
table->field[1]->null_bit));
@@ -982,11 +1063,8 @@ int spider_insert_xa(
table->use_all_columns();
spider_store_xa_bqual_length(table, xid);
spider_store_xa_status(table, status);
- if ((error_num = table->file->ha_write_row(table->record[0])))
- {
- table->file->print_error(error_num, MYF(0));
+ if ((error_num = spider_write_sys_table_row(table)))
DBUG_RETURN(error_num);
- }
} else {
my_message(ER_SPIDER_XA_EXISTS_NUM, ER_SPIDER_XA_EXISTS_STR, MYF(0));
DBUG_RETURN(ER_SPIDER_XA_EXISTS_NUM);
@@ -1016,11 +1094,8 @@ int spider_insert_xa_member(
}
table->use_all_columns();
spider_store_xa_member_info(table, xid, conn);
- if ((error_num = table->file->ha_write_row(table->record[0])))
- {
- table->file->print_error(error_num, MYF(0));
+ if ((error_num = spider_write_sys_table_row(table)))
DBUG_RETURN(error_num);
- }
} else {
my_message(ER_SPIDER_XA_MEMBER_EXISTS_NUM, ER_SPIDER_XA_MEMBER_EXISTS_STR,
MYF(0));
@@ -1050,11 +1125,8 @@ int spider_insert_tables(
SPIDER_LINK_STATUS_NO_CHANGE ?
share->alter_table.tmp_link_statuses[roop_count] :
SPIDER_LINK_STATUS_OK);
- if ((error_num = table->file->ha_write_row(table->record[0])))
- {
- table->file->print_error(error_num, MYF(0));
+ if ((error_num = spider_write_sys_table_row(table)))
DBUG_RETURN(error_num);
- }
}
DBUG_RETURN(0);
@@ -1076,11 +1148,8 @@ int spider_log_tables_link_failed(
if (table->field[3] == table->timestamp_field)
table->timestamp_field->set_time();
#endif
- if ((error_num = table->file->ha_write_row(table->record[0])))
- {
- table->file->print_error(error_num, MYF(0));
+ if ((error_num = spider_write_sys_table_row(table)))
DBUG_RETURN(error_num);
- }
DBUG_RETURN(0);
}
@@ -1114,11 +1183,8 @@ int spider_log_xa_failed(
if (table->field[20] == table->timestamp_field)
table->timestamp_field->set_time();
#endif
- if ((error_num = table->file->ha_write_row(table->record[0])))
- {
- table->file->print_error(error_num, MYF(0));
+ if ((error_num = spider_write_sys_table_row(table)))
DBUG_RETURN(error_num);
- }
DBUG_RETURN(0);
}
@@ -1147,14 +1213,8 @@ int spider_update_xa(
store_record(table, record[1]);
table->use_all_columns();
spider_store_xa_status(table, status);
- if (
- (error_num = table->file->ha_update_row(
- table->record[1], table->record[0])) &&
- error_num != HA_ERR_RECORD_IS_THE_SAME
- ) {
- table->file->print_error(error_num, MYF(0));
+ if ((error_num = spider_update_sys_table_row(table)))
DBUG_RETURN(error_num);
- }
}
DBUG_RETURN(0);
@@ -1187,14 +1247,8 @@ int spider_update_tables_name(
store_record(table, record[1]);
table->use_all_columns();
spider_store_tables_name(table, to, strlen(to));
- if (
- (error_num = table->file->ha_update_row(
- table->record[1], table->record[0])) &&
- error_num != HA_ERR_RECORD_IS_THE_SAME
- ) {
- table->file->print_error(error_num, MYF(0));
+ if ((error_num = spider_update_sys_table_row(table)))
DBUG_RETURN(error_num);
- }
}
roop_count++;
}
@@ -1238,11 +1292,8 @@ int spider_update_tables_priority(
SPIDER_LINK_STATUS_NO_CHANGE ?
alter_table->tmp_link_statuses[roop_count] :
SPIDER_LINK_STATUS_OK);
- if ((error_num = table->file->ha_write_row(table->record[0])))
- {
- table->file->print_error(error_num, MYF(0));
+ if ((error_num = spider_write_sys_table_row(table)))
DBUG_RETURN(error_num);
- }
roop_count++;
} while (roop_count < (int) alter_table->all_link_count);
DBUG_RETURN(0);
@@ -1258,14 +1309,8 @@ int spider_update_tables_priority(
spider_store_tables_connect_info(table, alter_table, roop_count);
spider_store_tables_link_status(table,
alter_table->tmp_link_statuses[roop_count]);
- if (
- (error_num = table->file->ha_update_row(
- table->record[1], table->record[0])) &&
- error_num != HA_ERR_RECORD_IS_THE_SAME
- ) {
- table->file->print_error(error_num, MYF(0));
+ if ((error_num = spider_update_sys_table_row(table)))
DBUG_RETURN(error_num);
- }
}
}
while (TRUE)
@@ -1283,11 +1328,8 @@ int spider_update_tables_priority(
table->file->print_error(error_num, MYF(0));
DBUG_RETURN(error_num);
}
- if ((error_num = table->file->ha_delete_row(table->record[0])))
- {
- table->file->print_error(error_num, MYF(0));
+ if ((error_num = spider_delete_sys_table_row(table)))
DBUG_RETURN(error_num);
- }
}
roop_count++;
}
@@ -1323,14 +1365,8 @@ int spider_update_tables_link_status(
store_record(table, record[1]);
table->use_all_columns();
spider_store_tables_link_status(table, link_status);
- if (
- (error_num = table->file->ha_update_row(
- table->record[1], table->record[0])) &&
- error_num != HA_ERR_RECORD_IS_THE_SAME
- ) {
- table->file->print_error(error_num, MYF(0));
+ if ((error_num = spider_update_sys_table_row(table)))
DBUG_RETURN(error_num);
- }
}
DBUG_RETURN(0);
@@ -1357,11 +1393,8 @@ int spider_delete_xa(
MYF(0));
DBUG_RETURN(ER_SPIDER_XA_NOT_EXISTS_NUM);
} else {
- if ((error_num = table->file->ha_delete_row(table->record[0])))
- {
- table->file->print_error(error_num, MYF(0));
+ if ((error_num = spider_delete_sys_table_row(table)))
DBUG_RETURN(error_num);
- }
}
DBUG_RETURN(0);
@@ -1388,7 +1421,7 @@ int spider_delete_xa_member(
DBUG_RETURN(0);
} else {
do {
- if ((error_num = table->file->ha_delete_row(table->record[0])))
+ if ((error_num = spider_delete_sys_table_row(table, 0, FALSE)))
{
spider_sys_index_end(table);
table->file->print_error(error_num, MYF(0));
@@ -1423,11 +1456,8 @@ int spider_delete_tables(
if ((error_num = spider_check_sys_table(table, table_key)))
break;
else {
- if ((error_num = table->file->ha_delete_row(table->record[0])))
- {
- table->file->print_error(error_num, MYF(0));
+ if ((error_num = spider_delete_sys_table_row(table)))
DBUG_RETURN(error_num);
- }
}
roop_count++;
}
@@ -2304,7 +2334,7 @@ int spider_sys_replace(
char table_key[MAX_KEY_LENGTH];
DBUG_ENTER("spider_sys_replace");
- while ((error_num = table->file->ha_write_row(table->record[0])))
+ while ((error_num = spider_write_sys_table_row(table, FALSE)))
{
if (
table->file->is_fatal_error(error_num, HA_CHECK_DUP) ||
@@ -2356,13 +2386,11 @@ int spider_sys_replace(
last_uniq_key &&
!table->file->referenced_by_foreign_key()
) {
- error_num = table->file->ha_update_row(table->record[1],
- table->record[0]);
- if (error_num && error_num != HA_ERR_RECORD_IS_THE_SAME)
+ if ((error_num = spider_update_sys_table_row(table)))
goto error;
DBUG_RETURN(0);
} else {
- if ((error_num = table->file->ha_delete_row(table->record[1])))
+ if ((error_num = spider_delete_sys_table_row(table, 1, FALSE)))
goto error;
*modified_non_trans_table = TRUE;
}
diff --git a/storage/spider/spd_table.cc b/storage/spider/spd_table.cc
index 9726afdd66e..7c591cf691d 100644
--- a/storage/spider/spd_table.cc
+++ b/storage/spider/spd_table.cc
@@ -45,7 +45,7 @@ inline MYSQL_THD spider_create_thd()
{
THD *thd;
my_thread_init();
- if (!(thd = new THD()))
+ if (!(thd = new THD(next_thread_id())))
my_thread_end();
else
{
@@ -62,7 +62,6 @@ inline void spider_destroy_thd(MYSQL_THD thd)
delete thd;
}
-ulong *spd_db_att_thread_id;
#ifdef SPIDER_XID_USES_xid_cache_iterate
#else
#ifdef XID_CACHE_IS_SPLITTED
@@ -938,7 +937,8 @@ void spider_free_tmp_share_alloc(
char *spider_get_string_between_quote(
char *ptr,
- bool alloc
+ bool alloc,
+ SPIDER_PARAM_STRING_PARSE *param_string_parse
) {
char *start_ptr, *end_ptr, *tmp_ptr, *esc_ptr;
bool find_flg = FALSE, esc_flg = FALSE;
@@ -1029,6 +1029,10 @@ char *spider_get_string_between_quote(
strcpy(esc_ptr, esc_ptr + 1);
}
}
+
+ if (param_string_parse)
+ param_string_parse->set_param_value(start_ptr, start_ptr + strlen(start_ptr) + 1);
+
if (alloc)
{
DBUG_RETURN(
@@ -1046,7 +1050,8 @@ int spider_create_string_list(
uint **string_length_list,
uint *list_length,
char *str,
- uint length
+ uint length,
+ SPIDER_PARAM_STRING_PARSE *param_string_parse
) {
int roop_count;
char *tmp_ptr, *tmp_ptr2, *tmp_ptr3, *esc_ptr;
@@ -1054,6 +1059,7 @@ int spider_create_string_list(
DBUG_ENTER("spider_create_string_list");
*list_length = 0;
+ param_string_parse->init_param_value();
if (!str)
{
*string_list = NULL;
@@ -1164,6 +1170,10 @@ int spider_create_string_list(
my_error(ER_OUT_OF_RESOURCES, MYF(0), HA_ERR_OUT_OF_MEM);
DBUG_RETURN(HA_ERR_OUT_OF_MEM);
}
+
+ param_string_parse->set_param_value(tmp_ptr3,
+ tmp_ptr3 + strlen(tmp_ptr3) + 1);
+
DBUG_PRINT("info",("spider string_list[%d]=%s", roop_count,
(*string_list)[roop_count]));
@@ -1176,13 +1186,15 @@ int spider_create_long_list(
char *str,
uint length,
long min_val,
- long max_val
+ long max_val,
+ SPIDER_PARAM_STRING_PARSE *param_string_parse
) {
int roop_count;
char *tmp_ptr;
DBUG_ENTER("spider_create_long_list");
*list_length = 0;
+ param_string_parse->init_param_value();
if (!str)
{
*long_list = NULL;
@@ -1238,6 +1250,9 @@ int spider_create_long_list(
(*long_list)[roop_count] = max_val;
}
+ param_string_parse->set_param_value(tmp_ptr,
+ tmp_ptr + strlen(tmp_ptr) + 1);
+
#ifndef DBUG_OFF
for (roop_count = 0; roop_count < (int) *list_length; roop_count++)
{
@@ -1255,13 +1270,15 @@ int spider_create_longlong_list(
char *str,
uint length,
longlong min_val,
- longlong max_val
+ longlong max_val,
+ SPIDER_PARAM_STRING_PARSE *param_string_parse
) {
int error_num, roop_count;
char *tmp_ptr;
DBUG_ENTER("spider_create_longlong_list");
*list_length = 0;
+ param_string_parse->init_param_value();
if (!str)
{
*longlong_list = NULL;
@@ -1318,6 +1335,9 @@ int spider_create_longlong_list(
(*longlong_list)[roop_count] = max_val;
}
+ param_string_parse->set_param_value(tmp_ptr,
+ tmp_ptr + strlen(tmp_ptr) + 1);
+
#ifndef DBUG_OFF
for (roop_count = 0; roop_count < (int) *list_length; roop_count++)
{
@@ -1507,6 +1527,38 @@ static int spider_set_ll_value(
DBUG_RETURN(error_num);
}
+/**
+ Print a parameter string error message.
+
+ @return Error code.
+*/
+
+int st_spider_param_string_parse::print_param_error()
+{
+ if (start_title_ptr)
+ {
+ /* Restore the input delimiter characters */
+ restore_delims();
+
+ /* Print the error message */
+ switch (error_num)
+ {
+ case ER_SPIDER_INVALID_UDF_PARAM_NUM:
+ my_printf_error(error_num, ER_SPIDER_INVALID_UDF_PARAM_STR,
+ MYF(0), start_title_ptr);
+ break;
+ case ER_SPIDER_INVALID_CONNECT_INFO_NUM:
+ default:
+ my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
+ MYF(0), start_title_ptr);
+ }
+
+ return error_num;
+ }
+ else
+ return 0;
+}
+
#define SPIDER_PARAM_STR_LEN(name) name ## _length
#define SPIDER_PARAM_STR(title_name, param_name) \
if (!strncasecmp(tmp_ptr, title_name, title_length)) \
@@ -1515,12 +1567,11 @@ static int spider_set_ll_value(
if (!share->param_name) \
{ \
if ((share->param_name = spider_get_string_between_quote( \
- start_ptr, TRUE))) \
+ start_ptr, TRUE, &connect_string_parse))) \
share->SPIDER_PARAM_STR_LEN(param_name) = strlen(share->param_name); \
- else { \
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR, \
- MYF(0), tmp_ptr); \
+ else \
+ { \
+ error_num = connect_string_parse.print_param_error(); \
goto error; \
} \
DBUG_PRINT("info",("spider " title_name "=%s", share->param_name)); \
@@ -1544,12 +1595,11 @@ static int spider_set_ll_value(
&share->SPIDER_PARAM_STR_LENS(param_name), \
&share->SPIDER_PARAM_STR_LEN(param_name), \
tmp_ptr2, \
- share->SPIDER_PARAM_STR_CHARLEN(param_name)))) \
+ share->SPIDER_PARAM_STR_CHARLEN(param_name), \
+ &connect_string_parse))) \
goto error; \
} else { \
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR, \
- MYF(0), tmp_ptr); \
+ error_num = connect_string_parse.print_param_error(); \
goto error; \
} \
} \
@@ -1567,9 +1617,7 @@ static int spider_set_ll_value(
{ \
if (hint_num < 0 || hint_num >= max_size) \
{ \
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR, \
- MYF(0), tmp_ptr); \
+ error_num = connect_string_parse.print_param_error(); \
goto error; \
} else if (share->param_name[hint_num].length() > 0) \
break; \
@@ -1580,9 +1628,7 @@ static int spider_set_ll_value(
DBUG_PRINT("info",("spider " title_name "[%d]=%s", hint_num, \
share->param_name[hint_num].ptr())); \
} else { \
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR, \
- MYF(0), tmp_ptr); \
+ error_num = connect_string_parse.print_param_error(); \
goto error; \
} \
break; \
@@ -1599,9 +1645,7 @@ static int spider_set_ll_value(
{ \
if (hint_num < 0 || hint_num >= max_size) \
{ \
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR, \
- MYF(0), tmp_ptr); \
+ error_num = connect_string_parse.print_param_error(); \
goto error; \
} else if (share->param_name[hint_num] != -1) \
break; \
@@ -1612,9 +1656,7 @@ static int spider_set_ll_value(
DBUG_PRINT("info",("spider " title_name "[%d]=%lld", hint_num, \
share->param_name[hint_num])); \
} else { \
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR, \
- MYF(0), tmp_ptr); \
+ error_num = connect_string_parse.print_param_error(); \
goto error; \
} \
break; \
@@ -1635,12 +1677,11 @@ static int spider_set_ll_value(
&share->SPIDER_PARAM_LONG_LEN(param_name), \
tmp_ptr2, \
strlen(tmp_ptr2), \
- min_val, max_val))) \
+ min_val, max_val, \
+ &connect_string_parse))) \
goto error; \
} else { \
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR, \
- MYF(0), tmp_ptr); \
+ error_num = connect_string_parse.print_param_error(); \
goto error; \
} \
} \
@@ -1662,12 +1703,11 @@ static int spider_set_ll_value(
&share->SPIDER_PARAM_LONGLONG_LEN(param_name), \
tmp_ptr2, \
strlen(tmp_ptr2), \
- min_val, max_val))) \
+ min_val, max_val, \
+ &connect_string_parse))) \
goto error; \
} else { \
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR, \
- MYF(0), tmp_ptr); \
+ error_num = connect_string_parse.print_param_error(); \
goto error; \
} \
} \
@@ -1687,10 +1727,11 @@ static int spider_set_ll_value(
share->param_name = min_val; \
else if (share->param_name > max_val) \
share->param_name = max_val; \
+ connect_string_parse.set_param_value(tmp_ptr2, \
+ tmp_ptr2 + \
+ strlen(tmp_ptr2) + 1); \
} else { \
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR, \
- MYF(0), tmp_ptr); \
+ error_num = connect_string_parse.print_param_error(); \
goto error; \
} \
DBUG_PRINT("info",("spider " title_name "=%d", share->param_name)); \
@@ -1709,10 +1750,11 @@ static int spider_set_ll_value(
share->param_name = atoi(tmp_ptr2); \
if (share->param_name < min_val) \
share->param_name = min_val; \
+ connect_string_parse.set_param_value(tmp_ptr2, \
+ tmp_ptr2 + \
+ strlen(tmp_ptr2) + 1); \
} else { \
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR, \
- MYF(0), tmp_ptr); \
+ error_num = connect_string_parse.print_param_error(); \
goto error; \
} \
DBUG_PRINT("info",("spider " title_name "=%d", share->param_name)); \
@@ -1731,10 +1773,11 @@ static int spider_set_ll_value(
share->param_name = my_atof(tmp_ptr2); \
if (share->param_name < min_val) \
share->param_name = min_val; \
+ connect_string_parse.set_param_value(tmp_ptr2, \
+ tmp_ptr2 + \
+ strlen(tmp_ptr2) + 1); \
} else { \
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR, \
- MYF(0), tmp_ptr); \
+ error_num = connect_string_parse.print_param_error(); \
goto error; \
} \
DBUG_PRINT("info",("spider " title_name "=%f", share->param_name)); \
@@ -1753,10 +1796,11 @@ static int spider_set_ll_value(
share->param_name = my_strtoll10(tmp_ptr2, (char**) NULL, &error_num); \
if (share->param_name < min_val) \
share->param_name = min_val; \
+ connect_string_parse.set_param_value(tmp_ptr2, \
+ tmp_ptr2 + \
+ strlen(tmp_ptr2) + 1); \
} else { \
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM; \
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR, \
- MYF(0), tmp_ptr); \
+ error_num = connect_string_parse.print_param_error(); \
goto error; \
} \
DBUG_PRINT("info",("spider " title_name "=%lld", share->param_name)); \
@@ -1778,6 +1822,7 @@ int spider_parse_connect_info(
char *tmp_ptr, *tmp_ptr2, *start_ptr;
int roop_count;
int title_length;
+ SPIDER_PARAM_STRING_PARSE connect_string_parse;
SPIDER_ALTER_TABLE *share_alter;
#ifdef WITH_PARTITION_STORAGE_ENGINE
partition_element *part_elem;
@@ -1961,6 +2006,7 @@ int spider_parse_connect_info(
}
sprit_ptr[0] = connect_string;
+ connect_string_parse.init(connect_string, ER_SPIDER_INVALID_CONNECT_INFO_NUM);
while (sprit_ptr[0])
{
if ((sprit_ptr[1] = strchr(sprit_ptr[0], ',')))
@@ -1987,10 +2033,14 @@ int spider_parse_connect_info(
title_length++;
start_ptr++;
}
+ connect_string_parse.set_param_title(tmp_ptr, tmp_ptr + title_length);
switch (title_length)
{
case 0:
+ error_num = connect_string_parse.print_param_error();
+ if (error_num)
+ goto error;
continue;
case 3:
SPIDER_PARAM_LONG_LIST_WITH_MAX("abl", access_balances, 0,
@@ -2124,23 +2174,17 @@ int spider_parse_connect_info(
#endif
SPIDER_PARAM_INT_WITH_MAX("upu", use_pushdown_udf, 0, 1);
SPIDER_PARAM_INT_WITH_MAX("utc", use_table_charset, 0, 1);
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
- MYF(0), tmp_ptr);
+ error_num = connect_string_parse.print_param_error();
goto error;
case 4:
SPIDER_PARAM_STR_LIST("host", tgt_hosts);
SPIDER_PARAM_STR_LIST("user", tgt_usernames);
SPIDER_PARAM_LONG_LIST_WITH_MAX("port", tgt_ports, 0, 65535);
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
- MYF(0), tmp_ptr);
+ error_num = connect_string_parse.print_param_error();
goto error;
case 5:
SPIDER_PARAM_STR_LIST("table", tgt_table_names);
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
- MYF(0), tmp_ptr);
+ error_num = connect_string_parse.print_param_error();
goto error;
case 6:
SPIDER_PARAM_STR_LIST("server", server_names);
@@ -2150,17 +2194,13 @@ int spider_parse_connect_info(
SPIDER_PARAM_STR_LIST("ssl_ca", tgt_ssl_cas);
SPIDER_PARAM_NUMHINT("skc", static_key_cardinality, 3,
(int) table_share->keys, spider_set_ll_value);
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
- MYF(0), tmp_ptr);
+ error_num = connect_string_parse.print_param_error();
goto error;
case 7:
SPIDER_PARAM_STR_LIST("wrapper", tgt_wrappers);
SPIDER_PARAM_STR_LIST("ssl_key", tgt_ssl_keys);
SPIDER_PARAM_STR_LIST("pk_name", tgt_pk_names);
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
- MYF(0), tmp_ptr);
+ error_num = connect_string_parse.print_param_error();
goto error;
case 8:
SPIDER_PARAM_STR_LIST("database", tgt_dbs);
@@ -2180,18 +2220,14 @@ int spider_parse_connect_info(
#endif
SPIDER_PARAM_STR_LIST("ssl_cert", tgt_ssl_certs);
SPIDER_PARAM_INT_WITH_MAX("bka_mode", bka_mode, 0, 2);
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
- MYF(0), tmp_ptr);
+ error_num = connect_string_parse.print_param_error();
goto error;
case 9:
SPIDER_PARAM_INT("max_order", max_order, 0);
SPIDER_PARAM_INT("bulk_size", bulk_size, 0);
SPIDER_PARAM_DOUBLE("scan_rate", scan_rate, 0);
SPIDER_PARAM_DOUBLE("read_rate", read_rate, 0);
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
- MYF(0), tmp_ptr);
+ error_num = connect_string_parse.print_param_error();
goto error;
case 10:
SPIDER_PARAM_DOUBLE("crd_weight", crd_weight, 1);
@@ -2201,9 +2237,7 @@ int spider_parse_connect_info(
SPIDER_PARAM_STR_LIST("ssl_capath", tgt_ssl_capaths);
SPIDER_PARAM_STR("bka_engine", bka_engine);
SPIDER_PARAM_LONGLONG("first_read", first_read, 0);
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
- MYF(0), tmp_ptr);
+ error_num = connect_string_parse.print_param_error();
goto error;
case 11:
SPIDER_PARAM_INT_WITH_MAX("query_cache", query_cache, 0, 2);
@@ -2217,9 +2251,7 @@ int spider_parse_connect_info(
SPIDER_PARAM_LONG_LIST_WITH_MAX("use_hs_read", use_hs_reads, 0, 1);
#endif
SPIDER_PARAM_INT_WITH_MAX("casual_read", casual_read, 0, 63);
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
- MYF(0), tmp_ptr);
+ error_num = connect_string_parse.print_param_error();
goto error;
case 12:
SPIDER_PARAM_DOUBLE("sts_interval", sts_interval, 0);
@@ -2232,9 +2264,7 @@ int spider_parse_connect_info(
SPIDER_PARAM_LONG_LIST_WITH_MAX(
"hs_read_port", hs_read_ports, 0, 65535);
#endif
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
- MYF(0), tmp_ptr);
+ error_num = connect_string_parse.print_param_error();
goto error;
case 13:
SPIDER_PARAM_STR_LIST("default_group", tgt_default_groups);
@@ -2243,9 +2273,7 @@ int spider_parse_connect_info(
"hs_write_port", hs_write_ports, 0, 65535);
#endif
SPIDER_PARAM_STR_LIST("sequence_name", tgt_sequence_names);
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
- MYF(0), tmp_ptr);
+ error_num = connect_string_parse.print_param_error();
goto error;
case 14:
SPIDER_PARAM_LONGLONG("internal_limit", internal_limit, 0);
@@ -2259,9 +2287,7 @@ int spider_parse_connect_info(
SPIDER_PARAM_INT_WITH_MAX("read_only_mode", read_only_mode, 0, 1);
SPIDER_PARAM_LONG_LIST_WITH_MAX("access_balance", access_balances, 0,
2147483647);
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
- MYF(0), tmp_ptr);
+ error_num = connect_string_parse.print_param_error();
goto error;
case 15:
SPIDER_PARAM_LONGLONG("internal_offset", internal_offset, 0);
@@ -2281,9 +2307,7 @@ int spider_parse_connect_info(
SPIDER_PARAM_LONG_LIST_WITH_MAX("connect_timeout", connect_timeouts,
0, 2147483647);
SPIDER_PARAM_INT_WITH_MAX("error_read_mode", error_read_mode, 0, 1);
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
- MYF(0), tmp_ptr);
+ error_num = connect_string_parse.print_param_error();
goto error;
case 16:
SPIDER_PARAM_INT_WITH_MAX(
@@ -2315,9 +2339,7 @@ int spider_parse_connect_info(
#endif
SPIDER_PARAM_INT_WITH_MAX(
"query_cache_sync", query_cache_sync, 0, 3);
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
- MYF(0), tmp_ptr);
+ error_num = connect_string_parse.print_param_error();
goto error;
case 17:
SPIDER_PARAM_INT_WITH_MAX(
@@ -2337,9 +2359,7 @@ int spider_parse_connect_info(
SPIDER_PARAM_INT_WITH_MAX(
"force_bulk_update", force_bulk_update, 0, 1);
#endif
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
- MYF(0), tmp_ptr);
+ error_num = connect_string_parse.print_param_error();
goto error;
case 18:
SPIDER_PARAM_INT_WITH_MAX(
@@ -2352,9 +2372,7 @@ int spider_parse_connect_info(
#endif
SPIDER_PARAM_LONGLONG(
"direct_order_limit", direct_order_limit, 0);
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
- MYF(0), tmp_ptr);
+ error_num = connect_string_parse.print_param_error();
goto error;
case 19:
SPIDER_PARAM_INT("init_sql_alloc_size", init_sql_alloc_size, 0);
@@ -2365,25 +2383,19 @@ int spider_parse_connect_info(
#endif
SPIDER_PARAM_LONG_LIST_WITH_MAX("bka_table_name_type",
bka_table_name_types, 0, 1);
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
- MYF(0), tmp_ptr);
+ error_num = connect_string_parse.print_param_error();
goto error;
case 20:
SPIDER_PARAM_LONGLONG_LIST_WITH_MAX(
"monitoring_server_id", monitoring_sid, 0, 4294967295LL);
SPIDER_PARAM_INT_WITH_MAX(
"delete_all_rows_type", delete_all_rows_type, 0, 1);
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
- MYF(0), tmp_ptr);
+ error_num = connect_string_parse.print_param_error();
goto error;
case 21:
SPIDER_PARAM_LONGLONG(
"semi_split_read_limit", semi_split_read_limit, 0);
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
- MYF(0), tmp_ptr);
+ error_num = connect_string_parse.print_param_error();
goto error;
case 22:
SPIDER_PARAM_LONG_LIST_WITH_MAX(
@@ -2396,39 +2408,33 @@ int spider_parse_connect_info(
"skip_default_condition", skip_default_condition, 0, 1);
SPIDER_PARAM_LONGLONG(
"static_mean_rec_length", static_mean_rec_length, 0);
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
- MYF(0), tmp_ptr);
+ error_num = connect_string_parse.print_param_error();
goto error;
case 23:
SPIDER_PARAM_INT_WITH_MAX(
"internal_optimize_local", internal_optimize_local, 0, 1);
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
- MYF(0), tmp_ptr);
+ error_num = connect_string_parse.print_param_error();
goto error;
case 25:
SPIDER_PARAM_LONGLONG("static_records_for_status",
static_records_for_status, 0);
SPIDER_PARAM_NUMHINT("static_key_cardinality", static_key_cardinality,
3, (int) table_share->keys, spider_set_ll_value);
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
- MYF(0), tmp_ptr);
+ error_num = connect_string_parse.print_param_error();
goto error;
case 26:
SPIDER_PARAM_INT_WITH_MAX(
"semi_table_lock_connection", semi_table_lock_conn, 0, 1);
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
- MYF(0), tmp_ptr);
+ error_num = connect_string_parse.print_param_error();
goto error;
default:
- error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
- my_printf_error(error_num, ER_SPIDER_INVALID_CONNECT_INFO_STR,
- MYF(0), tmp_ptr);
+ error_num = connect_string_parse.print_param_error();
goto error;
}
+
+ /* Verify that the remainder of the parameter value is whitespace */
+ if ((error_num = connect_string_parse.has_extra_parameter_values()))
+ goto error;
}
}
@@ -6350,8 +6356,6 @@ int spider_db_init(
#ifdef _WIN32
HMODULE current_module = GetModuleHandle(NULL);
- spd_db_att_thread_id = (ulong *)
- GetProcAddress(current_module, "?thread_id@@3KA");
#ifdef SPIDER_XID_USES_xid_cache_iterate
#else
#ifdef XID_CACHE_IS_SPLITTED
@@ -6385,7 +6389,6 @@ int spider_db_init(
spd_abort_loop = (bool volatile *)
GetProcAddress(current_module, "?abort_loop@@3_NC");
#else
- spd_db_att_thread_id = &thread_id;
#ifdef SPIDER_XID_USES_xid_cache_iterate
#else
#ifdef XID_CACHE_IS_SPLITTED
@@ -8582,8 +8585,8 @@ int spider_discover_table_structure(
DBUG_RETURN(ER_SPIDER_UNKNOWN_NUM);
}
#ifdef SPIDER_HAS_DISCOVER_TABLE_STRUCTURE_COMMENT
- if (!(part_syntax = generate_partition_syntax(part_info, &part_syntax_len,
- FALSE, TRUE, info, NULL, NULL)))
+ if (!(part_syntax = generate_partition_syntax(thd, part_info, &part_syntax_len,
+ TRUE, info, NULL)))
#else
if (!(part_syntax = generate_partition_syntax(part_info, &part_syntax_len,
FALSE, TRUE, info, NULL)))
@@ -8596,7 +8599,6 @@ int spider_discover_table_structure(
DBUG_RETURN(HA_ERR_OUT_OF_MEM);
}
str.q_append(part_syntax, part_syntax_len);
- my_free(part_syntax, MYF(0));
}
#endif
DBUG_PRINT("info",("spider str=%s", str.c_ptr_safe()));
diff --git a/storage/spider/spd_table.h b/storage/spider/spd_table.h
index 3eb1e61085c..19e1fd04769 100644
--- a/storage/spider/spd_table.h
+++ b/storage/spider/spd_table.h
@@ -13,6 +13,210 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+/*
+ Structure used to manage Spider parameter string parsing. Types of
+ parameters include:
+ - connection strings
+ - UDF parameters
+
+ A parameter string consists of one or more parameter definitions using
+ the following syntax:
+ <parameter title> <parameter value>
+ A comma is the separator character between multiple parameter definitions.
+ Parameter titles must not be quoted. Parameter values must be quoted with
+ single or double quotes.
+*/
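+
+/*
+  A minimal sketch of the accepted syntax (hypothetical values, for
+  illustration only): the string below defines three parameters; titles
+  are bare words, values are quoted, and definitions are comma-separated.
+
+    host "192.168.0.1", port "3306", table "t1"
+*/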
+
+typedef struct st_spider_param_string_parse
+{
+ char *start_ptr; /* Pointer to the start of the parameter string */
+ char *end_ptr; /* Pointer to the end of the parameter string */
+ char *start_title_ptr; /* Pointer to the start of the current parameter
+ title */
+ char *end_title_ptr; /* Pointer to the end of the current parameter
+ title */
+ char *start_value_ptr; /* Pointer to the start of the current parameter
+ value */
+ char *end_value_ptr; /* Pointer to the end of the current parameter
+ value */
+ int error_num; /* Error code of the error message to print when
+ an error is detected */
+ uint delim_title_len; /* Length of the parameter title's delimiter */
+ uint delim_value_len; /* Length of the parameter value's delimiter */
+ char delim_title; /* Current parameter title's delimiter character */
+ char delim_value; /* Current parameter value's delimiter character */
+
+ /**
+ Initialize the parameter string parse information.
+
+ @param param_string Pointer to the parameter string being parsed.
+ @param error_code Error code of the error message to print when
+ an error is detected.
+ */
+
+ inline void init(char *param_string, int error_code)
+ {
+ start_ptr = param_string;
+ end_ptr = start_ptr + strlen(start_ptr);
+
+ init_param_title();
+ init_param_value();
+
+ error_num = error_code;
+ }
+
+ /**
+ Initialize the current parameter title.
+ */
+
+ inline void init_param_title()
+ {
+ start_title_ptr = end_title_ptr = NULL;
+ delim_title_len = 0;
+ delim_title = '\0';
+ }
+
+ /**
+ Save pointers to the start and end positions of the current parameter
+ title in the parameter string. Also save the parameter title's
+ delimiter character.
+
+ @param start_title Pointer to the start position of the current
+ parameter title.
+ @param end_title Pointer to the end position of the current
+ parameter title.
+ */
+
+ inline void set_param_title(char *start_title, char *end_title)
+ {
+ start_title_ptr = start_title;
+ end_title_ptr = end_title;
+
+ if (*start_title == '"' ||
+ *start_title == '\'')
+ {
+ delim_title = *start_title;
+
+ if (start_title >= start_ptr && *--start_title == '\\')
+ delim_title_len = 2;
+ else
+ delim_title_len = 1;
+ }
+ }
+
+ /**
+ Initialize the current parameter value.
+ */
+
+ inline void init_param_value()
+ {
+ start_value_ptr = end_value_ptr = NULL;
+ delim_value_len = 0;
+ delim_value = '\0';
+ }
+
+ /**
+ Save pointers to the start and end positions of the current parameter
+ value in the parameter string. Also save the parameter value's
+ delimiter character.
+
+ @param start_value Pointer to the start position of the current
+ parameter value.
+ @param end_value Pointer to the end position of the current
+ parameter value.
+ */
+
+ inline void set_param_value(char *start_value, char *end_value)
+ {
+ start_value_ptr = start_value--;
+ end_value_ptr = end_value;
+
+ if (*start_value == '"' ||
+ *start_value == '\'')
+ {
+ delim_value = *start_value;
+
+ if (*--start_value == '\\')
+ delim_value_len = 2;
+ else
+ delim_value_len = 1;
+ }
+ }
+
+ /**
+ Determine whether the current parameter in the parameter string has
+ extra parameter values.
+
+ @return 0 Current parameter value in the parameter string
+ does not have extra parameter values.
+ <> 0 Error code indicating that the current parameter
+ value in the parameter string has extra
+ parameter values.
+ */
+
+ inline int has_extra_parameter_values()
+ {
+ int error_num = 0;
+ DBUG_ENTER("has_extra_parameter_values");
+
+ if (end_value_ptr)
+ {
+ /* There is a current parameter value */
+ char *end_param_ptr = end_value_ptr;
+
+ while (end_param_ptr < end_ptr &&
+ (*end_param_ptr == ' ' || *end_param_ptr == '\r' ||
+ *end_param_ptr == '\n' || *end_param_ptr == '\t'))
+ end_param_ptr++;
+
+ if (end_param_ptr < end_ptr && *end_param_ptr != '\0')
+ {
+ /* Extra values in parameter definition */
+ error_num = print_param_error();
+ }
+ }
+
+ DBUG_RETURN(error_num);
+ }
+
+ /**
+ Restore the current parameter's input delimiter characters in the
+ parameter string. They were NULLed during parameter parsing.
+ */
+
+ inline void restore_delims()
+ {
+ char *end = end_title_ptr - 1;
+
+ switch (delim_title_len)
+ {
+ case 2:
+ *end++ = '\\';
+ /* Fall through */
+ case 1:
+ *end = delim_title;
+ }
+
+ end = end_value_ptr - 1;
+ switch (delim_value_len)
+ {
+ case 2:
+ *end++ = '\\';
+ /* Fall through */
+ case 1:
+ *end = delim_value;
+ }
+ }
+
+ /**
+ Print a parameter string error message.
+
+ @return Error code.
+ */
+
+ int print_param_error();
+} SPIDER_PARAM_STRING_PARSE;
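+
+/*
+  Expected call sequence for a parse loop, a sketch assuming the usage in
+  spider_parse_connect_info() (spd_table.cc):
+
+    SPIDER_PARAM_STRING_PARSE parse;
+    parse.init(connect_string, ER_SPIDER_INVALID_CONNECT_INFO_NUM);
+    // for each parameter definition:
+    parse.set_param_title(tmp_ptr, tmp_ptr + title_length);
+    //   a SPIDER_PARAM_* macro records the value via set_param_value()
+    if ((error_num = parse.has_extra_parameter_values()))
+      goto error;
+    // on a bad title or value, print_param_error() reports and returns
+    // the error code passed to init().
+*/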
+
uchar *spider_tbl_get_key(
SPIDER_SHARE *share,
size_t *length,
@@ -60,7 +264,8 @@ void spider_free_tmp_share_alloc(
char *spider_get_string_between_quote(
char *ptr,
- bool alloc
+ bool alloc,
+ SPIDER_PARAM_STRING_PARSE *param_string_parse = NULL
);
int spider_create_string_list(
@@ -68,7 +273,8 @@ int spider_create_string_list(
uint **string_length_list,
uint *list_length,
char *str,
- uint length
+ uint length,
+ SPIDER_PARAM_STRING_PARSE *param_string_parse
);
int spider_create_long_list(
@@ -77,7 +283,8 @@ int spider_create_long_list(
char *str,
uint length,
long min_val,
- long max_val
+ long max_val,
+ SPIDER_PARAM_STRING_PARSE *param_string_parse
);
int spider_create_longlong_list(
@@ -86,7 +293,8 @@ int spider_create_longlong_list(
char *str,
uint length,
longlong min_val,
- longlong max_val
+ longlong max_val,
+ SPIDER_PARAM_STRING_PARSE *param_string_parse
);
int spider_increase_string_list(
diff --git a/storage/spider/spd_trx.cc b/storage/spider/spd_trx.cc
index 1264c593cac..fb999509b92 100644
--- a/storage/spider/spd_trx.cc
+++ b/storage/spider/spd_trx.cc
@@ -1590,15 +1590,20 @@ int spider_check_and_set_sql_log_off(
SPIDER_CONN *conn,
int *need_mon
) {
- bool internal_sql_log_off;
+ int internal_sql_log_off;
DBUG_ENTER("spider_check_and_set_sql_log_off");
internal_sql_log_off = spider_param_internal_sql_log_off(thd);
- if (internal_sql_log_off)
+ if (internal_sql_log_off != -1)
{
- spider_conn_queue_sql_log_off(conn, TRUE);
- } else {
- spider_conn_queue_sql_log_off(conn, FALSE);
+ if (internal_sql_log_off)
+ {
+ spider_conn_queue_sql_log_off(conn, TRUE);
+ }
+ else
+ {
+ spider_conn_queue_sql_log_off(conn, FALSE);
+ }
}
/*
if (internal_sql_log_off && conn->sql_log_off != 1)
@@ -2683,7 +2688,8 @@ int spider_initinal_xa_recover(
FALSE, open_tables_backup, TRUE, &error_num))
)
goto error_open_table;
- init_read_record(read_record, thd, table_xa, NULL, TRUE, FALSE, FALSE);
+ init_read_record(read_record, thd, table_xa, NULL, NULL, TRUE, FALSE,
+ FALSE);
}
SPD_INIT_ALLOC_ROOT(&mem_root, 4096, 0, MYF(MY_WME));
while ((!(read_record->read_record(read_record))) && cnt < (int) len)
@@ -4054,7 +4060,7 @@ THD *spider_create_tmp_thd()
{
THD *thd;
DBUG_ENTER("spider_create_tmp_thd");
- if (!(thd = new THD))
+ if (!(thd = new THD(0)))
DBUG_RETURN(NULL);
#if defined(MARIADB_BASE_VERSION) && MYSQL_VERSION_ID >= 100000
thd->killed = NOT_KILLED;
@@ -4065,7 +4071,6 @@ THD *spider_create_tmp_thd()
thd->locked_tables = FALSE;
#endif
thd->proc_info = "";
- thd->thread_id = thd->variables.pseudo_thread_id = 0;
thd->thread_stack = (char*) &thd;
if (thd->store_globals())
DBUG_RETURN(NULL);
diff --git a/storage/test_sql_discovery/mysql-test/sql_discovery/simple.result b/storage/test_sql_discovery/mysql-test/sql_discovery/simple.result
index 94f3bd1ed87..23b7804638f 100644
--- a/storage/test_sql_discovery/mysql-test/sql_discovery/simple.result
+++ b/storage/test_sql_discovery/mysql-test/sql_discovery/simple.result
@@ -4,7 +4,7 @@ test_sql_discovery_statement
test_sql_discovery_write_frm ON
set sql_quote_show_create=0;
create table t1 (a int) engine=test_sql_discovery;
-ERROR HY000: Can't create table `test`.`t1` (errno: 131 "Command not supported by database")
+ERROR HY000: Can't create table `test`.`t1` (errno: 131 "Command not supported by the engine")
select * from t1;
ERROR 42S02: Table 'test.t1' doesn't exist
set @@test_sql_discovery_statement='t1:foobar bwa-ha-ha';
@@ -122,9 +122,9 @@ Handler_discover 15
show create table t1;
Table Create Table
t1 CREATE TABLE t1 (
- a int(11) NOT NULL DEFAULT '5',
- b timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
- c tinyblob,
+ a int(11) NOT NULL DEFAULT 5,
+ b timestamp NOT NULL DEFAULT current_timestamp() ON UPDATE current_timestamp(),
+ c tinyblob DEFAULT NULL,
d decimal(5,2) DEFAULT NULL,
e varchar(30) CHARACTER SET ascii DEFAULT NULL,
f geometry NOT NULL,
diff --git a/storage/tokudb/CMakeLists.txt b/storage/tokudb/CMakeLists.txt
index 618d257be29..088a24f827b 100644
--- a/storage/tokudb/CMakeLists.txt
+++ b/storage/tokudb/CMakeLists.txt
@@ -1,6 +1,9 @@
SET(TOKUDB_VERSION 5.6.41-84.1)
# PerconaFT only supports x86-64 and cmake-2.8.9+
-IF(CMAKE_VERSION VERSION_LESS "2.8.9")
+IF(WIN32)
+ # TokuDB has never worked on Windows
+ RETURN()
+ELSEIF(CMAKE_VERSION VERSION_LESS "2.8.9")
MESSAGE(STATUS "CMake 2.8.9 or higher is required by TokuDB")
ELSEIF(NOT HAVE_DLOPEN)
MESSAGE(STATUS "dlopen is required by TokuDB")
@@ -34,23 +37,62 @@ SET(TOKUDB_SOURCES
tokudb_sysvars.cc
tokudb_thread.cc
tokudb_dir_cmd.cc)
-MYSQL_ADD_PLUGIN(tokudb ${TOKUDB_SOURCES} STORAGE_ENGINE MODULE_ONLY)
+MYSQL_ADD_PLUGIN(tokudb ${TOKUDB_SOURCES} STORAGE_ENGINE MODULE_ONLY
+ COMPONENT tokudb-engine CONFIG ${CMAKE_CURRENT_BINARY_DIR}/tokudb.cnf)
IF(NOT TARGET tokudb)
RETURN()
ENDIF()
+INCLUDE(jemalloc)
+CHECK_JEMALLOC()
+
IF(NOT LIBJEMALLOC)
MESSAGE(WARNING "TokuDB is enabled, but jemalloc is not. This configuration is not supported")
+ELSEIF(LIBJEMALLOC STREQUAL jemalloc_pic)
+ CHECK_CXX_SOURCE_COMPILES(
+"
+#include <jemalloc/jemalloc.h>
+#if JEMALLOC_VERSION_MAJOR < 5
+int main() { return 0; }
+#else
+#error
+#endif
+" JEMALLOC_OK)
+ IF (NOT JEMALLOC_OK)
+ MESSAGE(FATAL_ERROR "static jemalloc_pic.a can only be used up to jemalloc 4")
+ ENDIF()
+ELSEIF(LIBJEMALLOC STREQUAL jemalloc)
+ FIND_LIBRARY(LIBJEMALLOC_SO jemalloc)
+ IF(NOT LIBJEMALLOC_SO)
+ MESSAGE(FATAL_ERROR "jemalloc is present, but cannot be found?")
+ ENDIF()
+ GET_FILENAME_COMPONENT(LIBJEMALLOC_PATH ${LIBJEMALLOC_SO} REALPATH CACHE)
+
+ IF(RPM OR DEB)
+ UNSET(LIBJEMALLOC)
+ GET_DIRECTORY_PROPERTY(V DIRECTORY ${CMAKE_SOURCE_DIR} DEFINITION CPACK_RPM_tokudb-engine_PACKAGE_REQUIRES)
+ SET(CPACK_RPM_tokudb-engine_PACKAGE_REQUIRES "${V} jemalloc" PARENT_SCOPE)
+ ENDIF()
+
+ IF(INSTALL_SYSCONFDIR)
+ SET(systemd_env "Environment=\"LD_PRELOAD=${LIBJEMALLOC_PATH}\"")
+ SET(cnf_malloc_lib "malloc-lib=${LIBJEMALLOC_PATH}")
+ CONFIGURE_FILE(tokudb.conf.in tokudb.conf @ONLY)
+ INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/tokudb.conf
+ DESTINATION ${INSTALL_SYSCONFDIR}/systemd/system/mariadb.service.d/
+ COMPONENT tokudb-engine)
+ ENDIF()
ENDIF()
+CONFIGURE_FILE(tokudb.cnf.in tokudb.cnf @ONLY)
+
MY_CHECK_AND_SET_COMPILER_FLAG("-Wno-shadow")
MY_CHECK_AND_SET_COMPILER_FLAG("-Wno-vla" DEBUG)
MY_CHECK_AND_SET_COMPILER_FLAG("-Wno-implicit-fallthrough")
MY_CHECK_AND_SET_COMPILER_FLAG("-Wno-cpp" DEBUG)
############################################
-SET(TOKUDB_DEB_FILES "usr/lib/mysql/plugin/ha_tokudb.so\netc/mysql/conf.d/tokudb.cnf\nusr/bin/tokuftdump" PARENT_SCOPE)
MARK_AS_ADVANCED(BUILDNAME)
MARK_AS_ADVANCED(BUILD_TESTING)
MARK_AS_ADVANCED(CMAKE_TOKUDB_REVISION)
@@ -122,14 +164,9 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/${TOKU_FT_DIR_NAME}/buildheader)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/${TOKU_FT_DIR_NAME}/portability)
TARGET_LINK_LIBRARIES(tokudb tokufractaltree_static tokuportability_static
- ${ZLIB_LIBRARY} stdc++)
+ ${ZLIB_LIBRARY} ${LIBJEMALLOC} stdc++)
SET(CMAKE_MODULE_LINKER_FLAGS_RELEASE "${CMAKE_MODULE_LINKER_FLAGS_RELEASE} -flto -fuse-linker-plugin")
SET(CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO} -flto -fuse-linker-plugin")
-SET(CPACK_RPM_server_PACKAGE_OBSOLETES
- "${CPACK_RPM_server_PACKAGE_OBSOLETES} MariaDB-tokudb-engine < 10.0.5" PARENT_SCOPE)
-
-IF (INSTALL_SYSCONF2DIR)
- INSTALL(FILES tokudb.cnf DESTINATION ${INSTALL_SYSCONF2DIR} COMPONENT Server)
-ENDIF(INSTALL_SYSCONF2DIR)
+ADD_SUBDIRECTORY(man)
diff --git a/storage/tokudb/PerconaFT/CMakeLists.txt b/storage/tokudb/PerconaFT/CMakeLists.txt
index a0918e505b1..672e4b107f9 100644
--- a/storage/tokudb/PerconaFT/CMakeLists.txt
+++ b/storage/tokudb/PerconaFT/CMakeLists.txt
@@ -55,7 +55,7 @@ endif()
include(TokuFeatureDetection)
include(TokuSetupCompiler)
-include(TokuSetupCTest)
+#include(TokuSetupCTest)
include(TokuThirdParty)
set(TOKU_CMAKE_SCRIPT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
diff --git a/storage/tokudb/PerconaFT/DartConfig.cmake b/storage/tokudb/PerconaFT/DartConfig.cmake
deleted file mode 100644
index 9ad189869c6..00000000000
--- a/storage/tokudb/PerconaFT/DartConfig.cmake
+++ /dev/null
@@ -1,10 +0,0 @@
-if(BUILD_TESTING)
- if (NOT CMAKE_SYSTEM_NAME STREQUAL Darwin)
- # Valgrind on OSX 10.8 generally works but outputs some warning junk
- # that is hard to parse out, so we'll just let it run alone
- set(MEMORYCHECK_COMMAND "${TokuDB_SOURCE_DIR}/scripts/tokuvalgrind")
- endif ()
- set(MEMORYCHECK_COMMAND_OPTIONS "--gen-suppressions=no --soname-synonyms=somalloc=*tokuportability* --quiet --num-callers=20 --leak-check=full --show-reachable=yes --trace-children=yes --trace-children-skip=sh,*/sh,basename,*/basename,dirname,*/dirname,rm,*/rm,cp,*/cp,mv,*/mv,cat,*/cat,diff,*/diff,grep,*/grep,date,*/date,test,*/tokudb_dump,*/tdb-recover --trace-children-skip-by-arg=--only_create,--test,--no-shutdown,novalgrind" CACHE INTERNAL "options for valgrind")
- set(MEMORYCHECK_SUPPRESSIONS_FILE "${CMAKE_CURRENT_BINARY_DIR}/valgrind.suppressions" CACHE INTERNAL "suppressions file for valgrind")
- set(UPDATE_COMMAND "svn")
-endif()
diff --git a/storage/tokudb/PerconaFT/ft/bndata.cc b/storage/tokudb/PerconaFT/ft/bndata.cc
index 4e2b4d090de..ecacb28acda 100644
--- a/storage/tokudb/PerconaFT/ft/bndata.cc
+++ b/storage/tokudb/PerconaFT/ft/bndata.cc
@@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include <ft/bndata.h>
#include <ft/ft-internal.h>
diff --git a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc
index 8e9856b4060..034d5442742 100644
--- a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc
+++ b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc
@@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include <string.h>
#include <time.h>
#include <stdarg.h>
diff --git a/storage/tokudb/PerconaFT/ft/cachetable/checkpoint.cc b/storage/tokudb/PerconaFT/ft/cachetable/checkpoint.cc
index 13ff5eff5af..aad018f4097 100644
--- a/storage/tokudb/PerconaFT/ft/cachetable/checkpoint.cc
+++ b/storage/tokudb/PerconaFT/ft/cachetable/checkpoint.cc
@@ -73,6 +73,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
*
*****/
+#include <my_global.h>
#include <time.h>
#include "portability/toku_portability.h"
diff --git a/storage/tokudb/PerconaFT/ft/cursor.cc b/storage/tokudb/PerconaFT/ft/cursor.cc
index 8f598d0a0df..5402763f7a7 100644
--- a/storage/tokudb/PerconaFT/ft/cursor.cc
+++ b/storage/tokudb/PerconaFT/ft/cursor.cc
@@ -35,6 +35,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include "ft/ft-internal.h"
#include "ft/cursor.h"
diff --git a/storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.cc b/storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.cc
index ab9802e88b0..439e0688e89 100644
--- a/storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.cc
+++ b/storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.cc
@@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include "ft/serialize/block_table.h"
#include "ft/ft-cachetable-wrappers.h"
#include "ft/ft-flusher.h"
diff --git a/storage/tokudb/PerconaFT/ft/ft-flusher.cc b/storage/tokudb/PerconaFT/ft/ft-flusher.cc
index 8e687d4ae58..bbb2a170cb9 100644
--- a/storage/tokudb/PerconaFT/ft/ft-flusher.cc
+++ b/storage/tokudb/PerconaFT/ft/ft-flusher.cc
@@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include "ft/ft.h"
#include "ft/ft-cachetable-wrappers.h"
#include "ft/ft-internal.h"
diff --git a/storage/tokudb/PerconaFT/ft/ft-hot-flusher.cc b/storage/tokudb/PerconaFT/ft/ft-hot-flusher.cc
index 405bcd5b8f1..ffab8647541 100644
--- a/storage/tokudb/PerconaFT/ft/ft-hot-flusher.cc
+++ b/storage/tokudb/PerconaFT/ft/ft-hot-flusher.cc
@@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include "ft/ft.h"
#include "ft/ft-cachetable-wrappers.h"
#include "ft/ft-flusher.h"
diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.cc b/storage/tokudb/PerconaFT/ft/ft-ops.cc
index d2e92768dde..d752f13c9c3 100644
--- a/storage/tokudb/PerconaFT/ft/ft-ops.cc
+++ b/storage/tokudb/PerconaFT/ft/ft-ops.cc
@@ -147,6 +147,7 @@ basement nodes, bulk fetch, and partial fetch:
*/
+#include <my_global.h>
#include "ft/cachetable/checkpoint.h"
#include "ft/cursor.h"
#include "ft/ft-cachetable-wrappers.h"
diff --git a/storage/tokudb/PerconaFT/ft/ft-status.cc b/storage/tokudb/PerconaFT/ft/ft-status.cc
index 3305c1cf446..9b45ba21b4f 100644
--- a/storage/tokudb/PerconaFT/ft/ft-status.cc
+++ b/storage/tokudb/PerconaFT/ft/ft-status.cc
@@ -50,6 +50,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include "ft/ft.h"
#include "ft/ft-status.h"
diff --git a/storage/tokudb/PerconaFT/ft/ft-test-helpers.cc b/storage/tokudb/PerconaFT/ft/ft-test-helpers.cc
index 8338a0777eb..ad1dda01760 100644
--- a/storage/tokudb/PerconaFT/ft/ft-test-helpers.cc
+++ b/storage/tokudb/PerconaFT/ft/ft-test-helpers.cc
@@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include "ft/ft.h"
#include "ft/ft-cachetable-wrappers.h"
#include "ft/ft-internal.h"
diff --git a/storage/tokudb/PerconaFT/ft/ft-verify.cc b/storage/tokudb/PerconaFT/ft/ft-verify.cc
index 3819799c32f..4f6e07e61f2 100644
--- a/storage/tokudb/PerconaFT/ft/ft-verify.cc
+++ b/storage/tokudb/PerconaFT/ft/ft-verify.cc
@@ -44,6 +44,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
* For each nonleaf node: All the messages have keys that are between the associated pivot keys ( left_pivot_key < message <= right_pivot_key)
*/
+#include <my_global.h>
#include "ft/serialize/block_table.h"
#include "ft/ft.h"
#include "ft/ft-cachetable-wrappers.h"
diff --git a/storage/tokudb/PerconaFT/ft/ft.cc b/storage/tokudb/PerconaFT/ft/ft.cc
index 5c9f27bf5ad..1106abfbfb4 100644
--- a/storage/tokudb/PerconaFT/ft/ft.cc
+++ b/storage/tokudb/PerconaFT/ft/ft.cc
@@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include "ft/serialize/block_table.h"
#include "ft/ft.h"
#include "ft/ft-cachetable-wrappers.h"
diff --git a/storage/tokudb/PerconaFT/ft/le-cursor.cc b/storage/tokudb/PerconaFT/ft/le-cursor.cc
index 81d0d3694a4..b90d48dc5ba 100644
--- a/storage/tokudb/PerconaFT/ft/le-cursor.cc
+++ b/storage/tokudb/PerconaFT/ft/le-cursor.cc
@@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include "ft/ft.h"
#include "ft/ft-internal.h"
#include "ft/le-cursor.h"
diff --git a/storage/tokudb/PerconaFT/ft/leafentry.cc b/storage/tokudb/PerconaFT/ft/leafentry.cc
index 56ce0c9d945..fcb9a344e27 100644
--- a/storage/tokudb/PerconaFT/ft/leafentry.cc
+++ b/storage/tokudb/PerconaFT/ft/leafentry.cc
@@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include "serialize/wbuf.h"
#include "leafentry.h"
diff --git a/storage/tokudb/PerconaFT/ft/loader/dbufio.cc b/storage/tokudb/PerconaFT/ft/loader/dbufio.cc
index 9ff712bcbae..90f76cecf90 100644
--- a/storage/tokudb/PerconaFT/ft/loader/dbufio.cc
+++ b/storage/tokudb/PerconaFT/ft/loader/dbufio.cc
@@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
@@ -139,7 +140,7 @@ static ssize_t dbf_read_some_compressed(struct dbufio_file *dbf, char *buf, size
ret = 0;
goto exit;
}
- if (readcode < header_size) {
+ if (readcode < (ssize_t) header_size) {
errno = TOKUDB_NO_DATA;
ret = -1;
goto exit;
@@ -167,7 +168,7 @@ static ssize_t dbf_read_some_compressed(struct dbufio_file *dbf, char *buf, size
ret = -1;
goto exit;
}
- if (readcode < total_size) {
+ if (readcode < (ssize_t) total_size) {
errno = TOKUDB_NO_DATA;
ret = -1;
goto exit;
@@ -586,7 +587,7 @@ dbufio_print(DBUFIO_FILESET bfs) {
fprintf(stderr, "%s:%d bfs=%p", __FILE__, __LINE__, bfs);
if (bfs->panic)
fprintf(stderr, " panic=%d", bfs->panic_errno);
- fprintf(stderr, " N=%d %d %" PRIuMAX, bfs->N, bfs->n_not_done, bfs->bufsize);
+ fprintf(stderr, " N=%d %d %" PRIuMAX, bfs->N, bfs->n_not_done, (uintmax_t) bfs->bufsize);
for (int i = 0; i < bfs->N; i++) {
struct dbufio_file *dbf = &bfs->files[i];
if (dbf->error_code[0] || dbf->error_code[1])
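
The two (ssize_t) casts above are signedness fixes: in a mixed signed/unsigned comparison the signed operand is converted to unsigned, so a negative read result compares as a huge positive value and the short-read branch can never fire. The (uintmax_t) cast in dbufio_print() is the printf-side analogue, since the "%" PRIuMAX conversion expects exactly a uintmax_t argument. A minimal sketch of the comparison pitfall (values are illustrative, not taken from TokuFT):

    #include <cstdio>
    #include <cstddef>
    #include <sys/types.h>   // ssize_t (POSIX)

    int main() {
        ssize_t readcode = -1;      // e.g. a failed read()
        size_t header_size = 16;
        // Mixed comparison: -1 converts to SIZE_MAX, so this prints 0.
        std::printf("unsigned: %d\n", readcode < header_size ? 1 : 0);
        // Patched form: compare in the signed domain, as intended; prints 1.
        std::printf("signed:   %d\n", readcode < (ssize_t) header_size ? 1 : 0);
    }
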
diff --git a/storage/tokudb/PerconaFT/ft/loader/loader.cc b/storage/tokudb/PerconaFT/ft/loader/loader.cc
index 9528af95627..5f57b473bc5 100644
--- a/storage/tokudb/PerconaFT/ft/loader/loader.cc
+++ b/storage/tokudb/PerconaFT/ft/loader/loader.cc
@@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include <toku_portability.h>
#include <arpa/inet.h>
diff --git a/storage/tokudb/PerconaFT/ft/loader/pqueue.cc b/storage/tokudb/PerconaFT/ft/loader/pqueue.cc
index 9ca37b1564b..950ab259f46 100644
--- a/storage/tokudb/PerconaFT/ft/loader/pqueue.cc
+++ b/storage/tokudb/PerconaFT/ft/loader/pqueue.cc
@@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include <toku_portability.h>
#include "toku_os.h"
#include "ft-internal.h"
diff --git a/storage/tokudb/PerconaFT/ft/logger/log_upgrade.cc b/storage/tokudb/PerconaFT/ft/logger/log_upgrade.cc
index efaba49198d..3da9706359b 100644
--- a/storage/tokudb/PerconaFT/ft/logger/log_upgrade.cc
+++ b/storage/tokudb/PerconaFT/ft/logger/log_upgrade.cc
@@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include <ft/log_header.h>
#include "log-internal.h"
diff --git a/storage/tokudb/PerconaFT/ft/logger/logcursor.cc b/storage/tokudb/PerconaFT/ft/logger/logcursor.cc
index 494d3b1d531..07f57220bf0 100644
--- a/storage/tokudb/PerconaFT/ft/logger/logcursor.cc
+++ b/storage/tokudb/PerconaFT/ft/logger/logcursor.cc
@@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include "log-internal.h"
#include "logger/logcursor.h"
#include <limits.h>
diff --git a/storage/tokudb/PerconaFT/ft/logger/logfilemgr.cc b/storage/tokudb/PerconaFT/ft/logger/logfilemgr.cc
index af8eb19d770..e9028f49daf 100644
--- a/storage/tokudb/PerconaFT/ft/logger/logfilemgr.cc
+++ b/storage/tokudb/PerconaFT/ft/logger/logfilemgr.cc
@@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include "logger/log-internal.h"
#include "logger/logcursor.h"
#include "logger/logfilemgr.h"
diff --git a/storage/tokudb/PerconaFT/ft/logger/logger.cc b/storage/tokudb/PerconaFT/ft/logger/logger.cc
index 5b2d1492cc9..d66a1deecf9 100644
--- a/storage/tokudb/PerconaFT/ft/logger/logger.cc
+++ b/storage/tokudb/PerconaFT/ft/logger/logger.cc
@@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include <memory.h>
#include <ctype.h>
#include <limits.h>
diff --git a/storage/tokudb/PerconaFT/ft/msg.cc b/storage/tokudb/PerconaFT/ft/msg.cc
index f4f2c747734..b53b946b241 100644
--- a/storage/tokudb/PerconaFT/ft/msg.cc
+++ b/storage/tokudb/PerconaFT/ft/msg.cc
@@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include "portability/toku_portability.h"
#include "ft/msg.h"
diff --git a/storage/tokudb/PerconaFT/ft/node.cc b/storage/tokudb/PerconaFT/ft/node.cc
index 39a76c8615e..88f46c7812b 100644
--- a/storage/tokudb/PerconaFT/ft/node.cc
+++ b/storage/tokudb/PerconaFT/ft/node.cc
@@ -35,6 +35,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include "ft/ft.h"
#include "ft/ft-internal.h"
#include "ft/serialize/ft_node-serialize.h"
diff --git a/storage/tokudb/PerconaFT/ft/pivotkeys.cc b/storage/tokudb/PerconaFT/ft/pivotkeys.cc
index b4a26d03ee1..b941ac62a42 100644
--- a/storage/tokudb/PerconaFT/ft/pivotkeys.cc
+++ b/storage/tokudb/PerconaFT/ft/pivotkeys.cc
@@ -35,6 +35,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include <string>
#include "portability/memory.h"
diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_table.cc b/storage/tokudb/PerconaFT/ft/serialize/block_table.cc
index c4c99844edf..e3606c11294 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/block_table.cc
+++ b/storage/tokudb/PerconaFT/ft/serialize/block_table.cc
@@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include "portability/memory.h"
#include "portability/toku_assert.h"
#include "portability/toku_portability.h"
diff --git a/storage/tokudb/PerconaFT/ft/serialize/compress.cc b/storage/tokudb/PerconaFT/ft/serialize/compress.cc
index c2f815c6cf2..584faa5c3be 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/compress.cc
+++ b/storage/tokudb/PerconaFT/ft/serialize/compress.cc
@@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include <toku_portability.h>
#include <util/scoped_malloc.h>
@@ -97,7 +98,6 @@ void toku_compress (enum toku_compression_method a,
static const int zlib_without_checksum_windowbits = -15;
a = normalize_compression_method(a);
- assert(sourceLen < (1LL << 32));
switch (a) {
case TOKU_NO_COMPRESSION:
dest[0] = TOKU_NO_COMPRESSION;
@@ -171,8 +171,10 @@ void toku_compress (enum toku_compression_method a,
return;
}
case TOKU_SNAPPY_METHOD: {
- snappy::RawCompress((char*)source, sourceLen, (char*)dest + 1, destLen);
- *destLen += 1;
+ size_t tmp_dest= *destLen;
+ snappy::RawCompress((char*)source, sourceLen, (char*)dest + 1,
+ &tmp_dest);
+ *destLen= tmp_dest + 1;
dest[0] = TOKU_SNAPPY_METHOD;
return;
}
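
The Snappy hunk fixes an in/out mismatch: snappy::RawCompress() stores the number of bytes it actually wrote into *compressed_length rather than adding to it, and the frame reserves dest[0] for the compression-method tag, so the stored total must be Snappy's output size plus one, which the temporary makes explicit. A hedged sketch of the corrected call pattern (the function name and the tag value are illustrative):

    #include <snappy.h>
    #include <cstddef>

    // dest must hold at least 1 + snappy::MaxCompressedLength(src_len) bytes.
    size_t compress_frame(const char *src, size_t src_len, char *dest) {
        size_t written = 0;                  // output-only: set by RawCompress
        snappy::RawCompress(src, src_len, dest + 1, &written);
        dest[0] = 7;                         // method tag byte; 7 is a stand-in
                                             // for the real TOKU_SNAPPY_METHOD
        return written + 1;                  // frame length = tag + payload
    }
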
diff --git a/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc b/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc
index 46f2e9600c5..e6648b76bf0 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc
+++ b/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc
@@ -761,7 +761,7 @@ int toku_serialize_ftnode_to_memory(FTNODE node,
// Zero the rest of the buffer
memset(data + total_node_size, 0, total_buffer_size - total_node_size);
- assert(curr_ptr - data == total_node_size);
+ assert((uint32_t) (curr_ptr - data) == total_node_size);
*bytes_to_write = data;
*n_bytes_to_write = total_buffer_size;
*n_uncompressed_bytes = total_uncompressed_size;
@@ -3078,7 +3078,7 @@ decompress_from_raw_block_into_rbuf(uint8_t *raw_block, size_t raw_block_size, s
// decompress all the compressed sub blocks into the uncompressed buffer
r = decompress_all_sub_blocks(n_sub_blocks, sub_block, compressed_data, uncompressed_data, num_cores, ft_pool);
if (r != 0) {
- fprintf(stderr, "%s:%d block %" PRId64 " failed %d at %p size %lu\n", __FUNCTION__, __LINE__, blocknum.b, r, raw_block, raw_block_size);
+ fprintf(stderr, "%s:%d block %" PRId64 " failed %d at %p size %zu\n", __FUNCTION__, __LINE__, blocknum.b, r, raw_block, raw_block_size);
dump_bad_block(raw_block, raw_block_size);
goto exit;
}
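
Both hunks above fix implicit width assumptions: the assert compares a signed pointer difference against a uint32_t, so one side is cast to make the comparison unambiguous, and the fprintf moves from "%lu" to "%zu" because size_t is not unsigned long on every ABI (64-bit Windows being the usual counterexample). The printf rule in isolation:

    #include <cstdio>
    #include <cstddef>

    int main() {
        size_t raw_block_size = 4096;
        std::printf("size %zu\n", raw_block_size);                   // C99/C++11: matches size_t exactly
        std::printf("size %lu\n", (unsigned long) raw_block_size);   // pre-C99 fallback via explicit cast
    }
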
diff --git a/storage/tokudb/PerconaFT/ft/txn/txn_manager.cc b/storage/tokudb/PerconaFT/ft/txn/txn_manager.cc
index 384a960b1f3..1b55844bc7d 100644
--- a/storage/tokudb/PerconaFT/ft/txn/txn_manager.cc
+++ b/storage/tokudb/PerconaFT/ft/txn/txn_manager.cc
@@ -974,11 +974,11 @@ int toku_txn_manager_recover_root_txn (
txn_manager->last_xid_seen_for_recover = curr_txn->txnid.parent_id64;
// if we found the maximum number of prepared transactions we are
// allowed to find, then break
- if (num_txns_returned >= count) {
+ if ((long) num_txns_returned >= count) {
break;
}
}
- invariant(num_txns_returned <= count);
+ invariant((long) num_txns_returned <= count);
*retp = num_txns_returned;
ret_val = 0;
exit:
diff --git a/storage/tokudb/PerconaFT/ft/ule.cc b/storage/tokudb/PerconaFT/ft/ule.cc
index e3dce6d27dd..f43094b6070 100644
--- a/storage/tokudb/PerconaFT/ft/ule.cc
+++ b/storage/tokudb/PerconaFT/ft/ule.cc
@@ -47,6 +47,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
// See design documentation for nested transactions at
// TokuWiki/Imp/TransactionsOverview.
+#include <my_global.h>
#include "portability/toku_portability.h"
#include "ft/ft-internal.h"
diff --git a/storage/tokudb/PerconaFT/portability/toku_config.h.in b/storage/tokudb/PerconaFT/portability/toku_config.h.in
index 18f6779796f..714835c2581 100644
--- a/storage/tokudb/PerconaFT/portability/toku_config.h.in
+++ b/storage/tokudb/PerconaFT/portability/toku_config.h.in
@@ -77,6 +77,10 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#cmakedefine HAVE_O_DIRECT 1
#cmakedefine HAVE_F_NOCACHE 1
+#cmakedefine HAVE_MAP_ANONYMOUS 1
+#cmakedefine HAVE_MINCORE 1
+#cmakedefine HAVE_PR_SET_PTRACER 1
+#cmakedefine HAVE_PR_SET_PTRACER_ANY 1
#cmakedefine HAVE_MALLOC_SIZE 1
#cmakedefine HAVE_MALLOC_USABLE_SIZE 1
#cmakedefine HAVE_MEMALIGN 1
diff --git a/storage/tokudb/PerconaFT/portability/toku_portability.h b/storage/tokudb/PerconaFT/portability/toku_portability.h
index 8945ed6ff06..455a0307464 100644
--- a/storage/tokudb/PerconaFT/portability/toku_portability.h
+++ b/storage/tokudb/PerconaFT/portability/toku_portability.h
@@ -220,6 +220,8 @@ extern void *realloc(void*, size_t) __THROW __attribute__((__deprecat
# endif
#if !defined(__APPLE__)
// Darwin headers use these types, we should not poison them
+#undef TRUE
+#undef FALSE
# pragma GCC poison u_int8_t
# pragma GCC poison u_int16_t
# pragma GCC poison u_int32_t
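
The #undef pair is fallout from the #include <my_global.h> additions throughout this patch: my_global.h defines TRUE and FALSE as object-like macros, and such a macro silently rewrites every later use of the name, including places where the PerconaFT sources need it as an ordinary identifier. A reduction of the failure mode (the definition is a stand-in, not MariaDB's exact one):

    #define TRUE 1                          // stand-in for the macro my_global.h provides
    // enum toku_bool { TOKU_FALSE, TRUE };     // would expand to { TOKU_FALSE, 1 }: ill-formed
    #undef TRUE                             // as in the patch: reclaim the name
    enum toku_bool { TOKU_FALSE, TOKU_TRUE };   // compiles once the macro is gone
    int main() { return TOKU_FALSE; }
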
diff --git a/storage/tokudb/PerconaFT/scripts/tokuvalgrind b/storage/tokudb/PerconaFT/scripts/tokuvalgrind
deleted file mode 120000
index 74517aa2975..00000000000
--- a/storage/tokudb/PerconaFT/scripts/tokuvalgrind
+++ /dev/null
@@ -1 +0,0 @@
-tokugrind
\ No newline at end of file
diff --git a/storage/tokudb/PerconaFT/src/ydb.cc b/storage/tokudb/PerconaFT/src/ydb.cc
index 4d549c0ac73..b041928b679 100644
--- a/storage/tokudb/PerconaFT/src/ydb.cc
+++ b/storage/tokudb/PerconaFT/src/ydb.cc
@@ -39,6 +39,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
extern const char *toku_patent_string;
const char *toku_copyright_string = "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.";
+#include <my_global.h>
extern int writing_rollback;
diff --git a/storage/tokudb/PerconaFT/src/ydb_db.cc b/storage/tokudb/PerconaFT/src/ydb_db.cc
index ac44b8e7fd3..8b2b162abd2 100644
--- a/storage/tokudb/PerconaFT/src/ydb_db.cc
+++ b/storage/tokudb/PerconaFT/src/ydb_db.cc
@@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include <ctype.h>
#include <db.h>
diff --git a/storage/tokudb/PerconaFT/src/ydb_env_func.cc b/storage/tokudb/PerconaFT/src/ydb_env_func.cc
index b8f0a634116..aa8f9063a7e 100644
--- a/storage/tokudb/PerconaFT/src/ydb_env_func.cc
+++ b/storage/tokudb/PerconaFT/src/ydb_env_func.cc
@@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <my_global.h>
#include <toku_portability.h>
#include <memory.h>
diff --git a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/configure b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/configure
index 288961ad7f7..d0f14a1f3ef 100755
--- a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/configure
+++ b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/configure
@@ -19893,7 +19893,7 @@ fi
# Set additional flags for static/dynamic linking. The idea is that every
# program (not library) being built will use either STATIC_{CPPFLAGS,LDFLAGS}
# or DYNAMIC_{CPPFLAGS,LDFLAGS} depending on which type of linkage is
-# preferred. These preferences get overriden by use of --disable-static,
+# preferred. These preferences get overridden by use of --disable-static,
# --disable-shared, or --enable-dynamic.
#
# This is quite messy, because we want to use LZMA_API_STATIC when linking
diff --git a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/configure.ac b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/configure.ac
index fbc59b230c9..e2bb87781c8 100644
--- a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/configure.ac
+++ b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/configure.ac
@@ -585,7 +585,7 @@ AC_SUBST([AM_CFLAGS])
# Set additional flags for static/dynamic linking. The idea is that every
# program (not library) being built will use either STATIC_{CPPFLAGS,LDFLAGS}
# or DYNAMIC_{CPPFLAGS,LDFLAGS} depending on which type of linkage is
-# preferred. These preferences get overriden by use of --disable-static,
+# preferred. These preferences get overridden by use of --disable-static,
# --disable-shared, or --enable-dynamic.
#
# This is quite messy, because we want to use LZMA_API_STATIC when linking
diff --git a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/doc/man/txt/xz.txt b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/doc/man/txt/xz.txt
index e3dacde4a11..e9b0ee5e810 100644
--- a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/doc/man/txt/xz.txt
+++ b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/doc/man/txt/xz.txt
@@ -112,7 +112,7 @@ DESCRIPTION
The absolute value of the active memory usage limit can be seen near
the bottom of the output of --long-help. The default limit can be
- overriden with --memory=limit.
+ overridden with --memory=limit.
OPTIONS
Integer suffixes and special values
diff --git a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/src/xz/main.c b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/src/xz/main.c
index 17fe0aa85b9..40f48645220 100644
--- a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/src/xz/main.c
+++ b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/src/xz/main.c
@@ -212,7 +212,7 @@ main(int argc, char **argv)
// do other message handling related initializations.
message_init(argv[0]);
- // Set hardware-dependent default values. These can be overriden
+ // Set hardware-dependent default values. These can be overridden
// on the command line, thus this must be done before parse_args().
hardware_init();
diff --git a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/src/xz/xz.1 b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/src/xz/xz.1
index 2fef75f43a5..ad3d8463141 100644
--- a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/src/xz/xz.1
+++ b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/src/xz/xz.1
@@ -222,7 +222,7 @@ data has already been decompressed.
The absolute value of the active memory usage limit can be seen near
the bottom of the output of
.BR \-\-long\-help .
-The default limit can be overriden with
+The default limit can be overridden with
\fB\-\-memory=\fIlimit\fR.
.SH OPTIONS
.SS "Integer suffixes and special values"
diff --git a/storage/tokudb/PerconaFT/tools/CMakeLists.txt b/storage/tokudb/PerconaFT/tools/CMakeLists.txt
index af40a838b9a..dd54249ab40 100644
--- a/storage/tokudb/PerconaFT/tools/CMakeLists.txt
+++ b/storage/tokudb/PerconaFT/tools/CMakeLists.txt
@@ -12,7 +12,7 @@ foreach(tool ${tools})
(CMAKE_CXX_FLAGS_DEBUG MATCHES " -DENABLED_DEBUG_SYNC"))
target_link_libraries(${tool} sql)
endif()
- target_link_libraries(${tool} mysqlclient)
+ target_link_libraries(${tool} mysys)
endif ()
add_space_separated_property(TARGET ${tool} COMPILE_FLAGS -fvisibility=hidden)
@@ -21,5 +21,5 @@ endforeach(tool)
# link in math.h library just for this tool.
target_link_libraries(ftverify m)
-install(TARGETS tokuftdump DESTINATION ${INSTALL_BINDIR} COMPONENT Server)
-install(TARGETS tokuft_logprint DESTINATION ${INSTALL_BINDIR} COMPONENT Server)
+install(TARGETS tokuftdump DESTINATION ${INSTALL_BINDIR} COMPONENT tokudb-engine)
+install(TARGETS tokuft_logprint DESTINATION ${INSTALL_BINDIR} COMPONENT tokudb-engine)
diff --git a/storage/tokudb/ha_tokudb.cc b/storage/tokudb/ha_tokudb.cc
index 39931e747ce..39bc286a617 100644
--- a/storage/tokudb/ha_tokudb.cc
+++ b/storage/tokudb/ha_tokudb.cc
@@ -2087,7 +2087,7 @@ int ha_tokudb::write_frm_data(DB* db, DB_TXN* txn, const char* frm_name) {
size_t frm_len = 0;
int error = 0;
-#if 100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 100199
+#if 100000 <= MYSQL_VERSION_ID
error = table_share->read_frm_image((const uchar**)&frm_data,&frm_len);
if (error) { goto cleanup; }
#else
@@ -2129,7 +2129,7 @@ int ha_tokudb::verify_frm_data(const char* frm_name, DB_TXN* txn) {
HA_METADATA_KEY curr_key = hatoku_frm_data;
// get the frm data from MySQL
-#if 100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 100199
+#if 100000 <= MYSQL_VERSION_ID
error = table_share->read_frm_image((const uchar**)&mysql_frm_data,&mysql_frm_len);
if (error) {
goto cleanup;
@@ -4017,7 +4017,6 @@ int ha_tokudb::write_row(uchar * record) {
// some crap that needs to be done because MySQL does not properly abstract
// this work away from us, namely filling in auto increment and setting auto timestamp
//
- ha_statistic_increment(&SSV::ha_write_count);
#if MYSQL_VERSION_ID < 50600
if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT) {
table->timestamp_field->set_time();
@@ -4202,7 +4201,6 @@ int ha_tokudb::update_row(const uchar * old_row, uchar * new_row) {
memset((void *) &prim_row, 0, sizeof(prim_row));
memset((void *) &old_prim_row, 0, sizeof(old_prim_row));
- ha_statistic_increment(&SSV::ha_update_count);
#if MYSQL_VERSION_ID < 50600
if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) {
table->timestamp_field->set_time();
@@ -4370,8 +4368,6 @@ int ha_tokudb::delete_row(const uchar * record) {
uint curr_num_DBs;
tokudb_trx_data* trx = (tokudb_trx_data *) thd_get_ha_data(thd, tokudb_hton);
- ha_statistic_increment(&SSV::ha_delete_count);
-
//
// grab reader lock on numDBs_lock
//
@@ -4929,7 +4925,6 @@ int ha_tokudb::read_full_row(uchar * buf) {
//
int ha_tokudb::index_next_same(uchar* buf, const uchar* key, uint keylen) {
TOKUDB_HANDLER_DBUG_ENTER("");
- ha_statistic_increment(&SSV::ha_read_next_count);
DBT curr_key;
DBT found_key;
@@ -5018,7 +5013,6 @@ int ha_tokudb::index_read(
cursor->c_remove_restriction(cursor);
}
- ha_statistic_increment(&SSV::ha_read_key_count);
memset((void *) &row, 0, sizeof(row));
info.ha = this;
@@ -5670,7 +5664,6 @@ cleanup:
//
int ha_tokudb::index_next(uchar * buf) {
TOKUDB_HANDLER_DBUG_ENTER("");
- ha_statistic_increment(&SSV::ha_read_next_count);
int error = get_next(buf, 1, NULL, key_read);
TOKUDB_HANDLER_DBUG_RETURN(error);
}
@@ -5692,7 +5685,6 @@ int ha_tokudb::index_read_last(uchar * buf, const uchar * key, uint key_len) {
//
int ha_tokudb::index_prev(uchar * buf) {
TOKUDB_HANDLER_DBUG_ENTER("");
- ha_statistic_increment(&SSV::ha_read_prev_count);
int error = get_next(buf, -1, NULL, key_read);
TOKUDB_HANDLER_DBUG_RETURN(error);
}
@@ -5716,8 +5708,6 @@ int ha_tokudb::index_first(uchar * buf) {
tokudb_trx_data* trx = (tokudb_trx_data *) thd_get_ha_data(thd, tokudb_hton);;
HANDLE_INVALID_CURSOR();
- ha_statistic_increment(&SSV::ha_read_first_count);
-
info.ha = this;
info.buf = buf;
info.keynr = tokudb_active_index;
@@ -5760,8 +5750,6 @@ int ha_tokudb::index_last(uchar * buf) {
tokudb_trx_data* trx = (tokudb_trx_data *) thd_get_ha_data(thd, tokudb_hton);;
HANDLE_INVALID_CURSOR();
- ha_statistic_increment(&SSV::ha_read_last_count);
-
info.ha = this;
info.buf = buf;
info.keynr = tokudb_active_index;
@@ -5841,7 +5829,6 @@ int ha_tokudb::rnd_end() {
//
int ha_tokudb::rnd_next(uchar * buf) {
TOKUDB_HANDLER_DBUG_ENTER("");
- ha_statistic_increment(&SSV::ha_read_rnd_next_count);
int error = get_next(buf, 1, NULL, false);
TOKUDB_HANDLER_DBUG_RETURN(error);
}
@@ -5947,7 +5934,6 @@ int ha_tokudb::rnd_pos(uchar * buf, uchar * pos) {
DBT* key = get_pos(&db_pos, pos);
unpack_entire_row = true;
- ha_statistic_increment(&SSV::ha_read_rnd_count);
tokudb_active_index = MAX_KEY;
THD *thd = ha_thd();
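
The block of deletions above removes the engine's own ha_statistic_increment(&SSV::...) calls. In MariaDB the handler layer's ha_*() wrapper methods already bump each counter once before dispatching into the storage engine, so counting again inside TokuDB double-counted every operation. A self-contained schematic of that division of labour (stand-in types, not the real server classes):

    #include <atomic>
    #include <cstdio>

    static std::atomic<unsigned long> ha_write_count{0};   // stand-in for SSV::ha_write_count

    struct engine {
        int write_row(const char *) { return 0; }          // engine hook: no counting inside
        int ha_write_row(const char *buf) {                // server-layer wrapper
            ++ha_write_count;                              // counted once, here
            return write_row(buf);
        }
    };

    int main() {
        engine e;
        e.ha_write_row("row");
        std::printf("writes=%lu\n", ha_write_count.load());
    }
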
diff --git a/storage/tokudb/ha_tokudb_alter_56.cc b/storage/tokudb/ha_tokudb_alter_56.cc
index b579d00f67b..46da873d750 100644
--- a/storage/tokudb/ha_tokudb_alter_56.cc
+++ b/storage/tokudb/ha_tokudb_alter_56.cc
@@ -181,7 +181,7 @@ static ulong fix_handler_flags(
ulong handler_flags = ha_alter_info->handler_flags;
-#if 100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 100199
+#if 100000 <= MYSQL_VERSION_ID
// This is automatically supported, hide the flag from later checks
handler_flags &= ~Alter_inplace_info::ALTER_PARTITIONED;
#endif
@@ -212,11 +212,11 @@ static ulong fix_handler_flags(
handler_flags &= ~Alter_inplace_info::TOKU_ALTER_RENAME;
}
- // ALTER_COLUMN_TYPE may be set when no columns have been changed,
+ // ALTER_STORED_COLUMN_TYPE may be set when no columns have been changed,
// so turn off the flag
- if (handler_flags & Alter_inplace_info::ALTER_COLUMN_TYPE) {
+ if (handler_flags & Alter_inplace_info::ALTER_STORED_COLUMN_TYPE) {
if (all_fields_are_same_type(table, altered_table)) {
- handler_flags &= ~Alter_inplace_info::ALTER_COLUMN_TYPE;
+ handler_flags &= ~Alter_inplace_info::ALTER_STORED_COLUMN_TYPE;
}
}
@@ -388,7 +388,7 @@ enum_alter_inplace_result ha_tokudb::check_if_supported_inplace_alter(
// but let's do some more checks
// we will only allow an hcr if there are no changes
- // in column positions (ALTER_COLUMN_ORDER is not set)
+ // in column positions (ALTER_STORED_COLUMN_ORDER is not set)
// now need to verify that one and only one column
// has changed only its name. If we find anything to
@@ -399,7 +399,7 @@ enum_alter_inplace_result ha_tokudb::check_if_supported_inplace_alter(
table,
altered_table,
(ctx->handler_flags &
- Alter_inplace_info::ALTER_COLUMN_ORDER) != 0);
+ Alter_inplace_info::ALTER_STORED_COLUMN_ORDER) != 0);
if (cr_supported)
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
@@ -407,7 +407,7 @@ enum_alter_inplace_result ha_tokudb::check_if_supported_inplace_alter(
only_flags(
ctx->handler_flags,
Alter_inplace_info::ADD_COLUMN +
- Alter_inplace_info::ALTER_COLUMN_ORDER) &&
+ Alter_inplace_info::ALTER_STORED_COLUMN_ORDER) &&
setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {
// add column
@@ -437,7 +437,7 @@ enum_alter_inplace_result ha_tokudb::check_if_supported_inplace_alter(
only_flags(
ctx->handler_flags,
Alter_inplace_info::DROP_COLUMN +
- Alter_inplace_info::ALTER_COLUMN_ORDER) &&
+ Alter_inplace_info::ALTER_STORED_COLUMN_ORDER) &&
setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {
// drop column
@@ -479,10 +479,10 @@ enum_alter_inplace_result ha_tokudb::check_if_supported_inplace_alter(
if (change_length_is_supported(table, altered_table, ctx)) {
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
- } else if ((ctx->handler_flags & Alter_inplace_info::ALTER_COLUMN_TYPE) &&
+ } else if ((ctx->handler_flags & Alter_inplace_info::ALTER_STORED_COLUMN_TYPE) &&
only_flags(
ctx->handler_flags,
- Alter_inplace_info::ALTER_COLUMN_TYPE +
+ Alter_inplace_info::ALTER_STORED_COLUMN_TYPE +
Alter_inplace_info::ALTER_COLUMN_DEFAULT) &&
table->s->fields == altered_table->s->fields &&
find_changed_fields(
@@ -924,14 +924,14 @@ bool ha_tokudb::commit_inplace_alter_table(TOKUDB_UNUSED(TABLE* altered_table),
if (commit) {
#if (50613 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
(50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
- (100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 100199)
+ (100000 <= MYSQL_VERSION_ID)
if (ha_alter_info->group_commit_ctx) {
ha_alter_info->group_commit_ctx = NULL;
}
#endif
#if defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA
#if (50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599) || \
- (100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 100199)
+ (100000 <= MYSQL_VERSION_ID)
#if defined(WITH_PARTITION_STORAGE_ENGINE) && WITH_PARTITION_STORAGE_ENGINE
if (TOKU_PARTITION_WRITE_FRM_DATA || altered_table->part_info == NULL) {
#else
@@ -1548,7 +1548,7 @@ static bool change_field_type_is_supported(Field* old_field,
return false;
} else if (old_type == MYSQL_TYPE_VARCHAR) {
// varchar(X) -> varchar(Y) and varbinary(X) -> varbinary(Y) expansion
- // where X < 256 <= Y the ALTER_COLUMN_TYPE handler flag is set for
+ // where X < 256 <= Y the ALTER_STORED_COLUMN_TYPE handler flag is set for
// these cases
return change_varchar_length_is_supported(old_field, new_field, ctx);
} else if (old_type == MYSQL_TYPE_BLOB && new_type == MYSQL_TYPE_BLOB) {
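
The flag renames above track MariaDB 10.2, where the server split the inplace-alter flags into stored and virtual column variants (ALTER_STORED_COLUMN_TYPE, ALTER_STORED_COLUMN_ORDER, and so on). The engine's logic is unchanged: detect a spurious type-change flag and mask it off. A runnable reduction of that pattern (the bit value and the compare result are stand-ins):

    #include <cstdio>

    // Stand-in for the server's Alter_inplace_info; the bit value is illustrative.
    struct Alter_inplace_info {
        static const unsigned long ALTER_STORED_COLUMN_TYPE = 1UL << 5;
    };

    int main() {
        unsigned long handler_flags = Alter_inplace_info::ALTER_STORED_COLUMN_TYPE;
        bool all_fields_are_same_type = true;   // outcome of the field-by-field compare
        if ((handler_flags & Alter_inplace_info::ALTER_STORED_COLUMN_TYPE) &&
            all_fields_are_same_type)
            handler_flags &= ~Alter_inplace_info::ALTER_STORED_COLUMN_TYPE;  // spurious bit cleared
        std::printf("flags now %#lx\n", handler_flags);
    }
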
diff --git a/storage/tokudb/hatoku_cmp.cc b/storage/tokudb/hatoku_cmp.cc
index 48d5f96d9ef..06eecf6381d 100644
--- a/storage/tokudb/hatoku_cmp.cc
+++ b/storage/tokudb/hatoku_cmp.cc
@@ -54,7 +54,7 @@ static bool field_valid_for_tokudb_table(Field* field) {
case MYSQL_TYPE_FLOAT:
#if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
(50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
- (100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 100199)
+ (100000 <= MYSQL_VERSION_ID)
case MYSQL_TYPE_DATETIME2:
case MYSQL_TYPE_TIMESTAMP2:
case MYSQL_TYPE_TIME2:
@@ -200,7 +200,7 @@ static TOKU_TYPE mysql_to_toku_type (Field* field) {
goto exit;
#if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
(50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
- (100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 100199)
+ (100000 <= MYSQL_VERSION_ID)
case MYSQL_TYPE_DATETIME2:
case MYSQL_TYPE_TIMESTAMP2:
case MYSQL_TYPE_TIME2:
@@ -918,8 +918,7 @@ static inline int cmp_toku_string(
a_buf,
a_num_bytes,
b_buf,
- b_num_bytes,
- 0
+ b_num_bytes
);
return ret_val;
}
@@ -3109,7 +3108,7 @@ static bool fields_are_same_type(Field* a, Field* b) {
case MYSQL_TYPE_TIMESTAMP:
#if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
(50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
- (100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 100199)
+ (100000 <= MYSQL_VERSION_ID)
case MYSQL_TYPE_DATETIME2:
case MYSQL_TYPE_TIMESTAMP2:
case MYSQL_TYPE_TIME2:
diff --git a/storage/tokudb/hatoku_defines.h b/storage/tokudb/hatoku_defines.h
index d71ed6e6688..ce3d5202915 100644
--- a/storage/tokudb/hatoku_defines.h
+++ b/storage/tokudb/hatoku_defines.h
@@ -87,7 +87,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
// extreme cases though where one side (WRITE) is supported but perhaps
// 'DISCOVERY' may not be, thus the need for individual indicators.
-#if 100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 100199
+#if 100000 <= MYSQL_VERSION_ID
// mariadb 10.0
#define TOKU_USE_DB_TYPE_TOKUDB 1
#define TOKU_INCLUDE_ALTER_56 1
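
This guard is the recurring version gate edited throughout the patch: the old form enabled the MariaDB block only for the 10.0 and 10.1 series (MYSQL_VERSION_ID 100000 through 100199), while the new form keeps it active for 10.2 and every later release. How the bounds encode versions:

    // MYSQL_VERSION_ID packs major.minor.patch as MMNNPP:
    //   10.0.38 -> 100038, 10.1.41 -> 100141, 10.2.24 -> 100224
    #if 100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 100199   // 10.0.x-10.1.x only (old)
    #endif
    #if 100000 <= MYSQL_VERSION_ID                                  // 10.0.x and newer (new)
    #endif
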
diff --git a/storage/tokudb/hatoku_hton.cc b/storage/tokudb/hatoku_hton.cc
index 24e5113e72d..cbb6bb57657 100644
--- a/storage/tokudb/hatoku_hton.cc
+++ b/storage/tokudb/hatoku_hton.cc
@@ -90,7 +90,7 @@ static int tokudb_release_savepoint(
handlerton* hton,
THD* thd,
void* savepoint);
-#if 100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 100199
+#if 100000 <= MYSQL_VERSION_ID
static int tokudb_discover_table(handlerton *hton, THD* thd, TABLE_SHARE *ts);
static int tokudb_discover_table_existence(
handlerton* hton,
@@ -381,7 +381,7 @@ static int tokudb_init_func(void *p) {
tokudb_hton->savepoint_rollback = tokudb_rollback_to_savepoint;
tokudb_hton->savepoint_release = tokudb_release_savepoint;
-#if 100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 100199
+#if 100000 <= MYSQL_VERSION_ID
tokudb_hton->discover_table = tokudb_discover_table;
tokudb_hton->discover_table_existence = tokudb_discover_table_existence;
#else
@@ -1202,7 +1202,7 @@ static int tokudb_release_savepoint(
TOKUDB_DBUG_RETURN(error);
}
-#if 100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 100199
+#if 100000 <= MYSQL_VERSION_ID
static int tokudb_discover_table(handlerton *hton, THD* thd, TABLE_SHARE *ts) {
uchar *frmblob = 0;
size_t frmlen;
@@ -1283,7 +1283,7 @@ static int tokudb_discover3(TOKUDB_UNUSED(handlerton* hton),
DBT value = {};
bool do_commit = false;
-#if 100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 100199
+#if 100000 <= MYSQL_VERSION_ID
tokudb_trx_data* trx = (tokudb_trx_data*)thd_get_ha_data(thd, tokudb_hton);
if (thd_sql_command(thd) == SQLCOM_CREATE_TABLE &&
trx &&
diff --git a/storage/tokudb/man/CMakeLists.txt b/storage/tokudb/man/CMakeLists.txt
new file mode 100644
index 00000000000..192d8117119
--- /dev/null
+++ b/storage/tokudb/man/CMakeLists.txt
@@ -0,0 +1,2 @@
+SET(MAN1_TOKUDB tokuftdump.1 tokuft_logprint.1)
+INSTALL(FILES ${MAN1_TOKUDB} DESTINATION ${INSTALL_MANDIR}/man1 COMPONENT tokudb-engine)
diff --git a/storage/tokudb/man/tokuft_logprint.1 b/storage/tokudb/man/tokuft_logprint.1
new file mode 100644
index 00000000000..d6f69e0aeeb
--- /dev/null
+++ b/storage/tokudb/man/tokuft_logprint.1
@@ -0,0 +1,16 @@
+'\" t
+.\"
+.TH "\FBTOKUFT_LOGPRINT\FR" "1" "29 March 2019" "MariaDB 10\&.2" "MariaDB Database System"
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.SH NAME
+tokuft_logprint \- dump the log from stdin to stdout
+.SH DESCRIPTION
+tokuft_logprint dumps the TokuFT log from stdin to stdout\. Use \fBtokuft_logprint \-\-help\fR for details on usage\.
+.PP
+For more information, please refer to the MariaDB Knowledge Base, available online at https://mariadb.com/kb/
diff --git a/storage/tokudb/man/tokuftdump.1 b/storage/tokudb/man/tokuftdump.1
new file mode 100644
index 00000000000..79129a1a2e1
--- /dev/null
+++ b/storage/tokudb/man/tokuftdump.1
@@ -0,0 +1,237 @@
+'\" t
+.\"
+.TH "\FBTOKUFTDUMP\FR" "1" "3 April 2017" "MariaDB 10\&.2" "MariaDB Database System"
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.\" tokuftdump
+.\" upgrading MySQL
+.SH "NAME"
+tokuftdump \- look into the fractal tree file
+.SH "SYNOPSIS"
+.HP \w'\fBtokuftdump\ [\fR\fB\fIoptions\fR\fR\fB]\fR\ 'u
+\fBtokuftdump [\fR\fB\fIoptions\fR\fR\fB]\fR
+.SH "DESCRIPTION"
+.PP
+\fBtokuftdump\fR
+investigates and diagnoses the fractal tree\&.
+.PP
+\fBtokuftdump\fR
+supports the following options\&.
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+.\" tokuftdump: interactive option
+.\" interactive option: tokuftdump
+\fB\-\-interactive\fR
+.sp
+Run in interactive mode\&.
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+.\" tokuftdump: support option
+.\" support option: tokuftdump
+\fB\-\-support \fI/path/to/fractal-tree/file\fR
+.sp
+An interactive way to inspect node messages and to switch between fractal tree (FT) files\&.
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+.\" tokuftdump: json option
+.\" json option: tokuftdump
+\fB\-\-json \fI/path/to/fractal-tree/file [output_json_file]\fR
+.sp
+If the output JSON file is not specified, FT\&.json is created automatically\&.
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+.\" tokuftdump: nodata option
+.\" nodata option: tokuftdump
+\fB\-\-nodata\fR
+.sp
+Do not dump the node data\&.
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+.\" tokuftdump: dumpdata option
+.\" dumpdata option: tokuftdump
+\fB\-\-dumpdata = \fR\fB\fI0|1\fR\fR
+.sp
+Set whether to dump the node data (0 or 1)\&.
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+.\" tokuftdump: header option
+.\" header option: tokuftdump
+\fB\-\-header\fR
+.sp
+Dump the file header\&.
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+.\" tokuftdump: rootnode option
+.\" rootnode option: tokuftdump
+\fB\-\-rootnode\fR
+.sp
+Dump the root node\&.
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+.\" tokuftdump: node option
+.\" node option: tokuftdump
+\fB\-\-node \fIN\fR
+.sp
+Dump node \fIN\fR\&.
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+.\" tokuftdump: fragmentation option
+.\" fragmentation option: tokuftdump
+\fB\-\-fragmentation\fR
+.sp
+Report fragmentation of the fractal tree file\&.
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+.\" tokuftdump: garbage option
+.\" garbage option: tokuftdump
+\fB\-\-garbage\fR
+.sp
+Report the amount of garbage in the file\&.
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+.\" tokuftdump: tsv option
+.\" tsv option: tokuftdump
+\fB\-\-tsv\fR
+.sp
+Produce tab\-separated (TSV) output\&.
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+.\" tokuftdump: translation-table option
+.\" translation-table option: tokuftdump
+\fB\-\-translation\-table\fR
+.sp
+Dump the block translation table\&.
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+.\" tokuftdump: summary option
+.\" summary option: tokuftdump
+\fB\-\-summary\fR
+.sp
+Provide summary info\&.
+.RE
+.SH "COPYRIGHT"
+.br
+.PP
+Copyright 2016 MariaDB Foundation
+.PP
+This documentation is free software; you can redistribute it and/or modify it only under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License.
+.PP
+This documentation is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+.PP
+You should have received a copy of the GNU General Public License along with the program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA or see http://www.gnu.org/licenses/.
+.sp
+.SH "SEE ALSO"
+For more information, please refer to the MariaDB Knowledge Base, available online at https://mariadb.com/kb/
+.SH AUTHOR
+MariaDB Foundation (http://www.mariadb.org/).
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_deadlock_tokudb.result b/storage/tokudb/mysql-test/rpl/r/rpl_deadlock_tokudb.result
index 0e9b750f77d..2348fd0d9d4 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_deadlock_tokudb.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_deadlock_tokudb.result
@@ -1,9 +1,11 @@
include/master-slave.inc
[connection master]
*** Prepare tables and data ***
+connection master;
CREATE TABLE t1 (a INT NOT NULL, KEY(a)) ENGINE=TokuDB;
CREATE TABLE t2 (a INT) ENGINE=TokuDB;
CREATE TABLE t3 (a INT NOT NULL, KEY(a)) ENGINE=TokuDB;
+connection slave;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
@@ -25,6 +27,7 @@ SHOW VARIABLES LIKE 'slave_transaction_retries';
Variable_name Value
slave_transaction_retries 2
include/stop_slave.inc
+connection master;
BEGIN;
INSERT INTO t1 VALUES (1);
INSERT INTO t2 VALUES (2), (2), (2), (2), (2), (2), (2), (2), (2), (2);
@@ -32,6 +35,7 @@ INSERT INTO t3 VALUES (3);
COMMIT;
*** Test deadlock ***
+connection slave;
BEGIN;
SELECT * FROM t1 FOR UPDATE;
a
@@ -49,6 +53,7 @@ a
include/check_slave_is_running.inc
*** Test lock wait timeout ***
+connection slave;
include/stop_slave.inc
DELETE FROM t2;
CHANGE MASTER TO MASTER_LOG_POS=<master_pos_begin>;
@@ -74,6 +79,7 @@ a
include/check_slave_is_running.inc
*** Test lock wait timeout and purged relay logs ***
+connection slave;
SET @my_max_relay_log_size= @@global.max_relay_log_size;
SET global max_relay_log_size=0;
Warnings:
@@ -106,7 +112,9 @@ a
include/check_slave_is_running.inc
*** Clean up ***
+connection master;
DROP TABLE t1,t2,t3;
+connection slave;
SET global max_relay_log_size= @my_max_relay_log_size;
End of 5.1 tests
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_extra_col_master_tokudb.result b/storage/tokudb/mysql-test/rpl/r/rpl_extra_col_master_tokudb.result
index 929f2c54bf1..6982078d2b8 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_extra_col_master_tokudb.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_extra_col_master_tokudb.result
@@ -12,6 +12,7 @@ include/master-slave.inc
***** Testing more columns on the Master *****
+connection master;
CREATE TABLE t1 (f1 INT, f2 INT, f3 INT PRIMARY KEY, f4 CHAR(20),
/* extra */
f5 FLOAT DEFAULT '2.00',
@@ -24,10 +25,12 @@ f11 VARBINARY(64))ENGINE=TokuDB;
* Alter Table on Slave and drop columns f5 through f11 *
+connection slave;
alter table t1 drop f5, drop f6, drop f7, drop f8, drop f9, drop f10, drop f11;
* Insert data in Master then update and delete some rows*
+connection master;
* Select count and 20 rows from Master *
SELECT COUNT(*) FROM t1;
@@ -57,6 +60,7 @@ f1 f2 f3 f4 f5 f6 f7 f8 f9 hex(f10) hex(f11)
27 27 27 next 2 kaks 2 got stolen from the paradise very fat blob 1555 123456
29 29 29 second 2 kaks 2 got stolen from the paradise very fat blob 1555 123456
30 30 30 next 2 kaks 2 got stolen from the paradise very fat blob 1555 123456
+connection slave;
* Select count and 20 rows from Slave *
@@ -90,6 +94,7 @@ include/check_slave_is_running.inc
***** Testing Altering table def scenario *****
+connection master;
CREATE TABLE t2 (f1 INT, f2 INT, f3 INT PRIMARY KEY, f4 CHAR(20),
/* extra */
f5 DOUBLE DEFAULT '2.00',
@@ -160,6 +165,7 @@ f35 VARCHAR(257)
** Alter tables on slave and drop columns **
+connection slave;
alter table t2 drop f5, drop f6, drop f7, drop f8, drop f9, drop f10, drop f11, drop
f12;
alter table t3 drop f5, drop f6, drop f8, drop f10, drop f11, drop f12;
@@ -172,6 +178,7 @@ drop f26, drop f27, drop f28, drop f29, drop f30, drop f31, drop f32,
drop f33, drop f34, drop f35;
** Insert Data into Master **
+connection master;
INSERT into t2 set f1=1, f2=1, f3=1, f4='first', f8='f8: medium size blob', f10='f10:
some var char';
INSERT into t2 values (2, 2, 2, 'second',
@@ -184,11 +191,11 @@ INSERT into t3 set f1=1, f2=1, f3=1, f4='first', f10='f10: some var char';
INSERT into t4 set f1=1, f2=1, f3=1, f4='first', f7='f7: medium size blob', f10='f10:
binary data';
INSERT into t31 set f1=1, f2=1, f3=1, f4='first';
-INSERT into t31 set f1=1, f2=1, f3=2, f4='second',
+insert ignore into t31 set f1=1, f2=1, f3=2, f4='second',
f9=2.2, f10='seven samurai', f28=222.222, f35='222';
Warnings:
-Warning 1366 Incorrect integer value: 'seven samurai' for column 'f10' at row 1
-INSERT into t31 values (1, 1, 3, 'third',
+Warning 1366 Incorrect integer value: 'seven samurai' for column `test`.`t31`.`f10` at row 1
+insert ignore into t31 values (1, 1, 3, 'third',
/* f5 BIGINT, */ 333333333333333333333333,
/* f6 BLOB, */ '3333333333333333333333',
/* f7 DATE, */ '2007-07-18',
@@ -224,7 +231,7 @@ INSERT into t31 values (1, 1, 3, 'third',
Warnings:
Warning 1264 Out of range value for column 'f5' at row 1
Warning 1264 Out of range value for column 'f24' at row 1
-INSERT into t31 values (1, 1, 4, 'fourth',
+insert ignore into t31 values (1, 1, 4, 'fourth',
/* f5 BIGINT, */ 333333333333333333333333,
/* f6 BLOB, */ '3333333333333333333333',
/* f7 DATE, */ '2007-07-18',
@@ -333,6 +340,7 @@ Warning 1264 Out of range value for column 'f24' at row 3
** Sync slave with master **
** Do selects from tables **
+connection slave;
select * from t1 order by f3;
f1 f2 f3 f4
2 2 2 second
@@ -394,13 +402,14 @@ f1 f2 f3 f4
1 1 4 fourth
1 1 5 fifth
1 1 6 sixth
+connection master;
** Do updates master **
update t31 set f5=555555555555555 where f3=6;
update t31 set f2=2 where f3=2;
update t31 set f1=NULL where f3=1;
-update t31 set f3=NULL, f27=NULL, f35='f35 new value' where f3=3;
+update ignore t31 set f3=NULL, f27=NULL, f35='f35 new value' where f3=3;
Warnings:
Warning 1048 Column 'f3' cannot be null
@@ -411,12 +420,14 @@ delete from t2;
delete from t3;
delete from t4;
delete from t31;
+connection slave;
select * from t31;
f1 f2 f3 f4
** Check slave status **
include/check_slave_is_running.inc
+connection master;
****************************************
* columns in master at middle of table *
@@ -434,14 +445,17 @@ CREATE TABLE t10 (a INT PRIMARY KEY, b BLOB, c CHAR(5)
** Connect to master and create table **
+connection master;
CREATE TABLE t10 (a INT KEY, b BLOB, f DOUBLE DEFAULT '233',
c CHAR(5), e INT DEFAULT '1')ENGINE=TokuDB;
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
set @b1 = 'b1b1b1b1';
set @b1 = concat(@b1,@b1);
INSERT INTO t10 () VALUES(1,@b1,DEFAULT,'Kyle',DEFAULT),
@@ -452,11 +466,14 @@ INSERT INTO t10 () VALUES(1,@b1,DEFAULT,'Kyle',DEFAULT),
*** Expect slave to fail with Error 1677 ***
********************************************
+connection slave;
include/wait_for_slave_sql_error_and_skip.inc [errno=1677]
-Last_SQL_Error = 'Column 2 of table 'test.t10' cannot be converted from type 'double' to type 'char(5)''
+Last_SQL_Error = 'Column 2 of table 'test.t10' cannot be converted from type 'double' to type 'char(5 octets) character set latin1''
*** Drop t10 ***
+connection master;
DROP TABLE t10;
+connection slave;
*********************************************
* More columns in master at middle of table *
@@ -470,14 +487,17 @@ CREATE TABLE t11 (a INT PRIMARY KEY, b BLOB, c VARCHAR(254)
) ENGINE=TokuDB;
*** Create t11 on Master ***
+connection master;
CREATE TABLE t11 (a INT KEY, b BLOB, f TEXT,
c CHAR(5) DEFAULT 'test', e INT DEFAULT '1')ENGINE=TokuDB;
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
set @b1 = 'b1b1b1b1';
set @b1 = concat(@b1,@b1);
INSERT INTO t11 () VALUES(1,@b1,'Testing is fun','Kyle',DEFAULT),
@@ -488,11 +508,14 @@ INSERT INTO t11 () VALUES(1,@b1,'Testing is fun','Kyle',DEFAULT),
*** Expect slave to fail with Error 1677 ***
********************************************
+connection slave;
include/wait_for_slave_sql_error_and_skip.inc [errno=1677]
-Last_SQL_Error = 'Column 2 of table 'test.t11' cannot be converted from type 'tinyblob' to type 'varchar(254)''
+Last_SQL_Error = 'Column 2 of table 'test.t11' cannot be converted from type 'blob' to type 'varchar(254 octets) character set latin1''
*** Drop t11 ***
+connection master;
DROP TABLE t11;
+connection slave;
*********************************************
* More columns in master at middle of table *
@@ -506,14 +529,17 @@ CREATE TABLE t12 (a INT PRIMARY KEY, b BLOB, c BLOB
) ENGINE=TokuDB;
*** Create t12 on Master ***
+connection master;
CREATE TABLE t12 (a INT KEY, b BLOB, f TEXT,
c CHAR(5) DEFAULT 'test', e INT DEFAULT '1')ENGINE=TokuDB;
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
set @b1 = 'b1b1b1b1';
set @b1 = concat(@b1,@b1);
INSERT INTO t12 () VALUES(1,@b1,'Kyle',DEFAULT,DEFAULT),
@@ -527,6 +553,7 @@ a hex(b) f c e
3 62316231623162316231623162316231 QA test 1
*** Select on Slave ***
+connection slave;
SELECT a,hex(b),c FROM t12 ORDER BY a;
a hex(b) c
1 62316231623162316231623162316231 Kyle
@@ -534,7 +561,9 @@ a hex(b) c
3 62316231623162316231623162316231 QA
*** Drop t12 ***
+connection master;
DROP TABLE t12;
+connection slave;
****************************************************
* - Alter Master adding columns at middle of table *
@@ -549,6 +578,7 @@ CREATE TABLE t14 (c1 INT PRIMARY KEY, c4 BLOB, c5 CHAR(5)
) ENGINE=TokuDB;
*** Create t14 on Master ***
+connection master;
CREATE TABLE t14 (c1 INT KEY, c4 BLOB, c5 CHAR(5),
c6 INT DEFAULT '1',
c7 TIMESTAMP NULL DEFAULT CURRENT_TIMESTAMP
@@ -556,9 +586,11 @@ c7 TIMESTAMP NULL DEFAULT CURRENT_TIMESTAMP
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
ALTER TABLE t14 ADD COLUMN c2 DECIMAL(8,2) AFTER c1;
ALTER TABLE t14 ADD COLUMN c3 TEXT AFTER c2;
@@ -575,6 +607,7 @@ c1 c2 c3 hex(c4) c5 c6 c7
3 3.00 If is does not, I will open a bug 62316231623162316231623162316231 QA 1 CURRENT_TIMESTAMP
*** Select on Slave ****
+connection slave;
SELECT c1,c2,c3,hex(c4),c5 FROM t14 ORDER BY c1;
c1 c2 c3 hex(c4) c5
1 1.00 Replication Testing Extra Col 62316231623162316231623162316231 Kyle
@@ -587,6 +620,7 @@ c1 c2 c3 hex(c4) c5
****************************************************
*** connect to master and drop columns ***
+connection master;
ALTER TABLE t14 DROP COLUMN c2;
ALTER TABLE t14 DROP COLUMN c7;
@@ -600,15 +634,18 @@ c1 c3 hex(c4) c5 c6
************
* Bug30415 *
************
+connection slave;
include/wait_for_slave_sql_error.inc [errno=1091]
-Last_SQL_Error = 'Error 'Can't DROP 'c7'; check that column/key exists' on query. Default database: 'test'. Query: 'ALTER TABLE t14 DROP COLUMN c7''
+Last_SQL_Error = 'Error 'Can't DROP COLUMN `c7`; check that it exists' on query. Default database: 'test'. Query: 'ALTER TABLE t14 DROP COLUMN c7''
STOP SLAVE;
RESET SLAVE;
*** Drop t14 ***
DROP TABLE t14;
+connection master;
DROP TABLE t14;
RESET MASTER;
+connection slave;
START SLAVE;
*************************************************
@@ -623,6 +660,7 @@ CREATE TABLE t15 (c1 INT PRIMARY KEY, c4 BLOB, c5 CHAR(5)
) ENGINE=TokuDB;
*** Create t15 on Master ***
+connection master;
CREATE TABLE t15 (c1 INT KEY, c4 BLOB, c5 CHAR(5),
c6 INT DEFAULT '1',
c7 TIMESTAMP NULL DEFAULT CURRENT_TIMESTAMP
@@ -630,9 +668,11 @@ c7 TIMESTAMP NULL DEFAULT CURRENT_TIMESTAMP
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
ALTER TABLE t15 ADD COLUMN c2 DECIMAL(8,2) AFTER c7;
set @b1 = 'b1b1b1b1';
set @b1 = concat(@b1,@b1);
@@ -649,6 +689,7 @@ c1 hex(c4) c5 c6 c7 c2
*** Expect slave to fail with Error 1054 ***
********************************************
+connection slave;
include/wait_for_slave_sql_error.inc [errno=1054]
Last_SQL_Error = 'Error 'Unknown column 'c7' in 't15'' on query. Default database: 'test'. Query: 'ALTER TABLE t15 ADD COLUMN c2 DECIMAL(8,2) AFTER c7''
STOP SLAVE;
@@ -656,8 +697,10 @@ RESET SLAVE;
*** Drop t15 ***
DROP TABLE t15;
+connection master;
DROP TABLE t15;
RESET MASTER;
+connection slave;
START SLAVE;
************************************************
@@ -672,6 +715,7 @@ CREATE TABLE t16 (c1 INT PRIMARY KEY, c4 BLOB, c5 CHAR(5)
) ENGINE=TokuDB;
*** Create t16 on Master ***
+connection master;
CREATE TABLE t16 (c1 INT KEY, c4 BLOB, c5 CHAR(5),
c6 INT DEFAULT '1',
c7 TIMESTAMP NULL DEFAULT CURRENT_TIMESTAMP
@@ -679,9 +723,11 @@ c7 TIMESTAMP NULL DEFAULT CURRENT_TIMESTAMP
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Create Index and Data Insert ***
+connection master;
CREATE INDEX part_of_c6 ON t16 (c6);
set @b1 = 'b1b1b1b1';
set @b1 = concat(@b1,@b1);
@@ -698,6 +744,7 @@ c1 hex(c4) c5 c6 c7
*** BUG 30434 ***
*****************
+connection slave;
include/wait_for_slave_sql_error.inc [errno=1072]
Last_SQL_Error = 'Error 'Key column 'c6' doesn't exist in table' on query. Default database: 'test'. Query: 'CREATE INDEX part_of_c6 ON t16 (c6)''
STOP SLAVE;
@@ -705,8 +752,10 @@ RESET SLAVE;
*** Drop t16 ***
DROP TABLE t16;
+connection master;
DROP TABLE t16;
RESET MASTER;
+connection slave;
START SLAVE;
*****************************************************
@@ -721,6 +770,7 @@ CREATE TABLE t17 (c1 INT PRIMARY KEY, c4 BLOB, c5 CHAR(5)
) ENGINE=TokuDB;
*** Create t17 on Master ***
+connection master;
CREATE TABLE t17 (c1 INT KEY, c4 BLOB, c5 CHAR(5),
c6 INT DEFAULT '1',
c7 TIMESTAMP NULL DEFAULT CURRENT_TIMESTAMP
@@ -728,9 +778,11 @@ c7 TIMESTAMP NULL DEFAULT CURRENT_TIMESTAMP
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
set @b1 = 'b1b1b1b1';
set @b1 = concat(@b1,@b1);
INSERT INTO t17 () VALUES(1,@b1,'Kyle',DEFAULT,DEFAULT),
@@ -743,6 +795,7 @@ c1 hex(c4) c5 c6 c7
3 62316231623162316231623162316231 QA 3 CURRENT_TIMESTAMP
** Select * from Slave **
+connection slave;
SELECT c1,hex(c4),c5 FROM t17 ORDER BY c1;
c1 hex(c4) c5
1 62316231623162316231623162316231 Kyle
@@ -750,6 +803,7 @@ c1 hex(c4) c5
3 62316231623162316231623162316231 QA
** Delete from master **
+connection master;
DELETE FROM t17 WHERE c6 = 3;
SELECT c1,hex(c4),c5,c6,c7 FROM t17 ORDER BY c1;
c1 hex(c4) c5 c6 c7
@@ -757,11 +811,14 @@ c1 hex(c4) c5 c6 c7
2 62316231623162316231623162316231 JOE 2 CURRENT_TIMESTAMP
** Check slave **
+connection slave;
SELECT c1,hex(c4),c5 FROM t17 ORDER BY c1;
c1 hex(c4) c5
1 62316231623162316231623162316231 Kyle
2 62316231623162316231623162316231 JOE
+connection master;
DROP TABLE t17;
+connection slave;
*****************************************************
@@ -779,6 +836,7 @@ CREATE TABLE t18 (c1 INT PRIMARY KEY, c4 BLOB, c5 CHAR(5)
) ENGINE=TokuDB;
*** Create t18 on Master ***
+connection master;
CREATE TABLE t18 (c1 INT KEY, c4 BLOB, c5 CHAR(5),
c6 INT DEFAULT '1',
c7 TIMESTAMP NULL DEFAULT CURRENT_TIMESTAMP
@@ -786,9 +844,11 @@ c7 TIMESTAMP NULL DEFAULT CURRENT_TIMESTAMP
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
set @b1 = 'b1b1b1b1';
set @b1 = concat(@b1,@b1);
INSERT INTO t18 () VALUES(1,@b1,'Kyle',DEFAULT,DEFAULT),
@@ -801,6 +861,7 @@ c1 hex(c4) c5 c6 c7
3 62316231623162316231623162316231 QA 3 CURRENT_TIMESTAMP
** Select * from Slave **
+connection slave;
SELECT c1,hex(c4),c5 FROM t18 ORDER BY c1;
c1 hex(c4) c5
1 62316231623162316231623162316231 Kyle
@@ -808,6 +869,7 @@ c1 hex(c4) c5
3 62316231623162316231623162316231 QA
** update from master **
+connection master;
UPDATE t18 SET c5 = 'TEST' WHERE c6 = 3;
SELECT c1,hex(c4),c5,c6,c7 FROM t18 ORDER BY c1;
c1 hex(c4) c5 c6 c7
@@ -816,12 +878,15 @@ c1 hex(c4) c5 c6 c7
3 62316231623162316231623162316231 TEST 3 CURRENT_TIMESTAMP
** Check slave **
+connection slave;
SELECT c1,hex(c4),c5 FROM t18 ORDER BY c1;
c1 hex(c4) c5
1 62316231623162316231623162316231 Kyle
2 62316231623162316231623162316231 JOE
3 62316231623162316231623162316231 TEST
+connection master;
DROP TABLE t18;
+connection slave;
*****************************************************
@@ -836,6 +901,7 @@ CREATE TABLE t5 (c1 INT PRIMARY KEY, c4 BLOB, c5 CHAR(5)
) ENGINE=TokuDB;
*** Create t5 on Master ***
+connection master;
CREATE TABLE t5 (c1 INT KEY, c4 BLOB, c5 CHAR(5),
c6 LONG,
c7 TIMESTAMP NULL DEFAULT CURRENT_TIMESTAMP
@@ -843,9 +909,11 @@ c7 TIMESTAMP NULL DEFAULT CURRENT_TIMESTAMP
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
set @b1 = 'b1b1b1b1';
INSERT INTO t5 () VALUES(1,@b1,'Kyle',UUID(),DEFAULT),
(2,@b1,'JOE',UUID(),DEFAULT),
@@ -857,10 +925,14 @@ c1 hex(c4) c5 c6 c7
3 6231623162316231 QA UUID TIME
** Select * from Slave **
+connection slave;
SELECT c1,hex(c4),c5 FROM t5 ORDER BY c1;
c1 hex(c4) c5
1 6231623162316231 Kyle
2 6231623162316231 JOE
3 6231623162316231 QA
+connection master;
DROP TABLE t5;
+connection slave;
+connection master;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_extra_col_slave_tokudb.result b/storage/tokudb/mysql-test/rpl/r/rpl_extra_col_slave_tokudb.result
index 6212c378f69..318d5496255 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_extra_col_slave_tokudb.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_extra_col_slave_tokudb.result
@@ -3,7 +3,7 @@ include/master-slave.inc
call mtr.add_suppression("Slave: Unknown table 't6' error.* 1051");
call mtr.add_suppression("Slave SQL.*Column [0-9] of table .test.t[0-9]*. cannot be converted from type.* error.* 1677");
**** Diff Table Def Start ****
-*** On Slave ***
+connection slave;
STOP SLAVE;
RESET SLAVE;
SET @saved_slave_type_conversions = @@slave_type_conversions;
@@ -13,12 +13,15 @@ d FLOAT DEFAULT '2.00',
e CHAR(4) DEFAULT 'TEST')
ENGINE='TokuDB';
*** Create t1 on Master ***
+connection master;
CREATE TABLE t1 (a INT PRIMARY KEY, b INT, c CHAR(10)
) ENGINE='TokuDB';
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
INSERT INTO t1 () VALUES(1,2,'TEXAS'),(2,1,'AUSTIN'),(3,4,'QA');
SELECT * FROM t1 ORDER BY a;
a b c
@@ -26,6 +29,7 @@ a b c
2 1 AUSTIN
3 4 QA
*** Select from slave ***
+connection slave;
SELECT * FROM t1 ORDER BY a;
a b c d e
1 2 TEXAS 2 TEST
@@ -33,7 +37,9 @@ a b c d e
3 4 QA 2 TEST
SET GLOBAL SLAVE_TYPE_CONVERSIONS = @saved_slave_type_conversions;
*** Drop t1 ***
+connection master;
DROP TABLE t1;
+connection slave;
*** Create t2 on slave ***
STOP SLAVE;
RESET SLAVE;
@@ -42,10 +48,12 @@ d FLOAT DEFAULT '2.00',
e CHAR(5) DEFAULT 'TEST2')
ENGINE='TokuDB';
*** Create t2 on Master ***
+connection master;
CREATE TABLE t2 (a INT PRIMARY KEY, b INT, c CHAR(10)
) ENGINE='TokuDB';
RESET MASTER;
*** Master Data Insert ***
+connection master;
INSERT INTO t2 () VALUES(1,2,'Kyle, TEX'),(2,1,'JOE AUSTIN'),(3,4,'QA TESTING');
SELECT * FROM t2 ORDER BY a;
a b c
@@ -53,17 +61,22 @@ a b c
2 1 JOE AUSTIN
3 4 QA TESTING
*** Start Slave ***
+connection slave;
START SLAVE;
include/wait_for_slave_sql_error.inc [errno=1677]
-Last_SQL_Error = 'Column 2 of table 'test.t2' cannot be converted from type 'char(10)' to type 'char(5)''
+Last_SQL_Error = 'Column 2 of table 'test.t2' cannot be converted from type 'char(10 octets)' to type 'char(5 octets) character set latin1''
STOP SLAVE;
RESET SLAVE;
SELECT * FROM t2 ORDER BY a;
a b c d e
+connection master;
RESET MASTER;
+connection slave;
START SLAVE;
*** Drop t2 ***
+connection master;
DROP TABLE t2;
+connection slave;
*** Create t3 on slave ***
STOP SLAVE;
RESET SLAVE;
@@ -72,22 +85,28 @@ d FLOAT DEFAULT '2.00',
e CHAR(5) DEFAULT 'TEST2')
ENGINE='TokuDB';
*** Create t3 on Master ***
+connection master;
CREATE TABLE t3 (a BLOB, b INT PRIMARY KEY, c CHAR(20)
) ENGINE='TokuDB';
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
set @b1 = 'b1';
set @b1 = concat(@b1,@b1);
INSERT INTO t3 () VALUES(@b1,2,'Kyle, TEX'),(@b1,1,'JOE AUSTIN'),(@b1,4,'QA TESTING');
********************************************
*** Expect slave to fail with Error 1677 ***
********************************************
+connection slave;
include/wait_for_slave_sql_error_and_skip.inc [errno=1677]
-Last_SQL_Error = 'Column 0 of table 'test.t3' cannot be converted from type 'tinyblob' to type 'int(11)''
+Last_SQL_Error = 'Column 0 of table 'test.t3' cannot be converted from type 'blob' to type 'int(11)''
*** Drop t3 ***
+connection master;
DROP TABLE t3;
+connection slave;
*** Create t4 on slave ***
STOP SLAVE;
RESET SLAVE;
@@ -96,21 +115,27 @@ d FLOAT DEFAULT '2.00',
e CHAR(5) DEFAULT 'TEST2')
ENGINE='TokuDB';
*** Create t4 on Master ***
+connection master;
CREATE TABLE t4 (a DECIMAL(8,2), b INT PRIMARY KEY, c CHAR(20)
) ENGINE='TokuDB';
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
INSERT INTO t4 () VALUES(100.22,2,'Kyle, TEX'),(200.26,1,'JOE AUSTIN'),
(30000.22,4,'QA TESTING');
********************************************
*** Expect slave to fail with Error 1677 ***
********************************************
+connection slave;
include/wait_for_slave_sql_error_and_skip.inc [errno=1677]
Last_SQL_Error = 'Column 0 of table 'test.t4' cannot be converted from type 'decimal(8,2)' to type 'int(11)''
*** Drop t4 ***
+connection master;
DROP TABLE t4;
+connection slave;
*** Create t5 on slave ***
STOP SLAVE;
RESET SLAVE;
@@ -118,45 +143,57 @@ CREATE TABLE t5 (a INT PRIMARY KEY, b CHAR(5),
c FLOAT, d INT, e DOUBLE,
f DECIMAL(8,2))ENGINE='TokuDB';
*** Create t5 on Master ***
+connection master;
CREATE TABLE t5 (a INT PRIMARY KEY, b VARCHAR(6),
c DECIMAL(8,2), d BIT, e BLOB,
f FLOAT) ENGINE='TokuDB';
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
INSERT INTO t5 () VALUES(1,'Kyle',200.23,1,'b1b1',23.00098),
(2,'JOE',300.01,0,'b2b2',1.0000009);
********************************************
*** Expect slave to fail with Error 1677 ***
********************************************
+connection slave;
include/wait_for_slave_sql_error_and_skip.inc [errno=1677]
-Last_SQL_Error = 'Column 1 of table 'test.t5' cannot be converted from type 'varchar(6)' to type 'char(5)''
+Last_SQL_Error = 'Column 1 of table 'test.t5' cannot be converted from type 'varchar(6 octets)' to type 'char(5 octets) character set latin1''
*** Drop t5 ***
+connection master;
DROP TABLE t5;
+connection slave;
*** Create t6 on slave ***
STOP SLAVE;
RESET SLAVE;
CREATE TABLE t6 (a INT PRIMARY KEY, b CHAR(5),
c FLOAT, d INT)ENGINE='TokuDB';
*** Create t6 on Master ***
+connection master;
CREATE TABLE t6 (a INT PRIMARY KEY, b VARCHAR(6),
c DECIMAL(8,2), d BIT
) ENGINE='TokuDB';
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
INSERT INTO t6 () VALUES(1,'Kyle',200.23,1),
(2,'JOE',300.01,0);
********************************************
*** Expect slave to fail with Error 1677 ***
********************************************
+connection slave;
include/wait_for_slave_sql_error.inc [errno=1677]
-Last_SQL_Error = 'Column 1 of table 'test.t6' cannot be converted from type 'varchar(6)' to type 'char(5)''
+Last_SQL_Error = 'Column 1 of table 'test.t6' cannot be converted from type 'varchar(6 octets)' to type 'char(5 octets) character set latin1''
*** Drop t6 ***
include/rpl_reset.inc
+connection master;
DROP TABLE t6;
+connection slave;
**** Diff Table Def End ****
**** Extra Colums Start ****
*** Create t7 on slave ***
@@ -167,12 +204,15 @@ d TIMESTAMP NULL DEFAULT '0000-00-00 00:00:00',
e CHAR(20) DEFAULT 'Extra Column Testing')
ENGINE='TokuDB';
*** Create t7 on Master ***
+connection master;
CREATE TABLE t7 (a INT PRIMARY KEY, b BLOB, c CHAR(5)
) ENGINE='TokuDB';
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
set @b1 = 'b1';
set @b1 = concat(@b1,@b1);
INSERT INTO t7 () VALUES(1,@b1,'Kyle'),(2,@b1,'JOE'),(3,@b1,'QA');
@@ -182,13 +222,16 @@ a b c
2 b1b1 JOE
3 b1b1 QA
*** Select from slave ***
+connection slave;
SELECT * FROM t7 ORDER BY a;
a b c d e
1 b1b1 Kyle 0000-00-00 00:00:00 Extra Column Testing
2 b1b1 JOE 0000-00-00 00:00:00 Extra Column Testing
3 b1b1 QA 0000-00-00 00:00:00 Extra Column Testing
*** Drop t7 ***
+connection master;
DROP TABLE t7;
+connection slave;
*** Create t8 on slave ***
STOP SLAVE;
RESET SLAVE;
@@ -196,17 +239,22 @@ CREATE TABLE t8 (a INT KEY, b BLOB, c CHAR(5),
d TIMESTAMP NULL DEFAULT '0000-00-00 00:00:00',
e INT)ENGINE='TokuDB';
*** Create t8 on Master ***
+connection master;
CREATE TABLE t8 (a INT PRIMARY KEY, b BLOB, c CHAR(5)
) ENGINE='TokuDB';
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
set @b1 = 'b1b1b1b1';
set @b1 = concat(@b1,@b1);
INSERT INTO t8 () VALUES(1,@b1,'Kyle'),(2,@b1,'JOE'),(3,@b1,'QA');
*** Drop t8 ***
+connection master;
DROP TABLE t8;
+connection slave;
STOP SLAVE;
RESET SLAVE;
CREATE TABLE t9 (a INT KEY, b BLOB, c CHAR(5),
@@ -218,77 +266,98 @@ g text,
h blob not null,
i blob) ENGINE='TokuDB';
*** Create t9 on Master ***
+connection master;
CREATE TABLE t9 (a INT PRIMARY KEY, b BLOB, c CHAR(5)
) ENGINE='TokuDB';
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
set @b1 = 'b1b1b1b1';
set @b1 = concat(@b1,@b1);
INSERT INTO t9 () VALUES(1,@b1,'Kyle'),(2,@b1,'JOE'),(3,@b1,'QA');
+connection slave;
select * from t9;
a b c d e f g h i
1 b1b1b1b1b1b1b1b1 Kyle 0000-00-00 00:00:00 0 NULL NULL
2 b1b1b1b1b1b1b1b1 JOE 0000-00-00 00:00:00 0 NULL NULL
3 b1b1b1b1b1b1b1b1 QA 0000-00-00 00:00:00 0 NULL NULL
+connection master;
DROP TABLE t9;
+connection slave;
*** Create t10 on slave ***
STOP SLAVE;
RESET SLAVE;
CREATE TABLE t10 (a INT KEY, b BLOB, f DOUBLE DEFAULT '233',
c CHAR(5), e INT DEFAULT '1')ENGINE='TokuDB';
*** Create t10 on Master ***
+connection master;
CREATE TABLE t10 (a INT PRIMARY KEY, b BLOB, c CHAR(5)
) ENGINE='TokuDB';
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
set @b1 = 'b1b1b1b1';
set @b1 = concat(@b1,@b1);
INSERT INTO t10 () VALUES(1,@b1,'Kyle'),(2,@b1,'JOE'),(3,@b1,'QA');
********************************************
*** Expect slave to fail with Error 1677 ***
********************************************
+connection slave;
include/wait_for_slave_sql_error_and_skip.inc [errno=1677]
-Last_SQL_Error = 'Column 2 of table 'test.t10' cannot be converted from type 'char(5)' to type 'double''
+Last_SQL_Error = 'Column 2 of table 'test.t10' cannot be converted from type 'char(5 octets)' to type 'double''
*** Drop t10 ***
+connection master;
DROP TABLE t10;
+connection slave;
*** Create t11 on slave ***
STOP SLAVE;
RESET SLAVE;
CREATE TABLE t11 (a INT KEY, b BLOB, f INT,
c CHAR(5) DEFAULT 'test', e INT DEFAULT '1')ENGINE='TokuDB';
*** Create t11 on Master ***
+connection master;
CREATE TABLE t11 (a INT PRIMARY KEY, b BLOB, c VARCHAR(254)
) ENGINE='TokuDB';
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
set @b1 = 'b1b1b1b1';
set @b1 = concat(@b1,@b1);
INSERT INTO t11 () VALUES(1,@b1,'Kyle'),(2,@b1,'JOE'),(3,@b1,'QA');
********************************************
*** Expect slave to fail with Error 1677 ***
********************************************
+connection slave;
include/wait_for_slave_sql_error_and_skip.inc [errno=1677]
-Last_SQL_Error = 'Column 2 of table 'test.t11' cannot be converted from type 'varchar(254)' to type 'int(11)''
+Last_SQL_Error = 'Column 2 of table 'test.t11' cannot be converted from type 'varchar(254 octets)' to type 'int(11)''
*** Drop t11 ***
+connection master;
DROP TABLE t11;
+connection slave;
*** Create t12 on slave ***
STOP SLAVE;
RESET SLAVE;
CREATE TABLE t12 (a INT KEY, b BLOB, f TEXT,
c CHAR(5) DEFAULT 'test', e INT DEFAULT '1')ENGINE='TokuDB';
*** Create t12 on Master ***
+connection master;
CREATE TABLE t12 (a INT PRIMARY KEY, b BLOB, c BLOB
) ENGINE='TokuDB';
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
set @b1 = 'b1b1b1b1';
set @b1 = concat(@b1,@b1);
INSERT INTO t12 () VALUES(1,@b1,'Kyle'),(2,@b1,'JOE'),(3,@b1,'QA');
@@ -298,13 +367,16 @@ a b c
2 b1b1b1b1b1b1b1b1 JOE
3 b1b1b1b1b1b1b1b1 QA
*** Select on Slave ***
+connection slave;
SELECT * FROM t12 ORDER BY a;
a b f c e
1 b1b1b1b1b1b1b1b1 Kyle test 1
2 b1b1b1b1b1b1b1b1 JOE test 1
3 b1b1b1b1b1b1b1b1 QA test 1
*** Drop t12 ***
+connection master;
DROP TABLE t12;
+connection slave;
**** Extra Colums End ****
*** BUG 22177 Start ***
*** Create t13 on slave ***
@@ -315,12 +387,15 @@ d INT DEFAULT '1',
e TIMESTAMP NULL DEFAULT CURRENT_TIMESTAMP
)ENGINE='TokuDB';
*** Create t13 on Master ***
+connection master;
CREATE TABLE t13 (a INT PRIMARY KEY, b BLOB, c CHAR(5)
) ENGINE='TokuDB';
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
set @b1 = 'b1b1b1b1';
set @b1 = concat(@b1,@b1);
INSERT INTO t13 () VALUES(1,@b1,'Kyle'),(2,@b1,'JOE'),(3,@b1,'QA');
@@ -330,13 +405,16 @@ a b c
2 b1b1b1b1b1b1b1b1 JOE
3 b1b1b1b1b1b1b1b1 QA
*** Select on Slave ****
+connection slave;
SELECT * FROM t13 ORDER BY a;
a b c d e
1 b1b1b1b1b1b1b1b1 Kyle 1 CURRENT_TIMESTAMP
2 b1b1b1b1b1b1b1b1 JOE 1 CURRENT_TIMESTAMP
3 b1b1b1b1b1b1b1b1 QA 1 CURRENT_TIMESTAMP
*** Drop t13 ***
+connection master;
DROP TABLE t13;
+connection slave;
*** 22117 END ***
*** Alter Master Table Testing Start ***
*** Create t14 on slave ***
@@ -347,12 +425,15 @@ c6 INT DEFAULT '1',
c7 TIMESTAMP NULL DEFAULT CURRENT_TIMESTAMP
)ENGINE='TokuDB';
*** Create t14 on Master ***
+connection master;
CREATE TABLE t14 (c1 INT PRIMARY KEY, c4 BLOB, c5 CHAR(5)
) ENGINE='TokuDB';
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
ALTER TABLE t14 ADD COLUMN c2 DECIMAL(8,2) AFTER c1;
ALTER TABLE t14 ADD COLUMN c3 TEXT AFTER c2;
set @b1 = 'b1b1b1b1';
@@ -366,6 +447,7 @@ c1 c2 c3 c4 c5
2 2.00 This Test Should work b1b1b1b1b1b1b1b1 JOE
3 3.00 If is does not, I will open a bug b1b1b1b1b1b1b1b1 QA
*** Select on Slave ****
+connection slave;
SELECT * FROM t14 ORDER BY c1;
c1 c2 c3 c4 c5 c6 c7
1 1.00 Replication Testing Extra Col b1b1b1b1b1b1b1b1 Kyle 1 CURRENT_TIMESTAMP
@@ -379,12 +461,15 @@ c6 INT DEFAULT '1',
c7 TIMESTAMP NULL DEFAULT CURRENT_TIMESTAMP
)ENGINE='TokuDB';
*** Create t14a on Master ***
+connection master;
CREATE TABLE t14a (c1 INT PRIMARY KEY, c4 BLOB, c5 CHAR(5)
) ENGINE='TokuDB';
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
set @b1 = 'b1b1b1b1';
set @b1 = concat(@b1,@b1);
INSERT INTO t14a () VALUES(1,@b1,'Kyle'),
@@ -396,6 +481,7 @@ c1 c4 c5
2 b1b1b1b1b1b1b1b1 JOE
3 b1b1b1b1b1b1b1b1 QA
*** Select on Slave ****
+connection slave;
SELECT * FROM t14a ORDER BY c1;
c1 c4 c5 c6 c7
1 b1b1b1b1b1b1b1b1 Kyle 1 CURRENT_TIMESTAMP
@@ -404,11 +490,14 @@ c1 c4 c5 c6 c7
STOP SLAVE;
RESET SLAVE;
*** Master Drop c5 ***
+connection master;
ALTER TABLE t14a DROP COLUMN c5;
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
set @b1 = 'b1b1b1b1';
set @b1 = concat(@b1,@b1);
INSERT INTO t14a () VALUES(4,@b1),
@@ -423,6 +512,7 @@ c1 c4
5 b1b1b1b1b1b1b1b1
6 b1b1b1b1b1b1b1b1
*** Select on Slave ****
+connection slave;
SELECT * FROM t14a ORDER BY c1;
c1 c4 c5 c6 c7
1 b1b1b1b1b1b1b1b1 Kyle 1 CURRENT_TIMESTAMP
@@ -431,8 +521,11 @@ c1 c4 c5 c6 c7
4 b1b1b1b1b1b1b1b1 NULL 1 CURRENT_TIMESTAMP
5 b1b1b1b1b1b1b1b1 NULL 1 CURRENT_TIMESTAMP
6 b1b1b1b1b1b1b1b1 NULL 1 CURRENT_TIMESTAMP
+connection master;
DROP TABLE t14a;
+connection slave;
*** connect to master and drop columns ***
+connection master;
ALTER TABLE t14 DROP COLUMN c2;
ALTER TABLE t14 DROP COLUMN c4;
*** Select from Master ***
@@ -442,13 +535,16 @@ c1 c3 c5
2 This Test Should work JOE
3 If is does not, I will open a bug QA
*** Select from Slave ***
+connection slave;
SELECT * FROM t14 ORDER BY c1;
c1 c3 c5 c6 c7
1 Replication Testing Extra Col Kyle 1 CURRENT_TIMESTAMP
2 This Test Should work JOE 1 CURRENT_TIMESTAMP
3 If is does not, I will open a bug QA 1 CURRENT_TIMESTAMP
*** Drop t14 ***
+connection master;
DROP TABLE t14;
+connection slave;
*** Create t15 on slave ***
STOP SLAVE;
RESET SLAVE;
@@ -458,15 +554,18 @@ c6 INT DEFAULT '1',
c7 TIMESTAMP NULL DEFAULT CURRENT_TIMESTAMP
)ENGINE='TokuDB';
*** Create t15 on Master ***
+connection master;
CREATE TABLE t15 (c1 INT PRIMARY KEY, c2 DECIMAL(8,2), c3 TEXT,
c4 BLOB, c5 CHAR(5)) ENGINE='TokuDB';
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
call mtr.add_suppression("Error .Unknown table .t6.. on query.* error.* 1051");
call mtr.add_suppression("Error .Duplicate column name .c6.. on query.* error.* 1060");
call mtr.add_suppression("Table definition on master and slave does not match: Column . ...e mismatch.* error.* 1535");
*** Master Data Insert ***
+connection master;
set @b1 = 'b1b1b1b1';
set @b1 = concat(@b1,@b1);
INSERT INTO t15 () VALUES(1,1.00,'Replication Testing Extra Col',@b1,'Kyle'),
@@ -478,19 +577,23 @@ c1 c2 c3 c4 c5
2 2.00 This Test Should work b1b1b1b1b1b1b1b1 JOE
3 3.00 If is does not, I will open a bug b1b1b1b1b1b1b1b1 QA
*** Select on Slave ****
+connection slave;
SELECT * FROM t15 ORDER BY c1;
c1 c2 c3 c4 c5 c6 c7
1 1.00 Replication Testing Extra Col b1b1b1b1b1b1b1b1 Kyle 1 CURRENT_TIMESTAMP
2 2.00 This Test Should work b1b1b1b1b1b1b1b1 JOE 1 CURRENT_TIMESTAMP
3 3.00 If is does not, I will open a bug b1b1b1b1b1b1b1b1 QA 1 CURRENT_TIMESTAMP
*** Add column on master that is a Extra on Slave ***
+connection master;
ALTER TABLE t15 ADD COLUMN c6 INT AFTER c5;
********************************************
*** Expect slave to fail with Error 1060 ***
********************************************
+connection slave;
include/wait_for_slave_sql_error_and_skip.inc [errno=1060]
Last_SQL_Error = 'Error 'Duplicate column name 'c6'' on query. Default database: 'test'. Query: 'ALTER TABLE t15 ADD COLUMN c6 INT AFTER c5''
*** Try to insert in master ****
+connection master;
INSERT INTO t15 () VALUES(5,2.00,'Replication Testing',@b1,'Buda',2);
SELECT * FROM t15 ORDER BY c1;
c1 c2 c3 c4 c5 c6
@@ -499,6 +602,7 @@ c1 c2 c3 c4 c5 c6
3 3.00 If is does not, I will open a bug b1b1b1b1b1b1b1b1 QA NULL
5 2.00 Replication Testing b1b1b1b1b1b1b1b1 Buda 2
*** Try to select from slave ****
+connection slave;
SELECT * FROM t15 ORDER BY c1;
c1 c2 c3 c4 c5 c6 c7
1 1.00 Replication Testing Extra Col b1b1b1b1b1b1b1b1 Kyle 1 CURRENT_TIMESTAMP
@@ -506,7 +610,9 @@ c1 c2 c3 c4 c5 c6 c7
3 3.00 If is does not, I will open a bug b1b1b1b1b1b1b1b1 QA 1 CURRENT_TIMESTAMP
5 2.00 Replication Testing b1b1b1b1b1b1b1b1 Buda 2 CURRENT_TIMESTAMP
*** DROP TABLE t15 ***
+connection master;
DROP TABLE t15;
+connection slave;
*** Create t16 on slave ***
STOP SLAVE;
RESET SLAVE;
@@ -516,12 +622,15 @@ c6 INT DEFAULT '1',
c7 TIMESTAMP NULL DEFAULT CURRENT_TIMESTAMP
)ENGINE='TokuDB';
*** Create t16 on Master ***
+connection master;
CREATE TABLE t16 (c1 INT PRIMARY KEY, c2 DECIMAL(8,2), c3 TEXT,
c4 BLOB, c5 CHAR(5))ENGINE='TokuDB';
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
set @b1 = 'b1b1b1b1';
set @b1 = concat(@b1,@b1);
INSERT INTO t16 () VALUES(1,1.00,'Replication Testing Extra Col',@b1,'Kyle'),
@@ -533,12 +642,14 @@ c1 c2 c3 c4 c5
2 2.00 This Test Should work b1b1b1b1b1b1b1b1 JOE
3 3.00 If is does not, I will open a bug b1b1b1b1b1b1b1b1 QA
*** Select on Slave ****
+connection slave;
SELECT * FROM t16 ORDER BY c1;
c1 c2 c3 c4 c5 c6 c7
1 1.00 Replication Testing Extra Col b1b1b1b1b1b1b1b1 Kyle 1 CURRENT_TIMESTAMP
2 2.00 This Test Should work b1b1b1b1b1b1b1b1 JOE 1 CURRENT_TIMESTAMP
3 3.00 If is does not, I will open a bug b1b1b1b1b1b1b1b1 QA 1 CURRENT_TIMESTAMP
*** Add Partition on master ***
+connection master;
ALTER TABLE t16 PARTITION BY KEY(c1) PARTITIONS 4;
INSERT INTO t16 () VALUES(4,1.00,'Replication Rocks',@b1,'Omer');
SHOW CREATE TABLE t16;
@@ -546,30 +657,33 @@ Table Create Table
t16 CREATE TABLE `t16` (
`c1` int(11) NOT NULL,
`c2` decimal(8,2) DEFAULT NULL,
- `c3` text,
- `c4` blob,
+ `c3` text DEFAULT NULL,
+ `c4` blob DEFAULT NULL,
`c5` char(5) DEFAULT NULL,
PRIMARY KEY (`c1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (c1)
-PARTITIONS 4 */
+ PARTITION BY KEY (`c1`)
+PARTITIONS 4
*** Show table on Slave ****
+connection slave;
SHOW CREATE TABLE t16;
Table Create Table
t16 CREATE TABLE `t16` (
`c1` int(11) NOT NULL,
`c2` decimal(8,2) DEFAULT NULL,
- `c3` text,
- `c4` blob,
+ `c3` text DEFAULT NULL,
+ `c4` blob DEFAULT NULL,
`c5` char(5) DEFAULT NULL,
- `c6` int(11) DEFAULT '1',
- `c7` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
+ `c6` int(11) DEFAULT 1,
+ `c7` timestamp NULL DEFAULT current_timestamp(),
PRIMARY KEY (`c1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (c1)
-PARTITIONS 4 */
+ PARTITION BY KEY (`c1`)
+PARTITIONS 4
*** DROP TABLE t16 ***
+connection master;
DROP TABLE t16;
+connection slave;
*** Alter Master End ***
*** Create t17 on slave ***
STOP SLAVE;
@@ -579,18 +693,24 @@ d FLOAT DEFAULT '2.00',
e CHAR(5) DEFAULT 'TEST2')
ENGINE='TokuDB';
*** Create t17 on Master ***
+connection master;
CREATE TABLE t17 (a BIGINT PRIMARY KEY, b INT, c CHAR(10)
) ENGINE='TokuDB';
RESET MASTER;
*** Start Slave ***
+connection slave;
START SLAVE;
*** Master Data Insert ***
+connection master;
INSERT INTO t17 () VALUES(9223372036854775807,2,'Kyle, TEX');
********************************************
*** Expect slave to fail with Error 1677 ***
********************************************
+connection slave;
include/wait_for_slave_sql_error_and_skip.inc [errno=1677]
Last_SQL_Error = 'Column 0 of table 'test.t17' cannot be converted from type 'bigint' to type 'smallint(6)''
** DROP table t17 ***
+connection master;
DROP TABLE t17;
+connection slave;
include/rpl_end.inc
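Note: the rewritten `Last_SQL_Error` lines reflect a change in how the replication type-conversion check reports column sizes, not a change in behaviour: character columns are now measured in octets and the target's character set is appended, so a utf8 `CHAR(255)` shows up as `char(765 octets)` (three bytes per character). A minimal sketch of the failing shape, with names taken from the hunks above:

    connection slave;
    CREATE TABLE t2 (a INT PRIMARY KEY, b INT, c CHAR(5)) ENGINE=TokuDB;
    connection master;
    CREATE TABLE t2 (a INT PRIMARY KEY, b INT, c CHAR(10)) ENGINE=TokuDB;
    INSERT INTO t2 VALUES (1, 2, 'Kyle, TEX');
    -- the slave SQL thread stops with errno 1677; the message now reads:
    -- ... from type 'char(10 octets)' to type 'char(5 octets) character set latin1'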
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_mixed_replace_into.result b/storage/tokudb/mysql-test/rpl/r/rpl_mixed_replace_into.result
index b1482aa0144..abe80743f62 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_mixed_replace_into.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_mixed_replace_into.result
@@ -11,11 +11,13 @@ pk num txt
2 2 twotwo
3 3 three
4 4 four
+connection slave;
select * from testr;
pk num txt
1 1 one
2 2 twotwo
3 3 three
4 4 four
+connection master;
drop table testr;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_not_null_tokudb.result b/storage/tokudb/mysql-test/rpl/r/rpl_not_null_tokudb.result
index c65c61a0a65..66aaaa6b6fa 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_not_null_tokudb.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_not_null_tokudb.result
@@ -1,5 +1,6 @@
include/master-slave.inc
[connection master]
+connection master;
SET SQL_LOG_BIN= 0;
CREATE TABLE t1(`a` INT, `b` DATE DEFAULT NULL,
`c` INT DEFAULT NULL,
@@ -12,6 +13,7 @@ CREATE TABLE t4(`a` INT, `b` DATE DEFAULT NULL,
`c` INT DEFAULT NULL,
PRIMARY KEY(`a`)) ENGINE=TokuDB DEFAULT CHARSET=LATIN1;
SET SQL_LOG_BIN= 1;
+connection slave;
CREATE TABLE t1(`a` INT, `b` DATE DEFAULT NULL,
`c` INT DEFAULT NULL,
PRIMARY KEY(`a`)) ENGINE=TokuDB DEFAULT CHARSET=LATIN1;
@@ -23,6 +25,7 @@ PRIMARY KEY(`a`)) ENGINE=TokuDB DEFAULT CHARSET=LATIN1;
CREATE TABLE t4(`a` INT, `b` DATE DEFAULT '0000-00-00',
PRIMARY KEY(`a`)) ENGINE=TokuDB DEFAULT CHARSET=LATIN1;
************* EXECUTION WITH INSERTS *************
+connection master;
INSERT INTO t1(a,b,c) VALUES (1, null, 1);
INSERT INTO t1(a,b,c) VALUES (2,'1111-11-11', 2);
INSERT INTO t1(a,b) VALUES (3, null);
@@ -40,20 +43,24 @@ INSERT INTO t4(a,b) VALUES (3, null);
INSERT INTO t4(a,c) VALUES (4, 4);
INSERT INTO t4(a) VALUES (5);
************* SHOWING THE RESULT SETS WITH INSERTS *************
+connection slave;
TABLES t1 and t2 must be equal otherwise an error will be thrown.
include/diff_tables.inc [master:t1, slave:t1]
include/diff_tables.inc [master:t2, slave:t2]
TABLES t2 and t3 must be different.
+connection master;
SELECT * FROM t3 ORDER BY a;
a b
1 NULL
2 1111-11-11
3 NULL
+connection slave;
SELECT * FROM t3 ORDER BY a;
a b c
1 NULL 500
2 1111-11-11 500
3 NULL 500
+connection master;
SELECT * FROM t4 ORDER BY a;
a b c
1 NULL 1
@@ -61,6 +68,7 @@ a b c
3 NULL NULL
4 NULL 4
5 NULL NULL
+connection slave;
SELECT * FROM t4 ORDER BY a;
a b
1 NULL
@@ -69,26 +77,33 @@ a b
4 NULL
5 NULL
************* EXECUTION WITH UPDATES and REPLACES *************
+connection master;
DELETE FROM t1;
INSERT INTO t1(a,b,c) VALUES (1,'1111-11-11', 1);
REPLACE INTO t1(a,b,c) VALUES (2,'1111-11-11', 2);
UPDATE t1 set b= NULL, c= 300 where a= 1;
REPLACE INTO t1(a,b,c) VALUES (2, NULL, 300);
************* SHOWING THE RESULT SETS WITH UPDATES and REPLACES *************
+connection slave;
TABLES t1 and t2 must be equal otherwise an error will be thrown.
include/diff_tables.inc [master:t1, slave:t1]
************* CLEANING *************
+connection master;
DROP TABLE t1;
DROP TABLE t2;
DROP TABLE t3;
DROP TABLE t4;
+connection slave;
+connection master;
SET SQL_LOG_BIN= 0;
CREATE TABLE t1 (`a` INT, `b` BIT DEFAULT NULL, `c` BIT DEFAULT NULL,
PRIMARY KEY (`a`)) ENGINE= TokuDB;
SET SQL_LOG_BIN= 1;
+connection slave;
CREATE TABLE t1 (`a` INT, `b` BIT DEFAULT b'01', `c` BIT DEFAULT NULL,
PRIMARY KEY (`a`)) ENGINE= TokuDB;
************* EXECUTION WITH INSERTS *************
+connection master;
INSERT INTO t1(a,b,c) VALUES (1, null, b'01');
INSERT INTO t1(a,b,c) VALUES (2,b'00', b'01');
INSERT INTO t1(a,b) VALUES (3, null);
@@ -96,6 +111,8 @@ INSERT INTO t1(a,c) VALUES (4, b'01');
INSERT INTO t1(a) VALUES (5);
************* SHOWING THE RESULT SETS WITH INSERTS *************
TABLES t1 and t2 must be different.
+connection slave;
+connection master;
SELECT a,b+0,c+0 FROM t1 ORDER BY a;
a b+0 c+0
1 NULL 1
@@ -103,6 +120,7 @@ a b+0 c+0
3 NULL NULL
4 NULL 1
5 NULL NULL
+connection slave;
SELECT a,b+0,c+0 FROM t1 ORDER BY a;
a b+0 c+0
1 NULL 1
@@ -111,6 +129,7 @@ a b+0 c+0
4 NULL 1
5 NULL NULL
************* EXECUTION WITH UPDATES and REPLACES *************
+connection master;
DELETE FROM t1;
INSERT INTO t1(a,b,c) VALUES (1,b'00', b'01');
REPLACE INTO t1(a,b,c) VALUES (2,b'00',b'01');
@@ -118,8 +137,11 @@ UPDATE t1 set b= NULL, c= b'00' where a= 1;
REPLACE INTO t1(a,b,c) VALUES (2, NULL, b'00');
************* SHOWING THE RESULT SETS WITH UPDATES and REPLACES *************
TABLES t1 and t2 must be equal otherwise an error will be thrown.
+connection slave;
include/diff_tables.inc [master:t1, slave:t1]
+connection master;
DROP TABLE t1;
+connection slave;
################################################################################
# NULL ---> NOT NULL (STRICT MODE)
# UNCOMMENT THIS AFTER FIXING BUG#43992
@@ -127,6 +149,7 @@ DROP TABLE t1;
################################################################################
# NULL ---> NOT NULL (NON-STRICT MODE)
################################################################################
+connection master;
SET SQL_LOG_BIN= 0;
CREATE TABLE t1(`a` INT NOT NULL, `b` INT,
PRIMARY KEY(`a`)) ENGINE=TokuDB DEFAULT CHARSET=LATIN1;
@@ -135,6 +158,7 @@ PRIMARY KEY(`a`)) ENGINE=TokuDB DEFAULT CHARSET=LATIN1;
CREATE TABLE t3(`a` INT NOT NULL, `b` INT,
PRIMARY KEY(`a`)) ENGINE=TokuDB DEFAULT CHARSET=LATIN1;
SET SQL_LOG_BIN= 1;
+connection slave;
CREATE TABLE t1(`a` INT NOT NULL, `b` INT NOT NULL,
`c` INT NOT NULL,
PRIMARY KEY(`a`)) ENGINE=TokuDB DEFAULT CHARSET=LATIN1;
@@ -145,6 +169,7 @@ CREATE TABLE t3(`a` INT NOT NULL, `b` INT NOT NULL,
`c` INT DEFAULT 500,
PRIMARY KEY(`a`)) ENGINE=TokuDB DEFAULT CHARSET=LATIN1;
************* EXECUTION WITH INSERTS *************
+connection master;
INSERT INTO t1(a) VALUES (1);
INSERT INTO t1(a, b) VALUES (2, NULL);
INSERT INTO t1(a, b) VALUES (3, 1);
@@ -159,26 +184,33 @@ REPLACE INTO t3(a, b) VALUES (5, null);
REPLACE INTO t3(a, b) VALUES (3, null);
UPDATE t3 SET b = NULL where a = 4;
************* SHOWING THE RESULT SETS *************
+connection master;
+connection slave;
+connection master;
SELECT * FROM t1 ORDER BY a;
a b
1 NULL
2 NULL
3 1
+connection slave;
SELECT * FROM t1 ORDER BY a;
a b c
1 0 0
2 0 0
3 1 0
+connection master;
SELECT * FROM t2 ORDER BY a;
a b
1 NULL
2 NULL
3 1
+connection slave;
SELECT * FROM t2 ORDER BY a;
a b c
1 0 NULL
2 0 NULL
3 1 NULL
+connection master;
SELECT * FROM t3 ORDER BY a;
a b
1 NULL
@@ -186,6 +218,7 @@ a b
3 NULL
4 NULL
5 NULL
+connection slave;
SELECT * FROM t3 ORDER BY a;
a b c
1 0 500
@@ -193,7 +226,9 @@ a b c
3 0 500
4 0 500
5 0 500
+connection master;
DROP TABLE t1;
DROP TABLE t2;
DROP TABLE t3;
+connection slave;
include/rpl_end.inc
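Note: the `rpl_not_null_tokudb` results exercise non-strict NULL-to-NOT-NULL conversion: when a replicated row carries NULL for a column the slave declares NOT NULL, the slave stores the type's implicit default (0 for INT) instead of stopping. A minimal sketch of the pattern, mirroring the SQL_LOG_BIN trick the test uses to give master and slave different definitions:

    connection master;
    SET SQL_LOG_BIN= 0;
    CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY, b INT) ENGINE=TokuDB;
    SET SQL_LOG_BIN= 1;
    connection slave;
    CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY, b INT NOT NULL) ENGINE=TokuDB;
    connection master;
    INSERT INTO t1 (a) VALUES (1);   -- b is NULL on the master
    sync_slave_with_master;
    SELECT * FROM t1;                -- b arrives as 0, the implicit INT default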
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_parallel_tokudb.result b/storage/tokudb/mysql-test/rpl/r/rpl_parallel_tokudb.result
index e2daa5d1326..97b2b60942d 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_parallel_tokudb.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_parallel_tokudb.result
@@ -1,9 +1,15 @@
include/master-slave.inc
[connection master]
+connection master;
+connection slave;
call mtr.add_suppression('Slave: Error dropping database');
include/stop_slave.inc
start slave;
+connection master;
+connection slave;
stop slave sql_thread;
+connection master;
+connection slave;
insert into test0.benchmark set state='slave is processing load';
start slave sql_thread;
use test0;
@@ -17,6 +23,7 @@ select ts from test0.benchmark where state like 'slave is supposed to finish wit
select ts from test0.benchmark where state like 'slave ends load' into @s_1;
select ts from test0.benchmark where state like 'slave is processing load' into @s_0;
select time_to_sec(@m_1) - time_to_sec(@m_0) as 'delta.out';
+connection master;
include/diff_tables.inc [master:test15.ti_nk, slave:test15.ti_nk]
include/diff_tables.inc [master:test15.ti_wk, slave:test15.ti_wk]
include/diff_tables.inc [master:test14.ti_nk, slave:test14.ti_nk]
@@ -49,4 +56,6 @@ include/diff_tables.inc [master:test1.ti_nk, slave:test1.ti_nk]
include/diff_tables.inc [master:test1.ti_wk, slave:test1.ti_wk]
include/diff_tables.inc [master:test0.ti_nk, slave:test0.ti_nk]
include/diff_tables.inc [master:test0.ti_wk, slave:test0.ti_wk]
+connection master;
+connection slave;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_parallel_tokudb_delete_pk.result b/storage/tokudb/mysql-test/rpl/r/rpl_parallel_tokudb_delete_pk.result
index 48ea60013ad..9ad7708a11d 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_parallel_tokudb_delete_pk.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_parallel_tokudb_delete_pk.result
@@ -1,19 +1,31 @@
include/master-slave.inc
[connection master]
+connection master;
drop table if exists t;
+connection slave;
show variables like 'tokudb_rpl_%';
Variable_name Value
+connection master;
create table t (a bigint not null, primary key(a)) engine=tokudb;
insert into t values (1);
insert into t values (2),(3);
insert into t values (4);
+connection master;
+connection slave;
+connection master;
include/diff_tables.inc [master:test.t, slave:test.t]
+connection master;
delete from t where a=2;
select unix_timestamp() into @tstart;
+connection master;
+connection slave;
+connection master;
select unix_timestamp() into @tend;
select @tend-@tstart <= 5;
@tend-@tstart <= 5
1
include/diff_tables.inc [master:test.t, slave:test.t]
+connection master;
drop table if exists t;
+connection slave;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_parallel_tokudb_update_pk_uc0_lookup0.result b/storage/tokudb/mysql-test/rpl/r/rpl_parallel_tokudb_update_pk_uc0_lookup0.result
index 10375677c8d..10ab579de27 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_parallel_tokudb_update_pk_uc0_lookup0.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_parallel_tokudb_update_pk_uc0_lookup0.result
@@ -1,22 +1,33 @@
include/master-slave.inc
[connection master]
+connection master;
drop table if exists t;
+connection slave;
show variables like 'tokudb_rpl_%';
Variable_name Value
+connection master;
create table t (a bigint not null, b bigint not null, primary key(a)) engine=tokudb;
insert into t values (1,0);
insert into t values (2,0),(3,0);
insert into t values (4,0);
+connection master;
+connection slave;
+connection master;
include/diff_tables.inc [master:test.t, slave:test.t]
+connection master;
update t set b=b+1 where a=2;
update t set b=b+2 where a=1;
update t set b=b+3 where a=4;
update t set b=b+4 where a=3;
update t set b=b+1 where 1<=a and a<=3;
# select unix_timestamp() into @tstart;
+connection master;
+connection slave;
+connection master;
# Commented out for MariaDB
# select unix_timestamp() into @tend;
# select @tend-@tstart <= 5; # assert no delay in the delete time
+connection slave;
select * from t;
a b
1 3
@@ -24,5 +35,7 @@ a b
3 5
4 3
include/diff_tables.inc [master:test.t, slave:test.t]
+connection master;
drop table if exists t;
+connection slave;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_parallel_tokudb_write_pk.result b/storage/tokudb/mysql-test/rpl/r/rpl_parallel_tokudb_write_pk.result
index 1cb047bbf62..0ae63f0d02f 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_parallel_tokudb_write_pk.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_parallel_tokudb_write_pk.result
@@ -1,12 +1,21 @@
include/master-slave.inc
[connection master]
+connection master;
drop table if exists t;
+connection slave;
show variables like 'tokudb_rpl_unique_checks%';
Variable_name Value
+connection master;
create table t (a bigint not null, primary key(a)) engine=tokudb;
insert into t values (1);
insert into t values (2),(3);
insert into t values (4);
+connection slave;
+connection master;
+connection slave;
+connection master;
include/diff_tables.inc [master:test.t, slave:test.t]
+connection master;
drop table if exists t;
+connection slave;
include/rpl_end.inc
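Note: the four `rpl_parallel_tokudb*` tests above all converge on the same consistency check, `include/diff_tables.inc`, which fails the run if the listed tables differ between servers. The include is driven by a single `let` variable:

    --let $diff_tables= master:test.t, slave:test.t
    --source include/diff_tables.inc

The runs of paired `connection master;` / `connection slave;` lines with nothing between them are synchronisation points (save and sync on the master's binlog position) whose commands produce no output of their own.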
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_partition_tokudb.result b/storage/tokudb/mysql-test/rpl/r/rpl_partition_tokudb.result
index 4f289345a4b..d7e99db1d0e 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_partition_tokudb.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_partition_tokudb.result
@@ -55,21 +55,21 @@ show create table test.byrange_tbl;
Table Create Table
byrange_tbl CREATE TABLE `byrange_tbl` (
`id` int(11) NOT NULL AUTO_INCREMENT,
- `dt` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+ `dt` timestamp NOT NULL DEFAULT current_timestamp() ON UPDATE current_timestamp(),
`user` char(255) DEFAULT NULL,
`uuidf` varbinary(255) DEFAULT NULL,
`fkid` int(11) DEFAULT NULL,
`filler` varchar(255) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=TokuDB AUTO_INCREMENT=201 DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (id)
-(PARTITION pa100 VALUES LESS THAN (100) ENGINE = TokuDB,
- PARTITION paMax VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`id`)
+(PARTITION `pa100` VALUES LESS THAN (100) ENGINE = TokuDB,
+ PARTITION `paMax` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
show create table test.regular_tbl;
Table Create Table
regular_tbl CREATE TABLE `regular_tbl` (
`id` int(11) NOT NULL AUTO_INCREMENT,
- `dt` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+ `dt` timestamp NOT NULL DEFAULT current_timestamp() ON UPDATE current_timestamp(),
`user` char(255) DEFAULT NULL,
`uuidf` varbinary(255) DEFAULT NULL,
`fkid` int(11) DEFAULT NULL,
@@ -93,25 +93,27 @@ SELECT * FROM test.regular_tbl ORDER BY fkid DESC LIMIT 2;
id dt user uuidf fkid filler
1 date-time USER UUID 300 Partitioned table! Going to test replication for MySQL
2 date-time USER UUID 299 Partitioned table! Going to test replication for MySQL
+connection slave;
+connection slave;
show create table test.byrange_tbl;
Table Create Table
byrange_tbl CREATE TABLE `byrange_tbl` (
`id` int(11) NOT NULL AUTO_INCREMENT,
- `dt` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+ `dt` timestamp NOT NULL DEFAULT current_timestamp() ON UPDATE current_timestamp(),
`user` char(255) DEFAULT NULL,
`uuidf` varbinary(255) DEFAULT NULL,
`fkid` int(11) DEFAULT NULL,
`filler` varchar(255) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=TokuDB AUTO_INCREMENT=201 DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (id)
-(PARTITION pa100 VALUES LESS THAN (100) ENGINE = TokuDB,
- PARTITION paMax VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`id`)
+(PARTITION `pa100` VALUES LESS THAN (100) ENGINE = TokuDB,
+ PARTITION `paMax` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
show create table test.regular_tbl;
Table Create Table
regular_tbl CREATE TABLE `regular_tbl` (
`id` int(11) NOT NULL AUTO_INCREMENT,
- `dt` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+ `dt` timestamp NOT NULL DEFAULT current_timestamp() ON UPDATE current_timestamp(),
`user` char(255) DEFAULT NULL,
`uuidf` varbinary(255) DEFAULT NULL,
`fkid` int(11) DEFAULT NULL,
@@ -140,6 +142,7 @@ SELECT * FROM test.regular_tbl ORDER BY fkid DESC LIMIT 2;
id dt user uuidf fkid filler
1 date-time USER UUID 300 Partitioned table! Going to test replication for MySQL
2 date-time USER UUID 299 Partitioned table! Going to test replication for MySQL
+connection master;
DROP PROCEDURE test.proc_norm;
DROP PROCEDURE test.proc_byrange;
DROP TABLE test.regular_tbl;
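Note: the `SHOW CREATE TABLE` hunks in `rpl_partition_tokudb.result` capture two server-side formatting changes rather than anything engine-specific: column defaults are printed in expression form (`current_timestamp()` instead of `CURRENT_TIMESTAMP`), and the partitioning clause is emitted unconditionally with quoted identifiers instead of inside a `/*!50100 ... */` version-guard comment. Condensed from the hunks above, the new shape is:

    CREATE TABLE `byrange_tbl` (
      `id` int(11) NOT NULL AUTO_INCREMENT,
      `dt` timestamp NOT NULL DEFAULT current_timestamp() ON UPDATE current_timestamp(),
      PRIMARY KEY (`id`)
    ) ENGINE=TokuDB DEFAULT CHARSET=latin1
     PARTITION BY RANGE (`id`)
    (PARTITION `pa100` VALUES LESS THAN (100) ENGINE = TokuDB,
     PARTITION `paMax` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)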
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_relay_space_tokudb.result b/storage/tokudb/mysql-test/rpl/r/rpl_relay_space_tokudb.result
index 2eb4bce3efe..f014ca6a90d 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_relay_space_tokudb.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_relay_space_tokudb.result
@@ -14,10 +14,12 @@ name age id
Andy 31 00000001
Jacob 2 00000002
Caleb 1 00000003
+connection slave;
SELECT * FROM t1 ORDER BY id;
name age id
Andy 31 00000001
Jacob 2 00000002
Caleb 1 00000003
+connection master;
DROP TABLE t1;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_rfr_disable_on_expl_pk_absence.result b/storage/tokudb/mysql-test/rpl/r/rpl_rfr_disable_on_expl_pk_absence.result
index 2977dc859f5..127184a0766 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_rfr_disable_on_expl_pk_absence.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_rfr_disable_on_expl_pk_absence.result
@@ -1,8 +1,10 @@
include/master-slave.inc
[connection master]
call mtr.add_suppression("read free replication is disabled for tokudb table");
+connection master;
CREATE TABLE t (a int(11), b char(20)) ENGINE = TokuDB;
INSERT INTO t (a, b) VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e');
+connection slave;
SELECT * FROM t;
a b
1 a
@@ -10,6 +12,7 @@ a b
3 c
4 d
5 e
+connection master;
UPDATE t SET a = a + 10 WHERE b = 'b';
SELECT * FROM t;
a b
@@ -18,6 +21,7 @@ a b
3 c
4 d
5 e
+connection slave;
SELECT * FROM t;
a b
1 a
@@ -25,6 +29,7 @@ a b
3 c
4 d
5 e
+connection master;
UPDATE t SET a = a + 10 WHERE b = 'b';
SELECT * FROM t;
a b
@@ -33,6 +38,7 @@ a b
3 c
4 d
5 e
+connection slave;
SELECT * FROM t;
a b
1 a
@@ -40,5 +46,7 @@ a b
3 c
4 d
5 e
+connection master;
DROP TABLE t;
+connection slave;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_row_basic_3tokudb.result b/storage/tokudb/mysql-test/rpl/r/rpl_row_basic_3tokudb.result
index f137414c6c9..e638a1aab12 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_row_basic_3tokudb.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_row_basic_3tokudb.result
@@ -1,10 +1,15 @@
include/master-slave.inc
[connection master]
+connection slave;
+connection slave;
+connection master;
CREATE TABLE t1 (C1 CHAR(1), C2 CHAR(1), INDEX (C1)) ENGINE = 'TokuDB' ;
SELECT * FROM t1;
C1 C2
+connection slave;
SELECT * FROM t1;
C1 C2
+connection master;
INSERT INTO t1 VALUES ('A','B'), ('X','Y'), ('X','X');
INSERT INTO t1 VALUES ('A','C'), ('X','Z'), ('A','A');
SELECT * FROM t1 ORDER BY C1,C2;
@@ -15,6 +20,7 @@ A C
X X
X Y
X Z
+connection slave;
SELECT * FROM t1 ORDER BY C1,C2;
C1 C2
A A
@@ -23,6 +29,7 @@ A C
X X
X Y
X Z
+connection master;
DELETE FROM t1 WHERE C1 = C2;
SELECT * FROM t1 ORDER BY C1,C2;
C1 C2
@@ -30,12 +37,14 @@ A B
A C
X Y
X Z
+connection slave;
SELECT * FROM t1 ORDER BY C1,C2;
C1 C2
A B
A C
X Y
X Z
+connection master;
UPDATE t1 SET C2 = 'I' WHERE C1 = 'A' AND C2 = 'C';
SELECT * FROM t1 ORDER BY C1,C2;
C1 C2
@@ -43,16 +52,19 @@ A B
A I
X Y
X Z
+connection slave;
SELECT * FROM t1 ORDER BY C1,C2;
C1 C2
A B
A I
X Y
X Z
+connection slave;
include/assert.inc [Counter for COM_COMMIT is consistent with the number of actual commits]
include/assert.inc [Counter for COM_INSERT is consistent with the number of actual inserts]
include/assert.inc [Counter for COM_DELETE is consistent with the number of actual deletes]
include/assert.inc [Counter for COM_UPDATE is consistent with the number of actual updates]
+connection master;
UPDATE t1 SET c2 = 'Q' WHERE c1 = 'A' AND c2 = 'N';
SELECT * FROM t1 ORDER BY c1,c2;
C1 C2
@@ -60,12 +72,14 @@ A B
A I
X Y
X Z
+connection slave;
SELECT * FROM t1 ORDER BY c1,c2;
C1 C2
A B
A I
X Y
X Z
+connection master;
CREATE TABLE t2 (c1 INT, c12 char(1), c2 INT, PRIMARY KEY (c1)) ENGINE = 'TokuDB' ;
INSERT INTO t2
VALUES (1,'A',2), (2,'A',4), (3,'A',9), (4,'A',15), (5,'A',25),
@@ -88,6 +102,7 @@ c1 c12 c2
5 A 25
8 A 64
9 A 81
+connection slave;
SELECT * FROM t2 ORDER BY c1,c2;
c1 c12 c2
1 A 2
@@ -106,6 +121,7 @@ c1 c12 c2
5 A 25
8 A 64
9 A 81
+connection master;
UPDATE t2 SET c2 = c1*c1 WHERE c2 != c1*c1;
SELECT * FROM t2 WHERE c2 = c1 * c1 ORDER BY c1,c2;
c1 c12 c2
@@ -118,6 +134,7 @@ c1 c12 c2
7 A 49
8 A 64
9 A 81
+connection slave;
SELECT * FROM t2 WHERE c2 = c1 * c1 ORDER BY c1,c2;
c1 c12 c2
1 A 1
@@ -129,6 +146,7 @@ c1 c12 c2
7 A 49
8 A 64
9 A 81
+connection master;
UPDATE t2 SET c12 = 'Q' WHERE c1 = 1 AND c2 = 999;
SELECT * FROM t2 ORDER BY c1,c2;
c1 c12 c2
@@ -141,6 +159,7 @@ c1 c12 c2
7 A 49
8 A 64
9 A 81
+connection slave;
SELECT * FROM t2 ORDER BY c1,c2;
c1 c12 c2
1 A 1
@@ -152,6 +171,7 @@ c1 c12 c2
7 A 49
8 A 64
9 A 81
+connection master;
DELETE FROM t2 WHERE c1 % 4 = 0;
SELECT * FROM t2 ORDER BY c1,c2;
c1 c12 c2
@@ -162,6 +182,7 @@ c1 c12 c2
6 A 36
7 A 49
9 A 81
+connection slave;
SELECT * FROM t2 ORDER BY c1,c2;
c1 c12 c2
1 A 1
@@ -171,7 +192,9 @@ c1 c12 c2
6 A 36
7 A 49
9 A 81
+connection master;
UPDATE t2 SET c12='X';
+connection master;
CREATE TABLE t3 (C1 CHAR(1), C2 CHAR(1), pk1 INT, C3 CHAR(1), pk2 INT, PRIMARY KEY (pk1,pk2)) ENGINE = 'TokuDB' ;
INSERT INTO t3 VALUES ('A','B',1,'B',1), ('X','Y',2,'B',1), ('X','X',3,'B',1);
INSERT INTO t3 VALUES ('A','C',1,'B',2), ('X','Z',2,'B',2), ('A','A',3,'B',2);
@@ -183,6 +206,7 @@ A C 1 B 2
X X 3 B 1
X Y 2 B 1
X Z 2 B 2
+connection slave;
SELECT * FROM t3 ORDER BY C1,C2;
C1 C2 pk1 C3 pk2
A A 3 B 2
@@ -191,6 +215,7 @@ A C 1 B 2
X X 3 B 1
X Y 2 B 1
X Z 2 B 2
+connection master;
DELETE FROM t3 WHERE C1 = C2;
SELECT * FROM t3 ORDER BY C1,C2;
C1 C2 pk1 C3 pk2
@@ -198,12 +223,14 @@ A B 1 B 1
A C 1 B 2
X Y 2 B 1
X Z 2 B 2
+connection slave;
SELECT * FROM t3 ORDER BY C1,C2;
C1 C2 pk1 C3 pk2
A B 1 B 1
A C 1 B 2
X Y 2 B 1
X Z 2 B 2
+connection master;
UPDATE t3 SET C2 = 'I' WHERE C1 = 'A' AND C2 = 'C';
SELECT * FROM t3 ORDER BY C1,C2;
C1 C2 pk1 C3 pk2
@@ -211,12 +238,14 @@ A B 1 B 1
A I 1 B 2
X Y 2 B 1
X Z 2 B 2
+connection slave;
SELECT * FROM t3 ORDER BY C1,C2;
C1 C2 pk1 C3 pk2
A B 1 B 1
A I 1 B 2
X Y 2 B 1
X Z 2 B 2
+connection master;
CREATE TABLE t6 (C1 CHAR(1), C2 CHAR(1), C3 INT) ENGINE = 'TokuDB';
INSERT INTO t6 VALUES ('A','B',1), ('X','Y',2), ('X','X',3);
INSERT INTO t6 VALUES ('A','C',4), ('X','Z',5), ('A','A',6);
@@ -228,6 +257,7 @@ X X 3
A C 4
X Z 5
A A 6
+connection slave;
SELECT * FROM t6 ORDER BY C3;
C1 C2 C3
A B 1
@@ -236,6 +266,7 @@ X X 3
A C 4
X Z 5
A A 6
+connection master;
DELETE FROM t6 WHERE C1 = C2;
SELECT * FROM t6 ORDER BY C3;
C1 C2 C3
@@ -243,12 +274,14 @@ A B 1
X Y 2
A C 4
X Z 5
+connection slave;
SELECT * FROM t6 ORDER BY C3;
C1 C2 C3
A B 1
X Y 2
A C 4
X Z 5
+connection master;
UPDATE t6 SET C2 = 'I' WHERE C1 = 'A' AND C2 = 'C';
SELECT * FROM t6 ORDER BY C3;
C1 C2 C3
@@ -256,12 +289,14 @@ A B 1
X Y 2
A I 4
X Z 5
+connection slave;
SELECT * FROM t6 ORDER BY C3;
C1 C2 C3
A B 1
X Y 2
A I 4
X Z 5
+connection master;
CREATE TABLE t5 (C1 CHAR(1), C2 CHAR(1), C3 INT PRIMARY KEY) ENGINE = 'TokuDB' ;
INSERT INTO t5 VALUES ('A','B',1), ('X','Y',2), ('X','X',3);
INSERT INTO t5 VALUES ('A','C',4), ('X','Z',5), ('A','A',6);
@@ -310,6 +345,7 @@ X Q 5 7 R 49 X Y 2 S 1
X Q 5 7 R 49 X Z 2 S 2
X Q 5 9 R 81 X Y 2 S 1
X Q 5 9 R 81 X Z 2 S 2
+connection slave;
SELECT * FROM t5,t2,t3 WHERE t5.C2='Q' AND t2.c12='R' AND t3.C3 ='S' ORDER BY t5.C3,t2.c1,t3.pk1,t3.pk2;
C1 C2 C3 c1 c12 c2 C1 C2 pk1 C3 pk2
X Q 2 1 R 1 X Y 2 S 1
@@ -354,19 +390,24 @@ X Q 5 7 R 49 X Y 2 S 1
X Q 5 7 R 49 X Z 2 S 2
X Q 5 9 R 81 X Y 2 S 1
X Q 5 9 R 81 X Z 2 S 2
+connection slave;
SET @saved_slave_type_conversions = @@SLAVE_TYPE_CONVERSIONS;
SET GLOBAL SLAVE_TYPE_CONVERSIONS = 'ALL_LOSSY';
+connection master;
CREATE TABLE t4 (C1 CHAR(1) PRIMARY KEY, B1 BIT(1), B2 BIT(1) NOT NULL DEFAULT 0, C2 CHAR(1) NOT NULL DEFAULT 'A') ENGINE = 'TokuDB' ;
INSERT INTO t4 SET C1 = 1;
SELECT C1,HEX(B1),HEX(B2) FROM t4 ORDER BY C1;
C1 HEX(B1) HEX(B2)
1 NULL 0
+connection slave;
SELECT C1,HEX(B1),HEX(B2) FROM t4 ORDER BY C1;
C1 HEX(B1) HEX(B2)
1 NULL 0
SET GLOBAL SLAVE_TYPE_CONVERSIONS = @saved_slave_type_conversions;
+connection master;
CREATE TABLE t7 (C1 INT PRIMARY KEY, C2 INT) ENGINE = 'TokuDB' ;
---- on slave: original values ---
+connection slave;
+--- original values ---
INSERT INTO t7 VALUES (1,3), (2,6), (3,9);
SELECT * FROM t7 ORDER BY C1;
C1 C2
@@ -374,21 +415,23 @@ C1 C2
2 6
3 9
set @@global.slave_exec_mode= 'IDEMPOTENT';
---- on master: new values inserted ---
+connection master;
+--- new values inserted ---
INSERT INTO t7 VALUES (1,2), (2,4), (3,6);
SELECT * FROM t7 ORDER BY C1;
C1 C2
1 2
2 4
3 6
+connection slave;
set @@global.slave_exec_mode= default;
---- on slave: old values should be overwritten by replicated values ---
+--- old values should be overwritten by replicated values ---
SELECT * FROM t7 ORDER BY C1;
C1 C2
1 2
2 4
3 6
---- on master ---
+connection master;
CREATE TABLE t8 (a INT PRIMARY KEY, b INT UNIQUE, c INT UNIQUE) ENGINE = 'TokuDB' ;
INSERT INTO t8 VALUES (99,99,99);
INSERT INTO t8 VALUES (99,22,33);
@@ -400,7 +443,7 @@ ERROR 23000: Duplicate entry '99' for key 'c'
SELECT * FROM t8 ORDER BY a;
a b c
99 99 99
---- on slave ---
+connection slave;
SELECT * FROM t8 ORDER BY a;
a b c
99 99 99
@@ -412,10 +455,10 @@ a b c
3 6 9
99 99 99
set @@global.slave_exec_mode= 'IDEMPOTENT';
---- on master ---
+connection master;
INSERT INTO t8 VALUES (2,4,8);
+connection slave;
set @@global.slave_exec_mode= default;
---- on slave ---
SELECT * FROM t8 ORDER BY a;
a b c
1 2 3
@@ -423,100 +466,136 @@ a b c
3 6 9
99 99 99
**** Test for BUG#31552 ****
-**** On Master ****
+connection master;
DELETE FROM t1;
+connection slave;
include/rpl_reset.inc
-**** On Master ****
+connection master;
INSERT INTO t1 VALUES ('K','K'), ('L','L'), ('M','M');
-**** On Master ****
+connection slave;
set @@global.slave_exec_mode= 'IDEMPOTENT';
DELETE FROM t1 WHERE C1 = 'L';
+connection master;
DELETE FROM t1;
SELECT COUNT(*) FROM t1 ORDER BY c1,c2;
COUNT(*) 0
+connection slave;
set @@global.slave_exec_mode= default;
include/check_slave_is_running.inc
SELECT COUNT(*) FROM t1 ORDER BY c1,c2;
COUNT(*) 0
**** Test for BUG#37076 ****
-**** On Master ****
+connection master;
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (a TIMESTAMP, b DATETIME, c DATE);
INSERT INTO t1 VALUES(
'2005-11-14 01:01:01', '2005-11-14 01:01:02', '2005-11-14');
-**** On Slave ****
+connection slave;
SELECT * FROM t1;
a b c
2005-11-14 01:01:01 2005-11-14 01:01:02 2005-11-14
+connection master;
DROP TABLE IF EXISTS t1,t2,t3,t4,t5,t6,t7,t8;
+connection slave;
+connection master;
CREATE TABLE t1 (i INT NOT NULL,
c CHAR(16) CHARACTER SET utf8 NOT NULL,
j INT NOT NULL) ENGINE = 'TokuDB' ;
CREATE TABLE t2 (i INT NOT NULL,
c CHAR(16) CHARACTER SET utf8 NOT NULL,
j INT NOT NULL) ENGINE = 'TokuDB' ;
+connection slave;
ALTER TABLE t2 MODIFY c CHAR(128) CHARACTER SET utf8 NOT NULL;
+connection master;
CREATE TABLE t3 (i INT NOT NULL,
c CHAR(128) CHARACTER SET utf8 NOT NULL,
j INT NOT NULL) ENGINE = 'TokuDB' ;
+connection slave;
ALTER TABLE t3 MODIFY c CHAR(16) CHARACTER SET utf8 NOT NULL;
+connection master;
CREATE TABLE t4 (i INT NOT NULL,
c CHAR(128) CHARACTER SET utf8 NOT NULL,
j INT NOT NULL) ENGINE = 'TokuDB' ;
CREATE TABLE t5 (i INT NOT NULL,
c CHAR(255) CHARACTER SET utf8 NOT NULL,
j INT NOT NULL) ENGINE = 'TokuDB' ;
+connection slave;
ALTER TABLE t5 MODIFY c CHAR(16) CHARACTER SET utf8 NOT NULL;
+connection master;
CREATE TABLE t6 (i INT NOT NULL,
c CHAR(255) CHARACTER SET utf8 NOT NULL,
j INT NOT NULL) ENGINE = 'TokuDB' ;
+connection slave;
ALTER TABLE t6 MODIFY c CHAR(128) CHARACTER SET utf8 NOT NULL;
+connection master;
CREATE TABLE t7 (i INT NOT NULL,
c CHAR(255) CHARACTER SET utf8 NOT NULL,
j INT NOT NULL) ENGINE = 'TokuDB' ;
+connection slave;
SET @saved_slave_type_conversions = @@slave_type_conversions;
SET GLOBAL SLAVE_TYPE_CONVERSIONS = 'ALL_NON_LOSSY';
[expecting slave to replicate correctly]
+connection master;
INSERT INTO t1 VALUES (1, "", 1);
INSERT INTO t1 VALUES (2, repeat(_utf8'a', 16), 2);
+connection slave;
include/diff_tables.inc [master:t1, slave:t1]
[expecting slave to replicate correctly]
+connection master;
INSERT INTO t2 VALUES (1, "", 1);
INSERT INTO t2 VALUES (2, repeat(_utf8'a', 16), 2);
+connection slave;
include/diff_tables.inc [master:t2, slave:t2]
+connection slave;
SET GLOBAL SLAVE_TYPE_CONVERSIONS = @saved_slave_type_conversions;
call mtr.add_suppression("Slave SQL.*Table definition on master and slave does not match: Column 1 size mismatch.* error.* 1535");
call mtr.add_suppression("Slave SQL.*Could not execute Delete_rows event on table test.t1.* error.* 1032");
call mtr.add_suppression("Slave SQL.*Column 1 of table .test.t.. cannot be converted from type.*, error.* 1677");
+call mtr.add_suppression("Can't find record in 't1'");
include/rpl_reset.inc
[expecting slave to replicate correctly]
+connection master;
INSERT INTO t4 VALUES (1, "", 1);
INSERT INTO t4 VALUES (2, repeat(_utf8'a', 128), 2);
+connection slave;
include/diff_tables.inc [master:t4, slave:t4]
[expecting slave to stop]
+connection master;
INSERT INTO t5 VALUES (1, "", 1);
INSERT INTO t5 VALUES (2, repeat(_utf8'a', 255), 2);
+connection slave;
include/wait_for_slave_sql_error.inc [errno=1677]
-Last_SQL_Error = 'Column 1 of table 'test.t5' cannot be converted from type 'char(255)' to type 'char(16)''
+Last_SQL_Error = 'Column 1 of table 'test.t5' cannot be converted from type 'char(765 octets)' to type 'char(48 octets) character set utf8''
include/rpl_reset.inc
[expecting slave to stop]
+connection master;
INSERT INTO t6 VALUES (1, "", 1);
INSERT INTO t6 VALUES (2, repeat(_utf8'a', 255), 2);
+connection slave;
include/wait_for_slave_sql_error.inc [errno=1677]
-Last_SQL_Error = 'Column 1 of table 'test.t6' cannot be converted from type 'char(255)' to type 'char(128)''
+Last_SQL_Error = 'Column 1 of table 'test.t6' cannot be converted from type 'char(765 octets)' to type 'char(384 octets) character set utf8''
include/rpl_reset.inc
[expecting slave to replicate correctly]
+connection master;
INSERT INTO t7 VALUES (1, "", 1);
INSERT INTO t7 VALUES (2, repeat(_utf8'a', 255), 2);
+connection slave;
include/diff_tables.inc [master:t7, slave:t7]
+connection master;
drop table t1, t2, t3, t4, t5, t6, t7;
+connection slave;
+connection master;
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE='TokuDB';
INSERT INTO t1 VALUES (1), (2), (3);
UPDATE t1 SET a = 10;
ERROR 23000: Duplicate entry '10' for key 'PRIMARY'
INSERT INTO t1 VALUES (4);
+connection slave;
include/diff_tables.inc [master:t1, slave:t1]
+connection master;
drop table t1;
+connection slave;
+connection master;
DROP TABLE IF EXISTS t1, t2;
CREATE TABLE t1 (
`pk` int(11) NOT NULL AUTO_INCREMENT,
@@ -568,10 +647,13 @@ UPDATE t1 SET `pk` = 6 ORDER BY `int_key` LIMIT 6;
ERROR 23000: Duplicate entry '6' for key 'PRIMARY'
DELETE FROM t2 WHERE `pk` < 7 LIMIT 1;
UPDATE t1 SET `int_key` = 4 ORDER BY `pk` LIMIT 6;
+connection slave;
*** results: t2 must be consistent ****
include/diff_tables.inc [master:t2, slave:t2]
+connection master;
DROP TABLE t1, t2;
EOF OF TESTS
+connection master;
CREATE TABLE t1 (a int) ENGINE='TokuDB';
INSERT IGNORE INTO t1 VALUES (NULL);
INSERT INTO t1 ( a ) VALUES ( 0 );
@@ -585,41 +667,47 @@ UPDATE t1 SET a = 9 WHERE a < 3;
INSERT INTO t1 ( a ) VALUES ( 3 );
UPDATE t1 SET a = 0 WHERE a < 4;
UPDATE t1 SET a = 8 WHERE a < 5;
+connection slave;
include/diff_tables.inc [master:t1, slave:t1]
+connection master;
drop table t1;
+connection slave;
+connection master;
+connection slave;
SET @saved_slave_type_conversions = @@SLAVE_TYPE_CONVERSIONS;
SET GLOBAL SLAVE_TYPE_CONVERSIONS = 'ALL_LOSSY';
+connection master;
CREATE TABLE t1 (a bit) ENGINE='TokuDB';
INSERT IGNORE INTO t1 VALUES (NULL);
INSERT INTO t1 ( a ) VALUES ( 0 );
UPDATE t1 SET a = 0 WHERE a = 1 LIMIT 3;
-INSERT INTO t1 ( a ) VALUES ( 5 );
+INSERT IGNORE INTO t1 ( a ) VALUES ( 5 );
DELETE FROM t1 WHERE a < 2 LIMIT 4;
DELETE FROM t1 WHERE a < 9 LIMIT 4;
-INSERT INTO t1 ( a ) VALUES ( 9 );
+INSERT IGNORE INTO t1 ( a ) VALUES ( 9 );
UPDATE t1 SET a = 8 WHERE a = 0 LIMIT 6;
-INSERT INTO t1 ( a ) VALUES ( 8 );
+INSERT IGNORE INTO t1 ( a ) VALUES ( 8 );
UPDATE t1 SET a = 0 WHERE a < 6 LIMIT 0;
-INSERT INTO t1 ( a ) VALUES ( 4 );
-INSERT INTO t1 ( a ) VALUES ( 3 );
+INSERT IGNORE INTO t1 ( a ) VALUES ( 4 );
+INSERT IGNORE INTO t1 ( a ) VALUES ( 3 );
UPDATE t1 SET a = 0 WHERE a = 7 LIMIT 6;
DELETE FROM t1 WHERE a = 4 LIMIT 7;
-UPDATE t1 SET a = 9 WHERE a < 2 LIMIT 9;
+UPDATE IGNORE t1 SET a = 9 WHERE a < 2 LIMIT 9;
UPDATE t1 SET a = 0 WHERE a < 9 LIMIT 2;
DELETE FROM t1 WHERE a < 0 LIMIT 5;
-INSERT INTO t1 ( a ) VALUES ( 5 );
-UPDATE t1 SET a = 4 WHERE a < 6 LIMIT 4;
-INSERT INTO t1 ( a ) VALUES ( 5 );
-UPDATE t1 SET a = 9 WHERE a < 5 LIMIT 8;
+INSERT IGNORE INTO t1 ( a ) VALUES ( 5 );
+UPDATE IGNORE t1 SET a = 4 WHERE a < 6 LIMIT 4;
+INSERT IGNORE INTO t1 ( a ) VALUES ( 5 );
+UPDATE IGNORE t1 SET a = 9 WHERE a < 5 LIMIT 8;
DELETE FROM t1 WHERE a < 8 LIMIT 8;
-INSERT INTO t1 ( a ) VALUES ( 6 );
+INSERT IGNORE INTO t1 ( a ) VALUES ( 6 );
DELETE FROM t1 WHERE a < 6 LIMIT 7;
UPDATE t1 SET a = 7 WHERE a = 3 LIMIT 7;
UPDATE t1 SET a = 8 WHERE a = 0 LIMIT 6;
-INSERT INTO t1 ( a ) VALUES ( 7 );
+INSERT IGNORE INTO t1 ( a ) VALUES ( 7 );
DELETE FROM t1 WHERE a < 9 LIMIT 4;
-INSERT INTO t1 ( a ) VALUES ( 7 );
-INSERT INTO t1 ( a ) VALUES ( 6 );
+INSERT IGNORE INTO t1 ( a ) VALUES ( 7 );
+INSERT IGNORE INTO t1 ( a ) VALUES ( 6 );
UPDATE t1 SET a = 8 WHERE a = 3 LIMIT 4;
DELETE FROM t1 WHERE a = 2 LIMIT 9;
DELETE FROM t1 WHERE a = 1 LIMIT 4;
@@ -628,8 +716,11 @@ INSERT INTO t1 ( a ) VALUES ( 0 );
DELETE FROM t1 WHERE a < 3 LIMIT 0;
UPDATE t1 SET a = 8 WHERE a = 5 LIMIT 2;
INSERT INTO t1 ( a ) VALUES ( 1 );
-UPDATE t1 SET a = 9 WHERE a < 5 LIMIT 3;
+UPDATE IGNORE t1 SET a = 9 WHERE a < 5 LIMIT 3;
+connection slave;
SET GLOBAL SLAVE_TYPE_CONVERSIONS = @saved_slave_type_conversions;
include/diff_tables.inc [master:t1, slave:t1]
+connection master;
drop table t1;
+connection slave;
include/rpl_end.inc
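Note: the `INSERT IGNORE` / `UPDATE IGNORE` rewrites in the BIT-column block are a strict-mode adjustment. A `BIT(1)` column holds only 0 or 1, so literals such as 5 or 9 overflow it; under the strict `sql_mode` that newer servers default to, that truncation is an error, and adding IGNORE downgrades it back to the warning the test was written around. A minimal sketch (the exact error belongs to the out-of-range/data-too-long family; hedged here):

    CREATE TABLE t1 (a BIT) ENGINE=TokuDB;
    INSERT INTO t1 (a) VALUES (5);         -- rejected under strict sql_mode
    INSERT IGNORE INTO t1 (a) VALUES (5);  -- accepted: truncated to b'1', with a warning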
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_row_blob_tokudb.result b/storage/tokudb/mysql-test/rpl/r/rpl_row_blob_tokudb.result
index 4fb33dc4b6b..084089078cc 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_row_blob_tokudb.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_row_blob_tokudb.result
@@ -1,5 +1,6 @@
include/master-slave.inc
[connection master]
+connection master;
DROP TABLE IF EXISTS test.t1;
DROP TABLE IF EXISTS test.t2;
***** Table Create Section ****
@@ -24,6 +25,7 @@ LENGTH(data)
SELECT LENGTH(data) FROM test.t1 WHERE c1 = 3;
LENGTH(data)
16384
+connection slave;
**** Data Insert Validation Slave Section test.t1 ****
@@ -36,6 +38,7 @@ LENGTH(data)
SELECT LENGTH(data) FROM test.t1 WHERE c1 = 3;
LENGTH(data)
16384
+connection master;
**** Data Update Section test.t1 ****
@@ -50,6 +53,7 @@ LENGTH(data)
SELECT LENGTH(data) FROM test.t1 WHERE c1 = 2;
LENGTH(data)
17408
+connection slave;
**** Data Update Validation Slave Section test.t1 ****
@@ -59,6 +63,7 @@ LENGTH(data)
SELECT LENGTH(data) FROM test.t1 WHERE c1 = 2;
LENGTH(data)
17408
+connection master;
**** End Test Section test.t1 ****
@@ -109,6 +114,7 @@ SELECT c1, LENGTH(c2), SUBSTR(c2,1+2*900,2), LENGTH(c4), SUBSTR(c4,1+3*900,3)
FROM test.t2 WHERE c1=2;
c1 LENGTH(c2) SUBSTR(c2,1+2*900,2) LENGTH(c4) SUBSTR(c4,1+3*900,3)
2 20000 b2 30000 dd2
+connection slave;
**** Data Insert Validation Slave Section test.t2 ****
@@ -120,6 +126,7 @@ SELECT c1, LENGTH(c2), SUBSTR(c2,1+2*900,2), LENGTH(c4), SUBSTR(c4,1+3*900,3)
FROM test.t2 WHERE c1=2;
c1 LENGTH(c2) SUBSTR(c2,1+2*900,2) LENGTH(c4) SUBSTR(c4,1+3*900,3)
2 20000 b2 30000 dd2
+connection master;
**** Data Update Section test.t2 ****
@@ -136,6 +143,7 @@ SELECT c1, LENGTH(c2), SUBSTR(c2,1+2*900,2), LENGTH(c4), SUBSTR(c4,1+3*900,3)
FROM test.t2 WHERE c1=2;
c1 LENGTH(c2) SUBSTR(c2,1+2*900,2) LENGTH(c4) SUBSTR(c4,1+3*900,3)
2 2256 b1 3000 dd1
+connection slave;
**** Data Update Validation Slave Section test.t2 ****
@@ -147,7 +155,9 @@ SELECT c1, LENGTH(c2), SUBSTR(c2,1+2*900,2), LENGTH(c4), SUBSTR(c4,1+3*900,3)
FROM test.t2 WHERE c1=2;
c1 LENGTH(c2) SUBSTR(c2,1+2*900,2) LENGTH(c4) SUBSTR(c4,1+3*900,3)
2 2256 b1 3000 dd1
+connection master;
DROP TABLE IF EXISTS test.t1;
DROP TABLE IF EXISTS test.t2;
+connection slave;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_row_rec_comp_tokudb.result b/storage/tokudb/mysql-test/rpl/r/rpl_row_rec_comp_tokudb.result
index faebaba2add..df1fe125aa4 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_row_rec_comp_tokudb.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_row_rec_comp_tokudb.result
@@ -2,27 +2,39 @@ include/master-slave.inc
[connection master]
## case #1 - last_null_bit_pos==0 in record_compare without X bit
include/rpl_reset.inc
+connection master;
CREATE TABLE t1 (c1 bigint(20) DEFAULT 0, c2 bigint(20) DEFAULT 0, c3 bigint(20) DEFAULT 0, c4 varchar(1) DEFAULT '', c5 bigint(20) DEFAULT 0, c6 bigint(20) DEFAULT 0, c7 bigint(20) DEFAULT 0, c8 bigint(20) DEFAULT 0) ENGINE=TokuDB DEFAULT CHARSET=latin1;
INSERT INTO t1 ( c5, c6 ) VALUES ( 1 , 35 );
INSERT INTO t1 ( c5, c6 ) VALUES ( NULL, 35 );
-UPDATE t1 SET c5 = 'a';
+UPDATE IGNORE t1 SET c5 = 'a';
+connection slave;
include/diff_tables.inc [master:t1, slave:t1]
+connection master;
DROP TABLE t1;
+connection slave;
## case #1.1 - last_null_bit_pos==0 in record_compare with X bit
## (1 column less and no varchar)
include/rpl_reset.inc
+connection master;
CREATE TABLE t1 (c1 bigint(20) DEFAULT 0, c2 bigint(20) DEFAULT 0, c3 bigint(20) DEFAULT 0, c4 bigint(20) DEFAULT 0, c5 bigint(20) DEFAULT 0, c6 bigint(20) DEFAULT 0, c7 bigint(20) DEFAULT 0) ENGINE=TokuDB DEFAULT CHARSET=latin1;
INSERT INTO t1 ( c5, c6 ) VALUES ( 1 , 35 );
INSERT INTO t1 ( c5, c6 ) VALUES ( NULL, 35 );
-UPDATE t1 SET c5 = 'a';
+UPDATE IGNORE t1 SET c5 = 'a';
+connection slave;
include/diff_tables.inc [master:t1, slave:t1]
+connection master;
DROP TABLE t1;
+connection slave;
## case #2 - X bit is wrongly set.
include/rpl_reset.inc
+connection master;
CREATE TABLE t1 (c1 int, c2 varchar(1) default '') ENGINE=TokuDB DEFAULT CHARSET= latin1;
INSERT INTO t1(c1) VALUES (10);
INSERT INTO t1(c1) VALUES (NULL);
UPDATE t1 SET c1= 0;
+connection slave;
include/diff_tables.inc [master:t1, slave:t1]
+connection master;
DROP TABLE t1;
+connection slave;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_row_replace_into.result b/storage/tokudb/mysql-test/rpl/r/rpl_row_replace_into.result
index b1482aa0144..abe80743f62 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_row_replace_into.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_row_replace_into.result
@@ -11,11 +11,13 @@ pk num txt
2 2 twotwo
3 3 three
4 4 four
+connection slave;
select * from testr;
pk num txt
1 1 one
2 2 twotwo
3 3 three
4 4 four
+connection master;
drop table testr;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_row_sp007_tokudb.result b/storage/tokudb/mysql-test/rpl/r/rpl_row_sp007_tokudb.result
index dbe82cfe8ac..44aacc95124 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_row_sp007_tokudb.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_row_sp007_tokudb.result
@@ -12,32 +12,24 @@ INSERT INTO test.t1 VALUES (14);
ROLLBACK to savepoint t1_save;
COMMIT;
END|
-
-< ---- Master selects-- >
--------------------------
CALL test.p1(12);
SELECT * FROM test.t1;
num
12
-
-< ---- Slave selects-- >
-------------------------
+connection slave;
SELECT * FROM test.t1;
num
12
-
-< ---- Master selects-- >
--------------------------
+connection master;
CALL test.p1(13);
SELECT * FROM test.t1;
num
13
-
-< ---- Slave selects-- >
-------------------------
+connection slave;
SELECT * FROM test.t1;
num
13
+connection master;
DROP PROCEDURE test.p1;
DROP TABLE test.t1;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_row_tabledefs_3tokudb.result b/storage/tokudb/mysql-test/rpl/r/rpl_row_tabledefs_3tokudb.result
index d25adb366d5..165df929809 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_row_tabledefs_3tokudb.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_row_tabledefs_3tokudb.result
@@ -1,9 +1,12 @@
include/master-slave.inc
[connection master]
+connection master;
+connection slave;
STOP SLAVE;
SET @my_sql_mode= @@global.sql_mode;
SET GLOBAL SQL_MODE='STRICT_ALL_TABLES';
START SLAVE;
+connection master;
CREATE TABLE t1_int (a INT PRIMARY KEY, b INT) ENGINE=TokuDB;
CREATE TABLE t1_bit (a INT PRIMARY KEY, b INT) ENGINE=TokuDB;
CREATE TABLE t1_char (a INT PRIMARY KEY, b INT) ENGINE=TokuDB;
@@ -16,6 +19,7 @@ CREATE TABLE t6 (a INT, b INT, c INT) ENGINE=TokuDB;
CREATE TABLE t7 (a INT NOT NULL) ENGINE=TokuDB;
CREATE TABLE t8 (a INT NOT NULL) ENGINE=TokuDB;
CREATE TABLE t9 (a INT) ENGINE=TokuDB;
+connection slave;
ALTER TABLE t1_int ADD x INT DEFAULT 42;
ALTER TABLE t1_bit
ADD x BIT(3) DEFAULT b'011',
@@ -37,7 +41,7 @@ set @@global.slave_exec_mode= 'IDEMPOTENT';
INSERT INTO t1_int VALUES (2, 4, 4711);
INSERT INTO t1_char VALUES (2, 4, 'Foo is a bar');
INSERT INTO t1_bit VALUES (2, 4, b'101', b'11100', b'01');
-**** On Master ****
+connection master;
INSERT INTO t1_int VALUES (1,2);
INSERT INTO t1_int VALUES (2,5);
INSERT INTO t1_bit VALUES (1,2);
@@ -56,7 +60,7 @@ SELECT * FROM t1_char ORDER BY a;
a b
1 2
2 5
-**** On Slave ****
+connection slave;
set @@global.slave_exec_mode= default;
SELECT a,b,x FROM t1_int ORDER BY a;
a b x
@@ -70,7 +74,7 @@ SELECT a,b,x FROM t1_char ORDER BY a;
a b x
1 2 Just a test
2 5 Foo is a bar
-**** On Master ****
+connection master;
UPDATE t1_int SET b=2*b WHERE a=2;
UPDATE t1_char SET b=2*b WHERE a=2;
UPDATE t1_bit SET b=2*b WHERE a=2;
@@ -86,7 +90,7 @@ SELECT * FROM t1_char ORDER BY a;
a b
1 2
2 10
-**** On Slave ****
+connection slave;
SELECT a,b,x FROM t1_int ORDER BY a;
a b x
1 2 42
@@ -99,38 +103,59 @@ SELECT a,b,x FROM t1_char ORDER BY a;
a b x
1 2 Just a test
2 10 Foo is a bar
+connection master;
INSERT INTO t9 VALUES (2);
+connection slave;
+connection master;
INSERT INTO t1_nodef VALUES (1,2);
+connection slave;
select count(*) from t1_nodef;
count(*)
1
+connection master;
INSERT INTO t9 VALUES (2);
-**** On Master ****
+connection slave;
+connection master;
INSERT INTO t2 VALUES (2,4);
SELECT * FROM t2;
a b
2 4
-**** On Slave ****
+connection slave;
SELECT * FROM t2;
a
2
include/check_slave_is_running.inc
+connection master;
INSERT INTO t9 VALUES (4);
+connection slave;
+connection master;
INSERT INTO t4 VALUES (4);
+connection slave;
call mtr.add_suppression("Slave SQL.*Table definition on master and slave does not match: Column [012] type mismatch.* error.* 1535");
call mtr.add_suppression("Slave SQL.*Column [0-9] of table .test.t[0-9]. cannot be converted from type.* error.* 1677");
include/wait_for_slave_sql_error_and_skip.inc [errno=1677]
Last_SQL_Error = 'Column 0 of table 'test.t4' cannot be converted from type 'int' to type 'float''
+connection master;
INSERT INTO t9 VALUES (5);
+connection slave;
+connection master;
INSERT INTO t5 VALUES (5,10,25);
+connection slave;
include/wait_for_slave_sql_error_and_skip.inc [errno=1677]
Last_SQL_Error = 'Column 1 of table 'test.t5' cannot be converted from type 'int' to type 'float''
+connection master;
INSERT INTO t9 VALUES (6);
+connection slave;
+connection master;
INSERT INTO t6 VALUES (6,12,36);
+connection slave;
include/wait_for_slave_sql_error_and_skip.inc [errno=1677]
Last_SQL_Error = 'Column 2 of table 'test.t6' cannot be converted from type 'int' to type 'float''
+connection master;
INSERT INTO t9 VALUES (6);
+connection slave;
include/check_slave_is_running.inc
+connection master;
INSERT INTO t7 VALUES (1),(2),(3);
INSERT INTO t8 VALUES (1),(2),(3);
SELECT * FROM t7 ORDER BY a;
@@ -143,6 +168,7 @@ a
1
2
3
+connection slave;
SELECT * FROM t7 ORDER BY a;
a e1 e2 e3 e4 e5 e6 e7 e8
1 NULL NULL NULL NULL NULL NULL NULL NULL
@@ -153,37 +179,40 @@ a e1 e2 e3 e4 e5 e6 e7 e8
1 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0
-**** On Master ****
+connection master;
TRUNCATE t1_nodef;
SET SQL_LOG_BIN=0;
INSERT INTO t1_nodef VALUES (1,2);
INSERT INTO t1_nodef VALUES (2,4);
SET SQL_LOG_BIN=1;
-**** On Slave ****
+connection slave;
+connection slave;
INSERT INTO t1_nodef VALUES (1,2,3,4,5);
INSERT INTO t1_nodef VALUES (2,4,6,8,10);
-**** On Master ****
+connection master;
UPDATE t1_nodef SET b=2*b WHERE a=1;
SELECT * FROM t1_nodef ORDER BY a;
a b
1 4
2 4
-**** On Slave ****
+connection slave;
SELECT * FROM t1_nodef ORDER BY a;
a b x y z
1 4 3 4 5
2 4 6 8 10
-**** On Master ****
+connection master;
DELETE FROM t1_nodef WHERE a=2;
SELECT * FROM t1_nodef ORDER BY a;
a b
1 4
-**** On Slave ****
+connection slave;
SELECT * FROM t1_nodef ORDER BY a;
a b x y z
1 4 3 4 5
**** Cleanup ****
+connection master;
DROP TABLE IF EXISTS t1_int,t1_bit,t1_char,t1_nodef;
DROP TABLE IF EXISTS t2,t3,t4,t5,t6,t7,t8,t9;
+connection slave;
SET @@global.sql_mode= @my_sql_mode;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_set_null_tokudb.result b/storage/tokudb/mysql-test/rpl/r/rpl_set_null_tokudb.result
index 5e7c3c94b14..19ca9b2c59e 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_set_null_tokudb.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_set_null_tokudb.result
@@ -1,22 +1,34 @@
include/master-slave.inc
[connection master]
include/rpl_reset.inc
+connection master;
CREATE TABLE t1 (c1 BIT, c2 INT) Engine=TokuDB;
INSERT INTO `t1` VALUES ( 1, 1 );
UPDATE t1 SET c1=NULL where c2=1;
+connection slave;
include/diff_tables.inc [master:t1, slave:t1]
+connection master;
DELETE FROM t1 WHERE c2=1 LIMIT 1;
+connection slave;
include/diff_tables.inc [master:t1, slave:t1]
+connection master;
DROP TABLE t1;
+connection slave;
include/rpl_reset.inc
+connection master;
CREATE TABLE t1 (c1 CHAR) Engine=TokuDB;
INSERT INTO t1 ( c1 ) VALUES ( 'w' ) ;
SELECT * FROM t1;
c1
w
UPDATE t1 SET c1=NULL WHERE c1='w';
+connection slave;
include/diff_tables.inc [master:t1, slave:t1]
+connection master;
DELETE FROM t1 LIMIT 2;
+connection slave;
include/diff_tables.inc [master:t1, slave:t1]
+connection master;
DROP TABLE t1;
+connection slave;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_stm_tokudb.result b/storage/tokudb/mysql-test/rpl/r/rpl_stm_tokudb.result
index 63ca932bf57..9151f407548 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_stm_tokudb.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_stm_tokudb.result
@@ -1,5 +1,6 @@
include/master-slave.inc
[connection master]
+connection master;
CREATE TABLE t4 (
id INT(5) unsigned NOT NULL auto_increment,
name varchar(15) NOT NULL default '',
@@ -15,10 +16,12 @@ SELECT * FROM t4;
id name number
1 XXX 12345
2 XXY 12345
+connection slave;
SELECT * FROM t4;
id name number
1 XXX 12345
2 XXY 12345
+connection master;
LOAD DATA
INFILE '../../std_data/loaddata_pair.dat'
REPLACE INTO TABLE t4
@@ -27,18 +30,26 @@ SELECT * FROM t4;
id name number
4 XXX 12345
5 XXY 12345
+connection slave;
SELECT * FROM t4;
id name number
4 XXX 12345
5 XXY 12345
+connection master;
+connection slave;
+connection master;
FLUSH LOGS;
+connection slave;
FLUSH LOGS;
+connection master;
DROP DATABASE IF EXISTS mysqltest1;
CREATE DATABASE mysqltest1;
CREATE TEMPORARY TABLE mysqltest1.tmp (f1 BIGINT) ENGINE=InnoDB;
CREATE TABLE mysqltest1.t1 (f1 BIGINT) ENGINE="TokuDB";
SET AUTOCOMMIT = 0;
+connection slave;
-------- switch to slave --------
+connection slave;
ALTER TABLE mysqltest1.t1 ENGINE = MyISAM;
SHOW CREATE TABLE mysqltest1.t1;
Table Create Table
@@ -46,6 +57,7 @@ t1 CREATE TABLE `t1` (
`f1` bigint(20) DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
-------- switch to master --------
+connection master;
INSERT INTO mysqltest1.t1 SET f1= 1;
DROP TEMPORARY TABLE mysqltest1.tmp;
ROLLBACK;
@@ -67,7 +79,9 @@ tmp2 CREATE TEMPORARY TABLE `tmp2` (
SELECT COUNT(*) FROM mysqltest1.t1;
COUNT(*)
0
+connection slave;
-------- switch to slave --------
+connection slave;
SHOW CREATE TABLE mysqltest1.tmp;
ERROR 42S02: Table 'mysqltest1.tmp' doesn't exist
SHOW CREATE TABLE mysqltest1.tmp2;
@@ -78,6 +92,7 @@ COUNT(*)
2
FLUSH LOGS;
-------- switch to master --------
+connection master;
FLUSH LOGS;
DROP TEMPORARY TABLE IF EXISTS mysqltest1.tmp2;
DROP DATABASE mysqltest1;
@@ -101,11 +116,15 @@ INSERT INTO t1 (b) VALUES (1),(2),(3);
BEGIN;
INSERT INTO t1(b) VALUES (4);
-------- switch to master1 --------
+connection master1;
RENAME TABLE t1 TO t3, t2 TO t1;;
-------- switch to master --------
+connection master;
COMMIT;
-------- switch to master1 --------
+connection master1;
-------- switch to master --------
+connection master;
SELECT * FROM t1;
id b
SELECT * FROM t3;
@@ -114,7 +133,9 @@ id b
2 2
3 3
4 4
+connection slave;
-------- switch to slave --------
+connection slave;
SELECT * FROM t1;
id b
SELECT * FROM t3;
@@ -124,6 +145,7 @@ id b
3 3
4 4
-------- switch to master --------
+connection master;
DROP TABLE t1;
DROP TABLE t3;
End of 6.0 tests
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_stmt_replace_into.result b/storage/tokudb/mysql-test/rpl/r/rpl_stmt_replace_into.result
index b1482aa0144..abe80743f62 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_stmt_replace_into.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_stmt_replace_into.result
@@ -11,11 +11,13 @@ pk num txt
2 2 twotwo
3 3 three
4 4 four
+connection slave;
select * from testr;
pk num txt
1 1 one
2 2 twotwo
3 3 three
4 4 four
+connection master;
drop table testr;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_bug28430.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_bug28430.result
index f5b84a27f72..c7450a1b9c0 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_bug28430.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_bug28430.result
@@ -102,37 +102,40 @@ Master bykey 500
CALL test.proc_byrange();
SELECT count(*) as "Master byrange" FROM test.byrange_tbl;
Master byrange 500
+connection slave;
+connection slave;
show create table test.byrange_tbl;
Table byrange_tbl
Create Table CREATE TABLE `byrange_tbl` (
`id` mediumint(9) NOT NULL AUTO_INCREMENT,
- `dt` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+ `dt` timestamp NOT NULL DEFAULT current_timestamp() ON UPDATE current_timestamp(),
`user` char(255) DEFAULT NULL,
- `uuidf` longblob,
+ `uuidf` longblob DEFAULT NULL,
`fkid` mediumint(9) DEFAULT NULL,
`filler` varchar(255) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=TokuDB AUTO_INCREMENT=1001 DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (id)
-SUBPARTITION BY HASH (id)
+ PARTITION BY RANGE (`id`)
+SUBPARTITION BY HASH (`id`)
SUBPARTITIONS 2
-(PARTITION pa1 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION pa2 VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION pa3 VALUES LESS THAN (30) ENGINE = TokuDB,
- PARTITION pa4 VALUES LESS THAN (40) ENGINE = TokuDB,
- PARTITION pa5 VALUES LESS THAN (50) ENGINE = TokuDB,
- PARTITION pa6 VALUES LESS THAN (60) ENGINE = TokuDB,
- PARTITION pa7 VALUES LESS THAN (70) ENGINE = TokuDB,
- PARTITION pa8 VALUES LESS THAN (80) ENGINE = TokuDB,
- PARTITION pa9 VALUES LESS THAN (90) ENGINE = TokuDB,
- PARTITION pa10 VALUES LESS THAN (100) ENGINE = TokuDB,
- PARTITION pa11 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+(PARTITION `pa1` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `pa2` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `pa3` VALUES LESS THAN (30) ENGINE = TokuDB,
+ PARTITION `pa4` VALUES LESS THAN (40) ENGINE = TokuDB,
+ PARTITION `pa5` VALUES LESS THAN (50) ENGINE = TokuDB,
+ PARTITION `pa6` VALUES LESS THAN (60) ENGINE = TokuDB,
+ PARTITION `pa7` VALUES LESS THAN (70) ENGINE = TokuDB,
+ PARTITION `pa8` VALUES LESS THAN (80) ENGINE = TokuDB,
+ PARTITION `pa9` VALUES LESS THAN (90) ENGINE = TokuDB,
+ PARTITION `pa10` VALUES LESS THAN (100) ENGINE = TokuDB,
+ PARTITION `pa11` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT count(*) "Slave norm" FROM test.regular_tbl;
Slave norm 500
SELECT count(*) "Slave bykey" FROM test.bykey_tbl;
Slave bykey 500
SELECT count(*) "Slave byrange" FROM test.byrange_tbl;
Slave byrange 500
+connection master;
DROP PROCEDURE test.proc_norm;
DROP PROCEDURE test.proc_bykey;
DROP PROCEDURE test.proc_byrange;
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_bug30888.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_bug30888.result
index ac9aa2460f7..f3ffc908504 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_bug30888.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_bug30888.result
@@ -27,6 +27,8 @@ SET del_count = del_count - 2;
END WHILE;
END|
CALL test.proc_norm();
+connection slave;
+connection master;
DROP PROCEDURE test.proc_norm;
DROP TABLE test.regular_tbl;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_commit_after_flush.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_commit_after_flush.result
index d72837498cd..cc9174ba09e 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_commit_after_flush.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_commit_after_flush.result
@@ -5,6 +5,9 @@ begin;
insert into t1 values(1);
flush tables with read lock;
commit;
+connection slave;
+connection master;
unlock tables;
drop table t1;
+connection slave;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_insert_id.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_insert_id.result
index fdff14d3cdc..02da7194eb5 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_insert_id.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_insert_id.result
@@ -9,13 +9,16 @@
#
include/master-slave.inc
[connection master]
+connection master;
SET @old_concurrent_insert= @@global.concurrent_insert;
SET @@global.concurrent_insert= 0;
+connection master;
create table t1(a int auto_increment, key(a)) engine=tokudb;
create table t2(b int auto_increment, c int, key(b)) engine=tokudb;
insert into t1 values (1),(2),(3);
insert into t1 values (null);
insert into t2 values (null,last_insert_id());
+connection slave;
select * from t1 ORDER BY a;
a
1
@@ -25,6 +28,7 @@ a
select * from t2 ORDER BY b;
b c
1 4
+connection master;
drop table t1;
drop table t2;
create table t1(a int auto_increment, key(a)) engine=tokudb;
@@ -35,6 +39,7 @@ insert into t1 values (null),(null),(null);
insert into t2 values (5,0);
insert into t2 values (null,last_insert_id());
SET FOREIGN_KEY_CHECKS=1;
+connection slave;
select * from t1;
a
10
@@ -45,6 +50,7 @@ select * from t2;
b c
5 0
6 11
+connection master;
#
# check if INSERT SELECT in auto_increment is well replicated (bug #490)
#
@@ -63,6 +69,7 @@ b c
7 11
8 12
9 13
+connection slave;
select * from t1 ORDER BY a;
a
10
@@ -76,21 +83,28 @@ b c
7 11
8 12
9 13
+connection master;
drop table t1;
drop table t2;
+connection slave;
#
# Bug#8412: Error codes reported in binary log for CHARACTER SET,
# FOREIGN_KEY_CHECKS
#
+connection master;
SET TIMESTAMP=1000000000;
CREATE TABLE t1 ( a INT UNIQUE ) engine=tokudb;
SET FOREIGN_KEY_CHECKS=0;
INSERT INTO t1 VALUES (1),(1);
Got one of the listed errors
+connection slave;
+connection master;
drop table t1;
+connection slave;
#
# Bug#14553: NULL in WHERE resets LAST_INSERT_ID
#
+connection master;
set @@session.sql_auto_is_null=1;
create table t1(a int auto_increment, key(a)) engine=tokudb;
create table t2(a int) engine=tokudb;
@@ -100,9 +114,12 @@ insert into t2 (a) select a from t1 where a is null;
select * from t2;
a
1
+connection slave;
+connection slave;
select * from t2;
a
1
+connection master;
drop table t1;
drop table t2;
#
@@ -113,6 +130,7 @@ drop table t2;
#
# The solution is not to reset last_insert_id on enter to sub-statement.
#
+connection master;
drop function if exists bug15728;
drop function if exists bug15728_insert;
drop table if exists t1, t2;
@@ -178,6 +196,7 @@ id last_id
1 3
2 4
3 5
+connection slave;
select * from t1;
id last_id
1 0
@@ -191,6 +210,7 @@ id last_id
1 3
2 4
3 5
+connection master;
drop function bug15728;
drop function bug15728_insert;
drop table t1,t2;
@@ -211,20 +231,24 @@ insert into t1 values(null,100);
select * from t1 order by n;
n b
1 100
+connection slave;
insert into t1 values(null,200),(null,300);
delete from t1 where b <> 100;
select * from t1 order by n;
n b
1 100
+connection master;
replace into t1 values(null,100),(null,350);
select * from t1 order by n;
n b
2 100
3 350
+connection slave;
select * from t1 order by n;
n b
2 100
3 350
+connection master;
insert into t1 values (NULL,400),(3,500),(NULL,600) on duplicate key UPDATE n=1000;
select * from t1 order by n;
n b
@@ -232,12 +256,14 @@ n b
4 400
1000 350
1001 600
+connection slave;
select * from t1 order by n;
n b
2 100
4 400
1000 350
1001 600
+connection master;
drop table t1;
create table t1 (n int primary key auto_increment not null,
b int, unique(b)) engine=tokudb;
@@ -245,21 +271,27 @@ insert into t1 values(null,100);
select * from t1 order by n;
n b
1 100
+connection slave;
insert into t1 values(null,200),(null,300);
delete from t1 where b <> 100;
select * from t1 order by n;
n b
1 100
+connection master;
insert into t1 values(null,100),(null,350) on duplicate key update n=2;
select * from t1 order by n;
n b
2 100
3 350
+connection slave;
select * from t1 order by n;
n b
2 100
3 350
+connection master;
drop table t1;
+connection slave;
+connection master;
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b INT,
UNIQUE(b)) ENGINE=tokudb;
INSERT INTO t1(b) VALUES(1),(1),(2) ON DUPLICATE KEY UPDATE t1.b=10;
@@ -267,10 +299,12 @@ SELECT * FROM t1 ORDER BY a;
a b
1 10
2 2
+connection slave;
SELECT * FROM t1 ORDER BY a;
a b
1 10
2 2
+connection master;
drop table t1;
CREATE TABLE t1 (
id bigint(20) unsigned NOT NULL auto_increment,
@@ -309,6 +343,7 @@ id field_1 field_2 field_3
4 4 d 4d
5 5 e 5e
8 6 f 6f
+connection slave;
SELECT * FROM t1 ORDER BY id;
id field_1 field_2 field_3
1 1 a 1a
@@ -317,7 +352,9 @@ id field_1 field_2 field_3
4 4 d 4d
5 5 e 5e
8 6 f 6f
+connection master;
drop table t1, t2;
+connection master;
DROP PROCEDURE IF EXISTS p1;
DROP TABLE IF EXISTS t1, t2;
SELECT LAST_INSERT_ID(0);
@@ -345,12 +382,14 @@ id last_id
SELECT * FROM t2 ORDER BY id;
id last_id
1 0
+connection slave;
SELECT * FROM t1 ORDER BY id;
id last_id
0 1
SELECT * FROM t2 ORDER BY id;
id last_id
1 0
+connection master;
DROP PROCEDURE p1;
DROP TABLE t1, t2;
DROP PROCEDURE IF EXISTS p1;
@@ -396,7 +435,9 @@ INSERT INTO t1 VALUES (NULL, f2());
INSERT INTO t1 VALUES (NULL, 0), (NULL, LAST_INSERT_ID());
UPDATE t1 SET j= -1 WHERE i IS NULL;
INSERT INTO t1 (i) VALUES (NULL);
+connection master1;
INSERT INTO t1 (i) VALUES (NULL);
+connection master;
SELECT f3();
f3()
0
@@ -426,6 +467,7 @@ i
5
6
16
+connection slave;
SELECT * FROM t1;
i j
1 -1
@@ -452,19 +494,23 @@ i
5
6
16
+connection master;
DROP PROCEDURE p1;
DROP FUNCTION f1;
DROP FUNCTION f2;
DROP FUNCTION f3;
DROP TABLE t1, t2;
+connection slave;
#
# End of 5.0 tests
#
+connection master;
create table t2 (
id int not null auto_increment,
last_id int,
primary key (id)
) engine=tokudb;
+connection master;
truncate table t2;
create table t1 (id tinyint primary key) engine=tokudb;
create function insid() returns int
@@ -491,6 +537,7 @@ select * from t2 order by id;
id last_id
4 0
8 0
+connection slave;
select * from t1 order by id;
id
0
@@ -498,6 +545,7 @@ select * from t2 order by id;
id last_id
4 0
8 0
+connection master;
drop table t1;
drop function insid;
truncate table t2;
@@ -517,12 +565,14 @@ n b
select * from t2 order by id;
id last_id
1 3
+connection slave;
select * from t1 order by n;
n b
1 10
select * from t2 order by id;
id last_id
1 3
+connection master;
drop table t1, t2;
drop procedure foo;
SET @@global.concurrent_insert= @old_concurrent_insert;
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_insert_id_pk.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_insert_id_pk.result
index 359904787e3..20369a1842c 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_insert_id_pk.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_insert_id_pk.result
@@ -6,6 +6,7 @@ create table t2(b int auto_increment, c int, primary key(b));
insert into t1 values (1),(2),(3);
insert into t1 values (null);
insert into t2 values (null,last_insert_id());
+connection slave;
select * from t1 ORDER BY a;
a
1
@@ -15,6 +16,7 @@ a
select * from t2 ORDER BY b;
b c
1 4
+connection master;
drop table t1;
drop table t2;
create table t1(a int auto_increment, key(a)) engine=tokudb;
@@ -25,6 +27,7 @@ insert into t1 values (null),(null),(null);
insert into t2 values (5,0);
insert into t2 values (null,last_insert_id());
SET FOREIGN_KEY_CHECKS=1;
+connection slave;
select * from t1;
a
10
@@ -35,6 +38,7 @@ select * from t2;
b c
5 0
6 11
+connection master;
drop table t2;
drop table t1;
create table t1(a int auto_increment, primary key(a));
@@ -50,6 +54,7 @@ b c
7 11
8 12
9 13
+connection slave;
select * from t1 ORDER BY a;
a
10
@@ -63,12 +68,17 @@ b c
7 11
8 12
9 13
+connection master;
drop table t1;
drop table t2;
+connection slave;
+connection master;
SET TIMESTAMP=1000000000;
CREATE TABLE t1 ( a INT UNIQUE );
SET FOREIGN_KEY_CHECKS=0;
INSERT INTO t1 VALUES (1),(1);
Got one of the listed errors
+connection slave;
+connection master;
drop table t1;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_mixed_ddl.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_mixed_ddl.result
index b5ceeba165c..d6d4aebd523 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_mixed_ddl.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_mixed_ddl.result
@@ -1,6 +1,6 @@
include/master-slave.inc
[connection master]
-==========MASTER==========
+connection master;
show global variables like 'binlog_format%';
Variable_name Value
binlog_format MIXED
@@ -10,7 +10,7 @@ binlog_format MIXED
select @@global.binlog_format, @@session.binlog_format;
@@global.binlog_format @@session.binlog_format
MIXED MIXED
-==========SLAVE===========
+connection slave;
show global variables like 'binlog_format%';
Variable_name Value
binlog_format MIXED
@@ -20,6 +20,7 @@ binlog_format MIXED
select @@global.binlog_format, @@session.binlog_format;
@@global.binlog_format @@session.binlog_format
MIXED MIXED
+connection master;
******************** DDL for database ********************
DROP DATABASE IF EXISTS test_rpl;
@@ -29,14 +30,14 @@ ALTER DATABASE test_rpl_1 CHARACTER SET latin1 COLLATE latin1_general_ci;
DROP DATABASE test_rpl_1;
CREATE DATABASE test_rpl CHARACTER SET utf8 COLLATE utf8_general_ci;
ALTER DATABASE test_rpl CHARACTER SET latin1 COLLATE latin1_swedish_ci;
-==========MASTER==========
SHOW DATABASES LIKE 'test_rpl%';
Database (test_rpl%)
test_rpl
-==========SLAVE===========
+connection slave;
SHOW DATABASES LIKE 'test_rpl%';
Database (test_rpl%)
test_rpl
+connection master;
USE test_rpl;
******************** DDL for tables ********************
@@ -47,7 +48,6 @@ ALTER TABLE t0 ADD INDEX index1 (b);
ALTER TABLE t0 DROP COLUMN c;
RENAME TABLE t0 TO t1;
CREATE TABLE t2 LIKE t1;
-==========MASTER==========
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
@@ -64,7 +64,7 @@ t2 CREATE TABLE `t2` (
PRIMARY KEY (`a`),
KEY `index1` (`b`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-==========SLAVE===========
+connection slave;
USE test_rpl;
SHOW CREATE TABLE t1;
Table Create Table
@@ -82,12 +82,12 @@ t2 CREATE TABLE `t2` (
PRIMARY KEY (`a`),
KEY `index1` (`b`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
+connection master;
******************** DDL for indexes ********************
ALTER TABLE t2 ADD COLUMN d datetime;
CREATE INDEX index2 on t2 (d);
CREATE INDEX index3 on t2 (a, d);
-==========MASTER==========
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
@@ -107,7 +107,7 @@ t2 CREATE TABLE `t2` (
KEY `index2` (`d`),
KEY `index3` (`a`,`d`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-==========SLAVE===========
+connection slave;
USE test_rpl;
SHOW CREATE TABLE t1;
Table Create Table
@@ -128,6 +128,7 @@ t2 CREATE TABLE `t2` (
KEY `index2` (`d`),
KEY `index3` (`a`,`d`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
+connection master;
ALTER TABLE t2 DROP COLUMN d;
@@ -170,5 +171,8 @@ master-bin.000001 # Gtid # # GTID #-#-#
master-bin.000001 # Query # # use `test_rpl`; CREATE INDEX index3 on t2 (a, d)
master-bin.000001 # Gtid # # GTID #-#-#
master-bin.000001 # Query # # use `test_rpl`; ALTER TABLE t2 DROP COLUMN d
+connection slave;
+connection master;
drop database test_rpl;
+connection slave;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_mixed_dml.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_mixed_dml.result
index c71dcadc32f..5f41fd328c9 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_mixed_dml.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_mixed_dml.result
@@ -3,7 +3,7 @@ Warnings:
Warning 131 Using tokudb_pk_insert_mode is deprecated and the parameter may be removed in future releases.
include/master-slave.inc
[connection master]
-==========MASTER==========
+connection master;
show global variables like 'binlog_format%';
Variable_name Value
binlog_format MIXED
@@ -13,7 +13,7 @@ binlog_format MIXED
select @@global.binlog_format, @@session.binlog_format;
@@global.binlog_format @@session.binlog_format
MIXED MIXED
-==========SLAVE===========
+connection slave;
show global variables like 'binlog_format%';
Variable_name Value
binlog_format MIXED
@@ -23,6 +23,7 @@ binlog_format MIXED
select @@global.binlog_format, @@session.binlog_format;
@@global.binlog_format @@session.binlog_format
MIXED MIXED
+connection master;
CREATE DATABASE test_rpl;
******************** PREPARE TESTING ********************
@@ -36,7 +37,6 @@ INSERT INTO t2 VALUES(1, 't2, text 1');
******************** DELETE ********************
DELETE FROM t1 WHERE a = 1;
DELETE FROM t2 WHERE b <> UUID();
-==========MASTER==========
SELECT COUNT(*) FROM t1;
COUNT(*)
1
@@ -48,7 +48,7 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
-==========SLAVE===========
+connection slave;
USE test_rpl;
SELECT COUNT(*) FROM t1;
COUNT(*)
@@ -61,6 +61,7 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
+connection master;
DELETE FROM t1;
DELETE FROM t2;
@@ -71,7 +72,6 @@ INSERT INTO t2 SELECT * FROM t1;
INSERT INTO t2 VALUES (1, 't1, text 1') ON DUPLICATE KEY UPDATE b = 't2, text 1';
DELETE FROM t1 WHERE a = 2;
DELETE FROM t2 WHERE a = 2;
-==========MASTER==========
SELECT COUNT(*) FROM t1;
COUNT(*)
1
@@ -84,7 +84,7 @@ COUNT(*)
SELECT * FROM t2 ORDER BY a;
a b
1 t2, text 1
-==========SLAVE===========
+connection slave;
USE test_rpl;
SELECT COUNT(*) FROM t1;
COUNT(*)
@@ -98,6 +98,7 @@ COUNT(*)
SELECT * FROM t2 ORDER BY a;
a b
1 t2, text 1
+connection master;
DELETE FROM t1;
DELETE FROM t2;
@@ -108,7 +109,6 @@ a b
10 line A
20 line B
30 line C
-==========MASTER==========
SELECT COUNT(*) FROM t1;
COUNT(*)
3
@@ -122,7 +122,7 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
-==========SLAVE===========
+connection slave;
USE test_rpl;
SELECT COUNT(*) FROM t1;
COUNT(*)
@@ -137,6 +137,7 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
+connection master;
DELETE FROM t1;
DELETE FROM t2;
@@ -148,7 +149,6 @@ REPLACE INTO t1 VALUES(1, 't1, text 11');
REPLACE INTO t1 VALUES(2, UUID());
REPLACE INTO t1 SET a=3, b='t1, text 33';
DELETE FROM t1 WHERE a = 2;
-==========MASTER==========
SELECT COUNT(*) FROM t1;
COUNT(*)
2
@@ -161,7 +161,7 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
-==========SLAVE===========
+connection slave;
USE test_rpl;
SELECT COUNT(*) FROM t1;
COUNT(*)
@@ -175,6 +175,7 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
+connection master;
DELETE FROM t1;
DELETE FROM t2;
@@ -214,7 +215,6 @@ DELETE FROM t2;
******************** TRUNCATE ********************
INSERT INTO t1 VALUES(1, 't1, text 1');
-==========MASTER==========
SELECT COUNT(*) FROM t1;
COUNT(*)
1
@@ -226,7 +226,7 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
-==========SLAVE===========
+connection slave;
USE test_rpl;
SELECT COUNT(*) FROM t1;
COUNT(*)
@@ -239,8 +239,8 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
+connection master;
TRUNCATE t1;
-==========MASTER==========
SELECT COUNT(*) FROM t1;
COUNT(*)
0
@@ -251,7 +251,7 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
-==========SLAVE===========
+connection slave;
USE test_rpl;
SELECT COUNT(*) FROM t1;
COUNT(*)
@@ -263,6 +263,7 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
+connection master;
DELETE FROM t1;
DELETE FROM t2;
@@ -270,7 +271,6 @@ DELETE FROM t2;
INSERT INTO t1 VALUES(1, 't1, text 1');
INSERT INTO t2 VALUES(1, 't2, text 1');
UPDATE t1 SET b = 't1, text 1 updated' WHERE a = 1;
-==========MASTER==========
SELECT COUNT(*) FROM t1;
COUNT(*)
1
@@ -283,7 +283,7 @@ COUNT(*)
SELECT * FROM t2 ORDER BY a;
a b
1 t2, text 1
-==========SLAVE===========
+connection slave;
USE test_rpl;
SELECT COUNT(*) FROM t1;
COUNT(*)
@@ -297,8 +297,8 @@ COUNT(*)
SELECT * FROM t2 ORDER BY a;
a b
1 t2, text 1
+connection master;
UPDATE t1, t2 SET t1.b = 'test', t2.b = 'test';
-==========MASTER==========
SELECT COUNT(*) FROM t1;
COUNT(*)
1
@@ -311,7 +311,7 @@ COUNT(*)
SELECT * FROM t2 ORDER BY a;
a b
1 test
-==========SLAVE===========
+connection slave;
USE test_rpl;
SELECT COUNT(*) FROM t1;
COUNT(*)
@@ -325,6 +325,7 @@ COUNT(*)
SELECT * FROM t2 ORDER BY a;
a b
1 test
+connection master;
DELETE FROM t1;
DELETE FROM t2;
@@ -344,7 +345,6 @@ USE test_rpl;
START TRANSACTION;
INSERT INTO t1 VALUES (1, 'start');
COMMIT;
-==========MASTER==========
SELECT COUNT(*) FROM t1;
COUNT(*)
1
@@ -356,7 +356,7 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
-==========SLAVE===========
+connection slave;
USE test_rpl;
SELECT COUNT(*) FROM t1;
COUNT(*)
@@ -369,10 +369,10 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
+connection master;
START TRANSACTION;
INSERT INTO t1 VALUES (2, 'rollback');
ROLLBACK;
-==========MASTER==========
SELECT COUNT(*) FROM t1;
COUNT(*)
1
@@ -384,7 +384,7 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
-==========SLAVE===========
+connection slave;
USE test_rpl;
SELECT COUNT(*) FROM t1;
COUNT(*)
@@ -397,12 +397,12 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
+connection master;
START TRANSACTION;
INSERT INTO t1 VALUES (3, 'before savepoint s1');
SAVEPOINT s1;
INSERT INTO t1 VALUES (4, 'after savepoint s1');
ROLLBACK TO SAVEPOINT s1;
-==========MASTER==========
SELECT COUNT(*) FROM t1;
COUNT(*)
2
@@ -415,7 +415,7 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
-==========SLAVE===========
+connection slave;
USE test_rpl;
SELECT COUNT(*) FROM t1;
COUNT(*)
@@ -428,6 +428,7 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
+connection master;
START TRANSACTION;
INSERT INTO t1 VALUES (5, 'before savepoint s2');
SAVEPOINT s2;
@@ -436,7 +437,6 @@ INSERT INTO t1 VALUES (7, CONCAT('with UUID() ',UUID()));
RELEASE SAVEPOINT s2;
COMMIT;
DELETE FROM t1 WHERE a = 7;
-==========MASTER==========
SELECT COUNT(*) FROM t1;
COUNT(*)
4
@@ -451,7 +451,7 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
-==========SLAVE===========
+connection slave;
USE test_rpl;
SELECT COUNT(*) FROM t1;
COUNT(*)
@@ -467,6 +467,7 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
+connection master;
DELETE FROM t1;
DELETE FROM t2;
@@ -482,73 +483,73 @@ SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;
******************** CREATE USER ********************
CREATE USER 'user_test_rpl'@'localhost' IDENTIFIED BY PASSWORD '*1111111111111111111111111111111111111111';
-==========MASTER==========
-SELECT host, user, password, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%';
-host user password select_priv
-localhost user_test_rpl *1111111111111111111111111111111111111111 N
-==========SLAVE===========
+SELECT host, user, password, plugin, authentication_string, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%';
+host user password plugin authentication_string select_priv
+localhost user_test_rpl *1111111111111111111111111111111111111111 N
+connection slave;
USE test_rpl;
-SELECT host, user, password, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%';
-host user password select_priv
-localhost user_test_rpl *1111111111111111111111111111111111111111 N
+SELECT host, user, password, plugin, authentication_string, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%';
+host user password plugin authentication_string select_priv
+localhost user_test_rpl *1111111111111111111111111111111111111111 N
+connection master;
******************** GRANT ********************
GRANT SELECT ON *.* TO 'user_test_rpl'@'localhost';
-==========MASTER==========
-SELECT host, user, password, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%';
-host user password select_priv
-localhost user_test_rpl *1111111111111111111111111111111111111111 Y
-==========SLAVE===========
+SELECT host, user, password, plugin, authentication_string, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%';
+host user password plugin authentication_string select_priv
+localhost user_test_rpl *1111111111111111111111111111111111111111 Y
+connection slave;
USE test_rpl;
-SELECT host, user, password, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%';
-host user password select_priv
-localhost user_test_rpl *1111111111111111111111111111111111111111 Y
+SELECT host, user, password, plugin, authentication_string, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%';
+host user password plugin authentication_string select_priv
+localhost user_test_rpl *1111111111111111111111111111111111111111 Y
+connection master;
******************** REVOKE ********************
REVOKE SELECT ON *.* FROM 'user_test_rpl'@'localhost';
-==========MASTER==========
-SELECT host, user, password, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%';
-host user password select_priv
-localhost user_test_rpl *1111111111111111111111111111111111111111 N
-==========SLAVE===========
+SELECT host, user, password, plugin, authentication_string, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%';
+host user password plugin authentication_string select_priv
+localhost user_test_rpl *1111111111111111111111111111111111111111 N
+connection slave;
USE test_rpl;
-SELECT host, user, password, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%';
-host user password select_priv
-localhost user_test_rpl *1111111111111111111111111111111111111111 N
+SELECT host, user, password, plugin, authentication_string, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%';
+host user password plugin authentication_string select_priv
+localhost user_test_rpl *1111111111111111111111111111111111111111 N
+connection master;
******************** SET PASSWORD ********************
SET PASSWORD FOR 'user_test_rpl'@'localhost' = '*0000000000000000000000000000000000000000';
-==========MASTER==========
-SELECT host, user, password, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%';
-host user password select_priv
-localhost user_test_rpl *0000000000000000000000000000000000000000 N
-==========SLAVE===========
+SELECT host, user, password, plugin, authentication_string, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%';
+host user password plugin authentication_string select_priv
+localhost user_test_rpl *0000000000000000000000000000000000000000 mysql_native_password *0000000000000000000000000000000000000000 N
+connection slave;
USE test_rpl;
-SELECT host, user, password, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%';
-host user password select_priv
-localhost user_test_rpl *0000000000000000000000000000000000000000 N
+SELECT host, user, password, plugin, authentication_string, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%';
+host user password plugin authentication_string select_priv
+localhost user_test_rpl *0000000000000000000000000000000000000000 mysql_native_password *0000000000000000000000000000000000000000 N
+connection master;
******************** RENAME USER ********************
RENAME USER 'user_test_rpl'@'localhost' TO 'user_test_rpl_2'@'localhost';
-==========MASTER==========
-SELECT host, user, password, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%';
-host user password select_priv
-localhost user_test_rpl_2 *0000000000000000000000000000000000000000 N
-==========SLAVE===========
+SELECT host, user, password, plugin, authentication_string, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%';
+host user password plugin authentication_string select_priv
+localhost user_test_rpl_2 *0000000000000000000000000000000000000000 mysql_native_password *0000000000000000000000000000000000000000 N
+connection slave;
USE test_rpl;
-SELECT host, user, password, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%';
-host user password select_priv
-localhost user_test_rpl_2 *0000000000000000000000000000000000000000 N
+SELECT host, user, password, plugin, authentication_string, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%';
+host user password plugin authentication_string select_priv
+localhost user_test_rpl_2 *0000000000000000000000000000000000000000 mysql_native_password *0000000000000000000000000000000000000000 N
+connection master;
******************** DROP USER ********************
DROP USER 'user_test_rpl_2'@'localhost';
-==========MASTER==========
-SELECT host, user, password, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%';
-host user password select_priv
-==========SLAVE===========
+SELECT host, user, password, plugin, authentication_string, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%';
+host user password plugin authentication_string select_priv
+connection slave;
USE test_rpl;
-SELECT host, user, password, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%';
-host user password select_priv
+SELECT host, user, password, plugin, authentication_string, select_priv FROM mysql.user WHERE user LIKE 'user_test_rpl%';
+host user password plugin authentication_string select_priv
+connection master;
INSERT INTO t1 VALUES(100, 'test');
******************** ANALYZE ********************
@@ -583,14 +584,14 @@ SHOW VARIABLES LIKE 'test_rpl_var';
Variable_name Value
******************** SHOW ********************
-==========MASTER==========
SHOW DATABASES LIKE 'test_rpl%';
Database (test_rpl%)
test_rpl
-==========SLAVE===========
+connection slave;
SHOW DATABASES LIKE 'test_rpl%';
Database (test_rpl%)
test_rpl
+connection master;
******************** PROCEDURE ********************
CREATE PROCEDURE p1 ()
@@ -606,7 +607,6 @@ CALL p1();
INSERT INTO t1 VALUES(202, 'test 202');
CALL p2();
DELETE FROM t1 WHERE a = 202;
-==========MASTER==========
SELECT COUNT(*) FROM t1;
COUNT(*)
2
@@ -619,7 +619,7 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
-==========SLAVE===========
+connection slave;
USE test_rpl;
SELECT COUNT(*) FROM t1;
COUNT(*)
@@ -633,6 +633,7 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
+connection master;
ALTER PROCEDURE p1 COMMENT 'p1';
DROP PROCEDURE p1;
DROP PROCEDURE p2;
@@ -645,7 +646,6 @@ FOR EACH ROW BEGIN
INSERT INTO t2 SET a = NEW.a, b = NEW.b;
END|
INSERT INTO t1 VALUES (1, 'test');
-==========MASTER==========
SELECT COUNT(*) FROM t1;
COUNT(*)
1
@@ -658,7 +658,7 @@ COUNT(*)
SELECT * FROM t2 ORDER BY a;
a b
1 test
-==========SLAVE===========
+connection slave;
USE test_rpl;
SELECT COUNT(*) FROM t1;
COUNT(*)
@@ -672,6 +672,7 @@ COUNT(*)
SELECT * FROM t2 ORDER BY a;
a b
1 test
+connection master;
DELETE FROM t1;
DELETE FROM t2;
DROP TRIGGER tr1;
@@ -683,16 +684,15 @@ INSERT INTO t1 VALUES(1, 'test1');
CREATE EVENT e1 ON SCHEDULE EVERY '1' SECOND COMMENT 'e_second_comment' DO DELETE FROM t1;
Warnings:
Warning 1105 Event scheduler is switched off, use SET GLOBAL event_scheduler=ON to enable it.
-==========MASTER==========
SHOW EVENTS;
Db Name Definer Time zone Type Execute at Interval value Interval field Starts Ends Status Originator character_set_client collation_connection Database Collation
test_rpl e1 root@localhost SYSTEM RECURRING NULL 1 # # NULL ENABLED 1 latin1 latin1_swedish_ci latin1_swedish_ci
-==========SLAVE===========
+connection slave;
USE test_rpl;
SHOW EVENTS;
Db Name Definer Time zone Type Execute at Interval value Interval field Starts Ends Status Originator character_set_client collation_connection Database Collation
test_rpl e1 root@localhost SYSTEM RECURRING NULL 1 # # NULL SLAVESIDE_DISABLED 1 latin1 latin1_swedish_ci latin1_swedish_ci
-==========MASTER==========
+connection master;
SELECT COUNT(*) FROM t1;
COUNT(*)
1
@@ -704,7 +704,7 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
-==========SLAVE===========
+connection slave;
USE test_rpl;
SELECT COUNT(*) FROM t1;
COUNT(*)
@@ -717,7 +717,7 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
-==========MASTER==========
+connection master;
SELECT COUNT(*) FROM t1;
COUNT(*)
1
@@ -729,7 +729,7 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
-==========SLAVE===========
+connection slave;
USE test_rpl;
SELECT COUNT(*) FROM t1;
COUNT(*)
@@ -742,17 +742,17 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
+connection master;
ALTER EVENT e1 RENAME TO e2;
-==========MASTER==========
SHOW EVENTS;
Db Name Definer Time zone Type Execute at Interval value Interval field Starts Ends Status Originator character_set_client collation_connection Database Collation
test_rpl e2 root@localhost SYSTEM RECURRING NULL 1 # # NULL ENABLED 1 latin1 latin1_swedish_ci latin1_swedish_ci
-==========SLAVE===========
+connection slave;
USE test_rpl;
SHOW EVENTS;
Db Name Definer Time zone Type Execute at Interval value Interval field Starts Ends Status Originator character_set_client collation_connection Database Collation
test_rpl e2 root@localhost SYSTEM RECURRING NULL 1 # # NULL SLAVESIDE_DISABLED 1 latin1 latin1_swedish_ci latin1_swedish_ci
-==========MASTER==========
+connection master;
SELECT COUNT(*) FROM t1;
COUNT(*)
1
@@ -764,7 +764,7 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
-==========SLAVE===========
+connection slave;
USE test_rpl;
SELECT COUNT(*) FROM t1;
COUNT(*)
@@ -777,14 +777,15 @@ COUNT(*)
0
SELECT * FROM t2 ORDER BY a;
a b
+connection master;
DROP EVENT e2;
-==========MASTER==========
SHOW EVENTS;
Db Name Definer Time zone Type Execute at Interval value Interval field Starts Ends Status Originator character_set_client collation_connection Database Collation
-==========SLAVE===========
+connection slave;
USE test_rpl;
SHOW EVENTS;
Db Name Definer Time zone Type Execute at Interval value Interval field Starts Ends Status Originator character_set_client collation_connection Database Collation
+connection master;
DELETE FROM t1;
DELETE FROM t2;
@@ -793,37 +794,37 @@ INSERT INTO t1 VALUES(1, 'test1');
INSERT INTO t1 VALUES(2, 'test2');
CREATE VIEW v1 AS SELECT * FROM t1 WHERE a = 1;
CREATE VIEW v2 AS SELECT * FROM t1 WHERE b <> UUID();
-==========MASTER==========
SHOW CREATE VIEW v1;
View Create View character_set_client collation_connection
-v1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS select `t1`.`a` AS `a`,`t1`.`b` AS `b` from `t1` where (`t1`.`a` = 1) latin1 latin1_swedish_ci
+v1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS select `t1`.`a` AS `a`,`t1`.`b` AS `b` from `t1` where `t1`.`a` = 1 latin1 latin1_swedish_ci
SELECT * FROM v1 ORDER BY a;
a b
1 test1
-==========SLAVE===========
+connection slave;
USE test_rpl;
SHOW CREATE VIEW v1;
View Create View character_set_client collation_connection
-v1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS select `t1`.`a` AS `a`,`t1`.`b` AS `b` from `t1` where (`t1`.`a` = 1) latin1 latin1_swedish_ci
+v1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS select `t1`.`a` AS `a`,`t1`.`b` AS `b` from `t1` where `t1`.`a` = 1 latin1 latin1_swedish_ci
SELECT * FROM v1 ORDER BY a;
a b
1 test1
+connection master;
ALTER VIEW v1 AS SELECT * FROM t1 WHERE a = 2;
-==========MASTER==========
SHOW CREATE VIEW v1;
View Create View character_set_client collation_connection
-v1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS select `t1`.`a` AS `a`,`t1`.`b` AS `b` from `t1` where (`t1`.`a` = 2) latin1 latin1_swedish_ci
+v1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS select `t1`.`a` AS `a`,`t1`.`b` AS `b` from `t1` where `t1`.`a` = 2 latin1 latin1_swedish_ci
SELECT * FROM v1 ORDER BY a;
a b
2 test2
-==========SLAVE===========
+connection slave;
USE test_rpl;
SHOW CREATE VIEW v1;
View Create View character_set_client collation_connection
-v1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS select `t1`.`a` AS `a`,`t1`.`b` AS `b` from `t1` where (`t1`.`a` = 2) latin1 latin1_swedish_ci
+v1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS select `t1`.`a` AS `a`,`t1`.`b` AS `b` from `t1` where `t1`.`a` = 2 latin1 latin1_swedish_ci
SELECT * FROM v1 ORDER BY a;
a b
2 test2
+connection master;
DROP VIEW v1;
DROP VIEW v2;
DELETE FROM t1;
@@ -852,6 +853,7 @@ master-bin.000001 # Gtid # # BEGIN GTID #-#-#
master-bin.000001 # Query # # use `test_rpl`; DELETE FROM t1 WHERE a = 1
master-bin.000001 # Xid # # COMMIT /* XID */
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
+master-bin.000001 # Annotate_rows # # DELETE FROM t2 WHERE b <> UUID()
master-bin.000001 # Table_map # # table_id: # (test_rpl.t2)
master-bin.000001 # Delete_rows_v1 # # table_id: # flags: STMT_END_F
master-bin.000001 # Xid # # COMMIT /* XID */
@@ -865,10 +867,12 @@ master-bin.000001 # Gtid # # BEGIN GTID #-#-#
master-bin.000001 # Query # # use `test_rpl`; INSERT INTO t1 VALUES(1, 't1, text 1')
master-bin.000001 # Xid # # COMMIT /* XID */
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
+master-bin.000001 # Annotate_rows # # INSERT INTO t1 VALUES(2, UUID())
master-bin.000001 # Table_map # # table_id: # (test_rpl.t1)
master-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F
master-bin.000001 # Xid # # COMMIT /* XID */
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
+master-bin.000001 # Annotate_rows # # INSERT INTO t2 SELECT * FROM t1
master-bin.000001 # Table_map # # table_id: # (test_rpl.t2)
master-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F
master-bin.000001 # Xid # # COMMIT /* XID */
@@ -888,6 +892,7 @@ master-bin.000001 # Gtid # # BEGIN GTID #-#-#
master-bin.000001 # Query # # use `test_rpl`; DELETE FROM t2
master-bin.000001 # Xid # # COMMIT /* XID */
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
+master-bin.000001 # Annotate_rows # # LOAD DATA INFILE 'MYSQLTEST_VARDIR/std_data/rpl_mixed.dat' INTO TABLE t1 FIELDS TERMINATED BY '|'
master-bin.000001 # Table_map # # table_id: # (test_rpl.t1)
master-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F
master-bin.000001 # Xid # # COMMIT /* XID */
@@ -910,6 +915,7 @@ master-bin.000001 # Gtid # # BEGIN GTID #-#-#
master-bin.000001 # Query # # use `test_rpl`; REPLACE INTO t1 VALUES(1, 't1, text 11')
master-bin.000001 # Xid # # COMMIT /* XID */
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
+master-bin.000001 # Annotate_rows # # REPLACE INTO t1 VALUES(2, UUID())
master-bin.000001 # Table_map # # table_id: # (test_rpl.t1)
master-bin.000001 # Update_rows_v1 # # table_id: # flags: STMT_END_F
master-bin.000001 # Xid # # COMMIT /* XID */
@@ -1004,6 +1010,7 @@ master-bin.000001 # Gtid # # BEGIN GTID #-#-#
master-bin.000001 # Query # # use `test_rpl`; INSERT INTO t1 VALUES (5, 'before savepoint s2')
master-bin.000001 # Query # # SAVEPOINT `s2`
master-bin.000001 # Query # # use `test_rpl`; INSERT INTO t1 VALUES (6, 'after savepoint s2')
+master-bin.000001 # Annotate_rows # # INSERT INTO t1 VALUES (7, CONCAT('with UUID() ',UUID()))
master-bin.000001 # Table_map # # table_id: # (test_rpl.t1)
master-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F
master-bin.000001 # Xid # # COMMIT /* XID */
@@ -1057,6 +1064,7 @@ master-bin.000001 # Gtid # # BEGIN GTID #-#-#
master-bin.000001 # Query # # use `test_rpl`; INSERT INTO t1 VALUES(202, 'test 202')
master-bin.000001 # Xid # # COMMIT /* XID */
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
+master-bin.000001 # Annotate_rows # # UPDATE t1 SET b = UUID() WHERE a = 202
master-bin.000001 # Table_map # # table_id: # (test_rpl.t1)
master-bin.000001 # Update_rows_v1 # # table_id: # flags: STMT_END_F
master-bin.000001 # Xid # # COMMIT /* XID */
@@ -1081,6 +1089,7 @@ FOR EACH ROW BEGIN
INSERT INTO t2 SET a = NEW.a, b = NEW.b;
END
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
+master-bin.000001 # Annotate_rows # # INSERT INTO t2 SET a = NEW.a, b = NEW.b
master-bin.000001 # Table_map # # table_id: # (test_rpl.t1)
master-bin.000001 # Table_map # # table_id: # (test_rpl.t2)
master-bin.000001 # Write_rows_v1 # # table_id: #
@@ -1133,5 +1142,8 @@ master-bin.000001 # Xid # # COMMIT /* XID */
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
master-bin.000001 # Query # # use `test_rpl`; DELETE FROM t2
master-bin.000001 # Xid # # COMMIT /* XID */
+connection slave;
+connection master;
drop database test_rpl;
+connection slave;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_multi_update.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_multi_update.result
index 89236dcea4e..1036e753961 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_multi_update.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_multi_update.result
@@ -21,5 +21,7 @@ a b
1 0
2 1
UPDATE t1, t2 SET t1.b = t2.b WHERE t1.a = t2.a;
+connection slave;
+connection master;
drop table t1, t2;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_multi_update2.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_multi_update2.result
index df5fac74cf0..e51eac13783 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_multi_update2.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_multi_update2.result
@@ -30,6 +30,7 @@ SELECT * FROM t2 ORDER BY a;
a b
1 0
2 1
+connection slave;
SELECT * FROM t1 ORDER BY a;
a b
1 4
@@ -38,16 +39,23 @@ SELECT * FROM t2 ORDER BY a;
a b
1 0
2 1
+connection master;
drop table t1,t2;
+connection slave;
reset master;
+connection master;
CREATE TABLE t1 ( a INT );
INSERT INTO t1 VALUES (0);
UPDATE t1, (SELECT 3 as b) AS x SET t1.a = x.b;
select * from t1;
a
3
+connection slave;
+connection slave;
select * from t1;
a
3
+connection master;
drop table t1;
+connection slave;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_multi_update3.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_multi_update3.result
index bcad07eb0b6..aae924f2ae8 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_multi_update3.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_multi_update3.result
@@ -31,6 +31,8 @@ SELECT * FROM t2 ORDER BY a;
a b
1 6
2 6
+connection slave;
+connection slave;
SELECT * FROM t1 ORDER BY a;
a b
1 0
@@ -39,9 +41,11 @@ SELECT * FROM t2 ORDER BY a;
a b
1 6
2 6
+connection master;
drop table t1,t2;
-------- Test 1 for BUG#9361 --------
+connection master;
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
CREATE TABLE t1 (
@@ -66,15 +70,19 @@ WHERE b2 = 'baz')
AND a.a3 IS NULL
AND a.a4 = 'foo'
AND a.a5 = 'bar';
+connection slave;
+connection slave;
SELECT * FROM t1;
a1 a2 a3 a4 a5
No 1 NULL foo bar
SELECT * FROM t2;
b1 b2
1 baz
+connection master;
DROP TABLE t1, t2;
-------- Test 2 for BUG#9361 --------
+connection master;
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
DROP TABLE IF EXISTS t3;
@@ -115,9 +123,12 @@ FROM t3
WHERE y = 34
)
WHERE b.x = 23;
+connection slave;
+connection slave;
SELECT * FROM t1;
i j x y z
1 2 23 24 71
+connection master;
DROP TABLE t1, t2, t3;
DROP TABLE IF EXISTS t1;
Warnings:
@@ -175,6 +186,7 @@ idpro price nbprice
1 1.0000 3
2 1.0000 2
3 2.0000 1
+connection slave;
select "-- SLAVE AFTER JOIN --" as "";
-- SLAVE AFTER JOIN --
@@ -191,5 +203,6 @@ idpro price nbprice
1 1.0000 3
2 1.0000 2
3 2.0000 1
+connection master;
DROP TABLE t1, t2;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_read_only_ft.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_read_only_ft.result
index c81a2ebac44..593f177569f 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_read_only_ft.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_read_only_ft.result
@@ -1,6 +1,8 @@
include/master-slave.inc
[connection master]
+connection master;
drop table if exists t;
+connection slave;
show variables like 'tokudb_rpl%';
Variable_name Value
tokudb_rpl_check_readonly ON
@@ -8,14 +10,21 @@ tokudb_rpl_lookup_rows ON
tokudb_rpl_lookup_rows_delay 0
tokudb_rpl_unique_checks OFF
tokudb_rpl_unique_checks_delay 5000
+connection master;
create table t (a bigint not null, primary key(a)) engine=tokudb;
select unix_timestamp() into @tstart;
insert into t values (1);
insert into t values (2),(3);
insert into t values (4);
+connection slave;
+connection master;
select unix_timestamp()-@tstart <= 10;
unix_timestamp()-@tstart <= 10
0
+connection slave;
+connection master;
include/diff_tables.inc [master:test.t, slave:test.t]
+connection master;
drop table if exists t;
+connection slave;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_crash_safe.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_crash_safe.result
deleted file mode 100644
index 04578c3ff68..00000000000
--- a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_crash_safe.result
+++ /dev/null
@@ -1,2183 +0,0 @@
-include/master-slave.inc
-Warnings:
-Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
-Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
-[connection master]
-call mtr.add_suppression('Attempting backtrace');
-call mtr.add_suppression("Recovery from master pos .* and file master-bin.000001");
-call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
-call mtr.add_suppression(".* InnoDB: Warning: allocated tablespace .*, old maximum was .*");
-###################################################################################
-# PREPARE EXECUTION
-###################################################################################
-include/stop_slave.inc
-SHOW CREATE TABLE mysql.slave_relay_log_info;
-Table Create Table
-slave_relay_log_info CREATE TABLE `slave_relay_log_info` (
- `Number_of_lines` int(10) unsigned NOT NULL COMMENT 'Number of lines in the file or rows in the table. Used to version table definitions.',
- `Relay_log_name` text CHARACTER SET utf8 COLLATE utf8_bin NOT NULL COMMENT 'The name of the current relay log file.',
- `Relay_log_pos` bigint(20) unsigned NOT NULL COMMENT 'The relay log position of the last executed event.',
- `Master_log_name` text CHARACTER SET utf8 COLLATE utf8_bin NOT NULL COMMENT 'The name of the master binary log file from which the events in the relay log file were read.',
- `Master_log_pos` bigint(20) unsigned NOT NULL COMMENT 'The master log position of the last executed event.',
- `Sql_delay` int(11) NOT NULL COMMENT 'The number of seconds that the slave must lag behind the master.',
- `Number_of_workers` int(10) unsigned NOT NULL,
- `Id` int(10) unsigned NOT NULL COMMENT 'Internal Id that uniquely identifies this record.',
- PRIMARY KEY (`Id`)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8 STATS_PERSISTENT=0 COMMENT='Relay Log Information'
-SHOW CREATE TABLE mysql.slave_worker_info;
-Table Create Table
-slave_worker_info CREATE TABLE `slave_worker_info` (
- `Id` int(10) unsigned NOT NULL,
- `Relay_log_name` text CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,
- `Relay_log_pos` bigint(20) unsigned NOT NULL,
- `Master_log_name` text CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,
- `Master_log_pos` bigint(20) unsigned NOT NULL,
- `Checkpoint_relay_log_name` text CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,
- `Checkpoint_relay_log_pos` bigint(20) unsigned NOT NULL,
- `Checkpoint_master_log_name` text CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,
- `Checkpoint_master_log_pos` bigint(20) unsigned NOT NULL,
- `Checkpoint_seqno` int(10) unsigned NOT NULL,
- `Checkpoint_group_size` int(10) unsigned NOT NULL,
- `Checkpoint_group_bitmap` blob NOT NULL,
- PRIMARY KEY (`Id`)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8 STATS_PERSISTENT=0 COMMENT='Worker Information'
-ALTER TABLE mysql.slave_relay_log_info ENGINE= Innodb;
-ALTER TABLE mysql.slave_worker_info ENGINE= Innodb;
-SHOW CREATE TABLE mysql.slave_relay_log_info;
-Table Create Table
-slave_relay_log_info CREATE TABLE `slave_relay_log_info` (
- `Number_of_lines` int(10) unsigned NOT NULL COMMENT 'Number of lines in the file or rows in the table. Used to version table definitions.',
- `Relay_log_name` text CHARACTER SET utf8 COLLATE utf8_bin NOT NULL COMMENT 'The name of the current relay log file.',
- `Relay_log_pos` bigint(20) unsigned NOT NULL COMMENT 'The relay log position of the last executed event.',
- `Master_log_name` text CHARACTER SET utf8 COLLATE utf8_bin NOT NULL COMMENT 'The name of the master binary log file from which the events in the relay log file were read.',
- `Master_log_pos` bigint(20) unsigned NOT NULL COMMENT 'The master log position of the last executed event.',
- `Sql_delay` int(11) NOT NULL COMMENT 'The number of seconds that the slave must lag behind the master.',
- `Number_of_workers` int(10) unsigned NOT NULL,
- `Id` int(10) unsigned NOT NULL COMMENT 'Internal Id that uniquely identifies this record.',
- PRIMARY KEY (`Id`)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8 STATS_PERSISTENT=0 COMMENT='Relay Log Information'
-SHOW CREATE TABLE mysql.slave_worker_info;
-Table Create Table
-slave_worker_info CREATE TABLE `slave_worker_info` (
- `Id` int(10) unsigned NOT NULL,
- `Relay_log_name` text CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,
- `Relay_log_pos` bigint(20) unsigned NOT NULL,
- `Master_log_name` text CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,
- `Master_log_pos` bigint(20) unsigned NOT NULL,
- `Checkpoint_relay_log_name` text CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,
- `Checkpoint_relay_log_pos` bigint(20) unsigned NOT NULL,
- `Checkpoint_master_log_name` text CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,
- `Checkpoint_master_log_pos` bigint(20) unsigned NOT NULL,
- `Checkpoint_seqno` int(10) unsigned NOT NULL,
- `Checkpoint_group_size` int(10) unsigned NOT NULL,
- `Checkpoint_group_bitmap` blob NOT NULL,
- PRIMARY KEY (`Id`)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8 STATS_PERSISTENT=0 COMMENT='Worker Information'
-include/start_slave.inc
-rpl_mixing_engines.inc [commands=configure]
-CREATE TABLE nt_1 (trans_id INT, stmt_id INT, info VARCHAR(64), PRIMARY KEY(trans_id, stmt_id)) ENGINE = MyISAM;
-CREATE TABLE nt_2 (trans_id INT, stmt_id INT, info VARCHAR(64), PRIMARY KEY(trans_id, stmt_id)) ENGINE = MyISAM;
-CREATE TABLE nt_3 (trans_id INT, stmt_id INT, info VARCHAR(64), PRIMARY KEY(trans_id, stmt_id)) ENGINE = MyISAM;
-CREATE TABLE nt_4 (trans_id INT, stmt_id INT, info VARCHAR(64), PRIMARY KEY(trans_id, stmt_id)) ENGINE = MyISAM;
-CREATE TABLE nt_5 (trans_id INT, stmt_id INT, info VARCHAR(64), PRIMARY KEY(trans_id, stmt_id)) ENGINE = MyISAM;
-CREATE TABLE nt_6 (trans_id INT, stmt_id INT, info VARCHAR(64), PRIMARY KEY(trans_id, stmt_id)) ENGINE = MyISAM;
-CREATE TABLE tt_1 (trans_id INT, stmt_id INT, info VARCHAR(64), PRIMARY KEY(trans_id, stmt_id)) ENGINE = TokuDB;
-CREATE TABLE tt_2 (trans_id INT, stmt_id INT, info VARCHAR(64), PRIMARY KEY(trans_id, stmt_id)) ENGINE = TokuDB;
-CREATE TABLE tt_3 (trans_id INT, stmt_id INT, info VARCHAR(64), PRIMARY KEY(trans_id, stmt_id)) ENGINE = TokuDB;
-CREATE TABLE tt_4 (trans_id INT, stmt_id INT, info VARCHAR(64), PRIMARY KEY(trans_id, stmt_id)) ENGINE = TokuDB;
-CREATE TABLE tt_5 (trans_id INT, stmt_id INT, info VARCHAR(64), PRIMARY KEY(trans_id, stmt_id)) ENGINE = TokuDB;
-CREATE TABLE tt_6 (trans_id INT, stmt_id INT, info VARCHAR(64), PRIMARY KEY(trans_id, stmt_id)) ENGINE = TokuDB;
-INSERT INTO nt_1(trans_id, stmt_id) VALUES(1,1);
-INSERT INTO nt_2(trans_id, stmt_id) VALUES(1,1);
-INSERT INTO nt_3(trans_id, stmt_id) VALUES(1,1);
-INSERT INTO nt_4(trans_id, stmt_id) VALUES(1,1);
-INSERT INTO nt_5(trans_id, stmt_id) VALUES(1,1);
-INSERT INTO nt_6(trans_id, stmt_id) VALUES(1,1);
-INSERT INTO tt_1(trans_id, stmt_id) VALUES(1,1);
-INSERT INTO tt_2(trans_id, stmt_id) VALUES(1,1);
-INSERT INTO tt_3(trans_id, stmt_id) VALUES(1,1);
-INSERT INTO tt_4(trans_id, stmt_id) VALUES(1,1);
-INSERT INTO tt_5(trans_id, stmt_id) VALUES(1,1);
-INSERT INTO tt_6(trans_id, stmt_id) VALUES(1,1);
-CREATE PROCEDURE pc_i_tt_5_suc (IN p_trans_id INTEGER, IN p_stmt_id INTEGER)
-BEGIN
-DECLARE in_stmt_id INTEGER;
-SELECT max(stmt_id) INTO in_stmt_id FROM tt_5 WHERE trans_id= p_trans_id;
-SELECT COALESCE(greatest(in_stmt_id + 1, p_stmt_id), 1) INTO in_stmt_id;
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (p_trans_id, in_stmt_id);
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (p_trans_id, in_stmt_id + 1);
-END|
-CREATE PROCEDURE pc_i_nt_5_suc (IN p_trans_id INTEGER, IN p_stmt_id INTEGER)
-BEGIN
-DECLARE in_stmt_id INTEGER;
-SELECT max(stmt_id) INTO in_stmt_id FROM nt_5 WHERE trans_id= p_trans_id;
-SELECT COALESCE(greatest(in_stmt_id + 1, p_stmt_id), 1) INTO in_stmt_id;
-INSERT INTO nt_5(trans_id, stmt_id) VALUES (p_trans_id, in_stmt_id);
-INSERT INTO nt_5(trans_id, stmt_id) VALUES (p_trans_id, in_stmt_id + 1);
-END|
-CREATE FUNCTION fc_i_tt_5_suc (p_trans_id INTEGER, p_stmt_id INTEGER) RETURNS VARCHAR(64)
-BEGIN
-DECLARE in_stmt_id INTEGER;
-SELECT max(stmt_id) INTO in_stmt_id FROM tt_5 WHERE trans_id= p_trans_id;
-SELECT COALESCE(greatest(in_stmt_id + 1, p_stmt_id), 1) INTO in_stmt_id;
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (p_trans_id, in_stmt_id);
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (p_trans_id, in_stmt_id + 1);
-RETURN "fc_i_tt_5_suc";
-END|
-CREATE FUNCTION fc_i_nt_5_suc (p_trans_id INTEGER, p_stmt_id INTEGER) RETURNS VARCHAR(64)
-BEGIN
-DECLARE in_stmt_id INTEGER;
-SELECT max(stmt_id) INTO in_stmt_id FROM nt_5 WHERE trans_id= p_trans_id;
-SELECT COALESCE(greatest(in_stmt_id + 1, p_stmt_id), 1) INTO in_stmt_id;
-INSERT INTO nt_5(trans_id, stmt_id) VALUES (p_trans_id, in_stmt_id);
-INSERT INTO nt_5(trans_id, stmt_id) VALUES (p_trans_id, in_stmt_id + 1);
-RETURN "fc_i_nt_5_suc";
-END|
-CREATE FUNCTION fc_i_nt_3_tt_3_suc (p_trans_id INTEGER, p_stmt_id INTEGER) RETURNS VARCHAR(64)
-BEGIN
-DECLARE in_stmt_id INTEGER;
-SELECT max(stmt_id) INTO in_stmt_id FROM nt_3 WHERE trans_id= p_trans_id;
-SELECT COALESCE(greatest(in_stmt_id + 1, p_stmt_id), 1) INTO in_stmt_id;
-INSERT INTO nt_3(trans_id, stmt_id) VALUES (p_trans_id, in_stmt_id);
-SELECT max(stmt_id) INTO in_stmt_id FROM tt_3 WHERE trans_id= p_trans_id;
-SELECT COALESCE(greatest(in_stmt_id + 1, p_stmt_id), 1) INTO in_stmt_id;
-INSERT INTO tt_3(trans_id, stmt_id) VALUES (p_trans_id, in_stmt_id);
-RETURN "fc_i_nt_3_tt_3_suc";
-END|
-CREATE TRIGGER tr_i_tt_3_to_nt_3 AFTER INSERT ON tt_3 FOR EACH ROW
-BEGIN
-DECLARE in_stmt_id INTEGER;
-SELECT max(stmt_id) INTO in_stmt_id FROM nt_3 WHERE trans_id= NEW.trans_id;
-SELECT COALESCE(greatest(in_stmt_id + 1, NEW.stmt_id), 1) INTO in_stmt_id;
-INSERT INTO nt_3(trans_id, stmt_id) VALUES (NEW.trans_id, in_stmt_id);
-INSERT INTO nt_3(trans_id, stmt_id) VALUES (NEW.trans_id, in_stmt_id + 1);
-END|
-CREATE TRIGGER tr_i_nt_4_to_tt_4 AFTER INSERT ON nt_4 FOR EACH ROW
-BEGIN
-DECLARE in_stmt_id INTEGER;
-SELECT max(stmt_id) INTO in_stmt_id FROM tt_4 WHERE trans_id= NEW.trans_id;
-SELECT COALESCE(greatest(in_stmt_id + 1, NEW.stmt_id), 1) INTO in_stmt_id;
-INSERT INTO tt_4(trans_id, stmt_id) VALUES (NEW.trans_id, in_stmt_id);
-INSERT INTO tt_4(trans_id, stmt_id) VALUES (NEW.trans_id, in_stmt_id + 1);
-END|
-CREATE TRIGGER tr_i_tt_5_to_tt_6 AFTER INSERT ON tt_5 FOR EACH ROW
-BEGIN
-DECLARE in_stmt_id INTEGER;
-SELECT max(stmt_id) INTO in_stmt_id FROM tt_6 WHERE trans_id= NEW.trans_id;
-SELECT COALESCE(greatest(in_stmt_id + 1, NEW.stmt_id, 1), 1) INTO in_stmt_id;
-INSERT INTO tt_6(trans_id, stmt_id) VALUES (NEW.trans_id, in_stmt_id);
-INSERT INTO tt_6(trans_id, stmt_id) VALUES (NEW.trans_id, in_stmt_id + 1);
-END|
-CREATE TRIGGER tr_i_nt_5_to_nt_6 AFTER INSERT ON nt_5 FOR EACH ROW
-BEGIN
-DECLARE in_stmt_id INTEGER;
-SELECT max(stmt_id) INTO in_stmt_id FROM nt_6 WHERE trans_id= NEW.trans_id;
-SELECT COALESCE(greatest(in_stmt_id + 1, NEW.stmt_id), 1) INTO in_stmt_id;
-INSERT INTO nt_6(trans_id, stmt_id) VALUES (NEW.trans_id, in_stmt_id);
-INSERT INTO nt_6(trans_id, stmt_id) VALUES (NEW.trans_id, in_stmt_id + 1);
-END|
-###################################################################################
-# EXECUTE CASES CRASHING THE XID
-###################################################################################
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_apply";;
-FAILURE d,crash_after_apply and OUTCOME O2
-rpl_mixing_engines.inc [commands=T]
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (7, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_apply";;
-FAILURE d,crash_after_apply and OUTCOME O2
-rpl_mixing_engines.inc [commands=T-trig]
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (8, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_apply";;
-FAILURE d,crash_after_apply and OUTCOME O2
-rpl_mixing_engines.inc [commands=T-func]
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_tt_5_suc (9, 1);
-fc_i_tt_5_suc (9, 1)
-fc_i_tt_5_suc
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_apply";;
-FAILURE d,crash_after_apply and OUTCOME O2
-rpl_mixing_engines.inc [commands=T-proc]
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-CALL pc_i_tt_5_suc (10, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_apply";;
-FAILURE d,crash_after_apply and OUTCOME O2
-rpl_mixing_engines.inc [commands=B T T-trig C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (11, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (11, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T T-trig C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T T-trig C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_apply";;
-FAILURE d,crash_after_apply and OUTCOME O2
-rpl_mixing_engines.inc [commands=B T T-func C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (12, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_tt_5_suc (12, 4);
-fc_i_tt_5_suc (12, 4)
-fc_i_tt_5_suc
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T T-func C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T T-func C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_apply";;
-FAILURE d,crash_after_apply and OUTCOME O2
-rpl_mixing_engines.inc [commands=B T T-proc C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (13, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-CALL pc_i_tt_5_suc (13, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T T-proc C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T T-proc C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_apply";;
-FAILURE d,crash_after_apply and OUTCOME O2
-rpl_mixing_engines.inc [commands=B T-trig T C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (14, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (14, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T-trig T C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T-trig T C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_apply";;
-FAILURE d,crash_after_apply and OUTCOME O2
-rpl_mixing_engines.inc [commands=B T-func T C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_tt_5_suc (15, 2);
-fc_i_tt_5_suc (15, 2)
-fc_i_tt_5_suc
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (15, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T-func T C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T-func T C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_apply";;
-FAILURE d,crash_after_apply and OUTCOME O2
-rpl_mixing_engines.inc [commands=B T-proc T C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-CALL pc_i_tt_5_suc (16, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (16, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T-proc T C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T-proc T C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_before_update_pos";;
-FAILURE d,crash_before_update_pos and OUTCOME O1
-rpl_mixing_engines.inc [commands=T]
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (17, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_before_update_pos";;
-FAILURE d,crash_before_update_pos and OUTCOME O1
-rpl_mixing_engines.inc [commands=T-trig]
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (18, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_before_update_pos";;
-FAILURE d,crash_before_update_pos and OUTCOME O1
-rpl_mixing_engines.inc [commands=T-func]
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_tt_5_suc (19, 1);
-fc_i_tt_5_suc (19, 1)
-fc_i_tt_5_suc
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_before_update_pos";;
-FAILURE d,crash_before_update_pos and OUTCOME O1
-rpl_mixing_engines.inc [commands=T-proc]
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-CALL pc_i_tt_5_suc (20, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_before_update_pos";;
-FAILURE d,crash_before_update_pos and OUTCOME O1
-rpl_mixing_engines.inc [commands=B T T-trig C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (21, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (21, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T T-trig C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T T-trig C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_before_update_pos";;
-FAILURE d,crash_before_update_pos and OUTCOME O1
-rpl_mixing_engines.inc [commands=B T T-func C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (22, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_tt_5_suc (22, 4);
-fc_i_tt_5_suc (22, 4)
-fc_i_tt_5_suc
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T T-func C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T T-func C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_before_update_pos";;
-FAILURE d,crash_before_update_pos and OUTCOME O1
-rpl_mixing_engines.inc [commands=B T T-proc C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (23, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-CALL pc_i_tt_5_suc (23, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T T-proc C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T T-proc C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_before_update_pos";;
-FAILURE d,crash_before_update_pos and OUTCOME O1
-rpl_mixing_engines.inc [commands=B T-trig T C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (24, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (24, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T-trig T C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T-trig T C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_before_update_pos";;
-FAILURE d,crash_before_update_pos and OUTCOME O1
-rpl_mixing_engines.inc [commands=B T-func T C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_tt_5_suc (25, 2);
-fc_i_tt_5_suc (25, 2)
-fc_i_tt_5_suc
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (25, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T-func T C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T-func T C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_before_update_pos";;
-FAILURE d,crash_before_update_pos and OUTCOME O1
-rpl_mixing_engines.inc [commands=B T-proc T C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-CALL pc_i_tt_5_suc (26, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (26, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T-proc T C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T-proc T C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_update_pos_before_apply";;
-FAILURE d,crash_after_update_pos_before_apply and OUTCOME O1
-rpl_mixing_engines.inc [commands=T]
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (27, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_update_pos_before_apply";;
-FAILURE d,crash_after_update_pos_before_apply and OUTCOME O1
-rpl_mixing_engines.inc [commands=T-trig]
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (28, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_update_pos_before_apply";;
-FAILURE d,crash_after_update_pos_before_apply and OUTCOME O1
-rpl_mixing_engines.inc [commands=T-func]
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_tt_5_suc (29, 1);
-fc_i_tt_5_suc (29, 1)
-fc_i_tt_5_suc
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_update_pos_before_apply";;
-FAILURE d,crash_after_update_pos_before_apply and OUTCOME O1
-rpl_mixing_engines.inc [commands=T-proc]
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-CALL pc_i_tt_5_suc (30, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_update_pos_before_apply";;
-FAILURE d,crash_after_update_pos_before_apply and OUTCOME O1
-rpl_mixing_engines.inc [commands=B T T-trig C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (31, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (31, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T T-trig C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T T-trig C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_update_pos_before_apply";;
-FAILURE d,crash_after_update_pos_before_apply and OUTCOME O1
-rpl_mixing_engines.inc [commands=B T T-func C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (32, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_tt_5_suc (32, 4);
-fc_i_tt_5_suc (32, 4)
-fc_i_tt_5_suc
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T T-func C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T T-func C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_update_pos_before_apply";;
-FAILURE d,crash_after_update_pos_before_apply and OUTCOME O1
-rpl_mixing_engines.inc [commands=B T T-proc C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (33, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-CALL pc_i_tt_5_suc (33, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T T-proc C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T T-proc C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_update_pos_before_apply";;
-FAILURE d,crash_after_update_pos_before_apply and OUTCOME O1
-rpl_mixing_engines.inc [commands=B T-trig T C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (34, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (34, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T-trig T C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T-trig T C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_update_pos_before_apply";;
-FAILURE d,crash_after_update_pos_before_apply and OUTCOME O1
-rpl_mixing_engines.inc [commands=B T-func T C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_tt_5_suc (35, 2);
-fc_i_tt_5_suc (35, 2)
-fc_i_tt_5_suc
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (35, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T-func T C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T-func T C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_update_pos_before_apply";;
-FAILURE d,crash_after_update_pos_before_apply and OUTCOME O1
-rpl_mixing_engines.inc [commands=B T-proc T C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-CALL pc_i_tt_5_suc (36, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (36, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T-proc T C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T-proc T C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_before_update_pos";;
-FAILURE d,crash_after_commit_before_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=T]
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (37, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_before_update_pos";;
-FAILURE d,crash_after_commit_before_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=T-trig]
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (38, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_before_update_pos";;
-FAILURE d,crash_after_commit_before_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=T-func]
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_tt_5_suc (39, 1);
-fc_i_tt_5_suc (39, 1)
-fc_i_tt_5_suc
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_before_update_pos";;
-FAILURE d,crash_after_commit_before_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=T-proc]
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-CALL pc_i_tt_5_suc (40, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_before_update_pos";;
-FAILURE d,crash_after_commit_before_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=B T T-trig C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (41, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (41, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T T-trig C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T T-trig C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_before_update_pos";;
-FAILURE d,crash_after_commit_before_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=B T T-func C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (42, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_tt_5_suc (42, 4);
-fc_i_tt_5_suc (42, 4)
-fc_i_tt_5_suc
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T T-func C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T T-func C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_before_update_pos";;
-FAILURE d,crash_after_commit_before_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=B T T-proc C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (43, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-CALL pc_i_tt_5_suc (43, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T T-proc C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T T-proc C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_before_update_pos";;
-FAILURE d,crash_after_commit_before_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=B T-trig T C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (44, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (44, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T-trig T C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T-trig T C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_before_update_pos";;
-FAILURE d,crash_after_commit_before_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=B T-func T C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_tt_5_suc (45, 2);
-fc_i_tt_5_suc (45, 2)
-fc_i_tt_5_suc
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (45, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T-func T C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T-func T C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_before_update_pos";;
-FAILURE d,crash_after_commit_before_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=B T-proc T C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-CALL pc_i_tt_5_suc (46, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (46, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T-proc T C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_5)
-master-bin.000001 # Table_map # # table_id: # (test.tt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Table_map # # table_id: # (test.tt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T-proc T C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-###################################################################################
-# EXECUTE CASES CRASHING THE BEGIN/COMMIT
-###################################################################################
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_and_update_pos";;
-FAILURE d,crash_after_commit_and_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=N]
--b-b-b-b-b-b-b-b-b-b-b- >> N << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO nt_1(trans_id, stmt_id) VALUES (47, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.nt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Query # # COMMIT
--e-e-e-e-e-e-e-e-e-e-e- >> N << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> N << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.nt_1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Query # # COMMIT
--e-e-e-e-e-e-e-e-e-e-e- >> N << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_and_update_pos";;
-FAILURE d,crash_after_commit_and_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=N-trig]
--b-b-b-b-b-b-b-b-b-b-b- >> N-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO nt_5(trans_id, stmt_id) VALUES (48, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.nt_5)
-master-bin.000001 # Table_map # # table_id: # (test.nt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Query # # COMMIT
--e-e-e-e-e-e-e-e-e-e-e- >> N-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> N-trig << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.nt_5)
-master-bin.000001 # Table_map # # table_id: # (test.nt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Query # # COMMIT
--e-e-e-e-e-e-e-e-e-e-e- >> N-trig << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_and_update_pos";;
-FAILURE d,crash_after_commit_and_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=N-func]
--b-b-b-b-b-b-b-b-b-b-b- >> N-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_nt_5_suc (49, 1);
-fc_i_nt_5_suc (49, 1)
-fc_i_nt_5_suc
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.nt_5)
-master-bin.000001 # Table_map # # table_id: # (test.nt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Query # # COMMIT
--e-e-e-e-e-e-e-e-e-e-e- >> N-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> N-func << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.nt_5)
-master-bin.000001 # Table_map # # table_id: # (test.nt_6)
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: #
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Query # # COMMIT
--e-e-e-e-e-e-e-e-e-e-e- >> N-func << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-###################################################################################
-# CHECK CONSISTENCY
-###################################################################################
-include/sync_slave_sql_with_master.inc
-###################################################################################
-# CLEAN
-###################################################################################
-rpl_mixing_engines.inc [commands=clean]
-DROP TABLE tt_1;
-DROP TABLE tt_2;
-DROP TABLE tt_3;
-DROP TABLE tt_4;
-DROP TABLE tt_5;
-DROP TABLE tt_6;
-DROP TABLE nt_1;
-DROP TABLE nt_2;
-DROP TABLE nt_3;
-DROP TABLE nt_4;
-DROP TABLE nt_5;
-DROP TABLE nt_6;
-DROP PROCEDURE pc_i_tt_5_suc;
-DROP PROCEDURE pc_i_nt_5_suc;
-DROP FUNCTION fc_i_tt_5_suc;
-DROP FUNCTION fc_i_nt_5_suc;
-DROP FUNCTION fc_i_nt_3_tt_3_suc;
-include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_log.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_log.result
index f283b3adf80..ab33725fa3f 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_log.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_log.result
@@ -1,13 +1,16 @@
include/master-slave.inc
[connection master]
+connection slave;
include/stop_slave.inc
include/wait_for_slave_to_stop.inc
reset master;
reset slave;
start slave;
include/wait_for_slave_to_start.inc
+connection slave;
set @save_slave_ddl_exec_mode=@@global.slave_ddl_exec_mode;
set @@global.slave_ddl_exec_mode=STRICT;
+connection master;
create table t1(n int not null auto_increment primary key)ENGINE=TokuDB;
insert into t1 values (NULL);
drop table t1;
@@ -21,6 +24,7 @@ Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 # Gtid # # GTID #-#-#
master-bin.000001 # Query # # use `test`; create table t1(n int not null auto_increment primary key)ENGINE=TokuDB
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
+master-bin.000001 # Annotate_rows # # insert into t1 values (NULL)
master-bin.000001 # Table_map # # table_id: # (test.t1)
master-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F
master-bin.000001 # Xid # # COMMIT /* XID */
@@ -29,6 +33,7 @@ master-bin.000001 # Query # # use `test`; DROP TABLE `t1` /* generated by server
master-bin.000001 # Gtid # # GTID #-#-#
master-bin.000001 # Query # # use `test`; create table t1 (word char(20) not null)ENGINE=TokuDB
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
+master-bin.000001 # Annotate_rows # # load data infile '../../std_data/words.dat' into table t1 ignore 1 lines
master-bin.000001 # Table_map # # table_id: # (test.t1)
master-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F
master-bin.000001 # Xid # # COMMIT /* XID */
@@ -44,11 +49,13 @@ master-bin.000001 # Gtid # # BEGIN GTID #-#-#
include/show_binlog_events.inc
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
+master-bin.000001 # Annotate_rows # # insert into t1 values (NULL)
master-bin.000001 # Table_map # # table_id: # (test.t1)
master-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F
master-bin.000001 # Xid # # COMMIT /* XID */
flush logs;
create table t3 (a int)ENGINE=TokuDB;
+connection master;
select * from t1 order by 1 asc;
word
Aarhus
@@ -120,6 +127,7 @@ Aberdeen
Abernathy
aberrant
aberration
+connection slave;
select * from t1 order by 1 asc;
word
Aarhus
@@ -194,6 +202,7 @@ aberration
flush logs;
include/stop_slave.inc
include/start_slave.inc
+connection master;
create table t2 (n int)ENGINE=TokuDB;
insert into t2 values (1);
include/show_binlog_events.inc
@@ -201,6 +210,7 @@ Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 # Gtid # # GTID #-#-#
master-bin.000001 # Query # # use `test`; create table t1(n int not null auto_increment primary key)ENGINE=TokuDB
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
+master-bin.000001 # Annotate_rows # # insert into t1 values (NULL)
master-bin.000001 # Table_map # # table_id: # (test.t1)
master-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F
master-bin.000001 # Xid # # COMMIT /* XID */
@@ -209,6 +219,7 @@ master-bin.000001 # Query # # use `test`; DROP TABLE `t1` /* generated by server
master-bin.000001 # Gtid # # GTID #-#-#
master-bin.000001 # Query # # use `test`; create table t1 (word char(20) not null)ENGINE=TokuDB
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
+master-bin.000001 # Annotate_rows # # load data infile '../../std_data/words.dat' into table t1 ignore 1 lines
master-bin.000001 # Table_map # # table_id: # (test.t1)
master-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F
master-bin.000001 # Xid # # COMMIT /* XID */
@@ -220,6 +231,7 @@ master-bin.000002 # Query # # use `test`; create table t3 (a int)ENGINE=TokuDB
master-bin.000002 # Gtid # # GTID #-#-#
master-bin.000002 # Query # # use `test`; create table t2 (n int)ENGINE=TokuDB
master-bin.000002 # Gtid # # BEGIN GTID #-#-#
+master-bin.000002 # Annotate_rows # # insert into t2 values (1)
master-bin.000002 # Table_map # # table_id: # (test.t2)
master-bin.000002 # Write_rows_v1 # # table_id: # flags: STMT_END_F
master-bin.000002 # Xid # # COMMIT /* XID */
@@ -227,6 +239,7 @@ show binary logs;
Log_name File_size
master-bin.000001 #
master-bin.000002 #
+connection slave;
show binary logs;
Log_name File_size
slave-bin.000001 #
@@ -236,6 +249,7 @@ Log_name Pos Event_type Server_id End_log_pos Info
slave-bin.000001 # Gtid # # GTID #-#-#
slave-bin.000001 # Query # # use `test`; create table t1(n int not null auto_increment primary key)ENGINE=TokuDB
slave-bin.000001 # Gtid # # BEGIN GTID #-#-#
+slave-bin.000001 # Annotate_rows # # insert into t1 values (NULL)
slave-bin.000001 # Table_map # # table_id: # (test.t1)
slave-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F
slave-bin.000001 # Xid # # COMMIT /* XID */
@@ -244,6 +258,7 @@ slave-bin.000001 # Query # # use `test`; DROP TABLE `t1` /* generated by server
slave-bin.000001 # Gtid # # GTID #-#-#
slave-bin.000001 # Query # # use `test`; create table t1 (word char(20) not null)ENGINE=TokuDB
slave-bin.000001 # Gtid # # BEGIN GTID #-#-#
+slave-bin.000001 # Annotate_rows # # load data infile '../../std_data/words.dat' into table t1 ignore 1 lines
slave-bin.000001 # Table_map # # table_id: # (test.t1)
slave-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F
slave-bin.000001 # Xid # # COMMIT /* XID */
@@ -255,16 +270,19 @@ Log_name Pos Event_type Server_id End_log_pos Info
slave-bin.000002 # Gtid # # GTID #-#-#
slave-bin.000002 # Query # # use `test`; create table t2 (n int)ENGINE=TokuDB
slave-bin.000002 # Gtid # # BEGIN GTID #-#-#
+slave-bin.000002 # Annotate_rows # # insert into t2 values (1)
slave-bin.000002 # Table_map # # table_id: # (test.t2)
slave-bin.000002 # Write_rows_v1 # # table_id: # flags: STMT_END_F
slave-bin.000002 # Xid # # COMMIT /* XID */
include/check_slave_is_running.inc
show binlog events in 'slave-bin.000005' from 4;
ERROR HY000: Error when executing command SHOW BINLOG EVENTS: Could not find target log
+connection master;
DROP TABLE t1;
DROP TABLE t2;
DROP TABLE t3;
include/rpl_reset.inc
+connection master;
create table t1(a int auto_increment primary key, b int);
insert into t1 values (NULL, 1);
set insert_id=5;
@@ -274,10 +292,12 @@ Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 # Gtid # # GTID #-#-#
master-bin.000001 # Query # # use `test`; create table t1(a int auto_increment primary key, b int)
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
+master-bin.000001 # Annotate_rows # # insert into t1 values (NULL, 1)
master-bin.000001 # Table_map # # table_id: # (test.t1)
master-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F
master-bin.000001 # Query # # COMMIT
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
+master-bin.000001 # Annotate_rows # # insert into t1 values (NULL, last_insert_id()), (NULL, last_insert_id())
master-bin.000001 # Table_map # # table_id: # (test.t1)
master-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F
master-bin.000001 # Query # # COMMIT
@@ -287,5 +307,7 @@ a b
5 1
6 1
drop table t1;
+connection slave;
set @@global.slave_ddl_exec_mode=@save_slave_ddl_exec_mode;
+connection master;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_sp003.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_sp003.result
index 4d9497386d6..c3345d52d06 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_sp003.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_sp003.result
@@ -1,5 +1,6 @@
include/master-slave.inc
[connection master]
+connection master;
DROP PROCEDURE IF EXISTS test.p1;
DROP PROCEDURE IF EXISTS test.p2;
DROP TABLE IF EXISTS test.t1;
@@ -17,22 +18,29 @@ END|
SELECT get_lock("test", 200);
get_lock("test", 200)
1
+connection master1;
CALL test.p1();
+connection master;
CALL test.p2();
SELECT release_lock("test");
release_lock("test")
1
+connection master1;
get_lock("test", 100)
1
SELECT release_lock("test");
release_lock("test")
1
+connection master;
SELECT * FROM test.t1;
a
5
+connection slave;
+connection slave;
SELECT * FROM test.t1;
a
5
+connection master;
DROP TABLE IF EXISTS test.t1;
CREATE TABLE test.t1(a INT,PRIMARY KEY(a))ENGINE=TOKUDB;
CALL test.p2();
@@ -45,10 +53,14 @@ release_lock("test")
SELECT * FROM test.t1;
a
8
+connection slave;
+connection slave;
SELECT * FROM test.t1;
a
8
+connection master;
DROP PROCEDURE IF EXISTS test.p1;
DROP PROCEDURE IF EXISTS test.p2;
DROP TABLE IF EXISTS test.t1;
+connection slave;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_sp006.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_sp006.result
index c8da9ade375..33c6c266950 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_sp006.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_sp006.result
@@ -33,9 +33,14 @@ BEGIN
INSERT INTO t1 VALUES ('MySQL','1993-02-04'),('ROCKS', '1990-08-27'),('Texas', '1999-03-30'),('kyle','2005-1-1');
END|
CALL p2();
+connection slave;
+connection master;
CALL p1();
+connection slave;
+connection master;
DROP TABLE t1;
DROP TABLE t2;
DROP PROCEDURE p1;
DROP PROCEDURE p2;
+connection slave;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_trig004.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_trig004.result
index a0573fac89e..4d7ab112bc8 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_trig004.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_trig004.result
@@ -1,5 +1,6 @@
include/master-slave.inc
[connection master]
+connection master;
DROP TRIGGER test.t1_bi_t2;
DROP TABLE IF EXISTS test.t1;
DROP TABLE IF EXISTS test.t2;
@@ -15,13 +16,17 @@ n d
select * from test.t2;
n f
1 0
+connection slave;
+connection slave;
select * from test.t1;
n d
1 1.234
select * from test.t2;
n f
1 0
+connection master;
DROP TRIGGER test.t1_bi_t2;
DROP TABLE test.t1;
DROP TABLE test.t2;
+connection slave;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_stm_log.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_stm_log.result
index 0334000f12e..652ef18c039 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_stm_log.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_stm_log.result
@@ -1,13 +1,16 @@
include/master-slave.inc
[connection master]
+connection slave;
include/stop_slave.inc
include/wait_for_slave_to_stop.inc
reset master;
reset slave;
start slave;
include/wait_for_slave_to_start.inc
+connection slave;
set @save_slave_ddl_exec_mode=@@global.slave_ddl_exec_mode;
set @@global.slave_ddl_exec_mode=STRICT;
+connection master;
create table t1(n int not null auto_increment primary key)ENGINE=TokuDB;
insert into t1 values (NULL);
drop table t1;
@@ -47,8 +50,10 @@ master-bin.000001 # Gtid # # BEGIN GTID #-#-#
master-bin.000001 # Intvar # # INSERT_ID=1
master-bin.000001 # Query # # use `test`; insert into t1 values (NULL)
master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Gtid # # GTID #-#-#
flush logs;
create table t3 (a int)ENGINE=TokuDB;
+connection master;
select * from t1 order by 1 asc;
word
Aarhus
@@ -120,6 +125,7 @@ Aberdeen
Abernathy
aberrant
aberration
+connection slave;
select * from t1 order by 1 asc;
word
Aarhus
@@ -194,6 +200,7 @@ aberration
flush logs;
include/stop_slave.inc
include/start_slave.inc
+connection master;
create table t2 (n int)ENGINE=TokuDB;
insert into t2 values (1);
include/show_binlog_events.inc
@@ -226,6 +233,7 @@ show binary logs;
Log_name File_size
master-bin.000001 #
master-bin.000002 #
+connection slave;
show binary logs;
Log_name File_size
slave-bin.000001 #
@@ -259,10 +267,12 @@ slave-bin.000002 # Xid # # COMMIT /* XID */
include/check_slave_is_running.inc
show binlog events in 'slave-bin.000005' from 4;
ERROR HY000: Error when executing command SHOW BINLOG EVENTS: Could not find target log
+connection master;
DROP TABLE t1;
DROP TABLE t2;
DROP TABLE t3;
include/rpl_reset.inc
+connection master;
create table t1(a int auto_increment primary key, b int);
insert into t1 values (NULL, 1);
set insert_id=5;
@@ -286,5 +296,7 @@ a b
5 1
6 1
drop table t1;
+connection slave;
set @@global.slave_ddl_exec_mode=@save_slave_ddl_exec_mode;
+connection master;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_stm_mixed_crash_safe.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_stm_mixed_crash_safe.result
deleted file mode 100644
index 226a2b93140..00000000000
--- a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_stm_mixed_crash_safe.result
+++ /dev/null
@@ -1,1773 +0,0 @@
-include/master-slave.inc
-Warnings:
-Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
-Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
-[connection master]
-call mtr.add_suppression('Attempting backtrace');
-call mtr.add_suppression("Recovery from master pos .* and file master-bin.000001");
-call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
-call mtr.add_suppression(".* InnoDB: Warning: allocated tablespace .*, old maximum was .*");
-###################################################################################
-# PREPARE EXECUTION
-###################################################################################
-include/stop_slave.inc
-SHOW CREATE TABLE mysql.slave_relay_log_info;
-Table Create Table
-slave_relay_log_info CREATE TABLE `slave_relay_log_info` (
- `Number_of_lines` int(10) unsigned NOT NULL COMMENT 'Number of lines in the file or rows in the table. Used to version table definitions.',
- `Relay_log_name` text CHARACTER SET utf8 COLLATE utf8_bin NOT NULL COMMENT 'The name of the current relay log file.',
- `Relay_log_pos` bigint(20) unsigned NOT NULL COMMENT 'The relay log position of the last executed event.',
- `Master_log_name` text CHARACTER SET utf8 COLLATE utf8_bin NOT NULL COMMENT 'The name of the master binary log file from which the events in the relay log file were read.',
- `Master_log_pos` bigint(20) unsigned NOT NULL COMMENT 'The master log position of the last executed event.',
- `Sql_delay` int(11) NOT NULL COMMENT 'The number of seconds that the slave must lag behind the master.',
- `Number_of_workers` int(10) unsigned NOT NULL,
- `Id` int(10) unsigned NOT NULL COMMENT 'Internal Id that uniquely identifies this record.',
- PRIMARY KEY (`Id`)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8 STATS_PERSISTENT=0 COMMENT='Relay Log Information'
-SHOW CREATE TABLE mysql.slave_worker_info;
-Table Create Table
-slave_worker_info CREATE TABLE `slave_worker_info` (
- `Id` int(10) unsigned NOT NULL,
- `Relay_log_name` text CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,
- `Relay_log_pos` bigint(20) unsigned NOT NULL,
- `Master_log_name` text CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,
- `Master_log_pos` bigint(20) unsigned NOT NULL,
- `Checkpoint_relay_log_name` text CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,
- `Checkpoint_relay_log_pos` bigint(20) unsigned NOT NULL,
- `Checkpoint_master_log_name` text CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,
- `Checkpoint_master_log_pos` bigint(20) unsigned NOT NULL,
- `Checkpoint_seqno` int(10) unsigned NOT NULL,
- `Checkpoint_group_size` int(10) unsigned NOT NULL,
- `Checkpoint_group_bitmap` blob NOT NULL,
- PRIMARY KEY (`Id`)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8 STATS_PERSISTENT=0 COMMENT='Worker Information'
-ALTER TABLE mysql.slave_relay_log_info ENGINE= Innodb;
-ALTER TABLE mysql.slave_worker_info ENGINE= Innodb;
-SHOW CREATE TABLE mysql.slave_relay_log_info;
-Table Create Table
-slave_relay_log_info CREATE TABLE `slave_relay_log_info` (
- `Number_of_lines` int(10) unsigned NOT NULL COMMENT 'Number of lines in the file or rows in the table. Used to version table definitions.',
- `Relay_log_name` text CHARACTER SET utf8 COLLATE utf8_bin NOT NULL COMMENT 'The name of the current relay log file.',
- `Relay_log_pos` bigint(20) unsigned NOT NULL COMMENT 'The relay log position of the last executed event.',
- `Master_log_name` text CHARACTER SET utf8 COLLATE utf8_bin NOT NULL COMMENT 'The name of the master binary log file from which the events in the relay log file were read.',
- `Master_log_pos` bigint(20) unsigned NOT NULL COMMENT 'The master log position of the last executed event.',
- `Sql_delay` int(11) NOT NULL COMMENT 'The number of seconds that the slave must lag behind the master.',
- `Number_of_workers` int(10) unsigned NOT NULL,
- `Id` int(10) unsigned NOT NULL COMMENT 'Internal Id that uniquely identifies this record.',
- PRIMARY KEY (`Id`)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8 STATS_PERSISTENT=0 COMMENT='Relay Log Information'
-SHOW CREATE TABLE mysql.slave_worker_info;
-Table Create Table
-slave_worker_info CREATE TABLE `slave_worker_info` (
- `Id` int(10) unsigned NOT NULL,
- `Relay_log_name` text CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,
- `Relay_log_pos` bigint(20) unsigned NOT NULL,
- `Master_log_name` text CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,
- `Master_log_pos` bigint(20) unsigned NOT NULL,
- `Checkpoint_relay_log_name` text CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,
- `Checkpoint_relay_log_pos` bigint(20) unsigned NOT NULL,
- `Checkpoint_master_log_name` text CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,
- `Checkpoint_master_log_pos` bigint(20) unsigned NOT NULL,
- `Checkpoint_seqno` int(10) unsigned NOT NULL,
- `Checkpoint_group_size` int(10) unsigned NOT NULL,
- `Checkpoint_group_bitmap` blob NOT NULL,
- PRIMARY KEY (`Id`)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8 STATS_PERSISTENT=0 COMMENT='Worker Information'
-include/start_slave.inc
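-#
-# Note: the two ALTER statements above pin the replication position
-# repositories (mysql.slave_relay_log_info and mysql.slave_worker_info)
-# to the transactional InnoDB engine (the SHOW CREATE output confirms the
-# engine before and after), so that applied changes and the recorded
-# position can commit atomically and survive the crashes induced below.
-# An illustrative check of the repository engines, not part of the
-# recorded run:
-#
-#   SELECT TABLE_NAME, ENGINE FROM information_schema.TABLES
-#   WHERE TABLE_SCHEMA = 'mysql'
-#     AND TABLE_NAME IN ('slave_relay_log_info', 'slave_worker_info');
-#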
-rpl_mixing_engines.inc [commands=configure]
-CREATE TABLE nt_1 (trans_id INT, stmt_id INT, info VARCHAR(64), PRIMARY KEY(trans_id, stmt_id)) ENGINE = MyISAM;
-CREATE TABLE nt_2 (trans_id INT, stmt_id INT, info VARCHAR(64), PRIMARY KEY(trans_id, stmt_id)) ENGINE = MyISAM;
-CREATE TABLE nt_3 (trans_id INT, stmt_id INT, info VARCHAR(64), PRIMARY KEY(trans_id, stmt_id)) ENGINE = MyISAM;
-CREATE TABLE nt_4 (trans_id INT, stmt_id INT, info VARCHAR(64), PRIMARY KEY(trans_id, stmt_id)) ENGINE = MyISAM;
-CREATE TABLE nt_5 (trans_id INT, stmt_id INT, info VARCHAR(64), PRIMARY KEY(trans_id, stmt_id)) ENGINE = MyISAM;
-CREATE TABLE nt_6 (trans_id INT, stmt_id INT, info VARCHAR(64), PRIMARY KEY(trans_id, stmt_id)) ENGINE = MyISAM;
-CREATE TABLE tt_1 (trans_id INT, stmt_id INT, info VARCHAR(64), PRIMARY KEY(trans_id, stmt_id)) ENGINE = TokuDB;
-CREATE TABLE tt_2 (trans_id INT, stmt_id INT, info VARCHAR(64), PRIMARY KEY(trans_id, stmt_id)) ENGINE = TokuDB;
-CREATE TABLE tt_3 (trans_id INT, stmt_id INT, info VARCHAR(64), PRIMARY KEY(trans_id, stmt_id)) ENGINE = TokuDB;
-CREATE TABLE tt_4 (trans_id INT, stmt_id INT, info VARCHAR(64), PRIMARY KEY(trans_id, stmt_id)) ENGINE = TokuDB;
-CREATE TABLE tt_5 (trans_id INT, stmt_id INT, info VARCHAR(64), PRIMARY KEY(trans_id, stmt_id)) ENGINE = TokuDB;
-CREATE TABLE tt_6 (trans_id INT, stmt_id INT, info VARCHAR(64), PRIMARY KEY(trans_id, stmt_id)) ENGINE = TokuDB;
-INSERT INTO nt_1(trans_id, stmt_id) VALUES(1,1);
-INSERT INTO nt_2(trans_id, stmt_id) VALUES(1,1);
-INSERT INTO nt_3(trans_id, stmt_id) VALUES(1,1);
-INSERT INTO nt_4(trans_id, stmt_id) VALUES(1,1);
-INSERT INTO nt_5(trans_id, stmt_id) VALUES(1,1);
-INSERT INTO nt_6(trans_id, stmt_id) VALUES(1,1);
-INSERT INTO tt_1(trans_id, stmt_id) VALUES(1,1);
-INSERT INTO tt_2(trans_id, stmt_id) VALUES(1,1);
-INSERT INTO tt_3(trans_id, stmt_id) VALUES(1,1);
-INSERT INTO tt_4(trans_id, stmt_id) VALUES(1,1);
-INSERT INTO tt_5(trans_id, stmt_id) VALUES(1,1);
-INSERT INTO tt_6(trans_id, stmt_id) VALUES(1,1);
-CREATE PROCEDURE pc_i_tt_5_suc (IN p_trans_id INTEGER, IN p_stmt_id INTEGER)
-BEGIN
-DECLARE in_stmt_id INTEGER;
-SELECT max(stmt_id) INTO in_stmt_id FROM tt_5 WHERE trans_id= p_trans_id;
-SELECT COALESCE(greatest(in_stmt_id + 1, p_stmt_id), 1) INTO in_stmt_id;
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (p_trans_id, in_stmt_id);
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (p_trans_id, in_stmt_id + 1);
-END|
-CREATE PROCEDURE pc_i_nt_5_suc (IN p_trans_id INTEGER, IN p_stmt_id INTEGER)
-BEGIN
-DECLARE in_stmt_id INTEGER;
-SELECT max(stmt_id) INTO in_stmt_id FROM nt_5 WHERE trans_id= p_trans_id;
-SELECT COALESCE(greatest(in_stmt_id + 1, p_stmt_id), 1) INTO in_stmt_id;
-INSERT INTO nt_5(trans_id, stmt_id) VALUES (p_trans_id, in_stmt_id);
-INSERT INTO nt_5(trans_id, stmt_id) VALUES (p_trans_id, in_stmt_id + 1);
-END|
-CREATE FUNCTION fc_i_tt_5_suc (p_trans_id INTEGER, p_stmt_id INTEGER) RETURNS VARCHAR(64)
-BEGIN
-DECLARE in_stmt_id INTEGER;
-SELECT max(stmt_id) INTO in_stmt_id FROM tt_5 WHERE trans_id= p_trans_id;
-SELECT COALESCE(greatest(in_stmt_id + 1, p_stmt_id), 1) INTO in_stmt_id;
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (p_trans_id, in_stmt_id);
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (p_trans_id, in_stmt_id + 1);
-RETURN "fc_i_tt_5_suc";
-END|
-CREATE FUNCTION fc_i_nt_5_suc (p_trans_id INTEGER, p_stmt_id INTEGER) RETURNS VARCHAR(64)
-BEGIN
-DECLARE in_stmt_id INTEGER;
-SELECT max(stmt_id) INTO in_stmt_id FROM nt_5 WHERE trans_id= p_trans_id;
-SELECT COALESCE(greatest(in_stmt_id + 1, p_stmt_id), 1) INTO in_stmt_id;
-INSERT INTO nt_5(trans_id, stmt_id) VALUES (p_trans_id, in_stmt_id);
-INSERT INTO nt_5(trans_id, stmt_id) VALUES (p_trans_id, in_stmt_id + 1);
-RETURN "fc_i_nt_5_suc";
-END|
-CREATE FUNCTION fc_i_nt_3_tt_3_suc (p_trans_id INTEGER, p_stmt_id INTEGER) RETURNS VARCHAR(64)
-BEGIN
-DECLARE in_stmt_id INTEGER;
-SELECT max(stmt_id) INTO in_stmt_id FROM nt_3 WHERE trans_id= p_trans_id;
-SELECT COALESCE(greatest(in_stmt_id + 1, p_stmt_id), 1) INTO in_stmt_id;
-INSERT INTO nt_3(trans_id, stmt_id) VALUES (p_trans_id, in_stmt_id);
-SELECT max(stmt_id) INTO in_stmt_id FROM tt_3 WHERE trans_id= p_trans_id;
-SELECT COALESCE(greatest(in_stmt_id + 1, p_stmt_id), 1) INTO in_stmt_id;
-INSERT INTO tt_3(trans_id, stmt_id) VALUES (p_trans_id, in_stmt_id);
-RETURN "fc_i_nt_3_tt_3_suc";
-END|
-CREATE TRIGGER tr_i_tt_3_to_nt_3 AFTER INSERT ON tt_3 FOR EACH ROW
-BEGIN
-DECLARE in_stmt_id INTEGER;
-SELECT max(stmt_id) INTO in_stmt_id FROM nt_3 WHERE trans_id= NEW.trans_id;
-SELECT COALESCE(greatest(in_stmt_id + 1, NEW.stmt_id), 1) INTO in_stmt_id;
-INSERT INTO nt_3(trans_id, stmt_id) VALUES (NEW.trans_id, in_stmt_id);
-INSERT INTO nt_3(trans_id, stmt_id) VALUES (NEW.trans_id, in_stmt_id + 1);
-END|
-CREATE TRIGGER tr_i_nt_4_to_tt_4 AFTER INSERT ON nt_4 FOR EACH ROW
-BEGIN
-DECLARE in_stmt_id INTEGER;
-SELECT max(stmt_id) INTO in_stmt_id FROM tt_4 WHERE trans_id= NEW.trans_id;
-SELECT COALESCE(greatest(in_stmt_id + 1, NEW.stmt_id), 1) INTO in_stmt_id;
-INSERT INTO tt_4(trans_id, stmt_id) VALUES (NEW.trans_id, in_stmt_id);
-INSERT INTO tt_4(trans_id, stmt_id) VALUES (NEW.trans_id, in_stmt_id + 1);
-END|
-CREATE TRIGGER tr_i_tt_5_to_tt_6 AFTER INSERT ON tt_5 FOR EACH ROW
-BEGIN
-DECLARE in_stmt_id INTEGER;
-SELECT max(stmt_id) INTO in_stmt_id FROM tt_6 WHERE trans_id= NEW.trans_id;
-SELECT COALESCE(greatest(in_stmt_id + 1, NEW.stmt_id), 1) INTO in_stmt_id;
-INSERT INTO tt_6(trans_id, stmt_id) VALUES (NEW.trans_id, in_stmt_id);
-INSERT INTO tt_6(trans_id, stmt_id) VALUES (NEW.trans_id, in_stmt_id + 1);
-END|
-CREATE TRIGGER tr_i_nt_5_to_nt_6 AFTER INSERT ON nt_5 FOR EACH ROW
-BEGIN
-DECLARE in_stmt_id INTEGER;
-SELECT max(stmt_id) INTO in_stmt_id FROM nt_6 WHERE trans_id= NEW.trans_id;
-SELECT COALESCE(greatest(in_stmt_id + 1, NEW.stmt_id), 1) INTO in_stmt_id;
-INSERT INTO nt_6(trans_id, stmt_id) VALUES (NEW.trans_id, in_stmt_id);
-INSERT INTO nt_6(trans_id, stmt_id) VALUES (NEW.trans_id, in_stmt_id + 1);
-END|
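-#
-# Fixture conventions (restated for readability; the definitions above are
-# authoritative): nt_N tables are non-transactional (MyISAM), tt_N tables
-# are transactional (TokuDB). Helpers are named <kind>_i_<target>_suc: the
-# pc_* procedures and fc_* functions insert two rows into their target, and
-# the tr_* triggers chain inserts across tables (tt_3 -> nt_3, nt_4 -> tt_4,
-# tt_5 -> tt_6, nt_5 -> nt_6). A hypothetical smoke test of the fixture,
-# not part of the recorded run:
-#
-#   CALL pc_i_tt_5_suc(100, 1);          -- two tt_5 rows; each fires tr_i_tt_5_to_tt_6
-#   SELECT fc_i_nt_3_tt_3_suc(100, 1);   -- inserts into nt_3 and tt_3 (tt_3's trigger adds nt_3 rows)
-#   SELECT * FROM tt_6 WHERE trans_id = 100;
-#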
-###################################################################################
-# EXECUTE CASES CRASHING THE XID
-###################################################################################
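-#
-# Each case below follows one cycle: stop the slave SQL thread, arm a
-# single DBUG crash point via @@global.debug, replay a command mix from
-# rpl_mixing_engines.inc on the master, let the slave crash while handling
-# the transaction's XID, then reconnect and restart it. The FAILURE/OUTCOME
-# echo names the armed crash point and the expected recovery result; O1 and
-# O2 presumably distinguish whether the transaction has to be retried after
-# restart or was already committed durably together with the position. A
-# minimal sketch of one cycle, paraphrasing the driver (source paths
-# assumed; the actual .test file may differ):
-#
-#   STOP SLAVE SQL_THREAD;
-#   SET GLOBAL debug="d,crash_after_apply";           # arm the crash point
-#   --source extra/rpl_tests/rpl_mixing_engines.inc   # run commands=...
-#   --source include/rpl_reconnect.inc                # slave died; restart it
-#   START SLAVE;
-#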
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_apply";
-FAILURE d,crash_after_apply and OUTCOME O2
-rpl_mixing_engines.inc [commands=T]
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (7, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (7, 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (7, 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_apply";
-FAILURE d,crash_after_apply and OUTCOME O2
-rpl_mixing_engines.inc [commands=T-trig]
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (8, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES (8, 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES (8, 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_apply";
-FAILURE d,crash_after_apply and OUTCOME O2
-rpl_mixing_engines.inc [commands=T-func]
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_tt_5_suc (9, 1);
-fc_i_tt_5_suc (9, 1)
-fc_i_tt_5_suc
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_tt_5_suc`(9,1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_tt_5_suc`(9,1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_apply";
-FAILURE d,crash_after_apply and OUTCOME O2
-rpl_mixing_engines.inc [commands=T-proc]
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-CALL pc_i_tt_5_suc (10, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',10), NAME_CONST('in_stmt_id',1))
-master-bin.000001 # Xid # # COMMIT /* XID */
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',10), NAME_CONST('in_stmt_id',1) + 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',10), NAME_CONST('in_stmt_id',1))
-master-bin.000001 # Xid # # COMMIT /* XID */
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',10), NAME_CONST('in_stmt_id',1) + 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
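-#
-# Note on the T-proc output above: with no enclosing BEGIN each statement
-# autocommits, so the two INSERTs issued by pc_i_tt_5_suc are logged as two
-# separate BEGIN ... COMMIT /* XID */ transactions, whereas inside an
-# explicit B ... C mix (see the following cases) the whole procedure shares
-# one XID. An illustrative contrast, not part of the recorded run:
-#
-#   CALL pc_i_tt_5_suc(10, 1);    -- autocommit: two XID transactions
-#   BEGIN;
-#   CALL pc_i_tt_5_suc(13, 4);
-#   COMMIT;                       -- one XID transaction for the whole mix
-#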
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_apply";
-FAILURE d,crash_after_apply and OUTCOME O2
-rpl_mixing_engines.inc [commands=B T T-trig C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (11, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (11, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (11, 2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES (11, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T T-trig C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (11, 2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES (11, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T T-trig C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_apply";
-FAILURE d,crash_after_apply and OUTCOME O2
-rpl_mixing_engines.inc [commands=B T T-func C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (12, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_tt_5_suc (12, 4);
-fc_i_tt_5_suc (12, 4)
-fc_i_tt_5_suc
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (12, 2)
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_tt_5_suc`(12,4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T T-func C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (12, 2)
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_tt_5_suc`(12,4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T T-func C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_apply";
-FAILURE d,crash_after_apply and OUTCOME O2
-rpl_mixing_engines.inc [commands=B T T-proc C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (13, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-CALL pc_i_tt_5_suc (13, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (13, 2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',13), NAME_CONST('in_stmt_id',1))
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',13), NAME_CONST('in_stmt_id',1) + 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T T-proc C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (13, 2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',13), NAME_CONST('in_stmt_id',1))
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',13), NAME_CONST('in_stmt_id',1) + 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T T-proc C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_apply";
-FAILURE d,crash_after_apply and OUTCOME O2
-rpl_mixing_engines.inc [commands=B T-trig T C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (14, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (14, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES (14, 2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (14, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T-trig T C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES (14, 2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (14, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T-trig T C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_apply";
-FAILURE d,crash_after_apply and OUTCOME O2
-rpl_mixing_engines.inc [commands=B T-func T C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_tt_5_suc (15, 2);
-fc_i_tt_5_suc (15, 2)
-fc_i_tt_5_suc
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (15, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_tt_5_suc`(15,2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (15, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T-func T C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_tt_5_suc`(15,2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (15, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T-func T C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_apply";
-FAILURE d,crash_after_apply and OUTCOME O2
-rpl_mixing_engines.inc [commands=B T-proc T C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-CALL pc_i_tt_5_suc (16, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (16, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',16), NAME_CONST('in_stmt_id',1))
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',16), NAME_CONST('in_stmt_id',1) + 1)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (16, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T-proc T C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',16), NAME_CONST('in_stmt_id',1))
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',16), NAME_CONST('in_stmt_id',1) + 1)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (16, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T-proc T C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
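-#
-# The cases below re-run the same command mixes with the crash point moved
-# to d,crash_before_update_pos; the expected outcome switches from O2 to O1
-# (compare the FAILURE/OUTCOME echoes), consistent with the crash happening
-# before the new position is recorded.
-#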
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_before_update_pos";
-FAILURE d,crash_before_update_pos and OUTCOME O1
-rpl_mixing_engines.inc [commands=T]
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (17, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (17, 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (17, 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_before_update_pos";
-FAILURE d,crash_before_update_pos and OUTCOME O1
-rpl_mixing_engines.inc [commands=T-trig]
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (18, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES (18, 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES (18, 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_before_update_pos";
-FAILURE d,crash_before_update_pos and OUTCOME O1
-rpl_mixing_engines.inc [commands=T-func]
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_tt_5_suc (19, 1);
-fc_i_tt_5_suc (19, 1)
-fc_i_tt_5_suc
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_tt_5_suc`(19,1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_tt_5_suc`(19,1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_before_update_pos";
-FAILURE d,crash_before_update_pos and OUTCOME O1
-rpl_mixing_engines.inc [commands=T-proc]
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-CALL pc_i_tt_5_suc (20, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',20), NAME_CONST('in_stmt_id',1))
-master-bin.000001 # Xid # # COMMIT /* XID */
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',20), NAME_CONST('in_stmt_id',1) + 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',20), NAME_CONST('in_stmt_id',1))
-master-bin.000001 # Xid # # COMMIT /* XID */
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',20), NAME_CONST('in_stmt_id',1) + 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_before_update_pos";
-FAILURE d,crash_before_update_pos and OUTCOME O1
-rpl_mixing_engines.inc [commands=B T T-trig C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (21, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (21, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (21, 2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES (21, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T T-trig C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (21, 2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES (21, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T T-trig C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_before_update_pos";
-FAILURE d,crash_before_update_pos and OUTCOME O1
-rpl_mixing_engines.inc [commands=B T T-func C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (22, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_tt_5_suc (22, 4);
-fc_i_tt_5_suc (22, 4)
-fc_i_tt_5_suc
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (22, 2)
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_tt_5_suc`(22,4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T T-func C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (22, 2)
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_tt_5_suc`(22,4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T T-func C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_before_update_pos";
-FAILURE d,crash_before_update_pos and OUTCOME O1
-rpl_mixing_engines.inc [commands=B T T-proc C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (23, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-CALL pc_i_tt_5_suc (23, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (23, 2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',23), NAME_CONST('in_stmt_id',1))
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',23), NAME_CONST('in_stmt_id',1) + 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T T-proc C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (23, 2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',23), NAME_CONST('in_stmt_id',1))
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',23), NAME_CONST('in_stmt_id',1) + 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T T-proc C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_before_update_pos";
-FAILURE d,crash_before_update_pos and OUTCOME O1
-rpl_mixing_engines.inc [commands=B T-trig T C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (24, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (24, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES (24, 2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (24, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T-trig T C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES (24, 2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (24, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T-trig T C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_before_update_pos";
-FAILURE d,crash_before_update_pos and OUTCOME O1
-rpl_mixing_engines.inc [commands=B T-func T C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_tt_5_suc (25, 2);
-fc_i_tt_5_suc (25, 2)
-fc_i_tt_5_suc
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (25, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_tt_5_suc`(25,2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (25, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T-func T C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_tt_5_suc`(25,2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (25, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T-func T C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_before_update_pos";
-FAILURE d,crash_before_update_pos and OUTCOME O1
-rpl_mixing_engines.inc [commands=B T-proc T C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-CALL pc_i_tt_5_suc (26, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (26, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',26), NAME_CONST('in_stmt_id',1))
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',26), NAME_CONST('in_stmt_id',1) + 1)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (26, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T-proc T C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',26), NAME_CONST('in_stmt_id',1))
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',26), NAME_CONST('in_stmt_id',1) + 1)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (26, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T-proc T C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
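-#
-# Next crash point: d,crash_after_update_pos_before_apply. Judging by the
-# name, the slave dies after recording the new position but before applying
-# the changes; the expected outcome is again O1.
-#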
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_update_pos_before_apply";
-FAILURE d,crash_after_update_pos_before_apply and OUTCOME O1
-rpl_mixing_engines.inc [commands=T]
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (27, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (27, 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (27, 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_update_pos_before_apply";
-FAILURE d,crash_after_update_pos_before_apply and OUTCOME O1
-rpl_mixing_engines.inc [commands=T-trig]
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (28, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES (28, 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES (28, 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_update_pos_before_apply";
-FAILURE d,crash_after_update_pos_before_apply and OUTCOME O1
-rpl_mixing_engines.inc [commands=T-func]
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_tt_5_suc (29, 1);
-fc_i_tt_5_suc (29, 1)
-fc_i_tt_5_suc
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_tt_5_suc`(29,1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_tt_5_suc`(29,1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_update_pos_before_apply";
-FAILURE d,crash_after_update_pos_before_apply and OUTCOME O1
-rpl_mixing_engines.inc [commands=T-proc]
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-CALL pc_i_tt_5_suc (30, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',30), NAME_CONST('in_stmt_id',1))
-master-bin.000001 # Xid # # COMMIT /* XID */
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',30), NAME_CONST('in_stmt_id',1) + 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',30), NAME_CONST('in_stmt_id',1))
-master-bin.000001 # Xid # # COMMIT /* XID */
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',30), NAME_CONST('in_stmt_id',1) + 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_update_pos_before_apply";
-FAILURE d,crash_after_update_pos_before_apply and OUTCOME O1
-rpl_mixing_engines.inc [commands=B T T-trig C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (31, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (31, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (31, 2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES (31, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T T-trig C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (31, 2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES (31, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T T-trig C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_update_pos_before_apply";
-FAILURE d,crash_after_update_pos_before_apply and OUTCOME O1
-rpl_mixing_engines.inc [commands=B T T-func C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (32, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_tt_5_suc (32, 4);
-fc_i_tt_5_suc (32, 4)
-fc_i_tt_5_suc
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (32, 2)
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_tt_5_suc`(32,4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T T-func C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (32, 2)
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_tt_5_suc`(32,4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T T-func C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_update_pos_before_apply";
-FAILURE d,crash_after_update_pos_before_apply and OUTCOME O1
-rpl_mixing_engines.inc [commands=B T T-proc C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (33, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-CALL pc_i_tt_5_suc (33, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (33, 2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',33), NAME_CONST('in_stmt_id',1))
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',33), NAME_CONST('in_stmt_id',1) + 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T T-proc C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (33, 2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',33), NAME_CONST('in_stmt_id',1))
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',33), NAME_CONST('in_stmt_id',1) + 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T T-proc C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_update_pos_before_apply";
-FAILURE d,crash_after_update_pos_before_apply and OUTCOME O1
-rpl_mixing_engines.inc [commands=B T-trig T C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (34, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (34, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES (34, 2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (34, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T-trig T C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES (34, 2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (34, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T-trig T C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_update_pos_before_apply";
-FAILURE d,crash_after_update_pos_before_apply and OUTCOME O1
-rpl_mixing_engines.inc [commands=B T-func T C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_tt_5_suc (35, 2);
-fc_i_tt_5_suc (35, 2)
-fc_i_tt_5_suc
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (35, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_tt_5_suc`(35,2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (35, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T-func T C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_tt_5_suc`(35,2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (35, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T-func T C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_update_pos_before_apply";
-FAILURE d,crash_after_update_pos_before_apply and OUTCOME O1
-rpl_mixing_engines.inc [commands=B T-proc T C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-CALL pc_i_tt_5_suc (36, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (36, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',36), NAME_CONST('in_stmt_id',1))
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',36), NAME_CONST('in_stmt_id',1) + 1)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (36, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T-proc T C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',36), NAME_CONST('in_stmt_id',1))
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',36), NAME_CONST('in_stmt_id',1) + 1)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (36, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T-proc T C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_before_update_pos";;
-FAILURE d,crash_after_commit_before_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=T]
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (37, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (37, 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (37, 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_before_update_pos";;
-FAILURE d,crash_after_commit_before_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=T-trig]
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (38, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES (38, 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES (38, 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_before_update_pos";;
-FAILURE d,crash_after_commit_before_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=T-func]
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_tt_5_suc (39, 1);
-fc_i_tt_5_suc (39, 1)
-fc_i_tt_5_suc
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_tt_5_suc`(39,1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_tt_5_suc`(39,1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_before_update_pos";;
-FAILURE d,crash_after_commit_before_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=T-proc]
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-CALL pc_i_tt_5_suc (40, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',40), NAME_CONST('in_stmt_id',1))
-master-bin.000001 # Xid # # COMMIT /* XID */
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',40), NAME_CONST('in_stmt_id',1) + 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',40), NAME_CONST('in_stmt_id',1))
-master-bin.000001 # Xid # # COMMIT /* XID */
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',40), NAME_CONST('in_stmt_id',1) + 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_before_update_pos";;
-FAILURE d,crash_after_commit_before_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=B T T-trig C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (41, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (41, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (41, 2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES (41, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T T-trig C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (41, 2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES (41, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T T-trig C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_before_update_pos";;
-FAILURE d,crash_after_commit_before_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=B T T-func C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (42, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_tt_5_suc (42, 4);
-fc_i_tt_5_suc (42, 4)
-fc_i_tt_5_suc
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (42, 2)
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_tt_5_suc`(42,4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T T-func C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (42, 2)
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_tt_5_suc`(42,4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T T-func C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_before_update_pos";;
-FAILURE d,crash_after_commit_before_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=B T T-proc C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (43, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-CALL pc_i_tt_5_suc (43, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (43, 2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',43), NAME_CONST('in_stmt_id',1))
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',43), NAME_CONST('in_stmt_id',1) + 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T T-proc C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (43, 2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',43), NAME_CONST('in_stmt_id',1))
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',43), NAME_CONST('in_stmt_id',1) + 1)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T T-proc C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_before_update_pos";;
-FAILURE d,crash_after_commit_before_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=B T-trig T C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_5(trans_id, stmt_id) VALUES (44, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (44, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES (44, 2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (44, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T-trig T C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES (44, 2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (44, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T-trig T C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_before_update_pos";;
-FAILURE d,crash_after_commit_before_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=B T-func T C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_tt_5_suc (45, 2);
-fc_i_tt_5_suc (45, 2)
-fc_i_tt_5_suc
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (45, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_tt_5_suc`(45,2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (45, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T-func T C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_tt_5_suc`(45,2)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (45, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T-func T C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_before_update_pos";;
-FAILURE d,crash_after_commit_before_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=B T-proc T C]
--b-b-b-b-b-b-b-b-b-b-b- >> B << -b-b-b-b-b-b-b-b-b-b-b-
-BEGIN;
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> B << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T-proc << -b-b-b-b-b-b-b-b-b-b-b-
-CALL pc_i_tt_5_suc (46, 2);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T-proc << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> T << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO tt_1(trans_id, stmt_id) VALUES (46, 4);
-include/show_binlog_events.inc
--e-e-e-e-e-e-e-e-e-e-e- >> T << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> C << -b-b-b-b-b-b-b-b-b-b-b-
-COMMIT;
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',46), NAME_CONST('in_stmt_id',1))
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',46), NAME_CONST('in_stmt_id',1) + 1)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (46, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> C << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> B T-proc T C << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',46), NAME_CONST('in_stmt_id',1))
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_5(trans_id, stmt_id) VALUES ( NAME_CONST('p_trans_id',46), NAME_CONST('in_stmt_id',1) + 1)
-master-bin.000001 # Query # # use `test`; INSERT INTO tt_1(trans_id, stmt_id) VALUES (46, 4)
-master-bin.000001 # Xid # # COMMIT /* XID */
--e-e-e-e-e-e-e-e-e-e-e- >> B T-proc T C << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-###################################################################################
-# EXECUTE CASES CRASHING THE BEGIN/COMMIT
-###################################################################################
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_and_update_pos";;
-FAILURE d,crash_after_commit_and_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=N]
--b-b-b-b-b-b-b-b-b-b-b- >> N << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO nt_1(trans_id, stmt_id) VALUES (47, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO nt_1(trans_id, stmt_id) VALUES (47, 1)
-master-bin.000001 # Query # # COMMIT
--e-e-e-e-e-e-e-e-e-e-e- >> N << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> N << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO nt_1(trans_id, stmt_id) VALUES (47, 1)
-master-bin.000001 # Query # # COMMIT
--e-e-e-e-e-e-e-e-e-e-e- >> N << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_and_update_pos";;
-FAILURE d,crash_after_commit_and_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=N-trig]
--b-b-b-b-b-b-b-b-b-b-b- >> N-trig << -b-b-b-b-b-b-b-b-b-b-b-
-INSERT INTO nt_5(trans_id, stmt_id) VALUES (48, 1);
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO nt_5(trans_id, stmt_id) VALUES (48, 1)
-master-bin.000001 # Query # # COMMIT
--e-e-e-e-e-e-e-e-e-e-e- >> N-trig << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> N-trig << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; INSERT INTO nt_5(trans_id, stmt_id) VALUES (48, 1)
-master-bin.000001 # Query # # COMMIT
--e-e-e-e-e-e-e-e-e-e-e- >> N-trig << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-
-
-
-STOP SLAVE SQL_THREAD;
-include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL debug="d,crash_after_commit_and_update_pos";;
-FAILURE d,crash_after_commit_and_update_pos and OUTCOME O2
-rpl_mixing_engines.inc [commands=N-func]
--b-b-b-b-b-b-b-b-b-b-b- >> N-func << -b-b-b-b-b-b-b-b-b-b-b-
-SELECT fc_i_nt_5_suc (49, 1);
-fc_i_nt_5_suc (49, 1)
-fc_i_nt_5_suc
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_nt_5_suc`(49,1)
-master-bin.000001 # Query # # COMMIT
--e-e-e-e-e-e-e-e-e-e-e- >> N-func << -e-e-e-e-e-e-e-e-e-e-e-
--b-b-b-b-b-b-b-b-b-b-b- >> N-func << -b-b-b-b-b-b-b-b-b-b-b-
-include/show_binlog_events.inc
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Query # # use `test`; SELECT `test`.`fc_i_nt_5_suc`(49,1)
-master-bin.000001 # Query # # COMMIT
--e-e-e-e-e-e-e-e-e-e-e- >> N-func << -e-e-e-e-e-e-e-e-e-e-e-
-
-include/rpl_reconnect.inc
-START SLAVE;
-include/wait_for_slave_to_start.inc
-###################################################################################
-# CHECK CONSISTENCY
-###################################################################################
-include/sync_slave_sql_with_master.inc
-###################################################################################
-# CLEAN
-###################################################################################
-rpl_mixing_engines.inc [commands=clean]
-DROP TABLE tt_1;
-DROP TABLE tt_2;
-DROP TABLE tt_3;
-DROP TABLE tt_4;
-DROP TABLE tt_5;
-DROP TABLE tt_6;
-DROP TABLE nt_1;
-DROP TABLE nt_2;
-DROP TABLE nt_3;
-DROP TABLE nt_4;
-DROP TABLE nt_5;
-DROP TABLE nt_6;
-DROP PROCEDURE pc_i_tt_5_suc;
-DROP PROCEDURE pc_i_nt_5_suc;
-DROP FUNCTION fc_i_tt_5_suc;
-DROP FUNCTION fc_i_nt_5_suc;
-DROP FUNCTION fc_i_nt_3_tt_3_suc;
-include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_truncate_3tokudb.result b/storage/tokudb/mysql-test/rpl/r/rpl_truncate_3tokudb.result
index 5915abe9e94..5d3737817a1 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_truncate_3tokudb.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_truncate_3tokudb.result
@@ -1,35 +1,47 @@
include/master-slave.inc
[connection master]
include/rpl_reset.inc
-**** On Master ****
+connection master;
CREATE TABLE t1 (a INT, b LONG) ENGINE=TokuDB;
INSERT INTO t1 VALUES (1,1), (2,2);
-**** On Master ****
+connection slave;
+connection master;
TRUNCATE TABLE t1;
+connection slave;
include/diff_tables.inc [master:t1, slave:t1]
==== Test using a table with delete triggers ====
-**** On Master ****
+connection master;
SET @count := 1;
CREATE TABLE t2 (a INT, b LONG) ENGINE=TokuDB;
CREATE TRIGGER trg1 BEFORE DELETE ON t1 FOR EACH ROW SET @count := @count + 1;
-**** On Master ****
+connection slave;
+connection master;
TRUNCATE TABLE t1;
+connection slave;
include/diff_tables.inc [master:t2, slave:t2]
+connection master;
DROP TABLE t1,t2;
+connection slave;
include/rpl_reset.inc
-**** On Master ****
+connection master;
CREATE TABLE t1 (a INT, b LONG) ENGINE=TokuDB;
INSERT INTO t1 VALUES (1,1), (2,2);
-**** On Master ****
+connection slave;
+connection master;
DELETE FROM t1;
+connection slave;
include/diff_tables.inc [master:t1, slave:t1]
==== Test using a table with delete triggers ====
-**** On Master ****
+connection master;
SET @count := 1;
CREATE TABLE t2 (a INT, b LONG) ENGINE=TokuDB;
CREATE TRIGGER trg1 BEFORE DELETE ON t1 FOR EACH ROW SET @count := @count + 1;
-**** On Master ****
+connection slave;
+connection master;
DELETE FROM t1;
+connection slave;
include/diff_tables.inc [master:t2, slave:t2]
+connection master;
DROP TABLE t1,t2;
+connection slave;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_typeconv_tokudb.result b/storage/tokudb/mysql-test/rpl/r/rpl_typeconv_tokudb.result
index daceec6180a..636792ce4db 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_typeconv_tokudb.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_typeconv_tokudb.result
@@ -1,10 +1,15 @@
include/master-slave.inc
[connection master]
+connection slave;
SET @saved_slave_type_conversions = @@GLOBAL.SLAVE_TYPE_CONVERSIONS;
SET GLOBAL SLAVE_TYPE_CONVERSIONS = '';
+connection master;
CREATE TABLE t1(b1 BIT(1), b2 BIT(2), b3 BIT(3)) ENGINE=TokuDB;
INSERT INTO t1 VALUES (b'0', b'01', b'101');
+connection slave;
include/diff_tables.inc [master:t1, slave:t1]
+connection master;
DROP TABLE t1;
+connection slave;
SET GLOBAL SLAVE_TYPE_CONVERSIONS = @saved_slave_type_conversions;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_xa_interleave.result b/storage/tokudb/mysql-test/rpl/r/rpl_xa_interleave.result
index 53564ab0fe4..98ded9d2097 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_xa_interleave.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_xa_interleave.result
@@ -1,10 +1,12 @@
include/master-slave.inc
[connection master]
CREATE TABLE t1(`a` INT) ENGINE=TokuDB;
+connection master;
XA START 'x1';
INSERT INTO t1 VALUES (1);
XA END 'x1';
XA PREPARE 'x1';
+connection master1;
BEGIN;
INSERT INTO t1 VALUES (10);
COMMIT;
@@ -12,8 +14,11 @@ XA START 'y1';
INSERT INTO t1 VALUES (2);
XA END 'y1';
XA PREPARE 'y1';
+connection master;
XA COMMIT 'x1';
+connection master1;
XA COMMIT 'y1';
+connection master;
BEGIN;
INSERT INTO t1 VALUES (11);
COMMIT;
@@ -21,16 +26,21 @@ XA START 'x2';
INSERT INTO t1 VALUES (3);
XA END 'x2';
XA PREPARE 'x2';
+connection master1;
XA START 'y2';
INSERT INTO t1 VALUES (4);
XA END 'y2';
XA PREPARE 'y2';
+connection master;
XA COMMIT 'x2';
+connection master1;
XA COMMIT 'y2';
+connection master;
XA START 'x1';
INSERT INTO t1 VALUES (1);
XA END 'x1';
XA PREPARE 'x1';
+connection master1;
BEGIN;
INSERT INTO t1 VALUES (10);
COMMIT;
@@ -38,8 +48,11 @@ XA START 'y1';
INSERT INTO t1 VALUES (2);
XA END 'y1';
XA PREPARE 'y1';
+connection master;
XA ROLLBACK 'x1';
+connection master1;
XA ROLLBACK 'y1';
+connection master;
BEGIN;
INSERT INTO t1 VALUES (11);
COMMIT;
@@ -47,13 +60,19 @@ XA START 'x2';
INSERT INTO t1 VALUES (3);
XA END 'x2';
XA PREPARE 'x2';
+connection master1;
XA START 'y2';
INSERT INTO t1 VALUES (4);
XA END 'y2';
XA PREPARE 'y2';
+connection master;
XA ROLLBACK 'x2';
+connection master1;
XA ROLLBACK 'y2';
+connection master;
+connection slave;
TABLES t1 and t2 must be equal otherwise an error will be thrown.
include/diff_tables.inc [master:test.t1, slave:test.t1]
+connection master;
DROP TABLE t1;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/r/tokudb_innodb_xa_crash.result b/storage/tokudb/mysql-test/rpl/r/tokudb_innodb_xa_crash.result
index 78a66421446..aa95091b40b 100644
--- a/storage/tokudb/mysql-test/rpl/r/tokudb_innodb_xa_crash.result
+++ b/storage/tokudb/mysql-test/rpl/r/tokudb_innodb_xa_crash.result
@@ -2,12 +2,15 @@ include/master-slave.inc
[connection master]
CREATE TABLE t1(`a` INT) ENGINE=TokuDB;
CREATE TABLE t2(`a` INT) ENGINE=InnoDB;
+connection master;
begin;
insert into t1 values (1);
insert into t2 values (1);
commit;
+connection slave;
TABLES t1 and t2 must be equal otherwise an error will be thrown.
include/diff_tables.inc [master:test.t1, slave:test.t1]
include/diff_tables.inc [master:test.t2, slave:test.t2]
+connection master;
drop table t1,t2;
include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_row_crash_safe-master.opt b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_row_crash_safe-master.opt
deleted file mode 100644
index e980c2de7ff..00000000000
--- a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_row_crash_safe-master.opt
+++ /dev/null
@@ -1 +0,0 @@
---transaction_isolation=READ-COMMITTED
diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_row_crash_safe-slave.opt b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_row_crash_safe-slave.opt
deleted file mode 100644
index 264c5c1a4b4..00000000000
--- a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_row_crash_safe-slave.opt
+++ /dev/null
@@ -1 +0,0 @@
---skip-slave-start --relay-log-info-repository=TABLE --relay-log-recovery=1 --transaction_isolation=READ-COMMITTED
diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_row_crash_safe.test b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_row_crash_safe.test
deleted file mode 100644
index 6bd79691528..00000000000
--- a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_row_crash_safe.test
+++ /dev/null
@@ -1,19 +0,0 @@
-# This test takes long time, so only run it with the --big-test mtr-flag.
---source include/big_test.inc
---source include/not_embedded.inc
---source include/not_valgrind.inc
---source include/have_debug.inc
---source include/have_tokudb.inc
---source include/have_binlog_format_row.inc
---source include/not_mts_slave_parallel_workers.inc
---source include/master-slave.inc
-
-call mtr.add_suppression('Attempting backtrace');
-call mtr.add_suppression("Recovery from master pos .* and file master-bin.000001");
-call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
-call mtr.add_suppression(".* InnoDB: Warning: allocated tablespace .*, old maximum was .*");
-
-let $engine_type=TokuDB;
-let $database_name=test;
---source extra/rpl_tests/rpl_crash_safe.test
---source include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_stm_mixed_crash_safe-master.opt b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_stm_mixed_crash_safe-master.opt
deleted file mode 100644
index e980c2de7ff..00000000000
--- a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_stm_mixed_crash_safe-master.opt
+++ /dev/null
@@ -1 +0,0 @@
---transaction_isolation=READ-COMMITTED
diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_stm_mixed_crash_safe-slave.opt b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_stm_mixed_crash_safe-slave.opt
deleted file mode 100644
index 264c5c1a4b4..00000000000
--- a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_stm_mixed_crash_safe-slave.opt
+++ /dev/null
@@ -1 +0,0 @@
---skip-slave-start --relay-log-info-repository=TABLE --relay-log-recovery=1 --transaction_isolation=READ-COMMITTED
diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_stm_mixed_crash_safe.test b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_stm_mixed_crash_safe.test
deleted file mode 100644
index 724550fae4a..00000000000
--- a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_stm_mixed_crash_safe.test
+++ /dev/null
@@ -1,18 +0,0 @@
---source include/big_test.inc
---source include/not_embedded.inc
---source include/not_valgrind.inc
---source include/have_debug.inc
---source include/have_tokudb.inc
---source include/have_binlog_format_mixed_or_statement.inc
---source include/not_mts_slave_parallel_workers.inc
---source include/master-slave.inc
-
-call mtr.add_suppression('Attempting backtrace');
-call mtr.add_suppression("Recovery from master pos .* and file master-bin.000001");
-call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
-call mtr.add_suppression(".* InnoDB: Warning: allocated tablespace .*, old maximum was .*");
-
-let $engine_type=TokuDB;
-let $database_name=test;
---source extra/rpl_tests/rpl_crash_safe.test
---source include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/tokudb/disabled.def b/storage/tokudb/mysql-test/tokudb/disabled.def
index 7f354cd8ba6..4f759001151 100644
--- a/storage/tokudb/mysql-test/tokudb/disabled.def
+++ b/storage/tokudb/mysql-test/tokudb/disabled.def
@@ -30,3 +30,5 @@ i_s_tokudb_lock_waits_released: unstable, race conditions
i_s_tokudb_locks_released: unstable, race conditions
row_format: n/a
nonflushing_analyze_debug: Freezes in MariaDB 10.0
+change_column_all_1000_1: We are too lazy to fix this properly
+change_column_all_1000_10: We are too lazy to fix this properly
diff --git a/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result b/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result
index 8b53f89efa3..69b55582aa2 100644
--- a/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result
+++ b/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result
@@ -17,7 +17,7 @@ set global tokudb_debug_pause_background_job_manager=TRUE;
show create table information_schema.tokudb_background_job_status;
Table Create Table
TokuDB_background_job_status CREATE TEMPORARY TABLE `TokuDB_background_job_status` (
- `id` bigint(0) NOT NULL DEFAULT '0',
+ `id` bigint(0) NOT NULL DEFAULT 0,
`database_name` varchar(256) NOT NULL DEFAULT '',
`table_name` varchar(256) NOT NULL DEFAULT '',
`job_type` varchar(256) NOT NULL DEFAULT '',
diff --git a/storage/tokudb/mysql-test/tokudb/r/bf_delete_trigger.result b/storage/tokudb/mysql-test/tokudb/r/bf_delete_trigger.result
index 78bd8d2fe14..56e05f44f7d 100644
--- a/storage/tokudb/mysql-test/tokudb/r/bf_delete_trigger.result
+++ b/storage/tokudb/mysql-test/tokudb/r/bf_delete_trigger.result
@@ -5,37 +5,37 @@ insert into t values (1,0),(2,0),(3,0),(4,0);
create trigger t_delete before delete on t for each row insert into t values (1000000,0);
begin;
delete from t where x=0;
-ERROR HY000: Can't update table 't' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 't' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_delete;
create trigger t_delete after delete on t for each row insert into t values (1000000,0);
begin;
delete from t where x=0;
-ERROR HY000: Can't update table 't' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 't' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_delete;
create trigger t_delete before delete on t for each row delete from t where id=1000000;
begin;
delete from t where x=0;
-ERROR HY000: Can't update table 't' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 't' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_delete;
create trigger t_delete after delete on t for each row delete from t where id=1000000;
begin;
delete from t where x=0;
-ERROR HY000: Can't update table 't' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 't' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_delete;
create trigger t_delete before delete on t for each row update t set x=x+1 where id=1000000;
begin;
delete from t where x=0;
-ERROR HY000: Can't update table 't' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 't' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_delete;
create trigger t_delete after delete on t for each row update t set x=x+1 where id=10000000;
begin;
delete from t where x=0;
-ERROR HY000: Can't update table 't' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 't' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_delete;
create table count (count bigint not null);
diff --git a/storage/tokudb/mysql-test/tokudb/r/bf_insert_select_trigger.result b/storage/tokudb/mysql-test/tokudb/r/bf_insert_select_trigger.result
index 860d26602dd..bfbbb956f81 100644
--- a/storage/tokudb/mysql-test/tokudb/r/bf_insert_select_trigger.result
+++ b/storage/tokudb/mysql-test/tokudb/r/bf_insert_select_trigger.result
@@ -9,37 +9,37 @@ rollback;
create trigger t_trigger before insert on t for each row insert into s values (1000000,0);
begin;
insert into t select * from s;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger after insert on t for each row insert into s values (1000000,0);
begin;
insert into t select * from s;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger before insert on t for each row delete from s where id=1000000;
begin;
insert into t select * from s;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger after insert on t for each row delete from s where id=1000000;
begin;
insert into t select * from s;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000;
begin;
insert into t select * from s;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000;
begin;
insert into t select * from s;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
drop table s,t;
diff --git a/storage/tokudb/mysql-test/tokudb/r/bf_insert_select_update_trigger.result b/storage/tokudb/mysql-test/tokudb/r/bf_insert_select_update_trigger.result
index d7588441d92..506faee533f 100644
--- a/storage/tokudb/mysql-test/tokudb/r/bf_insert_select_update_trigger.result
+++ b/storage/tokudb/mysql-test/tokudb/r/bf_insert_select_update_trigger.result
@@ -9,37 +9,37 @@ rollback;
create trigger t_trigger before insert on t for each row insert into s values (1000000,0);
begin;
insert into t select * from s on duplicate key update x=t.x+1;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger after insert on t for each row insert into s values (1000000,0);
begin;
insert into t select * from s on duplicate key update x=t.x+1;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger before insert on t for each row delete from s where id=1000000;
begin;
insert into t select * from s on duplicate key update x=t.x+1;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger after insert on t for each row delete from s where id=1000000;
begin;
insert into t select * from s on duplicate key update x=t.x+1;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000;
begin;
insert into t select * from s on duplicate key update x=t.x+1;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000;
begin;
insert into t select * from s on duplicate key update x=t.x+1;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
truncate table t;
@@ -47,37 +47,37 @@ insert into t values (1,0);
create trigger t_trigger before insert on t for each row insert into s values (1000000,0);
begin;
insert into t select * from s on duplicate key update x=t.x+1;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger after insert on t for each row insert into s values (1000000,0);
begin;
insert into t select * from s on duplicate key update x=t.x+1;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger before insert on t for each row delete from s where id=1000000;
begin;
insert into t select * from s on duplicate key update x=t.x+1;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger after insert on t for each row delete from s where id=1000000;
begin;
insert into t select * from s on duplicate key update x=t.x+1;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000;
begin;
insert into t select * from s on duplicate key update x=t.x+1;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000;
begin;
insert into t select * from s on duplicate key update x=t.x+1;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
truncate table t;
@@ -85,37 +85,37 @@ insert into t values (1,0);
create trigger t_trigger before update on t for each row insert into s values (1000000,0);
begin;
insert into t select * from s on duplicate key update x=t.x+1;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger after update on t for each row insert into s values (1000000,0);
begin;
insert into t select * from s on duplicate key update x=t.x+1;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger before update on t for each row delete from s where id=1000000;
begin;
insert into t select * from s on duplicate key update x=t.x+1;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger after update on t for each row delete from s where id=1000000;
begin;
insert into t select * from s on duplicate key update x=t.x+1;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger before update on t for each row update s set x=x+1 where id=1000000;
begin;
insert into t select * from s on duplicate key update x=t.x+1;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger after update on t for each row update s set x=x+1 where id=1000000;
begin;
insert into t select * from s on duplicate key update x=t.x+1;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
drop table s,t;
diff --git a/storage/tokudb/mysql-test/tokudb/r/bf_replace_select_trigger.result b/storage/tokudb/mysql-test/tokudb/r/bf_replace_select_trigger.result
index acd17170301..12ecbba576f 100644
--- a/storage/tokudb/mysql-test/tokudb/r/bf_replace_select_trigger.result
+++ b/storage/tokudb/mysql-test/tokudb/r/bf_replace_select_trigger.result
@@ -9,37 +9,37 @@ rollback;
create trigger t_trigger before insert on t for each row replace into s values (1000000,0);
begin;
replace into t select * from s;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger after insert on t for each row replace into s values (1000000,0);
begin;
replace into t select * from s;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger before insert on t for each row delete from s where id=1000000;
begin;
replace into t select * from s;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger after insert on t for each row delete from s where id=1000000;
begin;
replace into t select * from s;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000;
begin;
replace into t select * from s;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000;
begin;
replace into t select * from s;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
truncate table t;
@@ -47,37 +47,37 @@ insert into t values (1,1);
create trigger t_trigger before insert on t for each row replace into s values (1000000,0);
begin;
replace into t select * from s;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger after insert on t for each row replace into s values (1000000,0);
begin;
replace into t select * from s;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger before insert on t for each row delete from s where id=1000000;
begin;
replace into t select * from s;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger after insert on t for each row delete from s where id=1000000;
begin;
replace into t select * from s;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000;
begin;
replace into t select * from s;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000;
begin;
replace into t select * from s;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
truncate table t;
@@ -85,37 +85,37 @@ insert into t values (1,1);
create trigger t_trigger before delete on t for each row replace into s values (1000000,0);
begin;
replace into t select * from s;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger after delete on t for each row replace into s values (1000000,0);
begin;
replace into t select * from s;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger before delete on t for each row delete from s where id=1000000;
begin;
replace into t select * from s;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger after delete on t for each row delete from s where id=1000000;
begin;
replace into t select * from s;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger before delete on t for each row update s set x=x+1 where id=1000000;
begin;
replace into t select * from s;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
create trigger t_trigger after delete on t for each row update s set x=x+1 where id=1000000;
begin;
replace into t select * from s;
-ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger.
+ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger
rollback;
drop trigger t_trigger;
drop table s,t;
diff --git a/storage/tokudb/mysql-test/tokudb/r/change_column_int_default.result b/storage/tokudb/mysql-test/tokudb/r/change_column_int_default.result
index 558d153711c..b119f1a0b61 100644
--- a/storage/tokudb/mysql-test/tokudb/r/change_column_int_default.result
+++ b/storage/tokudb/mysql-test/tokudb/r/change_column_int_default.result
@@ -6,31 +6,31 @@ ALTER TABLE t CHANGE COLUMN a a TINYINT DEFAULT 100;
SHOW CREATE TABLE t;
Table Create Table
t CREATE TABLE `t` (
- `a` tinyint(4) DEFAULT '100'
+ `a` tinyint(4) DEFAULT 100
) ENGINE=TokuDB DEFAULT CHARSET=latin1
ALTER TABLE t CHANGE COLUMN a a SMALLINT DEFAULT 200;
SHOW CREATE TABLE t;
Table Create Table
t CREATE TABLE `t` (
- `a` smallint(6) DEFAULT '200'
+ `a` smallint(6) DEFAULT 200
) ENGINE=TokuDB DEFAULT CHARSET=latin1
ALTER TABLE t CHANGE COLUMN a a MEDIUMINT DEFAULT 300;
SHOW CREATE TABLE t;
Table Create Table
t CREATE TABLE `t` (
- `a` mediumint(9) DEFAULT '300'
+ `a` mediumint(9) DEFAULT 300
) ENGINE=TokuDB DEFAULT CHARSET=latin1
ALTER TABLE t CHANGE COLUMN a a INT DEFAULT 400;
SHOW CREATE TABLE t;
Table Create Table
t CREATE TABLE `t` (
- `a` int(11) DEFAULT '400'
+ `a` int(11) DEFAULT 400
) ENGINE=TokuDB DEFAULT CHARSET=latin1
ALTER TABLE t CHANGE COLUMN a a BIGINT DEFAULT 500;
SHOW CREATE TABLE t;
Table Create Table
t CREATE TABLE `t` (
- `a` bigint(20) DEFAULT '500'
+ `a` bigint(20) DEFAULT 500
) ENGINE=TokuDB DEFAULT CHARSET=latin1
DROP TABLE t;
CREATE TABLE t (a TINYINT NOT NULL DEFAULT 1);
@@ -38,31 +38,31 @@ ALTER TABLE t CHANGE COLUMN a a TINYINT NOT NULL DEFAULT 100;
SHOW CREATE TABLE t;
Table Create Table
t CREATE TABLE `t` (
- `a` tinyint(4) NOT NULL DEFAULT '100'
+ `a` tinyint(4) NOT NULL DEFAULT 100
) ENGINE=TokuDB DEFAULT CHARSET=latin1
ALTER TABLE t CHANGE COLUMN a a SMALLINT NOT NULL DEFAULT 200;
SHOW CREATE TABLE t;
Table Create Table
t CREATE TABLE `t` (
- `a` smallint(6) NOT NULL DEFAULT '200'
+ `a` smallint(6) NOT NULL DEFAULT 200
) ENGINE=TokuDB DEFAULT CHARSET=latin1
ALTER TABLE t CHANGE COLUMN a a MEDIUMINT NOT NULL DEFAULT 300;
SHOW CREATE TABLE t;
Table Create Table
t CREATE TABLE `t` (
- `a` mediumint(9) NOT NULL DEFAULT '300'
+ `a` mediumint(9) NOT NULL DEFAULT 300
) ENGINE=TokuDB DEFAULT CHARSET=latin1
ALTER TABLE t CHANGE COLUMN a a INT NOT NULL DEFAULT 400;
SHOW CREATE TABLE t;
Table Create Table
t CREATE TABLE `t` (
- `a` int(11) NOT NULL DEFAULT '400'
+ `a` int(11) NOT NULL DEFAULT 400
) ENGINE=TokuDB DEFAULT CHARSET=latin1
ALTER TABLE t CHANGE COLUMN a a BIGINT NOT NULL DEFAULT 500;
SHOW CREATE TABLE t;
Table Create Table
t CREATE TABLE `t` (
- `a` bigint(20) NOT NULL DEFAULT '500'
+ `a` bigint(20) NOT NULL DEFAULT 500
) ENGINE=TokuDB DEFAULT CHARSET=latin1
DROP TABLE t;
CREATE TABLE t (a TINYINT UNSIGNED DEFAULT 1);
@@ -70,31 +70,31 @@ ALTER TABLE t CHANGE COLUMN a a TINYINT UNSIGNED DEFAULT 100;
SHOW CREATE TABLE t;
Table Create Table
t CREATE TABLE `t` (
- `a` tinyint(3) unsigned DEFAULT '100'
+ `a` tinyint(3) unsigned DEFAULT 100
) ENGINE=TokuDB DEFAULT CHARSET=latin1
ALTER TABLE t CHANGE COLUMN a a SMALLINT UNSIGNED DEFAULT 200;
SHOW CREATE TABLE t;
Table Create Table
t CREATE TABLE `t` (
- `a` smallint(5) unsigned DEFAULT '200'
+ `a` smallint(5) unsigned DEFAULT 200
) ENGINE=TokuDB DEFAULT CHARSET=latin1
ALTER TABLE t CHANGE COLUMN a a MEDIUMINT UNSIGNED DEFAULT 300;
SHOW CREATE TABLE t;
Table Create Table
t CREATE TABLE `t` (
- `a` mediumint(8) unsigned DEFAULT '300'
+ `a` mediumint(8) unsigned DEFAULT 300
) ENGINE=TokuDB DEFAULT CHARSET=latin1
ALTER TABLE t CHANGE COLUMN a a INT UNSIGNED DEFAULT 400;
SHOW CREATE TABLE t;
Table Create Table
t CREATE TABLE `t` (
- `a` int(10) unsigned DEFAULT '400'
+ `a` int(10) unsigned DEFAULT 400
) ENGINE=TokuDB DEFAULT CHARSET=latin1
ALTER TABLE t CHANGE COLUMN a a BIGINT UNSIGNED DEFAULT 500;
SHOW CREATE TABLE t;
Table Create Table
t CREATE TABLE `t` (
- `a` bigint(20) unsigned DEFAULT '500'
+ `a` bigint(20) unsigned DEFAULT 500
) ENGINE=TokuDB DEFAULT CHARSET=latin1
DROP TABLE t;
CREATE TABLE t (a TINYINT UNSIGNED NOT NULL DEFAULT 1);
@@ -102,30 +102,30 @@ ALTER TABLE t CHANGE COLUMN a a TINYINT UNSIGNED NOT NULL DEFAULT 100;
SHOW CREATE TABLE t;
Table Create Table
t CREATE TABLE `t` (
- `a` tinyint(3) unsigned NOT NULL DEFAULT '100'
+ `a` tinyint(3) unsigned NOT NULL DEFAULT 100
) ENGINE=TokuDB DEFAULT CHARSET=latin1
ALTER TABLE t CHANGE COLUMN a a SMALLINT UNSIGNED NOT NULL DEFAULT 200;
SHOW CREATE TABLE t;
Table Create Table
t CREATE TABLE `t` (
- `a` smallint(5) unsigned NOT NULL DEFAULT '200'
+ `a` smallint(5) unsigned NOT NULL DEFAULT 200
) ENGINE=TokuDB DEFAULT CHARSET=latin1
ALTER TABLE t CHANGE COLUMN a a MEDIUMINT UNSIGNED NOT NULL DEFAULT 300;
SHOW CREATE TABLE t;
Table Create Table
t CREATE TABLE `t` (
- `a` mediumint(8) unsigned NOT NULL DEFAULT '300'
+ `a` mediumint(8) unsigned NOT NULL DEFAULT 300
) ENGINE=TokuDB DEFAULT CHARSET=latin1
ALTER TABLE t CHANGE COLUMN a a INT UNSIGNED NOT NULL DEFAULT 400;
SHOW CREATE TABLE t;
Table Create Table
t CREATE TABLE `t` (
- `a` int(10) unsigned NOT NULL DEFAULT '400'
+ `a` int(10) unsigned NOT NULL DEFAULT 400
) ENGINE=TokuDB DEFAULT CHARSET=latin1
ALTER TABLE t CHANGE COLUMN a a BIGINT UNSIGNED NOT NULL DEFAULT 500;
SHOW CREATE TABLE t;
Table Create Table
t CREATE TABLE `t` (
- `a` bigint(20) unsigned NOT NULL DEFAULT '500'
+ `a` bigint(20) unsigned NOT NULL DEFAULT 500
) ENGINE=TokuDB DEFAULT CHARSET=latin1
DROP TABLE t;
diff --git a/storage/tokudb/mysql-test/tokudb/r/cluster_2968-1.result b/storage/tokudb/mysql-test/tokudb/r/cluster_2968-1.result
index a8cb371bbd7..758d51be01c 100644
--- a/storage/tokudb/mysql-test/tokudb/r/cluster_2968-1.result
+++ b/storage/tokudb/mysql-test/tokudb/r/cluster_2968-1.result
@@ -1045,10 +1045,10 @@ id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ref b b 5 test.s.b 1
alter table s add key(b) clustering=yes;
Warnings:
-Note 1831 Duplicate index `b_2`. This is deprecated and will be disallowed in a future release.
+Note 1831 Duplicate index `b_2`. This is deprecated and will be disallowed in a future release
alter table t add key(b) clustering=yes;
Warnings:
-Note 1831 Duplicate index `b_2`. This is deprecated and will be disallowed in a future release.
+Note 1831 Duplicate index `b_2`. This is deprecated and will be disallowed in a future release
show create table s;
Table Create Table
s CREATE TABLE `s` (
@@ -1095,10 +1095,10 @@ id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ref b_2 b_2 5 test.s.b 1 Using index
alter table s add key(b);
Warnings:
-Note 1831 Duplicate index `b`. This is deprecated and will be disallowed in a future release.
+Note 1831 Duplicate index `b`. This is deprecated and will be disallowed in a future release
alter table t add key(b);
Warnings:
-Note 1831 Duplicate index `b`. This is deprecated and will be disallowed in a future release.
+Note 1831 Duplicate index `b`. This is deprecated and will be disallowed in a future release
show create table s;
Table Create Table
s CREATE TABLE `s` (
diff --git a/storage/tokudb/mysql-test/tokudb/r/cluster_2968-2.result b/storage/tokudb/mysql-test/tokudb/r/cluster_2968-2.result
index de74a25921a..4c3f971770e 100644
--- a/storage/tokudb/mysql-test/tokudb/r/cluster_2968-2.result
+++ b/storage/tokudb/mysql-test/tokudb/r/cluster_2968-2.result
@@ -1069,10 +1069,10 @@ id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ref b,b_2 b_2 5 test.s.b 1 Using index
alter table s add key(b) clustering=yes;
Warnings:
-Note 1831 Duplicate index `b_3`. This is deprecated and will be disallowed in a future release.
+Note 1831 Duplicate index `b_3`. This is deprecated and will be disallowed in a future release
alter table t add key(b) clustering=yes;
Warnings:
-Note 1831 Duplicate index `b_3`. This is deprecated and will be disallowed in a future release.
+Note 1831 Duplicate index `b_3`. This is deprecated and will be disallowed in a future release
show create table s;
Table Create Table
s CREATE TABLE `s` (
diff --git a/storage/tokudb/mysql-test/tokudb/r/cluster_2968-3.result b/storage/tokudb/mysql-test/tokudb/r/cluster_2968-3.result
index da68e0b1733..407ef9cc316 100644
--- a/storage/tokudb/mysql-test/tokudb/r/cluster_2968-3.result
+++ b/storage/tokudb/mysql-test/tokudb/r/cluster_2968-3.result
@@ -1066,13 +1066,13 @@ id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE u ref c c 5 test.s.c 1
alter table s add key (b) clustering=yes;
Warnings:
-Note 1831 Duplicate index `b_2`. This is deprecated and will be disallowed in a future release.
+Note 1831 Duplicate index `b_2`. This is deprecated and will be disallowed in a future release
alter table t add key (b) clustering=yes;
Warnings:
-Note 1831 Duplicate index `b_2`. This is deprecated and will be disallowed in a future release.
+Note 1831 Duplicate index `b_2`. This is deprecated and will be disallowed in a future release
alter table u add key (c) clustering=yes;
Warnings:
-Note 1831 Duplicate index `c_2`. This is deprecated and will be disallowed in a future release.
+Note 1831 Duplicate index `c_2`. This is deprecated and will be disallowed in a future release
show create table s;
Table Create Table
s CREATE TABLE `s` (
diff --git a/storage/tokudb/mysql-test/tokudb/r/cluster_key_part.result b/storage/tokudb/mysql-test/tokudb/r/cluster_key_part.result
index 6df54cac05a..61dba48febc 100644
--- a/storage/tokudb/mysql-test/tokudb/r/cluster_key_part.result
+++ b/storage/tokudb/mysql-test/tokudb/r/cluster_key_part.result
@@ -23,8 +23,8 @@ t CREATE TABLE `t` (
`y` int(11) NOT NULL,
PRIMARY KEY (`x`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (x)
-PARTITIONS 2 */
+ PARTITION BY HASH (x)
+PARTITIONS 2
ALTER TABLE t ADD CLUSTERING KEY(y);
SHOW CREATE TABLE t;
Table Create Table
@@ -34,8 +34,8 @@ t CREATE TABLE `t` (
PRIMARY KEY (`x`),
CLUSTERING KEY `y` (`y`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (x)
-PARTITIONS 2 */
+ PARTITION BY HASH (x)
+PARTITIONS 2
DROP TABLE t;
CREATE TABLE t1(a INT, b INT, c INT, d INT, PRIMARY KEY(a,b,c), CLUSTERING KEY(b), KEY (c)) ENGINE=TOKUDB
PARTITION BY RANGE(a) (PARTITION p0 VALUES LESS THAN (5) ENGINE = TOKUDB, PARTITION p2 VALUES LESS THAN MAXVALUE ENGINE = TOKUDB);
diff --git a/storage/tokudb/mysql-test/tokudb/r/compressions.result b/storage/tokudb/mysql-test/tokudb/r/compressions.result
index 03e0d18e9eb..435b34b6af3 100644
--- a/storage/tokudb/mysql-test/tokudb/r/compressions.result
+++ b/storage/tokudb/mysql-test/tokudb/r/compressions.result
@@ -3,9 +3,9 @@ CREATE TABLE t2 (a INT) ENGINE=TokuDB COMPRESSION=TOKUDB_SNAPPY;
CREATE TABLE t3 (a INT) ENGINE=TokuDB COMPRESSION=TOKUDB_QUICKLZ;
CREATE TABLE t4 (a INT) ENGINE=TokuDB COMPRESSION=TOKUDB_LZMA;
CREATE TABLE t5 (a INT) ENGINE=TokuDB COMPRESSION=TOKUDB_ZLIB;
-FOUND /compression_method=0/ in dump
-FOUND /compression_method=7/ in dump
-FOUND /compression_method=9/ in dump
-FOUND /compression_method=10/ in dump
-FOUND /compression_method=11/ in dump
+FOUND 1 /compression_method=0/ in dump
+FOUND 1 /compression_method=7/ in dump
+FOUND 1 /compression_method=9/ in dump
+FOUND 1 /compression_method=10/ in dump
+FOUND 1 /compression_method=11/ in dump
DROP TABLE t1, t2, t3, t4, t5;
diff --git a/storage/tokudb/mysql-test/tokudb/r/ctype_collate.result b/storage/tokudb/mysql-test/tokudb/r/ctype_collate.result
index 6254005d2bb..37d0d54f979 100644
--- a/storage/tokudb/mysql-test/tokudb/r/ctype_collate.result
+++ b/storage/tokudb/mysql-test/tokudb/r/ctype_collate.result
@@ -520,7 +520,7 @@ explain extended SELECT charset('a'),collation('a'),coercibility('a'),'a'='A';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL No tables used
Warnings:
-Note 1003 select charset('a') AS `charset('a')`,collation('a') AS `collation('a')`,coercibility('a') AS `coercibility('a')`,('a' = 'A') AS `'a'='A'`
+Note 1003 select charset('a') AS `charset('a')`,collation('a') AS `collation('a')`,coercibility('a') AS `coercibility('a')`,'a' = 'A' AS `'a'='A'`
SET CHARACTER SET koi8r;
SHOW VARIABLES LIKE 'collation_client';
Variable_name Value
diff --git a/storage/tokudb/mysql-test/tokudb/r/ctype_cp1250_ch.result b/storage/tokudb/mysql-test/tokudb/r/ctype_cp1250_ch.result
index 3efeb757b61..2dae25d4aa4 100644
--- a/storage/tokudb/mysql-test/tokudb/r/ctype_cp1250_ch.result
+++ b/storage/tokudb/mysql-test/tokudb/r/ctype_cp1250_ch.result
@@ -60,7 +60,7 @@ want1result
location
DROP TABLE t1;
create table t1 (a set('a') not null);
-insert into t1 values (),();
+insert ignore into t1 values (),();
Warnings:
Warning 1364 Field 'a' doesn't have a default value
select cast(a as char(1)) from t1;
@@ -162,7 +162,7 @@ want1result
location
DROP TABLE t1;
create table t1 (a set('a') not null);
-insert into t1 values (),();
+insert ignore into t1 values (),();
Warnings:
Warning 1364 Field 'a' doesn't have a default value
select cast(a as char(1)) from t1;
diff --git a/storage/tokudb/mysql-test/tokudb/r/dir_per_db_rename_to_nonexisting_schema.result b/storage/tokudb/mysql-test/tokudb/r/dir_per_db_rename_to_nonexisting_schema.result
index 74148bd4e74..992f380591f 100644
--- a/storage/tokudb/mysql-test/tokudb/r/dir_per_db_rename_to_nonexisting_schema.result
+++ b/storage/tokudb/mysql-test/tokudb/r/dir_per_db_rename_to_nonexisting_schema.result
@@ -6,6 +6,7 @@ CREATE DATABASE new_db;
CREATE TABLE t1 (id INT AUTO_INCREMENT PRIMARY KEY NOT NULL) ENGINE=tokudb;
ALTER TABLE test.t1 RENAME new_db.t1;
The content of "test" directory:
+db.opt
The content of "new_db" directory:
db.opt
t1.frm
diff --git a/storage/tokudb/mysql-test/tokudb/r/ext_key_1_innodb.result b/storage/tokudb/mysql-test/tokudb/r/ext_key_1_innodb.result
index 43d8a526665..91913f33b03 100644
--- a/storage/tokudb/mysql-test/tokudb/r/ext_key_1_innodb.result
+++ b/storage/tokudb/mysql-test/tokudb/r/ext_key_1_innodb.result
@@ -1,7 +1,4 @@
drop table if exists t;
-select @@optimizer_switch;
-@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=off,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=off
create table t (id int not null, x int not null, y int not null, primary key(id), key(x)) engine=innodb;
insert into t values (0,0,0),(1,1,1),(2,2,2),(3,2,3),(4,2,4);
explain select x,id from t force index (x) where x=0 and id=0;
diff --git a/storage/tokudb/mysql-test/tokudb/r/ext_key_1_tokudb.result b/storage/tokudb/mysql-test/tokudb/r/ext_key_1_tokudb.result
index f539746a1b1..8c8afb63bea 100644
--- a/storage/tokudb/mysql-test/tokudb/r/ext_key_1_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb/r/ext_key_1_tokudb.result
@@ -1,7 +1,4 @@
drop table if exists t;
-select @@optimizer_switch;
-@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=off,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=off
create table t (id int not null, x int not null, y int not null, primary key(id), key(x)) engine=tokudb;
insert into t values (0,0,0),(1,1,1),(2,2,2),(3,2,3),(4,2,4);
explain select x,id from t force index (x) where x=0 and id=0;
diff --git a/storage/tokudb/mysql-test/tokudb/r/ext_key_2_innodb.result b/storage/tokudb/mysql-test/tokudb/r/ext_key_2_innodb.result
index 191f509c915..9fb1a7f0880 100644
--- a/storage/tokudb/mysql-test/tokudb/r/ext_key_2_innodb.result
+++ b/storage/tokudb/mysql-test/tokudb/r/ext_key_2_innodb.result
@@ -1,7 +1,4 @@
drop table if exists t;
-select @@optimizer_switch;
-@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=off,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=off
create table t (a int not null, b int not null, c int not null, d int not null, primary key(a,b), key(c,a)) engine=innodb;
insert into t values (0,0,0,0),(0,1,0,1);
explain select c,a,b from t where c=0 and a=0 and b=1;
diff --git a/storage/tokudb/mysql-test/tokudb/r/ext_key_2_tokudb.result b/storage/tokudb/mysql-test/tokudb/r/ext_key_2_tokudb.result
index 14756b1e66b..3efbfb7f643 100644
--- a/storage/tokudb/mysql-test/tokudb/r/ext_key_2_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb/r/ext_key_2_tokudb.result
@@ -1,7 +1,4 @@
drop table if exists t;
-select @@optimizer_switch;
-@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=off,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=off
create table t (a int not null, b int not null, c int not null, d int not null, primary key(a,b), key(c,a)) engine=tokudb;
insert into t values (0,0,0,0),(0,1,0,1);
explain select c,a,b from t where c=0 and a=0 and b=1;
diff --git a/storage/tokudb/mysql-test/tokudb/r/hotindex-del-0.result b/storage/tokudb/mysql-test/tokudb/r/hotindex-del-0.result
index 6e6874f6d8b..9cec96096e0 100644
--- a/storage/tokudb/mysql-test/tokudb/r/hotindex-del-0.result
+++ b/storage/tokudb/mysql-test/tokudb/r/hotindex-del-0.result
@@ -1,5 +1,7 @@
SET DEFAULT_STORAGE_ENGINE='tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
+connection default;
drop table if exists s;
create table s (a int, b int);
insert into s values (10000,0),(10000,1),(10000,2),(10000,3),(10000,4),(10000,5),(10000,6),(10000,7),(10000,8),(10000,9);
@@ -10004,6 +10006,7 @@ insert into s values (2,0),(2,1),(2,2),(2,3),(2,4),(2,5),(2,6),(2,7),(2,8),(2,9)
insert into s values (1,0),(1,1),(1,2),(1,3),(1,4),(1,5),(1,6),(1,7),(1,8),(1,9);
set tokudb_create_index_online=1;
create index i_a on s(a);
+connection conn1;
delete from s where a=10000;
delete from s where a=9999;
delete from s where a=9998;
@@ -20004,10 +20007,12 @@ delete from s where a=4;
delete from s where a=3;
delete from s where a=2;
delete from s where a=1;
+connection default;
select count(*) from s use index();
count(*)
0
select count(*) from s use index(i_a);
count(*)
0
+disconnect conn1;
drop table s;
diff --git a/storage/tokudb/mysql-test/tokudb/r/hotindex-del-1.result b/storage/tokudb/mysql-test/tokudb/r/hotindex-del-1.result
index a45809b719a..112a09c9a69 100644
--- a/storage/tokudb/mysql-test/tokudb/r/hotindex-del-1.result
+++ b/storage/tokudb/mysql-test/tokudb/r/hotindex-del-1.result
@@ -1,5 +1,7 @@
SET DEFAULT_STORAGE_ENGINE='tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
+connection default;
drop table if exists s;
create table s (a int, b int);
begin;
@@ -10006,6 +10008,7 @@ insert into s values (1,0),(1,1),(1,2),(1,3),(1,4),(1,5),(1,6),(1,7),(1,8),(1,9)
commit;
set tokudb_create_index_online=1;
create index i_a on s(a) clustering=yes;
+connection conn1;
delete from s where a=10000;
delete from s where a=9999;
delete from s where a=9998;
@@ -20006,10 +20009,12 @@ delete from s where a=4;
delete from s where a=3;
delete from s where a=2;
delete from s where a=1;
+connection default;
select count(*) from s use index();
count(*)
0
select count(*) from s use index(i_a);
count(*)
0
+disconnect conn1;
drop table s;
diff --git a/storage/tokudb/mysql-test/tokudb/r/hotindex-del-fast.result b/storage/tokudb/mysql-test/tokudb/r/hotindex-del-fast.result
index ecb2d225181..b6ca720e135 100644
--- a/storage/tokudb/mysql-test/tokudb/r/hotindex-del-fast.result
+++ b/storage/tokudb/mysql-test/tokudb/r/hotindex-del-fast.result
@@ -1,5 +1,7 @@
SET DEFAULT_STORAGE_ENGINE='tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
+connection default;
drop table if exists s;
create table s (a int, b int);
insert into s values (10000,0),(10000,1),(10000,2),(10000,3),(10000,4),(10000,5),(10000,6),(10000,7),(10000,8),(10000,9);
@@ -10003,6 +10005,7 @@ insert into s values (3,0),(3,1),(3,2),(3,3),(3,4),(3,5),(3,6),(3,7),(3,8),(3,9)
insert into s values (2,0),(2,1),(2,2),(2,3),(2,4),(2,5),(2,6),(2,7),(2,8),(2,9);
insert into s values (1,0),(1,1),(1,2),(1,3),(1,4),(1,5),(1,6),(1,7),(1,8),(1,9);
create index i_a on s(a);
+connection conn1;
delete from s where a=10000;
show create table s;
Table Create Table
@@ -90003,10 +90006,12 @@ s CREATE TABLE `s` (
`b` int(11) DEFAULT NULL,
KEY `i_a` (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
+connection default;
select count(*) from s use index();
count(*)
0
select count(*) from s use index(i_a);
count(*)
0
+disconnect conn1;
drop table s;
diff --git a/storage/tokudb/mysql-test/tokudb/r/hotindex-del-slow.result b/storage/tokudb/mysql-test/tokudb/r/hotindex-del-slow.result
index e1e298eb29f..0ef9595d546 100644
--- a/storage/tokudb/mysql-test/tokudb/r/hotindex-del-slow.result
+++ b/storage/tokudb/mysql-test/tokudb/r/hotindex-del-slow.result
@@ -1,5 +1,7 @@
SET DEFAULT_STORAGE_ENGINE='tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
+connection default;
drop table if exists s;
create table s (a int, b int);
insert into s values (10000,0),(10000,1),(10000,2),(10000,3),(10000,4),(10000,5),(10000,6),(10000,7),(10000,8),(10000,9);
@@ -10004,6 +10006,7 @@ insert into s values (2,0),(2,1),(2,2),(2,3),(2,4),(2,5),(2,6),(2,7),(2,8),(2,9)
insert into s values (1,0),(1,1),(1,2),(1,3),(1,4),(1,5),(1,6),(1,7),(1,8),(1,9);
set tokudb_create_index_online=1;
create index i_a on s(a);
+connection conn1;
delete from s where a=10000;
show create table s;
Table Create Table
@@ -90004,10 +90007,12 @@ s CREATE TABLE `s` (
`b` int(11) DEFAULT NULL,
KEY `i_a` (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
+connection default;
select count(*) from s use index();
count(*)
0
select count(*) from s use index(i_a);
count(*)
0
+disconnect conn1;
drop table s;
diff --git a/storage/tokudb/mysql-test/tokudb/r/hotindex-insert-0.result b/storage/tokudb/mysql-test/tokudb/r/hotindex-insert-0.result
index 78515053ff6..cff2c3b705a 100644
--- a/storage/tokudb/mysql-test/tokudb/r/hotindex-insert-0.result
+++ b/storage/tokudb/mysql-test/tokudb/r/hotindex-insert-0.result
@@ -1,5 +1,7 @@
SET DEFAULT_STORAGE_ENGINE='tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
+connection default;
drop table if exists s;
create table s (a int, b int);
insert into s values (10000,0),(10000,1),(10000,2),(10000,3),(10000,4),(10000,5),(10000,6),(10000,7),(10000,8),(10000,9);
@@ -10004,6 +10006,7 @@ insert into s values (2,0),(2,1),(2,2),(2,3),(2,4),(2,5),(2,6),(2,7),(2,8),(2,9)
insert into s values (1,0),(1,1),(1,2),(1,3),(1,4),(1,5),(1,6),(1,7),(1,8),(1,9);
set tokudb_create_index_online=1;
create index i_a on s(a);
+connection conn1;
insert into s values (1000000000,10000);
insert into s values (1000000000,9999);
insert into s values (1000000000,9998);
@@ -20004,10 +20007,12 @@ insert into s values (1000000000,4);
insert into s values (1000000000,3);
insert into s values (1000000000,2);
insert into s values (1000000000,1);
+connection default;
select count(*) from s use index();
count(*)
110000
select count(*) from s use index(i_a);
count(*)
110000
+disconnect conn1;
drop table s;
diff --git a/storage/tokudb/mysql-test/tokudb/r/hotindex-insert-1.result b/storage/tokudb/mysql-test/tokudb/r/hotindex-insert-1.result
index 4d2e5ed6b76..51803eb956b 100644
--- a/storage/tokudb/mysql-test/tokudb/r/hotindex-insert-1.result
+++ b/storage/tokudb/mysql-test/tokudb/r/hotindex-insert-1.result
@@ -1,5 +1,7 @@
SET DEFAULT_STORAGE_ENGINE='tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
+connection default;
drop table if exists s;
create table s (a int, b int);
begin;
@@ -10006,6 +10008,7 @@ insert into s values (1,0),(1,1),(1,2),(1,3),(1,4),(1,5),(1,6),(1,7),(1,8),(1,9)
commit;
set tokudb_create_index_online=1;
create index i_a on s(a) clustering=yes;
+connection conn1;
insert into s values (1000000000,10000);
insert into s values (1000000000,9999);
insert into s values (1000000000,9998);
@@ -20006,10 +20009,12 @@ insert into s values (1000000000,4);
insert into s values (1000000000,3);
insert into s values (1000000000,2);
insert into s values (1000000000,1);
+connection default;
select count(*) from s use index();
count(*)
110000
select count(*) from s use index(i_a);
count(*)
110000
+disconnect conn1;
drop table s;
diff --git a/storage/tokudb/mysql-test/tokudb/r/hotindex-insert-2.result b/storage/tokudb/mysql-test/tokudb/r/hotindex-insert-2.result
index 1c8b7df1ebd..927a321972f 100644
--- a/storage/tokudb/mysql-test/tokudb/r/hotindex-insert-2.result
+++ b/storage/tokudb/mysql-test/tokudb/r/hotindex-insert-2.result
@@ -1,17 +1,22 @@
SET DEFAULT_STORAGE_ENGINE='tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
+connection default;
drop table if exists s;
create table s (a int auto_increment, b int, c int, primary key (a));
# populate table s
# done inserting elements
set tokudb_create_index_online=1;
create index i_a on s(c);
+connection conn1;
# starting insert while create index is happening
# done with insert
+connection default;
select count(*) from s use index(primary);
count(*)
100000
select count(*) from s use index(i_a);
count(*)
100000
+disconnect conn1;
drop table s;
diff --git a/storage/tokudb/mysql-test/tokudb/r/hotindex-insert-bigchar.result b/storage/tokudb/mysql-test/tokudb/r/hotindex-insert-bigchar.result
index e250e9a3c7a..c0b293edb22 100644
--- a/storage/tokudb/mysql-test/tokudb/r/hotindex-insert-bigchar.result
+++ b/storage/tokudb/mysql-test/tokudb/r/hotindex-insert-bigchar.result
@@ -1,9 +1,12 @@
SET DEFAULT_STORAGE_ENGINE='tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
+connection default;
drop table if exists s;
create table s (a int, b varchar(2000));
set tokudb_create_index_online=1;
create index i_a on s(b) clustering=yes;
+connection conn1;
insert into s values (1000000000,repeat('a', 2000));
insert into s values (1000000000,repeat('a', 2000));
insert into s values (1000000000,repeat('a', 2000));
@@ -10004,10 +10007,12 @@ insert into s values (1000000000,repeat('a', 2000));
insert into s values (1000000000,repeat('a', 2000));
insert into s values (1000000000,repeat('a', 2000));
insert into s values (1000000000,repeat('a', 2000));
+connection default;
select count(*) from s use index();
count(*)
110000
select count(*) from s use index(i_a);
count(*)
110000
+disconnect conn1;
drop table s;
diff --git a/storage/tokudb/mysql-test/tokudb/r/hotindex-update-0.result b/storage/tokudb/mysql-test/tokudb/r/hotindex-update-0.result
index 8ec9af009bb..5a82eb269f0 100644
--- a/storage/tokudb/mysql-test/tokudb/r/hotindex-update-0.result
+++ b/storage/tokudb/mysql-test/tokudb/r/hotindex-update-0.result
@@ -1,5 +1,7 @@
SET DEFAULT_STORAGE_ENGINE='tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
+connection default;
drop table if exists s;
create table s (a int, b int);
insert into s values (10000,0),(10000,1),(10000,2),(10000,3),(10000,4),(10000,5),(10000,6),(10000,7),(10000,8),(10000,9);
@@ -10004,6 +10006,7 @@ insert into s values (2,0),(2,1),(2,2),(2,3),(2,4),(2,5),(2,6),(2,7),(2,8),(2,9)
insert into s values (1,0),(1,1),(1,2),(1,3),(1,4),(1,5),(1,6),(1,7),(1,8),(1,9);
set tokudb_create_index_online=1;
create index i_a on s(a);
+connection conn1;
update s set a=20000+10000 where a=10000;
update s set a=20000+9999 where a=9999;
update s set a=20000+9998 where a=9998;
@@ -20004,4 +20007,6 @@ update s set a=20000+4 where a=4;
update s set a=20000+3 where a=3;
update s set a=20000+2 where a=2;
update s set a=20000+1 where a=1;
+connection default;
+disconnect conn1;
drop table s;
diff --git a/storage/tokudb/mysql-test/tokudb/r/hotindex-update-1.result b/storage/tokudb/mysql-test/tokudb/r/hotindex-update-1.result
index 7561c61f10b..d27ce0d2c9a 100644
--- a/storage/tokudb/mysql-test/tokudb/r/hotindex-update-1.result
+++ b/storage/tokudb/mysql-test/tokudb/r/hotindex-update-1.result
@@ -1,5 +1,7 @@
SET DEFAULT_STORAGE_ENGINE='tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
+connection default;
drop table if exists s;
create table s (a int, b int);
begin;
@@ -10006,6 +10008,7 @@ insert into s values (1,0),(1,1),(1,2),(1,3),(1,4),(1,5),(1,6),(1,7),(1,8),(1,9)
commit;
set tokudb_create_index_online=1;
create index i_a on s(a) clustering=yes;
+connection conn1;
update s set a=20000+10000 where a=10000;
update s set a=20000+9999 where a=9999;
update s set a=20000+9998 where a=9998;
@@ -20006,4 +20009,6 @@ update s set a=20000+4 where a=4;
update s set a=20000+3 where a=3;
update s set a=20000+2 where a=2;
update s set a=20000+1 where a=1;
+connection default;
+disconnect conn1;
drop table s;
diff --git a/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_timeout.result b/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_timeout.result
index ce8f7d2d7ec..cc8b5f2f43f 100644
--- a/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_timeout.result
+++ b/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_timeout.result
@@ -8,13 +8,16 @@ select * from information_schema.tokudb_locks;
locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name
select * from information_schema.tokudb_lock_waits;
requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name
+connect conn_a,localhost,root,,;
set autocommit=0;
set tokudb_prelock_empty=OFF;
insert into t values (1);
+connect conn_b,localhost,root,,;
set autocommit=0;
set tokudb_prelock_empty=OFF;
set tokudb_lock_timeout=60000;
replace into t values (1);
+connection default;
select * from information_schema.tokudb_locks;
locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name
TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main
@@ -25,13 +28,18 @@ select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx;
trx_id trx_mysql_thread_id
TRX_ID MYSQL_ID
TRX_ID MYSQL_ID
+connection conn_a;
commit;
select * from information_schema.tokudb_locks;
locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name
select * from information_schema.tokudb_lock_waits;
requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name
+connection conn_b;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
commit;
+connection default;
+disconnect conn_a;
+disconnect conn_b;
select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx;
trx_id trx_mysql_thread_id
select * from information_schema.tokudb_locks;
diff --git a/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_locks.result b/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_locks.result
index 070f42b30de..edb488c69b3 100644
--- a/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_locks.result
+++ b/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_locks.result
@@ -8,10 +8,12 @@ locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name l
insert into t values (1);
insert into t values (3);
insert into t values (5);
+connect conn_a,localhost,root,,;
set autocommit=0;
insert into t values (2);
insert into t values (4);
insert into t values (6);
+connection default;
select locks_dname,locks_key_left,locks_key_right,locks_table_schema,locks_table_name,locks_table_dictionary_name from information_schema.tokudb_locks where locks_table_schema='test' and locks_table_name='t' and locks_table_dictionary_name='main' order by locks_key_left, locks_key_right;
locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name
./test/t-main 0001000000 0001000000 test t main
@@ -20,9 +22,12 @@ locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name l
./test/t-main 0004000000 0004000000 test t main
./test/t-main 0005000000 0005000000 test t main
./test/t-main 0006000000 0006000000 test t main
+connection conn_a;
commit;
+connection default;
commit;
select locks_dname,locks_key_left,locks_key_right,locks_table_schema,locks_table_name,locks_table_dictionary_name from information_schema.tokudb_locks where locks_table_schema='test' and locks_table_name='t' and locks_table_dictionary_name='main' order by locks_key_left, locks_key_right;
locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name
commit;
+disconnect conn_a;
drop table t;
diff --git a/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_trx.result b/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_trx.result
index 3a9a936a7a6..21baa9510c8 100644
--- a/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_trx.result
+++ b/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_trx.result
@@ -12,15 +12,20 @@ count(trx_mysql_thread_id)
commit;
select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx where trx_mysql_thread_id in(connection_id());
trx_id trx_mysql_thread_id
+connect conn_a,localhost,root,,;
set autocommit=0;
insert into t values (2);
select count(trx_mysql_thread_id) from information_schema.tokudb_trx where trx_mysql_thread_id in(connection_id());
count(trx_mysql_thread_id)
1
+connection default;
select count(trx_mysql_thread_id) from information_schema.tokudb_trx where trx_mysql_thread_id in(connection_id());
count(trx_mysql_thread_id)
0
+connection conn_a;
commit;
+connection default;
select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx where trx_mysql_thread_id in(connection_id());
trx_id trx_mysql_thread_id
+disconnect conn_a;
drop table t;
diff --git a/storage/tokudb/mysql-test/tokudb/r/locking-read-repeatable-read-1.result b/storage/tokudb/mysql-test/tokudb/r/locking-read-repeatable-read-1.result
index c9fcb5e2273..433b5baf18b 100644
--- a/storage/tokudb/mysql-test/tokudb/r/locking-read-repeatable-read-1.result
+++ b/storage/tokudb/mysql-test/tokudb/r/locking-read-repeatable-read-1.result
@@ -5,9 +5,12 @@ begin;
select * from t where a=1 lock in share mode;
a b
1 0
+connect conn1,localhost,root;
set session transaction isolation level repeatable read;
begin;
update t set b=b+1 where a=1;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection default;
commit;
+disconnect conn1;
drop table t;
diff --git a/storage/tokudb/mysql-test/tokudb/r/locking-read-repeatable-read-2.result b/storage/tokudb/mysql-test/tokudb/r/locking-read-repeatable-read-2.result
index f1b214e514c..79e886a8682 100644
--- a/storage/tokudb/mysql-test/tokudb/r/locking-read-repeatable-read-2.result
+++ b/storage/tokudb/mysql-test/tokudb/r/locking-read-repeatable-read-2.result
@@ -9,9 +9,12 @@ a b
1 0
2 1
3 2
+connect conn1,localhost,root;
set session transaction isolation level repeatable read;
begin;
update t set b=b+1 where a=2;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection default;
commit;
+disconnect conn1;
drop table t;
diff --git a/storage/tokudb/mysql-test/tokudb/r/lockretry-insert.writelocktable.result b/storage/tokudb/mysql-test/tokudb/r/lockretry-insert.writelocktable.result
index 439c032639d..072c7769f69 100644
--- a/storage/tokudb/mysql-test/tokudb/r/lockretry-insert.writelocktable.result
+++ b/storage/tokudb/mysql-test/tokudb/r/lockretry-insert.writelocktable.result
@@ -1,5004 +1,9008 @@
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=TokuDB;
+connect conn1,localhost,root,,;
+connection default;
SET AUTOCOMMIT=OFF;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (1000);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (999);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (998);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (997);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (996);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (995);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (994);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (993);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (992);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (991);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (990);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (989);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (988);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (987);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (986);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (985);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (984);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (983);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (982);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (981);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (980);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (979);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (978);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (977);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (976);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (975);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (974);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (973);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (972);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (971);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (970);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (969);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (968);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (967);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (966);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (965);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (964);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (963);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (962);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (961);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (960);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (959);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (958);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (957);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (956);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (955);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (954);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (953);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (952);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (951);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (950);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (949);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (948);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (947);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (946);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (945);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (944);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (943);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (942);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (941);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (940);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (939);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (938);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (937);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (936);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (935);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (934);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (933);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (932);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (931);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (930);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (929);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (928);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (927);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (926);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (925);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (924);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (923);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (922);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (921);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (920);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (919);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (918);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (917);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (916);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (915);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (914);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (913);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (912);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (911);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (910);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (909);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (908);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (907);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (906);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (905);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (904);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (903);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (902);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (901);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (900);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (899);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (898);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
[... this nine-line sequence repeats unchanged for each value from 897 down to 503: connection default runs BEGIN and INSERT INTO t1 VALUES (n); connection conn1 runs LOCK TABLES t1 WRITE; connection default runs COMMIT; connection conn1 runs UNLOCK TABLES ...]
+connection default;
BEGIN;
INSERT INTO t1 VALUES (502);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (501);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (500);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (499);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (498);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (497);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (496);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (495);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (494);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (493);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (492);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (491);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (490);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (489);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (488);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (487);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (486);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (485);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (484);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (483);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (482);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (481);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (480);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (479);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (478);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (477);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (476);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (475);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (474);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (473);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (472);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (471);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (470);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (469);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (468);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (467);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (466);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (465);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (464);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (463);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (462);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (461);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (460);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (459);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (458);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (457);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (456);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (455);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (454);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (453);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (452);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (451);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (450);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (449);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (448);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (447);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (446);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (445);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (444);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (443);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (442);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (441);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (440);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (439);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (438);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (437);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (436);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (435);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (434);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (433);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (432);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (431);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (430);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (429);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (428);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (427);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (426);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (425);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (424);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (423);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (422);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (421);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (420);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (419);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (418);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (417);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (416);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (415);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (414);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (413);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (412);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (411);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (410);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (409);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (408);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (407);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (406);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (405);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (404);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (403);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (402);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (401);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (400);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (399);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (398);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (397);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (396);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (395);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (394);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (393);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (392);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (391);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (390);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (389);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (388);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (387);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (386);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (385);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (384);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (383);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (382);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (381);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (380);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (379);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (378);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (377);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (376);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (375);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (374);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (373);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (372);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (371);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (370);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (369);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (368);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (367);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (366);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (365);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (364);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (363);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (362);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (361);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (360);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (359);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (358);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (357);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (356);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (355);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (354);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (353);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (352);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (351);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (350);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (349);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (348);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (347);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (346);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (345);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (344);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (343);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (342);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (341);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (340);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (339);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (338);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (337);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (336);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (335);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (334);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (333);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (332);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (331);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (330);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (329);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (328);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (327);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (326);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (325);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (324);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (323);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (322);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (321);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (320);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (319);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (318);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (317);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (316);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (315);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (314);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (313);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (312);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (311);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (310);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (309);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (308);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (307);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (306);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (305);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (304);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (303);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (302);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (301);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (300);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (299);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (298);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (297);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (296);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (295);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (294);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (293);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (292);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (291);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (290);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (289);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (288);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (287);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (286);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (285);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (284);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (283);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (282);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (281);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (280);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (279);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (278);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (277);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (276);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (275);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (274);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (273);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (272);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (271);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (270);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (269);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (268);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (267);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (266);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (265);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (264);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (263);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (262);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (261);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (260);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (259);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (258);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (257);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (256);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (255);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (254);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (253);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (252);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (251);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (250);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (249);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (248);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (247);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (246);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (245);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (244);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (243);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (242);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (241);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (240);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (239);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (238);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (237);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (236);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (235);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (234);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (233);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (232);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (231);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (230);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (229);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (228);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (227);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (226);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (225);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (224);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (223);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (222);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (221);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (220);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (219);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (218);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (217);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (216);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (215);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (214);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (213);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (212);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (211);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (210);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (209);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (208);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (207);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (206);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (205);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (204);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (203);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (202);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (201);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (200);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (199);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (198);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (197);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (196);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (195);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (194);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (193);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (192);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (191);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (190);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (189);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (188);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (187);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (186);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (185);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (184);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (183);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (182);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (181);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (180);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (179);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (178);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (177);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (176);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (175);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (174);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (173);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (172);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (171);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (170);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (169);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (168);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (167);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (166);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (165);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (164);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (163);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (162);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (161);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (160);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (159);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (158);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (157);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (156);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (155);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (154);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (153);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (152);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (151);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (150);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (149);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (148);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (147);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (146);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (145);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (144);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (143);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (142);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (141);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (140);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (139);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (138);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (137);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (136);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (135);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (134);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (133);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (132);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (131);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (130);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (129);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (128);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (127);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (126);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (125);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (124);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (123);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (122);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (121);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (120);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (119);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (118);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (117);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (116);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (115);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (114);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (113);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (112);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (111);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (110);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (109);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (108);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (107);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (106);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (105);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (104);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (103);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (102);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (101);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (100);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (99);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (98);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (97);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (96);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (95);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (94);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (93);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (92);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (91);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (90);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (89);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (88);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (87);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (86);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (85);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (84);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (83);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (82);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (81);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (80);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (79);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (78);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (77);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (76);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (75);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (74);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (73);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (72);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (71);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (70);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (69);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (68);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (67);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (66);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (65);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (64);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (63);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (62);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (61);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (60);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (59);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (58);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (57);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (56);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (55);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (54);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (53);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (52);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (51);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (50);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (49);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (48);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (47);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (46);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (45);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (44);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (43);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (42);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (41);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (40);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (39);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (38);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (37);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (36);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (35);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (34);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (33);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (32);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (31);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (30);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (29);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (28);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (27);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (26);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (25);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (24);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (23);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (22);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (21);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (20);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (19);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (18);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (17);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (16);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (15);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (14);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (13);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (12);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (11);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (10);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (9);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (8);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (7);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (6);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (5);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (4);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (3);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (2);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+connection default;
BEGIN;
INSERT INTO t1 VALUES (1);
+connection conn1;
LOCK TABLES t1 WRITE;
+connection default;
COMMIT;
+connection conn1;
UNLOCK TABLES;
+disconnect conn1;
+connection default;
DROP TABLE t1;
diff --git a/storage/tokudb/mysql-test/tokudb/r/lockretry-writelocktable.insert.result b/storage/tokudb/mysql-test/tokudb/r/lockretry-writelocktable.insert.result
index d8dad90efdc..7a25b98e52e 100644
--- a/storage/tokudb/mysql-test/tokudb/r/lockretry-writelocktable.insert.result
+++ b/storage/tokudb/mysql-test/tokudb/r/lockretry-writelocktable.insert.result
@@ -1,3003 +1,7006 @@
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=TokuDB;
+connect conn1,localhost,root,,;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (1000);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (999);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (998);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (997);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (996);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (995);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (994);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (993);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (992);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (991);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (990);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (989);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (988);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (987);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (986);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (985);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (984);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (983);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (982);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (981);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (980);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (979);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (978);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (977);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (976);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (975);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (974);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (973);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (972);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (971);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (970);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (969);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (968);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (967);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (966);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (965);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (964);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (963);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (962);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (961);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (960);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (959);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (958);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (957);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (956);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (955);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (954);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (953);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (952);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (951);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (950);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (949);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (948);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (947);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (946);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (945);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (944);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (943);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (942);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (941);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (940);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (939);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (938);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (937);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (936);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (935);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (934);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (933);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (932);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (931);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (930);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (929);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (928);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (927);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (926);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (925);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (924);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (923);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (922);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (921);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (920);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (919);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (918);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (917);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (916);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (915);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (914);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (913);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (912);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (911);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (910);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (909);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (908);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (907);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (906);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (905);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (904);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (903);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (902);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (901);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (900);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (899);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (898);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (897);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (896);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (895);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (894);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (893);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (892);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (891);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (890);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (889);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (888);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (887);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (886);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (885);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (884);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (883);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (882);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (881);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (880);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (879);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (878);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (877);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (876);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (875);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (874);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (873);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (872);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (871);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (870);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (869);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (868);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (867);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (866);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (865);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (864);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (863);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (862);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (861);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (860);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (859);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (858);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (857);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (856);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (855);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (854);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (853);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (852);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (851);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (850);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (849);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (848);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (847);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (846);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (845);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (844);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (843);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (842);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (841);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (840);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (839);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (838);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (837);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (836);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (835);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (834);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (833);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (832);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (831);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (830);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (829);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (828);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (827);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (826);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (825);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (824);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (823);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (822);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (821);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (820);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (819);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (818);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (817);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (816);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (815);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (814);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (813);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (812);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (811);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (810);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (809);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (808);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (807);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (806);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (805);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (804);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (803);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (802);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (801);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (800);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (799);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (798);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (797);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (796);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (795);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (794);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (793);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (792);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (791);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (790);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (789);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (788);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (787);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (786);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (785);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (784);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (783);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (782);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (781);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (780);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (779);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (778);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (777);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (776);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (775);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (774);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (773);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (772);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (771);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (770);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (769);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (768);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (767);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (766);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (765);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (764);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (763);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (762);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (761);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (760);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (759);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (758);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (757);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (756);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (755);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (754);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (753);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (752);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (751);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (750);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (749);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (748);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (747);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (746);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (745);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (744);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (743);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (742);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (741);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (740);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (739);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (738);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (737);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (736);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (735);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (734);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (733);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (732);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (731);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (730);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (729);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (728);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (727);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (726);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (725);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (724);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (723);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (722);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (721);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (720);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (719);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (718);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (717);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (716);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (715);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (714);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (713);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (712);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (711);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (710);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (709);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (708);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (707);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (706);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (705);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (704);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (703);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (702);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (701);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (700);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (699);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (698);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (697);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (696);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (695);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (694);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (693);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (692);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (691);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (690);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (689);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (688);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (687);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (686);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (685);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (684);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (683);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (682);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (681);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (680);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (679);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (678);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (677);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (676);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (675);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (674);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (673);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (672);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (671);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (670);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (669);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (668);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (667);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (666);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (665);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (664);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (663);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (662);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (661);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (660);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (659);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (658);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (657);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (656);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (655);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (654);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (653);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (652);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (651);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (650);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (649);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (648);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (647);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (646);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (645);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (644);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (643);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (642);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (641);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (640);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (639);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (638);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (637);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (636);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (635);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (634);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (633);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (632);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (631);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (630);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (629);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (628);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (627);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (626);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (625);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (624);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (623);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (622);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (621);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (620);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (619);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (618);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (617);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (616);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (615);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (614);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (613);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (612);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (611);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (610);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (609);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (608);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (607);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (606);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (605);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (604);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (603);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (602);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (601);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (600);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (599);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (598);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (597);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (596);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (595);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (594);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (593);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (592);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (591);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (590);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (589);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (588);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (587);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (586);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (585);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (584);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (583);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (582);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (581);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (580);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (579);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (578);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (577);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (576);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (575);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (574);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (573);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (572);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (571);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (570);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (569);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (568);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (567);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (566);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (565);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (564);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (563);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (562);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (561);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (560);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (559);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (558);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (557);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (556);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (555);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (554);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (553);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (552);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (551);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (550);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (549);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (548);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (547);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (546);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (545);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (544);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (543);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (542);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (541);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (540);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (539);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (538);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (537);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (536);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (535);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (534);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (533);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (532);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (531);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (530);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (529);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (528);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (527);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (526);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (525);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (524);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (523);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (522);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (521);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (520);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (519);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (518);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (517);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (516);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (515);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (514);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (513);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (512);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (511);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (510);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (509);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (508);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (507);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (506);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (505);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (504);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (503);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (502);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (501);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (500);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (499);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (498);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (497);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (496);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (495);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (494);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (493);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (492);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (491);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (490);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (489);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (488);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (487);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (486);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (485);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (484);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (483);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (482);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (481);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (480);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (479);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (478);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (477);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (476);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (475);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (474);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (473);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (472);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (471);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (470);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (469);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (468);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (467);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (466);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (465);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (464);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (463);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (462);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (461);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (460);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (459);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (458);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (457);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (456);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (455);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (454);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (453);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (452);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (451);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (450);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (449);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (448);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (447);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (446);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (445);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (444);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (443);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (442);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (441);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (440);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (439);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (438);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (437);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (436);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (435);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (434);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (433);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (432);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (431);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (430);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (429);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (428);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (427);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (426);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (425);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (424);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (423);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (422);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (421);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (420);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (419);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (418);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (417);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (416);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (415);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (414);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (413);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (412);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (411);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (410);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (409);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (408);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (407);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (406);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (405);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (404);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (403);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (402);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (401);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (400);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (399);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (398);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (397);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (396);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (395);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (394);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (393);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (392);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (391);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (390);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (389);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (388);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (387);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (386);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (385);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (384);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (383);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (382);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (381);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (380);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (379);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (378);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (377);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (376);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (375);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (374);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (373);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (372);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (371);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (370);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (369);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (368);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (367);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (366);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (365);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (364);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (363);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (362);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (361);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (360);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (359);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (358);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (357);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (356);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (355);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (354);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (353);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (352);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (351);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (350);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (349);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (348);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (347);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (346);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (345);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (344);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (343);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (342);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (341);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (340);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (339);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (338);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (337);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (336);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (335);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (334);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (333);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (332);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (331);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (330);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (329);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (328);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (327);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (326);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (325);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (324);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (323);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (322);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (321);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (320);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (319);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (318);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (317);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (316);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (315);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (314);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (313);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (312);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (311);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (310);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (309);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (308);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (307);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (306);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (305);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (304);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (303);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (302);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (301);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (300);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (299);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (298);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (297);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (296);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (295);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (294);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (293);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (292);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (291);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (290);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (289);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (288);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (287);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (286);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (285);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (284);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (283);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (282);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (281);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (280);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (279);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (278);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (277);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (276);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (275);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (274);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (273);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (272);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (271);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (270);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (269);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (268);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (267);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (266);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (265);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (264);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (263);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (262);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (261);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (260);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (259);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (258);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (257);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (256);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (255);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (254);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (253);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (252);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (251);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (250);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (249);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (248);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (247);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (246);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (245);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (244);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (243);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (242);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (241);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (240);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (239);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (238);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (237);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (236);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (235);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (234);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (233);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (232);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (231);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (230);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (229);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (228);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (227);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (226);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (225);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (224);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (223);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (222);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (221);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (220);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (219);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (218);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (217);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (216);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (215);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (214);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (213);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (212);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (211);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (210);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (209);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (208);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (207);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (206);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (205);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (204);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (203);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (202);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (201);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (200);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (199);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (198);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (197);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (196);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (195);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (194);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (193);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (192);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (191);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (190);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (189);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (188);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (187);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (186);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (185);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (184);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (183);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (182);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (181);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (180);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (179);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (178);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (177);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (176);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (175);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (174);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (173);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (172);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (171);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (170);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (169);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (168);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (167);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (166);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (165);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (164);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (163);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (162);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (161);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (160);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (159);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (158);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (157);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (156);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (155);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (154);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (153);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (152);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (151);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (150);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (149);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (148);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (147);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (146);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (145);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (144);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (143);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (142);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (141);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (140);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (139);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (138);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (137);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (136);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (135);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (134);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (133);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (132);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (131);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (130);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (129);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (128);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (127);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (126);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (125);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (124);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (123);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (122);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (121);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (120);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (119);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (118);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (117);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (116);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (115);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (114);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (113);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (112);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (111);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (110);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (109);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (108);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (107);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (106);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (105);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (104);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (103);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (102);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (101);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (100);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (99);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (98);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (97);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (96);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (95);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (94);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (93);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (92);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (91);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (90);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (89);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (88);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (87);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (86);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (85);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (84);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (83);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (82);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (81);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (80);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (79);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (78);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (77);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (76);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (75);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (74);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (73);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (72);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (71);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (70);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (69);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (68);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (67);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (66);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (65);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (64);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (63);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (62);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (61);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (60);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (59);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (58);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (57);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (56);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (55);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (54);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (53);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (52);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (51);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (50);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (49);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (48);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (47);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (46);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (45);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (44);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (43);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (42);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (41);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (40);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (39);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (38);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (37);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (36);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (35);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (34);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (33);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (32);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (31);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (30);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (29);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (28);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (27);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (26);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (25);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (24);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (23);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (22);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (21);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (20);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (19);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (18);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (17);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (16);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (15);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (14);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (13);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (12);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (11);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (10);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (9);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (8);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (7);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (6);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (5);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (4);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (3);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (2);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
+connection conn1;
INSERT INTO t1 VALUES (1);
+connection default;
UNLOCK TABLES;
+connection conn1;
+disconnect conn1;
+connection default;
DROP TABLE t1;
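
Note on these lockretry result hunks: every `+` line above and below is an echoed `connection`/`connect`/`disconnect` command; the SQL statements themselves are unchanged. Result files for multi-connection tests had to be regenerated once mysqltest started writing connection switches into `.result` output, which is why each file roughly doubles in line count (e.g. the `@@ -1,4003 +1,8006 @@` hunk header that follows). As a hedged reconstruction only — the actual `lockretry-writelocktable*.test` sources are not part of this diff — the insert2 results above/below are consistent with a mysqltest loop of this shape, where conn1's INSERT is sent while the default connection holds the write lock and is reaped after UNLOCK (TokuDB retries the blocked lock rather than timing out):

  # hypothetical sketch of the generating .test loop (names/values assumed)
  connect (conn1,localhost,root,,);
  connection default;
  let $i = 1000;
  while ($i > 0)
  {
    connection default;
    LOCK TABLES t1 WRITE;
    eval INSERT INTO t1 VALUES (-$i);
    connection conn1;
    # echoed at send time; blocks until the write lock is released
    send_eval INSERT INTO t1 VALUES ($i);
    connection default;
    UNLOCK TABLES;
    connection conn1;
    # reap produces no query echo, matching the bare "connection conn1;" lines
    reap;
    dec $i;
  }
  disconnect conn1;
  connection default;
  DROP TABLE t1;

Under that reading, each eight-line block in the new result output (two connection switches, LOCK, two INSERTs, UNLOCK, and a bare reap switch) corresponds to one loop iteration.
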
diff --git a/storage/tokudb/mysql-test/tokudb/r/lockretry-writelocktable.insert2.result b/storage/tokudb/mysql-test/tokudb/r/lockretry-writelocktable.insert2.result
index 36073f932d4..6f23f8abbbd 100644
--- a/storage/tokudb/mysql-test/tokudb/r/lockretry-writelocktable.insert2.result
+++ b/storage/tokudb/mysql-test/tokudb/r/lockretry-writelocktable.insert2.result
@@ -1,4003 +1,8006 @@
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=TokuDB;
+connect conn1,localhost,root,,;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-1000);
+connection conn1;
INSERT INTO t1 VALUES (1000);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-999);
+connection conn1;
INSERT INTO t1 VALUES (999);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-998);
+connection conn1;
INSERT INTO t1 VALUES (998);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-997);
+connection conn1;
INSERT INTO t1 VALUES (997);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-996);
+connection conn1;
INSERT INTO t1 VALUES (996);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-995);
+connection conn1;
INSERT INTO t1 VALUES (995);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-994);
+connection conn1;
INSERT INTO t1 VALUES (994);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-993);
+connection conn1;
INSERT INTO t1 VALUES (993);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-992);
+connection conn1;
INSERT INTO t1 VALUES (992);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-991);
+connection conn1;
INSERT INTO t1 VALUES (991);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-990);
+connection conn1;
INSERT INTO t1 VALUES (990);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-989);
+connection conn1;
INSERT INTO t1 VALUES (989);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-988);
+connection conn1;
INSERT INTO t1 VALUES (988);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-987);
+connection conn1;
INSERT INTO t1 VALUES (987);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-986);
+connection conn1;
INSERT INTO t1 VALUES (986);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-985);
+connection conn1;
INSERT INTO t1 VALUES (985);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-984);
+connection conn1;
INSERT INTO t1 VALUES (984);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-983);
+connection conn1;
INSERT INTO t1 VALUES (983);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-982);
+connection conn1;
INSERT INTO t1 VALUES (982);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-981);
+connection conn1;
INSERT INTO t1 VALUES (981);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-980);
+connection conn1;
INSERT INTO t1 VALUES (980);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-979);
+connection conn1;
INSERT INTO t1 VALUES (979);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-978);
+connection conn1;
INSERT INTO t1 VALUES (978);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-977);
+connection conn1;
INSERT INTO t1 VALUES (977);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-976);
+connection conn1;
INSERT INTO t1 VALUES (976);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-975);
+connection conn1;
INSERT INTO t1 VALUES (975);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-974);
+connection conn1;
INSERT INTO t1 VALUES (974);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-973);
+connection conn1;
INSERT INTO t1 VALUES (973);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-972);
+connection conn1;
INSERT INTO t1 VALUES (972);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-971);
+connection conn1;
INSERT INTO t1 VALUES (971);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-970);
+connection conn1;
INSERT INTO t1 VALUES (970);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-969);
+connection conn1;
INSERT INTO t1 VALUES (969);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-968);
+connection conn1;
INSERT INTO t1 VALUES (968);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-967);
+connection conn1;
INSERT INTO t1 VALUES (967);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-966);
+connection conn1;
INSERT INTO t1 VALUES (966);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-965);
+connection conn1;
INSERT INTO t1 VALUES (965);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-964);
+connection conn1;
INSERT INTO t1 VALUES (964);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-963);
+connection conn1;
INSERT INTO t1 VALUES (963);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-962);
+connection conn1;
INSERT INTO t1 VALUES (962);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-961);
+connection conn1;
INSERT INTO t1 VALUES (961);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-960);
+connection conn1;
INSERT INTO t1 VALUES (960);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-959);
+connection conn1;
INSERT INTO t1 VALUES (959);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-958);
+connection conn1;
INSERT INTO t1 VALUES (958);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-957);
+connection conn1;
INSERT INTO t1 VALUES (957);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-956);
+connection conn1;
INSERT INTO t1 VALUES (956);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-955);
+connection conn1;
INSERT INTO t1 VALUES (955);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-954);
+connection conn1;
INSERT INTO t1 VALUES (954);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-953);
+connection conn1;
INSERT INTO t1 VALUES (953);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-952);
+connection conn1;
INSERT INTO t1 VALUES (952);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-951);
+connection conn1;
INSERT INTO t1 VALUES (951);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-950);
+connection conn1;
INSERT INTO t1 VALUES (950);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-949);
+connection conn1;
INSERT INTO t1 VALUES (949);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-948);
+connection conn1;
INSERT INTO t1 VALUES (948);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-947);
+connection conn1;
INSERT INTO t1 VALUES (947);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-946);
+connection conn1;
INSERT INTO t1 VALUES (946);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-945);
+connection conn1;
INSERT INTO t1 VALUES (945);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-944);
+connection conn1;
INSERT INTO t1 VALUES (944);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-943);
+connection conn1;
INSERT INTO t1 VALUES (943);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-942);
+connection conn1;
INSERT INTO t1 VALUES (942);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-941);
+connection conn1;
INSERT INTO t1 VALUES (941);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-940);
+connection conn1;
INSERT INTO t1 VALUES (940);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-939);
+connection conn1;
INSERT INTO t1 VALUES (939);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-938);
+connection conn1;
INSERT INTO t1 VALUES (938);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-937);
+connection conn1;
INSERT INTO t1 VALUES (937);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-936);
+connection conn1;
INSERT INTO t1 VALUES (936);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-935);
+connection conn1;
INSERT INTO t1 VALUES (935);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-934);
+connection conn1;
INSERT INTO t1 VALUES (934);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-933);
+connection conn1;
INSERT INTO t1 VALUES (933);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-932);
+connection conn1;
INSERT INTO t1 VALUES (932);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-931);
+connection conn1;
INSERT INTO t1 VALUES (931);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-930);
+connection conn1;
INSERT INTO t1 VALUES (930);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-929);
+connection conn1;
INSERT INTO t1 VALUES (929);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-928);
+connection conn1;
INSERT INTO t1 VALUES (928);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-927);
+connection conn1;
INSERT INTO t1 VALUES (927);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-926);
+connection conn1;
INSERT INTO t1 VALUES (926);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-925);
+connection conn1;
INSERT INTO t1 VALUES (925);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-924);
+connection conn1;
INSERT INTO t1 VALUES (924);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-923);
+connection conn1;
INSERT INTO t1 VALUES (923);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-922);
+connection conn1;
INSERT INTO t1 VALUES (922);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-921);
+connection conn1;
INSERT INTO t1 VALUES (921);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-920);
+connection conn1;
INSERT INTO t1 VALUES (920);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-919);
+connection conn1;
INSERT INTO t1 VALUES (919);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-918);
+connection conn1;
INSERT INTO t1 VALUES (918);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-917);
+connection conn1;
INSERT INTO t1 VALUES (917);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-916);
+connection conn1;
INSERT INTO t1 VALUES (916);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-915);
+connection conn1;
INSERT INTO t1 VALUES (915);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-914);
+connection conn1;
INSERT INTO t1 VALUES (914);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-913);
+connection conn1;
INSERT INTO t1 VALUES (913);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-912);
+connection conn1;
INSERT INTO t1 VALUES (912);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-911);
+connection conn1;
INSERT INTO t1 VALUES (911);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-910);
+connection conn1;
INSERT INTO t1 VALUES (910);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-909);
+connection conn1;
INSERT INTO t1 VALUES (909);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-908);
+connection conn1;
INSERT INTO t1 VALUES (908);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-907);
+connection conn1;
INSERT INTO t1 VALUES (907);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-906);
+connection conn1;
INSERT INTO t1 VALUES (906);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-905);
+connection conn1;
INSERT INTO t1 VALUES (905);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-904);
+connection conn1;
INSERT INTO t1 VALUES (904);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-903);
+connection conn1;
INSERT INTO t1 VALUES (903);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-902);
+connection conn1;
INSERT INTO t1 VALUES (902);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-901);
+connection conn1;
INSERT INTO t1 VALUES (901);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-900);
+connection conn1;
INSERT INTO t1 VALUES (900);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-899);
+connection conn1;
INSERT INTO t1 VALUES (899);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-898);
+connection conn1;
INSERT INTO t1 VALUES (898);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-897);
+connection conn1;
INSERT INTO t1 VALUES (897);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-896);
+connection conn1;
INSERT INTO t1 VALUES (896);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-895);
+connection conn1;
INSERT INTO t1 VALUES (895);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-894);
+connection conn1;
INSERT INTO t1 VALUES (894);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-893);
+connection conn1;
INSERT INTO t1 VALUES (893);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-892);
+connection conn1;
INSERT INTO t1 VALUES (892);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-891);
+connection conn1;
INSERT INTO t1 VALUES (891);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-890);
+connection conn1;
INSERT INTO t1 VALUES (890);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-889);
+connection conn1;
INSERT INTO t1 VALUES (889);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-888);
+connection conn1;
INSERT INTO t1 VALUES (888);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-887);
+connection conn1;
INSERT INTO t1 VALUES (887);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-886);
+connection conn1;
INSERT INTO t1 VALUES (886);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-885);
+connection conn1;
INSERT INTO t1 VALUES (885);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-884);
+connection conn1;
INSERT INTO t1 VALUES (884);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-883);
+connection conn1;
INSERT INTO t1 VALUES (883);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-882);
+connection conn1;
INSERT INTO t1 VALUES (882);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-881);
+connection conn1;
INSERT INTO t1 VALUES (881);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-880);
+connection conn1;
INSERT INTO t1 VALUES (880);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-879);
+connection conn1;
INSERT INTO t1 VALUES (879);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-878);
+connection conn1;
INSERT INTO t1 VALUES (878);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-877);
+connection conn1;
INSERT INTO t1 VALUES (877);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-876);
+connection conn1;
INSERT INTO t1 VALUES (876);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-875);
+connection conn1;
INSERT INTO t1 VALUES (875);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-874);
+connection conn1;
INSERT INTO t1 VALUES (874);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-873);
+connection conn1;
INSERT INTO t1 VALUES (873);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-872);
+connection conn1;
INSERT INTO t1 VALUES (872);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-871);
+connection conn1;
INSERT INTO t1 VALUES (871);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-870);
+connection conn1;
INSERT INTO t1 VALUES (870);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-869);
+connection conn1;
INSERT INTO t1 VALUES (869);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-868);
+connection conn1;
INSERT INTO t1 VALUES (868);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-867);
+connection conn1;
INSERT INTO t1 VALUES (867);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-866);
+connection conn1;
INSERT INTO t1 VALUES (866);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-865);
+connection conn1;
INSERT INTO t1 VALUES (865);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-864);
+connection conn1;
INSERT INTO t1 VALUES (864);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-863);
+connection conn1;
INSERT INTO t1 VALUES (863);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-862);
+connection conn1;
INSERT INTO t1 VALUES (862);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-861);
+connection conn1;
INSERT INTO t1 VALUES (861);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-860);
+connection conn1;
INSERT INTO t1 VALUES (860);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-859);
+connection conn1;
INSERT INTO t1 VALUES (859);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-858);
+connection conn1;
INSERT INTO t1 VALUES (858);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-857);
+connection conn1;
INSERT INTO t1 VALUES (857);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-856);
+connection conn1;
INSERT INTO t1 VALUES (856);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-855);
+connection conn1;
INSERT INTO t1 VALUES (855);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-854);
+connection conn1;
INSERT INTO t1 VALUES (854);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-853);
+connection conn1;
INSERT INTO t1 VALUES (853);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-852);
+connection conn1;
INSERT INTO t1 VALUES (852);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-851);
+connection conn1;
INSERT INTO t1 VALUES (851);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-850);
+connection conn1;
INSERT INTO t1 VALUES (850);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-849);
+connection conn1;
INSERT INTO t1 VALUES (849);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-848);
+connection conn1;
INSERT INTO t1 VALUES (848);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-847);
+connection conn1;
INSERT INTO t1 VALUES (847);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-846);
+connection conn1;
INSERT INTO t1 VALUES (846);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-845);
+connection conn1;
INSERT INTO t1 VALUES (845);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-844);
+connection conn1;
INSERT INTO t1 VALUES (844);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-843);
+connection conn1;
INSERT INTO t1 VALUES (843);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-842);
+connection conn1;
INSERT INTO t1 VALUES (842);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-841);
+connection conn1;
INSERT INTO t1 VALUES (841);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-840);
+connection conn1;
INSERT INTO t1 VALUES (840);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-839);
+connection conn1;
INSERT INTO t1 VALUES (839);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-838);
+connection conn1;
INSERT INTO t1 VALUES (838);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-837);
+connection conn1;
INSERT INTO t1 VALUES (837);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-836);
+connection conn1;
INSERT INTO t1 VALUES (836);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-835);
+connection conn1;
INSERT INTO t1 VALUES (835);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-834);
+connection conn1;
INSERT INTO t1 VALUES (834);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-833);
+connection conn1;
INSERT INTO t1 VALUES (833);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-832);
+connection conn1;
INSERT INTO t1 VALUES (832);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-831);
+connection conn1;
INSERT INTO t1 VALUES (831);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-830);
+connection conn1;
INSERT INTO t1 VALUES (830);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-829);
+connection conn1;
INSERT INTO t1 VALUES (829);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-828);
+connection conn1;
INSERT INTO t1 VALUES (828);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-827);
+connection conn1;
INSERT INTO t1 VALUES (827);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-826);
+connection conn1;
INSERT INTO t1 VALUES (826);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-825);
+connection conn1;
INSERT INTO t1 VALUES (825);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-824);
+connection conn1;
INSERT INTO t1 VALUES (824);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-823);
+connection conn1;
INSERT INTO t1 VALUES (823);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-822);
+connection conn1;
INSERT INTO t1 VALUES (822);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-821);
+connection conn1;
INSERT INTO t1 VALUES (821);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-820);
+connection conn1;
INSERT INTO t1 VALUES (820);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-819);
+connection conn1;
INSERT INTO t1 VALUES (819);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-818);
+connection conn1;
INSERT INTO t1 VALUES (818);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-817);
+connection conn1;
INSERT INTO t1 VALUES (817);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-816);
+connection conn1;
INSERT INTO t1 VALUES (816);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-815);
+connection conn1;
INSERT INTO t1 VALUES (815);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-814);
+connection conn1;
INSERT INTO t1 VALUES (814);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-813);
+connection conn1;
INSERT INTO t1 VALUES (813);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-812);
+connection conn1;
INSERT INTO t1 VALUES (812);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-811);
+connection conn1;
INSERT INTO t1 VALUES (811);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-810);
+connection conn1;
INSERT INTO t1 VALUES (810);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-809);
+connection conn1;
INSERT INTO t1 VALUES (809);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-808);
+connection conn1;
INSERT INTO t1 VALUES (808);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-807);
+connection conn1;
INSERT INTO t1 VALUES (807);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-806);
+connection conn1;
INSERT INTO t1 VALUES (806);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-805);
+connection conn1;
INSERT INTO t1 VALUES (805);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-804);
+connection conn1;
INSERT INTO t1 VALUES (804);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-803);
+connection conn1;
INSERT INTO t1 VALUES (803);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-802);
+connection conn1;
INSERT INTO t1 VALUES (802);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-801);
+connection conn1;
INSERT INTO t1 VALUES (801);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-800);
+connection conn1;
INSERT INTO t1 VALUES (800);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-799);
+connection conn1;
INSERT INTO t1 VALUES (799);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-798);
+connection conn1;
INSERT INTO t1 VALUES (798);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-797);
+connection conn1;
INSERT INTO t1 VALUES (797);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-796);
+connection conn1;
INSERT INTO t1 VALUES (796);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-795);
+connection conn1;
INSERT INTO t1 VALUES (795);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-794);
+connection conn1;
INSERT INTO t1 VALUES (794);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-793);
+connection conn1;
INSERT INTO t1 VALUES (793);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-792);
+connection conn1;
INSERT INTO t1 VALUES (792);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-791);
+connection conn1;
INSERT INTO t1 VALUES (791);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-790);
+connection conn1;
INSERT INTO t1 VALUES (790);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-789);
+connection conn1;
INSERT INTO t1 VALUES (789);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-788);
+connection conn1;
INSERT INTO t1 VALUES (788);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-787);
+connection conn1;
INSERT INTO t1 VALUES (787);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-786);
+connection conn1;
INSERT INTO t1 VALUES (786);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-785);
+connection conn1;
INSERT INTO t1 VALUES (785);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-784);
+connection conn1;
INSERT INTO t1 VALUES (784);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-783);
+connection conn1;
INSERT INTO t1 VALUES (783);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-782);
+connection conn1;
INSERT INTO t1 VALUES (782);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-781);
+connection conn1;
INSERT INTO t1 VALUES (781);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-780);
+connection conn1;
INSERT INTO t1 VALUES (780);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-779);
+connection conn1;
INSERT INTO t1 VALUES (779);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-778);
+connection conn1;
INSERT INTO t1 VALUES (778);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-777);
+connection conn1;
INSERT INTO t1 VALUES (777);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-776);
+connection conn1;
INSERT INTO t1 VALUES (776);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-775);
+connection conn1;
INSERT INTO t1 VALUES (775);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-774);
+connection conn1;
INSERT INTO t1 VALUES (774);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-773);
+connection conn1;
INSERT INTO t1 VALUES (773);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-772);
+connection conn1;
INSERT INTO t1 VALUES (772);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-771);
+connection conn1;
INSERT INTO t1 VALUES (771);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-770);
+connection conn1;
INSERT INTO t1 VALUES (770);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-769);
+connection conn1;
INSERT INTO t1 VALUES (769);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-768);
+connection conn1;
INSERT INTO t1 VALUES (768);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-767);
+connection conn1;
INSERT INTO t1 VALUES (767);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-766);
+connection conn1;
INSERT INTO t1 VALUES (766);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-765);
+connection conn1;
INSERT INTO t1 VALUES (765);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-764);
+connection conn1;
INSERT INTO t1 VALUES (764);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-763);
+connection conn1;
INSERT INTO t1 VALUES (763);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-762);
+connection conn1;
INSERT INTO t1 VALUES (762);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-761);
+connection conn1;
INSERT INTO t1 VALUES (761);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-760);
+connection conn1;
INSERT INTO t1 VALUES (760);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-759);
+connection conn1;
INSERT INTO t1 VALUES (759);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-758);
+connection conn1;
INSERT INTO t1 VALUES (758);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-757);
+connection conn1;
INSERT INTO t1 VALUES (757);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-756);
+connection conn1;
INSERT INTO t1 VALUES (756);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-755);
+connection conn1;
INSERT INTO t1 VALUES (755);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-754);
+connection conn1;
INSERT INTO t1 VALUES (754);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-753);
+connection conn1;
INSERT INTO t1 VALUES (753);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-752);
+connection conn1;
INSERT INTO t1 VALUES (752);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-751);
+connection conn1;
INSERT INTO t1 VALUES (751);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-750);
+connection conn1;
INSERT INTO t1 VALUES (750);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-749);
+connection conn1;
INSERT INTO t1 VALUES (749);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-748);
+connection conn1;
INSERT INTO t1 VALUES (748);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-747);
+connection conn1;
INSERT INTO t1 VALUES (747);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-746);
+connection conn1;
INSERT INTO t1 VALUES (746);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-745);
+connection conn1;
INSERT INTO t1 VALUES (745);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-744);
+connection conn1;
INSERT INTO t1 VALUES (744);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-743);
+connection conn1;
INSERT INTO t1 VALUES (743);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-742);
+connection conn1;
INSERT INTO t1 VALUES (742);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-741);
+connection conn1;
INSERT INTO t1 VALUES (741);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-740);
+connection conn1;
INSERT INTO t1 VALUES (740);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-739);
+connection conn1;
INSERT INTO t1 VALUES (739);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-738);
+connection conn1;
INSERT INTO t1 VALUES (738);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-737);
+connection conn1;
INSERT INTO t1 VALUES (737);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-736);
+connection conn1;
INSERT INTO t1 VALUES (736);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-735);
+connection conn1;
INSERT INTO t1 VALUES (735);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-734);
+connection conn1;
INSERT INTO t1 VALUES (734);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-733);
+connection conn1;
INSERT INTO t1 VALUES (733);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-732);
+connection conn1;
INSERT INTO t1 VALUES (732);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-731);
+connection conn1;
INSERT INTO t1 VALUES (731);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-730);
+connection conn1;
INSERT INTO t1 VALUES (730);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-729);
+connection conn1;
INSERT INTO t1 VALUES (729);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-728);
+connection conn1;
INSERT INTO t1 VALUES (728);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-727);
+connection conn1;
INSERT INTO t1 VALUES (727);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-726);
+connection conn1;
INSERT INTO t1 VALUES (726);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-725);
+connection conn1;
INSERT INTO t1 VALUES (725);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-724);
+connection conn1;
INSERT INTO t1 VALUES (724);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-723);
+connection conn1;
INSERT INTO t1 VALUES (723);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-722);
+connection conn1;
INSERT INTO t1 VALUES (722);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-721);
+connection conn1;
INSERT INTO t1 VALUES (721);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-720);
+connection conn1;
INSERT INTO t1 VALUES (720);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-719);
+connection conn1;
INSERT INTO t1 VALUES (719);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-718);
+connection conn1;
INSERT INTO t1 VALUES (718);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-717);
+connection conn1;
INSERT INTO t1 VALUES (717);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-716);
+connection conn1;
INSERT INTO t1 VALUES (716);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-715);
+connection conn1;
INSERT INTO t1 VALUES (715);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-714);
+connection conn1;
INSERT INTO t1 VALUES (714);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-713);
+connection conn1;
INSERT INTO t1 VALUES (713);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-712);
+connection conn1;
INSERT INTO t1 VALUES (712);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-711);
+connection conn1;
INSERT INTO t1 VALUES (711);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-710);
+connection conn1;
INSERT INTO t1 VALUES (710);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-709);
+connection conn1;
INSERT INTO t1 VALUES (709);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-708);
+connection conn1;
INSERT INTO t1 VALUES (708);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-707);
+connection conn1;
INSERT INTO t1 VALUES (707);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-706);
+connection conn1;
INSERT INTO t1 VALUES (706);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-705);
+connection conn1;
INSERT INTO t1 VALUES (705);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-704);
+connection conn1;
INSERT INTO t1 VALUES (704);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-703);
+connection conn1;
INSERT INTO t1 VALUES (703);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-702);
+connection conn1;
INSERT INTO t1 VALUES (702);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-701);
+connection conn1;
INSERT INTO t1 VALUES (701);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-700);
+connection conn1;
INSERT INTO t1 VALUES (700);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-699);
+connection conn1;
INSERT INTO t1 VALUES (699);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-698);
+connection conn1;
INSERT INTO t1 VALUES (698);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-697);
+connection conn1;
INSERT INTO t1 VALUES (697);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-696);
+connection conn1;
INSERT INTO t1 VALUES (696);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-695);
+connection conn1;
INSERT INTO t1 VALUES (695);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-694);
+connection conn1;
INSERT INTO t1 VALUES (694);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-693);
+connection conn1;
INSERT INTO t1 VALUES (693);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-692);
+connection conn1;
INSERT INTO t1 VALUES (692);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-691);
+connection conn1;
INSERT INTO t1 VALUES (691);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-690);
+connection conn1;
INSERT INTO t1 VALUES (690);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-689);
+connection conn1;
INSERT INTO t1 VALUES (689);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-688);
+connection conn1;
INSERT INTO t1 VALUES (688);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-687);
+connection conn1;
INSERT INTO t1 VALUES (687);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-686);
+connection conn1;
INSERT INTO t1 VALUES (686);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-685);
+connection conn1;
INSERT INTO t1 VALUES (685);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-684);
+connection conn1;
INSERT INTO t1 VALUES (684);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-683);
+connection conn1;
INSERT INTO t1 VALUES (683);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-682);
+connection conn1;
INSERT INTO t1 VALUES (682);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-681);
+connection conn1;
INSERT INTO t1 VALUES (681);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-680);
+connection conn1;
INSERT INTO t1 VALUES (680);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-679);
+connection conn1;
INSERT INTO t1 VALUES (679);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-678);
+connection conn1;
INSERT INTO t1 VALUES (678);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-677);
+connection conn1;
INSERT INTO t1 VALUES (677);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-676);
+connection conn1;
INSERT INTO t1 VALUES (676);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-675);
+connection conn1;
INSERT INTO t1 VALUES (675);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-674);
+connection conn1;
INSERT INTO t1 VALUES (674);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-673);
+connection conn1;
INSERT INTO t1 VALUES (673);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-672);
+connection conn1;
INSERT INTO t1 VALUES (672);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-671);
+connection conn1;
INSERT INTO t1 VALUES (671);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-670);
+connection conn1;
INSERT INTO t1 VALUES (670);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-669);
+connection conn1;
INSERT INTO t1 VALUES (669);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-668);
+connection conn1;
INSERT INTO t1 VALUES (668);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-667);
+connection conn1;
INSERT INTO t1 VALUES (667);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-666);
+connection conn1;
INSERT INTO t1 VALUES (666);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-665);
+connection conn1;
INSERT INTO t1 VALUES (665);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-664);
+connection conn1;
INSERT INTO t1 VALUES (664);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-663);
+connection conn1;
INSERT INTO t1 VALUES (663);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-662);
+connection conn1;
INSERT INTO t1 VALUES (662);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-661);
+connection conn1;
INSERT INTO t1 VALUES (661);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-660);
+connection conn1;
INSERT INTO t1 VALUES (660);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-659);
+connection conn1;
INSERT INTO t1 VALUES (659);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-658);
+connection conn1;
INSERT INTO t1 VALUES (658);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-657);
+connection conn1;
INSERT INTO t1 VALUES (657);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-656);
+connection conn1;
INSERT INTO t1 VALUES (656);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-655);
+connection conn1;
INSERT INTO t1 VALUES (655);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-654);
+connection conn1;
INSERT INTO t1 VALUES (654);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-653);
+connection conn1;
INSERT INTO t1 VALUES (653);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-652);
+connection conn1;
INSERT INTO t1 VALUES (652);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-651);
+connection conn1;
INSERT INTO t1 VALUES (651);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-650);
+connection conn1;
INSERT INTO t1 VALUES (650);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-649);
+connection conn1;
INSERT INTO t1 VALUES (649);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-648);
+connection conn1;
INSERT INTO t1 VALUES (648);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-647);
+connection conn1;
INSERT INTO t1 VALUES (647);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-646);
+connection conn1;
INSERT INTO t1 VALUES (646);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-645);
+connection conn1;
INSERT INTO t1 VALUES (645);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-644);
+connection conn1;
INSERT INTO t1 VALUES (644);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-643);
+connection conn1;
INSERT INTO t1 VALUES (643);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-642);
+connection conn1;
INSERT INTO t1 VALUES (642);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-641);
+connection conn1;
INSERT INTO t1 VALUES (641);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-640);
+connection conn1;
INSERT INTO t1 VALUES (640);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-639);
+connection conn1;
INSERT INTO t1 VALUES (639);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-638);
+connection conn1;
INSERT INTO t1 VALUES (638);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-637);
+connection conn1;
INSERT INTO t1 VALUES (637);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-636);
+connection conn1;
INSERT INTO t1 VALUES (636);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-635);
+connection conn1;
INSERT INTO t1 VALUES (635);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-634);
+connection conn1;
INSERT INTO t1 VALUES (634);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-633);
+connection conn1;
INSERT INTO t1 VALUES (633);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-632);
+connection conn1;
INSERT INTO t1 VALUES (632);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-631);
+connection conn1;
INSERT INTO t1 VALUES (631);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-630);
+connection conn1;
INSERT INTO t1 VALUES (630);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-629);
+connection conn1;
INSERT INTO t1 VALUES (629);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-628);
+connection conn1;
INSERT INTO t1 VALUES (628);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-627);
+connection conn1;
INSERT INTO t1 VALUES (627);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-626);
+connection conn1;
INSERT INTO t1 VALUES (626);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-625);
+connection conn1;
INSERT INTO t1 VALUES (625);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-624);
+connection conn1;
INSERT INTO t1 VALUES (624);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-623);
+connection conn1;
INSERT INTO t1 VALUES (623);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-622);
+connection conn1;
INSERT INTO t1 VALUES (622);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-621);
+connection conn1;
INSERT INTO t1 VALUES (621);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-620);
+connection conn1;
INSERT INTO t1 VALUES (620);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-619);
+connection conn1;
INSERT INTO t1 VALUES (619);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-618);
+connection conn1;
INSERT INTO t1 VALUES (618);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-617);
+connection conn1;
INSERT INTO t1 VALUES (617);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-616);
+connection conn1;
INSERT INTO t1 VALUES (616);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-615);
+connection conn1;
INSERT INTO t1 VALUES (615);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-614);
+connection conn1;
INSERT INTO t1 VALUES (614);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-613);
+connection conn1;
INSERT INTO t1 VALUES (613);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-612);
+connection conn1;
INSERT INTO t1 VALUES (612);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-611);
+connection conn1;
INSERT INTO t1 VALUES (611);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-610);
+connection conn1;
INSERT INTO t1 VALUES (610);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-609);
+connection conn1;
INSERT INTO t1 VALUES (609);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-608);
+connection conn1;
INSERT INTO t1 VALUES (608);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-607);
+connection conn1;
INSERT INTO t1 VALUES (607);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-606);
+connection conn1;
INSERT INTO t1 VALUES (606);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-605);
+connection conn1;
INSERT INTO t1 VALUES (605);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-604);
+connection conn1;
INSERT INTO t1 VALUES (604);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-603);
+connection conn1;
INSERT INTO t1 VALUES (603);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-602);
+connection conn1;
INSERT INTO t1 VALUES (602);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-601);
+connection conn1;
INSERT INTO t1 VALUES (601);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-600);
+connection conn1;
INSERT INTO t1 VALUES (600);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-599);
+connection conn1;
INSERT INTO t1 VALUES (599);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-598);
+connection conn1;
INSERT INTO t1 VALUES (598);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-597);
+connection conn1;
INSERT INTO t1 VALUES (597);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-596);
+connection conn1;
INSERT INTO t1 VALUES (596);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-595);
+connection conn1;
INSERT INTO t1 VALUES (595);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-594);
+connection conn1;
INSERT INTO t1 VALUES (594);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-593);
+connection conn1;
INSERT INTO t1 VALUES (593);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-592);
+connection conn1;
INSERT INTO t1 VALUES (592);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-591);
+connection conn1;
INSERT INTO t1 VALUES (591);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-590);
+connection conn1;
INSERT INTO t1 VALUES (590);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-589);
+connection conn1;
INSERT INTO t1 VALUES (589);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-588);
+connection conn1;
INSERT INTO t1 VALUES (588);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-587);
+connection conn1;
INSERT INTO t1 VALUES (587);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-586);
+connection conn1;
INSERT INTO t1 VALUES (586);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-585);
+connection conn1;
INSERT INTO t1 VALUES (585);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-584);
+connection conn1;
INSERT INTO t1 VALUES (584);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-583);
+connection conn1;
INSERT INTO t1 VALUES (583);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-582);
+connection conn1;
INSERT INTO t1 VALUES (582);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-581);
+connection conn1;
INSERT INTO t1 VALUES (581);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-580);
+connection conn1;
INSERT INTO t1 VALUES (580);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-579);
+connection conn1;
INSERT INTO t1 VALUES (579);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-578);
+connection conn1;
INSERT INTO t1 VALUES (578);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-577);
+connection conn1;
INSERT INTO t1 VALUES (577);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-576);
+connection conn1;
INSERT INTO t1 VALUES (576);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-575);
+connection conn1;
INSERT INTO t1 VALUES (575);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-574);
+connection conn1;
INSERT INTO t1 VALUES (574);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-573);
+connection conn1;
INSERT INTO t1 VALUES (573);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-572);
+connection conn1;
INSERT INTO t1 VALUES (572);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-571);
+connection conn1;
INSERT INTO t1 VALUES (571);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-570);
+connection conn1;
INSERT INTO t1 VALUES (570);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-569);
+connection conn1;
INSERT INTO t1 VALUES (569);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-568);
+connection conn1;
INSERT INTO t1 VALUES (568);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-567);
+connection conn1;
INSERT INTO t1 VALUES (567);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-566);
+connection conn1;
INSERT INTO t1 VALUES (566);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-565);
+connection conn1;
INSERT INTO t1 VALUES (565);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-564);
+connection conn1;
INSERT INTO t1 VALUES (564);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-563);
+connection conn1;
INSERT INTO t1 VALUES (563);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-562);
+connection conn1;
INSERT INTO t1 VALUES (562);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-561);
+connection conn1;
INSERT INTO t1 VALUES (561);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-560);
+connection conn1;
INSERT INTO t1 VALUES (560);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-559);
+connection conn1;
INSERT INTO t1 VALUES (559);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-558);
+connection conn1;
INSERT INTO t1 VALUES (558);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-557);
+connection conn1;
INSERT INTO t1 VALUES (557);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-556);
+connection conn1;
INSERT INTO t1 VALUES (556);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-555);
+connection conn1;
INSERT INTO t1 VALUES (555);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-554);
+connection conn1;
INSERT INTO t1 VALUES (554);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-553);
+connection conn1;
INSERT INTO t1 VALUES (553);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-552);
+connection conn1;
INSERT INTO t1 VALUES (552);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-551);
+connection conn1;
INSERT INTO t1 VALUES (551);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-550);
+connection conn1;
INSERT INTO t1 VALUES (550);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-549);
+connection conn1;
INSERT INTO t1 VALUES (549);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-548);
+connection conn1;
INSERT INTO t1 VALUES (548);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-547);
+connection conn1;
INSERT INTO t1 VALUES (547);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-546);
+connection conn1;
INSERT INTO t1 VALUES (546);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-545);
+connection conn1;
INSERT INTO t1 VALUES (545);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-544);
+connection conn1;
INSERT INTO t1 VALUES (544);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-543);
+connection conn1;
INSERT INTO t1 VALUES (543);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-542);
+connection conn1;
INSERT INTO t1 VALUES (542);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-541);
+connection conn1;
INSERT INTO t1 VALUES (541);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-540);
+connection conn1;
INSERT INTO t1 VALUES (540);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-539);
+connection conn1;
INSERT INTO t1 VALUES (539);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-538);
+connection conn1;
INSERT INTO t1 VALUES (538);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-537);
+connection conn1;
INSERT INTO t1 VALUES (537);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-536);
+connection conn1;
INSERT INTO t1 VALUES (536);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-535);
+connection conn1;
INSERT INTO t1 VALUES (535);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-534);
+connection conn1;
INSERT INTO t1 VALUES (534);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-533);
+connection conn1;
INSERT INTO t1 VALUES (533);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-532);
+connection conn1;
INSERT INTO t1 VALUES (532);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-531);
+connection conn1;
INSERT INTO t1 VALUES (531);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-530);
+connection conn1;
INSERT INTO t1 VALUES (530);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-529);
+connection conn1;
INSERT INTO t1 VALUES (529);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-528);
+connection conn1;
INSERT INTO t1 VALUES (528);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-527);
+connection conn1;
INSERT INTO t1 VALUES (527);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-526);
+connection conn1;
INSERT INTO t1 VALUES (526);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-525);
+connection conn1;
INSERT INTO t1 VALUES (525);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-524);
+connection conn1;
INSERT INTO t1 VALUES (524);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-523);
+connection conn1;
INSERT INTO t1 VALUES (523);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-522);
+connection conn1;
INSERT INTO t1 VALUES (522);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-521);
+connection conn1;
INSERT INTO t1 VALUES (521);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-520);
+connection conn1;
INSERT INTO t1 VALUES (520);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-519);
+connection conn1;
INSERT INTO t1 VALUES (519);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-518);
+connection conn1;
INSERT INTO t1 VALUES (518);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-517);
+connection conn1;
INSERT INTO t1 VALUES (517);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-516);
+connection conn1;
INSERT INTO t1 VALUES (516);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-515);
+connection conn1;
INSERT INTO t1 VALUES (515);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-514);
+connection conn1;
INSERT INTO t1 VALUES (514);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-513);
+connection conn1;
INSERT INTO t1 VALUES (513);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-512);
+connection conn1;
INSERT INTO t1 VALUES (512);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-511);
+connection conn1;
INSERT INTO t1 VALUES (511);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-510);
+connection conn1;
INSERT INTO t1 VALUES (510);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-509);
+connection conn1;
INSERT INTO t1 VALUES (509);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-508);
+connection conn1;
INSERT INTO t1 VALUES (508);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-507);
+connection conn1;
INSERT INTO t1 VALUES (507);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-506);
+connection conn1;
INSERT INTO t1 VALUES (506);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-505);
+connection conn1;
INSERT INTO t1 VALUES (505);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-504);
+connection conn1;
INSERT INTO t1 VALUES (504);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-503);
+connection conn1;
INSERT INTO t1 VALUES (503);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-502);
+connection conn1;
INSERT INTO t1 VALUES (502);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-501);
+connection conn1;
INSERT INTO t1 VALUES (501);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-500);
+connection conn1;
INSERT INTO t1 VALUES (500);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-499);
+connection conn1;
INSERT INTO t1 VALUES (499);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-498);
+connection conn1;
INSERT INTO t1 VALUES (498);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-497);
+connection conn1;
INSERT INTO t1 VALUES (497);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-496);
+connection conn1;
INSERT INTO t1 VALUES (496);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-495);
+connection conn1;
INSERT INTO t1 VALUES (495);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-494);
+connection conn1;
INSERT INTO t1 VALUES (494);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-493);
+connection conn1;
INSERT INTO t1 VALUES (493);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-492);
+connection conn1;
INSERT INTO t1 VALUES (492);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-491);
+connection conn1;
INSERT INTO t1 VALUES (491);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-490);
+connection conn1;
INSERT INTO t1 VALUES (490);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-489);
+connection conn1;
INSERT INTO t1 VALUES (489);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-488);
+connection conn1;
INSERT INTO t1 VALUES (488);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-487);
+connection conn1;
INSERT INTO t1 VALUES (487);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-486);
+connection conn1;
INSERT INTO t1 VALUES (486);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-485);
+connection conn1;
INSERT INTO t1 VALUES (485);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-484);
+connection conn1;
INSERT INTO t1 VALUES (484);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-483);
+connection conn1;
INSERT INTO t1 VALUES (483);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-482);
+connection conn1;
INSERT INTO t1 VALUES (482);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-481);
+connection conn1;
INSERT INTO t1 VALUES (481);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-480);
+connection conn1;
INSERT INTO t1 VALUES (480);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-479);
+connection conn1;
INSERT INTO t1 VALUES (479);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-478);
+connection conn1;
INSERT INTO t1 VALUES (478);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-477);
+connection conn1;
INSERT INTO t1 VALUES (477);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-476);
+connection conn1;
INSERT INTO t1 VALUES (476);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-475);
+connection conn1;
INSERT INTO t1 VALUES (475);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-474);
+connection conn1;
INSERT INTO t1 VALUES (474);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-473);
+connection conn1;
INSERT INTO t1 VALUES (473);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-472);
+connection conn1;
INSERT INTO t1 VALUES (472);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-471);
+connection conn1;
INSERT INTO t1 VALUES (471);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-470);
+connection conn1;
INSERT INTO t1 VALUES (470);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-469);
+connection conn1;
INSERT INTO t1 VALUES (469);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-468);
+connection conn1;
INSERT INTO t1 VALUES (468);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-467);
+connection conn1;
INSERT INTO t1 VALUES (467);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-466);
+connection conn1;
INSERT INTO t1 VALUES (466);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-465);
+connection conn1;
INSERT INTO t1 VALUES (465);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-464);
+connection conn1;
INSERT INTO t1 VALUES (464);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-463);
+connection conn1;
INSERT INTO t1 VALUES (463);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-462);
+connection conn1;
INSERT INTO t1 VALUES (462);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-461);
+connection conn1;
INSERT INTO t1 VALUES (461);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-460);
+connection conn1;
INSERT INTO t1 VALUES (460);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-459);
+connection conn1;
INSERT INTO t1 VALUES (459);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-458);
+connection conn1;
INSERT INTO t1 VALUES (458);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-457);
+connection conn1;
INSERT INTO t1 VALUES (457);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-456);
+connection conn1;
INSERT INTO t1 VALUES (456);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-455);
+connection conn1;
INSERT INTO t1 VALUES (455);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-454);
+connection conn1;
INSERT INTO t1 VALUES (454);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-453);
+connection conn1;
INSERT INTO t1 VALUES (453);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-452);
+connection conn1;
INSERT INTO t1 VALUES (452);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-451);
+connection conn1;
INSERT INTO t1 VALUES (451);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-450);
+connection conn1;
INSERT INTO t1 VALUES (450);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-449);
+connection conn1;
INSERT INTO t1 VALUES (449);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-448);
+connection conn1;
INSERT INTO t1 VALUES (448);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-447);
+connection conn1;
INSERT INTO t1 VALUES (447);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-446);
+connection conn1;
INSERT INTO t1 VALUES (446);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-445);
+connection conn1;
INSERT INTO t1 VALUES (445);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-444);
+connection conn1;
INSERT INTO t1 VALUES (444);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-443);
+connection conn1;
INSERT INTO t1 VALUES (443);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-442);
+connection conn1;
INSERT INTO t1 VALUES (442);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-441);
+connection conn1;
INSERT INTO t1 VALUES (441);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-440);
+connection conn1;
INSERT INTO t1 VALUES (440);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-439);
+connection conn1;
INSERT INTO t1 VALUES (439);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-438);
+connection conn1;
INSERT INTO t1 VALUES (438);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-437);
+connection conn1;
INSERT INTO t1 VALUES (437);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-436);
+connection conn1;
INSERT INTO t1 VALUES (436);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-435);
+connection conn1;
INSERT INTO t1 VALUES (435);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-434);
+connection conn1;
INSERT INTO t1 VALUES (434);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-433);
+connection conn1;
INSERT INTO t1 VALUES (433);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-432);
+connection conn1;
INSERT INTO t1 VALUES (432);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-431);
+connection conn1;
INSERT INTO t1 VALUES (431);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-430);
+connection conn1;
INSERT INTO t1 VALUES (430);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-429);
+connection conn1;
INSERT INTO t1 VALUES (429);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-428);
+connection conn1;
INSERT INTO t1 VALUES (428);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-427);
+connection conn1;
INSERT INTO t1 VALUES (427);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-426);
+connection conn1;
INSERT INTO t1 VALUES (426);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-425);
+connection conn1;
INSERT INTO t1 VALUES (425);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-424);
+connection conn1;
INSERT INTO t1 VALUES (424);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-423);
+connection conn1;
INSERT INTO t1 VALUES (423);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-422);
+connection conn1;
INSERT INTO t1 VALUES (422);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-421);
+connection conn1;
INSERT INTO t1 VALUES (421);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-420);
+connection conn1;
INSERT INTO t1 VALUES (420);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-419);
+connection conn1;
INSERT INTO t1 VALUES (419);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-418);
+connection conn1;
INSERT INTO t1 VALUES (418);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-417);
+connection conn1;
INSERT INTO t1 VALUES (417);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-416);
+connection conn1;
INSERT INTO t1 VALUES (416);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-415);
+connection conn1;
INSERT INTO t1 VALUES (415);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-414);
+connection conn1;
INSERT INTO t1 VALUES (414);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-413);
+connection conn1;
INSERT INTO t1 VALUES (413);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-412);
+connection conn1;
INSERT INTO t1 VALUES (412);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-411);
+connection conn1;
INSERT INTO t1 VALUES (411);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-410);
+connection conn1;
INSERT INTO t1 VALUES (410);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-409);
+connection conn1;
INSERT INTO t1 VALUES (409);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-408);
+connection conn1;
INSERT INTO t1 VALUES (408);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-407);
+connection conn1;
INSERT INTO t1 VALUES (407);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-406);
+connection conn1;
INSERT INTO t1 VALUES (406);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-405);
+connection conn1;
INSERT INTO t1 VALUES (405);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-404);
+connection conn1;
INSERT INTO t1 VALUES (404);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-403);
+connection conn1;
INSERT INTO t1 VALUES (403);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-402);
+connection conn1;
INSERT INTO t1 VALUES (402);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-401);
+connection conn1;
INSERT INTO t1 VALUES (401);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-400);
+connection conn1;
INSERT INTO t1 VALUES (400);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-399);
+connection conn1;
INSERT INTO t1 VALUES (399);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-398);
+connection conn1;
INSERT INTO t1 VALUES (398);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-397);
+connection conn1;
INSERT INTO t1 VALUES (397);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-396);
+connection conn1;
INSERT INTO t1 VALUES (396);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-395);
+connection conn1;
INSERT INTO t1 VALUES (395);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-394);
+connection conn1;
INSERT INTO t1 VALUES (394);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-393);
+connection conn1;
INSERT INTO t1 VALUES (393);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-392);
+connection conn1;
INSERT INTO t1 VALUES (392);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-391);
+connection conn1;
INSERT INTO t1 VALUES (391);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-390);
+connection conn1;
INSERT INTO t1 VALUES (390);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-389);
+connection conn1;
INSERT INTO t1 VALUES (389);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-388);
+connection conn1;
INSERT INTO t1 VALUES (388);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-387);
+connection conn1;
INSERT INTO t1 VALUES (387);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-386);
+connection conn1;
INSERT INTO t1 VALUES (386);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-385);
+connection conn1;
INSERT INTO t1 VALUES (385);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-384);
+connection conn1;
INSERT INTO t1 VALUES (384);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-383);
+connection conn1;
INSERT INTO t1 VALUES (383);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-382);
+connection conn1;
INSERT INTO t1 VALUES (382);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-381);
+connection conn1;
INSERT INTO t1 VALUES (381);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-380);
+connection conn1;
INSERT INTO t1 VALUES (380);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-379);
+connection conn1;
INSERT INTO t1 VALUES (379);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-378);
+connection conn1;
INSERT INTO t1 VALUES (378);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-377);
+connection conn1;
INSERT INTO t1 VALUES (377);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-376);
+connection conn1;
INSERT INTO t1 VALUES (376);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-375);
+connection conn1;
INSERT INTO t1 VALUES (375);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-374);
+connection conn1;
INSERT INTO t1 VALUES (374);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-373);
+connection conn1;
INSERT INTO t1 VALUES (373);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-372);
+connection conn1;
INSERT INTO t1 VALUES (372);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-371);
+connection conn1;
INSERT INTO t1 VALUES (371);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-370);
+connection conn1;
INSERT INTO t1 VALUES (370);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-369);
+connection conn1;
INSERT INTO t1 VALUES (369);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-368);
+connection conn1;
INSERT INTO t1 VALUES (368);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-367);
+connection conn1;
INSERT INTO t1 VALUES (367);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-366);
+connection conn1;
INSERT INTO t1 VALUES (366);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-365);
+connection conn1;
INSERT INTO t1 VALUES (365);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-364);
+connection conn1;
INSERT INTO t1 VALUES (364);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-363);
+connection conn1;
INSERT INTO t1 VALUES (363);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-362);
+connection conn1;
INSERT INTO t1 VALUES (362);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-361);
+connection conn1;
INSERT INTO t1 VALUES (361);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-360);
+connection conn1;
INSERT INTO t1 VALUES (360);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-359);
+connection conn1;
INSERT INTO t1 VALUES (359);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-358);
+connection conn1;
INSERT INTO t1 VALUES (358);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-357);
+connection conn1;
INSERT INTO t1 VALUES (357);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-356);
+connection conn1;
INSERT INTO t1 VALUES (356);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-355);
+connection conn1;
INSERT INTO t1 VALUES (355);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-354);
+connection conn1;
INSERT INTO t1 VALUES (354);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-353);
+connection conn1;
INSERT INTO t1 VALUES (353);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-352);
+connection conn1;
INSERT INTO t1 VALUES (352);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-351);
+connection conn1;
INSERT INTO t1 VALUES (351);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-350);
+connection conn1;
INSERT INTO t1 VALUES (350);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-349);
+connection conn1;
INSERT INTO t1 VALUES (349);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-348);
+connection conn1;
INSERT INTO t1 VALUES (348);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-347);
+connection conn1;
INSERT INTO t1 VALUES (347);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-346);
+connection conn1;
INSERT INTO t1 VALUES (346);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-345);
+connection conn1;
INSERT INTO t1 VALUES (345);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-344);
+connection conn1;
INSERT INTO t1 VALUES (344);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-343);
+connection conn1;
INSERT INTO t1 VALUES (343);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-342);
+connection conn1;
INSERT INTO t1 VALUES (342);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-341);
+connection conn1;
INSERT INTO t1 VALUES (341);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-340);
+connection conn1;
INSERT INTO t1 VALUES (340);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-339);
+connection conn1;
INSERT INTO t1 VALUES (339);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-338);
+connection conn1;
INSERT INTO t1 VALUES (338);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-337);
+connection conn1;
INSERT INTO t1 VALUES (337);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-336);
+connection conn1;
INSERT INTO t1 VALUES (336);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-335);
+connection conn1;
INSERT INTO t1 VALUES (335);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-334);
+connection conn1;
INSERT INTO t1 VALUES (334);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-333);
+connection conn1;
INSERT INTO t1 VALUES (333);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-332);
+connection conn1;
INSERT INTO t1 VALUES (332);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-331);
+connection conn1;
INSERT INTO t1 VALUES (331);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-330);
+connection conn1;
INSERT INTO t1 VALUES (330);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-329);
+connection conn1;
INSERT INTO t1 VALUES (329);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-328);
+connection conn1;
INSERT INTO t1 VALUES (328);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-327);
+connection conn1;
INSERT INTO t1 VALUES (327);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-326);
+connection conn1;
INSERT INTO t1 VALUES (326);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-325);
+connection conn1;
INSERT INTO t1 VALUES (325);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-324);
+connection conn1;
INSERT INTO t1 VALUES (324);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-323);
+connection conn1;
INSERT INTO t1 VALUES (323);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-322);
+connection conn1;
INSERT INTO t1 VALUES (322);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-321);
+connection conn1;
INSERT INTO t1 VALUES (321);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-320);
+connection conn1;
INSERT INTO t1 VALUES (320);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-319);
+connection conn1;
INSERT INTO t1 VALUES (319);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-318);
+connection conn1;
INSERT INTO t1 VALUES (318);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-317);
+connection conn1;
INSERT INTO t1 VALUES (317);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-316);
+connection conn1;
INSERT INTO t1 VALUES (316);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-315);
+connection conn1;
INSERT INTO t1 VALUES (315);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-314);
+connection conn1;
INSERT INTO t1 VALUES (314);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-313);
+connection conn1;
INSERT INTO t1 VALUES (313);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-312);
+connection conn1;
INSERT INTO t1 VALUES (312);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-311);
+connection conn1;
INSERT INTO t1 VALUES (311);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-310);
+connection conn1;
INSERT INTO t1 VALUES (310);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-309);
+connection conn1;
INSERT INTO t1 VALUES (309);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-308);
+connection conn1;
INSERT INTO t1 VALUES (308);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-307);
+connection conn1;
INSERT INTO t1 VALUES (307);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-306);
+connection conn1;
INSERT INTO t1 VALUES (306);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-305);
+connection conn1;
INSERT INTO t1 VALUES (305);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-304);
+connection conn1;
INSERT INTO t1 VALUES (304);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-303);
+connection conn1;
INSERT INTO t1 VALUES (303);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-302);
+connection conn1;
INSERT INTO t1 VALUES (302);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-301);
+connection conn1;
INSERT INTO t1 VALUES (301);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-300);
+connection conn1;
INSERT INTO t1 VALUES (300);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-299);
+connection conn1;
INSERT INTO t1 VALUES (299);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-298);
+connection conn1;
INSERT INTO t1 VALUES (298);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-297);
+connection conn1;
INSERT INTO t1 VALUES (297);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-296);
+connection conn1;
INSERT INTO t1 VALUES (296);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-295);
+connection conn1;
INSERT INTO t1 VALUES (295);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-294);
+connection conn1;
INSERT INTO t1 VALUES (294);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-293);
+connection conn1;
INSERT INTO t1 VALUES (293);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-292);
+connection conn1;
INSERT INTO t1 VALUES (292);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-291);
+connection conn1;
INSERT INTO t1 VALUES (291);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-290);
+connection conn1;
INSERT INTO t1 VALUES (290);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-289);
+connection conn1;
INSERT INTO t1 VALUES (289);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-288);
+connection conn1;
INSERT INTO t1 VALUES (288);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-287);
+connection conn1;
INSERT INTO t1 VALUES (287);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-286);
+connection conn1;
INSERT INTO t1 VALUES (286);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-285);
+connection conn1;
INSERT INTO t1 VALUES (285);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-284);
+connection conn1;
INSERT INTO t1 VALUES (284);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-283);
+connection conn1;
INSERT INTO t1 VALUES (283);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-282);
+connection conn1;
INSERT INTO t1 VALUES (282);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-281);
+connection conn1;
INSERT INTO t1 VALUES (281);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-280);
+connection conn1;
INSERT INTO t1 VALUES (280);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-279);
+connection conn1;
INSERT INTO t1 VALUES (279);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-278);
+connection conn1;
INSERT INTO t1 VALUES (278);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-277);
+connection conn1;
INSERT INTO t1 VALUES (277);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-276);
+connection conn1;
INSERT INTO t1 VALUES (276);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-275);
+connection conn1;
INSERT INTO t1 VALUES (275);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-274);
+connection conn1;
INSERT INTO t1 VALUES (274);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-273);
+connection conn1;
INSERT INTO t1 VALUES (273);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-272);
+connection conn1;
INSERT INTO t1 VALUES (272);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-271);
+connection conn1;
INSERT INTO t1 VALUES (271);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-270);
+connection conn1;
INSERT INTO t1 VALUES (270);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-269);
+connection conn1;
INSERT INTO t1 VALUES (269);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-268);
+connection conn1;
INSERT INTO t1 VALUES (268);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-267);
+connection conn1;
INSERT INTO t1 VALUES (267);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-266);
+connection conn1;
INSERT INTO t1 VALUES (266);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-265);
+connection conn1;
INSERT INTO t1 VALUES (265);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-264);
+connection conn1;
INSERT INTO t1 VALUES (264);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-263);
+connection conn1;
INSERT INTO t1 VALUES (263);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-262);
+connection conn1;
INSERT INTO t1 VALUES (262);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-261);
+connection conn1;
INSERT INTO t1 VALUES (261);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-260);
+connection conn1;
INSERT INTO t1 VALUES (260);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-259);
+connection conn1;
INSERT INTO t1 VALUES (259);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-258);
+connection conn1;
INSERT INTO t1 VALUES (258);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-257);
+connection conn1;
INSERT INTO t1 VALUES (257);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-256);
+connection conn1;
INSERT INTO t1 VALUES (256);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-255);
+connection conn1;
INSERT INTO t1 VALUES (255);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-254);
+connection conn1;
INSERT INTO t1 VALUES (254);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-253);
+connection conn1;
INSERT INTO t1 VALUES (253);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-252);
+connection conn1;
INSERT INTO t1 VALUES (252);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-251);
+connection conn1;
INSERT INTO t1 VALUES (251);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-250);
+connection conn1;
INSERT INTO t1 VALUES (250);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-249);
+connection conn1;
INSERT INTO t1 VALUES (249);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-248);
+connection conn1;
INSERT INTO t1 VALUES (248);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-247);
+connection conn1;
INSERT INTO t1 VALUES (247);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-246);
+connection conn1;
INSERT INTO t1 VALUES (246);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-245);
+connection conn1;
INSERT INTO t1 VALUES (245);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-244);
+connection conn1;
INSERT INTO t1 VALUES (244);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-243);
+connection conn1;
INSERT INTO t1 VALUES (243);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-242);
+connection conn1;
INSERT INTO t1 VALUES (242);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-241);
+connection conn1;
INSERT INTO t1 VALUES (241);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-240);
+connection conn1;
INSERT INTO t1 VALUES (240);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-239);
+connection conn1;
INSERT INTO t1 VALUES (239);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-238);
+connection conn1;
INSERT INTO t1 VALUES (238);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-237);
+connection conn1;
INSERT INTO t1 VALUES (237);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-236);
+connection conn1;
INSERT INTO t1 VALUES (236);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-235);
+connection conn1;
INSERT INTO t1 VALUES (235);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-234);
+connection conn1;
INSERT INTO t1 VALUES (234);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-233);
+connection conn1;
INSERT INTO t1 VALUES (233);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-232);
+connection conn1;
INSERT INTO t1 VALUES (232);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-231);
+connection conn1;
INSERT INTO t1 VALUES (231);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-230);
+connection conn1;
INSERT INTO t1 VALUES (230);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-229);
+connection conn1;
INSERT INTO t1 VALUES (229);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-228);
+connection conn1;
INSERT INTO t1 VALUES (228);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-227);
+connection conn1;
INSERT INTO t1 VALUES (227);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-226);
+connection conn1;
INSERT INTO t1 VALUES (226);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-225);
+connection conn1;
INSERT INTO t1 VALUES (225);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-224);
+connection conn1;
INSERT INTO t1 VALUES (224);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-223);
+connection conn1;
INSERT INTO t1 VALUES (223);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-222);
+connection conn1;
INSERT INTO t1 VALUES (222);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-221);
+connection conn1;
INSERT INTO t1 VALUES (221);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-220);
+connection conn1;
INSERT INTO t1 VALUES (220);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-219);
+connection conn1;
INSERT INTO t1 VALUES (219);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-218);
+connection conn1;
INSERT INTO t1 VALUES (218);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-217);
+connection conn1;
INSERT INTO t1 VALUES (217);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-216);
+connection conn1;
INSERT INTO t1 VALUES (216);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-215);
+connection conn1;
INSERT INTO t1 VALUES (215);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-214);
+connection conn1;
INSERT INTO t1 VALUES (214);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-213);
+connection conn1;
INSERT INTO t1 VALUES (213);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-212);
+connection conn1;
INSERT INTO t1 VALUES (212);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-211);
+connection conn1;
INSERT INTO t1 VALUES (211);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-210);
+connection conn1;
INSERT INTO t1 VALUES (210);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-209);
+connection conn1;
INSERT INTO t1 VALUES (209);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-208);
+connection conn1;
INSERT INTO t1 VALUES (208);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-207);
+connection conn1;
INSERT INTO t1 VALUES (207);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-206);
+connection conn1;
INSERT INTO t1 VALUES (206);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-205);
+connection conn1;
INSERT INTO t1 VALUES (205);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-204);
+connection conn1;
INSERT INTO t1 VALUES (204);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-203);
+connection conn1;
INSERT INTO t1 VALUES (203);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-202);
+connection conn1;
INSERT INTO t1 VALUES (202);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-201);
+connection conn1;
INSERT INTO t1 VALUES (201);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-200);
+connection conn1;
INSERT INTO t1 VALUES (200);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-199);
+connection conn1;
INSERT INTO t1 VALUES (199);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-198);
+connection conn1;
INSERT INTO t1 VALUES (198);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-197);
+connection conn1;
INSERT INTO t1 VALUES (197);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-196);
+connection conn1;
INSERT INTO t1 VALUES (196);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-195);
+connection conn1;
INSERT INTO t1 VALUES (195);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-194);
+connection conn1;
INSERT INTO t1 VALUES (194);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-193);
+connection conn1;
INSERT INTO t1 VALUES (193);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-192);
+connection conn1;
INSERT INTO t1 VALUES (192);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-191);
+connection conn1;
INSERT INTO t1 VALUES (191);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-190);
+connection conn1;
INSERT INTO t1 VALUES (190);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-189);
+connection conn1;
INSERT INTO t1 VALUES (189);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-188);
+connection conn1;
INSERT INTO t1 VALUES (188);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-187);
+connection conn1;
INSERT INTO t1 VALUES (187);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-186);
+connection conn1;
INSERT INTO t1 VALUES (186);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-185);
+connection conn1;
INSERT INTO t1 VALUES (185);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-184);
+connection conn1;
INSERT INTO t1 VALUES (184);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-183);
+connection conn1;
INSERT INTO t1 VALUES (183);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-182);
+connection conn1;
INSERT INTO t1 VALUES (182);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-181);
+connection conn1;
INSERT INTO t1 VALUES (181);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-180);
+connection conn1;
INSERT INTO t1 VALUES (180);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-179);
+connection conn1;
INSERT INTO t1 VALUES (179);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-178);
+connection conn1;
INSERT INTO t1 VALUES (178);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-177);
+connection conn1;
INSERT INTO t1 VALUES (177);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-176);
+connection conn1;
INSERT INTO t1 VALUES (176);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-175);
+connection conn1;
INSERT INTO t1 VALUES (175);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-174);
+connection conn1;
INSERT INTO t1 VALUES (174);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-173);
+connection conn1;
INSERT INTO t1 VALUES (173);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-172);
+connection conn1;
INSERT INTO t1 VALUES (172);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-171);
+connection conn1;
INSERT INTO t1 VALUES (171);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-170);
+connection conn1;
INSERT INTO t1 VALUES (170);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-169);
+connection conn1;
INSERT INTO t1 VALUES (169);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-168);
+connection conn1;
INSERT INTO t1 VALUES (168);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-167);
+connection conn1;
INSERT INTO t1 VALUES (167);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-166);
+connection conn1;
INSERT INTO t1 VALUES (166);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-165);
+connection conn1;
INSERT INTO t1 VALUES (165);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-164);
+connection conn1;
INSERT INTO t1 VALUES (164);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-163);
+connection conn1;
INSERT INTO t1 VALUES (163);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-162);
+connection conn1;
INSERT INTO t1 VALUES (162);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-161);
+connection conn1;
INSERT INTO t1 VALUES (161);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-160);
+connection conn1;
INSERT INTO t1 VALUES (160);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-159);
+connection conn1;
INSERT INTO t1 VALUES (159);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-158);
+connection conn1;
INSERT INTO t1 VALUES (158);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-157);
+connection conn1;
INSERT INTO t1 VALUES (157);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-156);
+connection conn1;
INSERT INTO t1 VALUES (156);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-155);
+connection conn1;
INSERT INTO t1 VALUES (155);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-154);
+connection conn1;
INSERT INTO t1 VALUES (154);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-153);
+connection conn1;
INSERT INTO t1 VALUES (153);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-152);
+connection conn1;
INSERT INTO t1 VALUES (152);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-151);
+connection conn1;
INSERT INTO t1 VALUES (151);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-150);
+connection conn1;
INSERT INTO t1 VALUES (150);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-149);
+connection conn1;
INSERT INTO t1 VALUES (149);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-148);
+connection conn1;
INSERT INTO t1 VALUES (148);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-147);
+connection conn1;
INSERT INTO t1 VALUES (147);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-146);
+connection conn1;
INSERT INTO t1 VALUES (146);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-145);
+connection conn1;
INSERT INTO t1 VALUES (145);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-144);
+connection conn1;
INSERT INTO t1 VALUES (144);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-143);
+connection conn1;
INSERT INTO t1 VALUES (143);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-142);
+connection conn1;
INSERT INTO t1 VALUES (142);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-141);
+connection conn1;
INSERT INTO t1 VALUES (141);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-140);
+connection conn1;
INSERT INTO t1 VALUES (140);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-139);
+connection conn1;
INSERT INTO t1 VALUES (139);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-138);
+connection conn1;
INSERT INTO t1 VALUES (138);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-137);
+connection conn1;
INSERT INTO t1 VALUES (137);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-136);
+connection conn1;
INSERT INTO t1 VALUES (136);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-135);
+connection conn1;
INSERT INTO t1 VALUES (135);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-134);
+connection conn1;
INSERT INTO t1 VALUES (134);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-133);
+connection conn1;
INSERT INTO t1 VALUES (133);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-132);
+connection conn1;
INSERT INTO t1 VALUES (132);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-131);
+connection conn1;
INSERT INTO t1 VALUES (131);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-130);
+connection conn1;
INSERT INTO t1 VALUES (130);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-129);
+connection conn1;
INSERT INTO t1 VALUES (129);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-128);
+connection conn1;
INSERT INTO t1 VALUES (128);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-127);
+connection conn1;
INSERT INTO t1 VALUES (127);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-126);
+connection conn1;
INSERT INTO t1 VALUES (126);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-125);
+connection conn1;
INSERT INTO t1 VALUES (125);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-124);
+connection conn1;
INSERT INTO t1 VALUES (124);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-123);
+connection conn1;
INSERT INTO t1 VALUES (123);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-122);
+connection conn1;
INSERT INTO t1 VALUES (122);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-121);
+connection conn1;
INSERT INTO t1 VALUES (121);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-120);
+connection conn1;
INSERT INTO t1 VALUES (120);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-119);
+connection conn1;
INSERT INTO t1 VALUES (119);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-118);
+connection conn1;
INSERT INTO t1 VALUES (118);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-117);
+connection conn1;
INSERT INTO t1 VALUES (117);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-116);
+connection conn1;
INSERT INTO t1 VALUES (116);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-115);
+connection conn1;
INSERT INTO t1 VALUES (115);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-114);
+connection conn1;
INSERT INTO t1 VALUES (114);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-113);
+connection conn1;
INSERT INTO t1 VALUES (113);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-112);
+connection conn1;
INSERT INTO t1 VALUES (112);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-111);
+connection conn1;
INSERT INTO t1 VALUES (111);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-110);
+connection conn1;
INSERT INTO t1 VALUES (110);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-109);
+connection conn1;
INSERT INTO t1 VALUES (109);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-108);
+connection conn1;
INSERT INTO t1 VALUES (108);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-107);
+connection conn1;
INSERT INTO t1 VALUES (107);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-106);
+connection conn1;
INSERT INTO t1 VALUES (106);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-105);
+connection conn1;
INSERT INTO t1 VALUES (105);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-104);
+connection conn1;
INSERT INTO t1 VALUES (104);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-103);
+connection conn1;
INSERT INTO t1 VALUES (103);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-102);
+connection conn1;
INSERT INTO t1 VALUES (102);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-101);
+connection conn1;
INSERT INTO t1 VALUES (101);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-100);
+connection conn1;
INSERT INTO t1 VALUES (100);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-99);
+connection conn1;
INSERT INTO t1 VALUES (99);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-98);
+connection conn1;
INSERT INTO t1 VALUES (98);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-97);
+connection conn1;
INSERT INTO t1 VALUES (97);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-96);
+connection conn1;
INSERT INTO t1 VALUES (96);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-95);
+connection conn1;
INSERT INTO t1 VALUES (95);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-94);
+connection conn1;
INSERT INTO t1 VALUES (94);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-93);
+connection conn1;
INSERT INTO t1 VALUES (93);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-92);
+connection conn1;
INSERT INTO t1 VALUES (92);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-91);
+connection conn1;
INSERT INTO t1 VALUES (91);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-90);
+connection conn1;
INSERT INTO t1 VALUES (90);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-89);
+connection conn1;
INSERT INTO t1 VALUES (89);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-88);
+connection conn1;
INSERT INTO t1 VALUES (88);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-87);
+connection conn1;
INSERT INTO t1 VALUES (87);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-86);
+connection conn1;
INSERT INTO t1 VALUES (86);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-85);
+connection conn1;
INSERT INTO t1 VALUES (85);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-84);
+connection conn1;
INSERT INTO t1 VALUES (84);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-83);
+connection conn1;
INSERT INTO t1 VALUES (83);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-82);
+connection conn1;
INSERT INTO t1 VALUES (82);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-81);
+connection conn1;
INSERT INTO t1 VALUES (81);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-80);
+connection conn1;
INSERT INTO t1 VALUES (80);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-79);
+connection conn1;
INSERT INTO t1 VALUES (79);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-78);
+connection conn1;
INSERT INTO t1 VALUES (78);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-77);
+connection conn1;
INSERT INTO t1 VALUES (77);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-76);
+connection conn1;
INSERT INTO t1 VALUES (76);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-75);
+connection conn1;
INSERT INTO t1 VALUES (75);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-74);
+connection conn1;
INSERT INTO t1 VALUES (74);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-73);
+connection conn1;
INSERT INTO t1 VALUES (73);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-72);
+connection conn1;
INSERT INTO t1 VALUES (72);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-71);
+connection conn1;
INSERT INTO t1 VALUES (71);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-70);
+connection conn1;
INSERT INTO t1 VALUES (70);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-69);
+connection conn1;
INSERT INTO t1 VALUES (69);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-68);
+connection conn1;
INSERT INTO t1 VALUES (68);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-67);
+connection conn1;
INSERT INTO t1 VALUES (67);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-66);
+connection conn1;
INSERT INTO t1 VALUES (66);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-65);
+connection conn1;
INSERT INTO t1 VALUES (65);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-64);
+connection conn1;
INSERT INTO t1 VALUES (64);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-63);
+connection conn1;
INSERT INTO t1 VALUES (63);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-62);
+connection conn1;
INSERT INTO t1 VALUES (62);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-61);
+connection conn1;
INSERT INTO t1 VALUES (61);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-60);
+connection conn1;
INSERT INTO t1 VALUES (60);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-59);
+connection conn1;
INSERT INTO t1 VALUES (59);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-58);
+connection conn1;
INSERT INTO t1 VALUES (58);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-57);
+connection conn1;
INSERT INTO t1 VALUES (57);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-56);
+connection conn1;
INSERT INTO t1 VALUES (56);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-55);
+connection conn1;
INSERT INTO t1 VALUES (55);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-54);
+connection conn1;
INSERT INTO t1 VALUES (54);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-53);
+connection conn1;
INSERT INTO t1 VALUES (53);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-52);
+connection conn1;
INSERT INTO t1 VALUES (52);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-51);
+connection conn1;
INSERT INTO t1 VALUES (51);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-50);
+connection conn1;
INSERT INTO t1 VALUES (50);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-49);
+connection conn1;
INSERT INTO t1 VALUES (49);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-48);
+connection conn1;
INSERT INTO t1 VALUES (48);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-47);
+connection conn1;
INSERT INTO t1 VALUES (47);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-46);
+connection conn1;
INSERT INTO t1 VALUES (46);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-45);
+connection conn1;
INSERT INTO t1 VALUES (45);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-44);
+connection conn1;
INSERT INTO t1 VALUES (44);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-43);
+connection conn1;
INSERT INTO t1 VALUES (43);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-42);
+connection conn1;
INSERT INTO t1 VALUES (42);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-41);
+connection conn1;
INSERT INTO t1 VALUES (41);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-40);
+connection conn1;
INSERT INTO t1 VALUES (40);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-39);
+connection conn1;
INSERT INTO t1 VALUES (39);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-38);
+connection conn1;
INSERT INTO t1 VALUES (38);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-37);
+connection conn1;
INSERT INTO t1 VALUES (37);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-36);
+connection conn1;
INSERT INTO t1 VALUES (36);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-35);
+connection conn1;
INSERT INTO t1 VALUES (35);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-34);
+connection conn1;
INSERT INTO t1 VALUES (34);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-33);
+connection conn1;
INSERT INTO t1 VALUES (33);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-32);
+connection conn1;
INSERT INTO t1 VALUES (32);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-31);
+connection conn1;
INSERT INTO t1 VALUES (31);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-30);
+connection conn1;
INSERT INTO t1 VALUES (30);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-29);
+connection conn1;
INSERT INTO t1 VALUES (29);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-28);
+connection conn1;
INSERT INTO t1 VALUES (28);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-27);
+connection conn1;
INSERT INTO t1 VALUES (27);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-26);
+connection conn1;
INSERT INTO t1 VALUES (26);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-25);
+connection conn1;
INSERT INTO t1 VALUES (25);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-24);
+connection conn1;
INSERT INTO t1 VALUES (24);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-23);
+connection conn1;
INSERT INTO t1 VALUES (23);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-22);
+connection conn1;
INSERT INTO t1 VALUES (22);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-21);
+connection conn1;
INSERT INTO t1 VALUES (21);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-20);
+connection conn1;
INSERT INTO t1 VALUES (20);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-19);
+connection conn1;
INSERT INTO t1 VALUES (19);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-18);
+connection conn1;
INSERT INTO t1 VALUES (18);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-17);
+connection conn1;
INSERT INTO t1 VALUES (17);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-16);
+connection conn1;
INSERT INTO t1 VALUES (16);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-15);
+connection conn1;
INSERT INTO t1 VALUES (15);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-14);
+connection conn1;
INSERT INTO t1 VALUES (14);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-13);
+connection conn1;
INSERT INTO t1 VALUES (13);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-12);
+connection conn1;
INSERT INTO t1 VALUES (12);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-11);
+connection conn1;
INSERT INTO t1 VALUES (11);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-10);
+connection conn1;
INSERT INTO t1 VALUES (10);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-9);
+connection conn1;
INSERT INTO t1 VALUES (9);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-8);
+connection conn1;
INSERT INTO t1 VALUES (8);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-7);
+connection conn1;
INSERT INTO t1 VALUES (7);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-6);
+connection conn1;
INSERT INTO t1 VALUES (6);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-5);
+connection conn1;
INSERT INTO t1 VALUES (5);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-4);
+connection conn1;
INSERT INTO t1 VALUES (4);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-3);
+connection conn1;
INSERT INTO t1 VALUES (3);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-2);
+connection conn1;
INSERT INTO t1 VALUES (2);
+connection default;
UNLOCK TABLES;
+connection conn1;
+connection default;
LOCK TABLES t1 WRITE;
INSERT INTO t1 VALUES (-1);
+connection conn1;
INSERT INTO t1 VALUES (1);
+connection default;
UNLOCK TABLES;
+connection conn1;
+disconnect conn1;
+connection default;
DROP TABLE t1;
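The block above is the expanded recording of a looped test: on each pass the default connection takes LOCK TABLES t1 WRITE, inserts a negative value, and conn1's positive insert waits until UNLOCK TABLES releases the lock. A minimal sketch of the kind of mysqltest loop that would generate this output (the counter name $i and the bounds are illustrative, not taken from the real .test file):

  let $i = 70;
  while ($i)
  {
    connection default;
    LOCK TABLES t1 WRITE;
    eval INSERT INTO t1 VALUES (-$i);
    connection conn1;
    # send without waiting: this INSERT blocks on the table lock
    send_eval INSERT INTO t1 VALUES ($i);
    connection default;
    UNLOCK TABLES;
    connection conn1;
    # reap the blocked INSERT once the lock is released
    reap;
    dec $i;
  }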
diff --git a/storage/tokudb/mysql-test/tokudb/r/locks-blocking-row-locks.result b/storage/tokudb/mysql-test/tokudb/r/locks-blocking-row-locks.result
index 060aeda803b..fe6a283a281 100644
--- a/storage/tokudb/mysql-test/tokudb/r/locks-blocking-row-locks.result
+++ b/storage/tokudb/mysql-test/tokudb/r/locks-blocking-row-locks.result
@@ -1,5 +1,8 @@
+connect conn1, localhost, root;
set session transaction isolation level serializable;
+connect conn2, localhost, root;
set session transaction isolation level serializable;
+connection conn1;
drop table if exists t;
create table t (a int primary key, b int) engine=tokudb;
insert ignore t values(1, 1);
@@ -8,12 +11,16 @@ insert ignore t values(3, 9);
insert ignore t values(4, 16);
insert ignore t values(5, 25);
insert ignore t values(6, 36);
+connection conn1;
set session tokudb_lock_timeout=0;
+connection conn2;
set session tokudb_lock_timeout=0;
+connection conn1;
begin;
select * from t where a=1 for update;
a b
1 1
+connection conn2;
select * from t where a=1 for update;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
select * from t where a=2 for update;
@@ -38,7 +45,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
select * from t where a=1 for update;
a b
1 1
@@ -52,10 +61,13 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
select * from t where a=1 for update;
a b
1 1
+connection conn2;
update t set b=b where a=1;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
update t set b=b where a=2;
@@ -74,7 +86,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
update t set b=b where a=1;
begin;
select * from t;
@@ -86,10 +100,13 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
select * from t where a=1 for update;
a b
1 1
+connection conn2;
insert ignore t values(1, 100);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert ignore t values(2, 100);
@@ -108,7 +125,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
insert ignore t values(1, 100);
begin;
select * from t;
@@ -120,10 +139,13 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
select * from t where a=1 for update;
a b
1 1
+connection conn2;
replace t values(1, 100);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
replace t values(2, 100);
@@ -142,7 +164,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
replace t values(1, 100);
begin;
select * from t;
@@ -154,10 +178,13 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
select * from t where a=1 for update;
a b
1 100
+connection conn2;
select * from t where a<=2 for update;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
select * from t where a>=0 for update;
@@ -181,7 +208,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
select * from t where a<=2 for update;
a b
1 100
@@ -204,10 +233,13 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
select * from t where a=1 for update;
a b
1 100
+connection conn2;
update t set b=b where a<=2;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
update t set b=b where a>=0;
@@ -226,7 +258,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
update t set b=b where a<=2;
update t set b=b where a>=0;
begin;
@@ -239,8 +273,11 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
update t set b=b where a=1;
+connection conn2;
select * from t where a=1 for update;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
select * from t where a=2 for update;
@@ -265,7 +302,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
select * from t where a=1 for update;
a b
1 100
@@ -279,8 +318,11 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
update t set b=b where a=1;
+connection conn2;
update t set b=b where a=1;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
update t set b=b where a=2;
@@ -299,7 +341,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
update t set b=b where a=1;
begin;
select * from t;
@@ -311,8 +355,11 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
update t set b=b where a=1;
+connection conn2;
insert ignore t values(1, 100);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert ignore t values(2, 100);
@@ -331,7 +378,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
insert ignore t values(1, 100);
begin;
select * from t;
@@ -343,8 +392,11 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
update t set b=b where a=1;
+connection conn2;
replace t values(1, 100);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
replace t values(2, 100);
@@ -363,7 +415,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
replace t values(1, 100);
begin;
select * from t;
@@ -375,8 +429,11 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
update t set b=b where a=1;
+connection conn2;
select * from t where a<=2 for update;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
select * from t where a>=0 for update;
@@ -400,7 +457,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
select * from t where a<=2 for update;
a b
1 100
@@ -423,8 +482,11 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
update t set b=b where a=1;
+connection conn2;
update t set b=b where a<=2;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
update t set b=b where a>=0;
@@ -443,7 +505,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
update t set b=b where a<=2;
update t set b=b where a>=0;
begin;
@@ -456,8 +520,11 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
insert ignore t values(1, 100);
+connection conn2;
select * from t where a=1 for update;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
select * from t where a=2 for update;
@@ -482,7 +549,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
select * from t where a=1 for update;
a b
1 100
@@ -496,8 +565,11 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
insert ignore t values(1, 100);
+connection conn2;
update t set b=b where a=1;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
update t set b=b where a=2;
@@ -516,7 +588,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
update t set b=b where a=1;
begin;
select * from t;
@@ -528,8 +602,11 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
insert ignore t values(1, 100);
+connection conn2;
insert ignore t values(1, 100);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert ignore t values(2, 100);
@@ -548,7 +625,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
insert ignore t values(1, 100);
begin;
select * from t;
@@ -560,8 +639,11 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
insert ignore t values(1, 100);
+connection conn2;
replace t values(1, 100);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
replace t values(2, 100);
@@ -580,7 +662,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
replace t values(1, 100);
begin;
select * from t;
@@ -592,8 +676,11 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
insert ignore t values(1, 100);
+connection conn2;
select * from t where a<=2 for update;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
select * from t where a>=0 for update;
@@ -617,7 +704,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
select * from t where a<=2 for update;
a b
1 100
@@ -640,8 +729,11 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
insert ignore t values(1, 100);
+connection conn2;
update t set b=b where a<=2;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
update t set b=b where a>=0;
@@ -660,7 +752,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
update t set b=b where a<=2;
update t set b=b where a>=0;
begin;
@@ -673,8 +767,11 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
replace t values(1, 100);
+connection conn2;
select * from t where a=1 for update;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
select * from t where a=2 for update;
@@ -699,7 +796,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
select * from t where a=1 for update;
a b
1 100
@@ -713,8 +812,11 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
replace t values(1, 100);
+connection conn2;
update t set b=b where a=1;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
update t set b=b where a=2;
@@ -733,7 +835,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
update t set b=b where a=1;
begin;
select * from t;
@@ -745,8 +849,11 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
replace t values(1, 100);
+connection conn2;
insert ignore t values(1, 100);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert ignore t values(2, 100);
@@ -765,7 +872,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
insert ignore t values(1, 100);
begin;
select * from t;
@@ -777,8 +886,11 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
replace t values(1, 100);
+connection conn2;
replace t values(1, 100);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
replace t values(2, 100);
@@ -797,7 +909,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
replace t values(1, 100);
begin;
select * from t;
@@ -809,8 +923,11 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
replace t values(1, 100);
+connection conn2;
select * from t where a<=2 for update;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
select * from t where a>=0 for update;
@@ -834,7 +951,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
select * from t where a<=2 for update;
a b
1 100
@@ -857,8 +976,11 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
replace t values(1, 100);
+connection conn2;
update t set b=b where a<=2;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
update t set b=b where a>=0;
@@ -877,7 +999,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
update t set b=b where a<=2;
update t set b=b where a>=0;
begin;
@@ -890,12 +1014,15 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
select * from t where a>=2 and a<=4 for update;
a b
2 100
3 100
4 100
+connection conn2;
select * from t where a>=0 and a<=3 for update;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
select * from t where a>=3 and a<=6 for update;
@@ -918,7 +1045,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
select * from t where a>=0 and a<=3 for update;
a b
1 100
@@ -944,12 +1073,15 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
select * from t where a>=2 and a<=4 for update;
a b
2 100
3 100
4 100
+connection conn2;
update t set b=b where a>=0 and a<=3;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
update t set b=b where a>=3 and a<=6;
@@ -969,7 +1101,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
update t set b=b where a>=0 and a<=3;
update t set b=b where a>=3 and a<=6;
update t set b=b where a<=2;
@@ -983,8 +1117,11 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
update t set b=b where a>=2 and a<=4;
+connection conn2;
select * from t where a>=0 and a<=3 for update;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
select * from t where a>=3 and a<=6 for update;
@@ -1007,7 +1144,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
select * from t where a>=0 and a<=3 for update;
a b
1 100
@@ -1033,8 +1172,11 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
update t set b=b where a>=2 and a<=4;
+connection conn2;
update t set b=b where a>=0 and a<=3;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
update t set b=b where a>=3 and a<=6;
@@ -1054,7 +1196,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
update t set b=b where a>=0 and a<=3;
update t set b=b where a>=3 and a<=6;
update t set b=b where a<=2;
@@ -1068,12 +1212,17 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
set session tokudb_lock_timeout=500;
+connection conn2;
set session tokudb_lock_timeout=500;
+connection conn1;
begin;
select * from t where a=1 for update;
a b
1 100
+connection conn2;
select * from t where a=1 for update;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
select * from t where a=2 for update;
@@ -1098,7 +1247,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
select * from t where a=1 for update;
a b
1 100
@@ -1112,18 +1263,25 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
select * from t where a=1 for update;
a b
1 100
+connection conn2;
select * from t where a=1 for update;
+connection conn1;
commit;
+connection conn2;
a b
1 100
+connection conn1;
begin;
select * from t where a=1 for update;
a b
1 100
+connection conn2;
update t set b=b where a=1;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
update t set b=b where a=2;
@@ -1142,7 +1300,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
update t set b=b where a=1;
begin;
select * from t;
@@ -1154,16 +1314,23 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
select * from t where a=1 for update;
a b
1 100
+connection conn2;
update t set b=b where a=1;
+connection conn1;
commit;
+connection conn2;
+connection conn1;
begin;
select * from t where a=1 for update;
a b
1 100
+connection conn2;
insert ignore t values(1, 100);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert ignore t values(2, 100);
@@ -1182,7 +1349,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
insert ignore t values(1, 100);
begin;
select * from t;
@@ -1194,16 +1363,23 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
select * from t where a=1 for update;
a b
1 100
+connection conn2;
insert ignore t values(1, 175);
+connection conn1;
commit;
+connection conn2;
+connection conn1;
begin;
select * from t where a=1 for update;
a b
1 100
+connection conn2;
replace t values(1, 100);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
replace t values(2, 100);
@@ -1222,7 +1398,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
replace t values(1, 100);
begin;
select * from t;
@@ -1234,16 +1412,23 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
select * from t where a=1 for update;
a b
1 100
+connection conn2;
replace t values(1, 175);
+connection conn1;
commit;
+connection conn2;
+connection conn1;
begin;
select * from t where a=1 for update;
a b
1 175
+connection conn2;
select * from t where a<=2 for update;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
select * from t where a>=0 for update;
@@ -1267,7 +1452,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
select * from t where a<=2 for update;
a b
1 175
@@ -1290,19 +1477,26 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
select * from t where a=1 for update;
a b
1 175
+connection conn2;
select * from t where a<=2 for update;
+connection conn1;
commit;
+connection conn2;
a b
1 175
2 100
+connection conn1;
begin;
select * from t where a=1 for update;
a b
1 175
+connection conn2;
update t set b=b where a<=2;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
update t set b=b where a>=0;
@@ -1321,7 +1515,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
update t set b=b where a<=2;
update t set b=b where a>=0;
begin;
@@ -1334,14 +1530,21 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
select * from t where a=1 for update;
a b
1 175
+connection conn2;
update t set b=b where a<=2;
+connection conn1;
commit;
+connection conn2;
+connection conn1;
begin;
update t set b=b where a=1;
+connection conn2;
select * from t where a=1 for update;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
select * from t where a=2 for update;
@@ -1366,7 +1569,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
select * from t where a=1 for update;
a b
1 175
@@ -1380,14 +1585,21 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
update t set b=b where a=1;
+connection conn2;
select * from t where a=1 for update;
+connection conn1;
commit;
+connection conn2;
a b
1 175
+connection conn1;
begin;
update t set b=b where a=1;
+connection conn2;
update t set b=b where a=1;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
update t set b=b where a=2;
@@ -1406,7 +1618,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
update t set b=b where a=1;
begin;
select * from t;
@@ -1418,12 +1632,19 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
update t set b=b where a=1;
+connection conn2;
update t set b=b where a=1;
+connection conn1;
commit;
+connection conn2;
+connection conn1;
begin;
update t set b=b where a=1;
+connection conn2;
insert ignore t values(1, 100);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert ignore t values(2, 100);
@@ -1442,7 +1663,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
insert ignore t values(1, 100);
begin;
select * from t;
@@ -1454,12 +1677,19 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
update t set b=b where a=1;
+connection conn2;
insert ignore t values(1, 175);
+connection conn1;
commit;
+connection conn2;
+connection conn1;
begin;
update t set b=b where a=1;
+connection conn2;
replace t values(1, 100);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
replace t values(2, 100);
@@ -1478,7 +1708,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
replace t values(1, 100);
begin;
select * from t;
@@ -1490,12 +1722,19 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
update t set b=b where a=1;
+connection conn2;
replace t values(1, 175);
+connection conn1;
commit;
+connection conn2;
+connection conn1;
begin;
update t set b=b where a=1;
+connection conn2;
select * from t where a<=2 for update;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
select * from t where a>=0 for update;
@@ -1519,7 +1758,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
select * from t where a<=2 for update;
a b
1 175
@@ -1542,15 +1783,22 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
update t set b=b where a=1;
+connection conn2;
select * from t where a<=2 for update;
+connection conn1;
commit;
+connection conn2;
a b
1 175
2 100
+connection conn1;
begin;
update t set b=b where a=1;
+connection conn2;
update t set b=b where a<=2;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
update t set b=b where a>=0;
@@ -1569,7 +1817,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
update t set b=b where a<=2;
update t set b=b where a>=0;
begin;
@@ -1582,12 +1832,19 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
update t set b=b where a=1;
+connection conn2;
update t set b=b where a<=2;
+connection conn1;
commit;
+connection conn2;
+connection conn1;
begin;
insert ignore t values(1, 100);
+connection conn2;
select * from t where a=1 for update;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
select * from t where a=2 for update;
@@ -1612,7 +1869,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
select * from t where a=1 for update;
a b
1 175
@@ -1626,14 +1885,21 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
insert ignore t values(1, 150);
+connection conn2;
select * from t where a=1 for update;
+connection conn1;
commit;
+connection conn2;
a b
1 175
+connection conn1;
begin;
insert ignore t values(1, 100);
+connection conn2;
update t set b=b where a=1;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
update t set b=b where a=2;
@@ -1652,7 +1918,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
update t set b=b where a=1;
begin;
select * from t;
@@ -1664,12 +1932,19 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
insert ignore t values(1, 150);
+connection conn2;
update t set b=b where a=1;
+connection conn1;
commit;
+connection conn2;
+connection conn1;
begin;
insert ignore t values(1, 100);
+connection conn2;
insert ignore t values(1, 100);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert ignore t values(2, 100);
@@ -1688,7 +1963,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
insert ignore t values(1, 100);
begin;
select * from t;
@@ -1700,12 +1977,19 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
insert ignore t values(1, 150);
+connection conn2;
insert ignore t values(1, 175);
+connection conn1;
commit;
+connection conn2;
+connection conn1;
begin;
insert ignore t values(1, 100);
+connection conn2;
replace t values(1, 100);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
replace t values(2, 100);
@@ -1724,7 +2008,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
replace t values(1, 100);
begin;
select * from t;
@@ -1736,12 +2022,19 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
insert ignore t values(1, 150);
+connection conn2;
replace t values(1, 175);
+connection conn1;
commit;
+connection conn2;
+connection conn1;
begin;
insert ignore t values(1, 100);
+connection conn2;
select * from t where a<=2 for update;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
select * from t where a>=0 for update;
@@ -1765,7 +2058,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
select * from t where a<=2 for update;
a b
1 175
@@ -1788,15 +2083,22 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
insert ignore t values(1, 150);
+connection conn2;
select * from t where a<=2 for update;
+connection conn1;
commit;
+connection conn2;
a b
1 175
2 100
+connection conn1;
begin;
insert ignore t values(1, 100);
+connection conn2;
update t set b=b where a<=2;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
update t set b=b where a>=0;
@@ -1815,7 +2117,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
update t set b=b where a<=2;
update t set b=b where a>=0;
begin;
@@ -1828,12 +2132,19 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
insert ignore t values(1, 150);
+connection conn2;
update t set b=b where a<=2;
+connection conn1;
commit;
+connection conn2;
+connection conn1;
begin;
replace t values(1, 100);
+connection conn2;
select * from t where a=1 for update;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
select * from t where a=2 for update;
@@ -1858,7 +2169,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
select * from t where a=1 for update;
a b
1 100
@@ -1872,14 +2185,21 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
replace t values(1, 150);
+connection conn2;
select * from t where a=1 for update;
+connection conn1;
commit;
+connection conn2;
a b
1 150
+connection conn1;
begin;
replace t values(1, 100);
+connection conn2;
update t set b=b where a=1;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
update t set b=b where a=2;
@@ -1898,7 +2218,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
update t set b=b where a=1;
begin;
select * from t;
@@ -1910,12 +2232,19 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
replace t values(1, 150);
+connection conn2;
update t set b=b where a=1;
+connection conn1;
commit;
+connection conn2;
+connection conn1;
begin;
replace t values(1, 100);
+connection conn2;
insert ignore t values(1, 100);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert ignore t values(2, 100);
@@ -1934,7 +2263,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
insert ignore t values(1, 100);
begin;
select * from t;
@@ -1946,12 +2277,19 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
replace t values(1, 150);
+connection conn2;
insert ignore t values(1, 175);
+connection conn1;
commit;
+connection conn2;
+connection conn1;
begin;
replace t values(1, 100);
+connection conn2;
replace t values(1, 100);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
replace t values(2, 100);
@@ -1970,7 +2308,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
replace t values(1, 100);
begin;
select * from t;
@@ -1982,12 +2322,19 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
replace t values(1, 150);
+connection conn2;
replace t values(1, 175);
+connection conn1;
commit;
+connection conn2;
+connection conn1;
begin;
replace t values(1, 100);
+connection conn2;
select * from t where a<=2 for update;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
select * from t where a>=0 for update;
@@ -2011,7 +2358,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
select * from t where a<=2 for update;
a b
1 100
@@ -2034,15 +2383,22 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
replace t values(1, 150);
+connection conn2;
select * from t where a<=2 for update;
+connection conn1;
commit;
+connection conn2;
a b
1 150
2 100
+connection conn1;
begin;
replace t values(1, 100);
+connection conn2;
update t set b=b where a<=2;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
update t set b=b where a>=0;
@@ -2061,7 +2417,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
update t set b=b where a<=2;
update t set b=b where a>=0;
begin;
@@ -2074,16 +2432,23 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
replace t values(1, 150);
+connection conn2;
update t set b=b where a<=2;
+connection conn1;
commit;
+connection conn2;
+connection conn1;
begin;
select * from t where a>=2 and a<=4 for update;
a b
2 100
3 100
4 100
+connection conn2;
select * from t where a>=0 and a<=3 for update;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
select * from t where a>=3 and a<=6 for update;
@@ -2106,7 +2471,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
select * from t where a>=0 and a<=3 for update;
a b
1 150
@@ -2132,24 +2499,31 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
select * from t where a>=2 and a<=4 for update;
a b
2 100
3 100
4 100
+connection conn2;
select * from t where a>=0 and a<=3 for update;
+connection conn1;
commit;
+connection conn2;
a b
1 150
2 100
3 100
+connection conn1;
begin;
select * from t where a>=2 and a<=4 for update;
a b
2 100
3 100
4 100
+connection conn2;
update t set b=b where a>=0 and a<=3;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
update t set b=b where a>=3 and a<=6;
@@ -2169,7 +2543,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
update t set b=b where a>=0 and a<=3;
update t set b=b where a>=3 and a<=6;
update t set b=b where a<=2;
@@ -2183,16 +2559,23 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
select * from t where a>=2 and a<=4 for update;
a b
2 100
3 100
4 100
+connection conn2;
update t set b=b where a>=0 and a<=3;
+connection conn1;
commit;
+connection conn2;
+connection conn1;
begin;
update t set b=b where a>=2 and a<=4;
+connection conn2;
select * from t where a>=0 and a<=3 for update;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
select * from t where a>=3 and a<=6 for update;
@@ -2215,7 +2598,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
select * from t where a>=0 and a<=3 for update;
a b
1 150
@@ -2241,16 +2626,23 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
update t set b=b where a>=2 and a<=4;
+connection conn2;
select * from t where a>=0 and a<=3 for update;
+connection conn1;
commit;
+connection conn2;
a b
1 150
2 100
3 100
+connection conn1;
begin;
update t set b=b where a>=2 and a<=4;
+connection conn2;
update t set b=b where a>=0 and a<=3;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
update t set b=b where a>=3 and a<=6;
@@ -2270,7 +2662,9 @@ a b
5 25
6 36
commit;
+connection conn1;
commit;
+connection conn2;
update t set b=b where a>=0 and a<=3;
update t set b=b where a>=3 and a<=6;
update t set b=b where a<=2;
@@ -2284,8 +2678,13 @@ a b
5 25
6 36
commit;
+connection conn1;
+connection conn1;
begin;
update t set b=b where a>=2 and a<=4;
+connection conn2;
update t set b=b where a>=0 and a<=3;
+connection conn1;
commit;
+connection conn2;
drop table t;
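Every hunk in this file follows the same two-connection conflict pattern: conn1 acquires a row lock inside a transaction, conn2 issues a conflicting statement that fails immediately because tokudb_lock_timeout=0, and after conn1 commits the same statement succeeds. A minimal sketch of one round, using the usual mysqltest error-checking syntax:

  connection conn1;
  set session tokudb_lock_timeout=0;
  begin;
  select * from t where a=1 for update;   # conn1 now holds a write lock on a=1
  connection conn2;
  set session tokudb_lock_timeout=0;
  --error ER_LOCK_WAIT_TIMEOUT
  update t set b=b where a=1;             # conflicts, fails without waiting
  connection conn1;
  commit;                                 # releases the row lock
  connection conn2;
  update t set b=b where a=1;             # retried after commit: succeeds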
diff --git a/storage/tokudb/mysql-test/tokudb/r/locks-delete-deadlock-1.result b/storage/tokudb/mysql-test/tokudb/r/locks-delete-deadlock-1.result
index d2feccac563..59fe0a606c7 100644
--- a/storage/tokudb/mysql-test/tokudb/r/locks-delete-deadlock-1.result
+++ b/storage/tokudb/mysql-test/tokudb/r/locks-delete-deadlock-1.result
@@ -1,13 +1,20 @@
SET DEFAULT_STORAGE_ENGINE='tokudb';
drop table if exists t;
+connect conn1,localhost,root;
+connection default;
create table t (a int primary key, b int);
insert into t values (1,0),(2,1),(3,0);
begin;
delete from t where b>0;
+connection conn1;
set transaction isolation level serializable;
select * from t where a!=2;
+connection default;
commit;
+connection conn1;
a b
1 0
3 0
+connection default;
+disconnect conn1;
drop table t;
diff --git a/storage/tokudb/mysql-test/tokudb/r/locks-no-read-lock-serializable-autocommit.result b/storage/tokudb/mysql-test/tokudb/r/locks-no-read-lock-serializable-autocommit.result
index fa6b28ddc93..b271102eb74 100644
--- a/storage/tokudb/mysql-test/tokudb/r/locks-no-read-lock-serializable-autocommit.result
+++ b/storage/tokudb/mysql-test/tokudb/r/locks-no-read-lock-serializable-autocommit.result
@@ -9,6 +9,7 @@ a
1
2
3
+connect conn1, localhost, root;
select * from t;
a
1
@@ -27,18 +28,22 @@ replace into t values (1);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert ignore t values (3);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection default;
commit;
+connection default;
begin;
select * from t;
a
1
2
3
+connection conn1;
select * from t for update;
a
1
2
3
+connection default;
commit;
drop table t;
set global transaction isolation level repeatable read;
diff --git a/storage/tokudb/mysql-test/tokudb/r/locks-select-update-1.result b/storage/tokudb/mysql-test/tokudb/r/locks-select-update-1.result
index a816808a2d5..4ea99c9b15f 100644
--- a/storage/tokudb/mysql-test/tokudb/r/locks-select-update-1.result
+++ b/storage/tokudb/mysql-test/tokudb/r/locks-select-update-1.result
@@ -8,10 +8,13 @@ select * from t where a=1 for update;
a b
1 0
update t set b=b+1 where a=1;
+connect conn1,localhost,root;
set session transaction isolation level repeatable read;
begin;
select * from t where a=1 for update;
+connection default;
commit;
+connection conn1;
a b
1 1
update t set b=b+1 where a=1;
@@ -19,4 +22,6 @@ select * from t;
a b
1 2
commit;
+connection default;
+disconnect conn1;
drop table t;
diff --git a/storage/tokudb/mysql-test/tokudb/r/locks-select-update-2.result b/storage/tokudb/mysql-test/tokudb/r/locks-select-update-2.result
index cea61214669..d8929b7340e 100644
--- a/storage/tokudb/mysql-test/tokudb/r/locks-select-update-2.result
+++ b/storage/tokudb/mysql-test/tokudb/r/locks-select-update-2.result
@@ -8,10 +8,13 @@ select * from t where a=1 for update;
a b
1 0
update t set b=b+1 where a=1;
+connect conn1,localhost,root;
set session transaction isolation level serializable;
begin;
select * from t where a=1 for update;
+connection default;
commit;
+connection conn1;
a b
1 1
update t set b=b+1 where a=1;
@@ -19,4 +22,6 @@ select * from t;
a b
1 2
commit;
+connection default;
+disconnect conn1;
drop table t;
diff --git a/storage/tokudb/mysql-test/tokudb/r/locks-select-update-3.result b/storage/tokudb/mysql-test/tokudb/r/locks-select-update-3.result
index 3f05f65661c..af728338110 100644
--- a/storage/tokudb/mysql-test/tokudb/r/locks-select-update-3.result
+++ b/storage/tokudb/mysql-test/tokudb/r/locks-select-update-3.result
@@ -6,11 +6,14 @@ select * from t where a=1 for update;
a b
1 0
update t set b=b+1 where a=1;
+connect conn1,localhost,root;
set session tokudb_lock_timeout=60000;
set session transaction isolation level read committed;
begin;
select * from t where a=1 for update;
+connection default;
commit;
+connection conn1;
a b
1 1
update t set b=b+1 where a=1;
@@ -18,4 +21,6 @@ select * from t;
a b
1 2
commit;
+connection default;
+disconnect conn1;
drop table t;
diff --git a/storage/tokudb/mysql-test/tokudb/r/locks-update-deadlock-1.result b/storage/tokudb/mysql-test/tokudb/r/locks-update-deadlock-1.result
index 1357c30b22d..3fe46387aa1 100644
--- a/storage/tokudb/mysql-test/tokudb/r/locks-update-deadlock-1.result
+++ b/storage/tokudb/mysql-test/tokudb/r/locks-update-deadlock-1.result
@@ -1,13 +1,20 @@
SET DEFAULT_STORAGE_ENGINE='tokudb';
drop table if exists t;
+connect conn1,localhost,root;
+connection default;
create table t (a int primary key, b int);
insert into t values (1,0),(2,1),(3,0);
begin;
update t set b=b+1 where b>0;
+connection conn1;
set transaction isolation level serializable;
select * from t where a!=2;
+connection default;
commit;
+connection conn1;
a b
1 0
3 0
+connection default;
+disconnect conn1;
drop table t;
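The connect/connection/disconnect lines added throughout these result files are mysqltest's connection-management commands, echoed into the recorded output; each .result is re-recorded here to match. For reference, a minimal sketch of the command forms involved:

  connect conn1, localhost, root,,;   # open a new session named conn1 as root
  connection conn1;                   # make conn1 the current session
  connection default;                 # switch back to the original session
  disconnect conn1;                   # close conn1 and discard its state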
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-10.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-10.result
index 6ebeb298d84..6c90973f1dc 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-10.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-10.result
@@ -1,6 +1,12 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
+connect conn2,localhost,root,,;
+connect conn3,localhost,root,,;
+connect conn4,localhost,root,,;
+connect conn5,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection default;
create table foo (a int, b varchar (100), primary key (a)) engine=TokuDB;
show create table foo;
Table Create Table
@@ -9,48 +15,69 @@ foo CREATE TABLE `foo` (
`b` varchar(100) DEFAULT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
+connection conn1;
set session transaction isolation level repeatable read;
begin;
select * from foo;
a b
+connection default;
replace into foo values (1, "a");
+connection conn2;
set session transaction isolation level repeatable read;
begin;
select * from foo;
a b
1 a
+connection default;
delete from foo;
+connection conn3;
set session transaction isolation level repeatable read;
begin;
select * from foo;
a b
+connection default;
replace into foo values (1,"abc");
+connection conn4;
set session transaction isolation level repeatable read;
begin;
select * from foo;
a b
1 abc
+connection default;
delete from foo;
+connection conn5;
set session transaction isolation level repeatable read;
begin;
select * from foo;
a b
+connection conn1;
select * from foo;
a b
commit;
+connection conn2;
select * from foo;
a b
1 a
commit;
+connection conn3;
select * from foo;
a b
commit;
+connection conn4;
select * from foo;
a b
1 abc
commit;
+connection conn5;
select * from foo;
a b
commit;
+connection default;
+disconnect conn1;
+disconnect conn2;
+disconnect conn3;
+disconnect conn4;
+disconnect conn5;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
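mvcc-10 checks snapshot isolation: each connection opens a REPEATABLE READ transaction at a different point, and its later SELECTs keep returning the table as it looked when the snapshot was taken, regardless of the default connection's intervening replaces and deletes. The core pattern, as a minimal sketch:

  connection conn1;
  set session transaction isolation level repeatable read;
  begin;
  select * from foo;                # snapshot taken here: table is empty
  connection default;
  replace into foo values (1, "a");
  connection conn1;
  select * from foo;                # still empty: the snapshot is pinned
  commit;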
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-11.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-11.result
index a89ba26a461..f81bfda92cb 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-11.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-11.result
@@ -1,6 +1,8 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection default;
create table foo (a int) engine=TokuDB;
show create table foo;
Table Create Table
@@ -10,6 +12,7 @@ foo CREATE TABLE `foo` (
insert into foo values (1);
begin;
insert into foo values (2);
+connection conn1;
set session transaction isolation level serializable;
begin;
select * from foo;
@@ -20,6 +23,9 @@ select * from foo;
a
1
2
+connection default;
commit;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-12.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-12.result
index b307f487f03..83702cbc49b 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-12.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-12.result
@@ -1,6 +1,8 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection default;
set session transaction isolation level serializable;
create table foo (a int) engine=TokuDB;
show create table foo;
@@ -13,6 +15,7 @@ begin;
select * from foo;
a
1
+connection conn1;
set session transaction isolation level serializable;
insert into foo values (3);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
@@ -25,6 +28,9 @@ ERROR HY000: Lock wait timeout exceeded; try restarting transaction
set session transaction isolation level read uncommitted;
insert into foo values (3);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection default;
commit;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-13.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-13.result
index 2f87ddb26cc..46393b1fe8c 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-13.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-13.result
@@ -1,6 +1,8 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo,foo1;
+connection default;
set session transaction isolation level repeatable read;
create table foo (a int) engine=TokuDB;
show create table foo;
@@ -11,10 +13,15 @@ foo CREATE TABLE `foo` (
begin;
select * from foo;
a
+connection conn1;
set session transaction isolation level repeatable read;
create table foo1(a int) engine=TokuDB;
insert into foo1 values(1);
+connection default;
select * from foo1;
ERROR HY000: Table definition has changed, please retry transaction
+connection default;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo,foo1;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-14.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-14.result
index eecb674346f..25f3940d34e 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-14.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-14.result
@@ -1,6 +1,8 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo,foo1;
+connection default;
set session transaction isolation level read uncommitted;
create table foo (a int) engine=TokuDB;
show create table foo;
@@ -11,11 +13,16 @@ foo CREATE TABLE `foo` (
begin;
select * from foo;
a
+connection conn1;
set session transaction isolation level repeatable read;
create table foo1(a int) engine=TokuDB;
insert into foo1 values(1);
+connection default;
select * from foo1;
a
1
+connection default;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo,foo1;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-15.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-15.result
index 0b7d96dd7b5..b4b111ca801 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-15.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-15.result
@@ -1,6 +1,8 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo,foo1;
+connection default;
set session transaction isolation level serializable;
create table foo (a int) engine=TokuDB;
show create table foo;
@@ -11,11 +13,16 @@ foo CREATE TABLE `foo` (
begin;
select * from foo;
a
+connection conn1;
set session transaction isolation level repeatable read;
create table foo1(a int) engine=TokuDB;
insert into foo1 values(1);
+connection default;
select * from foo1;
a
1
+connection default;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo,foo1;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-16.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-16.result
index 7ee801d88b2..7e6f482602b 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-16.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-16.result
@@ -1,6 +1,8 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo,foo1;
+connection default;
set session transaction isolation level repeatable read;
create table foo (a int) engine=TokuDB;
show create table foo;
@@ -11,9 +13,11 @@ foo CREATE TABLE `foo` (
begin;
select * from foo;
a
+connection conn1;
set session transaction isolation level repeatable read;
create table foo1(a int, b int, c int, primary key (a)) engine=TokuDB;
insert into foo1 values (1,10,100),(2,20,200),(3,30,300),(4,40,400),(5,50,500);
+connection default;
select a from foo1;
ERROR HY000: Table definition has changed, please retry transaction
select a from foo1 order by a desc;
@@ -26,5 +30,8 @@ select * from foo1 where a>3 order by a desc;
ERROR HY000: Table definition has changed, please retry transaction
select * from foo1;
ERROR HY000: Table definition has changed, please retry transaction
+connection default;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo,foo1;
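mvcc-13 through mvcc-16 exercise a related edge: a table created after a repeatable-read snapshot was taken is rejected with "Table definition has changed, please retry transaction", while READ UNCOMMITTED and a fresh SERIALIZABLE snapshot can see it. A minimal sketch of the failing case, assuming the ER_TABLE_DEF_CHANGED symbolic error name applies:

  connection default;
  set session transaction isolation level repeatable read;
  begin;
  select * from foo;                  # snapshot opened before foo1 exists
  connection conn1;
  create table foo1 (a int) engine=TokuDB;
  insert into foo1 values (1);
  connection default;
  --error ER_TABLE_DEF_CHANGED
  select * from foo1;                 # created after the snapshot: error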
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-17.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-17.result
index fe3322b9fb7..62ede774d3c 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-17.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-17.result
@@ -1,6 +1,8 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection default;
set session tokudb_load_save_space=0;
set session transaction isolation level repeatable read;
create table foo (a int)engine=TokuDB;
@@ -9,11 +11,13 @@ Table Create Table
foo CREATE TABLE `foo` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
+connection conn1;
set session tokudb_load_save_space=0;
set session transaction isolation level repeatable read;
begin;
select * from foo;
a
+connection default;
begin;
insert into foo values (1),(2),(3);
# should return 1,2,3
@@ -22,6 +26,7 @@ a
1
2
3
+connection conn1;
# should be empty
select * from foo;
a
@@ -29,13 +34,18 @@ a
insert into foo values (10000);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
commit;
+connection default;
commit;
+connection conn1;
# should see 1,2,3
select * from foo;
a
1
2
3
+connection default;
commit;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-18.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-18.result
index 1edda2650e2..9997b0545d4 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-18.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-18.result
@@ -1,6 +1,8 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection default;
set session tokudb_load_save_space=1;
set session transaction isolation level repeatable read;
create table foo (a int)engine=TokuDB;
@@ -9,11 +11,13 @@ Table Create Table
foo CREATE TABLE `foo` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
+connection conn1;
set session tokudb_load_save_space=1;
set session transaction isolation level repeatable read;
begin;
select * from foo;
a
+connection default;
begin;
insert into foo values (1),(2),(3);
# should return 1,2,3
@@ -22,6 +26,7 @@ a
1
2
3
+connection conn1;
# should be empty
select * from foo;
a
@@ -29,13 +34,18 @@ a
insert into foo values (10000);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
commit;
+connection default;
commit;
+connection conn1;
# should see 1,2,3
select * from foo;
a
1
2
3
+connection default;
commit;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-2.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-2.result
index 1223360fc3f..3cb20740c8b 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-2.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-2.result
@@ -1,7 +1,9 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
set session transaction isolation level repeatable read;
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection conn1;
set session transaction isolation level repeatable read;
create table foo (a int) engine=TokuDB;
insert into foo values (1);
@@ -9,14 +11,21 @@ begin;
select * from foo;
a
1
+connection default;
insert into foo values (1000000);
+connection conn1;
select * From foo;
a
1
+connection default;
select * from foo;
a
1
1000000
+connection conn1;
commit;
+connection default;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-21.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-21.result
index ced1e5b045e..950b6d8bce7 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-21.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-21.result
@@ -1,6 +1,12 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
+connect conn2,localhost,root,,;
+connect conn3,localhost,root,,;
+connect conn4,localhost,root,,;
+connect conn5,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection default;
create table foo (a int, b varchar (100), primary key (a)) engine=TokuDB;
show create table foo;
Table Create Table
@@ -9,26 +15,33 @@ foo CREATE TABLE `foo` (
`b` varchar(100) DEFAULT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
+connection conn1;
set session transaction isolation level repeatable read;
begin;
select * from foo;
a b
+connection default;
replace into foo values (1, "a");
+connection conn2;
set session transaction isolation level repeatable read;
begin;
select * from foo;
a b
1 a
+connection default;
replace into foo values (1,"abGARBAGE"), (2, "abGARBAGE");
replace into foo values (1,"ab"), (2, "ab");
+connection conn3;
set session transaction isolation level repeatable read;
begin;
select * from foo;
a b
1 ab
2 ab
+connection default;
replace into foo values (1,"abcGARBAGE"),(2,"abcGARBAGE"),(3, "abcGARBAGE");
replace into foo values (1,"abc"),(2,"abc"),(3, "abc");
+connection conn4;
set session transaction isolation level repeatable read;
begin;
select * from foo;
@@ -36,8 +49,10 @@ a b
1 abc
2 abc
3 abc
+connection default;
replace into foo values (1,"abcdGARBAGE"),(2,"abcdGARBAGE"),(3, "abcdGARBAGE"),(4, "abcdGARBAGE");
replace into foo values (1,"abcd"),(2,"abcd"),(3, "abcd"),(4, "abcd");
+connection conn5;
set session transaction isolation level repeatable read;
begin;
select * from foo;
@@ -46,24 +61,29 @@ a b
2 abcd
3 abcd
4 abcd
+connection conn1;
select * from foo;
a b
commit;
+connection conn2;
select * from foo;
a b
1 a
commit;
+connection conn3;
select * from foo;
a b
1 ab
2 ab
commit;
+connection conn4;
select * from foo;
a b
1 abc
2 abc
3 abc
commit;
+connection conn5;
select * from foo;
a b
1 abcd
@@ -71,6 +91,7 @@ a b
3 abcd
4 abcd
commit;
+connection default;
select * from foo;
a b
1 abcd
@@ -84,5 +105,11 @@ a b
2 2
3 3
4 4
+disconnect conn1;
+disconnect conn2;
+disconnect conn3;
+disconnect conn4;
+disconnect conn5;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-22.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-22.result
index 412f5afb9e0..b0d557b24fe 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-22.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-22.result
@@ -1,6 +1,12 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
+connect conn2,localhost,root,,;
+connect conn3,localhost,root,,;
+connect conn4,localhost,root,,;
+connect conn5,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection default;
create table foo (a int, b varchar (100), primary key (a)) engine=TokuDB;
show create table foo;
Table Create Table
@@ -9,57 +15,72 @@ foo CREATE TABLE `foo` (
`b` varchar(100) DEFAULT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
+connection conn1;
set session transaction isolation level repeatable read;
begin;
select * from foo;
a b
+connection default;
replace into foo values (1,"aGARBAGE");
replace into foo values (1, "a");
+connection conn2;
set session transaction isolation level repeatable read;
begin;
select * from foo;
a b
1 a
+connection default;
replace into foo values (1,"abGARBAGE");
replace into foo values (1,"ab");
+connection conn3;
set session transaction isolation level repeatable read;
begin;
select * from foo;
a b
1 ab
+connection default;
replace into foo values (1,"abcGARBAGE");
replace into foo values (1,"abc");
+connection conn4;
set session transaction isolation level repeatable read;
begin;
select * from foo;
a b
1 abc
+connection default;
replace into foo values (1,"abcdGARBAGE");
replace into foo values (1,"abcd");
+connection conn5;
set session transaction isolation level repeatable read;
begin;
select * from foo;
a b
1 abcd
+connection conn1;
select * from foo;
a b
commit;
+connection conn2;
select * from foo;
a b
1 a
commit;
+connection conn3;
select * from foo;
a b
1 ab
commit;
+connection conn4;
select * from foo;
a b
1 abc
commit;
+connection conn5;
select * from foo;
a b
1 abcd
commit;
+connection default;
select * from foo;
a b
1 abcd
@@ -67,5 +88,11 @@ replace into foo values (1,"1");
select * from foo;
a b
1 1
+disconnect conn1;
+disconnect conn2;
+disconnect conn3;
+disconnect conn4;
+disconnect conn5;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-23.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-23.result
index 9af10709db6..97814d0cd96 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-23.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-23.result
@@ -1,6 +1,12 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
+connect conn2,localhost,root,,;
+connect conn3,localhost,root,,;
+connect conn4,localhost,root,,;
+connect conn5,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection default;
create table foo (a int, b varchar (100), primary key (a)) engine=TokuDB;
show create table foo;
Table Create Table
@@ -9,51 +15,72 @@ foo CREATE TABLE `foo` (
`b` varchar(100) DEFAULT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
+connection conn1;
set session transaction isolation level repeatable read;
begin;
select * from foo;
a b
+connection default;
replace into foo values (1, "aGARBAGE");
replace into foo values (1, "a");
+connection conn2;
set session transaction isolation level repeatable read;
begin;
select * from foo;
a b
1 a
+connection default;
replace into foo values (1, "delete1Garbage");
delete from foo;
+connection conn3;
set session transaction isolation level repeatable read;
begin;
select * from foo;
a b
+connection default;
replace into foo values (1,"abcGARBAGE");
replace into foo values (1,"abc");
+connection conn4;
set session transaction isolation level repeatable read;
begin;
select * from foo;
a b
1 abc
+connection default;
delete from foo;
+connection conn5;
set session transaction isolation level repeatable read;
begin;
select * from foo;
a b
+connection conn1;
select * from foo;
a b
commit;
+connection conn2;
select * from foo;
a b
1 a
commit;
+connection conn3;
select * from foo;
a b
commit;
+connection conn4;
select * from foo;
a b
1 abc
commit;
+connection conn5;
select * from foo;
a b
commit;
+connection default;
+disconnect conn1;
+disconnect conn2;
+disconnect conn3;
+disconnect conn4;
+disconnect conn5;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-24.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-24.result
index aad61cdb8dc..d1ac4bc5654 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-24.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-24.result
@@ -1,28 +1,41 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
+connect conn2,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection default;
set session transaction isolation level repeatable read;
create table foo (a int, b varchar (100), primary key (a))engine=TokuDB;
+connection conn1;
set session transaction isolation level repeatable read;
begin;
# should return nothing
select * from foo;
a b
+connection default;
insert into foo values (1, "G");
replace into foo values (1, "Ga");
replace into foo values (1, "Gar");
replace into foo values (1, "Garb");
replace into foo values (1, "Garba");
replace into foo values (1, "a");
+connection conn2;
begin;
# Should read (1, "a")
select * from foo;
a b
1 a
+connection conn1;
# Should be empty
select * from foo;
a b
+connection conn2;
commit;
+connection conn1;
commit;
+connection default;
+disconnect conn1;
+disconnect conn2;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-25.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-25.result
index 69183cf37d3..7c7a1486ecb 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-25.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-25.result
@@ -1,6 +1,10 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
+connect conn2,localhost,root,,;
+connect conn3,localhost,root,,;
DROP TABLE IF EXISTS foo,bar;
+connection default;
set session transaction isolation level repeatable read;
create table foo (a int, b varchar (100), primary key (a))engine=TokuDB;
create table bar like foo;
@@ -18,37 +22,49 @@ bar CREATE TABLE `bar` (
`b` varchar(100) DEFAULT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
+connection conn1;
set session transaction isolation level repeatable read;
begin;
# should be empty
select * from foo;
a b
+connection default;
begin;
# should be empty
select * from bar;
a b
+connection conn2;
begin;
# should be empty
select * from foo;
a b
+connection default;
insert into foo values (1,"g");
commit;
replace into foo values (1,"ga");
replace into foo values (1,"gar");
replace into foo values (1,"garb");
replace into foo values (1,"aaaa");
+connection conn3;
# should have one value
select * from foo;
a b
1 aaaa
commit;
+connection conn2;
# should be empty
select * from foo;
a b
commit;
+connection conn1;
# should be empty
select * from foo;
a b
commit;
+connection default;
+disconnect conn1;
+disconnect conn2;
+disconnect conn3;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo,bar;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-28.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-28.result
index 7562495484c..ce099f5f27a 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-28.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-28.result
@@ -1,15 +1,19 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection default;
set session transaction isolation level serializable;
create table foo (a int, b varchar(10), primary key (a))engine=TokuDB;
insert into foo values (1,"a");
+connection conn1;
set session transaction isolation level repeatable read;
begin;
# should read ONLY (1,"a")
select * from foo;
a b
1 a
+connection default;
delete from foo where a=1;
insert into foo values (2,"bb");
# should read ONLY (2,"bb")
@@ -17,6 +21,7 @@ begin;
select * from foo;
a b
2 bb
+connection conn1;
# should read ONLY (1,"a")
select * From foo;
a b
@@ -24,6 +29,9 @@ a b
commit;
insert into foo values ("101000","asdf");
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection default;
commit;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-2808-read-committed.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-2808-read-committed.result
index 80f4b229987..4a9493cc437 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-2808-read-committed.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-2808-read-committed.result
@@ -1,6 +1,8 @@
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
DROP TABLE IF EXISTS foo,foo_isam;
+connection conn1;
set session transaction isolation level read committed;
create table foo ( a int, b int, primary key (a));
insert into foo values (1,1),(2,2),(3,1),(4,3);
@@ -18,8 +20,10 @@ a b
2 2
3 10
4 3
+connection default;
insert into foo values (5,5);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection conn1;
rollback;
select * from foo;
a b
@@ -34,8 +38,10 @@ a b
1 1
3 1
4 3
+connection default;
insert into foo values (5,5);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection conn1;
rollback;
select * from foo;
a b
@@ -46,6 +52,7 @@ a b
create table foo_isam (a int, b int)engine=MyISAM;
begin;
insert into foo_isam select * from foo;
+connection default;
insert into foo values (5,5);
select * from foo;
a b
@@ -54,6 +61,10 @@ a b
3 1
4 3
5 5
+connection conn1;
commit;
+connection default;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo, foo_isam;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-2808-read-uncommitted.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-2808-read-uncommitted.result
index e8ffcc7f843..8e2d2b74276 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-2808-read-uncommitted.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-2808-read-uncommitted.result
@@ -1,6 +1,8 @@
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
DROP TABLE IF EXISTS foo,foo_isam;
+connection conn1;
set session transaction isolation level read uncommitted;
create table foo ( a int, b int, primary key (a));
insert into foo values (1,1),(2,2),(3,1),(4,3);
@@ -18,8 +20,10 @@ a b
2 2
3 10
4 3
+connection default;
insert into foo values (5,5);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection conn1;
rollback;
select * from foo;
a b
@@ -34,8 +38,10 @@ a b
1 1
3 1
4 3
+connection default;
insert into foo values (5,5);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection conn1;
rollback;
select * from foo;
a b
@@ -46,6 +52,7 @@ a b
create table foo_isam (a int, b int)engine=MyISAM;
begin;
insert into foo_isam select * from foo;
+connection default;
insert into foo values (5,5);
select * from foo;
a b
@@ -54,6 +61,10 @@ a b
3 1
4 3
5 5
+connection conn1;
commit;
+connection default;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo, foo_isam;
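
The "Lock wait timeout exceeded" lines recorded in these hunks come from statements the test expects to fail: in a .result file the error text simply appears after the statement, while the driving .test file has to announce it with an --error directive. A minimal sketch of that mechanism (statement details are illustrative, not taken from the patch):

connection conn1;
begin;
insert into foo values (5,5);
connection default;
# the same key is write-locked by conn1's uncommitted insert
--error ER_LOCK_WAIT_TIMEOUT
insert into foo values (5,5);
connection conn1;
rollback;
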
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-29.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-29.result
index b532eabb301..f741dca5e3b 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-29.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-29.result
@@ -1,10 +1,14 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection conn1;
set session transaction isolation level repeatable read;
+connection default;
set session transaction isolation level repeatable read;
create table foo (a int, b int, primary key (a))engine=TokUDB;
insert into foo values (1,1),(2,2),(3,3),(4,4),(5,5),(10,10),(20,20),(30,30),(40,40),(50,50);
+connection conn1;
begin;
select * from foo;
a b
@@ -22,6 +26,7 @@ a b
explain select * from foo where a > 1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE foo range PRIMARY PRIMARY 4 NULL 9 Using where
+connection default;
delete from foo where a > 5;
# number of rows should be 9
explain select * from foo where a > 1;
@@ -34,6 +39,7 @@ a b
3 3
4 4
5 5
+connection conn1;
# number of rows should be 9
explain select * from foo where a > 1;
id select_type table type possible_keys key key_len ref rows Extra
@@ -51,5 +57,8 @@ a b
40 40
50 50
commit;
+connection default;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-3.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-3.result
index 470771b968f..bf1e030e80b 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-3.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-3.result
@@ -1,7 +1,9 @@
set session transaction isolation level repeatable read;
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection conn1;
set session transaction isolation level repeatable read;
create table foo (a int, b int, primary key (a))engine=TokuDB;
show create table foo;
@@ -16,30 +18,38 @@ begin;
select * from foo;
a b
1 1
+connection default;
begin;
select * from foo;
a b
1 1
+connection conn1;
replace into foo values (1,100), (2,200);
#transaction that did the insert about to read
select * from foo;
a b
1 100
2 200
+connection default;
#this should read just (1,1)
select * from foo;
a b
1 1
+connection conn1;
commit;
# this should read 2 values, (1,100) and (2,200)
select * from foo;
a b
1 100
2 200
+connection default;
#this should read just (1,1)
select * from foo;
a b
1 1
commit;
+connection default;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-30.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-30.result
index f293fe94ab2..37701efd366 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-30.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-30.result
@@ -1,10 +1,14 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection conn1;
set session transaction isolation level repeatable read;
+connection default;
set session transaction isolation level repeatable read;
create table foo (a int, b int, primary key (a))engine=TokUDB;
insert into foo values (1,1),(2,2),(3,3),(4,4),(5,5),(10,10),(20,20),(30,30),(40,40),(50,50);
+connection conn1;
begin;
select * from foo;
a b
@@ -22,6 +26,7 @@ a b
explain select * from foo where a < 50;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE foo range PRIMARY PRIMARY 4 NULL 9 Using where
+connection default;
delete from foo where a < 10;
# number of rows should be 9
explain select * from foo where a < 50;
@@ -34,6 +39,7 @@ a b
20 20
30 30
40 40
+connection conn1;
# number of rows should be 9
explain select * from foo where a < 50;
id select_type table type possible_keys key key_len ref rows Extra
@@ -51,5 +57,8 @@ a b
30 30
40 40
commit;
+connection default;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-31.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-31.result
index cb55f679061..a4043482397 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-31.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-31.result
@@ -1,10 +1,14 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection conn1;
set session transaction isolation level repeatable read;
+connection default;
set session transaction isolation level repeatable read;
create table foo (a int, b int, primary key (a))engine=TokUDB;
insert into foo values (1,1),(2,2),(3,3),(4,4),(5,5),(10,10),(20,20),(30,30),(40,40),(50,50);
+connection conn1;
begin;
select * from foo;
a b
@@ -22,6 +26,7 @@ a b
explain select * from foo where a > 1 and a < 50;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE foo range PRIMARY PRIMARY 4 NULL 8 Using where
+connection default;
delete from foo where a = 2 or a = 4 or a = 10 or a = 30 or a = 50;
# number of rows should be 8
explain select * from foo where a > 1 and a < 50;
@@ -34,6 +39,7 @@ a b
5 5
20 20
40 40
+connection conn1;
# number of rows should be 8
explain select * from foo where a > 1 and a < 50;
id select_type table type possible_keys key key_len ref rows Extra
@@ -50,5 +56,8 @@ a b
30 30
40 40
commit;
+connection default;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-33.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-33.result
index 989cfeb43a1..c8450ed0d55 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-33.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-33.result
@@ -1,6 +1,8 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection default;
set session transaction isolation level repeatable read;
create table foo (a int, b int, c int, primary key (a), key (b))engine=TokuDB;
show create table foo;
@@ -33,6 +35,7 @@ id select_type table type possible_keys key key_len ref rows Extra
select * from foo where b=50;
a b c
5 50 500
+connection conn1;
set session transaction isolation level repeatable read;
replace into foo values (5,50,555);
select * from foo;
@@ -46,6 +49,7 @@ a b c
7 70 700
8 80 800
9 90 900
+connection default;
# should use key b
explain select * from foo where b=50;
id select_type table type possible_keys key key_len ref rows Extra
@@ -59,5 +63,8 @@ replace into foo values (5,50,111111111);
select * from foo where b=50;
a b c
5 50 111111111
+connection default;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-34.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-34.result
index bf1d47f8eae..47f4afa23e4 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-34.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-34.result
@@ -1,6 +1,8 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection default;
set session transaction isolation level repeatable read;
create table foo (a int, b int, c int, primary key (a), key (b))engine=TokuDB;
show create table foo;
@@ -34,6 +36,7 @@ select * from foo where b=50;
a b c
5 50 500
replace into foo values (5,50,1515);
+connection conn1;
set session transaction isolation level repeatable read;
begin;
# should use key b
@@ -44,6 +47,7 @@ id select_type table type possible_keys key key_len ref rows Extra
select * from foo where b=50;
a b c
5 50 500
+connection default;
commit;
# should use key b
explain select * from foo where b=50;
@@ -53,6 +57,7 @@ id select_type table type possible_keys key key_len ref rows Extra
select * from foo where b=50;
a b c
5 50 1515
+connection conn1;
# should use key b
explain select * from foo where b=50;
id select_type table type possible_keys key key_len ref rows Extra
@@ -70,5 +75,8 @@ id select_type table type possible_keys key key_len ref rows Extra
select * from foo where b=50;
a b c
5 50 1515
+connection default;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-35.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-35.result
index 557609e0345..83af37b1cfc 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-35.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-35.result
@@ -1,6 +1,8 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection default;
set session transaction isolation level read committed;
create table foo (a int, b int, c int, primary key (a), key (b))engine=TokuDB;
show create table foo;
@@ -34,6 +36,7 @@ select * from foo where b=50;
a b c
5 50 500
replace into foo values (5,50,1515);
+connection conn1;
set session transaction isolation level read committed;
begin;
# should use key b
@@ -44,6 +47,7 @@ id select_type table type possible_keys key key_len ref rows Extra
select * from foo where b=50;
a b c
5 50 500
+connection default;
commit;
# should use key b
explain select * from foo where b=50;
@@ -53,6 +57,7 @@ id select_type table type possible_keys key key_len ref rows Extra
select * from foo where b=50;
a b c
5 50 1515
+connection conn1;
# should use key b
explain select * from foo where b=50;
id select_type table type possible_keys key key_len ref rows Extra
@@ -70,5 +75,8 @@ id select_type table type possible_keys key key_len ref rows Extra
select * from foo where b=50;
a b c
5 50 1515
+connection default;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-36.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-36.result
index b38f6d98f0e..cbdd963c2e8 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-36.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-36.result
@@ -1,6 +1,8 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection default;
set session transaction isolation level serializable;
create table foo (a int, b int, c int, primary key (a), key (b))engine=TokuDB;
show create table foo;
@@ -34,6 +36,7 @@ select * from foo where b=50;
a b c
5 50 500
replace into foo values (5,50,1515);
+connection conn1;
set session transaction isolation level serializable;
begin;
# should use key b
@@ -43,6 +46,7 @@ id select_type table type possible_keys key key_len ref rows Extra
# timeout
select * from foo where b=50;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection default;
commit;
# should use key b
explain select * from foo where b=50;
@@ -52,6 +56,7 @@ id select_type table type possible_keys key key_len ref rows Extra
select * from foo where b=50;
a b c
5 50 1515
+connection conn1;
# should use key b
explain select * from foo where b=50;
id select_type table type possible_keys key key_len ref rows Extra
@@ -69,5 +74,8 @@ id select_type table type possible_keys key key_len ref rows Extra
select * from foo where b=50;
a b c
5 50 1515
+connection default;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-37.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-37.result
index 4319ef036fa..5f397d7dffb 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-37.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-37.result
@@ -1,6 +1,8 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection default;
set session transaction isolation level read uncommitted;
create table foo (a int, b int, c int, primary key (a), key (b))engine=TokuDB;
show create table foo;
@@ -34,6 +36,7 @@ select * from foo where b=50;
a b c
5 50 500
replace into foo values (5,50,1515);
+connection conn1;
set session transaction isolation level read uncommitted;
begin;
# should use key b
@@ -44,6 +47,7 @@ id select_type table type possible_keys key key_len ref rows Extra
select * from foo where b=50;
a b c
5 50 1515
+connection default;
commit;
# should use key b
explain select * from foo where b=50;
@@ -53,6 +57,7 @@ id select_type table type possible_keys key key_len ref rows Extra
select * from foo where b=50;
a b c
5 50 1515
+connection conn1;
# should use key b
explain select * from foo where b=50;
id select_type table type possible_keys key key_len ref rows Extra
@@ -70,5 +75,8 @@ id select_type table type possible_keys key key_len ref rows Extra
select * from foo where b=50;
a b c
5 50 1515
+connection default;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-38.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-38.result
index 9e2996931c6..8cbcf3dffd2 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-38.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-38.result
@@ -1,6 +1,8 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo, foo_isam;
+connection default;
set session transaction isolation level repeatable read;
create table foo (a int, b int, c int, primary key (a), key (b))engine=TokUDB;
show create table foo;
@@ -36,9 +38,13 @@ insert into foo_isam select * from foo where b=50;
select * From foo_isam;
a b c
5 50 500
+connection conn1;
set session transaction isolation level repeatable read;
# should fail with lock timeout because of read lock grabbed earlier
replace into foo values (5, 1,1);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection default;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo, foo_isam;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-39.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-39.result
index 905d5c2292a..5dcdc259995 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-39.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-39.result
@@ -1,6 +1,8 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection default;
set session tokudb_load_save_space=1;
create table foo (a int) engine=TokuDB;
show create table foo;
@@ -9,6 +11,7 @@ foo CREATE TABLE `foo` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
insert into foo values (1),(2),(3);
+connection conn1;
set session transaction isolation level repeatable read;
begin;
# should return (1),(2),(3)
@@ -17,17 +20,22 @@ a
1
2
3
+connection default;
delete from foo;
insert into foo values (4);
# should return (4)
select * from foo;
a
4
+connection conn1;
# should return (1),(2),(3)
select * from foo;
a
1
2
3
+connection default;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-4.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-4.result
index 5e13ce32a24..f05ec03b7de 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-4.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-4.result
@@ -1,7 +1,9 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
set session transaction isolation level repeatable read;
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection conn1;
set session transaction isolation level repeatable read;
create table foo (a int, b int, primary key (a))engine=TokuDB;
insert into foo values (1,1);
@@ -19,10 +21,14 @@ a b
begin;
replace into foo values (1,100), (2,200);
commit;
+connection default;
#should read (1,100),(2,200)
select * from foo;
a b
1 100
2 200
+connection default;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-40.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-40.result
index 1330fe41c50..7099b787659 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-40.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-40.result
@@ -1,6 +1,8 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection default;
set session tokudb_load_save_space=0;
create table foo (a int) engine=TokuDB;
show create table foo;
@@ -9,6 +11,7 @@ foo CREATE TABLE `foo` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
insert into foo values (1),(2),(3);
+connection conn1;
set session transaction isolation level repeatable read;
begin;
# should return (1),(2),(3)
@@ -17,17 +20,22 @@ a
1
2
3
+connection default;
delete from foo;
insert into foo values (4);
# should return (4)
select * from foo;
a
4
+connection conn1;
# should return (1),(2),(3)
select * from foo;
a
1
2
3
+connection default;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-5.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-5.result
index 3df8f211589..fdb3c97e9b5 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-5.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-5.result
@@ -1,6 +1,8 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
set session transaction isolation level repeatable read;
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
+connect conn2,localhost,root,,;
DROP TABLE IF EXISTS foo;
create table foo (a int, b int, primary key (a))engine=TokuDB;
show create table foo;
@@ -11,34 +13,42 @@ foo CREATE TABLE `foo` (
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
insert into foo values (1,1);
+connection conn1;
set session transaction isolation level repeatable read;
begin;
# Should read just (1,1)
select * from foo;
a b
1 1
+connection conn2;
set session transaction isolation level read committed;
begin;
# Should read just (1,1)
select * from foo;
a b
1 1
+connection default;
replace into foo values (1,10),(2,20);
+connection conn1;
# Should read just (1,1)
select * from foo;
a b
1 1
+connection conn2;
# Should read just (1,10), (2,20)
select * from foo;
a b
1 10
2 20
+connection default;
replace into foo values (1,100),(2,200),(3,300);
+connection conn1;
# Should read just (1,1)
select * from foo;
a b
1 1
commit;
+connection conn2;
# Should read just (1,100), (2,200),(3,300)
select * from foo;
a b
@@ -46,5 +56,9 @@ a b
2 200
3 300
commit;
+connection default;
+disconnect conn1;
+disconnect conn2;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
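
The mvcc-5 hunk is the one place this pattern contrasts two isolation levels against the same writer: conn1 (repeatable read) keeps returning the snapshot pinned by its first read, while conn2 (read committed) takes a fresh snapshot per statement and therefore sees each committed replace. Condensed to its core (a sketch, not the full recorded sequence):

connection conn1;
set session transaction isolation level repeatable read;
begin;
# pins conn1's snapshot at (1,1)
select * from foo;
connection conn2;
set session transaction isolation level read committed;
begin;
select * from foo;
connection default;
# autocommitted write, immediately visible to read-committed readers
replace into foo values (1,10),(2,20);
connection conn1;
# still sees only (1,1)
select * from foo;
connection conn2;
# now sees (1,10),(2,20)
select * from foo;
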
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-6.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-6.result
index b45faa49026..a74e398c8bb 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-6.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-6.result
@@ -1,6 +1,8 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection default;
set session transaction isolation level repeatable read;
create table foo (a int, b int, primary key (a), key (b))engine=TokuDB;
show create table foo;
@@ -14,6 +16,7 @@ foo CREATE TABLE `foo` (
insert into foo values (100,100);
begin;
insert into foo values (1,100);
+connection conn1;
set session transaction isolation level repeatable read;
begin;
# should NOT see (1,100)
@@ -26,12 +29,14 @@ a b
# should fail with a lock wait timeout
insert into foo values (1,1000);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection default;
commit;
# should return two values
select * from foo;
a b
1 100
100 100
+connection conn1;
# should be empty
select * from foo where a=1;
a b
@@ -39,5 +44,8 @@ a b
insert into foo values (1,1000);
ERROR 23000: Duplicate entry '1' for key 'PRIMARY'
commit;
+connection default;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-7.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-7.result
index 3baa212c490..2d5e0de2ac0 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-7.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-7.result
@@ -1,6 +1,8 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection default;
set session transaction isolation level repeatable read;
create table foo (a int, b int, primary key (a))engine=TokuDB;
show create table foo;
@@ -21,11 +23,13 @@ select * from foo;
a b
1 100
100 100
+connection conn1;
set session transaction isolation level repeatable read;
# should NOT see (1,100)
select * from foo;
a b
1 100
+connection default;
# should see (1,100)
select * from foo;
a b
@@ -36,5 +40,7 @@ rollback;
select * from foo;
a b
1 100
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-8.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-8.result
index 450ad63fd42..cc7e12193ea 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-8.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-8.result
@@ -1,6 +1,8 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo, foo_isam;
+connection default;
set session transaction isolation level repeatable read;
create table foo ( a int, b int, c int, primary key (a), key (b))engine=TokuDB;
show create table foo;
@@ -20,10 +22,14 @@ id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE foo ref b b 5 const 1
begin;
insert into foo_isam select * from foo where b=30;
+connection conn1;
set session transaction isolation level repeatable read;
# should get a lock error
replace into foo values (3,3,3);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection default;
commit;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo, foo_isam;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-9.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-9.result
index 7dcae265518..fe6be9b24cb 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-9.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-9.result
@@ -1,6 +1,12 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
+connect conn2,localhost,root,,;
+connect conn3,localhost,root,,;
+connect conn4,localhost,root,,;
+connect conn5,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection default;
create table foo (a int, b varchar (100), primary key (a)) engine=TokuDB;
show create table foo;
Table Create Table
@@ -9,24 +15,31 @@ foo CREATE TABLE `foo` (
`b` varchar(100) DEFAULT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
+connection conn1;
set session transaction isolation level repeatable read;
begin;
select * from foo;
a b
+connection default;
replace into foo values (1, "a");
+connection conn2;
set session transaction isolation level repeatable read;
begin;
select * from foo;
a b
1 a
+connection default;
replace into foo values (1,"ab"), (2, "ab");
+connection conn3;
set session transaction isolation level repeatable read;
begin;
select * from foo;
a b
1 ab
2 ab
+connection default;
replace into foo values (1,"abc"),(2,"abc"),(3, "abc");
+connection conn4;
set session transaction isolation level repeatable read;
begin;
select * from foo;
@@ -34,7 +47,9 @@ a b
1 abc
2 abc
3 abc
+connection default;
replace into foo values (1,"abcd"),(2,"abcd"),(3, "abcd"),(4, "abcd");
+connection conn5;
set session transaction isolation level repeatable read;
begin;
select * from foo;
@@ -43,24 +58,29 @@ a b
2 abcd
3 abcd
4 abcd
+connection conn1;
select * from foo;
a b
commit;
+connection conn2;
select * from foo;
a b
1 a
commit;
+connection conn3;
select * from foo;
a b
1 ab
2 ab
commit;
+connection conn4;
select * from foo;
a b
1 abc
2 abc
3 abc
commit;
+connection conn5;
select * from foo;
a b
1 abcd
@@ -68,5 +88,12 @@ a b
3 abcd
4 abcd
commit;
+connection default;
+disconnect conn1;
+disconnect conn2;
+disconnect conn3;
+disconnect conn4;
+disconnect conn5;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/mvcc-checksum-locks.result b/storage/tokudb/mysql-test/tokudb/r/mvcc-checksum-locks.result
index c6bea70265b..8e0a2d45926 100644
--- a/storage/tokudb/mysql-test/tokudb/r/mvcc-checksum-locks.result
+++ b/storage/tokudb/mysql-test/tokudb/r/mvcc-checksum-locks.result
@@ -1,6 +1,8 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection default;
create table foo (a int)engine=TokuDB;
show create table foo;
Table Create Table
@@ -10,6 +12,7 @@ foo CREATE TABLE `foo` (
insert into foo values (1),(2),(3);
begin;
insert into foo values (4);
+connection conn1;
set session transaction isolation level repeatable read;
# 1,2,3
select * from foo;
@@ -50,6 +53,9 @@ commit;
checksum table foo;
Table Checksum
test.foo NULL
+connection default;
commit;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb/r/replace-ignore.result b/storage/tokudb/mysql-test/tokudb/r/replace-ignore.result
index 247c23f9788..e6f19fc2ead 100644
--- a/storage/tokudb/mysql-test/tokudb/r/replace-ignore.result
+++ b/storage/tokudb/mysql-test/tokudb/r/replace-ignore.result
@@ -185,7 +185,7 @@ a b c
8 8 80
explain select * from t where c = -1;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t ref c c 5 const 1 Using where; Using index
+1 SIMPLE t ref c c 5 const 1 Using index
select * from t where c = -1;
a b c
insert ignore t values (15, 15, -1);
@@ -211,7 +211,7 @@ a b c
15 15 150
explain select * from t where c = -1;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t ref c c 5 const 1 Using where; Using index
+1 SIMPLE t ref c c 5 const 1 Using index
select * from t where c = -1;
a b c
drop table t;
@@ -399,7 +399,7 @@ select * from t where c = 80;
a b c
explain select * from t where c = -1;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t ref c c 5 const 1 Using where; Using index
+1 SIMPLE t ref c c 5 const 1 Using index
select * from t where c = -1;
a b c
8 8 -1
@@ -423,7 +423,7 @@ select * from t where c = 150;
a b c
explain select * from t where c = -1;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t ref c c 5 const 2 Using where; Using index
+1 SIMPLE t ref c c 5 const 2 Using index
select * from t where c = -1;
a b c
8 8 -1
diff --git a/storage/tokudb/mysql-test/tokudb/r/rows-32m-0.result b/storage/tokudb/mysql-test/tokudb/r/rows-32m-0.result
index db72519f024..bcbd908ef6c 100644
--- a/storage/tokudb/mysql-test/tokudb/r/rows-32m-0.result
+++ b/storage/tokudb/mysql-test/tokudb/r/rows-32m-0.result
@@ -5,6 +5,7 @@ drop table if exists t;
create table t (id int not null primary key, v longblob not null);
select @@max_allowed_packet into @my_max_allowed_packet;
set global max_allowed_packet=100000000;
+connect conn1,localhost,root,,;
insert into t values (1,repeat('a',32*1024*1024));
ERROR HY000: Got error 22 "Invalid argument" from storage engine TokuDB
insert into t values (1,repeat('a',32*1024*1024-1));
@@ -32,5 +33,7 @@ id length(v)
2 1
3 1
4 1
+connection default;
+disconnect conn1;
set global max_allowed_packet=@my_max_allowed_packet;
drop table t;
diff --git a/storage/tokudb/mysql-test/tokudb/r/rows-32m-1.result b/storage/tokudb/mysql-test/tokudb/r/rows-32m-1.result
index b97d1825fb3..01cc05bffe1 100644
--- a/storage/tokudb/mysql-test/tokudb/r/rows-32m-1.result
+++ b/storage/tokudb/mysql-test/tokudb/r/rows-32m-1.result
@@ -5,6 +5,7 @@ drop table if exists t;
create table t (id int not null primary key, v0 longblob not null,v1 longblob not null);
select @@max_allowed_packet into @my_max_allowed_packet;
set global max_allowed_packet=100000000;
+connect conn1,localhost,root,,;
insert into t values (1,repeat('a',16*1024*1024),repeat('b',16*1024*1024));
ERROR HY000: Got error 22 "Invalid argument" from storage engine TokuDB
insert into t values (1,repeat('a',16*1024*1024),repeat('b',16*1024*1024-1));
@@ -40,5 +41,7 @@ id length(v0) length(v1)
2 1 1
3 1 1
4 1 1
+connection default;
+disconnect conn1;
set global max_allowed_packet=@my_max_allowed_packet;
drop table t;
diff --git a/storage/tokudb/mysql-test/tokudb/r/rows-32m-rand-insert.result b/storage/tokudb/mysql-test/tokudb/r/rows-32m-rand-insert.result
index b287c70469e..894de50f863 100644
--- a/storage/tokudb/mysql-test/tokudb/r/rows-32m-rand-insert.result
+++ b/storage/tokudb/mysql-test/tokudb/r/rows-32m-rand-insert.result
@@ -3,6 +3,7 @@ drop table if exists t;
create table t (id int not null primary key, v longblob not null);
select @@max_allowed_packet into @my_max_allowed_packet;
set global max_allowed_packet=100000000;
+connect conn1,localhost,root,,;
insert ignore into t (id,v) values (floor(rand()*1000),repeat('a',(32*1024*1024-4)*rand()));
insert ignore into t (id,v) values (floor(rand()*1000),repeat('a',(32*1024*1024-4)*rand()));
insert ignore into t (id,v) values (floor(rand()*1000),repeat('a',(32*1024*1024-4)*rand()));
@@ -1003,6 +1004,8 @@ insert ignore into t (id,v) values (floor(rand()*1000),repeat('a',(32*1024*1024-
insert ignore into t (id,v) values (floor(rand()*1000),repeat('a',(32*1024*1024-4)*rand()));
insert ignore into t (id,v) values (floor(rand()*1000),repeat('a',(32*1024*1024-4)*rand()));
insert ignore into t (id,v) values (floor(rand()*1000),repeat('a',(32*1024*1024-4)*rand()));
+connection default;
+disconnect conn1;
set global max_allowed_packet=@my_max_allowed_packet;
check table t;
Table Op Msg_type Msg_text
diff --git a/storage/tokudb/mysql-test/tokudb/r/rows-32m-seq-insert.result b/storage/tokudb/mysql-test/tokudb/r/rows-32m-seq-insert.result
index a242cac98c2..ddd5b908231 100644
--- a/storage/tokudb/mysql-test/tokudb/r/rows-32m-seq-insert.result
+++ b/storage/tokudb/mysql-test/tokudb/r/rows-32m-seq-insert.result
@@ -3,6 +3,7 @@ drop table if exists t;
create table t (id int not null auto_increment primary key, v longblob not null);
select @@max_allowed_packet into @my_max_allowed_packet;
set global max_allowed_packet=100000000;
+connect conn1,localhost,root,,;
insert into t (v) values (repeat('a',(32*1024*1024-4)*rand()));
insert into t (v) values (repeat('a',(32*1024*1024-4)*rand()));
insert into t (v) values (repeat('a',(32*1024*1024-4)*rand()));
@@ -1003,6 +1004,8 @@ insert into t (v) values (repeat('a',(32*1024*1024-4)*rand()));
insert into t (v) values (repeat('a',(32*1024*1024-4)*rand()));
insert into t (v) values (repeat('a',(32*1024*1024-4)*rand()));
insert into t (v) values (repeat('a',(32*1024*1024-4)*rand()));
+connection default;
+disconnect conn1;
set global max_allowed_packet=@my_max_allowed_packet;
check table t;
Table Op Msg_type Msg_text
diff --git a/storage/tokudb/mysql-test/tokudb/r/tokudb_mrr.result b/storage/tokudb/mysql-test/tokudb/r/tokudb_mrr.result
index d79f19202a3..024580d4258 100644
--- a/storage/tokudb/mysql-test/tokudb/r/tokudb_mrr.result
+++ b/storage/tokudb/mysql-test/tokudb/r/tokudb_mrr.result
@@ -296,6 +296,9 @@ drop table t1, t2;
#
# Bug#41029 "MRR: SELECT FOR UPDATE fails to lock gaps (InnoDB table)"
#
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
SET AUTOCOMMIT=0;
CREATE TABLE t1 (
dummy INT PRIMARY KEY,
@@ -316,11 +319,16 @@ SELECT * FROM t1 WHERE a >= 2 FOR UPDATE;
dummy a b
3 3 3
5 5 5
+connection con2;
SET AUTOCOMMIT=0;
SET TOKUDB_LOCK_TIMEOUT=2;
START TRANSACTION;
INSERT INTO t1 VALUES (2,2,2);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
ROLLBACK;
+connection con1;
ROLLBACK;
DROP TABLE t1;
+connection default;
+disconnect con1;
+disconnect con2;
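
The Bug#41029 hunk now declares its two sessions explicitly and checks that SELECT ... FOR UPDATE under MRR locks gaps as well as rows: con2's INSERT of (2,2,2) lands between rows already locked by con1 and times out. In the .test source the expected failure would again be flagged with --error; a sketch using the names from the hunk:

connection con1;
SET AUTOCOMMIT=0;
START TRANSACTION;
# locks the matching rows and the gaps between them
SELECT * FROM t1 WHERE a >= 2 FOR UPDATE;
connection con2;
SET AUTOCOMMIT=0;
# fail fast instead of waiting out the default lock timeout
SET TOKUDB_LOCK_TIMEOUT=2;
START TRANSACTION;
--error ER_LOCK_WAIT_TIMEOUT
INSERT INTO t1 VALUES (2,2,2);
ROLLBACK;
connection con1;
ROLLBACK;
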
diff --git a/storage/tokudb/mysql-test/tokudb/r/tokudb_support_xa.result b/storage/tokudb/mysql-test/tokudb/r/tokudb_support_xa.result
index c265f38cdc2..120e8de7c7f 100644
--- a/storage/tokudb/mysql-test/tokudb/r/tokudb_support_xa.result
+++ b/storage/tokudb/mysql-test/tokudb/r/tokudb_support_xa.result
@@ -88,6 +88,8 @@ begin;
ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the IDLE state
create table t2 (a int);
ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the IDLE state
+connect con1,localhost,root,,;
+connection con1;
xa start 'testa','testb';
ERROR XAE08: XAER_DUPID: The XID already exists
xa start 'testa','testb', 123;
@@ -101,6 +103,7 @@ ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction
xa recover;
formatID gtrid_length bqual_length data
11 5 5 testb 0@P`
+connection default;
xa prepare 'testa','testb';
xa recover;
formatID gtrid_length bqual_length data
@@ -114,6 +117,8 @@ ERROR 42000: You have an error in your SQL syntax; check the manual that corresp
select * from t1;
a
20
+disconnect con1;
+connection default;
drop table t1;
'#--------------------end------------------------#'
SET @@session.tokudb_support_xa = @session_start_value;
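
The tokudb_support_xa hunk adds a second connection to exercise XID uniqueness: while the default connection holds an XA transaction with XID 'testa','testb', con1's attempt to start the same XID fails with XAER_DUPID, and xa recover on either session lists the prepared branch. For reference, the single-connection XA lifecycle these commands walk through is the standard SQL sequence (not specific to this patch):

xa start 'testa','testb';
insert into t1 values (20);
xa end 'testa','testb';
xa prepare 'testa','testb';
# xa recover lists the prepared transaction until it is resolved
xa recover;
xa commit 'testa','testb';
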
diff --git a/storage/tokudb/mysql-test/tokudb/r/type_bit_innodb.result b/storage/tokudb/mysql-test/tokudb/r/type_bit_innodb.result
index 64a445ebacb..46d120813a5 100644
--- a/storage/tokudb/mysql-test/tokudb/r/type_bit_innodb.result
+++ b/storage/tokudb/mysql-test/tokudb/r/type_bit_innodb.result
@@ -258,7 +258,7 @@ a+0 b+0
127 403
explain select a+0, b+0 from t1 where a > 40 and a < 70 order by 2;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 range a a 2 NULL 8 Using where; Using index; Using filesort
+1 SIMPLE t1 range a a 2 NULL 9 Using where; Using index; Using filesort
select a+0, b+0 from t1 where a > 40 and a < 70 order by 2;
a+0 b+0
57 135
diff --git a/storage/tokudb/mysql-test/tokudb/r/type_blob.result b/storage/tokudb/mysql-test/tokudb/r/type_blob.result
index 9aa6fef163c..1350bc03045 100644
--- a/storage/tokudb/mysql-test/tokudb/r/type_blob.result
+++ b/storage/tokudb/mysql-test/tokudb/r/type_blob.result
@@ -24,8 +24,8 @@ create table t3 (a long, b long byte);
show create TABLE t3;
Table Create Table
t3 CREATE TABLE `t3` (
- `a` mediumtext,
- `b` mediumblob
+ `a` mediumtext DEFAULT NULL,
+ `b` mediumblob DEFAULT NULL
) ENGINE=ENGINE DEFAULT CHARSET=latin1
show create TABLE t4;
Table Create Table
@@ -38,9 +38,9 @@ ERROR 42000: Column length too big for column 'a' (max = 255); use BLOB or TEXT
CREATE TABLE t2 (a char(256));
ERROR 42000: Column length too big for column 'a' (max = 255); use BLOB or TEXT instead
CREATE TABLE t1 (a varchar(70000) default "hello");
-ERROR 42000: Column length too big for column 'a' (max = 65532); use BLOB or TEXT instead
+Warnings:
+Note 1246 Converting column 'a' from VARCHAR to TEXT
CREATE TABLE t2 (a blob default "hello");
-ERROR 42000: BLOB/TEXT column 'a' can't have a default value
drop table if exists t1,t2;
create table t1 (nr int(5) not null auto_increment,b blob,str char(10), primary key (nr));
insert into t1 values (null,"a","A");
@@ -363,7 +363,7 @@ create table t1 (a text, key (a(2100)));
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `a` text,
+ `a` text DEFAULT NULL,
KEY `a` (`a`(2100))
) ENGINE=ENGINE DEFAULT CHARSET=latin1
drop table t1;
@@ -502,11 +502,7 @@ foobar boggle
fish 10
drop table t1;
create table t1 (id integer auto_increment unique,imagem LONGBLOB not null default '');
-Warnings:
-Warning 1101 BLOB/TEXT column 'imagem' can't have a default value
insert into t1 (id) values (1);
-Warnings:
-Warning 1364 Field 'imagem' doesn't have a default value
select
charset(load_file('../../std_data/words.dat')),
collation(load_file('../../std_data/words.dat')),
@@ -776,21 +772,19 @@ NULL
620000000000
drop table t1;
create table t1 (a text default '');
-Warnings:
-Warning 1101 BLOB/TEXT column 'a' can't have a default value
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `a` text
+ `a` text DEFAULT ''
) ENGINE=ENGINE DEFAULT CHARSET=latin1
insert into t1 values (default);
select * from t1;
a
-NULL
+
drop table t1;
set @@sql_mode='TRADITIONAL';
create table t1 (a text default '');
-ERROR 42000: BLOB/TEXT column 'a' can't have a default value
+drop table t1;
set @@sql_mode='';
CREATE TABLE t (c TEXT CHARSET ASCII);
INSERT INTO t (c) VALUES (REPEAT('1',65537));
@@ -861,7 +855,7 @@ ERROR 42000: Column length too big for column 'a' (max = 255); use BLOB or TEXT
CREATE TABLE b15776 (a char(4294967295));
ERROR 42000: Column length too big for column 'a' (max = 255); use BLOB or TEXT instead
CREATE TABLE b15776 (a char(4294967296));
-ERROR 42000: Display width out of range for 'a' (max = 4294967295)
+ERROR 42000: Column length too big for column 'a' (max = 255); use BLOB or TEXT instead
CREATE TABLE b15776 (a year(4294967295));
Warnings:
Note 1287 'YEAR(4294967295)' is deprecated and will be removed in a future release. Please use YEAR(4) instead
@@ -870,8 +864,6 @@ SELECT * FROM b15776;
a
2042
DROP TABLE b15776;
-CREATE TABLE b15776 (a year(4294967296));
-ERROR 42000: Display width out of range for 'a' (max = 4294967295)
CREATE TABLE b15776 (a year(0));
Warnings:
Note 1287 'YEAR(0)' is deprecated and will be removed in a future release. Please use YEAR(4) instead
@@ -879,11 +871,9 @@ DROP TABLE b15776;
CREATE TABLE b15776 (a year(-2));
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '-2))' at line 1
CREATE TABLE b15776 (a int(999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999));
-ERROR 42000: Display width out of range for 'a' (max = 4294967295)
+ERROR 42000: Display width out of range for 'a' (max = 255)
CREATE TABLE b15776 (a char(999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999));
-ERROR 42000: Display width out of range for 'a' (max = 4294967295)
-CREATE TABLE b15776 (a year(999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999));
-ERROR 42000: Display width out of range for 'a' (max = 4294967295)
+ERROR 42000: Column length too big for column 'a' (max = 255); use BLOB or TEXT instead
CREATE TABLE b15776 select cast(null as char(4294967295));
show columns from b15776;
Field Type Null Key Default Extra
diff --git a/storage/tokudb/mysql-test/tokudb/r/type_date.result b/storage/tokudb/mysql-test/tokudb/r/type_date.result
index 047dc6dc777..70281843ac6 100644
--- a/storage/tokudb/mysql-test/tokudb/r/type_date.result
+++ b/storage/tokudb/mysql-test/tokudb/r/type_date.result
@@ -100,7 +100,7 @@ DROP TABLE t1, t2, t3;
CREATE TABLE t1 (y YEAR);
INSERT INTO t1 VALUES ('abc');
Warnings:
-Warning 1366 Incorrect integer value: 'abc' for column 'y' at row 1
+Warning 1366 Incorrect integer value: 'abc' for column `test`.`t1`.`y` at row 1
SELECT * FROM t1;
y
0000
@@ -211,7 +211,7 @@ a
0000-00-00
0000-00-00
INSERT INTO t1 VALUES ('0000-00-00');
-ERROR 22007: Incorrect date value: '0000-00-00' for column 'a' at row 1
+ERROR 22007: Incorrect date value: '0000-00-00' for column `test`.`t1`.`a` at row 1
SET SQL_MODE=DEFAULT;
DROP TABLE t1,t2;
CREATE TABLE t1 (a DATE);
@@ -240,7 +240,7 @@ a
1000-00-00
1000-00-00
INSERT INTO t1 VALUES ('1000-00-00');
-ERROR 22007: Incorrect date value: '1000-00-00' for column 'a' at row 1
+ERROR 22007: Incorrect date value: '1000-00-00' for column `test`.`t1`.`a` at row 1
SET SQL_MODE=DEFAULT;
DROP TABLE t1,t2;
CREATE TABLE t1 SELECT curdate() AS f1;
diff --git a/storage/tokudb/mysql-test/tokudb/r/type_datetime.result b/storage/tokudb/mysql-test/tokudb/r/type_datetime.result
index 678be1655e4..ed980f8cee1 100644
--- a/storage/tokudb/mysql-test/tokudb/r/type_datetime.result
+++ b/storage/tokudb/mysql-test/tokudb/r/type_datetime.result
@@ -218,7 +218,7 @@ insert into t1 set dt='2007-03-23 13:49:38',da=dt;
Warnings:
Note 1265 Data truncated for column 'da' at row 1
insert into t1 values ('2007-03-32','2007-03-23 13:49:38');
-ERROR 22007: Incorrect date value: '2007-03-32' for column 'da' at row 1
+ERROR 22007: Incorrect date value: '2007-03-32' for column `test`.`t1`.`da` at row 1
select * from t1;
da dt
1962-03-03 1962-03-03 00:00:00
@@ -520,7 +520,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY x1 ALL NULL NULL NULL NULL 1 100.00 Using where; FirstMatch(t1); Using join buffer (flat, BNL join)
Warnings:
Note 1276 Field or reference 'test.t1.cur_date' of SELECT #2 was resolved in SELECT #1
-Note 1003 select `test`.`t1`.`id` AS `id`,`test`.`t1`.`cur_date` AS `cur_date` from `test`.`t1` semi join (`test`.`t1` `x1`) where ((`test`.`x1`.`id` = `test`.`t1`.`id`) and (`test`.`t1`.`cur_date` = 0))
+Note 1003 select `test`.`t1`.`id` AS `id`,`test`.`t1`.`cur_date` AS `cur_date` from `test`.`t1` semi join (`test`.`t1` `x1`) where `test`.`x1`.`id` = `test`.`t1`.`id` and `test`.`t1`.`cur_date` = 0
select * from t1
where id in (select id from t1 as x1 where (t1.cur_date is null));
id cur_date
@@ -532,7 +532,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY x1 ALL NULL NULL NULL NULL 1 100.00 Using where; FirstMatch(t2); Using join buffer (flat, BNL join)
Warnings:
Note 1276 Field or reference 'test.t2.cur_date' of SELECT #2 was resolved in SELECT #1
-Note 1003 select `test`.`t2`.`id` AS `id`,`test`.`t2`.`cur_date` AS `cur_date` from `test`.`t2` semi join (`test`.`t2` `x1`) where ((`test`.`x1`.`id` = `test`.`t2`.`id`) and (`test`.`t2`.`cur_date` = 0))
+Note 1003 select `test`.`t2`.`id` AS `id`,`test`.`t2`.`cur_date` AS `cur_date` from `test`.`t2` semi join (`test`.`t2` `x1`) where `test`.`x1`.`id` = `test`.`t2`.`id` and `test`.`t2`.`cur_date` = 0
select * from t2
where id in (select id from t2 as x1 where (t2.cur_date is null));
id cur_date
@@ -546,7 +546,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY x1 ALL NULL NULL NULL NULL 2 100.00 Using where; FirstMatch(t1); Using join buffer (flat, BNL join)
Warnings:
Note 1276 Field or reference 'test.t1.cur_date' of SELECT #2 was resolved in SELECT #1
-Note 1003 select `test`.`t1`.`id` AS `id`,`test`.`t1`.`cur_date` AS `cur_date` from `test`.`t1` semi join (`test`.`t1` `x1`) where ((`test`.`x1`.`id` = `test`.`t1`.`id`) and (`test`.`t1`.`cur_date` = 0))
+Note 1003 select `test`.`t1`.`id` AS `id`,`test`.`t1`.`cur_date` AS `cur_date` from `test`.`t1` semi join (`test`.`t1` `x1`) where `test`.`x1`.`id` = `test`.`t1`.`id` and `test`.`t1`.`cur_date` = 0
select * from t1
where id in (select id from t1 as x1 where (t1.cur_date is null));
id cur_date
@@ -558,7 +558,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY x1 ALL NULL NULL NULL NULL 2 100.00 Using where; FirstMatch(t2); Using join buffer (flat, BNL join)
Warnings:
Note 1276 Field or reference 'test.t2.cur_date' of SELECT #2 was resolved in SELECT #1
-Note 1003 select `test`.`t2`.`id` AS `id`,`test`.`t2`.`cur_date` AS `cur_date` from `test`.`t2` semi join (`test`.`t2` `x1`) where ((`test`.`x1`.`id` = `test`.`t2`.`id`) and (`test`.`t2`.`cur_date` = 0))
+Note 1003 select `test`.`t2`.`id` AS `id`,`test`.`t2`.`cur_date` AS `cur_date` from `test`.`t2` semi join (`test`.`t2` `x1`) where `test`.`x1`.`id` = `test`.`t2`.`id` and `test`.`t2`.`cur_date` = 0
select * from t2
where id in (select id from t2 as x1 where (t2.cur_date is null));
id cur_date
@@ -586,7 +586,7 @@ insert into t1 set dt='2007-03-23 13:49:38',da=dt;
Warnings:
Note 1265 Data truncated for column 'da' at row 1
insert into t1 values ('2007-03-32','2007-03-23 13:49:38');
-ERROR 22007: Incorrect date value: '2007-03-32' for column 'da' at row 1
+ERROR 22007: Incorrect date value: '2007-03-32' for column `test`.`t1`.`da` at row 1
select * from t1;
da dt
1962-03-03 1962-03-03 00:00:00
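
The two hunks above track a change in server diagnostics: rejected values are now reported against the fully qualified column (`test`.`t1`.`da`) rather than the bare name, and the optimizer's Note 1003 rewrite is printed without the redundant parentheses. A minimal sketch of the first case, assuming a strict sql_mode as in the surrounding test:

    CREATE TABLE t1 (da DATE, dt DATETIME);
    -- Rejected in strict mode; the error now names the column as
    -- `test`.`t1`.`da` instead of just 'da':
    INSERT INTO t1 VALUES ('2007-03-32', '2007-03-23 13:49:38');
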
diff --git a/storage/tokudb/mysql-test/tokudb/r/type_decimal.result b/storage/tokudb/mysql-test/tokudb/r/type_decimal.result
index 46cf3f784c2..3b82bbcef4f 100644
--- a/storage/tokudb/mysql-test/tokudb/r/type_decimal.result
+++ b/storage/tokudb/mysql-test/tokudb/r/type_decimal.result
@@ -177,9 +177,9 @@ Note 1265 Data truncated for column 'a' at row 2
insert into t1 values ("1e+18446744073709551615"),("1e+18446744073709551616"),("1e-9223372036854775807"),("1e-9223372036854775809");
Warnings:
Warning 1264 Out of range value for column 'a' at row 1
-Warning 1366 Incorrect decimal value: '1e+18446744073709551616' for column 'a' at row 2
+Warning 1366 Incorrect decimal value: '1e+18446744073709551616' for column `test`.`t1`.`a` at row 2
Note 1265 Data truncated for column 'a' at row 3
-Warning 1366 Incorrect decimal value: '1e-9223372036854775809' for column 'a' at row 4
+Warning 1366 Incorrect decimal value: '1e-9223372036854775809' for column `test`.`t1`.`a` at row 4
insert into t1 values ("123.4e"),("123.4e+2"),("123.4e-2"),("123e1"),("123e+0");
Warnings:
Warning 1265 Data truncated for column 'a' at row 1
@@ -478,7 +478,7 @@ ERROR 42000: You have an error in your SQL syntax; check the manual that corresp
CREATE TABLE t1 (a_dec DECIMAL(-1,1));
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '-1,1))' at line 1
CREATE TABLE t1 (a_dec DECIMAL(0,11));
-ERROR 42000: For float(M,D), double(M,D) or decimal(M,D), M must be >= D (column 'a_dec').
+ERROR 42000: For float(M,D), double(M,D) or decimal(M,D), M must be >= D (column 'a_dec')
create table t1(a decimal(7,3));
insert into t1 values ('1'),('+1'),('-1'),('0000000001'),('+0000000001'),('-0000000001'),('10'),('+10'),('-10'),('0000000010'),('+0000000010'),('-0000000010'),('100'),('+100'),('-100'),('0000000100'),('+0000000100'),('-0000000100'),('1000'),('+1000'),('-1000'),('0000001000'),('+0000001000'),('-0000001000'),('10000'),('+10000'),('-10000'),('0000010000'),('+0000010000'),('-0000010000'),('100000'),('+100000'),('-100000'),('0000100000'),('+0000100000'),('-0000100000'),('1000000'),('+1000000'),('-1000000'),('0001000000'),('+0001000000'),('-0001000000'),('10000000'),('+10000000'),('-10000000'),('0010000000'),('+0010000000'),('-0010000000'),('100000000'),('+100000000'),('-100000000'),('0100000000'),('+0100000000'),('-0100000000'),('1000000000'),('+1000000000'),('-1000000000'),('1000000000'),('+1000000000'),('-1000000000');
select * from t1;
@@ -723,7 +723,7 @@ t1 CREATE TABLE `t1` (
) ENGINE=ENGINE DEFAULT CHARSET=latin1
drop table t1;
create table t1 (d decimal(66,0));
-ERROR 42000: Too big precision 66 specified for 'd'. Maximum is 65.
+ERROR 42000: Too big precision 66 specified for 'd'. Maximum is 65
CREATE TABLE t1 (i INT, d1 DECIMAL(9,2), d2 DECIMAL(9,2));
INSERT INTO t1 VALUES (1, 101.40, 21.40), (1, -80.00, 0.00),
(2, 0.00, 0.00), (2, -13.20, 0.00), (2, 59.60, 46.40),
@@ -812,10 +812,10 @@ c1
drop table t1;
SELECT 1 % .123456789123456789123456789123456789123456789123456789123456789123456789123456789 AS '%';
%
-0.012345687012345687012345687012
+0.01234568701234568701234568701234568701
SELECT MOD(1, .123456789123456789123456789123456789123456789123456789123456789123456789123456789) AS 'MOD()';
MOD()
-0.012345687012345687012345687012
+0.01234568701234568701234568701234568701
create table t1 (f1 decimal(6,6),f2 decimal(6,6) zerofill);
insert into t1 values (-0.123456,0.123456);
select group_concat(f1),group_concat(f2) from t1;
@@ -846,8 +846,8 @@ c
123456
SELECT ROUND( a, 100 ) AS c FROM t1 ORDER BY c;
c
-123456.000000000000000000000000000000
-123456.000000000000000000000000000000
+123456.00000000000000000000000000000000000000
+123456.00000000000000000000000000000000000000
CREATE TABLE t2( a NUMERIC, b INT );
INSERT INTO t2 VALUES (123456, 100);
SELECT TRUNCATE( a, b ) AS c FROM t2 ORDER BY c;
@@ -868,8 +868,8 @@ c
123456
SELECT ROUND( a, 100 ) AS c FROM t3 ORDER BY c;
c
-123456.000000000000000000000000000000
-123456.000000000000000000000000000000
+123456.00000000000000000000000000000000000000
+123456.00000000000000000000000000000000000000
CREATE TABLE t4( a DECIMAL, b INT );
INSERT INTO t4 VALUES (123456, 40), (123456, 40);
SELECT TRUNCATE( a, b ) AS c FROM t4 ORDER BY c;
@@ -882,8 +882,8 @@ c
123456
SELECT ROUND( a, 100 ) AS c FROM t4 ORDER BY c;
c
-123456.000000000000000000000000000000
-123456.000000000000000000000000000000
+123456.00000000000000000000000000000000000000
+123456.00000000000000000000000000000000000000
delete from t1;
INSERT INTO t1 VALUES (1234567890, 20), (999.99, 5);
Warnings:
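
The longer fractions in the % / MOD() and ROUND(a,100) results reflect the larger maximum scale now used for decimal intermediates: 38 fractional digits where the old results carried 30. A sketch that reproduces the padding, assuming that new limit:

    CREATE TABLE t1 (a NUMERIC);
    INSERT INTO t1 VALUES (123456);
    -- An oversized round length is clamped to the maximum scale, so the
    -- result is padded to 38 zeroes instead of the former 30:
    SELECT ROUND(a, 100) AS c FROM t1;
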
diff --git a/storage/tokudb/mysql-test/tokudb/r/type_enum.result b/storage/tokudb/mysql-test/tokudb/r/type_enum.result
index 3a1654ef287..a1e61df126b 100644
--- a/storage/tokudb/mysql-test/tokudb/r/type_enum.result
+++ b/storage/tokudb/mysql-test/tokudb/r/type_enum.result
@@ -1670,7 +1670,7 @@ b ENUM('value','öäü_value','ÊÃÕ') character set latin1 NOT NULL
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `a` int(11) DEFAULT '1',
+ `a` int(11) DEFAULT 1,
`b` enum('value','öäü_value','ÊÃÕ') NOT NULL
) ENGINE=ENGINE DEFAULT CHARSET=latin1
show columns from t1;
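
`DEFAULT 1` in place of `DEFAULT '1'` follows from defaults now being stored and printed as expressions, so numeric defaults are no longer quoted as strings. Sketch:

    CREATE TABLE t1 (a INT DEFAULT 1);
    -- SHOW CREATE TABLE now prints: `a` int(11) DEFAULT 1
    SHOW CREATE TABLE t1;
    DROP TABLE t1;
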
diff --git a/storage/tokudb/mysql-test/tokudb/r/type_float.result b/storage/tokudb/mysql-test/tokudb/r/type_float.result
index cdbe26651e8..3bfda535e87 100644
--- a/storage/tokudb/mysql-test/tokudb/r/type_float.result
+++ b/storage/tokudb/mysql-test/tokudb/r/type_float.result
@@ -134,7 +134,7 @@ min(a)
-0.010
drop table t1;
create table t1 (a float(200,100), b double(200,100));
-ERROR 42000: Too big scale 100 specified for 'a'. Maximum is 30.
+ERROR 42000: Too big scale 100 specified for 'a'. Maximum is 30
create table t1 (c20 char);
insert into t1 values (5000.0);
Warnings:
@@ -342,9 +342,9 @@ s
drop table t1;
End of 4.1 tests
create table t1 (s1 float(0,2));
-ERROR 42000: For float(M,D), double(M,D) or decimal(M,D), M must be >= D (column 's1').
+ERROR 42000: For float(M,D), double(M,D) or decimal(M,D), M must be >= D (column 's1')
create table t1 (s1 float(1,2));
-ERROR 42000: For float(M,D), double(M,D) or decimal(M,D), M must be >= D (column 's1').
+ERROR 42000: For float(M,D), double(M,D) or decimal(M,D), M must be >= D (column 's1')
CREATE TABLE t1 (
f1 real zerofill,
f2 double zerofill,
@@ -459,7 +459,7 @@ Warnings:
Warning 1265 Data truncated for column 'f' at row 1
INSERT INTO t1 VALUES ('.');
Warnings:
-Warning 1366 Incorrect double value: '.' for column 'f' at row 1
+Warning 1366 Incorrect double value: '.' for column `test`.`t1`.`f` at row 1
SELECT * FROM t1 ORDER BY f;
f
0
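
Two message conventions change in this file: error and warning texts lose their trailing period ('Maximum is 30' rather than 'Maximum is 30.'), and per-row conversion warnings name the fully qualified column. The checks themselves are unchanged in substance, e.g. the scale limit for float/double:

    -- Still rejected with "Too big scale 100 specified for 'a'. Maximum is 30";
    -- only the punctuation of the message differs:
    CREATE TABLE t1 (a FLOAT(200,100), b DOUBLE(200,100));
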
diff --git a/storage/tokudb/mysql-test/tokudb/r/type_newdecimal.result b/storage/tokudb/mysql-test/tokudb/r/type_newdecimal.result
index 7d5b555488e..ad920deeda4 100644
--- a/storage/tokudb/mysql-test/tokudb/r/type_newdecimal.result
+++ b/storage/tokudb/mysql-test/tokudb/r/type_newdecimal.result
@@ -177,7 +177,7 @@ Table Create Table
t1 CREATE TABLE `t1` (
`round(15.4,-1)` decimal(3,0) NOT NULL,
`truncate(-5678.123451,-3)` decimal(4,0) NOT NULL,
- `abs(-1.1)` decimal(3,1) NOT NULL,
+ `abs(-1.1)` decimal(2,1) NOT NULL,
`-(-1.1)` decimal(2,1) NOT NULL
) ENGINE=ENGINE DEFAULT CHARSET=latin1
drop table t1;
@@ -701,10 +701,10 @@ select .7777777777777777777777777777777777777 *
1000000000000000000;
.7777777777777777777777777777777777777 *
1000000000000000000
-777777777777777777.777777777777777777700000000000
+777777777777777777.7777777777777777777000000000000000000
select .7777777777777777777777777777777777777 - 0.1;
.7777777777777777777777777777777777777 - 0.1
-0.677777777777777777777777777778
+0.6777777777777777777777777777777777777
select .343434343434343434 + .343434343434343434;
.343434343434343434 + .343434343434343434
0.686868686868686868
@@ -759,12 +759,12 @@ round(99999999999999999.999,3)
select round(-99999999999999999.999,3);
round(-99999999999999999.999,3)
-99999999999999999.999
-select truncate(99999999999999999999999999999999999999,31);
-truncate(99999999999999999999999999999999999999,31)
-99999999999999999999999999999999999999.000000000000000000000000000000
-select truncate(99.999999999999999999999999999999999999,31);
-truncate(99.999999999999999999999999999999999999,31)
-99.999999999999999999999999999999
+select truncate(99999999999999999999999999999999999999,49);
+truncate(99999999999999999999999999999999999999,49)
+99999999999999999999999999999999999999.000000000000000000000000000000000000
+select truncate(99.999999999999999999999999999999999999,49);
+truncate(99.999999999999999999999999999999999999,49)
+99.99999999999999999999999999999999999900
select truncate(99999999999999999999999999999999999999,-31);
truncate(99999999999999999999999999999999999999,-31)
99999990000000000000000000000000000000
@@ -826,7 +826,7 @@ Warning 1365 Division by 0
Warning 1365 Division by 0
Warning 1365 Division by 0
INSERT INTO Sow6_2f VALUES ('a59b');
-ERROR 22007: Incorrect decimal value: 'a59b' for column 'col1' at row 1
+ERROR 22007: Incorrect decimal value: 'a59b' for column `test`.`Sow6_2f`.`col1` at row 1
drop table Sow6_2f;
select 10.3330000000000/12.34500000;
10.3330000000000/12.34500000
@@ -839,12 +839,12 @@ select 9999999999999999999999999999999999999999999999999999999999999999999999999
x
99999999999999999999999999999999999999999999999999999999999999999
Warnings:
-Warning 1916 Got overflow when converting '' to DECIMAL. Value truncated.
+Warning 1916 Got overflow when converting '' to DECIMAL. Value truncated
select 9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 + 1 as x;
x
100000000000000000000000000000000000000000000000000000000000000000
Warnings:
-Warning 1916 Got overflow when converting '' to DECIMAL. Value truncated.
+Warning 1916 Got overflow when converting '' to DECIMAL. Value truncated
select 0.190287977636363637 + 0.040372670 * 0 - 0;
0.190287977636363637 + 0.040372670 * 0 - 0
0.190287977636363637
@@ -890,7 +890,7 @@ create table t1 (col1 int, col2 decimal(30,25), col3 numeric(30,25));
insert into t1 values (1,0.0123456789012345678912345,0.0123456789012345678912345);
select col2/9999999999 from t1 where col1=1;
col2/9999999999
-0.000000000001234567890246913578
+0.00000000000123456789024691357814814136
select 9999999999/col2 from t1 where col1=1;
9999999999/col2
810000007209.000065537105051
@@ -920,17 +920,19 @@ select ln(14000) c1, convert(ln(14000),decimal(5,3)) c2, cast(ln(14000) as decim
c1 c2 c3
9.546812608597396 9.547 9.547
select convert(ln(14000),decimal(2,3)) c1;
-ERROR 42000: For float(M,D), double(M,D) or decimal(M,D), M must be >= D (column '').
+ERROR 42000: For float(M,D), double(M,D) or decimal(M,D), M must be >= D (column '')
select cast(ln(14000) as decimal(2,3)) c1;
-ERROR 42000: For float(M,D), double(M,D) or decimal(M,D), M must be >= D (column '').
+ERROR 42000: For float(M,D), double(M,D) or decimal(M,D), M must be >= D (column '')
create table t1 (sl decimal(70,30));
-ERROR 42000: Too big precision 70 specified for 'sl'. Maximum is 65.
-create table t1 (sl decimal(32,31));
-ERROR 42000: Too big scale 31 specified for 'sl'. Maximum is 30.
-create table t1 (sl decimal(0,38));
-ERROR 42000: Too big scale 38 specified for 'sl'. Maximum is 30.
+ERROR 42000: Too big precision 70 specified for 'sl'. Maximum is 65
+create table t1 (sl decimal(32,39));
+ERROR 42000: Too big scale 39 specified for 'sl'. Maximum is 38
+create table t1 (sl decimal(67,38));
+ERROR 42000: Too big precision 67 specified for 'sl'. Maximum is 65
+create table t1 (sl decimal(0,50));
+ERROR 42000: Too big scale 50 specified for 'sl'. Maximum is 38
create table t1 (sl decimal(0,30));
-ERROR 42000: For float(M,D), double(M,D) or decimal(M,D), M must be >= D (column 'sl').
+ERROR 42000: For float(M,D), double(M,D) or decimal(M,D), M must be >= D (column 'sl')
create table t1 (sl decimal(5, 5));
show create table t1;
Table Create Table
@@ -938,11 +940,11 @@ t1 CREATE TABLE `t1` (
`sl` decimal(5,5) DEFAULT NULL
) ENGINE=ENGINE DEFAULT CHARSET=latin1
drop table t1;
-create table t1 (sl decimal(65, 30));
+create table t1 (sl decimal(65, 38));
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `sl` decimal(65,30) DEFAULT NULL
+ `sl` decimal(65,38) DEFAULT NULL
) ENGINE=ENGINE DEFAULT CHARSET=latin1
drop table t1;
create table t1 (
@@ -974,8 +976,8 @@ f1 decimal (0,0) zerofill not null default 0);
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f0` decimal(30,30) unsigned zerofill NOT NULL DEFAULT '0.000000000000000000000000000000',
- `f1` decimal(10,0) unsigned zerofill NOT NULL DEFAULT '0000000000'
+ `f0` decimal(30,30) unsigned zerofill NOT NULL DEFAULT 0.000000000000000000000000000000,
+ `f1` decimal(10,0) unsigned zerofill NOT NULL DEFAULT 0000000000
) ENGINE=ENGINE DEFAULT CHARSET=latin1
drop table t1;
drop procedure if exists wg2;
@@ -1012,7 +1014,7 @@ select cast(@non_existing_user_var/2 as DECIMAL);
cast(@non_existing_user_var/2 as DECIMAL)
NULL
create table t (d decimal(0,10));
-ERROR 42000: For float(M,D), double(M,D) or decimal(M,D), M must be >= D (column 'd').
+ERROR 42000: For float(M,D), double(M,D) or decimal(M,D), M must be >= D (column 'd')
CREATE TABLE t1 (
my_float FLOAT,
my_double DOUBLE,
@@ -1381,12 +1383,12 @@ create table t1 (c1 decimal(64));
insert into t1 values(
89000000000000000000000000000000000000000000000000000000000000000000000000000000000000000);
Warnings:
-Warning 1916 Got overflow when converting '' to DECIMAL. Value truncated.
+Warning 1916 Got overflow when converting '' to DECIMAL. Value truncated
Warning 1264 Out of range value for column 'c1' at row 1
insert into t1 values(
99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 *
99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999);
-ERROR 22003: DECIMAL value is out of range in '(99999999999999999999999999999999999999999999999999999999999999999 * 99999999999999999999999999999999999999999999999999999999999999999)'
+ERROR 22003: DECIMAL value is out of range in '99999999999999999999999999999999999999999999999999999999999999999 * 99999999999999999999999999999999999999999999999999999999999999999'
insert into t1 values(1e100);
Warnings:
Warning 1264 Out of range value for column 'c1' at row 1
@@ -1428,7 +1430,7 @@ select cast(19999999999999999999 as unsigned);
cast(19999999999999999999 as unsigned)
18446744073709551615
Warnings:
-Warning 1916 Got overflow when converting '19999999999999999999' to UNSIGNED INT. Value truncated.
+Warning 1916 Got overflow when converting '19999999999999999999' to UNSIGNED INT. Value truncated
create table t1(a decimal(18));
insert into t1 values(123456789012345678);
alter table t1 modify column a decimal(19);
@@ -1481,12 +1483,12 @@ SELECT CAST(1 AS decimal(65,10));
CAST(1 AS decimal(65,10))
1.0000000000
SELECT CAST(1 AS decimal(66,10));
-ERROR 42000: Too big precision 66 specified for '1'. Maximum is 65.
-SELECT CAST(1 AS decimal(65,30));
-CAST(1 AS decimal(65,30))
-1.000000000000000000000000000000
-SELECT CAST(1 AS decimal(65,31));
-ERROR 42000: Too big scale 31 specified for '1'. Maximum is 30.
+ERROR 42000: Too big precision 66 specified for '1'. Maximum is 65
+SELECT CAST(1 AS decimal(65,38));
+CAST(1 AS decimal(65,38))
+1.00000000000000000000000000000000000000
+SELECT CAST(1 AS decimal(65,39));
+ERROR 42000: Too big scale 39 specified for '1'. Maximum is 38
CREATE TABLE t1 (a int DEFAULT NULL, b int DEFAULT NULL);
INSERT INTO t1 VALUES (3,30), (1,10), (2,10);
SELECT a+CAST(1 AS decimal(65,30)) AS aa, SUM(b) FROM t1 GROUP BY aa;
@@ -1494,8 +1496,8 @@ aa SUM(b)
2.000000000000000000000000000000 10
3.000000000000000000000000000000 10
4.000000000000000000000000000000 30
-SELECT a+CAST(1 AS decimal(65,31)) AS aa, SUM(b) FROM t1 GROUP BY aa;
-ERROR 42000: Too big scale 31 specified for '1'. Maximum is 30.
+SELECT a+CAST(1 AS decimal(65,49)) AS aa, SUM(b) FROM t1 GROUP BY aa;
+ERROR 42000: Too big scale 49 specified for '1'. Maximum is 38
DROP TABLE t1;
CREATE TABLE t1 (a int DEFAULT NULL, b int DEFAULT NULL);
INSERT INTO t1 VALUES (3,30), (1,10), (2,10);
@@ -1505,31 +1507,31 @@ SELECT 1 FROM t1 GROUP BY @b := @a, @b;
1
1
DROP TABLE t1;
-CREATE TABLE t1 SELECT 0.123456789012345678901234567890123456 AS f1;
+CREATE TABLE t1 SELECT 0.1234567890123456789012345678901234567890123456789 AS f1;
Warnings:
Note 1265 Data truncated for column 'f1' at row 1
DESC t1;
Field Type Null Key Default Extra
-f1 decimal(31,30) NO NULL
+f1 decimal(39,38) NO NULL
SELECT f1 FROM t1;
f1
-0.123456789012345678901234567890
+0.12345678901234567890123456789012345679
DROP TABLE t1;
CREATE TABLE t1 SELECT 123451234512345123451234512345123451234512345.678906789067890678906789067890678906789067890 AS f1;
Warnings:
Warning 1264 Out of range value for column 'f1' at row 1
DESC t1;
Field Type Null Key Default Extra
-f1 decimal(65,30) NO NULL
+f1 decimal(65,36) NO NULL
SELECT f1 FROM t1;
f1
-99999999999999999999999999999999999.999999999999999999999999999999
+99999999999999999999999999999.999999999999999999999999999999999999
DROP TABLE t1;
select (1.20396873 * 0.89550000 * 0.68000000 * 1.08721696 * 0.99500000 *
1.01500000 * 1.01500000 * 0.99500000);
(1.20396873 * 0.89550000 * 0.68000000 * 1.08721696 * 0.99500000 *
1.01500000 * 1.01500000 * 0.99500000)
-0.812988073953673124592306939480
+0.81298807395367312459230693948000000000
create table t1 as select 5.05 / 0.014;
Warnings:
Note 1265 Data truncated for column '5.05 / 0.014' at row 1
@@ -1553,8 +1555,8 @@ SELECT substring(('M') FROM (999999999999999999999999999999999999999999999999999
foo
Warnings:
-Warning 1916 Got overflow when converting '999999999999999999999999999999999999999999999999999999999999999999999999999999999' to INT. Value truncated.
-Warning 1916 Got overflow when converting '999999999999999999999999999999999999999999999999999999999999999999999999999999999' to INT. Value truncated.
+Warning 1916 Got overflow when converting '999999999999999999999999999999999999999999999999999999999999999999999999999999999' to INT. Value truncated
+Warning 1916 Got overflow when converting '999999999999999999999999999999999999999999999999999999999999999999999999999999999' to INT. Value truncated
SELECT min(999999999999999999999999999999999999999999999999999999999999999999999999999999999) AS foo;
foo
999999999999999999999999999999999999999999999999999999999999999999999999999999999
@@ -1562,7 +1564,7 @@ SELECT multipolygonfromtext(('4294967294.1'),(9999999999999999999999999999999999
foo
NULL
Warnings:
-Warning 1916 Got overflow when converting '999999999999999999999999999999999999999999999999999999999999999999999999999999999' to INT. Value truncated.
+Warning 1916 Got overflow when converting '999999999999999999999999999999999999999999999999999999999999999999999999999999999' to INT. Value truncated
SELECT convert((999999999999999999999999999999999999999999999999999999999999999999999999999999999), decimal(30,30)) AS foo;
foo
0.999999999999999999999999999999
@@ -1572,7 +1574,7 @@ SELECT bit_xor(99999999999999999999999999999999999999999999999999999999999999999
foo
9223372036854775807
Warnings:
-Warning 1916 Got overflow when converting '999999999999999999999999999999999999999999999999999999999999999999999999999999999' to INT. Value truncated.
+Warning 1916 Got overflow when converting '999999999999999999999999999999999999999999999999999999999999999999999999999999999' to INT. Value truncated
SELECT -(999999999999999999999999999999999999999999999999999999999999999999999999999999999) AS foo;
foo
-999999999999999999999999999999999999999999999999999999999999999999999999999999999
@@ -1624,30 +1626,30 @@ Warnings:
Note 1265 Data truncated for column 'my_col' at row 1
DESCRIBE t1;
Field Type Null Key Default Extra
-my_col decimal(30,30) NO NULL
+my_col decimal(38,38) NO NULL
SELECT my_col FROM t1;
my_col
-0.123456789123456789123456789123
+0.12345678912345678912345678912345678912
DROP TABLE t1;
CREATE TABLE t1 SELECT 1 + .123456789123456789123456789123456789123456789123456789123456789123456789123456789 AS my_col;
Warnings:
Note 1265 Data truncated for column 'my_col' at row 1
DESCRIBE t1;
Field Type Null Key Default Extra
-my_col decimal(65,30) NO NULL
+my_col decimal(65,38) NO NULL
SELECT my_col FROM t1;
my_col
-1.123456789123456789123456789123
+1.12345678912345678912345678912345678912
DROP TABLE t1;
CREATE TABLE t1 SELECT 1 * .123456789123456789123456789123456789123456789123456789123456789123456789123456789 AS my_col;
Warnings:
Note 1265 Data truncated for column 'my_col' at row 1
DESCRIBE t1;
Field Type Null Key Default Extra
-my_col decimal(65,30) NO NULL
+my_col decimal(65,38) NO NULL
SELECT my_col FROM t1;
my_col
-0.123456789123456789123456789123
+0.12345678912345678912345678912345678912
DROP TABLE t1;
CREATE TABLE t1 SELECT 1 / .123456789123456789123456789123456789123456789123456789123456789123456789123456789 AS my_col;
Warnings:
@@ -1664,10 +1666,10 @@ Warnings:
Note 1265 Data truncated for column 'my_col' at row 1
DESCRIBE t1;
Field Type Null Key Default Extra
-my_col decimal(65,30) YES NULL
+my_col decimal(65,38) YES NULL
SELECT my_col FROM t1;
my_col
-0.012345687012345687012345687012
+0.01234568701234568701234568701234568701
DROP TABLE t1;
#
# Bug#45261: Crash, stored procedure + decimal
@@ -1713,7 +1715,7 @@ CREATE TABLE t1 SELECT
/* 82 */ 1000000000000000000000000000000000000000000000000000000000000000000000000000000001
AS c1;
Warnings:
-Warning 1916 Got overflow when converting '' to DECIMAL. Value truncated.
+Warning 1916 Got overflow when converting '' to DECIMAL. Value truncated
DESC t1;
Field Type Null Key Default Extra
c1 decimal(65,0) NO NULL
@@ -1728,30 +1730,30 @@ Warnings:
Warning 1264 Out of range value for column 'c1' at row 1
DESC t1;
Field Type Null Key Default Extra
-c1 decimal(65,30) NO NULL
+c1 decimal(65,36) NO NULL
SELECT * FROM t1;
c1
-99999999999999999999999999999999999.999999999999999999999999999999
+99999999999999999999999999999.999999999999999999999999999999999999
DROP TABLE t1;
CREATE TABLE t1 SELECT
/* 1 */ 1.10000000000000000000000000000000000000000000000000000000000000000000000000000001 /* 80 */
AS c1;
DESC t1;
Field Type Null Key Default Extra
-c1 decimal(31,30) NO NULL
+c1 decimal(39,38) NO NULL
SELECT * FROM t1;
c1
-1.100000000000000000000000000000
+1.10000000000000000000000000000000000000
DROP TABLE t1;
CREATE TABLE t1 SELECT
/* 1 */ 1.100000000000000000000000000000000000000000000000000000000000000000000000000000001 /* 81 */
AS c1;
DESC t1;
Field Type Null Key Default Extra
-c1 decimal(31,30) NO NULL
+c1 decimal(39,38) NO NULL
SELECT * FROM t1;
c1
-1.100000000000000000000000000000
+1.10000000000000000000000000000000000000
DROP TABLE t1;
CREATE TABLE t1 SELECT
.100000000000000000000000000000000000000000000000000000000000000000000000000000001 /* 81 */
@@ -1760,10 +1762,10 @@ Warnings:
Note 1265 Data truncated for column 'c1' at row 1
DESC t1;
Field Type Null Key Default Extra
-c1 decimal(30,30) NO NULL
+c1 decimal(38,38) NO NULL
SELECT * FROM t1;
c1
-0.100000000000000000000000000000
+0.10000000000000000000000000000000000000
DROP TABLE t1;
CREATE TABLE t1 SELECT
/* 45 */ 123456789012345678901234567890123456789012345.123456789012345678901234567890123456789012345 /* 45 */
@@ -1772,10 +1774,10 @@ Warnings:
Warning 1264 Out of range value for column 'c1' at row 1
DESC t1;
Field Type Null Key Default Extra
-c1 decimal(65,30) NO NULL
+c1 decimal(65,36) NO NULL
SELECT * FROM t1;
c1
-99999999999999999999999999999999999.999999999999999999999999999999
+99999999999999999999999999999.999999999999999999999999999999999999
DROP TABLE t1;
CREATE TABLE t1 SELECT
/* 65 */ 12345678901234567890123456789012345678901234567890123456789012345.1 /* 1 */
@@ -1808,20 +1810,18 @@ Warnings:
Note 1265 Data truncated for column 'c1' at row 1
DESC t1;
Field Type Null Key Default Extra
-c1 decimal(30,30) NO NULL
+c1 decimal(38,38) NO NULL
SELECT * FROM t1;
c1
-0.123456789012345678901234567890
+0.12345678901234567890123456789012345679
DROP TABLE t1;
CREATE TABLE t1 AS SELECT 123.1234567890123456789012345678901 /* 31 */ AS c1;
-Warnings:
-Note 1265 Data truncated for column 'c1' at row 1
DESC t1;
Field Type Null Key Default Extra
-c1 decimal(33,30) NO NULL
+c1 decimal(34,31) NO NULL
SELECT * FROM t1;
c1
-123.123456789012345678901234567890
+123.1234567890123456789012345678901
DROP TABLE t1;
CREATE TABLE t1 SELECT 1.1 + CAST(1 AS DECIMAL(65,30)) AS c1;
DESC t1;
@@ -1837,22 +1837,20 @@ DROP TABLE t1;
CREATE TABLE t1 (a DECIMAL(30,30));
INSERT INTO t1 VALUES (0.1),(0.2),(0.3);
CREATE TABLE t2 SELECT MIN(a + 0.0000000000000000000000000000001) AS c1 FROM t1;
-Warnings:
-Note 1265 Data truncated for column 'c1' at row 4
DESC t2;
Field Type Null Key Default Extra
-c1 decimal(33,30) YES NULL
+c1 decimal(33,31) YES NULL
DROP TABLE t1,t2;
CREATE TABLE t1 (a DECIMAL(30,30));
INSERT INTO t1 VALUES (0.1),(0.2),(0.3);
-CREATE TABLE t2 SELECT IFNULL(a + 0.0000000000000000000000000000001, NULL) AS c1 FROM t1;
+CREATE TABLE t2 SELECT IFNULL(a + 0.00000000000000000000000000000000000000000000000001, NULL) AS c1 FROM t1;
Warnings:
Note 1265 Data truncated for column 'c1' at row 1
Note 1265 Data truncated for column 'c1' at row 2
Note 1265 Data truncated for column 'c1' at row 3
DESC t2;
Field Type Null Key Default Extra
-c1 decimal(33,30) YES NULL
+c1 decimal(52,38) YES NULL
DROP TABLE t1,t2;
CREATE TABLE t1 (a DECIMAL(30,30));
INSERT INTO t1 VALUES (0.1),(0.2),(0.3);
@@ -1861,7 +1859,7 @@ Warnings:
Note 1265 Data truncated for column 'c1' at row 1
DESC t2;
Field Type Null Key Default Extra
-c1 decimal(65,30) YES NULL
+c1 decimal(65,38) YES NULL
DROP TABLE t1,t2;
#
# Test that variables get maximum precision.
@@ -1870,10 +1868,10 @@ SET @decimal= 1.1;
CREATE TABLE t1 SELECT @decimal AS c1;
DESC t1;
Field Type Null Key Default Extra
-c1 decimal(65,30) YES NULL
+c1 decimal(65,38) YES NULL
SELECT * FROM t1;
c1
-1.100000000000000000000000000000
+1.10000000000000000000000000000000000000
DROP TABLE t1;
#
# Bug #45261 : Crash, stored procedure + decimal
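
Most of the churn in this file comes from one limit change: the maximum DECIMAL scale appears to have been raised from 30 to 38 (precision stays capped at 65), so casts, derived columns, and literal-driven temp fields all pick up wider fractional parts. A sketch of the boundary as exercised above:

    CREATE TABLE t1 (sl DECIMAL(65,38));   -- now accepted
    DROP TABLE t1;
    -- One past the limit is still an error:
    -- ERROR 42000: Too big scale 39 specified for '1'. Maximum is 38
    SELECT CAST(1 AS DECIMAL(65,39));
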
diff --git a/storage/tokudb/mysql-test/tokudb/r/type_ranges.result b/storage/tokudb/mysql-test/tokudb/r/type_ranges.result
index bd8491336b3..1c9cd769a14 100644
--- a/storage/tokudb/mysql-test/tokudb/r/type_ranges.result
+++ b/storage/tokudb/mysql-test/tokudb/r/type_ranges.result
@@ -39,9 +39,6 @@ KEY (ulong),
KEY (ulonglong,ulong),
KEY (options,flags)
);
-Warnings:
-Warning 1101 BLOB/TEXT column 'mediumblob_col' can't have a default value
-Warning 1101 BLOB/TEXT column 'longblob_col' can't have a default value
show full fields from t1;
Field Type Collation Null Key Default Extra Privileges Comment
auto int(5) unsigned NULL NO PRI NULL auto_increment #
@@ -58,14 +55,14 @@ ushort smallint(5) unsigned zerofill NULL NO MUL 00000 #
umedium mediumint(8) unsigned NULL NO MUL 0 #
ulong int(11) unsigned NULL NO MUL 0 #
ulonglong bigint(13) unsigned NULL NO MUL 0 #
-time_stamp timestamp NULL NO CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP #
+time_stamp timestamp NULL NO current_timestamp() on update current_timestamp() #
date_field date NULL YES NULL #
time_field time NULL YES NULL #
date_time datetime NULL YES NULL #
blob_col blob NULL YES NULL #
tinyblob_col tinyblob NULL YES NULL #
-mediumblob_col mediumblob NULL NO NULL #
-longblob_col longblob NULL NO NULL #
+mediumblob_col mediumblob NULL NO '' #
+longblob_col longblob NULL NO '' #
options enum('one','two','tree') latin1_swedish_ci NO MUL NULL #
flags set('one','two','tree') latin1_swedish_ci NO #
show keys from t1;
@@ -87,10 +84,10 @@ t1 1 options 2 flags A NA NULL NULL BTREE
CREATE UNIQUE INDEX test on t1 ( auto ) ;
CREATE INDEX test2 on t1 ( ulonglong,ulong) ;
Warnings:
-Note 1831 Duplicate index `test2`. This is deprecated and will be disallowed in a future release.
+Note 1831 Duplicate index `test2`. This is deprecated and will be disallowed in a future release
CREATE INDEX test3 on t1 ( medium ) ;
Warnings:
-Note 1831 Duplicate index `test3`. This is deprecated and will be disallowed in a future release.
+Note 1831 Duplicate index `test3`. This is deprecated and will be disallowed in a future release
DROP INDEX test ON t1;
insert into t1 values (10, 1,1,1,1,1,1,1,1,1,1,1,1,1,NULL,0,0,0,1,1,1,1,'one','one');
insert into t1 values (NULL,2,2,2,2,2,2,2,2,2,2,2,2,2,NULL,NULL,NULL,NULL,NULL,NULL,2,2,'two','two,one');
@@ -130,9 +127,6 @@ Warning 1264 Out of range value for column 'ushort' at row 1
Warning 1264 Out of range value for column 'umedium' at row 1
Warning 1265 Data truncated for column 'options' at row 1
insert into t1 (tiny) values (1);
-Warnings:
-Warning 1364 Field 'mediumblob_col' doesn't have a default value
-Warning 1364 Field 'longblob_col' doesn't have a default value
select auto,string,tiny,short,medium,long_int,longlong,real_float,real_double,utiny,ushort,umedium,ulong,ulonglong,mod(floor(time_stamp/1000000),1000000)-mod(curdate(),1000000),date_field,time_field,date_time,blob_col,tinyblob_col,mediumblob_col,longblob_col from t1;
auto string tiny short medium long_int longlong real_float real_double utiny ushort umedium ulong ulonglong mod(floor(time_stamp/1000000),1000000)-mod(curdate(),1000000) date_field time_field date_time blob_col tinyblob_col mediumblob_col longblob_col
10 1 1 1 1 1 1 1.0 1.0000 1 00001 1 1 1 0 0000-00-00 00:00:00 0000-00-00 00:00:00 1 1 1 1
@@ -233,13 +227,13 @@ ushort smallint(5) unsigned zerofill NULL NO 00000 #
umedium mediumint(8) unsigned NULL NO MUL 0 #
ulong int(11) unsigned NULL NO MUL 0 #
ulonglong bigint(13) unsigned NULL NO MUL 0 #
-time_stamp timestamp NULL NO CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP #
+time_stamp timestamp NULL NO current_timestamp() on update current_timestamp() #
date_field char(10) latin1_swedish_ci YES NULL #
time_field time NULL YES NULL #
date_time datetime NULL YES NULL #
new_blob_col varchar(20) latin1_swedish_ci YES NULL #
tinyblob_col tinyblob NULL YES NULL #
-mediumblob_col mediumblob NULL NO NULL #
+mediumblob_col mediumblob NULL NO '' #
options enum('one','two','tree') latin1_swedish_ci NO MUL NULL #
flags set('one','two','tree') latin1_swedish_ci NO #
new_field char(10) latin1_swedish_ci NO new #
@@ -259,13 +253,13 @@ ushort smallint(5) unsigned zerofill NULL NO 00000 #
umedium mediumint(8) unsigned NULL NO 0 #
ulong int(11) unsigned NULL NO 0 #
ulonglong bigint(13) unsigned NULL NO 0 #
-time_stamp timestamp NULL NO CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP #
+time_stamp timestamp NULL NO current_timestamp() on update current_timestamp() #
date_field char(10) latin1_swedish_ci YES NULL #
time_field time NULL YES NULL #
date_time datetime NULL YES NULL #
new_blob_col varchar(20) latin1_swedish_ci YES NULL #
tinyblob_col tinyblob NULL YES NULL #
-mediumblob_col mediumblob NULL NO NULL #
+mediumblob_col mediumblob NULL NO '' #
options enum('one','two','tree') latin1_swedish_ci NO NULL #
flags set('one','two','tree') latin1_swedish_ci NO #
new_field char(10) latin1_swedish_ci NO new #
@@ -312,7 +306,7 @@ const int(1) NULL NO NULL #
drop table t1,t2,t3;
create table t1 ( myfield INT NOT NULL, UNIQUE INDEX (myfield), unique (myfield), index(myfield));
Warnings:
-Note 1831 Duplicate index `myfield_2`. This is deprecated and will be disallowed in a future release.
+Note 1831 Duplicate index `myfield_2`. This is deprecated and will be disallowed in a future release
drop table t1;
create table t1 ( id integer unsigned not null primary key );
create table t2 ( id integer unsigned not null primary key );
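
The dropped 1101/1364 warnings and the `''` defaults indicate that BLOB/TEXT columns may now carry an explicit DEFAULT, so the declarations in this test take effect instead of being silently stripped. Sketch, assuming that relaxation:

    CREATE TABLE t1 (b MEDIUMBLOB NOT NULL DEFAULT '');
    -- The Default column in SHOW FULL FIELDS now reads '' rather than NULL,
    -- and no "can't have a default value" warning is raised:
    SHOW FULL FIELDS FROM t1;
    DROP TABLE t1;
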
diff --git a/storage/tokudb/mysql-test/tokudb/r/type_timestamp.result b/storage/tokudb/mysql-test/tokudb/r/type_timestamp.result
index c412620173c..c19bf85e2b1 100644
--- a/storage/tokudb/mysql-test/tokudb/r/type_timestamp.result
+++ b/storage/tokudb/mysql-test/tokudb/r/type_timestamp.result
@@ -216,13 +216,13 @@ t1 t2 t3
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `t1` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
+ `t1` timestamp NOT NULL DEFAULT current_timestamp(),
`t2` datetime DEFAULT NULL,
`t3` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00'
) ENGINE=TokuDB DEFAULT CHARSET=latin1
show columns from t1;
Field Type Null Key Default Extra
-t1 timestamp NO CURRENT_TIMESTAMP
+t1 timestamp NO current_timestamp()
t2 datetime YES NULL
t3 timestamp NO 0000-00-00 00:00:00
drop table t1;
@@ -243,12 +243,12 @@ t1 t2
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `t1` timestamp NOT NULL DEFAULT '2003-01-01 00:00:00' ON UPDATE CURRENT_TIMESTAMP,
+ `t1` timestamp NOT NULL DEFAULT '2003-01-01 00:00:00' ON UPDATE current_timestamp(),
`t2` datetime DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
show columns from t1;
Field Type Null Key Default Extra
-t1 timestamp NO 2003-01-01 00:00:00 on update CURRENT_TIMESTAMP
+t1 timestamp NO 2003-01-01 00:00:00 on update current_timestamp()
t2 datetime YES NULL
drop table t1;
create table t1 (t1 timestamp NOT NULL DEFAULT now() on update now(), t2 datetime);
@@ -268,12 +268,12 @@ t1 t2
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `t1` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+ `t1` timestamp NOT NULL DEFAULT current_timestamp() ON UPDATE current_timestamp(),
`t2` datetime DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
show columns from t1;
Field Type Null Key Default Extra
-t1 timestamp NO CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP
+t1 timestamp NO current_timestamp() on update current_timestamp()
t2 datetime YES NULL
drop table t1;
create table t1 (t1 timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, t2 datetime, t3 timestamp NOT NULL DEFAULT '0000-00-00 00:00:00');
@@ -293,13 +293,13 @@ t1 t2 t3
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `t1` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+ `t1` timestamp NOT NULL DEFAULT current_timestamp() ON UPDATE current_timestamp(),
`t2` datetime DEFAULT NULL,
`t3` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00'
) ENGINE=TokuDB DEFAULT CHARSET=latin1
show columns from t1;
Field Type Null Key Default Extra
-t1 timestamp NO CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP
+t1 timestamp NO current_timestamp() on update current_timestamp()
t2 datetime YES NULL
t3 timestamp NO 0000-00-00 00:00:00
drop table t1;
@@ -320,12 +320,12 @@ t1 t2
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `t1` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+ `t1` timestamp NOT NULL DEFAULT current_timestamp() ON UPDATE current_timestamp(),
`t2` datetime DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
show columns from t1;
Field Type Null Key Default Extra
-t1 timestamp NO CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP
+t1 timestamp NO current_timestamp() on update current_timestamp()
t2 datetime YES NULL
truncate table t1;
insert into t1 values ('2004-04-01 00:00:00', '2004-04-01 00:00:00');
@@ -390,7 +390,7 @@ create table t1 (a timestamp null default current_timestamp on update current_ti
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `a` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+ `a` timestamp NULL DEFAULT current_timestamp() ON UPDATE current_timestamp(),
`b` timestamp NULL DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
insert into t1 values (NULL, NULL);
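
Every hunk in this file is the same rename: CURRENT_TIMESTAMP defaults and ON UPDATE clauses are now rendered in the function-call form current_timestamp(). Sketch:

    CREATE TABLE t1 (ts TIMESTAMP NOT NULL
                     DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP);
    -- Now prints: DEFAULT current_timestamp() ON UPDATE current_timestamp()
    SHOW CREATE TABLE t1;
    DROP TABLE t1;
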
diff --git a/storage/tokudb/mysql-test/tokudb/r/type_timestamp_explicit.result b/storage/tokudb/mysql-test/tokudb/r/type_timestamp_explicit.result
index 4fdd04f7bda..290daee2ff1 100644
--- a/storage/tokudb/mysql-test/tokudb/r/type_timestamp_explicit.result
+++ b/storage/tokudb/mysql-test/tokudb/r/type_timestamp_explicit.result
@@ -648,20 +648,20 @@ SET @org_mode=@@sql_mode;
#Table creation in strict mode
SET @@sql_mode='NO_ZERO_DATE,STRICT_ALL_TABLES';
Warnings:
-Warning 1681 'NO_ZERO_DATE' is deprecated and will be removed in a future release.
+Warning 1681 'NO_ZERO_DATE' is deprecated and will be removed in a future release
CREATE TABLE t1 (c1 TIMESTAMP DEFAULT 0);
ERROR 42000: Invalid default value for 'c1'
CREATE TABLE t1 (c1 TIMESTAMP DEFAULT '0000-00-00 00:00:00');
ERROR 42000: Invalid default value for 'c1'
SET @@sql_mode='NO_ZERO_IN_DATE,STRICT_ALL_TABLES';
Warnings:
-Warning 1681 'NO_ZERO_IN_DATE' is deprecated and will be removed in a future release.
+Warning 1681 'NO_ZERO_IN_DATE' is deprecated and will be removed in a future release
CREATE TABLE t1 (c1 TIMESTAMP DEFAULT '2012-02-00 12:12:12');
ERROR 42000: Invalid default value for 'c1'
#Table creation in non-strict mode but with NO_ZERO_DATE/NO_ZERO_IN_DATE
SET @@sql_mode='NO_ZERO_DATE';
Warnings:
-Warning 1681 'NO_ZERO_DATE' is deprecated and will be removed in a future release.
+Warning 1681 'NO_ZERO_DATE' is deprecated and will be removed in a future release
CREATE TABLE t1 (c1 TIMESTAMP DEFAULT 0);
Warnings:
Warning 1264 Out of range value for column 'c1' at row 1
@@ -670,7 +670,7 @@ Warnings:
Warning 1264 Out of range value for column 'c1' at row 1
SET @@sql_mode='NO_ZERO_IN_DATE';
Warnings:
-Warning 1681 'NO_ZERO_IN_DATE' is deprecated and will be removed in a future release.
+Warning 1681 'NO_ZERO_IN_DATE' is deprecated and will be removed in a future release
CREATE TABLE t3 (c1 TIMESTAMP DEFAULT '2012-02-00 12:12:12');
Warnings:
Warning 1264 Out of range value for column 'c1' at row 1
@@ -687,20 +687,20 @@ CREATE TABLE t1 (c1 INT);
#Alter table in strict mode with NO_ZERO_DATE/NO_ZERO_IN_DATE
SET @@sql_mode='NO_ZERO_DATE,STRICT_ALL_TABLES';
Warnings:
-Warning 1681 'NO_ZERO_DATE' is deprecated and will be removed in a future release.
+Warning 1681 'NO_ZERO_DATE' is deprecated and will be removed in a future release
ALTER TABLE t1 ADD c2 TIMESTAMP DEFAULT 0;
ERROR 42000: Invalid default value for 'c2'
ALTER TABLE t1 ADD c2 TIMESTAMP DEFAULT '0000-00-00';
ERROR 42000: Invalid default value for 'c2'
SET @@sql_mode='NO_ZERO_IN_DATE,STRICT_ALL_TABLES';
Warnings:
-Warning 1681 'NO_ZERO_IN_DATE' is deprecated and will be removed in a future release.
+Warning 1681 'NO_ZERO_IN_DATE' is deprecated and will be removed in a future release
ALTER TABLE t1 ADD c2 TIMESTAMP DEFAULT '2012-02-00';
ERROR 42000: Invalid default value for 'c2'
#Alter table in non-strict mode but with NO_ZERO_DATE/NO_ZERO_IN_DATE
SET @@sql_mode='NO_ZERO_DATE';
Warnings:
-Warning 1681 'NO_ZERO_DATE' is deprecated and will be removed in a future release.
+Warning 1681 'NO_ZERO_DATE' is deprecated and will be removed in a future release
ALTER TABLE t1 ADD c2 TIMESTAMP DEFAULT 0;
Warnings:
Warning 1264 Out of range value for column 'c2' at row 1
@@ -710,7 +710,7 @@ Warning 1264 Out of range value for column 'c2' at row 1
Warning 1264 Out of range value for column 'c3' at row 1
SET @@sql_mode='NO_ZERO_IN_DATE';
Warnings:
-Warning 1681 'NO_ZERO_IN_DATE' is deprecated and will be removed in a future release.
+Warning 1681 'NO_ZERO_IN_DATE' is deprecated and will be removed in a future release
ALTER TABLE t1 ADD c4 TIMESTAMP DEFAULT '2012-02-00';
Warnings:
Warning 1264 Out of range value for column 'c4' at row 1
diff --git a/storage/tokudb/mysql-test/tokudb/r/type_varchar.result b/storage/tokudb/mysql-test/tokudb/r/type_varchar.result
index 8a8dde74a3b..bf98e12ce16 100644
--- a/storage/tokudb/mysql-test/tokudb/r/type_varchar.result
+++ b/storage/tokudb/mysql-test/tokudb/r/type_varchar.result
@@ -8,7 +8,7 @@ t1 CREATE TABLE `t1` (
`v` varchar(30) DEFAULT NULL,
`c` char(3) DEFAULT NULL,
`e` enum('abc','def','ghi') DEFAULT NULL,
- `t` text
+ `t` text DEFAULT NULL
) ENGINE=ENGINE DEFAULT CHARSET=latin1
show create table vchar;
Table Create Table
@@ -16,7 +16,7 @@ vchar CREATE TABLE `vchar` (
`v` varchar(30) DEFAULT NULL,
`c` char(3) DEFAULT NULL,
`e` enum('abc','def','ghi') DEFAULT NULL,
- `t` text
+ `t` text DEFAULT NULL
) ENGINE=ENGINE DEFAULT CHARSET=latin1
insert into t1 values ('abc', 'de', 'ghi', 'jkl');
insert into t1 values ('abc ', 'de ', 'ghi', 'jkl ');
@@ -41,7 +41,7 @@ vchar CREATE TABLE `vchar` (
`v` varchar(30) DEFAULT NULL,
`c` char(3) DEFAULT NULL,
`e` enum('abc','def','ghi') DEFAULT NULL,
- `t` text,
+ `t` text DEFAULT NULL,
`i` int(11) DEFAULT NULL
) ENGINE=ENGINE DEFAULT CHARSET=latin1
select length(v),length(c),length(e),length(t) from vchar;
@@ -412,7 +412,7 @@ alter table t1 modify f1 tinytext;
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f1` tinytext,
+ `f1` tinytext DEFAULT NULL,
KEY `index1` (`f1`(10))
) ENGINE=ENGINE DEFAULT CHARSET=latin1
drop table t1;
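
Nullable TEXT/BLOB columns are now printed with an explicit DEFAULT NULL, consistent with other nullable types. Sketch:

    CREATE TABLE t1 (t TEXT);
    -- Now shown as: `t` text DEFAULT NULL
    SHOW CREATE TABLE t1;
    DROP TABLE t1;
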
diff --git a/storage/tokudb/mysql-test/tokudb/t/change_column_all.py b/storage/tokudb/mysql-test/tokudb/t/change_column_all.py
index 04e7d143696..75cffb88dff 100644
--- a/storage/tokudb/mysql-test/tokudb/t/change_column_all.py
+++ b/storage/tokudb/mysql-test/tokudb/t/change_column_all.py
@@ -168,6 +168,7 @@ def header():
print "# generated from change_column_all.py"
print "# test random column change on wide tables"
print "source include/have_tokudb.inc;"
+ print "--source include/big_test.inc"
print "--disable_warnings"
print "DROP TABLE IF EXISTS t, ti;"
print "--enable_warnings"
diff --git a/storage/tokudb/mysql-test/tokudb/t/cluster_2968-0.test b/storage/tokudb/mysql-test/tokudb/t/cluster_2968-0.test
index eaac82d260d..ee6ac5dd5d1 100644
--- a/storage/tokudb/mysql-test/tokudb/t/cluster_2968-0.test
+++ b/storage/tokudb/mysql-test/tokudb/t/cluster_2968-0.test
@@ -1,4 +1,5 @@
source include/have_tokudb.inc;
+source include/default_optimizer_switch.inc;
# test that the query planner picks clustering keys for joins
# create table s
diff --git a/storage/tokudb/mysql-test/tokudb/t/cluster_2968-1.test b/storage/tokudb/mysql-test/tokudb/t/cluster_2968-1.test
index 7ed2fc2cd7d..58b24bc93c8 100644
--- a/storage/tokudb/mysql-test/tokudb/t/cluster_2968-1.test
+++ b/storage/tokudb/mysql-test/tokudb/t/cluster_2968-1.test
@@ -1,4 +1,5 @@
source include/have_tokudb.inc;
+source include/default_optimizer_switch.inc;
# test that the query planner picks clustering keys for joins
# create table s
diff --git a/storage/tokudb/mysql-test/tokudb/t/cluster_2968-2.test b/storage/tokudb/mysql-test/tokudb/t/cluster_2968-2.test
index 5b648a5325c..f24bfc08b3f 100644
--- a/storage/tokudb/mysql-test/tokudb/t/cluster_2968-2.test
+++ b/storage/tokudb/mysql-test/tokudb/t/cluster_2968-2.test
@@ -1,4 +1,5 @@
source include/have_tokudb.inc;
+source include/default_optimizer_switch.inc;
# test that the query planner picks clustering keys for joins
# create table s
diff --git a/storage/tokudb/mysql-test/tokudb/t/cluster_2968-3.test b/storage/tokudb/mysql-test/tokudb/t/cluster_2968-3.test
index afbfeed2799..14c9de53236 100644
--- a/storage/tokudb/mysql-test/tokudb/t/cluster_2968-3.test
+++ b/storage/tokudb/mysql-test/tokudb/t/cluster_2968-3.test
@@ -1,4 +1,6 @@
source include/have_tokudb.inc;
+source include/default_optimizer_switch.inc;
+
# test that the query planner picks clustering keys for 3 table joins
# create table s
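
The cluster_2968-* tests depend on specific join plans, so they now pin the optimizer flags via include/default_optimizer_switch.inc instead of relying on server defaults, presumably because those defaults changed in this tree. A minimal header following the same pattern (hypothetical test body):

    source include/have_tokudb.inc;
    # Pin optimizer_switch so EXPLAIN plans stay stable across default changes:
    source include/default_optimizer_switch.inc;
    create table s (a int, b int, key (a)) engine=tokudb;
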
diff --git a/storage/tokudb/mysql-test/tokudb/t/ext_key_1_innodb.test b/storage/tokudb/mysql-test/tokudb/t/ext_key_1_innodb.test
index 488ccc422e9..766d0c107db 100644
--- a/storage/tokudb/mysql-test/tokudb/t/ext_key_1_innodb.test
+++ b/storage/tokudb/mysql-test/tokudb/t/ext_key_1_innodb.test
@@ -5,8 +5,6 @@ disable_warnings;
drop table if exists t;
enable_warnings;
-select @@optimizer_switch;
-
create table t (id int not null, x int not null, y int not null, primary key(id), key(x)) engine=innodb;
insert into t values (0,0,0),(1,1,1),(2,2,2),(3,2,3),(4,2,4);
diff --git a/storage/tokudb/mysql-test/tokudb/t/ext_key_1_tokudb.test b/storage/tokudb/mysql-test/tokudb/t/ext_key_1_tokudb.test
index 14a72a460e4..254207ec9a1 100644
--- a/storage/tokudb/mysql-test/tokudb/t/ext_key_1_tokudb.test
+++ b/storage/tokudb/mysql-test/tokudb/t/ext_key_1_tokudb.test
@@ -4,8 +4,6 @@ disable_warnings;
drop table if exists t;
enable_warnings;
-select @@optimizer_switch;
-
create table t (id int not null, x int not null, y int not null, primary key(id), key(x)) engine=tokudb;
insert into t values (0,0,0),(1,1,1),(2,2,2),(3,2,3),(4,2,4);
diff --git a/storage/tokudb/mysql-test/tokudb/t/ext_key_2_innodb.test b/storage/tokudb/mysql-test/tokudb/t/ext_key_2_innodb.test
index bfb56faf48e..92c1c90861e 100644
--- a/storage/tokudb/mysql-test/tokudb/t/ext_key_2_innodb.test
+++ b/storage/tokudb/mysql-test/tokudb/t/ext_key_2_innodb.test
@@ -5,8 +5,6 @@ disable_warnings;
drop table if exists t;
enable_warnings;
-select @@optimizer_switch;
-
create table t (a int not null, b int not null, c int not null, d int not null, primary key(a,b), key(c,a)) engine=innodb;
insert into t values (0,0,0,0),(0,1,0,1);
diff --git a/storage/tokudb/mysql-test/tokudb/t/ext_key_2_tokudb.test b/storage/tokudb/mysql-test/tokudb/t/ext_key_2_tokudb.test
index a088a16c0fd..bb4b97bf011 100644
--- a/storage/tokudb/mysql-test/tokudb/t/ext_key_2_tokudb.test
+++ b/storage/tokudb/mysql-test/tokudb/t/ext_key_2_tokudb.test
@@ -4,8 +4,6 @@ disable_warnings;
drop table if exists t;
enable_warnings;
-select @@optimizer_switch;
-
create table t (a int not null, b int not null, c int not null, d int not null, primary key(a,b), key(c,a)) engine=tokudb;
insert into t values (0,0,0,0),(0,1,0,1);
diff --git a/storage/tokudb/mysql-test/tokudb/t/type_blob.test b/storage/tokudb/mysql-test/tokudb/t/type_blob.test
index 4ea452ab732..6a429c46a55 100644
--- a/storage/tokudb/mysql-test/tokudb/t/type_blob.test
+++ b/storage/tokudb/mysql-test/tokudb/t/type_blob.test
@@ -37,9 +37,7 @@ drop table t1,t2,t3,t4;
CREATE TABLE t1 (a char(257) default "hello");
--error 1074
CREATE TABLE t2 (a char(256));
---error 1074
CREATE TABLE t1 (a varchar(70000) default "hello");
---error 1101
CREATE TABLE t2 (a blob default "hello");
# Safety to be able to continue with other tests if above fails
@@ -439,8 +437,8 @@ insert into t1 values (default);
select * from t1;
drop table t1;
set @@sql_mode='TRADITIONAL';
---error ER_BLOB_CANT_HAVE_DEFAULT
create table t1 (a text default '');
+drop table t1;
set @@sql_mode='';
#
@@ -522,7 +520,7 @@ CREATE TABLE b15776 (a char(2147483648));
--error ER_TOO_BIG_FIELDLENGTH
CREATE TABLE b15776 (a char(4294967295));
# Even BLOB won't hold
---error ER_TOO_BIG_DISPLAYWIDTH
+--error ER_TOO_BIG_FIELDLENGTH
CREATE TABLE b15776 (a char(4294967296));
@@ -534,8 +532,6 @@ CREATE TABLE b15776 (a year(4294967295));
INSERT INTO b15776 VALUES (42);
SELECT * FROM b15776;
DROP TABLE b15776;
---error ER_TOO_BIG_DISPLAYWIDTH
-CREATE TABLE b15776 (a year(4294967296));
CREATE TABLE b15776 (a year(0)); # 0 is special case, means default size
DROP TABLE b15776;
--error ER_PARSE_ERROR
@@ -543,13 +539,11 @@ CREATE TABLE b15776 (a year(-2));
# We've already tested the case, but this should visually show that
-# widths that are too large to be interpreted cause DISPLAYWIDTH errors.
+# widths that are too large to be interpreted cause errors.
--error ER_TOO_BIG_DISPLAYWIDTH
CREATE TABLE b15776 (a int(999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999));
---error ER_TOO_BIG_DISPLAYWIDTH
+--error ER_TOO_BIG_FIELDLENGTH
CREATE TABLE b15776 (a char(999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999));
---error ER_TOO_BIG_DISPLAYWIDTH
-CREATE TABLE b15776 (a year(999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999));
## Do not select, too much memory needed.
CREATE TABLE b15776 select cast(null as char(4294967295));
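
The expected outcomes shift here: char widths that overflow a 32-bit length now fail as ER_TOO_BIG_FIELDLENGTH rather than ER_TOO_BIG_DISPLAYWIDTH, oversized year() widths no longer error, and blob/text defaults are accepted. Sketch of the last case in the same mysqltest idiom:

    # Previously rejected with ER_BLOB_CANT_HAVE_DEFAULT; now accepted:
    create table t1 (a text default '');
    drop table t1;
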
diff --git a/storage/tokudb/mysql-test/tokudb/t/type_newdecimal.test b/storage/tokudb/mysql-test/tokudb/t/type_newdecimal.test
index 6047993e1ad..17916288e85 100644
--- a/storage/tokudb/mysql-test/tokudb/t/type_newdecimal.test
+++ b/storage/tokudb/mysql-test/tokudb/t/type_newdecimal.test
@@ -611,14 +611,14 @@ select round(99999999999999999.999,3);
select round(-99999999999999999.999,3);
#-- should return -100000000000000000.000
#
-select truncate(99999999999999999999999999999999999999,31);
+select truncate(99999999999999999999999999999999999999,49);
#-- should return 99999999999999999999999999999999999999.000
#
-select truncate(99.999999999999999999999999999999999999,31);
+select truncate(99.999999999999999999999999999999999999,49);
#-- should return 99.9999999999999999999999999999999
#
select truncate(99999999999999999999999999999999999999,-31);
-# should return 90000000000000000000000000000000
+# should return 99999990000000000000000000000000000000
#
#-- 6. Set functions (AVG, SUM, COUNT) should work.
#
@@ -965,9 +965,11 @@ select cast(ln(14000) as decimal(2,3)) c1;
--error 1426
create table t1 (sl decimal(70,30));
--error 1425
-create table t1 (sl decimal(32,31));
+create table t1 (sl decimal(32,39));
+--error 1426
+create table t1 (sl decimal(67,38));
--error 1425
-create table t1 (sl decimal(0,38));
+create table t1 (sl decimal(0,50));
--error 1427
create table t1 (sl decimal(0,30));
create table t1 (sl decimal(5, 5));
@@ -975,7 +977,7 @@ replace_regex /ENGINE=[a-zA-Z]*/ENGINE=ENGINE/;
show create table t1;
drop table t1;
# Test limits
-create table t1 (sl decimal(65, 30));
+create table t1 (sl decimal(65, 38));
replace_regex /ENGINE=[a-zA-Z]*/ENGINE=ENGINE/;
show create table t1;
drop table t1;
@@ -1190,15 +1192,15 @@ SELECT CAST(1 AS decimal(65,10));
--error ER_TOO_BIG_PRECISION
SELECT CAST(1 AS decimal(66,10));
-SELECT CAST(1 AS decimal(65,30));
+SELECT CAST(1 AS decimal(65,38));
--error ER_TOO_BIG_SCALE
-SELECT CAST(1 AS decimal(65,31));
+SELECT CAST(1 AS decimal(65,39));
CREATE TABLE t1 (a int DEFAULT NULL, b int DEFAULT NULL);
INSERT INTO t1 VALUES (3,30), (1,10), (2,10);
SELECT a+CAST(1 AS decimal(65,30)) AS aa, SUM(b) FROM t1 GROUP BY aa;
--error ER_TOO_BIG_SCALE
-SELECT a+CAST(1 AS decimal(65,31)) AS aa, SUM(b) FROM t1 GROUP BY aa;
+SELECT a+CAST(1 AS decimal(65,49)) AS aa, SUM(b) FROM t1 GROUP BY aa;
DROP TABLE t1;
@@ -1223,7 +1225,7 @@ DROP TABLE t1;
# maximum precision of 30 places after the decimal point. Show that
# temp field creation beyond that works and throws a truncation warning.
# DECIMAL(37,36) should be adjusted to DECIMAL(31,30).
-CREATE TABLE t1 SELECT 0.123456789012345678901234567890123456 AS f1;
+CREATE TABLE t1 SELECT 0.1234567890123456789012345678901234567890123456789 AS f1;
DESC t1;
SELECT f1 FROM t1;
DROP TABLE t1;
@@ -1432,7 +1434,7 @@ DROP TABLE t1,t2;
CREATE TABLE t1 (a DECIMAL(30,30));
INSERT INTO t1 VALUES (0.1),(0.2),(0.3);
-CREATE TABLE t2 SELECT IFNULL(a + 0.0000000000000000000000000000001, NULL) AS c1 FROM t1;
+CREATE TABLE t2 SELECT IFNULL(a + 0.00000000000000000000000000000000000000000000000001, NULL) AS c1 FROM t1;
DESC t2;
DROP TABLE t1,t2;
diff --git a/storage/tokudb/mysql-test/tokudb/t/type_varchar.opt b/storage/tokudb/mysql-test/tokudb/t/type_varchar.opt
new file mode 100644
index 00000000000..35db081c15b
--- /dev/null
+++ b/storage/tokudb/mysql-test/tokudb/t/type_varchar.opt
@@ -0,0 +1 @@
+--character-set-server=latin1 --collation_server=latin1_swedish_ci
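
The new .opt file starts the server with latin1 defaults so the latin1_swedish_ci metadata recorded in type_varchar.result stays valid, and the test itself additionally sources include/default_charset.inc (next hunk). The pairing looks like:

    # type_varchar.opt — per-test server options:
    #   --character-set-server=latin1 --collation_server=latin1_swedish_ci
    # type_varchar.test — opening lines:
    source include/have_tokudb.inc;
    source include/default_charset.inc;
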
diff --git a/storage/tokudb/mysql-test/tokudb/t/type_varchar.test b/storage/tokudb/mysql-test/tokudb/t/type_varchar.test
index 5becbf138a3..7db848c959a 100644
--- a/storage/tokudb/mysql-test/tokudb/t/type_varchar.test
+++ b/storage/tokudb/mysql-test/tokudb/t/type_varchar.test
@@ -1,4 +1,5 @@
source include/have_tokudb.inc;
+source include/default_charset.inc;
set default_storage_engine='tokudb';
--disable_warnings
drop table if exists t1, t2;
diff --git a/storage/tokudb/mysql-test/tokudb_alter_table/r/ai_part.result b/storage/tokudb/mysql-test/tokudb_alter_table/r/ai_part.result
index 6bfe78bbef8..29d2be47f1c 100644
--- a/storage/tokudb/mysql-test/tokudb_alter_table/r/ai_part.result
+++ b/storage/tokudb/mysql-test/tokudb_alter_table/r/ai_part.result
@@ -10,8 +10,8 @@ foo CREATE TABLE `foo` (
`b` int(11) DEFAULT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (a)
-PARTITIONS 2 */
+ PARTITION BY HASH (`a`)
+PARTITIONS 2
ALTER TABLE foo ADD KEY(b);
SHOW CREATE TABLE foo;
Table Create Table
@@ -21,6 +21,6 @@ foo CREATE TABLE `foo` (
PRIMARY KEY (`a`),
KEY `b` (`b`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (a)
-PARTITIONS 2 */
+ PARTITION BY HASH (`a`)
+PARTITIONS 2
DROP TABLE foo;
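
SHOW CREATE TABLE no longer wraps the partitioning clause in a /*!50100 ... */ version comment, and the hash expression is backquoted. Sketch:

    CREATE TABLE foo (a INT, b INT, PRIMARY KEY (a)) ENGINE=TokuDB
    PARTITION BY HASH (a) PARTITIONS 2;
    -- Output now ends with:
    --  PARTITION BY HASH (`a`)
    -- PARTITIONS 2
    SHOW CREATE TABLE foo;
    DROP TABLE foo;
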
diff --git a/storage/tokudb/mysql-test/tokudb_alter_table/r/alter_column_default.result b/storage/tokudb/mysql-test/tokudb_alter_table/r/alter_column_default.result
index 2c1390ad2a4..4c63047444d 100644
--- a/storage/tokudb/mysql-test/tokudb_alter_table/r/alter_column_default.result
+++ b/storage/tokudb/mysql-test/tokudb_alter_table/r/alter_column_default.result
@@ -5,14 +5,14 @@ CREATE TABLE foo (a INT NOT NULL DEFAULT 0, b INT DEFAULT NULL);
SHOW CREATE TABLE foo;
Table Create Table
foo CREATE TABLE `foo` (
- `a` int(11) NOT NULL DEFAULT '0',
+ `a` int(11) NOT NULL DEFAULT 0,
`b` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
ALTER TABLE foo ALTER COLUMN a SET DEFAULT 100;
SHOW CREATE TABLE foo;
Table Create Table
foo CREATE TABLE `foo` (
- `a` int(11) NOT NULL DEFAULT '100',
+ `a` int(11) NOT NULL DEFAULT 100,
`b` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
ALTER TABLE foo ALTER COLUMN a DROP DEFAULT;
@@ -27,7 +27,7 @@ SHOW CREATE TABLE foo;
Table Create Table
foo CREATE TABLE `foo` (
`a` int(11) NOT NULL,
- `b` int(11) DEFAULT '42'
+ `b` int(11) DEFAULT 42
) ENGINE=TokuDB DEFAULT CHARSET=latin1
ALTER TABLE foo ALTER COLUMN b DROP DEFAULT;
SHOW CREATE TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb_alter_table/r/frm_discover_partition.result b/storage/tokudb/mysql-test/tokudb_alter_table/r/frm_discover_partition.result
index 4e49d3642d2..8087757395d 100644
--- a/storage/tokudb/mysql-test/tokudb_alter_table/r/frm_discover_partition.result
+++ b/storage/tokudb/mysql-test/tokudb_alter_table/r/frm_discover_partition.result
@@ -7,8 +7,8 @@ foo CREATE TABLE `foo` (
`id` int(11) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (id)
-PARTITIONS 2 */
+ PARTITION BY HASH (id)
+PARTITIONS 2
CREATE TABLE bar LIKE foo;
SHOW CREATE TABLE bar;
Table Create Table
@@ -16,8 +16,8 @@ bar CREATE TABLE `bar` (
`id` int(11) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (id)
-PARTITIONS 2 */
+ PARTITION BY HASH (id)
+PARTITIONS 2
ALTER TABLE foo ADD COLUMN a INT;
SHOW CREATE TABLE foo;
Table Create Table
@@ -26,8 +26,8 @@ foo CREATE TABLE `foo` (
`a` int(11) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (id)
-PARTITIONS 2 */
+ PARTITION BY HASH (id)
+PARTITIONS 2
FLUSH TABLES;
SHOW CREATE TABLE foo;
Table Create Table
@@ -36,6 +36,6 @@ foo CREATE TABLE `foo` (
`a` int(11) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (id)
-PARTITIONS 2 */
+ PARTITION BY HASH (id)
+PARTITIONS 2
DROP TABLE foo, bar;
diff --git a/storage/tokudb/mysql-test/tokudb_alter_table/r/hcad_part.result b/storage/tokudb/mysql-test/tokudb_alter_table/r/hcad_part.result
index 1c7ee865451..99f87aec225 100644
--- a/storage/tokudb/mysql-test/tokudb_alter_table/r/hcad_part.result
+++ b/storage/tokudb/mysql-test/tokudb_alter_table/r/hcad_part.result
@@ -9,8 +9,8 @@ foo CREATE TABLE `foo` (
`b` int(11) DEFAULT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (a)
-PARTITIONS 2 */
+ PARTITION BY HASH (`a`)
+PARTITIONS 2
ALTER TABLE foo ADD COLUMN c INT;
SHOW CREATE TABLE foo;
Table Create Table
@@ -20,6 +20,6 @@ foo CREATE TABLE `foo` (
`c` int(11) DEFAULT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (a)
-PARTITIONS 2 */
+ PARTITION BY HASH (`a`)
+PARTITIONS 2
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb_alter_table/r/hcad_tmp_tables_56.result b/storage/tokudb/mysql-test/tokudb_alter_table/r/hcad_tmp_tables_56.result
index ff03e9c81ab..932a6171781 100644
--- a/storage/tokudb/mysql-test/tokudb_alter_table/r/hcad_tmp_tables_56.result
+++ b/storage/tokudb/mysql-test/tokudb_alter_table/r/hcad_tmp_tables_56.result
@@ -5,7 +5,7 @@ create temporary table bar (a int, key(a))engine=TOkuDB;
alter table bar add column c int default 0;
create index blah on bar(a);
Warnings:
-Note 1831 Duplicate index `blah`. This is deprecated and will be disallowed in a future release.
+Note 1831 Duplicate index `blah`. This is deprecated and will be disallowed in a future release
drop index a on bar;
set session tokudb_disable_slow_alter=OFF;
insert into bar (a) values (1),(2),(3);
diff --git a/storage/tokudb/mysql-test/tokudb_alter_table/r/hcad_with_dels.result b/storage/tokudb/mysql-test/tokudb_alter_table/r/hcad_with_dels.result
index 4d89ee445b0..8bbee9405b8 100644
--- a/storage/tokudb/mysql-test/tokudb_alter_table/r/hcad_with_dels.result
+++ b/storage/tokudb/mysql-test/tokudb_alter_table/r/hcad_with_dels.result
@@ -2,7 +2,9 @@ SET DEFAULT_STORAGE_ENGINE = 'tokudb';
set session transaction isolation level repeatable read;
set session tokudb_disable_slow_alter=ON;
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo,bar;
+connection conn1;
set session transaction isolation level repeatable read;
create table foo (a int, b int) engine=TokuDB;
create table bar (a int) engine=TokuDB;
@@ -10,11 +12,16 @@ insert into foo values (1,10),(2,20),(3,30);
begin;
select * from bar;
a
+connection default;
delete from foo;
select * from foo;
a b
alter table foo add column c int first;
alter table foo drop column b;
+connection conn1;
commit;
+connection default;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo,bar;
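
The connect/connection/disconnect lines added throughout these .result files are mysqltest's own connection commands; with connection logging enabled, as in newer MariaDB test runs, each command is echoed into the recorded output, so the expected results must list them. The two-session pattern the tests follow, sketched against a hypothetical table t:

    # Hypothetical mysqltest sketch of the multi-session pattern.
    connect conn1,localhost,root,,;   # open a second root session
    connection conn1;                 # run the next statements in it
    begin;
    select * from t;
    connection default;               # switch back; concurrent work here
    connection conn1;
    commit;
    disconnect conn1;
    connection default;
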
diff --git a/storage/tokudb/mysql-test/tokudb_alter_table/r/hcad_with_lock_sps.result b/storage/tokudb/mysql-test/tokudb_alter_table/r/hcad_with_lock_sps.result
index 26601df5f93..88f28362119 100644
--- a/storage/tokudb/mysql-test/tokudb_alter_table/r/hcad_with_lock_sps.result
+++ b/storage/tokudb/mysql-test/tokudb_alter_table/r/hcad_with_lock_sps.result
@@ -120,7 +120,7 @@ create function f0() returns int
begin
alter table foo add column ggg int;
end|
-ERROR HY000: Explicit or implicit commit is not allowed in stored function or trigger.
+ERROR HY000: Explicit or implicit commit is not allowed in stored function or trigger
set autocommit=0;
create procedure p1()
begin
diff --git a/storage/tokudb/mysql-test/tokudb_alter_table/r/hcr.result b/storage/tokudb/mysql-test/tokudb_alter_table/r/hcr.result
index f4fc1b67bb6..f850e28081e 100644
--- a/storage/tokudb/mysql-test/tokudb_alter_table/r/hcr.result
+++ b/storage/tokudb/mysql-test/tokudb_alter_table/r/hcr.result
@@ -31,7 +31,7 @@ Table Create Table
foo CREATE TABLE `foo` (
`a` int(11) DEFAULT NULL,
`b` varchar(10) DEFAULT NULL,
- `c` blob,
+ `c` blob DEFAULT NULL,
KEY `b` (`b`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
select * from foo;
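
The blob and text columns gain an explicit DEFAULT NULL in the expected output because MariaDB 10.2 supports DEFAULT clauses on BLOB/TEXT columns, so SHOW CREATE TABLE now spells out the implicit NULL default. A minimal sketch, assuming a 10.2+ server and a hypothetical table t:

    # Hypothetical example: the implicit default is now printed.
    CREATE TABLE t (c BLOB) ENGINE=TokuDB;
    SHOW CREATE TABLE t;  # prints: `c` blob DEFAULT NULL
    DROP TABLE t;
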
diff --git a/storage/tokudb/mysql-test/tokudb_alter_table/r/hcr3.result b/storage/tokudb/mysql-test/tokudb_alter_table/r/hcr3.result
index 4c700489b7f..b7aefaa4b8d 100644
--- a/storage/tokudb/mysql-test/tokudb_alter_table/r/hcr3.result
+++ b/storage/tokudb/mysql-test/tokudb_alter_table/r/hcr3.result
@@ -14,7 +14,7 @@ foo CREATE TABLE `foo` (
`b` bigint(20) DEFAULT NULL,
`c` char(10) DEFAULT NULL,
`d` varchar(10) DEFAULT NULL,
- `e` text,
+ `e` text DEFAULT NULL,
PRIMARY KEY (`aa`),
KEY `b` (`b`),
KEY `d` (`d`) `clustering`=yes
@@ -36,7 +36,7 @@ foo CREATE TABLE `foo` (
`bb` bigint(20) DEFAULT NULL,
`c` char(10) DEFAULT NULL,
`d` varchar(10) DEFAULT NULL,
- `e` text,
+ `e` text DEFAULT NULL,
PRIMARY KEY (`aa`),
KEY `b` (`bb`),
KEY `d` (`d`) `clustering`=yes
@@ -55,7 +55,7 @@ foo CREATE TABLE `foo` (
`bb` bigint(20) DEFAULT NULL,
`c` char(10) DEFAULT NULL,
`dd` varchar(10) DEFAULT NULL,
- `e` text,
+ `e` text DEFAULT NULL,
PRIMARY KEY (`aa`),
KEY `b` (`bb`),
KEY `d` (`dd`) `clustering`=yes
diff --git a/storage/tokudb/mysql-test/tokudb_alter_table/r/mod_enum.result b/storage/tokudb/mysql-test/tokudb_alter_table/r/mod_enum.result
index 21bfb990ba9..4ab905cfa71 100644
--- a/storage/tokudb/mysql-test/tokudb_alter_table/r/mod_enum.result
+++ b/storage/tokudb/mysql-test/tokudb_alter_table/r/mod_enum.result
@@ -49,6 +49,7 @@ ALTER TABLE test_enum MODIFY COLUMN col2 ENUM('value1','value2','value4');
ERROR 42000: Table 'test_enum' uses an extension that doesn't exist in this XYZ version
set tokudb_disable_hot_alter=1;
set tokudb_disable_slow_alter=0;
+SET STATEMENT sql_mode = 'NO_ENGINE_SUBSTITUTION' FOR
ALTER TABLE test_enum MODIFY COLUMN col2 ENUM('value1','value2','value4');
Warnings:
Warning 1265 Data truncated for column 'col2' at row 3
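
The inserted SET STATEMENT prefix keeps the lossy ALTER running under a default sql_mode that includes strict flags: SET STATEMENT var = value FOR stmt applies the setting to that one statement only, and with the strict flags removed the truncation is reported as warning 1265 rather than aborting. A sketch of the construct, reusing the test's table name:

    # Strict mode would abort this ALTER with a "Data truncated" error;
    # narrowing sql_mode for one statement downgrades it to a warning.
    SET STATEMENT sql_mode = 'NO_ENGINE_SUBSTITUTION' FOR
    ALTER TABLE test_enum MODIFY COLUMN col2 ENUM('value1','value2','value4');
    SHOW WARNINGS;
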
diff --git a/storage/tokudb/mysql-test/tokudb_alter_table/r/null_bytes_add_key.result b/storage/tokudb/mysql-test/tokudb_alter_table/r/null_bytes_add_key.result
index 9e1504a9d89..82373ac318e 100644
--- a/storage/tokudb/mysql-test/tokudb_alter_table/r/null_bytes_add_key.result
+++ b/storage/tokudb/mysql-test/tokudb_alter_table/r/null_bytes_add_key.result
@@ -38,6 +38,7 @@ c31 date,
key(c31)) ENGINE=tokudb;
INSERT INTO t (c25) VALUES (NULL);
UPDATE t SET c27=0;
+SET STATEMENT sql_mode = 'NO_ENGINE_SUBSTITUTION' FOR
ALTER TABLE t ADD PRIMARY KEY(c19,c27)USING HASH;
Warnings:
Warning 1265 Data truncated for column 'c19' at row 1
diff --git a/storage/tokudb/mysql-test/tokudb_alter_table/r/null_bytes_col_rename.result b/storage/tokudb/mysql-test/tokudb_alter_table/r/null_bytes_col_rename.result
index 45a12fba8d2..95a869add10 100644
--- a/storage/tokudb/mysql-test/tokudb_alter_table/r/null_bytes_col_rename.result
+++ b/storage/tokudb/mysql-test/tokudb_alter_table/r/null_bytes_col_rename.result
@@ -38,6 +38,7 @@ c31 date,
key(c31)) ENGINE=tokudb;
INSERT INTO t (c25) VALUES (NULL);
UPDATE t SET c27=0;
+SET STATEMENT sql_mode = 'NO_ENGINE_SUBSTITUTION' FOR
ALTER TABLE t ADD PRIMARY KEY(c19,c27)USING HASH;
Warnings:
Warning 1265 Data truncated for column 'c19' at row 1
diff --git a/storage/tokudb/mysql-test/tokudb_alter_table/r/null_bytes_drop_default.result b/storage/tokudb/mysql-test/tokudb_alter_table/r/null_bytes_drop_default.result
index d5ff7e092fe..4cb323d0161 100644
--- a/storage/tokudb/mysql-test/tokudb_alter_table/r/null_bytes_drop_default.result
+++ b/storage/tokudb/mysql-test/tokudb_alter_table/r/null_bytes_drop_default.result
@@ -38,6 +38,7 @@ c31 date,
key(c31)) ENGINE=tokudb;
INSERT INTO t (c25) VALUES (NULL);
UPDATE t SET c27=0;
+SET STATEMENT sql_mode = 'NO_ENGINE_SUBSTITUTION' FOR
ALTER TABLE t ADD PRIMARY KEY(c19,c27)USING HASH;
Warnings:
Warning 1265 Data truncated for column 'c19' at row 1
diff --git a/storage/tokudb/mysql-test/tokudb_alter_table/r/null_bytes_drop_key.result b/storage/tokudb/mysql-test/tokudb_alter_table/r/null_bytes_drop_key.result
index 09b0deaf990..59ca690adc4 100644
--- a/storage/tokudb/mysql-test/tokudb_alter_table/r/null_bytes_drop_key.result
+++ b/storage/tokudb/mysql-test/tokudb_alter_table/r/null_bytes_drop_key.result
@@ -38,6 +38,7 @@ c31 date,
key(c31)) ENGINE=tokudb;
INSERT INTO t (c25) VALUES (NULL);
UPDATE t SET c27=0;
+SET STATEMENT sql_mode = 'NO_ENGINE_SUBSTITUTION' FOR
ALTER TABLE t ADD PRIMARY KEY(c19,c27)USING HASH;
Warnings:
Warning 1265 Data truncated for column 'c19' at row 1
diff --git a/storage/tokudb/mysql-test/tokudb_alter_table/t/mod_enum.test b/storage/tokudb/mysql-test/tokudb_alter_table/t/mod_enum.test
index 4efa3afd96e..5166c6cc10e 100644
--- a/storage/tokudb/mysql-test/tokudb_alter_table/t/mod_enum.test
+++ b/storage/tokudb/mysql-test/tokudb_alter_table/t/mod_enum.test
@@ -50,6 +50,7 @@ ALTER TABLE test_enum MODIFY COLUMN col2 ENUM('value1','value2','value4');
set tokudb_disable_hot_alter=1;
set tokudb_disable_slow_alter=0;
+SET STATEMENT sql_mode = 'NO_ENGINE_SUBSTITUTION' FOR
ALTER TABLE test_enum MODIFY COLUMN col2 ENUM('value1','value2','value4');
SELECT * FROM test_enum;
diff --git a/storage/tokudb/mysql-test/tokudb_alter_table/t/null_bytes_add_key.test b/storage/tokudb/mysql-test/tokudb_alter_table/t/null_bytes_add_key.test
index 70e2a07e4cc..28870a86bdc 100644
--- a/storage/tokudb/mysql-test/tokudb_alter_table/t/null_bytes_add_key.test
+++ b/storage/tokudb/mysql-test/tokudb_alter_table/t/null_bytes_add_key.test
@@ -47,6 +47,7 @@ c31 date,
key(c31)) ENGINE=tokudb;
INSERT INTO t (c25) VALUES (NULL);
UPDATE t SET c27=0;
+SET STATEMENT sql_mode = 'NO_ENGINE_SUBSTITUTION' FOR
ALTER TABLE t ADD PRIMARY KEY(c19,c27)USING HASH;
UPDATE t SET c27=0;
ALTER TABLE t ROW_FORMAT=FIXED KEY_BLOCK_SIZE=1;
diff --git a/storage/tokudb/mysql-test/tokudb_alter_table/t/null_bytes_col_rename.test b/storage/tokudb/mysql-test/tokudb_alter_table/t/null_bytes_col_rename.test
index 048e39dc757..fe7b4ec1544 100644
--- a/storage/tokudb/mysql-test/tokudb_alter_table/t/null_bytes_col_rename.test
+++ b/storage/tokudb/mysql-test/tokudb_alter_table/t/null_bytes_col_rename.test
@@ -47,6 +47,7 @@ c31 date,
key(c31)) ENGINE=tokudb;
INSERT INTO t (c25) VALUES (NULL);
UPDATE t SET c27=0;
+SET STATEMENT sql_mode = 'NO_ENGINE_SUBSTITUTION' FOR
ALTER TABLE t ADD PRIMARY KEY(c19,c27)USING HASH;
UPDATE t SET c27=0;
ALTER TABLE t ROW_FORMAT=FIXED KEY_BLOCK_SIZE=1;
diff --git a/storage/tokudb/mysql-test/tokudb_alter_table/t/null_bytes_drop_default.test b/storage/tokudb/mysql-test/tokudb_alter_table/t/null_bytes_drop_default.test
index a5695e8a4c6..ef2b83a3d31 100644
--- a/storage/tokudb/mysql-test/tokudb_alter_table/t/null_bytes_drop_default.test
+++ b/storage/tokudb/mysql-test/tokudb_alter_table/t/null_bytes_drop_default.test
@@ -47,6 +47,7 @@ c31 date,
key(c31)) ENGINE=tokudb;
INSERT INTO t (c25) VALUES (NULL);
UPDATE t SET c27=0;
+SET STATEMENT sql_mode = 'NO_ENGINE_SUBSTITUTION' FOR
ALTER TABLE t ADD PRIMARY KEY(c19,c27)USING HASH;
UPDATE t SET c27=0;
ALTER TABLE t ROW_FORMAT=FIXED KEY_BLOCK_SIZE=1;
diff --git a/storage/tokudb/mysql-test/tokudb_alter_table/t/null_bytes_drop_key.test b/storage/tokudb/mysql-test/tokudb_alter_table/t/null_bytes_drop_key.test
index d2726c30ddd..41ea0e2fd2e 100644
--- a/storage/tokudb/mysql-test/tokudb_alter_table/t/null_bytes_drop_key.test
+++ b/storage/tokudb/mysql-test/tokudb_alter_table/t/null_bytes_drop_key.test
@@ -47,6 +47,7 @@ c31 date,
key(c31)) ENGINE=tokudb;
INSERT INTO t (c25) VALUES (NULL);
UPDATE t SET c27=0;
+SET STATEMENT sql_mode = 'NO_ENGINE_SUBSTITUTION' FOR
ALTER TABLE t ADD PRIMARY KEY(c19,c27)USING HASH;
UPDATE t SET c27=0;
ALTER TABLE t ADD KEY (c25);
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/1853.result b/storage/tokudb/mysql-test/tokudb_bugs/r/1853.result
index 72843d83b25..5fd72cb3ff2 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/1853.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/1853.result
@@ -1,6 +1,8 @@
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
DROP TABLE IF EXISTS foo;
+connection conn1;
set session transaction isolation level serializable;
create table foo ( a double, b double, c double, primary key (a,b));
insert into foo values (4,4,4),(4,5,5),(4,6,6),(5,4,4),(5,5,5),(5,6,6),(6,4,4),(6,5,5),(6,6,6);
@@ -10,10 +12,12 @@ a b c
6 4 4
6 5 5
6 6 6
+connection default;
set session transaction isolation level serializable;
insert into foo values (5,10,10);
insert into foo values (5.0001,10,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection conn1;
commit;
begin;
select * from foo where a >= 5;
@@ -25,9 +29,11 @@ a b c
6 4 4
6 5 5
6 6 6
+connection default;
insert into foo values (5,1,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (4.999,10,10);
+connection conn1;
commit;
begin;
select * from foo where a < 5;
@@ -36,9 +42,11 @@ a b c
4 5 5
4 6 6
4.999 10 10
+connection default;
insert into foo values (5,0.1,10);
insert into foo values (4.9999,10,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection conn1;
commit;
begin;
select * from foo where a <= 5;
@@ -52,9 +60,11 @@ a b c
5 5 5
5 6 6
5 10 10
+connection default;
insert into foo values (5,0.01,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (5.001,10,10);
+connection conn1;
commit;
begin;
select * from foo where a = 5;
@@ -64,10 +74,12 @@ a b c
5 5 5
5 6 6
5 10 10
+connection default;
insert into foo values (5,0.01,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (5.0001,10,10);
insert into foo values (4.99999,1,10);
+connection conn1;
commit;
begin;
select * from foo where a > 4 and a < 6;
@@ -81,12 +93,14 @@ a b c
5 10 10
5.0001 10 10
5.001 10 10
+connection default;
insert into foo values (4.1,0.01,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (5.9,10,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (6,10,10);
insert into foo values (4,10,10);
+connection conn1;
commit;
begin;
select * from foo where a >= 4 and a < 6;
@@ -104,12 +118,14 @@ a b c
5 10 10
5.0001 10 10
5.001 10 10
+connection default;
insert into foo values (4,0.01,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (5.9,10,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (6,1.1,10);
insert into foo values (3.99,10,10);
+connection conn1;
commit;
begin;
select * from foo where a > 4 and a <= 6;
@@ -128,12 +144,14 @@ a b c
6 5 5
6 6 6
6 10 10
+connection default;
insert into foo values (4.0001,0.01,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (6,1110,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (6.001,1.1,10);
insert into foo values (4,1110,10);
+connection conn1;
commit;
begin;
select * from foo where a >= 4 and a <= 6;
@@ -157,12 +175,14 @@ a b c
6 5 5
6 6 6
6 10 10
+connection default;
insert into foo values (4,0.001,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (6,11110,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (6.0001,1.1,10);
insert into foo values (3.99,10110,10);
+connection conn1;
commit;
delete from foo;
insert into foo values (4,4,4),(4,5,5),(4,6,6),(5,4,4),(5,5,5),(5,6,6),(6,4,4),(6,5,5),(6,6,6);
@@ -172,9 +192,11 @@ a b c
6 6 6
6 5 5
6 4 4
+connection default;
insert into foo values (5,10,10);
insert into foo values (5.0001,10,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection conn1;
commit;
begin;
select * from foo where a >= 5 order by a desc;
@@ -186,9 +208,11 @@ a b c
5 6 6
5 5 5
5 4 4
+connection default;
insert into foo values (5,1,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (4.999,10,10);
+connection conn1;
commit;
begin;
select * from foo where a < 5 order by a desc;
@@ -197,9 +221,11 @@ a b c
4 6 6
4 5 5
4 4 4
+connection default;
insert into foo values (5,0.1,10);
insert into foo values (4.9999,10,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection conn1;
commit;
begin;
select * from foo where a <= 5 order by a desc;
@@ -213,9 +239,11 @@ a b c
4 6 6
4 5 5
4 4 4
+connection default;
insert into foo values (5,0.01,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (5.001,10,10);
+connection conn1;
commit;
begin;
select * from foo where a = 5 order by a desc;
@@ -225,10 +253,12 @@ a b c
5 5 5
5 6 6
5 10 10
+connection default;
insert into foo values (5,0.01,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (5.0001,10,10);
insert into foo values (4.99999,1,10);
+connection conn1;
commit;
begin;
select * from foo where a > 4 and a < 6 order by a desc;
@@ -242,12 +272,14 @@ a b c
5 0.1 10
4.99999 1 10
4.999 10 10
+connection default;
insert into foo values (4.1,0.01,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (5.9,10,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (6,10,10);
insert into foo values (4,10,10);
+connection conn1;
commit;
begin;
select * from foo where a >= 4 and a < 6 order by a desc;
@@ -265,12 +297,14 @@ a b c
4 6 6
4 5 5
4 4 4
+connection default;
insert into foo values (4,0.01,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (5.9,10,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (6,1.1,10);
insert into foo values (3.99,10,10);
+connection conn1;
commit;
begin;
select * from foo where a > 4 and a <= 6 order by a desc;
@@ -289,12 +323,14 @@ a b c
5 0.1 10
4.99999 1 10
4.999 10 10
+connection default;
insert into foo values (4.0001,0.01,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (6,1110,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (6.001,1.1,10);
insert into foo values (4,1110,10);
+connection conn1;
commit;
begin;
select * from foo where a >= 4 and a <= 6 order by a desc;
@@ -318,21 +354,25 @@ a b c
4 6 6
4 5 5
4 4 4
+connection default;
insert into foo values (4,0.001,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (6,11110,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (6.0001,1.1,10);
insert into foo values (3.99,10110,10);
+connection conn1;
commit;
begin;
select count(*) from foo;
count(*)
23
+connection default;
insert into foo values (0,0.001,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (9999999,11110,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection conn1;
commit;
begin;
select * from foo order by a desc;
@@ -360,10 +400,12 @@ a b c
4 1110 10
3.99 10110 10
3.99 10 10
+connection default;
insert into foo values (0,0.001,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (9999999,11110,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection conn1;
commit;
alter table foo drop primary key;
begin;
@@ -392,9 +434,13 @@ a b c
6 10 10
6.0001 1.1 10
6.001 1.1 10
+connection default;
insert into foo values (0,0.001,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
insert into foo values (9999999,11110,10);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection conn1;
commit;
+disconnect conn1;
+connection default;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/2494-read-committed.result b/storage/tokudb/mysql-test/tokudb_bugs/r/2494-read-committed.result
index 884767dbe94..63a9ef3d8ac 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/2494-read-committed.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/2494-read-committed.result
@@ -1,6 +1,8 @@
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
DROP TABLE IF EXISTS foo;
+connection conn1;
set session transaction isolation level read committed;
create table foo ( a int, b int, primary key (a));
insert into foo values (1,1),(2,2),(3,1),(4,3);
@@ -18,6 +20,7 @@ a b
2 2
3 10
4 3
+connection default;
set session transaction isolation level read committed;
select * from foo;
a b
@@ -32,6 +35,7 @@ a b
2 2
3 10
4 3
+connection conn1;
rollback;
begin;
insert into foo values (5,1),(6,2),(7,1),(8,3);
@@ -45,6 +49,7 @@ a b
6 2
7 1
8 3
+connection default;
set session transaction isolation level read committed;
select * from foo;
a b
@@ -63,6 +68,7 @@ a b
6 2
7 1
8 3
+connection conn1;
commit;
begin;
delete from foo where b=1;
@@ -72,6 +78,7 @@ a b
4 3
6 2
8 3
+connection default;
set session transaction isolation level read committed;
select * from foo;
a b
@@ -90,6 +97,10 @@ a b
4 3
6 2
8 3
+connection conn1;
commit;
+connection default;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/2641.result b/storage/tokudb/mysql-test/tokudb_bugs/r/2641.result
index bf4e7aec2d9..0af82cae523 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/2641.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/2641.result
@@ -1,18 +1,25 @@
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
DROP TABLE IF EXISTS foo, bar;
+connection conn1;
set session transaction isolation level read committed;
create table foo ( a int, b int, primary key (a));
create table bar (a int);
begin;
insert into foo values (1,1),(2,2),(3,1),(4,3);
+connection default;
set session transaction isolation level repeatable read;
begin;
select * from bar;
a
+connection conn1;
commit;
+connection default;
select * from foo;
a b
commit;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo, bar;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/2952.result b/storage/tokudb/mysql-test/tokudb_bugs/r/2952.result
index d0319ab9f74..df6dd0604dc 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/2952.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/2952.result
@@ -1,6 +1,8 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
DROP TABLE IF EXISTS foo;
+connection default;
set session transaction isolation level read uncommitted;
set session tokudb_prelock_empty=1;
set session tokudb_load_save_space=0;
@@ -15,6 +17,7 @@ foo CREATE TABLE `foo` (
) ENGINE=TokuDB DEFAULT CHARSET=latin1
begin;
insert into foo values (1,10,100),(2,20,200),(3,30,300);
+connection conn1;
set session transaction isolation level read uncommitted;
set session lock_wait_timeout=1;
insert into foo values (100,100,100);
@@ -23,6 +26,7 @@ alter table foo drop index a;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
truncate table foo;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection default;
commit;
drop table foo;
set session tokudb_prelock_empty=1;
@@ -38,12 +42,14 @@ foo CREATE TABLE `foo` (
) ENGINE=TokuDB DEFAULT CHARSET=latin1
begin;
insert into foo values (1,10,100),(2,20,200),(3,30,300);
+connection conn1;
insert into foo values (100,100,100);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
alter table foo drop index a;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
truncate table foo;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection default;
commit;
drop table foo;
set session tokudb_prelock_empty=0;
@@ -59,10 +65,14 @@ foo CREATE TABLE `foo` (
) ENGINE=TokuDB DEFAULT CHARSET=latin1
begin;
insert into foo values (1,10,100),(2,20,200),(3,30,300);
+connection conn1;
insert into foo values (100,100,100);
alter table foo drop index a;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
truncate table foo;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection default;
+disconnect conn1;
+connection default;
set session transaction isolation level serializable;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/5974-2.result b/storage/tokudb/mysql-test/tokudb_bugs/r/5974-2.result
index ee6970fce7d..b397c604b6b 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/5974-2.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/5974-2.result
@@ -4,11 +4,15 @@ create table toku1 (a int) engine=TokuDB;
create table toku2 (a int) engine=TokuDB;
insert into toku1 values (1),(2),(3);
insert into toku2 values (1),(2),(3);
+connect conn1,localhost,root,,;
+connection default;
set session transaction isolation level READ UNCOMMITTED;
begin;
insert into toku2 select * from toku1;
+connection conn1;
set session transaction isolation level READ UNCOMMITTED;
insert into toku1 values (4);
+connection default;
commit;
select * from toku2;
a
@@ -18,6 +22,7 @@ a
1
2
3
+connection conn1;
commit;
select * from toku1;
a
@@ -25,4 +30,6 @@ a
2
3
4
+connection default;
+disconnect conn1;
DROP TABLE toku1, toku2;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/5974.result b/storage/tokudb/mysql-test/tokudb_bugs/r/5974.result
index c22e565bcb4..338786c442e 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/5974.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/5974.result
@@ -4,11 +4,15 @@ create table toku1 (a int) engine=TokuDB;
create table toku2 (a int) engine=TokuDB;
insert into toku1 values (1),(2),(3);
insert into toku2 values (1),(2),(3);
+connect conn1,localhost,root,,;
+connection default;
set session transaction isolation level READ COMMITTED;
begin;
insert into toku2 select * from toku1;
+connection conn1;
set session transaction isolation level READ COMMITTED;
insert into toku1 values (4);
+connection default;
commit;
select * from toku2;
a
@@ -18,6 +22,7 @@ a
1
2
3
+connection conn1;
commit;
select * from toku1;
a
@@ -25,4 +30,6 @@ a
2
3
4
+connection default;
+disconnect conn1;
DROP TABLE toku1, toku2;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/PS-3773.result b/storage/tokudb/mysql-test/tokudb_bugs/r/PS-3773.result
index 49c61790837..c870ac1c784 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/PS-3773.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/PS-3773.result
@@ -3,6 +3,6 @@ SET tokudb_auto_analyze=0;
INSERT INTO t1 VALUES(0,0,0), (1,1,1), (2,2,2), (3,3,3), (4,4,4), (5,5,5);
SET GLOBAL debug_dbug = "+d,tokudb_fake_db_notfound_error_in_read_full_row";
SELECT * FROM t1 WHERE b = 2;
-ERROR HY000: Incorrect key file for table 't1'; try to repair it
+ERROR HY000: Index for table 't1' is corrupt; try to repair it
DROP TABLE t1;
-FOUND /ha_tokudb::read_full_row on table/ in tokudb.bugs.PS-3773.log
+FOUND 1 /ha_tokudb::read_full_row on table/ in tokudb.bugs.PS-3773.log
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/PS-5163.result b/storage/tokudb/mysql-test/tokudb_bugs/r/PS-5163.result
index a203787f11d..27e19150945 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/PS-5163.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/PS-5163.result
@@ -1,4 +1,4 @@
-CREATE TABLE t1(c1 INT,c2 INT,c3 CHAR(10),c4 CHAR(10),c5 CHAR(10),PRIMARY KEY(c1),INDEX(c3,c4(1),c5(1)),INDEX(c2)) ENGINE=TokuDB;
+CREATE TABLE t1(c1 INT default 0,c2 INT,c3 CHAR(10),c4 CHAR(10),c5 CHAR(10),PRIMARY KEY(c1),INDEX(c3,c4(1),c5(1)),INDEX(c2)) ENGINE=TokuDB;
INSERT INTO t1 VALUES(),(),(),(),();
ERROR 23000: Duplicate entry '0' for key 'PRIMARY'
UPDATE t1 SET c1=1 WHERE c1=1 OR c2=1;
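
Giving c1 an explicit default pins down the failure mode the PS-5163 test expects: INSERT ... VALUES(),(),... fills every column from its default, so all five empty rows get c1 = 0 and the second collides with the first. Sketched on a hypothetical table t:

    # Hypothetical example: empty VALUES() rows take column defaults.
    CREATE TABLE t (c1 INT DEFAULT 0, PRIMARY KEY (c1)) ENGINE=TokuDB;
    INSERT INTO t VALUES (),();
    # ERROR 23000: Duplicate entry '0' for key 'PRIMARY'
    DROP TABLE t;
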
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/alter_table_copy_table.result b/storage/tokudb/mysql-test/tokudb_bugs/r/alter_table_copy_table.result
index e8765ec7ab9..25e181d90cc 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/alter_table_copy_table.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/alter_table_copy_table.result
@@ -3,6 +3,7 @@ set tokudb_disable_hot_alter=ON;
set default_storage_engine=tokudb;
create table t (id int, x char(2), primary key (id), unique key (x));
insert into t values (1,'aa'),(2,'ab');
+set statement sql_mode = '' for
alter table t modify x char(1);
ERROR 23000: Duplicate entry 'a' for key 'x'
drop table t;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/checkpoint_lock.result b/storage/tokudb/mysql-test/tokudb_bugs/r/checkpoint_lock.result
index f93f567961a..dff746fa280 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/checkpoint_lock.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/checkpoint_lock.result
@@ -1,6 +1,8 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
set global tokudb_checkpoint_on_flush_logs=ON;
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
+connection default;
select DB, command, state, info from information_schema.processlist where id != connection_id();
DB command state info
test Sleep NULL
@@ -8,10 +10,15 @@ flush logs;
select DB, command, state, info from information_schema.processlist where id != connection_id();
DB command state info
test Sleep NULL
+connection conn1;
set tokudb_checkpoint_lock=1;
+connection default;
flush logs;;
+connection conn1;
select DB, command, state, info from information_schema.processlist where id != connection_id();
DB command state info
test Query init flush logs
set tokudb_checkpoint_lock=0;
+connection default;
+disconnect conn1;
set global tokudb_checkpoint_on_flush_logs=OFF;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/checkpoint_lock_3.result b/storage/tokudb/mysql-test/tokudb_bugs/r/checkpoint_lock_3.result
index 3e689191d59..01d6d29d659 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/checkpoint_lock_3.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/checkpoint_lock_3.result
@@ -1,5 +1,7 @@
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
+connection default;
select DB, command, state, info from information_schema.processlist where id != connection_id();
DB command state info
test Sleep NULL
@@ -7,9 +9,14 @@ flush logs;
select DB, command, state, info from information_schema.processlist where id != connection_id();
DB command state info
test Sleep NULL
+connection conn1;
set tokudb_checkpoint_lock=1;
+connection default;
flush logs;;
+connection conn1;
select DB, command, state, info from information_schema.processlist where id != connection_id();
DB command state info
test Sleep NULL
set tokudb_checkpoint_lock=0;
+connection default;
+disconnect conn1;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/db397_delete_trigger.result b/storage/tokudb/mysql-test/tokudb_bugs/r/db397_delete_trigger.result
index c8565fb4b2b..dce79487f51 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/db397_delete_trigger.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/db397_delete_trigger.result
@@ -12,6 +12,7 @@ ts timestamp not null default now());
insert into t1 (col1,col2) values (0,0);
insert into t1_audit (col1,action) values (0,'DUMMY');
set local tokudb_prelock_empty=0;
+set statement sql_mode = '' for
create trigger t1_trigger before delete on t1
for each row
insert into t1_audit (col1, action) values (old.col1, 'BEFORE DELETE');
@@ -32,6 +33,7 @@ locks_dname locks_key_left locks_key_right
./test/t1_audit-main 0200000000000000 0200000000000000
commit;
drop trigger t1_trigger;
+set statement sql_mode = '' for
create trigger t1_trigger after delete on t1
for each row
insert into t1_audit (col1, action) values (old.col1, 'AFTER DELETE');
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/db801.result b/storage/tokudb/mysql-test/tokudb_bugs/r/db801.result
index 800db69ba39..92a18f4a97b 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/db801.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/db801.result
@@ -2,17 +2,27 @@ set default_storage_engine=tokudb;
drop table if exists t;
create table t (id int not null primary key, c int not null) engine=tokudb;
insert into t values (1,0);
+connect conn1,localhost,root,,;
+connection default;
begin;
update t set c=10 where id=1;
+connection conn1;
update t set c=100;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection default;
rollback;
+disconnect conn1;
drop table t;
create table t (id int not null primary key, c int not null) engine=tokudb partition by hash(id) partitions 1;
insert into t values (1,0);
+connect conn1,localhost,root,,;
+connection default;
begin;
update t set c=10 where id=1;
+connection conn1;
update t set c=100;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection default;
rollback;
+disconnect conn1;
drop table t;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/db938.result b/storage/tokudb/mysql-test/tokudb_bugs/r/db938.result
index 30e0bdbebd7..fb332155563 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/db938.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/db938.result
@@ -19,10 +19,15 @@ insert into t1(b,c) values(0,0), (1,1), (2,2), (3,3);
select database_name, table_name, job_type, job_params, scheduler from information_schema.tokudb_background_job_status;
database_name table_name job_type job_params scheduler
test t1 TOKUDB_ANALYZE_MODE_STANDARD TOKUDB_ANALYZE_DELETE_FRACTION=1.000000; TOKUDB_ANALYZE_TIME=0; TOKUDB_ANALYZE_THROTTLE=0; AUTO
+connect conn1, localhost, root;
set DEBUG_SYNC = 'tokudb_after_truncate_all_dictionarys SIGNAL closed WAIT_FOR done';
TRUNCATE TABLE t1;
+connection default;
set global tokudb_debug_pause_background_job_manager = FALSE;
set DEBUG_SYNC = 'now SIGNAL done';
+connection conn1;
+connection default;
+disconnect conn1;
set DEBUG_SYNC = 'RESET';
drop table t1;
set session tokudb_auto_analyze = @orig_auto_analyze;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/db945.result b/storage/tokudb/mysql-test/tokudb_bugs/r/db945.result
index 6b3c239d602..edebae3b9e0 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/db945.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/db945.result
@@ -7,7 +7,7 @@ set session tokudb_analyze_throttle = 0;
set session tokudb_analyze_time = 0;
create table t1(a int, b text(1), c text(1), filler text(1), primary key(a, b(1)), unique key (a, c(1)));
lock tables t1 write, t1 as a read, t1 as b read;
-insert into t1(a) values(1);
+insert ignore into t1(a) values(1);
Warnings:
Warning 1364 Field 'b' doesn't have a default value
alter table t1 drop key a;
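
The db945 switch to insert ignore preserves the recorded warning under strict sql_mode: column b is part of the primary key and therefore NOT NULL, so omitting it without a default would be an error unless IGNORE downgrades it to warning 1364. A sketch with a hypothetical table t:

    # Hypothetical example: IGNORE turns the missing-default error
    # into the warning the result file records.
    CREATE TABLE t (a INT, b TEXT, PRIMARY KEY (a, b(1))) ENGINE=TokuDB;
    INSERT IGNORE INTO t(a) VALUES (1);
    SHOW WARNINGS;  # Warning 1364 Field 'b' doesn't have a default value
    DROP TABLE t;
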
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/fileops-2.result b/storage/tokudb/mysql-test/tokudb_bugs/r/fileops-2.result
index 3f9da4ac4af..fa7edccc719 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/fileops-2.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/fileops-2.result
@@ -1,10 +1,13 @@
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
DROP TABLE IF EXISTS foo;
+connection conn1;
create table foo ( a int, b int, c int, key (a), key (b));
insert into foo values (1,10,100);
begin;
insert into foo values(2,20,200);
+connection default;
set session lock_wait_timeout=1;
select * from foo;
a b c
@@ -19,5 +22,8 @@ alter table foo add index (c);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
alter table foo drop index a;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection conn1;
commit;
+disconnect conn1;
+connection default;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/fileops-4.result b/storage/tokudb/mysql-test/tokudb_bugs/r/fileops-4.result
index e026a151726..747869b37e9 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/fileops-4.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/fileops-4.result
@@ -1,10 +1,13 @@
# Establish connection conn1 (user = root)
+connect conn1,localhost,root,,;
SET DEFAULT_STORAGE_ENGINE = 'tokudb';
DROP TABLE IF EXISTS foo;
+connection conn1;
create table foo ( a int, b int, c int, key (a), key (b));
insert into foo values (1,10,100);
begin;
insert into foo values (2,20,200);
+connection default;
set session lock_wait_timeout=1;
select * from foo;
a b c
@@ -17,5 +20,9 @@ truncate table foo;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
alter table foo drop index a;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection conn1;
commit;
+connection default;
+disconnect conn1;
+connection default;
DROP TABLE foo;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/ft-index-40.result b/storage/tokudb/mysql-test/tokudb_bugs/r/ft-index-40.result
index d4b2e44af3b..56d33a39494 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/ft-index-40.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/ft-index-40.result
@@ -1,22 +1,32 @@
drop table if exists t;
+connect conn1,localhost,root,,;
+connection default;
create table t (a varchar(50), primary key(a)) engine='tokudb';
insert into t values ("hello world");
select * from t;
a
hello world
begin;
+connection conn1;
select * from t;
a
hello world
+connection default;
update t set a="HELLO WORLD";
+connection conn1;
select * from t;
a
hello world
+connection default;
select * from t;
a
HELLO WORLD
rollback;
+connection conn1;
select * from t;
a
hello world
+connection default;
+connection default;
+disconnect conn1;
drop table t;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/leak172.result b/storage/tokudb/mysql-test/tokudb_bugs/r/leak172.result
index c3d8358724f..fa98811c803 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/leak172.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/leak172.result
@@ -117,8 +117,13 @@ KEY `c27` (`c27`(255)),
KEY `c31` (`c31`)
);
LOAD DATA INFILE 'leak172_t1.data' INTO TABLE `t1` fields terminated by ',';
+connect conn1,localhost,root,,;
set session debug_dbug="+d,tokudb_end_bulk_insert_sleep";
LOAD DATA INFILE 'leak172_t2.data' INTO TABLE `t2` fields terminated by ',';
+connection default;
UPDATE t1, t2 SET t1.`c5` = 4 WHERE t1.`c6` <= 'o';
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection conn1;
+connection default;
+disconnect conn1;
drop table t1,t2;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/lock_uniq_key_empty.result b/storage/tokudb/mysql-test/tokudb_bugs/r/lock_uniq_key_empty.result
index 325aef46afe..eabe936c6c1 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/lock_uniq_key_empty.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/lock_uniq_key_empty.result
@@ -1,28 +1,42 @@
set default_storage_engine=tokudb;
drop table if exists t;
create table t (id int, unique key(id));
+connect c1,localhost,root,,;
set tokudb_prelock_empty=OFF;
begin;
insert into t values (1);
+connect c2,localhost,root,,;
begin;
insert into t values (2);
+connection c1;
commit;
+connection c2;
commit;
+connection default;
select * from t;
id
1
2
+disconnect c1;
+disconnect c2;
drop table if exists t;
create table t (id int not null, unique key(id));
+connect c1,localhost,root,,;
set tokudb_prelock_empty=OFF;
begin;
insert into t values (1);
+connect c2,localhost,root,,;
begin;
insert into t values (2);
+connection c1;
commit;
+connection c2;
commit;
+connection default;
select * from t;
id
1
2
+disconnect c1;
+disconnect c2;
drop table if exists t;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/lock_uniq_key_left.result b/storage/tokudb/mysql-test/tokudb_bugs/r/lock_uniq_key_left.result
index b94dbbbd1b5..a561f2bb3a0 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/lock_uniq_key_left.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/lock_uniq_key_left.result
@@ -2,31 +2,45 @@ set default_storage_engine=tokudb;
drop table if exists t;
create table t (id int, unique key(id));
insert into t values (10),(100);
+connect c1,localhost,root,,;
begin;
insert into t values (5);
+connect c2,localhost,root,,;
begin;
insert into t values (6);
+connection c1;
commit;
+connection c2;
commit;
+connection default;
select * from t;
id
5
6
10
100
+disconnect c1;
+disconnect c2;
drop table if exists t;
create table t (id int not null, unique key(id));
insert into t values (10),(100);
+connect c1,localhost,root,,;
begin;
insert into t values (5);
+connect c2,localhost,root,,;
begin;
insert into t values (6);
+connection c1;
commit;
+connection c2;
commit;
+connection default;
select * from t;
id
5
6
10
100
+disconnect c1;
+disconnect c2;
drop table if exists t;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/lock_uniq_key_middle.result b/storage/tokudb/mysql-test/tokudb_bugs/r/lock_uniq_key_middle.result
index 763cfc88812..87515acab10 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/lock_uniq_key_middle.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/lock_uniq_key_middle.result
@@ -2,31 +2,45 @@ set default_storage_engine=tokudb;
drop table if exists t;
create table t (id int, unique key(id));
insert into t values (10),(100);
+connect c1,localhost,root,,;
begin;
insert into t values (50);
+connect c2,localhost,root,,;
begin;
insert into t values (60);
+connection c1;
commit;
+connection c2;
commit;
+connection default;
select * from t;
id
10
50
60
100
+disconnect c1;
+disconnect c2;
drop table if exists t;
create table t (id int not null, unique key(id));
insert into t values (10),(100);
+connect c1,localhost,root,,;
begin;
insert into t values (50);
+connect c2,localhost,root,,;
begin;
insert into t values (60);
+connection c1;
commit;
+connection c2;
commit;
+connection default;
select * from t;
id
10
50
60
100
+disconnect c1;
+disconnect c2;
drop table if exists t;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/lock_uniq_key_right.result b/storage/tokudb/mysql-test/tokudb_bugs/r/lock_uniq_key_right.result
index b820a4f0806..6ec937f5850 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/lock_uniq_key_right.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/lock_uniq_key_right.result
@@ -2,31 +2,45 @@ set default_storage_engine=tokudb;
drop table if exists t;
create table t (id int, unique key(id));
insert into t values (10),(100);
+connect c1,localhost,root,,;
begin;
insert into t values (500);
+connect c2,localhost,root,,;
begin;
insert into t values (600);
+connection c1;
commit;
+connection c2;
commit;
+connection default;
select * from t;
id
10
100
500
600
+disconnect c1;
+disconnect c2;
drop table if exists t;
create table t (id int not null, unique key(id));
insert into t values (10),(100);
+connect c1,localhost,root,,;
begin;
insert into t values (500);
+connect c2,localhost,root,,;
begin;
insert into t values (600);
+connection c1;
commit;
+connection c2;
commit;
+connection default;
select * from t;
id
10
100
500
600
+disconnect c1;
+disconnect c2;
drop table if exists t;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/xa.result b/storage/tokudb/mysql-test/tokudb_bugs/r/xa.result
index 5a117087412..7dd7bb625cf 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/xa.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/xa.result
@@ -1,3 +1,4 @@
+call mtr.add_suppression("Deadlock found when trying to get lock; try restarting transaction");
drop table if exists t1, t2;
create table t1 (a int) engine=tokudb;
xa start 'test1';
@@ -28,6 +29,8 @@ begin;
ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the IDLE state
create table t2 (a int);
ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the IDLE state
+connect con1,localhost,root,,;
+connection con1;
xa start 'testa','testb';
ERROR XAE08: XAER_DUPID: The XID already exists
xa start 'testa','testb', 123;
@@ -41,6 +44,7 @@ ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction
xa recover;
formatID gtrid_length bqual_length data
11 5 5 testb 0@P`
+connection default;
xa prepare 'testa','testb';
xa recover;
formatID gtrid_length bqual_length data
@@ -51,20 +55,29 @@ ERROR XAE04: XAER_NOTA: Unknown XID
xa rollback 'testa','testb';
xa start 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz';
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '' at line 1
+connection con1;
xa rollback 'testb',0x2030405060,11;
+connection default;
select * from t1;
a
20
drop table t1;
+disconnect con1;
drop table if exists t1;
create table t1(a int, b int, c varchar(20), primary key(a)) engine = tokudb;
insert into t1 values(1, 1, 'a');
insert into t1 values(2, 2, 'b');
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
xa start 'a','b';
update t1 set c = 'aa' where a = 1;
+connection con2;
xa start 'a','c';
update t1 set c = 'bb' where a = 2;
+connection con1;
update t1 set c = 'bb' where a = 2;
+connection con2;
update t1 set c = 'aa' where a = 1;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
select count(*) from t1;
@@ -72,9 +85,17 @@ count(*)
2
xa end 'a','c';
xa rollback 'a','c';
+disconnect con2;
+connect con3,localhost,root,,;
+connection con3;
xa start 'a','c';
+connection con1;
xa end 'a','b';
xa rollback 'a','b';
+connection con3;
+disconnect con1;
+disconnect con3;
+connection default;
drop table t1;
#
# BUG#51342 - more xid crashing
@@ -110,20 +131,28 @@ xa prepare 'a';
xa commit 'a';
CREATE TABLE t1(a INT, KEY(a)) ENGINE=TokuDB;
INSERT INTO t1 VALUES(1),(2);
+connect con1,localhost,root,,;
BEGIN;
UPDATE t1 SET a=3 WHERE a=1;
+connection default;
BEGIN;
UPDATE t1 SET a=4 WHERE a=2;
+connection con1;
UPDATE t1 SET a=5 WHERE a=2;
+connection default;
UPDATE t1 SET a=5 WHERE a=1;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
ROLLBACK;
+connection con1;
ROLLBACK;
BEGIN;
UPDATE t1 SET a=3 WHERE a=1;
+connection default;
XA START 'xid1';
UPDATE t1 SET a=4 WHERE a=2;
+connection con1;
UPDATE t1 SET a=5 WHERE a=2;
+connection default;
UPDATE t1 SET a=5 WHERE a=1;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
XA END 'xid1';
@@ -131,5 +160,8 @@ XA ROLLBACK 'xid1';
XA START 'xid1';
XA END 'xid1';
XA ROLLBACK 'xid1';
+connection con1;
commit;
+connection default;
+disconnect con1;
DROP TABLE t1;
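
The xa.result hunks interleave several sessions over the standard XA two-phase flow; for orientation, a minimal single-session round trip, assuming a hypothetical table t:

    # Hypothetical example of the XA statement sequence the test drives.
    XA START 'xid1';
    INSERT INTO t VALUES (1);
    XA END 'xid1';
    XA PREPARE 'xid1';
    XA COMMIT 'xid1';  # or: XA ROLLBACK 'xid1'
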
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/PS-5163.test b/storage/tokudb/mysql-test/tokudb_bugs/t/PS-5163.test
index 5fc01bb5f0e..d370bab6517 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/t/PS-5163.test
+++ b/storage/tokudb/mysql-test/tokudb_bugs/t/PS-5163.test
@@ -1,6 +1,6 @@
--source include/have_tokudb.inc
-CREATE TABLE t1(c1 INT,c2 INT,c3 CHAR(10),c4 CHAR(10),c5 CHAR(10),PRIMARY KEY(c1),INDEX(c3,c4(1),c5(1)),INDEX(c2)) ENGINE=TokuDB;
+CREATE TABLE t1(c1 INT default 0,c2 INT,c3 CHAR(10),c4 CHAR(10),c5 CHAR(10),PRIMARY KEY(c1),INDEX(c3,c4(1),c5(1)),INDEX(c2)) ENGINE=TokuDB;
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES(),(),(),(),();
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/alter_table_copy_table.test b/storage/tokudb/mysql-test/tokudb_bugs/t/alter_table_copy_table.test
index 6e4433183b0..0c85115bd20 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/t/alter_table_copy_table.test
+++ b/storage/tokudb/mysql-test/tokudb_bugs/t/alter_table_copy_table.test
@@ -8,5 +8,6 @@ set default_storage_engine=tokudb;
create table t (id int, x char(2), primary key (id), unique key (x));
insert into t values (1,'aa'),(2,'ab');
--error 1062
+set statement sql_mode = '' for
alter table t modify x char(1);
drop table t; \ No newline at end of file
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/db397_delete_trigger.test b/storage/tokudb/mysql-test/tokudb_bugs/t/db397_delete_trigger.test
index 7343768a7d7..0502b35bc2c 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/t/db397_delete_trigger.test
+++ b/storage/tokudb/mysql-test/tokudb_bugs/t/db397_delete_trigger.test
@@ -21,6 +21,7 @@ insert into t1 (col1,col2) values (0,0);
insert into t1_audit (col1,action) values (0,'DUMMY');
set local tokudb_prelock_empty=0;
+set statement sql_mode = '' for
create trigger t1_trigger before delete on t1
for each row
insert into t1_audit (col1, action) values (old.col1, 'BEFORE DELETE');
@@ -34,6 +35,7 @@ select locks_dname,locks_key_left,locks_key_right from information_schema.tokudb
commit;
drop trigger t1_trigger;
+set statement sql_mode = '' for
create trigger t1_trigger after delete on t1
for each row
insert into t1_audit (col1, action) values (old.col1, 'AFTER DELETE');
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/db945.test b/storage/tokudb/mysql-test/tokudb_bugs/t/db945.test
index 7996f9f5792..50dc91829d8 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/t/db945.test
+++ b/storage/tokudb/mysql-test/tokudb_bugs/t/db945.test
@@ -13,7 +13,7 @@ set session tokudb_analyze_time = 0;
create table t1(a int, b text(1), c text(1), filler text(1), primary key(a, b(1)), unique key (a, c(1)));
lock tables t1 write, t1 as a read, t1 as b read;
-insert into t1(a) values(1);
+insert ignore into t1(a) values(1);
alter table t1 drop key a;
unlock tables;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/xa.test b/storage/tokudb/mysql-test/tokudb_bugs/t/xa.test
index 3d3d558c0c9..96ccf04fb02 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/t/xa.test
+++ b/storage/tokudb/mysql-test/tokudb_bugs/t/xa.test
@@ -6,6 +6,8 @@
# Save the initial number of concurrent sessions
--source include/count_sessions.inc
+call mtr.add_suppression("Deadlock found when trying to get lock; try restarting transaction");
+
--disable_warnings
drop table if exists t1, t2;
--enable_warnings
diff --git a/storage/tokudb/mysql-test/tokudb_mariadb/r/alter.result b/storage/tokudb/mysql-test/tokudb_mariadb/r/alter.result
index 600e34dfffd..95364221920 100644
--- a/storage/tokudb/mysql-test/tokudb_mariadb/r/alter.result
+++ b/storage/tokudb/mysql-test/tokudb_mariadb/r/alter.result
@@ -8,7 +8,7 @@ show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
`i` int(11) DEFAULT NULL,
- `j` int(11) DEFAULT '0'
+ `j` int(11) DEFAULT 0
) ENGINE=TokuDB DEFAULT CHARSET=latin1
alter table t1 modify i int default '1';
select * from t1;
@@ -17,8 +17,8 @@ i j
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `i` int(11) DEFAULT '1',
- `j` int(11) DEFAULT '0'
+ `i` int(11) DEFAULT 1,
+ `j` int(11) DEFAULT 0
) ENGINE=TokuDB DEFAULT CHARSET=latin1
alter table t1 modify j int default '2', rename t2;
select * from t1;
@@ -29,7 +29,7 @@ i j
show create table t2;
Table Create Table
t2 CREATE TABLE `t2` (
- `i` int(11) DEFAULT '1',
- `j` int(11) DEFAULT '2'
+ `i` int(11) DEFAULT 1,
+ `j` int(11) DEFAULT 2
) ENGINE=TokuDB DEFAULT CHARSET=latin1
drop table t2;
diff --git a/storage/tokudb/mysql-test/tokudb_mariadb/r/autoinc.result b/storage/tokudb/mysql-test/tokudb_mariadb/r/autoinc.result
index 3d424357736..098fcb67549 100644
--- a/storage/tokudb/mysql-test/tokudb_mariadb/r/autoinc.result
+++ b/storage/tokudb/mysql-test/tokudb_mariadb/r/autoinc.result
@@ -1,29 +1,36 @@
create table t1 (a int auto_increment, b bigint(20), primary key (b,a)) engine=tokudb;
start transaction;
insert t1 (b) values (1);
+connect con2,localhost,root;
set tokudb_lock_timeout=1;
insert t1 (b) values (1);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
set tokudb_lock_timeout=default;
insert t1 (b) values (1);
+connection default;
insert t1 (b) values (1);
commit;
+connection con2;
commit;
select * from t1;
a b
1 1
2 1
3 1
+connection default;
alter table t1 partition by range (b) (partition p0 values less than (9));
start transaction;
insert t1 (b) values (2);
+connection con2;
set tokudb_lock_timeout=1;
insert t1 (b) values (2);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
set tokudb_lock_timeout=default;
insert t1 (b) values (2);
+connection default;
insert t1 (b) values (2);
commit;
+connection con2;
commit;
select * from t1;
a b
diff --git a/storage/tokudb/mysql-test/tokudb_mariadb/r/clustering.result b/storage/tokudb/mysql-test/tokudb_mariadb/r/clustering.result
index 86f6fd4cdc2..28c758ff628 100644
--- a/storage/tokudb/mysql-test/tokudb_mariadb/r/clustering.result
+++ b/storage/tokudb/mysql-test/tokudb_mariadb/r/clustering.result
@@ -4,7 +4,7 @@ Table Create Table
t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` int(11) DEFAULT NULL,
- `c` text,
+ `c` text DEFAULT NULL,
KEY `a` (`a`) `clustering`=yes,
KEY `b` (`b`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
diff --git a/storage/tokudb/mysql-test/tokudb_mariadb/r/mdev5426.result b/storage/tokudb/mysql-test/tokudb_mariadb/r/mdev5426.result
index 086c4f4cc18..38114567d71 100644
--- a/storage/tokudb/mysql-test/tokudb_mariadb/r/mdev5426.result
+++ b/storage/tokudb/mysql-test/tokudb_mariadb/r/mdev5426.result
@@ -2,5 +2,8 @@ CREATE TABLE t1 (i INT) ENGINE=TokuDB;
EXPLAIN INSERT INTO t1 SELECT * FROM t1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1 Using temporary
+connect con1,localhost,root,,test;
INSERT INTO t1 SELECT * FROM t1;
+connection default;
+disconnect con1;
DROP TABLE t1;
diff --git a/storage/tokudb/mysql-test/tokudb_mariadb/r/optimize.result b/storage/tokudb/mysql-test/tokudb_mariadb/r/optimize.result
index c32a7d61129..ac7174137a8 100644
--- a/storage/tokudb/mysql-test/tokudb_mariadb/r/optimize.result
+++ b/storage/tokudb/mysql-test/tokudb_mariadb/r/optimize.result
@@ -2,12 +2,15 @@ create table t1 (a int) engine=tokudb;
insert t1 values (1),(2),(3);
set debug_sync='before_admin_operator_func WAIT_FOR go';
OPTIMIZE TABLE t1;
+connect c1,localhost,root,,;
select * from t1;
a
1
2
3
set debug_sync='now SIGNAL go';
+disconnect c1;
+connection default;
Table Op Msg_type Msg_text
test.t1 optimize note Table does not support optimize, doing recreate + analyze instead
test.t1 optimize status OK
diff --git a/storage/tokudb/mysql-test/tokudb_mariadb/r/xa.result b/storage/tokudb/mysql-test/tokudb_mariadb/r/xa.result
index ca86d854bdb..4724a0af926 100644
--- a/storage/tokudb/mysql-test/tokudb_mariadb/r/xa.result
+++ b/storage/tokudb/mysql-test/tokudb_mariadb/r/xa.result
@@ -34,6 +34,8 @@ begin;
ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the IDLE state
create table t2 (a int);
ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the IDLE state
+connect con1,localhost,root,,;
+connection con1;
xa start 'testa','testb';
ERROR XAE08: XAER_DUPID: The XID already exists
xa start 'testa','testb', 123;
@@ -47,6 +49,7 @@ ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction
xa recover;
formatID gtrid_length bqual_length data
11 5 5 testb 0@P`
+connection default;
xa prepare 'testa','testb';
xa recover;
formatID gtrid_length bqual_length data
@@ -60,4 +63,6 @@ ERROR 42000: You have an error in your SQL syntax; check the manual that corresp
select * from t1;
a
20
+disconnect con1;
+connection default;
drop table t1;
diff --git a/storage/tokudb/mysql-test/tokudb_mariadb/t/xa-recovery-9214.test b/storage/tokudb/mysql-test/tokudb_mariadb/t/xa-recovery-9214.test
index 3854fb99a3c..7d396eb84f4 100644
--- a/storage/tokudb/mysql-test/tokudb_mariadb/t/xa-recovery-9214.test
+++ b/storage/tokudb/mysql-test/tokudb_mariadb/t/xa-recovery-9214.test
@@ -1,7 +1,7 @@
#
# MDEV-9214 Server miscalculates the number of XA-capable engines
#
---source include/have_xtradb.inc
+--source include/have_innodb.inc
select 1;
diff --git a/storage/tokudb/mysql-test/tokudb_parts/disabled.def b/storage/tokudb/mysql-test/tokudb_parts/disabled.def
index 17a8ddcc12e..385772e3bcc 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/disabled.def
+++ b/storage/tokudb/mysql-test/tokudb_parts/disabled.def
@@ -8,3 +8,4 @@ partition_max_sub_parts_key_range_tokudb: 5.6 test not merged yet
partition_max_sub_parts_list_tokudb: 5.6 test not merged yet
partition_max_sub_parts_range_tokudb: 5.6 test not merged yet
nonflushing_analyze_debug: Freezes in MariaDB 10.0
+partition_alter4_tokudb : Frequently takes longer than 900 seconds
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/part_supported_sql_func_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/part_supported_sql_func_tokudb.result
index 5b860845490..6bb98671feb 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/part_supported_sql_func_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/part_supported_sql_func_tokudb.result
@@ -620,15 +620,15 @@ t55 CREATE TABLE `t55` (
`colint` int(11) DEFAULT NULL,
`col1` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (colint)
-SUBPARTITION BY HASH (abs(col1))
+ PARTITION BY LIST (`colint`)
+SUBPARTITION BY HASH (abs(`col1`))
SUBPARTITIONS 5
-(PARTITION p0 VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
- PARTITION p1 VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
- PARTITION p2 VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
- PARTITION p3 VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
- PARTITION p4 VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
- PARTITION p5 VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB) */
+(PARTITION `p0` VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
+ PARTITION `p2` VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
+ PARTITION `p3` VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
+ PARTITION `p4` VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
+ PARTITION `p5` VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB)
select * from t55 order by colint;
colint col1
1 15
@@ -2317,15 +2317,15 @@ t55 CREATE TABLE `t55` (
`colint` int(11) DEFAULT NULL,
`col1` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (colint)
-SUBPARTITION BY HASH (mod(col1,10))
+ PARTITION BY LIST (`colint`)
+SUBPARTITION BY HASH (`col1` % 10)
SUBPARTITIONS 5
-(PARTITION p0 VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
- PARTITION p1 VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
- PARTITION p2 VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
- PARTITION p3 VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
- PARTITION p4 VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
- PARTITION p5 VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB) */
+(PARTITION `p0` VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
+ PARTITION `p2` VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
+ PARTITION `p3` VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
+ PARTITION `p4` VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
+ PARTITION `p5` VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB)
select * from t55 order by colint;
colint col1
1 15
@@ -3667,15 +3667,15 @@ t55 CREATE TABLE `t55` (
`colint` int(11) DEFAULT NULL,
`col1` date DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (colint)
-SUBPARTITION BY HASH (day(col1))
+ PARTITION BY LIST (`colint`)
+SUBPARTITION BY HASH (dayofmonth(`col1`))
SUBPARTITIONS 5
-(PARTITION p0 VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
- PARTITION p1 VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
- PARTITION p2 VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
- PARTITION p3 VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
- PARTITION p4 VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
- PARTITION p5 VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB) */
+(PARTITION `p0` VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
+ PARTITION `p2` VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
+ PARTITION `p3` VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
+ PARTITION `p4` VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
+ PARTITION `p5` VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB)
select * from t55 order by colint;
colint col1
1 2006-02-05
@@ -4184,15 +4184,15 @@ t55 CREATE TABLE `t55` (
`colint` int(11) DEFAULT NULL,
`col1` date DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (colint)
-SUBPARTITION BY HASH (dayofmonth(col1))
+ PARTITION BY LIST (`colint`)
+SUBPARTITION BY HASH (dayofmonth(`col1`))
SUBPARTITIONS 5
-(PARTITION p0 VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
- PARTITION p1 VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
- PARTITION p2 VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
- PARTITION p3 VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
- PARTITION p4 VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
- PARTITION p5 VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB) */
+(PARTITION `p0` VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
+ PARTITION `p2` VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
+ PARTITION `p3` VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
+ PARTITION `p4` VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
+ PARTITION `p5` VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB)
select * from t55 order by colint;
colint col1
1 2006-02-05
@@ -4701,15 +4701,15 @@ t55 CREATE TABLE `t55` (
`colint` int(11) DEFAULT NULL,
`col1` date DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (colint)
-SUBPARTITION BY HASH (dayofweek(col1))
+ PARTITION BY LIST (`colint`)
+SUBPARTITION BY HASH (dayofweek(`col1`))
SUBPARTITIONS 5
-(PARTITION p0 VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
- PARTITION p1 VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
- PARTITION p2 VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
- PARTITION p3 VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
- PARTITION p4 VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
- PARTITION p5 VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB) */
+(PARTITION `p0` VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
+ PARTITION `p2` VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
+ PARTITION `p3` VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
+ PARTITION `p4` VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
+ PARTITION `p5` VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB)
select * from t55 order by colint;
colint col1
1 2006-02-03
@@ -5230,15 +5230,15 @@ t55 CREATE TABLE `t55` (
`colint` int(11) DEFAULT NULL,
`col1` date DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (colint)
-SUBPARTITION BY HASH (dayofyear(col1))
+ PARTITION BY LIST (`colint`)
+SUBPARTITION BY HASH (dayofyear(`col1`))
SUBPARTITIONS 5
-(PARTITION p0 VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
- PARTITION p1 VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
- PARTITION p2 VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
- PARTITION p3 VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
- PARTITION p4 VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
- PARTITION p5 VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB) */
+(PARTITION `p0` VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
+ PARTITION `p2` VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
+ PARTITION `p3` VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
+ PARTITION `p4` VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
+ PARTITION `p5` VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB)
select * from t55 order by colint;
colint col1
1 2006-02-03
@@ -5749,15 +5749,15 @@ t55 CREATE TABLE `t55` (
`colint` int(11) DEFAULT NULL,
`col1` date DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (colint)
-SUBPARTITION BY HASH (extract(month from col1))
+ PARTITION BY LIST (`colint`)
+SUBPARTITION BY HASH (extract(month from `col1`))
SUBPARTITIONS 5
-(PARTITION p0 VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
- PARTITION p1 VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
- PARTITION p2 VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
- PARTITION p3 VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
- PARTITION p4 VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
- PARTITION p5 VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB) */
+(PARTITION `p0` VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
+ PARTITION `p2` VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
+ PARTITION `p3` VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
+ PARTITION `p4` VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
+ PARTITION `p5` VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB)
select * from t55 order by colint;
colint col1
1 2006-02-03
@@ -6268,15 +6268,15 @@ t55 CREATE TABLE `t55` (
`colint` int(11) DEFAULT NULL,
`col1` time DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (colint)
-SUBPARTITION BY HASH (hour(col1))
+ PARTITION BY LIST (`colint`)
+SUBPARTITION BY HASH (hour(`col1`))
SUBPARTITIONS 5
-(PARTITION p0 VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
- PARTITION p1 VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
- PARTITION p2 VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
- PARTITION p3 VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
- PARTITION p4 VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
- PARTITION p5 VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB) */
+(PARTITION `p0` VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
+ PARTITION `p2` VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
+ PARTITION `p3` VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
+ PARTITION `p4` VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
+ PARTITION `p5` VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB)
select * from t55 order by colint;
colint col1
1 09:09:15
@@ -6793,15 +6793,15 @@ t55 CREATE TABLE `t55` (
`colint` int(11) DEFAULT NULL,
`col1` time(6) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (colint)
-SUBPARTITION BY HASH (microsecond(col1))
+ PARTITION BY LIST (`colint`)
+SUBPARTITION BY HASH (microsecond(`col1`))
SUBPARTITIONS 5
-(PARTITION p0 VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
- PARTITION p1 VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
- PARTITION p2 VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
- PARTITION p3 VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
- PARTITION p4 VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
- PARTITION p5 VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB) */
+(PARTITION `p0` VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
+ PARTITION `p2` VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
+ PARTITION `p3` VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
+ PARTITION `p4` VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
+ PARTITION `p5` VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB)
select * from t55 order by colint;
colint col1
1 05:30:34.000037
@@ -7314,15 +7314,15 @@ t55 CREATE TABLE `t55` (
`colint` int(11) DEFAULT NULL,
`col1` time DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (colint)
-SUBPARTITION BY HASH (minute(col1))
+ PARTITION BY LIST (`colint`)
+SUBPARTITION BY HASH (minute(`col1`))
SUBPARTITIONS 5
-(PARTITION p0 VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
- PARTITION p1 VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
- PARTITION p2 VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
- PARTITION p3 VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
- PARTITION p4 VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
- PARTITION p5 VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB) */
+(PARTITION `p0` VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
+ PARTITION `p2` VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
+ PARTITION `p3` VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
+ PARTITION `p4` VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
+ PARTITION `p5` VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB)
select * from t55 order by colint;
colint col1
1 10:24:23
@@ -7845,15 +7845,15 @@ t55 CREATE TABLE `t55` (
`colint` int(11) DEFAULT NULL,
`col1` time DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (colint)
-SUBPARTITION BY HASH (second(col1))
+ PARTITION BY LIST (`colint`)
+SUBPARTITION BY HASH (second(`col1`))
SUBPARTITIONS 5
-(PARTITION p0 VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
- PARTITION p1 VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
- PARTITION p2 VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
- PARTITION p3 VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
- PARTITION p4 VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
- PARTITION p5 VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB) */
+(PARTITION `p0` VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
+ PARTITION `p2` VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
+ PARTITION `p3` VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
+ PARTITION `p4` VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
+ PARTITION `p5` VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB)
select * from t55 order by colint;
colint col1
1 09:09:15
@@ -8376,15 +8376,15 @@ t55 CREATE TABLE `t55` (
`colint` int(11) DEFAULT NULL,
`col1` date DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (colint)
-SUBPARTITION BY HASH (month(col1))
+ PARTITION BY LIST (`colint`)
+SUBPARTITION BY HASH (month(`col1`))
SUBPARTITIONS 5
-(PARTITION p0 VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
- PARTITION p1 VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
- PARTITION p2 VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
- PARTITION p3 VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
- PARTITION p4 VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
- PARTITION p5 VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB) */
+(PARTITION `p0` VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
+ PARTITION `p2` VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
+ PARTITION `p3` VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
+ PARTITION `p4` VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
+ PARTITION `p5` VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB)
select * from t55 order by colint;
colint col1
1 2006-02-03
@@ -8901,15 +8901,15 @@ t55 CREATE TABLE `t55` (
`colint` int(11) DEFAULT NULL,
`col1` date DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (colint)
-SUBPARTITION BY HASH (quarter(col1))
+ PARTITION BY LIST (`colint`)
+SUBPARTITION BY HASH (quarter(`col1`))
SUBPARTITIONS 5
-(PARTITION p0 VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
- PARTITION p1 VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
- PARTITION p2 VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
- PARTITION p3 VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
- PARTITION p4 VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
- PARTITION p5 VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB) */
+(PARTITION `p0` VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
+ PARTITION `p2` VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
+ PARTITION `p3` VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
+ PARTITION `p4` VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
+ PARTITION `p5` VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB)
select * from t55 order by colint;
colint col1
1 2006-02-03
@@ -9424,15 +9424,15 @@ t55 CREATE TABLE `t55` (
`colint` int(11) DEFAULT NULL,
`col1` date DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (colint)
-SUBPARTITION BY HASH (weekday(col1))
+ PARTITION BY LIST (`colint`)
+SUBPARTITION BY HASH (weekday(`col1`))
SUBPARTITIONS 5
-(PARTITION p0 VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
- PARTITION p1 VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
- PARTITION p2 VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
- PARTITION p3 VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
- PARTITION p4 VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
- PARTITION p5 VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB) */
+(PARTITION `p0` VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
+ PARTITION `p2` VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
+ PARTITION `p3` VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
+ PARTITION `p4` VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
+ PARTITION `p5` VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB)
select * from t55 order by colint;
colint col1
1 2006-02-03
@@ -9945,15 +9945,15 @@ t55 CREATE TABLE `t55` (
`colint` int(11) DEFAULT NULL,
`col1` date DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (colint)
-SUBPARTITION BY HASH (year(col1)-1990)
+ PARTITION BY LIST (`colint`)
+SUBPARTITION BY HASH (year(`col1`) - 1990)
SUBPARTITIONS 5
-(PARTITION p0 VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
- PARTITION p1 VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
- PARTITION p2 VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
- PARTITION p3 VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
- PARTITION p4 VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
- PARTITION p5 VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB) */
+(PARTITION `p0` VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
+ PARTITION `p2` VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
+ PARTITION `p3` VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
+ PARTITION `p4` VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
+ PARTITION `p5` VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB)
select * from t55 order by colint;
colint col1
1 2006-02-03
@@ -10470,15 +10470,15 @@ t55 CREATE TABLE `t55` (
`colint` int(11) DEFAULT NULL,
`col1` date DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (colint)
-SUBPARTITION BY HASH (yearweek(col1)-200600)
+ PARTITION BY LIST (`colint`)
+SUBPARTITION BY HASH (yearweek(`col1`,0) - 200600)
SUBPARTITIONS 5
-(PARTITION p0 VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
- PARTITION p1 VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
- PARTITION p2 VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
- PARTITION p3 VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
- PARTITION p4 VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
- PARTITION p5 VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB) */
+(PARTITION `p0` VALUES IN (1,2,3,4,5,6,7,8,9,10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES IN (11,12,13,14,15,16,17,18,19,20) ENGINE = TokuDB,
+ PARTITION `p2` VALUES IN (21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
+ PARTITION `p3` VALUES IN (31,32,33,34,35,36,37,38,39,40) ENGINE = TokuDB,
+ PARTITION `p4` VALUES IN (41,42,43,44,45,46,47,48,49,50) ENGINE = TokuDB,
+ PARTITION `p5` VALUES IN (51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB)
select * from t55 order by colint;
colint col1
1 2006-02-03
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter1_1_2_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter1_1_2_tokudb.result
index 3fb51c67d00..3d944dccdb1 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter1_1_2_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter1_1_2_tokudb.result
@@ -68,15 +68,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -560,15 +560,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -1060,22 +1060,22 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -1565,20 +1565,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -2064,20 +2064,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -2567,27 +2567,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -3077,27 +3077,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -3585,19 +3585,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -4082,15 +4082,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -4574,15 +4574,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -5074,22 +5074,22 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -5579,20 +5579,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -6078,20 +6078,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -6581,27 +6581,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -7091,27 +7091,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -7599,19 +7599,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -8097,15 +8097,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -8605,15 +8605,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -9121,22 +9121,22 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -9642,20 +9642,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -10157,20 +10157,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -10676,27 +10676,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -11202,27 +11202,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -11726,19 +11726,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -12239,15 +12239,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -12747,15 +12747,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -13263,22 +13263,22 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -13784,20 +13784,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -14299,20 +14299,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -14818,27 +14818,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -15344,27 +15344,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -15868,19 +15868,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -16382,16 +16382,16 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`),
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -16875,16 +16875,16 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`),
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -17376,23 +17376,23 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`),
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -17882,21 +17882,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`),
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -18382,21 +18382,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`),
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -18886,28 +18886,28 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`),
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -19397,28 +19397,28 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`),
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -19906,20 +19906,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`),
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -20404,16 +20404,16 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -20897,16 +20897,16 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -21398,23 +21398,23 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -21904,21 +21904,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -22404,21 +22404,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -22908,28 +22908,28 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -23419,28 +23419,28 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -23928,20 +23928,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -24426,16 +24426,16 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -24919,16 +24919,16 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -25420,23 +25420,23 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -25926,21 +25926,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -26426,21 +26426,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -26930,28 +26930,28 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -27441,28 +27441,28 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -27950,20 +27950,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter1_1_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter1_1_tokudb.result
index 2cc7b4298fc..7ad3d72441c 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter1_1_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter1_1_tokudb.result
@@ -386,15 +386,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -878,15 +878,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -1378,22 +1378,22 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -1883,20 +1883,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -2382,20 +2382,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -2887,27 +2887,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -3397,27 +3397,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -3905,19 +3905,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -4402,15 +4402,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -4894,15 +4894,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -5394,22 +5394,22 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -5899,20 +5899,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -6398,20 +6398,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -6903,27 +6903,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -7413,27 +7413,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -7921,19 +7921,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -8419,15 +8419,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -8927,15 +8927,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -9443,22 +9443,22 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -9964,20 +9964,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -10479,20 +10479,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -11000,27 +11000,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -11526,27 +11526,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -12050,19 +12050,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -12563,15 +12563,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -13071,15 +13071,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -13587,22 +13587,22 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -14108,20 +14108,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -14623,20 +14623,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -15144,27 +15144,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -15670,27 +15670,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -16194,19 +16194,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter1_2_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter1_2_tokudb.result
index 4aded14f336..9f53437fdf7 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter1_2_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter1_2_tokudb.result
@@ -67,14 +67,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -507,14 +507,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -955,21 +955,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -1408,19 +1408,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -1855,19 +1855,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -2308,26 +2308,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -2766,26 +2766,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -3222,18 +3222,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -3667,14 +3667,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -4107,14 +4107,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -4555,21 +4555,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -5008,19 +5008,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -5455,19 +5455,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -5908,26 +5908,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -6366,26 +6366,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -6822,18 +6822,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -7267,14 +7267,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -7723,14 +7723,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -8187,21 +8187,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -8656,19 +8656,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -9119,19 +9119,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -9588,26 +9588,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -10062,26 +10062,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -10534,18 +10534,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -10999,14 +10999,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -11439,14 +11439,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -11887,21 +11887,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -12340,19 +12340,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -12787,19 +12787,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -13238,26 +13238,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -13696,26 +13696,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -14152,18 +14152,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -14597,14 +14597,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -15037,14 +15037,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -15485,21 +15485,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -15938,19 +15938,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -16385,19 +16385,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -16836,26 +16836,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -17294,26 +17294,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -17750,18 +17750,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -18196,14 +18196,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -18652,14 +18652,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -19116,21 +19116,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -19585,19 +19585,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -20048,19 +20048,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -20515,26 +20515,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -20989,26 +20989,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -21461,18 +21461,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -21922,14 +21922,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -22378,14 +22378,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -22842,21 +22842,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -23311,19 +23311,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -23774,19 +23774,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -24241,26 +24241,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -24715,26 +24715,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -25187,18 +25187,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -25649,14 +25649,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -26089,14 +26089,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -26537,21 +26537,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -26990,19 +26990,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -27437,19 +27437,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -27888,26 +27888,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -28346,26 +28346,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -28802,18 +28802,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -29247,14 +29247,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -29687,14 +29687,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -30135,21 +30135,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -30588,19 +30588,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -31035,19 +31035,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -31486,26 +31486,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -31944,26 +31944,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -32400,18 +32400,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -32845,14 +32845,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -33301,14 +33301,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -33765,21 +33765,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -34234,19 +34234,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -34697,19 +34697,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -35164,26 +35164,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -35638,26 +35638,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -36110,18 +36110,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
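Every hunk above records the same expected-output change: SHOW CREATE TABLE now prints numeric column defaults as unquoted literals (DEFAULT 0 rather than DEFAULT '0') and emits the partitioning clause directly, with quoted identifiers and regenerated expressions (e.g. (`f_int1` + `f_int2`) % 4 in place of MOD(f_int1 + f_int2,4)), instead of wrapping the clause in a /*!50100 ... */ versioned comment, which older servers (pre-5.1) would have skipped. A minimal sketch of the effect, using a hypothetical table name and partition layout chosen only for illustration:

CREATE TABLE t_example (f_int1 INT DEFAULT 0)
ENGINE=TokuDB DEFAULT CHARSET=latin1
PARTITION BY LIST (MOD(f_int1, 4))
(PARTITION part0 VALUES IN (0), PARTITION part1 VALUES IN (1));

-- Old expected SHOW CREATE TABLE tail (clause inside a versioned comment,
-- identifiers unquoted, MOD() preserved as written):
--   /*!50100 PARTITION BY LIST (MOD(f_int1,4))
--   (PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
--    PARTITION part1 VALUES IN (1) ENGINE = TokuDB) */
-- New expected tail (clause printed unconditionally, identifiers quoted,
-- expression regenerated from the parse tree, so MOD() prints as %):
--   PARTITION BY LIST (`f_int1` % 4)
--  (PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
--   PARTITION `part1` VALUES IN (1) ENGINE = TokuDB)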
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter2_1_1_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter2_1_1_tokudb.result
index 6838b33d89d..d994f5f2654 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter2_1_1_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter2_1_1_tokudb.result
@@ -68,14 +68,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` bigint(20) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -524,14 +524,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` bigint(20) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -988,21 +988,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` bigint(20) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -1457,19 +1457,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` bigint(20) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -1920,19 +1920,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` bigint(20) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -2389,26 +2389,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` bigint(20) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -2863,26 +2863,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` bigint(20) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -3335,18 +3335,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` bigint(20) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -3797,15 +3797,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` bigint(20) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -4291,15 +4291,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` bigint(20) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -4793,22 +4793,22 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` bigint(20) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -5300,20 +5300,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` bigint(20) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -5801,20 +5801,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` bigint(20) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -6308,27 +6308,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` bigint(20) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -6820,27 +6820,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` bigint(20) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -7330,19 +7330,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` bigint(20) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -7829,15 +7829,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` bigint(20) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -8323,15 +8323,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` bigint(20) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -8825,22 +8825,22 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` bigint(20) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -9332,20 +9332,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` bigint(20) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -9833,20 +9833,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` bigint(20) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -10340,27 +10340,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` bigint(20) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -10852,27 +10852,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` bigint(20) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -11362,19 +11362,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` bigint(20) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -11862,15 +11862,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` bigint(20) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -12370,15 +12370,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` bigint(20) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -12886,22 +12886,22 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` bigint(20) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -13407,20 +13407,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` bigint(20) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -13922,20 +13922,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` bigint(20) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -14443,27 +14443,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` bigint(20) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -14969,27 +14969,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` bigint(20) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -15493,19 +15493,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` bigint(20) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -16006,15 +16006,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` bigint(20) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -16514,15 +16514,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` bigint(20) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -17030,22 +17030,22 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` bigint(20) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -17551,20 +17551,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` bigint(20) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -18066,20 +18066,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` bigint(20) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -18587,27 +18587,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` bigint(20) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -19113,27 +19113,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` bigint(20) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -19637,19 +19637,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` bigint(20) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter2_1_2_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter2_1_2_tokudb.result
index f6e98be3c98..f8c7eadb44c 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter2_1_2_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter2_1_2_tokudb.result
@@ -70,8 +70,8 @@ t1 CREATE TABLE `t1` (
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -526,8 +526,8 @@ t1 CREATE TABLE `t1` (
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -990,15 +990,15 @@ t1 CREATE TABLE `t1` (
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -1459,13 +1459,13 @@ t1 CREATE TABLE `t1` (
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -1922,13 +1922,13 @@ t1 CREATE TABLE `t1` (
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -2389,20 +2389,20 @@ t1 CREATE TABLE `t1` (
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -2863,20 +2863,20 @@ t1 CREATE TABLE `t1` (
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -3335,12 +3335,12 @@ t1 CREATE TABLE `t1` (
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -3798,8 +3798,8 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -4293,8 +4293,8 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -4796,15 +4796,15 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -5304,13 +5304,13 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -5806,13 +5806,13 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -6312,20 +6312,20 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -6825,20 +6825,20 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -7336,12 +7336,12 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -7836,8 +7836,8 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -8331,8 +8331,8 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -8834,15 +8834,15 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -9342,13 +9342,13 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -9844,13 +9844,13 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -10350,20 +10350,20 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -10863,20 +10863,20 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -11374,12 +11374,12 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -11875,8 +11875,8 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -12383,8 +12383,8 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -12899,15 +12899,15 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -13420,13 +13420,13 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -13935,13 +13935,13 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -14454,20 +14454,20 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -14980,20 +14980,20 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -15504,12 +15504,12 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -16017,8 +16017,8 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -16525,8 +16525,8 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -17041,15 +17041,15 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -17562,13 +17562,13 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -18077,13 +18077,13 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -18596,20 +18596,20 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -19122,20 +19122,20 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -19646,12 +19646,12 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter2_2_1_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter2_2_1_tokudb.result
index 883c9ec3453..1a4d1210a23 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter2_2_1_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter2_2_1_tokudb.result
@@ -68,14 +68,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` mediumint(9) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -526,14 +526,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` mediumint(9) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -992,21 +992,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` mediumint(9) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -1463,19 +1463,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` mediumint(9) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -1926,19 +1926,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` mediumint(9) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -2397,26 +2397,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` mediumint(9) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -2871,26 +2871,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` mediumint(9) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -3345,18 +3345,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` mediumint(9) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -3808,15 +3808,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` mediumint(9) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -4304,15 +4304,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` mediumint(9) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -4808,22 +4808,22 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` mediumint(9) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -5317,20 +5317,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` mediumint(9) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -5818,20 +5818,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` mediumint(9) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -6327,27 +6327,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` mediumint(9) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -6839,27 +6839,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` mediumint(9) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -7351,19 +7351,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` mediumint(9) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -7852,15 +7852,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` mediumint(9) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -8348,15 +8348,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` mediumint(9) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -8852,22 +8852,22 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` mediumint(9) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -9361,20 +9361,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` mediumint(9) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -9862,20 +9862,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` mediumint(9) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -10371,27 +10371,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` mediumint(9) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -10883,27 +10883,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` mediumint(9) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -11395,19 +11395,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
`f_int2` mediumint(9) NOT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -11897,15 +11897,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` mediumint(9) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -12407,15 +12407,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` mediumint(9) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -12925,22 +12925,22 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` mediumint(9) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -13448,20 +13448,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` mediumint(9) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -13963,20 +13963,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` mediumint(9) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -14486,27 +14486,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` mediumint(9) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -15012,27 +15012,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` mediumint(9) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -15538,19 +15538,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` mediumint(9) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -16053,15 +16053,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` mediumint(9) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -16563,15 +16563,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` mediumint(9) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -17081,22 +17081,22 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` mediumint(9) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -17604,20 +17604,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` mediumint(9) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -18119,20 +18119,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` mediumint(9) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -18642,27 +18642,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` mediumint(9) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -19168,27 +19168,27 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` mediumint(9) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -19694,19 +19694,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
`f_int2` mediumint(9) DEFAULT NULL,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter2_2_2_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter2_2_2_tokudb.result
index 993025c9fb2..36be0936c09 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter2_2_2_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter2_2_2_tokudb.result
@@ -69,8 +69,8 @@ t1 CREATE TABLE `t1` (
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -528,8 +528,8 @@ t1 CREATE TABLE `t1` (
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -995,15 +995,15 @@ t1 CREATE TABLE `t1` (
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -1467,13 +1467,13 @@ t1 CREATE TABLE `t1` (
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -1935,13 +1935,13 @@ t1 CREATE TABLE `t1` (
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -2407,20 +2407,20 @@ t1 CREATE TABLE `t1` (
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -2886,20 +2886,20 @@ t1 CREATE TABLE `t1` (
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -3361,12 +3361,12 @@ t1 CREATE TABLE `t1` (
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -3827,8 +3827,8 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -4325,8 +4325,8 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -4831,15 +4831,15 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -5342,13 +5342,13 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -5849,13 +5849,13 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -6360,20 +6360,20 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -6878,20 +6878,20 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -7392,12 +7392,12 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -7895,8 +7895,8 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -8393,8 +8393,8 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -8899,15 +8899,15 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -9410,13 +9410,13 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -9917,13 +9917,13 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -10428,20 +10428,20 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -10946,20 +10946,20 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -11460,12 +11460,12 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -11964,8 +11964,8 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -12475,8 +12475,8 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -12994,15 +12994,15 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -13518,13 +13518,13 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -14038,13 +14038,13 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -14562,20 +14562,20 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -15093,20 +15093,20 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -15620,12 +15620,12 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -16136,8 +16136,8 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -16647,8 +16647,8 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -17166,15 +17166,15 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -17690,13 +17690,13 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -18210,13 +18210,13 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -18734,20 +18734,20 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -19265,20 +19265,20 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -19792,12 +19792,12 @@ t1 CREATE TABLE `t1` (
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter3_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter3_tokudb.result
index b596e84d630..1a958e8a6b9 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter3_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter3_tokudb.result
@@ -77,7 +77,7 @@ t1 CREATE TABLE `t1` (
`f_date` date DEFAULT NULL,
`f_varchar` varchar(30) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (YEAR(f_date)) */
+ PARTITION BY HASH (year(`f_date`))
t1.frm
t1.par
EXPLAIN PARTITIONS SELECT COUNT(*) FROM t1 WHERE f_date = '1000-02-10';
@@ -95,7 +95,7 @@ t1 CREATE TABLE `t1` (
`f_date` date DEFAULT NULL,
`f_varchar` varchar(30) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (DAYOFYEAR(f_date)) */
+ PARTITION BY HASH (dayofyear(`f_date`))
t1.frm
t1.par
EXPLAIN PARTITIONS SELECT COUNT(*) FROM t1 WHERE f_date = '1000-02-10';
@@ -111,7 +111,7 @@ t1 CREATE TABLE `t1` (
`f_date` date DEFAULT NULL,
`f_varchar` varchar(30) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (YEAR(f_date)) */
+ PARTITION BY HASH (year(`f_date`))
t1.frm
t1.par
EXPLAIN PARTITIONS SELECT COUNT(*) FROM t1 WHERE f_date = '1000-02-10';
@@ -133,10 +133,10 @@ t1 CREATE TABLE `t1` (
`f_date` date DEFAULT NULL,
`f_varchar` varchar(30) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (YEAR(f_date))
-(PARTITION p0 ENGINE = TokuDB,
- PARTITION part1 ENGINE = TokuDB,
- PARTITION part7 ENGINE = TokuDB) */
+ PARTITION BY HASH (year(`f_date`))
+(PARTITION `p0` ENGINE = TokuDB,
+ PARTITION `part1` ENGINE = TokuDB,
+ PARTITION `part7` ENGINE = TokuDB)
t1.frm
t1.par
EXPLAIN PARTITIONS SELECT COUNT(*) FROM t1 WHERE f_date = '1000-02-10';
@@ -156,11 +156,11 @@ t1 CREATE TABLE `t1` (
`f_date` date DEFAULT NULL,
`f_varchar` varchar(30) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (YEAR(f_date))
-(PARTITION p0 ENGINE = TokuDB,
- PARTITION part1 ENGINE = TokuDB,
- PARTITION part7 ENGINE = TokuDB,
- PARTITION part2 ENGINE = TokuDB) */
+ PARTITION BY HASH (year(`f_date`))
+(PARTITION `p0` ENGINE = TokuDB,
+ PARTITION `part1` ENGINE = TokuDB,
+ PARTITION `part7` ENGINE = TokuDB,
+ PARTITION `part2` ENGINE = TokuDB)
t1.frm
t1.par
EXPLAIN PARTITIONS SELECT COUNT(*) FROM t1 WHERE f_date = '1000-02-10';
@@ -177,15 +177,15 @@ t1 CREATE TABLE `t1` (
`f_date` date DEFAULT NULL,
`f_varchar` varchar(30) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (YEAR(f_date))
-(PARTITION p0 ENGINE = TokuDB,
- PARTITION part1 ENGINE = TokuDB,
- PARTITION part7 ENGINE = TokuDB,
- PARTITION part2 ENGINE = TokuDB,
- PARTITION p4 ENGINE = TokuDB,
- PARTITION p5 ENGINE = TokuDB,
- PARTITION p6 ENGINE = TokuDB,
- PARTITION p7 ENGINE = TokuDB) */
+ PARTITION BY HASH (year(`f_date`))
+(PARTITION `p0` ENGINE = TokuDB,
+ PARTITION `part1` ENGINE = TokuDB,
+ PARTITION `part7` ENGINE = TokuDB,
+ PARTITION `part2` ENGINE = TokuDB,
+ PARTITION `p4` ENGINE = TokuDB,
+ PARTITION `p5` ENGINE = TokuDB,
+ PARTITION `p6` ENGINE = TokuDB,
+ PARTITION `p7` ENGINE = TokuDB)
t1.frm
t1.par
EXPLAIN PARTITIONS SELECT COUNT(*) FROM t1 WHERE f_date = '1000-02-10';
@@ -214,14 +214,14 @@ t1 CREATE TABLE `t1` (
`f_date` date DEFAULT NULL,
`f_varchar` varchar(30) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (YEAR(f_date))
-(PARTITION p0 ENGINE = TokuDB,
- PARTITION part1 ENGINE = TokuDB,
- PARTITION part7 ENGINE = TokuDB,
- PARTITION part2 ENGINE = TokuDB,
- PARTITION p4 ENGINE = TokuDB,
- PARTITION p5 ENGINE = TokuDB,
- PARTITION p6 ENGINE = TokuDB) */
+ PARTITION BY HASH (year(`f_date`))
+(PARTITION `p0` ENGINE = TokuDB,
+ PARTITION `part1` ENGINE = TokuDB,
+ PARTITION `part7` ENGINE = TokuDB,
+ PARTITION `part2` ENGINE = TokuDB,
+ PARTITION `p4` ENGINE = TokuDB,
+ PARTITION `p5` ENGINE = TokuDB,
+ PARTITION `p6` ENGINE = TokuDB)
t1.frm
t1.par
EXPLAIN PARTITIONS SELECT COUNT(*) FROM t1 WHERE f_date = '1000-02-10';
@@ -237,13 +237,13 @@ t1 CREATE TABLE `t1` (
`f_date` date DEFAULT NULL,
`f_varchar` varchar(30) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (YEAR(f_date))
-(PARTITION p0 ENGINE = TokuDB,
- PARTITION part1 ENGINE = TokuDB,
- PARTITION part7 ENGINE = TokuDB,
- PARTITION part2 ENGINE = TokuDB,
- PARTITION p4 ENGINE = TokuDB,
- PARTITION p5 ENGINE = TokuDB) */
+ PARTITION BY HASH (year(`f_date`))
+(PARTITION `p0` ENGINE = TokuDB,
+ PARTITION `part1` ENGINE = TokuDB,
+ PARTITION `part7` ENGINE = TokuDB,
+ PARTITION `part2` ENGINE = TokuDB,
+ PARTITION `p4` ENGINE = TokuDB,
+ PARTITION `p5` ENGINE = TokuDB)
t1.frm
t1.par
EXPLAIN PARTITIONS SELECT COUNT(*) FROM t1 WHERE f_date = '1000-02-10';
@@ -259,12 +259,12 @@ t1 CREATE TABLE `t1` (
`f_date` date DEFAULT NULL,
`f_varchar` varchar(30) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (YEAR(f_date))
-(PARTITION p0 ENGINE = TokuDB,
- PARTITION part1 ENGINE = TokuDB,
- PARTITION part7 ENGINE = TokuDB,
- PARTITION part2 ENGINE = TokuDB,
- PARTITION p4 ENGINE = TokuDB) */
+ PARTITION BY HASH (year(`f_date`))
+(PARTITION `p0` ENGINE = TokuDB,
+ PARTITION `part1` ENGINE = TokuDB,
+ PARTITION `part7` ENGINE = TokuDB,
+ PARTITION `part2` ENGINE = TokuDB,
+ PARTITION `p4` ENGINE = TokuDB)
t1.frm
t1.par
EXPLAIN PARTITIONS SELECT COUNT(*) FROM t1 WHERE f_date = '1000-02-10';
@@ -280,11 +280,11 @@ t1 CREATE TABLE `t1` (
`f_date` date DEFAULT NULL,
`f_varchar` varchar(30) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (YEAR(f_date))
-(PARTITION p0 ENGINE = TokuDB,
- PARTITION part1 ENGINE = TokuDB,
- PARTITION part7 ENGINE = TokuDB,
- PARTITION part2 ENGINE = TokuDB) */
+ PARTITION BY HASH (year(`f_date`))
+(PARTITION `p0` ENGINE = TokuDB,
+ PARTITION `part1` ENGINE = TokuDB,
+ PARTITION `part7` ENGINE = TokuDB,
+ PARTITION `part2` ENGINE = TokuDB)
t1.frm
t1.par
EXPLAIN PARTITIONS SELECT COUNT(*) FROM t1 WHERE f_date = '1000-02-10';
@@ -300,10 +300,10 @@ t1 CREATE TABLE `t1` (
`f_date` date DEFAULT NULL,
`f_varchar` varchar(30) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (YEAR(f_date))
-(PARTITION p0 ENGINE = TokuDB,
- PARTITION part1 ENGINE = TokuDB,
- PARTITION part7 ENGINE = TokuDB) */
+ PARTITION BY HASH (year(`f_date`))
+(PARTITION `p0` ENGINE = TokuDB,
+ PARTITION `part1` ENGINE = TokuDB,
+ PARTITION `part7` ENGINE = TokuDB)
t1.frm
t1.par
EXPLAIN PARTITIONS SELECT COUNT(*) FROM t1 WHERE f_date = '1000-02-10';
@@ -319,9 +319,9 @@ t1 CREATE TABLE `t1` (
`f_date` date DEFAULT NULL,
`f_varchar` varchar(30) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (YEAR(f_date))
-(PARTITION p0 ENGINE = TokuDB,
- PARTITION part1 ENGINE = TokuDB) */
+ PARTITION BY HASH (year(`f_date`))
+(PARTITION `p0` ENGINE = TokuDB,
+ PARTITION `part1` ENGINE = TokuDB)
t1.frm
t1.par
EXPLAIN PARTITIONS SELECT COUNT(*) FROM t1 WHERE f_date = '1000-02-10';
@@ -337,8 +337,8 @@ t1 CREATE TABLE `t1` (
`f_date` date DEFAULT NULL,
`f_varchar` varchar(30) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (YEAR(f_date))
-(PARTITION p0 ENGINE = TokuDB) */
+ PARTITION BY HASH (year(`f_date`))
+(PARTITION `p0` ENGINE = TokuDB)
t1.frm
t1.par
EXPLAIN PARTITIONS SELECT COUNT(*) FROM t1 WHERE f_date = '1000-02-10';
@@ -392,8 +392,8 @@ SELECT f_int1,f_int2,f_char1,f_char2,f_charbig FROM t0_template;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
@@ -414,13 +414,13 @@ ALTER TABLE t1 PARTITION BY KEY(f_int1);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1) */
+ PARTITION BY KEY (`f_int1`)
t1.frm
t1.par
EXPLAIN PARTITIONS SELECT COUNT(*) <> 1 FROM t1 WHERE f_int1 = 3;
@@ -439,16 +439,16 @@ ALTER TABLE t1 ADD PARTITION (PARTITION part1, PARTITION part7);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION p0 ENGINE = TokuDB,
- PARTITION part1 ENGINE = TokuDB,
- PARTITION part7 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `p0` ENGINE = TokuDB,
+ PARTITION `part1` ENGINE = TokuDB,
+ PARTITION `part7` ENGINE = TokuDB)
t1.frm
t1.par
EXPLAIN PARTITIONS SELECT COUNT(*) <> 1 FROM t1 WHERE f_int1 = 3;
@@ -462,17 +462,17 @@ ALTER TABLE t1 ADD PARTITION (PARTITION part2);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION p0 ENGINE = TokuDB,
- PARTITION part1 ENGINE = TokuDB,
- PARTITION part7 ENGINE = TokuDB,
- PARTITION part2 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `p0` ENGINE = TokuDB,
+ PARTITION `part1` ENGINE = TokuDB,
+ PARTITION `part7` ENGINE = TokuDB,
+ PARTITION `part2` ENGINE = TokuDB)
t1.frm
t1.par
EXPLAIN PARTITIONS SELECT COUNT(*) <> 1 FROM t1 WHERE f_int1 = 3;
@@ -486,21 +486,21 @@ ALTER TABLE t1 ADD PARTITION PARTITIONS 4;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION p0 ENGINE = TokuDB,
- PARTITION part1 ENGINE = TokuDB,
- PARTITION part7 ENGINE = TokuDB,
- PARTITION part2 ENGINE = TokuDB,
- PARTITION p4 ENGINE = TokuDB,
- PARTITION p5 ENGINE = TokuDB,
- PARTITION p6 ENGINE = TokuDB,
- PARTITION p7 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `p0` ENGINE = TokuDB,
+ PARTITION `part1` ENGINE = TokuDB,
+ PARTITION `part7` ENGINE = TokuDB,
+ PARTITION `part2` ENGINE = TokuDB,
+ PARTITION `p4` ENGINE = TokuDB,
+ PARTITION `p5` ENGINE = TokuDB,
+ PARTITION `p6` ENGINE = TokuDB,
+ PARTITION `p7` ENGINE = TokuDB)
t1.frm
t1.par
EXPLAIN PARTITIONS SELECT COUNT(*) <> 1 FROM t1 WHERE f_int1 = 3;
@@ -521,20 +521,20 @@ ALTER TABLE t1 COALESCE PARTITION 1;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION p0 ENGINE = TokuDB,
- PARTITION part1 ENGINE = TokuDB,
- PARTITION part7 ENGINE = TokuDB,
- PARTITION part2 ENGINE = TokuDB,
- PARTITION p4 ENGINE = TokuDB,
- PARTITION p5 ENGINE = TokuDB,
- PARTITION p6 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `p0` ENGINE = TokuDB,
+ PARTITION `part1` ENGINE = TokuDB,
+ PARTITION `part7` ENGINE = TokuDB,
+ PARTITION `part2` ENGINE = TokuDB,
+ PARTITION `p4` ENGINE = TokuDB,
+ PARTITION `p5` ENGINE = TokuDB,
+ PARTITION `p6` ENGINE = TokuDB)
t1.frm
t1.par
EXPLAIN PARTITIONS SELECT COUNT(*) <> 1 FROM t1 WHERE f_int1 = 3;
@@ -547,19 +547,19 @@ ALTER TABLE t1 COALESCE PARTITION 1;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION p0 ENGINE = TokuDB,
- PARTITION part1 ENGINE = TokuDB,
- PARTITION part7 ENGINE = TokuDB,
- PARTITION part2 ENGINE = TokuDB,
- PARTITION p4 ENGINE = TokuDB,
- PARTITION p5 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `p0` ENGINE = TokuDB,
+ PARTITION `part1` ENGINE = TokuDB,
+ PARTITION `part7` ENGINE = TokuDB,
+ PARTITION `part2` ENGINE = TokuDB,
+ PARTITION `p4` ENGINE = TokuDB,
+ PARTITION `p5` ENGINE = TokuDB)
t1.frm
t1.par
EXPLAIN PARTITIONS SELECT COUNT(*) <> 1 FROM t1 WHERE f_int1 = 3;
@@ -572,18 +572,18 @@ ALTER TABLE t1 COALESCE PARTITION 1;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION p0 ENGINE = TokuDB,
- PARTITION part1 ENGINE = TokuDB,
- PARTITION part7 ENGINE = TokuDB,
- PARTITION part2 ENGINE = TokuDB,
- PARTITION p4 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `p0` ENGINE = TokuDB,
+ PARTITION `part1` ENGINE = TokuDB,
+ PARTITION `part7` ENGINE = TokuDB,
+ PARTITION `part2` ENGINE = TokuDB,
+ PARTITION `p4` ENGINE = TokuDB)
t1.frm
t1.par
EXPLAIN PARTITIONS SELECT COUNT(*) <> 1 FROM t1 WHERE f_int1 = 3;
@@ -596,17 +596,17 @@ ALTER TABLE t1 COALESCE PARTITION 1;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION p0 ENGINE = TokuDB,
- PARTITION part1 ENGINE = TokuDB,
- PARTITION part7 ENGINE = TokuDB,
- PARTITION part2 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `p0` ENGINE = TokuDB,
+ PARTITION `part1` ENGINE = TokuDB,
+ PARTITION `part7` ENGINE = TokuDB,
+ PARTITION `part2` ENGINE = TokuDB)
t1.frm
t1.par
EXPLAIN PARTITIONS SELECT COUNT(*) <> 1 FROM t1 WHERE f_int1 = 3;
@@ -619,16 +619,16 @@ ALTER TABLE t1 COALESCE PARTITION 1;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION p0 ENGINE = TokuDB,
- PARTITION part1 ENGINE = TokuDB,
- PARTITION part7 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `p0` ENGINE = TokuDB,
+ PARTITION `part1` ENGINE = TokuDB,
+ PARTITION `part7` ENGINE = TokuDB)
t1.frm
t1.par
EXPLAIN PARTITIONS SELECT COUNT(*) <> 1 FROM t1 WHERE f_int1 = 3;
@@ -641,15 +641,15 @@ ALTER TABLE t1 COALESCE PARTITION 1;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION p0 ENGINE = TokuDB,
- PARTITION part1 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `p0` ENGINE = TokuDB,
+ PARTITION `part1` ENGINE = TokuDB)
t1.frm
t1.par
EXPLAIN PARTITIONS SELECT COUNT(*) <> 1 FROM t1 WHERE f_int1 = 3;
@@ -662,14 +662,14 @@ ALTER TABLE t1 COALESCE PARTITION 1;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION p0 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `p0` ENGINE = TokuDB)
t1.frm
t1.par
EXPLAIN PARTITIONS SELECT COUNT(*) <> 1 FROM t1 WHERE f_int1 = 3;
@@ -686,8 +686,8 @@ ALTER TABLE t1 REMOVE PARTITIONING;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter4_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter4_tokudb.result
index 40d167b57ea..3409f1d380e 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter4_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_alter4_tokudb.result
@@ -69,15 +69,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -528,18 +528,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB,
- PARTITION part_3 ENGINE = TokuDB,
- PARTITION part_4 ENGINE = TokuDB,
- PARTITION part_5 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB,
+ PARTITION `part_3` ENGINE = TokuDB,
+ PARTITION `part_4` ENGINE = TokuDB,
+ PARTITION `part_5` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -998,21 +998,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -1469,19 +1469,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_1 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION part_5 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `part_5` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -1934,19 +1934,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION part_1 VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -2405,26 +2405,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part_1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part_2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part_3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part_4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -2881,26 +2881,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part_1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part_2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part_3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part_4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part_1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -3355,18 +3355,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part_1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part_3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -3819,15 +3819,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -4278,18 +4278,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB,
- PARTITION part_3 ENGINE = TokuDB,
- PARTITION part_4 ENGINE = TokuDB,
- PARTITION part_5 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB,
+ PARTITION `part_3` ENGINE = TokuDB,
+ PARTITION `part_4` ENGINE = TokuDB,
+ PARTITION `part_5` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -4748,21 +4748,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -5219,19 +5219,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_1 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION part_5 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `part_5` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -5684,19 +5684,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION part_1 VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -6155,26 +6155,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part_1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part_2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part_3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part_4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -6631,26 +6631,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part_1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part_2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part_3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part_4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part_1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -7105,18 +7105,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part_1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part_3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -7569,15 +7569,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -8028,18 +8028,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB,
- PARTITION part_3 ENGINE = TokuDB,
- PARTITION part_4 ENGINE = TokuDB,
- PARTITION part_5 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB,
+ PARTITION `part_3` ENGINE = TokuDB,
+ PARTITION `part_4` ENGINE = TokuDB,
+ PARTITION `part_5` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -8498,21 +8498,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -8969,19 +8969,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_1 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION part_5 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `part_5` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -9434,19 +9434,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION part_1 VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -9905,26 +9905,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part_1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part_2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part_3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part_4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -10381,26 +10381,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part_1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part_2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part_3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part_4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part_1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -10855,18 +10855,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part_1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part_3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -11319,15 +11319,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -11778,18 +11778,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB,
- PARTITION part_3 ENGINE = TokuDB,
- PARTITION part_4 ENGINE = TokuDB,
- PARTITION part_5 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB,
+ PARTITION `part_3` ENGINE = TokuDB,
+ PARTITION `part_4` ENGINE = TokuDB,
+ PARTITION `part_5` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -12248,21 +12248,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -12719,19 +12719,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_1 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION part_5 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `part_5` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -13184,19 +13184,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION part_1 VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -13655,26 +13655,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part_1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part_2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part_3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part_4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -14131,26 +14131,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part_1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part_2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part_3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part_4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part_1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -14605,18 +14605,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part_1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part_3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -15069,15 +15069,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -15528,18 +15528,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB,
- PARTITION part_3 ENGINE = TokuDB,
- PARTITION part_4 ENGINE = TokuDB,
- PARTITION part_5 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB,
+ PARTITION `part_3` ENGINE = TokuDB,
+ PARTITION `part_4` ENGINE = TokuDB,
+ PARTITION `part_5` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -15998,21 +15998,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -16469,19 +16469,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_1 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION part_5 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `part_5` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -16934,19 +16934,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION part_1 VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -17405,26 +17405,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part_1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part_2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part_3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part_4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -17881,26 +17881,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part_1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part_2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part_3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part_4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part_1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -18355,18 +18355,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part_1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part_3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -18822,15 +18822,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -19281,18 +19281,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB,
- PARTITION part_3 ENGINE = TokuDB,
- PARTITION part_4 ENGINE = TokuDB,
- PARTITION part_5 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB,
+ PARTITION `part_3` ENGINE = TokuDB,
+ PARTITION `part_4` ENGINE = TokuDB,
+ PARTITION `part_5` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -19751,21 +19751,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -20222,19 +20222,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_1 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION part_5 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `part_5` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -20687,19 +20687,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION part_1 VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -21158,26 +21158,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part_1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part_2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part_3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part_4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -21634,26 +21634,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part_1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part_2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part_3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part_4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part_1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -22108,18 +22108,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part_1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part_3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -22572,15 +22572,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -23031,18 +23031,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB,
- PARTITION part_3 ENGINE = TokuDB,
- PARTITION part_4 ENGINE = TokuDB,
- PARTITION part_5 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB,
+ PARTITION `part_3` ENGINE = TokuDB,
+ PARTITION `part_4` ENGINE = TokuDB,
+ PARTITION `part_5` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -23501,21 +23501,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -23972,19 +23972,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_1 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION part_5 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `part_5` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -24437,19 +24437,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION part_1 VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -24908,26 +24908,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part_1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part_2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part_3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part_4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -25384,26 +25384,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part_1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part_2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part_3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part_4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part_1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -25858,18 +25858,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part_1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part_3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -26322,15 +26322,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -26781,18 +26781,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB,
- PARTITION part_3 ENGINE = TokuDB,
- PARTITION part_4 ENGINE = TokuDB,
- PARTITION part_5 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB,
+ PARTITION `part_3` ENGINE = TokuDB,
+ PARTITION `part_4` ENGINE = TokuDB,
+ PARTITION `part_5` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -27251,21 +27251,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -27722,19 +27722,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_1 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION part_5 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `part_5` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -28187,19 +28187,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION part_1 VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -28658,26 +28658,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part_1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part_2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part_3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part_4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -29134,26 +29134,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part_1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part_2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part_3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part_4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part_1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -29608,18 +29608,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part_1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part_3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -30072,15 +30072,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -30531,18 +30531,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB,
- PARTITION part_3 ENGINE = TokuDB,
- PARTITION part_4 ENGINE = TokuDB,
- PARTITION part_5 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB,
+ PARTITION `part_3` ENGINE = TokuDB,
+ PARTITION `part_4` ENGINE = TokuDB,
+ PARTITION `part_5` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -31001,21 +31001,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -31472,19 +31472,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_1 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION part_5 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `part_5` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -31937,19 +31937,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION part_1 VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -32408,26 +32408,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part_1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part_2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part_3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part_4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -32884,26 +32884,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part_1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part_2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part_3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part_4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part_1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -33358,18 +33358,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part_1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part_3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -33822,15 +33822,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -34281,18 +34281,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB,
- PARTITION part_3 ENGINE = TokuDB,
- PARTITION part_4 ENGINE = TokuDB,
- PARTITION part_5 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB,
+ PARTITION `part_3` ENGINE = TokuDB,
+ PARTITION `part_4` ENGINE = TokuDB,
+ PARTITION `part_5` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -34751,21 +34751,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -35222,19 +35222,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_1 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION part_5 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `part_5` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -35687,19 +35687,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION part_1 VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -36158,26 +36158,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part_1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part_2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part_3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part_4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -36634,26 +36634,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part_1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part_2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part_3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part_4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part_1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -37108,18 +37108,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part_1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part_3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -37576,15 +37576,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -38036,18 +38036,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB,
- PARTITION part_3 ENGINE = TokuDB,
- PARTITION part_4 ENGINE = TokuDB,
- PARTITION part_5 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB,
+ PARTITION `part_3` ENGINE = TokuDB,
+ PARTITION `part_4` ENGINE = TokuDB,
+ PARTITION `part_5` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -38507,21 +38507,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -38979,19 +38979,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_1 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION part_5 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `part_5` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -39445,19 +39445,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION part_1 VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -39917,26 +39917,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part_1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part_2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part_3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part_4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -40394,26 +40394,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part_1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part_2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part_3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part_4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part_1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -40869,18 +40869,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part_1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part_3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -41334,15 +41334,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -41794,18 +41794,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB,
- PARTITION part_3 ENGINE = TokuDB,
- PARTITION part_4 ENGINE = TokuDB,
- PARTITION part_5 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB,
+ PARTITION `part_3` ENGINE = TokuDB,
+ PARTITION `part_4` ENGINE = TokuDB,
+ PARTITION `part_5` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -42265,21 +42265,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -42737,19 +42737,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_1 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION part_5 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `part_5` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -43203,19 +43203,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION part_1 VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -43675,26 +43675,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part_1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part_2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part_3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part_4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -44152,26 +44152,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part_1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part_2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part_3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part_4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part_1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -44627,18 +44627,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part_1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part_3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -45091,15 +45091,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -45550,18 +45550,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB,
- PARTITION part_3 ENGINE = TokuDB,
- PARTITION part_4 ENGINE = TokuDB,
- PARTITION part_5 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB,
+ PARTITION `part_3` ENGINE = TokuDB,
+ PARTITION `part_4` ENGINE = TokuDB,
+ PARTITION `part_5` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -46020,21 +46020,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -46491,19 +46491,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_1 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION part_5 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `part_5` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -46956,19 +46956,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION part_1 VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -47427,26 +47427,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part_1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part_2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part_3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part_4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -47903,26 +47903,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part_1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part_2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part_3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part_4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part_1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -48377,18 +48377,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part_1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part_3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -48841,15 +48841,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -49300,18 +49300,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB,
- PARTITION part_3 ENGINE = TokuDB,
- PARTITION part_4 ENGINE = TokuDB,
- PARTITION part_5 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB,
+ PARTITION `part_3` ENGINE = TokuDB,
+ PARTITION `part_4` ENGINE = TokuDB,
+ PARTITION `part_5` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -49770,21 +49770,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -50241,19 +50241,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_1 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION part_5 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `part_5` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -50706,19 +50706,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION part_1 VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -51177,26 +51177,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part_1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part_2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part_3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part_4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -51653,26 +51653,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part_1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part_2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part_3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part_4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part_1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -52127,18 +52127,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part_1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part_3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -52592,15 +52592,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -53052,18 +53052,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB,
- PARTITION part_3 ENGINE = TokuDB,
- PARTITION part_4 ENGINE = TokuDB,
- PARTITION part_5 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB,
+ PARTITION `part_3` ENGINE = TokuDB,
+ PARTITION `part_4` ENGINE = TokuDB,
+ PARTITION `part_5` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -53523,21 +53523,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -53995,19 +53995,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_1 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION part_5 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `part_5` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -54461,19 +54461,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION part_1 VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -54933,26 +54933,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part_1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part_2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part_3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part_4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -55410,26 +55410,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part_1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part_2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part_3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part_4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part_1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -55885,18 +55885,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part_1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part_3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -56350,15 +56350,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -56807,18 +56807,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB,
- PARTITION part_3 ENGINE = TokuDB,
- PARTITION part_4 ENGINE = TokuDB,
- PARTITION part_5 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB,
+ PARTITION `part_3` ENGINE = TokuDB,
+ PARTITION `part_4` ENGINE = TokuDB,
+ PARTITION `part_5` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -57275,21 +57275,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -57744,19 +57744,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_1 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION part_5 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `part_5` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -58207,19 +58207,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION part_1 VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -58676,26 +58676,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part_1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part_2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part_3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part_4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -59150,26 +59150,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part_1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part_2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part_3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part_4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part_1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -59622,18 +59622,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part_1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part_3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -60084,15 +60084,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -60541,18 +60541,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB,
- PARTITION part_3 ENGINE = TokuDB,
- PARTITION part_4 ENGINE = TokuDB,
- PARTITION part_5 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB,
+ PARTITION `part_3` ENGINE = TokuDB,
+ PARTITION `part_4` ENGINE = TokuDB,
+ PARTITION `part_5` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -61009,21 +61009,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -61478,19 +61478,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_1 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION part_5 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `part_5` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -61941,19 +61941,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION part_1 VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -62410,26 +62410,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part_1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part_2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part_3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part_4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -62884,26 +62884,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part_1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part_2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part_3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part_4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part_1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -63356,18 +63356,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part_1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part_3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -64138,15 +64138,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -64595,18 +64595,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB,
- PARTITION part_3 ENGINE = TokuDB,
- PARTITION part_4 ENGINE = TokuDB,
- PARTITION part_5 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB,
+ PARTITION `part_3` ENGINE = TokuDB,
+ PARTITION `part_4` ENGINE = TokuDB,
+ PARTITION `part_5` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -65063,21 +65063,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -65532,19 +65532,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_1 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION part_5 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `part_5` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -65995,19 +65995,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION part_1 VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -66464,26 +66464,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part_1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part_2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part_3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part_4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -66938,26 +66938,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part_1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part_2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part_3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part_4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part_1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -67410,18 +67410,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part_1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part_3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -67877,15 +67877,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -68336,18 +68336,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB,
- PARTITION part_3 ENGINE = TokuDB,
- PARTITION part_4 ENGINE = TokuDB,
- PARTITION part_5 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB,
+ PARTITION `part_3` ENGINE = TokuDB,
+ PARTITION `part_4` ENGINE = TokuDB,
+ PARTITION `part_5` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -68806,21 +68806,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -69277,19 +69277,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_1 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION part_5 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `part_5` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -69742,19 +69742,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION part_1 VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -70213,26 +70213,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part_1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part_2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part_3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part_4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -70689,26 +70689,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part_1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part_2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part_3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part_4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part_1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -71163,18 +71163,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part_1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part_3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -71627,15 +71627,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -72086,18 +72086,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB,
- PARTITION part_3 ENGINE = TokuDB,
- PARTITION part_4 ENGINE = TokuDB,
- PARTITION part_5 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB,
+ PARTITION `part_3` ENGINE = TokuDB,
+ PARTITION `part_4` ENGINE = TokuDB,
+ PARTITION `part_5` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -72556,21 +72556,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -73027,19 +73027,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_1 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION part_5 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `part_5` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -73492,19 +73492,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION part_1 VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -73963,26 +73963,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part_1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part_2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part_3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part_4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -74439,26 +74439,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part_1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part_2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part_3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part_4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part_1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -74913,18 +74913,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part_1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part_3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -75377,15 +75377,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -75836,18 +75836,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB,
- PARTITION part_3 ENGINE = TokuDB,
- PARTITION part_4 ENGINE = TokuDB,
- PARTITION part_5 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB,
+ PARTITION `part_3` ENGINE = TokuDB,
+ PARTITION `part_4` ENGINE = TokuDB,
+ PARTITION `part_5` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -76306,21 +76306,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -76777,19 +76777,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_1 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION part_5 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `part_5` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -77242,19 +77242,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION part_1 VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -77713,26 +77713,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part_1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part_2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part_3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part_4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -78189,26 +78189,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part_1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part_2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part_3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part_4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part_1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -78663,18 +78663,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part_1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part_3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -79127,15 +79127,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -79586,18 +79586,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB,
- PARTITION part_3 ENGINE = TokuDB,
- PARTITION part_4 ENGINE = TokuDB,
- PARTITION part_5 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB,
+ PARTITION `part_3` ENGINE = TokuDB,
+ PARTITION `part_4` ENGINE = TokuDB,
+ PARTITION `part_5` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -80056,21 +80056,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -80527,19 +80527,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_1 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION part_5 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `part_5` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -80992,19 +80992,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION part_1 VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -81463,26 +81463,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part_1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part_2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part_3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part_4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -81939,26 +81939,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part_1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part_2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part_3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part_4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part_1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -82413,18 +82413,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part_1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part_3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -82877,15 +82877,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -83336,18 +83336,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-(PARTITION part_1 ENGINE = TokuDB,
- PARTITION part_2 ENGINE = TokuDB,
- PARTITION part_3 ENGINE = TokuDB,
- PARTITION part_4 ENGINE = TokuDB,
- PARTITION part_5 ENGINE = TokuDB) */
+ PARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` ENGINE = TokuDB,
+ PARTITION `part_2` ENGINE = TokuDB,
+ PARTITION `part_3` ENGINE = TokuDB,
+ PARTITION `part_4` ENGINE = TokuDB,
+ PARTITION `part_5` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -83806,21 +83806,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -84277,19 +84277,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_1 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION part_5 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `part_5` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -84742,19 +84742,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION part_1 VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION part_3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part_4 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part_4` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -85213,26 +85213,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part_1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part_2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part_3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part_4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part_1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -85689,26 +85689,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part_1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part_2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part_3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part_4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part_1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part_2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part_3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part_4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -86163,18 +86163,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part_1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part_3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part_1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part_3` VALUES IN (NULL) ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -86628,8 +86628,8 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
@@ -87082,8 +87082,8 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
@@ -87544,8 +87544,8 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
@@ -88004,8 +88004,8 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
@@ -88462,8 +88462,8 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
@@ -88924,8 +88924,8 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
@@ -89386,8 +89386,8 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
@@ -89844,8 +89844,8 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
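The pattern repeated throughout the expected-result hunks above: SHOW CREATE TABLE now prints the partitioning clause without the /*!50100 ... */ version-comment wrapper, backtick-quotes column and partition identifiers, renders MOD(expr,n) as expr % n, and prints integer column defaults unquoted (DEFAULT 0 rather than DEFAULT '0'). A minimal sketch of the new output format, assuming a hypothetical table t_example that is not part of this patch:

CREATE TABLE t_example (f_int1 INT DEFAULT 0)
ENGINE=TokuDB
PARTITION BY LIST (MOD(f_int1,2))
(PARTITION p0 VALUES IN (0), PARTITION p1 VALUES IN (1));
SHOW CREATE TABLE t_example;
Table	Create Table
t_example	CREATE TABLE `t_example` (
  `f_int1` int(11) DEFAULT 0
) ENGINE=TokuDB DEFAULT CHARSET=latin1
 PARTITION BY LIST (`f_int1` % 2)
(PARTITION `p0` VALUES IN (0) ENGINE = TokuDB,
 PARTITION `p1` VALUES IN (1) ENGINE = TokuDB)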
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_auto_increment_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_auto_increment_tokudb.result
index aadfaba3b7b..b18f970d2ce 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_auto_increment_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_auto_increment_tokudb.result
@@ -189,8 +189,8 @@ t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=TokuDB AUTO_INCREMENT=2 DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (c1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`c1`)
+PARTITIONS 2
SELECT * FROM t1;
c1
1
@@ -413,8 +413,8 @@ t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=TokuDB AUTO_INCREMENT=27 DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (c1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`c1`)
+PARTITIONS 2
SELECT * FROM t1 ORDER BY c1;
c1
26
@@ -426,8 +426,8 @@ t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=TokuDB AUTO_INCREMENT=2 DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (c1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`c1`)
+PARTITIONS 2
SELECT * FROM t1 ORDER BY c1;
c1
1
@@ -444,32 +444,34 @@ t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=TokuDB AUTO_INCREMENT=102 DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (c1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`c1`)
+PARTITIONS 2
DROP TABLE t1;
# Test with two threads
-# con default
+connection default;
CREATE TABLE t1 (c1 INT NOT NULL AUTO_INCREMENT, PRIMARY KEY (c1))
ENGINE = 'TokuDB'
PARTITION BY HASH(c1)
PARTITIONS 2;
INSERT INTO t1 (c1) VALUES (2);
INSERT INTO t1 (c1) VALUES (4);
-# con1
+connect con1, localhost, root,,;
+connection con1;
INSERT INTO t1 (c1) VALUES (NULL);
INSERT INTO t1 (c1) VALUES (10);
-# con default
+connection default;
INSERT INTO t1 (c1) VALUES (NULL);
INSERT INTO t1 (c1) VALUES (NULL);
INSERT INTO t1 (c1) VALUES (19);
INSERT INTO t1 (c1) VALUES (21);
-# con1
+connection con1;
INSERT INTO t1 (c1) VALUES (NULL);
-# con default
+connection default;
INSERT INTO t1 (c1) VALUES (16);
-# con1
+connection con1;
INSERT INTO t1 (c1) VALUES (NULL);
-# con default
+disconnect con1;
+connection default;
INSERT INTO t1 (c1) VALUES (NULL);
SELECT * FROM t1 ORDER BY c1;
c1
@@ -487,26 +489,27 @@ c1
24
DROP TABLE t1;
# Test with two threads + start transaction NO PARTITIONING
-# con default
+connect con1, localhost, root,,;
+connection default;
CREATE TABLE t1 (c1 INT NOT NULL AUTO_INCREMENT, PRIMARY KEY (c1))
ENGINE = 'TokuDB';
START TRANSACTION;
INSERT INTO t1 (c1) VALUES (2);
INSERT INTO t1 (c1) VALUES (4);
-# con1
+connection con1;
START TRANSACTION;
INSERT INTO t1 (c1) VALUES (NULL);
INSERT INTO t1 (c1) VALUES (10);
-# con default
+connection default;
INSERT INTO t1 (c1) VALUES (NULL);
INSERT INTO t1 (c1) VALUES (NULL);
INSERT INTO t1 (c1) VALUES (19);
INSERT INTO t1 (c1) VALUES (21);
-# con1
+connection con1;
INSERT INTO t1 (c1) VALUES (NULL);
-# con default
+connection default;
INSERT INTO t1 (c1) VALUES (16);
-# con1
+connection con1;
INSERT INTO t1 (c1) VALUES (NULL);
SELECT * FROM t1 ORDER BY c1;
c1
@@ -521,7 +524,8 @@ c1
10
22
23
-# con default
+disconnect con1;
+connection default;
INSERT INTO t1 (c1) VALUES (NULL);
SELECT * FROM t1 ORDER BY c1;
c1
@@ -550,7 +554,8 @@ c1
24
DROP TABLE t1;
# Test with two threads + start transaction
-# con default
+connect con1, localhost, root,,;
+connection default;
CREATE TABLE t1 (c1 INT NOT NULL AUTO_INCREMENT, PRIMARY KEY (c1))
ENGINE = 'TokuDB'
PARTITION BY HASH(c1)
@@ -558,17 +563,17 @@ PARTITIONS 2;
START TRANSACTION;
INSERT INTO t1 (c1) VALUES (2);
INSERT INTO t1 (c1) VALUES (4);
-# con1
+connection con1;
START TRANSACTION;
INSERT INTO t1 (c1) VALUES (NULL), (10);
-# con default
+connection default;
INSERT INTO t1 (c1) VALUES (NULL), (NULL), (19);
INSERT INTO t1 (c1) VALUES (21);
-# con1
+connection con1;
INSERT INTO t1 (c1) VALUES (NULL);
-# con default
+connection default;
INSERT INTO t1 (c1) VALUES (16);
-# con1
+connection con1;
INSERT INTO t1 (c1) VALUES (NULL);
SELECT * FROM t1 ORDER BY c1;
c1
@@ -583,7 +588,8 @@ c1
10
22
23
-# con default
+disconnect con1;
+connection default;
INSERT INTO t1 (c1) VALUES (NULL);
SELECT * FROM t1 ORDER BY c1;
c1
@@ -709,8 +715,8 @@ t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=TokuDB AUTO_INCREMENT=15 DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (c1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`c1`)
+PARTITIONS 2
INSERT INTO t1 (c1) VALUES (4);
SHOW CREATE TABLE t1;
Table Create Table
@@ -718,8 +724,8 @@ t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=TokuDB AUTO_INCREMENT=15 DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (c1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`c1`)
+PARTITIONS 2
INSERT INTO t1 (c1) VALUES (0);
SHOW CREATE TABLE t1;
Table Create Table
@@ -727,8 +733,8 @@ t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=TokuDB AUTO_INCREMENT=16 DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (c1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`c1`)
+PARTITIONS 2
INSERT INTO t1 (c1) VALUES (NULL);
SHOW CREATE TABLE t1;
Table Create Table
@@ -736,8 +742,8 @@ t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=TokuDB AUTO_INCREMENT=17 DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (c1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`c1`)
+PARTITIONS 2
SELECT * FROM t1 ORDER BY c1;
c1
4
@@ -752,8 +758,8 @@ t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=TokuDB AUTO_INCREMENT=301 DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (c1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`c1`)
+PARTITIONS 2
INSERT INTO t1 (c1) VALUES (0);
SHOW CREATE TABLE t1;
Table Create Table
@@ -761,8 +767,8 @@ t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=TokuDB AUTO_INCREMENT=301 DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (c1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`c1`)
+PARTITIONS 2
INSERT INTO t1 (c1) VALUES (NULL);
SHOW CREATE TABLE t1;
Table Create Table
@@ -770,8 +776,8 @@ t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=TokuDB AUTO_INCREMENT=302 DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (c1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`c1`)
+PARTITIONS 2
SELECT * FROM t1 ORDER BY c1;
c1
0
@@ -793,8 +799,8 @@ t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (c1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`c1`)
+PARTITIONS 2
INSERT INTO t1 (c1) VALUES (NULL);
SHOW CREATE TABLE t1;
Table Create Table
@@ -802,8 +808,8 @@ t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=TokuDB AUTO_INCREMENT=2 DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (c1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`c1`)
+PARTITIONS 2
SELECT * FROM t1;
c1
1
@@ -814,8 +820,8 @@ t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=TokuDB AUTO_INCREMENT=2 DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (c1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`c1`)
+PARTITIONS 2
INSERT INTO t1 (c1) VALUES (NULL);
SHOW CREATE TABLE t1;
Table Create Table
@@ -823,8 +829,8 @@ t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=TokuDB AUTO_INCREMENT=24 DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (c1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`c1`)
+PARTITIONS 2
SET INSERT_ID = 22;
INSERT INTO t1 VALUES (NULL), (NULL), (NULL);
INSERT INTO t1 VALUES (NULL);
@@ -847,8 +853,8 @@ t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (c1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`c1`)
+PARTITIONS 2
FLUSH TABLE;
SHOW CREATE TABLE t1;
Table Create Table
@@ -856,8 +862,8 @@ t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (c1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`c1`)
+PARTITIONS 2
INSERT INTO t1 VALUES (4);
FLUSH TABLE;
SHOW CREATE TABLE t1;
@@ -866,8 +872,8 @@ t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=TokuDB AUTO_INCREMENT=5 DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (c1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`c1`)
+PARTITIONS 2
INSERT INTO t1 VALUES (NULL);
FLUSH TABLE;
SHOW CREATE TABLE t1;
@@ -876,8 +882,8 @@ t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=TokuDB AUTO_INCREMENT=6 DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (c1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`c1`)
+PARTITIONS 2
SELECT * FROM t1 ORDER BY c1;
c1
4
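Besides the SHOW CREATE TABLE format change, the auto-increment results above replace the old --echo markers (# con default, # con1) with echoed mysqltest connection commands (connect, connection, disconnect), presumably via mysqltest's connect logging. A minimal sketch of the test-side sequence that would produce output like the updated hunks; the INSERT statements around the connection switches are illustrative, not taken from the patch:

--enable_connect_log
connect (con1,localhost,root,,);
connection con1;
# with connect logging enabled, each switch is echoed into the .result file
INSERT INTO t1 (c1) VALUES (NULL);
connection default;
INSERT INTO t1 (c1) VALUES (16);
disconnect con1;
connection default;
--disable_connect_log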
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_basic_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_basic_tokudb.result
index 8182dce5625..fca6cbe169e 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_basic_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_basic_tokudb.result
@@ -67,14 +67,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1`)
+PARTITIONS 2
unified filelist
t1.frm
@@ -522,14 +522,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`)
+PARTITIONS 5
unified filelist
t1.frm
@@ -985,21 +985,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -1453,19 +1453,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -1915,19 +1915,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -2383,26 +2383,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
unified filelist
t1.frm
@@ -2860,26 +2860,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
unified filelist
t1.frm
@@ -3331,18 +3331,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -3792,14 +3792,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
unified filelist
t1.frm
@@ -4247,14 +4247,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
unified filelist
t1.frm
@@ -4710,21 +4710,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -5178,19 +5178,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -5640,19 +5640,19 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -6106,26 +6106,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
unified filelist
t1.frm
@@ -6579,26 +6579,26 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
unified filelist
t1.frm
@@ -7050,18 +7050,18 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -7515,16 +7515,16 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`),
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1`)
+PARTITIONS 2
unified filelist
t1.frm
@@ -8007,16 +8007,16 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`),
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`)
+PARTITIONS 5
unified filelist
t1.frm
@@ -8507,23 +8507,23 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`),
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -9012,21 +9012,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`),
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -9511,21 +9511,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`),
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -10016,28 +10016,28 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`),
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
unified filelist
t1.frm
@@ -10530,28 +10530,28 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`),
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
unified filelist
t1.frm
@@ -11038,20 +11038,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`),
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -11535,16 +11535,16 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1`)
+PARTITIONS 2
unified filelist
t1.frm
@@ -12027,16 +12027,16 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`)
+PARTITIONS 5
unified filelist
t1.frm
@@ -12527,23 +12527,23 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -13032,21 +13032,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -13531,21 +13531,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -14036,28 +14036,28 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
unified filelist
t1.frm
@@ -14550,28 +14550,28 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
unified filelist
t1.frm
@@ -15058,20 +15058,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -15555,16 +15555,16 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`),
UNIQUE KEY `uidx2` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1`)
+PARTITIONS 2
unified filelist
t1.frm
@@ -16063,16 +16063,16 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`),
UNIQUE KEY `uidx2` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`)
+PARTITIONS 5
unified filelist
t1.frm
@@ -16579,23 +16579,23 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`),
UNIQUE KEY `uidx2` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -17100,21 +17100,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`),
UNIQUE KEY `uidx2` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`f_int1`)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -17615,21 +17615,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`),
UNIQUE KEY `uidx2` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1 DIV 2)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1` DIV 2)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -18136,28 +18136,28 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`),
UNIQUE KEY `uidx2` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int1)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
unified filelist
t1.frm
@@ -18666,28 +18666,28 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`),
UNIQUE KEY `uidx2` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int1 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int1` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
unified filelist
t1.frm
@@ -19190,20 +19190,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`),
UNIQUE KEY `uidx2` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int1)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int1`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -19708,16 +19708,16 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`),
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
unified filelist
t1.frm
@@ -20200,16 +20200,16 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`),
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
unified filelist
t1.frm
@@ -20700,23 +20700,23 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`),
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -21205,21 +21205,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`),
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -21704,21 +21704,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`),
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -22207,28 +22207,28 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`),
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
unified filelist
t1.frm
@@ -22717,28 +22717,28 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`),
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
unified filelist
t1.frm
@@ -23225,20 +23225,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int2`,`f_int1`),
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -23722,16 +23722,16 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
unified filelist
t1.frm
@@ -24214,16 +24214,16 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
unified filelist
t1.frm
@@ -24714,23 +24714,23 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -25219,21 +25219,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -25718,21 +25718,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -26221,28 +26221,28 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
unified filelist
t1.frm
@@ -26731,28 +26731,28 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
unified filelist
t1.frm
@@ -27239,20 +27239,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) NOT NULL DEFAULT '0',
- `f_int2` int(11) NOT NULL DEFAULT '0',
+ `f_int1` int(11) NOT NULL DEFAULT 0,
+ `f_int2` int(11) NOT NULL DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`f_int1`,`f_int2`),
UNIQUE KEY `uidx1` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -27736,16 +27736,16 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`),
UNIQUE KEY `uidx2` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1 + f_int2)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1` + `f_int2`)
+PARTITIONS 2
unified filelist
t1.frm
@@ -28244,16 +28244,16 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`),
UNIQUE KEY `uidx2` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (f_int1,f_int2)
-PARTITIONS 5 */
+ PARTITION BY KEY (`f_int1`,`f_int2`)
+PARTITIONS 5
unified filelist
t1.frm
@@ -28760,23 +28760,23 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`),
UNIQUE KEY `uidx2` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1 + f_int2,4))
-(PARTITION part_3 VALUES IN (-3) ENGINE = TokuDB,
- PARTITION part_2 VALUES IN (-2) ENGINE = TokuDB,
- PARTITION part_1 VALUES IN (-1) ENGINE = TokuDB,
- PARTITION part_N VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part0 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part1 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (2) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (3) ENGINE = TokuDB) */
+ PARTITION BY LIST ((`f_int1` + `f_int2`) % 4)
+(PARTITION `part_3` VALUES IN (-3) ENGINE = TokuDB,
+ PARTITION `part_2` VALUES IN (-2) ENGINE = TokuDB,
+ PARTITION `part_1` VALUES IN (-1) ENGINE = TokuDB,
+ PARTITION `part_N` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part0` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part1` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (2) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (3) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -29281,21 +29281,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`),
UNIQUE KEY `uidx2` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE ((f_int1 + f_int2) DIV 2)
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (15) ENGINE = TokuDB,
- PARTITION parte VALUES LESS THAN (20) ENGINE = TokuDB,
- PARTITION partf VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+ PARTITION BY RANGE ((`f_int1` + `f_int2`) DIV 2)
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (15) ENGINE = TokuDB,
+ PARTITION `parte` VALUES LESS THAN (20) ENGINE = TokuDB,
+ PARTITION `partf` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -29796,21 +29796,21 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`),
UNIQUE KEY `uidx2` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int2)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int2`)
SUBPARTITIONS 2
-(PARTITION parta VALUES LESS THAN (0) ENGINE = TokuDB,
- PARTITION partb VALUES LESS THAN (5) ENGINE = TokuDB,
- PARTITION partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION partd VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `parta` VALUES LESS THAN (0) ENGINE = TokuDB,
+ PARTITION `partb` VALUES LESS THAN (5) ENGINE = TokuDB,
+ PARTITION `partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `partd` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
unified filelist
t1.frm
@@ -30315,28 +30315,28 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`),
UNIQUE KEY `uidx2` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY KEY (f_int2)
-(PARTITION part1 VALUES LESS THAN (0)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (5)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (10)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB),
- PARTITION part4 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart41 ENGINE = TokuDB,
- SUBPARTITION subpart42 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY KEY (`f_int2`)
+(PARTITION `part1` VALUES LESS THAN (0)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (5)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart41` ENGINE = TokuDB,
+ SUBPARTITION `subpart42` ENGINE = TokuDB))
unified filelist
t1.frm
@@ -30841,28 +30841,28 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`),
UNIQUE KEY `uidx2` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,3)))
-SUBPARTITION BY HASH (f_int2 + 1)
-(PARTITION part1 VALUES IN (0)
- (SUBPARTITION sp11 ENGINE = TokuDB,
- SUBPARTITION sp12 ENGINE = TokuDB),
- PARTITION part2 VALUES IN (1)
- (SUBPARTITION sp21 ENGINE = TokuDB,
- SUBPARTITION sp22 ENGINE = TokuDB),
- PARTITION part3 VALUES IN (2)
- (SUBPARTITION sp31 ENGINE = TokuDB,
- SUBPARTITION sp32 ENGINE = TokuDB),
- PARTITION part4 VALUES IN (NULL)
- (SUBPARTITION sp41 ENGINE = TokuDB,
- SUBPARTITION sp42 ENGINE = TokuDB)) */
+ PARTITION BY LIST (abs(`f_int1` % 3))
+SUBPARTITION BY HASH (`f_int2` + 1)
+(PARTITION `part1` VALUES IN (0)
+ (SUBPARTITION `sp11` ENGINE = TokuDB,
+ SUBPARTITION `sp12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES IN (1)
+ (SUBPARTITION `sp21` ENGINE = TokuDB,
+ SUBPARTITION `sp22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES IN (2)
+ (SUBPARTITION `sp31` ENGINE = TokuDB,
+ SUBPARTITION `sp32` ENGINE = TokuDB),
+ PARTITION `part4` VALUES IN (NULL)
+ (SUBPARTITION `sp41` ENGINE = TokuDB,
+ SUBPARTITION `sp42` ENGINE = TokuDB))
unified filelist
t1.frm
@@ -31365,20 +31365,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL,
UNIQUE KEY `uidx1` (`f_int1`,`f_int2`),
UNIQUE KEY `uidx2` (`f_int2`,`f_int1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ABS(MOD(f_int1,2)))
-SUBPARTITION BY KEY (f_int2)
+ PARTITION BY LIST (abs(`f_int1` % 2))
+SUBPARTITION BY KEY (`f_int2`)
SUBPARTITIONS 3
-(PARTITION part1 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (1) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (NULL) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (1) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (NULL) ENGINE = TokuDB)
unified filelist
t1.frm
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_bit_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_bit_tokudb.result
index 6eec1bce210..5cde7dd065e 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_bit_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_bit_tokudb.result
@@ -9,7 +9,7 @@ t1 CREATE TABLE `t1` (
`a` bit(1) NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a) */
+ PARTITION BY KEY (`a`)
drop table t1;
create table t1 (a bit(0), primary key (a)) engine='TOKUDB'
partition by key (a) (
@@ -21,9 +21,9 @@ t1 CREATE TABLE `t1` (
`a` bit(1) NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION pa1 ENGINE = TokuDB,
- PARTITION pa2 ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `pa1` ENGINE = TokuDB,
+ PARTITION `pa2` ENGINE = TokuDB)
drop table t1;
create table t1 (a bit(64), primary key (a)) engine='TOKUDB'
partition by key (a) partitions 2;
@@ -33,8 +33,8 @@ t1 CREATE TABLE `t1` (
`a` bit(64) NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-PARTITIONS 2 */
+ PARTITION BY KEY (`a`)
+PARTITIONS 2
insert into t1 values
(b'1111111111111111111111111111111111111111111111111111111111111111'),
(b'1000000000000000000000000000000000000000000000000000000000000000'),
@@ -61,11 +61,11 @@ t1 CREATE TABLE `t1` (
`a` bit(64) NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION pa1 MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
- PARTITION pa2 MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
- PARTITION pa3 MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
- PARTITION pa4 MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `pa1` MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
+ PARTITION `pa2` MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
+ PARTITION `pa3` MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
+ PARTITION `pa4` MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB)
insert into t1 values
(b'1111111111111111111111111111111111111111111111111111111111111111'),
(b'1000000000000000000000000000000000000000000000000000000000000000'),
@@ -91,8 +91,8 @@ t2 CREATE TABLE `t2` (
`a` bit(1) NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-PARTITIONS 4 */
+ PARTITION BY KEY (`a`)
+PARTITIONS 4
insert into t2 values (b'0'), (b'1');
select hex(a) from t2;
hex(a)
@@ -104,8 +104,8 @@ Table Create Table
t2 CREATE TABLE `t2` (
`a` bit(1) NOT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-PARTITIONS 4 */
+ PARTITION BY KEY (`a`)
+PARTITIONS 4
select hex(a) from t2;
hex(a)
0
@@ -117,8 +117,8 @@ t2 CREATE TABLE `t2` (
`a` bit(1) NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-PARTITIONS 4 */
+ PARTITION BY KEY (`a`)
+PARTITIONS 4
select hex(a) from t2;
hex(a)
0
@@ -136,13 +136,13 @@ t3 CREATE TABLE `t3` (
`a` bit(8) NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-SUBPARTITION BY KEY (a)
+ PARTITION BY RANGE (`a`)
+SUBPARTITION BY KEY (`a`)
SUBPARTITIONS 2
-(PARTITION pa1 VALUES LESS THAN (3) ENGINE = TokuDB,
- PARTITION pa2 VALUES LESS THAN (16) ENGINE = TokuDB,
- PARTITION pa3 VALUES LESS THAN (64) ENGINE = TokuDB,
- PARTITION pa4 VALUES LESS THAN (256) ENGINE = TokuDB) */
+(PARTITION `pa1` VALUES LESS THAN (3) ENGINE = TokuDB,
+ PARTITION `pa2` VALUES LESS THAN (16) ENGINE = TokuDB,
+ PARTITION `pa3` VALUES LESS THAN (64) ENGINE = TokuDB,
+ PARTITION `pa4` VALUES LESS THAN (256) ENGINE = TokuDB)
255 inserts;
select hex(a) from t3 where a=b'01010101';
hex(a)
@@ -419,12 +419,12 @@ t4 CREATE TABLE `t4` (
`a` bit(8) NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-SUBPARTITION BY KEY (a)
+ PARTITION BY LIST (`a`)
+SUBPARTITION BY KEY (`a`)
SUBPARTITIONS 2
-(PARTITION pa1 VALUES IN (0,1,2,3) ENGINE = TokuDB,
- PARTITION pa2 VALUES IN (4,5,6,7,8,9,10,11,12,13,14,15,16) ENGINE = TokuDB,
- PARTITION pa3 VALUES IN (17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32) ENGINE = TokuDB) */
+(PARTITION `pa1` VALUES IN (0,1,2,3) ENGINE = TokuDB,
+ PARTITION `pa2` VALUES IN (4,5,6,7,8,9,10,11,12,13,14,15,16) ENGINE = TokuDB,
+ PARTITION `pa3` VALUES IN (17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32) ENGINE = TokuDB)
32 inserts;
select hex(a) from t4 where a=b'00000001';
hex(a)
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_char_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_char_tokudb.result
index 178174872b5..86686296b81 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_char_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_char_tokudb.result
Binary files differ
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_datetime_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_datetime_tokudb.result
index 8cec39930fc..0bbd8bd75cc 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_datetime_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_datetime_tokudb.result
@@ -7,14 +7,14 @@ partition pa4 max_rows=40 min_rows=2);
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `a` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+ `a` timestamp NOT NULL DEFAULT current_timestamp() ON UPDATE current_timestamp(),
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION pa1 MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
- PARTITION pa2 MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
- PARTITION pa3 MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
- PARTITION pa4 MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `pa1` MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
+ PARTITION `pa2` MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
+ PARTITION `pa3` MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
+ PARTITION `pa4` MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB)
insert into t1 values ('1975-01-01 21:21:21'), ('2020-12-31 12:10:30'), ('1980-10-14 03:03'), ('2000-06-15 23:59');
select * from t1;
a
@@ -37,11 +37,11 @@ partition by key (a) partitions 12;
show create table t2;
Table Create Table
t2 CREATE TABLE `t2` (
- `a` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+ `a` timestamp NOT NULL DEFAULT current_timestamp() ON UPDATE current_timestamp(),
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-PARTITIONS 12 */
+ PARTITION BY KEY (`a`)
+PARTITIONS 12
insert into t2 values ('1975-01-01 0:1:1'), ('2020-12-31 10:11:12'), ('1980-10-14 13:14:15'), ('2000-06-15 14:15:16');
select * from t2;
a
@@ -137,11 +137,11 @@ t1 CREATE TABLE `t1` (
`a` date NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION pa1 MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
- PARTITION pa2 MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
- PARTITION pa3 MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
- PARTITION pa4 MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `pa1` MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
+ PARTITION `pa2` MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
+ PARTITION `pa3` MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
+ PARTITION `pa4` MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB)
insert into t1 values ('1975-01-01'), ('2020-12-31'), ('1980-10-14'), ('2000-06-15');
select * from t1;
a
@@ -167,8 +167,8 @@ t2 CREATE TABLE `t2` (
`a` date NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-PARTITIONS 12 */
+ PARTITION BY KEY (`a`)
+PARTITIONS 12
insert into t2 values ('1975-01-01'), ('2020-12-31'), ('1980-10-14'), ('2000-06-15');
select * from t2;
a
@@ -291,13 +291,13 @@ t3 CREATE TABLE `t3` (
`a` date NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (month(a))
-SUBPARTITION BY KEY (a)
+ PARTITION BY RANGE (month(`a`))
+SUBPARTITION BY KEY (`a`)
SUBPARTITIONS 3
-(PARTITION quarter1 VALUES LESS THAN (4) ENGINE = TokuDB,
- PARTITION quarter2 VALUES LESS THAN (7) ENGINE = TokuDB,
- PARTITION quarter3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION quarter4 VALUES LESS THAN (13) ENGINE = TokuDB) */
+(PARTITION `quarter1` VALUES LESS THAN (4) ENGINE = TokuDB,
+ PARTITION `quarter2` VALUES LESS THAN (7) ENGINE = TokuDB,
+ PARTITION `quarter3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `quarter4` VALUES LESS THAN (13) ENGINE = TokuDB)
12 inserts;
select count(*) from t3;
count(*)
@@ -331,13 +331,13 @@ t4 CREATE TABLE `t4` (
`a` date NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (month(a))
-SUBPARTITION BY KEY (a)
+ PARTITION BY LIST (month(`a`))
+SUBPARTITION BY KEY (`a`)
SUBPARTITIONS 3
-(PARTITION quarter1 VALUES IN (1,2,3) ENGINE = TokuDB,
- PARTITION quarter2 VALUES IN (4,5,6) ENGINE = TokuDB,
- PARTITION quarter3 VALUES IN (7,8,9) ENGINE = TokuDB,
- PARTITION quarter4 VALUES IN (10,11,12) ENGINE = TokuDB) */
+(PARTITION `quarter1` VALUES IN (1,2,3) ENGINE = TokuDB,
+ PARTITION `quarter2` VALUES IN (4,5,6) ENGINE = TokuDB,
+ PARTITION `quarter3` VALUES IN (7,8,9) ENGINE = TokuDB,
+ PARTITION `quarter4` VALUES IN (10,11,12) ENGINE = TokuDB)
12 inserts;
select count(*) from t4;
count(*)
@@ -369,11 +369,11 @@ t1 CREATE TABLE `t1` (
`a` time NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION pa1 MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
- PARTITION pa2 MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
- PARTITION pa3 MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
- PARTITION pa4 MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `pa1` MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
+ PARTITION `pa2` MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
+ PARTITION `pa3` MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
+ PARTITION `pa4` MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB)
insert into t1 values ('21:21:21'), ('12:10:30'), ('03:03:03'), ('23:59');
select * from t1;
a
@@ -399,8 +399,8 @@ t2 CREATE TABLE `t2` (
`a` time NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-PARTITIONS 12 */
+ PARTITION BY KEY (`a`)
+PARTITIONS 12
insert into t2 values ('0:1:1'), ('10:11:12'), ('13:14:15'), ('14:15:16');
select * from t2;
a
@@ -498,13 +498,13 @@ t3 CREATE TABLE `t3` (
`a` time NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (second(a))
-SUBPARTITION BY KEY (a)
+ PARTITION BY RANGE (second(`a`))
+SUBPARTITION BY KEY (`a`)
SUBPARTITIONS 3
-(PARTITION quarter1 VALUES LESS THAN (16) ENGINE = TokuDB,
- PARTITION quarter2 VALUES LESS THAN (31) ENGINE = TokuDB,
- PARTITION quarter3 VALUES LESS THAN (46) ENGINE = TokuDB,
- PARTITION quarter4 VALUES LESS THAN (61) ENGINE = TokuDB) */
+(PARTITION `quarter1` VALUES LESS THAN (16) ENGINE = TokuDB,
+ PARTITION `quarter2` VALUES LESS THAN (31) ENGINE = TokuDB,
+ PARTITION `quarter3` VALUES LESS THAN (46) ENGINE = TokuDB,
+ PARTITION `quarter4` VALUES LESS THAN (61) ENGINE = TokuDB)
59 inserts;
select count(*) from t3;
count(*)
@@ -585,13 +585,13 @@ t4 CREATE TABLE `t4` (
`a` time NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (second(a))
-SUBPARTITION BY KEY (a)
+ PARTITION BY LIST (second(`a`))
+SUBPARTITION BY KEY (`a`)
SUBPARTITIONS 3
-(PARTITION quarter1 VALUES IN (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15) ENGINE = TokuDB,
- PARTITION quarter2 VALUES IN (16,17,18,19,20,21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
- PARTITION quarter3 VALUES IN (31,32,33,34,35,36,37,38,39,40,41,42,43,44,45) ENGINE = TokuDB,
- PARTITION quarter4 VALUES IN (46,47,48,49,50,51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB) */
+(PARTITION `quarter1` VALUES IN (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15) ENGINE = TokuDB,
+ PARTITION `quarter2` VALUES IN (16,17,18,19,20,21,22,23,24,25,26,27,28,29,30) ENGINE = TokuDB,
+ PARTITION `quarter3` VALUES IN (31,32,33,34,35,36,37,38,39,40,41,42,43,44,45) ENGINE = TokuDB,
+ PARTITION `quarter4` VALUES IN (46,47,48,49,50,51,52,53,54,55,56,57,58,59,60) ENGINE = TokuDB)
59 inserts;
select count(*) from t4;
count(*)
@@ -670,11 +670,11 @@ t1 CREATE TABLE `t1` (
`a` datetime NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION pa1 MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
- PARTITION pa2 MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
- PARTITION pa3 MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
- PARTITION pa4 MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `pa1` MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
+ PARTITION `pa2` MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
+ PARTITION `pa3` MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
+ PARTITION `pa4` MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB)
insert into t1 values ('1975-01-01 21:21:21'), ('2020-12-31 12:10:30'), ('1980-10-14 03:03'), ('2000-06-15 23:59');
select * from t1;
a
@@ -700,8 +700,8 @@ t2 CREATE TABLE `t2` (
`a` datetime NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-PARTITIONS 12 */
+ PARTITION BY KEY (`a`)
+PARTITIONS 12
insert into t2 values ('1975-01-01 0:1:1'), ('2020-12-31 10:11:12'), ('1980-10-14 13:14:15'), ('2000-06-15 14:15:16');
select * from t2;
a
@@ -799,13 +799,13 @@ t3 CREATE TABLE `t3` (
`a` datetime NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (month(a))
-SUBPARTITION BY KEY (a)
+ PARTITION BY RANGE (month(`a`))
+SUBPARTITION BY KEY (`a`)
SUBPARTITIONS 3
-(PARTITION quarter1 VALUES LESS THAN (4) ENGINE = TokuDB,
- PARTITION quarter2 VALUES LESS THAN (7) ENGINE = TokuDB,
- PARTITION quarter3 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION quarter4 VALUES LESS THAN (13) ENGINE = TokuDB) */
+(PARTITION `quarter1` VALUES LESS THAN (4) ENGINE = TokuDB,
+ PARTITION `quarter2` VALUES LESS THAN (7) ENGINE = TokuDB,
+ PARTITION `quarter3` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `quarter4` VALUES LESS THAN (13) ENGINE = TokuDB)
12 inserts;
select count(*) from t3;
count(*)
@@ -839,13 +839,13 @@ t4 CREATE TABLE `t4` (
`a` datetime NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (month(a))
-SUBPARTITION BY KEY (a)
+ PARTITION BY LIST (month(`a`))
+SUBPARTITION BY KEY (`a`)
SUBPARTITIONS 3
-(PARTITION quarter1 VALUES IN (1,2,3) ENGINE = TokuDB,
- PARTITION quarter2 VALUES IN (4,5,6) ENGINE = TokuDB,
- PARTITION quarter3 VALUES IN (7,8,9) ENGINE = TokuDB,
- PARTITION quarter4 VALUES IN (10,11,12) ENGINE = TokuDB) */
+(PARTITION `quarter1` VALUES IN (1,2,3) ENGINE = TokuDB,
+ PARTITION `quarter2` VALUES IN (4,5,6) ENGINE = TokuDB,
+ PARTITION `quarter3` VALUES IN (7,8,9) ENGINE = TokuDB,
+ PARTITION `quarter4` VALUES IN (10,11,12) ENGINE = TokuDB)
12 inserts;
select count(*) from t4;
count(*)
@@ -877,11 +877,11 @@ t1 CREATE TABLE `t1` (
`a` year(4) NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION pa1 MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
- PARTITION pa2 MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
- PARTITION pa3 MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
- PARTITION pa4 MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `pa1` MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
+ PARTITION `pa2` MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
+ PARTITION `pa3` MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
+ PARTITION `pa4` MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB)
insert into t1 values ('1975'), (2020), ('1980'), ('2000');
select * from t1;
a
@@ -907,8 +907,8 @@ t2 CREATE TABLE `t2` (
`a` year(4) NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-PARTITIONS 12 */
+ PARTITION BY KEY (`a`)
+PARTITIONS 12
insert into t2 values ('1975'), ('2020'), ('1980'), ('2000');
select * from t2;
a
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_debug_sync_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_debug_sync_tokudb.result
index 7776c44c8e8..12dfab60b66 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_debug_sync_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_debug_sync_tokudb.result
@@ -15,8 +15,10 @@ a
1
21
33
+connect con1, localhost, root,,;
# con1 (send)
ALTER TABLE t1 TRUNCATE PARTITION pMax;
+connection default;
# con default
SELECT * FROM t1;
a
@@ -26,7 +28,10 @@ a
# Commit will allow the TRUNCATE to finish
COMMIT;
# con1 (reap)
+connection con1;
# con1 (disconnect)
+disconnect con1;
+connection default;
# default connection
SELECT * FROM t1;
a
@@ -51,8 +56,9 @@ Table Create Table
t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1 STATS_PERSISTENT=0
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
+db.opt
t1.frm
t1.par
SET DEBUG_SYNC='before_open_in_get_all_tables SIGNAL parked WAIT_FOR open';
@@ -61,6 +67,7 @@ SELECT TABLE_SCHEMA, TABLE_NAME, PARTITION_NAME, PARTITION_ORDINAL_POSITION,
PARTITION_DESCRIPTION, TABLE_ROWS
FROM INFORMATION_SCHEMA.PARTITIONS
WHERE TABLE_NAME = 't1' AND TABLE_SCHEMA = 'test';
+connect con1, localhost, root,,;
SET DEBUG_SYNC = 'now WAIT_FOR parked';
# When waiting for the name lock in get_all_tables in sql_show.cc
# this will not be concurrent any more, thus the TIMEOUT
@@ -72,9 +79,12 @@ ALTER TABLE t1 REORGANIZE PARTITION p0 INTO
PARTITION p10 VALUES LESS THAN MAXVALUE);
Warnings:
Warning 1639 debug sync point wait timed out
+disconnect con1;
+connection default;
TABLE_SCHEMA TABLE_NAME PARTITION_NAME PARTITION_ORDINAL_POSITION PARTITION_DESCRIPTION TABLE_ROWS
test t1 p0 1 10 1
test t1 p10 2 MAXVALUE 3
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -82,9 +92,9 @@ Table Create Table
t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1 STATS_PERSISTENT=0
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p10 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p10` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a
1
@@ -92,4 +102,5 @@ a
21
33
drop table t1;
+db.opt
SET DEBUG_SYNC = 'RESET';
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_debug_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_debug_tokudb.result
index 4d9705e7f5b..ae95fc691a4 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_debug_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_debug_tokudb.result
@@ -21,6 +21,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -29,9 +30,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -46,9 +47,11 @@ ALTER TABLE t1 ADD PARTITION
(PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -57,9 +60,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -80,6 +83,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -88,9 +92,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -107,9 +111,11 @@ ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
#sql-t1.frm
#sql-t1.par
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -118,9 +124,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -141,6 +147,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -149,9 +156,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -168,9 +175,11 @@ ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
#sql-t1.frm
#sql-t1.par
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -179,9 +188,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -202,6 +211,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -210,9 +220,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -229,9 +239,11 @@ ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
#sql-t1.frm
#sql-t1.par
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -240,9 +252,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -263,6 +275,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -271,9 +284,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -290,9 +303,11 @@ ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
#sql-t1.frm
#sql-t1.par
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -301,9 +316,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -324,6 +339,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -332,9 +348,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -351,9 +367,11 @@ ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
#sql-t1.frm
#sql-t1.par
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -362,9 +380,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -385,6 +403,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -393,9 +412,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -412,9 +431,11 @@ ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
#sql-t1.frm
#sql-t1.par
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -423,9 +444,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -446,6 +467,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -454,9 +476,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -473,9 +495,11 @@ ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
#sql-t1.frm
#sql-t1.par
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -484,10 +508,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -508,6 +532,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -516,9 +541,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -533,9 +558,11 @@ ALTER TABLE t1 ADD PARTITION
(PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -544,10 +571,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -568,6 +595,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -576,9 +604,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -593,9 +621,11 @@ ALTER TABLE t1 ADD PARTITION
(PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -604,10 +634,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -630,6 +660,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -638,9 +669,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -655,6 +686,7 @@ ALTER TABLE t1 ADD PARTITION
(PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -663,9 +695,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -685,6 +717,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -693,9 +726,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -711,6 +744,7 @@ ALTER TABLE t1 ADD PARTITION
(PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -719,9 +753,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -743,6 +777,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -751,9 +786,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -768,6 +803,7 @@ ALTER TABLE t1 ADD PARTITION
(PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -776,9 +812,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -798,6 +834,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -806,9 +843,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -824,6 +861,7 @@ ALTER TABLE t1 ADD PARTITION
(PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -832,9 +870,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -856,6 +894,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -864,9 +903,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -881,6 +920,7 @@ ALTER TABLE t1 ADD PARTITION
(PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -889,9 +929,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -911,6 +951,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -919,9 +960,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -937,6 +978,7 @@ ALTER TABLE t1 ADD PARTITION
(PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -945,9 +987,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -969,6 +1011,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -977,9 +1020,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -994,6 +1037,7 @@ ALTER TABLE t1 ADD PARTITION
(PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1002,9 +1046,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1024,6 +1068,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1032,9 +1077,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1050,6 +1095,7 @@ ALTER TABLE t1 ADD PARTITION
(PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1058,9 +1104,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1082,6 +1128,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1090,9 +1137,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1107,6 +1154,7 @@ ALTER TABLE t1 ADD PARTITION
(PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1115,9 +1163,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1137,6 +1185,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1145,9 +1194,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1163,6 +1212,7 @@ ALTER TABLE t1 ADD PARTITION
(PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1171,9 +1221,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1195,6 +1245,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1203,9 +1254,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1220,6 +1271,7 @@ ALTER TABLE t1 ADD PARTITION
(PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1228,9 +1280,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1250,6 +1302,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1258,9 +1311,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1276,6 +1329,7 @@ ALTER TABLE t1 ADD PARTITION
(PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1284,9 +1338,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1308,6 +1362,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1316,9 +1371,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1333,6 +1388,7 @@ ALTER TABLE t1 ADD PARTITION
(PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1341,9 +1397,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1363,6 +1419,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1371,9 +1428,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1389,6 +1446,7 @@ ALTER TABLE t1 ADD PARTITION
(PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1397,9 +1455,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1421,6 +1479,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1429,9 +1488,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1446,6 +1505,7 @@ ALTER TABLE t1 ADD PARTITION
(PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1454,10 +1514,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1477,6 +1537,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1485,9 +1546,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1503,6 +1564,7 @@ ALTER TABLE t1 ADD PARTITION
(PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1511,10 +1573,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1536,6 +1598,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1544,9 +1607,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1561,6 +1624,7 @@ ALTER TABLE t1 ADD PARTITION
(PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1569,10 +1633,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1592,6 +1656,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1600,9 +1665,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1618,6 +1683,7 @@ ALTER TABLE t1 ADD PARTITION
(PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1626,10 +1692,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1651,6 +1717,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1659,9 +1726,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1676,6 +1743,7 @@ ALTER TABLE t1 ADD PARTITION
(PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1684,10 +1752,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1707,6 +1775,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1715,9 +1784,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1733,6 +1802,7 @@ ALTER TABLE t1 ADD PARTITION
(PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1741,10 +1811,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1768,6 +1838,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1776,9 +1847,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1792,9 +1863,11 @@ a b
ALTER TABLE t1 DROP PARTITION p10;
ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1803,9 +1876,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1826,6 +1899,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1834,9 +1908,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1852,9 +1926,11 @@ ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
#sql-t1.frm
#sql-t1.par
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1863,9 +1939,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1886,6 +1962,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1894,9 +1971,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1912,9 +1989,11 @@ ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
#sql-t1.frm
#sql-t1.par
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1923,9 +2002,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1946,6 +2025,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1954,9 +2034,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -1972,9 +2052,11 @@ ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
#sql-t1.frm
#sql-t1.par
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -1983,8 +2065,8 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2001,6 +2083,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2009,9 +2092,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2027,9 +2110,11 @@ ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
#sql-t1.frm
#sql-t1.par
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2038,8 +2123,8 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2056,6 +2141,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2064,9 +2150,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2082,9 +2168,11 @@ ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
#sql-t1.frm
#sql-t1.par
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2093,8 +2181,8 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2111,6 +2199,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2119,9 +2208,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2135,9 +2224,11 @@ a b
ALTER TABLE t1 DROP PARTITION p10;
ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2146,8 +2237,8 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2164,6 +2255,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2172,9 +2264,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2188,9 +2280,11 @@ a b
ALTER TABLE t1 DROP PARTITION p10;
ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2199,8 +2293,8 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2217,6 +2311,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2225,9 +2320,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2241,9 +2336,11 @@ a b
ALTER TABLE t1 DROP PARTITION p10;
ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2252,8 +2349,8 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2272,6 +2369,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2280,9 +2378,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2296,6 +2394,7 @@ a b
ALTER TABLE t1 DROP PARTITION p10;
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2304,9 +2403,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2326,6 +2425,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2334,9 +2434,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2351,6 +2451,7 @@ LOCK TABLE t1 WRITE;
ALTER TABLE t1 DROP PARTITION p10;
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2359,9 +2460,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2383,6 +2484,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2391,9 +2493,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2407,6 +2509,7 @@ a b
ALTER TABLE t1 DROP PARTITION p10;
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2415,9 +2518,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2437,6 +2540,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2445,9 +2549,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2462,6 +2566,7 @@ LOCK TABLE t1 WRITE;
ALTER TABLE t1 DROP PARTITION p10;
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2470,9 +2575,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2494,6 +2599,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2502,9 +2608,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2518,6 +2624,7 @@ a b
ALTER TABLE t1 DROP PARTITION p10;
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2526,9 +2633,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2548,6 +2655,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2556,9 +2664,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2573,6 +2681,7 @@ LOCK TABLE t1 WRITE;
ALTER TABLE t1 DROP PARTITION p10;
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2581,9 +2690,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2605,6 +2714,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2613,9 +2723,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2629,6 +2739,7 @@ a b
ALTER TABLE t1 DROP PARTITION p10;
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2637,8 +2748,8 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2654,6 +2765,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2662,9 +2774,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2679,6 +2791,7 @@ LOCK TABLE t1 WRITE;
ALTER TABLE t1 DROP PARTITION p10;
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2687,8 +2800,8 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2706,6 +2819,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2714,9 +2828,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2730,6 +2844,7 @@ a b
ALTER TABLE t1 DROP PARTITION p10;
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2738,8 +2853,8 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2755,6 +2870,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2763,9 +2879,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2780,6 +2896,7 @@ LOCK TABLE t1 WRITE;
ALTER TABLE t1 DROP PARTITION p10;
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2788,8 +2905,8 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2807,6 +2924,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2815,9 +2933,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2831,6 +2949,7 @@ a b
ALTER TABLE t1 DROP PARTITION p10;
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2839,8 +2958,8 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2856,6 +2975,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2864,9 +2984,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2881,6 +3001,7 @@ LOCK TABLE t1 WRITE;
ALTER TABLE t1 DROP PARTITION p10;
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2889,8 +3010,8 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2908,6 +3029,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2916,9 +3038,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2932,6 +3054,7 @@ a b
ALTER TABLE t1 DROP PARTITION p10;
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2940,8 +3063,8 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2957,6 +3080,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2965,9 +3089,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -2982,6 +3106,7 @@ LOCK TABLE t1 WRITE;
ALTER TABLE t1 DROP PARTITION p10;
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -2990,8 +3115,8 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3009,6 +3134,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3017,9 +3143,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3033,6 +3159,7 @@ a b
ALTER TABLE t1 DROP PARTITION p10;
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3041,8 +3168,8 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3058,6 +3185,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3066,9 +3194,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3083,6 +3211,7 @@ LOCK TABLE t1 WRITE;
ALTER TABLE t1 DROP PARTITION p10;
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3091,8 +3220,8 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3110,6 +3239,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3118,9 +3248,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3134,6 +3264,7 @@ a b
ALTER TABLE t1 DROP PARTITION p10;
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3142,8 +3273,8 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3159,6 +3290,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3167,9 +3299,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3184,6 +3316,7 @@ LOCK TABLE t1 WRITE;
ALTER TABLE t1 DROP PARTITION p10;
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3192,8 +3325,8 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3214,6 +3347,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3222,9 +3356,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3240,9 +3374,11 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3251,9 +3387,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3274,6 +3410,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3282,9 +3419,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3302,9 +3439,11 @@ ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
#sql-t1.frm
#sql-t1.par
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3313,9 +3452,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3336,6 +3475,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3344,9 +3484,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3364,9 +3504,11 @@ ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
#sql-t1.frm
#sql-t1.par
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3375,9 +3517,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3398,6 +3540,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3406,9 +3549,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3426,9 +3569,11 @@ ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
#sql-t1.frm
#sql-t1.par
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3437,9 +3582,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3460,6 +3605,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3468,9 +3614,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3488,9 +3634,11 @@ ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
#sql-t1.frm
#sql-t1.par
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3499,9 +3647,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3522,6 +3670,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3530,9 +3679,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3550,9 +3699,11 @@ ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
#sql-t1.frm
#sql-t1.par
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3561,9 +3712,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3584,6 +3735,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3592,9 +3744,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3612,9 +3764,11 @@ ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
#sql-t1.frm
#sql-t1.par
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3623,10 +3777,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3647,6 +3801,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3655,9 +3810,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3675,9 +3830,11 @@ ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
#sql-t1.frm
#sql-t1.par
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3686,10 +3843,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3710,6 +3867,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3718,9 +3876,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3736,9 +3894,11 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3747,10 +3907,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3771,6 +3931,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3779,9 +3940,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3797,9 +3958,11 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3808,10 +3971,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3832,6 +3995,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3840,9 +4004,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3858,9 +4022,11 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3869,10 +4035,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3893,6 +4059,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3901,9 +4068,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3919,9 +4086,11 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
+db.opt
t1.frm
t1.par
# State after crash recovery
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3930,10 +4099,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3957,6 +4126,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3965,9 +4135,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -3983,6 +4153,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -3991,9 +4162,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4013,6 +4184,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4021,9 +4193,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4040,6 +4212,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4048,9 +4221,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4072,6 +4245,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4080,9 +4254,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4098,6 +4272,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4106,9 +4281,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4128,6 +4303,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4136,9 +4312,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4155,6 +4331,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4163,9 +4340,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4187,6 +4364,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4195,9 +4373,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4213,6 +4391,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4221,9 +4400,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4243,6 +4422,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4251,9 +4431,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4270,6 +4450,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4278,9 +4459,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4302,6 +4483,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4310,9 +4492,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4328,6 +4510,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4336,9 +4519,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4358,6 +4541,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4366,9 +4550,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4385,6 +4569,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4393,9 +4578,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4417,6 +4602,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4425,9 +4611,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4443,6 +4629,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4451,9 +4638,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4473,6 +4660,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4481,9 +4669,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4500,6 +4688,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4508,9 +4697,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4532,6 +4721,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4540,9 +4730,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4558,6 +4748,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4566,9 +4757,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4588,6 +4779,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4596,9 +4788,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4615,6 +4807,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4623,9 +4816,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4647,6 +4840,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4655,9 +4849,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4673,6 +4867,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4681,10 +4876,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4704,6 +4899,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4712,9 +4908,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4731,6 +4927,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4739,10 +4936,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4764,6 +4961,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4772,9 +4970,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4790,6 +4988,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4798,10 +4997,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4821,6 +5020,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4829,9 +5029,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4848,6 +5048,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4856,10 +5057,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4881,6 +5082,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4889,9 +5091,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4907,6 +5109,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4915,10 +5118,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4938,6 +5141,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4946,9 +5150,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4965,6 +5169,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -4973,10 +5178,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -4998,6 +5203,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -5006,9 +5212,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5024,6 +5230,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -5032,10 +5239,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5055,6 +5262,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -5063,9 +5271,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5082,6 +5290,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -5090,10 +5299,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5115,6 +5324,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -5123,9 +5333,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5141,6 +5351,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -5149,10 +5360,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5172,6 +5383,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -5180,9 +5392,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5199,6 +5411,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -5207,10 +5420,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5232,6 +5445,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -5240,9 +5454,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5258,6 +5472,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -5266,10 +5481,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5289,6 +5504,7 @@ PARTITION BY LIST (a)
PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19));
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -5297,9 +5513,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (11,12,13,14,15,16,17,18,19) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5316,6 +5532,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p10 INTO
PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29));
ERROR HY000: Unknown error
# State after failure
+db.opt
t1.frm
t1.par
SHOW CREATE TABLE t1;
@@ -5324,10 +5541,10 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION p0 VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
- PARTITION p10 VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
- PARTITION p20 VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `p0` VALUES IN (0,1,2,3,4,5,6,7,8,9) ENGINE = TokuDB,
+ PARTITION `p10` VALUES IN (10,11,12,13,14,15,16,17,18,19) ENGINE = TokuDB,
+ PARTITION `p20` VALUES IN (20,21,22,23,24,25,26,27,28,29) ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5370,6 +5587,7 @@ PARTITION BY RANGE (a)
PARTITION p1 VALUES LESS THAN MAXVALUE);
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1"), (21, "Original from partition p1"), (22, "Original from partition p1"), (23, "Original from partition p1"), (24, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
t2.frm
@@ -5379,9 +5597,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5399,10 +5617,12 @@ a b
ALTER TABLE t1 EXCHANGE PARTITION p0 WITH TABLE t2;
ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
+db.opt
t1.frm
t1.par
t2.frm
# State after crash recovery
+db.opt
t1.frm
t1.par
t2.frm
@@ -5412,9 +5632,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5466,6 +5686,7 @@ PARTITION BY RANGE (a)
PARTITION p1 VALUES LESS THAN MAXVALUE);
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1"), (21, "Original from partition p1"), (22, "Original from partition p1"), (23, "Original from partition p1"), (24, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
t2.frm
@@ -5475,9 +5696,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5495,10 +5716,12 @@ a b
ALTER TABLE t1 EXCHANGE PARTITION p0 WITH TABLE t2;
ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
+db.opt
t1.frm
t1.par
t2.frm
# State after crash recovery
+db.opt
t1.frm
t1.par
t2.frm
@@ -5508,9 +5731,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5562,6 +5785,7 @@ PARTITION BY RANGE (a)
PARTITION p1 VALUES LESS THAN MAXVALUE);
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1"), (21, "Original from partition p1"), (22, "Original from partition p1"), (23, "Original from partition p1"), (24, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
t2.frm
@@ -5571,9 +5795,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5591,10 +5815,12 @@ a b
ALTER TABLE t1 EXCHANGE PARTITION p0 WITH TABLE t2;
ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
+db.opt
t1.frm
t1.par
t2.frm
# State after crash recovery
+db.opt
t1.frm
t1.par
t2.frm
@@ -5604,9 +5830,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5658,6 +5884,7 @@ PARTITION BY RANGE (a)
PARTITION p1 VALUES LESS THAN MAXVALUE);
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1"), (21, "Original from partition p1"), (22, "Original from partition p1"), (23, "Original from partition p1"), (24, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
t2.frm
@@ -5667,9 +5894,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5687,10 +5914,12 @@ a b
ALTER TABLE t1 EXCHANGE PARTITION p0 WITH TABLE t2;
ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
+db.opt
t1.frm
t1.par
t2.frm
# State after crash recovery
+db.opt
t1.frm
t1.par
t2.frm
@@ -5700,9 +5929,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5754,6 +5983,7 @@ PARTITION BY RANGE (a)
PARTITION p1 VALUES LESS THAN MAXVALUE);
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1"), (21, "Original from partition p1"), (22, "Original from partition p1"), (23, "Original from partition p1"), (24, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
t2.frm
@@ -5763,9 +5993,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5783,10 +6013,12 @@ a b
ALTER TABLE t1 EXCHANGE PARTITION p0 WITH TABLE t2;
ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
+db.opt
t1.frm
t1.par
t2.frm
# State after crash recovery
+db.opt
t1.frm
t1.par
t2.frm
@@ -5796,9 +6028,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5850,6 +6082,7 @@ PARTITION BY RANGE (a)
PARTITION p1 VALUES LESS THAN MAXVALUE);
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1"), (21, "Original from partition p1"), (22, "Original from partition p1"), (23, "Original from partition p1"), (24, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
t2.frm
@@ -5859,9 +6092,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5879,10 +6112,12 @@ a b
ALTER TABLE t1 EXCHANGE PARTITION p0 WITH TABLE t2;
ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
+db.opt
t1.frm
t1.par
t2.frm
# State after crash recovery
+db.opt
t1.frm
t1.par
t2.frm
@@ -5892,9 +6127,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5946,6 +6181,7 @@ PARTITION BY RANGE (a)
PARTITION p1 VALUES LESS THAN MAXVALUE);
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1"), (21, "Original from partition p1"), (22, "Original from partition p1"), (23, "Original from partition p1"), (24, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
t2.frm
@@ -5955,9 +6191,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -5975,10 +6211,12 @@ a b
ALTER TABLE t1 EXCHANGE PARTITION p0 WITH TABLE t2;
ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
+db.opt
t1.frm
t1.par
t2.frm
# State after crash recovery
+db.opt
t1.frm
t1.par
t2.frm
@@ -5988,9 +6226,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -6042,6 +6280,7 @@ PARTITION BY RANGE (a)
PARTITION p1 VALUES LESS THAN MAXVALUE);
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1"), (21, "Original from partition p1"), (22, "Original from partition p1"), (23, "Original from partition p1"), (24, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
t2.frm
@@ -6051,9 +6290,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -6071,10 +6310,12 @@ a b
ALTER TABLE t1 EXCHANGE PARTITION p0 WITH TABLE t2;
ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
+db.opt
t1.frm
t1.par
t2.frm
# State after crash recovery
+db.opt
t1.frm
t1.par
t2.frm
@@ -6084,9 +6325,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -6138,6 +6379,7 @@ PARTITION BY RANGE (a)
PARTITION p1 VALUES LESS THAN MAXVALUE);
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1"), (21, "Original from partition p1"), (22, "Original from partition p1"), (23, "Original from partition p1"), (24, "Original from partition p1");
# State before crash
+db.opt
t1.frm
t1.par
t2.frm
@@ -6147,9 +6389,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -6167,10 +6409,12 @@ a b
ALTER TABLE t1 EXCHANGE PARTITION p0 WITH TABLE t2;
ERROR HY000: Lost connection to MySQL server during query
# State after crash (before recovery)
+db.opt
t1.frm
t1.par
t2.frm
# State after crash recovery
+db.opt
t1.frm
t1.par
t2.frm
@@ -6180,9 +6424,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
11 Original from partition p1
@@ -6235,6 +6479,7 @@ PARTITION BY RANGE (a)
PARTITION p1 VALUES LESS THAN MAXVALUE);
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1"), (21, "Original from partition p1"), (22, "Original from partition p1"), (23, "Original from partition p1"), (24, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
t2.frm
@@ -6244,9 +6489,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -6264,6 +6509,7 @@ a b
ALTER TABLE t1 EXCHANGE PARTITION p0 WITH TABLE t2;
ERROR HY000: Error in DDL log
# State after failure
+db.opt
t1.frm
t1.par
t2.frm
@@ -6273,9 +6519,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -6327,6 +6573,7 @@ PARTITION BY RANGE (a)
PARTITION p1 VALUES LESS THAN MAXVALUE);
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1"), (21, "Original from partition p1"), (22, "Original from partition p1"), (23, "Original from partition p1"), (24, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
t2.frm
@@ -6336,9 +6583,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -6356,6 +6603,7 @@ a b
ALTER TABLE t1 EXCHANGE PARTITION p0 WITH TABLE t2;
ERROR HY000: Error in DDL log
# State after failure
+db.opt
t1.frm
t1.par
t2.frm
@@ -6365,9 +6613,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -6419,6 +6667,7 @@ PARTITION BY RANGE (a)
PARTITION p1 VALUES LESS THAN MAXVALUE);
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1"), (21, "Original from partition p1"), (22, "Original from partition p1"), (23, "Original from partition p1"), (24, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
t2.frm
@@ -6428,9 +6677,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -6448,6 +6697,7 @@ a b
ALTER TABLE t1 EXCHANGE PARTITION p0 WITH TABLE t2;
ERROR HY000: Error on rename of './test/t2' to './test/#sqlx-nnnn_nnnn' (errno: 0 "Internal error/check (Not system error)")
# State after failure
+db.opt
t1.frm
t1.par
t2.frm
@@ -6457,9 +6707,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -6511,6 +6761,7 @@ PARTITION BY RANGE (a)
PARTITION p1 VALUES LESS THAN MAXVALUE);
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1"), (21, "Original from partition p1"), (22, "Original from partition p1"), (23, "Original from partition p1"), (24, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
t2.frm
@@ -6520,9 +6771,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -6540,6 +6791,7 @@ a b
ALTER TABLE t1 EXCHANGE PARTITION p0 WITH TABLE t2;
ERROR HY000: Error in DDL log
# State after failure
+db.opt
t1.frm
t1.par
t2.frm
@@ -6549,9 +6801,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -6603,6 +6855,7 @@ PARTITION BY RANGE (a)
PARTITION p1 VALUES LESS THAN MAXVALUE);
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1"), (21, "Original from partition p1"), (22, "Original from partition p1"), (23, "Original from partition p1"), (24, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
t2.frm
@@ -6612,9 +6865,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -6632,6 +6885,7 @@ a b
ALTER TABLE t1 EXCHANGE PARTITION p0 WITH TABLE t2;
ERROR HY000: Error on rename of './test/t1#P#p0' to './test/t2' (errno: 0 "Internal error/check (Not system error)")
# State after failure
+db.opt
t1.frm
t1.par
t2.frm
@@ -6641,9 +6895,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -6695,6 +6949,7 @@ PARTITION BY RANGE (a)
PARTITION p1 VALUES LESS THAN MAXVALUE);
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1"), (21, "Original from partition p1"), (22, "Original from partition p1"), (23, "Original from partition p1"), (24, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
t2.frm
@@ -6704,9 +6959,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -6724,6 +6979,7 @@ a b
ALTER TABLE t1 EXCHANGE PARTITION p0 WITH TABLE t2;
ERROR HY000: Error in DDL log
# State after failure
+db.opt
t1.frm
t1.par
t2.frm
@@ -6733,9 +6989,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -6787,6 +7043,7 @@ PARTITION BY RANGE (a)
PARTITION p1 VALUES LESS THAN MAXVALUE);
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1"), (21, "Original from partition p1"), (22, "Original from partition p1"), (23, "Original from partition p1"), (24, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
t2.frm
@@ -6796,9 +7053,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -6816,6 +7073,7 @@ a b
ALTER TABLE t1 EXCHANGE PARTITION p0 WITH TABLE t2;
ERROR HY000: Error on rename of './test/#sqlx-nnnn_nnnn' to './test/t1#P#p0' (errno: 0 "Internal error/check (Not system error)")
# State after failure
+db.opt
t1.frm
t1.par
t2.frm
@@ -6825,9 +7083,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -6879,6 +7137,7 @@ PARTITION BY RANGE (a)
PARTITION p1 VALUES LESS THAN MAXVALUE);
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1"), (21, "Original from partition p1"), (22, "Original from partition p1"), (23, "Original from partition p1"), (24, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
t2.frm
@@ -6888,9 +7147,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -6908,6 +7167,7 @@ a b
ALTER TABLE t1 EXCHANGE PARTITION p0 WITH TABLE t2;
ERROR HY000: Error in DDL log
# State after failure
+db.opt
t1.frm
t1.par
t2.frm
@@ -6917,9 +7177,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -6971,6 +7231,7 @@ PARTITION BY RANGE (a)
PARTITION p1 VALUES LESS THAN MAXVALUE);
INSERT INTO t1 VALUES (1, "Original from partition p0"), (2, "Original from partition p0"), (3, "Original from partition p0"), (4, "Original from partition p0"), (11, "Original from partition p1"), (12, "Original from partition p1"), (13, "Original from partition p1"), (14, "Original from partition p1"), (21, "Original from partition p1"), (22, "Original from partition p1"), (23, "Original from partition p1"), (24, "Original from partition p1");
# State before failure
+db.opt
t1.frm
t1.par
t2.frm
@@ -6980,9 +7241,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
1 Original from partition p0
@@ -7000,6 +7261,7 @@ a b
ALTER TABLE t1 EXCHANGE PARTITION p0 WITH TABLE t2;
ERROR HY000: Error in DDL log
# State after failure
+db.opt
t1.frm
t1.par
t2.frm
@@ -7009,9 +7271,9 @@ t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
`b` varchar(64) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1;
a b
11 Original from partition p1
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_decimal_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_decimal_tokudb.result
index 20d2ad2e293..6dfeda71b48 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_decimal_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_decimal_tokudb.result
@@ -10,11 +10,11 @@ t1 CREATE TABLE `t1` (
`a` decimal(10,4) NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION pa1 MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
- PARTITION pa2 MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
- PARTITION pa3 MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
- PARTITION pa4 MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `pa1` MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
+ PARTITION `pa2` MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
+ PARTITION `pa3` MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
+ PARTITION `pa4` MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB)
insert into t1 values (999999.9999), (-999999.9999), (123456.7899), (-123456.7899), (-1.5), (1), (0), (-1), (1.5), (1234.567), (-1234.567);
select * from t1;
a
@@ -54,8 +54,8 @@ t2 CREATE TABLE `t2` (
`a` decimal(18,9) NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-PARTITIONS 10 */
+ PARTITION BY KEY (`a`)
+PARTITIONS 10
insert into t2 values (999999999.999999999), (-999999999.999999999), (-1.5), (-1), (0), (1.5), (1234.567), (-1234.567);
select * from t2;
a
@@ -100,14 +100,14 @@ t3 CREATE TABLE `t3` (
`a` decimal(18,9) NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (floor(a))
-SUBPARTITION BY KEY (a)
+ PARTITION BY RANGE (floor(`a`))
+SUBPARTITION BY KEY (`a`)
SUBPARTITIONS 2
-(PARTITION pa2 VALUES LESS THAN (2) ENGINE = TokuDB,
- PARTITION pa4 VALUES LESS THAN (4) ENGINE = TokuDB,
- PARTITION pa6 VALUES LESS THAN (6) ENGINE = TokuDB,
- PARTITION pa8 VALUES LESS THAN (8) ENGINE = TokuDB,
- PARTITION pa10 VALUES LESS THAN (10) ENGINE = TokuDB) */
+(PARTITION `pa2` VALUES LESS THAN (2) ENGINE = TokuDB,
+ PARTITION `pa4` VALUES LESS THAN (4) ENGINE = TokuDB,
+ PARTITION `pa6` VALUES LESS THAN (6) ENGINE = TokuDB,
+ PARTITION `pa8` VALUES LESS THAN (8) ENGINE = TokuDB,
+ PARTITION `pa10` VALUES LESS THAN (10) ENGINE = TokuDB)
9*3 inserts;
select count(*) from t3;
count(*)
@@ -127,14 +127,14 @@ t4 CREATE TABLE `t4` (
`a` decimal(18,9) NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (ceiling(a))
-SUBPARTITION BY KEY (a)
+ PARTITION BY LIST (ceiling(`a`))
+SUBPARTITION BY KEY (`a`)
SUBPARTITIONS 2
-(PARTITION pa2 VALUES IN (1,2) ENGINE = TokuDB,
- PARTITION pa4 VALUES IN (3,4) ENGINE = TokuDB,
- PARTITION pa6 VALUES IN (5,6) ENGINE = TokuDB,
- PARTITION pa8 VALUES IN (7,8) ENGINE = TokuDB,
- PARTITION pa10 VALUES IN (9,10) ENGINE = TokuDB) */
+(PARTITION `pa2` VALUES IN (1,2) ENGINE = TokuDB,
+ PARTITION `pa4` VALUES IN (3,4) ENGINE = TokuDB,
+ PARTITION `pa6` VALUES IN (5,6) ENGINE = TokuDB,
+ PARTITION `pa8` VALUES IN (7,8) ENGINE = TokuDB,
+ PARTITION `pa10` VALUES IN (9,10) ENGINE = TokuDB)
9*3 inserts;
select count(*) from t4;
count(*)
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_engine_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_engine_tokudb.result
index 7df4c93cd85..a921ba0f56d 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_engine_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_engine_tokudb.result
@@ -62,14 +62,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1`)
+PARTITIONS 2
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -519,15 +519,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part1 ENGINE = TokuDB,
- PARTITION part2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part1` ENGINE = TokuDB,
+ PARTITION `part2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -978,20 +978,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int1)
-(PARTITION part1 VALUES LESS THAN (10)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -1503,20 +1503,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int1)
-(PARTITION part1 VALUES LESS THAN (10)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -2004,20 +2004,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int1)
-(PARTITION part1 VALUES LESS THAN (10)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -2466,20 +2466,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int1)
-(PARTITION part1 VALUES LESS THAN (10)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -2928,15 +2928,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part1 ENGINE = TokuDB,
- PARTITION part2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part1` ENGINE = TokuDB,
+ PARTITION `part2` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -3387,20 +3387,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int1)
-(PARTITION part1 VALUES LESS THAN (10)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -3852,20 +3852,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int1)
-(PARTITION part1 VALUES LESS THAN (10)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -4310,14 +4310,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part1 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part1` ENGINE = TokuDB)
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
@@ -4764,17 +4764,17 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int1)
-(PARTITION part1 VALUES LESS THAN (1000)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (1000)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB))
# check prerequisites-1 success: 1
# check COUNT(*) success: 1
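Note on the hunks above: the updated expected output reflects a change in how SHOW CREATE TABLE prints partitioned tables. The /*!50100 ... */ version-comment wrapper around the partition clause is gone, partition and column identifiers are backtick-quoted, and numeric column defaults print unquoted (DEFAULT 0 rather than DEFAULT '0'). A minimal sketch that reproduces this output shape; the table is hypothetical but mirrors the tests' schema:

  CREATE TABLE t1 (
    f_int1 INT DEFAULT 0,
    f_int2 INT DEFAULT 0
  ) ENGINE=TokuDB
  PARTITION BY RANGE (f_int1)
  SUBPARTITION BY HASH (f_int1)
  (PARTITION part1 VALUES LESS THAN (10)
     (SUBPARTITION subpart11, SUBPARTITION subpart12),
   PARTITION part2 VALUES LESS THAN (2147483646)
     (SUBPARTITION subpart21, SUBPARTITION subpart22));
  -- Expected to echo the quoted, unwrapped PARTITION BY RANGE (`f_int1`) form recorded above.
  SHOW CREATE TABLE t1;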
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_exch_qa_1_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_exch_qa_1_tokudb.result
index 78005c5abb0..8f3d888a5a3 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_exch_qa_1_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_exch_qa_1_tokudb.result
@@ -128,10 +128,10 @@ tp CREATE TABLE `tp` (
PRIMARY KEY (`a`),
UNIQUE KEY `a` (`a`) USING BTREE
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN (100) ENGINE = TokuDB,
- PARTITION p2 VALUES LESS THAN (1000) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN (100) ENGINE = TokuDB,
+ PARTITION `p2` VALUES LESS THAN (1000) ENGINE = TokuDB)
ALTER TABLE tp DROP INDEX a;
ALTER TABLE t_10 DROP INDEX a;
ALTER TABLE tp ADD UNIQUE INDEX USING BTREE (a,b);
@@ -153,10 +153,10 @@ tp CREATE TABLE `tp` (
PRIMARY KEY (`a`),
UNIQUE KEY `a` (`a`,`b`) USING BTREE
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN (100) ENGINE = TokuDB,
- PARTITION p2 VALUES LESS THAN (1000) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN (100) ENGINE = TokuDB,
+ PARTITION `p2` VALUES LESS THAN (1000) ENGINE = TokuDB)
DROP TABLE IF EXISTS t_10;
DROP TABLE IF EXISTS t_100;
DROP TABLE IF EXISTS t_1000;
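For context on the exchange (qa_*) results above and below: ALTER TABLE ... EXCHANGE PARTITION swaps the data of one partition with a standalone, non-partitioned table whose definition must match exactly (columns, indexes, engine). A hedged sketch of the layout these tests use, with details assumed from the surrounding hunks:

  CREATE TABLE tp (a INT NOT NULL, b VARCHAR(24), PRIMARY KEY (a)) ENGINE=TokuDB
  PARTITION BY RANGE (a)
  (PARTITION p0 VALUES LESS THAN (10),
   PARTITION p1 VALUES LESS THAN (100),
   PARTITION p2 VALUES LESS THAN (1000));
  CREATE TABLE t_10 (a INT NOT NULL, b VARCHAR(24), PRIMARY KEY (a)) ENGINE=TokuDB;
  -- Rows with a < 10 move between tp's p0 and t_10 in one statement:
  ALTER TABLE tp EXCHANGE PARTITION p0 WITH TABLE t_10;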
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_exch_qa_5_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_exch_qa_5_tokudb.result
index e539c37a0b4..03ef0ed2c09 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_exch_qa_5_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_exch_qa_5_tokudb.result
@@ -4,7 +4,7 @@ GRANT USAGE ON *.* TO test1@localhost;
GRANT USAGE ON *.* TO test2@localhost;
GRANT CREATE, DROP, INSERT, SELECT ON test.* TO test1@localhost;
GRANT CREATE, DROP, ALTER, UPDATE, INSERT, SELECT ON test.* TO test2@localhost;
-connect (test1,localhost,test1,,test,MASTER_MYPORT,MASTER_MYSOCK);
+connect test1,localhost,test1,,test,$MASTER_MYPORT,$MASTER_MYSOCK;
SELECT current_user();
current_user()
test1@localhost
@@ -15,7 +15,7 @@ GRANT SELECT, INSERT, CREATE, DROP ON `test`.* TO 'test1'@'localhost'
ALTER TABLE tp EXCHANGE PARTITION p0 WITH TABLE t_10;
ERROR 42000: ALTER command denied to user 'test1'@'localhost' for table 'tp'
disconnect test1;
-connect (test2,localhost,test2,,test,MASTER_MYPORT,MASTER_MYSOCK);
+connect test2,localhost,test2,,test,$MASTER_MYPORT,$MASTER_MYSOCK;
USE test;
SELECT current_user();
current_user()
@@ -73,7 +73,7 @@ a b
disconnect test2;
connection default;
REVOKE ALTER ON test.* FROM test2@localhost;
-connect (test2,localhost,test2,,test,MASTER_MYPORT,MASTER_MYSOCK);
+connect test2,localhost,test2,,test,$MASTER_MYPORT,$MASTER_MYSOCK;
USE test;
SELECT current_user();
current_user()
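The only change in this grant result and the qa_7/qa_8 files below is the echoed form of mysqltest's connect command: the surrounding parentheses are dropped and the port/socket arguments now appear as the unexpanded $MASTER_MYPORT/$MASTER_MYSOCK variables. A sketch of the pattern as it now appears in result logs, assuming the standard mysqltest argument order (name, host, user, password, db, port, socket):

  connect test1,localhost,test1,,test,$MASTER_MYPORT,$MASTER_MYSOCK;
  SELECT current_user();
  disconnect test1;
  connection default;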
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_exch_qa_7_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_exch_qa_7_tokudb.result
index 8ae4bfd3d3a..0913bbaa55b 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_exch_qa_7_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_exch_qa_7_tokudb.result
@@ -1,7 +1,7 @@
CREATE USER test_2@localhost;
send
ALTER TABLE tp EXCHANGE PARTITION p0 WITH TABLE t_10;
-connect (test_2,localhost,test_2,,test,MASTER_MYPORT,MASTER_MYSOCK);
+connect test_2,localhost,test_2,,test,$MASTER_MYPORT,$MASTER_MYSOCK;
SELECT * FROM t_10;
a b
2 Two
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_exch_qa_8_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_exch_qa_8_tokudb.result
index b252fc616b1..2fd45be9261 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_exch_qa_8_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_exch_qa_8_tokudb.result
@@ -4,7 +4,7 @@ USE testdb;
USE test;
GRANT CREATE, ALTER, DROP, INSERT, SELECT on test.* TO test2@localhost;
GRANT CREATE, ALTER, DROP, INSERT, SELECT on testdb.* TO test2@localhost;
-connect (test2,localhost,test2,,test,MASTER_MYPORT,MASTER_MYSOCK);
+connect test2,localhost,test2,,test,$MASTER_MYPORT,$MASTER_MYSOCK;
ALTER TABLE tp EXCHANGE PARTITION p0 WITH TABLE testdb.t_10;
SELECT * FROM testdb.t_10;
a b
@@ -60,7 +60,7 @@ a b
disconnect test2;
connection default;
REVOKE INSERT ON testdb.* FROM test2@localhost;
-connect (test2,localhost,test2,,test,MASTER_MYPORT,MASTER_MYSOCK);
+connect test2,localhost,test2,,test,$MASTER_MYPORT,$MASTER_MYSOCK;
ALTER TABLE tp EXCHANGE PARTITION p0 WITH TABLE testdb.t_10;
ERROR 42000: INSERT command denied to user 'test2'@'localhost' for table 't_10'
ALTER TABLE testdb.tp EXCHANGE PARTITION p0 WITH TABLE t_10;
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_exchange_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_exchange_tokudb.result
index beed3486d98..b3cc206e854 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_exchange_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_exchange_tokudb.result
@@ -13,8 +13,8 @@ tp CREATE TABLE `tp` (
`b` varchar(24) DEFAULT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (a)
-PARTITIONS 4 */
+ PARTITION BY HASH (`a`)
+PARTITIONS 4
SHOW CREATE TABLE t;
Table Create Table
t CREATE TABLE `t` (
@@ -86,8 +86,8 @@ tp CREATE TABLE `tp` (
`b` varchar(24) DEFAULT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB AUTO_INCREMENT=112 DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (a)
-PARTITIONS 4 */
+ PARTITION BY HASH (`a`)
+PARTITIONS 4
SHOW CREATE TABLE t;
Table Create Table
t CREATE TABLE `t` (
@@ -147,13 +147,13 @@ a b
64 Sixty four
# Start by testing read/write locking
SET AUTOCOMMIT = 1;
-# con1
+connect con1, localhost, root,,;
SET DEBUG_SYNC= 'swap_partition_after_compare_tables SIGNAL swap_in_progress WAIT_FOR goto_verification';
SET DEBUG_SYNC= 'swap_partition_first_row_read SIGNAL swap_in_progress WAIT_FOR goto_wait';
SET DEBUG_SYNC= 'swap_partition_after_wait SIGNAL swap_in_progress WAIT_FOR goto_rename';
SET DEBUG_SYNC= 'swap_partition_before_rename SIGNAL swap_in_progress WAIT_FOR test_done';
ALTER TABLE tp EXCHANGE PARTITION p0 WITH TABLE t;
-# con default
+connection default;
SET DEBUG_SYNC= 'now WAIT_FOR swap_in_progress';
# select from t and select/update/delete/insert from tp should work
SELECT * FROM t WHERE a = 99;
@@ -194,9 +194,9 @@ tp CREATE TABLE `tp` (
`b` varchar(55) DEFAULT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (100) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (100) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SET DEBUG_SYNC= 'now SIGNAL goto_verification';
SET DEBUG_SYNC= 'now WAIT_FOR swap_in_progress';
# select from t and select/update/delete/insert from tp should work
@@ -237,9 +237,9 @@ tp CREATE TABLE `tp` (
`b` varchar(55) DEFAULT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (100) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (100) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SET DEBUG_SYNC= 'now SIGNAL goto_wait';
SET DEBUG_SYNC= 'now WAIT_FOR swap_in_progress';
# Both tables should now be under exclusive lock, even SHOW should fail
@@ -295,8 +295,8 @@ ERROR HY000: Lock wait timeout exceeded; try restarting transaction
SHOW CREATE TABLE tp;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
SET DEBUG_SYNC= 'now SIGNAL test_done';
-# con1
-# con default
+connection con1;
+connection default;
# Tables should now be as normal
SHOW CREATE TABLE t;
Table Create Table
@@ -312,9 +312,9 @@ tp CREATE TABLE `tp` (
`b` varchar(55) DEFAULT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (100) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (100) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM tp WHERE a = 99;
a b
99 End of values
@@ -329,7 +329,8 @@ INSERT INTO tp VALUES (63, "Sixty three, new"), (59, "To be deleted");
DELETE FROM tp WHERE a = 3;
ALTER TABLE t ENGINE = 'TokuDB';
ALTER TABLE tp ENGINE = 'TokuDB';
-# con default
+disconnect con1;
+connection default;
SET DEBUG_SYNC= 'RESET';
SHOW CREATE TABLE t;
Table Create Table
@@ -345,9 +346,9 @@ tp CREATE TABLE `tp` (
`b` varchar(55) DEFAULT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION p0 VALUES LESS THAN (100) ENGINE = TokuDB,
- PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `p0` VALUES LESS THAN (100) ENGINE = TokuDB,
+ PARTITION `p1` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t;
a b
10 Ten
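The partition_exchange results above also replace the bare # con1 / # con default marker comments with explicit connect/connection/disconnect lines, so the log now records exactly when the test switches sessions around the DEBUG_SYNC points. A compressed sketch of that choreography, assuming a debug build with DEBUG_SYNC available (send/reap is assumed here because the ALTER blocks at the sync point):

  connect con1, localhost, root,,;
  SET DEBUG_SYNC= 'swap_partition_first_row_read SIGNAL swap_in_progress WAIT_FOR goto_wait';
  send ALTER TABLE tp EXCHANGE PARTITION p0 WITH TABLE t;
  connection default;
  SET DEBUG_SYNC= 'now WAIT_FOR swap_in_progress';
  SELECT * FROM t WHERE a = 99;   # reads are still allowed mid-swap
  SET DEBUG_SYNC= 'now SIGNAL goto_wait';
  connection con1;
  reap;
  disconnect con1;
  connection default;
  SET DEBUG_SYNC= 'RESET';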
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_float_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_float_tokudb.result
index e24be66ba86..41cf88139ed 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_float_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_float_tokudb.result
@@ -10,11 +10,11 @@ t1 CREATE TABLE `t1` (
`a` float NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION pa1 MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
- PARTITION pa2 MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
- PARTITION pa3 MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
- PARTITION pa4 MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `pa1` MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
+ PARTITION `pa2` MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
+ PARTITION `pa3` MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
+ PARTITION `pa4` MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB)
insert into t1 values (-3.402823466E+38), (3.402823466E+38), (-1.5), (-1), (0), (1), (1.5);
select * from t1;
a
@@ -46,8 +46,8 @@ t2 CREATE TABLE `t2` (
`a` float NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-PARTITIONS 10 */
+ PARTITION BY KEY (`a`)
+PARTITIONS 10
insert into t2 values (-3.402823466E+38), (-3.402823466E+37), (-123.456), (0), (1234546.789), (123.456), (1.5);
select * from t2;
a
@@ -100,11 +100,11 @@ t1 CREATE TABLE `t1` (
`a` double NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION pa1 MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
- PARTITION pa2 MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
- PARTITION pa3 MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
- PARTITION pa4 MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `pa1` MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
+ PARTITION `pa2` MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
+ PARTITION `pa3` MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
+ PARTITION `pa4` MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB)
insert into t1 values (-2.2250738585072014E+208), (-2.2250738585072014E-208), (-1.5), (-1), (0), (1.5), (1234.567), (2.2250738585072014E+208);
select * from t1;
a
@@ -138,8 +138,8 @@ t2 CREATE TABLE `t2` (
`a` double NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-PARTITIONS 10 */
+ PARTITION BY KEY (`a`)
+PARTITIONS 10
insert into t2 values (-2.2250738585072014E+208), (-2.2250738585072014E-208), (-1.5), (-1), (0), (1.5), (1234.567), (2.2250738585072014E+208);
select * from t2;
a
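partition_float above covers PARTITION BY KEY on float and double primary keys. KEY partitioning hashes the column's internal storage format, so it accepts types that the integer-arithmetic HASH/RANGE/LIST variants cannot take directly; MAX_ROWS/MIN_ROWS are per-partition sizing hints. A short sketch mirroring the test's first table:

  CREATE TABLE t1 (a FLOAT NOT NULL, PRIMARY KEY (a)) ENGINE=TokuDB
  PARTITION BY KEY (a)
  (PARTITION pa1 MAX_ROWS = 20 MIN_ROWS = 2,
   PARTITION pa2 MAX_ROWS = 30 MIN_ROWS = 3);
  INSERT INTO t1 VALUES (-3.402823466E+38), (-1.5), (0), (1.5);
  SELECT * FROM t1 ORDER BY a;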
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_int_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_int_tokudb.result
index 2c3e7f6e093..87ad2fc16f2 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_int_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_int_tokudb.result
@@ -10,11 +10,11 @@ t1 CREATE TABLE `t1` (
`a` tinyint(3) unsigned NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION pa1 MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
- PARTITION pa2 MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
- PARTITION pa3 MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
- PARTITION pa4 MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `pa1` MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
+ PARTITION `pa2` MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
+ PARTITION `pa3` MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
+ PARTITION `pa4` MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB)
insert into t1 values (255), (254), (253), (252), (1), (2), (128);
select * from t1;
a
@@ -46,8 +46,8 @@ t2 CREATE TABLE `t2` (
`a` tinyint(3) unsigned NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-PARTITIONS 8 */
+ PARTITION BY KEY (`a`)
+PARTITIONS 8
insert into t2 values (255), (254), (253), (252);
select * from t2;
a
@@ -78,8 +78,8 @@ t3 CREATE TABLE `t3` (
`a` tinyint(4) NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-PARTITIONS 7 */
+ PARTITION BY KEY (`a`)
+PARTITIONS 7
insert into t3 values (127), (126), (125), (124), (-128), (-127), (1), (-1), (0);
select * from t3;
a
@@ -119,11 +119,11 @@ t1 CREATE TABLE `t1` (
`a` smallint(5) unsigned NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION pa1 MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
- PARTITION pa2 MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
- PARTITION pa3 MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
- PARTITION pa4 MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `pa1` MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
+ PARTITION `pa2` MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
+ PARTITION `pa3` MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
+ PARTITION `pa4` MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB)
insert into t1 values (65535), (65534), (65533), (65532), (1), (2), (256);
select * from t1;
a
@@ -155,8 +155,8 @@ t2 CREATE TABLE `t2` (
`a` smallint(5) unsigned NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-PARTITIONS 8 */
+ PARTITION BY KEY (`a`)
+PARTITIONS 8
insert into t2 values (65535), (65534), (65533), (65532);
select * from t2;
a
@@ -187,8 +187,8 @@ t3 CREATE TABLE `t3` (
`a` smallint(6) NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-PARTITIONS 7 */
+ PARTITION BY KEY (`a`)
+PARTITIONS 7
insert into t3 values (32767), (32766), (32765), (32764), (-32768), (-32767), (1), (-1), (0);
select * from t3;
a
@@ -228,11 +228,11 @@ t1 CREATE TABLE `t1` (
`a` int(10) unsigned NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION pa1 MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
- PARTITION pa2 MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
- PARTITION pa3 MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
- PARTITION pa4 MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `pa1` MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
+ PARTITION `pa2` MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
+ PARTITION `pa3` MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
+ PARTITION `pa4` MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB)
insert into t1 values (4294967295), (4294967294), (4294967293), (4294967292), (1), (2), (65535);
select * from t1;
a
@@ -264,8 +264,8 @@ t2 CREATE TABLE `t2` (
`a` int(10) unsigned NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-PARTITIONS 8 */
+ PARTITION BY KEY (`a`)
+PARTITIONS 8
insert into t2 values (4294967295), (4294967294), (4294967293), (4294967292);
select * from t2;
a
@@ -296,8 +296,8 @@ t3 CREATE TABLE `t3` (
`a` int(11) NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-PARTITIONS 7 */
+ PARTITION BY KEY (`a`)
+PARTITIONS 7
insert into t3 values (2147483647), (2147483646), (2147483645), (2147483644), (-2147483648), (-2147483647), (1), (-1), (0);
select * from t3;
a
@@ -337,11 +337,11 @@ t1 CREATE TABLE `t1` (
`a` mediumint(8) unsigned NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION pa1 MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
- PARTITION pa2 MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
- PARTITION pa3 MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
- PARTITION pa4 MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `pa1` MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
+ PARTITION `pa2` MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
+ PARTITION `pa3` MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
+ PARTITION `pa4` MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB)
insert into t1 values (16777215), (16777214), (16777213), (16777212), (1), (2), (65535);
select * from t1;
a
@@ -373,8 +373,8 @@ t2 CREATE TABLE `t2` (
`a` mediumint(8) unsigned NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-PARTITIONS 8 */
+ PARTITION BY KEY (`a`)
+PARTITIONS 8
insert into t2 values (16777215), (16777214), (16777213), (16777212);
select * from t2;
a
@@ -405,8 +405,8 @@ t3 CREATE TABLE `t3` (
`a` mediumint(9) NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-PARTITIONS 7 */
+ PARTITION BY KEY (`a`)
+PARTITIONS 7
insert into t3 values (8388607), (8388606), (8388605), (8388604), (-8388608), (-8388607), (1), (-1), (0);
select * from t3;
a
@@ -446,11 +446,11 @@ t1 CREATE TABLE `t1` (
`a` bigint(20) unsigned NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION pa1 MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
- PARTITION pa2 MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
- PARTITION pa3 MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
- PARTITION pa4 MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `pa1` MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
+ PARTITION `pa2` MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
+ PARTITION `pa3` MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
+ PARTITION `pa4` MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB)
insert into t1 values (18446744073709551615), (0xFFFFFFFFFFFFFFFE), (18446744073709551613), (18446744073709551612), (1), (2), (65535);
select * from t1;
a
@@ -494,8 +494,8 @@ t2 CREATE TABLE `t2` (
`a` bigint(20) unsigned NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-PARTITIONS 8 */
+ PARTITION BY KEY (`a`)
+PARTITIONS 8
insert into t2 values (18446744073709551615), (0xFFFFFFFFFFFFFFFE), (18446744073709551613), (18446744073709551612);
select * from t2;
a
@@ -526,8 +526,8 @@ t3 CREATE TABLE `t3` (
`a` bigint(20) NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-PARTITIONS 7 */
+ PARTITION BY KEY (`a`)
+PARTITIONS 7
insert into t3 values (9223372036854775807), (9223372036854775806), (9223372036854775805), (9223372036854775804), (-9223372036854775808), (-9223372036854775807), (1), (-1), (0);
select * from t3;
a
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_mgm_lc0_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_mgm_lc0_tokudb.result
index 1442a99bbbb..4c30d47f526 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_mgm_lc0_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_mgm_lc0_tokudb.result
@@ -56,14 +56,14 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION parta ENGINE = TokuDB,
- PARTITION partB ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB,
- PARTITION PartD ENGINE = TokuDB,
- PARTITION partE ENGINE = TokuDB,
- PARTITION Partf ENGINE = TokuDB,
- PARTITION PartG ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `parta` ENGINE = TokuDB,
+ PARTITION `partB` ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB,
+ PARTITION `PartD` ENGINE = TokuDB,
+ PARTITION `partE` ENGINE = TokuDB,
+ PARTITION `Partf` ENGINE = TokuDB,
+ PARTITION `PartG` ENGINE = TokuDB)
ALTER TABLE TableA COALESCE PARTITION 4;
SELECT * FROM TableA;
a
@@ -84,10 +84,10 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION parta ENGINE = TokuDB,
- PARTITION partB ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `parta` ENGINE = TokuDB,
+ PARTITION `partB` ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB)
# Test of EXCHANGE PARTITION WITH TABLE
SELECT PARTITION_NAME, TABLE_ROWS FROM INFORMATION_SCHEMA.PARTITIONS WHERE TABLE_SCHEMA ='MySQL_Test_DB' AND TABLE_NAME = 'TableA';
PARTITION_NAME TABLE_ROWS
@@ -112,10 +112,10 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION parta ENGINE = TokuDB,
- PARTITION partB ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `parta` ENGINE = TokuDB,
+ PARTITION `partB` ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB)
SELECT * FROM TableB;
a
10
@@ -156,10 +156,10 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION parta ENGINE = TokuDB,
- PARTITION partB ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `parta` ENGINE = TokuDB,
+ PARTITION `partB` ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB)
# Test of REORGANIZE PARTITIONS
# Should not work on HASH/KEY
ALTER TABLE TableA REORGANIZE PARTITION parta,partB,Partc INTO
@@ -192,10 +192,10 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION partB COMMENT = 'Previusly named parta' ENGINE = TokuDB,
- PARTITION parta COMMENT = 'Previusly named partB' ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `partB` COMMENT = 'Previusly named parta' ENGINE = TokuDB,
+ PARTITION `parta` COMMENT = 'Previusly named partB' ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB)
# Test of RENAME TABLE
RENAME TABLE TableA to TableB;
SELECT * FROM TableB;
@@ -271,11 +271,11 @@ Table Create Table
tablea CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION parta ENGINE = TokuDB,
- PARTITION partB ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB,
- PARTITION PartD ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `parta` ENGINE = TokuDB,
+ PARTITION `partB` ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB,
+ PARTITION `PartD` ENGINE = TokuDB)
DROP TABLE tablea;
# Test of REMOVE PARTITIONING
ALTER TABLE TableA REMOVE PARTITIONING;
@@ -364,14 +364,14 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (a)
-(PARTITION parta ENGINE = TokuDB,
- PARTITION partB ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB,
- PARTITION PartD ENGINE = TokuDB,
- PARTITION partE ENGINE = TokuDB,
- PARTITION Partf ENGINE = TokuDB,
- PARTITION PartG ENGINE = TokuDB) */
+ PARTITION BY HASH (`a`)
+(PARTITION `parta` ENGINE = TokuDB,
+ PARTITION `partB` ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB,
+ PARTITION `PartD` ENGINE = TokuDB,
+ PARTITION `partE` ENGINE = TokuDB,
+ PARTITION `Partf` ENGINE = TokuDB,
+ PARTITION `PartG` ENGINE = TokuDB)
ALTER TABLE TableA COALESCE PARTITION 4;
SELECT * FROM TableA;
a
@@ -392,10 +392,10 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (a)
-(PARTITION parta ENGINE = TokuDB,
- PARTITION partB ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB) */
+ PARTITION BY HASH (`a`)
+(PARTITION `parta` ENGINE = TokuDB,
+ PARTITION `partB` ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB)
# Test of REORGANIZE PARTITIONS
# Should not work on HASH/KEY
ALTER TABLE TableA REORGANIZE PARTITION parta,partB,Partc INTO
@@ -428,10 +428,10 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (a)
-(PARTITION partB COMMENT = 'Previusly named parta' ENGINE = TokuDB,
- PARTITION parta COMMENT = 'Previusly named partB' ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB) */
+ PARTITION BY HASH (`a`)
+(PARTITION `partB` COMMENT = 'Previusly named parta' ENGINE = TokuDB,
+ PARTITION `parta` COMMENT = 'Previusly named partB' ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB)
# Test of RENAME TABLE
RENAME TABLE TableA to TableB;
SELECT * FROM TableB;
@@ -507,11 +507,11 @@ Table Create Table
tablea CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (a)
-(PARTITION parta ENGINE = TokuDB,
- PARTITION partB ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB,
- PARTITION PartD ENGINE = TokuDB) */
+ PARTITION BY HASH (`a`)
+(PARTITION `parta` ENGINE = TokuDB,
+ PARTITION `partB` ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB,
+ PARTITION `PartD` ENGINE = TokuDB)
DROP TABLE tablea;
# Test of REMOVE PARTITIONING
ALTER TABLE TableA REMOVE PARTITIONING;
@@ -589,14 +589,14 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION parta VALUES LESS THAN (4) ENGINE = TokuDB,
- PARTITION partB VALUES LESS THAN (7) ENGINE = TokuDB,
- PARTITION Partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION PartD VALUES LESS THAN (13) ENGINE = TokuDB,
- PARTITION partE VALUES LESS THAN (16) ENGINE = TokuDB,
- PARTITION Partf VALUES LESS THAN (19) ENGINE = TokuDB,
- PARTITION PartG VALUES LESS THAN (22) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `parta` VALUES LESS THAN (4) ENGINE = TokuDB,
+ PARTITION `partB` VALUES LESS THAN (7) ENGINE = TokuDB,
+ PARTITION `Partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `PartD` VALUES LESS THAN (13) ENGINE = TokuDB,
+ PARTITION `partE` VALUES LESS THAN (16) ENGINE = TokuDB,
+ PARTITION `Partf` VALUES LESS THAN (19) ENGINE = TokuDB,
+ PARTITION `PartG` VALUES LESS THAN (22) ENGINE = TokuDB)
ALTER TABLE TableA DROP PARTITION partE, PartG;
ALTER TABLE TableA DROP PARTITION Partf;
ALTER TABLE TableA ADD PARTITION
@@ -620,12 +620,12 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION parta VALUES LESS THAN (4) ENGINE = TokuDB,
- PARTITION partB VALUES LESS THAN (7) ENGINE = TokuDB,
- PARTITION Partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION PartD VALUES LESS THAN (13) ENGINE = TokuDB,
- PARTITION PartE VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `parta` VALUES LESS THAN (4) ENGINE = TokuDB,
+ PARTITION `partB` VALUES LESS THAN (7) ENGINE = TokuDB,
+ PARTITION `Partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `PartD` VALUES LESS THAN (13) ENGINE = TokuDB,
+ PARTITION `PartE` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
# Test of REORGANIZE PARTITIONS
# Error since it must reorganize a consecutive range
ALTER TABLE TableA REORGANIZE PARTITION parta,Partc INTO
@@ -658,11 +658,11 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION parta VALUES LESS THAN (4) ENGINE = TokuDB,
- PARTITION partD VALUES LESS THAN (8) COMMENT = 'Previously partB and partly Partc' ENGINE = TokuDB,
- PARTITION partB VALUES LESS THAN (11) COMMENT = 'Previously partly Partc and partly PartD' ENGINE = TokuDB,
- PARTITION partC VALUES LESS THAN MAXVALUE COMMENT = 'Previously partly PartD' ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `parta` VALUES LESS THAN (4) ENGINE = TokuDB,
+ PARTITION `partD` VALUES LESS THAN (8) COMMENT = 'Previously partB and partly Partc' ENGINE = TokuDB,
+ PARTITION `partB` VALUES LESS THAN (11) COMMENT = 'Previously partly Partc and partly PartD' ENGINE = TokuDB,
+ PARTITION `partC` VALUES LESS THAN MAXVALUE COMMENT = 'Previously partly PartD' ENGINE = TokuDB)
# Test of RENAME TABLE
RENAME TABLE TableA to TableB;
SELECT * FROM TableB;
@@ -738,11 +738,11 @@ Table Create Table
tablea CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION parta VALUES LESS THAN (4) ENGINE = TokuDB,
- PARTITION partB VALUES LESS THAN (7) ENGINE = TokuDB,
- PARTITION Partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION PartD VALUES LESS THAN (13) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `parta` VALUES LESS THAN (4) ENGINE = TokuDB,
+ PARTITION `partB` VALUES LESS THAN (7) ENGINE = TokuDB,
+ PARTITION `Partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `PartD` VALUES LESS THAN (13) ENGINE = TokuDB)
DROP TABLE tablea;
# Test of REMOVE PARTITIONING
ALTER TABLE TableA REMOVE PARTITIONING;
@@ -820,14 +820,14 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION parta VALUES IN (1,8,9) ENGINE = TokuDB,
- PARTITION partB VALUES IN (2,10,11) ENGINE = TokuDB,
- PARTITION Partc VALUES IN (3,4,7) ENGINE = TokuDB,
- PARTITION PartD VALUES IN (5,6,12) ENGINE = TokuDB,
- PARTITION partE VALUES IN (16) ENGINE = TokuDB,
- PARTITION Partf VALUES IN (19) ENGINE = TokuDB,
- PARTITION PartG VALUES IN (22) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `parta` VALUES IN (1,8,9) ENGINE = TokuDB,
+ PARTITION `partB` VALUES IN (2,10,11) ENGINE = TokuDB,
+ PARTITION `Partc` VALUES IN (3,4,7) ENGINE = TokuDB,
+ PARTITION `PartD` VALUES IN (5,6,12) ENGINE = TokuDB,
+ PARTITION `partE` VALUES IN (16) ENGINE = TokuDB,
+ PARTITION `Partf` VALUES IN (19) ENGINE = TokuDB,
+ PARTITION `PartG` VALUES IN (22) ENGINE = TokuDB)
ALTER TABLE TableA DROP PARTITION partE, PartG;
ALTER TABLE TableA DROP PARTITION Partf;
ALTER TABLE TableA ADD PARTITION
@@ -851,12 +851,12 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION parta VALUES IN (1,8,9) ENGINE = TokuDB,
- PARTITION partB VALUES IN (2,10,11) ENGINE = TokuDB,
- PARTITION Partc VALUES IN (3,4,7) ENGINE = TokuDB,
- PARTITION PartD VALUES IN (5,6,12) ENGINE = TokuDB,
- PARTITION PartE VALUES IN (13) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `parta` VALUES IN (1,8,9) ENGINE = TokuDB,
+ PARTITION `partB` VALUES IN (2,10,11) ENGINE = TokuDB,
+ PARTITION `Partc` VALUES IN (3,4,7) ENGINE = TokuDB,
+ PARTITION `PartD` VALUES IN (5,6,12) ENGINE = TokuDB,
+ PARTITION `PartE` VALUES IN (13) ENGINE = TokuDB)
# Test of REORGANIZE PARTITIONS
ALTER TABLE TableA REORGANIZE PARTITION parta,Partc INTO
(PARTITION Partc VALUES IN (1,7)
@@ -889,12 +889,12 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION Partc VALUES IN (1,7) COMMENT = 'Mix 1 of old parta and Partc' ENGINE = TokuDB,
- PARTITION parta VALUES IN (3,9) COMMENT = 'Mix 2 of old parta and Partc' ENGINE = TokuDB,
- PARTITION partB VALUES IN (4,8) COMMENT = 'Mix 3 of old parta and Partc' ENGINE = TokuDB,
- PARTITION PartD VALUES IN (5,6,12) ENGINE = TokuDB,
- PARTITION PartE VALUES IN (13) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `Partc` VALUES IN (1,7) COMMENT = 'Mix 1 of old parta and Partc' ENGINE = TokuDB,
+ PARTITION `parta` VALUES IN (3,9) COMMENT = 'Mix 2 of old parta and Partc' ENGINE = TokuDB,
+ PARTITION `partB` VALUES IN (4,8) COMMENT = 'Mix 3 of old parta and Partc' ENGINE = TokuDB,
+ PARTITION `PartD` VALUES IN (5,6,12) ENGINE = TokuDB,
+ PARTITION `PartE` VALUES IN (13) ENGINE = TokuDB)
# Test of RENAME TABLE
RENAME TABLE TableA to TableB;
SELECT * FROM TableB;
@@ -961,11 +961,11 @@ Table Create Table
tablea CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION parta VALUES IN (1,8,9) ENGINE = TokuDB,
- PARTITION partB VALUES IN (2,10,11) ENGINE = TokuDB,
- PARTITION Partc VALUES IN (3,4,7) ENGINE = TokuDB,
- PARTITION PartD VALUES IN (5,6,12) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `parta` VALUES IN (1,8,9) ENGINE = TokuDB,
+ PARTITION `partB` VALUES IN (2,10,11) ENGINE = TokuDB,
+ PARTITION `Partc` VALUES IN (3,4,7) ENGINE = TokuDB,
+ PARTITION `PartD` VALUES IN (5,6,12) ENGINE = TokuDB)
DROP TABLE tablea;
# Test of REMOVE PARTITIONING
ALTER TABLE TableA REMOVE PARTITIONING;
@@ -1004,10 +1004,10 @@ t1 CREATE TABLE `t1` (
`b` varchar(255) DEFAULT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB AUTO_INCREMENT=2002 DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION LT1000 VALUES LESS THAN (1000) ENGINE = TokuDB,
- PARTITION LT2000 VALUES LESS THAN (2000) ENGINE = TokuDB,
- PARTITION MAX VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `LT1000` VALUES LESS THAN (1000) ENGINE = TokuDB,
+ PARTITION `LT2000` VALUES LESS THAN (2000) ENGINE = TokuDB,
+ PARTITION `MAX` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1 ORDER BY a;
a b
1 First
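partition_mgm_lc0 above walks the partition-management DDL: COALESCE PARTITION merges HASH/KEY partitions, DROP PARTITION and REORGANIZE apply to RANGE/LIST (and, as the test itself notes, REORGANIZE must cover a consecutive range and is rejected for HASH/KEY), followed by RENAME TABLE and REMOVE PARTITIONING. A sketch of the RANGE case, with hypothetical boundaries echoing the test's comments:

  ALTER TABLE TableA REORGANIZE PARTITION partB, Partc, PartD INTO
  (PARTITION partD VALUES LESS THAN (8),
   PARTITION partB VALUES LESS THAN (11),
   PARTITION partC VALUES LESS THAN (13));
  -- Back to an ordinary, unpartitioned table:
  ALTER TABLE TableA REMOVE PARTITIONING;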
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_mgm_lc10_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_mgm_lc10_tokudb.result
index 84d350fcdfa..6ccd9afefab 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_mgm_lc10_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_mgm_lc10_tokudb.result
@@ -56,14 +56,14 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION parta ENGINE = TokuDB,
- PARTITION partB ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB,
- PARTITION PartD ENGINE = TokuDB,
- PARTITION partE ENGINE = TokuDB,
- PARTITION Partf ENGINE = TokuDB,
- PARTITION PartG ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `parta` ENGINE = TokuDB,
+ PARTITION `partB` ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB,
+ PARTITION `PartD` ENGINE = TokuDB,
+ PARTITION `partE` ENGINE = TokuDB,
+ PARTITION `Partf` ENGINE = TokuDB,
+ PARTITION `PartG` ENGINE = TokuDB)
ALTER TABLE TableA COALESCE PARTITION 4;
SELECT * FROM TableA;
a
@@ -84,10 +84,10 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION parta ENGINE = TokuDB,
- PARTITION partB ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `parta` ENGINE = TokuDB,
+ PARTITION `partB` ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB)
# Test of EXCHANGE PARTITION WITH TABLE
# Test of REORGANIZE PARTITIONS
# Should not work on HASH/KEY
@@ -121,10 +121,10 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION partB COMMENT = 'Previusly named parta' ENGINE = TokuDB,
- PARTITION parta COMMENT = 'Previusly named partB' ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `partB` COMMENT = 'Previusly named parta' ENGINE = TokuDB,
+ PARTITION `parta` COMMENT = 'Previusly named partB' ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB)
# Test of RENAME TABLE
RENAME TABLE TableA to TableB;
SELECT * FROM TableB;
@@ -200,11 +200,11 @@ Table Create Table
tablea CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION parta ENGINE = TokuDB,
- PARTITION partB ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB,
- PARTITION PartD ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `parta` ENGINE = TokuDB,
+ PARTITION `partB` ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB,
+ PARTITION `PartD` ENGINE = TokuDB)
DROP TABLE tablea;
# Test of REMOVE PARTITIONING
ALTER TABLE TableA REMOVE PARTITIONING;
@@ -293,14 +293,14 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (a)
-(PARTITION parta ENGINE = TokuDB,
- PARTITION partB ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB,
- PARTITION PartD ENGINE = TokuDB,
- PARTITION partE ENGINE = TokuDB,
- PARTITION Partf ENGINE = TokuDB,
- PARTITION PartG ENGINE = TokuDB) */
+ PARTITION BY HASH (`a`)
+(PARTITION `parta` ENGINE = TokuDB,
+ PARTITION `partB` ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB,
+ PARTITION `PartD` ENGINE = TokuDB,
+ PARTITION `partE` ENGINE = TokuDB,
+ PARTITION `Partf` ENGINE = TokuDB,
+ PARTITION `PartG` ENGINE = TokuDB)
ALTER TABLE TableA COALESCE PARTITION 4;
SELECT * FROM TableA;
a
@@ -321,10 +321,10 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (a)
-(PARTITION parta ENGINE = TokuDB,
- PARTITION partB ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB) */
+ PARTITION BY HASH (`a`)
+(PARTITION `parta` ENGINE = TokuDB,
+ PARTITION `partB` ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB)
# Test of REORGANIZE PARTITIONS
# Should not work on HASH/KEY
ALTER TABLE TableA REORGANIZE PARTITION parta,partB,Partc INTO
@@ -357,10 +357,10 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (a)
-(PARTITION partB COMMENT = 'Previusly named parta' ENGINE = TokuDB,
- PARTITION parta COMMENT = 'Previusly named partB' ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB) */
+ PARTITION BY HASH (`a`)
+(PARTITION `partB` COMMENT = 'Previusly named parta' ENGINE = TokuDB,
+ PARTITION `parta` COMMENT = 'Previusly named partB' ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB)
# Test of RENAME TABLE
RENAME TABLE TableA to TableB;
SELECT * FROM TableB;
@@ -436,11 +436,11 @@ Table Create Table
tablea CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (a)
-(PARTITION parta ENGINE = TokuDB,
- PARTITION partB ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB,
- PARTITION PartD ENGINE = TokuDB) */
+ PARTITION BY HASH (`a`)
+(PARTITION `parta` ENGINE = TokuDB,
+ PARTITION `partB` ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB,
+ PARTITION `PartD` ENGINE = TokuDB)
DROP TABLE tablea;
# Test of REMOVE PARTITIONING
ALTER TABLE TableA REMOVE PARTITIONING;
@@ -518,14 +518,14 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION parta VALUES LESS THAN (4) ENGINE = TokuDB,
- PARTITION partB VALUES LESS THAN (7) ENGINE = TokuDB,
- PARTITION Partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION PartD VALUES LESS THAN (13) ENGINE = TokuDB,
- PARTITION partE VALUES LESS THAN (16) ENGINE = TokuDB,
- PARTITION Partf VALUES LESS THAN (19) ENGINE = TokuDB,
- PARTITION PartG VALUES LESS THAN (22) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `parta` VALUES LESS THAN (4) ENGINE = TokuDB,
+ PARTITION `partB` VALUES LESS THAN (7) ENGINE = TokuDB,
+ PARTITION `Partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `PartD` VALUES LESS THAN (13) ENGINE = TokuDB,
+ PARTITION `partE` VALUES LESS THAN (16) ENGINE = TokuDB,
+ PARTITION `Partf` VALUES LESS THAN (19) ENGINE = TokuDB,
+ PARTITION `PartG` VALUES LESS THAN (22) ENGINE = TokuDB)
ALTER TABLE TableA DROP PARTITION partE, PartG;
ALTER TABLE TableA DROP PARTITION Partf;
ALTER TABLE TableA ADD PARTITION
@@ -549,12 +549,12 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION parta VALUES LESS THAN (4) ENGINE = TokuDB,
- PARTITION partB VALUES LESS THAN (7) ENGINE = TokuDB,
- PARTITION Partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION PartD VALUES LESS THAN (13) ENGINE = TokuDB,
- PARTITION PartE VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `parta` VALUES LESS THAN (4) ENGINE = TokuDB,
+ PARTITION `partB` VALUES LESS THAN (7) ENGINE = TokuDB,
+ PARTITION `Partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `PartD` VALUES LESS THAN (13) ENGINE = TokuDB,
+ PARTITION `PartE` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
# Test of REORGANIZE PARTITIONS
# Error since it must reorganize a consecutive range
ALTER TABLE TableA REORGANIZE PARTITION parta,Partc INTO
@@ -587,11 +587,11 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION parta VALUES LESS THAN (4) ENGINE = TokuDB,
- PARTITION partD VALUES LESS THAN (8) COMMENT = 'Previously partB and partly Partc' ENGINE = TokuDB,
- PARTITION partB VALUES LESS THAN (11) COMMENT = 'Previously partly Partc and partly PartD' ENGINE = TokuDB,
- PARTITION partC VALUES LESS THAN MAXVALUE COMMENT = 'Previously partly PartD' ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `parta` VALUES LESS THAN (4) ENGINE = TokuDB,
+ PARTITION `partD` VALUES LESS THAN (8) COMMENT = 'Previously partB and partly Partc' ENGINE = TokuDB,
+ PARTITION `partB` VALUES LESS THAN (11) COMMENT = 'Previously partly Partc and partly PartD' ENGINE = TokuDB,
+ PARTITION `partC` VALUES LESS THAN MAXVALUE COMMENT = 'Previously partly PartD' ENGINE = TokuDB)
# Test of RENAME TABLE
RENAME TABLE TableA to TableB;
SELECT * FROM TableB;
@@ -667,11 +667,11 @@ Table Create Table
tablea CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION parta VALUES LESS THAN (4) ENGINE = TokuDB,
- PARTITION partB VALUES LESS THAN (7) ENGINE = TokuDB,
- PARTITION Partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION PartD VALUES LESS THAN (13) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `parta` VALUES LESS THAN (4) ENGINE = TokuDB,
+ PARTITION `partB` VALUES LESS THAN (7) ENGINE = TokuDB,
+ PARTITION `Partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `PartD` VALUES LESS THAN (13) ENGINE = TokuDB)
DROP TABLE tablea;
# Test of REMOVE PARTITIONING
ALTER TABLE TableA REMOVE PARTITIONING;
@@ -749,14 +749,14 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION parta VALUES IN (1,8,9) ENGINE = TokuDB,
- PARTITION partB VALUES IN (2,10,11) ENGINE = TokuDB,
- PARTITION Partc VALUES IN (3,4,7) ENGINE = TokuDB,
- PARTITION PartD VALUES IN (5,6,12) ENGINE = TokuDB,
- PARTITION partE VALUES IN (16) ENGINE = TokuDB,
- PARTITION Partf VALUES IN (19) ENGINE = TokuDB,
- PARTITION PartG VALUES IN (22) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `parta` VALUES IN (1,8,9) ENGINE = TokuDB,
+ PARTITION `partB` VALUES IN (2,10,11) ENGINE = TokuDB,
+ PARTITION `Partc` VALUES IN (3,4,7) ENGINE = TokuDB,
+ PARTITION `PartD` VALUES IN (5,6,12) ENGINE = TokuDB,
+ PARTITION `partE` VALUES IN (16) ENGINE = TokuDB,
+ PARTITION `Partf` VALUES IN (19) ENGINE = TokuDB,
+ PARTITION `PartG` VALUES IN (22) ENGINE = TokuDB)
ALTER TABLE TableA DROP PARTITION partE, PartG;
ALTER TABLE TableA DROP PARTITION Partf;
ALTER TABLE TableA ADD PARTITION
@@ -780,12 +780,12 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION parta VALUES IN (1,8,9) ENGINE = TokuDB,
- PARTITION partB VALUES IN (2,10,11) ENGINE = TokuDB,
- PARTITION Partc VALUES IN (3,4,7) ENGINE = TokuDB,
- PARTITION PartD VALUES IN (5,6,12) ENGINE = TokuDB,
- PARTITION PartE VALUES IN (13) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `parta` VALUES IN (1,8,9) ENGINE = TokuDB,
+ PARTITION `partB` VALUES IN (2,10,11) ENGINE = TokuDB,
+ PARTITION `Partc` VALUES IN (3,4,7) ENGINE = TokuDB,
+ PARTITION `PartD` VALUES IN (5,6,12) ENGINE = TokuDB,
+ PARTITION `PartE` VALUES IN (13) ENGINE = TokuDB)
# Test of REORGANIZE PARTITIONS
ALTER TABLE TableA REORGANIZE PARTITION parta,Partc INTO
(PARTITION Partc VALUES IN (1,7)
@@ -818,12 +818,12 @@ Table Create Table
TableA CREATE TABLE `TableA` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION Partc VALUES IN (1,7) COMMENT = 'Mix 1 of old parta and Partc' ENGINE = TokuDB,
- PARTITION parta VALUES IN (3,9) COMMENT = 'Mix 2 of old parta and Partc' ENGINE = TokuDB,
- PARTITION partB VALUES IN (4,8) COMMENT = 'Mix 3 of old parta and Partc' ENGINE = TokuDB,
- PARTITION PartD VALUES IN (5,6,12) ENGINE = TokuDB,
- PARTITION PartE VALUES IN (13) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `Partc` VALUES IN (1,7) COMMENT = 'Mix 1 of old parta and Partc' ENGINE = TokuDB,
+ PARTITION `parta` VALUES IN (3,9) COMMENT = 'Mix 2 of old parta and Partc' ENGINE = TokuDB,
+ PARTITION `partB` VALUES IN (4,8) COMMENT = 'Mix 3 of old parta and Partc' ENGINE = TokuDB,
+ PARTITION `PartD` VALUES IN (5,6,12) ENGINE = TokuDB,
+ PARTITION `PartE` VALUES IN (13) ENGINE = TokuDB)
# Test of RENAME TABLE
RENAME TABLE TableA to TableB;
SELECT * FROM TableB;
@@ -890,11 +890,11 @@ Table Create Table
tablea CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION parta VALUES IN (1,8,9) ENGINE = TokuDB,
- PARTITION partB VALUES IN (2,10,11) ENGINE = TokuDB,
- PARTITION Partc VALUES IN (3,4,7) ENGINE = TokuDB,
- PARTITION PartD VALUES IN (5,6,12) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `parta` VALUES IN (1,8,9) ENGINE = TokuDB,
+ PARTITION `partB` VALUES IN (2,10,11) ENGINE = TokuDB,
+ PARTITION `Partc` VALUES IN (3,4,7) ENGINE = TokuDB,
+ PARTITION `PartD` VALUES IN (5,6,12) ENGINE = TokuDB)
DROP TABLE tablea;
# Test of REMOVE PARTITIONING
ALTER TABLE TableA REMOVE PARTITIONING;
@@ -933,10 +933,10 @@ t1 CREATE TABLE `t1` (
`b` varchar(255) DEFAULT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB AUTO_INCREMENT=2002 DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION LT1000 VALUES LESS THAN (1000) ENGINE = TokuDB,
- PARTITION LT2000 VALUES LESS THAN (2000) ENGINE = TokuDB,
- PARTITION MAX VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `LT1000` VALUES LESS THAN (1000) ENGINE = TokuDB,
+ PARTITION `LT2000` VALUES LESS THAN (2000) ENGINE = TokuDB,
+ PARTITION `MAX` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1 ORDER BY a;
a b
1 First
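The lc0/lc10/lc1 variants run the same management scenario under different lower_case_table_names settings; the visible difference in the expected output is whether SHOW CREATE TABLE echoes the name as created (`TableA`, above) or lowercased (`tablea`, as in the file below). A minimal probe, offered as a sketch:

  SHOW VARIABLES LIKE 'lower_case_table_names';
  CREATE TABLE TableA (a INT) ENGINE=TokuDB
  PARTITION BY KEY (a) PARTITIONS 3;
  SHOW CREATE TABLE TableA;  -- header prints `TableA` or `tablea` depending on the setting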
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_mgm_lc1_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_mgm_lc1_tokudb.result
index 0a882a532e5..fce0d496032 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_mgm_lc1_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_mgm_lc1_tokudb.result
@@ -56,14 +56,14 @@ Table Create Table
TableA CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION parta ENGINE = TokuDB,
- PARTITION partB ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB,
- PARTITION PartD ENGINE = TokuDB,
- PARTITION partE ENGINE = TokuDB,
- PARTITION Partf ENGINE = TokuDB,
- PARTITION PartG ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `parta` ENGINE = TokuDB,
+ PARTITION `partB` ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB,
+ PARTITION `PartD` ENGINE = TokuDB,
+ PARTITION `partE` ENGINE = TokuDB,
+ PARTITION `Partf` ENGINE = TokuDB,
+ PARTITION `PartG` ENGINE = TokuDB)
ALTER TABLE TableA COALESCE PARTITION 4;
SELECT * FROM TableA;
a
@@ -84,10 +84,10 @@ Table Create Table
TableA CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION parta ENGINE = TokuDB,
- PARTITION partB ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `parta` ENGINE = TokuDB,
+ PARTITION `partB` ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB)
# Test of EXCHANGE PARTITION WITH TABLE
SELECT PARTITION_NAME, TABLE_ROWS FROM INFORMATION_SCHEMA.PARTITIONS WHERE TABLE_SCHEMA ='MySQL_Test_DB' AND TABLE_NAME = 'TableA';
PARTITION_NAME TABLE_ROWS
@@ -112,10 +112,10 @@ Table Create Table
TableA CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION parta ENGINE = TokuDB,
- PARTITION partB ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `parta` ENGINE = TokuDB,
+ PARTITION `partB` ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB)
SELECT * FROM TableB;
a
10
@@ -156,10 +156,10 @@ Table Create Table
TableA CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION parta ENGINE = TokuDB,
- PARTITION partB ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `parta` ENGINE = TokuDB,
+ PARTITION `partB` ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB)
# Test of REORGANIZE PARTITIONS
# Should not work on HASH/KEY
ALTER TABLE TableA REORGANIZE PARTITION parta,partB,Partc INTO
@@ -192,10 +192,10 @@ Table Create Table
TableA CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION partB COMMENT = 'Previusly named parta' ENGINE = TokuDB,
- PARTITION parta COMMENT = 'Previusly named partB' ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `partB` COMMENT = 'Previusly named parta' ENGINE = TokuDB,
+ PARTITION `parta` COMMENT = 'Previusly named partB' ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB)
# Test of RENAME TABLE
RENAME TABLE TableA to TableB;
SELECT * FROM TableB;
@@ -264,10 +264,10 @@ Table Create Table
tablea CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a)
-(PARTITION partB COMMENT = 'Previusly named parta' ENGINE = TokuDB,
- PARTITION parta COMMENT = 'Previusly named partB' ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`)
+(PARTITION `partB` COMMENT = 'Previusly named parta' ENGINE = TokuDB,
+ PARTITION `parta` COMMENT = 'Previusly named partB' ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB)
# Test of REMOVE PARTITIONING
ALTER TABLE TableA REMOVE PARTITIONING;
SELECT * FROM TableA;
@@ -355,14 +355,14 @@ Table Create Table
TableA CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (a)
-(PARTITION parta ENGINE = TokuDB,
- PARTITION partB ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB,
- PARTITION PartD ENGINE = TokuDB,
- PARTITION partE ENGINE = TokuDB,
- PARTITION Partf ENGINE = TokuDB,
- PARTITION PartG ENGINE = TokuDB) */
+ PARTITION BY HASH (`a`)
+(PARTITION `parta` ENGINE = TokuDB,
+ PARTITION `partB` ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB,
+ PARTITION `PartD` ENGINE = TokuDB,
+ PARTITION `partE` ENGINE = TokuDB,
+ PARTITION `Partf` ENGINE = TokuDB,
+ PARTITION `PartG` ENGINE = TokuDB)
ALTER TABLE TableA COALESCE PARTITION 4;
SELECT * FROM TableA;
a
@@ -383,10 +383,10 @@ Table Create Table
TableA CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (a)
-(PARTITION parta ENGINE = TokuDB,
- PARTITION partB ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB) */
+ PARTITION BY HASH (`a`)
+(PARTITION `parta` ENGINE = TokuDB,
+ PARTITION `partB` ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB)
# Test of REORGANIZE PARTITIONS
# Should not work on HASH/KEY
ALTER TABLE TableA REORGANIZE PARTITION parta,partB,Partc INTO
@@ -419,10 +419,10 @@ Table Create Table
TableA CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (a)
-(PARTITION partB COMMENT = 'Previusly named parta' ENGINE = TokuDB,
- PARTITION parta COMMENT = 'Previusly named partB' ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB) */
+ PARTITION BY HASH (`a`)
+(PARTITION `partB` COMMENT = 'Previusly named parta' ENGINE = TokuDB,
+ PARTITION `parta` COMMENT = 'Previusly named partB' ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB)
# Test of RENAME TABLE
RENAME TABLE TableA to TableB;
SELECT * FROM TableB;
@@ -491,10 +491,10 @@ Table Create Table
tablea CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (a)
-(PARTITION partB COMMENT = 'Previously named parta' ENGINE = TokuDB,
- PARTITION parta COMMENT = 'Previously named partB' ENGINE = TokuDB,
- PARTITION Partc ENGINE = TokuDB) */
+ PARTITION BY HASH (`a`)
+(PARTITION `partB` COMMENT = 'Previously named parta' ENGINE = TokuDB,
+ PARTITION `parta` COMMENT = 'Previously named partB' ENGINE = TokuDB,
+ PARTITION `Partc` ENGINE = TokuDB)
# Test of REMOVE PARTITIONING
ALTER TABLE TableA REMOVE PARTITIONING;
SELECT * FROM TableA;
@@ -571,14 +571,14 @@ Table Create Table
TableA CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION parta VALUES LESS THAN (4) ENGINE = TokuDB,
- PARTITION partB VALUES LESS THAN (7) ENGINE = TokuDB,
- PARTITION Partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION PartD VALUES LESS THAN (13) ENGINE = TokuDB,
- PARTITION partE VALUES LESS THAN (16) ENGINE = TokuDB,
- PARTITION Partf VALUES LESS THAN (19) ENGINE = TokuDB,
- PARTITION PartG VALUES LESS THAN (22) ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `parta` VALUES LESS THAN (4) ENGINE = TokuDB,
+ PARTITION `partB` VALUES LESS THAN (7) ENGINE = TokuDB,
+ PARTITION `Partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `PartD` VALUES LESS THAN (13) ENGINE = TokuDB,
+ PARTITION `partE` VALUES LESS THAN (16) ENGINE = TokuDB,
+ PARTITION `Partf` VALUES LESS THAN (19) ENGINE = TokuDB,
+ PARTITION `PartG` VALUES LESS THAN (22) ENGINE = TokuDB)
ALTER TABLE TableA DROP PARTITION partE, PartG;
ALTER TABLE TableA DROP PARTITION Partf;
ALTER TABLE TableA ADD PARTITION
@@ -602,12 +602,12 @@ Table Create Table
TableA CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION parta VALUES LESS THAN (4) ENGINE = TokuDB,
- PARTITION partB VALUES LESS THAN (7) ENGINE = TokuDB,
- PARTITION Partc VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION PartD VALUES LESS THAN (13) ENGINE = TokuDB,
- PARTITION PartE VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `parta` VALUES LESS THAN (4) ENGINE = TokuDB,
+ PARTITION `partB` VALUES LESS THAN (7) ENGINE = TokuDB,
+ PARTITION `Partc` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `PartD` VALUES LESS THAN (13) ENGINE = TokuDB,
+ PARTITION `PartE` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
# Test of REORGANIZE PARTITIONS
# Error since it must reorganize a consecutive range
ALTER TABLE TableA REORGANIZE PARTITION parta,Partc INTO
@@ -640,11 +640,11 @@ Table Create Table
TableA CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION parta VALUES LESS THAN (4) ENGINE = TokuDB,
- PARTITION partD VALUES LESS THAN (8) COMMENT = 'Previously partB and partly Partc' ENGINE = TokuDB,
- PARTITION partB VALUES LESS THAN (11) COMMENT = 'Previously partly Partc and partly PartD' ENGINE = TokuDB,
- PARTITION partC VALUES LESS THAN MAXVALUE COMMENT = 'Previously partly PartD' ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `parta` VALUES LESS THAN (4) ENGINE = TokuDB,
+ PARTITION `partD` VALUES LESS THAN (8) COMMENT = 'Previously partB and partly Partc' ENGINE = TokuDB,
+ PARTITION `partB` VALUES LESS THAN (11) COMMENT = 'Previously partly Partc and partly PartD' ENGINE = TokuDB,
+ PARTITION `partC` VALUES LESS THAN MAXVALUE COMMENT = 'Previously partly PartD' ENGINE = TokuDB)
# Test of RENAME TABLE
RENAME TABLE TableA to TableB;
SELECT * FROM TableB;
@@ -713,11 +713,11 @@ Table Create Table
tablea CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION parta VALUES LESS THAN (4) ENGINE = TokuDB,
- PARTITION partD VALUES LESS THAN (8) COMMENT = 'Previously partB and partly Partc' ENGINE = TokuDB,
- PARTITION partB VALUES LESS THAN (11) COMMENT = 'Previously partly Partc and partly PartD' ENGINE = TokuDB,
- PARTITION partC VALUES LESS THAN MAXVALUE COMMENT = 'Previously partly PartD' ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `parta` VALUES LESS THAN (4) ENGINE = TokuDB,
+ PARTITION `partD` VALUES LESS THAN (8) COMMENT = 'Previously partB and partly Partc' ENGINE = TokuDB,
+ PARTITION `partB` VALUES LESS THAN (11) COMMENT = 'Previously partly Partc and partly PartD' ENGINE = TokuDB,
+ PARTITION `partC` VALUES LESS THAN MAXVALUE COMMENT = 'Previously partly PartD' ENGINE = TokuDB)
# Test of REMOVE PARTITIONING
ALTER TABLE TableA REMOVE PARTITIONING;
SELECT * FROM TableA;
@@ -794,14 +794,14 @@ Table Create Table
TableA CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION parta VALUES IN (1,8,9) ENGINE = TokuDB,
- PARTITION partB VALUES IN (2,10,11) ENGINE = TokuDB,
- PARTITION Partc VALUES IN (3,4,7) ENGINE = TokuDB,
- PARTITION PartD VALUES IN (5,6,12) ENGINE = TokuDB,
- PARTITION partE VALUES IN (16) ENGINE = TokuDB,
- PARTITION Partf VALUES IN (19) ENGINE = TokuDB,
- PARTITION PartG VALUES IN (22) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `parta` VALUES IN (1,8,9) ENGINE = TokuDB,
+ PARTITION `partB` VALUES IN (2,10,11) ENGINE = TokuDB,
+ PARTITION `Partc` VALUES IN (3,4,7) ENGINE = TokuDB,
+ PARTITION `PartD` VALUES IN (5,6,12) ENGINE = TokuDB,
+ PARTITION `partE` VALUES IN (16) ENGINE = TokuDB,
+ PARTITION `Partf` VALUES IN (19) ENGINE = TokuDB,
+ PARTITION `PartG` VALUES IN (22) ENGINE = TokuDB)
ALTER TABLE TableA DROP PARTITION partE, PartG;
ALTER TABLE TableA DROP PARTITION Partf;
ALTER TABLE TableA ADD PARTITION
@@ -825,12 +825,12 @@ Table Create Table
TableA CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION parta VALUES IN (1,8,9) ENGINE = TokuDB,
- PARTITION partB VALUES IN (2,10,11) ENGINE = TokuDB,
- PARTITION Partc VALUES IN (3,4,7) ENGINE = TokuDB,
- PARTITION PartD VALUES IN (5,6,12) ENGINE = TokuDB,
- PARTITION PartE VALUES IN (13) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `parta` VALUES IN (1,8,9) ENGINE = TokuDB,
+ PARTITION `partB` VALUES IN (2,10,11) ENGINE = TokuDB,
+ PARTITION `Partc` VALUES IN (3,4,7) ENGINE = TokuDB,
+ PARTITION `PartD` VALUES IN (5,6,12) ENGINE = TokuDB,
+ PARTITION `PartE` VALUES IN (13) ENGINE = TokuDB)
# Test of REORGANIZE PARTITIONS
ALTER TABLE TableA REORGANIZE PARTITION parta,Partc INTO
(PARTITION Partc VALUES IN (1,7)
@@ -863,12 +863,12 @@ Table Create Table
TableA CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION Partc VALUES IN (1,7) COMMENT = 'Mix 1 of old parta and Partc' ENGINE = TokuDB,
- PARTITION parta VALUES IN (3,9) COMMENT = 'Mix 2 of old parta and Partc' ENGINE = TokuDB,
- PARTITION partB VALUES IN (4,8) COMMENT = 'Mix 3 of old parta and Partc' ENGINE = TokuDB,
- PARTITION PartD VALUES IN (5,6,12) ENGINE = TokuDB,
- PARTITION PartE VALUES IN (13) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `Partc` VALUES IN (1,7) COMMENT = 'Mix 1 of old parta and Partc' ENGINE = TokuDB,
+ PARTITION `parta` VALUES IN (3,9) COMMENT = 'Mix 2 of old parta and Partc' ENGINE = TokuDB,
+ PARTITION `partB` VALUES IN (4,8) COMMENT = 'Mix 3 of old parta and Partc' ENGINE = TokuDB,
+ PARTITION `PartD` VALUES IN (5,6,12) ENGINE = TokuDB,
+ PARTITION `PartE` VALUES IN (13) ENGINE = TokuDB)
# Test of RENAME TABLE
RENAME TABLE TableA to TableB;
SELECT * FROM TableB;
@@ -928,12 +928,12 @@ Table Create Table
tablea CREATE TABLE `tablea` (
`a` int(11) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (a)
-(PARTITION Partc VALUES IN (1,7) COMMENT = 'Mix 1 of old parta and Partc' ENGINE = TokuDB,
- PARTITION parta VALUES IN (3,9) COMMENT = 'Mix 2 of old parta and Partc' ENGINE = TokuDB,
- PARTITION partB VALUES IN (4,8) COMMENT = 'Mix 3 of old parta and Partc' ENGINE = TokuDB,
- PARTITION PartD VALUES IN (5,6,12) ENGINE = TokuDB,
- PARTITION PartE VALUES IN (13) ENGINE = TokuDB) */
+ PARTITION BY LIST (`a`)
+(PARTITION `Partc` VALUES IN (1,7) COMMENT = 'Mix 1 of old parta and Partc' ENGINE = TokuDB,
+ PARTITION `parta` VALUES IN (3,9) COMMENT = 'Mix 2 of old parta and Partc' ENGINE = TokuDB,
+ PARTITION `partB` VALUES IN (4,8) COMMENT = 'Mix 3 of old parta and Partc' ENGINE = TokuDB,
+ PARTITION `PartD` VALUES IN (5,6,12) ENGINE = TokuDB,
+ PARTITION `PartE` VALUES IN (13) ENGINE = TokuDB)
# Test of REMOVE PARTITIONING
ALTER TABLE TableA REMOVE PARTITIONING;
SELECT * FROM TableA;
@@ -971,10 +971,10 @@ t1 CREATE TABLE `t1` (
`b` varchar(255) DEFAULT NULL,
PRIMARY KEY (`a`)
) ENGINE=TokuDB AUTO_INCREMENT=2002 DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (a)
-(PARTITION LT1000 VALUES LESS THAN (1000) ENGINE = TokuDB,
- PARTITION LT2000 VALUES LESS THAN (2000) ENGINE = TokuDB,
- PARTITION MAX VALUES LESS THAN MAXVALUE ENGINE = TokuDB) */
+ PARTITION BY RANGE (`a`)
+(PARTITION `LT1000` VALUES LESS THAN (1000) ENGINE = TokuDB,
+ PARTITION `LT2000` VALUES LESS THAN (2000) ENGINE = TokuDB,
+ PARTITION `MAX` VALUES LESS THAN MAXVALUE ENGINE = TokuDB)
SELECT * FROM t1 ORDER BY a;
a b
1 First
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_special_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_special_tokudb.result
index 349d96a9dd7..a2df5bc5831 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_special_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_special_tokudb.result
@@ -13,11 +13,11 @@ t1 CREATE TABLE `t1` (
`d` enum('m','w') NOT NULL,
PRIMARY KEY (`a`,`b`,`c`,`d`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a,b,c,d)
-(PARTITION pa1 MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
- PARTITION pa2 MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
- PARTITION pa3 MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
- PARTITION pa4 MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`,`b`,`c`,`d`)
+(PARTITION `pa1` MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
+ PARTITION `pa2` MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
+ PARTITION `pa3` MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
+ PARTITION `pa4` MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB)
insert into t1 values
('1975-01-01', 'abcde', 'abcde','m'),
('1983-12-31', 'cdef', 'srtbvsr', 'w'),
@@ -55,11 +55,11 @@ t1 CREATE TABLE `t1` (
`i` char(255) DEFAULT NULL,
PRIMARY KEY (`a`,`b`,`c`,`d`,`e`,`f`,`g`,`h`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a,b,c,d,e,f,g,h)
-(PARTITION pa1 MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
- PARTITION pa2 MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
- PARTITION pa3 MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
- PARTITION pa4 MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`,`b`,`c`,`d`,`e`,`f`,`g`,`h`)
+(PARTITION `pa1` MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
+ PARTITION `pa2` MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
+ PARTITION `pa3` MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
+ PARTITION `pa4` MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB)
insert into t1 values
('1975-01-01', 'abcde', 'abcde','m', 1234, 123.45, 32412341234, 113, 'tbhth nrzh ztfghgfh fzh ftzhj fztjh'),
('1983-12-31', 'cdef', 'srtbvsr', 'w', 45634, 13452.56, 3452346456, 127, 'liuugbzvdmrlti b itiortudirtfgtibm dfi'),
@@ -105,11 +105,11 @@ t1 CREATE TABLE `t1` (
`i` char(255) DEFAULT NULL,
PRIMARY KEY (`a`,`b`,`c`,`d`,`e`,`f`,`g`,`h`,`a1`,`b1`,`c1`,`d1`,`e1`,`f1`,`g1`,`h1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a,b,c,d,e,f,g,h,a1,b1,c1,d1,e1,f1,g1,h1)
-(PARTITION pa1 MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
- PARTITION pa2 MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
- PARTITION pa3 MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
- PARTITION pa4 MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`,`b`,`c`,`d`,`e`,`f`,`g`,`h`,`a1`,`b1`,`c1`,`d1`,`e1`,`f1`,`g1`,`h1`)
+(PARTITION `pa1` MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
+ PARTITION `pa2` MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
+ PARTITION `pa3` MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
+ PARTITION `pa4` MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB)
insert into t1 values
('1975-01-01', 'abcde', 'abcde','m', 1234, 123.45, 32412341234, 113,'1975-01-01', 'abcde', 'abcde','m', 1234, 123.45, 32412341234, 113, 'tbhth nrzh ztfghgfh fzh ftzhj fztjh'),
('1983-12-31', 'cdef', 'srtbvsr', 'w', 45634, 13452.56, 3452346456, 127,'1983-12-31', 'cdef', 'srtbvsr', 'w', 45634, 13452.56, 3452346456, 127, 'liuugbzvdmrlti b itiortudirtfgtibm dfi'),
@@ -185,11 +185,11 @@ t1 CREATE TABLE `t1` (
`i` char(255) DEFAULT NULL,
PRIMARY KEY (`a`,`b`,`c`,`d`,`e`,`f`,`g`,`h`,`a1`,`b1`,`c1`,`d1`,`e1`,`f1`,`g1`,`h1`)
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY KEY (a,b,c,d,e,f,g,h)
-(PARTITION pa1 MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
- PARTITION pa2 MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
- PARTITION pa3 MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
- PARTITION pa4 MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB) */
+ PARTITION BY KEY (`a`,`b`,`c`,`d`,`e`,`f`,`g`,`h`)
+(PARTITION `pa1` MAX_ROWS = 20 MIN_ROWS = 2 ENGINE = TokuDB,
+ PARTITION `pa2` MAX_ROWS = 30 MIN_ROWS = 3 ENGINE = TokuDB,
+ PARTITION `pa3` MAX_ROWS = 30 MIN_ROWS = 4 ENGINE = TokuDB,
+ PARTITION `pa4` MAX_ROWS = 40 MIN_ROWS = 2 ENGINE = TokuDB)
insert into t1 values
('1975-01-01', 'abcde', 'abcde','m', 1234, 123.45, 32412341234, 113,'1975-01-01', 'abcde', 'abcde','m', 1234, 123.45, 32412341234, 113,'1975-01-01', 'abcde', 'abcde','m', 1234, 123.45, 32412341234, 113, '1975-01-01', 'abcde', 'abcde','m', 1234, 123.45, 32412341234, 113, 'tbhth nrzh ztfghgfh fzh ftzhj fztjh'),
('1983-12-31', 'cdef', 'srtbvsr', 'w', 45634, 13452.56, 3452346456, 127,'1983-12-31', 'cdef', 'srtbvsr', 'w', 45634, 13452.56, 3452346456, 127, '1983-12-31', 'cdef', 'srtbvsr', 'w', 45634, 13452.56, 3452346456, 127, '1983-12-31', 'cdef', 'srtbvsr', 'w', 45634, 13452.56, 3452346456, 127, 'liuugbzvdmrlti b itiortudirtfgtibm dfi'),
@@ -215,22 +215,33 @@ PRIMARY KEY (a))
ENGINE = TokuDB
PARTITION BY HASH (a)
PARTITIONS 2;
+connect con1, localhost, root,,;
+connect con2, localhost, root,,;
+connection con1;
SET autocommit=OFF;
START TRANSACTION;
INSERT INTO t1 VALUES (NULL, 'first row t2');
+connection con2;
SET autocommit=OFF;
SET SESSION lock_wait_timeout= 1;
ALTER TABLE t1 AUTO_INCREMENT = 10;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+disconnect con2;
+disconnect con1;
+connection default;
DROP TABLE t1;
#
# Bug#53676: Unexpected errors and possible table corruption on
# ADD PARTITION and LOCK TABLE
+connect con1,localhost,root,,;
CREATE TABLE t1 ( i INT NOT NULL AUTO_INCREMENT PRIMARY KEY, f INT )
ENGINE = TokuDB PARTITION BY HASH(i) PARTITIONS 2;
+connect con2,localhost,root,,;
SET lock_wait_timeout = 2;
+connection con1;
#Connection 1 locks the table
LOCK TABLE t1 READ;
+connection con2;
# Connection 2 tries to add partitions:
# First attempt: lock wait timeout (as expected)
ALTER TABLE t1 ADD PARTITION PARTITIONS 2;
@@ -238,9 +249,11 @@ ERROR HY000: Lock wait timeout exceeded; try restarting transaction
# Second attempt: says that partition already exists
ALTER TABLE t1 ADD PARTITION PARTITIONS 2;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection con1;
# Connection 1 unlocks the table and locks it again:
UNLOCK TABLES;
LOCK TABLE t1 READ;
+connection con2;
# Connection 2 tries again to add partitions:
# Third attempt: says that the table does not exist
ALTER TABLE t1 ADD PARTITION PARTITIONS 2;
@@ -249,39 +262,57 @@ ERROR HY000: Lock wait timeout exceeded; try restarting transaction
CHECK TABLE t1;
Table Op Msg_type Msg_text
test.t1 check status OK
+connection con1;
UNLOCK TABLES;
+connection con2;
DROP TABLE t1;
+connection con1;
CREATE TABLE t2 ( i INT NOT NULL AUTO_INCREMENT PRIMARY KEY, f INT )
ENGINE = TokuDB PARTITION BY HASH(i) PARTITIONS 2;
+connection con2;
SET lock_wait_timeout = 2;
+connection con1;
LOCK TABLE t2 READ;
+connection con2;
ALTER TABLE t2 ADD PARTITION PARTITIONS 2;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
ALTER TABLE t2 ADD PARTITION PARTITIONS 2;
+connection con1;
UNLOCK TABLES;
+connection con2;
+connect con3,localhost,root,,;
CHECK TABLE t2;
Table Op Msg_type Msg_text
test.t2 check status OK
SELECT * FROM t2;
i f
DROP TABLE t2;
+connection con1;
CREATE TABLE t3 ( i INT NOT NULL AUTO_INCREMENT PRIMARY KEY, f INT )
ENGINE = TokuDB PARTITION BY HASH(i) PARTITIONS 2;
+connection con2;
SET lock_wait_timeout = 2;
+connection con1;
# Connection 1 locks the table
LOCK TABLE t3 READ;
+connection con2;
# Connection 2 tries to add partitions (timeout):
ALTER TABLE t3 ADD PARTITION PARTITIONS 2;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connection con3;
SET lock_wait_timeout = 2;
# Connection 3 tries to add partitions (partition already exists):
ALTER TABLE t3 ADD PARTITION PARTITIONS 2;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+connect con4,localhost,root,,;
# Connection 4 tries to rename the table:
RENAME TABLE t3 TO t4;
+connection con1;
# Connection 1 unlocks the table:
UNLOCK TABLES;
+connection con4;
# Connection 4 gets error on rename:
+connect con5,localhost,root,,;
# SHOW TABLES returns the table (not renamed):
SHOW TABLES;
Tables_in_test
@@ -290,3 +321,9 @@ t4
SELECT * FROM t3;
ERROR 42S02: Table 'test.t3' doesn't exist
DROP TABLE t4;
+disconnect con5;
+disconnect con4;
+disconnect con3;
+disconnect con2;
+disconnect con1;
+connection default;
diff --git a/storage/tokudb/mysql-test/tokudb_parts/r/partition_syntax_tokudb.result b/storage/tokudb/mysql-test/tokudb_parts/r/partition_syntax_tokudb.result
index 340bbf07f35..e7bb6c7d70f 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/r/partition_syntax_tokudb.result
+++ b/storage/tokudb/mysql-test/tokudb_parts/r/partition_syntax_tokudb.result
@@ -652,15 +652,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,2))
-(PARTITION part1 VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (1) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 2)
+(PARTITION `part1` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (1) ENGINE = TokuDB)
DROP TABLE t1;
# 3.5.3 Reveal that IN (...NULL) is not mapped to IN(0)
@@ -679,16 +679,16 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY LIST (MOD(f_int1,2))
-(PARTITION part1 VALUES IN (NULL) ENGINE = TokuDB,
- PARTITION part2 VALUES IN (0) ENGINE = TokuDB,
- PARTITION part3 VALUES IN (1) ENGINE = TokuDB) */
+ PARTITION BY LIST (`f_int1` % 2)
+(PARTITION `part1` VALUES IN (NULL) ENGINE = TokuDB,
+ PARTITION `part2` VALUES IN (0) ENGINE = TokuDB,
+ PARTITION `part3` VALUES IN (1) ENGINE = TokuDB)
DROP TABLE t1;
@@ -713,13 +713,13 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1) */
+ PARTITION BY HASH (`f_int1`)
DROP TABLE t1;
# 4.1.2 no partition number, named partitions
@@ -735,15 +735,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part1 ENGINE = TokuDB,
- PARTITION part2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part1` ENGINE = TokuDB,
+ PARTITION `part2` ENGINE = TokuDB)
DROP TABLE t1;
# 4.1.3 variations on no partition/subpartition number, named partitions,
@@ -770,7 +770,7 @@ f_char2 CHAR(20),
f_charbig VARCHAR(1000) )
PARTITION BY RANGE(f_int1) SUBPARTITION BY HASH(f_int1) (PARTITION part1 VALUES LESS THAN (10), PARTITION part2 VALUES LESS THAN (20)
(SUBPARTITION subpart21 , SUBPARTITION subpart22 ), PARTITION part3 VALUES LESS THAN (2147483646)) ;
-ERROR 42000: Wrong number of subpartitions defined, mismatch with previous setting near 'SUBPARTITION subpart21 , SUBPARTITION subpart22 ), PARTITION part3 VALUES LESS T' at line 7
+ERROR 42000: Wrong number of subpartitions defined, mismatch with previous setting near 'SUBPARTITION subpart21 , SUBPARTITION subpart22 ), PARTITION part3 VALUES LES...' at line 7
CREATE TABLE t1 ( f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
f_char1 CHAR(20),
@@ -779,7 +779,7 @@ f_charbig VARCHAR(1000) )
PARTITION BY RANGE(f_int1) SUBPARTITION BY HASH(f_int1) (PARTITION part1 VALUES LESS THAN (10), PARTITION part2 VALUES LESS THAN (20)
(SUBPARTITION subpart21 , SUBPARTITION subpart22 ), PARTITION part3 VALUES LESS THAN (2147483646)
(SUBPARTITION subpart31 , SUBPARTITION subpart32 )) ;
-ERROR 42000: Wrong number of subpartitions defined, mismatch with previous setting near 'SUBPARTITION subpart21 , SUBPARTITION subpart22 ), PARTITION part3 VALUES LESS T' at line 7
+ERROR 42000: Wrong number of subpartitions defined, mismatch with previous setting near 'SUBPARTITION subpart21 , SUBPARTITION subpart22 ), PARTITION part3 VALUES LES...' at line 7
CREATE TABLE t1 ( f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
f_char1 CHAR(20),
@@ -797,7 +797,7 @@ PARTITION BY RANGE(f_int1) SUBPARTITION BY HASH(f_int1) (PARTITION part1 VALUES
(SUBPARTITION subpart11 , SUBPARTITION subpart12 ), PARTITION part2 VALUES LESS THAN (20), PARTITION part3 VALUES LESS THAN (2147483646)
(SUBPARTITION subpart31 , SUBPARTITION subpart32 )) ;
ERROR 42000: Wrong number of subpartitions defined, mismatch with previous setting near ' PARTITION part3 VALUES LESS THAN (2147483646)
-(SUBPARTITION subpart31 , SUBPART' at line 7
+(SUBPARTITION subpart31 , SUBP...' at line 7
CREATE TABLE t1 ( f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
f_char1 CHAR(20),
@@ -820,23 +820,23 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int1)
-(PARTITION part1 VALUES LESS THAN (10)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (20)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB),
- PARTITION part3 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart31 ENGINE = TokuDB,
- SUBPARTITION subpart32 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (10)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (20)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB),
+ PARTITION `part3` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart31` ENGINE = TokuDB,
+ SUBPARTITION `subpart32` ENGINE = TokuDB))
DROP TABLE t1;
#------------------------------------------------------------------------
@@ -856,14 +856,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-PARTITIONS 2 */
+ PARTITION BY HASH (`f_int1`)
+PARTITIONS 2
DROP TABLE t1;
CREATE TABLE t1 (
@@ -881,17 +881,17 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 2
-(PARTITION part1 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part2 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part2` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
DROP TABLE t1;
CREATE TABLE t1 (
@@ -906,14 +906,14 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-PARTITIONS 1 */
+ PARTITION BY HASH (`f_int1`)
+PARTITIONS 1
DROP TABLE t1;
CREATE TABLE t1 (
@@ -931,17 +931,17 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int1)
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int1`)
SUBPARTITIONS 1
-(PARTITION part1 VALUES LESS THAN (10) ENGINE = TokuDB,
- PARTITION part2 VALUES LESS THAN (2147483646) ENGINE = TokuDB) */
+(PARTITION `part1` VALUES LESS THAN (10) ENGINE = TokuDB,
+ PARTITION `part2` VALUES LESS THAN (2147483646) ENGINE = TokuDB)
DROP TABLE t1;
CREATE TABLE t1 (
@@ -987,7 +987,7 @@ SUBPARTITIONS -1
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '-1
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THAN (214' at line 9
+PARTITION part2 VALUES LESS THAN (...' at line 9
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1032,7 +1032,7 @@ SUBPARTITIONS 2.0
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: Only integers allowed as number here near '2.0
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THAN (21' at line 9
+PARTITION part2 VALUES LESS THAN ...' at line 9
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1055,7 +1055,7 @@ SUBPARTITIONS -2.0
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '-2.0
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THAN (2' at line 9
+PARTITION part2 VALUES LESS THAN...' at line 9
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1078,7 +1078,7 @@ SUBPARTITIONS 0.0
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: Only integers allowed as number here near '0.0
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THAN (21' at line 9
+PARTITION part2 VALUES LESS THAN ...' at line 9
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1101,7 +1101,7 @@ SUBPARTITIONS 1.6
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: Only integers allowed as number here near '1.6
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THAN (21' at line 9
+PARTITION part2 VALUES LESS THAN ...' at line 9
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1123,7 +1123,7 @@ SUBPARTITIONS 999999999999999999999999999999.999999999999999999999999999999
(PARTITION part1 VALUES LESS THAN (10),
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: Only integers allowed as number here near '999999999999999999999999999999.999999999999999999999999999999
-(PARTITION part1 V' at line 9
+(PARTITION part...' at line 9
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1146,7 +1146,7 @@ SUBPARTITIONS 0.000000000000000000000000000001
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: Only integers allowed as number here near '0.000000000000000000000000000001
(PARTITION part1 VALUES LESS THAN (10),
-PARTITI' at line 9
+PART...' at line 9
# 4.2.3 partition/subpartition numbers FLOAT notation
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
@@ -1170,7 +1170,7 @@ SUBPARTITIONS 2.0E+0
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: Only integers allowed as number here near '2.0E+0
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THAN ' at line 9
+PARTITION part2 VALUES LESS TH...' at line 9
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1193,7 +1193,7 @@ SUBPARTITIONS 0.2E+1
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: Only integers allowed as number here near '0.2E+1
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THAN ' at line 9
+PARTITION part2 VALUES LESS TH...' at line 9
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1216,7 +1216,7 @@ SUBPARTITIONS -2.0E+0
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '-2.0E+0
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THAN' at line 9
+PARTITION part2 VALUES LESS T...' at line 9
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1239,7 +1239,7 @@ SUBPARTITIONS 0.16E+1
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: Only integers allowed as number here near '0.16E+1
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THAN' at line 9
+PARTITION part2 VALUES LESS T...' at line 9
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1262,7 +1262,7 @@ SUBPARTITIONS 0.0E+300
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: Only integers allowed as number here near '0.0E+300
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THA' at line 9
+PARTITION part2 VALUES LESS ...' at line 9
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1285,7 +1285,7 @@ SUBPARTITIONS 1E+300
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: Only integers allowed as number here near '1E+300
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THAN ' at line 9
+PARTITION part2 VALUES LESS TH...' at line 9
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1308,7 +1308,7 @@ SUBPARTITIONS 1E-300
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: Only integers allowed as number here near '1E-300
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THAN ' at line 9
+PARTITION part2 VALUES LESS TH...' at line 9
# 4.2.4 partition/subpartition numbers STRING notation
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
@@ -1332,7 +1332,7 @@ SUBPARTITIONS '2'
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near ''2'
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THAN (21' at line 9
+PARTITION part2 VALUES LESS THAN ...' at line 9
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1355,7 +1355,7 @@ SUBPARTITIONS '2.0'
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near ''2.0'
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THAN (' at line 9
+PARTITION part2 VALUES LESS THA...' at line 9
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1378,7 +1378,7 @@ SUBPARTITIONS '0.2E+1'
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near ''0.2E+1'
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THA' at line 9
+PARTITION part2 VALUES LESS ...' at line 9
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1401,7 +1401,7 @@ SUBPARTITIONS '2A'
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near ''2A'
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THAN (2' at line 9
+PARTITION part2 VALUES LESS THAN...' at line 9
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1424,7 +1424,7 @@ SUBPARTITIONS 'A2'
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near ''A2'
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THAN (2' at line 9
+PARTITION part2 VALUES LESS THAN...' at line 9
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1447,7 +1447,7 @@ SUBPARTITIONS ''
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '''
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THAN (214' at line 9
+PARTITION part2 VALUES LESS THAN (...' at line 9
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1470,7 +1470,7 @@ SUBPARTITIONS 'GARBAGE'
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near ''GARBAGE'
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS TH' at line 9
+PARTITION part2 VALUES LESS...' at line 9
# 4.2.5 partition/subpartition numbers other notations
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
@@ -1494,7 +1494,7 @@ SUBPARTITIONS 2A
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '2A
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THAN (214' at line 9
+PARTITION part2 VALUES LESS THAN (...' at line 9
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1517,7 +1517,7 @@ SUBPARTITIONS A2
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'A2
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THAN (214' at line 9
+PARTITION part2 VALUES LESS THAN (...' at line 9
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1540,7 +1540,7 @@ SUBPARTITIONS GARBAGE
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'GARBAGE
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THAN' at line 9
+PARTITION part2 VALUES LESS T...' at line 9
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1563,7 +1563,7 @@ SUBPARTITIONS "2"
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '"2"
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THAN (21' at line 9
+PARTITION part2 VALUES LESS THAN ...' at line 9
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1586,7 +1586,7 @@ SUBPARTITIONS "2A"
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '"2A"
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THAN (2' at line 9
+PARTITION part2 VALUES LESS THAN...' at line 9
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1609,7 +1609,7 @@ SUBPARTITIONS "A2"
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '"A2"
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THAN (2' at line 9
+PARTITION part2 VALUES LESS THAN...' at line 9
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1632,7 +1632,7 @@ SUBPARTITIONS "GARBAGE"
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '"GARBAGE"
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS TH' at line 9
+PARTITION part2 VALUES LESS...' at line 9
# 4.2.6 (negative) partition/subpartition numbers per @variables
SET @aux = 5;
CREATE TABLE t1 (
@@ -1657,7 +1657,7 @@ SUBPARTITIONS @aux = 5
PARTITION part2 VALUES LESS THAN (2147483646));
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '@aux = 5
(PARTITION part1 VALUES LESS THAN (10),
-PARTITION part2 VALUES LESS THA' at line 9
+PARTITION part2 VALUES LESS ...' at line 9
#------------------------------------------------------------------------
# 4.3 Mixups of assigned partition/subpartition numbers and names
#------------------------------------------------------------------------
@@ -1675,15 +1675,15 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY HASH (f_int1)
-(PARTITION part1 ENGINE = TokuDB,
- PARTITION part2 ENGINE = TokuDB) */
+ PARTITION BY HASH (`f_int1`)
+(PARTITION `part1` ENGINE = TokuDB,
+ PARTITION `part2` ENGINE = TokuDB)
DROP TABLE t1;
CREATE TABLE t1 (
@@ -1704,20 +1704,20 @@ create_command
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `f_int1` int(11) DEFAULT '0',
- `f_int2` int(11) DEFAULT '0',
+ `f_int1` int(11) DEFAULT 0,
+ `f_int2` int(11) DEFAULT 0,
`f_char1` char(20) DEFAULT NULL,
`f_char2` char(20) DEFAULT NULL,
`f_charbig` varchar(1000) DEFAULT NULL
) ENGINE=TokuDB DEFAULT CHARSET=latin1
-/*!50100 PARTITION BY RANGE (f_int1)
-SUBPARTITION BY HASH (f_int1)
-(PARTITION part1 VALUES LESS THAN (1000)
- (SUBPARTITION subpart11 ENGINE = TokuDB,
- SUBPARTITION subpart12 ENGINE = TokuDB),
- PARTITION part2 VALUES LESS THAN (2147483646)
- (SUBPARTITION subpart21 ENGINE = TokuDB,
- SUBPARTITION subpart22 ENGINE = TokuDB)) */
+ PARTITION BY RANGE (`f_int1`)
+SUBPARTITION BY HASH (`f_int1`)
+(PARTITION `part1` VALUES LESS THAN (1000)
+ (SUBPARTITION `subpart11` ENGINE = TokuDB,
+ SUBPARTITION `subpart12` ENGINE = TokuDB),
+ PARTITION `part2` VALUES LESS THAN (2147483646)
+ (SUBPARTITION `subpart21` ENGINE = TokuDB,
+ SUBPARTITION `subpart22` ENGINE = TokuDB))
DROP TABLE t1;
# 4.3.2 (positive) number of partition/subpartition ,
@@ -1750,7 +1750,7 @@ PARTITION part2 VALUES LESS THAN (2147483646)
);
ERROR 42000: Wrong number of subpartitions defined, mismatch with previous setting near '),
PARTITION part2 VALUES LESS THAN (2147483646)
-(SUBPARTITION subpart21, SUBPAR' at line 11
+(SUBPARTITION subpart21, SUB...' at line 11
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1769,7 +1769,7 @@ PARTITION part3 VALUES LESS THAN (2147483646)
);
ERROR 42000: Wrong number of subpartitions defined, mismatch with previous setting near '),
PARTITION part3 VALUES LESS THAN (2147483646)
-(SUBPARTITION subpart31, SUBPAR' at line 13
+(SUBPARTITION subpart31, SUB...' at line 13
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1812,7 +1812,7 @@ PARTITION part2 VALUES LESS THAN (2147483646)
);
ERROR 42000: Wrong number of subpartitions defined, mismatch with previous setting near '),
PARTITION part2 VALUES LESS THAN (2147483646)
-(SUBPARTITION subpart21, SUBPAR' at line 11
+(SUBPARTITION subpart21, SUB...' at line 11
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1831,7 +1831,7 @@ PARTITION part3 VALUES LESS THAN (2147483646)
);
ERROR 42000: Wrong number of subpartitions defined, mismatch with previous setting near '),
PARTITION part2 VALUES LESS THAN (2000)
-(SUBPARTITION subpart21 ' at line 11
+(SUBPARTITION subpart21 ...' at line 11
CREATE TABLE t1 (
f_int1 INTEGER DEFAULT 0,
f_int2 INTEGER DEFAULT 0,
@@ -1848,7 +1848,7 @@ PARTITION part2 VALUES LESS THAN (2147483646)
);
ERROR 42000: Wrong number of subpartitions defined, mismatch with previous setting near '),
PARTITION part2 VALUES LESS THAN (2147483646)
-(SUBPARTITION subpart21, SUBPAR' at line 11
+(SUBPARTITION subpart21, SUB...' at line 11
#========================================================================
# 5. Checks of logical partition/subpartition name
diff --git a/storage/tokudb/tokudb.cnf b/storage/tokudb/tokudb.cnf.in
index f94a128c02c..ff7f0a5f5f6 100644
--- a/storage/tokudb/tokudb.cnf
+++ b/storage/tokudb/tokudb.cnf.in
@@ -1,9 +1,8 @@
[mariadb]
-# See https://mariadb.com/kb/en/how-to-enable-tokudb-in-mariadb/
-# for instructions how to enable TokuDB
-#
# See https://mariadb.com/kb/en/tokudb-differences/ for differences
# between TokuDB in MariaDB and TokuDB from http://www.tokutek.com/
-#plugin-load-add=ha_tokudb.so
+plugin-load-add=ha_tokudb.so
+[mysqld_safe]
+@cnf_malloc_lib@
diff --git a/storage/tokudb/tokudb.conf.in b/storage/tokudb/tokudb.conf.in
new file mode 100644
index 00000000000..a5ff055f44c
--- /dev/null
+++ b/storage/tokudb/tokudb.conf.in
@@ -0,0 +1,2 @@
+[Service]
+@systemd_env@
diff --git a/storage/xtradb/CMakeLists.txt b/storage/xtradb/CMakeLists.txt
index 465d93eafc5..bed5946541c 100644
--- a/storage/xtradb/CMakeLists.txt
+++ b/storage/xtradb/CMakeLists.txt
@@ -14,6 +14,7 @@
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
# This is the CMakeLists for XtraDB
+RETURN() # XtraDB doesn't compile at all until it is upgraded to 5.7
INCLUDE(CheckFunctionExists)
INCLUDE(CheckCSourceCompiles)
@@ -23,12 +24,14 @@ INCLUDE(lzo)
INCLUDE(lzma)
INCLUDE(bzip2)
INCLUDE(snappy)
+INCLUDE(numa)
MYSQL_CHECK_LZ4()
MYSQL_CHECK_LZO()
MYSQL_CHECK_LZMA()
MYSQL_CHECK_BZIP2()
MYSQL_CHECK_SNAPPY()
+MYSQL_CHECK_NUMA()
IF(CMAKE_CROSSCOMPILING)
# Use CHECK_C_SOURCE_COMPILES instead of CHECK_C_SOURCE_RUNS when
@@ -88,6 +91,9 @@ SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DUNIV_DEBUG -DUNIV_SYNC_DEB
#ENDIF()
CHECK_FUNCTION_EXISTS(sched_getcpu HAVE_SCHED_GETCPU)
+IF(HAVE_SCHED_GETCPU)
+ ADD_DEFINITIONS(-DHAVE_SCHED_GETCPU)
+ENDIF()
MY_CHECK_AND_SET_COMPILER_FLAG("-Wno-class-memaccess")
@@ -249,11 +255,13 @@ IF(HAVE_IB_ATOMIC_PTHREAD_T_GCC)
ADD_DEFINITIONS(-DHAVE_IB_ATOMIC_PTHREAD_T_GCC=1)
ENDIF()
-CHECK_C_SOURCE_COMPILES("struct t1{ int a; char *b; }; struct t1 c= { .a=1, .b=0 }; main() { }" HAVE_C99_INITIALIZERS)
+CHECK_CXX_SOURCE_COMPILES("struct t1{ int a; char *b; }; struct t1 c= { .a=1, .b=0 }; main() { }" HAVE_C99_INITIALIZERS)
+IF(HAVE_C99_INITIALIZERS)
+ ADD_DEFINITIONS(-DHAVE_C99_INITIALIZERS)
+ENDIF()
ENDIF(NOT MSVC)
-CHECK_FUNCTION_EXISTS(asprintf HAVE_ASPRINTF)
CHECK_FUNCTION_EXISTS(vasprintf HAVE_VASPRINTF)
# Solaris atomics
@@ -506,7 +514,11 @@ ENDIF()
MYSQL_ADD_PLUGIN(xtradb ${INNOBASE_SOURCES} STORAGE_ENGINE
DEFAULT
RECOMPILE_FOR_EMBEDDED
- LINK_LIBRARIES ${ZLIB_LIBRARY} ${NUMA_LIBRARY} ${LINKER_SCRIPT})
+ LINK_LIBRARIES
+ ${ZLIB_LIBRARY}
+ ${CRC32_VPMSUM_LIBRARY}
+ ${NUMA_LIBRARY}
+ ${LINKER_SCRIPT})
IF(TARGET xtradb)
IF(NOT XTRADB_OK)
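
The CHECK_FUNCTION_EXISTS(sched_getcpu ...) probe above only sets a CMake variable; the added ADD_DEFINITIONS(-DHAVE_SCHED_GETCPU) is what makes the result visible to the compiler. A minimal sketch of how such a guard is typically consumed on the C++ side, assuming Linux/glibc (the fallback value is illustrative):

#include <cstdio>
#ifdef HAVE_SCHED_GETCPU
# include <sched.h>     /* glibc: sched_getcpu() */
#endif

/* CPU the calling thread currently runs on, or -1 when the platform
   probe found no sched_getcpu() and the definition was not added. */
static int current_cpu()
{
#ifdef HAVE_SCHED_GETCPU
	return sched_getcpu();
#else
	return -1;
#endif
}

int main()
{
	std::printf("running on cpu %d\n", current_cpu());
	return 0;
}
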
diff --git a/storage/xtradb/api/api0api.cc b/storage/xtradb/api/api0api.cc
index 1cb11d5e8c8..5cbe70f7aa6 100644
--- a/storage/xtradb/api/api0api.cc
+++ b/storage/xtradb/api/api0api.cc
@@ -2336,7 +2336,7 @@ ib_col_set_value(
if (len > 0 && cs->mbmaxlen > 1) {
true_len = (ulint)
- cs->cset->well_formed_len(
+ my_well_formed_length(
cs,
(const char*)src,
(const char*)src + len,
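
The call swap above moves from the charset method pointer cs->cset->well_formed_len to the my_well_formed_length() wrapper; both return how many leading bytes of a buffer form valid characters in the given charset, so over-long input is truncated at a character boundary rather than mid-sequence. A self-contained illustration of that computation for UTF-8 (hand-rolled for the sketch, not MariaDB's implementation; overlong encodings are not rejected):

#include <cstddef>
#include <cstdio>

/* Length in bytes of the longest well-formed UTF-8 prefix of [b, e).
   Mirrors what a charset's well-formed-length hook computes: stop at
   the first malformed sequence instead of storing garbage bytes. */
static size_t utf8_well_formed_len(const char *b, const char *e)
{
	const unsigned char *p   = reinterpret_cast<const unsigned char *>(b);
	const unsigned char *end = reinterpret_cast<const unsigned char *>(e);
	const unsigned char *ok  = p;
	while (p < end) {
		size_t len;
		if (*p < 0x80)              len = 1;
		else if ((*p & 0xE0) == 0xC0) len = 2;
		else if ((*p & 0xF0) == 0xE0) len = 3;
		else if ((*p & 0xF8) == 0xF0) len = 4;
		else break;                               /* invalid lead byte */
		if (end - p < static_cast<ptrdiff_t>(len))
			break;                                /* truncated sequence */
		bool good = true;
		for (size_t i = 1; i < len; i++)
			if ((p[i] & 0xC0) != 0x80) { good = false; break; }
		if (!good)
			break;                                /* bad continuation */
		p += len;
		ok = p;
	}
	return ok - reinterpret_cast<const unsigned char *>(b);
}

int main()
{
	const char s[] = "abc\xE2\x82\xAC\xFF";  /* "abc" + EUR sign + bad byte */
	std::printf("%zu\n", utf8_well_formed_len(s, s + sizeof(s) - 1)); /* 6 */
	return 0;
}
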
diff --git a/storage/xtradb/btr/btr0sea.cc b/storage/xtradb/btr/btr0sea.cc
index 5f3bcca4123..a820858ff23 100644
--- a/storage/xtradb/btr/btr0sea.cc
+++ b/storage/xtradb/btr/btr0sea.cc
@@ -54,17 +54,10 @@ UNIV_INTERN ulint btr_search_index_num;
/** A dummy variable to fool the compiler */
UNIV_INTERN ulint btr_search_this_is_zero = 0;
-#ifdef UNIV_SEARCH_PERF_STAT
-/** Number of successful adaptive hash index lookups */
-UNIV_INTERN ulint btr_search_n_succ = 0;
-/** Number of failed adaptive hash index lookups */
-UNIV_INTERN ulint btr_search_n_hash_fail = 0;
-#endif /* UNIV_SEARCH_PERF_STAT */
-
/** padding to prevent other memory update
hotspots from residing on the same memory
cache line as btr_search_latch */
-UNIV_INTERN byte btr_sea_pad1[64];
+UNIV_INTERN byte btr_sea_pad1[CACHE_LINE_SIZE];
/** Array of latches protecting individual AHI partitions. The latches
protect: (1) positions of records on those pages where a hash index from the
@@ -77,7 +70,7 @@ UNIV_INTERN prio_rw_lock_t* btr_search_latch_arr;
/** padding to prevent other memory update hotspots from residing on
the same memory cache line */
-UNIV_INTERN byte btr_sea_pad2[64];
+UNIV_INTERN byte btr_sea_pad2[CACHE_LINE_SIZE];
/** The adaptive hash index */
UNIV_INTERN btr_search_sys_t* btr_search_sys;
@@ -699,10 +692,6 @@ btr_search_info_update_slow(
if (cursor->flag == BTR_CUR_HASH_FAIL) {
/* Update the hash node reference, if appropriate */
-#ifdef UNIV_SEARCH_PERF_STAT
- btr_search_n_hash_fail++;
-#endif /* UNIV_SEARCH_PERF_STAT */
-
rw_lock_x_lock(btr_search_get_latch(cursor->index));
btr_search_update_hash_ref(info, block, cursor);
@@ -1060,7 +1049,6 @@ btr_search_guess_on_hash(
info->last_hash_succ = TRUE;
#ifdef UNIV_SEARCH_PERF_STAT
- btr_search_n_succ++;
#endif
if (UNIV_LIKELY(!has_search_latch)
&& buf_page_peek_if_too_old(&block->page)) {
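
btr_sea_pad1/btr_sea_pad2 exist only to keep the heavily written search latch off the cache lines of neighbouring globals; replacing the hard-coded 64 with CACHE_LINE_SIZE lets the padding track the target architecture. A minimal sketch of the same false-sharing avoidance in portable C++ (the constant and struct are illustrative, not InnoDB's):

#include <atomic>
#include <cstddef>

/* Assumed line size; real code takes it from the build system, or
   std::hardware_destructive_interference_size in C++17. */
constexpr size_t CACHE_LINE_SIZE = 64;

struct Counters {
	/* Without the alignment, both counters share one line and every
	   increment by one thread invalidates the other thread's copy. */
	alignas(CACHE_LINE_SIZE) std::atomic<unsigned long> hits{0};
	alignas(CACHE_LINE_SIZE) std::atomic<unsigned long> misses{0};
};

static_assert(sizeof(Counters) >= 2 * CACHE_LINE_SIZE,
              "each counter occupies its own cache line");

int main()
{
	Counters c;
	c.hits.fetch_add(1, std::memory_order_relaxed);
	c.misses.fetch_add(1, std::memory_order_relaxed);
	return 0;
}
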
diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc
index 398e1e84994..48f2d63bfb5 100644
--- a/storage/xtradb/buf/buf0buf.cc
+++ b/storage/xtradb/buf/buf0buf.cc
@@ -1433,10 +1433,11 @@ buf_chunk_init(
#ifdef HAVE_LIBNUMA
if (srv_numa_interleave) {
+ struct bitmask *numa_mems_allowed = numa_get_mems_allowed();
int st = mbind(chunk->mem, chunk->mem_size,
MPOL_INTERLEAVE,
- numa_all_nodes_ptr->maskp,
- numa_all_nodes_ptr->size,
+ numa_mems_allowed->maskp,
+ numa_mems_allowed->size,
MPOL_MF_MOVE);
if (st != 0) {
ib_logf(IB_LOG_LEVEL_WARN,
@@ -1837,11 +1838,13 @@ buf_pool_init(
#ifdef HAVE_LIBNUMA
if (srv_numa_interleave) {
+ struct bitmask *numa_mems_allowed = numa_get_mems_allowed();
+
ib_logf(IB_LOG_LEVEL_INFO,
"Setting NUMA memory policy to MPOL_INTERLEAVE");
if (set_mempolicy(MPOL_INTERLEAVE,
- numa_all_nodes_ptr->maskp,
- numa_all_nodes_ptr->size) != 0) {
+ numa_mems_allowed->maskp,
+ numa_mems_allowed->size) != 0) {
ib_logf(IB_LOG_LEVEL_WARN,
"Failed to set NUMA memory policy to"
" MPOL_INTERLEAVE (error: %s).",
diff --git a/storage/xtradb/buf/buf0dblwr.cc b/storage/xtradb/buf/buf0dblwr.cc
index 5df40b5f4e8..9f03778e572 100644
--- a/storage/xtradb/buf/buf0dblwr.cc
+++ b/storage/xtradb/buf/buf0dblwr.cc
@@ -638,7 +638,7 @@ bad:
if (page_no == 0) {
/* Check the FSP_SPACE_FLAGS. */
ulint flags = fsp_header_get_flags(page);
- if (!fsp_flags_is_valid(flags, space_id)
+ if (!fsp_flags_is_valid(flags)
&& fsp_flags_convert_from_101(flags)
== ULINT_UNDEFINED) {
ib_logf(IB_LOG_LEVEL_WARN,
diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc
index eccc7c281c1..9c427395231 100644
--- a/storage/xtradb/buf/buf0flu.cc
+++ b/storage/xtradb/buf/buf0flu.cc
@@ -2454,7 +2454,7 @@ ulint
af_get_pct_for_dirty()
/*==================*/
{
- ulint dirty_pct = buf_get_modified_ratio_pct();
+ ulint dirty_pct = (ulint) buf_get_modified_ratio_pct();
if (dirty_pct > 0 && srv_max_buf_pool_modified_pct == 0) {
return(100);
@@ -2474,7 +2474,7 @@ af_get_pct_for_dirty()
}
} else if (dirty_pct > srv_max_dirty_pages_pct_lwm) {
/* We should start flushing pages gradually. */
- return((dirty_pct * 100)
+ return (ulint) ((dirty_pct * 100)
/ (srv_max_buf_pool_modified_pct + 1));
}
@@ -2492,8 +2492,8 @@ af_get_pct_for_lsn(
{
lsn_t max_async_age;
lsn_t lsn_age_factor;
- lsn_t af_lwm = (srv_adaptive_flushing_lwm
- * log_get_capacity()) / 100;
+ lsn_t af_lwm = (lsn_t) ((srv_adaptive_flushing_lwm
+ * log_get_capacity()) / 100);
if (age < af_lwm) {
/* No adaptive flushing. */
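
The new casts only silence lossy-conversion warnings; the heuristic itself is plain integer arithmetic over two server knobs. A standalone sketch mirroring the branch structure of af_get_pct_for_dirty(), with the srv_* globals turned into parameters (the values in main are illustrative):

#include <cstdio>

/* Percentage of the configured flushing capacity to use, given the
   buffer pool's dirty-page ratio and the two thresholds (mirrors the
   branch structure of af_get_pct_for_dirty() above). */
static unsigned long pct_for_dirty(unsigned long dirty_pct,
                                   unsigned long max_dirty_pct,     /* srv_max_buf_pool_modified_pct */
                                   unsigned long max_dirty_pct_lwm) /* srv_max_dirty_pages_pct_lwm */
{
	if (dirty_pct > 0 && max_dirty_pct == 0)
		return 100;                /* no headroom configured: flush flat out */
	if (max_dirty_pct_lwm == 0) {
		if (dirty_pct >= max_dirty_pct)
			return 100;            /* over the hard limit */
	} else if (dirty_pct > max_dirty_pct_lwm) {
		/* Between the low-water mark and the limit: ramp up gradually. */
		return dirty_pct * 100 / (max_dirty_pct + 1);
	}
	return 0;
}

int main()
{
	std::printf("%lu%%\n", pct_for_dirty(40, 75, 10)); /* 40*100/76 = 52 */
	return 0;
}
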
diff --git a/storage/xtradb/dict/dict0load.cc b/storage/xtradb/dict/dict0load.cc
index 531174bc11f..b3a2932109c 100644
--- a/storage/xtradb/dict/dict0load.cc
+++ b/storage/xtradb/dict/dict0load.cc
@@ -490,7 +490,7 @@ err_len:
return("incorrect column length in SYS_FOREIGN");
}
- /* This recieves a dict_foreign_t* that points to a stack variable.
+ /* This receives a dict_foreign_t* that points to a stack variable.
So dict_foreign_free(foreign) is not used as elsewhere.
Since the heap used here is freed elsewhere, foreign->heap
is not assigned. */
diff --git a/storage/xtradb/fil/fil0crypt.cc b/storage/xtradb/fil/fil0crypt.cc
index 901b28c8c62..7219e641e30 100644
--- a/storage/xtradb/fil/fil0crypt.cc
+++ b/storage/xtradb/fil/fil0crypt.cc
@@ -505,7 +505,7 @@ fil_parse_write_crypt_data(
dberr_t* err)
{
/* check that redo log entry is complete */
- uint entry_size =
+ size_t entry_size =
4 + // size of space_id
2 + // size of offset
1 + // size of type
@@ -526,7 +526,7 @@ fil_parse_write_crypt_data(
ptr += 2;
uint type = mach_read_from_1(ptr);
ptr += 1;
- uint len = mach_read_from_1(ptr);
+ size_t len = mach_read_from_1(ptr);
ptr += 1;
if ((type != CRYPT_SCHEME_1 && type != CRYPT_SCHEME_UNENCRYPTED)
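
Widening entry_size and len from uint to size_t keeps the completeness check in the same type domain as the pointer arithmetic it guards. A toy version of the fixed-header-plus-variable-payload check, reduced to the fields visible in this hunk (mach_read_from_1() is simply a one-byte read):

#include <cstddef>
#include <cstdio>

/* Reads one byte, as InnoDB's mach_read_from_1() does. */
static unsigned read1(const unsigned char *p) { return *p; }

/* Returns the payload pointer if [ptr, end) holds a complete entry of
   the form: 4-byte space_id, 2-byte offset, 1-byte type, 1-byte len,
   then len bytes of payload; nullptr means "need more redo log". */
static const unsigned char *parse_entry(const unsigned char *ptr,
                                        const unsigned char *end)
{
	size_t header = 4 + 2 + 1 + 1;            /* fixed part, as entry_size */
	if (static_cast<size_t>(end - ptr) < header)
		return nullptr;
	size_t len = read1(ptr + 7);              /* variable part */
	if (static_cast<size_t>(end - ptr) < header + len)
		return nullptr;
	return ptr + header;
}

int main()
{
	unsigned char buf[] = {0,0,0,1, 0,38, 1, 2, 0xAA, 0xBB};
	std::printf("payload %s\n",
	            parse_entry(buf, buf + sizeof buf) ? "complete" : "partial");
	return 0;
}
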
diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc
index 766e9d91cfb..4a982df3fda 100644
--- a/storage/xtradb/fil/fil0fil.cc
+++ b/storage/xtradb/fil/fil0fil.cc
@@ -653,7 +653,7 @@ fil_node_open_file(
ut_free(buf2);
os_file_close(node->handle);
- if (!fsp_flags_is_valid(flags, space->id)) {
+ if (!fsp_flags_is_valid(flags)) {
ulint cflags = fsp_flags_convert_from_101(flags);
if (cflags == ULINT_UNDEFINED) {
ib_logf(IB_LOG_LEVEL_ERROR,
@@ -2242,7 +2242,7 @@ fil_read_first_page(
FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
}
- if (!fsp_flags_is_valid(*flags, *space_id)) {
+ if (!fsp_flags_is_valid(*flags)) {
ulint cflags = fsp_flags_convert_from_101(*flags);
if (cflags == ULINT_UNDEFINED) {
ib_logf(IB_LOG_LEVEL_ERROR,
@@ -2358,7 +2358,7 @@ fil_op_write_log(
ulint len;
log_ptr = mlog_open(mtr, 11 + 2 + 1);
- ut_ad(fsp_flags_is_valid(flags, space_id));
+ ut_ad(fsp_flags_is_valid(flags));
if (!log_ptr) {
/* Logging in mtr is switched off during crash recovery:
@@ -3548,7 +3548,7 @@ fil_create_new_single_table_tablespace(
ut_ad(!srv_read_only_mode);
ut_a(space_id < SRV_LOG_SPACE_FIRST_ID);
ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE);
- ut_a(fsp_flags_is_valid(flags & ~FSP_FLAGS_MEM_MASK, space_id));
+ ut_a(fsp_flags_is_valid(flags & ~FSP_FLAGS_MEM_MASK));
if (is_temp) {
/* Temporary table filepath */
@@ -3942,7 +3942,7 @@ void
fsp_flags_try_adjust(ulint space_id, ulint flags)
{
ut_ad(!srv_read_only_mode);
- ut_ad(fsp_flags_is_valid(flags, space_id));
+ ut_ad(fsp_flags_is_valid(flags));
mtr_t mtr;
mtr_start(&mtr);
@@ -4554,7 +4554,7 @@ fil_user_tablespace_restore_page(
flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page);
- if (!fsp_flags_is_valid(flags, fsp->id)) {
+ if (!fsp_flags_is_valid(flags)) {
ulint cflags = fsp_flags_convert_from_101(flags);
if (cflags == ULINT_UNDEFINED) {
ib_logf(IB_LOG_LEVEL_WARN,
diff --git a/storage/xtradb/fsp/fsp0fsp.cc b/storage/xtradb/fsp/fsp0fsp.cc
index fc2e192a3b1..5604a1b48c5 100644
--- a/storage/xtradb/fsp/fsp0fsp.cc
+++ b/storage/xtradb/fsp/fsp0fsp.cc
@@ -665,7 +665,7 @@ fsp_header_init_fields(
ulint flags) /*!< in: tablespace flags (FSP_SPACE_FLAGS) */
{
flags &= ~FSP_FLAGS_MEM_MASK;
- ut_a(fsp_flags_is_valid(flags, space_id));
+ ut_a(fsp_flags_is_valid(flags));
mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + page,
space_id);
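
fsp_flags_is_valid() loses its space_id parameter across these hunks because validity of FSP_SPACE_FLAGS no longer depends on which tablespace carries them, and every call site follows the same validate-or-convert pattern. A hedged sketch of that pattern, with the two helpers standing in for InnoDB's (the flag mask is hypothetical, not the real layout):

#include <cstdio>

typedef unsigned long ulint;              /* as in InnoDB */
const ulint ULINT_UNDEFINED = ~0UL;

/* Stand-ins for fsp_flags_is_valid() and fsp_flags_convert_from_101(). */
static bool flags_is_valid(ulint flags) { return (flags & ~0x3FFUL) == 0; }
static ulint convert_from_101(ulint)    { return ULINT_UNDEFINED; }

/* The call-site pattern from the hunks above: accept current flags,
   else try the MariaDB 10.1 encoding, else report corruption. */
static bool check_space_flags(ulint flags)
{
	if (flags_is_valid(flags))
		return true;
	ulint cflags = convert_from_101(flags);
	if (cflags == ULINT_UNDEFINED) {
		std::fprintf(stderr, "invalid FSP_SPACE_FLAGS %#lx\n", flags);
		return false;
	}
	return true;                          /* converted flags are usable */
}

int main() { return check_space_flags(0x1UL) ? 0 : 1; }
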
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
index 2ec16280eb6..23a0e90f849 100644
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -222,6 +222,11 @@ set by user, however, it will be adjusted to the newer file format if
a table of such format is created/opened. */
static char* innobase_file_format_max = NULL;
+/** Default value of innodb_file_format */
+static const char* innodb_file_format_default = "Barracuda";
+/** Default value of innodb_file_format_max */
+static const char* innodb_file_format_max_default = "Antelope";
+
static char* innobase_file_flush_method = NULL;
/* This variable can be set in the server configure file, specifying
@@ -1018,7 +1023,7 @@ static MYSQL_THDVAR_BOOL(table_locks, PLUGIN_VAR_OPCMDARG,
static MYSQL_THDVAR_BOOL(strict_mode, PLUGIN_VAR_OPCMDARG,
"Use strict mode when evaluating create options.",
- NULL, NULL, FALSE);
+ NULL, NULL, TRUE);
static MYSQL_THDVAR_BOOL(ft_enable_stopword, PLUGIN_VAR_OPCMDARG,
"Create FTS index with stopword.",
@@ -2174,39 +2179,6 @@ ha_innobase::is_fake_change_enabled(THD* thd)
}
/********************************************************************//**
-In XtraDB it is impossible for a transaction to own a search latch outside of
-InnoDB code, so there is nothing to release on demand. We keep this function to
-simplify maintenance.
-@return 0 */
-static
-int
-innobase_release_temporary_latches(
-/*===============================*/
- handlerton* hton MY_ATTRIBUTE((unused)), /*!< in: handlerton */
- THD* thd MY_ATTRIBUTE((unused))) /*!< in: MySQL thread */
-{
-#ifdef UNIV_DEBUG
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- if (!innodb_inited || thd == NULL) {
-
- return(0);
- }
-
- trx_t* trx = thd_to_trx(thd);
-
- if (trx != NULL) {
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!btr_search_own_any());
-#endif
- trx_search_latch_release_if_reserved(trx);
- }
-#endif
-
- return(0);
-}
-
-/********************************************************************//**
Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth
time calls srv_active_wake_master_thread. This function should be used
when a single database operation may introduce a small need for
@@ -3884,9 +3856,6 @@ innobase_init(
innobase_hton->flags = HTON_SUPPORTS_EXTENDED_KEYS |
HTON_SUPPORTS_FOREIGN_KEYS;
- innobase_hton->release_temporary_latches =
- innobase_release_temporary_latches;
-
innobase_hton->kill_query = innobase_kill_query;
if (srv_file_per_table)
@@ -6320,9 +6289,6 @@ ha_innobase::open(
thd = ha_thd();
- /* No-op in XtraDB */
- innobase_release_temporary_latches(ht, thd);
-
normalize_table_name(norm_name, name);
user_thd = NULL;
@@ -6781,9 +6747,6 @@ ha_innobase::close()
thd = ha_thd();
- /* No-op in XtraDB */
- innobase_release_temporary_latches(ht, thd);
-
row_prebuilt_free(prebuilt, FALSE);
if (upd_buf != NULL) {
@@ -8902,8 +8865,8 @@ no_commit:
/* We need the upper limit of the col type to check for
whether we update the table autoinc counter or not. */
- col_max_value =
- table->next_number_field->get_max_int_value();
+ col_max_value = innobase_get_int_col_max_value(
+ table->next_number_field);
/* Get the value that MySQL attempted to store in the table.*/
auto_inc = table->next_number_field->val_uint();
@@ -8978,33 +8941,15 @@ set_max_autoinc:
/* This should filter out the negative
values set explicitly by the user. */
if (auto_inc <= col_max_value) {
+ ut_a(prebuilt->autoinc_increment > 0);
ulonglong offset;
ulonglong increment;
dberr_t err;
-#ifdef WITH_WSREP
- /* Applier threads which are processing
- ROW events and don't go through server
- level autoinc processing, therefore
- prebuilt autoinc values don't get
- properly assigned. Fetch values from
- server side. */
- if (trx->is_wsrep() &&
- wsrep_thd_exec_mode(user_thd) == REPL_RECV)
- {
- wsrep_thd_auto_increment_variables(
- user_thd, &offset, &increment);
- }
- else
- {
-#endif /* WITH_WSREP */
- ut_a(prebuilt->autoinc_increment > 0);
- offset = prebuilt->autoinc_offset;
- increment = prebuilt->autoinc_increment;
-#ifdef WITH_WSREP
- }
-#endif /* WITH_WSREP */
+ offset = prebuilt->autoinc_offset;
+ increment = prebuilt->autoinc_increment;
+
auto_inc = innobase_next_autoinc(
auto_inc,
1, increment, offset,
@@ -9514,35 +9459,17 @@ ha_innobase::update_row(
/* We need the upper limit of the col type to check for
whether we update the table autoinc counter or not. */
- col_max_value =
- table->next_number_field->get_max_int_value();
+ col_max_value = innobase_get_int_col_max_value(
+ table->next_number_field);
if (auto_inc <= col_max_value && auto_inc != 0) {
ulonglong offset;
ulonglong increment;
-#ifdef WITH_WSREP
- /* Applier threads which are processing
- ROW events and don't go through server
- level autoinc processing, therefore
- prebuilt autoinc values don't get
- properly assigned. Fetch values from
- server side. */
- if (trx->is_wsrep() &&
- wsrep_thd_exec_mode(user_thd) == REPL_RECV)
- {
- wsrep_thd_auto_increment_variables(
- user_thd, &offset, &increment);
- }
- else
- {
-#endif /* WITH_WSREP */
- offset = prebuilt->autoinc_offset;
- increment = prebuilt->autoinc_increment;
-#ifdef WITH_WSREP
- }
-#endif /* WITH_WSREP */
+ offset = prebuilt->autoinc_offset;
+ increment = prebuilt->autoinc_increment;
+
auto_inc = innobase_next_autoinc(
auto_inc, 1, increment, offset, col_max_value);
@@ -19886,7 +19813,7 @@ static MYSQL_SYSVAR_ENUM(checksum_algorithm, srv_checksum_algorithm,
"magic number when reading; "
"Files updated when this option is set to crc32 or strict_crc32 will "
"not be readable by MySQL versions older than 5.6.3",
- NULL, NULL, SRV_CHECKSUM_ALGORITHM_INNODB,
+ NULL, NULL, SRV_CHECKSUM_ALGORITHM_CRC32,
&innodb_checksum_algorithm_typelib);
@@ -20019,7 +19946,7 @@ static MYSQL_SYSVAR_ULONG(purge_threads, srv_n_purge_threads,
PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
"Purge threads can be from 1 to 32. Default is 1.",
NULL, NULL,
- 1, /* Default setting */
+ 4, /* Default setting */
1, /* Minimum value */
SRV_MAX_N_PURGE_THREADS, 0); /* Maximum value */
@@ -20046,7 +19973,7 @@ static MYSQL_SYSVAR_STR(file_format, innobase_file_format_name,
PLUGIN_VAR_RQCMDARG,
"File format to use for new tables in .ibd files.",
innodb_file_format_name_validate,
- innodb_file_format_name_update, "Antelope");
+ innodb_file_format_name_update, innodb_file_format_default);
/* "innobase_file_format_check" decides whether we would continue
booting the server if the file format stamped on the system
@@ -20067,7 +19994,7 @@ static MYSQL_SYSVAR_STR(file_format_max, innobase_file_format_max,
PLUGIN_VAR_OPCMDARG,
"The highest file format in the tablespace.",
innodb_file_format_max_validate,
- innodb_file_format_max_update, "Antelope");
+ innodb_file_format_max_update, innodb_file_format_max_default);
static MYSQL_SYSVAR_STR(ft_server_stopword_table, innobase_server_stopword_table,
PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_MEMALLOC,
@@ -20101,7 +20028,7 @@ static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method,
static MYSQL_SYSVAR_BOOL(large_prefix, innobase_large_prefix,
PLUGIN_VAR_NOCMDARG,
"Support large index prefix length of REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes.",
- NULL, NULL, FALSE);
+ NULL, NULL, TRUE);
static MYSQL_SYSVAR_BOOL(force_load_corrupted, srv_load_corrupted,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
@@ -20453,12 +20380,12 @@ static MYSQL_SYSVAR_BOOL(buffer_pool_dump_now, innodb_buffer_pool_dump_now,
static MYSQL_SYSVAR_BOOL(buffer_pool_dump_at_shutdown, srv_buffer_pool_dump_at_shutdown,
PLUGIN_VAR_RQCMDARG,
"Dump the buffer pool into a file named @@innodb_buffer_pool_filename",
- NULL, NULL, FALSE);
+ NULL, NULL, TRUE);
static MYSQL_SYSVAR_ULONG(buffer_pool_dump_pct, srv_buf_pool_dump_pct,
PLUGIN_VAR_RQCMDARG,
- "Dump only the hottest N% of each buffer pool, defaults to 100",
- NULL, NULL, 100, 1, 100, 0);
+ "Dump only the hottest N% of each buffer pool, defaults to 25",
+ NULL, NULL, 25, 1, 100, 0);
#ifdef UNIV_DEBUG
static MYSQL_SYSVAR_STR(buffer_pool_evict, srv_buffer_pool_evict,
@@ -20481,7 +20408,7 @@ static MYSQL_SYSVAR_BOOL(buffer_pool_load_abort, innodb_buffer_pool_load_abort,
static MYSQL_SYSVAR_BOOL(buffer_pool_load_at_startup, srv_buffer_pool_load_at_startup,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Load the buffer pool from a file named @@innodb_buffer_pool_filename",
- NULL, NULL, FALSE);
+ NULL, NULL, TRUE);
static MYSQL_SYSVAR_BOOL(defragment, srv_defragment,
PLUGIN_VAR_RQCMDARG,
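
[editor's note] The write_row and update_row hunks above drop the wsrep applier special case, always take offset/increment from prebuilt before calling innobase_next_autoinc, and revert col_max_value to innobase_get_int_col_max_value(). A hedged sketch, with invented names, of the kind of grid arithmetic such a next-autoinc helper performs; this is not the literal XtraDB implementation:

    // Hedged sketch: smallest grid value offset + n*increment strictly
    // greater than `current`, clamped at the column's maximum.
    static unsigned long long
    next_autoinc_sketch(unsigned long long current,
                        unsigned long long increment,
                        unsigned long long offset,
                        unsigned long long col_max_value)
    {
        if (current < offset)
            return offset;                        // first value on the grid
        if (current > col_max_value - increment)
            return col_max_value;                 // would overflow: clamp
        unsigned long long n = (current - offset) / increment + 1;
        return offset + n * increment;            // next grid point
    }
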
diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc
index e1b2dcd2441..2ce30488d8c 100644
--- a/storage/xtradb/handler/handler0alter.cc
+++ b/storage/xtradb/handler/handler0alter.cc
@@ -393,7 +393,7 @@ ha_innobase::check_if_supported_inplace_alter(
const Field* field = table->field[i];
const dict_col_t* col = dict_table_get_nth_col(prebuilt->table, icol);
ulint unsigned_flag;
- if (!field->stored_in_db)
+ if (!field->stored_in_db())
continue;
icol++;
@@ -426,7 +426,7 @@ ha_innobase::check_if_supported_inplace_alter(
we must use "Copy" method. */
if (is_timestamp_type(def->sql_type)) {
if ((def->flags & NOT_NULL_FLAG) != 0 && // NOT NULL
- (def->def != NULL || // constant default ?
+ (def->default_value != NULL || // constant default ?
def->unireg_check != Field::NONE)) { // function default
ha_alter_info->unsupported_reason = innobase_get_err_msg(
ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOT_NULL);
@@ -1102,9 +1102,9 @@ innobase_get_foreign_key_info(
referenced_column_names, referenced_num_col)) {
mutex_exit(&dict_sys->mutex);
my_error(
- ER_FK_DUP_NAME,
+ ER_DUP_CONSTRAINT_NAME,
MYF(0),
- add_fk[num_fk]->id);
+ "FOREIGN KEY", add_fk[num_fk]->id);
goto err_exit;
}
@@ -1253,7 +1253,7 @@ innobase_rec_to_mysql(
ulint ilen;
const uchar* ifield;
- while (!((field= table->field[sql_idx])->stored_in_db))
+ while (!((field= table->field[sql_idx])->stored_in_db()))
sql_idx++;
field->reset();
@@ -1306,7 +1306,7 @@ innobase_fields_to_mysql(
Field* field;
ulint ipos;
- while (!((field= table->field[sql_idx])->stored_in_db))
+ while (!((field= table->field[sql_idx])->stored_in_db()))
sql_idx++;
field->reset();
@@ -1355,7 +1355,7 @@ innobase_row_to_mysql(
Field* field;
const dfield_t* df = dtuple_get_nth_field(row, i);
- while (!((field= table->field[sql_idx])->stored_in_db))
+ while (!((field= table->field[sql_idx])->stored_in_db()))
sql_idx++;
field->reset();
@@ -1665,7 +1665,7 @@ innobase_fts_check_doc_id_col(
for (i = 0; i < n_cols; i++, sql_idx++) {
const Field* field;
while (!((field= altered_table->field[sql_idx])->
- stored_in_db))
+ stored_in_db()))
sql_idx++;
if (my_strcasecmp(system_charset_info,
field->field_name, FTS_DOC_ID_COL_NAME)) {
@@ -2539,7 +2539,7 @@ innobase_build_col_map(
}
while (const Create_field* new_field = cf_it++) {
- if (!new_field->stored_in_db)
+ if (!new_field->stored_in_db())
{
sql_idx++;
continue;
@@ -2548,7 +2548,7 @@ innobase_build_col_map(
table->field[old_i];
old_i++) {
const Field* field = table->field[old_i];
- if (!table->field[old_i]->stored_in_db)
+ if (!table->field[old_i]->stored_in_db())
continue;
if (new_field->field == field) {
col_map[old_innobase_i] = i;
@@ -2945,7 +2945,7 @@ prepare_inplace_alter_table_dict(
for (uint i = 0; i < altered_table->s->stored_fields; i++, sql_idx++) {
const Field* field;
while (!((field= altered_table->field[sql_idx])->
- stored_in_db))
+ stored_in_db()))
sql_idx++;
ulint is_unsigned;
ulint field_type
@@ -3851,7 +3851,7 @@ check_if_ok_to_rename:
}
my_error(ER_CANT_DROP_FIELD_OR_KEY, MYF(0),
- drop->name);
+ drop->type_name(), drop->name);
goto err_exit;
found_fk:
for (ulint i = n_drop_fk; i--; ) {
@@ -4125,7 +4125,7 @@ func_exit:
ha_alter_info->alter_info->create_list);
while (const Create_field* new_field = cf_it++) {
const Field* field;
- if (!new_field->stored_in_db) {
+ if (!new_field->stored_in_db()) {
i++;
continue;
}
@@ -4134,7 +4134,7 @@ func_exit:
DBUG_ASSERT(innodb_idx < altered_table->s->stored_fields);
for (uint old_i = 0; table->field[old_i]; old_i++) {
- if (!table->field[old_i]->stored_in_db)
+ if (!table->field[old_i]->stored_in_db())
continue;
if (new_field->field == table->field[old_i]) {
goto found_col;
@@ -4858,7 +4858,7 @@ innobase_rename_columns_try(
& Alter_inplace_info::ALTER_COLUMN_NAME);
for (Field** fp = table->field; *fp; fp++, i++) {
- if (!((*fp)->flags & FIELD_IS_RENAMED) || !((*fp)->stored_in_db)) {
+ if (!((*fp)->flags & FIELD_IS_RENAMED) || !((*fp)->stored_in_db())) {
continue;
}
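
[editor's note] The recurring change in handler0alter.cc is stored_in_db turning into a method call, stored_in_db(); every loop that walks the server's field array in step with InnoDB's column array must skip virtual (non-stored) generated columns. A hedged sketch of that idiom, with a hypothetical helper name:

    // Hedged sketch: advance past virtual generated columns so only
    // stored fields are matched against engine-side columns. `fields`
    // is a NULL-terminated Field** as in TABLE::field.
    static Field* next_stored_field(Field** fields, unsigned int& sql_idx)
    {
        while (fields[sql_idx] && !fields[sql_idx]->stored_in_db())
            sql_idx++;              // virtual column: no InnoDB counterpart
        return fields[sql_idx];     // NULL once the array is exhausted
    }
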
diff --git a/storage/xtradb/include/btr0sea.h b/storage/xtradb/include/btr0sea.h
index 9d415b10a98..0b7f5f6fded 100644
--- a/storage/xtradb/include/btr0sea.h
+++ b/storage/xtradb/include/btr0sea.h
@@ -326,13 +326,6 @@ struct btr_search_sys_t{
/** The adaptive hash index */
extern btr_search_sys_t* btr_search_sys;
-#ifdef UNIV_SEARCH_PERF_STAT
-/** Number of successful adaptive hash index lookups */
-extern ulint btr_search_n_succ;
-/** Number of failed adaptive hash index lookups */
-extern ulint btr_search_n_hash_fail;
-#endif /* UNIV_SEARCH_PERF_STAT */
-
/** After change in n_fields or n_bytes in info, this many rounds are waited
before starting the hash analysis again: this is to save CPU time when there
is no hope in building a hash index. */
diff --git a/storage/xtradb/include/dict0dict.ic b/storage/xtradb/include/dict0dict.ic
index 475391a3f75..7256f7f119b 100644
--- a/storage/xtradb/include/dict0dict.ic
+++ b/storage/xtradb/include/dict0dict.ic
@@ -897,7 +897,7 @@ dict_tf_to_fsp_flags(
fsp_flags |= FSP_FLAGS_MASK_PAGE_COMPRESSION;
}
- ut_a(fsp_flags_is_valid(fsp_flags, false));
+ ut_a(fsp_flags_is_valid(fsp_flags));
if (DICT_TF_HAS_DATA_DIR(table_flags)) {
fsp_flags |= 1U << FSP_FLAGS_MEM_DATA_DIR;
diff --git a/storage/xtradb/include/fsp0fsp.h b/storage/xtradb/include/fsp0fsp.h
index b31a7574529..c8359473670 100644
--- a/storage/xtradb/include/fsp0fsp.h
+++ b/storage/xtradb/include/fsp0fsp.h
@@ -787,12 +787,11 @@ fseg_print(
/** Validate the tablespace flags, which are stored in the
tablespace header at offset FSP_SPACE_FLAGS.
@param[in] flags the contents of FSP_SPACE_FLAGS
-@param[in] is_ibd whether this is an .ibd file (not system tablespace)
@return whether the flags are correct (not in the buggy 10.1 format) */
MY_ATTRIBUTE((warn_unused_result, const))
UNIV_INLINE
bool
-fsp_flags_is_valid(ulint flags, bool is_ibd)
+fsp_flags_is_valid(ulint flags)
{
DBUG_EXECUTE_IF("fsp_flags_is_valid_failure",
return(false););
@@ -816,7 +815,7 @@ fsp_flags_is_valid(ulint flags, bool is_ibd)
bits 10..14 would be nonzero 0bsssaa where sss is
nonzero PAGE_SSIZE (3, 4, 6, or 7)
and aa is ATOMIC_WRITES (not 0b11). */
- if (FSP_FLAGS_GET_RESERVED(flags) & ~1U) {
+ if (FSP_FLAGS_GET_RESERVED(flags) & ~1) {
return(false);
}
@@ -839,12 +838,7 @@ fsp_flags_is_valid(ulint flags, bool is_ibd)
return(false);
}
- /* The flags do look valid. But, avoid misinterpreting
- buggy MariaDB 10.1 format flags for
- PAGE_COMPRESSED=1 PAGE_COMPRESSION_LEVEL={0,2,3}
- as valid-looking PAGE_SSIZE if this is known to be
- an .ibd file and we are using the default innodb_page_size=16k. */
- return(ssize == 0 || !is_ibd || srv_page_size != UNIV_PAGE_SIZE_ORIG);
+ return(true);
}
/** Convert FSP_SPACE_FLAGS from the buggy MariaDB 10.1.0..10.1.20 format.
@@ -953,7 +947,7 @@ fsp_flags_convert_from_101(ulint flags)
flags = ((flags & 0x3f) | ssize << FSP_FLAGS_POS_PAGE_SSIZE
| FSP_FLAGS_GET_PAGE_COMPRESSION_MARIADB101(flags)
<< FSP_FLAGS_POS_PAGE_COMPRESSION);
- ut_ad(fsp_flags_is_valid(flags, false));
+ ut_ad(fsp_flags_is_valid(flags));
return(flags);
}
@@ -967,7 +961,7 @@ bool
fsp_flags_match(ulint expected, ulint actual)
{
expected &= ~FSP_FLAGS_MEM_MASK;
- ut_ad(fsp_flags_is_valid(expected, false));
+ ut_ad(fsp_flags_is_valid(expected));
if (actual == expected) {
return(true);
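
[editor's note] fsp_flags_is_valid() loses its is_ibd parameter: validity is now a pure bit-level check, and the buggy MariaDB 10.1 flags are handled only by fsp_flags_convert_from_101(). As a reminder of the encoding these checks inspect, a hedged sketch decoding PAGE_SSIZE (0 means the original 16 KiB default; otherwise the page size is 512 << ssize, so the 3, 4, 6, 7 mentioned above map to 4, 8, 32, 64 KiB):

    /* Hedged sketch, not a patch hunk: decode the PAGE_SSIZE field that
       fsp_flags_is_valid() range-checks. */
    static unsigned long page_size_from_ssize(unsigned long ssize)
    {
        return ssize ? (512UL << ssize) : 16384UL;  /* 0 = original default */
    }
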
diff --git a/storage/xtradb/include/ha_prototypes.h b/storage/xtradb/include/ha_prototypes.h
index e5b545e0727..2488918436f 100644
--- a/storage/xtradb/include/ha_prototypes.h
+++ b/storage/xtradb/include/ha_prototypes.h
@@ -33,6 +33,7 @@ Created 5/11/2006 Osku Salerma
#include "my_sys.h"
#include "m_string.h"
#include "my_base.h"
+#include "dur_prop.h"
#ifndef UNIV_INNOCHECKSUM
#include "mysqld_error.h"
diff --git a/storage/xtradb/include/log0log.h b/storage/xtradb/include/log0log.h
index 5f4ca08c03c..7ab06a8e4ef 100644
--- a/storage/xtradb/include/log0log.h
+++ b/storage/xtradb/include/log0log.h
@@ -836,7 +836,7 @@ struct log_group_t{
/** Redo log buffer */
struct log_t{
- byte pad[64]; /*!< padding to prevent other memory
+ byte pad[CACHE_LINE_SIZE]; /*!< padding to prevent other memory
update hotspots from residing on the
same memory cache line */
lsn_t lsn; /*!< log sequence number */
diff --git a/storage/xtradb/include/os0sync.h b/storage/xtradb/include/os0sync.h
index d2dfcca5448..aafaf13963d 100644
--- a/storage/xtradb/include/os0sync.h
+++ b/storage/xtradb/include/os0sync.h
@@ -39,12 +39,11 @@ Created 9/6/1995 Heikki Tuuri
#include "ut0lst.h"
#include "sync0types.h"
-/** CPU cache line size */
-#ifdef __powerpc__
-#define CACHE_LINE_SIZE 128
+#ifdef CPU_LEVEL1_DCACHE_LINESIZE
+# define CACHE_LINE_SIZE CPU_LEVEL1_DCACHE_LINESIZE
#else
-#define CACHE_LINE_SIZE 64
-#endif
+# error CPU_LEVEL1_DCACHE_LINESIZE is undefined
+#endif /* CPU_LEVEL1_DCACHE_LINESIZE */
#ifdef HAVE_WINDOWS_ATOMICS
typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates
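
[editor's note] CACHE_LINE_SIZE now comes from the build system's CPU_LEVEL1_DCACHE_LINESIZE probe instead of a hard-coded 64/128 split on __powerpc__, and the pad[64] arrays in log_t above and trx_sys_t below follow suit. A minimal sketch of the false-sharing pattern these pads address, assuming a CACHE_LINE_SIZE macro like the one defined here:

    // Hedged sketch: keep two hot counters on separate cache lines so
    // writers on different CPUs do not invalidate each other's line.
    #ifndef CACHE_LINE_SIZE
    # define CACHE_LINE_SIZE 64  /* assumed fallback; the build normally
                                    supplies CPU_LEVEL1_DCACHE_LINESIZE */
    #endif
    struct hot_pair {
        unsigned long a;                                    // writer group A
        char pad[CACHE_LINE_SIZE - sizeof(unsigned long)];  // no false sharing
        unsigned long b;                                    // writer group B
    };
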
diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h
index c222c419f9d..8461811db31 100644
--- a/storage/xtradb/include/srv0srv.h
+++ b/storage/xtradb/include/srv0srv.h
@@ -322,9 +322,6 @@ use simulated aio we build below with threads.
Currently we support native aio on windows and linux */
extern my_bool srv_use_native_aio;
extern my_bool srv_numa_interleave;
-#ifdef __WIN__
-extern ibool srv_use_native_conditions;
-#endif /* __WIN__ */
#endif /* !UNIV_HOTBACKUP */
/* Use trim operation */
diff --git a/storage/xtradb/include/trx0sys.h b/storage/xtradb/include/trx0sys.h
index ed1820825fc..971613a372d 100644
--- a/storage/xtradb/include/trx0sys.h
+++ b/storage/xtradb/include/trx0sys.h
@@ -743,17 +743,17 @@ struct trx_sys_t{
trx_id_t max_trx_id; /*!< The smallest number not yet
assigned as a transaction id or
transaction number */
- char pad1[64]; /*!< Ensure max_trx_id does not share
+ char pad1[CACHE_LINE_SIZE]; /*!< Ensure max_trx_id does not share
cache line with other fields. */
trx_id_t* descriptors; /*!< Array of trx descriptors */
ulint descr_n_max; /*!< The current size of the descriptors
array. */
- char pad2[64]; /*!< Ensure static descriptor fields
+ char pad2[CACHE_LINE_SIZE]; /*!< Ensure static descriptor fields
do not share cache lines with
descr_n_used */
ulint descr_n_used; /*!< Number of used elements in the
descriptors array. */
- char pad3[64]; /*!< Ensure descriptors do not share
+ char pad3[CACHE_LINE_SIZE]; /*!< Ensure descriptors do not share
cache line with other fields */
#ifdef UNIV_DEBUG
trx_id_t rw_max_trx_id; /*!< Max trx id of read-write transactions
@@ -763,7 +763,7 @@ struct trx_sys_t{
memory read-write transactions, sorted
on trx id, biggest first. Recovered
transactions are always on this list. */
- char pad4[64]; /*!< Ensure list base nodes do not
+ char pad4[CACHE_LINE_SIZE]; /*!< Ensure list base nodes do not
share cache line with other fields */
trx_list_t ro_trx_list; /*!< List of active and committed in
memory read-only transactions, sorted
@@ -772,7 +772,7 @@ struct trx_sys_t{
is not necessary. We should exploit
this and increase concurrency during
add/remove. */
- char pad5[64]; /*!< Ensure list base nodes do not
+ char pad5[CACHE_LINE_SIZE]; /*!< Ensure list base nodes do not
share cache line with other fields */
trx_list_t mysql_trx_list; /*!< List of transactions created
for MySQL. All transactions on
@@ -786,14 +786,14 @@ struct trx_sys_t{
mysql_trx_list may additionally contain
transactions that have not yet been
started in InnoDB. */
- char pad6[64]; /*!< Ensure list base nodes do not
+ char pad6[CACHE_LINE_SIZE]; /*!< Ensure list base nodes do not
share cache line with other fields */
trx_list_t trx_serial_list;
/*!< trx->no ordered List of
transactions in either TRX_PREPARED or
TRX_ACTIVE which have already been
assigned a serialization number */
- char pad7[64]; /*!< Ensure list base nodes do not
+ char pad7[CACHE_LINE_SIZE]; /*!< Ensure list base nodes do not
share cache line with other fields */
trx_rseg_t* const rseg_array[TRX_SYS_N_RSEGS];
/*!< Pointer array to rollback
diff --git a/storage/xtradb/include/trx0trx.h b/storage/xtradb/include/trx0trx.h
index 5fed8d68e50..2d7baff6b04 100644
--- a/storage/xtradb/include/trx0trx.h
+++ b/storage/xtradb/include/trx0trx.h
@@ -744,13 +744,10 @@ lock_rec_convert_impl_to_expl()) will access transactions associated
to other connections. The locks of transactions are protected by
lock_sys->mutex and sometimes by trx->mutex. */
-enum trx_abort_t {
+typedef enum {
TRX_SERVER_ABORT = 0,
-#ifdef WITH_WSREP
- TRX_WSREP_ABORT,
-#endif
- TRX_REPLICATION_ABORT
-};
+ TRX_WSREP_ABORT = 1
+} trx_abort_t;
struct trx_t{
ulint magic_n;
diff --git a/storage/xtradb/include/ut0counter.h b/storage/xtradb/include/ut0counter.h
index d2a6c1eb3e3..29d9625ae15 100644
--- a/storage/xtradb/include/ut0counter.h
+++ b/storage/xtradb/include/ut0counter.h
@@ -31,9 +31,19 @@ Created 2012/04/12 by Sunny Bains
#include "univ.i"
#include <string.h>
#include "os0thread.h"
-#include "os0sync.h"
#include "my_atomic.h"
+/** CPU cache line size */
+#ifndef UNIV_HOTBACKUP
+# ifdef CPU_LEVEL1_DCACHE_LINESIZE
+# define CACHE_LINE_SIZE CPU_LEVEL1_DCACHE_LINESIZE
+# else
+# error CPU_LEVEL1_DCACHE_LINESIZE is undefined
+# endif /* CPU_LEVEL1_DCACHE_LINESIZE */
+#else
+# define CACHE_LINE_SIZE 64
+#endif /* UNIV_HOTBACKUP */
+
/** Default number of slots to use in ib_counter_t */
#define IB_N_SLOTS 64
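
[editor's note] ut0counter.h now defines CACHE_LINE_SIZE itself because it no longer includes os0sync.h; IB_N_SLOTS sizes the per-slot array of ib_counter_t. A hedged sketch in the spirit of that counter, not the real template:

    // Hedged sketch: one slot per cache line, slot picked by a cheap
    // hash of the thread id, sum() reads all slots. Correct only if
    // each slot effectively has a single writer, which is what the
    // real ib_counter_t relies on.
    #ifndef CACHE_LINE_SIZE
    # define CACHE_LINE_SIZE 64                 // assumed fallback
    #endif
    struct counter_sketch {
        struct slot_t {
            unsigned long val;
            char          pad[CACHE_LINE_SIZE - sizeof(unsigned long)];
        } slots[IB_N_SLOTS];

        void add(unsigned long thread_id, unsigned long n)
        { slots[thread_id % IB_N_SLOTS].val += n; }

        unsigned long sum() const
        {
            unsigned long total = 0;
            for (unsigned i = 0; i < IB_N_SLOTS; i++) total += slots[i].val;
            return total;
        }
    };
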
diff --git a/storage/xtradb/include/ut0crc32.h b/storage/xtradb/include/ut0crc32.h
index 49a1bd132de..75ebe6934e0 100644
--- a/storage/xtradb/include/ut0crc32.h
+++ b/storage/xtradb/include/ut0crc32.h
@@ -46,7 +46,6 @@ typedef ib_uint32_t (*ib_ut_crc32_t)(const byte* ptr, ulint len);
extern ib_ut_crc32_t ut_crc32;
-extern bool ut_crc32_sse2_enabled;
-extern bool ut_crc32_power8_enabled;
+extern const char *ut_crc32_implementation;
#endif /* ut0crc32_h */
diff --git a/storage/xtradb/include/ut0wqueue.h b/storage/xtradb/include/ut0wqueue.h
index 4b0014e3091..136bac1cc18 100644
--- a/storage/xtradb/include/ut0wqueue.h
+++ b/storage/xtradb/include/ut0wqueue.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2019, MariaDB Corporation.
+Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -66,10 +66,15 @@ void
ib_wqueue_add(ib_wqueue_t* wq, void* item, mem_heap_t* heap,
bool wq_locked = false);
-/** Check if queue is empty.
-@param wq wait queue
-@return whether the queue is empty */
-bool ib_wqueue_is_empty(ib_wqueue_t* wq);
+/********************************************************************
+Check if queue is empty. */
+
+ibool
+ib_wqueue_is_empty(
+/*===============*/
+ /* out: TRUE if queue empty
+ else FALSE */
+ const ib_wqueue_t* wq); /* in: work queue */
/****************************************************************//**
Wait for a work item to appear in the queue.
diff --git a/storage/xtradb/mysql-test/storage_engine/lock_concurrent.rdiff b/storage/xtradb/mysql-test/storage_engine/lock_concurrent.rdiff
index fe4a0087fa9..6b7a52046e2 100644
--- a/storage/xtradb/mysql-test/storage_engine/lock_concurrent.rdiff
+++ b/storage/xtradb/mysql-test/storage_engine/lock_concurrent.rdiff
@@ -1,7 +1,7 @@
--- suite/storage_engine/lock_concurrent.result 2012-06-24 23:55:19.539380000 +0400
+++ suite/storage_engine/lock_concurrent.reject 2012-07-15 17:50:21.279222746 +0400
-@@ -3,10 +3,19 @@
- LOCK TABLES t1 WRITE CONCURRENT, t1 AS t2 READ;
+@@ -4,6 +4,14 @@
+ connect con1,localhost,root,,;
SET lock_wait_timeout = 1;
LOCK TABLES t1 READ LOCAL;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
@@ -13,10 +13,13 @@
+# Also, this problem may cause a chain effect (more errors of different kinds in the test).
+# -------------------------------------------
UNLOCK TABLES;
+ connection default;
UNLOCK TABLES;
+@@ -11,6 +19,7 @@
LOCK TABLES t1 READ LOCAL;
+ connection default;
LOCK TABLES t1 WRITE CONCURRENT, t1 AS t2 READ;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
UNLOCK TABLES;
+ connection con1;
UNLOCK TABLES;
- DROP TABLE t1;
diff --git a/storage/xtradb/mysql-test/storage_engine/parts/suite.pm b/storage/xtradb/mysql-test/storage_engine/parts/suite.pm
new file mode 100644
index 00000000000..e186a532dcc
--- /dev/null
+++ b/storage/xtradb/mysql-test/storage_engine/parts/suite.pm
@@ -0,0 +1,8 @@
+package My::Suite::SE::XtraDB;
+
+@ISA = qw(My::Suite);
+
+return "Need XtraDB engine";
+
+bless { };
+
diff --git a/storage/xtradb/mysql-test/storage_engine/suite.pm b/storage/xtradb/mysql-test/storage_engine/suite.pm
new file mode 100644
index 00000000000..e186a532dcc
--- /dev/null
+++ b/storage/xtradb/mysql-test/storage_engine/suite.pm
@@ -0,0 +1,8 @@
+package My::Suite::SE::XtraDB;
+
+@ISA = qw(My::Suite);
+
+return "Need XtraDB engine";
+
+bless { };
+
diff --git a/storage/xtradb/mysql-test/storage_engine/tbl_opt_data_index_dir.rdiff b/storage/xtradb/mysql-test/storage_engine/tbl_opt_index_dir.rdiff
index e09e50b17ec..e09e50b17ec 100644
--- a/storage/xtradb/mysql-test/storage_engine/tbl_opt_data_index_dir.rdiff
+++ b/storage/xtradb/mysql-test/storage_engine/tbl_opt_index_dir.rdiff
diff --git a/storage/xtradb/mysql-test/storage_engine/trx/suite.pm b/storage/xtradb/mysql-test/storage_engine/trx/suite.pm
new file mode 100644
index 00000000000..e186a532dcc
--- /dev/null
+++ b/storage/xtradb/mysql-test/storage_engine/trx/suite.pm
@@ -0,0 +1,8 @@
+package My::Suite::SE::XtraDB;
+
+@ISA = qw(My::Suite);
+
+return "Need XtraDB engine";
+
+bless { };
+
diff --git a/storage/xtradb/os/os0sync.cc b/storage/xtradb/os/os0sync.cc
index e409529f132..675832c49e6 100644
--- a/storage/xtradb/os/os0sync.cc
+++ b/storage/xtradb/os/os0sync.cc
@@ -64,32 +64,6 @@ UNIV_INTERN mysql_pfs_key_t event_os_mutex_key;
UNIV_INTERN mysql_pfs_key_t os_mutex_key;
#endif
-/* On Windows (Vista and later), load function pointers for condition
-variable handling. Those functions are not available in prior versions,
-so we have to use them via runtime loading, as long as we support XP. */
-static void os_cond_module_init(void);
-
-#ifdef __WIN__
-/* Prototypes and function pointers for condition variable functions */
-typedef VOID (WINAPI* InitializeConditionVariableProc)
- (PCONDITION_VARIABLE ConditionVariable);
-static InitializeConditionVariableProc initialize_condition_variable;
-
-typedef BOOL (WINAPI* SleepConditionVariableCSProc)
- (PCONDITION_VARIABLE ConditionVariable,
- PCRITICAL_SECTION CriticalSection,
- DWORD dwMilliseconds);
-static SleepConditionVariableCSProc sleep_condition_variable;
-
-typedef VOID (WINAPI* WakeAllConditionVariableProc)
- (PCONDITION_VARIABLE ConditionVariable);
-static WakeAllConditionVariableProc wake_all_condition_variable;
-
-typedef VOID (WINAPI* WakeConditionVariableProc)
- (PCONDITION_VARIABLE ConditionVariable);
-static WakeConditionVariableProc wake_condition_variable;
-#endif
-
/*********************************************************//**
Initialize condition variable */
UNIV_INLINE
@@ -101,8 +75,7 @@ os_cond_init(
ut_a(cond);
#ifdef __WIN__
- ut_a(initialize_condition_variable != NULL);
- initialize_condition_variable(cond);
+ InitializeConditionVariable(cond);
#else
ut_a(pthread_cond_init(cond, NULL) == 0);
#endif
@@ -130,9 +103,8 @@ os_cond_wait_timed(
BOOL ret;
DWORD err;
- ut_a(sleep_condition_variable != NULL);
- ret = sleep_condition_variable(cond, mutex, time_in_ms);
+ ret = SleepConditionVariableCS(cond, mutex, time_in_ms);
if (!ret) {
err = GetLastError();
@@ -187,8 +159,7 @@ os_cond_wait(
ut_a(mutex);
#ifdef __WIN__
- ut_a(sleep_condition_variable != NULL);
- ut_a(sleep_condition_variable(cond, mutex, INFINITE));
+ ut_a(SleepConditionVariableCS(cond, mutex, INFINITE));
#else
ut_a(pthread_cond_wait(cond, mutex) == 0);
#endif
@@ -205,8 +176,7 @@ os_cond_broadcast(
ut_a(cond);
#ifdef __WIN__
- ut_a(wake_all_condition_variable != NULL);
- wake_all_condition_variable(cond);
+ WakeAllConditionVariable(cond);
#else
ut_a(pthread_cond_broadcast(cond) == 0);
#endif
@@ -228,48 +198,12 @@ os_cond_destroy(
}
/*********************************************************//**
-On Windows (Vista and later), load function pointers for condition variable
-handling. Those functions are not available in prior versions, so we have to
-use them via runtime loading, as long as we support XP. */
-static
-void
-os_cond_module_init(void)
-/*=====================*/
-{
-#ifdef __WIN__
- HMODULE h_dll;
-
- if (!srv_use_native_conditions)
- return;
-
- h_dll = GetModuleHandle("kernel32");
-
- initialize_condition_variable = (InitializeConditionVariableProc)
- GetProcAddress(h_dll, "InitializeConditionVariable");
- sleep_condition_variable = (SleepConditionVariableCSProc)
- GetProcAddress(h_dll, "SleepConditionVariableCS");
- wake_all_condition_variable = (WakeAllConditionVariableProc)
- GetProcAddress(h_dll, "WakeAllConditionVariable");
- wake_condition_variable = (WakeConditionVariableProc)
- GetProcAddress(h_dll, "WakeConditionVariable");
-
- /* When using native condition variables, check function pointers */
- ut_a(initialize_condition_variable);
- ut_a(sleep_condition_variable);
- ut_a(wake_all_condition_variable);
- ut_a(wake_condition_variable);
-#endif
-}
-
-/*********************************************************//**
Initializes global event and OS 'slow' mutex lists. */
UNIV_INTERN
void
os_sync_init(void)
/*==============*/
{
- /* Now for Windows only */
- os_cond_module_init();
}
/** Create an event semaphore, i.e., a semaphore which may just have two
@@ -280,29 +214,15 @@ UNIV_INTERN
void
os_event_create(os_event_t event)
{
-#ifdef __WIN__
- if(!srv_use_native_conditions) {
-
- event->handle = CreateEvent(NULL, TRUE, FALSE, NULL);
- if (!event->handle) {
- fprintf(stderr,
- "InnoDB: Could not create a Windows event"
- " semaphore; Windows error %lu\n",
- (ulong) GetLastError());
- }
- } else /* Windows with condition variables */
-#endif
- {
#ifndef PFS_SKIP_EVENT_MUTEX
- os_fast_mutex_init(event_os_mutex_key, &event->os_mutex);
+ os_fast_mutex_init(event_os_mutex_key, &event->os_mutex);
#else
- os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &event->os_mutex);
+ os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &event->os_mutex);
#endif
- os_cond_init(&(event->cond_var));
+ os_cond_init(&(event->cond_var));
event->init_count_and_set();
- }
os_atomic_increment_ulint(&os_event_count, 1);
}
@@ -335,13 +255,6 @@ os_event_set(
{
ut_a(event);
-#ifdef __WIN__
- if (!srv_use_native_conditions) {
- ut_a(SetEvent(event->handle));
- return;
- }
-#endif
-
os_fast_mutex_lock(&(event->os_mutex));
if (UNIV_UNLIKELY(event->is_set())) {
@@ -373,13 +286,6 @@ os_event_reset(
ut_a(event);
-#ifdef __WIN__
- if(!srv_use_native_conditions) {
- ut_a(ResetEvent(event->handle));
- return(0);
- }
-#endif
-
os_fast_mutex_lock(&(event->os_mutex));
if (UNIV_UNLIKELY(!event->is_set())) {
@@ -405,16 +311,10 @@ os_event_free(
{
ut_a(event);
-#ifdef __WIN__
- if(!srv_use_native_conditions){
- ut_a(CloseHandle(event->handle));
- } else /*Windows with condition variables */
-#endif
- {
- os_fast_mutex_free(&(event->os_mutex));
- os_cond_destroy(&(event->cond_var));
- }
+ os_fast_mutex_free(&(event->os_mutex));
+
+ os_cond_destroy(&(event->cond_var));
os_atomic_decrement_ulint(&os_event_count, 1);
@@ -448,21 +348,6 @@ os_event_wait_low(
returned by previous call of
os_event_reset(). */
{
-#ifdef __WIN__
- if(!srv_use_native_conditions) {
- DWORD err;
-
- ut_a(event);
-
- UT_NOT_USED(reset_sig_count);
-
- /* Specify an infinite wait */
- err = WaitForSingleObject(event->handle, INFINITE);
-
- ut_a(err == WAIT_OBJECT_0);
- return;
- }
-#endif
os_fast_mutex_lock(&event->os_mutex);
@@ -501,36 +386,10 @@ os_event_wait_time_low(
#ifdef __WIN__
DWORD time_in_ms;
-
- if (!srv_use_native_conditions) {
- DWORD err;
-
- ut_a(event);
-
- if (time_in_usec != OS_SYNC_INFINITE_TIME) {
- time_in_ms = static_cast<DWORD>(time_in_usec / 1000);
- err = WaitForSingleObject(event->handle, time_in_ms);
- } else {
- err = WaitForSingleObject(event->handle, INFINITE);
- }
-
- if (err == WAIT_OBJECT_0) {
- return(0);
- } else if ((err == WAIT_TIMEOUT) || (err == ERROR_TIMEOUT)) {
- return(OS_SYNC_TIME_EXCEEDED);
- }
-
- ut_error;
- /* Dummy value to eliminate compiler warning. */
- return(42);
+ if (time_in_usec != OS_SYNC_INFINITE_TIME) {
+ time_in_ms = static_cast<DWORD>(time_in_usec / 1000);
} else {
- ut_a(sleep_condition_variable != NULL);
-
- if (time_in_usec != OS_SYNC_INFINITE_TIME) {
- time_in_ms = static_cast<DWORD>(time_in_usec / 1000);
- } else {
- time_in_ms = INFINITE;
- }
+ time_in_ms = INFINITE;
}
#else
struct timespec abstime;
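
[editor's note] With XP support gone, os0sync.cc calls the Vista-era condition-variable API directly instead of resolving it through GetProcAddress. A hedged sketch, not a patch hunk, of an os_event-like primitive built on the Win32 calls the patch now invokes directly (error handling elided):

    #include <windows.h>

    static CRITICAL_SECTION   cs;
    static CONDITION_VARIABLE cv;
    static int                is_set = 0;

    static void event_init(void)
    {
        InitializeCriticalSection(&cs);
        InitializeConditionVariable(&cv);
    }

    static void event_set(void)
    {
        EnterCriticalSection(&cs);
        is_set = 1;
        WakeAllConditionVariable(&cv);  /* wake every waiter, as os_event_set */
        LeaveCriticalSection(&cs);
    }

    static void event_wait(void)
    {
        EnterCriticalSection(&cs);
        while (!is_set)                 /* guard against spurious wakeups */
            SleepConditionVariableCS(&cv, &cs, INFINITE);
        LeaveCriticalSection(&cs);
    }
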
diff --git a/storage/xtradb/row/row0import.cc b/storage/xtradb/row/row0import.cc
index f5e882f96fe..bafea49fae9 100644
--- a/storage/xtradb/row/row0import.cc
+++ b/storage/xtradb/row/row0import.cc
@@ -594,7 +594,7 @@ AbstractCallback::init(
const page_t* page = block->frame;
m_space_flags = fsp_header_get_flags(page);
- if (!fsp_flags_is_valid(m_space_flags, true)) {
+ if (!fsp_flags_is_valid(m_space_flags)) {
ulint cflags = fsp_flags_convert_from_101(m_space_flags);
if (cflags == ULINT_UNDEFINED) {
ib_logf(IB_LOG_LEVEL_ERROR,
diff --git a/storage/xtradb/row/row0merge.cc b/storage/xtradb/row/row0merge.cc
index a79a766bcaf..88237471d5f 100644
--- a/storage/xtradb/row/row0merge.cc
+++ b/storage/xtradb/row/row0merge.cc
@@ -1972,7 +1972,7 @@ write_buffers:
pct_cost :
((pct_cost * read_rows) / table_total_rows);
/* presenting 10.12% as 1012 integer */
- onlineddl_pct_progress = curr_progress * 100;
+ onlineddl_pct_progress = (ulint) (curr_progress * 100);
}
}
@@ -2560,7 +2560,7 @@ row_merge_sort(
pct_cost :
((pct_cost * merge_count) / total_merge_sort_count);
/* presenting 10.12% as 1012 integer */;
- onlineddl_pct_progress = (pct_progress + curr_progress) * 100;
+ onlineddl_pct_progress = (ulint) ((pct_progress + curr_progress) * 100);
}
if (error != DB_SUCCESS) {
@@ -2845,7 +2845,7 @@ row_merge_insert_index_tuples(
((pct_cost * inserted_rows) / table_total_rows);
/* presenting 10.12% as 1012 integer */;
- onlineddl_pct_progress = (pct_progress + curr_progress) * 100;
+ onlineddl_pct_progress = (ulint) ((pct_progress + curr_progress) * 100);
}
}
}
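
[editor's note] The row0merge.cc hunks add explicit ulint casts when publishing online-DDL progress; as the in-source comments say, 10.12% is reported as the integer 1012. The scaling, spelled out under that convention:

    /* Hedged sketch: two implied decimals, so 10.12% is published as 1012.
       The casts added above make the double -> integer narrowing explicit. */
    double        curr_progress      = 10.12;   /* percent */
    unsigned long onlineddl_progress = (unsigned long)(curr_progress * 100.0);
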
diff --git a/storage/xtradb/srv/srv0conc.cc b/storage/xtradb/srv/srv0conc.cc
index d2ededc3894..1a2a6e0342f 100644
--- a/storage/xtradb/srv/srv0conc.cc
+++ b/storage/xtradb/srv/srv0conc.cc
@@ -113,7 +113,7 @@ UNIV_INTERN mysql_pfs_key_t srv_conc_mutex_key;
/** Variables tracking the active and waiting threads. */
struct srv_conc_t {
- char pad[64 - (sizeof(ulint) + sizeof(lint))];
+ char pad[CACHE_LINE_SIZE - (sizeof(ulint) + sizeof(lint))];
/** Number of transactions that have declared_to_be_inside_innodb set.
It used to be a non-error for this value to drop below zero temporarily.
diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc
index 84417b5888b..91234a0cca8 100644
--- a/storage/xtradb/srv/srv0srv.cc
+++ b/storage/xtradb/srv/srv0srv.cc
@@ -572,11 +572,6 @@ UNIV_INTERN ulint srv_available_undo_logs = 0;
/* Ensure status variables are on separate cache lines */
-#ifdef __powerpc__
-#define CACHE_LINE_SIZE 128
-#else
-#define CACHE_LINE_SIZE 64
-#endif
#define CACHE_ALIGNED MY_ATTRIBUTE((aligned (CACHE_LINE_SIZE)))
UNIV_INTERN byte
diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc
index 153599391dc..6ea430699d4 100644
--- a/storage/xtradb/srv/srv0start.cc
+++ b/storage/xtradb/srv/srv0start.cc
@@ -1920,32 +1920,7 @@ innobase_start_or_create_for_mysql()
srv_startup_is_before_trx_rollback_phase = TRUE;
#ifdef __WIN__
- switch (os_get_os_version()) {
- case OS_WIN95:
- case OS_WIN31:
- case OS_WINNT:
- srv_use_native_conditions = FALSE;
- /* On Win 95, 98, ME, Win32 subsystem for Windows 3.1,
- and NT use simulated aio. In NT Windows provides async i/o,
- but when run in conjunction with InnoDB Hot Backup, it seemed
- to corrupt the data files. */
-
- srv_use_native_aio = FALSE;
- break;
-
- case OS_WIN2000:
- case OS_WINXP:
- /* On 2000 and XP, async IO is available, but no condition variables. */
- srv_use_native_aio = TRUE;
- srv_use_native_conditions = FALSE;
- break;
-
- default:
- /* Vista and later have both async IO and condition variables */
- srv_use_native_aio = TRUE;
- srv_use_native_conditions = TRUE;
- break;
- }
+ srv_use_native_aio = TRUE;
#elif defined(LINUX_NATIVE_AIO)
@@ -2047,13 +2022,7 @@ innobase_start_or_create_for_mysql()
srv_boot();
- if (ut_crc32_sse2_enabled) {
- ib_logf(IB_LOG_LEVEL_INFO, "Using SSE crc32 instructions");
- } else if (ut_crc32_power8_enabled) {
- ib_logf(IB_LOG_LEVEL_INFO, "Using POWER8 crc32 instructions");
- } else {
- ib_logf(IB_LOG_LEVEL_INFO, "Using generic crc32 instructions");
- }
+ ib_logf(IB_LOG_LEVEL_INFO, ut_crc32_implementation);
if (!srv_read_only_mode) {
diff --git a/storage/xtradb/ut/crc32_power8/crc32.S b/storage/xtradb/ut/crc32_power8/crc32.S
deleted file mode 100644
index b064ce3dc96..00000000000
--- a/storage/xtradb/ut/crc32_power8/crc32.S
+++ /dev/null
@@ -1,775 +0,0 @@
-/*
- * Calculate the checksum of data that is 16 byte aligned and a multiple of
- * 16 bytes.
- *
- * The first step is to reduce it to 1024 bits. We do this in 8 parallel
- * chunks in order to mask the latency of the vpmsum instructions. If we
- * have more than 32 kB of data to checksum we repeat this step multiple
- * times, passing in the previous 1024 bits.
- *
- * The next step is to reduce the 1024 bits to 64 bits. This step adds
- * 32 bits of 0s to the end - this matches what a CRC does. We just
- * calculate constants that land the data in this 32 bits.
- *
- * We then use fixed point Barrett reduction to compute a mod n over GF(2)
- * for n = CRC using POWER8 instructions. We use x = 32.
- *
- * http://en.wikipedia.org/wiki/Barrett_reduction
- *
- * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifdef __powerpc__
-
-#include <ppc-asm.h>
-#include "ppc-opcode.h"
-
-#undef toc
-
-#ifndef r1
-#define r1 1
-#endif
-
-#ifndef r2
-#define r2 2
-#endif
-
- .section .rodata
-.balign 16
-
-.byteswap_constant:
- /* byte reverse permute constant */
- .octa 0x0F0E0D0C0B0A09080706050403020100
-
-#define __ASSEMBLY__
-#include "crc32_constants.h"
-
- .text
-
-#if defined(__BIG_ENDIAN__) && defined(REFLECT)
-#define BYTESWAP_DATA
-#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT)
-#define BYTESWAP_DATA
-#else
-#undef BYTESWAP_DATA
-#endif
-
-#define off16 r25
-#define off32 r26
-#define off48 r27
-#define off64 r28
-#define off80 r29
-#define off96 r30
-#define off112 r31
-
-#define const1 v24
-#define const2 v25
-
-#define byteswap v26
-#define mask_32bit v27
-#define mask_64bit v28
-#define zeroes v29
-
-#ifdef BYTESWAP_DATA
-#define VPERM(A, B, C, D) vperm A, B, C, D
-#else
-#define VPERM(A, B, C, D)
-#endif
-
-/* unsigned int __crc32_vpmsum(unsigned int crc, void *p, unsigned long len) */
-FUNC_START(__crc32_vpmsum)
- std r31,-8(r1)
- std r30,-16(r1)
- std r29,-24(r1)
- std r28,-32(r1)
- std r27,-40(r1)
- std r26,-48(r1)
- std r25,-56(r1)
-
- li off16,16
- li off32,32
- li off48,48
- li off64,64
- li off80,80
- li off96,96
- li off112,112
- li r0,0
-
- /* Enough room for saving 10 non volatile VMX registers */
- subi r6,r1,56+10*16
- subi r7,r1,56+2*16
-
- stvx v20,0,r6
- stvx v21,off16,r6
- stvx v22,off32,r6
- stvx v23,off48,r6
- stvx v24,off64,r6
- stvx v25,off80,r6
- stvx v26,off96,r6
- stvx v27,off112,r6
- stvx v28,0,r7
- stvx v29,off16,r7
-
- mr r10,r3
-
- vxor zeroes,zeroes,zeroes
- vspltisw v0,-1
-
- vsldoi mask_32bit,zeroes,v0,4
- vsldoi mask_64bit,zeroes,v0,8
-
- /* Get the initial value into v8 */
- vxor v8,v8,v8
- MTVRD(v8, r3)
-#ifdef REFLECT
- vsldoi v8,zeroes,v8,8 /* shift into bottom 32 bits */
-#else
- vsldoi v8,v8,zeroes,4 /* shift into top 32 bits */
-#endif
-
-#ifdef BYTESWAP_DATA
- addis r3,r2,.byteswap_constant@toc@ha
- addi r3,r3,.byteswap_constant@toc@l
-
- lvx byteswap,0,r3
- addi r3,r3,16
-#endif
-
- cmpdi r5,256
- blt .Lshort
-
- rldicr r6,r5,0,56
-
- /* Checksum in blocks of MAX_SIZE */
-1: lis r7,MAX_SIZE@h
- ori r7,r7,MAX_SIZE@l
- mr r9,r7
- cmpd r6,r7
- bgt 2f
- mr r7,r6
-2: subf r6,r7,r6
-
- /* our main loop does 128 bytes at a time */
- srdi r7,r7,7
-
- /*
- * Work out the offset into the constants table to start at. Each
- * constant is 16 bytes, and it is used against 128 bytes of input
- * data - 128 / 16 = 8
- */
- sldi r8,r7,4
- srdi r9,r9,3
- subf r8,r8,r9
-
- /* We reduce our final 128 bytes in a separate step */
- addi r7,r7,-1
- mtctr r7
-
- addis r3,r2,.constants@toc@ha
- addi r3,r3,.constants@toc@l
-
- /* Find the start of our constants */
- add r3,r3,r8
-
- /* zero v0-v7 which will contain our checksums */
- vxor v0,v0,v0
- vxor v1,v1,v1
- vxor v2,v2,v2
- vxor v3,v3,v3
- vxor v4,v4,v4
- vxor v5,v5,v5
- vxor v6,v6,v6
- vxor v7,v7,v7
-
- lvx const1,0,r3
-
- /*
- * If we are looping back to consume more data we use the values
- * already in v16-v23.
- */
- cmpdi r0,1
- beq 2f
-
- /* First warm up pass */
- lvx v16,0,r4
- lvx v17,off16,r4
- VPERM(v16,v16,v16,byteswap)
- VPERM(v17,v17,v17,byteswap)
- lvx v18,off32,r4
- lvx v19,off48,r4
- VPERM(v18,v18,v18,byteswap)
- VPERM(v19,v19,v19,byteswap)
- lvx v20,off64,r4
- lvx v21,off80,r4
- VPERM(v20,v20,v20,byteswap)
- VPERM(v21,v21,v21,byteswap)
- lvx v22,off96,r4
- lvx v23,off112,r4
- VPERM(v22,v22,v22,byteswap)
- VPERM(v23,v23,v23,byteswap)
- addi r4,r4,8*16
-
- /* xor in initial value */
- vxor v16,v16,v8
-
-2: bdz .Lfirst_warm_up_done
-
- addi r3,r3,16
- lvx const2,0,r3
-
- /* Second warm up pass */
- VPMSUMD(v8,v16,const1)
- lvx v16,0,r4
- VPERM(v16,v16,v16,byteswap)
- ori r2,r2,0
-
- VPMSUMD(v9,v17,const1)
- lvx v17,off16,r4
- VPERM(v17,v17,v17,byteswap)
- ori r2,r2,0
-
- VPMSUMD(v10,v18,const1)
- lvx v18,off32,r4
- VPERM(v18,v18,v18,byteswap)
- ori r2,r2,0
-
- VPMSUMD(v11,v19,const1)
- lvx v19,off48,r4
- VPERM(v19,v19,v19,byteswap)
- ori r2,r2,0
-
- VPMSUMD(v12,v20,const1)
- lvx v20,off64,r4
- VPERM(v20,v20,v20,byteswap)
- ori r2,r2,0
-
- VPMSUMD(v13,v21,const1)
- lvx v21,off80,r4
- VPERM(v21,v21,v21,byteswap)
- ori r2,r2,0
-
- VPMSUMD(v14,v22,const1)
- lvx v22,off96,r4
- VPERM(v22,v22,v22,byteswap)
- ori r2,r2,0
-
- VPMSUMD(v15,v23,const1)
- lvx v23,off112,r4
- VPERM(v23,v23,v23,byteswap)
-
- addi r4,r4,8*16
-
- bdz .Lfirst_cool_down
-
- /*
- * main loop. We modulo schedule it such that it takes three iterations
- * to complete - first iteration load, second iteration vpmsum, third
- * iteration xor.
- */
- .balign 16
-4: lvx const1,0,r3
- addi r3,r3,16
- ori r2,r2,0
-
- vxor v0,v0,v8
- VPMSUMD(v8,v16,const2)
- lvx v16,0,r4
- VPERM(v16,v16,v16,byteswap)
- ori r2,r2,0
-
- vxor v1,v1,v9
- VPMSUMD(v9,v17,const2)
- lvx v17,off16,r4
- VPERM(v17,v17,v17,byteswap)
- ori r2,r2,0
-
- vxor v2,v2,v10
- VPMSUMD(v10,v18,const2)
- lvx v18,off32,r4
- VPERM(v18,v18,v18,byteswap)
- ori r2,r2,0
-
- vxor v3,v3,v11
- VPMSUMD(v11,v19,const2)
- lvx v19,off48,r4
- VPERM(v19,v19,v19,byteswap)
- lvx const2,0,r3
- ori r2,r2,0
-
- vxor v4,v4,v12
- VPMSUMD(v12,v20,const1)
- lvx v20,off64,r4
- VPERM(v20,v20,v20,byteswap)
- ori r2,r2,0
-
- vxor v5,v5,v13
- VPMSUMD(v13,v21,const1)
- lvx v21,off80,r4
- VPERM(v21,v21,v21,byteswap)
- ori r2,r2,0
-
- vxor v6,v6,v14
- VPMSUMD(v14,v22,const1)
- lvx v22,off96,r4
- VPERM(v22,v22,v22,byteswap)
- ori r2,r2,0
-
- vxor v7,v7,v15
- VPMSUMD(v15,v23,const1)
- lvx v23,off112,r4
- VPERM(v23,v23,v23,byteswap)
-
- addi r4,r4,8*16
-
- bdnz 4b
-
-.Lfirst_cool_down:
- /* First cool down pass */
- lvx const1,0,r3
- addi r3,r3,16
-
- vxor v0,v0,v8
- VPMSUMD(v8,v16,const1)
- ori r2,r2,0
-
- vxor v1,v1,v9
- VPMSUMD(v9,v17,const1)
- ori r2,r2,0
-
- vxor v2,v2,v10
- VPMSUMD(v10,v18,const1)
- ori r2,r2,0
-
- vxor v3,v3,v11
- VPMSUMD(v11,v19,const1)
- ori r2,r2,0
-
- vxor v4,v4,v12
- VPMSUMD(v12,v20,const1)
- ori r2,r2,0
-
- vxor v5,v5,v13
- VPMSUMD(v13,v21,const1)
- ori r2,r2,0
-
- vxor v6,v6,v14
- VPMSUMD(v14,v22,const1)
- ori r2,r2,0
-
- vxor v7,v7,v15
- VPMSUMD(v15,v23,const1)
- ori r2,r2,0
-
-.Lsecond_cool_down:
- /* Second cool down pass */
- vxor v0,v0,v8
- vxor v1,v1,v9
- vxor v2,v2,v10
- vxor v3,v3,v11
- vxor v4,v4,v12
- vxor v5,v5,v13
- vxor v6,v6,v14
- vxor v7,v7,v15
-
-#ifdef REFLECT
- /*
- * vpmsumd produces a 96 bit result in the least significant bits
- * of the register. Since we are bit reflected we have to shift it
- * left 32 bits so it occupies the least significant bits in the
- * bit reflected domain.
- */
- vsldoi v0,v0,zeroes,4
- vsldoi v1,v1,zeroes,4
- vsldoi v2,v2,zeroes,4
- vsldoi v3,v3,zeroes,4
- vsldoi v4,v4,zeroes,4
- vsldoi v5,v5,zeroes,4
- vsldoi v6,v6,zeroes,4
- vsldoi v7,v7,zeroes,4
-#endif
-
- /* xor with last 1024 bits */
- lvx v8,0,r4
- lvx v9,off16,r4
- VPERM(v8,v8,v8,byteswap)
- VPERM(v9,v9,v9,byteswap)
- lvx v10,off32,r4
- lvx v11,off48,r4
- VPERM(v10,v10,v10,byteswap)
- VPERM(v11,v11,v11,byteswap)
- lvx v12,off64,r4
- lvx v13,off80,r4
- VPERM(v12,v12,v12,byteswap)
- VPERM(v13,v13,v13,byteswap)
- lvx v14,off96,r4
- lvx v15,off112,r4
- VPERM(v14,v14,v14,byteswap)
- VPERM(v15,v15,v15,byteswap)
-
- addi r4,r4,8*16
-
- vxor v16,v0,v8
- vxor v17,v1,v9
- vxor v18,v2,v10
- vxor v19,v3,v11
- vxor v20,v4,v12
- vxor v21,v5,v13
- vxor v22,v6,v14
- vxor v23,v7,v15
-
- li r0,1
- cmpdi r6,0
- addi r6,r6,128
- bne 1b
-
- /* Work out how many bytes we have left */
- andi. r5,r5,127
-
- /* Calculate where in the constant table we need to start */
- subfic r6,r5,128
- add r3,r3,r6
-
- /* How many 16 byte chunks are in the tail */
- srdi r7,r5,4
- mtctr r7
-
- /*
- * Reduce the previously calculated 1024 bits to 64 bits, shifting
- * 32 bits to include the trailing 32 bits of zeros
- */
- lvx v0,0,r3
- lvx v1,off16,r3
- lvx v2,off32,r3
- lvx v3,off48,r3
- lvx v4,off64,r3
- lvx v5,off80,r3
- lvx v6,off96,r3
- lvx v7,off112,r3
- addi r3,r3,8*16
-
- VPMSUMW(v0,v16,v0)
- VPMSUMW(v1,v17,v1)
- VPMSUMW(v2,v18,v2)
- VPMSUMW(v3,v19,v3)
- VPMSUMW(v4,v20,v4)
- VPMSUMW(v5,v21,v5)
- VPMSUMW(v6,v22,v6)
- VPMSUMW(v7,v23,v7)
-
- /* Now reduce the tail (0 - 112 bytes) */
- cmpdi r7,0
- beq 1f
-
- lvx v16,0,r4
- lvx v17,0,r3
- VPERM(v16,v16,v16,byteswap)
- VPMSUMW(v16,v16,v17)
- vxor v0,v0,v16
- bdz 1f
-
- lvx v16,off16,r4
- lvx v17,off16,r3
- VPERM(v16,v16,v16,byteswap)
- VPMSUMW(v16,v16,v17)
- vxor v0,v0,v16
- bdz 1f
-
- lvx v16,off32,r4
- lvx v17,off32,r3
- VPERM(v16,v16,v16,byteswap)
- VPMSUMW(v16,v16,v17)
- vxor v0,v0,v16
- bdz 1f
-
- lvx v16,off48,r4
- lvx v17,off48,r3
- VPERM(v16,v16,v16,byteswap)
- VPMSUMW(v16,v16,v17)
- vxor v0,v0,v16
- bdz 1f
-
- lvx v16,off64,r4
- lvx v17,off64,r3
- VPERM(v16,v16,v16,byteswap)
- VPMSUMW(v16,v16,v17)
- vxor v0,v0,v16
- bdz 1f
-
- lvx v16,off80,r4
- lvx v17,off80,r3
- VPERM(v16,v16,v16,byteswap)
- VPMSUMW(v16,v16,v17)
- vxor v0,v0,v16
- bdz 1f
-
- lvx v16,off96,r4
- lvx v17,off96,r3
- VPERM(v16,v16,v16,byteswap)
- VPMSUMW(v16,v16,v17)
- vxor v0,v0,v16
-
- /* Now xor all the parallel chunks together */
-1: vxor v0,v0,v1
- vxor v2,v2,v3
- vxor v4,v4,v5
- vxor v6,v6,v7
-
- vxor v0,v0,v2
- vxor v4,v4,v6
-
- vxor v0,v0,v4
-
-.Lbarrett_reduction:
- /* Barrett constants */
- addis r3,r2,.barrett_constants@toc@ha
- addi r3,r3,.barrett_constants@toc@l
-
- lvx const1,0,r3
- lvx const2,off16,r3
-
- vsldoi v1,v0,v0,8
- vxor v0,v0,v1 /* xor two 64 bit results together */
-
-#ifdef REFLECT
- /* shift left one bit */
- vspltisb v1,1
- vsl v0,v0,v1
-#endif
-
- vand v0,v0,mask_64bit
-
-#ifndef REFLECT
- /*
- * Now for the Barrett reduction algorithm. The idea is to calculate q,
- * the multiple of our polynomial that we need to subtract. By
- * doing the computation 2x bits higher (ie 64 bits) and shifting the
- * result back down 2x bits, we round down to the nearest multiple.
- */
- VPMSUMD(v1,v0,const1) /* ma */
- vsldoi v1,zeroes,v1,8 /* q = floor(ma/(2^64)) */
- VPMSUMD(v1,v1,const2) /* qn */
- vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */
-
- /*
- * Get the result into r3. We need to shift it left 8 bytes:
- * V0 [ 0 1 2 X ]
- * V0 [ 0 X 2 3 ]
- */
- vsldoi v0,v0,zeroes,8 /* shift result into top 64 bits */
-#else
- /*
- * The reflected version of Barrett reduction. Instead of bit
- * reflecting our data (which is expensive to do), we bit reflect our
- * constants and our algorithm, which means the intermediate data in
- * our vector registers goes from 0-63 instead of 63-0. We can reflect
- * the algorithm because we don't carry in mod 2 arithmetic.
- */
- vand v1,v0,mask_32bit /* bottom 32 bits of a */
- VPMSUMD(v1,v1,const1) /* ma */
- vand v1,v1,mask_32bit /* bottom 32bits of ma */
- VPMSUMD(v1,v1,const2) /* qn */
- vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */
-
- /*
- * Since we are bit reflected, the result (ie the low 32 bits) is in
- * the high 32 bits. We just need to shift it left 4 bytes
- * V0 [ 0 1 X 3 ]
- * V0 [ 0 X 2 3 ]
- */
- vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits of */
-#endif
-
-.Lout:
- subi r6,r1,56+10*16
- subi r7,r1,56+2*16
-
- lvx v20,0,r6
- lvx v21,off16,r6
- lvx v22,off32,r6
- lvx v23,off48,r6
- lvx v24,off64,r6
- lvx v25,off80,r6
- lvx v26,off96,r6
- lvx v27,off112,r6
- lvx v28,0,r7
- lvx v29,off16,r7
-
- /* Get it into r3 */
- MFVRD(r3, v0)
-
- ld r31,-8(r1)
- ld r30,-16(r1)
- ld r29,-24(r1)
- ld r28,-32(r1)
- ld r27,-40(r1)
- ld r26,-48(r1)
- ld r25,-56(r1)
-
- blr
-
-.Lfirst_warm_up_done:
- lvx const1,0,r3
- addi r3,r3,16
-
- VPMSUMD(v8,v16,const1)
- VPMSUMD(v9,v17,const1)
- VPMSUMD(v10,v18,const1)
- VPMSUMD(v11,v19,const1)
- VPMSUMD(v12,v20,const1)
- VPMSUMD(v13,v21,const1)
- VPMSUMD(v14,v22,const1)
- VPMSUMD(v15,v23,const1)
-
- b .Lsecond_cool_down
-
-.Lshort:
- cmpdi r5,0
- beq .Lzero
-
- addis r3,r2,.short_constants@toc@ha
- addi r3,r3,.short_constants@toc@l
-
- /* Calculate where in the constant table we need to start */
- subfic r6,r5,256
- add r3,r3,r6
-
- /* How many 16 byte chunks? */
- srdi r7,r5,4
- mtctr r7
-
- vxor v19,v19,v19
- vxor v20,v20,v20
-
- lvx v0,0,r4
- lvx v16,0,r3
- VPERM(v0,v0,v16,byteswap)
- vxor v0,v0,v8 /* xor in initial value */
- VPMSUMW(v0,v0,v16)
- bdz .Lv0
-
- lvx v1,off16,r4
- lvx v17,off16,r3
- VPERM(v1,v1,v17,byteswap)
- VPMSUMW(v1,v1,v17)
- bdz .Lv1
-
- lvx v2,off32,r4
- lvx v16,off32,r3
- VPERM(v2,v2,v16,byteswap)
- VPMSUMW(v2,v2,v16)
- bdz .Lv2
-
- lvx v3,off48,r4
- lvx v17,off48,r3
- VPERM(v3,v3,v17,byteswap)
- VPMSUMW(v3,v3,v17)
- bdz .Lv3
-
- lvx v4,off64,r4
- lvx v16,off64,r3
- VPERM(v4,v4,v16,byteswap)
- VPMSUMW(v4,v4,v16)
- bdz .Lv4
-
- lvx v5,off80,r4
- lvx v17,off80,r3
- VPERM(v5,v5,v17,byteswap)
- VPMSUMW(v5,v5,v17)
- bdz .Lv5
-
- lvx v6,off96,r4
- lvx v16,off96,r3
- VPERM(v6,v6,v16,byteswap)
- VPMSUMW(v6,v6,v16)
- bdz .Lv6
-
- lvx v7,off112,r4
- lvx v17,off112,r3
- VPERM(v7,v7,v17,byteswap)
- VPMSUMW(v7,v7,v17)
- bdz .Lv7
-
- addi r3,r3,128
- addi r4,r4,128
-
- lvx v8,0,r4
- lvx v16,0,r3
- VPERM(v8,v8,v16,byteswap)
- VPMSUMW(v8,v8,v16)
- bdz .Lv8
-
- lvx v9,off16,r4
- lvx v17,off16,r3
- VPERM(v9,v9,v17,byteswap)
- VPMSUMW(v9,v9,v17)
- bdz .Lv9
-
- lvx v10,off32,r4
- lvx v16,off32,r3
- VPERM(v10,v10,v16,byteswap)
- VPMSUMW(v10,v10,v16)
- bdz .Lv10
-
- lvx v11,off48,r4
- lvx v17,off48,r3
- VPERM(v11,v11,v17,byteswap)
- VPMSUMW(v11,v11,v17)
- bdz .Lv11
-
- lvx v12,off64,r4
- lvx v16,off64,r3
- VPERM(v12,v12,v16,byteswap)
- VPMSUMW(v12,v12,v16)
- bdz .Lv12
-
- lvx v13,off80,r4
- lvx v17,off80,r3
- VPERM(v13,v13,v17,byteswap)
- VPMSUMW(v13,v13,v17)
- bdz .Lv13
-
- lvx v14,off96,r4
- lvx v16,off96,r3
- VPERM(v14,v14,v16,byteswap)
- VPMSUMW(v14,v14,v16)
- bdz .Lv14
-
- lvx v15,off112,r4
- lvx v17,off112,r3
- VPERM(v15,v15,v17,byteswap)
- VPMSUMW(v15,v15,v17)
-
-.Lv15: vxor v19,v19,v15
-.Lv14: vxor v20,v20,v14
-.Lv13: vxor v19,v19,v13
-.Lv12: vxor v20,v20,v12
-.Lv11: vxor v19,v19,v11
-.Lv10: vxor v20,v20,v10
-.Lv9: vxor v19,v19,v9
-.Lv8: vxor v20,v20,v8
-.Lv7: vxor v19,v19,v7
-.Lv6: vxor v20,v20,v6
-.Lv5: vxor v19,v19,v5
-.Lv4: vxor v20,v20,v4
-.Lv3: vxor v19,v19,v3
-.Lv2: vxor v20,v20,v2
-.Lv1: vxor v19,v19,v1
-.Lv0: vxor v20,v20,v0
-
- vxor v0,v19,v20
-
- b .Lbarrett_reduction
-
-.Lzero:
- mr r3,r10
- blr
- b .Lout
-
-FUNC_END(__crc32_vpmsum)
-
-#endif /* __powerpc__ */
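
[editor's note] The deleted crc32.S computed CRC-32C (polynomial 0x1edc6f41, bit-reflected) with POWER8 vpmsum instructions and Barrett reduction; after this change the chosen implementation is reported via the ut_crc32_implementation string instead of per-platform booleans. For reference, a hedged bit-at-a-time software sketch of the same checksum (the reflected form of 0x1edc6f41 is 0x82F63B78); real builds use table- or hardware-accelerated variants:

    #include <stddef.h>
    #include <stdint.h>

    /* Hedged sketch: bitwise CRC-32C (Castagnoli), reflected. */
    uint32_t crc32c_sketch(uint32_t crc, const uint8_t* p, size_t len)
    {
        crc = ~crc;
        while (len--) {
            crc ^= *p++;
            for (int i = 0; i < 8; i++)
                crc = (crc >> 1) ^ (0x82F63B78U & (0U - (crc & 1U)));
        }
        return ~crc;
    }
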
diff --git a/storage/xtradb/ut/crc32_power8/crc32_constants.h b/storage/xtradb/ut/crc32_power8/crc32_constants.h
deleted file mode 100644
index ba2592b829c..00000000000
--- a/storage/xtradb/ut/crc32_power8/crc32_constants.h
+++ /dev/null
@@ -1,911 +0,0 @@
-#ifndef CRC32_CONSTANTS_H
-#define CRC32_CONSTANTS_H
-
-#ifdef __powerpc__
-
-
-#define CRC 0x1edc6f41
-#define CRC_XOR
-#define REFLECT
-
-#ifndef __ASSEMBLY__
-#ifdef CRC_TABLE
-static const unsigned int crc_table[] = {
- 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4,
- 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
- 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b,
- 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
- 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b,
- 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
- 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54,
- 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
- 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a,
- 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
- 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5,
- 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
- 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45,
- 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
- 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a,
- 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
- 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48,
- 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
- 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687,
- 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
- 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927,
- 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
- 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8,
- 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
- 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096,
- 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
- 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859,
- 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
- 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9,
- 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
- 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36,
- 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
- 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c,
- 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
- 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043,
- 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
- 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3,
- 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
- 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c,
- 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
- 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652,
- 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
- 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d,
- 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
- 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d,
- 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
- 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2,
- 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
- 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530,
- 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
- 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff,
- 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
- 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f,
- 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
- 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90,
- 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
- 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee,
- 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
- 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321,
- 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
- 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81,
- 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
- 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e,
- 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,};
-
-#endif
-#else
-#define MAX_SIZE 32768
-.constants:
-
- /* Reduce 262144 kbits to 1024 bits */
- /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */
- .octa 0x00000000b6ca9e20000000009c37c408
-
- /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */
- .octa 0x00000000350249a800000001b51df26c
-
- /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */
- .octa 0x00000001862dac54000000000724b9d0
-
- /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */
- .octa 0x00000001d87fb48c00000001c00532fe
-
- /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */
- .octa 0x00000001f39b699e00000000f05a9362
-
- /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */
- .octa 0x0000000101da11b400000001e1007970
-
- /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */
- .octa 0x00000001cab571e000000000a57366ee
-
- /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */
- .octa 0x00000000c7020cfe0000000192011284
-
- /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */
- .octa 0x00000000cdaed1ae0000000162716d9a
-
- /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */
- .octa 0x00000001e804effc00000000cd97ecde
-
- /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */
- .octa 0x0000000077c3ea3a0000000058812bc0
-
- /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */
- .octa 0x0000000068df31b40000000088b8c12e
-
- /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */
- .octa 0x00000000b059b6c200000001230b234c
-
- /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */
- .octa 0x0000000145fb8ed800000001120b416e
-
- /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */
- .octa 0x00000000cbc0916800000001974aecb0
-
- /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */
- .octa 0x000000005ceeedc2000000008ee3f226
-
- /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */
- .octa 0x0000000047d74e8600000001089aba9a
-
- /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */
- .octa 0x00000001407e9e220000000065113872
-
- /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */
- .octa 0x00000001da967bda000000005c07ec10
-
- /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */
- .octa 0x000000006c8983680000000187590924
-
- /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */
- .octa 0x00000000f2d14c9800000000e35da7c6
-
- /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */
- .octa 0x00000001993c6ad4000000000415855a
-
- /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */
- .octa 0x000000014683d1ac0000000073617758
-
- /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */
- .octa 0x00000001a7c93e6c0000000176021d28
-
- /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */
- .octa 0x000000010211e90a00000001c358fd0a
-
- /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */
- .octa 0x000000001119403e00000001ff7a2c18
-
- /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */
- .octa 0x000000001c3261aa00000000f2d9f7e4
-
- /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */
- .octa 0x000000014e37a634000000016cf1f9c8
-
- /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */
- .octa 0x0000000073786c0c000000010af9279a
-
- /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */
- .octa 0x000000011dc037f80000000004f101e8
-
- /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */
- .octa 0x0000000031433dfc0000000070bcf184
-
- /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */
- .octa 0x000000009cde8348000000000a8de642
-
- /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */
- .octa 0x0000000038d3c2a60000000062ea130c
-
- /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */
- .octa 0x000000011b25f26000000001eb31cbb2
-
- /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */
- .octa 0x000000001629e6f00000000170783448
-
- /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */
- .octa 0x0000000160838b4c00000001a684b4c6
-
- /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */
- .octa 0x000000007a44011c00000000253ca5b4
-
- /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */
- .octa 0x00000000226f417a0000000057b4b1e2
-
- /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */
- .octa 0x0000000045eb2eb400000000b6bd084c
-
- /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */
- .octa 0x000000014459d70c0000000123c2d592
-
- /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */
- .octa 0x00000001d406ed8200000000159dafce
-
- /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */
- .octa 0x0000000160c8e1a80000000127e1a64e
-
- /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */
- .octa 0x0000000027ba80980000000056860754
-
- /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */
- .octa 0x000000006d92d01800000001e661aae8
-
- /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */
- .octa 0x000000012ed7e3f200000000f82c6166
-
- /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */
- .octa 0x000000002dc8778800000000c4f9c7ae
-
- /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */
- .octa 0x0000000018240bb80000000074203d20
-
- /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */
- .octa 0x000000001ad381580000000198173052
-
- /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */
- .octa 0x00000001396b78f200000001ce8aba54
-
- /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */
- .octa 0x000000011a68133400000001850d5d94
-
- /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */
- .octa 0x000000012104732e00000001d609239c
-
- /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */
- .octa 0x00000000a140d90c000000001595f048
-
- /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */
- .octa 0x00000001b7215eda0000000042ccee08
-
- /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */
- .octa 0x00000001aaf1df3c000000010a389d74
-
- /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */
- .octa 0x0000000029d15b8a000000012a840da6
-
- /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */
- .octa 0x00000000f1a96922000000001d181c0c
-
- /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */
- .octa 0x00000001ac80d03c0000000068b7d1f6
-
- /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */
- .octa 0x000000000f11d56a000000005b0f14fc
-
- /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */
- .octa 0x00000001f1c022a20000000179e9e730
-
- /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */
- .octa 0x0000000173d00ae200000001ce1368d6
-
- /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */
- .octa 0x00000001d4ffe4ac0000000112c3a84c
-
- /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */
- .octa 0x000000016edc5ae400000000de940fee
-
- /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */
- .octa 0x00000001f1a0214000000000fe896b7e
-
- /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */
- .octa 0x00000000ca0b28a000000001f797431c
-
- /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */
- .octa 0x00000001928e30a20000000053e989ba
-
- /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */
- .octa 0x0000000097b1b002000000003920cd16
-
- /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */
- .octa 0x00000000b15bf90600000001e6f579b8
-
- /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */
- .octa 0x00000000411c5d52000000007493cb0a
-
- /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */
- .octa 0x00000001c36f330000000001bdd376d8
-
- /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */
- .octa 0x00000001119227e0000000016badfee6
-
- /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */
- .octa 0x00000000114d47020000000071de5c58
-
- /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */
- .octa 0x00000000458b5b9800000000453f317c
-
- /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */
- .octa 0x000000012e31fb8e0000000121675cce
-
- /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */
- .octa 0x000000005cf619d800000001f409ee92
-
- /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */
- .octa 0x0000000063f4d8b200000000f36b9c88
-
- /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */
- .octa 0x000000004138dc8a0000000036b398f4
-
- /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */
- .octa 0x00000001d29ee8e000000001748f9adc
-
- /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */
- .octa 0x000000006a08ace800000001be94ec00
-
- /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */
- .octa 0x0000000127d4201000000000b74370d6
-
- /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */
- .octa 0x0000000019d76b6200000001174d0b98
-
- /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */
- .octa 0x00000001b1471f6e00000000befc06a4
-
- /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */
- .octa 0x00000001f64c19cc00000001ae125288
-
- /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */
- .octa 0x00000000003c0ea00000000095c19b34
-
- /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */
- .octa 0x000000014d73abf600000001a78496f2
-
- /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */
- .octa 0x00000001620eb84400000001ac5390a0
-
- /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */
- .octa 0x0000000147655048000000002a80ed6e
-
- /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */
- .octa 0x0000000067b5077e00000001fa9b0128
-
- /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */
- .octa 0x0000000010ffe20600000001ea94929e
-
- /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */
- .octa 0x000000000fee8f1e0000000125f4305c
-
- /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */
- .octa 0x00000001da26fbae00000001471e2002
-
- /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */
- .octa 0x00000001b3a8bd880000000132d2253a
-
- /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */
- .octa 0x00000000e8f3898e00000000f26b3592
-
- /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */
- .octa 0x00000000b0d0d28c00000000bc8b67b0
-
- /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */
- .octa 0x0000000030f2a798000000013a826ef2
-
- /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */
- .octa 0x000000000fba10020000000081482c84
-
- /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */
- .octa 0x00000000bdb9bd7200000000e77307c2
-
- /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */
- .octa 0x0000000075d3bf5a00000000d4a07ec8
-
- /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */
- .octa 0x00000000ef1f98a00000000017102100
-
- /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */
- .octa 0x00000000689c760200000000db406486
-
- /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */
- .octa 0x000000016d5fa5fe0000000192db7f88
-
- /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */
- .octa 0x00000001d0d2b9ca000000018bf67b1e
-
- /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */
- .octa 0x0000000041e7b470000000007c09163e
-
- /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */
- .octa 0x00000001cbb6495e000000000adac060
-
- /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */
- .octa 0x000000010052a0b000000000bd8316ae
-
- /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */
- .octa 0x00000001d8effb5c000000019f09ab54
-
- /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */
- .octa 0x00000001d969853c0000000125155542
-
- /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */
- .octa 0x00000000523ccce2000000018fdb5882
-
- /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */
- .octa 0x000000001e2436bc00000000e794b3f4
-
- /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */
- .octa 0x00000000ddd1c3a2000000016f9bb022
-
- /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */
- .octa 0x0000000019fcfe3800000000290c9978
-
- /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */
- .octa 0x00000001ce95db640000000083c0f350
-
- /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */
- .octa 0x00000000af5828060000000173ea6628
-
- /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */
- .octa 0x00000001006388f600000001c8b4e00a
-
- /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */
- .octa 0x0000000179eca00a00000000de95d6aa
-
- /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */
- .octa 0x0000000122410a6a000000010b7f7248
-
- /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */
- .octa 0x000000004288e87c00000001326e3a06
-
- /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */
- .octa 0x000000016c5490da00000000bb62c2e6
-
- /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */
- .octa 0x00000000d1c71f6e0000000156a4b2c2
-
- /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */
- .octa 0x00000001b4ce08a6000000011dfe763a
-
- /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */
- .octa 0x00000001466ba60c000000007bcca8e2
-
- /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */
- .octa 0x00000001f6c488a40000000186118faa
-
- /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */
- .octa 0x000000013bfb06820000000111a65a88
-
- /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */
- .octa 0x00000000690e9e54000000003565e1c4
-
- /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */
- .octa 0x00000000281346b6000000012ed02a82
-
- /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */
- .octa 0x000000015646402400000000c486ecfc
-
- /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */
- .octa 0x000000016063a8dc0000000001b951b2
-
- /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */
- .octa 0x0000000116a663620000000048143916
-
- /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */
- .octa 0x000000017e8aa4d200000001dc2ae124
-
- /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */
- .octa 0x00000001728eb10c00000001416c58d6
-
- /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */
- .octa 0x00000001b08fd7fa00000000a479744a
-
- /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */
- .octa 0x00000001092a16e80000000096ca3a26
-
- /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */
- .octa 0x00000000a505637c00000000ff223d4e
-
- /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */
- .octa 0x00000000d94869b2000000010e84da42
-
- /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */
- .octa 0x00000001c8b203ae00000001b61ba3d0
-
- /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */
- .octa 0x000000005704aea000000000680f2de8
-
- /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */
- .octa 0x000000012e295fa2000000008772a9a8
-
- /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */
- .octa 0x000000011d0908bc0000000155f295bc
-
- /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */
- .octa 0x0000000193ed97ea00000000595f9282
-
- /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */
- .octa 0x000000013a0f1c520000000164b1c25a
-
- /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */
- .octa 0x000000010c2c40c000000000fbd67c50
-
- /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */
- .octa 0x00000000ff6fac3e0000000096076268
-
- /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */
- .octa 0x000000017b3609c000000001d288e4cc
-
- /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */
- .octa 0x0000000088c8c92200000001eaac1bdc
-
- /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */
- .octa 0x00000001751baae600000001f1ea39e2
-
- /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */
- .octa 0x000000010795297200000001eb6506fc
-
- /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */
- .octa 0x0000000162b00abe000000010f806ffe
-
- /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */
- .octa 0x000000000d7b404c000000010408481e
-
- /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */
- .octa 0x00000000763b13d40000000188260534
-
- /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */
- .octa 0x00000000f6dc22d80000000058fc73e0
-
- /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */
- .octa 0x000000007daae06000000000391c59b8
-
- /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */
- .octa 0x000000013359ab7c000000018b638400
-
- /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */
- .octa 0x000000008add438a000000011738f5c4
-
- /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */
- .octa 0x00000001edbefdea000000008cf7c6da
-
- /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */
- .octa 0x000000004104e0f800000001ef97fb16
-
- /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */
- .octa 0x00000000b48a82220000000102130e20
-
- /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */
- .octa 0x00000001bcb4684400000000db968898
-
- /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */
- .octa 0x000000013293ce0a00000000b5047b5e
-
- /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */
- .octa 0x00000001710d0844000000010b90fdb2
-
- /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */
- .octa 0x0000000117907f6e000000004834a32e
-
- /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */
- .octa 0x0000000087ddf93e0000000059c8f2b0
-
- /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */
- .octa 0x000000005970e9b00000000122cec508
-
- /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */
- .octa 0x0000000185b2b7d0000000000a330cda
-
- /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */
- .octa 0x00000001dcee0efc000000014a47148c
-
- /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */
- .octa 0x0000000030da27220000000042c61cb8
-
- /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */
- .octa 0x000000012f925a180000000012fe6960
-
- /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */
- .octa 0x00000000dd2e357c00000000dbda2c20
-
- /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */
- .octa 0x00000000071c80de000000011122410c
-
- /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */
- .octa 0x000000011513140a00000000977b2070
-
- /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */
- .octa 0x00000001df876e8e000000014050438e
-
- /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */
- .octa 0x000000015f81d6ce0000000147c840e8
-
- /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */
- .octa 0x000000019dd94dbe00000001cc7c88ce
-
- /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */
- .octa 0x00000001373d206e00000001476b35a4
-
- /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */
- .octa 0x00000000668ccade000000013d52d508
-
- /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */
- .octa 0x00000001b192d268000000008e4be32e
-
- /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */
- .octa 0x00000000e30f3a7800000000024120fe
-
- /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */
- .octa 0x000000010ef1f7bc00000000ddecddb4
-
- /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */
- .octa 0x00000001f5ac738000000000d4d403bc
-
- /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */
- .octa 0x000000011822ea7000000001734b89aa
-
- /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */
- .octa 0x00000000c3a33848000000010e7a58d6
-
- /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */
- .octa 0x00000001bd151c2400000001f9f04e9c
-
- /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */
- .octa 0x0000000056002d7600000000b692225e
-
- /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */
- .octa 0x000000014657c4f4000000019b8d3f3e
-
- /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */
- .octa 0x0000000113742d7c00000001a874f11e
-
- /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */
- .octa 0x000000019c5920ba000000010d5a4254
-
- /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */
- .octa 0x000000005216d2d600000000bbb2f5d6
-
- /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */
- .octa 0x0000000136f5ad8a0000000179cc0e36
-
- /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */
- .octa 0x000000018b07beb600000001dca1da4a
-
- /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */
- .octa 0x00000000db1e93b000000000feb1a192
-
- /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */
- .octa 0x000000000b96fa3a00000000d1eeedd6
-
- /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */
- .octa 0x00000001d9968af0000000008fad9bb4
-
- /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */
- .octa 0x000000000e4a77a200000001884938e4
-
- /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */
- .octa 0x00000000508c2ac800000001bc2e9bc0
-
- /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */
- .octa 0x0000000021572a8000000001f9658a68
-
- /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */
- .octa 0x00000001b859daf2000000001b9224fc
-
- /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */
- .octa 0x000000016f7884740000000055b2fb84
-
- /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */
- .octa 0x00000001b438810e000000018b090348
-
- /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */
- .octa 0x0000000095ddc6f2000000011ccbd5ea
-
- /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */
- .octa 0x00000001d977c20c0000000007ae47f8
-
- /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */
- .octa 0x00000000ebedb99a0000000172acbec0
-
- /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */
- .octa 0x00000001df9e9e9200000001c6e3ff20
-
- /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */
- .octa 0x00000001a4a3f95200000000e1b38744
-
- /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */
- .octa 0x00000000e2f5122000000000791585b2
-
- /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */
- .octa 0x000000004aa01f3e00000000ac53b894
-
- /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */
- .octa 0x00000000b3e90a5800000001ed5f2cf4
-
- /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */
- .octa 0x000000000c9ca2aa00000001df48b2e0
-
- /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */
- .octa 0x000000015168231600000000049c1c62
-
- /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */
- .octa 0x0000000036fce78c000000017c460c12
-
- /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */
- .octa 0x000000009037dc10000000015be4da7e
-
- /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */
- .octa 0x00000000d3298582000000010f38f668
-
- /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */
- .octa 0x00000001b42e8ad60000000039f40a00
-
- /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */
- .octa 0x00000000142a983800000000bd4c10c4
-
- /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */
- .octa 0x0000000109c7f1900000000042db1d98
-
- /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */
- .octa 0x0000000056ff931000000001c905bae6
-
- /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */
- .octa 0x00000001594513aa00000000069d40ea
-
- /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */
- .octa 0x00000001e3b5b1e8000000008e4fbad0
-
- /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */
- .octa 0x000000011dd5fc080000000047bedd46
-
- /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */
- .octa 0x00000001675f0cc20000000026396bf8
-
- /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */
- .octa 0x00000000d1c8dd4400000000379beb92
-
- /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */
- .octa 0x0000000115ebd3d8000000000abae54a
-
- /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */
- .octa 0x00000001ecbd0dac0000000007e6a128
-
- /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */
- .octa 0x00000000cdf67af2000000000ade29d2
-
- /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */
- .octa 0x000000004c01ff4c00000000f974c45c
-
- /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */
- .octa 0x00000000f2d8657e00000000e77ac60a
-
- /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */
- .octa 0x000000006bae74c40000000145895816
-
- /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */
- .octa 0x0000000152af8aa00000000038e362be
-
- /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */
- .octa 0x0000000004663802000000007f991a64
-
- /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */
- .octa 0x00000001ab2f5afc00000000fa366d3a
-
- /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */
- .octa 0x0000000074a4ebd400000001a2bb34f0
-
- /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */
- .octa 0x00000001d7ab3a4c0000000028a9981e
-
- /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */
- .octa 0x00000001a8da60c600000001dbc672be
-
- /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */
- .octa 0x000000013cf6382000000000b04d77f6
-
- /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */
- .octa 0x00000000bec12e1e0000000124400d96
-
- /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */
- .octa 0x00000001c6368010000000014ca4b414
-
- /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */
- .octa 0x00000001e6e78758000000012fe2c938
-
- /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */
- .octa 0x000000008d7f2b3c00000001faed01e6
-
- /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */
- .octa 0x000000016b4a156e000000007e80ecfe
-
- /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */
- .octa 0x00000001c63cfeb60000000098daee94
-
- /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */
- .octa 0x000000015f902670000000010a04edea
-
- /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */
- .octa 0x00000001cd5de11e00000001c00b4524
-
- /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */
- .octa 0x000000001acaec540000000170296550
-
- /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */
- .octa 0x000000002bd0ca780000000181afaa48
-
- /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */
- .octa 0x0000000032d63d5c0000000185a31ffa
-
- /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */
- .octa 0x000000001c6d4e4c000000002469f608
-
- /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */
- .octa 0x0000000106a60b92000000006980102a
-
- /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */
- .octa 0x00000000d3855e120000000111ea9ca8
-
- /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */
- .octa 0x00000000e312563600000001bd1d29ce
-
- /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */
- .octa 0x000000009e8f7ea400000001b34b9580
-
- /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */
- .octa 0x00000001c82e562c000000003076054e
-
- /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */
- .octa 0x00000000ca9f09ce000000012a608ea4
-
- /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */
- .octa 0x00000000c63764e600000000784d05fe
-
- /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */
- .octa 0x0000000168d2e49e000000016ef0d82a
-
- /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */
- .octa 0x00000000e986c1480000000075bda454
-
- /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */
- .octa 0x00000000cfb65894000000003dc0a1c4
-
- /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */
- .octa 0x0000000111cadee400000000e9a5d8be
-
- /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */
- .octa 0x0000000171fb63ce00000001609bc4b4
-
-.short_constants:
-
- /* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */
- /* x^1952 mod p(x)`, x^1984 mod p(x)`, x^2016 mod p(x)`, x^2048 mod p(x)` */
- .octa 0x7fec2963e5bf80485cf015c388e56f72
-
- /* x^1824 mod p(x)`, x^1856 mod p(x)`, x^1888 mod p(x)`, x^1920 mod p(x)` */
- .octa 0x38e888d4844752a9963a18920246e2e6
-
- /* x^1696 mod p(x)`, x^1728 mod p(x)`, x^1760 mod p(x)`, x^1792 mod p(x)` */
- .octa 0x42316c00730206ad419a441956993a31
-
- /* x^1568 mod p(x)`, x^1600 mod p(x)`, x^1632 mod p(x)`, x^1664 mod p(x)` */
- .octa 0x543d5c543e65ddf9924752ba2b830011
-
- /* x^1440 mod p(x)`, x^1472 mod p(x)`, x^1504 mod p(x)`, x^1536 mod p(x)` */
- .octa 0x78e87aaf56767c9255bd7f9518e4a304
-
- /* x^1312 mod p(x)`, x^1344 mod p(x)`, x^1376 mod p(x)`, x^1408 mod p(x)` */
- .octa 0x8f68fcec1903da7f6d76739fe0553f1e
-
- /* x^1184 mod p(x)`, x^1216 mod p(x)`, x^1248 mod p(x)`, x^1280 mod p(x)` */
- .octa 0x3f4840246791d588c133722b1fe0b5c3
-
- /* x^1056 mod p(x)`, x^1088 mod p(x)`, x^1120 mod p(x)`, x^1152 mod p(x)` */
- .octa 0x34c96751b04de25a64b67ee0e55ef1f3
-
- /* x^928 mod p(x)`, x^960 mod p(x)`, x^992 mod p(x)`, x^1024 mod p(x)` */
- .octa 0x156c8e180b4a395b069db049b8fdb1e7
-
- /* x^800 mod p(x)`, x^832 mod p(x)`, x^864 mod p(x)`, x^896 mod p(x)` */
- .octa 0xe0b99ccbe661f7bea11bfaf3c9e90b9e
-
- /* x^672 mod p(x)`, x^704 mod p(x)`, x^736 mod p(x)`, x^768 mod p(x)` */
- .octa 0x041d37768cd75659817cdc5119b29a35
-
- /* x^544 mod p(x)`, x^576 mod p(x)`, x^608 mod p(x)`, x^640 mod p(x)` */
- .octa 0x3a0777818cfaa9651ce9d94b36c41f1c
-
- /* x^416 mod p(x)`, x^448 mod p(x)`, x^480 mod p(x)`, x^512 mod p(x)` */
- .octa 0x0e148e8252377a554f256efcb82be955
-
- /* x^288 mod p(x)`, x^320 mod p(x)`, x^352 mod p(x)`, x^384 mod p(x)` */
- .octa 0x9c25531d19e65ddeec1631edb2dea967
-
- /* x^160 mod p(x)`, x^192 mod p(x)`, x^224 mod p(x)`, x^256 mod p(x)` */
- .octa 0x790606ff9957c0a65d27e147510ac59a
-
- /* x^32 mod p(x)`, x^64 mod p(x)`, x^96 mod p(x)`, x^128 mod p(x)` */
- .octa 0x82f63b786ea2d55ca66805eb18b8ea18
-
-
-.barrett_constants:
- /* 33 bit reflected Barrett constant m - (4^32)/n */
- .octa 0x000000000000000000000000dea713f1 /* x^64 div p(x)` */
- /* 33 bit reflected Barrett constant n */
- .octa 0x00000000000000000000000105ec76f1
-#endif
-
-#endif /* __powerpc__ */
-
-#endif
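
Editor's note: the table deleted above is the precomputed constant set for CRC-32C folding on POWER8, with p(x) = 0x11EDC6F41. Each `.octa` in `.constants` holds an x^N mod p(x) remainder (bit-reflected and shifted left by one, per the `p(x)` << 1` notation in the comments), `.short_constants` covers the final 2048-to-64-bit reduction, and `.barrett_constants` holds the 33-bit reflected Barrett pair: m = x^64 div p(x) (0xdea713f1) and n, the bit-reflected polynomial (0x105ec76f1). As a minimal sketch, not part of this patch, such remainders can be regenerated by repeated shift-and-reduce over GF(2); this works in plain (non-reflected) bit order:

#include <stdint.h>
#include <stdio.h>

/* Regenerate x^n mod p(x) over GF(2) for CRC-32C, p(x) = 0x11EDC6F41.
 * The deleted table stores these remainders bit-reflected and shifted
 * left by one; this illustrative sketch uses plain bit order. */
static uint32_t xpow_mod_p(unsigned n)
{
	uint32_t r = 1;				/* x^0 */
	while (n--) {
		uint32_t carry = r & 0x80000000u;
		r <<= 1;			/* multiply by x */
		if (carry)
			r ^= 0x1EDC6F41u;	/* subtract p(x); xor in GF(2) */
	}
	return r;
}

int main(void)
{
	/* the fold constants above step in units of 1024 bits */
	printf("x^1024 mod p(x) = 0x%08x\n", xpow_mod_p(1024));
	printf("x^32   mod p(x) = 0x%08x\n", xpow_mod_p(32)); /* 0x1edc6f41 */
	return 0;
}
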
diff --git a/storage/xtradb/ut/crc32_power8/crc32_wrapper.c b/storage/xtradb/ut/crc32_power8/crc32_wrapper.c
deleted file mode 100644
index d4c91371fa1..00000000000
--- a/storage/xtradb/ut/crc32_power8/crc32_wrapper.c
+++ /dev/null
@@ -1,68 +0,0 @@
-#ifdef __powerpc__
-
-#define CRC_TABLE
-#include "crc32_constants.h"
-
-#define VMX_ALIGN 16
-#define VMX_ALIGN_MASK (VMX_ALIGN-1)
-
-#ifdef REFLECT
-static unsigned int crc32_align(unsigned int crc, unsigned char *p,
- unsigned long len)
-{
- while (len--)
- crc = crc_table[(crc ^ *p++) & 0xff] ^ (crc >> 8);
- return crc;
-}
-#else
-static unsigned int crc32_align(unsigned int crc, unsigned char *p,
- unsigned long len)
-{
- while (len--)
- crc = crc_table[((crc >> 24) ^ *p++) & 0xff] ^ (crc << 8);
- return crc;
-}
-#endif
-
-unsigned int __crc32_vpmsum(unsigned int crc, unsigned char *p,
- unsigned long len);
-
-unsigned int crc32_vpmsum(unsigned int crc, unsigned char *p,
- unsigned long len)
-{
- unsigned int prealign;
- unsigned int tail;
-
-#ifdef CRC_XOR
- crc ^= 0xffffffff;
-#endif
-
- if (len < VMX_ALIGN + VMX_ALIGN_MASK) {
- crc = crc32_align(crc, p, len);
- goto out;
- }
-
- if ((unsigned long)p & VMX_ALIGN_MASK) {
- prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK);
- crc = crc32_align(crc, p, prealign);
- len -= prealign;
- p += prealign;
- }
-
- crc = __crc32_vpmsum(crc, p, len & ~VMX_ALIGN_MASK);
-
- tail = len & VMX_ALIGN_MASK;
- if (tail) {
- p += len & ~VMX_ALIGN_MASK;
- crc = crc32_align(crc, p, tail);
- }
-
-out:
-#ifdef CRC_XOR
- crc ^= 0xffffffff;
-#endif
-
- return crc;
-}
-
-#endif /* __powerpc__ */
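
Editor's note: the deleted wrapper above is a standard alignment shim. Bytes before the first 16-byte boundary and the sub-16-byte tail go through the small table-driven loop (crc32_align()), and only the aligned middle is handed to the vectorized __crc32_vpmsum kernel; pre/post inversion of the CRC is handled around the whole thing (CRC_XOR). A self-contained sketch of the same head/body/tail split, with a bitwise CRC-32C loop standing in for both helpers (illustrative only, not code from this tree):

#include <stdint.h>
#include <stddef.h>

#define VMX_ALIGN      16
#define VMX_ALIGN_MASK (VMX_ALIGN - 1)

/* reflected CRC-32C, one bit at a time; no pre/post inversion,
 * matching crc32_align() in the deleted wrapper */
static uint32_t bytewise(uint32_t crc, const unsigned char *p, size_t len)
{
	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0x82F63B78u & -(crc & 1));
	}
	return crc;
}

/* stand-in for the aligned SIMD kernel (__crc32_vpmsum) */
static uint32_t fast_kernel(uint32_t crc, const unsigned char *p, size_t len)
{
	return bytewise(crc, p, len);
}

uint32_t crc32_split(uint32_t crc, const unsigned char *p, size_t len)
{
	if (len < VMX_ALIGN + VMX_ALIGN_MASK)
		return bytewise(crc, p, len);	/* too short to vectorize */

	if ((uintptr_t)p & VMX_ALIGN_MASK) {	/* unaligned head */
		size_t prealign = VMX_ALIGN - ((uintptr_t)p & VMX_ALIGN_MASK);
		crc = bytewise(crc, p, prealign);
		p += prealign;
		len -= prealign;
	}

	crc = fast_kernel(crc, p, len & ~(size_t)VMX_ALIGN_MASK);

	if (len & VMX_ALIGN_MASK)		/* sub-16-byte tail */
		crc = bytewise(crc, p + (len & ~(size_t)VMX_ALIGN_MASK),
			       len & VMX_ALIGN_MASK);
	return crc;
}
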
diff --git a/storage/xtradb/ut/crc32_power8/ppc-opcode.h b/storage/xtradb/ut/crc32_power8/ppc-opcode.h
deleted file mode 100644
index 5942bd4923a..00000000000
--- a/storage/xtradb/ut/crc32_power8/ppc-opcode.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef __OPCODES_H
-#define __OPCODES_H
-
-#define __PPC_RA(a) (((a) & 0x1f) << 16)
-#define __PPC_RB(b) (((b) & 0x1f) << 11)
-#define __PPC_XA(a) ((((a) & 0x1f) << 16) | (((a) & 0x20) >> 3))
-#define __PPC_XB(b) ((((b) & 0x1f) << 11) | (((b) & 0x20) >> 4))
-#define __PPC_XS(s) ((((s) & 0x1f) << 21) | (((s) & 0x20) >> 5))
-#define __PPC_XT(s) __PPC_XS(s)
-#define VSX_XX3(t, a, b) (__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b))
-#define VSX_XX1(s, a, b) (__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b))
-
-#define PPC_INST_VPMSUMW 0x10000488
-#define PPC_INST_VPMSUMD 0x100004c8
-#define PPC_INST_MFVSRD 0x7c000066
-#define PPC_INST_MTVSRD 0x7c000166
-
-#define VPMSUMW(t, a, b) .long PPC_INST_VPMSUMW | VSX_XX3((t), a, b)
-#define VPMSUMD(t, a, b) .long PPC_INST_VPMSUMD | VSX_XX3((t), a, b)
-#define MFVRD(a, t) .long PPC_INST_MFVSRD | VSX_XX1((t)+32, a, 0)
-#define MTVRD(t, a) .long PPC_INST_MTVSRD | VSX_XX1((t)+32, a, 0)
-
-#endif
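
Editor's note: the deleted ppc-opcode.h exists because older assemblers lack mnemonics for the POWER8 vpmsumw/vpmsumd and mfvsrd/mtvsrd instructions, so the macros emit raw instruction words via `.long`. Applying the deleted macros by hand shows how an encoding comes out; this is a hypothetical check, not code from the tree:

#include <stdio.h>
#include <stdint.h>

/* same field encodings as the deleted header */
#define PPC_XA(a) ((((a) & 0x1f) << 16) | (((a) & 0x20) >> 3))
#define PPC_XB(b) ((((b) & 0x1f) << 11) | (((b) & 0x20) >> 4))
#define PPC_XT(t) ((((t) & 0x1f) << 21) | (((t) & 0x20) >> 5))
#define VSX_XX3(t, a, b) (PPC_XT(t) | PPC_XA(a) | PPC_XB(b))
#define PPC_INST_VPMSUMD 0x100004c8u

int main(void)
{
	/* vpmsumd v0,v16,v17 */
	uint32_t insn = PPC_INST_VPMSUMD | VSX_XX3(0, 16, 17);
	printf("0x%08x\n", insn);	/* prints 0x10108cc8 */
	return 0;
}
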
diff --git a/storage/xtradb/ut/ut0crc32.cc b/storage/xtradb/ut/ut0crc32.cc
index 4ace913726d..4bb2a865ffe 100644
--- a/storage/xtradb/ut/ut0crc32.cc
+++ b/storage/xtradb/ut/ut0crc32.cc
@@ -97,13 +97,13 @@ have support for it */
static ib_uint32_t ut_crc32_slice8_table[8][256];
static ibool ut_crc32_slice8_table_initialized = FALSE;
-/* Flag that tells whether the CPU supports CRC32 or not */
-UNIV_INTERN bool ut_crc32_sse2_enabled = false;
-UNIV_INTERN bool ut_crc32_power8_enabled = false;
+/** Text description of CRC32 implementation */
+const char *ut_crc32_implementation = NULL;
/********************************************************************//**
Initializes the table that is used to generate the CRC32 if the CPU does
not have support for it. */
+#ifndef HAVE_CRC32_VPMSUM
static
void
ut_crc32_slice8_table_init()
@@ -133,6 +133,7 @@ ut_crc32_slice8_table_init()
ut_crc32_slice8_table_initialized = TRUE;
}
+#endif
#if defined(__GNUC__) && defined(__x86_64__)
/********************************************************************//**
@@ -181,27 +182,22 @@ for RHEL4 support (GCC 3 doesn't support this instruction) */
len -= 8, buf += 8
#endif /* defined(__GNUC__) && defined(__x86_64__) */
-#if defined(__powerpc__)
+
+#ifdef HAVE_CRC32_VPMSUM
extern "C" {
-unsigned int crc32_vpmsum(unsigned int crc, const unsigned char *p, unsigned long len);
+unsigned int crc32c_vpmsum(unsigned int crc, const unsigned char *p, unsigned long len);
};
-#endif /* __powerpc__ */
UNIV_INLINE
ib_uint32_t
ut_crc32_power8(
/*===========*/
const byte* buf, /*!< in: data over which to calculate CRC32 */
- ulint len) /*!< in: data length */
+ ulint len) /*!< in: data length */
{
-#if defined(__powerpc__) && !defined(WORDS_BIGENDIAN)
- return crc32_vpmsum(0, buf, len);
-#else
- ut_error;
- /* silence compiler warning about unused parameters */
- return((ib_uint32_t) buf[len]);
-#endif /* __powerpc__ */
+ return crc32c_vpmsum(0, buf, len);
}
+#endif
/********************************************************************//**
Calculates CRC32 using CPU instructions.
@@ -216,8 +212,6 @@ ut_crc32_sse42(
#if defined(__GNUC__) && defined(__x86_64__)
ib_uint64_t crc = (ib_uint32_t) (-1);
- ut_a(ut_crc32_sse2_enabled);
-
while (len && ((ulint) buf & 7)) {
ut_crc32_sse42_byte;
}
@@ -305,6 +299,10 @@ void
ut_crc32_init()
/*===========*/
{
+ ut_crc32_slice8_table_init();
+ ut_crc32 = ut_crc32_slice8;
+ ut_crc32_implementation = "Using generic crc32 instructions";
+
#if defined(__GNUC__) && defined(__x86_64__)
ib_uint32_t vend[3];
ib_uint32_t model;
@@ -316,21 +314,13 @@ ut_crc32_init()
ut_cpuid(vend, &model, &family, &stepping,
&features_ecx, &features_edx);
- ut_crc32_sse2_enabled = (features_ecx >> 20) & 1;
-#endif /* defined(__GNUC__) && defined(__x86_64__) */
-
-#if defined(__linux__) && defined(__powerpc__) && defined(AT_HWCAP2) \
- && !defined(WORDS_BIGENDIAN)
- if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07)
- ut_crc32_power8_enabled = true;
-#endif /* defined(__linux__) && defined(__powerpc__) */
-
- if (ut_crc32_sse2_enabled) {
+ if ((features_ecx >> 20) & 1) {
ut_crc32 = ut_crc32_sse42;
- } else if (ut_crc32_power8_enabled) {
- ut_crc32 = ut_crc32_power8;
- } else {
- ut_crc32_slice8_table_init();
- ut_crc32 = ut_crc32_slice8;
+ ut_crc32_implementation = "Using SSE2 crc32 instructions";
}
+
+#elif defined(HAVE_CRC32_VPMSUM)
+ ut_crc32 = ut_crc32_power8;
+ ut_crc32_implementation = "Using POWER8 crc32 instructions";
+#endif
}
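
Editor's note: the rewritten ut_crc32_init() above drops the ut_crc32_sse2_enabled / ut_crc32_power8_enabled flags in favour of installing the portable slice-by-8 implementation unconditionally and then overriding the function pointer when SSE4.2 is detected at runtime, or at build time when HAVE_CRC32_VPMSUM is set; ut_crc32_implementation records which path won. A minimal self-contained sketch of that default-then-override dispatch (names are illustrative, and GCC's __builtin_cpu_supports stands in for the hand-rolled ut_cpuid):

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

typedef uint32_t (*crc32_fn)(const unsigned char *, size_t);

/* portable fallback: bitwise reflected CRC-32C */
static uint32_t crc32_generic(const unsigned char *p, size_t len)
{
	uint32_t crc = 0xffffffffu;
	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0x82F63B78u & -(crc & 1));
	}
	return crc ^ 0xffffffffu;
}

static crc32_fn    crc32_impl      = crc32_generic;
static const char *crc32_impl_desc = "Using generic crc32 instructions";

static void crc32_init(void)
{
	/* the safe default is already installed above; upgrade only
	 * when the CPU actually advertises the fast instruction */
#if defined(__GNUC__) && defined(__x86_64__)
	if (__builtin_cpu_supports("sse4.2")) {
		/* a real build would install the SSE4.2 kernel here */
		crc32_impl_desc = "Using SSE2 crc32 instructions";
	}
#endif
}

int main(void)
{
	crc32_init();
	puts(crc32_impl_desc);
	/* the CRC-32C check value for "123456789" is 0xe3069283 */
	printf("0x%08x\n", crc32_impl((const unsigned char *)"123456789", 9));
	return 0;
}
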
diff --git a/storage/xtradb/ut/ut0wqueue.cc b/storage/xtradb/ut/ut0wqueue.cc
index e6a27263ac3..8aec05ca929 100644
--- a/storage/xtradb/ut/ut0wqueue.cc
+++ b/storage/xtradb/ut/ut0wqueue.cc
@@ -1,7 +1,6 @@
/*****************************************************************************
Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -196,15 +195,17 @@ ib_wqueue_nowait(
return (node ? node->data : NULL);
}
-/** Check if queue is empty.
-@param wq wait queue
-@return whether the queue is empty */
-bool ib_wqueue_is_empty(ib_wqueue_t* wq)
+/********************************************************************
+Check if queue is empty. */
+
+ibool
+ib_wqueue_is_empty(
+/*===============*/
+ /* out: TRUE if queue empty
+ else FALSE */
+ const ib_wqueue_t* wq) /* in: work queue */
{
- mutex_enter(&wq->mutex);
- bool is_empty = ib_list_is_empty(wq->items);
- mutex_exit(&wq->mutex);
- return is_empty;
+ return(ib_list_is_empty(wq->items));
}
/********************************************************************